From 5b452bd1decbc1b11481c78af4e8c39be270210b Mon Sep 17 00:00:00 2001 From: Tim Gymnich Date: Fri, 4 Oct 2024 09:20:06 +0000 Subject: [PATCH 01/16] add isScalar and isFixedVector --- llvm/include/llvm/CodeGenTypes/LowLevelType.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/llvm/include/llvm/CodeGenTypes/LowLevelType.h b/llvm/include/llvm/CodeGenTypes/LowLevelType.h index 06879e1f8d15b..891d6174048ea 100644 --- a/llvm/include/llvm/CodeGenTypes/LowLevelType.h +++ b/llvm/include/llvm/CodeGenTypes/LowLevelType.h @@ -146,6 +146,9 @@ class LLT { constexpr bool isScalar() const { return IsScalar; } constexpr bool isToken() const { return IsScalar && RawData == 0; }; constexpr bool isVector() const { return isValid() && IsVector; } + constexpr bool isScalar(unsigned Size) const { + return isScalar() && getScalarSizeInBits() == Size; + } constexpr bool isPointer() const { return isValid() && IsPointer && !IsVector; } @@ -176,6 +179,12 @@ class LLT { /// if the LLT is not a vector type. constexpr bool isFixedVector() const { return isVector() && !isScalable(); } + constexpr bool isFixedVector(unsigned NumElements, + unsigned ScalarSize) const { + return isFixedVector() && getNumElements() == NumElements && + getScalarSizeInBits() == ScalarSize; + } + /// Returns true if the LLT is a scalable vector. Returns false otherwise, /// even if the LLT is not a vector type. constexpr bool isScalableVector() const { return isVector() && isScalable(); } From 0f7b4f7b5f375cb4ea6e1f6fcd086848e0078f52 Mon Sep 17 00:00:00 2001 From: Tim Gymnich Date: Wed, 12 Feb 2025 13:34:07 +0100 Subject: [PATCH 02/16] isScalar / isFixedVector FPInfo: IRTranslator and CallLowering isScalar FPInfo: AMDGPUISel isScalar FPInfo: AMDGPURegBankLegalize isScalar FPInfo: AMDGPULegalizerInfo and Combiner isScalar FPInfo: CombinerHelper isScalar FPInfo: LegalizerHelper isScalar --- .../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 2 +- .../CodeGen/GlobalISel/LegalizerHelper.cpp | 8 +-- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp | 2 +- .../Target/AMDGPU/AMDGPUCombinerHelper.cpp | 4 +- .../AMDGPUGlobalISelDivergenceLowering.cpp | 6 +- .../AMDGPU/AMDGPUInstructionSelector.cpp | 42 ++++++-------- .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 6 +- .../AMDGPU/AMDGPUPostLegalizerCombiner.cpp | 6 +- .../AMDGPU/AMDGPUPreLegalizerCombiner.cpp | 7 +-- .../Target/AMDGPU/AMDGPURegBankCombiner.cpp | 4 +- .../Target/AMDGPU/AMDGPURegBankLegalize.cpp | 7 +-- .../AMDGPU/AMDGPURegBankLegalizeHelper.cpp | 58 +++++++++---------- .../AMDGPU/AMDGPURegBankLegalizeHelper.h | 2 +- .../AMDGPU/AMDGPURegBankLegalizeRules.cpp | 50 ++++++++-------- .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 41 ++++++------- 15 files changed, 117 insertions(+), 128 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 0dfbb91f2ac54..c8216548aa15e 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -6821,7 +6821,7 @@ bool CombinerHelper::tryFoldSelectOfConstants(GSelect *Select, LLT TrueTy = MRI.getType(Select->getTrueReg()); // We only do this combine for scalar boolean conditions. 
- if (CondTy != LLT::scalar(1)) + if (!CondTy.isScalar(1)) return false; if (TrueTy.isPointer()) diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index ed8bd25698c03..c6cdf33dd8c6e 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -2105,7 +2105,7 @@ LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx, const unsigned Offset = (I - 1) * PartSize; Register SrcReg = MI.getOperand(I).getReg(); - assert(MRI.getType(SrcReg) == LLT::scalar(PartSize)); + assert(MRI.getType(SrcReg).isScalar(PartSize)); auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg); @@ -6611,7 +6611,7 @@ LegalizerHelper::narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, // If all finite floats fit into the narrowed integer type, we can just swap // out the result type. This is practically only useful for conversions from // half to at least 16-bits, so just handle the one case. - if (SrcTy.getScalarType() != LLT::scalar(16) || + if (!SrcTy.getScalarType().isScalar(16) || NarrowTy.getScalarSizeInBits() < (IsSigned ? 17u : 16u)) return UnableToLegalize; @@ -7610,7 +7610,7 @@ LegalizerHelper::lowerU64ToF64BitFloatOps(MachineInstr &MI) { LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) { auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs(); - if (SrcTy == LLT::scalar(1)) { + if (SrcTy.isScalar(1)) { auto True = MIRBuilder.buildFConstant(DstTy, 1.0); auto False = MIRBuilder.buildFConstant(DstTy, 0.0); MIRBuilder.buildSelect(Dst, Src, True, False); @@ -9278,7 +9278,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) { // The condition was potentially zero extended before, but we want a sign // extended boolean. - if (MaskTy != LLT::scalar(1)) + if (!MaskTy.isScalar(1)) MaskElt = MIRBuilder.buildSExtInReg(MaskTy, MaskElt, 1).getReg(0); // Continue the sign extension (or truncate) to match the data type. diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp index 478a4c161fce7..80ee864e7d291 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp @@ -72,7 +72,7 @@ struct AMDGPUOutgoingValueHandler : public CallLowering::OutgoingValueHandler { if (TRI->isSGPRReg(MRI, PhysReg)) { LLT Ty = MRI.getType(ExtReg); LLT S32 = LLT::scalar(32); - if (Ty != S32) { + if (!Ty.isScalar(32)) { // FIXME: We should probably support readfirstlane intrinsics with all // legal 32-bit types. 
assert(Ty.getSizeInBits() == 32); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp index 46194ab46ff6a..22cd79c8cc205 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp @@ -409,7 +409,7 @@ static bool isFPExtFromF16OrConst(const MachineRegisterInfo &MRI, const MachineInstr *Def = MRI.getVRegDef(Reg); if (Def->getOpcode() == TargetOpcode::G_FPEXT) { Register SrcReg = Def->getOperand(1).getReg(); - return MRI.getType(SrcReg) == LLT::scalar(16); + return MRI.getType(SrcReg).isScalar(16); } if (Def->getOpcode() == TargetOpcode::G_FCONSTANT) { @@ -428,7 +428,7 @@ bool AMDGPUCombinerHelper::matchExpandPromotedF16FMed3(MachineInstr &MI, Register Src2) const { assert(MI.getOpcode() == TargetOpcode::G_FPTRUNC); Register SrcReg = MI.getOperand(1).getReg(); - if (!MRI.hasOneNonDBGUse(SrcReg) || MRI.getType(SrcReg) != LLT::scalar(32)) + if (!MRI.hasOneNonDBGUse(SrcReg) || !MRI.getType(SrcReg).isScalar(32)) return false; return isFPExtFromF16OrConst(MRI, Src0) && isFPExtFromF16OrConst(MRI, Src1) && diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp index fb258547e8fb9..d96d1f5ad39f9 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp @@ -87,7 +87,7 @@ DivergenceLoweringHelper::DivergenceLoweringHelper( // _(s1) -> SReg_32/64(s1) void DivergenceLoweringHelper::markAsLaneMask(Register DstReg) const { - assert(MRI->getType(DstReg) == LLT::scalar(1)); + assert(MRI->getType(DstReg).isScalar(1)); if (MRI->getRegClassOrNull(DstReg)) { if (MRI->constrainRegClass(DstReg, ST->getBoolRC())) @@ -100,13 +100,11 @@ void DivergenceLoweringHelper::markAsLaneMask(Register DstReg) const { void DivergenceLoweringHelper::getCandidatesForLowering( SmallVectorImpl &Vreg1Phis) const { - LLT S1 = LLT::scalar(1); - // Add divergent i1 phis to the list for (MachineBasicBlock &MBB : *MF) { for (MachineInstr &MI : MBB.phis()) { Register Dst = MI.getOperand(0).getReg(); - if (MRI->getType(Dst) == S1 && MUI->isDivergent(Dst)) + if (MRI->getType(Dst).isScalar(1) && MUI->isDivergent(Dst)) Vreg1Phis.push_back(&MI); } } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 441fb5730a6d8..60cfc531868ec 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -105,7 +105,7 @@ bool AMDGPUInstructionSelector::constrainCopyLikeIntrin(MachineInstr &MI, MachineOperand &Src = MI.getOperand(1); // TODO: This should be legalized to s32 if needed - if (MRI->getType(Dst.getReg()) == LLT::scalar(1)) + if (MRI->getType(Dst.getReg()).isScalar(1)) return false; const TargetRegisterClass *DstRC @@ -293,7 +293,7 @@ bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const { // - divergent S1 G_PHI should go through lane mask merging algorithm // and be fully inst-selected in AMDGPUGlobalISelDivergenceLowering // - uniform S1 G_PHI should be lowered into S32 G_PHI in AMDGPURegBankSelect - if (DefTy == LLT::scalar(1)) + if (DefTy.isScalar(1)) return false; // TODO: Verify this doesn't have insane operands (i.e. VGPR to SGPR copy) @@ -733,9 +733,8 @@ bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR(MachineInstr &MI) const { // Selection logic below is for V2S16 only. 
// For G_BUILD_VECTOR_TRUNC, additionally check that the operands are s32. Register Dst = MI.getOperand(0).getReg(); - if (MRI->getType(Dst) != LLT::fixed_vector(2, 16) || - (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC && - SrcTy != LLT::scalar(32))) + if (!MRI->getType(Dst).isFixedVector(2, 16) || + (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC && !SrcTy.isScalar(32))) return selectImpl(MI, *CoverageInfo); const RegisterBank *DstBank = RBI.getRegBank(Dst, *MRI, TRI); @@ -1073,9 +1072,9 @@ bool AMDGPUInstructionSelector::selectDivScale(MachineInstr &MI) const { LLT Ty = MRI->getType(Dst0); unsigned Opc; - if (Ty == LLT::scalar(32)) + if (Ty.isScalar(32)) Opc = AMDGPU::V_DIV_SCALE_F32_e64; - else if (Ty == LLT::scalar(64)) + else if (Ty.isScalar(64)) Opc = AMDGPU::V_DIV_SCALE_F64_e64; else return false; @@ -2390,11 +2389,10 @@ bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const { Register SrcReg = I.getOperand(1).getReg(); const LLT DstTy = MRI->getType(DstReg); const LLT SrcTy = MRI->getType(SrcReg); - const LLT S1 = LLT::scalar(1); const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI); const RegisterBank *DstRB; - if (DstTy == S1) { + if (DstTy.isScalar(1)) { // This is a special case. We don't treat s1 for legalization artifacts as // vcc booleans. DstRB = SrcRB; @@ -2432,7 +2430,7 @@ bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const { return true; } - if (DstTy == LLT::fixed_vector(2, 16) && SrcTy == LLT::fixed_vector(2, 32)) { + if (DstTy.isFixedVector(2, 16) && SrcTy.isFixedVector(2, 32)) { MachineBasicBlock *MBB = I.getParent(); const DebugLoc &DL = I.getDebugLoc(); @@ -2724,8 +2722,7 @@ static bool isExtractHiElt(MachineRegisterInfo &MRI, Register In, if (Shuffle->getOpcode() != AMDGPU::G_SHUFFLE_VECTOR) return false; - assert(MRI.getType(Shuffle->getOperand(0).getReg()) == - LLT::fixed_vector(2, 16)); + assert(MRI.getType(Shuffle->getOperand(0).getReg()).isFixedVector(2, 16)); ArrayRef Mask = Shuffle->getOperand(3).getShuffleMask(); assert(Mask.size() == 2); @@ -2749,8 +2746,7 @@ bool AMDGPUInstructionSelector::selectG_FPEXT(MachineInstr &I) const { Register Src = I.getOperand(1).getReg(); - if (MRI->getType(Dst) == LLT::scalar(32) && - MRI->getType(Src) == LLT::scalar(16)) { + if (MRI->getType(Dst).isScalar(32) && MRI->getType(Src).isScalar(16)) { if (isExtractHiElt(*MRI, Src, Src)) { MachineBasicBlock *BB = I.getParent(); BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_CVT_HI_F32_F16), Dst) @@ -2778,7 +2774,7 @@ bool AMDGPUInstructionSelector::selectG_FNEG(MachineInstr &MI) const { Register Dst = MI.getOperand(0).getReg(); const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI); if (DstRB->getID() != AMDGPU::SGPRRegBankID || - MRI->getType(Dst) != LLT::scalar(64)) + !MRI->getType(Dst).isScalar(64)) return false; Register Src = MI.getOperand(1).getReg(); @@ -2824,7 +2820,7 @@ bool AMDGPUInstructionSelector::selectG_FABS(MachineInstr &MI) const { Register Dst = MI.getOperand(0).getReg(); const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI); if (DstRB->getID() != AMDGPU::SGPRRegBankID || - MRI->getType(Dst) != LLT::scalar(64)) + !MRI->getType(Dst).isScalar(64)) return false; Register Src = MI.getOperand(1).getReg(); @@ -2996,7 +2992,7 @@ bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const { // RegBankSelect knows what it's doing if the branch condition is scc, even // though it currently does not. 
if (!isVCC(CondReg, *MRI)) { - if (MRI->getType(CondReg) != LLT::scalar(32)) + if (!MRI->getType(CondReg).isScalar(32)) return false; CondPhysReg = AMDGPU::SCC; @@ -3459,7 +3455,7 @@ bool AMDGPUInstructionSelector::selectBufferLoadLds(MachineInstr &MI) const { static Register matchZeroExtendFromS32(MachineRegisterInfo &MRI, Register Reg) { Register ZExtSrc; if (mi_match(Reg, MRI, m_GZExt(m_Reg(ZExtSrc)))) - return MRI.getType(ZExtSrc) == LLT::scalar(32) ? ZExtSrc : Register(); + return MRI.getType(ZExtSrc).isScalar(32) ? ZExtSrc : Register(); // Match legalized form %zext = G_MERGE_VALUES (s32 %x), (s32 0) const MachineInstr *Def = getDefIgnoringCopies(Reg, MRI); @@ -3467,7 +3463,7 @@ static Register matchZeroExtendFromS32(MachineRegisterInfo &MRI, Register Reg) { return Register(); assert(Def->getNumOperands() == 3 && - MRI.getType(Def->getOperand(0).getReg()) == LLT::scalar(64)); + MRI.getType(Def->getOperand(0).getReg()).isScalar(64)); if (mi_match(Def->getOperand(2).getReg(), MRI, m_ZeroInt())) { return Def->getOperand(1).getReg(); } @@ -4058,7 +4054,7 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) { // This is a workaround. For extension from type i1, `selectImpl()` uses // patterns from TD file and generates an illegal VGPR to SGPR COPY as type // i1 can only be hold in a SGPR class. - if (MRI->getType(I.getOperand(1).getReg()) != LLT::scalar(1) && + if (!MRI->getType(I.getOperand(1).getReg()).isScalar(1) && selectImpl(I, *CoverageInfo)) return true; return selectG_SZA_EXT(I); @@ -4291,7 +4287,7 @@ AMDGPUInstructionSelector::selectVOP3PModsImpl( if (MI->getOpcode() == AMDGPU::G_FNEG && // It's possible to see an f32 fneg here, but unlikely. // TODO: Treat f32 fneg as only high bit. - MRI.getType(Src) == LLT::fixed_vector(2, 16)) { + MRI.getType(Src).isFixedVector(2, 16)) { Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI); Src = MI->getOperand(1).getReg(); MI = MRI.getVRegDef(Src); @@ -5789,7 +5785,7 @@ AMDGPUInstructionSelector::selectSMRDBufferSgprImm(MachineOperand &Root) const { if (!EncodedOffset) return std::nullopt; - assert(MRI->getType(SOffset) == LLT::scalar(32)); + assert(MRI->getType(SOffset).isScalar(32)); return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); }, [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedOffset); }}}; } @@ -5804,7 +5800,7 @@ AMDGPUInstructionSelector::selectVOP3PMadMixModsImpl(MachineOperand &Root, std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg()); if (mi_match(Src, *MRI, m_GFPExt(m_Reg(Src)))) { - assert(MRI->getType(Src) == LLT::scalar(16)); + assert(MRI->getType(Src).isScalar(16)); // Only change Src if src modifier could be gained. 
In such cases new Src // could be sgpr but this does not violate constant bus restriction for diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index b3a8183beeacf..d455ace4e394a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -278,7 +278,7 @@ static LegalityPredicate elementTypeIsLegal(unsigned TypeIdx) { if (!QueryTy.isVector()) return false; const LLT EltTy = QueryTy.getElementType(); - return EltTy == LLT::scalar(16) || EltTy.getSizeInBits() >= 32; + return EltTy.isScalar(16) || EltTy.getSizeInBits() >= 32; }; } @@ -2451,7 +2451,7 @@ bool AMDGPULegalizerInfo::legalizeFroundeven(MachineInstr &MI, MachineIRBuilder &B) const { Register Src = MI.getOperand(1).getReg(); LLT Ty = MRI.getType(Src); - assert(Ty.isScalar() && Ty.getSizeInBits() == 64); + assert(Ty.isFloat() && Ty.getSizeInBits() == 64); APFloat C1Val(APFloat::IEEEdouble(), "0x1.0p+52"); APFloat C2Val(APFloat::IEEEdouble(), "0x1.fffffffffffffp+51"); @@ -2480,7 +2480,7 @@ bool AMDGPULegalizerInfo::legalizeFceil( const LLT S64 = LLT::scalar(64); Register Src = MI.getOperand(1).getReg(); - assert(MRI.getType(Src) == S64); + assert(MRI.getType(Src).isFloat(64)); // result = trunc(src) // if (src > 0.0 && src != result) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp index 888817e52e35d..f1502f5b71017 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp @@ -213,7 +213,7 @@ bool AMDGPUPostLegalizerCombinerImpl::matchUCharToFloat( // types are legalized. v4i8 -> v4f32 is probably the only case to worry // about in practice. LLT Ty = MRI.getType(DstReg); - if (Ty == LLT::scalar(32) || Ty == LLT::scalar(16)) { + if (Ty.isScalar(32) || Ty.isScalar(16)) { Register SrcReg = MI.getOperand(1).getReg(); unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits(); assert(SrcSize == 16 || SrcSize == 32 || SrcSize == 64); @@ -349,7 +349,7 @@ void AMDGPUPostLegalizerCombinerImpl::applyCvtF32UByteN( const LLT S32 = LLT::scalar(32); Register CvtSrc = MatchInfo.CvtVal; LLT SrcTy = MRI.getType(MatchInfo.CvtVal); - if (SrcTy != S32) { + if (!SrcTy.isScalar(32)) { assert(SrcTy.isScalar() && SrcTy.getSizeInBits() >= 8); CvtSrc = B.buildAnyExt(S32, CvtSrc).getReg(0); } @@ -418,7 +418,7 @@ bool AMDGPUPostLegalizerCombinerImpl::matchCombine_s_mul_u64( MachineInstr &MI, unsigned &NewOpcode) const { Register Src0 = MI.getOperand(1).getReg(); Register Src1 = MI.getOperand(2).getReg(); - if (MRI.getType(Src0) != LLT::scalar(64)) + if (!MRI.getType(Src0).isScalar(64)) return false; if (KB->getKnownBits(Src1).countMinLeadingZeros() >= 32 && diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp index 52c6e5274ae5b..cf742511f916e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp @@ -119,11 +119,11 @@ bool AMDGPUPreLegalizerCombinerImpl::matchClampI64ToI16( // Try to find a pattern where an i64 value should get clamped to short. 
const LLT SrcType = MRI.getType(MI.getOperand(1).getReg()); - if (SrcType != LLT::scalar(64)) + if (!SrcType.isScalar(64)) return false; const LLT DstType = MRI.getType(MI.getOperand(0).getReg()); - if (DstType != LLT::scalar(16)) + if (!DstType.isScalar(16)) return false; Register Base; @@ -177,8 +177,7 @@ void AMDGPUPreLegalizerCombinerImpl::applyClampI64ToI16( MachineInstr &MI, const ClampI64ToI16MatchInfo &MatchInfo) const { Register Src = MatchInfo.Origin; - assert(MI.getParent()->getParent()->getRegInfo().getType(Src) == - LLT::scalar(64)); + assert(MI.getParent()->getParent()->getRegInfo().getType(Src).isScalar(64)); const LLT S32 = LLT::scalar(32); auto Unmerge = B.buildUnmerge(S32, Src); diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp index 98c48f4fe3705..68312ef657af3 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp @@ -195,7 +195,7 @@ bool AMDGPURegBankCombinerImpl::matchIntMinMaxToMed3( // med3 for i16 is only available on gfx9+, and not available for v2i16. LLT Ty = MRI.getType(Dst); - if ((Ty != LLT::scalar(16) || !STI.hasMed3_16()) && Ty != LLT::scalar(32)) + if ((!Ty.isScalar(16) || !STI.hasMed3_16()) && !Ty.isScalar(32)) return false; MinMaxMedOpc OpcodeTriple = getMinMaxPair(MI.getOpcode()); @@ -238,7 +238,7 @@ bool AMDGPURegBankCombinerImpl::matchFPMinMaxToMed3( LLT Ty = MRI.getType(Dst); // med3 for f16 is only available on gfx9+, and not available for v2f16. - if ((Ty != LLT::scalar(16) || !STI.hasMed3_16()) && Ty != LLT::scalar(32)) + if ((!Ty.isScalar(16) || !STI.hasMed3_16()) && !Ty.isScalar(32)) return false; auto OpcodeTriple = getMinMaxPair(MI.getOpcode()); diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp index 8d3e7829e10e1..966de6f00a4a5 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp @@ -125,7 +125,7 @@ class AMDGPURegBankLegalizeCombiner { return true; const TargetRegisterClass *RC = MRI.getRegClassOrNull(Reg); - return RC && TRI.isSGPRClass(RC) && MRI.getType(Reg) == LLT::scalar(1); + return RC && TRI.isSGPRClass(RC) && MRI.getType(Reg).isScalar(1); } void cleanUpAfterCombine(MachineInstr &MI, MachineInstr *Optional0) { @@ -235,10 +235,9 @@ class AMDGPURegBankLegalizeCombiner { // Search through MRI for virtual registers with sgpr register bank and S1 LLT. [[maybe_unused]] static Register getAnySgprS1(const MachineRegisterInfo &MRI) { - const LLT S1 = LLT::scalar(1); for (unsigned i = 0; i < MRI.getNumVirtRegs(); ++i) { Register Reg = Register::index2VirtReg(i); - if (MRI.def_empty(Reg) || MRI.getType(Reg) != S1) + if (MRI.def_empty(Reg) || !MRI.getType(Reg).isScalar(1)) continue; const RegisterBank *RB = MRI.getRegBankOrNull(Reg); @@ -316,7 +315,7 @@ bool AMDGPURegBankLegalize::runOnMachineFunction(MachineFunction &MF) { Opc == AMDGPU::G_IMPLICIT_DEF)) { Register Dst = MI->getOperand(0).getReg(); // Non S1 types are trivially accepted. 
- if (MRI.getType(Dst) != LLT::scalar(1)) { + if (!MRI.getType(Dst).isScalar(1)) { assert(MRI.getRegBank(Dst)->getID() == AMDGPU::SGPRRegBankID); continue; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp index 3c007987b8494..24463b138cea6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp @@ -258,13 +258,14 @@ void RegBankLegalizeHelper::lower(MachineInstr &MI, // TODO: executeInWaterfallLoop(... WaterfallSgprs) } -LLT RegBankLegalizeHelper::getTyFromID(RegBankLLTMappingApplyID ID) { +bool RegBankLegalizeHelper::isValidTyForID(LLT Ty, + RegBankLLTMappingApplyID ID) { switch (ID) { case Vcc: case UniInVcc: - return LLT::scalar(1); + return Ty.isScalar(1); case Sgpr16: - return LLT::scalar(16); + return Ty.isScalar(16); case Sgpr32: case Sgpr32Trunc: case Sgpr32AExt: @@ -272,28 +273,28 @@ LLT RegBankLegalizeHelper::getTyFromID(RegBankLLTMappingApplyID ID) { case Sgpr32SExt: case UniInVgprS32: case Vgpr32: - return LLT::scalar(32); + return Ty.isScalar(32); case Sgpr64: case Vgpr64: - return LLT::scalar(64); + return Ty.isScalar(64); case SgprP1: case VgprP1: - return LLT::pointer(1, 64); + return Ty == LLT::pointer(1, 64); case SgprP3: case VgprP3: - return LLT::pointer(3, 32); + return Ty == LLT::pointer(3, 32); case SgprP4: case VgprP4: - return LLT::pointer(4, 64); + return Ty == LLT::pointer(4, 64); case SgprP5: case VgprP5: - return LLT::pointer(5, 32); + return Ty == LLT::pointer(5, 32); case SgprV4S32: case VgprV4S32: case UniInVgprV4S32: - return LLT::fixed_vector(4, 32); + return Ty.isFixedVector(4, 32); default: - return LLT(); + return Ty == LLT(); } } @@ -302,7 +303,7 @@ LLT RegBankLegalizeHelper::getBTyFromID(RegBankLLTMappingApplyID ID, LLT Ty) { case SgprB32: case VgprB32: case UniInVgprB32: - if (Ty == LLT::scalar(32) || Ty == LLT::fixed_vector(2, 16) || + if (Ty.isScalar(32) || Ty.isFixedVector(2, 16) || Ty == LLT::pointer(3, 32) || Ty == LLT::pointer(5, 32) || Ty == LLT::pointer(6, 32)) return Ty; @@ -310,37 +311,34 @@ LLT RegBankLegalizeHelper::getBTyFromID(RegBankLLTMappingApplyID ID, LLT Ty) { case SgprB64: case VgprB64: case UniInVgprB64: - if (Ty == LLT::scalar(64) || Ty == LLT::fixed_vector(2, 32) || - Ty == LLT::fixed_vector(4, 16) || Ty == LLT::pointer(0, 64) || - Ty == LLT::pointer(1, 64) || Ty == LLT::pointer(4, 64)) + if (Ty.isScalar(64) || Ty.isFixedVector(2, 32) || Ty.isFixedVector(4, 16) || + Ty == LLT::pointer(0, 64) || Ty == LLT::pointer(1, 64) || + Ty == LLT::pointer(4, 64)) return Ty; return LLT(); case SgprB96: case VgprB96: case UniInVgprB96: - if (Ty == LLT::scalar(96) || Ty == LLT::fixed_vector(3, 32) || - Ty == LLT::fixed_vector(6, 16)) + if (Ty.isScalar(96) || Ty.isFixedVector(3, 32) || Ty.isFixedVector(6, 16)) return Ty; return LLT(); case SgprB128: case VgprB128: case UniInVgprB128: - if (Ty == LLT::scalar(128) || Ty == LLT::fixed_vector(4, 32) || - Ty == LLT::fixed_vector(2, 64)) + if (Ty.isScalar(128) || Ty.isFixedVector(4, 32) || Ty.isFixedVector(2, 64)) return Ty; return LLT(); case SgprB256: case VgprB256: case UniInVgprB256: - if (Ty == LLT::scalar(256) || Ty == LLT::fixed_vector(8, 32) || - Ty == LLT::fixed_vector(4, 64) || Ty == LLT::fixed_vector(16, 16)) + if (Ty.isScalar(256) || Ty.isFixedVector(8, 32) || + Ty.isFixedVector(4, 64) || Ty.isFixedVector(16, 16)) return Ty; return LLT(); case SgprB512: case VgprB512: case UniInVgprB512: - if (Ty == LLT::scalar(512) || Ty == 
LLT::fixed_vector(16, 32) || - Ty == LLT::fixed_vector(8, 64)) + if (Ty.isScalar(512) || Ty.isFixedVector(16, 32) || Ty.isFixedVector(8, 64)) return Ty; return LLT(); default: @@ -430,7 +428,7 @@ void RegBankLegalizeHelper::applyMappingDst( case VgprP4: case VgprP5: case VgprV4S32: { - assert(Ty == getTyFromID(MethodIDs[OpIdx])); + assert(isValidTyForID(Ty, MethodIDs[OpIdx])); assert(RB == getRegBankFromID(MethodIDs[OpIdx])); break; } @@ -464,7 +462,7 @@ void RegBankLegalizeHelper::applyMappingDst( } case UniInVgprS32: case UniInVgprV4S32: { - assert(Ty == getTyFromID(MethodIDs[OpIdx])); + assert(isValidTyForID(Ty, MethodIDs[OpIdx])); assert(RB == SgprRB); Register NewVgprDst = MRI.createVirtualRegister({VgprRB, Ty}); Op.setReg(NewVgprDst); @@ -537,7 +535,7 @@ void RegBankLegalizeHelper::applyMappingSrc( case SgprP4: case SgprP5: case SgprV4S32: { - assert(Ty == getTyFromID(MethodIDs[i])); + assert(isValidTyForID(Ty, MethodIDs[i])); assert(RB == getRegBankFromID(MethodIDs[i])); break; } @@ -560,7 +558,7 @@ void RegBankLegalizeHelper::applyMappingSrc( case VgprP4: case VgprP5: case VgprV4S32: { - assert(Ty == getTyFromID(MethodIDs[i])); + assert(isValidTyForID(Ty, MethodIDs[i])); if (RB != VgprRB) { auto CopyToVgpr = B.buildCopy({VgprRB, Ty}, Reg); Op.setReg(CopyToVgpr.getReg(0)); @@ -619,7 +617,7 @@ void RegBankLegalizeHelper::applyMappingPHI(MachineInstr &MI) { Register Dst = MI.getOperand(0).getReg(); LLT Ty = MRI.getType(Dst); - if (Ty == LLT::scalar(1) && MUI.isUniform(Dst)) { + if (Ty.isScalar(1) && MUI.isUniform(Dst)) { B.setInsertPt(*MI.getParent(), MI.getParent()->getFirstNonPHI()); Register NewDst = MRI.createVirtualRegister(SgprRB_S32); @@ -644,7 +642,7 @@ void RegBankLegalizeHelper::applyMappingPHI(MachineInstr &MI) { // ALL divergent i1 phis should be already lowered and inst-selected into PHI // with sgpr reg class and S1 LLT. // Note: this includes divergent phis that don't require lowering. - if (Ty == LLT::scalar(1) && MUI.isDivergent(Dst)) { + if (Ty.isScalar(1) && MUI.isDivergent(Dst)) { LLVM_DEBUG(dbgs() << "Divergent S1 G_PHI: "; MI.dump();); llvm_unreachable("Make sure to run AMDGPUGlobalISelDivergenceLowering " "before RegBankLegalize to lower lane mask(vcc) phis"); @@ -653,7 +651,7 @@ void RegBankLegalizeHelper::applyMappingPHI(MachineInstr &MI) { // We accept all types that can fit in some register class. // Uniform G_PHIs have all sgpr registers. // Divergent G_PHIs have vgpr dst but inputs can be sgpr or vgpr. 
- if (Ty == LLT::scalar(32) || Ty == LLT::pointer(4, 64)) { + if (Ty.isScalar(32) || Ty == LLT::pointer(4, 64)) { return; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h index ae3ab86449dd5..81cf6ba7127ea 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h @@ -88,7 +88,7 @@ class RegBankLegalizeHelper { iterator_range Range, SmallSet &SgprOperandRegs); - LLT getTyFromID(RegBankLLTMappingApplyID ID); + bool isValidTyForID(LLT Ty, RegBankLLTMappingApplyID ID); LLT getBTyFromID(RegBankLLTMappingApplyID ID, LLT Ty); const RegisterBank *getRegBankFromID(RegBankLLTMappingApplyID ID); diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp index 33018ae9677a3..18034caee73a8 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp @@ -43,13 +43,13 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID, const MachineRegisterInfo &MRI) { switch (UniID) { case S1: - return MRI.getType(Reg) == LLT::scalar(1); + return MRI.getType(Reg).isScalar(1); case S16: - return MRI.getType(Reg) == LLT::scalar(16); + return MRI.getType(Reg).isScalar(16); case S32: - return MRI.getType(Reg) == LLT::scalar(32); + return MRI.getType(Reg).isScalar(32); case S64: - return MRI.getType(Reg) == LLT::scalar(64); + return MRI.getType(Reg).isScalar(64); case P1: return MRI.getType(Reg) == LLT::pointer(1, 64); case P3: @@ -71,13 +71,13 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID, case B512: return MRI.getType(Reg).getSizeInBits() == 512; case UniS1: - return MRI.getType(Reg) == LLT::scalar(1) && MUI.isUniform(Reg); + return MRI.getType(Reg).isScalar(1) && MUI.isUniform(Reg); case UniS16: - return MRI.getType(Reg) == LLT::scalar(16) && MUI.isUniform(Reg); + return MRI.getType(Reg).isScalar(16) && MUI.isUniform(Reg); case UniS32: - return MRI.getType(Reg) == LLT::scalar(32) && MUI.isUniform(Reg); + return MRI.getType(Reg).isScalar(32) && MUI.isUniform(Reg); case UniS64: - return MRI.getType(Reg) == LLT::scalar(64) && MUI.isUniform(Reg); + return MRI.getType(Reg).isScalar(64) && MUI.isUniform(Reg); case UniP1: return MRI.getType(Reg) == LLT::pointer(1, 64) && MUI.isUniform(Reg); case UniP3: @@ -99,11 +99,11 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID, case UniB512: return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isUniform(Reg); case DivS1: - return MRI.getType(Reg) == LLT::scalar(1) && MUI.isDivergent(Reg); + return MRI.getType(Reg).isScalar(1) && MUI.isDivergent(Reg); case DivS32: - return MRI.getType(Reg) == LLT::scalar(32) && MUI.isDivergent(Reg); + return MRI.getType(Reg).isScalar(32) && MUI.isDivergent(Reg); case DivS64: - return MRI.getType(Reg) == LLT::scalar(64) && MUI.isDivergent(Reg); + return MRI.getType(Reg).isScalar(64) && MUI.isDivergent(Reg); case DivP1: return MRI.getType(Reg) == LLT::pointer(1, 64) && MUI.isDivergent(Reg); case DivP3: @@ -164,35 +164,33 @@ SetOfRulesForOpcode::SetOfRulesForOpcode(FastRulesTypes FastTypes) : FastTypes(FastTypes) {} UniformityLLTOpPredicateID LLTToId(LLT Ty) { - if (Ty == LLT::scalar(16)) + if (Ty.isScalar(16)) return S16; - if (Ty == LLT::scalar(32)) + if (Ty.isScalar(32)) return S32; - if (Ty == LLT::scalar(64)) + if (Ty.isScalar(64)) return S64; - if (Ty == LLT::fixed_vector(2, 16)) + if 
(Ty.isFixedVector(2, 16)) return V2S16; - if (Ty == LLT::fixed_vector(2, 32)) + if (Ty.isFixedVector(2, 32)) return V2S32; - if (Ty == LLT::fixed_vector(3, 32)) + if (Ty.isFixedVector(3, 32)) return V3S32; - if (Ty == LLT::fixed_vector(4, 32)) + if (Ty.isFixedVector(4, 32)) return V4S32; return _; } UniformityLLTOpPredicateID LLTToBId(LLT Ty) { - if (Ty == LLT::scalar(32) || Ty == LLT::fixed_vector(2, 16) || - Ty == LLT::pointer(3, 32) || Ty == LLT::pointer(5, 32) || - Ty == LLT::pointer(6, 32)) + if (Ty.isScalar(32) || Ty.isFixedVector(2, 16) || Ty == LLT::pointer(3, 32) || + Ty == LLT::pointer(5, 32) || Ty == LLT::pointer(6, 32)) return B32; - if (Ty == LLT::scalar(64) || Ty == LLT::fixed_vector(2, 32) || - Ty == LLT::fixed_vector(4, 16) || Ty == LLT::pointer(1, 64) || - Ty == LLT::pointer(4, 64)) + if (Ty.isScalar(64) || Ty.isFixedVector(2, 32) || Ty.isFixedVector(4, 16) || + Ty == LLT::pointer(1, 64) || Ty == LLT::pointer(4, 64)) return B64; - if (Ty == LLT::fixed_vector(3, 32)) + if (Ty.isFixedVector(3, 32)) return B96; - if (Ty == LLT::fixed_vector(4, 32)) + if (Ty.isFixedVector(4, 32)) return B128; return _; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index c19ee14ab1574..346289bac40e2 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -131,8 +131,8 @@ class ApplyRegBankMapping final : public GISelChangeObserver { const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, *RBI.TRI); if (SrcBank == &AMDGPU::VCCRegBank) { const LLT S32 = LLT::scalar(32); - assert(MRI.getType(SrcReg) == LLT::scalar(1)); - assert(MRI.getType(DstReg) == S32); + assert(MRI.getType(SrcReg).isScalar(1)); + assert(MRI.getType(DstReg).isScalar(32)); assert(NewBank == &AMDGPU::VGPRRegBank); // Replace the extension with a select, which really uses the boolean @@ -170,7 +170,7 @@ class ApplyRegBankMapping final : public GISelChangeObserver { continue; const RegisterBank *RB = NewBank; - if (MRI.getType(Reg) == LLT::scalar(1)) { + if (MRI.getType(Reg).isScalar(1)) { assert(NewBank == &AMDGPU::VGPRRegBank && "s1 operands should only be used for vector bools"); assert((MI.getOpcode() != AMDGPU::G_TRUNC && @@ -298,7 +298,7 @@ AMDGPURegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC, if (!Ty.isValid()) return AMDGPU::SGPRRegBank; - return Ty == LLT::scalar(1) ? AMDGPU::VCCRegBank : AMDGPU::SGPRRegBank; + return Ty.isScalar(1) ? AMDGPU::VCCRegBank : AMDGPU::SGPRRegBank; } return TRI->isAGPRClass(&RC) ? AMDGPU::AGPRRegBank : AMDGPU::VGPRRegBank; @@ -1495,7 +1495,7 @@ bool AMDGPURegisterBankInfo::applyMappingBFE(MachineIRBuilder &B, const RegisterBank *DstBank = OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank; if (DstBank == &AMDGPU::VGPRRegBank) { - if (Ty == S32) + if (Ty.isScalar(32)) return true; // There is no 64-bit vgpr bitfield extract instructions so the operation @@ -1568,8 +1568,9 @@ bool AMDGPURegisterBankInfo::applyMappingBFE(MachineIRBuilder &B, // TODO: It might be worth using a pseudo here to avoid scc clobber and // register class constraints. - unsigned Opc = Ty == S32 ? (Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32) : - (Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64); + unsigned Opc = Ty.isScalar(32) + ? (Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32) + : (Signed ? 
AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64); auto MIB = B.buildInstr(Opc, {DstReg}, {SrcReg, MergedInputs}); if (!constrainSelectedInstRegOperands(*MIB, *TII, *TRI, *this)) @@ -1790,7 +1791,7 @@ Register AMDGPURegisterBankInfo::handleD16VData(MachineIRBuilder &B, const LLT S16 = LLT::scalar(16); LLT StoreVT = MRI.getType(Reg); - if (!StoreVT.isVector() || StoreVT.getElementType() != S16) + if (!StoreVT.isVector() || !StoreVT.getElementType().isScalar(16)) return Reg; auto Unmerge = B.buildUnmerge(S16, Reg); @@ -2213,7 +2214,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl( case AMDGPU::G_IMPLICIT_DEF: { Register DstReg = MI.getOperand(0).getReg(); LLT DstTy = MRI.getType(DstReg); - if (DstTy != LLT::scalar(1)) + if (!DstTy.isScalar(1)) break; const RegisterBank *DstBank = @@ -2243,7 +2244,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl( case AMDGPU::G_PHI: { Register DstReg = MI.getOperand(0).getReg(); LLT DstTy = MRI.getType(DstReg); - if (DstTy != LLT::scalar(1)) + if (!DstTy.isScalar(1)) break; const LLT S32 = LLT::scalar(32); @@ -2514,7 +2515,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl( // 16-bit operations are VALU only, but can be promoted to 32-bit SALU. // Packed 16-bit operations need to be scalarized and promoted. - if (DstTy != LLT::scalar(16) && DstTy != LLT::fixed_vector(2, 16)) + if (!DstTy.isScalar(16) && !DstTy.isFixedVector(2, 16)) break; const RegisterBank *DstBank = @@ -2588,8 +2589,9 @@ void AMDGPURegisterBankInfo::applyMappingImpl( Register SrcReg1 = MI.getOperand(2).getReg(); const LLT S32 = LLT::scalar(32); const LLT S64 = LLT::scalar(64); - assert(MRI.getType(DstReg) == S64 && "This is a special case for s_mul_u64 " - "that handles only 64-bit operands."); + assert(MRI.getType(DstReg).isScalar(64) && + "This is a special case for s_mul_u64 " + "that handles only 64-bit operands."); const RegisterBank *DstBank = OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank; @@ -2684,7 +2686,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl( Register SrcReg = MI.getOperand(1).getReg(); const LLT S32 = LLT::scalar(32); LLT Ty = MRI.getType(SrcReg); - if (Ty == S32) + if (Ty.isScalar(32)) break; ApplyRegBankMapping ApplyVALU(B, *this, MRI, &AMDGPU::VGPRRegBank); @@ -2708,7 +2710,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl( Register SrcReg = MI.getOperand(1).getReg(); const LLT S32 = LLT::scalar(32); LLT Ty = MRI.getType(SrcReg); - if (Ty == S32) + if (Ty.isScalar(32)) break; // We can narrow this more efficiently than Helper can by using ffbh/ffbl @@ -2776,7 +2778,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl( return; } - if (SrcTy != LLT::scalar(1)) + if (!SrcTy.isScalar(1)) return; // It is not legal to have a legalization artifact with a VCC source. Rather @@ -3783,10 +3785,9 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { // For COPY between a physical reg and an s1, there is no type associated so // we need to take the virtual register's type as a hint on how to interpret // s1 values. 
- if (!SrcReg.isVirtual() && !DstBank && - MRI.getType(DstReg) == LLT::scalar(1)) + if (!SrcReg.isVirtual() && !DstBank && MRI.getType(DstReg).isScalar(1)) DstBank = &AMDGPU::VCCRegBank; - else if (!DstReg.isVirtual() && MRI.getType(SrcReg) == LLT::scalar(1)) + else if (!DstReg.isVirtual() && MRI.getType(SrcReg).isScalar(1)) DstBank = &AMDGPU::VCCRegBank; if (!DstBank) @@ -4150,7 +4151,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case AMDGPU::G_BUILD_VECTOR: case AMDGPU::G_BUILD_VECTOR_TRUNC: { LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); - if (DstTy == LLT::fixed_vector(2, 16)) { + if (DstTy.isFixedVector(2, 16)) { unsigned DstSize = DstTy.getSizeInBits(); unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); unsigned Src0BankID = getRegBankID(MI.getOperand(1).getReg(), MRI); From 32e287645f0ce154c976998ae8750a4d5b013ead Mon Sep 17 00:00:00 2001 From: Tim Gymnich Date: Thu, 6 Feb 2025 13:16:43 +0100 Subject: [PATCH 03/16] changeElementCount --- .../llvm/CodeGen/GlobalISel/LegalizerInfo.h | 2 +- llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp | 6 ++---- .../CodeGen/GlobalISel/LegalizerInfoTest.cpp | 14 ++++++++------ 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h index 9472aa196f9b4..f8819d9efd833 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h @@ -374,7 +374,7 @@ LegalizeMutation changeElementCountTo(unsigned TypeIdx, unsigned FromTypeIdx); /// Keep the same scalar or element type as \p TypeIdx, but take the number of /// elements from \p Ty. -LegalizeMutation changeElementCountTo(unsigned TypeIdx, LLT Ty); +LegalizeMutation changeElementCountTo(unsigned TypeIdx, ElementCount EC); /// Change the scalar size or element size to have the same scalar size as type /// index \p FromIndex. Unlike changeElementTo, this discards pointer types and diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp index 25c1db91b05d8..ded4df4edc14c 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp @@ -55,12 +55,10 @@ LegalizeMutation LegalizeMutations::changeElementCountTo(unsigned TypeIdx, } LegalizeMutation LegalizeMutations::changeElementCountTo(unsigned TypeIdx, - LLT NewEltTy) { + ElementCount EC) { return [=](const LegalityQuery &Query) { const LLT OldTy = Query.Types[TypeIdx]; - ElementCount NewEltCount = NewEltTy.isVector() ? 
NewEltTy.getElementCount() - : ElementCount::getFixed(1); - return std::make_pair(TypeIdx, OldTy.changeElementCount(NewEltCount)); + return std::make_pair(TypeIdx, OldTy.changeElementCount(EC)); }; } diff --git a/llvm/unittests/CodeGen/GlobalISel/LegalizerInfoTest.cpp b/llvm/unittests/CodeGen/GlobalISel/LegalizerInfoTest.cpp index 988e307909232..836c81b524672 100644 --- a/llvm/unittests/CodeGen/GlobalISel/LegalizerInfoTest.cpp +++ b/llvm/unittests/CodeGen/GlobalISel/LegalizerInfoTest.cpp @@ -420,12 +420,14 @@ TEST(LegalizerInfoTest, RuleSets) { // Raw type form LI.getActionDefinitionsBuilder(G_ADD) - .fewerElementsIf(typeIs(0, v4s32), changeElementCountTo(0, v2s32)) - .fewerElementsIf(typeIs(0, v8s32), changeElementCountTo(0, s32)) - .fewerElementsIf(typeIs(0, LLT::scalable_vector(4, 16)), - changeElementCountTo(0, LLT::scalable_vector(2, 16))) - .fewerElementsIf(typeIs(0, LLT::scalable_vector(8, 16)), - changeElementCountTo(0, s16)); + .fewerElementsIf(typeIs(0, v4s32), + changeElementCountTo(0, ElementCount::getFixed(2))) + .fewerElementsIf(typeIs(0, v8s32), + changeElementCountTo(0, ElementCount::getFixed(1))) + .fewerElementsIf(typeIs(0, LLT::scalable_vector(4, s16)), + changeElementCountTo(0, ElementCount::getScalable(2))) + .fewerElementsIf(typeIs(0, LLT::scalable_vector(8, s16)), + changeElementCountTo(0, ElementCount::getFixed(1))); LegacyInfo.computeTables(); From 18a7d3f6fbe58b2e14f01768c0b47bfdb6b55e7d Mon Sep 17 00:00:00 2001 From: Tim Gymnich Date: Thu, 6 Feb 2025 13:16:55 +0100 Subject: [PATCH 04/16] re-enable bfloat --- llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 31 -------------------- 1 file changed, 31 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index b85239ebf08cb..540694469a3d8 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -295,21 +295,8 @@ void IRTranslator::addMachineCFGPred(CFGEdge Edge, MachineBasicBlock *NewPred) { MachinePreds[Edge].push_back(NewPred); } -static bool containsBF16Type(const User &U) { - // BF16 cannot currently be represented by LLT, to avoid miscompiles we - // prevent any instructions using them. FIXME: This can be removed once LLT - // supports bfloat. - return U.getType()->getScalarType()->isBFloatTy() || - any_of(U.operands(), [](Value *V) { - return V->getType()->getScalarType()->isBFloatTy(); - }); -} - bool IRTranslator::translateBinaryOp(unsigned Opcode, const User &U, MachineIRBuilder &MIRBuilder) { - if (containsBF16Type(U)) - return false; - // Get or create a virtual register for each value. // Unless the value is a Constant => loadimm cst? // or inline constant each time? 
@@ -329,9 +316,6 @@ bool IRTranslator::translateBinaryOp(unsigned Opcode, const User &U, bool IRTranslator::translateUnaryOp(unsigned Opcode, const User &U, MachineIRBuilder &MIRBuilder) { - if (containsBF16Type(U)) - return false; - Register Op0 = getOrCreateVReg(*U.getOperand(0)); Register Res = getOrCreateVReg(U); uint32_t Flags = 0; @@ -349,9 +333,6 @@ bool IRTranslator::translateFNeg(const User &U, MachineIRBuilder &MIRBuilder) { bool IRTranslator::translateCompare(const User &U, MachineIRBuilder &MIRBuilder) { - if (containsBF16Type(U)) - return false; - auto *CI = cast(&U); Register Op0 = getOrCreateVReg(*U.getOperand(0)); Register Op1 = getOrCreateVReg(*U.getOperand(1)); @@ -1571,9 +1552,6 @@ bool IRTranslator::translateBitCast(const User &U, bool IRTranslator::translateCast(unsigned Opcode, const User &U, MachineIRBuilder &MIRBuilder) { - if (containsBF16Type(U)) - return false; - uint32_t Flags = 0; if (const Instruction *I = dyn_cast(&U)) Flags = MachineInstr::copyFlagsFromInstruction(*I); @@ -2662,9 +2640,6 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, bool IRTranslator::translateInlineAsm(const CallBase &CB, MachineIRBuilder &MIRBuilder) { - if (containsBF16Type(CB)) - return false; - const InlineAsmLowering *ALI = MF->getSubtarget().getInlineAsmLowering(); if (!ALI) { @@ -2753,9 +2728,6 @@ bool IRTranslator::translateCallBase(const CallBase &CB, } bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { - if (containsBF16Type(U)) - return false; - const CallInst &CI = cast(U); const Function *F = CI.getCalledFunction(); @@ -3387,9 +3359,6 @@ bool IRTranslator::translateAtomicCmpXchg(const User &U, bool IRTranslator::translateAtomicRMW(const User &U, MachineIRBuilder &MIRBuilder) { - if (containsBF16Type(U)) - return false; - const AtomicRMWInst &I = cast(U); auto Flags = TLI->getAtomicMemOperandFlags(I, *DL); From 082c256eb794d0480ed256bf366cca401e3c8cbd Mon Sep 17 00:00:00 2001 From: Tim Gymnich Date: Wed, 12 Feb 2025 13:27:53 +0100 Subject: [PATCH 05/16] FPInfo: LLT changes --- llvm/include/llvm/CodeGen/LowLevelTypeUtils.h | 2 +- llvm/include/llvm/CodeGenTypes/LowLevelType.h | 347 +++++++++++++----- .../CodeGen/GlobalISel/MachineIRBuilder.cpp | 3 +- llvm/lib/CodeGen/LowLevelTypeUtils.cpp | 75 +++- llvm/lib/CodeGen/MIRParser/MIParser.cpp | 102 +++-- llvm/lib/CodeGenTypes/LowLevelType.cpp | 60 ++- .../GlobalISel/GlobalISelMatchTable.cpp | 101 +++-- 7 files changed, 492 insertions(+), 198 deletions(-) diff --git a/llvm/include/llvm/CodeGen/LowLevelTypeUtils.h b/llvm/include/llvm/CodeGen/LowLevelTypeUtils.h index 142e5cd4e7ad1..e9288ce8fdf51 100644 --- a/llvm/include/llvm/CodeGen/LowLevelTypeUtils.h +++ b/llvm/include/llvm/CodeGen/LowLevelTypeUtils.h @@ -40,6 +40,6 @@ LLT getLLTForMVT(MVT Ty); /// Get the appropriate floating point arithmetic semantic based on the bit size /// of the given scalar LLT. 
const llvm::fltSemantics &getFltSemanticForLLT(LLT Ty); -} +} // namespace llvm #endif // LLVM_CODEGEN_LOWLEVELTYPEUTILS_H diff --git a/llvm/include/llvm/CodeGenTypes/LowLevelType.h b/llvm/include/llvm/CodeGenTypes/LowLevelType.h index 891d6174048ea..b7089f888d988 100644 --- a/llvm/include/llvm/CodeGenTypes/LowLevelType.h +++ b/llvm/include/llvm/CodeGenTypes/LowLevelType.h @@ -29,6 +29,7 @@ #include "llvm/ADT/DenseMapInfo.h" #include "llvm/CodeGenTypes/MachineValueType.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include namespace llvm { @@ -38,68 +39,150 @@ class raw_ostream; class LLT { public: + enum class FPVariant { + IEEE_FLOAT = 0x0, + BRAIN_FLOAT = 0x1, // BRAIN_FLOAT + VARIANT_FLOAT_2 = 0x2, // PPC_FLOAT + VARIANT_FLOAT_3 = 0x3, // FP80 + VARIANT_FLOAT_4 = 0x4, // TENSOR_FLOAT + VARIANT_FLOAT_5 = 0x5, // UNASSIGNED + VARIANT_FLOAT_6 = 0x6, // UNASSIGNED + VARIANT_FLOAT_7 = 0x7, // UNASSIGNED + }; + + enum class Kind : uint64_t { + INVALID = 0b000, + INTEGER = 0b001, + FLOAT = 0b010, + POINTER = 0b011, + VECTOR_INTEGER = 0b101, + VECTOR_FLOAT = 0b110, + VECTOR_POINTER = 0b111, + }; + + constexpr static Kind toVector(Kind Ty) { + if (Ty == Kind::POINTER) + return Kind::VECTOR_POINTER; + + if (Ty == Kind::INTEGER) + return Kind::VECTOR_INTEGER; + + if (Ty == Kind::FLOAT) + return Kind::VECTOR_FLOAT; + + llvm_unreachable("Type is already a vector type"); + } + + constexpr static Kind toScalar(Kind Ty) { + if (Ty == Kind::VECTOR_POINTER) + return Kind::POINTER; + + if (Ty == Kind::VECTOR_INTEGER) + return Kind::INTEGER; + + if (Ty == Kind::VECTOR_FLOAT) + return Kind::FLOAT; + + llvm_unreachable("Type is already a scalar type"); + } + /// Get a low-level scalar or aggregate "bag of bits". - static constexpr LLT scalar(unsigned SizeInBits) { - return LLT{/*isPointer=*/false, /*isVector=*/false, /*isScalar=*/true, - ElementCount::getFixed(0), SizeInBits, - /*AddressSpace=*/0}; + [[deprecated("Use LLT::integer(unsigned) instead.")]] static constexpr LLT + scalar(unsigned SizeInBits) { + return LLT{Kind::INTEGER, ElementCount::getFixed(0), SizeInBits, + /*AddressSpace=*/0, static_cast(0)}; + } + + static constexpr LLT integer(unsigned SizeInBits) { + return LLT{Kind::INTEGER, ElementCount::getFixed(0), SizeInBits, + /*AddressSpace=*/0, static_cast(0)}; + } + + static constexpr LLT floatingPoint(unsigned SizeInBits, FPVariant FP) { + return LLT{Kind::FLOAT, ElementCount::getFixed(0), SizeInBits, + /*AddressSpace=*/0, FP}; } /// Get a low-level token; just a scalar with zero bits (or no size). static constexpr LLT token() { - return LLT{/*isPointer=*/false, /*isVector=*/false, - /*isScalar=*/true, ElementCount::getFixed(0), + return LLT{Kind::INTEGER, ElementCount::getFixed(0), /*SizeInBits=*/0, - /*AddressSpace=*/0}; + /*AddressSpace=*/0, static_cast(0)}; } /// Get a low-level pointer in the given address space. static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits) { assert(SizeInBits > 0 && "invalid pointer size"); - return LLT{/*isPointer=*/true, /*isVector=*/false, /*isScalar=*/false, - ElementCount::getFixed(0), SizeInBits, AddressSpace}; + return LLT{Kind::POINTER, ElementCount::getFixed(0), SizeInBits, + AddressSpace, static_cast(0)}; } /// Get a low-level vector of some number of elements and element width. 
- static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits) { + [[deprecated("Use LLT::vector(EC, LLT) instead.")]] static constexpr LLT + vector(ElementCount EC, unsigned ScalarSizeInBits) { assert(!EC.isScalar() && "invalid number of vector elements"); - return LLT{/*isPointer=*/false, /*isVector=*/true, /*isScalar=*/false, - EC, ScalarSizeInBits, /*AddressSpace=*/0}; + return LLT{Kind::VECTOR_INTEGER, EC, ScalarSizeInBits, + /*AddressSpace=*/0, static_cast(0)}; } /// Get a low-level vector of some number of elements and element type. static constexpr LLT vector(ElementCount EC, LLT ScalarTy) { assert(!EC.isScalar() && "invalid number of vector elements"); assert(!ScalarTy.isVector() && "invalid vector element type"); - return LLT{ScalarTy.isPointer(), - /*isVector=*/true, - /*isScalar=*/false, - EC, - ScalarTy.getSizeInBits().getFixedValue(), - ScalarTy.isPointer() ? ScalarTy.getAddressSpace() : 0}; + + Kind Info = toVector(ScalarTy.Info); + return LLT{Info, EC, ScalarTy.getSizeInBits().getFixedValue(), + ScalarTy.isPointer() ? ScalarTy.getAddressSpace() : 0, + ScalarTy.isFloat() ? ScalarTy.getFPVariant() + : static_cast(0)}; + } + // Get a 8-bit brain float value. + static constexpr LLT bfloat8() { + return floatingPoint(8, FPVariant::BRAIN_FLOAT); + } + + // Get a 16-bit brain float value. + static constexpr LLT bfloat16() { + return floatingPoint(16, FPVariant::BRAIN_FLOAT); } /// Get a 16-bit IEEE half value. - /// TODO: Add IEEE semantics to type - This currently returns a simple `scalar(16)`. static constexpr LLT float16() { - return scalar(16); + return floatingPoint(16, FPVariant::IEEE_FLOAT); } /// Get a 32-bit IEEE float value. static constexpr LLT float32() { - return scalar(32); + return floatingPoint(32, FPVariant::IEEE_FLOAT); } /// Get a 64-bit IEEE double value. static constexpr LLT float64() { - return scalar(64); + return floatingPoint(64, FPVariant::IEEE_FLOAT); + } + + /// Get a 80-bit X86 floating point value. + static constexpr LLT x86fp80() { + return floatingPoint(80, FPVariant::VARIANT_FLOAT_3); + } + + /// Get a 128-bit IEEE quad value. + static constexpr LLT float128() { + return floatingPoint(128, FPVariant::IEEE_FLOAT); + } + + /// Get a 128-bit PowerPC double double value. + static constexpr LLT ppcf128() { + return floatingPoint(128, FPVariant::VARIANT_FLOAT_2); } /// Get a low-level fixed-width vector of some number of elements and element /// width. - static constexpr LLT fixed_vector(unsigned NumElements, - unsigned ScalarSizeInBits) { - return vector(ElementCount::getFixed(NumElements), ScalarSizeInBits); + [[deprecated( + "Use LLT::fixed_vector(unsigned, LLT) instead.")]] static constexpr LLT + fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits) { + return vector(ElementCount::getFixed(NumElements), + LLT::integer(ScalarSizeInBits)); } /// Get a low-level fixed-width vector of some number of elements and element @@ -110,9 +193,11 @@ class LLT { /// Get a low-level scalable vector of some number of elements and element /// width. 
- static constexpr LLT scalable_vector(unsigned MinNumElements, - unsigned ScalarSizeInBits) { - return vector(ElementCount::getScalable(MinNumElements), ScalarSizeInBits); + [[deprecated( + "Use LLT::scalable_vector(unsigned, LLT) instead.")]] static constexpr LLT + scalable_vector(unsigned MinNumElements, unsigned ScalarSizeInBits) { + return vector(ElementCount::getScalable(MinNumElements), + LLT::integer(ScalarSizeInBits)); } /// Get a low-level scalable vector of some number of elements and element @@ -125,36 +210,83 @@ class LLT { return EC.isScalar() ? ScalarTy : LLT::vector(EC, ScalarTy); } - static constexpr LLT scalarOrVector(ElementCount EC, uint64_t ScalarSize) { + [[deprecated( + "Use LLT::scalarOrVector(EC, LLT) instead.")]] static constexpr LLT + scalarOrVector(ElementCount EC, uint64_t ScalarSize) { assert(ScalarSize <= std::numeric_limits::max() && "Not enough bits in LLT to represent size"); - return scalarOrVector(EC, LLT::scalar(static_cast(ScalarSize))); + return scalarOrVector(EC, LLT::integer(static_cast(ScalarSize))); } - explicit constexpr LLT(bool isPointer, bool isVector, bool isScalar, - ElementCount EC, uint64_t SizeInBits, - unsigned AddressSpace) + explicit constexpr LLT(Kind Info, ElementCount EC, uint64_t SizeInBits, + unsigned AddressSpace, FPVariant FP) : LLT() { - init(isPointer, isVector, isScalar, EC, SizeInBits, AddressSpace); + init(Info, EC, SizeInBits, AddressSpace, FP); } - explicit constexpr LLT() - : IsScalar(false), IsPointer(false), IsVector(false), RawData(0) {} explicit LLT(MVT VT); + explicit constexpr LLT() : Info(static_cast(0)), RawData(0) {} - constexpr bool isValid() const { return IsScalar || RawData != 0; } - constexpr bool isScalar() const { return IsScalar; } - constexpr bool isToken() const { return IsScalar && RawData == 0; }; - constexpr bool isVector() const { return isValid() && IsVector; } + constexpr bool isValid() const { return isToken() || RawData != 0; } + constexpr bool isScalar() const { + return Info == Kind::INTEGER || Info == Kind::FLOAT; + } constexpr bool isScalar(unsigned Size) const { return isScalar() && getScalarSizeInBits() == Size; } + constexpr bool isFloat() const { return isValid() && Info == Kind::FLOAT; } + constexpr bool isFloat(unsigned Size) const { + return isFloat() && getScalarSizeInBits() == Size; + } + constexpr bool isVariantFloat() const { + return isFloat() && getFPVariant() != FPVariant::IEEE_FLOAT; + } + constexpr bool isVariantFloat(FPVariant Variant) const { + return isFloat() && getFPVariant() == Variant; + } + constexpr bool isVariantFloat(unsigned Size, FPVariant Variant) const { + return isVariantFloat(Variant) && getScalarSizeInBits() == Size; + } + constexpr bool isFloatVector() const { + return isVector() && Info == Kind::VECTOR_FLOAT; + } + constexpr bool isIEEEFloat(unsigned Size) const { + return isVariantFloat(Size, FPVariant::IEEE_FLOAT); + } + constexpr bool isBFloat(unsigned Size) const { + return isVariantFloat(Size, FPVariant::BRAIN_FLOAT); + } + constexpr bool isX86FP80() const { + return isVariantFloat(80, FPVariant::VARIANT_FLOAT_3); + } + constexpr bool isPPCF128() const { + return isVariantFloat(128, FPVariant::VARIANT_FLOAT_2); + } + constexpr bool isToken() const { + return Info == Kind::INTEGER && RawData == 0; + } + constexpr bool isInteger() const { + return isValid() && Info == Kind::INTEGER; + } + constexpr bool isInteger(unsigned Size) const { + return isInteger() && getScalarSizeInBits() == Size; + } + constexpr bool isIntegerVector() const { + return 
isVector() && Info == Kind::VECTOR_INTEGER; + } + constexpr bool isVector() const { + return isValid() && + (Info == Kind::VECTOR_INTEGER || Info == Kind::VECTOR_FLOAT || + Info == Kind::VECTOR_POINTER); + } constexpr bool isPointer() const { - return isValid() && IsPointer && !IsVector; + return isValid() && Info == Kind::POINTER; + } + constexpr bool isPointerVector() const { + return isVector() && Info == Kind::VECTOR_POINTER; } - constexpr bool isPointerVector() const { return IsPointer && isVector(); } constexpr bool isPointerOrPointerVector() const { - return IsPointer && isValid(); + return isPointer() || isPointerVector(); } /// Returns the number of elements in a vector LLT. Must only be called on @@ -190,7 +322,7 @@ class LLT { constexpr bool isScalableVector() const { return isVector() && isScalable(); } constexpr ElementCount getElementCount() const { - assert(IsVector && "cannot get number of elements on scalar/aggregate"); + assert(isVector() && "cannot get number of elements on scalar/aggregate"); return ElementCount::get(getFieldValue(VectorElementsFieldInfo), isScalable()); } @@ -215,6 +347,15 @@ class LLT { return isVector() ? getElementType() : *this; } + constexpr FPVariant getFPVariant() const { + assert((isFloat() || isFloatVector()) && + "cannot get FP info for non float type"); + + return FPVariant(getFieldValue(FPFieldInfo)); + } + + constexpr Kind getKind() const { return Info; } + /// If this type is a vector, return a vector with the same number of elements /// but the new element type. Otherwise, return the new element type. constexpr LLT changeElementType(LLT NewEltTy) const { @@ -225,10 +366,10 @@ class LLT { /// but the new element size. Otherwise, return the new element type. Invalid /// for pointer types. For pointer types, use changeElementType. constexpr LLT changeElementSize(unsigned NewEltSize) const { - assert(!isPointerOrPointerVector() && + assert(!isPointerOrPointerVector() && !(isFloat() || isFloatVector()) && "invalid to directly change element size for pointers"); - return isVector() ? LLT::vector(getElementCount(), NewEltSize) - : LLT::scalar(NewEltSize); + return isVector() ? LLT::vector(getElementCount(), LLT::integer(NewEltSize)) + : LLT::integer(NewEltSize); } /// Return a vector or scalar with the same element type and the new element @@ -237,6 +378,10 @@ class LLT { return LLT::scalarOrVector(EC, getScalarType()); } + constexpr LLT changeElementCount(unsigned NumElements) const { + return changeElementCount(ElementCount::getFixed(NumElements)); + } + /// Return a type that is \p Factor times smaller. Reduces the number of /// elements if this is a vector, or the bitwidth for scalar/pointers. Does /// not attempt to handle cases that aren't evenly divisible. @@ -251,7 +396,7 @@ class LLT { } assert(getScalarSizeInBits() % Factor == 0); - return scalar(getScalarSizeInBits() / Factor); + return integer(getScalarSizeInBits() / Factor); } /// Produce a vector type that is \p Factor times bigger, preserving the @@ -285,10 +430,23 @@ class LLT { /// Returns the vector's element type. Only valid for vector types. 
constexpr LLT getElementType() const { assert(isVector() && "cannot get element type of scalar/aggregate"); - if (IsPointer) + if (isPointerVector()) return pointer(getAddressSpace(), getScalarSizeInBits()); - else - return scalar(getScalarSizeInBits()); + + if (isFloatVector()) + return floatingPoint(getScalarSizeInBits(), getFPVariant()); + + return integer(getScalarSizeInBits()); + } + + constexpr LLT changeToInteger() const { + if (isPointer() || isPointerVector()) + return *this; + + if (isVector()) + return vector(getElementCount(), LLT::integer(getScalarSizeInBits())); + + return integer(getSizeInBits()); } void print(raw_ostream &OS) const; @@ -298,8 +456,7 @@ class LLT { #endif constexpr bool operator==(const LLT &RHS) const { - return IsPointer == RHS.IsPointer && IsVector == RHS.IsVector && - IsScalar == RHS.IsScalar && RHS.RawData == RawData; + return Info == RHS.Info && RawData == RHS.RawData; } constexpr bool operator!=(const LLT &RHS) const { return !(*this == RHS); } @@ -309,37 +466,33 @@ class LLT { private: /// LLT is packed into 64 bits as follows: - /// isScalar : 1 - /// isPointer : 1 - /// isVector : 1 - /// with 61 bits remaining for Kind-specific data, packed in bitfields - /// as described below. As there isn't a simple portable way to pack bits - /// into bitfields, here the different fields in the packed structure is + /// Info : 3 + /// RawData : 61 + /// with 61 bits of RawData remaining for Kind-specific data, packed in + /// bitfields as described below. As there isn't a simple portable way to pack + /// bits into bitfields, here the different fields in the packed structure is /// described in static const *Field variables. Each of these variables /// is a 2-element array, with the first element describing the bitfield size /// and the second element describing the bitfield offset. /// - /// +--------+---------+--------+----------+----------------------+ - /// |isScalar|isPointer|isVector| RawData |Notes | - /// +--------+---------+--------+----------+----------------------+ - /// | 0 | 0 | 0 | 0 |Invalid | - /// +--------+---------+--------+----------+----------------------+ - /// | 0 | 0 | 1 | 0 |Tombstone Key | - /// +--------+---------+--------+----------+----------------------+ - /// | 0 | 1 | 0 | 0 |Empty Key | - /// +--------+---------+--------+----------+----------------------+ - /// | 1 | 0 | 0 | 0 |Token | - /// +--------+---------+--------+----------+----------------------+ - /// | 1 | 0 | 0 | non-zero |Scalar | - /// +--------+---------+--------+----------+----------------------+ - /// | 0 | 1 | 0 | non-zero |Pointer | - /// +--------+---------+--------+----------+----------------------+ - /// | 0 | 0 | 1 | non-zero |Vector of non-pointer | - /// +--------+---------+--------+----------+----------------------+ - /// | 0 | 1 | 1 | non-zero |Vector of pointer | - /// +--------+---------+--------+----------+----------------------+ - /// - /// Everything else is reserved. + /* + --- LLT --- + + 63 56 47 39 31 23 15 7 0 + | | | | | | | | | + |xxxxxxxx|xxxxxxxx|xxxxxxxx|xxxxxxxx|xxxxxxxx|xxxxxxxx|xxxxxxxx|xxxxxxxx| + ................................... 
(1) + ***************** (2) + ~~~~~~~~~~~~~~~~~~~~~~~~~~ (3) + ^^^^^^^^^^^^^^^^^ (4) + @ (5) + ### (6) + %%% (7) + + (1) ScalarSize (2) PointerSize (3) PointerAddressSpace + (4) VectorElements (5) VectorScalable (6) FPVariant (7) Kind + + */ typedef int BitFieldInfo[2]; /// /// This is how the bitfields are packed per Kind: @@ -349,6 +502,7 @@ class LLT { /// * Non-pointer scalar (isPointer == 0 && isVector == 0): /// SizeInBits: 32; static const constexpr BitFieldInfo ScalarSizeFieldInfo{32, 29}; + static const constexpr BitFieldInfo FPFieldInfo{3, 21}; /// * Pointer (isPointer == 1 && isVector == 0): /// SizeInBits: 16; /// AddressSpace: 24; @@ -366,9 +520,7 @@ class LLT { /// AddressSpace: 24; /// Scalable: 1; - uint64_t IsScalar : 1; - uint64_t IsPointer : 1; - uint64_t IsVector : 1; + Kind Info : 3; uint64_t RawData : 61; static constexpr uint64_t getMask(const BitFieldInfo FieldInfo) { @@ -389,21 +541,21 @@ class LLT { return getMask(FieldInfo) & (RawData >> FieldInfo[1]); } - constexpr void init(bool IsPointer, bool IsVector, bool IsScalar, - ElementCount EC, uint64_t SizeInBits, - unsigned AddressSpace) { + constexpr void init(Kind Info, ElementCount EC, uint64_t SizeInBits, + unsigned AddressSpace, FPVariant FP) { assert(SizeInBits <= std::numeric_limits::max() && "Not enough bits in LLT to represent size"); - this->IsPointer = IsPointer; - this->IsVector = IsVector; - this->IsScalar = IsScalar; - if (IsPointer) { + this->Info = Info; + if (Info == Kind::POINTER || Info == Kind::VECTOR_POINTER) { RawData = maskAndShift(SizeInBits, PointerSizeFieldInfo) | maskAndShift(AddressSpace, PointerAddressSpaceFieldInfo); } else { - RawData = maskAndShift(SizeInBits, ScalarSizeFieldInfo); + RawData = maskAndShift(SizeInBits, ScalarSizeFieldInfo) | + maskAndShift((uint64_t)FP, FPFieldInfo); } - if (IsVector) { + + if (Info == Kind::VECTOR_INTEGER || Info == Kind::VECTOR_FLOAT || + Info == Kind::VECTOR_POINTER) { RawData |= maskAndShift(EC.getKnownMinValue(), VectorElementsFieldInfo) | maskAndShift(EC.isScalable() ? 
1 : 0, VectorScalableFieldInfo); } @@ -411,8 +563,7 @@ class LLT { public: constexpr uint64_t getUniqueRAWLLTData() const { - return ((uint64_t)RawData) << 3 | ((uint64_t)IsScalar) << 2 | - ((uint64_t)IsPointer) << 1 | ((uint64_t)IsVector); + return ((uint64_t)RawData) << 3 | ((uint64_t)Info); } }; @@ -421,15 +572,15 @@ inline raw_ostream& operator<<(raw_ostream &OS, const LLT &Ty) { return OS; } -template<> struct DenseMapInfo { +template <> struct DenseMapInfo { static inline LLT getEmptyKey() { LLT Invalid; - Invalid.IsPointer = true; + Invalid.Info = static_cast(2); return Invalid; } static inline LLT getTombstoneKey() { LLT Invalid; - Invalid.IsVector = true; + Invalid.Info = static_cast(3); return Invalid; } static inline unsigned getHashValue(const LLT &Ty) { @@ -441,6 +592,6 @@ template<> struct DenseMapInfo { } }; -} +} // namespace llvm #endif // LLVM_CODEGEN_LOWLEVELTYPE_H diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 77a1a70d976d6..cb6b342b3fe6a 100644 --- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -563,7 +563,8 @@ MachineInstrBuilder MachineIRBuilder::buildExtOrTrunc(unsigned ExtOpc, Op.getLLTTy(*getMRI()).getSizeInBits()) Opcode = TargetOpcode::G_TRUNC; else - assert(Res.getLLTTy(*getMRI()) == Op.getLLTTy(*getMRI())); + assert(Res.getLLTTy(*getMRI()).getSizeInBits() == + Op.getLLTTy(*getMRI()).getSizeInBits()); return buildInstr(Opcode, Res, Op); } diff --git a/llvm/lib/CodeGen/LowLevelTypeUtils.cpp b/llvm/lib/CodeGen/LowLevelTypeUtils.cpp index 936c9fbb2fff0..cf34bf71a8c3a 100644 --- a/llvm/lib/CodeGen/LowLevelTypeUtils.cpp +++ b/llvm/lib/CodeGen/LowLevelTypeUtils.cpp @@ -36,7 +36,37 @@ LLT llvm::getLLTForType(Type &Ty, const DataLayout &DL) { // concerned. 
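// A minimal illustration (not part of this patch) of what the hunk below is
// expected to do: getLLTForType() starts mapping IR floating-point types to
// the typed LLTs introduced by this series instead of a plain scalar.
//   getLLTForType(*Type::getHalfTy(Ctx), DL)   => LLT::float16()
//   getLLTForType(*Type::getBFloatTy(Ctx), DL) => LLT::bfloat16()
//   getLLTForType(*Type::getDoubleTy(Ctx), DL) => LLT::float64()
//   getLLTForType(*Type::getInt32Ty(Ctx), DL)  => LLT::integer(32)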
auto SizeInBits = DL.getTypeSizeInBits(&Ty); assert(SizeInBits != 0 && "invalid zero-sized type"); - return LLT::scalar(SizeInBits); + + if (Ty.isFloatingPointTy()) { + if (Ty.isHalfTy()) + return LLT::float16(); + + if (Ty.isBFloatTy()) + return LLT::bfloat16(); + + if (Ty.isFloatTy()) + return LLT::float32(); + + if (Ty.isDoubleTy()) + return LLT::float64(); + + if (Ty.isX86_FP80Ty()) + return LLT::x86fp80(); + + if (Ty.isFP128Ty()) + return LLT::float128(); + + if (Ty.isPPC_FP128Ty()) + return LLT::ppcf128(); + + llvm_unreachable("Unhandled LLVM IR floating point type"); + } + + if (Ty.isIntegerTy()) { + return LLT::integer(SizeInBits); + } + + return LLT::integer(SizeInBits); } if (Ty.isTokenTy()) @@ -46,12 +76,25 @@ LLT llvm::getLLTForType(Type &Ty, const DataLayout &DL) { } MVT llvm::getMVTForLLT(LLT Ty) { - if (!Ty.isVector()) - return MVT::getIntegerVT(Ty.getSizeInBits()); + if (Ty.isVector()) { + return MVT::getVectorVT(getMVTForLLT(Ty.getElementType()), + Ty.getElementCount()); + } + + if (Ty.isFloat()) { + if (Ty == LLT::bfloat16()) + return MVT::bf16; + + if (Ty == LLT::x86fp80()) + return MVT::f80; + + if (Ty == LLT::ppcf128()) + return MVT::ppcf128; + + return MVT::getFloatingPointVT(Ty.getSizeInBits()); + } - return MVT::getVectorVT( - MVT::getIntegerVT(Ty.getElementType().getSizeInBits()), - Ty.getElementCount()); + return MVT::getIntegerVT(Ty.getSizeInBits()); } EVT llvm::getApproximateEVTForLLT(LLT Ty, LLVMContext &Ctx) { @@ -63,16 +106,20 @@ EVT llvm::getApproximateEVTForLLT(LLT Ty, LLVMContext &Ctx) { return EVT::getIntegerVT(Ctx, Ty.getSizeInBits()); } -LLT llvm::getLLTForMVT(MVT Ty) { - if (!Ty.isVector()) - return LLT::scalar(Ty.getSizeInBits()); - - return LLT::scalarOrVector(Ty.getVectorElementCount(), - Ty.getVectorElementType().getSizeInBits()); -} +LLT llvm::getLLTForMVT(MVT Ty) { return LLT(Ty); } const llvm::fltSemantics &llvm::getFltSemanticForLLT(LLT Ty) { - assert(Ty.isScalar() && "Expected a scalar type."); + assert(Ty.isFloat() && "Expected a scalar type."); + + if (Ty.isBFloat(16)) + return APFloat::BFloat(); + if (Ty.isX86FP80()) + return APFloat::x87DoubleExtended(); + if (Ty.isPPCF128()) + return APFloat::PPCDoubleDouble(); + + assert(!Ty.isVariantFloat() && "Unhandled variant float type"); + switch (Ty.getSizeInBits()) { case 16: return APFloat::IEEEhalf(); diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp index 5c8e32d11cfb0..ad63b1cd4310e 100644 --- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp @@ -1923,26 +1923,33 @@ static bool verifyAddrSpace(uint64_t AddrSpace) { } bool MIParser::parseLowLevelType(StringRef::iterator Loc, LLT &Ty) { - if (Token.range().front() == 's' || Token.range().front() == 'p') { - StringRef SizeStr = Token.range().drop_front(); - if (SizeStr.size() == 0 || !llvm::all_of(SizeStr, isdigit)) - return error("expected integers after 's'/'p' type character"); - } - - if (Token.range().front() == 's') { - auto ScalarSize = APSInt(Token.range().drop_front()).getZExtValue(); - if (ScalarSize) { - if (!verifyScalarSize(ScalarSize)) - return error("invalid size for scalar type"); - Ty = LLT::scalar(ScalarSize); - } else { + StringRef TypeDigits = Token.range(); + if (TypeDigits.consume_front("s") || TypeDigits.consume_front("i") || + TypeDigits.consume_front("f") || TypeDigits.consume_front("p") || + TypeDigits.consume_front("bf")) { + if (TypeDigits.empty() || !llvm::all_of(TypeDigits, isdigit)) + return error("expected integers after 
's'/'i'/'f'/'bf'/'p' type prefix"); + } + + if (Token.range().starts_with("s") || Token.range().starts_with("i")) { + auto ScalarSize = APSInt(TypeDigits).getZExtValue(); + if (!ScalarSize) { Ty = LLT::token(); + lex(); + return false; } + + if (!verifyScalarSize(ScalarSize)) + return error("invalid size for scalar type"); + + Ty = LLT::integer(ScalarSize); lex(); return false; - } else if (Token.range().front() == 'p') { + } + + if (Token.range().starts_with("p")) { const DataLayout &DL = MF.getDataLayout(); - uint64_t AS = APSInt(Token.range().drop_front()).getZExtValue(); + uint64_t AS = APSInt(TypeDigits).getZExtValue(); if (!verifyAddrSpace(AS)) return error("invalid address space number"); @@ -1951,6 +1958,23 @@ bool MIParser::parseLowLevelType(StringRef::iterator Loc, LLT &Ty) { return false; } + if (Token.range().starts_with("f") || Token.range().starts_with("bf")) { + LLT::FPVariant FPVariant; + if (Token.range().starts_with("f")) { + FPVariant = LLT::FPVariant::IEEE_FLOAT; + } else if (Token.range().starts_with("bf")) { + FPVariant = LLT::FPVariant::BRAIN_FLOAT; + } else { + return error("unknown floating point type identifier"); + } + auto ScalarSize = APSInt(TypeDigits).getZExtValue(); + if (!ScalarSize || !verifyScalarSize(ScalarSize)) + return error("invalid size for scalar type"); + Ty = LLT::floatingPoint(ScalarSize, FPVariant); + lex(); + return false; + } + // Now we're looking for a vector. if (Token.isNot(MIToken::less)) return error(Loc, "expected sN, pA, , , , " @@ -1985,25 +2009,39 @@ bool MIParser::parseLowLevelType(StringRef::iterator Loc, LLT &Ty) { return GetError(); lex(); - if (Token.range().front() != 's' && Token.range().front() != 'p') + StringRef VectorTyDigits = Token.range(); + if (!VectorTyDigits.consume_front("s") && + !VectorTyDigits.consume_front("i") && + !VectorTyDigits.consume_front("f") && + !VectorTyDigits.consume_front("p") && !VectorTyDigits.consume_front("bf")) return GetError(); - StringRef SizeStr = Token.range().drop_front(); - if (SizeStr.size() == 0 || !llvm::all_of(SizeStr, isdigit)) - return error("expected integers after 's'/'p' type character"); + if (VectorTyDigits.empty() || !llvm::all_of(VectorTyDigits, isdigit)) + return error( + "expected integers after 's'/'i'/'f'/'bf'/'p' type identifier"); - if (Token.range().front() == 's') { - auto ScalarSize = APSInt(Token.range().drop_front()).getZExtValue(); + if (Token.range().starts_with("s") || Token.range().starts_with("i")) { + auto ScalarSize = APSInt(VectorTyDigits).getZExtValue(); if (!verifyScalarSize(ScalarSize)) return error("invalid size for scalar element in vector"); - Ty = LLT::scalar(ScalarSize); - } else if (Token.range().front() == 'p') { + Ty = LLT::integer(ScalarSize); + } else if (Token.range().starts_with("p")) { const DataLayout &DL = MF.getDataLayout(); - uint64_t AS = APSInt(Token.range().drop_front()).getZExtValue(); + uint64_t AS = APSInt(VectorTyDigits).getZExtValue(); if (!verifyAddrSpace(AS)) return error("invalid address space number"); Ty = LLT::pointer(AS, DL.getPointerSizeInBits(AS)); + } else if (Token.range().starts_with("f")) { + auto ScalarSize = APSInt(VectorTyDigits).getZExtValue(); + if (!verifyScalarSize(ScalarSize)) + return error("invalid size for float element in vector"); + Ty = LLT::floatingPoint(ScalarSize, LLT::FPVariant::IEEE_FLOAT); + } else if (Token.range().starts_with("bf")) { + auto ScalarSize = APSInt(VectorTyDigits).getZExtValue(); + if (!verifyScalarSize(ScalarSize)) + return error("invalid size for bfloat element in vector"); + Ty 
= LLT::floatingPoint(ScalarSize, LLT::FPVariant::BRAIN_FLOAT); } else return GetError(); lex(); @@ -2019,14 +2057,14 @@ bool MIParser::parseLowLevelType(StringRef::iterator Loc, LLT &Ty) { bool MIParser::parseTypedImmediateOperand(MachineOperand &Dest) { assert(Token.is(MIToken::Identifier)); - StringRef TypeStr = Token.range(); - if (TypeStr.front() != 'i' && TypeStr.front() != 's' && - TypeStr.front() != 'p') - return error( - "a typed immediate operand should start with one of 'i', 's', or 'p'"); - StringRef SizeStr = Token.range().drop_front(); - if (SizeStr.size() == 0 || !llvm::all_of(SizeStr, isdigit)) - return error("expected integers after 'i'/'s'/'p' type character"); + StringRef TypeDigits = Token.range(); + if (!TypeDigits.consume_front("i") && !TypeDigits.consume_front("s") && + !TypeDigits.consume_front("p") && !TypeDigits.consume_front("f") && + !TypeDigits.consume_front("bf")) + return error("a typed immediate operand should start with one of 'i', " + "'s','f','bf', or 'p'"); + if (TypeDigits.empty() || !llvm::all_of(TypeDigits, isdigit)) + return error("expected integers after 'i'/'s'/'f'/'bf'/'p' type character"); auto Loc = Token.location(); lex(); diff --git a/llvm/lib/CodeGenTypes/LowLevelType.cpp b/llvm/lib/CodeGenTypes/LowLevelType.cpp index 4785f2652b00e..925b4efaf0edf 100644 --- a/llvm/lib/CodeGenTypes/LowLevelType.cpp +++ b/llvm/lib/CodeGenTypes/LowLevelType.cpp @@ -16,22 +16,45 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +static std::optional deriveFPInfo(MVT VT) { + if (!VT.isFloatingPoint()) + return std::nullopt; + + switch (VT.getScalarType().SimpleTy) { + case MVT::bf16: + return LLT::FPVariant::BRAIN_FLOAT; + case MVT::f80: + return LLT::FPVariant::VARIANT_FLOAT_3; + case MVT::ppcf128: + return LLT::FPVariant::VARIANT_FLOAT_2; + default: + return LLT::FPVariant::IEEE_FLOAT; + } +} + LLT::LLT(MVT VT) { + auto FP = deriveFPInfo(VT); + bool AsVector = VT.isVector() && + (VT.getVectorMinNumElements() > 1 || VT.isScalableVector()); + + Kind Info; + if (FP.has_value()) + Info = AsVector ? Kind::VECTOR_FLOAT : Kind::FLOAT; + else + Info = AsVector ? Kind::VECTOR_INTEGER : Kind::INTEGER; + if (VT.isVector()) { - bool asVector = VT.getVectorMinNumElements() > 1 || VT.isScalableVector(); - init(/*IsPointer=*/false, asVector, /*IsScalar=*/!asVector, - VT.getVectorElementCount(), VT.getVectorElementType().getSizeInBits(), - /*AddressSpace=*/0); + init(Info, VT.getVectorElementCount(), + VT.getVectorElementType().getSizeInBits(), + /*AddressSpace=*/0, FP.value_or(FPVariant::IEEE_FLOAT)); } else if (VT.isValid() && !VT.isScalableTargetExtVT()) { // Aggregates are no different from real scalars as far as GlobalISel is // concerned. 
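// Rough sketch (illustration only) of what the Kind/FPVariant selection above
// produces for a few common MVTs, following the deriveFPInfo switch earlier in
// this file:
//   LLT(MVT::f32)   -> Kind::FLOAT,        FPVariant::IEEE_FLOAT, 32 bits
//   LLT(MVT::bf16)  -> Kind::FLOAT,        FPVariant::BRAIN_FLOAT
//   LLT(MVT::v4f16) -> Kind::VECTOR_FLOAT, 4 x IEEE half
//   LLT(MVT::i64)   -> Kind::INTEGER,      64 bits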
- init(/*IsPointer=*/false, /*IsVector=*/false, /*IsScalar=*/true, - ElementCount::getFixed(0), VT.getSizeInBits(), /*AddressSpace=*/0); + init(Info, ElementCount::getFixed(0), VT.getSizeInBits(), + /*AddressSpace=*/0, FP.value_or(FPVariant::IEEE_FLOAT)); } else { - IsScalar = false; - IsPointer = false; - IsVector = false; - RawData = 0; + this->Info = static_cast(0); + this->RawData = 0; } } @@ -39,13 +62,23 @@ void LLT::print(raw_ostream &OS) const { if (isVector()) { OS << "<"; OS << getElementCount() << " x " << getElementType() << ">"; - } else if (isPointer()) + } else if (isPointer()) { OS << "p" << getAddressSpace(); - else if (isValid()) { + } else if (isBFloat(16)) { + OS << "bf16"; + } else if (isPPCF128()) { + OS << "ppcf128"; + } else if (isFloat()) { + assert(!isVariantFloat() && "unknown float variant"); + OS << "f" << getScalarSizeInBits(); + } else if (isInteger()) { + OS << "i" << getScalarSizeInBits(); + } else if (isValid()) { assert(isScalar() && "unexpected type"); OS << "s" << getScalarSizeInBits(); - } else + } else { OS << "LLT_invalid"; + } } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -56,6 +89,7 @@ LLVM_DUMP_METHOD void LLT::dump() const { #endif const constexpr LLT::BitFieldInfo LLT::ScalarSizeFieldInfo; +const constexpr LLT::BitFieldInfo LLT::FPFieldInfo; const constexpr LLT::BitFieldInfo LLT::PointerSizeFieldInfo; const constexpr LLT::BitFieldInfo LLT::PointerAddressSpaceFieldInfo; const constexpr LLT::BitFieldInfo LLT::VectorElementsFieldInfo; diff --git a/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp b/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp index 4c809b4016cbd..48389aa7b5cc2 100644 --- a/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp +++ b/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp @@ -355,42 +355,95 @@ std::string LLTCodeGen::getCxxEnumValue() const { void LLTCodeGen::emitCxxEnumValue(raw_ostream &OS) const { if (Ty.isScalar()) { - OS << "GILLT_s" << Ty.getSizeInBits(); + if (Ty.isBFloat(16)) + OS << "GILLT_bf16"; + else if (Ty.isPPCF128()) + OS << "GILLT_ppcf128"; + else if (Ty.isX86FP80()) + OS << "GILLT_x86fp80"; + else if (Ty.isFloat()) + OS << "GILLT_f" << Ty.getSizeInBits(); + else if (Ty.isInteger()) + OS << "GILLT_i" << Ty.getSizeInBits(); + else + OS << "GILLT_s" << Ty.getSizeInBits(); return; } if (Ty.isVector()) { OS << (Ty.isScalable() ? 
"GILLT_nxv" : "GILLT_v") - << Ty.getElementCount().getKnownMinValue() << "s" - << Ty.getScalarSizeInBits(); + << Ty.getElementCount().getKnownMinValue(); + + LLT ElemTy = Ty.getElementType(); + if (ElemTy.isBFloat(16)) + OS << "bf16"; + else if (ElemTy.isPPCF128()) + OS << "ppcf128"; + else if (ElemTy.isX86FP80()) + OS << "x86fp80"; + else if (ElemTy.isFloat()) + OS << "f" << ElemTy.getSizeInBits(); + else if (Ty.isInteger()) + OS << "i" << ElemTy.getSizeInBits(); + else + OS << "s" << ElemTy.getSizeInBits(); return; } + if (Ty.isPointer()) { OS << "GILLT_p" << Ty.getAddressSpace(); if (Ty.getSizeInBits() > 0) OS << "s" << Ty.getSizeInBits(); return; } + llvm_unreachable("Unhandled LLT"); } void LLTCodeGen::emitCxxConstructorCall(raw_ostream &OS) const { if (Ty.isScalar()) { - OS << "LLT::scalar(" << Ty.getSizeInBits() << ")"; + if (Ty.isInteger()) + OS << "LLT::integer(" << Ty.getScalarSizeInBits() << ")"; + else if (Ty.isBFloat(16)) + OS << "LLT::bfloat()"; + else if (Ty.isPPCF128()) + OS << "LLT::ppcf128()"; + else if (Ty.isX86FP80()) + OS << "LLT::x86fp80()"; + else if (Ty.isFloat()) + OS << "LLT::floatingPoint(" << Ty.getScalarSizeInBits() + << ", LLT::FPVariant::IEEE_FLOAT)"; return; } + if (Ty.isVector()) { OS << "LLT::vector(" << (Ty.isScalable() ? "ElementCount::getScalable(" : "ElementCount::getFixed(") - << Ty.getElementCount().getKnownMinValue() << "), " - << Ty.getScalarSizeInBits() << ")"; + << Ty.getElementCount().getKnownMinValue() << "), "; + + LLT ElemTy = Ty.getElementType(); + if (ElemTy.isInteger()) + OS << "LLT::integer(" << ElemTy.getScalarSizeInBits() << ")"; + else if (ElemTy.isBFloat(16)) + OS << "LLT::bfloat()"; + else if (ElemTy.isPPCF128()) + OS << "LLT::ppcf128()"; + else if (ElemTy.isX86FP80()) + OS << "LLT::x86fp80()"; + else if (ElemTy.isFloat()) + OS << "LLT::floatingPoint(" << ElemTy.getScalarSizeInBits() + << ", LLT::FPVariant::IEEE_FLOAT)"; + + OS << ")"; return; } + if (Ty.isPointer() && Ty.getSizeInBits() > 0) { OS << "LLT::pointer(" << Ty.getAddressSpace() << ", " << Ty.getSizeInBits() << ")"; return; } + llvm_unreachable("Unhandled LLT"); } @@ -398,36 +451,7 @@ void LLTCodeGen::emitCxxConstructorCall(raw_ostream &OS) const { /// particular logic behind the order but either A < B or B < A must be /// true if A != B. bool LLTCodeGen::operator<(const LLTCodeGen &Other) const { - if (Ty.isValid() != Other.Ty.isValid()) - return Ty.isValid() < Other.Ty.isValid(); - if (!Ty.isValid()) - return false; - - if (Ty.isVector() != Other.Ty.isVector()) - return Ty.isVector() < Other.Ty.isVector(); - if (Ty.isScalar() != Other.Ty.isScalar()) - return Ty.isScalar() < Other.Ty.isScalar(); - if (Ty.isPointer() != Other.Ty.isPointer()) - return Ty.isPointer() < Other.Ty.isPointer(); - - if (Ty.isPointer() && Ty.getAddressSpace() != Other.Ty.getAddressSpace()) - return Ty.getAddressSpace() < Other.Ty.getAddressSpace(); - - if (Ty.isVector() && Ty.getElementCount() != Other.Ty.getElementCount()) - return std::tuple(Ty.isScalable(), - Ty.getElementCount().getKnownMinValue()) < - std::tuple(Other.Ty.isScalable(), - Other.Ty.getElementCount().getKnownMinValue()); - - assert((!Ty.isVector() || Ty.isScalable() == Other.Ty.isScalable()) && - "Unexpected mismatch of scalable property"); - return Ty.isVector() - ? 
std::tuple(Ty.isScalable(), - Ty.getSizeInBits().getKnownMinValue()) < - std::tuple(Other.Ty.isScalable(), - Other.Ty.getSizeInBits().getKnownMinValue()) - : Ty.getSizeInBits().getFixedValue() < - Other.Ty.getSizeInBits().getFixedValue(); + return Ty.getUniqueRAWLLTData() < Other.Ty.getUniqueRAWLLTData(); } //===- LLTCodeGen Helpers -------------------------------------------------===// @@ -436,11 +460,10 @@ std::optional MVTToLLT(MVT::SimpleValueType SVT) { MVT VT(SVT); if (VT.isVector() && !VT.getVectorElementCount().isScalar()) - return LLTCodeGen( - LLT::vector(VT.getVectorElementCount(), VT.getScalarSizeInBits())); + return LLTCodeGen(LLT(VT)); if (VT.isInteger() || VT.isFloatingPoint()) - return LLTCodeGen(LLT::scalar(VT.getSizeInBits())); + return LLTCodeGen(LLT(VT)); return std::nullopt; } From 6e2ec2489416f2af45dd574f57bbbd51e6f28b03 Mon Sep 17 00:00:00 2001 From: Tim Gymnich Date: Wed, 12 Feb 2025 13:34:11 +0100 Subject: [PATCH 06/16] FPInfo: IRTranslator and CallLowering --- .../CodeGen/GlobalISel/MachineIRBuilder.h | 2 + llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 87 ++++++++++++++----- llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 46 +++++----- .../CodeGen/GlobalISel/MachineIRBuilder.cpp | 23 +++++ llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp | 35 ++++++-- 5 files changed, 142 insertions(+), 51 deletions(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h index 7b0475ac2481d..e04bcebbffd03 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -802,6 +802,8 @@ class MachineIRBuilder { MachineInstrBuilder buildExtOrTrunc(unsigned ExtOpc, const DstOp &Res, const SrcOp &Op); + MachineInstrBuilder buildTruncLike(const DstOp &Res, const SrcOp &Op); + /// Build and inserts \p Res = \p G_AND \p Op, \p LowBitsSet(ImmOp) /// Since there is no G_ZEXT_INREG like G_SEXT_INREG, the instruction is /// emulated using G_AND. diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp index 0af70f333f864..c5142b84d8c59 100644 --- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/GlobalISel/CallLowering.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" @@ -20,9 +21,11 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetMachine.h" #define DEBUG_TYPE "call-lowering" @@ -409,12 +412,12 @@ static void buildCopyFromRegs(MachineIRBuilder &B, ArrayRef OrigRegs, // Sometimes pointers are passed zero extended. 
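// Minimal sketch (assumed example, not part of the patch) of the FP handling
// the hunk below adds: an f64 value reassembled from two i32 parts is merged
// as an integer first and then bitcast to the FP destination type.
//   auto Merge = B.buildMergeValues(LLT::integer(64), Regs); // Regs: two i32
//   B.buildBitcast(OrigRegs[0], Merge);                      // OrigRegs[0]: f64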
LLT OrigTy = MRI.getType(OrigRegs[0]); if (OrigTy.isPointer()) { - LLT IntPtrTy = LLT::scalar(OrigTy.getSizeInBits()); + LLT IntPtrTy = LLT::integer(OrigTy.getSizeInBits()); B.buildIntToPtr(OrigRegs[0], B.buildTrunc(IntPtrTy, SrcReg)); return; } - B.buildTrunc(OrigRegs[0], SrcReg); + B.buildTruncLike(OrigRegs[0], SrcReg); return; } @@ -423,11 +426,22 @@ static void buildCopyFromRegs(MachineIRBuilder &B, ArrayRef OrigRegs, LLT OrigTy = MRI.getType(OrigRegs[0]); unsigned SrcSize = PartLLT.getSizeInBits().getFixedValue() * Regs.size(); - if (SrcSize == OrigTy.getSizeInBits()) - B.buildMergeValues(OrigRegs[0], Regs); - else { - auto Widened = B.buildMergeLikeInstr(LLT::scalar(SrcSize), Regs); - B.buildTrunc(OrigRegs[0], Widened); + if (SrcSize == OrigTy.getSizeInBits()) { + if (OrigTy.isFloat() && !PartLLT.isFloat()) { + auto Merge = B.buildMergeValues(OrigTy.changeToInteger(), Regs); + B.buildBitcast(OrigRegs[0], Merge); + } else if (!OrigTy.isFloat() && PartLLT.isFloat()) { + SmallVector CastRegs(Regs.size()); + for (auto&& [Idx, Reg]: enumerate(Regs)) + CastRegs[Idx] = B.buildBitcast(PartLLT.changeToInteger(), Reg).getReg(0); + + B.buildMergeValues(OrigRegs[0], CastRegs); + } else { + B.buildMergeValues(OrigRegs[0], Regs); + } + } else { + auto Widened = B.buildMergeLikeInstr(LLT::integer(SrcSize), Regs); + B.buildTruncLike(OrigRegs[0], Widened); } return; @@ -492,19 +506,25 @@ static void buildCopyFromRegs(MachineIRBuilder &B, ArrayRef OrigRegs, SmallVector EltMerges; int PartsPerElt = divideCeil(DstEltTy.getSizeInBits(), PartLLT.getSizeInBits()); - LLT ExtendedPartTy = LLT::scalar(PartLLT.getSizeInBits() * PartsPerElt); + LLT ExtendedPartTy = LLT::integer(PartLLT.getSizeInBits() * PartsPerElt); for (int I = 0, NumElts = LLTy.getNumElements(); I != NumElts; ++I) { auto Merge = B.buildMergeLikeInstr(ExtendedPartTy, Regs.take_front(PartsPerElt)); if (ExtendedPartTy.getSizeInBits() > RealDstEltTy.getSizeInBits()) - Merge = B.buildTrunc(RealDstEltTy, Merge); + Merge = B.buildTruncLike(RealDstEltTy, Merge); // Fix the type in case this is really a vector of pointers. - MRI.setType(Merge.getReg(0), RealDstEltTy); - EltMerges.push_back(Merge.getReg(0)); + Register MergeReg = Merge.getReg(0); + + if (RealDstEltTy.isPointer()) { + MRI.setType(MergeReg, RealDstEltTy); + } else if (RealDstEltTy.isFloat() && + !MRI.getType(MergeReg).getScalarType().isFloat()) { + MergeReg = B.buildBitcast(RealDstEltTy, MergeReg).getReg(0); + } + EltMerges.push_back(MergeReg); Regs = Regs.drop_front(PartsPerElt); } - B.buildBuildVector(OrigRegs[0], EltMerges); } else { // Vector was split, and elements promoted to a wider type. @@ -532,9 +552,12 @@ static void buildCopyFromRegs(MachineIRBuilder &B, ArrayRef OrigRegs, SmallVector BVRegs; BVRegs.reserve(Regs.size() * EltPerReg); for (Register R : Regs) { - auto Unmerge = B.buildUnmerge(OriginalEltTy, R); - for (unsigned K = 0; K < EltPerReg; ++K) - BVRegs.push_back(B.buildAnyExt(PartLLT, Unmerge.getReg(K)).getReg(0)); + auto Unmerge = B.buildUnmerge(OriginalEltTy.changeToInteger(), R); + for (unsigned K = 0; K < EltPerReg; ++K) { + Register BVreg; + BVreg = B.buildAnyExt(PartLLT, Unmerge.getReg(K)).getReg(0); + BVRegs.push_back(BVreg); + } } // We may have some more elements in BVRegs, e.g. 
if we have 2 s32 pieces @@ -545,7 +568,8 @@ static void buildCopyFromRegs(MachineIRBuilder &B, ArrayRef OrigRegs, } BuildVec = B.buildBuildVector(BVType, BVRegs).getReg(0); } - B.buildTrunc(OrigRegs[0], BuildVec); + + B.buildTruncLike(OrigRegs[0], BuildVec); } } @@ -565,6 +589,8 @@ static void buildCopyToRegs(MachineIRBuilder &B, ArrayRef DstRegs, if (PartTy.isVector() == SrcTy.isVector() && PartTy.getScalarSizeInBits() > SrcTy.getScalarSizeInBits()) { assert(DstRegs.size() == 1); + if (PartTy.getScalarType().isFloat() && SrcTy.getScalarType().isFloat()) + ExtendOp = TargetOpcode::G_FPEXT; B.buildInstr(ExtendOp, {DstRegs[0]}, {SrcReg}); return; } @@ -573,8 +599,18 @@ static void buildCopyToRegs(MachineIRBuilder &B, ArrayRef DstRegs, TypeSize::isKnownGT(PartSize, SrcTy.getElementType().getSizeInBits())) { // Vector was scalarized, and the elements extended. auto UnmergeToEltTy = B.buildUnmerge(SrcTy.getElementType(), SrcReg); - for (int i = 0, e = DstRegs.size(); i != e; ++i) - B.buildAnyExt(DstRegs[i], UnmergeToEltTy.getReg(i)); + for (int i = 0, e = DstRegs.size(); i != e; ++i) { + Register Unmerge = UnmergeToEltTy.getReg(i); + if (SrcTy.isFloatVector() && PartTy.isFloat()) { + B.buildFPExt(DstRegs[i], Unmerge); + continue; + } + + if (SrcTy.isFloatVector() && !PartTy.isFloat()) + Unmerge = B.buildBitcast(SrcTy.getElementType().changeToInteger(), Unmerge).getReg(0); + + B.buildAnyExt(DstRegs[i], Unmerge); + } return; } @@ -590,6 +626,9 @@ static void buildCopyToRegs(MachineIRBuilder &B, ArrayRef DstRegs, LLT GCDTy = getGCDType(SrcTy, PartTy); if (GCDTy == PartTy) { + if (SrcTy.getScalarType().isFloat() && !PartTy.getScalarType().isFloat()) + SrcReg = B.buildBitcast(SrcTy.changeToInteger(), SrcReg).getReg(0); + // If this already evenly divisible, we can create a simple unmerge. B.buildUnmerge(DstRegs, SrcReg); return; @@ -599,8 +638,11 @@ static void buildCopyToRegs(MachineIRBuilder &B, ArrayRef DstRegs, SrcTy.getScalarSizeInBits() > PartTy.getSizeInBits()) { LLT ExtTy = LLT::vector(SrcTy.getElementCount(), - LLT::scalar(PartTy.getScalarSizeInBits() * DstRegs.size() / - SrcTy.getNumElements())); + LLT::integer(PartTy.getScalarSizeInBits() * DstRegs.size() / + SrcTy.getNumElements())); + if (SrcTy.isFloatVector()) + SrcReg = B.buildBitcast(SrcTy.changeToInteger(), SrcReg).getReg(0); + auto Ext = B.buildAnyExt(ExtTy, SrcReg); B.buildUnmerge(DstRegs, Ext); return; @@ -626,7 +668,7 @@ static void buildCopyToRegs(MachineIRBuilder &B, ArrayRef DstRegs, // For scalars, it's common to be able to use a simple extension. if (SrcTy.isScalar() && DstTy.isScalar()) { CoveringSize = alignTo(SrcSize, DstSize); - LLT CoverTy = LLT::scalar(CoveringSize); + LLT CoverTy = LLT::integer(CoveringSize); UnmergeSrc = B.buildInstr(ExtendOp, {CoverTy}, {SrcReg}).getReg(0); } else { // Widen to the common type. 
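// Brief sketch (illustration only) of the reverse direction handled above:
// when an FP source is split into integer parts and the part type divides it
// evenly, a bitcast is inserted before the unmerge, e.g. for an f64 source and
// two i32 parts:
//   SrcReg = B.buildBitcast(LLT::integer(64), SrcReg).getReg(0);
//   B.buildUnmerge(DstRegs, SrcReg); // DstRegs: two i32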
@@ -822,8 +864,9 @@ bool CallLowering::handleAssignments(ValueHandler &Handler, if (!Handler.isIncomingArgumentHandler() && OrigTy != ValTy && VA.getLocInfo() != CCValAssign::Indirect) { assert(Args[i].OrigRegs.size() == 1); + unsigned ExtendOp = extendOpFromFlags(Args[i].Flags[0]); buildCopyToRegs(MIRBuilder, Args[i].Regs, Args[i].OrigRegs[0], OrigTy, - ValTy, extendOpFromFlags(Args[i].Flags[0])); + ValTy, ExtendOp); } bool IndirectParameterPassingHandled = false; diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 540694469a3d8..f43f7d0f0a410 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -862,7 +862,7 @@ bool IRTranslator::emitJumpTableHeader(SwitchCG::JumpTable &JT, // This value may be smaller or larger than the target's pointer type, and // therefore require extension or truncating. auto *PtrIRTy = PointerType::getUnqual(SValue.getContext()); - const LLT PtrScalarTy = LLT::scalar(DL->getTypeSizeInBits(PtrIRTy)); + const LLT PtrScalarTy = LLT::integer(DL->getTypeSizeInBits(PtrIRTy)); Sub = MIB.buildZExtOrTrunc(PtrScalarTy, Sub); JT.Reg = Sub.getReg(0); @@ -879,7 +879,8 @@ bool IRTranslator::emitJumpTableHeader(SwitchCG::JumpTable &JT, auto Cst = getOrCreateVReg( *ConstantInt::get(SValue.getType(), JTH.Last - JTH.First)); Cst = MIB.buildZExtOrTrunc(PtrScalarTy, Cst).getReg(0); - auto Cmp = MIB.buildICmp(CmpInst::ICMP_UGT, LLT::scalar(1), Sub, Cst); + LLT CmpTy = LLT::integer(1); + auto Cmp = MIB.buildICmp(CmpInst::ICMP_UGT, CmpTy, Sub, Cst); auto BrCond = MIB.buildBrCond(Cmp.getReg(0), *JT.Default); @@ -910,7 +911,7 @@ void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB, return; } - const LLT i1Ty = LLT::scalar(1); + const LLT i1Ty = LLT::integer(1); // Build the compare. if (!CB.CmpMHS) { const auto *CI = dyn_cast(CB.CmpRHS); @@ -1092,14 +1093,14 @@ void IRTranslator::emitBitTestHeader(SwitchCG::BitTestBlock &B, LLT MaskTy = SwitchOpTy; if (MaskTy.getSizeInBits() > PtrTy.getSizeInBits() || !llvm::has_single_bit(MaskTy.getSizeInBits())) - MaskTy = LLT::scalar(PtrTy.getSizeInBits()); + MaskTy = LLT::integer(PtrTy.getSizeInBits()); else { // Ensure that the type will fit the mask value. for (unsigned I = 0, E = B.Cases.size(); I != E; ++I) { if (!isUIntN(SwitchOpTy.getSizeInBits(), B.Cases[I].Mask)) { // Switch table case range are encoded into series of masks. // Just use pointer type, it's guaranteed to fit. - MaskTy = LLT::scalar(PtrTy.getSizeInBits()); + MaskTy = LLT::integer(PtrTy.getSizeInBits()); break; } } @@ -1122,8 +1123,9 @@ void IRTranslator::emitBitTestHeader(SwitchCG::BitTestBlock &B, if (!B.FallthroughUnreachable) { // Conditional branch to the default block. 
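// The recurring pattern in this file: value kinds that are never floating
// point, such as booleans, switch masks and vector indices, move from
// LLT::scalar(N) to LLT::integer(N) so they stay distinct from the new FP
// scalar types. Two representative lines from the surrounding hunks:
//   const LLT VecIdxTy = LLT::integer(PreferredVecIdxWidth);
//   auto Cmp = MIB.buildICmp(CmpInst::ICMP_UGT, LLT::integer(1), Sub, Cst);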
auto RangeCst = MIB.buildConstant(SwitchOpTy, B.Range); - auto RangeCmp = MIB.buildICmp(CmpInst::Predicate::ICMP_UGT, LLT::scalar(1), - RangeSub, RangeCst); + LLT CmpTy = LLT::integer(1); + auto RangeCmp = + MIB.buildICmp(CmpInst::Predicate::ICMP_UGT, CmpTy, RangeSub, RangeCst); MIB.buildBrCond(RangeCmp, *B.Default); } @@ -1141,6 +1143,7 @@ void IRTranslator::emitBitTestCase(SwitchCG::BitTestBlock &BB, MIB.setMBB(*SwitchBB); LLT SwitchTy = getLLTForMVT(BB.RegVT); + LLT I1 = LLT::integer(1); Register Cmp; unsigned PopCount = llvm::popcount(B.Mask); if (PopCount == 1) { @@ -1149,14 +1152,12 @@ void IRTranslator::emitBitTestCase(SwitchCG::BitTestBlock &BB, auto MaskTrailingZeros = MIB.buildConstant(SwitchTy, llvm::countr_zero(B.Mask)); Cmp = - MIB.buildICmp(ICmpInst::ICMP_EQ, LLT::scalar(1), Reg, MaskTrailingZeros) - .getReg(0); + MIB.buildICmp(ICmpInst::ICMP_EQ, I1, Reg, MaskTrailingZeros).getReg(0); } else if (PopCount == BB.Range) { // There is only one zero bit in the range, test for it directly. auto MaskTrailingOnes = MIB.buildConstant(SwitchTy, llvm::countr_one(B.Mask)); - Cmp = MIB.buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), Reg, MaskTrailingOnes) - .getReg(0); + Cmp = MIB.buildICmp(CmpInst::ICMP_NE, I1, Reg, MaskTrailingOnes).getReg(0); } else { // Make desired shift. auto CstOne = MIB.buildConstant(SwitchTy, 1); @@ -1166,8 +1167,7 @@ void IRTranslator::emitBitTestCase(SwitchCG::BitTestBlock &BB, auto CstMask = MIB.buildConstant(SwitchTy, B.Mask); auto AndOp = MIB.buildAnd(SwitchTy, SwitchVal, CstMask); auto CstZero = MIB.buildConstant(SwitchTy, 0); - Cmp = MIB.buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), AndOp, CstZero) - .getReg(0); + Cmp = MIB.buildICmp(CmpInst::ICMP_NE, I1, AndOp, CstZero).getReg(0); } // The branch probability from SwitchBB to B.TargetBB is B.ExtraProb. @@ -1691,7 +1691,7 @@ bool IRTranslator::translateMemFunc(const CallInst &CI, SrcRegs.push_back(SrcReg); } - LLT SizeTy = LLT::scalar(MinPtrSize); + LLT SizeTy = LLT::integer(MinPtrSize); // The size operand should be the minimum of the pointer sizes. Register &SizeOpReg = SrcRegs[SrcRegs.size() - 1]; @@ -2812,7 +2812,7 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { DL->getABITypeAlign(Info.memVT.getTypeForEVT(F->getContext()))); LLT MemTy = Info.memVT.isSimple() ? getLLTForMVT(Info.memVT.getSimpleVT()) - : LLT::scalar(Info.memVT.getStoreSizeInBits()); + : LLT::integer(Info.memVT.getStoreSizeInBits()); // TODO: We currently just fallback to address space 0 if getTgtMemIntrinsic // didn't yield anything useful. @@ -3158,7 +3158,7 @@ bool IRTranslator::translateInsertElement(const User &U, if (!Idx) Idx = getOrCreateVReg(*U.getOperand(2)); if (MRI->getType(Idx).getSizeInBits() != PreferredVecIdxWidth) { - const LLT VecIdxTy = LLT::scalar(PreferredVecIdxWidth); + const LLT VecIdxTy = LLT::integer(PreferredVecIdxWidth); Idx = MIRBuilder.buildZExtOrTrunc(VecIdxTy, Idx).getReg(0); } MIRBuilder.buildInsertVectorElement(Res, Val, Elt, Idx); @@ -3201,7 +3201,7 @@ bool IRTranslator::translateInsertVector(const User &U, if (isa(U.getOperand(0)->getType())) { // We are inserting an illegal fixed vector into a scalable // vector, use a scalar element insert. 
- LLT VecIdxTy = LLT::scalar(PreferredVecIdxWidth); + LLT VecIdxTy = LLT::integer(PreferredVecIdxWidth); Register Idx = getOrCreateVReg(*CI); auto ScaledIndex = MIRBuilder.buildMul( VecIdxTy, MIRBuilder.buildVScale(VecIdxTy, 1), Idx); @@ -3239,7 +3239,7 @@ bool IRTranslator::translateExtractElement(const User &U, if (!Idx) Idx = getOrCreateVReg(*U.getOperand(1)); if (MRI->getType(Idx).getSizeInBits() != PreferredVecIdxWidth) { - const LLT VecIdxTy = LLT::scalar(PreferredVecIdxWidth); + const LLT VecIdxTy = LLT::integer(PreferredVecIdxWidth); Idx = MIRBuilder.buildZExtOrTrunc(VecIdxTy, Idx).getReg(0); } MIRBuilder.buildExtractVectorElement(Res, Val, Idx); @@ -3279,7 +3279,7 @@ bool IRTranslator::translateExtractVector(const User &U, if (isa(U.getOperand(0)->getType())) { // We are extracting an illegal fixed vector from a scalable // vector, use a scalar element extract. - LLT VecIdxTy = LLT::scalar(PreferredVecIdxWidth); + LLT VecIdxTy = LLT::integer(PreferredVecIdxWidth); Register Idx = getOrCreateVReg(*CI); auto ScaledIndex = MIRBuilder.buildMul( VecIdxTy, MIRBuilder.buildVScale(VecIdxTy, 1), Idx); @@ -3868,8 +3868,8 @@ bool IRTranslator::emitSPDescriptorParent(StackProtectorDescriptor &SPD, // If useLoadStackGuardNode returns true, generate LOAD_STACK_GUARD. // Otherwise, emit a volatile load to retrieve the stack guard value. if (TLI->useLoadStackGuardNode(*ParentBB->getBasicBlock()->getModule())) { - Guard = - MRI->createGenericVirtualRegister(LLT::scalar(PtrTy.getSizeInBits())); + LLT RegTy = LLT::integer(PtrTy.getSizeInBits()); + Guard = MRI->createGenericVirtualRegister(RegTy); getStackGuard(Guard, *CurBuilder); } else { // TODO: test using android subtarget when we support @llvm.thread.pointer. @@ -3885,8 +3885,8 @@ bool IRTranslator::emitSPDescriptorParent(StackProtectorDescriptor &SPD, } // Perform the comparison. - auto Cmp = - CurBuilder->buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), Guard, GuardVal); + LLT I1 = LLT::integer(1); + auto Cmp = CurBuilder->buildICmp(CmpInst::ICMP_NE, I1, Guard, GuardVal); // If the guard/stackslot do not equal, branch to failure MBB. CurBuilder->buildBrCond(Cmp, *SPD.getFailureMBB()); // Otherwise branch to success MBB. diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index cb6b342b3fe6a..6ee4931c4653c 100644 --- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -569,6 +569,29 @@ MachineInstrBuilder MachineIRBuilder::buildExtOrTrunc(unsigned ExtOpc, return buildInstr(Opcode, Res, Op); } +MachineInstrBuilder MachineIRBuilder::buildTruncLike(const DstOp &Res, const SrcOp &Op) { + LLT DstTy = Res.getLLTTy(*getMRI()); + LLT SrcTy = Op.getLLTTy(*getMRI()); + + bool DstIsFloat = DstTy.getScalarType().isFloat(); + bool SrcIsFloat = SrcTy.getScalarType().isFloat(); + + assert(DstTy.isVector() == SrcTy.isVector()); + + if (DstIsFloat && !SrcIsFloat) { + auto Trunc = buildTrunc(DstTy.changeToInteger(), Op); + return buildBitcast(Res, Trunc); + } + + if (!DstIsFloat && SrcIsFloat) { + auto Bitcast = buildBitcast(SrcTy.changeToInteger(), Op); + return buildTrunc(Res, Bitcast); + } + + unsigned Opcode = DstIsFloat ? 
TargetOpcode::G_FPTRUNC : TargetOpcode::G_TRUNC; + return buildInstr(Opcode, Res, Op); +} + MachineInstrBuilder MachineIRBuilder::buildSExtOrTrunc(const DstOp &Res, const SrcOp &Op) { return buildExtOrTrunc(TargetOpcode::G_SEXT, Res, Op); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp index 80ee864e7d291..c83479056c27c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp @@ -32,10 +32,19 @@ namespace { /// Wrapper around extendRegister to ensure we extend to a full 32-bit register. static Register extendRegisterMin32(CallLowering::ValueHandler &Handler, Register ValVReg, const CCValAssign &VA) { - if (VA.getLocVT().getSizeInBits() < 32) { + LLT SrcTy = LLT(VA.getLocVT()); + + if (SrcTy.getSizeInBits() < 32) { + LLT I32 = LLT::integer(32); + LLT DstTy = LLT::integer(SrcTy.getSizeInBits()); + + Register SrcReg = ValVReg; + if (SrcTy.isFloat()) + SrcReg = Handler.MIRBuilder.buildBitcast(DstTy, ValVReg).getReg(0); + // 16-bit types are reported as legal for 32-bit registers. We need to // extend and do a 32-bit copy to avoid the verifier complaining about it. - return Handler.MIRBuilder.buildAnyExt(LLT::scalar(32), ValVReg).getReg(0); + return Handler.MIRBuilder.buildAnyExt(I32, SrcReg).getReg(0); } return Handler.extendRegister(ValVReg, VA); @@ -119,16 +128,30 @@ struct AMDGPUIncomingArgHandler : public CallLowering::IncomingValueHandler { void assignValueToReg(Register ValVReg, Register PhysReg, const CCValAssign &VA) override { markPhysRegUsed(PhysReg); + LLT LocTy = LLT(VA.getLocVT()); - if (VA.getLocVT().getSizeInBits() < 32) { + if (LocTy.getSizeInBits() < 32) { // 16-bit types are reported as legal for 32-bit registers. We need to do // a 32-bit copy, and truncate to avoid the verifier complaining about it. - auto Copy = MIRBuilder.buildCopy(LLT::scalar(32), PhysReg); + Register CopyReg = + MIRBuilder.buildCopy(LLT::scalar(32), PhysReg).getReg(0); + + if (LocTy.getScalarType().isFloat()) { + LLT TruncTy = + LocTy.isVector() + ? LLT::vector(LocTy.getElementCount(), + LLT::integer(LocTy.getScalarSizeInBits())) + : LLT::integer(LocTy.getScalarSizeInBits()); + + auto Extended = buildExtensionHint(VA, CopyReg, TruncTy); + auto Trunc = MIRBuilder.buildTrunc(TruncTy, Extended); + MIRBuilder.buildBitcast(ValVReg, Trunc.getReg(0)); + return; + } // If we have signext/zeroext, it applies to the whole 32-bit register // before truncation. 
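// Hedged sketch (illustrative, local names hypothetical): for an incoming f16
// value that is reported in a 32-bit register, the float path added above
// amounts to copy, extend, truncate to i16 and bitcast back to f16:
//   Register Copy32 = MIRBuilder.buildCopy(LLT::scalar(32), PhysReg).getReg(0);
//   auto Ext   = buildExtensionHint(VA, Copy32, LLT::integer(16));
//   auto Low16 = MIRBuilder.buildTrunc(LLT::integer(16), Ext);
//   MIRBuilder.buildBitcast(ValVReg, Low16.getReg(0)); // reinterpret i16 as f16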
- auto Extended = - buildExtensionHint(VA, Copy.getReg(0), LLT(VA.getLocVT())); + auto Extended = buildExtensionHint(VA, CopyReg, LocTy); MIRBuilder.buildTrunc(ValVReg, Extended); return; } From e01ddfab32764b90713870a98c3959564b2905a7 Mon Sep 17 00:00:00 2001 From: Tim Gymnich Date: Wed, 12 Feb 2025 13:40:39 +0100 Subject: [PATCH 07/16] FPInfo: AMDGPURegBankLegalize --- .../Target/AMDGPU/AMDGPURegBankLegalize.cpp | 27 +++++------ .../AMDGPU/AMDGPURegBankLegalizeHelper.cpp | 2 +- .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 46 ++++++++++++------- 3 files changed, 44 insertions(+), 31 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp index 966de6f00a4a5..880bc1f8a6d1f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp @@ -21,6 +21,7 @@ #include "AMDGPUGlobalISelUtils.h" #include "AMDGPURegBankLegalizeHelper.h" #include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/CodeGen/GlobalISel/CSEInfo.h" #include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -106,10 +107,10 @@ class AMDGPURegBankLegalizeCombiner { const RegisterBank *VgprRB; const RegisterBank *VccRB; - static constexpr LLT S1 = LLT::scalar(1); - static constexpr LLT S16 = LLT::scalar(16); - static constexpr LLT S32 = LLT::scalar(32); - static constexpr LLT S64 = LLT::scalar(64); + static constexpr LLT I1 = LLT::integer(1); + static constexpr LLT I16 = LLT::integer(16); + static constexpr LLT I32 = LLT::integer(32); + static constexpr LLT I64 = LLT::integer(64); public: AMDGPURegBankLegalizeCombiner(MachineIRBuilder &B, const SIRegisterInfo &TRI, @@ -156,13 +157,13 @@ class AMDGPURegBankLegalizeCombiner { // %Dst:lane-mask(s1) = G_AMDGPU_COPY_VCC_SCC %TruncS32Src:sgpr(s32) if (isLaneMask(Dst) && MRI.getRegBankOrNull(Src) == SgprRB) { auto [Trunc, TruncS32Src] = tryMatch(Src, AMDGPU::G_TRUNC); - assert(Trunc && MRI.getType(TruncS32Src) == S32 && + assert(Trunc && MRI.getType(TruncS32Src) == I32 && "sgpr S1 must be result of G_TRUNC of sgpr S32"); B.setInstr(MI); // Ensure that truncated bits in BoolSrc are 0. - auto One = B.buildConstant({SgprRB, S32}, 1); - auto BoolSrc = B.buildAnd({SgprRB, S32}, TruncS32Src, One); + auto One = B.buildConstant({SgprRB, I32}, 1); + auto BoolSrc = B.buildAnd({SgprRB, I32}, TruncS32Src, One); B.buildInstr(AMDGPU::G_AMDGPU_COPY_VCC_SCC, {Dst}, {BoolSrc}); cleanUpAfterCombine(MI, Trunc); return; @@ -192,7 +193,7 @@ class AMDGPURegBankLegalizeCombiner { // %Dst = G_... 
%TruncSrc Register Dst = MI.getOperand(0).getReg(); Register Src = MI.getOperand(1).getReg(); - if (MRI.getType(Src) != S1) + if (MRI.getType(Src) != I1) return; auto [Trunc, TruncSrc] = tryMatch(Src, AMDGPU::G_TRUNC); @@ -210,20 +211,20 @@ class AMDGPURegBankLegalizeCombiner { B.setInstr(MI); - if (DstTy == S32 && TruncSrcTy == S64) { - auto Unmerge = B.buildUnmerge({SgprRB, S32}, TruncSrc); + if (DstTy == I32 && TruncSrcTy == I64) { + auto Unmerge = B.buildUnmerge({SgprRB, I32}, TruncSrc); MRI.replaceRegWith(Dst, Unmerge.getReg(0)); cleanUpAfterCombine(MI, Trunc); return; } - if (DstTy == S32 && TruncSrcTy == S16) { + if (DstTy == I32 && TruncSrcTy == I16) { B.buildAnyExt(Dst, TruncSrc); cleanUpAfterCombine(MI, Trunc); return; } - if (DstTy == S16 && TruncSrcTy == S32) { + if (DstTy == I16 && TruncSrcTy == I32) { B.buildTrunc(Dst, TruncSrc); cleanUpAfterCombine(MI, Trunc); return; @@ -305,7 +306,7 @@ bool AMDGPURegBankLegalize::runOnMachineFunction(MachineFunction &MF) { // Opcodes that support pretty much all combinations of reg banks and LLTs // (except S1). There is no point in writing rules for them. if (Opc == AMDGPU::G_BUILD_VECTOR || Opc == AMDGPU::G_UNMERGE_VALUES || - Opc == AMDGPU::G_MERGE_VALUES) { + Opc == AMDGPU::G_MERGE_VALUES || Opc == AMDGPU::G_BITCAST) { RBLHelper.applyMappingTrivial(*MI); continue; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp index 24463b138cea6..9fa67d94ca06d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp @@ -215,7 +215,7 @@ void RegBankLegalizeHelper::lower(MachineInstr &MI, LLT EltTy = DstTy.getElementType(); B128 = LLT::fixed_vector(128 / EltTy.getSizeInBits(), EltTy); } else { - B128 = LLT::scalar(128); + B128 = LLT::integer(128); } if (Size / 128 == 2) splitLoad(MI, {B128, B128}); diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 346289bac40e2..b7255eefacd90 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -1969,7 +1969,7 @@ bool AMDGPURegisterBankInfo::foldExtractEltToCmpSelect( IsDivergentIdx, &Subtarget)) return false; - LLT S32 = LLT::scalar(32); + LLT I32 = LLT::integer(32); const RegisterBank &DstBank = *OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank; @@ -1981,10 +1981,10 @@ bool AMDGPURegisterBankInfo::foldExtractEltToCmpSelect( SrcBank == AMDGPU::SGPRRegBank && IdxBank == AMDGPU::SGPRRegBank) ? AMDGPU::SGPRRegBank : AMDGPU::VCCRegBank; - LLT CCTy = (CCBank == AMDGPU::SGPRRegBank) ? S32 : LLT::scalar(1); + LLT CCTy = (CCBank == AMDGPU::SGPRRegBank) ? 
I32 : LLT::integer(1); if (CCBank == AMDGPU::VCCRegBank && IdxBank == AMDGPU::SGPRRegBank) { - Idx = B.buildCopy(S32, Idx)->getOperand(0).getReg(); + Idx = B.buildCopy(I32, Idx)->getOperand(0).getReg(); MRI.setRegBank(Idx, AMDGPU::VGPRRegBank); } @@ -1996,13 +1996,19 @@ bool AMDGPURegisterBankInfo::foldExtractEltToCmpSelect( else EltTy = MRI.getType(DstRegs[0]); + if (VecTy.isFloatVector()) { + auto ClassOrBank = MRI.getRegClassOrRegBank(VecReg); + VecReg = B.buildBitcast({ClassOrBank, VecTy.changeToInteger()}, VecReg).getReg(0); + } + auto UnmergeToEltTy = B.buildUnmerge(EltTy, VecReg); SmallVector Res(NumLanes); - for (unsigned L = 0; L < NumLanes; ++L) + for (unsigned L = 0; L < NumLanes; ++L) { Res[L] = UnmergeToEltTy.getReg(L); + } for (unsigned I = 1; I < NumElem; ++I) { - auto IC = B.buildConstant(S32, I); + auto IC = B.buildConstant(I32, I); MRI.setRegBank(IC->getOperand(0).getReg(), AMDGPU::SGPRRegBank); auto Cmp = B.buildICmp(CmpInst::ICMP_EQ, CCTy, Idx, IC); MRI.setRegBank(Cmp->getOperand(0).getReg(), CCBank); @@ -2067,7 +2073,7 @@ bool AMDGPURegisterBankInfo::foldInsertEltToCmpSelect( IsDivergentIdx, &Subtarget)) return false; - LLT S32 = LLT::scalar(32); + LLT I32 = LLT::integer(32); const RegisterBank &DstBank = *OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank; @@ -2082,10 +2088,10 @@ bool AMDGPURegisterBankInfo::foldInsertEltToCmpSelect( InsBank == AMDGPU::SGPRRegBank && IdxBank == AMDGPU::SGPRRegBank) ? AMDGPU::SGPRRegBank : AMDGPU::VCCRegBank; - LLT CCTy = (CCBank == AMDGPU::SGPRRegBank) ? S32 : LLT::scalar(1); + LLT CCTy = (CCBank == AMDGPU::SGPRRegBank) ? I32 : LLT::integer(1); if (CCBank == AMDGPU::VCCRegBank && IdxBank == AMDGPU::SGPRRegBank) { - Idx = B.buildCopy(S32, Idx)->getOperand(0).getReg(); + Idx = B.buildCopy(I32, Idx)->getOperand(0).getReg(); MRI.setRegBank(Idx, AMDGPU::VGPRRegBank); } @@ -2099,11 +2105,17 @@ bool AMDGPURegisterBankInfo::foldInsertEltToCmpSelect( EltTy = MRI.getType(InsRegs[0]); } + if (VecTy.getScalarType().isFloat() && !EltTy.isFloat()) { + auto RegBankOrClass = MRI.getRegClassOrRegBank(VecReg); + auto CastTy = VecTy.changeToInteger(); + VecReg = B.buildBitcast({RegBankOrClass, CastTy}, VecReg).getReg(0); + } + auto UnmergeToEltTy = B.buildUnmerge(EltTy, VecReg); SmallVector Ops(NumElem * NumLanes); for (unsigned I = 0; I < NumElem; ++I) { - auto IC = B.buildConstant(S32, I); + auto IC = B.buildConstant(I32, I); MRI.setRegBank(IC->getOperand(0).getReg(), AMDGPU::SGPRRegBank); auto Cmp = B.buildICmp(CmpInst::ICMP_EQ, CCTy, Idx, IC); MRI.setRegBank(Cmp->getOperand(0).getReg(), CCBank); @@ -2156,7 +2168,7 @@ void AMDGPURegisterBankInfo::applyMappingSMULU64( MachineRegisterInfo &MRI = OpdMapper.getMRI(); MachineInstr &MI = OpdMapper.getMI(); Register DstReg = MI.getOperand(0).getReg(); - LLT HalfTy = LLT::scalar(32); + LLT HalfTy = LLT::integer(32); // Depending on where the source registers came from, the generic code may // have decided to split the inputs already or not. 
If not, we still need to @@ -2828,7 +2840,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl( Register DstReg = MI.getOperand(0).getReg(); Register SrcReg = MI.getOperand(1).getReg(); - const LLT S32 = LLT::scalar(32); + const LLT I32 = LLT::integer(32); LLT DstTy = MRI.getType(DstReg); LLT SrcTy = MRI.getType(SrcReg); @@ -2891,10 +2903,10 @@ void AMDGPURegisterBankInfo::applyMappingImpl( assert(DstTy.getSizeInBits() == 64); - LLT Vec32 = LLT::fixed_vector(2 * SrcTy.getNumElements(), 32); + LLT Vec32 = LLT::fixed_vector(2 * SrcTy.getNumElements(), I32); auto CastSrc = B.buildBitcast(Vec32, SrcReg); - auto One = B.buildConstant(S32, 1); + auto One = B.buildConstant(I32, 1); MachineBasicBlock::iterator MII = MI.getIterator(); @@ -2905,8 +2917,8 @@ void AMDGPURegisterBankInfo::applyMappingImpl( MachineInstrSpan Span(MII, &B.getMBB()); // Compute 32-bit element indices, (2 * OrigIdx, 2 * OrigIdx + 1). - auto IdxLo = B.buildShl(S32, BaseIdxReg, One); - auto IdxHi = B.buildAdd(S32, IdxLo, One); + auto IdxLo = B.buildShl(I32, BaseIdxReg, One); + auto IdxHi = B.buildAdd(I32, IdxLo, One); auto Extract0 = B.buildExtractVectorElement(DstRegs[0], CastSrc, IdxLo); auto Extract1 = B.buildExtractVectorElement(DstRegs[1], CastSrc, IdxHi); @@ -2932,8 +2944,8 @@ void AMDGPURegisterBankInfo::applyMappingImpl( if (NeedCopyToVGPR) { MachineBasicBlock *LoopBB = Extract1->getParent(); - Register TmpReg0 = MRI.createGenericVirtualRegister(S32); - Register TmpReg1 = MRI.createGenericVirtualRegister(S32); + Register TmpReg0 = MRI.createGenericVirtualRegister(I32); + Register TmpReg1 = MRI.createGenericVirtualRegister(I32); MRI.setRegBank(TmpReg0, AMDGPU::SGPRRegBank); MRI.setRegBank(TmpReg1, AMDGPU::SGPRRegBank); From fb4bd70f3ae76e28290296c72728205ba92f18b9 Mon Sep 17 00:00:00 2001 From: Tim Gymnich Date: Wed, 12 Feb 2025 13:45:27 +0100 Subject: [PATCH 08/16] FPInfo: AMDGPULegalizerInfo and Combiner --- .../Target/AMDGPU/AMDGPUGlobalISelUtils.cpp | 19 +- .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 2541 +++++++++-------- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h | 11 +- .../AMDGPU/AMDGPUPostLegalizerCombiner.cpp | 11 +- .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 2 +- llvm/lib/Target/AMDGPU/FLATInstructions.td | 6 +- 6 files changed, 1374 insertions(+), 1216 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp index 0b18c6b0e923a..bee4553f5760d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp @@ -120,7 +120,7 @@ static LLT getReadAnyLaneSplitTy(LLT Ty) { } // Large scalars and 64-bit pointers - return LLT::scalar(32); + return LLT::integer(32); } static Register buildReadAnyLane(MachineIRBuilder &B, Register VgprSrc, @@ -131,9 +131,17 @@ static void unmergeReadAnyLane(MachineIRBuilder &B, LLT UnmergeTy, Register VgprSrc, const RegisterBankInfo &RBI) { const RegisterBank *VgprRB = &RBI.getRegBank(AMDGPU::VGPRRegBankID); - auto Unmerge = B.buildUnmerge({VgprRB, UnmergeTy}, VgprSrc); + LLT Ty = B.getMRI()->getType(VgprSrc); + if (Ty.getScalarType().isFloat()) { + VgprSrc = B.buildBitcast({VgprRB, Ty.changeToInteger()}, VgprSrc).getReg(0); + } + auto Unmerge = B.buildUnmerge({VgprRB, UnmergeTy.changeToInteger()}, VgprSrc); for (unsigned i = 0; i < Unmerge->getNumOperands() - 1; ++i) { - SgprDstParts.push_back(buildReadAnyLane(B, Unmerge.getReg(i), RBI)); + Register Op = Unmerge.getReg(i); + if (UnmergeTy.getScalarType().isFloat()) { + Op = B.buildBitcast({VgprRB, 
UnmergeTy}, Op).getReg(0); + } + SgprDstParts.push_back(buildReadAnyLane(B, Op, RBI)); } } @@ -149,6 +157,11 @@ static Register buildReadAnyLane(MachineIRBuilder &B, Register VgprSrc, SmallVector SgprDstParts; unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI); + if (Ty.getScalarType().isFloat()) { + auto Merge = B.buildMergeLikeInstr({SgprRB, Ty.changeToInteger()}, SgprDstParts); + return B.buildBitcast({SgprRB, Ty}, Merge).getReg(0); + } + return B.buildMergeLikeInstr({SgprRB, Ty}, SgprDstParts).getReg(0); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index d455ace4e394a..fc48a6c181360 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -23,12 +23,17 @@ #include "SIMachineFunctionInfo.h" #include "SIRegisterInfo.h" #include "Utils/AMDGPUBaseInfo.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ScopeExit.h" +#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" +#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/LowLevelTypeUtils.h" +#include "llvm/CodeGen/Register.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/IntrinsicsAMDGPU.h" @@ -52,6 +57,138 @@ static cl::opt EnableNewLegality( static constexpr unsigned MaxRegisterSize = 1024; +static const LLT I1 = LLT::integer(1); +static const LLT I8 = LLT::integer(8); +static const LLT I16 = LLT::integer(16); +static const LLT F16 = LLT::float16(); +static const LLT BF16 = LLT::bfloat16(); +static const LLT I32 = LLT::integer(32); +static const LLT F32 = LLT::float32(); +static const LLT I64 = LLT::integer(64); +static const LLT F64 = LLT::float64(); +static const LLT I96 = LLT::integer(96); +static const LLT I128 = LLT::integer(128); +static const LLT I160 = LLT::integer(160); +static const LLT I192 = LLT::integer(192); +static const LLT I224 = LLT::integer(224); +static const LLT I256 = LLT::integer(256); +static const LLT I512 = LLT::integer(512); +static const LLT I1024 = LLT::integer(1024); +static const LLT MaxScalar = LLT::integer(MaxRegisterSize); + +static const LLT V2I8 = LLT::fixed_vector(2, I8); + +static const LLT V2I16 = LLT::fixed_vector(2, I16); +static const LLT V3I16 = LLT::fixed_vector(3, I16); +static const LLT V4I16 = LLT::fixed_vector(4, I16); +static const LLT V6I16 = LLT::fixed_vector(6, I16); +static const LLT V8I16 = LLT::fixed_vector(8, I16); +static const LLT V10I16 = LLT::fixed_vector(10, I16); +static const LLT V12I16 = LLT::fixed_vector(12, I16); +static const LLT V16I16 = LLT::fixed_vector(16, I16); + +static const LLT V2F16 = LLT::fixed_vector(2, F16); +static const LLT V4F16 = LLT::fixed_vector(4, F16); +static const LLT V6F16 = LLT::fixed_vector(6, F16); +static const LLT V8F16 = LLT::fixed_vector(8, F16); +static const LLT V10F16 = LLT::fixed_vector(10, F16); +static const LLT V12F16 = LLT::fixed_vector(12, F16); +static const LLT V16F16 = LLT::fixed_vector(16, F16); + +static const LLT V2BF16 = LLT::fixed_vector(2, BF16); +static const LLT V4BF16 = LLT::fixed_vector(4, BF16); +static const LLT V6BF16 = LLT::fixed_vector(6, BF16); +static const LLT V8BF16 = LLT::fixed_vector(8, BF16); +static const LLT V10BF16 = 
LLT::fixed_vector(10, BF16); +static const LLT V12BF16 = LLT::fixed_vector(12, BF16); +static const LLT V16BF16 = LLT::fixed_vector(16, BF16); + +static const LLT V2I32 = LLT::fixed_vector(2, I32); +static const LLT V3I32 = LLT::fixed_vector(3, I32); +static const LLT V4I32 = LLT::fixed_vector(4, I32); +static const LLT V5I32 = LLT::fixed_vector(5, I32); +static const LLT V6I32 = LLT::fixed_vector(6, I32); +static const LLT V7I32 = LLT::fixed_vector(7, I32); +static const LLT V8I32 = LLT::fixed_vector(8, I32); +static const LLT V9I32 = LLT::fixed_vector(9, I32); +static const LLT V10I32 = LLT::fixed_vector(10, I32); +static const LLT V11I32 = LLT::fixed_vector(11, I32); +static const LLT V12I32 = LLT::fixed_vector(12, I32); +static const LLT V16I32 = LLT::fixed_vector(16, I32); +static const LLT V32I32 = LLT::fixed_vector(32, I32); + +static const LLT V2F32 = LLT::fixed_vector(2, F32); +static const LLT V3F32 = LLT::fixed_vector(3, F32); +static const LLT V4F32 = LLT::fixed_vector(4, F32); +static const LLT V5F32 = LLT::fixed_vector(5, F32); +static const LLT V6F32 = LLT::fixed_vector(6, F32); +static const LLT V7F32 = LLT::fixed_vector(7, F32); +static const LLT V8F32 = LLT::fixed_vector(8, F32); +static const LLT V9F32 = LLT::fixed_vector(9, F32); +static const LLT V10F32 = LLT::fixed_vector(10, F32); +static const LLT V11F32 = LLT::fixed_vector(11, F32); +static const LLT V12F32 = LLT::fixed_vector(12, F32); +static const LLT V16F32 = LLT::fixed_vector(16, F32); +static const LLT V32F32 = LLT::fixed_vector(32, F32); + +static const LLT V2I64 = LLT::fixed_vector(2, I64); +static const LLT V3I64 = LLT::fixed_vector(3, I64); +static const LLT V4I64 = LLT::fixed_vector(4, I64); +static const LLT V5I64 = LLT::fixed_vector(5, I64); +static const LLT V6I64 = LLT::fixed_vector(6, I64); +static const LLT V7I64 = LLT::fixed_vector(7, I64); +static const LLT V8I64 = LLT::fixed_vector(8, I64); +static const LLT V16I64 = LLT::fixed_vector(16, I64); + +static const LLT V2F64 = LLT::fixed_vector(2, F64); +static const LLT V3F64 = LLT::fixed_vector(3, F64); +static const LLT V4F64 = LLT::fixed_vector(4, F64); +static const LLT V5F64 = LLT::fixed_vector(5, F64); +static const LLT V6F64 = LLT::fixed_vector(6, F64); +static const LLT V7F64 = LLT::fixed_vector(7, F64); +static const LLT V8F64 = LLT::fixed_vector(8, F64); +static const LLT V16F64 = LLT::fixed_vector(16, F64); + +static const LLT V2I128 = LLT::fixed_vector(2, I128); +static const LLT V4I128 = LLT::fixed_vector(4, I128); + +static std::initializer_list AllScalarTypes = { + I16, F16, BF16, I32, F32, I64, F64, I96, + I128, I160, I192, I224, I256, I512, I1024}; + +static std::initializer_list AllS16Vectors{ + V2I16, V2F16, V2BF16, V4I16, V4F16, V4BF16, V6I16, V6F16, + V6BF16, V8I16, V8F16, V8BF16, V10I16, V10F16, V10BF16, V12I16, + V12F16, V12BF16, V16I16, V16F16, V16BF16, V2I128, V4I128, +}; + +static std::initializer_list AllS32Vectors = { + V2I32, V2F32, V3I32, V3F32, V4I32, V4F32, V5I32, V5F32, V6I32, + V6F32, V7I32, V7F32, V8I32, V8F32, V9I32, V9F32, V10I32, V10F32, + V11I32, V11F32, V12I32, V12F32, V16I32, V16F32, V32I32, V32F32, +}; + +static std::initializer_list AllI32Vectors = { + V2I32, V3I32, V4I32, V5I32, V6I32, V7I32, V8I32, + V9I32, V10I32, V11I32, V12I32, V16I32, V32I32, +}; + +static std::initializer_list AllF32Vectors = { + V2F32, V3F32, V4F32, V5F32, V6F32, V7F32, V8F32, + V9F32, V10F32, V11F32, V12F32, V16F32, V32F32, +}; + +static std::initializer_list AllS64Vectors = { + V2I64, V2F64, V3I64, V3F64, V4I64, V4F64, V5I64, 
V5F64, + V6I64, V6F64, V7I64, V7F64, V8I64, V8F64, V16I64, V16F64, +}; + +static std::initializer_list AllI64Vectors = {V2I64, V3I64, V4I64, V5I64, + V6I64, V7I64, V8I64, V16I64}; + +static std::initializer_list AllF64Vectors = {V2F64, V3F64, V4F64, V5F64, + V6F64, V7F64, V8F64, V16F64}; + // Round the number of elements to the next power of two elements static LLT getPow2VectorType(LLT Ty) { unsigned NElts = Ty.getNumElements(); @@ -60,10 +197,10 @@ static LLT getPow2VectorType(LLT Ty) { } // Round the number of bits to the next power of two bits -static LLT getPow2ScalarType(LLT Ty) { +static LLT getPow2IntegerType(LLT Ty) { unsigned Bits = Ty.getSizeInBits(); unsigned Pow2Bits = 1 << Log2_32_Ceil(Bits); - return LLT::scalar(Pow2Bits); + return LLT::integer(Pow2Bits); } /// \returns true if this is an odd sized vector which should widen by adding an @@ -161,16 +298,16 @@ static LegalizeMutation moreElementsToNextExistingRegClass(unsigned TypeIdx) { static LLT getBufferRsrcScalarType(const LLT Ty) { if (!Ty.isVector()) - return LLT::scalar(128); + return I128; const ElementCount NumElems = Ty.getElementCount(); - return LLT::vector(NumElems, LLT::scalar(128)); + return LLT::vector(NumElems, I128); } static LLT getBufferRsrcRegisterType(const LLT Ty) { if (!Ty.isVector()) - return LLT::fixed_vector(4, LLT::scalar(32)); + return V4I32; const unsigned NumElems = Ty.getElementCount().getFixedValue(); - return LLT::fixed_vector(NumElems * 4, LLT::scalar(32)); + return LLT::fixed_vector(NumElems * 4, I32); } static LLT getBitcastRegisterType(const LLT Ty) { @@ -179,10 +316,10 @@ static LLT getBitcastRegisterType(const LLT Ty) { if (Size <= 32) { // <2 x s8> -> s16 // <4 x s8> -> s32 - return LLT::scalar(Size); + return LLT::integer(Size); } - return LLT::scalarOrVector(ElementCount::getFixed(Size / 32), 32); + return LLT::scalarOrVector(ElementCount::getFixed(Size / 32), I32); } static LegalizeMutation bitcastToRegisterType(unsigned TypeIdx) { @@ -192,13 +329,13 @@ static LegalizeMutation bitcastToRegisterType(unsigned TypeIdx) { }; } -static LegalizeMutation bitcastToVectorElement32(unsigned TypeIdx) { +static LegalizeMutation bitcastToVectorElementI32(unsigned TypeIdx) { return [=](const LegalityQuery &Query) { const LLT Ty = Query.Types[TypeIdx]; unsigned Size = Ty.getSizeInBits(); assert(Size % 32 == 0); return std::pair( - TypeIdx, LLT::scalarOrVector(ElementCount::getFixed(Size / 32), 32)); + TypeIdx, LLT::scalarOrVector(ElementCount::getFixed(Size / 32), I32)); }; } @@ -282,82 +419,14 @@ static LegalityPredicate elementTypeIsLegal(unsigned TypeIdx) { }; } -static const LLT S1 = LLT::scalar(1); -static const LLT S8 = LLT::scalar(8); -static const LLT S16 = LLT::scalar(16); -static const LLT S32 = LLT::scalar(32); -static const LLT F32 = LLT::float32(); -static const LLT S64 = LLT::scalar(64); -static const LLT F64 = LLT::float64(); -static const LLT S96 = LLT::scalar(96); -static const LLT S128 = LLT::scalar(128); -static const LLT S160 = LLT::scalar(160); -static const LLT S192 = LLT::scalar(192); -static const LLT S224 = LLT::scalar(224); -static const LLT S256 = LLT::scalar(256); -static const LLT S512 = LLT::scalar(512); -static const LLT S1024 = LLT::scalar(1024); -static const LLT MaxScalar = LLT::scalar(MaxRegisterSize); - -static const LLT V2S8 = LLT::fixed_vector(2, 8); -static const LLT V2S16 = LLT::fixed_vector(2, 16); -static const LLT V4S16 = LLT::fixed_vector(4, 16); -static const LLT V6S16 = LLT::fixed_vector(6, 16); -static const LLT V8S16 = LLT::fixed_vector(8, 16); 
-static const LLT V10S16 = LLT::fixed_vector(10, 16); -static const LLT V12S16 = LLT::fixed_vector(12, 16); -static const LLT V16S16 = LLT::fixed_vector(16, 16); - -static const LLT V2F16 = LLT::fixed_vector(2, LLT::float16()); -static const LLT V2BF16 = V2F16; // FIXME - -static const LLT V2S32 = LLT::fixed_vector(2, 32); -static const LLT V3S32 = LLT::fixed_vector(3, 32); -static const LLT V4S32 = LLT::fixed_vector(4, 32); -static const LLT V5S32 = LLT::fixed_vector(5, 32); -static const LLT V6S32 = LLT::fixed_vector(6, 32); -static const LLT V7S32 = LLT::fixed_vector(7, 32); -static const LLT V8S32 = LLT::fixed_vector(8, 32); -static const LLT V9S32 = LLT::fixed_vector(9, 32); -static const LLT V10S32 = LLT::fixed_vector(10, 32); -static const LLT V11S32 = LLT::fixed_vector(11, 32); -static const LLT V12S32 = LLT::fixed_vector(12, 32); -static const LLT V16S32 = LLT::fixed_vector(16, 32); -static const LLT V32S32 = LLT::fixed_vector(32, 32); - -static const LLT V2S64 = LLT::fixed_vector(2, 64); -static const LLT V3S64 = LLT::fixed_vector(3, 64); -static const LLT V4S64 = LLT::fixed_vector(4, 64); -static const LLT V5S64 = LLT::fixed_vector(5, 64); -static const LLT V6S64 = LLT::fixed_vector(6, 64); -static const LLT V7S64 = LLT::fixed_vector(7, 64); -static const LLT V8S64 = LLT::fixed_vector(8, 64); -static const LLT V16S64 = LLT::fixed_vector(16, 64); - -static const LLT V2S128 = LLT::fixed_vector(2, 128); -static const LLT V4S128 = LLT::fixed_vector(4, 128); - -static std::initializer_list AllScalarTypes = { - S32, S64, S96, S128, S160, S192, S224, S256, S512, S1024}; - -static std::initializer_list AllS16Vectors{ - V2S16, V4S16, V6S16, V8S16, V10S16, V12S16, V16S16, V2S128, V4S128}; - -static std::initializer_list AllS32Vectors = { - V2S32, V3S32, V4S32, V5S32, V6S32, V7S32, V8S32, - V9S32, V10S32, V11S32, V12S32, V16S32, V32S32}; - -static std::initializer_list AllS64Vectors = {V2S64, V3S64, V4S64, V5S64, - V6S64, V7S64, V8S64, V16S64}; - // Checks whether a type is in the list of legal register types. 
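// Illustrative sketch (assuming the LLT::integer / LLT::float16 / LLT::pointer
// helpers declared in LowLevelType.h): with the typed constants above,
// same-sized integer and FP types are distinct LLT values, and pointer
// elements are normalized to their integer form before the membership check
// below.
//   LLT I16 = LLT::integer(16), F16 = LLT::float16();
//   assert(I16.getSizeInBits() == F16.getSizeInBits()); // both 16 bits wide
//   assert(I16 != F16);                                  // but not the same LLT
//   LLT PtrVec = LLT::fixed_vector(2, LLT::pointer(0, 64));
//   PtrVec.changeElementType(LLT::integer(64));          // compares equal to V2I64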
static bool isRegisterClassType(const GCNSubtarget &ST, LLT Ty) { if (Ty.isPointerOrPointerVector()) - Ty = Ty.changeElementType(LLT::scalar(Ty.getScalarSizeInBits())); + Ty = Ty.changeElementType(LLT::integer(Ty.getScalarSizeInBits())); return is_contained(AllS32Vectors, Ty) || is_contained(AllS64Vectors, Ty) || is_contained(AllScalarTypes, Ty) || - (ST.useRealTrue16Insts() && Ty == S16) || + (ST.useRealTrue16Insts() && Ty == I16) || is_contained(AllS16Vectors, Ty); } @@ -603,14 +672,13 @@ static LLT castBufferRsrcFromV4I32(MachineInstr &MI, MachineIRBuilder &B, if (!PointerTy.isVector()) { // Happy path: (4 x s32) -> (s32, s32, s32, s32) -> (p8) const unsigned NumParts = PointerTy.getSizeInBits() / 32; - const LLT S32 = LLT::scalar(32); Register VectorReg = MRI.createGenericVirtualRegister(VectorTy); std::array VectorElems; B.setInsertPt(B.getMBB(), ++B.getInsertPt()); for (unsigned I = 0; I < NumParts; ++I) VectorElems[I] = - B.buildExtractVectorElementConstant(S32, VectorReg, I).getReg(0); + B.buildExtractVectorElementConstant(I32, VectorReg, I).getReg(0); B.buildMergeValues(MO, VectorElems); MO.setReg(VectorReg); return VectorTy; @@ -639,7 +707,7 @@ static Register castBufferRsrcToV4I32(Register Pointer, MachineIRBuilder &B) { // Special case: p8 -> (s32, s32, s32, s32) -> (4xs32) SmallVector PointerParts; const unsigned NumParts = PointerTy.getSizeInBits() / 32; - auto Unmerged = B.buildUnmerge(LLT::scalar(32), Pointer); + auto Unmerged = B.buildUnmerge(I32, Pointer); for (unsigned I = 0; I < NumParts; ++I) PointerParts.push_back(Unmerged.getReg(I)); return B.buildBuildVector(VectorTy, PointerParts).getReg(0); @@ -692,36 +760,38 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, const std::initializer_list AddrSpaces128 = {RsrcPtr}; - const std::initializer_list FPTypesBase = { - S32, S64 - }; + const std::initializer_list FPTypesBase = {F32, F64}; - const std::initializer_list FPTypes16 = { - S32, S64, S16 - }; + const std::initializer_list IEEEFPTypes16 = {F32, F64, F16}; - const std::initializer_list FPTypesPK16 = { - S32, S64, S16, V2S16 - }; + const std::initializer_list FPTypes16 = {F32, F64, F16, BF16}; - const LLT MinScalarFPTy = ST.has16BitInsts() ? S16 : S32; + const std::initializer_list FPTypesPK16 = {F32, F64, F16, + BF16, V2F16, V2BF16}; + + const std::initializer_list IEEEFPTypesPK16 = {F32, F64, F16, BF16, + V2F16}; + + const LLT MinScalarFPTy = ST.has16BitInsts() ? F16 : F32; // s1 for VCC branches, s32 for SCC branches. - getActionDefinitionsBuilder(G_BRCOND).legalFor({S1, S32}); + getActionDefinitionsBuilder(G_BRCOND).legalFor({I1, I32}); // TODO: All multiples of 32, vectors of pointers, all v2s16 pairs, more // elements for v3s16 getActionDefinitionsBuilder(G_PHI) - .legalFor({S32, S64, V2S16, S16, V4S16, S1, S128, S256}) + .legalFor({I32, F32, I64, F64, V2I16, V2F16, V2BF16, I16, F16, BF16, + V4I16, V4F16, V4BF16, I1, I128, I256}) .legalFor(AllS32Vectors) .legalFor(AllS64Vectors) .legalFor(AddrSpaces64) .legalFor(AddrSpaces32) .legalFor(AddrSpaces128) .legalIf(isPointer(0)) - .clampScalar(0, S16, S256) + .clampScalar(0, I16, I256) .widenScalarToNextPow2(0, 32) - .clampMaxNumElements(0, S32, 16) + .clampMaxNumElements(0, I32, 16) + .clampMaxNumElements(0, F32, 16) .moreElementsIf(isSmallOddVector(0), oneMoreElement(0)) .scalarize(0); @@ -729,134 +799,132 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, // Full set of gfx9 features. 
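// The integer arithmetic rules below are stated only for the I* types; the FP
// counterparts (G_FADD and friends) are declared separately for F16/F32/F64
// further down. A hedged MachineIRBuilder sketch of the consequence (B and
// FloatVal are placeholder names): a value carrying an FP type is bitcast to
// the same-sized integer type before it can feed these opcodes.
//   auto AsInt = B.buildBitcast(LLT::integer(32), FloatVal);
//   auto Sum   = B.buildAdd(LLT::integer(32), AsInt, AsInt);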
if (ST.hasScalarAddSub64()) { getActionDefinitionsBuilder({G_ADD, G_SUB}) - .legalFor({S64, S32, S16, V2S16}) - .clampMaxNumElementsStrict(0, S16, 2) + .legalFor({I64, I32, I16, V2I16}) + .clampMaxNumElementsStrict(0, I16, 2) .scalarize(0) - .minScalar(0, S16) + .minScalar(0, I16) .widenScalarToNextMultipleOf(0, 32) - .maxScalar(0, S32); + .maxScalar(0, I32); } else { getActionDefinitionsBuilder({G_ADD, G_SUB}) - .legalFor({S32, S16, V2S16}) - .clampMaxNumElementsStrict(0, S16, 2) + .legalFor({I32, I16, V2I16}) + .clampMaxNumElementsStrict(0, I16, 2) .scalarize(0) - .minScalar(0, S16) + .minScalar(0, I16) .widenScalarToNextMultipleOf(0, 32) - .maxScalar(0, S32); + .maxScalar(0, I32); } if (ST.hasScalarSMulU64()) { getActionDefinitionsBuilder(G_MUL) - .legalFor({S64, S32, S16, V2S16}) - .clampMaxNumElementsStrict(0, S16, 2) + .legalFor({I64, I32, I16, V2I16}) + .clampMaxNumElementsStrict(0, I16, 2) .scalarize(0) - .minScalar(0, S16) + .minScalar(0, I16) .widenScalarToNextMultipleOf(0, 32) .custom(); } else { getActionDefinitionsBuilder(G_MUL) - .legalFor({S32, S16, V2S16}) - .clampMaxNumElementsStrict(0, S16, 2) + .legalFor({I32, I16, V2I16}) + .clampMaxNumElementsStrict(0, I16, 2) .scalarize(0) - .minScalar(0, S16) + .minScalar(0, I16) .widenScalarToNextMultipleOf(0, 32) .custom(); } assert(ST.hasMad64_32()); getActionDefinitionsBuilder({G_UADDSAT, G_USUBSAT, G_SADDSAT, G_SSUBSAT}) - .legalFor({S32, S16, V2S16}) // Clamp modifier - .minScalarOrElt(0, S16) - .clampMaxNumElementsStrict(0, S16, 2) - .scalarize(0) - .widenScalarToNextPow2(0, 32) - .lower(); + .legalFor({I32, I16, V2I16}) // Clamp modifier + .minScalarOrElt(0, I16) + .clampMaxNumElementsStrict(0, I16, 2) + .scalarize(0) + .widenScalarToNextPow2(0, 32) + .lower(); } else if (ST.has16BitInsts()) { getActionDefinitionsBuilder({G_ADD, G_SUB}) - .legalFor({S32, S16}) - .minScalar(0, S16) - .widenScalarToNextMultipleOf(0, 32) - .maxScalar(0, S32) - .scalarize(0); + .legalFor({I32, I16}) + .minScalar(0, I16) + .widenScalarToNextMultipleOf(0, 32) + .maxScalar(0, I32) + .scalarize(0); getActionDefinitionsBuilder(G_MUL) - .legalFor({S32, S16}) - .scalarize(0) - .minScalar(0, S16) - .widenScalarToNextMultipleOf(0, 32) - .custom(); + .legalFor({I32, I16}) + .scalarize(0) + .minScalar(0, I16) + .widenScalarToNextMultipleOf(0, 32) + .custom(); assert(ST.hasMad64_32()); // Technically the saturating operations require clamp bit support, but this // was introduced at the same time as 16-bit operations. getActionDefinitionsBuilder({G_UADDSAT, G_USUBSAT}) - .legalFor({S32, S16}) // Clamp modifier - .minScalar(0, S16) - .scalarize(0) - .widenScalarToNextPow2(0, 16) - .lower(); + .legalFor({I32, I16}) // Clamp modifier + .minScalar(0, I16) + .scalarize(0) + .widenScalarToNextPow2(0, 16) + .lower(); // We're just lowering this, but it helps get a better result to try to // coerce to the desired type first. 
getActionDefinitionsBuilder({G_SADDSAT, G_SSUBSAT}) - .minScalar(0, S16) - .scalarize(0) - .lower(); + .minScalar(0, I16) + .scalarize(0) + .lower(); } else { getActionDefinitionsBuilder({G_ADD, G_SUB}) - .legalFor({S32}) - .widenScalarToNextMultipleOf(0, 32) - .clampScalar(0, S32, S32) - .scalarize(0); + .legalFor({I32}) + .widenScalarToNextMultipleOf(0, 32) + .clampScalar(0, I32, I32) + .scalarize(0); auto &Mul = getActionDefinitionsBuilder(G_MUL) - .legalFor({S32}) - .scalarize(0) - .minScalar(0, S32) - .widenScalarToNextMultipleOf(0, 32); + .legalFor({I32}) + .scalarize(0) + .minScalar(0, I32) + .widenScalarToNextMultipleOf(0, 32); if (ST.hasMad64_32()) Mul.custom(); else - Mul.maxScalar(0, S32); + Mul.maxScalar(0, I32); if (ST.hasIntClamp()) { getActionDefinitionsBuilder({G_UADDSAT, G_USUBSAT}) - .legalFor({S32}) // Clamp modifier. - .scalarize(0) - .minScalarOrElt(0, S32) - .lower(); + .legalFor({I32}) // Clamp modifier. + .scalarize(0) + .minScalarOrElt(0, I32) + .lower(); } else { // Clamp bit support was added in VI, along with 16-bit operations. getActionDefinitionsBuilder({G_UADDSAT, G_USUBSAT}) - .minScalar(0, S32) - .scalarize(0) - .lower(); + .minScalar(0, I32) + .scalarize(0) + .lower(); } // FIXME: DAG expansion gets better results. The widening uses the smaller // range values and goes for the min/max lowering directly. getActionDefinitionsBuilder({G_SADDSAT, G_SSUBSAT}) - .minScalar(0, S32) - .scalarize(0) - .lower(); + .minScalar(0, I32) + .scalarize(0) + .lower(); } getActionDefinitionsBuilder( {G_SDIV, G_UDIV, G_SREM, G_UREM, G_SDIVREM, G_UDIVREM}) - .customFor({S32, S64}) - .clampScalar(0, S32, S64) + .customFor({I32, I64}) + .clampScalar(0, I32, I64) .widenScalarToNextPow2(0, 32) .scalarize(0); auto &Mulh = getActionDefinitionsBuilder({G_UMULH, G_SMULH}) - .legalFor({S32}) - .maxScalar(0, S32); + .legalFor({I32}) + .maxScalar(0, I32); if (ST.hasVOP3PInsts()) { - Mulh - .clampMaxNumElements(0, S8, 2) - .lowerFor({V2S8}); + Mulh.clampMaxNumElements(0, I8, 2).lowerFor({V2I8}); } Mulh @@ -866,17 +934,17 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, // Report legal for any types we can handle anywhere. For the cases only legal // on the SALU, RegBankSelect will be able to re-legalize. 
getActionDefinitionsBuilder({G_AND, G_OR, G_XOR}) - .legalFor({S32, S1, S64, V2S32, S16, V2S16, V4S16}) - .clampScalar(0, S32, S64) - .moreElementsIf(isSmallOddVector(0), oneMoreElement(0)) - .fewerElementsIf(vectorWiderThan(0, 64), fewerEltsToSize64Vector(0)) - .widenScalarToNextPow2(0) - .scalarize(0); + .legalFor({I32, I1, I64, V2I32, I16, V2I16, V4I16}) + .clampScalar(0, I32, I64) + .moreElementsIf(isSmallOddVector(0), oneMoreElement(0)) + .fewerElementsIf(vectorWiderThan(0, 64), fewerEltsToSize64Vector(0)) + .widenScalarToNextPow2(0) + .scalarize(0); getActionDefinitionsBuilder( {G_UADDO, G_USUBO, G_UADDE, G_SADDE, G_USUBE, G_SSUBE}) - .legalFor({{S32, S1}, {S32, S32}}) - .clampScalar(0, S32, S32) + .legalFor({{I32, I1}, {I32, I32}}) + .clampScalar(0, I32, I32) .scalarize(0); getActionDefinitionsBuilder(G_BITCAST) @@ -885,68 +953,69 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, .lower(); getActionDefinitionsBuilder(G_CONSTANT) - .legalFor({S1, S32, S64, S16, GlobalPtr, - LocalPtr, ConstantPtr, PrivatePtr, FlatPtr }) - .legalIf(isPointer(0)) - .clampScalar(0, S32, S64) - .widenScalarToNextPow2(0); + .legalFor({I1, I32, I64, I16, GlobalPtr, LocalPtr, ConstantPtr, + PrivatePtr, FlatPtr}) + .legalIf(isPointer(0)) + .clampScalar(0, I32, I64) + .widenScalarToNextPow2(0); getActionDefinitionsBuilder(G_FCONSTANT) - .legalFor({S32, S64, S16}) - .clampScalar(0, S16, S64); + .legalFor({F32, F64, F16, BF16}) + .clampScalar(0, F16, F64); getActionDefinitionsBuilder({G_IMPLICIT_DEF, G_FREEZE}) .legalIf(isRegisterClassType(ST, 0)) // s1 and s16 are special cases because they have legal operations on // them, but don't really occupy registers in the normal way. - .legalFor({S1, S16}) - .clampNumElements(0, V16S32, V32S32) + .legalFor({I1, I16, F16, BF16}) + .clampNumElements(0, V16I32, V32I32) + .clampNumElements(0, V16F32, V32F32) .moreElementsIf(isSmallOddVector(0), oneMoreElement(0)) - .clampScalarOrElt(0, S32, MaxScalar) + .clampScalarOrElt(0, I32, MaxScalar) .widenScalarToNextPow2(0, 32) - .clampMaxNumElements(0, S32, 16); + .clampMaxNumElements(0, I32, 16) + .clampMaxNumElements(0, F32, 16); getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({PrivatePtr}); // If the amount is divergent, we have to do a wave reduction to get the // maximum value, so this is expanded during RegBankSelect. 
getActionDefinitionsBuilder(G_DYN_STACKALLOC) - .legalFor({{PrivatePtr, S32}}); + .legalFor({{PrivatePtr, I32}, {PrivatePtr, F32}}); getActionDefinitionsBuilder(G_STACKSAVE) .customFor({PrivatePtr}); getActionDefinitionsBuilder(G_STACKRESTORE) .legalFor({PrivatePtr}); - getActionDefinitionsBuilder({G_GET_FPENV, G_SET_FPENV}).customFor({S64}); + getActionDefinitionsBuilder({G_GET_FPENV, G_SET_FPENV}).customFor({I64}); getActionDefinitionsBuilder(G_GLOBAL_VALUE) .customIf(typeIsNot(0, PrivatePtr)); getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({CodePtr}); - auto &FPOpActions = getActionDefinitionsBuilder( - { G_FADD, G_FMUL, G_FMA, G_FCANONICALIZE, - G_STRICT_FADD, G_STRICT_FMUL, G_STRICT_FMA}) - .legalFor({S32, S64}); - auto &TrigActions = getActionDefinitionsBuilder({G_FSIN, G_FCOS}) - .customFor({S32, S64}); - auto &FDIVActions = getActionDefinitionsBuilder(G_FDIV) - .customFor({S32, S64}); + auto &FPOpActions = + getActionDefinitionsBuilder({G_FADD, G_FMUL, G_FMA, G_FCANONICALIZE, + G_STRICT_FADD, G_STRICT_FMUL, G_STRICT_FMA}) + .legalFor({F32, F64}); + auto &TrigActions = + getActionDefinitionsBuilder({G_FSIN, G_FCOS}).customFor({F32, F64}); + auto &FDIVActions = getActionDefinitionsBuilder(G_FDIV).customFor({F32, F64}); if (ST.has16BitInsts()) { if (ST.hasVOP3PInsts()) - FPOpActions.legalFor({S16, V2S16}); + FPOpActions.legalFor({F16, V2F16}); else - FPOpActions.legalFor({S16}); + FPOpActions.legalFor({F16}); - TrigActions.customFor({S16}); - FDIVActions.customFor({S16}); + TrigActions.customFor({F16}); + FDIVActions.customFor({F16}); } if (ST.hasPackedFP32Ops()) { - FPOpActions.legalFor({V2S32}); - FPOpActions.clampMaxNumElementsStrict(0, S32, 2); + FPOpActions.legalFor({V2F32}); + FPOpActions.clampMaxNumElementsStrict(0, F32, 2); } auto &MinNumMaxNum = getActionDefinitionsBuilder({ @@ -954,201 +1023,187 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, if (ST.hasVOP3PInsts()) { MinNumMaxNum.customFor(FPTypesPK16) - .moreElementsIf(isSmallOddVector(0), oneMoreElement(0)) - .clampMaxNumElements(0, S16, 2) - .clampScalar(0, S16, S64) - .scalarize(0); + .moreElementsIf(isSmallOddVector(0), oneMoreElement(0)) + .clampMaxNumElements(0, F16, 2) + .clampScalar(0, F16, F64) + .scalarize(0); } else if (ST.has16BitInsts()) { - MinNumMaxNum.customFor(FPTypes16) - .clampScalar(0, S16, S64) - .scalarize(0); + MinNumMaxNum.customFor(FPTypes16).clampScalar(0, F16, F64).scalarize(0); } else { - MinNumMaxNum.customFor(FPTypesBase) - .clampScalar(0, S32, S64) - .scalarize(0); + MinNumMaxNum.customFor(FPTypesBase).clampScalar(0, F32, F64).scalarize(0); } if (ST.hasVOP3PInsts()) - FPOpActions.clampMaxNumElementsStrict(0, S16, 2); + FPOpActions.clampMaxNumElementsStrict(0, F16, 2); - FPOpActions - .scalarize(0) - .clampScalar(0, ST.has16BitInsts() ? S16 : S32, S64); + FPOpActions.scalarize(0).clampScalar(0, ST.has16BitInsts() ? F16 : F32, F64); - TrigActions - .scalarize(0) - .clampScalar(0, ST.has16BitInsts() ? S16 : S32, S64); + TrigActions.scalarize(0).clampScalar(0, ST.has16BitInsts() ? F16 : F32, F64); - FDIVActions - .scalarize(0) - .clampScalar(0, ST.has16BitInsts() ? S16 : S32, S64); + FDIVActions.scalarize(0).clampScalar(0, ST.has16BitInsts() ? 
F16 : F32, F64); getActionDefinitionsBuilder({G_FNEG, G_FABS}) - .legalFor(FPTypesPK16) - .clampMaxNumElementsStrict(0, S16, 2) - .scalarize(0) - .clampScalar(0, S16, S64); + .legalFor(FPTypesPK16) + .clampMaxNumElementsStrict(0, F16, 2) + .clampMaxNumElementsStrict(0, BF16, 2) + .scalarize(0) + .clampScalar(0, F16, F64); if (ST.has16BitInsts()) { getActionDefinitionsBuilder(G_FSQRT) - .legalFor({S16}) - .customFor({S32, S64}) - .scalarize(0) - .unsupported(); + .legalFor({F16}) + .customFor({F32, F64}) + .scalarize(0) + .unsupported(); getActionDefinitionsBuilder(G_FFLOOR) - .legalFor({S32, S64, S16}) - .scalarize(0) - .clampScalar(0, S16, S64); + .legalFor({F32, F64, F16}) + .scalarize(0) + .clampScalar(0, F16, F64); getActionDefinitionsBuilder({G_FLDEXP, G_STRICT_FLDEXP}) - .legalFor({{S32, S32}, {S64, S32}, {S16, S16}}) - .scalarize(0) - .maxScalarIf(typeIs(0, S16), 1, S16) - .clampScalar(1, S32, S32) - .lower(); + .legalFor({{F32, I32}, {F64, I32}, {F16, I16}}) + .scalarize(0) + .maxScalarIf(typeIs(0, F16), 1, I16) + .clampScalar(1, I32, I32) + .lower(); getActionDefinitionsBuilder(G_FFREXP) - .customFor({{S32, S32}, {S64, S32}, {S16, S16}, {S16, S32}}) - .scalarize(0) - .lower(); + .customFor({{F32, I32}, {F64, I32}, {F16, I16}, {F16, I32}}) + .scalarize(0) + .lower(); } else { getActionDefinitionsBuilder(G_FSQRT) - .customFor({S32, S64, S16}) - .scalarize(0) - .unsupported(); - + .customFor({F32, F64, F16}) + .scalarize(0) + .unsupported(); if (ST.hasFractBug()) { getActionDefinitionsBuilder(G_FFLOOR) - .customFor({S64}) - .legalFor({S32, S64}) - .scalarize(0) - .clampScalar(0, S32, S64); + .customFor({F64}) + .legalFor({F32, F64}) + .scalarize(0) + .clampScalar(0, F32, F64); } else { getActionDefinitionsBuilder(G_FFLOOR) - .legalFor({S32, S64}) - .scalarize(0) - .clampScalar(0, S32, S64); + .legalFor({F32, F64}) + .scalarize(0) + .clampScalar(0, F32, F64); } getActionDefinitionsBuilder({G_FLDEXP, G_STRICT_FLDEXP}) - .legalFor({{S32, S32}, {S64, S32}}) - .scalarize(0) - .clampScalar(0, S32, S64) - .clampScalar(1, S32, S32) - .lower(); + .legalFor({{F32, I32}, {F64, I32}}) + .scalarize(0) + .clampScalar(0, F32, F64) + .clampScalar(1, I32, I32) + .lower(); getActionDefinitionsBuilder(G_FFREXP) - .customFor({{S32, S32}, {S64, S32}}) - .scalarize(0) - .minScalar(0, S32) - .clampScalar(1, S32, S32) - .lower(); + .customFor({{F32, I32}, {F64, I32}}) + .scalarize(0) + .minScalar(0, F32) + .clampScalar(1, I32, I32) + .lower(); } auto &FPTruncActions = getActionDefinitionsBuilder(G_FPTRUNC); if (ST.hasCvtPkF16F32Inst()) FPTruncActions.legalFor( - {{S32, S64}, {S16, S32}, {V2S16, V2S32}, {V2S16, V2S64}}); + {{F32, F64}, {F16, F32}, {V2F16, V2F32}, {V2F16, V2F64}}); else - FPTruncActions.legalFor({{S32, S64}, {S16, S32}}); + FPTruncActions.legalFor({{F32, F64}, {F16, F32}}); + FPTruncActions.customFor({{BF16, F32}}); FPTruncActions.scalarize(0).lower(); getActionDefinitionsBuilder(G_FPEXT) - .legalFor({{S64, S32}, {S32, S16}}) - .narrowScalarFor({{S64, S16}}, changeTo(0, S32)) - .scalarize(0); + .legalFor({{F64, F32}, {F32, F16}}) + .customFor({{F32, BF16}}) + .narrowScalarFor({{F64, F16}}, changeTo(0, F32)) + .scalarize(0); auto &FSubActions = getActionDefinitionsBuilder({G_FSUB, G_STRICT_FSUB}); if (ST.has16BitInsts()) { FSubActions - // Use actual fsub instruction - .legalFor({S32, S16}) - // Must use fadd + fneg - .lowerFor({S64, V2S16}); + // Use actual fsub instruction + .legalFor({F32, F16}) + // Must use fadd + fneg + .lowerFor({F64, V2F16}); } else { FSubActions - // Use actual fsub 
instruction - .legalFor({S32}) - // Must use fadd + fneg - .lowerFor({S64, S16, V2S16}); + // Use actual fsub instruction + .legalFor({F32}) + // Must use fadd + fneg + .lowerFor({F64, F16, V2F16}); } - FSubActions - .scalarize(0) - .clampScalar(0, S32, S64); + FSubActions.scalarize(0).clampScalar(0, F32, F64); // Whether this is legal depends on the floating point mode for the function. auto &FMad = getActionDefinitionsBuilder(G_FMAD); if (ST.hasMadF16() && ST.hasMadMacF32Insts()) - FMad.customFor({S32, S16}); + FMad.customFor({F32, F16}); else if (ST.hasMadMacF32Insts()) - FMad.customFor({S32}); + FMad.customFor({F32}); else if (ST.hasMadF16()) - FMad.customFor({S16}); + FMad.customFor({F16}); FMad.scalarize(0) .lower(); auto &FRem = getActionDefinitionsBuilder(G_FREM); if (ST.has16BitInsts()) { - FRem.customFor({S16, S32, S64}); + FRem.customFor({F16, F32, F64}); } else { - FRem.minScalar(0, S32) - .customFor({S32, S64}); + FRem.minScalar(0, F32).customFor({F32, F64}); } FRem.scalarize(0); // TODO: Do we need to clamp maximum bitwidth? getActionDefinitionsBuilder(G_TRUNC) - .legalIf(isScalar(0)) - .legalFor({{V2S16, V2S32}}) - .clampMaxNumElements(0, S16, 2) - // Avoid scalarizing in cases that should be truly illegal. In unresolvable - // situations (like an invalid implicit use), we don't want to infinite loop - // in the legalizer. - .fewerElementsIf(elementTypeIsLegal(0), LegalizeMutations::scalarize(0)) - .alwaysLegal(); + .legalIf(isScalar(0)) + .legalFor({{V2I16, V2I32}}) + .clampMaxNumElements(0, I16, 2) + // Avoid scalarizing in cases that should be truly illegal. In + // unresolvable situations (like an invalid implicit use), we don't want + // to infinite loop in the legalizer. + .fewerElementsIf(elementTypeIsLegal(0), LegalizeMutations::scalarize(0)) + .alwaysLegal(); getActionDefinitionsBuilder({G_SEXT, G_ZEXT, G_ANYEXT}) - .legalFor({{S64, S32}, {S32, S16}, {S64, S16}, - {S32, S1}, {S64, S1}, {S16, S1}}) - .scalarize(0) - .clampScalar(0, S32, S64) - .widenScalarToNextPow2(1, 32); + .legalFor( + {{I64, I32}, {I32, I16}, {I64, I16}, {I32, I1}, {I64, I1}, {I16, I1}}) + .scalarize(0) + .clampScalar(0, I32, I64) + .widenScalarToNextPow2(1, 32); // TODO: Split s1->s64 during regbankselect for VALU. 
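// With FPInfo, the int-to-FP rules below carry both interpretations in one
// pair: {F32, I32} reads "f32 result from an i32 source", which the old
// {S32, S32} form could not distinguish. A minimal builder sketch (IntReg is
// a placeholder i32 register):
//   auto Cvt = B.buildSITOFP(LLT::float32(), IntReg); // G_SITOFP : {F32, I32}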
auto &IToFP = getActionDefinitionsBuilder({G_SITOFP, G_UITOFP}) - .legalFor({{S32, S32}, {S64, S32}, {S16, S32}}) - .lowerIf(typeIs(1, S1)) - .customFor({{S32, S64}, {S64, S64}}); + .legalFor({{F32, I32}, {F64, I32}, {F16, I32}}) + .lowerIf(typeIs(1, I1)) + .customFor({{F32, I64}, {F64, I64}}); if (ST.has16BitInsts()) - IToFP.legalFor({{S16, S16}}); - IToFP.clampScalar(1, S32, S64) - .minScalar(0, S32) - .scalarize(0) - .widenScalarToNextPow2(1); + IToFP.legalFor({{F16, I16}}); + IToFP.clampScalar(1, I32, I64) + .minScalar(0, F32) + .scalarize(0) + .widenScalarToNextPow2(1); auto &FPToI = getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI}) - .legalFor({{S32, S32}, {S32, S64}, {S32, S16}}) - .customFor({{S64, S32}, {S64, S64}}) - .narrowScalarFor({{S64, S16}}, changeTo(0, S32)); + .legalFor({{I32, F32}, {I32, F64}, {I32, F16}}) + .customFor({{I64, F32}, {I64, F64}}) + .narrowScalarFor({{I64, F16}}, changeTo(0, I32)); if (ST.has16BitInsts()) - FPToI.legalFor({{S16, S16}}); + FPToI.legalFor({{I16, F16}}); else - FPToI.minScalar(1, S32); + FPToI.minScalar(1, F32); - FPToI.minScalar(0, S32) - .widenScalarToNextPow2(0, 32) - .scalarize(0) - .lower(); + FPToI.minScalar(0, I32).widenScalarToNextPow2(0, 32).scalarize(0).lower(); getActionDefinitionsBuilder({G_LROUND, G_LLROUND}) - .clampScalar(0, S16, S64) + .clampScalar(0, F16, F64) .scalarize(0) .lower(); getActionDefinitionsBuilder(G_INTRINSIC_FPTRUNC_ROUND) - .legalFor({S16, S32}) + .legalFor({F16, F32}) .scalarize(0) .lower(); @@ -1158,28 +1213,28 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, .lower(); getActionDefinitionsBuilder({G_INTRINSIC_LRINT, G_INTRINSIC_LLRINT}) - .clampScalar(0, S16, S64) + .clampScalar(0, F16, F64) .scalarize(0) .lower(); if (ST.has16BitInsts()) { getActionDefinitionsBuilder( {G_INTRINSIC_TRUNC, G_FCEIL, G_INTRINSIC_ROUNDEVEN}) - .legalFor({S16, S32, S64}) - .clampScalar(0, S16, S64) + .legalFor({F16, F32, F64}) + .clampScalar(0, F16, F64) .scalarize(0); } else if (ST.getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) { getActionDefinitionsBuilder( {G_INTRINSIC_TRUNC, G_FCEIL, G_INTRINSIC_ROUNDEVEN}) - .legalFor({S32, S64}) - .clampScalar(0, S32, S64) + .legalFor({F32, F64}) + .clampScalar(0, F32, F64) .scalarize(0); } else { getActionDefinitionsBuilder( {G_INTRINSIC_TRUNC, G_FCEIL, G_INTRINSIC_ROUNDEVEN}) - .legalFor({S32}) - .customFor({S64}) - .clampScalar(0, S32, S64) + .legalFor({F32}) + .customFor({F64}) + .clampScalar(0, F32, F64) .scalarize(0); } @@ -1190,96 +1245,90 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, .scalarSameSizeAs(1, 0); getActionDefinitionsBuilder(G_PTRMASK) - .legalIf(all(sameSize(0, 1), typeInSet(1, {S64, S32}))) - .scalarSameSizeAs(1, 0) - .scalarize(0); + .legalIf(all(sameSize(0, 1), typeInSet(1, {I64, I32}))) + .scalarSameSizeAs(1, 0) + .scalarize(0); auto &CmpBuilder = - getActionDefinitionsBuilder(G_ICMP) - // The compare output type differs based on the register bank of the output, - // so make both s1 and s32 legal. - // - // Scalar compares producing output in scc will be promoted to s32, as that - // is the allocatable register type that will be needed for the copy from - // scc. This will be promoted during RegBankSelect, and we assume something - // before that won't try to use s32 result types. - // - // Vector compares producing an output in vcc/SGPR will use s1 in VCC reg - // bank. 
- .legalForCartesianProduct( - {S1}, {S32, S64, GlobalPtr, LocalPtr, ConstantPtr, PrivatePtr, FlatPtr}) - .legalForCartesianProduct( - {S32}, {S32, S64, GlobalPtr, LocalPtr, ConstantPtr, PrivatePtr, FlatPtr}); + getActionDefinitionsBuilder(G_ICMP) + // The compare output type differs based on the register bank of the + // output, so make both s1 and s32 legal. + // + // Scalar compares producing output in scc will be promoted to s32, as + // that is the allocatable register type that will be needed for the + // copy from scc. This will be promoted during RegBankSelect, and we + // assume something before that won't try to use s32 result types. + // + // Vector compares producing an output in vcc/SGPR will use s1 in VCC + // reg bank. + .legalForCartesianProduct({I1}, {I32, I64, GlobalPtr, LocalPtr, + ConstantPtr, PrivatePtr, FlatPtr}) + .legalForCartesianProduct({I32}, {I32, I64, GlobalPtr, LocalPtr, + ConstantPtr, PrivatePtr, FlatPtr}); if (ST.has16BitInsts()) { - CmpBuilder.legalFor({{S1, S16}}); + CmpBuilder.legalFor({{I1, I16}}); } - CmpBuilder - .widenScalarToNextPow2(1) - .clampScalar(1, S32, S64) - .scalarize(0) - .legalIf(all(typeInSet(0, {S1, S32}), isPointer(1))); + CmpBuilder.widenScalarToNextPow2(1) + .clampScalar(1, I32, I64) + .scalarize(0) + .legalIf(all(typeInSet(0, {I1, I32}), isPointer(1))); auto &FCmpBuilder = getActionDefinitionsBuilder(G_FCMP).legalForCartesianProduct( - {S1}, ST.has16BitInsts() ? FPTypes16 : FPTypesBase); + {I1}, ST.has16BitInsts() ? IEEEFPTypes16 : FPTypesBase); if (ST.hasSALUFloatInsts()) - FCmpBuilder.legalForCartesianProduct({S32}, {S16, S32}); + FCmpBuilder.legalForCartesianProduct({I32}, {F16, F32}); - FCmpBuilder - .widenScalarToNextPow2(1) - .clampScalar(1, S32, S64) - .scalarize(0); + FCmpBuilder.widenScalarToNextPow2(1).clampScalar(1, F32, F64).scalarize(0); // FIXME: fpow has a selection pattern that should move to custom lowering. auto &ExpOps = getActionDefinitionsBuilder(G_FPOW); if (ST.has16BitInsts()) - ExpOps.customFor({{S32}, {S16}}); + ExpOps.customFor({{F32}, {F16}}); else - ExpOps.customFor({S32}); - ExpOps.clampScalar(0, MinScalarFPTy, S32) - .scalarize(0); + ExpOps.customFor({F32}); + ExpOps.clampScalar(0, MinScalarFPTy, F32).scalarize(0); getActionDefinitionsBuilder(G_FPOWI) - .clampScalar(0, MinScalarFPTy, S32) - .lower(); + .clampScalar(0, MinScalarFPTy, F32) + .lower(); auto &Log2Ops = getActionDefinitionsBuilder({G_FLOG2, G_FEXP2}); - Log2Ops.customFor({S32}); + Log2Ops.customFor({F32}); if (ST.has16BitInsts()) - Log2Ops.legalFor({S16}); + Log2Ops.legalFor({F16}); else - Log2Ops.customFor({S16}); + Log2Ops.customFor({F16}); Log2Ops.scalarize(0) .lower(); auto &LogOps = getActionDefinitionsBuilder({G_FLOG, G_FLOG10, G_FEXP, G_FEXP10}); - LogOps.customFor({S32, S16}); - LogOps.clampScalar(0, MinScalarFPTy, S32) - .scalarize(0); + LogOps.customFor({F32, F16}); + LogOps.clampScalar(0, MinScalarFPTy, F32).scalarize(0); // The 64-bit versions produce 32-bit results, but only on the SALU. getActionDefinitionsBuilder(G_CTPOP) - .legalFor({{S32, S32}, {S32, S64}}) - .clampScalar(0, S32, S32) - .widenScalarToNextPow2(1, 32) - .clampScalar(1, S32, S64) - .scalarize(0) - .widenScalarToNextPow2(0, 32); + .legalFor({{I32, I32}, {I32, I64}}) + .clampScalar(0, I32, I32) + .widenScalarToNextPow2(1, 32) + .clampScalar(1, I32, I64) + .scalarize(0) + .widenScalarToNextPow2(0, 32); // If no 16 bit instr is available, lower into different instructions. 
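// Note the convention in the class-test and compare rules nearby: boolean-like
// results keep an integer type (I1 or I32) while the tested operand carries
// the FP interpretation, e.g. G_IS_FPCLASS : {I1, F32} and G_FCMP : {I1, F32}.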
if (ST.has16BitInsts()) getActionDefinitionsBuilder(G_IS_FPCLASS) - .legalForCartesianProduct({S1}, FPTypes16) + .legalForCartesianProduct({I1}, FPTypes16) .widenScalarToNextPow2(1) .scalarize(0) .lower(); else getActionDefinitionsBuilder(G_IS_FPCLASS) - .legalForCartesianProduct({S1}, FPTypesBase) - .lowerFor({S1, S16}) + .legalForCartesianProduct({I1}, FPTypesBase) + .lowerFor({I1, I16}) .widenScalarToNextPow2(1) .scalarize(0) .lower(); @@ -1288,27 +1337,27 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, // instructions expect. The hardware produces -1, but these produce the // bitwidth. getActionDefinitionsBuilder({G_CTLZ, G_CTTZ}) - .scalarize(0) - .clampScalar(0, S32, S32) - .clampScalar(1, S32, S64) - .widenScalarToNextPow2(0, 32) - .widenScalarToNextPow2(1, 32) - .custom(); + .scalarize(0) + .clampScalar(0, I32, I32) + .clampScalar(1, I32, I64) + .widenScalarToNextPow2(0, 32) + .widenScalarToNextPow2(1, 32) + .custom(); // The 64-bit versions produce 32-bit results, but only on the SALU. getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF) - .legalFor({{S32, S32}, {S32, S64}}) + .legalFor({{I32, I32}, {I32, I64}}) .customIf(scalarNarrowerThan(1, 32)) - .clampScalar(0, S32, S32) - .clampScalar(1, S32, S64) + .clampScalar(0, I32, I32) + .clampScalar(1, I32, I64) .scalarize(0) .widenScalarToNextPow2(0, 32) .widenScalarToNextPow2(1, 32); getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF) - .legalFor({{S32, S32}, {S32, S64}}) - .clampScalar(0, S32, S32) - .clampScalar(1, S32, S64) + .legalFor({{I32, I32}, {I32, I64}}) + .clampScalar(0, I32, I32) + .clampScalar(1, I32, I64) .scalarize(0) .widenScalarToNextPow2(0, 32) .widenScalarToNextPow2(1, 32); @@ -1316,87 +1365,87 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, // S64 is only legal on SALU, and needs to be broken into 32-bit elements in // RegBankSelect. getActionDefinitionsBuilder(G_BITREVERSE) - .legalFor({S32, S64}) - .clampScalar(0, S32, S64) - .scalarize(0) - .widenScalarToNextPow2(0); + .legalFor({I32, I64}) + .clampScalar(0, I32, I64) + .scalarize(0) + .widenScalarToNextPow2(0); if (ST.has16BitInsts()) { getActionDefinitionsBuilder(G_BSWAP) - .legalFor({S16, S32, V2S16}) - .clampMaxNumElementsStrict(0, S16, 2) - // FIXME: Fixing non-power-of-2 before clamp is workaround for - // narrowScalar limitation. - .widenScalarToNextPow2(0) - .clampScalar(0, S16, S32) - .scalarize(0); + .legalFor({I16, I32, V2I16}) + .clampMaxNumElementsStrict(0, I16, 2) + // FIXME: Fixing non-power-of-2 before clamp is workaround for + // narrowScalar limitation. 
+ .widenScalarToNextPow2(0) + .clampScalar(0, I16, I32) + .scalarize(0); if (ST.hasVOP3PInsts()) { getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX, G_ABS}) - .legalFor({S32, S16, V2S16}) - .clampMaxNumElements(0, S16, 2) - .minScalar(0, S16) - .widenScalarToNextPow2(0) - .scalarize(0) - .lower(); + .legalFor({I32, I16, V2I16}) + .clampMaxNumElements(0, I16, 2) + .minScalar(0, I16) + .widenScalarToNextPow2(0) + .scalarize(0) + .lower(); } else { getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX, G_ABS}) - .legalFor({S32, S16}) - .widenScalarToNextPow2(0) - .minScalar(0, S16) - .scalarize(0) - .lower(); + .legalFor({I32, I16}) + .widenScalarToNextPow2(0) + .minScalar(0, I16) + .scalarize(0) + .lower(); } } else { // TODO: Should have same legality without v_perm_b32 getActionDefinitionsBuilder(G_BSWAP) - .legalFor({S32}) - .lowerIf(scalarNarrowerThan(0, 32)) - // FIXME: Fixing non-power-of-2 before clamp is workaround for - // narrowScalar limitation. - .widenScalarToNextPow2(0) - .maxScalar(0, S32) - .scalarize(0) - .lower(); + .legalFor({I32}) + .lowerIf(scalarNarrowerThan(0, 32)) + // FIXME: Fixing non-power-of-2 before clamp is workaround for + // narrowScalar limitation. + .widenScalarToNextPow2(0) + .maxScalar(0, I32) + .scalarize(0) + .lower(); getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX, G_ABS}) - .legalFor({S32}) - .minScalar(0, S32) - .widenScalarToNextPow2(0) - .scalarize(0) - .lower(); + .legalFor({I32}) + .minScalar(0, I32) + .widenScalarToNextPow2(0) + .scalarize(0) + .lower(); } getActionDefinitionsBuilder(G_INTTOPTR) // List the common cases - .legalForCartesianProduct(AddrSpaces64, {S64}) - .legalForCartesianProduct(AddrSpaces32, {S32}) + .legalForCartesianProduct(AddrSpaces64, {I64}) + .legalForCartesianProduct(AddrSpaces32, {I32}) .scalarize(0) // Accept any address space as long as the size matches .legalIf(sameSize(0, 1)) .widenScalarIf(smallerThan(1, 0), [](const LegalityQuery &Query) { return std::pair( - 1, LLT::scalar(Query.Types[0].getSizeInBits())); + 1, LLT::integer(Query.Types[0].getSizeInBits())); }) .narrowScalarIf(largerThan(1, 0), [](const LegalityQuery &Query) { - return std::pair(1, LLT::scalar(Query.Types[0].getSizeInBits())); + return std::pair(1, LLT::integer(Query.Types[0].getSizeInBits())); }); getActionDefinitionsBuilder(G_PTRTOINT) // List the common cases - .legalForCartesianProduct(AddrSpaces64, {S64}) - .legalForCartesianProduct(AddrSpaces32, {S32}) + .legalForCartesianProduct(AddrSpaces64, {I64}) + .legalForCartesianProduct(AddrSpaces32, {I32}) .scalarize(0) // Accept any address space as long as the size matches .legalIf(sameSize(0, 1)) .widenScalarIf(smallerThan(0, 1), [](const LegalityQuery &Query) { return std::pair( - 0, LLT::scalar(Query.Types[1].getSizeInBits())); + 0, LLT::integer(Query.Types[1].getSizeInBits())); }) .narrowScalarIf(largerThan(0, 1), [](const LegalityQuery &Query) { - return std::pair(0, LLT::scalar(Query.Types[1].getSizeInBits())); + return std::pair(0, LLT::integer(Query.Types[1].getSizeInBits())); }); getActionDefinitionsBuilder(G_ADDRSPACE_CAST) @@ -1449,32 +1498,51 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, auto &Actions = getActionDefinitionsBuilder(Op); // Explicitly list some common cases. // TODO: Does this help compile time at all? 
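// Each integer load/store layout in the rewritten list below gains a matching
// FP entry of the same width, since e.g. an f32 and an i32 global load are
// now distinct LLTs. Reading one tuple of the memdesc form
// {result type, pointer type, memory type, min alignment in bits}:
//   {F32, GlobalPtr, F32, GlobalAlign32}
//     f32 result, global address space pointer, f32 memory type,
//     32-bit alignment requirement.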
- Actions.legalForTypesWithMemDesc({{S32, GlobalPtr, S32, GlobalAlign32}, - {V2S32, GlobalPtr, V2S32, GlobalAlign32}, - {V4S32, GlobalPtr, V4S32, GlobalAlign32}, - {S64, GlobalPtr, S64, GlobalAlign32}, - {V2S64, GlobalPtr, V2S64, GlobalAlign32}, - {V2S16, GlobalPtr, V2S16, GlobalAlign32}, - {S32, GlobalPtr, S8, GlobalAlign8}, - {S32, GlobalPtr, S16, GlobalAlign16}, - - {S32, LocalPtr, S32, 32}, - {S64, LocalPtr, S64, 32}, - {V2S32, LocalPtr, V2S32, 32}, - {S32, LocalPtr, S8, 8}, - {S32, LocalPtr, S16, 16}, - {V2S16, LocalPtr, S32, 32}, - - {S32, PrivatePtr, S32, 32}, - {S32, PrivatePtr, S8, 8}, - {S32, PrivatePtr, S16, 16}, - {V2S16, PrivatePtr, S32, 32}, - - {S32, ConstantPtr, S32, GlobalAlign32}, - {V2S32, ConstantPtr, V2S32, GlobalAlign32}, - {V4S32, ConstantPtr, V4S32, GlobalAlign32}, - {S64, ConstantPtr, S64, GlobalAlign32}, - {V2S32, ConstantPtr, V2S32, GlobalAlign32}}); + Actions.legalForTypesWithMemDesc( + {{I32, GlobalPtr, I32, GlobalAlign32}, + {F32, GlobalPtr, F32, GlobalAlign32}, + + {V2I32, GlobalPtr, V2I32, GlobalAlign32}, + {V2F32, GlobalPtr, V2F32, GlobalAlign32}, + + {V4I32, GlobalPtr, V4I32, GlobalAlign32}, + {V4F32, GlobalPtr, V4F32, GlobalAlign32}, + + {I64, GlobalPtr, I64, GlobalAlign32}, + {F64, GlobalPtr, F64, GlobalAlign32}, + + {V2I64, GlobalPtr, V2I64, GlobalAlign32}, + {V2F64, GlobalPtr, V2F64, GlobalAlign32}, + {V2I16, GlobalPtr, V2I16, GlobalAlign32}, + {V2F16, GlobalPtr, V2F16, GlobalAlign32}, + {V2BF16, GlobalPtr, V2BF16, GlobalAlign32}, + + {I32, GlobalPtr, I8, GlobalAlign8}, + {I32, GlobalPtr, I16, GlobalAlign16}, + + {I32, LocalPtr, I32, 32}, + {F32, LocalPtr, F32, 32}, + {I64, LocalPtr, I64, 32}, + {F64, LocalPtr, F64, 32}, + {V2I32, LocalPtr, V2I32, 32}, + {V2F32, LocalPtr, V2F32, 32}, + {I32, LocalPtr, I8, 8}, + {I32, LocalPtr, I16, 16}, + {V2I16, LocalPtr, I32, 32}, + + {I32, PrivatePtr, I32, 32}, + {F32, PrivatePtr, F32, 32}, + {I32, PrivatePtr, I8, 8}, + {I32, PrivatePtr, I16, 16}, + {V2I16, PrivatePtr, I32, 32}, + + {I32, ConstantPtr, I32, GlobalAlign32}, + {F32, ConstantPtr, F32, GlobalAlign32}, + {V2I32, ConstantPtr, V2I32, GlobalAlign32}, + {V4I32, ConstantPtr, V4I32, GlobalAlign32}, + {I64, ConstantPtr, I64, GlobalAlign32}, + {F64, ConstantPtr, F64, GlobalAlign32}, + {V2I32, ConstantPtr, V2I32, GlobalAlign32}}); Actions.legalIf( [=](const LegalityQuery &Query) -> bool { return isLoadStoreLegal(ST, Query); @@ -1536,16 +1604,16 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, // Split extloads. if (DstSize > MemSize) - return std::pair(0, LLT::scalar(MemSize)); + return std::pair(0, LLT::integer(MemSize)); unsigned MaxSize = maxSizeForAddrSpace( ST, PtrTy.getAddressSpace(), Op == G_LOAD, Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic); if (MemSize > MaxSize) - return std::pair(0, LLT::scalar(MaxSize)); + return std::pair(0, LLT::integer(MaxSize)); uint64_t Align = Query.MMODescrs[0].AlignInBits; - return std::pair(0, LLT::scalar(Align)); + return std::pair(0, LLT::integer(Align)); }) .fewerElementsIf( [=](const LegalityQuery &Query) -> bool { @@ -1608,31 +1676,31 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, // May need relegalization for the scalars. 
return std::pair(0, EltTy); }) - .minScalar(0, S32) - .narrowScalarIf(isWideScalarExtLoadTruncStore(0), changeTo(0, S32)) - .widenScalarToNextPow2(0) - .moreElementsIf(vectorSmallerThan(0, 32), moreEltsToNext32Bit(0)) - .lower(); + .minScalar(0, I32) + .narrowScalarIf(isWideScalarExtLoadTruncStore(0), changeTo(0, I32)) + .widenScalarToNextPow2(0) + .moreElementsIf(vectorSmallerThan(0, 32), moreEltsToNext32Bit(0)) + .lower(); } // FIXME: Unaligned accesses not lowered. - auto &ExtLoads = getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD}) - .legalForTypesWithMemDesc({{S32, GlobalPtr, S8, 8}, - {S32, GlobalPtr, S16, 2 * 8}, - {S32, LocalPtr, S8, 8}, - {S32, LocalPtr, S16, 16}, - {S32, PrivatePtr, S8, 8}, - {S32, PrivatePtr, S16, 16}, - {S32, ConstantPtr, S8, 8}, - {S32, ConstantPtr, S16, 2 * 8}}) - .legalIf( - [=](const LegalityQuery &Query) -> bool { - return isLoadStoreLegal(ST, Query); - }); + auto &ExtLoads = + getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD}) + .legalForTypesWithMemDesc({{I32, GlobalPtr, I8, 8}, + {I32, GlobalPtr, I16, 2 * 8}, + {I32, LocalPtr, I8, 8}, + {I32, LocalPtr, I16, 16}, + {I32, PrivatePtr, I8, 8}, + {I32, PrivatePtr, I16, 16}, + {I32, ConstantPtr, I8, 8}, + {I32, ConstantPtr, I16, 2 * 8}}) + .legalIf([=](const LegalityQuery &Query) -> bool { + return isLoadStoreLegal(ST, Query); + }); if (ST.hasFlatAddressSpace()) { ExtLoads.legalForTypesWithMemDesc( - {{S32, FlatPtr, S8, 8}, {S32, FlatPtr, S16, 16}}); + {{I32, FlatPtr, I8, 8}, {I32, FlatPtr, I16, 16}}); } // Constant 32-bit is handled by addrspacecasting the 32-bit pointer to @@ -1642,45 +1710,43 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, // inserting addrspacecasts. ExtLoads.customIf(typeIs(1, Constant32Ptr)); - ExtLoads.clampScalar(0, S32, S32) - .widenScalarToNextPow2(0) - .lower(); - - auto &Atomics = getActionDefinitionsBuilder( - {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, - G_ATOMICRMW_AND, G_ATOMICRMW_OR, G_ATOMICRMW_XOR, - G_ATOMICRMW_MAX, G_ATOMICRMW_MIN, G_ATOMICRMW_UMAX, - G_ATOMICRMW_UMIN, G_ATOMICRMW_UINC_WRAP, G_ATOMICRMW_UDEC_WRAP}) - .legalFor({{S32, GlobalPtr}, {S32, LocalPtr}, - {S64, GlobalPtr}, {S64, LocalPtr}, - {S32, RegionPtr}, {S64, RegionPtr}}); + ExtLoads.clampScalar(0, I32, I32).widenScalarToNextPow2(0).lower(); + + auto &Atomics = + getActionDefinitionsBuilder( + {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_AND, + G_ATOMICRMW_OR, G_ATOMICRMW_XOR, G_ATOMICRMW_MAX, G_ATOMICRMW_MIN, + G_ATOMICRMW_UMAX, G_ATOMICRMW_UMIN, G_ATOMICRMW_UINC_WRAP, + G_ATOMICRMW_UDEC_WRAP}) + .legalFor({{I32, GlobalPtr}, + {I32, LocalPtr}, + {I64, GlobalPtr}, + {I64, LocalPtr}, + {I32, RegionPtr}, + {I64, RegionPtr}}); if (ST.hasFlatAddressSpace()) { - Atomics.legalFor({{S32, FlatPtr}, {S64, FlatPtr}}); + Atomics.legalFor({{I32, FlatPtr}, {I64, FlatPtr}}); } // TODO: v2bf16 operations, and fat buffer pointer support. 
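// The FP atomic rules below spell out the value type explicitly: {F32,
// LocalPtr} means "f32 value, LDS pointer operand". A hedged builder sketch
// (Addr, Val and MMO are placeholders):
//   B.buildAtomicRMWFAdd(LLT::float32(), Addr, Val, MMO);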
auto &Atomic = getActionDefinitionsBuilder(G_ATOMICRMW_FADD); if (ST.hasLDSFPAtomicAddF32()) { - Atomic.legalFor({{S32, LocalPtr}, {S32, RegionPtr}}); + Atomic.legalFor({{F32, LocalPtr}, {F32, RegionPtr}}); if (ST.hasLdsAtomicAddF64()) - Atomic.legalFor({{S64, LocalPtr}}); + Atomic.legalFor({{F64, LocalPtr}}); if (ST.hasAtomicDsPkAdd16Insts()) Atomic.legalFor({{V2F16, LocalPtr}, {V2BF16, LocalPtr}}); } if (ST.hasAtomicFaddInsts()) - Atomic.legalFor({{S32, GlobalPtr}}); + Atomic.legalFor({{F32, GlobalPtr}}); if (ST.hasFlatAtomicFaddF32Inst()) - Atomic.legalFor({{S32, FlatPtr}}); + Atomic.legalFor({{F32, FlatPtr}}); if (ST.hasGFX90AInsts()) { // These are legal with some caveats, and should have undergone expansion in // the IR in most situations // TODO: Move atomic expansion into legalizer - Atomic.legalFor({ - {S32, GlobalPtr}, - {S64, GlobalPtr}, - {S64, FlatPtr} - }); + Atomic.legalFor({{F32, GlobalPtr}, {F64, GlobalPtr}, {F64, FlatPtr}}); } if (ST.hasAtomicBufferGlobalPkAddF16NoRtnInsts() || @@ -1710,72 +1776,94 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, // BUFFER/FLAT_ATOMIC_CMP_SWAP on GCN GPUs needs input marshalling, and output // demarshalling getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG) - .customFor({{S32, GlobalPtr}, {S64, GlobalPtr}, - {S32, FlatPtr}, {S64, FlatPtr}}) - .legalFor({{S32, LocalPtr}, {S64, LocalPtr}, - {S32, RegionPtr}, {S64, RegionPtr}}); + .customFor( + {{I32, GlobalPtr}, {I64, GlobalPtr}, {I32, FlatPtr}, {I64, FlatPtr}}) + .legalFor({{I32, LocalPtr}, + {F32, LocalPtr}, + {I64, LocalPtr}, + {F64, LocalPtr}, + {I32, RegionPtr}, + {I64, RegionPtr}}); // TODO: Pointer types, any 32-bit or 64-bit vector // Condition should be s32 for scalar, s1 for vector. getActionDefinitionsBuilder(G_SELECT) - .legalForCartesianProduct({S32, S64, S16, V2S32, V2S16, V4S16, GlobalPtr, - LocalPtr, FlatPtr, PrivatePtr, + .legalForCartesianProduct({I16, + F16, + BF16, + I32, + F32, + I64, + F64, + V2I32, + V2F32, + V2I16, + V2F16, + V2BF16, + V4I16, + V4F16, + V4BF16, + GlobalPtr, + LocalPtr, + FlatPtr, + PrivatePtr, LLT::fixed_vector(2, LocalPtr), LLT::fixed_vector(2, PrivatePtr)}, - {S1, S32}) - .clampScalar(0, S16, S64) + {I1, I32}) + .clampScalar(0, I16, I64) .scalarize(1) .moreElementsIf(isSmallOddVector(0), oneMoreElement(0)) .fewerElementsIf(numElementsNotEven(0), scalarize(0)) - .clampMaxNumElements(0, S32, 2) + .clampMaxNumElements(0, I32, 2) + .clampMaxNumElements(0, F32, 2) .clampMaxNumElements(0, LocalPtr, 2) .clampMaxNumElements(0, PrivatePtr, 2) .scalarize(0) .widenScalarToNextPow2(0) - .legalIf(all(isPointer(0), typeInSet(1, {S1, S32}))); + .legalIf(all(isPointer(0), typeInSet(1, {I1, I32}))); // TODO: Only the low 4/5/6 bits of the shift amount are observed, so we can // be more flexible with the shift amount type. auto &Shifts = getActionDefinitionsBuilder({G_SHL, G_LSHR, G_ASHR}) - .legalFor({{S32, S32}, {S64, S32}}); + .legalFor({{I32, I32}, {I64, I32}}); if (ST.has16BitInsts()) { if (ST.hasVOP3PInsts()) { - Shifts.legalFor({{S16, S16}, {V2S16, V2S16}}) - .clampMaxNumElements(0, S16, 2); + Shifts.legalFor({{I16, I16}, {V2I16, V2I16}}) + .clampMaxNumElements(0, I16, 2); } else - Shifts.legalFor({{S16, S16}}); + Shifts.legalFor({{I16, I16}}); // TODO: Support 16-bit shift amounts for all types Shifts.widenScalarIf( - [=](const LegalityQuery &Query) { - // Use 16-bit shift amounts for any 16-bit shift. Otherwise we want a - // 32-bit amount. 
- const LLT ValTy = Query.Types[0]; - const LLT AmountTy = Query.Types[1]; - return ValTy.getSizeInBits() <= 16 && - AmountTy.getSizeInBits() < 16; - }, changeTo(1, S16)); - Shifts.maxScalarIf(typeIs(0, S16), 1, S16); - Shifts.clampScalar(1, S32, S32); + [=](const LegalityQuery &Query) { + // Use 16-bit shift amounts for any 16-bit shift. Otherwise we want a + // 32-bit amount. + const LLT ValTy = Query.Types[0]; + const LLT AmountTy = Query.Types[1]; + return ValTy.getSizeInBits() <= 16 && AmountTy.getSizeInBits() < 16; + }, + changeTo(1, I16)); + Shifts.maxScalarIf(typeIs(0, I16), 1, I16); + Shifts.clampScalar(1, I32, I32); Shifts.widenScalarToNextPow2(0, 16); - Shifts.clampScalar(0, S16, S64); + Shifts.clampScalar(0, I16, I64); getActionDefinitionsBuilder({G_SSHLSAT, G_USHLSAT}) - .minScalar(0, S16) - .scalarize(0) - .lower(); + .minScalar(0, I16) + .scalarize(0) + .lower(); } else { // Make sure we legalize the shift amount type first, as the general // expansion for the shifted type will produce much worse code if it hasn't // been truncated already. - Shifts.clampScalar(1, S32, S32); + Shifts.clampScalar(1, I32, I32); Shifts.widenScalarToNextPow2(0, 32); - Shifts.clampScalar(0, S32, S64); + Shifts.clampScalar(0, I32, I64); getActionDefinitionsBuilder({G_SSHLSAT, G_USHLSAT}) - .minScalar(0, S32) - .scalarize(0) - .lower(); + .minScalar(0, I32) + .scalarize(0) + .lower(); } Shifts.scalarize(0); @@ -1808,29 +1896,29 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, }) .bitcastIf(all(sizeIsMultipleOf32(VecTypeIdx), scalarOrEltNarrowerThan(VecTypeIdx, 32)), - bitcastToVectorElement32(VecTypeIdx)) + bitcastToVectorElementI32(VecTypeIdx)) //.bitcastIf(vectorSmallerThan(1, 32), bitcastToScalar(1)) .bitcastIf(all(sizeIsMultipleOf32(VecTypeIdx), - scalarOrEltWiderThan(VecTypeIdx, 64)), - [=](const LegalityQuery &Query) { - // For > 64-bit element types, try to turn this into a - // 64-bit element vector since we may be able to do better - // indexing if this is scalar. If not, fall back to 32. - const LLT EltTy = Query.Types[EltTypeIdx]; - const LLT VecTy = Query.Types[VecTypeIdx]; - const unsigned DstEltSize = EltTy.getSizeInBits(); - const unsigned VecSize = VecTy.getSizeInBits(); - - const unsigned TargetEltSize = - DstEltSize % 64 == 0 ? 64 : 32; - return std::pair(VecTypeIdx, - LLT::fixed_vector(VecSize / TargetEltSize, - TargetEltSize)); - }) - .clampScalar(EltTypeIdx, S32, S64) - .clampScalar(VecTypeIdx, S32, S64) - .clampScalar(IdxTypeIdx, S32, S32) - .clampMaxNumElements(VecTypeIdx, S32, 32) + scalarOrEltWiderThan(VecTypeIdx, 64)), + [=](const LegalityQuery &Query) { + // For > 64-bit element types, try to turn this into a + // 64-bit element vector since we may be able to do better + // indexing if this is scalar. If not, fall back to 32. + const LLT EltTy = Query.Types[EltTypeIdx]; + const LLT VecTy = Query.Types[VecTypeIdx]; + const unsigned DstEltSize = EltTy.getSizeInBits(); + const unsigned VecSize = VecTy.getSizeInBits(); + + const unsigned TargetEltSize = DstEltSize % 64 == 0 ? 64 : 32; + return std::pair(VecTypeIdx, + LLT::fixed_vector(VecSize / TargetEltSize, + LLT::integer(TargetEltSize))); + }) + .clampScalar(EltTypeIdx, I32, I64) + .clampScalar(VecTypeIdx, I32, I64) + .clampScalar(IdxTypeIdx, I32, I32) + .clampMaxNumElements(VecTypeIdx, I32, 32) + .clampMaxNumElements(VecTypeIdx, F32, 32) // TODO: Clamp elements for 64-bit vectors? 
.moreElementsIf(isIllegalRegisterType(ST, VecTypeIdx), moreElementsToNextExistingRegClass(VecTypeIdx)) @@ -1851,74 +1939,83 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, // FIXME: Doesn't handle extract of illegal sizes. getActionDefinitionsBuilder(Op) - .lowerIf(all(typeIs(LitTyIdx, S16), sizeIs(BigTyIdx, 32))) - .lowerIf([=](const LegalityQuery &Query) { + .lowerIf(all(typeIs(LitTyIdx, I16), sizeIs(BigTyIdx, 32))) + .lowerIf([=](const LegalityQuery &Query) { // Sub-vector(or single element) insert and extract. // TODO: verify immediate offset here since lower only works with // whole elements. const LLT BigTy = Query.Types[BigTyIdx]; return BigTy.isVector(); }) - // FIXME: Multiples of 16 should not be legal. - .legalIf([=](const LegalityQuery &Query) { + // FIXME: Multiples of 16 should not be legal. + .legalIf([=](const LegalityQuery &Query) { const LLT BigTy = Query.Types[BigTyIdx]; const LLT LitTy = Query.Types[LitTyIdx]; return (BigTy.getSizeInBits() % 32 == 0) && (LitTy.getSizeInBits() % 16 == 0); }) - .widenScalarIf( - [=](const LegalityQuery &Query) { - const LLT BigTy = Query.Types[BigTyIdx]; - return (BigTy.getScalarSizeInBits() < 16); - }, - LegalizeMutations::widenScalarOrEltToNextPow2(BigTyIdx, 16)) - .widenScalarIf( - [=](const LegalityQuery &Query) { - const LLT LitTy = Query.Types[LitTyIdx]; - return (LitTy.getScalarSizeInBits() < 16); - }, - LegalizeMutations::widenScalarOrEltToNextPow2(LitTyIdx, 16)) - .moreElementsIf(isSmallOddVector(BigTyIdx), oneMoreElement(BigTyIdx)) - .widenScalarToNextPow2(BigTyIdx, 32); - + .widenScalarIf( + [=](const LegalityQuery &Query) { + const LLT BigTy = Query.Types[BigTyIdx]; + return (BigTy.getScalarSizeInBits() < 16); + }, + LegalizeMutations::widenScalarOrEltToNextPow2(BigTyIdx, 16)) + .widenScalarIf( + [=](const LegalityQuery &Query) { + const LLT LitTy = Query.Types[LitTyIdx]; + return (LitTy.getScalarSizeInBits() < 16); + }, + LegalizeMutations::widenScalarOrEltToNextPow2(LitTyIdx, 16)) + .moreElementsIf(isSmallOddVector(BigTyIdx), oneMoreElement(BigTyIdx)) + .widenScalarToNextPow2(BigTyIdx, 32); } auto &BuildVector = getActionDefinitionsBuilder(G_BUILD_VECTOR) - .legalForCartesianProduct(AllS32Vectors, {S32}) - .legalForCartesianProduct(AllS64Vectors, {S64}) - .clampNumElements(0, V16S32, V32S32) - .clampNumElements(0, V2S64, V16S64) - .fewerElementsIf(isWideVec16(0), changeTo(0, V2S16)) + .legalForCartesianProduct(AllI32Vectors, {I32}) + .legalForCartesianProduct(AllF32Vectors, {F32}) + .legalForCartesianProduct(AllI64Vectors, {I64}) + .legalForCartesianProduct(AllF64Vectors, {F64}) + .clampNumElements(0, V16I32, V32I32) + .clampNumElements(0, V2I64, V16I64) + .fewerElementsIf(isWideVec16(0), + changeElementCountTo(0, ElementCount::getFixed(2))) .moreElementsIf(isIllegalRegisterType(ST, 0), moreElementsToNextExistingRegClass(0)); if (ST.hasScalarPackInsts()) { BuildVector - // FIXME: Should probably widen s1 vectors straight to s32 - .minScalarOrElt(0, S16) - .minScalar(1, S16); + // FIXME: Should probably widen s1 vectors straight to s32 + .minScalarOrElt(0, I16) + .minScalar(1, I16); getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC) - .legalFor({V2S16, S32}) - .lower(); + .legalFor({V2I16, I32}) + .lower(); } else { - BuildVector.customFor({V2S16, S16}); - BuildVector.minScalarOrElt(0, S32); + BuildVector.customFor({V2I16, I16}); + BuildVector.customFor({V2F16, F16}); + BuildVector.customFor({V2BF16, BF16}); + BuildVector.minScalarOrElt(0, I32); getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC) - 
.customFor({V2S16, S32}) - .lower(); + .customFor({V2I16, I32}) + .lower(); } BuildVector.legalIf(isRegisterType(ST, 0)); // FIXME: Clamp maximum size getActionDefinitionsBuilder(G_CONCAT_VECTORS) - .legalIf(all(isRegisterType(ST, 0), isRegisterType(ST, 1))) - .clampMaxNumElements(0, S32, 32) - .clampMaxNumElements(1, S16, 2) // TODO: Make 4? - .clampMaxNumElements(0, S16, 64); + .legalIf(all(isRegisterType(ST, 0), isRegisterType(ST, 1))) + .clampMaxNumElements(0, I32, 32) + .clampMaxNumElements(0, F32, 32) + .clampMaxNumElements(1, I16, 2) // TODO: Make 4? + .clampMaxNumElements(1, F16, 2) // TODO: Make 4? + .clampMaxNumElements(1, BF16, 2) // TODO: Make 4? + .clampMaxNumElements(0, I16, 64) + .clampMaxNumElements(0, F16, 64) + .clampMaxNumElements(0, BF16, 64); getActionDefinitionsBuilder(G_SHUFFLE_VECTOR).lower(); @@ -1942,24 +2039,32 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, auto &Builder = getActionDefinitionsBuilder(Op) .legalIf(all(isRegisterType(ST, 0), isRegisterType(ST, 1))) - .lowerFor({{S16, V2S16}}) + .lowerFor({{I16, V2I16}}) + .lowerFor({{F16, V2F16}}) + .lowerFor({{BF16, V2BF16}}) .lowerIf([=](const LegalityQuery &Query) { const LLT BigTy = Query.Types[BigTyIdx]; return BigTy.getSizeInBits() == 32; }) // Try to widen to s16 first for small types. // TODO: Only do this on targets with legal s16 shifts - .minScalarOrEltIf(scalarNarrowerThan(LitTyIdx, 16), LitTyIdx, S16) + .minScalarOrEltIf(scalarNarrowerThan(LitTyIdx, 16), LitTyIdx, I16) .widenScalarToNextPow2(LitTyIdx, /*Min*/ 16) .moreElementsIf(isSmallOddVector(BigTyIdx), oneMoreElement(BigTyIdx)) - .fewerElementsIf(all(typeIs(0, S16), vectorWiderThan(1, 32), - elementTypeIs(1, S16)), - changeTo(1, V2S16)) + .fewerElementsIf(all(typeIs(0, I16), vectorWiderThan(1, 32), + elementTypeIs(1, I16)), + changeTo(1, V2I16)) + .fewerElementsIf(all(typeIs(0, F16), vectorWiderThan(1, 32), + elementTypeIs(1, F16)), + changeTo(1, V2F16)) + .fewerElementsIf(all(typeIs(0, BF16), vectorWiderThan(1, 32), + elementTypeIs(1, BF16)), + changeTo(1, V2BF16)) // Clamp the little scalar to s8-s256 and make it a power of 2. It's // not worth considering the multiples of 64 since 2*192 and 2*384 // are not valid. - .clampScalar(LitTyIdx, S32, S512) + .clampScalar(LitTyIdx, I32, I512) .widenScalarToNextPow2(LitTyIdx, /*Min*/ 32) // Break up vectors with weird elements into scalars .fewerElementsIf( @@ -1972,16 +2077,16 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, return notValidElt(Query, BigTyIdx); }, scalarize(1)) - .clampScalar(BigTyIdx, S32, MaxScalar); + .clampScalar(BigTyIdx, I32, MaxScalar); if (Op == G_MERGE_VALUES) { Builder.widenScalarIf( - // TODO: Use 16-bit shifts if legal for 8-bit values? - [=](const LegalityQuery &Query) { - const LLT Ty = Query.Types[LitTyIdx]; - return Ty.getSizeInBits() < 32; - }, - changeTo(LitTyIdx, S32)); + // TODO: Use 16-bit shifts if legal for 8-bit values? + [=](const LegalityQuery &Query) { + const LLT Ty = Query.Types[LitTyIdx]; + return Ty.getSizeInBits() < 32; + }, + changeTo(LitTyIdx, I32)); } Builder.widenScalarIf( @@ -2008,27 +2113,25 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, // S64 is only legal on SALU, and needs to be broken into 32-bit elements in // RegBankSelect. 
- auto &SextInReg = getActionDefinitionsBuilder(G_SEXT_INREG) - .legalFor({{S32}, {S64}}); + auto &SextInReg = + getActionDefinitionsBuilder(G_SEXT_INREG).legalFor({{I32}, {I64}}); if (ST.hasVOP3PInsts()) { - SextInReg.lowerFor({{V2S16}}) - // Prefer to reduce vector widths for 16-bit vectors before lowering, to - // get more vector shift opportunities, since we'll get those when - // expanded. - .clampMaxNumElementsStrict(0, S16, 2); + SextInReg + .lowerFor({{V2I16}}) + // Prefer to reduce vector widths for 16-bit vectors before lowering, to + // get more vector shift opportunities, since we'll get those when + // expanded. + .clampMaxNumElementsStrict(0, I16, 2); } else if (ST.has16BitInsts()) { - SextInReg.lowerFor({{S32}, {S64}, {S16}}); + SextInReg.lowerFor({{I32}, {I64}, {I16}}); } else { // Prefer to promote to s32 before lowering if we don't have 16-bit // shifts. This avoid a lot of intermediate truncate and extend operations. - SextInReg.lowerFor({{S32}, {S64}}); + SextInReg.lowerFor({{I32}, {I64}}); } - SextInReg - .scalarize(0) - .clampScalar(0, S32, S64) - .lower(); + SextInReg.scalarize(0).clampScalar(0, I32, I64).lower(); getActionDefinitionsBuilder({G_ROTR, G_ROTL}) .scalarize(0) @@ -2036,41 +2139,40 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, // TODO: Only Try to form v2s16 with legal packed instructions. getActionDefinitionsBuilder(G_FSHR) - .legalFor({{S32, S32}}) - .lowerFor({{V2S16, V2S16}}) - .clampMaxNumElementsStrict(0, S16, 2) - .scalarize(0) - .lower(); + .legalFor({{I32, I32}}) + .lowerFor({{V2I16, V2I16}}) + .clampMaxNumElementsStrict(0, I16, 2) + .scalarize(0) + .lower(); if (ST.hasVOP3PInsts()) { getActionDefinitionsBuilder(G_FSHL) - .lowerFor({{V2S16, V2S16}}) - .clampMaxNumElementsStrict(0, S16, 2) - .scalarize(0) - .lower(); + .lowerFor({{V2I16, V2I16}}) + .clampMaxNumElementsStrict(0, I16, 2) + .scalarize(0) + .lower(); } else { getActionDefinitionsBuilder(G_FSHL) .scalarize(0) .lower(); } - getActionDefinitionsBuilder(G_READCYCLECOUNTER) - .legalFor({S64}); + getActionDefinitionsBuilder(G_READCYCLECOUNTER).legalFor({I64}); - getActionDefinitionsBuilder(G_READSTEADYCOUNTER).legalFor({S64}); + getActionDefinitionsBuilder(G_READSTEADYCOUNTER).legalFor({I64}); getActionDefinitionsBuilder(G_FENCE) .alwaysLegal(); getActionDefinitionsBuilder({G_SMULO, G_UMULO}) .scalarize(0) - .minScalar(0, S32) + .minScalar(0, I32) .lower(); getActionDefinitionsBuilder({G_SBFX, G_UBFX}) - .legalFor({{S32, S32}, {S64, S32}}) - .clampScalar(1, S32, S32) - .clampScalar(0, S32, S64) + .legalFor({{I32, I32}, {I64, I32}}) + .clampScalar(1, I32, I32) + .clampScalar(0, I32, I64) .widenScalarToNextPow2(0) .scalarize(0); @@ -2086,8 +2188,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, if (ST.hasIEEEMinMax()) { getActionDefinitionsBuilder({G_FMINIMUM, G_FMAXIMUM}) - .legalFor(FPTypesPK16) - .clampMaxNumElements(0, S16, 2) + .legalFor(IEEEFPTypesPK16) + .clampMaxNumElements(0, F16, 2) .scalarize(0); } else { // TODO: Implement @@ -2153,6 +2255,10 @@ bool AMDGPULegalizerInfo::legalizeCustom( case TargetOpcode::G_SEXTLOAD: case TargetOpcode::G_ZEXTLOAD: return legalizeLoad(Helper, MI); + case TargetOpcode::G_FPEXT: + return legalizeFPExt(MI, MRI, B); + case TargetOpcode::G_FPTRUNC: + return legalizeFPTrunc(MI, MRI, B); case TargetOpcode::G_STORE: return legalizeStore(Helper, MI); case TargetOpcode::G_FMAD: @@ -2220,8 +2326,6 @@ Register AMDGPULegalizerInfo::getSegmentAperture( MachineIRBuilder &B) const { MachineFunction &MF = B.getMF(); const 
GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
-  const LLT S32 = LLT::scalar(32);
-  const LLT S64 = LLT::scalar(64);

   assert(AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS);

@@ -2239,10 +2343,10 @@ Register AMDGPULegalizerInfo::getSegmentAperture(
     // Register TableGen definitions would need an overhaul to get rid of the
     // artificial "HI" aperture registers and prevent this kind of issue from
     // happening.
-    Register Dst = MRI.createGenericVirtualRegister(S64);
+    Register Dst = MRI.createGenericVirtualRegister(I64);
     MRI.setRegClass(Dst, &AMDGPU::SReg_64RegClass);
     B.buildInstr(AMDGPU::S_MOV_B64, {Dst}, {Register(ApertureRegNo)});
-    return B.buildUnmerge(S32, Dst).getReg(1);
+    return B.buildUnmerge(I32, Dst).getReg(1);
   }

   // TODO: can we be smarter about machine pointer info?
@@ -2270,13 +2374,13 @@ Register AMDGPULegalizerInfo::getSegmentAperture(
         PtrInfo,
         MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
             MachineMemOperand::MOInvariant,
-        LLT::scalar(32), commonAlignment(Align(64), Offset));
+        I32, commonAlignment(Align(64), Offset));

     // Pointer address
     B.buildPtrAdd(LoadAddr, KernargPtrReg,
-                  B.buildConstant(LLT::scalar(64), Offset).getReg(0));
+                  B.buildConstant(I64, Offset).getReg(0));
     // Load address
-    return B.buildLoad(S32, LoadAddr, *MMO).getReg(0);
+    return B.buildLoad(I32, LoadAddr, *MMO).getReg(0);
   }

   Register QueuePtr = MRI.createGenericVirtualRegister(
@@ -2293,11 +2397,11 @@ Register AMDGPULegalizerInfo::getSegmentAperture(
       PtrInfo,
       MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
           MachineMemOperand::MOInvariant,
-      LLT::scalar(32), commonAlignment(Align(64), StructOffset));
+      I32, commonAlignment(Align(64), StructOffset));

   B.buildPtrAdd(LoadAddr, QueuePtr,
-                B.buildConstant(LLT::scalar(64), StructOffset).getReg(0));
-  return B.buildLoad(S32, LoadAddr, *MMO).getReg(0);
+                B.buildConstant(I64, StructOffset).getReg(0));
+  return B.buildLoad(I32, LoadAddr, *MMO).getReg(0);
 }

 /// Return true if the value is a known valid address, such that a null check is
@@ -2331,8 +2435,6 @@ bool AMDGPULegalizerInfo::legalizeAddrSpaceCast(
   assert(MI.getOpcode() == TargetOpcode::G_ADDRSPACE_CAST ||
          (isa<GIntrinsic>(MI) &&
           cast<GIntrinsic>(MI).getIntrinsicID() ==
              Intrinsic::amdgcn_addrspacecast_nonnull));
-
-  const LLT S32 = LLT::scalar(32);
   Register Dst = MI.getOperand(0).getReg();
   Register Src = isa<GIntrinsic>(MI) ? MI.getOperand(2).getReg()
                                      : MI.getOperand(1).getReg();
@@ -2373,8 +2475,7 @@ bool AMDGPULegalizerInfo::legalizeAddrSpaceCast(

     // Extract low 32-bits of the pointer.
     auto PtrLo32 = B.buildExtract(DstTy, Src, 0);

-    auto CmpRes =
-        B.buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), Src, FlatNull.getReg(0));
+    auto CmpRes = B.buildICmp(CmpInst::ICMP_NE, I1, Src, FlatNull.getReg(0));
     B.buildSelect(Dst, CmpRes, PtrLo32, SegmentNull.getReg(0));

     MI.eraseFromParent();
@@ -2391,7 +2492,7 @@ bool AMDGPULegalizerInfo::legalizeAddrSpaceCast(

     // Coerce the type of the low half of the result so we can use
     // merge_values.
-    Register SrcAsInt = B.buildPtrToInt(S32, Src).getReg(0);
+    Register SrcAsInt = B.buildPtrToInt(I32, Src).getReg(0);

     // TODO: Should we allow mismatched types but matching sizes in merges to
     // avoid the ptrtoint?
@@ -2411,8 +2512,7 @@ bool AMDGPULegalizerInfo::legalizeAddrSpaceCast( auto SegmentNull = B.buildConstant(SrcTy, TM.getNullPointerValue(SrcAS)); auto FlatNull = B.buildConstant(DstTy, TM.getNullPointerValue(DestAS)); - auto CmpRes = B.buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), Src, - SegmentNull.getReg(0)); + auto CmpRes = B.buildICmp(CmpInst::ICMP_NE, I1, Src, SegmentNull.getReg(0)); B.buildSelect(Dst, CmpRes, BuildPtr, FlatNull); @@ -2432,8 +2532,8 @@ bool AMDGPULegalizerInfo::legalizeAddrSpaceCast( DstTy.getSizeInBits() == 64) { const SIMachineFunctionInfo *Info = MF.getInfo(); uint32_t AddrHiVal = Info->get32BitAddressHighBits(); - auto PtrLo = B.buildPtrToInt(S32, Src); - auto HighAddr = B.buildConstant(S32, AddrHiVal); + auto PtrLo = B.buildPtrToInt(I32, Src); + auto HighAddr = B.buildConstant(I32, AddrHiVal); B.buildMergeLikeInstr(Dst, {PtrLo, HighAddr}); MI.eraseFromParent(); return true; @@ -2472,13 +2572,9 @@ bool AMDGPULegalizerInfo::legalizeFroundeven(MachineInstr &MI, return true; } -bool AMDGPULegalizerInfo::legalizeFceil( - MachineInstr &MI, MachineRegisterInfo &MRI, - MachineIRBuilder &B) const { - - const LLT S1 = LLT::scalar(1); - const LLT S64 = LLT::scalar(64); - +bool AMDGPULegalizerInfo::legalizeFceil(MachineInstr &MI, + MachineRegisterInfo &MRI, + MachineIRBuilder &B) const { Register Src = MI.getOperand(1).getReg(); assert(MRI.getType(Src).isFloat(64)); @@ -2486,14 +2582,14 @@ bool AMDGPULegalizerInfo::legalizeFceil( // if (src > 0.0 && src != result) // result += 1.0 - auto Trunc = B.buildIntrinsicTrunc(S64, Src); + auto Trunc = B.buildIntrinsicTrunc(F64, Src); - const auto Zero = B.buildFConstant(S64, 0.0); - const auto One = B.buildFConstant(S64, 1.0); - auto Lt0 = B.buildFCmp(CmpInst::FCMP_OGT, S1, Src, Zero); - auto NeTrunc = B.buildFCmp(CmpInst::FCMP_ONE, S1, Src, Trunc); - auto And = B.buildAnd(S1, Lt0, NeTrunc); - auto Add = B.buildSelect(S64, And, One, Zero); + const auto Zero = B.buildFConstant(F64, 0.0); + const auto One = B.buildFConstant(F64, 1.0); + auto Lt0 = B.buildFCmp(CmpInst::FCMP_OGT, I1, Src, Zero); + auto NeTrunc = B.buildFCmp(CmpInst::FCMP_ONE, I1, Src, Trunc); + auto And = B.buildAnd(I1, Lt0, NeTrunc); + auto Add = B.buildSelect(F64, And, One, Zero); // TODO: Should this propagate fast-math-flags? 
B.buildFAdd(MI.getOperand(0).getReg(), Trunc, Add); @@ -2522,31 +2618,27 @@ static MachineInstrBuilder extractF64Exponent(Register Hi, MachineIRBuilder &B) { const unsigned FractBits = 52; const unsigned ExpBits = 11; - LLT S32 = LLT::scalar(32); - auto Const0 = B.buildConstant(S32, FractBits - 32); - auto Const1 = B.buildConstant(S32, ExpBits); + auto Const0 = B.buildConstant(I32, FractBits - 32); + auto Const1 = B.buildConstant(I32, ExpBits); - auto ExpPart = B.buildIntrinsic(Intrinsic::amdgcn_ubfe, {S32}) + auto ExpPart = B.buildIntrinsic(Intrinsic::amdgcn_ubfe, {I32}) .addUse(Hi) .addUse(Const0.getReg(0)) .addUse(Const1.getReg(0)); - return B.buildSub(S32, ExpPart, B.buildConstant(S32, 1023)); + return B.buildSub(I32, ExpPart, B.buildConstant(I32, 1023)); } -bool AMDGPULegalizerInfo::legalizeIntrinsicTrunc( - MachineInstr &MI, MachineRegisterInfo &MRI, - MachineIRBuilder &B) const { - const LLT S1 = LLT::scalar(1); - const LLT S32 = LLT::scalar(32); - const LLT S64 = LLT::scalar(64); - +bool AMDGPULegalizerInfo::legalizeIntrinsicTrunc(MachineInstr &MI, + MachineRegisterInfo &MRI, + MachineIRBuilder &B) const { Register Src = MI.getOperand(1).getReg(); - assert(MRI.getType(Src) == S64); + assert(MRI.getType(Src).isFloat(64)); // TODO: Should this use extract since the low half is unused? - auto Unmerge = B.buildUnmerge({S32, S32}, Src); + auto I64Src = B.buildBitcast(I64, Src); + auto Unmerge = B.buildUnmerge({I32, I32}, I64Src); Register Hi = Unmerge.getReg(1); // Extract the upper half, since this is where we will find the sign and @@ -2556,25 +2648,27 @@ bool AMDGPULegalizerInfo::legalizeIntrinsicTrunc( const unsigned FractBits = 52; // Extract the sign bit. - const auto SignBitMask = B.buildConstant(S32, UINT32_C(1) << 31); - auto SignBit = B.buildAnd(S32, Hi, SignBitMask); + const auto SignBitMask = B.buildConstant(I32, UINT32_C(1) << 31); + auto SignBit = B.buildAnd(I32, Hi, SignBitMask); - const auto FractMask = B.buildConstant(S64, (UINT64_C(1) << FractBits) - 1); + const auto FractMask = B.buildConstant(I64, (UINT64_C(1) << FractBits) - 1); - const auto Zero32 = B.buildConstant(S32, 0); + const auto Zero32 = B.buildConstant(I32, 0); // Extend back to 64-bits. 
- auto SignBit64 = B.buildMergeLikeInstr(S64, {Zero32, SignBit}); + auto SignBit64 = B.buildMergeLikeInstr(I64, {Zero32, SignBit}); + SignBit64 = B.buildBitcast(F64, SignBit64); - auto Shr = B.buildAShr(S64, FractMask, Exp); - auto Not = B.buildNot(S64, Shr); - auto Tmp0 = B.buildAnd(S64, Src, Not); - auto FiftyOne = B.buildConstant(S32, FractBits - 1); + auto Shr = B.buildAShr(I64, FractMask, Exp); + auto Not = B.buildNot(I64, Shr); + auto And = B.buildAnd(I64, I64Src, Not); + auto Tmp0 = B.buildBitcast(F64, And); + auto FiftyOne = B.buildConstant(I32, FractBits - 1); - auto ExpLt0 = B.buildICmp(CmpInst::ICMP_SLT, S1, Exp, Zero32); - auto ExpGt51 = B.buildICmp(CmpInst::ICMP_SGT, S1, Exp, FiftyOne); + auto ExpLt0 = B.buildICmp(CmpInst::ICMP_SLT, I1, Exp, Zero32); + auto ExpGt51 = B.buildICmp(CmpInst::ICMP_SGT, I1, Exp, FiftyOne); - auto Tmp1 = B.buildSelect(S64, ExpLt0, SignBit64, Tmp0); + auto Tmp1 = B.buildSelect(F64, ExpLt0, SignBit64, Tmp0); B.buildSelect(MI.getOperand(0).getReg(), ExpGt51, Src, Tmp1); MI.eraseFromParent(); return true; @@ -2587,20 +2681,17 @@ bool AMDGPULegalizerInfo::legalizeITOFP( Register Dst = MI.getOperand(0).getReg(); Register Src = MI.getOperand(1).getReg(); - const LLT S64 = LLT::scalar(64); - const LLT S32 = LLT::scalar(32); - - assert(MRI.getType(Src) == S64); + assert(MRI.getType(Src).isInteger(64)); - auto Unmerge = B.buildUnmerge({S32, S32}, Src); - auto ThirtyTwo = B.buildConstant(S32, 32); + auto Unmerge = B.buildUnmerge({I32, I32}, Src); + auto ThirtyTwo = B.buildConstant(I32, 32); - if (MRI.getType(Dst) == S64) { - auto CvtHi = Signed ? B.buildSITOFP(S64, Unmerge.getReg(1)) - : B.buildUITOFP(S64, Unmerge.getReg(1)); + if (MRI.getType(Dst).isFloat(64)) { + auto CvtHi = Signed ? B.buildSITOFP(F64, Unmerge.getReg(1)) + : B.buildUITOFP(F64, Unmerge.getReg(1)); - auto CvtLo = B.buildUITOFP(S64, Unmerge.getReg(0)); - auto LdExp = B.buildFLdexp(S64, CvtHi, ThirtyTwo); + auto CvtLo = B.buildUITOFP(F64, Unmerge.getReg(0)); + auto LdExp = B.buildFLdexp(F64, CvtHi, ThirtyTwo); // TODO: Should this propagate fast-math-flags? B.buildFAdd(Dst, LdExp, CvtLo); @@ -2608,28 +2699,28 @@ bool AMDGPULegalizerInfo::legalizeITOFP( return true; } - assert(MRI.getType(Dst) == S32); + assert(MRI.getType(Dst).isFloat(32)); - auto One = B.buildConstant(S32, 1); + auto One = B.buildConstant(I32, 1); MachineInstrBuilder ShAmt; if (Signed) { - auto ThirtyOne = B.buildConstant(S32, 31); - auto X = B.buildXor(S32, Unmerge.getReg(0), Unmerge.getReg(1)); - auto OppositeSign = B.buildAShr(S32, X, ThirtyOne); - auto MaxShAmt = B.buildAdd(S32, ThirtyTwo, OppositeSign); - auto LS = B.buildIntrinsic(Intrinsic::amdgcn_sffbh, {S32}) + auto ThirtyOne = B.buildConstant(I32, 31); + auto X = B.buildXor(I32, Unmerge.getReg(0), Unmerge.getReg(1)); + auto OppositeSign = B.buildAShr(I32, X, ThirtyOne); + auto MaxShAmt = B.buildAdd(I32, ThirtyTwo, OppositeSign); + auto LS = B.buildIntrinsic(Intrinsic::amdgcn_sffbh, {I32}) .addUse(Unmerge.getReg(1)); - auto LS2 = B.buildSub(S32, LS, One); - ShAmt = B.buildUMin(S32, LS2, MaxShAmt); + auto LS2 = B.buildSub(I32, LS, One); + ShAmt = B.buildUMin(I32, LS2, MaxShAmt); } else - ShAmt = B.buildCTLZ(S32, Unmerge.getReg(1)); - auto Norm = B.buildShl(S64, Src, ShAmt); - auto Unmerge2 = B.buildUnmerge({S32, S32}, Norm); - auto Adjust = B.buildUMin(S32, One, Unmerge2.getReg(0)); - auto Norm2 = B.buildOr(S32, Unmerge2.getReg(1), Adjust); - auto FVal = Signed ? 
B.buildSITOFP(S32, Norm2) : B.buildUITOFP(S32, Norm2);
-  auto Scale = B.buildSub(S32, ThirtyTwo, ShAmt);
+    ShAmt = B.buildCTLZ(I32, Unmerge.getReg(1));
+  auto Norm = B.buildShl(I64, Src, ShAmt);
+  auto Unmerge2 = B.buildUnmerge({I32, I32}, Norm);
+  auto Adjust = B.buildUMin(I32, One, Unmerge2.getReg(0));
+  auto Norm2 = B.buildOr(I32, Unmerge2.getReg(1), Adjust);
+  auto FVal = Signed ? B.buildSITOFP(F32, Norm2) : B.buildUITOFP(F32, Norm2);
+  auto Scale = B.buildSub(I32, ThirtyTwo, ShAmt);
   B.buildFLdexp(Dst, FVal, Scale);
   MI.eraseFromParent();
   return true;
@@ -2645,11 +2736,9 @@ bool AMDGPULegalizerInfo::legalizeFPTOI(MachineInstr &MI,
   Register Dst = MI.getOperand(0).getReg();
   Register Src = MI.getOperand(1).getReg();

-  const LLT S64 = LLT::scalar(64);
-  const LLT S32 = LLT::scalar(32);
-
   const LLT SrcLT = MRI.getType(Src);
-  assert((SrcLT == S32 || SrcLT == S64) && MRI.getType(Dst) == S64);
+  assert((SrcLT.isFloat(32) || SrcLT.isFloat(64)) &&
+         MRI.getType(Dst).isInteger(64));

   unsigned Flags = MI.getFlags();

@@ -2664,41 +2753,41 @@ bool AMDGPULegalizerInfo::legalizeFPTOI(MachineInstr &MI,
   // auto Trunc = B.buildIntrinsicTrunc(SrcLT, Src, Flags);

   MachineInstrBuilder Sign;
-  if (Signed && SrcLT == S32) {
+  if (Signed && SrcLT.isFloat(32)) {
     // However, a 32-bit floating point number has only 23 bits mantissa and
     // it's not enough to hold all the significant bits of `lof` if val is
     // negative. To avoid the loss of precision, We need to take the absolute
     // value after truncating and flip the result back based on the original
     // signedness.
-    Sign = B.buildAShr(S32, Src, B.buildConstant(S32, 31));
-    Trunc = B.buildFAbs(S32, Trunc, Flags);
+    Sign = B.buildAShr(I32, B.buildBitcast(I32, Src), B.buildConstant(I32, 31));
+    Trunc = B.buildFAbs(F32, Trunc, Flags);
   }

   MachineInstrBuilder K0, K1;
-  if (SrcLT == S64) {
+  if (SrcLT.isFloat(64)) {
     K0 = B.buildFConstant(
-        S64, llvm::bit_cast<double>(UINT64_C(/*2^-32*/ 0x3df0000000000000)));
+        F64, llvm::bit_cast<double>(UINT64_C(/*2^-32*/ 0x3df0000000000000)));
     K1 = B.buildFConstant(
-        S64, llvm::bit_cast<double>(UINT64_C(/*-2^32*/ 0xc1f0000000000000)));
+        F64, llvm::bit_cast<double>(UINT64_C(/*-2^32*/ 0xc1f0000000000000)));
   } else {
     K0 = B.buildFConstant(
-        S32, llvm::bit_cast<float>(UINT32_C(/*2^-32*/ 0x2f800000)));
+        F32, llvm::bit_cast<float>(UINT32_C(/*2^-32*/ 0x2f800000)));
     K1 = B.buildFConstant(
-        S32, llvm::bit_cast<float>(UINT32_C(/*-2^32*/ 0xcf800000)));
+        F32, llvm::bit_cast<float>(UINT32_C(/*-2^32*/ 0xcf800000)));
   }

   auto Mul = B.buildFMul(SrcLT, Trunc, K0, Flags);
   auto FloorMul = B.buildFFloor(SrcLT, Mul, Flags);
   auto Fma = B.buildFMA(SrcLT, FloorMul, K1, Trunc, Flags);

-  auto Hi = (Signed && SrcLT == S64) ? B.buildFPTOSI(S32, FloorMul)
-                                     : B.buildFPTOUI(S32, FloorMul);
-  auto Lo = B.buildFPTOUI(S32, Fma);
+  auto Hi = (Signed && SrcLT.isFloat(64)) ? B.buildFPTOSI(I32, FloorMul)
+                                          : B.buildFPTOUI(I32, FloorMul);
+  auto Lo = B.buildFPTOUI(I32, Fma);

-  if (Signed && SrcLT == S32) {
+  if (Signed && SrcLT.isFloat(32)) {
     // Flip the result based on the signedness, which is either all 0s or 1s.
- Sign = B.buildMergeLikeInstr(S64, {Sign, Sign}); + Sign = B.buildMergeLikeInstr(I64, {Sign, Sign}); // r := xor({lo, hi}, sign) - sign; - B.buildSub(Dst, B.buildXor(S64, B.buildMergeLikeInstr(S64, {Lo, Hi}), Sign), + B.buildSub(Dst, B.buildXor(I64, B.buildMergeLikeInstr(I64, {Lo, Hi}), Sign), Sign); } else B.buildMergeLikeInstr(Dst, {Lo, Hi}); @@ -2746,7 +2835,7 @@ bool AMDGPULegalizerInfo::legalizeExtractVectorElt( // vector of integers using ptrtoint (and inttoptr on the output) in order to // drive the legalization forward. if (EltTy.isPointer() && EltTy.getSizeInBits() > 64) { - LLT IntTy = LLT::scalar(EltTy.getSizeInBits()); + LLT IntTy = LLT::integer(EltTy.getSizeInBits()); LLT IntVecTy = VecTy.changeElementType(IntTy); auto IntVec = B.buildPtrToInt(IntVecTy, Vec); @@ -2799,7 +2888,7 @@ bool AMDGPULegalizerInfo::legalizeInsertVectorElt( // new value, and then inttoptr the result vector back. This will then allow // the rest of legalization to take over. if (EltTy.isPointer() && EltTy.getSizeInBits() > 64) { - LLT IntTy = LLT::scalar(EltTy.getSizeInBits()); + LLT IntTy = LLT::integer(EltTy.getSizeInBits()); LLT IntVecTy = VecTy.changeElementType(IntTy); auto IntVecSource = B.buildPtrToInt(IntVecTy, Vec); @@ -2925,13 +3014,11 @@ void AMDGPULegalizerInfo::buildAbsGlobalAddress( MachineRegisterInfo &MRI) const { bool RequiresHighHalf = PtrTy.getSizeInBits() != 32; - LLT S32 = LLT::scalar(32); - // Use the destination directly, if and only if we store the lower address // part only and we don't have a register class being set. Register AddrLo = !RequiresHighHalf && !MRI.getRegClassOrNull(DstReg) ? DstReg - : MRI.createGenericVirtualRegister(S32); + : MRI.createGenericVirtualRegister(I32); if (!MRI.getRegClassOrNull(AddrLo)) MRI.setRegClass(AddrLo, &AMDGPU::SReg_32RegClass); @@ -2946,7 +3033,7 @@ void AMDGPULegalizerInfo::buildAbsGlobalAddress( assert(PtrTy.getSizeInBits() == 64 && "Must provide a 64-bit pointer type!"); - Register AddrHi = MRI.createGenericVirtualRegister(S32); + Register AddrHi = MRI.createGenericVirtualRegister(I32); MRI.setRegClass(AddrHi, &AMDGPU::SReg_32RegClass); B.buildInstr(AMDGPU::S_MOV_B32) @@ -2957,7 +3044,7 @@ void AMDGPULegalizerInfo::buildAbsGlobalAddress( // class being set. Register AddrDst = !MRI.getRegClassOrNull(DstReg) ? DstReg - : MRI.createGenericVirtualRegister(LLT::scalar(64)); + : MRI.createGenericVirtualRegister(I64); if (!MRI.getRegClassOrNull(AddrDst)) MRI.setRegClass(AddrDst, &AMDGPU::SReg_64RegClass); @@ -3026,8 +3113,7 @@ bool AMDGPULegalizerInfo::legalizeGlobalValue( if (B.getDataLayout().getTypeAllocSize(Ty).isZero()) { // Adjust alignment for that dynamic shared memory array. 
MFI->setDynLDSAlign(MF.getFunction(), *cast(GV)); - LLT S32 = LLT::scalar(32); - auto Sz = B.buildIntrinsic(Intrinsic::amdgcn_groupstaticsize, {S32}); + auto Sz = B.buildIntrinsic(Intrinsic::amdgcn_groupstaticsize, {I32}); B.buildIntToPtr(DstReg, Sz); MI.eraseFromParent(); return true; @@ -3087,7 +3173,7 @@ static LLT widenToNextPowerOf2(LLT Ty) { if (Ty.isVector()) return Ty.changeElementCount( ElementCount::getFixed(PowerOf2Ceil(Ty.getNumElements()))); - return LLT::scalar(PowerOf2Ceil(Ty.getSizeInBits())); + return LLT::integer(PowerOf2Ceil(Ty.getSizeInBits())); } bool AMDGPULegalizerInfo::legalizeLoad(LegalizerHelper &Helper, @@ -3197,6 +3283,50 @@ bool AMDGPULegalizerInfo::legalizeStore(LegalizerHelper &Helper, return false; } +bool AMDGPULegalizerInfo::legalizeFPExt(MachineInstr &MI, + MachineRegisterInfo &MRI, + MachineIRBuilder &B) const { + // TODO: move to LegalizerHelper + const SITargetLowering *TLI = ST.getTargetLowering(); + + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + + auto ShiftTy = TLI->getPreferredShiftAmountTy(I32); + + B.buildBitcast( + DstReg, B.buildShl(I32, B.buildAnyExt(I32, B.buildBitcast(I16, SrcReg)), + B.buildConstant(ShiftTy, 16))); + + MI.eraseFromParent(); + return true; +} + +bool AMDGPULegalizerInfo::legalizeFPTrunc(MachineInstr &MI, + MachineRegisterInfo &MRI, + MachineIRBuilder &B) const { + // TODO: move to LegalizerHelper + const SITargetLowering *TLI = ST.getTargetLowering(); + + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + + auto ShiftTy = TLI->getPreferredShiftAmountTy(I32); + + // FIXME: + // if (!DAG.isKnownNeverSNaN(Op)) { + // Op = DAG.getNode(ISD::FCANONICALIZE, dl, MVT::f32, Op, + // Node->getFlags()); + // } + + B.buildBitcast(DstReg, + B.buildTrunc(I16, B.buildLShr(I32, B.buildBitcast(I32, SrcReg), + B.buildConstant(ShiftTy, 16)))); + + MI.eraseFromParent(); + return true; +} + bool AMDGPULegalizerInfo::legalizeFMad( MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { @@ -3208,10 +3338,10 @@ bool AMDGPULegalizerInfo::legalizeFMad( // TODO: Always legal with future ftz flag. // FIXME: Do we need just output? 
- if (Ty == LLT::float32() && + if (Ty == F32 && MFI->getMode().FP32Denormals == DenormalMode::getPreserveSign()) return true; - if (Ty == LLT::float16() && + if (Ty == F16 && MFI->getMode().FP64FP16Denormals == DenormalMode::getPreserveSign()) return true; @@ -3267,7 +3397,7 @@ static bool valueIsKnownNeverF32Denorm(const MachineRegisterInfo &MRI, break; } case TargetOpcode::G_FPEXT: { - return MRI.getType(DefMI->getOperand(1).getReg()) == LLT::scalar(16); + return MRI.getType(DefMI->getOperand(1).getReg()).isFloat(16); } default: return false; @@ -3296,11 +3426,10 @@ AMDGPULegalizerInfo::getScaledLogInput(MachineIRBuilder &B, Register Src, if (!needsDenormHandlingF32(B.getMF(), Src, Flags)) return {}; - const LLT F32 = LLT::scalar(32); auto SmallestNormal = B.buildFConstant( F32, APFloat::getSmallestNormalized(APFloat::IEEEsingle())); auto IsLtSmallestNormal = - B.buildFCmp(CmpInst::FCMP_OLT, LLT::scalar(1), Src, SmallestNormal); + B.buildFCmp(CmpInst::FCMP_OLT, I1, Src, SmallestNormal); auto Scale32 = B.buildFConstant(F32, 0x1.0p+32); auto One = B.buildFConstant(F32, 1.0); @@ -3324,8 +3453,7 @@ bool AMDGPULegalizerInfo::legalizeFlog2(MachineInstr &MI, LLT Ty = B.getMRI()->getType(Dst); unsigned Flags = MI.getFlags(); - if (Ty == LLT::scalar(16)) { - const LLT F32 = LLT::scalar(32); + if (Ty.isFloat(16)) { // Nothing in half is a denormal when promoted to f32. auto Ext = B.buildFPExt(F32, Src, Flags); auto Log2 = B.buildIntrinsic(Intrinsic::amdgcn_log, {F32}) @@ -3336,7 +3464,7 @@ bool AMDGPULegalizerInfo::legalizeFlog2(MachineInstr &MI, return true; } - assert(Ty == LLT::scalar(32)); + assert(Ty.isFloat(32)); auto [ScaledInput, IsLtSmallestNormal] = getScaledLogInput(B, Src, Flags); if (!ScaledInput) { @@ -3379,9 +3507,6 @@ bool AMDGPULegalizerInfo::legalizeFlogCommon(MachineInstr &MI, const LLT Ty = MRI.getType(X); MachineFunction &MF = B.getMF(); - const LLT F32 = LLT::scalar(32); - const LLT F16 = LLT::scalar(16); - const AMDGPUTargetMachine &TM = static_cast(MF.getTarget()); @@ -3456,8 +3581,7 @@ bool AMDGPULegalizerInfo::legalizeFlogCommon(MachineInstr &MI, // Expand isfinite(x) => fabs(x) < inf auto Inf = B.buildFConstant(Ty, APFloat::getInf(APFloat::IEEEsingle())); auto Fabs = B.buildFAbs(Ty, Y); - auto IsFinite = - B.buildFCmp(CmpInst::FCMP_OLT, LLT::scalar(1), Fabs, Inf, Flags); + auto IsFinite = B.buildFCmp(CmpInst::FCMP_OLT, I1, Fabs, Inf, Flags); R = B.buildSelect(Ty, IsFinite, R, Y, Flags).getReg(0); } @@ -3483,7 +3607,7 @@ bool AMDGPULegalizerInfo::legalizeFlogUnsafe(MachineIRBuilder &B, Register Dst, LLT Ty = B.getMRI()->getType(Dst); - if (Ty == LLT::scalar(32)) { + if (Ty.isFloat(32)) { auto [ScaledInput, IsScaled] = getScaledLogInput(B, Src, Flags); if (ScaledInput) { auto LogSrc = B.buildIntrinsic(Intrinsic::amdgcn_log, {Ty}) @@ -3506,7 +3630,7 @@ bool AMDGPULegalizerInfo::legalizeFlogUnsafe(MachineIRBuilder &B, Register Dst, } } - auto Log2Operand = Ty == LLT::scalar(16) + auto Log2Operand = Ty.isFloat(16) ? B.buildFLog2(Ty, Src, Flags) : B.buildIntrinsic(Intrinsic::amdgcn_log, {Ty}) .addUse(Src) @@ -3525,8 +3649,6 @@ bool AMDGPULegalizerInfo::legalizeFExp2(MachineInstr &MI, Register Src = MI.getOperand(1).getReg(); unsigned Flags = MI.getFlags(); LLT Ty = B.getMRI()->getType(Dst); - const LLT F16 = LLT::scalar(16); - const LLT F32 = LLT::scalar(32); if (Ty == F16) { // Nothing in half is a denormal when promoted to f32. 
@@ -3577,7 +3699,6 @@ bool AMDGPULegalizerInfo::legalizeFExp2(MachineInstr &MI, bool AMDGPULegalizerInfo::legalizeFExpUnsafe(MachineIRBuilder &B, Register Dst, Register X, unsigned Flags) const { LLT Ty = B.getMRI()->getType(Dst); - LLT F32 = LLT::scalar(32); if (Ty != F32 || !needsDenormHandlingF32(B.getMF(), X, Flags)) { auto Log2E = B.buildFConstant(Ty, numbers::log2e); @@ -3596,7 +3717,7 @@ bool AMDGPULegalizerInfo::legalizeFExpUnsafe(MachineIRBuilder &B, Register Dst, auto Threshold = B.buildFConstant(Ty, -0x1.5d58a0p+6f); auto NeedsScaling = - B.buildFCmp(CmpInst::FCMP_OLT, LLT::scalar(1), X, Threshold, Flags); + B.buildFCmp(CmpInst::FCMP_OLT, LLT::integer(1), X, Threshold, Flags); auto ScaleOffset = B.buildFConstant(Ty, 0x1.0p+6f); auto ScaledX = B.buildFAdd(Ty, X, ScaleOffset, Flags); auto AdjustedX = B.buildSelect(Ty, NeedsScaling, ScaledX, X, Flags); @@ -3622,8 +3743,6 @@ bool AMDGPULegalizerInfo::legalizeFExp(MachineInstr &MI, MachineFunction &MF = B.getMF(); MachineRegisterInfo &MRI = *B.getMRI(); LLT Ty = MRI.getType(Dst); - const LLT F16 = LLT::scalar(16); - const LLT F32 = LLT::scalar(32); const bool IsExp10 = MI.getOpcode() == TargetOpcode::G_FEXP10; if (Ty == F16) { @@ -3704,19 +3823,21 @@ bool AMDGPULegalizerInfo::legalizeFExp(MachineInstr &MI, const float ch_exp10 = 0x1.a92000p+1f; const float cl_exp10 = 0x1.4f0978p-11f; - auto MaskConst = B.buildConstant(Ty, 0xfffff000); - auto XH = B.buildAnd(Ty, X, MaskConst); - auto XL = B.buildFSub(Ty, X, XH, Flags); + auto MaskConst = B.buildConstant(I32, 0xfffff000); + auto XCast = B.buildBitcast(I32, X); + auto XH = B.buildAnd(I32, XCast, MaskConst); + auto XHCast = B.buildBitcast(Ty, XH); + auto XL = B.buildFSub(Ty, X, XHCast, Flags); auto CH = B.buildFConstant(Ty, IsExp10 ? ch_exp10 : ch_exp); - PH = B.buildFMul(Ty, XH, CH, Flags).getReg(0); + PH = B.buildFMul(Ty, XHCast, CH, Flags).getReg(0); auto CL = B.buildFConstant(Ty, IsExp10 ? cl_exp10 : cl_exp); auto XLCL = B.buildFMul(Ty, XL, CL, Flags); Register Mad0 = getMad(B, Ty, XL.getReg(0), CH.getReg(0), XLCL.getReg(0), Flags); - PL = getMad(B, Ty, XH.getReg(0), CL.getReg(0), Mad0, Flags); + PL = getMad(B, Ty, XHCast.getReg(0), CL.getReg(0), Mad0, Flags); } auto E = B.buildIntrinsicRoundeven(Ty, PH, Flags); @@ -3724,7 +3845,7 @@ bool AMDGPULegalizerInfo::legalizeFExp(MachineInstr &MI, // It is unsafe to contract this fsub into the PH multiply. auto PHSubE = B.buildFSub(Ty, PH, E, FlagsNoContract); auto A = B.buildFAdd(Ty, PHSubE, PL, Flags); - auto IntE = B.buildFPTOSI(LLT::scalar(32), E); + auto IntE = B.buildFPTOSI(I32, E); auto Exp2 = B.buildIntrinsic(Intrinsic::amdgcn_exp2, {Ty}) .addUse(A.getReg(0)) @@ -3734,8 +3855,7 @@ bool AMDGPULegalizerInfo::legalizeFExp(MachineInstr &MI, auto UnderflowCheckConst = B.buildFConstant(Ty, IsExp10 ? -0x1.66d3e8p+5f : -0x1.9d1da0p+6f); auto Zero = B.buildFConstant(Ty, 0.0); - auto Underflow = - B.buildFCmp(CmpInst::FCMP_OLT, LLT::scalar(1), X, UnderflowCheckConst); + auto Underflow = B.buildFCmp(CmpInst::FCMP_OLT, I1, X, UnderflowCheckConst); R = B.buildSelect(Ty, Underflow, Zero, R); @@ -3745,8 +3865,7 @@ bool AMDGPULegalizerInfo::legalizeFExp(MachineInstr &MI, auto OverflowCheckConst = B.buildFConstant(Ty, IsExp10 ? 
0x1.344136p+5f : 0x1.62e430p+6f); - auto Overflow = - B.buildFCmp(CmpInst::FCMP_OGT, LLT::scalar(1), X, OverflowCheckConst); + auto Overflow = B.buildFCmp(CmpInst::FCMP_OGT, I1, X, OverflowCheckConst); auto Inf = B.buildFConstant(Ty, APFloat::getInf(APFloat::IEEEsingle())); R = B.buildSelect(Ty, Overflow, Inf, R, Flags); } @@ -3763,8 +3882,6 @@ bool AMDGPULegalizerInfo::legalizeFPow(MachineInstr &MI, Register Src1 = MI.getOperand(2).getReg(); unsigned Flags = MI.getFlags(); LLT Ty = B.getMRI()->getType(Dst); - const LLT F16 = LLT::float16(); - const LLT F32 = LLT::float32(); if (Ty == F32) { auto Log = B.buildFLog2(F32, Src0, Flags); @@ -3806,8 +3923,6 @@ bool AMDGPULegalizerInfo::legalizeFFloor(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { - const LLT S1 = LLT::scalar(1); - const LLT F64 = LLT::float64(); Register Dst = MI.getOperand(0).getReg(); Register OrigSrc = MI.getOperand(1).getReg(); unsigned Flags = MI.getFlags(); @@ -3848,7 +3963,7 @@ bool AMDGPULegalizerInfo::legalizeFFloor(MachineInstr &MI, Register CorrectedFract = Min; if (!MI.getFlag(MachineInstr::FmNoNans)) { - auto IsNan = B.buildFCmp(CmpInst::FCMP_ORD, S1, ModSrc, ModSrc, Flags); + auto IsNan = B.buildFCmp(CmpInst::FCMP_ORD, I1, ModSrc, ModSrc, Flags); CorrectedFract = B.buildSelect(F64, IsNan, ModSrc, Min, Flags).getReg(0); } @@ -3864,20 +3979,26 @@ bool AMDGPULegalizerInfo::legalizeFFloor(MachineInstr &MI, bool AMDGPULegalizerInfo::legalizeBuildVector( MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { Register Dst = MI.getOperand(0).getReg(); - const LLT S32 = LLT::scalar(32); - const LLT S16 = LLT::scalar(16); - assert(MRI.getType(Dst) == LLT::fixed_vector(2, 16)); + assert(MRI.getType(Dst).isFixedVector(2, 16)); Register Src0 = MI.getOperand(1).getReg(); Register Src1 = MI.getOperand(2).getReg(); + LLT Src0Ty = MRI.getType(Src0); + LLT Src1Ty = MRI.getType(Src1); + if (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC) { - assert(MRI.getType(Src0) == S32); - Src0 = B.buildTrunc(S16, MI.getOperand(1).getReg()).getReg(0); - Src1 = B.buildTrunc(S16, MI.getOperand(2).getReg()).getReg(0); + assert(MRI.getType(Src0).isScalar(32)); + Src0 = B.buildTrunc(I16, Src0).getReg(0); + Src1 = B.buildTrunc(I16, Src1).getReg(0); } - auto Merge = B.buildMergeLikeInstr(S32, {Src0, Src1}); + if (Src0Ty.isFloat() && Src1Ty.isFloat()) { + Src0 = B.buildBitcast(I16, Src0).getReg(0); + Src1 = B.buildBitcast(I16, Src1).getReg(0); + } + + auto Merge = B.buildMergeLikeInstr(I32, {Src0, Src1}); B.buildBitcast(Dst, Merge); MI.eraseFromParent(); @@ -3906,21 +4027,17 @@ void AMDGPULegalizerInfo::buildMultiply(LegalizerHelper &Helper, MachineIRBuilder &B = Helper.MIRBuilder; GISelKnownBits &KB = *Helper.getKnownBits(); - const LLT S1 = LLT::scalar(1); - const LLT S32 = LLT::scalar(32); - const LLT S64 = LLT::scalar(64); - Register Zero32; Register Zero64; auto getZero32 = [&]() -> Register { if (!Zero32) - Zero32 = B.buildConstant(S32, 0).getReg(0); + Zero32 = B.buildConstant(I32, 0).getReg(0); return Zero32; }; auto getZero64 = [&]() -> Register { if (!Zero64) - Zero64 = B.buildConstant(S64, 0).getReg(0); + Zero64 = B.buildConstant(I64, 0).getReg(0); return Zero64; }; @@ -3943,16 +4060,16 @@ void AMDGPULegalizerInfo::buildMultiply(LegalizerHelper &Helper, Register CarryAccum; if (CarryIn.size() == 1) { if (!LocalAccum) { - LocalAccum = B.buildZExt(S32, CarryIn[0]).getReg(0); + LocalAccum = B.buildZExt(I32, CarryIn[0]).getReg(0); return Register(); } CarryAccum = getZero32(); } else { - CarryAccum = 
B.buildZExt(S32, CarryIn[0]).getReg(0); + CarryAccum = B.buildZExt(I32, CarryIn[0]).getReg(0); for (unsigned i = 1; i + 1 < CarryIn.size(); ++i) { CarryAccum = - B.buildUAdde(S32, S1, CarryAccum, getZero32(), CarryIn[i]) + B.buildUAdde(I32, I1, CarryAccum, getZero32(), CarryIn[i]) .getReg(0); } @@ -3963,7 +4080,7 @@ void AMDGPULegalizerInfo::buildMultiply(LegalizerHelper &Helper, } auto Add = - B.buildUAdde(S32, S1, CarryAccum, LocalAccum, CarryIn.back()); + B.buildUAdde(I32, I1, CarryAccum, LocalAccum, CarryIn.back()); LocalAccum = Add.getReg(0); return HaveCarryOut ? Add.getReg(1) : Register(); }; @@ -3998,15 +4115,15 @@ void AMDGPULegalizerInfo::buildMultiply(LegalizerHelper &Helper, ++j0; continue; } - auto Mul = B.buildMul(S32, Src0[j0], Src1[j1]); + auto Mul = B.buildMul(I32, Src0[j0], Src1[j1]); if (!LocalAccum[0] || KB.getKnownBits(LocalAccum[0]).isZero()) { LocalAccum[0] = Mul.getReg(0); } else { if (CarryIn.empty()) { - LocalAccum[0] = B.buildAdd(S32, LocalAccum[0], Mul).getReg(0); + LocalAccum[0] = B.buildAdd(I32, LocalAccum[0], Mul).getReg(0); } else { LocalAccum[0] = - B.buildUAdde(S32, S1, LocalAccum[0], Mul, CarryIn.back()) + B.buildUAdde(I32, I1, LocalAccum[0], Mul, CarryIn.back()) .getReg(0); CarryIn.pop_back(); } @@ -4022,13 +4139,13 @@ void AMDGPULegalizerInfo::buildMultiply(LegalizerHelper &Helper, if (LocalAccum[0]) { if (LocalAccum.size() == 1) { - Tmp = B.buildAnyExt(S64, LocalAccum[0]).getReg(0); + Tmp = B.buildAnyExt(I64, LocalAccum[0]).getReg(0); HaveSmallAccum = true; } else if (LocalAccum[1]) { - Tmp = B.buildMergeLikeInstr(S64, LocalAccum).getReg(0); + Tmp = B.buildMergeLikeInstr(I64, LocalAccum).getReg(0); HaveSmallAccum = false; } else { - Tmp = B.buildZExt(S64, LocalAccum[0]).getReg(0); + Tmp = B.buildZExt(I64, LocalAccum[0]).getReg(0); HaveSmallAccum = true; } } else { @@ -4043,7 +4160,7 @@ void AMDGPULegalizerInfo::buildMultiply(LegalizerHelper &Helper, ++j0; continue; } - auto Mad = B.buildInstr(AMDGPU::G_AMDGPU_MAD_U64_U32, {S64, S1}, + auto Mad = B.buildInstr(AMDGPU::G_AMDGPU_MAD_U64_U32, {I64, I1}, {Src0[j0], Src1[j1], Tmp}); Tmp = Mad.getReg(0); if (!HaveSmallAccum) @@ -4053,7 +4170,7 @@ void AMDGPULegalizerInfo::buildMultiply(LegalizerHelper &Helper, ++j0; } while (j0 <= DstIndex); - auto Unmerge = B.buildUnmerge(S32, Tmp); + auto Unmerge = B.buildUnmerge(I32, Tmp); LocalAccum[0] = Unmerge.getReg(0); if (LocalAccum.size() > 1) LocalAccum[1] = Unmerge.getReg(1); @@ -4111,18 +4228,18 @@ void AMDGPULegalizerInfo::buildMultiply(LegalizerHelper &Helper, if (i == 1) { if (!IsHighest) - Lo = B.buildUAddo(S32, S1, Accum[2 * i - 1], SeparateOddOut[0]); + Lo = B.buildUAddo(I32, I1, Accum[2 * i - 1], SeparateOddOut[0]); else - Lo = B.buildAdd(S32, Accum[2 * i - 1], SeparateOddOut[0]); + Lo = B.buildAdd(I32, Accum[2 * i - 1], SeparateOddOut[0]); } else { - Lo = B.buildUAdde(S32, S1, Accum[2 * i - 1], SeparateOddOut[0], + Lo = B.buildUAdde(I32, I1, Accum[2 * i - 1], SeparateOddOut[0], SeparateOddCarry); } Accum[2 * i - 1] = Lo->getOperand(0).getReg(); if (!IsHighest) { - auto Hi = B.buildUAdde(S32, S1, Accum[2 * i], SeparateOddOut[1], - Lo->getOperand(1).getReg()); + auto Hi = B.buildUAdde(I32, I1, Accum[2 * i], SeparateOddOut[1], + Lo->getOperand(1).getReg()); Accum[2 * i] = Hi.getReg(0); SeparateOddCarry = Hi.getReg(1); } @@ -4159,7 +4276,7 @@ bool AMDGPULegalizerInfo::legalizeMul(LegalizerHelper &Helper, Register Src1 = MI.getOperand(2).getReg(); LLT Ty = MRI.getType(DstReg); - assert(Ty.isScalar()); + assert(Ty.isInteger()); unsigned Size = 
Ty.getSizeInBits(); unsigned NumParts = Size / 32; @@ -4176,11 +4293,10 @@ bool AMDGPULegalizerInfo::legalizeMul(LegalizerHelper &Helper, // in an even-aligned VGPR. const bool SeparateOddAlignedProducts = ST.hasFullRate64Ops(); - LLT S32 = LLT::scalar(32); SmallVector Src0Parts, Src1Parts; for (unsigned i = 0; i < NumParts; ++i) { - Src0Parts.push_back(MRI.createGenericVirtualRegister(S32)); - Src1Parts.push_back(MRI.createGenericVirtualRegister(S32)); + Src0Parts.push_back(MRI.createGenericVirtualRegister(I32)); + Src1Parts.push_back(MRI.createGenericVirtualRegister(I32)); } B.buildUnmerge(Src0Parts, Src0); B.buildUnmerge(Src1Parts, Src1); @@ -4225,10 +4341,10 @@ bool AMDGPULegalizerInfo::legalizeCTLZ_ZERO_UNDEF(MachineInstr &MI, assert(NumBits < 32u); - auto ShiftAmt = B.buildConstant(S32, 32u - NumBits); - auto Extend = B.buildAnyExt(S32, {Src}).getReg(0u); - auto Shift = B.buildShl(S32, Extend, ShiftAmt); - auto Ctlz = B.buildInstr(AMDGPU::G_AMDGPU_FFBH_U32, {S32}, {Shift}); + auto ShiftAmt = B.buildConstant(I32, 32u - NumBits); + auto Extend = B.buildAnyExt(I32, {Src}).getReg(0u); + auto Shift = B.buildShl(I32, Extend, ShiftAmt); + auto Ctlz = B.buildInstr(AMDGPU::G_AMDGPU_FFBH_U32, {I32}, {Shift}); B.buildTrunc(Dst, Ctlz); MI.eraseFromParent(); return true; @@ -4298,7 +4414,6 @@ void AMDGPULegalizerInfo::buildLoadInputValue(Register DstReg, *ArgRC, B.getDebugLoc(), ArgTy); if (Arg->isMasked()) { // TODO: Should we try to emit this once in the entry block? - const LLT S32 = LLT::scalar(32); const unsigned Mask = Arg->getMask(); const unsigned Shift = llvm::countr_zero(Mask); @@ -4307,11 +4422,11 @@ void AMDGPULegalizerInfo::buildLoadInputValue(Register DstReg, // TODO: Avoid clearing the high bits if we know workitem id y/z are always // 0. if (Shift != 0) { - auto ShiftAmt = B.buildConstant(S32, Shift); - AndMaskSrc = B.buildLShr(S32, LiveIn, ShiftAmt).getReg(0); + auto ShiftAmt = B.buildConstant(I32, Shift); + AndMaskSrc = B.buildLShr(I32, LiveIn, ShiftAmt).getReg(0); } - B.buildAnd(DstReg, AndMaskSrc, B.buildConstant(S32, Mask >> Shift)); + B.buildAnd(DstReg, AndMaskSrc, B.buildConstant(I32, Mask >> Shift)); } else { B.buildCopy(DstReg, LiveIn); } @@ -4342,17 +4457,17 @@ bool AMDGPULegalizerInfo::loadInputValue( case AMDGPUFunctionArgInfo::WORKGROUP_ID_X: Arg = &WorkGroupIDX; ArgRC = &AMDGPU::SReg_32RegClass; - ArgTy = LLT::scalar(32); + ArgTy = I32; break; case AMDGPUFunctionArgInfo::WORKGROUP_ID_Y: Arg = &WorkGroupIDY; ArgRC = &AMDGPU::SReg_32RegClass; - ArgTy = LLT::scalar(32); + ArgTy = I32; break; case AMDGPUFunctionArgInfo::WORKGROUP_ID_Z: Arg = &WorkGroupIDZ; ArgRC = &AMDGPU::SReg_32RegClass; - ArgTy = LLT::scalar(32); + ArgTy = I32; break; default: break; @@ -4450,7 +4565,7 @@ Register AMDGPULegalizerInfo::getKernargParameterPtr(MachineIRBuilder &B, AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR)) llvm_unreachable("failed to find kernarg segment ptr"); - auto COffset = B.buildConstant(LLT::scalar(64), Offset); + auto COffset = B.buildConstant(LLT::integer(64), Offset); // TODO: Should get nuw return B.buildPtrAdd(PtrTy, KernArgReg, COffset).getReg(0); } @@ -4463,7 +4578,7 @@ bool AMDGPULegalizerInfo::legalizeKernargMemParameter(MachineInstr &MI, Align Alignment) const { Register DstReg = MI.getOperand(0).getReg(); - assert(B.getMRI()->getType(DstReg) == LLT::scalar(32) && + assert(B.getMRI()->getType(DstReg).isScalar(32) && "unexpected kernarg parameter type"); Register Ptr = getKernargParameterPtr(B, Offset); @@ -4480,15 +4595,12 @@ bool 
AMDGPULegalizerInfo::legalizeFDIV(MachineInstr &MI, MachineIRBuilder &B) const { Register Dst = MI.getOperand(0).getReg(); LLT DstTy = MRI.getType(Dst); - LLT S16 = LLT::scalar(16); - LLT S32 = LLT::scalar(32); - LLT S64 = LLT::scalar(64); - if (DstTy == S16) + if (DstTy.isFloat(16)) return legalizeFDIV16(MI, MRI, B); - if (DstTy == S32) + if (DstTy.isFloat(32)) return legalizeFDIV32(MI, MRI, B); - if (DstTy == S64) + if (DstTy.isFloat(64)) return legalizeFDIV64(MI, MRI, B); return false; @@ -4499,42 +4611,39 @@ void AMDGPULegalizerInfo::legalizeUnsignedDIV_REM32Impl(MachineIRBuilder &B, Register DstRemReg, Register X, Register Y) const { - const LLT S1 = LLT::scalar(1); - const LLT S32 = LLT::scalar(32); - // See AMDGPUCodeGenPrepare::expandDivRem32 for a description of the // algorithm used here. // Initial estimate of inv(y). - auto FloatY = B.buildUITOFP(S32, Y); - auto RcpIFlag = B.buildInstr(AMDGPU::G_AMDGPU_RCP_IFLAG, {S32}, {FloatY}); - auto Scale = B.buildFConstant(S32, llvm::bit_cast(0x4f7ffffe)); - auto ScaledY = B.buildFMul(S32, RcpIFlag, Scale); - auto Z = B.buildFPTOUI(S32, ScaledY); + auto FloatY = B.buildUITOFP(F32, Y); + auto RcpIFlag = B.buildInstr(AMDGPU::G_AMDGPU_RCP_IFLAG, {F32}, {FloatY}); + auto Scale = B.buildFConstant(F32, llvm::bit_cast(0x4f7ffffe)); + auto ScaledY = B.buildFMul(F32, RcpIFlag, Scale); + auto Z = B.buildFPTOUI(I32, ScaledY); // One round of UNR. - auto NegY = B.buildSub(S32, B.buildConstant(S32, 0), Y); - auto NegYZ = B.buildMul(S32, NegY, Z); - Z = B.buildAdd(S32, Z, B.buildUMulH(S32, Z, NegYZ)); + auto NegY = B.buildSub(I32, B.buildConstant(I32, 0), Y); + auto NegYZ = B.buildMul(I32, NegY, Z); + Z = B.buildAdd(I32, Z, B.buildUMulH(I32, Z, NegYZ)); // Quotient/remainder estimate. - auto Q = B.buildUMulH(S32, X, Z); - auto R = B.buildSub(S32, X, B.buildMul(S32, Q, Y)); + auto Q = B.buildUMulH(I32, X, Z); + auto R = B.buildSub(I32, X, B.buildMul(I32, Q, Y)); // First quotient/remainder refinement. - auto One = B.buildConstant(S32, 1); - auto Cond = B.buildICmp(CmpInst::ICMP_UGE, S1, R, Y); + auto One = B.buildConstant(I32, 1); + auto Cond = B.buildICmp(CmpInst::ICMP_UGE, I1, R, Y); if (DstDivReg) - Q = B.buildSelect(S32, Cond, B.buildAdd(S32, Q, One), Q); - R = B.buildSelect(S32, Cond, B.buildSub(S32, R, Y), R); + Q = B.buildSelect(I32, Cond, B.buildAdd(I32, Q, One), Q); + R = B.buildSelect(I32, Cond, B.buildSub(I32, R, Y), R); // Second quotient/remainder refinement. 
-  Cond = B.buildICmp(CmpInst::ICMP_UGE, S1, R, Y);
+  Cond = B.buildICmp(CmpInst::ICMP_UGE, I1, R, Y);
   if (DstDivReg)
-    B.buildSelect(DstDivReg, Cond, B.buildAdd(S32, Q, One), Q);
+    B.buildSelect(DstDivReg, Cond, B.buildAdd(I32, Q, One), Q);

   if (DstRemReg)
-    B.buildSelect(DstRemReg, Cond, B.buildSub(S32, R, Y), R);
+    B.buildSelect(DstRemReg, Cond, B.buildSub(I32, R, Y), R);
 }

 // Build integer reciprocal sequence around V_RCP_IFLAG_F32
@@ -4552,32 +4661,31 @@ void AMDGPULegalizerInfo::legalizeUnsignedDIV_REM32Impl(MachineIRBuilder &B,
 // return {G_FPTOUI %mad2, G_FPTOUI %trunc}
 static std::pair<Register, Register> emitReciprocalU64(MachineIRBuilder &B,
                                                        Register Val) {
-  const LLT S32 = LLT::scalar(32);
-  auto Unmerge = B.buildUnmerge(S32, Val);
+  auto Unmerge = B.buildUnmerge(I32, Val);

-  auto CvtLo = B.buildUITOFP(S32, Unmerge.getReg(0));
-  auto CvtHi = B.buildUITOFP(S32, Unmerge.getReg(1));
+  auto CvtLo = B.buildUITOFP(F32, Unmerge.getReg(0));
+  auto CvtHi = B.buildUITOFP(F32, Unmerge.getReg(1));

   auto Mad = B.buildFMAD(
-      S32, CvtHi, // 2**32
-      B.buildFConstant(S32, llvm::bit_cast<float>(0x4f800000)), CvtLo);
+      F32, CvtHi, // 2**32
+      B.buildFConstant(F32, llvm::bit_cast<float>(0x4f800000)), CvtLo);

-  auto Rcp = B.buildInstr(AMDGPU::G_AMDGPU_RCP_IFLAG, {S32}, {Mad});
+  auto Rcp = B.buildInstr(AMDGPU::G_AMDGPU_RCP_IFLAG, {F32}, {Mad});
   auto Mul1 = B.buildFMul(
-      S32, Rcp, B.buildFConstant(S32, llvm::bit_cast<float>(0x5f7ffffc)));
+      F32, Rcp, B.buildFConstant(F32, llvm::bit_cast<float>(0x5f7ffffc)));

   // 2**(-32)
   auto Mul2 = B.buildFMul(
-      S32, Mul1, B.buildFConstant(S32, llvm::bit_cast<float>(0x2f800000)));
-  auto Trunc = B.buildIntrinsicTrunc(S32, Mul2);
+      F32, Mul1, B.buildFConstant(F32, llvm::bit_cast<float>(0x2f800000)));
+  auto Trunc = B.buildIntrinsicTrunc(F32, Mul2);

   // -(2**32)
   auto Mad2 = B.buildFMAD(
-      S32, Trunc, B.buildFConstant(S32, llvm::bit_cast<float>(0xcf800000)),
+      F32, Trunc, B.buildFConstant(F32, llvm::bit_cast<float>(0xcf800000)),
       Mul1);

-  auto ResultLo = B.buildFPTOUI(S32, Mad2);
-  auto ResultHi = B.buildFPTOUI(S32, Trunc);
+  auto ResultLo = B.buildFPTOUI(I32, Mad2);
+  auto ResultHi = B.buildFPTOUI(I32, Trunc);

   return {ResultLo.getReg(0), ResultHi.getReg(0)};
 }

@@ -4587,109 +4695,106 @@ void AMDGPULegalizerInfo::legalizeUnsignedDIV_REM64Impl(MachineIRBuilder &B,
                                                         Register DstRemReg,
                                                         Register Numer,
                                                         Register Denom) const {
-  const LLT S32 = LLT::scalar(32);
-  const LLT S64 = LLT::scalar(64);
-  const LLT S1 = LLT::scalar(1);
   Register RcpLo, RcpHi;

   std::tie(RcpLo, RcpHi) = emitReciprocalU64(B, Denom);

-  auto Rcp = B.buildMergeLikeInstr(S64, {RcpLo, RcpHi});
+  auto Rcp = B.buildMergeLikeInstr(I64, {RcpLo, RcpHi});

-  auto Zero64 = B.buildConstant(S64, 0);
-  auto NegDenom = B.buildSub(S64, Zero64, Denom);
+  auto Zero64 = B.buildConstant(I64, 0);
+  auto NegDenom = B.buildSub(I64, Zero64, Denom);

-  auto MulLo1 = B.buildMul(S64, NegDenom, Rcp);
-  auto MulHi1 = B.buildUMulH(S64, Rcp, MulLo1);
+  auto MulLo1 = B.buildMul(I64, NegDenom, Rcp);
+  auto MulHi1 = B.buildUMulH(I64, Rcp, MulLo1);

-  auto UnmergeMulHi1 = B.buildUnmerge(S32, MulHi1);
+  auto UnmergeMulHi1 = B.buildUnmerge(I32, MulHi1);
   Register MulHi1_Lo = UnmergeMulHi1.getReg(0);
   Register MulHi1_Hi = UnmergeMulHi1.getReg(1);

-  auto Add1_Lo = B.buildUAddo(S32, S1, RcpLo, MulHi1_Lo);
-  auto Add1_Hi = B.buildUAdde(S32, S1, RcpHi, MulHi1_Hi, Add1_Lo.getReg(1));
-  auto Add1 = B.buildMergeLikeInstr(S64, {Add1_Lo, Add1_Hi});
+  auto Add1_Lo = B.buildUAddo(I32, I1, RcpLo, MulHi1_Lo);
+  auto Add1_Hi = B.buildUAdde(I32, I1, RcpHi, MulHi1_Hi, Add1_Lo.getReg(1));
+  auto Add1 = B.buildMergeLikeInstr(I64, {Add1_Lo,
Add1_Hi}); - auto MulLo2 = B.buildMul(S64, NegDenom, Add1); - auto MulHi2 = B.buildUMulH(S64, Add1, MulLo2); - auto UnmergeMulHi2 = B.buildUnmerge(S32, MulHi2); + auto MulLo2 = B.buildMul(I64, NegDenom, Add1); + auto MulHi2 = B.buildUMulH(I64, Add1, MulLo2); + auto UnmergeMulHi2 = B.buildUnmerge(I32, MulHi2); Register MulHi2_Lo = UnmergeMulHi2.getReg(0); Register MulHi2_Hi = UnmergeMulHi2.getReg(1); - auto Zero32 = B.buildConstant(S32, 0); - auto Add2_Lo = B.buildUAddo(S32, S1, Add1_Lo, MulHi2_Lo); - auto Add2_Hi = B.buildUAdde(S32, S1, Add1_Hi, MulHi2_Hi, Add2_Lo.getReg(1)); - auto Add2 = B.buildMergeLikeInstr(S64, {Add2_Lo, Add2_Hi}); + auto Zero32 = B.buildConstant(I32, 0); + auto Add2_Lo = B.buildUAddo(I32, I1, Add1_Lo, MulHi2_Lo); + auto Add2_Hi = B.buildUAdde(I32, I1, Add1_Hi, MulHi2_Hi, Add2_Lo.getReg(1)); + auto Add2 = B.buildMergeLikeInstr(I64, {Add2_Lo, Add2_Hi}); - auto UnmergeNumer = B.buildUnmerge(S32, Numer); + auto UnmergeNumer = B.buildUnmerge(I32, Numer); Register NumerLo = UnmergeNumer.getReg(0); Register NumerHi = UnmergeNumer.getReg(1); - auto MulHi3 = B.buildUMulH(S64, Numer, Add2); - auto Mul3 = B.buildMul(S64, Denom, MulHi3); - auto UnmergeMul3 = B.buildUnmerge(S32, Mul3); + auto MulHi3 = B.buildUMulH(I64, Numer, Add2); + auto Mul3 = B.buildMul(I64, Denom, MulHi3); + auto UnmergeMul3 = B.buildUnmerge(I32, Mul3); Register Mul3_Lo = UnmergeMul3.getReg(0); Register Mul3_Hi = UnmergeMul3.getReg(1); - auto Sub1_Lo = B.buildUSubo(S32, S1, NumerLo, Mul3_Lo); - auto Sub1_Hi = B.buildUSube(S32, S1, NumerHi, Mul3_Hi, Sub1_Lo.getReg(1)); - auto Sub1_Mi = B.buildSub(S32, NumerHi, Mul3_Hi); - auto Sub1 = B.buildMergeLikeInstr(S64, {Sub1_Lo, Sub1_Hi}); + auto Sub1_Lo = B.buildUSubo(I32, I1, NumerLo, Mul3_Lo); + auto Sub1_Hi = B.buildUSube(I32, I1, NumerHi, Mul3_Hi, Sub1_Lo.getReg(1)); + auto Sub1_Mi = B.buildSub(I32, NumerHi, Mul3_Hi); + auto Sub1 = B.buildMergeLikeInstr(I64, {Sub1_Lo, Sub1_Hi}); - auto UnmergeDenom = B.buildUnmerge(S32, Denom); + auto UnmergeDenom = B.buildUnmerge(I32, Denom); Register DenomLo = UnmergeDenom.getReg(0); Register DenomHi = UnmergeDenom.getReg(1); - auto CmpHi = B.buildICmp(CmpInst::ICMP_UGE, S1, Sub1_Hi, DenomHi); - auto C1 = B.buildSExt(S32, CmpHi); + auto CmpHi = B.buildICmp(CmpInst::ICMP_UGE, I1, Sub1_Hi, DenomHi); + auto C1 = B.buildSExt(I32, CmpHi); - auto CmpLo = B.buildICmp(CmpInst::ICMP_UGE, S1, Sub1_Lo, DenomLo); - auto C2 = B.buildSExt(S32, CmpLo); + auto CmpLo = B.buildICmp(CmpInst::ICMP_UGE, I1, Sub1_Lo, DenomLo); + auto C2 = B.buildSExt(I32, CmpLo); - auto CmpEq = B.buildICmp(CmpInst::ICMP_EQ, S1, Sub1_Hi, DenomHi); - auto C3 = B.buildSelect(S32, CmpEq, C2, C1); + auto CmpEq = B.buildICmp(CmpInst::ICMP_EQ, I1, Sub1_Hi, DenomHi); + auto C3 = B.buildSelect(I32, CmpEq, C2, C1); // TODO: Here and below portions of the code can be enclosed into if/endif. // Currently control flow is unconditional and we have 4 selects after // potential endif to substitute PHIs. // if C3 != 0 ... 
- auto Sub2_Lo = B.buildUSubo(S32, S1, Sub1_Lo, DenomLo); - auto Sub2_Mi = B.buildUSube(S32, S1, Sub1_Mi, DenomHi, Sub1_Lo.getReg(1)); - auto Sub2_Hi = B.buildUSube(S32, S1, Sub2_Mi, Zero32, Sub2_Lo.getReg(1)); - auto Sub2 = B.buildMergeLikeInstr(S64, {Sub2_Lo, Sub2_Hi}); + auto Sub2_Lo = B.buildUSubo(I32, I1, Sub1_Lo, DenomLo); + auto Sub2_Mi = B.buildUSube(I32, I1, Sub1_Mi, DenomHi, Sub1_Lo.getReg(1)); + auto Sub2_Hi = B.buildUSube(I32, I1, Sub2_Mi, Zero32, Sub2_Lo.getReg(1)); + auto Sub2 = B.buildMergeLikeInstr(I64, {Sub2_Lo, Sub2_Hi}); - auto One64 = B.buildConstant(S64, 1); - auto Add3 = B.buildAdd(S64, MulHi3, One64); + auto One64 = B.buildConstant(I64, 1); + auto Add3 = B.buildAdd(I64, MulHi3, One64); auto C4 = - B.buildSExt(S32, B.buildICmp(CmpInst::ICMP_UGE, S1, Sub2_Hi, DenomHi)); + B.buildSExt(I32, B.buildICmp(CmpInst::ICMP_UGE, I1, Sub2_Hi, DenomHi)); auto C5 = - B.buildSExt(S32, B.buildICmp(CmpInst::ICMP_UGE, S1, Sub2_Lo, DenomLo)); + B.buildSExt(I32, B.buildICmp(CmpInst::ICMP_UGE, I1, Sub2_Lo, DenomLo)); auto C6 = B.buildSelect( - S32, B.buildICmp(CmpInst::ICMP_EQ, S1, Sub2_Hi, DenomHi), C5, C4); + I32, B.buildICmp(CmpInst::ICMP_EQ, I1, Sub2_Hi, DenomHi), C5, C4); // if (C6 != 0) - auto Add4 = B.buildAdd(S64, Add3, One64); - auto Sub3_Lo = B.buildUSubo(S32, S1, Sub2_Lo, DenomLo); + auto Add4 = B.buildAdd(I64, Add3, One64); + auto Sub3_Lo = B.buildUSubo(I32, I1, Sub2_Lo, DenomLo); - auto Sub3_Mi = B.buildUSube(S32, S1, Sub2_Mi, DenomHi, Sub2_Lo.getReg(1)); - auto Sub3_Hi = B.buildUSube(S32, S1, Sub3_Mi, Zero32, Sub3_Lo.getReg(1)); - auto Sub3 = B.buildMergeLikeInstr(S64, {Sub3_Lo, Sub3_Hi}); + auto Sub3_Mi = B.buildUSube(I32, I1, Sub2_Mi, DenomHi, Sub2_Lo.getReg(1)); + auto Sub3_Hi = B.buildUSube(I32, I1, Sub3_Mi, Zero32, Sub3_Lo.getReg(1)); + auto Sub3 = B.buildMergeLikeInstr(I64, {Sub3_Lo, Sub3_Hi}); // endif C6 // endif C3 if (DstDivReg) { auto Sel1 = B.buildSelect( - S64, B.buildICmp(CmpInst::ICMP_NE, S1, C6, Zero32), Add4, Add3); - B.buildSelect(DstDivReg, B.buildICmp(CmpInst::ICMP_NE, S1, C3, Zero32), + I64, B.buildICmp(CmpInst::ICMP_NE, I1, C6, Zero32), Add4, Add3); + B.buildSelect(DstDivReg, B.buildICmp(CmpInst::ICMP_NE, I1, C3, Zero32), Sel1, MulHi3); } if (DstRemReg) { auto Sel2 = B.buildSelect( - S64, B.buildICmp(CmpInst::ICMP_NE, S1, C6, Zero32), Sub3, Sub2); - B.buildSelect(DstRemReg, B.buildICmp(CmpInst::ICMP_NE, S1, C3, Zero32), + I64, B.buildICmp(CmpInst::ICMP_NE, I1, C6, Zero32), Sub3, Sub2); + B.buildSelect(DstRemReg, B.buildICmp(CmpInst::ICMP_NE, I1, C3, Zero32), Sel2, Sub1); } } @@ -4716,16 +4821,14 @@ bool AMDGPULegalizerInfo::legalizeUnsignedDIV_REM(MachineInstr &MI, } } - const LLT S64 = LLT::scalar(64); - const LLT S32 = LLT::scalar(32); const unsigned FirstSrcOpIdx = MI.getNumExplicitDefs(); Register Num = MI.getOperand(FirstSrcOpIdx).getReg(); Register Den = MI.getOperand(FirstSrcOpIdx + 1).getReg(); LLT Ty = MRI.getType(MI.getOperand(0).getReg()); - if (Ty == S32) + if (Ty.isInteger(32)) legalizeUnsignedDIV_REM32Impl(B, DstDivReg, DstRemReg, Num, Den); - else if (Ty == S64) + else if (Ty.isInteger(64)) legalizeUnsignedDIV_REM64Impl(B, DstDivReg, DstRemReg, Num, Den); else return false; @@ -4737,18 +4840,15 @@ bool AMDGPULegalizerInfo::legalizeUnsignedDIV_REM(MachineInstr &MI, bool AMDGPULegalizerInfo::legalizeSignedDIV_REM(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { - const LLT S64 = LLT::scalar(64); - const LLT S32 = LLT::scalar(32); - LLT Ty = MRI.getType(MI.getOperand(0).getReg()); - if (Ty != S32 && Ty != S64) + if 
(!Ty.isInteger(32) && !Ty.isInteger(64)) return false; const unsigned FirstSrcOpIdx = MI.getNumExplicitDefs(); Register LHS = MI.getOperand(FirstSrcOpIdx).getReg(); Register RHS = MI.getOperand(FirstSrcOpIdx + 1).getReg(); - auto SignBitOffset = B.buildConstant(S32, Ty.getSizeInBits() - 1); + auto SignBitOffset = B.buildConstant(I32, Ty.getSizeInBits() - 1); auto LHSign = B.buildAShr(Ty, LHS, SignBitOffset); auto RHSign = B.buildAShr(Ty, RHS, SignBitOffset); @@ -4781,7 +4881,7 @@ bool AMDGPULegalizerInfo::legalizeSignedDIV_REM(MachineInstr &MI, } } - if (Ty == S32) + if (Ty.isInteger(32)) legalizeUnsignedDIV_REM32Impl(B, TmpDivReg, TmpRemReg, LHS, RHS); else legalizeUnsignedDIV_REM64Impl(B, TmpDivReg, TmpRemReg, LHS, RHS); @@ -4816,7 +4916,7 @@ bool AMDGPULegalizerInfo::legalizeFastUnsafeFDIV(MachineInstr &MI, MF.getTarget().Options.UnsafeFPMath; if (const auto *CLHS = getConstantFPVRegVal(LHS, MRI)) { - if (!AllowInaccurateRcp && ResTy != LLT::scalar(16)) + if (!AllowInaccurateRcp && !ResTy.isScalar(16)) return false; // v_rcp_f32 and v_rsq_f32 do not support denormals, and according to @@ -4850,8 +4950,8 @@ bool AMDGPULegalizerInfo::legalizeFastUnsafeFDIV(MachineInstr &MI, // For f16 require afn or arcp. // For f32 require afn. - if (!AllowInaccurateRcp && (ResTy != LLT::scalar(16) || - !MI.getFlag(MachineInstr::FmArcp))) + if (!AllowInaccurateRcp && + (!ResTy.isScalar(16) || !MI.getFlag(MachineInstr::FmArcp))) return false; // x / y -> x * (1.0 / y) @@ -4913,9 +5013,6 @@ bool AMDGPULegalizerInfo::legalizeFDIV16(MachineInstr &MI, uint16_t Flags = MI.getFlags(); - LLT S16 = LLT::scalar(16); - LLT S32 = LLT::scalar(32); - // a32.u = opx(V_CVT_F32_F16, a.u); // CVT to F32 // b32.u = opx(V_CVT_F32_F16, b.u); // CVT to F32 // r32.u = opx(V_RCP_F32, b32.u); // rcp = 1 / d @@ -4929,27 +5026,29 @@ bool AMDGPULegalizerInfo::legalizeFDIV16(MachineInstr &MI, // q16.u = opx(V_CVT_F16_F32, q32.u); // q16.u = opx(V_DIV_FIXUP_F16, q16.u, b.u, a.u); // q = touchup(q, d, n) - auto LHSExt = B.buildFPExt(S32, LHS, Flags); - auto RHSExt = B.buildFPExt(S32, RHS, Flags); - auto NegRHSExt = B.buildFNeg(S32, RHSExt); - auto Rcp = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S32}) + auto LHSExt = B.buildFPExt(F32, LHS, Flags); + auto RHSExt = B.buildFPExt(F32, RHS, Flags); + auto NegRHSExt = B.buildFNeg(F32, RHSExt); + auto Rcp = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {F32}) .addUse(RHSExt.getReg(0)) .setMIFlags(Flags); - auto Quot = B.buildFMul(S32, LHSExt, Rcp, Flags); + auto Quot = B.buildFMul(F32, LHSExt, Rcp, Flags); MachineInstrBuilder Err; if (ST.hasMadMacF32Insts()) { - Err = B.buildFMAD(S32, NegRHSExt, Quot, LHSExt, Flags); - Quot = B.buildFMAD(S32, Err, Rcp, Quot, Flags); - Err = B.buildFMAD(S32, NegRHSExt, Quot, LHSExt, Flags); + Err = B.buildFMAD(F32, NegRHSExt, Quot, LHSExt, Flags); + Quot = B.buildFMAD(F32, Err, Rcp, Quot, Flags); + Err = B.buildFMAD(F32, NegRHSExt, Quot, LHSExt, Flags); } else { - Err = B.buildFMA(S32, NegRHSExt, Quot, LHSExt, Flags); - Quot = B.buildFMA(S32, Err, Rcp, Quot, Flags); - Err = B.buildFMA(S32, NegRHSExt, Quot, LHSExt, Flags); - } - auto Tmp = B.buildFMul(S32, Err, Rcp, Flags); - Tmp = B.buildAnd(S32, Tmp, B.buildConstant(S32, 0xff800000)); - Quot = B.buildFAdd(S32, Tmp, Quot, Flags); - auto RDst = B.buildFPTrunc(S16, Quot, Flags); + Err = B.buildFMA(F32, NegRHSExt, Quot, LHSExt, Flags); + Quot = B.buildFMA(F32, Err, Rcp, Quot, Flags); + Err = B.buildFMA(F32, NegRHSExt, Quot, LHSExt, Flags); + } + auto Tmp = B.buildFMul(F32, Err, Rcp, Flags); + Tmp = B.buildBitcast(I32, 
Tmp); + Tmp = B.buildAnd(I32, Tmp, B.buildConstant(I32, 0xff800000)); + Tmp = B.buildBitcast(F32, Tmp); + Quot = B.buildFAdd(F32, Tmp, Quot, Flags); + auto RDst = B.buildFPTrunc(F16, Quot, Flags); B.buildIntrinsic(Intrinsic::amdgcn_div_fixup, Res) .addUse(RDst.getReg(0)) .addUse(RHS) @@ -5001,28 +5100,25 @@ bool AMDGPULegalizerInfo::legalizeFDIV32(MachineInstr &MI, uint16_t Flags = MI.getFlags(); - LLT S32 = LLT::scalar(32); - LLT S1 = LLT::scalar(1); - - auto One = B.buildFConstant(S32, 1.0f); + auto One = B.buildFConstant(F32, 1.0f); auto DenominatorScaled = - B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {S32, S1}) + B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {F32, I1}) .addUse(LHS) .addUse(RHS) .addImm(0) .setMIFlags(Flags); auto NumeratorScaled = - B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {S32, S1}) + B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {F32, I1}) .addUse(LHS) .addUse(RHS) .addImm(1) .setMIFlags(Flags); - auto ApproxRcp = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S32}) + auto ApproxRcp = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {F32}) .addUse(DenominatorScaled.getReg(0)) .setMIFlags(Flags); - auto NegDivScale0 = B.buildFNeg(S32, DenominatorScaled, Flags); + auto NegDivScale0 = B.buildFNeg(F32, DenominatorScaled, Flags); const bool PreservesDenormals = Mode.FP32Denormals == DenormalMode::getIEEE(); const bool HasDynamicDenormals = @@ -5040,12 +5136,12 @@ bool AMDGPULegalizerInfo::legalizeFDIV32(MachineInstr &MI, toggleSPDenormMode(true, B, ST, Mode); } - auto Fma0 = B.buildFMA(S32, NegDivScale0, ApproxRcp, One, Flags); - auto Fma1 = B.buildFMA(S32, Fma0, ApproxRcp, ApproxRcp, Flags); - auto Mul = B.buildFMul(S32, NumeratorScaled, Fma1, Flags); - auto Fma2 = B.buildFMA(S32, NegDivScale0, Mul, NumeratorScaled, Flags); - auto Fma3 = B.buildFMA(S32, Fma2, Fma1, Mul, Flags); - auto Fma4 = B.buildFMA(S32, NegDivScale0, Fma3, NumeratorScaled, Flags); + auto Fma0 = B.buildFMA(F32, NegDivScale0, ApproxRcp, One, Flags); + auto Fma1 = B.buildFMA(F32, Fma0, ApproxRcp, ApproxRcp, Flags); + auto Mul = B.buildFMul(F32, NumeratorScaled, Fma1, Flags); + auto Fma2 = B.buildFMA(F32, NegDivScale0, Mul, NumeratorScaled, Flags); + auto Fma3 = B.buildFMA(F32, Fma2, Fma1, Mul, Flags); + auto Fma4 = B.buildFMA(F32, NegDivScale0, Fma3, NumeratorScaled, Flags); if (!PreservesDenormals) { if (HasDynamicDenormals) { @@ -5057,7 +5153,7 @@ bool AMDGPULegalizerInfo::legalizeFDIV32(MachineInstr &MI, toggleSPDenormMode(false, B, ST, Mode); } - auto Fmas = B.buildIntrinsic(Intrinsic::amdgcn_div_fmas, {S32}) + auto Fmas = B.buildIntrinsic(Intrinsic::amdgcn_div_fmas, {F32}) .addUse(Fma4.getReg(0)) .addUse(Fma1.getReg(0)) .addUse(Fma3.getReg(0)) @@ -5086,59 +5182,54 @@ bool AMDGPULegalizerInfo::legalizeFDIV64(MachineInstr &MI, uint16_t Flags = MI.getFlags(); - LLT S64 = LLT::scalar(64); - LLT S1 = LLT::scalar(1); - - auto One = B.buildFConstant(S64, 1.0); + auto One = B.buildFConstant(F64, 1.0); - auto DivScale0 = B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {S64, S1}) + auto DivScale0 = B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {F64, I1}) .addUse(LHS) .addUse(RHS) .addImm(0) .setMIFlags(Flags); - auto NegDivScale0 = B.buildFNeg(S64, DivScale0.getReg(0), Flags); + auto NegDivScale0 = B.buildFNeg(F64, DivScale0.getReg(0), Flags); - auto Rcp = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S64}) + auto Rcp = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {F64}) .addUse(DivScale0.getReg(0)) .setMIFlags(Flags); - auto Fma0 = B.buildFMA(S64, NegDivScale0, Rcp, One, Flags); - auto Fma1 = B.buildFMA(S64, Rcp, Fma0, Rcp, 
Flags); - auto Fma2 = B.buildFMA(S64, NegDivScale0, Fma1, One, Flags); + auto Fma0 = B.buildFMA(F64, NegDivScale0, Rcp, One, Flags); + auto Fma1 = B.buildFMA(F64, Rcp, Fma0, Rcp, Flags); + auto Fma2 = B.buildFMA(F64, NegDivScale0, Fma1, One, Flags); - auto DivScale1 = B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {S64, S1}) + auto DivScale1 = B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {F64, I1}) .addUse(LHS) .addUse(RHS) .addImm(1) .setMIFlags(Flags); - auto Fma3 = B.buildFMA(S64, Fma1, Fma2, Fma1, Flags); - auto Mul = B.buildFMul(S64, DivScale1.getReg(0), Fma3, Flags); - auto Fma4 = B.buildFMA(S64, NegDivScale0, Mul, DivScale1.getReg(0), Flags); + auto Fma3 = B.buildFMA(F64, Fma1, Fma2, Fma1, Flags); + auto Mul = B.buildFMul(F64, DivScale1.getReg(0), Fma3, Flags); + auto Fma4 = B.buildFMA(F64, NegDivScale0, Mul, DivScale1.getReg(0), Flags); Register Scale; if (!ST.hasUsableDivScaleConditionOutput()) { // Workaround a hardware bug on SI where the condition output from div_scale // is not usable. - LLT S32 = LLT::scalar(32); + auto NumUnmerge = B.buildUnmerge(I32, B.buildBitcast(I64, LHS)); + auto DenUnmerge = B.buildUnmerge(I32, B.buildBitcast(I64, RHS)); + auto Scale0Unmerge = B.buildUnmerge(I32, B.buildBitcast(I64, DivScale0)); + auto Scale1Unmerge = B.buildUnmerge(I32, B.buildBitcast(I64, DivScale1)); - auto NumUnmerge = B.buildUnmerge(S32, LHS); - auto DenUnmerge = B.buildUnmerge(S32, RHS); - auto Scale0Unmerge = B.buildUnmerge(S32, DivScale0); - auto Scale1Unmerge = B.buildUnmerge(S32, DivScale1); - - auto CmpNum = B.buildICmp(ICmpInst::ICMP_EQ, S1, NumUnmerge.getReg(1), + auto CmpNum = B.buildICmp(ICmpInst::ICMP_EQ, I1, NumUnmerge.getReg(1), Scale1Unmerge.getReg(1)); - auto CmpDen = B.buildICmp(ICmpInst::ICMP_EQ, S1, DenUnmerge.getReg(1), + auto CmpDen = B.buildICmp(ICmpInst::ICMP_EQ, I1, DenUnmerge.getReg(1), Scale0Unmerge.getReg(1)); - Scale = B.buildXor(S1, CmpNum, CmpDen).getReg(0); + Scale = B.buildXor(I1, CmpNum, CmpDen).getReg(0); } else { Scale = DivScale1.getReg(1); } - auto Fmas = B.buildIntrinsic(Intrinsic::amdgcn_div_fmas, {S64}) + auto Fmas = B.buildIntrinsic(Intrinsic::amdgcn_div_fmas, {F64}) .addUse(Fma4.getReg(0)) .addUse(Fma3.getReg(0)) .addUse(Mul.getReg(0)) @@ -5164,7 +5255,7 @@ bool AMDGPULegalizerInfo::legalizeFFREXP(MachineInstr &MI, uint16_t Flags = MI.getFlags(); LLT Ty = MRI.getType(Res0); - LLT InstrExpTy = Ty == LLT::scalar(16) ? LLT::scalar(16) : LLT::scalar(32); + LLT InstrExpTy = Ty.isFloat(16) ? 
I16 : I32; auto Mant = B.buildIntrinsic(Intrinsic::amdgcn_frexp_mant, {Ty}) .addUse(Val) @@ -5176,8 +5267,7 @@ bool AMDGPULegalizerInfo::legalizeFFREXP(MachineInstr &MI, if (ST.hasFractBug()) { auto Fabs = B.buildFAbs(Ty, Val); auto Inf = B.buildFConstant(Ty, APFloat::getInf(getFltSemanticForLLT(Ty))); - auto IsFinite = - B.buildFCmp(CmpInst::FCMP_OLT, LLT::scalar(1), Fabs, Inf, Flags); + auto IsFinite = B.buildFCmp(CmpInst::FCMP_OLT, I1, Fabs, Inf, Flags); auto Zero = B.buildConstant(InstrExpTy, 0); Exp = B.buildSelect(InstrExpTy, IsFinite, Exp, Zero); Mant = B.buildSelect(Ty, IsFinite, Mant, Val); @@ -5198,26 +5288,23 @@ bool AMDGPULegalizerInfo::legalizeFDIVFastIntrin(MachineInstr &MI, Register RHS = MI.getOperand(3).getReg(); uint16_t Flags = MI.getFlags(); - LLT S32 = LLT::scalar(32); - LLT S1 = LLT::scalar(1); - - auto Abs = B.buildFAbs(S32, RHS, Flags); + auto Abs = B.buildFAbs(F32, RHS, Flags); const APFloat C0Val(1.0f); - auto C0 = B.buildFConstant(S32, 0x1p+96f); - auto C1 = B.buildFConstant(S32, 0x1p-32f); - auto C2 = B.buildFConstant(S32, 1.0f); + auto C0 = B.buildFConstant(F32, 0x1p+96f); + auto C1 = B.buildFConstant(F32, 0x1p-32f); + auto C2 = B.buildFConstant(F32, 1.0f); - auto CmpRes = B.buildFCmp(CmpInst::FCMP_OGT, S1, Abs, C0, Flags); - auto Sel = B.buildSelect(S32, CmpRes, C1, C2, Flags); + auto CmpRes = B.buildFCmp(CmpInst::FCMP_OGT, I1, Abs, C0, Flags); + auto Sel = B.buildSelect(F32, CmpRes, C1, C2, Flags); - auto Mul0 = B.buildFMul(S32, RHS, Sel, Flags); + auto Mul0 = B.buildFMul(F32, RHS, Sel, Flags); - auto RCP = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S32}) + auto RCP = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {F32}) .addUse(Mul0.getReg(0)) .setMIFlags(Flags); - auto Mul1 = B.buildFMul(S32, LHS, RCP, Flags); + auto Mul1 = B.buildFMul(F32, LHS, RCP, Flags); B.buildFMul(Res, Sel, Mul1, Flags); @@ -5232,7 +5319,6 @@ bool AMDGPULegalizerInfo::legalizeFSQRTF16(MachineInstr &MI, // get. The f32 op is accurate enough for the f16 cas. 
unsigned Flags = MI.getFlags(); assert(!ST.has16BitInsts()); - const LLT F32 = LLT::scalar(32); auto Ext = B.buildFPExt(F32, MI.getOperand(1), Flags); auto Log2 = B.buildIntrinsic(Intrinsic::amdgcn_sqrt, {F32}) .addUse(Ext.getReg(0)) @@ -5249,9 +5335,6 @@ bool AMDGPULegalizerInfo::legalizeFSQRTF32(MachineInstr &MI, Register Dst = MI.getOperand(0).getReg(); Register X = MI.getOperand(1).getReg(); const unsigned Flags = MI.getFlags(); - const LLT S1 = LLT::scalar(1); - const LLT F32 = LLT::scalar(32); - const LLT I32 = LLT::scalar(32); if (allowApproxFunc(MF, Flags)) { B.buildIntrinsic(Intrinsic::amdgcn_sqrt, ArrayRef({Dst})) @@ -5262,7 +5345,7 @@ bool AMDGPULegalizerInfo::legalizeFSQRTF32(MachineInstr &MI, } auto ScaleThreshold = B.buildFConstant(F32, 0x1.0p-96f); - auto NeedScale = B.buildFCmp(CmpInst::FCMP_OGT, S1, ScaleThreshold, X, Flags); + auto NeedScale = B.buildFCmp(CmpInst::FCMP_OGT, I1, ScaleThreshold, X, Flags); auto ScaleUpFactor = B.buildFConstant(F32, 0x1.0p+32f); auto ScaledX = B.buildFMul(F32, X, ScaleUpFactor, Flags); auto SqrtX = B.buildSelect(F32, NeedScale, ScaledX, X, Flags); @@ -5272,26 +5355,27 @@ bool AMDGPULegalizerInfo::legalizeFSQRTF32(MachineInstr &MI, B.buildIntrinsic(Intrinsic::amdgcn_sqrt, ArrayRef({SqrtS})) .addUse(SqrtX.getReg(0)) .setMIFlags(Flags); + auto SqrtSInt = B.buildBitcast(I32, SqrtS); auto NegOne = B.buildConstant(I32, -1); - auto SqrtSNextDown = B.buildAdd(I32, SqrtS, NegOne); + auto SqrtSNextDown = B.buildBitcast(F32, B.buildAdd(I32, SqrtSInt, NegOne)); auto NegSqrtSNextDown = B.buildFNeg(F32, SqrtSNextDown, Flags); auto SqrtVP = B.buildFMA(F32, NegSqrtSNextDown, SqrtS, SqrtX, Flags); auto PosOne = B.buildConstant(I32, 1); - auto SqrtSNextUp = B.buildAdd(I32, SqrtS, PosOne); + auto SqrtSNextUp = B.buildBitcast(F32, B.buildAdd(I32, SqrtSInt, PosOne)); auto NegSqrtSNextUp = B.buildFNeg(F32, SqrtSNextUp, Flags); auto SqrtVS = B.buildFMA(F32, NegSqrtSNextUp, SqrtS, SqrtX, Flags); auto Zero = B.buildFConstant(F32, 0.0f); - auto SqrtVPLE0 = B.buildFCmp(CmpInst::FCMP_OLE, S1, SqrtVP, Zero, Flags); + auto SqrtVPLE0 = B.buildFCmp(CmpInst::FCMP_OLE, I1, SqrtVP, Zero, Flags); SqrtS = B.buildSelect(F32, SqrtVPLE0, SqrtSNextDown, SqrtS, Flags).getReg(0); - auto SqrtVPVSGT0 = B.buildFCmp(CmpInst::FCMP_OGT, S1, SqrtVS, Zero, Flags); + auto SqrtVPVSGT0 = B.buildFCmp(CmpInst::FCMP_OGT, I1, SqrtVS, Zero, Flags); SqrtS = B.buildSelect(F32, SqrtVPVSGT0, SqrtSNextUp, SqrtS, Flags).getReg(0); } else { @@ -5316,7 +5400,7 @@ bool AMDGPULegalizerInfo::legalizeFSQRTF32(MachineInstr &MI, SqrtS = B.buildSelect(F32, NeedScale, ScaledDown, SqrtS, Flags).getReg(0); - auto IsZeroOrInf = B.buildIsFPClass(LLT::scalar(1), SqrtX, fcZero | fcPosInf); + auto IsZeroOrInf = B.buildIsFPClass(I1, SqrtX, fcZero | fcPosInf); B.buildSelect(Dst, IsZeroOrInf, SqrtX, SqrtS, Flags); MI.eraseFromParent(); @@ -5346,10 +5430,6 @@ bool AMDGPULegalizerInfo::legalizeFSQRTF64(MachineInstr &MI, // // sqrt(x) = g3 - const LLT S1 = LLT::scalar(1); - const LLT S32 = LLT::scalar(32); - const LLT F64 = LLT::scalar(64); - Register Dst = MI.getOperand(0).getReg(); assert(MRI.getType(Dst) == F64 && "only expect to lower f64 sqrt"); @@ -5358,12 +5438,12 @@ bool AMDGPULegalizerInfo::legalizeFSQRTF64(MachineInstr &MI, auto ScaleConstant = B.buildFConstant(F64, 0x1.0p-767); - auto ZeroInt = B.buildConstant(S32, 0); - auto Scaling = B.buildFCmp(FCmpInst::FCMP_OLT, S1, X, ScaleConstant); + auto ZeroInt = B.buildConstant(I32, 0); + auto Scaling = B.buildFCmp(FCmpInst::FCMP_OLT, I1, X, ScaleConstant); // Scale up 
input if it is too small. - auto ScaleUpFactor = B.buildConstant(S32, 256); - auto ScaleUp = B.buildSelect(S32, Scaling, ScaleUpFactor, ZeroInt); + auto ScaleUpFactor = B.buildConstant(I32, 256); + auto ScaleUp = B.buildSelect(I32, Scaling, ScaleUpFactor, ZeroInt); auto SqrtX = B.buildFLdexp(F64, X, ScaleUp, Flags); auto SqrtY = @@ -5390,15 +5470,15 @@ bool AMDGPULegalizerInfo::legalizeFSQRTF64(MachineInstr &MI, auto SqrtRet = B.buildFMA(F64, SqrtD1, SqrtH1, SqrtS2); // Scale down the result. - auto ScaleDownFactor = B.buildConstant(S32, -128); - auto ScaleDown = B.buildSelect(S32, Scaling, ScaleDownFactor, ZeroInt); + auto ScaleDownFactor = B.buildConstant(I32, -128); + auto ScaleDown = B.buildSelect(I32, Scaling, ScaleDownFactor, ZeroInt); SqrtRet = B.buildFLdexp(F64, SqrtRet, ScaleDown, Flags); // TODO: Switch to fcmp oeq 0 for finite only. Can't fully remove this check // with finite only or nsz because rsq(+/-0) = +/-inf // TODO: Check for DAZ and expand to subnormals - auto IsZeroOrInf = B.buildIsFPClass(LLT::scalar(1), SqrtX, fcZero | fcPosInf); + auto IsZeroOrInf = B.buildIsFPClass(I1, SqrtX, fcZero | fcPosInf); // If x is +INF, +0, or -0, use its original value B.buildSelect(Dst, IsZeroOrInf, SqrtX, SqrtRet, Flags); @@ -5411,11 +5491,11 @@ bool AMDGPULegalizerInfo::legalizeFSQRT(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { LLT Ty = MRI.getType(MI.getOperand(0).getReg()); - if (Ty == LLT::scalar(32)) + if (Ty.isFloat(32)) return legalizeFSQRTF32(MI, MRI, B); - if (Ty == LLT::scalar(64)) + if (Ty.isFloat(64)) return legalizeFSQRTF64(MI, MRI, B); - if (Ty == LLT::scalar(16)) + if (Ty.isFloat(16)) return legalizeFSQRTF16(MI, MRI, B); return false; } @@ -5438,14 +5518,11 @@ bool AMDGPULegalizerInfo::legalizeRsqClampIntrinsic(MachineInstr &MI, LLT Ty = MRI.getType(Dst); - const fltSemantics *FltSemantics; - if (Ty == LLT::scalar(32)) - FltSemantics = &APFloat::IEEEsingle(); - else if (Ty == LLT::scalar(64)) - FltSemantics = &APFloat::IEEEdouble(); - else + if (!Ty.isFloat()) return false; + const llvm::fltSemantics &FltSemantics = getFltSemanticForLLT(Ty); + auto Rsq = B.buildIntrinsic(Intrinsic::amdgcn_rsq, {Ty}) .addUse(Src) .setMIFlags(Flags); @@ -5455,11 +5532,11 @@ bool AMDGPULegalizerInfo::legalizeRsqClampIntrinsic(MachineInstr &MI, const SIMachineFunctionInfo *MFI = B.getMF().getInfo(); const bool UseIEEE = MFI->getMode().IEEE; - auto MaxFlt = B.buildFConstant(Ty, APFloat::getLargest(*FltSemantics)); + auto MaxFlt = B.buildFConstant(Ty, APFloat::getLargest(FltSemantics)); auto ClampMax = UseIEEE ? 
B.buildFMinNumIEEE(Ty, Rsq, MaxFlt, Flags) : B.buildFMinNum(Ty, Rsq, MaxFlt, Flags); - auto MinFlt = B.buildFConstant(Ty, APFloat::getLargest(*FltSemantics, true)); + auto MinFlt = B.buildFConstant(Ty, APFloat::getLargest(FltSemantics, true)); if (UseIEEE) B.buildFMaxNumIEEE(Dst, ClampMax, MinFlt, Flags); @@ -5477,6 +5554,18 @@ bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper, MachineIRBuilder &B = Helper.MIRBuilder; MachineRegisterInfo &MRI = *B.getMRI(); + for (auto &Use : MI.all_uses()) { + LLT Ty = MRI.getType(Use.getReg()); + if (Ty.isFloat() || Ty.isFloatVector()) + Helper.bitcastSrc(MI, Ty.changeToInteger(), Use.getOperandNo()); + } + + for (auto &Def : MI.all_defs()) { + LLT Ty = MRI.getType(Def.getReg()); + if (Ty.isFloat() || Ty.isFloatVector()) + Helper.bitcastDst(MI, Ty.changeToInteger(), Def.getOperandNo()); + } + bool IsPermLane16 = IID == Intrinsic::amdgcn_permlane16 || IID == Intrinsic::amdgcn_permlanex16; bool IsSetInactive = IID == Intrinsic::amdgcn_set_inactive || @@ -5547,15 +5636,15 @@ bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper, } if (Size < 32) { - Src0 = B.buildAnyExt(S32, Src0).getReg(0); + Src0 = B.buildAnyExt(I32, Src0).getReg(0); if (IID == Intrinsic::amdgcn_update_dpp || IsSetInactive || IsPermLane16) - Src1 = B.buildAnyExt(LLT::scalar(32), Src1).getReg(0); + Src1 = B.buildAnyExt(I32, Src1).getReg(0); if (IID == Intrinsic::amdgcn_writelane) - Src2 = B.buildAnyExt(LLT::scalar(32), Src2).getReg(0); + Src2 = B.buildAnyExt(I32, Src2).getReg(0); - Register LaneOpDst = createLaneOp(Src0, Src1, Src2, S32); + Register LaneOpDst = createLaneOp(Src0, Src1, Src2, I32); B.buildTrunc(DstReg, LaneOpDst); MI.eraseFromParent(); return true; @@ -5564,7 +5653,7 @@ bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper, if (Size % SplitSize != 0) return false; - LLT PartialResTy = LLT::scalar(SplitSize); + LLT PartialResTy = LLT::integer(SplitSize); if (Ty.isVector()) { LLT EltTy = Ty.getElementType(); unsigned EltSize = EltTy.getSizeInBits(); @@ -5612,7 +5701,7 @@ bool AMDGPULegalizerInfo::getImplicitArgPtr(Register DstReg, ST.getTargetLowering()->getImplicitParameterOffset( B.getMF(), AMDGPUTargetLowering::FIRST_IMPLICIT); LLT DstTy = MRI.getType(DstReg); - LLT IdxTy = LLT::scalar(DstTy.getSizeInBits()); + LLT IdxTy = LLT::integer(DstTy.getSizeInBits()); Register KernargPtrReg = MRI.createGenericVirtualRegister(DstTy); if (!loadInputValue(KernargPtrReg, B, @@ -5636,15 +5725,13 @@ bool AMDGPULegalizerInfo::legalizePointerAsRsrcIntrin( Register NumRecords = MI.getOperand(4).getReg(); Register Flags = MI.getOperand(5).getReg(); - LLT S32 = LLT::scalar(32); - B.setInsertPt(B.getMBB(), ++B.getInsertPt()); - auto Unmerge = B.buildUnmerge(S32, Pointer); + auto Unmerge = B.buildUnmerge(I32, Pointer); Register LowHalf = Unmerge.getReg(0); Register HighHalf = Unmerge.getReg(1); - auto AndMask = B.buildConstant(S32, 0x0000ffff); - auto Masked = B.buildAnd(S32, HighHalf, AndMask); + auto AndMask = B.buildConstant(I32, 0x0000ffff); + auto Masked = B.buildAnd(I32, HighHalf, AndMask); MachineInstrBuilder NewHighHalf = Masked; std::optional StrideConst = @@ -5654,13 +5741,13 @@ bool AMDGPULegalizerInfo::legalizePointerAsRsrcIntrin( if (StrideConst) { uint32_t StrideVal = StrideConst->Value.getZExtValue(); uint32_t ShiftedStrideVal = StrideVal << 16; - ShiftedStride = B.buildConstant(S32, ShiftedStrideVal); + ShiftedStride = B.buildConstant(I32, ShiftedStrideVal); } else { - auto ExtStride = B.buildAnyExt(S32, Stride); - auto ShiftConst = 
B.buildConstant(S32, 16); - ShiftedStride = B.buildShl(S32, ExtStride, ShiftConst); + auto ExtStride = B.buildAnyExt(I32, Stride); + auto ShiftConst = B.buildConstant(I32, 16); + ShiftedStride = B.buildShl(I32, ExtStride, ShiftConst); } - NewHighHalf = B.buildOr(S32, Masked, ShiftedStride); + NewHighHalf = B.buildOr(I32, Masked, ShiftedStride); } Register NewHighHalfReg = NewHighHalf.getReg(0); B.buildMergeValues(Result, {LowHalf, NewHighHalfReg, NumRecords, Flags}); @@ -5719,7 +5806,7 @@ bool AMDGPULegalizerInfo::legalizeIsAddrSpace(MachineInstr &MI, MachineIRBuilder &B, unsigned AddrSpace) const { Register ApertureReg = getSegmentAperture(AddrSpace, MRI, B); - auto Unmerge = B.buildUnmerge(LLT::scalar(32), MI.getOperand(2).getReg()); + auto Unmerge = B.buildUnmerge(I32, MI.getOperand(2).getReg()); Register Hi32 = Unmerge.getReg(1); B.buildICmp(ICmpInst::ICMP_EQ, MI.getOperand(0), Hi32, ApertureReg); @@ -5739,7 +5826,6 @@ AMDGPULegalizerInfo::splitBufferOffsets(MachineIRBuilder &B, const unsigned MaxImm = SIInstrInfo::getMaxMUBUFImmOffset(ST); Register BaseReg; unsigned ImmOffset; - const LLT S32 = LLT::scalar(32); MachineRegisterInfo &MRI = *B.getMRI(); std::tie(BaseReg, ImmOffset) = @@ -5766,15 +5852,15 @@ AMDGPULegalizerInfo::splitBufferOffsets(MachineIRBuilder &B, if (Overflow != 0) { if (!BaseReg) { - BaseReg = B.buildConstant(S32, Overflow).getReg(0); + BaseReg = B.buildConstant(I32, Overflow).getReg(0); } else { - auto OverflowVal = B.buildConstant(S32, Overflow); - BaseReg = B.buildAdd(S32, BaseReg, OverflowVal).getReg(0); + auto OverflowVal = B.buildConstant(I32, Overflow); + BaseReg = B.buildAdd(I32, BaseReg, OverflowVal).getReg(0); } } if (!BaseReg) - BaseReg = B.buildConstant(S32, 0).getReg(0); + BaseReg = B.buildConstant(I32, 0).getReg(0); return std::pair(BaseReg, ImmOffset); } @@ -5784,61 +5870,59 @@ Register AMDGPULegalizerInfo::handleD16VData(MachineIRBuilder &B, MachineRegisterInfo &MRI, Register Reg, bool ImageStore) const { - const LLT S16 = LLT::scalar(16); - const LLT S32 = LLT::scalar(32); LLT StoreVT = MRI.getType(Reg); - assert(StoreVT.isVector() && StoreVT.getElementType() == S16); + assert(StoreVT.isVector() && StoreVT.getElementType().isScalar(16)); if (ST.hasUnpackedD16VMem()) { - auto Unmerge = B.buildUnmerge(S16, Reg); + if (StoreVT.isFloatVector()) + Reg = B.buildBitcast(StoreVT.changeToInteger(), Reg).getReg(0); + + auto Unmerge = B.buildUnmerge(I16, Reg); SmallVector WideRegs; for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I) - WideRegs.push_back(B.buildAnyExt(S32, Unmerge.getReg(I)).getReg(0)); + WideRegs.push_back(B.buildAnyExt(I32, Unmerge.getReg(I)).getReg(0)); int NumElts = StoreVT.getNumElements(); - return B.buildBuildVector(LLT::fixed_vector(NumElts, S32), WideRegs) + return B.buildBuildVector(LLT::fixed_vector(NumElts, I32), WideRegs) .getReg(0); } if (ImageStore && ST.hasImageStoreD16Bug()) { if (StoreVT.getNumElements() == 2) { SmallVector PackedRegs; - Reg = B.buildBitcast(S32, Reg).getReg(0); + Reg = B.buildBitcast(I32, Reg).getReg(0); PackedRegs.push_back(Reg); - PackedRegs.resize(2, B.buildUndef(S32).getReg(0)); - return B.buildBuildVector(LLT::fixed_vector(2, S32), PackedRegs) - .getReg(0); + PackedRegs.resize(2, B.buildUndef(I32).getReg(0)); + return B.buildBuildVector(V2I32, PackedRegs).getReg(0); } if (StoreVT.getNumElements() == 3) { SmallVector PackedRegs; - auto Unmerge = B.buildUnmerge(S16, Reg); + auto Unmerge = B.buildUnmerge(I16, Reg); for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I) 
PackedRegs.push_back(Unmerge.getReg(I)); - PackedRegs.resize(6, B.buildUndef(S16).getReg(0)); - Reg = B.buildBuildVector(LLT::fixed_vector(6, S16), PackedRegs).getReg(0); - return B.buildBitcast(LLT::fixed_vector(3, S32), Reg).getReg(0); + PackedRegs.resize(6, B.buildUndef(I16).getReg(0)); + Reg = B.buildBuildVector(V6I16, PackedRegs).getReg(0); + return B.buildBitcast(V3I32, Reg).getReg(0); } if (StoreVT.getNumElements() == 4) { SmallVector PackedRegs; - Reg = B.buildBitcast(LLT::fixed_vector(2, S32), Reg).getReg(0); - auto Unmerge = B.buildUnmerge(S32, Reg); + Reg = B.buildBitcast(V2I32, Reg).getReg(0); + auto Unmerge = B.buildUnmerge(I32, Reg); for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I) PackedRegs.push_back(Unmerge.getReg(I)); - PackedRegs.resize(4, B.buildUndef(S32).getReg(0)); - return B.buildBuildVector(LLT::fixed_vector(4, S32), PackedRegs) - .getReg(0); + PackedRegs.resize(4, B.buildUndef(I32).getReg(0)); + return B.buildBuildVector(V4I32, PackedRegs).getReg(0); } llvm_unreachable("invalid data type"); } - if (StoreVT == LLT::fixed_vector(3, S16)) { - Reg = B.buildPadVectorWithUndefElements(LLT::fixed_vector(4, S16), Reg) - .getReg(0); + if (StoreVT.isFixedVector(3, 16)) { + Reg = B.buildPadVectorWithUndefElements(V4I16, Reg).getReg(0); } return Reg; } @@ -5849,8 +5933,6 @@ Register AMDGPULegalizerInfo::fixStoreSourceType(MachineIRBuilder &B, MachineRegisterInfo *MRI = B.getMRI(); LLT Ty = MRI->getType(VData); - const LLT S16 = LLT::scalar(16); - // Fixup buffer resources themselves needing to be v4i128. if (hasBufferRsrcWorkaround(Ty)) return castBufferRsrcToV4I32(VData, B); @@ -5860,13 +5942,15 @@ Register AMDGPULegalizerInfo::fixStoreSourceType(MachineIRBuilder &B, VData = B.buildBitcast(Ty, VData).getReg(0); } // Fixup illegal register types for i8 stores. - if (Ty == LLT::scalar(8) || Ty == S16) { - Register AnyExt = B.buildAnyExt(LLT::scalar(32), VData).getReg(0); + if (Ty.isScalar(8) || Ty.isScalar(16)) { + if (Ty.isFloat()) + VData = B.buildBitcast(I16, VData).getReg(0); + Register AnyExt = B.buildAnyExt(I32, VData).getReg(0); return AnyExt; } if (Ty.isVector()) { - if (Ty.getElementType() == S16 && Ty.getNumElements() <= 4) { + if (Ty.getElementType().isScalar(16) && Ty.getNumElements() <= 4) { if (IsFormat) return handleD16VData(B, *MRI, VData); } @@ -5886,7 +5970,6 @@ bool AMDGPULegalizerInfo::legalizeBufferStore(MachineInstr &MI, LLT Ty = MRI.getType(VData); LLT EltTy = Ty.getScalarType(); const bool IsD16 = IsFormat && (EltTy.getSizeInBits() == 16); - const LLT S32 = LLT::scalar(32); MachineMemOperand *MMO = *MI.memoperands_begin(); const int MemSize = MMO->getSize().getValue(); @@ -5910,7 +5993,7 @@ bool AMDGPULegalizerInfo::legalizeBufferStore(MachineInstr &MI, VIndex = MI.getOperand(3).getReg(); OpOffset = 1; } else { - VIndex = B.buildConstant(S32, 0).getReg(0); + VIndex = B.buildConstant(I32, 0).getReg(0); } Register VOffset = MI.getOperand(3 + OpOffset).getReg(); @@ -5998,7 +6081,6 @@ bool AMDGPULegalizerInfo::legalizeBufferLoad(MachineInstr &MI, // FIXME: Verifier should enforce 1 MMO for these intrinsics. 
MachineMemOperand *MMO = *MI.memoperands_begin(); const LLT MemTy = MMO->getMemoryType(); - const LLT S32 = LLT::scalar(32); Register Dst = MI.getOperand(0).getReg(); @@ -6024,7 +6106,7 @@ bool AMDGPULegalizerInfo::legalizeBufferLoad(MachineInstr &MI, VIndex = MI.getOperand(3 + OpOffset).getReg(); ++OpOffset; } else { - VIndex = B.buildConstant(S32, 0).getReg(0); + VIndex = B.buildConstant(I32, 0).getReg(0); } Register VOffset = MI.getOperand(3 + OpOffset).getReg(); @@ -6040,6 +6122,14 @@ bool AMDGPULegalizerInfo::legalizeBufferLoad(MachineInstr &MI, unsigned ImmOffset; LLT Ty = MRI.getType(Dst); + if (Ty.isFloat() || Ty.isFloatVector()) { + Observer.changingInstr(MI); + Ty = Ty.changeToInteger(); + Helper.bitcastDst(MI, Ty, 0); + Observer.changedInstr(MI); + Dst = MI.getOperand(0).getReg(); + B.setInsertPt(B.getMBB(), MI); + } // Make addrspace 8 pointers loads into 4xs32 loads here, so the rest of the // logic doesn't have to handle that case. if (hasBufferRsrcWorkaround(Ty)) { @@ -6101,20 +6191,24 @@ bool AMDGPULegalizerInfo::legalizeBufferLoad(MachineInstr &MI, if (IsTFE) { unsigned NumValueDWords = divideCeil(Ty.getSizeInBits(), 32); unsigned NumLoadDWords = NumValueDWords + 1; - LLT LoadTy = LLT::fixed_vector(NumLoadDWords, S32); + LLT LoadTy = LLT::fixed_vector(NumLoadDWords, I32); Register LoadDstReg = B.getMRI()->createGenericVirtualRegister(LoadTy); buildBufferLoad(Opc, LoadDstReg, RSrc, VIndex, VOffset, SOffset, ImmOffset, Format, AuxiliaryData, MMO, IsTyped, HasVIndex, B); if (MemTy.getSizeInBits() < 32) { - Register ExtDst = B.getMRI()->createGenericVirtualRegister(S32); + Register ExtDst = B.getMRI()->createGenericVirtualRegister(I32); B.buildUnmerge({ExtDst, StatusDst}, LoadDstReg); - B.buildTrunc(Dst, ExtDst); + if (Ty.isFloat()) { + B.buildBitcast(Dst, B.buildTrunc(I16, ExtDst)); + } else { + B.buildTrunc(Dst, ExtDst); + } } else if (NumValueDWords == 1) { B.buildUnmerge({Dst, StatusDst}, LoadDstReg); } else { SmallVector LoadElts; for (unsigned I = 0; I != NumValueDWords; ++I) - LoadElts.push_back(B.getMRI()->createGenericVirtualRegister(S32)); + LoadElts.push_back(B.getMRI()->createGenericVirtualRegister(I32)); LoadElts.push_back(StatusDst); B.buildUnmerge(LoadElts, LoadDstReg); LoadElts.truncate(NumValueDWords); @@ -6122,23 +6216,29 @@ bool AMDGPULegalizerInfo::legalizeBufferLoad(MachineInstr &MI, } } else if ((!IsD16 && MemTy.getSizeInBits() < 32) || (IsD16 && !Ty.isVector())) { - Register LoadDstReg = B.getMRI()->createGenericVirtualRegister(S32); + Register LoadDstReg = B.getMRI()->createGenericVirtualRegister(I32); buildBufferLoad(Opc, LoadDstReg, RSrc, VIndex, VOffset, SOffset, ImmOffset, Format, AuxiliaryData, MMO, IsTyped, HasVIndex, B); B.setInsertPt(B.getMBB(), ++B.getInsertPt()); - B.buildTrunc(Dst, LoadDstReg); + if (Ty.isFloat()) { + B.buildBitcast(Dst, B.buildTrunc(I16, LoadDstReg)); + } else { + B.buildTrunc(Dst, LoadDstReg); + } } else if (Unpacked && IsD16 && Ty.isVector()) { - LLT UnpackedTy = Ty.changeElementSize(32); + LLT UnpackedTy = Ty.changeElementType(I32); Register LoadDstReg = B.getMRI()->createGenericVirtualRegister(UnpackedTy); buildBufferLoad(Opc, LoadDstReg, RSrc, VIndex, VOffset, SOffset, ImmOffset, Format, AuxiliaryData, MMO, IsTyped, HasVIndex, B); B.setInsertPt(B.getMBB(), ++B.getInsertPt()); // FIXME: G_TRUNC should work, but legalization currently fails - auto Unmerge = B.buildUnmerge(S32, LoadDstReg); + auto Unmerge = B.buildUnmerge(I32, LoadDstReg); SmallVector Repack; for (unsigned I = 0, N = Unmerge->getNumOperands() - 1; I != N; 
++I) Repack.push_back(B.buildTrunc(EltTy, Unmerge.getReg(I)).getReg(0)); + B.buildMergeLikeInstr(Dst, Repack); + } else { buildBufferLoad(Opc, Dst, RSrc, VIndex, VOffset, SOffset, ImmOffset, Format, AuxiliaryData, MMO, IsTyped, HasVIndex, B); @@ -6271,7 +6371,7 @@ bool AMDGPULegalizerInfo::legalizeBufferAtomic(MachineInstr &MI, VIndex = MI.getOperand(4 + OpOffset).getReg(); ++OpOffset; } else { - VIndex = B.buildConstant(LLT::scalar(32), 0).getReg(0); + VIndex = B.buildConstant(I32, 0).getReg(0); } Register VOffset = MI.getOperand(4 + OpOffset).getReg(); @@ -6310,8 +6410,6 @@ static void packImage16bitOpsToDwords(MachineIRBuilder &B, MachineInstr &MI, unsigned ArgOffset, const AMDGPU::ImageDimIntrinsicInfo *Intr, bool IsA16, bool IsG16) { - const LLT S16 = LLT::scalar(16); - const LLT V2S16 = LLT::fixed_vector(2, 16); auto EndIdx = Intr->VAddrEnd; for (unsigned I = Intr->VAddrStart; I < EndIdx; I++) { @@ -6325,18 +6423,18 @@ static void packImage16bitOpsToDwords(MachineIRBuilder &B, MachineInstr &MI, (I >= Intr->GradientStart && I < Intr->CoordStart && !IsG16) || (I >= Intr->CoordStart && !IsA16)) { if ((I < Intr->GradientStart) && IsA16 && - (B.getMRI()->getType(AddrReg) == S16)) { + (B.getMRI()->getType(AddrReg).isScalar(16))) { assert(I == Intr->BiasIndex && "Got unexpected 16-bit extra argument"); // Special handling of bias when A16 is on. Bias is of type half but // occupies full 32-bit. PackedAddrs.push_back( - B.buildBuildVector(V2S16, {AddrReg, B.buildUndef(S16).getReg(0)}) + B.buildBuildVector(V2I16, {AddrReg, B.buildUndef(I16).getReg(0)}) .getReg(0)); } else { assert((!IsA16 || Intr->NumBiasArgs == 0 || I != Intr->BiasIndex) && "Bias needs to be converted to 16 bit in A16 mode"); // Handle any gradient or coordinate operands that should not be packed - AddrReg = B.buildBitcast(V2S16, AddrReg).getReg(0); + AddrReg = B.buildBitcast(V2I16, AddrReg).getReg(0); PackedAddrs.push_back(AddrReg); } } else { @@ -6351,12 +6449,12 @@ static void packImage16bitOpsToDwords(MachineIRBuilder &B, MachineInstr &MI, // Check for _L to _LZ optimization !MI.getOperand(ArgOffset + I + 1).isReg()) { PackedAddrs.push_back( - B.buildBuildVector(V2S16, {AddrReg, B.buildUndef(S16).getReg(0)}) + B.buildBuildVector(V2I16, {AddrReg, B.buildUndef(I16).getReg(0)}) .getReg(0)); } else { PackedAddrs.push_back( B.buildBuildVector( - V2S16, {AddrReg, MI.getOperand(ArgOffset + I + 1).getReg()}) + V2I16, {AddrReg, MI.getOperand(ArgOffset + I + 1).getReg()}) .getReg(0)); ++I; } @@ -6368,21 +6466,19 @@ static void packImage16bitOpsToDwords(MachineIRBuilder &B, MachineInstr &MI, /// and replace the remaining operands with $noreg. static void convertImageAddrToPacked(MachineIRBuilder &B, MachineInstr &MI, int DimIdx, int NumVAddrs) { - const LLT S32 = LLT::scalar(32); - (void)S32; SmallVector AddrRegs; for (int I = 0; I != NumVAddrs; ++I) { MachineOperand &SrcOp = MI.getOperand(DimIdx + I); if (SrcOp.isReg()) { AddrRegs.push_back(SrcOp.getReg()); - assert(B.getMRI()->getType(SrcOp.getReg()) == S32); + assert(B.getMRI()->getType(SrcOp.getReg()).isScalar(32)); } } int NumAddrRegs = AddrRegs.size(); if (NumAddrRegs != 1) { auto VAddr = - B.buildBuildVector(LLT::fixed_vector(NumAddrRegs, 32), AddrRegs); + B.buildBuildVector(LLT::fixed_vector(NumAddrRegs, I32), AddrRegs); MI.getOperand(DimIdx).setReg(VAddr.getReg(0)); } @@ -6407,7 +6503,7 @@ static void convertImageAddrToPacked(MachineIRBuilder &B, MachineInstr &MI, /// the intrinsic's arguments. 
In cases like a16 addresses, this requires /// padding now unnecessary arguments with $noreg. bool AMDGPULegalizerInfo::legalizeImageIntrinsic( - MachineInstr &MI, MachineIRBuilder &B, GISelChangeObserver &Observer, + MachineInstr &MI, MachineIRBuilder &B, LegalizerHelper &Helper, const AMDGPU::ImageDimIntrinsicInfo *Intr) const { const MachineFunction &MF = *MI.getMF(); @@ -6420,35 +6516,13 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic( // TODO: Do we need to guard against already legalized intrinsics? const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = AMDGPU::getMIMGBaseOpcodeInfo(Intr->BaseOpcode); - MachineRegisterInfo *MRI = B.getMRI(); - const LLT S32 = LLT::scalar(32); - const LLT S16 = LLT::scalar(16); - const LLT V2S16 = LLT::fixed_vector(2, 16); - unsigned DMask = 0; - Register VData; - LLT Ty; - - if (!BaseOpcode->NoReturn || BaseOpcode->Store) { - VData = MI.getOperand(NumDefs == 0 ? 1 : 0).getReg(); - Ty = MRI->getType(VData); - } const bool IsAtomicPacked16Bit = (BaseOpcode->BaseOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_F16 || BaseOpcode->BaseOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_BF16); - // Check for 16 bit addresses and pack if true. - LLT GradTy = - MRI->getType(MI.getOperand(ArgOffset + Intr->GradientStart).getReg()); - LLT AddrTy = - MRI->getType(MI.getOperand(ArgOffset + Intr->CoordStart).getReg()); - const bool IsG16 = - ST.hasG16() ? (BaseOpcode->Gradients && GradTy == S16) : GradTy == S16; - const bool IsA16 = AddrTy == S16; - const bool IsD16 = !IsAtomicPacked16Bit && Ty.getScalarType() == S16; - int DMaskLanes = 0; if (!BaseOpcode->Atomic) { DMask = MI.getOperand(ArgOffset + Intr->DMaskIndex).getImm(); @@ -6464,9 +6538,53 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic( } } + GISelChangeObserver &Observer = Helper.Observer; Observer.changingInstr(MI); auto ChangedInstr = make_scope_exit([&] { Observer.changedInstr(MI); }); + for (auto &Op : MI.all_defs()) { + LLT Ty = MRI->getType(Op.getReg()); + LLT NewTy = Ty.changeToInteger(); + + if (!Ty.isFloat() && !Ty.isFloatVector()) + continue; + + unsigned OpIdx = Op.getOperandNo(); + Helper.bitcastDst(MI, NewTy, OpIdx); + } + + B.setInsertPt(*MI.getParent(), MI.getIterator()); + + for (auto &Op : MI.all_uses()) { + LLT Ty = MRI->getType(Op.getReg()); + LLT NewTy = Ty.changeToInteger(); + + if (!Ty.isFloat() && !Ty.isFloatVector()) + continue; + + unsigned OpIdx = Op.getOperandNo(); + Helper.bitcastSrc(MI, NewTy, OpIdx); + } + + Register VData; + LLT Ty; + + if (!BaseOpcode->NoReturn || BaseOpcode->Store) { + VData = MI.getOperand(NumDefs == 0 ? 1 : 0).getReg(); + Ty = MRI->getType(VData); + } + + // Check for 16 bit addresses and pack if true. + LLT GradTy = + MRI->getType(MI.getOperand(ArgOffset + Intr->GradientStart).getReg()); + LLT AddrTy = + MRI->getType(MI.getOperand(ArgOffset + Intr->CoordStart).getReg()); + const bool IsG16 = ST.hasG16() + ? (BaseOpcode->Gradients && GradTy.isScalar(16)) + : GradTy.isScalar(16); + const bool IsA16 = AddrTy.isScalar(16); + const bool IsD16 = !IsAtomicPacked16Bit && Ty.getScalarType().isScalar(16); + const unsigned StoreOpcode = IsD16 ? AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16 : AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE; const unsigned LoadOpcode = IsD16 ? 
AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16 @@ -6540,13 +6658,13 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic( if (UsePartialNSA) { // Pack registers that would go over NSAMaxSize into last VAddr register LLT PackedAddrTy = - LLT::fixed_vector(2 * (PackedRegs.size() - NSAMaxSize + 1), 16); + LLT::fixed_vector(2 * (PackedRegs.size() - NSAMaxSize + 1), I16); auto Concat = B.buildConcatVectors( PackedAddrTy, ArrayRef(PackedRegs).slice(NSAMaxSize - 1)); PackedRegs[NSAMaxSize - 1] = Concat.getReg(0); PackedRegs.resize(NSAMaxSize); } else if (!UseNSA && PackedRegs.size() > 1) { - LLT PackedAddrTy = LLT::fixed_vector(2 * PackedRegs.size(), 16); + LLT PackedAddrTy = LLT::fixed_vector(2 * PackedRegs.size(), I16); auto Concat = B.buildConcatVectors(PackedAddrTy, PackedRegs); PackedRegs[0] = Concat.getReg(0); PackedRegs.resize(1); @@ -6655,17 +6773,17 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic( if (IsD16 && ST.hasUnpackedD16VMem()) { RoundedTy = - LLT::scalarOrVector(ElementCount::getFixed(AdjustedNumElts), 32); - TFETy = LLT::fixed_vector(AdjustedNumElts + 1, 32); - RegTy = S32; + LLT::scalarOrVector(ElementCount::getFixed(AdjustedNumElts), I32); + TFETy = LLT::fixed_vector(AdjustedNumElts + 1, I32); + RegTy = I32; } else { unsigned EltSize = EltTy.getSizeInBits(); unsigned RoundedElts = (AdjustedTy.getSizeInBits() + 31) / 32; unsigned RoundedSize = 32 * RoundedElts; RoundedTy = LLT::scalarOrVector( - ElementCount::getFixed(RoundedSize / EltSize), EltSize); - TFETy = LLT::fixed_vector(RoundedSize / 32 + 1, S32); - RegTy = !IsTFE && EltSize == 16 ? V2S16 : S32; + ElementCount::getFixed(RoundedSize / EltSize), EltTy); + TFETy = LLT::fixed_vector(RoundedSize / 32 + 1, I32); + RegTy = !IsTFE && EltSize == 16 ? V2I16 : I32; } // The return type does not need adjustment. @@ -6694,14 +6812,14 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic( if (IsTFE) { Dst1Reg = MI.getOperand(1).getReg(); - if (MRI->getType(Dst1Reg) != S32) + if (!MRI->getType(Dst1Reg).isScalar(32)) return false; // TODO: Make sure the TFE operand bit is set. MI.removeOperand(1); // Handle the easy case that requires no repack instructions. - if (Ty == S32) { + if (Ty.isScalar(32)) { B.buildUnmerge({DstReg, Dst1Reg}, NewResultReg); return true; } @@ -6736,7 +6854,7 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic( } // Avoid a build/concat_vector of 1 entry. - if (Ty == V2S16 && NumDataRegs == 1 && !ST.hasUnpackedD16VMem()) { + if (Ty.isFixedVector(2, 16) && NumDataRegs == 1 && !ST.hasUnpackedD16VMem()) { B.buildBitcast(DstReg, ResultRegs[0]); return true; } @@ -6749,12 +6867,12 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic( // // TODO: We don't really need to use load s32 elements. We would only need one // cast for the TFE result if a multiple of v2s16 was used. - if (RegTy != V2S16 && !ST.hasUnpackedD16VMem()) { + if (!RegTy.isFixedVector(2, 16) && !ST.hasUnpackedD16VMem()) { for (Register &Reg : ResultRegs) - Reg = B.buildBitcast(V2S16, Reg).getReg(0); + Reg = B.buildBitcast(V2I16, Reg).getReg(0); } else if (ST.hasUnpackedD16VMem()) { for (Register &Reg : ResultRegs) - Reg = B.buildTrunc(S16, Reg).getReg(0); + Reg = B.buildTrunc(I16, Reg).getReg(0); } } @@ -6774,18 +6892,16 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic( return true; } - assert(!ST.hasUnpackedD16VMem() && ResTy == V2S16); + assert(!ST.hasUnpackedD16VMem() && ResTy.isFixedVector(2, 16)); const int RegsToCover = (Ty.getSizeInBits() + 31) / 32; // Deal with the one annoying legal case. 
- const LLT V3S16 = LLT::fixed_vector(3, 16); - if (Ty == V3S16) { + if (Ty.isFixedVector(3, 16)) { if (IsTFE) { if (ResultRegs.size() == 1) { NewResultReg = ResultRegs[0]; } else if (ResultRegs.size() == 2) { - LLT V4S16 = LLT::fixed_vector(4, 16); - NewResultReg = B.buildConcatVectors(V4S16, ResultRegs).getReg(0); + NewResultReg = B.buildConcatVectors(V4I16, ResultRegs).getReg(0); } else { return false; } @@ -6822,7 +6938,7 @@ bool AMDGPULegalizerInfo::legalizeSBufferLoad(LegalizerHelper &Helper, : AMDGPU::G_AMDGPU_S_BUFFER_LOAD_USHORT; // The 8-bit and 16-bit scalar buffer load instructions have 32-bit // destination register. - Dst = B.getMRI()->createGenericVirtualRegister(LLT::scalar(32)); + Dst = B.getMRI()->createGenericVirtualRegister(I32); } else { Opc = AMDGPU::G_AMDGPU_S_BUFFER_LOAD; Dst = OrigDst; @@ -6835,7 +6951,7 @@ bool AMDGPULegalizerInfo::legalizeSBufferLoad(LegalizerHelper &Helper, Ty = castBufferRsrcFromV4I32(MI, B, *B.getMRI(), 0); B.setInsertPt(B.getMBB(), MI); } - if (shouldBitcastLoadStoreType(ST, Ty, LLT::scalar(Size))) { + if (shouldBitcastLoadStoreType(ST, Ty, LLT::integer(Size))) { Ty = getBitcastRegisterType(Ty); Helper.bitcastDst(MI, Ty, 0); B.setInsertPt(B.getMBB(), MI); @@ -6860,7 +6976,13 @@ bool AMDGPULegalizerInfo::legalizeSBufferLoad(LegalizerHelper &Helper, if (Dst != OrigDst) { MI.getOperand(0).setReg(Dst); B.setInsertPt(B.getMBB(), ++B.getInsertPt()); - B.buildTrunc(OrigDst, Dst); + + if (Ty.isFloat()) { + auto Trunc = B.buildTrunc(Ty.changeToInteger(), Dst); + B.buildBitcast(OrigDst, Trunc); + } else { + B.buildTrunc(OrigDst, Dst); + } } // If we don't have 96-bit result scalar loads, widening to 128-bit should @@ -6870,7 +6992,7 @@ bool AMDGPULegalizerInfo::legalizeSBufferLoad(LegalizerHelper &Helper, if (Ty.isVector()) Helper.moreElementsVectorDst(MI, getPow2VectorType(Ty), 0); else - Helper.widenScalarDst(MI, getPow2ScalarType(Ty), 0); + Helper.widenScalarDst(MI, getPow2IntegerType(Ty), 0); } Observer.changedInstr(MI); @@ -6933,8 +7055,6 @@ bool AMDGPULegalizerInfo::legalizeTrapEndpgm( bool AMDGPULegalizerInfo::legalizeTrapHsaQueuePtr( MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { MachineFunction &MF = B.getMF(); - const LLT S64 = LLT::scalar(64); - Register SGPR01(AMDGPU::SGPR0_SGPR1); // For code object version 5, queue_ptr is passed through implicit kernarg. 
if (AMDGPU::getAMDHSACodeObjectVersion(*MF.getFunction().getParent()) >= @@ -6957,15 +7077,15 @@ bool AMDGPULegalizerInfo::legalizeTrapHsaQueuePtr( PtrInfo, MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable | MachineMemOperand::MOInvariant, - LLT::scalar(64), commonAlignment(Align(64), Offset)); + I64, commonAlignment(Align(64), Offset)); // Pointer address Register LoadAddr = MRI.createGenericVirtualRegister( LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64)); B.buildPtrAdd(LoadAddr, KernargPtrReg, - B.buildConstant(LLT::scalar(64), Offset).getReg(0)); + B.buildConstant(I64, Offset).getReg(0)); // Load address - Register Temp = B.buildLoad(S64, LoadAddr, *MMO).getReg(0); + Register Temp = B.buildLoad(I64, LoadAddr, *MMO).getReg(0); B.buildCopy(SGPR01, Temp); B.buildInstr(AMDGPU::S_TRAP) .addImm(static_cast(GCNSubtarget::TrapID::LLVMAMDHSATrap)) @@ -7033,11 +7153,6 @@ bool AMDGPULegalizerInfo::legalizeDebugTrap(MachineInstr &MI, bool AMDGPULegalizerInfo::legalizeBVHIntersectRayIntrinsic( MachineInstr &MI, MachineIRBuilder &B) const { MachineRegisterInfo &MRI = *B.getMRI(); - const LLT S16 = LLT::scalar(16); - const LLT S32 = LLT::scalar(32); - const LLT V2S16 = LLT::fixed_vector(2, 16); - const LLT V3S32 = LLT::fixed_vector(3, 32); - Register DstReg = MI.getOperand(0).getReg(); Register NodePtr = MI.getOperand(2).getReg(); Register RayExtent = MI.getOperand(3).getReg(); @@ -7046,6 +7161,13 @@ bool AMDGPULegalizerInfo::legalizeBVHIntersectRayIntrinsic( Register RayInvDir = MI.getOperand(6).getReg(); Register TDescr = MI.getOperand(7).getReg(); + // LLT DstTy = MRI.getType(DstReg); + LLT NodePtrTy = MRI.getType(NodePtr); + LLT RayExtentTy = MRI.getType(RayExtent); + LLT RayOriginTy = MRI.getType(RayOrigin); + LLT RayDirTy = MRI.getType(RayDir); + LLT RayInvDirTy = MRI.getType(RayInvDir); + if (!ST.hasGFX10_AEncoding()) { DiagnosticInfoUnsupported BadIntrin(B.getMF().getFunction(), "intrinsic not supported on subtarget", @@ -7057,14 +7179,24 @@ bool AMDGPULegalizerInfo::legalizeBVHIntersectRayIntrinsic( const bool IsGFX11 = AMDGPU::isGFX11(ST); const bool IsGFX11Plus = AMDGPU::isGFX11Plus(ST); const bool IsGFX12Plus = AMDGPU::isGFX12Plus(ST); - const bool IsA16 = MRI.getType(RayDir).getElementType().getSizeInBits() == 16; - const bool Is64 = MRI.getType(NodePtr).getSizeInBits() == 64; + const bool IsA16 = RayDirTy.getElementType().isFloat(16); + const bool Is64 = NodePtrTy.isInteger(64); const unsigned NumVDataDwords = 4; const unsigned NumVAddrDwords = IsA16 ? (Is64 ? 9 : 8) : (Is64 ? 12 : 11); const unsigned NumVAddrs = IsGFX11Plus ? (IsA16 ? 
4 : 5) : NumVAddrDwords; const bool UseNSA = IsGFX12Plus || (ST.hasNSAEncoding() && NumVAddrs <= ST.getNSAMaxSize()); + assert(NodePtrTy.isInteger(32) || NodePtrTy.isInteger(64)); + assert(RayExtentTy.isFloat(32)); + assert(RayOriginTy.isFixedVector(3, 32) && RayOriginTy.isFloatVector()); + assert((RayDirTy.isFixedVector(3, 32) || RayDirTy.isFixedVector(3, 16)) && + RayDirTy.isFloatVector()); + assert( + (RayInvDirTy.isFixedVector(3, 32) || RayInvDirTy.isFixedVector(3, 16)) && + RayInvDirTy.isFloatVector()); + assert(RayDirTy == RayInvDirTy); + const unsigned BaseOpcodes[2][2] = { {AMDGPU::IMAGE_BVH_INTERSECT_RAY, AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16}, {AMDGPU::IMAGE_BVH64_INTERSECT_RAY, @@ -7085,34 +7217,39 @@ bool AMDGPULegalizerInfo::legalizeBVHIntersectRayIntrinsic( } assert(Opcode != -1); + auto CastRayExtent = B.buildBitcast(I32, RayExtent); + SmallVector Ops; if (UseNSA && IsGFX11Plus) { - auto packLanes = [&Ops, &S32, &V3S32, &B](Register Src) { - auto Unmerge = B.buildUnmerge({S32, S32, S32}, Src); + auto packLanes = [&Ops, &B](Register Src) { + auto Bitcast = B.buildBitcast(V3I32, Src); + auto Unmerge = B.buildUnmerge({I32, I32, I32}, Bitcast); auto Merged = B.buildMergeLikeInstr( - V3S32, {Unmerge.getReg(0), Unmerge.getReg(1), Unmerge.getReg(2)}); + V3I32, {Unmerge.getReg(0), Unmerge.getReg(1), Unmerge.getReg(2)}); Ops.push_back(Merged.getReg(0)); }; Ops.push_back(NodePtr); - Ops.push_back(RayExtent); + Ops.push_back(CastRayExtent.getReg(0)); packLanes(RayOrigin); if (IsA16) { - auto UnmergeRayDir = B.buildUnmerge({S16, S16, S16}, RayDir); - auto UnmergeRayInvDir = B.buildUnmerge({S16, S16, S16}, RayInvDir); + auto BitcastRayDir = B.buildBitcast(V3I16, RayDir); + auto UnmergeRayDir = B.buildUnmerge({I16, I16, I16}, BitcastRayDir); + auto BitcastRayInvDir = B.buildBitcast(V3I16, RayInvDir); + auto UnmergeRayInvDir = B.buildUnmerge({I16, I16, I16}, BitcastRayInvDir); auto MergedDir = B.buildMergeLikeInstr( - V3S32, + V3I32, {B.buildBitcast( - S32, B.buildMergeLikeInstr(V2S16, {UnmergeRayInvDir.getReg(0), + I32, B.buildMergeLikeInstr(V2I16, {UnmergeRayInvDir.getReg(0), UnmergeRayDir.getReg(0)})) .getReg(0), B.buildBitcast( - S32, B.buildMergeLikeInstr(V2S16, {UnmergeRayInvDir.getReg(1), + I32, B.buildMergeLikeInstr(V2I16, {UnmergeRayInvDir.getReg(1), UnmergeRayDir.getReg(1)})) .getReg(0), B.buildBitcast( - S32, B.buildMergeLikeInstr(V2S16, {UnmergeRayInvDir.getReg(2), + I32, B.buildMergeLikeInstr(V2I16, {UnmergeRayInvDir.getReg(2), UnmergeRayDir.getReg(2)})) .getReg(0)}); Ops.push_back(MergedDir.getReg(0)); @@ -7122,16 +7259,17 @@ bool AMDGPULegalizerInfo::legalizeBVHIntersectRayIntrinsic( } } else { if (Is64) { - auto Unmerge = B.buildUnmerge({S32, S32}, NodePtr); + auto Unmerge = B.buildUnmerge({I32, I32}, NodePtr); Ops.push_back(Unmerge.getReg(0)); Ops.push_back(Unmerge.getReg(1)); } else { Ops.push_back(NodePtr); } - Ops.push_back(RayExtent); + Ops.push_back(CastRayExtent.getReg(0)); - auto packLanes = [&Ops, &S32, &B](Register Src) { - auto Unmerge = B.buildUnmerge({S32, S32, S32}, Src); + auto packLanes = [&Ops, &B](Register Src) { + auto Bitcast = B.buildBitcast(V3I32, Src); + auto Unmerge = B.buildUnmerge({I32, I32, I32}, Bitcast); Ops.push_back(Unmerge.getReg(0)); Ops.push_back(Unmerge.getReg(1)); Ops.push_back(Unmerge.getReg(2)); @@ -7139,11 +7277,13 @@ bool AMDGPULegalizerInfo::legalizeBVHIntersectRayIntrinsic( packLanes(RayOrigin); if (IsA16) { - auto UnmergeRayDir = B.buildUnmerge({S16, S16, S16}, RayDir); - auto UnmergeRayInvDir = B.buildUnmerge({S16, S16, S16}, 
RayInvDir); - Register R1 = MRI.createGenericVirtualRegister(S32); - Register R2 = MRI.createGenericVirtualRegister(S32); - Register R3 = MRI.createGenericVirtualRegister(S32); + auto BitcastRayDir = B.buildBitcast(V3I16, RayDir); + auto UnmergeRayDir = B.buildUnmerge({I16, I16, I16}, BitcastRayDir); + auto BitcastRayInvDir = B.buildBitcast(V3I16, RayInvDir); + auto UnmergeRayInvDir = B.buildUnmerge({I16, I16, I16}, BitcastRayInvDir); + Register R1 = MRI.createGenericVirtualRegister(I32); + Register R2 = MRI.createGenericVirtualRegister(I32); + Register R3 = MRI.createGenericVirtualRegister(I32); B.buildMergeLikeInstr(R1, {UnmergeRayDir.getReg(0), UnmergeRayDir.getReg(1)}); B.buildMergeLikeInstr( @@ -7161,7 +7301,7 @@ bool AMDGPULegalizerInfo::legalizeBVHIntersectRayIntrinsic( if (!UseNSA) { // Build a single vector containing all the operands so far prepared. - LLT OpTy = LLT::fixed_vector(Ops.size(), 32); + LLT OpTy = LLT::fixed_vector(Ops.size(), I32); Register MergedOps = B.buildMergeLikeInstr(OpTy, Ops).getReg(0); Ops.clear(); Ops.push_back(MergedOps); @@ -7198,11 +7338,10 @@ bool AMDGPULegalizerInfo::legalizeWaveID(MachineInstr &MI, // With architected SGPRs, waveIDinGroup is in TTMP8[29:25]. if (!ST.hasArchitectedSGPRs()) return false; - LLT S32 = LLT::scalar(32); Register DstReg = MI.getOperand(0).getReg(); - auto TTMP8 = B.buildCopy(S32, Register(AMDGPU::TTMP8)); - auto LSB = B.buildConstant(S32, 25); - auto Width = B.buildConstant(S32, 5); + auto TTMP8 = B.buildCopy(I32, Register(AMDGPU::TTMP8)); + auto LSB = B.buildConstant(I32, 25); + auto Width = B.buildConstant(I32, 5); B.buildUbfx(DstReg, TTMP8, LSB, Width); MI.eraseFromParent(); return true; @@ -7218,15 +7357,15 @@ bool AMDGPULegalizerInfo::legalizeGetFPEnv(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { Register Src = MI.getOperand(0).getReg(); - if (MRI.getType(Src) != S64) + if (MRI.getType(Src) != I64) return false; auto ModeReg = - B.buildIntrinsic(Intrinsic::amdgcn_s_getreg, {S32}, + B.buildIntrinsic(Intrinsic::amdgcn_s_getreg, {I32}, /*HasSideEffects=*/true, /*isConvergent=*/false) .addImm(FPEnvModeBitField); auto TrapReg = - B.buildIntrinsic(Intrinsic::amdgcn_s_getreg, {S32}, + B.buildIntrinsic(Intrinsic::amdgcn_s_getreg, {I32}, /*HasSideEffects=*/true, /*isConvergent=*/false) .addImm(FPEnvTrapBitField); B.buildMergeLikeInstr(Src, {ModeReg, TrapReg}); @@ -7238,10 +7377,10 @@ bool AMDGPULegalizerInfo::legalizeSetFPEnv(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { Register Src = MI.getOperand(0).getReg(); - if (MRI.getType(Src) != S64) + if (!MRI.getType(Src).isScalar(64)) return false; - auto Unmerge = B.buildUnmerge({S32, S32}, MI.getOperand(0)); + auto Unmerge = B.buildUnmerge({I32, I32}, MI.getOperand(0)); B.buildIntrinsic(Intrinsic::amdgcn_s_setreg, ArrayRef(), /*HasSideEffects=*/true, /*isConvergent=*/false) .addImm(static_cast(FPEnvModeBitField)) @@ -7540,18 +7679,16 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8: case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8: { Register Index = MI.getOperand(5).getReg(); - LLT S32 = LLT::scalar(32); - if (MRI.getType(Index) != S32) - MI.getOperand(5).setReg(B.buildAnyExt(S32, Index).getReg(0)); + if (!MRI.getType(Index).isInteger(32)) + MI.getOperand(5).setReg(B.buildAnyExt(I32, Index).getReg(0)); return true; } case Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4: case Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8: case 
Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4: { Register Index = MI.getOperand(7).getReg(); - LLT S32 = LLT::scalar(32); - if (MRI.getType(Index) != S32) - MI.getOperand(7).setReg(B.buildAnyExt(S32, Index).getReg(0)); + if (!MRI.getType(Index).isInteger(32)) + MI.getOperand(7).setReg(B.buildAnyExt(I32, Index).getReg(0)); return true; } case Intrinsic::amdgcn_fmed3: { @@ -7581,7 +7718,7 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, default: { if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr = AMDGPU::getImageDimIntrinsicInfo(IntrID)) - return legalizeImageIntrinsic(MI, B, Helper.Observer, ImageDimIntr); + return legalizeImageIntrinsic(MI, B, Helper, ImageDimIntr); return true; } } diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h index a98e8ba7aaaf1..fab3631dfbbc7 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h @@ -14,9 +14,10 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINELEGALIZER_H #define LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINELEGALIZER_H -#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" #include "AMDGPUArgumentUsageInfo.h" #include "SIInstrInfo.h" +#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" +#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" namespace llvm { @@ -77,6 +78,11 @@ class AMDGPULegalizerInfo final : public LegalizerInfo { bool legalizeLoad(LegalizerHelper &Helper, MachineInstr &MI) const; bool legalizeStore(LegalizerHelper &Helper, MachineInstr &MI) const; + bool legalizeFPExt(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B) const; + bool legalizeFPTrunc(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B) const; + bool legalizeFMad(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const; @@ -222,8 +228,7 @@ class AMDGPULegalizerInfo final : public LegalizerInfo { MachineIRBuilder &B) const; bool legalizeImageIntrinsic( - MachineInstr &MI, MachineIRBuilder &B, - GISelChangeObserver &Observer, + MachineInstr &MI, MachineIRBuilder &B, LegalizerHelper &Helper, const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr) const; bool legalizeSBufferLoad(LegalizerHelper &Helper, MachineInstr &MI) const; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp index f1502f5b71017..bbff60b5a3187 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp @@ -226,20 +226,21 @@ bool AMDGPUPostLegalizerCombinerImpl::matchUCharToFloat( void AMDGPUPostLegalizerCombinerImpl::applyUCharToFloat( MachineInstr &MI) const { - const LLT S32 = LLT::scalar(32); + const LLT I32 = LLT::integer(32); + const LLT F32 = LLT::float32(); Register DstReg = MI.getOperand(0).getReg(); Register SrcReg = MI.getOperand(1).getReg(); LLT Ty = MRI.getType(DstReg); LLT SrcTy = MRI.getType(SrcReg); - if (SrcTy != S32) - SrcReg = B.buildAnyExtOrTrunc(S32, SrcReg).getReg(0); + if (!SrcTy.isInteger(32)) + SrcReg = B.buildAnyExtOrTrunc(I32, SrcReg).getReg(0); - if (Ty == S32) { + if (Ty.isFloat(32)) { B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {DstReg}, {SrcReg}, MI.getFlags()); } else { - auto Cvt0 = B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {S32}, {SrcReg}, + auto Cvt0 = B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {F32}, {SrcReg}, MI.getFlags()); B.buildFPTrunc(DstReg, Cvt0, MI.getFlags()); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index b7255eefacd90..c227b859b93d6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -1996,7 +1996,7 @@ bool AMDGPURegisterBankInfo::foldExtractEltToCmpSelect(
   else
     EltTy = MRI.getType(DstRegs[0]);
 
-  if (VecTy.isFloatVector()) {
+  if (VecTy.isFloatVector() && !EltTy.isFloat()) {
     auto ClassOrBank = MRI.getRegClassOrRegBank(VecReg);
     VecReg = B.buildBitcast({ClassOrBank, VecTy.changeToInteger()}, VecReg).getReg(0);
   }
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 02a5d50ff3ae6..fcd62f8c8a8e7 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -1553,7 +1553,6 @@ def : FlatLoadPat ;
 def : FlatLoadPat ;
 def : FlatLoadPat ;
 def : FlatLoadPat ;
-def : FlatLoadPat ;
 
 foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
 let True16Predicate = p in {
@@ -1590,7 +1589,10 @@ def : FlatStorePat ;
 def : FlatLoadPat ;
 }
 
-def : FlatStorePat ;
+foreach vt = Reg96Types.types in {
+def : FlatLoadPat ;
+def : FlatStorePat ;
+}
 
 foreach vt = VReg_128.RegTypes in {
 def : FlatLoadPat ;

From 432ff094e6aa7ba3bc07cabf6b8f32e7283c8b11 Mon Sep 17 00:00:00 2001
From: Tim Gymnich
Date: Mon, 3 Mar 2025 13:41:43 +0000
Subject: [PATCH 09/16] FPInfo: AMDGPU InstructionSelector

---
 llvm/include/llvm/CodeGen/GlobalISel/Utils.h |  6 +++++
 llvm/lib/CodeGen/GlobalISel/Utils.cpp        | 27 +++++++++++++++++++
 .../AMDGPU/AMDGPUInstructionSelector.cpp     |  4 +--
 3 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
index a35ecae5d18bf..e1c11d55995de 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -241,6 +241,9 @@ struct DefinitionAndSourceRegister {
 std::optional<DefinitionAndSourceRegister>
 getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI);
 
+std::optional<DefinitionAndSourceRegister>
+getDefSrcRegIgnoringBitcasts(Register Reg, const MachineRegisterInfo &MRI);
+
 /// Find the def instruction for \p Reg, folding away any trivial copies. May
 /// return nullptr if \p Reg is not a generic virtual register.
 ///
@@ -248,6 +251,9 @@ getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI);
 MachineInstr *getDefIgnoringCopies(Register Reg,
                                    const MachineRegisterInfo &MRI);
 
+MachineInstr *getDefIgnoringBitcasts(Register Reg,
+                                     const MachineRegisterInfo &MRI);
+
 /// Find the source register for \p Reg, folding away any trivial copies. It
 /// will be an output register of the instruction that getDefIgnoringCopies
 /// returns. May return an invalid register if \p Reg is not a generic virtual
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 625d556e3ff5e..1d7fe92346fc4 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -483,6 +483,33 @@ llvm::getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI) {
   return DefinitionAndSourceRegister{DefMI, DefSrcReg};
 }
 
+std::optional<DefinitionAndSourceRegister>
+llvm::getDefSrcRegIgnoringBitcasts(Register Reg, const MachineRegisterInfo &MRI) {
+  Register DefSrcReg = Reg;
+  auto *DefMI = MRI.getVRegDef(Reg);
+  auto DstTy = MRI.getType(DefMI->getOperand(0).getReg());
+  if (!DstTy.isValid())
+    return std::nullopt;
+  unsigned Opc = DefMI->getOpcode();
+  while (Opc == TargetOpcode::G_BITCAST || isPreISelGenericOptimizationHint(Opc)) {
+    Register SrcReg = DefMI->getOperand(1).getReg();
+    auto SrcTy = MRI.getType(SrcReg);
+    if (!SrcTy.isValid())
+      break;
+    DefMI = MRI.getVRegDef(SrcReg);
+    DefSrcReg = SrcReg;
+    Opc = DefMI->getOpcode();
+  }
+  return DefinitionAndSourceRegister{DefMI, DefSrcReg};
+}
+
+MachineInstr *llvm::getDefIgnoringBitcasts(Register Reg,
+                                           const MachineRegisterInfo &MRI) {
+  std::optional<DefinitionAndSourceRegister> DefSrcReg =
+      getDefSrcRegIgnoringBitcasts(Reg, MRI);
+  return DefSrcReg ? DefSrcReg->MI : nullptr;
+}
+
 MachineInstr *llvm::getDefIgnoringCopies(Register Reg,
                                          const MachineRegisterInfo &MRI) {
   std::optional<DefinitionAndSourceRegister> DefSrcReg =
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 60cfc531868ec..18db3d4fab47b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -4282,12 +4282,12 @@ std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3PModsImpl(
   Register Src, const MachineRegisterInfo &MRI, bool IsDOT) const {
   unsigned Mods = 0;
-  MachineInstr *MI = MRI.getVRegDef(Src);
+  MachineInstr *MI = getDefIgnoringBitcasts(Src, MRI);
 
   if (MI->getOpcode() == AMDGPU::G_FNEG &&
       // It's possible to see an f32 fneg here, but unlikely.
      // TODO: Treat f32 fneg as only high bit.
-      MRI.getType(Src).isFixedVector(2, 16)) {
+      MRI.getType(MI->getOperand(0).getReg()) == LLT::fixed_vector(2, LLT::float16())) {
     Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
     Src = MI->getOperand(1).getReg();
     MI = MRI.getVRegDef(Src);

From dabbfd2d2b0403e4b91a4952b0c37dd570182cb9 Mon Sep 17 00:00:00 2001
From: Tim Gymnich
Date: Wed, 12 Feb 2025 13:53:47 +0100
Subject: [PATCH 10/16] FPInfo: LegalizerHelper

---
 .../GlobalISel/LegalizationArtifactCombiner.h |  60 +-
 .../CodeGen/GlobalISel/LegalizerHelper.cpp    | 578 ++++++++++--------
 llvm/lib/CodeGen/GlobalISel/Utils.cpp         |   2 +-
 .../CodeGen/GlobalISel/LegalizerInfoTest.cpp  | 130 ++--
 4 files changed, 438 insertions(+), 332 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
index 0d2ff098a15e3..dda353626c051 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
@@ -27,6 +27,7 @@
 #include "llvm/CodeGen/TargetOpcodes.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/MC/TargetRegistry.h"
 #include "llvm/Support/Debug.h"
 
 #define DEBUG_TYPE "legalizer"
@@ -44,6 +45,7 @@ class LegalizationArtifactCombiner {
     case TargetOpcode::G_SEXT:
     case TargetOpcode::G_ZEXT:
     case TargetOpcode::G_ANYEXT:
+    case TargetOpcode::G_BITCAST:
       return true;
     default:
       return false;
@@ -507,6 +509,53 @@ class LegalizationArtifactCombiner {
         markInstAndDefDead(MI, CastMI, DeadInsts);
         return true;
       }
+    } else if (CastOpc == TargetOpcode::G_BITCAST) {
+
+      // %1:_(<2 x i32>) = G_BITCAST %0(<2 x f32>)
+      // %2:_(i16), %3:_(i16), %4:_(i16), %5:_(i16) = G_UNMERGE_VALUES %1
+      // =>
+      // %6:_(f32), %7:_(f32) = G_UNMERGE_VALUES %0
+      // %8:_(i32) = G_BITCAST %6
+      // %2:_(i16), %3:_(i16) = G_UNMERGE_VALUES %8
+      // %9:_(i32) = G_BITCAST %7
+      // %4:_(i16), %5:_(i16) = G_UNMERGE_VALUES %9
+
+      if (CastSrcTy.isScalar() || SrcTy.isScalar() || DestTy.isVector() ||
+          DestTy == SrcTy.getScalarType())
+        return false;
+
+      const unsigned NewNumDefs1 = CastSrcTy.getNumElements();
+      const unsigned NewNumDefs2 = NumDefs / NewNumDefs1;
+
+      if (NewNumDefs2 <= 1)
+        return false;
+
+      SmallVector<Register> NewUnmergeRegs(NewNumDefs1);
+      for (unsigned Idx = 0; Idx < NewNumDefs1; ++Idx)
+        NewUnmergeRegs[Idx] =
+            MRI.createGenericVirtualRegister(CastSrcTy.getElementType());
+
+      Builder.setInstr(MI);
+      auto NewUnmerge = Builder.buildUnmerge(NewUnmergeRegs, CastSrcReg);
+
+      SmallVector<Register> DstRegs(NumDefs);
+      for (unsigned Idx = 0; Idx < NumDefs; ++Idx)
+        DstRegs[Idx] = MI.getOperand(Idx).getReg();
+
+      auto *It = DstRegs.begin();
+
+      for (auto &Def : NewUnmerge->all_defs()) {
+        auto Bitcast = Builder.buildBitcast(SrcTy.getElementType(), Def);
+        auto *Begin = It;
+        It += NewNumDefs2;
+        ArrayRef<Register> Regs(Begin, It);
+        Builder.buildUnmerge(Regs, Bitcast);
+      }
+
+      UpdatedDefs.append(NewUnmergeRegs.begin(), NewUnmergeRegs.end());
+      UpdatedDefs.append(DstRegs.begin(), DstRegs.end());
+      markInstAndDefDead(MI, CastMI, DeadInsts);
+      return true;
     }
 
     // TODO: support combines with other casts as well
@@ -1165,8 +1214,9 @@ class LegalizationArtifactCombiner {
            ++j, ++DefIdx)
         DstRegs.push_back(MI.getReg(DefIdx));
 
-      if (ConvertOp) {
-        LLT MergeDstTy = MRI.getType(SrcDef->getOperand(0).getReg());
+      LLT MergeDstTy = MRI.getType(SrcDef->getOperand(0).getReg());
+
+      if (ConvertOp && DestTy != MergeDstTy) {
         // This is a vector that is being split and casted.
Extract to the // element type, and do the conversion on the scalars (or smaller @@ -1187,6 +1237,7 @@ class LegalizationArtifactCombiner { // %7(<2 x s16>), %7(<2 x s16>) = G_UNMERGE_VALUES %9 Register TmpReg = MRI.createGenericVirtualRegister(MergeEltTy); + assert(MRI.getType(TmpReg) != MRI.getType(MergeI->getOperand(Idx + 1).getReg())); Builder.buildInstr(ConvertOp, {TmpReg}, {MergeI->getOperand(Idx + 1).getReg()}); Builder.buildUnmerge(DstRegs, TmpReg); @@ -1232,7 +1283,7 @@ class LegalizationArtifactCombiner { ConvertOp = TargetOpcode::G_BITCAST; } - if (ConvertOp) { + if (ConvertOp && DestTy != MergeSrcTy) { Builder.setInstr(MI); for (unsigned Idx = 0; Idx < NumDefs; ++Idx) { @@ -1240,6 +1291,7 @@ class LegalizationArtifactCombiner { Register MergeSrc = MergeI->getOperand(Idx + 1).getReg(); if (!MRI.use_empty(DefReg)) { + assert(MRI.getType(DefReg) != MRI.getType(MergeSrc)); Builder.buildInstr(ConvertOp, {DefReg}, {MergeSrc}); UpdatedDefs.push_back(DefReg); } @@ -1398,6 +1450,7 @@ class LegalizationArtifactCombiner { case TargetOpcode::G_EXTRACT: case TargetOpcode::G_TRUNC: case TargetOpcode::G_BUILD_VECTOR: + case TargetOpcode::G_BITCAST: // Adding Use to ArtifactList. WrapperObserver.changedInstr(Use); break; @@ -1425,6 +1478,7 @@ class LegalizationArtifactCombiner { static Register getArtifactSrcReg(const MachineInstr &MI) { switch (MI.getOpcode()) { case TargetOpcode::COPY: + case TargetOpcode::G_BITCAST: case TargetOpcode::G_TRUNC: case TargetOpcode::G_ZEXT: case TargetOpcode::G_ANYEXT: diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index c6cdf33dd8c6e..4e7e77a30e52e 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -74,7 +74,7 @@ getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) { LLT::scalarOrVector(ElementCount::getFixed(LeftoverSize / EltSize), OrigTy.getElementType()); } else { - LeftoverTy = LLT::scalar(LeftoverSize); + LeftoverTy = LLT::integer(LeftoverSize); } int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits(); @@ -86,20 +86,25 @@ static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) { if (!Ty.isScalar()) return nullptr; - switch (Ty.getSizeInBits()) { - case 16: + if (Ty.isIEEEFloat(16)) return Type::getHalfTy(Ctx); - case 32: + + if (Ty.isIEEEFloat(32)) return Type::getFloatTy(Ctx); - case 64: + + if (Ty.isIEEEFloat(64)) return Type::getDoubleTy(Ctx); - case 80: - return Type::getX86_FP80Ty(Ctx); - case 128: + + if (Ty.isIEEEFloat(128)) return Type::getFP128Ty(Ctx); - default: - return nullptr; - } + + if (Ty.isX86FP80()) + return Type::getX86_FP80Ty(Ctx); + + if (Ty.isPPCF128()) + return Type::getPPC_FP128Ty(Ctx); + + return nullptr; } LegalizerHelper::LegalizerHelper(MachineFunction &MF, @@ -278,7 +283,7 @@ LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy, // Shift the sign bit of the low register through the high register. auto ShiftAmt = - MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1); + MIRBuilder.buildConstant(LLT::integer(64), GCDTy.getSizeInBits() - 1); PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0); } } @@ -1079,7 +1084,7 @@ LegalizerHelper::createFCMPLibcall(MachineIRBuilder &MIRBuilder, const CmpInst::Predicate ICmpPred, const DstOp &Res) -> Register { // FCMP libcall always returns an i32, and needs an ICMP with #0. 
- constexpr LLT TempLLT = LLT::scalar(32); + constexpr LLT TempLLT = LLT::integer(32); Register Temp = MRI.createGenericVirtualRegister(TempLLT); // Generate libcall, holding result in Temp const auto Status = createLibcall( @@ -1194,7 +1199,7 @@ LegalizerHelper::createResetStateLibcall(MachineIRBuilder &MIRBuilder, Type *StatePtrTy = PointerType::get(Ctx, AddrSpace); unsigned PtrSize = DL.getPointerSizeInBits(AddrSpace); LLT MemTy = LLT::pointer(AddrSpace, PtrSize); - auto DefValue = MIRBuilder.buildConstant(LLT::scalar(PtrSize), -1LL); + auto DefValue = MIRBuilder.buildConstant(LLT::integer(PtrSize), -1LL); DstOp Dest(MRI.createGenericVirtualRegister(MemTy)); MIRBuilder.buildIntToPtr(Dest, DefValue); @@ -1479,7 +1484,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, unsigned LeftoverBits = TotalSize - NumParts * NarrowSize; SmallVector LeftoverRegs; if (LeftoverBits != 0) { - LeftoverTy = LLT::scalar(LeftoverBits); + LeftoverTy = LLT::integer(LeftoverBits); auto K = MIRBuilder.buildConstant( LeftoverTy, Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits)); @@ -2000,7 +2005,7 @@ Register LegalizerHelper::coerceToScalar(Register Val) { return Val; const DataLayout &DL = MIRBuilder.getDataLayout(); - LLT NewTy = LLT::scalar(Ty.getSizeInBits()); + LLT NewTy = LLT::integer(Ty.getSizeInBits()); if (Ty.isPointer()) { if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace())) return Register(); @@ -2149,12 +2154,12 @@ LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx, // %10:_(s12) = G_MERGE_VALUES %8, %9 const int GCD = std::gcd(SrcSize, WideSize); - LLT GCDTy = LLT::scalar(GCD); + LLT GCDTy = LLT::integer(GCD); SmallVector Parts; SmallVector NewMergeRegs; SmallVector Unmerges; - LLT WideDstTy = LLT::scalar(NumMerge * WideSize); + LLT WideDstTy = LLT::integer(NumMerge * WideSize); // Decompose the original operands if they don't evenly divide. for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) { @@ -2224,7 +2229,7 @@ LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx, return UnableToLegalize; } - SrcTy = LLT::scalar(SrcTy.getSizeInBits()); + SrcTy = LLT::integer(SrcTy.getSizeInBits()); SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0); } @@ -2345,7 +2350,7 @@ LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx, if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) return UnableToLegalize; - LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits()); + LLT SrcAsIntTy = LLT::integer(SrcTy.getSizeInBits()); Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src); SrcTy = SrcAsIntTy; } @@ -3156,9 +3161,8 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { LLT VecTy = MRI.getType(VecReg); Observer.changingInstr(MI); - widenScalarSrc( - MI, LLT::vector(VecTy.getElementCount(), WideTy.getSizeInBits()), 1, - TargetOpcode::G_ANYEXT); + widenScalarSrc(MI, LLT::vector(VecTy.getElementCount(), WideTy), 1, + TargetOpcode::G_ANYEXT); widenScalarDst(MI, WideTy, 0); Observer.changedInstr(MI); @@ -3665,7 +3669,12 @@ LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx, // Shift the wide element to get the target element. 
auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits); - MIRBuilder.buildTrunc(Dst, ExtractedBits); + if (DstTy.isFloat()) { + auto Trunc = MIRBuilder.buildTrunc(DstTy.changeToInteger(), ExtractedBits); + MIRBuilder.buildBitcast(Dst, Trunc); + } else { + MIRBuilder.buildTrunc(Dst, ExtractedBits); + } MI.eraseFromParent(); return Legalized; } @@ -3790,7 +3799,7 @@ LegalizerHelper::bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx, // Check if bitcast is Legal auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs(); - LLT SrcScalTy = LLT::scalar(SrcTy.getSizeInBits()); + LLT SrcScalTy = SrcTy.getScalarType(); // Check if the build vector is Legal if (!LI.isLegal({TargetOpcode::G_BUILD_VECTOR, {CastTy, SrcScalTy}})) { @@ -3899,7 +3908,8 @@ LegalizerHelper::bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx, return UnableToLegalize; Idx /= AdjustAmt; - SrcTy = LLT::vector(SrcTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt); + SrcTy = LLT::vector(SrcTyEC.divideCoefficientBy(AdjustAmt), + LLT::integer(AdjustAmt)); auto CastVec = MIRBuilder.buildBitcast(SrcTy, Src); auto PromotedES = MIRBuilder.buildExtractSubvector(CastTy, CastVec, Idx); MIRBuilder.buildBitcast(Dst, PromotedES); @@ -3967,8 +3977,10 @@ LegalizerHelper::bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx, return UnableToLegalize; Idx /= AdjustAmt; - BigVecTy = LLT::vector(BigVecTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt); - SubVecTy = LLT::vector(SubVecTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt); + BigVecTy = LLT::vector(BigVecTyEC.divideCoefficientBy(AdjustAmt), + LLT::integer(AdjustAmt)); + SubVecTy = LLT::vector(SubVecTyEC.divideCoefficientBy(AdjustAmt), + LLT::integer(AdjustAmt)); auto CastBigVec = MIRBuilder.buildBitcast(BigVecTy, BigVec); auto CastSubVec = MIRBuilder.buildBitcast(SubVecTy, SubVec); auto PromotedIS = @@ -3997,7 +4009,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) { // Promote to a byte-sized load if not loading an integral number of // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24. - LLT WideMemTy = LLT::scalar(MemStoreSizeInBits); + LLT WideMemTy = LLT::integer(MemStoreSizeInBits); MachineMemOperand *NewMMO = MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideMemTy); @@ -4084,11 +4096,11 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) { LLT PtrTy = MRI.getType(PtrReg); unsigned AnyExtSize = PowerOf2Ceil(DstTy.getSizeInBits()); - LLT AnyExtTy = LLT::scalar(AnyExtSize); + LLT AnyExtTy = LLT::integer(AnyExtSize); auto LargeLoad = MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy, PtrReg, *LargeMMO); - auto OffsetCst = MIRBuilder.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), + auto OffsetCst = MIRBuilder.buildConstant(LLT::integer(PtrTy.getSizeInBits()), LargeSplitSize / 8); Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy); auto SmallPtr = MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst); @@ -4137,7 +4149,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) { // Promote to a byte-sized store with upper bits zero if not // storing an integral number of bytes. For example, promote // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1) - LLT WideTy = LLT::scalar(StoreSizeInBits); + LLT WideTy = LLT::integer(StoreSizeInBits); if (StoreSizeInBits > SrcTy.getSizeInBits()) { // Avoid creating a store with a narrower source than result. 
@@ -4181,10 +4193,10 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) { // e.g. an s56 store being broken into s32 + s24, we might have a stored type // that's wider than the stored size. unsigned AnyExtSize = PowerOf2Ceil(MemTy.getSizeInBits()); - const LLT NewSrcTy = LLT::scalar(AnyExtSize); + const LLT NewSrcTy = LLT::integer(AnyExtSize); if (SrcTy.isPointer()) { - const LLT IntPtrTy = LLT::scalar(SrcTy.getSizeInBits()); + const LLT IntPtrTy = LLT::integer(SrcTy.getSizeInBits()); SrcReg = MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0); } @@ -4196,8 +4208,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) { // Generate the PtrAdd and truncating stores. LLT PtrTy = MRI.getType(PtrReg); - auto OffsetCst = MIRBuilder.buildConstant( - LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8); + auto OffsetCst = MIRBuilder.buildConstant(LLT::integer(PtrTy.getSizeInBits()), + LargeSplitSize / 8); auto SmallPtr = MIRBuilder.buildPtrAdd(PtrTy, PtrReg, OffsetCst); @@ -4227,7 +4239,7 @@ LegalizerHelper::scalarizeVectorBooleanStore(GStore &StoreMI) { // We need to build an integer scalar of the vector bit pattern. // It's not legal for us to add padding when storing a vector. unsigned NumBits = MemTy.getSizeInBits(); - LLT IntTy = LLT::scalar(NumBits); + LLT IntTy = LLT::integer(NumBits); auto CurrVal = MIRBuilder.buildConstant(IntTy, 0); LLT IdxTy = getLLTForMVT(TLI.getVectorIdxTy(MF.getDataLayout())); @@ -5245,7 +5257,7 @@ LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx, return UnableToLegalize; LLT PtrTy = MRI.getType(AddrReg); - const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits()); + const LLT OffsetTy = LLT::integer(PtrTy.getSizeInBits()); unsigned TotalSize = ValTy.getSizeInBits(); @@ -5616,7 +5628,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle( SVOps.push_back(MIRBuilder .buildExtractVectorElement( EltTy, Inputs[Input], - MIRBuilder.buildConstant(LLT::scalar(32), Idx)) + MIRBuilder.buildConstant(LLT::integer(32), Idx)) .getReg(0)); } @@ -5903,8 +5915,8 @@ LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, // input. If that isn't small enough, the resulting pieces will be further // legalized. const unsigned NewBitSize = DstEltSize / 2; - const LLT HalfTy = LLT::scalar(NewBitSize); - const LLT CondTy = LLT::scalar(1); + const LLT HalfTy = LLT::integer(NewBitSize); + const LLT CondTy = LLT::integer(1); if (auto VRegAndVal = getIConstantVRegValWithLookThrough(Amt, MRI)) { return narrowScalarShiftByConstant(MI, VRegAndVal->Value, HalfTy, @@ -6448,12 +6460,12 @@ void LegalizerHelper::multiplyRegisters(SmallVectorImpl &DstRegs, // Add all factors and accumulate all carries into CarrySum. 
if (DstIdx != DstParts - 1) { MachineInstrBuilder Uaddo = - B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]); + B.buildUAddo(NarrowTy, LLT::integer(1), Factors[0], Factors[1]); FactorSum = Uaddo.getReg(0); CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0); for (unsigned i = 2; i < Factors.size(); ++i) { MachineInstrBuilder Uaddo = - B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]); + B.buildUAddo(NarrowTy, LLT::integer(1), FactorSum, Factors[i]); FactorSum = Uaddo.getReg(0); MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1)); CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0); @@ -6543,7 +6555,7 @@ LegalizerHelper::narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx, if (i == e - 1 && CarryDst) CarryOut = CarryDst; else - CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1)); + CarryOut = MRI.createGenericVirtualRegister(LLT::integer(1)); if (!CarryIn) { MIRBuilder.buildInstr(OpO, {DstReg, CarryOut}, @@ -6672,7 +6684,7 @@ LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, Register SegReg = SrcRegs[i]; if (ExtractOffset != 0 || SegSize != NarrowSize) { // A genuine extract is needed. - SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize)); + SegReg = MRI.createGenericVirtualRegister(LLT::integer(SegSize)); MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset); } @@ -6751,7 +6763,7 @@ LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, Register SegReg = OpReg; if (ExtractOffset != 0 || SegSize != OpSize) { // A genuine extract is needed. - SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize)); + SegReg = MRI.createGenericVirtualRegister(LLT::integer(SegSize)); MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset); } @@ -6763,7 +6775,8 @@ LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, uint64_t WideSize = DstRegs.size() * NarrowSize; Register DstReg = MI.getOperand(0).getReg(); if (WideSize > RegTy.getSizeInBits()) { - Register MergeReg = MRI.createGenericVirtualRegister(LLT::scalar(WideSize)); + Register MergeReg = + MRI.createGenericVirtualRegister(LLT::integer(WideSize)); MIRBuilder.buildMergeLikeInstr(MergeReg, DstRegs); MIRBuilder.buildTrunc(DstReg, MergeReg); } else @@ -6897,7 +6910,7 @@ LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx, auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg); // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi) auto C_0 = B.buildConstant(NarrowTy, 0); - auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1), + auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::integer(1), UnmergeSrc.getReg(1), C_0); auto LoCTLZ = IsUndef ? B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) : @@ -6930,7 +6943,7 @@ LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx, auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg); // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo) auto C_0 = B.buildConstant(NarrowTy, 0); - auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1), + auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::integer(1), UnmergeSrc.getReg(0), C_0); auto HiCTTZ = IsUndef ? B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) : @@ -7021,8 +7034,9 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI) { // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero. 
auto CtlzZU = MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg); auto ZeroSrc = MIRBuilder.buildConstant(SrcTy, 0); - auto ICmp = MIRBuilder.buildICmp( - CmpInst::ICMP_EQ, SrcTy.changeElementSize(1), SrcReg, ZeroSrc); + auto ICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, + SrcTy.changeElementType(LLT::integer(1)), + SrcReg, ZeroSrc); auto LenConst = MIRBuilder.buildConstant(DstTy, Len); MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU); MI.eraseFromParent(); @@ -7069,8 +7083,9 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI) { // zero. auto CttzZU = MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg); auto Zero = MIRBuilder.buildConstant(SrcTy, 0); - auto ICmp = MIRBuilder.buildICmp( - CmpInst::ICMP_EQ, DstTy.changeElementSize(1), SrcReg, Zero); + auto ICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, + DstTy.changeElementType(LLT::integer(1)), + SrcReg, Zero); auto LenConst = MIRBuilder.buildConstant(DstTy, Len); MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU); MI.eraseFromParent(); @@ -7482,11 +7497,11 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) { LegalizerHelper::LegalizeResult LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) { auto [Dst, Src] = MI.getFirst2Regs(); - const LLT S64 = LLT::scalar(64); - const LLT S32 = LLT::scalar(32); - const LLT S1 = LLT::scalar(1); + const LLT I64 = LLT::integer(64); + const LLT I32 = LLT::integer(32); + const LLT I1 = LLT::integer(1); - assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32); + assert(MRI.getType(Src).isInteger(64) && MRI.getType(Dst).isFloat(32)); // unsigned cul2f(ulong u) { // uint lz = clz(u); @@ -7498,38 +7513,39 @@ LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) { // return as_float(v + r); // } - auto Zero32 = MIRBuilder.buildConstant(S32, 0); - auto Zero64 = MIRBuilder.buildConstant(S64, 0); + auto Zero32 = MIRBuilder.buildConstant(I32, 0); + auto Zero64 = MIRBuilder.buildConstant(I64, 0); - auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src); + auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(I32, Src); - auto K = MIRBuilder.buildConstant(S32, 127U + 63U); - auto Sub = MIRBuilder.buildSub(S32, K, LZ); + auto K = MIRBuilder.buildConstant(I32, 127U + 63U); + auto Sub = MIRBuilder.buildSub(I32, K, LZ); - auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64); - auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32); + auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, I1, Src, Zero64); + auto E = MIRBuilder.buildSelect(I32, NotZero, Sub, Zero32); - auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1); - auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ); + auto Mask0 = MIRBuilder.buildConstant(I64, (-1ULL) >> 1); + auto ShlLZ = MIRBuilder.buildShl(I64, Src, LZ); - auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0); + auto U = MIRBuilder.buildAnd(I64, ShlLZ, Mask0); - auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL); - auto T = MIRBuilder.buildAnd(S64, U, Mask1); + auto Mask1 = MIRBuilder.buildConstant(I64, 0xffffffffffULL); + auto T = MIRBuilder.buildAnd(I64, U, Mask1); - auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40)); - auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23)); - auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl)); + auto UShl = MIRBuilder.buildLShr(I64, U, MIRBuilder.buildConstant(I64, 40)); + auto ShlE = MIRBuilder.buildShl(I32, E, MIRBuilder.buildConstant(I32, 23)); + auto V = MIRBuilder.buildOr(I32, ShlE, MIRBuilder.buildTrunc(I32, UShl)); - auto C = 
MIRBuilder.buildConstant(S64, 0x8000000000ULL); - auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C); - auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C); - auto One = MIRBuilder.buildConstant(S32, 1); + auto C = MIRBuilder.buildConstant(I64, 0x8000000000ULL); + auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, I1, T, C); + auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, I1, T, C); + auto One = MIRBuilder.buildConstant(I32, 1); - auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One); - auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32); - auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0); - MIRBuilder.buildAdd(Dst, V, R); + auto VTrunc1 = MIRBuilder.buildAnd(I32, V, One); + auto Select0 = MIRBuilder.buildSelect(I32, TCmp, VTrunc1, Zero32); + auto R = MIRBuilder.buildSelect(I32, RCmp, One, Select0); + auto Add = MIRBuilder.buildAdd(I32, V, R); + MIRBuilder.buildBitcast(Dst, Add); MI.eraseFromParent(); return Legalized; @@ -7540,30 +7556,30 @@ LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) { LegalizerHelper::LegalizeResult LegalizerHelper::lowerU64ToF32WithSITOFP(MachineInstr &MI) { auto [Dst, Src] = MI.getFirst2Regs(); - const LLT S64 = LLT::scalar(64); - const LLT S32 = LLT::scalar(32); - const LLT S1 = LLT::scalar(1); + const LLT I64 = LLT::integer(64); + const LLT F32 = LLT::float32(); + const LLT I1 = LLT::integer(1); - assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32); + assert(MRI.getType(Src).isInteger(64) && MRI.getType(Dst).isFloat(32)); // For i64 < INT_MAX we simply reuse SITOFP. // Otherwise, divide i64 by 2, round result by ORing with the lowest bit // saved before division, convert to float by SITOFP, multiply the result // by 2. - auto One = MIRBuilder.buildConstant(S64, 1); - auto Zero = MIRBuilder.buildConstant(S64, 0); + auto One = MIRBuilder.buildConstant(I64, 1); + auto Zero = MIRBuilder.buildConstant(I64, 0); // Result if Src < INT_MAX - auto SmallResult = MIRBuilder.buildSITOFP(S32, Src); + auto SmallResult = MIRBuilder.buildSITOFP(F32, Src); // Result if Src >= INT_MAX - auto Halved = MIRBuilder.buildLShr(S64, Src, One); - auto LowerBit = MIRBuilder.buildAnd(S64, Src, One); - auto RoundedHalved = MIRBuilder.buildOr(S64, Halved, LowerBit); - auto HalvedFP = MIRBuilder.buildSITOFP(S32, RoundedHalved); - auto LargeResult = MIRBuilder.buildFAdd(S32, HalvedFP, HalvedFP); + auto Halved = MIRBuilder.buildLShr(I64, Src, One); + auto LowerBit = MIRBuilder.buildAnd(I64, Src, One); + auto RoundedHalved = MIRBuilder.buildOr(I64, Halved, LowerBit); + auto HalvedFP = MIRBuilder.buildSITOFP(F32, RoundedHalved); + auto LargeResult = MIRBuilder.buildFAdd(F32, HalvedFP, HalvedFP); // Check if the original value is larger than INT_MAX by comparing with // zero to pick one of the two conversions. 
auto IsLarge = - MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_SLT, S1, Src, Zero); + MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_SLT, I1, Src, Zero); MIRBuilder.buildSelect(Dst, IsLarge, LargeResult, SmallResult); MI.eraseFromParent(); @@ -7575,10 +7591,11 @@ LegalizerHelper::lowerU64ToF32WithSITOFP(MachineInstr &MI) { LegalizerHelper::LegalizeResult LegalizerHelper::lowerU64ToF64BitFloatOps(MachineInstr &MI) { auto [Dst, Src] = MI.getFirst2Regs(); - const LLT S64 = LLT::scalar(64); - const LLT S32 = LLT::scalar(32); + const LLT F64 = LLT::float64(); + const LLT I64 = LLT::integer(64); + const LLT I32 = LLT::integer(32); - assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S64); + assert(MRI.getType(Src).isInteger(64) && MRI.getType(Dst).isFloat(64)); // We create double value from 32 bit parts with 32 exponent difference. // Note that + and - are float operations that adjust the implicit leading @@ -7589,18 +7606,18 @@ LegalizerHelper::lowerU64ToF64BitFloatOps(MachineInstr &MI) { // Scratch = 2^84 * 1.0...HighBits - 2^84 * 1.0 - 2^52 * 1.0 // = - 2^52 * 1.0...HighBits // Result = - 2^52 * 1.0...HighBits + 2^52 * 1.0...LowBits - auto TwoP52 = MIRBuilder.buildConstant(S64, UINT64_C(0x4330000000000000)); - auto TwoP84 = MIRBuilder.buildConstant(S64, UINT64_C(0x4530000000000000)); + auto TwoP52 = MIRBuilder.buildConstant(I64, UINT64_C(0x4330000000000000)); + auto TwoP84 = MIRBuilder.buildConstant(I64, UINT64_C(0x4530000000000000)); auto TwoP52P84 = llvm::bit_cast(UINT64_C(0x4530000000100000)); - auto TwoP52P84FP = MIRBuilder.buildFConstant(S64, TwoP52P84); - auto HalfWidth = MIRBuilder.buildConstant(S64, 32); - - auto LowBits = MIRBuilder.buildTrunc(S32, Src); - LowBits = MIRBuilder.buildZExt(S64, LowBits); - auto LowBitsFP = MIRBuilder.buildOr(S64, TwoP52, LowBits); - auto HighBits = MIRBuilder.buildLShr(S64, Src, HalfWidth); - auto HighBitsFP = MIRBuilder.buildOr(S64, TwoP84, HighBits); - auto Scratch = MIRBuilder.buildFSub(S64, HighBitsFP, TwoP52P84FP); + auto TwoP52P84FP = MIRBuilder.buildFConstant(F64, TwoP52P84); + auto HalfWidth = MIRBuilder.buildConstant(I64, 32); + + auto LowBits = MIRBuilder.buildTrunc(I32, Src); + LowBits = MIRBuilder.buildZExt(I64, LowBits); + auto LowBitsFP = MIRBuilder.buildOr(I64, TwoP52, LowBits); + auto HighBits = MIRBuilder.buildLShr(I64, Src, HalfWidth); + auto HighBitsFP = MIRBuilder.buildOr(I64, TwoP84, HighBits); + auto Scratch = MIRBuilder.buildFSub(F64, HighBitsFP, TwoP52P84FP); MIRBuilder.buildFAdd(Dst, Scratch, LowBitsFP); MI.eraseFromParent(); @@ -7618,17 +7635,17 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) { return Legalized; } - if (SrcTy != LLT::scalar(64)) + if (!SrcTy.isInteger(64)) return UnableToLegalize; - if (DstTy == LLT::scalar(32)) + if (DstTy.isFloat(32)) // TODO: SelectionDAG has several alternative expansions to port which may // be more reasonable depending on the available instructions. We also need // a more advanced mechanism to choose an optimal version depending on // target features such as sitofp or CTLZ availability. 
return lowerU64ToF32WithSITOFP(MI); - if (DstTy == LLT::scalar(64)) + if (DstTy.isFloat(64)) return lowerU64ToF64BitFloatOps(MI); return UnableToLegalize; @@ -7637,11 +7654,11 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) { LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) { auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs(); - const LLT S64 = LLT::scalar(64); - const LLT S32 = LLT::scalar(32); - const LLT S1 = LLT::scalar(1); + const LLT I64 = LLT::integer(64); + const LLT F32 = LLT::float32(); + const LLT I1 = LLT::integer(1); - if (SrcTy == S1) { + if (SrcTy.isInteger(1)) { auto True = MIRBuilder.buildFConstant(DstTy, -1.0); auto False = MIRBuilder.buildFConstant(DstTy, 0.0); MIRBuilder.buildSelect(Dst, Src, True, False); @@ -7649,26 +7666,26 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) { return Legalized; } - if (SrcTy != S64) + if (!SrcTy.isInteger(64)) return UnableToLegalize; - if (DstTy == S32) { + if (DstTy.isFloat(32)) { // signed cl2f(long l) { // long s = l >> 63; // float r = cul2f((l + s) ^ s); // return s ? -r : r; // } Register L = Src; - auto SignBit = MIRBuilder.buildConstant(S64, 63); - auto S = MIRBuilder.buildAShr(S64, L, SignBit); + auto SignBit = MIRBuilder.buildConstant(I64, 63); + auto S = MIRBuilder.buildAShr(I64, L, SignBit); - auto LPlusS = MIRBuilder.buildAdd(S64, L, S); - auto Xor = MIRBuilder.buildXor(S64, LPlusS, S); - auto R = MIRBuilder.buildUITOFP(S32, Xor); + auto LPlusS = MIRBuilder.buildAdd(I64, L, S); + auto Xor = MIRBuilder.buildXor(I64, LPlusS, S); + auto R = MIRBuilder.buildUITOFP(F32, Xor); - auto RNeg = MIRBuilder.buildFNeg(S32, R); - auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S, - MIRBuilder.buildConstant(S64, 0)); + auto RNeg = MIRBuilder.buildFNeg(F32, R); + auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, I1, S, + MIRBuilder.buildConstant(I64, 0)); MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R); MI.eraseFromParent(); return Legalized; @@ -7679,12 +7696,10 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) { LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) { auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs(); - const LLT S64 = LLT::scalar(64); - const LLT S32 = LLT::scalar(32); - if (SrcTy != S64 && SrcTy != S32) + if (!SrcTy.isFloat(64) && !SrcTy.isFloat(32)) return UnableToLegalize; - if (DstTy != S32 && DstTy != S64) + if (!DstTy.isInteger(32) && !DstTy.isInteger(64)) return UnableToLegalize; // FPTOSI gives same result as FPTOUI for positive signed integers. 
@@ -7707,10 +7722,10 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) { MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt); MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit); - const LLT S1 = LLT::scalar(1); + const LLT I1 = LLT::integer(1); MachineInstrBuilder FCMP = - MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold); + MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, I1, Src, Threshold); MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res); MI.eraseFromParent(); @@ -7719,11 +7734,10 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) { LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) { auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs(); - const LLT S64 = LLT::scalar(64); - const LLT S32 = LLT::scalar(32); // FIXME: Only f32 to i64 conversions are supported. - if (SrcTy.getScalarType() != S32 || DstTy.getScalarType() != S64) + if (!SrcTy.getScalarType().isFloat(32) || + !DstTy.getScalarType().isInteger(64)) return UnableToLegalize; // Expand f32 -> i64 conversion @@ -7760,9 +7774,9 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) { auto Shl = MIRBuilder.buildShl(DstTy, R, SubExponent); auto Srl = MIRBuilder.buildLShr(DstTy, R, ExponentSub); - const LLT S1 = LLT::scalar(1); - auto CmpGt = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, - S1, Exponent, ExponentLoBit); + const LLT I1 = LLT::integer(1); + auto CmpGt = + MIRBuilder.buildICmp(CmpInst::ICMP_SGT, I1, Exponent, ExponentLoBit); R = MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl); @@ -7771,8 +7785,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) { auto ZeroSrcTy = MIRBuilder.buildConstant(SrcTy, 0); - auto ExponentLt0 = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, - S1, Exponent, ZeroSrcTy); + auto ExponentLt0 = + MIRBuilder.buildICmp(CmpInst::ICMP_SLT, I1, Exponent, ZeroSrcTy); auto ZeroDstTy = MIRBuilder.buildConstant(DstTy, 0); MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret); @@ -7816,14 +7830,14 @@ LegalizerHelper::lowerFPTOINT_SAT(MachineInstr &MI) { if (AreExactFloatBounds) { // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat. auto MaxC = MIRBuilder.buildFConstant(SrcTy, MinFloat); - auto MaxP = MIRBuilder.buildFCmp(CmpInst::FCMP_OGT, - SrcTy.changeElementSize(1), Src, MaxC); + auto MaxP = MIRBuilder.buildFCmp( + CmpInst::FCMP_OGT, SrcTy.changeElementType(LLT::integer(1)), Src, MaxC); auto Max = MIRBuilder.buildSelect(SrcTy, MaxP, Src, MaxC); // Clamp by MaxFloat from above. NaN cannot occur. auto MinC = MIRBuilder.buildFConstant(SrcTy, MaxFloat); - auto MinP = - MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, SrcTy.changeElementSize(1), Max, - MinC, MachineInstr::FmNoNans); + auto MinP = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, + SrcTy.changeElementType(LLT::integer(1)), + Max, MinC, MachineInstr::FmNoNans); auto Min = MIRBuilder.buildSelect(SrcTy, MinP, Max, MinC, MachineInstr::FmNoNans); // Convert clamped value to integer. In the unsigned case we're done, @@ -7836,8 +7850,8 @@ LegalizerHelper::lowerFPTOINT_SAT(MachineInstr &MI) { // Otherwise, select 0 if Src is NaN. 
auto FpToInt = MIRBuilder.buildFPTOSI(DstTy, Min); - auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_UNO, - DstTy.changeElementSize(1), Src, Src); + auto IsZero = MIRBuilder.buildFCmp( + CmpInst::FCMP_UNO, DstTy.changeElementType(LLT::integer(1)), Src, Src); MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0), FpToInt); MI.eraseFromParent(); @@ -7852,15 +7866,15 @@ LegalizerHelper::lowerFPTOINT_SAT(MachineInstr &MI) { // If Src ULT MinFloat, select MinInt. In particular, this also selects // MinInt if Src is NaN. - auto ULT = - MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, SrcTy.changeElementSize(1), Src, - MIRBuilder.buildFConstant(SrcTy, MinFloat)); + auto ULT = MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, + SrcTy.changeElementType(LLT::integer(1)), Src, + MIRBuilder.buildFConstant(SrcTy, MinFloat)); auto Max = MIRBuilder.buildSelect( DstTy, ULT, MIRBuilder.buildConstant(DstTy, MinInt), FpToInt); // If Src OGT MaxFloat, select MaxInt. - auto OGT = - MIRBuilder.buildFCmp(CmpInst::FCMP_OGT, SrcTy.changeElementSize(1), Src, - MIRBuilder.buildFConstant(SrcTy, MaxFloat)); + auto OGT = MIRBuilder.buildFCmp(CmpInst::FCMP_OGT, + SrcTy.changeElementType(LLT::integer(1)), Src, + MIRBuilder.buildFConstant(SrcTy, MaxFloat)); // In the unsigned case we are done, because we mapped NaN to MinInt, which // is already zero. @@ -7874,8 +7888,8 @@ LegalizerHelper::lowerFPTOINT_SAT(MachineInstr &MI) { // Otherwise, select 0 if Src is NaN. auto Min = MIRBuilder.buildSelect( DstTy, OGT, MIRBuilder.buildConstant(DstTy, MaxInt), Max); - auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_UNO, - DstTy.changeElementSize(1), Src, Src); + auto IsZero = MIRBuilder.buildFCmp( + CmpInst::FCMP_UNO, DstTy.changeElementType(LLT::integer(1)), Src, Src); MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0), Min); MI.eraseFromParent(); return Legalized; @@ -7884,19 +7898,22 @@ LegalizerHelper::lowerFPTOINT_SAT(MachineInstr &MI) { // f64 -> f16 conversion using round-to-nearest-even rounding mode. LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) { - const LLT S1 = LLT::scalar(1); - const LLT S32 = LLT::scalar(32); + const LLT I1 = LLT::integer(1); + const LLT F32 = LLT::float32(); + const LLT I16 = LLT::integer(16); + const LLT I32 = LLT::integer(32); + const LLT I64 = LLT::integer(64); auto [Dst, Src] = MI.getFirst2Regs(); - assert(MRI.getType(Dst).getScalarType() == LLT::scalar(16) && - MRI.getType(Src).getScalarType() == LLT::scalar(64)); + assert(MRI.getType(Dst).getScalarType().isFloat(16) && + MRI.getType(Src).getScalarType().isFloat(64)); if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly. 
return UnableToLegalize; if (MIRBuilder.getMF().getTarget().Options.UnsafeFPMath) { unsigned Flags = MI.getFlags(); - auto Src32 = MIRBuilder.buildFPTrunc(S32, Src, Flags); + auto Src32 = MIRBuilder.buildFPTrunc(F32, Src, Flags); MIRBuilder.buildFPTrunc(Dst, Src32, Flags); MI.eraseFromParent(); return Legalized; @@ -7906,93 +7923,95 @@ LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) { const unsigned ExpBiasf64 = 1023; const unsigned ExpBiasf16 = 15; - auto Unmerge = MIRBuilder.buildUnmerge(S32, Src); + auto Unmerge = + MIRBuilder.buildUnmerge(I32, MIRBuilder.buildBitcast(I64, Src)); Register U = Unmerge.getReg(0); Register UH = Unmerge.getReg(1); - auto E = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 20)); - E = MIRBuilder.buildAnd(S32, E, MIRBuilder.buildConstant(S32, ExpMask)); + auto E = MIRBuilder.buildLShr(I32, UH, MIRBuilder.buildConstant(I32, 20)); + E = MIRBuilder.buildAnd(I32, E, MIRBuilder.buildConstant(I32, ExpMask)); // Subtract the fp64 exponent bias (1023) to get the real exponent and // add the f16 bias (15) to get the biased exponent for the f16 format. E = MIRBuilder.buildAdd( - S32, E, MIRBuilder.buildConstant(S32, -ExpBiasf64 + ExpBiasf16)); + I32, E, MIRBuilder.buildConstant(I32, -ExpBiasf64 + ExpBiasf16)); - auto M = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 8)); - M = MIRBuilder.buildAnd(S32, M, MIRBuilder.buildConstant(S32, 0xffe)); + auto M = MIRBuilder.buildLShr(I32, UH, MIRBuilder.buildConstant(I32, 8)); + M = MIRBuilder.buildAnd(I32, M, MIRBuilder.buildConstant(I32, 0xffe)); - auto MaskedSig = MIRBuilder.buildAnd(S32, UH, - MIRBuilder.buildConstant(S32, 0x1ff)); - MaskedSig = MIRBuilder.buildOr(S32, MaskedSig, U); + auto MaskedSig = + MIRBuilder.buildAnd(I32, UH, MIRBuilder.buildConstant(I32, 0x1ff)); + MaskedSig = MIRBuilder.buildOr(I32, MaskedSig, U); - auto Zero = MIRBuilder.buildConstant(S32, 0); - auto SigCmpNE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, MaskedSig, Zero); - auto Lo40Set = MIRBuilder.buildZExt(S32, SigCmpNE0); - M = MIRBuilder.buildOr(S32, M, Lo40Set); + auto Zero = MIRBuilder.buildConstant(I32, 0); + auto SigCmpNE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, I1, MaskedSig, Zero); + auto Lo40Set = MIRBuilder.buildZExt(I32, SigCmpNE0); + M = MIRBuilder.buildOr(I32, M, Lo40Set); // (M != 0 ? 
0x0200 : 0) | 0x7c00; - auto Bits0x200 = MIRBuilder.buildConstant(S32, 0x0200); - auto CmpM_NE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, M, Zero); - auto SelectCC = MIRBuilder.buildSelect(S32, CmpM_NE0, Bits0x200, Zero); + auto Bits0x200 = MIRBuilder.buildConstant(I32, 0x0200); + auto CmpM_NE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, I1, M, Zero); + auto SelectCC = MIRBuilder.buildSelect(I32, CmpM_NE0, Bits0x200, Zero); - auto Bits0x7c00 = MIRBuilder.buildConstant(S32, 0x7c00); - auto I = MIRBuilder.buildOr(S32, SelectCC, Bits0x7c00); + auto Bits0x7c00 = MIRBuilder.buildConstant(I32, 0x7c00); + auto I = MIRBuilder.buildOr(I32, SelectCC, Bits0x7c00); // N = M | (E << 12); - auto EShl12 = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 12)); - auto N = MIRBuilder.buildOr(S32, M, EShl12); + auto EShl12 = MIRBuilder.buildShl(I32, E, MIRBuilder.buildConstant(I32, 12)); + auto N = MIRBuilder.buildOr(I32, M, EShl12); // B = clamp(1-E, 0, 13); - auto One = MIRBuilder.buildConstant(S32, 1); - auto OneSubExp = MIRBuilder.buildSub(S32, One, E); - auto B = MIRBuilder.buildSMax(S32, OneSubExp, Zero); - B = MIRBuilder.buildSMin(S32, B, MIRBuilder.buildConstant(S32, 13)); + auto One = MIRBuilder.buildConstant(I32, 1); + auto OneSubExp = MIRBuilder.buildSub(I32, One, E); + auto B = MIRBuilder.buildSMax(I32, OneSubExp, Zero); + B = MIRBuilder.buildSMin(I32, B, MIRBuilder.buildConstant(I32, 13)); - auto SigSetHigh = MIRBuilder.buildOr(S32, M, - MIRBuilder.buildConstant(S32, 0x1000)); + auto SigSetHigh = + MIRBuilder.buildOr(I32, M, MIRBuilder.buildConstant(I32, 0x1000)); - auto D = MIRBuilder.buildLShr(S32, SigSetHigh, B); - auto D0 = MIRBuilder.buildShl(S32, D, B); + auto D = MIRBuilder.buildLShr(I32, SigSetHigh, B); + auto D0 = MIRBuilder.buildShl(I32, D, B); - auto D0_NE_SigSetHigh = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, - D0, SigSetHigh); - auto D1 = MIRBuilder.buildZExt(S32, D0_NE_SigSetHigh); - D = MIRBuilder.buildOr(S32, D, D1); + auto D0_NE_SigSetHigh = + MIRBuilder.buildICmp(CmpInst::ICMP_NE, I1, D0, SigSetHigh); + auto D1 = MIRBuilder.buildZExt(I32, D0_NE_SigSetHigh); + D = MIRBuilder.buildOr(I32, D, D1); - auto CmpELtOne = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, S1, E, One); - auto V = MIRBuilder.buildSelect(S32, CmpELtOne, D, N); + auto CmpELtOne = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, I1, E, One); + auto V = MIRBuilder.buildSelect(I32, CmpELtOne, D, N); - auto VLow3 = MIRBuilder.buildAnd(S32, V, MIRBuilder.buildConstant(S32, 7)); - V = MIRBuilder.buildLShr(S32, V, MIRBuilder.buildConstant(S32, 2)); + auto VLow3 = MIRBuilder.buildAnd(I32, V, MIRBuilder.buildConstant(I32, 7)); + V = MIRBuilder.buildLShr(I32, V, MIRBuilder.buildConstant(I32, 2)); - auto VLow3Eq3 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, VLow3, - MIRBuilder.buildConstant(S32, 3)); - auto V0 = MIRBuilder.buildZExt(S32, VLow3Eq3); + auto VLow3Eq3 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, I1, VLow3, + MIRBuilder.buildConstant(I32, 3)); + auto V0 = MIRBuilder.buildZExt(I32, VLow3Eq3); - auto VLow3Gt5 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, VLow3, - MIRBuilder.buildConstant(S32, 5)); - auto V1 = MIRBuilder.buildZExt(S32, VLow3Gt5); + auto VLow3Gt5 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, I1, VLow3, + MIRBuilder.buildConstant(I32, 5)); + auto V1 = MIRBuilder.buildZExt(I32, VLow3Gt5); - V1 = MIRBuilder.buildOr(S32, V0, V1); - V = MIRBuilder.buildAdd(S32, V, V1); + V1 = MIRBuilder.buildOr(I32, V0, V1); + V = MIRBuilder.buildAdd(I32, V, V1); - auto CmpEGt30 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, - E, 
MIRBuilder.buildConstant(S32, 30)); - V = MIRBuilder.buildSelect(S32, CmpEGt30, - MIRBuilder.buildConstant(S32, 0x7c00), V); + auto CmpEGt30 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, I1, E, + MIRBuilder.buildConstant(I32, 30)); + V = MIRBuilder.buildSelect(I32, CmpEGt30, + MIRBuilder.buildConstant(I32, 0x7c00), V); - auto CmpEGt1039 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, - E, MIRBuilder.buildConstant(S32, 1039)); - V = MIRBuilder.buildSelect(S32, CmpEGt1039, I, V); + auto CmpEGt1039 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, I1, E, + MIRBuilder.buildConstant(I32, 1039)); + V = MIRBuilder.buildSelect(I32, CmpEGt1039, I, V); // Extract the sign bit. - auto Sign = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 16)); - Sign = MIRBuilder.buildAnd(S32, Sign, MIRBuilder.buildConstant(S32, 0x8000)); + auto Sign = MIRBuilder.buildLShr(I32, UH, MIRBuilder.buildConstant(I32, 16)); + Sign = MIRBuilder.buildAnd(I32, Sign, MIRBuilder.buildConstant(I32, 0x8000)); // Insert the sign bit - V = MIRBuilder.buildOr(S32, Sign, V); + V = MIRBuilder.buildOr(I32, Sign, V); - MIRBuilder.buildTrunc(Dst, V); + auto Trunc = MIRBuilder.buildTrunc(I16, V); + MIRBuilder.buildBitcast(Dst, Trunc); MI.eraseFromParent(); return Legalized; } @@ -8000,10 +8019,8 @@ LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) { LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) { auto [DstTy, SrcTy] = MI.getFirst2LLTs(); - const LLT S64 = LLT::scalar(64); - const LLT S16 = LLT::scalar(16); - if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64) + if (DstTy.getScalarType().isFloat(16) && SrcTy.getScalarType().isFloat(64)) return lowerFPTRUNC_F64_TO_F16(MI); return UnableToLegalize; @@ -8038,7 +8055,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) { auto [Dst, Src0, Src1] = MI.getFirst3Regs(); const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode()); - LLT CmpType = MRI.getType(Dst).changeElementSize(1); + LLT CmpType = MRI.getType(Dst).changeElementType(LLT::integer(1)); auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1); MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1); @@ -8054,7 +8071,7 @@ LegalizerHelper::lowerThreewayCompare(MachineInstr &MI) { Register Dst = Cmp->getReg(0); LLT DstTy = MRI.getType(Dst); LLT SrcTy = MRI.getType(Cmp->getReg(1)); - LLT CmpTy = DstTy.changeElementSize(1); + LLT CmpTy = DstTy.changeElementType(LLT::integer(1)); CmpInst::Predicate LTPredicate = Cmp->isSigned() ? 
CmpInst::Predicate::ICMP_SLT @@ -8100,26 +8117,34 @@ LegalizerHelper::lowerFCopySign(MachineInstr &MI) { const int Src0Size = Src0Ty.getScalarSizeInBits(); const int Src1Size = Src1Ty.getScalarSizeInBits(); - auto SignBitMask = MIRBuilder.buildConstant( - Src0Ty, APInt::getSignMask(Src0Size)); + LLT Src0IntTy = Src0Ty.changeToInteger(); + LLT Src1IntTy = Src1Ty.changeToInteger(); + LLT DstIntTy = DstTy.changeToInteger(); + + auto SignBitMask = + MIRBuilder.buildConstant(Src0IntTy, APInt::getSignMask(Src0Size)); auto NotSignBitMask = MIRBuilder.buildConstant( - Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1)); + Src0IntTy, APInt::getLowBitsSet(Src0Size, Src0Size - 1)); - Register And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask).getReg(0); + auto Src0Int = MIRBuilder.buildBitcast(Src0IntTy, Src0); + auto Src1Int = MIRBuilder.buildBitcast(Src1IntTy, Src1); + + Register And0 = + MIRBuilder.buildAnd(Src0IntTy, Src0Int, NotSignBitMask).getReg(0); Register And1; if (Src0Ty == Src1Ty) { - And1 = MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask).getReg(0); + And1 = MIRBuilder.buildAnd(Src1IntTy, Src1Int, SignBitMask).getReg(0); } else if (Src0Size > Src1Size) { - auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size); - auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1); - auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt); - And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask).getReg(0); + auto ShiftAmt = MIRBuilder.buildConstant(Src0IntTy, Src0Size - Src1Size); + auto Zext = MIRBuilder.buildZExt(Src0IntTy, Src1Int); + auto Shift = MIRBuilder.buildShl(Src0IntTy, Zext, ShiftAmt); + And1 = MIRBuilder.buildAnd(Src0IntTy, Shift, SignBitMask).getReg(0); } else { - auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size); - auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt); - auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift); - And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask).getReg(0); + auto ShiftAmt = MIRBuilder.buildConstant(Src1IntTy, Src1Size - Src0Size); + auto Shift = MIRBuilder.buildLShr(Src1IntTy, Src1Int, ShiftAmt); + auto Trunc = MIRBuilder.buildTrunc(Src0IntTy, Shift); + And1 = MIRBuilder.buildAnd(Src0IntTy, Trunc, SignBitMask).getReg(0); } // Be careful about setting nsz/nnan/ninf on every instruction, since the @@ -8130,7 +8155,8 @@ LegalizerHelper::lowerFCopySign(MachineInstr &MI) { // We masked the sign bit and the not-sign bit, so these are disjoint. 
Flags |= MachineInstr::Disjoint; - MIRBuilder.buildOr(Dst, And0, And1, Flags); + auto Or = MIRBuilder.buildOr(DstIntTy, And0, And1, Flags); + MIRBuilder.buildBitcast(Dst, Or); MI.eraseFromParent(); return Legalized; @@ -8183,7 +8209,7 @@ LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) { auto [DstReg, X] = MI.getFirst2Regs(); const unsigned Flags = MI.getFlags(); const LLT Ty = MRI.getType(DstReg); - const LLT CondTy = Ty.changeElementSize(1); + const LLT CondTy = Ty.changeElementType(LLT::integer(1)); // round(x) => // t = trunc(x); @@ -8216,7 +8242,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerFFloor(MachineInstr &MI) { auto [DstReg, SrcReg] = MI.getFirst2Regs(); unsigned Flags = MI.getFlags(); LLT Ty = MRI.getType(DstReg); - const LLT CondTy = Ty.changeElementSize(1); + const LLT CondTy = Ty.changeElementType(LLT::integer(1)); // result = trunc(src); // if (src < 0.0 && src != result) @@ -8243,7 +8269,7 @@ LegalizerHelper::lowerMergeValues(MachineInstr &MI) { auto [DstReg, DstTy, Src0Reg, Src0Ty] = MI.getFirst2RegLLTs(); unsigned PartSize = Src0Ty.getSizeInBits(); - LLT WideTy = LLT::scalar(DstTy.getSizeInBits()); + LLT WideTy = LLT::integer(DstTy.getSizeInBits()); Register ResultReg = MIRBuilder.buildZExt(WideTy, Src0Reg).getReg(0); for (unsigned I = 2; I != NumOps; ++I) { @@ -8281,9 +8307,24 @@ LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) { Register SrcReg = MI.getOperand(NumDst).getReg(); Register Dst0Reg = MI.getOperand(0).getReg(); LLT DstTy = MRI.getType(Dst0Reg); + LLT SrcTy = MRI.getType(SrcReg); if (DstTy.isPointer()) return UnableToLegalize; // TODO + for (auto &Def : MI.all_defs()) { + LLT Ty = MRI.getType(Def.getReg()); + if (Ty.isFloat()) + bitcastDst(MI, Ty.changeToInteger(), Def.getOperandNo()); + } + + if (SrcTy.isFloatVector()) + bitcastSrc(MI, SrcTy.changeToInteger(), NumDst); + + SrcReg = MI.getOperand(NumDst).getReg(); + Dst0Reg = MI.getOperand(0).getReg(); + DstTy = MRI.getType(Dst0Reg); + SrcTy = MRI.getType(SrcReg); + SrcReg = coerceToScalar(SrcReg); if (!SrcReg) return UnableToLegalize; @@ -8391,7 +8432,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerShuffleVector(MachineInstr &MI) { auto [DstReg, DstTy, Src0Reg, Src0Ty, Src1Reg, Src1Ty] = MI.getFirst3RegLLTs(); - LLT IdxTy = LLT::scalar(32); + LLT IdxTy = LLT::integer(32); ArrayRef Mask = MI.getOperand(3).getShuffleMask(); Register Undef; @@ -8443,7 +8484,7 @@ LegalizerHelper::lowerVECTOR_COMPRESS(llvm::MachineInstr &MI) { MachinePointerInfo ValPtrInfo = MachinePointerInfo::getUnknownStack(*MI.getMF()); - LLT IdxTy = LLT::scalar(32); + LLT IdxTy = LLT::integer(32); LLT ValTy = VecTy.getElementType(); Align ValAlign = getStackTemporaryAlignment(ValTy); @@ -8465,7 +8506,7 @@ LegalizerHelper::lowerVECTOR_COMPRESS(llvm::MachineInstr &MI) { } else if (HasPassthru) { auto Popcount = MIRBuilder.buildZExt(MaskTy.changeElementSize(32), Mask); Popcount = MIRBuilder.buildInstr(TargetOpcode::G_VECREDUCE_ADD, - {LLT::scalar(32)}, {Popcount}); + {LLT::integer(32)}, {Popcount}); Register LastElmtPtr = getVectorElementPointer(StackPtr, VecTy, Popcount.getReg(0)); @@ -8485,7 +8526,7 @@ LegalizerHelper::lowerVECTOR_COMPRESS(llvm::MachineInstr &MI) { LLT MaskITy = MaskTy.getElementType(); auto MaskI = MIRBuilder.buildExtractVectorElement(MaskITy, Mask, Idx); if (MaskITy.getSizeInBits() > 1) - MaskI = MIRBuilder.buildTrunc(LLT::scalar(1), MaskI); + MaskI = MIRBuilder.buildTrunc(LLT::integer(1), MaskI); MaskI = MIRBuilder.buildZExt(IdxTy, MaskI); OutPos = MIRBuilder.buildAdd(IdxTy, OutPos, 
MaskI); @@ -8494,7 +8535,7 @@ LegalizerHelper::lowerVECTOR_COMPRESS(llvm::MachineInstr &MI) { auto EndOfVector = MIRBuilder.buildConstant(IdxTy, VecTy.getNumElements() - 1); auto AllLanesSelected = MIRBuilder.buildICmp( - CmpInst::ICMP_UGT, LLT::scalar(1), OutPos, EndOfVector); + CmpInst::ICMP_UGT, LLT::integer(1), OutPos, EndOfVector); OutPos = MIRBuilder.buildInstr(TargetOpcode::G_UMIN, {IdxTy}, {OutPos, EndOfVector}); ElmtPtr = getVectorElementPointer(StackPtr, VecTy, OutPos.getReg(0)); @@ -8517,7 +8558,7 @@ Register LegalizerHelper::getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, Align Alignment, LLT PtrTy) { - LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits()); + LLT IntPtrTy = LLT::integer(PtrTy.getSizeInBits()); auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg); SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp); @@ -8616,8 +8657,8 @@ LegalizerHelper::lowerExtract(MachineInstr &MI) { (SrcTy.isScalar() || (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) { LLT SrcIntTy = SrcTy; - if (!SrcTy.isScalar()) { - SrcIntTy = LLT::scalar(SrcTy.getSizeInBits()); + if (!SrcTy.isInteger()) { + SrcIntTy = LLT::integer(SrcTy.getSizeInBits()); SrcReg = MIRBuilder.buildBitcast(SrcIntTy, SrcReg).getReg(0); } @@ -8697,13 +8738,13 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) { LLT IntDstTy = DstTy; - if (!DstTy.isScalar()) { - IntDstTy = LLT::scalar(DstTy.getSizeInBits()); + if (!DstTy.isInteger()) { + IntDstTy = LLT::integer(DstTy.getSizeInBits()); Src = MIRBuilder.buildCast(IntDstTy, Src).getReg(0); } - if (!InsertTy.isScalar()) { - const LLT IntInsertTy = LLT::scalar(InsertTy.getSizeInBits()); + if (!InsertTy.isInteger()) { + const LLT IntInsertTy = LLT::integer(InsertTy.getSizeInBits()); InsertSrc = MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0); } @@ -8843,7 +8884,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerAddSubSatToAddoSubo(MachineInstr &MI) { auto [Res, LHS, RHS] = MI.getFirst3Regs(); LLT Ty = MRI.getType(Res); - LLT BoolTy = Ty.changeElementSize(1); + LLT BoolTy = Ty.changeElementType(LLT::integer(1)); + bool IsSigned; bool IsAdd; unsigned OverflowOp; @@ -8913,7 +8955,7 @@ LegalizerHelper::lowerShlSat(MachineInstr &MI) { bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT; auto [Res, LHS, RHS] = MI.getFirst3Regs(); LLT Ty = MRI.getType(Res); - LLT BoolTy = Ty.changeElementSize(1); + LLT BoolTy = Ty.changeElementType(LLT::integer(1)); unsigned BW = Ty.getScalarSizeInBits(); auto Result = MIRBuilder.buildShl(Ty, LHS, RHS); @@ -9105,7 +9147,7 @@ LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) { unsigned BitSize = SrcTy.getScalarSizeInBits(); const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType()); - LLT IntTy = LLT::scalar(BitSize); + LLT IntTy = LLT::integer(BitSize); if (SrcTy.isVector()) IntTy = LLT::vector(SrcTy.getElementCount(), IntTy); auto AsInt = MIRBuilder.buildCopy(IntTy, SrcReg); @@ -9264,7 +9306,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) { bool IsEltPtr = DstTy.isPointerOrPointerVector(); if (IsEltPtr) { - LLT ScalarPtrTy = LLT::scalar(DstTy.getScalarSizeInBits()); + LLT ScalarPtrTy = LLT::integer(DstTy.getScalarSizeInBits()); LLT NewTy = DstTy.changeElementType(ScalarPtrTy); Op1Reg = MIRBuilder.buildPtrToInt(NewTy, Op1Reg).getReg(0); Op2Reg = MIRBuilder.buildPtrToInt(NewTy, Op2Reg).getReg(0); @@ -9367,7 +9409,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerAbsToCNeg(MachineInstr &MI) { Register SrcReg = 
MI.getOperand(1).getReg(); Register DestReg = MI.getOperand(0).getReg(); - LLT Ty = MRI.getType(SrcReg), IType = LLT::scalar(1); + LLT Ty = MRI.getType(SrcReg), IType = LLT::integer(1); auto Zero = MIRBuilder.buildConstant(Ty, 0).getReg(0); auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg).getReg(0); auto ICmp = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, IType, SrcReg, Zero); @@ -9381,12 +9423,14 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerFAbs(MachineInstr &MI) { Register DstReg = MI.getOperand(0).getReg(); LLT Ty = MRI.getType(DstReg); + LLT IntTy = Ty.changeToInteger(); // Reset sign bit - MIRBuilder.buildAnd( - DstReg, SrcReg, - MIRBuilder.buildConstant( - Ty, APInt::getSignedMaxValue(Ty.getScalarSizeInBits()))); + auto Bitcast = MIRBuilder.buildBitcast(IntTy, SrcReg); + auto SignMax = MIRBuilder.buildConstant( + IntTy, APInt::getSignedMaxValue(Ty.getScalarSizeInBits())); + auto And = MIRBuilder.buildAnd(IntTy, Bitcast, SignMax); + MIRBuilder.buildBitcast(DstReg, And); MI.eraseFromParent(); return Legalized; @@ -9426,7 +9470,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerVAArg(MachineInstr &MI) { auto VAList = MIRBuilder.buildLoad(PtrTy, ListPtr, *PtrLoadMMO).getReg(0); const Align A(MI.getOperand(2).getImm()); - LLT PtrTyAsScalarTy = LLT::scalar(PtrTy.getSizeInBits()); + LLT PtrTyAsScalarTy = LLT::integer(PtrTy.getSizeInBits()); if (A > TLI.getMinStackArgumentAlignment()) { Register AlignAmt = MIRBuilder.buildConstant(PtrTyAsScalarTy, A.value() - 1).getReg(0); @@ -9483,11 +9527,11 @@ static bool findGISelOptimalMemOpLowering(std::vector &MemOps, // Use the largest scalar type whose alignment constraints are satisfied. // We only need to check DstAlign here as SrcAlign is always greater or // equal to DstAlign (or zero). - Ty = LLT::scalar(64); + Ty = LLT::integer(64); if (Op.isFixedDstAlign()) while (Op.getDstAlign() < Ty.getSizeInBytes() && !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign())) - Ty = LLT::scalar(Ty.getSizeInBytes()); + Ty = LLT::integer(Ty.getSizeInBytes()); assert(Ty.getSizeInBits() > 0 && "Could not find valid type"); // FIXME: check for the largest legal type we can load/store to. } @@ -9502,8 +9546,9 @@ static bool findGISelOptimalMemOpLowering(std::vector &MemOps, // FIXME: check for mem op safety and legality of the types. Not all of // SDAGisms map cleanly to GISel concepts. if (NewTy.isVector()) - NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32); - NewTy = LLT::scalar(llvm::bit_floor(NewTy.getSizeInBits() - 1)); + NewTy = + NewTy.getSizeInBits() > 64 ? 
LLT::integer(64) : LLT::integer(32); + NewTy = LLT::integer(llvm::bit_floor(NewTy.getSizeInBits() - 1)); unsigned NewTySize = NewTy.getSizeInBytes(); assert(NewTySize > 0 && "Could not find appropriate type"); @@ -9667,7 +9712,7 @@ LegalizerHelper::lowerMemset(MachineInstr &MI, Register Dst, Register Val, Register Ptr = Dst; if (DstOff != 0) { auto Offset = - MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff); + MIB.buildConstant(LLT::integer(PtrTy.getSizeInBits()), DstOff); Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0); } @@ -9806,8 +9851,9 @@ LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src, Register Offset; if (CurrOffset != 0) { LLT SrcTy = MRI.getType(Src); - Offset = MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset) - .getReg(0); + Offset = + MIB.buildConstant(LLT::integer(SrcTy.getSizeInBits()), CurrOffset) + .getReg(0); LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0); } auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO); @@ -9905,7 +9951,7 @@ LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src, if (CurrOffset != 0) { LLT SrcTy = MRI.getType(Src); auto Offset = - MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset); + MIB.buildConstant(LLT::integer(SrcTy.getSizeInBits()), CurrOffset); LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0); } LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0)); @@ -9923,7 +9969,7 @@ LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src, if (CurrOffset != 0) { LLT DstTy = MRI.getType(Dst); auto Offset = - MIB.buildConstant(LLT::scalar(DstTy.getSizeInBits()), CurrOffset); + MIB.buildConstant(LLT::integer(DstTy.getSizeInBits()), CurrOffset); StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0); } MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO); diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp index 1d7fe92346fc4..8ed8f62678195 100644 --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -609,7 +609,7 @@ bool llvm::extractParts(Register Reg, LLT RegTy, LLT MainTy, LLT &LeftoverTy, return true; } - LeftoverTy = LLT::scalar(LeftoverSize); + LeftoverTy = LLT::integer(LeftoverSize); // For irregular sizes, extract the individual parts. for (unsigned I = 0; I != NumParts; ++I) { Register NewReg = MRI.createGenericVirtualRegister(MainTy); diff --git a/llvm/unittests/CodeGen/GlobalISel/LegalizerInfoTest.cpp b/llvm/unittests/CodeGen/GlobalISel/LegalizerInfoTest.cpp index 836c81b524672..3c641076d59da 100644 --- a/llvm/unittests/CodeGen/GlobalISel/LegalizerInfoTest.cpp +++ b/llvm/unittests/CodeGen/GlobalISel/LegalizerInfoTest.cpp @@ -7,8 +7,9 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" -#include "llvm/CodeGen/TargetOpcodes.h" #include "GISelMITest.h" +#include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/Support/TypeSize.h" #include "gtest/gtest.h" using namespace llvm; @@ -54,7 +55,7 @@ TEST(LegalizerInfoTest, ScalarRISC) { // Typical RISCy set of operations based on AArch64. 
for (unsigned Op : {G_ADD, G_SUB}) { for (unsigned Size : {32, 64}) - LegacyInfo.setAction({Op, 0, LLT::scalar(Size)}, + LegacyInfo.setAction({Op, 0, LLT::integer(Size)}, LegacyLegalizeActions::Legal); LegacyInfo.setLegalizeScalarToDifferentSizeStrategy( Op, 0, LegacyLegalizerInfo::widenToLargerTypesAndNarrowToLargest); @@ -64,29 +65,29 @@ TEST(LegalizerInfoTest, ScalarRISC) { for (unsigned opcode : {G_ADD, G_SUB}) { // Check we infer the correct types and actually do what we're told. - EXPECT_EQ(L.getAction({opcode, {LLT::scalar(8)}}), - LegalizeActionStep(WidenScalar, 0, LLT::scalar(32))); - EXPECT_EQ(L.getAction({opcode, {LLT::scalar(16)}}), - LegalizeActionStep(WidenScalar, 0, LLT::scalar(32))); - EXPECT_EQ(L.getAction({opcode, {LLT::scalar(32)}}), + EXPECT_EQ(L.getAction({opcode, {LLT::integer(8)}}), + LegalizeActionStep(WidenScalar, 0, LLT::integer(32))); + EXPECT_EQ(L.getAction({opcode, {LLT::integer(16)}}), + LegalizeActionStep(WidenScalar, 0, LLT::integer(32))); + EXPECT_EQ(L.getAction({opcode, {LLT::integer(32)}}), LegalizeActionStep(Legal, 0, LLT{})); - EXPECT_EQ(L.getAction({opcode, {LLT::scalar(64)}}), + EXPECT_EQ(L.getAction({opcode, {LLT::integer(64)}}), LegalizeActionStep(Legal, 0, LLT{})); // Make sure the default for over-sized types applies. - EXPECT_EQ(L.getAction({opcode, {LLT::scalar(128)}}), - LegalizeActionStep(NarrowScalar, 0, LLT::scalar(64))); + EXPECT_EQ(L.getAction({opcode, {LLT::integer(128)}}), + LegalizeActionStep(NarrowScalar, 0, LLT::integer(64))); // Make sure we also handle unusual sizes - EXPECT_EQ(L.getAction({opcode, {LLT::scalar(1)}}), - LegalizeActionStep(WidenScalar, 0, LLT::scalar(32))); - EXPECT_EQ(L.getAction({opcode, {LLT::scalar(31)}}), - LegalizeActionStep(WidenScalar, 0, LLT::scalar(32))); - EXPECT_EQ(L.getAction({opcode, {LLT::scalar(33)}}), - LegalizeActionStep(WidenScalar, 0, LLT::scalar(64))); - EXPECT_EQ(L.getAction({opcode, {LLT::scalar(63)}}), - LegalizeActionStep(WidenScalar, 0, LLT::scalar(64))); - EXPECT_EQ(L.getAction({opcode, {LLT::scalar(65)}}), - LegalizeActionStep(NarrowScalar, 0, LLT::scalar(64))); + EXPECT_EQ(L.getAction({opcode, {LLT::integer(1)}}), + LegalizeActionStep(WidenScalar, 0, LLT::integer(32))); + EXPECT_EQ(L.getAction({opcode, {LLT::integer(31)}}), + LegalizeActionStep(WidenScalar, 0, LLT::integer(32))); + EXPECT_EQ(L.getAction({opcode, {LLT::integer(33)}}), + LegalizeActionStep(WidenScalar, 0, LLT::integer(64))); + EXPECT_EQ(L.getAction({opcode, {LLT::integer(63)}}), + LegalizeActionStep(WidenScalar, 0, LLT::integer(64))); + EXPECT_EQ(L.getAction({opcode, {LLT::integer(65)}}), + LegalizeActionStep(NarrowScalar, 0, LLT::integer(64))); } } @@ -95,17 +96,17 @@ TEST(LegalizerInfoTest, VectorRISC) { LegalizerInfo L; auto &LegacyInfo = L.getLegacyLegalizerInfo(); // Typical RISCy set of operations based on ARM. 
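The spelling change exercised by these test updates is mechanical: bare bit widths become explicitly integer-typed LLTs. A minimal sketch of the old and new forms, assuming the LLT::integer factory and the element-typed LLT::fixed_vector overload used throughout this series:

  #include "llvm/CodeGenTypes/LowLevelType.h"
  using namespace llvm;

  static void spellingExample() {
    LLT OldScalar = LLT::scalar(32);                        // kind-agnostic scalar
    LLT NewScalar = LLT::integer(32);                       // explicitly an integer
    LLT OldVec    = LLT::fixed_vector(4, 16);               // element as a bit width
    LLT NewVec    = LLT::fixed_vector(4, LLT::integer(16)); // element as a typed LLT
    (void)OldScalar; (void)NewScalar; (void)OldVec; (void)NewVec;
  }
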
- LegacyInfo.setAction({G_ADD, LLT::fixed_vector(8, 8)}, + LegacyInfo.setAction({G_ADD, LLT::fixed_vector(8, LLT::integer(8))}, LegacyLegalizeActions::Legal); - LegacyInfo.setAction({G_ADD, LLT::fixed_vector(16, 8)}, + LegacyInfo.setAction({G_ADD, LLT::fixed_vector(16, LLT::integer(8))}, LegacyLegalizeActions::Legal); - LegacyInfo.setAction({G_ADD, LLT::fixed_vector(4, 16)}, + LegacyInfo.setAction({G_ADD, LLT::fixed_vector(4, LLT::integer(16))}, LegacyLegalizeActions::Legal); - LegacyInfo.setAction({G_ADD, LLT::fixed_vector(8, 16)}, + LegacyInfo.setAction({G_ADD, LLT::fixed_vector(8, LLT::integer(16))}, LegacyLegalizeActions::Legal); - LegacyInfo.setAction({G_ADD, LLT::fixed_vector(2, 32)}, + LegacyInfo.setAction({G_ADD, LLT::fixed_vector(2, LLT::integer(32))}, LegacyLegalizeActions::Legal); - LegacyInfo.setAction({G_ADD, LLT::fixed_vector(4, 32)}, + LegacyInfo.setAction({G_ADD, LLT::fixed_vector(4, LLT::integer(32))}, LegacyLegalizeActions::Legal); LegacyInfo.setLegalizeVectorElementToDifferentSizeStrategy( @@ -118,19 +119,24 @@ TEST(LegalizerInfoTest, VectorRISC) { // Check we infer the correct types and actually do what we're told for some // simple cases. - EXPECT_EQ(L.getAction({G_ADD, {LLT::fixed_vector(8, 8)}}), + EXPECT_EQ(L.getAction({G_ADD, {LLT::fixed_vector(8, LLT::integer(8))}}), LegalizeActionStep(Legal, 0, LLT{})); - EXPECT_EQ(L.getAction({G_ADD, {LLT::fixed_vector(8, 7)}}), - LegalizeActionStep(WidenScalar, 0, LLT::fixed_vector(8, 8))); - EXPECT_EQ(L.getAction({G_ADD, {LLT::fixed_vector(2, 8)}}), - LegalizeActionStep(MoreElements, 0, LLT::fixed_vector(8, 8))); - EXPECT_EQ(L.getAction({G_ADD, {LLT::fixed_vector(8, 32)}}), - LegalizeActionStep(FewerElements, 0, LLT::fixed_vector(4, 32))); + EXPECT_EQ(L.getAction({G_ADD, {LLT::fixed_vector(8, LLT::integer(7))}}), + LegalizeActionStep(WidenScalar, 0, + LLT::fixed_vector(8, LLT::integer(8)))); + EXPECT_EQ(L.getAction({G_ADD, {LLT::fixed_vector(2, LLT::integer(8))}}), + LegalizeActionStep(MoreElements, 0, + LLT::fixed_vector(8, LLT::integer(8)))); + EXPECT_EQ(L.getAction({G_ADD, {LLT::fixed_vector(8, LLT::integer(32))}}), + LegalizeActionStep(FewerElements, 0, + LLT::fixed_vector(4, LLT::integer(32)))); // Check a few non-power-of-2 sizes: - EXPECT_EQ(L.getAction({G_ADD, {LLT::fixed_vector(3, 3)}}), - LegalizeActionStep(WidenScalar, 0, LLT::fixed_vector(3, 8))); - EXPECT_EQ(L.getAction({G_ADD, {LLT::fixed_vector(3, 8)}}), - LegalizeActionStep(MoreElements, 0, LLT::fixed_vector(8, 8))); + EXPECT_EQ(L.getAction({G_ADD, {LLT::fixed_vector(3, LLT::integer(3))}}), + LegalizeActionStep(WidenScalar, 0, + LLT::fixed_vector(3, LLT::integer(8)))); + EXPECT_EQ(L.getAction({G_ADD, {LLT::fixed_vector(3, LLT::integer(8))}}), + LegalizeActionStep(MoreElements, 0, + LLT::fixed_vector(8, LLT::integer(8)))); } TEST(LegalizerInfoTest, MultipleTypes) { @@ -176,10 +182,10 @@ TEST(LegalizerInfoTest, MultipleSteps) { LegacyInfo.computeTables(); - EXPECT_EQ(L.getAction({G_UREM, {LLT::scalar(16)}}), - LegalizeActionStep(WidenScalar, 0, LLT::scalar(32))); - EXPECT_EQ(L.getAction({G_UREM, {LLT::scalar(32)}}), - LegalizeActionStep(Lower, 0, LLT::scalar(32))); + EXPECT_EQ(L.getAction({G_UREM, {LLT::integer(16)}}), + LegalizeActionStep(WidenScalar, 0, LLT::integer(32))); + EXPECT_EQ(L.getAction({G_UREM, {LLT::integer(32)}}), + LegalizeActionStep(Lower, 0, LLT::integer(32))); } TEST(LegalizerInfoTest, SizeChangeStrategy) { @@ -187,7 +193,7 @@ TEST(LegalizerInfoTest, SizeChangeStrategy) { LegalizerInfo L; auto &LegacyInfo = L.getLegacyLegalizerInfo(); for 
(unsigned Size : {1, 8, 16, 32}) - LegacyInfo.setAction({G_UREM, 0, LLT::scalar(Size)}, + LegacyInfo.setAction({G_UREM, 0, LLT::integer(Size)}, LegacyLegalizeActions::Legal); LegacyInfo.setLegalizeScalarToDifferentSizeStrategy( @@ -196,21 +202,21 @@ TEST(LegalizerInfoTest, SizeChangeStrategy) { // Check we infer the correct types and actually do what we're told. for (unsigned Size : {1, 8, 16, 32}) { - EXPECT_EQ(L.getAction({G_UREM, {LLT::scalar(Size)}}), + EXPECT_EQ(L.getAction({G_UREM, {LLT::integer(Size)}}), LegalizeActionStep(Legal, 0, LLT{})); } - EXPECT_EQ(L.getAction({G_UREM, {LLT::scalar(2)}}), - LegalizeActionStep(WidenScalar, 0, LLT::scalar(8))); - EXPECT_EQ(L.getAction({G_UREM, {LLT::scalar(7)}}), - LegalizeActionStep(WidenScalar, 0, LLT::scalar(8))); - EXPECT_EQ(L.getAction({G_UREM, {LLT::scalar(9)}}), - LegalizeActionStep(WidenScalar, 0, LLT::scalar(16))); - EXPECT_EQ(L.getAction({G_UREM, {LLT::scalar(17)}}), - LegalizeActionStep(WidenScalar, 0, LLT::scalar(32))); - EXPECT_EQ(L.getAction({G_UREM, {LLT::scalar(31)}}), - LegalizeActionStep(WidenScalar, 0, LLT::scalar(32))); - EXPECT_EQ(L.getAction({G_UREM, {LLT::scalar(33)}}), - LegalizeActionStep(Unsupported, 0, LLT::scalar(33))); + EXPECT_EQ(L.getAction({G_UREM, {LLT::integer(2)}}), + LegalizeActionStep(WidenScalar, 0, LLT::integer(8))); + EXPECT_EQ(L.getAction({G_UREM, {LLT::integer(7)}}), + LegalizeActionStep(WidenScalar, 0, LLT::integer(8))); + EXPECT_EQ(L.getAction({G_UREM, {LLT::integer(9)}}), + LegalizeActionStep(WidenScalar, 0, LLT::integer(16))); + EXPECT_EQ(L.getAction({G_UREM, {LLT::integer(17)}}), + LegalizeActionStep(WidenScalar, 0, LLT::integer(32))); + EXPECT_EQ(L.getAction({G_UREM, {LLT::integer(31)}}), + LegalizeActionStep(WidenScalar, 0, LLT::integer(32))); + EXPECT_EQ(L.getAction({G_UREM, {LLT::integer(33)}}), + LegalizeActionStep(Unsupported, 0, LLT::integer(33))); } } @@ -436,20 +442,20 @@ TEST(LegalizerInfoTest, RuleSets) { EXPECT_ACTION(MoreElements, 1, v2s1, LegalityQuery(G_SELECT, {v2s32, s1})); EXPECT_ACTION(MoreElements, 1, v4s1, LegalityQuery(G_SELECT, {v4p0, s1})); - EXPECT_ACTION(MoreElements, 1, LLT::scalable_vector(2, 1), - LegalityQuery(G_SELECT, {LLT::scalable_vector(2, 32), s1})); - EXPECT_ACTION(MoreElements, 1, LLT::scalable_vector(4, 1), - LegalityQuery(G_SELECT, {LLT::scalable_vector(4, 32), s1})); + EXPECT_ACTION(MoreElements, 1, LLT::scalable_vector(2, s1), + LegalityQuery(G_SELECT, {LLT::scalable_vector(2, s32), s1})); + EXPECT_ACTION(MoreElements, 1, LLT::scalable_vector(4, s1), + LegalityQuery(G_SELECT, {LLT::scalable_vector(4, s32), s1})); EXPECT_ACTION(MoreElements, 1, LLT::scalable_vector(2, s1), LegalityQuery(G_SELECT, {LLT::scalable_vector(2, p0), s1})); EXPECT_ACTION(FewerElements, 0, v2s32, LegalityQuery(G_ADD, {v4s32})); EXPECT_ACTION(FewerElements, 0, s32, LegalityQuery(G_ADD, {v8s32})); - EXPECT_ACTION(FewerElements, 0, LLT::scalable_vector(2, 16), - LegalityQuery(G_ADD, {LLT::scalable_vector(4, 16)})); + EXPECT_ACTION(FewerElements, 0, LLT::scalable_vector(2, s16), + LegalityQuery(G_ADD, {LLT::scalable_vector(4, s16)})); EXPECT_ACTION(FewerElements, 0, s16, - LegalityQuery(G_ADD, {LLT::scalable_vector(8, 16)})); + LegalityQuery(G_ADD, {LLT::scalable_vector(8, s16)})); } // Test minScalarEltSameAsIf From bf988990a4505b72b544df358829e5b2f29ce49d Mon Sep 17 00:00:00 2001 From: Tim Gymnich Date: Tue, 28 Jan 2025 12:15:37 +0100 Subject: [PATCH 11/16] FPInfo: RegBankSelect --- llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp | 66 +++++++++++++------ 
llvm/lib/CodeGen/RegisterBankInfo.cpp | 13 +++- 2 files changed, 58 insertions(+), 21 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp index f1fec547ebd60..e5047225393b5 100644 --- a/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp +++ b/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp @@ -31,6 +31,7 @@ #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/Function.h" +#include "llvm/IR/Instruction.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/BlockFrequency.h" @@ -158,10 +159,11 @@ bool RegBankSelect::repairReg( // Build the instruction used to repair, then clone it at the right // places. Avoiding buildCopy bypasses the check that Src and Dst have the - // same types because the type is a placeholder when this function is called. + // same types because the type is a placeholder when this function is + // called. MI = MIRBuilder.buildInstrNoInsert(TargetOpcode::COPY) - .addDef(Dst) - .addUse(Src); + .addDef(Dst) + .addUse(Src); LLVM_DEBUG(dbgs() << "Copy: " << printReg(Src) << ':' << printRegClassOrBank(Src, *MRI, TRI) << " to: " << printReg(Dst) << ':' @@ -169,15 +171,17 @@ bool RegBankSelect::repairReg( } else { // TODO: Support with G_IMPLICIT_DEF + G_INSERT sequence or G_EXTRACT // sequence. - assert(ValMapping.partsAllUniform() && "irregular breakdowns not supported"); + assert(ValMapping.partsAllUniform() && + "irregular breakdowns not supported"); - LLT RegTy = MRI->getType(MO.getReg()); + Register MergeReg = MO.getReg(); + LLT RegTy = MRI->getType(MergeReg); if (MO.isDef()) { unsigned MergeOp; if (RegTy.isVector()) { - if (ValMapping.NumBreakDowns == RegTy.getNumElements()) + if (ValMapping.NumBreakDowns == RegTy.getNumElements()) { MergeOp = TargetOpcode::G_BUILD_VECTOR; - else { + } else { assert( (ValMapping.BreakDown[0].Length * ValMapping.NumBreakDowns == RegTy.getSizeInBits()) && @@ -187,12 +191,17 @@ bool RegBankSelect::repairReg( MergeOp = TargetOpcode::G_CONCAT_VECTORS; } - } else + } else { MergeOp = TargetOpcode::G_MERGE_VALUES; + if (RegTy.isFloat()) { + const RegisterBank *Bank = ValMapping.BreakDown[0].RegBank; + LLT Ty = RegTy.changeToInteger(); + MergeReg = MRI->createVirtualRegister({Bank, Ty}); + } + } auto MergeBuilder = - MIRBuilder.buildInstrNoInsert(MergeOp) - .addDef(MO.getReg()); + MIRBuilder.buildInstrNoInsert(MergeOp).addDef(MergeReg); for (Register SrcReg : NewVRegs) MergeBuilder.addUse(SrcReg); @@ -200,11 +209,18 @@ bool RegBankSelect::repairReg( MI = MergeBuilder; } else { MachineInstrBuilder UnMergeBuilder = - MIRBuilder.buildInstrNoInsert(TargetOpcode::G_UNMERGE_VALUES); + MIRBuilder.buildInstrNoInsert(TargetOpcode::G_UNMERGE_VALUES); for (Register DefReg : NewVRegs) UnMergeBuilder.addDef(DefReg); - UnMergeBuilder.addUse(MO.getReg()); + if (RegTy.isFloat()) { + const RegisterBank *Bank = ValMapping.BreakDown[0].RegBank; + MergeReg = + MIRBuilder.buildBitcast({Bank, RegTy.changeToInteger()}, MO.getReg()) + .getReg(0); + } + + UnMergeBuilder.addUse(MergeReg); MI = UnMergeBuilder; } } @@ -215,20 +231,30 @@ bool RegBankSelect::repairReg( // TODO: // Check if MI is legal. if not, we need to legalize all the // instructions we are going to insert. 
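The float handling added above keeps G_MERGE_VALUES / G_UNMERGE_VALUES on integer types and reconnects the floating-point register through a G_BITCAST. A rough, illustrative sketch (not the patch code; the names and the simplified register creation are assumptions) of the shape of the repair for a floating-point def split into two 32-bit parts:

  #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
  #include "llvm/CodeGen/MachineRegisterInfo.h"
  using namespace llvm;

  static void repairFloatDefSketch(MachineIRBuilder &MIRBuilder,
                                   MachineRegisterInfo &MRI, Register Dst,
                                   Register Part0, Register Part1) {
    // Merge the repaired 32-bit parts into an integer-typed temporary...
    LLT IntTy = MRI.getType(Dst).changeToInteger(); // e.g. f64 -> i64
    Register Tmp = MRI.createGenericVirtualRegister(IntTy);
    MIRBuilder.buildMergeValues(Tmp, {Part0, Part1});
    // ...then bitcast the integer back onto the original floating-point def.
    MIRBuilder.buildBitcast(Dst, Tmp);
  }
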
- std::unique_ptr NewInstrs( - new MachineInstr *[RepairPt.getNumInsertPoints()]); - bool IsFirst = true; - unsigned Idx = 0; - for (const std::unique_ptr &InsertPt : RepairPt) { + SmallVector NewInstrs; + NewInstrs.reserve(RepairPt.getNumInsertPoints()); + for (auto &&[Idx, InsertPt] : enumerate(RepairPt)) { MachineInstr *CurMI; - if (IsFirst) + if (Idx == 0) CurMI = MI; else CurMI = MIRBuilder.getMF().CloneMachineInstr(MI); + InsertPt->insert(*CurMI); - NewInstrs[Idx++] = CurMI; - IsFirst = false; + NewInstrs.push_back(CurMI); } + + LLT RegTy = MRI->getType(MO.getReg()); + if (MO.isDef() && RegTy.isFloat()) { + for (auto *MI : NewInstrs) { + auto Cast = MIRBuilder.buildInstrNoInsert(TargetOpcode::G_BITCAST) + .addDef(MO.getReg()) + .addUse(MI->getOperand(0).getReg()); + + MI->getParent()->insertAfter(MI, Cast.getInstr()); + } + } + // TODO: // Legalize NewInstrs if need be. return true; diff --git a/llvm/lib/CodeGen/RegisterBankInfo.cpp b/llvm/lib/CodeGen/RegisterBankInfo.cpp index a0061ece8add6..c1276e5020115 100644 --- a/llvm/lib/CodeGen/RegisterBankInfo.cpp +++ b/llvm/lib/CodeGen/RegisterBankInfo.cpp @@ -707,12 +707,22 @@ RegisterBankInfo::OperandsMapper::getNewVRegsEnd(unsigned StartIdx, : &NewVRegs[StartIdx + NumVal]; } +static LLT inferType(LLT OrigType, + const RegisterBankInfo::PartialMapping &PartMap) { + if (PartMap.StartIdx == 0 && PartMap.Length == OrigType.getSizeInBits()) + return OrigType; + // TODO: check if this is a full lane of a vector type and extract the + // element type. + return LLT::integer(PartMap.Length); +} + void RegisterBankInfo::OperandsMapper::createVRegs(unsigned OpIdx) { assert(OpIdx < getInstrMapping().getNumOperands() && "Out-of-bound access"); iterator_range::iterator> NewVRegsForOpIdx = getVRegsMem(OpIdx); const ValueMapping &ValMapping = getInstrMapping().getOperandMapping(OpIdx); const PartialMapping *PartMap = ValMapping.begin(); + LLT OrigType = getMRI().getType(getMI().getOperand(OpIdx).getReg()); for (Register &NewVReg : NewVRegsForOpIdx) { assert(PartMap != ValMapping.end() && "Out-of-bound access"); assert(NewVReg == 0 && "Register has already been created"); @@ -721,7 +731,8 @@ void RegisterBankInfo::OperandsMapper::createVRegs(unsigned OpIdx) { // of the instruction. // The rationale is that this generic code cannot guess how the // target plans to split the input type. 
- NewVReg = MRI.createGenericVirtualRegister(LLT::scalar(PartMap->Length)); + LLT NewType = inferType(OrigType, *PartMap); + NewVReg = MRI.createGenericVirtualRegister(NewType); MRI.setRegBank(NewVReg, *PartMap->RegBank); ++PartMap; } From a7fa303257f1a6f4fe51aa9859a10a73a971b705 Mon Sep 17 00:00:00 2001 From: Tim Gymnich Date: Wed, 8 Jan 2025 14:43:26 +0000 Subject: [PATCH 12/16] FPInfo: Add InferTypeInfo pass --- .../CodeGen/GlobalISel/InferTypeInfoPass.h | 43 ++ llvm/include/llvm/InitializePasses.h | 1 + llvm/lib/CodeGen/GlobalISel/CMakeLists.txt | 1 + llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp | 1 + .../CodeGen/GlobalISel/InferTypeInfoPass.cpp | 505 ++++++++++++++++++ .../llvm/lib/CodeGen/GlobalISel/BUILD.gn | 1 + 6 files changed, 552 insertions(+) create mode 100644 llvm/include/llvm/CodeGen/GlobalISel/InferTypeInfoPass.h create mode 100644 llvm/lib/CodeGen/GlobalISel/InferTypeInfoPass.cpp diff --git a/llvm/include/llvm/CodeGen/GlobalISel/InferTypeInfoPass.h b/llvm/include/llvm/CodeGen/GlobalISel/InferTypeInfoPass.h new file mode 100644 index 0000000000000..7fbbe76c8ee6a --- /dev/null +++ b/llvm/include/llvm/CodeGen/GlobalISel/InferTypeInfoPass.h @@ -0,0 +1,43 @@ +#ifndef LLVM_CODEGEN_GLOBALISEL_INFERTYPEINFOPASS_H +#define LLVM_CODEGEN_GLOBALISEL_INFERTYPEINFOPASS_H + +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" + +namespace llvm { + +class InferTypeInfo : public MachineFunctionPass { +public: + static char ID; + +private: + MachineRegisterInfo *MRI = nullptr; + MachineFunction *MF = nullptr; + + MachineIRBuilder Builder; + + /// Initialize the field members using \p MF. + void init(MachineFunction &MF); + +public: + InferTypeInfo() : MachineFunctionPass(ID) {} + + void getAnalysisUsage(AnalysisUsage &AU) const override; + + bool runOnMachineFunction(MachineFunction &MF) override; + +private: + bool inferTypeInfo(MachineFunction &MF); + + bool shouldBeFP(MachineOperand &Op, unsigned Depth) const; + + void updateDef(Register Reg); + + void updateUse(MachineOperand &Op, bool FP); +}; + +} // end namespace llvm + +#endif // LLVM_CODEGEN_GLOBALISEL_INFERTYPEINFOPASS_H diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index a05e876806ab5..d36b726fa04cc 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -130,6 +130,7 @@ void initializeHardwareLoopsLegacyPass(PassRegistry &); void initializeMIRProfileLoaderPassPass(PassRegistry &); void initializeIRSimilarityIdentifierWrapperPassPass(PassRegistry &); void initializeIRTranslatorPass(PassRegistry &); +void initializeInferTypeInfoPass(PassRegistry &); void initializeIVUsersWrapperPassPass(PassRegistry &); void initializeIfConverterPass(PassRegistry &); void initializeImmutableModuleSummaryIndexWrapperPassPass(PassRegistry &); diff --git a/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt b/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt index a45024d120be6..627b629bb7846 100644 --- a/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt +++ b/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt @@ -13,6 +13,7 @@ add_llvm_component_library(LLVMGlobalISel GIMatchTableExecutor.cpp GISelChangeObserver.cpp IRTranslator.cpp + InferTypeInfoPass.cpp InlineAsmLowering.cpp InstructionSelect.cpp InstructionSelector.cpp diff --git a/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp b/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp index efcc40641ea80..b23b9499b4972 100644 --- 
a/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp +++ b/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp @@ -16,6 +16,7 @@ using namespace llvm; void llvm::initializeGlobalISel(PassRegistry &Registry) { initializeIRTranslatorPass(Registry); + initializeInferTypeInfoPass(Registry); initializeLegalizerPass(Registry); initializeLoadStoreOptPass(Registry); initializeLocalizerPass(Registry); diff --git a/llvm/lib/CodeGen/GlobalISel/InferTypeInfoPass.cpp b/llvm/lib/CodeGen/GlobalISel/InferTypeInfoPass.cpp new file mode 100644 index 0000000000000..33766305a6bc4 --- /dev/null +++ b/llvm/lib/CodeGen/GlobalISel/InferTypeInfoPass.cpp @@ -0,0 +1,505 @@ +//===- llvm/CodeGen/GlobalISel/InferTypeInfoPass.cpp - StripTypeInfoPass ---*- +// C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements the InferTypeInfoPass class. +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GlobalISel/InferTypeInfoPass.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" +#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" +#include "llvm/CodeGen/GlobalISel/LoadStoreOpt.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Register.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/IntrinsicsAMDGPU.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" + +#define DEBUG_TYPE "mir-infer-type-info" + +using namespace llvm; + +char InferTypeInfo::ID = 0; + +INITIALIZE_PASS_BEGIN(InferTypeInfo, DEBUG_TYPE, + "Infer LLT type information based on instructions used", + false, false) +INITIALIZE_PASS_END(InferTypeInfo, DEBUG_TYPE, + "Infer LLT type information based on instructions used", + false, false) + +void InferTypeInfo::init(MachineFunction &MF) { + this->MF = &MF; + MRI = &MF.getRegInfo(); + Builder.setMF(MF); +} + +void InferTypeInfo::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +static LLT updateType(LLT Ty, bool FP) { + LLT InferredScalarTy = FP ? LLT::floatingPoint(Ty.getScalarSizeInBits(), + LLT::FPVariant::IEEE_FLOAT) + : LLT::integer(Ty.getScalarSizeInBits()); + LLT InferredTy = + Ty.isVector() ? 
Ty.changeElementType(InferredScalarTy) : InferredScalarTy; + + return InferredTy; +} + +void InferTypeInfo::updateDef(Register Reg) { + LLT Ty = MRI->getType(Reg); + LLT InferredTy = updateType(Ty, true); + + if (Ty == InferredTy) + return; + + MRI->setType(Reg, InferredTy); +} + +void InferTypeInfo::updateUse(MachineOperand &Op, bool FP) { + assert(Op.isReg()); + Register Reg = Op.getReg(); + LLT Ty = MRI->getType(Reg); + LLT InferredTy = updateType(Ty, FP); + + if (Ty == InferredTy) + return; + + Register NewReg = MRI->cloneVirtualRegister(Reg); + MRI->setType(NewReg, InferredTy); + + MachineOperand *Def = MRI->getOneDef(Reg); + MachineInstr *MI = Op.getParent(); + MachineBasicBlock *MBB = MI->getParent(); + + Builder.setInsertPt(*MBB, MI); + Builder.buildBitcast(NewReg, Def->getReg()); + Op.setReg(NewReg); +} + +constexpr unsigned MaxFPRSearchDepth = 5; + +bool InferTypeInfo::shouldBeFP(MachineOperand &Op, unsigned Depth = 0) const { + if (Depth > MaxFPRSearchDepth) + return false; + + if (!Op.isReg()) + return false; + + MachineInstr &MI = *Op.getParent(); + + auto Pred = [&](MachineOperand &O) { return shouldBeFP(O, Depth + 1); }; + + // TODO: cache FP registers + + switch (MI.getOpcode()) { + // def and use fp instructions + case TargetOpcode::G_FABS: + case TargetOpcode::G_FADD: + case TargetOpcode::G_FCANONICALIZE: + case TargetOpcode::G_FCEIL: + case TargetOpcode::G_FCONSTANT: + case TargetOpcode::G_FCOPYSIGN: + case TargetOpcode::G_FCOS: + case TargetOpcode::G_FDIV: + case TargetOpcode::G_FEXP2: + case TargetOpcode::G_FEXP: + case TargetOpcode::G_FFLOOR: + case TargetOpcode::G_FLOG10: + case TargetOpcode::G_FLOG2: + case TargetOpcode::G_FLOG: + case TargetOpcode::G_FMA: + case TargetOpcode::G_FMAD: + case TargetOpcode::G_FMAXIMUM: + case TargetOpcode::G_FMAXNUM: + case TargetOpcode::G_FMAXNUM_IEEE: + case TargetOpcode::G_FMINIMUM: + case TargetOpcode::G_FMINNUM: + case TargetOpcode::G_FMINNUM_IEEE: + case TargetOpcode::G_FMUL: + case TargetOpcode::G_FNEARBYINT: + case TargetOpcode::G_FNEG: + case TargetOpcode::G_FPEXT: + case TargetOpcode::G_FPOW: + case TargetOpcode::G_FPTRUNC: + case TargetOpcode::G_FREM: + case TargetOpcode::G_FRINT: + case TargetOpcode::G_FSIN: + case TargetOpcode::G_FTAN: + case TargetOpcode::G_FACOS: + case TargetOpcode::G_FASIN: + case TargetOpcode::G_FATAN: + case TargetOpcode::G_FATAN2: + case TargetOpcode::G_FCOSH: + case TargetOpcode::G_FSINH: + case TargetOpcode::G_FTANH: + case TargetOpcode::G_FSQRT: + case TargetOpcode::G_FSUB: + case TargetOpcode::G_STRICT_FSUB: + case TargetOpcode::G_STRICT_FADD: + case TargetOpcode::G_STRICT_FDIV: + case TargetOpcode::G_STRICT_FLDEXP: + case TargetOpcode::G_STRICT_FMA: + case TargetOpcode::G_STRICT_FMUL: + case TargetOpcode::G_STRICT_FREM: + case TargetOpcode::G_STRICT_FSQRT: + case TargetOpcode::G_INTRINSIC_ROUND: + case TargetOpcode::G_INTRINSIC_ROUNDEVEN: + case TargetOpcode::G_INTRINSIC_TRUNC: + case TargetOpcode::G_VECREDUCE_FADD: + case TargetOpcode::G_VECREDUCE_FMUL: + case TargetOpcode::G_VECREDUCE_FMAX: + case TargetOpcode::G_VECREDUCE_FMIN: + case TargetOpcode::G_VECREDUCE_FMAXIMUM: + case TargetOpcode::G_VECREDUCE_FMINIMUM: + case TargetOpcode::G_VECREDUCE_SEQ_FADD: + case TargetOpcode::G_VECREDUCE_SEQ_FMUL: + return true; + case TargetOpcode::G_FPOWI: { + return Op.isDef() || Op.getReg() == MI.getOperand(1).getReg(); + } + // use only fp instructions + case TargetOpcode::G_SITOFP: + case TargetOpcode::G_UITOFP: + return Op.isDef(); + // def only fp instructions + case TargetOpcode::G_FPTOSI: + case 
TargetOpcode::G_FPTOUI: + case TargetOpcode::G_FPTOSI_SAT: + case TargetOpcode::G_FPTOUI_SAT: + case TargetOpcode::G_FCMP: + case TargetOpcode::G_LROUND: + case TargetOpcode::G_LLROUND: + return Op.isUse(); + case TargetOpcode::G_BITCAST: + return MRI->getType(Op.getReg()).isFloat() || + MRI->getType(Op.getReg()).isFloatVector(); + case TargetOpcode::G_FREEZE: + case TargetOpcode::G_IMPLICIT_DEF: + case TargetOpcode::G_PHI: + case TargetOpcode::G_SELECT: + case TargetOpcode::G_BUILD_VECTOR: + case TargetOpcode::G_CONCAT_VECTORS: + case TargetOpcode::G_INSERT_SUBVECTOR: + case TargetOpcode::G_EXTRACT_SUBVECTOR: + case TargetOpcode::G_SHUFFLE_VECTOR: + case TargetOpcode::G_SPLAT_VECTOR: + case TargetOpcode::G_STEP_VECTOR: + case TargetOpcode::G_VECTOR_COMPRESS: { + return all_of(MI.all_defs(), + [&](MachineOperand &O) { + return all_of(MRI->use_operands(O.getReg()), Pred); + }) && + all_of(MI.all_uses(), [&](MachineOperand &O) { + return all_of(MRI->def_operands(O.getReg()), Pred); + }); + } + case TargetOpcode::G_INSERT_VECTOR_ELT: + case TargetOpcode::G_EXTRACT_VECTOR_ELT: { + MachineOperand &Dst = MI.getOperand(0); + MachineOperand &LHS = MI.getOperand(1); + MachineOperand &RHS = MI.getOperand(2); + + return all_of(MRI->use_operands(Dst.getReg()), Pred) && + (!LHS.isReg() || all_of(MRI->def_operands(LHS.getReg()), Pred)) && + (!RHS.isReg() || all_of(MRI->def_operands(RHS.getReg()), Pred)); + } + case TargetOpcode::G_STORE: + case TargetOpcode::G_INDEXED_STORE: { + MachineOperand &Val = MI.getOperand(0); + return Op.getReg() == Val.getReg() && + all_of(MRI->def_operands(Op.getReg()), Pred); + } + case TargetOpcode::G_INDEXED_LOAD: + case TargetOpcode::G_LOAD: { + MachineOperand &Dst = MI.getOperand(0); + return Op.getReg() == Dst.getReg() && + all_of(MRI->use_operands(Dst.getReg()), Pred); + } + case TargetOpcode::G_ATOMICRMW_FADD: + case TargetOpcode::G_ATOMICRMW_FSUB: + case TargetOpcode::G_ATOMICRMW_FMAX: + case TargetOpcode::G_ATOMICRMW_FMIN: { + MachineOperand &WriteBack = MI.getOperand(0); + MachineOperand &FPOp = MI.getOperand(2); + return Op.getReg() == WriteBack.getReg() || Op.getReg() == FPOp.getReg(); + } + case TargetOpcode::G_INTRINSIC_CONVERGENT: + case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS: + case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: + case TargetOpcode::G_INTRINSIC: { + GIntrinsic *Intrinsic = dyn_cast(&MI); + if (!Intrinsic) + return false; + + unsigned Idx = Op.getOperandNo() - + (Op.getOperandNo() > Intrinsic->getNumExplicitDefs()); + switch (Intrinsic->getIntrinsicID()) { + case Intrinsic::amdgcn_rcp: + case Intrinsic::amdgcn_rcp_legacy: + case Intrinsic::amdgcn_rsq: + case Intrinsic::amdgcn_rsq_clamp: + case Intrinsic::amdgcn_rsq_legacy: + case Intrinsic::amdgcn_sqrt: + case Intrinsic::amdgcn_log: + case Intrinsic::amdgcn_log_clamp: + case Intrinsic::amdgcn_sin: + case Intrinsic::amdgcn_exp: + case Intrinsic::amdgcn_cos: + case Intrinsic::amdgcn_exp2: + case Intrinsic::amdgcn_fdiv_fast: + case Intrinsic::amdgcn_fdot2: + case Intrinsic::amdgcn_fdot2_f16_f16: + case Intrinsic::amdgcn_fma_legacy: + case Intrinsic::amdgcn_fmad_ftz: + case Intrinsic::amdgcn_fmed3: + case Intrinsic::amdgcn_fmul_legacy: + case Intrinsic::amdgcn_fract: + case Intrinsic::amdgcn_frexp_exp: + case Intrinsic::amdgcn_div_fixup: + case Intrinsic::amdgcn_div_scale: + case Intrinsic::amdgcn_cvt_pkrtz: + case Intrinsic::amdgcn_fdot2_bf16_bf16: + case Intrinsic::amdgcn_fdot2_f32_bf16: + case Intrinsic::amdgcn_fdot2c_f32_bf16: + case Intrinsic::amdgcn_dot4_f32_bf8_fp8: + case 
Intrinsic::amdgcn_dot4_f32_fp8_bf8: + case Intrinsic::amdgcn_dot4_f32_bf8_bf8: + case Intrinsic::amdgcn_dot4_f32_fp8_fp8: + return true; + case Intrinsic::amdgcn_mfma_f32_16x16x16bf16_1k: + case Intrinsic::amdgcn_mfma_f32_16x16x2bf16: + case Intrinsic::amdgcn_mfma_f32_16x16x32_bf16: + case Intrinsic::amdgcn_mfma_f32_16x16x32_bf8_bf8: + case Intrinsic::amdgcn_mfma_f32_16x16x32_bf8_fp8: + case Intrinsic::amdgcn_mfma_f32_16x16x4bf16_1k: + case Intrinsic::amdgcn_mfma_f32_16x16x8bf16: + case Intrinsic::amdgcn_mfma_f32_32x32x16_bf16: + case Intrinsic::amdgcn_mfma_f32_32x32x16_bf8_bf8: + case Intrinsic::amdgcn_mfma_f32_32x32x16_bf8_fp8: + case Intrinsic::amdgcn_mfma_f32_4x4x2bf16: + case Intrinsic::amdgcn_mfma_f32_4x4x4bf16_1k: + case Intrinsic::amdgcn_mfma_f32_16x16x16f16: + case Intrinsic::amdgcn_mfma_f32_16x16x1f32: + case Intrinsic::amdgcn_mfma_f32_16x16x32_f16: + case Intrinsic::amdgcn_mfma_f32_16x16x32_fp8_fp8: + case Intrinsic::amdgcn_mfma_f32_16x16x4f16: + case Intrinsic::amdgcn_mfma_f32_16x16x4f32: + case Intrinsic::amdgcn_mfma_f32_16x16x8_xf32: + case Intrinsic::amdgcn_mfma_f32_32x32x16_f16: + case Intrinsic::amdgcn_mfma_f32_32x32x16_fp8_fp8: + case Intrinsic::amdgcn_mfma_f32_32x32x1f32: + case Intrinsic::amdgcn_mfma_f32_32x32x2f32: + case Intrinsic::amdgcn_mfma_f32_32x32x4_xf32: + case Intrinsic::amdgcn_mfma_f32_32x32x4f16: + case Intrinsic::amdgcn_mfma_f32_32x32x8f16: + case Intrinsic::amdgcn_mfma_f32_4x4x1f32: + case Intrinsic::amdgcn_mfma_f32_4x4x4f16: + case Intrinsic::amdgcn_mfma_f64_16x16x4f64: + case Intrinsic::amdgcn_mfma_f64_4x4x4f64: + case Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4: + case Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4: + case Intrinsic::amdgcn_mfma_f32_16x16x32_fp8_bf8: + case Intrinsic::amdgcn_mfma_f32_32x32x16_fp8_bf8: + case Intrinsic::amdgcn_mfma_f32_32x32x2bf16: + case Intrinsic::amdgcn_mfma_f32_32x32x4bf16: + case Intrinsic::amdgcn_mfma_f32_32x32x4bf16_1k: + case Intrinsic::amdgcn_mfma_f32_32x32x8bf16_1k: + case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_bf8: + case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_fp8: + case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_bf8: + case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8: + case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16: + case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf16: + case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8: + case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8: + case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8: + case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16: + case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8: + case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8: + case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8: + case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8: + case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8: + case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8: + case Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16: + case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16: + case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8: + case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8: + case Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8: + case Intrinsic::amdgcn_swmmac_f16_16x16x32_f16: + case Intrinsic::amdgcn_swmmac_f32_16x16x32_f16: + case Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8: + case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf16: + case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16: + case Intrinsic::amdgcn_smfmac_f32_16x16x64_f16: + case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8: + case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16: + case 
Intrinsic::amdgcn_smfmac_f32_32x32x32_f16: + case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8: + case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8: + case Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16: + case Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied: + case Intrinsic::amdgcn_wmma_f32_16x16x16_bf16: + case Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_bf8: + case Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_fp8: + case Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_bf8: + case Intrinsic::amdgcn_wmma_f16_16x16x16_f16: + case Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied: + case Intrinsic::amdgcn_wmma_f32_16x16x16_f16: + case Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_fp8: + return Idx == 0 || Idx == 1 || Idx == 2 || Idx == 3; + case Intrinsic::amdgcn_image_atomic_pk_add_bf16_1d: + case Intrinsic::amdgcn_image_atomic_pk_add_bf16_1darray: + case Intrinsic::amdgcn_image_atomic_pk_add_bf16_2d: + case Intrinsic::amdgcn_image_atomic_pk_add_bf16_2darray: + case Intrinsic::amdgcn_image_atomic_pk_add_bf16_2darraymsaa: + case Intrinsic::amdgcn_image_atomic_pk_add_bf16_2dmsaa: + case Intrinsic::amdgcn_image_atomic_pk_add_bf16_3d: + case Intrinsic::amdgcn_image_atomic_pk_add_bf16_cube: + case Intrinsic::amdgcn_image_atomic_pk_add_f16_1d: + case Intrinsic::amdgcn_image_atomic_pk_add_f16_1darray: + case Intrinsic::amdgcn_image_atomic_pk_add_f16_2d: + case Intrinsic::amdgcn_image_atomic_pk_add_f16_2darray: + case Intrinsic::amdgcn_image_atomic_pk_add_f16_2darraymsaa: + case Intrinsic::amdgcn_image_atomic_pk_add_f16_2dmsaa: + case Intrinsic::amdgcn_image_atomic_pk_add_f16_3d: + case Intrinsic::amdgcn_image_atomic_pk_add_f16_cube: + return Idx == 0 || Idx == 1; + case Intrinsic::amdgcn_flat_atomic_fmax_num: + case Intrinsic::amdgcn_flat_atomic_fmin_num: + case Intrinsic::amdgcn_global_atomic_fmax_num: + case Intrinsic::amdgcn_global_atomic_fmin_num: + return Idx == 0 || Idx == 2; + case Intrinsic::amdgcn_raw_buffer_atomic_fadd: + case Intrinsic::amdgcn_raw_buffer_atomic_fmax: + case Intrinsic::amdgcn_raw_buffer_atomic_fmin: + case Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd: + case Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmax: + case Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmin: + case Intrinsic::amdgcn_struct_buffer_atomic_fadd: + case Intrinsic::amdgcn_struct_buffer_atomic_fmax: + case Intrinsic::amdgcn_struct_buffer_atomic_fmin: + case Intrinsic::amdgcn_struct_ptr_buffer_atomic_fadd: + case Intrinsic::amdgcn_struct_ptr_buffer_atomic_fmax: + case Intrinsic::amdgcn_struct_ptr_buffer_atomic_fmin: + return Idx == 0 || Idx == 1; + case Intrinsic::amdgcn_interp_p1: + case Intrinsic::amdgcn_interp_p1_f16: + return Idx == 0 || Idx == 1; + case Intrinsic::amdgcn_interp_p2: + case Intrinsic::amdgcn_interp_p2_f16: + case Intrinsic::amdgcn_interp_p2_rtz_f16: + return Idx == 0 || Idx == 1 || Idx == 2; + case Intrinsic::amdgcn_interp_inreg_p2: + case Intrinsic::amdgcn_interp_inreg_p2_f16: + case Intrinsic::amdgcn_interp_p10_rtz_f16: + case Intrinsic::amdgcn_interp_inreg_p10: + case Intrinsic::amdgcn_interp_inreg_p10_f16: + return Idx == 0 || Idx == 1 || Idx == 2 || Idx == 3; + case Intrinsic::amdgcn_fcmp: + return Idx == 1 || Idx == 2; + case Intrinsic::amdgcn_class: + return Idx == 1; + case Intrinsic::amdgcn_cvt_pknorm_i16: + case Intrinsic::amdgcn_cvt_pknorm_u16: + return Idx == 1 || Idx == 2; + case Intrinsic::amdgcn_div_fmas: + return Idx >= 0 && Idx <= 3; + default: { + dbgs() << "unhandled intrinsic in" << MF->getName() << " " << MI; + } + } + return false; + } + default: + break; + } + + return false; +} + +bool 
InferTypeInfo::inferTypeInfo(MachineFunction &MF) { + bool Changed = false; + + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB.instrs()) { + + for (auto &Def : MI.all_defs()) { + Register Reg = Def.getReg(); + if (!Reg.isVirtual()) + continue; + + if (shouldBeFP(Def)) { + updateDef(Reg); + Changed |= true; + } + } + + for (auto &Use : MI.all_uses()) { + Register Reg = Use.getReg(); + if (!Reg.isVirtual()) + continue; + + bool IsFPDef = MRI->getVRegDef(Reg) && + all_of(MRI->def_operands(Reg), [&](MachineOperand &Op) { + return shouldBeFP(Op); + }); + bool IsFPUse = shouldBeFP(Use); + + if (IsFPUse && !IsFPDef) { + updateUse(Use, true); + Changed |= true; + } else if (!IsFPUse && IsFPDef) { + updateUse(Use, false); + Changed |= true; + } + } + + for (auto &MemOp : MI.memoperands()) { + bool IsFP = any_of(MI.all_defs(), + [&](MachineOperand &O) { return shouldBeFP(O); }) || + any_of(MI.all_uses(), + [&](MachineOperand &O) { return shouldBeFP(O); }); + + if (!IsFP) + continue; + + LLT Ty = MemOp->getType(); + LLT NewTy = updateType(Ty, true); + MemOp->setType(NewTy); + } + } + } + + return Changed; +} + +bool InferTypeInfo::runOnMachineFunction(MachineFunction &MF) { + init(MF); + bool Changed = false; + Changed |= inferTypeInfo(MF); + return Changed; +} \ No newline at end of file diff --git a/llvm/utils/gn/secondary/llvm/lib/CodeGen/GlobalISel/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/CodeGen/GlobalISel/BUILD.gn index dc9e449195159..37d1cf7e93aea 100644 --- a/llvm/utils/gn/secondary/llvm/lib/CodeGen/GlobalISel/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/CodeGen/GlobalISel/BUILD.gn @@ -26,6 +26,7 @@ static_library("GlobalISel") { "GISelKnownBits.cpp", "GlobalISel.cpp", "IRTranslator.cpp", + "InferTypeInfoPass.cpp", "InlineAsmLowering.cpp", "InstructionSelect.cpp", "InstructionSelector.cpp", From f9f93e72b43a4d196816aba4d738baed455bfc0b Mon Sep 17 00:00:00 2001 From: Tim Gymnich Date: Tue, 11 Feb 2025 13:23:19 +0000 Subject: [PATCH 13/16] FPInfo: Legalizer DSL use bitcasts --- .../llvm/CodeGen/GlobalISel/LegalizerInfo.h | 109 ++++++++++++++++-- .../CodeGen/GlobalISel/LegalityPredicates.cpp | 58 ++++++++++ .../CodeGen/GlobalISel/LegalizeMutations.cpp | 8 ++ .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 72 +++++++----- 4 files changed, 206 insertions(+), 41 deletions(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h index f8819d9efd833..663a0ce2d13f3 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h @@ -282,6 +282,10 @@ LegalityPredicate typePairAndMemDescInSet( std::initializer_list TypesAndMemDescInit); /// True iff the specified type index is a scalar. LegalityPredicate isScalar(unsigned TypeIdx); +/// True iff the specified type index is a integer. +LegalityPredicate isInteger(unsigned TypeIdx); +/// True iff the specified type index is a float. +LegalityPredicate isFloat(unsigned TypeIdx); /// True iff the specified type index is a vector. LegalityPredicate isVector(unsigned TypeIdx); /// True iff the specified type index is a pointer (with any address space). @@ -292,6 +296,14 @@ LegalityPredicate isPointer(unsigned TypeIdx, unsigned AddrSpace); /// True iff the specified type index is a vector of pointers (with any address /// space). 
LegalityPredicate isPointerVector(unsigned TypeIdx); +/// True iff the specified type index is a vector of integer +LegalityPredicate isIntegerVector(unsigned TypeIdx); +/// True iff the specified type index is a vector of floats. +LegalityPredicate isFloatVector(unsigned TypeIdx); + +LegalityPredicate isFloatOrFloatVector(unsigned TypeIdx); + +LegalityPredicate isIntegerOrIntegerVector(unsigned TypeIdx); /// True if the type index is a vector with element type \p EltTy LegalityPredicate elementTypeIs(unsigned TypeIdx, LLT EltTy); @@ -330,6 +342,10 @@ LegalityPredicate sizeIs(unsigned TypeIdx, unsigned Size); /// True iff the specified type indices are both the same bit size. LegalityPredicate sameSize(unsigned TypeIdx0, unsigned TypeIdx1); +LegalityPredicate sameScalarKind(unsigned TypeIdx, LLT Ty); + +LegalityPredicate sameKind(unsigned TypeIdx, LLT Ty); + /// True iff the first type index has a larger total bit size than second type /// index. LegalityPredicate largerThan(unsigned TypeIdx0, unsigned TypeIdx1); @@ -381,6 +397,8 @@ LegalizeMutation changeElementCountTo(unsigned TypeIdx, ElementCount EC); /// only changes the size. LegalizeMutation changeElementSizeTo(unsigned TypeIdx, unsigned FromTypeIdx); +LegalizeMutation changeToInteger(unsigned TypeIdx); + /// Widen the scalar type or vector element type for the given type index to the /// next power of 2. LegalizeMutation widenScalarOrEltToNextPow2(unsigned TypeIdx, unsigned Min = 0); @@ -942,6 +960,16 @@ class LegalizeRuleSet { LegalizeMutations::widenScalarOrEltToNextPow2(TypeIdx, MinSize)); } + LegalizeRuleSet &widenScalarToNextPow2Bitcast(unsigned TypeIdx, + unsigned MinSize = 0) { + using namespace LegalityPredicates; + using namespace LegalizeMutations; + return actionIf( + LegalizeAction::Bitcast, + all(isFloatOrFloatVector(TypeIdx), sizeNotPow2(typeIdx(TypeIdx))), + changeToInteger(TypeIdx)); + } + /// Widen the scalar to the next multiple of Size. No effect if the /// type is not a scalar or is a multiple of Size. LegalizeRuleSet &widenScalarToNextMultipleOf(unsigned TypeIdx, @@ -997,9 +1025,20 @@ class LegalizeRuleSet { LegalizeRuleSet &minScalarOrElt(unsigned TypeIdx, const LLT Ty) { using namespace LegalityPredicates; using namespace LegalizeMutations; - return actionIf(LegalizeAction::WidenScalar, - scalarOrEltNarrowerThan(TypeIdx, Ty.getScalarSizeInBits()), - changeElementTo(typeIdx(TypeIdx), Ty)); + return actionIf( + LegalizeAction::WidenScalar, + all(sameScalarKind(TypeIdx, Ty), + scalarOrEltNarrowerThan(TypeIdx, Ty.getScalarSizeInBits())), + changeElementTo(typeIdx(TypeIdx), Ty)); + } + LegalizeRuleSet &minScalarOrEltBitcast(unsigned TypeIdx, const LLT Ty) { + using namespace LegalityPredicates; + using namespace LegalizeMutations; + return actionIf( + LegalizeAction::Bitcast, + all(isFloatOrFloatVector(TypeIdx), + scalarOrEltNarrowerThan(TypeIdx, Ty.getScalarSizeInBits())), + changeToInteger(typeIdx(TypeIdx))); } /// Ensure the scalar or element is at least as wide as Ty. 
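Each new *Bitcast rule is meant to be paired with its integer counterpart: the Bitcast action fires first for float (or float-vector) types and rewrites them to the same-sized integer type, and the ordinary WidenScalar/NarrowScalar rule then performs the actual resizing. A hedged sketch of a rule set written in this style, mirroring the AMDGPULegalizerInfo changes later in this patch (the LLT constants are spelled out only for illustration):

  // Inside a target's LegalizerInfo constructor.
  const LLT F16 = LLT::floatingPoint(16, LLT::FPVariant::IEEE_FLOAT);
  const LLT F32 = LLT::floatingPoint(32, LLT::FPVariant::IEEE_FLOAT);
  const LLT F64 = LLT::floatingPoint(64, LLT::FPVariant::IEEE_FLOAT);
  const LLT I16 = LLT::integer(16);
  const LLT I64 = LLT::integer(64);

  getActionDefinitionsBuilder(G_FCONSTANT)
      .legalFor({F32, F64, F16})
      .clampScalarBitcast(0, I16, I64) // float outside [16, 64] bits: bitcast to iN
      .clampScalar(0, I16, I64);       // then widen/narrow the integer as usual
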
@@ -1007,10 +1046,11 @@ class LegalizeRuleSet { unsigned TypeIdx, const LLT Ty) { using namespace LegalityPredicates; using namespace LegalizeMutations; - return actionIf(LegalizeAction::WidenScalar, - all(Predicate, scalarOrEltNarrowerThan( - TypeIdx, Ty.getScalarSizeInBits())), - changeElementTo(typeIdx(TypeIdx), Ty)); + return actionIf( + LegalizeAction::WidenScalar, + all(Predicate, sameScalarKind(TypeIdx, Ty), + scalarOrEltNarrowerThan(TypeIdx, Ty.getScalarSizeInBits())), + changeElementTo(typeIdx(TypeIdx), Ty)); } /// Ensure the vector size is at least as wide as VectorSize by promoting the @@ -1039,7 +1079,8 @@ class LegalizeRuleSet { using namespace LegalityPredicates; using namespace LegalizeMutations; return actionIf(LegalizeAction::WidenScalar, - scalarNarrowerThan(TypeIdx, Ty.getSizeInBits()), + all(sameKind(TypeIdx, Ty), + scalarNarrowerThan(TypeIdx, Ty.getSizeInBits())), changeTo(typeIdx(TypeIdx), Ty)); } LegalizeRuleSet &minScalar(bool Pred, unsigned TypeIdx, const LLT Ty) { @@ -1047,6 +1088,14 @@ class LegalizeRuleSet { return *this; return minScalar(TypeIdx, Ty); } + LegalizeRuleSet &minScalarBitcast(unsigned TypeIdx, const LLT Ty) { + using namespace LegalityPredicates; + using namespace LegalizeMutations; + return actionIf(LegalizeAction::Bitcast, + all(isFloatOrFloatVector(TypeIdx), + scalarNarrowerThan(TypeIdx, Ty.getSizeInBits())), + changeToInteger(typeIdx(TypeIdx))); + } /// Ensure the scalar is at least as wide as Ty if condition is met. LegalizeRuleSet &minScalarIf(LegalityPredicate Predicate, unsigned TypeIdx, @@ -1068,9 +1117,20 @@ class LegalizeRuleSet { LegalizeRuleSet &maxScalarOrElt(unsigned TypeIdx, const LLT Ty) { using namespace LegalityPredicates; using namespace LegalizeMutations; - return actionIf(LegalizeAction::NarrowScalar, - scalarOrEltWiderThan(TypeIdx, Ty.getScalarSizeInBits()), - changeElementTo(typeIdx(TypeIdx), Ty)); + return actionIf( + LegalizeAction::NarrowScalar, + all(sameScalarKind(TypeIdx, Ty), + scalarOrEltWiderThan(TypeIdx, Ty.getScalarSizeInBits())), + changeElementTo(typeIdx(TypeIdx), Ty)); + } + LegalizeRuleSet &maxScalarOrEltBitcast(unsigned TypeIdx, const LLT Ty) { + using namespace LegalityPredicates; + using namespace LegalizeMutations; + return actionIf( + LegalizeAction::NarrowScalar, + all(isFloatOrFloatVector(TypeIdx), + scalarOrEltWiderThan(TypeIdx, Ty.getScalarSizeInBits())), + changeToInteger(typeIdx(TypeIdx))); } /// Ensure the scalar is at most as wide as Ty. @@ -1078,9 +1138,18 @@ class LegalizeRuleSet { using namespace LegalityPredicates; using namespace LegalizeMutations; return actionIf(LegalizeAction::NarrowScalar, - scalarWiderThan(TypeIdx, Ty.getSizeInBits()), + all(sameKind(TypeIdx, Ty), + scalarWiderThan(TypeIdx, Ty.getSizeInBits())), changeTo(typeIdx(TypeIdx), Ty)); } + LegalizeRuleSet &maxScalarBitcast(unsigned TypeIdx, const LLT Ty) { + using namespace LegalityPredicates; + using namespace LegalizeMutations; + return actionIf(LegalizeAction::NarrowScalar, + all(isFloatOrFloatVector(TypeIdx), + scalarWiderThan(TypeIdx, Ty.getSizeInBits())), + changeToInteger(typeIdx(TypeIdx))); + } /// Conditionally limit the maximum size of the scalar. /// For example, when the maximum size of one type depends on the size of @@ -1103,10 +1172,20 @@ class LegalizeRuleSet { /// Limit the range of scalar sizes to MinTy and MaxTy. 
LegalizeRuleSet &clampScalar(unsigned TypeIdx, const LLT MinTy, const LLT MaxTy) { + assert(MinTy.getKind() == MaxTy.getKind() && + "Expected LLT of the same kind"); assert(MinTy.isScalar() && MaxTy.isScalar() && "Expected scalar types"); return minScalar(TypeIdx, MinTy).maxScalar(TypeIdx, MaxTy); } + LegalizeRuleSet &clampScalarBitcast(unsigned TypeIdx, const LLT MinTy, + const LLT MaxTy) { + assert(MinTy.getKind() == MaxTy.getKind() && + "Expected LLT of the same kind"); + assert(MinTy.isScalar() && MaxTy.isScalar() && "Expected scalar types"); + return minScalarBitcast(TypeIdx, MinTy).maxScalarBitcast(TypeIdx, MaxTy); + } + LegalizeRuleSet &clampScalar(bool Pred, unsigned TypeIdx, const LLT MinTy, const LLT MaxTy) { if (!Pred) @@ -1120,6 +1199,12 @@ class LegalizeRuleSet { return minScalarOrElt(TypeIdx, MinTy).maxScalarOrElt(TypeIdx, MaxTy); } + LegalizeRuleSet &clampScalarOrEltBitcast(unsigned TypeIdx, const LLT MinTy, + const LLT MaxTy) { + return minScalarOrEltBitcast(TypeIdx, MinTy) + .maxScalarOrEltBitcast(TypeIdx, MaxTy); + } + /// Widen the scalar to match the size of another. LegalizeRuleSet &minScalarSameAs(unsigned TypeIdx, unsigned LargeTypeIdx) { typeIdx(TypeIdx); diff --git a/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp b/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp index 30c2d089c3121..6db01579ea431 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp @@ -81,6 +81,18 @@ LegalityPredicate LegalityPredicates::isScalar(unsigned TypeIdx) { }; } +LegalityPredicate LegalityPredicates::isInteger(unsigned TypeIdx) { + return [=](const LegalityQuery &Query) { + return Query.Types[TypeIdx].isInteger(); + }; +} + +LegalityPredicate LegalityPredicates::isFloat(unsigned TypeIdx) { + return [=](const LegalityQuery &Query) { + return Query.Types[TypeIdx].isFloat(); + }; +} + LegalityPredicate LegalityPredicates::isVector(unsigned TypeIdx) { return [=](const LegalityQuery &Query) { return Query.Types[TypeIdx].isVector(); @@ -107,6 +119,30 @@ LegalityPredicate LegalityPredicates::isPointerVector(unsigned TypeIdx) { }; } +LegalityPredicate LegalityPredicates::isIntegerVector(unsigned TypeIdx) { + return [=](const LegalityQuery &Query) { + return Query.Types[TypeIdx].isIntegerVector(); + }; +} +LegalityPredicate +LegalityPredicates::isIntegerOrIntegerVector(unsigned TypeIdx) { + return [=](const LegalityQuery &Query) { + LLT Ty = Query.Types[TypeIdx]; + return Ty.isInteger() || Ty.isIntegerVector(); + }; +} +LegalityPredicate LegalityPredicates::isFloatVector(unsigned TypeIdx) { + return [=](const LegalityQuery &Query) { + return Query.Types[TypeIdx].isFloatVector(); + }; +} +LegalityPredicate LegalityPredicates::isFloatOrFloatVector(unsigned TypeIdx) { + return [=](const LegalityQuery &Query) { + LLT Ty = Query.Types[TypeIdx]; + return Ty.isFloat() || Ty.isFloatVector(); + }; +} + LegalityPredicate LegalityPredicates::elementTypeIs(unsigned TypeIdx, LLT EltTy) { return [=](const LegalityQuery &Query) { @@ -200,6 +236,28 @@ LegalityPredicate LegalityPredicates::sameSize(unsigned TypeIdx0, }; } +LegalityPredicate LegalityPredicates::sameScalarKind(unsigned TypeIdx, LLT Ty) { + return [=](const LegalityQuery &Query) { + LLT QueryTy = Query.Types[TypeIdx].getScalarType(); + if (Ty.isFloat()) + return QueryTy.getKind() == Ty.getKind() && + QueryTy.getFPVariant() == Ty.getFPVariant(); + + return QueryTy.getKind() == Ty.getKind(); + }; +} + +LegalityPredicate LegalityPredicates::sameKind(unsigned TypeIdx, LLT Ty) { + 
return [=](const LegalityQuery &Query) { + LLT QueryTy = Query.Types[TypeIdx]; + if (Ty.isFloat() || Ty.isFloatVector()) + return QueryTy.getKind() == Ty.getKind() && + QueryTy.getFPVariant() == Ty.getFPVariant(); + + return QueryTy.getKind() == Ty.getKind(); + }; +} + LegalityPredicate LegalityPredicates::memSizeInBytesNotPow2(unsigned MMOIdx) { return [=](const LegalityQuery &Query) { return !llvm::has_single_bit( diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp index ded4df4edc14c..dd6b7d2ac90b3 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp @@ -72,6 +72,14 @@ LegalizeMutation LegalizeMutations::changeElementSizeTo(unsigned TypeIdx, }; } +LegalizeMutation LegalizeMutations::changeToInteger(unsigned TypeIdx) { + return [=](const LegalityQuery &Query) { + const LLT OldTy = Query.Types[TypeIdx]; + const LLT NewTy = OldTy.changeToInteger(); + return std::make_pair(TypeIdx, NewTy); + }; +} + LegalizeMutation LegalizeMutations::widenScalarOrEltToNextPow2(unsigned TypeIdx, unsigned Min) { return [=](const LegalityQuery &Query) { diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index fc48a6c181360..79173b21075f8 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -788,7 +788,9 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, .legalFor(AddrSpaces32) .legalFor(AddrSpaces128) .legalIf(isPointer(0)) + .clampScalarBitcast(0, I16, I256) .clampScalar(0, I16, I256) + .widenScalarToNextPow2Bitcast(0, 32) .widenScalarToNextPow2(0, 32) .clampMaxNumElements(0, I32, 16) .clampMaxNumElements(0, F32, 16) @@ -961,7 +963,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, getActionDefinitionsBuilder(G_FCONSTANT) .legalFor({F32, F64, F16, BF16}) - .clampScalar(0, F16, F64); + .clampScalarBitcast(0, I16, I64) + .clampScalar(0, I16, I64); getActionDefinitionsBuilder({G_IMPLICIT_DEF, G_FREEZE}) .legalIf(isRegisterClassType(ST, 0)) @@ -971,7 +974,9 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, .clampNumElements(0, V16I32, V32I32) .clampNumElements(0, V16F32, V32F32) .moreElementsIf(isSmallOddVector(0), oneMoreElement(0)) + .clampScalarOrEltBitcast(0, I32, MaxScalar) .clampScalarOrElt(0, I32, MaxScalar) + .widenScalarToNextPow2Bitcast(0, 32) .widenScalarToNextPow2(0, 32) .clampMaxNumElements(0, I32, 16) .clampMaxNumElements(0, F32, 16); @@ -1590,6 +1595,12 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, // FIXME: load/store narrowing should be moved to lower action Actions + .bitcastIf( + [=](const LegalityQuery &Query) -> bool { + return !Query.Types[0].isVector() && Query.Types[0].isFloat() && + needToSplitMemOp(Query, Op == G_LOAD); + }, + changeToInteger(0)) .narrowScalarIf( [=](const LegalityQuery &Query) -> bool { return !Query.Types[0].isVector() && @@ -1676,8 +1687,13 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, // May need relegalization for the scalars. 
return std::pair(0, EltTy); }) + .minScalarBitcast(0, I32) .minScalar(0, I32) + .bitcastIf(all(predNot(isIntegerOrIntegerVector(0)), + isWideScalarExtLoadTruncStore(0)), + changeToInteger(0)) .narrowScalarIf(isWideScalarExtLoadTruncStore(0), changeTo(0, I32)) + .widenScalarToNextPow2Bitcast(0) .widenScalarToNextPow2(0) .moreElementsIf(vectorSmallerThan(0, 32), moreEltsToNext32Bit(0)) .lower(); @@ -1810,6 +1826,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, LLT::fixed_vector(2, LocalPtr), LLT::fixed_vector(2, PrivatePtr)}, {I1, I32}) + .clampScalarBitcast(0, I16, I64) .clampScalar(0, I16, I64) .scalarize(1) .moreElementsIf(isSmallOddVector(0), oneMoreElement(0)) @@ -1819,6 +1836,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, .clampMaxNumElements(0, LocalPtr, 2) .clampMaxNumElements(0, PrivatePtr, 2) .scalarize(0) + .widenScalarToNextPow2Bitcast(0) .widenScalarToNextPow2(0) .legalIf(all(isPointer(0), typeInSet(1, {I1, I32}))); @@ -1914,7 +1932,9 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, LLT::fixed_vector(VecSize / TargetEltSize, LLT::integer(TargetEltSize))); }) + .clampScalarBitcast(EltTypeIdx, I32, I64) .clampScalar(EltTypeIdx, I32, I64) + .clampScalarBitcast(VecTypeIdx, I32, I64) .clampScalar(VecTypeIdx, I32, I64) .clampScalar(IdxTypeIdx, I32, I32) .clampMaxNumElements(VecTypeIdx, I32, 32) @@ -1940,13 +1960,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, // FIXME: Doesn't handle extract of illegal sizes. getActionDefinitionsBuilder(Op) .lowerIf(all(typeIs(LitTyIdx, I16), sizeIs(BigTyIdx, 32))) - .lowerIf([=](const LegalityQuery &Query) { - // Sub-vector(or single element) insert and extract. - // TODO: verify immediate offset here since lower only works with - // whole elements. - const LLT BigTy = Query.Types[BigTyIdx]; - return BigTy.isVector(); - }) + .lowerIf(isVector(BigTyIdx)) // FIXME: Multiples of 16 should not be legal. 
.legalIf([=](const LegalityQuery &Query) { const LLT BigTy = Query.Types[BigTyIdx]; @@ -1954,19 +1968,20 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, return (BigTy.getSizeInBits() % 32 == 0) && (LitTy.getSizeInBits() % 16 == 0); }) + .bitcastIf(all(predNot(isIntegerOrIntegerVector(BigTyIdx)), + scalarOrEltNarrowerThan(BigTyIdx, 16)), + changeToInteger(BigTyIdx)) .widenScalarIf( - [=](const LegalityQuery &Query) { - const LLT BigTy = Query.Types[BigTyIdx]; - return (BigTy.getScalarSizeInBits() < 16); - }, + scalarOrEltNarrowerThan(BigTyIdx, 16), LegalizeMutations::widenScalarOrEltToNextPow2(BigTyIdx, 16)) + .bitcastIf(all(predNot(isIntegerOrIntegerVector(LitTyIdx)), + scalarOrEltNarrowerThan(LitTyIdx, 16)), + changeToInteger(LitTyIdx)) .widenScalarIf( - [=](const LegalityQuery &Query) { - const LLT LitTy = Query.Types[LitTyIdx]; - return (LitTy.getScalarSizeInBits() < 16); - }, + scalarOrEltNarrowerThan(LitTyIdx, 16), LegalizeMutations::widenScalarOrEltToNextPow2(LitTyIdx, 16)) .moreElementsIf(isSmallOddVector(BigTyIdx), oneMoreElement(BigTyIdx)) + .widenScalarToNextPow2Bitcast(BigTyIdx, 32) .widenScalarToNextPow2(BigTyIdx, 32); } @@ -1986,7 +2001,9 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, if (ST.hasScalarPackInsts()) { BuildVector // FIXME: Should probably widen s1 vectors straight to s32 + .minScalarOrEltBitcast(0, I16) .minScalarOrElt(0, I16) + .minScalarBitcast(1, I16) .minScalar(1, I16); getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC) @@ -1996,6 +2013,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, BuildVector.customFor({V2I16, I16}); BuildVector.customFor({V2F16, F16}); BuildVector.customFor({V2BF16, BF16}); + BuildVector.minScalarOrEltBitcast(0, I32); BuildVector.minScalarOrElt(0, I32); getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC) @@ -2042,13 +2060,12 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, .lowerFor({{I16, V2I16}}) .lowerFor({{F16, V2F16}}) .lowerFor({{BF16, V2BF16}}) - .lowerIf([=](const LegalityQuery &Query) { - const LLT BigTy = Query.Types[BigTyIdx]; - return BigTy.getSizeInBits() == 32; - }) + .lowerIf(sizeIs(BigTyIdx, 32)) // Try to widen to s16 first for small types. // TODO: Only do this on targets with legal s16 shifts - .minScalarOrEltIf(scalarNarrowerThan(LitTyIdx, 16), LitTyIdx, I16) + .minScalarBitcast(LitTyIdx, I16) + .minScalar(LitTyIdx, I16) + .widenScalarToNextPow2Bitcast(LitTyIdx, /*Min*/ 16) .widenScalarToNextPow2(LitTyIdx, /*Min*/ 16) .moreElementsIf(isSmallOddVector(BigTyIdx), oneMoreElement(BigTyIdx)) @@ -2064,7 +2081,9 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, // Clamp the little scalar to s8-s256 and make it a power of 2. It's // not worth considering the multiples of 64 since 2*192 and 2*384 // are not valid. + .clampScalarBitcast(LitTyIdx, I32, I512) .clampScalar(LitTyIdx, I32, I512) + .widenScalarToNextPow2Bitcast(LitTyIdx, /*Min*/ 32) .widenScalarToNextPow2(LitTyIdx, /*Min*/ 32) // Break up vectors with weird elements into scalars .fewerElementsIf( @@ -2077,16 +2096,11 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, return notValidElt(Query, BigTyIdx); }, scalarize(1)) + .clampScalarBitcast(BigTyIdx, I32, MaxScalar) .clampScalar(BigTyIdx, I32, MaxScalar); if (Op == G_MERGE_VALUES) { - Builder.widenScalarIf( - // TODO: Use 16-bit shifts if legal for 8-bit values? 
- [=](const LegalityQuery &Query) { - const LLT Ty = Query.Types[LitTyIdx]; - return Ty.getSizeInBits() < 32; - }, - changeTo(LitTyIdx, I32)); + Builder.minScalarBitcast(LitTyIdx, I32).minScalar(LitTyIdx, I32); } Builder.widenScalarIf( @@ -2104,7 +2118,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, if (RoundedTo < NewSizeInBits) NewSizeInBits = RoundedTo; } - return std::pair(BigTyIdx, LLT::scalar(NewSizeInBits)); + return std::pair(BigTyIdx, LLT::integer(NewSizeInBits)); }) // Any vectors left are the wrong size. Scalarize them. .scalarize(0) From 3e669c8558872db0aa892fc13fca59091653120d Mon Sep 17 00:00:00 2001 From: Tim Gymnich Date: Thu, 13 Feb 2025 09:48:29 +0000 Subject: [PATCH 14/16] FPInfo: MachineVerifier FPInfo: MachineIRBuilder assertions --- .../CodeGen/GlobalISel/MachineIRBuilder.h | 2 +- .../CodeGen/GlobalISel/MachineIRBuilder.cpp | 130 ++++++++++---- llvm/lib/CodeGen/MachineVerifier.cpp | 164 +++++++++++++++--- 3 files changed, 235 insertions(+), 61 deletions(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h index e04bcebbffd03..b9d9608494837 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -239,7 +239,7 @@ class MachineIRBuilder { unsigned getOpcodeForMerge(const DstOp &DstOp, ArrayRef SrcOps) const; protected: - void validateTruncExt(const LLT Dst, const LLT Src, bool IsExtend); + void validateTruncExt(const LLT Dst, const LLT Src, unsigned Opc); void validateUnaryOp(const LLT Res, const LLT Op0); void validateBinaryOp(const LLT Res, const LLT Op0, const LLT Op1); diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 6ee4931c4653c..ffe8b06647488 100644 --- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -194,7 +194,7 @@ void MachineIRBuilder::validateBinaryOp(const LLT Res, const LLT Op0, void MachineIRBuilder::validateShiftOp(const LLT Res, const LLT Op0, const LLT Op1) { - assert((Res.isScalar() || Res.isVector()) && "invalid operand type"); + assert((Res.isInteger() || Res.isIntegerVector()) && "invalid operand type"); assert((Res == Op0) && "type mismatch"); } @@ -203,7 +203,8 @@ MachineIRBuilder::buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional Flags) { assert(Res.getLLTTy(*getMRI()).isPointerOrPointerVector() && Res.getLLTTy(*getMRI()) == Op0.getLLTTy(*getMRI()) && "type mismatch"); - assert(Op1.getLLTTy(*getMRI()).getScalarType().isScalar() && "invalid offset type"); + assert(Op1.getLLTTy(*getMRI()).getScalarType().isInteger() && + "invalid offset type"); return buildInstr(TargetOpcode::G_PTR_ADD, {Res}, {Op0, Op1}, Flags); } @@ -228,7 +229,7 @@ MachineInstrBuilder MachineIRBuilder::buildMaskLowPtrBits(const DstOp &Res, const SrcOp &Op0, uint32_t NumBits) { LLT PtrTy = Res.getLLTTy(*getMRI()); - LLT MaskTy = LLT::scalar(PtrTy.getSizeInBits()); + LLT MaskTy = LLT::integer(PtrTy.getSizeInBits()); Register MaskReg = getMRI()->createGenericVirtualRegister(MaskTy); buildConstant(MaskReg, maskTrailingZeros(NumBits)); return buildPtrMask(Res, Op0, MaskReg); @@ -355,7 +356,7 @@ MachineInstrBuilder MachineIRBuilder::buildFConstant(const DstOp &Res, == EltTy.getSizeInBits() && "creating fconstant with the wrong size"); - assert(!Ty.isPointer() && "invalid operand type"); + assert((Ty.isFloat() || Ty.isFloatVector()) && "invalid operand 
type"); assert(!Ty.isScalableVector() && "unexpected scalable vector in buildFConstant"); @@ -412,7 +413,7 @@ MachineIRBuilder::buildConstantPtrAuth(const DstOp &Res, MachineInstrBuilder MachineIRBuilder::buildBrCond(const SrcOp &Tst, MachineBasicBlock &Dest) { - assert(Tst.getLLTTy(*getMRI()).isScalar() && "invalid operand type"); + assert(Tst.getLLTTy(*getMRI()).isInteger() && "invalid operand type"); auto MIB = buildInstr(TargetOpcode::G_BRCOND); Tst.addSrcToMIB(MIB); @@ -459,7 +460,7 @@ MachineInstrBuilder MachineIRBuilder::buildLoadFromOffset( return buildLoad(Dst, BasePtr, *OffsetMMO); LLT PtrTy = BasePtr.getLLTTy(*getMRI()); - LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits()); + LLT OffsetTy = LLT::integer(PtrTy.getSizeInBits()); auto ConstOffset = buildConstant(OffsetTy, Offset); auto Ptr = buildPtrAdd(PtrTy, BasePtr, ConstOffset); return buildLoad(Dst, Ptr, *OffsetMMO); @@ -550,10 +551,10 @@ MachineInstrBuilder MachineIRBuilder::buildExtOrTrunc(unsigned ExtOpc, assert((TargetOpcode::G_ANYEXT == ExtOpc || TargetOpcode::G_ZEXT == ExtOpc || TargetOpcode::G_SEXT == ExtOpc) && "Expecting Extending Opc"); - assert(Res.getLLTTy(*getMRI()).isScalar() || - Res.getLLTTy(*getMRI()).isVector()); - assert(Res.getLLTTy(*getMRI()).isScalar() == - Op.getLLTTy(*getMRI()).isScalar()); + assert(Res.getLLTTy(*getMRI()).isInteger() || + Res.getLLTTy(*getMRI()).isIntegerVector()); + assert(Res.getLLTTy(*getMRI()).isInteger() == + Op.getLLTTy(*getMRI()).isInteger()); unsigned Opcode = TargetOpcode::COPY; if (Res.getLLTTy(*getMRI()).getSizeInBits() > @@ -1208,8 +1209,11 @@ MachineIRBuilder::buildBlockAddress(Register Res, const BlockAddress *BA) { } void MachineIRBuilder::validateTruncExt(const LLT DstTy, const LLT SrcTy, - bool IsExtend) { + unsigned Opc) { #ifndef NDEBUG + bool IsExtend = Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_ZEXT || + Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_FPEXT; + bool IsFP = Opc == TargetOpcode::G_FPEXT || Opc == TargetOpcode::G_FPTRUNC; if (DstTy.isVector()) { assert(SrcTy.isVector() && "mismatched cast between vector and non-vector"); assert(SrcTy.getElementCount() == DstTy.getElementCount() && @@ -1223,6 +1227,18 @@ void MachineIRBuilder::validateTruncExt(const LLT DstTy, const LLT SrcTy, else assert(TypeSize::isKnownLT(DstTy.getSizeInBits(), SrcTy.getSizeInBits()) && "invalid widening trunc"); + + if (IsFP) { + assert(DstTy.getScalarType().isFloat() && + "fpext/fptrunc destinaton type must be float"); + assert(SrcTy.getScalarType().isFloat() && + "fpext/fptrunc source type must be float"); + } else { + assert(!DstTy.getScalarType().isFloat() && + "ext/trunc destinaton type must not be float"); + assert(!SrcTy.getScalarType().isFloat() && + "ext/trunc source type must not be float"); + } #endif } @@ -1284,11 +1300,16 @@ MachineIRBuilder::buildInstr(unsigned Opc, ArrayRef DstOps, case TargetOpcode::G_USUBSAT: case TargetOpcode::G_SSUBSAT: { // All these are binary ops. 
+ LLT DstTy = DstOps[0].getLLTTy(*getMRI()); + LLT Src1Ty = SrcOps[0].getLLTTy(*getMRI()); + LLT Src2Ty = SrcOps[1].getLLTTy(*getMRI()); + assert(DstTy.getScalarType().isInteger() && "Invalid destination type"); + assert((Src1Ty.getScalarType().isInteger() || + Src2Ty.getScalarType().isIntegerVector()) && + "Invalid source type"); assert(DstOps.size() == 1 && "Invalid Dst"); assert(SrcOps.size() == 2 && "Invalid Srcs"); - validateBinaryOp(DstOps[0].getLLTTy(*getMRI()), - SrcOps[0].getLLTTy(*getMRI()), - SrcOps[1].getLLTTy(*getMRI())); + validateBinaryOp(DstTy, Src1Ty, Src2Ty); break; } case TargetOpcode::G_SHL: @@ -1306,17 +1327,13 @@ MachineIRBuilder::buildInstr(unsigned Opc, ArrayRef DstOps, case TargetOpcode::G_SEXT: case TargetOpcode::G_ZEXT: case TargetOpcode::G_ANYEXT: - assert(DstOps.size() == 1 && "Invalid Dst"); - assert(SrcOps.size() == 1 && "Invalid Srcs"); - validateTruncExt(DstOps[0].getLLTTy(*getMRI()), - SrcOps[0].getLLTTy(*getMRI()), true); - break; + case TargetOpcode::G_FPEXT: case TargetOpcode::G_TRUNC: case TargetOpcode::G_FPTRUNC: { assert(DstOps.size() == 1 && "Invalid Dst"); assert(SrcOps.size() == 1 && "Invalid Srcs"); validateTruncExt(DstOps[0].getLLTTy(*getMRI()), - SrcOps[0].getLLTTy(*getMRI()), false); + SrcOps[0].getLLTTy(*getMRI()), Opc); break; } case TargetOpcode::G_BITCAST: { @@ -1346,47 +1363,88 @@ MachineIRBuilder::buildInstr(unsigned Opc, ArrayRef DstOps, }() && "Invalid predicate"); assert(SrcOps[1].getLLTTy(*getMRI()) == SrcOps[2].getLLTTy(*getMRI()) && "Type mismatch"); + LLT Op0Ty = SrcOps[1].getLLTTy(*getMRI()); + LLT Op1Ty = SrcOps[2].getLLTTy(*getMRI()); + LLT DstTy = DstOps[0].getLLTTy(*getMRI()); assert([&]() -> bool { - LLT Op0Ty = SrcOps[1].getLLTTy(*getMRI()); - LLT DstTy = DstOps[0].getLLTTy(*getMRI()); if (Op0Ty.isScalar() || Op0Ty.isPointer()) return DstTy.isScalar(); else return DstTy.isVector() && DstTy.getElementCount() == Op0Ty.getElementCount(); }() && "Type Mismatch"); + + if (Opc == TargetOpcode::G_ICMP) { + assert(!Op0Ty.getScalarType().isFloat() && + !Op1Ty.getScalarType().isFloat() && + "G_ICMP operands cannot be float"); + } else { + assert(Op0Ty.getScalarType().isFloat() && + Op1Ty.getScalarType().isFloat() && + "G_FCMP operands must be float"); + } break; } case TargetOpcode::G_UNMERGE_VALUES: { assert(!DstOps.empty() && "Invalid trivial sequence"); assert(SrcOps.size() == 1 && "Invalid src for Unmerge"); + + LLT DstTy = DstOps[0].getLLTTy(*getMRI()); + LLT SrcTy = SrcOps[0].getLLTTy(*getMRI()); + unsigned NumDsts = DstOps.size(); assert(llvm::all_of(DstOps, [&, this](const DstOp &Op) { - return Op.getLLTTy(*getMRI()) == - DstOps[0].getLLTTy(*getMRI()); + return Op.getLLTTy(*getMRI()) == DstTy; }) && "type mismatch in output list"); - assert((TypeSize::ScalarTy)DstOps.size() * - DstOps[0].getLLTTy(*getMRI()).getSizeInBits() == - SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() && + assert(NumDsts * DstTy.getSizeInBits() == SrcTy.getSizeInBits() && "input operands do not cover output register"); + + if (DstTy.isVector()) { + assert(SrcTy.isVector() && "G_UNMERGE_VALUES source operand does not " + "match vector destination operands"); + assert(SrcTy.getScalarType() == DstTy.getScalarType() || + SrcTy.isPointerVector() || + SrcTy.getSizeInBits() == NumDsts * DstTy.getSizeInBits() && + "G_UNMERGE_VALUES source operand does " + "not match vector destination operands"); + } else if (SrcTy.isFloatVector()) { + assert(DstTy.isFloat() && + "G_UNMERGE_VALUES source vector element type does not match " + "scalar destination type"); + + 
assert(NumDsts == SrcTy.getNumElements() &&
+             "G_UNMERGE_VALUES number of destination operands has to match "
+             "the number of vector elements for float vectors");
+    } else {
+      assert(!SrcTy.isFloat() && !DstTy.isFloat() &&
+             "G_UNMERGE_VALUES is not supported for scalar float operands");
+    }
+
     break;
   }
   case TargetOpcode::G_MERGE_VALUES: {
     assert(SrcOps.size() >= 2 && "invalid trivial sequence");
     assert(DstOps.size() == 1 && "Invalid Dst");
+    unsigned NumSrcs = SrcOps.size();
+    LLT SrcTy = SrcOps[0].getLLTTy(*getMRI());
     assert(llvm::all_of(SrcOps,
                         [&, this](const SrcOp &Op) {
-                          return Op.getLLTTy(*getMRI()) ==
-                                 SrcOps[0].getLLTTy(*getMRI());
+                          return Op.getLLTTy(*getMRI()) == SrcTy;
                         }) &&
            "type mismatch in input list");
-    assert((TypeSize::ScalarTy)SrcOps.size() *
-                   SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() ==
-               DstOps[0].getLLTTy(*getMRI()).getSizeInBits() &&
+    assert(llvm::all_of(SrcOps,
+                        [&, this](const SrcOp &Op) {
+                          return !Op.getLLTTy(*getMRI()).isFloat();
+                        }) &&
+           "float types are not allowed in input list");
+    LLT DstTy = DstOps[0].getLLTTy(*getMRI());
+    assert(NumSrcs * SrcTy.getSizeInBits() == DstTy.getSizeInBits() &&
            "input operands do not cover output register");
-    assert(!DstOps[0].getLLTTy(*getMRI()).isVector() &&
+    assert(!DstTy.isVector() &&
            "vectors should be built with G_CONCAT_VECTOR or G_BUILD_VECTOR");
+    assert(!DstTy.isFloat() &&
+           "G_MERGE_VALUES float result types are not allowed");
     break;
   }
   case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
@@ -1438,8 +1496,10 @@ MachineIRBuilder::buildInstr(unsigned Opc, ArrayRef DstOps,
     assert((!SrcOps.empty() || SrcOps.size() < 2) &&
            "Must have at least 2 operands");
     assert(DstOps.size() == 1 && "Invalid DstOps");
-    assert(DstOps[0].getLLTTy(*getMRI()).isVector() &&
-           "Res type must be a vector");
+    assert(DstOps[0].getLLTTy(*getMRI()).isIntegerVector() &&
+           "Res type must be a vector of integers");
+    assert(SrcOps[0].getLLTTy(*getMRI()).isInteger() &&
+           "Src type must be an integer");
     assert(llvm::all_of(SrcOps,
                         [&, this](const SrcOp &Op) {
                           return Op.getLLTTy(*getMRI()) ==
diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp
index 87d3033038414..6b5109a32b740 100644
--- a/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -1239,6 +1239,10 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
       report("Instruction cannot use a vector result type", MI);
 
     if (MI->getOpcode() == TargetOpcode::G_CONSTANT) {
+      if (!DstTy.isInteger() && !DstTy.isPointer()) {
+        report("G_CONSTANT must have an integer/pointer result type", MI);
+        break;
+      }
       if (!MI->getOperand(1).isCImm()) {
         report("G_CONSTANT operand must be cimm", MI);
         break;
@@ -1248,16 +1252,27 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
       if (CI->getBitWidth() != DstTy.getSizeInBits())
         report("inconsistent constant size", MI);
     } else {
+      if (!DstTy.isFloat()) {
+        report("G_FCONSTANT must have a float result type", MI);
+        break;
+      }
       if (!MI->getOperand(1).isFPImm()) {
         report("G_FCONSTANT operand must be fpimm", MI);
         break;
       }
       const ConstantFP *CF = MI->getOperand(1).getFPImm();
-      if (APFloat::getSizeInBits(CF->getValueAPF().getSemantics()) !=
-          DstTy.getSizeInBits()) {
+      const fltSemantics &CSem = CF->getValueAPF().getSemantics();
+
+      if (APFloat::getSizeInBits(CSem) != DstTy.getSizeInBits()) {
         report("inconsistent constant size", MI);
       }
+
+      const fltSemantics &DstSem = getFltSemanticForLLT(DstTy);
+
+      if (&CSem != &DstSem) {
+        report("inconsistent floating
point semantics", MI); + } } break; @@ -1280,6 +1295,10 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { const MachineMemOperand &MMO = **MI->memoperands_begin(); if (MI->getOpcode() == TargetOpcode::G_ZEXTLOAD || MI->getOpcode() == TargetOpcode::G_SEXTLOAD) { + if (ValTy.isFloat() || ValTy.isFloatVector()) { + report("Generic extload must have an integer result type", MI); + break; + } if (TypeSize::isKnownGE(MMO.getSizeInBits().getValue(), ValTy.getSizeInBits())) report("Generic extload must have a narrower memory type", MI); @@ -1291,7 +1310,7 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { if (MMO.getRanges()) { ConstantInt *i = mdconst::extract(MMO.getRanges()->getOperand(0)); - const LLT RangeTy = LLT::scalar(i->getIntegerType()->getBitWidth()); + const LLT RangeTy = LLT::integer(i->getIntegerType()->getBitWidth()); const LLT MemTy = MMO.getMemoryType(); if (MemTy.getScalarType() != RangeTy || ValTy.isScalar() != MemTy.isScalar() || @@ -1370,13 +1389,13 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { if (MI->getOpcode() == TargetOpcode::G_INTTOPTR) { if (!DstTy.isPointer()) report("inttoptr result type must be a pointer", MI); - if (SrcTy.isPointer()) - report("inttoptr source type must not be a pointer", MI); + if (!SrcTy.isInteger()) + report("inttoptr source type must be an integer", MI); } else if (MI->getOpcode() == TargetOpcode::G_PTRTOINT) { if (!SrcTy.isPointer()) report("ptrtoint source type must be a pointer", MI); - if (DstTy.isPointer()) - report("ptrtoint result type must not be a pointer", MI); + if (!DstTy.isInteger()) + report("ptrtoint result type must be an integer", MI); } else { assert(MI->getOpcode() == TargetOpcode::G_ADDRSPACE_CAST); if (!SrcTy.isPointer() || !DstTy.isPointer()) @@ -1399,8 +1418,8 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { if (!PtrTy.isPointerOrPointerVector()) report("gep first operand must be a pointer", MI); - if (OffsetTy.isPointerOrPointerVector()) - report("gep offset operand must not be a pointer", MI); + if (!OffsetTy.isInteger() && !OffsetTy.isIntegerVector()) + report("gep offset operand must be an integer", MI); if (PtrTy.isPointerOrPointerVector()) { const DataLayout &DL = MF->getDataLayout(); @@ -1467,6 +1486,36 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { MI); break; } + + switch (MI->getOpcode()) { + case TargetOpcode::G_SEXT: + case TargetOpcode::G_ZEXT: + case TargetOpcode::G_ANYEXT: + case TargetOpcode::G_TRUNC: { + if (DstTy.isFloat() || DstTy.isFloatVector()) { + report("generic ext/trunc result type must not be a float", MI); + break; + } + + if (SrcTy.isFloat() || SrcTy.isFloatVector()) + report("generic ext/trunc source type must not be a float", MI); + + break; + } + case TargetOpcode::G_FPEXT: + case TargetOpcode::G_FPTRUNC: { + if (!DstTy.isFloat() && !DstTy.isFloatVector()) { + report("generic fpext/fptrunc result type must be float", MI); + break; + } + + if (!SrcTy.isFloat() && !SrcTy.isFloatVector()) + report("generic fpext/fptrunc source type must be a float", MI); + + break; + } + } + break; } case TargetOpcode::G_SELECT: { @@ -1493,10 +1542,15 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { const unsigned NumOps = MI->getNumOperands(); if (DstTy.getSizeInBits() != SrcTy.getSizeInBits() * (NumOps - 1)) report("G_MERGE_VALUES result size is inconsistent", MI); + if (DstTy.isFloat()) + 
report("G_MERGE_VALUES float result types are not allowed", MI); for (unsigned I = 2; I != NumOps; ++I) { - if (MRI->getType(MI->getOperand(I).getReg()) != SrcTy) + LLT OpTy = MRI->getType(MI->getOperand(I).getReg()); + if (OpTy != SrcTy) report("G_MERGE_VALUES source types do not match", MI); + if (OpTy.isFloat()) + report("G_MERGE_VALUES float source types are not allowed", MI); } break; @@ -1526,17 +1580,37 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { // This case is the converse of G_BUILD_VECTOR, but relaxed to allow // mismatched types as long as the total size matches: // %0:_(s64), %1:_(s64) = G_UNMERGE_VALUES %2:_(<4 x s32>) - if (SrcTy.getSizeInBits() != NumDsts * DstTy.getSizeInBits()) + if (SrcTy.getSizeInBits() != NumDsts * DstTy.getSizeInBits()) { report("G_UNMERGE_VALUES vector source operand does not match scalar " "destination operands", MI); + break; + } + if (DstTy.isFloat()) { + if (!SrcTy.isFloatVector()) { + report("G_UNMERGE_VALUES source vector element type does not match " + "scalar destination type", + MI); + break; + } + if (NumDsts != SrcTy.getNumElements()) { + report("G_UNMERGE_VALUES number of destination operands has to match " + "the number of vector elements for float vectors", + MI); + break; + } + } } else { // This case is the converse of G_MERGE_VALUES. if (SrcTy.getSizeInBits() != NumDsts * DstTy.getSizeInBits()) { report("G_UNMERGE_VALUES scalar source operand does not match scalar " "destination operands", MI); + break; } + if (SrcTy.isFloat() || DstTy.isFloat()) + report("G_UNMERGE_VALUES is not supported for scalar float operands", + MI); } break; } @@ -1578,6 +1652,9 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { report("G_BUILD_VECTOR_TRUNC source operand types are not larger than " "dest elt type", MI); + if (DstTy.isFloatVector() || SrcEltTy.isFloat()) { + report("G_BUILD_VECTOR_TRUNC source operand types cannot be float", MI); + } break; } case TargetOpcode::G_CONCAT_VECTORS: { @@ -1610,6 +1687,16 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { DstTy.getElementCount() != SrcTy.getElementCount())) report("Generic vector icmp/fcmp must preserve number of lanes", MI); + if (MI->getOpcode() == TargetOpcode::G_ICMP) { + if (SrcTy.isFloat() || SrcTy.isFloatVector()) { + report("G_ICMP cannot operate on float operands", MI); + } + } else if (MI->getOpcode() == TargetOpcode::G_FCMP) { + if (!SrcTy.isFloat() && !SrcTy.isFloatVector()) { + report("G_FCMP can only operate on float operands", MI); + } + } + break; } case TargetOpcode::G_SCMP: @@ -1654,13 +1741,22 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { break; } - unsigned DstSize = MRI->getType(MI->getOperand(0).getReg()).getSizeInBits(); - unsigned SrcSize = MRI->getType(SrcOp.getReg()).getSizeInBits(); + LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); + LLT SrcTy = MRI->getType(SrcOp.getReg()); + unsigned DstSize = DstTy.getSizeInBits(); + unsigned SrcSize = SrcTy.getSizeInBits(); if (SrcSize == DstSize) report("extract source must be larger than result", MI); if (DstSize + OffsetOp.getImm() > SrcSize) report("extract reads past end of register", MI); + + if (SrcTy.isFloat() || SrcTy.isFloatVector()) + report("extract source must not be a float", MI); + + if (DstTy.isFloat() || DstTy.isFloatVector()) + report("extract result must not be a float", MI); + break; } case TargetOpcode::G_INSERT: { @@ -1676,8 +1772,10 @@ void 
MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { break; } - unsigned DstSize = MRI->getType(MI->getOperand(0).getReg()).getSizeInBits(); - unsigned SrcSize = MRI->getType(SrcOp.getReg()).getSizeInBits(); + LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); + LLT SrcTy = MRI->getType(SrcOp.getReg()); + unsigned DstSize = DstTy.getSizeInBits(); + unsigned SrcSize = SrcTy.getSizeInBits(); if (DstSize <= SrcSize) report("inserted size must be smaller than total register", MI); @@ -1685,6 +1783,12 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { if (SrcSize + OffsetOp.getImm() > DstSize) report("insert writes past end of register", MI); + if (SrcTy.isFloat() || SrcTy.isFloatVector()) + report("insert source must not be a float", MI); + + if (DstTy.isFloat() || DstTy.isFloatVector()) + report("insert result must not be a float", MI); + break; } case TargetOpcode::G_JUMP_TABLE: { @@ -1738,6 +1842,8 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { report("G_SEXT_INREG size must be >= 1", MI); if (Imm >= SrcTy.getScalarSizeInBits()) report("G_SEXT_INREG size must be less than source bit width", MI); + if (SrcTy.isFloat() || SrcTy.isFloatVector()) + report("G_SEXT_INREG source must not be float", MI); break; } case TargetOpcode::G_BSWAP: { @@ -2134,12 +2240,16 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); LLT Src1Ty = MRI->getType(MI->getOperand(1).getReg()); LLT Src2Ty = MRI->getType(MI->getOperand(2).getReg()); - if (!DstTy.isScalar()) - report("Vector reduction requires a scalar destination type", MI); - if (!Src1Ty.isScalar()) - report("Sequential FADD/FMUL vector reduction requires a scalar 1st operand", MI); - if (!Src2Ty.isVector()) - report("Sequential FADD/FMUL vector reduction must have a vector 2nd operand", MI); + if (!DstTy.isFloat()) + report("Vector reduction requires a float destination type", MI); + if (!Src1Ty.isFloat()) + report( + "Sequential FADD/FMUL vector reduction requires a float 1st operand", + MI); + if (!Src2Ty.isFloatVector()) + report("Sequential FADD/FMUL vector reduction must have a float vector " + "2nd operand", + MI); break; } case TargetOpcode::G_VECREDUCE_FADD: @@ -2158,8 +2268,8 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { case TargetOpcode::G_VECREDUCE_UMAX: case TargetOpcode::G_VECREDUCE_UMIN: { LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); - if (!DstTy.isScalar()) - report("Vector reduction requires a scalar destination type", MI); + if (!DstTy.isFloat()) + report("Vector reduction requires a float destination type", MI); break; } @@ -2185,6 +2295,10 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { MI); break; } + if (Src1Ty.isFloat() || Src1Ty.isFloatVector() || Src2Ty.isFloat() || + Src2Ty.isFloatVector()) { + report("Shifts and rotates require operands to be integers", MI); + } break; } case TargetOpcode::G_LLROUND: @@ -2214,8 +2328,8 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { } LLT SrcTy = MRI->getType(MI->getOperand(1).getReg()); LLT SrcEltTy = SrcTy.getScalarType(); - if (!SrcEltTy.isScalar()) { - report("Source must be a scalar or vector of scalars", MI); + if (!SrcEltTy.isFloat()) { + report("Source must be a float or vector of floats", MI); break; } if (!verifyVectorElementMatch(DestTy, SrcTy, MI)) From 5f28389ad467cff0bfaa3ddddcc0d7c38cadbe91 Mon 
Sep 17 00:00:00 2001 From: Tim Gymnich Date: Thu, 23 Jan 2025 17:11:29 +0000 Subject: [PATCH 15/16] update ll tests --- .../CodeGen/AMDGPU/GlobalISel/add.v2i16.ll | 99 +- .../AMDGPU/GlobalISel/amdgpu-irtranslator.ll | 2 +- llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll | 2 + .../AMDGPU/GlobalISel/atomic_load_flat.ll | 56 +- .../AMDGPU/GlobalISel/atomic_load_global.ll | 12 + .../AMDGPU/GlobalISel/atomic_load_local_2.ll | 8 + .../buffer-atomic-fadd.f32-no-rtn.ll | 48 +- .../GlobalISel/buffer-atomic-fadd.f32-rtn.ll | 48 +- .../GlobalISel/buffer-atomic-fadd.f64.ll | 32 +- .../buffer-atomic-fadd.v2f16-no-rtn.ll | 32 +- .../buffer-atomic-fadd.v2f16-rtn.ll | 16 +- .../GlobalISel/buffer-load-store-pointers.ll | 28 +- .../GlobalISel/combine-fma-add-ext-mul.ll | 24 +- .../GlobalISel/combine-fma-sub-ext-neg-mul.ll | 50 +- .../AMDGPU/GlobalISel/combine-fma-sub-mul.ll | 24 +- .../AMDGPU/GlobalISel/cvt_f32_ubyte.ll | 25 +- .../GlobalISel/dereferenceable-declaration.ll | 172 +- .../CodeGen/AMDGPU/GlobalISel/dummy-target.ll | 64 +- .../CodeGen/AMDGPU/GlobalISel/fdiv.f16.ll | 897 +- .../CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll | 18 +- .../AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll | 16 +- .../AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll | 4 +- .../GlobalISel/flat-atomic-fadd.v2f16.ll | 8 +- llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll | 177 +- .../CodeGen/AMDGPU/GlobalISel/fmul.v2f16.ll | 12 + llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll | 280 +- llvm/test/CodeGen/AMDGPU/GlobalISel/frem.ll | 212 +- llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll | 39 +- llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll | 105 +- .../AMDGPU/GlobalISel/function-returns.ll | 1233 +- .../GlobalISel/function-returns.v2i65.ll | 18 +- .../global-atomic-fadd.f32-no-rtn.ll | 51 +- .../GlobalISel/global-atomic-fadd.f32-rtn.ll | 69 +- .../GlobalISel/global-atomic-fadd.f64.ll | 24 +- .../global-atomic-fadd.v2f16-no-rtn.ll | 8 +- .../global-atomic-fadd.v2f16-rtn.ll | 4 +- .../CodeGen/AMDGPU/GlobalISel/global-value.ll | 84 +- .../GlobalISel/image-waterfall-loop-O0.ll | 6 +- .../GlobalISel/inline-asm-mismatched-size.ll | 52 +- .../AMDGPU/GlobalISel/irtranslate-bf16.ll | 638 +- .../irtranslator-amdgcn-cs-chain.ll | 160 +- .../GlobalISel/irtranslator-amdgcn-sendmsg.ll | 4 +- .../GlobalISel/irtranslator-amdgpu_kernel.ll | 1600 +- .../GlobalISel/irtranslator-amdgpu_ps.ll | 105 +- .../GlobalISel/irtranslator-amdgpu_vs.ll | 64 +- .../GlobalISel/irtranslator-assert-align.ll | 160 +- .../GlobalISel/irtranslator-atomicrmw.ll | 153 +- .../irtranslator-call-abi-attribute-hints.ll | 164 +- .../irtranslator-call-implicit-args.ll | 1416 +- .../GlobalISel/irtranslator-call-non-fixed.ll | 70 +- .../irtranslator-call-return-values.ll | 3030 ++-- .../GlobalISel/irtranslator-call-sret.ll | 84 +- .../AMDGPU/GlobalISel/irtranslator-call.ll | 6751 ++++---- .../irtranslator-constant-fold-vector-op.ll | 10 +- .../GlobalISel/irtranslator-constantexpr.ll | 12 +- .../GlobalISel/irtranslator-constrained-fp.ll | 148 +- .../irtranslator-fast-math-flags.ll | 2 +- ...translator-fixed-function-abi-vgpr-args.ll | 66 +- .../GlobalISel/irtranslator-function-args.ll | 3064 ++-- .../irtranslator-function-args.v2i65.ll | 24 +- .../GlobalISel/irtranslator-getelementptr.ll | 230 +- .../GlobalISel/irtranslator-indirect-call.ll | 64 +- .../GlobalISel/irtranslator-inline-asm.ll | 123 +- .../GlobalISel/irtranslator-invariant.ll | 78 +- .../irtranslator-memory-intrinsics.ll | 176 +- .../GlobalISel/irtranslator-metadata.ll | 8 +- ...tor-non-integral-address-spaces-vectors.ll | 98 +- 
...rtranslator-non-integral-address-spaces.ll | 14 +- .../GlobalISel/irtranslator-prefetch.ll | 12 +- .../AMDGPU/GlobalISel/irtranslator-ptrmask.ll | 24 +- .../AMDGPU/GlobalISel/irtranslator-sat.ll | 372 +- .../GlobalISel/irtranslator-sibling-call.ll | 1822 +-- .../irtranslator-struct-return-intrinsics.ll | 12 +- .../GlobalISel/irtranslator-tail-call.ll | 30 +- .../GlobalISel/irtranslator-zext-vec-index.ll | 20 +- ...galize-llvm.amdgcn.image.atomic.dim.a16.ll | 2678 +-- .../legalize-llvm.amdgcn.image.dim.a16.ll | 8240 +++++----- .../legalize-llvm.amdgcn.image.load.2d.d16.ll | 2472 +-- .../legalize-llvm.amdgcn.image.load.2d.ll | 969 +- ...lize-llvm.amdgcn.image.load.2darraymsaa.ll | 182 +- .../legalize-llvm.amdgcn.image.load.3d.ll | 128 +- .../legalize-llvm.amdgcn.image.sample.a16.ll | 13474 +++++++++------- .../legalize-llvm.amdgcn.image.sample.d.ll | 891 +- ...galize-llvm.amdgcn.image.sample.g16.a16.ll | 783 +- .../legalize-llvm.amdgcn.image.sample.g16.ll | 4542 +++--- ...legalize-llvm.amdgcn.image.store.2d.d16.ll | 741 +- .../AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll | 65 +- .../llvm.amdgcn.image.load.1d.d16.ll | 21 +- .../GlobalISel/llvm.amdgcn.intersect_ray.ll | 144 +- .../llvm.amdgcn.make.buffer.rsrc.ll | 10 +- .../llvm.amdgcn.raw.buffer.atomic.add.ll | 32 +- .../llvm.amdgcn.raw.buffer.atomic.cmpswap.ll | 40 +- ....amdgcn.raw.buffer.atomic.fadd-with-ret.ll | 2 +- .../llvm.amdgcn.raw.buffer.atomic.fadd.ll | 40 +- .../llvm.amdgcn.raw.buffer.load.format.f16.ll | 30 +- .../llvm.amdgcn.raw.buffer.load.format.ll | 32 +- .../GlobalISel/llvm.amdgcn.raw.buffer.load.ll | 140 +- .../llvm.amdgcn.raw.buffer.load.tfe.ll | 331 +- ...llvm.amdgcn.raw.buffer.store.format.f16.ll | 66 +- ...llvm.amdgcn.raw.buffer.store.format.f32.ll | 48 +- .../llvm.amdgcn.raw.buffer.store.ll | 132 +- .../llvm.amdgcn.raw.ptr.buffer.atomic.add.ll | 16 +- ...vm.amdgcn.raw.ptr.buffer.atomic.cmpswap.ll | 20 +- ...gcn.raw.ptr.buffer.atomic.fadd-with-ret.ll | 2 +- .../llvm.amdgcn.raw.ptr.buffer.atomic.fadd.ll | 40 +- ...m.amdgcn.raw.ptr.buffer.load.format.f16.ll | 20 +- .../llvm.amdgcn.raw.ptr.buffer.load.format.ll | 12 +- .../llvm.amdgcn.raw.ptr.buffer.load.ll | 72 +- ....amdgcn.raw.ptr.buffer.store.format.f16.ll | 44 +- ....amdgcn.raw.ptr.buffer.store.format.f32.ll | 24 +- .../llvm.amdgcn.raw.ptr.buffer.store.ll | 68 +- .../llvm.amdgcn.raw.ptr.tbuffer.load.ll | 18 +- .../llvm.amdgcn.raw.ptr.tbuffer.store.f16.ll | 40 +- .../llvm.amdgcn.raw.ptr.tbuffer.store.i8.ll | 16 +- .../llvm.amdgcn.raw.ptr.tbuffer.store.ll | 50 +- .../llvm.amdgcn.raw.tbuffer.load.ll | 36 +- .../llvm.amdgcn.raw.tbuffer.store.f16.ll | 60 +- .../llvm.amdgcn.raw.tbuffer.store.i8.ll | 24 +- .../llvm.amdgcn.raw.tbuffer.store.ll | 100 +- .../GlobalISel/llvm.amdgcn.s.buffer.load.ll | 608 +- .../AMDGPU/GlobalISel/llvm.amdgcn.sdot2.ll | 6 +- .../AMDGPU/GlobalISel/llvm.amdgcn.sdot4.ll | 3 +- .../AMDGPU/GlobalISel/llvm.amdgcn.sdot8.ll | 6 +- .../llvm.amdgcn.struct.buffer.atomic.add.ll | 28 +- ...lvm.amdgcn.struct.buffer.atomic.cmpswap.ll | 40 +- ...dgcn.struct.buffer.atomic.fadd-with-ret.ll | 2 +- .../llvm.amdgcn.struct.buffer.atomic.fadd.ll | 40 +- ...vm.amdgcn.struct.buffer.load.format.f16.ll | 36 +- .../llvm.amdgcn.struct.buffer.load.format.ll | 64 +- .../llvm.amdgcn.struct.buffer.load.ll | 72 +- .../llvm.amdgcn.struct.buffer.load.tfe.ll | 2852 ++-- ...m.amdgcn.struct.buffer.store.format.f16.ll | 30 +- ...m.amdgcn.struct.buffer.store.format.f32.ll | 24 +- .../llvm.amdgcn.struct.buffer.store.ll | 40 +- ...lvm.amdgcn.struct.ptr.buffer.atomic.add.ll | 14 +- 
...amdgcn.struct.ptr.buffer.atomic.cmpswap.ll | 20 +- ....struct.ptr.buffer.atomic.fadd-with-ret.ll | 2 +- ...vm.amdgcn.struct.ptr.buffer.atomic.fadd.ll | 40 +- ...mdgcn.struct.ptr.buffer.load.format.f16.ll | 24 +- ...vm.amdgcn.struct.ptr.buffer.load.format.ll | 32 +- .../llvm.amdgcn.struct.ptr.buffer.load.ll | 32 +- ...dgcn.struct.ptr.buffer.store.format.f16.ll | 20 +- ...dgcn.struct.ptr.buffer.store.format.f32.ll | 12 +- .../llvm.amdgcn.struct.ptr.buffer.store.ll | 20 +- .../llvm.amdgcn.struct.ptr.tbuffer.load.ll | 14 +- .../llvm.amdgcn.struct.tbuffer.load.ll | 28 +- .../AMDGPU/GlobalISel/llvm.amdgcn.udot2.ll | 9 +- .../AMDGPU/GlobalISel/llvm.amdgcn.udot4.ll | 6 +- .../AMDGPU/GlobalISel/llvm.amdgcn.udot8.ll | 6 +- .../load-legalize-range-metadata.ll | 124 +- llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll | 2 + llvm/test/CodeGen/AMDGPU/GlobalISel/mmra.ll | 4 +- .../promote-dependency-on-invariant-result.ll | 6 +- .../regbankselect-amdgcn.image.load.1d.ll | 384 +- .../regbankselect-amdgcn.image.sample.1d.ll | 644 +- .../regbankselect-amdgcn.raw.buffer.load.ll | 189 +- ...egbankselect-amdgcn.raw.ptr.buffer.load.ll | 189 +- .../regbankselect-amdgcn.s.buffer.load.ll | 3980 ++--- ...regbankselect-amdgcn.struct.buffer.load.ll | 181 +- ...egbankselect-amdgcn.struct.buffer.store.ll | 172 +- ...ankselect-amdgcn.struct.ptr.buffer.load.ll | 181 +- ...nkselect-amdgcn.struct.ptr.buffer.store.ll | 172 +- .../test/CodeGen/AMDGPU/GlobalISel/saddsat.ll | 14 +- .../AMDGPU/GlobalISel/select-to-fmin-fmax.ll | 56 +- llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll | 2 + .../test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll | 14 +- .../AMDGPU/GlobalISel/strict_fma.f16.ll | 37 +- .../test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll | 2 + .../AMDGPU/GlobalISel/unsupported-load.ll | 2 +- .../AMDGPU/GlobalISel/unsupported-ptr-add.ll | 2 +- .../test/CodeGen/AMDGPU/GlobalISel/usubsat.ll | 2 + ...wmma-gfx12-w32-f16-f32-matrix-modifiers.ll | 13 +- ...wmma-gfx12-w64-f16-f32-matrix-modifiers.ll | 7 +- 173 files changed, 38024 insertions(+), 34247 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll index c8b82716a9fe1..7782ebf5af13b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll @@ -40,11 +40,8 @@ define <2 x i16> @v_add_v2i16_fneg_lhs(<2 x half> %a, <2 x i16> %b) { ; GFX7-LABEL: v_add_v2i16_fneg_lhs: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 -; GFX7-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX7-NEXT: v_cvt_f16_f32_e64 v0, -v0 +; GFX7-NEXT: v_cvt_f16_f32_e64 v1, -v1 ; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2 ; GFX7-NEXT: v_add_i32_e32 v1, vcc, v1, v3 ; GFX7-NEXT: s_setpc_b64 s[30:31] @@ -79,11 +76,8 @@ define <2 x i16> @v_add_v2i16_fneg_rhs(<2 x i16> %a, <2 x half> %b) { ; GFX7-LABEL: v_add_v2i16_fneg_rhs: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX7-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX7-NEXT: v_or_b32_e32 v2, v3, v2 -; GFX7-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 -; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v2 +; GFX7-NEXT: v_cvt_f16_f32_e64 v2, -v2 +; GFX7-NEXT: v_cvt_f16_f32_e64 v3, -v3 ; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2 ; GFX7-NEXT: v_add_i32_e32 v1, vcc, v1, v3 ; GFX7-NEXT: s_setpc_b64 s[30:31] @@ -118,18 +112,12 @@ define 
<2 x i16> @v_add_v2i16_fneg_lhs_fneg_rhs(<2 x half> %a, <2 x half> %b) { ; GFX7-LABEL: v_add_v2i16_fneg_lhs_fneg_rhs: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v3 -; GFX7-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v2 -; GFX7-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 -; GFX7-NEXT: v_xor_b32_e32 v1, 0x80008000, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v1 -; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; GFX7-NEXT: v_add_i32_e32 v1, vcc, v2, v3 +; GFX7-NEXT: v_cvt_f16_f32_e64 v0, -v0 +; GFX7-NEXT: v_cvt_f16_f32_e64 v2, -v2 +; GFX7-NEXT: v_cvt_f16_f32_e64 v1, -v1 +; GFX7-NEXT: v_cvt_f16_f32_e64 v3, -v3 +; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; GFX7-NEXT: v_add_i32_e32 v1, vcc, v1, v3 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_add_v2i16_fneg_lhs_fneg_rhs: @@ -434,17 +422,15 @@ define amdgpu_ps i32 @s_add_v2i16(<2 x i16> inreg %a, <2 x i16> inreg %b) { define amdgpu_ps i32 @s_add_v2i16_fneg_lhs(<2 x half> inreg %a, <2 x i16> inreg %b) { ; GFX7-LABEL: s_add_v2i16_fneg_lhs: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_lshl_b32 s1, s1, 16 -; GFX7-NEXT: s_and_b32 s0, s0, 0xffff -; GFX7-NEXT: s_or_b32 s0, s1, s0 -; GFX7-NEXT: s_xor_b32 s0, s0, 0x80008000 -; GFX7-NEXT: s_lshr_b32 s1, s0, 16 -; GFX7-NEXT: s_add_i32 s1, s1, s3 -; GFX7-NEXT: s_add_i32 s0, s0, s2 -; GFX7-NEXT: s_and_b32 s1, s1, 0xffff -; GFX7-NEXT: s_and_b32 s0, s0, 0xffff -; GFX7-NEXT: s_lshl_b32 s1, s1, 16 -; GFX7-NEXT: s_or_b32 s0, s0, s1 +; GFX7-NEXT: v_cvt_f16_f32_e64 v1, -s1 +; GFX7-NEXT: v_cvt_f16_f32_e64 v0, -s0 +; GFX7-NEXT: v_add_i32_e32 v1, vcc, s3, v1 +; GFX7-NEXT: v_add_i32_e32 v0, vcc, s2, v0 +; GFX7-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: v_readfirstlane_b32 s0, v0 ; GFX7-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: s_add_v2i16_fneg_lhs: @@ -490,17 +476,15 @@ define amdgpu_ps i32 @s_add_v2i16_fneg_lhs(<2 x half> inreg %a, <2 x i16> inreg define amdgpu_ps i32 @s_add_v2i16_fneg_rhs(<2 x i16> inreg %a, <2 x half> inreg %b) { ; GFX7-LABEL: s_add_v2i16_fneg_rhs: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_lshl_b32 s3, s3, 16 -; GFX7-NEXT: s_and_b32 s2, s2, 0xffff -; GFX7-NEXT: s_or_b32 s2, s3, s2 -; GFX7-NEXT: s_xor_b32 s2, s2, 0x80008000 -; GFX7-NEXT: s_lshr_b32 s3, s2, 16 -; GFX7-NEXT: s_add_i32 s1, s1, s3 -; GFX7-NEXT: s_add_i32 s0, s0, s2 -; GFX7-NEXT: s_and_b32 s1, s1, 0xffff -; GFX7-NEXT: s_and_b32 s0, s0, 0xffff -; GFX7-NEXT: s_lshl_b32 s1, s1, 16 -; GFX7-NEXT: s_or_b32 s0, s0, s1 +; GFX7-NEXT: v_cvt_f16_f32_e64 v1, -s3 +; GFX7-NEXT: v_cvt_f16_f32_e64 v0, -s2 +; GFX7-NEXT: v_add_i32_e32 v1, vcc, s1, v1 +; GFX7-NEXT: v_add_i32_e32 v0, vcc, s0, v0 +; GFX7-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: v_readfirstlane_b32 s0, v0 ; GFX7-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: s_add_v2i16_fneg_rhs: @@ -546,22 +530,17 @@ define amdgpu_ps i32 @s_add_v2i16_fneg_rhs(<2 x i16> inreg %a, <2 x half> inreg define amdgpu_ps i32 @s_add_v2i16_fneg_lhs_fneg_rhs(<2 x half> inreg %a, <2 x half> inreg %b) { ; GFX7-LABEL: s_add_v2i16_fneg_lhs_fneg_rhs: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_lshl_b32 s1, s1, 16 -; GFX7-NEXT: 
s_and_b32 s0, s0, 0xffff -; GFX7-NEXT: s_or_b32 s0, s1, s0 -; GFX7-NEXT: s_lshl_b32 s1, s3, 16 -; GFX7-NEXT: s_and_b32 s2, s2, 0xffff -; GFX7-NEXT: s_or_b32 s1, s1, s2 -; GFX7-NEXT: s_xor_b32 s0, s0, 0x80008000 -; GFX7-NEXT: s_xor_b32 s1, s1, 0x80008000 -; GFX7-NEXT: s_lshr_b32 s2, s0, 16 -; GFX7-NEXT: s_lshr_b32 s3, s1, 16 -; GFX7-NEXT: s_add_i32 s2, s2, s3 -; GFX7-NEXT: s_add_i32 s0, s0, s1 -; GFX7-NEXT: s_and_b32 s1, s2, 0xffff -; GFX7-NEXT: s_and_b32 s0, s0, 0xffff -; GFX7-NEXT: s_lshl_b32 s1, s1, 16 -; GFX7-NEXT: s_or_b32 s0, s0, s1 +; GFX7-NEXT: v_cvt_f16_f32_e64 v0, -s0 +; GFX7-NEXT: v_cvt_f16_f32_e64 v1, -s2 +; GFX7-NEXT: v_cvt_f16_f32_e64 v2, -s1 +; GFX7-NEXT: v_cvt_f16_f32_e64 v3, -s3 +; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; GFX7-NEXT: v_add_i32_e32 v1, vcc, v2, v3 +; GFX7-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: v_readfirstlane_b32 s0, v0 ; GFX7-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: s_add_v2i16_fneg_lhs_fneg_rhs: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/amdgpu-irtranslator.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/amdgpu-irtranslator.ll index 00c3bf30671e0..9aa96da2645b6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/amdgpu-irtranslator.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/amdgpu-irtranslator.ll @@ -5,7 +5,7 @@ ; Tests for add. ; CHECK: name: addi32 -; CHECK: {{%[0-9]+}}:_(s32) = G_ADD +; CHECK: {{%[0-9]+}}:_(i32) = G_ADD define amdgpu_kernel void @addi32(i32 %arg1, i32 %arg2) { %res = add i32 %arg1, %arg2 store i32 %res, ptr addrspace(1) undef diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll index 493e8cef63890..eb8e5abafb69d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll @@ -695,6 +695,7 @@ define amdgpu_ps half @ashr_i16_sv(i16 inreg %value, i16 %amount) { ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX6-NEXT: s_sext_i32_i16 s0, s0 ; GFX6-NEXT: v_ashr_i32_e32 v0, s0, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: ashr_i16_sv: @@ -722,6 +723,7 @@ define amdgpu_ps half @ashr_i16_vs(i16 %value, i16 inreg %amount) { ; GFX6-NEXT: s_and_b32 s0, s0, 0xffff ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16 ; GFX6-NEXT: v_ashrrev_i32_e32 v0, s0, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: ashr_i16_vs: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_flat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_flat.ll index 83912b1e77db2..b1f7d94850e0e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_flat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_flat.ll @@ -98,23 +98,53 @@ define i32 @atomic_load_flat_monotonic_i16_sext_to_i32(ptr %ptr) { } define half @atomic_load_flat_monotonic_f16(ptr %ptr) { -; GCN-LABEL: atomic_load_flat_monotonic_f16: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: flat_load_ushort v0, v[0:1] glc -; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX7-LABEL: atomic_load_flat_monotonic_f16: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: flat_load_ushort v0, v[0:1] glc +; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: atomic_load_flat_monotonic_f16: +; GFX8: ; %bb.0: 
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_ushort v0, v[0:1] glc +; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: atomic_load_flat_monotonic_f16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: flat_load_ushort v0, v[0:1] glc +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] %load = load atomic half, ptr %ptr monotonic, align 2 ret half %load } define bfloat @atomic_load_flat_monotonic_bf16(ptr %ptr) { -; GCN-LABEL: atomic_load_flat_monotonic_bf16: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: flat_load_ushort v0, v[0:1] glc -; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX7-LABEL: atomic_load_flat_monotonic_bf16: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: flat_load_ushort v0, v[0:1] glc +; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: atomic_load_flat_monotonic_bf16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_ushort v0, v[0:1] glc +; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: atomic_load_flat_monotonic_bf16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: flat_load_ushort v0, v[0:1] glc +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] %load = load atomic bfloat, ptr %ptr monotonic, align 2 ret bfloat %load } @@ -125,6 +155,7 @@ define i32 @atomic_load_flat_monotonic_f16_zext_to_i32(ptr %ptr) { ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: flat_load_ushort v0, v[0:1] glc ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GCN-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GCN-NEXT: s_setpc_b64 s[30:31] %load = load atomic half, ptr %ptr monotonic, align 2 %cast = bitcast half %load to i16 @@ -138,6 +169,7 @@ define i32 @atomic_load_flat_monotonic_bf16_zext_to_i32(ptr %ptr) { ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: flat_load_ushort v0, v[0:1] glc ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GCN-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GCN-NEXT: s_setpc_b64 s[30:31] %load = load atomic bfloat, ptr %ptr monotonic, align 2 %cast = bitcast bfloat %load to i16 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_global.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_global.ll index e2906c3d4fdb2..591a6cc492a35 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_global.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_global.ll @@ -299,6 +299,7 @@ define half @atomic_load_global_monotonic_f16(ptr addrspace(1) %ptr) { ; GFX6-NEXT: s_mov_b64 s[4:5], 0 ; GFX6-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX7-LABEL: atomic_load_global_monotonic_f16: @@ -306,6 +307,7 @@ define half @atomic_load_global_monotonic_f16(ptr addrspace(1) %ptr) { ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_load_ushort v0, v[0:1] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: atomic_load_global_monotonic_f16: @@ -334,6 +336,7 @@ define bfloat @atomic_load_global_monotonic_bf16(ptr addrspace(1) %ptr) { ; GFX6-NEXT: s_mov_b64 s[4:5], 0 ; 
GFX6-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX7-LABEL: atomic_load_global_monotonic_bf16: @@ -341,6 +344,7 @@ define bfloat @atomic_load_global_monotonic_bf16(ptr addrspace(1) %ptr) { ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_load_ushort v0, v[0:1] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: atomic_load_global_monotonic_bf16: @@ -369,6 +373,7 @@ define i32 @atomic_load_global_monotonic_f16_zext_to_i32(ptr addrspace(1) %ptr) ; GFX6-NEXT: s_mov_b64 s[4:5], 0 ; GFX6-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX7-LABEL: atomic_load_global_monotonic_f16_zext_to_i32: @@ -376,6 +381,7 @@ define i32 @atomic_load_global_monotonic_f16_zext_to_i32(ptr addrspace(1) %ptr) ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_load_ushort v0, v[0:1] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: atomic_load_global_monotonic_f16_zext_to_i32: @@ -383,6 +389,7 @@ define i32 @atomic_load_global_monotonic_f16_zext_to_i32(ptr addrspace(1) %ptr) ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: flat_load_ushort v0, v[0:1] glc ; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: atomic_load_global_monotonic_f16_zext_to_i32: @@ -390,6 +397,7 @@ define i32 @atomic_load_global_monotonic_f16_zext_to_i32(ptr addrspace(1) %ptr) ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_load_ushort v0, v[0:1], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX9-NEXT: s_setpc_b64 s[30:31] %load = load atomic half, ptr addrspace(1) %ptr monotonic, align 2 %cast = bitcast half %load to i16 @@ -406,6 +414,7 @@ define i32 @atomic_load_global_monotonic_bf16_zext_to_i32(ptr addrspace(1) %ptr) ; GFX6-NEXT: s_mov_b64 s[4:5], 0 ; GFX6-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX7-LABEL: atomic_load_global_monotonic_bf16_zext_to_i32: @@ -413,6 +422,7 @@ define i32 @atomic_load_global_monotonic_bf16_zext_to_i32(ptr addrspace(1) %ptr) ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_load_ushort v0, v[0:1] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: atomic_load_global_monotonic_bf16_zext_to_i32: @@ -420,6 +430,7 @@ define i32 @atomic_load_global_monotonic_bf16_zext_to_i32(ptr addrspace(1) %ptr) ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: flat_load_ushort v0, v[0:1] glc ; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: atomic_load_global_monotonic_bf16_zext_to_i32: @@ -427,6 +438,7 @@ define i32 @atomic_load_global_monotonic_bf16_zext_to_i32(ptr addrspace(1) %ptr) ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_load_ushort v0, v[0:1], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX9-NEXT: s_setpc_b64 s[30:31] %load = load atomic bfloat, 
ptr addrspace(1) %ptr monotonic, align 2 %cast = bitcast bfloat %load to i16 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_local_2.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_local_2.ll index 1656814d6fb06..77a82d6ae122c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_local_2.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_local_2.ll @@ -234,6 +234,7 @@ define half @atomic_load_local_monotonic_f16(ptr addrspace(3) %ptr) { ; GFX7-NEXT: s_mov_b32 m0, -1 ; GFX7-NEXT: ds_read_u16 v0, v0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: atomic_load_local_monotonic_f16: @@ -261,6 +262,7 @@ define bfloat @atomic_load_local_monotonic_bf16(ptr addrspace(3) %ptr) { ; GFX7-NEXT: s_mov_b32 m0, -1 ; GFX7-NEXT: ds_read_u16 v0, v0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: atomic_load_local_monotonic_bf16: @@ -288,6 +290,7 @@ define i32 @atomic_load_local_monotonic_f16_zext_to_i32(ptr addrspace(3) %ptr) { ; GFX7-NEXT: s_mov_b32 m0, -1 ; GFX7-NEXT: ds_read_u16 v0, v0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: atomic_load_local_monotonic_f16_zext_to_i32: @@ -296,6 +299,7 @@ define i32 @atomic_load_local_monotonic_f16_zext_to_i32(ptr addrspace(3) %ptr) { ; GFX8-NEXT: s_mov_b32 m0, -1 ; GFX8-NEXT: ds_read_u16 v0, v0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: atomic_load_local_monotonic_f16_zext_to_i32: @@ -303,6 +307,7 @@ define i32 @atomic_load_local_monotonic_f16_zext_to_i32(ptr addrspace(3) %ptr) { ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: ds_read_u16 v0, v0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX9-NEXT: s_setpc_b64 s[30:31] %load = load atomic half, ptr addrspace(3) %ptr monotonic, align 2 %cast = bitcast half %load to i16 @@ -317,6 +322,7 @@ define i32 @atomic_load_local_monotonic_bf16_zext_to_i32(ptr addrspace(3) %ptr) ; GFX7-NEXT: s_mov_b32 m0, -1 ; GFX7-NEXT: ds_read_u16 v0, v0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: atomic_load_local_monotonic_bf16_zext_to_i32: @@ -325,6 +331,7 @@ define i32 @atomic_load_local_monotonic_bf16_zext_to_i32(ptr addrspace(3) %ptr) ; GFX8-NEXT: s_mov_b32 m0, -1 ; GFX8-NEXT: ds_read_u16 v0, v0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: atomic_load_local_monotonic_bf16_zext_to_i32: @@ -332,6 +339,7 @@ define i32 @atomic_load_local_monotonic_bf16_zext_to_i32(ptr addrspace(3) %ptr) ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: ds_read_u16 v0, v0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX9-NEXT: s_setpc_b64 s[30:31] %load = load atomic bfloat, ptr addrspace(3) %ptr monotonic, align 2 %cast = bitcast bfloat %load to i16 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-no-rtn.ll index 714328a42d675..70819731a7b79 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-no-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-no-rtn.ll @@ -17,7 +17,7 @@ define amdgpu_ps void 
@buffer_atomic_fadd_f32_offset_no_rtn(float %val, <4 x i32 ; GFX908_GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX908_GFX11-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (f32), align 1, addrspace 8) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f32_offset_no_rtn @@ -31,7 +31,7 @@ define amdgpu_ps void @buffer_atomic_fadd_f32_offset_no_rtn(float %val, <4 x i32 ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (f32), align 1, addrspace 8) ; GFX90A_GFX942-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: buffer_atomic_fadd_f32_offset_no_rtn @@ -45,7 +45,7 @@ define amdgpu_ps void @buffer_atomic_fadd_f32_offset_no_rtn(float %val, <4 x i32 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX12-NEXT: BUFFER_ATOMIC_ADD_F32_VBUFFER_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_ATOMIC_ADD_F32_VBUFFER_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (f32), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0) ret void @@ -64,7 +64,7 @@ define amdgpu_ps void @buffer_atomic_fadd_f32_offen_no_rtn(float %val, <4 x i32> ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX908_GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX908_GFX11-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (f32), align 1, addrspace 8) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f32_offen_no_rtn @@ -79,7 +79,7 @@ define amdgpu_ps void @buffer_atomic_fadd_f32_offen_no_rtn(float %val, <4 x i32> ; GFX90A_GFX942-NEXT: 
[[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (f32), align 1, addrspace 8) ; GFX90A_GFX942-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: buffer_atomic_fadd_f32_offen_no_rtn @@ -94,7 +94,7 @@ define amdgpu_ps void @buffer_atomic_fadd_f32_offen_no_rtn(float %val, <4 x i32> ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX12-NEXT: BUFFER_ATOMIC_ADD_F32_VBUFFER_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_ATOMIC_ADD_F32_VBUFFER_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (f32), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -113,7 +113,7 @@ define amdgpu_ps void @buffer_atomic_fadd_f32_idxen_no_rtn(float %val, <4 x i32> ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX908_GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX908_GFX11-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (f32), align 1, addrspace 8) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f32_idxen_no_rtn @@ -128,7 +128,7 @@ define amdgpu_ps void @buffer_atomic_fadd_f32_idxen_no_rtn(float %val, <4 x i32> ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (f32), align 1, addrspace 8) ; GFX90A_GFX942-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: buffer_atomic_fadd_f32_idxen_no_rtn @@ -143,7 +143,7 @@ define amdgpu_ps void @buffer_atomic_fadd_f32_idxen_no_rtn(float %val, <4 x i32> ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = 
REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX12-NEXT: BUFFER_ATOMIC_ADD_F32_VBUFFER_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_ATOMIC_ADD_F32_VBUFFER_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (f32), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret void @@ -164,7 +164,7 @@ define amdgpu_ps void @buffer_atomic_fadd_f32_bothen_no_rtn(float %val, <4 x i32 ; GFX908_GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX908_GFX11-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX908_GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (f32), align 1, addrspace 8) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f32_bothen_no_rtn @@ -181,7 +181,7 @@ define amdgpu_ps void @buffer_atomic_fadd_f32_bothen_no_rtn(float %val, <4 x i32 ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (f32), align 1, addrspace 8) ; GFX90A_GFX942-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: buffer_atomic_fadd_f32_bothen_no_rtn @@ -198,7 +198,7 @@ define amdgpu_ps void @buffer_atomic_fadd_f32_bothen_no_rtn(float %val, <4 x i32 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX12-NEXT: BUFFER_ATOMIC_ADD_F32_VBUFFER_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_ATOMIC_ADD_F32_VBUFFER_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (f32), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2) ret void @@ -216,7 +216,7 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_f32_offset_no_rtn(float %val, ptr ; GFX908_GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; GFX908_GFX11-NEXT: 
[[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (f32) on %ir.rsrc, align 1, addrspace 8) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_offset_no_rtn @@ -230,7 +230,7 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_f32_offset_no_rtn(float %val, ptr ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (f32) on %ir.rsrc, align 1, addrspace 8) ; GFX90A_GFX942-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: buffer_ptr_atomic_fadd_f32_offset_no_rtn @@ -244,7 +244,7 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_f32_offset_no_rtn(float %val, ptr ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX12-NEXT: BUFFER_ATOMIC_ADD_F32_VBUFFER_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_ATOMIC_ADD_F32_VBUFFER_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (f32) on %ir.rsrc, align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 0, i32 %soffset, i32 0) ret void @@ -263,7 +263,7 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_f32_offen_no_rtn(float %val, ptr a ; GFX908_GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX908_GFX11-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (f32) on %ir.rsrc, align 1, addrspace 8) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_offen_no_rtn @@ -278,7 +278,7 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_f32_offen_no_rtn(float %val, ptr a ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY 
$vgpr1 ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (f32) on %ir.rsrc, align 1, addrspace 8) ; GFX90A_GFX942-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: buffer_ptr_atomic_fadd_f32_offen_no_rtn @@ -293,7 +293,7 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_f32_offen_no_rtn(float %val, ptr a ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX12-NEXT: BUFFER_ATOMIC_ADD_F32_VBUFFER_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_ATOMIC_ADD_F32_VBUFFER_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (f32) on %ir.rsrc, align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -312,7 +312,7 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_f32_idxen_no_rtn(float %val, ptr a ; GFX908_GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX908_GFX11-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (f32) on %ir.rsrc, align 1, addrspace 8) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_idxen_no_rtn @@ -327,7 +327,7 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_f32_idxen_no_rtn(float %val, ptr a ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (f32) on %ir.rsrc, align 1, addrspace 8) ; GFX90A_GFX942-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: buffer_ptr_atomic_fadd_f32_idxen_no_rtn @@ -342,7 +342,7 @@ define amdgpu_ps void 
@buffer_ptr_atomic_fadd_f32_idxen_no_rtn(float %val, ptr a ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX12-NEXT: BUFFER_ATOMIC_ADD_F32_VBUFFER_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_ATOMIC_ADD_F32_VBUFFER_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (f32) on %ir.rsrc, align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret void @@ -363,7 +363,7 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_f32_bothen_no_rtn(float %val, ptr ; GFX908_GFX11-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX908_GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (f32) on %ir.rsrc, align 1, addrspace 8) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_bothen_no_rtn @@ -380,7 +380,7 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_f32_bothen_no_rtn(float %val, ptr ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (f32) on %ir.rsrc, align 1, addrspace 8) ; GFX90A_GFX942-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: buffer_ptr_atomic_fadd_f32_bothen_no_rtn @@ -397,7 +397,7 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_f32_bothen_no_rtn(float %val, ptr ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX12-NEXT: BUFFER_ATOMIC_ADD_F32_VBUFFER_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX12-NEXT: 
BUFFER_ATOMIC_ADD_F32_VBUFFER_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (f32) on %ir.rsrc, align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2) ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-rtn.ll index fb95d99e9f65b..2fdc8dfc3ebc2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-rtn.ll @@ -16,7 +16,7 @@ define amdgpu_ps float @buffer_atomic_fadd_f32_offset_rtn(float %val, <4 x i32> ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (f32), align 1, addrspace 8) ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN]] ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -31,7 +31,7 @@ define amdgpu_ps float @buffer_atomic_fadd_f32_offset_rtn(float %val, <4 x i32> ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX11-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (f32), align 1, addrspace 8) ; GFX11-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -46,7 +46,7 @@ define amdgpu_ps float @buffer_atomic_fadd_f32_offset_rtn(float %val, <4 x i32> ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX12-NEXT: [[BUFFER_ATOMIC_ADD_F32_VBUFFER_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_VBUFFER_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_ATOMIC_ADD_F32_VBUFFER_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_VBUFFER_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (f32), align 1, addrspace 8) ; GFX12-NEXT: 
$vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_VBUFFER_OFFSET_RTN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0) @@ -66,7 +66,7 @@ define amdgpu_ps float @buffer_atomic_fadd_f32_offen_rtn(float %val, <4 x i32> i ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (f32), align 1, addrspace 8) ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN]] ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -82,7 +82,7 @@ define amdgpu_ps float @buffer_atomic_fadd_f32_offen_rtn(float %val, <4 x i32> i ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (f32), align 1, addrspace 8) ; GFX11-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -98,7 +98,7 @@ define amdgpu_ps float @buffer_atomic_fadd_f32_offen_rtn(float %val, <4 x i32> i ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX12-NEXT: [[BUFFER_ATOMIC_ADD_F32_VBUFFER_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_VBUFFER_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_ATOMIC_ADD_F32_VBUFFER_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_VBUFFER_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (f32), align 1, addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_VBUFFER_OFFEN_RTN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -118,7 +118,7 @@ define amdgpu_ps float @buffer_atomic_fadd_f32_idxen_rtn(float %val, <4 x i32> i ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], 
%subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (f32), align 1, addrspace 8) ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN]] ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -134,7 +134,7 @@ define amdgpu_ps float @buffer_atomic_fadd_f32_idxen_rtn(float %val, <4 x i32> i ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (f32), align 1, addrspace 8) ; GFX11-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -150,7 +150,7 @@ define amdgpu_ps float @buffer_atomic_fadd_f32_idxen_rtn(float %val, <4 x i32> i ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX12-NEXT: [[BUFFER_ATOMIC_ADD_F32_VBUFFER_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_VBUFFER_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_ATOMIC_ADD_F32_VBUFFER_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_VBUFFER_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (f32), align 1, addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_VBUFFER_IDXEN_RTN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) @@ -172,7 +172,7 @@ define amdgpu_ps float @buffer_atomic_fadd_f32_bothen_rtn(float %val, <4 x i32> ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; 
GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (f32), align 1, addrspace 8) ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN]] ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -190,7 +190,7 @@ define amdgpu_ps float @buffer_atomic_fadd_f32_bothen_rtn(float %val, <4 x i32> ; GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX11-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (f32), align 1, addrspace 8) ; GFX11-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -208,7 +208,7 @@ define amdgpu_ps float @buffer_atomic_fadd_f32_bothen_rtn(float %val, <4 x i32> ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX12-NEXT: [[BUFFER_ATOMIC_ADD_F32_VBUFFER_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_VBUFFER_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_ATOMIC_ADD_F32_VBUFFER_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_VBUFFER_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (f32), align 1, addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_VBUFFER_BOTHEN_RTN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) @@ -227,7 +227,7 @@ define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_offset_rtn(float %val, ptr ad ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (f32) on %ir.rsrc, align 1, addrspace 8) ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN]] ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -242,7 +242,7 @@ define amdgpu_ps float 
@buffer_ptr_atomic_fadd_f32_offset_rtn(float %val, ptr ad ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; GFX11-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (f32) on %ir.rsrc, align 1, addrspace 8) ; GFX11-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -257,7 +257,7 @@ define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_offset_rtn(float %val, ptr ad ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX12-NEXT: [[BUFFER_ATOMIC_ADD_F32_VBUFFER_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_VBUFFER_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_ATOMIC_ADD_F32_VBUFFER_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_VBUFFER_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (f32) on %ir.rsrc, align 1, addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_VBUFFER_OFFSET_RTN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 0, i32 %soffset, i32 0) @@ -277,7 +277,7 @@ define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_offen_rtn(float %val, ptr add ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (f32) on %ir.rsrc, align 1, addrspace 8) ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN]] ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -293,7 +293,7 @@ define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_offen_rtn(float %val, ptr add ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX11-NEXT: 
[[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (f32) on %ir.rsrc, align 1, addrspace 8) ; GFX11-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -309,7 +309,7 @@ define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_offen_rtn(float %val, ptr add ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX12-NEXT: [[BUFFER_ATOMIC_ADD_F32_VBUFFER_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_VBUFFER_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_ATOMIC_ADD_F32_VBUFFER_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_VBUFFER_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (f32) on %ir.rsrc, align 1, addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_VBUFFER_OFFEN_RTN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -329,7 +329,7 @@ define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_idxen_rtn(float %val, ptr add ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (f32) on %ir.rsrc, align 1, addrspace 8) ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN]] ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -345,7 +345,7 @@ define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_idxen_rtn(float %val, ptr add ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN 
[[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (f32) on %ir.rsrc, align 1, addrspace 8) ; GFX11-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -361,7 +361,7 @@ define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_idxen_rtn(float %val, ptr add ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX12-NEXT: [[BUFFER_ATOMIC_ADD_F32_VBUFFER_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_VBUFFER_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_ATOMIC_ADD_F32_VBUFFER_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_VBUFFER_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (f32) on %ir.rsrc, align 1, addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_VBUFFER_IDXEN_RTN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) @@ -383,7 +383,7 @@ define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_bothen_rtn(float %val, ptr ad ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (f32) on %ir.rsrc, align 1, addrspace 8) ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN]] ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -401,7 +401,7 @@ define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_bothen_rtn(float %val, ptr ad ; GFX11-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (f32) on %ir.rsrc, align 1, addrspace 8) ; GFX11-NEXT: 
$vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -419,7 +419,7 @@ define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_bothen_rtn(float %val, ptr ad ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX12-NEXT: [[BUFFER_ATOMIC_ADD_F32_VBUFFER_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_VBUFFER_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_ATOMIC_ADD_F32_VBUFFER_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_VBUFFER_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (f32) on %ir.rsrc, align 1, addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_VBUFFER_BOTHEN_RTN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f64.ll index 23931ac358843..ecb18b10aa283 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f64.ll @@ -16,7 +16,7 @@ define amdgpu_ps void @buffer_atomic_fadd_f64_offset_no_rtn(double %val, <4 x i3 ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F64_OFFSET [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F64_OFFSET [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (f64), align 1, addrspace 8) ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0) ret void @@ -37,7 +37,7 @@ define amdgpu_ps void @buffer_atomic_fadd_f64_offen_no_rtn(double %val, <4 x i32 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F64_OFFEN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F64_OFFEN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (f64), align 1, addrspace 8) ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %voffset, i32 
%soffset, i32 0) ret void @@ -58,7 +58,7 @@ define amdgpu_ps void @buffer_atomic_fadd_f64_idxen_no_rtn(double %val, <4 x i32 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F64_IDXEN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F64_IDXEN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (f64), align 1, addrspace 8) ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret void @@ -81,7 +81,7 @@ define amdgpu_ps void @buffer_atomic_fadd_f64_bothen_no_rtn(double %val, <4 x i3 ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F64_BOTHEN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F64_BOTHEN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 2, implicit $exec :: (volatile dereferenceable load store (f64), align 1, addrspace 8) ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2) ret void @@ -101,7 +101,7 @@ define amdgpu_ps double @buffer_atomic_fadd_f64_offset_rtn(double %val, <4 x i32 ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (f64), align 1, addrspace 8) ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub0 ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub1 ; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec @@ -128,7 +128,7 @@ define amdgpu_ps double @buffer_atomic_fadd_f64_offen_rtn(double %val, <4 x i32> ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = 
COPY $sgpr4 - ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFEN_RTN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFEN_RTN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (f64), align 1, addrspace 8) ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub0 ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub1 ; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec @@ -155,7 +155,7 @@ define amdgpu_ps double @buffer_atomic_fadd_f64_idxen_rtn(double %val, <4 x i32> ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_IDXEN_RTN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_IDXEN_RTN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (f64), align 1, addrspace 8) ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub0 ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub1 ; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec @@ -184,7 +184,7 @@ define amdgpu_ps double @buffer_atomic_fadd_f64_bothen_rtn(double %val, <4 x i32 ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 1, implicit $exec :: (volatile dereferenceable load store (f64), align 1, addrspace 8) ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub0 ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub1 ; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec @@ -210,7 +210,7 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_f64_offset_no_rtn(double %val, ptr ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX90A_GFX942-NEXT: 
[[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F64_OFFSET [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F64_OFFSET [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (f64) on %ir.rsrc, align 1, addrspace 8) ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 0, i32 %soffset, i32 0) ret void @@ -231,7 +231,7 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_f64_offen_no_rtn(double %val, ptr ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F64_OFFEN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F64_OFFEN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (f64) on %ir.rsrc, align 1, addrspace 8) ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -252,7 +252,7 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_f64_idxen_no_rtn(double %val, ptr ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F64_IDXEN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F64_IDXEN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (f64) on %ir.rsrc, align 1, addrspace 8) ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret void @@ -275,7 +275,7 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_f64_bothen_no_rtn(double %val, ptr ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F64_BOTHEN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F64_BOTHEN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 2, implicit $exec 
:: (volatile dereferenceable load store (f64) on %ir.rsrc, align 1, addrspace 8) ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2) ret void @@ -295,7 +295,7 @@ define amdgpu_ps double @buffer_ptr_atomic_fadd_f64_offset_rtn(double %val, ptr ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (f64) on %ir.rsrc, align 1, addrspace 8) ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub0 ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub1 ; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec @@ -322,7 +322,7 @@ define amdgpu_ps double @buffer_ptr_atomic_fadd_f64_offen_rtn(double %val, ptr a ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFEN_RTN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFEN_RTN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (f64) on %ir.rsrc, align 1, addrspace 8) ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub0 ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub1 ; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec @@ -349,7 +349,7 @@ define amdgpu_ps double @buffer_ptr_atomic_fadd_f64_idxen_rtn(double %val, ptr a ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_IDXEN_RTN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN:%[0-9]+]]:vreg_64_align2 = 
BUFFER_ATOMIC_ADD_F64_IDXEN_RTN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (f64) on %ir.rsrc, align 1, addrspace 8) ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub0 ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub1 ; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec @@ -378,7 +378,7 @@ define amdgpu_ps double @buffer_ptr_atomic_fadd_f64_bothen_rtn(double %val, ptr ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 1, implicit $exec :: (volatile dereferenceable load store (f64) on %ir.rsrc, align 1, addrspace 8) ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub0 ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub1 ; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-no-rtn.ll index 3ef735ddb7635..6f6f5b0681d5e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-no-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-no-rtn.ll @@ -15,7 +15,7 @@ define amdgpu_ps void @buffer_atomic_fadd_v2f16_offset_no_rtn(<2 x half> %val, < ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8) + ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (<2 x f16>), align 1, addrspace 8) ; GFX908-NEXT: S_ENDPGM 0 ; ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_v2f16_offset_no_rtn @@ -29,7 +29,7 @@ define amdgpu_ps void @buffer_atomic_fadd_v2f16_offset_no_rtn(<2 x half> %val, < ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec 
:: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (<2 x f16>), align 1, addrspace 8) ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0) ret void @@ -48,7 +48,7 @@ define amdgpu_ps void @buffer_atomic_fadd_v2f16_offen_no_rtn(<2 x half> %val, <4 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8) + ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x f16>), align 1, addrspace 8) ; GFX908-NEXT: S_ENDPGM 0 ; ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_v2f16_offen_no_rtn @@ -63,7 +63,7 @@ define amdgpu_ps void @buffer_atomic_fadd_v2f16_offen_no_rtn(<2 x half> %val, <4 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x f16>), align 1, addrspace 8) ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -82,7 +82,7 @@ define amdgpu_ps void @buffer_atomic_fadd_v2f16_idxen_no_rtn(<2 x half> %val, <4 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8) + ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x f16>), align 1, addrspace 8) ; GFX908-NEXT: S_ENDPGM 0 ; ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_v2f16_idxen_no_rtn @@ -97,7 +97,7 @@ define amdgpu_ps void @buffer_atomic_fadd_v2f16_idxen_no_rtn(<2 x half> %val, <4 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX942-NEXT: 
BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x f16>), align 1, addrspace 8) ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret void @@ -118,7 +118,7 @@ define amdgpu_ps void @buffer_atomic_fadd_v2f16_bothen_no_rtn(<2 x half> %val, < ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8) + ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (<2 x f16>), align 1, addrspace 8) ; GFX908-NEXT: S_ENDPGM 0 ; ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_v2f16_bothen_no_rtn @@ -135,7 +135,7 @@ define amdgpu_ps void @buffer_atomic_fadd_v2f16_bothen_no_rtn(<2 x half> %val, < ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (<2 x f16>), align 1, addrspace 8) ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2) ret void @@ -153,7 +153,7 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_v2f16_offset_no_rtn(<2 x half> %va ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8) + ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (<2 x f16>) on %ir.rsrc, align 1, addrspace 8) ; GFX908-NEXT: S_ENDPGM 0 ; ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offset_no_rtn @@ -167,7 +167,7 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_v2f16_offset_no_rtn(<2 x half> %va ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, 
[[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (<2 x f16>) on %ir.rsrc, align 1, addrspace 8) ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 4095, i32 %soffset, i32 0) ret void @@ -186,7 +186,7 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_v2f16_offen_no_rtn(<2 x half> %val ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8) + ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x f16>) on %ir.rsrc, align 1, addrspace 8) ; GFX908-NEXT: S_ENDPGM 0 ; ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offen_no_rtn @@ -201,7 +201,7 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_v2f16_offen_no_rtn(<2 x half> %val ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x f16>) on %ir.rsrc, align 1, addrspace 8) ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -220,7 +220,7 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_v2f16_idxen_no_rtn(<2 x half> %val ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8) + ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x f16>) on %ir.rsrc, align 1, addrspace 8) ; GFX908-NEXT: S_ENDPGM 0 ; ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_idxen_no_rtn @@ -235,7 +235,7 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_v2f16_idxen_no_rtn(<2 x half> %val ; 
GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x f16>) on %ir.rsrc, align 1, addrspace 8) ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret void @@ -256,7 +256,7 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_v2f16_bothen_no_rtn(<2 x half> %va ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8) + ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (<2 x f16>) on %ir.rsrc, align 1, addrspace 8) ; GFX908-NEXT: S_ENDPGM 0 ; ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_bothen_no_rtn @@ -273,7 +273,7 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_v2f16_bothen_no_rtn(<2 x half> %va ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (<2 x f16>) on %ir.rsrc, align 1, addrspace 8) ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2) ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-rtn.ll index 756f287b77988..e9bfafe15208e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-rtn.ll @@ -14,7 +14,7 @@ define amdgpu_ps <2 x half> @buffer_atomic_fadd_v2f16_offset_rtn(<2 x half> %val ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = 
REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x f16>), align 1, addrspace 8) ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN]] ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0) @@ -34,7 +34,7 @@ define amdgpu_ps <2 x half> @buffer_atomic_fadd_v2f16_offen_rtn(<2 x half> %val, ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x f16>), align 1, addrspace 8) ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN]] ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -54,7 +54,7 @@ define amdgpu_ps <2 x half> @buffer_atomic_fadd_v2f16_idxen_rtn(<2 x half> %val, ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x f16>), align 1, addrspace 8) ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN]] ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) @@ -76,7 +76,7 @@ define amdgpu_ps <2 x half> @buffer_atomic_fadd_v2f16_bothen_rtn(<2 x half> %val ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; 
GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 3, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 3, implicit $exec :: (volatile dereferenceable load store (<2 x f16>), align 1, addrspace 8) ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN]] ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2) @@ -95,7 +95,7 @@ define amdgpu_ps <2 x half> @buffer_ptr_atomic_fadd_v2f16_offset_rtn(<2 x half> ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x f16>) on %ir.rsrc, align 1, addrspace 8) ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN]] ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 0, i32 %soffset, i32 0) @@ -115,7 +115,7 @@ define amdgpu_ps <2 x half> @buffer_ptr_atomic_fadd_v2f16_offen_rtn(<2 x half> % ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x f16>) on %ir.rsrc, align 1, addrspace 8) ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN]] ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -135,7 +135,7 @@ define amdgpu_ps <2 x half> @buffer_ptr_atomic_fadd_v2f16_idxen_rtn(<2 x half> % ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX90A_GFX942-NEXT: 
[[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x f16>) on %ir.rsrc, align 1, addrspace 8) ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN]] ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) @@ -157,7 +157,7 @@ define amdgpu_ps <2 x half> @buffer_ptr_atomic_fadd_v2f16_bothen_rtn(<2 x half> ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 3, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 3, implicit $exec :: (volatile dereferenceable load store (<2 x f16>) on %ir.rsrc, align 1, addrspace 8) ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN]] ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-load-store-pointers.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-load-store-pointers.ll index 20735bb6c21c6..86d30c9e7049a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-load-store-pointers.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-load-store-pointers.ll @@ -12,7 +12,7 @@ define ptr @buffer_load_p0(ptr addrspace(8) inreg %buf) { ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr19 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX9-NEXT: [[BUFFER_LOAD_DWORDX2_OFFSET:%[0-9]+]]:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s64) from %ir.buf, align 1, addrspace 8) + ; GFX9-NEXT: [[BUFFER_LOAD_DWORDX2_OFFSET:%[0-9]+]]:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (i64) from %ir.buf, align 1, addrspace 8) ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY 
[[BUFFER_LOAD_DWORDX2_OFFSET]].sub0 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFSET]].sub1 ; GFX9-NEXT: $vgpr0 = COPY [[COPY4]] @@ -36,7 +36,7 @@ define void @buffer_store_p0(ptr %data, ptr addrspace(8) inreg %buf) { ; GFX9-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr19 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; GFX9-NEXT: BUFFER_STORE_DWORDX2_OFFSET_exact [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s64) into %ir.buf, align 1, addrspace 8) + ; GFX9-NEXT: BUFFER_STORE_DWORDX2_OFFSET_exact [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (i64) into %ir.buf, align 1, addrspace 8) ; GFX9-NEXT: SI_RETURN call void @llvm.amdgcn.raw.ptr.buffer.store.p0(ptr %data, ptr addrspace(8) inreg %buf, i32 0, i32 0, i32 0) ret void @@ -53,7 +53,7 @@ define ptr addrspace(1) @buffer_load_p1(ptr addrspace(8) inreg %buf) { ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr19 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX9-NEXT: [[BUFFER_LOAD_DWORDX2_OFFSET:%[0-9]+]]:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s64) from %ir.buf, align 1, addrspace 8) + ; GFX9-NEXT: [[BUFFER_LOAD_DWORDX2_OFFSET:%[0-9]+]]:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (i64) from %ir.buf, align 1, addrspace 8) ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFSET]].sub0 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFSET]].sub1 ; GFX9-NEXT: $vgpr0 = COPY [[COPY4]] @@ -77,7 +77,7 @@ define void @buffer_store_p1(ptr addrspace(1) %data, ptr addrspace(8) inreg %buf ; GFX9-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr19 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; GFX9-NEXT: BUFFER_STORE_DWORDX2_OFFSET_exact [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s64) into %ir.buf, align 1, addrspace 8) + ; GFX9-NEXT: BUFFER_STORE_DWORDX2_OFFSET_exact [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (i64) into %ir.buf, align 1, addrspace 8) ; GFX9-NEXT: SI_RETURN call void @llvm.amdgcn.raw.ptr.buffer.store.p1(ptr addrspace(1) %data, ptr addrspace(8) inreg %buf, i32 0, i32 0, i32 0) ret void @@ -94,7 +94,7 @@ define ptr addrspace(4) @buffer_load_p4(ptr addrspace(8) inreg %buf) { ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr19 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX9-NEXT: [[BUFFER_LOAD_DWORDX2_OFFSET:%[0-9]+]]:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s64) from %ir.buf, align 1, addrspace 8) + 
; GFX9-NEXT: [[BUFFER_LOAD_DWORDX2_OFFSET:%[0-9]+]]:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (i64) from %ir.buf, align 1, addrspace 8) ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFSET]].sub0 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFSET]].sub1 ; GFX9-NEXT: $vgpr0 = COPY [[COPY4]] @@ -118,7 +118,7 @@ define void @buffer_store_p4(ptr addrspace(4) %data, ptr addrspace(8) inreg %buf ; GFX9-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr19 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; GFX9-NEXT: BUFFER_STORE_DWORDX2_OFFSET_exact [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s64) into %ir.buf, align 1, addrspace 8) + ; GFX9-NEXT: BUFFER_STORE_DWORDX2_OFFSET_exact [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (i64) into %ir.buf, align 1, addrspace 8) ; GFX9-NEXT: SI_RETURN call void @llvm.amdgcn.raw.ptr.buffer.store.p4(ptr addrspace(4) %data, ptr addrspace(8) inreg %buf, i32 0, i32 0, i32 0) ret void @@ -135,7 +135,7 @@ define ptr addrspace(5) @buffer_load_p5(ptr addrspace(8) inreg %buf) { ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr19 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX9-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.buf, align 1, addrspace 8) + ; GFX9-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (i32) from %ir.buf, align 1, addrspace 8) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] ; GFX9-NEXT: SI_RETURN implicit $vgpr0 %ret = call ptr addrspace(5) @llvm.amdgcn.raw.ptr.buffer.load.p5(ptr addrspace(8) inreg %buf, i32 0, i32 0, i32 0) @@ -154,7 +154,7 @@ define void @buffer_store_p5(ptr addrspace(5) %data, ptr addrspace(8) inreg %buf ; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr19 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET_exact [[COPY]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into %ir.buf, align 1, addrspace 8) + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET_exact [[COPY]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (i32) into %ir.buf, align 1, addrspace 8) ; GFX9-NEXT: SI_RETURN call void @llvm.amdgcn.raw.ptr.buffer.store.p5(ptr addrspace(5) %data, ptr addrspace(8) inreg %buf, i32 0, i32 0, i32 0) ret void @@ -171,7 +171,7 @@ define <2 x ptr addrspace(1)> @buffer_load_v2p1(ptr addrspace(8) inreg %buf) { ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr19 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, 
[[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX9-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128_align2 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s64>) from %ir.buf, align 1, addrspace 8) + ; GFX9-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128_align2 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x i64>) from %ir.buf, align 1, addrspace 8) ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFSET]].sub0 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFSET]].sub1 ; GFX9-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFSET]].sub2 @@ -203,7 +203,7 @@ define void @buffer_store_v2p5(<2 x ptr addrspace(1)> %data, ptr addrspace(8) in ; GFX9-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr19 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX9-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 - ; GFX9-NEXT: BUFFER_STORE_DWORDX4_OFFSET_exact [[REG_SEQUENCE2]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s64>) into %ir.buf, align 1, addrspace 8) + ; GFX9-NEXT: BUFFER_STORE_DWORDX4_OFFSET_exact [[REG_SEQUENCE2]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x i64>) into %ir.buf, align 1, addrspace 8) ; GFX9-NEXT: SI_RETURN call void @llvm.amdgcn.raw.ptr.buffer.store.v2p1(<2 x ptr addrspace(1)> %data, ptr addrspace(8) inreg %buf, i32 0, i32 0, i32 0) ret void @@ -220,7 +220,7 @@ define <3 x ptr addrspace(5)> @buffer_load_v3p5(ptr addrspace(8) inreg %buf) { ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr19 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX9-NEXT: [[BUFFER_LOAD_DWORDX3_OFFSET:%[0-9]+]]:vreg_96_align2 = BUFFER_LOAD_DWORDX3_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>) from %ir.buf, align 1, addrspace 8) + ; GFX9-NEXT: [[BUFFER_LOAD_DWORDX3_OFFSET:%[0-9]+]]:vreg_96_align2 = BUFFER_LOAD_DWORDX3_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x i32>) from %ir.buf, align 1, addrspace 8) ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFSET]].sub0 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFSET]].sub1 ; GFX9-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFSET]].sub2 @@ -247,7 +247,7 @@ define void @buffer_store_v3p5(<3 x ptr addrspace(5)> %data, ptr addrspace(8) in ; GFX9-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr19 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[COPY6]], %subreg.sub3 - ; GFX9-NEXT: BUFFER_STORE_DWORDX3_OFFSET_exact [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<3 x s32>) into %ir.buf, align 1, addrspace 8) + ; GFX9-NEXT: BUFFER_STORE_DWORDX3_OFFSET_exact [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<3 x i32>) into %ir.buf, align 1, addrspace 8) ; 
GFX9-NEXT: SI_RETURN call void @llvm.amdgcn.raw.ptr.buffer.store.v3p5(<3 x ptr addrspace(5)> %data, ptr addrspace(8) inreg %buf, i32 0, i32 0, i32 0) ret void @@ -264,7 +264,7 @@ define <4 x ptr addrspace(5)> @buffer_load_v4p5(ptr addrspace(8) inreg %buf) { ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr19 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX9-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128_align2 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>) from %ir.buf, align 1, addrspace 8) + ; GFX9-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128_align2 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x i32>) from %ir.buf, align 1, addrspace 8) ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFSET]].sub0 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFSET]].sub1 ; GFX9-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFSET]].sub2 @@ -294,7 +294,7 @@ define void @buffer_store_v4p5(<4 x ptr addrspace(5)> %data, ptr addrspace(8) in ; GFX9-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr19 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 - ; GFX9-NEXT: BUFFER_STORE_DWORDX4_OFFSET_exact [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>) into %ir.buf, align 1, addrspace 8) + ; GFX9-NEXT: BUFFER_STORE_DWORDX4_OFFSET_exact [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x i32>) into %ir.buf, align 1, addrspace 8) ; GFX9-NEXT: SI_RETURN call void @llvm.amdgcn.raw.ptr.buffer.store.v4p5(<4 x ptr addrspace(5)> %data, ptr addrspace(8) inreg %buf, i32 0, i32 0, i32 0) ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-ext-mul.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-ext-mul.ll index 4d603f7487754..1f59bbad0e4b9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-ext-mul.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-ext-mul.ll @@ -73,14 +73,14 @@ define amdgpu_vs <5 x float> @test_5xf16_5xf32_add_ext_mul(<5 x half> inreg %x, ; GFX10-FAST-DENORM-NEXT: v_mov_b32_e32 v2, s8 ; GFX10-FAST-DENORM-NEXT: v_mov_b32_e32 v3, s9 ; GFX10-FAST-DENORM-NEXT: v_mov_b32_e32 v4, s10 -; GFX10-FAST-DENORM-NEXT: s_lshr_b32 s11, s0, 16 -; GFX10-FAST-DENORM-NEXT: s_lshr_b32 s12, s1, 16 -; GFX10-FAST-DENORM-NEXT: s_lshr_b32 s13, s3, 16 -; GFX10-FAST-DENORM-NEXT: s_lshr_b32 s14, s4, 16 +; GFX10-FAST-DENORM-NEXT: s_lshr_b32 s11, s1, 16 +; GFX10-FAST-DENORM-NEXT: s_lshr_b32 s12, s0, 16 +; GFX10-FAST-DENORM-NEXT: s_lshr_b32 s13, s4, 16 +; GFX10-FAST-DENORM-NEXT: s_lshr_b32 s14, s3, 16 ; GFX10-FAST-DENORM-NEXT: v_fma_mix_f32 v0, s0, s3, v0 op_sel_hi:[1,1,0] -; GFX10-FAST-DENORM-NEXT: v_fma_mix_f32 v1, s11, s13, v1 op_sel_hi:[1,1,0] +; GFX10-FAST-DENORM-NEXT: v_fma_mix_f32 v1, s12, s14, v1 op_sel_hi:[1,1,0] ; GFX10-FAST-DENORM-NEXT: v_fma_mix_f32 v2, s1, s4, v2 op_sel_hi:[1,1,0] -; GFX10-FAST-DENORM-NEXT: v_fma_mix_f32 v3, s12, s14, v3 op_sel_hi:[1,1,0] +; GFX10-FAST-DENORM-NEXT: v_fma_mix_f32 v3, 
s11, s13, v3 op_sel_hi:[1,1,0] ; GFX10-FAST-DENORM-NEXT: v_fma_mix_f32 v4, s2, s5, v4 op_sel_hi:[1,1,0] ; GFX10-FAST-DENORM-NEXT: ; return to shader part epilog .entry: @@ -121,16 +121,16 @@ define amdgpu_vs <6 x float> @test_6xf16_6xf32_add_ext_mul_rhs(<6 x half> inreg ; GFX10-FAST-DENORM-NEXT: v_mov_b32_e32 v3, s9 ; GFX10-FAST-DENORM-NEXT: v_mov_b32_e32 v4, s10 ; GFX10-FAST-DENORM-NEXT: v_mov_b32_e32 v5, s11 -; GFX10-FAST-DENORM-NEXT: s_lshr_b32 s12, s0, 16 -; GFX10-FAST-DENORM-NEXT: s_lshr_b32 s13, s1, 16 +; GFX10-FAST-DENORM-NEXT: s_lshr_b32 s12, s1, 16 +; GFX10-FAST-DENORM-NEXT: s_lshr_b32 s13, s0, 16 ; GFX10-FAST-DENORM-NEXT: s_lshr_b32 s6, s2, 16 -; GFX10-FAST-DENORM-NEXT: s_lshr_b32 s14, s3, 16 -; GFX10-FAST-DENORM-NEXT: s_lshr_b32 s15, s4, 16 +; GFX10-FAST-DENORM-NEXT: s_lshr_b32 s14, s4, 16 +; GFX10-FAST-DENORM-NEXT: s_lshr_b32 s15, s3, 16 ; GFX10-FAST-DENORM-NEXT: s_lshr_b32 s16, s5, 16 ; GFX10-FAST-DENORM-NEXT: v_fma_mix_f32 v0, s0, s3, v0 op_sel_hi:[1,1,0] -; GFX10-FAST-DENORM-NEXT: v_fma_mix_f32 v1, s12, s14, v1 op_sel_hi:[1,1,0] +; GFX10-FAST-DENORM-NEXT: v_fma_mix_f32 v1, s13, s15, v1 op_sel_hi:[1,1,0] ; GFX10-FAST-DENORM-NEXT: v_fma_mix_f32 v2, s1, s4, v2 op_sel_hi:[1,1,0] -; GFX10-FAST-DENORM-NEXT: v_fma_mix_f32 v3, s13, s15, v3 op_sel_hi:[1,1,0] +; GFX10-FAST-DENORM-NEXT: v_fma_mix_f32 v3, s12, s14, v3 op_sel_hi:[1,1,0] ; GFX10-FAST-DENORM-NEXT: v_fma_mix_f32 v4, s2, s5, v4 op_sel_hi:[1,1,0] ; GFX10-FAST-DENORM-NEXT: v_fma_mix_f32 v5, s6, s16, v5 op_sel_hi:[1,1,0] ; GFX10-FAST-DENORM-NEXT: ; return to shader part epilog diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-ext-neg-mul.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-ext-neg-mul.ll index 814a34754e883..ac879d56d1253 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-ext-neg-mul.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-ext-neg-mul.ll @@ -97,12 +97,13 @@ define amdgpu_vs <4 x float> @test_v4f16_to_v4f32_sub_ext_neg_mul(<4 x half> %x, ; ; GFX10-DENORM-LABEL: test_v4f16_to_v4f32_sub_ext_neg_mul: ; GFX10-DENORM: ; %bb.0: ; %entry -; GFX10-DENORM-NEXT: v_xor_b32_e32 v8, 0x80008000, v2 -; GFX10-DENORM-NEXT: v_xor_b32_e32 v9, 0x80008000, v3 -; GFX10-DENORM-NEXT: v_fma_mix_f32 v5, v0, -v2, -v5 op_sel:[1,1,0] op_sel_hi:[1,1,0] -; GFX10-DENORM-NEXT: v_fma_mix_f32 v3, v1, -v3, -v7 op_sel:[1,1,0] op_sel_hi:[1,1,0] -; GFX10-DENORM-NEXT: v_fma_mix_f32 v0, v0, v8, -v4 op_sel_hi:[1,1,0] -; GFX10-DENORM-NEXT: v_fma_mix_f32 v2, v1, v9, -v6 op_sel_hi:[1,1,0] +; GFX10-DENORM-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 +; GFX10-DENORM-NEXT: v_xor_b32_e32 v3, 0x80008000, v3 +; GFX10-DENORM-NEXT: v_fma_mix_f32 v4, v0, v2, -v4 op_sel_hi:[1,1,0] +; GFX10-DENORM-NEXT: v_fma_mix_f32 v5, v0, v2, -v5 op_sel:[1,1,0] op_sel_hi:[1,1,0] +; GFX10-DENORM-NEXT: v_fma_mix_f32 v2, v1, v3, -v6 op_sel_hi:[1,1,0] +; GFX10-DENORM-NEXT: v_fma_mix_f32 v3, v1, v3, -v7 op_sel:[1,1,0] op_sel_hi:[1,1,0] +; GFX10-DENORM-NEXT: v_mov_b32_e32 v0, v4 ; GFX10-DENORM-NEXT: v_mov_b32_e32 v1, v5 ; GFX10-DENORM-NEXT: ; return to shader part epilog entry: @@ -131,12 +132,13 @@ define amdgpu_vs <4 x float> @test_v4f16_to_v4f32_sub_neg_ext_mul(<4 x half> %x, ; ; GFX10-DENORM-LABEL: test_v4f16_to_v4f32_sub_neg_ext_mul: ; GFX10-DENORM: ; %bb.0: ; %entry -; GFX10-DENORM-NEXT: v_xor_b32_e32 v8, 0x80008000, v2 -; GFX10-DENORM-NEXT: v_xor_b32_e32 v9, 0x80008000, v3 -; GFX10-DENORM-NEXT: v_fma_mix_f32 v5, v0, -v2, -v5 op_sel:[1,1,0] op_sel_hi:[1,1,0] -; GFX10-DENORM-NEXT: v_fma_mix_f32 v3, v1, -v3, -v7 op_sel:[1,1,0] 
op_sel_hi:[1,1,0] -; GFX10-DENORM-NEXT: v_fma_mix_f32 v0, v0, v8, -v4 op_sel_hi:[1,1,0] -; GFX10-DENORM-NEXT: v_fma_mix_f32 v2, v1, v9, -v6 op_sel_hi:[1,1,0] +; GFX10-DENORM-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 +; GFX10-DENORM-NEXT: v_xor_b32_e32 v3, 0x80008000, v3 +; GFX10-DENORM-NEXT: v_fma_mix_f32 v4, v0, v2, -v4 op_sel_hi:[1,1,0] +; GFX10-DENORM-NEXT: v_fma_mix_f32 v5, v0, v2, -v5 op_sel:[1,1,0] op_sel_hi:[1,1,0] +; GFX10-DENORM-NEXT: v_fma_mix_f32 v2, v1, v3, -v6 op_sel_hi:[1,1,0] +; GFX10-DENORM-NEXT: v_fma_mix_f32 v3, v1, v3, -v7 op_sel:[1,1,0] op_sel_hi:[1,1,0] +; GFX10-DENORM-NEXT: v_mov_b32_e32 v0, v4 ; GFX10-DENORM-NEXT: v_mov_b32_e32 v1, v5 ; GFX10-DENORM-NEXT: ; return to shader part epilog entry: @@ -166,12 +168,12 @@ define amdgpu_vs <4 x float> @test_v4f16_to_v4f32_sub_ext_neg_mul2(<4 x float> % ; ; GFX10-DENORM-LABEL: test_v4f16_to_v4f32_sub_ext_neg_mul2: ; GFX10-DENORM: ; %bb.0: ; %entry -; GFX10-DENORM-NEXT: v_xor_b32_e32 v8, 0x80008000, v6 -; GFX10-DENORM-NEXT: v_xor_b32_e32 v9, 0x80008000, v7 -; GFX10-DENORM-NEXT: v_fma_mix_f32 v1, -v4, -v6, v1 op_sel:[1,1,0] op_sel_hi:[1,1,0] -; GFX10-DENORM-NEXT: v_fma_mix_f32 v3, -v5, -v7, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] -; GFX10-DENORM-NEXT: v_fma_mix_f32 v0, -v4, v8, v0 op_sel_hi:[1,1,0] -; GFX10-DENORM-NEXT: v_fma_mix_f32 v2, -v5, v9, v2 op_sel_hi:[1,1,0] +; GFX10-DENORM-NEXT: v_xor_b32_e32 v6, 0x80008000, v6 +; GFX10-DENORM-NEXT: v_xor_b32_e32 v7, 0x80008000, v7 +; GFX10-DENORM-NEXT: v_fma_mix_f32 v0, -v4, v6, v0 op_sel_hi:[1,1,0] +; GFX10-DENORM-NEXT: v_fma_mix_f32 v1, -v4, v6, v1 op_sel:[1,1,0] op_sel_hi:[1,1,0] +; GFX10-DENORM-NEXT: v_fma_mix_f32 v2, -v5, v7, v2 op_sel_hi:[1,1,0] +; GFX10-DENORM-NEXT: v_fma_mix_f32 v3, -v5, v7, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] ; GFX10-DENORM-NEXT: ; return to shader part epilog entry: %a = fmul fast <4 x half> %y, %z @@ -199,12 +201,12 @@ define amdgpu_vs <4 x float> @test_v4f16_to_v4f32_sub_neg_ext_mul2(<4 x float> % ; ; GFX10-DENORM-LABEL: test_v4f16_to_v4f32_sub_neg_ext_mul2: ; GFX10-DENORM: ; %bb.0: ; %entry -; GFX10-DENORM-NEXT: v_xor_b32_e32 v8, 0x80008000, v6 -; GFX10-DENORM-NEXT: v_xor_b32_e32 v9, 0x80008000, v7 -; GFX10-DENORM-NEXT: v_fma_mix_f32 v1, -v4, -v6, v1 op_sel:[1,1,0] op_sel_hi:[1,1,0] -; GFX10-DENORM-NEXT: v_fma_mix_f32 v3, -v5, -v7, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] -; GFX10-DENORM-NEXT: v_fma_mix_f32 v0, -v4, v8, v0 op_sel_hi:[1,1,0] -; GFX10-DENORM-NEXT: v_fma_mix_f32 v2, -v5, v9, v2 op_sel_hi:[1,1,0] +; GFX10-DENORM-NEXT: v_xor_b32_e32 v6, 0x80008000, v6 +; GFX10-DENORM-NEXT: v_xor_b32_e32 v7, 0x80008000, v7 +; GFX10-DENORM-NEXT: v_fma_mix_f32 v0, -v4, v6, v0 op_sel_hi:[1,1,0] +; GFX10-DENORM-NEXT: v_fma_mix_f32 v1, -v4, v6, v1 op_sel:[1,1,0] op_sel_hi:[1,1,0] +; GFX10-DENORM-NEXT: v_fma_mix_f32 v2, -v5, v7, v2 op_sel_hi:[1,1,0] +; GFX10-DENORM-NEXT: v_fma_mix_f32 v3, -v5, v7, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] ; GFX10-DENORM-NEXT: ; return to shader part epilog entry: %a = fmul fast <4 x half> %y, %z diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-mul.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-mul.ll index e8e29c3d4b526..0e3a346c2e8ba 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-mul.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-mul.ll @@ -618,19 +618,19 @@ define <4 x half> @test_v4f16_sub_mul(<4 x half> %x, <4 x half> %y, <4 x half> % ; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2 ; GFX11-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3 -; 
GFX11-DENORM-NEXT: v_lshrrev_b32_e32 v2, 16, v4 -; GFX11-DENORM-NEXT: v_lshrrev_b32_e32 v3, 16, v5 +; GFX11-DENORM-NEXT: v_lshrrev_b32_e32 v2, 16, v5 +; GFX11-DENORM-NEXT: v_lshrrev_b32_e32 v3, 16, v4 ; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX11-DENORM-NEXT: v_lshrrev_b32_e32 v6, 16, v0 ; GFX11-DENORM-NEXT: v_lshrrev_b32_e32 v7, 16, v1 ; GFX11-DENORM-NEXT: v_sub_f16_e32 v0, v0, v4 ; GFX11-DENORM-NEXT: v_sub_f16_e32 v1, v1, v5 ; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-DENORM-NEXT: v_sub_f16_e32 v2, v6, v2 -; GFX11-DENORM-NEXT: v_sub_f16_e32 v3, v7, v3 +; GFX11-DENORM-NEXT: v_sub_f16_e32 v3, v6, v3 +; GFX11-DENORM-NEXT: v_sub_f16_e32 v2, v7, v2 ; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-DENORM-NEXT: v_pack_b32_f16 v0, v0, v2 -; GFX11-DENORM-NEXT: v_pack_b32_f16 v1, v1, v3 +; GFX11-DENORM-NEXT: v_pack_b32_f16 v0, v0, v3 +; GFX11-DENORM-NEXT: v_pack_b32_f16 v1, v1, v2 ; GFX11-DENORM-NEXT: s_setpc_b64 s[30:31] .entry: %a = fmul <4 x half> %x, %y @@ -717,19 +717,19 @@ define <4 x half> @test_v4f16_sub_mul_rhs(<4 x half> %x, <4 x half> %y, <4 x hal ; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2 ; GFX11-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3 -; GFX11-DENORM-NEXT: v_lshrrev_b32_e32 v2, 16, v4 -; GFX11-DENORM-NEXT: v_lshrrev_b32_e32 v3, 16, v5 +; GFX11-DENORM-NEXT: v_lshrrev_b32_e32 v2, 16, v5 +; GFX11-DENORM-NEXT: v_lshrrev_b32_e32 v3, 16, v4 ; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX11-DENORM-NEXT: v_lshrrev_b32_e32 v6, 16, v0 ; GFX11-DENORM-NEXT: v_lshrrev_b32_e32 v7, 16, v1 ; GFX11-DENORM-NEXT: v_sub_f16_e32 v0, v4, v0 ; GFX11-DENORM-NEXT: v_sub_f16_e32 v1, v5, v1 ; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-DENORM-NEXT: v_sub_f16_e32 v2, v2, v6 -; GFX11-DENORM-NEXT: v_sub_f16_e32 v3, v3, v7 +; GFX11-DENORM-NEXT: v_sub_f16_e32 v3, v3, v6 +; GFX11-DENORM-NEXT: v_sub_f16_e32 v2, v2, v7 ; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-DENORM-NEXT: v_pack_b32_f16 v0, v0, v2 -; GFX11-DENORM-NEXT: v_pack_b32_f16 v1, v1, v3 +; GFX11-DENORM-NEXT: v_pack_b32_f16 v0, v0, v3 +; GFX11-DENORM-NEXT: v_pack_b32_f16 v1, v1, v2 ; GFX11-DENORM-NEXT: s_setpc_b64 s[30:31] .entry: %a = fmul <4 x half> %x, %y diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll index 4ddbb0afd7fc5..d988399690134 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll @@ -280,6 +280,7 @@ define half @v_uitofp_i32_to_f16_mask255(i32 %arg0) nounwind { ; SI-NEXT: v_and_b32_e32 v0, 0xff, v0 ; SI-NEXT: v_cvt_f32_ubyte0_e32 v0, v0 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-NEXT: s_setpc_b64 s[30:31] ; ; VI-LABEL: v_uitofp_i32_to_f16_mask255: @@ -300,6 +301,7 @@ define half @v_sitofp_i32_to_f16_mask255(i32 %arg0) nounwind { ; SI-NEXT: v_and_b32_e32 v0, 0xff, v0 ; SI-NEXT: v_cvt_f32_ubyte0_e32 v0, v0 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-NEXT: s_setpc_b64 s[30:31] ; ; VI-LABEL: v_sitofp_i32_to_f16_mask255: @@ -320,6 +322,7 @@ define half @v_uitofp_to_f16_lshr8_mask255(i32 %arg0) nounwind { ; SI-NEXT: v_bfe_u32 v0, v0, 8, 8 ; SI-NEXT: 
v_cvt_f32_ubyte0_e32 v0, v0 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-NEXT: s_setpc_b64 s[30:31] ; ; VI-LABEL: v_uitofp_to_f16_lshr8_mask255: @@ -341,6 +344,7 @@ define half @v_uitofp_to_f16_lshr16_mask255(i32 %arg0) nounwind { ; SI-NEXT: v_bfe_u32 v0, v0, 16, 8 ; SI-NEXT: v_cvt_f32_ubyte0_e32 v0, v0 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-NEXT: s_setpc_b64 s[30:31] ; ; VI-LABEL: v_uitofp_to_f16_lshr16_mask255: @@ -356,12 +360,20 @@ define half @v_uitofp_to_f16_lshr16_mask255(i32 %arg0) nounwind { } define half @v_uitofp_to_f16_lshr24_mask255(i32 %arg0) nounwind { -; GCN-LABEL: v_uitofp_to_f16_lshr24_mask255: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_cvt_f32_ubyte3_e32 v0, v0 -; GCN-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; SI-LABEL: v_uitofp_to_f16_lshr24_mask255: +; SI: ; %bb.0: +; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: v_cvt_f32_ubyte3_e32 v0, v0 +; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-NEXT: s_setpc_b64 s[30:31] +; +; VI-LABEL: v_uitofp_to_f16_lshr24_mask255: +; VI: ; %bb.0: +; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: v_cvt_f32_ubyte3_e32 v0, v0 +; VI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; VI-NEXT: s_setpc_b64 s[30:31] %lshr.16 = lshr i32 %arg0, 24 %masked = and i32 %lshr.16, 255 %cvt = uitofp i32 %masked to half @@ -375,6 +387,7 @@ define half @v_uitofp_i8_to_f16(i8 %arg0) nounwind { ; SI-NEXT: v_and_b32_e32 v0, 0xff, v0 ; SI-NEXT: v_cvt_f32_ubyte0_e32 v0, v0 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-NEXT: s_setpc_b64 s[30:31] ; ; VI-LABEL: v_uitofp_i8_to_f16: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/dereferenceable-declaration.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/dereferenceable-declaration.ll index 13828c2d8a6a0..0936691ff8093 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/dereferenceable-declaration.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/dereferenceable-declaration.ll @@ -14,18 +14,18 @@ define i64 @load_deref_declaration_only() { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @declared_with_ret_deref - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x i32>) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @declared_with_ret_deref, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](i32), [[COPY2]](i32) ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: [[ASSERT_ALIGN:%[0-9]+]]:_(p0) = G_ASSERT_ALIGN [[MV]], 8 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[ASSERT_ALIGN]](p0) :: (dereferenceable load (s64) from %ir.call) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; 
CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[ASSERT_ALIGN]](p0) :: (dereferenceable load (i64) from %ir.call) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](i64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %call = call ptr @declared_with_ret_deref() %load = load i64, ptr %call, align 8 @@ -38,18 +38,18 @@ define i64 @load_deref_unknown_decl() { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @unknown_decl - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x i32>) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @unknown_decl, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](i32), [[COPY2]](i32) ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: [[ASSERT_ALIGN:%[0-9]+]]:_(p0) = G_ASSERT_ALIGN [[MV]], 8 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[ASSERT_ALIGN]](p0) :: (load (s64) from %ir.call) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[ASSERT_ALIGN]](p0) :: (load (i64) from %ir.call) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](i64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %call = call ptr @unknown_decl() %load = load i64, ptr %call, align 8 @@ -62,18 +62,18 @@ define i64 @load_deref_callsite_only() { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @unknown_decl - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x i32>) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @unknown_decl, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](i32), [[COPY2]](i32) ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: [[ASSERT_ALIGN:%[0-9]+]]:_(p0) = G_ASSERT_ALIGN [[MV]], 8 - ; CHECK-NEXT: 
[[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[ASSERT_ALIGN]](p0) :: (dereferenceable load (s64) from %ir.call) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[ASSERT_ALIGN]](p0) :: (dereferenceable load (i64) from %ir.call) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](i64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %call = call dereferenceable(8) ptr @unknown_decl() %load = load i64, ptr %call, align 8 @@ -87,30 +87,30 @@ define i64 @load_deref_maxmimum_callsite_declaration_only() { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @declared_with_ret_deref - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x i32>) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @declared_with_ret_deref, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](i32), [[COPY2]](i32) ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: [[ASSERT_ALIGN:%[0-9]+]]:_(p0) = G_ASSERT_ALIGN [[MV]], 8 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[ASSERT_ALIGN]](p0) :: (dereferenceable load (s64) from %ir.call0) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[ASSERT_ALIGN]](p0) :: (dereferenceable load (i64) from %ir.call0) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV1:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @declared_with_ret_deref4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x i32>) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV1]](p0), @declared_with_ret_deref4, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: [[ASSERT_ALIGN1:%[0-9]+]]:_(p0) = G_ASSERT_ALIGN [[MV1]], 8 - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[ASSERT_ALIGN1]](p0) :: (dereferenceable load (s64) from %ir.call1) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[LOAD]], [[LOAD1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), 
[[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ADD]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(i64) = G_LOAD [[ASSERT_ALIGN1]](p0) :: (dereferenceable load (i64) from %ir.call1) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i64) = G_ADD [[LOAD]], [[LOAD1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ADD]](i64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %call0 = call dereferenceable(4) ptr @declared_with_ret_deref() %load0 = load i64, ptr %call0, align 8 @@ -126,18 +126,18 @@ define i64 @load_deref_or_null_declaration_only() { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @declared_with_ret_deref_or_null - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x i32>) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @declared_with_ret_deref_or_null, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](i32), [[COPY2]](i32) ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: [[ASSERT_ALIGN:%[0-9]+]]:_(p0) = G_ASSERT_ALIGN [[MV]], 8 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[ASSERT_ALIGN]](p0) :: (dereferenceable load (s64) from %ir.call) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[ASSERT_ALIGN]](p0) :: (dereferenceable load (i64) from %ir.call) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](i64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %call = call nonnull ptr @declared_with_ret_deref_or_null() %load = load i64, ptr %call, align 8 @@ -150,18 +150,18 @@ define i64 @load_deref_or_null_nonnull_decl() { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @nonnull_decl - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x i32>) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @nonnull_decl, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: 
[[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](i32), [[COPY2]](i32) ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: [[ASSERT_ALIGN:%[0-9]+]]:_(p0) = G_ASSERT_ALIGN [[MV]], 8 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[ASSERT_ALIGN]](p0) :: (load (s64) from %ir.call) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[ASSERT_ALIGN]](p0) :: (load (i64) from %ir.call) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](i64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %call = call ptr @nonnull_decl() %load = load i64, ptr %call, align 8 @@ -174,18 +174,18 @@ define i64 @load_deref_or_null_callsite_only() { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @nonnull_decl - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x i32>) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @nonnull_decl, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](i32), [[COPY2]](i32) ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: [[ASSERT_ALIGN:%[0-9]+]]:_(p0) = G_ASSERT_ALIGN [[MV]], 8 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[ASSERT_ALIGN]](p0) :: (dereferenceable load (s64) from %ir.call) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[ASSERT_ALIGN]](p0) :: (dereferenceable load (i64) from %ir.call) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](i64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %call = call dereferenceable_or_null(8) ptr @nonnull_decl() %load = load i64, ptr %call, align 8 @@ -199,30 +199,30 @@ define i64 @load_deref_or_null_maxmimum_callsite_declaration_only() { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @declared_with_ret_deref_or_null - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x i32>) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), 
@declared_with_ret_deref_or_null, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](i32), [[COPY2]](i32) ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: [[ASSERT_ALIGN:%[0-9]+]]:_(p0) = G_ASSERT_ALIGN [[MV]], 8 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[ASSERT_ALIGN]](p0) :: (dereferenceable load (s64) from %ir.call0) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[ASSERT_ALIGN]](p0) :: (dereferenceable load (i64) from %ir.call0) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV1:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @declared_with_ret_deref_or_null4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x i32>) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV1]](p0), @declared_with_ret_deref_or_null4, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: [[ASSERT_ALIGN1:%[0-9]+]]:_(p0) = G_ASSERT_ALIGN [[MV1]], 8 - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[ASSERT_ALIGN1]](p0) :: (dereferenceable load (s64) from %ir.call1) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[LOAD]], [[LOAD1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ADD]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(i64) = G_LOAD [[ASSERT_ALIGN1]](p0) :: (dereferenceable load (i64) from %ir.call1) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i64) = G_ADD [[LOAD]], [[LOAD1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ADD]](i64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %call0 = call dereferenceable_or_null(4) nonnull ptr @declared_with_ret_deref_or_null() %load0 = load i64, ptr %call0, align 8 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/dummy-target.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/dummy-target.ll index 9eeb633f0a817..cf7d8f4796d58 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/dummy-target.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/dummy-target.ll @@ -8,11 +8,11 @@ define i16 @vop3p_add_i16(i16 %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC]] - ; CHECK-NEXT: 
[[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i16) = G_ADD [[TRUNC]], [[TRUNC]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %add = add i16 %arg0, %arg0 ret i16 %add @@ -23,24 +23,24 @@ define <2 x i16> @vop3p_add_v2i16(<2 x i16> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC2]] - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC3]] - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) - ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i16) = G_ADD [[TRUNC]], [[TRUNC2]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(i16) = G_ADD [[TRUNC1]], [[TRUNC3]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[ADD]](i16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[ADD1]](i16) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %add = add <2 x i16> %arg0, %arg0 ret <2 x i16> %add @@ -51,9 +51,9 @@ define i16 @halfinsts_add_i16(i16 %arg0) #1 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY]] - ; CHECK-NEXT: $vgpr0 = COPY [[ADD]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i32) = 
G_ADD [[COPY]], [[COPY]] + ; CHECK-NEXT: $vgpr0 = COPY [[ADD]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %add = add i16 %arg0, %arg0 ret i16 %add @@ -64,12 +64,12 @@ define <2 x i16> @halfinsts_add_v2i16(<2 x i16> %arg0) #1 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY]] - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[ADD]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[ADD1]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[COPY]], [[COPY]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[COPY1]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[ADD]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[ADD1]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %add = add <2 x i16> %arg0, %arg0 ret <2 x i16> %add diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f16.ll index 5ba036c386a40..395d4e433a1b4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f16.ll @@ -19,6 +19,8 @@ define half @v_fdiv_f16(half %a, half %b) { ; GFX6-IEEE-LABEL: v_fdiv_f16: ; GFX6-IEEE: ; %bb.0: ; GFX6-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v1, v1 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-IEEE-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 @@ -33,11 +35,15 @@ define half @v_fdiv_f16(half %a, half %b) { ; GFX6-IEEE-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX6-IEEE-NEXT: v_div_fixup_f32 v0, v2, v1, v0 ; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-IEEE-NEXT: s_setpc_b64 s[30:31] ; ; GFX6-FLUSH-LABEL: v_fdiv_f16: ; GFX6-FLUSH: ; %bb.0: ; GFX6-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v1, v1 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-FLUSH-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 @@ -53,8 +59,8 @@ define half @v_fdiv_f16(half %a, half %b) { ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 ; GFX6-FLUSH-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX6-FLUSH-NEXT: v_div_fixup_f32 v0, v2, v1, v0 -; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 ; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-FLUSH-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-IEEE-LABEL: v_fdiv_f16: @@ -193,11 +199,14 @@ define half @v_fdiv_f16_afn(half %a, half %b) { ; GFX6-LABEL: v_fdiv_f16_afn: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: v_rcp_f32_e32 v1, v1 ; GFX6-NEXT: v_mul_f32_e32 v0, v0, v1 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX89-LABEL: v_fdiv_f16_afn: @@ -229,6 +238,8 @@ define half @v_fdiv_f16_ulp25(half %a, half %b) { ; GFX6-IEEE-LABEL: 
v_fdiv_f16_ulp25: ; GFX6-IEEE: ; %bb.0: ; GFX6-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v1, v1 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-IEEE-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 @@ -243,11 +254,15 @@ define half @v_fdiv_f16_ulp25(half %a, half %b) { ; GFX6-IEEE-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX6-IEEE-NEXT: v_div_fixup_f32 v0, v2, v1, v0 ; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-IEEE-NEXT: s_setpc_b64 s[30:31] ; ; GFX6-FLUSH-LABEL: v_fdiv_f16_ulp25: ; GFX6-FLUSH: ; %bb.0: ; GFX6-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v1, v1 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-FLUSH-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 @@ -263,8 +278,8 @@ define half @v_fdiv_f16_ulp25(half %a, half %b) { ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 ; GFX6-FLUSH-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX6-FLUSH-NEXT: v_div_fixup_f32 v0, v2, v1, v0 -; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 ; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-FLUSH-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-IEEE-LABEL: v_fdiv_f16_ulp25: @@ -403,6 +418,7 @@ define half @v_neg_rcp_f16(half %x) { ; GFX6-IEEE-LABEL: v_neg_rcp_f16: ; GFX6-IEEE: ; %bb.0: ; GFX6-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, -1.0 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-IEEE-NEXT: v_div_scale_f32 v2, s[4:5], v0, v0, v1 @@ -417,11 +433,14 @@ define half @v_neg_rcp_f16(half %x) { ; GFX6-IEEE-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX6-IEEE-NEXT: v_div_fixup_f32 v0, v2, v0, v1 ; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-IEEE-NEXT: s_setpc_b64 s[30:31] ; ; GFX6-FLUSH-LABEL: v_neg_rcp_f16: ; GFX6-FLUSH: ; %bb.0: ; GFX6-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, -1.0 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_div_scale_f32 v2, s[4:5], v0, v0, v1 @@ -437,8 +456,8 @@ define half @v_neg_rcp_f16(half %x) { ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 ; GFX6-FLUSH-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX6-FLUSH-NEXT: v_div_fixup_f32 v0, v2, v0, v1 -; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 ; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-FLUSH-NEXT: s_setpc_b64 s[30:31] ; ; GFX89-LABEL: v_neg_rcp_f16: @@ -460,6 +479,7 @@ define half @v_rcp_f16(half %x) { ; GFX6-IEEE-LABEL: v_rcp_f16: ; GFX6-IEEE: ; %bb.0: ; GFX6-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, 1.0 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-IEEE-NEXT: v_div_scale_f32 v2, s[4:5], v0, v0, v1 @@ -474,11 +494,14 @@ define half @v_rcp_f16(half %x) { ; GFX6-IEEE-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX6-IEEE-NEXT: v_div_fixup_f32 v0, v2, v0, v1 ; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 +; 
GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-IEEE-NEXT: s_setpc_b64 s[30:31] ; ; GFX6-FLUSH-LABEL: v_rcp_f16: ; GFX6-FLUSH: ; %bb.0: ; GFX6-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, 1.0 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_div_scale_f32 v2, s[4:5], v0, v0, v1 @@ -494,8 +517,8 @@ define half @v_rcp_f16(half %x) { ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 ; GFX6-FLUSH-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX6-FLUSH-NEXT: v_div_fixup_f32 v0, v2, v0, v1 -; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 ; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-FLUSH-NEXT: s_setpc_b64 s[30:31] ; ; GFX89-LABEL: v_rcp_f16: @@ -517,6 +540,7 @@ define half @v_rcp_f16_arcp(half %x) { ; GFX6-IEEE-LABEL: v_rcp_f16_arcp: ; GFX6-IEEE: ; %bb.0: ; GFX6-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, 1.0 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-IEEE-NEXT: v_div_scale_f32 v2, s[4:5], v0, v0, v1 @@ -531,11 +555,14 @@ define half @v_rcp_f16_arcp(half %x) { ; GFX6-IEEE-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX6-IEEE-NEXT: v_div_fixup_f32 v0, v2, v0, v1 ; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-IEEE-NEXT: s_setpc_b64 s[30:31] ; ; GFX6-FLUSH-LABEL: v_rcp_f16_arcp: ; GFX6-FLUSH: ; %bb.0: ; GFX6-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, 1.0 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_div_scale_f32 v2, s[4:5], v0, v0, v1 @@ -551,8 +578,8 @@ define half @v_rcp_f16_arcp(half %x) { ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 ; GFX6-FLUSH-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX6-FLUSH-NEXT: v_div_fixup_f32 v0, v2, v0, v1 -; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 ; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-FLUSH-NEXT: s_setpc_b64 s[30:31] ; ; GFX89-LABEL: v_rcp_f16_arcp: @@ -574,11 +601,13 @@ define half @v_rcp_f16_arcp_afn(half %x) { ; GFX6-LABEL: v_rcp_f16_arcp_afn: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-NEXT: v_cvt_f32_f16_e32 v1, 1.0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: v_rcp_f32_e32 v0, v0 ; GFX6-NEXT: v_mul_f32_e32 v0, v1, v0 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX89-LABEL: v_rcp_f16_arcp_afn: @@ -600,6 +629,7 @@ define half @v_rcp_f16_ulp25(half %x) { ; GFX6-IEEE-LABEL: v_rcp_f16_ulp25: ; GFX6-IEEE: ; %bb.0: ; GFX6-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, 1.0 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-IEEE-NEXT: v_div_scale_f32 v2, s[4:5], v0, v0, v1 @@ -614,11 +644,14 @@ define half @v_rcp_f16_ulp25(half %x) { ; GFX6-IEEE-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX6-IEEE-NEXT: v_div_fixup_f32 v0, v2, v0, v1 ; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-IEEE-NEXT: 
s_setpc_b64 s[30:31] ; ; GFX6-FLUSH-LABEL: v_rcp_f16_ulp25: ; GFX6-FLUSH: ; %bb.0: ; GFX6-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, 1.0 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_div_scale_f32 v2, s[4:5], v0, v0, v1 @@ -634,8 +667,8 @@ define half @v_rcp_f16_ulp25(half %x) { ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 ; GFX6-FLUSH-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX6-FLUSH-NEXT: v_div_fixup_f32 v0, v2, v0, v1 -; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 ; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-FLUSH-NEXT: s_setpc_b64 s[30:31] ; ; GFX89-LABEL: v_rcp_f16_ulp25: @@ -657,11 +690,14 @@ define half @v_fdiv_f16_afn_ulp25(half %a, half %b) { ; GFX6-LABEL: v_fdiv_f16_afn_ulp25: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: v_rcp_f32_e32 v1, v1 ; GFX6-NEXT: v_mul_f32_e32 v0, v0, v1 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX89-LABEL: v_fdiv_f16_afn_ulp25: @@ -693,6 +729,8 @@ define half @v_fdiv_f16_arcp_ulp25(half %a, half %b) { ; GFX6-IEEE-LABEL: v_fdiv_f16_arcp_ulp25: ; GFX6-IEEE: ; %bb.0: ; GFX6-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v1, v1 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-IEEE-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 @@ -707,11 +745,15 @@ define half @v_fdiv_f16_arcp_ulp25(half %a, half %b) { ; GFX6-IEEE-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX6-IEEE-NEXT: v_div_fixup_f32 v0, v2, v1, v0 ; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-IEEE-NEXT: s_setpc_b64 s[30:31] ; ; GFX6-FLUSH-LABEL: v_fdiv_f16_arcp_ulp25: ; GFX6-FLUSH: ; %bb.0: ; GFX6-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v1, v1 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-FLUSH-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 @@ -727,8 +769,8 @@ define half @v_fdiv_f16_arcp_ulp25(half %a, half %b) { ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 ; GFX6-FLUSH-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX6-FLUSH-NEXT: v_div_fixup_f32 v0, v2, v1, v0 -; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 ; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-FLUSH-NEXT: s_setpc_b64 s[30:31] ; ; GFX89-LABEL: v_fdiv_f16_arcp_ulp25: @@ -760,6 +802,10 @@ define <2 x half> @v_fdiv_v2f16(<2 x half> %a, <2 x half> %b) { ; GFX6-IEEE-LABEL: v_fdiv_v2f16: ; GFX6-IEEE: ; %bb.0: ; GFX6-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v3, v3 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v2, v2 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, v1 @@ 
-788,11 +834,18 @@ define <2 x half> @v_fdiv_v2f16(<2 x half> %a, <2 x half> %b) { ; GFX6-IEEE-NEXT: v_div_fmas_f32 v2, v2, v4, v6 ; GFX6-IEEE-NEXT: v_div_fixup_f32 v1, v2, v3, v1 ; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-IEEE-NEXT: s_setpc_b64 s[30:31] ; ; GFX6-FLUSH-LABEL: v_fdiv_v2f16: ; GFX6-FLUSH: ; %bb.0: ; GFX6-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v3, v3 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v2, v2 ; GFX6-FLUSH-NEXT: v_div_scale_f32 v4, s[4:5], v2, v2, v0 @@ -810,7 +863,6 @@ define <2 x half> @v_fdiv_v2f16(<2 x half> %a, <2 x half> %b) { ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v3, v3 ; GFX6-FLUSH-NEXT: v_div_fmas_f32 v4, v4, v5, v7 ; GFX6-FLUSH-NEXT: v_div_fixup_f32 v0, v4, v2, v0 -; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 ; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_div_scale_f32 v2, s[4:5], v3, v3, v1 ; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v4, v2 @@ -826,6 +878,8 @@ define <2 x half> @v_fdiv_v2f16(<2 x half> %a, <2 x half> %b) { ; GFX6-FLUSH-NEXT: v_div_fmas_f32 v2, v2, v4, v6 ; GFX6-FLUSH-NEXT: v_div_fixup_f32 v1, v2, v3, v1 ; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-FLUSH-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-IEEE-LABEL: v_fdiv_v2f16: @@ -833,11 +887,11 @@ define <2 x half> @v_fdiv_v2f16(<2 x half> %a, <2 x half> %b) { ; GFX8-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-IEEE-NEXT: v_cvt_f32_f16_e32 v2, v1 ; GFX8-IEEE-NEXT: v_cvt_f32_f16_e32 v4, v0 -; GFX8-IEEE-NEXT: v_lshrrev_b32_e32 v6, 16, v1 -; GFX8-IEEE-NEXT: v_cvt_f32_f16_e32 v8, v6 +; GFX8-IEEE-NEXT: v_lshrrev_b32_e32 v3, 16, v1 +; GFX8-IEEE-NEXT: v_cvt_f32_f16_e32 v8, v3 ; GFX8-IEEE-NEXT: v_rcp_f32_e32 v5, v2 -; GFX8-IEEE-NEXT: v_lshrrev_b32_e32 v3, 16, v0 -; GFX8-IEEE-NEXT: v_cvt_f32_f16_e32 v7, v3 +; GFX8-IEEE-NEXT: v_lshrrev_b32_e32 v6, 16, v0 +; GFX8-IEEE-NEXT: v_cvt_f32_f16_e32 v7, v6 ; GFX8-IEEE-NEXT: v_mul_f32_e32 v9, v4, v5 ; GFX8-IEEE-NEXT: v_mul_f32_e64 v10, -v2, v9 ; GFX8-IEEE-NEXT: v_add_f32_e32 v10, v10, v4 @@ -862,7 +916,7 @@ define <2 x half> @v_fdiv_v2f16(<2 x half> %a, <2 x half> %b) { ; GFX8-IEEE-NEXT: v_cvt_f16_f32_e32 v2, v2 ; GFX8-IEEE-NEXT: v_cvt_f16_f32_e32 v4, v4 ; GFX8-IEEE-NEXT: v_div_fixup_f16 v0, v2, v1, v0 -; GFX8-IEEE-NEXT: v_div_fixup_f16 v1, v4, v6, v3 +; GFX8-IEEE-NEXT: v_div_fixup_f16 v1, v4, v3, v6 ; GFX8-IEEE-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; GFX8-IEEE-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-IEEE-NEXT: s_setpc_b64 s[30:31] @@ -872,11 +926,11 @@ define <2 x half> @v_fdiv_v2f16(<2 x half> %a, <2 x half> %b) { ; GFX8-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-FLUSH-NEXT: v_cvt_f32_f16_e32 v2, v1 ; GFX8-FLUSH-NEXT: v_cvt_f32_f16_e32 v4, v0 -; GFX8-FLUSH-NEXT: v_lshrrev_b32_e32 v6, 16, v1 -; GFX8-FLUSH-NEXT: v_cvt_f32_f16_e32 v8, v6 +; GFX8-FLUSH-NEXT: v_lshrrev_b32_e32 v3, 16, v1 +; GFX8-FLUSH-NEXT: v_cvt_f32_f16_e32 v8, v3 ; GFX8-FLUSH-NEXT: v_rcp_f32_e32 v5, v2 -; GFX8-FLUSH-NEXT: v_lshrrev_b32_e32 v3, 16, v0 -; GFX8-FLUSH-NEXT: v_cvt_f32_f16_e32 v7, v3 +; GFX8-FLUSH-NEXT: v_lshrrev_b32_e32 v6, 16, v0 +; GFX8-FLUSH-NEXT: v_cvt_f32_f16_e32 v7, v6 
; GFX8-FLUSH-NEXT: v_mul_f32_e32 v9, v4, v5 ; GFX8-FLUSH-NEXT: v_mad_f32 v10, -v2, v9, v4 ; GFX8-FLUSH-NEXT: v_mac_f32_e32 v9, v10, v5 @@ -895,7 +949,7 @@ define <2 x half> @v_fdiv_v2f16(<2 x half> %a, <2 x half> %b) { ; GFX8-FLUSH-NEXT: v_cvt_f16_f32_e32 v2, v2 ; GFX8-FLUSH-NEXT: v_cvt_f16_f32_e32 v4, v4 ; GFX8-FLUSH-NEXT: v_div_fixup_f16 v0, v2, v1, v0 -; GFX8-FLUSH-NEXT: v_div_fixup_f16 v1, v4, v6, v3 +; GFX8-FLUSH-NEXT: v_div_fixup_f16 v1, v4, v3, v6 ; GFX8-FLUSH-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; GFX8-FLUSH-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-FLUSH-NEXT: s_setpc_b64 s[30:31] @@ -905,11 +959,11 @@ define <2 x half> @v_fdiv_v2f16(<2 x half> %a, <2 x half> %b) { ; GFX9-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-IEEE-NEXT: v_cvt_f32_f16_e32 v2, v1 ; GFX9-IEEE-NEXT: v_cvt_f32_f16_e32 v4, v0 -; GFX9-IEEE-NEXT: v_lshrrev_b32_e32 v6, 16, v1 -; GFX9-IEEE-NEXT: v_cvt_f32_f16_e32 v8, v6 +; GFX9-IEEE-NEXT: v_lshrrev_b32_e32 v3, 16, v1 +; GFX9-IEEE-NEXT: v_cvt_f32_f16_e32 v8, v3 ; GFX9-IEEE-NEXT: v_rcp_f32_e32 v5, v2 -; GFX9-IEEE-NEXT: v_lshrrev_b32_e32 v3, 16, v0 -; GFX9-IEEE-NEXT: v_cvt_f32_f16_e32 v7, v3 +; GFX9-IEEE-NEXT: v_lshrrev_b32_e32 v6, 16, v0 +; GFX9-IEEE-NEXT: v_cvt_f32_f16_e32 v7, v6 ; GFX9-IEEE-NEXT: v_mul_f32_e32 v9, v4, v5 ; GFX9-IEEE-NEXT: v_mul_f32_e64 v10, -v2, v9 ; GFX9-IEEE-NEXT: v_add_f32_e32 v10, v10, v4 @@ -934,7 +988,7 @@ define <2 x half> @v_fdiv_v2f16(<2 x half> %a, <2 x half> %b) { ; GFX9-IEEE-NEXT: v_cvt_f16_f32_e32 v2, v2 ; GFX9-IEEE-NEXT: v_cvt_f16_f32_e32 v4, v4 ; GFX9-IEEE-NEXT: v_div_fixup_f16 v0, v2, v1, v0 -; GFX9-IEEE-NEXT: v_div_fixup_f16 v1, v4, v6, v3 +; GFX9-IEEE-NEXT: v_div_fixup_f16 v1, v4, v3, v6 ; GFX9-IEEE-NEXT: v_pack_b32_f16 v0, v0, v1 ; GFX9-IEEE-NEXT: s_setpc_b64 s[30:31] ; @@ -943,11 +997,11 @@ define <2 x half> @v_fdiv_v2f16(<2 x half> %a, <2 x half> %b) { ; GFX9-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-FLUSH-NEXT: v_cvt_f32_f16_e32 v2, v1 ; GFX9-FLUSH-NEXT: v_cvt_f32_f16_e32 v4, v0 -; GFX9-FLUSH-NEXT: v_lshrrev_b32_e32 v5, 16, v1 -; GFX9-FLUSH-NEXT: v_cvt_f32_f16_e32 v7, v5 +; GFX9-FLUSH-NEXT: v_lshrrev_b32_e32 v3, 16, v1 +; GFX9-FLUSH-NEXT: v_cvt_f32_f16_e32 v7, v3 ; GFX9-FLUSH-NEXT: v_rcp_f32_e32 v2, v2 -; GFX9-FLUSH-NEXT: v_lshrrev_b32_e32 v3, 16, v0 -; GFX9-FLUSH-NEXT: v_cvt_f32_f16_e32 v6, v3 +; GFX9-FLUSH-NEXT: v_lshrrev_b32_e32 v5, 16, v0 +; GFX9-FLUSH-NEXT: v_cvt_f32_f16_e32 v6, v5 ; GFX9-FLUSH-NEXT: v_rcp_f32_e32 v7, v7 ; GFX9-FLUSH-NEXT: v_mul_f32_e32 v4, v4, v2 ; GFX9-FLUSH-NEXT: v_mad_mix_f32 v8, -v1, v4, v0 op_sel_hi:[1,0,1] @@ -966,7 +1020,7 @@ define <2 x half> @v_fdiv_v2f16(<2 x half> %a, <2 x half> %b) { ; GFX9-FLUSH-NEXT: v_cvt_f16_f32_e32 v2, v2 ; GFX9-FLUSH-NEXT: v_cvt_f16_f32_e32 v4, v4 ; GFX9-FLUSH-NEXT: v_div_fixup_f16 v0, v2, v1, v0 -; GFX9-FLUSH-NEXT: v_div_fixup_f16 v1, v4, v5, v3 +; GFX9-FLUSH-NEXT: v_div_fixup_f16 v1, v4, v3, v5 ; GFX9-FLUSH-NEXT: v_pack_b32_f16 v0, v0, v1 ; GFX9-FLUSH-NEXT: s_setpc_b64 s[30:31] ; @@ -1076,6 +1130,10 @@ define <2 x half> @v_fdiv_v2f16_afn(<2 x half> %a, <2 x half> %b) { ; GFX6-LABEL: v_fdiv_v2f16_afn: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX6-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 ; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2 ; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 @@ -1086,6 +1144,8 @@ define <2 x half> @v_fdiv_v2f16_afn(<2 x half> %a, <2 x half> %b) { ; GFX6-NEXT: 
v_mul_f32_e32 v1, v1, v3 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_fdiv_v2f16_afn: @@ -1138,6 +1198,10 @@ define <2 x half> @v_fdiv_v2f16_ulp25(<2 x half> %a, <2 x half> %b) { ; GFX6-IEEE-LABEL: v_fdiv_v2f16_ulp25: ; GFX6-IEEE: ; %bb.0: ; GFX6-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v3, v3 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v2, v2 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, v1 @@ -1166,11 +1230,18 @@ define <2 x half> @v_fdiv_v2f16_ulp25(<2 x half> %a, <2 x half> %b) { ; GFX6-IEEE-NEXT: v_div_fmas_f32 v2, v2, v4, v6 ; GFX6-IEEE-NEXT: v_div_fixup_f32 v1, v2, v3, v1 ; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-IEEE-NEXT: s_setpc_b64 s[30:31] ; ; GFX6-FLUSH-LABEL: v_fdiv_v2f16_ulp25: ; GFX6-FLUSH: ; %bb.0: ; GFX6-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v3, v3 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v2, v2 ; GFX6-FLUSH-NEXT: v_div_scale_f32 v4, s[4:5], v2, v2, v0 @@ -1188,7 +1259,6 @@ define <2 x half> @v_fdiv_v2f16_ulp25(<2 x half> %a, <2 x half> %b) { ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v3, v3 ; GFX6-FLUSH-NEXT: v_div_fmas_f32 v4, v4, v5, v7 ; GFX6-FLUSH-NEXT: v_div_fixup_f32 v0, v4, v2, v0 -; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 ; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_div_scale_f32 v2, s[4:5], v3, v3, v1 ; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v4, v2 @@ -1204,6 +1274,8 @@ define <2 x half> @v_fdiv_v2f16_ulp25(<2 x half> %a, <2 x half> %b) { ; GFX6-FLUSH-NEXT: v_div_fmas_f32 v2, v2, v4, v6 ; GFX6-FLUSH-NEXT: v_div_fixup_f32 v1, v2, v3, v1 ; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-FLUSH-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-IEEE-LABEL: v_fdiv_v2f16_ulp25: @@ -1211,11 +1283,11 @@ define <2 x half> @v_fdiv_v2f16_ulp25(<2 x half> %a, <2 x half> %b) { ; GFX8-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-IEEE-NEXT: v_cvt_f32_f16_e32 v2, v1 ; GFX8-IEEE-NEXT: v_cvt_f32_f16_e32 v4, v0 -; GFX8-IEEE-NEXT: v_lshrrev_b32_e32 v6, 16, v1 -; GFX8-IEEE-NEXT: v_cvt_f32_f16_e32 v8, v6 +; GFX8-IEEE-NEXT: v_lshrrev_b32_e32 v3, 16, v1 +; GFX8-IEEE-NEXT: v_cvt_f32_f16_e32 v8, v3 ; GFX8-IEEE-NEXT: v_rcp_f32_e32 v5, v2 -; GFX8-IEEE-NEXT: v_lshrrev_b32_e32 v3, 16, v0 -; GFX8-IEEE-NEXT: v_cvt_f32_f16_e32 v7, v3 +; GFX8-IEEE-NEXT: v_lshrrev_b32_e32 v6, 16, v0 +; GFX8-IEEE-NEXT: v_cvt_f32_f16_e32 v7, v6 ; GFX8-IEEE-NEXT: v_mul_f32_e32 v9, v4, v5 ; GFX8-IEEE-NEXT: v_mul_f32_e64 v10, -v2, v9 ; GFX8-IEEE-NEXT: v_add_f32_e32 v10, v10, v4 @@ -1240,7 +1312,7 @@ define <2 x half> @v_fdiv_v2f16_ulp25(<2 x half> %a, <2 x half> %b) { ; GFX8-IEEE-NEXT: v_cvt_f16_f32_e32 v2, v2 ; GFX8-IEEE-NEXT: v_cvt_f16_f32_e32 v4, v4 ; GFX8-IEEE-NEXT: v_div_fixup_f16 v0, v2, v1, v0 -; GFX8-IEEE-NEXT: v_div_fixup_f16 v1, v4, v6, v3 +; 
GFX8-IEEE-NEXT: v_div_fixup_f16 v1, v4, v3, v6 ; GFX8-IEEE-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; GFX8-IEEE-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-IEEE-NEXT: s_setpc_b64 s[30:31] @@ -1250,11 +1322,11 @@ define <2 x half> @v_fdiv_v2f16_ulp25(<2 x half> %a, <2 x half> %b) { ; GFX8-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-FLUSH-NEXT: v_cvt_f32_f16_e32 v2, v1 ; GFX8-FLUSH-NEXT: v_cvt_f32_f16_e32 v4, v0 -; GFX8-FLUSH-NEXT: v_lshrrev_b32_e32 v6, 16, v1 -; GFX8-FLUSH-NEXT: v_cvt_f32_f16_e32 v8, v6 +; GFX8-FLUSH-NEXT: v_lshrrev_b32_e32 v3, 16, v1 +; GFX8-FLUSH-NEXT: v_cvt_f32_f16_e32 v8, v3 ; GFX8-FLUSH-NEXT: v_rcp_f32_e32 v5, v2 -; GFX8-FLUSH-NEXT: v_lshrrev_b32_e32 v3, 16, v0 -; GFX8-FLUSH-NEXT: v_cvt_f32_f16_e32 v7, v3 +; GFX8-FLUSH-NEXT: v_lshrrev_b32_e32 v6, 16, v0 +; GFX8-FLUSH-NEXT: v_cvt_f32_f16_e32 v7, v6 ; GFX8-FLUSH-NEXT: v_mul_f32_e32 v9, v4, v5 ; GFX8-FLUSH-NEXT: v_mad_f32 v10, -v2, v9, v4 ; GFX8-FLUSH-NEXT: v_mac_f32_e32 v9, v10, v5 @@ -1273,7 +1345,7 @@ define <2 x half> @v_fdiv_v2f16_ulp25(<2 x half> %a, <2 x half> %b) { ; GFX8-FLUSH-NEXT: v_cvt_f16_f32_e32 v2, v2 ; GFX8-FLUSH-NEXT: v_cvt_f16_f32_e32 v4, v4 ; GFX8-FLUSH-NEXT: v_div_fixup_f16 v0, v2, v1, v0 -; GFX8-FLUSH-NEXT: v_div_fixup_f16 v1, v4, v6, v3 +; GFX8-FLUSH-NEXT: v_div_fixup_f16 v1, v4, v3, v6 ; GFX8-FLUSH-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; GFX8-FLUSH-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-FLUSH-NEXT: s_setpc_b64 s[30:31] @@ -1283,11 +1355,11 @@ define <2 x half> @v_fdiv_v2f16_ulp25(<2 x half> %a, <2 x half> %b) { ; GFX9-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-IEEE-NEXT: v_cvt_f32_f16_e32 v2, v1 ; GFX9-IEEE-NEXT: v_cvt_f32_f16_e32 v4, v0 -; GFX9-IEEE-NEXT: v_lshrrev_b32_e32 v6, 16, v1 -; GFX9-IEEE-NEXT: v_cvt_f32_f16_e32 v8, v6 +; GFX9-IEEE-NEXT: v_lshrrev_b32_e32 v3, 16, v1 +; GFX9-IEEE-NEXT: v_cvt_f32_f16_e32 v8, v3 ; GFX9-IEEE-NEXT: v_rcp_f32_e32 v5, v2 -; GFX9-IEEE-NEXT: v_lshrrev_b32_e32 v3, 16, v0 -; GFX9-IEEE-NEXT: v_cvt_f32_f16_e32 v7, v3 +; GFX9-IEEE-NEXT: v_lshrrev_b32_e32 v6, 16, v0 +; GFX9-IEEE-NEXT: v_cvt_f32_f16_e32 v7, v6 ; GFX9-IEEE-NEXT: v_mul_f32_e32 v9, v4, v5 ; GFX9-IEEE-NEXT: v_mul_f32_e64 v10, -v2, v9 ; GFX9-IEEE-NEXT: v_add_f32_e32 v10, v10, v4 @@ -1312,7 +1384,7 @@ define <2 x half> @v_fdiv_v2f16_ulp25(<2 x half> %a, <2 x half> %b) { ; GFX9-IEEE-NEXT: v_cvt_f16_f32_e32 v2, v2 ; GFX9-IEEE-NEXT: v_cvt_f16_f32_e32 v4, v4 ; GFX9-IEEE-NEXT: v_div_fixup_f16 v0, v2, v1, v0 -; GFX9-IEEE-NEXT: v_div_fixup_f16 v1, v4, v6, v3 +; GFX9-IEEE-NEXT: v_div_fixup_f16 v1, v4, v3, v6 ; GFX9-IEEE-NEXT: v_pack_b32_f16 v0, v0, v1 ; GFX9-IEEE-NEXT: s_setpc_b64 s[30:31] ; @@ -1321,11 +1393,11 @@ define <2 x half> @v_fdiv_v2f16_ulp25(<2 x half> %a, <2 x half> %b) { ; GFX9-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-FLUSH-NEXT: v_cvt_f32_f16_e32 v2, v1 ; GFX9-FLUSH-NEXT: v_cvt_f32_f16_e32 v4, v0 -; GFX9-FLUSH-NEXT: v_lshrrev_b32_e32 v5, 16, v1 -; GFX9-FLUSH-NEXT: v_cvt_f32_f16_e32 v7, v5 +; GFX9-FLUSH-NEXT: v_lshrrev_b32_e32 v3, 16, v1 +; GFX9-FLUSH-NEXT: v_cvt_f32_f16_e32 v7, v3 ; GFX9-FLUSH-NEXT: v_rcp_f32_e32 v2, v2 -; GFX9-FLUSH-NEXT: v_lshrrev_b32_e32 v3, 16, v0 -; GFX9-FLUSH-NEXT: v_cvt_f32_f16_e32 v6, v3 +; GFX9-FLUSH-NEXT: v_lshrrev_b32_e32 v5, 16, v0 +; GFX9-FLUSH-NEXT: v_cvt_f32_f16_e32 v6, v5 ; GFX9-FLUSH-NEXT: v_rcp_f32_e32 v7, v7 ; GFX9-FLUSH-NEXT: v_mul_f32_e32 v4, v4, v2 ; GFX9-FLUSH-NEXT: v_mad_mix_f32 v8, -v1, v4, v0 op_sel_hi:[1,0,1] @@ -1344,7 +1416,7 @@ define <2 x half> @v_fdiv_v2f16_ulp25(<2 x half> %a, <2 x half> %b) { ; GFX9-FLUSH-NEXT: 
v_cvt_f16_f32_e32 v2, v2 ; GFX9-FLUSH-NEXT: v_cvt_f16_f32_e32 v4, v4 ; GFX9-FLUSH-NEXT: v_div_fixup_f16 v0, v2, v1, v0 -; GFX9-FLUSH-NEXT: v_div_fixup_f16 v1, v4, v5, v3 +; GFX9-FLUSH-NEXT: v_div_fixup_f16 v1, v4, v3, v5 ; GFX9-FLUSH-NEXT: v_pack_b32_f16 v0, v0, v1 ; GFX9-FLUSH-NEXT: s_setpc_b64 s[30:31] ; @@ -1454,7 +1526,9 @@ define <2 x half> @v_rcp_v2f16(<2 x half> %x) { ; GFX6-IEEE-LABEL: v_rcp_v2f16: ; GFX6-IEEE: ; %bb.0: ; GFX6-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v2, 1.0 +; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v1, v1 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-IEEE-NEXT: v_div_scale_f32 v3, s[4:5], v0, v0, v2 @@ -1481,12 +1555,17 @@ define <2 x half> @v_rcp_v2f16(<2 x half> %x) { ; GFX6-IEEE-NEXT: v_div_fmas_f32 v3, v3, v4, v6 ; GFX6-IEEE-NEXT: v_div_fixup_f32 v1, v3, v1, v2 ; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-IEEE-NEXT: s_setpc_b64 s[30:31] ; ; GFX6-FLUSH-LABEL: v_rcp_v2f16: ; GFX6-FLUSH: ; %bb.0: ; GFX6-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v2, 1.0 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v1, v1 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_div_scale_f32 v3, s[4:5], v0, v0, v2 ; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v4, v3 @@ -1503,7 +1582,6 @@ define <2 x half> @v_rcp_v2f16(<2 x half> %x) { ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v4, 1.0 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-FLUSH-NEXT: v_div_fixup_f32 v0, v3, v0, v2 -; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 ; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v4 ; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v3, v2 @@ -1519,6 +1597,8 @@ define <2 x half> @v_rcp_v2f16(<2 x half> %x) { ; GFX6-FLUSH-NEXT: v_div_fmas_f32 v2, v2, v3, v6 ; GFX6-FLUSH-NEXT: v_div_fixup_f32 v1, v2, v1, v4 ; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-FLUSH-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-IEEE-LABEL: v_rcp_v2f16: @@ -1757,7 +1837,9 @@ define <2 x half> @v_neg_rcp_v2f16(<2 x half> %x) { ; GFX6-IEEE-LABEL: v_neg_rcp_v2f16: ; GFX6-IEEE: ; %bb.0: ; GFX6-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v2, -1.0 +; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v1, v1 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-IEEE-NEXT: v_div_scale_f32 v3, s[4:5], v0, v0, v2 @@ -1784,12 +1866,17 @@ define <2 x half> @v_neg_rcp_v2f16(<2 x half> %x) { ; GFX6-IEEE-NEXT: v_div_fmas_f32 v3, v3, v4, v6 ; GFX6-IEEE-NEXT: v_div_fixup_f32 v1, v3, v1, v2 ; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-IEEE-NEXT: s_setpc_b64 s[30:31] ; ; GFX6-FLUSH-LABEL: v_neg_rcp_v2f16: ; GFX6-FLUSH: ; %bb.0: ; GFX6-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v2, -1.0 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v1, v1 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 ; 
GFX6-FLUSH-NEXT: v_div_scale_f32 v3, s[4:5], v0, v0, v2 ; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v4, v3 @@ -1806,7 +1893,6 @@ define <2 x half> @v_neg_rcp_v2f16(<2 x half> %x) { ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v4, -1.0 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-FLUSH-NEXT: v_div_fixup_f32 v0, v3, v0, v2 -; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 ; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v4 ; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v3, v2 @@ -1822,6 +1908,8 @@ define <2 x half> @v_neg_rcp_v2f16(<2 x half> %x) { ; GFX6-FLUSH-NEXT: v_div_fmas_f32 v2, v2, v3, v6 ; GFX6-FLUSH-NEXT: v_div_fixup_f32 v1, v2, v1, v4 ; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-FLUSH-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-IEEE-LABEL: v_neg_rcp_v2f16: @@ -2060,46 +2148,51 @@ define <2 x half> @v_rcp_v2f16_fabs(<2 x half> %x) { ; GFX6-IEEE-LABEL: v_rcp_v2f16_fabs: ; GFX6-IEEE: ; %bb.0: ; GFX6-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v2, 1.0 ; GFX6-IEEE-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX6-IEEE-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX6-IEEE-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX6-IEEE-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX6-IEEE-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 -; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, 1.0 -; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v2, v0 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, v0 ; GFX6-IEEE-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX6-IEEE-NEXT: v_div_scale_f32 v3, s[4:5], v2, v2, v1 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-IEEE-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v2 ; GFX6-IEEE-NEXT: v_rcp_f32_e32 v4, v3 -; GFX6-IEEE-NEXT: v_div_scale_f32 v5, vcc, v1, v2, v1 +; GFX6-IEEE-NEXT: v_div_scale_f32 v5, vcc, v2, v1, v2 ; GFX6-IEEE-NEXT: v_fma_f32 v6, -v3, v4, 1.0 ; GFX6-IEEE-NEXT: v_fma_f32 v4, v6, v4, v4 ; GFX6-IEEE-NEXT: v_mul_f32_e32 v6, v5, v4 ; GFX6-IEEE-NEXT: v_fma_f32 v7, -v3, v6, v5 ; GFX6-IEEE-NEXT: v_fma_f32 v6, v7, v4, v6 ; GFX6-IEEE-NEXT: v_fma_f32 v3, -v3, v6, v5 -; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v5, v0 -; GFX6-IEEE-NEXT: v_div_fmas_f32 v0, v3, v4, v6 -; GFX6-IEEE-NEXT: v_div_fixup_f32 v0, v0, v2, v1 -; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX6-IEEE-NEXT: v_div_scale_f32 v2, s[4:5], v5, v5, v1 -; GFX6-IEEE-NEXT: v_rcp_f32_e32 v3, v2 -; GFX6-IEEE-NEXT: v_div_scale_f32 v4, vcc, v1, v5, v1 -; GFX6-IEEE-NEXT: v_fma_f32 v6, -v2, v3, 1.0 -; GFX6-IEEE-NEXT: v_fma_f32 v3, v6, v3, v3 -; GFX6-IEEE-NEXT: v_mul_f32_e32 v6, v4, v3 -; GFX6-IEEE-NEXT: v_fma_f32 v7, -v2, v6, v4 -; GFX6-IEEE-NEXT: v_fma_f32 v6, v7, v3, v6 -; GFX6-IEEE-NEXT: v_fma_f32 v2, -v2, v6, v4 -; GFX6-IEEE-NEXT: v_div_fmas_f32 v2, v2, v3, v6 -; GFX6-IEEE-NEXT: v_div_fixup_f32 v1, v2, v5, v1 +; GFX6-IEEE-NEXT: v_div_fmas_f32 v3, v3, v4, v6 +; GFX6-IEEE-NEXT: v_div_fixup_f32 v1, v3, v1, v2 +; GFX6-IEEE-NEXT: v_div_scale_f32 v3, s[4:5], v0, v0, v2 +; GFX6-IEEE-NEXT: v_rcp_f32_e32 v4, v3 +; GFX6-IEEE-NEXT: v_div_scale_f32 v5, vcc, v2, v0, v2 ; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-IEEE-NEXT: v_fma_f32 v6, -v3, v4, 1.0 +; GFX6-IEEE-NEXT: v_fma_f32 v4, v6, v4, v4 +; GFX6-IEEE-NEXT: v_mul_f32_e32 v6, v5, v4 +; GFX6-IEEE-NEXT: v_fma_f32 v7, -v3, v6, v5 +; GFX6-IEEE-NEXT: v_fma_f32 v6, v7, v4, v6 +; GFX6-IEEE-NEXT: v_fma_f32 v3, -v3, v6, v5 +; GFX6-IEEE-NEXT: v_div_fmas_f32 v3, v3, v4, v6 +; GFX6-IEEE-NEXT: 
v_div_fixup_f32 v0, v3, v0, v2 +; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v2, v0 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v1 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, v2 ; GFX6-IEEE-NEXT: s_setpc_b64 s[30:31] ; ; GFX6-FLUSH-LABEL: v_rcp_v2f16_fabs: ; GFX6-FLUSH: ; %bb.0: ; GFX6-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX6-FLUSH-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX6-FLUSH-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX6-FLUSH-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX6-FLUSH-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, 1.0 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v2, v0 @@ -2117,24 +2210,25 @@ define <2 x half> @v_rcp_v2f16_fabs(<2 x half> %x) { ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 ; GFX6-FLUSH-NEXT: v_div_fmas_f32 v3, v3, v4, v6 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v4, 1.0 -; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v5, v0 -; GFX6-FLUSH-NEXT: v_div_fixup_f32 v0, v3, v2, v1 -; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 -; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX6-FLUSH-NEXT: v_div_scale_f32 v1, s[4:5], v5, v5, v4 -; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v2, v1 -; GFX6-FLUSH-NEXT: v_div_scale_f32 v3, vcc, v4, v5, v4 +; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-FLUSH-NEXT: v_div_fixup_f32 v1, v3, v2, v1 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-FLUSH-NEXT: v_div_scale_f32 v2, s[4:5], v0, v0, v4 +; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v3, v2 +; GFX6-FLUSH-NEXT: v_div_scale_f32 v5, vcc, v4, v0, v4 ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 -; GFX6-FLUSH-NEXT: v_fma_f32 v6, -v1, v2, 1.0 -; GFX6-FLUSH-NEXT: v_fma_f32 v2, v6, v2, v2 -; GFX6-FLUSH-NEXT: v_mul_f32_e32 v6, v3, v2 -; GFX6-FLUSH-NEXT: v_fma_f32 v7, -v1, v6, v3 -; GFX6-FLUSH-NEXT: v_fma_f32 v6, v7, v2, v6 -; GFX6-FLUSH-NEXT: v_fma_f32 v1, -v1, v6, v3 +; GFX6-FLUSH-NEXT: v_fma_f32 v6, -v2, v3, 1.0 +; GFX6-FLUSH-NEXT: v_fma_f32 v3, v6, v3, v3 +; GFX6-FLUSH-NEXT: v_mul_f32_e32 v6, v5, v3 +; GFX6-FLUSH-NEXT: v_fma_f32 v7, -v2, v6, v5 +; GFX6-FLUSH-NEXT: v_fma_f32 v6, v7, v3, v6 +; GFX6-FLUSH-NEXT: v_fma_f32 v2, -v2, v6, v5 ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 -; GFX6-FLUSH-NEXT: v_div_fmas_f32 v1, v1, v2, v6 -; GFX6-FLUSH-NEXT: v_div_fixup_f32 v1, v1, v5, v4 -; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-FLUSH-NEXT: v_div_fmas_f32 v2, v2, v3, v6 +; GFX6-FLUSH-NEXT: v_div_fixup_f32 v0, v2, v0, v4 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v2, v0 +; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v1 +; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, v2 ; GFX6-FLUSH-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-IEEE-LABEL: v_rcp_v2f16_fabs: @@ -2247,32 +2341,32 @@ define <2 x half> @v_rcp_v2f16_fabs(<2 x half> %x) { ; GFX9-FLUSH-LABEL: v_rcp_v2f16_fabs: ; GFX9-FLUSH: ; %bb.0: ; GFX9-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-FLUSH-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v0 -; GFX9-FLUSH-NEXT: v_cvt_f32_f16_e32 v2, v1 -; GFX9-FLUSH-NEXT: v_lshrrev_b32_e32 v3, 16, v1 -; GFX9-FLUSH-NEXT: v_cvt_f32_f16_e32 v4, v3 -; GFX9-FLUSH-NEXT: v_cvt_f32_f16_e32 v5, 1.0 -; GFX9-FLUSH-NEXT: v_rcp_f32_e32 v2, v2 -; GFX9-FLUSH-NEXT: v_rcp_f32_e32 v4, v4 -; GFX9-FLUSH-NEXT: v_mul_f32_e32 v6, v5, v2 -; GFX9-FLUSH-NEXT: v_mad_mix_f32 v7, -v1, v6, 1.0 op_sel_hi:[1,0,1] -; GFX9-FLUSH-NEXT: v_mul_f32_e32 v5, v5, v4 -; GFX9-FLUSH-NEXT: 
v_mac_f32_e32 v6, v7, v2 -; GFX9-FLUSH-NEXT: v_mad_mix_f32 v7, -|v0|, v5, 1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1] -; GFX9-FLUSH-NEXT: v_mac_f32_e32 v5, v7, v4 -; GFX9-FLUSH-NEXT: v_mad_mix_f32 v8, -v1, v6, 1.0 op_sel_hi:[1,0,1] -; GFX9-FLUSH-NEXT: v_mad_mix_f32 v0, -|v0|, v5, 1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1] -; GFX9-FLUSH-NEXT: v_mul_f32_e32 v2, v8, v2 -; GFX9-FLUSH-NEXT: v_mul_f32_e32 v0, v0, v4 -; GFX9-FLUSH-NEXT: v_and_b32_e32 v2, 0xff800000, v2 -; GFX9-FLUSH-NEXT: v_and_b32_e32 v0, 0xff800000, v0 -; GFX9-FLUSH-NEXT: v_add_f32_e32 v2, v2, v6 -; GFX9-FLUSH-NEXT: v_add_f32_e32 v0, v0, v5 -; GFX9-FLUSH-NEXT: v_cvt_f16_f32_e32 v2, v2 -; GFX9-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX9-FLUSH-NEXT: v_div_fixup_f16 v1, v2, v1, 1.0 -; GFX9-FLUSH-NEXT: v_div_fixup_f16 v0, v0, v3, 1.0 -; GFX9-FLUSH-NEXT: v_pack_b32_f16 v0, v1, v0 +; GFX9-FLUSH-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 +; GFX9-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, v0 +; GFX9-FLUSH-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX9-FLUSH-NEXT: v_cvt_f32_f16_e32 v3, v2 +; GFX9-FLUSH-NEXT: v_cvt_f32_f16_e32 v4, 1.0 +; GFX9-FLUSH-NEXT: v_rcp_f32_e32 v1, v1 +; GFX9-FLUSH-NEXT: v_rcp_f32_e32 v3, v3 +; GFX9-FLUSH-NEXT: v_mul_f32_e32 v5, v4, v1 +; GFX9-FLUSH-NEXT: v_mad_mix_f32 v6, -v0, v5, 1.0 op_sel_hi:[1,0,1] +; GFX9-FLUSH-NEXT: v_mac_f32_e32 v5, v6, v1 +; GFX9-FLUSH-NEXT: v_mul_f32_e32 v4, v4, v3 +; GFX9-FLUSH-NEXT: v_mad_mix_f32 v7, -v0, v5, 1.0 op_sel_hi:[1,0,1] +; GFX9-FLUSH-NEXT: v_mad_mix_f32 v6, -v0, v4, 1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1] +; GFX9-FLUSH-NEXT: v_mul_f32_e32 v1, v7, v1 +; GFX9-FLUSH-NEXT: v_and_b32_e32 v1, 0xff800000, v1 +; GFX9-FLUSH-NEXT: v_mac_f32_e32 v4, v6, v3 +; GFX9-FLUSH-NEXT: v_add_f32_e32 v1, v1, v5 +; GFX9-FLUSH-NEXT: v_mad_mix_f32 v5, -v0, v4, 1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1] +; GFX9-FLUSH-NEXT: v_mul_f32_e32 v3, v5, v3 +; GFX9-FLUSH-NEXT: v_and_b32_e32 v3, 0xff800000, v3 +; GFX9-FLUSH-NEXT: v_add_f32_e32 v3, v3, v4 +; GFX9-FLUSH-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX9-FLUSH-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX9-FLUSH-NEXT: v_div_fixup_f16 v0, v1, v0, 1.0 +; GFX9-FLUSH-NEXT: v_div_fixup_f16 v1, v3, v2, 1.0 +; GFX9-FLUSH-NEXT: v_pack_b32_f16 v0, v0, v1 ; GFX9-FLUSH-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-IEEE-LABEL: v_rcp_v2f16_fabs: @@ -2346,32 +2440,32 @@ define <2 x half> @v_rcp_v2f16_fabs(<2 x half> %x) { ; GFX11-LABEL: v_rcp_v2f16_fabs: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_cvt_f32_f16_e32 v5, 1.0 -; GFX11-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v0 +; GFX11-NEXT: v_cvt_f32_f16_e32 v4, 1.0 +; GFX11-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 +; GFX11-NEXT: v_cvt_f32_f16_e32 v2, v0 +; GFX11-NEXT: v_rcp_f32_e32 v2, v2 +; GFX11-NEXT: s_waitcnt_depctr 0xfff +; GFX11-NEXT: v_mul_f32_e32 v5, v4, v2 +; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX11-NEXT: v_fma_mix_f32 v6, -v0, v5, 1.0 op_sel_hi:[1,0,1] ; GFX11-NEXT: v_cvt_f32_f16_e32 v3, v1 +; GFX11-NEXT: v_fmac_f32_e32 v5, v6, v2 ; GFX11-NEXT: v_rcp_f32_e32 v3, v3 +; GFX11-NEXT: v_fma_mix_f32 v6, -v0, v5, 1.0 op_sel_hi:[1,0,1] ; GFX11-NEXT: s_waitcnt_depctr 0xfff -; GFX11-NEXT: v_mul_f32_e32 v6, v5, v3 -; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v1 -; GFX11-NEXT: v_fma_mix_f32 v7, -v1, v6, 1.0 op_sel_hi:[1,0,1] -; GFX11-NEXT: v_cvt_f32_f16_e32 v4, v2 -; GFX11-NEXT: v_fmac_f32_e32 v6, v7, v3 -; GFX11-NEXT: v_rcp_f32_e32 v4, v4 -; GFX11-NEXT: v_fma_mix_f32 v7, -v1, v6, 1.0 op_sel_hi:[1,0,1] -; GFX11-NEXT: s_waitcnt_depctr 0xfff -; GFX11-NEXT: v_mul_f32_e32 v5, v5, v4 -; GFX11-NEXT: v_fma_mix_f32 v8, -|v0|, v5, 1.0 
op_sel:[1,0,0] op_sel_hi:[1,0,1] -; GFX11-NEXT: v_fmac_f32_e32 v5, v8, v4 -; GFX11-NEXT: v_fma_mix_f32 v0, -|v0|, v5, 1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1] -; GFX11-NEXT: v_dual_mul_f32 v3, v7, v3 :: v_dual_mul_f32 v0, v0, v4 -; GFX11-NEXT: v_and_b32_e32 v0, 0xff800000, v0 -; GFX11-NEXT: v_dual_add_f32 v0, v0, v5 :: v_dual_and_b32 v3, 0xff800000, v3 -; GFX11-NEXT: v_add_f32_e32 v3, v3, v6 -; GFX11-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX11-NEXT: v_mul_f32_e32 v4, v4, v3 +; GFX11-NEXT: v_fma_mix_f32 v7, -v0, v4, 1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1] +; GFX11-NEXT: v_fmac_f32_e32 v4, v7, v3 +; GFX11-NEXT: v_fma_mix_f32 v7, -v0, v4, 1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1] +; GFX11-NEXT: v_dual_mul_f32 v2, v6, v2 :: v_dual_mul_f32 v3, v7, v3 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff800000, v3 +; GFX11-NEXT: v_dual_add_f32 v3, v3, v4 :: v_dual_and_b32 v2, 0xff800000, v2 +; GFX11-NEXT: v_add_f32_e32 v2, v2, v5 ; GFX11-NEXT: v_cvt_f16_f32_e32 v3, v3 -; GFX11-NEXT: v_div_fixup_f16 v0, v0, v2, 1.0 +; GFX11-NEXT: v_cvt_f16_f32_e32 v2, v2 ; GFX11-NEXT: v_div_fixup_f16 v1, v3, v1, 1.0 -; GFX11-NEXT: v_pack_b32_f16 v0, v1, v0 +; GFX11-NEXT: v_div_fixup_f16 v0, v2, v0, 1.0 +; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] %x.fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x) %fdiv = fdiv <2 x half> , %x.fabs @@ -2382,46 +2476,51 @@ define <2 x half> @v_neg_rcp_v2f16_fabs(<2 x half> %x) { ; GFX6-IEEE-LABEL: v_neg_rcp_v2f16_fabs: ; GFX6-IEEE: ; %bb.0: ; GFX6-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v2, -1.0 ; GFX6-IEEE-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX6-IEEE-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX6-IEEE-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX6-IEEE-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX6-IEEE-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 -; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, -1.0 -; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v2, v0 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, v0 ; GFX6-IEEE-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX6-IEEE-NEXT: v_div_scale_f32 v3, s[4:5], v2, v2, v1 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-IEEE-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v2 ; GFX6-IEEE-NEXT: v_rcp_f32_e32 v4, v3 -; GFX6-IEEE-NEXT: v_div_scale_f32 v5, vcc, v1, v2, v1 +; GFX6-IEEE-NEXT: v_div_scale_f32 v5, vcc, v2, v1, v2 ; GFX6-IEEE-NEXT: v_fma_f32 v6, -v3, v4, 1.0 ; GFX6-IEEE-NEXT: v_fma_f32 v4, v6, v4, v4 ; GFX6-IEEE-NEXT: v_mul_f32_e32 v6, v5, v4 ; GFX6-IEEE-NEXT: v_fma_f32 v7, -v3, v6, v5 ; GFX6-IEEE-NEXT: v_fma_f32 v6, v7, v4, v6 ; GFX6-IEEE-NEXT: v_fma_f32 v3, -v3, v6, v5 -; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v5, v0 -; GFX6-IEEE-NEXT: v_div_fmas_f32 v0, v3, v4, v6 -; GFX6-IEEE-NEXT: v_div_fixup_f32 v0, v0, v2, v1 -; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX6-IEEE-NEXT: v_div_scale_f32 v2, s[4:5], v5, v5, v1 -; GFX6-IEEE-NEXT: v_rcp_f32_e32 v3, v2 -; GFX6-IEEE-NEXT: v_div_scale_f32 v4, vcc, v1, v5, v1 -; GFX6-IEEE-NEXT: v_fma_f32 v6, -v2, v3, 1.0 -; GFX6-IEEE-NEXT: v_fma_f32 v3, v6, v3, v3 -; GFX6-IEEE-NEXT: v_mul_f32_e32 v6, v4, v3 -; GFX6-IEEE-NEXT: v_fma_f32 v7, -v2, v6, v4 -; GFX6-IEEE-NEXT: v_fma_f32 v6, v7, v3, v6 -; GFX6-IEEE-NEXT: v_fma_f32 v2, -v2, v6, v4 -; GFX6-IEEE-NEXT: v_div_fmas_f32 v2, v2, v3, v6 -; GFX6-IEEE-NEXT: v_div_fixup_f32 v1, v2, v5, v1 +; GFX6-IEEE-NEXT: v_div_fmas_f32 v3, v3, v4, v6 +; GFX6-IEEE-NEXT: v_div_fixup_f32 v1, v3, v1, v2 +; GFX6-IEEE-NEXT: v_div_scale_f32 v3, s[4:5], v0, v0, v2 +; GFX6-IEEE-NEXT: v_rcp_f32_e32 v4, 
v3 +; GFX6-IEEE-NEXT: v_div_scale_f32 v5, vcc, v2, v0, v2 ; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-IEEE-NEXT: v_fma_f32 v6, -v3, v4, 1.0 +; GFX6-IEEE-NEXT: v_fma_f32 v4, v6, v4, v4 +; GFX6-IEEE-NEXT: v_mul_f32_e32 v6, v5, v4 +; GFX6-IEEE-NEXT: v_fma_f32 v7, -v3, v6, v5 +; GFX6-IEEE-NEXT: v_fma_f32 v6, v7, v4, v6 +; GFX6-IEEE-NEXT: v_fma_f32 v3, -v3, v6, v5 +; GFX6-IEEE-NEXT: v_div_fmas_f32 v3, v3, v4, v6 +; GFX6-IEEE-NEXT: v_div_fixup_f32 v0, v3, v0, v2 +; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v2, v0 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v1 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, v2 ; GFX6-IEEE-NEXT: s_setpc_b64 s[30:31] ; ; GFX6-FLUSH-LABEL: v_neg_rcp_v2f16_fabs: ; GFX6-FLUSH: ; %bb.0: ; GFX6-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX6-FLUSH-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX6-FLUSH-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX6-FLUSH-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX6-FLUSH-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, -1.0 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v2, v0 @@ -2439,24 +2538,25 @@ define <2 x half> @v_neg_rcp_v2f16_fabs(<2 x half> %x) { ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 ; GFX6-FLUSH-NEXT: v_div_fmas_f32 v3, v3, v4, v6 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v4, -1.0 -; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v5, v0 -; GFX6-FLUSH-NEXT: v_div_fixup_f32 v0, v3, v2, v1 -; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 -; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX6-FLUSH-NEXT: v_div_scale_f32 v1, s[4:5], v5, v5, v4 -; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v2, v1 -; GFX6-FLUSH-NEXT: v_div_scale_f32 v3, vcc, v4, v5, v4 +; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-FLUSH-NEXT: v_div_fixup_f32 v1, v3, v2, v1 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-FLUSH-NEXT: v_div_scale_f32 v2, s[4:5], v0, v0, v4 +; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v3, v2 +; GFX6-FLUSH-NEXT: v_div_scale_f32 v5, vcc, v4, v0, v4 ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 -; GFX6-FLUSH-NEXT: v_fma_f32 v6, -v1, v2, 1.0 -; GFX6-FLUSH-NEXT: v_fma_f32 v2, v6, v2, v2 -; GFX6-FLUSH-NEXT: v_mul_f32_e32 v6, v3, v2 -; GFX6-FLUSH-NEXT: v_fma_f32 v7, -v1, v6, v3 -; GFX6-FLUSH-NEXT: v_fma_f32 v6, v7, v2, v6 -; GFX6-FLUSH-NEXT: v_fma_f32 v1, -v1, v6, v3 +; GFX6-FLUSH-NEXT: v_fma_f32 v6, -v2, v3, 1.0 +; GFX6-FLUSH-NEXT: v_fma_f32 v3, v6, v3, v3 +; GFX6-FLUSH-NEXT: v_mul_f32_e32 v6, v5, v3 +; GFX6-FLUSH-NEXT: v_fma_f32 v7, -v2, v6, v5 +; GFX6-FLUSH-NEXT: v_fma_f32 v6, v7, v3, v6 +; GFX6-FLUSH-NEXT: v_fma_f32 v2, -v2, v6, v5 ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 -; GFX6-FLUSH-NEXT: v_div_fmas_f32 v1, v1, v2, v6 -; GFX6-FLUSH-NEXT: v_div_fixup_f32 v1, v1, v5, v4 -; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-FLUSH-NEXT: v_div_fmas_f32 v2, v2, v3, v6 +; GFX6-FLUSH-NEXT: v_div_fixup_f32 v0, v2, v0, v4 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v2, v0 +; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v1 +; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, v2 ; GFX6-FLUSH-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-IEEE-LABEL: v_neg_rcp_v2f16_fabs: @@ -2569,32 +2669,32 @@ define <2 x half> @v_neg_rcp_v2f16_fabs(<2 x half> %x) { ; GFX9-FLUSH-LABEL: v_neg_rcp_v2f16_fabs: ; GFX9-FLUSH: ; %bb.0: ; GFX9-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-FLUSH-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v0 
-; GFX9-FLUSH-NEXT: v_cvt_f32_f16_e32 v2, v1 -; GFX9-FLUSH-NEXT: v_lshrrev_b32_e32 v3, 16, v1 -; GFX9-FLUSH-NEXT: v_cvt_f32_f16_e32 v4, v3 -; GFX9-FLUSH-NEXT: v_cvt_f32_f16_e32 v5, -1.0 -; GFX9-FLUSH-NEXT: v_rcp_f32_e32 v2, v2 -; GFX9-FLUSH-NEXT: v_rcp_f32_e32 v4, v4 -; GFX9-FLUSH-NEXT: v_mul_f32_e32 v6, v5, v2 -; GFX9-FLUSH-NEXT: v_mad_mix_f32 v7, -v1, v6, -1.0 op_sel_hi:[1,0,1] -; GFX9-FLUSH-NEXT: v_mul_f32_e32 v5, v5, v4 -; GFX9-FLUSH-NEXT: v_mac_f32_e32 v6, v7, v2 -; GFX9-FLUSH-NEXT: v_mad_mix_f32 v7, -|v0|, v5, -1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1] -; GFX9-FLUSH-NEXT: v_mac_f32_e32 v5, v7, v4 -; GFX9-FLUSH-NEXT: v_mad_mix_f32 v8, -v1, v6, -1.0 op_sel_hi:[1,0,1] -; GFX9-FLUSH-NEXT: v_mad_mix_f32 v0, -|v0|, v5, -1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1] -; GFX9-FLUSH-NEXT: v_mul_f32_e32 v2, v8, v2 -; GFX9-FLUSH-NEXT: v_mul_f32_e32 v0, v0, v4 -; GFX9-FLUSH-NEXT: v_and_b32_e32 v2, 0xff800000, v2 -; GFX9-FLUSH-NEXT: v_and_b32_e32 v0, 0xff800000, v0 -; GFX9-FLUSH-NEXT: v_add_f32_e32 v2, v2, v6 -; GFX9-FLUSH-NEXT: v_add_f32_e32 v0, v0, v5 -; GFX9-FLUSH-NEXT: v_cvt_f16_f32_e32 v2, v2 -; GFX9-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX9-FLUSH-NEXT: v_div_fixup_f16 v1, v2, v1, -1.0 -; GFX9-FLUSH-NEXT: v_div_fixup_f16 v0, v0, v3, -1.0 -; GFX9-FLUSH-NEXT: v_pack_b32_f16 v0, v1, v0 +; GFX9-FLUSH-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 +; GFX9-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, v0 +; GFX9-FLUSH-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX9-FLUSH-NEXT: v_cvt_f32_f16_e32 v3, v2 +; GFX9-FLUSH-NEXT: v_cvt_f32_f16_e32 v4, -1.0 +; GFX9-FLUSH-NEXT: v_rcp_f32_e32 v1, v1 +; GFX9-FLUSH-NEXT: v_rcp_f32_e32 v3, v3 +; GFX9-FLUSH-NEXT: v_mul_f32_e32 v5, v4, v1 +; GFX9-FLUSH-NEXT: v_mad_mix_f32 v6, -v0, v5, -1.0 op_sel_hi:[1,0,1] +; GFX9-FLUSH-NEXT: v_mac_f32_e32 v5, v6, v1 +; GFX9-FLUSH-NEXT: v_mul_f32_e32 v4, v4, v3 +; GFX9-FLUSH-NEXT: v_mad_mix_f32 v7, -v0, v5, -1.0 op_sel_hi:[1,0,1] +; GFX9-FLUSH-NEXT: v_mad_mix_f32 v6, -v0, v4, -1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1] +; GFX9-FLUSH-NEXT: v_mul_f32_e32 v1, v7, v1 +; GFX9-FLUSH-NEXT: v_and_b32_e32 v1, 0xff800000, v1 +; GFX9-FLUSH-NEXT: v_mac_f32_e32 v4, v6, v3 +; GFX9-FLUSH-NEXT: v_add_f32_e32 v1, v1, v5 +; GFX9-FLUSH-NEXT: v_mad_mix_f32 v5, -v0, v4, -1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1] +; GFX9-FLUSH-NEXT: v_mul_f32_e32 v3, v5, v3 +; GFX9-FLUSH-NEXT: v_and_b32_e32 v3, 0xff800000, v3 +; GFX9-FLUSH-NEXT: v_add_f32_e32 v3, v3, v4 +; GFX9-FLUSH-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX9-FLUSH-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX9-FLUSH-NEXT: v_div_fixup_f16 v0, v1, v0, -1.0 +; GFX9-FLUSH-NEXT: v_div_fixup_f16 v1, v3, v2, -1.0 +; GFX9-FLUSH-NEXT: v_pack_b32_f16 v0, v0, v1 ; GFX9-FLUSH-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-IEEE-LABEL: v_neg_rcp_v2f16_fabs: @@ -2668,32 +2768,32 @@ define <2 x half> @v_neg_rcp_v2f16_fabs(<2 x half> %x) { ; GFX11-LABEL: v_neg_rcp_v2f16_fabs: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_cvt_f32_f16_e32 v5, -1.0 -; GFX11-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v0 +; GFX11-NEXT: v_cvt_f32_f16_e32 v4, -1.0 +; GFX11-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 +; GFX11-NEXT: v_cvt_f32_f16_e32 v2, v0 +; GFX11-NEXT: v_rcp_f32_e32 v2, v2 +; GFX11-NEXT: s_waitcnt_depctr 0xfff +; GFX11-NEXT: v_mul_f32_e32 v5, v4, v2 +; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX11-NEXT: v_fma_mix_f32 v6, -v0, v5, -1.0 op_sel_hi:[1,0,1] ; GFX11-NEXT: v_cvt_f32_f16_e32 v3, v1 +; GFX11-NEXT: v_fmac_f32_e32 v5, v6, v2 ; GFX11-NEXT: v_rcp_f32_e32 v3, v3 +; GFX11-NEXT: v_fma_mix_f32 v6, -v0, v5, -1.0 op_sel_hi:[1,0,1] ; GFX11-NEXT: 
s_waitcnt_depctr 0xfff -; GFX11-NEXT: v_mul_f32_e32 v6, v5, v3 -; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v1 -; GFX11-NEXT: v_fma_mix_f32 v7, -v1, v6, -1.0 op_sel_hi:[1,0,1] -; GFX11-NEXT: v_cvt_f32_f16_e32 v4, v2 -; GFX11-NEXT: v_fmac_f32_e32 v6, v7, v3 -; GFX11-NEXT: v_rcp_f32_e32 v4, v4 -; GFX11-NEXT: v_fma_mix_f32 v7, -v1, v6, -1.0 op_sel_hi:[1,0,1] -; GFX11-NEXT: s_waitcnt_depctr 0xfff -; GFX11-NEXT: v_mul_f32_e32 v5, v5, v4 -; GFX11-NEXT: v_fma_mix_f32 v8, -|v0|, v5, -1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1] -; GFX11-NEXT: v_fmac_f32_e32 v5, v8, v4 -; GFX11-NEXT: v_fma_mix_f32 v0, -|v0|, v5, -1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1] -; GFX11-NEXT: v_dual_mul_f32 v3, v7, v3 :: v_dual_mul_f32 v0, v0, v4 -; GFX11-NEXT: v_and_b32_e32 v0, 0xff800000, v0 -; GFX11-NEXT: v_dual_add_f32 v0, v0, v5 :: v_dual_and_b32 v3, 0xff800000, v3 -; GFX11-NEXT: v_add_f32_e32 v3, v3, v6 -; GFX11-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX11-NEXT: v_mul_f32_e32 v4, v4, v3 +; GFX11-NEXT: v_fma_mix_f32 v7, -v0, v4, -1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1] +; GFX11-NEXT: v_fmac_f32_e32 v4, v7, v3 +; GFX11-NEXT: v_fma_mix_f32 v7, -v0, v4, -1.0 op_sel:[1,0,0] op_sel_hi:[1,0,1] +; GFX11-NEXT: v_dual_mul_f32 v2, v6, v2 :: v_dual_mul_f32 v3, v7, v3 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff800000, v3 +; GFX11-NEXT: v_dual_add_f32 v3, v3, v4 :: v_dual_and_b32 v2, 0xff800000, v2 +; GFX11-NEXT: v_add_f32_e32 v2, v2, v5 ; GFX11-NEXT: v_cvt_f16_f32_e32 v3, v3 -; GFX11-NEXT: v_div_fixup_f16 v0, v0, v2, -1.0 +; GFX11-NEXT: v_cvt_f16_f32_e32 v2, v2 ; GFX11-NEXT: v_div_fixup_f16 v1, v3, v1, -1.0 -; GFX11-NEXT: v_pack_b32_f16 v0, v1, v0 +; GFX11-NEXT: v_div_fixup_f16 v0, v2, v0, -1.0 +; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] %x.fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x) %fdiv = fdiv <2 x half> , %x.fabs @@ -2704,7 +2804,9 @@ define <2 x half> @v_rcp_v2f16_arcp(<2 x half> %x) { ; GFX6-IEEE-LABEL: v_rcp_v2f16_arcp: ; GFX6-IEEE: ; %bb.0: ; GFX6-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v2, 1.0 +; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v1, v1 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-IEEE-NEXT: v_div_scale_f32 v3, s[4:5], v0, v0, v2 @@ -2731,12 +2833,17 @@ define <2 x half> @v_rcp_v2f16_arcp(<2 x half> %x) { ; GFX6-IEEE-NEXT: v_div_fmas_f32 v3, v3, v4, v6 ; GFX6-IEEE-NEXT: v_div_fixup_f32 v1, v3, v1, v2 ; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-IEEE-NEXT: s_setpc_b64 s[30:31] ; ; GFX6-FLUSH-LABEL: v_rcp_v2f16_arcp: ; GFX6-FLUSH: ; %bb.0: ; GFX6-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v2, 1.0 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v1, v1 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_div_scale_f32 v3, s[4:5], v0, v0, v2 ; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v4, v3 @@ -2753,7 +2860,6 @@ define <2 x half> @v_rcp_v2f16_arcp(<2 x half> %x) { ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v4, 1.0 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-FLUSH-NEXT: v_div_fixup_f32 v0, v3, v0, v2 -; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 ; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v4 ; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v3, v2 @@ -2769,6 +2875,8 @@ define <2 x 
half> @v_rcp_v2f16_arcp(<2 x half> %x) { ; GFX6-FLUSH-NEXT: v_div_fmas_f32 v2, v2, v3, v6 ; GFX6-FLUSH-NEXT: v_div_fixup_f32 v1, v2, v1, v4 ; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-FLUSH-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_rcp_v2f16_arcp: @@ -2812,15 +2920,19 @@ define <2 x half> @v_rcp_v2f16_arcp_afn(<2 x half> %x) { ; GFX6-LABEL: v_rcp_v2f16_arcp_afn: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-NEXT: v_cvt_f32_f16_e32 v2, 1.0 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX6-NEXT: v_cvt_f32_f16_e32 v2, 1.0 ; GFX6-NEXT: v_rcp_f32_e32 v0, v0 ; GFX6-NEXT: v_rcp_f32_e32 v1, v1 ; GFX6-NEXT: v_mul_f32_e32 v0, v2, v0 ; GFX6-NEXT: v_mul_f32_e32 v1, v2, v1 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_rcp_v2f16_arcp_afn: @@ -2864,7 +2976,9 @@ define <2 x half> @v_rcp_v2f16_ulp25(<2 x half> %x) { ; GFX6-IEEE-LABEL: v_rcp_v2f16_ulp25: ; GFX6-IEEE: ; %bb.0: ; GFX6-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v2, 1.0 +; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v1, v1 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-IEEE-NEXT: v_div_scale_f32 v3, s[4:5], v0, v0, v2 @@ -2891,12 +3005,17 @@ define <2 x half> @v_rcp_v2f16_ulp25(<2 x half> %x) { ; GFX6-IEEE-NEXT: v_div_fmas_f32 v3, v3, v4, v6 ; GFX6-IEEE-NEXT: v_div_fixup_f32 v1, v3, v1, v2 ; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-IEEE-NEXT: s_setpc_b64 s[30:31] ; ; GFX6-FLUSH-LABEL: v_rcp_v2f16_ulp25: ; GFX6-FLUSH: ; %bb.0: ; GFX6-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v2, 1.0 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v1, v1 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_div_scale_f32 v3, s[4:5], v0, v0, v2 ; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v4, v3 @@ -2913,7 +3032,6 @@ define <2 x half> @v_rcp_v2f16_ulp25(<2 x half> %x) { ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v4, 1.0 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-FLUSH-NEXT: v_div_fixup_f32 v0, v3, v0, v2 -; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 ; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v4 ; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v3, v2 @@ -2929,6 +3047,8 @@ define <2 x half> @v_rcp_v2f16_ulp25(<2 x half> %x) { ; GFX6-FLUSH-NEXT: v_div_fmas_f32 v2, v2, v3, v6 ; GFX6-FLUSH-NEXT: v_div_fixup_f32 v1, v2, v1, v4 ; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-FLUSH-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-IEEE-LABEL: v_rcp_v2f16_ulp25: @@ -3167,6 +3287,10 @@ define <2 x half> @v_fdiv_v2f16_afn_ulp25(<2 x half> %a, <2 x half> %b) { ; GFX6-LABEL: v_fdiv_v2f16_afn_ulp25: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX6-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 
+; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 ; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2 ; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 @@ -3177,6 +3301,8 @@ define <2 x half> @v_fdiv_v2f16_afn_ulp25(<2 x half> %a, <2 x half> %b) { ; GFX6-NEXT: v_mul_f32_e32 v1, v1, v3 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_fdiv_v2f16_afn_ulp25: @@ -3229,6 +3355,10 @@ define <2 x half> @v_fdiv_v2f16_arcp_ulp25(<2 x half> %a, <2 x half> %b) { ; GFX6-IEEE-LABEL: v_fdiv_v2f16_arcp_ulp25: ; GFX6-IEEE: ; %bb.0: ; GFX6-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v3, v3 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v2, v2 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, v1 @@ -3257,11 +3387,18 @@ define <2 x half> @v_fdiv_v2f16_arcp_ulp25(<2 x half> %a, <2 x half> %b) { ; GFX6-IEEE-NEXT: v_div_fmas_f32 v2, v2, v4, v6 ; GFX6-IEEE-NEXT: v_div_fixup_f32 v1, v2, v3, v1 ; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-IEEE-NEXT: s_setpc_b64 s[30:31] ; ; GFX6-FLUSH-LABEL: v_fdiv_v2f16_arcp_ulp25: ; GFX6-FLUSH: ; %bb.0: ; GFX6-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v3, v3 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v2, v2 ; GFX6-FLUSH-NEXT: v_div_scale_f32 v4, s[4:5], v2, v2, v0 @@ -3279,7 +3416,6 @@ define <2 x half> @v_fdiv_v2f16_arcp_ulp25(<2 x half> %a, <2 x half> %b) { ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v3, v3 ; GFX6-FLUSH-NEXT: v_div_fmas_f32 v4, v4, v5, v7 ; GFX6-FLUSH-NEXT: v_div_fixup_f32 v0, v4, v2, v0 -; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 ; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_div_scale_f32 v2, s[4:5], v3, v3, v1 ; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v4, v2 @@ -3295,6 +3431,8 @@ define <2 x half> @v_fdiv_v2f16_arcp_ulp25(<2 x half> %a, <2 x half> %b) { ; GFX6-FLUSH-NEXT: v_div_fmas_f32 v2, v2, v4, v6 ; GFX6-FLUSH-NEXT: v_div_fixup_f32 v1, v2, v3, v1 ; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-FLUSH-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_fdiv_v2f16_arcp_ulp25: @@ -3347,6 +3485,10 @@ define <2 x half> @v_fdiv_v2f16_arcp_afn_ulp25(<2 x half> %a, <2 x half> %b) { ; GFX6-LABEL: v_fdiv_v2f16_arcp_afn_ulp25: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX6-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 ; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2 ; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 @@ -3357,6 +3499,8 @@ define <2 x half> @v_fdiv_v2f16_arcp_afn_ulp25(<2 x half> %a, <2 x half> %b) { ; GFX6-NEXT: v_mul_f32_e32 v1, v1, v3 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-NEXT: 
v_cvt_f32_f16_e32 v1, v1 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_fdiv_v2f16_arcp_afn_ulp25: @@ -3700,8 +3844,8 @@ define amdgpu_ps i32 @s_fdiv_v2f16(i32 inreg %a.arg, i32 inreg %b.arg) { ; GFX6-IEEE: ; %bb.0: ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, s0 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, s1 -; GFX6-IEEE-NEXT: s_lshr_b32 s0, s0, 16 ; GFX6-IEEE-NEXT: s_lshr_b32 s1, s1, 16 +; GFX6-IEEE-NEXT: s_lshr_b32 s0, s0, 16 ; GFX6-IEEE-NEXT: v_div_scale_f32 v2, s[2:3], v1, v1, v0 ; GFX6-IEEE-NEXT: v_rcp_f32_e32 v3, v2 ; GFX6-IEEE-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 @@ -3737,8 +3881,8 @@ define amdgpu_ps i32 @s_fdiv_v2f16(i32 inreg %a.arg, i32 inreg %b.arg) { ; GFX6-FLUSH: ; %bb.0: ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, s0 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, s1 -; GFX6-FLUSH-NEXT: s_lshr_b32 s0, s0, 16 ; GFX6-FLUSH-NEXT: s_lshr_b32 s1, s1, 16 +; GFX6-FLUSH-NEXT: s_lshr_b32 s0, s0, 16 ; GFX6-FLUSH-NEXT: v_div_scale_f32 v2, s[2:3], v1, v1, v0 ; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v3, v2 ; GFX6-FLUSH-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 @@ -3779,11 +3923,11 @@ define amdgpu_ps i32 @s_fdiv_v2f16(i32 inreg %a.arg, i32 inreg %b.arg) { ; GFX8-IEEE: ; %bb.0: ; GFX8-IEEE-NEXT: v_cvt_f32_f16_e32 v0, s1 ; GFX8-IEEE-NEXT: v_cvt_f32_f16_e32 v1, s0 -; GFX8-IEEE-NEXT: s_lshr_b32 s3, s1, 16 -; GFX8-IEEE-NEXT: v_cvt_f32_f16_e32 v4, s3 +; GFX8-IEEE-NEXT: s_lshr_b32 s2, s1, 16 +; GFX8-IEEE-NEXT: v_cvt_f32_f16_e32 v4, s2 ; GFX8-IEEE-NEXT: v_rcp_f32_e32 v2, v0 -; GFX8-IEEE-NEXT: s_lshr_b32 s2, s0, 16 -; GFX8-IEEE-NEXT: v_cvt_f32_f16_e32 v3, s2 +; GFX8-IEEE-NEXT: s_lshr_b32 s3, s0, 16 +; GFX8-IEEE-NEXT: v_cvt_f32_f16_e32 v3, s3 ; GFX8-IEEE-NEXT: v_mul_f32_e32 v5, v1, v2 ; GFX8-IEEE-NEXT: v_mul_f32_e64 v6, -v0, v5 ; GFX8-IEEE-NEXT: v_add_f32_e32 v6, v6, v1 @@ -3804,13 +3948,13 @@ define amdgpu_ps i32 @s_fdiv_v2f16(i32 inreg %a.arg, i32 inreg %b.arg) { ; GFX8-IEEE-NEXT: v_add_f32_e32 v3, v4, v3 ; GFX8-IEEE-NEXT: v_mul_f32_e32 v1, v3, v1 ; GFX8-IEEE-NEXT: v_and_b32_e32 v1, 0xff800000, v1 -; GFX8-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX8-IEEE-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX8-IEEE-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX8-IEEE-NEXT: v_mov_b32_e32 v2, s1 -; GFX8-IEEE-NEXT: v_div_fixup_f16 v0, v0, v2, s0 -; GFX8-IEEE-NEXT: v_mov_b32_e32 v2, s3 -; GFX8-IEEE-NEXT: v_div_fixup_f16 v1, v1, v2, s2 +; GFX8-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX8-IEEE-NEXT: v_mov_b32_e32 v3, s3 +; GFX8-IEEE-NEXT: v_mov_b32_e32 v2, s0 +; GFX8-IEEE-NEXT: v_div_fixup_f16 v1, v1, s2, v3 +; GFX8-IEEE-NEXT: v_div_fixup_f16 v0, v0, s1, v2 ; GFX8-IEEE-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; GFX8-IEEE-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-IEEE-NEXT: v_readfirstlane_b32 s0, v0 @@ -3820,11 +3964,11 @@ define amdgpu_ps i32 @s_fdiv_v2f16(i32 inreg %a.arg, i32 inreg %b.arg) { ; GFX8-FLUSH: ; %bb.0: ; GFX8-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, s1 ; GFX8-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, s0 -; GFX8-FLUSH-NEXT: s_lshr_b32 s3, s1, 16 -; GFX8-FLUSH-NEXT: v_cvt_f32_f16_e32 v4, s3 +; GFX8-FLUSH-NEXT: s_lshr_b32 s2, s1, 16 +; GFX8-FLUSH-NEXT: v_cvt_f32_f16_e32 v4, s2 ; GFX8-FLUSH-NEXT: v_rcp_f32_e32 v2, v0 -; GFX8-FLUSH-NEXT: s_lshr_b32 s2, s0, 16 -; GFX8-FLUSH-NEXT: v_cvt_f32_f16_e32 v3, s2 +; GFX8-FLUSH-NEXT: s_lshr_b32 s3, s0, 16 +; GFX8-FLUSH-NEXT: v_cvt_f32_f16_e32 v3, s3 ; GFX8-FLUSH-NEXT: v_mul_f32_e32 v5, v1, v2 ; GFX8-FLUSH-NEXT: v_mad_f32 v6, -v0, v5, v1 ; GFX8-FLUSH-NEXT: v_mac_f32_e32 v5, v6, v2 @@ -3839,13 +3983,13 @@ define amdgpu_ps i32 @s_fdiv_v2f16(i32 inreg %a.arg, i32 inreg %b.arg) { ; GFX8-FLUSH-NEXT: v_mad_f32 v3, -v4, v2, v3 ; 
GFX8-FLUSH-NEXT: v_mul_f32_e32 v1, v3, v1 ; GFX8-FLUSH-NEXT: v_and_b32_e32 v1, 0xff800000, v1 -; GFX8-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX8-FLUSH-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX8-FLUSH-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX8-FLUSH-NEXT: v_mov_b32_e32 v2, s1 -; GFX8-FLUSH-NEXT: v_div_fixup_f16 v0, v0, v2, s0 -; GFX8-FLUSH-NEXT: v_mov_b32_e32 v2, s3 -; GFX8-FLUSH-NEXT: v_div_fixup_f16 v1, v1, v2, s2 +; GFX8-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX8-FLUSH-NEXT: v_mov_b32_e32 v3, s3 +; GFX8-FLUSH-NEXT: v_mov_b32_e32 v2, s0 +; GFX8-FLUSH-NEXT: v_div_fixup_f16 v1, v1, s2, v3 +; GFX8-FLUSH-NEXT: v_div_fixup_f16 v0, v0, s1, v2 ; GFX8-FLUSH-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; GFX8-FLUSH-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-FLUSH-NEXT: v_readfirstlane_b32 s0, v0 @@ -3855,11 +3999,11 @@ define amdgpu_ps i32 @s_fdiv_v2f16(i32 inreg %a.arg, i32 inreg %b.arg) { ; GFX9-IEEE: ; %bb.0: ; GFX9-IEEE-NEXT: v_cvt_f32_f16_e32 v0, s1 ; GFX9-IEEE-NEXT: v_cvt_f32_f16_e32 v1, s0 -; GFX9-IEEE-NEXT: s_lshr_b32 s3, s1, 16 -; GFX9-IEEE-NEXT: v_cvt_f32_f16_e32 v4, s3 +; GFX9-IEEE-NEXT: s_lshr_b32 s2, s1, 16 +; GFX9-IEEE-NEXT: v_cvt_f32_f16_e32 v4, s2 ; GFX9-IEEE-NEXT: v_rcp_f32_e32 v2, v0 -; GFX9-IEEE-NEXT: s_lshr_b32 s2, s0, 16 -; GFX9-IEEE-NEXT: v_cvt_f32_f16_e32 v3, s2 +; GFX9-IEEE-NEXT: s_lshr_b32 s3, s0, 16 +; GFX9-IEEE-NEXT: v_cvt_f32_f16_e32 v3, s3 ; GFX9-IEEE-NEXT: v_mul_f32_e32 v5, v1, v2 ; GFX9-IEEE-NEXT: v_mul_f32_e64 v6, -v0, v5 ; GFX9-IEEE-NEXT: v_add_f32_e32 v6, v6, v1 @@ -3880,13 +4024,13 @@ define amdgpu_ps i32 @s_fdiv_v2f16(i32 inreg %a.arg, i32 inreg %b.arg) { ; GFX9-IEEE-NEXT: v_add_f32_e32 v3, v4, v3 ; GFX9-IEEE-NEXT: v_mul_f32_e32 v1, v3, v1 ; GFX9-IEEE-NEXT: v_and_b32_e32 v1, 0xff800000, v1 -; GFX9-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX9-IEEE-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX9-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX9-IEEE-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX9-IEEE-NEXT: v_mov_b32_e32 v2, s1 -; GFX9-IEEE-NEXT: v_div_fixup_f16 v0, v0, v2, s0 -; GFX9-IEEE-NEXT: v_mov_b32_e32 v2, s3 -; GFX9-IEEE-NEXT: v_div_fixup_f16 v1, v1, v2, s2 +; GFX9-IEEE-NEXT: v_mov_b32_e32 v2, s0 +; GFX9-IEEE-NEXT: v_mov_b32_e32 v3, s3 +; GFX9-IEEE-NEXT: v_div_fixup_f16 v0, v0, s1, v2 +; GFX9-IEEE-NEXT: v_div_fixup_f16 v1, v1, s2, v3 ; GFX9-IEEE-NEXT: v_pack_b32_f16 v0, v0, v1 ; GFX9-IEEE-NEXT: v_readfirstlane_b32 s0, v0 ; GFX9-IEEE-NEXT: ; return to shader part epilog @@ -3895,32 +4039,32 @@ define amdgpu_ps i32 @s_fdiv_v2f16(i32 inreg %a.arg, i32 inreg %b.arg) { ; GFX9-FLUSH: ; %bb.0: ; GFX9-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, s1 ; GFX9-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, s0 -; GFX9-FLUSH-NEXT: s_lshr_b32 s3, s1, 16 -; GFX9-FLUSH-NEXT: v_mov_b32_e32 v2, s1 +; GFX9-FLUSH-NEXT: v_mov_b32_e32 v2, s0 +; GFX9-FLUSH-NEXT: s_lshr_b32 s2, s1, 16 ; GFX9-FLUSH-NEXT: v_rcp_f32_e32 v0, v0 -; GFX9-FLUSH-NEXT: v_cvt_f32_f16_e32 v3, s3 -; GFX9-FLUSH-NEXT: s_lshr_b32 s2, s0, 16 +; GFX9-FLUSH-NEXT: s_lshr_b32 s0, s0, 16 +; GFX9-FLUSH-NEXT: v_mov_b32_e32 v3, s0 ; GFX9-FLUSH-NEXT: v_mul_f32_e32 v1, v1, v0 -; GFX9-FLUSH-NEXT: v_mad_mix_f32 v4, -v2, v1, s0 op_sel_hi:[1,0,1] +; GFX9-FLUSH-NEXT: v_mad_mix_f32 v4, -s1, v1, v2 op_sel_hi:[1,0,1] ; GFX9-FLUSH-NEXT: v_mac_f32_e32 v1, v4, v0 -; GFX9-FLUSH-NEXT: v_mad_mix_f32 v4, -v2, v1, s0 op_sel_hi:[1,0,1] -; GFX9-FLUSH-NEXT: v_mul_f32_e32 v0, v4, v0 ; GFX9-FLUSH-NEXT: v_cvt_f32_f16_e32 v4, s2 -; GFX9-FLUSH-NEXT: v_rcp_f32_e32 v3, v3 +; GFX9-FLUSH-NEXT: v_mad_mix_f32 v5, -s1, v1, v2 op_sel_hi:[1,0,1] +; GFX9-FLUSH-NEXT: v_mul_f32_e32 v0, v5, v0 +; GFX9-FLUSH-NEXT: v_cvt_f32_f16_e32 
v5, s0 +; GFX9-FLUSH-NEXT: v_rcp_f32_e32 v4, v4 ; GFX9-FLUSH-NEXT: v_and_b32_e32 v0, 0xff800000, v0 ; GFX9-FLUSH-NEXT: v_add_f32_e32 v0, v0, v1 -; GFX9-FLUSH-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-FLUSH-NEXT: v_mul_f32_e32 v4, v4, v3 -; GFX9-FLUSH-NEXT: v_mad_mix_f32 v5, -v1, v4, s2 op_sel_hi:[1,0,1] -; GFX9-FLUSH-NEXT: v_mac_f32_e32 v4, v5, v3 -; GFX9-FLUSH-NEXT: v_mad_mix_f32 v5, -v1, v4, s2 op_sel_hi:[1,0,1] -; GFX9-FLUSH-NEXT: v_mul_f32_e32 v3, v5, v3 -; GFX9-FLUSH-NEXT: v_and_b32_e32 v3, 0xff800000, v3 -; GFX9-FLUSH-NEXT: v_add_f32_e32 v3, v3, v4 ; GFX9-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX9-FLUSH-NEXT: v_cvt_f16_f32_e32 v3, v3 -; GFX9-FLUSH-NEXT: v_div_fixup_f16 v0, v0, v2, s0 -; GFX9-FLUSH-NEXT: v_div_fixup_f16 v1, v3, v1, s2 +; GFX9-FLUSH-NEXT: v_mul_f32_e32 v1, v5, v4 +; GFX9-FLUSH-NEXT: v_mad_mix_f32 v5, -s2, v1, v3 op_sel_hi:[1,0,1] +; GFX9-FLUSH-NEXT: v_mac_f32_e32 v1, v5, v4 +; GFX9-FLUSH-NEXT: v_mad_mix_f32 v5, -s2, v1, v3 op_sel_hi:[1,0,1] +; GFX9-FLUSH-NEXT: v_mul_f32_e32 v4, v5, v4 +; GFX9-FLUSH-NEXT: v_and_b32_e32 v4, 0xff800000, v4 +; GFX9-FLUSH-NEXT: v_add_f32_e32 v1, v4, v1 +; GFX9-FLUSH-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX9-FLUSH-NEXT: v_div_fixup_f16 v0, v0, s1, v2 +; GFX9-FLUSH-NEXT: v_div_fixup_f16 v1, v1, s2, v3 ; GFX9-FLUSH-NEXT: v_pack_b32_f16 v0, v0, v1 ; GFX9-FLUSH-NEXT: v_readfirstlane_b32 s0, v0 ; GFX9-FLUSH-NEXT: ; return to shader part epilog @@ -4567,8 +4711,9 @@ define half @v_rsq_f16(half %a) { ; GFX6-IEEE-LABEL: v_rsq_f16: ; GFX6-IEEE: ; %bb.0: ; GFX6-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, 1.0 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 ; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 @@ -4584,15 +4729,17 @@ define half @v_rsq_f16(half %a) { ; GFX6-IEEE-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX6-IEEE-NEXT: v_div_fixup_f32 v0, v2, v0, v1 ; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-IEEE-NEXT: s_setpc_b64 s[30:31] ; ; GFX6-FLUSH-LABEL: v_rsq_f16: ; GFX6-FLUSH: ; %bb.0: ; GFX6-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, 1.0 +; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_sqrt_f32_e32 v0, v0 -; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 ; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_div_scale_f32 v2, s[4:5], v0, v0, v1 @@ -4609,6 +4756,7 @@ define half @v_rsq_f16(half %a) { ; GFX6-FLUSH-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX6-FLUSH-NEXT: v_div_fixup_f32 v0, v2, v0, v1 ; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-FLUSH-NEXT: s_setpc_b64 s[30:31] ; ; GFX89-LABEL: v_rsq_f16: @@ -4631,8 +4779,9 @@ define half @v_neg_rsq_f16(half %a) { ; GFX6-IEEE-LABEL: v_neg_rsq_f16: ; GFX6-IEEE: ; %bb.0: ; GFX6-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, -1.0 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 ; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, 
v0 @@ -4648,15 +4797,17 @@ define half @v_neg_rsq_f16(half %a) { ; GFX6-IEEE-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX6-IEEE-NEXT: v_div_fixup_f32 v0, v2, v0, v1 ; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-IEEE-NEXT: s_setpc_b64 s[30:31] ; ; GFX6-FLUSH-LABEL: v_neg_rsq_f16: ; GFX6-FLUSH: ; %bb.0: ; GFX6-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, -1.0 +; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_sqrt_f32_e32 v0, v0 -; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 ; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_div_scale_f32 v2, s[4:5], v0, v0, v1 @@ -4673,6 +4824,7 @@ define half @v_neg_rsq_f16(half %a) { ; GFX6-FLUSH-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX6-FLUSH-NEXT: v_div_fixup_f32 v0, v2, v0, v1 ; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-FLUSH-NEXT: s_setpc_b64 s[30:31] ; ; GFX89-LABEL: v_neg_rsq_f16: @@ -4705,32 +4857,35 @@ define { half, half } @v_rsq_f16_multi_use(half %a) { ; GFX6-IEEE-LABEL: v_rsq_f16_multi_use: ; GFX6-IEEE: ; %bb.0: ; GFX6-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, 1.0 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 ; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v2, v0 -; GFX6-IEEE-NEXT: v_div_scale_f32 v3, s[4:5], v2, v2, v1 -; GFX6-IEEE-NEXT: v_rcp_f32_e32 v4, v3 -; GFX6-IEEE-NEXT: v_div_scale_f32 v5, vcc, v1, v2, v1 -; GFX6-IEEE-NEXT: v_fma_f32 v6, -v3, v4, 1.0 -; GFX6-IEEE-NEXT: v_fma_f32 v4, v6, v4, v4 -; GFX6-IEEE-NEXT: v_mul_f32_e32 v6, v5, v4 -; GFX6-IEEE-NEXT: v_fma_f32 v7, -v3, v6, v5 -; GFX6-IEEE-NEXT: v_fma_f32 v6, v7, v4, v6 -; GFX6-IEEE-NEXT: v_fma_f32 v3, -v3, v6, v5 -; GFX6-IEEE-NEXT: v_div_fmas_f32 v3, v3, v4, v6 -; GFX6-IEEE-NEXT: v_div_fixup_f32 v1, v3, v2, v1 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-IEEE-NEXT: v_div_scale_f32 v2, s[4:5], v0, v0, v1 +; GFX6-IEEE-NEXT: v_rcp_f32_e32 v3, v2 +; GFX6-IEEE-NEXT: v_div_scale_f32 v4, vcc, v1, v0, v1 +; GFX6-IEEE-NEXT: v_fma_f32 v5, -v2, v3, 1.0 +; GFX6-IEEE-NEXT: v_fma_f32 v3, v5, v3, v3 +; GFX6-IEEE-NEXT: v_mul_f32_e32 v5, v4, v3 +; GFX6-IEEE-NEXT: v_fma_f32 v6, -v2, v5, v4 +; GFX6-IEEE-NEXT: v_fma_f32 v5, v6, v3, v5 +; GFX6-IEEE-NEXT: v_fma_f32 v2, -v2, v5, v4 +; GFX6-IEEE-NEXT: v_div_fmas_f32 v2, v2, v3, v5 +; GFX6-IEEE-NEXT: v_div_fixup_f32 v1, v2, v0, v1 ; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-IEEE-NEXT: s_setpc_b64 s[30:31] ; ; GFX6-FLUSH-LABEL: v_rsq_f16_multi_use: ; GFX6-FLUSH: ; %bb.0: ; GFX6-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, 1.0 +; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_sqrt_f32_e32 v0, v0 -; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 ; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v2, v0 ; GFX6-FLUSH-NEXT: v_div_scale_f32 v3, s[4:5], 
v2, v2, v1 @@ -4747,6 +4902,8 @@ define { half, half } @v_rsq_f16_multi_use(half %a) { ; GFX6-FLUSH-NEXT: v_div_fmas_f32 v3, v3, v4, v6 ; GFX6-FLUSH-NEXT: v_div_fixup_f32 v1, v3, v2, v1 ; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-FLUSH-NEXT: s_setpc_b64 s[30:31] ; ; GFX89-LABEL: v_rsq_f16_multi_use: @@ -4784,8 +4941,9 @@ define half @v_rsq_f16_missing_contract0(half %a) { ; GFX6-IEEE-LABEL: v_rsq_f16_missing_contract0: ; GFX6-IEEE: ; %bb.0: ; GFX6-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, 1.0 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 ; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 @@ -4801,15 +4959,17 @@ define half @v_rsq_f16_missing_contract0(half %a) { ; GFX6-IEEE-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX6-IEEE-NEXT: v_div_fixup_f32 v0, v2, v0, v1 ; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-IEEE-NEXT: s_setpc_b64 s[30:31] ; ; GFX6-FLUSH-LABEL: v_rsq_f16_missing_contract0: ; GFX6-FLUSH: ; %bb.0: ; GFX6-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, 1.0 +; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_sqrt_f32_e32 v0, v0 -; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 ; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_div_scale_f32 v2, s[4:5], v0, v0, v1 @@ -4826,6 +4986,7 @@ define half @v_rsq_f16_missing_contract0(half %a) { ; GFX6-FLUSH-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX6-FLUSH-NEXT: v_div_fixup_f32 v0, v2, v0, v1 ; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-FLUSH-NEXT: s_setpc_b64 s[30:31] ; ; GFX89-LABEL: v_rsq_f16_missing_contract0: @@ -4858,8 +5019,9 @@ define half @v_rsq_f16_missing_contract1(half %a) { ; GFX6-IEEE-LABEL: v_rsq_f16_missing_contract1: ; GFX6-IEEE: ; %bb.0: ; GFX6-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, 1.0 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 ; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 @@ -4875,15 +5037,17 @@ define half @v_rsq_f16_missing_contract1(half %a) { ; GFX6-IEEE-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX6-IEEE-NEXT: v_div_fixup_f32 v0, v2, v0, v1 ; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-IEEE-NEXT: s_setpc_b64 s[30:31] ; ; GFX6-FLUSH-LABEL: v_rsq_f16_missing_contract1: ; GFX6-FLUSH: ; %bb.0: ; GFX6-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, 1.0 +; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_sqrt_f32_e32 v0, v0 -; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 ; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 
v0, v0 ; GFX6-FLUSH-NEXT: v_div_scale_f32 v2, s[4:5], v0, v0, v1 @@ -4900,6 +5064,7 @@ define half @v_rsq_f16_missing_contract1(half %a) { ; GFX6-FLUSH-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX6-FLUSH-NEXT: v_div_fixup_f32 v0, v2, v0, v1 ; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-FLUSH-NEXT: s_setpc_b64 s[30:31] ; ; GFX89-LABEL: v_rsq_f16_missing_contract1: @@ -4932,8 +5097,9 @@ define half @v_neg_rsq_f16_missing_contract0(half %a) { ; GFX6-IEEE-LABEL: v_neg_rsq_f16_missing_contract0: ; GFX6-IEEE: ; %bb.0: ; GFX6-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, -1.0 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 ; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 @@ -4949,15 +5115,17 @@ define half @v_neg_rsq_f16_missing_contract0(half %a) { ; GFX6-IEEE-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX6-IEEE-NEXT: v_div_fixup_f32 v0, v2, v0, v1 ; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-IEEE-NEXT: s_setpc_b64 s[30:31] ; ; GFX6-FLUSH-LABEL: v_neg_rsq_f16_missing_contract0: ; GFX6-FLUSH: ; %bb.0: ; GFX6-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, -1.0 +; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_sqrt_f32_e32 v0, v0 -; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 ; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_div_scale_f32 v2, s[4:5], v0, v0, v1 @@ -4974,6 +5142,7 @@ define half @v_neg_rsq_f16_missing_contract0(half %a) { ; GFX6-FLUSH-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX6-FLUSH-NEXT: v_div_fixup_f32 v0, v2, v0, v1 ; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-FLUSH-NEXT: s_setpc_b64 s[30:31] ; ; GFX89-LABEL: v_neg_rsq_f16_missing_contract0: @@ -5006,8 +5175,9 @@ define half @v_neg_rsq_f16_missing_contract1(half %a) { ; GFX6-IEEE-LABEL: v_neg_rsq_f16_missing_contract1: ; GFX6-IEEE: ; %bb.0: ; GFX6-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, -1.0 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 ; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 @@ -5023,15 +5193,17 @@ define half @v_neg_rsq_f16_missing_contract1(half %a) { ; GFX6-IEEE-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX6-IEEE-NEXT: v_div_fixup_f32 v0, v2, v0, v1 ; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-IEEE-NEXT: s_setpc_b64 s[30:31] ; ; GFX6-FLUSH-LABEL: v_neg_rsq_f16_missing_contract1: ; GFX6-FLUSH: ; %bb.0: ; GFX6-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, -1.0 +; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_sqrt_f32_e32 v0, v0 -; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 ; GFX6-FLUSH-NEXT: 
v_cvt_f16_f32_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_div_scale_f32 v2, s[4:5], v0, v0, v1 @@ -5048,6 +5220,7 @@ define half @v_neg_rsq_f16_missing_contract1(half %a) { ; GFX6-FLUSH-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX6-FLUSH-NEXT: v_div_fixup_f32 v0, v2, v0, v1 ; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-FLUSH-NEXT: s_setpc_b64 s[30:31] ; ; GFX89-LABEL: v_neg_rsq_f16_missing_contract1: @@ -5080,8 +5253,9 @@ define half @v_neg_rsq_f16_fabs(half %a) { ; GFX6-IEEE-LABEL: v_neg_rsq_f16_fabs: ; GFX6-IEEE: ; %bb.0: ; GFX6-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-IEEE-NEXT: v_cvt_f32_f16_e64 v0, |v0| +; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, -1.0 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e64 v0, |v0| ; GFX6-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 ; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 @@ -5097,15 +5271,17 @@ define half @v_neg_rsq_f16_fabs(half %a) { ; GFX6-IEEE-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX6-IEEE-NEXT: v_div_fixup_f32 v0, v2, v0, v1 ; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-IEEE-NEXT: s_setpc_b64 s[30:31] ; ; GFX6-FLUSH-LABEL: v_neg_rsq_f16_fabs: ; GFX6-FLUSH: ; %bb.0: ; GFX6-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e64 v0, |v0| +; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, -1.0 +; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e64 v0, |v0| ; GFX6-FLUSH-NEXT: v_sqrt_f32_e32 v0, v0 -; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 ; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_div_scale_f32 v2, s[4:5], v0, v0, v1 @@ -5122,6 +5298,7 @@ define half @v_neg_rsq_f16_fabs(half %a) { ; GFX6-FLUSH-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX6-FLUSH-NEXT: v_div_fixup_f32 v0, v2, v0, v1 ; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-FLUSH-NEXT: s_setpc_b64 s[30:31] ; ; GFX89-LABEL: v_neg_rsq_f16_fabs: @@ -5155,8 +5332,9 @@ define half @v_rsq_f16_arcp(half %a) { ; GFX6-IEEE-LABEL: v_rsq_f16_arcp: ; GFX6-IEEE: ; %bb.0: ; GFX6-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, 1.0 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 ; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 @@ -5172,15 +5350,17 @@ define half @v_rsq_f16_arcp(half %a) { ; GFX6-IEEE-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX6-IEEE-NEXT: v_div_fixup_f32 v0, v2, v0, v1 ; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-IEEE-NEXT: s_setpc_b64 s[30:31] ; ; GFX6-FLUSH-LABEL: v_rsq_f16_arcp: ; GFX6-FLUSH: ; %bb.0: ; GFX6-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, 1.0 +; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_sqrt_f32_e32 v0, v0 -; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 ; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 ; 
GFX6-FLUSH-NEXT: v_div_scale_f32 v2, s[4:5], v0, v0, v1 @@ -5197,6 +5377,7 @@ define half @v_rsq_f16_arcp(half %a) { ; GFX6-FLUSH-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX6-FLUSH-NEXT: v_div_fixup_f32 v0, v2, v0, v1 ; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-FLUSH-NEXT: s_setpc_b64 s[30:31] ; ; GFX89-LABEL: v_rsq_f16_arcp: @@ -5219,8 +5400,9 @@ define half @v_neg_rsq_f16_arcp(half %a) { ; GFX6-IEEE-LABEL: v_neg_rsq_f16_arcp: ; GFX6-IEEE: ; %bb.0: ; GFX6-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, -1.0 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 ; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 @@ -5236,15 +5418,17 @@ define half @v_neg_rsq_f16_arcp(half %a) { ; GFX6-IEEE-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX6-IEEE-NEXT: v_div_fixup_f32 v0, v2, v0, v1 ; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-IEEE-NEXT: s_setpc_b64 s[30:31] ; ; GFX6-FLUSH-LABEL: v_neg_rsq_f16_arcp: ; GFX6-FLUSH: ; %bb.0: ; GFX6-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, -1.0 +; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_sqrt_f32_e32 v0, v0 -; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 ; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_div_scale_f32 v2, s[4:5], v0, v0, v1 @@ -5261,6 +5445,7 @@ define half @v_neg_rsq_f16_arcp(half %a) { ; GFX6-FLUSH-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX6-FLUSH-NEXT: v_div_fixup_f32 v0, v2, v0, v1 ; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-FLUSH-NEXT: s_setpc_b64 s[30:31] ; ; GFX89-LABEL: v_neg_rsq_f16_arcp: @@ -5293,14 +5478,16 @@ define half @v_rsq_f16_afn(half %a) { ; GFX6-LABEL: v_rsq_f16_afn: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-NEXT: v_cvt_f32_f16_e32 v1, 1.0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: v_sqrt_f32_e32 v0, v0 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: v_rcp_f32_e32 v0, v0 ; GFX6-NEXT: v_mul_f32_e32 v0, v1, v0 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX89-LABEL: v_rsq_f16_afn: @@ -5323,14 +5510,16 @@ define half @v_rsq_f16_afn_nocontract(half %a) { ; GFX6-LABEL: v_rsq_f16_afn_nocontract: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-NEXT: v_cvt_f32_f16_e32 v1, 1.0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: v_sqrt_f32_e32 v0, v0 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: v_rcp_f32_e32 v0, v0 ; GFX6-NEXT: v_mul_f32_e32 v0, v1, v0 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX89-LABEL: v_rsq_f16_afn_nocontract: @@ -5363,9 +5552,11 @@ define <2 x half> @v_rsq_v2f16(<2 x half> %a) { ; GFX6-IEEE-LABEL: v_rsq_v2f16: ; GFX6-IEEE: ; %bb.0: 
; GFX6-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v2, 1.0 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v2, 1.0 ; GFX6-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 ; GFX6-IEEE-NEXT: v_sqrt_f32_e32 v1, v1 ; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 @@ -5374,40 +5565,44 @@ define <2 x half> @v_rsq_v2f16(<2 x half> %a) { ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-IEEE-NEXT: v_div_scale_f32 v3, s[4:5], v0, v0, v2 ; GFX6-IEEE-NEXT: v_rcp_f32_e32 v6, v3 -; GFX6-IEEE-NEXT: v_div_scale_f32 v4, vcc, v2, v0, v2 ; GFX6-IEEE-NEXT: v_div_scale_f32 v5, s[4:5], v1, v1, v2 +; GFX6-IEEE-NEXT: v_rcp_f32_e32 v7, v5 ; GFX6-IEEE-NEXT: v_fma_f32 v9, -v3, v6, 1.0 +; GFX6-IEEE-NEXT: v_div_scale_f32 v4, vcc, v2, v0, v2 ; GFX6-IEEE-NEXT: v_fma_f32 v6, v9, v6, v6 +; GFX6-IEEE-NEXT: v_fma_f32 v10, -v5, v7, 1.0 ; GFX6-IEEE-NEXT: v_mul_f32_e32 v9, v4, v6 -; GFX6-IEEE-NEXT: v_rcp_f32_e32 v8, v5 +; GFX6-IEEE-NEXT: v_fma_f32 v7, v10, v7, v7 ; GFX6-IEEE-NEXT: v_fma_f32 v10, -v3, v9, v4 ; GFX6-IEEE-NEXT: v_fma_f32 v9, v10, v6, v9 ; GFX6-IEEE-NEXT: v_fma_f32 v3, -v3, v9, v4 +; GFX6-IEEE-NEXT: v_div_scale_f32 v8, s[4:5], v2, v1, v2 ; GFX6-IEEE-NEXT: v_div_fmas_f32 v3, v3, v6, v9 ; GFX6-IEEE-NEXT: v_div_fixup_f32 v0, v3, v0, v2 -; GFX6-IEEE-NEXT: v_fma_f32 v3, -v5, v8, 1.0 -; GFX6-IEEE-NEXT: v_div_scale_f32 v7, s[4:5], v2, v1, v2 -; GFX6-IEEE-NEXT: v_fma_f32 v3, v3, v8, v8 -; GFX6-IEEE-NEXT: v_mul_f32_e32 v4, v7, v3 -; GFX6-IEEE-NEXT: v_fma_f32 v6, -v5, v4, v7 -; GFX6-IEEE-NEXT: v_fma_f32 v4, v6, v3, v4 -; GFX6-IEEE-NEXT: v_fma_f32 v5, -v5, v4, v7 +; GFX6-IEEE-NEXT: v_mul_f32_e32 v3, v8, v7 +; GFX6-IEEE-NEXT: v_fma_f32 v4, -v5, v3, v8 +; GFX6-IEEE-NEXT: v_fma_f32 v3, v4, v7, v3 +; GFX6-IEEE-NEXT: v_fma_f32 v4, -v5, v3, v8 ; GFX6-IEEE-NEXT: s_mov_b64 vcc, s[4:5] -; GFX6-IEEE-NEXT: v_div_fmas_f32 v3, v5, v3, v4 +; GFX6-IEEE-NEXT: v_div_fmas_f32 v3, v4, v7, v3 ; GFX6-IEEE-NEXT: v_div_fixup_f32 v1, v3, v1, v2 ; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-IEEE-NEXT: s_setpc_b64 s[30:31] ; ; GFX6-FLUSH-LABEL: v_rsq_v2f16: ; GFX6-FLUSH: ; %bb.0: ; GFX6-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v2, 1.0 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v2, 1.0 ; GFX6-FLUSH-NEXT: v_sqrt_f32_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_sqrt_f32_e32 v1, v1 -; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 ; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v1, v1 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 @@ -5441,6 +5636,8 @@ define <2 x half> @v_rsq_v2f16(<2 x half> %a) { ; GFX6-FLUSH-NEXT: v_div_fmas_f32 v2, v2, v3, v6 ; GFX6-FLUSH-NEXT: v_div_fixup_f32 v1, v2, v1, v4 ; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-FLUSH-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-IEEE-LABEL: v_rsq_v2f16: @@ -5677,9 +5874,11 @@ define <2 x half> @v_neg_rsq_v2f16(<2 x half> %a) { ; GFX6-IEEE-LABEL: v_neg_rsq_v2f16: ; GFX6-IEEE: 
; %bb.0: ; GFX6-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v2, -1.0 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v2, -1.0 ; GFX6-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 ; GFX6-IEEE-NEXT: v_sqrt_f32_e32 v1, v1 ; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 @@ -5688,40 +5887,44 @@ define <2 x half> @v_neg_rsq_v2f16(<2 x half> %a) { ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-IEEE-NEXT: v_div_scale_f32 v3, s[4:5], v0, v0, v2 ; GFX6-IEEE-NEXT: v_rcp_f32_e32 v6, v3 -; GFX6-IEEE-NEXT: v_div_scale_f32 v4, vcc, v2, v0, v2 ; GFX6-IEEE-NEXT: v_div_scale_f32 v5, s[4:5], v1, v1, v2 +; GFX6-IEEE-NEXT: v_rcp_f32_e32 v7, v5 ; GFX6-IEEE-NEXT: v_fma_f32 v9, -v3, v6, 1.0 +; GFX6-IEEE-NEXT: v_div_scale_f32 v4, vcc, v2, v0, v2 ; GFX6-IEEE-NEXT: v_fma_f32 v6, v9, v6, v6 +; GFX6-IEEE-NEXT: v_fma_f32 v10, -v5, v7, 1.0 ; GFX6-IEEE-NEXT: v_mul_f32_e32 v9, v4, v6 -; GFX6-IEEE-NEXT: v_rcp_f32_e32 v8, v5 +; GFX6-IEEE-NEXT: v_fma_f32 v7, v10, v7, v7 ; GFX6-IEEE-NEXT: v_fma_f32 v10, -v3, v9, v4 ; GFX6-IEEE-NEXT: v_fma_f32 v9, v10, v6, v9 ; GFX6-IEEE-NEXT: v_fma_f32 v3, -v3, v9, v4 +; GFX6-IEEE-NEXT: v_div_scale_f32 v8, s[4:5], v2, v1, v2 ; GFX6-IEEE-NEXT: v_div_fmas_f32 v3, v3, v6, v9 ; GFX6-IEEE-NEXT: v_div_fixup_f32 v0, v3, v0, v2 -; GFX6-IEEE-NEXT: v_fma_f32 v3, -v5, v8, 1.0 -; GFX6-IEEE-NEXT: v_div_scale_f32 v7, s[4:5], v2, v1, v2 -; GFX6-IEEE-NEXT: v_fma_f32 v3, v3, v8, v8 -; GFX6-IEEE-NEXT: v_mul_f32_e32 v4, v7, v3 -; GFX6-IEEE-NEXT: v_fma_f32 v6, -v5, v4, v7 -; GFX6-IEEE-NEXT: v_fma_f32 v4, v6, v3, v4 -; GFX6-IEEE-NEXT: v_fma_f32 v5, -v5, v4, v7 +; GFX6-IEEE-NEXT: v_mul_f32_e32 v3, v8, v7 +; GFX6-IEEE-NEXT: v_fma_f32 v4, -v5, v3, v8 +; GFX6-IEEE-NEXT: v_fma_f32 v3, v4, v7, v3 +; GFX6-IEEE-NEXT: v_fma_f32 v4, -v5, v3, v8 ; GFX6-IEEE-NEXT: s_mov_b64 vcc, s[4:5] -; GFX6-IEEE-NEXT: v_div_fmas_f32 v3, v5, v3, v4 +; GFX6-IEEE-NEXT: v_div_fmas_f32 v3, v4, v7, v3 ; GFX6-IEEE-NEXT: v_div_fixup_f32 v1, v3, v1, v2 ; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-IEEE-NEXT: s_setpc_b64 s[30:31] ; ; GFX6-FLUSH-LABEL: v_neg_rsq_v2f16: ; GFX6-FLUSH: ; %bb.0: ; GFX6-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v2, -1.0 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v2, -1.0 ; GFX6-FLUSH-NEXT: v_sqrt_f32_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_sqrt_f32_e32 v1, v1 -; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 ; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v1, v1 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 @@ -5755,6 +5958,8 @@ define <2 x half> @v_neg_rsq_v2f16(<2 x half> %a) { ; GFX6-FLUSH-NEXT: v_div_fmas_f32 v2, v2, v3, v6 ; GFX6-FLUSH-NEXT: v_div_fixup_f32 v1, v2, v1, v4 ; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-FLUSH-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-IEEE-LABEL: v_neg_rsq_v2f16: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll index 8db1f46b0342a..6396c3df3772b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll @@ -253,7 +253,7 @@ define double @v_rcp_f64(double %x) { ; GFX6-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0 ; GFX6-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] ; GFX6-NEXT: v_mov_b32_e32 v10, 0x3ff00000 -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10 +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9 ; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3 ; GFX6-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] @@ -346,7 +346,7 @@ define double @v_rcp_f64_arcp(double %x) { ; GFX6-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0 ; GFX6-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] ; GFX6-NEXT: v_mov_b32_e32 v10, 0x3ff00000 -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10 +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9 ; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3 ; GFX6-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] @@ -483,7 +483,7 @@ define double @v_rcp_f64_ulp25(double %x) { ; GFX6-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0 ; GFX6-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] ; GFX6-NEXT: v_mov_b32_e32 v10, 0x3ff00000 -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10 +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9 ; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3 ; GFX6-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] @@ -1115,7 +1115,7 @@ define <2 x double> @v_rcp_v2f64(<2 x double> %x) { ; GFX6-NEXT: v_fma_f64 v[12:13], -v[4:5], v[6:7], 1.0 ; GFX6-NEXT: v_fma_f64 v[14:15], v[14:15], v[16:17], v[14:15] ; GFX6-NEXT: v_fma_f64 v[6:7], v[6:7], v[12:13], v[6:7] -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v9, v18 +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v18, v9 ; GFX6-NEXT: v_mul_f64 v[12:13], v[8:9], v[6:7] ; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v5 ; GFX6-NEXT: v_fma_f64 v[8:9], -v[4:5], v[12:13], v[8:9] @@ -1126,7 +1126,7 @@ define <2 x double> @v_rcp_v2f64(<2 x double> %x) { ; GFX6-NEXT: v_mul_f64 v[14:15], v[16:17], v[4:5] ; GFX6-NEXT: v_div_fmas_f64 v[6:7], v[8:9], v[6:7], v[12:13] ; GFX6-NEXT: v_fma_f64 v[8:9], -v[10:11], v[14:15], v[16:17] -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v17, v18 +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v18, v17 ; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v11 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] ; GFX6-NEXT: v_div_fixup_f64 v[0:1], v[6:7], v[0:1], 1.0 @@ -1275,7 +1275,7 @@ define <2 x double> @v_rcp_v2f64_arcp(<2 x double> %x) { ; GFX6-NEXT: v_fma_f64 v[12:13], -v[4:5], v[6:7], 1.0 ; GFX6-NEXT: v_fma_f64 v[14:15], v[14:15], v[16:17], v[14:15] ; GFX6-NEXT: v_fma_f64 v[6:7], v[6:7], v[12:13], v[6:7] -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v9, v18 +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v18, v9 ; GFX6-NEXT: v_mul_f64 v[12:13], v[8:9], v[6:7] ; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v5 ; GFX6-NEXT: v_fma_f64 v[8:9], -v[4:5], v[12:13], v[8:9] @@ -1286,7 +1286,7 @@ define <2 x double> @v_rcp_v2f64_arcp(<2 x double> %x) { ; GFX6-NEXT: v_mul_f64 v[14:15], v[16:17], v[4:5] ; GFX6-NEXT: v_div_fmas_f64 v[6:7], v[8:9], v[6:7], v[12:13] ; GFX6-NEXT: v_fma_f64 v[8:9], -v[10:11], v[14:15], v[16:17] -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v17, v18 +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v18, v17 ; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v11 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] ; GFX6-NEXT: v_div_fixup_f64 v[0:1], v[6:7], v[0:1], 1.0 @@ -1502,7 +1502,7 @@ define <2 x double> @v_rcp_v2f64_ulp25(<2 x double> %x) { ; GFX6-NEXT: v_fma_f64 v[12:13], 
-v[4:5], v[6:7], 1.0 ; GFX6-NEXT: v_fma_f64 v[14:15], v[14:15], v[16:17], v[14:15] ; GFX6-NEXT: v_fma_f64 v[6:7], v[6:7], v[12:13], v[6:7] -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v9, v18 +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v18, v9 ; GFX6-NEXT: v_mul_f64 v[12:13], v[8:9], v[6:7] ; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v5 ; GFX6-NEXT: v_fma_f64 v[8:9], -v[4:5], v[12:13], v[8:9] @@ -1513,7 +1513,7 @@ define <2 x double> @v_rcp_v2f64_ulp25(<2 x double> %x) { ; GFX6-NEXT: v_mul_f64 v[14:15], v[16:17], v[4:5] ; GFX6-NEXT: v_div_fmas_f64 v[6:7], v[8:9], v[6:7], v[12:13] ; GFX6-NEXT: v_fma_f64 v[8:9], -v[10:11], v[14:15], v[16:17] -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v17, v18 +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v18, v17 ; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v11 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] ; GFX6-NEXT: v_div_fixup_f64 v[0:1], v[6:7], v[0:1], 1.0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll index 340e293cda7b5..b7d6023b24923 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll @@ -12,7 +12,7 @@ define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_intrinsic(ptr %ptr, float %da ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 ; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX942-NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr) + ; GFX942-NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (f32) on %ir.ptr) ; GFX942-NEXT: S_ENDPGM 0 ; ; GFX11-LABEL: name: flat_atomic_fadd_f32_no_rtn_intrinsic @@ -23,7 +23,7 @@ define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_intrinsic(ptr %ptr, float %da ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr) + ; GFX11-NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (f32) on %ir.ptr) ; GFX11-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr %ptr, float %data) ret void @@ -38,7 +38,7 @@ define amdgpu_ps float @flat_atomic_fadd_f32_rtn_intrinsic(ptr %ptr, float %data ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 ; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX942-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr) + ; GFX942-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (f32) on %ir.ptr) ; GFX942-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]] ; GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; 
@@ -50,7 +50,7 @@ define amdgpu_ps float @flat_atomic_fadd_f32_rtn_intrinsic(ptr %ptr, float %data ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr) + ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (f32) on %ir.ptr) ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr %ptr, float %data) @@ -66,7 +66,7 @@ define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_atomicrmw(ptr %ptr, float %da ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 ; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX942-NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr) + ; GFX942-NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (f32) on %ir.ptr) ; GFX942-NEXT: S_ENDPGM 0 ; ; GFX11-LABEL: name: flat_atomic_fadd_f32_no_rtn_atomicrmw @@ -77,7 +77,7 @@ define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_atomicrmw(ptr %ptr, float %da ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr) + ; GFX11-NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (f32) on %ir.ptr) ; GFX11-NEXT: S_ENDPGM 0 %ret = atomicrmw fadd ptr %ptr, float %data syncscope("wavefront") monotonic, !amdgpu.no.fine.grained.memory !0 ret void @@ -92,7 +92,7 @@ define amdgpu_ps float @flat_atomic_fadd_f32_rtn_atomicrmw(ptr %ptr, float %data ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 ; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX942-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr) + ; GFX942-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (f32) on %ir.ptr) ; GFX942-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]] ; GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -104,7 +104,7 @@ define amdgpu_ps float @flat_atomic_fadd_f32_rtn_atomicrmw(ptr %ptr, float %data ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; 
GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr) + ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (f32) on %ir.ptr) ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = atomicrmw fadd ptr %ptr, float %data syncscope("wavefront") monotonic, !amdgpu.no.fine.grained.memory !0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll index c82ae2fbcbbdc..f94d038f90acc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll @@ -13,7 +13,7 @@ define amdgpu_ps void @flat_atomic_fadd_f64_no_rtn_atomicrmw(ptr %ptr, double %d ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: FLAT_ATOMIC_ADD_F64 [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr) + ; GFX90A_GFX942-NEXT: FLAT_ATOMIC_ADD_F64 [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (f64) on %ir.ptr) ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = atomicrmw fadd ptr %ptr, double %data syncscope("wavefront") monotonic, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0 ret void @@ -30,7 +30,7 @@ define amdgpu_ps double @flat_atomic_fadd_f64_rtn_atomicrmw(ptr %ptr, double %da ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[FLAT_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = FLAT_ATOMIC_ADD_F64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr) + ; GFX90A_GFX942-NEXT: [[FLAT_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = FLAT_ATOMIC_ADD_F64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (f64) on %ir.ptr) ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub0 ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub1 ; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.v2f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.v2f16.ll index 5909fe3d3694a..f833a9004957b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.v2f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.v2f16.ll @@ -10,7 +10,7 @@ define amdgpu_ps <2 x half> 
@flat_atomic_fadd_v2f16_rtn(ptr %ptr, <2 x half> %da ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 ; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX942-NEXT: [[FLAT_ATOMIC_PK_ADD_F16_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_PK_ADD_F16_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (<2 x s16>) on %ir.ptr) + ; GFX942-NEXT: [[FLAT_ATOMIC_PK_ADD_F16_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_PK_ADD_F16_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (<2 x f16>) on %ir.ptr) ; GFX942-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_PK_ADD_F16_RTN]] ; GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = atomicrmw fadd ptr %ptr, <2 x half> %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 @@ -27,7 +27,7 @@ define amdgpu_ps <2 x half> @flat_atomic_fadd_v2f16_saddr_rtn(ptr inreg %ptr, <2 ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 ; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX942-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX942-NEXT: [[FLAT_ATOMIC_PK_ADD_F16_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_PK_ADD_F16_RTN [[COPY3]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (<2 x s16>) on %ir.ptr) + ; GFX942-NEXT: [[FLAT_ATOMIC_PK_ADD_F16_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_PK_ADD_F16_RTN [[COPY3]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (<2 x f16>) on %ir.ptr) ; GFX942-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_PK_ADD_F16_RTN]] ; GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = atomicrmw fadd ptr %ptr, <2 x half> %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 @@ -43,7 +43,7 @@ define amdgpu_ps void @flat_atomic_fadd_v2f16_no_rtn(ptr %ptr, <2 x half> %data) ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 ; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX942-NEXT: FLAT_ATOMIC_PK_ADD_F16 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (<2 x s16>) on %ir.ptr) + ; GFX942-NEXT: FLAT_ATOMIC_PK_ADD_F16 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (<2 x f16>) on %ir.ptr) ; GFX942-NEXT: S_ENDPGM 0 %ret = atomicrmw fadd ptr %ptr, <2 x half> %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 ret void @@ -59,7 +59,7 @@ define amdgpu_ps void @flat_atomic_fadd_v2f16_saddr_no_rtn(ptr inreg %ptr, <2 x ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 ; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX942-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX942-NEXT: FLAT_ATOMIC_PK_ADD_F16 [[COPY3]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (<2 x s16>) on %ir.ptr) + ; GFX942-NEXT: FLAT_ATOMIC_PK_ADD_F16 [[COPY3]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (<2 x f16>) on %ir.ptr) ; GFX942-NEXT: S_ENDPGM 0 %ret = atomicrmw fadd ptr %ptr, <2 x 
half> %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll index 0b09cabf25a16..0f084baef2abe 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll @@ -83,11 +83,15 @@ define half @v_fma_f16(half %x, half %y, half %z) { ; GFX6-LABEL: v_fma_f16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-NEXT: v_cvt_f16_f32_e32 v2, v2 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2 ; GFX6-NEXT: v_fma_f32 v0, v0, v1, v2 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_fma_f16: @@ -128,11 +132,15 @@ define half @v_fma_f16_fneg_lhs(half %x, half %y, half %z) { ; GFX6-LABEL: v_fma_f16_fneg_lhs: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: v_cvt_f32_f16_e64 v0, -v0 +; GFX6-NEXT: v_cvt_f16_f32_e64 v0, -v0 +; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2 ; GFX6-NEXT: v_fma_f32 v0, v0, v1, v2 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_fma_f16_fneg_lhs: @@ -173,11 +181,15 @@ define half @v_fma_f16_fneg_rhs(half %x, half %y, half %z) { ; GFX6-LABEL: v_fma_f16_fneg_rhs: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-NEXT: v_cvt_f16_f32_e64 v1, -v1 +; GFX6-NEXT: v_cvt_f16_f32_e32 v2, v2 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX6-NEXT: v_cvt_f32_f16_e64 v1, -v1 +; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2 ; GFX6-NEXT: v_fma_f32 v0, v0, v1, v2 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_fma_f16_fneg_rhs: @@ -218,11 +230,15 @@ define half @v_fma_f16_fneg_add(half %x, half %y, half %z) { ; GFX6-LABEL: v_fma_f16_fneg_add: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-NEXT: v_cvt_f16_f32_e64 v2, -v2 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX6-NEXT: v_cvt_f32_f16_e64 v2, -v2 +; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2 ; GFX6-NEXT: v_fma_f32 v0, v0, v1, v2 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_fma_f16_fneg_add: @@ -263,6 +279,12 @@ define <2 x half> @v_fma_v2f16(<2 x half> %x, <2 x half> %y, <2 x half> %z) { ; GFX6-LABEL: v_fma_v2f16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX6-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX6-NEXT: v_cvt_f16_f32_e32 v4, v4 +; GFX6-NEXT: v_cvt_f16_f32_e32 v5, v5 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2 ; GFX6-NEXT: v_cvt_f32_f16_e32 v4, v4 @@ -273,16 +295,18 @@ define <2 x half> @v_fma_v2f16(<2 x half> %x, <2 x half> %y, <2 x half> %z) { ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-NEXT: v_fma_f32 v1, v1, 
v3, v5 ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_fma_v2f16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0 -; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v1 -; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v2 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v1 +; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v0 ; GFX8-NEXT: v_fma_f16 v0, v0, v1, v2 -; GFX8-NEXT: v_fma_f16 v1, v3, v4, v5 +; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v2 +; GFX8-NEXT: v_fma_f16 v1, v4, v3, v1 ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] @@ -312,32 +336,39 @@ define <2 x half> @v_fma_v2f16_fneg_lhs(<2 x half> %x, <2 x half> %y, <2 x half> ; GFX6-LABEL: v_fma_v2f16_fneg_lhs: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX6-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v2 +; GFX6-NEXT: v_cvt_f16_f32_e32 v2, v3 +; GFX6-NEXT: v_cvt_f16_f32_e32 v3, v4 +; GFX6-NEXT: v_cvt_f16_f32_e32 v4, v5 ; GFX6-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 -; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v5, v0 +; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2 ; GFX6-NEXT: v_cvt_f32_f16_e32 v4, v4 -; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3 -; GFX6-NEXT: v_cvt_f32_f16_e32 v5, v5 -; GFX6-NEXT: v_fma_f32 v0, v0, v2, v4 -; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX6-NEXT: v_fma_f32 v1, v1, v3, v5 +; GFX6-NEXT: v_fma_f32 v1, v5, v1, v3 ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-NEXT: v_fma_f32 v0, v0, v2, v4 +; GFX6-NEXT: v_cvt_f16_f32_e32 v2, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v1 +; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v2 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_fma_v2f16_fneg_lhs: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 -; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0 -; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v1 -; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v2 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v1 +; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v0 ; GFX8-NEXT: v_fma_f16 v0, v0, v1, v2 -; GFX8-NEXT: v_fma_f16 v1, v3, v4, v5 +; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v2 +; GFX8-NEXT: v_fma_f16 v1, v4, v3, v1 ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] @@ -368,32 +399,39 @@ define <2 x half> @v_fma_v2f16_fneg_rhs(<2 x half> %x, <2 x half> %y, <2 x half> ; GFX6-LABEL: v_fma_v2f16_fneg_rhs: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX6-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX6-NEXT: v_or_b32_e32 v2, v3, v2 +; GFX6-NEXT: v_or_b32_e32 v2, v2, v3 +; GFX6-NEXT: v_cvt_f16_f32_e32 v3, v4 +; GFX6-NEXT: v_cvt_f16_f32_e32 v4, v5 ; GFX6-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 -; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v2 +; 
GFX6-NEXT: v_lshrrev_b32_e32 v5, 16, v2 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX6-NEXT: v_cvt_f32_f16_e32 v4, v4 -; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-NEXT: v_cvt_f32_f16_e32 v5, v5 -; GFX6-NEXT: v_fma_f32 v0, v0, v2, v4 +; GFX6-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GFX6-NEXT: v_fma_f32 v0, v0, v2, v3 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX6-NEXT: v_fma_f32 v1, v1, v3, v5 +; GFX6-NEXT: v_fma_f32 v1, v1, v5, v4 ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_fma_v2f16_fneg_rhs: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_xor_b32_e32 v1, 0x80008000, v1 -; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0 -; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v1 -; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v2 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v1 +; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v0 ; GFX8-NEXT: v_fma_f16 v0, v0, v1, v2 -; GFX8-NEXT: v_fma_f16 v1, v3, v4, v5 +; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v2 +; GFX8-NEXT: v_fma_f16 v1, v4, v3, v1 ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] @@ -424,6 +462,12 @@ define <2 x half> @v_fma_v2f16_fneg_lhs_rhs(<2 x half> %x, <2 x half> %y, <2 x h ; GFX6-LABEL: v_fma_v2f16_fneg_lhs_rhs: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX6-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX6-NEXT: v_cvt_f16_f32_e32 v4, v4 +; GFX6-NEXT: v_cvt_f16_f32_e32 v5, v5 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2 ; GFX6-NEXT: v_cvt_f32_f16_e32 v4, v4 @@ -434,16 +478,18 @@ define <2 x half> @v_fma_v2f16_fneg_lhs_rhs(<2 x half> %x, <2 x half> %y, <2 x h ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-NEXT: v_fma_f32 v1, v1, v3, v5 ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_fma_v2f16_fneg_lhs_rhs: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0 -; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v1 -; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v2 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v1 +; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v0 ; GFX8-NEXT: v_fma_f16 v0, v0, v1, v2 -; GFX8-NEXT: v_fma_f16 v1, v3, v4, v5 +; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v2 +; GFX8-NEXT: v_fma_f16 v1, v4, v3, v1 ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] @@ -475,21 +521,33 @@ define <3 x half> @v_fma_v3f16(<3 x half> %x, <3 x half> %y, <3 x half> %z) { ; GFX6-LABEL: v_fma_v3f16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX6-NEXT: v_cvt_f16_f32_e32 v6, v6 +; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX6-NEXT: v_cvt_f16_f32_e32 v4, v4 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3 ; GFX6-NEXT: v_cvt_f32_f16_e32 v6, v6 -; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX6-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GFX6-NEXT: v_cvt_f16_f32_e32 v5, v5 +; GFX6-NEXT: v_cvt_f16_f32_e32 
v7, v7 +; GFX6-NEXT: v_cvt_f16_f32_e32 v8, v8 ; GFX6-NEXT: v_fma_f32 v0, v0, v3, v6 +; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v4 ; GFX6-NEXT: v_cvt_f32_f16_e32 v4, v7 +; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX6-NEXT: v_cvt_f32_f16_e32 v5, v5 ; GFX6-NEXT: v_cvt_f32_f16_e32 v6, v8 -; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-NEXT: v_fma_f32 v1, v1, v3, v4 -; GFX6-NEXT: v_fma_f32 v2, v2, v5, v6 +; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-NEXT: v_fma_f32 v2, v2, v5, v6 ; GFX6-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_fma_v3f16: @@ -500,9 +558,12 @@ define <3 x half> @v_fma_v3f16(<3 x half> %x, <3 x half> %y, <3 x half> %z) { ; GFX8-NEXT: v_lshrrev_b32_e32 v8, 16, v4 ; GFX8-NEXT: v_fma_f16 v0, v0, v2, v4 ; GFX8-NEXT: v_fma_f16 v2, v6, v7, v8 -; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX8-NEXT: s_and_b32 s4, 0xffff, s4 ; GFX8-NEXT: v_fma_f16 v1, v1, v3, v5 +; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX8-NEXT: s_lshl_b32 s4, s4, 16 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX8-NEXT: v_or_b32_e32 v1, s4, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_fma_v3f16: @@ -533,16 +594,28 @@ define <4 x half> @v_fma_v4f16(<4 x half> %x, <4 x half> %y, <4 x half> %z) { ; GFX6-LABEL: v_fma_v4f16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-NEXT: v_cvt_f16_f32_e32 v4, v4 +; GFX6-NEXT: v_cvt_f16_f32_e32 v5, v5 +; GFX6-NEXT: v_cvt_f16_f32_e32 v8, v8 +; GFX6-NEXT: v_cvt_f16_f32_e32 v9, v9 +; GFX6-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX6-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX6-NEXT: v_cvt_f16_f32_e32 v6, v6 +; GFX6-NEXT: v_cvt_f16_f32_e32 v7, v7 +; GFX6-NEXT: v_cvt_f16_f32_e32 v10, v10 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: v_cvt_f32_f16_e32 v4, v4 ; GFX6-NEXT: v_cvt_f32_f16_e32 v8, v8 ; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-NEXT: v_cvt_f32_f16_e32 v5, v5 ; GFX6-NEXT: v_cvt_f32_f16_e32 v9, v9 +; GFX6-NEXT: v_cvt_f16_f32_e32 v11, v11 ; GFX6-NEXT: v_fma_f32 v0, v0, v4, v8 ; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX6-NEXT: v_cvt_f32_f16_e32 v4, v6 ; GFX6-NEXT: v_fma_f32 v1, v1, v5, v9 +; GFX6-NEXT: v_cvt_f32_f16_e32 v4, v6 ; GFX6-NEXT: v_cvt_f32_f16_e32 v5, v10 ; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3 ; GFX6-NEXT: v_cvt_f32_f16_e32 v6, v7 @@ -553,21 +626,25 @@ define <4 x half> @v_fma_v4f16(<4 x half> %x, <4 x half> %y, <4 x half> %z) { ; GFX6-NEXT: v_fma_f32 v3, v3, v6, v7 ; GFX6-NEXT: v_cvt_f16_f32_e32 v2, v2 ; GFX6-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_fma_v4f16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_lshrrev_b32_e32 v6, 16, v0 -; GFX8-NEXT: v_lshrrev_b32_e32 v8, 16, v2 -; GFX8-NEXT: v_lshrrev_b32_e32 v10, 16, v4 -; GFX8-NEXT: v_lshrrev_b32_e32 v7, 16, v1 -; GFX8-NEXT: v_lshrrev_b32_e32 v9, 16, v3 -; GFX8-NEXT: v_lshrrev_b32_e32 v11, 16, v5 +; GFX8-NEXT: v_lshrrev_b32_e32 v7, 16, v0 +; GFX8-NEXT: v_lshrrev_b32_e32 v9, 16, v2 +; GFX8-NEXT: v_lshrrev_b32_e32 v11, 16, v4 +; GFX8-NEXT: v_lshrrev_b32_e32 v6, 16, v1 +; GFX8-NEXT: v_lshrrev_b32_e32 v8, 16, v3 +; GFX8-NEXT: v_lshrrev_b32_e32 v10, 16, v5 ; 
GFX8-NEXT: v_fma_f16 v0, v0, v2, v4 -; GFX8-NEXT: v_fma_f16 v2, v6, v8, v10 +; GFX8-NEXT: v_fma_f16 v2, v7, v9, v11 ; GFX8-NEXT: v_fma_f16 v1, v1, v3, v5 -; GFX8-NEXT: v_fma_f16 v3, v7, v9, v11 +; GFX8-NEXT: v_fma_f16 v3, v6, v8, v10 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmul.v2f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmul.v2f16.ll index 543f8e413abd8..e855bf395b28c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmul.v2f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmul.v2f16.ll @@ -117,10 +117,13 @@ define <3 x half> @v_fmul_v3f16(<3 x half> %a, <3 x half> %b) { ; GFX8-LABEL: v_fmul_v3f16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_and_b32 s4, 0xffff, s4 ; GFX8-NEXT: v_mul_f16_e32 v4, v0, v2 ; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; GFX8-NEXT: v_mul_f16_e32 v1, v1, v3 +; GFX8-NEXT: s_lshl_b32 s4, s4, 16 ; GFX8-NEXT: v_or_b32_e32 v0, v4, v0 +; GFX8-NEXT: v_or_b32_e32 v1, s4, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_fmul_v3f16: @@ -146,10 +149,13 @@ define <3 x half> @v_fmul_v3f16_fneg_lhs(<3 x half> %a, <3 x half> %b) { ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 ; GFX8-NEXT: v_xor_b32_e32 v1, 0x80008000, v1 +; GFX8-NEXT: s_and_b32 s4, 0xffff, s4 ; GFX8-NEXT: v_mul_f16_e32 v4, v0, v2 ; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; GFX8-NEXT: v_mul_f16_e32 v1, v1, v3 +; GFX8-NEXT: s_lshl_b32 s4, s4, 16 ; GFX8-NEXT: v_or_b32_e32 v0, v4, v0 +; GFX8-NEXT: v_or_b32_e32 v1, s4, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_fmul_v3f16_fneg_lhs: @@ -176,10 +182,13 @@ define <3 x half> @v_fmul_v3f16_fneg_rhs(<3 x half> %a, <3 x half> %b) { ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 ; GFX8-NEXT: v_xor_b32_e32 v3, 0x80008000, v3 +; GFX8-NEXT: s_and_b32 s4, 0xffff, s4 ; GFX8-NEXT: v_mul_f16_e32 v4, v0, v2 ; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; GFX8-NEXT: v_mul_f16_e32 v1, v1, v3 +; GFX8-NEXT: s_lshl_b32 s4, s4, 16 ; GFX8-NEXT: v_or_b32_e32 v0, v4, v0 +; GFX8-NEXT: v_or_b32_e32 v1, s4, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_fmul_v3f16_fneg_rhs: @@ -204,10 +213,13 @@ define <3 x half> @v_fmul_v3f16_fneg_lhs_fneg_rhs(<3 x half> %a, <3 x half> %b) ; GFX8-LABEL: v_fmul_v3f16_fneg_lhs_fneg_rhs: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_and_b32 s4, 0xffff, s4 ; GFX8-NEXT: v_mul_f16_e32 v4, v0, v2 ; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; GFX8-NEXT: v_mul_f16_e32 v1, v1, v3 +; GFX8-NEXT: s_lshl_b32 s4, s4, 16 ; GFX8-NEXT: v_or_b32_e32 v0, v4, v0 +; GFX8-NEXT: v_or_b32_e32 v1, s4, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_fmul_v3f16_fneg_lhs_fneg_rhs: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll index 99261cc269858..5dc2802f31af5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll @@ -323,10 +323,12 @@ define half @v_pow_f16(half %x, half %y) { ; GFX6-LABEL: v_pow_f16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; 
GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 ; GFX6-NEXT: v_mov_b32_e32 v2, 0xc2fc0000 ; GFX6-NEXT: v_mov_b32_e32 v3, 0x42800000 +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-NEXT: v_log_f32_e32 v0, v0 ; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 ; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 @@ -337,6 +339,7 @@ define half @v_pow_f16(half %x, half %y) { ; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX6-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_pow_f16: @@ -407,6 +410,10 @@ define <2 x half> @v_pow_v2f16(<2 x half> %x, <2 x half> %y) { ; GFX6-LABEL: v_pow_v2f16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-NEXT: v_cvt_f16_f32_e32 v3, v3 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2 ; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 @@ -432,6 +439,8 @@ define <2 x half> @v_pow_v2f16(<2 x half> %x, <2 x half> %y) { ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_pow_v2f16: @@ -461,13 +470,14 @@ define <2 x half> @v_pow_v2f16(<2 x half> %x, <2 x half> %y) { ; GFX9-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; GFX9-NEXT: v_cvt_f32_f16_e32 v2, v2 ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX9-NEXT: s_mov_b32 s4, 0x5040100 ; GFX9-NEXT: v_mul_legacy_f32_e32 v2, v2, v3 ; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 ; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v2 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX9-NEXT: v_exp_f16_e32 v1, v1 ; GFX9-NEXT: v_exp_f16_e32 v0, v0 -; GFX9-NEXT: v_lshl_or_b32 v0, v0, 16, v1 +; GFX9-NEXT: v_perm_b32 v0, v0, v1, s4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_pow_v2f16: @@ -481,12 +491,11 @@ define <2 x half> @v_pow_v2f16(<2 x half> %x, <2 x half> %y) { ; GFX10-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX10-NEXT: v_mul_legacy_f32_e32 v2, v2, v3 ; GFX10-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 -; GFX10-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX10-NEXT: v_cvt_f16_f32_e32 v1, v2 ; GFX10-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX10-NEXT: v_exp_f16_e32 v1, v2 +; GFX10-NEXT: v_exp_f16_e32 v1, v1 ; GFX10-NEXT: v_exp_f16_e32 v0, v0 -; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX10-NEXT: v_lshl_or_b32 v0, v0, 16, v1 +; GFX10-NEXT: v_perm_b32 v0, v0, v1, 0x5040100 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-TRUE16-LABEL: v_pow_v2f16: @@ -512,27 +521,26 @@ define <2 x half> @v_pow_v2f16(<2 x half> %x, <2 x half> %y) { ; GFX11-FAKE16-LABEL: v_pow_v2f16: ; GFX11-FAKE16: ; %bb.0: ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: v_log_f16_e32 v2, v0 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX11-FAKE16-NEXT: v_log_f16_e32 v0, v0 ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v1 ; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_log_f16_e32 v0, v0 +; GFX11-FAKE16-NEXT: v_log_f16_e32 v2, v2 ; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v3 ; 
GFX11-FAKE16-NEXT: s_waitcnt_depctr 0xfff -; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v2, v2 ; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v2, v2 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_dual_mul_dx9_zero_f32 v1, v2, v1 :: v_dual_mul_dx9_zero_f32 v0, v0, v3 -; GFX11-FAKE16-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-FAKE16-NEXT: v_dual_mul_dx9_zero_f32 v0, v0, v1 :: v_dual_mul_dx9_zero_f32 v1, v2, v3 ; GFX11-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX11-FAKE16-NEXT: v_exp_f16_e32 v1, v1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-FAKE16-NEXT: v_cvt_f16_f32_e32 v1, v1 ; GFX11-FAKE16-NEXT: v_exp_f16_e32 v0, v0 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_exp_f16_e32 v1, v1 ; GFX11-FAKE16-NEXT: s_waitcnt_depctr 0xfff -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v0, v0, 16, v1 +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x, <2 x half> %y) ret <2 x half> %pow @@ -542,36 +550,37 @@ define <2 x half> @v_pow_v2f16_fneg_lhs(<2 x half> %x, <2 x half> %y) { ; GFX6-LABEL: v_pow_v2f16_fneg_lhs: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX6-NEXT: v_or_b32_e32 v0, v1, v0 -; GFX6-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 -; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v0 -; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX6-NEXT: v_cvt_f16_f32_e64 v0, -v0 +; GFX6-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX6-NEXT: v_cvt_f16_f32_e64 v1, -v1 +; GFX6-NEXT: v_cvt_f16_f32_e32 v3, v3 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX6-NEXT: v_log_f32_e32 v1, v1 -; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX6-NEXT: v_mov_b32_e32 v4, 0xc2fc0000 ; GFX6-NEXT: v_log_f32_e32 v0, v0 -; GFX6-NEXT: v_mov_b32_e32 v4, 0x42800000 +; GFX6-NEXT: v_mov_b32_e32 v5, 0x42800000 +; GFX6-NEXT: v_log_f32_e32 v1, v1 +; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v2 +; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v0, v4 +; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v5, vcc +; GFX6-NEXT: v_add_f32_e32 v0, v0, v2 +; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v3 +; GFX6-NEXT: v_not_b32_e32 v3, 63 +; GFX6-NEXT: v_cndmask_b32_e32 v6, 0, v3, vcc +; GFX6-NEXT: v_exp_f32_e32 v0, v0 ; GFX6-NEXT: v_mul_legacy_f32_e32 v1, v1, v2 -; GFX6-NEXT: v_mov_b32_e32 v2, 0xc2fc0000 -; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v1, v2 -; GFX6-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc -; GFX6-NEXT: v_add_f32_e32 v1, v1, v5 -; GFX6-NEXT: v_not_b32_e32 v5, 63 -; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v3 -; GFX6-NEXT: v_cndmask_b32_e32 v6, 0, v5, vcc -; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc +; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v1, v4 +; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v5, vcc +; GFX6-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX6-NEXT: v_exp_f32_e32 v1, v1 -; GFX6-NEXT: v_add_f32_e32 v0, v0, v2 -; GFX6-NEXT: v_exp_f32_e32 v2, v0 -; GFX6-NEXT: v_ldexp_f32_e32 v0, v1, v6 -; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v5, vcc -; GFX6-NEXT: v_ldexp_f32_e32 v1, v2, v1 +; GFX6-NEXT: 
v_cndmask_b32_e32 v2, 0, v3, vcc +; GFX6-NEXT: v_ldexp_f32_e32 v0, v0, v6 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_pow_v2f16_fneg_lhs: @@ -603,13 +612,14 @@ define <2 x half> @v_pow_v2f16_fneg_lhs(<2 x half> %x, <2 x half> %y) { ; GFX9-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; GFX9-NEXT: v_cvt_f32_f16_e32 v2, v2 ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX9-NEXT: s_mov_b32 s4, 0x5040100 ; GFX9-NEXT: v_mul_legacy_f32_e32 v2, v2, v3 ; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 ; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v2 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX9-NEXT: v_exp_f16_e32 v1, v1 ; GFX9-NEXT: v_exp_f16_e32 v0, v0 -; GFX9-NEXT: v_lshl_or_b32 v0, v0, 16, v1 +; GFX9-NEXT: v_perm_b32 v0, v0, v1, s4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_pow_v2f16_fneg_lhs: @@ -624,12 +634,11 @@ define <2 x half> @v_pow_v2f16_fneg_lhs(<2 x half> %x, <2 x half> %y) { ; GFX10-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX10-NEXT: v_mul_legacy_f32_e32 v2, v2, v3 ; GFX10-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 -; GFX10-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX10-NEXT: v_cvt_f16_f32_e32 v1, v2 ; GFX10-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX10-NEXT: v_exp_f16_e32 v1, v2 +; GFX10-NEXT: v_exp_f16_e32 v1, v1 ; GFX10-NEXT: v_exp_f16_e32 v0, v0 -; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX10-NEXT: v_lshl_or_b32 v0, v0, 16, v1 +; GFX10-NEXT: v_perm_b32 v0, v0, v1, 0x5040100 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-TRUE16-LABEL: v_pow_v2f16_fneg_lhs: @@ -661,25 +670,23 @@ define <2 x half> @v_pow_v2f16_fneg_lhs(<2 x half> %x, <2 x half> %y) { ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v1 ; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) -; GFX11-FAKE16-NEXT: v_log_f16_e32 v2, v0 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX11-FAKE16-NEXT: v_log_f16_e32 v0, v0 ; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v3 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_log_f16_e32 v0, v0 +; GFX11-FAKE16-NEXT: v_log_f16_e32 v2, v2 ; GFX11-FAKE16-NEXT: s_waitcnt_depctr 0xfff -; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v2, v2 ; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX11-FAKE16-NEXT: v_dual_mul_dx9_zero_f32 v1, v2, v1 :: v_dual_mul_dx9_zero_f32 v0, v0, v3 +; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX11-FAKE16-NEXT: v_dual_mul_dx9_zero_f32 v0, v0, v1 :: v_dual_mul_dx9_zero_f32 v1, v2, v3 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_cvt_f16_f32_e32 v1, v1 ; GFX11-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX11-FAKE16-NEXT: v_cvt_f16_f32_e32 v1, v1 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_exp_f16_e32 v1, v1 ; GFX11-FAKE16-NEXT: v_exp_f16_e32 v0, v0 +; GFX11-FAKE16-NEXT: v_exp_f16_e32 v1, v1 ; GFX11-FAKE16-NEXT: s_waitcnt_depctr 0xfff -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v0, v0, 16, v1 +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %x.fneg = fneg <2 x half> %x %pow = 
call <2 x half> @llvm.pow.v2f16(<2 x half> %x.fneg, <2 x half> %y) @@ -690,36 +697,37 @@ define <2 x half> @v_pow_v2f16_fneg_rhs(<2 x half> %x, <2 x half> %y) { ; GFX6-LABEL: v_pow_v2f16_fneg_rhs: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-NEXT: v_cvt_f16_f32_e64 v2, -v2 +; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-NEXT: v_cvt_f16_f32_e64 v3, -v3 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX6-NEXT: v_or_b32_e32 v2, v3, v2 -; GFX6-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 +; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2 ; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v2 +; GFX6-NEXT: v_mov_b32_e32 v4, 0xc2fc0000 ; GFX6-NEXT: v_log_f32_e32 v0, v0 -; GFX6-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX6-NEXT: v_mov_b32_e32 v5, 0x42800000 ; GFX6-NEXT: v_log_f32_e32 v1, v1 -; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v3 -; GFX6-NEXT: v_mov_b32_e32 v3, 0xc2fc0000 -; GFX6-NEXT: v_mov_b32_e32 v4, 0x42800000 -; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3 -; GFX6-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc -; GFX6-NEXT: v_add_f32_e32 v0, v0, v5 -; GFX6-NEXT: v_not_b32_e32 v5, 63 +; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v2 +; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v0, v4 +; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v5, vcc +; GFX6-NEXT: v_add_f32_e32 v0, v0, v2 +; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v3 +; GFX6-NEXT: v_not_b32_e32 v3, 63 +; GFX6-NEXT: v_cndmask_b32_e32 v6, 0, v3, vcc +; GFX6-NEXT: v_exp_f32_e32 v0, v0 ; GFX6-NEXT: v_mul_legacy_f32_e32 v1, v1, v2 -; GFX6-NEXT: v_cndmask_b32_e32 v6, 0, v5, vcc -; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v1, v3 -; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc +; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v1, v4 +; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v5, vcc ; GFX6-NEXT: v_add_f32_e32 v1, v1, v2 -; GFX6-NEXT: v_exp_f32_e32 v0, v0 ; GFX6-NEXT: v_exp_f32_e32 v1, v1 -; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v5, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v3, vcc ; GFX6-NEXT: v_ldexp_f32_e32 v0, v0, v6 -; GFX6-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_pow_v2f16_fneg_rhs: @@ -751,33 +759,33 @@ define <2 x half> @v_pow_v2f16_fneg_rhs(<2 x half> %x, <2 x half> %y) { ; GFX9-NEXT: v_cvt_f32_f16_e32 v2, v2 ; GFX9-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX9-NEXT: s_mov_b32 s4, 0x5040100 ; GFX9-NEXT: v_mul_legacy_f32_e32 v2, v2, v3 ; GFX9-NEXT: v_cvt_f16_f32_e32 v2, v2 ; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX9-NEXT: v_exp_f16_e32 v1, v2 ; GFX9-NEXT: v_exp_f16_e32 v0, v0 -; GFX9-NEXT: v_lshl_or_b32 v0, v0, 16, v1 +; GFX9-NEXT: v_perm_b32 v0, v0, v1, s4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_pow_v2f16_fneg_rhs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_log_f16_e32 v2, v0 -; GFX10-NEXT: v_xor_b32_e32 v1, 0x80008000, v1 ; GFX10-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX10-NEXT: v_xor_b32_e32 v1, 0x80008000, v1 ; GFX10-NEXT: v_cvt_f32_f16_e32 v3, v1 ; GFX10-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; 
GFX10-NEXT: v_cvt_f32_f16_e32 v2, v2 ; GFX10-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX10-NEXT: v_mul_legacy_f32_e32 v2, v2, v3 ; GFX10-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 -; GFX10-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX10-NEXT: v_cvt_f16_f32_e32 v1, v2 ; GFX10-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX10-NEXT: v_exp_f16_e32 v1, v2 +; GFX10-NEXT: v_exp_f16_e32 v1, v1 ; GFX10-NEXT: v_exp_f16_e32 v0, v0 -; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX10-NEXT: v_lshl_or_b32 v0, v0, 16, v1 +; GFX10-NEXT: v_perm_b32 v0, v0, v1, 0x5040100 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-TRUE16-LABEL: v_pow_v2f16_fneg_rhs: @@ -805,28 +813,27 @@ define <2 x half> @v_pow_v2f16_fneg_rhs(<2 x half> %x, <2 x half> %y) { ; GFX11-FAKE16-LABEL: v_pow_v2f16_fneg_rhs: ; GFX11-FAKE16: ; %bb.0: ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: v_log_f16_e32 v2, v0 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX11-FAKE16-NEXT: v_log_f16_e32 v0, v0 ; GFX11-FAKE16-NEXT: v_xor_b32_e32 v1, 0x80008000, v1 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_log_f16_e32 v0, v0 +; GFX11-FAKE16-NEXT: v_log_f16_e32 v2, v2 ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v1 ; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX11-FAKE16-NEXT: s_waitcnt_depctr 0xfff -; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v3 ; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v2, v2 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_dual_mul_dx9_zero_f32 v1, v2, v1 :: v_dual_mul_dx9_zero_f32 v0, v0, v3 -; GFX11-FAKE16-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-FAKE16-NEXT: v_dual_mul_dx9_zero_f32 v0, v0, v1 :: v_dual_mul_dx9_zero_f32 v1, v2, v3 ; GFX11-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX11-FAKE16-NEXT: v_exp_f16_e32 v1, v1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-FAKE16-NEXT: v_cvt_f16_f32_e32 v1, v1 ; GFX11-FAKE16-NEXT: v_exp_f16_e32 v0, v0 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_exp_f16_e32 v1, v1 ; GFX11-FAKE16-NEXT: s_waitcnt_depctr 0xfff -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v0, v0, 16, v1 +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %y.fneg = fneg <2 x half> %y %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x, <2 x half> %y.fneg) @@ -837,41 +844,37 @@ define <2 x half> @v_pow_v2f16_fneg_lhs_rhs(<2 x half> %x, <2 x half> %y) { ; GFX6-LABEL: v_pow_v2f16_fneg_lhs_rhs: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX6-NEXT: v_or_b32_e32 v0, v1, v0 -; GFX6-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 -; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v3 -; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v0 -; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 -; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX6-NEXT: v_xor_b32_e32 v1, 0x80008000, v1 +; GFX6-NEXT: v_cvt_f16_f32_e64 v0, -v0 +; GFX6-NEXT: 
v_cvt_f16_f32_e64 v2, -v2 +; GFX6-NEXT: v_cvt_f16_f32_e64 v1, -v1 +; GFX6-NEXT: v_cvt_f16_f32_e64 v3, -v3 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v1 -; GFX6-NEXT: v_log_f32_e32 v3, v3 -; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2 ; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX6-NEXT: v_mov_b32_e32 v4, 0xc2fc0000 ; GFX6-NEXT: v_log_f32_e32 v0, v0 -; GFX6-NEXT: v_mul_legacy_f32_e32 v2, v3, v2 -; GFX6-NEXT: v_mov_b32_e32 v3, 0xc2fc0000 -; GFX6-NEXT: v_mov_b32_e32 v4, 0x42800000 -; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v2, v3 -; GFX6-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc -; GFX6-NEXT: v_add_f32_e32 v2, v2, v5 -; GFX6-NEXT: v_not_b32_e32 v5, 63 -; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 -; GFX6-NEXT: v_cndmask_b32_e32 v6, 0, v5, vcc -; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3 -; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v4, vcc -; GFX6-NEXT: v_exp_f32_e32 v2, v2 -; GFX6-NEXT: v_add_f32_e32 v0, v0, v1 -; GFX6-NEXT: v_exp_f32_e32 v1, v0 -; GFX6-NEXT: v_ldexp_f32_e32 v0, v2, v6 +; GFX6-NEXT: v_mov_b32_e32 v5, 0x42800000 +; GFX6-NEXT: v_log_f32_e32 v1, v1 +; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v2 +; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v0, v4 ; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v5, vcc -; GFX6-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; GFX6-NEXT: v_add_f32_e32 v0, v0, v2 +; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v3 +; GFX6-NEXT: v_not_b32_e32 v3, 63 +; GFX6-NEXT: v_cndmask_b32_e32 v6, 0, v3, vcc +; GFX6-NEXT: v_exp_f32_e32 v0, v0 +; GFX6-NEXT: v_mul_legacy_f32_e32 v1, v1, v2 +; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v1, v4 +; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v5, vcc +; GFX6-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX6-NEXT: v_exp_f32_e32 v1, v1 +; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v3, vcc +; GFX6-NEXT: v_ldexp_f32_e32 v0, v0, v6 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_pow_v2f16_fneg_lhs_rhs: @@ -905,13 +908,14 @@ define <2 x half> @v_pow_v2f16_fneg_lhs_rhs(<2 x half> %x, <2 x half> %y) { ; GFX9-NEXT: v_cvt_f32_f16_e32 v2, v2 ; GFX9-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX9-NEXT: s_mov_b32 s4, 0x5040100 ; GFX9-NEXT: v_mul_legacy_f32_e32 v2, v2, v3 ; GFX9-NEXT: v_cvt_f16_f32_e32 v2, v2 ; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX9-NEXT: v_exp_f16_e32 v1, v2 ; GFX9-NEXT: v_exp_f16_e32 v0, v0 -; GFX9-NEXT: v_lshl_or_b32 v0, v0, 16, v1 +; GFX9-NEXT: v_perm_b32 v0, v0, v1, s4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_pow_v2f16_fneg_lhs_rhs: @@ -927,12 +931,11 @@ define <2 x half> @v_pow_v2f16_fneg_lhs_rhs(<2 x half> %x, <2 x half> %y) { ; GFX10-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX10-NEXT: v_mul_legacy_f32_e32 v2, v2, v3 ; GFX10-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 -; GFX10-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX10-NEXT: v_cvt_f16_f32_e32 v1, v2 ; GFX10-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX10-NEXT: v_exp_f16_e32 v1, v2 +; GFX10-NEXT: v_exp_f16_e32 v1, v1 ; GFX10-NEXT: v_exp_f16_e32 v0, v0 -; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX10-NEXT: v_lshl_or_b32 v0, v0, 16, v1 +; GFX10-NEXT: v_perm_b32 v0, v0, v1, 0x5040100 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-TRUE16-LABEL: v_pow_v2f16_fneg_lhs_rhs: @@ -965,27 +968,26 @@ define <2 x half> @v_pow_v2f16_fneg_lhs_rhs(<2 x half> %x, <2 x half> %y) { ; GFX11-FAKE16-NEXT: 
v_xor_b32_e32 v0, 0x80008000, v0 ; GFX11-FAKE16-NEXT: v_xor_b32_e32 v1, 0x80008000, v1 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_log_f16_e32 v2, v0 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX11-FAKE16-NEXT: v_log_f16_e32 v0, v0 ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v1 ; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_log_f16_e32 v0, v0 +; GFX11-FAKE16-NEXT: v_log_f16_e32 v2, v2 ; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v3, v3 ; GFX11-FAKE16-NEXT: s_waitcnt_depctr 0xfff -; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v2, v2 ; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v2, v2 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_dual_mul_dx9_zero_f32 v1, v2, v1 :: v_dual_mul_dx9_zero_f32 v0, v0, v3 -; GFX11-FAKE16-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-FAKE16-NEXT: v_dual_mul_dx9_zero_f32 v0, v0, v1 :: v_dual_mul_dx9_zero_f32 v1, v2, v3 ; GFX11-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX11-FAKE16-NEXT: v_exp_f16_e32 v1, v1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-FAKE16-NEXT: v_cvt_f16_f32_e32 v1, v1 ; GFX11-FAKE16-NEXT: v_exp_f16_e32 v0, v0 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_exp_f16_e32 v1, v1 ; GFX11-FAKE16-NEXT: s_waitcnt_depctr 0xfff -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v0, v0, 16, v1 +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %x.fneg = fneg <2 x half> %x %y.fneg = fneg <2 x half> %y diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/frem.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/frem.ll index e4e6c44b051c3..e2ba521388e4c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/frem.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/frem.ll @@ -505,8 +505,8 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_cvt_f32_f16_e32 v0, s2 ; CI-NEXT: v_cvt_f32_f16_e32 v1, s3 -; CI-NEXT: s_lshr_b32 s4, s2, 16 -; CI-NEXT: s_lshr_b32 s5, s3, 16 +; CI-NEXT: s_lshr_b32 s4, s3, 16 +; CI-NEXT: s_lshr_b32 s5, s2, 16 ; CI-NEXT: v_div_scale_f32 v2, s[2:3], v1, v1, v0 ; CI-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0 ; CI-NEXT: v_rcp_f32_e32 v4, v2 @@ -522,8 +522,8 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_div_fixup_f32 v2, v2, v1, v0 ; CI-NEXT: v_trunc_f32_e32 v2, v2 ; CI-NEXT: v_fma_f32 v0, -v2, v1, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v1, s4 -; CI-NEXT: v_cvt_f32_f16_e32 v2, s5 +; CI-NEXT: v_cvt_f32_f16_e32 v1, s5 +; CI-NEXT: v_cvt_f32_f16_e32 v2, s4 ; CI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 ; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 ; CI-NEXT: v_div_scale_f32 v3, s[2:3], v2, v2, v1 @@ -557,38 +557,38 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_load_dword s2, s[2:3], 0x0 ; VI-NEXT: s_load_dword s3, s[4:5], 0x10 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_e32 v0, s2 -; VI-NEXT: 
v_cvt_f32_f16_e32 v2, s3 -; VI-NEXT: s_lshr_b32 s5, s3, 16 -; VI-NEXT: v_mov_b32_e32 v1, s3 -; VI-NEXT: s_lshr_b32 s4, s2, 16 -; VI-NEXT: v_rcp_f32_e32 v3, v2 -; VI-NEXT: v_mul_f32_e32 v4, v0, v3 -; VI-NEXT: v_mad_f32 v5, -v2, v4, v0 -; VI-NEXT: v_mac_f32_e32 v4, v5, v3 -; VI-NEXT: v_mad_f32 v0, -v2, v4, v0 -; VI-NEXT: v_mul_f32_e32 v0, v0, v3 -; VI-NEXT: v_and_b32_e32 v0, 0xff800000, v0 -; VI-NEXT: v_add_f32_e32 v0, v0, v4 -; VI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; VI-NEXT: v_cvt_f32_f16_e32 v3, s5 -; VI-NEXT: v_mov_b32_e32 v2, s5 -; VI-NEXT: v_div_fixup_f16 v0, v0, v1, s2 -; VI-NEXT: v_trunc_f16_e32 v0, v0 -; VI-NEXT: v_fma_f16 v0, -v0, v1, s2 -; VI-NEXT: v_cvt_f32_f16_e32 v1, s4 +; VI-NEXT: v_cvt_f32_f16_e32 v2, s2 +; VI-NEXT: v_cvt_f32_f16_e32 v3, s3 +; VI-NEXT: s_lshr_b32 s4, s3, 16 +; VI-NEXT: v_mov_b32_e32 v0, s2 +; VI-NEXT: s_lshr_b32 s5, s2, 16 ; VI-NEXT: v_rcp_f32_e32 v4, v3 -; VI-NEXT: v_mul_f32_e32 v5, v1, v4 -; VI-NEXT: v_mad_f32 v6, -v3, v5, v1 +; VI-NEXT: v_mov_b32_e32 v1, s5 +; VI-NEXT: v_mul_f32_e32 v5, v2, v4 +; VI-NEXT: v_mad_f32 v6, -v3, v5, v2 ; VI-NEXT: v_mac_f32_e32 v5, v6, v4 -; VI-NEXT: v_mad_f32 v1, -v3, v5, v1 -; VI-NEXT: v_mul_f32_e32 v1, v1, v4 -; VI-NEXT: v_and_b32_e32 v1, 0xff800000, v1 -; VI-NEXT: v_add_f32_e32 v1, v1, v5 -; VI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; VI-NEXT: v_div_fixup_f16 v1, v1, v2, s4 -; VI-NEXT: v_trunc_f16_e32 v1, v1 -; VI-NEXT: v_fma_f16 v1, -v1, v2, s4 +; VI-NEXT: v_mad_f32 v2, -v3, v5, v2 +; VI-NEXT: v_mul_f32_e32 v2, v2, v4 +; VI-NEXT: v_and_b32_e32 v2, 0xff800000, v2 +; VI-NEXT: v_add_f32_e32 v2, v2, v5 +; VI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; VI-NEXT: v_cvt_f32_f16_e32 v3, s4 +; VI-NEXT: v_div_fixup_f16 v2, v2, s3, v0 +; VI-NEXT: v_trunc_f16_e32 v2, v2 +; VI-NEXT: v_fma_f16 v0, -v2, s3, v0 +; VI-NEXT: v_cvt_f32_f16_e32 v2, s5 +; VI-NEXT: v_rcp_f32_e32 v4, v3 +; VI-NEXT: v_mul_f32_e32 v5, v2, v4 +; VI-NEXT: v_mad_f32 v6, -v3, v5, v2 +; VI-NEXT: v_mac_f32_e32 v5, v6, v4 +; VI-NEXT: v_mad_f32 v2, -v3, v5, v2 +; VI-NEXT: v_mul_f32_e32 v2, v2, v4 +; VI-NEXT: v_and_b32_e32 v2, 0xff800000, v2 +; VI-NEXT: v_add_f32_e32 v2, v2, v5 +; VI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; VI-NEXT: v_div_fixup_f16 v2, v2, s4, v1 +; VI-NEXT: v_trunc_f16_e32 v2, v2 +; VI-NEXT: v_fma_f16 v1, -v2, s4, v1 ; VI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; VI-NEXT: v_or_b32_e32 v2, v0, v1 ; VI-NEXT: v_mov_b32_e32 v0, s0 @@ -614,11 +614,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_cvt_f32_f16_e32 v0, s2 ; CI-NEXT: v_cvt_f32_f16_e32 v1, s4 -; CI-NEXT: s_lshr_b32 s8, s2, 16 -; CI-NEXT: s_lshr_b32 s9, s3, 16 -; CI-NEXT: s_lshr_b32 s10, s4, 16 +; CI-NEXT: s_lshr_b32 s8, s3, 16 +; CI-NEXT: s_lshr_b32 s9, s2, 16 +; CI-NEXT: s_lshr_b32 s10, s5, 16 ; CI-NEXT: v_div_scale_f32 v2, s[6:7], v1, v1, v0 -; CI-NEXT: s_lshr_b32 s11, s5, 16 +; CI-NEXT: s_lshr_b32 s11, s4, 16 ; CI-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0 ; CI-NEXT: v_rcp_f32_e32 v4, v2 ; CI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 @@ -633,8 +633,8 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_div_fixup_f32 v2, v2, v1, v0 ; CI-NEXT: v_trunc_f32_e32 v2, v2 ; CI-NEXT: v_fma_f32 v0, -v2, v1, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v1, s8 -; CI-NEXT: v_cvt_f32_f16_e32 v2, s10 +; CI-NEXT: v_cvt_f32_f16_e32 v1, s9 +; CI-NEXT: v_cvt_f32_f16_e32 v2, s11 ; CI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 ; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 ; CI-NEXT: v_div_scale_f32 v3, s[6:7], v2, v2, v1 @@ 
-670,8 +670,8 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_div_fixup_f32 v4, v4, v3, v2 ; CI-NEXT: v_trunc_f32_e32 v4, v4 ; CI-NEXT: v_fma_f32 v2, -v4, v3, v2 -; CI-NEXT: v_cvt_f32_f16_e32 v3, s9 -; CI-NEXT: v_cvt_f32_f16_e32 v4, s11 +; CI-NEXT: v_cvt_f32_f16_e32 v3, s8 +; CI-NEXT: v_cvt_f32_f16_e32 v4, s10 ; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 ; CI-NEXT: v_div_scale_f32 v5, s[2:3], v4, v4, v3 ; CI-NEXT: v_div_scale_f32 v6, vcc, v3, v4, v3 @@ -706,75 +706,75 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 ; VI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x20 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_e32 v0, s2 -; VI-NEXT: v_cvt_f32_f16_e32 v2, s4 -; VI-NEXT: s_lshr_b32 s8, s4, 16 -; VI-NEXT: v_mov_b32_e32 v1, s4 -; VI-NEXT: s_lshr_b32 s6, s2, 16 -; VI-NEXT: v_rcp_f32_e32 v3, v2 -; VI-NEXT: s_lshr_b32 s9, s5, 16 -; VI-NEXT: s_lshr_b32 s7, s3, 16 -; VI-NEXT: v_mul_f32_e32 v4, v0, v3 -; VI-NEXT: v_mad_f32 v5, -v2, v4, v0 -; VI-NEXT: v_mac_f32_e32 v4, v5, v3 -; VI-NEXT: v_mad_f32 v0, -v2, v4, v0 -; VI-NEXT: v_mul_f32_e32 v0, v0, v3 +; VI-NEXT: v_cvt_f32_f16_e32 v4, s2 +; VI-NEXT: v_cvt_f32_f16_e32 v5, s4 +; VI-NEXT: s_lshr_b32 s9, s4, 16 +; VI-NEXT: v_mov_b32_e32 v2, s4 +; VI-NEXT: s_lshr_b32 s7, s2, 16 +; VI-NEXT: v_rcp_f32_e32 v6, v5 +; VI-NEXT: v_mov_b32_e32 v3, s9 +; VI-NEXT: s_lshr_b32 s8, s5, 16 +; VI-NEXT: v_mov_b32_e32 v0, s5 +; VI-NEXT: v_mul_f32_e32 v7, v4, v6 +; VI-NEXT: v_mad_f32 v8, -v5, v7, v4 +; VI-NEXT: v_mac_f32_e32 v7, v8, v6 +; VI-NEXT: v_mad_f32 v4, -v5, v7, v4 +; VI-NEXT: v_mul_f32_e32 v4, v4, v6 +; VI-NEXT: v_and_b32_e32 v4, 0xff800000, v4 +; VI-NEXT: v_add_f32_e32 v4, v4, v7 +; VI-NEXT: v_cvt_f16_f32_e32 v4, v4 +; VI-NEXT: v_cvt_f32_f16_e32 v5, s9 +; VI-NEXT: s_lshr_b32 s6, s3, 16 +; VI-NEXT: v_mov_b32_e32 v1, s8 +; VI-NEXT: v_div_fixup_f16 v4, v4, v2, s2 +; VI-NEXT: v_trunc_f16_e32 v4, v4 +; VI-NEXT: v_fma_f16 v2, -v4, v2, s2 +; VI-NEXT: v_cvt_f32_f16_e32 v4, s7 +; VI-NEXT: v_rcp_f32_e32 v6, v5 +; VI-NEXT: v_mul_f32_e32 v7, v4, v6 +; VI-NEXT: v_mad_f32 v8, -v5, v7, v4 +; VI-NEXT: v_mac_f32_e32 v7, v8, v6 +; VI-NEXT: v_mad_f32 v4, -v5, v7, v4 +; VI-NEXT: v_mul_f32_e32 v4, v4, v6 +; VI-NEXT: v_and_b32_e32 v4, 0xff800000, v4 +; VI-NEXT: v_add_f32_e32 v4, v4, v7 +; VI-NEXT: v_cvt_f16_f32_e32 v4, v4 +; VI-NEXT: v_cvt_f32_f16_e32 v5, s5 +; VI-NEXT: v_div_fixup_f16 v4, v4, v3, s7 +; VI-NEXT: v_trunc_f16_e32 v4, v4 +; VI-NEXT: v_fma_f16 v3, -v4, v3, s7 +; VI-NEXT: v_cvt_f32_f16_e32 v4, s3 +; VI-NEXT: v_rcp_f32_e32 v6, v5 +; VI-NEXT: v_mul_f32_e32 v7, v4, v6 +; VI-NEXT: v_mad_f32 v8, -v5, v7, v4 +; VI-NEXT: v_mac_f32_e32 v7, v8, v6 +; VI-NEXT: v_mad_f32 v4, -v5, v7, v4 +; VI-NEXT: v_mul_f32_e32 v4, v4, v6 +; VI-NEXT: v_and_b32_e32 v4, 0xff800000, v4 +; VI-NEXT: v_add_f32_e32 v4, v4, v7 +; VI-NEXT: v_cvt_f16_f32_e32 v4, v4 +; VI-NEXT: v_cvt_f32_f16_e32 v5, s8 +; VI-NEXT: v_div_fixup_f16 v4, v4, v0, s3 +; VI-NEXT: v_trunc_f16_e32 v4, v4 +; VI-NEXT: v_fma_f16 v4, -v4, v0, s3 +; VI-NEXT: v_cvt_f32_f16_e32 v0, s6 +; VI-NEXT: v_rcp_f32_e32 v6, v5 +; VI-NEXT: v_mul_f32_e32 v7, v0, v6 +; VI-NEXT: v_mad_f32 v8, -v5, v7, v0 +; VI-NEXT: v_mac_f32_e32 v7, v8, v6 +; VI-NEXT: v_mad_f32 v0, -v5, v7, v0 +; VI-NEXT: v_mul_f32_e32 v0, v0, v6 ; VI-NEXT: v_and_b32_e32 v0, 0xff800000, v0 -; VI-NEXT: v_add_f32_e32 v0, v0, v4 +; VI-NEXT: v_add_f32_e32 v0, v0, v7 ; VI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; VI-NEXT: v_cvt_f32_f16_e32 v3, s8 -; VI-NEXT: v_mov_b32_e32 v2, 
s8 -; VI-NEXT: v_div_fixup_f16 v0, v0, v1, s2 +; VI-NEXT: v_div_fixup_f16 v0, v0, v1, s6 ; VI-NEXT: v_trunc_f16_e32 v0, v0 -; VI-NEXT: v_fma_f16 v0, -v0, v1, s2 -; VI-NEXT: v_cvt_f32_f16_e32 v1, s6 -; VI-NEXT: v_rcp_f32_e32 v4, v3 -; VI-NEXT: v_mul_f32_e32 v5, v1, v4 -; VI-NEXT: v_mad_f32 v6, -v3, v5, v1 -; VI-NEXT: v_mac_f32_e32 v5, v6, v4 -; VI-NEXT: v_mad_f32 v1, -v3, v5, v1 -; VI-NEXT: v_mul_f32_e32 v1, v1, v4 -; VI-NEXT: v_and_b32_e32 v1, 0xff800000, v1 -; VI-NEXT: v_add_f32_e32 v1, v1, v5 -; VI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; VI-NEXT: v_cvt_f32_f16_e32 v4, s5 -; VI-NEXT: v_mov_b32_e32 v3, s5 -; VI-NEXT: v_div_fixup_f16 v1, v1, v2, s6 -; VI-NEXT: v_trunc_f16_e32 v1, v1 -; VI-NEXT: v_fma_f16 v1, -v1, v2, s6 -; VI-NEXT: v_cvt_f32_f16_e32 v2, s3 -; VI-NEXT: v_rcp_f32_e32 v5, v4 +; VI-NEXT: v_fma_f16 v1, -v0, v1, s6 +; VI-NEXT: v_lshlrev_b32_e32 v0, 16, v3 +; VI-NEXT: v_or_b32_e32 v0, v2, v0 ; VI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; VI-NEXT: v_or_b32_e32 v0, v0, v1 -; VI-NEXT: v_mul_f32_e32 v6, v2, v5 -; VI-NEXT: v_mad_f32 v7, -v4, v6, v2 -; VI-NEXT: v_mac_f32_e32 v6, v7, v5 -; VI-NEXT: v_mad_f32 v2, -v4, v6, v2 -; VI-NEXT: v_mul_f32_e32 v2, v2, v5 -; VI-NEXT: v_and_b32_e32 v2, 0xff800000, v2 -; VI-NEXT: v_add_f32_e32 v2, v2, v6 -; VI-NEXT: v_cvt_f16_f32_e32 v2, v2 -; VI-NEXT: v_cvt_f32_f16_e32 v5, s9 -; VI-NEXT: v_mov_b32_e32 v4, s9 -; VI-NEXT: v_div_fixup_f16 v2, v2, v3, s3 -; VI-NEXT: v_trunc_f16_e32 v2, v2 -; VI-NEXT: v_fma_f16 v2, -v2, v3, s3 -; VI-NEXT: v_cvt_f32_f16_e32 v3, s7 -; VI-NEXT: v_rcp_f32_e32 v6, v5 -; VI-NEXT: v_mul_f32_e32 v7, v3, v6 -; VI-NEXT: v_mad_f32 v8, -v5, v7, v3 -; VI-NEXT: v_mac_f32_e32 v7, v8, v6 -; VI-NEXT: v_mad_f32 v3, -v5, v7, v3 -; VI-NEXT: v_mul_f32_e32 v3, v3, v6 -; VI-NEXT: v_and_b32_e32 v3, 0xff800000, v3 -; VI-NEXT: v_add_f32_e32 v3, v3, v7 -; VI-NEXT: v_cvt_f16_f32_e32 v3, v3 -; VI-NEXT: v_div_fixup_f16 v3, v3, v4, s7 -; VI-NEXT: v_trunc_f16_e32 v3, v3 -; VI-NEXT: v_fma_f16 v3, -v3, v4, s7 -; VI-NEXT: v_lshlrev_b32_e32 v1, 16, v3 -; VI-NEXT: v_or_b32_e32 v1, v2, v1 ; VI-NEXT: v_mov_b32_e32 v3, s1 +; VI-NEXT: v_or_b32_e32 v1, v4, v1 ; VI-NEXT: v_mov_b32_e32 v2, s0 ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll index e60739fd84059..3240d87239d3e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll @@ -3645,6 +3645,7 @@ define amdgpu_ps half @v_fshl_i16_ssv(i16 inreg %lhs, i16 inreg %rhs, i16 %amt) ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX6-NEXT: v_lshr_b32_e32 v0, s0, v0 ; GFX6-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: v_fshl_i16_ssv: @@ -3713,6 +3714,7 @@ define amdgpu_ps half @v_fshl_i16_svs(i16 inreg %lhs, i16 %rhs, i16 inreg %amt) ; GFX6-NEXT: s_lshl_b32 s0, s0, s2 ; GFX6-NEXT: v_lshrrev_b32_e32 v0, s1, v0 ; GFX6-NEXT: v_or_b32_e32 v0, s0, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: v_fshl_i16_svs: @@ -3776,6 +3778,7 @@ define amdgpu_ps half @v_fshl_i16_vss(i16 %lhs, i16 inreg %rhs, i16 inreg %amt) ; GFX6-NEXT: v_lshlrev_b32_e32 v0, s2, v0 ; GFX6-NEXT: s_lshr_b32 s0, s0, s1 ; GFX6-NEXT: v_or_b32_e32 v0, s0, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: v_fshl_i16_vss: @@ -4671,6 +4674,9 @@ define <3 x half> @v_fshl_v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt) 
; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, v4, v3 ; GFX6-NEXT: v_or_b32_e32 v2, v2, v3 +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_fshl_v3i16: @@ -4698,12 +4704,15 @@ define <3 x half> @v_fshl_v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt) ; GFX8-NEXT: v_and_b32_e32 v4, 15, v4 ; GFX8-NEXT: v_lshlrev_b16_e32 v1, v2, v1 ; GFX8-NEXT: v_lshrrev_b16_e32 v2, 1, v3 +; GFX8-NEXT: s_and_b32 s4, 0xffff, s4 ; GFX8-NEXT: v_lshrrev_b16_e32 v2, v4, v2 ; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX8-NEXT: s_lshl_b32 s4, s4, 16 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v2 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX8-NEXT: v_mov_b32_e32 v2, s4 ; GFX8-NEXT: v_or_b32_sdwa v0, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX8-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX8-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_fshl_v3i16: @@ -4722,47 +4731,51 @@ define <3 x half> @v_fshl_v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt) ; GFX9-NEXT: v_pk_lshlrev_b16 v1, v2, v1 ; GFX9-NEXT: v_pk_lshrrev_b16 v2, 1, v3 op_sel_hi:[0,1] ; GFX9-NEXT: v_pk_lshrrev_b16 v2, v4, v2 +; GFX9-NEXT: s_mov_b32 s4, 0xffff ; GFX9-NEXT: v_or_b32_e32 v1, v1, v2 +; GFX9-NEXT: v_bfi_b32 v0, s4, v0, v0 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_fshl_v3i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_xor_b32_e32 v6, -1, v4 -; GFX10-NEXT: v_xor_b32_e32 v7, -1, v5 ; GFX10-NEXT: v_and_b32_e32 v4, 0xf000f, v4 ; GFX10-NEXT: v_pk_lshrrev_b16 v2, 1, v2 op_sel_hi:[0,1] +; GFX10-NEXT: v_xor_b32_e32 v7, -1, v5 ; GFX10-NEXT: v_and_b32_e32 v5, 0xf000f, v5 ; GFX10-NEXT: v_and_b32_e32 v6, 0xf000f, v6 ; GFX10-NEXT: v_pk_lshrrev_b16 v3, 1, v3 op_sel_hi:[0,1] -; GFX10-NEXT: v_and_b32_e32 v7, 0xf000f, v7 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, v4, v0 +; GFX10-NEXT: v_and_b32_e32 v4, 0xf000f, v7 ; GFX10-NEXT: v_pk_lshlrev_b16 v1, v5, v1 ; GFX10-NEXT: v_pk_lshrrev_b16 v2, v6, v2 -; GFX10-NEXT: v_pk_lshrrev_b16 v3, v7, v3 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX10-NEXT: v_or_b32_e32 v1, v1, v3 +; GFX10-NEXT: v_pk_lshrrev_b16 v2, v4, v3 +; GFX10-NEXT: v_bfi_b32 v0, 0xffff, v0, v0 +; GFX10-NEXT: v_or_b32_e32 v1, v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_fshl_v3i16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_xor_b32_e32 v6, -1, v4 -; GFX11-NEXT: v_xor_b32_e32 v7, -1, v5 ; GFX11-NEXT: v_and_b32_e32 v4, 0xf000f, v4 ; GFX11-NEXT: v_pk_lshrrev_b16 v2, 1, v2 op_sel_hi:[0,1] +; GFX11-NEXT: v_xor_b32_e32 v7, -1, v5 ; GFX11-NEXT: v_and_b32_e32 v5, 0xf000f, v5 ; GFX11-NEXT: v_and_b32_e32 v6, 0xf000f, v6 ; GFX11-NEXT: v_pk_lshrrev_b16 v3, 1, v3 op_sel_hi:[0,1] -; GFX11-NEXT: v_and_b32_e32 v7, 0xf000f, v7 ; GFX11-NEXT: v_pk_lshlrev_b16 v0, v4, v0 +; GFX11-NEXT: v_and_b32_e32 v4, 0xf000f, v7 ; GFX11-NEXT: v_pk_lshlrev_b16 v1, v5, v1 ; GFX11-NEXT: v_pk_lshrrev_b16 v2, v6, v2 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_pk_lshrrev_b16 v3, v7, v3 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX11-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-NEXT: v_or_b32_e32 v1, v1, v3 +; GFX11-NEXT: v_pk_lshrrev_b16 v2, v4, v3 +; GFX11-NEXT: 
s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_bfi_b32 v0, 0xffff, v0, v0 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] %result = call <3 x i16> @llvm.fshl.v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt) %cast.result = bitcast <3 x i16> %result to <3 x half> @@ -5036,6 +5049,10 @@ define <4 x half> @v_fshl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %amt) ; GFX6-NEXT: v_and_b32_e32 v5, 0xffff, v5 ; GFX6-NEXT: v_lshrrev_b32_e32 v4, v5, v4 ; GFX6-NEXT: v_or_b32_e32 v3, v3, v4 +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_fshl_v4i16: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll index 36a6614a5620c..50163ebbf4d6b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll @@ -3389,6 +3389,7 @@ define amdgpu_ps half @v_fshr_i16_ssv(i16 inreg %lhs, i16 inreg %rhs, i16 %amt) ; GFX6-NEXT: s_and_b32 s0, s1, 0xffff ; GFX6-NEXT: v_lshr_b32_e32 v1, s0, v1 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: v_fshr_i16_ssv: @@ -3453,6 +3454,7 @@ define amdgpu_ps half @v_fshr_i16_svs(i16 inreg %lhs, i16 %rhs, i16 inreg %amt) ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX6-NEXT: v_lshrrev_b32_e32 v0, s1, v0 ; GFX6-NEXT: v_or_b32_e32 v0, s0, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: v_fshr_i16_svs: @@ -3517,6 +3519,7 @@ define amdgpu_ps half @v_fshr_i16_vss(i16 %lhs, i16 inreg %rhs, i16 inreg %amt) ; GFX6-NEXT: s_and_b32 s0, s0, 0xffff ; GFX6-NEXT: s_lshr_b32 s0, s0, s1 ; GFX6-NEXT: v_or_b32_e32 v0, s0, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: v_fshr_i16_vss: @@ -4584,6 +4587,9 @@ define <3 x half> @v_fshr_v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt) ; GFX6-NEXT: v_lshlrev_b32_e32 v2, v5, v2 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, v4, v3 ; GFX6-NEXT: v_or_b32_e32 v2, v2, v3 +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_fshr_v3i16: @@ -4624,13 +4630,16 @@ define <3 x half> @v_fshr_v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt) ; GFX8-NEXT: v_xor_b32_e32 v3, -1, v3 ; GFX8-NEXT: v_and_b32_e32 v3, 15, v3 ; GFX8-NEXT: v_lshrrev_b16_e32 v2, 1, v2 +; GFX8-NEXT: s_and_b32 s4, 0xffff, s4 ; GFX8-NEXT: v_lshlrev_b16_e32 v1, v4, v1 ; GFX8-NEXT: v_lshrrev_b16_e32 v2, v3, v2 ; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX8-NEXT: s_lshl_b32 s4, s4, 16 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v2 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX8-NEXT: v_mov_b32_e32 v2, s4 ; GFX8-NEXT: v_or_b32_sdwa v0, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX8-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX8-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_fshr_v3i16: @@ -4649,47 +4658,51 @@ define <3 x half> @v_fshr_v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt) ; GFX9-NEXT: v_pk_lshlrev_b16 v1, 1, v1 op_sel_hi:[0,1] ; GFX9-NEXT: v_pk_lshlrev_b16 v1, v4, v1 ; GFX9-NEXT: v_pk_lshrrev_b16 v2, v2, v3 +; GFX9-NEXT: s_mov_b32 s4, 0xffff 
; GFX9-NEXT: v_or_b32_e32 v1, v1, v2 +; GFX9-NEXT: v_bfi_b32 v0, s4, v0, v0 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_fshr_v3i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_xor_b32_e32 v6, -1, v4 -; GFX10-NEXT: v_xor_b32_e32 v7, -1, v5 ; GFX10-NEXT: v_and_b32_e32 v4, 0xf000f, v4 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1] +; GFX10-NEXT: v_xor_b32_e32 v7, -1, v5 ; GFX10-NEXT: v_and_b32_e32 v5, 0xf000f, v5 ; GFX10-NEXT: v_and_b32_e32 v6, 0xf000f, v6 ; GFX10-NEXT: v_pk_lshlrev_b16 v1, 1, v1 op_sel_hi:[0,1] -; GFX10-NEXT: v_and_b32_e32 v7, 0xf000f, v7 ; GFX10-NEXT: v_pk_lshrrev_b16 v2, v4, v2 +; GFX10-NEXT: v_and_b32_e32 v4, 0xf000f, v7 ; GFX10-NEXT: v_pk_lshrrev_b16 v3, v5, v3 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, v6, v0 -; GFX10-NEXT: v_pk_lshlrev_b16 v1, v7, v1 +; GFX10-NEXT: v_pk_lshlrev_b16 v1, v4, v1 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v2 ; GFX10-NEXT: v_or_b32_e32 v1, v1, v3 +; GFX10-NEXT: v_bfi_b32 v0, 0xffff, v0, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_fshr_v3i16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_xor_b32_e32 v6, -1, v4 -; GFX11-NEXT: v_xor_b32_e32 v7, -1, v5 ; GFX11-NEXT: v_and_b32_e32 v4, 0xf000f, v4 ; GFX11-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1] +; GFX11-NEXT: v_xor_b32_e32 v7, -1, v5 ; GFX11-NEXT: v_and_b32_e32 v5, 0xf000f, v5 ; GFX11-NEXT: v_and_b32_e32 v6, 0xf000f, v6 ; GFX11-NEXT: v_pk_lshlrev_b16 v1, 1, v1 op_sel_hi:[0,1] -; GFX11-NEXT: v_and_b32_e32 v7, 0xf000f, v7 ; GFX11-NEXT: v_pk_lshrrev_b16 v2, v4, v2 +; GFX11-NEXT: v_and_b32_e32 v4, 0xf000f, v7 ; GFX11-NEXT: v_pk_lshrrev_b16 v3, v5, v3 ; GFX11-NEXT: v_pk_lshlrev_b16 v0, v6, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_pk_lshlrev_b16 v1, v7, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_pk_lshlrev_b16 v1, v4, v1 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_or_b32_e32 v1, v1, v3 +; GFX11-NEXT: v_bfi_b32 v0, 0xffff, v0, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %result = call <3 x i16> @llvm.fshr.v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt) %cast.result = bitcast <3 x i16> %result to <3 x half> @@ -5009,36 +5022,50 @@ define <4 x half> @v_fshr_v4i16(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %amt) ; GFX6-NEXT: v_and_b32_e32 v5, 0xffff, v8 ; GFX6-NEXT: v_lshrrev_b32_e32 v4, v5, v4 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v4 -; GFX6-NEXT: v_bfe_u32 v4, v6, 1, 15 -; GFX6-NEXT: v_lshlrev_b32_e32 v2, 1, v2 -; GFX6-NEXT: v_lshrrev_b32_e32 v4, 14, v4 -; GFX6-NEXT: v_or_b32_e32 v2, v2, v4 -; GFX6-NEXT: v_bfe_u32 v4, v7, 1, 15 -; GFX6-NEXT: v_lshlrev_b32_e32 v3, 1, v3 -; GFX6-NEXT: v_lshrrev_b32_e32 v4, 14, v4 -; GFX6-NEXT: v_or_b32_e32 v3, v3, v4 -; GFX6-NEXT: v_lshlrev_b32_e32 v4, 1, v6 -; GFX6-NEXT: v_xor_b32_e32 v6, -1, v9 -; GFX6-NEXT: v_lshlrev_b32_e32 v5, 1, v7 -; GFX6-NEXT: v_lshrrev_b32_e32 v7, 16, v6 -; GFX6-NEXT: v_and_b32_e32 v8, 15, v6 -; GFX6-NEXT: v_xor_b32_e32 v6, -1, v6 -; GFX6-NEXT: v_and_b32_e32 v6, 15, v6 -; GFX6-NEXT: v_and_b32_e32 v8, 0xffff, v8 -; GFX6-NEXT: v_bfe_u32 v4, v4, 1, 15 -; GFX6-NEXT: v_and_b32_e32 v6, 0xffff, v6 -; GFX6-NEXT: v_lshlrev_b32_e32 v2, v8, v2 -; GFX6-NEXT: v_lshrrev_b32_e32 v4, v6, v4 -; GFX6-NEXT: v_or_b32_e32 v2, v2, v4 -; GFX6-NEXT: v_and_b32_e32 v4, 15, v7 -; GFX6-NEXT: v_xor_b32_e32 
v6, -1, v7 -; GFX6-NEXT: v_and_b32_e32 v6, 15, v6 -; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4 -; GFX6-NEXT: v_lshlrev_b32_e32 v3, v4, v3 -; GFX6-NEXT: v_bfe_u32 v4, v5, 1, 15 -; GFX6-NEXT: v_and_b32_e32 v5, 0xffff, v6 -; GFX6-NEXT: v_lshrrev_b32_e32 v4, v5, v4 -; GFX6-NEXT: v_or_b32_e32 v3, v3, v4 +; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX6-NEXT: v_lshlrev_b32_e32 v1, 1, v2 +; GFX6-NEXT: v_bfe_u32 v2, v6, 1, 15 +; GFX6-NEXT: v_lshrrev_b32_e32 v2, 14, v2 +; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 +; GFX6-NEXT: v_lshlrev_b32_e32 v2, 1, v3 +; GFX6-NEXT: v_bfe_u32 v3, v7, 1, 15 +; GFX6-NEXT: v_lshrrev_b32_e32 v3, 14, v3 +; GFX6-NEXT: v_xor_b32_e32 v5, -1, v9 +; GFX6-NEXT: v_or_b32_e32 v2, v2, v3 +; GFX6-NEXT: v_lshlrev_b32_e32 v3, 1, v6 +; GFX6-NEXT: v_lshlrev_b32_e32 v4, 1, v7 +; GFX6-NEXT: v_lshrrev_b32_e32 v6, 16, v5 +; GFX6-NEXT: v_and_b32_e32 v7, 15, v5 +; GFX6-NEXT: v_xor_b32_e32 v5, -1, v5 +; GFX6-NEXT: v_and_b32_e32 v5, 15, v5 +; GFX6-NEXT: v_and_b32_e32 v7, 0xffff, v7 +; GFX6-NEXT: v_bfe_u32 v3, v3, 1, 15 +; GFX6-NEXT: v_and_b32_e32 v5, 0xffff, v5 +; GFX6-NEXT: v_lshlrev_b32_e32 v1, v7, v1 +; GFX6-NEXT: v_lshrrev_b32_e32 v3, v5, v3 +; GFX6-NEXT: v_or_b32_e32 v1, v1, v3 +; GFX6-NEXT: v_and_b32_e32 v3, 15, v6 +; GFX6-NEXT: v_xor_b32_e32 v5, -1, v6 +; GFX6-NEXT: v_and_b32_e32 v5, 15, v5 +; GFX6-NEXT: v_and_b32_e32 v3, 0xffff, v3 +; GFX6-NEXT: v_lshlrev_b32_e32 v2, v3, v2 +; GFX6-NEXT: v_bfe_u32 v3, v4, 1, 15 +; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v5 +; GFX6-NEXT: v_lshrrev_b32_e32 v3, v4, v3 +; GFX6-NEXT: v_or_b32_e32 v2, v2, v3 +; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX6-NEXT: v_or_b32_e32 v2, v1, v2 +; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v2 +; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_fshr_v4i16: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll index 1f4330afb58d2..f2fdc6fef2d35 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll @@ -7,9 +7,9 @@ define i1 @i1_func_void() #0 { ; CHECK-LABEL: name: i1_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s1) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i1) = G_LOAD [[DEF]](p1) :: (load (i1) from `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[LOAD]](i1) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i1, ptr addrspace(1) undef ret i1 %val @@ -19,9 +19,9 @@ define zeroext i1 @i1_zeroext_func_void() #0 { ; CHECK-LABEL: name: i1_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s1) - ; CHECK-NEXT: $vgpr0 
= COPY [[ZEXT]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i1) = G_LOAD [[DEF]](p1) :: (load (i1) from `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[LOAD]](i1) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i1, ptr addrspace(1) undef ret i1 %val @@ -31,9 +31,9 @@ define signext i1 @i1_signext_func_void() #0 { ; CHECK-LABEL: name: i1_signext_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s1) - ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i1) = G_LOAD [[DEF]](p1) :: (load (i1) from `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[LOAD]](i1) + ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i1, ptr addrspace(1) undef ret i1 %val @@ -43,9 +43,9 @@ define i7 @i7_func_void() #0 { ; CHECK-LABEL: name: i7_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load (s7) from `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s7) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i7) = G_LOAD [[DEF]](p1) :: (load (i7) from `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[LOAD]](i7) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i7, ptr addrspace(1) undef ret i7 %val @@ -55,9 +55,9 @@ define zeroext i7 @i7_zeroext_func_void() #0 { ; CHECK-LABEL: name: i7_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load (s7) from `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s7) - ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i7) = G_LOAD [[DEF]](p1) :: (load (i7) from `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[LOAD]](i7) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i7, ptr addrspace(1) undef ret i7 %val @@ -67,9 +67,9 @@ define signext i7 @i7_signext_func_void() #0 { ; CHECK-LABEL: name: i7_signext_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load (s7) from `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s7) - ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i7) = G_LOAD [[DEF]](p1) :: (load (i7) from `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[LOAD]](i7) + ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i7, ptr addrspace(1) undef ret i7 %val @@ -79,9 +79,9 @@ define i8 @i8_func_void() #0 { ; CHECK-LABEL: name: i8_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: 
[[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s8) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i8) = G_LOAD [[DEF]](p1) :: (load (i8) from `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[LOAD]](i8) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i8, ptr addrspace(1) undef ret i8 %val @@ -91,9 +91,9 @@ define zeroext i8 @i8_zeroext_func_void() #0 { ; CHECK-LABEL: name: i8_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s8) - ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i8) = G_LOAD [[DEF]](p1) :: (load (i8) from `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[LOAD]](i8) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i8, ptr addrspace(1) undef ret i8 %val @@ -103,9 +103,9 @@ define signext i8 @i8_signext_func_void() #0 { ; CHECK-LABEL: name: i8_signext_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s8) - ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i8) = G_LOAD [[DEF]](p1) :: (load (i8) from `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[LOAD]](i8) + ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i8, ptr addrspace(1) undef ret i8 %val @@ -115,9 +115,9 @@ define i16 @i16_func_void() #0 { ; CHECK-LABEL: name: i16_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i16) = G_LOAD [[DEF]](p1) :: (load (i16) from `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[LOAD]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i16, ptr addrspace(1) undef ret i16 %val @@ -127,9 +127,9 @@ define zeroext i16 @i16_zeroext_func_void() #0 { ; CHECK-LABEL: name: i16_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i16) = G_LOAD [[DEF]](p1) :: (load (i16) from `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[LOAD]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i16, ptr addrspace(1) undef ret i16 %val @@ -139,9 +139,9 @@ define signext i16 @i16_signext_func_void() #0 { ; CHECK-LABEL: name: i16_signext_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: 
[[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i16) = G_LOAD [[DEF]](p1) :: (load (i16) from `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[LOAD]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i16, ptr addrspace(1) undef ret i16 %val @@ -151,9 +151,10 @@ define half @f16_func_void() #0 { ; CHECK-LABEL: name: f16_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(f16) = G_LOAD [[DEF]](p1) :: (load (f16) from `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i16) = G_BITCAST [[LOAD]](f16) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load half, ptr addrspace(1) undef ret half %val @@ -163,9 +164,9 @@ define i24 @i24_func_void() #0 { ; CHECK-LABEL: name: i24_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load (s24) from `ptr addrspace(1) undef`, align 4, addrspace 1) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s24) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i24) = G_LOAD [[DEF]](p1) :: (load (i24) from `ptr addrspace(1) undef`, align 4, addrspace 1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[LOAD]](i24) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i24, ptr addrspace(1) undef ret i24 %val @@ -175,9 +176,9 @@ define zeroext i24 @i24_zeroext_func_void() #0 { ; CHECK-LABEL: name: i24_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load (s24) from `ptr addrspace(1) undef`, align 4, addrspace 1) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s24) - ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i24) = G_LOAD [[DEF]](p1) :: (load (i24) from `ptr addrspace(1) undef`, align 4, addrspace 1) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[LOAD]](i24) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i24, ptr addrspace(1) undef ret i24 %val @@ -187,9 +188,9 @@ define signext i24 @i24_signext_func_void() #0 { ; CHECK-LABEL: name: i24_signext_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load (s24) from `ptr addrspace(1) undef`, align 4, addrspace 1) - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s24) - ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i24) = G_LOAD [[DEF]](p1) :: (load (i24) from `ptr addrspace(1) undef`, align 4, addrspace 1) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[LOAD]](i24) + ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = 
load i24, ptr addrspace(1) undef ret i24 %val @@ -199,12 +200,12 @@ define <2 x i24> @v2i24_func_void() #0 { ; CHECK-LABEL: name: v2i24_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s24>) = G_LOAD [[DEF]](p1) :: (load (<2 x s24>) from `ptr addrspace(1) undef`, align 8, addrspace 1) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s24), [[UV1:%[0-9]+]]:_(s24) = G_UNMERGE_VALUES [[LOAD]](<2 x s24>) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s24) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s24) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT1]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i24>) = G_LOAD [[DEF]](p1) :: (load (<2 x i24>) from `ptr addrspace(1) undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i24), [[UV1:%[0-9]+]]:_(i24) = G_UNMERGE_VALUES [[LOAD]](<2 x i24>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[UV]](i24) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[UV1]](i24) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT1]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load <2 x i24>, ptr addrspace(1) undef ret <2 x i24> %val @@ -214,14 +215,14 @@ define <3 x i24> @v3i24_func_void() #0 { ; CHECK-LABEL: name: v3i24_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s24>) = G_LOAD [[DEF]](p1) :: (load (<3 x s24>) from `ptr addrspace(1) undef`, align 16, addrspace 1) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s24), [[UV1:%[0-9]+]]:_(s24), [[UV2:%[0-9]+]]:_(s24) = G_UNMERGE_VALUES [[LOAD]](<3 x s24>) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s24) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s24) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s24) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT2]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i24>) = G_LOAD [[DEF]](p1) :: (load (<3 x i24>) from `ptr addrspace(1) undef`, align 16, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i24), [[UV1:%[0-9]+]]:_(i24), [[UV2:%[0-9]+]]:_(i24) = G_UNMERGE_VALUES [[LOAD]](<3 x i24>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[UV]](i24) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[UV1]](i24) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[UV2]](i24) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT2]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = load <3 x i24>, ptr addrspace(1) undef ret <3 x i24> %val @@ -231,8 +232,8 @@ define i32 @i32_func_void() #0 { ; CHECK-LABEL: name: i32_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p1) :: (load (s32) from `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[DEF]](p1) :: (load (i32) from `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i32, ptr addrspace(1) undef ret i32 %val @@ -242,11 +243,11 @@ define i48 @i48_func_void() #0 { ; CHECK-LABEL: name: i48_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: 
[[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load (s48) from `ptr addrspace(1) undef`, align 8, addrspace 1) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s48) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i48) = G_LOAD [[DEF]](p1) :: (load (i48) from `ptr addrspace(1) undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i48) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ANYEXT]](i64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load i48, ptr addrspace(1) undef, align 8 ret i48 %val @@ -256,11 +257,11 @@ define signext i48 @i48_signext_func_void() #0 { ; CHECK-LABEL: name: i48_signext_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load (s48) from `ptr addrspace(1) undef`, align 8, addrspace 1) - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s48) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i48) = G_LOAD [[DEF]](p1) :: (load (i48) from `ptr addrspace(1) undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(i64) = G_SEXT [[LOAD]](i48) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SEXT]](i64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load i48, ptr addrspace(1) undef, align 8 ret i48 %val @@ -270,11 +271,11 @@ define zeroext i48 @i48_zeroext_func_void() #0 { ; CHECK-LABEL: name: i48_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load (s48) from `ptr addrspace(1) undef`, align 8, addrspace 1) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s48) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i48) = G_LOAD [[DEF]](p1) :: (load (i48) from `ptr addrspace(1) undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[LOAD]](i48) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ZEXT]](i64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load i48, ptr addrspace(1) undef, align 8 ret i48 %val @@ -284,10 +285,10 @@ define i64 @i64_func_void() #0 { ; CHECK-LABEL: name: i64_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[DEF]](p1) :: (load (s64) from `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD 
[[DEF]](p1) :: (load (i64) from `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](i64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load i64, ptr addrspace(1) undef ret i64 %val @@ -297,12 +298,12 @@ define i65 @i65_func_void() #0 { ; CHECK-LABEL: name: i65_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load (s65) from `ptr addrspace(1) undef`, align 8, addrspace 1) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s96) = G_ANYEXT [[LOAD]](s65) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s96) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i65) = G_LOAD [[DEF]](p1) :: (load (i65) from `ptr addrspace(1) undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i96) = G_ANYEXT [[LOAD]](i65) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ANYEXT]](i96) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = load i65, ptr addrspace(1) undef ret i65 %val @@ -312,12 +313,12 @@ define signext i65 @i65_signext_func_void() #0 { ; CHECK-LABEL: name: i65_signext_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load (s65) from `ptr addrspace(1) undef`, align 8, addrspace 1) - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s96) = G_SEXT [[LOAD]](s65) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s96) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i65) = G_LOAD [[DEF]](p1) :: (load (i65) from `ptr addrspace(1) undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(i96) = G_SEXT [[LOAD]](i65) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SEXT]](i96) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = load i65, ptr addrspace(1) undef ret i65 %val @@ -327,12 +328,12 @@ define zeroext i65 @i65_zeroext_func_void() #0 { ; CHECK-LABEL: name: i65_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load (s65) from `ptr addrspace(1) undef`, align 8, addrspace 1) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s96) = G_ZEXT [[LOAD]](s65) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s96) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i65) = G_LOAD [[DEF]](p1) :: (load (i65) from `ptr addrspace(1) undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i96) 
= G_ZEXT [[LOAD]](i65) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ZEXT]](i96) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = load i65, ptr addrspace(1) undef ret i65 %val @@ -342,8 +343,8 @@ define float @f32_func_void() #0 { ; CHECK-LABEL: name: f32_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p1) :: (load (s32) from `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(f32) = G_LOAD [[DEF]](p1) :: (load (f32) from `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](f32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load float, ptr addrspace(1) undef ret float %val @@ -353,10 +354,11 @@ define double @f64_func_void() #0 { ; CHECK-LABEL: name: f64_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[DEF]](p1) :: (load (s64) from `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(f64) = G_LOAD [[DEF]](p1) :: (load (f64) from `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i64) = G_BITCAST [[LOAD]](f64) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](i64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load double, ptr addrspace(1) undef ret double %val @@ -366,12 +368,13 @@ define <2 x double> @v2f64_func_void() #0 { ; CHECK-LABEL: name: v2f64_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[DEF]](p1) :: (load (<2 x s64>) from `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x f64>) = G_LOAD [[DEF]](p1) :: (load (<2 x f64>) from `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[LOAD]](<2 x f64>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<2 x i64>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %val = load <2 x double>, ptr addrspace(1) undef ret <2 x double> %val @@ -381,10 +384,10 @@ define <2 x i32> @v2i32_func_void() #0 { ; CHECK-LABEL: name: v2i32_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[DEF]](p1) :: (load (<2 
x s32>) from `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[DEF]](p1) :: (load (<2 x i32>) from `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<2 x i32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load <2 x i32>, ptr addrspace(1) undef ret <2 x i32> %val @@ -394,11 +397,11 @@ define <3 x i32> @v3i32_func_void() #0 { ; CHECK-LABEL: name: v3i32_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[DEF]](p1) :: (load (<3 x s32>) from `ptr addrspace(1) undef`, align 16, addrspace 1) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<3 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[DEF]](p1) :: (load (<3 x i32>) from `ptr addrspace(1) undef`, align 16, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<3 x i32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = load <3 x i32>, ptr addrspace(1) undef ret <3 x i32> %val @@ -408,12 +411,12 @@ define <4 x i32> @v4i32_func_void() #0 { ; CHECK-LABEL: name: v4i32_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: (load (<4 x s32>) from `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[DEF]](p1) :: (load (<4 x i32>) from `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<4 x i32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %val = load <4 x i32>, ptr addrspace(1) undef ret <4 x i32> %val @@ -423,13 +426,13 @@ define <5 x i32> @v5i32_func_void() #0 { ; CHECK-LABEL: name: v5i32_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<5 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load (<5 x s32>) from `ptr addrspace(1) undef`, align 32, addrspace 1) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<5 x s32>) - ; CHECK-NEXT: 
$vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<5 x i32>) = G_LOAD [[DEF]](p1) :: (volatile load (<5 x i32>) from `ptr addrspace(1) undef`, align 32, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<5 x i32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4 %val = load volatile <5 x i32>, ptr addrspace(1) undef ret <5 x i32> %val @@ -440,16 +443,16 @@ define <8 x i32> @v8i32_func_void() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s32>) from %ir.ptr, addrspace 1) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<8 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x i32>) = G_LOAD [[LOAD]](p1) :: (load (<8 x i32>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD1]](<8 x i32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](i32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](i32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef %val = load <8 x i32>, ptr addrspace(1) %ptr @@ -461,24 +464,24 @@ define <16 x i32> @v16i32_func_void() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s32>) from %ir.ptr, addrspace 1) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), 
[[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x i32>) = G_LOAD [[LOAD]](p1) :: (load (<16 x i32>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32), [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32), [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32), [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD1]](<16 x i32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](i32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](i32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](i32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](i32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](i32) + ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](i32) + ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](i32) + ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](i32) + ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](i32) + ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](i32) + ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef %val = load <16 x i32>, ptr addrspace(1) %ptr @@ -490,40 +493,40 @@ define <32 x i32> @v32i32_func_void() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<32 x s32>) from %ir.ptr, addrspace 1) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), 
[[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) - ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) - ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) - ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) - ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) - ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) - ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) - ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) - ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) - ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) - ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) - ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) - ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) - ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) - ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[UV31]](s32) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x i32>) = G_LOAD [[LOAD]](p1) :: (load (<32 x i32>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32), [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32), [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32), [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32), [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32), [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32), [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32), [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32), [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32), [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32), [[UV28:%[0-9]+]]:_(i32), [[UV29:%[0-9]+]]:_(i32), [[UV30:%[0-9]+]]:_(i32), [[UV31:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD1]](<32 x i32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](i32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](i32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](i32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](i32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](i32) + ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](i32) + ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](i32) + ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](i32) + ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](i32) + ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](i32) + ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](i32) + ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](i32) + ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](i32) + ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](i32) + ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](i32) + ; CHECK-NEXT: $vgpr20 = 
COPY [[UV20]](i32) + ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](i32) + ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](i32) + ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](i32) + ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](i32) + ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](i32) + ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](i32) + ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](i32) + ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](i32) + ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](i32) + ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[UV31]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef %val = load <32 x i32>, ptr addrspace(1) %ptr @@ -534,12 +537,12 @@ define <2 x i64> @v2i64_func_void() #0 { ; CHECK-LABEL: name: v2i64_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[DEF]](p1) :: (load (<2 x s64>) from `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[DEF]](p1) :: (load (<2 x i64>) from `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<2 x i64>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %val = load <2 x i64>, ptr addrspace(1) undef ret <2 x i64> %val @@ -550,14 +553,14 @@ define <3 x i64> @v3i64_func_void() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<3 x s64>) from %ir.ptr, align 32, addrspace 1) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s64>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i64>) = G_LOAD [[LOAD]](p1) :: (load (<3 x i64>) from %ir.ptr, align 32, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), 
[[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD1]](<3 x i64>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef %val = load <3 x i64>, ptr addrspace(1) %ptr @@ -569,16 +572,16 @@ define <4 x i64> @v4i64_func_void() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<4 x s64>) from %ir.ptr, addrspace 1) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<4 x s64>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i64>) = G_LOAD [[LOAD]](p1) :: (load (<4 x i64>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD1]](<4 x i64>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](i32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](i32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef %val = load <4 x i64>, ptr addrspace(1) %ptr @@ -590,18 +593,18 @@ define <5 x i64> @v5i64_func_void() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<5 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<5 x s64>) from %ir.ptr, align 64, addrspace 1) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<5 x s64>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: 
$vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<5 x i64>) = G_LOAD [[LOAD]](p1) :: (load (<5 x i64>) from %ir.ptr, align 64, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD1]](<5 x i64>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](i32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](i32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](i32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](i32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9 %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef %val = load <5 x i64>, ptr addrspace(1) %ptr @@ -613,24 +616,24 @@ define <8 x i64> @v8i64_func_void() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s64>) from %ir.ptr, addrspace 1) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<8 x s64>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x i64>) = G_LOAD [[LOAD]](p1) :: (load (<8 x i64>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32), [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32), [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32), [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD1]](<8 x i64>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; CHECK-NEXT: 
$vgpr4 = COPY [[UV4]](i32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](i32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](i32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](i32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](i32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](i32) + ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](i32) + ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](i32) + ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](i32) + ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](i32) + ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](i32) + ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef %val = load <8 x i64>, ptr addrspace(1) %ptr @@ -642,40 +645,40 @@ define <16 x i64> @v16i64_func_void() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s64>) from %ir.ptr, addrspace 1) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s64>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) - ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) - ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) - ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) - ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) - ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) - ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) - ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) - ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) - ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) - ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) - ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) - ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) - ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) - ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; 
CHECK-NEXT: $vgpr31 = COPY [[UV31]](s32) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x i64>) = G_LOAD [[LOAD]](p1) :: (load (<16 x i64>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32), [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32), [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32), [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32), [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32), [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32), [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32), [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32), [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32), [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32), [[UV28:%[0-9]+]]:_(i32), [[UV29:%[0-9]+]]:_(i32), [[UV30:%[0-9]+]]:_(i32), [[UV31:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD1]](<16 x i64>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](i32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](i32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](i32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](i32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](i32) + ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](i32) + ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](i32) + ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](i32) + ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](i32) + ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](i32) + ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](i32) + ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](i32) + ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](i32) + ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](i32) + ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](i32) + ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](i32) + ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](i32) + ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](i32) + ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](i32) + ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](i32) + ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](i32) + ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](i32) + ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](i32) + ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](i32) + ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](i32) + ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[UV31]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef %val = load <16 x i64>, ptr addrspace(1) %ptr @@ -686,8 +689,8 @@ define <2 x i16> @v2i16_func_void() #0 { ; CHECK-LABEL: name: v2i16_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: (load (<2 x s16>) from `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[DEF]](p1) :: 
(load (<2 x i16>) from `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load <2 x i16>, ptr addrspace(1) undef ret <2 x i16> %val @@ -697,8 +700,8 @@ define <2 x half> @v2f16_func_void() #0 { ; CHECK-LABEL: name: v2f16_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: (load (<2 x s16>) from `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x f16>) = G_LOAD [[DEF]](p1) :: (load (<2 x f16>) from `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](<2 x f16>) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load <2 x half>, ptr addrspace(1) undef ret <2 x half> %val @@ -708,13 +711,13 @@ define <3 x i16> @v3i16_func_void() #0 { ; CHECK-LABEL: name: v3i16_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[DEF]](p1) :: (load (<3 x s16>) from `ptr addrspace(1) undef`, align 8, addrspace 1) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<3 x s16>) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[DEF1]](s16) - ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV3]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i16>) = G_LOAD [[DEF]](p1) :: (load (<3 x i16>) from `ptr addrspace(1) undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i16), [[UV1:%[0-9]+]]:_(i16), [[UV2:%[0-9]+]]:_(i16) = G_UNMERGE_VALUES [[LOAD]](<3 x i16>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i16>) = G_BUILD_VECTOR [[UV]](i16), [[UV1]](i16), [[UV2]](i16), [[DEF1]](i16) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i16>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV3]](<2 x i16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV4]](<2 x i16>) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load <3 x i16>, ptr addrspace(1) undef ret <3 x i16> %val @@ -724,10 +727,10 @@ define <4 x i16> @v4i16_func_void() #0 { ; CHECK-LABEL: name: v4i16_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: (load (<4 x s16>) from `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[DEF]](p1) :: (load (<4 x i16>) from `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[LOAD]](<4 x i16>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x i16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x i16>) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load <4 x i16>, ptr addrspace(1) undef ret <4 x i16> %val @@ -737,10 +740,10 @@ define <4 x half> 
@v4f16_func_void() #0 { ; CHECK-LABEL: name: v4f16_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: (load (<4 x s16>) from `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x f16>) = G_LOAD [[DEF]](p1) :: (load (<4 x f16>) from `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[LOAD]](<4 x f16>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x f16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x f16>) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load <4 x half>, ptr addrspace(1) undef ret <4 x half> %val @@ -751,14 +754,14 @@ define <5 x i16> @v5i16_func_void() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[LOAD]](p1) :: (load (<5 x s16>) from %ir.ptr, align 16, addrspace 1) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD1]](<5 x s16>) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<6 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[DEF1]](s16) - ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<6 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV5]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV6]](<2 x s16>) - ; CHECK-NEXT: $vgpr2 = COPY [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<5 x i16>) = G_LOAD [[LOAD]](p1) :: (load (<5 x i16>) from %ir.ptr, align 16, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i16), [[UV1:%[0-9]+]]:_(i16), [[UV2:%[0-9]+]]:_(i16), [[UV3:%[0-9]+]]:_(i16), [[UV4:%[0-9]+]]:_(i16) = G_UNMERGE_VALUES [[LOAD1]](<5 x i16>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<6 x i16>) = G_BUILD_VECTOR [[UV]](i16), [[UV1]](i16), [[UV2]](i16), [[UV3]](i16), [[UV4]](i16), [[DEF1]](i16) + ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(<2 x i16>), [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<6 x i16>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV5]](<2 x i16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV6]](<2 x i16>) + ; CHECK-NEXT: $vgpr2 = COPY [[UV7]](<2 x i16>) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef %val = load <5 x i16>, ptr addrspace(1) %ptr @@ -770,12 +773,12 @@ define <8 x i16> @v8i16_func_void() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s16>) from %ir.ptr, addrspace 1) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), 
[[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD1]](<8 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x i16>) = G_LOAD [[LOAD]](p1) :: (load (<8 x i16>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[LOAD1]](<8 x i16>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x i16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x i16>) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](<2 x i16>) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](<2 x i16>) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef %val = load <8 x i16>, ptr addrspace(1) %ptr @@ -787,16 +790,16 @@ define <16 x i16> @v16i16_func_void() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s16>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s16>) from %ir.ptr, addrspace 1) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD1]](<16 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](<2 x s16>) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](<2 x s16>) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](<2 x s16>) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](<2 x s16>) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x i16>) = G_LOAD [[LOAD]](p1) :: (load (<16 x i16>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>), [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[LOAD1]](<16 x i16>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x i16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x i16>) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](<2 x i16>) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](<2 x i16>) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](<2 x i16>) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](<2 x i16>) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](<2 x i16>) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](<2 x i16>) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef %val = load <16 x i16>, ptr addrspace(1) %ptr @@ -808,56 +811,56 @@ define <16 x i8> @v16i8_func_void() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s8>) from %ir.ptr, addrspace 1) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), 
[[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<16 x s8>) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) - ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) - ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[UV4]](s8) - ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s16) = G_ANYEXT [[UV5]](s8) - ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s16) = G_ANYEXT [[UV6]](s8) - ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s16) = G_ANYEXT [[UV7]](s8) - ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(s16) = G_ANYEXT [[UV8]](s8) - ; CHECK-NEXT: [[ANYEXT9:%[0-9]+]]:_(s16) = G_ANYEXT [[UV9]](s8) - ; CHECK-NEXT: [[ANYEXT10:%[0-9]+]]:_(s16) = G_ANYEXT [[UV10]](s8) - ; CHECK-NEXT: [[ANYEXT11:%[0-9]+]]:_(s16) = G_ANYEXT [[UV11]](s8) - ; CHECK-NEXT: [[ANYEXT12:%[0-9]+]]:_(s16) = G_ANYEXT [[UV12]](s8) - ; CHECK-NEXT: [[ANYEXT13:%[0-9]+]]:_(s16) = G_ANYEXT [[UV13]](s8) - ; CHECK-NEXT: [[ANYEXT14:%[0-9]+]]:_(s16) = G_ANYEXT [[UV14]](s8) - ; CHECK-NEXT: [[ANYEXT15:%[0-9]+]]:_(s16) = G_ANYEXT [[UV15]](s8) - ; CHECK-NEXT: [[ANYEXT16:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT16]](s32) - ; CHECK-NEXT: [[ANYEXT17:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) - ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT17]](s32) - ; CHECK-NEXT: [[ANYEXT18:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16) - ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT18]](s32) - ; CHECK-NEXT: [[ANYEXT19:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT3]](s16) - ; CHECK-NEXT: $vgpr3 = COPY [[ANYEXT19]](s32) - ; CHECK-NEXT: [[ANYEXT20:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT4]](s16) - ; CHECK-NEXT: $vgpr4 = COPY [[ANYEXT20]](s32) - ; CHECK-NEXT: [[ANYEXT21:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT5]](s16) - ; CHECK-NEXT: $vgpr5 = COPY [[ANYEXT21]](s32) - ; CHECK-NEXT: [[ANYEXT22:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT6]](s16) - ; CHECK-NEXT: $vgpr6 = COPY [[ANYEXT22]](s32) - ; CHECK-NEXT: [[ANYEXT23:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT7]](s16) - ; CHECK-NEXT: $vgpr7 = COPY [[ANYEXT23]](s32) - ; CHECK-NEXT: [[ANYEXT24:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT8]](s16) - ; CHECK-NEXT: $vgpr8 = COPY [[ANYEXT24]](s32) - ; CHECK-NEXT: [[ANYEXT25:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT9]](s16) - ; CHECK-NEXT: $vgpr9 = COPY [[ANYEXT25]](s32) - ; CHECK-NEXT: [[ANYEXT26:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT10]](s16) - ; CHECK-NEXT: $vgpr10 = COPY [[ANYEXT26]](s32) - ; CHECK-NEXT: [[ANYEXT27:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT11]](s16) - ; CHECK-NEXT: $vgpr11 = COPY [[ANYEXT27]](s32) - ; CHECK-NEXT: [[ANYEXT28:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT12]](s16) - ; CHECK-NEXT: $vgpr12 = COPY [[ANYEXT28]](s32) - ; CHECK-NEXT: [[ANYEXT29:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT13]](s16) - ; CHECK-NEXT: $vgpr13 = COPY [[ANYEXT29]](s32) - ; CHECK-NEXT: [[ANYEXT30:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT14]](s16) - ; CHECK-NEXT: $vgpr14 = COPY [[ANYEXT30]](s32) - ; CHECK-NEXT: [[ANYEXT31:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT15]](s16) - ; CHECK-NEXT: $vgpr15 = COPY [[ANYEXT31]](s32) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x i8>) = G_LOAD [[LOAD]](p1) :: (load (<16 x i8>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i8), 
[[UV1:%[0-9]+]]:_(i8), [[UV2:%[0-9]+]]:_(i8), [[UV3:%[0-9]+]]:_(i8), [[UV4:%[0-9]+]]:_(i8), [[UV5:%[0-9]+]]:_(i8), [[UV6:%[0-9]+]]:_(i8), [[UV7:%[0-9]+]]:_(i8), [[UV8:%[0-9]+]]:_(i8), [[UV9:%[0-9]+]]:_(i8), [[UV10:%[0-9]+]]:_(i8), [[UV11:%[0-9]+]]:_(i8), [[UV12:%[0-9]+]]:_(i8), [[UV13:%[0-9]+]]:_(i8), [[UV14:%[0-9]+]]:_(i8), [[UV15:%[0-9]+]]:_(i8) = G_UNMERGE_VALUES [[LOAD1]](<16 x i8>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i16) = G_ANYEXT [[UV]](i8) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i16) = G_ANYEXT [[UV1]](i8) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(i16) = G_ANYEXT [[UV2]](i8) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(i16) = G_ANYEXT [[UV3]](i8) + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(i16) = G_ANYEXT [[UV4]](i8) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(i16) = G_ANYEXT [[UV5]](i8) + ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(i16) = G_ANYEXT [[UV6]](i8) + ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(i16) = G_ANYEXT [[UV7]](i8) + ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(i16) = G_ANYEXT [[UV8]](i8) + ; CHECK-NEXT: [[ANYEXT9:%[0-9]+]]:_(i16) = G_ANYEXT [[UV9]](i8) + ; CHECK-NEXT: [[ANYEXT10:%[0-9]+]]:_(i16) = G_ANYEXT [[UV10]](i8) + ; CHECK-NEXT: [[ANYEXT11:%[0-9]+]]:_(i16) = G_ANYEXT [[UV11]](i8) + ; CHECK-NEXT: [[ANYEXT12:%[0-9]+]]:_(i16) = G_ANYEXT [[UV12]](i8) + ; CHECK-NEXT: [[ANYEXT13:%[0-9]+]]:_(i16) = G_ANYEXT [[UV13]](i8) + ; CHECK-NEXT: [[ANYEXT14:%[0-9]+]]:_(i16) = G_ANYEXT [[UV14]](i8) + ; CHECK-NEXT: [[ANYEXT15:%[0-9]+]]:_(i16) = G_ANYEXT [[UV15]](i8) + ; CHECK-NEXT: [[ANYEXT16:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT16]](i32) + ; CHECK-NEXT: [[ANYEXT17:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT1]](i16) + ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT17]](i32) + ; CHECK-NEXT: [[ANYEXT18:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT2]](i16) + ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT18]](i32) + ; CHECK-NEXT: [[ANYEXT19:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT3]](i16) + ; CHECK-NEXT: $vgpr3 = COPY [[ANYEXT19]](i32) + ; CHECK-NEXT: [[ANYEXT20:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT4]](i16) + ; CHECK-NEXT: $vgpr4 = COPY [[ANYEXT20]](i32) + ; CHECK-NEXT: [[ANYEXT21:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT5]](i16) + ; CHECK-NEXT: $vgpr5 = COPY [[ANYEXT21]](i32) + ; CHECK-NEXT: [[ANYEXT22:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT6]](i16) + ; CHECK-NEXT: $vgpr6 = COPY [[ANYEXT22]](i32) + ; CHECK-NEXT: [[ANYEXT23:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT7]](i16) + ; CHECK-NEXT: $vgpr7 = COPY [[ANYEXT23]](i32) + ; CHECK-NEXT: [[ANYEXT24:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT8]](i16) + ; CHECK-NEXT: $vgpr8 = COPY [[ANYEXT24]](i32) + ; CHECK-NEXT: [[ANYEXT25:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT9]](i16) + ; CHECK-NEXT: $vgpr9 = COPY [[ANYEXT25]](i32) + ; CHECK-NEXT: [[ANYEXT26:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT10]](i16) + ; CHECK-NEXT: $vgpr10 = COPY [[ANYEXT26]](i32) + ; CHECK-NEXT: [[ANYEXT27:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT11]](i16) + ; CHECK-NEXT: $vgpr11 = COPY [[ANYEXT27]](i32) + ; CHECK-NEXT: [[ANYEXT28:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT12]](i16) + ; CHECK-NEXT: $vgpr12 = COPY [[ANYEXT28]](i32) + ; CHECK-NEXT: [[ANYEXT29:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT13]](i16) + ; CHECK-NEXT: $vgpr13 = COPY [[ANYEXT29]](i32) + ; CHECK-NEXT: [[ANYEXT30:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT14]](i16) + ; CHECK-NEXT: $vgpr14 = COPY [[ANYEXT30]](i32) + ; CHECK-NEXT: [[ANYEXT31:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT15]](i16) + ; CHECK-NEXT: $vgpr15 = COPY [[ANYEXT31]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, 
implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef %val = load <16 x i8>, ptr addrspace(1) %ptr @@ -868,14 +871,14 @@ define <2 x i8> @v2i8_func_void() #0 { ; CHECK-LABEL: name: v2i8_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[DEF]](p1) :: (load (<2 x s8>) from `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD]](<2 x s8>) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT2]](s32) - ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) - ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT3]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i8>) = G_LOAD [[DEF]](p1) :: (load (<2 x i8>) from `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i8), [[UV1:%[0-9]+]]:_(i8) = G_UNMERGE_VALUES [[LOAD]](<2 x i8>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i16) = G_ANYEXT [[UV]](i8) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i16) = G_ANYEXT [[UV1]](i8) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT2]](i32) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT1]](i16) + ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT3]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load <2 x i8>, ptr addrspace(1) undef ret <2 x i8> %val @@ -885,17 +888,17 @@ define <3 x i8> @v3i8_func_void() #0 { ; CHECK-LABEL: name: v3i8_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[DEF]](p1) :: (load (<3 x s8>) from `ptr addrspace(1) undef`, align 4, addrspace 1) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD]](<3 x s8>) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) - ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT3]](s32) - ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) - ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT4]](s32) - ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16) - ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT5]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i8>) = G_LOAD [[DEF]](p1) :: (load (<3 x i8>) from `ptr addrspace(1) undef`, align 4, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i8), [[UV1:%[0-9]+]]:_(i8), [[UV2:%[0-9]+]]:_(i8) = G_UNMERGE_VALUES [[LOAD]](<3 x i8>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i16) = G_ANYEXT [[UV]](i8) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i16) = G_ANYEXT [[UV1]](i8) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(i16) = G_ANYEXT [[UV2]](i8) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT3]](i32) + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT1]](i16) + ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT4]](i32) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT2]](i16) + ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT5]](i32) ; CHECK-NEXT: SI_RETURN 
implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = load <3 x i8>, ptr addrspace(1) undef ret <3 x i8> %val @@ -906,20 +909,20 @@ define <4 x i8> @v4i8_func_void() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[LOAD]](p1) :: (load (<4 x s8>) from %ir.ptr, addrspace 1) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<4 x s8>) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) - ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) - ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT4]](s32) - ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) - ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT5]](s32) - ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16) - ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT6]](s32) - ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT3]](s16) - ; CHECK-NEXT: $vgpr3 = COPY [[ANYEXT7]](s32) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i8>) = G_LOAD [[LOAD]](p1) :: (load (<4 x i8>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i8), [[UV1:%[0-9]+]]:_(i8), [[UV2:%[0-9]+]]:_(i8), [[UV3:%[0-9]+]]:_(i8) = G_UNMERGE_VALUES [[LOAD1]](<4 x i8>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i16) = G_ANYEXT [[UV]](i8) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i16) = G_ANYEXT [[UV1]](i8) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(i16) = G_ANYEXT [[UV2]](i8) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(i16) = G_ANYEXT [[UV3]](i8) + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT4]](i32) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT1]](i16) + ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT5]](i32) + ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT2]](i16) + ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT6]](i32) + ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT3]](i16) + ; CHECK-NEXT: $vgpr3 = COPY [[ANYEXT7]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef %val = load <4 x i8>, ptr addrspace(1) %ptr @@ -930,13 +933,13 @@ define {i8, i32} @struct_i8_i32_func_void() #0 { ; CHECK-LABEL: name: struct_i8_i32_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `ptr addrspace(1) undef`, align 4, addrspace 1) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](s64) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from `ptr addrspace(1) undef` + 4, addrspace 1) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s8) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[LOAD1]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i8) = G_LOAD [[DEF]](p1) :: (load (i8) from `ptr addrspace(1) undef`, align 4, addrspace 1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD 
[[DEF]], [[C]](i64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i32) from `ptr addrspace(1) undef` + 4, addrspace 1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[LOAD]](i8) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[LOAD1]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load { i8, i32 }, ptr addrspace(1) undef ret { i8, i32 } %val @@ -949,12 +952,12 @@ define void @void_func_sret_struct_i8_i32(ptr addrspace(5) sret({ i8, i32 }) %ar ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (volatile load (s8) from `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p1) :: (volatile load (s32) from `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: %13:_(p5) = nuw nusw G_PTR_ADD [[COPY]], [[C]](s32) - ; CHECK-NEXT: G_STORE [[LOAD]](s8), [[COPY]](p5) :: (store (s8) into %ir.arg0, addrspace 5) - ; CHECK-NEXT: G_STORE [[LOAD1]](s32), %13(p5) :: (store (s32) into %ir.gep1, addrspace 5) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i8) = G_LOAD [[DEF]](p1) :: (volatile load (i8) from `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[DEF]](p1) :: (volatile load (i32) from `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CHECK-NEXT: %13:_(p5) = nuw nusw G_PTR_ADD [[COPY]], [[C]](i32) + ; CHECK-NEXT: G_STORE [[LOAD]](i8), [[COPY]](p5) :: (store (i8) into %ir.arg0, addrspace 5) + ; CHECK-NEXT: G_STORE [[LOAD1]](i32), %13(p5) :: (store (i32) into %ir.gep1, addrspace 5) ; CHECK-NEXT: SI_RETURN %val0 = load volatile i8, ptr addrspace(1) undef %val1 = load volatile i32, ptr addrspace(1) undef @@ -977,8 +980,8 @@ define <33 x i32> @v33i32_func_void() #0 { ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<33 x s32>) from %ir.ptr, align 256, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD1]](<33 x s32>), [[COPY]](p5) :: (store (<33 x s32>), align 256, addrspace 5) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<33 x i32>) = G_LOAD [[LOAD]](p1) :: (load (<33 x i32>) from %ir.ptr, align 256, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD1]](<33 x i32>), [[COPY]](p5) :: (store (<33 x i32>), align 256, addrspace 5) ; CHECK-NEXT: SI_RETURN %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef %val = load <33 x i32>, ptr addrspace(1) %ptr @@ -991,17 +994,17 @@ define <33 x i32> @v33i32_func_v33i32_i32(ptr addrspace(1) %p, i32 %idx) #0 { ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY3]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 - ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]] - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], 
[[MUL]](s64) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](i32), [[COPY2]](i32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(i64) = G_SEXT [[COPY3]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 256 + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(i64) = G_MUL [[SEXT]], [[C]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[MUL]](i64) ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p1) = COPY [[PTR_ADD]](p1) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[COPY4]](p1) :: (load (<33 x s32>) from %ir.gep, align 256, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD]](<33 x s32>), [[COPY]](p5) :: (store (<33 x s32>), align 256, addrspace 5) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<33 x i32>) = G_LOAD [[COPY4]](p1) :: (load (<33 x i32>) from %ir.gep, align 256, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD]](<33 x i32>), [[COPY]](p5) :: (store (<33 x i32>), align 256, addrspace 5) ; CHECK-NEXT: SI_RETURN %gep = getelementptr inbounds <33 x i32>, ptr addrspace(1) %p, i32 %idx %val = load <33 x i32>, ptr addrspace(1) %gep @@ -1016,14 +1019,14 @@ define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() #0 { ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<32 x s32>) from %ir.ptr, addrspace 1) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) - ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from %ir.ptr + 128, align 128, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD1]](<32 x s32>), [[COPY]](p5) :: (store (<32 x s32>), addrspace 5) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CHECK-NEXT: G_STORE [[LOAD2]](s32), [[PTR_ADD1]](p5) :: (store (s32), align 128, addrspace 5) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x i32>) = G_LOAD [[LOAD]](p1) :: (load (<32 x i32>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 128 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](i64) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i32) from %ir.ptr + 128, align 128, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD1]](<32 x i32>), [[COPY]](p5) :: (store (<32 x i32>), addrspace 5) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 128 + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CHECK-NEXT: G_STORE [[LOAD2]](i32), [[PTR_ADD1]](p5) :: (store (i32), align 128, addrspace 5) ; CHECK-NEXT: SI_RETURN %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef %val = load { <32 x i32>, i32 }, ptr addrspace(1) %ptr @@ -1038,14 +1041,14 @@ define { i32, <32 x i32> } @struct_i32_v32i32_func_void() #0 { ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[LOAD]](p1) :: (load (s32) from %ir.ptr, align 128, addrspace 1) - ; CHECK-NEXT: 
[[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) - ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<32 x s32>) from %ir.ptr + 128, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[COPY]](p5) :: (store (s32), align 128, addrspace 5) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CHECK-NEXT: G_STORE [[LOAD2]](<32 x s32>), [[PTR_ADD1]](p5) :: (store (<32 x s32>), addrspace 5) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[LOAD]](p1) :: (load (i32) from %ir.ptr, align 128, addrspace 1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 128 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](i64) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(<32 x i32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<32 x i32>) from %ir.ptr + 128, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD1]](i32), [[COPY]](p5) :: (store (i32), align 128, addrspace 5) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 128 + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CHECK-NEXT: G_STORE [[LOAD2]](<32 x i32>), [[PTR_ADD1]](p5) :: (store (<32 x i32>), addrspace 5) ; CHECK-NEXT: SI_RETURN %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef %val = load { i32, <32 x i32> }, ptr addrspace(1) %ptr @@ -1057,23 +1060,23 @@ define { <3 x i32>, i32 } @v3i32_struct_func_void_wasted_reg() #0 { ; CHECK-LABEL: name: v3i32_struct_func_void_wasted_reg ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `ptr addrspace(3) undef`, addrspace 3) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `ptr addrspace(3) undef`, addrspace 3) - ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `ptr addrspace(3) undef`, addrspace 3) - ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `ptr addrspace(3) undef`, addrspace 3) - ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[DEF1]], [[LOAD]](s32), [[C]](s32) - ; CHECK-NEXT: [[IVEC1:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC]], [[LOAD1]](s32), [[C1]](s32) - ; CHECK-NEXT: [[IVEC2:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC1]], [[LOAD2]](s32), [[C2]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[IVEC2]](<3 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[LOAD3]](s32) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<3 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[DEF]](p3) :: (volatile load (i32) from `ptr addrspace(3) undef`, addrspace 3) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD 
[[DEF]](p3) :: (volatile load (i32) from `ptr addrspace(3) undef`, addrspace 3) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[DEF]](p3) :: (volatile load (i32) from `ptr addrspace(3) undef`, addrspace 3) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[DEF]](p3) :: (volatile load (i32) from `ptr addrspace(3) undef`, addrspace 3) + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<3 x i32>) = G_INSERT_VECTOR_ELT [[DEF1]], [[LOAD]](i32), [[C]](i32) + ; CHECK-NEXT: [[IVEC1:%[0-9]+]]:_(<3 x i32>) = G_INSERT_VECTOR_ELT [[IVEC]], [[LOAD1]](i32), [[C1]](i32) + ; CHECK-NEXT: [[IVEC2:%[0-9]+]]:_(<3 x i32>) = G_INSERT_VECTOR_ELT [[IVEC1]], [[LOAD2]](i32), [[C2]](i32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[IVEC2]](<3 x i32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[LOAD3]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %load0 = load volatile i32, ptr addrspace(3) undef %load1 = load volatile i32, ptr addrspace(3) undef @@ -1092,23 +1095,23 @@ define { <3 x float>, i32 } @v3f32_struct_func_void_wasted_reg() #0 { ; CHECK-LABEL: name: v3f32_struct_func_void_wasted_reg ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `ptr addrspace(3) undef`, addrspace 3) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `ptr addrspace(3) undef`, addrspace 3) - ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `ptr addrspace(3) undef`, addrspace 3) - ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `ptr addrspace(3) undef`, addrspace 3) - ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[DEF1]], [[LOAD]](s32), [[C]](s32) - ; CHECK-NEXT: [[IVEC1:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC]], [[LOAD1]](s32), [[C1]](s32) - ; CHECK-NEXT: [[IVEC2:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC1]], [[LOAD2]](s32), [[C2]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[IVEC2]](<3 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[LOAD3]](s32) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<3 x f32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(f32) = G_LOAD [[DEF]](p3) :: (volatile load (f32) from `ptr addrspace(3) undef`, addrspace 3) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(f32) = G_LOAD [[DEF]](p3) :: (volatile load (f32) from `ptr addrspace(3) undef`, addrspace 3) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(f32) = G_LOAD [[DEF]](p3) :: (volatile load (f32) from `ptr addrspace(3) undef`, addrspace 3) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[DEF]](p3) :: 
(volatile load (i32) from `ptr addrspace(3) undef`, addrspace 3) + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<3 x f32>) = G_INSERT_VECTOR_ELT [[DEF1]], [[LOAD]](f32), [[C]](i32) + ; CHECK-NEXT: [[IVEC1:%[0-9]+]]:_(<3 x f32>) = G_INSERT_VECTOR_ELT [[IVEC]], [[LOAD1]](f32), [[C1]](i32) + ; CHECK-NEXT: [[IVEC2:%[0-9]+]]:_(<3 x f32>) = G_INSERT_VECTOR_ELT [[IVEC1]], [[LOAD2]](f32), [[C2]](i32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[IVEC2]](<3 x f32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](f32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; CHECK-NEXT: $vgpr3 = COPY [[LOAD3]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %load0 = load volatile float, ptr addrspace(3) undef %load1 = load volatile float, ptr addrspace(3) undef @@ -1129,17 +1132,17 @@ define void @void_func_sret_max_known_zero_bits(ptr addrspace(5) sret(i8) %arg0) ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 18 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 17 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 18 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p5) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C]](s32) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C1]](s32) - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C2]](s32) - ; CHECK-NEXT: G_STORE [[LSHR]](s32), [[DEF]](p3) :: (volatile store (s32) into `ptr addrspace(3) undef`, addrspace 3) - ; CHECK-NEXT: G_STORE [[LSHR1]](s32), [[DEF]](p3) :: (volatile store (s32) into `ptr addrspace(3) undef`, addrspace 3) - ; CHECK-NEXT: G_STORE [[LSHR2]](s32), [[DEF]](p3) :: (volatile store (s32) into `ptr addrspace(3) undef`, addrspace 3) + ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(i32) = G_PTRTOINT [[COPY]](p5) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[PTRTOINT]], [[C]](i32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[PTRTOINT]], [[C1]](i32) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[PTRTOINT]], [[C2]](i32) + ; CHECK-NEXT: G_STORE [[LSHR]](i32), [[DEF]](p3) :: (volatile store (i32) into `ptr addrspace(3) undef`, addrspace 3) + ; CHECK-NEXT: G_STORE [[LSHR1]](i32), [[DEF]](p3) :: (volatile store (i32) into `ptr addrspace(3) undef`, addrspace 3) + ; CHECK-NEXT: G_STORE [[LSHR2]](i32), [[DEF]](p3) :: (volatile store (i32) into `ptr addrspace(3) undef`, addrspace 3) ; CHECK-NEXT: SI_RETURN %arg0.int = ptrtoint ptr addrspace(5) %arg0 to i32 @@ -1157,41 +1160,41 @@ define i1022 @i1022_func_void() #0 { ; CHECK-LABEL: name: i1022_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load (s1022) from `ptr addrspace(1) undef`, align 8, addrspace 1) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s1024) = G_ANYEXT [[LOAD]](s1022) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), 
[[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s1024) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) - ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) - ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) - ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) - ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) - ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) - ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) - ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) - ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) - ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) - ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) - ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) - ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) - ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) - ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[UV31]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i1022) = G_LOAD [[DEF]](p1) :: (load (i1022) from `ptr addrspace(1) undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i1024) = G_ANYEXT [[LOAD]](i1022) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32), [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32), [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32), [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32), [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32), [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32), [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32), [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32), [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32), [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32), [[UV28:%[0-9]+]]:_(i32), [[UV29:%[0-9]+]]:_(i32), [[UV30:%[0-9]+]]:_(i32), [[UV31:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ANYEXT]](i1024) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](i32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](i32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](i32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](i32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](i32) + ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](i32) + ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](i32) + ; 
CHECK-NEXT: $vgpr12 = COPY [[UV12]](i32) + ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](i32) + ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](i32) + ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](i32) + ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](i32) + ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](i32) + ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](i32) + ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](i32) + ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](i32) + ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](i32) + ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](i32) + ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](i32) + ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](i32) + ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](i32) + ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](i32) + ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](i32) + ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](i32) + ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](i32) + ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[UV31]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 %val = load i1022, ptr addrspace(1) undef ret i1022 %val @@ -1201,41 +1204,41 @@ define signext i1022 @i1022_signext_func_void() #0 { ; CHECK-LABEL: name: i1022_signext_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load (s1022) from `ptr addrspace(1) undef`, align 8, addrspace 1) - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s1024) = G_SEXT [[LOAD]](s1022) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s1024) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) - ; CHECK-NEXT: $vgpr17 = 
COPY [[UV17]](s32) - ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) - ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) - ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) - ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) - ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) - ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) - ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) - ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) - ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) - ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) - ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) - ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) - ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[UV31]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i1022) = G_LOAD [[DEF]](p1) :: (load (i1022) from `ptr addrspace(1) undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(i1024) = G_SEXT [[LOAD]](i1022) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32), [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32), [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32), [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32), [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32), [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32), [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32), [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32), [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32), [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32), [[UV28:%[0-9]+]]:_(i32), [[UV29:%[0-9]+]]:_(i32), [[UV30:%[0-9]+]]:_(i32), [[UV31:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SEXT]](i1024) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](i32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](i32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](i32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](i32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](i32) + ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](i32) + ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](i32) + ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](i32) + ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](i32) + ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](i32) + ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](i32) + ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](i32) + ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](i32) + ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](i32) + ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](i32) + ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](i32) + ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](i32) + ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](i32) + ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](i32) + ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](i32) + ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](i32) + ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](i32) + ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](i32) + ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](i32) + ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](i32) + ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[UV31]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, 
implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 %val = load i1022, ptr addrspace(1) undef ret i1022 %val @@ -1245,41 +1248,41 @@ define zeroext i1022 @i1022_zeroext_func_void() #0 { ; CHECK-LABEL: name: i1022_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load (s1022) from `ptr addrspace(1) undef`, align 8, addrspace 1) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s1024) = G_ZEXT [[LOAD]](s1022) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s1024) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) - ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) - ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) - ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) - ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) - ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) - ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) - ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) - ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) - ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) - ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) - ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) - ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) - ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) - ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[UV31]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i1022) = G_LOAD [[DEF]](p1) :: (load (i1022) from `ptr addrspace(1) undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i1024) = G_ZEXT [[LOAD]](i1022) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32), [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32), [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32), [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32), [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32), [[UV18:%[0-9]+]]:_(i32), 
[[UV19:%[0-9]+]]:_(i32), [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32), [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32), [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32), [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32), [[UV28:%[0-9]+]]:_(i32), [[UV29:%[0-9]+]]:_(i32), [[UV30:%[0-9]+]]:_(i32), [[UV31:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ZEXT]](i1024) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](i32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](i32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](i32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](i32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](i32) + ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](i32) + ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](i32) + ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](i32) + ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](i32) + ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](i32) + ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](i32) + ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](i32) + ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](i32) + ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](i32) + ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](i32) + ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](i32) + ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](i32) + ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](i32) + ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](i32) + ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](i32) + ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](i32) + ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](i32) + ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](i32) + ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](i32) + ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](i32) + ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[UV31]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 %val = load i1022, ptr addrspace(1) undef ret i1022 %val @@ -1294,25 +1297,25 @@ define %struct.with.ptrs @ptr_in_struct_func_void() #0 { ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load (<32 x s32>) from `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](s64) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<32 x i32>) = G_LOAD [[DEF]](p1) :: (volatile load (<32 x i32>) from `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 128 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](i64) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p1) :: (volatile load (p3) from `ptr addrspace(1) undef` + 128, align 128, addrspace 1) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 136 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 136 + ; CHECK-NEXT: 
[[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](i64) ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD1]](p1) :: (volatile load (p1) from `ptr addrspace(1) undef` + 136, addrspace 1) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 144 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 144 + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C2]](i64) ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD2]](p1) :: (volatile load (<2 x p1>) from `ptr addrspace(1) undef` + 144, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD]](<32 x s32>), [[COPY]](p5) :: (store (<32 x s32>), addrspace 5) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) + ; CHECK-NEXT: G_STORE [[LOAD]](<32 x i32>), [[COPY]](p5) :: (store (<32 x i32>), addrspace 5) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 128 + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) ; CHECK-NEXT: G_STORE [[LOAD1]](p3), [[PTR_ADD3]](p5) :: (store (p3), align 128, addrspace 5) - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 136 - ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 136 + ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) ; CHECK-NEXT: G_STORE [[LOAD2]](p1), [[PTR_ADD4]](p5) :: (store (p1), addrspace 5) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 144 - ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 144 + ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) ; CHECK-NEXT: G_STORE [[LOAD3]](<2 x p1>), [[PTR_ADD5]](p5) :: (store (<2 x p1>), addrspace 5) ; CHECK-NEXT: SI_RETURN %val = load volatile %struct.with.ptrs, ptr addrspace(1) undef diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.v2i65.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.v2i65.ll index 6cfa463ce65dd..bce839ab6c7be 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.v2i65.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.v2i65.ll @@ -5,15 +5,15 @@ define <2 x i65> @v2i65_func_void() #0 { ; CHECK-LABEL: name: v2i65_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s65>) = G_LOAD [[DEF]](p1) :: (load (<2 x s65>) from `ptr addrspace(1) undef`, align 32, addrspace 1) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<2 x s96>) = G_ANYEXT [[LOAD]](<2 x s65>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](<2 x s96>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i65>) = G_LOAD [[DEF]](p1) :: (load (<2 x i65>) from `ptr addrspace(1) undef`, align 32, addrspace 1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<2 x i96>) = G_ANYEXT [[LOAD]](<2 x i65>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES 
[[ANYEXT]](<2 x i96>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 %val = load <2 x i65>, ptr addrspace(1) undef ret <2 x i65> %val diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll index 52b76395d4f5a..b67625d11925b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll @@ -14,7 +14,7 @@ define amdgpu_ps void @global_atomic_fadd_f32_no_rtn_atomicrmw(ptr addrspace(1) ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX908-NEXT: GLOBAL_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) + ; GFX908-NEXT: GLOBAL_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (f32) on %ir.ptr, addrspace 1) ; GFX908-NEXT: S_ENDPGM 0 ; ; GFX90A-LABEL: name: global_atomic_fadd_f32_no_rtn_atomicrmw @@ -25,7 +25,7 @@ define amdgpu_ps void @global_atomic_fadd_f32_no_rtn_atomicrmw(ptr addrspace(1) ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A-NEXT: GLOBAL_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) + ; GFX90A-NEXT: GLOBAL_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (f32) on %ir.ptr, addrspace 1) ; GFX90A-NEXT: S_ENDPGM 0 ; ; GFX942-LABEL: name: global_atomic_fadd_f32_no_rtn_atomicrmw @@ -36,7 +36,7 @@ define amdgpu_ps void @global_atomic_fadd_f32_no_rtn_atomicrmw(ptr addrspace(1) ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 ; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX942-NEXT: GLOBAL_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) + ; GFX942-NEXT: GLOBAL_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (f32) on %ir.ptr, addrspace 1) ; GFX942-NEXT: S_ENDPGM 0 ; ; GFX11-LABEL: name: global_atomic_fadd_f32_no_rtn_atomicrmw @@ -47,7 +47,7 @@ define amdgpu_ps void @global_atomic_fadd_f32_no_rtn_atomicrmw(ptr addrspace(1) ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: GLOBAL_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) + ; 
GFX11-NEXT: GLOBAL_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (f32) on %ir.ptr, addrspace 1) ; GFX11-NEXT: S_ENDPGM 0 %ret = atomicrmw fadd ptr addrspace(1) %ptr, float %data syncscope("wavefront") monotonic, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0 ret void @@ -104,10 +104,11 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa ; GFX908-NEXT: [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_4]], 0, [[V_MOV_B32_dpp5]], 0, 0, implicit $mode, implicit $exec ; GFX908-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 63 ; GFX908-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READLANE_B32 [[V_ADD_F32_e64_5]], [[S_MOV_B32_2]] - ; GFX908-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[V_READLANE_B32_]] - ; GFX908-NEXT: [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[COPY17]], implicit $exec - ; GFX908-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_MBCNT_HI_U32_B32_e64_]], [[COPY18]], implicit $exec + ; GFX908-NEXT: [[COPY17:%[0-9]+]]:sreg_32 = COPY [[V_READLANE_B32_]] + ; GFX908-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY17]] + ; GFX908-NEXT: [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[COPY18]], implicit $exec + ; GFX908-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_MBCNT_HI_U32_B32_e64_]], [[COPY19]], implicit $exec ; GFX908-NEXT: [[SI_IF1:%[0-9]+]]:sreg_64_xexec = SI_IF [[V_CMP_EQ_U32_e64_]], %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX908-NEXT: S_BRANCH %bb.3 ; GFX908-NEXT: {{ $}} @@ -115,7 +116,7 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa ; GFX908-NEXT: successors: %bb.4(0x80000000) ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX908-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR [[V_MOV_B32_e32_]], [[STRICT_WWM]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) + ; GFX908-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR [[V_MOV_B32_e32_]], [[STRICT_WWM]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (f32) on %ir.ptr, addrspace 1) ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: bb.4.Flow: ; GFX908-NEXT: successors: %bb.5(0x80000000) @@ -176,10 +177,11 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa ; GFX90A-NEXT: [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_4]], 0, [[V_MOV_B32_dpp5]], 0, 0, implicit $mode, implicit $exec ; GFX90A-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 63 ; GFX90A-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READLANE_B32 [[V_ADD_F32_e64_5]], [[S_MOV_B32_2]] - ; GFX90A-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[V_READLANE_B32_]] - ; GFX90A-NEXT: [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[COPY17]], implicit $exec - ; GFX90A-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX90A-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_MBCNT_HI_U32_B32_e64_]], [[COPY18]], implicit $exec + ; GFX90A-NEXT: [[COPY17:%[0-9]+]]:sreg_32 = COPY [[V_READLANE_B32_]] + ; GFX90A-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY17]] + ; GFX90A-NEXT: [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[COPY18]], implicit $exec + ; GFX90A-NEXT: 
[[COPY19:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; GFX90A-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_MBCNT_HI_U32_B32_e64_]], [[COPY19]], implicit $exec ; GFX90A-NEXT: [[SI_IF1:%[0-9]+]]:sreg_64_xexec = SI_IF [[V_CMP_EQ_U32_e64_]], %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A-NEXT: S_BRANCH %bb.3 ; GFX90A-NEXT: {{ $}} @@ -187,7 +189,7 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa ; GFX90A-NEXT: successors: %bb.4(0x80000000) ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR [[V_MOV_B32_e32_]], [[STRICT_WWM]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) + ; GFX90A-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR [[V_MOV_B32_e32_]], [[STRICT_WWM]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (f32) on %ir.ptr, addrspace 1) ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.4.Flow: ; GFX90A-NEXT: successors: %bb.5(0x80000000) @@ -248,10 +250,11 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa ; GFX942-NEXT: [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_4]], 0, [[V_MOV_B32_dpp5]], 0, 0, implicit $mode, implicit $exec ; GFX942-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 63 ; GFX942-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READLANE_B32 [[V_ADD_F32_e64_5]], [[S_MOV_B32_2]] - ; GFX942-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[V_READLANE_B32_]] - ; GFX942-NEXT: [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[COPY17]], implicit $exec - ; GFX942-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX942-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_MBCNT_HI_U32_B32_e64_]], [[COPY18]], implicit $exec + ; GFX942-NEXT: [[COPY17:%[0-9]+]]:sreg_32 = COPY [[V_READLANE_B32_]] + ; GFX942-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY17]] + ; GFX942-NEXT: [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[COPY18]], implicit $exec + ; GFX942-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; GFX942-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_MBCNT_HI_U32_B32_e64_]], [[COPY19]], implicit $exec ; GFX942-NEXT: [[SI_IF1:%[0-9]+]]:sreg_64_xexec = SI_IF [[V_CMP_EQ_U32_e64_]], %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX942-NEXT: S_BRANCH %bb.3 ; GFX942-NEXT: {{ $}} @@ -259,7 +262,7 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa ; GFX942-NEXT: successors: %bb.4(0x80000000) ; GFX942-NEXT: {{ $}} ; GFX942-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX942-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR [[V_MOV_B32_e32_]], [[STRICT_WWM]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) + ; GFX942-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR [[V_MOV_B32_e32_]], [[STRICT_WWM]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (f32) on %ir.ptr, addrspace 1) ; GFX942-NEXT: {{ $}} ; GFX942-NEXT: bb.4.Flow: ; GFX942-NEXT: successors: %bb.5(0x80000000) @@ -279,7 +282,6 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 ; 
GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF ; GFX11-NEXT: [[SI_PS_LIVE:%[0-9]+]]:sreg_32_xm0_xexec = SI_PS_LIVE ; GFX11-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec = SI_IF [[SI_PS_LIVE]], %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX11-NEXT: S_BRANCH %bb.2 @@ -294,8 +296,8 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa ; GFX11-NEXT: [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 [[COPY4]], [[COPY5]], implicit $exec ; GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 ; GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; GFX11-NEXT: [[DEF1:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF - ; GFX11-NEXT: [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 0, [[COPY2]], 0, [[COPY6]], [[DEF1]], implicit $exec + ; GFX11-NEXT: [[DEF:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF + ; GFX11-NEXT: [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 0, [[COPY2]], 0, [[COPY6]], [[DEF]], implicit $exec ; GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] ; GFX11-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY7]], [[V_SET_INACTIVE_B32_]], 353, 15, 15, 0, implicit $exec ; GFX11-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_SET_INACTIVE_B32_]], 0, [[V_MOV_B32_dpp]], 0, 0, implicit $mode, implicit $exec @@ -308,7 +310,8 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa ; GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] ; GFX11-NEXT: [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY10]], [[V_ADD_F32_e64_2]], 360, 15, 15, 0, implicit $exec ; GFX11-NEXT: [[V_ADD_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_2]], 0, [[V_MOV_B32_dpp3]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[DEF]] + ; GFX11-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[DEF1]] ; GFX11-NEXT: [[V_PERMLANEX16_B32_e64_:%[0-9]+]]:vgpr_32 = V_PERMLANEX16_B32_e64 0, [[V_ADD_F32_e64_3]], 0, [[S_MOV_B32_]], 0, [[S_MOV_B32_]], [[COPY11]], 0, implicit $exec ; GFX11-NEXT: [[V_ADD_F32_e64_4:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_3]], 0, [[V_PERMLANEX16_B32_e64_]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[V_ADD_F32_e64_4]], implicit $exec @@ -321,7 +324,7 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa ; GFX11-NEXT: successors: %bb.4(0x80000000) ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX11-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR [[V_MOV_B32_e32_]], [[STRICT_WWM]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) + ; GFX11-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR [[V_MOV_B32_e32_]], [[STRICT_WWM]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (f32) on %ir.ptr, addrspace 1) ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: bb.4.Flow: ; GFX11-NEXT: successors: %bb.5(0x80000000) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll index cc24188790536..c815ecd4858fc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll +++ 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll @@ -13,7 +13,7 @@ define amdgpu_ps float @global_atomic_fadd_f32_rtn_atomicrmw(ptr addrspace(1) %p ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) + ; GFX90A-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (f32) on %ir.ptr, addrspace 1) ; GFX90A-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] ; GFX90A-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -25,7 +25,7 @@ define amdgpu_ps float @global_atomic_fadd_f32_rtn_atomicrmw(ptr addrspace(1) %p ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 ; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX942-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) + ; GFX942-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (f32) on %ir.ptr, addrspace 1) ; GFX942-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] ; GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -37,7 +37,7 @@ define amdgpu_ps float @global_atomic_fadd_f32_rtn_atomicrmw(ptr addrspace(1) %p ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (f32) on %ir.ptr, addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = atomicrmw fadd ptr addrspace(1) %ptr, float %data syncscope("wavefront") monotonic, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0 @@ -98,10 +98,11 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace ; GFX90A-NEXT: [[V_MOV_B32_dpp6:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY17]], [[V_ADD_F32_e64_5]], 312, 15, 15, 0, implicit $exec ; GFX90A-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 63 ; GFX90A-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READLANE_B32 [[V_ADD_F32_e64_5]], [[S_MOV_B32_2]] - ; GFX90A-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[V_READLANE_B32_]] - ; GFX90A-NEXT: [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[COPY18]], implicit $exec - ; GFX90A-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX90A-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 
[[V_MBCNT_HI_U32_B32_e64_]], [[COPY19]], implicit $exec + ; GFX90A-NEXT: [[COPY18:%[0-9]+]]:sreg_32 = COPY [[V_READLANE_B32_]] + ; GFX90A-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY18]] + ; GFX90A-NEXT: [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[COPY19]], implicit $exec + ; GFX90A-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; GFX90A-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_MBCNT_HI_U32_B32_e64_]], [[COPY20]], implicit $exec ; GFX90A-NEXT: [[SI_IF1:%[0-9]+]]:sreg_64_xexec = SI_IF [[V_CMP_EQ_U32_e64_]], %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A-NEXT: S_BRANCH %bb.3 ; GFX90A-NEXT: {{ $}} @@ -109,7 +110,7 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace ; GFX90A-NEXT: successors: %bb.5(0x80000000) ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN [[V_MOV_B32_e32_]], [[STRICT_WWM]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) + ; GFX90A-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN [[V_MOV_B32_e32_]], [[STRICT_WWM]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (f32) on %ir.ptr, addrspace 1) ; GFX90A-NEXT: S_BRANCH %bb.5 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.4.Flow: @@ -125,11 +126,12 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace ; GFX90A-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]], %bb.3, [[DEF]], %bb.2 ; GFX90A-NEXT: SI_END_CF [[SI_IF1]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[PHI1]], implicit $exec + ; GFX90A-NEXT: [[COPY21:%[0-9]+]]:sreg_32 = COPY [[V_READFIRSTLANE_B32_]] ; GFX90A-NEXT: [[STRICT_WWM1:%[0-9]+]]:vgpr_32 = STRICT_WWM [[V_MOV_B32_dpp6]], implicit $exec - ; GFX90A-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX90A-NEXT: [[V_ADD_F32_e64_6:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY20]], 0, [[STRICT_WWM1]], 0, 0, implicit $mode, implicit $exec - ; GFX90A-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX90A-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_ADD_F32_e64_6]], 0, [[COPY21]], [[V_CMP_EQ_U32_e64_]], implicit $exec + ; GFX90A-NEXT: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[COPY21]] + ; GFX90A-NEXT: [[V_ADD_F32_e64_6:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY22]], 0, [[STRICT_WWM1]], 0, 0, implicit $mode, implicit $exec + ; GFX90A-NEXT: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[COPY21]] + ; GFX90A-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_ADD_F32_e64_6]], 0, [[COPY23]], [[V_CMP_EQ_U32_e64_]], implicit $exec ; GFX90A-NEXT: S_BRANCH %bb.4 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.6 (%ir-block.41): @@ -189,10 +191,11 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace ; GFX942-NEXT: [[V_MOV_B32_dpp6:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY17]], [[V_ADD_F32_e64_5]], 312, 15, 15, 0, implicit $exec ; GFX942-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 63 ; GFX942-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READLANE_B32 [[V_ADD_F32_e64_5]], [[S_MOV_B32_2]] - ; GFX942-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[V_READLANE_B32_]] - ; 
GFX942-NEXT: [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[COPY18]], implicit $exec - ; GFX942-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX942-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_MBCNT_HI_U32_B32_e64_]], [[COPY19]], implicit $exec + ; GFX942-NEXT: [[COPY18:%[0-9]+]]:sreg_32 = COPY [[V_READLANE_B32_]] + ; GFX942-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY18]] + ; GFX942-NEXT: [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[COPY19]], implicit $exec + ; GFX942-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; GFX942-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_MBCNT_HI_U32_B32_e64_]], [[COPY20]], implicit $exec ; GFX942-NEXT: [[SI_IF1:%[0-9]+]]:sreg_64_xexec = SI_IF [[V_CMP_EQ_U32_e64_]], %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX942-NEXT: S_BRANCH %bb.3 ; GFX942-NEXT: {{ $}} @@ -200,7 +203,7 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace ; GFX942-NEXT: successors: %bb.5(0x80000000) ; GFX942-NEXT: {{ $}} ; GFX942-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX942-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN [[V_MOV_B32_e32_]], [[STRICT_WWM]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) + ; GFX942-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN [[V_MOV_B32_e32_]], [[STRICT_WWM]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (f32) on %ir.ptr, addrspace 1) ; GFX942-NEXT: S_BRANCH %bb.5 ; GFX942-NEXT: {{ $}} ; GFX942-NEXT: bb.4.Flow: @@ -216,11 +219,12 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace ; GFX942-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]], %bb.3, [[DEF]], %bb.2 ; GFX942-NEXT: SI_END_CF [[SI_IF1]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[PHI1]], implicit $exec + ; GFX942-NEXT: [[COPY21:%[0-9]+]]:sreg_32 = COPY [[V_READFIRSTLANE_B32_]] ; GFX942-NEXT: [[STRICT_WWM1:%[0-9]+]]:vgpr_32 = STRICT_WWM [[V_MOV_B32_dpp6]], implicit $exec - ; GFX942-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX942-NEXT: [[V_ADD_F32_e64_6:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY20]], 0, [[STRICT_WWM1]], 0, 0, implicit $mode, implicit $exec - ; GFX942-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX942-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_ADD_F32_e64_6]], 0, [[COPY21]], [[V_CMP_EQ_U32_e64_]], implicit $exec + ; GFX942-NEXT: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[COPY21]] + ; GFX942-NEXT: [[V_ADD_F32_e64_6:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY22]], 0, [[STRICT_WWM1]], 0, 0, implicit $mode, implicit $exec + ; GFX942-NEXT: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[COPY21]] + ; GFX942-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_ADD_F32_e64_6]], 0, [[COPY23]], [[V_CMP_EQ_U32_e64_]], implicit $exec ; GFX942-NEXT: S_BRANCH %bb.4 ; GFX942-NEXT: {{ $}} ; GFX942-NEXT: bb.6 (%ir-block.41): @@ -266,7 +270,8 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace ; GFX11-NEXT: [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY10]], [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec ; GFX11-NEXT: 
[[V_ADD_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_2]], 0, [[V_MOV_B32_dpp3]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[DEF]] + ; GFX11-NEXT: [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[DEF2]] ; GFX11-NEXT: [[V_PERMLANEX16_B32_e64_:%[0-9]+]]:vgpr_32 = V_PERMLANEX16_B32_e64 0, [[V_ADD_F32_e64_3]], 0, [[S_MOV_B32_2]], 0, [[S_MOV_B32_2]], [[COPY11]], 0, implicit $exec ; GFX11-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] ; GFX11-NEXT: [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY12]], [[V_PERMLANEX16_B32_e64_]], 228, 10, 15, 0, implicit $exec @@ -279,10 +284,11 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace ; GFX11-NEXT: [[V_WRITELANE_B32_:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 [[V_READLANE_B32_]], [[S_MOV_B32_4]], [[V_MOV_B32_dpp5]] ; GFX11-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sreg_32 = S_MOV_B32 31 ; GFX11-NEXT: [[V_READLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READLANE_B32 [[V_ADD_F32_e64_4]], [[S_MOV_B32_5]] - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[V_READLANE_B32_1]] - ; GFX11-NEXT: [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[COPY14]], implicit $exec - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX11-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_MBCNT_LO_U32_B32_e64_]], [[COPY15]], implicit $exec + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[V_READLANE_B32_1]] + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY14]] + ; GFX11-NEXT: [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[COPY15]], implicit $exec + ; GFX11-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; GFX11-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_MBCNT_LO_U32_B32_e64_]], [[COPY16]], implicit $exec ; GFX11-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec = SI_IF [[V_CMP_EQ_U32_e64_]], %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX11-NEXT: S_BRANCH %bb.3 ; GFX11-NEXT: {{ $}} @@ -290,7 +296,7 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace ; GFX11-NEXT: successors: %bb.5(0x80000000) ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN [[V_MOV_B32_e32_]], [[STRICT_WWM]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN [[V_MOV_B32_e32_]], [[STRICT_WWM]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (f32) on %ir.ptr, addrspace 1) ; GFX11-NEXT: S_BRANCH %bb.5 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: bb.4.Flow: @@ -306,11 +312,12 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace ; GFX11-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]], %bb.3, [[DEF]], %bb.2 ; GFX11-NEXT: SI_END_CF [[SI_IF1]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[PHI1]], implicit $exec + ; GFX11-NEXT: [[COPY17:%[0-9]+]]:sreg_32 = COPY [[V_READFIRSTLANE_B32_]] ; GFX11-NEXT: [[STRICT_WWM1:%[0-9]+]]:vgpr_32 = STRICT_WWM [[V_WRITELANE_B32_]], implicit $exec - ; 
GFX11-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX11-NEXT: [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY16]], 0, [[STRICT_WWM1]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX11-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_ADD_F32_e64_5]], 0, [[COPY17]], [[V_CMP_EQ_U32_e64_]], implicit $exec + ; GFX11-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY17]] + ; GFX11-NEXT: [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY18]], 0, [[STRICT_WWM1]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY17]] + ; GFX11-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_ADD_F32_e64_5]], 0, [[COPY19]], [[V_CMP_EQ_U32_e64_]], implicit $exec ; GFX11-NEXT: S_BRANCH %bb.4 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: bb.6 (%ir-block.38): diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f64.ll index 66fad3646911a..9a2a1163f6f59 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f64.ll @@ -14,17 +14,17 @@ define amdgpu_ps void @global_atomic_fadd_f64_no_rtn_atomicrmw(ptr addrspace(1) ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 - ; GFX90A-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s64) from %ir.ptr, addrspace 1) + ; GFX90A-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (f64) from %ir.ptr, addrspace 1) ; GFX90A-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.2.atomicrmw.start: ; GFX90A-NEXT: successors: %bb.3(0x04000000), %bb.2(0x7c000000) ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI %13, %bb.2, [[S_MOV_B64_]], %bb.1 - ; GFX90A-NEXT: [[PHI1:%[0-9]+]]:vreg_64_align2 = PHI [[GLOBAL_LOAD_DWORDX2_]], %bb.1, %19, %bb.2 + ; GFX90A-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI %17, %bb.2, [[S_MOV_B64_]], %bb.1 + ; GFX90A-NEXT: [[PHI1:%[0-9]+]]:vreg_64_align2 = PHI [[GLOBAL_LOAD_DWORDX2_]], %bb.1, %23, %bb.2 ; GFX90A-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[PHI1]], 0, [[REG_SEQUENCE1]], 0, 0, implicit $mode, implicit $exec ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[V_ADD_F64_e64_]], %subreg.sub0_sub1, [[PHI1]], %subreg.sub2_sub3 - ; GFX90A-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic monotonic (s64) on %ir.ptr, addrspace 1) + ; GFX90A-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic monotonic (i64) on %ir.ptr, addrspace 1) ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN]], [[PHI1]], implicit $exec ; GFX90A-NEXT: [[SI_IF_BREAK:%[0-9]+]]:sreg_64_xexec = SI_IF_BREAK [[V_CMP_EQ_U64_e64_]], [[PHI]], 
implicit-def $scc ; GFX90A-NEXT: SI_LOOP [[SI_IF_BREAK]], %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec @@ -45,7 +45,7 @@ define amdgpu_ps void @global_atomic_fadd_f64_no_rtn_atomicrmw(ptr addrspace(1) ; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX942-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 - ; GFX942-NEXT: GLOBAL_ATOMIC_ADD_F64 [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1) + ; GFX942-NEXT: GLOBAL_ATOMIC_ADD_F64 [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (f64) on %ir.ptr, addrspace 1) ; GFX942-NEXT: S_ENDPGM 0 %ret = atomicrmw fadd ptr addrspace(1) %ptr, double %data syncscope("wavefront") monotonic ret void @@ -63,17 +63,17 @@ define amdgpu_ps double @global_atomic_fadd_f64_rtn_atomicrmw(ptr addrspace(1) % ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 - ; GFX90A-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s64) from %ir.ptr, addrspace 1) + ; GFX90A-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (f64) from %ir.ptr, addrspace 1) ; GFX90A-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.2.atomicrmw.start: ; GFX90A-NEXT: successors: %bb.3(0x04000000), %bb.2(0x7c000000) ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI %13, %bb.2, [[S_MOV_B64_]], %bb.1 - ; GFX90A-NEXT: [[PHI1:%[0-9]+]]:vreg_64_align2 = PHI [[GLOBAL_LOAD_DWORDX2_]], %bb.1, %24, %bb.2 + ; GFX90A-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI %17, %bb.2, [[S_MOV_B64_]], %bb.1 + ; GFX90A-NEXT: [[PHI1:%[0-9]+]]:vreg_64_align2 = PHI [[GLOBAL_LOAD_DWORDX2_]], %bb.1, %29, %bb.2 ; GFX90A-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[PHI1]], 0, [[REG_SEQUENCE1]], 0, 0, implicit $mode, implicit $exec ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[V_ADD_F64_e64_]], %subreg.sub0_sub1, [[PHI1]], %subreg.sub2_sub3 - ; GFX90A-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic monotonic (s64) on %ir.ptr, addrspace 1) + ; GFX90A-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic monotonic (i64) on %ir.ptr, addrspace 1) ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN]], [[PHI1]], implicit $exec ; GFX90A-NEXT: [[SI_IF_BREAK:%[0-9]+]]:sreg_64_xexec = SI_IF_BREAK [[V_CMP_EQ_U64_e64_]], [[PHI]], implicit-def $scc ; GFX90A-NEXT: SI_LOOP [[SI_IF_BREAK]], %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec @@ -101,7 +101,7 @@ define amdgpu_ps double @global_atomic_fadd_f64_rtn_atomicrmw(ptr addrspace(1) % ; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX942-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX942-NEXT: 
[[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 - ; GFX942-NEXT: [[GLOBAL_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1) + ; GFX942-NEXT: [[GLOBAL_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (f64) on %ir.ptr, addrspace 1) ; GFX942-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub0 ; GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub1 ; GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec @@ -125,7 +125,7 @@ define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_atomicrmw(ptr addrspa ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 ; GFX90A_GFX942-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX942-NEXT: GLOBAL_ATOMIC_ADD_F64_SADDR [[V_MOV_B32_e32_]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1) + ; GFX90A_GFX942-NEXT: GLOBAL_ATOMIC_ADD_F64_SADDR [[V_MOV_B32_e32_]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (f64) on %ir.ptr, addrspace 1) ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = atomicrmw fadd ptr addrspace(1) %ptr, double %data syncscope("wavefront") monotonic, !amdgpu.no.fine.grained.memory !0 ret void @@ -143,7 +143,7 @@ define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_atomicrmw(ptr addrspac ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 ; GFX90A_GFX942-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX942-NEXT: [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1) + ; GFX90A_GFX942-NEXT: [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (f64) on %ir.ptr, addrspace 1) ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub0 ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub1 ; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-no-rtn.ll index 9c0db4cd162fc..ce6a9f4f1f2c2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-no-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-no-rtn.ll @@ -12,7 +12,7 @@ define amdgpu_ps void @global_atomic_fadd_v2f16_no_rtn(ptr addrspace(1) %ptr, <2 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY 
$vgpr1 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX908-NEXT: GLOBAL_ATOMIC_PK_ADD_F16 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec :: (load store syncscope("agent") seq_cst (<2 x s16>) on %ir.ptr, addrspace 1) + ; GFX908-NEXT: GLOBAL_ATOMIC_PK_ADD_F16 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec :: (load store syncscope("agent") seq_cst (<2 x f16>) on %ir.ptr, addrspace 1) ; GFX908-NEXT: S_ENDPGM 0 ; ; GFX90A_GFX942-LABEL: name: global_atomic_fadd_v2f16_no_rtn @@ -23,7 +23,7 @@ define amdgpu_ps void @global_atomic_fadd_v2f16_no_rtn(ptr addrspace(1) %ptr, <2 ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX942-NEXT: GLOBAL_ATOMIC_PK_ADD_F16 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec :: (load store syncscope("agent") seq_cst (<2 x s16>) on %ir.ptr, addrspace 1) + ; GFX90A_GFX942-NEXT: GLOBAL_ATOMIC_PK_ADD_F16 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec :: (load store syncscope("agent") seq_cst (<2 x f16>) on %ir.ptr, addrspace 1) ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = atomicrmw fadd ptr addrspace(1) %ptr, <2 x half> %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 ret void @@ -39,7 +39,7 @@ define amdgpu_ps void @global_atomic_fadd_v2f16_saddr_no_rtn(ptr addrspace(1) in ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX908-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX908-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("agent") seq_cst (<2 x s16>) on %ir.ptr, addrspace 1) + ; GFX908-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("agent") seq_cst (<2 x f16>) on %ir.ptr, addrspace 1) ; GFX908-NEXT: S_ENDPGM 0 ; ; GFX90A_GFX942-LABEL: name: global_atomic_fadd_v2f16_saddr_no_rtn @@ -51,7 +51,7 @@ define amdgpu_ps void @global_atomic_fadd_v2f16_saddr_no_rtn(ptr addrspace(1) in ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX90A_GFX942-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX942-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("agent") seq_cst (<2 x s16>) on %ir.ptr, addrspace 1) + ; GFX90A_GFX942-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("agent") seq_cst (<2 x f16>) on %ir.ptr, addrspace 1) ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = atomicrmw fadd ptr addrspace(1) %ptr, <2 x half> %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-rtn.ll index 62620a8875a3a..947fbf0523dee 100644 --- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-rtn.ll @@ -11,7 +11,7 @@ define amdgpu_ps <2 x half> @global_atomic_fadd_v2f16_rtn(ptr addrspace(1) %ptr, ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX942-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec :: (load store syncscope("agent") seq_cst (<2 x s16>) on %ir.ptr, addrspace 1) + ; GFX90A_GFX942-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec :: (load store syncscope("agent") seq_cst (<2 x f16>) on %ir.ptr, addrspace 1) ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_PK_ADD_F16_RTN]] ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = atomicrmw fadd ptr addrspace(1) %ptr, <2 x half> %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 @@ -28,7 +28,7 @@ define amdgpu_ps <2 x half> @global_atomic_fadd_v2f16_saddr_rtn(ptr addrspace(1) ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX90A_GFX942-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX942-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("agent") seq_cst (<2 x s16>) on %ir.ptr, addrspace 1) + ; GFX90A_GFX942-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("agent") seq_cst (<2 x f16>) on %ir.ptr, addrspace 1) ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN]] ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = atomicrmw fadd ptr addrspace(1) %ptr, <2 x half> %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll index da17977602cb1..9c60c8c8a5e79 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll @@ -19,17 +19,17 @@ define ptr addrspace(4) @external_constant_got() { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_constant, target-flags(amdgpu-gotprel32-hi) @external_constant, implicit-def $scc ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load (p4) from got, addrspace 4) - ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p4) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](p4) + ; GCN-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GCN-NEXT: $vgpr1 = COPY [[UV1]](i32) ; GCN-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 ; ; GCN-PAL-LABEL: name: 
external_constant_got ; GCN-PAL: bb.1 (%ir-block.0): - ; GCN-PAL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-lo) @external_constant - ; GCN-PAL-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-hi) @external_constant - ; GCN-PAL-NEXT: $vgpr0 = COPY [[S_MOV_B32_]](s32) - ; GCN-PAL-NEXT: $vgpr1 = COPY [[S_MOV_B32_1]](s32) + ; GCN-PAL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(i32) = S_MOV_B32 target-flags(amdgpu-abs32-lo) @external_constant + ; GCN-PAL-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32(i32) = S_MOV_B32 target-flags(amdgpu-abs32-hi) @external_constant + ; GCN-PAL-NEXT: $vgpr0 = COPY [[S_MOV_B32_]](i32) + ; GCN-PAL-NEXT: $vgpr1 = COPY [[S_MOV_B32_1]](i32) ; GCN-PAL-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 ret ptr addrspace(4) @external_constant } @@ -40,17 +40,17 @@ define ptr addrspace(1) @external_global_got() { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_global, target-flags(amdgpu-gotprel32-hi) @external_global, implicit-def $scc ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load (p1) from got, addrspace 4) - ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p1) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](p1) + ; GCN-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GCN-NEXT: $vgpr1 = COPY [[UV1]](i32) ; GCN-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 ; ; GCN-PAL-LABEL: name: external_global_got ; GCN-PAL: bb.1 (%ir-block.0): - ; GCN-PAL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-lo) @external_global - ; GCN-PAL-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-hi) @external_global - ; GCN-PAL-NEXT: $vgpr0 = COPY [[S_MOV_B32_]](s32) - ; GCN-PAL-NEXT: $vgpr1 = COPY [[S_MOV_B32_1]](s32) + ; GCN-PAL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(i32) = S_MOV_B32 target-flags(amdgpu-abs32-lo) @external_global + ; GCN-PAL-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32(i32) = S_MOV_B32 target-flags(amdgpu-abs32-hi) @external_global + ; GCN-PAL-NEXT: $vgpr0 = COPY [[S_MOV_B32_]](i32) + ; GCN-PAL-NEXT: $vgpr1 = COPY [[S_MOV_B32_1]](i32) ; GCN-PAL-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 ret ptr addrspace(1) @external_global } @@ -61,17 +61,17 @@ define ptr addrspace(999) @external_other_got() { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_other, target-flags(amdgpu-gotprel32-hi) @external_other, implicit-def $scc ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(p999) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load (p999) from got, addrspace 4) - ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p999) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](p999) + ; GCN-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GCN-NEXT: $vgpr1 = COPY [[UV1]](i32) ; GCN-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 ; ; GCN-PAL-LABEL: name: external_other_got ; GCN-PAL: bb.1 (%ir-block.0): - ; GCN-PAL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-lo) @external_other - ; GCN-PAL-NEXT: 
[[S_MOV_B32_1:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-hi) @external_other - ; GCN-PAL-NEXT: $vgpr0 = COPY [[S_MOV_B32_]](s32) - ; GCN-PAL-NEXT: $vgpr1 = COPY [[S_MOV_B32_1]](s32) + ; GCN-PAL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(i32) = S_MOV_B32 target-flags(amdgpu-abs32-lo) @external_other + ; GCN-PAL-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32(i32) = S_MOV_B32 target-flags(amdgpu-abs32-hi) @external_other + ; GCN-PAL-NEXT: $vgpr0 = COPY [[S_MOV_B32_]](i32) + ; GCN-PAL-NEXT: $vgpr1 = COPY [[S_MOV_B32_1]](i32) ; GCN-PAL-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 ret ptr addrspace(999) @external_other } @@ -81,17 +81,17 @@ define ptr addrspace(4) @internal_constant_pcrel() { ; GCN-LABEL: name: internal_constant_pcrel ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_constant, target-flags(amdgpu-rel32-hi) @internal_constant, implicit-def $scc - ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SI_PC_ADD_REL_OFFSET]](p4) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SI_PC_ADD_REL_OFFSET]](p4) + ; GCN-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GCN-NEXT: $vgpr1 = COPY [[UV1]](i32) ; GCN-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 ; ; GCN-PAL-LABEL: name: internal_constant_pcrel ; GCN-PAL: bb.1 (%ir-block.0): - ; GCN-PAL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-lo) @internal_constant - ; GCN-PAL-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-hi) @internal_constant - ; GCN-PAL-NEXT: $vgpr0 = COPY [[S_MOV_B32_]](s32) - ; GCN-PAL-NEXT: $vgpr1 = COPY [[S_MOV_B32_1]](s32) + ; GCN-PAL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(i32) = S_MOV_B32 target-flags(amdgpu-abs32-lo) @internal_constant + ; GCN-PAL-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32(i32) = S_MOV_B32 target-flags(amdgpu-abs32-hi) @internal_constant + ; GCN-PAL-NEXT: $vgpr0 = COPY [[S_MOV_B32_]](i32) + ; GCN-PAL-NEXT: $vgpr1 = COPY [[S_MOV_B32_1]](i32) ; GCN-PAL-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 ret ptr addrspace(4) @internal_constant } @@ -101,17 +101,17 @@ define ptr addrspace(1) @internal_global_pcrel() { ; GCN-LABEL: name: internal_global_pcrel ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p1) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_global, target-flags(amdgpu-rel32-hi) @internal_global, implicit-def $scc - ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SI_PC_ADD_REL_OFFSET]](p1) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SI_PC_ADD_REL_OFFSET]](p1) + ; GCN-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GCN-NEXT: $vgpr1 = COPY [[UV1]](i32) ; GCN-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 ; ; GCN-PAL-LABEL: name: internal_global_pcrel ; GCN-PAL: bb.1 (%ir-block.0): - ; GCN-PAL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-lo) @internal_global - ; GCN-PAL-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-hi) @internal_global - ; GCN-PAL-NEXT: $vgpr0 = COPY [[S_MOV_B32_]](s32) - ; GCN-PAL-NEXT: $vgpr1 = COPY [[S_MOV_B32_1]](s32) + ; GCN-PAL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(i32) = S_MOV_B32 target-flags(amdgpu-abs32-lo) @internal_global + ; 
GCN-PAL-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32(i32) = S_MOV_B32 target-flags(amdgpu-abs32-hi) @internal_global + ; GCN-PAL-NEXT: $vgpr0 = COPY [[S_MOV_B32_]](i32) + ; GCN-PAL-NEXT: $vgpr1 = COPY [[S_MOV_B32_1]](i32) ; GCN-PAL-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 ret ptr addrspace(1) @internal_global } @@ -121,17 +121,17 @@ define ptr addrspace(999) @internal_other_pcrel() { ; GCN-LABEL: name: internal_other_pcrel ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p999) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_other, target-flags(amdgpu-rel32-hi) @internal_other, implicit-def $scc - ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SI_PC_ADD_REL_OFFSET]](p999) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SI_PC_ADD_REL_OFFSET]](p999) + ; GCN-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GCN-NEXT: $vgpr1 = COPY [[UV1]](i32) ; GCN-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 ; ; GCN-PAL-LABEL: name: internal_other_pcrel ; GCN-PAL: bb.1 (%ir-block.0): - ; GCN-PAL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-lo) @internal_other - ; GCN-PAL-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-hi) @internal_other - ; GCN-PAL-NEXT: $vgpr0 = COPY [[S_MOV_B32_]](s32) - ; GCN-PAL-NEXT: $vgpr1 = COPY [[S_MOV_B32_1]](s32) + ; GCN-PAL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(i32) = S_MOV_B32 target-flags(amdgpu-abs32-lo) @internal_other + ; GCN-PAL-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32(i32) = S_MOV_B32 target-flags(amdgpu-abs32-hi) @internal_other + ; GCN-PAL-NEXT: $vgpr0 = COPY [[S_MOV_B32_]](i32) + ; GCN-PAL-NEXT: $vgpr1 = COPY [[S_MOV_B32_1]](i32) ; GCN-PAL-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 ret ptr addrspace(999) @internal_other } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll index d94bf3af3e2f9..3c5f75990ce8d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll @@ -169,9 +169,11 @@ define <4 x float> @waterfall_loop(<8 x i32> %vgpr_srd) { ; CHECK-NEXT: s_mov_b32 exec_lo, s4 ; CHECK-NEXT: ; %bb.4: ; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload +; CHECK-NEXT: ; implicit-def: $sgpr6 +; CHECK-NEXT: ; implicit-def: $sgpr5 ; CHECK-NEXT: ; implicit-def: $sgpr4 -; CHECK-NEXT: v_mov_b32_e32 v1, s4 -; CHECK-NEXT: v_mov_b32_e32 v2, s4 +; CHECK-NEXT: v_mov_b32_e32 v1, s6 +; CHECK-NEXT: v_mov_b32_e32 v2, s5 ; CHECK-NEXT: v_mov_b32_e32 v3, s4 ; CHECK-NEXT: s_xor_saveexec_b32 s4, -1 ; CHECK-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inline-asm-mismatched-size.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/inline-asm-mismatched-size.ll index 0b0c7b7df2570..cd4ac59408126 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inline-asm-mismatched-size.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inline-asm-mismatched-size.ll @@ -94,16 +94,16 @@ define void @use_vector_too_small(<8 x i32> %arg) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: 
[[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x i32>) ; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: SI_RETURN call void asm sideeffect "; use $0", "{v[0:7]}"(<8 x i32> %arg) @@ -116,15 +116,15 @@ define void @use_vector_too_big(<8 x i32> %arg) { ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1 (%ir-block.0): call void asm sideeffect "; use $0", "{v[0:9]}"(<8 x i32> %arg) @@ -137,9 +137,9 @@ define void @use_scalar_too_small(i64 %arg) { ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; 
CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1 (%ir-block.0): call void asm sideeffect "; use $0", "{v0}"(i64 %arg) @@ -152,7 +152,7 @@ define void @use_scalar_too_big(i32 %arg) { ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1 (%ir-block.0): call void asm sideeffect "; use $0", "{v[0:1]}"(i32 %arg) @@ -165,9 +165,9 @@ define void @use_pointer_too_small(ptr addrspace(1) %arg) { ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1 (%ir-block.0): call void asm sideeffect "; use $0", "{v0}"(ptr addrspace(1) %arg) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslate-bf16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslate-bf16.ll index 3206f8e55f44e..df5ee017fe65f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslate-bf16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslate-bf16.ll @@ -8,24 +8,27 @@ define <3 x bfloat> @v3bf16(<3 x bfloat> %arg0) { ; GFX9: bb.1 (%ir-block.0): ; GFX9-NEXT: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](s32) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16) - ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](s32) - ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16) - ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT bfloat 0xR0000 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16), [[C]](s16) - ; GFX9-NEXT: [[SHUF:%[0-9]+]]:_(<3 x s16>) = G_SHUFFLE_VECTOR [[TRUNC]](<3 x s16>), [[BUILD_VECTOR1]], shufflemask(3, 1, 2) - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[SHUF]](<3 x s16>) - ; GFX9-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s16) - ; GFX9-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT4]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[ANYEXT5]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i16), [[UV1:%[0-9]+]]:_(i16) = G_UNMERGE_VALUES [[COPY]](i32) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[UV]](i16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT 
[[UV1]](i16) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i16), [[UV3:%[0-9]+]]:_(i16) = G_UNMERGE_VALUES [[COPY1]](i32) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[UV2]](i16) + ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(i32) = G_ANYEXT [[UV3]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32), [[ANYEXT2]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(<3 x i16>) = G_TRUNC [[BUILD_VECTOR]](<3 x i32>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x bf16>) = G_BITCAST [[TRUNC]](<3 x i16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(bf16) = G_FCONSTANT bfloat 0xR0000 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x bf16>) = G_BUILD_VECTOR [[C]](bf16), [[C]](bf16), [[C]](bf16) + ; GFX9-NEXT: [[SHUF:%[0-9]+]]:_(<3 x bf16>) = G_SHUFFLE_VECTOR [[BITCAST]](<3 x bf16>), [[BUILD_VECTOR1]], shufflemask(3, 1, 2) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(bf16), [[UV5:%[0-9]+]]:_(bf16), [[UV6:%[0-9]+]]:_(bf16) = G_UNMERGE_VALUES [[SHUF]](<3 x bf16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[UV4]](bf16) + ; GFX9-NEXT: [[ANYEXT4:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[UV5]](bf16) + ; GFX9-NEXT: [[ANYEXT5:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST2]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT4]](i32) + ; GFX9-NEXT: $vgpr1 = COPY [[ANYEXT5]](i32) ; GFX9-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %res = shufflevector <3 x bfloat> %arg0, <3 x bfloat> zeroinitializer, <3 x i32> ret <3 x bfloat> %res @@ -36,24 +39,27 @@ define <4 x bfloat> @v4bf16(<4 x bfloat> %arg0) { ; GFX9: bb.1 (%ir-block.0): ; GFX9-NEXT: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](s32) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16) - ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](s32) - ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16) - ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT bfloat 0xR0000 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16) - ; GFX9-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s16>) = G_SHUFFLE_VECTOR [[TRUNC]](<4 x s16>), [[BUILD_VECTOR1]], shufflemask(3, 1, 2, 0) - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[SHUF]](<4 x s16>) - ; GFX9-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s16) - ; GFX9-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT4]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[ANYEXT5]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i16), [[UV1:%[0-9]+]]:_(i16) = G_UNMERGE_VALUES [[COPY]](i32) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[UV]](i16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[UV1]](i16) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i16), [[UV3:%[0-9]+]]:_(i16) = G_UNMERGE_VALUES [[COPY1]](i32) + ; 
GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[UV2]](i16) + ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(i32) = G_ANYEXT [[UV3]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32), [[ANYEXT2]](i32), [[ANYEXT3]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x i16>) = G_TRUNC [[BUILD_VECTOR]](<4 x i32>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x bf16>) = G_BITCAST [[TRUNC]](<4 x i16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(bf16) = G_FCONSTANT bfloat 0xR0000 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x bf16>) = G_BUILD_VECTOR [[C]](bf16), [[C]](bf16), [[C]](bf16), [[C]](bf16) + ; GFX9-NEXT: [[SHUF:%[0-9]+]]:_(<4 x bf16>) = G_SHUFFLE_VECTOR [[BITCAST]](<4 x bf16>), [[BUILD_VECTOR1]], shufflemask(3, 1, 2, 0) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(bf16), [[UV5:%[0-9]+]]:_(bf16), [[UV6:%[0-9]+]]:_(bf16), [[UV7:%[0-9]+]]:_(bf16) = G_UNMERGE_VALUES [[SHUF]](<4 x bf16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[UV4]](bf16) + ; GFX9-NEXT: [[ANYEXT4:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[UV5]](bf16) + ; GFX9-NEXT: [[ANYEXT5:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST2]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT4]](i32) + ; GFX9-NEXT: $vgpr1 = COPY [[ANYEXT5]](i32) ; GFX9-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %res = shufflevector <4 x bfloat> %arg0, <4 x bfloat> zeroinitializer, <4 x i32> ret <4 x bfloat> %res @@ -64,30 +70,34 @@ define <5 x bfloat> @v5bf16(<5 x bfloat> %arg0) { ; GFX9: bb.1 (%ir-block.0): ; GFX9-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](s32) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16) - ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](s32) - ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16) - ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s16) - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY2]](s32) - ; GFX9-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s16) - ; GFX9-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32), [[ANYEXT4]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(<5 x s16>) = G_TRUNC [[BUILD_VECTOR]](<5 x s32>) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT bfloat 0xR0000 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<5 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16) - ; GFX9-NEXT: [[SHUF:%[0-9]+]]:_(<5 x s16>) = G_SHUFFLE_VECTOR [[TRUNC]](<5 x s16>), [[BUILD_VECTOR1]], shufflemask(3, 1, 2, 0, 4) - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16), [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16), [[UV10:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[SHUF]](<5 x s16>) - ; GFX9-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s16) - ; GFX9-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s16) - ; GFX9-NEXT: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[UV8]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT6]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[ANYEXT7]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[ANYEXT8]](s32) + ; GFX9-NEXT: 
[[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i16), [[UV1:%[0-9]+]]:_(i16) = G_UNMERGE_VALUES [[COPY]](i32) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[UV]](i16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[UV1]](i16) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i16), [[UV3:%[0-9]+]]:_(i16) = G_UNMERGE_VALUES [[COPY1]](i32) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[UV2]](i16) + ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(i32) = G_ANYEXT [[UV3]](i16) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(i16), [[UV5:%[0-9]+]]:_(i16) = G_UNMERGE_VALUES [[COPY2]](i32) + ; GFX9-NEXT: [[ANYEXT4:%[0-9]+]]:_(i32) = G_ANYEXT [[UV4]](i16) + ; GFX9-NEXT: [[ANYEXT5:%[0-9]+]]:_(i32) = G_ANYEXT [[UV5]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32), [[ANYEXT2]](i32), [[ANYEXT3]](i32), [[ANYEXT4]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(<5 x i16>) = G_TRUNC [[BUILD_VECTOR]](<5 x i32>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x bf16>) = G_BITCAST [[TRUNC]](<5 x i16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(bf16) = G_FCONSTANT bfloat 0xR0000 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<5 x bf16>) = G_BUILD_VECTOR [[C]](bf16), [[C]](bf16), [[C]](bf16), [[C]](bf16), [[C]](bf16) + ; GFX9-NEXT: [[SHUF:%[0-9]+]]:_(<5 x bf16>) = G_SHUFFLE_VECTOR [[BITCAST]](<5 x bf16>), [[BUILD_VECTOR1]], shufflemask(3, 1, 2, 0, 4) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(bf16), [[UV7:%[0-9]+]]:_(bf16), [[UV8:%[0-9]+]]:_(bf16), [[UV9:%[0-9]+]]:_(bf16), [[UV10:%[0-9]+]]:_(bf16) = G_UNMERGE_VALUES [[SHUF]](<5 x bf16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[UV6]](bf16) + ; GFX9-NEXT: [[ANYEXT6:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[UV7]](bf16) + ; GFX9-NEXT: [[ANYEXT7:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST2]](i16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[UV8]](bf16) + ; GFX9-NEXT: [[ANYEXT8:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST3]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT6]](i32) + ; GFX9-NEXT: $vgpr1 = COPY [[ANYEXT7]](i32) + ; GFX9-NEXT: $vgpr2 = COPY [[ANYEXT8]](i32) ; GFX9-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %res = shufflevector <5 x bfloat> %arg0, <5 x bfloat> zeroinitializer, <5 x i32> ret <5 x bfloat> %res @@ -98,30 +108,34 @@ define <6 x bfloat> @v6bf16(<6 x bfloat> %arg0) { ; GFX9: bb.1 (%ir-block.0): ; GFX9-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](s32) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16) - ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](s32) - ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16) - ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s16) - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY2]](s32) - ; GFX9-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s16) - ; GFX9-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<6 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32), 
[[ANYEXT4]](s32), [[ANYEXT5]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(<6 x s16>) = G_TRUNC [[BUILD_VECTOR]](<6 x s32>) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT bfloat 0xR0000 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<6 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16) - ; GFX9-NEXT: [[SHUF:%[0-9]+]]:_(<6 x s16>) = G_SHUFFLE_VECTOR [[TRUNC]](<6 x s16>), [[BUILD_VECTOR1]], shufflemask(3, 1, 2, 0, 4, 5) - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16), [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16), [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[SHUF]](<6 x s16>) - ; GFX9-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s16) - ; GFX9-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s16) - ; GFX9-NEXT: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[UV8]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT6]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[ANYEXT7]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[ANYEXT8]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i16), [[UV1:%[0-9]+]]:_(i16) = G_UNMERGE_VALUES [[COPY]](i32) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[UV]](i16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[UV1]](i16) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i16), [[UV3:%[0-9]+]]:_(i16) = G_UNMERGE_VALUES [[COPY1]](i32) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[UV2]](i16) + ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(i32) = G_ANYEXT [[UV3]](i16) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(i16), [[UV5:%[0-9]+]]:_(i16) = G_UNMERGE_VALUES [[COPY2]](i32) + ; GFX9-NEXT: [[ANYEXT4:%[0-9]+]]:_(i32) = G_ANYEXT [[UV4]](i16) + ; GFX9-NEXT: [[ANYEXT5:%[0-9]+]]:_(i32) = G_ANYEXT [[UV5]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<6 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32), [[ANYEXT2]](i32), [[ANYEXT3]](i32), [[ANYEXT4]](i32), [[ANYEXT5]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(<6 x i16>) = G_TRUNC [[BUILD_VECTOR]](<6 x i32>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x bf16>) = G_BITCAST [[TRUNC]](<6 x i16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(bf16) = G_FCONSTANT bfloat 0xR0000 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<6 x bf16>) = G_BUILD_VECTOR [[C]](bf16), [[C]](bf16), [[C]](bf16), [[C]](bf16), [[C]](bf16), [[C]](bf16) + ; GFX9-NEXT: [[SHUF:%[0-9]+]]:_(<6 x bf16>) = G_SHUFFLE_VECTOR [[BITCAST]](<6 x bf16>), [[BUILD_VECTOR1]], shufflemask(3, 1, 2, 0, 4, 5) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(bf16), [[UV7:%[0-9]+]]:_(bf16), [[UV8:%[0-9]+]]:_(bf16), [[UV9:%[0-9]+]]:_(bf16), [[UV10:%[0-9]+]]:_(bf16), [[UV11:%[0-9]+]]:_(bf16) = G_UNMERGE_VALUES [[SHUF]](<6 x bf16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[UV6]](bf16) + ; GFX9-NEXT: [[ANYEXT6:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[UV7]](bf16) + ; GFX9-NEXT: [[ANYEXT7:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST2]](i16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[UV8]](bf16) + ; GFX9-NEXT: [[ANYEXT8:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST3]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT6]](i32) + ; GFX9-NEXT: $vgpr1 = COPY [[ANYEXT7]](i32) + ; GFX9-NEXT: $vgpr2 = COPY [[ANYEXT8]](i32) ; GFX9-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %res = shufflevector <6 x bfloat> %arg0, <6 x bfloat> zeroinitializer, <6 x i32> ret <6 x bfloat> %res @@ -132,36 +146,41 @@ define <7 x bfloat> @v7bf16(<7 x bfloat> %arg0) { ; GFX9: 
bb.1 (%ir-block.0): ; GFX9-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](s32) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16) - ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](s32) - ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16) - ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s16) - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY2]](s32) - ; GFX9-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s16) - ; GFX9-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s16) - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY3]](s32) - ; GFX9-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s16) - ; GFX9-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32), [[ANYEXT4]](s32), [[ANYEXT5]](s32), [[ANYEXT6]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(<7 x s16>) = G_TRUNC [[BUILD_VECTOR]](<7 x s32>) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT bfloat 0xR0000 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<7 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16) - ; GFX9-NEXT: [[SHUF:%[0-9]+]]:_(<7 x s16>) = G_SHUFFLE_VECTOR [[TRUNC]](<7 x s16>), [[BUILD_VECTOR1]], shufflemask(3, 1, 2, 0, 4, 5, 6) - ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16), [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16), [[UV12:%[0-9]+]]:_(s16), [[UV13:%[0-9]+]]:_(s16), [[UV14:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[SHUF]](<7 x s16>) - ; GFX9-NEXT: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[UV8]](s16) - ; GFX9-NEXT: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[UV9]](s16) - ; GFX9-NEXT: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[UV10]](s16) - ; GFX9-NEXT: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[UV11]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT8]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[ANYEXT9]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[ANYEXT10]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[ANYEXT11]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i16), [[UV1:%[0-9]+]]:_(i16) = G_UNMERGE_VALUES [[COPY]](i32) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[UV]](i16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[UV1]](i16) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i16), [[UV3:%[0-9]+]]:_(i16) = G_UNMERGE_VALUES [[COPY1]](i32) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[UV2]](i16) + ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(i32) = G_ANYEXT [[UV3]](i16) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(i16), [[UV5:%[0-9]+]]:_(i16) = G_UNMERGE_VALUES [[COPY2]](i32) + ; GFX9-NEXT: [[ANYEXT4:%[0-9]+]]:_(i32) = G_ANYEXT [[UV4]](i16) + ; GFX9-NEXT: [[ANYEXT5:%[0-9]+]]:_(i32) = G_ANYEXT [[UV5]](i16) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(i16), [[UV7:%[0-9]+]]:_(i16) = G_UNMERGE_VALUES [[COPY3]](i32) + ; GFX9-NEXT: 
[[ANYEXT6:%[0-9]+]]:_(i32) = G_ANYEXT [[UV6]](i16) + ; GFX9-NEXT: [[ANYEXT7:%[0-9]+]]:_(i32) = G_ANYEXT [[UV7]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32), [[ANYEXT2]](i32), [[ANYEXT3]](i32), [[ANYEXT4]](i32), [[ANYEXT5]](i32), [[ANYEXT6]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(<7 x i16>) = G_TRUNC [[BUILD_VECTOR]](<7 x i32>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<7 x bf16>) = G_BITCAST [[TRUNC]](<7 x i16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(bf16) = G_FCONSTANT bfloat 0xR0000 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<7 x bf16>) = G_BUILD_VECTOR [[C]](bf16), [[C]](bf16), [[C]](bf16), [[C]](bf16), [[C]](bf16), [[C]](bf16), [[C]](bf16) + ; GFX9-NEXT: [[SHUF:%[0-9]+]]:_(<7 x bf16>) = G_SHUFFLE_VECTOR [[BITCAST]](<7 x bf16>), [[BUILD_VECTOR1]], shufflemask(3, 1, 2, 0, 4, 5, 6) + ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(bf16), [[UV9:%[0-9]+]]:_(bf16), [[UV10:%[0-9]+]]:_(bf16), [[UV11:%[0-9]+]]:_(bf16), [[UV12:%[0-9]+]]:_(bf16), [[UV13:%[0-9]+]]:_(bf16), [[UV14:%[0-9]+]]:_(bf16) = G_UNMERGE_VALUES [[SHUF]](<7 x bf16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[UV8]](bf16) + ; GFX9-NEXT: [[ANYEXT8:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[UV9]](bf16) + ; GFX9-NEXT: [[ANYEXT9:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST2]](i16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[UV10]](bf16) + ; GFX9-NEXT: [[ANYEXT10:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST3]](i16) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[UV11]](bf16) + ; GFX9-NEXT: [[ANYEXT11:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST4]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT8]](i32) + ; GFX9-NEXT: $vgpr1 = COPY [[ANYEXT9]](i32) + ; GFX9-NEXT: $vgpr2 = COPY [[ANYEXT10]](i32) + ; GFX9-NEXT: $vgpr3 = COPY [[ANYEXT11]](i32) ; GFX9-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %res = shufflevector <7 x bfloat> %arg0, <7 x bfloat> zeroinitializer, <7 x i32> ret <7 x bfloat> %res @@ -172,36 +191,41 @@ define <8 x bfloat> @v8bf16(<8 x bfloat> %arg0) { ; GFX9: bb.1 (%ir-block.0): ; GFX9-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](s32) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16) - ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](s32) - ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16) - ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s16) - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY2]](s32) - ; GFX9-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s16) - ; GFX9-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s16) - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY3]](s32) - ; GFX9-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s16) - ; GFX9-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32), [[ANYEXT4]](s32), [[ANYEXT5]](s32), [[ANYEXT6]](s32), [[ANYEXT7]](s32) - ; GFX9-NEXT: 
[[TRUNC:%[0-9]+]]:_(<8 x s16>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT bfloat 0xR0000 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16) - ; GFX9-NEXT: [[SHUF:%[0-9]+]]:_(<8 x s16>) = G_SHUFFLE_VECTOR [[TRUNC]](<8 x s16>), [[BUILD_VECTOR1]], shufflemask(3, 1, 2, 0, 4, 5, 6, 7) - ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16), [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16), [[UV12:%[0-9]+]]:_(s16), [[UV13:%[0-9]+]]:_(s16), [[UV14:%[0-9]+]]:_(s16), [[UV15:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[SHUF]](<8 x s16>) - ; GFX9-NEXT: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[UV8]](s16) - ; GFX9-NEXT: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[UV9]](s16) - ; GFX9-NEXT: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[UV10]](s16) - ; GFX9-NEXT: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[UV11]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT8]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[ANYEXT9]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[ANYEXT10]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[ANYEXT11]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i16), [[UV1:%[0-9]+]]:_(i16) = G_UNMERGE_VALUES [[COPY]](i32) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[UV]](i16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[UV1]](i16) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i16), [[UV3:%[0-9]+]]:_(i16) = G_UNMERGE_VALUES [[COPY1]](i32) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[UV2]](i16) + ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(i32) = G_ANYEXT [[UV3]](i16) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(i16), [[UV5:%[0-9]+]]:_(i16) = G_UNMERGE_VALUES [[COPY2]](i32) + ; GFX9-NEXT: [[ANYEXT4:%[0-9]+]]:_(i32) = G_ANYEXT [[UV4]](i16) + ; GFX9-NEXT: [[ANYEXT5:%[0-9]+]]:_(i32) = G_ANYEXT [[UV5]](i16) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(i16), [[UV7:%[0-9]+]]:_(i16) = G_UNMERGE_VALUES [[COPY3]](i32) + ; GFX9-NEXT: [[ANYEXT6:%[0-9]+]]:_(i32) = G_ANYEXT [[UV6]](i16) + ; GFX9-NEXT: [[ANYEXT7:%[0-9]+]]:_(i32) = G_ANYEXT [[UV7]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32), [[ANYEXT2]](i32), [[ANYEXT3]](i32), [[ANYEXT4]](i32), [[ANYEXT5]](i32), [[ANYEXT6]](i32), [[ANYEXT7]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(<8 x i16>) = G_TRUNC [[BUILD_VECTOR]](<8 x i32>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x bf16>) = G_BITCAST [[TRUNC]](<8 x i16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(bf16) = G_FCONSTANT bfloat 0xR0000 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x bf16>) = G_BUILD_VECTOR [[C]](bf16), [[C]](bf16), [[C]](bf16), [[C]](bf16), [[C]](bf16), [[C]](bf16), [[C]](bf16), [[C]](bf16) + ; GFX9-NEXT: [[SHUF:%[0-9]+]]:_(<8 x bf16>) = G_SHUFFLE_VECTOR [[BITCAST]](<8 x bf16>), [[BUILD_VECTOR1]], shufflemask(3, 1, 2, 0, 4, 5, 6, 7) + ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(bf16), [[UV9:%[0-9]+]]:_(bf16), [[UV10:%[0-9]+]]:_(bf16), [[UV11:%[0-9]+]]:_(bf16), [[UV12:%[0-9]+]]:_(bf16), [[UV13:%[0-9]+]]:_(bf16), [[UV14:%[0-9]+]]:_(bf16), [[UV15:%[0-9]+]]:_(bf16) = G_UNMERGE_VALUES [[SHUF]](<8 x bf16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[UV8]](bf16) + ; GFX9-NEXT: [[ANYEXT8:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[UV9]](bf16) + ; GFX9-NEXT: [[ANYEXT9:%[0-9]+]]:_(i32) = G_ANYEXT 
[[BITCAST2]](i16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[UV10]](bf16) + ; GFX9-NEXT: [[ANYEXT10:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST3]](i16) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[UV11]](bf16) + ; GFX9-NEXT: [[ANYEXT11:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST4]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT8]](i32) + ; GFX9-NEXT: $vgpr1 = COPY [[ANYEXT9]](i32) + ; GFX9-NEXT: $vgpr2 = COPY [[ANYEXT10]](i32) + ; GFX9-NEXT: $vgpr3 = COPY [[ANYEXT11]](i32) ; GFX9-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %res = shufflevector <8 x bfloat> %arg0, <8 x bfloat> zeroinitializer, <8 x i32> ret <8 x bfloat> %res @@ -212,57 +236,66 @@ define <16 x bfloat> @v16bf16(<16 x bfloat> %arg0) { ; GFX9: bb.1 (%ir-block.0): ; GFX9-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](s32) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16) - ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](s32) - ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16) - ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s16) - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY2]](s32) - ; GFX9-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s16) - ; GFX9-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s16) - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY3]](s32) - ; GFX9-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s16) - ; GFX9-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s16) - ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY4]](s32) - ; GFX9-NEXT: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[UV8]](s16) - ; GFX9-NEXT: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[UV9]](s16) - ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY5]](s32) - ; GFX9-NEXT: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[UV10]](s16) - ; GFX9-NEXT: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[UV11]](s16) - ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s16), [[UV13:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY6]](s32) - ; GFX9-NEXT: [[ANYEXT12:%[0-9]+]]:_(s32) = G_ANYEXT [[UV12]](s16) - ; GFX9-NEXT: [[ANYEXT13:%[0-9]+]]:_(s32) = G_ANYEXT [[UV13]](s16) - ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s16), [[UV15:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY7]](s32) - ; GFX9-NEXT: [[ANYEXT14:%[0-9]+]]:_(s32) = G_ANYEXT [[UV14]](s16) - ; GFX9-NEXT: [[ANYEXT15:%[0-9]+]]:_(s32) = G_ANYEXT [[UV15]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32), [[ANYEXT4]](s32), [[ANYEXT5]](s32), [[ANYEXT6]](s32), [[ANYEXT7]](s32), [[ANYEXT8]](s32), [[ANYEXT9]](s32), [[ANYEXT10]](s32), [[ANYEXT11]](s32), [[ANYEXT12]](s32), [[ANYEXT13]](s32), [[ANYEXT14]](s32), [[ANYEXT15]](s32) - ; GFX9-NEXT: 
[[TRUNC:%[0-9]+]]:_(<16 x s16>) = G_TRUNC [[BUILD_VECTOR]](<16 x s32>) - ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s16), [[UV17:%[0-9]+]]:_(s16), [[UV18:%[0-9]+]]:_(s16), [[UV19:%[0-9]+]]:_(s16), [[UV20:%[0-9]+]]:_(s16), [[UV21:%[0-9]+]]:_(s16), [[UV22:%[0-9]+]]:_(s16), [[UV23:%[0-9]+]]:_(s16), [[UV24:%[0-9]+]]:_(s16), [[UV25:%[0-9]+]]:_(s16), [[UV26:%[0-9]+]]:_(s16), [[UV27:%[0-9]+]]:_(s16), [[UV28:%[0-9]+]]:_(s16), [[UV29:%[0-9]+]]:_(s16), [[UV30:%[0-9]+]]:_(s16), [[UV31:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[TRUNC]](<16 x s16>) - ; GFX9-NEXT: [[ANYEXT16:%[0-9]+]]:_(s32) = G_ANYEXT [[UV16]](s16) - ; GFX9-NEXT: [[ANYEXT17:%[0-9]+]]:_(s32) = G_ANYEXT [[UV17]](s16) - ; GFX9-NEXT: [[ANYEXT18:%[0-9]+]]:_(s32) = G_ANYEXT [[UV18]](s16) - ; GFX9-NEXT: [[ANYEXT19:%[0-9]+]]:_(s32) = G_ANYEXT [[UV19]](s16) - ; GFX9-NEXT: [[ANYEXT20:%[0-9]+]]:_(s32) = G_ANYEXT [[UV20]](s16) - ; GFX9-NEXT: [[ANYEXT21:%[0-9]+]]:_(s32) = G_ANYEXT [[UV21]](s16) - ; GFX9-NEXT: [[ANYEXT22:%[0-9]+]]:_(s32) = G_ANYEXT [[UV22]](s16) - ; GFX9-NEXT: [[ANYEXT23:%[0-9]+]]:_(s32) = G_ANYEXT [[UV23]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT16]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[ANYEXT17]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[ANYEXT18]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[ANYEXT19]](s32) - ; GFX9-NEXT: $vgpr4 = COPY [[ANYEXT20]](s32) - ; GFX9-NEXT: $vgpr5 = COPY [[ANYEXT21]](s32) - ; GFX9-NEXT: $vgpr6 = COPY [[ANYEXT22]](s32) - ; GFX9-NEXT: $vgpr7 = COPY [[ANYEXT23]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i16), [[UV1:%[0-9]+]]:_(i16) = G_UNMERGE_VALUES [[COPY]](i32) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[UV]](i16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[UV1]](i16) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i16), [[UV3:%[0-9]+]]:_(i16) = G_UNMERGE_VALUES [[COPY1]](i32) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[UV2]](i16) + ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(i32) = G_ANYEXT [[UV3]](i16) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(i16), [[UV5:%[0-9]+]]:_(i16) = G_UNMERGE_VALUES [[COPY2]](i32) + ; GFX9-NEXT: [[ANYEXT4:%[0-9]+]]:_(i32) = G_ANYEXT [[UV4]](i16) + ; GFX9-NEXT: [[ANYEXT5:%[0-9]+]]:_(i32) = G_ANYEXT [[UV5]](i16) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(i16), [[UV7:%[0-9]+]]:_(i16) = G_UNMERGE_VALUES [[COPY3]](i32) + ; GFX9-NEXT: [[ANYEXT6:%[0-9]+]]:_(i32) = G_ANYEXT [[UV6]](i16) + ; GFX9-NEXT: [[ANYEXT7:%[0-9]+]]:_(i32) = G_ANYEXT [[UV7]](i16) + ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(i16), [[UV9:%[0-9]+]]:_(i16) = G_UNMERGE_VALUES [[COPY4]](i32) + ; GFX9-NEXT: [[ANYEXT8:%[0-9]+]]:_(i32) = G_ANYEXT [[UV8]](i16) + ; GFX9-NEXT: [[ANYEXT9:%[0-9]+]]:_(i32) = G_ANYEXT [[UV9]](i16) + ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(i16), [[UV11:%[0-9]+]]:_(i16) = G_UNMERGE_VALUES [[COPY5]](i32) + ; GFX9-NEXT: [[ANYEXT10:%[0-9]+]]:_(i32) = G_ANYEXT [[UV10]](i16) + ; GFX9-NEXT: [[ANYEXT11:%[0-9]+]]:_(i32) = G_ANYEXT [[UV11]](i16) + ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(i16), [[UV13:%[0-9]+]]:_(i16) = G_UNMERGE_VALUES [[COPY6]](i32) + ; GFX9-NEXT: [[ANYEXT12:%[0-9]+]]:_(i32) = G_ANYEXT [[UV12]](i16) + ; GFX9-NEXT: [[ANYEXT13:%[0-9]+]]:_(i32) = G_ANYEXT [[UV13]](i16) + ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(i16), [[UV15:%[0-9]+]]:_(i16) = 
G_UNMERGE_VALUES [[COPY7]](i32) + ; GFX9-NEXT: [[ANYEXT14:%[0-9]+]]:_(i32) = G_ANYEXT [[UV14]](i16) + ; GFX9-NEXT: [[ANYEXT15:%[0-9]+]]:_(i32) = G_ANYEXT [[UV15]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32), [[ANYEXT2]](i32), [[ANYEXT3]](i32), [[ANYEXT4]](i32), [[ANYEXT5]](i32), [[ANYEXT6]](i32), [[ANYEXT7]](i32), [[ANYEXT8]](i32), [[ANYEXT9]](i32), [[ANYEXT10]](i32), [[ANYEXT11]](i32), [[ANYEXT12]](i32), [[ANYEXT13]](i32), [[ANYEXT14]](i32), [[ANYEXT15]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(<16 x i16>) = G_TRUNC [[BUILD_VECTOR]](<16 x i32>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x bf16>) = G_BITCAST [[TRUNC]](<16 x i16>) + ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(bf16), [[UV17:%[0-9]+]]:_(bf16), [[UV18:%[0-9]+]]:_(bf16), [[UV19:%[0-9]+]]:_(bf16), [[UV20:%[0-9]+]]:_(bf16), [[UV21:%[0-9]+]]:_(bf16), [[UV22:%[0-9]+]]:_(bf16), [[UV23:%[0-9]+]]:_(bf16), [[UV24:%[0-9]+]]:_(bf16), [[UV25:%[0-9]+]]:_(bf16), [[UV26:%[0-9]+]]:_(bf16), [[UV27:%[0-9]+]]:_(bf16), [[UV28:%[0-9]+]]:_(bf16), [[UV29:%[0-9]+]]:_(bf16), [[UV30:%[0-9]+]]:_(bf16), [[UV31:%[0-9]+]]:_(bf16) = G_UNMERGE_VALUES [[BITCAST]](<16 x bf16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[UV16]](bf16) + ; GFX9-NEXT: [[ANYEXT16:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[UV17]](bf16) + ; GFX9-NEXT: [[ANYEXT17:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST2]](i16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[UV18]](bf16) + ; GFX9-NEXT: [[ANYEXT18:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST3]](i16) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[UV19]](bf16) + ; GFX9-NEXT: [[ANYEXT19:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST4]](i16) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[UV20]](bf16) + ; GFX9-NEXT: [[ANYEXT20:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST5]](i16) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[UV21]](bf16) + ; GFX9-NEXT: [[ANYEXT21:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST6]](i16) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[UV22]](bf16) + ; GFX9-NEXT: [[ANYEXT22:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST7]](i16) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[UV23]](bf16) + ; GFX9-NEXT: [[ANYEXT23:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST8]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT16]](i32) + ; GFX9-NEXT: $vgpr1 = COPY [[ANYEXT17]](i32) + ; GFX9-NEXT: $vgpr2 = COPY [[ANYEXT18]](i32) + ; GFX9-NEXT: $vgpr3 = COPY [[ANYEXT19]](i32) + ; GFX9-NEXT: $vgpr4 = COPY [[ANYEXT20]](i32) + ; GFX9-NEXT: $vgpr5 = COPY [[ANYEXT21]](i32) + ; GFX9-NEXT: $vgpr6 = COPY [[ANYEXT22]](i32) + ; GFX9-NEXT: $vgpr7 = COPY [[ANYEXT23]](i32) ; GFX9-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ret <16 x bfloat> %arg0 } @@ -272,105 +305,122 @@ define <32 x bfloat> @v32bf16(<32 x bfloat> %arg0) { ; GFX9: bb.1 (%ir-block.0): ; GFX9-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-NEXT: 
[[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](s32) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16) - ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](s32) - ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16) - ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s16) - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY2]](s32) - ; GFX9-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s16) - ; GFX9-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s16) - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY3]](s32) - ; GFX9-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s16) - ; GFX9-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s16) - ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY4]](s32) - ; GFX9-NEXT: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[UV8]](s16) - ; GFX9-NEXT: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[UV9]](s16) - ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY5]](s32) - ; GFX9-NEXT: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[UV10]](s16) - ; GFX9-NEXT: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[UV11]](s16) - ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s16), [[UV13:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY6]](s32) - ; GFX9-NEXT: [[ANYEXT12:%[0-9]+]]:_(s32) = G_ANYEXT [[UV12]](s16) - ; GFX9-NEXT: [[ANYEXT13:%[0-9]+]]:_(s32) = G_ANYEXT [[UV13]](s16) - ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s16), [[UV15:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY7]](s32) - ; GFX9-NEXT: [[ANYEXT14:%[0-9]+]]:_(s32) = G_ANYEXT [[UV14]](s16) - ; GFX9-NEXT: [[ANYEXT15:%[0-9]+]]:_(s32) = G_ANYEXT [[UV15]](s16) - ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s16), [[UV17:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY8]](s32) - ; GFX9-NEXT: [[ANYEXT16:%[0-9]+]]:_(s32) = G_ANYEXT [[UV16]](s16) - ; GFX9-NEXT: [[ANYEXT17:%[0-9]+]]:_(s32) = G_ANYEXT [[UV17]](s16) - ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s16), [[UV19:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY9]](s32) - ; GFX9-NEXT: [[ANYEXT18:%[0-9]+]]:_(s32) = G_ANYEXT [[UV18]](s16) - ; GFX9-NEXT: [[ANYEXT19:%[0-9]+]]:_(s32) = G_ANYEXT [[UV19]](s16) - ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s16), [[UV21:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY10]](s32) - ; GFX9-NEXT: [[ANYEXT20:%[0-9]+]]:_(s32) = G_ANYEXT [[UV20]](s16) - ; GFX9-NEXT: [[ANYEXT21:%[0-9]+]]:_(s32) = G_ANYEXT [[UV21]](s16) - ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s16), [[UV23:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY11]](s32) - ; GFX9-NEXT: [[ANYEXT22:%[0-9]+]]:_(s32) = G_ANYEXT [[UV22]](s16) - ; GFX9-NEXT: [[ANYEXT23:%[0-9]+]]:_(s32) = G_ANYEXT [[UV23]](s16) - ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s16), [[UV25:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY12]](s32) - ; GFX9-NEXT: [[ANYEXT24:%[0-9]+]]:_(s32) = G_ANYEXT [[UV24]](s16) - ; GFX9-NEXT: [[ANYEXT25:%[0-9]+]]:_(s32) = G_ANYEXT [[UV25]](s16) - ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s16), 
[[UV27:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY13]](s32) - ; GFX9-NEXT: [[ANYEXT26:%[0-9]+]]:_(s32) = G_ANYEXT [[UV26]](s16) - ; GFX9-NEXT: [[ANYEXT27:%[0-9]+]]:_(s32) = G_ANYEXT [[UV27]](s16) - ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(s16), [[UV29:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY14]](s32) - ; GFX9-NEXT: [[ANYEXT28:%[0-9]+]]:_(s32) = G_ANYEXT [[UV28]](s16) - ; GFX9-NEXT: [[ANYEXT29:%[0-9]+]]:_(s32) = G_ANYEXT [[UV29]](s16) - ; GFX9-NEXT: [[UV30:%[0-9]+]]:_(s16), [[UV31:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY15]](s32) - ; GFX9-NEXT: [[ANYEXT30:%[0-9]+]]:_(s32) = G_ANYEXT [[UV30]](s16) - ; GFX9-NEXT: [[ANYEXT31:%[0-9]+]]:_(s32) = G_ANYEXT [[UV31]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32), [[ANYEXT4]](s32), [[ANYEXT5]](s32), [[ANYEXT6]](s32), [[ANYEXT7]](s32), [[ANYEXT8]](s32), [[ANYEXT9]](s32), [[ANYEXT10]](s32), [[ANYEXT11]](s32), [[ANYEXT12]](s32), [[ANYEXT13]](s32), [[ANYEXT14]](s32), [[ANYEXT15]](s32), [[ANYEXT16]](s32), [[ANYEXT17]](s32), [[ANYEXT18]](s32), [[ANYEXT19]](s32), [[ANYEXT20]](s32), [[ANYEXT21]](s32), [[ANYEXT22]](s32), [[ANYEXT23]](s32), [[ANYEXT24]](s32), [[ANYEXT25]](s32), [[ANYEXT26]](s32), [[ANYEXT27]](s32), [[ANYEXT28]](s32), [[ANYEXT29]](s32), [[ANYEXT30]](s32), [[ANYEXT31]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(<32 x s16>) = G_TRUNC [[BUILD_VECTOR]](<32 x s32>) - ; GFX9-NEXT: [[UV32:%[0-9]+]]:_(s16), [[UV33:%[0-9]+]]:_(s16), [[UV34:%[0-9]+]]:_(s16), [[UV35:%[0-9]+]]:_(s16), [[UV36:%[0-9]+]]:_(s16), [[UV37:%[0-9]+]]:_(s16), [[UV38:%[0-9]+]]:_(s16), [[UV39:%[0-9]+]]:_(s16), [[UV40:%[0-9]+]]:_(s16), [[UV41:%[0-9]+]]:_(s16), [[UV42:%[0-9]+]]:_(s16), [[UV43:%[0-9]+]]:_(s16), [[UV44:%[0-9]+]]:_(s16), [[UV45:%[0-9]+]]:_(s16), [[UV46:%[0-9]+]]:_(s16), [[UV47:%[0-9]+]]:_(s16), [[UV48:%[0-9]+]]:_(s16), [[UV49:%[0-9]+]]:_(s16), [[UV50:%[0-9]+]]:_(s16), [[UV51:%[0-9]+]]:_(s16), [[UV52:%[0-9]+]]:_(s16), [[UV53:%[0-9]+]]:_(s16), [[UV54:%[0-9]+]]:_(s16), [[UV55:%[0-9]+]]:_(s16), [[UV56:%[0-9]+]]:_(s16), [[UV57:%[0-9]+]]:_(s16), [[UV58:%[0-9]+]]:_(s16), [[UV59:%[0-9]+]]:_(s16), [[UV60:%[0-9]+]]:_(s16), [[UV61:%[0-9]+]]:_(s16), [[UV62:%[0-9]+]]:_(s16), [[UV63:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[TRUNC]](<32 x s16>) - ; GFX9-NEXT: [[ANYEXT32:%[0-9]+]]:_(s32) = G_ANYEXT [[UV32]](s16) - ; GFX9-NEXT: [[ANYEXT33:%[0-9]+]]:_(s32) = G_ANYEXT [[UV33]](s16) - ; GFX9-NEXT: [[ANYEXT34:%[0-9]+]]:_(s32) = G_ANYEXT [[UV34]](s16) - ; GFX9-NEXT: [[ANYEXT35:%[0-9]+]]:_(s32) = G_ANYEXT [[UV35]](s16) - ; GFX9-NEXT: [[ANYEXT36:%[0-9]+]]:_(s32) = G_ANYEXT [[UV36]](s16) - ; GFX9-NEXT: [[ANYEXT37:%[0-9]+]]:_(s32) = G_ANYEXT [[UV37]](s16) - ; GFX9-NEXT: [[ANYEXT38:%[0-9]+]]:_(s32) = G_ANYEXT [[UV38]](s16) - ; GFX9-NEXT: [[ANYEXT39:%[0-9]+]]:_(s32) = G_ANYEXT [[UV39]](s16) - ; GFX9-NEXT: [[ANYEXT40:%[0-9]+]]:_(s32) = G_ANYEXT [[UV40]](s16) - ; GFX9-NEXT: [[ANYEXT41:%[0-9]+]]:_(s32) = G_ANYEXT [[UV41]](s16) - ; GFX9-NEXT: [[ANYEXT42:%[0-9]+]]:_(s32) = G_ANYEXT [[UV42]](s16) - ; GFX9-NEXT: [[ANYEXT43:%[0-9]+]]:_(s32) = G_ANYEXT [[UV43]](s16) - ; GFX9-NEXT: [[ANYEXT44:%[0-9]+]]:_(s32) = G_ANYEXT [[UV44]](s16) - ; GFX9-NEXT: [[ANYEXT45:%[0-9]+]]:_(s32) = G_ANYEXT [[UV45]](s16) - ; GFX9-NEXT: [[ANYEXT46:%[0-9]+]]:_(s32) = G_ANYEXT [[UV46]](s16) - ; GFX9-NEXT: [[ANYEXT47:%[0-9]+]]:_(s32) = G_ANYEXT [[UV47]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT32]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[ANYEXT33]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[ANYEXT34]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[ANYEXT35]](s32) - ; 
GFX9-NEXT: $vgpr4 = COPY [[ANYEXT36]](s32) - ; GFX9-NEXT: $vgpr5 = COPY [[ANYEXT37]](s32) - ; GFX9-NEXT: $vgpr6 = COPY [[ANYEXT38]](s32) - ; GFX9-NEXT: $vgpr7 = COPY [[ANYEXT39]](s32) - ; GFX9-NEXT: $vgpr8 = COPY [[ANYEXT40]](s32) - ; GFX9-NEXT: $vgpr9 = COPY [[ANYEXT41]](s32) - ; GFX9-NEXT: $vgpr10 = COPY [[ANYEXT42]](s32) - ; GFX9-NEXT: $vgpr11 = COPY [[ANYEXT43]](s32) - ; GFX9-NEXT: $vgpr12 = COPY [[ANYEXT44]](s32) - ; GFX9-NEXT: $vgpr13 = COPY [[ANYEXT45]](s32) - ; GFX9-NEXT: $vgpr14 = COPY [[ANYEXT46]](s32) - ; GFX9-NEXT: $vgpr15 = COPY [[ANYEXT47]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i16), [[UV1:%[0-9]+]]:_(i16) = G_UNMERGE_VALUES [[COPY]](i32) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[UV]](i16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[UV1]](i16) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i16), [[UV3:%[0-9]+]]:_(i16) = G_UNMERGE_VALUES [[COPY1]](i32) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[UV2]](i16) + ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(i32) = G_ANYEXT [[UV3]](i16) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(i16), [[UV5:%[0-9]+]]:_(i16) = G_UNMERGE_VALUES [[COPY2]](i32) + ; GFX9-NEXT: [[ANYEXT4:%[0-9]+]]:_(i32) = G_ANYEXT [[UV4]](i16) + ; GFX9-NEXT: [[ANYEXT5:%[0-9]+]]:_(i32) = G_ANYEXT [[UV5]](i16) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(i16), [[UV7:%[0-9]+]]:_(i16) = G_UNMERGE_VALUES [[COPY3]](i32) + ; GFX9-NEXT: [[ANYEXT6:%[0-9]+]]:_(i32) = G_ANYEXT [[UV6]](i16) + ; GFX9-NEXT: [[ANYEXT7:%[0-9]+]]:_(i32) = G_ANYEXT [[UV7]](i16) + ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(i16), [[UV9:%[0-9]+]]:_(i16) = G_UNMERGE_VALUES [[COPY4]](i32) + ; GFX9-NEXT: [[ANYEXT8:%[0-9]+]]:_(i32) = G_ANYEXT [[UV8]](i16) + ; GFX9-NEXT: [[ANYEXT9:%[0-9]+]]:_(i32) = G_ANYEXT [[UV9]](i16) + ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(i16), [[UV11:%[0-9]+]]:_(i16) = G_UNMERGE_VALUES [[COPY5]](i32) + ; GFX9-NEXT: [[ANYEXT10:%[0-9]+]]:_(i32) = G_ANYEXT [[UV10]](i16) + ; GFX9-NEXT: [[ANYEXT11:%[0-9]+]]:_(i32) = G_ANYEXT [[UV11]](i16) + ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(i16), [[UV13:%[0-9]+]]:_(i16) = G_UNMERGE_VALUES [[COPY6]](i32) + ; GFX9-NEXT: [[ANYEXT12:%[0-9]+]]:_(i32) = G_ANYEXT [[UV12]](i16) + ; GFX9-NEXT: [[ANYEXT13:%[0-9]+]]:_(i32) = G_ANYEXT [[UV13]](i16) + ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(i16), [[UV15:%[0-9]+]]:_(i16) = G_UNMERGE_VALUES [[COPY7]](i32) + ; GFX9-NEXT: [[ANYEXT14:%[0-9]+]]:_(i32) = G_ANYEXT [[UV14]](i16) + ; GFX9-NEXT: [[ANYEXT15:%[0-9]+]]:_(i32) = G_ANYEXT [[UV15]](i16) + ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(i16), [[UV17:%[0-9]+]]:_(i16) = G_UNMERGE_VALUES [[COPY8]](i32) + ; GFX9-NEXT: [[ANYEXT16:%[0-9]+]]:_(i32) = G_ANYEXT [[UV16]](i16) + ; GFX9-NEXT: [[ANYEXT17:%[0-9]+]]:_(i32) = G_ANYEXT [[UV17]](i16) + ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(i16), 
[[UV19:%[0-9]+]]:_(i16) = G_UNMERGE_VALUES [[COPY9]](i32) + ; GFX9-NEXT: [[ANYEXT18:%[0-9]+]]:_(i32) = G_ANYEXT [[UV18]](i16) + ; GFX9-NEXT: [[ANYEXT19:%[0-9]+]]:_(i32) = G_ANYEXT [[UV19]](i16) + ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(i16), [[UV21:%[0-9]+]]:_(i16) = G_UNMERGE_VALUES [[COPY10]](i32) + ; GFX9-NEXT: [[ANYEXT20:%[0-9]+]]:_(i32) = G_ANYEXT [[UV20]](i16) + ; GFX9-NEXT: [[ANYEXT21:%[0-9]+]]:_(i32) = G_ANYEXT [[UV21]](i16) + ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(i16), [[UV23:%[0-9]+]]:_(i16) = G_UNMERGE_VALUES [[COPY11]](i32) + ; GFX9-NEXT: [[ANYEXT22:%[0-9]+]]:_(i32) = G_ANYEXT [[UV22]](i16) + ; GFX9-NEXT: [[ANYEXT23:%[0-9]+]]:_(i32) = G_ANYEXT [[UV23]](i16) + ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(i16), [[UV25:%[0-9]+]]:_(i16) = G_UNMERGE_VALUES [[COPY12]](i32) + ; GFX9-NEXT: [[ANYEXT24:%[0-9]+]]:_(i32) = G_ANYEXT [[UV24]](i16) + ; GFX9-NEXT: [[ANYEXT25:%[0-9]+]]:_(i32) = G_ANYEXT [[UV25]](i16) + ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(i16), [[UV27:%[0-9]+]]:_(i16) = G_UNMERGE_VALUES [[COPY13]](i32) + ; GFX9-NEXT: [[ANYEXT26:%[0-9]+]]:_(i32) = G_ANYEXT [[UV26]](i16) + ; GFX9-NEXT: [[ANYEXT27:%[0-9]+]]:_(i32) = G_ANYEXT [[UV27]](i16) + ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(i16), [[UV29:%[0-9]+]]:_(i16) = G_UNMERGE_VALUES [[COPY14]](i32) + ; GFX9-NEXT: [[ANYEXT28:%[0-9]+]]:_(i32) = G_ANYEXT [[UV28]](i16) + ; GFX9-NEXT: [[ANYEXT29:%[0-9]+]]:_(i32) = G_ANYEXT [[UV29]](i16) + ; GFX9-NEXT: [[UV30:%[0-9]+]]:_(i16), [[UV31:%[0-9]+]]:_(i16) = G_UNMERGE_VALUES [[COPY15]](i32) + ; GFX9-NEXT: [[ANYEXT30:%[0-9]+]]:_(i32) = G_ANYEXT [[UV30]](i16) + ; GFX9-NEXT: [[ANYEXT31:%[0-9]+]]:_(i32) = G_ANYEXT [[UV31]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32), [[ANYEXT2]](i32), [[ANYEXT3]](i32), [[ANYEXT4]](i32), [[ANYEXT5]](i32), [[ANYEXT6]](i32), [[ANYEXT7]](i32), [[ANYEXT8]](i32), [[ANYEXT9]](i32), [[ANYEXT10]](i32), [[ANYEXT11]](i32), [[ANYEXT12]](i32), [[ANYEXT13]](i32), [[ANYEXT14]](i32), [[ANYEXT15]](i32), [[ANYEXT16]](i32), [[ANYEXT17]](i32), [[ANYEXT18]](i32), [[ANYEXT19]](i32), [[ANYEXT20]](i32), [[ANYEXT21]](i32), [[ANYEXT22]](i32), [[ANYEXT23]](i32), [[ANYEXT24]](i32), [[ANYEXT25]](i32), [[ANYEXT26]](i32), [[ANYEXT27]](i32), [[ANYEXT28]](i32), [[ANYEXT29]](i32), [[ANYEXT30]](i32), [[ANYEXT31]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(<32 x i16>) = G_TRUNC [[BUILD_VECTOR]](<32 x i32>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<32 x bf16>) = G_BITCAST [[TRUNC]](<32 x i16>) + ; GFX9-NEXT: [[UV32:%[0-9]+]]:_(bf16), [[UV33:%[0-9]+]]:_(bf16), [[UV34:%[0-9]+]]:_(bf16), [[UV35:%[0-9]+]]:_(bf16), [[UV36:%[0-9]+]]:_(bf16), [[UV37:%[0-9]+]]:_(bf16), [[UV38:%[0-9]+]]:_(bf16), [[UV39:%[0-9]+]]:_(bf16), [[UV40:%[0-9]+]]:_(bf16), [[UV41:%[0-9]+]]:_(bf16), [[UV42:%[0-9]+]]:_(bf16), [[UV43:%[0-9]+]]:_(bf16), [[UV44:%[0-9]+]]:_(bf16), [[UV45:%[0-9]+]]:_(bf16), [[UV46:%[0-9]+]]:_(bf16), [[UV47:%[0-9]+]]:_(bf16), [[UV48:%[0-9]+]]:_(bf16), [[UV49:%[0-9]+]]:_(bf16), [[UV50:%[0-9]+]]:_(bf16), [[UV51:%[0-9]+]]:_(bf16), [[UV52:%[0-9]+]]:_(bf16), [[UV53:%[0-9]+]]:_(bf16), [[UV54:%[0-9]+]]:_(bf16), [[UV55:%[0-9]+]]:_(bf16), [[UV56:%[0-9]+]]:_(bf16), [[UV57:%[0-9]+]]:_(bf16), [[UV58:%[0-9]+]]:_(bf16), [[UV59:%[0-9]+]]:_(bf16), [[UV60:%[0-9]+]]:_(bf16), [[UV61:%[0-9]+]]:_(bf16), [[UV62:%[0-9]+]]:_(bf16), [[UV63:%[0-9]+]]:_(bf16) = G_UNMERGE_VALUES [[BITCAST]](<32 x bf16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[UV32]](bf16) + ; GFX9-NEXT: [[ANYEXT32:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[UV33]](bf16) + ; 
GFX9-NEXT: [[ANYEXT33:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST2]](i16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[UV34]](bf16) + ; GFX9-NEXT: [[ANYEXT34:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST3]](i16) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[UV35]](bf16) + ; GFX9-NEXT: [[ANYEXT35:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST4]](i16) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[UV36]](bf16) + ; GFX9-NEXT: [[ANYEXT36:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST5]](i16) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[UV37]](bf16) + ; GFX9-NEXT: [[ANYEXT37:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST6]](i16) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[UV38]](bf16) + ; GFX9-NEXT: [[ANYEXT38:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST7]](i16) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[UV39]](bf16) + ; GFX9-NEXT: [[ANYEXT39:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST8]](i16) + ; GFX9-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[UV40]](bf16) + ; GFX9-NEXT: [[ANYEXT40:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST9]](i16) + ; GFX9-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[UV41]](bf16) + ; GFX9-NEXT: [[ANYEXT41:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST10]](i16) + ; GFX9-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[UV42]](bf16) + ; GFX9-NEXT: [[ANYEXT42:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST11]](i16) + ; GFX9-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[UV43]](bf16) + ; GFX9-NEXT: [[ANYEXT43:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST12]](i16) + ; GFX9-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[UV44]](bf16) + ; GFX9-NEXT: [[ANYEXT44:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST13]](i16) + ; GFX9-NEXT: [[BITCAST14:%[0-9]+]]:_(i16) = G_BITCAST [[UV45]](bf16) + ; GFX9-NEXT: [[ANYEXT45:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST14]](i16) + ; GFX9-NEXT: [[BITCAST15:%[0-9]+]]:_(i16) = G_BITCAST [[UV46]](bf16) + ; GFX9-NEXT: [[ANYEXT46:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST15]](i16) + ; GFX9-NEXT: [[BITCAST16:%[0-9]+]]:_(i16) = G_BITCAST [[UV47]](bf16) + ; GFX9-NEXT: [[ANYEXT47:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST16]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT32]](i32) + ; GFX9-NEXT: $vgpr1 = COPY [[ANYEXT33]](i32) + ; GFX9-NEXT: $vgpr2 = COPY [[ANYEXT34]](i32) + ; GFX9-NEXT: $vgpr3 = COPY [[ANYEXT35]](i32) + ; GFX9-NEXT: $vgpr4 = COPY [[ANYEXT36]](i32) + ; GFX9-NEXT: $vgpr5 = COPY [[ANYEXT37]](i32) + ; GFX9-NEXT: $vgpr6 = COPY [[ANYEXT38]](i32) + ; GFX9-NEXT: $vgpr7 = COPY [[ANYEXT39]](i32) + ; GFX9-NEXT: $vgpr8 = COPY [[ANYEXT40]](i32) + ; GFX9-NEXT: $vgpr9 = COPY [[ANYEXT41]](i32) + ; GFX9-NEXT: $vgpr10 = COPY [[ANYEXT42]](i32) + ; GFX9-NEXT: $vgpr11 = COPY [[ANYEXT43]](i32) + ; GFX9-NEXT: $vgpr12 = COPY [[ANYEXT44]](i32) + ; GFX9-NEXT: $vgpr13 = COPY [[ANYEXT45]](i32) + ; GFX9-NEXT: $vgpr14 = COPY [[ANYEXT46]](i32) + ; GFX9-NEXT: $vgpr15 = COPY [[ANYEXT47]](i32) ; GFX9-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 ret <32 x bfloat> %arg0 } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-cs-chain.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-cs-chain.ll index 4b0ff1b2eb470..8373c60e73ee0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-cs-chain.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-cs-chain.ll @@ -11,60 +11,60 @@ define amdgpu_cs_chain void 
@chain_call(<3 x i32> inreg %sgpr, { i32, ptr addrsp ; GFX11: bb.1 (%ir-block.0): ; GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr8, $vgpr9, $vgpr10, $vgpr11 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr1 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32) + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr8 ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(p5) = COPY $vgpr9 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr11 ; GFX11-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @callee - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 ; GFX11-NEXT: [[GV1:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @callee - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>) - ; GFX11-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) - ; GFX11-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) - ; GFX11-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) - ; GFX11-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) - ; GFX11-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV2]](s32) - ; GFX11-NEXT: $sgpr2 = COPY [[INTRINSIC_CONVERGENT2]](s32) - ; GFX11-NEXT: $vgpr8 = COPY [[COPY3]](s32) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x i32>) + ; GFX11-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](i32) + ; GFX11-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](i32) + ; GFX11-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](i32) + ; GFX11-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](i32) + ; GFX11-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV2]](i32) + ; GFX11-NEXT: $sgpr2 = COPY [[INTRINSIC_CONVERGENT2]](i32) + ; GFX11-NEXT: $vgpr8 = COPY [[COPY3]](i32) ; GFX11-NEXT: $vgpr9 = COPY [[COPY4]](p5) - ; GFX11-NEXT: $vgpr10 = COPY [[COPY5]](s32) - ; GFX11-NEXT: $vgpr11 = COPY [[COPY6]](s32) + ; GFX11-NEXT: $vgpr10 = COPY [[COPY5]](i32) + ; GFX11-NEXT: $vgpr11 = COPY [[COPY6]](i32) ; GFX11-NEXT: SI_CS_CHAIN_TC_W32 [[GV1]](p0), @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 ; ; GFX10-LABEL: name: chain_call ; GFX10: bb.1 (%ir-block.0): ; GFX10-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr8, 
$vgpr9, $vgpr10, $vgpr11 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32) + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr8 ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(p5) = COPY $vgpr9 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr11 ; GFX10-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @callee - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[GV1:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @callee - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>) - ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) - ; GFX10-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) - ; GFX10-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) - ; GFX10-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) - ; GFX10-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV2]](s32) - ; GFX10-NEXT: $sgpr2 = COPY [[INTRINSIC_CONVERGENT2]](s32) - ; GFX10-NEXT: $vgpr8 = COPY [[COPY3]](s32) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x i32>) + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](i32) + ; GFX10-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](i32) + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](i32) + ; GFX10-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](i32) + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV2]](i32) + ; GFX10-NEXT: $sgpr2 = COPY [[INTRINSIC_CONVERGENT2]](i32) + ; GFX10-NEXT: $vgpr8 = COPY [[COPY3]](i32) ; GFX10-NEXT: $vgpr9 = COPY [[COPY4]](p5) - ; GFX10-NEXT: $vgpr10 = COPY [[COPY5]](s32) - ; GFX10-NEXT: $vgpr11 = COPY [[COPY6]](s32) - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr48_sgpr49_sgpr50_sgpr51 - ; GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY7]](<4 x s32>) + ; GFX10-NEXT: $vgpr10 = COPY [[COPY5]](i32) + ; GFX10-NEXT: $vgpr11 = COPY [[COPY6]](i32) + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY7]](<4 x i32>) ; GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[GV1]](p0), @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit 
$vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 call void(ptr, i32, <3 x i32>, { i32, ptr addrspace(5), i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain(ptr @callee, i32 -1, <3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr, i32 0) unreachable @@ -75,60 +75,60 @@ define amdgpu_cs_chain void @chain_preserve_call(<3 x i32> inreg %sgpr, { i32, p ; GFX11: bb.1 (%ir-block.0): ; GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr8, $vgpr9, $vgpr10, $vgpr11 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr1 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32) + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr8 ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(p5) = COPY $vgpr9 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr11 ; GFX11-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @callee_preserve - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 ; GFX11-NEXT: [[GV1:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @callee_preserve - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>) - ; GFX11-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) - ; GFX11-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) - ; GFX11-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) - ; GFX11-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) - ; GFX11-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV2]](s32) - ; GFX11-NEXT: $sgpr2 = COPY [[INTRINSIC_CONVERGENT2]](s32) - ; GFX11-NEXT: $vgpr8 = COPY [[COPY3]](s32) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x i32>) + ; GFX11-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](i32) + ; GFX11-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](i32) + ; GFX11-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](i32) + ; GFX11-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](i32) + ; GFX11-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV2]](i32) + ; GFX11-NEXT: $sgpr2 = COPY [[INTRINSIC_CONVERGENT2]](i32) + ; GFX11-NEXT: $vgpr8 = COPY [[COPY3]](i32) ; GFX11-NEXT: $vgpr9 = COPY [[COPY4]](p5) - ; GFX11-NEXT: $vgpr10 = COPY [[COPY5]](s32) - ; GFX11-NEXT: $vgpr11 = COPY [[COPY6]](s32) + ; GFX11-NEXT: $vgpr10 = COPY 
[[COPY5]](i32) + ; GFX11-NEXT: $vgpr11 = COPY [[COPY6]](i32) ; GFX11-NEXT: SI_CS_CHAIN_TC_W32 [[GV1]](p0), @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 ; ; GFX10-LABEL: name: chain_preserve_call ; GFX10: bb.1 (%ir-block.0): ; GFX10-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr8, $vgpr9, $vgpr10, $vgpr11 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32) + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr8 ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(p5) = COPY $vgpr9 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr11 ; GFX10-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @callee_preserve - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[GV1:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @callee_preserve - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>) - ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) - ; GFX10-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) - ; GFX10-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) - ; GFX10-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) - ; GFX10-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV2]](s32) - ; GFX10-NEXT: $sgpr2 = COPY [[INTRINSIC_CONVERGENT2]](s32) - ; GFX10-NEXT: $vgpr8 = COPY [[COPY3]](s32) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x i32>) + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](i32) + ; GFX10-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](i32) + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](i32) + ; GFX10-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](i32) + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV2]](i32) + ; GFX10-NEXT: $sgpr2 = COPY [[INTRINSIC_CONVERGENT2]](i32) + ; GFX10-NEXT: $vgpr8 = COPY [[COPY3]](i32) ; GFX10-NEXT: $vgpr9 = COPY [[COPY4]](p5) - ; GFX10-NEXT: $vgpr10 = COPY [[COPY5]](s32) - ; GFX10-NEXT: $vgpr11 = COPY [[COPY6]](s32) - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr48_sgpr49_sgpr50_sgpr51 - ; GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = 
COPY [[COPY7]](<4 x s32>) + ; GFX10-NEXT: $vgpr10 = COPY [[COPY5]](i32) + ; GFX10-NEXT: $vgpr11 = COPY [[COPY6]](i32) + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY7]](<4 x i32>) ; GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[GV1]](p0), @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 call void(ptr, i32, <3 x i32>, { i32, ptr addrspace(5), i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain(ptr @callee_preserve, i32 -1, <3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr, i32 0) unreachable diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-sendmsg.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-sendmsg.ll index 2e950117c740b..b93bfe3cd7dd3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-sendmsg.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-sendmsg.ll @@ -8,8 +8,8 @@ define amdgpu_ps void @test_sendmsg(i32 inreg %m0) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsg), 12, [[COPY]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsg), 12, [[COPY]](i32) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.s.sendmsg(i32 12, i32 %m0) ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll index 4c35d7bcbae2d..4117fbb4f30f1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll @@ -8,14 +8,14 @@ define amdgpu_kernel void @i8_arg(ptr addrspace(1) nocapture %out, i8 %in) nounw ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 ; HSA-VI-NEXT: {{ $}} ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) - ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 8, addrspace 4) - ; HSA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8) - ; HSA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) + ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(i8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (i8), align 8, addrspace 4) + ; HSA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[LOAD1]](i8) + ; HSA-VI-NEXT: G_STORE [[ZEXT]](i32), [[LOAD]](p1) :: (store (i32) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; ; LEGACY-MESA-VI-LABEL: name: i8_arg @@ -23,14 +23,14 @@ define amdgpu_kernel void @i8_arg(ptr addrspace(1) nocapture %out, i8 
%in) nounw ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 ; LEGACY-MESA-VI-NEXT: {{ $}} ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 - ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 36 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8) - ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) + ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 44 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(i8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (i8), align 4, addrspace 4) + ; LEGACY-MESA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[LOAD1]](i8) + ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](i32), [[LOAD]](p1) :: (store (i32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 %ext = zext i8 %in to i32 store i32 %ext, ptr addrspace(1) %out, align 4 @@ -43,14 +43,14 @@ define amdgpu_kernel void @i8_zext_arg(ptr addrspace(1) nocapture %out, i8 zeroe ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 ; HSA-VI-NEXT: {{ $}} ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) - ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 8, addrspace 4) - ; HSA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8) - ; HSA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) + ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(i8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (i8), align 8, addrspace 4) + ; HSA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[LOAD1]](i8) + ; HSA-VI-NEXT: G_STORE [[ZEXT]](i32), [[LOAD]](p1) :: (store (i32) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; ; LEGACY-MESA-VI-LABEL: name: i8_zext_arg @@ -58,14 +58,14 @@ define amdgpu_kernel void @i8_zext_arg(ptr addrspace(1) nocapture %out, i8 zeroe ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 ; LEGACY-MESA-VI-NEXT: {{ $}} ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 - ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + 
; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 36 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8) - ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) + ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 44 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(i8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (i8), align 4, addrspace 4) + ; LEGACY-MESA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[LOAD1]](i8) + ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](i32), [[LOAD]](p1) :: (store (i32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 %ext = zext i8 %in to i32 store i32 %ext, ptr addrspace(1) %out, align 4 @@ -78,14 +78,14 @@ define amdgpu_kernel void @i8_sext_arg(ptr addrspace(1) nocapture %out, i8 signe ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 ; HSA-VI-NEXT: {{ $}} ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) - ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 8, addrspace 4) - ; HSA-VI-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s8) - ; HSA-VI-NEXT: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) + ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(i8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (i8), align 8, addrspace 4) + ; HSA-VI-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[LOAD1]](i8) + ; HSA-VI-NEXT: G_STORE [[SEXT]](i32), [[LOAD]](p1) :: (store (i32) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; ; LEGACY-MESA-VI-LABEL: name: i8_sext_arg @@ -93,14 +93,14 @@ define amdgpu_kernel void @i8_sext_arg(ptr addrspace(1) nocapture %out, i8 signe ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 ; LEGACY-MESA-VI-NEXT: {{ $}} ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 - ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 36 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) - ; 
LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s8) - ; LEGACY-MESA-VI-NEXT: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) + ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 44 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(i8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (i8), align 4, addrspace 4) + ; LEGACY-MESA-VI-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[LOAD1]](i8) + ; LEGACY-MESA-VI-NEXT: G_STORE [[SEXT]](i32), [[LOAD]](p1) :: (store (i32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 %ext = sext i8 %in to i32 store i32 %ext, ptr addrspace(1) %out, align 4 @@ -113,14 +113,14 @@ define amdgpu_kernel void @i16_arg(ptr addrspace(1) nocapture %out, i16 %in) nou ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 ; HSA-VI-NEXT: {{ $}} ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) - ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 8, addrspace 4) - ; HSA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16) - ; HSA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) + ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(i16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (i16), align 8, addrspace 4) + ; HSA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[LOAD1]](i16) + ; HSA-VI-NEXT: G_STORE [[ZEXT]](i32), [[LOAD]](p1) :: (store (i32) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; ; LEGACY-MESA-VI-LABEL: name: i16_arg @@ -128,14 +128,14 @@ define amdgpu_kernel void @i16_arg(ptr addrspace(1) nocapture %out, i16 %in) nou ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 ; LEGACY-MESA-VI-NEXT: {{ $}} ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 - ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 36 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 4, addrspace 4) - ; 
LEGACY-MESA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16) - ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) + ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 44 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(i16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (i16), align 4, addrspace 4) + ; LEGACY-MESA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[LOAD1]](i16) + ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](i32), [[LOAD]](p1) :: (store (i32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 %ext = zext i16 %in to i32 store i32 %ext, ptr addrspace(1) %out, align 4 @@ -148,14 +148,14 @@ define amdgpu_kernel void @i16_zext_arg(ptr addrspace(1) nocapture %out, i16 zer ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 ; HSA-VI-NEXT: {{ $}} ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) - ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 8, addrspace 4) - ; HSA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16) - ; HSA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) + ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(i16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (i16), align 8, addrspace 4) + ; HSA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[LOAD1]](i16) + ; HSA-VI-NEXT: G_STORE [[ZEXT]](i32), [[LOAD]](p1) :: (store (i32) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; ; LEGACY-MESA-VI-LABEL: name: i16_zext_arg @@ -163,14 +163,14 @@ define amdgpu_kernel void @i16_zext_arg(ptr addrspace(1) nocapture %out, i16 zer ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 ; LEGACY-MESA-VI-NEXT: {{ $}} ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 - ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 36 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16) - ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) + ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 44 + ; LEGACY-MESA-VI-NEXT: 
[[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(i16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (i16), align 4, addrspace 4) + ; LEGACY-MESA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[LOAD1]](i16) + ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](i32), [[LOAD]](p1) :: (store (i32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 %ext = zext i16 %in to i32 store i32 %ext, ptr addrspace(1) %out, align 4 @@ -183,14 +183,14 @@ define amdgpu_kernel void @i16_sext_arg(ptr addrspace(1) nocapture %out, i16 sig ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 ; HSA-VI-NEXT: {{ $}} ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) - ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 8, addrspace 4) - ; HSA-VI-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s16) - ; HSA-VI-NEXT: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) + ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(i16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (i16), align 8, addrspace 4) + ; HSA-VI-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[LOAD1]](i16) + ; HSA-VI-NEXT: G_STORE [[SEXT]](i32), [[LOAD]](p1) :: (store (i32) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; ; LEGACY-MESA-VI-LABEL: name: i16_sext_arg @@ -198,14 +198,14 @@ define amdgpu_kernel void @i16_sext_arg(ptr addrspace(1) nocapture %out, i16 sig ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 ; LEGACY-MESA-VI-NEXT: {{ $}} ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 - ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 36 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s16) - ; LEGACY-MESA-VI-NEXT: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) + ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 44 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(i16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (i16), align 4, addrspace 4) + ; LEGACY-MESA-VI-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[LOAD1]](i16) + ; 
LEGACY-MESA-VI-NEXT: G_STORE [[SEXT]](i32), [[LOAD]](p1) :: (store (i32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 %ext = sext i16 %in to i32 store i32 %ext, ptr addrspace(1) %out, align 4 @@ -218,13 +218,13 @@ define amdgpu_kernel void @i32_arg(ptr addrspace(1) nocapture %out, i32 %in) nou ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 ; HSA-VI-NEXT: {{ $}} ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) - ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4) - ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) + ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (i32), align 8, addrspace 4) + ; HSA-VI-NEXT: G_STORE [[LOAD1]](i32), [[LOAD]](p1) :: (store (i32) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; ; LEGACY-MESA-VI-LABEL: name: i32_arg @@ -232,13 +232,13 @@ define amdgpu_kernel void @i32_arg(ptr addrspace(1) nocapture %out, i32 %in) nou ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 ; LEGACY-MESA-VI-NEXT: {{ $}} ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 - ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 36 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32), addrspace 4) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) + ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 44 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (i32), addrspace 4) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](i32), [[LOAD]](p1) :: (store (i32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: store i32 %in, ptr addrspace(1) %out, align 4 @@ -251,13 +251,13 @@ define amdgpu_kernel void @f32_arg(ptr addrspace(1) nocapture %out, float %in) n ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 ; HSA-VI-NEXT: {{ $}} ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = 
G_CONSTANT i64 0 + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) - ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4) - ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) + ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(f32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (f32), align 8, addrspace 4) + ; HSA-VI-NEXT: G_STORE [[LOAD1]](f32), [[LOAD]](p1) :: (store (f32) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; ; LEGACY-MESA-VI-LABEL: name: f32_arg @@ -265,13 +265,13 @@ define amdgpu_kernel void @f32_arg(ptr addrspace(1) nocapture %out, float %in) n ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 ; LEGACY-MESA-VI-NEXT: {{ $}} ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 - ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 36 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32), addrspace 4) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) + ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 44 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(f32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (f32), addrspace 4) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](f32), [[LOAD]](p1) :: (store (f32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: store float %in, ptr addrspace(1) %out, align 4 @@ -284,13 +284,13 @@ define amdgpu_kernel void @v2i8_arg(ptr addrspace(1) %out, <2 x i8> %in) { ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 ; HSA-VI-NEXT: {{ $}} ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) - ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s8>), align 8, addrspace 4) - ; HSA-VI-NEXT: G_STORE [[LOAD1]](<2 x s8>), [[LOAD]](p1) :: (store (<2 x s8>) into %ir.out, addrspace 1) + 
; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x i8>), align 8, addrspace 4) + ; HSA-VI-NEXT: G_STORE [[LOAD1]](<2 x i8>), [[LOAD]](p1) :: (store (<2 x i8>) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; ; LEGACY-MESA-VI-LABEL: name: v2i8_arg @@ -298,13 +298,13 @@ define amdgpu_kernel void @v2i8_arg(ptr addrspace(1) %out, <2 x i8> %in) { ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 ; LEGACY-MESA-VI-NEXT: {{ $}} ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 - ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 36 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s8>), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<2 x s8>), [[LOAD]](p1) :: (store (<2 x s8>) into %ir.out, addrspace 1) + ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 44 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x i8>), align 4, addrspace 4) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<2 x i8>), [[LOAD]](p1) :: (store (<2 x i8>) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: store <2 x i8> %in, ptr addrspace(1) %out @@ -317,13 +317,13 @@ define amdgpu_kernel void @v2i16_arg(ptr addrspace(1) %out, <2 x i16> %in) { ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 ; HSA-VI-NEXT: {{ $}} ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) - ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s16>), align 8, addrspace 4) - ; HSA-VI-NEXT: G_STORE [[LOAD1]](<2 x s16>), [[LOAD]](p1) :: (store (<2 x s16>) into %ir.out, addrspace 1) + ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x i16>), align 8, addrspace 4) + ; HSA-VI-NEXT: G_STORE [[LOAD1]](<2 x i16>), [[LOAD]](p1) :: (store (<2 x i16>) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; ; LEGACY-MESA-VI-LABEL: name: v2i16_arg @@ -331,13 +331,13 @@ define amdgpu_kernel void 
@v2i16_arg(ptr addrspace(1) %out, <2 x i16> %in) { ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 ; LEGACY-MESA-VI-NEXT: {{ $}} ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 - ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 36 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s16>), addrspace 4) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<2 x s16>), [[LOAD]](p1) :: (store (<2 x s16>) into %ir.out, addrspace 1) + ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 44 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x i16>), addrspace 4) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<2 x i16>), [[LOAD]](p1) :: (store (<2 x i16>) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: store <2 x i16> %in, ptr addrspace(1) %out @@ -350,13 +350,13 @@ define amdgpu_kernel void @v2i32_arg(ptr addrspace(1) nocapture %out, <2 x i32> ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 ; HSA-VI-NEXT: {{ $}} ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) - ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), addrspace 4) - ; HSA-VI-NEXT: G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store (<2 x s32>) into %ir.out, align 4, addrspace 1) + ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x i32>), addrspace 4) + ; HSA-VI-NEXT: G_STORE [[LOAD1]](<2 x i32>), [[LOAD]](p1) :: (store (<2 x i32>) into %ir.out, align 4, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; ; LEGACY-MESA-VI-LABEL: name: v2i32_arg @@ -364,13 +364,13 @@ define amdgpu_kernel void @v2i32_arg(ptr addrspace(1) nocapture %out, <2 x i32> ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 ; LEGACY-MESA-VI-NEXT: {{ $}} ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 - ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 36 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; 
LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store (<2 x s32>) into %ir.out, align 4, addrspace 1) + ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 44 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x i32>), align 4, addrspace 4) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<2 x i32>), [[LOAD]](p1) :: (store (<2 x i32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: store <2 x i32> %in, ptr addrspace(1) %out, align 4 @@ -383,13 +383,13 @@ define amdgpu_kernel void @v2f32_arg(ptr addrspace(1) nocapture %out, <2 x float ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 ; HSA-VI-NEXT: {{ $}} ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) - ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), addrspace 4) - ; HSA-VI-NEXT: G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store (<2 x s32>) into %ir.out, align 4, addrspace 1) + ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x f32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x f32>), addrspace 4) + ; HSA-VI-NEXT: G_STORE [[LOAD1]](<2 x f32>), [[LOAD]](p1) :: (store (<2 x f32>) into %ir.out, align 4, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; ; LEGACY-MESA-VI-LABEL: name: v2f32_arg @@ -397,13 +397,13 @@ define amdgpu_kernel void @v2f32_arg(ptr addrspace(1) nocapture %out, <2 x float ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 ; LEGACY-MESA-VI-NEXT: {{ $}} ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 - ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 36 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: 
G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store (<2 x s32>) into %ir.out, align 4, addrspace 1) + ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 44 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x f32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x f32>), align 4, addrspace 4) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<2 x f32>), [[LOAD]](p1) :: (store (<2 x f32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: store <2 x float> %in, ptr addrspace(1) %out, align 4 @@ -416,13 +416,13 @@ define amdgpu_kernel void @v3i8_arg(ptr addrspace(1) nocapture %out, <3 x i8> %i ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 ; HSA-VI-NEXT: {{ $}} ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) - ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s8>), align 8, addrspace 4) - ; HSA-VI-NEXT: G_STORE [[LOAD1]](<3 x s8>), [[LOAD]](p1) :: (store (<3 x s8>) into %ir.out, align 4, addrspace 1) + ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x i8>), align 8, addrspace 4) + ; HSA-VI-NEXT: G_STORE [[LOAD1]](<3 x i8>), [[LOAD]](p1) :: (store (<3 x i8>) into %ir.out, align 4, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; ; LEGACY-MESA-VI-LABEL: name: v3i8_arg @@ -430,13 +430,13 @@ define amdgpu_kernel void @v3i8_arg(ptr addrspace(1) nocapture %out, <3 x i8> %i ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 ; LEGACY-MESA-VI-NEXT: {{ $}} ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 - ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 36 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s8>), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<3 x s8>), [[LOAD]](p1) :: (store (<3 x s8>) into %ir.out, align 4, addrspace 1) + ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 44 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x i8>), align 4, addrspace 4) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<3 x 
i8>), [[LOAD]](p1) :: (store (<3 x i8>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: store <3 x i8> %in, ptr addrspace(1) %out, align 4 @@ -449,13 +449,13 @@ define amdgpu_kernel void @v3i16_arg(ptr addrspace(1) nocapture %out, <3 x i16> ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 ; HSA-VI-NEXT: {{ $}} ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) - ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s16>), align 8, addrspace 4) - ; HSA-VI-NEXT: G_STORE [[LOAD1]](<3 x s16>), [[LOAD]](p1) :: (store (<3 x s16>) into %ir.out, align 4, addrspace 1) + ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x i16>), align 8, addrspace 4) + ; HSA-VI-NEXT: G_STORE [[LOAD1]](<3 x i16>), [[LOAD]](p1) :: (store (<3 x i16>) into %ir.out, align 4, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; ; LEGACY-MESA-VI-LABEL: name: v3i16_arg @@ -463,13 +463,13 @@ define amdgpu_kernel void @v3i16_arg(ptr addrspace(1) nocapture %out, <3 x i16> ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 ; LEGACY-MESA-VI-NEXT: {{ $}} ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 - ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 36 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s16>), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<3 x s16>), [[LOAD]](p1) :: (store (<3 x s16>) into %ir.out, align 4, addrspace 1) + ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 44 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x i16>), align 4, addrspace 4) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<3 x i16>), [[LOAD]](p1) :: (store (<3 x i16>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: store <3 x i16> %in, ptr addrspace(1) %out, align 4 @@ -482,13 +482,13 @@ define amdgpu_kernel void @v3i32_arg(ptr addrspace(1) nocapture %out, <3 x i32> ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 ; HSA-VI-NEXT: {{ $}} ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - 
; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) - ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s32>), align 16, addrspace 4) - ; HSA-VI-NEXT: G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store (<3 x s32>) into %ir.out, align 4, addrspace 1) + ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x i32>), align 16, addrspace 4) + ; HSA-VI-NEXT: G_STORE [[LOAD1]](<3 x i32>), [[LOAD]](p1) :: (store (<3 x i32>) into %ir.out, align 4, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; ; LEGACY-MESA-VI-LABEL: name: v3i32_arg @@ -496,13 +496,13 @@ define amdgpu_kernel void @v3i32_arg(ptr addrspace(1) nocapture %out, <3 x i32> ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 ; LEGACY-MESA-VI-NEXT: {{ $}} ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 - ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 36 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s32>), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store (<3 x s32>) into %ir.out, align 4, addrspace 1) + ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 52 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x i32>), align 4, addrspace 4) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<3 x i32>), [[LOAD]](p1) :: (store (<3 x i32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: store <3 x i32> %in, ptr addrspace(1) %out, align 4 @@ -515,13 +515,13 @@ define amdgpu_kernel void @v3f32_arg(ptr addrspace(1) nocapture %out, <3 x float ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 ; HSA-VI-NEXT: {{ $}} ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) - ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = 
G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s32>), align 16, addrspace 4) - ; HSA-VI-NEXT: G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store (<3 x s32>) into %ir.out, align 4, addrspace 1) + ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x f32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x f32>), align 16, addrspace 4) + ; HSA-VI-NEXT: G_STORE [[LOAD1]](<3 x f32>), [[LOAD]](p1) :: (store (<3 x f32>) into %ir.out, align 4, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; ; LEGACY-MESA-VI-LABEL: name: v3f32_arg @@ -529,13 +529,13 @@ define amdgpu_kernel void @v3f32_arg(ptr addrspace(1) nocapture %out, <3 x float ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 ; LEGACY-MESA-VI-NEXT: {{ $}} ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 - ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 36 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s32>), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store (<3 x s32>) into %ir.out, align 4, addrspace 1) + ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 52 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x f32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x f32>), align 4, addrspace 4) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<3 x f32>), [[LOAD]](p1) :: (store (<3 x f32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: store <3 x float> %in, ptr addrspace(1) %out, align 4 @@ -548,13 +548,13 @@ define amdgpu_kernel void @v4i8_arg(ptr addrspace(1) %out, <4 x i8> %in) { ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 ; HSA-VI-NEXT: {{ $}} ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) - ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s8>), align 8, addrspace 4) - ; HSA-VI-NEXT: G_STORE [[LOAD1]](<4 x s8>), [[LOAD]](p1) :: (store (<4 x s8>) into %ir.out, addrspace 1) + ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; HSA-VI-NEXT: 
[[LOAD1:%[0-9]+]]:_(<4 x i8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x i8>), align 8, addrspace 4) + ; HSA-VI-NEXT: G_STORE [[LOAD1]](<4 x i8>), [[LOAD]](p1) :: (store (<4 x i8>) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; ; LEGACY-MESA-VI-LABEL: name: v4i8_arg @@ -562,13 +562,13 @@ define amdgpu_kernel void @v4i8_arg(ptr addrspace(1) %out, <4 x i8> %in) { ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 ; LEGACY-MESA-VI-NEXT: {{ $}} ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 - ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 36 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s8>), addrspace 4) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<4 x s8>), [[LOAD]](p1) :: (store (<4 x s8>) into %ir.out, addrspace 1) + ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 44 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x i8>), addrspace 4) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<4 x i8>), [[LOAD]](p1) :: (store (<4 x i8>) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: store <4 x i8> %in, ptr addrspace(1) %out @@ -581,13 +581,13 @@ define amdgpu_kernel void @v4i16_arg(ptr addrspace(1) %out, <4 x i16> %in) { ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 ; HSA-VI-NEXT: {{ $}} ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) - ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s16>), addrspace 4) - ; HSA-VI-NEXT: G_STORE [[LOAD1]](<4 x s16>), [[LOAD]](p1) :: (store (<4 x s16>) into %ir.out, addrspace 1) + ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x i16>), addrspace 4) + ; HSA-VI-NEXT: G_STORE [[LOAD1]](<4 x i16>), [[LOAD]](p1) :: (store (<4 x i16>) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; ; LEGACY-MESA-VI-LABEL: name: v4i16_arg @@ -595,13 +595,13 @@ define amdgpu_kernel void @v4i16_arg(ptr addrspace(1) %out, <4 x i16> %in) { ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 ; LEGACY-MESA-VI-NEXT: {{ $}} ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 - ; 
LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 36 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s16>), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<4 x s16>), [[LOAD]](p1) :: (store (<4 x s16>) into %ir.out, addrspace 1) + ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 44 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x i16>), align 4, addrspace 4) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<4 x i16>), [[LOAD]](p1) :: (store (<4 x i16>) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: store <4 x i16> %in, ptr addrspace(1) %out @@ -614,13 +614,13 @@ define amdgpu_kernel void @v4i32_arg(ptr addrspace(1) nocapture %out, <4 x i32> ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 ; HSA-VI-NEXT: {{ $}} ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) - ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s32>), addrspace 4) - ; HSA-VI-NEXT: G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store (<4 x s32>) into %ir.out, align 4, addrspace 1) + ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x i32>), addrspace 4) + ; HSA-VI-NEXT: G_STORE [[LOAD1]](<4 x i32>), [[LOAD]](p1) :: (store (<4 x i32>) into %ir.out, align 4, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; ; LEGACY-MESA-VI-LABEL: name: v4i32_arg @@ -628,13 +628,13 @@ define amdgpu_kernel void @v4i32_arg(ptr addrspace(1) nocapture %out, <4 x i32> ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 ; LEGACY-MESA-VI-NEXT: {{ $}} ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 - ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 36 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT 
i64 52 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s32>), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store (<4 x s32>) into %ir.out, align 4, addrspace 1) + ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 52 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x i32>), align 4, addrspace 4) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<4 x i32>), [[LOAD]](p1) :: (store (<4 x i32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: store <4 x i32> %in, ptr addrspace(1) %out, align 4 @@ -647,13 +647,13 @@ define amdgpu_kernel void @v4f32_arg(ptr addrspace(1) nocapture %out, <4 x float ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 ; HSA-VI-NEXT: {{ $}} ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) - ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s32>), addrspace 4) - ; HSA-VI-NEXT: G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store (<4 x s32>) into %ir.out, align 4, addrspace 1) + ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x f32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x f32>), addrspace 4) + ; HSA-VI-NEXT: G_STORE [[LOAD1]](<4 x f32>), [[LOAD]](p1) :: (store (<4 x f32>) into %ir.out, align 4, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; ; LEGACY-MESA-VI-LABEL: name: v4f32_arg @@ -661,13 +661,13 @@ define amdgpu_kernel void @v4f32_arg(ptr addrspace(1) nocapture %out, <4 x float ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 ; LEGACY-MESA-VI-NEXT: {{ $}} ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 - ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 36 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s32>), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store (<4 x s32>) into %ir.out, align 4, addrspace 1) + ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 52 + ; 
LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x f32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x f32>), align 4, addrspace 4) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<4 x f32>), [[LOAD]](p1) :: (store (<4 x f32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: store <4 x float> %in, ptr addrspace(1) %out, align 4 @@ -680,13 +680,13 @@ define amdgpu_kernel void @v8i8_arg(ptr addrspace(1) %out, <8 x i8> %in) { ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 ; HSA-VI-NEXT: {{ $}} ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) - ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s8>), addrspace 4) - ; HSA-VI-NEXT: G_STORE [[LOAD1]](<8 x s8>), [[LOAD]](p1) :: (store (<8 x s8>) into %ir.out, addrspace 1) + ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x i8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x i8>), addrspace 4) + ; HSA-VI-NEXT: G_STORE [[LOAD1]](<8 x i8>), [[LOAD]](p1) :: (store (<8 x i8>) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; ; LEGACY-MESA-VI-LABEL: name: v8i8_arg @@ -694,13 +694,13 @@ define amdgpu_kernel void @v8i8_arg(ptr addrspace(1) %out, <8 x i8> %in) { ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 ; LEGACY-MESA-VI-NEXT: {{ $}} ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 - ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 36 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s8>), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<8 x s8>), [[LOAD]](p1) :: (store (<8 x s8>) into %ir.out, addrspace 1) + ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 44 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x i8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x i8>), align 4, addrspace 4) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<8 x i8>), [[LOAD]](p1) :: (store (<8 x i8>) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: store <8 x i8> %in, ptr addrspace(1) %out @@ -713,13 +713,13 @@ define amdgpu_kernel void @v8i16_arg(ptr addrspace(1) %out, 
<8 x i16> %in) { ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 ; HSA-VI-NEXT: {{ $}} ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) - ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s16>), addrspace 4) - ; HSA-VI-NEXT: G_STORE [[LOAD1]](<8 x s16>), [[LOAD]](p1) :: (store (<8 x s16>) into %ir.out, addrspace 1) + ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x i16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x i16>), addrspace 4) + ; HSA-VI-NEXT: G_STORE [[LOAD1]](<8 x i16>), [[LOAD]](p1) :: (store (<8 x i16>) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; ; LEGACY-MESA-VI-LABEL: name: v8i16_arg @@ -727,13 +727,13 @@ define amdgpu_kernel void @v8i16_arg(ptr addrspace(1) %out, <8 x i16> %in) { ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 ; LEGACY-MESA-VI-NEXT: {{ $}} ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 - ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 36 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s16>), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<8 x s16>), [[LOAD]](p1) :: (store (<8 x s16>) into %ir.out, addrspace 1) + ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 52 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x i16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x i16>), align 4, addrspace 4) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<8 x i16>), [[LOAD]](p1) :: (store (<8 x i16>) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: store <8 x i16> %in, ptr addrspace(1) %out @@ -746,13 +746,13 @@ define amdgpu_kernel void @v8i32_arg(ptr addrspace(1) nocapture %out, <8 x i32> ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 ; HSA-VI-NEXT: {{ $}} ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 
16, addrspace 4) - ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s32>), align 16, addrspace 4) - ; HSA-VI-NEXT: G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store (<8 x s32>) into %ir.out, align 4, addrspace 1) + ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 32 + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x i32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x i32>), align 16, addrspace 4) + ; HSA-VI-NEXT: G_STORE [[LOAD1]](<8 x i32>), [[LOAD]](p1) :: (store (<8 x i32>) into %ir.out, align 4, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; ; LEGACY-MESA-VI-LABEL: name: v8i32_arg @@ -760,13 +760,13 @@ define amdgpu_kernel void @v8i32_arg(ptr addrspace(1) nocapture %out, <8 x i32> ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 ; LEGACY-MESA-VI-NEXT: {{ $}} ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 - ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 36 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 68 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s32>), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store (<8 x s32>) into %ir.out, align 4, addrspace 1) + ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 68 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x i32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x i32>), align 4, addrspace 4) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<8 x i32>), [[LOAD]](p1) :: (store (<8 x i32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: store <8 x i32> %in, ptr addrspace(1) %out, align 4 @@ -779,13 +779,13 @@ define amdgpu_kernel void @v8f32_arg(ptr addrspace(1) nocapture %out, <8 x float ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 ; HSA-VI-NEXT: {{ $}} ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) - ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s32>), align 16, addrspace 4) - ; HSA-VI-NEXT: G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store (<8 x s32>) into %ir.out, align 4, addrspace 1) + ; HSA-VI-NEXT: 
[[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 32 + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x f32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x f32>), align 16, addrspace 4) + ; HSA-VI-NEXT: G_STORE [[LOAD1]](<8 x f32>), [[LOAD]](p1) :: (store (<8 x f32>) into %ir.out, align 4, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; ; LEGACY-MESA-VI-LABEL: name: v8f32_arg @@ -793,13 +793,13 @@ define amdgpu_kernel void @v8f32_arg(ptr addrspace(1) nocapture %out, <8 x float ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 ; LEGACY-MESA-VI-NEXT: {{ $}} ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 - ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 36 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 68 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s32>), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store (<8 x s32>) into %ir.out, align 4, addrspace 1) + ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 68 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x f32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x f32>), align 4, addrspace 4) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<8 x f32>), [[LOAD]](p1) :: (store (<8 x f32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: store <8 x float> %in, ptr addrspace(1) %out, align 4 @@ -812,13 +812,13 @@ define amdgpu_kernel void @v16i8_arg(ptr addrspace(1) %out, <16 x i8> %in) { ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 ; HSA-VI-NEXT: {{ $}} ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) - ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s8>), addrspace 4) - ; HSA-VI-NEXT: G_STORE [[LOAD1]](<16 x s8>), [[LOAD]](p1) :: (store (<16 x s8>) into %ir.out, addrspace 1) + ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x i8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x i8>), addrspace 4) + ; HSA-VI-NEXT: G_STORE [[LOAD1]](<16 x i8>), [[LOAD]](p1) :: (store (<16 x i8>) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; ; LEGACY-MESA-VI-LABEL: name: v16i8_arg @@ -826,13 +826,13 @@ define 
amdgpu_kernel void @v16i8_arg(ptr addrspace(1) %out, <16 x i8> %in) { ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 ; LEGACY-MESA-VI-NEXT: {{ $}} ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 - ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 36 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s8>), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<16 x s8>), [[LOAD]](p1) :: (store (<16 x s8>) into %ir.out, addrspace 1) + ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 52 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x i8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x i8>), align 4, addrspace 4) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<16 x i8>), [[LOAD]](p1) :: (store (<16 x i8>) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: store <16 x i8> %in, ptr addrspace(1) %out @@ -845,13 +845,13 @@ define amdgpu_kernel void @v16i16_arg(ptr addrspace(1) %out, <16 x i16> %in) { ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 ; HSA-VI-NEXT: {{ $}} ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) - ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s16>), align 16, addrspace 4) - ; HSA-VI-NEXT: G_STORE [[LOAD1]](<16 x s16>), [[LOAD]](p1) :: (store (<16 x s16>) into %ir.out, addrspace 1) + ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 32 + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x i16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x i16>), align 16, addrspace 4) + ; HSA-VI-NEXT: G_STORE [[LOAD1]](<16 x i16>), [[LOAD]](p1) :: (store (<16 x i16>) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; ; LEGACY-MESA-VI-LABEL: name: v16i16_arg @@ -859,13 +859,13 @@ define amdgpu_kernel void @v16i16_arg(ptr addrspace(1) %out, <16 x i16> %in) { ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 ; LEGACY-MESA-VI-NEXT: {{ $}} ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 - ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 36 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = 
G_PTR_ADD [[COPY]], [[C]](i64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 68 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s16>), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<16 x s16>), [[LOAD]](p1) :: (store (<16 x s16>) into %ir.out, addrspace 1) + ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 68 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x i16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x i16>), align 4, addrspace 4) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<16 x i16>), [[LOAD]](p1) :: (store (<16 x i16>) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: store <16 x i16> %in, ptr addrspace(1) %out @@ -878,13 +878,13 @@ define amdgpu_kernel void @v16i32_arg(ptr addrspace(1) nocapture %out, <16 x i32 ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 ; HSA-VI-NEXT: {{ $}} ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) - ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s32>), align 16, addrspace 4) - ; HSA-VI-NEXT: G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store (<16 x s32>) into %ir.out, align 4, addrspace 1) + ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 64 + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x i32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x i32>), align 16, addrspace 4) + ; HSA-VI-NEXT: G_STORE [[LOAD1]](<16 x i32>), [[LOAD]](p1) :: (store (<16 x i32>) into %ir.out, align 4, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; ; LEGACY-MESA-VI-LABEL: name: v16i32_arg @@ -892,13 +892,13 @@ define amdgpu_kernel void @v16i32_arg(ptr addrspace(1) nocapture %out, <16 x i32 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 ; LEGACY-MESA-VI-NEXT: {{ $}} ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 - ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 36 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 100 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x 
s32>), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store (<16 x s32>) into %ir.out, align 4, addrspace 1) + ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 100 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x i32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x i32>), align 4, addrspace 4) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<16 x i32>), [[LOAD]](p1) :: (store (<16 x i32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: store <16 x i32> %in, ptr addrspace(1) %out, align 4 @@ -911,13 +911,13 @@ define amdgpu_kernel void @v16f32_arg(ptr addrspace(1) nocapture %out, <16 x flo ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 ; HSA-VI-NEXT: {{ $}} ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) - ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s32>), align 16, addrspace 4) - ; HSA-VI-NEXT: G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store (<16 x s32>) into %ir.out, align 4, addrspace 1) + ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 64 + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x f32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x f32>), align 16, addrspace 4) + ; HSA-VI-NEXT: G_STORE [[LOAD1]](<16 x f32>), [[LOAD]](p1) :: (store (<16 x f32>) into %ir.out, align 4, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; ; LEGACY-MESA-VI-LABEL: name: v16f32_arg @@ -925,13 +925,13 @@ define amdgpu_kernel void @v16f32_arg(ptr addrspace(1) nocapture %out, <16 x flo ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 ; LEGACY-MESA-VI-NEXT: {{ $}} ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 - ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 36 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 100 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s32>), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store (<16 x s32>) into %ir.out, align 4, addrspace 1) + ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 100 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x f32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable 
invariant load (<16 x f32>), align 4, addrspace 4) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<16 x f32>), [[LOAD]](p1) :: (store (<16 x f32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: store <16 x float> %in, ptr addrspace(1) %out, align 4 @@ -944,13 +944,13 @@ define amdgpu_kernel void @kernel_arg_i64(ptr addrspace(1) %out, i64 %a) nounwin ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 ; HSA-VI-NEXT: {{ $}} ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) - ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), addrspace 4) - ; HSA-VI-NEXT: G_STORE [[LOAD1]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1) + ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (i64), addrspace 4) + ; HSA-VI-NEXT: G_STORE [[LOAD1]](i64), [[LOAD]](p1) :: (store (i64) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; ; LEGACY-MESA-VI-LABEL: name: kernel_arg_i64 @@ -958,13 +958,13 @@ define amdgpu_kernel void @kernel_arg_i64(ptr addrspace(1) %out, i64 %a) nounwin ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 ; LEGACY-MESA-VI-NEXT: {{ $}} ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 - ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 36 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1) + ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 44 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (i64), align 4, addrspace 4) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](i64), [[LOAD]](p1) :: (store (i64) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 store i64 %a, ptr addrspace(1) %out, align 8 ret void @@ -976,13 +976,13 @@ define amdgpu_kernel void @f64_kernel_arg(ptr addrspace(1) %out, double %in) { ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 ; HSA-VI-NEXT: {{ $}} ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = 
G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) - ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), addrspace 4) - ; HSA-VI-NEXT: G_STORE [[LOAD1]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1) + ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(f64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (f64), addrspace 4) + ; HSA-VI-NEXT: G_STORE [[LOAD1]](f64), [[LOAD]](p1) :: (store (f64) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; ; LEGACY-MESA-VI-LABEL: name: f64_kernel_arg @@ -990,13 +990,13 @@ define amdgpu_kernel void @f64_kernel_arg(ptr addrspace(1) %out, double %in) { ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 ; LEGACY-MESA-VI-NEXT: {{ $}} ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 - ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 36 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1) + ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 44 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(f64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (f64), align 4, addrspace 4) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](f64), [[LOAD]](p1) :: (store (f64) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: store double %in, ptr addrspace(1) %out @@ -1009,13 +1009,13 @@ define amdgpu_kernel void @i1_arg(ptr addrspace(1) %out, i1 %x) nounwind { ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 ; HSA-VI-NEXT: {{ $}} ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) - ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 8, addrspace 4) - ; HSA-VI-NEXT: G_STORE [[LOAD1]](s1), 
[[LOAD]](p1) :: (store (s1) into %ir.out, addrspace 1) + ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(i1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (i1), align 8, addrspace 4) + ; HSA-VI-NEXT: G_STORE [[LOAD1]](i1), [[LOAD]](p1) :: (store (i1) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; ; LEGACY-MESA-VI-LABEL: name: i1_arg @@ -1023,13 +1023,13 @@ define amdgpu_kernel void @i1_arg(ptr addrspace(1) %out, i1 %x) nounwind { ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 ; LEGACY-MESA-VI-NEXT: {{ $}} ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 - ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 36 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s1), [[LOAD]](p1) :: (store (s1) into %ir.out, addrspace 1) + ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 44 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(i1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (i1), align 4, addrspace 4) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](i1), [[LOAD]](p1) :: (store (i1) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 store i1 %x, ptr addrspace(1) %out, align 1 ret void @@ -1041,14 +1041,14 @@ define amdgpu_kernel void @i1_arg_zext_i32(ptr addrspace(1) %out, i1 %x) nounwin ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 ; HSA-VI-NEXT: {{ $}} ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) - ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 8, addrspace 4) - ; HSA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s1) - ; HSA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) + ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(i1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (i1), align 8, addrspace 4) + ; HSA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[LOAD1]](i1) + ; HSA-VI-NEXT: G_STORE [[ZEXT]](i32), [[LOAD]](p1) :: (store (i32) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; ; LEGACY-MESA-VI-LABEL: name: 
i1_arg_zext_i32 @@ -1056,14 +1056,14 @@ define amdgpu_kernel void @i1_arg_zext_i32(ptr addrspace(1) %out, i1 %x) nounwin ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 ; LEGACY-MESA-VI-NEXT: {{ $}} ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 - ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 36 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s1) - ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) + ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 44 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(i1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (i1), align 4, addrspace 4) + ; LEGACY-MESA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[LOAD1]](i1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](i32), [[LOAD]](p1) :: (store (i32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 %ext = zext i1 %x to i32 store i32 %ext, ptr addrspace(1) %out, align 4 @@ -1076,14 +1076,14 @@ define amdgpu_kernel void @i1_arg_zext_i64(ptr addrspace(1) %out, i1 %x) nounwin ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 ; HSA-VI-NEXT: {{ $}} ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) - ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 8, addrspace 4) - ; HSA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD1]](s1) - ; HSA-VI-NEXT: G_STORE [[ZEXT]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1) + ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(i1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (i1), align 8, addrspace 4) + ; HSA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[LOAD1]](i1) + ; HSA-VI-NEXT: G_STORE [[ZEXT]](i64), [[LOAD]](p1) :: (store (i64) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; ; LEGACY-MESA-VI-LABEL: name: i1_arg_zext_i64 @@ -1091,14 +1091,14 @@ define amdgpu_kernel void @i1_arg_zext_i64(ptr addrspace(1) %out, i1 %x) nounwin ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 ; LEGACY-MESA-VI-NEXT: {{ $}} ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 - ; LEGACY-MESA-VI-NEXT: 
[[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 36 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD1]](s1) - ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1) + ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 44 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(i1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (i1), align 4, addrspace 4) + ; LEGACY-MESA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[LOAD1]](i1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](i64), [[LOAD]](p1) :: (store (i64) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 %ext = zext i1 %x to i64 store i64 %ext, ptr addrspace(1) %out, align 8 @@ -1111,14 +1111,14 @@ define amdgpu_kernel void @i1_arg_sext_i32(ptr addrspace(1) %out, i1 %x) nounwin ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 ; HSA-VI-NEXT: {{ $}} ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) - ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 8, addrspace 4) - ; HSA-VI-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s1) - ; HSA-VI-NEXT: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) + ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(i1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (i1), align 8, addrspace 4) + ; HSA-VI-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[LOAD1]](i1) + ; HSA-VI-NEXT: G_STORE [[SEXT]](i32), [[LOAD]](p1) :: (store (i32) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; ; LEGACY-MESA-VI-LABEL: name: i1_arg_sext_i32 @@ -1126,14 +1126,14 @@ define amdgpu_kernel void @i1_arg_sext_i32(ptr addrspace(1) %out, i1 %x) nounwin ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 ; LEGACY-MESA-VI-NEXT: {{ $}} ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 - ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 36 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; 
LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s1) - ; LEGACY-MESA-VI-NEXT: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) + ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 44 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(i1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (i1), align 4, addrspace 4) + ; LEGACY-MESA-VI-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[LOAD1]](i1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[SEXT]](i32), [[LOAD]](p1) :: (store (i32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 %ext = sext i1 %x to i32 store i32 %ext, ptr addrspace(1) %out, align 4 @@ -1146,14 +1146,14 @@ define amdgpu_kernel void @i1_arg_sext_i64(ptr addrspace(1) %out, i1 %x) nounwin ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 ; HSA-VI-NEXT: {{ $}} ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) - ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 8, addrspace 4) - ; HSA-VI-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD1]](s1) - ; HSA-VI-NEXT: G_STORE [[SEXT]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1) + ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(i1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (i1), align 8, addrspace 4) + ; HSA-VI-NEXT: [[SEXT:%[0-9]+]]:_(i64) = G_SEXT [[LOAD1]](i1) + ; HSA-VI-NEXT: G_STORE [[SEXT]](i64), [[LOAD]](p1) :: (store (i64) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; ; LEGACY-MESA-VI-LABEL: name: i1_arg_sext_i64 @@ -1161,14 +1161,14 @@ define amdgpu_kernel void @i1_arg_sext_i64(ptr addrspace(1) %out, i1 %x) nounwin ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 ; LEGACY-MESA-VI-NEXT: {{ $}} ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 - ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 36 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; 
LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD1]](s1) - ; LEGACY-MESA-VI-NEXT: G_STORE [[SEXT]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1) + ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 44 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(i1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (i1), align 4, addrspace 4) + ; LEGACY-MESA-VI-NEXT: [[SEXT:%[0-9]+]]:_(i64) = G_SEXT [[LOAD1]](i1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[SEXT]](i64), [[LOAD]](p1) :: (store (i64) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 %ext = sext i1 %x to i64 store i64 %ext, ptr addrspace(1) %out, align 8 @@ -1183,11 +1183,11 @@ define amdgpu_kernel void @empty_struct_arg({} %arg0, i32 %arg1) nounwind { ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 ; HSA-VI-NEXT: {{ $}} ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4) + ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (i32), align 16, addrspace 4) ; HSA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; HSA-VI-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; ; LEGACY-MESA-VI-LABEL: name: empty_struct_arg @@ -1195,11 +1195,11 @@ define amdgpu_kernel void @empty_struct_arg({} %arg0, i32 %arg1) nounwind { ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 ; LEGACY-MESA-VI-NEXT: {{ $}} ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 - ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4) + ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 36 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (i32), addrspace 4) ; LEGACY-MESA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 store i32 %arg1, ptr addrspace(1) undef ret void @@ -1211,11 +1211,11 @@ define amdgpu_kernel void @empty_array_arg([0 x i8] %arg0, i32 %arg1) nounwind { ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 ; HSA-VI-NEXT: {{ $}} ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; HSA-VI-NEXT: 
[[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4) + ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (i32), align 16, addrspace 4) ; HSA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; HSA-VI-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; ; LEGACY-MESA-VI-LABEL: name: empty_array_arg @@ -1223,11 +1223,11 @@ define amdgpu_kernel void @empty_array_arg([0 x i8] %arg0, i32 %arg1) nounwind { ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 ; LEGACY-MESA-VI-NEXT: {{ $}} ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 - ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4) + ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 36 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (i32), addrspace 4) ; LEGACY-MESA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 store i32 %arg1, ptr addrspace(1) undef ret void @@ -1247,27 +1247,27 @@ define amdgpu_kernel void @struct_argument_alignment({i32, i64} %arg0, i8 %pad, ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 ; HSA-VI-NEXT: {{ $}} ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4) - ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), addrspace 4) - ; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s8), align 16, addrspace 4) - ; HSA-VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; HSA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; HSA-VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4) - ; HSA-VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; HSA-VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; HSA-VI-NEXT: [[LOAD4:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD4]](p4) :: (dereferenceable invariant load (s64), addrspace 4) + ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; HSA-VI-NEXT: 
[[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (i32), align 16, addrspace 4) + ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (i64), addrspace 4) + ; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(i8) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (i8), align 16, addrspace 4) + ; HSA-VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 24 + ; HSA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; HSA-VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (i32), align 8, addrspace 4) + ; HSA-VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 32 + ; HSA-VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; HSA-VI-NEXT: [[LOAD4:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD4]](p4) :: (dereferenceable invariant load (i64), addrspace 4) ; HSA-VI-NEXT: [[C5:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: G_STORE [[LOAD]](s32), [[C5]](p1) :: (volatile store (s32) into `ptr addrspace(1) null`, addrspace 1) - ; HSA-VI-NEXT: G_STORE [[LOAD1]](s64), [[C5]](p1) :: (volatile store (s64) into `ptr addrspace(1) null`, addrspace 1) - ; HSA-VI-NEXT: G_STORE [[LOAD2]](s8), [[C5]](p1) :: (volatile store (s8) into `ptr addrspace(1) null`, addrspace 1) - ; HSA-VI-NEXT: G_STORE [[LOAD3]](s32), [[C5]](p1) :: (volatile store (s32) into `ptr addrspace(1) null`, addrspace 1) - ; HSA-VI-NEXT: G_STORE [[LOAD4]](s64), [[C5]](p1) :: (volatile store (s64) into `ptr addrspace(1) null`, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD]](i32), [[C5]](p1) :: (volatile store (i32) into `ptr addrspace(1) null`, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD1]](i64), [[C5]](p1) :: (volatile store (i64) into `ptr addrspace(1) null`, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD2]](i8), [[C5]](p1) :: (volatile store (i8) into `ptr addrspace(1) null`, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD3]](i32), [[C5]](p1) :: (volatile store (i32) into `ptr addrspace(1) null`, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD4]](i64), [[C5]](p1) :: (volatile store (i64) into `ptr addrspace(1) null`, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; ; LEGACY-MESA-VI-LABEL: name: struct_argument_alignment @@ -1275,27 +1275,27 @@ define amdgpu_kernel void @struct_argument_alignment({i32, i64} %arg0, i8 %pad, ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 ; LEGACY-MESA-VI-NEXT: {{ $}} ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 - ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; 
LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 60 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; LEGACY-MESA-VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 68 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; LEGACY-MESA-VI-NEXT: [[LOAD4:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD4]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) + ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 36 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (i32), addrspace 4) + ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 44 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (i64), align 4, addrspace 4) + ; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 52 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(i8) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (i8), align 4, addrspace 4) + ; LEGACY-MESA-VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 60 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; LEGACY-MESA-VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (i32), addrspace 4) + ; LEGACY-MESA-VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 68 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; LEGACY-MESA-VI-NEXT: [[LOAD4:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD4]](p4) :: (dereferenceable invariant load (i64), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C5:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](s32), [[C5]](p1) :: (volatile store (s32) into `ptr addrspace(1) null`, addrspace 1) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s64), [[C5]](p1) :: (volatile store (s64) into `ptr addrspace(1) null`, addrspace 1) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](s8), [[C5]](p1) :: (volatile store (s8) into `ptr addrspace(1) null`, addrspace 1) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD3]](s32), [[C5]](p1) :: (volatile store (s32) into `ptr addrspace(1) null`, addrspace 1) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD4]](s64), [[C5]](p1) :: (volatile store (s64) into `ptr addrspace(1) null`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](i32), [[C5]](p1) :: (volatile store (i32) into `ptr addrspace(1) null`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](i64), [[C5]](p1) :: (volatile store (i64) into `ptr addrspace(1) null`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](i8), [[C5]](p1) :: (volatile store (i8) into `ptr addrspace(1) null`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD3]](i32), [[C5]](p1) :: (volatile store (i32) into `ptr addrspace(1) null`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD4]](i64), [[C5]](p1) :: (volatile store (i64) into `ptr addrspace(1) null`, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 %val0 = extractvalue {i32, i64} 
%arg0, 0 %val1 = extractvalue {i32, i64} %arg0, 1 @@ -1315,25 +1315,25 @@ define amdgpu_kernel void @pointer_in_struct_argument({ptr addrspace(3), ptr add ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 ; HSA-VI-NEXT: {{ $}} ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4) - ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), addrspace 4) - ; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s8), align 16, addrspace 4) - ; HSA-VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; HSA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; HSA-VI-NEXT: [[LOAD3:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4) - ; HSA-VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; HSA-VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; HSA-VI-NEXT: [[LOAD4:%[0-9]+]]:_(p1234) = G_LOAD [[PTR_ADD4]](p4) :: (dereferenceable invariant load (s64), addrspace 4) + ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (i32), align 16, addrspace 4) + ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (i64), addrspace 4) + ; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(i8) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (i8), align 16, addrspace 4) + ; HSA-VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 24 + ; HSA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; HSA-VI-NEXT: [[LOAD3:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (i32), align 8, addrspace 4) + ; HSA-VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 32 + ; HSA-VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; HSA-VI-NEXT: [[LOAD4:%[0-9]+]]:_(p1234) = G_LOAD [[PTR_ADD4]](p4) :: (dereferenceable invariant load (i64), addrspace 4) ; HSA-VI-NEXT: [[C5:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 ; HSA-VI-NEXT: G_STORE [[LOAD]](p3), [[C5]](p1) :: (volatile store (p3) into `ptr addrspace(1) null`, addrspace 1) ; HSA-VI-NEXT: G_STORE [[LOAD1]](p1), [[C5]](p1) :: (volatile store (p1) into `ptr addrspace(1) null`, addrspace 1) - ; HSA-VI-NEXT: G_STORE [[LOAD2]](s8), [[C5]](p1) :: (volatile store (s8) into `ptr addrspace(1) null`, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD2]](i8), [[C5]](p1) :: (volatile store (i8) into `ptr addrspace(1) null`, addrspace 1) ; HSA-VI-NEXT: G_STORE [[LOAD3]](p3), [[C5]](p1) :: (volatile store (p3) into `ptr addrspace(1) null`, addrspace 1) ; 
HSA-VI-NEXT: G_STORE [[LOAD4]](p1234), [[C5]](p1) :: (volatile store (p1234) into `ptr addrspace(1) null`, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 @@ -1343,25 +1343,25 @@ define amdgpu_kernel void @pointer_in_struct_argument({ptr addrspace(3), ptr add ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 ; LEGACY-MESA-VI-NEXT: {{ $}} ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 - ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 60 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; LEGACY-MESA-VI-NEXT: [[LOAD3:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 68 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; LEGACY-MESA-VI-NEXT: [[LOAD4:%[0-9]+]]:_(p1234) = G_LOAD [[PTR_ADD4]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) + ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 36 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (i32), addrspace 4) + ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 44 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (i64), align 4, addrspace 4) + ; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 52 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(i8) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (i8), align 4, addrspace 4) + ; LEGACY-MESA-VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 60 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; LEGACY-MESA-VI-NEXT: [[LOAD3:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (i32), addrspace 4) + ; LEGACY-MESA-VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 68 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; LEGACY-MESA-VI-NEXT: [[LOAD4:%[0-9]+]]:_(p1234) = G_LOAD [[PTR_ADD4]](p4) :: (dereferenceable invariant load (i64), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C5:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](p3), [[C5]](p1) :: (volatile store (p3) into `ptr addrspace(1) null`, addrspace 1) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](p1), [[C5]](p1) :: (volatile store (p1) into `ptr 
addrspace(1) null`, addrspace 1) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](s8), [[C5]](p1) :: (volatile store (s8) into `ptr addrspace(1) null`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](i8), [[C5]](p1) :: (volatile store (i8) into `ptr addrspace(1) null`, addrspace 1) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD3]](p3), [[C5]](p1) :: (volatile store (p3) into `ptr addrspace(1) null`, addrspace 1) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD4]](p1234), [[C5]](p1) :: (volatile store (p1234) into `ptr addrspace(1) null`, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 @@ -1385,23 +1385,23 @@ define amdgpu_kernel void @packed_struct_argument_alignment(<{i32, i64}> %arg0, ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 ; HSA-VI-NEXT: {{ $}} ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4) - ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) - ; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 13 - ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 1, addrspace 4) - ; HSA-VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 17 - ; HSA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; HSA-VI-NEXT: [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s64), align 1, addrspace 4) + ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (i32), align 16, addrspace 4) + ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (i64), align 4, addrspace 4) + ; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 13 + ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (i32), align 1, addrspace 4) + ; HSA-VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 17 + ; HSA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; HSA-VI-NEXT: [[LOAD3:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (i64), align 1, addrspace 4) ; HSA-VI-NEXT: [[C4:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: G_STORE [[LOAD]](s32), [[C4]](p1) :: (volatile store (s32) into `ptr addrspace(1) null`, addrspace 1) - ; HSA-VI-NEXT: G_STORE [[LOAD1]](s64), [[C4]](p1) :: (volatile store (s64) into `ptr addrspace(1) null`, addrspace 1) - ; HSA-VI-NEXT: G_STORE [[LOAD2]](s32), [[C4]](p1) :: (volatile store (s32) into `ptr addrspace(1) null`, addrspace 1) - ; HSA-VI-NEXT: G_STORE [[LOAD3]](s64), [[C4]](p1) :: (volatile store (s64) into `ptr addrspace(1) null`, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD]](i32), [[C4]](p1) :: (volatile store (i32) into `ptr 
addrspace(1) null`, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD1]](i64), [[C4]](p1) :: (volatile store (i64) into `ptr addrspace(1) null`, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD2]](i32), [[C4]](p1) :: (volatile store (i32) into `ptr addrspace(1) null`, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD3]](i64), [[C4]](p1) :: (volatile store (i64) into `ptr addrspace(1) null`, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; ; LEGACY-MESA-VI-LABEL: name: packed_struct_argument_alignment @@ -1409,23 +1409,23 @@ define amdgpu_kernel void @packed_struct_argument_alignment(<{i32, i64}> %arg0, ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 ; LEGACY-MESA-VI-NEXT: {{ $}} ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 - ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 40 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 49 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 1, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 53 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; LEGACY-MESA-VI-NEXT: [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s64), align 1, addrspace 4) + ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 36 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (i32), addrspace 4) + ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 40 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (i64), align 4, addrspace 4) + ; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 49 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (i32), align 1, addrspace 4) + ; LEGACY-MESA-VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 53 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; LEGACY-MESA-VI-NEXT: [[LOAD3:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (i64), align 1, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C4:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](s32), [[C4]](p1) :: (volatile store (s32) into `ptr addrspace(1) null`, addrspace 1) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s64), [[C4]](p1) :: (volatile store (s64) into `ptr addrspace(1) null`, addrspace 1) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](s32), [[C4]](p1) :: (volatile store (s32) into `ptr addrspace(1) null`, addrspace 1) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD3]](s64), [[C4]](p1) :: 
(volatile store (s64) into `ptr addrspace(1) null`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](i32), [[C4]](p1) :: (volatile store (i32) into `ptr addrspace(1) null`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](i64), [[C4]](p1) :: (volatile store (i64) into `ptr addrspace(1) null`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](i32), [[C4]](p1) :: (volatile store (i32) into `ptr addrspace(1) null`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD3]](i64), [[C4]](p1) :: (volatile store (i64) into `ptr addrspace(1) null`, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 %val0 = extractvalue <{i32, i64}> %arg0, 0 %val1 = extractvalue <{i32, i64}> %arg0, 1 @@ -1463,14 +1463,14 @@ define amdgpu_kernel void @byref_constant_i8_arg(ptr addrspace(1) nocapture %out ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 ; HSA-VI-NEXT: {{ $}} ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) - ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8) from %ir.in.byref, addrspace 4) - ; HSA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8) - ; HSA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) + ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(i8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (i8) from %ir.in.byref, addrspace 4) + ; HSA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[LOAD1]](i8) + ; HSA-VI-NEXT: G_STORE [[ZEXT]](i32), [[LOAD]](p1) :: (store (i32) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; ; LEGACY-MESA-VI-LABEL: name: byref_constant_i8_arg @@ -1478,14 +1478,14 @@ define amdgpu_kernel void @byref_constant_i8_arg(ptr addrspace(1) nocapture %out ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 ; LEGACY-MESA-VI-NEXT: {{ $}} ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 - ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 36 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8) from %ir.in.byref, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8) - ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) + ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 44 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD 
[[COPY]], [[C1]](i64) + ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(i8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (i8) from %ir.in.byref, addrspace 4) + ; LEGACY-MESA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[LOAD1]](i8) + ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](i32), [[LOAD]](p1) :: (store (i32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 %in = load i8, ptr addrspace(4) %in.byref %ext = zext i8 %in to i32 @@ -1499,14 +1499,14 @@ define amdgpu_kernel void @byref_constant_i16_arg(ptr addrspace(1) nocapture %ou ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 ; HSA-VI-NEXT: {{ $}} ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) - ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16) from %ir.in.byref, addrspace 4) - ; HSA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16) - ; HSA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) + ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(i16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (i16) from %ir.in.byref, addrspace 4) + ; HSA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[LOAD1]](i16) + ; HSA-VI-NEXT: G_STORE [[ZEXT]](i32), [[LOAD]](p1) :: (store (i32) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; ; LEGACY-MESA-VI-LABEL: name: byref_constant_i16_arg @@ -1514,14 +1514,14 @@ define amdgpu_kernel void @byref_constant_i16_arg(ptr addrspace(1) nocapture %ou ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 ; LEGACY-MESA-VI-NEXT: {{ $}} ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 - ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 36 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16) from %ir.in.byref, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16) - ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) + ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 44 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(i16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (i16) from %ir.in.byref, addrspace 4) + ; LEGACY-MESA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[LOAD1]](i16) + 
; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](i32), [[LOAD]](p1) :: (store (i32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 %in = load i16, ptr addrspace(4) %in.byref %ext = zext i16 %in to i32 @@ -1535,17 +1535,17 @@ define amdgpu_kernel void @byref_constant_i32_arg(ptr addrspace(1) nocapture %ou ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 ; HSA-VI-NEXT: {{ $}} ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) - ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), addrspace 4) - ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32) from %ir.in.byref, addrspace 4) - ; HSA-VI-NEXT: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) - ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) + ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (i32), addrspace 4) + ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (i32) from %ir.in.byref, addrspace 4) + ; HSA-VI-NEXT: G_STORE [[LOAD2]](i32), [[LOAD]](p1) :: (volatile store (i32) into %ir.out, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD1]](i32), [[LOAD]](p1) :: (volatile store (i32) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; ; LEGACY-MESA-VI-LABEL: name: byref_constant_i32_arg @@ -1553,17 +1553,17 @@ define amdgpu_kernel void @byref_constant_i32_arg(ptr addrspace(1) nocapture %ou ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 ; LEGACY-MESA-VI-NEXT: {{ $}} ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 - ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 36 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4) - ; LEGACY-MESA-VI-NEXT: 
[[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32) from %ir.in.byref, addrspace 4) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) + ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 44 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 48 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (i32), align 16, addrspace 4) + ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (i32) from %ir.in.byref, addrspace 4) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](i32), [[LOAD]](p1) :: (volatile store (i32) into %ir.out, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](i32), [[LOAD]](p1) :: (volatile store (i32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 %in = load i32, ptr addrspace(4) %in.byref store volatile i32 %in, ptr addrspace(1) %out, align 4 @@ -1577,17 +1577,17 @@ define amdgpu_kernel void @byref_constant_v4i32_arg(ptr addrspace(1) nocapture % ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 ; HSA-VI-NEXT: {{ $}} ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) - ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4) - ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s32>) from %ir.in.byref, addrspace 4) - ; HSA-VI-NEXT: G_STORE [[LOAD2]](<4 x s32>), [[LOAD]](p1) :: (volatile store (<4 x s32>) into %ir.out, align 4, addrspace 1) - ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) + ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 32 + ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (i32), align 16, addrspace 4) + ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x i32>) from %ir.in.byref, addrspace 4) + ; HSA-VI-NEXT: G_STORE [[LOAD2]](<4 x i32>), [[LOAD]](p1) :: (volatile store (<4 x i32>) into %ir.out, align 4, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD1]](i32), [[LOAD]](p1) :: (volatile store (i32) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; ; 
LEGACY-MESA-VI-LABEL: name: byref_constant_v4i32_arg @@ -1595,17 +1595,17 @@ define amdgpu_kernel void @byref_constant_v4i32_arg(ptr addrspace(1) nocapture % ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 ; LEGACY-MESA-VI-NEXT: {{ $}} ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 - ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 36 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 68 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s32>) from %ir.in.byref, addrspace 4) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](<4 x s32>), [[LOAD]](p1) :: (volatile store (<4 x s32>) into %ir.out, align 4, addrspace 1) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) + ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 52 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 68 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (i32), addrspace 4) + ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x i32>) from %ir.in.byref, addrspace 4) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](<4 x i32>), [[LOAD]](p1) :: (volatile store (<4 x i32>) into %ir.out, align 4, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](i32), [[LOAD]](p1) :: (volatile store (i32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 %in = load <4 x i32>, ptr addrspace(4) %in.byref store volatile <4 x i32> %in, ptr addrspace(1) %out, align 4 @@ -1619,17 +1619,17 @@ define amdgpu_kernel void @byref_align_constant_i32_arg(ptr addrspace(1) nocaptu ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 ; HSA-VI-NEXT: {{ $}} ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) - ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 260 - ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable 
invariant load (s32), addrspace 4) - ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32) from %ir.in.byref, addrspace 4) - ; HSA-VI-NEXT: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) - ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) + ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 256 + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 260 + ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (i32), addrspace 4) + ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (i32) from %ir.in.byref, addrspace 4) + ; HSA-VI-NEXT: G_STORE [[LOAD2]](i32), [[LOAD]](p1) :: (volatile store (i32) into %ir.out, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD1]](i32), [[LOAD]](p1) :: (volatile store (i32) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; ; LEGACY-MESA-VI-LABEL: name: byref_align_constant_i32_arg @@ -1637,17 +1637,17 @@ define amdgpu_kernel void @byref_align_constant_i32_arg(ptr addrspace(1) nocaptu ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 ; LEGACY-MESA-VI-NEXT: {{ $}} ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 - ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 36 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 292 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 296 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32) from %ir.in.byref, addrspace 4) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) + ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 292 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 296 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (i32), align 8, addrspace 4) + ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (i32) from %ir.in.byref, addrspace 4) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](i32), [[LOAD]](p1) :: (volatile store (i32) into %ir.out, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](i32), [[LOAD]](p1) :: (volatile store (i32) into %ir.out, addrspace 
1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 %in = load i32, ptr addrspace(4) %in.byref store volatile i32 %in, ptr addrspace(1) %out, align 4 @@ -1661,17 +1661,17 @@ define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(ptr addrspace ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 ; HSA-VI-NEXT: {{ $}} ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) - ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 - ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4) - ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s32>) from %ir.in.byref, addrspace 4) - ; HSA-VI-NEXT: G_STORE [[LOAD2]](<16 x s32>), [[LOAD]](p1) :: (volatile store (<16 x s32>) into %ir.out, align 4, addrspace 1) - ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) + ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 64 + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 128 + ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (i32), align 16, addrspace 4) + ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(<16 x i32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x i32>) from %ir.in.byref, addrspace 4) + ; HSA-VI-NEXT: G_STORE [[LOAD2]](<16 x i32>), [[LOAD]](p1) :: (volatile store (<16 x i32>) into %ir.out, align 4, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD1]](i32), [[LOAD]](p1) :: (volatile store (i32) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; ; LEGACY-MESA-VI-LABEL: name: byref_natural_align_constant_v16i32_arg @@ -1679,17 +1679,17 @@ define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(ptr addrspace ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 ; LEGACY-MESA-VI-NEXT: {{ $}} ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 - ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 36 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 100 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 164 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), 
addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s32>) from %ir.in.byref, addrspace 4) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](<16 x s32>), [[LOAD]](p1) :: (volatile store (<16 x s32>) into %ir.out, align 4, addrspace 1) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) + ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 100 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 164 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (i32), addrspace 4) + ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(<16 x i32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x i32>) from %ir.in.byref, addrspace 4) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](<16 x i32>), [[LOAD]](p1) :: (volatile store (<16 x i32>) into %ir.out, align 4, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](i32), [[LOAD]](p1) :: (volatile store (i32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 %in = load <16 x i32>, ptr addrspace(4) %in.byref store volatile <16 x i32> %in, ptr addrspace(1) %out, align 4 @@ -1704,14 +1704,14 @@ define amdgpu_kernel void @byref_global_i32_arg(ptr addrspace(1) nocapture %out, ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 ; HSA-VI-NEXT: {{ $}} ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) - ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) ; HSA-VI-NEXT: [[ADDRSPACE_CAST:%[0-9]+]]:_(p1) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) - ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p1) :: (dereferenceable "amdgpu-noclobber" load (s32) from %ir.in.byref, addrspace 1) - ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) + ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[ADDRSPACE_CAST]](p1) :: (dereferenceable "amdgpu-noclobber" load (i32) from %ir.in.byref, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD1]](i32), [[LOAD]](p1) :: (store (i32) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; ; LEGACY-MESA-VI-LABEL: name: byref_global_i32_arg @@ -1719,14 +1719,14 @@ define amdgpu_kernel void @byref_global_i32_arg(ptr addrspace(1) nocapture %out, ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 ; LEGACY-MESA-VI-NEXT: {{ $}} ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 - ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 36 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; LEGACY-MESA-VI-NEXT: 
[[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 44 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) ; LEGACY-MESA-VI-NEXT: [[ADDRSPACE_CAST:%[0-9]+]]:_(p1) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) - ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p1) :: (dereferenceable "amdgpu-noclobber" load (s32) from %ir.in.byref, addrspace 1) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) + ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[ADDRSPACE_CAST]](p1) :: (dereferenceable "amdgpu-noclobber" load (i32) from %ir.in.byref, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](i32), [[LOAD]](p1) :: (store (i32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 %in = load i32, ptr addrspace(1) %in.byref store i32 %in, ptr addrspace(1) %out, align 4 @@ -1739,14 +1739,14 @@ define amdgpu_kernel void @byref_flat_i32_arg(ptr addrspace(1) nocapture %out, p ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 ; HSA-VI-NEXT: {{ $}} ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) - ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) ; HSA-VI-NEXT: [[ADDRSPACE_CAST:%[0-9]+]]:_(p0) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) - ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p0) :: (dereferenceable load (s32) from %ir.in.byref) - ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) + ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[ADDRSPACE_CAST]](p0) :: (dereferenceable load (i32) from %ir.in.byref) + ; HSA-VI-NEXT: G_STORE [[LOAD1]](i32), [[LOAD]](p1) :: (store (i32) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; ; LEGACY-MESA-VI-LABEL: name: byref_flat_i32_arg @@ -1754,14 +1754,14 @@ define amdgpu_kernel void @byref_flat_i32_arg(ptr addrspace(1) nocapture %out, p ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 ; LEGACY-MESA-VI-NEXT: {{ $}} ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 - ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 36 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT 
i64 44 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) ; LEGACY-MESA-VI-NEXT: [[ADDRSPACE_CAST:%[0-9]+]]:_(p0) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) - ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p0) :: (dereferenceable load (s32) from %ir.in.byref) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) + ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[ADDRSPACE_CAST]](p0) :: (dereferenceable load (i32) from %ir.in.byref) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](i32), [[LOAD]](p1) :: (store (i32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 %in = load i32, ptr %in.byref store i32 %in, ptr addrspace(1) %out, align 4 @@ -1774,14 +1774,14 @@ define amdgpu_kernel void @byref_constant_32bit_i32_arg(ptr addrspace(1) nocaptu ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 ; HSA-VI-NEXT: {{ $}} ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) - ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) ; HSA-VI-NEXT: [[ADDRSPACE_CAST:%[0-9]+]]:_(p6) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) - ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p6) :: (dereferenceable invariant load (s32) from %ir.in.byref, addrspace 6) - ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) + ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[ADDRSPACE_CAST]](p6) :: (dereferenceable invariant load (i32) from %ir.in.byref, addrspace 6) + ; HSA-VI-NEXT: G_STORE [[LOAD1]](i32), [[LOAD]](p1) :: (store (i32) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; ; LEGACY-MESA-VI-LABEL: name: byref_constant_32bit_i32_arg @@ -1789,14 +1789,14 @@ define amdgpu_kernel void @byref_constant_32bit_i32_arg(ptr addrspace(1) nocaptu ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 ; LEGACY-MESA-VI-NEXT: {{ $}} ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 - ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 36 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 44 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) ; LEGACY-MESA-VI-NEXT: [[ADDRSPACE_CAST:%[0-9]+]]:_(p6) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) - ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p6) :: (dereferenceable invariant load (s32) from %ir.in.byref, addrspace 6) - ; 
LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) + ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[ADDRSPACE_CAST]](p6) :: (dereferenceable invariant load (i32) from %ir.in.byref, addrspace 6) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](i32), [[LOAD]](p1) :: (store (i32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 %in = load i32, ptr addrspace(6) %in.byref store i32 %in, ptr addrspace(1) %out, align 4 @@ -1809,14 +1809,14 @@ define amdgpu_kernel void @byref_unknown_as_i32_arg(ptr addrspace(1) nocapture % ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 ; HSA-VI-NEXT: {{ $}} ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) - ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) ; HSA-VI-NEXT: [[ADDRSPACE_CAST:%[0-9]+]]:_(p999) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) - ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p999) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 999) - ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) + ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[ADDRSPACE_CAST]](p999) :: (dereferenceable load (i32) from %ir.in.byref, addrspace 999) + ; HSA-VI-NEXT: G_STORE [[LOAD1]](i32), [[LOAD]](p1) :: (store (i32) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; ; LEGACY-MESA-VI-LABEL: name: byref_unknown_as_i32_arg @@ -1824,14 +1824,14 @@ define amdgpu_kernel void @byref_unknown_as_i32_arg(ptr addrspace(1) nocapture % ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 ; LEGACY-MESA-VI-NEXT: {{ $}} ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 - ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 36 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 44 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) ; LEGACY-MESA-VI-NEXT: [[ADDRSPACE_CAST:%[0-9]+]]:_(p999) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) - ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p999) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 999) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) + ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[ADDRSPACE_CAST]](p999) :: (dereferenceable load (i32) from %ir.in.byref, addrspace 999) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](i32), 
[[LOAD]](p1) :: (store (i32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 %in = load i32, ptr addrspace(999) %in.byref store i32 %in, ptr addrspace(1) %out, align 4 @@ -1845,14 +1845,14 @@ define amdgpu_kernel void @byref_local_i32_arg(ptr addrspace(1) nocapture %out, ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 ; HSA-VI-NEXT: {{ $}} ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) - ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) ; HSA-VI-NEXT: [[ADDRSPACE_CAST:%[0-9]+]]:_(p3) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) - ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p3) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 3) - ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) + ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[ADDRSPACE_CAST]](p3) :: (dereferenceable load (i32) from %ir.in.byref, addrspace 3) + ; HSA-VI-NEXT: G_STORE [[LOAD1]](i32), [[LOAD]](p1) :: (store (i32) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; ; LEGACY-MESA-VI-LABEL: name: byref_local_i32_arg @@ -1860,14 +1860,14 @@ define amdgpu_kernel void @byref_local_i32_arg(ptr addrspace(1) nocapture %out, ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 ; LEGACY-MESA-VI-NEXT: {{ $}} ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 - ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 36 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 44 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) ; LEGACY-MESA-VI-NEXT: [[ADDRSPACE_CAST:%[0-9]+]]:_(p3) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) - ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p3) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 3) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) + ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[ADDRSPACE_CAST]](p3) :: (dereferenceable load (i32) from %ir.in.byref, addrspace 3) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](i32), [[LOAD]](p1) :: (store (i32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 %in = load i32, ptr addrspace(3) %in.byref store i32 %in, ptr addrspace(1) %out, align 4 @@ -1880,21 +1880,21 @@ define amdgpu_kernel void @multi_byref_constant_i32_arg(ptr addrspace(1) nocaptu ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 ; HSA-VI-NEXT: 
{{ $}} ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) - ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; HSA-VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; HSA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4) - ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32) from %ir.in0.byref, addrspace 4) - ; HSA-VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32) from %ir.in1.byref, addrspace 4) - ; HSA-VI-NEXT: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) - ; HSA-VI-NEXT: G_STORE [[LOAD3]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) - ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) + ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; HSA-VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; HSA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (i32), align 16, addrspace 4) + ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (i32) from %ir.in0.byref, addrspace 4) + ; HSA-VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (i32) from %ir.in1.byref, addrspace 4) + ; HSA-VI-NEXT: G_STORE [[LOAD2]](i32), [[LOAD]](p1) :: (volatile store (i32) into %ir.out, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD3]](i32), [[LOAD]](p1) :: (volatile store (i32) into %ir.out, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD1]](i32), [[LOAD]](p1) :: (volatile store (i32) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; ; LEGACY-MESA-VI-LABEL: name: multi_byref_constant_i32_arg @@ -1902,21 +1902,21 @@ define amdgpu_kernel void @multi_byref_constant_i32_arg(ptr addrspace(1) nocaptu ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 ; LEGACY-MESA-VI-NEXT: {{ $}} ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 - ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 36 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: 
[[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; LEGACY-MESA-VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32) from %ir.in0.byref, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32) from %ir.in1.byref, addrspace 4) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD3]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) + ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 44 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 48 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; LEGACY-MESA-VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 52 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (i32), addrspace 4) + ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (i32) from %ir.in0.byref, addrspace 4) + ; LEGACY-MESA-VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (i32) from %ir.in1.byref, addrspace 4) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](i32), [[LOAD]](p1) :: (volatile store (i32) into %ir.out, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD3]](i32), [[LOAD]](p1) :: (volatile store (i32) into %ir.out, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](i32), [[LOAD]](p1) :: (volatile store (i32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 %in0 = load i32, ptr addrspace(4) %in0.byref %in1 = load i32, ptr addrspace(4) %in1.byref @@ -1932,11 +1932,11 @@ define amdgpu_kernel void @byref_constant_i32_arg_offset0(ptr addrspace(4) byref ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 ; HSA-VI-NEXT: {{ $}} ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; HSA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32) from %ir.in.byref, addrspace 4) - ; HSA-VI-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (i32) from %ir.in.byref, addrspace 4) + ; HSA-VI-NEXT: G_STORE [[LOAD]](i32), [[DEF]](p1) :: 
(store (i32) into `ptr addrspace(1) undef`, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; ; LEGACY-MESA-VI-LABEL: name: byref_constant_i32_arg_offset0 @@ -1944,11 +1944,11 @@ define amdgpu_kernel void @byref_constant_i32_arg_offset0(ptr addrspace(4) byref ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 ; LEGACY-MESA-VI-NEXT: {{ $}} ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 - ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 36 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; LEGACY-MESA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32) from %ir.in.byref, addrspace 4) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (i32) from %ir.in.byref, addrspace 4) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 %in = load i32, ptr addrspace(4) %in.byref store i32 %in, ptr addrspace(1) undef, align 4 @@ -1961,11 +1961,11 @@ define amdgpu_kernel void @p3i8_arg(ptr addrspace(3) %arg) nounwind { ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 ; HSA-VI-NEXT: {{ $}} ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p3), align 16, addrspace 4) - ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 9 - ; HSA-VI-NEXT: G_STORE [[C1]](s8), [[LOAD]](p3) :: (store (s8) into %ir.arg, align 4, addrspace 3) + ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(i8) = G_CONSTANT i8 9 + ; HSA-VI-NEXT: G_STORE [[C1]](i8), [[LOAD]](p3) :: (store (i8) into %ir.arg, align 4, addrspace 3) ; HSA-VI-NEXT: S_ENDPGM 0 ; ; LEGACY-MESA-VI-LABEL: name: p3i8_arg @@ -1973,11 +1973,11 @@ define amdgpu_kernel void @p3i8_arg(ptr addrspace(3) %arg) nounwind { ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 ; LEGACY-MESA-VI-NEXT: {{ $}} ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 - ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 36 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p3), addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 9 - ; LEGACY-MESA-VI-NEXT: G_STORE [[C1]](s8), [[LOAD]](p3) :: (store (s8) into %ir.arg, align 4, addrspace 3) + ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(i8) = G_CONSTANT i8 9 + ; LEGACY-MESA-VI-NEXT: G_STORE [[C1]](i8), [[LOAD]](p3) :: (store (i8) into %ir.arg, align 4, addrspace 3) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 store i8 9, ptr addrspace(3) %arg, align 4 ret void @@ -1989,9 +1989,9 @@ define amdgpu_kernel void @p1i8_arg(ptr 
addrspace(1) %arg) nounwind { ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 ; HSA-VI-NEXT: {{ $}} ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 9 + ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(i8) = G_CONSTANT i8 9 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(p3) = G_CONSTANT i32 0 - ; HSA-VI-NEXT: G_STORE [[C]](s8), [[C1]](p3) :: (store (s8) into `ptr addrspace(3) null`, addrspace 3) + ; HSA-VI-NEXT: G_STORE [[C]](i8), [[C1]](p3) :: (store (i8) into `ptr addrspace(3) null`, addrspace 3) ; HSA-VI-NEXT: S_ENDPGM 0 ; ; LEGACY-MESA-VI-LABEL: name: p1i8_arg @@ -1999,9 +1999,9 @@ define amdgpu_kernel void @p1i8_arg(ptr addrspace(1) %arg) nounwind { ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 ; LEGACY-MESA-VI-NEXT: {{ $}} ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 - ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 9 + ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(i8) = G_CONSTANT i8 9 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(p3) = G_CONSTANT i32 0 - ; LEGACY-MESA-VI-NEXT: G_STORE [[C]](s8), [[C1]](p3) :: (store (s8) into `ptr addrspace(3) null`, addrspace 3) + ; LEGACY-MESA-VI-NEXT: G_STORE [[C]](i8), [[C1]](p3) :: (store (i8) into `ptr addrspace(3) null`, addrspace 3) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 store i8 9, ptr addrspace(3) null ret void @@ -2013,8 +2013,8 @@ define amdgpu_kernel void @v2p1i8_arg(<2 x ptr addrspace(1)> %arg) nounwind { ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 ; HSA-VI-NEXT: {{ $}} ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x p1>), addrspace 4) ; HSA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; HSA-VI-NEXT: G_STORE [[LOAD]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef`, addrspace 1) @@ -2025,8 +2025,8 @@ define amdgpu_kernel void @v2p1i8_arg(<2 x ptr addrspace(1)> %arg) nounwind { ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 ; LEGACY-MESA-VI-NEXT: {{ $}} ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 - ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 36 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x p1>), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef`, addrspace 1) @@ -2041,8 +2041,8 @@ define amdgpu_kernel void @v2p3i8_arg(<2 x ptr addrspace(3)> %arg) nounwind { ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 ; HSA-VI-NEXT: {{ $}} ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable 
invariant load (<2 x p3>), align 16, addrspace 4) ; HSA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; HSA-VI-NEXT: G_STORE [[LOAD]](<2 x p3>), [[DEF]](p1) :: (store (<2 x p3>) into `ptr addrspace(1) undef`, addrspace 1) @@ -2053,8 +2053,8 @@ define amdgpu_kernel void @v2p3i8_arg(<2 x ptr addrspace(3)> %arg) nounwind { ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 ; LEGACY-MESA-VI-NEXT: {{ $}} ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 - ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 36 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x p3>), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](<2 x p3>), [[DEF]](p1) :: (store (<2 x p3>) into `ptr addrspace(1) undef`, addrspace 1) @@ -2069,16 +2069,16 @@ define amdgpu_kernel void @v2p1i8_in_struct_arg({ <2 x ptr addrspace(1)>, <2 x p ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 ; HSA-VI-NEXT: {{ $}} ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x s64>), addrspace 4) - ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), align 16, addrspace 4) + ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x i64>), addrspace 4) + ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x i32>), align 16, addrspace 4) ; HSA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; HSA-VI-NEXT: G_STORE [[LOAD]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef`, addrspace 1) - ; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) + ; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C2]](i64) ; HSA-VI-NEXT: G_STORE [[LOAD1]](<2 x p3>), [[PTR_ADD2]](p1) :: (store (<2 x p3>) into `ptr addrspace(1) undef` + 16, align 16, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; @@ -2087,16 +2087,16 @@ define amdgpu_kernel void @v2p1i8_in_struct_arg({ <2 x ptr addrspace(1)>, <2 x p ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 ; LEGACY-MESA-VI-NEXT: {{ $}} ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 - ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x s64>), 
align 4, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), align 4, addrspace 4) + ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 36 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x i64>), align 4, addrspace 4) + ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 52 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x i32>), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef`, addrspace 1) - ; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) + ; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C2]](i64) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<2 x p3>), [[PTR_ADD2]](p1) :: (store (<2 x p3>) into `ptr addrspace(1) undef` + 16, align 16, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 store { <2 x ptr addrspace(1)>, <2 x ptr addrspace(3)> } %arg, ptr addrspace(1) undef diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll index e98f298adf58e..5da7f86301ade 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll @@ -7,10 +7,10 @@ define amdgpu_ps void @disabled_input(float inreg %arg0, float %psinput0, float ; CHECK: bb.1.main_body: ; CHECK-NEXT: liveins: $sgpr2, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 15, [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY1]](s32), 0, 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(f32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(f32) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 15, [[COPY]](f32), [[COPY]](f32), [[COPY]](f32), [[COPY1]](f32), 0, 0 ; CHECK-NEXT: S_ENDPGM 0 main_body: call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg0, float %arg0, float %arg0, float %psinput1, i1 false, i1 false) #0 @@ -22,11 +22,11 @@ define amdgpu_ps void @disabled_input_struct(float inreg %arg0, { float, float } ; CHECK: bb.1.main_body: ; CHECK-NEXT: liveins: $sgpr2, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 15, [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY1]](s32), 0, 0 + ; CHECK-NEXT: 
[[COPY:%[0-9]+]]:_(f32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(f32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(f32) = COPY [[DEF]](f32) + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 15, [[COPY]](f32), [[COPY]](f32), [[COPY]](f32), [[COPY1]](f32), 0, 0 ; CHECK-NEXT: S_ENDPGM 0 main_body: call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg0, float %arg0, float %arg0, float %psinput1, i1 false, i1 false) #0 @@ -38,8 +38,9 @@ define amdgpu_ps float @vgpr_return(i32 %vgpr) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %cast = bitcast i32 %vgpr to float ret float %cast @@ -50,9 +51,9 @@ define amdgpu_ps i32 @sgpr_return_i32(i32 %vgpr) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](i32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](i32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ret i32 %vgpr } @@ -62,14 +63,14 @@ define amdgpu_ps i64 @sgpr_return_i64(i64 %vgpr) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64) - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) - ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[MV]](i64) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](i32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](i32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](i32) + ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](i32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 ret i64 %vgpr } @@ -79,14 +80,14 @@ define amdgpu_ps <2 x i32> @sgpr_return_v2i32(<2 x i32> %vgpr) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY 
$vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s32>) - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) - ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x i32>) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](i32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](i32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](i32) + ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](i32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 ret <2 x i32> %vgpr } @@ -96,13 +97,13 @@ define amdgpu_ps { i32, i32 } @sgpr_struct_return_i32_i32(i32 %vgpr0, i32 %vgpr1 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32) - ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](i32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](i32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](i32) + ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](i32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 %insertvalue0 = insertvalue { i32, i32 } undef, i32 %vgpr0, 0 %value = insertvalue { i32, i32 } %insertvalue0, i32 %vgpr1, 1 @@ -115,9 +116,9 @@ define amdgpu_ps ptr addrspace(3) @sgpr_return_p3i8(ptr addrspace(3) %vgpr) { ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p3) - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[PTRTOINT]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) + ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(i32) = G_PTRTOINT [[COPY]](p3) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(i32) = 
G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[PTRTOINT]](i32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](i32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ret ptr addrspace(3) %vgpr } @@ -127,14 +128,14 @@ define amdgpu_ps ptr addrspace(1) @sgpr_return_p1i8(ptr addrspace(1) %vgpr) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](p1) - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) - ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[MV]](p1) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](i32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](i32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](i32) + ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](i32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 ret ptr addrspace(1) %vgpr } @@ -144,10 +145,10 @@ define amdgpu_ps <2 x i16> @sgpr_return_v2i16(<2 x i16> %vgpr) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[BITCAST]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[BITCAST]](i32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](i32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ret <2 x i16> %vgpr } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll index 875b725a3c76c..bf437e95d596f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll @@ -6,9 +6,9 @@ define amdgpu_vs void @test_f32_inreg(float inreg %arg0) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[COPY]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), 0, 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(f32) = COPY $sgpr2 + ; CHECK-NEXT: 
[[DEF:%[0-9]+]]:_(f32) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[COPY]](f32), [[DEF]](f32), [[DEF]](f32), [[DEF]](f32), 0, 0 ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg0, float undef, float undef, float undef, i1 false, i1 false) #0 ret void @@ -19,9 +19,9 @@ define amdgpu_vs void @test_f32(float %arg0) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[COPY]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), 0, 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(f32) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[COPY]](f32), [[DEF]](f32), [[DEF]](f32), [[DEF]](f32), 0, 0 ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg0, float undef, float undef, float undef, i1 false, i1 false) #0 ret void @@ -32,10 +32,10 @@ define amdgpu_vs void @test_ptr2_inreg(ptr addrspace(4) inreg %arg0) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (volatile invariant load (s32) from %ir.arg0, addrspace 4) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[MV]](p4) :: (volatile invariant load (i32) from %ir.arg0, addrspace 4) ; CHECK-NEXT: S_ENDPGM 0 %tmp0 = load volatile i32, ptr addrspace(4) %arg0 ret void @@ -46,13 +46,13 @@ define amdgpu_vs void @test_sgpr_alignment0(float inreg %arg0, ptr addrspace(4) ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (volatile invariant load (s32) from %ir.arg1, addrspace 4) - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[COPY]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), 0, 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(f32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY1]](i32), [[COPY2]](i32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(f32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[MV]](p4) :: (volatile invariant load (i32) from %ir.arg1, addrspace 4) + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[COPY]](f32), [[DEF]](f32), [[DEF]](f32), [[DEF]](f32), 0, 0 ; CHECK-NEXT: S_ENDPGM 0 %tmp0 = load volatile i32, ptr addrspace(4) %arg1 call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg0, float undef, float undef, float undef, i1 false, 
i1 false) #0 @@ -64,11 +64,11 @@ define amdgpu_vs void @test_order(float inreg %arg0, float inreg %arg1, float %a ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[COPY2]](s32), [[COPY]](s32), [[COPY3]](s32), [[COPY1]](s32), 0, 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(f32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(f32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[COPY2]](f32), [[COPY]](f32), [[COPY3]](f32), [[COPY1]](f32), 0, 0 ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg2, float %arg0, float %arg3, float %arg1, i1 false, i1 false) #0 ret void @@ -79,13 +79,13 @@ define amdgpu_vs <{ i32, i32 }> @ret_struct(i32 inreg %arg0, i32 inreg %arg1) { ; CHECK: bb.1.main_body: ; CHECK-NEXT: liveins: $sgpr2, $sgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32) - ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](i32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](i32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](i32) + ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](i32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 main_body: %tmp0 = insertvalue <{ i32, i32 }> undef, i32 %arg0, 0 @@ -96,9 +96,9 @@ main_body: define amdgpu_vs i32 @non_void_ret() { ; CHECK-LABEL: name: non_void_ret ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[C]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[C]](i32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](i32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ret i32 0 } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll index fc3eb19e98524..a4a06c12a56a3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll 
@@ -7,11 +7,11 @@ define void @arg_align_8(ptr addrspace(1) align 8 %arg0) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0 - ; CHECK-NEXT: G_STORE [[C]](s8), [[MV]](p1) :: (store (s8) into %ir.arg0, align 8, addrspace 1) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i8) = G_CONSTANT i8 0 + ; CHECK-NEXT: G_STORE [[C]](i8), [[MV]](p1) :: (store (i8) into %ir.arg0, align 8, addrspace 1) ; CHECK-NEXT: SI_RETURN store i8 0, ptr addrspace(1) %arg0, align 8 ret void @@ -25,7 +25,7 @@ define void @call_result_align_1() { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr31 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 @@ -34,35 +34,35 @@ define void @call_result_align_1() { ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i8) = G_CONSTANT i8 0 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @returns_ptr ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(i64) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32) + ; CHECK-NEXT: 
$sgpr10_sgpr11 = COPY [[COPY12]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @returns_ptr, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32) + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY19]](i32), [[COPY20]](i32) ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; CHECK-NEXT: G_STORE [[C]](s8), [[MV]](p1) :: (store (s8) into %ir.ptr, addrspace 1) + ; CHECK-NEXT: G_STORE [[C]](i8), [[MV]](p1) :: (store (i8) into %ir.ptr, addrspace 1) ; CHECK-NEXT: SI_RETURN %ptr = call align 1 ptr addrspace(1) @returns_ptr() store i8 0, ptr addrspace(1) %ptr, align 1 @@ -74,7 +74,7 @@ define void @call_result_align_8() { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr31 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 @@ -83,36 +83,36 @@ define void @call_result_align_8() { ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i8) = G_CONSTANT i8 0 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @returns_ptr ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(i64) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x i32>) ; CHECK-NEXT: 
$sgpr4_sgpr5 = COPY [[COPY9]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @returns_ptr, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32) + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY19]](i32), [[COPY20]](i32) ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: [[ASSERT_ALIGN:%[0-9]+]]:_(p1) = G_ASSERT_ALIGN [[MV]], 8 - ; CHECK-NEXT: G_STORE [[C]](s8), [[ASSERT_ALIGN]](p1) :: (store (s8) into %ir.ptr, align 8, addrspace 1) + ; CHECK-NEXT: G_STORE [[C]](i8), [[ASSERT_ALIGN]](p1) :: (store (i8) into %ir.ptr, align 8, addrspace 1) ; CHECK-NEXT: SI_RETURN %ptr = call align 8 ptr addrspace(1) @returns_ptr() store i8 0, ptr addrspace(1) %ptr, align 8 @@ -124,7 +124,7 @@ define void @declaration_result_align_8() { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr31 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 @@ -133,36 +133,36 @@ define void @declaration_result_align_8() { ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i8) = G_CONSTANT i8 0 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @returns_ptr_align8 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: 
$sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(i64) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @returns_ptr_align8, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32) + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY19]](i32), [[COPY20]](i32) ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: [[ASSERT_ALIGN:%[0-9]+]]:_(p1) = G_ASSERT_ALIGN [[MV]], 8 - ; CHECK-NEXT: G_STORE [[C]](s8), [[ASSERT_ALIGN]](p1) :: (store (s8) into %ir.ptr, align 8, addrspace 1) + ; CHECK-NEXT: G_STORE [[C]](i8), [[ASSERT_ALIGN]](p1) :: (store (i8) into %ir.ptr, align 8, addrspace 1) ; CHECK-NEXT: SI_RETURN %ptr = call ptr addrspace(1) @returns_ptr_align8() store i8 0, ptr addrspace(1) %ptr, align 8 @@ -174,7 +174,7 @@ define ptr addrspace(1) @tail_call_assert_align() { ; CHECK: bb.1.entry: ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr31 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 @@ -187,23 +187,23 @@ define ptr addrspace(1) @tail_call_assert_align() { ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY 
[[COPY1]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(i64) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](i32) ; CHECK-NEXT: SI_TCRETURN [[GV]](p0), @returns_ptr_align8, 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 entry: %call = tail call ptr addrspace(1) @returns_ptr_align8() diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll index 4ee658666a1b7..89f7b9c158776 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll @@ -7,9 +7,9 @@ define float @test_atomicrmw_fadd(ptr addrspace(3) %addr) { ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; CHECK-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_FADD [[COPY]](p3), [[C]] :: (load store seq_cst (s32) on %ir.addr, addrspace 3) - ; CHECK-NEXT: $vgpr0 = COPY [[ATOMICRMW_FADD]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; CHECK-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:_(f32) = G_ATOMICRMW_FADD [[COPY]](p3), [[C]] :: (load store seq_cst (f32) on %ir.addr, addrspace 3) + ; CHECK-NEXT: $vgpr0 = COPY [[ATOMICRMW_FADD]](f32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %oldval = atomicrmw fadd ptr addrspace(3) %addr, float 1.0 seq_cst ret float %oldval @@ -22,28 +22,31 @@ define float @test_atomicrmw_fsub(ptr addrspace(3) %addr) { ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32) from %ir.addr, addrspace 3) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(f32) = 
G_LOAD [[COPY]](p3) :: (load (f32) from %ir.addr, addrspace 3) ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2.atomicrmw.start: ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s64) = G_PHI %16(s64), %bb.2, [[C1]](s64), %bb.1 - ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[LOAD]](s32), %bb.1, %14(s32), %bb.2 - ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[PHI1]], [[C]] - ; CHECK-NEXT: [[ATOMIC_CMPXCHG_WITH_SUCCESS:%[0-9]+]]:_(s32), [[ATOMIC_CMPXCHG_WITH_SUCCESS1:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[COPY]](p3), [[PHI1]], [[FSUB]] :: (load store seq_cst seq_cst (s32) on %ir.addr, addrspace 3) - ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[ATOMIC_CMPXCHG_WITH_SUCCESS1]](s1), [[PHI]](s64) - ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), [[INT]](s64) - ; CHECK-NEXT: G_BRCOND [[INT1]](s1), %bb.3 + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(i64) = G_PHI %19(i64), %bb.2, [[C1]](i64), %bb.1 + ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(f32) = G_PHI [[LOAD]](f32), %bb.1, %18(f32), %bb.2 + ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(f32) = G_FSUB [[PHI1]], [[C]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[FSUB]](f32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[PHI1]](f32) + ; CHECK-NEXT: [[ATOMIC_CMPXCHG_WITH_SUCCESS:%[0-9]+]]:_(i32), [[ATOMIC_CMPXCHG_WITH_SUCCESS1:%[0-9]+]]:_(i1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[COPY]](p3), [[BITCAST1]], [[BITCAST]] :: (load store seq_cst seq_cst (i32) on %ir.addr, addrspace 3) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[ATOMIC_CMPXCHG_WITH_SUCCESS]](i32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(i64) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[ATOMIC_CMPXCHG_WITH_SUCCESS1]](i1), [[PHI]](i64) + ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(i1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), [[INT]](i64) + ; CHECK-NEXT: G_BRCOND [[INT1]](i1), %bb.3 ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3.atomicrmw.end: - ; CHECK-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[ATOMIC_CMPXCHG_WITH_SUCCESS]](s32), %bb.2 - ; CHECK-NEXT: [[PHI3:%[0-9]+]]:_(s64) = G_PHI [[INT]](s64), %bb.2 - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI3]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[PHI2]](s32) + ; CHECK-NEXT: [[PHI2:%[0-9]+]]:_(f32) = G_PHI [[BITCAST2]](f32), %bb.2 + ; CHECK-NEXT: [[PHI3:%[0-9]+]]:_(i64) = G_PHI [[INT]](i64), %bb.2 + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI3]](i64) + ; CHECK-NEXT: $vgpr0 = COPY [[PHI2]](f32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %oldval = atomicrmw fsub ptr addrspace(3) %addr, float 1.0 seq_cst ret float %oldval @@ -55,10 +58,10 @@ define <2 x half> @test_atomicrmw_fadd_vector(ptr addrspace(3) %addr) { ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3C00 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) - ; CHECK-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:_(<2 x s16>) = G_ATOMICRMW_FADD [[COPY]](p3), [[BUILD_VECTOR]] :: (load store seq_cst (<2 x s16>) on %ir.addr, addrspace 3) - ; CHECK-NEXT: $vgpr0 = COPY [[ATOMICRMW_FADD]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH3C00 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[C]](f16), [[C]](f16) + ; CHECK-NEXT: 
[[ATOMICRMW_FADD:%[0-9]+]]:_(<2 x f16>) = G_ATOMICRMW_FADD [[COPY]](p3), [[BUILD_VECTOR]] :: (load store seq_cst (<2 x f16>) on %ir.addr, addrspace 3) + ; CHECK-NEXT: $vgpr0 = COPY [[ATOMICRMW_FADD]](<2 x f16>) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %oldval = atomicrmw fadd ptr addrspace(3) %addr, <2 x half> seq_cst ret <2 x half> %oldval @@ -71,32 +74,32 @@ define <2 x half> @test_atomicrmw_fsub_vector(ptr addrspace(3) %addr) { ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3C00 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>) from %ir.addr, addrspace 3) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH3C00 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[C]](f16), [[C]](f16) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x f16>) = G_LOAD [[COPY]](p3) :: (load (<2 x f16>) from %ir.addr, addrspace 3) ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2.atomicrmw.start: ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s64) = G_PHI %20(s64), %bb.2, [[C1]](s64), %bb.1 - ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(<2 x s16>) = G_PHI [[LOAD]](<2 x s16>), %bb.1, %19(<2 x s16>), %bb.2 - ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(<2 x s16>) = G_FSUB [[PHI1]], [[BUILD_VECTOR]] - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[FSUB]](<2 x s16>) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PHI1]](<2 x s16>) - ; CHECK-NEXT: [[ATOMIC_CMPXCHG_WITH_SUCCESS:%[0-9]+]]:_(s32), [[ATOMIC_CMPXCHG_WITH_SUCCESS1:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[COPY]](p3), [[BITCAST1]], [[BITCAST]] :: (load store seq_cst seq_cst (s32) on %ir.addr, addrspace 3) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[ATOMIC_CMPXCHG_WITH_SUCCESS]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[ATOMIC_CMPXCHG_WITH_SUCCESS1]](s1), [[PHI]](s64) - ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), [[INT]](s64) - ; CHECK-NEXT: G_BRCOND [[INT1]](s1), %bb.3 + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(i64) = G_PHI %20(i64), %bb.2, [[C1]](i64), %bb.1 + ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(<2 x f16>) = G_PHI [[LOAD]](<2 x f16>), %bb.1, %19(<2 x f16>), %bb.2 + ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(<2 x f16>) = G_FSUB [[PHI1]], [[BUILD_VECTOR]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[FSUB]](<2 x f16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[PHI1]](<2 x f16>) + ; CHECK-NEXT: [[ATOMIC_CMPXCHG_WITH_SUCCESS:%[0-9]+]]:_(i32), [[ATOMIC_CMPXCHG_WITH_SUCCESS1:%[0-9]+]]:_(i1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[COPY]](p3), [[BITCAST1]], [[BITCAST]] :: (load store seq_cst seq_cst (i32) on %ir.addr, addrspace 3) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[ATOMIC_CMPXCHG_WITH_SUCCESS]](i32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(i64) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[ATOMIC_CMPXCHG_WITH_SUCCESS1]](i1), [[PHI]](i64) + ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(i1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), [[INT]](i64) + ; CHECK-NEXT: G_BRCOND [[INT1]](i1), %bb.3 ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: 
bb.3.atomicrmw.end: - ; CHECK-NEXT: [[PHI2:%[0-9]+]]:_(<2 x s16>) = G_PHI [[BITCAST2]](<2 x s16>), %bb.2 - ; CHECK-NEXT: [[PHI3:%[0-9]+]]:_(s64) = G_PHI [[INT]](s64), %bb.2 - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI3]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[PHI2]](<2 x s16>) + ; CHECK-NEXT: [[PHI2:%[0-9]+]]:_(<2 x f16>) = G_PHI [[BITCAST2]](<2 x f16>), %bb.2 + ; CHECK-NEXT: [[PHI3:%[0-9]+]]:_(i64) = G_PHI [[INT]](i64), %bb.2 + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI3]](i64) + ; CHECK-NEXT: $vgpr0 = COPY [[PHI2]](<2 x f16>) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %oldval = atomicrmw fsub ptr addrspace(3) %addr, <2 x half> seq_cst ret <2 x half> %oldval @@ -109,32 +112,32 @@ define <2 x half> @test_atomicrmw_fmin_vector(ptr addrspace(3) %addr) { ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3C00 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>) from %ir.addr, addrspace 3) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH3C00 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[C]](f16), [[C]](f16) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x f16>) = G_LOAD [[COPY]](p3) :: (load (<2 x f16>) from %ir.addr, addrspace 3) ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2.atomicrmw.start: ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s64) = G_PHI %20(s64), %bb.2, [[C1]](s64), %bb.1 - ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(<2 x s16>) = G_PHI [[LOAD]](<2 x s16>), %bb.1, %19(<2 x s16>), %bb.2 - ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM [[PHI1]], [[BUILD_VECTOR]] - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[FMINNUM]](<2 x s16>) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PHI1]](<2 x s16>) - ; CHECK-NEXT: [[ATOMIC_CMPXCHG_WITH_SUCCESS:%[0-9]+]]:_(s32), [[ATOMIC_CMPXCHG_WITH_SUCCESS1:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[COPY]](p3), [[BITCAST1]], [[BITCAST]] :: (load store seq_cst seq_cst (s32) on %ir.addr, addrspace 3) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[ATOMIC_CMPXCHG_WITH_SUCCESS]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[ATOMIC_CMPXCHG_WITH_SUCCESS1]](s1), [[PHI]](s64) - ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), [[INT]](s64) - ; CHECK-NEXT: G_BRCOND [[INT1]](s1), %bb.3 + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(i64) = G_PHI %20(i64), %bb.2, [[C1]](i64), %bb.1 + ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(<2 x f16>) = G_PHI [[LOAD]](<2 x f16>), %bb.1, %19(<2 x f16>), %bb.2 + ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:_(<2 x f16>) = G_FMINNUM [[PHI1]], [[BUILD_VECTOR]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[FMINNUM]](<2 x f16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[PHI1]](<2 x f16>) + ; CHECK-NEXT: [[ATOMIC_CMPXCHG_WITH_SUCCESS:%[0-9]+]]:_(i32), [[ATOMIC_CMPXCHG_WITH_SUCCESS1:%[0-9]+]]:_(i1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[COPY]](p3), [[BITCAST1]], [[BITCAST]] :: (load store seq_cst seq_cst (i32) on %ir.addr, addrspace 3) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x f16>) 
= G_BITCAST [[ATOMIC_CMPXCHG_WITH_SUCCESS]](i32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(i64) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[ATOMIC_CMPXCHG_WITH_SUCCESS1]](i1), [[PHI]](i64) + ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(i1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), [[INT]](i64) + ; CHECK-NEXT: G_BRCOND [[INT1]](i1), %bb.3 ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3.atomicrmw.end: - ; CHECK-NEXT: [[PHI2:%[0-9]+]]:_(<2 x s16>) = G_PHI [[BITCAST2]](<2 x s16>), %bb.2 - ; CHECK-NEXT: [[PHI3:%[0-9]+]]:_(s64) = G_PHI [[INT]](s64), %bb.2 - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI3]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[PHI2]](<2 x s16>) + ; CHECK-NEXT: [[PHI2:%[0-9]+]]:_(<2 x f16>) = G_PHI [[BITCAST2]](<2 x f16>), %bb.2 + ; CHECK-NEXT: [[PHI3:%[0-9]+]]:_(i64) = G_PHI [[INT]](i64), %bb.2 + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI3]](i64) + ; CHECK-NEXT: $vgpr0 = COPY [[PHI2]](<2 x f16>) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %oldval = atomicrmw fmin ptr addrspace(3) %addr, <2 x half> seq_cst ret <2 x half> %oldval @@ -147,32 +150,32 @@ define <2 x half> @test_atomicrmw_fmax_vector(ptr addrspace(3) %addr) { ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3C00 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>) from %ir.addr, addrspace 3) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH3C00 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[C]](f16), [[C]](f16) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x f16>) = G_LOAD [[COPY]](p3) :: (load (<2 x f16>) from %ir.addr, addrspace 3) ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2.atomicrmw.start: ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s64) = G_PHI %20(s64), %bb.2, [[C1]](s64), %bb.1 - ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(<2 x s16>) = G_PHI [[LOAD]](<2 x s16>), %bb.1, %19(<2 x s16>), %bb.2 - ; CHECK-NEXT: [[FMAXNUM:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM [[PHI1]], [[BUILD_VECTOR]] - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[FMAXNUM]](<2 x s16>) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PHI1]](<2 x s16>) - ; CHECK-NEXT: [[ATOMIC_CMPXCHG_WITH_SUCCESS:%[0-9]+]]:_(s32), [[ATOMIC_CMPXCHG_WITH_SUCCESS1:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[COPY]](p3), [[BITCAST1]], [[BITCAST]] :: (load store seq_cst seq_cst (s32) on %ir.addr, addrspace 3) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[ATOMIC_CMPXCHG_WITH_SUCCESS]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[ATOMIC_CMPXCHG_WITH_SUCCESS1]](s1), [[PHI]](s64) - ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), [[INT]](s64) - ; CHECK-NEXT: G_BRCOND [[INT1]](s1), %bb.3 + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(i64) = G_PHI %20(i64), %bb.2, [[C1]](i64), %bb.1 + ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(<2 x f16>) = G_PHI [[LOAD]](<2 x f16>), %bb.1, %19(<2 x f16>), %bb.2 + ; CHECK-NEXT: [[FMAXNUM:%[0-9]+]]:_(<2 x f16>) = G_FMAXNUM [[PHI1]], [[BUILD_VECTOR]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = 
G_BITCAST [[FMAXNUM]](<2 x f16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[PHI1]](<2 x f16>) + ; CHECK-NEXT: [[ATOMIC_CMPXCHG_WITH_SUCCESS:%[0-9]+]]:_(i32), [[ATOMIC_CMPXCHG_WITH_SUCCESS1:%[0-9]+]]:_(i1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[COPY]](p3), [[BITCAST1]], [[BITCAST]] :: (load store seq_cst seq_cst (i32) on %ir.addr, addrspace 3) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[ATOMIC_CMPXCHG_WITH_SUCCESS]](i32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(i64) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[ATOMIC_CMPXCHG_WITH_SUCCESS1]](i1), [[PHI]](i64) + ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(i1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), [[INT]](i64) + ; CHECK-NEXT: G_BRCOND [[INT1]](i1), %bb.3 ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3.atomicrmw.end: - ; CHECK-NEXT: [[PHI2:%[0-9]+]]:_(<2 x s16>) = G_PHI [[BITCAST2]](<2 x s16>), %bb.2 - ; CHECK-NEXT: [[PHI3:%[0-9]+]]:_(s64) = G_PHI [[INT]](s64), %bb.2 - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI3]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[PHI2]](<2 x s16>) + ; CHECK-NEXT: [[PHI2:%[0-9]+]]:_(<2 x f16>) = G_PHI [[BITCAST2]](<2 x f16>), %bb.2 + ; CHECK-NEXT: [[PHI3:%[0-9]+]]:_(i64) = G_PHI [[INT]](i64), %bb.2 + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI3]](i64) + ; CHECK-NEXT: $vgpr0 = COPY [[PHI2]](<2 x f16>) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %oldval = atomicrmw fmax ptr addrspace(3) %addr, <2 x half> seq_cst ret <2 x half> %oldval diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll index 31c08a3479bb3..9a45de7523ef9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll @@ -23,23 +23,23 @@ define amdgpu_kernel void @kernel_call_no_workitem_ids() { ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]](p4) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY9]], [[C]](s64) - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s64) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY14]](<4 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY9]], [[C]](i64) + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(i64) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY [[COPY]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY14]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY7]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY8]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY10]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY11]](s32) - ; 
CHECK-NEXT: $sgpr13 = COPY [[COPY12]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY10]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY11]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY12]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY13]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -52,9 +52,9 @@ define amdgpu_kernel void @kernel_call_no_workgroup_ids() { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 @@ -64,27 +64,27 @@ define amdgpu_kernel void @kernel_call_no_workgroup_ids() { ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]](p4) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY9]], [[C]](s64) - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s64) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY12]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY11]], [[SHL]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY13]], [[C2]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY14]](<4 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY9]], [[C]](i64) + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(i64) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY12]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY11]], [[SHL]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY13]], [[C2]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], 
[[SHL1]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY14]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY7]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY8]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY10]](s64) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY10]](i64) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -97,30 +97,30 @@ define amdgpu_kernel void @kernel_call_no_other_sgprs() { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p4) = COPY [[COPY3]](p4) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY4]], [[C]](s64) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY6]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY5]], [[SHL]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[C2]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY8]](<4 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY4]], [[C]](i64) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY6]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY5]], [[SHL]] + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY7]], [[C2]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY8]](<4 x i32>) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; 
CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr8_sgpr9, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -146,21 +146,21 @@ define void @func_call_no_workitem_ids() { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]] ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(i64) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY15]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[COPY15]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -173,7 +173,7 @@ define void @func_call_no_workgroup_ids() { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr31 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 @@ -184,17 +184,17 @@ define void @func_call_no_workgroup_ids() { ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(p4) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s64) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; 
CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY12]](<4 x s32>) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(i64) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY12]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY6]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY7]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY8]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY9]](s64) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY10]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY11]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY9]](i64) + ; CHECK-NEXT: $sgpr15 = COPY [[COPY10]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY11]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -207,19 +207,19 @@ define void @func_call_no_other_sgprs() { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr15, $vgpr31, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr31 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(p4) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY6]](<4 x s32>) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY6]](<4 x i32>) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY3]](p4) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY4]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY5]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[COPY4]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY5]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr8_sgpr9, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll index 6e85ccbafd535..96c846fb5960a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll @@ -13,9 +13,9 @@ define amdgpu_kernel void @test_call_external_void_func_i32([17 x i8]) #0 { ; GFX900: bb.1 (%ir-block.1): ; GFX900-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GFX900-NEXT: {{ $}} - ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GFX900-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GFX900-NEXT: 
[[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; GFX900-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; GFX900-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -23,41 +23,41 @@ define amdgpu_kernel void @test_call_external_void_func_i32([17 x i8]) #0 { ; GFX900-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; GFX900-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 + ; GFX900-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 42 ; GFX900-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 20 - ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX900-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GFX900-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GFX900-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; GFX900-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GFX900-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GFX900-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GFX900-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) - ; GFX900-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GFX900-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX900-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GFX900-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 20 + ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](i64) + ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX900-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; GFX900-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GFX900-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C2]](i32) + ; GFX900-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; GFX900-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GFX900-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GFX900-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C3]](i32) + ; GFX900-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; GFX900-NEXT: $vgpr0 = COPY [[C]](i32) + ; GFX900-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; 
GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GFX900-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GFX900-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GFX900-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; GFX900-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; GFX900-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; GFX900-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; GFX900-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](i32) + ; GFX900-NEXT: $vgpr31 = COPY [[OR1]](i32) ; GFX900-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX900-NEXT: S_ENDPGM 0 @@ -66,9 +66,9 @@ define amdgpu_kernel void @test_call_external_void_func_i32([17 x i8]) #0 { ; GFX908: bb.1 (%ir-block.1): ; GFX908-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -76,41 +76,41 @@ define amdgpu_kernel void @test_call_external_void_func_i32([17 x i8]) #0 { ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; GFX908-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 + ; GFX908-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 42 ; GFX908-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 20 - ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX908-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GFX908-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GFX908-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; GFX908-NEXT: 
[[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GFX908-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GFX908-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GFX908-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) - ; GFX908-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GFX908-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX908-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GFX908-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 20 + ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](i64) + ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX908-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; GFX908-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GFX908-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C2]](i32) + ; GFX908-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; GFX908-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GFX908-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GFX908-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C3]](i32) + ; GFX908-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; GFX908-NEXT: $vgpr0 = COPY [[C]](i32) + ; GFX908-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GFX908-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GFX908-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GFX908-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; GFX908-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; GFX908-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; GFX908-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; GFX908-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](i32) + ; GFX908-NEXT: $vgpr31 = COPY [[OR1]](i32) ; GFX908-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX908-NEXT: S_ENDPGM 0 @@ -123,7 +123,7 @@ define void @test_func_call_external_void_func_i32() #0 { ; GFX900: bb.1 (%ir-block.0): ; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GFX900-NEXT: {{ $}} - ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 + ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr31 ; GFX900-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GFX900-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 @@ -132,30 +132,30 @@ define void @test_func_call_external_void_func_i32() #0 { ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 ; 
GFX900-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; GFX900-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 99 + ; GFX900-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 99 ; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GFX900-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GFX900-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX900-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(i64) = COPY [[COPY5]] + ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY2]] + ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY1]] + ; GFX900-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GFX900-NEXT: $vgpr0 = COPY [[C]](i32) + ; GFX900-NEXT: [[COPY18:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x i32>) ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; GFX900-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; GFX900-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; GFX900-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; GFX900-NEXT: $sgpr15 = COPY [[COPY16]](s32) - ; GFX900-NEXT: $vgpr31 = COPY [[COPY17]](s32) + ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](i64) + ; GFX900-NEXT: $sgpr12 = COPY [[COPY13]](i32) + ; GFX900-NEXT: $sgpr13 = COPY [[COPY14]](i32) + ; GFX900-NEXT: $sgpr14 = COPY [[COPY15]](i32) + ; GFX900-NEXT: $sgpr15 = COPY [[COPY16]](i32) + ; GFX900-NEXT: $vgpr31 = COPY [[COPY17]](i32) ; GFX900-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX900-NEXT: SI_RETURN @@ -164,7 +164,7 @@ define void @test_func_call_external_void_func_i32() #0 { ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 + ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr31 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 @@ -173,30 +173,30 @@ define void @test_func_call_external_void_func_i32() #0 { ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY 
$sgpr6_sgpr7 ; GFX908-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 99 + ; GFX908-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 99 ; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GFX908-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GFX908-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX908-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(i64) = COPY [[COPY5]] + ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY2]] + ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY1]] + ; GFX908-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GFX908-NEXT: $vgpr0 = COPY [[C]](i32) + ; GFX908-NEXT: [[COPY18:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x i32>) ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; GFX908-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; GFX908-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; GFX908-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; GFX908-NEXT: $sgpr15 = COPY [[COPY16]](s32) - ; GFX908-NEXT: $vgpr31 = COPY [[COPY17]](s32) + ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](i64) + ; GFX908-NEXT: $sgpr12 = COPY [[COPY13]](i32) + ; GFX908-NEXT: $sgpr13 = COPY [[COPY14]](i32) + ; GFX908-NEXT: $sgpr14 = COPY [[COPY15]](i32) + ; GFX908-NEXT: $sgpr15 = COPY [[COPY16]](i32) + ; GFX908-NEXT: $vgpr31 = COPY [[COPY17]](i32) ; GFX908-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX908-NEXT: SI_RETURN @@ -211,9 +211,9 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32([17 x i8]) #0 { ; GFX900: bb.1 (%ir-block.1): ; GFX900-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GFX900-NEXT: {{ $}} - ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GFX900-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GFX900-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; GFX900-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; GFX900-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GFX900-NEXT: 
[[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -221,77 +221,77 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32([17 x i8]) #0 { ; GFX900-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; GFX900-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX900-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) + ; GFX900-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX900-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x i32>) = G_BUILD_VECTOR [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32) ; GFX900-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32 ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 20 - ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX900-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GFX900-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GFX900-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; GFX900-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GFX900-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GFX900-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GFX900-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) - ; GFX900-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GFX900-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), 
[[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x s32>) + ; GFX900-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 20 + ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](i64) + ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX900-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; GFX900-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GFX900-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C2]](i32) + ; GFX900-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; GFX900-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GFX900-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GFX900-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C3]](i32) + ; GFX900-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; GFX900-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32), [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32), [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32), [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32), [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32), [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32), [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32), [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32), [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32), [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32), [[UV28:%[0-9]+]]:_(i32), [[UV29:%[0-9]+]]:_(i32), [[UV30:%[0-9]+]]:_(i32), [[UV31:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x i32>) ; GFX900-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg - ; GFX900-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX900-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C4]](s32) - ; GFX900-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5) - ; GFX900-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX900-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX900-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX900-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX900-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX900-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX900-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GFX900-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; GFX900-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; GFX900-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; GFX900-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; GFX900-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; GFX900-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; GFX900-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; GFX900-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; GFX900-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; GFX900-NEXT: $vgpr16 = COPY [[UV16]](s32) - ; GFX900-NEXT: $vgpr17 = COPY [[UV17]](s32) - ; GFX900-NEXT: $vgpr18 = COPY [[UV18]](s32) - ; GFX900-NEXT: $vgpr19 = COPY [[UV19]](s32) - ; GFX900-NEXT: $vgpr20 = COPY [[UV20]](s32) - ; GFX900-NEXT: $vgpr21 = COPY [[UV21]](s32) - ; GFX900-NEXT: $vgpr22 = COPY [[UV22]](s32) - ; GFX900-NEXT: $vgpr23 = COPY [[UV23]](s32) - ; GFX900-NEXT: $vgpr24 = COPY [[UV24]](s32) - ; GFX900-NEXT: $vgpr25 = COPY [[UV25]](s32) - ; GFX900-NEXT: $vgpr26 = COPY [[UV26]](s32) - ; GFX900-NEXT: 
$vgpr27 = COPY [[UV27]](s32) - ; GFX900-NEXT: $vgpr28 = COPY [[UV28]](s32) - ; GFX900-NEXT: $vgpr29 = COPY [[UV29]](s32) - ; GFX900-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; GFX900-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GFX900-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX900-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C4]](i32) + ; GFX900-NEXT: G_STORE [[UV31]](i32), [[PTR_ADD1]](p5) :: (store (i32) into stack, align 16, addrspace 5) + ; GFX900-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX900-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX900-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; GFX900-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; GFX900-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; GFX900-NEXT: $vgpr5 = COPY [[UV5]](i32) + ; GFX900-NEXT: $vgpr6 = COPY [[UV6]](i32) + ; GFX900-NEXT: $vgpr7 = COPY [[UV7]](i32) + ; GFX900-NEXT: $vgpr8 = COPY [[UV8]](i32) + ; GFX900-NEXT: $vgpr9 = COPY [[UV9]](i32) + ; GFX900-NEXT: $vgpr10 = COPY [[UV10]](i32) + ; GFX900-NEXT: $vgpr11 = COPY [[UV11]](i32) + ; GFX900-NEXT: $vgpr12 = COPY [[UV12]](i32) + ; GFX900-NEXT: $vgpr13 = COPY [[UV13]](i32) + ; GFX900-NEXT: $vgpr14 = COPY [[UV14]](i32) + ; GFX900-NEXT: $vgpr15 = COPY [[UV15]](i32) + ; GFX900-NEXT: $vgpr16 = COPY [[UV16]](i32) + ; GFX900-NEXT: $vgpr17 = COPY [[UV17]](i32) + ; GFX900-NEXT: $vgpr18 = COPY [[UV18]](i32) + ; GFX900-NEXT: $vgpr19 = COPY [[UV19]](i32) + ; GFX900-NEXT: $vgpr20 = COPY [[UV20]](i32) + ; GFX900-NEXT: $vgpr21 = COPY [[UV21]](i32) + ; GFX900-NEXT: $vgpr22 = COPY [[UV22]](i32) + ; GFX900-NEXT: $vgpr23 = COPY [[UV23]](i32) + ; GFX900-NEXT: $vgpr24 = COPY [[UV24]](i32) + ; GFX900-NEXT: $vgpr25 = COPY [[UV25]](i32) + ; GFX900-NEXT: $vgpr26 = COPY [[UV26]](i32) + ; GFX900-NEXT: $vgpr27 = COPY [[UV27]](i32) + ; GFX900-NEXT: $vgpr28 = COPY [[UV28]](i32) + ; GFX900-NEXT: $vgpr29 = COPY [[UV29]](i32) + ; GFX900-NEXT: $vgpr30 = COPY [[UV30]](i32) + ; GFX900-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GFX900-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GFX900-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GFX900-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; GFX900-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; GFX900-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; GFX900-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; GFX900-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](i32) + ; GFX900-NEXT: $vgpr31 = COPY [[OR1]](i32) ; GFX900-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit 
$sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX900-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc ; GFX900-NEXT: S_ENDPGM 0 @@ -300,9 +300,9 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32([17 x i8]) #0 { ; GFX908: bb.1 (%ir-block.1): ; GFX908-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -310,77 +310,77 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32([17 x i8]) #0 { ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; GFX908-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX908-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) + ; GFX908-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX908-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x i32>) = G_BUILD_VECTOR [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32) ; GFX908-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32 ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 20 - ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX908-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GFX908-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GFX908-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; GFX908-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; 
GFX908-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GFX908-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GFX908-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) - ; GFX908-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GFX908-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x s32>) + ; GFX908-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 20 + ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](i64) + ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX908-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; GFX908-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GFX908-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C2]](i32) + ; GFX908-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; GFX908-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GFX908-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GFX908-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C3]](i32) + ; GFX908-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; GFX908-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32), [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32), [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32), [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32), [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32), [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32), [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32), [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32), [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32), [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32), [[UV28:%[0-9]+]]:_(i32), [[UV29:%[0-9]+]]:_(i32), [[UV30:%[0-9]+]]:_(i32), [[UV31:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x i32>) ; GFX908-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg - ; GFX908-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX908-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C4]](s32) - ; GFX908-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5) - ; GFX908-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX908-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX908-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX908-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX908-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; 
GFX908-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX908-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GFX908-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; GFX908-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; GFX908-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; GFX908-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; GFX908-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; GFX908-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; GFX908-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; GFX908-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; GFX908-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; GFX908-NEXT: $vgpr16 = COPY [[UV16]](s32) - ; GFX908-NEXT: $vgpr17 = COPY [[UV17]](s32) - ; GFX908-NEXT: $vgpr18 = COPY [[UV18]](s32) - ; GFX908-NEXT: $vgpr19 = COPY [[UV19]](s32) - ; GFX908-NEXT: $vgpr20 = COPY [[UV20]](s32) - ; GFX908-NEXT: $vgpr21 = COPY [[UV21]](s32) - ; GFX908-NEXT: $vgpr22 = COPY [[UV22]](s32) - ; GFX908-NEXT: $vgpr23 = COPY [[UV23]](s32) - ; GFX908-NEXT: $vgpr24 = COPY [[UV24]](s32) - ; GFX908-NEXT: $vgpr25 = COPY [[UV25]](s32) - ; GFX908-NEXT: $vgpr26 = COPY [[UV26]](s32) - ; GFX908-NEXT: $vgpr27 = COPY [[UV27]](s32) - ; GFX908-NEXT: $vgpr28 = COPY [[UV28]](s32) - ; GFX908-NEXT: $vgpr29 = COPY [[UV29]](s32) - ; GFX908-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; GFX908-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GFX908-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX908-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C4]](i32) + ; GFX908-NEXT: G_STORE [[UV31]](i32), [[PTR_ADD1]](p5) :: (store (i32) into stack, align 16, addrspace 5) + ; GFX908-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX908-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX908-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; GFX908-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; GFX908-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; GFX908-NEXT: $vgpr5 = COPY [[UV5]](i32) + ; GFX908-NEXT: $vgpr6 = COPY [[UV6]](i32) + ; GFX908-NEXT: $vgpr7 = COPY [[UV7]](i32) + ; GFX908-NEXT: $vgpr8 = COPY [[UV8]](i32) + ; GFX908-NEXT: $vgpr9 = COPY [[UV9]](i32) + ; GFX908-NEXT: $vgpr10 = COPY [[UV10]](i32) + ; GFX908-NEXT: $vgpr11 = COPY [[UV11]](i32) + ; GFX908-NEXT: $vgpr12 = COPY [[UV12]](i32) + ; GFX908-NEXT: $vgpr13 = COPY [[UV13]](i32) + ; GFX908-NEXT: $vgpr14 = COPY [[UV14]](i32) + ; GFX908-NEXT: $vgpr15 = COPY [[UV15]](i32) + ; GFX908-NEXT: $vgpr16 = COPY [[UV16]](i32) + ; GFX908-NEXT: $vgpr17 = COPY [[UV17]](i32) + ; GFX908-NEXT: $vgpr18 = COPY [[UV18]](i32) + ; GFX908-NEXT: $vgpr19 = COPY [[UV19]](i32) + ; GFX908-NEXT: $vgpr20 = COPY [[UV20]](i32) + ; GFX908-NEXT: $vgpr21 = COPY [[UV21]](i32) + ; GFX908-NEXT: $vgpr22 = COPY [[UV22]](i32) + ; GFX908-NEXT: $vgpr23 = COPY [[UV23]](i32) + ; GFX908-NEXT: $vgpr24 = COPY [[UV24]](i32) + ; GFX908-NEXT: $vgpr25 = COPY [[UV25]](i32) + ; GFX908-NEXT: $vgpr26 = COPY [[UV26]](i32) + ; GFX908-NEXT: $vgpr27 = COPY [[UV27]](i32) + ; GFX908-NEXT: $vgpr28 = COPY [[UV28]](i32) + ; GFX908-NEXT: $vgpr29 = COPY [[UV29]](i32) + ; GFX908-NEXT: $vgpr30 = COPY [[UV30]](i32) + ; GFX908-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GFX908-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GFX908-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GFX908-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; GFX908-NEXT: 
$vgpr31 = COPY [[OR1]](s32) + ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; GFX908-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; GFX908-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; GFX908-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](i32) + ; GFX908-NEXT: $vgpr31 = COPY [[OR1]](i32) ; GFX908-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX908-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc ; GFX908-NEXT: S_ENDPGM 0 @@ -393,7 +393,7 @@ define void @test_func_call_external_void_func_v32i32([17 x i8]) #0 { ; GFX900: bb.1 (%ir-block.1): ; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GFX900-NEXT: {{ $}} - ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 + ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr31 ; GFX900-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GFX900-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 @@ -402,117 +402,117 @@ define void @test_func_call_external_void_func_v32i32([17 x i8]) #0 { ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 ; GFX900-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; GFX900-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX900-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX900-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) - ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX900-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX900-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC2]](s16) - ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX900-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) - ; GFX900-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC4]](s16) - ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX900-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX900-NEXT: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC6]](s16) - ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX900-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX900-NEXT: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC8]](s16) - ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX900-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX900-NEXT: [[TRUNC11:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC10]](s16) - ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX900-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; 
GFX900-NEXT: [[TRUNC13:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC12]](s16) - ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX900-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX900-NEXT: [[TRUNC15:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC14]](s16) - ; GFX900-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX900-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX900-NEXT: [[TRUNC17:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC16]](s16) - ; GFX900-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX900-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX900-NEXT: [[TRUNC19:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC18]](s16) - ; GFX900-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX900-NEXT: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) - ; GFX900-NEXT: [[TRUNC21:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC20]](s16) - ; GFX900-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX900-NEXT: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32) - ; GFX900-NEXT: [[TRUNC23:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC22]](s16) - ; GFX900-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX900-NEXT: [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32) - ; GFX900-NEXT: [[TRUNC25:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC24]](s16) - ; GFX900-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX900-NEXT: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[COPY22]](s32) - ; GFX900-NEXT: [[TRUNC27:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC26]](s16) - ; GFX900-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX900-NEXT: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[COPY23]](s32) - ; GFX900-NEXT: [[TRUNC29:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC28]](s16) - ; GFX900-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX900-NEXT: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[COPY24]](s32) - ; GFX900-NEXT: [[TRUNC31:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC30]](s16) - ; GFX900-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX900-NEXT: [[TRUNC32:%[0-9]+]]:_(s16) = G_TRUNC [[COPY25]](s32) - ; GFX900-NEXT: [[TRUNC33:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC32]](s16) - ; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX900-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) + ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX900-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX900-NEXT: [[TRUNC1:%[0-9]+]]:_(i8) = G_TRUNC [[TRUNC]](i16) + ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX900-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY10]](i32) + ; GFX900-NEXT: [[TRUNC3:%[0-9]+]]:_(i8) = G_TRUNC [[TRUNC2]](i16) + ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX900-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY11]](i32) + ; GFX900-NEXT: [[TRUNC5:%[0-9]+]]:_(i8) = G_TRUNC [[TRUNC4]](i16) + ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX900-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX900-NEXT: [[TRUNC7:%[0-9]+]]:_(i8) = G_TRUNC [[TRUNC6]](i16) + ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX900-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX900-NEXT: [[TRUNC9:%[0-9]+]]:_(i8) = G_TRUNC [[TRUNC8]](i16) + ; GFX900-NEXT: 
[[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX900-NEXT: [[TRUNC10:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX900-NEXT: [[TRUNC11:%[0-9]+]]:_(i8) = G_TRUNC [[TRUNC10]](i16) + ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX900-NEXT: [[TRUNC12:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX900-NEXT: [[TRUNC13:%[0-9]+]]:_(i8) = G_TRUNC [[TRUNC12]](i16) + ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX900-NEXT: [[TRUNC14:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX900-NEXT: [[TRUNC15:%[0-9]+]]:_(i8) = G_TRUNC [[TRUNC14]](i16) + ; GFX900-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX900-NEXT: [[TRUNC16:%[0-9]+]]:_(i16) = G_TRUNC [[COPY17]](i32) + ; GFX900-NEXT: [[TRUNC17:%[0-9]+]]:_(i8) = G_TRUNC [[TRUNC16]](i16) + ; GFX900-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GFX900-NEXT: [[TRUNC18:%[0-9]+]]:_(i16) = G_TRUNC [[COPY18]](i32) + ; GFX900-NEXT: [[TRUNC19:%[0-9]+]]:_(i8) = G_TRUNC [[TRUNC18]](i16) + ; GFX900-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GFX900-NEXT: [[TRUNC20:%[0-9]+]]:_(i16) = G_TRUNC [[COPY19]](i32) + ; GFX900-NEXT: [[TRUNC21:%[0-9]+]]:_(i8) = G_TRUNC [[TRUNC20]](i16) + ; GFX900-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GFX900-NEXT: [[TRUNC22:%[0-9]+]]:_(i16) = G_TRUNC [[COPY20]](i32) + ; GFX900-NEXT: [[TRUNC23:%[0-9]+]]:_(i8) = G_TRUNC [[TRUNC22]](i16) + ; GFX900-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; GFX900-NEXT: [[TRUNC24:%[0-9]+]]:_(i16) = G_TRUNC [[COPY21]](i32) + ; GFX900-NEXT: [[TRUNC25:%[0-9]+]]:_(i8) = G_TRUNC [[TRUNC24]](i16) + ; GFX900-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; GFX900-NEXT: [[TRUNC26:%[0-9]+]]:_(i16) = G_TRUNC [[COPY22]](i32) + ; GFX900-NEXT: [[TRUNC27:%[0-9]+]]:_(i8) = G_TRUNC [[TRUNC26]](i16) + ; GFX900-NEXT: [[COPY23:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; GFX900-NEXT: [[TRUNC28:%[0-9]+]]:_(i16) = G_TRUNC [[COPY23]](i32) + ; GFX900-NEXT: [[TRUNC29:%[0-9]+]]:_(i8) = G_TRUNC [[TRUNC28]](i16) + ; GFX900-NEXT: [[COPY24:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; GFX900-NEXT: [[TRUNC30:%[0-9]+]]:_(i16) = G_TRUNC [[COPY24]](i32) + ; GFX900-NEXT: [[TRUNC31:%[0-9]+]]:_(i8) = G_TRUNC [[TRUNC30]](i16) + ; GFX900-NEXT: [[COPY25:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; GFX900-NEXT: [[TRUNC32:%[0-9]+]]:_(i16) = G_TRUNC [[COPY25]](i32) + ; GFX900-NEXT: [[TRUNC33:%[0-9]+]]:_(i8) = G_TRUNC [[TRUNC32]](i16) + ; GFX900-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX900-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x i32>) = G_BUILD_VECTOR [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32) ; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32 ; GFX900-NEXT: [[COPY26:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; GFX900-NEXT: [[COPY27:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GFX900-NEXT: [[COPY28:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GFX900-NEXT: [[COPY29:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; GFX900-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GFX900-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX900-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX900-NEXT: [[COPY33:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GFX900-NEXT: [[COPY34:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GFX900-NEXT: 
[[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x s32>) + ; GFX900-NEXT: [[COPY29:%[0-9]+]]:_(i64) = COPY [[COPY5]] + ; GFX900-NEXT: [[COPY30:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; GFX900-NEXT: [[COPY31:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GFX900-NEXT: [[COPY32:%[0-9]+]]:_(i32) = COPY [[COPY2]] + ; GFX900-NEXT: [[COPY33:%[0-9]+]]:_(i32) = COPY [[COPY1]] + ; GFX900-NEXT: [[COPY34:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GFX900-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32), [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32), [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32), [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32), [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32), [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32), [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32), [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32), [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32), [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32), [[UV28:%[0-9]+]]:_(i32), [[UV29:%[0-9]+]]:_(i32), [[UV30:%[0-9]+]]:_(i32), [[UV31:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x i32>) ; GFX900-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 - ; GFX900-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C1]](s32) - ; GFX900-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5) - ; GFX900-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX900-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX900-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX900-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX900-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX900-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX900-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GFX900-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; GFX900-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; GFX900-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; GFX900-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; GFX900-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; GFX900-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; GFX900-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; GFX900-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; GFX900-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; GFX900-NEXT: $vgpr16 = COPY [[UV16]](s32) - ; GFX900-NEXT: $vgpr17 = COPY [[UV17]](s32) - ; GFX900-NEXT: $vgpr18 = COPY [[UV18]](s32) - ; GFX900-NEXT: $vgpr19 = COPY [[UV19]](s32) - ; GFX900-NEXT: $vgpr20 = COPY [[UV20]](s32) - ; GFX900-NEXT: $vgpr21 = COPY [[UV21]](s32) - ; GFX900-NEXT: $vgpr22 = COPY [[UV22]](s32) - ; GFX900-NEXT: $vgpr23 = COPY [[UV23]](s32) - ; GFX900-NEXT: $vgpr24 = COPY [[UV24]](s32) - ; GFX900-NEXT: $vgpr25 = 
COPY [[UV25]](s32) - ; GFX900-NEXT: $vgpr26 = COPY [[UV26]](s32) - ; GFX900-NEXT: $vgpr27 = COPY [[UV27]](s32) - ; GFX900-NEXT: $vgpr28 = COPY [[UV28]](s32) - ; GFX900-NEXT: $vgpr29 = COPY [[UV29]](s32) - ; GFX900-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; GFX900-NEXT: [[COPY35:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY35]](<4 x s32>) + ; GFX900-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C1]](i32) + ; GFX900-NEXT: G_STORE [[UV31]](i32), [[PTR_ADD]](p5) :: (store (i32) into stack, align 16, addrspace 5) + ; GFX900-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX900-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX900-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; GFX900-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; GFX900-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; GFX900-NEXT: $vgpr5 = COPY [[UV5]](i32) + ; GFX900-NEXT: $vgpr6 = COPY [[UV6]](i32) + ; GFX900-NEXT: $vgpr7 = COPY [[UV7]](i32) + ; GFX900-NEXT: $vgpr8 = COPY [[UV8]](i32) + ; GFX900-NEXT: $vgpr9 = COPY [[UV9]](i32) + ; GFX900-NEXT: $vgpr10 = COPY [[UV10]](i32) + ; GFX900-NEXT: $vgpr11 = COPY [[UV11]](i32) + ; GFX900-NEXT: $vgpr12 = COPY [[UV12]](i32) + ; GFX900-NEXT: $vgpr13 = COPY [[UV13]](i32) + ; GFX900-NEXT: $vgpr14 = COPY [[UV14]](i32) + ; GFX900-NEXT: $vgpr15 = COPY [[UV15]](i32) + ; GFX900-NEXT: $vgpr16 = COPY [[UV16]](i32) + ; GFX900-NEXT: $vgpr17 = COPY [[UV17]](i32) + ; GFX900-NEXT: $vgpr18 = COPY [[UV18]](i32) + ; GFX900-NEXT: $vgpr19 = COPY [[UV19]](i32) + ; GFX900-NEXT: $vgpr20 = COPY [[UV20]](i32) + ; GFX900-NEXT: $vgpr21 = COPY [[UV21]](i32) + ; GFX900-NEXT: $vgpr22 = COPY [[UV22]](i32) + ; GFX900-NEXT: $vgpr23 = COPY [[UV23]](i32) + ; GFX900-NEXT: $vgpr24 = COPY [[UV24]](i32) + ; GFX900-NEXT: $vgpr25 = COPY [[UV25]](i32) + ; GFX900-NEXT: $vgpr26 = COPY [[UV26]](i32) + ; GFX900-NEXT: $vgpr27 = COPY [[UV27]](i32) + ; GFX900-NEXT: $vgpr28 = COPY [[UV28]](i32) + ; GFX900-NEXT: $vgpr29 = COPY [[UV29]](i32) + ; GFX900-NEXT: $vgpr30 = COPY [[UV30]](i32) + ; GFX900-NEXT: [[COPY35:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY35]](<4 x i32>) ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY26]](p4) ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY27]](p4) ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[COPY28]](p4) - ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY29]](s64) - ; GFX900-NEXT: $sgpr12 = COPY [[COPY30]](s32) - ; GFX900-NEXT: $sgpr13 = COPY [[COPY31]](s32) - ; GFX900-NEXT: $sgpr14 = COPY [[COPY32]](s32) - ; GFX900-NEXT: $sgpr15 = COPY [[COPY33]](s32) - ; GFX900-NEXT: $vgpr31 = COPY [[COPY34]](s32) + ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY29]](i64) + ; GFX900-NEXT: $sgpr12 = COPY [[COPY30]](i32) + ; GFX900-NEXT: $sgpr13 = COPY [[COPY31]](i32) + ; GFX900-NEXT: $sgpr14 = COPY [[COPY32]](i32) + ; GFX900-NEXT: $sgpr15 = COPY [[COPY33]](i32) + ; GFX900-NEXT: $vgpr31 = COPY [[COPY34]](i32) ; GFX900-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit 
$vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX900-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc ; GFX900-NEXT: SI_RETURN @@ -521,7 +521,7 @@ define void @test_func_call_external_void_func_v32i32([17 x i8]) #0 { ; GFX908: bb.1 (%ir-block.1): ; GFX908-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 + ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr31 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 @@ -530,117 +530,117 @@ define void @test_func_call_external_void_func_v32i32([17 x i8]) #0 { ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; GFX908-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX908-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX908-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) - ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX908-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX908-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC2]](s16) - ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX908-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) - ; GFX908-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC4]](s16) - ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX908-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX908-NEXT: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC6]](s16) - ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX908-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX908-NEXT: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC8]](s16) - ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX908-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX908-NEXT: [[TRUNC11:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC10]](s16) - ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX908-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX908-NEXT: [[TRUNC13:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC12]](s16) - ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX908-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX908-NEXT: [[TRUNC15:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC14]](s16) - ; GFX908-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX908-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX908-NEXT: [[TRUNC17:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC16]](s16) - ; GFX908-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX908-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX908-NEXT: [[TRUNC19:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC18]](s16) - ; GFX908-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX908-NEXT: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) - ; GFX908-NEXT: [[TRUNC21:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC20]](s16) - ; GFX908-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; 
GFX908-NEXT: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32) - ; GFX908-NEXT: [[TRUNC23:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC22]](s16) - ; GFX908-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX908-NEXT: [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32) - ; GFX908-NEXT: [[TRUNC25:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC24]](s16) - ; GFX908-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX908-NEXT: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[COPY22]](s32) - ; GFX908-NEXT: [[TRUNC27:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC26]](s16) - ; GFX908-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX908-NEXT: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[COPY23]](s32) - ; GFX908-NEXT: [[TRUNC29:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC28]](s16) - ; GFX908-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX908-NEXT: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[COPY24]](s32) - ; GFX908-NEXT: [[TRUNC31:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC30]](s16) - ; GFX908-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX908-NEXT: [[TRUNC32:%[0-9]+]]:_(s16) = G_TRUNC [[COPY25]](s32) - ; GFX908-NEXT: [[TRUNC33:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC32]](s16) - ; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX908-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) + ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX908-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX908-NEXT: [[TRUNC1:%[0-9]+]]:_(i8) = G_TRUNC [[TRUNC]](i16) + ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX908-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY10]](i32) + ; GFX908-NEXT: [[TRUNC3:%[0-9]+]]:_(i8) = G_TRUNC [[TRUNC2]](i16) + ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX908-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY11]](i32) + ; GFX908-NEXT: [[TRUNC5:%[0-9]+]]:_(i8) = G_TRUNC [[TRUNC4]](i16) + ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX908-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX908-NEXT: [[TRUNC7:%[0-9]+]]:_(i8) = G_TRUNC [[TRUNC6]](i16) + ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX908-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX908-NEXT: [[TRUNC9:%[0-9]+]]:_(i8) = G_TRUNC [[TRUNC8]](i16) + ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX908-NEXT: [[TRUNC10:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX908-NEXT: [[TRUNC11:%[0-9]+]]:_(i8) = G_TRUNC [[TRUNC10]](i16) + ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX908-NEXT: [[TRUNC12:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX908-NEXT: [[TRUNC13:%[0-9]+]]:_(i8) = G_TRUNC [[TRUNC12]](i16) + ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX908-NEXT: [[TRUNC14:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX908-NEXT: [[TRUNC15:%[0-9]+]]:_(i8) = G_TRUNC [[TRUNC14]](i16) + ; GFX908-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX908-NEXT: [[TRUNC16:%[0-9]+]]:_(i16) = G_TRUNC [[COPY17]](i32) + ; GFX908-NEXT: [[TRUNC17:%[0-9]+]]:_(i8) = G_TRUNC [[TRUNC16]](i16) + ; GFX908-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GFX908-NEXT: [[TRUNC18:%[0-9]+]]:_(i16) = G_TRUNC [[COPY18]](i32) + ; GFX908-NEXT: 
[[TRUNC19:%[0-9]+]]:_(i8) = G_TRUNC [[TRUNC18]](i16) + ; GFX908-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GFX908-NEXT: [[TRUNC20:%[0-9]+]]:_(i16) = G_TRUNC [[COPY19]](i32) + ; GFX908-NEXT: [[TRUNC21:%[0-9]+]]:_(i8) = G_TRUNC [[TRUNC20]](i16) + ; GFX908-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GFX908-NEXT: [[TRUNC22:%[0-9]+]]:_(i16) = G_TRUNC [[COPY20]](i32) + ; GFX908-NEXT: [[TRUNC23:%[0-9]+]]:_(i8) = G_TRUNC [[TRUNC22]](i16) + ; GFX908-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; GFX908-NEXT: [[TRUNC24:%[0-9]+]]:_(i16) = G_TRUNC [[COPY21]](i32) + ; GFX908-NEXT: [[TRUNC25:%[0-9]+]]:_(i8) = G_TRUNC [[TRUNC24]](i16) + ; GFX908-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; GFX908-NEXT: [[TRUNC26:%[0-9]+]]:_(i16) = G_TRUNC [[COPY22]](i32) + ; GFX908-NEXT: [[TRUNC27:%[0-9]+]]:_(i8) = G_TRUNC [[TRUNC26]](i16) + ; GFX908-NEXT: [[COPY23:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; GFX908-NEXT: [[TRUNC28:%[0-9]+]]:_(i16) = G_TRUNC [[COPY23]](i32) + ; GFX908-NEXT: [[TRUNC29:%[0-9]+]]:_(i8) = G_TRUNC [[TRUNC28]](i16) + ; GFX908-NEXT: [[COPY24:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; GFX908-NEXT: [[TRUNC30:%[0-9]+]]:_(i16) = G_TRUNC [[COPY24]](i32) + ; GFX908-NEXT: [[TRUNC31:%[0-9]+]]:_(i8) = G_TRUNC [[TRUNC30]](i16) + ; GFX908-NEXT: [[COPY25:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; GFX908-NEXT: [[TRUNC32:%[0-9]+]]:_(i16) = G_TRUNC [[COPY25]](i32) + ; GFX908-NEXT: [[TRUNC33:%[0-9]+]]:_(i8) = G_TRUNC [[TRUNC32]](i16) + ; GFX908-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX908-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x i32>) = G_BUILD_VECTOR [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32) ; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32 ; GFX908-NEXT: [[COPY26:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; GFX908-NEXT: [[COPY27:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GFX908-NEXT: [[COPY28:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GFX908-NEXT: [[COPY29:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; GFX908-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GFX908-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX908-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX908-NEXT: [[COPY33:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GFX908-NEXT: [[COPY34:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GFX908-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x s32>) + ; GFX908-NEXT: [[COPY29:%[0-9]+]]:_(i64) = COPY [[COPY5]] + ; GFX908-NEXT: 
[[COPY30:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; GFX908-NEXT: [[COPY31:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GFX908-NEXT: [[COPY32:%[0-9]+]]:_(i32) = COPY [[COPY2]] + ; GFX908-NEXT: [[COPY33:%[0-9]+]]:_(i32) = COPY [[COPY1]] + ; GFX908-NEXT: [[COPY34:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GFX908-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32), [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32), [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32), [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32), [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32), [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32), [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32), [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32), [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32), [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32), [[UV28:%[0-9]+]]:_(i32), [[UV29:%[0-9]+]]:_(i32), [[UV30:%[0-9]+]]:_(i32), [[UV31:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x i32>) ; GFX908-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 - ; GFX908-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C1]](s32) - ; GFX908-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5) - ; GFX908-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX908-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX908-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX908-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX908-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX908-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX908-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GFX908-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; GFX908-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; GFX908-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; GFX908-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; GFX908-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; GFX908-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; GFX908-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; GFX908-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; GFX908-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; GFX908-NEXT: $vgpr16 = COPY [[UV16]](s32) - ; GFX908-NEXT: $vgpr17 = COPY [[UV17]](s32) - ; GFX908-NEXT: $vgpr18 = COPY [[UV18]](s32) - ; GFX908-NEXT: $vgpr19 = COPY [[UV19]](s32) - ; GFX908-NEXT: $vgpr20 = COPY [[UV20]](s32) - ; GFX908-NEXT: $vgpr21 = COPY [[UV21]](s32) - ; GFX908-NEXT: $vgpr22 = COPY [[UV22]](s32) - ; GFX908-NEXT: $vgpr23 = COPY [[UV23]](s32) - ; GFX908-NEXT: $vgpr24 = COPY [[UV24]](s32) - ; GFX908-NEXT: $vgpr25 = COPY [[UV25]](s32) - ; GFX908-NEXT: $vgpr26 = COPY [[UV26]](s32) - ; GFX908-NEXT: $vgpr27 = COPY [[UV27]](s32) - ; GFX908-NEXT: $vgpr28 = COPY [[UV28]](s32) - ; GFX908-NEXT: $vgpr29 = COPY [[UV29]](s32) - ; GFX908-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; GFX908-NEXT: [[COPY35:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY35]](<4 x s32>) + ; GFX908-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C1]](i32) + ; GFX908-NEXT: G_STORE [[UV31]](i32), [[PTR_ADD]](p5) :: (store (i32) into stack, align 16, addrspace 5) + ; GFX908-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX908-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX908-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; GFX908-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; GFX908-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; GFX908-NEXT: $vgpr5 = COPY 
[[UV5]](i32) + ; GFX908-NEXT: $vgpr6 = COPY [[UV6]](i32) + ; GFX908-NEXT: $vgpr7 = COPY [[UV7]](i32) + ; GFX908-NEXT: $vgpr8 = COPY [[UV8]](i32) + ; GFX908-NEXT: $vgpr9 = COPY [[UV9]](i32) + ; GFX908-NEXT: $vgpr10 = COPY [[UV10]](i32) + ; GFX908-NEXT: $vgpr11 = COPY [[UV11]](i32) + ; GFX908-NEXT: $vgpr12 = COPY [[UV12]](i32) + ; GFX908-NEXT: $vgpr13 = COPY [[UV13]](i32) + ; GFX908-NEXT: $vgpr14 = COPY [[UV14]](i32) + ; GFX908-NEXT: $vgpr15 = COPY [[UV15]](i32) + ; GFX908-NEXT: $vgpr16 = COPY [[UV16]](i32) + ; GFX908-NEXT: $vgpr17 = COPY [[UV17]](i32) + ; GFX908-NEXT: $vgpr18 = COPY [[UV18]](i32) + ; GFX908-NEXT: $vgpr19 = COPY [[UV19]](i32) + ; GFX908-NEXT: $vgpr20 = COPY [[UV20]](i32) + ; GFX908-NEXT: $vgpr21 = COPY [[UV21]](i32) + ; GFX908-NEXT: $vgpr22 = COPY [[UV22]](i32) + ; GFX908-NEXT: $vgpr23 = COPY [[UV23]](i32) + ; GFX908-NEXT: $vgpr24 = COPY [[UV24]](i32) + ; GFX908-NEXT: $vgpr25 = COPY [[UV25]](i32) + ; GFX908-NEXT: $vgpr26 = COPY [[UV26]](i32) + ; GFX908-NEXT: $vgpr27 = COPY [[UV27]](i32) + ; GFX908-NEXT: $vgpr28 = COPY [[UV28]](i32) + ; GFX908-NEXT: $vgpr29 = COPY [[UV29]](i32) + ; GFX908-NEXT: $vgpr30 = COPY [[UV30]](i32) + ; GFX908-NEXT: [[COPY35:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY35]](<4 x i32>) ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY26]](p4) ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY27]](p4) ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[COPY28]](p4) - ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY29]](s64) - ; GFX908-NEXT: $sgpr12 = COPY [[COPY30]](s32) - ; GFX908-NEXT: $sgpr13 = COPY [[COPY31]](s32) - ; GFX908-NEXT: $sgpr14 = COPY [[COPY32]](s32) - ; GFX908-NEXT: $sgpr15 = COPY [[COPY33]](s32) - ; GFX908-NEXT: $vgpr31 = COPY [[COPY34]](s32) + ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY29]](i64) + ; GFX908-NEXT: $sgpr12 = COPY [[COPY30]](i32) + ; GFX908-NEXT: $sgpr13 = COPY [[COPY31]](i32) + ; GFX908-NEXT: $sgpr14 = COPY [[COPY32]](i32) + ; GFX908-NEXT: $sgpr15 = COPY [[COPY33]](i32) + ; GFX908-NEXT: $vgpr31 = COPY [[COPY34]](i32) ; GFX908-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX908-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc ; GFX908-NEXT: SI_RETURN @@ -653,7 +653,7 @@ define amdgpu_kernel void @test_only_workitem_id_x() #0 !reqd_work_group_size !0 ; GFX900: bb.1 (%ir-block.0): ; GFX900-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GFX900-NEXT: {{ $}} - ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; GFX900-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GFX900-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY 
$sgpr14 @@ -661,32 +661,32 @@ define amdgpu_kernel void @test_only_workitem_id_x() #0 !reqd_work_group_size !0 ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; GFX900-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 + ; GFX900-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 42 ; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 ; GFX900-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]] ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]] ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) - ; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64) - ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GFX900-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>) + ; GFX900-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](i64) + ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(i64) = COPY [[COPY4]] + ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY [[COPY2]] + ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY1]] + ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GFX900-NEXT: $vgpr0 = COPY [[C]](i32) + ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x i32>) ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) - ; GFX900-NEXT: $sgpr12 = COPY [[COPY12]](s32) - ; GFX900-NEXT: $sgpr13 = COPY [[COPY13]](s32) - ; GFX900-NEXT: $sgpr14 = COPY [[COPY14]](s32) - ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; GFX900-NEXT: $vgpr31 = COPY [[COPY15]](s32) + ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](i64) + ; GFX900-NEXT: $sgpr12 = COPY [[COPY12]](i32) + ; GFX900-NEXT: $sgpr13 = COPY [[COPY13]](i32) + ; GFX900-NEXT: $sgpr14 = COPY [[COPY14]](i32) + ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](i32) + ; GFX900-NEXT: $vgpr31 = COPY [[COPY15]](i32) ; GFX900-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX900-NEXT: S_ENDPGM 0 @@ -695,7 +695,7 @@ define amdgpu_kernel void @test_only_workitem_id_x() #0 !reqd_work_group_size !0 ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = 
COPY $vgpr0 + ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -703,32 +703,32 @@ define amdgpu_kernel void @test_only_workitem_id_x() #0 !reqd_work_group_size !0 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 + ; GFX908-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 42 ; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 ; GFX908-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]] ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]] ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) - ; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64) - ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GFX908-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>) + ; GFX908-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](i64) + ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(i64) = COPY [[COPY4]] + ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY [[COPY2]] + ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY1]] + ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GFX908-NEXT: $vgpr0 = COPY [[C]](i32) + ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x i32>) ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) - ; GFX908-NEXT: $sgpr12 = COPY [[COPY12]](s32) - ; GFX908-NEXT: $sgpr13 = COPY [[COPY13]](s32) - ; GFX908-NEXT: $sgpr14 = COPY [[COPY14]](s32) - ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; GFX908-NEXT: $vgpr31 = COPY [[COPY15]](s32) + ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](i64) + ; GFX908-NEXT: $sgpr12 = COPY [[COPY12]](i32) + ; GFX908-NEXT: $sgpr13 = COPY [[COPY13]](i32) + ; GFX908-NEXT: $sgpr14 = COPY [[COPY14]](i32) + ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](i32) + ; GFX908-NEXT: $vgpr31 = COPY [[COPY15]](i32) ; GFX908-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX908-NEXT: S_ENDPGM 0 @@ -741,7 +741,7 @@ define amdgpu_kernel void @test_only_workitem_id_y() #0 
!reqd_work_group_size !1 ; GFX900: bb.1 (%ir-block.0): ; GFX900-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GFX900-NEXT: {{ $}} - ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 + ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 ; GFX900-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GFX900-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -749,36 +749,36 @@ define amdgpu_kernel void @test_only_workitem_id_y() #0 !reqd_work_group_size !1 ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; GFX900-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 + ; GFX900-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 42 ; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 ; GFX900-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]] ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]] ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) - ; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64) - ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX900-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GFX900-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[C3]](s32) - ; GFX900-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL]] - ; GFX900-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>) + ; GFX900-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](i64) + ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(i64) = COPY [[COPY4]] + ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY [[COPY2]] + ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY1]] + ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX900-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GFX900-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY15]], [[C3]](i32) + ; GFX900-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[C2]], [[SHL]] + ; GFX900-NEXT: $vgpr0 = COPY [[C]](i32) + ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x i32>) ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) - ; GFX900-NEXT: $sgpr12 = COPY [[COPY12]](s32) - ; GFX900-NEXT: $sgpr13 = COPY [[COPY13]](s32) - ; GFX900-NEXT: $sgpr14 = COPY [[COPY14]](s32) - ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; GFX900-NEXT: $vgpr31 = COPY [[OR]](s32) + ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](i64) + ; 
GFX900-NEXT: $sgpr12 = COPY [[COPY12]](i32) + ; GFX900-NEXT: $sgpr13 = COPY [[COPY13]](i32) + ; GFX900-NEXT: $sgpr14 = COPY [[COPY14]](i32) + ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](i32) + ; GFX900-NEXT: $vgpr31 = COPY [[OR]](i32) ; GFX900-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX900-NEXT: S_ENDPGM 0 @@ -787,7 +787,7 @@ define amdgpu_kernel void @test_only_workitem_id_y() #0 !reqd_work_group_size !1 ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 + ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -795,36 +795,36 @@ define amdgpu_kernel void @test_only_workitem_id_y() #0 !reqd_work_group_size !1 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 + ; GFX908-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 42 ; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 ; GFX908-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]] ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]] ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) - ; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64) - ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX908-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GFX908-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[C3]](s32) - ; GFX908-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL]] - ; GFX908-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>) + ; GFX908-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](i64) + ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(i64) = COPY [[COPY4]] + ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY [[COPY2]] + ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY1]] + ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX908-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GFX908-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY15]], [[C3]](i32) + ; GFX908-NEXT: 
[[OR:%[0-9]+]]:_(i32) = G_OR [[C2]], [[SHL]] + ; GFX908-NEXT: $vgpr0 = COPY [[C]](i32) + ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x i32>) ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) - ; GFX908-NEXT: $sgpr12 = COPY [[COPY12]](s32) - ; GFX908-NEXT: $sgpr13 = COPY [[COPY13]](s32) - ; GFX908-NEXT: $sgpr14 = COPY [[COPY14]](s32) - ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; GFX908-NEXT: $vgpr31 = COPY [[OR]](s32) + ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](i64) + ; GFX908-NEXT: $sgpr12 = COPY [[COPY12]](i32) + ; GFX908-NEXT: $sgpr13 = COPY [[COPY13]](i32) + ; GFX908-NEXT: $sgpr14 = COPY [[COPY14]](i32) + ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](i32) + ; GFX908-NEXT: $vgpr31 = COPY [[OR]](i32) ; GFX908-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX908-NEXT: S_ENDPGM 0 @@ -837,7 +837,7 @@ define amdgpu_kernel void @test_only_workitem_id_z() #0 !reqd_work_group_size !2 ; GFX900: bb.1 (%ir-block.0): ; GFX900-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GFX900-NEXT: {{ $}} - ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 + ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 ; GFX900-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GFX900-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -845,36 +845,36 @@ define amdgpu_kernel void @test_only_workitem_id_z() #0 !reqd_work_group_size !2 ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; GFX900-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 + ; GFX900-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 42 ; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 ; GFX900-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]] ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]] ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) - ; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64) - ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX900-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GFX900-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[C3]](s32) - ; GFX900-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL]] - ; GFX900-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY 
[[COPY16]](<4 x s32>) + ; GFX900-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](i64) + ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(i64) = COPY [[COPY4]] + ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY [[COPY2]] + ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY1]] + ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX900-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GFX900-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY15]], [[C3]](i32) + ; GFX900-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[C2]], [[SHL]] + ; GFX900-NEXT: $vgpr0 = COPY [[C]](i32) + ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x i32>) ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) - ; GFX900-NEXT: $sgpr12 = COPY [[COPY12]](s32) - ; GFX900-NEXT: $sgpr13 = COPY [[COPY13]](s32) - ; GFX900-NEXT: $sgpr14 = COPY [[COPY14]](s32) - ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; GFX900-NEXT: $vgpr31 = COPY [[OR]](s32) + ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](i64) + ; GFX900-NEXT: $sgpr12 = COPY [[COPY12]](i32) + ; GFX900-NEXT: $sgpr13 = COPY [[COPY13]](i32) + ; GFX900-NEXT: $sgpr14 = COPY [[COPY14]](i32) + ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](i32) + ; GFX900-NEXT: $vgpr31 = COPY [[OR]](i32) ; GFX900-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX900-NEXT: S_ENDPGM 0 @@ -883,7 +883,7 @@ define amdgpu_kernel void @test_only_workitem_id_z() #0 !reqd_work_group_size !2 ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 + ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -891,36 +891,36 @@ define amdgpu_kernel void @test_only_workitem_id_z() #0 !reqd_work_group_size !2 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 + ; GFX908-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 42 ; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 ; GFX908-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]] ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]] ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) - ; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64) - ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY 
[[COPY4]] - ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX908-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GFX908-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[C3]](s32) - ; GFX908-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL]] - ; GFX908-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>) + ; GFX908-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](i64) + ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(i64) = COPY [[COPY4]] + ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY [[COPY2]] + ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY1]] + ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX908-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GFX908-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY15]], [[C3]](i32) + ; GFX908-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[C2]], [[SHL]] + ; GFX908-NEXT: $vgpr0 = COPY [[C]](i32) + ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x i32>) ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) - ; GFX908-NEXT: $sgpr12 = COPY [[COPY12]](s32) - ; GFX908-NEXT: $sgpr13 = COPY [[COPY13]](s32) - ; GFX908-NEXT: $sgpr14 = COPY [[COPY14]](s32) - ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; GFX908-NEXT: $vgpr31 = COPY [[OR]](s32) + ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](i64) + ; GFX908-NEXT: $sgpr12 = COPY [[COPY12]](i32) + ; GFX908-NEXT: $sgpr13 = COPY [[COPY13]](i32) + ; GFX908-NEXT: $sgpr14 = COPY [[COPY14]](i32) + ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](i32) + ; GFX908-NEXT: $vgpr31 = COPY [[OR]](i32) ; GFX908-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX908-NEXT: S_ENDPGM 0 @@ -933,8 +933,8 @@ define amdgpu_kernel void @test_only_workitem_id_xy() #0 !reqd_work_group_size ! 
; GFX900: bb.1 (%ir-block.0): ; GFX900-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GFX900-NEXT: {{ $}} - ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GFX900-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; GFX900-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; GFX900-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -942,36 +942,36 @@ define amdgpu_kernel void @test_only_workitem_id_xy() #0 !reqd_work_group_size ! ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; GFX900-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; GFX900-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 + ; GFX900-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 42 ; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) - ; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](s64) - ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GFX900-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GFX900-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) - ; GFX900-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY16]], [[SHL]] - ; GFX900-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX900-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GFX900-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](i64) + ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(i64) = COPY [[COPY5]] + ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY2]] + ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GFX900-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GFX900-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY17]], [[C2]](i32) + ; GFX900-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY16]], [[SHL]] + ; GFX900-NEXT: $vgpr0 = COPY [[C]](i32) + ; GFX900-NEXT: [[COPY18:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x i32>) ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; GFX900-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; GFX900-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; GFX900-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; GFX900-NEXT: 
$sgpr15 = COPY [[DEF]](s32) - ; GFX900-NEXT: $vgpr31 = COPY [[OR]](s32) + ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](i64) + ; GFX900-NEXT: $sgpr12 = COPY [[COPY13]](i32) + ; GFX900-NEXT: $sgpr13 = COPY [[COPY14]](i32) + ; GFX900-NEXT: $sgpr14 = COPY [[COPY15]](i32) + ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](i32) + ; GFX900-NEXT: $vgpr31 = COPY [[OR]](i32) ; GFX900-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX900-NEXT: S_ENDPGM 0 @@ -980,8 +980,8 @@ define amdgpu_kernel void @test_only_workitem_id_xy() #0 !reqd_work_group_size ! ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -989,36 +989,36 @@ define amdgpu_kernel void @test_only_workitem_id_xy() #0 !reqd_work_group_size ! ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; GFX908-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 + ; GFX908-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 42 ; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) - ; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](s64) - ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GFX908-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GFX908-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) - ; GFX908-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY16]], [[SHL]] - ; GFX908-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX908-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GFX908-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](i64) + ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(i64) = COPY [[COPY5]] + ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY2]] + ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(i32) = 
G_IMPLICIT_DEF + ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GFX908-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GFX908-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY17]], [[C2]](i32) + ; GFX908-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY16]], [[SHL]] + ; GFX908-NEXT: $vgpr0 = COPY [[C]](i32) + ; GFX908-NEXT: [[COPY18:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x i32>) ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; GFX908-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; GFX908-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; GFX908-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; GFX908-NEXT: $vgpr31 = COPY [[OR]](s32) + ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](i64) + ; GFX908-NEXT: $sgpr12 = COPY [[COPY13]](i32) + ; GFX908-NEXT: $sgpr13 = COPY [[COPY14]](i32) + ; GFX908-NEXT: $sgpr14 = COPY [[COPY15]](i32) + ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](i32) + ; GFX908-NEXT: $vgpr31 = COPY [[OR]](i32) ; GFX908-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX908-NEXT: S_ENDPGM 0 @@ -1031,8 +1031,8 @@ define amdgpu_kernel void @test_only_workitem_id_yz() #0 !reqd_work_group_size ! ; GFX900: bb.1 (%ir-block.0): ; GFX900-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GFX900-NEXT: {{ $}} - ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GFX900-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 + ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; GFX900-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 ; GFX900-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1040,40 +1040,40 @@ define amdgpu_kernel void @test_only_workitem_id_yz() #0 !reqd_work_group_size ! 
; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; GFX900-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; GFX900-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 + ; GFX900-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 42 ; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) - ; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](s64) - ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX900-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GFX900-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C3]](s32) - ; GFX900-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL]] - ; GFX900-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GFX900-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GFX900-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C4]](s32) - ; GFX900-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GFX900-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX900-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GFX900-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](i64) + ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(i64) = COPY [[COPY5]] + ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY2]] + ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX900-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GFX900-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY16]], [[C3]](i32) + ; GFX900-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[C2]], [[SHL]] + ; GFX900-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GFX900-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GFX900-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY17]], [[C4]](i32) + ; GFX900-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; GFX900-NEXT: $vgpr0 = COPY [[C]](i32) + ; GFX900-NEXT: [[COPY18:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x i32>) ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; GFX900-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; GFX900-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; GFX900-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; GFX900-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](i64) + ; GFX900-NEXT: $sgpr12 = COPY [[COPY13]](i32) + ; GFX900-NEXT: 
$sgpr13 = COPY [[COPY14]](i32) + ; GFX900-NEXT: $sgpr14 = COPY [[COPY15]](i32) + ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](i32) + ; GFX900-NEXT: $vgpr31 = COPY [[OR1]](i32) ; GFX900-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX900-NEXT: S_ENDPGM 0 @@ -1082,8 +1082,8 @@ define amdgpu_kernel void @test_only_workitem_id_yz() #0 !reqd_work_group_size ! ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 + ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1091,40 +1091,40 @@ define amdgpu_kernel void @test_only_workitem_id_yz() #0 !reqd_work_group_size ! ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; GFX908-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 + ; GFX908-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 42 ; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) - ; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](s64) - ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX908-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GFX908-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C3]](s32) - ; GFX908-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL]] - ; GFX908-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GFX908-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GFX908-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C4]](s32) - ; GFX908-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GFX908-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX908-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GFX908-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](i64) + ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(i64) = COPY [[COPY5]] + ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY 
[[COPY2]] + ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX908-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GFX908-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY16]], [[C3]](i32) + ; GFX908-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[C2]], [[SHL]] + ; GFX908-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GFX908-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GFX908-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY17]], [[C4]](i32) + ; GFX908-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; GFX908-NEXT: $vgpr0 = COPY [[C]](i32) + ; GFX908-NEXT: [[COPY18:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x i32>) ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; GFX908-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; GFX908-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; GFX908-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; GFX908-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](i64) + ; GFX908-NEXT: $sgpr12 = COPY [[COPY13]](i32) + ; GFX908-NEXT: $sgpr13 = COPY [[COPY14]](i32) + ; GFX908-NEXT: $sgpr14 = COPY [[COPY15]](i32) + ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](i32) + ; GFX908-NEXT: $vgpr31 = COPY [[OR1]](i32) ; GFX908-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX908-NEXT: S_ENDPGM 0 @@ -1137,8 +1137,8 @@ define amdgpu_kernel void @test_only_workitem_id_xz() #0 !reqd_work_group_size ! ; GFX900: bb.1 (%ir-block.0): ; GFX900-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GFX900-NEXT: {{ $}} - ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GFX900-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; GFX900-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; GFX900-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1146,36 +1146,36 @@ define amdgpu_kernel void @test_only_workitem_id_xz() #0 !reqd_work_group_size ! 
; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; GFX900-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; GFX900-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 + ; GFX900-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 42 ; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) - ; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](s64) - ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GFX900-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GFX900-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) - ; GFX900-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY16]], [[SHL]] - ; GFX900-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX900-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GFX900-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](i64) + ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(i64) = COPY [[COPY5]] + ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY2]] + ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GFX900-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GFX900-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY17]], [[C2]](i32) + ; GFX900-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY16]], [[SHL]] + ; GFX900-NEXT: $vgpr0 = COPY [[C]](i32) + ; GFX900-NEXT: [[COPY18:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x i32>) ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; GFX900-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; GFX900-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; GFX900-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; GFX900-NEXT: $vgpr31 = COPY [[OR]](s32) + ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](i64) + ; GFX900-NEXT: $sgpr12 = COPY [[COPY13]](i32) + ; GFX900-NEXT: $sgpr13 = COPY [[COPY14]](i32) + ; GFX900-NEXT: $sgpr14 = COPY [[COPY15]](i32) + ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](i32) + ; GFX900-NEXT: $vgpr31 = COPY [[OR]](i32) ; GFX900-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit 
$vgpr31 ; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX900-NEXT: S_ENDPGM 0 @@ -1184,8 +1184,8 @@ define amdgpu_kernel void @test_only_workitem_id_xz() #0 !reqd_work_group_size ! ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1193,36 +1193,36 @@ define amdgpu_kernel void @test_only_workitem_id_xz() #0 !reqd_work_group_size ! ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; GFX908-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 + ; GFX908-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 42 ; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) - ; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](s64) - ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GFX908-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GFX908-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) - ; GFX908-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY16]], [[SHL]] - ; GFX908-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX908-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GFX908-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](i64) + ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(i64) = COPY [[COPY5]] + ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY2]] + ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GFX908-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GFX908-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY17]], [[C2]](i32) + ; GFX908-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY16]], [[SHL]] + ; GFX908-NEXT: $vgpr0 = COPY [[C]](i32) + ; GFX908-NEXT: [[COPY18:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x i32>) ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX908-NEXT: 
$sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; GFX908-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; GFX908-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; GFX908-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; GFX908-NEXT: $vgpr31 = COPY [[OR]](s32) + ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](i64) + ; GFX908-NEXT: $sgpr12 = COPY [[COPY13]](i32) + ; GFX908-NEXT: $sgpr13 = COPY [[COPY14]](i32) + ; GFX908-NEXT: $sgpr14 = COPY [[COPY15]](i32) + ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](i32) + ; GFX908-NEXT: $vgpr31 = COPY [[OR]](i32) ; GFX908-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX908-NEXT: S_ENDPGM 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll index adad38de380d7..06b79bb484b63 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll @@ -13,8 +13,8 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_void() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_void - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x i32>) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_void_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -27,13 +27,13 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm(i32) #0 { ; CHECK: bb.1 (%ir-block.1): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 42 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_i32 - ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[C]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x i32>) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -46,14 +46,14 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm_inreg(i32 inreg ; CHECK: bb.1 (%ir-block.1): ; CHECK-NEXT: liveins: $sgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 + ; CHECK-NEXT: 
[[COPY:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 42 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_i32_inreg - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[C]](s32) - ; CHECK-NEXT: $sgpr4 = COPY [[INTRINSIC_CONVERGENT]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[C]](i32) + ; CHECK-NEXT: $sgpr4 = COPY [[INTRINSIC_CONVERGENT]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x i32>) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -66,18 +66,18 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: (load (s8) from %ir.ptr0, align 4, addrspace 1) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) - ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from %ir.ptr0 + 4, addrspace 1) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(i8) = G_LOAD [[LOAD]](p1) :: (load (i8) from %ir.ptr0, align 4, addrspace 1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](i64) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i32) from %ir.ptr0 + 4, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_struct_i8_i32 - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[LOAD1]](s8) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT1]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[LOAD2]](s32) - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i16) = G_ANYEXT [[LOAD1]](i8) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT1]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[LOAD2]](i32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x i32>) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -92,20 +92,20 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() # ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: 
[[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: (load (s8) from %ir.ptr0, align 4, addrspace 1) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) - ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from %ir.ptr0 + 4, addrspace 1) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(i8) = G_LOAD [[LOAD]](p1) :: (load (i8) from %ir.ptr0, align 4, addrspace 1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](i64) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i32) from %ir.ptr0 + 4, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_struct_i8_i32_inreg - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[LOAD1]](s8) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[ANYEXT1]](s32) - ; CHECK-NEXT: $sgpr4 = COPY [[INTRINSIC_CONVERGENT]](s32) - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[LOAD2]](s32) - ; CHECK-NEXT: $sgpr5 = COPY [[INTRINSIC_CONVERGENT1]](s32) - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i16) = G_ANYEXT [[LOAD1]](i8) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT]](i16) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[ANYEXT1]](i32) + ; CHECK-NEXT: $sgpr4 = COPY [[INTRINSIC_CONVERGENT]](i32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[LOAD2]](i32) + ; CHECK-NEXT: $sgpr5 = COPY [[INTRINSIC_CONVERGENT1]](i32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x i32>) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll index c87b2ce3eba29..5c6cd39195880 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll @@ -72,9 +72,9 @@ define amdgpu_kernel void @test_call_external_i32_func_i32_imm(ptr addrspace(1) ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; GCN-NEXT: 
[[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -82,7 +82,7 @@ define amdgpu_kernel void @test_call_external_i32_func_i32_imm(ptr addrspace(1) ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 + ; GCN-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 42 ; GCN-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p1) from %ir.out.kernarg.offset1, align 16, addrspace 4) ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc @@ -90,38 +90,38 @@ define amdgpu_kernel void @test_call_external_i32_func_i32_imm(ptr addrspace(1) ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) - ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: $vgpr0 = COPY [[C]](s32) - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](i64) + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GCN-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C2]](i32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GCN-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C3]](i32) + ; GCN-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; GCN-NEXT: $vgpr0 = COPY [[C]](i32) + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; GCN-NEXT: 
$sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF]](i32) + ; GCN-NEXT: $vgpr31 = COPY [[OR1]](i32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i32_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY21]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out.load, addrspace 1) + ; GCN-NEXT: G_STORE [[COPY21]](i32), [[LOAD]](p1) :: (volatile store (i32) into %ir.out.load, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call i32 @external_i32_func_i32(i32 42) store volatile i32 %val, ptr addrspace(1) %out @@ -133,19 +133,19 @@ define amdgpu_gfx void @test_gfx_call_external_i32_func_i32_imm(ptr addrspace(1) ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GCN-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 42 ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_i32_func_i32 - ; GCN-NEXT: $vgpr0 = COPY [[C]](s32) - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>) + ; GCN-NEXT: $vgpr0 = COPY [[C]](i32) + ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x i32>) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_i32_func_i32, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY3]](s32), [[MV]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) + ; GCN-NEXT: G_STORE [[COPY3]](i32), [[MV]](p1) :: (volatile store (i32) into %ir.out, addrspace 1) ; GCN-NEXT: SI_RETURN %val = call amdgpu_gfx i32 @external_gfx_i32_func_i32(i32 42) store volatile i32 %val, ptr addrspace(1) %out @@ -157,9 +157,9 @@ define amdgpu_kernel void @test_call_external_i1_func_void() #0 { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: 
[[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -173,38 +173,38 @@ define amdgpu_kernel void @test_call_external_i1_func_void() #0 { ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GCN-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; GCN-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; GCN-NEXT: $sgpr14 = 
COPY [[COPY16]](i32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; GCN-NEXT: $vgpr31 = COPY [[OR1]](i32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i1_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY21]](s32) + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(i1) = G_TRUNC [[COPY21]](i32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[TRUNC]](s1), [[DEF]](p1) :: (volatile store (s1) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[TRUNC]](i1), [[DEF]](p1) :: (volatile store (i1) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call i1 @external_i1_func_void() store volatile i1 %val, ptr addrspace(1) undef @@ -217,13 +217,13 @@ define amdgpu_gfx void @test_gfx_call_external_i1_func_void() #0 { ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_i1_func_void - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x i32>) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_i1_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY1]](s32) + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(i1) = G_TRUNC [[COPY1]](i32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[TRUNC]](s1), [[DEF]](p1) :: (volatile store (s1) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[TRUNC]](i1), [[DEF]](p1) :: (volatile store (i1) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: SI_RETURN %val = call amdgpu_gfx i1 @external_gfx_i1_func_void() store volatile i1 %val, ptr addrspace(1) undef @@ -235,9 +235,9 @@ define amdgpu_kernel void @test_call_external_i1_zeroext_func_void() #0 { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -251,40 +251,40 @@ define amdgpu_kernel void @test_call_external_i1_zeroext_func_void() #0 { ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: 
[[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GCN-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; GCN-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; GCN-NEXT: $vgpr31 = COPY [[OR1]](i32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i1_zeroext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY21]], 1 - ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_ZEXT]](s32) + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(i32) = G_ASSERT_ZEXT [[COPY21]], 1 + ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(i1) = 
G_TRUNC [[ASSERT_ZEXT]](i32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s1) - ; GCN-NEXT: G_STORE [[ZEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[TRUNC]](i1) + ; GCN-NEXT: G_STORE [[ZEXT]](i32), [[DEF]](p1) :: (volatile store (i32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call i1 @external_i1_zeroext_func_void() %val.ext = zext i1 %val to i32 @@ -297,9 +297,9 @@ define amdgpu_kernel void @test_call_external_i1_signext_func_void() #0 { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -313,40 +313,40 @@ define amdgpu_kernel void @test_call_external_i1_signext_func_void() #0 { ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GCN-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GCN-NEXT: 
[[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; GCN-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; GCN-NEXT: $vgpr31 = COPY [[OR1]](i32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i1_signext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY21]], 1 - ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_SEXT]](s32) + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(i32) = G_ASSERT_SEXT [[COPY21]], 1 + ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(i1) = G_TRUNC [[ASSERT_SEXT]](i32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s1) - ; GCN-NEXT: G_STORE [[SEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[TRUNC]](i1) + ; GCN-NEXT: G_STORE [[SEXT]](i32), [[DEF]](p1) :: (volatile store (i32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call i1 @external_i1_signext_func_void() %val.ext = sext i1 %val to i32 @@ -359,9 +359,9 @@ define amdgpu_kernel void @test_call_external_i8_func_void() #0 { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -375,39 +375,39 @@ define amdgpu_kernel void @test_call_external_i8_func_void() #0 { ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: 
[[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GCN-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; GCN-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; GCN-NEXT: $vgpr31 = COPY [[OR1]](i32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i8_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32) - ; GCN-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY21]](i32) + ; GCN-NEXT: [[TRUNC1:%[0-9]+]]:_(i8) = G_TRUNC [[TRUNC]](i16) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (volatile store (s8) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[TRUNC1]](i8), [[DEF]](p1) :: (volatile store (i8) into `ptr 
addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call i8 @external_i8_func_void() store volatile i8 %val, ptr addrspace(1) undef @@ -420,14 +420,14 @@ define amdgpu_gfx void @test_gfx_call_external_i8_func_void() #0 { ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_i8_func_void - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x i32>) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_i8_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GCN-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GCN-NEXT: [[TRUNC1:%[0-9]+]]:_(i8) = G_TRUNC [[TRUNC]](i16) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (volatile store (s8) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[TRUNC1]](i8), [[DEF]](p1) :: (volatile store (i8) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: SI_RETURN %val = call amdgpu_gfx i8 @external_gfx_i8_func_void() store volatile i8 %val, ptr addrspace(1) undef @@ -439,9 +439,9 @@ define amdgpu_kernel void @test_call_external_i8_zeroext_func_void() #0 { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -455,40 +455,40 @@ define amdgpu_kernel void @test_call_external_i8_zeroext_func_void() #0 { ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: 
[[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GCN-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; GCN-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; GCN-NEXT: $vgpr31 = COPY [[OR1]](i32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i8_zeroext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY21]], 8 - ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_ZEXT]](s32) + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(i32) = G_ASSERT_ZEXT [[COPY21]], 8 + ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(i8) = G_TRUNC [[ASSERT_ZEXT]](i32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s8) - ; GCN-NEXT: G_STORE [[ZEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[TRUNC]](i8) + ; GCN-NEXT: G_STORE [[ZEXT]](i32), [[DEF]](p1) :: (volatile store (i32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call i8 @external_i8_zeroext_func_void() %val.ext = zext i8 %val to i32 @@ -501,9 +501,9 @@ define amdgpu_kernel void @test_call_external_i8_signext_func_void() #0 { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, 
$sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -517,40 +517,40 @@ define amdgpu_kernel void @test_call_external_i8_signext_func_void() #0 { ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GCN-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; GCN-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; GCN-NEXT: 
$sgpr13 = COPY [[COPY15]](i32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; GCN-NEXT: $vgpr31 = COPY [[OR1]](i32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i8_signext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY21]], 8 - ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_SEXT]](s32) + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(i32) = G_ASSERT_SEXT [[COPY21]], 8 + ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(i8) = G_TRUNC [[ASSERT_SEXT]](i32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s8) - ; GCN-NEXT: G_STORE [[SEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[TRUNC]](i8) + ; GCN-NEXT: G_STORE [[SEXT]](i32), [[DEF]](p1) :: (volatile store (i32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call i8 @external_i8_signext_func_void() %val.ext = sext i8 %val to i32 @@ -563,9 +563,9 @@ define amdgpu_kernel void @test_call_external_i16_func_void() #0 { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -579,38 +579,38 @@ define amdgpu_kernel void @test_call_external_i16_func_void() #0 { ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x 
s32>) + ; GCN-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GCN-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; GCN-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; GCN-NEXT: $vgpr31 = COPY [[OR1]](i32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32) + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY21]](i32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (volatile store (s16) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[TRUNC]](i16), [[DEF]](p1) :: (volatile store (i16) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call i16 @external_i16_func_void() store volatile i16 %val, ptr addrspace(1) undef @@ -622,9 +622,9 @@ define amdgpu_kernel void @test_call_external_i16_zeroext_func_void() #0 { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = 
COPY $sgpr14 @@ -638,40 +638,40 @@ define amdgpu_kernel void @test_call_external_i16_zeroext_func_void() #0 { ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GCN-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; GCN-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; GCN-NEXT: $vgpr31 = COPY [[OR1]](i32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i16_zeroext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: 
[[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY21]], 16 - ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_ZEXT]](s32) + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(i32) = G_ASSERT_ZEXT [[COPY21]], 16 + ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[ASSERT_ZEXT]](i32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s16) - ; GCN-NEXT: G_STORE [[ZEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[TRUNC]](i16) + ; GCN-NEXT: G_STORE [[ZEXT]](i32), [[DEF]](p1) :: (volatile store (i32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call i16 @external_i16_zeroext_func_void() %val.ext = zext i16 %val to i32 @@ -684,9 +684,9 @@ define amdgpu_kernel void @test_call_external_i16_signext_func_void() #0 { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -700,40 +700,40 @@ define amdgpu_kernel void @test_call_external_i16_signext_func_void() #0 { ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; GCN-NEXT: 
[[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GCN-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; GCN-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; GCN-NEXT: $vgpr31 = COPY [[OR1]](i32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i16_signext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY21]], 16 - ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_SEXT]](s32) + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(i32) = G_ASSERT_SEXT [[COPY21]], 16 + ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[ASSERT_SEXT]](i32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16) - ; GCN-NEXT: G_STORE [[SEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[TRUNC]](i16) + ; GCN-NEXT: G_STORE [[SEXT]](i32), [[DEF]](p1) :: (volatile store (i32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call i16 @external_i16_signext_func_void() %val.ext = sext i16 %val to i32 @@ -746,9 +746,9 @@ define amdgpu_kernel void @test_call_external_i32_func_void() #0 { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -762,37 +762,37 @@ define amdgpu_kernel void @test_call_external_i32_func_void() #0 { ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; 
GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GCN-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; GCN-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; GCN-NEXT: $vgpr31 = COPY [[OR1]](i32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY21]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr 
addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[COPY21]](i32), [[DEF]](p1) :: (volatile store (i32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call i32 @external_i32_func_void() store volatile i32 %val, ptr addrspace(1) undef @@ -805,12 +805,12 @@ define amdgpu_gfx void @test_gfx_call_external_i32_func_void() #0 { ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_i32_func_void - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x i32>) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_i32_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY1]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[COPY1]](i32), [[DEF]](p1) :: (volatile store (i32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: SI_RETURN %val = call amdgpu_gfx i32 @external_gfx_i32_func_void() store volatile i32 %val, ptr addrspace(1) undef @@ -822,9 +822,9 @@ define amdgpu_kernel void @test_call_external_i48_func_void() #0 { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -838,40 +838,40 @@ define amdgpu_kernel void @test_call_external_i48_func_void() #0 { ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x 
s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GCN-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; GCN-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; GCN-NEXT: $vgpr31 = COPY [[OR1]](i32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i48_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32) - ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64) + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY21]](i32), [[COPY22]](i32) + ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(i48) = G_TRUNC [[MV]](i64) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[TRUNC]](s48), [[DEF]](p1) :: (volatile store (s48) into `ptr addrspace(1) undef`, align 8, addrspace 1) + ; GCN-NEXT: G_STORE [[TRUNC]](i48), [[DEF]](p1) :: (volatile store (i48) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call i48 @external_i48_func_void() store volatile i48 %val, ptr addrspace(1) undef @@ -883,9 +883,9 @@ define amdgpu_kernel void @test_call_external_i48_zeroext_func_void() #0 { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: 
[[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -899,41 +899,41 @@ define amdgpu_kernel void @test_call_external_i48_zeroext_func_void() #0 { ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GCN-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; GCN-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; GCN-NEXT: $sgpr15 = COPY 
[[DEF1]](i32) + ; GCN-NEXT: $vgpr31 = COPY [[OR1]](i32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i48_zeroext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32) - ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64) + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY21]](i32), [[COPY22]](i32) + ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(i48) = G_TRUNC [[MV]](i64) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) - ; GCN-NEXT: G_STORE [[ZEXT]](s64), [[DEF]](p1) :: (volatile store (s64) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[TRUNC]](i48) + ; GCN-NEXT: G_STORE [[ZEXT]](i64), [[DEF]](p1) :: (volatile store (i64) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call i48 @external_i48_zeroext_func_void() %ext = zext i48 %val to i64 @@ -946,9 +946,9 @@ define amdgpu_kernel void @test_call_external_i48_signext_func_void() #0 { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -962,41 +962,41 @@ define amdgpu_kernel void @test_call_external_i48_signext_func_void() #0 { ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: 
$sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GCN-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; GCN-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; GCN-NEXT: $vgpr31 = COPY [[OR1]](i32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i48_signext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32) - ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64) + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY21]](i32), [[COPY22]](i32) + ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(i48) = G_TRUNC [[MV]](i64) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s48) - ; GCN-NEXT: G_STORE [[SEXT]](s64), [[DEF]](p1) :: (volatile store (s64) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: [[SEXT:%[0-9]+]]:_(i64) = G_SEXT [[TRUNC]](i48) + ; GCN-NEXT: G_STORE [[SEXT]](i64), [[DEF]](p1) :: (volatile store (i64) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call i48 @external_i48_signext_func_void() %ext = sext i48 %val to i64 @@ -1009,9 +1009,9 @@ define amdgpu_kernel void @test_call_external_i64_func_void() #0 { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = 
COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1025,39 +1025,39 @@ define amdgpu_kernel void @test_call_external_i64_func_void() #0 { ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GCN-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; GCN-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; 
GCN-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; GCN-NEXT: $vgpr31 = COPY [[OR1]](i32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i64_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32) + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY21]](i32), [[COPY22]](i32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (volatile store (s64) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[MV]](i64), [[DEF]](p1) :: (volatile store (i64) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call i64 @external_i64_func_void() store volatile i64 %val, ptr addrspace(1) undef @@ -1069,9 +1069,9 @@ define amdgpu_kernel void @test_call_external_p1_func_void() #0 { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1085,37 +1085,37 @@ define amdgpu_kernel void @test_call_external_p1_func_void() #0 { ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] 
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GCN-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; GCN-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; GCN-NEXT: $vgpr31 = COPY [[OR1]](i32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_p1_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32) + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY21]](i32), [[COPY22]](i32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[MV]](p1), [[DEF]](p1) :: (volatile store (p1) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 @@ -1129,9 +1129,9 @@ define amdgpu_kernel void @test_call_external_v2p1_func_void() #0 { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1145,40 +1145,40 @@ define amdgpu_kernel void @test_call_external_v2p1_func_void() #0 { ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GCN-NEXT: 
[[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GCN-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; GCN-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; GCN-NEXT: $vgpr31 = COPY [[OR1]](i32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v2p1_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: 
[[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32) - ; GCN-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY23]](s32), [[COPY24]](s32) + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY21]](i32), [[COPY22]](i32) + ; GCN-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY23]](i32), [[COPY24]](i32) ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p1>), [[DEF]](p1) :: (volatile store (<2 x p1>) into `ptr addrspace(1) undef`, addrspace 1) @@ -1193,9 +1193,9 @@ define amdgpu_kernel void @test_call_external_p3_func_void() #0 { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1209,33 +1209,33 @@ define amdgpu_kernel void @test_call_external_p3_func_void() #0 { ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + 
; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GCN-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; GCN-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; GCN-NEXT: $vgpr31 = COPY [[OR1]](i32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_p3_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -1251,9 +1251,9 @@ define amdgpu_kernel void @test_call_external_v2p3_func_void() #0 { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1267,33 +1267,33 @@ define amdgpu_kernel void @test_call_external_v2p3_func_void() #0 { ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: 
[[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GCN-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; GCN-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; GCN-NEXT: $vgpr31 = COPY [[OR1]](i32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v2p3_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(p3) = COPY $vgpr1 @@ -1311,9 +1311,9 @@ define amdgpu_kernel void @test_call_external_f16_func_void() #0 { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1327,38 +1327,39 @@ define amdgpu_kernel void @test_call_external_f16_func_void() #0 { ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; 
GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GCN-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; GCN-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; GCN-NEXT: $vgpr31 = COPY [[OR1]](i32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_f16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32) + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY21]](i32) + ; 
GCN-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (volatile store (s16) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[BITCAST]](f16), [[DEF]](p1) :: (volatile store (f16) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call half @external_f16_func_void() store volatile half %val, ptr addrspace(1) undef @@ -1370,9 +1371,9 @@ define amdgpu_kernel void @test_call_external_f32_func_void() #0 { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1386,37 +1387,37 @@ define amdgpu_kernel void @test_call_external_f32_func_void() #0 { ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GCN-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; GCN-NEXT: 
[[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; GCN-NEXT: $vgpr31 = COPY [[OR1]](i32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_f32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(f32) = COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY21]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[COPY21]](f32), [[DEF]](p1) :: (volatile store (f32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call float @external_f32_func_void() store volatile float %val, ptr addrspace(1) undef @@ -1428,9 +1429,9 @@ define amdgpu_kernel void @test_call_external_f64_func_void() #0 { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1444,39 +1445,40 @@ define amdgpu_kernel void @test_call_external_f64_func_void() #0 { ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; 
GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GCN-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; GCN-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; GCN-NEXT: $vgpr31 = COPY [[OR1]](i32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_f64_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32) + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY21]](i32), [[COPY22]](i32) + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[MV]](i64) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (volatile store (s64) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[BITCAST]](f64), [[DEF]](p1) :: (volatile store (f64) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call double @external_f64_func_void() store volatile double %val, ptr addrspace(1) undef @@ -1488,9 +1490,9 @@ define amdgpu_kernel void @test_call_external_v2f64_func_void() #0 { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, 
$sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1504,43 +1506,45 @@ define amdgpu_kernel void @test_call_external_v2f64_func_void() #0 { ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GCN-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; GCN-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; GCN-NEXT: $sgpr12 = COPY 
[[COPY14]](i32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; GCN-NEXT: $vgpr31 = COPY [[OR1]](i32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v2f64_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32) - ; GCN-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY23]](s32), [[COPY24]](s32) - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY21]](i32), [[COPY22]](i32) + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[MV]](i64) + ; GCN-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY23]](i32), [[COPY24]](i32) + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[MV1]](i64) + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f64>) = G_BUILD_VECTOR [[BITCAST]](f64), [[BITCAST1]](f64) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (volatile store (<2 x s64>) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<2 x f64>), [[DEF]](p1) :: (volatile store (<2 x f64>) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <2 x double> @external_v2f64_func_void() store volatile <2 x double> %val, ptr addrspace(1) undef @@ -1552,9 +1556,9 @@ define amdgpu_kernel void @test_call_external_v2i32_func_void() #0 { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1568,39 +1572,39 @@ define amdgpu_kernel void @test_call_external_v2i32_func_void() #0 { ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY 
[[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GCN-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; GCN-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; GCN-NEXT: $vgpr31 = COPY [[OR1]](i32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v2i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32) + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY21]](i32), [[COPY22]](i32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (volatile store (<2 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: 
G_STORE [[BUILD_VECTOR]](<2 x i32>), [[DEF]](p1) :: (volatile store (<2 x i32>) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <2 x i32> @external_v2i32_func_void() store volatile <2 x i32> %val, ptr addrspace(1) undef @@ -1612,9 +1616,9 @@ define amdgpu_kernel void @test_call_external_v3i32_func_void() #0 { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1628,40 +1632,40 @@ define amdgpu_kernel void @test_call_external_v3i32_func_void() #0 { ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GCN-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; GCN-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; GCN-NEXT: $sgpr4_sgpr5 = COPY 
[[COPY10]](p4) ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; GCN-NEXT: $vgpr31 = COPY [[OR1]](i32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v3i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32) + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY21]](i32), [[COPY22]](i32), [[COPY23]](i32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (volatile store (<3 x s32>) into `ptr addrspace(1) undef`, align 8, addrspace 1) + ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<3 x i32>), [[DEF]](p1) :: (volatile store (<3 x i32>) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <3 x i32> @external_v3i32_func_void() store volatile <3 x i32> %val, ptr addrspace(1) undef, align 8 @@ -1673,9 +1677,9 @@ define amdgpu_kernel void @test_call_external_v4i32_func_void() #0 { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1689,41 +1693,41 @@ define amdgpu_kernel void @test_call_external_v4i32_func_void() #0 { ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: 
[[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GCN-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; GCN-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; GCN-NEXT: $vgpr31 = COPY [[OR1]](i32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v4i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32) + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY21]](i32), 
[[COPY22]](i32), [[COPY23]](i32), [[COPY24]](i32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[DEF]](p1) :: (volatile store (<4 x s32>) into `ptr addrspace(1) undef`, align 8, addrspace 1) + ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[DEF]](p1) :: (volatile store (<4 x i32>) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <4 x i32> @external_v4i32_func_void() store volatile <4 x i32> %val, ptr addrspace(1) undef, align 8 @@ -1735,9 +1739,9 @@ define amdgpu_kernel void @test_call_external_v5i32_func_void() #0 { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1751,42 +1755,42 @@ define amdgpu_kernel void @test_call_external_v5i32_func_void() #0 { ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GCN-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GCN-NEXT: 
[[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; GCN-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; GCN-NEXT: $vgpr31 = COPY [[OR1]](i32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v5i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32) + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x i32>) = G_BUILD_VECTOR [[COPY21]](i32), [[COPY22]](i32), [[COPY23]](i32), [[COPY24]](i32), [[COPY25]](i32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<5 x s32>), [[DEF]](p1) :: (volatile store (<5 x s32>) into `ptr addrspace(1) undef`, align 8, addrspace 1) + ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<5 x i32>), [[DEF]](p1) :: (volatile store (<5 x i32>) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <5 x i32> @external_v5i32_func_void() store volatile <5 x i32> %val, ptr addrspace(1) undef, align 8 @@ -1798,9 +1802,9 @@ define amdgpu_kernel void @test_call_external_v8i32_func_void() #0 { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1814,45 +1818,45 @@ define amdgpu_kernel void 
@test_call_external_v8i32_func_void() #0 { ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GCN-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; GCN-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; GCN-NEXT: $vgpr31 = COPY [[OR1]](i32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v8i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, 
implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32) + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY21]](i32), [[COPY22]](i32), [[COPY23]](i32), [[COPY24]](i32), [[COPY25]](i32), [[COPY26]](i32), [[COPY27]](i32), [[COPY28]](i32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<8 x s32>), [[DEF]](p1) :: (volatile store (<8 x s32>) into `ptr addrspace(1) undef`, align 8, addrspace 1) + ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<8 x i32>), [[DEF]](p1) :: (volatile store (<8 x i32>) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <8 x i32> @external_v8i32_func_void() store volatile <8 x i32> %val, ptr addrspace(1) undef, align 8 @@ -1864,9 +1868,9 @@ define amdgpu_kernel void @test_call_external_v16i32_func_void() #0 { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1880,53 +1884,53 @@ define amdgpu_kernel void @test_call_external_v16i32_func_void() #0 { ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = 
G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GCN-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; GCN-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; GCN-NEXT: $vgpr31 = COPY [[OR1]](i32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v16i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7, implicit-def $vgpr8, implicit-def $vgpr9, implicit-def $vgpr10, implicit-def $vgpr11, implicit-def $vgpr12, implicit-def $vgpr13, implicit-def $vgpr14, implicit-def $vgpr15 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GCN-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GCN-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GCN-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GCN-NEXT: 
[[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GCN-NEXT: [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GCN-NEXT: [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GCN-NEXT: [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32), [[COPY32]](s32), [[COPY33]](s32), [[COPY34]](s32), [[COPY35]](s32), [[COPY36]](s32) + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GCN-NEXT: [[COPY29:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GCN-NEXT: [[COPY30:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GCN-NEXT: [[COPY32:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GCN-NEXT: [[COPY33:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; GCN-NEXT: [[COPY34:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; GCN-NEXT: [[COPY35:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; GCN-NEXT: [[COPY36:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x i32>) = G_BUILD_VECTOR [[COPY21]](i32), [[COPY22]](i32), [[COPY23]](i32), [[COPY24]](i32), [[COPY25]](i32), [[COPY26]](i32), [[COPY27]](i32), [[COPY28]](i32), [[COPY29]](i32), [[COPY30]](i32), [[COPY31]](i32), [[COPY32]](i32), [[COPY33]](i32), [[COPY34]](i32), [[COPY35]](i32), [[COPY36]](i32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<16 x s32>), [[DEF]](p1) :: (volatile store (<16 x s32>) into `ptr addrspace(1) undef`, align 8, addrspace 1) + ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<16 x i32>), [[DEF]](p1) :: (volatile store (<16 x i32>) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <16 x i32> @external_v16i32_func_void() store volatile <16 x i32> %val, ptr addrspace(1) undef, align 8 @@ -1938,9 +1942,9 @@ define amdgpu_kernel void @test_call_external_v32i32_func_void() #0 { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1954,69 +1958,69 @@ define amdgpu_kernel void @test_call_external_v32i32_func_void() #0 { ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: 
[[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GCN-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; GCN-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; GCN-NEXT: $vgpr31 = COPY [[OR1]](i32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v32i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7, implicit-def $vgpr8, implicit-def $vgpr9, implicit-def $vgpr10, implicit-def $vgpr11, implicit-def $vgpr12, implicit-def $vgpr13, implicit-def $vgpr14, implicit-def $vgpr15, implicit-def $vgpr16, implicit-def $vgpr17, implicit-def $vgpr18, implicit-def $vgpr19, implicit-def $vgpr20, implicit-def $vgpr21, implicit-def $vgpr22, implicit-def $vgpr23, implicit-def $vgpr24, implicit-def $vgpr25, implicit-def 
$vgpr26, implicit-def $vgpr27, implicit-def $vgpr28, implicit-def $vgpr29, implicit-def $vgpr30, implicit-def $vgpr31 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GCN-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GCN-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GCN-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GCN-NEXT: [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GCN-NEXT: [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GCN-NEXT: [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GCN-NEXT: [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GCN-NEXT: [[COPY37:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GCN-NEXT: [[COPY38:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GCN-NEXT: [[COPY39:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GCN-NEXT: [[COPY40:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GCN-NEXT: [[COPY41:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GCN-NEXT: [[COPY42:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GCN-NEXT: [[COPY43:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GCN-NEXT: [[COPY44:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GCN-NEXT: [[COPY45:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; GCN-NEXT: [[COPY46:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; GCN-NEXT: [[COPY47:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; GCN-NEXT: [[COPY48:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; GCN-NEXT: [[COPY49:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; GCN-NEXT: [[COPY50:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; GCN-NEXT: [[COPY51:%[0-9]+]]:_(s32) = COPY $vgpr30 - ; GCN-NEXT: [[COPY52:%[0-9]+]]:_(s32) = COPY $vgpr31 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32), [[COPY32]](s32), [[COPY33]](s32), [[COPY34]](s32), [[COPY35]](s32), [[COPY36]](s32), [[COPY37]](s32), [[COPY38]](s32), [[COPY39]](s32), [[COPY40]](s32), [[COPY41]](s32), [[COPY42]](s32), [[COPY43]](s32), [[COPY44]](s32), [[COPY45]](s32), [[COPY46]](s32), [[COPY47]](s32), [[COPY48]](s32), [[COPY49]](s32), [[COPY50]](s32), [[COPY51]](s32), [[COPY52]](s32) + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GCN-NEXT: [[COPY29:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GCN-NEXT: [[COPY30:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GCN-NEXT: [[COPY32:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GCN-NEXT: [[COPY33:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; GCN-NEXT: [[COPY34:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; GCN-NEXT: [[COPY35:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; GCN-NEXT: [[COPY36:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; GCN-NEXT: [[COPY37:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; GCN-NEXT: [[COPY38:%[0-9]+]]:_(i32) = COPY $vgpr17 + ; GCN-NEXT: [[COPY39:%[0-9]+]]:_(i32) = COPY $vgpr18 + ; GCN-NEXT: 
[[COPY40:%[0-9]+]]:_(i32) = COPY $vgpr19 + ; GCN-NEXT: [[COPY41:%[0-9]+]]:_(i32) = COPY $vgpr20 + ; GCN-NEXT: [[COPY42:%[0-9]+]]:_(i32) = COPY $vgpr21 + ; GCN-NEXT: [[COPY43:%[0-9]+]]:_(i32) = COPY $vgpr22 + ; GCN-NEXT: [[COPY44:%[0-9]+]]:_(i32) = COPY $vgpr23 + ; GCN-NEXT: [[COPY45:%[0-9]+]]:_(i32) = COPY $vgpr24 + ; GCN-NEXT: [[COPY46:%[0-9]+]]:_(i32) = COPY $vgpr25 + ; GCN-NEXT: [[COPY47:%[0-9]+]]:_(i32) = COPY $vgpr26 + ; GCN-NEXT: [[COPY48:%[0-9]+]]:_(i32) = COPY $vgpr27 + ; GCN-NEXT: [[COPY49:%[0-9]+]]:_(i32) = COPY $vgpr28 + ; GCN-NEXT: [[COPY50:%[0-9]+]]:_(i32) = COPY $vgpr29 + ; GCN-NEXT: [[COPY51:%[0-9]+]]:_(i32) = COPY $vgpr30 + ; GCN-NEXT: [[COPY52:%[0-9]+]]:_(i32) = COPY $vgpr31 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x i32>) = G_BUILD_VECTOR [[COPY21]](i32), [[COPY22]](i32), [[COPY23]](i32), [[COPY24]](i32), [[COPY25]](i32), [[COPY26]](i32), [[COPY27]](i32), [[COPY28]](i32), [[COPY29]](i32), [[COPY30]](i32), [[COPY31]](i32), [[COPY32]](i32), [[COPY33]](i32), [[COPY34]](i32), [[COPY35]](i32), [[COPY36]](i32), [[COPY37]](i32), [[COPY38]](i32), [[COPY39]](i32), [[COPY40]](i32), [[COPY41]](i32), [[COPY42]](i32), [[COPY43]](i32), [[COPY44]](i32), [[COPY45]](i32), [[COPY46]](i32), [[COPY47]](i32), [[COPY48]](i32), [[COPY49]](i32), [[COPY50]](i32), [[COPY51]](i32), [[COPY52]](i32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `ptr addrspace(1) undef`, align 8, addrspace 1) + ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<32 x i32>), [[DEF]](p1) :: (volatile store (<32 x i32>) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <32 x i32> @external_v32i32_func_void() store volatile <32 x i32> %val, ptr addrspace(1) undef, align 8 @@ -2028,9 +2032,9 @@ define amdgpu_kernel void @test_call_external_v2i16_func_void() #0 { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -2044,37 +2048,37 @@ define amdgpu_kernel void @test_call_external_v2i16_func_void() #0 { ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR 
[[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GCN-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; GCN-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; GCN-NEXT: $vgpr31 = COPY [[OR1]](i32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v2i16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY21]](<2 x s16>), [[DEF]](p1) :: (volatile store (<2 x s16>) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[COPY21]](<2 x i16>), [[DEF]](p1) :: (volatile store (<2 x i16>) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <2 x i16> @external_v2i16_func_void() store volatile <2 x i16> %val, ptr addrspace(1) undef @@ -2086,9 +2090,9 @@ define amdgpu_kernel void @test_call_external_v3i16_func_void() #0 { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + 
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -2102,41 +2106,41 @@ define amdgpu_kernel void @test_call_external_v3i16_func_void() #0 { ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GCN-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; GCN-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; GCN-NEXT: $vgpr31 = COPY [[OR1]](i32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL 
[[GV]](p0), @external_v3i16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>) - ; GCN-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>) - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16) + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[COPY21]](<2 x i16>), [[COPY22]](<2 x i16>) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(i16), [[UV1:%[0-9]+]]:_(i16), [[UV2:%[0-9]+]]:_(i16), [[UV3:%[0-9]+]]:_(i16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x i16>) + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i16>) = G_BUILD_VECTOR [[UV]](i16), [[UV1]](i16), [[UV2]](i16) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s16>), [[DEF]](p1) :: (volatile store (<3 x s16>) into `ptr addrspace(1) undef`, align 8, addrspace 1) + ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<3 x i16>), [[DEF]](p1) :: (volatile store (<3 x i16>) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <3 x i16> @external_v3i16_func_void() store volatile <3 x i16> %val, ptr addrspace(1) undef @@ -2148,9 +2152,9 @@ define amdgpu_kernel void @test_call_external_v4i16_func_void() #0 { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -2164,39 +2168,39 @@ define amdgpu_kernel void @test_call_external_v4i16_func_void() #0 { ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - 
; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GCN-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; GCN-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; GCN-NEXT: $vgpr31 = COPY [[OR1]](i32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v4i16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>) + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[COPY21]](<2 x i16>), [[COPY22]](<2 x i16>) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (volatile store (<4 x s16>) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x i16>), [[DEF]](p1) :: (volatile store (<4 x i16>) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <4 x i16> @external_v4i16_func_void() store volatile <4 x i16> %val, ptr addrspace(1) undef @@ -2208,9 +2212,9 @@ define amdgpu_kernel void 
@test_call_external_v2f16_func_void() #0 { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -2224,37 +2228,37 @@ define amdgpu_kernel void @test_call_external_v2f16_func_void() #0 { ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GCN-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; GCN-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: 
$sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; GCN-NEXT: $vgpr31 = COPY [[OR1]](i32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v2f16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x f16>) = COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY21]](<2 x s16>), [[DEF]](p1) :: (volatile store (<2 x s16>) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[COPY21]](<2 x f16>), [[DEF]](p1) :: (volatile store (<2 x f16>) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <2 x half> @external_v2f16_func_void() store volatile <2 x half> %val, ptr addrspace(1) undef @@ -2266,9 +2270,9 @@ define amdgpu_kernel void @test_call_external_v3f16_func_void() #0 { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -2282,41 +2286,41 @@ define amdgpu_kernel void @test_call_external_v3f16_func_void() #0 { ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; GCN-NEXT: 
[[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GCN-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; GCN-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; GCN-NEXT: $vgpr31 = COPY [[OR1]](i32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v3f16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>) - ; GCN-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>) - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16) + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x f16>) = COPY $vgpr0 + ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<2 x f16>) = COPY $vgpr1 + ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[COPY21]](<2 x f16>), [[COPY22]](<2 x f16>) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(f16), [[UV1:%[0-9]+]]:_(f16), [[UV2:%[0-9]+]]:_(f16), [[UV3:%[0-9]+]]:_(f16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x f16>) + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f16>) = G_BUILD_VECTOR [[UV]](f16), [[UV1]](f16), [[UV2]](f16) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s16>), [[DEF]](p1) :: (volatile store (<3 x s16>) into `ptr addrspace(1) undef`, align 8, addrspace 1) + ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<3 x f16>), [[DEF]](p1) :: (volatile store (<3 x f16>) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <3 x half> @external_v3f16_func_void() store volatile <3 x half> %val, ptr addrspace(1) undef @@ -2328,9 +2332,9 @@ define amdgpu_kernel void 
@test_call_external_v4f16_func_void() #0 { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -2344,39 +2348,39 @@ define amdgpu_kernel void @test_call_external_v4f16_func_void() #0 { ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GCN-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; GCN-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: 
$sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; GCN-NEXT: $vgpr31 = COPY [[OR1]](i32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v4f16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>) + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x f16>) = COPY $vgpr0 + ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<2 x f16>) = COPY $vgpr1 + ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[COPY21]](<2 x f16>), [[COPY22]](<2 x f16>) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (volatile store (<4 x s16>) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x f16>), [[DEF]](p1) :: (volatile store (<4 x f16>) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <4 x half> @external_v4f16_func_void() store volatile <4 x half> %val, ptr addrspace(1) undef @@ -2388,9 +2392,9 @@ define amdgpu_kernel void @test_call_external_v3f32_func_void() #0 { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -2404,40 +2408,40 @@ define amdgpu_kernel void @test_call_external_v3f32_func_void() #0 { ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: 
[[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GCN-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; GCN-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; GCN-NEXT: $vgpr31 = COPY [[OR1]](i32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v3f32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32) + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[COPY21]](f32), [[COPY22]](f32), [[COPY23]](f32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (volatile store (<3 x s32>) into `ptr addrspace(1) undef`, align 16, addrspace 1) + ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<3 x f32>), [[DEF]](p1) :: (volatile store (<3 x f32>) into `ptr addrspace(1) undef`, align 16, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <3 x float> @external_v3f32_func_void() store volatile <3 x float> %val, ptr addrspace(1) undef @@ -2449,9 +2453,9 @@ define amdgpu_kernel void 
@test_call_external_v5f32_func_void() #0 { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -2465,42 +2469,42 @@ define amdgpu_kernel void @test_call_external_v5f32_func_void() #0 { ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GCN-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; GCN-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: 
$sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; GCN-NEXT: $vgpr31 = COPY [[OR1]](i32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v5f32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32) + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(f32) = COPY $vgpr4 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x f32>) = G_BUILD_VECTOR [[COPY21]](f32), [[COPY22]](f32), [[COPY23]](f32), [[COPY24]](f32), [[COPY25]](f32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<5 x s32>), [[DEF]](p1) :: (volatile store (<5 x s32>) into `ptr addrspace(1) undef`, align 32, addrspace 1) + ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<5 x f32>), [[DEF]](p1) :: (volatile store (<5 x f32>) into `ptr addrspace(1) undef`, align 32, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <5 x float> @external_v5f32_func_void() store volatile <5 x float> %val, ptr addrspace(1) undef @@ -2513,9 +2517,9 @@ define amdgpu_kernel void @test_call_external_i32_i64_func_void() #0 { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -2529,41 +2533,41 @@ define amdgpu_kernel void @test_call_external_i32_i64_func_void() #0 { ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; 
GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GCN-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; GCN-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; GCN-NEXT: $vgpr31 = COPY [[OR1]](i32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i32_i64_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY22]](i32), [[COPY23]](i32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY21]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; 
GCN-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (volatile store (s64) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[COPY21]](i32), [[DEF]](p1) :: (volatile store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[MV]](i64), [[DEF]](p1) :: (volatile store (i64) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call { i32, i64 } @external_i32_i64_func_void() %val.0 = extractvalue { i32, i64 } %val, 0 @@ -2579,16 +2583,16 @@ define amdgpu_gfx void @test_gfx_call_external_i32_i64_func_void() #0 { ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_i32_i64_func_void - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x i32>) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_i32_i64_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY1]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GCN-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (volatile store (s64) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[COPY1]](i32), [[DEF]](p1) :: (volatile store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[MV]](i64), [[DEF]](p1) :: (volatile store (i64) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: SI_RETURN %val = call amdgpu_gfx { i32, i64 } @external_gfx_i32_i64_func_void() %val.0 = extractvalue { i32, i64 } %val, 0 @@ -2603,9 +2607,9 @@ define amdgpu_kernel void @test_call_external_a2i32_func_void() #0 { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -2619,39 +2623,39 @@ define amdgpu_kernel void @test_call_external_a2i32_func_void() #0 { ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) 
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GCN-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; GCN-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; GCN-NEXT: $vgpr31 = COPY [[OR1]](i32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_a2i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr1 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY21]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GCN-NEXT: G_STORE 
[[COPY22]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[COPY21]](i32), [[DEF]](p1) :: (volatile store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[COPY22]](i32), [[DEF]](p1) :: (volatile store (i32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call [2 x i32] @external_a2i32_func_void() %val.0 = extractvalue [2 x i32] %val, 0 @@ -2666,9 +2670,9 @@ define amdgpu_kernel void @test_call_external_a5i8_func_void() #0 { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -2682,55 +2686,55 @@ define amdgpu_kernel void @test_call_external_a5i8_func_void() #0 { ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GCN-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; GCN-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR 
[[OR]], [[SHL1]] + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; GCN-NEXT: $vgpr31 = COPY [[OR1]](i32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_a5i8_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32) - ; GCN-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY22]](s32) - ; GCN-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC2]](s16) - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY23]](s32) - ; GCN-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC4]](s16) - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY24]](s32) - ; GCN-NEXT: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC6]](s16) - ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY25]](s32) - ; GCN-NEXT: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC8]](s16) + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY21]](i32) + ; GCN-NEXT: [[TRUNC1:%[0-9]+]]:_(i8) = G_TRUNC [[TRUNC]](i16) + ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GCN-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY22]](i32) + ; GCN-NEXT: [[TRUNC3:%[0-9]+]]:_(i8) = G_TRUNC [[TRUNC2]](i16) + ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GCN-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY23]](i32) + ; GCN-NEXT: [[TRUNC5:%[0-9]+]]:_(i8) = G_TRUNC [[TRUNC4]](i16) + ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GCN-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[COPY24]](i32) + ; GCN-NEXT: [[TRUNC7:%[0-9]+]]:_(i8) = G_TRUNC [[TRUNC6]](i16) + ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GCN-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[COPY25]](i32) + ; GCN-NEXT: [[TRUNC9:%[0-9]+]]:_(i8) = G_TRUNC [[TRUNC8]](i16) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (volatile store (s8) into `ptr addrspace(1) undef`, addrspace 1) - ; GCN-NEXT: G_STORE [[TRUNC3]](s8), [[DEF]](p1) :: (volatile store (s8) into `ptr addrspace(1) undef`, addrspace 1) - ; GCN-NEXT: G_STORE [[TRUNC5]](s8), [[DEF]](p1) :: (volatile store (s8) into `ptr addrspace(1) undef`, addrspace 1) - ; GCN-NEXT: G_STORE 
[[TRUNC7]](s8), [[DEF]](p1) :: (volatile store (s8) into `ptr addrspace(1) undef`, addrspace 1) - ; GCN-NEXT: G_STORE [[TRUNC9]](s8), [[DEF]](p1) :: (volatile store (s8) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[TRUNC1]](i8), [[DEF]](p1) :: (volatile store (i8) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[TRUNC3]](i8), [[DEF]](p1) :: (volatile store (i8) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[TRUNC5]](i8), [[DEF]](p1) :: (volatile store (i8) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[TRUNC7]](i8), [[DEF]](p1) :: (volatile store (i8) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[TRUNC9]](i8), [[DEF]](p1) :: (volatile store (i8) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call [5 x i8] @external_a5i8_func_void() %val.0 = extractvalue [5 x i8] %val, 0 @@ -2751,9 +2755,9 @@ define amdgpu_kernel void @test_call_external_v32i32_i32_func_void() #0 { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -2768,42 +2772,42 @@ define amdgpu_kernel void @test_call_external_v32i32_i32_func_void() #0 { ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; GCN-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GCN-NEXT: 
[[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GCN-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; GCN-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] ; GCN-NEXT: $vgpr0 = COPY [[FRAME_INDEX]](p5) - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; GCN-NEXT: $vgpr31 = COPY [[OR1]](i32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v32i32_i32_func_void, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[FRAME_INDEX]](p5) :: (load (<32 x s32>) from %stack.0, addrspace 5) - ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 - ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C3]](s32) - ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from %stack.0, align 128, addrspace 5) - ; GCN-NEXT: G_STORE [[LOAD]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `ptr addrspace(1) undef`, align 8, addrspace 1) - ; GCN-NEXT: G_STORE [[LOAD1]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(<32 x i32>) = G_LOAD [[FRAME_INDEX]](p5) :: (load (<32 x i32>) from %stack.0, addrspace 5) + ; GCN-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 128 + ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C3]](i32) + ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from %stack.0, align 128, addrspace 5) + ; GCN-NEXT: G_STORE [[LOAD]](<32 x i32>), [[DEF]](p1) :: (volatile store (<32 x i32>) into `ptr addrspace(1) undef`, align 8, addrspace 1) + ; GCN-NEXT: G_STORE [[LOAD1]](i32), [[DEF]](p1) :: (volatile store (i32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call { <32 x i32>, i32 } @external_v32i32_i32_func_void() %val0 = extractvalue { <32 x i32>, i32 } %val, 0 @@ -2818,9 +2822,9 @@ define amdgpu_kernel void @test_call_external_i32_v32i32_func_void() #0 { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: 
[[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -2835,42 +2839,42 @@ define amdgpu_kernel void @test_call_external_i32_v32i32_func_void() #0 { ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; GCN-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GCN-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; GCN-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] ; GCN-NEXT: $vgpr0 = COPY [[FRAME_INDEX]](p5) - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; GCN-NEXT: $sgpr14 = 
COPY [[COPY16]](i32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; GCN-NEXT: $vgpr31 = COPY [[OR1]](i32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i32_v32i32_func_void, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (load (s32) from %stack.0, align 128, addrspace 5) - ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 - ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C3]](s32) - ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[PTR_ADD1]](p5) :: (load (<32 x s32>) from %stack.0, addrspace 5) - ; GCN-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GCN-NEXT: G_STORE [[LOAD1]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `ptr addrspace(1) undef`, align 8, addrspace 1) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX]](p5) :: (load (i32) from %stack.0, align 128, addrspace 5) + ; GCN-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 128 + ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C3]](i32) + ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x i32>) = G_LOAD [[PTR_ADD1]](p5) :: (load (<32 x i32>) from %stack.0, addrspace 5) + ; GCN-NEXT: G_STORE [[LOAD]](i32), [[DEF]](p1) :: (volatile store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[LOAD1]](<32 x i32>), [[DEF]](p1) :: (volatile store (<32 x i32>) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call { i32, <32 x i32> } @external_i32_v32i32_func_void() %val0 = extractvalue { i32, <32 x i32> } %val, 0 @@ -2885,9 +2889,9 @@ define amdgpu_kernel void @test_call_external_v33i32_func_void() #0 { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -2902,38 +2906,38 @@ define amdgpu_kernel void @test_call_external_v33i32_func_void() #0 { ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - 
; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; GCN-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GCN-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; GCN-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] ; GCN-NEXT: $vgpr0 = COPY [[FRAME_INDEX]](p5) - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; GCN-NEXT: $vgpr31 = COPY [[OR1]](i32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v33i32_func_void, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[FRAME_INDEX]](p5) :: (load (<33 x s32>) from %stack.0, align 256, addrspace 5) - ; GCN-NEXT: G_STORE [[LOAD]](<33 x s32>), [[DEF]](p1) :: (volatile store (<33 x s32>) into `ptr addrspace(1) undef`, align 8, addrspace 1) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(<33 x i32>) = G_LOAD [[FRAME_INDEX]](p5) :: (load (<33 x i32>) from %stack.0, align 256, addrspace 5) + ; GCN-NEXT: G_STORE [[LOAD]](<33 x i32>), [[DEF]](p1) :: (volatile store (<33 x i32>) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <33 x i32> @external_v33i32_func_void() store volatile <33 x i32> %val, ptr addrspace(1) undef, align 8 @@ -2945,9 +2949,9 @@ define amdgpu_kernel void @test_call_external_v33i32_func_v33i32_i32(ptr addrspa ; GCN: 
bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -2958,51 +2962,51 @@ define amdgpu_kernel void @test_call_external_v33i32_func_v33i32_i32(ptr addrspa ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p1) from %ir.p.kernarg.offset1, align 16, addrspace 4) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GCN-NEXT: %18:_(p4) = nuw nusw G_PTR_ADD [[INT]], [[C]](s64) - ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD %18(p4) :: (dereferenceable invariant load (s32) from %ir.idx.kernarg.offset, align 8, addrspace 4) + ; GCN-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; GCN-NEXT: %18:_(p4) = nuw nusw G_PTR_ADD [[INT]], [[C]](i64) + ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD %18(p4) :: (dereferenceable invariant load (i32) from %ir.idx.kernarg.offset, align 8, addrspace 4) ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0 ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v33i32_func_v33i32_i32 ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) - ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p1) + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](i64) + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; GCN-NEXT: 
[[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GCN-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C2]](i32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GCN-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C3]](i32) + ; GCN-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; GCN-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](p1) ; GCN-NEXT: $vgpr0 = COPY [[FRAME_INDEX]](p5) - ; GCN-NEXT: $vgpr1 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[UV1]](s32) - ; GCN-NEXT: $vgpr3 = COPY [[LOAD1]](s32) - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: $vgpr1 = COPY [[UV]](i32) + ; GCN-NEXT: $vgpr2 = COPY [[UV1]](i32) + ; GCN-NEXT: $vgpr3 = COPY [[LOAD1]](i32) + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; GCN-NEXT: $vgpr31 = COPY [[OR1]](i32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v33i32_func_v33i32_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: [[LOAD2:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[FRAME_INDEX]](p5) :: (load (<33 x s32>) from %stack.0, align 256, addrspace 5) - ; GCN-NEXT: G_STORE [[LOAD2]](<33 x s32>), [[DEF]](p1) :: (volatile store (<33 x s32>) into `ptr addrspace(1) undef`, align 8, addrspace 1) + ; GCN-NEXT: [[LOAD2:%[0-9]+]]:_(<33 x i32>) = G_LOAD [[FRAME_INDEX]](p5) :: (load (<33 x i32>) from %stack.0, align 256, addrspace 5) + ; GCN-NEXT: G_STORE [[LOAD2]](<33 x i32>), [[DEF]](p1) :: (volatile store (<33 x i32>) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <33 x i32> @external_v33i32_func_v33i32_i32(ptr addrspace(1) %p, i32 %idx) store volatile <33 x i32> %val, ptr addrspace(1) undef, align 8 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll index 6000e9c60aac4..a0c709ab184d0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll @@ -8,9 +8,9 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval ; GCN: bb.1 (%ir-block.1): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, 
$sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -18,61 +18,61 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; GCN-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 3 - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GCN-NEXT: [[C:%[0-9]+]]:_(i8) = G_CONSTANT i8 3 + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.in.val ; GCN-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.1.out.val ; GCN-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GCN-NEXT: %18:_(p5) = nuw nusw G_PTR_ADD [[FRAME_INDEX]], [[C2]](s32) - ; GCN-NEXT: G_STORE [[C]](s8), [[FRAME_INDEX]](p5) :: (store (s8) into %ir.in.val, addrspace 5) - ; GCN-NEXT: G_STORE [[C1]](s32), %18(p5) :: (store (s32) into %ir.in.gep1, addrspace 5) + ; GCN-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GCN-NEXT: %18:_(p5) = nuw nusw G_PTR_ADD [[FRAME_INDEX]], [[C2]](i32) + ; GCN-NEXT: G_STORE [[C]](i8), [[FRAME_INDEX]](p5) :: (store (i8) into %ir.in.val, addrspace 5) + ; GCN-NEXT: G_STORE [[C1]](i32), %18(p5) :: (store (i32) into %ir.in.gep1, addrspace 5) ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32 ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; GCN-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C3]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GCN-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C4]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C5]](s32) - ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; GCN-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C3]](i64) + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; GCN-NEXT: 
[[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GCN-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C4]](i32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GCN-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C5]](i32) + ; GCN-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] ; GCN-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg - ; GCN-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C6]](s32) - ; GCN-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GCN-NEXT: G_MEMCPY [[PTR_ADD1]](p5), [[FRAME_INDEX]](p5), [[C7]](s32), 0 :: (dereferenceable store (s64) into stack, align 4, addrspace 5), (dereferenceable load (s64) from %ir.in.val, align 4, addrspace 5) + ; GCN-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C6]](i32) + ; GCN-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GCN-NEXT: G_MEMCPY [[PTR_ADD1]](p5), [[FRAME_INDEX]](p5), [[C7]](i32), 0 :: (dereferenceable store (i64) into stack, align 4, addrspace 5), (dereferenceable load (i64) from %ir.in.val, align 4, addrspace 5) ; GCN-NEXT: $vgpr0 = COPY [[FRAME_INDEX1]](p5) - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; GCN-NEXT: $vgpr31 = COPY [[OR1]](i32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 8, implicit-def $scc - ; GCN-NEXT: %46:_(p5) = nuw nusw G_PTR_ADD [[FRAME_INDEX1]], [[C2]](s32) - ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[FRAME_INDEX1]](p5) :: (dereferenceable load (s8) from %ir.out.val, addrspace 5) - ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD %46(p5) :: (dereferenceable load (s32) from %ir.out.gep1, addrspace 5) - ; GCN-NEXT: G_STORE [[LOAD]](s8), [[DEF]](p1) :: (volatile store (s8) into `ptr addrspace(1) undef`, addrspace 1) - ; GCN-NEXT: G_STORE [[LOAD1]](s32), [[DEF]](p1) :: (volatile 
store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: %46:_(p5) = nuw nusw G_PTR_ADD [[FRAME_INDEX1]], [[C2]](i32) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(i8) = G_LOAD [[FRAME_INDEX1]](p5) :: (dereferenceable load (i8) from %ir.out.val, addrspace 5) + ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD %46(p5) :: (dereferenceable load (i32) from %ir.out.gep1, addrspace 5) + ; GCN-NEXT: G_STORE [[LOAD]](i8), [[DEF]](p1) :: (volatile store (i8) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[LOAD1]](i32), [[DEF]](p1) :: (volatile store (i32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %in.val = alloca { i8, i32 }, align 4, addrspace(5) %out.val = alloca { i8, i32 }, align 4, addrspace(5) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll index 78f33a174980d..08f5baf455b0d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll @@ -117,9 +117,9 @@ define amdgpu_kernel void @test_call_external_void_func_void() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -132,33 +132,33 @@ define amdgpu_kernel void @test_call_external_void_func_void() #0 { ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] 
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -171,8 +171,8 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_void() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_void - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x i32>) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_void_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -185,7 +185,7 @@ define void @test_func_call_external_void_func_void() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr31 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 @@ -199,23 +199,23 @@ define void @test_func_call_external_void_func_void() #0 { ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) 
= COPY [[COPY5]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(i64) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -228,9 +228,9 @@ define amdgpu_kernel void @test_call_external_void_func_empty_struct() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -238,40 +238,40 @@ define amdgpu_kernel void @test_call_external_void_func_empty_struct() #0 { ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 23 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_empty_struct ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; 
CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C2]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C3]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: $vgpr0 = COPY [[C]](i32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_empty_struct, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; 
CHECK-NEXT: S_ENDPGM 0 @@ -284,9 +284,9 @@ define amdgpu_kernel void @test_call_external_void_func_empty_array() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -294,40 +294,40 @@ define amdgpu_kernel void @test_call_external_void_func_empty_array() #0 { ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 23 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_empty_array ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C2]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: 
[[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C3]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: $vgpr0 = COPY [[C]](i32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_empty_array, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -340,9 +340,9 @@ define amdgpu_kernel void @test_call_external_void_func_i1_imm() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -350,41 +350,41 @@ define amdgpu_kernel void @test_call_external_void_func_i1_imm() #0 { ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i1) = G_CONSTANT i1 true ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i1 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; 
CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s1) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C2]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C3]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[C]](i1) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i1, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -397,9 +397,9 @@ define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) #0 { ; CHECK: bb.1 (%ir-block.1): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; 
CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -409,41 +409,41 @@ define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) #0 { ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s1) from `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i1) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (i1) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i1_signext ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s1) - ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT 
[[LOAD]](i1) + ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](i32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i1_signext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -457,9 +457,9 @@ define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 { ; CHECK: bb.1 (%ir-block.1): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -469,41 +469,41 @@ define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 { ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s1) from `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i1) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (i1) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i1_zeroext ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; 
CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s1) - ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[LOAD]](i1) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](i32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i1_zeroext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -517,9 +517,9 @@ define amdgpu_kernel void @test_call_external_void_func_i8_imm(i32) #0 { ; CHECK: bb.1 (%ir-block.1): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; 
CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -527,43 +527,43 @@ define amdgpu_kernel void @test_call_external_void_func_i8_imm(i32) #0 { ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 123 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i8) = G_CONSTANT i8 123 ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i8 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[C]](s8) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT1]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C2]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C3]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: 
[[ANYEXT:%[0-9]+]]:_(i16) = G_ANYEXT [[C]](i8) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT1]](i32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i8, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -576,9 +576,9 @@ define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 { ; CHECK: bb.1 (%ir-block.1): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -588,42 +588,42 @@ define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 { ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s8) from `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i8) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (i8) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i8_signext ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: 
[[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s16) = G_SEXT [[LOAD]](s8) - ; CHECK-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[SEXT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[SEXT1]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(i16) = G_SEXT [[LOAD]](i8) + ; CHECK-NEXT: [[SEXT1:%[0-9]+]]:_(i32) = G_SEXT [[SEXT]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[SEXT1]](i32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i8_signext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -637,9 +637,9 @@ define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 { ; CHECK: bb.1 (%ir-block.1): ; CHECK-NEXT: liveins: $sgpr14, 
$sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -649,42 +649,42 @@ define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 { ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s8) from `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i8) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (i8) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i8_zeroext ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[LOAD]](s8) - ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ZEXT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT1]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) 
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i16) = G_ZEXT [[LOAD]](i8) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[ZEXT]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT1]](i32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i8_zeroext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -698,9 +698,9 @@ define amdgpu_kernel void @test_call_external_void_func_i16_imm() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -708,41 +708,41 @@ define amdgpu_kernel void @test_call_external_void_func_i16_imm() #0 { ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 123 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 123 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i16 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: 
[[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C2]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C3]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[C]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i16, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -755,9 +755,9 @@ define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 { ; CHECK: bb.1 (%ir-block.1): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, 
$sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -767,41 +767,41 @@ define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 { ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s16) from `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i16) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (i16) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i16_signext ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + 
; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[LOAD]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](i32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i16_signext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -815,9 +815,9 @@ define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 { ; CHECK: bb.1 (%ir-block.1): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -827,41 +827,41 @@ define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 { ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s16) from `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i16) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (i16) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i16_zeroext ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY 
[[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[LOAD]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](i32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i16_zeroext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -875,9 +875,9 @@ define amdgpu_kernel void 
@test_call_external_void_func_i32_imm(i32) #0 { ; CHECK: bb.1 (%ir-block.1): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -885,41 +885,41 @@ define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 { ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 42 ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C2]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; 
CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C3]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: $vgpr0 = COPY [[C]](i32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -932,13 +932,13 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm(i32) #0 { ; CHECK: bb.1 (%ir-block.1): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 42 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_i32 - ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[C]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x i32>) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -951,14 +951,14 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm_inreg(i32 inreg ; CHECK: bb.1 (%ir-block.1): ; CHECK-NEXT: liveins: $sgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 42 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_i32_inreg - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[C]](s32) - ; CHECK-NEXT: $sgpr4 = COPY [[INTRINSIC_CONVERGENT]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) + ; 
CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[C]](i32) + ; CHECK-NEXT: $sgpr4 = COPY [[INTRINSIC_CONVERGENT]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x i32>) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -971,9 +971,9 @@ define amdgpu_kernel void @test_call_external_void_func_i64_imm() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -981,42 +981,42 @@ define amdgpu_kernel void @test_call_external_void_func_i64_imm() #0 { ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 123 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 123 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i64 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](i64) + ; CHECK-NEXT: 
[[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C2]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C3]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C]](i64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -1029,9 +1029,9 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1040,44 +1040,44 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64() #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[C]](p1) :: ("amdgpu-noclobber" load (<2 x s64>) from `ptr addrspace(1) null`, addrspace 1) + ; CHECK-NEXT: 
[[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[C]](p1) :: ("amdgpu-noclobber" load (<2 x i64>) from `ptr addrspace(1) null`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2i64 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C2]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C3]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<2 x i64>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = 
COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -1091,9 +1091,9 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64_imm() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1101,46 +1101,46 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64_imm() #0 { ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934593 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 17179869187 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C1]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 8589934593 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 17179869187 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[C]](i64), [[C1]](i64) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2i64 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C2]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C3]](s32) - ; CHECK-NEXT: 
[[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C4]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s64>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C2]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C3]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C4]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x i64>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -1153,9 +1153,9 @@ define amdgpu_kernel void @test_call_external_void_func_i48(i32) #0 { ; CHECK: bb.1 
(%ir-block.1): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1165,43 +1165,43 @@ define amdgpu_kernel void @test_call_external_void_func_i48(i32) #0 { ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s48) from `ptr addrspace(1) undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i48) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (i48) from `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i48 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s48) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY 
[[COPY1]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i48) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ANYEXT]](i64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i48, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -1215,9 +1215,9 @@ define amdgpu_kernel void @test_call_external_void_func_i48_signext(i32) #0 { ; CHECK: bb.1 (%ir-block.1): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1227,43 +1227,43 @@ define amdgpu_kernel void @test_call_external_void_func_i48_signext(i32) #0 { ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s48) from `ptr addrspace(1) undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i48) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (i48) from `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE 
@external_void_func_i48_signext ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s48) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(i64) = G_SEXT [[LOAD]](i48) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SEXT]](i64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY 
[[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i48_signext, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -1277,9 +1277,9 @@ define amdgpu_kernel void @test_call_external_void_func_i48_zeroext(i32) #0 { ; CHECK: bb.1 (%ir-block.1): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1289,43 +1289,43 @@ define amdgpu_kernel void @test_call_external_void_func_i48_zeroext(i32) #0 { ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s48) from `ptr addrspace(1) undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i48) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (i48) from `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i48_zeroext ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s48) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: 
$vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[LOAD]](i48) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ZEXT]](i64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i48_zeroext, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -1339,9 +1339,9 @@ define amdgpu_kernel void @test_call_external_void_func_p0_imm(ptr %arg) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1356,36 +1356,36 @@ 
define amdgpu_kernel void @test_call_external_void_func_p0_imm(ptr %arg) #0 { ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p0) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](p0) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = 
COPY [[DEF]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_p0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -1398,9 +1398,9 @@ define amdgpu_kernel void @test_call_external_void_func_v2p0() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1415,38 +1415,38 @@ define amdgpu_kernel void @test_call_external_void_func_v2p0() #0 { ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x p0>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: 
[[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C2]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C3]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<2 x p0>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2p0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -1460,9 +1460,9 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1471,50 +1471,50 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934593 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C1]](s64), [[DEF]](s64) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[C]](p1) :: ("amdgpu-noclobber" load (<2 x s64>) from `ptr 
addrspace(1) null`, addrspace 1) - ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<3 x s64>) = G_SHUFFLE_VECTOR [[LOAD]](<2 x s64>), [[BUILD_VECTOR]], shufflemask(0, 1, 2) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8589934593 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[C1]](i64), [[DEF]](i64) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[C]](p1) :: ("amdgpu-noclobber" load (<2 x i64>) from `ptr addrspace(1) null`, addrspace 1) + ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<3 x i64>) = G_SHUFFLE_VECTOR [[LOAD]](<2 x i64>), [[BUILD_VECTOR]], shufflemask(0, 1, 2) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3i64 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C2]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C3]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C4]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHUF]](<3 x s64>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C2]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C3]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C4]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], 
[[SHL1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SHUF]](<3 x i64>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](i32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v3i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -1530,9 +1530,9 @@ define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1541,52 +1541,52 @@ define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934593 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 17179869187 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C1]](s64), [[C2]](s64) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[C]](p1) :: ("amdgpu-noclobber" load (<2 x s64>) from `ptr addrspace(1) null`, addrspace 1) - ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s64>) = G_SHUFFLE_VECTOR [[LOAD]](<2 x s64>), [[BUILD_VECTOR]], shufflemask(0, 1, 2, 3) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8589934593 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 17179869187 + ; CHECK-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[C1]](i64), [[C2]](i64) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[C]](p1) :: ("amdgpu-noclobber" load (<2 x i64>) from `ptr addrspace(1) null`, addrspace 1) + ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x i64>) = G_SHUFFLE_VECTOR [[LOAD]](<2 x i64>), [[BUILD_VECTOR]], shufflemask(0, 1, 2, 3) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v4i64 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C3]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C4]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C5]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHUF]](<4 x s64>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C3]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C4]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C5]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), 
[[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SHUF]](<4 x i64>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](i32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](i32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](i32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v4i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -1601,9 +1601,9 @@ define amdgpu_kernel void @test_call_external_void_func_f16_imm() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1611,41 +1611,42 @@ define amdgpu_kernel void @test_call_external_void_func_f16_imm() #0 { ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH4400 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH4400 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_f16 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; 
CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C2]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C3]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i16) = G_BITCAST [[C]](f16) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_f16, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -1658,9 +1659,9 @@ define 
amdgpu_kernel void @test_call_external_void_func_f32_imm() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1668,40 +1669,40 @@ define amdgpu_kernel void @test_call_external_void_func_f32_imm() #0 { ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 4.000000e+00 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 4.000000e+00 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_f32 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C2]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; 
CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C3]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: $vgpr0 = COPY [[C]](f32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_f32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -1714,9 +1715,9 @@ define amdgpu_kernel void @test_call_external_void_func_v2f32_imm() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1724,44 +1725,44 @@ define amdgpu_kernel void @test_call_external_void_func_v2f32_imm() #0 { ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 2.000000e+00 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 2.000000e+00 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[C]](f32), [[C1]](f32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2f32 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C2]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] 
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C3]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C4]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C2]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C3]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C4]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x f32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](f32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2f32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def 
$scc ; CHECK-NEXT: S_ENDPGM 0 @@ -1774,9 +1775,9 @@ define amdgpu_kernel void @test_call_external_void_func_v3f32_imm() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1784,46 +1785,46 @@ define amdgpu_kernel void @test_call_external_void_func_v3f32_imm() #0 { ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 2.000000e+00 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 4.000000e+00 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 2.000000e+00 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 4.000000e+00 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[C]](f32), [[C1]](f32), [[C2]](f32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3f32 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C3]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C4]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C5]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: 
[[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C3]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C4]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C5]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x f32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](f32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v3f32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -1836,9 +1837,9 @@ define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1846,50 +1847,50 @@ define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() #0 { ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: 
[[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 2.000000e+00 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 4.000000e+00 - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.000000e+00 - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32), [[C3]](s32), [[C4]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 2.000000e+00 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 4.000000e+00 + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float -1.000000e+00 + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 5.000000e-01 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x f32>) = G_BUILD_VECTOR [[C]](f32), [[C1]](f32), [[C2]](f32), [[C3]](f32), [[C4]](f32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v5f32 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C5]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C6]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C7]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<5 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C5]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C6]](i32) + 
; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C7]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32), [[UV4:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<5 x f32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](f32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](f32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](f32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v5f32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -1902,9 +1903,9 @@ define amdgpu_kernel void @test_call_external_void_func_f64_imm() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1912,42 +1913,43 @@ define amdgpu_kernel void @test_call_external_void_func_f64_imm() #0 { ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 4.000000e+00 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(f64) = G_FCONSTANT double 4.000000e+00 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_f64 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: 
[[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C2]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C3]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i64) = G_BITCAST [[C]](f64) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](i64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), 
@external_void_func_f64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -1960,9 +1962,9 @@ define amdgpu_kernel void @test_call_external_void_func_v2f64_imm() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1970,46 +1972,47 @@ define amdgpu_kernel void @test_call_external_void_func_v2f64_imm() #0 { ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 4.000000e+00 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C1]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(f64) = G_FCONSTANT double 2.000000e+00 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(f64) = G_FCONSTANT double 4.000000e+00 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f64>) = G_BUILD_VECTOR [[C]](f64), [[C1]](f64) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2f64 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C2]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C3]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C4]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s64>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY 
[[UV3]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C2]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C3]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C4]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BUILD_VECTOR]](<2 x f64>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<2 x i64>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2f64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -2022,9 +2025,9 @@ define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: 
[[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -2032,49 +2035,50 @@ define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() #0 { ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 4.000000e+00 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_FCONSTANT double 8.000000e+00 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C1]](s64), [[C2]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(f64) = G_FCONSTANT double 2.000000e+00 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(f64) = G_FCONSTANT double 4.000000e+00 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(f64) = G_FCONSTANT double 8.000000e+00 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f64>) = G_BUILD_VECTOR [[C]](f64), [[C1]](f64), [[C2]](f64) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3f64 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C3]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C4]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C5]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s64>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C3]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; 
CHECK-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C4]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C5]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x i64>) = G_BITCAST [[BUILD_VECTOR]](<3 x f64>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<3 x i64>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](i32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v3f64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -2087,9 +2091,9 @@ define amdgpu_kernel void @test_call_external_void_func_v2i16() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -2098,40 +2102,40 @@ define amdgpu_kernel void @test_call_external_void_func_v2i16() #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<2 x s16>) from `ptr addrspace(1) 
undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<2 x i16>) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2i16 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; 
CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2i16, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -2145,9 +2149,9 @@ define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -2156,45 +2160,45 @@ define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<3 x s16>) from `ptr addrspace(1) undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<3 x i16>) from `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3i16 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<3 x s16>) - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[DEF2]](s16) - ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES 
[[BUILD_VECTOR]](<4 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV3]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV4]](<2 x s16>) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i16), [[UV1:%[0-9]+]]:_(i16), [[UV2:%[0-9]+]]:_(i16) = G_UNMERGE_VALUES [[LOAD]](<3 x i16>) + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i16>) = G_BUILD_VECTOR [[UV]](i16), [[UV1]](i16), [[UV2]](i16), [[DEF2]](i16) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i16>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV3]](<2 x i16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV4]](<2 x i16>) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v3i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -2208,9 +2212,9 @@ define amdgpu_kernel void @test_call_external_void_func_v3f16() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; 
CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -2219,45 +2223,45 @@ define amdgpu_kernel void @test_call_external_void_func_v3f16() #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<3 x s16>) from `ptr addrspace(1) undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x f16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<3 x f16>) from `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3f16 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<3 x s16>) - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[DEF2]](s16) - ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV3]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV4]](<2 x s16>) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: 
[[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(f16), [[UV1:%[0-9]+]]:_(f16), [[UV2:%[0-9]+]]:_(f16) = G_UNMERGE_VALUES [[LOAD]](<3 x f16>) + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(f16) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x f16>) = G_BUILD_VECTOR [[UV]](f16), [[UV1]](f16), [[UV2]](f16), [[DEF2]](f16) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x f16>), [[UV4:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x f16>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV3]](<2 x f16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV4]](<2 x f16>) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v3f16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -2271,9 +2275,9 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -2282,42 +2286,42 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16() #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<4 x s16>) from `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<4 x i16>) from `ptr addrspace(1) undef`, 
addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v4i16 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[LOAD]](<4 x i16>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x i16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x i16>) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY 
[[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v4i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -2331,9 +2335,9 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16_imm() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -2341,46 +2345,46 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16_imm() #0 { ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 2 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 3 - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 4 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C1]](s16), [[C2]](s16), [[C3]](s16) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 2 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 3 + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 4 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i16>) = G_BUILD_VECTOR [[C]](i16), [[C1]](i16), [[C2]](i16), [[C3]](i16) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v4i16 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C4]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C5]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: 
[[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C6]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C4]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C5]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C6]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i16>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x i16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x i16>) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v4i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -2393,9 +2397,9 @@ define amdgpu_kernel void @test_call_external_void_func_v5i16() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) 
= COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -2404,46 +2408,46 @@ define amdgpu_kernel void @test_call_external_void_func_v5i16() #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<5 x s16>) from `ptr addrspace(1) undef`, align 16, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<5 x i16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<5 x i16>) from `ptr addrspace(1) undef`, align 16, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v5i16 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<5 x s16>) - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<6 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[DEF2]](s16) - ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<6 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV5]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV6]](<2 x s16>) - ; CHECK-NEXT: $vgpr2 = COPY [[UV7]](<2 x s16>) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = 
COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i16), [[UV1:%[0-9]+]]:_(i16), [[UV2:%[0-9]+]]:_(i16), [[UV3:%[0-9]+]]:_(i16), [[UV4:%[0-9]+]]:_(i16) = G_UNMERGE_VALUES [[LOAD]](<5 x i16>) + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<6 x i16>) = G_BUILD_VECTOR [[UV]](i16), [[UV1]](i16), [[UV2]](i16), [[UV3]](i16), [[UV4]](i16), [[DEF2]](i16) + ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(<2 x i16>), [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<6 x i16>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV5]](<2 x i16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV6]](<2 x i16>) + ; CHECK-NEXT: $vgpr2 = COPY [[UV7]](<2 x i16>) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v5i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -2457,9 +2461,9 @@ define amdgpu_kernel void @test_call_external_void_func_v7i16() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -2468,47 +2472,47 @@ define amdgpu_kernel void @test_call_external_void_func_v7i16() #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: 
[[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<7 x s16>) from `ptr addrspace(1) undef`, align 16, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<7 x i16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<7 x i16>) from `ptr addrspace(1) undef`, align 16, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v7i16 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<7 x s16>) - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[DEF2]](s16) - ; CHECK-NEXT: [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV7]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV8]](<2 x s16>) - ; CHECK-NEXT: $vgpr2 = COPY [[UV9]](<2 x s16>) - ; CHECK-NEXT: $vgpr3 = COPY [[UV10]](<2 x s16>) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; CHECK-NEXT: 
[[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i16), [[UV1:%[0-9]+]]:_(i16), [[UV2:%[0-9]+]]:_(i16), [[UV3:%[0-9]+]]:_(i16), [[UV4:%[0-9]+]]:_(i16), [[UV5:%[0-9]+]]:_(i16), [[UV6:%[0-9]+]]:_(i16) = G_UNMERGE_VALUES [[LOAD]](<7 x i16>) + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i16>) = G_BUILD_VECTOR [[UV]](i16), [[UV1]](i16), [[UV2]](i16), [[UV3]](i16), [[UV4]](i16), [[UV5]](i16), [[UV6]](i16), [[DEF2]](i16) + ; CHECK-NEXT: [[UV7:%[0-9]+]]:_(<2 x i16>), [[UV8:%[0-9]+]]:_(<2 x i16>), [[UV9:%[0-9]+]]:_(<2 x i16>), [[UV10:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x i16>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV7]](<2 x i16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV8]](<2 x i16>) + ; CHECK-NEXT: $vgpr2 = COPY [[UV9]](<2 x i16>) + ; CHECK-NEXT: $vgpr3 = COPY [[UV10]](<2 x i16>) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v7i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -2522,9 +2526,9 @@ define amdgpu_kernel void @test_call_external_void_func_v63i16() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -2533,78 +2537,78 @@ define amdgpu_kernel void @test_call_external_void_func_v63i16() #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<63 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<63 x s16>) from `ptr addrspace(1) undef`, align 128, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<63 x i16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<63 x 
i16>) from `ptr addrspace(1) undef`, align 128, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v63i16 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16), [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16), [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16), [[UV12:%[0-9]+]]:_(s16), [[UV13:%[0-9]+]]:_(s16), [[UV14:%[0-9]+]]:_(s16), [[UV15:%[0-9]+]]:_(s16), [[UV16:%[0-9]+]]:_(s16), [[UV17:%[0-9]+]]:_(s16), [[UV18:%[0-9]+]]:_(s16), [[UV19:%[0-9]+]]:_(s16), [[UV20:%[0-9]+]]:_(s16), [[UV21:%[0-9]+]]:_(s16), [[UV22:%[0-9]+]]:_(s16), [[UV23:%[0-9]+]]:_(s16), [[UV24:%[0-9]+]]:_(s16), [[UV25:%[0-9]+]]:_(s16), [[UV26:%[0-9]+]]:_(s16), [[UV27:%[0-9]+]]:_(s16), [[UV28:%[0-9]+]]:_(s16), [[UV29:%[0-9]+]]:_(s16), [[UV30:%[0-9]+]]:_(s16), [[UV31:%[0-9]+]]:_(s16), [[UV32:%[0-9]+]]:_(s16), [[UV33:%[0-9]+]]:_(s16), [[UV34:%[0-9]+]]:_(s16), [[UV35:%[0-9]+]]:_(s16), [[UV36:%[0-9]+]]:_(s16), [[UV37:%[0-9]+]]:_(s16), [[UV38:%[0-9]+]]:_(s16), [[UV39:%[0-9]+]]:_(s16), [[UV40:%[0-9]+]]:_(s16), [[UV41:%[0-9]+]]:_(s16), [[UV42:%[0-9]+]]:_(s16), [[UV43:%[0-9]+]]:_(s16), [[UV44:%[0-9]+]]:_(s16), [[UV45:%[0-9]+]]:_(s16), [[UV46:%[0-9]+]]:_(s16), [[UV47:%[0-9]+]]:_(s16), [[UV48:%[0-9]+]]:_(s16), [[UV49:%[0-9]+]]:_(s16), [[UV50:%[0-9]+]]:_(s16), [[UV51:%[0-9]+]]:_(s16), [[UV52:%[0-9]+]]:_(s16), [[UV53:%[0-9]+]]:_(s16), [[UV54:%[0-9]+]]:_(s16), [[UV55:%[0-9]+]]:_(s16), [[UV56:%[0-9]+]]:_(s16), [[UV57:%[0-9]+]]:_(s16), [[UV58:%[0-9]+]]:_(s16), [[UV59:%[0-9]+]]:_(s16), [[UV60:%[0-9]+]]:_(s16), [[UV61:%[0-9]+]]:_(s16), [[UV62:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<63 x s16>) - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<64 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[UV7]](s16), [[UV8]](s16), [[UV9]](s16), [[UV10]](s16), [[UV11]](s16), [[UV12]](s16), [[UV13]](s16), [[UV14]](s16), [[UV15]](s16), [[UV16]](s16), [[UV17]](s16), [[UV18]](s16), [[UV19]](s16), [[UV20]](s16), [[UV21]](s16), [[UV22]](s16), [[UV23]](s16), [[UV24]](s16), [[UV25]](s16), [[UV26]](s16), [[UV27]](s16), [[UV28]](s16), 
[[UV29]](s16), [[UV30]](s16), [[UV31]](s16), [[UV32]](s16), [[UV33]](s16), [[UV34]](s16), [[UV35]](s16), [[UV36]](s16), [[UV37]](s16), [[UV38]](s16), [[UV39]](s16), [[UV40]](s16), [[UV41]](s16), [[UV42]](s16), [[UV43]](s16), [[UV44]](s16), [[UV45]](s16), [[UV46]](s16), [[UV47]](s16), [[UV48]](s16), [[UV49]](s16), [[UV50]](s16), [[UV51]](s16), [[UV52]](s16), [[UV53]](s16), [[UV54]](s16), [[UV55]](s16), [[UV56]](s16), [[UV57]](s16), [[UV58]](s16), [[UV59]](s16), [[UV60]](s16), [[UV61]](s16), [[UV62]](s16), [[DEF2]](s16) - ; CHECK-NEXT: [[UV63:%[0-9]+]]:_(<2 x s16>), [[UV64:%[0-9]+]]:_(<2 x s16>), [[UV65:%[0-9]+]]:_(<2 x s16>), [[UV66:%[0-9]+]]:_(<2 x s16>), [[UV67:%[0-9]+]]:_(<2 x s16>), [[UV68:%[0-9]+]]:_(<2 x s16>), [[UV69:%[0-9]+]]:_(<2 x s16>), [[UV70:%[0-9]+]]:_(<2 x s16>), [[UV71:%[0-9]+]]:_(<2 x s16>), [[UV72:%[0-9]+]]:_(<2 x s16>), [[UV73:%[0-9]+]]:_(<2 x s16>), [[UV74:%[0-9]+]]:_(<2 x s16>), [[UV75:%[0-9]+]]:_(<2 x s16>), [[UV76:%[0-9]+]]:_(<2 x s16>), [[UV77:%[0-9]+]]:_(<2 x s16>), [[UV78:%[0-9]+]]:_(<2 x s16>), [[UV79:%[0-9]+]]:_(<2 x s16>), [[UV80:%[0-9]+]]:_(<2 x s16>), [[UV81:%[0-9]+]]:_(<2 x s16>), [[UV82:%[0-9]+]]:_(<2 x s16>), [[UV83:%[0-9]+]]:_(<2 x s16>), [[UV84:%[0-9]+]]:_(<2 x s16>), [[UV85:%[0-9]+]]:_(<2 x s16>), [[UV86:%[0-9]+]]:_(<2 x s16>), [[UV87:%[0-9]+]]:_(<2 x s16>), [[UV88:%[0-9]+]]:_(<2 x s16>), [[UV89:%[0-9]+]]:_(<2 x s16>), [[UV90:%[0-9]+]]:_(<2 x s16>), [[UV91:%[0-9]+]]:_(<2 x s16>), [[UV92:%[0-9]+]]:_(<2 x s16>), [[UV93:%[0-9]+]]:_(<2 x s16>), [[UV94:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<64 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i16), [[UV1:%[0-9]+]]:_(i16), [[UV2:%[0-9]+]]:_(i16), [[UV3:%[0-9]+]]:_(i16), [[UV4:%[0-9]+]]:_(i16), [[UV5:%[0-9]+]]:_(i16), [[UV6:%[0-9]+]]:_(i16), [[UV7:%[0-9]+]]:_(i16), [[UV8:%[0-9]+]]:_(i16), [[UV9:%[0-9]+]]:_(i16), [[UV10:%[0-9]+]]:_(i16), [[UV11:%[0-9]+]]:_(i16), [[UV12:%[0-9]+]]:_(i16), [[UV13:%[0-9]+]]:_(i16), [[UV14:%[0-9]+]]:_(i16), [[UV15:%[0-9]+]]:_(i16), [[UV16:%[0-9]+]]:_(i16), [[UV17:%[0-9]+]]:_(i16), [[UV18:%[0-9]+]]:_(i16), [[UV19:%[0-9]+]]:_(i16), [[UV20:%[0-9]+]]:_(i16), [[UV21:%[0-9]+]]:_(i16), [[UV22:%[0-9]+]]:_(i16), [[UV23:%[0-9]+]]:_(i16), [[UV24:%[0-9]+]]:_(i16), [[UV25:%[0-9]+]]:_(i16), [[UV26:%[0-9]+]]:_(i16), [[UV27:%[0-9]+]]:_(i16), [[UV28:%[0-9]+]]:_(i16), [[UV29:%[0-9]+]]:_(i16), [[UV30:%[0-9]+]]:_(i16), [[UV31:%[0-9]+]]:_(i16), [[UV32:%[0-9]+]]:_(i16), [[UV33:%[0-9]+]]:_(i16), [[UV34:%[0-9]+]]:_(i16), [[UV35:%[0-9]+]]:_(i16), [[UV36:%[0-9]+]]:_(i16), [[UV37:%[0-9]+]]:_(i16), 
[[UV38:%[0-9]+]]:_(i16), [[UV39:%[0-9]+]]:_(i16), [[UV40:%[0-9]+]]:_(i16), [[UV41:%[0-9]+]]:_(i16), [[UV42:%[0-9]+]]:_(i16), [[UV43:%[0-9]+]]:_(i16), [[UV44:%[0-9]+]]:_(i16), [[UV45:%[0-9]+]]:_(i16), [[UV46:%[0-9]+]]:_(i16), [[UV47:%[0-9]+]]:_(i16), [[UV48:%[0-9]+]]:_(i16), [[UV49:%[0-9]+]]:_(i16), [[UV50:%[0-9]+]]:_(i16), [[UV51:%[0-9]+]]:_(i16), [[UV52:%[0-9]+]]:_(i16), [[UV53:%[0-9]+]]:_(i16), [[UV54:%[0-9]+]]:_(i16), [[UV55:%[0-9]+]]:_(i16), [[UV56:%[0-9]+]]:_(i16), [[UV57:%[0-9]+]]:_(i16), [[UV58:%[0-9]+]]:_(i16), [[UV59:%[0-9]+]]:_(i16), [[UV60:%[0-9]+]]:_(i16), [[UV61:%[0-9]+]]:_(i16), [[UV62:%[0-9]+]]:_(i16) = G_UNMERGE_VALUES [[LOAD]](<63 x i16>) + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<64 x i16>) = G_BUILD_VECTOR [[UV]](i16), [[UV1]](i16), [[UV2]](i16), [[UV3]](i16), [[UV4]](i16), [[UV5]](i16), [[UV6]](i16), [[UV7]](i16), [[UV8]](i16), [[UV9]](i16), [[UV10]](i16), [[UV11]](i16), [[UV12]](i16), [[UV13]](i16), [[UV14]](i16), [[UV15]](i16), [[UV16]](i16), [[UV17]](i16), [[UV18]](i16), [[UV19]](i16), [[UV20]](i16), [[UV21]](i16), [[UV22]](i16), [[UV23]](i16), [[UV24]](i16), [[UV25]](i16), [[UV26]](i16), [[UV27]](i16), [[UV28]](i16), [[UV29]](i16), [[UV30]](i16), [[UV31]](i16), [[UV32]](i16), [[UV33]](i16), [[UV34]](i16), [[UV35]](i16), [[UV36]](i16), [[UV37]](i16), [[UV38]](i16), [[UV39]](i16), [[UV40]](i16), [[UV41]](i16), [[UV42]](i16), [[UV43]](i16), [[UV44]](i16), [[UV45]](i16), [[UV46]](i16), [[UV47]](i16), [[UV48]](i16), [[UV49]](i16), [[UV50]](i16), [[UV51]](i16), [[UV52]](i16), [[UV53]](i16), [[UV54]](i16), [[UV55]](i16), [[UV56]](i16), [[UV57]](i16), [[UV58]](i16), [[UV59]](i16), [[UV60]](i16), [[UV61]](i16), [[UV62]](i16), [[DEF2]](i16) + ; CHECK-NEXT: [[UV63:%[0-9]+]]:_(<2 x i16>), [[UV64:%[0-9]+]]:_(<2 x i16>), [[UV65:%[0-9]+]]:_(<2 x i16>), [[UV66:%[0-9]+]]:_(<2 x i16>), [[UV67:%[0-9]+]]:_(<2 x i16>), [[UV68:%[0-9]+]]:_(<2 x i16>), [[UV69:%[0-9]+]]:_(<2 x i16>), [[UV70:%[0-9]+]]:_(<2 x i16>), [[UV71:%[0-9]+]]:_(<2 x i16>), [[UV72:%[0-9]+]]:_(<2 x i16>), [[UV73:%[0-9]+]]:_(<2 x i16>), [[UV74:%[0-9]+]]:_(<2 x i16>), [[UV75:%[0-9]+]]:_(<2 x i16>), [[UV76:%[0-9]+]]:_(<2 x i16>), [[UV77:%[0-9]+]]:_(<2 x i16>), [[UV78:%[0-9]+]]:_(<2 x i16>), [[UV79:%[0-9]+]]:_(<2 x i16>), [[UV80:%[0-9]+]]:_(<2 x i16>), [[UV81:%[0-9]+]]:_(<2 x i16>), [[UV82:%[0-9]+]]:_(<2 x i16>), [[UV83:%[0-9]+]]:_(<2 x i16>), [[UV84:%[0-9]+]]:_(<2 x i16>), [[UV85:%[0-9]+]]:_(<2 x i16>), [[UV86:%[0-9]+]]:_(<2 x i16>), [[UV87:%[0-9]+]]:_(<2 x i16>), [[UV88:%[0-9]+]]:_(<2 x i16>), [[UV89:%[0-9]+]]:_(<2 x i16>), [[UV90:%[0-9]+]]:_(<2 x i16>), [[UV91:%[0-9]+]]:_(<2 x i16>), [[UV92:%[0-9]+]]:_(<2 x i16>), [[UV93:%[0-9]+]]:_(<2 x i16>), [[UV94:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<64 x i16>) ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C3]](s32) - ; CHECK-NEXT: G_STORE [[UV94]](<2 x s16>), [[PTR_ADD1]](p5) :: (store (<2 x s16>) into stack, align 16, addrspace 5) - ; CHECK-NEXT: $vgpr0 = COPY [[UV63]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV64]](<2 x s16>) - ; CHECK-NEXT: $vgpr2 = COPY [[UV65]](<2 x s16>) - ; CHECK-NEXT: $vgpr3 = COPY [[UV66]](<2 x s16>) - ; CHECK-NEXT: $vgpr4 = COPY [[UV67]](<2 x s16>) - ; CHECK-NEXT: $vgpr5 = COPY [[UV68]](<2 x s16>) - ; CHECK-NEXT: $vgpr6 = COPY [[UV69]](<2 x s16>) - ; CHECK-NEXT: $vgpr7 = COPY [[UV70]](<2 x s16>) - ; 
CHECK-NEXT: $vgpr8 = COPY [[UV71]](<2 x s16>) - ; CHECK-NEXT: $vgpr9 = COPY [[UV72]](<2 x s16>) - ; CHECK-NEXT: $vgpr10 = COPY [[UV73]](<2 x s16>) - ; CHECK-NEXT: $vgpr11 = COPY [[UV74]](<2 x s16>) - ; CHECK-NEXT: $vgpr12 = COPY [[UV75]](<2 x s16>) - ; CHECK-NEXT: $vgpr13 = COPY [[UV76]](<2 x s16>) - ; CHECK-NEXT: $vgpr14 = COPY [[UV77]](<2 x s16>) - ; CHECK-NEXT: $vgpr15 = COPY [[UV78]](<2 x s16>) - ; CHECK-NEXT: $vgpr16 = COPY [[UV79]](<2 x s16>) - ; CHECK-NEXT: $vgpr17 = COPY [[UV80]](<2 x s16>) - ; CHECK-NEXT: $vgpr18 = COPY [[UV81]](<2 x s16>) - ; CHECK-NEXT: $vgpr19 = COPY [[UV82]](<2 x s16>) - ; CHECK-NEXT: $vgpr20 = COPY [[UV83]](<2 x s16>) - ; CHECK-NEXT: $vgpr21 = COPY [[UV84]](<2 x s16>) - ; CHECK-NEXT: $vgpr22 = COPY [[UV85]](<2 x s16>) - ; CHECK-NEXT: $vgpr23 = COPY [[UV86]](<2 x s16>) - ; CHECK-NEXT: $vgpr24 = COPY [[UV87]](<2 x s16>) - ; CHECK-NEXT: $vgpr25 = COPY [[UV88]](<2 x s16>) - ; CHECK-NEXT: $vgpr26 = COPY [[UV89]](<2 x s16>) - ; CHECK-NEXT: $vgpr27 = COPY [[UV90]](<2 x s16>) - ; CHECK-NEXT: $vgpr28 = COPY [[UV91]](<2 x s16>) - ; CHECK-NEXT: $vgpr29 = COPY [[UV92]](<2 x s16>) - ; CHECK-NEXT: $vgpr30 = COPY [[UV93]](<2 x s16>) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C3]](i32) + ; CHECK-NEXT: G_STORE [[UV94]](<2 x i16>), [[PTR_ADD1]](p5) :: (store (<2 x i16>) into stack, align 16, addrspace 5) + ; CHECK-NEXT: $vgpr0 = COPY [[UV63]](<2 x i16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV64]](<2 x i16>) + ; CHECK-NEXT: $vgpr2 = COPY [[UV65]](<2 x i16>) + ; CHECK-NEXT: $vgpr3 = COPY [[UV66]](<2 x i16>) + ; CHECK-NEXT: $vgpr4 = COPY [[UV67]](<2 x i16>) + ; CHECK-NEXT: $vgpr5 = COPY [[UV68]](<2 x i16>) + ; CHECK-NEXT: $vgpr6 = COPY [[UV69]](<2 x i16>) + ; CHECK-NEXT: $vgpr7 = COPY [[UV70]](<2 x i16>) + ; CHECK-NEXT: $vgpr8 = COPY [[UV71]](<2 x i16>) + ; CHECK-NEXT: $vgpr9 = COPY [[UV72]](<2 x i16>) + ; CHECK-NEXT: $vgpr10 = COPY [[UV73]](<2 x i16>) + ; CHECK-NEXT: $vgpr11 = COPY [[UV74]](<2 x i16>) + ; CHECK-NEXT: $vgpr12 = COPY [[UV75]](<2 x i16>) + ; CHECK-NEXT: $vgpr13 = COPY [[UV76]](<2 x i16>) + ; CHECK-NEXT: $vgpr14 = COPY [[UV77]](<2 x i16>) + ; CHECK-NEXT: $vgpr15 = COPY [[UV78]](<2 x i16>) + ; CHECK-NEXT: $vgpr16 = COPY [[UV79]](<2 x i16>) + ; CHECK-NEXT: $vgpr17 = COPY [[UV80]](<2 x i16>) + ; CHECK-NEXT: $vgpr18 = COPY [[UV81]](<2 x i16>) + ; CHECK-NEXT: $vgpr19 = COPY [[UV82]](<2 x i16>) + ; CHECK-NEXT: $vgpr20 = COPY [[UV83]](<2 x i16>) + ; CHECK-NEXT: $vgpr21 = COPY [[UV84]](<2 x i16>) + ; CHECK-NEXT: $vgpr22 = COPY [[UV85]](<2 x i16>) + ; CHECK-NEXT: $vgpr23 = COPY [[UV86]](<2 x i16>) + ; CHECK-NEXT: $vgpr24 = COPY [[UV87]](<2 x i16>) + ; CHECK-NEXT: $vgpr25 = COPY [[UV88]](<2 x i16>) + ; CHECK-NEXT: $vgpr26 = COPY [[UV89]](<2 x i16>) + ; CHECK-NEXT: $vgpr27 = COPY [[UV90]](<2 x i16>) + ; CHECK-NEXT: $vgpr28 = COPY [[UV91]](<2 x i16>) + ; CHECK-NEXT: $vgpr29 = COPY [[UV92]](<2 x i16>) + ; CHECK-NEXT: $vgpr30 = COPY [[UV93]](<2 x i16>) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; 
CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v63i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -2618,9 +2622,9 @@ define amdgpu_kernel void @test_call_external_void_func_v65i16() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -2629,81 +2633,81 @@ define amdgpu_kernel void @test_call_external_void_func_v65i16() #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<65 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<65 x s16>) from `ptr addrspace(1) undef`, align 256, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<65 x i16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<65 x i16>) from `ptr addrspace(1) undef`, align 256, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v65i16 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; 
CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16), [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16), [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16), [[UV12:%[0-9]+]]:_(s16), [[UV13:%[0-9]+]]:_(s16), [[UV14:%[0-9]+]]:_(s16), [[UV15:%[0-9]+]]:_(s16), [[UV16:%[0-9]+]]:_(s16), [[UV17:%[0-9]+]]:_(s16), [[UV18:%[0-9]+]]:_(s16), [[UV19:%[0-9]+]]:_(s16), [[UV20:%[0-9]+]]:_(s16), [[UV21:%[0-9]+]]:_(s16), [[UV22:%[0-9]+]]:_(s16), [[UV23:%[0-9]+]]:_(s16), [[UV24:%[0-9]+]]:_(s16), [[UV25:%[0-9]+]]:_(s16), [[UV26:%[0-9]+]]:_(s16), [[UV27:%[0-9]+]]:_(s16), [[UV28:%[0-9]+]]:_(s16), [[UV29:%[0-9]+]]:_(s16), [[UV30:%[0-9]+]]:_(s16), [[UV31:%[0-9]+]]:_(s16), [[UV32:%[0-9]+]]:_(s16), [[UV33:%[0-9]+]]:_(s16), [[UV34:%[0-9]+]]:_(s16), [[UV35:%[0-9]+]]:_(s16), [[UV36:%[0-9]+]]:_(s16), [[UV37:%[0-9]+]]:_(s16), [[UV38:%[0-9]+]]:_(s16), [[UV39:%[0-9]+]]:_(s16), [[UV40:%[0-9]+]]:_(s16), [[UV41:%[0-9]+]]:_(s16), [[UV42:%[0-9]+]]:_(s16), [[UV43:%[0-9]+]]:_(s16), [[UV44:%[0-9]+]]:_(s16), [[UV45:%[0-9]+]]:_(s16), [[UV46:%[0-9]+]]:_(s16), [[UV47:%[0-9]+]]:_(s16), [[UV48:%[0-9]+]]:_(s16), [[UV49:%[0-9]+]]:_(s16), [[UV50:%[0-9]+]]:_(s16), [[UV51:%[0-9]+]]:_(s16), [[UV52:%[0-9]+]]:_(s16), [[UV53:%[0-9]+]]:_(s16), [[UV54:%[0-9]+]]:_(s16), [[UV55:%[0-9]+]]:_(s16), [[UV56:%[0-9]+]]:_(s16), [[UV57:%[0-9]+]]:_(s16), [[UV58:%[0-9]+]]:_(s16), [[UV59:%[0-9]+]]:_(s16), [[UV60:%[0-9]+]]:_(s16), [[UV61:%[0-9]+]]:_(s16), [[UV62:%[0-9]+]]:_(s16), [[UV63:%[0-9]+]]:_(s16), [[UV64:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<65 x s16>) - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<66 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[UV7]](s16), [[UV8]](s16), [[UV9]](s16), [[UV10]](s16), [[UV11]](s16), [[UV12]](s16), [[UV13]](s16), [[UV14]](s16), [[UV15]](s16), [[UV16]](s16), [[UV17]](s16), [[UV18]](s16), [[UV19]](s16), [[UV20]](s16), [[UV21]](s16), [[UV22]](s16), [[UV23]](s16), [[UV24]](s16), [[UV25]](s16), [[UV26]](s16), [[UV27]](s16), [[UV28]](s16), [[UV29]](s16), [[UV30]](s16), [[UV31]](s16), [[UV32]](s16), [[UV33]](s16), [[UV34]](s16), [[UV35]](s16), [[UV36]](s16), [[UV37]](s16), [[UV38]](s16), [[UV39]](s16), [[UV40]](s16), [[UV41]](s16), [[UV42]](s16), [[UV43]](s16), [[UV44]](s16), [[UV45]](s16), [[UV46]](s16), [[UV47]](s16), [[UV48]](s16), [[UV49]](s16), [[UV50]](s16), [[UV51]](s16), [[UV52]](s16), [[UV53]](s16), [[UV54]](s16), [[UV55]](s16), [[UV56]](s16), [[UV57]](s16), [[UV58]](s16), [[UV59]](s16), [[UV60]](s16), [[UV61]](s16), [[UV62]](s16), [[UV63]](s16), [[UV64]](s16), [[DEF2]](s16) - ; CHECK-NEXT: [[UV65:%[0-9]+]]:_(<2 x s16>), [[UV66:%[0-9]+]]:_(<2 x s16>), [[UV67:%[0-9]+]]:_(<2 x s16>), [[UV68:%[0-9]+]]:_(<2 x s16>), [[UV69:%[0-9]+]]:_(<2 x s16>), [[UV70:%[0-9]+]]:_(<2 x s16>), 
[[UV71:%[0-9]+]]:_(<2 x s16>), [[UV72:%[0-9]+]]:_(<2 x s16>), [[UV73:%[0-9]+]]:_(<2 x s16>), [[UV74:%[0-9]+]]:_(<2 x s16>), [[UV75:%[0-9]+]]:_(<2 x s16>), [[UV76:%[0-9]+]]:_(<2 x s16>), [[UV77:%[0-9]+]]:_(<2 x s16>), [[UV78:%[0-9]+]]:_(<2 x s16>), [[UV79:%[0-9]+]]:_(<2 x s16>), [[UV80:%[0-9]+]]:_(<2 x s16>), [[UV81:%[0-9]+]]:_(<2 x s16>), [[UV82:%[0-9]+]]:_(<2 x s16>), [[UV83:%[0-9]+]]:_(<2 x s16>), [[UV84:%[0-9]+]]:_(<2 x s16>), [[UV85:%[0-9]+]]:_(<2 x s16>), [[UV86:%[0-9]+]]:_(<2 x s16>), [[UV87:%[0-9]+]]:_(<2 x s16>), [[UV88:%[0-9]+]]:_(<2 x s16>), [[UV89:%[0-9]+]]:_(<2 x s16>), [[UV90:%[0-9]+]]:_(<2 x s16>), [[UV91:%[0-9]+]]:_(<2 x s16>), [[UV92:%[0-9]+]]:_(<2 x s16>), [[UV93:%[0-9]+]]:_(<2 x s16>), [[UV94:%[0-9]+]]:_(<2 x s16>), [[UV95:%[0-9]+]]:_(<2 x s16>), [[UV96:%[0-9]+]]:_(<2 x s16>), [[UV97:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<66 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i16), [[UV1:%[0-9]+]]:_(i16), [[UV2:%[0-9]+]]:_(i16), [[UV3:%[0-9]+]]:_(i16), [[UV4:%[0-9]+]]:_(i16), [[UV5:%[0-9]+]]:_(i16), [[UV6:%[0-9]+]]:_(i16), [[UV7:%[0-9]+]]:_(i16), [[UV8:%[0-9]+]]:_(i16), [[UV9:%[0-9]+]]:_(i16), [[UV10:%[0-9]+]]:_(i16), [[UV11:%[0-9]+]]:_(i16), [[UV12:%[0-9]+]]:_(i16), [[UV13:%[0-9]+]]:_(i16), [[UV14:%[0-9]+]]:_(i16), [[UV15:%[0-9]+]]:_(i16), [[UV16:%[0-9]+]]:_(i16), [[UV17:%[0-9]+]]:_(i16), [[UV18:%[0-9]+]]:_(i16), [[UV19:%[0-9]+]]:_(i16), [[UV20:%[0-9]+]]:_(i16), [[UV21:%[0-9]+]]:_(i16), [[UV22:%[0-9]+]]:_(i16), [[UV23:%[0-9]+]]:_(i16), [[UV24:%[0-9]+]]:_(i16), [[UV25:%[0-9]+]]:_(i16), [[UV26:%[0-9]+]]:_(i16), [[UV27:%[0-9]+]]:_(i16), [[UV28:%[0-9]+]]:_(i16), [[UV29:%[0-9]+]]:_(i16), [[UV30:%[0-9]+]]:_(i16), [[UV31:%[0-9]+]]:_(i16), [[UV32:%[0-9]+]]:_(i16), [[UV33:%[0-9]+]]:_(i16), [[UV34:%[0-9]+]]:_(i16), [[UV35:%[0-9]+]]:_(i16), [[UV36:%[0-9]+]]:_(i16), [[UV37:%[0-9]+]]:_(i16), [[UV38:%[0-9]+]]:_(i16), [[UV39:%[0-9]+]]:_(i16), [[UV40:%[0-9]+]]:_(i16), [[UV41:%[0-9]+]]:_(i16), [[UV42:%[0-9]+]]:_(i16), [[UV43:%[0-9]+]]:_(i16), [[UV44:%[0-9]+]]:_(i16), [[UV45:%[0-9]+]]:_(i16), [[UV46:%[0-9]+]]:_(i16), [[UV47:%[0-9]+]]:_(i16), [[UV48:%[0-9]+]]:_(i16), [[UV49:%[0-9]+]]:_(i16), [[UV50:%[0-9]+]]:_(i16), [[UV51:%[0-9]+]]:_(i16), [[UV52:%[0-9]+]]:_(i16), [[UV53:%[0-9]+]]:_(i16), [[UV54:%[0-9]+]]:_(i16), [[UV55:%[0-9]+]]:_(i16), [[UV56:%[0-9]+]]:_(i16), [[UV57:%[0-9]+]]:_(i16), [[UV58:%[0-9]+]]:_(i16), [[UV59:%[0-9]+]]:_(i16), [[UV60:%[0-9]+]]:_(i16), [[UV61:%[0-9]+]]:_(i16), [[UV62:%[0-9]+]]:_(i16), [[UV63:%[0-9]+]]:_(i16), [[UV64:%[0-9]+]]:_(i16) = G_UNMERGE_VALUES [[LOAD]](<65 x 
i16>) + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<66 x i16>) = G_BUILD_VECTOR [[UV]](i16), [[UV1]](i16), [[UV2]](i16), [[UV3]](i16), [[UV4]](i16), [[UV5]](i16), [[UV6]](i16), [[UV7]](i16), [[UV8]](i16), [[UV9]](i16), [[UV10]](i16), [[UV11]](i16), [[UV12]](i16), [[UV13]](i16), [[UV14]](i16), [[UV15]](i16), [[UV16]](i16), [[UV17]](i16), [[UV18]](i16), [[UV19]](i16), [[UV20]](i16), [[UV21]](i16), [[UV22]](i16), [[UV23]](i16), [[UV24]](i16), [[UV25]](i16), [[UV26]](i16), [[UV27]](i16), [[UV28]](i16), [[UV29]](i16), [[UV30]](i16), [[UV31]](i16), [[UV32]](i16), [[UV33]](i16), [[UV34]](i16), [[UV35]](i16), [[UV36]](i16), [[UV37]](i16), [[UV38]](i16), [[UV39]](i16), [[UV40]](i16), [[UV41]](i16), [[UV42]](i16), [[UV43]](i16), [[UV44]](i16), [[UV45]](i16), [[UV46]](i16), [[UV47]](i16), [[UV48]](i16), [[UV49]](i16), [[UV50]](i16), [[UV51]](i16), [[UV52]](i16), [[UV53]](i16), [[UV54]](i16), [[UV55]](i16), [[UV56]](i16), [[UV57]](i16), [[UV58]](i16), [[UV59]](i16), [[UV60]](i16), [[UV61]](i16), [[UV62]](i16), [[UV63]](i16), [[UV64]](i16), [[DEF2]](i16) + ; CHECK-NEXT: [[UV65:%[0-9]+]]:_(<2 x i16>), [[UV66:%[0-9]+]]:_(<2 x i16>), [[UV67:%[0-9]+]]:_(<2 x i16>), [[UV68:%[0-9]+]]:_(<2 x i16>), [[UV69:%[0-9]+]]:_(<2 x i16>), [[UV70:%[0-9]+]]:_(<2 x i16>), [[UV71:%[0-9]+]]:_(<2 x i16>), [[UV72:%[0-9]+]]:_(<2 x i16>), [[UV73:%[0-9]+]]:_(<2 x i16>), [[UV74:%[0-9]+]]:_(<2 x i16>), [[UV75:%[0-9]+]]:_(<2 x i16>), [[UV76:%[0-9]+]]:_(<2 x i16>), [[UV77:%[0-9]+]]:_(<2 x i16>), [[UV78:%[0-9]+]]:_(<2 x i16>), [[UV79:%[0-9]+]]:_(<2 x i16>), [[UV80:%[0-9]+]]:_(<2 x i16>), [[UV81:%[0-9]+]]:_(<2 x i16>), [[UV82:%[0-9]+]]:_(<2 x i16>), [[UV83:%[0-9]+]]:_(<2 x i16>), [[UV84:%[0-9]+]]:_(<2 x i16>), [[UV85:%[0-9]+]]:_(<2 x i16>), [[UV86:%[0-9]+]]:_(<2 x i16>), [[UV87:%[0-9]+]]:_(<2 x i16>), [[UV88:%[0-9]+]]:_(<2 x i16>), [[UV89:%[0-9]+]]:_(<2 x i16>), [[UV90:%[0-9]+]]:_(<2 x i16>), [[UV91:%[0-9]+]]:_(<2 x i16>), [[UV92:%[0-9]+]]:_(<2 x i16>), [[UV93:%[0-9]+]]:_(<2 x i16>), [[UV94:%[0-9]+]]:_(<2 x i16>), [[UV95:%[0-9]+]]:_(<2 x i16>), [[UV96:%[0-9]+]]:_(<2 x i16>), [[UV97:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<66 x i16>) ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C3]](s32) - ; CHECK-NEXT: G_STORE [[UV96]](<2 x s16>), [[PTR_ADD1]](p5) :: (store (<2 x s16>) into stack, align 16, addrspace 5) - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C4]](s32) - ; CHECK-NEXT: G_STORE [[UV97]](<2 x s16>), [[PTR_ADD2]](p5) :: (store (<2 x s16>) into stack + 4, addrspace 5) - ; CHECK-NEXT: $vgpr0 = COPY [[UV65]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV66]](<2 x s16>) - ; CHECK-NEXT: $vgpr2 = COPY [[UV67]](<2 x s16>) - ; CHECK-NEXT: $vgpr3 = COPY [[UV68]](<2 x s16>) - ; CHECK-NEXT: $vgpr4 = COPY [[UV69]](<2 x s16>) - ; CHECK-NEXT: $vgpr5 = COPY [[UV70]](<2 x s16>) - ; CHECK-NEXT: $vgpr6 = COPY [[UV71]](<2 x s16>) - ; CHECK-NEXT: $vgpr7 = COPY [[UV72]](<2 x s16>) - ; CHECK-NEXT: $vgpr8 = COPY [[UV73]](<2 x s16>) - ; CHECK-NEXT: $vgpr9 = COPY [[UV74]](<2 x s16>) - ; CHECK-NEXT: $vgpr10 = COPY [[UV75]](<2 x s16>) - ; CHECK-NEXT: $vgpr11 = COPY [[UV76]](<2 x s16>) - ; CHECK-NEXT: $vgpr12 = COPY [[UV77]](<2 x s16>) - ; CHECK-NEXT: $vgpr13 = COPY [[UV78]](<2 x s16>) - ; CHECK-NEXT: $vgpr14 = COPY 
[[UV79]](<2 x s16>) - ; CHECK-NEXT: $vgpr15 = COPY [[UV80]](<2 x s16>) - ; CHECK-NEXT: $vgpr16 = COPY [[UV81]](<2 x s16>) - ; CHECK-NEXT: $vgpr17 = COPY [[UV82]](<2 x s16>) - ; CHECK-NEXT: $vgpr18 = COPY [[UV83]](<2 x s16>) - ; CHECK-NEXT: $vgpr19 = COPY [[UV84]](<2 x s16>) - ; CHECK-NEXT: $vgpr20 = COPY [[UV85]](<2 x s16>) - ; CHECK-NEXT: $vgpr21 = COPY [[UV86]](<2 x s16>) - ; CHECK-NEXT: $vgpr22 = COPY [[UV87]](<2 x s16>) - ; CHECK-NEXT: $vgpr23 = COPY [[UV88]](<2 x s16>) - ; CHECK-NEXT: $vgpr24 = COPY [[UV89]](<2 x s16>) - ; CHECK-NEXT: $vgpr25 = COPY [[UV90]](<2 x s16>) - ; CHECK-NEXT: $vgpr26 = COPY [[UV91]](<2 x s16>) - ; CHECK-NEXT: $vgpr27 = COPY [[UV92]](<2 x s16>) - ; CHECK-NEXT: $vgpr28 = COPY [[UV93]](<2 x s16>) - ; CHECK-NEXT: $vgpr29 = COPY [[UV94]](<2 x s16>) - ; CHECK-NEXT: $vgpr30 = COPY [[UV95]](<2 x s16>) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C3]](i32) + ; CHECK-NEXT: G_STORE [[UV96]](<2 x i16>), [[PTR_ADD1]](p5) :: (store (<2 x i16>) into stack, align 16, addrspace 5) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C4]](i32) + ; CHECK-NEXT: G_STORE [[UV97]](<2 x i16>), [[PTR_ADD2]](p5) :: (store (<2 x i16>) into stack + 4, addrspace 5) + ; CHECK-NEXT: $vgpr0 = COPY [[UV65]](<2 x i16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV66]](<2 x i16>) + ; CHECK-NEXT: $vgpr2 = COPY [[UV67]](<2 x i16>) + ; CHECK-NEXT: $vgpr3 = COPY [[UV68]](<2 x i16>) + ; CHECK-NEXT: $vgpr4 = COPY [[UV69]](<2 x i16>) + ; CHECK-NEXT: $vgpr5 = COPY [[UV70]](<2 x i16>) + ; CHECK-NEXT: $vgpr6 = COPY [[UV71]](<2 x i16>) + ; CHECK-NEXT: $vgpr7 = COPY [[UV72]](<2 x i16>) + ; CHECK-NEXT: $vgpr8 = COPY [[UV73]](<2 x i16>) + ; CHECK-NEXT: $vgpr9 = COPY [[UV74]](<2 x i16>) + ; CHECK-NEXT: $vgpr10 = COPY [[UV75]](<2 x i16>) + ; CHECK-NEXT: $vgpr11 = COPY [[UV76]](<2 x i16>) + ; CHECK-NEXT: $vgpr12 = COPY [[UV77]](<2 x i16>) + ; CHECK-NEXT: $vgpr13 = COPY [[UV78]](<2 x i16>) + ; CHECK-NEXT: $vgpr14 = COPY [[UV79]](<2 x i16>) + ; CHECK-NEXT: $vgpr15 = COPY [[UV80]](<2 x i16>) + ; CHECK-NEXT: $vgpr16 = COPY [[UV81]](<2 x i16>) + ; CHECK-NEXT: $vgpr17 = COPY [[UV82]](<2 x i16>) + ; CHECK-NEXT: $vgpr18 = COPY [[UV83]](<2 x i16>) + ; CHECK-NEXT: $vgpr19 = COPY [[UV84]](<2 x i16>) + ; CHECK-NEXT: $vgpr20 = COPY [[UV85]](<2 x i16>) + ; CHECK-NEXT: $vgpr21 = COPY [[UV86]](<2 x i16>) + ; CHECK-NEXT: $vgpr22 = COPY [[UV87]](<2 x i16>) + ; CHECK-NEXT: $vgpr23 = COPY [[UV88]](<2 x i16>) + ; CHECK-NEXT: $vgpr24 = COPY [[UV89]](<2 x i16>) + ; CHECK-NEXT: $vgpr25 = COPY [[UV90]](<2 x i16>) + ; CHECK-NEXT: $vgpr26 = COPY [[UV91]](<2 x i16>) + ; CHECK-NEXT: $vgpr27 = COPY [[UV92]](<2 x i16>) + ; CHECK-NEXT: $vgpr28 = COPY [[UV93]](<2 x i16>) + ; CHECK-NEXT: $vgpr29 = COPY [[UV94]](<2 x i16>) + ; CHECK-NEXT: $vgpr30 = COPY [[UV95]](<2 x i16>) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY 
[[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v65i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 8, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -2717,9 +2721,9 @@ define amdgpu_kernel void @test_call_external_void_func_v66i16() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -2728,78 +2732,78 @@ define amdgpu_kernel void @test_call_external_void_func_v66i16() #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<66 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<66 x s16>) from `ptr addrspace(1) undef`, align 256, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<66 x i16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<66 x i16>) from `ptr addrspace(1) undef`, align 256, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v66i16 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: 
[[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>), [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>), [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>), [[UV32:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<66 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>), [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>), [[UV8:%[0-9]+]]:_(<2 x i16>), [[UV9:%[0-9]+]]:_(<2 x i16>), [[UV10:%[0-9]+]]:_(<2 x i16>), [[UV11:%[0-9]+]]:_(<2 x i16>), [[UV12:%[0-9]+]]:_(<2 x i16>), [[UV13:%[0-9]+]]:_(<2 x i16>), [[UV14:%[0-9]+]]:_(<2 x i16>), [[UV15:%[0-9]+]]:_(<2 x i16>), [[UV16:%[0-9]+]]:_(<2 x i16>), [[UV17:%[0-9]+]]:_(<2 x i16>), [[UV18:%[0-9]+]]:_(<2 x i16>), [[UV19:%[0-9]+]]:_(<2 x i16>), [[UV20:%[0-9]+]]:_(<2 x i16>), [[UV21:%[0-9]+]]:_(<2 x i16>), [[UV22:%[0-9]+]]:_(<2 x i16>), [[UV23:%[0-9]+]]:_(<2 x i16>), [[UV24:%[0-9]+]]:_(<2 x i16>), [[UV25:%[0-9]+]]:_(<2 x i16>), [[UV26:%[0-9]+]]:_(<2 x i16>), [[UV27:%[0-9]+]]:_(<2 x i16>), [[UV28:%[0-9]+]]:_(<2 x i16>), [[UV29:%[0-9]+]]:_(<2 x i16>), [[UV30:%[0-9]+]]:_(<2 x i16>), 
[[UV31:%[0-9]+]]:_(<2 x i16>), [[UV32:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[LOAD]](<66 x i16>) ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C3]](s32) - ; CHECK-NEXT: G_STORE [[UV31]](<2 x s16>), [[PTR_ADD1]](p5) :: (store (<2 x s16>) into stack, align 16, addrspace 5) - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C4]](s32) - ; CHECK-NEXT: G_STORE [[UV32]](<2 x s16>), [[PTR_ADD2]](p5) :: (store (<2 x s16>) into stack + 4, addrspace 5) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](<2 x s16>) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](<2 x s16>) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](<2 x s16>) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](<2 x s16>) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](<2 x s16>) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](<2 x s16>) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](<2 x s16>) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](<2 x s16>) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](<2 x s16>) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](<2 x s16>) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](<2 x s16>) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](<2 x s16>) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](<2 x s16>) - ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](<2 x s16>) - ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](<2 x s16>) - ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](<2 x s16>) - ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](<2 x s16>) - ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](<2 x s16>) - ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](<2 x s16>) - ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](<2 x s16>) - ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](<2 x s16>) - ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](<2 x s16>) - ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](<2 x s16>) - ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](<2 x s16>) - ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](<2 x s16>) - ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](<2 x s16>) - ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](<2 x s16>) - ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](<2 x s16>) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C3]](i32) + ; CHECK-NEXT: G_STORE [[UV31]](<2 x i16>), [[PTR_ADD1]](p5) :: (store (<2 x i16>) into stack, align 16, addrspace 5) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C4]](i32) + ; CHECK-NEXT: G_STORE [[UV32]](<2 x i16>), [[PTR_ADD2]](p5) :: (store (<2 x i16>) into stack + 4, addrspace 5) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x i16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x i16>) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](<2 x i16>) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](<2 x i16>) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](<2 x i16>) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](<2 x i16>) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](<2 x i16>) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](<2 x i16>) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](<2 x i16>) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](<2 x i16>) + ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](<2 x i16>) + ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](<2 x i16>) + ; CHECK-NEXT: 
$vgpr12 = COPY [[UV12]](<2 x i16>) + ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](<2 x i16>) + ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](<2 x i16>) + ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](<2 x i16>) + ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](<2 x i16>) + ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](<2 x i16>) + ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](<2 x i16>) + ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](<2 x i16>) + ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](<2 x i16>) + ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](<2 x i16>) + ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](<2 x i16>) + ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](<2 x i16>) + ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](<2 x i16>) + ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](<2 x i16>) + ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](<2 x i16>) + ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](<2 x i16>) + ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](<2 x i16>) + ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](<2 x i16>) + ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](<2 x i16>) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v66i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 8, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -2813,9 +2817,9 @@ define amdgpu_kernel void @test_call_external_void_func_v2f16() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = 
COPY $sgpr14 @@ -2824,40 +2828,40 @@ define amdgpu_kernel void @test_call_external_void_func_v2f16() #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<2 x s16>) from `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x f16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<2 x f16>) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2f16 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](<2 x f16>) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = 
COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2f16, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -2871,9 +2875,9 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -2882,42 +2886,42 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32() #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<2 x s32>) from `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<2 x i32>) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2i32 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: 
[[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<2 x i32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -2931,9 +2935,9 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32_imm() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; 
CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -2941,44 +2945,44 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32_imm() #0 { ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[C]](i32), [[C1]](i32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2i32 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C2]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C3]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C4]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C2]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C3]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C4]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x i32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; 
CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -2991,9 +2995,9 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 { ; CHECK: bb.1 (%ir-block.1): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -3001,47 +3005,47 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 { ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[C]](i32), [[C1]](i32), [[C2]](i32) ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3i32 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C3]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = 
COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C4]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C5]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C3]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C4]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C5]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x i32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v3i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit 
$sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -3054,9 +3058,9 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_i32(i32) #0 { ; CHECK: bb.1 (%ir-block.1): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -3064,49 +3068,49 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_i32(i32) #0 { ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[C]](i32), [[C1]](i32), [[C2]](i32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 6 ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3i32_i32 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C4]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C5]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C6]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>) - ; 
CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[C3]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C4]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C5]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C6]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x i32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[C3]](i32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v3i32_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -3119,9 +3123,9 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: 
[[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -3130,44 +3134,44 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32() #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<4 x s32>) from `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<4 x i32>) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v4i32 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; 
CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<4 x i32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v4i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -3181,9 +3185,9 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32_imm() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -3191,48 +3195,48 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32_imm() #0 { ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32), [[C3]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[C]](i32), [[C1]](i32), [[C2]](i32), [[C3]](i32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v4i32 ; 
CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C4]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C5]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C6]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C4]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C5]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C6]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + 
; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v4i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -3245,9 +3249,9 @@ define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -3255,50 +3259,50 @@ define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() #0 { ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32), [[C3]](s32), [[C4]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x i32>) = G_BUILD_VECTOR [[C]](i32), [[C1]](i32), [[C2]](i32), [[C3]](i32), [[C4]](i32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v5i32 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C5]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: 
[[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C6]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C7]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<5 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C5]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C6]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C7]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<5 x i32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v5i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, 
implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -3311,9 +3315,9 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -3323,48 +3327,48 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32() #0 { ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<8 x s32>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x i32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<8 x i32>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v8i32 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<8 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; 
CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD1]](<8 x i32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](i32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](i32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](i32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v8i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -3379,9 +3383,9 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: 
[[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -3389,56 +3393,56 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 { ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32), [[C3]](s32), [[C4]](s32), [[C5]](s32), [[C6]](s32), [[C7]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 5 + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 6 + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[C]](i32), [[C1]](i32), [[C2]](i32), [[C3]](i32), [[C4]](i32), [[C5]](i32), [[C6]](i32), [[C7]](i32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v8i32 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C8]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C9]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C10]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - 
; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C8:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C8]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C9:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C9]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C10:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C10]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x i32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](i32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](i32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](i32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v8i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -3451,9 +3455,9 @@ define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, 
$sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -3463,56 +3467,56 @@ define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 { ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<16 x s32>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x i32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<16 x i32>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v16i32 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 
= COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32), [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32), [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32), [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD1]](<16 x i32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](i32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](i32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](i32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](i32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](i32) + ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](i32) + ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](i32) + ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](i32) + ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](i32) + ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](i32) + ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](i32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v16i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, 
implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -3527,9 +3531,9 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -3539,75 +3543,75 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 { ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<32 x s32>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x i32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<32 x i32>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), 
[[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32), [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32), [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32), [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32), [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32), [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32), [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32), [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32), [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32), [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32), [[UV28:%[0-9]+]]:_(i32), [[UV29:%[0-9]+]]:_(i32), [[UV30:%[0-9]+]]:_(i32), [[UV31:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD1]](<32 x i32>) ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C3]](s32) - ; CHECK-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) - ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) - ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) - ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) - ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) - ; CHECK-NEXT: $vgpr21 = 
COPY [[UV21]](s32) - ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) - ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) - ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) - ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) - ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) - ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) - ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) - ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) - ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C3]](i32) + ; CHECK-NEXT: G_STORE [[UV31]](i32), [[PTR_ADD1]](p5) :: (store (i32) into stack, align 16, addrspace 5) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](i32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](i32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](i32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](i32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](i32) + ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](i32) + ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](i32) + ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](i32) + ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](i32) + ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](i32) + ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](i32) + ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](i32) + ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](i32) + ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](i32) + ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](i32) + ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](i32) + ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](i32) + ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](i32) + ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](i32) + ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](i32) + ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](i32) + ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](i32) + ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](i32) + ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](i32) + ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](i32) + ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](i32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, 
implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -3622,9 +3626,9 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 { ; CHECK: bb.1 (%ir-block.1): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -3636,79 +3640,79 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 { ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<32 x s32>) from %ir.ptr0, addrspace 1) - ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF1]](p1) :: ("amdgpu-noclobber" load (s32) from `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x i32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<32 x i32>) from %ir.ptr0, addrspace 1) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[DEF1]](p1) :: ("amdgpu-noclobber" load (i32) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32_i32 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), 
[[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32), [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32), [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32), [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32), [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32), [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32), [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32), [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32), [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32), [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32), [[UV28:%[0-9]+]]:_(i32), [[UV29:%[0-9]+]]:_(i32), [[UV30:%[0-9]+]]:_(i32), [[UV31:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD1]](<32 x i32>) ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C3]](s32) - ; CHECK-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5) - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C4]](s32) - ; CHECK-NEXT: G_STORE [[LOAD2]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 4, addrspace 5) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; 
CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) - ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) - ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) - ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) - ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) - ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) - ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) - ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) - ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) - ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) - ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) - ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) - ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) - ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) - ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C3]](i32) + ; CHECK-NEXT: G_STORE [[UV31]](i32), [[PTR_ADD1]](p5) :: (store (i32) into stack, align 16, addrspace 5) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C4]](i32) + ; CHECK-NEXT: G_STORE [[LOAD2]](i32), [[PTR_ADD2]](p5) :: (store (i32) into stack + 4, addrspace 5) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](i32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](i32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](i32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](i32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](i32) + ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](i32) + ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](i32) + ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](i32) + ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](i32) + ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](i32) + ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](i32) + ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](i32) + ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](i32) + ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](i32) + ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](i32) + ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](i32) + ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](i32) + ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](i32) + ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](i32) + ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](i32) + ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](i32) + ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](i32) + ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](i32) + ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](i32) + ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](i32) + ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](i32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: 
$sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v32i32_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 8, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -3724,9 +3728,9 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i8_i8_i16() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -3737,88 +3741,88 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i8_i8_i16() #0 { ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<32 x s32>) from %ir.ptr0, addrspace 1) - ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[DEF1]](p1) :: ("amdgpu-noclobber" load (s8) from `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[DEF1]](p1) :: ("amdgpu-noclobber" load (s16) from `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x i32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<32 x i32>) from %ir.ptr0, addrspace 1) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(i8) = G_LOAD [[DEF1]](p1) :: ("amdgpu-noclobber" load (i8) from `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(i16) = G_LOAD [[DEF1]](p1) :: ("amdgpu-noclobber" load (i16) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32_i8_i8_i16 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; 
CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32), [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32), [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32), [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32), [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32), [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32), [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32), [[UV22:%[0-9]+]]:_(i32), 
[[UV23:%[0-9]+]]:_(i32), [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32), [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32), [[UV28:%[0-9]+]]:_(i32), [[UV29:%[0-9]+]]:_(i32), [[UV30:%[0-9]+]]:_(i32), [[UV31:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD1]](<32 x i32>) ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C3]](s32) - ; CHECK-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[LOAD2]](s8) - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C4]](s32) - ; CHECK-NEXT: G_STORE [[ANYEXT]](s16), [[PTR_ADD2]](p5) :: (store (s16) into stack + 4, align 4, addrspace 5) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s16) = COPY [[ANYEXT]](s16) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C5]](s32) - ; CHECK-NEXT: G_STORE [[COPY20]](s16), [[PTR_ADD3]](p5) :: (store (s16) into stack + 8, align 8, addrspace 5) - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C6]](s32) - ; CHECK-NEXT: G_STORE [[LOAD3]](s16), [[PTR_ADD4]](p5) :: (store (s16) into stack + 12, align 4, addrspace 5) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) - ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) - ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) - ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) - ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) - ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) - ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) - ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) - ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) - ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) - ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) - ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) - ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) - ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) - ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C3]](i32) + ; CHECK-NEXT: G_STORE [[UV31]](i32), [[PTR_ADD1]](p5) :: (store (i32) into stack, align 16, addrspace 5) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i16) = G_ANYEXT [[LOAD2]](i8) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C4]](i32) + ; CHECK-NEXT: G_STORE [[ANYEXT]](i16), 
[[PTR_ADD2]](p5) :: (store (i16) into stack + 4, align 4, addrspace 5) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(i16) = COPY [[ANYEXT]](i16) + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C5]](i32) + ; CHECK-NEXT: G_STORE [[COPY20]](i16), [[PTR_ADD3]](p5) :: (store (i16) into stack + 8, align 8, addrspace 5) + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C6]](i32) + ; CHECK-NEXT: G_STORE [[LOAD3]](i16), [[PTR_ADD4]](p5) :: (store (i16) into stack + 12, align 4, addrspace 5) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](i32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](i32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](i32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](i32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](i32) + ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](i32) + ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](i32) + ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](i32) + ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](i32) + ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](i32) + ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](i32) + ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](i32) + ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](i32) + ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](i32) + ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](i32) + ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](i32) + ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](i32) + ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](i32) + ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](i32) + ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](i32) + ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](i32) + ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](i32) + ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](i32) + ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](i32) + ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](i32) + ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](i32) + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v32i32_i8_i8_i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit 
$vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 16, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -3836,9 +3840,9 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_p3_p5() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -3849,7 +3853,7 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_p3_p5() #0 { ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<32 x s32>) from %ir.ptr0, addrspace 1) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x i32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<32 x i32>) from %ir.ptr0, addrspace 1) ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(p3) = G_LOAD [[DEF1]](p1) :: ("amdgpu-noclobber" load (p3) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(p5) = G_LOAD [[DEF1]](p1) :: ("amdgpu-noclobber" load (p5) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc @@ -3857,75 +3861,75 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_p3_p5() #0 { ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), 
[[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32), [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32), [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32), [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32), [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32), [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32), [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32), [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32), [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32), [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32), [[UV28:%[0-9]+]]:_(i32), [[UV29:%[0-9]+]]:_(i32), [[UV30:%[0-9]+]]:_(i32), [[UV31:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD1]](<32 x i32>) ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C3]](s32) - ; CHECK-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5) - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C4]](s32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C3]](i32) + ; CHECK-NEXT: G_STORE [[UV31]](i32), [[PTR_ADD1]](p5) :: (store (i32) into stack, align 16, addrspace 5) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C4]](i32) ; CHECK-NEXT: G_STORE [[LOAD2]](p3), [[PTR_ADD2]](p5) :: (store (p3) into stack + 4, addrspace 5) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: 
[[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C5]](s32) + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C5]](i32) ; CHECK-NEXT: G_STORE [[LOAD3]](p5), [[PTR_ADD3]](p5) :: (store (p5) into stack + 8, align 8, addrspace 5) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) - ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) - ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) - ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) - ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) - ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) - ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) - ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) - ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) - ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) - ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) - ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) - ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) - ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) - ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](i32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](i32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](i32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](i32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](i32) + ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](i32) + ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](i32) + ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](i32) + ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](i32) + ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](i32) + ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](i32) + ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](i32) + ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](i32) + ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](i32) + ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](i32) + ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](i32) + ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](i32) + ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](i32) + ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](i32) + ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](i32) + ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](i32) + ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](i32) + ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](i32) + ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](i32) + ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](i32) + ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](i32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY 
[[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v32i32_p3_p5, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 12, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -3942,9 +3946,9 @@ define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -3954,46 +3958,46 @@ define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 { ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (s8) from %ir.ptr0, align 4, addrspace 1) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) - ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: ("amdgpu-noclobber" load (s32) from %ir.ptr0 + 4, addrspace 1) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(i8) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (i8) from %ir.ptr0, align 4, addrspace 1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](i64) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: ("amdgpu-noclobber" load (i32) from %ir.ptr0 + 4, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE 
@external_void_func_struct_i8_i32 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[LOAD1]](s8) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT1]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[LOAD2]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C2]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C3]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i16) = G_ANYEXT [[LOAD1]](i8) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT1]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[LOAD2]](i32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; 
CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_struct_i8_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -4008,18 +4012,18 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: (load (s8) from %ir.ptr0, align 4, addrspace 1) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) - ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from %ir.ptr0 + 4, addrspace 1) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(i8) = G_LOAD [[LOAD]](p1) :: (load (i8) from %ir.ptr0, align 4, addrspace 1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](i64) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i32) from %ir.ptr0 + 4, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_struct_i8_i32 - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[LOAD1]](s8) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT1]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[LOAD2]](s32) - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i16) = G_ANYEXT [[LOAD1]](i8) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT1]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[LOAD2]](i32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x i32>) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -4034,20 +4038,20 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() # ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: (load (s8) from %ir.ptr0, align 4, addrspace 1) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) - ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from %ir.ptr0 + 4, addrspace 1) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(i8) = G_LOAD [[LOAD]](p1) :: (load (i8) from %ir.ptr0, align 4, addrspace 1) + ; 
CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](i64) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i32) from %ir.ptr0 + 4, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_struct_i8_i32_inreg - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[LOAD1]](s8) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[ANYEXT1]](s32) - ; CHECK-NEXT: $sgpr4 = COPY [[INTRINSIC_CONVERGENT]](s32) - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[LOAD2]](s32) - ; CHECK-NEXT: $sgpr5 = COPY [[INTRINSIC_CONVERGENT1]](s32) - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i16) = G_ANYEXT [[LOAD1]](i8) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT]](i16) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[ANYEXT1]](i32) + ; CHECK-NEXT: $sgpr4 = COPY [[INTRINSIC_CONVERGENT]](i32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[LOAD2]](i32) + ; CHECK-NEXT: $sgpr5 = COPY [[INTRINSIC_CONVERGENT1]](i32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x i32>) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -4062,9 +4066,9 @@ define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -4072,50 +4076,50 @@ define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 3 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i8) = G_CONSTANT i8 3 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.val - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: %15:_(p5) = nuw nusw G_PTR_ADD [[FRAME_INDEX]], [[C2]](s32) - ; 
CHECK-NEXT: G_STORE [[C]](s8), [[FRAME_INDEX]](p5) :: (store (s8) into %ir.val, addrspace 5) - ; CHECK-NEXT: G_STORE [[C1]](s32), %15(p5) :: (store (s32) into %ir.gep1, addrspace 5) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CHECK-NEXT: %15:_(p5) = nuw nusw G_PTR_ADD [[FRAME_INDEX]], [[C2]](i32) + ; CHECK-NEXT: G_STORE [[C]](i8), [[FRAME_INDEX]](p5) :: (store (i8) into %ir.val, addrspace 5) + ; CHECK-NEXT: G_STORE [[C1]](i32), %15(p5) :: (store (i32) into %ir.gep1, addrspace 5) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_byval_struct_i8_i32 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C3]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C4]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C5]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C3]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C4]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C5]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C6]](s32) - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: G_MEMCPY [[PTR_ADD1]](p5), [[FRAME_INDEX]](p5), [[C7]](s32), 0 :: (dereferenceable store (s64) into stack, align 4, addrspace 5), (dereferenceable load (s64) from %ir.val, align 4, addrspace 5) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: 
[[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C6]](i32) + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: G_MEMCPY [[PTR_ADD1]](p5), [[FRAME_INDEX]](p5), [[C7]](i32), 0 :: (dereferenceable store (i64) into stack, align 4, addrspace 5), (dereferenceable load (i64) from %ir.val, align 4, addrspace 5) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_byval_struct_i8_i32, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 8, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -4135,7 +4139,7 @@ define void @call_byval_3ai32_byval_i8_align32(ptr addrspace(5) %incoming0, ptr ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr31 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 @@ -4146,39 +4150,39 @@ define void @call_byval_3ai32_byval_i8_align32(ptr addrspace(5) %incoming0, ptr ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p5) = COPY $vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 999 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 999 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @void_func_byval_a3i32_byval_i8_align32 ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i64) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]] + ; 
CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C1]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: G_MEMCPY [[PTR_ADD]](p5), [[COPY9]](p5), [[C2]](s32), 0 :: (dereferenceable store (s96) into stack, align 4, addrspace 5), (dereferenceable load (s96) from %ir.incoming0, align 4, addrspace 5) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C3]](s32) - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: G_MEMCPY [[PTR_ADD1]](p5), [[COPY10]](p5), [[C4]](s32), 0 :: (dereferenceable store (s8) into stack + 32, align 32, addrspace 5), (dereferenceable load (s8) from %ir.incoming1, align 32, addrspace 5) - ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C1]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CHECK-NEXT: G_MEMCPY [[PTR_ADD]](p5), [[COPY9]](p5), [[C2]](i32), 0 :: (dereferenceable store (i96) into stack, align 4, addrspace 5), (dereferenceable load (i96) from %ir.incoming0, align 4, addrspace 5) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C3]](i32) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: G_MEMCPY [[PTR_ADD1]](p5), [[COPY10]](p5), [[C4]](i32), 0 :: (dereferenceable store (i8) into stack + 32, align 32, addrspace 5), (dereferenceable load (i8) from %ir.incoming1, align 32, addrspace 5) + ; CHECK-NEXT: $vgpr0 = COPY [[C]](i32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY13]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY17]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY18]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY19]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY17]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[COPY18]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY19]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @void_func_byval_a3i32_byval_i8_align32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 36, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -4195,7 +4199,7 @@ define void @call_byval_a4i64_align4_higher_source_align(ptr addrspace(5) align ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, 
$vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr31 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 @@ -4210,28 +4214,28 @@ define void @call_byval_a4i64_align4_higher_source_align(ptr addrspace(5) align ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CHECK-NEXT: G_MEMCPY [[PTR_ADD]](p5), [[COPY9]](p5), [[C1]](s32), 0 :: (dereferenceable store (s256) into stack, align 4, addrspace 5), (dereferenceable load (s256) from %ir.incoming_high_align, align 256, addrspace 5) - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; CHECK-NEXT: G_MEMCPY [[PTR_ADD]](p5), [[COPY9]](p5), [[C1]](i32), 0 :: (dereferenceable store (i256) into stack, align 4, addrspace 5), (dereferenceable load (i256) from %ir.incoming_high_align, align 256, addrspace 5) + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY17]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY18]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[COPY17]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY18]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @void_func_byval_a4i64_align4, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit 
$sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 32, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -4244,9 +4248,9 @@ define amdgpu_kernel void @test_call_external_void_func_v2i8() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -4256,46 +4260,46 @@ define amdgpu_kernel void @test_call_external_void_func_v2i8() #0 { ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<2 x s8>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i8>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<2 x i8>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2i8 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<2 x s8>) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT2]](s32) - ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) - ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT3]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; 
CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i8), [[UV1:%[0-9]+]]:_(i8) = G_UNMERGE_VALUES [[LOAD1]](<2 x i8>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i16) = G_ANYEXT [[UV]](i8) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i16) = G_ANYEXT [[UV1]](i8) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT2]](i32) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT1]](i16) + ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT3]](i32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -4310,9 +4314,9 @@ define amdgpu_kernel void @test_call_external_void_func_v3i8() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -4322,49 +4326,49 @@ define amdgpu_kernel void 
@test_call_external_void_func_v3i8() #0 { ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<3 x s8>) from %ir.ptr, align 4, addrspace 1) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i8>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<3 x i8>) from %ir.ptr, align 4, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3i8 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<3 x s8>) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) - ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT3]](s32) - ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) - ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT4]](s32) - ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16) - ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT5]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i8), [[UV1:%[0-9]+]]:_(i8), [[UV2:%[0-9]+]]:_(i8) = G_UNMERGE_VALUES [[LOAD1]](<3 x i8>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i16) = G_ANYEXT [[UV]](i8) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i16) = G_ANYEXT [[UV1]](i8) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(i16) = G_ANYEXT [[UV2]](i8) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT3]](i32) + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT1]](i16) + ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT4]](i32) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT2]](i16) + ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT5]](i32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v3i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -4379,9 +4383,9 @@ define amdgpu_kernel void @test_call_external_void_func_v4i8() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -4391,52 +4395,52 @@ define amdgpu_kernel void @test_call_external_void_func_v4i8() #0 { ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<4 x s8>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i8>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<4 x i8>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, 
implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v4i8 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<4 x s8>) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) - ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) - ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT4]](s32) - ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) - ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT5]](s32) - ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16) - ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT6]](s32) - ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT3]](s16) - ; CHECK-NEXT: $vgpr3 = COPY [[ANYEXT7]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i8), [[UV1:%[0-9]+]]:_(i8), [[UV2:%[0-9]+]]:_(i8), [[UV3:%[0-9]+]]:_(i8) = G_UNMERGE_VALUES [[LOAD1]](<4 x i8>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i16) = G_ANYEXT [[UV]](i8) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i16) = 
G_ANYEXT [[UV1]](i8) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(i16) = G_ANYEXT [[UV2]](i8) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(i16) = G_ANYEXT [[UV3]](i8) + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT4]](i32) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT1]](i16) + ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT5]](i32) + ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT2]](i16) + ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT6]](i32) + ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT3]](i16) + ; CHECK-NEXT: $vgpr3 = COPY [[ANYEXT7]](i32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v4i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -4451,9 +4455,9 @@ define amdgpu_kernel void @test_call_external_void_func_v8i8() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -4463,64 +4467,64 @@ define amdgpu_kernel void @test_call_external_void_func_v8i8() #0 { ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<8 x s8>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x i8>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<8 x i8>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v8i8 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: 
[[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<8 x s8>) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) - ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) - ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[UV4]](s8) - ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s16) = G_ANYEXT [[UV5]](s8) - ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s16) = G_ANYEXT [[UV6]](s8) - ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s16) = G_ANYEXT [[UV7]](s8) - ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT8]](s32) - ; CHECK-NEXT: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) - ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT9]](s32) - ; CHECK-NEXT: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16) - ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT10]](s32) - ; CHECK-NEXT: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT3]](s16) - ; CHECK-NEXT: $vgpr3 = COPY [[ANYEXT11]](s32) - ; CHECK-NEXT: [[ANYEXT12:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT4]](s16) - ; CHECK-NEXT: $vgpr4 = COPY [[ANYEXT12]](s32) - ; CHECK-NEXT: [[ANYEXT13:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT5]](s16) - ; CHECK-NEXT: $vgpr5 = COPY [[ANYEXT13]](s32) - ; CHECK-NEXT: [[ANYEXT14:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT6]](s16) - ; CHECK-NEXT: $vgpr6 = COPY [[ANYEXT14]](s32) - ; CHECK-NEXT: [[ANYEXT15:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT7]](s16) - ; CHECK-NEXT: $vgpr7 = COPY [[ANYEXT15]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: 
[[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i8), [[UV1:%[0-9]+]]:_(i8), [[UV2:%[0-9]+]]:_(i8), [[UV3:%[0-9]+]]:_(i8), [[UV4:%[0-9]+]]:_(i8), [[UV5:%[0-9]+]]:_(i8), [[UV6:%[0-9]+]]:_(i8), [[UV7:%[0-9]+]]:_(i8) = G_UNMERGE_VALUES [[LOAD1]](<8 x i8>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i16) = G_ANYEXT [[UV]](i8) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i16) = G_ANYEXT [[UV1]](i8) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(i16) = G_ANYEXT [[UV2]](i8) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(i16) = G_ANYEXT [[UV3]](i8) + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(i16) = G_ANYEXT [[UV4]](i8) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(i16) = G_ANYEXT [[UV5]](i8) + ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(i16) = G_ANYEXT [[UV6]](i8) + ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(i16) = G_ANYEXT [[UV7]](i8) + ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT8]](i32) + ; CHECK-NEXT: [[ANYEXT9:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT1]](i16) + ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT9]](i32) + ; CHECK-NEXT: [[ANYEXT10:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT2]](i16) + ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT10]](i32) + ; CHECK-NEXT: [[ANYEXT11:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT3]](i16) + ; CHECK-NEXT: $vgpr3 = COPY [[ANYEXT11]](i32) + ; CHECK-NEXT: [[ANYEXT12:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT4]](i16) + ; CHECK-NEXT: $vgpr4 = COPY [[ANYEXT12]](i32) + ; CHECK-NEXT: [[ANYEXT13:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT5]](i16) + ; CHECK-NEXT: $vgpr5 = COPY [[ANYEXT13]](i32) + ; CHECK-NEXT: [[ANYEXT14:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT6]](i16) + ; CHECK-NEXT: $vgpr6 = COPY [[ANYEXT14]](i32) + ; CHECK-NEXT: [[ANYEXT15:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT7]](i16) + ; CHECK-NEXT: $vgpr7 = COPY [[ANYEXT15]](i32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v8i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ 
-4535,9 +4539,9 @@ define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -4547,88 +4551,88 @@ define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 { ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<16 x s8>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x i8>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<16 x i8>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v16i8 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<16 x s8>) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) - ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) - ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[UV4]](s8) - ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s16) = G_ANYEXT [[UV5]](s8) - ; CHECK-NEXT: 
[[ANYEXT6:%[0-9]+]]:_(s16) = G_ANYEXT [[UV6]](s8) - ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s16) = G_ANYEXT [[UV7]](s8) - ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(s16) = G_ANYEXT [[UV8]](s8) - ; CHECK-NEXT: [[ANYEXT9:%[0-9]+]]:_(s16) = G_ANYEXT [[UV9]](s8) - ; CHECK-NEXT: [[ANYEXT10:%[0-9]+]]:_(s16) = G_ANYEXT [[UV10]](s8) - ; CHECK-NEXT: [[ANYEXT11:%[0-9]+]]:_(s16) = G_ANYEXT [[UV11]](s8) - ; CHECK-NEXT: [[ANYEXT12:%[0-9]+]]:_(s16) = G_ANYEXT [[UV12]](s8) - ; CHECK-NEXT: [[ANYEXT13:%[0-9]+]]:_(s16) = G_ANYEXT [[UV13]](s8) - ; CHECK-NEXT: [[ANYEXT14:%[0-9]+]]:_(s16) = G_ANYEXT [[UV14]](s8) - ; CHECK-NEXT: [[ANYEXT15:%[0-9]+]]:_(s16) = G_ANYEXT [[UV15]](s8) - ; CHECK-NEXT: [[ANYEXT16:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT16]](s32) - ; CHECK-NEXT: [[ANYEXT17:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) - ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT17]](s32) - ; CHECK-NEXT: [[ANYEXT18:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16) - ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT18]](s32) - ; CHECK-NEXT: [[ANYEXT19:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT3]](s16) - ; CHECK-NEXT: $vgpr3 = COPY [[ANYEXT19]](s32) - ; CHECK-NEXT: [[ANYEXT20:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT4]](s16) - ; CHECK-NEXT: $vgpr4 = COPY [[ANYEXT20]](s32) - ; CHECK-NEXT: [[ANYEXT21:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT5]](s16) - ; CHECK-NEXT: $vgpr5 = COPY [[ANYEXT21]](s32) - ; CHECK-NEXT: [[ANYEXT22:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT6]](s16) - ; CHECK-NEXT: $vgpr6 = COPY [[ANYEXT22]](s32) - ; CHECK-NEXT: [[ANYEXT23:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT7]](s16) - ; CHECK-NEXT: $vgpr7 = COPY [[ANYEXT23]](s32) - ; CHECK-NEXT: [[ANYEXT24:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT8]](s16) - ; CHECK-NEXT: $vgpr8 = COPY [[ANYEXT24]](s32) - ; CHECK-NEXT: [[ANYEXT25:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT9]](s16) - ; CHECK-NEXT: $vgpr9 = COPY [[ANYEXT25]](s32) - ; CHECK-NEXT: [[ANYEXT26:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT10]](s16) - ; CHECK-NEXT: $vgpr10 = COPY [[ANYEXT26]](s32) - ; CHECK-NEXT: [[ANYEXT27:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT11]](s16) - ; CHECK-NEXT: $vgpr11 = COPY [[ANYEXT27]](s32) - ; CHECK-NEXT: [[ANYEXT28:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT12]](s16) - ; CHECK-NEXT: $vgpr12 = COPY [[ANYEXT28]](s32) - ; CHECK-NEXT: [[ANYEXT29:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT13]](s16) - ; CHECK-NEXT: $vgpr13 = COPY [[ANYEXT29]](s32) - ; CHECK-NEXT: [[ANYEXT30:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT14]](s16) - ; CHECK-NEXT: $vgpr14 = COPY [[ANYEXT30]](s32) - ; CHECK-NEXT: [[ANYEXT31:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT15]](s16) - ; CHECK-NEXT: $vgpr15 = COPY [[ANYEXT31]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; 
CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i8), [[UV1:%[0-9]+]]:_(i8), [[UV2:%[0-9]+]]:_(i8), [[UV3:%[0-9]+]]:_(i8), [[UV4:%[0-9]+]]:_(i8), [[UV5:%[0-9]+]]:_(i8), [[UV6:%[0-9]+]]:_(i8), [[UV7:%[0-9]+]]:_(i8), [[UV8:%[0-9]+]]:_(i8), [[UV9:%[0-9]+]]:_(i8), [[UV10:%[0-9]+]]:_(i8), [[UV11:%[0-9]+]]:_(i8), [[UV12:%[0-9]+]]:_(i8), [[UV13:%[0-9]+]]:_(i8), [[UV14:%[0-9]+]]:_(i8), [[UV15:%[0-9]+]]:_(i8) = G_UNMERGE_VALUES [[LOAD1]](<16 x i8>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i16) = G_ANYEXT [[UV]](i8) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i16) = G_ANYEXT [[UV1]](i8) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(i16) = G_ANYEXT [[UV2]](i8) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(i16) = G_ANYEXT [[UV3]](i8) + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(i16) = G_ANYEXT [[UV4]](i8) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(i16) = G_ANYEXT [[UV5]](i8) + ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(i16) = G_ANYEXT [[UV6]](i8) + ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(i16) = G_ANYEXT [[UV7]](i8) + ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(i16) = G_ANYEXT [[UV8]](i8) + ; CHECK-NEXT: [[ANYEXT9:%[0-9]+]]:_(i16) = G_ANYEXT [[UV9]](i8) + ; CHECK-NEXT: [[ANYEXT10:%[0-9]+]]:_(i16) = G_ANYEXT [[UV10]](i8) + ; CHECK-NEXT: [[ANYEXT11:%[0-9]+]]:_(i16) = G_ANYEXT [[UV11]](i8) + ; CHECK-NEXT: [[ANYEXT12:%[0-9]+]]:_(i16) = G_ANYEXT [[UV12]](i8) + ; CHECK-NEXT: [[ANYEXT13:%[0-9]+]]:_(i16) = G_ANYEXT [[UV13]](i8) + ; CHECK-NEXT: [[ANYEXT14:%[0-9]+]]:_(i16) = G_ANYEXT [[UV14]](i8) + ; CHECK-NEXT: [[ANYEXT15:%[0-9]+]]:_(i16) = G_ANYEXT [[UV15]](i8) + ; CHECK-NEXT: [[ANYEXT16:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT16]](i32) + ; CHECK-NEXT: [[ANYEXT17:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT1]](i16) + ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT17]](i32) + ; CHECK-NEXT: [[ANYEXT18:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT2]](i16) + ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT18]](i32) + ; CHECK-NEXT: [[ANYEXT19:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT3]](i16) + ; CHECK-NEXT: $vgpr3 = COPY [[ANYEXT19]](i32) + ; CHECK-NEXT: [[ANYEXT20:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT4]](i16) + ; CHECK-NEXT: $vgpr4 = COPY [[ANYEXT20]](i32) + ; CHECK-NEXT: [[ANYEXT21:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT5]](i16) + ; CHECK-NEXT: $vgpr5 = COPY [[ANYEXT21]](i32) + ; CHECK-NEXT: [[ANYEXT22:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT6]](i16) + ; CHECK-NEXT: $vgpr6 = COPY [[ANYEXT22]](i32) + ; CHECK-NEXT: [[ANYEXT23:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT7]](i16) + ; CHECK-NEXT: $vgpr7 = COPY [[ANYEXT23]](i32) + ; CHECK-NEXT: [[ANYEXT24:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT8]](i16) + ; CHECK-NEXT: $vgpr8 = COPY [[ANYEXT24]](i32) + ; CHECK-NEXT: [[ANYEXT25:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT9]](i16) + ; CHECK-NEXT: $vgpr9 = COPY [[ANYEXT25]](i32) + ; CHECK-NEXT: [[ANYEXT26:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT10]](i16) + ; CHECK-NEXT: $vgpr10 = COPY [[ANYEXT26]](i32) + ; CHECK-NEXT: [[ANYEXT27:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT11]](i16) + ; CHECK-NEXT: $vgpr11 = COPY [[ANYEXT27]](i32) + ; CHECK-NEXT: [[ANYEXT28:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT12]](i16) + ; CHECK-NEXT: $vgpr12 = COPY [[ANYEXT28]](i32) + ; CHECK-NEXT: [[ANYEXT29:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT13]](i16) + ; CHECK-NEXT: $vgpr13 = COPY [[ANYEXT29]](i32) + ; CHECK-NEXT: [[ANYEXT30:%[0-9]+]]:_(i32) = G_ANYEXT [[ANYEXT14]](i16) + ; CHECK-NEXT: $vgpr14 = COPY [[ANYEXT30]](i32) + ; CHECK-NEXT: [[ANYEXT31:%[0-9]+]]:_(i32) = G_ANYEXT 
[[ANYEXT15]](i16) + ; CHECK-NEXT: $vgpr15 = COPY [[ANYEXT31]](i32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v16i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -4643,9 +4647,9 @@ define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val ; CHECK: bb.1.entry: ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -4654,85 +4658,86 @@ define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (<32 x s32>) from %ir.val.kernarg.offset1, align 16, addrspace 4) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 - ; CHECK-NEXT: %18:_(p4) = nuw nusw G_PTR_ADD [[INT]], [[C]](s64) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD %18(p4) :: (dereferenceable invariant load (s64) from %ir.tmp.kernarg.offset, align 16, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<32 x i32>) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (<32 x i32>) from %ir.val.kernarg.offset1, align 16, addrspace 4) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 128 + ; CHECK-NEXT: %18:_(p4) = nuw nusw G_PTR_ADD [[INT]], [[C]](i64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(f64) = G_LOAD %18(p4) :: (dereferenceable 
invariant load (f64) from %ir.tmp.kernarg.offset, align 16, addrspace 4) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @stack_passed_f64_arg ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 136 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<32 x s32>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 136 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C2]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C3]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32), [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32), 
[[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32), [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32), [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32), [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32), [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32), [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32), [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32), [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32), [[UV28:%[0-9]+]]:_(i32), [[UV29:%[0-9]+]]:_(i32), [[UV30:%[0-9]+]]:_(i32), [[UV31:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<32 x i32>) ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C4]](s32) - ; CHECK-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5) - ; CHECK-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](s64) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C5]](s32) - ; CHECK-NEXT: G_STORE [[UV32]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 4, addrspace 5) - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C6]](s32) - ; CHECK-NEXT: G_STORE [[UV33]](s32), [[PTR_ADD3]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) - ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) - ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) - ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) - ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) - ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) - ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) - ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) - ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) - ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) - ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) - ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) - ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) - ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) - ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C4]](i32) + ; CHECK-NEXT: G_STORE [[UV31]](i32), [[PTR_ADD1]](p5) :: (store (i32) into stack, align 16, addrspace 5) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i64) = G_BITCAST [[LOAD1]](f64) + ; CHECK-NEXT: [[UV32:%[0-9]+]]:_(i32), [[UV33:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](i64) + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD 
[[AMDGPU_WAVE_ADDRESS]], [[C5]](i32) + ; CHECK-NEXT: G_STORE [[UV32]](i32), [[PTR_ADD2]](p5) :: (store (i32) into stack + 4, addrspace 5) + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C6]](i32) + ; CHECK-NEXT: G_STORE [[UV33]](i32), [[PTR_ADD3]](p5) :: (store (i32) into stack + 8, align 8, addrspace 5) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](i32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](i32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](i32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](i32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](i32) + ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](i32) + ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](i32) + ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](i32) + ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](i32) + ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](i32) + ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](i32) + ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](i32) + ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](i32) + ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](i32) + ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](i32) + ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](i32) + ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](i32) + ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](i32) + ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](i32) + ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](i32) + ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](i32) + ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](i32) + ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](i32) + ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](i32) + ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](i32) + ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](i32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @stack_passed_f64_arg, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 12, implicit-def $scc ; 
CHECK-NEXT: S_ENDPGM 0 @@ -4746,7 +4751,7 @@ define void @stack_12xv3i32() #0 { ; CHECK: bb.1.entry: ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr31 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 @@ -4755,115 +4760,115 @@ define void @stack_12xv3i32() #0 { ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C2]](s32), [[C2]](s32), [[C2]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C3]](s32), [[C3]](s32), [[C3]](s32) - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C4]](s32), [[C4]](s32), [[C4]](s32) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C5]](s32), [[C5]](s32), [[C5]](s32) - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 - ; CHECK-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C6]](s32), [[C6]](s32), [[C6]](s32) - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; CHECK-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C7]](s32), [[C7]](s32), [[C7]](s32) - ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[BUILD_VECTOR8:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C8]](s32), [[C8]](s32), [[C8]](s32) - ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 - ; CHECK-NEXT: [[BUILD_VECTOR9:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C9]](s32), [[C9]](s32), [[C9]](s32) - ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 - ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[BUILD_VECTOR10:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C10]](s32), [[C11]](s32), [[C12]](s32) - ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 - ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 14 - ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 - ; CHECK-NEXT: [[BUILD_VECTOR11:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C13]](s32), [[C14]](s32), [[C15]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[C]](i32), [[C]](i32), [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[C1]](i32), [[C1]](i32), [[C1]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[C2]](i32), [[C2]](i32), [[C2]](i32) + ; CHECK-NEXT: 
[[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[C3]](i32), [[C3]](i32), [[C3]](i32) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[C4]](i32), [[C4]](i32), [[C4]](i32) + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 5 + ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[C5]](i32), [[C5]](i32), [[C5]](i32) + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 6 + ; CHECK-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[C6]](i32), [[C6]](i32), [[C6]](i32) + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[C7]](i32), [[C7]](i32), [[C7]](i32) + ; CHECK-NEXT: [[C8:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[BUILD_VECTOR8:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[C8]](i32), [[C8]](i32), [[C8]](i32) + ; CHECK-NEXT: [[C9:%[0-9]+]]:_(i32) = G_CONSTANT i32 9 + ; CHECK-NEXT: [[BUILD_VECTOR9:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[C9]](i32), [[C9]](i32), [[C9]](i32) + ; CHECK-NEXT: [[C10:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[C11:%[0-9]+]]:_(i32) = G_CONSTANT i32 11 + ; CHECK-NEXT: [[C12:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CHECK-NEXT: [[BUILD_VECTOR10:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[C10]](i32), [[C11]](i32), [[C12]](i32) + ; CHECK-NEXT: [[C13:%[0-9]+]]:_(i32) = G_CONSTANT i32 13 + ; CHECK-NEXT: [[C14:%[0-9]+]]:_(i32) = G_CONSTANT i32 14 + ; CHECK-NEXT: [[C15:%[0-9]+]]:_(i32) = G_CONSTANT i32 15 + ; CHECK-NEXT: [[BUILD_VECTOR11:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[C13]](i32), [[C14]](i32), [[C15]](i32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_12xv3i32 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>) - ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<3 x s32>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<3 x s32>) - ; CHECK-NEXT: [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR3]](<3 x s32>) - ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR4]](<3 x s32>) - ; CHECK-NEXT: [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR5]](<3 x s32>) - ; CHECK-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR6]](<3 x s32>) - ; CHECK-NEXT: [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR7]](<3 x s32>) - ; CHECK-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), 
[[UV26:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR8]](<3 x s32>) - ; CHECK-NEXT: [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR9]](<3 x s32>) - ; CHECK-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR10]](<3 x s32>) + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(i64) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x i32>) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<3 x i32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<3 x i32>) + ; CHECK-NEXT: [[UV9:%[0-9]+]]:_(i32), [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR3]](<3 x i32>) + ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32), [[UV14:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR4]](<3 x i32>) + ; CHECK-NEXT: [[UV15:%[0-9]+]]:_(i32), [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR5]](<3 x i32>) + ; CHECK-NEXT: [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32), [[UV20:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR6]](<3 x i32>) + ; CHECK-NEXT: [[UV21:%[0-9]+]]:_(i32), [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR7]](<3 x i32>) + ; CHECK-NEXT: [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32), [[UV26:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR8]](<3 x i32>) + ; CHECK-NEXT: [[UV27:%[0-9]+]]:_(i32), [[UV28:%[0-9]+]]:_(i32), [[UV29:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR9]](<3 x i32>) + ; CHECK-NEXT: [[UV30:%[0-9]+]]:_(i32), [[UV31:%[0-9]+]]:_(i32), [[UV32:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR10]](<3 x i32>) ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 - ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C16]](s32) - ; CHECK-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5) - ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C17]](s32) - ; CHECK-NEXT: G_STORE [[UV32]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack + 4, addrspace 5) - ; CHECK-NEXT: [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR11]](<3 x s32>) - ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C18]](s32) - ; CHECK-NEXT: G_STORE [[UV33]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5) - ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C19]](s32) - ; CHECK-NEXT: G_STORE [[UV34]](s32), [[PTR_ADD3]](p5) :: (store (s32) into stack + 12, addrspace 5) - ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: 
[[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C20]](s32) - ; CHECK-NEXT: G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store (s32) into stack + 16, align 16, addrspace 5) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) - ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) - ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) - ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) - ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) - ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) - ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) - ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) - ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) - ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) - ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) - ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) - ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) - ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) - ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: [[C16:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C16]](i32) + ; CHECK-NEXT: G_STORE [[UV31]](i32), [[PTR_ADD]](p5) :: (store (i32) into stack, align 16, addrspace 5) + ; CHECK-NEXT: [[C17:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C17]](i32) + ; CHECK-NEXT: G_STORE [[UV32]](i32), [[PTR_ADD1]](p5) :: (store (i32) into stack + 4, addrspace 5) + ; CHECK-NEXT: [[UV33:%[0-9]+]]:_(i32), [[UV34:%[0-9]+]]:_(i32), [[UV35:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR11]](<3 x i32>) + ; CHECK-NEXT: [[C18:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C18]](i32) + ; CHECK-NEXT: G_STORE [[UV33]](i32), [[PTR_ADD2]](p5) :: (store (i32) into stack + 8, align 8, addrspace 5) + ; CHECK-NEXT: [[C19:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C19]](i32) + ; CHECK-NEXT: G_STORE [[UV34]](i32), [[PTR_ADD3]](p5) :: (store (i32) into stack + 12, addrspace 5) + ; CHECK-NEXT: [[C20:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C20]](i32) + ; CHECK-NEXT: G_STORE [[UV35]](i32), [[PTR_ADD4]](p5) :: (store (i32) into stack + 16, align 16, addrspace 5) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](i32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](i32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](i32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](i32) + ; CHECK-NEXT: $vgpr9 = COPY 
[[UV9]](i32) + ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](i32) + ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](i32) + ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](i32) + ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](i32) + ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](i32) + ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](i32) + ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](i32) + ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](i32) + ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](i32) + ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](i32) + ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](i32) + ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](i32) + ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](i32) + ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](i32) + ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](i32) + ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](i32) + ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](i32) + ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](i32) + ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](i32) + ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](i32) + ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_12xv3i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 20, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -4889,7 +4894,7 @@ define void @stack_12xv3f32() #0 { ; CHECK: bb.1.entry: ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr31 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 @@ -4898,115 +4903,115 @@ define void @stack_12xv3f32() #0 { ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: 
[[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 2.000000e+00 - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C2]](s32), [[C2]](s32), [[C2]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 3.000000e+00 - ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C3]](s32), [[C3]](s32), [[C3]](s32) - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 4.000000e+00 - ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C4]](s32), [[C4]](s32), [[C4]](s32) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e+00 - ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C5]](s32), [[C5]](s32), [[C5]](s32) - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.000000e+00 - ; CHECK-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C6]](s32), [[C6]](s32), [[C6]](s32) - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 7.000000e+00 - ; CHECK-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C7]](s32), [[C7]](s32), [[C7]](s32) - ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_FCONSTANT float 8.000000e+00 - ; CHECK-NEXT: [[BUILD_VECTOR8:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C8]](s32), [[C8]](s32), [[C8]](s32) - ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_FCONSTANT float 9.000000e+00 - ; CHECK-NEXT: [[BUILD_VECTOR9:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C9]](s32), [[C9]](s32), [[C9]](s32) - ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+01 - ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.100000e+01 - ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.200000e+01 - ; CHECK-NEXT: [[BUILD_VECTOR10:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C10]](s32), [[C11]](s32), [[C12]](s32) - ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.300000e+01 - ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.400000e+01 - ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.500000e+01 - ; CHECK-NEXT: [[BUILD_VECTOR11:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C13]](s32), [[C14]](s32), [[C15]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[C]](f32), [[C]](f32), [[C]](f32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[C1]](f32), [[C1]](f32), [[C1]](f32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 2.000000e+00 + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[C2]](f32), [[C2]](f32), [[C2]](f32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 3.000000e+00 + ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[C3]](f32), [[C3]](f32), [[C3]](f32) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 4.000000e+00 + ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[C4]](f32), [[C4]](f32), [[C4]](f32) + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(f32) = G_FCONSTANT float 5.000000e+00 + ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[C5]](f32), [[C5]](f32), [[C5]](f32) + ; CHECK-NEXT: 
[[C6:%[0-9]+]]:_(f32) = G_FCONSTANT float 6.000000e+00 + ; CHECK-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[C6]](f32), [[C6]](f32), [[C6]](f32) + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(f32) = G_FCONSTANT float 7.000000e+00 + ; CHECK-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[C7]](f32), [[C7]](f32), [[C7]](f32) + ; CHECK-NEXT: [[C8:%[0-9]+]]:_(f32) = G_FCONSTANT float 8.000000e+00 + ; CHECK-NEXT: [[BUILD_VECTOR8:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[C8]](f32), [[C8]](f32), [[C8]](f32) + ; CHECK-NEXT: [[C9:%[0-9]+]]:_(f32) = G_FCONSTANT float 9.000000e+00 + ; CHECK-NEXT: [[BUILD_VECTOR9:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[C9]](f32), [[C9]](f32), [[C9]](f32) + ; CHECK-NEXT: [[C10:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+01 + ; CHECK-NEXT: [[C11:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.100000e+01 + ; CHECK-NEXT: [[C12:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.200000e+01 + ; CHECK-NEXT: [[BUILD_VECTOR10:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[C10]](f32), [[C11]](f32), [[C12]](f32) + ; CHECK-NEXT: [[C13:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.300000e+01 + ; CHECK-NEXT: [[C14:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.400000e+01 + ; CHECK-NEXT: [[C15:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.500000e+01 + ; CHECK-NEXT: [[BUILD_VECTOR11:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[C13]](f32), [[C14]](f32), [[C15]](f32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_12xv3f32 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>) - ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<3 x s32>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<3 x s32>) - ; CHECK-NEXT: [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR3]](<3 x s32>) - ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR4]](<3 x s32>) - ; CHECK-NEXT: [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR5]](<3 x s32>) - ; CHECK-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR6]](<3 x s32>) - ; CHECK-NEXT: [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR7]](<3 x s32>) - ; CHECK-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR8]](<3 x s32>) - ; CHECK-NEXT: [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR9]](<3 x s32>) - ; CHECK-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR10]](<3 x s32>) + ; CHECK-NEXT: 
[[COPY12:%[0-9]+]]:_(i64) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x f32>) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(f32), [[UV4:%[0-9]+]]:_(f32), [[UV5:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<3 x f32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(f32), [[UV7:%[0-9]+]]:_(f32), [[UV8:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<3 x f32>) + ; CHECK-NEXT: [[UV9:%[0-9]+]]:_(f32), [[UV10:%[0-9]+]]:_(f32), [[UV11:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BUILD_VECTOR3]](<3 x f32>) + ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(f32), [[UV13:%[0-9]+]]:_(f32), [[UV14:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BUILD_VECTOR4]](<3 x f32>) + ; CHECK-NEXT: [[UV15:%[0-9]+]]:_(f32), [[UV16:%[0-9]+]]:_(f32), [[UV17:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BUILD_VECTOR5]](<3 x f32>) + ; CHECK-NEXT: [[UV18:%[0-9]+]]:_(f32), [[UV19:%[0-9]+]]:_(f32), [[UV20:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BUILD_VECTOR6]](<3 x f32>) + ; CHECK-NEXT: [[UV21:%[0-9]+]]:_(f32), [[UV22:%[0-9]+]]:_(f32), [[UV23:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BUILD_VECTOR7]](<3 x f32>) + ; CHECK-NEXT: [[UV24:%[0-9]+]]:_(f32), [[UV25:%[0-9]+]]:_(f32), [[UV26:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BUILD_VECTOR8]](<3 x f32>) + ; CHECK-NEXT: [[UV27:%[0-9]+]]:_(f32), [[UV28:%[0-9]+]]:_(f32), [[UV29:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BUILD_VECTOR9]](<3 x f32>) + ; CHECK-NEXT: [[UV30:%[0-9]+]]:_(f32), [[UV31:%[0-9]+]]:_(f32), [[UV32:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BUILD_VECTOR10]](<3 x f32>) ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 - ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C16]](s32) - ; CHECK-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5) - ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C17]](s32) - ; CHECK-NEXT: G_STORE [[UV32]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack + 4, addrspace 5) - ; CHECK-NEXT: [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR11]](<3 x s32>) - ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C18]](s32) - ; CHECK-NEXT: G_STORE [[UV33]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5) - ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C19]](s32) - ; CHECK-NEXT: G_STORE [[UV34]](s32), [[PTR_ADD3]](p5) :: (store (s32) into stack + 12, addrspace 5) - ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C20]](s32) - ; CHECK-NEXT: G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store (s32) into stack + 16, align 16, addrspace 5) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = 
COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) - ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) - ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) - ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) - ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) - ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) - ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) - ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) - ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) - ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) - ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) - ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) - ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) - ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) - ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: [[C16:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C16]](i32) + ; CHECK-NEXT: G_STORE [[UV31]](f32), [[PTR_ADD]](p5) :: (store (f32) into stack, align 16, addrspace 5) + ; CHECK-NEXT: [[C17:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C17]](i32) + ; CHECK-NEXT: G_STORE [[UV32]](f32), [[PTR_ADD1]](p5) :: (store (f32) into stack + 4, addrspace 5) + ; CHECK-NEXT: [[UV33:%[0-9]+]]:_(f32), [[UV34:%[0-9]+]]:_(f32), [[UV35:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BUILD_VECTOR11]](<3 x f32>) + ; CHECK-NEXT: [[C18:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C18]](i32) + ; CHECK-NEXT: G_STORE [[UV33]](f32), [[PTR_ADD2]](p5) :: (store (f32) into stack + 8, align 8, addrspace 5) + ; CHECK-NEXT: [[C19:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C19]](i32) + ; CHECK-NEXT: G_STORE [[UV34]](f32), [[PTR_ADD3]](p5) :: (store (f32) into stack + 12, addrspace 5) + ; CHECK-NEXT: [[C20:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C20]](i32) + ; CHECK-NEXT: G_STORE [[UV35]](f32), [[PTR_ADD4]](p5) :: (store (f32) into stack + 16, align 16, addrspace 5) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](f32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](f32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](f32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](f32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](f32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](f32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](f32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](f32) + ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](f32) + ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](f32) + ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](f32) + ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](f32) + ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](f32) + ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](f32) + ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](f32) + ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](f32) + ; 
CHECK-NEXT: $vgpr18 = COPY [[UV18]](f32) + ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](f32) + ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](f32) + ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](f32) + ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](f32) + ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](f32) + ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](f32) + ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](f32) + ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](f32) + ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](f32) + ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](f32) + ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](f32) + ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](f32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_12xv3f32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 20, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -5032,7 +5037,7 @@ define void @stack_8xv5i32() #0 { ; CHECK: bb.1.entry: ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr31 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 @@ -5041,119 +5046,119 @@ define void @stack_8xv5i32() #0 { ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32) - ; CHECK-NEXT: 
[[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C2]](s32), [[C2]](s32), [[C2]](s32), [[C2]](s32), [[C2]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C3]](s32), [[C3]](s32), [[C3]](s32), [[C3]](s32), [[C3]](s32) - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C4]](s32), [[C4]](s32), [[C4]](s32), [[C4]](s32), [[C4]](s32) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C5]](s32), [[C5]](s32), [[C5]](s32), [[C5]](s32), [[C5]](s32) - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 - ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C6]](s32), [[C7]](s32), [[C8]](s32), [[C9]](s32), [[C10]](s32) - ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 - ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 - ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 14 - ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 - ; CHECK-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C11]](s32), [[C12]](s32), [[C13]](s32), [[C14]](s32), [[C15]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x i32>) = G_BUILD_VECTOR [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32), [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<5 x i32>) = G_BUILD_VECTOR [[C1]](i32), [[C1]](i32), [[C1]](i32), [[C1]](i32), [[C1]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<5 x i32>) = G_BUILD_VECTOR [[C2]](i32), [[C2]](i32), [[C2]](i32), [[C2]](i32), [[C2]](i32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<5 x i32>) = G_BUILD_VECTOR [[C3]](i32), [[C3]](i32), [[C3]](i32), [[C3]](i32), [[C3]](i32) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<5 x i32>) = G_BUILD_VECTOR [[C4]](i32), [[C4]](i32), [[C4]](i32), [[C4]](i32), [[C4]](i32) + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 5 + ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<5 x i32>) = G_BUILD_VECTOR [[C5]](i32), [[C5]](i32), [[C5]](i32), [[C5]](i32), [[C5]](i32) + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 6 + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[C8:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[C9:%[0-9]+]]:_(i32) = G_CONSTANT i32 9 + ; CHECK-NEXT: [[C10:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<5 x i32>) = G_BUILD_VECTOR [[C6]](i32), [[C7]](i32), [[C8]](i32), [[C9]](i32), [[C10]](i32) + ; CHECK-NEXT: [[C11:%[0-9]+]]:_(i32) = G_CONSTANT i32 11 + ; CHECK-NEXT: [[C12:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CHECK-NEXT: [[C13:%[0-9]+]]:_(i32) = G_CONSTANT i32 13 + ; CHECK-NEXT: [[C14:%[0-9]+]]:_(i32) = G_CONSTANT i32 14 + ; CHECK-NEXT: [[C15:%[0-9]+]]:_(i32) = G_CONSTANT i32 15 + ; CHECK-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<5 x i32>) = 
G_BUILD_VECTOR [[C11]](i32), [[C12]](i32), [[C13]](i32), [[C14]](i32), [[C15]](i32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_8xv5i32 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<5 x s32>) - ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<5 x s32>) - ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<5 x s32>) - ; CHECK-NEXT: [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR3]](<5 x s32>) - ; CHECK-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR4]](<5 x s32>) - ; CHECK-NEXT: [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR5]](<5 x s32>) - ; CHECK-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR6]](<5 x s32>) + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(i64) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<5 x i32>) + ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<5 x i32>) + ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32), [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32), [[UV14:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<5 x i32>) + ; CHECK-NEXT: [[UV15:%[0-9]+]]:_(i32), [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32), [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR3]](<5 x i32>) + ; CHECK-NEXT: [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32), [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32), [[UV24:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR4]](<5 x i32>) + ; CHECK-NEXT: [[UV25:%[0-9]+]]:_(i32), [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32), [[UV28:%[0-9]+]]:_(i32), [[UV29:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR5]](<5 x i32>) + ; CHECK-NEXT: 
[[UV30:%[0-9]+]]:_(i32), [[UV31:%[0-9]+]]:_(i32), [[UV32:%[0-9]+]]:_(i32), [[UV33:%[0-9]+]]:_(i32), [[UV34:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR6]](<5 x i32>) ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 - ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C16]](s32) - ; CHECK-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5) - ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C17]](s32) - ; CHECK-NEXT: G_STORE [[UV32]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack + 4, addrspace 5) - ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C18]](s32) - ; CHECK-NEXT: G_STORE [[UV33]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5) - ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C19]](s32) - ; CHECK-NEXT: G_STORE [[UV34]](s32), [[PTR_ADD3]](p5) :: (store (s32) into stack + 12, addrspace 5) - ; CHECK-NEXT: [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR7]](<5 x s32>) - ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C20]](s32) - ; CHECK-NEXT: G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store (s32) into stack + 16, align 16, addrspace 5) - ; CHECK-NEXT: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C21]](s32) - ; CHECK-NEXT: G_STORE [[UV36]](s32), [[PTR_ADD5]](p5) :: (store (s32) into stack + 20, addrspace 5) - ; CHECK-NEXT: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C22]](s32) - ; CHECK-NEXT: G_STORE [[UV37]](s32), [[PTR_ADD6]](p5) :: (store (s32) into stack + 24, align 8, addrspace 5) - ; CHECK-NEXT: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C23]](s32) - ; CHECK-NEXT: G_STORE [[UV38]](s32), [[PTR_ADD7]](p5) :: (store (s32) into stack + 28, addrspace 5) - ; CHECK-NEXT: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C24]](s32) - ; CHECK-NEXT: G_STORE [[UV39]](s32), [[PTR_ADD8]](p5) :: (store (s32) into stack + 32, align 16, addrspace 5) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) - ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) 
- ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) - ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) - ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) - ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) - ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) - ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) - ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) - ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) - ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) - ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) - ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) - ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) - ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: [[C16:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C16]](i32) + ; CHECK-NEXT: G_STORE [[UV31]](i32), [[PTR_ADD]](p5) :: (store (i32) into stack, align 16, addrspace 5) + ; CHECK-NEXT: [[C17:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C17]](i32) + ; CHECK-NEXT: G_STORE [[UV32]](i32), [[PTR_ADD1]](p5) :: (store (i32) into stack + 4, addrspace 5) + ; CHECK-NEXT: [[C18:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C18]](i32) + ; CHECK-NEXT: G_STORE [[UV33]](i32), [[PTR_ADD2]](p5) :: (store (i32) into stack + 8, align 8, addrspace 5) + ; CHECK-NEXT: [[C19:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C19]](i32) + ; CHECK-NEXT: G_STORE [[UV34]](i32), [[PTR_ADD3]](p5) :: (store (i32) into stack + 12, addrspace 5) + ; CHECK-NEXT: [[UV35:%[0-9]+]]:_(i32), [[UV36:%[0-9]+]]:_(i32), [[UV37:%[0-9]+]]:_(i32), [[UV38:%[0-9]+]]:_(i32), [[UV39:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR7]](<5 x i32>) + ; CHECK-NEXT: [[C20:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C20]](i32) + ; CHECK-NEXT: G_STORE [[UV35]](i32), [[PTR_ADD4]](p5) :: (store (i32) into stack + 16, align 16, addrspace 5) + ; CHECK-NEXT: [[C21:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C21]](i32) + ; CHECK-NEXT: G_STORE [[UV36]](i32), [[PTR_ADD5]](p5) :: (store (i32) into stack + 20, addrspace 5) + ; CHECK-NEXT: [[C22:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C22]](i32) + ; CHECK-NEXT: G_STORE [[UV37]](i32), [[PTR_ADD6]](p5) :: (store (i32) into stack + 24, align 8, addrspace 5) + ; CHECK-NEXT: [[C23:%[0-9]+]]:_(i32) = G_CONSTANT i32 28 + ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C23]](i32) + ; CHECK-NEXT: G_STORE [[UV38]](i32), [[PTR_ADD7]](p5) :: (store (i32) into stack + 28, addrspace 5) + ; CHECK-NEXT: [[C24:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C24]](i32) + ; CHECK-NEXT: G_STORE [[UV39]](i32), [[PTR_ADD8]](p5) :: (store (i32) into stack + 32, align 16, addrspace 5) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](i32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](i32) + ; 
CHECK-NEXT: $vgpr7 = COPY [[UV7]](i32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](i32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](i32) + ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](i32) + ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](i32) + ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](i32) + ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](i32) + ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](i32) + ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](i32) + ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](i32) + ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](i32) + ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](i32) + ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](i32) + ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](i32) + ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](i32) + ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](i32) + ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](i32) + ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](i32) + ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](i32) + ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](i32) + ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](i32) + ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](i32) + ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](i32) + ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_8xv5i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 36, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -5175,7 +5180,7 @@ define void @stack_8xv5f32() #0 { ; CHECK: bb.1.entry: ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr31 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 @@ -5184,119 +5189,119 @@ define void @stack_8xv5f32() #0 { ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 ; CHECK-NEXT: 
[[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 2.000000e+00 - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C2]](s32), [[C2]](s32), [[C2]](s32), [[C2]](s32), [[C2]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 3.000000e+00 - ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C3]](s32), [[C3]](s32), [[C3]](s32), [[C3]](s32), [[C3]](s32) - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 4.000000e+00 - ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C4]](s32), [[C4]](s32), [[C4]](s32), [[C4]](s32), [[C4]](s32) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e+00 - ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C5]](s32), [[C5]](s32), [[C5]](s32), [[C5]](s32), [[C5]](s32) - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.000000e+00 - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 7.000000e+00 - ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_FCONSTANT float 8.000000e+00 - ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_FCONSTANT float 9.000000e+00 - ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+01 - ; CHECK-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C6]](s32), [[C7]](s32), [[C8]](s32), [[C9]](s32), [[C10]](s32) - ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.100000e+01 - ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.200000e+01 - ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.300000e+01 - ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.400000e+01 - ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.500000e+01 - ; CHECK-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C11]](s32), [[C12]](s32), [[C13]](s32), [[C14]](s32), [[C15]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x f32>) = G_BUILD_VECTOR [[C]](f32), [[C]](f32), [[C]](f32), [[C]](f32), [[C]](f32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<5 x f32>) = G_BUILD_VECTOR [[C1]](f32), [[C1]](f32), [[C1]](f32), [[C1]](f32), [[C1]](f32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 2.000000e+00 + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<5 x f32>) = G_BUILD_VECTOR [[C2]](f32), [[C2]](f32), [[C2]](f32), [[C2]](f32), [[C2]](f32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 3.000000e+00 + ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<5 x f32>) = G_BUILD_VECTOR [[C3]](f32), [[C3]](f32), [[C3]](f32), [[C3]](f32), [[C3]](f32) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 4.000000e+00 + ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<5 x f32>) = G_BUILD_VECTOR [[C4]](f32), [[C4]](f32), [[C4]](f32), [[C4]](f32), [[C4]](f32) + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(f32) = G_FCONSTANT float 5.000000e+00 + ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<5 x f32>) = G_BUILD_VECTOR [[C5]](f32), [[C5]](f32), [[C5]](f32), 
[[C5]](f32), [[C5]](f32) + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(f32) = G_FCONSTANT float 6.000000e+00 + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(f32) = G_FCONSTANT float 7.000000e+00 + ; CHECK-NEXT: [[C8:%[0-9]+]]:_(f32) = G_FCONSTANT float 8.000000e+00 + ; CHECK-NEXT: [[C9:%[0-9]+]]:_(f32) = G_FCONSTANT float 9.000000e+00 + ; CHECK-NEXT: [[C10:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+01 + ; CHECK-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<5 x f32>) = G_BUILD_VECTOR [[C6]](f32), [[C7]](f32), [[C8]](f32), [[C9]](f32), [[C10]](f32) + ; CHECK-NEXT: [[C11:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.100000e+01 + ; CHECK-NEXT: [[C12:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.200000e+01 + ; CHECK-NEXT: [[C13:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.300000e+01 + ; CHECK-NEXT: [[C14:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.400000e+01 + ; CHECK-NEXT: [[C15:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.500000e+01 + ; CHECK-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<5 x f32>) = G_BUILD_VECTOR [[C11]](f32), [[C12]](f32), [[C13]](f32), [[C14]](f32), [[C15]](f32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_8xv5f32 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<5 x s32>) - ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<5 x s32>) - ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<5 x s32>) - ; CHECK-NEXT: [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR3]](<5 x s32>) - ; CHECK-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR4]](<5 x s32>) - ; CHECK-NEXT: [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR5]](<5 x s32>) - ; CHECK-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR6]](<5 x s32>) + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(i64) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32), [[UV4:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<5 x f32>) + ; CHECK-NEXT: 
[[UV5:%[0-9]+]]:_(f32), [[UV6:%[0-9]+]]:_(f32), [[UV7:%[0-9]+]]:_(f32), [[UV8:%[0-9]+]]:_(f32), [[UV9:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<5 x f32>) + ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(f32), [[UV11:%[0-9]+]]:_(f32), [[UV12:%[0-9]+]]:_(f32), [[UV13:%[0-9]+]]:_(f32), [[UV14:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<5 x f32>) + ; CHECK-NEXT: [[UV15:%[0-9]+]]:_(f32), [[UV16:%[0-9]+]]:_(f32), [[UV17:%[0-9]+]]:_(f32), [[UV18:%[0-9]+]]:_(f32), [[UV19:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BUILD_VECTOR3]](<5 x f32>) + ; CHECK-NEXT: [[UV20:%[0-9]+]]:_(f32), [[UV21:%[0-9]+]]:_(f32), [[UV22:%[0-9]+]]:_(f32), [[UV23:%[0-9]+]]:_(f32), [[UV24:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BUILD_VECTOR4]](<5 x f32>) + ; CHECK-NEXT: [[UV25:%[0-9]+]]:_(f32), [[UV26:%[0-9]+]]:_(f32), [[UV27:%[0-9]+]]:_(f32), [[UV28:%[0-9]+]]:_(f32), [[UV29:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BUILD_VECTOR5]](<5 x f32>) + ; CHECK-NEXT: [[UV30:%[0-9]+]]:_(f32), [[UV31:%[0-9]+]]:_(f32), [[UV32:%[0-9]+]]:_(f32), [[UV33:%[0-9]+]]:_(f32), [[UV34:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BUILD_VECTOR6]](<5 x f32>) ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 - ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C16]](s32) - ; CHECK-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5) - ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C17]](s32) - ; CHECK-NEXT: G_STORE [[UV32]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack + 4, addrspace 5) - ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C18]](s32) - ; CHECK-NEXT: G_STORE [[UV33]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5) - ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C19]](s32) - ; CHECK-NEXT: G_STORE [[UV34]](s32), [[PTR_ADD3]](p5) :: (store (s32) into stack + 12, addrspace 5) - ; CHECK-NEXT: [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR7]](<5 x s32>) - ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C20]](s32) - ; CHECK-NEXT: G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store (s32) into stack + 16, align 16, addrspace 5) - ; CHECK-NEXT: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C21]](s32) - ; CHECK-NEXT: G_STORE [[UV36]](s32), [[PTR_ADD5]](p5) :: (store (s32) into stack + 20, addrspace 5) - ; CHECK-NEXT: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C22]](s32) - ; CHECK-NEXT: G_STORE [[UV37]](s32), [[PTR_ADD6]](p5) :: (store (s32) into stack + 24, align 8, addrspace 5) - ; CHECK-NEXT: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C23]](s32) - ; CHECK-NEXT: G_STORE [[UV38]](s32), [[PTR_ADD7]](p5) :: (store (s32) into stack + 28, addrspace 5) - ; CHECK-NEXT: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD 
[[AMDGPU_WAVE_ADDRESS]], [[C24]](s32) - ; CHECK-NEXT: G_STORE [[UV39]](s32), [[PTR_ADD8]](p5) :: (store (s32) into stack + 32, align 16, addrspace 5) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) - ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) - ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) - ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) - ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) - ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) - ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) - ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) - ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) - ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) - ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) - ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) - ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) - ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) - ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: [[C16:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C16]](i32) + ; CHECK-NEXT: G_STORE [[UV31]](f32), [[PTR_ADD]](p5) :: (store (f32) into stack, align 16, addrspace 5) + ; CHECK-NEXT: [[C17:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C17]](i32) + ; CHECK-NEXT: G_STORE [[UV32]](f32), [[PTR_ADD1]](p5) :: (store (f32) into stack + 4, addrspace 5) + ; CHECK-NEXT: [[C18:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C18]](i32) + ; CHECK-NEXT: G_STORE [[UV33]](f32), [[PTR_ADD2]](p5) :: (store (f32) into stack + 8, align 8, addrspace 5) + ; CHECK-NEXT: [[C19:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C19]](i32) + ; CHECK-NEXT: G_STORE [[UV34]](f32), [[PTR_ADD3]](p5) :: (store (f32) into stack + 12, addrspace 5) + ; CHECK-NEXT: [[UV35:%[0-9]+]]:_(f32), [[UV36:%[0-9]+]]:_(f32), [[UV37:%[0-9]+]]:_(f32), [[UV38:%[0-9]+]]:_(f32), [[UV39:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BUILD_VECTOR7]](<5 x f32>) + ; CHECK-NEXT: [[C20:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C20]](i32) + ; CHECK-NEXT: G_STORE [[UV35]](f32), [[PTR_ADD4]](p5) :: (store (f32) into stack + 16, align 16, addrspace 5) + ; CHECK-NEXT: [[C21:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C21]](i32) + ; CHECK-NEXT: G_STORE [[UV36]](f32), [[PTR_ADD5]](p5) :: (store (f32) into stack + 20, addrspace 5) + ; CHECK-NEXT: [[C22:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C22]](i32) 
+ ; CHECK-NEXT: G_STORE [[UV37]](f32), [[PTR_ADD6]](p5) :: (store (f32) into stack + 24, align 8, addrspace 5) + ; CHECK-NEXT: [[C23:%[0-9]+]]:_(i32) = G_CONSTANT i32 28 + ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C23]](i32) + ; CHECK-NEXT: G_STORE [[UV38]](f32), [[PTR_ADD7]](p5) :: (store (f32) into stack + 28, addrspace 5) + ; CHECK-NEXT: [[C24:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C24]](i32) + ; CHECK-NEXT: G_STORE [[UV39]](f32), [[PTR_ADD8]](p5) :: (store (f32) into stack + 32, align 16, addrspace 5) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](f32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](f32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](f32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](f32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](f32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](f32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](f32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](f32) + ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](f32) + ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](f32) + ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](f32) + ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](f32) + ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](f32) + ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](f32) + ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](f32) + ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](f32) + ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](f32) + ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](f32) + ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](f32) + ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](f32) + ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](f32) + ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](f32) + ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](f32) + ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](f32) + ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](f32) + ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](f32) + ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](f32) + ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](f32) + ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](f32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_8xv5f32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit 
$sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 36, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -5321,25 +5326,25 @@ define amdgpu_ps void @amdgpu_ps_call_default_cc() { ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY [[DEF]](p4) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(p4) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p4) = COPY [[C1]](p4) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[DEF2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[DEF2]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[DEF2]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[DEF2]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY6]](<4 x s32>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[DEF2]](i32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[DEF2]](i32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[DEF2]](i32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[DEF2]](i32) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY6]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[DEF]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY1]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[DEF1]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[DEF2]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY2]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY3]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY4]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY5]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[DEF1]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[DEF2]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY2]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY3]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[COPY4]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY5]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[C]](p0), 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -5353,7 +5358,7 @@ define void @test_call_external_void_func_i16_inreg(i16 inreg %arg) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr31 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 @@ -5362,33 +5367,33 @@ define void @test_call_external_void_func_i16_inreg(i16 inreg %arg) #0 { ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = 
COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr16 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr16 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i16_inreg ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[ANYEXT]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[TRUNC]](i16) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[ANYEXT]](i32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](i32) + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY17]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY18]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[COPY17]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY18]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i16_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -5401,7 +5406,7 @@ define void @test_call_external_void_func_i32_inreg(i32 inreg %arg) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, 
$sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr31 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 @@ -5410,31 +5415,31 @@ define void @test_call_external_void_func_i32_inreg(i32 inreg %arg) #0 { ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr16 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr16 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32_inreg ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](i32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](i32) + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY17]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY18]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[COPY17]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY18]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def 
$scc ; CHECK-NEXT: SI_RETURN @@ -5447,7 +5452,7 @@ define void @test_call_external_void_func_i64_inreg(i64 inreg %arg) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr31 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 @@ -5456,36 +5461,36 @@ define void @test_call_external_void_func_i64_inreg(i64 inreg %arg) #0 { ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr16 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr17 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY9]](s32), [[COPY10]](s32) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr16 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr17 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY9]](i32), [[COPY10]](i32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i64_inreg ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64) - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) - ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i64) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[MV]](i64) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](i32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](i32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](i32) + ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](i32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x 
i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY13]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY17]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY18]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY19]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY17]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[COPY18]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY19]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i64_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -5498,7 +5503,7 @@ define void @test_call_external_void_func_v2i32_inreg(<2 x i32> inreg %arg) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr31 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 @@ -5507,36 +5512,36 @@ define void @test_call_external_void_func_v2i32_inreg(<2 x i32> inreg %arg) #0 { ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr16 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr17 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY9]](s32), [[COPY10]](s32) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr16 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr17 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY9]](i32), [[COPY10]](i32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2i32_inreg ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s32>) - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT 
intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) - ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i64) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x i32>) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](i32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](i32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](i32) + ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](i32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY13]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY17]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY18]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY19]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY17]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[COPY18]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY19]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2i32_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -5549,7 +5554,7 @@ define void @test_call_external_void_func_f16_inreg(half inreg %arg) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr31 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 @@ -5558,33 +5563,35 @@ define void @test_call_external_void_func_f16_inreg(half inreg %arg) #0 { ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr16 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr16 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; CHECK-NEXT: 
[[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_f16_inreg ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[ANYEXT]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[ANYEXT]](i32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](i32) + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY17]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY18]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[COPY17]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY18]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_f16_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -5594,14 +5601,50 @@ define void @test_call_external_void_func_f16_inreg(half inreg %arg) #0 { define void @test_call_external_void_func_bf16_inreg(bfloat inreg %arg) #0 { ; CHECK-LABEL: name: test_call_external_void_func_bf16_inreg - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: liveins: $sgpr16 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr16 - ; CHECK-NEXT: 
[[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1 (%ir-block.0): + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr31 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr16 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(bf16) = G_BITCAST [[TRUNC]](i16) + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc + ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_bf16_inreg + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](bf16) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[ANYEXT]](i32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](i32) + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x i32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[COPY17]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY18]](i32) + ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_bf16_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc + ; CHECK-NEXT: SI_RETURN call void @external_void_func_bf16_inreg(bfloat inreg %arg) ret void } @@ -5611,7 +5654,7 @@ define void @test_call_external_void_func_f32_inreg(float inreg %arg) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr31 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 
= COPY $sgpr15 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 @@ -5620,31 +5663,31 @@ define void @test_call_external_void_func_f32_inreg(float inreg %arg) #0 { ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr16 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(f32) = COPY $sgpr16 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_f32_inreg ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(f32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](f32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](f32) + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY17]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY18]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[COPY17]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY18]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_f32_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -5657,7 +5700,7 @@ define void @test_call_external_void_func_f64_inreg(double inreg %arg) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, 
$sgpr15, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr31 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 @@ -5666,36 +5709,38 @@ define void @test_call_external_void_func_f64_inreg(double inreg %arg) #0 { ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr16 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr17 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY9]](s32), [[COPY10]](s32) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr16 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr17 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY9]](i32), [[COPY10]](i32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[MV]](i64) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_f64_inreg ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64) - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) - ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i64) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[BITCAST]](f64) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST1]](i64) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](i32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](i32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](i32) + ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](i32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) 
; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY13]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY17]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY18]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY19]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY17]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[COPY18]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY19]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_f64_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -5708,7 +5753,7 @@ define void @test_call_external_void_func_v2f16_inreg(<2 x half> inreg %arg) #0 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr31 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 @@ -5717,32 +5762,32 @@ define void @test_call_external_void_func_v2f16_inreg(<2 x half> inreg %arg) #0 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $sgpr16 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(<2 x f16>) = COPY $sgpr16 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2f16_inreg ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[BITCAST]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; 
CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY9]](<2 x f16>) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[BITCAST]](i32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](i32) + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY17]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY18]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[COPY17]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY18]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2f16_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -5755,7 +5800,7 @@ define void @test_call_external_void_func_v3f16_inreg(<3 x half> inreg %arg) #0 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr31 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 @@ -5764,43 +5809,43 @@ define void @test_call_external_void_func_v3f16_inreg(<3 x half> inreg %arg) #0 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $sgpr16 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $sgpr17 - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY9]](<2 x s16>), [[COPY10]](<2 x s16>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(<2 x f16>) = COPY $sgpr16 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(<2 x f16>) = COPY $sgpr17 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[COPY9]](<2 x f16>), [[COPY10]](<2 x f16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(f16), [[UV1:%[0-9]+]]:_(f16), [[UV2:%[0-9]+]]:_(f16), [[UV3:%[0-9]+]]:_(f16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x f16>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f16>) = G_BUILD_VECTOR [[UV]](f16), [[UV1]](f16), [[UV2]](f16) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = 
G_GLOBAL_VALUE @external_void_func_v3f16_inreg ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s16>) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[DEF]](s16) - ; CHECK-NEXT: [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[BITCAST]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[BITCAST1]](s32) - ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i64) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(f16), [[UV5:%[0-9]+]]:_(f16), [[UV6:%[0-9]+]]:_(f16) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x f16>) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(f16) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x f16>) = G_BUILD_VECTOR [[UV4]](f16), [[UV5]](f16), [[UV6]](f16), [[DEF]](f16) + ; CHECK-NEXT: [[UV7:%[0-9]+]]:_(<2 x f16>), [[UV8:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x f16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV7]](<2 x f16>) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[BITCAST]](i32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV8]](<2 x f16>) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[BITCAST1]](i32) + ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](i32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY13]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY17]](s32) - ; 
CHECK-NEXT: $sgpr15 = COPY [[COPY18]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY19]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY17]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[COPY18]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY19]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v3f16_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -5813,7 +5858,7 @@ define void @test_call_external_void_func_v4f16_inreg(<4 x half> inreg %arg) #0 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr31 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 @@ -5822,38 +5867,38 @@ define void @test_call_external_void_func_v4f16_inreg(<4 x half> inreg %arg) #0 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $sgpr16 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $sgpr17 - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY9]](<2 x s16>), [[COPY10]](<2 x s16>) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(<2 x f16>) = COPY $sgpr16 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(<2 x f16>) = COPY $sgpr17 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[COPY9]](<2 x f16>), [[COPY10]](<2 x f16>) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v4f16_inreg ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[BITCAST]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[BITCAST1]](s32) - ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) 
= COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i64) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x f16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x f16>) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[BITCAST]](i32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x f16>) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[BITCAST1]](i32) + ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](i32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY13]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY17]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY18]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY19]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY17]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[COPY18]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY19]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v4f16_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -5866,7 +5911,7 @@ define void @test_call_external_void_func_p0_inreg(ptr inreg %arg) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr31 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 @@ -5875,36 +5920,36 @@ define void @test_call_external_void_func_p0_inreg(ptr inreg %arg) #0 { ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr16 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr17 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY9]](s32), [[COPY10]](s32) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr16 + ; 
CHECK-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr17 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY9]](i32), [[COPY10]](i32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_p0_inreg ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](p0) - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) - ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i64) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[MV]](p0) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](i32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](i32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](i32) + ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](i32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY13]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY17]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY18]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY19]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY17]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[COPY18]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY19]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_p0_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc 
; CHECK-NEXT: SI_RETURN @@ -5917,7 +5962,7 @@ define void @test_call_external_void_func_p1_inreg(ptr addrspace(1) inreg %arg) ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr31 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 @@ -5926,36 +5971,36 @@ define void @test_call_external_void_func_p1_inreg(ptr addrspace(1) inreg %arg) ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr16 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr17 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY9]](s32), [[COPY10]](s32) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr16 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr17 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY9]](i32), [[COPY10]](i32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_p1_inreg ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](p1) - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) - ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i64) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[MV]](p1) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](i32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](i32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](i32) + ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](i32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY 
[[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY13]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY17]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY18]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY19]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY17]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[COPY18]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY19]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_p1_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -5968,7 +6013,7 @@ define void @test_call_external_void_func_p3_inreg(ptr addrspace(3) inreg %arg) ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr31 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 @@ -5983,26 +6028,26 @@ define void @test_call_external_void_func_p3_inreg(ptr addrspace(3) inreg %arg) ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY9]](p3) - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[PTRTOINT]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(i32) = G_PTRTOINT [[COPY9]](p3) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[PTRTOINT]](i32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](i32) + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: 
$sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY17]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY18]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[COPY17]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY18]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_p3_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -6015,7 +6060,7 @@ define void @test_call_external_void_func_v2p1_inreg(<2 x ptr addrspace(1)> inre ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr31 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 @@ -6024,44 +6069,44 @@ define void @test_call_external_void_func_v2p1_inreg(<2 x ptr addrspace(1)> inre ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr16 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr17 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr18 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $sgpr19 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY9]](s32), [[COPY10]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY11]](s32), [[COPY12]](s32) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr16 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr17 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr18 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $sgpr19 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY9]](i32), [[COPY10]](i32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY11]](i32), [[COPY12]](i32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2p1_inreg ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: 
[[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x p1>) - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) - ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV2]](s32) - ; CHECK-NEXT: $sgpr2 = COPY [[INTRINSIC_CONVERGENT2]](s32) - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT3:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV3]](s32) - ; CHECK-NEXT: $sgpr3 = COPY [[INTRINSIC_CONVERGENT3]](s32) - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY22]](<4 x s32>) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i64) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x p1>) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](i32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](i32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](i32) + ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](i32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV2]](i32) + ; CHECK-NEXT: $sgpr2 = COPY [[INTRINSIC_CONVERGENT2]](i32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT3:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV3]](i32) + ; CHECK-NEXT: $sgpr3 = COPY [[INTRINSIC_CONVERGENT3]](i32) + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY22]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY13]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY14]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY15]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY16]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY17]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY18]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY19]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY20]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY21]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY16]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY17]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY18]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY19]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[COPY20]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY21]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2p1_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit 
$sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -6074,7 +6119,7 @@ define void @test_call_external_void_func_v2p5_inreg(<2 x ptr addrspace(5)> inre ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr31 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 @@ -6091,28 +6136,28 @@ define void @test_call_external_void_func_v2p5_inreg(<2 x ptr addrspace(5)> inre ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x p5>) - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) - ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i64) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x p5>) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](i32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](i32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](i32) + ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](i32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY13]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY17]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY18]](s32) - ; CHECK-NEXT: 
$vgpr31 = COPY [[COPY19]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY17]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[COPY18]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY19]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2p5_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constant-fold-vector-op.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constant-fold-vector-op.ll index 2ba0979c72533..cc37c1f588e6a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constant-fold-vector-op.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constant-fold-vector-op.ll @@ -10,12 +10,12 @@ define amdgpu_kernel void @constant_fold_vector_add() { ; CHECK-NEXT: liveins: $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64), [[C]](s64), [[C]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[C]](i64), [[C]](i64), [[C]](i64), [[C]](i64) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[C2]](s64), [[C2]](s64), [[C2]](s64), [[C2]](s64) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s64>), [[C1]](p1) :: (store (<4 x s64>) into `ptr addrspace(1) null`, addrspace 1) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[C2]](i64), [[C2]](i64), [[C2]](i64), [[C2]](i64) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x i64>), [[C1]](p1) :: (store (<4 x i64>) into `ptr addrspace(1) null`, addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 entry: %add = add <4 x i64> zeroinitializer, zeroinitializer diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constantexpr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constantexpr.ll index c7870d98d4ca1..d7a8b9a5ba1e9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constantexpr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constantexpr.ll @@ -7,12 +7,12 @@ define i32 @test() { ; CHECK-LABEL: name: test ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var - ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[GV]](p0) - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[PTRTOINT]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY3]](s32) + ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(i32) = G_PTRTOINT [[GV]](p0) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY [[PTRTOINT]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: 
[[COPY3:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY3]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 ret i32 bitcast (<1 x i32> bitcast (i32 ptrtoint (ptr @var to i32) to <1 x i32>), i64 0)> to i32) } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constrained-fp.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constrained-fp.ll index 3a31ab4ab9d0a..b8274b6db29ef 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constrained-fp.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constrained-fp.ll @@ -6,10 +6,10 @@ define float @v_constained_fadd_f32_fpexcept_strict(float %x, float %y) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[STRICT_FADD:%[0-9]+]]:_(s32) = G_STRICT_FADD [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[STRICT_FADD]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; CHECK-NEXT: [[STRICT_FADD:%[0-9]+]]:_(f32) = G_STRICT_FADD [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[STRICT_FADD]](f32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") ret float %val @@ -20,10 +20,10 @@ define float @v_constained_fadd_f32_fpexcept_strict_flags(float %x, float %y) #0 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[STRICT_FADD:%[0-9]+]]:_(s32) = nsz G_STRICT_FADD [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[STRICT_FADD]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; CHECK-NEXT: [[STRICT_FADD:%[0-9]+]]:_(f32) = nsz G_STRICT_FADD [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[STRICT_FADD]](f32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call nsz float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") ret float %val @@ -34,10 +34,10 @@ define float @v_constained_fadd_f32_fpexcept_ignore(float %x, float %y) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[STRICT_FADD:%[0-9]+]]:_(s32) = nofpexcept G_STRICT_FADD [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[STRICT_FADD]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; CHECK-NEXT: [[STRICT_FADD:%[0-9]+]]:_(f32) = nofpexcept G_STRICT_FADD [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[STRICT_FADD]](f32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret float %val @@ -48,10 +48,10 @@ define float @v_constained_fadd_f32_fpexcept_ignore_flags(float %x, float %y) #0 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[STRICT_FADD:%[0-9]+]]:_(s32) = nsz nofpexcept 
G_STRICT_FADD [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[STRICT_FADD]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; CHECK-NEXT: [[STRICT_FADD:%[0-9]+]]:_(f32) = nsz nofpexcept G_STRICT_FADD [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[STRICT_FADD]](f32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call nsz float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret float %val @@ -62,10 +62,10 @@ define float @v_constained_fadd_f32_fpexcept_maytrap(float %x, float %y) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[STRICT_FADD:%[0-9]+]]:_(s32) = G_STRICT_FADD [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[STRICT_FADD]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; CHECK-NEXT: [[STRICT_FADD:%[0-9]+]]:_(f32) = G_STRICT_FADD [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[STRICT_FADD]](f32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap") ret float %val @@ -76,16 +76,16 @@ define <2 x float> @v_constained_fadd_v2f32_fpexcept_strict(<2 x float> %x, <2 x ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[STRICT_FADD:%[0-9]+]]:_(<2 x s32>) = G_STRICT_FADD [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[STRICT_FADD]](<2 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[COPY]](f32), [[COPY1]](f32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[COPY2]](f32), [[COPY3]](f32) + ; CHECK-NEXT: [[STRICT_FADD:%[0-9]+]]:_(<2 x f32>) = G_STRICT_FADD [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[STRICT_FADD]](<2 x f32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](f32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](f32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") ret <2 x float> %val @@ -96,16 +96,16 @@ define <2 x float> @v_constained_fadd_v2f32_fpexcept_ignore(<2 x float> %x, <2 x ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; 
CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[STRICT_FADD:%[0-9]+]]:_(<2 x s32>) = nofpexcept G_STRICT_FADD [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[STRICT_FADD]](<2 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[COPY]](f32), [[COPY1]](f32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[COPY2]](f32), [[COPY3]](f32) + ; CHECK-NEXT: [[STRICT_FADD:%[0-9]+]]:_(<2 x f32>) = nofpexcept G_STRICT_FADD [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[STRICT_FADD]](<2 x f32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](f32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](f32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret <2 x float> %val @@ -116,16 +116,16 @@ define <2 x float> @v_constained_fadd_v2f32_fpexcept_maytrap(<2 x float> %x, <2 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[STRICT_FADD:%[0-9]+]]:_(<2 x s32>) = G_STRICT_FADD [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[STRICT_FADD]](<2 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[COPY]](f32), [[COPY1]](f32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[COPY2]](f32), [[COPY3]](f32) + ; CHECK-NEXT: [[STRICT_FADD:%[0-9]+]]:_(<2 x f32>) = G_STRICT_FADD [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[STRICT_FADD]](<2 x f32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](f32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](f32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap") ret <2 x float> %val @@ -136,10 +136,10 @@ define 
float @v_constained_fsub_f32_fpexcept_ignore_flags(float %x, float %y) #0 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[STRICT_FSUB:%[0-9]+]]:_(s32) = nsz nofpexcept G_STRICT_FSUB [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[STRICT_FSUB]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; CHECK-NEXT: [[STRICT_FSUB:%[0-9]+]]:_(f32) = nsz nofpexcept G_STRICT_FSUB [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[STRICT_FSUB]](f32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call nsz float @llvm.experimental.constrained.fsub.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret float %val @@ -150,10 +150,10 @@ define float @v_constained_fmul_f32_fpexcept_ignore_flags(float %x, float %y) #0 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[STRICT_FMUL:%[0-9]+]]:_(s32) = nsz nofpexcept G_STRICT_FMUL [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[STRICT_FMUL]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; CHECK-NEXT: [[STRICT_FMUL:%[0-9]+]]:_(f32) = nsz nofpexcept G_STRICT_FMUL [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[STRICT_FMUL]](f32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call nsz float @llvm.experimental.constrained.fmul.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret float %val @@ -164,10 +164,10 @@ define float @v_constained_fdiv_f32_fpexcept_ignore_flags(float %x, float %y) #0 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[STRICT_FDIV:%[0-9]+]]:_(s32) = nsz nofpexcept G_STRICT_FDIV [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[STRICT_FDIV]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; CHECK-NEXT: [[STRICT_FDIV:%[0-9]+]]:_(f32) = nsz nofpexcept G_STRICT_FDIV [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[STRICT_FDIV]](f32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call nsz float @llvm.experimental.constrained.fdiv.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret float %val @@ -178,10 +178,10 @@ define float @v_constained_frem_f32_fpexcept_ignore_flags(float %x, float %y) #0 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[STRICT_FREM:%[0-9]+]]:_(s32) = nsz nofpexcept G_STRICT_FREM [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[STRICT_FREM]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; CHECK-NEXT: [[STRICT_FREM:%[0-9]+]]:_(f32) = nsz nofpexcept G_STRICT_FREM [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[STRICT_FREM]](f32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call nsz float @llvm.experimental.constrained.frem.f32(float %x, float %y, metadata !"round.tonearest", metadata 
!"fpexcept.ignore") ret float %val @@ -192,11 +192,11 @@ define float @v_constained_fma_f32_fpexcept_ignore_flags(float %x, float %y, flo ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[STRICT_FMA:%[0-9]+]]:_(s32) = nsz nofpexcept G_STRICT_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; CHECK-NEXT: $vgpr0 = COPY [[STRICT_FMA]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; CHECK-NEXT: [[STRICT_FMA:%[0-9]+]]:_(f32) = nsz nofpexcept G_STRICT_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; CHECK-NEXT: $vgpr0 = COPY [[STRICT_FMA]](f32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call nsz float @llvm.experimental.constrained.fma.f32(float %x, float %y, float %z, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret float %val @@ -207,9 +207,9 @@ define float @v_constained_sqrt_f32_fpexcept_strict(float %x) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[STRICT_FSQRT:%[0-9]+]]:_(s32) = G_STRICT_FSQRT [[COPY]] - ; CHECK-NEXT: $vgpr0 = COPY [[STRICT_FSQRT]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; CHECK-NEXT: [[STRICT_FSQRT:%[0-9]+]]:_(f32) = G_STRICT_FSQRT [[COPY]] + ; CHECK-NEXT: $vgpr0 = COPY [[STRICT_FSQRT]](f32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call float @llvm.experimental.constrained.sqrt.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") ret float %val diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fast-math-flags.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fast-math-flags.ll index 0897b3794f8d9..cd7b0f168d9c1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fast-math-flags.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fast-math-flags.ll @@ -20,7 +20,7 @@ define amdgpu_kernel void @fma_fast(float %arg0, float %arg1, float %arg2) { ; Check flags are preserved for an arbitrarry target intrinsic ; CHECK-LABEL: name: rcp_nsz -; CHECK: = nsz G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %{{[0-9]+}}(s32) +; CHECK: = nsz G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %{{[0-9]+}}(f32) define amdgpu_kernel void @rcp_nsz(float %arg0) { %res = call nsz float @llvm.amdgcn.rcp.f32 (float %arg0) store float %res, ptr addrspace(1) undef diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fixed-function-abi-vgpr-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fixed-function-abi-vgpr-args.ll index 34d59ea1e608b..907bf69f4871c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fixed-function-abi-vgpr-args.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fixed-function-abi-vgpr-args.ll @@ -9,41 +9,41 @@ define void @void_a31i32_i32([31 x i32] %arg0, i32 %arg1) { ; FIXED: bb.1 (%ir-block.0): ; FIXED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; FIXED-NEXT: {{ $}} - ; FIXED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; FIXED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; FIXED-NEXT: 
[[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; FIXED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; FIXED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; FIXED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; FIXED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; FIXED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; FIXED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; FIXED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; FIXED-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; FIXED-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; FIXED-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; FIXED-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; FIXED-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; FIXED-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; FIXED-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; FIXED-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; FIXED-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; FIXED-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; FIXED-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; FIXED-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; FIXED-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; FIXED-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; FIXED-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; FIXED-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; FIXED-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; FIXED-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; FIXED-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; FIXED-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; FIXED-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; FIXED-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; FIXED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; FIXED-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; FIXED-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; FIXED-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; FIXED-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; FIXED-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; FIXED-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; FIXED-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; FIXED-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; FIXED-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; FIXED-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; FIXED-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; FIXED-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; FIXED-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; FIXED-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; FIXED-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; FIXED-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr17 + ; FIXED-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr18 + ; FIXED-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr19 + ; FIXED-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr20 + ; FIXED-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr21 + ; FIXED-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr22 + ; FIXED-NEXT: [[COPY23:%[0-9]+]]:_(i32) = COPY $vgpr23 + ; FIXED-NEXT: [[COPY24:%[0-9]+]]:_(i32) = COPY $vgpr24 + ; FIXED-NEXT: [[COPY25:%[0-9]+]]:_(i32) = COPY $vgpr25 + ; FIXED-NEXT: [[COPY26:%[0-9]+]]:_(i32) = COPY $vgpr26 + ; FIXED-NEXT: [[COPY27:%[0-9]+]]:_(i32) = COPY $vgpr27 + ; FIXED-NEXT: [[COPY28:%[0-9]+]]:_(i32) = COPY $vgpr28 + ; FIXED-NEXT: [[COPY29:%[0-9]+]]:_(i32) = COPY $vgpr29 + ; FIXED-NEXT: [[COPY30:%[0-9]+]]:_(i32) = COPY $vgpr30 ; FIXED-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; FIXED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from 
%fixed-stack.0, align 16, addrspace 5) + ; FIXED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (i32) from %fixed-stack.0, align 16, addrspace 5) ; FIXED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; FIXED-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; FIXED-NEXT: G_STORE [[LOAD]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) ; FIXED-NEXT: SI_RETURN store i32 %arg1, ptr addrspace(1) undef ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll index 6d32d4c720c99..746a23c9546ed 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll @@ -10,9 +10,9 @@ define void @void_func_empty_arg({} %arg0, i32 %arg1) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[COPY]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store i32 %arg1, ptr addrspace(1) undef ret void @@ -23,9 +23,9 @@ define void @void_func_empty_array([0 x i8] %arg0, i32 %arg1) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[COPY]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store i32 %arg1, ptr addrspace(1) undef ret void @@ -36,10 +36,10 @@ define void @void_func_i1(i1 %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i1) = G_TRUNC [[COPY]](i32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[TRUNC]](s1), [[DEF]](p1) :: (store (s1) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC]](i1), [[DEF]](p1) :: (store (i1) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store i1 %arg0, ptr addrspace(1) undef ret void @@ -50,14 +50,14 @@ define void @void_func_i1_zeroext(i1 zeroext %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_ZEXT]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(i32) = G_ASSERT_ZEXT [[COPY]], 1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i1) = G_TRUNC [[ASSERT_ZEXT]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: 
[[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s1) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[C]] - ; CHECK-NEXT: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[TRUNC]](i1) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[ZEXT]], [[C]] + ; CHECK-NEXT: G_STORE [[ADD]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN %ext = zext i1 %arg0 to i32 %add = add i32 %ext, 12 @@ -70,14 +70,14 @@ define void @void_func_i1_signext(i1 signext %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_SEXT]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(i32) = G_ASSERT_SEXT [[COPY]], 1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i1) = G_TRUNC [[ASSERT_SEXT]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s1) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C]] - ; CHECK-NEXT: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[TRUNC]](i1) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[SEXT]], [[C]] + ; CHECK-NEXT: G_STORE [[ADD]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN %ext = sext i1 %arg0 to i32 %add = add i32 %ext, 12 @@ -91,24 +91,24 @@ define void @i1_arg_i1_use(i1 %arg) #0 { ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i1) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i1) = G_CONSTANT i1 true + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[TRUNC]], [[C]] - ; CHECK-NEXT: [[INTRINSIC_W_SIDE_EFFECTS:%[0-9]+]]:_(s1), [[INTRINSIC_W_SIDE_EFFECTS1:%[0-9]+]]:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), [[XOR]](s1) - ; CHECK-NEXT: G_BRCOND [[INTRINSIC_W_SIDE_EFFECTS]](s1), %bb.2 + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(i1) = G_XOR [[TRUNC]], [[C]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(i1), [[INT1:%[0-9]+]]:_(i64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), [[XOR]](i1) + ; CHECK-NEXT: G_BRCOND [[INT]](i1), %bb.2 ; CHECK-NEXT: G_BR %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2.bb1: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: G_STORE [[C1]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[C1]](i32), [[DEF]](p1) :: (volatile store (i32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: G_BR %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3.bb2: - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), 
[[INTRINSIC_W_SIDE_EFFECTS1]](s64) + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT1]](i64) ; CHECK-NEXT: SI_RETURN bb: br i1 %arg, label %bb2, label %bb1 @@ -126,11 +126,11 @@ define void @void_func_i8(i8 %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i8) = G_TRUNC [[TRUNC]](i16) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (store (s8) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC1]](i8), [[DEF]](p1) :: (store (i8) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store i8 %arg0, ptr addrspace(1) undef ret void @@ -141,14 +141,14 @@ define void @void_func_i8_zeroext(i8 zeroext %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 8 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_ZEXT]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(i32) = G_ASSERT_ZEXT [[COPY]], 8 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i8) = G_TRUNC [[ASSERT_ZEXT]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s8) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[C]] - ; CHECK-NEXT: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[TRUNC]](i8) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[ZEXT]], [[C]] + ; CHECK-NEXT: G_STORE [[ADD]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN %ext = zext i8 %arg0 to i32 %add = add i32 %ext, 12 @@ -161,14 +161,14 @@ define void @void_func_i8_signext(i8 signext %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 8 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_SEXT]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(i32) = G_ASSERT_SEXT [[COPY]], 8 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i8) = G_TRUNC [[ASSERT_SEXT]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s8) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C]] - ; CHECK-NEXT: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[TRUNC]](i8) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[SEXT]], [[C]] + ; CHECK-NEXT: G_STORE [[ADD]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN %ext = sext 
i8 %arg0 to i32 %add = add i32 %ext, 12 @@ -181,10 +181,10 @@ define void @void_func_i16(i16 %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (store (s16) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC]](i16), [[DEF]](p1) :: (store (i16) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store i16 %arg0, ptr addrspace(1) undef ret void @@ -195,14 +195,14 @@ define void @void_func_i16_zeroext(i16 zeroext %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 16 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_ZEXT]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(i32) = G_ASSERT_ZEXT [[COPY]], 16 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[ASSERT_ZEXT]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s16) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[C]] - ; CHECK-NEXT: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[TRUNC]](i16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[ZEXT]], [[C]] + ; CHECK-NEXT: G_STORE [[ADD]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN %ext = zext i16 %arg0 to i32 %add = add i32 %ext, 12 @@ -215,14 +215,14 @@ define void @void_func_i16_signext(i16 signext %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 16 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_SEXT]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(i32) = G_ASSERT_SEXT [[COPY]], 16 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[ASSERT_SEXT]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C]] - ; CHECK-NEXT: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[TRUNC]](i16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[SEXT]], [[C]] + ; CHECK-NEXT: G_STORE [[ADD]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN %ext = sext i16 %arg0 to i32 %add = add i32 %ext, 12 @@ -235,10 +235,10 @@ define void @void_func_i24(i24 %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: 
[[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i24) = G_TRUNC [[COPY]](i32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[TRUNC]](s24), [[DEF]](p1) :: (store (s24) into `ptr addrspace(1) undef`, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC]](i24), [[DEF]](p1) :: (store (i24) into `ptr addrspace(1) undef`, align 4, addrspace 1) ; CHECK-NEXT: SI_RETURN store i24 %arg0, ptr addrspace(1) undef ret void @@ -249,11 +249,11 @@ define void @void_func_i24_zeroext(i24 zeroext %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 24 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[ASSERT_ZEXT]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(i32) = G_ASSERT_ZEXT [[COPY]], 24 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i24) = G_TRUNC [[ASSERT_ZEXT]](i32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[TRUNC]](s24), [[DEF]](p1) :: (store (s24) into `ptr addrspace(1) undef`, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC]](i24), [[DEF]](p1) :: (store (i24) into `ptr addrspace(1) undef`, align 4, addrspace 1) ; CHECK-NEXT: SI_RETURN store i24 %arg0, ptr addrspace(1) undef ret void @@ -264,11 +264,11 @@ define void @void_func_i24_signext(i24 signext %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 24 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[ASSERT_SEXT]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(i32) = G_ASSERT_SEXT [[COPY]], 24 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i24) = G_TRUNC [[ASSERT_SEXT]](i32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[TRUNC]](s24), [[DEF]](p1) :: (store (s24) into `ptr addrspace(1) undef`, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC]](i24), [[DEF]](p1) :: (store (i24) into `ptr addrspace(1) undef`, align 4, addrspace 1) ; CHECK-NEXT: SI_RETURN store i24 %arg0, ptr addrspace(1) undef ret void @@ -279,9 +279,9 @@ define void @void_func_i32(i32 %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[COPY]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store i32 %arg0, ptr addrspace(1) undef ret void @@ -293,9 +293,9 @@ define void @void_func_i32_signext(i32 signext %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[COPY]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) ; 
CHECK-NEXT: SI_RETURN store i32 %arg0, ptr addrspace(1) undef ret void @@ -307,9 +307,9 @@ define void @void_func_i32_zeroext(i32 zeroext %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[COPY]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store i32 %arg0, ptr addrspace(1) undef ret void @@ -333,12 +333,12 @@ define void @void_func_i48(i48 %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i48) = G_TRUNC [[MV]](i64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[TRUNC]](s48), [[DEF]](p1) :: (store (s48) into `ptr addrspace(1) undef`, align 8, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC]](i48), [[DEF]](p1) :: (store (i48) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: SI_RETURN store i48 %arg0, ptr addrspace(1) undef ret void @@ -349,15 +349,15 @@ define void @void_func_i48_zeroext(i48 zeroext %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i48) = G_TRUNC [[MV]](i64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[ZEXT]], [[C]] - ; CHECK-NEXT: G_STORE [[ADD]](s64), [[DEF]](p1) :: (store (s64) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[TRUNC]](i48) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i64) = G_ADD [[ZEXT]], [[C]] + ; CHECK-NEXT: G_STORE [[ADD]](i64), [[DEF]](p1) :: (store (i64) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN %ext = zext i48 %arg0 to i64 %add = add i64 %ext, 12 @@ -370,15 +370,15 @@ define void @void_func_i48_signext(i48 signext %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64) - ; 
CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i48) = G_TRUNC [[MV]](i64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s48) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[SEXT]], [[C]] - ; CHECK-NEXT: G_STORE [[ADD]](s64), [[DEF]](p1) :: (store (s64) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(i64) = G_SEXT [[TRUNC]](i48) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i64) = G_ADD [[SEXT]], [[C]] + ; CHECK-NEXT: G_STORE [[ADD]](i64), [[DEF]](p1) :: (store (i64) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN %ext = sext i48 %arg0 to i64 %add = add i64 %ext, 12 @@ -391,11 +391,11 @@ define void @void_func_i64(i64 %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (store (s64) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[MV]](i64), [[DEF]](p1) :: (store (i64) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store i64 %arg0, ptr addrspace(1) undef ret void @@ -406,13 +406,13 @@ define void @void_func_i95(i95 %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s95) = G_TRUNC [[MV]](s96) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i96) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i95) = G_TRUNC [[MV]](i96) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[TRUNC]](s95), [[DEF]](p1) :: (store (s95) into `ptr addrspace(1) undef`, align 8, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC]](i95), [[DEF]](p1) :: (store (i95) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: SI_RETURN store i95 %arg0, ptr addrspace(1) undef ret void @@ -423,16 +423,16 @@ define void @void_func_i95_zeroext(i95 zeroext %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s95) = G_TRUNC [[MV]](s96) - ; 
CHECK-NEXT: [[C:%[0-9]+]]:_(s96) = G_CONSTANT i96 12 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i96) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i95) = G_TRUNC [[MV]](i96) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i96) = G_CONSTANT i96 12 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s96) = G_ZEXT [[TRUNC]](s95) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s96) = G_ADD [[ZEXT]], [[C]] - ; CHECK-NEXT: G_STORE [[ADD]](s96), [[DEF]](p1) :: (store (s96) into `ptr addrspace(1) undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i96) = G_ZEXT [[TRUNC]](i95) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i96) = G_ADD [[ZEXT]], [[C]] + ; CHECK-NEXT: G_STORE [[ADD]](i96), [[DEF]](p1) :: (store (i96) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: SI_RETURN %ext = zext i95 %arg0 to i96 %add = add i96 %ext, 12 @@ -445,16 +445,16 @@ define void @void_func_i95_signext(i95 signext %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s95) = G_TRUNC [[MV]](s96) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s96) = G_CONSTANT i96 12 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i96) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i95) = G_TRUNC [[MV]](i96) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i96) = G_CONSTANT i96 12 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s96) = G_SEXT [[TRUNC]](s95) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s96) = G_ADD [[SEXT]], [[C]] - ; CHECK-NEXT: G_STORE [[ADD]](s96), [[DEF]](p1) :: (store (s96) into `ptr addrspace(1) undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(i96) = G_SEXT [[TRUNC]](i95) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i96) = G_ADD [[SEXT]], [[C]] + ; CHECK-NEXT: G_STORE [[ADD]](i96), [[DEF]](p1) :: (store (i96) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: SI_RETURN %ext = sext i95 %arg0 to i96 %add = add i96 %ext, 12 @@ -467,12 +467,12 @@ define void @void_func_i96(i96 %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i96) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[MV]](s96), [[DEF]](p1) :: (store (s96) into `ptr addrspace(1) undef`, align 8, addrspace 1) + ; CHECK-NEXT: G_STORE [[MV]](i96), [[DEF]](p1) :: (store 
(i96) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: SI_RETURN store i96 %arg0, ptr addrspace(1) undef ret void @@ -483,9 +483,9 @@ define void @void_func_p0i8(ptr %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[MV]](p0), [[DEF]](p1) :: (store (p0) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -498,9 +498,9 @@ define void @void_func_p1i8(ptr addrspace(1) %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[MV]](p1), [[DEF]](p1) :: (store (p1) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -513,10 +513,11 @@ define void @void_func_f16(half %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (store (s16) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BITCAST]](f16), [[DEF]](p1) :: (store (f16) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store half %arg0, ptr addrspace(1) undef ret void @@ -527,9 +528,9 @@ define void @void_func_f32(float %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(f32) = COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[COPY]](f32), [[DEF]](p1) :: (store (f32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store float %arg0, ptr addrspace(1) undef ret void @@ -540,11 +541,12 @@ define void @void_func_f64(double %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: 
[[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[MV]](i64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (store (s64) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BITCAST]](f64), [[DEF]](p1) :: (store (f64) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store double %arg0, ptr addrspace(1) undef ret void @@ -555,11 +557,11 @@ define void @void_func_v2i32(<2 x i32> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (store (<2 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x i32>), [[DEF]](p1) :: (store (<2 x i32>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store <2 x i32> %arg0, ptr addrspace(1) undef ret void @@ -570,12 +572,12 @@ define void @void_func_v2i24(<2 x i24> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s24>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x i24>) = G_TRUNC [[BUILD_VECTOR]](<2 x i32>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[TRUNC]](<2 x s24>), [[DEF]](p1) :: (store (<2 x s24>) into `ptr addrspace(1) undef`, align 8, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC]](<2 x i24>), [[DEF]](p1) :: (store (<2 x i24>) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: SI_RETURN store <2 x i24> %arg0, ptr addrspace(1) undef ret void @@ -586,13 +588,13 @@ define void @void_func_v3i24(<3 x i24> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<3 x s24>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<3 x i24>) = G_TRUNC [[BUILD_VECTOR]](<3 x i32>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[TRUNC]](<3 x s24>), [[DEF]](p1) :: (store (<3 x 
s24>) into `ptr addrspace(1) undef`, align 16, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC]](<3 x i24>), [[DEF]](p1) :: (store (<3 x i24>) into `ptr addrspace(1) undef`, align 16, addrspace 1) ; CHECK-NEXT: SI_RETURN store <3 x i24> %arg0, ptr addrspace(1) undef ret void @@ -603,14 +605,14 @@ define void @void_func_v2i8(<2 x i8> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<2 x s8>) = G_TRUNC [[BUILD_VECTOR]](<2 x s16>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<2 x i8>) = G_TRUNC [[BUILD_VECTOR]](<2 x i16>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[TRUNC2]](<2 x s8>), [[DEF]](p1) :: (store (<2 x s8>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC2]](<2 x i8>), [[DEF]](p1) :: (store (<2 x i8>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store <2 x i8> %arg0, ptr addrspace(1) undef ret void @@ -621,16 +623,16 @@ define void @void_func_v3i8(<3 x i8> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s16>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16), [[TRUNC2]](i16) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(<3 x i8>) = G_TRUNC [[BUILD_VECTOR]](<3 x i16>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[TRUNC3]](<3 x s8>), [[DEF]](p1) :: (store (<3 x s8>) into `ptr addrspace(1) undef`, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC3]](<3 x i8>), [[DEF]](p1) :: (store (<3 x i8>) into `ptr addrspace(1) undef`, align 4, addrspace 1) ; CHECK-NEXT: SI_RETURN store <3 x i8> %arg0, ptr addrspace(1) undef ret void @@ -641,18 +643,18 @@ define void @void_func_v4i8(<4 x i8> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; 
CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s16>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY3]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16), [[TRUNC2]](i16), [[TRUNC3]](i16) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(<4 x i8>) = G_TRUNC [[BUILD_VECTOR]](<4 x i16>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[TRUNC4]](<4 x s8>), [[DEF]](p1) :: (store (<4 x s8>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC4]](<4 x i8>), [[DEF]](p1) :: (store (<4 x i8>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store <4 x i8> %arg0, ptr addrspace(1) undef ret void @@ -678,12 +680,12 @@ define void @void_func_v3i32(<3 x i32> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (store (<3 x s32>) into `ptr addrspace(1) undef`, align 16, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x i32>), [[DEF]](p1) :: (store (<3 x i32>) into `ptr addrspace(1) undef`, align 16, addrspace 1) ; CHECK-NEXT: SI_RETURN store <3 x i32> %arg0, ptr addrspace(1) undef ret void @@ -694,13 +696,13 @@ define void @void_func_v4i32(<4 x i32> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: 
[[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[DEF]](p1) :: (store (<4 x i32>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store <4 x i32> %arg0, ptr addrspace(1) undef ret void @@ -711,14 +713,14 @@ define void @void_func_v5i32(<5 x i32> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<5 x s32>), [[DEF]](p1) :: (store (<5 x s32>) into `ptr addrspace(1) undef`, align 32, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<5 x i32>), [[DEF]](p1) :: (store (<5 x i32>) into `ptr addrspace(1) undef`, align 32, addrspace 1) ; CHECK-NEXT: SI_RETURN store <5 x i32> %arg0, ptr addrspace(1) undef ret void @@ -729,17 +731,17 @@ define void @void_func_v8i32(<8 x i32> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), 
[[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<8 x s32>), [[DEF]](p1) :: (store (<8 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<8 x i32>), [[DEF]](p1) :: (store (<8 x i32>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store <8 x i32> %arg0, ptr addrspace(1) undef ret void @@ -750,25 +752,25 @@ define void @void_func_v16i32(<16 x i32> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<16 x s32>), [[DEF]](p1) :: (store (<16 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32), [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32), [[COPY12]](i32), [[COPY13]](i32), [[COPY14]](i32), [[COPY15]](i32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<16 x i32>), [[DEF]](p1) :: (store (<16 x i32>) into `ptr addrspace(1) undef`, addrspace 1) ; 
CHECK-NEXT: SI_RETURN store <16 x i32> %arg0, ptr addrspace(1) undef ret void @@ -779,42 +781,42 @@ define void @void_func_v32i32(<32 x i32> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr17 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr18 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr19 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr20 + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr21 + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr22 + ; CHECK-NEXT: 
[[COPY23:%[0-9]+]]:_(i32) = COPY $vgpr23 + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(i32) = COPY $vgpr24 + ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(i32) = COPY $vgpr25 + ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(i32) = COPY $vgpr26 + ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(i32) = COPY $vgpr27 + ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(i32) = COPY $vgpr28 + ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(i32) = COPY $vgpr29 + ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(i32) = COPY $vgpr30 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (i32) from %fixed-stack.0, align 16, addrspace 5) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32), [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32), [[COPY12]](i32), [[COPY13]](i32), [[COPY14]](i32), [[COPY15]](i32), [[COPY16]](i32), [[COPY17]](i32), [[COPY18]](i32), [[COPY19]](i32), [[COPY20]](i32), [[COPY21]](i32), [[COPY22]](i32), [[COPY23]](i32), [[COPY24]](i32), [[COPY25]](i32), [[COPY26]](i32), [[COPY27]](i32), [[COPY28]](i32), [[COPY29]](i32), [[COPY30]](i32), [[LOAD]](i32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (store (<32 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x i32>), [[DEF]](p1) :: (store (<32 x i32>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store <32 x i32> %arg0, ptr addrspace(1) undef ret void @@ -826,44 +828,44 @@ define void @void_func_v33i32(<33 x i32> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: 
[[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr17 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr18 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr19 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr20 + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr21 + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr22 + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(i32) = COPY $vgpr23 + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(i32) = COPY $vgpr24 + ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(i32) = COPY $vgpr25 + ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(i32) = COPY $vgpr26 + ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(i32) = COPY $vgpr27 + ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(i32) = COPY $vgpr28 + ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(i32) = COPY $vgpr29 + ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(i32) = COPY $vgpr30 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.1, align 16, addrspace 5) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (i32) from %fixed-stack.1, align 16, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.0, addrspace 5) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<33 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), 
[[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32), [[LOAD1]](s32) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (i32) from %fixed-stack.0, addrspace 5) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<33 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32), [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32), [[COPY12]](i32), [[COPY13]](i32), [[COPY14]](i32), [[COPY15]](i32), [[COPY16]](i32), [[COPY17]](i32), [[COPY18]](i32), [[COPY19]](i32), [[COPY20]](i32), [[COPY21]](i32), [[COPY22]](i32), [[COPY23]](i32), [[COPY24]](i32), [[COPY25]](i32), [[COPY26]](i32), [[COPY27]](i32), [[COPY28]](i32), [[COPY29]](i32), [[COPY30]](i32), [[LOAD]](i32), [[LOAD1]](i32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<33 x s32>), [[DEF]](p1) :: (store (<33 x s32>) into `ptr addrspace(1) undef`, align 256, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<33 x i32>), [[DEF]](p1) :: (store (<33 x i32>) into `ptr addrspace(1) undef`, align 256, addrspace 1) ; CHECK-NEXT: SI_RETURN store <33 x i32> %arg0, ptr addrspace(1) undef ret void @@ -874,15 +876,15 @@ define void @void_func_v2i64(<2 x i64> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x i64>), [[DEF]](p1) :: (store (<2 x i64>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store <2 x i64> %arg0, ptr addrspace(1) undef ret void @@ -893,12 +895,12 @@ define void @void_func_v2p0i8(<2 x ptr> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES 
[[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[MV]](p0), [[MV1]](p0) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p0>), [[DEF]](p1) :: (store (<2 x p0>) into `ptr addrspace(1) undef`, addrspace 1) @@ -912,12 +914,12 @@ define void @void_func_v2p1i8(<2 x ptr addrspace(1)> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef`, addrspace 1) @@ -931,18 +933,18 @@ define void @void_func_v3i64(<3 x i64> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i64>) = 
G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s64>), [[DEF]](p1) :: (store (<3 x s64>) into `ptr addrspace(1) undef`, align 32, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x i64>), [[DEF]](p1) :: (store (<3 x i64>) into `ptr addrspace(1) undef`, align 32, addrspace 1) ; CHECK-NEXT: SI_RETURN store <3 x i64> %arg0, ptr addrspace(1) undef ret void @@ -953,21 +955,21 @@ define void @void_func_v4i64(<4 x i64> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s64>), [[DEF]](p1) :: (store (<4 x s64>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY6]](i32), [[COPY7]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64), [[MV3]](i64) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i64>), [[DEF]](p1) :: (store (<4 x i64>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store <4 x i64> %arg0, ptr addrspace(1) undef ret void @@ -978,24 +980,24 @@ define void @void_func_v5i64(<5 x i64> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY 
$vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<5 x s64>), [[DEF]](p1) :: (store (<5 x s64>) into `ptr addrspace(1) undef`, align 64, addrspace 1) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY6]](i32), [[COPY7]](i32) + ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY8]](i32), [[COPY9]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64), [[MV3]](i64), [[MV4]](i64) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<5 x i64>), [[DEF]](p1) :: (store (<5 x i64>) into `ptr addrspace(1) undef`, align 64, addrspace 1) ; CHECK-NEXT: SI_RETURN store <5 x i64> %arg0, ptr addrspace(1) undef ret void @@ -1006,33 +1008,33 @@ define void @void_func_v8i64(<8 x i64> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = 
G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; CHECK-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; CHECK-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<8 x s64>), [[DEF]](p1) :: (store (<8 x s64>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY6]](i32), [[COPY7]](i32) + ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY8]](i32), [[COPY9]](i32) + ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY10]](i32), [[COPY11]](i32) + ; CHECK-NEXT: [[MV6:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY12]](i32), [[COPY13]](i32) + ; CHECK-NEXT: [[MV7:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY14]](i32), [[COPY15]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64), [[MV3]](i64), [[MV4]](i64), [[MV5]](i64), [[MV6]](i64), [[MV7]](i64) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<8 x i64>), [[DEF]](p1) :: (store (<8 x i64>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store <8 x i64> %arg0, ptr addrspace(1) undef ret void @@ -1043,58 +1045,58 @@ define void @void_func_v16i64(<16 x i64> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: 
[[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr17 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr18 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr19 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr20 + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr21 + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr22 + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(i32) = COPY $vgpr23 + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(i32) = COPY $vgpr24 + ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(i32) = COPY $vgpr25 + ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(i32) = COPY $vgpr26 + ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(i32) = COPY $vgpr27 + ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(i32) = COPY $vgpr28 + ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(i32) = COPY $vgpr29 + ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(i32) = COPY $vgpr30 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD 
[[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; CHECK-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; CHECK-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; CHECK-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; CHECK-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) - ; CHECK-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) - ; CHECK-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) - ; CHECK-NEXT: [[MV12:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY24]](s32), [[COPY25]](s32) - ; CHECK-NEXT: [[MV13:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY26]](s32), [[COPY27]](s32) - ; CHECK-NEXT: [[MV14:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY28]](s32), [[COPY29]](s32) - ; CHECK-NEXT: [[MV15:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY30]](s32), [[LOAD]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64), [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64), [[MV12]](s64), [[MV13]](s64), [[MV14]](s64), [[MV15]](s64) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<16 x s64>), [[DEF]](p1) :: (store (<16 x s64>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (i32) from %fixed-stack.0, align 16, addrspace 5) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY6]](i32), [[COPY7]](i32) + ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY8]](i32), [[COPY9]](i32) + ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY10]](i32), [[COPY11]](i32) + ; CHECK-NEXT: [[MV6:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY12]](i32), [[COPY13]](i32) + ; CHECK-NEXT: [[MV7:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY14]](i32), [[COPY15]](i32) + ; CHECK-NEXT: [[MV8:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY16]](i32), [[COPY17]](i32) + ; CHECK-NEXT: [[MV9:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY18]](i32), [[COPY19]](i32) + ; CHECK-NEXT: [[MV10:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY20]](i32), [[COPY21]](i32) + ; CHECK-NEXT: [[MV11:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY22]](i32), [[COPY23]](i32) + ; CHECK-NEXT: [[MV12:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY24]](i32), [[COPY25]](i32) + ; CHECK-NEXT: [[MV13:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY26]](i32), [[COPY27]](i32) + ; CHECK-NEXT: [[MV14:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY28]](i32), [[COPY29]](i32) + ; CHECK-NEXT: [[MV15:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY30]](i32), [[LOAD]](i32) + ; 
CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64), [[MV3]](i64), [[MV4]](i64), [[MV5]](i64), [[MV6]](i64), [[MV7]](i64), [[MV8]](i64), [[MV9]](i64), [[MV10]](i64), [[MV11]](i64), [[MV12]](i64), [[MV13]](i64), [[MV14]](i64), [[MV15]](i64) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<16 x i64>), [[DEF]](p1) :: (store (<16 x i64>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store <16 x i64> %arg0, ptr addrspace(1) undef ret void @@ -1105,9 +1107,9 @@ define void @void_func_v2i16(<2 x i16> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[COPY]](<2 x s16>), [[DEF]](p1) :: (store (<2 x s16>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[COPY]](<2 x i16>), [[DEF]](p1) :: (store (<2 x i16>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store <2 x i16> %arg0, ptr addrspace(1) undef ret void @@ -1118,13 +1120,13 @@ define void @void_func_v3i16(<3 x i16> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[COPY]](<2 x i16>), [[COPY1]](<2 x i16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i16), [[UV1:%[0-9]+]]:_(i16), [[UV2:%[0-9]+]]:_(i16), [[UV3:%[0-9]+]]:_(i16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x i16>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i16>) = G_BUILD_VECTOR [[UV]](i16), [[UV1]](i16), [[UV2]](i16) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s16>), [[DEF]](p1) :: (store (<3 x s16>) into `ptr addrspace(1) undef`, align 8, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x i16>), [[DEF]](p1) :: (store (<3 x i16>) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: SI_RETURN store <3 x i16> %arg0, ptr addrspace(1) undef ret void @@ -1135,11 +1137,11 @@ define void @void_func_v4i16(<4 x i16> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[COPY]](<2 x i16>), [[COPY1]](<2 x i16>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x s16>), 
[[DEF]](p1) :: (store (<4 x s16>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x i16>), [[DEF]](p1) :: (store (<4 x i16>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store <4 x i16> %arg0, ptr addrspace(1) undef ret void @@ -1150,14 +1152,14 @@ define void @void_func_v5i16(<5 x i16> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[COPY]](<2 x i16>), [[COPY1]](<2 x i16>), [[COPY2]](<2 x i16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i16), [[UV1:%[0-9]+]]:_(i16), [[UV2:%[0-9]+]]:_(i16), [[UV3:%[0-9]+]]:_(i16), [[UV4:%[0-9]+]]:_(i16), [[UV5:%[0-9]+]]:_(i16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x i16>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x i16>) = G_BUILD_VECTOR [[UV]](i16), [[UV1]](i16), [[UV2]](i16), [[UV3]](i16), [[UV4]](i16) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<5 x s16>), [[DEF]](p1) :: (store (<5 x s16>) into `ptr addrspace(1) undef`, align 16, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<5 x i16>), [[DEF]](p1) :: (store (<5 x i16>) into `ptr addrspace(1) undef`, align 16, addrspace 1) ; CHECK-NEXT: SI_RETURN store <5 x i16> %arg0, ptr addrspace(1) undef ret void @@ -1168,13 +1170,13 @@ define void @void_func_v8i16(<8 x i16> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr3 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i16>) = G_CONCAT_VECTORS [[COPY]](<2 x i16>), [[COPY1]](<2 x i16>), [[COPY2]](<2 x i16>), [[COPY3]](<2 x i16>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<8 x i16>), [[DEF]](p1) :: (store (<8 x i16>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store <8 x i16> 
%arg0, ptr addrspace(1) undef ret void @@ -1185,17 +1187,17 @@ define void @void_func_v16i16(<16 x i16> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr7 - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[COPY6]](<2 x s16>), [[COPY7]](<2 x s16>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr7 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x i16>) = G_CONCAT_VECTORS [[COPY]](<2 x i16>), [[COPY1]](<2 x i16>), [[COPY2]](<2 x i16>), [[COPY3]](<2 x i16>), [[COPY4]](<2 x i16>), [[COPY5]](<2 x i16>), [[COPY6]](<2 x i16>), [[COPY7]](<2 x i16>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<16 x s16>), [[DEF]](p1) :: (store (<16 x s16>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<16 x i16>), [[DEF]](p1) :: (store (<16 x i16>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store <16 x i16> %arg0, ptr addrspace(1) undef ret void @@ -1208,46 +1210,46 @@ define void @void_func_v65i16(<65 x i16> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr15 - ; CHECK-NEXT: 
[[COPY16:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr16 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr17 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr18 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr19 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr20 - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr21 - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr22 - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr23 - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr24 - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr25 - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr26 - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr27 - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr28 - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr29 - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr30 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr11 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr13 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr15 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr16 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr17 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr18 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr19 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr20 + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr21 + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr22 + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr23 + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr24 + ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr25 + ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr26 + ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr27 + ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr28 + ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr29 + ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr30 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (<2 x s16>) from %fixed-stack.1, align 16, addrspace 5) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (<2 x i16>) from %fixed-stack.1, align 16, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (<2 x s16>) from %fixed-stack.0, addrspace 5) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<66 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), 
[[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[COPY6]](<2 x s16>), [[COPY7]](<2 x s16>), [[COPY8]](<2 x s16>), [[COPY9]](<2 x s16>), [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[COPY12]](<2 x s16>), [[COPY13]](<2 x s16>), [[COPY14]](<2 x s16>), [[COPY15]](<2 x s16>), [[COPY16]](<2 x s16>), [[COPY17]](<2 x s16>), [[COPY18]](<2 x s16>), [[COPY19]](<2 x s16>), [[COPY20]](<2 x s16>), [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>), [[COPY23]](<2 x s16>), [[COPY24]](<2 x s16>), [[COPY25]](<2 x s16>), [[COPY26]](<2 x s16>), [[COPY27]](<2 x s16>), [[COPY28]](<2 x s16>), [[COPY29]](<2 x s16>), [[COPY30]](<2 x s16>), [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16), [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16), [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16), [[UV12:%[0-9]+]]:_(s16), [[UV13:%[0-9]+]]:_(s16), [[UV14:%[0-9]+]]:_(s16), [[UV15:%[0-9]+]]:_(s16), [[UV16:%[0-9]+]]:_(s16), [[UV17:%[0-9]+]]:_(s16), [[UV18:%[0-9]+]]:_(s16), [[UV19:%[0-9]+]]:_(s16), [[UV20:%[0-9]+]]:_(s16), [[UV21:%[0-9]+]]:_(s16), [[UV22:%[0-9]+]]:_(s16), [[UV23:%[0-9]+]]:_(s16), [[UV24:%[0-9]+]]:_(s16), [[UV25:%[0-9]+]]:_(s16), [[UV26:%[0-9]+]]:_(s16), [[UV27:%[0-9]+]]:_(s16), [[UV28:%[0-9]+]]:_(s16), [[UV29:%[0-9]+]]:_(s16), [[UV30:%[0-9]+]]:_(s16), [[UV31:%[0-9]+]]:_(s16), [[UV32:%[0-9]+]]:_(s16), [[UV33:%[0-9]+]]:_(s16), [[UV34:%[0-9]+]]:_(s16), [[UV35:%[0-9]+]]:_(s16), [[UV36:%[0-9]+]]:_(s16), [[UV37:%[0-9]+]]:_(s16), [[UV38:%[0-9]+]]:_(s16), [[UV39:%[0-9]+]]:_(s16), [[UV40:%[0-9]+]]:_(s16), [[UV41:%[0-9]+]]:_(s16), [[UV42:%[0-9]+]]:_(s16), [[UV43:%[0-9]+]]:_(s16), [[UV44:%[0-9]+]]:_(s16), [[UV45:%[0-9]+]]:_(s16), [[UV46:%[0-9]+]]:_(s16), [[UV47:%[0-9]+]]:_(s16), [[UV48:%[0-9]+]]:_(s16), [[UV49:%[0-9]+]]:_(s16), [[UV50:%[0-9]+]]:_(s16), [[UV51:%[0-9]+]]:_(s16), [[UV52:%[0-9]+]]:_(s16), [[UV53:%[0-9]+]]:_(s16), [[UV54:%[0-9]+]]:_(s16), [[UV55:%[0-9]+]]:_(s16), [[UV56:%[0-9]+]]:_(s16), [[UV57:%[0-9]+]]:_(s16), [[UV58:%[0-9]+]]:_(s16), [[UV59:%[0-9]+]]:_(s16), [[UV60:%[0-9]+]]:_(s16), [[UV61:%[0-9]+]]:_(s16), [[UV62:%[0-9]+]]:_(s16), [[UV63:%[0-9]+]]:_(s16), [[UV64:%[0-9]+]]:_(s16), [[UV65:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<66 x s16>) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<65 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[UV7]](s16), [[UV8]](s16), [[UV9]](s16), [[UV10]](s16), [[UV11]](s16), [[UV12]](s16), [[UV13]](s16), [[UV14]](s16), [[UV15]](s16), [[UV16]](s16), [[UV17]](s16), [[UV18]](s16), [[UV19]](s16), [[UV20]](s16), [[UV21]](s16), [[UV22]](s16), [[UV23]](s16), [[UV24]](s16), [[UV25]](s16), [[UV26]](s16), [[UV27]](s16), [[UV28]](s16), [[UV29]](s16), [[UV30]](s16), [[UV31]](s16), [[UV32]](s16), [[UV33]](s16), [[UV34]](s16), [[UV35]](s16), [[UV36]](s16), [[UV37]](s16), [[UV38]](s16), [[UV39]](s16), [[UV40]](s16), [[UV41]](s16), [[UV42]](s16), [[UV43]](s16), [[UV44]](s16), [[UV45]](s16), [[UV46]](s16), [[UV47]](s16), [[UV48]](s16), [[UV49]](s16), [[UV50]](s16), [[UV51]](s16), [[UV52]](s16), [[UV53]](s16), [[UV54]](s16), [[UV55]](s16), [[UV56]](s16), [[UV57]](s16), [[UV58]](s16), [[UV59]](s16), [[UV60]](s16), [[UV61]](s16), [[UV62]](s16), [[UV63]](s16), [[UV64]](s16) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant 
load (<2 x i16>) from %fixed-stack.0, addrspace 5) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<66 x i16>) = G_CONCAT_VECTORS [[COPY]](<2 x i16>), [[COPY1]](<2 x i16>), [[COPY2]](<2 x i16>), [[COPY3]](<2 x i16>), [[COPY4]](<2 x i16>), [[COPY5]](<2 x i16>), [[COPY6]](<2 x i16>), [[COPY7]](<2 x i16>), [[COPY8]](<2 x i16>), [[COPY9]](<2 x i16>), [[COPY10]](<2 x i16>), [[COPY11]](<2 x i16>), [[COPY12]](<2 x i16>), [[COPY13]](<2 x i16>), [[COPY14]](<2 x i16>), [[COPY15]](<2 x i16>), [[COPY16]](<2 x i16>), [[COPY17]](<2 x i16>), [[COPY18]](<2 x i16>), [[COPY19]](<2 x i16>), [[COPY20]](<2 x i16>), [[COPY21]](<2 x i16>), [[COPY22]](<2 x i16>), [[COPY23]](<2 x i16>), [[COPY24]](<2 x i16>), [[COPY25]](<2 x i16>), [[COPY26]](<2 x i16>), [[COPY27]](<2 x i16>), [[COPY28]](<2 x i16>), [[COPY29]](<2 x i16>), [[COPY30]](<2 x i16>), [[LOAD]](<2 x i16>), [[LOAD1]](<2 x i16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i16), [[UV1:%[0-9]+]]:_(i16), [[UV2:%[0-9]+]]:_(i16), [[UV3:%[0-9]+]]:_(i16), [[UV4:%[0-9]+]]:_(i16), [[UV5:%[0-9]+]]:_(i16), [[UV6:%[0-9]+]]:_(i16), [[UV7:%[0-9]+]]:_(i16), [[UV8:%[0-9]+]]:_(i16), [[UV9:%[0-9]+]]:_(i16), [[UV10:%[0-9]+]]:_(i16), [[UV11:%[0-9]+]]:_(i16), [[UV12:%[0-9]+]]:_(i16), [[UV13:%[0-9]+]]:_(i16), [[UV14:%[0-9]+]]:_(i16), [[UV15:%[0-9]+]]:_(i16), [[UV16:%[0-9]+]]:_(i16), [[UV17:%[0-9]+]]:_(i16), [[UV18:%[0-9]+]]:_(i16), [[UV19:%[0-9]+]]:_(i16), [[UV20:%[0-9]+]]:_(i16), [[UV21:%[0-9]+]]:_(i16), [[UV22:%[0-9]+]]:_(i16), [[UV23:%[0-9]+]]:_(i16), [[UV24:%[0-9]+]]:_(i16), [[UV25:%[0-9]+]]:_(i16), [[UV26:%[0-9]+]]:_(i16), [[UV27:%[0-9]+]]:_(i16), [[UV28:%[0-9]+]]:_(i16), [[UV29:%[0-9]+]]:_(i16), [[UV30:%[0-9]+]]:_(i16), [[UV31:%[0-9]+]]:_(i16), [[UV32:%[0-9]+]]:_(i16), [[UV33:%[0-9]+]]:_(i16), [[UV34:%[0-9]+]]:_(i16), [[UV35:%[0-9]+]]:_(i16), [[UV36:%[0-9]+]]:_(i16), [[UV37:%[0-9]+]]:_(i16), [[UV38:%[0-9]+]]:_(i16), [[UV39:%[0-9]+]]:_(i16), [[UV40:%[0-9]+]]:_(i16), [[UV41:%[0-9]+]]:_(i16), [[UV42:%[0-9]+]]:_(i16), [[UV43:%[0-9]+]]:_(i16), [[UV44:%[0-9]+]]:_(i16), [[UV45:%[0-9]+]]:_(i16), [[UV46:%[0-9]+]]:_(i16), [[UV47:%[0-9]+]]:_(i16), [[UV48:%[0-9]+]]:_(i16), [[UV49:%[0-9]+]]:_(i16), [[UV50:%[0-9]+]]:_(i16), [[UV51:%[0-9]+]]:_(i16), [[UV52:%[0-9]+]]:_(i16), [[UV53:%[0-9]+]]:_(i16), [[UV54:%[0-9]+]]:_(i16), [[UV55:%[0-9]+]]:_(i16), [[UV56:%[0-9]+]]:_(i16), [[UV57:%[0-9]+]]:_(i16), [[UV58:%[0-9]+]]:_(i16), [[UV59:%[0-9]+]]:_(i16), [[UV60:%[0-9]+]]:_(i16), [[UV61:%[0-9]+]]:_(i16), [[UV62:%[0-9]+]]:_(i16), [[UV63:%[0-9]+]]:_(i16), [[UV64:%[0-9]+]]:_(i16), [[UV65:%[0-9]+]]:_(i16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<66 x i16>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<65 x i16>) = G_BUILD_VECTOR [[UV]](i16), [[UV1]](i16), [[UV2]](i16), [[UV3]](i16), [[UV4]](i16), [[UV5]](i16), [[UV6]](i16), [[UV7]](i16), [[UV8]](i16), [[UV9]](i16), [[UV10]](i16), [[UV11]](i16), [[UV12]](i16), [[UV13]](i16), [[UV14]](i16), [[UV15]](i16), [[UV16]](i16), [[UV17]](i16), [[UV18]](i16), [[UV19]](i16), [[UV20]](i16), [[UV21]](i16), [[UV22]](i16), [[UV23]](i16), [[UV24]](i16), [[UV25]](i16), [[UV26]](i16), [[UV27]](i16), [[UV28]](i16), [[UV29]](i16), [[UV30]](i16), [[UV31]](i16), [[UV32]](i16), [[UV33]](i16), [[UV34]](i16), [[UV35]](i16), [[UV36]](i16), [[UV37]](i16), [[UV38]](i16), [[UV39]](i16), [[UV40]](i16), [[UV41]](i16), [[UV42]](i16), [[UV43]](i16), [[UV44]](i16), [[UV45]](i16), [[UV46]](i16), [[UV47]](i16), [[UV48]](i16), [[UV49]](i16), [[UV50]](i16), [[UV51]](i16), [[UV52]](i16), [[UV53]](i16), [[UV54]](i16), [[UV55]](i16), [[UV56]](i16), [[UV57]](i16), [[UV58]](i16), [[UV59]](i16), [[UV60]](i16), 
[[UV61]](i16), [[UV62]](i16), [[UV63]](i16), [[UV64]](i16) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<65 x s16>), [[DEF]](p1) :: (store (<65 x s16>) into `ptr addrspace(1) undef`, align 256, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<65 x i16>), [[DEF]](p1) :: (store (<65 x i16>) into `ptr addrspace(1) undef`, align 256, addrspace 1) ; CHECK-NEXT: SI_RETURN store <65 x i16> %arg0, ptr addrspace(1) undef ret void @@ -1258,11 +1260,11 @@ define void @void_func_v2f32(<2 x float> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[COPY]](f32), [[COPY1]](f32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (store (<2 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x f32>), [[DEF]](p1) :: (store (<2 x f32>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store <2 x float> %arg0, ptr addrspace(1) undef ret void @@ -1273,12 +1275,12 @@ define void @void_func_v3f32(<3 x float> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[COPY]](f32), [[COPY1]](f32), [[COPY2]](f32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (store (<3 x s32>) into `ptr addrspace(1) undef`, align 16, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x f32>), [[DEF]](p1) :: (store (<3 x f32>) into `ptr addrspace(1) undef`, align 16, addrspace 1) ; CHECK-NEXT: SI_RETURN store <3 x float> %arg0, ptr addrspace(1) undef ret void @@ -1289,13 +1291,13 @@ define void @void_func_v4f32(<4 x float> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[COPY]](f32), [[COPY1]](f32), [[COPY2]](f32), [[COPY3]](f32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; 
CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x f32>), [[DEF]](p1) :: (store (<4 x f32>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store <4 x float> %arg0, ptr addrspace(1) undef ret void @@ -1306,17 +1308,17 @@ define void @void_func_v8f32(<8 x float> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(f32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(f32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(f32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(f32) = COPY $vgpr7 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x f32>) = G_BUILD_VECTOR [[COPY]](f32), [[COPY1]](f32), [[COPY2]](f32), [[COPY3]](f32), [[COPY4]](f32), [[COPY5]](f32), [[COPY6]](f32), [[COPY7]](f32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<8 x s32>), [[DEF]](p1) :: (store (<8 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<8 x f32>), [[DEF]](p1) :: (store (<8 x f32>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store <8 x float> %arg0, ptr addrspace(1) undef ret void @@ -1327,25 +1329,25 @@ define void @void_func_v16f32(<16 x float> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), 
[[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<16 x s32>), [[DEF]](p1) :: (store (<16 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(f32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(f32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(f32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(f32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(f32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(f32) = COPY $vgpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(f32) = COPY $vgpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(f32) = COPY $vgpr11 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(f32) = COPY $vgpr13 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(f32) = COPY $vgpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(f32) = COPY $vgpr15 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x f32>) = G_BUILD_VECTOR [[COPY]](f32), [[COPY1]](f32), [[COPY2]](f32), [[COPY3]](f32), [[COPY4]](f32), [[COPY5]](f32), [[COPY6]](f32), [[COPY7]](f32), [[COPY8]](f32), [[COPY9]](f32), [[COPY10]](f32), [[COPY11]](f32), [[COPY12]](f32), [[COPY13]](f32), [[COPY14]](f32), [[COPY15]](f32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<16 x f32>), [[DEF]](p1) :: (store (<16 x f32>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store <16 x float> %arg0, ptr addrspace(1) undef ret void @@ -1356,15 +1358,17 @@ define void @void_func_v2f64(<2 x double> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[MV]](i64) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[MV1]](i64) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f64>) = G_BUILD_VECTOR [[BITCAST]](f64), [[BITCAST1]](f64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x f64>), [[DEF]](p1) :: (store (<2 x f64>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store <2 x double> %arg0, ptr addrspace(1) undef 
ret void @@ -1375,18 +1379,21 @@ define void @void_func_v3f64(<3 x double> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s64>), [[DEF]](p1) :: (store (<3 x s64>) into `ptr addrspace(1) undef`, align 32, addrspace 1) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[MV]](i64) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[MV1]](i64) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(f64) = G_BITCAST [[MV2]](i64) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f64>) = G_BUILD_VECTOR [[BITCAST]](f64), [[BITCAST1]](f64), [[BITCAST2]](f64) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x f64>), [[DEF]](p1) :: (store (<3 x f64>) into `ptr addrspace(1) undef`, align 32, addrspace 1) ; CHECK-NEXT: SI_RETURN store <3 x double> %arg0, ptr addrspace(1) undef ret void @@ -1397,21 +1404,25 @@ define void @void_func_v4f64(<4 x double> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s64>), [[DEF]](p1) :: (store 
(<4 x s64>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[MV]](i64) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[MV1]](i64) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(f64) = G_BITCAST [[MV2]](i64) + ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY6]](i32), [[COPY7]](i32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(f64) = G_BITCAST [[MV3]](i64) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x f64>) = G_BUILD_VECTOR [[BITCAST]](f64), [[BITCAST1]](f64), [[BITCAST2]](f64), [[BITCAST3]](f64) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x f64>), [[DEF]](p1) :: (store (<4 x f64>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store <4 x double> %arg0, ptr addrspace(1) undef ret void @@ -1422,33 +1433,41 @@ define void @void_func_v8f64(<8 x double> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; CHECK-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; CHECK-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), 
[[MV5]](s64), [[MV6]](s64), [[MV7]](s64) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<8 x s64>), [[DEF]](p1) :: (store (<8 x s64>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[MV]](i64) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[MV1]](i64) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(f64) = G_BITCAST [[MV2]](i64) + ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY6]](i32), [[COPY7]](i32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(f64) = G_BITCAST [[MV3]](i64) + ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY8]](i32), [[COPY9]](i32) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(f64) = G_BITCAST [[MV4]](i64) + ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY10]](i32), [[COPY11]](i32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(f64) = G_BITCAST [[MV5]](i64) + ; CHECK-NEXT: [[MV6:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY12]](i32), [[COPY13]](i32) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(f64) = G_BITCAST [[MV6]](i64) + ; CHECK-NEXT: [[MV7:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY14]](i32), [[COPY15]](i32) + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(f64) = G_BITCAST [[MV7]](i64) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x f64>) = G_BUILD_VECTOR [[BITCAST]](f64), [[BITCAST1]](f64), [[BITCAST2]](f64), [[BITCAST3]](f64), [[BITCAST4]](f64), [[BITCAST5]](f64), [[BITCAST6]](f64), [[BITCAST7]](f64) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<8 x f64>), [[DEF]](p1) :: (store (<8 x f64>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store <8 x double> %arg0, ptr addrspace(1) undef ret void @@ -1459,58 +1478,74 @@ define void @void_func_v16f64(<16 x double> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - 
; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr17 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr18 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr19 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr20 + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr21 + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr22 + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(i32) = COPY $vgpr23 + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(i32) = COPY $vgpr24 + ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(i32) = COPY $vgpr25 + ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(i32) = COPY $vgpr26 + ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(i32) = COPY $vgpr27 + ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(i32) = COPY $vgpr28 + ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(i32) = COPY $vgpr29 + ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(i32) = COPY $vgpr30 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = 
G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; CHECK-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; CHECK-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; CHECK-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; CHECK-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) - ; CHECK-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) - ; CHECK-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) - ; CHECK-NEXT: [[MV12:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY24]](s32), [[COPY25]](s32) - ; CHECK-NEXT: [[MV13:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY26]](s32), [[COPY27]](s32) - ; CHECK-NEXT: [[MV14:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY28]](s32), [[COPY29]](s32) - ; CHECK-NEXT: [[MV15:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY30]](s32), [[LOAD]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64), [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64), [[MV12]](s64), [[MV13]](s64), [[MV14]](s64), [[MV15]](s64) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<16 x s64>), [[DEF]](p1) :: (store (<16 x s64>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (i32) from %fixed-stack.0, align 16, addrspace 5) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[MV]](i64) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[MV1]](i64) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(f64) = G_BITCAST [[MV2]](i64) + ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY6]](i32), [[COPY7]](i32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(f64) = G_BITCAST [[MV3]](i64) + ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY8]](i32), [[COPY9]](i32) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(f64) = G_BITCAST [[MV4]](i64) + ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY10]](i32), [[COPY11]](i32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(f64) = G_BITCAST [[MV5]](i64) + ; CHECK-NEXT: [[MV6:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY12]](i32), [[COPY13]](i32) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(f64) = G_BITCAST [[MV6]](i64) + ; CHECK-NEXT: [[MV7:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY14]](i32), [[COPY15]](i32) + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(f64) = G_BITCAST [[MV7]](i64) + ; CHECK-NEXT: [[MV8:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY16]](i32), [[COPY17]](i32) + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(f64) = G_BITCAST [[MV8]](i64) + ; CHECK-NEXT: [[MV9:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY18]](i32), [[COPY19]](i32) + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(f64) = G_BITCAST [[MV9]](i64) + ; CHECK-NEXT: [[MV10:%[0-9]+]]:_(i64) = G_MERGE_VALUES 
[[COPY20]](i32), [[COPY21]](i32) + ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(f64) = G_BITCAST [[MV10]](i64) + ; CHECK-NEXT: [[MV11:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY22]](i32), [[COPY23]](i32) + ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(f64) = G_BITCAST [[MV11]](i64) + ; CHECK-NEXT: [[MV12:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY24]](i32), [[COPY25]](i32) + ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(f64) = G_BITCAST [[MV12]](i64) + ; CHECK-NEXT: [[MV13:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY26]](i32), [[COPY27]](i32) + ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(f64) = G_BITCAST [[MV13]](i64) + ; CHECK-NEXT: [[MV14:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY28]](i32), [[COPY29]](i32) + ; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(f64) = G_BITCAST [[MV14]](i64) + ; CHECK-NEXT: [[MV15:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY30]](i32), [[LOAD]](i32) + ; CHECK-NEXT: [[BITCAST15:%[0-9]+]]:_(f64) = G_BITCAST [[MV15]](i64) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x f64>) = G_BUILD_VECTOR [[BITCAST]](f64), [[BITCAST1]](f64), [[BITCAST2]](f64), [[BITCAST3]](f64), [[BITCAST4]](f64), [[BITCAST5]](f64), [[BITCAST6]](f64), [[BITCAST7]](f64), [[BITCAST8]](f64), [[BITCAST9]](f64), [[BITCAST10]](f64), [[BITCAST11]](f64), [[BITCAST12]](f64), [[BITCAST13]](f64), [[BITCAST14]](f64), [[BITCAST15]](f64) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<16 x f64>), [[DEF]](p1) :: (store (<16 x f64>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store <16 x double> %arg0, ptr addrspace(1) undef ret void @@ -1521,9 +1556,9 @@ define void @void_func_v2f16(<2 x half> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x f16>) = COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[COPY]](<2 x s16>), [[DEF]](p1) :: (store (<2 x s16>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[COPY]](<2 x f16>), [[DEF]](p1) :: (store (<2 x f16>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store <2 x half> %arg0, ptr addrspace(1) undef ret void @@ -1534,13 +1569,13 @@ define void @void_func_v3f16(<3 x half> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x f16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x f16>) = COPY $vgpr1 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[COPY]](<2 x f16>), [[COPY1]](<2 x f16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(f16), [[UV1:%[0-9]+]]:_(f16), [[UV2:%[0-9]+]]:_(f16), [[UV3:%[0-9]+]]:_(f16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x f16>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f16>) = G_BUILD_VECTOR [[UV]](f16), [[UV1]](f16), [[UV2]](f16) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s16>), [[DEF]](p1) :: (store (<3 x s16>) into `ptr addrspace(1) 
undef`, align 8, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x f16>), [[DEF]](p1) :: (store (<3 x f16>) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: SI_RETURN store <3 x half> %arg0, ptr addrspace(1) undef ret void @@ -1551,11 +1586,11 @@ define void @void_func_v4f16(<4 x half> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x f16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x f16>) = COPY $vgpr1 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[COPY]](<2 x f16>), [[COPY1]](<2 x f16>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (store (<4 x s16>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x f16>), [[DEF]](p1) :: (store (<4 x f16>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store <4 x half> %arg0, ptr addrspace(1) undef ret void @@ -1566,13 +1601,13 @@ define void @void_func_v8f16(<8 x half> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x f16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x f16>) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x f16>) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x f16>) = COPY $vgpr3 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x f16>) = G_CONCAT_VECTORS [[COPY]](<2 x f16>), [[COPY1]](<2 x f16>), [[COPY2]](<2 x f16>), [[COPY3]](<2 x f16>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<8 x f16>), [[DEF]](p1) :: (store (<8 x f16>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store <8 x half> %arg0, ptr addrspace(1) undef ret void @@ -1583,17 +1618,17 @@ define void @void_func_v16f16(<16 x half> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr7 - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[COPY4]](<2 x 
s16>), [[COPY5]](<2 x s16>), [[COPY6]](<2 x s16>), [[COPY7]](<2 x s16>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x f16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x f16>) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x f16>) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x f16>) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<2 x f16>) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<2 x f16>) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<2 x f16>) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<2 x f16>) = COPY $vgpr7 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x f16>) = G_CONCAT_VECTORS [[COPY]](<2 x f16>), [[COPY1]](<2 x f16>), [[COPY2]](<2 x f16>), [[COPY3]](<2 x f16>), [[COPY4]](<2 x f16>), [[COPY5]](<2 x f16>), [[COPY6]](<2 x f16>), [[COPY7]](<2 x f16>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<16 x s16>), [[DEF]](p1) :: (store (<16 x s16>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<16 x f16>), [[DEF]](p1) :: (store (<16 x f16>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store <16 x half> %arg0, ptr addrspace(1) undef ret void @@ -1605,15 +1640,15 @@ define void @void_func_i32_i64_i32(i32 %arg0, i64 %arg1, i32 %arg2) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY1]](i32), [[COPY2]](i32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (volatile store (s64) into `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[COPY3]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[COPY]](i32), [[DEF]](p1) :: (volatile store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[MV]](i64), [[DEF]](p1) :: (volatile store (i64) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[COPY3]](i32), [[DEF]](p1) :: (volatile store (i32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store volatile i32 %arg0, ptr addrspace(1) undef store volatile i64 %arg1, ptr addrspace(1) undef @@ -1626,9 +1661,9 @@ define void @void_func_struct_i32({ i32 } %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[COPY]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store { i32 } %arg0, ptr addrspace(1) undef ret void @@ -1639,15 +1674,15 
@@ define void @void_func_struct_i8_i32({ i8, i32 } %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i8) = G_TRUNC [[TRUNC]](i16) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (store (s8) into `ptr addrspace(1) undef`, align 4, addrspace 1) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](s64) - ; CHECK-NEXT: G_STORE [[COPY1]](s32), [[PTR_ADD]](p1) :: (store (s32) into `ptr addrspace(1) undef` + 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC1]](i8), [[DEF]](p1) :: (store (i8) into `ptr addrspace(1) undef`, align 4, addrspace 1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](i64) + ; CHECK-NEXT: G_STORE [[COPY1]](i32), [[PTR_ADD]](p1) :: (store (i32) into `ptr addrspace(1) undef` + 4, addrspace 1) ; CHECK-NEXT: SI_RETURN store { i8, i32 } %arg0, ptr addrspace(1) undef ret void @@ -1659,14 +1694,14 @@ define void @void_func_byval_struct_i8_i32(ptr addrspace(5) byval({ i8, i32 }) % ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p5) :: (load (s8) from %ir.arg0, align 4, addrspace 5) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from %ir.arg0 + 4, addrspace 5) - ; CHECK-NEXT: G_STORE [[LOAD]](s8), [[DEF]](p1) :: (store (s8) into `ptr addrspace(1) undef`, align 4, addrspace 1) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64) - ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[PTR_ADD1]](p1) :: (store (s32) into `ptr addrspace(1) undef` + 4, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i8) = G_LOAD [[COPY]](p5) :: (load (i8) from %ir.arg0, align 4, addrspace 5) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from %ir.arg0 + 4, addrspace 5) + ; CHECK-NEXT: G_STORE [[LOAD]](i8), [[DEF]](p1) :: (store (i8) into `ptr addrspace(1) undef`, align 4, addrspace 1) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](i64) + ; CHECK-NEXT: G_STORE [[LOAD1]](i32), [[PTR_ADD1]](p1) :: (store (i32) into `ptr addrspace(1) undef` + 4, addrspace 1) ; CHECK-NEXT: SI_RETURN %arg0.load = load { i8, i32 }, ptr addrspace(5) %arg0 store { i8, i32 } %arg0.load, ptr addrspace(1) undef @@ -1682,24 +1717,24 @@ define void @void_func_byval_struct_i8_i32_x2(ptr addrspace(5) byval({ i8, i32 } ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY 
[[FRAME_INDEX]](p5) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p5) :: (volatile load (s8) from %ir.arg0, align 4, addrspace 5) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (volatile load (s32) from %ir.arg0 + 4, addrspace 5) - ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[COPY1]](p5) :: (volatile load (s8) from %ir.arg1, align 4, addrspace 5) - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY1]], [[C]](s32) - ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (volatile load (s32) from %ir.arg1 + 4, addrspace 5) - ; CHECK-NEXT: G_STORE [[LOAD]](s8), [[DEF]](p1) :: (volatile store (s8) into `ptr addrspace(1) undef`, align 4, addrspace 1) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64) - ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[PTR_ADD2]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef` + 4, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD2]](s8), [[DEF]](p1) :: (volatile store (s8) into `ptr addrspace(1) undef`, align 4, addrspace 1) - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64) - ; CHECK-NEXT: G_STORE [[LOAD3]](s32), [[PTR_ADD3]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef` + 4, addrspace 1) - ; CHECK-NEXT: G_STORE [[COPY2]](s32), [[DEF1]](p3) :: (volatile store (s32) into `ptr addrspace(3) undef`, addrspace 3) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i8) = G_LOAD [[COPY]](p5) :: (volatile load (i8) from %ir.arg0, align 4, addrspace 5) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (volatile load (i32) from %ir.arg0 + 4, addrspace 5) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(i8) = G_LOAD [[COPY1]](p5) :: (volatile load (i8) from %ir.arg1, align 4, addrspace 5) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY1]], [[C]](i32) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (volatile load (i32) from %ir.arg1 + 4, addrspace 5) + ; CHECK-NEXT: G_STORE [[LOAD]](i8), [[DEF]](p1) :: (volatile store (i8) into `ptr addrspace(1) undef`, align 4, addrspace 1) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](i64) + ; CHECK-NEXT: G_STORE [[LOAD1]](i32), [[PTR_ADD2]](p1) :: (volatile store (i32) into `ptr addrspace(1) undef` + 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD2]](i8), [[DEF]](p1) :: (volatile store (i8) into `ptr addrspace(1) undef`, align 4, addrspace 1) + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](i64) + ; CHECK-NEXT: G_STORE [[LOAD3]](i32), [[PTR_ADD3]](p1) :: (volatile store (i32) into `ptr addrspace(1) undef` + 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[COPY2]](i32), [[DEF1]](p3) :: (volatile store (i32) into `ptr addrspace(3) undef`, addrspace 3) ; CHECK-NEXT: SI_RETURN %arg0.load = load volatile { i8, i32 }, ptr addrspace(5) %arg0 
%arg1.load = load volatile { i8, i32 }, ptr addrspace(5) %arg1 @@ -1717,10 +1752,10 @@ define void @void_func_byval_i32_byval_i64(ptr addrspace(5) byval(i32) %arg0, pt ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32) from %ir.arg0, addrspace 5) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[COPY1]](p5) :: (load (s64) from %ir.arg1, addrspace 5) - ; CHECK-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD1]](s64), [[DEF]](p1) :: (store (s64) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32) from %ir.arg0, addrspace 5) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(i64) = G_LOAD [[COPY1]](p5) :: (load (i64) from %ir.arg1, addrspace 5) + ; CHECK-NEXT: G_STORE [[LOAD]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD1]](i64), [[DEF]](p1) :: (store (i64) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN %arg0.load = load i32, ptr addrspace(5) %arg0 %arg1.load = load i64, ptr addrspace(5) %arg1 @@ -1737,10 +1772,10 @@ define void @void_func_byval_i8_align32_i16_align64(ptr addrspace(5) byval(i8) % ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5) ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p5) :: (dereferenceable load (s8) from %ir.arg0, addrspace 5) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[COPY1]](p5) :: (dereferenceable load (s16) from %ir.arg1, addrspace 5) - ; CHECK-NEXT: G_STORE [[LOAD]](s8), [[C]](p1) :: (store (s8) into `ptr addrspace(1) null`, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD1]](s16), [[C]](p1) :: (store (s16) into `ptr addrspace(1) null`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i8) = G_LOAD [[COPY]](p5) :: (dereferenceable load (i8) from %ir.arg0, addrspace 5) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(i16) = G_LOAD [[COPY1]](p5) :: (dereferenceable load (i16) from %ir.arg1, addrspace 5) + ; CHECK-NEXT: G_STORE [[LOAD]](i8), [[C]](p1) :: (store (i8) into `ptr addrspace(1) null`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD1]](i16), [[C]](p1) :: (store (i16) into `ptr addrspace(1) null`, addrspace 1) ; CHECK-NEXT: SI_RETURN %arg0.load = load i8, ptr addrspace(5) %arg0 %arg1.load = load i16, ptr addrspace(5) %arg1 @@ -1758,22 +1793,22 @@ define void @byval_a3i32_align128_byval_i16_align64(ptr addrspace(5) byval([3 x ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5) ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (dereferenceable load (s32) from %ir.arg0, addrspace 5) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (dereferenceable load (s32) from %ir.arg0 + 4, addrspace 5) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD 
[[PTR_ADD1]](p5) :: (dereferenceable load (s32) from %ir.arg0 + 8, addrspace 5) - ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[COPY1]](p5) :: (dereferenceable load (s16) from %ir.arg1, addrspace 5) - ; CHECK-NEXT: G_STORE [[LOAD]](s32), [[C]](p1) :: (store (s32) into `ptr addrspace(1) null`, addrspace 1) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (dereferenceable load (i32) from %ir.arg0, addrspace 5) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (dereferenceable load (i32) from %ir.arg0 + 4, addrspace 5) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (dereferenceable load (i32) from %ir.arg0 + 8, addrspace 5) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(i16) = G_LOAD [[COPY1]](p5) :: (dereferenceable load (i16) from %ir.arg1, addrspace 5) + ; CHECK-NEXT: G_STORE [[LOAD]](i32), [[C]](p1) :: (store (i32) into `ptr addrspace(1) null`, addrspace 1) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 ; CHECK-NEXT: [[C4:%[0-9]+]]:_(p1) = G_CONSTANT i64 4 - ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[C4]](p1) :: (store (s32) into `ptr addrspace(1) null` + 4, addrspace 1) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CHECK-NEXT: G_STORE [[LOAD1]](i32), [[C4]](p1) :: (store (i32) into `ptr addrspace(1) null` + 4, addrspace 1) + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 ; CHECK-NEXT: [[C6:%[0-9]+]]:_(p1) = G_CONSTANT i64 8 - ; CHECK-NEXT: G_STORE [[LOAD2]](s32), [[C6]](p1) :: (store (s32) into `ptr addrspace(1) null` + 8, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD3]](s16), [[C]](p1) :: (store (s16) into `ptr addrspace(1) null`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD2]](i32), [[C6]](p1) :: (store (i32) into `ptr addrspace(1) null` + 8, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD3]](i16), [[C]](p1) :: (store (i16) into `ptr addrspace(1) null`, addrspace 1) ; CHECK-NEXT: SI_RETURN %arg0.load = load [3 x i32], ptr addrspace(5) %arg0 %arg1.load = load i16, ptr addrspace(5) %arg1 @@ -1788,48 +1823,48 @@ define void @void_func_v32i32_i32_byval_i8(<32 x i32> %arg0, i32 %arg1, ptr addr ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; 
CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr17 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr18 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr19 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr20 + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr21 + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr22 + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(i32) = COPY $vgpr23 + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(i32) = COPY $vgpr24 + ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(i32) = COPY $vgpr25 + ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(i32) = COPY $vgpr26 + ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(i32) = COPY $vgpr27 + ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(i32) = COPY $vgpr28 + ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(i32) = COPY $vgpr29 + ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(i32) = COPY $vgpr30 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.2, align 16, addrspace 5) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) + ; CHECK-NEXT: 
[[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (i32) from %fixed-stack.2, align 16, addrspace 5) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32), [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32), [[COPY12]](i32), [[COPY13]](i32), [[COPY14]](i32), [[COPY15]](i32), [[COPY16]](i32), [[COPY17]](i32), [[COPY18]](i32), [[COPY19]](i32), [[COPY20]](i32), [[COPY21]](i32), [[COPY22]](i32), [[COPY23]](i32), [[COPY24]](i32), [[COPY25]](i32), [[COPY26]](i32), [[COPY27]](i32), [[COPY28]](i32), [[COPY29]](i32), [[COPY30]](i32), [[LOAD]](i32) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.1, addrspace 5) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (i32) from %fixed-stack.1, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX2]](p5) ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[C]](p1) :: (store (s32) into `ptr addrspace(1) null`, addrspace 1) - ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[COPY31]](p5) :: (dereferenceable load (s8) from %ir.arg2, addrspace 5) - ; CHECK-NEXT: G_STORE [[LOAD2]](s8), [[C]](p1) :: (store (s8) into `ptr addrspace(1) null`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD1]](i32), [[C]](p1) :: (store (i32) into `ptr addrspace(1) null`, addrspace 1) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(i8) = G_LOAD [[COPY31]](p5) :: (dereferenceable load (i8) from %ir.arg2, addrspace 5) + ; CHECK-NEXT: G_STORE [[LOAD2]](i8), [[C]](p1) :: (store (i8) into `ptr addrspace(1) null`, addrspace 1) ; CHECK-NEXT: SI_RETURN store i32 %arg1, ptr addrspace(1) null %arg2.load = load i8, ptr addrspace(5) %arg2 @@ -1843,48 +1878,48 @@ define void @void_func_v32i32_byval_i8_i32(<32 x i32> %arg0, ptr addrspace(5) by ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; 
CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr17 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr18 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr19 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr20 + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr21 + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr22 + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(i32) = COPY $vgpr23 + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(i32) = COPY $vgpr24 + ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(i32) = COPY $vgpr25 + ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(i32) = COPY $vgpr26 + ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(i32) = COPY $vgpr27 + ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(i32) = COPY $vgpr28 + ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(i32) = COPY $vgpr29 + ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(i32) = COPY $vgpr30 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.2, align 16, addrspace 5) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (i32) from %fixed-stack.2, align 16, addrspace 5) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), 
[[COPY6]](i32), [[COPY7]](i32), [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32), [[COPY12]](i32), [[COPY13]](i32), [[COPY14]](i32), [[COPY15]](i32), [[COPY16]](i32), [[COPY17]](i32), [[COPY18]](i32), [[COPY19]](i32), [[COPY20]](i32), [[COPY21]](i32), [[COPY22]](i32), [[COPY23]](i32), [[COPY24]](i32), [[COPY25]](i32), [[COPY26]](i32), [[COPY27]](i32), [[COPY28]](i32), [[COPY29]](i32), [[COPY30]](i32), [[LOAD]](i32) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5) ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.0, align 8, addrspace 5) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (i32) from %fixed-stack.0, align 8, addrspace 5) ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[C]](p1) :: (store (s32) into `ptr addrspace(1) null`, addrspace 1) - ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[COPY31]](p5) :: (dereferenceable load (s8) from %ir.arg1, addrspace 5) - ; CHECK-NEXT: G_STORE [[LOAD2]](s8), [[C]](p1) :: (store (s8) into `ptr addrspace(1) null`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD1]](i32), [[C]](p1) :: (store (i32) into `ptr addrspace(1) null`, addrspace 1) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(i8) = G_LOAD [[COPY31]](p5) :: (dereferenceable load (i8) from %ir.arg1, addrspace 5) + ; CHECK-NEXT: G_STORE [[LOAD2]](i8), [[C]](p1) :: (store (i8) into `ptr addrspace(1) null`, addrspace 1) ; CHECK-NEXT: SI_RETURN store i32 %arg2, ptr addrspace(1) null %arg1.load = load i8, ptr addrspace(5) %arg1 @@ -1897,51 +1932,51 @@ define void @void_func_v32i32_i32_i64(<32 x i32> %arg0, i32 %arg1, i64 %arg2) #0 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY 
$vgpr23 - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr17 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr18 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr19 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr20 + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr21 + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr22 + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(i32) = COPY $vgpr23 + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(i32) = COPY $vgpr24 + ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(i32) = COPY $vgpr25 + ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(i32) = COPY $vgpr26 + ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(i32) = COPY $vgpr27 + ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(i32) = COPY $vgpr28 + ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(i32) = COPY $vgpr29 + ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(i32) = COPY $vgpr30 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.3, align 16, addrspace 5) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (i32) from %fixed-stack.3, align 16, addrspace 5) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32), [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32), [[COPY12]](i32), [[COPY13]](i32), [[COPY14]](i32), [[COPY15]](i32), [[COPY16]](i32), [[COPY17]](i32), [[COPY18]](i32), [[COPY19]](i32), [[COPY20]](i32), [[COPY21]](i32), 
[[COPY22]](i32), [[COPY23]](i32), [[COPY24]](i32), [[COPY25]](i32), [[COPY26]](i32), [[COPY27]](i32), [[COPY28]](i32), [[COPY29]](i32), [[COPY30]](i32), [[LOAD]](i32) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.2, addrspace 5) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (i32) from %fixed-stack.2, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.1, align 8, addrspace 5) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (i32) from %fixed-stack.1, align 8, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s32) from %fixed-stack.0, addrspace 5) - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (i32) from %fixed-stack.0, addrspace 5) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD2]](i32), [[LOAD3]](i32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (volatile store (s64) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x i32>), [[DEF]](p1) :: (volatile store (<32 x i32>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD1]](i32), [[DEF]](p1) :: (volatile store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[MV]](i64), [[DEF]](p1) :: (volatile store (i64) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store volatile <32 x i32> %arg0, ptr addrspace(1) undef store volatile i32 %arg1, ptr addrspace(1) undef @@ -1955,56 +1990,56 @@ define void @void_func_v32i32_i1_i8_i16(<32 x i32> %arg0, i1 %arg1, i8 %arg2, i1 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: 
[[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr17 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr18 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr19 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr20 + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr21 + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr22 + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(i32) = COPY $vgpr23 + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(i32) = COPY $vgpr24 + ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(i32) = COPY $vgpr25 + ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(i32) = COPY $vgpr26 + ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(i32) = COPY $vgpr27 + ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(i32) = COPY $vgpr28 + ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(i32) = COPY $vgpr29 + ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(i32) = COPY $vgpr30 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.4, align 16, addrspace 5) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i32) 
= G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (i32) from %fixed-stack.4, align 16, addrspace 5) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32), [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32), [[COPY12]](i32), [[COPY13]](i32), [[COPY14]](i32), [[COPY15]](i32), [[COPY16]](i32), [[COPY17]](i32), [[COPY18]](i32), [[COPY19]](i32), [[COPY20]](i32), [[COPY21]](i32), [[COPY22]](i32), [[COPY23]](i32), [[COPY24]](i32), [[COPY25]](i32), [[COPY26]](i32), [[COPY27]](i32), [[COPY28]](i32), [[COPY29]](i32), [[COPY30]](i32), [[LOAD]](i32) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s1) from %fixed-stack.3, align 4, addrspace 5) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[LOAD1]](s32) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (i1) from %fixed-stack.3, align 4, addrspace 5) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i1) = G_TRUNC [[LOAD1]](i32) ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 - ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s16) from %fixed-stack.2, align 8, addrspace 5) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD2]](s16) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(i16) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (i16) from %fixed-stack.2, align 8, addrspace 5) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i8) = G_TRUNC [[LOAD2]](i16) ; CHECK-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s16) from %fixed-stack.1, align 4, addrspace 5) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(i16) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (i16) from %fixed-stack.1, align 4, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s16) from %fixed-stack.0, align 16, addrspace 5) + ; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(f16) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (f16) from %fixed-stack.0, align 16, addrspace 5) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[TRUNC]](s1), [[DEF]](p1) :: (volatile store (s1) into `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (volatile store (s8) into `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD3]](s16), [[DEF]](p1) :: (volatile store (s16) into `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD4]](s16), [[DEF]](p1) :: (volatile store (s16) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x i32>), [[DEF]](p1) :: (volatile store (<32 x i32>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC]](i1), [[DEF]](p1) :: (volatile store (i1) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC1]](i8), [[DEF]](p1) :: (volatile store (i8) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD3]](i16), [[DEF]](p1) :: (volatile store (i16) into `ptr addrspace(1) undef`, addrspace 
1) + ; CHECK-NEXT: G_STORE [[LOAD4]](f16), [[DEF]](p1) :: (volatile store (f16) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store volatile <32 x i32> %arg0, ptr addrspace(1) undef store volatile i1 %arg1, ptr addrspace(1) undef @@ -2019,46 +2054,46 @@ define void @void_func_v32i32_p3_p5_i16(<32 x i32> %arg0, ptr addrspace(3) %arg1 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr17 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr18 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY 
$vgpr19 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr20 + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr21 + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr22 + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(i32) = COPY $vgpr23 + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(i32) = COPY $vgpr24 + ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(i32) = COPY $vgpr25 + ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(i32) = COPY $vgpr26 + ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(i32) = COPY $vgpr27 + ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(i32) = COPY $vgpr28 + ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(i32) = COPY $vgpr29 + ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(i32) = COPY $vgpr30 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.2, align 16, addrspace 5) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (i32) from %fixed-stack.2, align 16, addrspace 5) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32), [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32), [[COPY12]](i32), [[COPY13]](i32), [[COPY14]](i32), [[COPY15]](i32), [[COPY16]](i32), [[COPY17]](i32), [[COPY18]](i32), [[COPY19]](i32), [[COPY20]](i32), [[COPY21]](i32), [[COPY22]](i32), [[COPY23]](i32), [[COPY24]](i32), [[COPY25]](i32), [[COPY26]](i32), [[COPY27]](i32), [[COPY28]](i32), [[COPY29]](i32), [[COPY30]](i32), [[LOAD]](i32) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (p3) from %fixed-stack.1, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (p5) from %fixed-stack.0, align 8, addrspace 5) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x i32>), [[DEF]](p1) :: (volatile store (<32 x i32>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: G_STORE [[LOAD1]](p3), [[DEF]](p1) :: (volatile store (p3) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: G_STORE [[LOAD2]](p5), [[DEF]](p1) :: (volatile store (p5) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -2073,54 +2108,54 @@ define void @void_func_v32i32_v2i32_v2f32(<32 x i32> %arg0, <2 x i32> %arg1, <2 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, 
$vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr17 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr18 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr19 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr20 + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr21 + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr22 + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(i32) = COPY $vgpr23 + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(i32) = COPY $vgpr24 + ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(i32) = COPY $vgpr25 + ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(i32) = COPY $vgpr26 + ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(i32) = COPY $vgpr27 + ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(i32) = COPY $vgpr28 + ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(i32) = COPY $vgpr29 + ; CHECK-NEXT: 
[[COPY30:%[0-9]+]]:_(i32) = COPY $vgpr30 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.4, align 16, addrspace 5) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (i32) from %fixed-stack.4, align 16, addrspace 5) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32), [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32), [[COPY12]](i32), [[COPY13]](i32), [[COPY14]](i32), [[COPY15]](i32), [[COPY16]](i32), [[COPY17]](i32), [[COPY18]](i32), [[COPY19]](i32), [[COPY20]](i32), [[COPY21]](i32), [[COPY22]](i32), [[COPY23]](i32), [[COPY24]](i32), [[COPY25]](i32), [[COPY26]](i32), [[COPY27]](i32), [[COPY28]](i32), [[COPY29]](i32), [[COPY30]](i32), [[LOAD]](i32) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.3, addrspace 5) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (i32) from %fixed-stack.3, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 - ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.2, align 8, addrspace 5) - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD1]](s32), [[LOAD2]](s32) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (i32) from %fixed-stack.2, align 8, addrspace 5) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[LOAD1]](i32), [[LOAD2]](i32) ; CHECK-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s32) from %fixed-stack.1, addrspace 5) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(f32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (f32) from %fixed-stack.1, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32) + ; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(f32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (f32) from %fixed-stack.0, align 16, addrspace 5) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[LOAD3]](f32), [[LOAD4]](f32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `ptr 
addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x s32>), [[DEF]](p1) :: (volatile store (<2 x s32>) into `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<2 x s32>), [[DEF]](p1) :: (volatile store (<2 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x i32>), [[DEF]](p1) :: (volatile store (<32 x i32>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x i32>), [[DEF]](p1) :: (volatile store (<2 x i32>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<2 x f32>), [[DEF]](p1) :: (volatile store (<2 x f32>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store volatile <32 x i32> %arg0, ptr addrspace(1) undef store volatile <2 x i32> %arg1, ptr addrspace(1) undef @@ -2133,48 +2168,48 @@ define void @void_func_v32i32_v2i16_v2f16(<32 x i32> %arg0, <2 x i16> %arg1, <2 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY 
$vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr17 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr18 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr19 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr20 + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr21 + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr22 + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(i32) = COPY $vgpr23 + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(i32) = COPY $vgpr24 + ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(i32) = COPY $vgpr25 + ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(i32) = COPY $vgpr26 + ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(i32) = COPY $vgpr27 + ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(i32) = COPY $vgpr28 + ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(i32) = COPY $vgpr29 + ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(i32) = COPY $vgpr30 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.2, align 16, addrspace 5) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (i32) from %fixed-stack.2, align 16, addrspace 5) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32), [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32), [[COPY12]](i32), [[COPY13]](i32), [[COPY14]](i32), [[COPY15]](i32), [[COPY16]](i32), [[COPY17]](i32), [[COPY18]](i32), [[COPY19]](i32), [[COPY20]](i32), [[COPY21]](i32), [[COPY22]](i32), [[COPY23]](i32), [[COPY24]](i32), [[COPY25]](i32), [[COPY26]](i32), [[COPY27]](i32), [[COPY28]](i32), [[COPY29]](i32), [[COPY30]](i32), [[LOAD]](i32) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (<2 x s16>) from %fixed-stack.1, addrspace 5) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (<2 x i16>) from %fixed-stack.1, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (<2 x s16>) from %fixed-stack.0, align 8, addrspace 5) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(<2 x f16>) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (<2 x f16>) from %fixed-stack.0, align 8, addrspace 5) ; 
CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD1]](<2 x s16>), [[DEF]](p1) :: (volatile store (<2 x s16>) into `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD2]](<2 x s16>), [[DEF]](p1) :: (volatile store (<2 x s16>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x i32>), [[DEF]](p1) :: (volatile store (<32 x i32>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD1]](<2 x i16>), [[DEF]](p1) :: (volatile store (<2 x i16>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD2]](<2 x f16>), [[DEF]](p1) :: (volatile store (<2 x f16>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store volatile <32 x i32> %arg0, ptr addrspace(1) undef store volatile <2 x i16> %arg1, ptr addrspace(1) undef @@ -2187,66 +2222,68 @@ define void @void_func_v32i32_v2i64_v2f64(<32 x i32> %arg0, <2 x i64> %arg1, <2 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; CHECK-NEXT: 
[[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr17 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr18 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr19 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr20 + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr21 + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr22 + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(i32) = COPY $vgpr23 + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(i32) = COPY $vgpr24 + ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(i32) = COPY $vgpr25 + ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(i32) = COPY $vgpr26 + ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(i32) = COPY $vgpr27 + ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(i32) = COPY $vgpr28 + ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(i32) = COPY $vgpr29 + ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(i32) = COPY $vgpr30 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.8, align 16, addrspace 5) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (i32) from %fixed-stack.8, align 16, addrspace 5) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32), [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32), [[COPY12]](i32), [[COPY13]](i32), [[COPY14]](i32), [[COPY15]](i32), [[COPY16]](i32), [[COPY17]](i32), [[COPY18]](i32), [[COPY19]](i32), [[COPY20]](i32), [[COPY21]](i32), [[COPY22]](i32), [[COPY23]](i32), [[COPY24]](i32), [[COPY25]](i32), [[COPY26]](i32), [[COPY27]](i32), [[COPY28]](i32), [[COPY29]](i32), [[COPY30]](i32), [[LOAD]](i32) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7 - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.7, addrspace 5) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (i32) from %fixed-stack.7, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6 - ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.6, align 8, addrspace 5) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = 
G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (i32) from %fixed-stack.6, align 8, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 - ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s32) from %fixed-stack.5, addrspace 5) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (i32) from %fixed-stack.5, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 - ; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s32) from %fixed-stack.4, align 16, addrspace 5) - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD1]](s32), [[LOAD2]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD3]](s32), [[LOAD4]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) + ; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (i32) from %fixed-stack.4, align 16, addrspace 5) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD1]](i32), [[LOAD2]](i32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD3]](i32), [[LOAD4]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64) ; CHECK-NEXT: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 - ; CHECK-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (s32) from %fixed-stack.3, addrspace 5) + ; CHECK-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (i32) from %fixed-stack.3, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 - ; CHECK-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (s32) from %fixed-stack.2, align 8, addrspace 5) + ; CHECK-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (i32) from %fixed-stack.2, align 8, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load (s32) from %fixed-stack.1, addrspace 5) + ; CHECK-NEXT: [[LOAD7:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load (i32) from %fixed-stack.1, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD5]](s32), [[LOAD6]](s32) - ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD7]](s32), [[LOAD8]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV2]](s64), [[MV3]](s64) + ; CHECK-NEXT: [[LOAD8:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load (i32) from %fixed-stack.0, align 16, addrspace 5) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD5]](i32), [[LOAD6]](i32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[MV2]](i64) + ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD7]](i32), [[LOAD8]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[MV3]](i64) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x f64>) = G_BUILD_VECTOR [[BITCAST]](f64), [[BITCAST1]](f64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) 
into `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x s64>), [[DEF]](p1) :: (volatile store (<2 x s64>) into `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<2 x s64>), [[DEF]](p1) :: (volatile store (<2 x s64>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x i32>), [[DEF]](p1) :: (volatile store (<32 x i32>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x i64>), [[DEF]](p1) :: (volatile store (<2 x i64>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<2 x f64>), [[DEF]](p1) :: (volatile store (<2 x f64>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store volatile <32 x i32> %arg0, ptr addrspace(1) undef store volatile <2 x i64> %arg1, ptr addrspace(1) undef @@ -2259,62 +2296,62 @@ define void @void_func_v32i32_v4i32_v4f32(<32 x i32> %arg0, <4 x i32> %arg1, <4 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; CHECK-NEXT: 
[[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr17 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr18 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr19 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr20 + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr21 + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr22 + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(i32) = COPY $vgpr23 + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(i32) = COPY $vgpr24 + ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(i32) = COPY $vgpr25 + ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(i32) = COPY $vgpr26 + ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(i32) = COPY $vgpr27 + ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(i32) = COPY $vgpr28 + ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(i32) = COPY $vgpr29 + ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(i32) = COPY $vgpr30 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.8, align 16, addrspace 5) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (i32) from %fixed-stack.8, align 16, addrspace 5) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32), [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32), [[COPY12]](i32), [[COPY13]](i32), [[COPY14]](i32), [[COPY15]](i32), [[COPY16]](i32), [[COPY17]](i32), [[COPY18]](i32), [[COPY19]](i32), [[COPY20]](i32), [[COPY21]](i32), [[COPY22]](i32), [[COPY23]](i32), [[COPY24]](i32), [[COPY25]](i32), [[COPY26]](i32), [[COPY27]](i32), [[COPY28]](i32), [[COPY29]](i32), [[COPY30]](i32), [[LOAD]](i32) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7 - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.7, addrspace 5) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (i32) from %fixed-stack.7, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6 - ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.6, align 8, addrspace 5) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (i32) from %fixed-stack.6, align 8, addrspace 5) ; CHECK-NEXT: 
[[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 - ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s32) from %fixed-stack.5, addrspace 5) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (i32) from %fixed-stack.5, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 - ; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s32) from %fixed-stack.4, align 16, addrspace 5) - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32) + ; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (i32) from %fixed-stack.4, align 16, addrspace 5) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32), [[LOAD4]](i32) ; CHECK-NEXT: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 - ; CHECK-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (s32) from %fixed-stack.3, addrspace 5) + ; CHECK-NEXT: [[LOAD5:%[0-9]+]]:_(f32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (f32) from %fixed-stack.3, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 - ; CHECK-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (s32) from %fixed-stack.2, align 8, addrspace 5) + ; CHECK-NEXT: [[LOAD6:%[0-9]+]]:_(f32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (f32) from %fixed-stack.2, align 8, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load (s32) from %fixed-stack.1, addrspace 5) + ; CHECK-NEXT: [[LOAD7:%[0-9]+]]:_(f32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load (f32) from %fixed-stack.1, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32) + ; CHECK-NEXT: [[LOAD8:%[0-9]+]]:_(f32) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load (f32) from %fixed-stack.0, align 16, addrspace 5) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[LOAD5]](f32), [[LOAD6]](f32), [[LOAD7]](f32), [[LOAD8]](f32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[DEF]](p1) :: (volatile store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<4 x s32>), [[DEF]](p1) :: (volatile store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x i32>), [[DEF]](p1) :: (volatile store (<32 x i32>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x i32>), [[DEF]](p1) :: (volatile store (<4 x i32>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<4 x f32>), [[DEF]](p1) :: (volatile store (<4 x f32>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store volatile <32 x i32> 
%arg0, ptr addrspace(1) undef store volatile <4 x i32> %arg1, ptr addrspace(1) undef @@ -2327,78 +2364,78 @@ define void @void_func_v32i32_v8i32_v8f32(<32 x i32> %arg0, <8 x i32> %arg1, <8 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr17 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr18 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr19 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr20 + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr21 + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(i32) = 
COPY $vgpr22 + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(i32) = COPY $vgpr23 + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(i32) = COPY $vgpr24 + ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(i32) = COPY $vgpr25 + ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(i32) = COPY $vgpr26 + ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(i32) = COPY $vgpr27 + ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(i32) = COPY $vgpr28 + ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(i32) = COPY $vgpr29 + ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(i32) = COPY $vgpr30 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.16 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.16, align 16, addrspace 5) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (i32) from %fixed-stack.16, align 16, addrspace 5) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32), [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32), [[COPY12]](i32), [[COPY13]](i32), [[COPY14]](i32), [[COPY15]](i32), [[COPY16]](i32), [[COPY17]](i32), [[COPY18]](i32), [[COPY19]](i32), [[COPY20]](i32), [[COPY21]](i32), [[COPY22]](i32), [[COPY23]](i32), [[COPY24]](i32), [[COPY25]](i32), [[COPY26]](i32), [[COPY27]](i32), [[COPY28]](i32), [[COPY29]](i32), [[COPY30]](i32), [[LOAD]](i32) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.15 - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.15, addrspace 5) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (i32) from %fixed-stack.15, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.14 - ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.14, align 8, addrspace 5) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (i32) from %fixed-stack.14, align 8, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.13 - ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s32) from %fixed-stack.13, addrspace 5) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (i32) from %fixed-stack.13, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.12 - ; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s32) from %fixed-stack.12, align 16, addrspace 5) + ; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (i32) from %fixed-stack.12, align 16, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.11 - ; CHECK-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD 
[[FRAME_INDEX5]](p5) :: (invariant load (s32) from %fixed-stack.11, addrspace 5) + ; CHECK-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (i32) from %fixed-stack.11, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.10 - ; CHECK-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (s32) from %fixed-stack.10, align 8, addrspace 5) + ; CHECK-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (i32) from %fixed-stack.10, align 8, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.9 - ; CHECK-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load (s32) from %fixed-stack.9, addrspace 5) + ; CHECK-NEXT: [[LOAD7:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load (i32) from %fixed-stack.9, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8 - ; CHECK-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load (s32) from %fixed-stack.8, align 16, addrspace 5) - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32) + ; CHECK-NEXT: [[LOAD8:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load (i32) from %fixed-stack.8, align 16, addrspace 5) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32), [[LOAD6]](i32), [[LOAD7]](i32), [[LOAD8]](i32) ; CHECK-NEXT: [[FRAME_INDEX9:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7 - ; CHECK-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX9]](p5) :: (invariant load (s32) from %fixed-stack.7, addrspace 5) + ; CHECK-NEXT: [[LOAD9:%[0-9]+]]:_(f32) = G_LOAD [[FRAME_INDEX9]](p5) :: (invariant load (f32) from %fixed-stack.7, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX10:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6 - ; CHECK-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX10]](p5) :: (invariant load (s32) from %fixed-stack.6, align 8, addrspace 5) + ; CHECK-NEXT: [[LOAD10:%[0-9]+]]:_(f32) = G_LOAD [[FRAME_INDEX10]](p5) :: (invariant load (f32) from %fixed-stack.6, align 8, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX11:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 - ; CHECK-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX11]](p5) :: (invariant load (s32) from %fixed-stack.5, addrspace 5) + ; CHECK-NEXT: [[LOAD11:%[0-9]+]]:_(f32) = G_LOAD [[FRAME_INDEX11]](p5) :: (invariant load (f32) from %fixed-stack.5, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX12:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 - ; CHECK-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX12]](p5) :: (invariant load (s32) from %fixed-stack.4, align 16, addrspace 5) + ; CHECK-NEXT: [[LOAD12:%[0-9]+]]:_(f32) = G_LOAD [[FRAME_INDEX12]](p5) :: (invariant load (f32) from %fixed-stack.4, align 16, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX13:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 - ; CHECK-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX13]](p5) :: (invariant load (s32) from %fixed-stack.3, addrspace 5) + ; CHECK-NEXT: [[LOAD13:%[0-9]+]]:_(f32) = G_LOAD [[FRAME_INDEX13]](p5) :: (invariant load (f32) from %fixed-stack.3, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX14:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 - ; CHECK-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX14]](p5) :: 
(invariant load (s32) from %fixed-stack.2, align 8, addrspace 5) + ; CHECK-NEXT: [[LOAD14:%[0-9]+]]:_(f32) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load (f32) from %fixed-stack.2, align 8, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX15:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX15]](p5) :: (invariant load (s32) from %fixed-stack.1, addrspace 5) + ; CHECK-NEXT: [[LOAD15:%[0-9]+]]:_(f32) = G_LOAD [[FRAME_INDEX15]](p5) :: (invariant load (f32) from %fixed-stack.1, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX16:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK-NEXT: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX16]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32), [[LOAD16]](s32) + ; CHECK-NEXT: [[LOAD16:%[0-9]+]]:_(f32) = G_LOAD [[FRAME_INDEX16]](p5) :: (invariant load (f32) from %fixed-stack.0, align 16, addrspace 5) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x f32>) = G_BUILD_VECTOR [[LOAD9]](f32), [[LOAD10]](f32), [[LOAD11]](f32), [[LOAD12]](f32), [[LOAD13]](f32), [[LOAD14]](f32), [[LOAD15]](f32), [[LOAD16]](f32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<8 x s32>), [[DEF]](p1) :: (volatile store (<8 x s32>) into `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<8 x s32>), [[DEF]](p1) :: (volatile store (<8 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x i32>), [[DEF]](p1) :: (volatile store (<32 x i32>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<8 x i32>), [[DEF]](p1) :: (volatile store (<8 x i32>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<8 x f32>), [[DEF]](p1) :: (volatile store (<8 x f32>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store volatile <32 x i32> %arg0, ptr addrspace(1) undef store volatile <8 x i32> %arg1, ptr addrspace(1) undef @@ -2411,110 +2448,110 @@ define void @void_func_v32i32_v16i32_v16f32(<32 x i32> %arg0, <16 x i32> %arg1, ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: 
[[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr17 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr18 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr19 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr20 + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr21 + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr22 + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(i32) = COPY $vgpr23 + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(i32) = COPY $vgpr24 + ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(i32) = COPY $vgpr25 + ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(i32) = COPY $vgpr26 + ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(i32) = COPY $vgpr27 + ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(i32) = COPY $vgpr28 + ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(i32) = COPY $vgpr29 + ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(i32) = COPY $vgpr30 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.32 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.32, align 16, addrspace 5) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), 
[[COPY30]](s32), [[LOAD]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (i32) from %fixed-stack.32, align 16, addrspace 5) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32), [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32), [[COPY12]](i32), [[COPY13]](i32), [[COPY14]](i32), [[COPY15]](i32), [[COPY16]](i32), [[COPY17]](i32), [[COPY18]](i32), [[COPY19]](i32), [[COPY20]](i32), [[COPY21]](i32), [[COPY22]](i32), [[COPY23]](i32), [[COPY24]](i32), [[COPY25]](i32), [[COPY26]](i32), [[COPY27]](i32), [[COPY28]](i32), [[COPY29]](i32), [[COPY30]](i32), [[LOAD]](i32) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.31 - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.31, addrspace 5) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (i32) from %fixed-stack.31, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.30 - ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.30, align 8, addrspace 5) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (i32) from %fixed-stack.30, align 8, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.29 - ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s32) from %fixed-stack.29, addrspace 5) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (i32) from %fixed-stack.29, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.28 - ; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s32) from %fixed-stack.28, align 16, addrspace 5) + ; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (i32) from %fixed-stack.28, align 16, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.27 - ; CHECK-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (s32) from %fixed-stack.27, addrspace 5) + ; CHECK-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (i32) from %fixed-stack.27, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.26 - ; CHECK-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (s32) from %fixed-stack.26, align 8, addrspace 5) + ; CHECK-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (i32) from %fixed-stack.26, align 8, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.25 - ; CHECK-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load (s32) from %fixed-stack.25, addrspace 5) + ; CHECK-NEXT: [[LOAD7:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load (i32) from %fixed-stack.25, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.24 - ; CHECK-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load (s32) from %fixed-stack.24, align 16, addrspace 5) + ; CHECK-NEXT: [[LOAD8:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load (i32) from %fixed-stack.24, align 16, 
addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX9:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.23 - ; CHECK-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX9]](p5) :: (invariant load (s32) from %fixed-stack.23, addrspace 5) + ; CHECK-NEXT: [[LOAD9:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX9]](p5) :: (invariant load (i32) from %fixed-stack.23, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX10:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.22 - ; CHECK-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX10]](p5) :: (invariant load (s32) from %fixed-stack.22, align 8, addrspace 5) + ; CHECK-NEXT: [[LOAD10:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX10]](p5) :: (invariant load (i32) from %fixed-stack.22, align 8, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX11:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.21 - ; CHECK-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX11]](p5) :: (invariant load (s32) from %fixed-stack.21, addrspace 5) + ; CHECK-NEXT: [[LOAD11:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX11]](p5) :: (invariant load (i32) from %fixed-stack.21, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX12:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.20 - ; CHECK-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX12]](p5) :: (invariant load (s32) from %fixed-stack.20, align 16, addrspace 5) + ; CHECK-NEXT: [[LOAD12:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX12]](p5) :: (invariant load (i32) from %fixed-stack.20, align 16, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX13:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.19 - ; CHECK-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX13]](p5) :: (invariant load (s32) from %fixed-stack.19, addrspace 5) + ; CHECK-NEXT: [[LOAD13:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX13]](p5) :: (invariant load (i32) from %fixed-stack.19, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX14:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.18 - ; CHECK-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load (s32) from %fixed-stack.18, align 8, addrspace 5) + ; CHECK-NEXT: [[LOAD14:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load (i32) from %fixed-stack.18, align 8, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX15:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.17 - ; CHECK-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX15]](p5) :: (invariant load (s32) from %fixed-stack.17, addrspace 5) + ; CHECK-NEXT: [[LOAD15:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX15]](p5) :: (invariant load (i32) from %fixed-stack.17, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX16:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.16 - ; CHECK-NEXT: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX16]](p5) :: (invariant load (s32) from %fixed-stack.16, align 16, addrspace 5) - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32), [[LOAD16]](s32) + ; CHECK-NEXT: [[LOAD16:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX16]](p5) :: (invariant load (i32) from %fixed-stack.16, align 16, addrspace 5) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x i32>) = G_BUILD_VECTOR [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32), [[LOAD6]](i32), [[LOAD7]](i32), [[LOAD8]](i32), [[LOAD9]](i32), [[LOAD10]](i32), [[LOAD11]](i32), [[LOAD12]](i32), [[LOAD13]](i32), [[LOAD14]](i32), [[LOAD15]](i32), [[LOAD16]](i32) ; CHECK-NEXT: [[FRAME_INDEX17:%[0-9]+]]:_(p5) = 
G_FRAME_INDEX %fixed-stack.15 - ; CHECK-NEXT: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX17]](p5) :: (invariant load (s32) from %fixed-stack.15, addrspace 5) + ; CHECK-NEXT: [[LOAD17:%[0-9]+]]:_(f32) = G_LOAD [[FRAME_INDEX17]](p5) :: (invariant load (f32) from %fixed-stack.15, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX18:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.14 - ; CHECK-NEXT: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX18]](p5) :: (invariant load (s32) from %fixed-stack.14, align 8, addrspace 5) + ; CHECK-NEXT: [[LOAD18:%[0-9]+]]:_(f32) = G_LOAD [[FRAME_INDEX18]](p5) :: (invariant load (f32) from %fixed-stack.14, align 8, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX19:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.13 - ; CHECK-NEXT: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX19]](p5) :: (invariant load (s32) from %fixed-stack.13, addrspace 5) + ; CHECK-NEXT: [[LOAD19:%[0-9]+]]:_(f32) = G_LOAD [[FRAME_INDEX19]](p5) :: (invariant load (f32) from %fixed-stack.13, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX20:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.12 - ; CHECK-NEXT: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX20]](p5) :: (invariant load (s32) from %fixed-stack.12, align 16, addrspace 5) + ; CHECK-NEXT: [[LOAD20:%[0-9]+]]:_(f32) = G_LOAD [[FRAME_INDEX20]](p5) :: (invariant load (f32) from %fixed-stack.12, align 16, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX21:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.11 - ; CHECK-NEXT: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX21]](p5) :: (invariant load (s32) from %fixed-stack.11, addrspace 5) + ; CHECK-NEXT: [[LOAD21:%[0-9]+]]:_(f32) = G_LOAD [[FRAME_INDEX21]](p5) :: (invariant load (f32) from %fixed-stack.11, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX22:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.10 - ; CHECK-NEXT: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX22]](p5) :: (invariant load (s32) from %fixed-stack.10, align 8, addrspace 5) + ; CHECK-NEXT: [[LOAD22:%[0-9]+]]:_(f32) = G_LOAD [[FRAME_INDEX22]](p5) :: (invariant load (f32) from %fixed-stack.10, align 8, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX23:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.9 - ; CHECK-NEXT: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX23]](p5) :: (invariant load (s32) from %fixed-stack.9, addrspace 5) + ; CHECK-NEXT: [[LOAD23:%[0-9]+]]:_(f32) = G_LOAD [[FRAME_INDEX23]](p5) :: (invariant load (f32) from %fixed-stack.9, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX24:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8 - ; CHECK-NEXT: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX24]](p5) :: (invariant load (s32) from %fixed-stack.8, align 16, addrspace 5) + ; CHECK-NEXT: [[LOAD24:%[0-9]+]]:_(f32) = G_LOAD [[FRAME_INDEX24]](p5) :: (invariant load (f32) from %fixed-stack.8, align 16, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX25:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7 - ; CHECK-NEXT: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX25]](p5) :: (invariant load (s32) from %fixed-stack.7, addrspace 5) + ; CHECK-NEXT: [[LOAD25:%[0-9]+]]:_(f32) = G_LOAD [[FRAME_INDEX25]](p5) :: (invariant load (f32) from %fixed-stack.7, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX26:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6 - ; CHECK-NEXT: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX26]](p5) :: (invariant load (s32) from %fixed-stack.6, align 8, addrspace 5) + ; CHECK-NEXT: [[LOAD26:%[0-9]+]]:_(f32) = G_LOAD [[FRAME_INDEX26]](p5) :: (invariant load (f32) from %fixed-stack.6, align 8, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX27:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 - ; 
CHECK-NEXT: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX27]](p5) :: (invariant load (s32) from %fixed-stack.5, addrspace 5) + ; CHECK-NEXT: [[LOAD27:%[0-9]+]]:_(f32) = G_LOAD [[FRAME_INDEX27]](p5) :: (invariant load (f32) from %fixed-stack.5, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX28:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 - ; CHECK-NEXT: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX28]](p5) :: (invariant load (s32) from %fixed-stack.4, align 16, addrspace 5) + ; CHECK-NEXT: [[LOAD28:%[0-9]+]]:_(f32) = G_LOAD [[FRAME_INDEX28]](p5) :: (invariant load (f32) from %fixed-stack.4, align 16, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX29:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 - ; CHECK-NEXT: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX29]](p5) :: (invariant load (s32) from %fixed-stack.3, addrspace 5) + ; CHECK-NEXT: [[LOAD29:%[0-9]+]]:_(f32) = G_LOAD [[FRAME_INDEX29]](p5) :: (invariant load (f32) from %fixed-stack.3, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX30:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 - ; CHECK-NEXT: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX30]](p5) :: (invariant load (s32) from %fixed-stack.2, align 8, addrspace 5) + ; CHECK-NEXT: [[LOAD30:%[0-9]+]]:_(f32) = G_LOAD [[FRAME_INDEX30]](p5) :: (invariant load (f32) from %fixed-stack.2, align 8, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX31:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK-NEXT: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX31]](p5) :: (invariant load (s32) from %fixed-stack.1, addrspace 5) + ; CHECK-NEXT: [[LOAD31:%[0-9]+]]:_(f32) = G_LOAD [[FRAME_INDEX31]](p5) :: (invariant load (f32) from %fixed-stack.1, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX32:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK-NEXT: [[LOAD32:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX32]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD17]](s32), [[LOAD18]](s32), [[LOAD19]](s32), [[LOAD20]](s32), [[LOAD21]](s32), [[LOAD22]](s32), [[LOAD23]](s32), [[LOAD24]](s32), [[LOAD25]](s32), [[LOAD26]](s32), [[LOAD27]](s32), [[LOAD28]](s32), [[LOAD29]](s32), [[LOAD30]](s32), [[LOAD31]](s32), [[LOAD32]](s32) + ; CHECK-NEXT: [[LOAD32:%[0-9]+]]:_(f32) = G_LOAD [[FRAME_INDEX32]](p5) :: (invariant load (f32) from %fixed-stack.0, align 16, addrspace 5) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x f32>) = G_BUILD_VECTOR [[LOAD17]](f32), [[LOAD18]](f32), [[LOAD19]](f32), [[LOAD20]](f32), [[LOAD21]](f32), [[LOAD22]](f32), [[LOAD23]](f32), [[LOAD24]](f32), [[LOAD25]](f32), [[LOAD26]](f32), [[LOAD27]](f32), [[LOAD28]](f32), [[LOAD29]](f32), [[LOAD30]](f32), [[LOAD31]](f32), [[LOAD32]](f32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<16 x s32>), [[DEF]](p1) :: (volatile store (<16 x s32>) into `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<16 x s32>), [[DEF]](p1) :: (volatile store (<16 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x i32>), [[DEF]](p1) :: (volatile store (<32 x i32>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<16 x i32>), [[DEF]](p1) :: (volatile store (<16 x i32>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<16 x f32>), [[DEF]](p1) :: 
(volatile store (<16 x f32>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store volatile <32 x i32> %arg0, ptr addrspace(1) undef store volatile <16 x i32> %arg1, ptr addrspace(1) undef @@ -2528,22 +2565,22 @@ define void @void_func_v3f32_wasted_reg(<3 x float> %arg0, i32 %arg1) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[COPY]](f32), [[COPY1]](f32), [[COPY2]](f32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C]](s32) - ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C1]](s32) - ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C2]](s32) - ; CHECK-NEXT: G_STORE [[EVEC]](s32), [[DEF]](p3) :: (volatile store (s32) into `ptr addrspace(3) undef`, addrspace 3) - ; CHECK-NEXT: G_STORE [[EVEC1]](s32), [[DEF]](p3) :: (volatile store (s32) into `ptr addrspace(3) undef`, addrspace 3) - ; CHECK-NEXT: G_STORE [[EVEC2]](s32), [[DEF]](p3) :: (volatile store (s32) into `ptr addrspace(3) undef`, addrspace 3) - ; CHECK-NEXT: G_STORE [[COPY3]](s32), [[DEF]](p3) :: (volatile store (s32) into `ptr addrspace(3) undef`, addrspace 3) + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(f32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x f32>), [[C]](i32) + ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(f32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x f32>), [[C1]](i32) + ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(f32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x f32>), [[C2]](i32) + ; CHECK-NEXT: G_STORE [[EVEC]](f32), [[DEF]](p3) :: (volatile store (f32) into `ptr addrspace(3) undef`, addrspace 3) + ; CHECK-NEXT: G_STORE [[EVEC1]](f32), [[DEF]](p3) :: (volatile store (f32) into `ptr addrspace(3) undef`, addrspace 3) + ; CHECK-NEXT: G_STORE [[EVEC2]](f32), [[DEF]](p3) :: (volatile store (f32) into `ptr addrspace(3) undef`, addrspace 3) + ; CHECK-NEXT: G_STORE [[COPY3]](i32), [[DEF]](p3) :: (volatile store (i32) into `ptr addrspace(3) undef`, addrspace 3) ; CHECK-NEXT: SI_RETURN %arg0.0 = extractelement <3 x float> %arg0, i32 0 %arg0.1 = extractelement <3 x float> %arg0, i32 1 @@ -2560,22 +2597,22 @@ define void @void_func_v3i32_wasted_reg(<3 x i32> %arg0, i32 %arg1) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY 
$vgpr2 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C]](s32) - ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C1]](s32) - ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C2]](s32) - ; CHECK-NEXT: G_STORE [[EVEC]](s32), [[DEF]](p3) :: (volatile store (s32) into `ptr addrspace(3) undef`, addrspace 3) - ; CHECK-NEXT: G_STORE [[EVEC1]](s32), [[DEF]](p3) :: (volatile store (s32) into `ptr addrspace(3) undef`, addrspace 3) - ; CHECK-NEXT: G_STORE [[EVEC2]](s32), [[DEF]](p3) :: (volatile store (s32) into `ptr addrspace(3) undef`, addrspace 3) - ; CHECK-NEXT: G_STORE [[COPY3]](s32), [[DEF]](p3) :: (volatile store (s32) into `ptr addrspace(3) undef`, addrspace 3) + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(i32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x i32>), [[C]](i32) + ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(i32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x i32>), [[C1]](i32) + ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(i32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x i32>), [[C2]](i32) + ; CHECK-NEXT: G_STORE [[EVEC]](i32), [[DEF]](p3) :: (volatile store (i32) into `ptr addrspace(3) undef`, addrspace 3) + ; CHECK-NEXT: G_STORE [[EVEC1]](i32), [[DEF]](p3) :: (volatile store (i32) into `ptr addrspace(3) undef`, addrspace 3) + ; CHECK-NEXT: G_STORE [[EVEC2]](i32), [[DEF]](p3) :: (volatile store (i32) into `ptr addrspace(3) undef`, addrspace 3) + ; CHECK-NEXT: G_STORE [[COPY3]](i32), [[DEF]](p3) :: (volatile store (i32) into `ptr addrspace(3) undef`, addrspace 3) ; CHECK-NEXT: SI_RETURN %arg0.0 = extractelement <3 x i32> %arg0, i32 0 %arg0.1 = extractelement <3 x i32> %arg0, i32 1 @@ -2593,42 +2630,42 @@ define void @void_func_v16i8(<16 x i8> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = 
COPY $vgpr5 - ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[COPY7]](s32) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32) - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16), [[TRUNC8]](s16), [[TRUNC9]](s16), [[TRUNC10]](s16), [[TRUNC11]](s16), [[TRUNC12]](s16), [[TRUNC13]](s16), [[TRUNC14]](s16), [[TRUNC15]](s16) - ; CHECK-NEXT: [[TRUNC16:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[BUILD_VECTOR]](<16 x s16>) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[TRUNC16]](<16 x s8>), [[DEF]](p1) :: (volatile store (<16 x s8>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY3]](i32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY4]](i32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY5]](i32) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[COPY6]](i32) + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[COPY7]](i32) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[COPY8]](i32) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(i16) = G_TRUNC [[COPY10]](i32) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(i16) = G_TRUNC [[COPY11]](i32) + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; 
CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; CHECK-NEXT: [[TRUNC14:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; CHECK-NEXT: [[TRUNC15:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16), [[TRUNC2]](i16), [[TRUNC3]](i16), [[TRUNC4]](i16), [[TRUNC5]](i16), [[TRUNC6]](i16), [[TRUNC7]](i16), [[TRUNC8]](i16), [[TRUNC9]](i16), [[TRUNC10]](i16), [[TRUNC11]](i16), [[TRUNC12]](i16), [[TRUNC13]](i16), [[TRUNC14]](i16), [[TRUNC15]](i16) + ; CHECK-NEXT: [[TRUNC16:%[0-9]+]]:_(<16 x i8>) = G_TRUNC [[BUILD_VECTOR]](<16 x i16>) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[TRUNC16]](<16 x i8>), [[DEF]](p1) :: (volatile store (<16 x i8>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store volatile <16 x i8> %arg0, ptr addrspace(1) undef ret void @@ -2640,77 +2677,77 @@ define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; 
CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr17 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr18 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr19 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr20 + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr21 + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr22 + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(i32) = COPY $vgpr23 + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(i32) = COPY $vgpr24 + ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(i32) = COPY $vgpr25 + ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(i32) = COPY $vgpr26 + ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(i32) = COPY $vgpr27 + ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(i32) = COPY $vgpr28 + ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(i32) = COPY $vgpr29 + ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(i32) = COPY $vgpr30 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.16 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.16, align 16, addrspace 5) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (i32) from %fixed-stack.16, align 16, addrspace 5) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32), [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32), [[COPY12]](i32), [[COPY13]](i32), [[COPY14]](i32), [[COPY15]](i32), [[COPY16]](i32), [[COPY17]](i32), [[COPY18]](i32), [[COPY19]](i32), [[COPY20]](i32), [[COPY21]](i32), [[COPY22]](i32), [[COPY23]](i32), [[COPY24]](i32), [[COPY25]](i32), [[COPY26]](i32), [[COPY27]](i32), [[COPY28]](i32), [[COPY29]](i32), [[COPY30]](i32), [[LOAD]](i32) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.15 - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s16) from %fixed-stack.15, align 4, addrspace 5) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(i16) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (i16) from %fixed-stack.15, align 4, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.14 - ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 
(s16) from %fixed-stack.14, align 8, addrspace 5) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(i16) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (i16) from %fixed-stack.14, align 8, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.13 - ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s16) from %fixed-stack.13, align 4, addrspace 5) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(i16) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (i16) from %fixed-stack.13, align 4, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.12 - ; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s16) from %fixed-stack.12, align 16, addrspace 5) + ; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(i16) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (i16) from %fixed-stack.12, align 16, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.11 - ; CHECK-NEXT: [[LOAD5:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (s16) from %fixed-stack.11, align 4, addrspace 5) + ; CHECK-NEXT: [[LOAD5:%[0-9]+]]:_(i16) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (i16) from %fixed-stack.11, align 4, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.10 - ; CHECK-NEXT: [[LOAD6:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (s16) from %fixed-stack.10, align 8, addrspace 5) + ; CHECK-NEXT: [[LOAD6:%[0-9]+]]:_(i16) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (i16) from %fixed-stack.10, align 8, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.9 - ; CHECK-NEXT: [[LOAD7:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load (s16) from %fixed-stack.9, align 4, addrspace 5) + ; CHECK-NEXT: [[LOAD7:%[0-9]+]]:_(i16) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load (i16) from %fixed-stack.9, align 4, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8 - ; CHECK-NEXT: [[LOAD8:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load (s16) from %fixed-stack.8, align 16, addrspace 5) + ; CHECK-NEXT: [[LOAD8:%[0-9]+]]:_(i16) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load (i16) from %fixed-stack.8, align 16, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX9:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7 - ; CHECK-NEXT: [[LOAD9:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX9]](p5) :: (invariant load (s16) from %fixed-stack.7, align 4, addrspace 5) + ; CHECK-NEXT: [[LOAD9:%[0-9]+]]:_(i16) = G_LOAD [[FRAME_INDEX9]](p5) :: (invariant load (i16) from %fixed-stack.7, align 4, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX10:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6 - ; CHECK-NEXT: [[LOAD10:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX10]](p5) :: (invariant load (s16) from %fixed-stack.6, align 8, addrspace 5) + ; CHECK-NEXT: [[LOAD10:%[0-9]+]]:_(i16) = G_LOAD [[FRAME_INDEX10]](p5) :: (invariant load (i16) from %fixed-stack.6, align 8, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX11:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 - ; CHECK-NEXT: [[LOAD11:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX11]](p5) :: (invariant load (s16) from %fixed-stack.5, align 4, addrspace 5) + ; CHECK-NEXT: [[LOAD11:%[0-9]+]]:_(i16) = G_LOAD [[FRAME_INDEX11]](p5) :: (invariant load (i16) from %fixed-stack.5, align 4, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX12:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 - ; CHECK-NEXT: [[LOAD12:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX12]](p5) :: 
(invariant load (s16) from %fixed-stack.4, align 16, addrspace 5) + ; CHECK-NEXT: [[LOAD12:%[0-9]+]]:_(i16) = G_LOAD [[FRAME_INDEX12]](p5) :: (invariant load (i16) from %fixed-stack.4, align 16, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX13:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 - ; CHECK-NEXT: [[LOAD13:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX13]](p5) :: (invariant load (s16) from %fixed-stack.3, align 4, addrspace 5) + ; CHECK-NEXT: [[LOAD13:%[0-9]+]]:_(i16) = G_LOAD [[FRAME_INDEX13]](p5) :: (invariant load (i16) from %fixed-stack.3, align 4, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX14:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 - ; CHECK-NEXT: [[LOAD14:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load (s16) from %fixed-stack.2, align 8, addrspace 5) + ; CHECK-NEXT: [[LOAD14:%[0-9]+]]:_(i16) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load (i16) from %fixed-stack.2, align 8, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX15:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK-NEXT: [[LOAD15:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX15]](p5) :: (invariant load (s16) from %fixed-stack.1, align 4, addrspace 5) + ; CHECK-NEXT: [[LOAD15:%[0-9]+]]:_(i16) = G_LOAD [[FRAME_INDEX15]](p5) :: (invariant load (i16) from %fixed-stack.1, align 4, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX16:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK-NEXT: [[LOAD16:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX16]](p5) :: (invariant load (s16) from %fixed-stack.0, align 16, addrspace 5) - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s16>) = G_BUILD_VECTOR [[LOAD1]](s16), [[LOAD2]](s16), [[LOAD3]](s16), [[LOAD4]](s16), [[LOAD5]](s16), [[LOAD6]](s16), [[LOAD7]](s16), [[LOAD8]](s16), [[LOAD9]](s16), [[LOAD10]](s16), [[LOAD11]](s16), [[LOAD12]](s16), [[LOAD13]](s16), [[LOAD14]](s16), [[LOAD15]](s16), [[LOAD16]](s16) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<16 x s16>) + ; CHECK-NEXT: [[LOAD16:%[0-9]+]]:_(i16) = G_LOAD [[FRAME_INDEX16]](p5) :: (invariant load (i16) from %fixed-stack.0, align 16, addrspace 5) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x i16>) = G_BUILD_VECTOR [[LOAD1]](i16), [[LOAD2]](i16), [[LOAD3]](i16), [[LOAD4]](i16), [[LOAD5]](i16), [[LOAD6]](i16), [[LOAD7]](i16), [[LOAD8]](i16), [[LOAD9]](i16), [[LOAD10]](i16), [[LOAD11]](i16), [[LOAD12]](i16), [[LOAD13]](i16), [[LOAD14]](i16), [[LOAD15]](i16), [[LOAD16]](i16) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<16 x i8>) = G_TRUNC [[BUILD_VECTOR1]](<16 x i16>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[TRUNC]](<16 x s8>), [[DEF]](p1) :: (volatile store (<16 x s8>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x i32>), [[DEF]](p1) :: (volatile store (<32 x i32>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC]](<16 x i8>), [[DEF]](p1) :: (volatile store (<16 x i8>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store volatile <32 x i32> %arg0, ptr addrspace(1) undef store volatile <16 x i8> %arg1, ptr addrspace(1) undef @@ -2723,20 +2760,20 @@ define void @pointer_in_struct_argument({ptr addrspace(3), ptr addrspace(1)} %ar ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: 
[[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](i32), [[COPY2]](i32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY3]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i8) = G_TRUNC [[TRUNC]](i16) ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p3) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1234) = G_MERGE_VALUES [[COPY5]](s32), [[COPY6]](s32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1234) = G_MERGE_VALUES [[COPY5]](i32), [[COPY6]](i32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 ; CHECK-NEXT: G_STORE [[COPY]](p3), [[C]](p1) :: (volatile store (p3) into `ptr addrspace(1) null`, addrspace 1) ; CHECK-NEXT: G_STORE [[MV]](p1), [[C]](p1) :: (volatile store (p1) into `ptr addrspace(1) null`, addrspace 1) - ; CHECK-NEXT: G_STORE [[TRUNC1]](s8), [[C]](p1) :: (volatile store (s8) into `ptr addrspace(1) null`, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC1]](i8), [[C]](p1) :: (volatile store (i8) into `ptr addrspace(1) null`, addrspace 1) ; CHECK-NEXT: G_STORE [[COPY4]](p3), [[C]](p1) :: (volatile store (p3) into `ptr addrspace(1) null`, addrspace 1) ; CHECK-NEXT: G_STORE [[MV1]](p1234), [[C]](p1) :: (volatile store (p1234) into `ptr addrspace(1) null`, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -2757,20 +2794,20 @@ define void @vector_ptr_in_struct_arg({ <2 x ptr addrspace(1)>, <2 x ptr addrspa ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1) ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p3) = COPY $vgpr4 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(p3) = COPY $vgpr5 ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[COPY4]](p3), [[COPY5]](p3) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) 
= G_CONSTANT i64 16 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](i64) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x p3>), [[PTR_ADD]](p1) :: (store (<2 x p3>) into `ptr addrspace(1) undef` + 16, align 16, addrspace 1) ; CHECK-NEXT: SI_RETURN store { <2 x ptr addrspace(1)>, <2 x ptr addrspace(3)> } %arg, ptr addrspace(1) undef @@ -2782,10 +2819,10 @@ define void @void_func_i1_inreg(i1 inreg %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr16 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr16 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr16 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i1) = G_TRUNC [[COPY]](i32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[TRUNC]](s1), [[DEF]](p1) :: (store (s1) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC]](i1), [[DEF]](p1) :: (store (i1) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store i1 %arg0, ptr addrspace(1) undef ret void @@ -2796,11 +2833,11 @@ define void @void_func_i8_inreg(i8 inreg %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr16 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr16 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr16 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i8) = G_TRUNC [[TRUNC]](i16) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (store (s8) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC1]](i8), [[DEF]](p1) :: (store (i8) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store i8 %arg0, ptr addrspace(1) undef ret void @@ -2811,10 +2848,10 @@ define void @void_func_i16_inreg(i16 inreg %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr16 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr16 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr16 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (store (s16) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC]](i16), [[DEF]](p1) :: (store (i16) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store i16 %arg0, ptr addrspace(1) undef ret void @@ -2825,9 +2862,9 @@ define void @void_func_i32_inreg(i32 inreg %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr16 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr16 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr16 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[COPY]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store i32 %arg0, ptr addrspace(1) undef ret void @@ -2838,12 +2875,12 @@ define void @void_func_i48_inreg(i48 inreg %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr16, $sgpr17 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY 
$sgpr16 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr17 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr16 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr17 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i48) = G_TRUNC [[MV]](i64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[TRUNC]](s48), [[DEF]](p1) :: (store (s48) into `ptr addrspace(1) undef`, align 8, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC]](i48), [[DEF]](p1) :: (store (i48) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: SI_RETURN store i48 %arg0, ptr addrspace(1) undef ret void @@ -2854,11 +2891,11 @@ define void @void_func_i64_inreg(i64 inreg %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr16, $sgpr17 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr16 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr17 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr16 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr17 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (store (s64) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[MV]](i64), [[DEF]](p1) :: (store (i64) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store i64 %arg0, ptr addrspace(1) undef ret void @@ -2869,12 +2906,12 @@ define void @void_func_i96_inreg(i96 inreg %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr16, $sgpr17, $sgpr18 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr16 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr17 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr18 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr16 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr17 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr18 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i96) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[MV]](s96), [[DEF]](p1) :: (store (s96) into `ptr addrspace(1) undef`, align 8, addrspace 1) + ; CHECK-NEXT: G_STORE [[MV]](i96), [[DEF]](p1) :: (store (i96) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: SI_RETURN store i96 %arg0, ptr addrspace(1) undef ret void @@ -2885,13 +2922,13 @@ define void @void_func_i128_inreg(i128 inreg %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr16, $sgpr17, $sgpr18, $sgpr19 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr16 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr17 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr18 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr19 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr16 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr17 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr18 + ; CHECK-NEXT: 
[[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr19 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[MV]](s128), [[DEF]](p1) :: (store (s128) into `ptr addrspace(1) undef`, align 8, addrspace 1) + ; CHECK-NEXT: G_STORE [[MV]](i128), [[DEF]](p1) :: (store (i128) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: SI_RETURN store i128 %arg0, ptr addrspace(1) undef ret void @@ -2902,10 +2939,11 @@ define void @void_func_f16_inreg(half inreg %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr16 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr16 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr16 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (store (s16) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BITCAST]](f16), [[DEF]](p1) :: (store (f16) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store half %arg0, ptr addrspace(1) undef ret void @@ -2916,10 +2954,11 @@ define void @void_func_bf16_inreg(bfloat inreg %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr16 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr16 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr16 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(bf16) = G_BITCAST [[TRUNC]](i16) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (store (s16) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BITCAST]](bf16), [[DEF]](p1) :: (store (bf16) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store bfloat %arg0, ptr addrspace(1) undef ret void @@ -2930,9 +2969,9 @@ define void @void_func_f32_inreg(float inreg %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr16 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr16 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(f32) = COPY $sgpr16 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[COPY]](f32), [[DEF]](p1) :: (store (f32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store float %arg0, ptr addrspace(1) undef ret void @@ -2943,11 +2982,12 @@ define void @void_func_f64_inreg(double inreg %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr16, $sgpr17 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr16 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr17 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr16 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr17 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[MV]](i64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (store (s64) into 
`ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BITCAST]](f64), [[DEF]](p1) :: (store (f64) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store double %arg0, ptr addrspace(1) undef ret void @@ -2958,14 +2998,14 @@ define void @void_func_v2i1_inreg(<2 x i1> inreg %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr16, $sgpr17 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr16 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr17 - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<2 x s1>) = G_TRUNC [[BUILD_VECTOR]](<2 x s16>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr16 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr17 + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<2 x i1>) = G_TRUNC [[BUILD_VECTOR]](<2 x i16>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[TRUNC2]](<2 x s1>), [[DEF]](p1) :: (store (<2 x s1>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC2]](<2 x i1>), [[DEF]](p1) :: (store (<2 x i1>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store <2 x i1> %arg0, ptr addrspace(1) undef ret void @@ -2977,14 +3017,14 @@ define void @void_func_v2i8_inreg(<2 x i8> inreg %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr16, $sgpr17 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr16 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr17 - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<2 x s8>) = G_TRUNC [[BUILD_VECTOR]](<2 x s16>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr16 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr17 + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<2 x i8>) = G_TRUNC [[BUILD_VECTOR]](<2 x i16>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[TRUNC2]](<2 x s8>), [[DEF]](p1) :: (store (<2 x s8>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC2]](<2 x i8>), [[DEF]](p1) :: (store (<2 x i8>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store <2 x i8> %arg0, ptr addrspace(1) undef ret void @@ -2995,9 +3035,9 @@ define void @void_func_v2i16_inreg(<2 x i16> inreg %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr16 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $sgpr16 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $sgpr16 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[COPY]](<2 x s16>), [[DEF]](p1) :: (store (<2 x s16>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE 
[[COPY]](<2 x i16>), [[DEF]](p1) :: (store (<2 x i16>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store <2 x i16> %arg0, ptr addrspace(1) undef ret void @@ -3008,9 +3048,9 @@ define void @void_func_v2f16_inreg(<2 x half> inreg %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr16 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $sgpr16 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x f16>) = COPY $sgpr16 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[COPY]](<2 x s16>), [[DEF]](p1) :: (store (<2 x s16>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[COPY]](<2 x f16>), [[DEF]](p1) :: (store (<2 x f16>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store <2 x half> %arg0, ptr addrspace(1) undef ret void @@ -3021,10 +3061,10 @@ define void @void_func_v2bf16_inreg(<2 x bfloat> inreg %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr16 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr16 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr16 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x bf16>) = G_BITCAST [[COPY]](i32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BITCAST]](<2 x s16>), [[DEF]](p1) :: (store (<2 x s16>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BITCAST]](<2 x bf16>), [[DEF]](p1) :: (store (<2 x bf16>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store <2 x bfloat> %arg0, ptr addrspace(1) undef ret void @@ -3035,11 +3075,11 @@ define void @void_func_v2i32_inreg(<2 x i32> inreg %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr16, $sgpr17 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr16 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr17 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr16 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr17 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (store (<2 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x i32>), [[DEF]](p1) :: (store (<2 x i32>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store <2 x i32> %arg0, ptr addrspace(1) undef ret void @@ -3050,11 +3090,11 @@ define void @void_func_v2f32_inreg(<2 x float> inreg %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr16, $sgpr17 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr16 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr17 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(f32) = COPY $sgpr16 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(f32) = COPY $sgpr17 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[COPY]](f32), [[COPY1]](f32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (store (<2 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x f32>), [[DEF]](p1) :: (store (<2 x f32>) into `ptr 
addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store <2 x float> %arg0, ptr addrspace(1) undef ret void @@ -3065,15 +3105,15 @@ define void @void_func_v2i64_inreg(<2 x i64> inreg %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr16, $sgpr17, $sgpr18, $sgpr19 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr16 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr17 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr18 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr19 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr16 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr17 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr18 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr19 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x i64>), [[DEF]](p1) :: (store (<2 x i64>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store <2 x i64> %arg0, ptr addrspace(1) undef ret void @@ -3084,15 +3124,17 @@ define void @void_func_v2f64_inreg(<2 x double> inreg %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr16, $sgpr17, $sgpr18, $sgpr19 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr16 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr17 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr18 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr19 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr16 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr17 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr18 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr19 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[MV]](i64) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[MV1]](i64) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f64>) = G_BUILD_VECTOR [[BITCAST]](f64), [[BITCAST1]](f64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x f64>), [[DEF]](p1) :: (store (<2 x f64>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store <2 x double> %arg0, ptr addrspace(1) undef ret void @@ -3114,9 +3156,9 @@ define void @void_func_p0_inreg(ptr inreg %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr16, 
$sgpr17 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr16 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr17 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr16 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr17 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[MV]](p0), [[DEF]](p1) :: (store (p0) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -3129,9 +3171,9 @@ define void @void_func_p1_inreg(ptr addrspace(1) inreg %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr16, $sgpr17 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr16 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr17 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr16 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr17 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[MV]](p1), [[DEF]](p1) :: (store (p1) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -3170,9 +3212,9 @@ define void @void_func_p999_inreg(ptr addrspace(999) inreg %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr16, $sgpr17 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr16 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr17 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p999) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr16 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr17 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p999) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[MV]](p999), [[DEF]](p1) :: (store (p999) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -3185,12 +3227,12 @@ define void @void_func_v2p0_inreg(<2 x ptr> inreg %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr16, $sgpr17, $sgpr18, $sgpr19 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr16 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr17 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr18 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr19 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr16 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr17 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr18 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr19 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[MV]](p0), [[MV1]](p0) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p0>), [[DEF]](p1) :: (store (<2 x p0>) into `ptr addrspace(1) undef`, addrspace 1) @@ -3204,12 +3246,12 @@ define void @void_func_v2p1_inreg(<2 x ptr addrspace(1)> inreg %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr16, $sgpr17, $sgpr18, $sgpr19 ; 
CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr16 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr17 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr18 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr19 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr16 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr17 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr18 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr19 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef`, addrspace 1) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.v2i65.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.v2i65.ll index 8b32f61c39806..1ce211a1d19ad 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.v2i65.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.v2i65.ll @@ -6,19 +6,19 @@ define void @void_func_v2i65(<2 x i65> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s65) = G_TRUNC [[MV]](s96) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s65) = G_TRUNC [[MV1]](s96) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s65>) = G_BUILD_VECTOR [[TRUNC]](s65), [[TRUNC1]](s65) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i96) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i65) = G_TRUNC [[MV]](i96) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i96) = G_MERGE_VALUES [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i65) = G_TRUNC [[MV1]](i96) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i65>) = G_BUILD_VECTOR [[TRUNC]](i65), [[TRUNC1]](i65) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s65>), [[DEF]](p1) :: (store (<2 x s65>) into `ptr addrspace(1) undef`, align 32, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x i65>), [[DEF]](p1) :: (store (<2 x i65>) into `ptr addrspace(1) undef`, align 32, addrspace 1) ; CHECK-NEXT: SI_RETURN store <2 x i65> %arg0, ptr addrspace(1) 
undef ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-getelementptr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-getelementptr.ll index 2321cca252b83..0fbbb1b0878bd 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-getelementptr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-getelementptr.ll @@ -7,30 +7,30 @@ define <2 x ptr addrspace(1)> @vector_gep_v2p1_index_v2i64(<2 x ptr addrspace(1) ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV2]](s64), [[MV3]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64) - ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<2 x s64>) = G_MUL [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p1>) = G_PTR_ADD [[BUILD_VECTOR]], [[MUL]](<2 x s64>) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY6]](i32), [[COPY7]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[MV2]](i64), [[MV3]](i64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[C]](i64), [[C]](i64) + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<2 x i64>) = G_MUL [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p1>) = G_PTR_ADD [[BUILD_VECTOR]], [[MUL]](<2 x i64>) ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY8]](<2 x p1>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), 
[[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY8]](<2 x p1>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %gep = getelementptr i32, <2 x ptr addrspace(1)> %ptr, <2 x i64> %idx ret <2 x ptr addrspace(1)> %gep @@ -45,17 +45,17 @@ define <2 x ptr addrspace(3)> @vector_gep_v2p3_index_v2i32(<2 x ptr addrspace(3) ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr1 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[COPY]](p3), [[COPY1]](p3) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32) - ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<2 x s32>) = G_MUL [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p3>) = G_PTR_ADD [[BUILD_VECTOR]], [[MUL]](<2 x s32>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[C]](i32), [[C]](i32) + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<2 x i32>) = G_MUL [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p3>) = G_PTR_ADD [[BUILD_VECTOR]], [[MUL]](<2 x i32>) ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<2 x p3>) = COPY [[PTR_ADD]](<2 x p3>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY4]](<2 x p3>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY4]](<2 x p3>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %gep = getelementptr i32, <2 x ptr addrspace(3)> %ptr, <2 x i32> %idx ret <2 x ptr addrspace(3)> %gep @@ -67,27 +67,27 @@ define <2 x ptr addrspace(1)> @vector_gep_v2p1_index_v2i32(<2 x ptr addrspace(1) ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR 
[[MV]](p1), [[MV1]](p1) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32) - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(<2 x s64>) = G_SEXT [[BUILD_VECTOR1]](<2 x s32>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64) - ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<2 x s64>) = G_MUL [[SEXT]], [[BUILD_VECTOR2]] - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p1>) = G_PTR_ADD [[BUILD_VECTOR]], [[MUL]](<2 x s64>) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY4]](i32), [[COPY5]](i32) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(<2 x i64>) = G_SEXT [[BUILD_VECTOR1]](<2 x i32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[C]](i64), [[C]](i64) + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<2 x i64>) = G_MUL [[SEXT]], [[BUILD_VECTOR2]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p1>) = G_PTR_ADD [[BUILD_VECTOR]], [[MUL]](<2 x i64>) ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY6]](<2 x p1>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY6]](<2 x p1>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %gep = getelementptr i32, <2 x ptr addrspace(1)> %ptr, <2 x i32> %idx ret <2 x ptr addrspace(1)> %gep @@ -99,28 +99,28 @@ define <2 x ptr addrspace(1)> @vector_gep_v2p1_index_i64(<2 x ptr addrspace(1)> ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV2]](s64), 
[[MV2]](s64) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<2 x s64>) = COPY [[BUILD_VECTOR1]](<2 x s64>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64) - ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<2 x s64>) = G_MUL [[COPY6]], [[BUILD_VECTOR2]] - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p1>) = G_PTR_ADD [[BUILD_VECTOR]], [[MUL]](<2 x s64>) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[MV2]](i64), [[MV2]](i64) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<2 x i64>) = COPY [[BUILD_VECTOR1]](<2 x i64>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[C]](i64), [[C]](i64) + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<2 x i64>) = G_MUL [[COPY6]], [[BUILD_VECTOR2]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p1>) = G_PTR_ADD [[BUILD_VECTOR]], [[MUL]](<2 x i64>) ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY7]](<2 x p1>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY7]](<2 x p1>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %gep = getelementptr i32, <2 x ptr addrspace(1)> %ptr, i64 %idx ret <2 x ptr addrspace(1)> %gep @@ -132,26 +132,26 @@ define <2 x ptr addrspace(1)> @vector_gep_v2p1_index_i32(<2 x ptr addrspace(1)> ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY4]](s32) - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(<2 x s64>) = G_SEXT [[BUILD_VECTOR1]](<2 x s32>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64) - ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<2 x 
s64>) = G_MUL [[SEXT]], [[BUILD_VECTOR2]] - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p1>) = G_PTR_ADD [[BUILD_VECTOR]], [[MUL]](<2 x s64>) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY4]](i32), [[COPY4]](i32) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(<2 x i64>) = G_SEXT [[BUILD_VECTOR1]](<2 x i32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[C]](i64), [[C]](i64) + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<2 x i64>) = G_MUL [[SEXT]], [[BUILD_VECTOR2]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p1>) = G_PTR_ADD [[BUILD_VECTOR]], [[MUL]](<2 x i64>) ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY5]](<2 x p1>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY5]](<2 x p1>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %gep = getelementptr i32, <2 x ptr addrspace(1)> %ptr, i32 %idx ret <2 x ptr addrspace(1)> %gep @@ -163,34 +163,34 @@ define <2 x ptr addrspace(1)> @vector_gep_v2p1_index_v2i64_constant(<2 x ptr add ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV2]](s64), [[MV3]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C1]](s64) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR 
[[C2]](s64), [[C2]](s64) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C2]](s64), [[C3]](s64) - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p1>) = G_PTR_ADD [[BUILD_VECTOR]], [[BUILD_VECTOR4]](<2 x s64>) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY6]](i32), [[COPY7]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[MV2]](i64), [[MV3]](i64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[C]](i64), [[C1]](i64) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[C2]](i64), [[C2]](i64) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[C2]](i64), [[C3]](i64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p1>) = G_PTR_ADD [[BUILD_VECTOR]], [[BUILD_VECTOR4]](<2 x i64>) ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY8]](<2 x p1>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY8]](<2 x p1>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %gep = getelementptr i32, <2 x ptr addrspace(1)> %ptr, <2 x i64> ret <2 x ptr addrspace(1)> %gep diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll index 951be00a124c7..d615170f70ac9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll @@ -6,9 +6,9 @@ define amdgpu_kernel void @test_indirect_call_sgpr_ptr(ptr %fptr) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -22,33 +22,33 @@ define amdgpu_kernel void @test_indirect_call_sgpr_ptr(ptr %fptr) { ; CHECK-NEXT: 
[[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](i64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY18]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY19]], [[C2]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](i32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[LOAD]](p0), 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -61,12 
+61,12 @@ define amdgpu_gfx void @test_gfx_indirect_call_sgpr_ptr(ptr %fptr) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x i32>) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[MV]](p0), 0, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll index c2a6b183a0f7f..6754979ab405b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll @@ -67,10 +67,10 @@ define i32 @asm_vgpr_early_clobber() { ; CHECK-LABEL: name: asm_vgpr_early_clobber ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7; v_mov_b32 $1, 7", 1 /* sideeffect attdialect */, 2228235 /* regdef-ec:VGPR_32 */, def early-clobber %8, 2228235 /* regdef-ec:VGPR_32 */, def early-clobber %9, !1 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %9 - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[ADD]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY %8 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY %9 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[ADD]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 call { i32, i32 } asm sideeffect "v_mov_b32 $0, 7; v_mov_b32 $1, 7", "=&v,=&v"(), !srcloc !0 %asmresult = extractvalue { i32, i32 } %1, 0 @@ -83,8 +83,8 @@ define i32 @test_specific_vgpr_output() nounwind { ; CHECK-LABEL: name: test_specific_vgpr_output ; CHECK: bb.1.entry: ; CHECK-NEXT: INLINEASM &"v_mov_b32 v1, 7", 0 /* attdialect */, 10 /* regdef */, implicit-def $vgpr1 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 entry: %0 = tail call i32 asm "v_mov_b32 v1, 7", "={v1}"() nounwind @@ -95,8 +95,8 @@ define i32 @test_single_vgpr_output() nounwind { ; CHECK-LABEL: name: test_single_vgpr_output ; CHECK: bb.1.entry: ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7", 0 /* attdialect */, 2228234 /* regdef:VGPR_32 */, def %8 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY %8 + ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 entry: %0 = tail call i32 asm "v_mov_b32 $0, 7", "=v"() nounwind @@ -107,8 +107,8 @@ define i32 @test_single_sgpr_output_s32() nounwind { ; CHECK-LABEL: name: test_single_sgpr_output_s32 ; 
CHECK: bb.1.entry: ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 2359306 /* regdef:SReg_32 */, def %8 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY %8 + ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 entry: %0 = tail call i32 asm "s_mov_b32 $0, 7", "=s"() nounwind @@ -120,10 +120,10 @@ define float @test_multiple_register_outputs_same() #0 { ; CHECK-LABEL: name: test_multiple_register_outputs_same ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_mov_b32 $1, 1", 0 /* attdialect */, 2228234 /* regdef:VGPR_32 */, def %8, 2228234 /* regdef:VGPR_32 */, def %9 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %9 - ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[FADD]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(f32) = COPY %8 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(f32) = COPY %9 + ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[FADD]](f32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %1 = call { float, float } asm "v_mov_b32 $0, 0; v_mov_b32 $1, 1", "=v,=v"() %asmresult = extractvalue { float, float } %1, 0 @@ -137,11 +137,12 @@ define double @test_multiple_register_outputs_mixed() #0 { ; CHECK-LABEL: name: test_multiple_register_outputs_mixed ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_add_f64 $1, 0, 0", 0 /* attdialect */, 2228234 /* regdef:VGPR_32 */, def %8, 3538954 /* regdef:VReg_64 */, def %9 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY %9 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(f32) = COPY %8 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(f64) = COPY %9 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i64) = G_BITCAST [[COPY1]](f64) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](i64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %1 = call { float, double } asm "v_mov_b32 $0, 0; v_add_f64 $1, 0, 0", "=v,=v"() %asmresult = extractvalue { float, double } %1, 1 @@ -152,11 +153,11 @@ define double @test_multiple_register_outputs_mixed() #0 { define float @test_vector_output() nounwind { ; CHECK-LABEL: name: test_vector_output ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 ; CHECK-NEXT: INLINEASM &"v_add_f64 $0, 0, 0", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $vgpr14_vgpr15 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr14_vgpr15 - ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[EVEC]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x f32>) = COPY $vgpr14_vgpr15 + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(f32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x f32>), [[C]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[EVEC]](f32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %1 = tail call <2 x float> asm sideeffect "v_add_f64 $0, 0, 0", "={v[14:15]}"() nounwind %2 = extractelement <2 x float> %1, i32 0 @@ -169,8 +170,8 @@ define amdgpu_kernel void 
@test_input_vgpr_imm() { ; CHECK-NEXT: liveins: $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[C]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 42 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[C]](i32) ; CHECK-NEXT: INLINEASM &"v_mov_b32 v0, $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[COPY1]] ; CHECK-NEXT: S_ENDPGM 0 call void asm sideeffect "v_mov_b32 v0, $0", "v"(i32 42) @@ -183,8 +184,8 @@ define amdgpu_kernel void @test_input_sgpr_imm() { ; CHECK-NEXT: liveins: $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[C]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 42 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[C]](i32) ; CHECK-NEXT: INLINEASM &"s_mov_b32 s0, $0", 1 /* sideeffect attdialect */, 2359305 /* reguse:SReg_32 */, [[COPY1]] ; CHECK-NEXT: S_ENDPGM 0 call void asm sideeffect "s_mov_b32 s0, $0", "s"(i32 42) @@ -210,11 +211,11 @@ define float @test_input_vgpr(i32 %src) nounwind { ; CHECK: bb.1.entry: ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]](i32) ; CHECK-NEXT: INLINEASM &"v_add_f32 $0, 1.0, $1", 0 /* attdialect */, 2228234 /* regdef:VGPR_32 */, def %9, 2228233 /* reguse:VGPR_32 */, [[COPY1]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %9 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(f32) = COPY %9 + ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](f32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 entry: %0 = tail call float asm "v_add_f32 $0, 1.0, $1", "=v,v"(i32 %src) nounwind @@ -228,8 +229,8 @@ define i32 @test_memory_constraint(ptr addrspace(3) %a) nounwind { ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CHECK-NEXT: INLINEASM &"ds_read_b32 $0, $1", 8 /* mayload attdialect */, 2228234 /* regdef:VGPR_32 */, def %9, 262158 /* mem:m */, [[COPY]](p3) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %9 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY %9 + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %1 = tail call i32 asm "ds_read_b32 $0, $1", "=v,*m"(ptr addrspace(3) elementtype(i32) %a) ret i32 %1 @@ -240,13 +241,13 @@ define i32 @test_vgpr_matching_constraint(i32 %a) nounwind { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[AND]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[AND]](i32) ; CHECK-NEXT: INLINEASM &";", 1 /* sideeffect attdialect */, 2228234 /* regdef:VGPR_32 */, def %11, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %11 - ; CHECK-NEXT: $vgpr0 = COPY 
[[COPY2]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY %11 + ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %and = and i32 %a, 1 %asm = call i32 asm sideeffect ";", "=v,0"(i32 %and) @@ -257,14 +258,14 @@ define i32 @test_sgpr_matching_constraint() nounwind { ; CHECK-LABEL: name: test_sgpr_matching_constraint ; CHECK: bb.1.entry: ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 2359306 /* regdef:SReg_32 */, def %8 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY %8 ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 8", 0 /* attdialect */, 2359306 /* regdef:SReg_32 */, def %10 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %10 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY %10 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]](i32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]](i32) ; CHECK-NEXT: INLINEASM &"s_add_u32 $0, $1, $2", 0 /* attdialect */, 2359306 /* regdef:SReg_32 */, def %12, 2359305 /* reguse:SReg_32 */, [[COPY2]], 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY %12 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY4]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY %12 + ; CHECK-NEXT: $vgpr0 = COPY [[COPY4]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 entry: %asm0 = tail call i32 asm "s_mov_b32 $0, 7", "=s"() nounwind @@ -278,20 +279,20 @@ define void @test_many_matching_constraints(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]](i32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]](i32) ; CHECK-NEXT: INLINEASM &"; ", 1 /* sideeffect attdialect */, 2228234 /* regdef:VGPR_32 */, def %11, 2228234 /* regdef:VGPR_32 */, def %12, 2228234 /* regdef:VGPR_32 */, def %13, 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3), 2147614729 /* reguse tiedto:$2 */, [[COPY4]](tied-def 7), 2147549193 /* reguse tiedto:$1 */, [[COPY5]](tied-def 5) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY %11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY %12 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY %13 - ; CHECK-NEXT: G_STORE [[COPY6]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[COPY7]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[COPY8]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY %11 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY %12 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY %13 + ; CHECK-NEXT: G_STORE [[COPY6]](i32), 
[[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[COPY7]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[COPY8]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN %asm = call {i32, i32, i32} asm sideeffect "; ", "=v,=v,=v,0,2,1"(i32 %c, i32 %a, i32 %b) %asmresult0 = extractvalue {i32, i32, i32} %asm, 0 @@ -307,11 +308,11 @@ define i32 @test_sgpr_to_vgpr_move_matching_constraint() nounwind { ; CHECK-LABEL: name: test_sgpr_to_vgpr_move_matching_constraint ; CHECK: bb.1.entry: ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 2359306 /* regdef:SReg_32 */, def %8 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY %8 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]](i32) ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, $1", 0 /* attdialect */, 2228234 /* regdef:VGPR_32 */, def %10, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %10 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY %10 + ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 entry: %asm0 = tail call i32 asm "s_mov_b32 $0, 7", "=s"() nounwind diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-invariant.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-invariant.ll index ec07b0b1d4f45..defcde874f565 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-invariant.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-invariant.ll @@ -12,8 +12,8 @@ define i32 @load_const_i32_gv() { ; CHECK-LABEL: name: load_const_i32_gv ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p1) = G_GLOBAL_VALUE @const_gv0 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV]](p1) :: (dereferenceable invariant load (s32) from @const_gv0, addrspace 1) - ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[GV]](p1) :: (dereferenceable invariant load (i32) from @const_gv0, addrspace 1) + ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %load = load i32, ptr addrspace(1) @const_gv0, align 4 ret i32 %load @@ -24,13 +24,13 @@ define i32 @load_select_const_i32_gv(i1 %cond) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i1) = G_TRUNC [[COPY]](i32) ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p1) = G_GLOBAL_VALUE @const_gv0 ; CHECK-NEXT: [[GV1:%[0-9]+]]:_(p1) = G_GLOBAL_VALUE @const_gv1 - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(p1) = G_SELECT [[TRUNC]](s1), [[GV]], [[GV1]] - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[SELECT]](p1) :: (dereferenceable invariant load (s32) from %ir.select, addrspace 1) - ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(p1) = G_SELECT [[TRUNC]](i1), [[GV]], [[GV1]] + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[SELECT]](p1) :: (dereferenceable invariant load (i32) from %ir.select, addrspace 1) + ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %select = select i1 %cond, ptr addrspace(1) @const_gv0, ptr addrspace(1) @const_gv1 %load = 
load i32, ptr addrspace(1) %select, align 4 @@ -41,14 +41,14 @@ define { i32, i64 } @load_const_struct_gv() { ; CHECK-LABEL: name: load_const_struct_gv ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p1) = G_GLOBAL_VALUE @const_struct_gv - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV]](p1) :: (dereferenceable invariant load (s32) from @const_struct_gv, align 8, addrspace 1) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[GV]], [[C]](s64) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (dereferenceable invariant load (s64) from @const_struct_gv + 8, addrspace 1) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[GV]](p1) :: (dereferenceable invariant load (i32) from @const_struct_gv, align 8, addrspace 1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[GV]], [[C]](i64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD]](p1) :: (dereferenceable invariant load (i64) from @const_struct_gv + 8, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD1]](i64) + ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV1]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %load = load { i32, i64 }, ptr addrspace(1) @const_struct_gv, align 8 ret { i32, i64 } %load @@ -59,14 +59,14 @@ define void @test_memcpy_p1_constaddr_i64(ptr addrspace(1) %dst, ptr addrspace(4 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; CHECK-NEXT: G_MEMCPY [[MV]](p1), [[MV1]](p4), [[C]](s64), 0 :: (store (s8) into %ir.dst, addrspace 1), (dereferenceable invariant load (s8) from %ir.src, addrspace 4) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 32 + ; CHECK-NEXT: G_MEMCPY [[MV]](p1), [[MV1]](p4), [[C]](i64), 0 :: (store (i8) into %ir.dst, addrspace 1), (dereferenceable invariant load (i8) from %ir.src, addrspace 4) ; CHECK-NEXT: SI_RETURN call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) %dst, ptr addrspace(4) %src, i64 32, i1 false) ret void @@ -77,14 +77,14 @@ define void @test_memcpy_inline_p1_constaddr_i64(ptr addrspace(1) %dst, ptr addr ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: 
[[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; CHECK-NEXT: G_MEMCPY_INLINE [[MV]](p1), [[MV1]](p4), [[C]](s64) :: (store (s8) into %ir.dst, addrspace 1), (dereferenceable invariant load (s8) from %ir.src, addrspace 4) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 32 + ; CHECK-NEXT: G_MEMCPY_INLINE [[MV]](p1), [[MV1]](p4), [[C]](i64) :: (store (i8) into %ir.dst, addrspace 1), (dereferenceable invariant load (i8) from %ir.src, addrspace 4) ; CHECK-NEXT: SI_RETURN call void @llvm.memcpy.inline.p1.p4.i64(ptr addrspace(1) %dst, ptr addrspace(4) %src, i64 32, i1 false) ret void @@ -95,14 +95,14 @@ define void @test_memmove_p1_constaddr_i64(ptr addrspace(1) %dst, ptr addrspace( ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; CHECK-NEXT: G_MEMMOVE [[MV]](p1), [[MV1]](p4), [[C]](s64), 0 :: (store (s8) into %ir.dst, addrspace 1), (dereferenceable invariant load (s8) from %ir.src, addrspace 4) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 32 + ; CHECK-NEXT: G_MEMMOVE [[MV]](p1), [[MV1]](p4), [[C]](i64), 0 :: (store (i8) into %ir.dst, addrspace 1), (dereferenceable invariant load (i8) from %ir.src, addrspace 4) ; CHECK-NEXT: SI_RETURN call void @llvm.memmove.p1.p4.i64(ptr addrspace(1) %dst, ptr addrspace(4) %src, i64 32, i1 false) ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll index b83b8a0a6d7d4..1d3e5fc4a0bd3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll @@ -8,13 +8,13 @@ define void @test_memcpy_p1_p3_i64(ptr addrspace(1) %dst, ptr addrspace(3) %src) ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: 
[[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) - ; CHECK-NEXT: G_MEMCPY [[MV]](p1), [[COPY2]](p3), [[TRUNC]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 256 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[C]](i64) + ; CHECK-NEXT: G_MEMCPY [[MV]](p1), [[COPY2]](p3), [[TRUNC]](i32), 0 :: (store (i8) into %ir.dst, addrspace 1), (load (i8) from %ir.src, addrspace 3) ; CHECK-NEXT: SI_RETURN call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) %dst, ptr addrspace(3) %src, i64 256, i1 false) ret void @@ -25,12 +25,12 @@ define void @test_memcpy_p1_p3_i32(ptr addrspace(1) %dst, ptr addrspace(3) %src) ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 - ; CHECK-NEXT: G_MEMCPY [[MV]](p1), [[COPY2]](p3), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 256 + ; CHECK-NEXT: G_MEMCPY [[MV]](p1), [[COPY2]](p3), [[C]](i32), 0 :: (store (i8) into %ir.dst, addrspace 1), (load (i8) from %ir.src, addrspace 3) ; CHECK-NEXT: SI_RETURN call void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) %dst, ptr addrspace(3) %src, i32 256, i1 false) ret void @@ -41,13 +41,13 @@ define void @test_memcpy_p1_p3_i16(ptr addrspace(1) %dst, ptr addrspace(3) %src) ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256 - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) - ; CHECK-NEXT: G_MEMCPY [[MV]](p1), [[COPY2]](p3), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 256 + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[C]](i16) + ; CHECK-NEXT: G_MEMCPY [[MV]](p1), [[COPY2]](p3), [[ZEXT]](i32), 0 :: (store (i8) into %ir.dst, addrspace 1), (load (i8) from %ir.src, addrspace 3) ; CHECK-NEXT: SI_RETURN call void @llvm.memcpy.p1.p3.i16(ptr addrspace(1) %dst, ptr addrspace(3) %src, i16 256, i1 false) ret void @@ -59,12 +59,12 @@ define void @test_memcpy_p3_p1_i64(ptr addrspace(3) %dst, ptr 
addrspace(1) %src) ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) - ; CHECK-NEXT: G_MEMCPY [[COPY]](p3), [[MV]](p1), [[TRUNC]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3), (load (s8) from %ir.src, addrspace 1) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](i32), [[COPY2]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 256 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[C]](i64) + ; CHECK-NEXT: G_MEMCPY [[COPY]](p3), [[MV]](p1), [[TRUNC]](i32), 0 :: (store (i8) into %ir.dst, addrspace 3), (load (i8) from %ir.src, addrspace 1) ; CHECK-NEXT: SI_RETURN call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) %dst, ptr addrspace(1) %src, i64 256, i1 false) ret void @@ -76,11 +76,11 @@ define void @test_memcpy_p3_p1_i32(ptr addrspace(3) %dst, ptr addrspace(1) %src) ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 - ; CHECK-NEXT: G_MEMCPY [[COPY]](p3), [[MV]](p1), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3), (load (s8) from %ir.src, addrspace 1) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](i32), [[COPY2]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 256 + ; CHECK-NEXT: G_MEMCPY [[COPY]](p3), [[MV]](p1), [[C]](i32), 0 :: (store (i8) into %ir.dst, addrspace 3), (load (i8) from %ir.src, addrspace 1) ; CHECK-NEXT: SI_RETURN call void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) %dst, ptr addrspace(1) %src, i32 256, i1 false) ret void @@ -92,12 +92,12 @@ define void @test_memcpy_p3_p1_i16(ptr addrspace(3) %dst, ptr addrspace(1) %src) ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256 - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) - ; CHECK-NEXT: G_MEMCPY [[COPY]](p3), [[MV]](p1), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3), (load (s8) from %ir.src, addrspace 1) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](i32), [[COPY2]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 256 + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[C]](i16) + ; CHECK-NEXT: G_MEMCPY [[COPY]](p3), [[MV]](p1), [[ZEXT]](i32), 0 :: (store (i8) into %ir.dst, addrspace 3), (load (i8) from %ir.src, addrspace 1) ; CHECK-NEXT: SI_RETURN call void @llvm.memcpy.p3.p1.i16(ptr 
addrspace(3) %dst, ptr addrspace(1) %src, i16 256, i1 false) ret void @@ -108,13 +108,13 @@ define void @test_memmove_p1_p3_i64(ptr addrspace(1) %dst, ptr addrspace(3) %src ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) - ; CHECK-NEXT: G_MEMMOVE [[MV]](p1), [[COPY2]](p3), [[TRUNC]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 256 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[C]](i64) + ; CHECK-NEXT: G_MEMMOVE [[MV]](p1), [[COPY2]](p3), [[TRUNC]](i32), 0 :: (store (i8) into %ir.dst, addrspace 1), (load (i8) from %ir.src, addrspace 3) ; CHECK-NEXT: SI_RETURN call void @llvm.memmove.p1.p3.i64(ptr addrspace(1) %dst, ptr addrspace(3) %src, i64 256, i1 false) ret void @@ -125,12 +125,12 @@ define void @test_memmove_p1_p3_i32(ptr addrspace(1) %dst, ptr addrspace(3) %src ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 - ; CHECK-NEXT: G_MEMMOVE [[MV]](p1), [[COPY2]](p3), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 256 + ; CHECK-NEXT: G_MEMMOVE [[MV]](p1), [[COPY2]](p3), [[C]](i32), 0 :: (store (i8) into %ir.dst, addrspace 1), (load (i8) from %ir.src, addrspace 3) ; CHECK-NEXT: SI_RETURN call void @llvm.memmove.p1.p3.i32(ptr addrspace(1) %dst, ptr addrspace(3) %src, i32 256, i1 false) ret void @@ -141,13 +141,13 @@ define void @test_memmove_p1_p3_i16(ptr addrspace(1) %dst, ptr addrspace(3) %src ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256 - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) - ; CHECK-NEXT: G_MEMMOVE [[MV]](p1), [[COPY2]](p3), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 256 + ; CHECK-NEXT: 
[[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[C]](i16) + ; CHECK-NEXT: G_MEMMOVE [[MV]](p1), [[COPY2]](p3), [[ZEXT]](i32), 0 :: (store (i8) into %ir.dst, addrspace 1), (load (i8) from %ir.src, addrspace 3) ; CHECK-NEXT: SI_RETURN call void @llvm.memmove.p1.p3.i16(ptr addrspace(1) %dst, ptr addrspace(3) %src, i16 256, i1 false) ret void @@ -158,13 +158,13 @@ define void @test_memset_p1_i64(ptr addrspace(1) %dst, i8 %val) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 - ; CHECK-NEXT: G_MEMSET [[MV]](p1), [[TRUNC]](s8), [[C]](s64), 0 :: (store (s8) into %ir.dst, addrspace 1) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i8) = G_TRUNC [[COPY2]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 256 + ; CHECK-NEXT: G_MEMSET [[MV]](p1), [[TRUNC]](i8), [[C]](i64), 0 :: (store (i8) into %ir.dst, addrspace 1) ; CHECK-NEXT: SI_RETURN call void @llvm.memset.p1.i64(ptr addrspace(1) %dst, i8 %val, i64 256, i1 false) ret void @@ -175,14 +175,14 @@ define void @test_memset_p1_i32(ptr addrspace(1) %dst, i8 %val) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s32) - ; CHECK-NEXT: G_MEMSET [[MV]](p1), [[TRUNC]](s8), [[ZEXT]](s64), 0 :: (store (s8) into %ir.dst, addrspace 1) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i8) = G_TRUNC [[COPY2]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 256 + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[C]](i32) + ; CHECK-NEXT: G_MEMSET [[MV]](p1), [[TRUNC]](i8), [[ZEXT]](i64), 0 :: (store (i8) into %ir.dst, addrspace 1) ; CHECK-NEXT: SI_RETURN call void @llvm.memset.p1.i32(ptr addrspace(1) %dst, i8 %val, i32 256, i1 false) ret void @@ -193,14 +193,14 @@ define void @test_memset_p1_i16(ptr addrspace(1) %dst, i8 %val) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256 - ; CHECK-NEXT: 
[[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s16) - ; CHECK-NEXT: G_MEMSET [[MV]](p1), [[TRUNC]](s8), [[ZEXT]](s64), 0 :: (store (s8) into %ir.dst, addrspace 1) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i8) = G_TRUNC [[COPY2]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 256 + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[C]](i16) + ; CHECK-NEXT: G_MEMSET [[MV]](p1), [[TRUNC]](i8), [[ZEXT]](i64), 0 :: (store (i8) into %ir.dst, addrspace 1) ; CHECK-NEXT: SI_RETURN call void @llvm.memset.p1.i16(ptr addrspace(1) %dst, i8 %val, i16 256, i1 false) ret void @@ -212,11 +212,11 @@ define void @test_memset_p3_i64(ptr addrspace(3) %dst, i8 %val) { ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) - ; CHECK-NEXT: G_MEMSET [[COPY]](p3), [[TRUNC]](s8), [[TRUNC1]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i8) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 256 + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i32) = G_TRUNC [[C]](i64) + ; CHECK-NEXT: G_MEMSET [[COPY]](p3), [[TRUNC]](i8), [[TRUNC1]](i32), 0 :: (store (i8) into %ir.dst, addrspace 3) ; CHECK-NEXT: SI_RETURN call void @llvm.memset.p3.i64(ptr addrspace(3) %dst, i8 %val, i64 256, i1 false) ret void @@ -228,10 +228,10 @@ define void @test_memset_p3_i32(ptr addrspace(3) %dst, i8 %val) { ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 - ; CHECK-NEXT: G_MEMSET [[COPY]](p3), [[TRUNC]](s8), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i8) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 256 + ; CHECK-NEXT: G_MEMSET [[COPY]](p3), [[TRUNC]](i8), [[C]](i32), 0 :: (store (i8) into %ir.dst, addrspace 3) ; CHECK-NEXT: SI_RETURN call void @llvm.memset.p3.i32(ptr addrspace(3) %dst, i8 %val, i32 256, i1 false) ret void @@ -243,11 +243,11 @@ define void @test_memset_p3_i16(ptr addrspace(3) %dst, i8 %val) { ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256 - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) - ; CHECK-NEXT: G_MEMSET [[COPY]](p3), [[TRUNC]](s8), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i8) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 256 + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[C]](i16) + ; CHECK-NEXT: G_MEMSET [[COPY]](p3), 
[[TRUNC]](i8), [[ZEXT]](i32), 0 :: (store (i8) into %ir.dst, addrspace 3) ; CHECK-NEXT: SI_RETURN call void @llvm.memset.p3.i16(ptr addrspace(3) %dst, i8 %val, i16 256, i1 false) ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-metadata.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-metadata.ll index 101bb6c0ed123..14f585e3e3fdf 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-metadata.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-metadata.ll @@ -5,11 +5,11 @@ define i32 @reloc_constant() { ; CHECK-LABEL: name: reloc_constant ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.reloc.constant), !0 + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.reloc.constant), !0 ; We cannot have any specific metadata check here as ConstantAsMetadata is printed as - ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.reloc.constant), <0x{{[0-9a-f]+}}> - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[INT]], [[INT1]] - ; CHECK-NEXT: $vgpr0 = COPY [[ADD]](s32) + ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.reloc.constant), <0x{{[0-9a-f]+}}> + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[INT]], [[INT1]] + ; CHECK-NEXT: $vgpr0 = COPY [[ADD]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val0 = call i32 @llvm.amdgcn.reloc.constant(metadata !0) %val1 = call i32 @llvm.amdgcn.reloc.constant(metadata i32 4) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-non-integral-address-spaces-vectors.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-non-integral-address-spaces-vectors.ll index c509cf4b1bf37..f376f3f6113a7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-non-integral-address-spaces-vectors.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-non-integral-address-spaces-vectors.ll @@ -6,20 +6,20 @@ define <2 x ptr addrspace(7)> @no_auto_constfold_gep_vector() { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[C:%[0-9]+]]:_(p8) = G_CONSTANT i128 0 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p8>) = G_BUILD_VECTOR [[C]](p8), [[C]](p8) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 123 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x p8>) - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<2 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 123 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[C1]](i32), [[C1]](i32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x p8>) + ; CHECK-NEXT: 
[[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<2 x i32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](i32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](i32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](i32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](i32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9 %gep = getelementptr i8, <2 x ptr addrspace(7)> zeroinitializer, <2 x i32> ret <2 x ptr addrspace(7)> %gep @@ -30,43 +30,43 @@ define <2 x ptr addrspace(7)> @gep_vector_splat(<2 x ptr addrspace(7)> %ptrs, i6 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p8) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p8) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p8) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p8) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p8>) = G_BUILD_VECTOR [[MV]](p8), [[MV1]](p8) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY10]](i32), [[COPY11]](i32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x i64>) = G_IMPLICIT_DEF + ; 
CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<2 x p8>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[MV2]](s64), [[C]](s32) - ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[IVEC]](<2 x s64>), [[DEF]], shufflemask(0, 0) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s32>) = G_TRUNC [[SHUF]](<2 x s64>) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(<2 x s32>) = G_ADD [[BUILD_VECTOR1]], [[TRUNC]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x p8>) - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ADD]](<2 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<2 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<2 x i64>) = G_INSERT_VECTOR_ELT [[DEF]], [[MV2]](i64), [[C]](i32) + ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<2 x i64>) = G_SHUFFLE_VECTOR [[IVEC]](<2 x i64>), [[DEF]], shufflemask(0, 0) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x i32>) = G_TRUNC [[SHUF]](<2 x i64>) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(<2 x i32>) = G_ADD [[BUILD_VECTOR1]], [[TRUNC]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x p8>) + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ADD]](<2 x i32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](i32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](i32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](i32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](i32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9 %gep = getelementptr i8, <2 x ptr addrspace(7)> %ptrs, i64 %idx ret <2 x ptr addrspace(7)> %gep diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-non-integral-address-spaces.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-non-integral-address-spaces.ll index d5c68e772d0b1..c55f06d515839 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-non-integral-address-spaces.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-non-integral-address-spaces.ll @@ -6,13 +6,13 @@ define ptr addrspace(7) @no_auto_constfold_gep() { ; CHECK-LABEL: name: no_auto_constfold_gep ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[C:%[0-9]+]]:_(p8) = G_CONSTANT i128 0 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 123 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), 
[[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](p8) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[C1]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 123 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C]](p8) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; CHECK-NEXT: $vgpr4 = COPY [[C1]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4 %gep = getelementptr i8, ptr addrspace(7) null, i32 123 ret ptr addrspace(7) %gep diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-prefetch.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-prefetch.ll index b53610a0f22e5..6ea7b4f65c24c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-prefetch.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-prefetch.ll @@ -6,9 +6,9 @@ define void @prefetch_read(ptr %ptr) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) ; CHECK-NEXT: G_PREFETCH [[MV]](p0), 0, 0, 0 :: (load unknown-size from %ir.ptr, align 1) ; CHECK-NEXT: SI_RETURN call void @llvm.prefetch.p0(ptr %ptr, i32 0, i32 0, i32 0) @@ -20,9 +20,9 @@ define void @prefetch_write(ptr %ptr) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) ; CHECK-NEXT: G_PREFETCH [[MV]](p0), 1, 1, 1 :: (store unknown-size into %ir.ptr, align 1) ; CHECK-NEXT: SI_RETURN call void @llvm.prefetch.p0(ptr %ptr, i32 1, i32 1, i32 1) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-ptrmask.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-ptrmask.ll index 7a8e521817a37..dfe1e0840b566 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-ptrmask.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-ptrmask.ll @@ -6,16 +6,16 @@ define ptr @ptrmask_flat_i64(ptr %ptr, i64 %mask) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[MV]], 
[[MV1]](s64) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PTRMASK]](p0) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[MV]], [[MV1]](i64) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[PTRMASK]](p0) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %masked = call ptr @llvm.ptrmask.p0.i64(ptr %ptr, i64 %mask) ret ptr %masked @@ -27,8 +27,8 @@ define ptr addrspace(3) @ptrmask_local_i32(ptr addrspace(3) %ptr, i32 %mask) { ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[COPY1]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[COPY1]](i32) ; CHECK-NEXT: $vgpr0 = COPY [[PTRMASK]](p3) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %masked = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) %ptr, i32 %mask) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sat.ll index 00de01a694403..8b99af76faec1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sat.ll @@ -6,13 +6,13 @@ define i16 @uaddsat_i16(i16 %lhs, i16 %rhs) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC]], [[TRUNC1]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDSAT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[UADDSAT:%[0-9]+]]:_(i16) = G_UADDSAT [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[UADDSAT]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %res = call i16 @llvm.uadd.sat.i16(i16 %lhs, i16 %rhs) ret i16 %res @@ -24,10 +24,10 @@ define i32 @uaddsat_i32(i32 %lhs, i32 %rhs) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[UADDSAT:%[0-9]+]]:_(s32) = G_UADDSAT [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[UADDSAT]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: 
[[UADDSAT:%[0-9]+]]:_(i32) = G_UADDSAT [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[UADDSAT]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %res = call i32 @llvm.uadd.sat.i32(i32 %lhs, i32 %rhs) ret i32 %res @@ -39,16 +39,16 @@ define i64 @uaddsat_i64(i64 %lhs, i64 %rhs) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[UADDSAT:%[0-9]+]]:_(s64) = G_UADDSAT [[MV]], [[MV1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UADDSAT]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[UADDSAT:%[0-9]+]]:_(i64) = G_UADDSAT [[MV]], [[MV1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UADDSAT]](i64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %res = call i64 @llvm.uadd.sat.i64(i64 %lhs, i64 %rhs) ret i64 %res @@ -60,16 +60,16 @@ define <2 x i32> @uaddsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[UADDSAT:%[0-9]+]]:_(<2 x s32>) = G_UADDSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UADDSAT]](<2 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[UADDSAT:%[0-9]+]]:_(<2 x i32>) = G_UADDSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UADDSAT]](<2 x i32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %res = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) ret <2 x i32> 
%res @@ -81,13 +81,13 @@ define i16 @saddsat_i16(i16 %lhs, i16 %rhs) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[TRUNC]], [[TRUNC1]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SADDSAT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[SADDSAT:%[0-9]+]]:_(i16) = G_SADDSAT [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[SADDSAT]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %res = call i16 @llvm.sadd.sat.i16(i16 %lhs, i16 %rhs) ret i16 %res @@ -99,10 +99,10 @@ define i32 @saddsat_i32(i32 %lhs, i32 %rhs) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[SADDSAT:%[0-9]+]]:_(s32) = G_SADDSAT [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[SADDSAT]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[SADDSAT:%[0-9]+]]:_(i32) = G_SADDSAT [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[SADDSAT]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %res = call i32 @llvm.sadd.sat.i32(i32 %lhs, i32 %rhs) ret i32 %res @@ -114,16 +114,16 @@ define i64 @saddsat_i64(i64 %lhs, i64 %rhs) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[SADDSAT:%[0-9]+]]:_(s64) = G_SADDSAT [[MV]], [[MV1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SADDSAT]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[SADDSAT:%[0-9]+]]:_(i64) = G_SADDSAT [[MV]], [[MV1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SADDSAT]](i64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %res = call i64 @llvm.sadd.sat.i64(i64 %lhs, i64 %rhs) ret i64 %res @@ -135,16 +135,16 @@ define <2 x i32> @saddsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; CHECK: bb.1 
(%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[SADDSAT:%[0-9]+]]:_(<2 x s32>) = G_SADDSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SADDSAT]](<2 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[SADDSAT:%[0-9]+]]:_(<2 x i32>) = G_SADDSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SADDSAT]](<2 x i32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %res = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) ret <2 x i32> %res @@ -156,13 +156,13 @@ define i16 @usubsat_i16(i16 %lhs, i16 %rhs) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC]], [[TRUNC1]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[USUBSAT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[USUBSAT:%[0-9]+]]:_(i16) = G_USUBSAT [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[USUBSAT]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %res = call i16 @llvm.usub.sat.i16(i16 %lhs, i16 %rhs) ret i16 %res @@ -174,10 +174,10 @@ define i32 @usubsat_i32(i32 %lhs, i32 %rhs) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[USUBSAT:%[0-9]+]]:_(s32) = G_USUBSAT [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[USUBSAT]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[USUBSAT:%[0-9]+]]:_(i32) = G_USUBSAT [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[USUBSAT]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %res = call i32 @llvm.usub.sat.i32(i32 %lhs, i32 %rhs) ret i32 %res @@ -189,16 +189,16 @@ define i64 
@usubsat_i64(i64 %lhs, i64 %rhs) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[USUBSAT:%[0-9]+]]:_(s64) = G_USUBSAT [[MV]], [[MV1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[USUBSAT]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[USUBSAT:%[0-9]+]]:_(i64) = G_USUBSAT [[MV]], [[MV1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[USUBSAT]](i64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %res = call i64 @llvm.usub.sat.i64(i64 %lhs, i64 %rhs) ret i64 %res @@ -210,16 +210,16 @@ define <2 x i32> @usubsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[USUBSAT:%[0-9]+]]:_(<2 x s32>) = G_USUBSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[USUBSAT]](<2 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[USUBSAT:%[0-9]+]]:_(<2 x i32>) = G_USUBSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[USUBSAT]](<2 x i32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %res = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) ret <2 x i32> %res @@ -231,13 +231,13 @@ define i16 @ssubsat_i16(i16 %lhs, i16 %rhs) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: 
[[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[TRUNC]], [[TRUNC1]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SSUBSAT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[SSUBSAT:%[0-9]+]]:_(i16) = G_SSUBSAT [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[SSUBSAT]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %res = call i16 @llvm.ssub.sat.i16(i16 %lhs, i16 %rhs) ret i16 %res @@ -249,10 +249,10 @@ define i32 @ssubsat_i32(i32 %lhs, i32 %rhs) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[SSUBSAT:%[0-9]+]]:_(s32) = G_SSUBSAT [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[SSUBSAT]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[SSUBSAT:%[0-9]+]]:_(i32) = G_SSUBSAT [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[SSUBSAT]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %res = call i32 @llvm.ssub.sat.i32(i32 %lhs, i32 %rhs) ret i32 %res @@ -264,16 +264,16 @@ define i64 @ssubsat_i64(i64 %lhs, i64 %rhs) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[SSUBSAT:%[0-9]+]]:_(s64) = G_SSUBSAT [[MV]], [[MV1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SSUBSAT]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[SSUBSAT:%[0-9]+]]:_(i64) = G_SSUBSAT [[MV]], [[MV1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SSUBSAT]](i64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %res = call i64 @llvm.ssub.sat.i64(i64 %lhs, i64 %rhs) ret i64 %res @@ -285,16 +285,16 @@ define <2 x i32> @ssubsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[SSUBSAT:%[0-9]+]]:_(<2 x s32>) = G_SSUBSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SSUBSAT]](<2 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[SSUBSAT:%[0-9]+]]:_(<2 x i32>) = G_SSUBSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SSUBSAT]](<2 x i32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %res = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) ret <2 x i32> %res @@ -306,13 +306,13 @@ define i16 @ushlsat_i16(i16 %lhs, i16 %rhs) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[USHLSAT:%[0-9]+]]:_(s16) = G_USHLSAT [[TRUNC]], [[TRUNC1]](s16) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[USHLSAT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[USHLSAT:%[0-9]+]]:_(i16) = G_USHLSAT [[TRUNC]], [[TRUNC1]](i16) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[USHLSAT]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %res = call i16 @llvm.ushl.sat.i16(i16 %lhs, i16 %rhs) ret i16 %res @@ -324,10 +324,10 @@ define i32 @ushlsat_i32(i32 %lhs, i32 %rhs) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[USHLSAT:%[0-9]+]]:_(s32) = G_USHLSAT [[COPY]], [[COPY1]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[USHLSAT]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[USHLSAT:%[0-9]+]]:_(i32) = G_USHLSAT [[COPY]], [[COPY1]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[USHLSAT]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %res = call i32 @llvm.ushl.sat.i32(i32 %lhs, i32 %rhs) ret i32 %res @@ -339,16 +339,16 @@ define i64 @ushlsat_i64(i64 %lhs, i64 %rhs) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; 
CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[USHLSAT:%[0-9]+]]:_(s64) = G_USHLSAT [[MV]], [[MV1]](s64) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[USHLSAT]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[USHLSAT:%[0-9]+]]:_(i64) = G_USHLSAT [[MV]], [[MV1]](i64) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[USHLSAT]](i64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %res = call i64 @llvm.ushl.sat.i64(i64 %lhs, i64 %rhs) ret i64 %res @@ -360,16 +360,16 @@ define <2 x i32> @ushlsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[USHLSAT:%[0-9]+]]:_(<2 x s32>) = G_USHLSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]](<2 x s32>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[USHLSAT]](<2 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[USHLSAT:%[0-9]+]]:_(<2 x i32>) = G_USHLSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]](<2 x i32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[USHLSAT]](<2 x i32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %res = call <2 x i32> @llvm.ushl.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) ret <2 x i32> %res @@ -381,13 +381,13 @@ define i16 @sshlsat_i16(i16 %lhs, i16 %rhs) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; 
CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(s16) = G_SSHLSAT [[TRUNC]], [[TRUNC1]](s16) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SSHLSAT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(i16) = G_SSHLSAT [[TRUNC]], [[TRUNC1]](i16) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[SSHLSAT]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %res = call i16 @llvm.sshl.sat.i16(i16 %lhs, i16 %rhs) ret i16 %res @@ -399,10 +399,10 @@ define i32 @sshlsat_i32(i32 %lhs, i32 %rhs) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(s32) = G_SSHLSAT [[COPY]], [[COPY1]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[SSHLSAT]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(i32) = G_SSHLSAT [[COPY]], [[COPY1]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[SSHLSAT]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %res = call i32 @llvm.sshl.sat.i32(i32 %lhs, i32 %rhs) ret i32 %res @@ -414,16 +414,16 @@ define i64 @sshlsat_i64(i64 %lhs, i64 %rhs) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(s64) = G_SSHLSAT [[MV]], [[MV1]](s64) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SSHLSAT]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(i64) = G_SSHLSAT [[MV]], [[MV1]](i64) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SSHLSAT]](i64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %res = call i64 @llvm.sshl.sat.i64(i64 %lhs, i64 %rhs) ret i64 %res @@ -435,16 +435,16 @@ define <2 x i32> @sshlsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: 
[[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(<2 x s32>) = G_SSHLSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]](<2 x s32>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SSHLSAT]](<2 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(<2 x i32>) = G_SSHLSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]](<2 x i32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SSHLSAT]](<2 x i32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %res = call <2 x i32> @llvm.sshl.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) ret <2 x i32> %res diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll index 8813462652efd..862535988e549 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll @@ -7,10 +7,10 @@ define fastcc i32 @i32_fastcc_i32_i32(i32 %arg0, i32 %arg1) #1 { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] - ; GCN-NEXT: $vgpr0 = COPY [[ADD]](s32) + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GCN-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[COPY]], [[COPY1]] + ; GCN-NEXT: $vgpr0 = COPY [[ADD]](i32) ; GCN-NEXT: SI_RETURN implicit $vgpr0 %add0 = add i32 %arg0, %arg1 ret i32 %add0 @@ -21,15 +21,15 @@ define fastcc i32 @i32_fastcc_i32_i32_stack_object(i32 %arg0, i32 %arg1) #1 { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GCN-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 9 ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: %4:_(p5) = nuw nusw G_PTR_ADD [[FRAME_INDEX]], [[C1]](s32) - ; GCN-NEXT: G_STORE [[C]](s32), %4(p5) :: (volatile store (s32) into %ir.gep, addrspace 5) - ; GCN-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] - ; GCN-NEXT: $vgpr0 = COPY [[ADD]](s32) + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GCN-NEXT: %4:_(p5) = nuw nusw G_PTR_ADD [[FRAME_INDEX]], [[C1]](i32) + ; GCN-NEXT: G_STORE [[C]](i32), %4(p5) :: (volatile store (i32) into %ir.gep, addrspace 5) + ; GCN-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[COPY]], [[COPY1]] + ; GCN-NEXT: $vgpr0 = COPY [[ADD]](i32) ; GCN-NEXT: 
SI_RETURN implicit $vgpr0 %alloca = alloca [16 x i32], align 4, addrspace(5) %gep = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 5 @@ -43,14 +43,14 @@ define hidden fastcc i32 @sibling_call_i32_fastcc_i32_i32(i32 %a, i32 %b, i32 %c ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 ; GCN-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32 - ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32) - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>) + ; GCN-NEXT: $vgpr0 = COPY [[COPY]](i32) + ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](i32) + ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x i32>) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %ret = tail call fastcc i32 @i32_fastcc_i32_i32(i32 %a, i32 %b) @@ -62,19 +62,19 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_i32_stack_object(i32 %a, i32 %b, ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GCN-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 9 ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: %5:_(p5) = nuw nusw G_PTR_ADD [[FRAME_INDEX]], [[C1]](s32) - ; GCN-NEXT: G_STORE [[C]](s32), %5(p5) :: (volatile store (s32) into %ir.gep, addrspace 5) + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GCN-NEXT: %5:_(p5) = nuw nusw G_PTR_ADD [[FRAME_INDEX]], [[C1]](i32) + ; GCN-NEXT: G_STORE [[C]](i32), %5(p5) :: (volatile store (i32) into %ir.gep, addrspace 5) ; GCN-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32 - ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32) - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>) + ; GCN-NEXT: $vgpr0 = COPY [[COPY]](i32) + ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](i32) + ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x i32>) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %alloca = alloca [16 x i32], align 4, addrspace(5) @@ -89,19 +89,19 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_i32_callee_stack_object(i32 %a, i ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 
- ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GCN-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 9 ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: %5:_(p5) = nuw nusw G_PTR_ADD [[FRAME_INDEX]], [[C1]](s32) - ; GCN-NEXT: G_STORE [[C]](s32), %5(p5) :: (volatile store (s32) into %ir.gep, addrspace 5) + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GCN-NEXT: %5:_(p5) = nuw nusw G_PTR_ADD [[FRAME_INDEX]], [[C1]](i32) + ; GCN-NEXT: G_STORE [[C]](i32), %5(p5) :: (volatile store (i32) into %ir.gep, addrspace 5) ; GCN-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_stack_object - ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32) - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>) + ; GCN-NEXT: $vgpr0 = COPY [[COPY]](i32) + ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](i32) + ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x i32>) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_stack_object, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %alloca = alloca [16 x i32], align 4, addrspace(5) @@ -116,14 +116,14 @@ define fastcc void @sibling_call_i32_fastcc_i32_i32_unused_result(i32 %a, i32 %b ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 ; GCN-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32 - ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32) - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>) + ; GCN-NEXT: $vgpr0 = COPY [[COPY]](i32) + ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](i32) + ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x i32>) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %ret = tail call fastcc i32 @i32_fastcc_i32_i32(i32 %a, i32 %b) @@ -137,20 +137,20 @@ define amdgpu_kernel void @kernel_call_i32_fastcc_i32_i32_unused_result(i32 %a, ; GCN-NEXT: liveins: $sgpr4_sgpr5 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GCN-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 ; GCN-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (<2 x s32>) from %ir.a.kernarg.offset1, align 16, addrspace 4) - ; GCN-NEXT: 
[[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<2 x s32>), [[C]](s32) - ; GCN-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<2 x s32>), [[C1]](s32) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (<2 x i32>) from %ir.a.kernarg.offset1, align 16, addrspace 4) + ; GCN-NEXT: [[EVEC:%[0-9]+]]:_(i32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<2 x i32>), [[C]](i32) + ; GCN-NEXT: [[EVEC1:%[0-9]+]]:_(i32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<2 x i32>), [[C1]](i32) ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32 - ; GCN-NEXT: $vgpr0 = COPY [[EVEC]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[EVEC1]](s32) - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) + ; GCN-NEXT: $vgpr0 = COPY [[EVEC]](i32) + ; GCN-NEXT: $vgpr1 = COPY [[EVEC1]](i32) + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x i32>) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @i32_fastcc_i32_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: S_ENDPGM 0 entry: @@ -163,12 +163,12 @@ define hidden fastcc i32 @i32_fastcc_i32_byval_i32(i32 %arg0, ptr addrspace(5) b ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5) - ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p5) :: (dereferenceable load (s32) from %ir.arg1, addrspace 5) - ; GCN-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[LOAD]] - ; GCN-NEXT: $vgpr0 = COPY [[ADD]](s32) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY1]](p5) :: (dereferenceable load (i32) from %ir.arg1, addrspace 5) + ; GCN-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[COPY]], [[LOAD]] + ; GCN-NEXT: $vgpr0 = COPY [[ADD]](i32) ; GCN-NEXT: SI_RETURN implicit $vgpr0 %arg1.load = load i32, ptr addrspace(5) %arg1, align 4 %add0 = add i32 %arg0, %arg1.load @@ -181,24 +181,24 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32_byval_parent(i32 %a, pt ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5) - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr1 ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_byval_i32 ; GCN-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 - ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C]](s32) - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GCN-NEXT: G_MEMCPY [[PTR_ADD]](p5), [[COPY1]](p5), [[C1]](s32), 0 :: (dereferenceable store (s32) into stack, addrspace 5), (dereferenceable load (s32) from 
%ir.b.byval, addrspace 5) - ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32) - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>) + ; GCN-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C]](i32) + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GCN-NEXT: G_MEMCPY [[PTR_ADD]](p5), [[COPY1]](p5), [[C1]](i32), 0 :: (dereferenceable store (i32) into stack, addrspace 5), (dereferenceable load (i32) from %ir.b.byval, addrspace 5) + ; GCN-NEXT: $vgpr0 = COPY [[COPY]](i32) + ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x i32>) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @i32_fastcc_i32_byval_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc - ; GCN-NEXT: $vgpr0 = COPY [[COPY4]](s32) + ; GCN-NEXT: $vgpr0 = COPY [[COPY4]](i32) ; GCN-NEXT: SI_RETURN implicit $vgpr0 entry: %ret = tail call fastcc i32 @i32_fastcc_i32_byval_i32(i32 %a, ptr addrspace(5) byval(i32) %b.byval) @@ -213,50 +213,50 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32(i32 %a, [32 x i32] %lar ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; GCN-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; GCN-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = 
COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr17 + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr18 + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr19 + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr20 + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr21 + ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr22 + ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(i32) = COPY $vgpr23 + ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(i32) = COPY $vgpr24 + ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(i32) = COPY $vgpr25 + ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(i32) = COPY $vgpr26 + ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(i32) = COPY $vgpr27 + ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(i32) = COPY $vgpr28 + ; GCN-NEXT: [[COPY29:%[0-9]+]]:_(i32) = COPY $vgpr29 + ; GCN-NEXT: [[COPY30:%[0-9]+]]:_(i32) = COPY $vgpr30 ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 - ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.2, align 16, addrspace 5) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (i32) from %fixed-stack.2, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.1, addrspace 5) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GCN-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[C]](s32) + ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (i32) from %fixed-stack.1, addrspace 5) + ; GCN-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GCN-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[C]](i32) ; GCN-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_byval_i32 ; GCN-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GCN-NEXT: G_MEMCPY [[FRAME_INDEX2]](p5), [[INTTOPTR]](p5), [[C1]](s32), 0 :: (dereferenceable store (s32) into %fixed-stack.0, align 16, addrspace 5), (dereferenceable load (s32) from `ptr addrspace(5) inttoptr (i32 16 to ptr addrspace(5))`, align 16, addrspace 5) - ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32) - ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY31]](<4 x s32>) + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GCN-NEXT: G_MEMCPY [[FRAME_INDEX2]](p5), [[INTTOPTR]](p5), [[C1]](i32), 0 :: (dereferenceable store (i32) into %fixed-stack.0, align 16, addrspace 5), (dereferenceable load (i32) from `ptr addrspace(5) inttoptr (i32 16 to ptr addrspace(5))`, align 16, addrspace 5) + ; GCN-NEXT: $vgpr0 = COPY [[COPY]](i32) + ; 
GCN-NEXT: [[COPY31:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY31]](<4 x i32>) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_byval_i32, 0, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %ret = tail call fastcc i32 @i32_fastcc_i32_byval_i32(i32 %a, ptr addrspace(5) byval(i32) inttoptr (i32 16 to ptr addrspace(5))) @@ -268,47 +268,47 @@ define fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %arg0, i32 %arg1, [32 x i32] %l ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; GCN-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; GCN-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr17 + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr18 + 
; GCN-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr19 + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr20 + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr21 + ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr22 + ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(i32) = COPY $vgpr23 + ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(i32) = COPY $vgpr24 + ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(i32) = COPY $vgpr25 + ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(i32) = COPY $vgpr26 + ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(i32) = COPY $vgpr27 + ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(i32) = COPY $vgpr28 + ; GCN-NEXT: [[COPY29:%[0-9]+]]:_(i32) = COPY $vgpr29 + ; GCN-NEXT: [[COPY30:%[0-9]+]]:_(i32) = COPY $vgpr30 ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 - ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.2, align 16, addrspace 5) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (i32) from %fixed-stack.2, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.1, addrspace 5) + ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (i32) from %fixed-stack.1, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; GCN-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.0, align 8, addrspace 5) - ; GCN-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] - ; GCN-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[LOAD1]] - ; GCN-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[LOAD2]] - ; GCN-NEXT: $vgpr0 = COPY [[ADD2]](s32) + ; GCN-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (i32) from %fixed-stack.0, align 8, addrspace 5) + ; GCN-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[COPY]], [[COPY1]] + ; GCN-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ADD]], [[LOAD1]] + ; GCN-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[ADD1]], [[LOAD2]] + ; GCN-NEXT: $vgpr0 = COPY [[ADD2]](i32) ; GCN-NEXT: SI_RETURN implicit $vgpr0 %val_firststack = extractvalue [32 x i32] %large, 30 %val_laststack = extractvalue [32 x i32] %large, 31 @@ -323,83 +323,83 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GCN-NEXT: 
[[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; GCN-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; GCN-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr17 + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr18 + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr19 + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr20 + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr21 + ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr22 + ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(i32) = COPY $vgpr23 + ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(i32) = COPY $vgpr24 + ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(i32) = COPY $vgpr25 + ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(i32) = COPY $vgpr26 + ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(i32) = COPY $vgpr27 + ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(i32) = COPY $vgpr28 + ; GCN-NEXT: [[COPY29:%[0-9]+]]:_(i32) = COPY $vgpr29 + ; GCN-NEXT: [[COPY30:%[0-9]+]]:_(i32) = COPY $vgpr30 ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 - ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.5, align 16, addrspace 5) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (i32) from %fixed-stack.5, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 - ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.4, addrspace 5) + ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (i32) from %fixed-stack.4, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 - ; GCN-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.3, align 8, addrspace 5) + ; GCN-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (i32) from %fixed-stack.3, 
align 8, addrspace 5) ; GCN-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32 ; GCN-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 - ; GCN-NEXT: G_STORE [[LOAD]](s32), [[FRAME_INDEX3]](p5) :: (store (s32) into %fixed-stack.2, align 16, addrspace 5) + ; GCN-NEXT: G_STORE [[LOAD]](i32), [[FRAME_INDEX3]](p5) :: (store (i32) into %fixed-stack.2, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; GCN-NEXT: G_STORE [[LOAD1]](s32), [[FRAME_INDEX4]](p5) :: (store (s32) into %fixed-stack.1, addrspace 5) + ; GCN-NEXT: G_STORE [[LOAD1]](i32), [[FRAME_INDEX4]](p5) :: (store (i32) into %fixed-stack.1, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; GCN-NEXT: G_STORE [[LOAD2]](s32), [[FRAME_INDEX5]](p5) :: (store (s32) into %fixed-stack.0, align 8, addrspace 5) - ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[COPY2]](s32) - ; GCN-NEXT: $vgpr3 = COPY [[COPY3]](s32) - ; GCN-NEXT: $vgpr4 = COPY [[COPY4]](s32) - ; GCN-NEXT: $vgpr5 = COPY [[COPY5]](s32) - ; GCN-NEXT: $vgpr6 = COPY [[COPY6]](s32) - ; GCN-NEXT: $vgpr7 = COPY [[COPY7]](s32) - ; GCN-NEXT: $vgpr8 = COPY [[COPY8]](s32) - ; GCN-NEXT: $vgpr9 = COPY [[COPY9]](s32) - ; GCN-NEXT: $vgpr10 = COPY [[COPY10]](s32) - ; GCN-NEXT: $vgpr11 = COPY [[COPY11]](s32) - ; GCN-NEXT: $vgpr12 = COPY [[COPY12]](s32) - ; GCN-NEXT: $vgpr13 = COPY [[COPY13]](s32) - ; GCN-NEXT: $vgpr14 = COPY [[COPY14]](s32) - ; GCN-NEXT: $vgpr15 = COPY [[COPY15]](s32) - ; GCN-NEXT: $vgpr16 = COPY [[COPY16]](s32) - ; GCN-NEXT: $vgpr17 = COPY [[COPY17]](s32) - ; GCN-NEXT: $vgpr18 = COPY [[COPY18]](s32) - ; GCN-NEXT: $vgpr19 = COPY [[COPY19]](s32) - ; GCN-NEXT: $vgpr20 = COPY [[COPY20]](s32) - ; GCN-NEXT: $vgpr21 = COPY [[COPY21]](s32) - ; GCN-NEXT: $vgpr22 = COPY [[COPY22]](s32) - ; GCN-NEXT: $vgpr23 = COPY [[COPY23]](s32) - ; GCN-NEXT: $vgpr24 = COPY [[COPY24]](s32) - ; GCN-NEXT: $vgpr25 = COPY [[COPY25]](s32) - ; GCN-NEXT: $vgpr26 = COPY [[COPY26]](s32) - ; GCN-NEXT: $vgpr27 = COPY [[COPY27]](s32) - ; GCN-NEXT: $vgpr28 = COPY [[COPY28]](s32) - ; GCN-NEXT: $vgpr29 = COPY [[COPY29]](s32) - ; GCN-NEXT: $vgpr30 = COPY [[COPY30]](s32) - ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY31]](<4 x s32>) + ; GCN-NEXT: G_STORE [[LOAD2]](i32), [[FRAME_INDEX5]](p5) :: (store (i32) into %fixed-stack.0, align 8, addrspace 5) + ; GCN-NEXT: $vgpr0 = COPY [[COPY]](i32) + ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](i32) + ; GCN-NEXT: $vgpr2 = COPY [[COPY2]](i32) + ; GCN-NEXT: $vgpr3 = COPY [[COPY3]](i32) + ; GCN-NEXT: $vgpr4 = COPY [[COPY4]](i32) + ; GCN-NEXT: $vgpr5 = COPY [[COPY5]](i32) + ; GCN-NEXT: $vgpr6 = COPY [[COPY6]](i32) + ; GCN-NEXT: $vgpr7 = COPY [[COPY7]](i32) + ; GCN-NEXT: $vgpr8 = COPY [[COPY8]](i32) + ; GCN-NEXT: $vgpr9 = COPY [[COPY9]](i32) + ; GCN-NEXT: $vgpr10 = COPY [[COPY10]](i32) + ; GCN-NEXT: $vgpr11 = COPY [[COPY11]](i32) + ; GCN-NEXT: $vgpr12 = COPY [[COPY12]](i32) + ; GCN-NEXT: $vgpr13 = COPY [[COPY13]](i32) + ; GCN-NEXT: $vgpr14 = COPY [[COPY14]](i32) + ; GCN-NEXT: $vgpr15 = COPY [[COPY15]](i32) + ; GCN-NEXT: $vgpr16 = COPY [[COPY16]](i32) + ; GCN-NEXT: $vgpr17 = COPY [[COPY17]](i32) + ; GCN-NEXT: $vgpr18 = COPY [[COPY18]](i32) + ; GCN-NEXT: $vgpr19 = COPY [[COPY19]](i32) + ; GCN-NEXT: $vgpr20 = COPY [[COPY20]](i32) + ; GCN-NEXT: $vgpr21 = COPY [[COPY21]](i32) + ; GCN-NEXT: $vgpr22 = COPY [[COPY22]](i32) 
+ ; GCN-NEXT: $vgpr23 = COPY [[COPY23]](i32) + ; GCN-NEXT: $vgpr24 = COPY [[COPY24]](i32) + ; GCN-NEXT: $vgpr25 = COPY [[COPY25]](i32) + ; GCN-NEXT: $vgpr26 = COPY [[COPY26]](i32) + ; GCN-NEXT: $vgpr27 = COPY [[COPY27]](i32) + ; GCN-NEXT: $vgpr28 = COPY [[COPY28]](i32) + ; GCN-NEXT: $vgpr29 = COPY [[COPY29]](i32) + ; GCN-NEXT: $vgpr30 = COPY [[COPY30]](i32) + ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY31]](<4 x i32>) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %ret = tail call fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] %c) @@ -411,88 +411,88 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_i32_a32i32_stack_object(i32 %a, i ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; GCN-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; GCN-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GCN-NEXT: 
[[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr17 + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr18 + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr19 + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr20 + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr21 + ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr22 + ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(i32) = COPY $vgpr23 + ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(i32) = COPY $vgpr24 + ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(i32) = COPY $vgpr25 + ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(i32) = COPY $vgpr26 + ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(i32) = COPY $vgpr27 + ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(i32) = COPY $vgpr28 + ; GCN-NEXT: [[COPY29:%[0-9]+]]:_(i32) = COPY $vgpr29 + ; GCN-NEXT: [[COPY30:%[0-9]+]]:_(i32) = COPY $vgpr30 ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 - ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.5, align 16, addrspace 5) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (i32) from %fixed-stack.5, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 - ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.4, addrspace 5) + ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (i32) from %fixed-stack.4, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 - ; GCN-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.3, align 8, addrspace 5) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 + ; GCN-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (i32) from %fixed-stack.3, align 8, addrspace 5) + ; GCN-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 9 ; GCN-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: %39:_(p5) = nuw nusw G_PTR_ADD [[FRAME_INDEX3]], [[C1]](s32) - ; GCN-NEXT: G_STORE [[C]](s32), %39(p5) :: (volatile store (s32) into %ir.gep, addrspace 5) + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GCN-NEXT: %39:_(p5) = nuw nusw G_PTR_ADD [[FRAME_INDEX3]], [[C1]](i32) + ; GCN-NEXT: G_STORE [[C]](i32), %39(p5) :: (volatile store (i32) into %ir.gep, addrspace 5) ; GCN-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32 ; GCN-NEXT: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 - ; GCN-NEXT: G_STORE [[LOAD]](s32), [[FRAME_INDEX4]](p5) :: (store (s32) into %fixed-stack.2, align 16, addrspace 5) + ; GCN-NEXT: G_STORE [[LOAD]](i32), [[FRAME_INDEX4]](p5) :: (store (i32) into %fixed-stack.2, align 16, 
addrspace 5) ; GCN-NEXT: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; GCN-NEXT: G_STORE [[LOAD1]](s32), [[FRAME_INDEX5]](p5) :: (store (s32) into %fixed-stack.1, addrspace 5) + ; GCN-NEXT: G_STORE [[LOAD1]](i32), [[FRAME_INDEX5]](p5) :: (store (i32) into %fixed-stack.1, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; GCN-NEXT: G_STORE [[LOAD2]](s32), [[FRAME_INDEX6]](p5) :: (store (s32) into %fixed-stack.0, align 8, addrspace 5) - ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[COPY2]](s32) - ; GCN-NEXT: $vgpr3 = COPY [[COPY3]](s32) - ; GCN-NEXT: $vgpr4 = COPY [[COPY4]](s32) - ; GCN-NEXT: $vgpr5 = COPY [[COPY5]](s32) - ; GCN-NEXT: $vgpr6 = COPY [[COPY6]](s32) - ; GCN-NEXT: $vgpr7 = COPY [[COPY7]](s32) - ; GCN-NEXT: $vgpr8 = COPY [[COPY8]](s32) - ; GCN-NEXT: $vgpr9 = COPY [[COPY9]](s32) - ; GCN-NEXT: $vgpr10 = COPY [[COPY10]](s32) - ; GCN-NEXT: $vgpr11 = COPY [[COPY11]](s32) - ; GCN-NEXT: $vgpr12 = COPY [[COPY12]](s32) - ; GCN-NEXT: $vgpr13 = COPY [[COPY13]](s32) - ; GCN-NEXT: $vgpr14 = COPY [[COPY14]](s32) - ; GCN-NEXT: $vgpr15 = COPY [[COPY15]](s32) - ; GCN-NEXT: $vgpr16 = COPY [[COPY16]](s32) - ; GCN-NEXT: $vgpr17 = COPY [[COPY17]](s32) - ; GCN-NEXT: $vgpr18 = COPY [[COPY18]](s32) - ; GCN-NEXT: $vgpr19 = COPY [[COPY19]](s32) - ; GCN-NEXT: $vgpr20 = COPY [[COPY20]](s32) - ; GCN-NEXT: $vgpr21 = COPY [[COPY21]](s32) - ; GCN-NEXT: $vgpr22 = COPY [[COPY22]](s32) - ; GCN-NEXT: $vgpr23 = COPY [[COPY23]](s32) - ; GCN-NEXT: $vgpr24 = COPY [[COPY24]](s32) - ; GCN-NEXT: $vgpr25 = COPY [[COPY25]](s32) - ; GCN-NEXT: $vgpr26 = COPY [[COPY26]](s32) - ; GCN-NEXT: $vgpr27 = COPY [[COPY27]](s32) - ; GCN-NEXT: $vgpr28 = COPY [[COPY28]](s32) - ; GCN-NEXT: $vgpr29 = COPY [[COPY29]](s32) - ; GCN-NEXT: $vgpr30 = COPY [[COPY30]](s32) - ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY31]](<4 x s32>) + ; GCN-NEXT: G_STORE [[LOAD2]](i32), [[FRAME_INDEX6]](p5) :: (store (i32) into %fixed-stack.0, align 8, addrspace 5) + ; GCN-NEXT: $vgpr0 = COPY [[COPY]](i32) + ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](i32) + ; GCN-NEXT: $vgpr2 = COPY [[COPY2]](i32) + ; GCN-NEXT: $vgpr3 = COPY [[COPY3]](i32) + ; GCN-NEXT: $vgpr4 = COPY [[COPY4]](i32) + ; GCN-NEXT: $vgpr5 = COPY [[COPY5]](i32) + ; GCN-NEXT: $vgpr6 = COPY [[COPY6]](i32) + ; GCN-NEXT: $vgpr7 = COPY [[COPY7]](i32) + ; GCN-NEXT: $vgpr8 = COPY [[COPY8]](i32) + ; GCN-NEXT: $vgpr9 = COPY [[COPY9]](i32) + ; GCN-NEXT: $vgpr10 = COPY [[COPY10]](i32) + ; GCN-NEXT: $vgpr11 = COPY [[COPY11]](i32) + ; GCN-NEXT: $vgpr12 = COPY [[COPY12]](i32) + ; GCN-NEXT: $vgpr13 = COPY [[COPY13]](i32) + ; GCN-NEXT: $vgpr14 = COPY [[COPY14]](i32) + ; GCN-NEXT: $vgpr15 = COPY [[COPY15]](i32) + ; GCN-NEXT: $vgpr16 = COPY [[COPY16]](i32) + ; GCN-NEXT: $vgpr17 = COPY [[COPY17]](i32) + ; GCN-NEXT: $vgpr18 = COPY [[COPY18]](i32) + ; GCN-NEXT: $vgpr19 = COPY [[COPY19]](i32) + ; GCN-NEXT: $vgpr20 = COPY [[COPY20]](i32) + ; GCN-NEXT: $vgpr21 = COPY [[COPY21]](i32) + ; GCN-NEXT: $vgpr22 = COPY [[COPY22]](i32) + ; GCN-NEXT: $vgpr23 = COPY [[COPY23]](i32) + ; GCN-NEXT: $vgpr24 = COPY [[COPY24]](i32) + ; GCN-NEXT: $vgpr25 = COPY [[COPY25]](i32) + ; GCN-NEXT: $vgpr26 = COPY [[COPY26]](i32) + ; GCN-NEXT: $vgpr27 = COPY [[COPY27]](i32) + ; GCN-NEXT: $vgpr28 = COPY [[COPY28]](i32) + ; GCN-NEXT: $vgpr29 = COPY [[COPY29]](i32) + ; GCN-NEXT: $vgpr30 = COPY [[COPY30]](i32) + ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(<4 x i32>) 
= COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY31]](<4 x i32>) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %alloca = alloca [16 x i32], align 4, addrspace(5) @@ -510,58 +510,58 @@ define fastcc i32 @no_sibling_call_callee_more_stack_space(i32 %a, i32 %b) #1 { ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GCN-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32 ; GCN-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C1]](s32) - ; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5) - ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C2]](s32) - ; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack + 4, addrspace 5) - ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GCN-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C3]](s32) - ; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5) - ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr3 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr4 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr5 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr6 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr7 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr8 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr9 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr10 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr11 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr12 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr13 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr14 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr15 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr16 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr17 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr18 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr19 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr20 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr21 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr22 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr23 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr24 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr25 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr26 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr27 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr28 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr29 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr30 = COPY [[C]](s32) - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY 
$sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>) + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C1]](i32) + ; GCN-NEXT: G_STORE [[C]](i32), [[PTR_ADD]](p5) :: (store (i32) into stack, align 16, addrspace 5) + ; GCN-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C2]](i32) + ; GCN-NEXT: G_STORE [[C]](i32), [[PTR_ADD1]](p5) :: (store (i32) into stack + 4, addrspace 5) + ; GCN-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GCN-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C3]](i32) + ; GCN-NEXT: G_STORE [[C]](i32), [[PTR_ADD2]](p5) :: (store (i32) into stack + 8, align 8, addrspace 5) + ; GCN-NEXT: $vgpr0 = COPY [[COPY]](i32) + ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](i32) + ; GCN-NEXT: $vgpr2 = COPY [[C]](i32) + ; GCN-NEXT: $vgpr3 = COPY [[C]](i32) + ; GCN-NEXT: $vgpr4 = COPY [[C]](i32) + ; GCN-NEXT: $vgpr5 = COPY [[C]](i32) + ; GCN-NEXT: $vgpr6 = COPY [[C]](i32) + ; GCN-NEXT: $vgpr7 = COPY [[C]](i32) + ; GCN-NEXT: $vgpr8 = COPY [[C]](i32) + ; GCN-NEXT: $vgpr9 = COPY [[C]](i32) + ; GCN-NEXT: $vgpr10 = COPY [[C]](i32) + ; GCN-NEXT: $vgpr11 = COPY [[C]](i32) + ; GCN-NEXT: $vgpr12 = COPY [[C]](i32) + ; GCN-NEXT: $vgpr13 = COPY [[C]](i32) + ; GCN-NEXT: $vgpr14 = COPY [[C]](i32) + ; GCN-NEXT: $vgpr15 = COPY [[C]](i32) + ; GCN-NEXT: $vgpr16 = COPY [[C]](i32) + ; GCN-NEXT: $vgpr17 = COPY [[C]](i32) + ; GCN-NEXT: $vgpr18 = COPY [[C]](i32) + ; GCN-NEXT: $vgpr19 = COPY [[C]](i32) + ; GCN-NEXT: $vgpr20 = COPY [[C]](i32) + ; GCN-NEXT: $vgpr21 = COPY [[C]](i32) + ; GCN-NEXT: $vgpr22 = COPY [[C]](i32) + ; GCN-NEXT: $vgpr23 = COPY [[C]](i32) + ; GCN-NEXT: $vgpr24 = COPY [[C]](i32) + ; GCN-NEXT: $vgpr25 = COPY [[C]](i32) + ; GCN-NEXT: $vgpr26 = COPY [[C]](i32) + ; GCN-NEXT: $vgpr27 = COPY [[C]](i32) + ; GCN-NEXT: $vgpr28 = COPY [[C]](i32) + ; GCN-NEXT: $vgpr29 = COPY [[C]](i32) + ; GCN-NEXT: $vgpr30 = COPY [[C]](i32) + ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x i32>) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @i32_fastcc_i32_i32_a32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 12, implicit-def $scc - ; GCN-NEXT: $vgpr0 = COPY [[COPY3]](s32) + ; GCN-NEXT: $vgpr0 = COPY [[COPY3]](i32) ; GCN-NEXT: SI_RETURN implicit $vgpr0 entry: %ret = tail call fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] zeroinitializer) @@ -574,24 +574,24 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_i32_other_call(i32 %a, i32 %b, i3 ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; 
GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32 - ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32) - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>) + ; GCN-NEXT: $vgpr0 = COPY [[COPY]](i32) + ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](i32) + ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x i32>) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @i32_fastcc_i32_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[GV1:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @sibling_call_i32_fastcc_i32_i32 - ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[COPY4]](s32) - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY5]](<4 x s32>) + ; GCN-NEXT: $vgpr0 = COPY [[COPY]](i32) + ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](i32) + ; GCN-NEXT: $vgpr2 = COPY [[COPY4]](i32) + ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY5]](<4 x i32>) ; GCN-NEXT: SI_TCRETURN [[GV1]](p0), @sibling_call_i32_fastcc_i32_i32, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %other.call = tail call fastcc i32 @i32_fastcc_i32_i32(i32 %a, i32 %b) @@ -606,88 +606,88 @@ define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32(i32 %a, i3 ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GCN-NEXT: 
[[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; GCN-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; GCN-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr17 + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr18 + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr19 + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr20 + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr21 + ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr22 + ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(i32) = COPY $vgpr23 + ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(i32) = COPY $vgpr24 + ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(i32) = COPY $vgpr25 + ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(i32) = COPY $vgpr26 + ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(i32) = COPY $vgpr27 + ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(i32) = COPY $vgpr28 + ; GCN-NEXT: [[COPY29:%[0-9]+]]:_(i32) = COPY $vgpr29 + ; GCN-NEXT: [[COPY30:%[0-9]+]]:_(i32) = COPY $vgpr30 ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 - ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.5, align 16, addrspace 5) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (i32) from %fixed-stack.5, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 - ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.4, addrspace 5) + ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (i32) from %fixed-stack.4, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 - ; GCN-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.3, align 8, addrspace 5) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 + ; GCN-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (i32) from %fixed-stack.3, align 8, addrspace 5) + ; GCN-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 9 ; GCN-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca - ; GCN-NEXT: 
[[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: %39:_(p5) = nuw nusw G_PTR_ADD [[FRAME_INDEX3]], [[C1]](s32) - ; GCN-NEXT: G_STORE [[C]](s32), %39(p5) :: (volatile store (s32) into %ir.gep, addrspace 5) + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GCN-NEXT: %39:_(p5) = nuw nusw G_PTR_ADD [[FRAME_INDEX3]], [[C1]](i32) + ; GCN-NEXT: G_STORE [[C]](i32), %39(p5) :: (volatile store (i32) into %ir.gep, addrspace 5) ; GCN-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32 ; GCN-NEXT: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 - ; GCN-NEXT: G_STORE [[LOAD]](s32), [[FRAME_INDEX4]](p5) :: (store (s32) into %fixed-stack.2, align 16, addrspace 5) + ; GCN-NEXT: G_STORE [[LOAD]](i32), [[FRAME_INDEX4]](p5) :: (store (i32) into %fixed-stack.2, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; GCN-NEXT: G_STORE [[LOAD1]](s32), [[FRAME_INDEX5]](p5) :: (store (s32) into %fixed-stack.1, addrspace 5) + ; GCN-NEXT: G_STORE [[LOAD1]](i32), [[FRAME_INDEX5]](p5) :: (store (i32) into %fixed-stack.1, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; GCN-NEXT: G_STORE [[LOAD2]](s32), [[FRAME_INDEX6]](p5) :: (store (s32) into %fixed-stack.0, align 8, addrspace 5) - ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[COPY2]](s32) - ; GCN-NEXT: $vgpr3 = COPY [[COPY3]](s32) - ; GCN-NEXT: $vgpr4 = COPY [[COPY4]](s32) - ; GCN-NEXT: $vgpr5 = COPY [[COPY5]](s32) - ; GCN-NEXT: $vgpr6 = COPY [[COPY6]](s32) - ; GCN-NEXT: $vgpr7 = COPY [[COPY7]](s32) - ; GCN-NEXT: $vgpr8 = COPY [[COPY8]](s32) - ; GCN-NEXT: $vgpr9 = COPY [[COPY9]](s32) - ; GCN-NEXT: $vgpr10 = COPY [[COPY10]](s32) - ; GCN-NEXT: $vgpr11 = COPY [[COPY11]](s32) - ; GCN-NEXT: $vgpr12 = COPY [[COPY12]](s32) - ; GCN-NEXT: $vgpr13 = COPY [[COPY13]](s32) - ; GCN-NEXT: $vgpr14 = COPY [[COPY14]](s32) - ; GCN-NEXT: $vgpr15 = COPY [[COPY15]](s32) - ; GCN-NEXT: $vgpr16 = COPY [[COPY16]](s32) - ; GCN-NEXT: $vgpr17 = COPY [[COPY17]](s32) - ; GCN-NEXT: $vgpr18 = COPY [[COPY18]](s32) - ; GCN-NEXT: $vgpr19 = COPY [[COPY19]](s32) - ; GCN-NEXT: $vgpr20 = COPY [[COPY20]](s32) - ; GCN-NEXT: $vgpr21 = COPY [[COPY21]](s32) - ; GCN-NEXT: $vgpr22 = COPY [[COPY22]](s32) - ; GCN-NEXT: $vgpr23 = COPY [[COPY23]](s32) - ; GCN-NEXT: $vgpr24 = COPY [[COPY24]](s32) - ; GCN-NEXT: $vgpr25 = COPY [[COPY25]](s32) - ; GCN-NEXT: $vgpr26 = COPY [[COPY26]](s32) - ; GCN-NEXT: $vgpr27 = COPY [[COPY27]](s32) - ; GCN-NEXT: $vgpr28 = COPY [[COPY28]](s32) - ; GCN-NEXT: $vgpr29 = COPY [[COPY29]](s32) - ; GCN-NEXT: $vgpr30 = COPY [[COPY30]](s32) - ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY31]](<4 x s32>) + ; GCN-NEXT: G_STORE [[LOAD2]](i32), [[FRAME_INDEX6]](p5) :: (store (i32) into %fixed-stack.0, align 8, addrspace 5) + ; GCN-NEXT: $vgpr0 = COPY [[COPY]](i32) + ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](i32) + ; GCN-NEXT: $vgpr2 = COPY [[COPY2]](i32) + ; GCN-NEXT: $vgpr3 = COPY [[COPY3]](i32) + ; GCN-NEXT: $vgpr4 = COPY [[COPY4]](i32) + ; GCN-NEXT: $vgpr5 = COPY [[COPY5]](i32) + ; GCN-NEXT: $vgpr6 = COPY [[COPY6]](i32) + ; GCN-NEXT: $vgpr7 = COPY [[COPY7]](i32) + ; GCN-NEXT: $vgpr8 = COPY [[COPY8]](i32) + ; GCN-NEXT: $vgpr9 = COPY [[COPY9]](i32) + ; GCN-NEXT: $vgpr10 = COPY [[COPY10]](i32) + ; GCN-NEXT: $vgpr11 = COPY [[COPY11]](i32) + ; GCN-NEXT: $vgpr12 = COPY [[COPY12]](i32) + ; GCN-NEXT: $vgpr13 = COPY 
[[COPY13]](i32) + ; GCN-NEXT: $vgpr14 = COPY [[COPY14]](i32) + ; GCN-NEXT: $vgpr15 = COPY [[COPY15]](i32) + ; GCN-NEXT: $vgpr16 = COPY [[COPY16]](i32) + ; GCN-NEXT: $vgpr17 = COPY [[COPY17]](i32) + ; GCN-NEXT: $vgpr18 = COPY [[COPY18]](i32) + ; GCN-NEXT: $vgpr19 = COPY [[COPY19]](i32) + ; GCN-NEXT: $vgpr20 = COPY [[COPY20]](i32) + ; GCN-NEXT: $vgpr21 = COPY [[COPY21]](i32) + ; GCN-NEXT: $vgpr22 = COPY [[COPY22]](i32) + ; GCN-NEXT: $vgpr23 = COPY [[COPY23]](i32) + ; GCN-NEXT: $vgpr24 = COPY [[COPY24]](i32) + ; GCN-NEXT: $vgpr25 = COPY [[COPY25]](i32) + ; GCN-NEXT: $vgpr26 = COPY [[COPY26]](i32) + ; GCN-NEXT: $vgpr27 = COPY [[COPY27]](i32) + ; GCN-NEXT: $vgpr28 = COPY [[COPY28]](i32) + ; GCN-NEXT: $vgpr29 = COPY [[COPY29]](i32) + ; GCN-NEXT: $vgpr30 = COPY [[COPY30]](i32) + ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY31]](<4 x i32>) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %alloca = alloca [16 x i32], align 4, addrspace(5) @@ -702,97 +702,97 @@ define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32_larger_arg ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; GCN-NEXT: 
[[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; GCN-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; GCN-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr17 + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr18 + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr19 + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr20 + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr21 + ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr22 + ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(i32) = COPY $vgpr23 + ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(i32) = COPY $vgpr24 + ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(i32) = COPY $vgpr25 + ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(i32) = COPY $vgpr26 + ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(i32) = COPY $vgpr27 + ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(i32) = COPY $vgpr28 + ; GCN-NEXT: [[COPY29:%[0-9]+]]:_(i32) = COPY $vgpr29 + ; GCN-NEXT: [[COPY30:%[0-9]+]]:_(i32) = COPY $vgpr30 ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.9 - ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.9, align 16, addrspace 5) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (i32) from %fixed-stack.9, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8 - ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.8, addrspace 5) + ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (i32) from %fixed-stack.8, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7 - ; GCN-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.7, align 8, addrspace 5) + ; GCN-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (i32) from %fixed-stack.7, align 8, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6 - ; GCN-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s32) from %fixed-stack.6, addrspace 5) + ; GCN-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (i32) from %fixed-stack.6, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 - ; GCN-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s32) from %fixed-stack.5, align 16, addrspace 5) + ; GCN-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (i32) from 
%fixed-stack.5, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 - ; GCN-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (s32) from %fixed-stack.4, addrspace 5) + ; GCN-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (i32) from %fixed-stack.4, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 - ; GCN-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (s32) from %fixed-stack.3, align 8, addrspace 5) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GCN-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (i32) from %fixed-stack.3, align 8, addrspace 5) + ; GCN-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 9 + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 ; GCN-NEXT: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca - ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: %47:_(p5) = nuw nusw G_PTR_ADD [[FRAME_INDEX7]], [[C2]](s32) - ; GCN-NEXT: G_STORE [[C]](s32), %47(p5) :: (volatile store (s32) into %ir.gep, addrspace 5) + ; GCN-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GCN-NEXT: %47:_(p5) = nuw nusw G_PTR_ADD [[FRAME_INDEX7]], [[C2]](i32) + ; GCN-NEXT: G_STORE [[C]](i32), %47(p5) :: (volatile store (i32) into %ir.gep, addrspace 5) ; GCN-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32 ; GCN-NEXT: [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 - ; GCN-NEXT: G_STORE [[C1]](s32), [[FRAME_INDEX8]](p5) :: (store (s32) into %fixed-stack.2, align 16, addrspace 5) + ; GCN-NEXT: G_STORE [[C1]](i32), [[FRAME_INDEX8]](p5) :: (store (i32) into %fixed-stack.2, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX9:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; GCN-NEXT: G_STORE [[C1]](s32), [[FRAME_INDEX9]](p5) :: (store (s32) into %fixed-stack.1, addrspace 5) + ; GCN-NEXT: G_STORE [[C1]](i32), [[FRAME_INDEX9]](p5) :: (store (i32) into %fixed-stack.1, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX10:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; GCN-NEXT: G_STORE [[C1]](s32), [[FRAME_INDEX10]](p5) :: (store (s32) into %fixed-stack.0, align 8, addrspace 5) - ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr3 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr4 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr5 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr6 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr7 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr8 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr9 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr10 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr11 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr12 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr13 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr14 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr15 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr16 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr17 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr18 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr19 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr20 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr21 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr22 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr23 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr24 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr25 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr26 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr27 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr28 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr29 = COPY [[C1]](s32) 
- ; GCN-NEXT: $vgpr30 = COPY [[C1]](s32) - ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY31]](<4 x s32>) + ; GCN-NEXT: G_STORE [[C1]](i32), [[FRAME_INDEX10]](p5) :: (store (i32) into %fixed-stack.0, align 8, addrspace 5) + ; GCN-NEXT: $vgpr0 = COPY [[COPY]](i32) + ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](i32) + ; GCN-NEXT: $vgpr2 = COPY [[C1]](i32) + ; GCN-NEXT: $vgpr3 = COPY [[C1]](i32) + ; GCN-NEXT: $vgpr4 = COPY [[C1]](i32) + ; GCN-NEXT: $vgpr5 = COPY [[C1]](i32) + ; GCN-NEXT: $vgpr6 = COPY [[C1]](i32) + ; GCN-NEXT: $vgpr7 = COPY [[C1]](i32) + ; GCN-NEXT: $vgpr8 = COPY [[C1]](i32) + ; GCN-NEXT: $vgpr9 = COPY [[C1]](i32) + ; GCN-NEXT: $vgpr10 = COPY [[C1]](i32) + ; GCN-NEXT: $vgpr11 = COPY [[C1]](i32) + ; GCN-NEXT: $vgpr12 = COPY [[C1]](i32) + ; GCN-NEXT: $vgpr13 = COPY [[C1]](i32) + ; GCN-NEXT: $vgpr14 = COPY [[C1]](i32) + ; GCN-NEXT: $vgpr15 = COPY [[C1]](i32) + ; GCN-NEXT: $vgpr16 = COPY [[C1]](i32) + ; GCN-NEXT: $vgpr17 = COPY [[C1]](i32) + ; GCN-NEXT: $vgpr18 = COPY [[C1]](i32) + ; GCN-NEXT: $vgpr19 = COPY [[C1]](i32) + ; GCN-NEXT: $vgpr20 = COPY [[C1]](i32) + ; GCN-NEXT: $vgpr21 = COPY [[C1]](i32) + ; GCN-NEXT: $vgpr22 = COPY [[C1]](i32) + ; GCN-NEXT: $vgpr23 = COPY [[C1]](i32) + ; GCN-NEXT: $vgpr24 = COPY [[C1]](i32) + ; GCN-NEXT: $vgpr25 = COPY [[C1]](i32) + ; GCN-NEXT: $vgpr26 = COPY [[C1]](i32) + ; GCN-NEXT: $vgpr27 = COPY [[C1]](i32) + ; GCN-NEXT: $vgpr28 = COPY [[C1]](i32) + ; GCN-NEXT: $vgpr29 = COPY [[C1]](i32) + ; GCN-NEXT: $vgpr30 = COPY [[C1]](i32) + ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY31]](<4 x i32>) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %alloca = alloca [16 x i32], align 4, addrspace(5) @@ -809,156 +809,156 @@ define fastcc void @sibling_call_fastcc_multi_byval(i32 %a, [64 x i32]) #1 { ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32(s32) = COPY $sgpr15 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32(s32) = COPY $sgpr14 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32(s32) = COPY $sgpr13 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32(s32) = COPY $sgpr12 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64(s64) = COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr31 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32(i32) = COPY $sgpr15 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32(i32) = COPY $sgpr14 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32(i32) 
= COPY $sgpr13 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32(i32) = COPY $sgpr12 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64(i64) = COPY $sgpr10_sgpr11 ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr8_sgpr9 ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GCN-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GCN-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GCN-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GCN-NEXT: [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; GCN-NEXT: [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; GCN-NEXT: [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; GCN-NEXT: [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; GCN-NEXT: [[COPY37:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; GCN-NEXT: [[COPY38:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; GCN-NEXT: [[COPY39:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(i32) = COPY $vgpr17 + ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(i32) = COPY $vgpr18 + ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(i32) = COPY $vgpr19 + ; GCN-NEXT: [[COPY29:%[0-9]+]]:_(i32) = COPY $vgpr20 + ; GCN-NEXT: [[COPY30:%[0-9]+]]:_(i32) = COPY $vgpr21 + ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(i32) = COPY $vgpr22 + ; GCN-NEXT: [[COPY32:%[0-9]+]]:_(i32) = COPY $vgpr23 + ; GCN-NEXT: [[COPY33:%[0-9]+]]:_(i32) = COPY $vgpr24 + ; GCN-NEXT: [[COPY34:%[0-9]+]]:_(i32) = COPY $vgpr25 + ; GCN-NEXT: [[COPY35:%[0-9]+]]:_(i32) = COPY $vgpr26 + ; GCN-NEXT: [[COPY36:%[0-9]+]]:_(i32) = COPY $vgpr27 + ; 
GCN-NEXT: [[COPY37:%[0-9]+]]:_(i32) = COPY $vgpr28 + ; GCN-NEXT: [[COPY38:%[0-9]+]]:_(i32) = COPY $vgpr29 + ; GCN-NEXT: [[COPY39:%[0-9]+]]:_(i32) = COPY $vgpr30 ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.35 - ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.35, align 16, addrspace 5) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (i32) from %fixed-stack.35, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.34 - ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.34, addrspace 5) + ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (i32) from %fixed-stack.34, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.33 - ; GCN-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.33, align 8, addrspace 5) + ; GCN-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (i32) from %fixed-stack.33, align 8, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.32 - ; GCN-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s32) from %fixed-stack.32, addrspace 5) + ; GCN-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (i32) from %fixed-stack.32, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.31 - ; GCN-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s32) from %fixed-stack.31, align 16, addrspace 5) + ; GCN-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (i32) from %fixed-stack.31, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.30 - ; GCN-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (s32) from %fixed-stack.30, addrspace 5) + ; GCN-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (i32) from %fixed-stack.30, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.29 - ; GCN-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (s32) from %fixed-stack.29, align 8, addrspace 5) + ; GCN-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (i32) from %fixed-stack.29, align 8, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.28 - ; GCN-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load (s32) from %fixed-stack.28, addrspace 5) + ; GCN-NEXT: [[LOAD7:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load (i32) from %fixed-stack.28, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.27 - ; GCN-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load (s32) from %fixed-stack.27, align 16, addrspace 5) + ; GCN-NEXT: [[LOAD8:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load (i32) from %fixed-stack.27, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX9:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.26 - ; GCN-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX9]](p5) :: (invariant load (s32) from %fixed-stack.26, addrspace 5) + ; GCN-NEXT: [[LOAD9:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX9]](p5) :: (invariant load (i32) from 
%fixed-stack.26, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX10:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.25 - ; GCN-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX10]](p5) :: (invariant load (s32) from %fixed-stack.25, align 8, addrspace 5) + ; GCN-NEXT: [[LOAD10:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX10]](p5) :: (invariant load (i32) from %fixed-stack.25, align 8, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX11:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.24 - ; GCN-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX11]](p5) :: (invariant load (s32) from %fixed-stack.24, addrspace 5) + ; GCN-NEXT: [[LOAD11:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX11]](p5) :: (invariant load (i32) from %fixed-stack.24, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX12:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.23 - ; GCN-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX12]](p5) :: (invariant load (s32) from %fixed-stack.23, align 16, addrspace 5) + ; GCN-NEXT: [[LOAD12:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX12]](p5) :: (invariant load (i32) from %fixed-stack.23, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX13:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.22 - ; GCN-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX13]](p5) :: (invariant load (s32) from %fixed-stack.22, addrspace 5) + ; GCN-NEXT: [[LOAD13:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX13]](p5) :: (invariant load (i32) from %fixed-stack.22, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX14:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.21 - ; GCN-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load (s32) from %fixed-stack.21, align 8, addrspace 5) + ; GCN-NEXT: [[LOAD14:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load (i32) from %fixed-stack.21, align 8, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX15:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.20 - ; GCN-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX15]](p5) :: (invariant load (s32) from %fixed-stack.20, addrspace 5) + ; GCN-NEXT: [[LOAD15:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX15]](p5) :: (invariant load (i32) from %fixed-stack.20, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX16:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.19 - ; GCN-NEXT: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX16]](p5) :: (invariant load (s32) from %fixed-stack.19, align 16, addrspace 5) + ; GCN-NEXT: [[LOAD16:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX16]](p5) :: (invariant load (i32) from %fixed-stack.19, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX17:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.18 - ; GCN-NEXT: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX17]](p5) :: (invariant load (s32) from %fixed-stack.18, addrspace 5) + ; GCN-NEXT: [[LOAD17:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX17]](p5) :: (invariant load (i32) from %fixed-stack.18, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX18:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.17 - ; GCN-NEXT: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX18]](p5) :: (invariant load (s32) from %fixed-stack.17, align 8, addrspace 5) + ; GCN-NEXT: [[LOAD18:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX18]](p5) :: (invariant load (i32) from %fixed-stack.17, align 8, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX19:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.16 - ; GCN-NEXT: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX19]](p5) :: (invariant load (s32) from %fixed-stack.16, addrspace 5) + ; GCN-NEXT: [[LOAD19:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX19]](p5) :: (invariant load (i32) from %fixed-stack.16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX20:%[0-9]+]]:_(p5) = 
G_FRAME_INDEX %fixed-stack.15 - ; GCN-NEXT: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX20]](p5) :: (invariant load (s32) from %fixed-stack.15, align 16, addrspace 5) + ; GCN-NEXT: [[LOAD20:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX20]](p5) :: (invariant load (i32) from %fixed-stack.15, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX21:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.14 - ; GCN-NEXT: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX21]](p5) :: (invariant load (s32) from %fixed-stack.14, addrspace 5) + ; GCN-NEXT: [[LOAD21:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX21]](p5) :: (invariant load (i32) from %fixed-stack.14, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX22:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.13 - ; GCN-NEXT: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX22]](p5) :: (invariant load (s32) from %fixed-stack.13, align 8, addrspace 5) + ; GCN-NEXT: [[LOAD22:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX22]](p5) :: (invariant load (i32) from %fixed-stack.13, align 8, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX23:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.12 - ; GCN-NEXT: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX23]](p5) :: (invariant load (s32) from %fixed-stack.12, addrspace 5) + ; GCN-NEXT: [[LOAD23:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX23]](p5) :: (invariant load (i32) from %fixed-stack.12, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX24:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.11 - ; GCN-NEXT: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX24]](p5) :: (invariant load (s32) from %fixed-stack.11, align 16, addrspace 5) + ; GCN-NEXT: [[LOAD24:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX24]](p5) :: (invariant load (i32) from %fixed-stack.11, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX25:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.10 - ; GCN-NEXT: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX25]](p5) :: (invariant load (s32) from %fixed-stack.10, addrspace 5) + ; GCN-NEXT: [[LOAD25:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX25]](p5) :: (invariant load (i32) from %fixed-stack.10, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX26:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.9 - ; GCN-NEXT: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX26]](p5) :: (invariant load (s32) from %fixed-stack.9, align 8, addrspace 5) + ; GCN-NEXT: [[LOAD26:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX26]](p5) :: (invariant load (i32) from %fixed-stack.9, align 8, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX27:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8 - ; GCN-NEXT: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX27]](p5) :: (invariant load (s32) from %fixed-stack.8, addrspace 5) + ; GCN-NEXT: [[LOAD27:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX27]](p5) :: (invariant load (i32) from %fixed-stack.8, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX28:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7 - ; GCN-NEXT: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX28]](p5) :: (invariant load (s32) from %fixed-stack.7, align 16, addrspace 5) + ; GCN-NEXT: [[LOAD28:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX28]](p5) :: (invariant load (i32) from %fixed-stack.7, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX29:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6 - ; GCN-NEXT: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX29]](p5) :: (invariant load (s32) from %fixed-stack.6, addrspace 5) + ; GCN-NEXT: [[LOAD29:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX29]](p5) :: (invariant load (i32) from %fixed-stack.6, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX30:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 - ; GCN-NEXT: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD 
[[FRAME_INDEX30]](p5) :: (invariant load (s32) from %fixed-stack.5, align 8, addrspace 5) + ; GCN-NEXT: [[LOAD30:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX30]](p5) :: (invariant load (i32) from %fixed-stack.5, align 8, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX31:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 - ; GCN-NEXT: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX31]](p5) :: (invariant load (s32) from %fixed-stack.4, addrspace 5) + ; GCN-NEXT: [[LOAD31:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX31]](p5) :: (invariant load (i32) from %fixed-stack.4, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX32:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 - ; GCN-NEXT: [[LOAD32:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX32]](p5) :: (invariant load (s32) from %fixed-stack.3, align 16, addrspace 5) + ; GCN-NEXT: [[LOAD32:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX32]](p5) :: (invariant load (i32) from %fixed-stack.3, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX33:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 - ; GCN-NEXT: [[LOAD33:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX33]](p5) :: (invariant load (s32) from %fixed-stack.2, addrspace 5) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GCN-NEXT: [[LOAD33:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX33]](p5) :: (invariant load (i32) from %fixed-stack.2, addrspace 5) + ; GCN-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 9 + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 ; GCN-NEXT: [[FRAME_INDEX34:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca0 ; GCN-NEXT: [[FRAME_INDEX35:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.1.alloca1 - ; GCN-NEXT: G_STORE [[C]](s32), [[FRAME_INDEX34]](p5) :: (store (s32) into %ir.alloca0, addrspace 5) - ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX34]], [[C2]](s32) - ; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (store (s32) into %ir.alloca0 + 4, addrspace 5) - ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX34]], [[C3]](s32) - ; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD1]](p5) :: (store (s32) into %ir.alloca0 + 8, addrspace 5) - ; GCN-NEXT: G_STORE [[C1]](s64), [[FRAME_INDEX35]](p5) :: (store (s64) into %ir.alloca1, addrspace 5) - ; GCN-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX35]], [[C3]](s32) - ; GCN-NEXT: G_STORE [[C1]](s64), [[PTR_ADD2]](p5) :: (store (s64) into %ir.alloca1 + 8, addrspace 5) + ; GCN-NEXT: G_STORE [[C]](i32), [[FRAME_INDEX34]](p5) :: (store (i32) into %ir.alloca0, addrspace 5) + ; GCN-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX34]], [[C2]](i32) + ; GCN-NEXT: G_STORE [[C]](i32), [[PTR_ADD]](p5) :: (store (i32) into %ir.alloca0 + 4, addrspace 5) + ; GCN-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX34]], [[C3]](i32) + ; GCN-NEXT: G_STORE [[C]](i32), [[PTR_ADD1]](p5) :: (store (i32) into %ir.alloca0 + 8, addrspace 5) + ; GCN-NEXT: G_STORE [[C1]](i64), [[FRAME_INDEX35]](p5) :: (store (i64) into %ir.alloca1, addrspace 5) + ; GCN-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX35]], [[C3]](i32) + ; GCN-NEXT: G_STORE [[C1]](i64), [[PTR_ADD2]](p5) :: (store (i64) into %ir.alloca1 + 8, addrspace 5) ; GCN-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @void_fastcc_multi_byval ; GCN-NEXT: [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GCN-NEXT: [[COPY41:%[0-9]+]]:_(p4) 
= COPY [[COPY7]](p4) ; GCN-NEXT: [[COPY42:%[0-9]+]]:_(p4) = COPY [[COPY6]](p4) - ; GCN-NEXT: [[COPY43:%[0-9]+]]:_(s64) = COPY [[COPY5]](s64) - ; GCN-NEXT: [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY4]](s32) - ; GCN-NEXT: [[COPY45:%[0-9]+]]:_(s32) = COPY [[COPY3]](s32) - ; GCN-NEXT: [[COPY46:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY47:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GCN-NEXT: [[COPY48:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[COPY43:%[0-9]+]]:_(i64) = COPY [[COPY5]](i64) + ; GCN-NEXT: [[COPY44:%[0-9]+]]:_(i32) = COPY [[COPY4]](i32) + ; GCN-NEXT: [[COPY45:%[0-9]+]]:_(i32) = COPY [[COPY3]](i32) + ; GCN-NEXT: [[COPY46:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; GCN-NEXT: [[COPY47:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GCN-NEXT: [[COPY48:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) ; GCN-NEXT: [[FRAME_INDEX36:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; GCN-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GCN-NEXT: G_MEMCPY [[FRAME_INDEX36]](p5), [[FRAME_INDEX34]](p5), [[C4]](s32), 0 :: (dereferenceable store (s96) into %fixed-stack.1, align 16, addrspace 5), (dereferenceable load (s96) from %ir.alloca0, align 16, addrspace 5) + ; GCN-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GCN-NEXT: G_MEMCPY [[FRAME_INDEX36]](p5), [[FRAME_INDEX34]](p5), [[C4]](i32), 0 :: (dereferenceable store (i96) into %fixed-stack.1, align 16, addrspace 5), (dereferenceable load (i96) from %ir.alloca0, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX37:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; GCN-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GCN-NEXT: G_MEMCPY [[FRAME_INDEX37]](p5), [[FRAME_INDEX35]](p5), [[C5]](s32), 0 :: (dereferenceable store (s128) into %fixed-stack.0, addrspace 5), (dereferenceable load (s128) from %ir.alloca1, align 8, addrspace 5) - ; GCN-NEXT: $vgpr0 = COPY [[COPY9]](s32) - ; GCN-NEXT: [[COPY49:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY49]](<4 x s32>) + ; GCN-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GCN-NEXT: G_MEMCPY [[FRAME_INDEX37]](p5), [[FRAME_INDEX35]](p5), [[C5]](i32), 0 :: (dereferenceable store (i128) into %fixed-stack.0, addrspace 5), (dereferenceable load (i128) from %ir.alloca1, align 8, addrspace 5) + ; GCN-NEXT: $vgpr0 = COPY [[COPY9]](i32) + ; GCN-NEXT: [[COPY49:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY49]](<4 x i32>) ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY40]](p4) ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY41]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY42]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY43]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY44]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY45]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY46]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[COPY47]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY48]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY43]](i64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY44]](i32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY45]](i32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY46]](i32) + ; GCN-NEXT: $sgpr15 = COPY [[COPY47]](i32) + ; GCN-NEXT: $vgpr31 = COPY [[COPY48]](i32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @void_fastcc_multi_byval, 0, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 entry: %alloca0 = alloca [3 x i32], align 16, addrspace(5) @@ -977,183 
+977,183 @@ define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64 ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32(s32) = COPY $sgpr15 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32(s32) = COPY $sgpr14 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32(s32) = COPY $sgpr13 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32(s32) = COPY $sgpr12 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64(s64) = COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr31 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32(i32) = COPY $sgpr15 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32(i32) = COPY $sgpr14 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32(i32) = COPY $sgpr13 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32(i32) = COPY $sgpr12 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64(i64) = COPY $sgpr10_sgpr11 ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr8_sgpr9 ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GCN-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GCN-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GCN-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GCN-NEXT: [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; GCN-NEXT: [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; GCN-NEXT: [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; GCN-NEXT: [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; GCN-NEXT: [[COPY37:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; GCN-NEXT: [[COPY38:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; GCN-NEXT: [[COPY39:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(i32) = 
COPY $vgpr7 + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(i32) = COPY $vgpr17 + ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(i32) = COPY $vgpr18 + ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(i32) = COPY $vgpr19 + ; GCN-NEXT: [[COPY29:%[0-9]+]]:_(i32) = COPY $vgpr20 + ; GCN-NEXT: [[COPY30:%[0-9]+]]:_(i32) = COPY $vgpr21 + ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(i32) = COPY $vgpr22 + ; GCN-NEXT: [[COPY32:%[0-9]+]]:_(i32) = COPY $vgpr23 + ; GCN-NEXT: [[COPY33:%[0-9]+]]:_(i32) = COPY $vgpr24 + ; GCN-NEXT: [[COPY34:%[0-9]+]]:_(i32) = COPY $vgpr25 + ; GCN-NEXT: [[COPY35:%[0-9]+]]:_(i32) = COPY $vgpr26 + ; GCN-NEXT: [[COPY36:%[0-9]+]]:_(i32) = COPY $vgpr27 + ; GCN-NEXT: [[COPY37:%[0-9]+]]:_(i32) = COPY $vgpr28 + ; GCN-NEXT: [[COPY38:%[0-9]+]]:_(i32) = COPY $vgpr29 + ; GCN-NEXT: [[COPY39:%[0-9]+]]:_(i32) = COPY $vgpr30 ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.36 - ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.36, align 16, addrspace 5) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (i32) from %fixed-stack.36, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.35 - ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.35, addrspace 5) + ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (i32) from %fixed-stack.35, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.34 - ; GCN-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.34, align 8, addrspace 5) + ; GCN-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (i32) from %fixed-stack.34, align 8, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.33 - ; GCN-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s32) from %fixed-stack.33, addrspace 5) + ; GCN-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (i32) from %fixed-stack.33, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.32 - ; GCN-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s32) from %fixed-stack.32, align 16, addrspace 5) + ; GCN-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (i32) from %fixed-stack.32, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.31 - ; GCN-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (s32) from %fixed-stack.31, addrspace 5) + ; GCN-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (i32) from %fixed-stack.31, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.30 - ; GCN-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (s32) from %fixed-stack.30, align 8, addrspace 5) + ; GCN-NEXT: 
[[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (i32) from %fixed-stack.30, align 8, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.29 - ; GCN-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load (s32) from %fixed-stack.29, addrspace 5) + ; GCN-NEXT: [[LOAD7:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load (i32) from %fixed-stack.29, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.28 - ; GCN-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load (s32) from %fixed-stack.28, align 16, addrspace 5) + ; GCN-NEXT: [[LOAD8:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load (i32) from %fixed-stack.28, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX9:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.27 - ; GCN-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX9]](p5) :: (invariant load (s32) from %fixed-stack.27, addrspace 5) + ; GCN-NEXT: [[LOAD9:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX9]](p5) :: (invariant load (i32) from %fixed-stack.27, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX10:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.26 - ; GCN-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX10]](p5) :: (invariant load (s32) from %fixed-stack.26, align 8, addrspace 5) + ; GCN-NEXT: [[LOAD10:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX10]](p5) :: (invariant load (i32) from %fixed-stack.26, align 8, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX11:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.25 - ; GCN-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX11]](p5) :: (invariant load (s32) from %fixed-stack.25, addrspace 5) + ; GCN-NEXT: [[LOAD11:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX11]](p5) :: (invariant load (i32) from %fixed-stack.25, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX12:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.24 - ; GCN-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX12]](p5) :: (invariant load (s32) from %fixed-stack.24, align 16, addrspace 5) + ; GCN-NEXT: [[LOAD12:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX12]](p5) :: (invariant load (i32) from %fixed-stack.24, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX13:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.23 - ; GCN-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX13]](p5) :: (invariant load (s32) from %fixed-stack.23, addrspace 5) + ; GCN-NEXT: [[LOAD13:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX13]](p5) :: (invariant load (i32) from %fixed-stack.23, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX14:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.22 - ; GCN-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load (s32) from %fixed-stack.22, align 8, addrspace 5) + ; GCN-NEXT: [[LOAD14:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load (i32) from %fixed-stack.22, align 8, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX15:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.21 - ; GCN-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX15]](p5) :: (invariant load (s32) from %fixed-stack.21, addrspace 5) + ; GCN-NEXT: [[LOAD15:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX15]](p5) :: (invariant load (i32) from %fixed-stack.21, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX16:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.20 - ; GCN-NEXT: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX16]](p5) :: (invariant load (s32) from %fixed-stack.20, align 16, addrspace 5) + ; GCN-NEXT: [[LOAD16:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX16]](p5) :: (invariant load (i32) from 
%fixed-stack.20, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX17:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.19 - ; GCN-NEXT: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX17]](p5) :: (invariant load (s32) from %fixed-stack.19, addrspace 5) + ; GCN-NEXT: [[LOAD17:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX17]](p5) :: (invariant load (i32) from %fixed-stack.19, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX18:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.18 - ; GCN-NEXT: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX18]](p5) :: (invariant load (s32) from %fixed-stack.18, align 8, addrspace 5) + ; GCN-NEXT: [[LOAD18:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX18]](p5) :: (invariant load (i32) from %fixed-stack.18, align 8, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX19:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.17 - ; GCN-NEXT: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX19]](p5) :: (invariant load (s32) from %fixed-stack.17, addrspace 5) + ; GCN-NEXT: [[LOAD19:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX19]](p5) :: (invariant load (i32) from %fixed-stack.17, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX20:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.16 - ; GCN-NEXT: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX20]](p5) :: (invariant load (s32) from %fixed-stack.16, align 16, addrspace 5) + ; GCN-NEXT: [[LOAD20:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX20]](p5) :: (invariant load (i32) from %fixed-stack.16, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX21:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.15 - ; GCN-NEXT: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX21]](p5) :: (invariant load (s32) from %fixed-stack.15, addrspace 5) + ; GCN-NEXT: [[LOAD21:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX21]](p5) :: (invariant load (i32) from %fixed-stack.15, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX22:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.14 - ; GCN-NEXT: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX22]](p5) :: (invariant load (s32) from %fixed-stack.14, align 8, addrspace 5) + ; GCN-NEXT: [[LOAD22:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX22]](p5) :: (invariant load (i32) from %fixed-stack.14, align 8, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX23:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.13 - ; GCN-NEXT: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX23]](p5) :: (invariant load (s32) from %fixed-stack.13, addrspace 5) + ; GCN-NEXT: [[LOAD23:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX23]](p5) :: (invariant load (i32) from %fixed-stack.13, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX24:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.12 - ; GCN-NEXT: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX24]](p5) :: (invariant load (s32) from %fixed-stack.12, align 16, addrspace 5) + ; GCN-NEXT: [[LOAD24:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX24]](p5) :: (invariant load (i32) from %fixed-stack.12, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX25:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.11 - ; GCN-NEXT: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX25]](p5) :: (invariant load (s32) from %fixed-stack.11, addrspace 5) + ; GCN-NEXT: [[LOAD25:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX25]](p5) :: (invariant load (i32) from %fixed-stack.11, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX26:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.10 - ; GCN-NEXT: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX26]](p5) :: (invariant load (s32) from %fixed-stack.10, align 8, addrspace 5) + ; GCN-NEXT: [[LOAD26:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX26]](p5) :: (invariant load (i32) from %fixed-stack.10, align 8, addrspace 5) ; GCN-NEXT: 
[[FRAME_INDEX27:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.9 - ; GCN-NEXT: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX27]](p5) :: (invariant load (s32) from %fixed-stack.9, addrspace 5) + ; GCN-NEXT: [[LOAD27:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX27]](p5) :: (invariant load (i32) from %fixed-stack.9, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX28:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8 - ; GCN-NEXT: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX28]](p5) :: (invariant load (s32) from %fixed-stack.8, align 16, addrspace 5) + ; GCN-NEXT: [[LOAD28:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX28]](p5) :: (invariant load (i32) from %fixed-stack.8, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX29:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7 - ; GCN-NEXT: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX29]](p5) :: (invariant load (s32) from %fixed-stack.7, addrspace 5) + ; GCN-NEXT: [[LOAD29:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX29]](p5) :: (invariant load (i32) from %fixed-stack.7, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX30:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6 - ; GCN-NEXT: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX30]](p5) :: (invariant load (s32) from %fixed-stack.6, align 8, addrspace 5) + ; GCN-NEXT: [[LOAD30:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX30]](p5) :: (invariant load (i32) from %fixed-stack.6, align 8, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX31:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 - ; GCN-NEXT: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX31]](p5) :: (invariant load (s32) from %fixed-stack.5, addrspace 5) + ; GCN-NEXT: [[LOAD31:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX31]](p5) :: (invariant load (i32) from %fixed-stack.5, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX32:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 - ; GCN-NEXT: [[LOAD32:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX32]](p5) :: (invariant load (s32) from %fixed-stack.4, align 16, addrspace 5) + ; GCN-NEXT: [[LOAD32:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX32]](p5) :: (invariant load (i32) from %fixed-stack.4, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX33:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 - ; GCN-NEXT: [[LOAD33:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX33]](p5) :: (invariant load (s32) from %fixed-stack.3, addrspace 5) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GCN-NEXT: [[LOAD33:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX33]](p5) :: (invariant load (i32) from %fixed-stack.3, addrspace 5) + ; GCN-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 9 + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 ; GCN-NEXT: [[FRAME_INDEX34:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca - ; GCN-NEXT: G_STORE [[C]](s32), [[FRAME_INDEX34]](p5) :: (store (s32) into %ir.alloca, addrspace 5) - ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX34]], [[C2]](s32) - ; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (store (s32) into %ir.alloca + 4, addrspace 5) - ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX34]], [[C3]](s32) - ; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD1]](p5) :: (store (s32) into %ir.alloca + 8, addrspace 5) + ; GCN-NEXT: G_STORE [[C]](i32), [[FRAME_INDEX34]](p5) :: (store (i32) into %ir.alloca, addrspace 5) + ; GCN-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX34]], [[C2]](i32) + ; GCN-NEXT: G_STORE [[C]](i32), [[PTR_ADD]](p5) 
:: (store (i32) into %ir.alloca + 4, addrspace 5) + ; GCN-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX34]], [[C3]](i32) + ; GCN-NEXT: G_STORE [[C]](i32), [[PTR_ADD1]](p5) :: (store (i32) into %ir.alloca + 8, addrspace 5) ; GCN-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @void_fastcc_byval_and_stack_passed ; GCN-NEXT: [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GCN-NEXT: [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) ; GCN-NEXT: [[COPY42:%[0-9]+]]:_(p4) = COPY [[COPY6]](p4) - ; GCN-NEXT: [[COPY43:%[0-9]+]]:_(s64) = COPY [[COPY5]](s64) - ; GCN-NEXT: [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY4]](s32) - ; GCN-NEXT: [[COPY45:%[0-9]+]]:_(s32) = COPY [[COPY3]](s32) - ; GCN-NEXT: [[COPY46:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY47:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GCN-NEXT: [[COPY48:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[COPY43:%[0-9]+]]:_(i64) = COPY [[COPY5]](i64) + ; GCN-NEXT: [[COPY44:%[0-9]+]]:_(i32) = COPY [[COPY4]](i32) + ; GCN-NEXT: [[COPY45:%[0-9]+]]:_(i32) = COPY [[COPY3]](i32) + ; GCN-NEXT: [[COPY46:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; GCN-NEXT: [[COPY47:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GCN-NEXT: [[COPY48:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) ; GCN-NEXT: [[FRAME_INDEX35:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 - ; GCN-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GCN-NEXT: G_MEMCPY [[FRAME_INDEX35]](p5), [[FRAME_INDEX34]](p5), [[C4]](s32), 0 :: (dereferenceable store (s96) into %fixed-stack.2, align 16, addrspace 5), (dereferenceable load (s96) from %ir.alloca, align 16, addrspace 5) + ; GCN-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GCN-NEXT: G_MEMCPY [[FRAME_INDEX35]](p5), [[FRAME_INDEX34]](p5), [[C4]](i32), 0 :: (dereferenceable store (i96) into %fixed-stack.2, align 16, addrspace 5), (dereferenceable load (i96) from %ir.alloca, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX36:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; GCN-NEXT: G_STORE [[C1]](s32), [[FRAME_INDEX36]](p5) :: (store (s32) into %fixed-stack.1, addrspace 5) + ; GCN-NEXT: G_STORE [[C1]](i32), [[FRAME_INDEX36]](p5) :: (store (i32) into %fixed-stack.1, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX37:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; GCN-NEXT: G_STORE [[COPY9]](s32), [[FRAME_INDEX37]](p5) :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN-NEXT: $vgpr0 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr3 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr4 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr5 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr6 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr7 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr8 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr9 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr10 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr11 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr12 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr13 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr14 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr15 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr16 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr17 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr18 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr19 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr20 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr21 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr22 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr23 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr24 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr25 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr26 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr27 = 
COPY [[C1]](s32) - ; GCN-NEXT: $vgpr28 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr29 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr30 = COPY [[C1]](s32) - ; GCN-NEXT: [[COPY49:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY49]](<4 x s32>) + ; GCN-NEXT: G_STORE [[COPY9]](i32), [[FRAME_INDEX37]](p5) :: (store (i32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN-NEXT: $vgpr0 = COPY [[C1]](i32) + ; GCN-NEXT: $vgpr1 = COPY [[C1]](i32) + ; GCN-NEXT: $vgpr2 = COPY [[C1]](i32) + ; GCN-NEXT: $vgpr3 = COPY [[C1]](i32) + ; GCN-NEXT: $vgpr4 = COPY [[C1]](i32) + ; GCN-NEXT: $vgpr5 = COPY [[C1]](i32) + ; GCN-NEXT: $vgpr6 = COPY [[C1]](i32) + ; GCN-NEXT: $vgpr7 = COPY [[C1]](i32) + ; GCN-NEXT: $vgpr8 = COPY [[C1]](i32) + ; GCN-NEXT: $vgpr9 = COPY [[C1]](i32) + ; GCN-NEXT: $vgpr10 = COPY [[C1]](i32) + ; GCN-NEXT: $vgpr11 = COPY [[C1]](i32) + ; GCN-NEXT: $vgpr12 = COPY [[C1]](i32) + ; GCN-NEXT: $vgpr13 = COPY [[C1]](i32) + ; GCN-NEXT: $vgpr14 = COPY [[C1]](i32) + ; GCN-NEXT: $vgpr15 = COPY [[C1]](i32) + ; GCN-NEXT: $vgpr16 = COPY [[C1]](i32) + ; GCN-NEXT: $vgpr17 = COPY [[C1]](i32) + ; GCN-NEXT: $vgpr18 = COPY [[C1]](i32) + ; GCN-NEXT: $vgpr19 = COPY [[C1]](i32) + ; GCN-NEXT: $vgpr20 = COPY [[C1]](i32) + ; GCN-NEXT: $vgpr21 = COPY [[C1]](i32) + ; GCN-NEXT: $vgpr22 = COPY [[C1]](i32) + ; GCN-NEXT: $vgpr23 = COPY [[C1]](i32) + ; GCN-NEXT: $vgpr24 = COPY [[C1]](i32) + ; GCN-NEXT: $vgpr25 = COPY [[C1]](i32) + ; GCN-NEXT: $vgpr26 = COPY [[C1]](i32) + ; GCN-NEXT: $vgpr27 = COPY [[C1]](i32) + ; GCN-NEXT: $vgpr28 = COPY [[C1]](i32) + ; GCN-NEXT: $vgpr29 = COPY [[C1]](i32) + ; GCN-NEXT: $vgpr30 = COPY [[C1]](i32) + ; GCN-NEXT: [[COPY49:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY49]](<4 x i32>) ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY40]](p4) ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY41]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY42]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY43]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY44]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY45]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY46]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[COPY47]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY48]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY43]](i64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY44]](i32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY45]](i32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY46]](i32) + ; GCN-NEXT: $sgpr15 = COPY [[COPY47]](i32) + ; GCN-NEXT: $vgpr31 = COPY [[COPY48]](i32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @void_fastcc_byval_and_stack_passed, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 entry: %alloca = alloca [3 x i32], align 16, addrspace(5) @@ -1169,42 +1169,42 @@ define hidden fastcc i64 @sibling_call_i64_fastcc_i64(i64 %a) #1 { ; GCN: bb.1.entry: ; GCN-NEXT: liveins: 
$sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32(s32) = COPY $sgpr15 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32(s32) = COPY $sgpr14 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32(s32) = COPY $sgpr13 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32(s32) = COPY $sgpr12 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64(s64) = COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr31 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32(i32) = COPY $sgpr15 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32(i32) = COPY $sgpr14 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32(i32) = COPY $sgpr13 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32(i32) = COPY $sgpr12 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64(i64) = COPY $sgpr10_sgpr11 ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr8_sgpr9 ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY9]](s32), [[COPY10]](s32) + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY9]](i32), [[COPY10]](i32) ; GCN-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @i64_fastcc_i64 ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]](p4) - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY5]](s64) - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]](s32) - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]](s32) - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(i64) = COPY [[COPY5]](i64) + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]](i32) + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]](i32) + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[MV]](i64) + ; GCN-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GCN-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY13]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY17]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[COPY18]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY19]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](i64) + ; GCN-NEXT: 
$sgpr12 = COPY [[COPY15]](i32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY16]](i32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY17]](i32) + ; GCN-NEXT: $sgpr15 = COPY [[COPY18]](i32) + ; GCN-NEXT: $vgpr31 = COPY [[COPY19]](i32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i64_fastcc_i64, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 entry: %ret = tail call fastcc i64 @i64_fastcc_i64(i64 %a) @@ -1218,15 +1218,15 @@ define hidden fastcc ptr addrspace(1) @sibling_call_p1i8_fastcc_p1i8(ptr addrspa ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) ; GCN-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @p1i8_fastcc_p1i8 - ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](p1) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[MV]](p1) + ; GCN-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GCN-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x i32>) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @p1i8_fastcc_p1i8, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %ret = tail call fastcc ptr addrspace(1) @p1i8_fastcc_p1i8(ptr addrspace(1) %a) @@ -1240,40 +1240,40 @@ define hidden fastcc i16 @sibling_call_i16_fastcc_i16(i16 %a) #1 { ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32(s32) = COPY $sgpr15 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32(s32) = COPY $sgpr14 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32(s32) = COPY $sgpr13 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32(s32) = COPY $sgpr12 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64(s64) = COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr31 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32(i32) = COPY $sgpr15 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32(i32) = COPY $sgpr14 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32(i32) = COPY $sgpr13 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32(i32) = COPY $sgpr12 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64(i64) = COPY $sgpr10_sgpr11 ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr8_sgpr9 ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) ; GCN-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE 
@i16_fastcc_i16 ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]](p4) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY5]](s64) - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY4]](s32) - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]](s32) - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) - ; GCN-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY5]](i64) + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY4]](i32) + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY3]](i32) + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GCN-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[TRUNC]](i16) + ; GCN-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x i32>) ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[COPY17]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY18]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; GCN-NEXT: $sgpr15 = COPY [[COPY17]](i32) + ; GCN-NEXT: $vgpr31 = COPY [[COPY18]](i32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i16_fastcc_i16, 0, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 entry: %ret = tail call fastcc i16 @i16_fastcc_i16(i16 %a) @@ -1287,40 +1287,42 @@ define hidden fastcc half @sibling_call_f16_fastcc_f16(half %a) #1 { ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32(s32) = COPY $sgpr15 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32(s32) = COPY $sgpr14 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32(s32) = COPY $sgpr13 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32(s32) = COPY $sgpr12 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64(s64) = COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr31 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32(i32) = COPY $sgpr15 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32(i32) = COPY $sgpr14 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32(i32) = COPY $sgpr13 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32(i32) = COPY $sgpr12 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64(i64) = COPY $sgpr10_sgpr11 ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64(p4) = COPY 
$sgpr8_sgpr9 ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) ; GCN-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @f16_fastcc_f16 ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]](p4) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY5]](s64) - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY4]](s32) - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]](s32) - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) - ; GCN-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY [[COPY5]](i64) + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY4]](i32) + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY3]](i32) + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GCN-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; GCN-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x i32>) ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[COPY17]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY18]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](i64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](i32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](i32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](i32) + ; GCN-NEXT: $sgpr15 = COPY [[COPY17]](i32) + ; GCN-NEXT: $vgpr31 = COPY [[COPY18]](i32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @f16_fastcc_f16, 0, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 entry: %ret = tail call fastcc half @f16_fastcc_f16(half %a) @@ -1334,47 +1336,47 @@ define hidden fastcc <3 x i16> @sibling_call_v3i16_fastcc_v3i16(<3 x i16> %a) #1 ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32(s32) = COPY $sgpr15 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32(s32) = COPY $sgpr14 - ; GCN-NEXT: 
[[COPY3:%[0-9]+]]:sgpr_32(s32) = COPY $sgpr13 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32(s32) = COPY $sgpr12 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64(s64) = COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr31 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32(i32) = COPY $sgpr15 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32(i32) = COPY $sgpr14 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32(i32) = COPY $sgpr13 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32(i32) = COPY $sgpr12 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64(i64) = COPY $sgpr10_sgpr11 ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr8_sgpr9 ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY9]](<2 x s16>), [[COPY10]](<2 x s16>) - ; GCN-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>) - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16) + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[COPY9]](<2 x i16>), [[COPY10]](<2 x i16>) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(i16), [[UV1:%[0-9]+]]:_(i16), [[UV2:%[0-9]+]]:_(i16), [[UV3:%[0-9]+]]:_(i16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x i16>) + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i16>) = G_BUILD_VECTOR [[UV]](i16), [[UV1]](i16), [[UV2]](i16) ; GCN-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @v3i16_fastcc_v3i16 ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]](p4) - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY5]](s64) - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]](s32) - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]](s32) - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s16>) - ; GCN-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[DEF]](s16) - ; GCN-NEXT: [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s16>) - ; GCN-NEXT: $vgpr0 = COPY [[UV7]](<2 x s16>) - ; GCN-NEXT: $vgpr1 = COPY [[UV8]](<2 x s16>) - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(i64) = COPY [[COPY5]](i64) + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]](i32) + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]](i32) + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GCN-NEXT: [[UV4:%[0-9]+]]:_(i16), [[UV5:%[0-9]+]]:_(i16), [[UV6:%[0-9]+]]:_(i16) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x i16>) + ; GCN-NEXT: 
[[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i16>) = G_BUILD_VECTOR [[UV4]](i16), [[UV5]](i16), [[UV6]](i16), [[DEF]](i16) + ; GCN-NEXT: [[UV7:%[0-9]+]]:_(<2 x i16>), [[UV8:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x i16>) + ; GCN-NEXT: $vgpr0 = COPY [[UV7]](<2 x i16>) + ; GCN-NEXT: $vgpr1 = COPY [[UV8]](<2 x i16>) + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY13]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY17]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[COPY18]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY19]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](i64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY15]](i32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY16]](i32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY17]](i32) + ; GCN-NEXT: $sgpr15 = COPY [[COPY18]](i32) + ; GCN-NEXT: $vgpr31 = COPY [[COPY19]](i32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @v3i16_fastcc_v3i16, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 entry: %ret = tail call fastcc <3 x i16> @v3i16_fastcc_v3i16(<3 x i16> %a) @@ -1388,42 +1390,42 @@ define hidden fastcc <4 x i16> @sibling_call_v4i16_fastcc_v4i16(<4 x i16> %a) #1 ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32(s32) = COPY $sgpr15 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32(s32) = COPY $sgpr14 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32(s32) = COPY $sgpr13 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32(s32) = COPY $sgpr12 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64(s64) = COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr31 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32(i32) = COPY $sgpr15 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32(i32) = COPY $sgpr14 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32(i32) = COPY $sgpr13 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32(i32) = COPY $sgpr12 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64(i64) = COPY $sgpr10_sgpr11 ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr8_sgpr9 ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY9]](<2 x s16>), [[COPY10]](<2 x s16>) + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[COPY9]](<2 x i16>), [[COPY10]](<2 x i16>) ; GCN-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @v4i16_fastcc_v4i16 ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]](p4) - ; 
GCN-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY5]](s64) - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]](s32) - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]](s32) - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(i64) = COPY [[COPY5]](i64) + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY4]](i32) + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY3]](i32) + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x i16>) + ; GCN-NEXT: $vgpr0 = COPY [[UV]](<2 x i16>) + ; GCN-NEXT: $vgpr1 = COPY [[UV1]](<2 x i16>) + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x i32>) ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY13]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY17]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[COPY18]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY19]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](i64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY15]](i32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY16]](i32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY17]](i32) + ; GCN-NEXT: $sgpr15 = COPY [[COPY18]](i32) + ; GCN-NEXT: $vgpr31 = COPY [[COPY19]](i32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @v4i16_fastcc_v4i16, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 entry: %ret = tail call fastcc <4 x i16> @v4i16_fastcc_v4i16(<4 x i16> %a) @@ -1437,48 +1439,48 @@ define hidden fastcc <2 x i64> @sibling_call_v2i64_fastcc_v2i64(<2 x i64> %a) #1 ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32(s32) = COPY $sgpr15 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32(s32) = COPY $sgpr14 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32(s32) = COPY $sgpr13 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32(s32) = COPY $sgpr12 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64(s64) = COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr31 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32(i32) = COPY $sgpr15 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32(i32) = COPY $sgpr14 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32(i32) = COPY $sgpr13 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32(i32) = COPY $sgpr12 + ; GCN-NEXT: 
[[COPY5:%[0-9]+]]:sgpr_64(i64) = COPY $sgpr10_sgpr11 ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr8_sgpr9 ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY9]](s32), [[COPY10]](s32) - ; GCN-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY11]](s32), [[COPY12]](s32) - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY9]](i32), [[COPY10]](i32) + ; GCN-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY11]](i32), [[COPY12]](i32) + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64) ; GCN-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @v2i64_fastcc_v2i64 ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(p4) = COPY [[COPY6]](p4) - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s64) = COPY [[COPY5]](s64) - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY4]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY3]](s32) - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s64>) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GCN-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY22]](<4 x s32>) + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(i64) = COPY [[COPY5]](i64) + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY4]](i32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[COPY3]](i32) + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x i64>) + ; GCN-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GCN-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GCN-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; GCN-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY22]](<4 x i32>) ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY13]](p4) ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY14]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY15]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY16]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY17]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY18]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY19]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[COPY20]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY21]](s32) + ; 
GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY16]](i64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY17]](i32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY18]](i32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY19]](i32) + ; GCN-NEXT: $sgpr15 = COPY [[COPY20]](i32) + ; GCN-NEXT: $vgpr31 = COPY [[COPY21]](i32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @v2i64_fastcc_v2i64, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 entry: %ret = tail call fastcc <2 x i64> @v2i64_fastcc_v2i64(<2 x i64> %a) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-struct-return-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-struct-return-intrinsics.ll index cb81871be3a58..00844fa52c2cf 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-struct-return-intrinsics.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-struct-return-intrinsics.ll @@ -8,13 +8,13 @@ define amdgpu_ps void @test_div_scale(float %arg0, float %arg1) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(f32) = COPY $vgpr1 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), -1 - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[INT1]](s1) - ; CHECK-NEXT: G_STORE [[INT]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[SEXT]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(f32), [[INT1:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](f32), [[COPY1]](f32), -1 + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[INT1]](i1) + ; CHECK-NEXT: G_STORE [[INT]](f32), [[DEF]](p1) :: (store (f32) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[SEXT]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 %call = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %arg0, float %arg1, i1 true) %extract0 = extractvalue { float, i1 } %call, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll index 7be77f403a214..f79b626956ce1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll @@ -8,7 +8,7 @@ define void @tail_call_void_func_void() { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr31 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 @@ -21,23 +21,23 @@ define void @tail_call_void_func_void() { ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY 
[[COPY7]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(i64) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x i32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](i64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](i32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](i32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](i32) + ; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](i32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](i32) ; CHECK-NEXT: SI_TCRETURN [[GV]](p0), @external_void_func_void, 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 tail call void @external_void_func_void() ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-zext-vec-index.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-zext-vec-index.ll index b655f575cf3e3..c6abc03a2aaa8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-zext-vec-index.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-zext-vec-index.ll @@ -4,11 +4,11 @@ define i8 @f_i1_1() { ; CHECK-LABEL: name: f_i1_1 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<256 x s8>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](<256 x s8>), [[C]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC]](s8) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<256 x i8>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(i8) = G_EXTRACT_VECTOR_ELT [[DEF]](<256 x i8>), [[C]](i32) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[EVEC]](i8) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %E1 = extractelement <256 x i8> undef, i1 true ret i8 %E1 @@ -17,11 +17,11 @@ define i8 @f_i1_1() { define i8 @f_i8_255() { ; CHECK-LABEL: name: f_i8_255 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<256 x s8>) = G_IMPLICIT_DEF - ; CHECK-NEXT: 
[[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](<256 x s8>), [[C]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC]](s8) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<256 x i8>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(i8) = G_EXTRACT_VECTOR_ELT [[DEF]](<256 x i8>), [[C]](i32) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[EVEC]](i8) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %E1 = extractelement <256 x i8> undef, i8 255 ret i8 %E1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.atomic.dim.a16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.atomic.dim.a16.ll index 3b16c77548a23..ddd914e1db87f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.atomic.dim.a16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.atomic.dim.a16.ll @@ -9,66 +9,69 @@ define amdgpu_ps float @atomic_swap_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.swap.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), 
[[DEF]](i16) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.swap.1d), [[COPY8]](i32), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX10NSA-LABEL: name: atomic_swap_1d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.swap.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.swap.1d), [[COPY8]](i32), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX10NSA-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX12-LABEL: name: atomic_swap_1d 
; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.swap.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.swap.1d), [[COPY8]](i32), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX12-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) @@ -81,66 +84,69 @@ define amdgpu_ps float @atomic_add_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: 
[[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](i32), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX10NSA-LABEL: name: atomic_add_1d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: 
[[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](i32), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX10NSA-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX12-LABEL: name: atomic_add_1d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: 
[[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](i32), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX12-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) @@ -153,66 +159,69 @@ define amdgpu_ps float @atomic_sub_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.sub.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), 
[[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.sub.1d), [[COPY8]](i32), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX10NSA-LABEL: name: atomic_sub_1d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.sub.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.sub.1d), [[COPY8]](i32), 
[[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX10NSA-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX12-LABEL: name: atomic_sub_1d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.sub.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.sub.1d), [[COPY8]](i32), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX12-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) @@ -225,66 +234,69 @@ define amdgpu_ps float @atomic_smin_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, 
$sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smin.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smin.1d), [[COPY8]](i32), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX10NSA-LABEL: name: atomic_smin_1d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), 
[[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smin.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smin.1d), [[COPY8]](i32), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX10NSA-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX12-LABEL: name: atomic_smin_1d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smin.1d), 
[[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smin.1d), [[COPY8]](i32), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX12-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) @@ -298,66 +310,69 @@ define amdgpu_ps float @atomic_umin_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umin.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) 
= COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umin.1d), [[COPY8]](i32), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX10NSA-LABEL: name: atomic_umin_1d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umin.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC 
[[COPY9]](i32) + ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umin.1d), [[COPY8]](i32), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX10NSA-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX12-LABEL: name: atomic_umin_1d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umin.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umin.1d), [[COPY8]](i32), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX12-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX12-NEXT: 
SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) @@ -370,66 +385,69 @@ define amdgpu_ps float @atomic_smax_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smax.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smax.1d), [[COPY8]](i32), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX10NSA-LABEL: name: atomic_smax_1d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; 
GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smax.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smax.1d), [[COPY8]](i32), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX10NSA-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX12-LABEL: name: atomic_smax_1d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; 
GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smax.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smax.1d), [[COPY8]](i32), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX12-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) @@ -442,66 +460,69 @@ define amdgpu_ps float @atomic_umax_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umax.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile 
dereferenceable load store (s32), addrspace 8) - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umax.1d), [[COPY8]](i32), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX10NSA-LABEL: name: atomic_umax_1d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umax.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; 
GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umax.1d), [[COPY8]](i32), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX10NSA-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX12-LABEL: name: atomic_umax_1d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umax.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = 
G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umax.1d), [[COPY8]](i32), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX12-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) @@ -514,66 +535,69 @@ define amdgpu_ps float @atomic_and_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.and.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.and.1d), [[COPY8]](i32), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX10NSA-LABEL: name: atomic_and_1d ; GFX10NSA: bb.1.main_body: ; 
GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.and.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.and.1d), [[COPY8]](i32), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX10NSA-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX12-LABEL: name: atomic_and_1d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; 
GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.and.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.and.1d), [[COPY8]](i32), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX12-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.and.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) @@ -586,66 +610,69 @@ define amdgpu_ps float @atomic_or_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) { ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: 
[[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.or.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.or.1d), [[COPY8]](i32), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX10NSA-LABEL: name: atomic_or_1d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.or.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY 
$sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.or.1d), [[COPY8]](i32), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX10NSA-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX12-LABEL: name: atomic_or_1d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.or.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY 
$vgpr0 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.or.1d), [[COPY8]](i32), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX12-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.or.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) @@ -658,66 +685,69 @@ define amdgpu_ps float @atomic_xor_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.xor.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.xor.1d), [[COPY8]](i32), [[BUILD_VECTOR1]](<2 x i16>), 
[[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX10NSA-LABEL: name: atomic_xor_1d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.xor.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.xor.1d), [[COPY8]](i32), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX10NSA-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX12-LABEL: name: atomic_xor_1d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY 
$sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.xor.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.xor.1d), [[COPY8]](i32), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX12-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) @@ -730,66 +760,69 @@ define amdgpu_ps float @atomic_inc_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = 
G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.inc.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.inc.1d), [[COPY8]](i32), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX10NSA-LABEL: name: atomic_inc_1d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD 
intrinsic(@llvm.amdgcn.image.atomic.inc.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.inc.1d), [[COPY8]](i32), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX10NSA-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX12-LABEL: name: atomic_inc_1d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.inc.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = 
COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.inc.1d), [[COPY8]](i32), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX12-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) @@ -802,66 +835,69 @@ define amdgpu_ps float @atomic_dec_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.dec.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = 
G_TRUNC [[COPY9]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.dec.1d), [[COPY8]](i32), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX10NSA-LABEL: name: atomic_dec_1d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.dec.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.dec.1d), [[COPY8]](i32), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST 
[[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX10NSA-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX12-LABEL: name: atomic_dec_1d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.dec.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.dec.1d), [[COPY8]](i32), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX12-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) @@ -874,72 +910,75 @@ define amdgpu_ps float @atomic_cmpswap_1d(<8 x i32> inreg %rsrc, i32 %cmp, i32 % ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; 
GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY10]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX10NSA-LABEL: name: atomic_cmpswap_1d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: 
[[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY10]](i32) + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX10NSA-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX12-LABEL: name: atomic_cmpswap_1d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = 
G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY10]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX12-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i16(i32 %cmp, i32 %swap, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) @@ -952,69 +991,72 @@ define amdgpu_ps float @atomic_add_2d(<8 x i32> inreg %rsrc, i32 %data, i16 %s, ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), 
[[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY10]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2d), [[COPY8]](i32), [[BUILD_VECTOR1]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX10NSA-LABEL: name: atomic_add_2d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x 
s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY10]](i32) + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2d), [[COPY8]](i32), [[BUILD_VECTOR1]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX10NSA-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX12-LABEL: name: atomic_add_2d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX12-NEXT: $vgpr0 = COPY 
[[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY10]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2d), [[COPY8]](i32), [[BUILD_VECTOR1]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX12-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i16(i32 %data, i16 %s, i16 %t, <8 x i32> %rsrc, i32 0, i32 0) @@ -1027,82 +1069,85 @@ define amdgpu_ps float @atomic_add_3d(<8 x i32> inreg %rsrc, i32 %data, i16 %s, ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.3d), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), 
$noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY10]](i32) + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY11]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.3d), [[COPY8]](i32), [[CONCAT_VECTORS]](<4 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX10NSA-LABEL: name: atomic_add_3d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10NSA-NEXT: 
[[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.3d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY10]](i32) + ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY11]](i32) + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[DEF]](i16) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.3d), [[COPY8]](i32), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX10NSA-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX12-LABEL: name: atomic_add_3d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC 
[[COPY10]](s32) - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.3d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY10]](i32) + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY11]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.3d), [[COPY8]](i32), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX12-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.add.3d.i32.i16(i32 %data, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 0, i32 0) @@ -1115,82 +1160,85 @@ define amdgpu_ps float @atomic_add_cube(<8 x i32> inreg %rsrc, i32 %data, i16 %s ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), 
[[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.cube), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY10]](i32) + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY11]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.cube), [[COPY8]](i32), [[CONCAT_VECTORS]](<4 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX10NSA-LABEL: name: atomic_add_cube ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; 
GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.cube), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY10]](i32) + ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY11]](i32) + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[DEF]](i16) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.cube), [[COPY8]](i32), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX10NSA-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX12-LABEL: name: atomic_add_cube ; GFX12: bb.1.main_body: ; 
GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.cube), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY10]](i32) + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY11]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.cube), [[COPY8]](i32), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST 
[[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX12-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.add.cube.i32.i16(i32 %data, i16 %s, i16 %t, i16 %face, <8 x i32> %rsrc, i32 0, i32 0) @@ -1203,69 +1251,72 @@ define amdgpu_ps float @atomic_add_1darray(<8 x i32> inreg %rsrc, i32 %data, i16 ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1darray), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY10]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1darray), [[COPY8]](i32), [[BUILD_VECTOR1]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX10NSA-LABEL: name: atomic_add_1darray ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, 
$sgpr9, $vgpr0, $vgpr1, $vgpr2 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1darray), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY10]](i32) + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1darray), [[COPY8]](i32), [[BUILD_VECTOR1]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX10NSA-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX12-LABEL: name: atomic_add_1darray ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; 
GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1darray), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY10]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1darray), [[COPY8]](i32), [[BUILD_VECTOR1]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX12-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i16(i32 %data, i16 %s, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0) @@ -1278,82 +1329,85 @@ define amdgpu_ps float @atomic_add_2darray(<8 x i32> inreg %rsrc, i32 %data, i16 ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), 
[[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darray), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY10]](i32) + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY11]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darray), [[COPY8]](i32), [[CONCAT_VECTORS]](<4 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX10NSA-LABEL: name: atomic_add_2darray ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: 
[[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darray), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY10]](i32) + ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY11]](i32) + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[DEF]](i16) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darray), [[COPY8]](i32), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX10NSA-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX12-LABEL: name: 
atomic_add_2darray ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darray), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY10]](i32) + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY11]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darray), [[COPY8]](i32), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX12-NEXT: 
[[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX12-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i16(i32 %data, i16 %s, i16 %t, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0) @@ -1366,82 +1420,85 @@ define amdgpu_ps float @atomic_add_2dmsaa(<8 x i32> inreg %rsrc, i32 %data, i16 ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2dmsaa), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY10]](i32) + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY11]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: 
[[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2dmsaa), [[COPY8]](i32), [[CONCAT_VECTORS]](<4 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX10NSA-LABEL: name: atomic_add_2dmsaa ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2dmsaa), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10NSA-NEXT: 
[[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY10]](i32) + ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY11]](i32) + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[DEF]](i16) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2dmsaa), [[COPY8]](i32), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX10NSA-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX12-LABEL: name: atomic_add_2dmsaa ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2dmsaa), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), 
[[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY10]](i32) + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY11]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2dmsaa), [[COPY8]](i32), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX12-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i16(i32 %data, i16 %s, i16 %t, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0) @@ -1454,85 +1511,88 @@ define amdgpu_ps float @atomic_add_2darraymsaa(<8 x i32> inreg %rsrc, i32 %data, ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darraymsaa), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX9-NEXT: $vgpr0 = COPY 
[[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY10]](i32) + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY11]](i32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darraymsaa), [[COPY8]](i32), [[CONCAT_VECTORS]](<4 x i16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX10NSA-LABEL: name: atomic_add_2darraymsaa ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x 
s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darraymsaa), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY10]](i32) + ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY11]](i32) + ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darraymsaa), [[COPY8]](i32), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX10NSA-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX12-LABEL: name: atomic_add_2darraymsaa ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = 
G_TRUNC [[COPY9]](s32) - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darraymsaa), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY10]](i32) + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darraymsaa), [[COPY8]](i32), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX12-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i16(i32 %data, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0) @@ -1545,66 +1605,69 @@ define amdgpu_ps float @atomic_add_1d_slc(<8 x i32> inreg %rsrc, i32 %data, i16 ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: 
[[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](i32), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 2, 3 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX10NSA-LABEL: name: atomic_add_1d_slc ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA-NEXT: 
[[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 1 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](i32), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 2, 1 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX10NSA-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX12-LABEL: name: atomic_add_1d_slc ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 1 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: 
[[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](i32), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 2, 1 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX12-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 2) @@ -1617,75 +1680,78 @@ define amdgpu_ps float @atomic_cmpswap_2d(<8 x i32> inreg %rsrc, i32 %cmp, i32 % ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) 
= COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY10]](i32) + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY11]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2d), [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX10NSA-LABEL: name: atomic_cmpswap_2d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY 
$sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY10]](i32) + ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY11]](i32) + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2d), [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX10NSA-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX12-LABEL: name: atomic_cmpswap_2d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: 
[[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY10]](i32) + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY11]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2d), [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX12-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.2d.i32.i16(i32 %cmp, i32 %swap, i16 %s, i16 %t, <8 x i32> %rsrc, i32 0, i32 0) @@ -1698,88 +1764,91 @@ define amdgpu_ps float @atomic_cmpswap_3d(<8 x i32> inreg %rsrc, i32 %cmp, i32 % ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.3d), 
[[BUILD_VECTOR1]](<2 x s32>), $noreg, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY10]](i32) + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY11]](i32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.3d), [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[CONCAT_VECTORS]](<4 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX10NSA-LABEL: name: atomic_cmpswap_3d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: 
[[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.3d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY10]](i32) + ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY11]](i32) + ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[DEF]](i16) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.3d), [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX10NSA-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX12-LABEL: name: atomic_cmpswap_3d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; 
GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.3d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY10]](i32) + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.3d), [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX12-NEXT: $vgpr0 = COPY 
[[BITCAST]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.3d.i32.i16(i32 %cmp, i32 %swap, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 0, i32 0) @@ -1792,91 +1861,94 @@ define amdgpu_ps float @atomic_cmpswap_2darraymsaa(<8 x i32> inreg %rsrc, i32 %c ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2darraymsaa), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY10]](i32) + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY11]](i32) + ; GFX9-NEXT: 
[[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2darraymsaa), [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[CONCAT_VECTORS]](<4 x i16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX10NSA-LABEL: name: atomic_cmpswap_2darraymsaa ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2darraymsaa), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: 
[[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY10]](i32) + ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY11]](i32) + ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2darraymsaa), [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX10NSA-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX12-LABEL: name: atomic_cmpswap_2darraymsaa ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC 
[[COPY13]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2darraymsaa), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY10]](i32) + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2darraymsaa), [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (volatile dereferenceable load store (i32), addrspace 8) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX12-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.2darraymsaa.i32.i16(i32 %cmp, i32 %swap, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.dim.a16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.dim.a16.ll index ea40703bf98d0..1ec82ac306004 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.dim.a16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.dim.a16.ll @@ -9,84 +9,87 @@ define amdgpu_ps <4 x float> @load_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, 
$sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; 
GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10NSA-LABEL: name: load_1d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = 
G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f32>) + ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: load_1d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX12-NEXT: 
[[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement <2 x i16> %coords, i32 0 @@ -99,93 +102,96 @@ define amdgpu_ps <4 x float> @load_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), 
[[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST2]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10NSA-LABEL: name: load_2d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES 
[[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST2]](<4 x f32>) + ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: load_2d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX12-NEXT: 
[[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST2]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement <2 x i16> %coords, i32 0 @@ -199,112 +205,115 @@ define amdgpu_ps <4 x float> @load_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_l ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: 
[[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX9-NEXT: 
[[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY9]](<2 x i16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[CONCAT_VECTORS]](<4 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST3]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10NSA-LABEL: name: load_3d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: 
[[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY9]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[DEF]](i16) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST3]](<4 x f32>) + ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: load_3d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, 
$sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX12-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: 
[[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY9]](<2 x i16>) + ; GFX12-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST3]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -319,112 +328,115 @@ define amdgpu_ps <4 x float> @load_cube(<8 x i32> inreg %rsrc, <2 x i16> %coords ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: 
[[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY9]](<2 x i16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.cube), 15, [[CONCAT_VECTORS]](<4 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST3]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10NSA-LABEL: name: load_cube ; 
GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.cube), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10NSA-NEXT: 
[[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY9]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[DEF]](i16) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.cube), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST3]](<4 x f32>) + ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: load_cube ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX12-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; 
GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.cube), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY9]](<2 x i16>) + ; GFX12-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.cube), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST3]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement <2 x i16> 
%coords_lo, i32 0 @@ -439,93 +451,96 @@ define amdgpu_ps <4 x float> @load_1darray(<8 x i32> inreg %rsrc, <2 x i16> %coo ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1darray), 15, [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: 
[[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1darray), 15, [[BUILD_VECTOR1]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST2]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10NSA-LABEL: name: load_1darray ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1darray), 15, [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), 
[[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1darray), 15, [[BUILD_VECTOR1]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST2]](<4 x f32>) + ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: load_1darray ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1darray), 15, [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), 
[[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1darray), 15, [[BUILD_VECTOR1]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST2]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement <2 x i16> %coords, i32 0 @@ -539,112 +554,115 @@ define amdgpu_ps <4 x float> @load_2darray(<8 x i32> inreg %rsrc, <2 x i16> %coo ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = 
G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY9]](<2 x i16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x 
i16>) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darray), 15, [[CONCAT_VECTORS]](<4 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST3]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10NSA-LABEL: name: load_2darray ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darray), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: 
[[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY9]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[DEF]](i16) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darray), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST3]](<4 x f32>) + ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: load_2darray ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x 
s16>) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX12-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darray), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY9]](<2 x i16>) + ; GFX12-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[DEF]](i16) + ; GFX12-NEXT: 
[[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darray), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST3]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -659,112 +677,115 @@ define amdgpu_ps <4 x float> @load_2dmsaa(<8 x i32> inreg %rsrc, <2 x i16> %coor ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2dmsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; 
GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY9]](<2 x i16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2dmsaa), 15, [[CONCAT_VECTORS]](<4 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST3]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10NSA-LABEL: name: load_2dmsaa ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), 
[[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2dmsaa), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY9]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX10NSA-NEXT: 
[[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[DEF]](i16) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2dmsaa), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST3]](<4 x f32>) + ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: load_2dmsaa ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX12-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2dmsaa), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: 
$vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY9]](<2 x i16>) + ; GFX12-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2dmsaa), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST3]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -779,118 +800,121 @@ define amdgpu_ps <4 x float> @load_2darraymsaa(<8 x i32> inreg %rsrc, <2 x i16> ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x 
s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST 
[[COPY9]](<2 x i16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY9]](<2 x i16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C1]](i32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[CONCAT_VECTORS]](<4 x i16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST4]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10NSA-LABEL: name: load_2darraymsaa ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) - ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; 
GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY9]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY9]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C1]](i32) + ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10NSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST4]](<4 x f32>) + ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10NSA-NEXT: 
$vgpr1 = COPY [[UV1]](f32) + ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: load_2darraymsaa ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX12-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX12-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) - ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), 
[[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY9]](<2 x i16>) + ; GFX12-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY9]](<2 x i16>) + ; GFX12-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C1]](i32) + ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST4]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -906,93 +930,96 @@ define amdgpu_ps <4 x float> @load_mip_1d(<8 x i32> inreg %rsrc, <2 x i16> %coor ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: 
[[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1d), 15, [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1d), 15, [[BUILD_VECTOR1]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST2]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10NSA-LABEL: name: load_mip_1d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY 
$sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1d), 15, [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1d), 15, [[BUILD_VECTOR1]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10NSA-NEXT: 
[[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST2]](<4 x f32>) + ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: load_mip_1d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1d), 15, [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: 
[[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1d), 15, [[BUILD_VECTOR1]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST2]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement <2 x i16> %coords, i32 0 @@ -1006,112 +1033,115 @@ define amdgpu_ps <4 x float> @load_mip_2d(<8 x i32> inreg %rsrc, <2 x i16> %coor ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 
[[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY9]](<2 x i16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2d), 15, [[CONCAT_VECTORS]](<4 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST3]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10NSA-LABEL: name: load_mip_2d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; 
GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2d), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR 
[[BITCAST1]], [[C1]](i32) + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY9]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[DEF]](i16) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2d), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST3]](<4 x f32>) + ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: load_mip_2d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX12-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2d), 
15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY9]](<2 x i16>) + ; GFX12-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2d), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST3]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -1126,118 +1156,121 @@ define amdgpu_ps <4 x float> @load_mip_3d(<8 x i32> inreg %rsrc, <2 x i16> %coor ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: 
[[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = 
G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY9]](<2 x i16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY9]](<2 x i16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C1]](i32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.3d), 15, [[CONCAT_VECTORS]](<4 x i16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST4]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10NSA-LABEL: name: load_mip_3d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) 
= G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) - ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.3d), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY9]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY9]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C1]](i32) + ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.3d), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 
8) + ; GFX10NSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST4]](<4 x f32>) + ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: load_mip_3d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX12-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX12-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) - ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.3d), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + 
; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY9]](<2 x i16>) + ; GFX12-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY9]](<2 x i16>) + ; GFX12-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C1]](i32) + ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.3d), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST4]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -1253,118 +1286,121 @@ define amdgpu_ps <4 x float> @load_mip_cube(<8 x i32> inreg %rsrc, <2 x i16> %co ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = 
COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY9]](<2 x i16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = 
G_BITCAST [[COPY9]](<2 x i16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C1]](i32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.cube), 15, [[CONCAT_VECTORS]](<4 x i16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST4]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10NSA-LABEL: name: load_mip_cube ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) - ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; 
GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.cube), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY9]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY9]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C1]](i32) + ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.cube), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10NSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST4]](<4 x f32>) + ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit 
$vgpr3 ; ; GFX12-LABEL: name: load_mip_cube ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX12-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX12-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) - ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.cube), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + 
; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY9]](<2 x i16>) + ; GFX12-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY9]](<2 x i16>) + ; GFX12-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C1]](i32) + ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.cube), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST4]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -1380,112 +1416,115 @@ define amdgpu_ps <4 x float> @load_mip_1darray(<8 x i32> inreg %rsrc, <2 x i16> ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], 
[[C1]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY9]](<2 x i16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1darray), 15, [[CONCAT_VECTORS]](<4 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x f32>) = G_BITCAST 
[[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST3]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10NSA-LABEL: name: load_mip_1darray ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1darray), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: 
[[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY9]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[DEF]](i16) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1darray), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST3]](<4 x f32>) + ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: load_mip_1darray ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC 
[[LSHR]](s32) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX12-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1darray), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY9]](<2 x i16>) + ; GFX12-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1darray), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x 
f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST3]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -1500,118 +1539,121 @@ define amdgpu_ps <4 x float> @load_mip_2darray(<8 x i32> inreg %rsrc, <2 x i16> ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: 
[[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY9]](<2 x i16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY9]](<2 x i16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C1]](i32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2darray), 15, [[CONCAT_VECTORS]](<4 x i16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST4]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10NSA-LABEL: name: load_mip_2darray ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), 
[[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) - ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2darray), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX10NSA-NEXT: 
[[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY9]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY9]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C1]](i32) + ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2darray), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10NSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST4]](<4 x f32>) + ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: load_mip_2darray ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX12-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX12-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) - ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - 
; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2darray), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY9]](<2 x i16>) + ; GFX12-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY9]](<2 x i16>) + ; GFX12-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C1]](i32) + ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2darray), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST4]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 
main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -1627,84 +1669,87 @@ define amdgpu_ps void @store_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>), addrspace 8) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[COPY8]](f32), [[COPY9]](f32), [[COPY10]](f32), [[COPY11]](f32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x f32>) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = 
G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BITCAST1]](<4 x i32>), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 :: (dereferenceable store (<4 x f32>), addrspace 8) ; GFX9-NEXT: S_ENDPGM 0 ; ; GFX10NSA-LABEL: name: store_1d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable store (<4 x s32>), addrspace 8) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[COPY8]](f32), [[COPY9]](f32), [[COPY10]](f32), [[COPY11]](f32) + ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; 
GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x f32>) + ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BITCAST1]](<4 x i32>), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable store (<4 x f32>), addrspace 8) ; GFX10NSA-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: store_1d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable store (<4 x s32>), addrspace 8) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[COPY8]](f32), [[COPY9]](f32), 
[[COPY10]](f32), [[COPY11]](f32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x f32>) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX12-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BITCAST1]](<4 x i32>), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable store (<4 x f32>), addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 main_body: %s = extractelement <2 x i16> %coords, i32 0 @@ -1717,93 +1762,96 @@ define amdgpu_ps void @store_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>), addrspace 8) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY 
$sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[COPY8]](f32), [[COPY9]](f32), [[COPY10]](f32), [[COPY11]](f32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x f32>) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BITCAST2]](<4 x i32>), 15, [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 :: (dereferenceable store (<4 x f32>), addrspace 8) ; GFX9-NEXT: S_ENDPGM 0 ; ; GFX10NSA-LABEL: name: store_2d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: 
[[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable store (<4 x s32>), addrspace 8) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[COPY8]](f32), [[COPY9]](f32), [[COPY10]](f32), [[COPY11]](f32) + ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x f32>) + ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BITCAST2]](<4 x i32>), 15, [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable store (<4 x f32>), addrspace 8) ; GFX10NSA-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: store_2d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; 
GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable store (<4 x s32>), addrspace 8) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[COPY8]](f32), [[COPY9]](f32), [[COPY10]](f32), [[COPY11]](f32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x f32>) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX12-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BITCAST2]](<4 x i32>), 15, [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable store (<4 x f32>), addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 main_body: %s = extractelement <2 x i16> %coords, i32 0 @@ -1817,112 +1865,115 @@ define amdgpu_ps void @store_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - 
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>), addrspace 8) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[COPY8]](f32), [[COPY9]](f32), [[COPY10]](f32), [[COPY11]](f32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](<2 x i16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x f32>) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>) + ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.3d), [[BITCAST3]](<4 x i32>), 15, [[CONCAT_VECTORS]](<4 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 :: (dereferenceable store (<4 x f32>), addrspace 8) ; GFX9-NEXT: S_ENDPGM 0 ; ; GFX10NSA-LABEL: name: store_3d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x 
s16>) - ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable store (<4 x s32>), addrspace 8) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[COPY8]](f32), [[COPY9]](f32), [[COPY10]](f32), [[COPY11]](f32) + ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x f32>) + ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[DEF]](i16) + ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.3d), [[BITCAST3]](<4 x i32>), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable store (<4 x f32>), addrspace 8) ; GFX10NSA-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: store_3d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX12-NEXT: {{ $}} - ; 
GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) - ; GFX12-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX12-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable store (<4 x s32>), addrspace 8) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[COPY8]](f32), [[COPY9]](f32), [[COPY10]](f32), [[COPY11]](f32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; 
GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](<2 x i16>) + ; GFX12-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x f32>) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[DEF]](i16) + ; GFX12-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.3d), [[BITCAST3]](<4 x i32>), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable store (<4 x f32>), addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -1937,112 +1988,115 @@ define amdgpu_ps void @store_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - 
; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>), addrspace 8) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[COPY8]](f32), [[COPY9]](f32), [[COPY10]](f32), [[COPY11]](f32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](<2 x i16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x f32>) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>) + ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.cube), [[BITCAST3]](<4 x i32>), 15, [[CONCAT_VECTORS]](<4 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 :: (dereferenceable store (<4 x f32>), addrspace 8) ; GFX9-NEXT: S_ENDPGM 0 ; ; GFX10NSA-LABEL: name: store_cube ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10NSA-NEXT: {{ $}} 
- ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable store (<4 x s32>), addrspace 8) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[COPY8]](f32), [[COPY9]](f32), [[COPY10]](f32), 
[[COPY11]](f32) + ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x f32>) + ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[DEF]](i16) + ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.cube), [[BITCAST3]](<4 x i32>), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable store (<4 x f32>), addrspace 8) ; GFX10NSA-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: store_cube ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = 
G_BITCAST [[COPY13]](<2 x s16>) - ; GFX12-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX12-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable store (<4 x s32>), addrspace 8) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[COPY8]](f32), [[COPY9]](f32), [[COPY10]](f32), [[COPY11]](f32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](<2 x i16>) + ; GFX12-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x f32>) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[DEF]](i16) + ; GFX12-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.cube), [[BITCAST3]](<4 x i32>), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable store (<4 x f32>), addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -2057,93 +2111,96 @@ define amdgpu_ps void @store_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, 
$vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>), addrspace 8) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[COPY8]](f32), [[COPY9]](f32), [[COPY10]](f32), [[COPY11]](f32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) 
+ ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x f32>) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1darray), [[BITCAST2]](<4 x i32>), 15, [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 :: (dereferenceable store (<4 x f32>), addrspace 8) ; GFX9-NEXT: S_ENDPGM 0 ; ; GFX10NSA-LABEL: name: store_1darray ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable store (<4 x s32>), addrspace 8) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(f32) = COPY 
$vgpr2 + ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[COPY8]](f32), [[COPY9]](f32), [[COPY10]](f32), [[COPY11]](f32) + ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x f32>) + ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1darray), [[BITCAST2]](<4 x i32>), 15, [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable store (<4 x f32>), addrspace 8) ; GFX10NSA-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: store_1darray ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable store (<4 x s32>), addrspace 8) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; 
GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[COPY8]](f32), [[COPY9]](f32), [[COPY10]](f32), [[COPY11]](f32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x f32>) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX12-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1darray), [[BITCAST2]](<4 x i32>), 15, [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable store (<4 x f32>), addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 main_body: %s = extractelement <2 x i16> %coords, i32 0 @@ -2157,112 +2214,115 @@ define amdgpu_ps void @store_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = 
G_BITCAST [[COPY12]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>), addrspace 8) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[COPY8]](f32), [[COPY9]](f32), [[COPY10]](f32), [[COPY11]](f32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](<2 x i16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x f32>) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[DEF]](i16) + ; GFX9-NEXT: 
[[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>) + ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2darray), [[BITCAST3]](<4 x i32>), 15, [[CONCAT_VECTORS]](<4 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 :: (dereferenceable store (<4 x f32>), addrspace 8) ; GFX9-NEXT: S_ENDPGM 0 ; ; GFX10NSA-LABEL: name: store_2darray ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable store (<4 x s32>), addrspace 8) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + 
; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[COPY8]](f32), [[COPY9]](f32), [[COPY10]](f32), [[COPY11]](f32) + ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x f32>) + ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[DEF]](i16) + ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2darray), [[BITCAST3]](<4 x i32>), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable store (<4 x f32>), addrspace 8) ; GFX10NSA-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: store_2darray ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(<2 x 
s16>) = COPY $vgpr5 - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) - ; GFX12-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX12-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable store (<4 x s32>), addrspace 8) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[COPY8]](f32), [[COPY9]](f32), [[COPY10]](f32), [[COPY11]](f32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](<2 x i16>) + ; GFX12-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x f32>) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR 
[[TRUNC2]](i16), [[DEF]](i16) + ; GFX12-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2darray), [[BITCAST3]](<4 x i32>), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable store (<4 x f32>), addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -2277,112 +2337,115 @@ define amdgpu_ps void @store_2dmsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, < ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2dmsaa), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>), addrspace 8) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY 
$sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[COPY8]](f32), [[COPY9]](f32), [[COPY10]](f32), [[COPY11]](f32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](<2 x i16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x f32>) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>) + ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2dmsaa), [[BITCAST3]](<4 x i32>), 15, [[CONCAT_VECTORS]](<4 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 :: (dereferenceable store (<4 x f32>), addrspace 8) ; GFX9-NEXT: S_ENDPGM 0 ; ; GFX10NSA-LABEL: name: store_2dmsaa ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10NSA-NEXT: 
[[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2dmsaa), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable store (<4 x s32>), addrspace 8) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[COPY8]](f32), [[COPY9]](f32), [[COPY10]](f32), [[COPY11]](f32) + ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x f32>) + ; GFX10NSA-NEXT: 
[[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[DEF]](i16) + ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2dmsaa), [[BITCAST3]](<4 x i32>), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable store (<4 x f32>), addrspace 8) ; GFX10NSA-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: store_2dmsaa ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) - ; GFX12-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX12-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2dmsaa), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable store (<4 x s32>), addrspace 8) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: 
[[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[COPY8]](f32), [[COPY9]](f32), [[COPY10]](f32), [[COPY11]](f32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](<2 x i16>) + ; GFX12-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x f32>) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[DEF]](i16) + ; GFX12-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2dmsaa), [[BITCAST3]](<4 x i32>), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable store (<4 x f32>), addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -2397,118 +2460,121 @@ define amdgpu_ps void @store_2darraymsaa(<8 x i32> inreg %rsrc, <4 x float> %vda ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: 
[[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2darraymsaa), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>), addrspace 8) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[COPY8]](f32), [[COPY9]](f32), [[COPY10]](f32), [[COPY11]](f32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](<2 x i16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC 
[[LSHR2]](i32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](<2 x i16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C1]](i32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x f32>) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>) + ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2darraymsaa), [[BITCAST4]](<4 x i32>), 15, [[CONCAT_VECTORS]](<4 x i16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 :: (dereferenceable store (<4 x f32>), addrspace 8) ; GFX9-NEXT: S_ENDPGM 0 ; ; GFX10NSA-LABEL: name: store_2darraymsaa ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) - ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = 
G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2darraymsaa), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable store (<4 x s32>), addrspace 8) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[COPY8]](f32), [[COPY9]](f32), [[COPY10]](f32), [[COPY11]](f32) + ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C1]](i32) + ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX10NSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x f32>) + ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2darraymsaa), [[BITCAST4]](<4 x i32>), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable store (<4 x f32>), addrspace 8) ; GFX10NSA-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: store_2darraymsaa ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 
- ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) - ; GFX12-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) - ; GFX12-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) - ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX12-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2darraymsaa), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable store (<4 x s32>), addrspace 8) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[COPY8]](f32), [[COPY9]](f32), [[COPY10]](f32), [[COPY11]](f32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(<2 x i16>) = 
COPY $vgpr5 + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](<2 x i16>) + ; GFX12-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](<2 x i16>) + ; GFX12-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C1]](i32) + ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x f32>) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX12-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2darraymsaa), [[BITCAST4]](<4 x i32>), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable store (<4 x f32>), addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -2524,93 +2590,96 @@ define amdgpu_ps void @store_mip_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, < ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: 
[[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>), addrspace 8) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[COPY8]](f32), [[COPY9]](f32), [[COPY10]](f32), [[COPY11]](f32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x f32>) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1d), [[BITCAST2]](<4 x i32>), 15, [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 :: (dereferenceable store (<4 x f32>), addrspace 8) ; GFX9-NEXT: S_ENDPGM 0 ; ; GFX10NSA-LABEL: name: store_mip_1d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) 
= G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable store (<4 x s32>), addrspace 8) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[COPY8]](f32), [[COPY9]](f32), [[COPY10]](f32), [[COPY11]](f32) + ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x f32>) + ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1d), [[BITCAST2]](<4 x i32>), 15, [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable store (<4 x f32>), addrspace 8) ; GFX10NSA-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: store_mip_1d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; 
GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable store (<4 x s32>), addrspace 8) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[COPY8]](f32), [[COPY9]](f32), [[COPY10]](f32), [[COPY11]](f32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x f32>) + ; 
GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX12-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1d), [[BITCAST2]](<4 x i32>), 15, [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable store (<4 x f32>), addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 main_body: %s = extractelement <2 x i16> %coords, i32 0 @@ -2624,112 +2693,115 @@ define amdgpu_ps void @store_mip_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, < ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>), addrspace 8) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; 
GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[COPY8]](f32), [[COPY9]](f32), [[COPY10]](f32), [[COPY11]](f32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](<2 x i16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x f32>) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>) + ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2d), [[BITCAST3]](<4 x i32>), 15, [[CONCAT_VECTORS]](<4 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 :: (dereferenceable store (<4 x f32>), addrspace 8) ; GFX9-NEXT: S_ENDPGM 0 ; ; GFX10NSA-LABEL: name: store_mip_2d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), 
[[COPY11]](s32) - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable store (<4 x s32>), addrspace 8) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[COPY8]](f32), [[COPY9]](f32), [[COPY10]](f32), [[COPY11]](f32) + ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i32>) = G_BITCAST 
[[BUILD_VECTOR1]](<4 x f32>) + ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[DEF]](i16) + ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2d), [[BITCAST3]](<4 x i32>), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable store (<4 x f32>), addrspace 8) ; GFX10NSA-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: store_mip_2d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) - ; GFX12-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX12-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable store (<4 x s32>), addrspace 8) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: 
[[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[COPY8]](f32), [[COPY9]](f32), [[COPY10]](f32), [[COPY11]](f32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](<2 x i16>) + ; GFX12-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x f32>) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[DEF]](i16) + ; GFX12-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2d), [[BITCAST3]](<4 x i32>), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable store (<4 x f32>), addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -2744,118 +2816,121 @@ define amdgpu_ps void @store_mip_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, < ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), 
[[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>), addrspace 8) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[COPY8]](f32), [[COPY9]](f32), [[COPY10]](f32), [[COPY11]](f32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](<2 x i16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX9-NEXT: 
[[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](<2 x i16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C1]](i32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x f32>) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>) + ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.3d), [[BITCAST4]](<4 x i32>), 15, [[CONCAT_VECTORS]](<4 x i16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 :: (dereferenceable store (<4 x f32>), addrspace 8) ; GFX9-NEXT: S_ENDPGM 0 ; ; GFX10NSA-LABEL: name: store_mip_3d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) - ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10NSA-NEXT: 
[[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable store (<4 x s32>), addrspace 8) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[COPY8]](f32), [[COPY9]](f32), [[COPY10]](f32), [[COPY11]](f32) + ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C1]](i32) + ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX10NSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x f32>) + ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.3d), [[BITCAST4]](<4 x i32>), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable store (<4 x f32>), addrspace 8) ; GFX10NSA-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: store_mip_3d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: 
[[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) - ; GFX12-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) - ; GFX12-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) - ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX12-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable store (<4 x s32>), addrspace 8) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[COPY8]](f32), [[COPY9]](f32), [[COPY10]](f32), [[COPY11]](f32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX12-NEXT: 
[[COPY13:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](<2 x i16>) + ; GFX12-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](<2 x i16>) + ; GFX12-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C1]](i32) + ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x f32>) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX12-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.3d), [[BITCAST4]](<4 x i32>), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable store (<4 x f32>), addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -2871,118 +2946,121 @@ define amdgpu_ps void @store_mip_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], 
[[C1]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>), addrspace 8) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[COPY8]](f32), [[COPY9]](f32), [[COPY10]](f32), [[COPY11]](f32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](<2 x i16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](<2 x i16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C1]](i32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x f32>) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = 
G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>) + ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.cube), [[BITCAST4]](<4 x i32>), 15, [[CONCAT_VECTORS]](<4 x i16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 :: (dereferenceable store (<4 x f32>), addrspace 8) ; GFX9-NEXT: S_ENDPGM 0 ; ; GFX10NSA-LABEL: name: store_mip_cube ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) - ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable store (<4 x s32>), addrspace 8) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: 
[[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[COPY8]](f32), [[COPY9]](f32), [[COPY10]](f32), [[COPY11]](f32) + ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C1]](i32) + ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX10NSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x f32>) + ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.cube), [[BITCAST4]](<4 x i32>), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable store (<4 x f32>), addrspace 8) ; GFX10NSA-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: store_mip_cube ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: 
[[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) - ; GFX12-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) - ; GFX12-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) - ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX12-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable store (<4 x s32>), addrspace 8) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[COPY8]](f32), [[COPY9]](f32), [[COPY10]](f32), [[COPY11]](f32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX12-NEXT: 
[[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](<2 x i16>) + ; GFX12-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](<2 x i16>) + ; GFX12-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C1]](i32) + ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x f32>) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX12-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.cube), [[BITCAST4]](<4 x i32>), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable store (<4 x f32>), addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -2998,112 +3076,115 @@ define amdgpu_ps void @store_mip_1darray(<8 x i32> inreg %rsrc, <4 x float> %vda ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = 
G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>), addrspace 8) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[COPY8]](f32), [[COPY9]](f32), [[COPY10]](f32), [[COPY11]](f32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](<2 x i16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x f32>) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>) + ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1darray), [[BITCAST3]](<4 x i32>), 15, [[CONCAT_VECTORS]](<4 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 :: (dereferenceable store (<4 x f32>), addrspace 8) ; GFX9-NEXT: S_ENDPGM 0 ; ; GFX10NSA-LABEL: name: store_mip_1darray ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; 
GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable store (<4 x s32>), addrspace 8) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[COPY8]](f32), [[COPY9]](f32), [[COPY10]](f32), [[COPY11]](f32) + ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], 
[[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x f32>) + ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[DEF]](i16) + ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1darray), [[BITCAST3]](<4 x i32>), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable store (<4 x f32>), addrspace 8) ; GFX10NSA-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: store_mip_1darray ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) - ; GFX12-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; 
GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX12-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable store (<4 x s32>), addrspace 8) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[COPY8]](f32), [[COPY9]](f32), [[COPY10]](f32), [[COPY11]](f32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](<2 x i16>) + ; GFX12-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x f32>) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[DEF]](i16) + ; GFX12-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1darray), [[BITCAST3]](<4 x i32>), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable store (<4 x f32>), addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -3118,118 +3199,121 @@ define amdgpu_ps void @store_mip_2darray(<8 x i32> inreg %rsrc, <4 x float> %vda ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: 
[[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>), addrspace 8) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[COPY8]](f32), [[COPY9]](f32), [[COPY10]](f32), [[COPY11]](f32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = 
G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](<2 x i16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](<2 x i16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C1]](i32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x f32>) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>) + ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2darray), [[BITCAST4]](<4 x i32>), 15, [[CONCAT_VECTORS]](<4 x i16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 :: (dereferenceable store (<4 x f32>), addrspace 8) ; GFX9-NEXT: S_ENDPGM 0 ; ; GFX10NSA-LABEL: name: store_mip_2darray ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], 
[[C1]](s32) - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) - ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable store (<4 x s32>), addrspace 8) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[COPY8]](f32), [[COPY9]](f32), [[COPY10]](f32), [[COPY11]](f32) + ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C1]](i32) + ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX10NSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x f32>) + ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), 
[[TRUNC3]](i16) + ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2darray), [[BITCAST4]](<4 x i32>), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable store (<4 x f32>), addrspace 8) ; GFX10NSA-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: store_mip_2darray ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) - ; GFX12-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) - ; GFX12-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) - ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX12-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable store (<4 x s32>), addrspace 8) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; 
GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[COPY8]](f32), [[COPY9]](f32), [[COPY10]](f32), [[COPY11]](f32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](<2 x i16>) + ; GFX12-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](<2 x i16>) + ; GFX12-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C1]](i32) + ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x f32>) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX12-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2darray), [[BITCAST4]](<4 x i32>), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable store (<4 x f32>), addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -3245,84 +3329,87 @@ define amdgpu_ps <4 x float> @getresinfo_1d(<8 x i32> inreg %rsrc, <2 x i16> %co ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: 
[[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.1d), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.1d), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10NSA-LABEL: name: getresinfo_1d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.1d), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 - ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.1d), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f32>) + ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: getresinfo_1d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.1d), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.1d), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %mip = extractelement <2 x i16> %coords, i32 0 @@ -3335,84 +3422,87 @@ define amdgpu_ps <4 x float> @getresinfo_2d(<8 x i32> inreg %rsrc, <2 x i16> %co ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; 
GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2d), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2d), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10NSA-LABEL: name: getresinfo_2d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: 
[[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2d), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 - ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2d), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f32>) + ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](f32) ; 
GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: getresinfo_2d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2d), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2d), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f32>) + ; 
GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %mip = extractelement <2 x i16> %coords, i32 0 @@ -3425,84 +3515,87 @@ define amdgpu_ps <4 x float> @getresinfo_3d(<8 x i32> inreg %rsrc, <2 x i16> %co ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.3d), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.3d), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 + ; GFX9-NEXT: 
[[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10NSA-LABEL: name: getresinfo_3d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.3d), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 - ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x 
i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.3d), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f32>) + ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: getresinfo_3d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.3d), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR 
[[BITCAST]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.3d), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %mip = extractelement <2 x i16> %coords, i32 0 @@ -3515,84 +3608,87 @@ define amdgpu_ps <4 x float> @getresinfo_cube(<8 x i32> inreg %rsrc, <2 x i16> % ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.cube), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), 
[[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.cube), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10NSA-LABEL: name: getresinfo_cube ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.cube), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 - ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = 
COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.cube), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f32>) + ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: getresinfo_cube ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.cube), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = 
COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.cube), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %mip = extractelement <2 x i16> %coords, i32 0 @@ -3605,84 +3701,87 @@ define amdgpu_ps <4 x float> @getresinfo_1darray(<8 x i32> inreg %rsrc, <2 x i16 ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.1darray), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; 
GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.1darray), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10NSA-LABEL: name: getresinfo_1darray ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.1darray), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 - ; 
GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.1darray), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f32>) + ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: getresinfo_1darray ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = 
G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.1darray), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.1darray), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %mip = extractelement <2 x i16> %coords, i32 0 @@ -3695,84 +3794,87 @@ define amdgpu_ps <4 x float> @getresinfo_2darray(<8 x i32> inreg %rsrc, <2 x i16 ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = 
G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2darray), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2darray), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10NSA-LABEL: name: getresinfo_2darray ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), 
[[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2darray), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 - ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2darray), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f32>) + ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: getresinfo_2darray ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = 
COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2darray), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2darray), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %mip = extractelement <2 x i16> %coords, i32 0 @@ -3785,84 +3887,87 @@ define amdgpu_ps <4 x float> @getresinfo_2dmsaa(<8 x i32> inreg %rsrc, <2 x i16> ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: 
[[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2dmsaa), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2dmsaa), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10NSA-LABEL: name: getresinfo_2dmsaa ; GFX10NSA: 
bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2dmsaa), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 - ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2dmsaa), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f32>) + ; GFX10NSA-NEXT: 
$vgpr0 = COPY [[UV]](f32) + ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: getresinfo_2dmsaa ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2dmsaa), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2dmsaa), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST 
[[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %mip = extractelement <2 x i16> %coords, i32 0 @@ -3875,84 +3980,87 @@ define amdgpu_ps <4 x float> @getresinfo_2darraymsaa(<8 x i32> inreg %rsrc, <2 x ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2darraymsaa), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX9-NEXT: 
[[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2darraymsaa), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10NSA-LABEL: name: getresinfo_2darraymsaa ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2darraymsaa), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 - ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; 
GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2darraymsaa), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f32>) + ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: getresinfo_2darraymsaa ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2darraymsaa), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), 
[[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2darraymsaa), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %mip = extractelement <2 x i16> %coords, i32 0 @@ -3965,72 +4073,75 @@ define amdgpu_ps float @load_1d_V1(<8 x i32> inreg %rsrc, <2 x i16> %coords) { ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 8, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (s32), addrspace 8) - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), 
[[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 8, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 :: (dereferenceable load (f32), addrspace 8) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST1]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX10NSA-LABEL: name: load_1d_V1 ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 8, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (s32), addrspace 8) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = 
G_LSHR [[BITCAST]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 8, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable load (f32), addrspace 8) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX10NSA-NEXT: $vgpr0 = COPY [[BITCAST1]](f32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX12-LABEL: name: load_1d_V1 ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 8, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (s32), addrspace 8) - ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD 
intrinsic(@llvm.amdgcn.image.load.1d), 8, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable load (f32), addrspace 8) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX12-NEXT: $vgpr0 = COPY [[BITCAST1]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %s = extractelement <2 x i16> %coords, i32 0 @@ -4043,78 +4154,81 @@ define amdgpu_ps <2 x float> @load_1d_V2(<8 x i32> inreg %rsrc, <2 x i16> %coord ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 9, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<2 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 9, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 :: (dereferenceable load (<2 x f32>), 
addrspace 8) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; ; GFX10NSA-LABEL: name: load_1d_V2 ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 9, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (<2 x s32>), addrspace 8) - ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 9, [[BUILD_VECTOR1]](<2 x i16>), 
[[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable load (<2 x f32>), addrspace 8) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x i32>) + ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f32>) + ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](f32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; ; GFX12-LABEL: name: load_1d_V2 ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 9, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (<2 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 9, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: 
(dereferenceable load (<2 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 main_body: %s = extractelement <2 x i16> %coords, i32 0 @@ -4127,72 +4241,75 @@ define amdgpu_ps void @store_1d_V1(<8 x i32> inreg %rsrc, float %vdata, <2 x i16 ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[COPY8]](s32), 2, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (s32), addrspace 8) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY9]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](f32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BITCAST1]](i32), 2, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 :: (dereferenceable store (f32), addrspace 8) ; GFX9-NEXT: 
S_ENDPGM 0 ; ; GFX10NSA-LABEL: name: store_1d_V1 ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[COPY8]](s32), 2, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable store (s32), addrspace 8) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY9]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](f32) + ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BITCAST1]](i32), 2, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable store (f32), addrspace 8) ; GFX10NSA-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: store_1d_V1 ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY 
$sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[COPY8]](s32), 2, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable store (s32), addrspace 8) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY9]](<2 x i16>) + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](f32) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX12-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BITCAST1]](i32), 2, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable store (f32), addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 main_body: %s = extractelement <2 x i16> %coords, i32 0 @@ -4205,78 +4322,81 @@ define amdgpu_ps void @store_1d_V2(<8 x i32> inreg %rsrc, <2 x float> %vdata, <2 ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR 
[[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<2 x s32>), 12, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<2 x s32>), addrspace 8) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[COPY8]](f32), [[COPY9]](f32) + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY10]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR1]](<2 x f32>) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BITCAST1]](<2 x i32>), 12, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 3 :: (dereferenceable store (<2 x f32>), addrspace 8) ; GFX9-NEXT: S_ENDPGM 0 ; ; GFX10NSA-LABEL: name: store_1d_V2 ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), 
[[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<2 x s32>), 12, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable store (<2 x s32>), addrspace 8) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[COPY8]](f32), [[COPY9]](f32) + ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY10]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR1]](<2 x f32>) + ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BITCAST1]](<2 x i32>), 12, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable store (<2 x f32>), addrspace 8) ; GFX10NSA-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: store_1d_V2 ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), 
[[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<2 x s32>), 12, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable store (<2 x s32>), addrspace 8) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[COPY8]](f32), [[COPY9]](f32) + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY10]](<2 x i16>) + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR1]](<2 x f32>) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX12-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BITCAST1]](<2 x i32>), 12, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 1 :: (dereferenceable store (<2 x f32>), addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 main_body: %s = extractelement <2 x i16> %coords, i32 0 @@ -4289,84 +4409,87 @@ define amdgpu_ps <4 x float> @load_1d_glc(<8 x i32> inreg %rsrc, <2 x i16> %coor ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), 
[[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 1, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 1, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10NSA-LABEL: name: load_1d_glc ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: 
[[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 1, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 1, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f32>) + ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: load_1d_glc ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; 
GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 1, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 1, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; 
GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement <2 x i16> %coords, i32 0 @@ -4379,84 +4502,87 @@ define amdgpu_ps <4 x float> @load_1d_slc(<8 x i32> inreg %rsrc, <2 x i16> %coor ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 2, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x 
i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10NSA-LABEL: name: load_1d_slc ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR 
[[TRUNC]](i16), [[DEF]](i16) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 2, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f32>) + ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: load_1d_slc ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x 
i16>) + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 2, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement <2 x i16> %coords, i32 0 @@ -4469,84 +4595,87 @@ define amdgpu_ps <4 x float> @load_1d_glc_slc(<8 x i32> inreg %rsrc, <2 x i16> % ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 3, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 3, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10NSA-LABEL: name: load_1d_glc_slc ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 3, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: 
[[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 3, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f32>) + ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: load_1d_glc_slc ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 3, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES 
[[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 3, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement <2 x i16> %coords, i32 0 @@ -4559,84 +4688,87 @@ define amdgpu_ps void @store_1d_glc(<8 x i32> inreg %rsrc, <4 x float> %vdata, < ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; 
GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 1, 3 :: (dereferenceable store (<4 x s32>), addrspace 8) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[COPY8]](f32), [[COPY9]](f32), [[COPY10]](f32), [[COPY11]](f32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x f32>) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BITCAST1]](<4 x i32>), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 1, 3 :: (dereferenceable store (<4 x f32>), addrspace 8) ; GFX9-NEXT: S_ENDPGM 0 ; ; GFX10NSA-LABEL: name: store_1d_glc ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: 
[[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 1, 1 :: (dereferenceable store (<4 x s32>), addrspace 8) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[COPY8]](f32), [[COPY9]](f32), [[COPY10]](f32), [[COPY11]](f32) + ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x f32>) + ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BITCAST1]](<4 x i32>), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 1, 1 :: (dereferenceable store (<4 x f32>), addrspace 8) ; GFX10NSA-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: store_1d_glc ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), 
[[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 1, 1 :: (dereferenceable store (<4 x s32>), addrspace 8) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[COPY8]](f32), [[COPY9]](f32), [[COPY10]](f32), [[COPY11]](f32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x f32>) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX12-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BITCAST1]](<4 x i32>), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 1, 1 :: (dereferenceable store (<4 x f32>), addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 main_body: %s = extractelement <2 x i16> %coords, i32 0 @@ -4649,84 +4781,87 @@ define amdgpu_ps void @store_1d_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, < ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; 
GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (dereferenceable store (<4 x s32>), addrspace 8) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[COPY8]](f32), [[COPY9]](f32), [[COPY10]](f32), [[COPY11]](f32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x f32>) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BITCAST1]](<4 x i32>), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 2, 3 :: (dereferenceable store (<4 x f32>), addrspace 8) ; GFX9-NEXT: S_ENDPGM 0 ; ; GFX10NSA-LABEL: name: store_1d_slc ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = 
COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 1 :: (dereferenceable store (<4 x s32>), addrspace 8) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[COPY8]](f32), [[COPY9]](f32), [[COPY10]](f32), [[COPY11]](f32) + ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x f32>) + ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BITCAST1]](<4 x i32>), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR]](<8 x 
i32>), 0, 2, 1 :: (dereferenceable store (<4 x f32>), addrspace 8) ; GFX10NSA-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: store_1d_slc ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 1 :: (dereferenceable store (<4 x s32>), addrspace 8) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[COPY8]](f32), [[COPY9]](f32), [[COPY10]](f32), [[COPY11]](f32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x f32>) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) 
= G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX12-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BITCAST1]](<4 x i32>), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 2, 1 :: (dereferenceable store (<4 x f32>), addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 main_body: %s = extractelement <2 x i16> %coords, i32 0 @@ -4739,84 +4874,87 @@ define amdgpu_ps void @store_1d_glc_slc(<8 x i32> inreg %rsrc, <4 x float> %vdat ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 3, 3 :: (dereferenceable store (<4 x s32>), addrspace 8) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[COPY8]](f32), [[COPY9]](f32), [[COPY10]](f32), [[COPY11]](f32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR 
[[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x f32>) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BITCAST1]](<4 x i32>), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 3, 3 :: (dereferenceable store (<4 x f32>), addrspace 8) ; GFX9-NEXT: S_ENDPGM 0 ; ; GFX10NSA-LABEL: name: store_1d_glc_slc ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 3, 1 :: (dereferenceable store (<4 x s32>), addrspace 8) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x f32>) 
= G_BUILD_VECTOR [[COPY8]](f32), [[COPY9]](f32), [[COPY10]](f32), [[COPY11]](f32) + ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x f32>) + ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BITCAST1]](<4 x i32>), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 3, 1 :: (dereferenceable store (<4 x f32>), addrspace 8) ; GFX10NSA-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: store_1d_glc_slc ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 3, 1 :: (dereferenceable store (<4 x s32>), addrspace 8) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: 
[[COPY8:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[COPY8]](f32), [[COPY9]](f32), [[COPY10]](f32), [[COPY11]](f32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](<2 x i16>) + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x f32>) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX12-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BITCAST1]](<4 x i32>), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 0, 3, 1 :: (dereferenceable store (<4 x f32>), addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 main_body: %s = extractelement <2 x i16> %coords, i32 0 @@ -4829,36 +4967,36 @@ define amdgpu_ps <4 x float> @getresinfo_dmask0(<8 x i32> inreg %rsrc, <4 x floa ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x f32>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[DEF]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10NSA-LABEL: name: getresinfo_dmask0 ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x f32>) = G_IMPLICIT_DEF + ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[DEF]](<4 x f32>) + ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: getresinfo_dmask0 ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, 
$sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x f32>) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[DEF]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %mip = extractelement <2 x i16> %coords, i32 0 @@ -4871,90 +5009,102 @@ define amdgpu_ps <4 x float> @load_1d_tfe(<8 x i32> inreg %rsrc, <2 x i16> %coor ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF1]](s16) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) - ; GFX9-NEXT: G_STORE [[UV4]](s32), 
[[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF1]](i16) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 1, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x i32>) + ; GFX9-NEXT: G_STORE [[UV4]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[UV]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[UV1]](i32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(f32) = G_BITCAST [[UV2]](i32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(f32) = G_BITCAST [[UV3]](i32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST1]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[BITCAST2]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[BITCAST3]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[BITCAST4]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10NSA-LABEL: name: load_1d_tfe ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: 
[[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF1]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 1, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) - ; GFX10NSA-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10NSA-NEXT: [[DEF1:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF1]](i16) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 1, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x i32>) + ; GFX10NSA-NEXT: G_STORE [[UV4]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[UV]](i32) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[UV1]](i32) + ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(f32) = G_BITCAST [[UV2]](i32) + ; GFX10NSA-NEXT: [[BITCAST4:%[0-9]+]]:_(f32) = G_BITCAST [[UV3]](i32) + ; GFX10NSA-NEXT: $vgpr0 = COPY [[BITCAST1]](f32) + ; GFX10NSA-NEXT: $vgpr1 = COPY [[BITCAST2]](f32) + ; GFX10NSA-NEXT: $vgpr2 = COPY [[BITCAST3]](f32) + ; GFX10NSA-NEXT: $vgpr3 = COPY [[BITCAST4]](f32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: load_1d_tfe ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; 
GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF1]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 1, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) - ; GFX12-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[DEF1:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF1]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), 1, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x i32>) + ; GFX12-NEXT: G_STORE [[UV4]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[UV]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[UV1]](i32) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(f32) = G_BITCAST [[UV2]](i32) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(f32) = G_BITCAST [[UV3]](i32) + ; GFX12-NEXT: $vgpr0 = COPY [[BITCAST1]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[BITCAST2]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[BITCAST3]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[BITCAST4]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement 
<2 x i16> %coords, i32 0 @@ -4970,99 +5120,111 @@ define amdgpu_ps <4 x float> @load_2d_tfe(<8 x i32> inreg %rsrc, <2 x i16> %coor ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) - ; GFX9-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; 
GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 1, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x i32>) + ; GFX9-NEXT: G_STORE [[UV4]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[UV]](i32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(f32) = G_BITCAST [[UV1]](i32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(f32) = G_BITCAST [[UV2]](i32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(f32) = G_BITCAST [[UV3]](i32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST2]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[BITCAST3]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[BITCAST4]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[BITCAST5]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10NSA-LABEL: name: load_2d_tfe ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC 
[[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) - ; GFX10NSA-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 1, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x i32>) + ; GFX10NSA-NEXT: G_STORE [[UV4]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[UV]](i32) + ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(f32) = G_BITCAST [[UV1]](i32) + ; GFX10NSA-NEXT: [[BITCAST4:%[0-9]+]]:_(f32) = G_BITCAST [[UV2]](i32) + ; GFX10NSA-NEXT: [[BITCAST5:%[0-9]+]]:_(f32) = G_BITCAST [[UV3]](i32) + ; GFX10NSA-NEXT: $vgpr0 = COPY [[BITCAST2]](f32) + ; GFX10NSA-NEXT: $vgpr1 = COPY [[BITCAST3]](f32) + ; GFX10NSA-NEXT: $vgpr2 = COPY [[BITCAST4]](f32) + ; GFX10NSA-NEXT: $vgpr3 = COPY [[BITCAST5]](f32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: load_2d_tfe ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; 
GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) - ; GFX12-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 1, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x i32>) + ; GFX12-NEXT: G_STORE [[UV4]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[UV]](i32) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(f32) = G_BITCAST [[UV1]](i32) + 
; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(f32) = G_BITCAST [[UV2]](i32) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(f32) = G_BITCAST [[UV3]](i32) + ; GFX12-NEXT: $vgpr0 = COPY [[BITCAST2]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[BITCAST3]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[BITCAST4]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[BITCAST5]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement <2 x i16> %coords, i32 0 @@ -5079,118 +5241,130 @@ define amdgpu_ps <4 x float> @load_3d_tfe(<8 x i32> inreg %rsrc, <2 x i16> %coor ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF1]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD 
intrinsic(@llvm.amdgcn.image.load.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) - ; GFX9-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY9]](<2 x i16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[DEF1]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[CONCAT_VECTORS]](<4 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), 1, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x i32>) + ; GFX9-NEXT: G_STORE [[UV4]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(f32) = G_BITCAST [[UV]](i32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(f32) = G_BITCAST [[UV1]](i32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(f32) = G_BITCAST [[UV2]](i32) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(f32) = G_BITCAST [[UV3]](i32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST3]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[BITCAST4]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[BITCAST5]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[BITCAST6]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10NSA-LABEL: name: load_3d_tfe ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: 
[[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10NSA-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF1]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) - ; GFX10NSA-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR 
[[BITCAST1]], [[C1]](i32) + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY9]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX10NSA-NEXT: [[DEF1:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[DEF1]](i16) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 1, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x i32>) + ; GFX10NSA-NEXT: G_STORE [[UV4]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(f32) = G_BITCAST [[UV]](i32) + ; GFX10NSA-NEXT: [[BITCAST4:%[0-9]+]]:_(f32) = G_BITCAST [[UV1]](i32) + ; GFX10NSA-NEXT: [[BITCAST5:%[0-9]+]]:_(f32) = G_BITCAST [[UV2]](i32) + ; GFX10NSA-NEXT: [[BITCAST6:%[0-9]+]]:_(f32) = G_BITCAST [[UV3]](i32) + ; GFX10NSA-NEXT: $vgpr0 = COPY [[BITCAST3]](f32) + ; GFX10NSA-NEXT: $vgpr1 = COPY [[BITCAST4]](f32) + ; GFX10NSA-NEXT: $vgpr2 = COPY [[BITCAST5]](f32) + ; GFX10NSA-NEXT: $vgpr3 = COPY [[BITCAST6]](f32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: load_3d_tfe ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; 
GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX12-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF1]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) - ; GFX12-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY9]](<2 x i16>) + ; GFX12-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX12-NEXT: [[DEF1:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[DEF1]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 1, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x i32>) + ; GFX12-NEXT: G_STORE [[UV4]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(f32) = G_BITCAST [[UV]](i32) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(f32) = G_BITCAST 
[[UV1]](i32) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(f32) = G_BITCAST [[UV2]](i32) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(f32) = G_BITCAST [[UV3]](i32) + ; GFX12-NEXT: $vgpr0 = COPY [[BITCAST3]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[BITCAST4]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[BITCAST5]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[BITCAST6]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -5208,124 +5382,136 @@ define amdgpu_ps <4 x float> @load_2darraymsaa_tfe(<8 x i32> inreg %rsrc, <2 x i ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS 
[[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) - ; GFX9-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY9]](<2 x i16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY9]](<2 x i16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C1]](i32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[CONCAT_VECTORS]](<4 x i16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), 1, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x i32>) + ; GFX9-NEXT: G_STORE [[UV4]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(f32) = G_BITCAST [[UV]](i32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(f32) = G_BITCAST [[UV1]](i32) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(f32) = G_BITCAST [[UV2]](i32) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(f32) = G_BITCAST [[UV3]](i32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST4]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[BITCAST5]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[BITCAST6]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[BITCAST7]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10NSA-LABEL: name: load_2darraymsaa_tfe ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; 
GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) - ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) - ; GFX10NSA-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: 
$vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY9]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY9]](<2 x i16>) + ; GFX10NSA-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C1]](i32) + ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), 1, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x i32>) + ; GFX10NSA-NEXT: G_STORE [[UV4]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; GFX10NSA-NEXT: [[BITCAST4:%[0-9]+]]:_(f32) = G_BITCAST [[UV]](i32) + ; GFX10NSA-NEXT: [[BITCAST5:%[0-9]+]]:_(f32) = G_BITCAST [[UV1]](i32) + ; GFX10NSA-NEXT: [[BITCAST6:%[0-9]+]]:_(f32) = G_BITCAST [[UV2]](i32) + ; GFX10NSA-NEXT: [[BITCAST7:%[0-9]+]]:_(f32) = G_BITCAST [[UV3]](i32) + ; GFX10NSA-NEXT: $vgpr0 = COPY [[BITCAST4]](f32) + ; GFX10NSA-NEXT: $vgpr1 = COPY [[BITCAST5]](f32) + ; GFX10NSA-NEXT: $vgpr2 = COPY [[BITCAST6]](f32) + ; GFX10NSA-NEXT: $vgpr3 = COPY [[BITCAST7]](f32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: load_2darraymsaa_tfe ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: 
[[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX12-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX12-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) - ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) - ; GFX12-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY8]](<2 x i16>) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY9]](<2 x i16>) + ; GFX12-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST 
[[COPY9]](<2 x i16>) + ; GFX12-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C1]](i32) + ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), 1, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x i32>) + ; GFX12-NEXT: G_STORE [[UV4]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(f32) = G_BITCAST [[UV]](i32) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(f32) = G_BITCAST [[UV1]](i32) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(f32) = G_BITCAST [[UV2]](i32) + ; GFX12-NEXT: [[BITCAST7:%[0-9]+]]:_(f32) = G_BITCAST [[UV3]](i32) + ; GFX12-NEXT: $vgpr0 = COPY [[BITCAST4]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[BITCAST5]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[BITCAST6]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[BITCAST7]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll index f61f985cd24ab..3bc8ca630ce5b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll @@ -7,42 +7,46 @@ define amdgpu_ps half @image_load_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(s16) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16), addrspace 8) - ; UNPACKED-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](s16) - ; UNPACKED-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; 
UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(i16) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 0 :: (dereferenceable load (f16), addrspace 8) + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](i16) + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; UNPACKED-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; UNPACKED-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; PACKED-LABEL: name: image_load_f16 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(s16) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16), addrspace 8) - ; PACKED-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](s16) - ; PACKED-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), 
[[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(i16) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 0 :: (dereferenceable load (f16), addrspace 8) + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](i16) + ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; PACKED-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; PACKED-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %tex = call half @llvm.amdgcn.image.load.2d.f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret half %tex @@ -53,48 +57,50 @@ define amdgpu_ps <2 x half> @image_load_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>), addrspace 8) - ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) - ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; UNPACKED-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 0 :: (dereferenceable load (<2 x f16>), addrspace 8) + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x i32>) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[UV]], [[C]] + ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[UV1]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C1]](i32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[BITCAST]](<2 x i16>) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x f16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; PACKED-LABEL: name: image_load_v2f16 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>), addrspace 8) - ; PACKED-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s16>) + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; PACKED-NEXT: 
[[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x i16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 0 :: (dereferenceable load (<2 x f16>), addrspace 8) + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x i16>) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x f16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %tex = call <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <2 x half> %tex @@ -105,64 +111,91 @@ define amdgpu_ps <3 x half> @image_load_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>), align 8, addrspace 8) - ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<3 x s32>) - ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] - ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) - ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY 
$sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<3 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 0 :: (dereferenceable load (<3 x f16>), align 8, addrspace 8) + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<3 x i32>) + ; UNPACKED-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[UV]](i32) + ; UNPACKED-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[UV1]](i32) + ; UNPACKED-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[UV2]](i32) + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; UNPACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(f16) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; UNPACKED-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; UNPACKED-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST3]](i16) + ; UNPACKED-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST4]](i16) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; UNPACKED-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; UNPACKED-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[DEF]](f16) + ; UNPACKED-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST6]](i16) + ; UNPACKED-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST7]](i16) + ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; UNPACKED-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR1]](i32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST5]](<2 x f16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST8]](<2 x f16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; ; PACKED-LABEL: name: image_load_v3f16 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = 
G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>), align 8, addrspace 8) - ; PACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<4 x s16>) - ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; PACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; PACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) - ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; PACKED-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<4 x i16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 0 :: (dereferenceable load (<3 x f16>), align 8, addrspace 8) + ; PACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<4 x i16>) + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; PACKED-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; PACKED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; PACKED-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; PACKED-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; PACKED-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; PACKED-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(f16) = G_IMPLICIT_DEF + ; PACKED-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; PACKED-NEXT: 
[[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; PACKED-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST5]](i16) + ; PACKED-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST6]](i16) + ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; PACKED-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; PACKED-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; PACKED-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; PACKED-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[DEF]](f16) + ; PACKED-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST8]](i16) + ; PACKED-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST9]](i16) + ; PACKED-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; PACKED-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; PACKED-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR1]](i32) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST7]](<2 x f16>) + ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST10]](<2 x f16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <3 x half> %tex @@ -173,56 +206,60 @@ define amdgpu_ps <4 x half> @image_load_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s16>), addrspace 8) - ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<4 x s32>) - ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] - ; UNPACKED-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]] - ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) - ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], 
[[SHL1]] - ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 0 :: (dereferenceable load (<4 x f16>), addrspace 8) + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<4 x i32>) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[UV]], [[C]] + ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[UV1]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C1]](i32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; UNPACKED-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[UV2]], [[C]] + ; UNPACKED-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[UV3]], [[C]] + ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C1]](i32) + ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; UNPACKED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x i16>), [[BITCAST1]](<2 x i16>) + ; UNPACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x i16>) + ; UNPACKED-NEXT: [[UV4:%[0-9]+]]:_(<2 x f16>), [[UV5:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST2]](<4 x f16>) + ; UNPACKED-NEXT: $vgpr0 = COPY [[UV4]](<2 x f16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[UV5]](<2 x f16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; ; PACKED-LABEL: name: image_load_v4f16 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s16>), addrspace 8) - ; PACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<4 x s16>) - ; PACKED-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<4 x i16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 0 :: (dereferenceable load (<4 x f16>), addrspace 8) + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<4 x i16>) + ; PACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x f16>) + ; PACKED-NEXT: $vgpr0 = COPY [[UV]](<2 x f16>) + ; PACKED-NEXT: $vgpr1 = COPY [[UV1]](<2 x f16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x half> %tex @@ -233,46 +270,54 @@ define amdgpu_ps half @image_load_tfe_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 
- ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 8) - ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) - ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; UNPACKED-NEXT: $vgpr0 = COPY [[UV]](s32) + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 1, 0, 0 :: (dereferenceable load (f16), addrspace 8) + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x i32>) + ; UNPACKED-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[UV]](i32) + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; UNPACKED-NEXT: G_STORE [[UV1]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; UNPACKED-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; UNPACKED-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; PACKED-LABEL: name: image_load_tfe_f16 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: 
[[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 8) - ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) - ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; PACKED-NEXT: $vgpr0 = COPY [[UV]](s32) + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 1, 0, 0 :: (dereferenceable load (f16), addrspace 8) + ; PACKED-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x i32>) + ; PACKED-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[UV]](i32) + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; PACKED-NEXT: G_STORE [[UV1]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; PACKED-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; PACKED-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %res = call { half, i32 } @llvm.amdgcn.image.load.2d.sl_f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { half, i32 } %res, 0 @@ -286,54 +331,56 @@ define amdgpu_ps <2 x half> @image_load_tfe_v2f16(<8 x i32> inreg %rsrc, i32 %s, ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: 
[[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>), addrspace 8) - ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<3 x s32>) - ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<3 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 1, 0, 0 :: (dereferenceable load (<2 x f16>), addrspace 8) + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<3 x i32>) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[UV]], [[C]] + ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[UV1]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C1]](i32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[BITCAST]](<2 x i16>) + ; UNPACKED-NEXT: G_STORE [[UV2]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x f16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; PACKED-LABEL: name: image_load_tfe_v2f16 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, 
$sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>), addrspace 8) - ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) - ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) - ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 1, 0, 0 :: (dereferenceable load (<2 x f16>), addrspace 8) + ; PACKED-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x i32>) + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](i32) + ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[BITCAST]](<2 x i16>) + ; PACKED-NEXT: G_STORE [[UV1]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x f16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %res = call { <2 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f16i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <2 x half>, i32 } %res, 0 @@ -347,62 +394,97 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16(<8 x i32> inreg %rsrc, i32 %s, ; UNPACKED: bb.1 
(%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s16>), align 8, addrspace 8) - ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<4 x s32>) - ; UNPACKED-NEXT: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] - ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) - ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<4 x i32>) = 
G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 1, 0, 0 :: (dereferenceable load (<3 x f16>), align 8, addrspace 8) + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<4 x i32>) + ; UNPACKED-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[UV]](i32) + ; UNPACKED-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[UV1]](i32) + ; UNPACKED-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[UV2]](i32) + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; UNPACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; UNPACKED-NEXT: G_STORE [[UV3]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; UNPACKED-NEXT: [[DEF1:%[0-9]+]]:_(f16) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; UNPACKED-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; UNPACKED-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST3]](i16) + ; UNPACKED-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST4]](i16) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; UNPACKED-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; UNPACKED-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[DEF1]](f16) + ; UNPACKED-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST6]](i16) + ; UNPACKED-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST7]](i16) + ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; UNPACKED-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR1]](i32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST5]](<2 x f16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST8]](<2 x f16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; ; PACKED-LABEL: name: image_load_tfe_v3f16 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; 
PACKED-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s16>), align 8, addrspace 8) - ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<3 x s32>) - ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) - ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32) - ; PACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<3 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 1, 0, 0 :: (dereferenceable load (<3 x f16>), align 8, addrspace 8) + ; PACKED-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<3 x i32>) + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](i32) + ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](i32) + ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST]](<2 x i16>) + ; PACKED-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; PACKED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; PACKED-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; PACKED-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST1]](<2 x i16>) + ; PACKED-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST3]](i32) + ; PACKED-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; PACKED-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; PACKED-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; PACKED-NEXT: G_STORE [[UV2]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(f16) = G_IMPLICIT_DEF + ; PACKED-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; PACKED-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST5]](f16) + ; PACKED-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST7]](i16) + ; PACKED-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST8]](i16) + ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; PACKED-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; PACKED-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; PACKED-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST6]](f16) + ; PACKED-NEXT: 
[[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[DEF1]](f16) + ; PACKED-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST10]](i16) + ; PACKED-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST11]](i16) + ; PACKED-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; PACKED-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; PACKED-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR1]](i32) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST9]](<2 x f16>) + ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST12]](<2 x f16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <3 x half>, i32 } %res, 0 @@ -416,62 +498,67 @@ define amdgpu_ps <4 x half> @image_load_tfe_v4f16(<8 x i32> inreg %rsrc, i32 %s, ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<4 x s16>), addrspace 8) - ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<5 x s32>) - ; UNPACKED-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; UNPACKED-NEXT: 
[[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] - ; UNPACKED-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]] - ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) - ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<5 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 1, 0, 0 :: (dereferenceable load (<4 x f16>), addrspace 8) + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<5 x i32>) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[UV]], [[C]] + ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[UV1]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C1]](i32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; UNPACKED-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[UV2]], [[C]] + ; UNPACKED-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[UV3]], [[C]] + ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C1]](i32) + ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; UNPACKED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x i16>), [[BITCAST1]](<2 x i16>) + ; UNPACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x i16>) + ; UNPACKED-NEXT: G_STORE [[UV4]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; UNPACKED-NEXT: [[UV5:%[0-9]+]]:_(<2 x f16>), [[UV6:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST2]](<4 x f16>) + ; UNPACKED-NEXT: $vgpr0 = COPY [[UV5]](<2 x f16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[UV6]](<2 x f16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; ; PACKED-LABEL: name: image_load_tfe_v4f16 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), 
[[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<4 x s16>), addrspace 8) - ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<3 x s32>) - ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) - ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32) - ; PACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<3 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 1, 0, 0 :: (dereferenceable load (<4 x f16>), addrspace 8) + ; PACKED-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<3 x i32>) + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](i32) + ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](i32) + ; PACKED-NEXT: G_STORE [[UV2]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[BITCAST]](<2 x i16>) + ; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[BITCAST1]](<2 x i16>) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x f16>) + ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST3]](<2 x f16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <4 x half>, i32 } %res, 0 @@ -485,20 +572,24 @@ define amdgpu_ps half @image_load_f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = 
COPY $vgpr1 - ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: $vgpr0 = COPY [[DEF]](s32) + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(f16) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(i16) = G_BITCAST [[DEF]](f16) + ; UNPACKED-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST]](i16) + ; UNPACKED-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; PACKED-LABEL: name: image_load_f16_dmask_0000 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; PACKED-NEXT: $vgpr0 = COPY [[DEF]](s32) + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(f16) = G_IMPLICIT_DEF + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(i16) = G_BITCAST [[DEF]](f16) + ; PACKED-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST]](i16) + ; PACKED-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %tex = call half @llvm.amdgcn.image.load.2d.f16.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret half %tex @@ -509,47 +600,49 @@ define amdgpu_ps <2 x half> @image_load_v2f16_dmask_1000(<8 x i32> inreg %rsrc, ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16), addrspace 8) - ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]], [[C]] - ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) - ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; 
UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 0 :: (dereferenceable load (f16), addrspace 8) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[C1]], [[C2]](i32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[BITCAST]](<2 x i16>) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x f16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; PACKED-LABEL: name: image_load_v2f16_dmask_1000 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16), addrspace 8) - ; PACKED-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s16>) + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; PACKED-NEXT: 
[[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x i16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 0 :: (dereferenceable load (f16), addrspace 8) + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x i16>) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x f16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %tex = call <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <2 x half> %tex @@ -560,20 +653,20 @@ define amdgpu_ps <2 x half> @image_load_v2f16_dmask_0000(<8 x i32> inreg %rsrc, ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: $vgpr0 = COPY [[DEF]](<2 x s16>) + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(<2 x f16>) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: $vgpr0 = COPY [[DEF]](<2 x f16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; PACKED-LABEL: name: image_load_v2f16_dmask_0000 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; PACKED-NEXT: $vgpr0 = COPY [[DEF]](<2 x s16>) + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(<2 x f16>) = G_IMPLICIT_DEF + ; PACKED-NEXT: $vgpr0 = COPY [[DEF]](<2 x f16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %tex = call <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <2 x half> %tex @@ -584,59 +677,89 @@ define amdgpu_ps <3 x half> @image_load_v3f16_dmask_1100(<8 x i32> inreg %rsrc, ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), 
[[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>), addrspace 8) - ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) - ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) - ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] - ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 0 :: (dereferenceable load (<2 x f16>), addrspace 8) + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x i32>) + ; UNPACKED-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[UV]](i32) + ; UNPACKED-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[UV1]](i32) + ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; UNPACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[DEF]](i16) + ; UNPACKED-NEXT: [[DEF1:%[0-9]+]]:_(f16) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; UNPACKED-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; UNPACKED-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST3]](i16) + ; UNPACKED-NEXT: 
[[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST4]](i16) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; UNPACKED-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; UNPACKED-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[DEF1]](f16) + ; UNPACKED-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST6]](i16) + ; UNPACKED-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST7]](i16) + ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; UNPACKED-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR1]](i32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST5]](<2 x f16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST8]](<2 x f16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; ; PACKED-LABEL: name: image_load_v3f16_dmask_1100 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>), addrspace 8) - ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) - ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]] - ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; PACKED-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(i32) 
= COPY $vgpr1 + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x i16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 0 :: (dereferenceable load (<2 x f16>), addrspace 8) + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x i16>) + ; PACKED-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; PACKED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; PACKED-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; PACKED-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[DEF]](i16) + ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(f16) = G_IMPLICIT_DEF + ; PACKED-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; PACKED-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; PACKED-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST4]](i16) + ; PACKED-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST5]](i16) + ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; PACKED-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; PACKED-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; PACKED-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; PACKED-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[DEF1]](f16) + ; PACKED-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST7]](i16) + ; PACKED-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST8]](i16) + ; PACKED-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; PACKED-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; PACKED-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR1]](i32) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST6]](<2 x f16>) + ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST9]](<2 x f16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <3 x half> %tex @@ -647,56 +770,87 @@ define amdgpu_ps <3 x half> @image_load_v3f16_dmask_1000(<8 x i32> inreg %rsrc, ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; 
UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16), addrspace 8) - ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]], [[C]] - ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) - ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]] - ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 0 :: (dereferenceable load (f16), addrspace 8) + ; UNPACKED-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](i32) + ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[DEF]](i16) + ; UNPACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[DEF]](i16) + ; UNPACKED-NEXT: [[DEF1:%[0-9]+]]:_(f16) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; UNPACKED-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; UNPACKED-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST3]](i16) + ; UNPACKED-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST4]](i16) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; UNPACKED-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; UNPACKED-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[DEF1]](f16) + ; UNPACKED-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST6]](i16) + ; UNPACKED-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST7]](i16) + ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(i32) 
= G_OR [[ZEXT2]], [[SHL1]] + ; UNPACKED-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR1]](i32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST5]](<2 x f16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST8]](<2 x f16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; ; PACKED-LABEL: name: image_load_v3f16_dmask_1000 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16), addrspace 8) - ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) - ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]] - ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; PACKED-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x i16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 0 :: (dereferenceable load (f16), addrspace 8) + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x i16>) + ; PACKED-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; PACKED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; PACKED-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; PACKED-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC 
[[LSHR]](i32) + ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[DEF]](i16) + ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(f16) = G_IMPLICIT_DEF + ; PACKED-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; PACKED-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; PACKED-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST4]](i16) + ; PACKED-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST5]](i16) + ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; PACKED-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; PACKED-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; PACKED-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; PACKED-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[DEF1]](f16) + ; PACKED-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST7]](i16) + ; PACKED-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST8]](i16) + ; PACKED-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; PACKED-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; PACKED-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR1]](i32) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST6]](<2 x f16>) + ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST9]](<2 x f16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <3 x half> %tex @@ -707,40 +861,50 @@ define amdgpu_ps <3 x half> @image_load_v3f16_dmask_0000(<8 x i32> inreg %rsrc, ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) - ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(<4 x f16>) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[DEF]](<4 x f16>) + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST %35(i16) + ; UNPACKED-NEXT: [[DEF1:%[0-9]+]]:_(f16) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; UNPACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST1]](<2 x i16>) + ; UNPACKED-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; UNPACKED-NEXT: 
[[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; UNPACKED-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[DEF1]](f16) + ; UNPACKED-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST3]](i16) + ; UNPACKED-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST4]](i16) + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[UV]](<2 x f16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST5]](<2 x f16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; ; PACKED-LABEL: name: image_load_v3f16_dmask_0000 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; PACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; PACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) - ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; PACKED-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(<4 x f16>) = G_IMPLICIT_DEF + ; PACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[DEF]](<4 x f16>) + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST %35(i16) + ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(f16) = G_IMPLICIT_DEF + ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST1]](<2 x i16>) + ; PACKED-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; PACKED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; PACKED-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[DEF1]](f16) + ; PACKED-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST3]](i16) + ; PACKED-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST4]](i16) + ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; PACKED-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; PACKED-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; PACKED-NEXT: $vgpr0 = COPY [[UV]](<2 x f16>) + ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST5]](<2 x f16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <3 x half> %tex @@ -751,56 +915,60 @@ define amdgpu_ps <4 x half> @image_load_v4f16_dmask_1110(<8 x i32> inreg %rsrc, ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, 
$sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>), align 8, addrspace 8) - ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<3 x s32>) - ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] - ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) - ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<3 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 0 :: (dereferenceable load (<3 x f16>), align 8, addrspace 8) + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), 
[[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<3 x i32>) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[UV]], [[C]] + ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[UV1]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C1]](i32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; UNPACKED-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[UV2]], [[C]] + ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[C2]], [[C1]](i32) + ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; UNPACKED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x i16>), [[BITCAST1]](<2 x i16>) + ; UNPACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x i16>) + ; UNPACKED-NEXT: [[UV3:%[0-9]+]]:_(<2 x f16>), [[UV4:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST2]](<4 x f16>) + ; UNPACKED-NEXT: $vgpr0 = COPY [[UV3]](<2 x f16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[UV4]](<2 x f16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; ; PACKED-LABEL: name: image_load_v4f16_dmask_1110 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>), align 8, addrspace 8) - ; PACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<4 x s16>) - ; PACKED-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), 
[[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<4 x i16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 0 :: (dereferenceable load (<3 x f16>), align 8, addrspace 8) + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<4 x i16>) + ; PACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x f16>) + ; PACKED-NEXT: $vgpr0 = COPY [[UV]](<2 x f16>) + ; PACKED-NEXT: $vgpr1 = COPY [[UV1]](<2 x f16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x half> %tex @@ -811,55 +979,60 @@ define amdgpu_ps <4 x half> @image_load_v4f16_dmask_1100(<8 x i32> inreg %rsrc, ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>), addrspace 8) - ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) - ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) - ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] - ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: 
[[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 0 :: (dereferenceable load (<2 x f16>), addrspace 8) + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x i32>) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[UV]], [[C]] + ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[UV1]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C1]](i32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[C2]], [[C1]](i32) + ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[C2]], [[SHL1]] + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; UNPACKED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x i16>), [[BITCAST1]](<2 x i16>) + ; UNPACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x i16>) + ; UNPACKED-NEXT: [[UV2:%[0-9]+]]:_(<2 x f16>), [[UV3:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST2]](<4 x f16>) + ; UNPACKED-NEXT: $vgpr0 = COPY [[UV2]](<2 x f16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[UV3]](<2 x f16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; ; PACKED-LABEL: name: image_load_v4f16_dmask_1100 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = 
G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>), addrspace 8) - ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; PACKED-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[DEF]](<2 x s16>) + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x i16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 0 :: (dereferenceable load (<2 x f16>), addrspace 8) + ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(<2 x i16>) = G_IMPLICIT_DEF + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x i16>) + ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[DEF]](<2 x i16>) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x f16>) + ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x f16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x half> %tex @@ -870,52 +1043,57 @@ define amdgpu_ps <4 x half> @image_load_v4f16_dmask_1000(<8 x i32> inreg %rsrc, ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 
x s32>), 0, 0, 0 :: (dereferenceable load (s16), addrspace 8) - ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]], [[C]] - ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) - ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]] - ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 0 :: (dereferenceable load (f16), addrspace 8) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[C1]], [[C2]](i32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[C1]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; UNPACKED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x i16>), [[BITCAST1]](<2 x i16>) + ; UNPACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x i16>) + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST2]](<4 x f16>) + ; UNPACKED-NEXT: $vgpr0 = COPY [[UV]](<2 x f16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[UV1]](<2 x f16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; ; PACKED-LABEL: name: image_load_v4f16_dmask_1000 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: 
[[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16), addrspace 8) - ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; PACKED-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[DEF]](<2 x s16>) + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x i16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 0 :: (dereferenceable load (f16), addrspace 8) + ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(<2 x i16>) = G_IMPLICIT_DEF + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x i16>) + ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[DEF]](<2 x i16>) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x f16>) + ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x f16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x half> %tex @@ -926,24 +1104,24 @@ define amdgpu_ps <4 x half> @image_load_v4f16_dmask_0000(<8 x i32> inreg %rsrc, ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; UNPACKED-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; 
UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(<4 x f16>) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[DEF]](<4 x f16>) + ; UNPACKED-NEXT: $vgpr0 = COPY [[UV]](<2 x f16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[UV1]](<2 x f16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; ; PACKED-LABEL: name: image_load_v4f16_dmask_0000 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; PACKED-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(<4 x f16>) = G_IMPLICIT_DEF + ; PACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[DEF]](<4 x f16>) + ; PACKED-NEXT: $vgpr0 = COPY [[UV]](<2 x f16>) + ; PACKED-NEXT: $vgpr1 = COPY [[UV1]](<2 x f16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x half> %tex @@ -954,46 +1132,54 @@ define amdgpu_ps half @image_load_tfe_f16_dmask_0000(<8 x i32> inreg %rsrc, i32 ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED-NEXT: 
[[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 8) - ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) - ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; UNPACKED-NEXT: $vgpr0 = COPY [[UV]](s32) + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 1, 0, 0 :: (dereferenceable load (f16), addrspace 8) + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x i32>) + ; UNPACKED-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[UV]](i32) + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; UNPACKED-NEXT: G_STORE [[UV1]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; UNPACKED-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; UNPACKED-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; PACKED-LABEL: name: image_load_tfe_f16_dmask_0000 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 
intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 8) - ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) - ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; PACKED-NEXT: $vgpr0 = COPY [[UV]](s32) + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 1, 0, 0 :: (dereferenceable load (f16), addrspace 8) + ; PACKED-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x i32>) + ; PACKED-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[UV]](i32) + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; PACKED-NEXT: G_STORE [[UV1]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; PACKED-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; PACKED-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %res = call { half, i32 } @llvm.amdgcn.image.load.2d.sl_f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { half, i32 } %res, 0 @@ -1007,54 +1193,56 @@ define amdgpu_ps <2 x half> @image_load_tfe_v2f16_dmask_1000(<8 x i32> inreg %rs ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR 
[[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 8) - ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) - ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) - ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 1, 0, 0 :: (dereferenceable load (f16), addrspace 8) + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x i32>) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[UV]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[C1]], [[C2]](i32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[BITCAST]](<2 x i16>) + ; UNPACKED-NEXT: G_STORE [[UV1]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x f16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; PACKED-LABEL: name: image_load_tfe_v2f16_dmask_1000 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; PACKED-NEXT: 
[[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 8) - ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) - ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) - ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 1, 0, 0 :: (dereferenceable load (f16), addrspace 8) + ; PACKED-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x i32>) + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](i32) + ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[BITCAST]](<2 x i16>) + ; PACKED-NEXT: G_STORE [[UV1]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x f16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %res = call { <2 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <2 x half>, i32 } %res, 0 @@ -1068,54 +1256,56 @@ define amdgpu_ps <2 x half> @image_load_tfe_v2f16_dmask_0000(<8 x i32> inreg %rs ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; UNPACKED-NEXT: 
[[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 8) - ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) - ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) - ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 1, 0, 0 :: (dereferenceable load (f16), addrspace 8) + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x i32>) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[UV]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[C1]], [[C2]](i32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[BITCAST]](<2 x i16>) + ; UNPACKED-NEXT: G_STORE [[UV1]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x f16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; PACKED-LABEL: name: image_load_tfe_v2f16_dmask_0000 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; 
PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 8) - ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) - ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) - ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 1, 0, 0 :: (dereferenceable load (f16), addrspace 8) + ; PACKED-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x i32>) + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](i32) + ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[BITCAST]](<2 x i16>) + ; PACKED-NEXT: G_STORE [[UV1]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x f16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %res = call { <2 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <2 x half>, i32 } %res, 0 @@ -1129,65 +1319,95 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1100(<8 x i32> inreg %rs ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: 
[[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>), addrspace 8) - ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<3 x s32>) - ; UNPACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) - ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] - ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<3 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 1, 0, 0 :: (dereferenceable load (<2 x f16>), addrspace 8) + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<3 x i32>) + ; UNPACKED-NEXT: 
[[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[UV]](i32) + ; UNPACKED-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[UV1]](i32) + ; UNPACKED-NEXT: [[DEF1:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; UNPACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[DEF1]](i16) + ; UNPACKED-NEXT: G_STORE [[UV2]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; UNPACKED-NEXT: [[DEF2:%[0-9]+]]:_(f16) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; UNPACKED-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; UNPACKED-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST3]](i16) + ; UNPACKED-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST4]](i16) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; UNPACKED-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; UNPACKED-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[DEF2]](f16) + ; UNPACKED-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST6]](i16) + ; UNPACKED-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST7]](i16) + ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; UNPACKED-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR1]](i32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST5]](<2 x f16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST8]](<2 x f16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; ; PACKED-LABEL: name: image_load_tfe_v3f16_dmask_1100 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY 
$vgpr1 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>), addrspace 8) - ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) - ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) - ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) - ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]] - ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 1, 0, 0 :: (dereferenceable load (<2 x f16>), addrspace 8) + ; PACKED-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x i32>) + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](i32) + ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST]](<2 x i16>) + ; PACKED-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; PACKED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; PACKED-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; PACKED-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; PACKED-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[DEF1]](i16) + ; PACKED-NEXT: G_STORE [[UV1]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; PACKED-NEXT: [[DEF2:%[0-9]+]]:_(f16) = G_IMPLICIT_DEF + ; PACKED-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; PACKED-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; PACKED-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST5]](i16) + ; PACKED-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST6]](i16) + ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; PACKED-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; PACKED-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; PACKED-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; PACKED-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[DEF2]](f16) + ; PACKED-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST8]](i16) + ; PACKED-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST9]](i16) + ; PACKED-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; PACKED-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; PACKED-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR1]](i32) + ; 
PACKED-NEXT: $vgpr0 = COPY [[BITCAST7]](<2 x f16>) + ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST10]](<2 x f16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <3 x half>, i32 } %res, 0 @@ -1201,63 +1421,94 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1000(<8 x i32> inreg %rs ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 8) - ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) - ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) - ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]] - ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY 
[[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 1, 0, 0 :: (dereferenceable load (f16), addrspace 8) + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x i32>) + ; UNPACKED-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[UV]](i32) + ; UNPACKED-NEXT: [[DEF1:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[DEF1]](i16) + ; UNPACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[DEF1]](i16) + ; UNPACKED-NEXT: G_STORE [[UV1]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; UNPACKED-NEXT: [[DEF2:%[0-9]+]]:_(f16) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; UNPACKED-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; UNPACKED-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST3]](i16) + ; UNPACKED-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST4]](i16) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; UNPACKED-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; UNPACKED-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[DEF2]](f16) + ; UNPACKED-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST6]](i16) + ; UNPACKED-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST7]](i16) + ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; UNPACKED-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR1]](i32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST5]](<2 x f16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST8]](<2 x f16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; ; PACKED-LABEL: name: image_load_tfe_v3f16_dmask_1000 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY 
$sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 8) - ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) - ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) - ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) - ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]] - ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 1, 0, 0 :: (dereferenceable load (f16), addrspace 8) + ; PACKED-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x i32>) + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](i32) + ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST]](<2 x i16>) + ; PACKED-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; PACKED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; PACKED-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; PACKED-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; PACKED-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[DEF1]](i16) + ; PACKED-NEXT: G_STORE [[UV1]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; PACKED-NEXT: [[DEF2:%[0-9]+]]:_(f16) = G_IMPLICIT_DEF + ; PACKED-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; PACKED-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; PACKED-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST5]](i16) + ; PACKED-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST6]](i16) + ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; PACKED-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; PACKED-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; PACKED-NEXT: 
[[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; PACKED-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[DEF2]](f16) + ; PACKED-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST8]](i16) + ; PACKED-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST9]](i16) + ; PACKED-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; PACKED-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; PACKED-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR1]](i32) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST7]](<2 x f16>) + ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST10]](<2 x f16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <3 x half>, i32 } %res, 0 @@ -1271,63 +1522,94 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_0000(<8 x i32> inreg %rs ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 8) - ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) - ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNPACKED-NEXT: 
[[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) - ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]] - ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 1, 0, 0 :: (dereferenceable load (f16), addrspace 8) + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x i32>) + ; UNPACKED-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[UV]](i32) + ; UNPACKED-NEXT: [[DEF1:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[DEF1]](i16) + ; UNPACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[DEF1]](i16) + ; UNPACKED-NEXT: G_STORE [[UV1]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; UNPACKED-NEXT: [[DEF2:%[0-9]+]]:_(f16) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; UNPACKED-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; UNPACKED-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST3]](i16) + ; UNPACKED-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST4]](i16) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; UNPACKED-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; UNPACKED-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[DEF2]](f16) + ; UNPACKED-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST6]](i16) + ; UNPACKED-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST7]](i16) + ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; UNPACKED-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR1]](i32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST5]](<2 x f16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST8]](<2 x f16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; ; PACKED-LABEL: name: image_load_tfe_v3f16_dmask_0000 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), 
[[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 8) - ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) - ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) - ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) - ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]] - ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 1, 0, 0 :: (dereferenceable load (f16), addrspace 8) + ; PACKED-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x i32>) + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](i32) + ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST]](<2 x i16>) + ; PACKED-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; PACKED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; PACKED-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; PACKED-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; PACKED-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[DEF1]](i16) + ; PACKED-NEXT: G_STORE [[UV1]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; PACKED-NEXT: [[DEF2:%[0-9]+]]:_(f16) = G_IMPLICIT_DEF + ; PACKED-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) 
= G_BITCAST [[BITCAST2]](f16) + ; PACKED-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; PACKED-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST5]](i16) + ; PACKED-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST6]](i16) + ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; PACKED-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; PACKED-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; PACKED-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; PACKED-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[DEF2]](f16) + ; PACKED-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST8]](i16) + ; PACKED-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST9]](i16) + ; PACKED-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; PACKED-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; PACKED-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR1]](i32) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST7]](<2 x f16>) + ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST10]](<2 x f16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <3 x half>, i32 } %res, 0 @@ -1341,62 +1623,67 @@ define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_1110(<8 x i32> inreg %rs ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s16>), 
align 8, addrspace 8) - ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<4 x s32>) - ; UNPACKED-NEXT: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] - ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) - ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 1, 0, 0 :: (dereferenceable load (<3 x f16>), align 8, addrspace 8) + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<4 x i32>) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[UV]], [[C]] + ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[UV1]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C1]](i32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; UNPACKED-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[UV2]], [[C]] + ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[C2]], [[C1]](i32) + ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; UNPACKED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x i16>), [[BITCAST1]](<2 x i16>) + ; UNPACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x i16>) + ; UNPACKED-NEXT: G_STORE [[UV3]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; UNPACKED-NEXT: [[UV4:%[0-9]+]]:_(<2 x f16>), [[UV5:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST2]](<4 x f16>) + ; UNPACKED-NEXT: $vgpr0 = COPY [[UV4]](<2 x f16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[UV5]](<2 x f16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; ; PACKED-LABEL: name: image_load_tfe_v4f16_dmask_1110 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = 
COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s16>), align 8, addrspace 8) - ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<3 x s32>) - ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) - ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32) - ; PACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<3 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 1, 0, 0 :: (dereferenceable load (<3 x f16>), align 8, addrspace 8) + ; PACKED-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<3 x i32>) + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](i32) + ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](i32) + ; PACKED-NEXT: G_STORE [[UV2]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[BITCAST]](<2 x i16>) + ; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[BITCAST1]](<2 x i16>) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x f16>) + ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST3]](<2 x f16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <4 x half>, i32 
} @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <4 x half>, i32 } %res, 0 @@ -1410,61 +1697,66 @@ define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_1100(<8 x i32> inreg %rs ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>), addrspace 8) - ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<3 x s32>) - ; UNPACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) - ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] - ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: 
[[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<3 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 1, 0, 0 :: (dereferenceable load (<2 x f16>), addrspace 8) + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<3 x i32>) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[UV]], [[C]] + ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[UV1]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C1]](i32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[C2]], [[C1]](i32) + ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[C2]], [[SHL1]] + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; UNPACKED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x i16>), [[BITCAST1]](<2 x i16>) + ; UNPACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x i16>) + ; UNPACKED-NEXT: G_STORE [[UV2]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; UNPACKED-NEXT: [[UV3:%[0-9]+]]:_(<2 x f16>), [[UV4:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST2]](<4 x f16>) + ; UNPACKED-NEXT: $vgpr0 = COPY [[UV3]](<2 x f16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[UV4]](<2 x f16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; ; PACKED-LABEL: name: image_load_tfe_v4f16_dmask_1100 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 
+ ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>), addrspace 8) - ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) - ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) - ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[DEF1]](<2 x s16>) + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 1, 0, 0 :: (dereferenceable load (<2 x f16>), addrspace 8) + ; PACKED-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x i32>) + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](i32) + ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(<2 x i16>) = G_IMPLICIT_DEF + ; PACKED-NEXT: G_STORE [[UV1]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[BITCAST]](<2 x i16>) + ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[DEF1]](<2 x i16>) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x f16>) + ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x f16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <4 x half>, i32 } %res, 0 @@ -1478,59 +1770,64 @@ define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_1000(<8 x i32> inreg %rs ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; UNPACKED-NEXT: 
[[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 8) - ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) - ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) - ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]] - ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 1, 0, 0 :: (dereferenceable load (f16), addrspace 8) + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x i32>) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[UV]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[C1]], [[C2]](i32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[C1]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; UNPACKED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x i16>), [[BITCAST1]](<2 x i16>) + ; UNPACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x i16>) + ; UNPACKED-NEXT: G_STORE [[UV1]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; UNPACKED-NEXT: [[UV2:%[0-9]+]]:_(<2 x f16>), [[UV3:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST2]](<4 x f16>) + ; UNPACKED-NEXT: $vgpr0 = COPY [[UV2]](<2 x f16>) + ; UNPACKED-NEXT: 
$vgpr1 = COPY [[UV3]](<2 x f16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; ; PACKED-LABEL: name: image_load_tfe_v4f16_dmask_1000 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 8) - ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) - ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) - ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[DEF1]](<2 x s16>) + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 1, 0, 0 :: (dereferenceable load (f16), addrspace 8) + ; PACKED-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x i32>) + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](i32) + ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(<2 x i16>) = G_IMPLICIT_DEF + ; PACKED-NEXT: G_STORE [[UV1]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f16>) = G_BITCAST 
[[BITCAST]](<2 x i16>) + ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[DEF1]](<2 x i16>) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x f16>) + ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x f16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <4 x half>, i32 } %res, 0 @@ -1544,59 +1841,64 @@ define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_0000(<8 x i32> inreg %rs ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 8) - ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) - ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) - ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]] - ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST 
[[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 1, 0, 0 :: (dereferenceable load (f16), addrspace 8) + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x i32>) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[UV]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[C1]], [[C2]](i32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[C1]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; UNPACKED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x i16>), [[BITCAST1]](<2 x i16>) + ; UNPACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x i16>) + ; UNPACKED-NEXT: G_STORE [[UV1]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; UNPACKED-NEXT: [[UV2:%[0-9]+]]:_(<2 x f16>), [[UV3:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST2]](<4 x f16>) + ; UNPACKED-NEXT: $vgpr0 = COPY [[UV2]](<2 x f16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[UV3]](<2 x f16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; ; PACKED-LABEL: name: image_load_tfe_v4f16_dmask_0000 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; PACKED-NEXT: 
[[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 8) - ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) - ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) - ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[DEF1]](<2 x s16>) + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 1, 0, 0 :: (dereferenceable load (f16), addrspace 8) + ; PACKED-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x i32>) + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](i32) + ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(<2 x i16>) = G_IMPLICIT_DEF + ; PACKED-NEXT: G_STORE [[UV1]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[BITCAST]](<2 x i16>) + ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[DEF1]](<2 x i16>) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x f16>) + ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x f16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <4 x half>, i32 } %res, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.ll index adf7e6d38b989..ddb4e09f23463 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.ll @@ -6,20 +6,21 @@ define amdgpu_ps float @image_load_f32(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; 
GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s32), addrspace 8) - ; GCN-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 0 :: (dereferenceable load (f32), addrspace 8) + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GCN-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %tex = call float @llvm.amdgcn.image.load.2d.f32.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret float %tex @@ -30,22 +31,23 @@ define amdgpu_ps <2 x float> @image_load_v2f32(<8 x i32> inreg %rsrc, i32 %s, i3 ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s32>), addrspace 8) - ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GCN-NEXT: 
[[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 0 :: (dereferenceable load (<2 x f32>), addrspace 8) + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x i32>) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; GCN-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GCN-NEXT: $vgpr1 = COPY [[UV1]](f32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <2 x float> @llvm.amdgcn.image.load.2d.v2f32.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <2 x float> %tex @@ -56,23 +58,24 @@ define amdgpu_ps <3 x float> @image_load_v3f32(<8 x i32> inreg %rsrc, i32 %s, i3 ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s32>), align 16, addrspace 8) - ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), 
[[COPY7]](i32) + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 0 :: (dereferenceable load (<3 x f32>), align 16, addrspace 8) + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x i32>) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; GCN-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GCN-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GCN-NEXT: $vgpr2 = COPY [[UV2]](f32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %tex = call <3 x float> @llvm.amdgcn.image.load.2d.v3f32.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <3 x float> %tex @@ -83,24 +86,25 @@ define amdgpu_ps <4 x float> @image_load_v4f32(<8 x i32> inreg %rsrc, i32 %s, i3 ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GCN-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; 
GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 0 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<4 x f32>) + ; GCN-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GCN-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GCN-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GCN-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %tex = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x float> %tex @@ -111,23 +115,24 @@ define amdgpu_ps float @image_load_tfe_f32(<8 x i32> inreg %rsrc, i32 %s, i32 %t ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32), addrspace 8) - ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x i32>), $noreg, 
[[BUILD_VECTOR]](<8 x i32>), 1, 0, 0 :: (dereferenceable load (f32), addrspace 8) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x i32>) + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[UV]](i32) + ; GCN-NEXT: G_STORE [[UV1]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %res = call { float, i32 } @llvm.amdgcn.image.load.2d.sl_f32i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { float, i32 } %res, 0 @@ -141,24 +146,26 @@ define amdgpu_ps <2 x float> @image_load_tfe_v2f32(<8 x i32> inreg %rsrc, i32 %s ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s32>), addrspace 8) - ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) - ; GCN-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 1, 0, 0 :: (dereferenceable load (<2 x f32>), addrspace 8) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES 
[[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x i32>) + ; GCN-NEXT: G_STORE [[UV2]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[UV]](i32) + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[UV1]](i32) + ; GCN-NEXT: $vgpr0 = COPY [[BITCAST]](f32) + ; GCN-NEXT: $vgpr1 = COPY [[BITCAST1]](f32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <2 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f32i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <2 x float>, i32 } %res, 0 @@ -172,25 +179,28 @@ define amdgpu_ps <3 x float> @image_load_tfe_v3f32(<8 x i32> inreg %rsrc, i32 %s ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s32>), align 16, addrspace 8) - ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GCN-NEXT: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 1, 0, 0 :: (dereferenceable load (<3 x f32>), align 16, addrspace 8) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(i32), 
[[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GCN-NEXT: G_STORE [[UV3]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[UV]](i32) + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[UV1]](i32) + ; GCN-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[UV2]](i32) + ; GCN-NEXT: $vgpr0 = COPY [[BITCAST]](f32) + ; GCN-NEXT: $vgpr1 = COPY [[BITCAST1]](f32) + ; GCN-NEXT: $vgpr2 = COPY [[BITCAST2]](f32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %res = call { <3 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f32i32s.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <3 x float>, i32 } %res, 0 @@ -204,26 +214,30 @@ define amdgpu_ps <4 x float> @image_load_tfe_v4f32(<8 x i32> inreg %rsrc, i32 %s ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) - ; GCN-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GCN-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; GCN-NEXT: 
[[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 1, 0, 0 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x i32>) + ; GCN-NEXT: G_STORE [[UV4]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[UV]](i32) + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[UV1]](i32) + ; GCN-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[UV2]](i32) + ; GCN-NEXT: [[BITCAST3:%[0-9]+]]:_(f32) = G_BITCAST [[UV3]](i32) + ; GCN-NEXT: $vgpr0 = COPY [[BITCAST]](f32) + ; GCN-NEXT: $vgpr1 = COPY [[BITCAST1]](f32) + ; GCN-NEXT: $vgpr2 = COPY [[BITCAST2]](f32) + ; GCN-NEXT: $vgpr3 = COPY [[BITCAST3]](f32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %res = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <4 x float>, i32 } %res, 0 @@ -237,10 +251,10 @@ define amdgpu_ps float @image_load_f32_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: $vgpr0 = COPY [[DEF]](s32) + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GCN-NEXT: [[DEF:%[0-9]+]]:_(f32) = G_IMPLICIT_DEF + ; GCN-NEXT: $vgpr0 = COPY [[DEF]](f32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %tex = call float @llvm.amdgcn.image.load.2d.f32.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret float %tex @@ -251,22 +265,24 @@ define amdgpu_ps <2 x float> @image_load_v2f32_dmask_1000(<8 x i32> inreg %rsrc, ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s32), addrspace 8) - ; GCN-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) - ; GCN-NEXT: $vgpr1 
= COPY [[DEF]](s32) + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 0 :: (dereferenceable load (f32), addrspace 8) + ; GCN-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[DEF]](i32) + ; GCN-NEXT: $vgpr0 = COPY [[BITCAST]](f32) + ; GCN-NEXT: $vgpr1 = COPY [[BITCAST1]](f32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <2 x float> @llvm.amdgcn.image.load.2d.v2f32.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <2 x float> %tex @@ -277,12 +293,12 @@ define amdgpu_ps <2 x float> @image_load_v2f32_dmask_0000(<8 x i32> inreg %rsrc, ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<2 x f32>) = G_IMPLICIT_DEF + ; GCN-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[DEF]](<2 x f32>) + ; GCN-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GCN-NEXT: $vgpr1 = COPY [[UV1]](f32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <2 x float> @llvm.amdgcn.image.load.2d.v2f32.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <2 x float> %tex @@ -293,24 +309,27 @@ define amdgpu_ps <3 x float> @image_load_v3f32_dmask_1100(<8 x i32> inreg %rsrc, ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), 
[[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s32>), addrspace 8) - ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; GCN-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[DEF]](s32) + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 0 :: (dereferenceable load (<2 x f32>), addrspace 8) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x i32>) + ; GCN-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[UV]](i32) + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[UV1]](i32) + ; GCN-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[DEF]](i32) + ; GCN-NEXT: $vgpr0 = COPY [[BITCAST]](f32) + ; GCN-NEXT: $vgpr1 = COPY [[BITCAST1]](f32) + ; GCN-NEXT: $vgpr2 = COPY [[BITCAST2]](f32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %tex = call <3 x float> @llvm.amdgcn.image.load.2d.v3f32.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <3 x float> %tex @@ -321,23 +340,26 @@ define amdgpu_ps <3 x float> @image_load_v3f32_dmask_1000(<8 x i32> inreg %rsrc, ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; 
GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s32), addrspace 8) - ; GCN-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[DEF]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[DEF]](s32) + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 0 :: (dereferenceable load (f32), addrspace 8) + ; GCN-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[DEF]](i32) + ; GCN-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[DEF]](i32) + ; GCN-NEXT: $vgpr0 = COPY [[BITCAST]](f32) + ; GCN-NEXT: $vgpr1 = COPY [[BITCAST1]](f32) + ; GCN-NEXT: $vgpr2 = COPY [[BITCAST2]](f32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %tex = call <3 x float> @llvm.amdgcn.image.load.2d.v3f32.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <3 x float> %tex @@ -348,13 +370,13 @@ define amdgpu_ps <3 x float> @image_load_v3f32_dmask_0000(<8 x i32> inreg %rsrc, ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<3 x s32>) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<3 x f32>) = G_IMPLICIT_DEF + ; GCN-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[DEF]](<3 x f32>) + ; GCN-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GCN-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GCN-NEXT: $vgpr2 = COPY [[UV2]](f32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %tex = call <3 x float> @llvm.amdgcn.image.load.2d.v3f32.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 
0, i32 0) ret <3 x float> %tex @@ -365,25 +387,29 @@ define amdgpu_ps <4 x float> @image_load_v4f32_dmask_1110(<8 x i32> inreg %rsrc, ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s32>), align 16, addrspace 8) - ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) - ; GCN-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GCN-NEXT: $vgpr3 = COPY [[DEF]](s32) + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 0 :: (dereferenceable load (<3 x f32>), align 16, addrspace 8) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x i32>) + ; GCN-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[UV]](i32) + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[UV1]](i32) + ; GCN-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[UV2]](i32) + ; GCN-NEXT: [[BITCAST3:%[0-9]+]]:_(f32) = G_BITCAST [[DEF]](i32) + ; GCN-NEXT: $vgpr0 = COPY [[BITCAST]](f32) + ; GCN-NEXT: $vgpr1 = COPY [[BITCAST1]](f32) + ; GCN-NEXT: $vgpr2 = COPY [[BITCAST2]](f32) + ; GCN-NEXT: $vgpr3 = COPY [[BITCAST3]](f32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit 
$vgpr2, implicit $vgpr3 %tex = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x float> %tex @@ -394,25 +420,29 @@ define amdgpu_ps <4 x float> @image_load_v4f32_dmask_1100(<8 x i32> inreg %rsrc, ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s32>), addrspace 8) - ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; GCN-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[DEF]](s32) - ; GCN-NEXT: $vgpr3 = COPY [[DEF]](s32) + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 0 :: (dereferenceable load (<2 x f32>), addrspace 8) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x i32>) + ; GCN-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[UV]](i32) + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[UV1]](i32) + ; GCN-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[DEF]](i32) + ; GCN-NEXT: [[BITCAST3:%[0-9]+]]:_(f32) = G_BITCAST [[DEF]](i32) + ; GCN-NEXT: $vgpr0 = COPY [[BITCAST]](f32) + ; GCN-NEXT: $vgpr1 = COPY [[BITCAST1]](f32) + ; GCN-NEXT: $vgpr2 = COPY [[BITCAST2]](f32) + ; GCN-NEXT: $vgpr3 = COPY [[BITCAST3]](f32) ; GCN-NEXT: 
SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %tex = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x float> %tex @@ -423,24 +453,28 @@ define amdgpu_ps <4 x float> @image_load_v4f32_dmask_1000(<8 x i32> inreg %rsrc, ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s32), addrspace 8) - ; GCN-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[DEF]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[DEF]](s32) - ; GCN-NEXT: $vgpr3 = COPY [[DEF]](s32) + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 0 :: (dereferenceable load (f32), addrspace 8) + ; GCN-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[DEF]](i32) + ; GCN-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[DEF]](i32) + ; GCN-NEXT: [[BITCAST3:%[0-9]+]]:_(f32) = G_BITCAST [[DEF]](i32) + ; GCN-NEXT: $vgpr0 = COPY [[BITCAST]](f32) + ; GCN-NEXT: $vgpr1 = COPY [[BITCAST1]](f32) + ; GCN-NEXT: $vgpr2 = COPY [[BITCAST2]](f32) + ; GCN-NEXT: $vgpr3 = COPY [[BITCAST3]](f32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %tex = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 1, i32 
%s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x float> %tex @@ -451,14 +485,14 @@ define amdgpu_ps <4 x float> @image_load_v4f32_dmask_0000(<8 x i32> inreg %rsrc, ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GCN-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<4 x f32>) = G_IMPLICIT_DEF + ; GCN-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[DEF]](<4 x f32>) + ; GCN-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GCN-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GCN-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GCN-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %tex = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x float> %tex @@ -469,23 +503,24 @@ define amdgpu_ps float @image_load_tfe_f32_dmask_0000(<8 x i32> inreg %rsrc, i32 ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 
:: (dereferenceable load (s32), addrspace 8) - ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 1, 0, 0 :: (dereferenceable load (f32), addrspace 8) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x i32>) + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[UV]](i32) + ; GCN-NEXT: G_STORE [[UV1]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %res = call { float, i32 } @llvm.amdgcn.image.load.2d.sl_f32i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { float, i32 } %res, 0 @@ -499,25 +534,26 @@ define amdgpu_ps <2 x float> @image_load_tfe_v2f32_dmask_1000(<8 x i32> inreg %r ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32), addrspace 8) - ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; 
GCN-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; GCN-NEXT: $vgpr0 = COPY [[UV2]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV3]](s32) + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 1, 0, 0 :: (dereferenceable load (f32), addrspace 8) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x i32>) + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x i32>) + ; GCN-NEXT: G_STORE [[UV1]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; GCN-NEXT: $vgpr0 = COPY [[UV2]](f32) + ; GCN-NEXT: $vgpr1 = COPY [[UV3]](f32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <2 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f32i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <2 x float>, i32 } %res, 0 @@ -531,25 +567,26 @@ define amdgpu_ps <2 x float> @image_load_tfe_v2f32_dmask_0000(<8 x i32> inreg %r ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32), addrspace 8) - ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; GCN-NEXT: G_STORE 
[[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GCN-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; GCN-NEXT: $vgpr0 = COPY [[UV2]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV3]](s32) + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 1, 0, 0 :: (dereferenceable load (f32), addrspace 8) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x i32>) + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x i32>) + ; GCN-NEXT: G_STORE [[UV1]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; GCN-NEXT: $vgpr0 = COPY [[UV2]](f32) + ; GCN-NEXT: $vgpr1 = COPY [[UV3]](f32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <2 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f32i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <2 x float>, i32 } %res, 0 @@ -563,26 +600,27 @@ define amdgpu_ps <3 x float> @image_load_tfe_v3f32_dmask_1100(<8 x i32> inreg %r ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s32>), addrspace 8) - ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), 
[[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) - ; GCN-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GCN-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) - ; GCN-NEXT: $vgpr0 = COPY [[UV3]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV4]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[UV5]](s32) + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 1, 0, 0 :: (dereferenceable load (<2 x f32>), addrspace 8) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x i32>) + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x i32>) + ; GCN-NEXT: G_STORE [[UV2]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: [[UV3:%[0-9]+]]:_(f32), [[UV4:%[0-9]+]]:_(f32), [[UV5:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; GCN-NEXT: $vgpr0 = COPY [[UV3]](f32) + ; GCN-NEXT: $vgpr1 = COPY [[UV4]](f32) + ; GCN-NEXT: $vgpr2 = COPY [[UV5]](f32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %res = call { <3 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f32i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <3 x float>, i32 } %res, 0 @@ -596,26 +634,29 @@ define amdgpu_ps <3 x float> @image_load_tfe_v3f32_dmask_1000(<8 x i32> inreg %r ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) 
- ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32), addrspace 8) - ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[DEF1]](s32) + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 1, 0, 0 :: (dereferenceable load (f32), addrspace 8) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x i32>) + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GCN-NEXT: G_STORE [[UV1]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[UV]](i32) + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[DEF1]](i32) + ; GCN-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[DEF1]](i32) + ; GCN-NEXT: $vgpr0 = COPY [[BITCAST]](f32) + ; GCN-NEXT: $vgpr1 = COPY [[BITCAST1]](f32) + ; GCN-NEXT: $vgpr2 = COPY [[BITCAST2]](f32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %res = call { <3 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f32i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <3 x float>, i32 } %res, 0 @@ -629,26 +670,29 @@ define amdgpu_ps <3 x float> @image_load_tfe_v3f32_dmask_0000(<8 x i32> inreg %r ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; 
GCN-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32), addrspace 8) - ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[DEF1]](s32) + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 1, 0, 0 :: (dereferenceable load (f32), addrspace 8) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x i32>) + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GCN-NEXT: G_STORE [[UV1]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[UV]](i32) + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[DEF1]](i32) + ; GCN-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[DEF1]](i32) + ; GCN-NEXT: $vgpr0 = COPY [[BITCAST]](f32) + ; GCN-NEXT: $vgpr1 = COPY [[BITCAST1]](f32) + ; GCN-NEXT: $vgpr2 = COPY [[BITCAST2]](f32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %res = call { <3 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f32i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <3 x float>, i32 } %res, 0 @@ -662,27 +706,28 @@ define amdgpu_ps <4 x float> @image_load_tfe_v4f32_dmask_1110(<8 x i32> inreg %r ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = 
G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s32>), align 16, addrspace 8) - ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GCN-NEXT: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GCN-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GCN-NEXT: $vgpr0 = COPY [[UV4]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV5]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[UV6]](s32) - ; GCN-NEXT: $vgpr3 = COPY [[UV7]](s32) + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 1, 0, 0 :: (dereferenceable load (<3 x f32>), align 16, addrspace 8) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GCN-NEXT: G_STORE [[UV3]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: [[UV4:%[0-9]+]]:_(f32), [[UV5:%[0-9]+]]:_(f32), [[UV6:%[0-9]+]]:_(f32), [[UV7:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<4 x f32>) + ; GCN-NEXT: $vgpr0 = COPY [[UV4]](f32) + ; GCN-NEXT: $vgpr1 = COPY [[UV5]](f32) + ; GCN-NEXT: $vgpr2 = COPY [[UV6]](f32) + ; GCN-NEXT: $vgpr3 = COPY [[UV7]](f32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %res = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <4 x float>, i32 } %res, 0 @@ -696,27 +741,31 @@ define amdgpu_ps <4 x float> @image_load_tfe_v4f32_dmask_1100(<8 x i32> inreg %r ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: 
[[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s32>), addrspace 8) - ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr3 = COPY [[DEF1]](s32) + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 1, 0, 0 :: (dereferenceable load (<2 x f32>), addrspace 8) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x i32>) + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GCN-NEXT: G_STORE [[UV2]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[UV]](i32) + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[UV1]](i32) + ; GCN-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[DEF1]](i32) + ; GCN-NEXT: [[BITCAST3:%[0-9]+]]:_(f32) = G_BITCAST [[DEF1]](i32) + ; GCN-NEXT: $vgpr0 = COPY [[BITCAST]](f32) + ; GCN-NEXT: $vgpr1 = COPY [[BITCAST1]](f32) + ; GCN-NEXT: $vgpr2 = COPY [[BITCAST2]](f32) + ; GCN-NEXT: $vgpr3 = COPY [[BITCAST3]](f32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %res = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <4 x float>, i32 } %res, 0 @@ -730,27 +779,31 @@ define amdgpu_ps <4 x float> @image_load_tfe_v4f32_dmask_1000(<8 x i32> inreg %r ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY 
$sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32), addrspace 8) - ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr3 = COPY [[DEF1]](s32) + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 1, 0, 0 :: (dereferenceable load (f32), addrspace 8) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x i32>) + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GCN-NEXT: G_STORE [[UV1]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[UV]](i32) + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[DEF1]](i32) + ; GCN-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[DEF1]](i32) + ; GCN-NEXT: [[BITCAST3:%[0-9]+]]:_(f32) = G_BITCAST [[DEF1]](i32) + ; GCN-NEXT: $vgpr0 = COPY [[BITCAST]](f32) + ; GCN-NEXT: $vgpr1 = COPY [[BITCAST1]](f32) + ; GCN-NEXT: $vgpr2 = COPY [[BITCAST2]](f32) + ; GCN-NEXT: $vgpr3 = COPY [[BITCAST3]](f32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %res = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <4 x float>, i32 } %res, 0 @@ -764,27 +817,31 @@ define amdgpu_ps <4 x float> @image_load_tfe_v4f32_dmask_0000(<8 x i32> inreg %r ; GCN: bb.1 (%ir-block.0): ; 
GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32), addrspace 8) - ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr3 = COPY [[DEF1]](s32) + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 1, 0, 0 :: (dereferenceable load (f32), addrspace 8) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x i32>) + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GCN-NEXT: G_STORE [[UV1]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[UV]](i32) + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[DEF1]](i32) + ; GCN-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[DEF1]](i32) + ; GCN-NEXT: [[BITCAST3:%[0-9]+]]:_(f32) = G_BITCAST [[DEF1]](i32) + ; GCN-NEXT: $vgpr0 = COPY [[BITCAST]](f32) + ; GCN-NEXT: $vgpr1 = COPY [[BITCAST1]](f32) + ; GCN-NEXT: $vgpr2 = COPY [[BITCAST2]](f32) + ; GCN-NEXT: $vgpr3 = COPY [[BITCAST3]](f32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit 
$vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %res = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <4 x float>, i32 } %res, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2darraymsaa.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2darraymsaa.ll index 4d36e0f797016..f8470cefd1ff7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2darraymsaa.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2darraymsaa.ll @@ -9,51 +9,53 @@ define amdgpu_ps <4 x float> @load_2darraymsaa(<8 x i32> inreg %rsrc, i32 %s, i3 ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX6-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX6-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[BUILD_VECTOR1]](<4 x s32>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX6-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX6-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX6-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX6-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX6-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX6-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = 
G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[BUILD_VECTOR1]](<4 x i32>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 0 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<4 x f32>) + ; GFX6-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX6-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX6-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX6-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10NSA-LABEL: name: load_2darraymsaa ; GFX10NSA: bb.1 (%ir-block.0): ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD 
intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 0 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<4 x f32>) + ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %v = call <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x float> %v @@ -64,59 +66,67 @@ define amdgpu_ps <4 x float> @load_2darraymsaa_tfe(<8 x i32> inreg %rsrc, ptr ad ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX6-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX6-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX6-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32) - ; GFX6-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[BUILD_VECTOR1]](<4 x s32>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) - ; GFX6-NEXT: G_STORE [[UV4]](s32), [[MV]](p1) :: (store (s32) into %ir.out, addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX6-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX6-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX6-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; 
GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY8]](i32), [[COPY9]](i32) + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX6-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY10]](i32), [[COPY11]](i32), [[COPY12]](i32), [[COPY13]](i32) + ; GFX6-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[BUILD_VECTOR1]](<4 x i32>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), 1, 0, 0 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x i32>) + ; GFX6-NEXT: G_STORE [[UV4]](i32), [[MV]](p1) :: (store (i32) into %ir.out, addrspace 1) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[UV]](i32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[UV1]](i32) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[UV2]](i32) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(f32) = G_BITCAST [[UV3]](i32) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST]](f32) + ; GFX6-NEXT: $vgpr1 = COPY [[BITCAST1]](f32) + ; GFX6-NEXT: $vgpr2 = COPY [[BITCAST2]](f32) + ; GFX6-NEXT: $vgpr3 = COPY [[BITCAST3]](f32) ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10NSA-LABEL: name: load_2darraymsaa_tfe ; GFX10NSA: bb.1 (%ir-block.0): ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10NSA-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10NSA-NEXT: 
[[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) - ; GFX10NSA-NEXT: G_STORE [[UV4]](s32), [[MV]](p1) :: (store (s32) into %ir.out, addrspace 1) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10NSA-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY8]](i32), [[COPY9]](i32) + ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[COPY10]](i32), [[COPY11]](i32), [[COPY12]](i32), [[COPY13]](i32), [[BUILD_VECTOR]](<8 x i32>), 1, 0, 0 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x i32>) + ; GFX10NSA-NEXT: G_STORE [[UV4]](i32), [[MV]](p1) :: (store (i32) into %ir.out, addrspace 1) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[UV]](i32) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[UV1]](i32) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[UV2]](i32) + ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(f32) = G_BITCAST [[UV3]](i32) + ; GFX10NSA-NEXT: $vgpr0 = COPY [[BITCAST]](f32) + ; GFX10NSA-NEXT: $vgpr1 = COPY [[BITCAST1]](f32) + ; GFX10NSA-NEXT: $vgpr2 = COPY [[BITCAST2]](f32) + ; GFX10NSA-NEXT: $vgpr3 = COPY [[BITCAST3]](f32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 1, i32 0) %v.vec = extractvalue { <4 x float>, i32 } %v, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.3d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.3d.ll index 2c155b72c649f..bebef93abfe59 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.3d.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.3d.ll @@ -9,41 +9,43 @@ define amdgpu_ps float @image_load_3d_f32(<8 x i32> inreg %rsrc, i32 %s, i32 %t, ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, 
$vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX6-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32) - ; GFX6-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 1, [[BUILD_VECTOR1]](<3 x s32>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s32), addrspace 8) - ; GFX6-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX6-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32) + ; GFX6-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 1, [[BUILD_VECTOR1]](<3 x i32>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 0 :: (dereferenceable load (f32), addrspace 8) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX10NSA-LABEL: name: image_load_3d_f32 ; GFX10NSA: bb.1 (%ir-block.0): ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; 
GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 1, [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s32), addrspace 8) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 1, [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 0 :: (dereferenceable load (f32), addrspace 8) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX10NSA-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %tex = call float @llvm.amdgcn.image.load.3d.f32.i32(i32 1, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0) ret float %tex @@ -54,47 +56,49 @@ define amdgpu_ps float @image_load_3d_tfe_f32(<8 x i32> inreg %rsrc, i32 %s, i32 ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; 
GFX6-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr2 ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GFX6-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32) - ; GFX6-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 1, [[BUILD_VECTOR1]](<3 x s32>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32), addrspace 8) - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; GFX6-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX6-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32) + ; GFX6-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 1, [[BUILD_VECTOR1]](<3 x i32>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), 1, 0, 0 :: (dereferenceable load (f32), addrspace 8) + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x i32>) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[UV]](i32) + ; GFX6-NEXT: G_STORE [[UV1]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX10NSA-LABEL: name: image_load_3d_tfe_f32 ; GFX10NSA: bb.1 (%ir-block.0): ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr2 ; GFX10NSA-NEXT: 
[[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 1, [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32), addrspace 8) - ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; GFX10NSA-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 1, [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[BUILD_VECTOR]](<8 x i32>), 1, 0, 0 :: (dereferenceable load (f32), addrspace 8) + ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x i32>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[UV]](i32) + ; GFX10NSA-NEXT: G_STORE [[UV1]](i32), [[DEF]](p1) :: (store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; GFX10NSA-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call { float, i32 } @llvm.amdgcn.image.load.3d.sl_f32i32s.i32(i32 1, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { float, i32 } %val, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.a16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.a16.ll index 659a8a2ff254b..741862c30998d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.a16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.a16.ll @@ -9,120 +9,132 @@ define amdgpu_ps <4 x float> @sample_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg % ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR]](<8 x 
s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST1]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST2]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10-LABEL: name: sample_1d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; 
GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST1]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST2]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_1d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, 
$sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST1]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD 
intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST2]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_1d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: 
[[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST1]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST2]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -134,124 +146,144 @@ define amdgpu_ps <4 x float> @sample_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg % ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: 
(dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST2]](i16), [[BITCAST3]](i16) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.2d), 15, [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST4]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10-LABEL: name: sample_2d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR 
[[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST2]](i16), [[BITCAST3]](i16) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.2d), 15, [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: 
[[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST4]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_2d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR 
[[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST2]](i16), [[BITCAST3]](i16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.2d), 15, [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST4]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_2d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES 
[[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST2]](i16), [[BITCAST3]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.2d), 15, [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST4]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32 15, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -263,141 +295,169 @@ define amdgpu_ps <4 x float> @sample_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg % ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY 
$sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX9-NEXT: 
[[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[BITCAST4]](i16) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.3d), 15, [[CONCAT_VECTORS]](<4 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10-LABEL: name: sample_3d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.3d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 
0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[BITCAST4]](i16) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.3d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_3d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ 
$}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.3d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + 
; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[BITCAST4]](i16) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.3d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_3d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX12-NEXT: 
[[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.3d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[BITCAST4]](i16) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.3d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, 
implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32 15, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -409,141 +469,169 @@ define amdgpu_ps <4 x float> @sample_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY 
$sgpr10 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[BITCAST4]](i16) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cube), 15, [[CONCAT_VECTORS]](<4 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10-LABEL: name: sample_cube ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), 
[[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cube), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[BITCAST4]](i16) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cube), 15, 
[[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_cube ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cube), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: 
[[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[BITCAST4]](i16) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cube), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_cube ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY 
$sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cube), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[BITCAST4]](i16) + ; GFX12-NEXT: 
[[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cube), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32 15, half %s, half %t, half %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -555,124 +643,144 @@ define amdgpu_ps <4 x float> @sample_1darray(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1darray), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: 
[[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST2]](i16), [[BITCAST3]](i16) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1darray), 15, [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST4]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10-LABEL: name: sample_1darray ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; 
GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1darray), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST2]](i16), [[BITCAST3]](i16) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1darray), 15, [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST4]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_1darray ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: 
$sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1darray), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: 
[[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST2]](i16), [[BITCAST3]](i16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1darray), 15, [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST4]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_1darray ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1darray), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY 
$sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST2]](i16), [[BITCAST3]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1darray), 15, [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST4]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32 15, half %s, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -684,141 +792,169 @@ define amdgpu_ps <4 x float> @sample_2darray(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: 
[[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[BITCAST4]](i16) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR 
[[BITCAST5]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.2darray), 15, [[CONCAT_VECTORS]](<4 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10-LABEL: name: sample_2darray ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.2darray), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: 
[[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[BITCAST4]](i16) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.2darray), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_2darray ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: 
[[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.2darray), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST 
[[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[BITCAST4]](i16) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.2darray), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_2darray ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.2darray), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), 
[[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[BITCAST4]](i16) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.2darray), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32 15, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -830,129 +966,145 @@ define amdgpu_ps <4 x float> @sample_c_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, 
$sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = 
G_BITCAST [[COPY12]](f32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST1]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST2]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.1d), 15, [[CONCAT_VECTORS]](<4 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST4]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10-LABEL: name: sample_c_1d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = 
COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST1]](i32) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST2]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.1d), 15, [[BITCAST3]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST4]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_c_1d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: 
[[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST1]](i32) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST2]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.1d), 15, [[BITCAST3]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST4]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: 
$vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_c_1d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX12-NEXT: 
[[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST1]](i32) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST2]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.1d), 15, [[BITCAST3]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST4]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f16(i32 15, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -964,133 +1116,157 @@ define amdgpu_ps <4 x float> @sample_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD 
intrinsic(@llvm.amdgcn.image.sample.c.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST2]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[BITCAST4]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.2d), 15, [[CONCAT_VECTORS]](<4 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10-LABEL: name: sample_c_2d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: 
{{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX10-NEXT: 
[[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST2]](i32) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[BITCAST4]](i16) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.2d), 15, [[BITCAST5]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_c_2d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY 
[[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST2]](i32) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[BITCAST4]](i16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.2d), 15, [[BITCAST5]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_c_2d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST2]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[BITCAST4]](i16) + ; 
GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.2d), 15, [[BITCAST5]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -1102,124 +1278,144 @@ define amdgpu_ps <4 x float> @sample_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inre ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cl.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; 
GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST2]](i16), [[BITCAST3]](i16) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cl.1d), 15, [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST4]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10-LABEL: name: sample_cl_1d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = 
COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cl.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST2]](i16), [[BITCAST3]](i16) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cl.1d), 15, [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST4]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_cl_1d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: 
[[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cl.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: 
[[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST2]](i16), [[BITCAST3]](i16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cl.1d), 15, [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST4]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_cl_1d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cl.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: 
[[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST2]](i16), [[BITCAST3]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cl.1d), 15, [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST4]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f16(i32 15, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -1231,141 +1427,169 @@ define amdgpu_ps <4 x float> @sample_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inre ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) 
= COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cl.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[BITCAST4]](i16) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>) + ; GFX9-NEXT: 
[[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cl.2d), 15, [[CONCAT_VECTORS]](<4 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10-LABEL: name: sample_cl_2d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cl.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = 
COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[BITCAST4]](i16) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cl.2d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_cl_2d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), 
[[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cl.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX11-NEXT: 
[[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[BITCAST4]](i16) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cl.2d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_cl_2d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cl.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY 
[[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[BITCAST4]](i16) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cl.2d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f16(i32 15, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -1377,133 +1601,157 @@ define amdgpu_ps <4 x float> @sample_c_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; 
GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cl.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX9-NEXT: 
[[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST2]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[BITCAST4]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cl.1d), 15, [[CONCAT_VECTORS]](<4 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10-LABEL: name: sample_c_cl_1d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), 
[[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST2]](i32) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[BITCAST4]](i16) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cl.1d), 15, [[BITCAST5]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_c_cl_1d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: 
[[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST2]](i32) + ; 
GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[BITCAST4]](i16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cl.1d), 15, [[BITCAST5]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_c_cl_1d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY 
$sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST2]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[BITCAST4]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cl.1d), 15, [[BITCAST5]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f16(i32 15, float %zcompare, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -1515,149 +1763,181 @@ define amdgpu_ps <4 x float> @sample_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), 
[[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cl.2d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX9-NEXT: 
[[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST3]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[BITCAST5]](i16) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST7]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cl.2d), 15, [[CONCAT_VECTORS]](<6 x i16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST8]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10-LABEL: name: sample_c_cl_2d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: 
[[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cl.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX10-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST3]](i32) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[BITCAST5]](i16) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cl.2d), 15, [[BITCAST7]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST8:%[0-9]+]]:_(<4 x f32>) = G_BITCAST 
[[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST8]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_c_cl_2d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cl.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; 
GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX11-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST3]](i32) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[BITCAST5]](i16) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cl.2d), 15, [[BITCAST7]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST8:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST8]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_c_cl_2d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), 
[[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cl.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = 
G_BITCAST [[TRUNC2]](i16) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX12-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST3]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[BITCAST5]](i16) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cl.2d), 15, [[BITCAST7]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST8:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST8]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -1669,133 +1949,153 @@ define amdgpu_ps <4 x float> @sample_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX9-NEXT: 
[[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST2]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.1d), 15, [[CONCAT_VECTORS]](<4 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST4]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10-LABEL: name: sample_b_1d ; GFX10: bb.1.main_body: ; 
GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: 
[[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST2]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.1d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST4]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_b_1d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), 
[[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST2]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.1d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST4]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_b_1d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: 
[[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST2]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), 
[[DEF]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.1d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST4]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f16.f16(i32 15, half %bias, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -1807,141 +2107,169 @@ define amdgpu_ps <4 x float> @sample_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: 
$vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[BITCAST5]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.2d), 15, [[CONCAT_VECTORS]](<4 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10-LABEL: name: sample_b_2d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; 
GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: 
[[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[BITCAST5]](i16) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.2d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_b_2d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.2d), 15, [[BUILD_VECTOR2]](<2 x 
s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[BITCAST5]](i16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.2d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_b_2d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, 
$sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX12-NEXT: 
[[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[BITCAST5]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.2d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f16.f16(i32 15, half %bias, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -1953,141 +2281,165 @@ define amdgpu_ps <4 x float> @sample_c_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inr ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; 
GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.1d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](f32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST2]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST3]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x i16>), [[BITCAST5]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.1d), 15, [[CONCAT_VECTORS]](<6 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load 
(<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10-LABEL: name: sample_c_b_1d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; 
GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](f32) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST2]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST3]](i32) + ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.1d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BITCAST5]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_c_b_1d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: 
[[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) - ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](f32) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST2]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST3]](i32) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD 
intrinsic(@llvm.amdgcn.image.sample.c.b.1d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BITCAST5]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_c_b_1d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY 
$sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](f32) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST2]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST3]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.1d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BITCAST5]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f16.f16(i32 15, half %bias, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -2099,149 +2451,181 @@ define amdgpu_ps <4 x float> @sample_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inr ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: 
[[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.2d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) 
= G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](f32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST4]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[BITCAST6]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x i16>), [[BITCAST7]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.2d), 15, [[CONCAT_VECTORS]](<6 x i16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST8]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10-LABEL: name: sample_c_b_2d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) 
= G_IMPLICIT_DEF - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](f32) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST4]](i32) + ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[BITCAST6]](i16) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.2d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BITCAST7]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 
[[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST8:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST8]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_c_b_2d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) - ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: 
[[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](f32) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST4]](i32) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[BITCAST6]](i16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.2d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BITCAST7]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST8:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST8]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_c_b_2d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - 
; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: 
[[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](f32) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST4]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[BITCAST6]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.2d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BITCAST7]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST8:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST8]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f16.f16(i32 15, half %bias, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -2253,141 +2637,169 @@ define amdgpu_ps <4 x float> @sample_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; 
GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.cl.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[BITCAST5]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.cl.1d), 15, [[CONCAT_VECTORS]](<4 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: 
[[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10-LABEL: name: sample_b_cl_1d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.cl.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR 
[[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[BITCAST5]](i16) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.cl.1d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_b_cl_1d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: 
[[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.cl.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[BITCAST5]](i16) + ; GFX11-NEXT: 
[[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.cl.1d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_b_cl_1d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.cl.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; 
GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[BITCAST5]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.cl.1d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f16.f16(i32 15, half %bias, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -2399,153 +2811,189 @@ define amdgpu_ps <4 x float> @sample_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: 
[[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.cl.2d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; 
GFX9-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[BITCAST6]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST7]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.cl.2d), 15, [[CONCAT_VECTORS]](<6 x i16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST8]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10-LABEL: name: sample_b_cl_2d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: 
[[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.cl.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX10-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST 
[[BITCAST3]](f16) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[BITCAST6]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST7]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.cl.2d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST8:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST8]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_b_cl_2d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.cl.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), 
[[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX11-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[BITCAST6]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST7]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.cl.2d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST8:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = 
G_UNMERGE_VALUES [[BITCAST8]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_b_cl_2d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.cl.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + 
; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX12-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[BITCAST6]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST7]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.cl.2d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST8:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST8]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f16.f16(i32 15, half %bias, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -2557,149 +3005,181 @@ define amdgpu_ps <4 x float> @sample_c_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; 
GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.cl.1d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST 
[[TRUNC]](i16) + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](f32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST4]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[BITCAST6]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x i16>), [[BITCAST7]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.cl.1d), 15, [[CONCAT_VECTORS]](<6 x i16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST8]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10-LABEL: name: sample_c_b_cl_1d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = 
COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.cl.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](f32) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST4]](i32) + ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[BITCAST6]](i16) + ; GFX10-NEXT: 
[[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.cl.1d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BITCAST7]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST8:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST8]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_c_b_cl_1d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) - ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.cl.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: 
$vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](f32) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST4]](i32) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[BITCAST6]](i16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.cl.1d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BITCAST7]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST8:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST8]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_c_b_cl_1d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; 
GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.cl.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; 
GFX12-NEXT: [[COPY13:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](f32) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST4]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[BITCAST6]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.cl.1d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BITCAST7]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST8:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST8]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f16.f16(i32 15, half %bias, float %zcompare, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -2711,161 +3191,201 @@ define amdgpu_ps <4 x float> @sample_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: 
[[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.cl.2d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST 
[[TRUNC3]](i16) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](f32) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST5]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[BITCAST7]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST8]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x i16>), [[BITCAST9]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.cl.2d), 15, [[CONCAT_VECTORS]](<8 x i16>), $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[BITCAST10:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST10]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10-LABEL: name: sample_c_b_cl_2d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; 
GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.cl.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](f32) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX10-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; 
GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST5]](i32) + ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[BITCAST7]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST8]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.cl.2d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BITCAST9]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST10:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST10]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_c_b_cl_2d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) - ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[DEF]](s16) - 
; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.cl.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](f32) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX11-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST5]](i32) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[BITCAST7]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST8]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.cl.2d), 15, [[BUILD_VECTOR2]](<2 x 
i16>), [[BITCAST9]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST10:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST10]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_c_b_cl_2d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.cl.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; 
GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](f32) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX12-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST5]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[BITCAST7]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST8]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.cl.2d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BITCAST9]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST10:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST10]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY 
[[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f16.f16(i32 15, half %bias, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -2877,145 +3397,173 @@ define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.1d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: 
[[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.1d), 15, [[CONCAT_VECTORS]](<6 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10-LABEL: name: sample_d_1d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), 
[[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST 
[[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.1d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_d_1d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), 
[[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.1d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit 
$vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_d_1d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: 
[[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.1d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -3027,165 +3575,217 @@ define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: 
[[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.2d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) 
+ ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY17]](i32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX9-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX9-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX9-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST5]](f16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[BITCAST7]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST8]](i16), [[BITCAST9]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST10]](i16), [[BITCAST11]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.2d), 15, [[CONCAT_VECTORS]](<6 x i16>), $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[BITCAST12:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST12]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10-LABEL: name: sample_d_2d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR 
[[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; 
GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY17]](i32) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX10-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX10-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX10-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST5]](f16) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[BITCAST7]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST8]](i16), [[BITCAST9]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST10]](i16), [[BITCAST11]](i16) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.2d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST12:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST12]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_d_2d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) 
= COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX11-NEXT: 
[[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY17]](i32) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX11-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX11-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX11-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST5]](f16) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[BITCAST7]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST8]](i16), [[BITCAST9]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST10]](i16), [[BITCAST11]](i16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.2d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST12:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST12]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_d_2d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - 
; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX12-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX12-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY17]](i32) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = 
G_BITCAST [[TRUNC5]](i16) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX12-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX12-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX12-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST5]](f16) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[BITCAST7]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST8]](i16), [[BITCAST9]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST10]](i16), [[BITCAST11]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.2d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST12:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST12]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -3197,208 +3797,284 @@ define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: 
[[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX9-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX9-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) - ; GFX9-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC8]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), [[BUILD_VECTOR6]](<2 x s16>), [[BUILD_VECTOR7]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = 
G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY17]](i32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX9-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[COPY18]](i32) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC6]](i16) + ; GFX9-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[COPY19]](i32) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC7]](i16) + ; GFX9-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[COPY20]](i32) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC8]](i16) + ; GFX9-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX9-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX9-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX9-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX9-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX9-NEXT: [[BITCAST14:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST5]](f16) + ; GFX9-NEXT: [[BITCAST15:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST6]](f16) + ; GFX9-NEXT: [[BITCAST16:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST7]](f16) + ; GFX9-NEXT: [[BITCAST17:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST8]](f16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST9]](i16), [[BITCAST10]](i16) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST11]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST12]](i16), [[BITCAST13]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST14]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST15]](i16), [[BITCAST16]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST17]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR5]](<2 x i16>), [[BUILD_VECTOR6]](<2 x i16>), [[BUILD_VECTOR7]](<2 x i16>) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[CONCAT_VECTORS]](<12 x i16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; 
GFX9-NEXT: [[BITCAST18:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST18]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10-LABEL: name: sample_d_3d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) - ; GFX10-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR 
[[TRUNC8]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), [[BUILD_VECTOR6]](<2 x s16>), [[BUILD_VECTOR7]](<2 x s16>) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY17]](i32) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX10-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[COPY18]](i32) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC6]](i16) + ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX10-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[COPY19]](i32) + ; GFX10-NEXT: [[BITCAST7:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC7]](i16) + ; 
GFX10-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX10-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[COPY20]](i32) + ; GFX10-NEXT: [[BITCAST8:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC8]](i16) + ; GFX10-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX10-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX10-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX10-NEXT: [[BITCAST14:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST5]](f16) + ; GFX10-NEXT: [[BITCAST15:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST6]](f16) + ; GFX10-NEXT: [[BITCAST16:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST7]](f16) + ; GFX10-NEXT: [[BITCAST17:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST8]](f16) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST9]](i16), [[BITCAST10]](i16) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST11]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST12]](i16), [[BITCAST13]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST14]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST15]](i16), [[BITCAST16]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST17]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR5]](<2 x i16>), [[BUILD_VECTOR6]](<2 x i16>), [[BUILD_VECTOR7]](<2 x i16>) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[CONCAT_VECTORS]](<12 x i16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST18:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST18]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_d_3d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), 
[[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX11-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX11-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX11-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) - ; GFX11-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX11-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32) - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC8]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR6]](<2 x s16>), [[BUILD_VECTOR7]](<2 x s16>) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: 
[[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY17]](i32) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX11-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[COPY18]](i32) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC6]](i16) + ; GFX11-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX11-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[COPY19]](i32) + ; GFX11-NEXT: [[BITCAST7:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC7]](i16) + ; GFX11-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX11-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[COPY20]](i32) + ; GFX11-NEXT: [[BITCAST8:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC8]](i16) + ; GFX11-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX11-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX11-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX11-NEXT: [[BITCAST14:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST5]](f16) + ; GFX11-NEXT: [[BITCAST15:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST6]](f16) + ; GFX11-NEXT: [[BITCAST16:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST7]](f16) + ; GFX11-NEXT: [[BITCAST17:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST8]](f16) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST9]](i16), [[BITCAST10]](i16) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST11]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST12]](i16), [[BITCAST13]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST14]](i16), [[DEF]](i16) + ; GFX11-NEXT: 
[[BUILD_VECTOR6:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST15]](i16), [[BITCAST16]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST17]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR6]](<2 x i16>), [[BUILD_VECTOR7]](<2 x i16>) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR5]](<2 x i16>), [[CONCAT_VECTORS]](<4 x i16>), $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST18:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST18]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_d_3d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX12-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX12-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX12-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX12-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX12-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = 
G_TRUNC [[COPY19]](s32) - ; GFX12-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX12-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC8]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR5]](<2 x s16>), [[BUILD_VECTOR6]](<2 x s16>), [[BUILD_VECTOR7]](<2 x s16>) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX12-NEXT: 
[[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX12-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY17]](i32) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX12-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX12-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[COPY18]](i32) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC6]](i16) + ; GFX12-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX12-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[COPY19]](i32) + ; GFX12-NEXT: [[BITCAST7:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC7]](i16) + ; GFX12-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX12-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[COPY20]](i32) + ; GFX12-NEXT: [[BITCAST8:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC8]](i16) + ; GFX12-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX12-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX12-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX12-NEXT: [[BITCAST14:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST5]](f16) + ; GFX12-NEXT: [[BITCAST15:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST6]](f16) + ; GFX12-NEXT: [[BITCAST16:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST7]](f16) + ; GFX12-NEXT: [[BITCAST17:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST8]](f16) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST9]](i16), [[BITCAST10]](i16) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST11]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST12]](i16), [[BITCAST13]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST14]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST15]](i16), [[BITCAST16]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST17]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR5]](<2 x i16>), [[BUILD_VECTOR6]](<2 x i16>), [[BUILD_VECTOR7]](<2 x i16>) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), [[CONCAT_VECTORS]](<6 x i16>), $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST18:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST18]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32 15, half %dsdh, 
half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -3410,153 +4086,185 @@ define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inr ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.1d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST3]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i16>) = G_CONCAT_VECTORS [[BITCAST7]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.1d), 15, [[CONCAT_VECTORS]](<8 x i16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST8]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10-LABEL: name: sample_c_d_1d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = 
COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: 
[[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX10-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST3]](i32) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.1d), 15, [[BITCAST7]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST8:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST8]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_c_d_1d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = 
COPY $vgpr3 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX11-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST3]](i32) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[DEF]](i16) + ; GFX11-NEXT: 
[[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.1d), 15, [[BITCAST7]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST8:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST8]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_c_d_1d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), 
[[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX12-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST3]](i32) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.1d), 15, [[BITCAST7]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST8:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST8]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, 
implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -3568,173 +4276,229 @@ define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inr ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX9-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.2d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY 
[[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY17]](i32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX9-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY18]](i32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX9-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX9-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX9-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX9-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST5]](f16) + ; GFX9-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST6]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST7]](i16), [[BITCAST8]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST9]](i16), [[BITCAST10]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST11]](i16), [[BITCAST12]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i16>) = G_CONCAT_VECTORS [[BITCAST13]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.2d), 15, [[CONCAT_VECTORS]](<8 x i16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, 
[[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[BITCAST14:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST14]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10-LABEL: name: sample_c_d_2d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), 
[[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY17]](i32) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY18]](i32) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX10-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX10-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX10-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX10-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST5]](f16) + ; GFX10-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST6]](i32) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST7]](i16), [[BITCAST8]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST9]](i16), [[BITCAST10]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST11]](i16), [[BITCAST12]](i16) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD 
intrinsic(@llvm.amdgcn.image.sample.c.d.2d), 15, [[BITCAST13]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST14:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST14]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_c_d_2d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, 
$noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY17]](i32) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY18]](i32) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX11-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX11-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX11-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX11-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST5]](f16) + ; GFX11-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST6]](i32) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST7]](i16), [[BITCAST8]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST9]](i16), [[BITCAST10]](i16) + ; GFX11-NEXT: 
[[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST11]](i16), [[BITCAST12]](i16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.2d), 15, [[BITCAST13]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST14:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST14]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_c_d_2d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX12-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX12-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = 
G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX12-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY17]](i32) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX12-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY18]](i32) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX12-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX12-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX12-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX12-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST5]](f16) + ; GFX12-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST6]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x 
i16>) = G_BUILD_VECTOR [[BITCAST7]](i16), [[BITCAST8]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST9]](i16), [[BITCAST10]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST11]](i16), [[BITCAST12]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.2d), 15, [[BITCAST13]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST14:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST14]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -3746,153 +4510,189 @@ define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR 
[[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.1d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[BITCAST7]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD 
intrinsic(@llvm.amdgcn.image.sample.d.cl.1d), 15, [[CONCAT_VECTORS]](<6 x i16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST8]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10-LABEL: name: sample_d_cl_1d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; 
GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX10-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[BITCAST7]](i16) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.1d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST8:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST8]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_d_cl_1d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; 
GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), 
[[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX11-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[BITCAST7]](i16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.1d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST8:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST8]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_d_cl_1d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR 
[[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) 
+ ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX12-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[BITCAST7]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.1d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST8:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST8]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -3904,181 +4704,241 @@ define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX9-NEXT: 
[[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX9-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.2d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX9-NEXT: 
[[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY17]](i32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX9-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[COPY18]](i32) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC6]](i16) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX9-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX9-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX9-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX9-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST5]](f16) + ; GFX9-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST6]](f16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST7]](i16), [[BITCAST8]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST9]](i16), [[BITCAST10]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST11]](i16), [[BITCAST12]](i16) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST13]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR5]](<2 x i16>) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.2d), 15, [[CONCAT_VECTORS]](<8 x i16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[BITCAST14:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST14]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10-LABEL: name: sample_d_cl_2d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 
- ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; 
GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY17]](i32) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX10-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[COPY18]](i32) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC6]](i16) + ; GFX10-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX10-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX10-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX10-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST5]](f16) + ; GFX10-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST6]](f16) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST7]](i16), [[BITCAST8]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST9]](i16), [[BITCAST10]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST11]](i16), [[BITCAST12]](i16) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST13]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.2d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR5]](<2 x i16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST14:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST14]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_d_cl_2d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: 
[[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX11-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; 
GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY17]](i32) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX11-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[COPY18]](i32) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC6]](i16) + ; GFX11-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX11-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX11-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX11-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST5]](f16) + ; GFX11-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST6]](f16) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST7]](i16), [[BITCAST8]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST9]](i16), [[BITCAST10]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST11]](i16), [[BITCAST12]](i16) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST13]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.2d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR5]](<2 x i16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST14:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST14]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, 
implicit $vgpr3 ; ; GFX12-LABEL: name: sample_d_cl_2d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX12-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX12-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX12-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: 
[[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX12-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY17]](i32) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX12-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX12-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[COPY18]](i32) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC6]](i16) + ; GFX12-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX12-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX12-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX12-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST5]](f16) + ; GFX12-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST6]](f16) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST7]](i16), [[BITCAST8]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST9]](i16), [[BITCAST10]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST11]](i16), [[BITCAST12]](i16) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST13]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.2d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR5]](<2 x i16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 
0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST14:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST14]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -4090,161 +4950,201 @@ define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.1d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: 
(dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX9-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST4]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST7]](i16), [[BITCAST8]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i16>) = G_CONCAT_VECTORS [[BITCAST9]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.1d), 15, [[CONCAT_VECTORS]](<8 x i16>), $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: 
[[BITCAST10:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST10]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10-LABEL: name: sample_c_d_cl_1d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: 
[[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX10-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX10-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST4]](i32) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST7]](i16), [[BITCAST8]](i16) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.1d), 15, [[BITCAST9]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST10:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST10]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_c_d_cl_1d ; GFX11: 
bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), 
[[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX11-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX11-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST4]](i32) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST7]](i16), [[BITCAST8]](i16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.1d), 15, [[BITCAST9]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST10:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST10]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_c_d_cl_1d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: 
[[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) 
+ ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX12-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX12-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST4]](i32) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST7]](i16), [[BITCAST8]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.1d), 15, [[BITCAST9]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST10:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST10]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -4256,190 +5156,254 @@ define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), 
[[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX9-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX9-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.2d), 15, [[CONCAT_VECTORS]](<10 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX9-NEXT: 
[[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY17]](i32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX9-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY18]](i32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX9-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[COPY19]](i32) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC6]](i16) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX9-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX9-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX9-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX9-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX9-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST5]](f16) + ; GFX9-NEXT: [[BITCAST14:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST6]](f16) + ; GFX9-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST7]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST8]](i16), [[BITCAST9]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST10]](i16), [[BITCAST11]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST12]](i16), [[BITCAST13]](i16) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST14]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x i16>) = G_CONCAT_VECTORS [[BITCAST15]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR5]](<2 x i16>) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.2d), 15, [[CONCAT_VECTORS]](<10 x i16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[BITCAST16:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST16]](<4 
x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10-LABEL: name: sample_c_d_cl_2d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), 
[[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY17]](i32) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY18]](i32) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX10-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[COPY19]](i32) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC6]](i16) + ; GFX10-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX10-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX10-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX10-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX10-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST5]](f16) + ; GFX10-NEXT: [[BITCAST14:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST6]](f16) + ; GFX10-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST7]](i32) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST8]](i16), [[BITCAST9]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST10]](i16), 
[[BITCAST11]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST12]](i16), [[BITCAST13]](i16) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST14]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.2d), 15, [[BITCAST15]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR5]](<2 x i16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST16:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST16]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_c_d_cl_2d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX11-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX11-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX11-NEXT: 
[[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY17]](i32) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY18]](i32) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX11-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX11-NEXT: 
[[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[COPY19]](i32) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC6]](i16) + ; GFX11-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX11-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX11-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX11-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX11-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST5]](f16) + ; GFX11-NEXT: [[BITCAST14:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST6]](f16) + ; GFX11-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST7]](i32) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST8]](i16), [[BITCAST9]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST10]](i16), [[BITCAST11]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST12]](i16), [[BITCAST13]](i16) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST14]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.2d), 15, [[BITCAST15]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR5]](<2 x i16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST16:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST16]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_c_d_cl_2d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: 
[[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX12-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX12-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX12-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX12-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX12-NEXT: 
[[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX12-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY17]](i32) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX12-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY18]](i32) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX12-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX12-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[COPY19]](i32) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC6]](i16) + ; GFX12-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX12-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX12-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX12-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX12-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST5]](f16) + ; GFX12-NEXT: [[BITCAST14:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST6]](f16) + ; GFX12-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST7]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST8]](i16), [[BITCAST9]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST10]](i16), [[BITCAST11]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST12]](i16), [[BITCAST13]](i16) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST14]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR5]](<2 x i16>) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.2d), 15, [[BITCAST15]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[CONCAT_VECTORS]](<4 x i16>), $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST16:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST16]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> 
@llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -4451,145 +5415,173 @@ define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inre ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.1d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), 
[[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.1d), 15, [[CONCAT_VECTORS]](<6 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10-LABEL: name: sample_cd_1d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: 
[[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX10-NEXT: 
[[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.1d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_cd_1d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; 
GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.1d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_cd_1d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, 
$sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; 
GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.1d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -4601,165 +5593,217 @@ define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inre ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: 
[[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.2d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST 
[[TRUNC3]](i16) + ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY17]](i32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX9-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX9-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX9-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST5]](f16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[BITCAST7]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST8]](i16), [[BITCAST9]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST10]](i16), [[BITCAST11]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.2d), 15, [[CONCAT_VECTORS]](<6 x i16>), $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[BITCAST12:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST12]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10-LABEL: name: sample_cd_2d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC 
[[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = 
G_TRUNC [[COPY16]](i32) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY17]](i32) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX10-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX10-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX10-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST5]](f16) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[BITCAST7]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST8]](i16), [[BITCAST9]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST10]](i16), [[BITCAST11]](i16) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.2d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST12:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST12]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_cd_2d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: 
[[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC 
[[COPY17]](i32) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX11-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX11-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX11-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST5]](f16) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[BITCAST7]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST8]](i16), [[BITCAST9]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST10]](i16), [[BITCAST11]](i16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.2d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST12:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST12]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_cd_2d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX12-NEXT: 
[[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX12-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX12-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY17]](i32) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = 
G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX12-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX12-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX12-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST5]](f16) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[BITCAST7]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST8]](i16), [[BITCAST9]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST10]](i16), [[BITCAST11]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.2d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST12:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST12]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -4771,153 +5815,185 @@ define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) 
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.1d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST3]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[DEF]](i16) + ; 
GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i16>) = G_CONCAT_VECTORS [[BITCAST7]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.1d), 15, [[CONCAT_VECTORS]](<8 x i16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST8]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10-LABEL: name: sample_c_cd_1d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), 
[[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX10-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST3]](i32) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.1d), 15, [[BITCAST7]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST8:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST8]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit 
$vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_c_cd_1d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 
+ ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX11-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST3]](i32) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.1d), 15, [[BITCAST7]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST8:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST8]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_c_cd_1d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: 
[[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX12-NEXT: 
[[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX12-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST3]](i32) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.1d), 15, [[BITCAST7]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST8:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST8]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -4929,173 +6005,229 @@ define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC 
[[COPY15]](s32) - ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX9-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.2d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC 
[[COPY17]](i32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX9-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY18]](i32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX9-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX9-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX9-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX9-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST5]](f16) + ; GFX9-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST6]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST7]](i16), [[BITCAST8]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST9]](i16), [[BITCAST10]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST11]](i16), [[BITCAST12]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i16>) = G_CONCAT_VECTORS [[BITCAST13]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.2d), 15, [[CONCAT_VECTORS]](<8 x i16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[BITCAST14:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST14]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10-LABEL: name: sample_c_cd_2d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: 
[[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST 
[[TRUNC2]](i16) + ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY17]](i32) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY18]](i32) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX10-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX10-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX10-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX10-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST5]](f16) + ; GFX10-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST6]](i32) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST7]](i16), [[BITCAST8]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST9]](i16), [[BITCAST10]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST11]](i16), [[BITCAST12]](i16) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.2d), 15, [[BITCAST13]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST14:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST14]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_c_cd_2d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: 
[[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST 
[[TRUNC1]](i16) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY17]](i32) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY18]](i32) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX11-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX11-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX11-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX11-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST5]](f16) + ; GFX11-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST6]](i32) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST7]](i16), [[BITCAST8]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST9]](i16), [[BITCAST10]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST11]](i16), [[BITCAST12]](i16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.2d), 15, [[BITCAST13]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST14:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST14]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_c_cd_2d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), 
[[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX12-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX12-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST 
[[TRUNC]](i16) + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX12-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY17]](i32) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX12-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY18]](i32) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX12-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX12-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX12-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX12-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST5]](f16) + ; GFX12-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST6]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST7]](i16), [[BITCAST8]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST9]](i16), [[BITCAST10]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST11]](i16), [[BITCAST12]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.2d), 15, [[BITCAST13]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST14:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST14]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -5107,153 +6239,189 @@ define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> i ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; 
GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.1d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: 
[[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[BITCAST7]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.1d), 15, [[CONCAT_VECTORS]](<6 x i16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST8]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10-LABEL: name: sample_cd_cl_1d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), 
[[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; 
GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX10-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[BITCAST7]](i16) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.1d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST8:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST8]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_cd_cl_1d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-NEXT: 
[[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX11-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[BITCAST7]](i16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.1d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST8:%[0-9]+]]:_(<4 x f32>) = 
G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST8]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_cd_cl_1d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; 
GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX12-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[BITCAST7]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.1d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST8:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST8]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -5265,181 +6433,241 @@ define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> i ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, 
$vgpr6 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX9-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.2d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: 
[[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY17]](i32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX9-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[COPY18]](i32) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC6]](i16) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX9-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX9-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX9-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX9-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST5]](f16) + ; GFX9-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST6]](f16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST7]](i16), [[BITCAST8]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST9]](i16), [[BITCAST10]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST11]](i16), [[BITCAST12]](i16) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST13]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR5]](<2 x i16>) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.2d), 15, [[CONCAT_VECTORS]](<8 x i16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: 
[[BITCAST14:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST14]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10-LABEL: name: sample_cd_cl_2d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: 
[[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY17]](i32) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX10-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[COPY18]](i32) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC6]](i16) + ; GFX10-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX10-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX10-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX10-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST5]](f16) + ; GFX10-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST6]](f16) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST7]](i16), [[BITCAST8]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST9]](i16), [[BITCAST10]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST11]](i16), [[BITCAST12]](i16) + ; 
GFX10-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST13]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.2d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR5]](<2 x i16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST14:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST14]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_cd_cl_2d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX11-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; 
GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY17]](i32) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX11-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[COPY18]](i32) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC6]](i16) + ; GFX11-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX11-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST 
[[BITCAST3]](f16) + ; GFX11-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX11-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST5]](f16) + ; GFX11-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST6]](f16) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST7]](i16), [[BITCAST8]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST9]](i16), [[BITCAST10]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST11]](i16), [[BITCAST12]](i16) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST13]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.2d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR5]](<2 x i16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST14:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST14]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_cd_cl_2d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) 
- ; GFX12-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX12-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX12-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX12-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC 
[[COPY17]](i32) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX12-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX12-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[COPY18]](i32) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC6]](i16) + ; GFX12-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX12-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX12-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX12-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST5]](f16) + ; GFX12-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST6]](f16) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST7]](i16), [[BITCAST8]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST9]](i16), [[BITCAST10]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST11]](i16), [[BITCAST12]](i16) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST13]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.2d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR5]](<2 x i16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST14:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST14]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -5451,161 +6679,201 @@ define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = 
COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.1d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) 
= G_TRUNC [[COPY15]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX9-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST4]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST7]](i16), [[BITCAST8]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i16>) = G_CONCAT_VECTORS [[BITCAST9]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.1d), 15, [[CONCAT_VECTORS]](<8 x i16>), $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[BITCAST10:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST10]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10-LABEL: name: sample_c_cd_cl_1d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC 
[[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; 
GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX10-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX10-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST4]](i32) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST7]](i16), [[BITCAST8]](i16) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.1d), 15, [[BITCAST9]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST10:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST10]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_c_cd_cl_1d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX11-NEXT: 
[[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX11-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX11-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST4]](i32) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR 
[[BITCAST6]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST7]](i16), [[BITCAST8]](i16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.1d), 15, [[BITCAST9]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST10:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST10]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_c_cd_cl_1d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 
x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX12-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX12-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST4]](i32) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST7]](i16), [[BITCAST8]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.1d), 15, [[BITCAST9]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: 
[[BITCAST10:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST10]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -5617,190 +6885,254 @@ define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX9-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX9-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = 
G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.2d), 15, [[CONCAT_VECTORS]](<10 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY17]](i32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX9-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY18]](i32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX9-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[COPY19]](i32) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC6]](i16) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX9-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX9-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX9-NEXT: 
[[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX9-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX9-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST5]](f16) + ; GFX9-NEXT: [[BITCAST14:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST6]](f16) + ; GFX9-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST7]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST8]](i16), [[BITCAST9]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST10]](i16), [[BITCAST11]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST12]](i16), [[BITCAST13]](i16) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST14]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x i16>) = G_CONCAT_VECTORS [[BITCAST15]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR5]](<2 x i16>) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.2d), 15, [[CONCAT_VECTORS]](<10 x i16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[BITCAST16:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST16]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10-LABEL: name: sample_c_cd_cl_2d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: 
[[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST 
[[TRUNC2]](i16) + ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY17]](i32) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY18]](i32) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX10-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[COPY19]](i32) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC6]](i16) + ; GFX10-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX10-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX10-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX10-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX10-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST5]](f16) + ; GFX10-NEXT: [[BITCAST14:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST6]](f16) + ; GFX10-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST7]](i32) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST8]](i16), [[BITCAST9]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST10]](i16), [[BITCAST11]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST12]](i16), [[BITCAST13]](i16) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST14]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.2d), 15, [[BITCAST15]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR5]](<2 x i16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST16:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST16]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_c_cd_cl_2d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: 
[[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX11-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX11-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: 
[[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY17]](i32) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY18]](i32) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX11-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX11-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[COPY19]](i32) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC6]](i16) + ; GFX11-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX11-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX11-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX11-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX11-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST5]](f16) + ; GFX11-NEXT: [[BITCAST14:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST6]](f16) + ; GFX11-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST7]](i32) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST8]](i16), [[BITCAST9]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST10]](i16), [[BITCAST11]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST12]](i16), [[BITCAST13]](i16) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST14]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.2d), 15, [[BITCAST15]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR5]](<2 x i16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST16:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), 
[[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST16]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_c_cd_cl_2d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX12-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX12-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX12-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX12-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, $noreg, 
[[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX12-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY17]](i32) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX12-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY18]](i32) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX12-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX12-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[COPY19]](i32) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC6]](i16) + ; GFX12-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX12-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX12-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX12-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX12-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST5]](f16) + ; GFX12-NEXT: [[BITCAST14:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST6]](f16) + ; GFX12-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x i16>) = G_BITCAST 
[[BITCAST7]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST8]](i16), [[BITCAST9]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST10]](i16), [[BITCAST11]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST12]](i16), [[BITCAST13]](i16) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST14]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR5]](<2 x i16>) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.2d), 15, [[BITCAST15]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[CONCAT_VECTORS]](<4 x i16>), $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST16:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST16]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -5812,124 +7144,144 @@ define amdgpu_ps <4 x float> @sample_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD 
intrinsic(@llvm.amdgcn.image.sample.l.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST2]](i16), [[BITCAST3]](i16) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.l.1d), 15, [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST4]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10-LABEL: name: sample_l_1d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: 
[[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.l.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST2]](i16), [[BITCAST3]](i16) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.l.1d), 15, [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: 
(dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST4]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_l_1d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.l.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: 
[[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST2]](i16), [[BITCAST3]](i16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.l.1d), 15, [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST4]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_l_1d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.l.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load 
(<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST2]](i16), [[BITCAST3]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.l.1d), 15, [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST4]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32 15, half %s, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -5941,141 +7293,169 @@ define amdgpu_ps <4 x float> @sample_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY 
$sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.l.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; 
GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[BITCAST4]](i16) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.l.2d), 15, [[CONCAT_VECTORS]](<4 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10-LABEL: name: sample_l_2d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD 
intrinsic(@llvm.amdgcn.image.sample.l.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[BITCAST4]](i16) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.l.2d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_l_2d ; GFX11: 
bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.l.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; 
GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[BITCAST4]](i16) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.l.2d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_l_2d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR 
[[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.l.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[BITCAST4]](i16) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.l.2d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY 
[[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32 15, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -6087,133 +7467,157 @@ define amdgpu_ps <4 x float> @sample_c_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inr ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.l.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), 
[[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST2]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[BITCAST4]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.l.1d), 15, [[CONCAT_VECTORS]](<4 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10-LABEL: name: sample_c_l_1d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) 
= G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.l.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST2]](i32) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[BITCAST4]](i16) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.l.1d), 15, [[BITCAST5]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY 
[[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_c_l_1d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.l.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: 
[[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST2]](i32) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[BITCAST4]](i16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.l.1d), 15, [[BITCAST5]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_c_l_1d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.l.1d), 15, 
[[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST2]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[BITCAST4]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.l.1d), 15, [[BITCAST5]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32 15, float %zcompare, half %s, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -6225,149 +7629,181 @@ define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inr ; GFX9: bb.1.main_body: ; GFX9-NEXT: 
liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.l.2d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY 
$sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST3]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[BITCAST5]](i16) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST7]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.l.2d), 15, [[CONCAT_VECTORS]](<6 x i16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST8]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10-LABEL: name: sample_c_l_2d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x 
s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.l.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX10-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST 
[[BITCAST3]](i32) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[BITCAST5]](i16) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.l.2d), 15, [[BITCAST7]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST8:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST8]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_c_l_2d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.l.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: 
[[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX11-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST3]](i32) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[BITCAST5]](i16) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.l.2d), 15, [[BITCAST7]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST8:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST8]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_c_l_2d ; GFX12: bb.1.main_body: ; 
GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.l.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = 
G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX12-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST3]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[BITCAST5]](i16) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.l.2d), 15, [[BITCAST7]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST8:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST8]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -6379,120 +7815,132 @@ define amdgpu_ps <4 x float> @sample_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inre ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY 
$sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.lz.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST1]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.lz.1d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST2]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10-LABEL: name: sample_lz_1d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: 
[[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.lz.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST1]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.lz.1d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), 
addrspace 8) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST2]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_lz_1d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.lz.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: 
[[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST1]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.lz.1d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST2]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_lz_1d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.lz.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; 
GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST1]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.lz.1d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST2]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f16(i32 15, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -6504,124 +7952,144 @@ define amdgpu_ps <4 x float> @sample_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inre ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR 
[[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.lz.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST2]](i16), [[BITCAST3]](i16) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.lz.2d), 15, [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST4]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10-LABEL: name: sample_lz_2d ; GFX10: bb.1.main_body: ; 
GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.lz.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: 
[[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST2]](i16), [[BITCAST3]](i16) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.lz.2d), 15, [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST4]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_lz_2d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.lz.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 
+ ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST2]](i16), [[BITCAST3]](i16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.lz.2d), 15, [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST4]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_lz_2d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; 
GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.lz.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST2]](i16), [[BITCAST3]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.lz.2d), 15, [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST4]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f16(i32 15, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -6633,129 +8101,145 @@ define amdgpu_ps <4 x float> 
@sample_c_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.lz.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + 
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST1]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST2]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.lz.1d), 15, [[CONCAT_VECTORS]](<4 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST4]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10-LABEL: name: sample_c_lz_1d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.lz.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY 
[[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST1]](i32) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST2]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.lz.1d), 15, [[BITCAST3]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST4]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_c_lz_1d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: 
[[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.lz.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST1]](i32) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST2]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.lz.1d), 15, [[BITCAST3]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES 
[[BITCAST4]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_c_lz_1d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.lz.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), 
[[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST1]](i32) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST2]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.lz.1d), 15, [[BITCAST3]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST4]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f16(i32 15, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -6767,133 +8251,157 @@ define amdgpu_ps <4 x float> @sample_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), 
[[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.lz.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST2]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[BITCAST4]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.lz.2d), 15, [[CONCAT_VECTORS]](<4 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10-LABEL: name: sample_c_lz_2d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: 
$sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.lz.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; 
GFX10-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST2]](i32) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[BITCAST4]](i16) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.lz.2d), 15, [[BITCAST5]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_c_lz_2d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.lz.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), 
[[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST2]](i32) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[BITCAST4]](i16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.lz.2d), 15, [[BITCAST5]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_c_lz_2d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) 
= COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.lz.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = 
G_BITCAST [[BITCAST2]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[BITCAST4]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.lz.2d), 15, [[BITCAST5]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -6905,184 +8413,248 @@ define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX9-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX9-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) - ; GFX9-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX9-NEXT: 
[[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 4, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (s32), addrspace 8) - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY17]](i32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX9-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY18]](i32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX9-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY19]](i32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX9-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC 
[[COPY20]](i32) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC6]](i16) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](f32) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX9-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX9-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX9-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX9-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX9-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST5]](f16) + ; GFX9-NEXT: [[BITCAST14:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST6]](f16) + ; GFX9-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[COPY12]](i32) + ; GFX9-NEXT: [[BITCAST16:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST7]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST8]](i16), [[BITCAST9]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST10]](i16), [[BITCAST11]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST12]](i16), [[BITCAST13]](i16) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST14]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x i16>) = G_CONCAT_VECTORS [[BITCAST15]](<2 x i16>), [[BITCAST16]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR5]](<2 x i16>) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 4, [[CONCAT_VECTORS]](<12 x i16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (f32), addrspace 8) + ; GFX9-NEXT: [[BITCAST17:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST17]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX10-LABEL: name: sample_c_d_o_2darray_V1 ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY 
$vgpr2 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) - ; GFX10-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 4, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (s32), addrspace 8) - ; GFX10-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX10-NEXT: 
[[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY17]](i32) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY18]](i32) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY19]](i32) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX10-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX10-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[COPY20]](i32) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC6]](i16) + ; GFX10-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](f32) + ; GFX10-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX10-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX10-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX10-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST5]](f16) + ; GFX10-NEXT: [[BITCAST14:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST6]](f16) + ; GFX10-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[COPY12]](i32) + ; GFX10-NEXT: [[BITCAST16:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST7]](i32) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST8]](i16), [[BITCAST9]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST10]](i16), [[BITCAST11]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST12]](i16), [[BITCAST13]](i16) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST14]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x i16>) = G_CONCAT_VECTORS [[BITCAST15]](<2 x i16>), [[BITCAST16]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR5]](<2 x i16>) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 4, [[CONCAT_VECTORS]](<12 x i16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (f32), addrspace 8) + ; GFX10-NEXT: [[BITCAST17:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX10-NEXT: $vgpr0 = COPY [[BITCAST17]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX11-LABEL: name: sample_c_d_o_2darray_V1 ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: 
[[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX11-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) - ; GFX11-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX11-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 4, [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (s32), addrspace 8) - ; GFX11-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = 
G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY17]](i32) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY18]](i32) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX11-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY19]](i32) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX11-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX11-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[COPY20]](i32) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC6]](i16) + ; GFX11-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](f32) + ; GFX11-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX11-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX11-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX11-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST5]](f16) + ; GFX11-NEXT: [[BITCAST14:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST6]](f16) + ; GFX11-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[COPY12]](i32) + ; GFX11-NEXT: [[BITCAST16:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST7]](i32) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST8]](i16), [[BITCAST9]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST10]](i16), [[BITCAST11]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST12]](i16), [[BITCAST13]](i16) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST14]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR5]](<2 x i16>) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 4, [[BITCAST15]](<2 x i16>), [[BITCAST16]](<2 x 
i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[CONCAT_VECTORS]](<4 x i16>), $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (f32), addrspace 8) + ; GFX11-NEXT: [[BITCAST17:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX11-NEXT: $vgpr0 = COPY [[BITCAST17]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX12-LABEL: name: sample_c_d_o_2darray_V1 ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX12-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX12-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX12-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) - ; GFX12-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX12-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = 
G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 4, [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (s32), addrspace 8) - ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX12-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY17]](i32) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX12-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY18]](i32) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX12-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY19]](i32) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX12-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX12-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[COPY20]](i32) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC6]](i16) + ; GFX12-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](f32) + ; GFX12-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX12-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX12-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX12-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST5]](f16) + ; GFX12-NEXT: [[BITCAST14:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST6]](f16) + ; GFX12-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x i16>) = G_BITCAST 
[[COPY12]](i32) + ; GFX12-NEXT: [[BITCAST16:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST7]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST8]](i16), [[BITCAST9]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST10]](i16), [[BITCAST11]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST12]](i16), [[BITCAST13]](i16) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST14]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR5]](<2 x i16>) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 4, [[BITCAST15]](<2 x i16>), [[BITCAST16]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[CONCAT_VECTORS]](<6 x i16>), $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (f32), addrspace 8) + ; GFX12-NEXT: [[BITCAST17:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX12-NEXT: $vgpr0 = COPY [[BITCAST17]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f16(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -7094,192 +8666,256 @@ define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX9-NEXT: 
[[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX9-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) - ; GFX9-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 6, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<2 x s32>), addrspace 8) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = 
G_TRUNC [[COPY17]](i32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX9-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY18]](i32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX9-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY19]](i32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX9-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[COPY20]](i32) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC6]](i16) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](f32) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX9-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX9-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX9-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX9-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX9-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST5]](f16) + ; GFX9-NEXT: [[BITCAST14:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST6]](f16) + ; GFX9-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[COPY12]](i32) + ; GFX9-NEXT: [[BITCAST16:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST7]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST8]](i16), [[BITCAST9]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST10]](i16), [[BITCAST11]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST12]](i16), [[BITCAST13]](i16) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST14]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x i16>) = G_CONCAT_VECTORS [[BITCAST15]](<2 x i16>), [[BITCAST16]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR5]](<2 x i16>) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 6, [[CONCAT_VECTORS]](<12 x i16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<2 x f32>), addrspace 8) + ; GFX9-NEXT: [[BITCAST17:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST17]](<2 x f32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](f32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; ; GFX10-LABEL: name: sample_c_d_o_2darray_V2 ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; 
GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) - ; GFX10-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 6, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<2 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR 
[[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY17]](i32) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY18]](i32) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY19]](i32) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX10-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX10-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[COPY20]](i32) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC6]](i16) + ; GFX10-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](f32) + ; GFX10-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX10-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX10-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX10-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST5]](f16) + ; GFX10-NEXT: [[BITCAST14:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST6]](f16) + ; GFX10-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[COPY12]](i32) + ; GFX10-NEXT: [[BITCAST16:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST7]](i32) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST8]](i16), [[BITCAST9]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST10]](i16), [[BITCAST11]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST12]](i16), [[BITCAST13]](i16) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST14]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x i16>) = G_CONCAT_VECTORS [[BITCAST15]](<2 x i16>), [[BITCAST16]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR5]](<2 x i16>) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x i32>) = 
G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 6, [[CONCAT_VECTORS]](<12 x i16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<2 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST17:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST17]](<2 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; ; GFX11-LABEL: name: sample_c_d_o_2darray_V2 ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX11-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) - ; GFX11-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX11-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16) - 
; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 6, [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<2 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY17]](i32) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY18]](i32) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX11-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY19]](i32) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX11-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX11-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[COPY20]](i32) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC6]](i16) + ; GFX11-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](f32) + ; GFX11-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) 
+ ; GFX11-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX11-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX11-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST5]](f16) + ; GFX11-NEXT: [[BITCAST14:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST6]](f16) + ; GFX11-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[COPY12]](i32) + ; GFX11-NEXT: [[BITCAST16:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST7]](i32) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST8]](i16), [[BITCAST9]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST10]](i16), [[BITCAST11]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST12]](i16), [[BITCAST13]](i16) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST14]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR5]](<2 x i16>) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 6, [[BITCAST15]](<2 x i16>), [[BITCAST16]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[CONCAT_VECTORS]](<4 x i16>), $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<2 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST17:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST17]](<2 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; ; GFX12-LABEL: name: sample_c_d_o_2darray_V2 ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX12-NEXT: 
[[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX12-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX12-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX12-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) - ; GFX12-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX12-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 6, [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<2 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: 
[[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX12-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY17]](i32) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX12-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY18]](i32) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX12-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY19]](i32) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX12-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX12-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[COPY20]](i32) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC6]](i16) + ; GFX12-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](f32) + ; GFX12-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX12-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX12-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX12-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST5]](f16) + ; GFX12-NEXT: [[BITCAST14:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST6]](f16) + ; GFX12-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[COPY12]](i32) + ; GFX12-NEXT: [[BITCAST16:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST7]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST8]](i16), [[BITCAST9]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST10]](i16), [[BITCAST11]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST12]](i16), [[BITCAST13]](i16) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST14]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR5]](<2 x i16>) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 6, [[BITCAST15]](<2 x i16>), [[BITCAST16]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[CONCAT_VECTORS]](<6 x i16>), $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<2 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST17:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST17]](<2 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 main_body: %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.d.ll 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.d.ll index 241170b94318a..5503c83754a24 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.d.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.d.ll @@ -8,108 +8,138 @@ define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<9 x s32>) = G_BUILD_VECTOR [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[BUILD_VECTOR2]](<9 x s32>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = 
G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(f32) = COPY $vgpr4 + ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(f32) = COPY $vgpr5 + ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(f32) = COPY $vgpr6 + ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(f32) = COPY $vgpr7 + ; GFX10-NEXT: [[COPY20:%[0-9]+]]:_(f32) = COPY $vgpr8 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](f32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY14]](f32) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY15]](f32) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY16]](f32) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[COPY17]](f32) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[COPY18]](f32) + ; GFX10-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[COPY19]](f32) + ; GFX10-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[COPY20]](f32) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<9 x i32>) = G_BUILD_VECTOR [[BITCAST]](i32), [[BITCAST1]](i32), [[BITCAST2]](i32), [[BITCAST3]](i32), [[BITCAST4]](i32), [[BITCAST5]](i32), [[BITCAST6]](i32), [[BITCAST7]](i32), [[BITCAST8]](i32) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[BUILD_VECTOR2]](<9 x i32>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST9:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST9]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_d_3d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = 
G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX11-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX11-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[BUILD_VECTOR2]](<5 x s32>), $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(f32) = COPY $vgpr4 + ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(f32) = COPY $vgpr5 + ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(f32) = COPY $vgpr6 + ; GFX11-NEXT: [[COPY19:%[0-9]+]]:_(f32) = COPY $vgpr7 + ; GFX11-NEXT: [[COPY20:%[0-9]+]]:_(f32) = COPY $vgpr8 + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](f32) + ; 
GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY14]](f32) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY15]](f32) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY16]](f32) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[COPY17]](f32) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[COPY18]](f32) + ; GFX11-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[COPY19]](f32) + ; GFX11-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[COPY20]](f32) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<5 x i32>) = G_BUILD_VECTOR [[BITCAST4]](i32), [[BITCAST5]](i32), [[BITCAST6]](i32), [[BITCAST7]](i32), [[BITCAST8]](i32) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[BITCAST]](i32), [[BITCAST1]](i32), [[BITCAST2]](i32), [[BITCAST3]](i32), [[BUILD_VECTOR2]](<5 x i32>), $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST9:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST9]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_d_3d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX12-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX12-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX12-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX12-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<6 x s32>) = G_BUILD_VECTOR [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32) - ; 
GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[BUILD_VECTOR2]](<6 x s32>), $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(f32) = COPY $vgpr4 + ; GFX12-NEXT: [[COPY17:%[0-9]+]]:_(f32) = COPY $vgpr5 + ; GFX12-NEXT: [[COPY18:%[0-9]+]]:_(f32) = COPY $vgpr6 + ; GFX12-NEXT: [[COPY19:%[0-9]+]]:_(f32) = COPY $vgpr7 + ; GFX12-NEXT: [[COPY20:%[0-9]+]]:_(f32) = COPY $vgpr8 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](f32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY14]](f32) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY15]](f32) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY16]](f32) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[COPY17]](f32) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[COPY18]](f32) + ; GFX12-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[COPY19]](f32) + ; GFX12-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[COPY20]](f32) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<6 x i32>) = G_BUILD_VECTOR [[BITCAST3]](i32), [[BITCAST4]](i32), [[BITCAST5]](i32), [[BITCAST6]](i32), [[BITCAST7]](i32), [[BITCAST8]](i32) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[BITCAST]](i32), [[BITCAST1]](i32), [[BITCAST2]](i32), [[BUILD_VECTOR2]](<6 x i32>), $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST9:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; 
GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST9]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f32(i32 15, float %dsdh, float %dtdh, float %drdh, float %dsdv, float %dtdv, float %drdv, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -121,111 +151,144 @@ define amdgpu_ps <4 x float> @sample_c_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inr ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<10 x s32>) = G_BUILD_VECTOR [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.3d), 15, [[BUILD_VECTOR2]](<10 x s32>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: 
[[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(f32) = COPY $vgpr4 + ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(f32) = COPY $vgpr5 + ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(f32) = COPY $vgpr6 + ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(f32) = COPY $vgpr7 + ; GFX10-NEXT: [[COPY20:%[0-9]+]]:_(f32) = COPY $vgpr8 + ; GFX10-NEXT: [[COPY21:%[0-9]+]]:_(f32) = COPY $vgpr9 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](f32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY14]](f32) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY15]](f32) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY16]](f32) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[COPY17]](f32) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[COPY18]](f32) + ; GFX10-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[COPY19]](f32) + ; GFX10-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[COPY20]](f32) + ; GFX10-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[COPY21]](f32) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<10 x i32>) = G_BUILD_VECTOR [[BITCAST]](i32), [[BITCAST1]](i32), [[BITCAST2]](i32), [[BITCAST3]](i32), [[BITCAST4]](i32), [[BITCAST5]](i32), [[BITCAST6]](i32), [[BITCAST7]](i32), [[BITCAST8]](i32), [[BITCAST9]](i32) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.3d), 15, [[BUILD_VECTOR2]](<10 x i32>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST10:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST10]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_c_d_3d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, 
$sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX11-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX11-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX11-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<6 x s32>) = G_BUILD_VECTOR [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.3d), 15, [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[BUILD_VECTOR2]](<6 x s32>), $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = 
G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(f32) = COPY $vgpr4 + ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(f32) = COPY $vgpr5 + ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(f32) = COPY $vgpr6 + ; GFX11-NEXT: [[COPY19:%[0-9]+]]:_(f32) = COPY $vgpr7 + ; GFX11-NEXT: [[COPY20:%[0-9]+]]:_(f32) = COPY $vgpr8 + ; GFX11-NEXT: [[COPY21:%[0-9]+]]:_(f32) = COPY $vgpr9 + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](f32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY14]](f32) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY15]](f32) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY16]](f32) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[COPY17]](f32) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[COPY18]](f32) + ; GFX11-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[COPY19]](f32) + ; GFX11-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[COPY20]](f32) + ; GFX11-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[COPY21]](f32) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<6 x i32>) = G_BUILD_VECTOR [[BITCAST4]](i32), [[BITCAST5]](i32), [[BITCAST6]](i32), [[BITCAST7]](i32), [[BITCAST8]](i32), [[BITCAST9]](i32) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.3d), 15, [[BITCAST]](i32), [[BITCAST1]](i32), [[BITCAST2]](i32), [[BITCAST3]](i32), [[BUILD_VECTOR2]](<6 x i32>), $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST10:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST10]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_c_d_3d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: 
[[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX12-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX12-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX12-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX12-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX12-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.3d), 15, [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[BUILD_VECTOR2]](<7 x s32>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(f32) = COPY $vgpr4 + ; GFX12-NEXT: [[COPY17:%[0-9]+]]:_(f32) = COPY $vgpr5 + ; GFX12-NEXT: [[COPY18:%[0-9]+]]:_(f32) = COPY $vgpr6 + ; GFX12-NEXT: [[COPY19:%[0-9]+]]:_(f32) = COPY $vgpr7 + ; GFX12-NEXT: [[COPY20:%[0-9]+]]:_(f32) = COPY $vgpr8 + ; GFX12-NEXT: [[COPY21:%[0-9]+]]:_(f32) = COPY $vgpr9 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](f32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY14]](f32) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST 
[[COPY15]](f32) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY16]](f32) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[COPY17]](f32) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[COPY18]](f32) + ; GFX12-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[COPY19]](f32) + ; GFX12-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[COPY20]](f32) + ; GFX12-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[COPY21]](f32) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<7 x i32>) = G_BUILD_VECTOR [[BITCAST3]](i32), [[BITCAST4]](i32), [[BITCAST5]](i32), [[BITCAST6]](i32), [[BITCAST7]](i32), [[BITCAST8]](i32), [[BITCAST9]](i32) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.3d), 15, [[BITCAST]](i32), [[BITCAST1]](i32), [[BITCAST2]](i32), [[BUILD_VECTOR2]](<7 x i32>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST10:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST10]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.3d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dtdh, float %drdh, float %dsdv, float %dtdv, float %drdv, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -237,114 +300,150 @@ define amdgpu_ps <4 x float> @sample_c_d_cl_3d(<8 x i32> inreg %rsrc, <4 x i32> ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; 
GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX10-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<11 x s32>) = G_BUILD_VECTOR [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.3d), 15, [[BUILD_VECTOR2]](<11 x s32>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(f32) = COPY $vgpr4 + ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(f32) = COPY $vgpr5 + ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(f32) = COPY $vgpr6 + ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(f32) = COPY $vgpr7 + ; GFX10-NEXT: [[COPY20:%[0-9]+]]:_(f32) = COPY $vgpr8 + ; GFX10-NEXT: [[COPY21:%[0-9]+]]:_(f32) = COPY $vgpr9 + ; GFX10-NEXT: [[COPY22:%[0-9]+]]:_(f32) = COPY $vgpr10 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](f32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY14]](f32) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY15]](f32) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY16]](f32) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[COPY17]](f32) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[COPY18]](f32) + ; GFX10-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[COPY19]](f32) + ; GFX10-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[COPY20]](f32) + ; GFX10-NEXT: 
[[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[COPY21]](f32) + ; GFX10-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[COPY22]](f32) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<11 x i32>) = G_BUILD_VECTOR [[BITCAST]](i32), [[BITCAST1]](i32), [[BITCAST2]](i32), [[BITCAST3]](i32), [[BITCAST4]](i32), [[BITCAST5]](i32), [[BITCAST6]](i32), [[BITCAST7]](i32), [[BITCAST8]](i32), [[BITCAST9]](i32), [[BITCAST10]](i32) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.3d), 15, [[BUILD_VECTOR2]](<11 x i32>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST11:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST11]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_c_d_cl_3d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX11-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX11-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX11-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX11-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD 
intrinsic(@llvm.amdgcn.image.sample.c.d.cl.3d), 15, [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[BUILD_VECTOR2]](<7 x s32>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(f32) = COPY $vgpr4 + ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(f32) = COPY $vgpr5 + ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(f32) = COPY $vgpr6 + ; GFX11-NEXT: [[COPY19:%[0-9]+]]:_(f32) = COPY $vgpr7 + ; GFX11-NEXT: [[COPY20:%[0-9]+]]:_(f32) = COPY $vgpr8 + ; GFX11-NEXT: [[COPY21:%[0-9]+]]:_(f32) = COPY $vgpr9 + ; GFX11-NEXT: [[COPY22:%[0-9]+]]:_(f32) = COPY $vgpr10 + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](f32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY14]](f32) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY15]](f32) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY16]](f32) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[COPY17]](f32) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[COPY18]](f32) + ; GFX11-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[COPY19]](f32) + ; GFX11-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[COPY20]](f32) + ; GFX11-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[COPY21]](f32) + ; GFX11-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[COPY22]](f32) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<7 x i32>) = G_BUILD_VECTOR [[BITCAST4]](i32), [[BITCAST5]](i32), [[BITCAST6]](i32), [[BITCAST7]](i32), [[BITCAST8]](i32), [[BITCAST9]](i32), [[BITCAST10]](i32) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.3d), 15, [[BITCAST]](i32), [[BITCAST1]](i32), [[BITCAST2]](i32), [[BITCAST3]](i32), [[BUILD_VECTOR2]](<7 x i32>), $noreg, $noreg, $noreg, 
$noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST11:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST11]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_c_d_cl_3d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX12-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX12-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX12-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX12-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX12-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX12-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.3d), 15, [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[BUILD_VECTOR2]](<8 x s32>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: 
[[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(f32) = COPY $vgpr4 + ; GFX12-NEXT: [[COPY17:%[0-9]+]]:_(f32) = COPY $vgpr5 + ; GFX12-NEXT: [[COPY18:%[0-9]+]]:_(f32) = COPY $vgpr6 + ; GFX12-NEXT: [[COPY19:%[0-9]+]]:_(f32) = COPY $vgpr7 + ; GFX12-NEXT: [[COPY20:%[0-9]+]]:_(f32) = COPY $vgpr8 + ; GFX12-NEXT: [[COPY21:%[0-9]+]]:_(f32) = COPY $vgpr9 + ; GFX12-NEXT: [[COPY22:%[0-9]+]]:_(f32) = COPY $vgpr10 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](f32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY14]](f32) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY15]](f32) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY16]](f32) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[COPY17]](f32) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[COPY18]](f32) + ; GFX12-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[COPY19]](f32) + ; GFX12-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[COPY20]](f32) + ; GFX12-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[COPY21]](f32) + ; GFX12-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[COPY22]](f32) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[BITCAST3]](i32), [[BITCAST4]](i32), [[BITCAST5]](i32), [[BITCAST6]](i32), [[BITCAST7]](i32), [[BITCAST8]](i32), [[BITCAST9]](i32), [[BITCAST10]](i32) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.3d), 15, [[BITCAST]](i32), [[BITCAST1]](i32), [[BITCAST2]](i32), [[BUILD_VECTOR2]](<8 x i32>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST11:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST11]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 
main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.3d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dtdh, float %drdh, float %dsdv, float %dtdv, float %drdv, float %s, float %t, float %r, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -356,117 +455,153 @@ define amdgpu_ps <4 x float> @sample_c_d_cl_o_3d(<8 x i32> inreg %rsrc, <4 x i32 ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX10-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX10-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<12 x s32>) = G_BUILD_VECTOR [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.o.3d), 15, [[BUILD_VECTOR2]](<12 x s32>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + 
; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(f32) = COPY $vgpr4 + ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(f32) = COPY $vgpr5 + ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(f32) = COPY $vgpr6 + ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(f32) = COPY $vgpr7 + ; GFX10-NEXT: [[COPY20:%[0-9]+]]:_(f32) = COPY $vgpr8 + ; GFX10-NEXT: [[COPY21:%[0-9]+]]:_(f32) = COPY $vgpr9 + ; GFX10-NEXT: [[COPY22:%[0-9]+]]:_(f32) = COPY $vgpr10 + ; GFX10-NEXT: [[COPY23:%[0-9]+]]:_(f32) = COPY $vgpr11 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](f32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY14]](f32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY15]](f32) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY16]](f32) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY17]](f32) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[COPY18]](f32) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[COPY19]](f32) + ; GFX10-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[COPY20]](f32) + ; GFX10-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[COPY21]](f32) + ; GFX10-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[COPY22]](f32) + ; GFX10-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[COPY23]](f32) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<12 x i32>) = G_BUILD_VECTOR [[COPY12]](i32), [[BITCAST]](i32), [[BITCAST1]](i32), [[BITCAST2]](i32), [[BITCAST3]](i32), [[BITCAST4]](i32), [[BITCAST5]](i32), [[BITCAST6]](i32), [[BITCAST7]](i32), [[BITCAST8]](i32), [[BITCAST9]](i32), [[BITCAST10]](i32) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.o.3d), 15, [[BUILD_VECTOR2]](<12 x i32>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST11:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST11]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_c_d_cl_o_3d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: 
$sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX11-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX11-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX11-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX11-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX11-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.o.3d), 15, [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[BUILD_VECTOR2]](<8 x s32>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 
+ ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(f32) = COPY $vgpr4 + ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(f32) = COPY $vgpr5 + ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(f32) = COPY $vgpr6 + ; GFX11-NEXT: [[COPY19:%[0-9]+]]:_(f32) = COPY $vgpr7 + ; GFX11-NEXT: [[COPY20:%[0-9]+]]:_(f32) = COPY $vgpr8 + ; GFX11-NEXT: [[COPY21:%[0-9]+]]:_(f32) = COPY $vgpr9 + ; GFX11-NEXT: [[COPY22:%[0-9]+]]:_(f32) = COPY $vgpr10 + ; GFX11-NEXT: [[COPY23:%[0-9]+]]:_(f32) = COPY $vgpr11 + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](f32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY14]](f32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY15]](f32) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY16]](f32) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY17]](f32) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[COPY18]](f32) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[COPY19]](f32) + ; GFX11-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[COPY20]](f32) + ; GFX11-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[COPY21]](f32) + ; GFX11-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[COPY22]](f32) + ; GFX11-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[COPY23]](f32) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[BITCAST3]](i32), [[BITCAST4]](i32), [[BITCAST5]](i32), [[BITCAST6]](i32), [[BITCAST7]](i32), [[BITCAST8]](i32), [[BITCAST9]](i32), [[BITCAST10]](i32) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.o.3d), 15, [[COPY12]](i32), [[BITCAST]](i32), [[BITCAST1]](i32), [[BITCAST2]](i32), [[BUILD_VECTOR2]](<8 x i32>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST11:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST11]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_c_d_cl_o_3d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: 
[[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX12-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX12-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX12-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX12-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX12-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX12-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX12-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<9 x s32>) = G_BUILD_VECTOR [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.o.3d), 15, [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[BUILD_VECTOR2]](<9 x s32>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX12-NEXT: 
[[COPY15:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(f32) = COPY $vgpr4 + ; GFX12-NEXT: [[COPY17:%[0-9]+]]:_(f32) = COPY $vgpr5 + ; GFX12-NEXT: [[COPY18:%[0-9]+]]:_(f32) = COPY $vgpr6 + ; GFX12-NEXT: [[COPY19:%[0-9]+]]:_(f32) = COPY $vgpr7 + ; GFX12-NEXT: [[COPY20:%[0-9]+]]:_(f32) = COPY $vgpr8 + ; GFX12-NEXT: [[COPY21:%[0-9]+]]:_(f32) = COPY $vgpr9 + ; GFX12-NEXT: [[COPY22:%[0-9]+]]:_(f32) = COPY $vgpr10 + ; GFX12-NEXT: [[COPY23:%[0-9]+]]:_(f32) = COPY $vgpr11 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](f32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY14]](f32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY15]](f32) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY16]](f32) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY17]](f32) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[COPY18]](f32) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[COPY19]](f32) + ; GFX12-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[COPY20]](f32) + ; GFX12-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[COPY21]](f32) + ; GFX12-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[COPY22]](f32) + ; GFX12-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[COPY23]](f32) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<9 x i32>) = G_BUILD_VECTOR [[BITCAST2]](i32), [[BITCAST3]](i32), [[BITCAST4]](i32), [[BITCAST5]](i32), [[BITCAST6]](i32), [[BITCAST7]](i32), [[BITCAST8]](i32), [[BITCAST9]](i32), [[BITCAST10]](i32) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.o.3d), 15, [[COPY12]](i32), [[BITCAST]](i32), [[BITCAST1]](i32), [[BUILD_VECTOR2]](<9 x i32>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST11:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST11]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.3d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %drdh, float %dsdv, float %dtdv, float %drdv, float %s, float %t, float %r, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.g16.a16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.g16.a16.ll index f05b258c974d1..ea44b62665bf4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.g16.a16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.g16.a16.ll @@ -8,108 +8,129 @@ define amdgpu_ps <4 x float> @sample_d_1d_g16_a16(<8 x i32> inreg %rsrc, <4 x i3 ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = 
COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: 
[[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.1d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_d_1d_g16_a16 ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x 
s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.1d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST 
[[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_d_1d_g16_a16 ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = 
COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.1d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -121,123 +142,162 @@ define amdgpu_ps <4 x float> @sample_d_2d_g16_a16(<8 x i32> inreg %rsrc, <4 x i3 ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY 
$sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: 
[[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY17]](i32) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX10-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX10-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX10-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST5]](f16) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[BITCAST7]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST8]](i16), [[BITCAST9]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST10]](i16), [[BITCAST11]](i16) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.2d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST12:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST12]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_d_2d_g16_a16 ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: 
[[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(i32) = 
COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY17]](i32) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX11-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX11-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX11-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST5]](f16) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[BITCAST7]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST8]](i16), [[BITCAST9]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST10]](i16), [[BITCAST11]](i16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.2d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST12:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST12]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_d_2d_g16_a16 ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; 
GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX12-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; 
GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX12-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY17]](i32) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX12-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX12-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX12-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST5]](f16) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[BITCAST7]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST8]](i16), [[BITCAST9]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST10]](i16), [[BITCAST11]](i16) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.2d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST12:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST12]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -249,156 +309,213 @@ define amdgpu_ps <4 x float> @sample_d_3d_g16_a16(<8 x i32> inreg %rsrc, <4 x i3 ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = 
COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) - ; GFX10-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC8]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), [[BUILD_VECTOR6]](<2 x s16>), [[BUILD_VECTOR7]](<2 x s16>) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), 
[[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY17]](i32) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX10-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[COPY18]](i32) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC6]](i16) + ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX10-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[COPY19]](i32) + ; GFX10-NEXT: [[BITCAST7:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC7]](i16) + ; GFX10-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX10-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[COPY20]](i32) + ; GFX10-NEXT: [[BITCAST8:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC8]](i16) + ; GFX10-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX10-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX10-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX10-NEXT: [[BITCAST14:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST5]](f16) + ; GFX10-NEXT: [[BITCAST15:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST6]](f16) + ; GFX10-NEXT: [[BITCAST16:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST7]](f16) + ; GFX10-NEXT: [[BITCAST17:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST8]](f16) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST9]](i16), [[BITCAST10]](i16) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST11]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST12]](i16), [[BITCAST13]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST14]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST15]](i16), [[BITCAST16]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST17]](i16), [[DEF]](i16) + ; 
GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR5]](<2 x i16>), [[BUILD_VECTOR6]](<2 x i16>), [[BUILD_VECTOR7]](<2 x i16>) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[CONCAT_VECTORS]](<12 x i16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST18:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST18]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_d_3d_g16_a16 ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX11-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX11-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX11-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) - ; GFX11-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX11-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32) - ; GFX11-NEXT: 
[[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC8]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR6]](<2 x s16>), [[BUILD_VECTOR7]](<2 x s16>) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST 
[[TRUNC4]](i16) + ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY17]](i32) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX11-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[COPY18]](i32) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC6]](i16) + ; GFX11-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX11-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[COPY19]](i32) + ; GFX11-NEXT: [[BITCAST7:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC7]](i16) + ; GFX11-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX11-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[COPY20]](i32) + ; GFX11-NEXT: [[BITCAST8:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC8]](i16) + ; GFX11-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX11-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX11-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX11-NEXT: [[BITCAST14:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST5]](f16) + ; GFX11-NEXT: [[BITCAST15:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST6]](f16) + ; GFX11-NEXT: [[BITCAST16:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST7]](f16) + ; GFX11-NEXT: [[BITCAST17:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST8]](f16) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST9]](i16), [[BITCAST10]](i16) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST11]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST12]](i16), [[BITCAST13]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST14]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST15]](i16), [[BITCAST16]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST17]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR6]](<2 x i16>), [[BUILD_VECTOR7]](<2 x i16>) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR5]](<2 x i16>), [[CONCAT_VECTORS]](<4 x i16>), $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST18:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST18]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_d_3d_g16_a16 ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; 
GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX12-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX12-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX12-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX12-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX12-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) - ; GFX12-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX12-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC8]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR5]](<2 x s16>), [[BUILD_VECTOR6]](<2 x s16>), [[BUILD_VECTOR7]](<2 x s16>) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES 
[[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX12-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY17]](i32) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX12-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX12-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[COPY18]](i32) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC6]](i16) + ; GFX12-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX12-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[COPY19]](i32) + ; GFX12-NEXT: [[BITCAST7:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC7]](i16) + ; GFX12-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX12-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[COPY20]](i32) + ; GFX12-NEXT: [[BITCAST8:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC8]](i16) + ; GFX12-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX12-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX12-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX12-NEXT: [[BITCAST14:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST5]](f16) + ; GFX12-NEXT: [[BITCAST15:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST6]](f16) + ; GFX12-NEXT: [[BITCAST16:%[0-9]+]]:_(i16) = G_BITCAST 
[[BITCAST7]](f16) + ; GFX12-NEXT: [[BITCAST17:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST8]](f16) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST9]](i16), [[BITCAST10]](i16) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST11]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST12]](i16), [[BITCAST13]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST14]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST15]](i16), [[BITCAST16]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST17]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR5]](<2 x i16>), [[BUILD_VECTOR6]](<2 x i16>), [[BUILD_VECTOR7]](<2 x i16>) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), [[CONCAT_VECTORS]](<6 x i16>), $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST18:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST18]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.g16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.g16.ll index cc2a8ba9c4d5d..db59cdefb7093 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.g16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.g16.ll @@ -8,105 +8,123 @@ define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) 
= COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY14]](s32) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY14]](f32) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST2]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[DEF]](i16) + ; GFX10-NEXT: 
[[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST4]](i32) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.1d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BITCAST5]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_d_1d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY14]](s32) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; 
GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY14]](f32) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST2]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST4]](i32) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.1d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BITCAST5]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_d_1d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = 
G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY14]](s32) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY14]](f32) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; 
GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST2]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST4]](i32) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.1d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BITCAST5]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -118,120 +136,153 @@ define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY16]](s32) - ; 
GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(f32) = COPY $vgpr4 + ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(f32) = COPY $vgpr5 + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX10-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX10-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[COPY16]](f32) + ; GFX10-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[COPY17]](f32) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[BITCAST5]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[BITCAST7]](i16) + ; GFX10-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST8]](i32) + ; GFX10-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST9]](i32) + ; GFX10-NEXT: 
[[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.2d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BITCAST10]](<2 x i16>), [[BITCAST11]](<2 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST12:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST12]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_d_2d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY16]](s32) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), 
[[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(f32) = COPY $vgpr4 + ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(f32) = COPY $vgpr5 + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX11-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX11-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[COPY16]](f32) + ; GFX11-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[COPY17]](f32) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[BITCAST5]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[BITCAST7]](i16) + ; GFX11-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST8]](i32) + ; GFX11-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST9]](i32) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.2d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BITCAST10]](<2 x i16>), [[BITCAST11]](<2 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST12:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), 
[[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST12]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_d_2d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX12-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY16]](s32) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 
+ ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(f32) = COPY $vgpr4 + ; GFX12-NEXT: [[COPY17:%[0-9]+]]:_(f32) = COPY $vgpr5 + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX12-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX12-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[COPY16]](f32) + ; GFX12-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[COPY17]](f32) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[BITCAST5]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[BITCAST7]](i16) + ; GFX12-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST8]](i32) + ; GFX12-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST9]](i32) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.2d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BITCAST10]](<2 x i16>), [[BITCAST11]](<2 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST12:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST12]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float 
%s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -243,150 +294,198 @@ define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY19]](s32) - ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY20]](s32) - ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[CONCAT_VECTORS]](<14 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x 
s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY17]](i32) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(f32) = COPY $vgpr6 + ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(f32) = COPY $vgpr7 + ; GFX10-NEXT: [[COPY20:%[0-9]+]]:_(f32) = COPY $vgpr8 + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX10-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX10-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX10-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST5]](f16) + ; GFX10-NEXT: [[BITCAST12:%[0-9]+]]:_(i32) = G_BITCAST [[COPY18]](f32) + ; GFX10-NEXT: [[BITCAST13:%[0-9]+]]:_(i32) = G_BITCAST [[COPY19]](f32) + ; GFX10-NEXT: [[BITCAST14:%[0-9]+]]:_(i32) = G_BITCAST [[COPY20]](f32) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[BITCAST7]](i16) + 
; GFX10-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST8]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST9]](i16), [[BITCAST10]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST11]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST12]](i32) + ; GFX10-NEXT: [[BITCAST16:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST13]](i32) + ; GFX10-NEXT: [[BITCAST17:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST14]](i32) + ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<14 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR5]](<2 x i16>), [[BITCAST15]](<2 x i16>), [[BITCAST16]](<2 x i16>), [[BITCAST17]](<2 x i16>) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[CONCAT_VECTORS]](<14 x i16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST18:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST18]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_d_3d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; 
GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX11-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX11-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY19]](s32) - ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY20]](s32) - ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC 
[[COPY14]](i32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY17]](i32) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(f32) = COPY $vgpr6 + ; GFX11-NEXT: [[COPY19:%[0-9]+]]:_(f32) = COPY $vgpr7 + ; GFX11-NEXT: [[COPY20:%[0-9]+]]:_(f32) = COPY $vgpr8 + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX11-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX11-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX11-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST5]](f16) + ; GFX11-NEXT: [[BITCAST12:%[0-9]+]]:_(i32) = G_BITCAST [[COPY18]](f32) + ; GFX11-NEXT: [[BITCAST13:%[0-9]+]]:_(i32) = G_BITCAST [[COPY19]](f32) + ; GFX11-NEXT: [[BITCAST14:%[0-9]+]]:_(i32) = G_BITCAST [[COPY20]](f32) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[BITCAST7]](i16) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST8]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST9]](i16), [[BITCAST10]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST11]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST12]](i32) + ; GFX11-NEXT: [[BITCAST16:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST13]](i32) + ; GFX11-NEXT: [[BITCAST17:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST14]](i32) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST15]](<2 x i16>), [[BITCAST16]](<2 x i16>), [[BITCAST17]](<2 x i16>) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR5]](<2 x i16>), [[CONCAT_VECTORS]](<6 x i16>), $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST18:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST18]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_d_3d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, 
$vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX12-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX12-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX12-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX12-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY19]](s32) - ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY20]](s32) - ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR5]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY 
[[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX12-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY17]](i32) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX12-NEXT: [[COPY18:%[0-9]+]]:_(f32) = COPY $vgpr6 + ; GFX12-NEXT: [[COPY19:%[0-9]+]]:_(f32) = COPY $vgpr7 + ; GFX12-NEXT: [[COPY20:%[0-9]+]]:_(f32) = COPY $vgpr8 + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX12-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX12-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX12-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST5]](f16) + ; GFX12-NEXT: [[BITCAST12:%[0-9]+]]:_(i32) = G_BITCAST [[COPY18]](f32) + ; GFX12-NEXT: [[BITCAST13:%[0-9]+]]:_(i32) = G_BITCAST [[COPY19]](f32) + ; GFX12-NEXT: [[BITCAST14:%[0-9]+]]:_(i32) = G_BITCAST [[COPY20]](f32) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[BITCAST7]](i16) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST8]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST9]](i16), [[BITCAST10]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR 
[[BITCAST11]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST12]](i32) + ; GFX12-NEXT: [[BITCAST16:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST13]](i32) + ; GFX12-NEXT: [[BITCAST17:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST14]](i32) + ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR5]](<2 x i16>), [[BITCAST15]](<2 x i16>), [[BITCAST16]](<2 x i16>), [[BITCAST17]](<2 x i16>) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), [[CONCAT_VECTORS]](<8 x i16>), $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST18:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST18]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -398,111 +497,132 @@ define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inr ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - 
; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[COPY15]](f32) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST2]](i32) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST5]](i32) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.1d), 15, [[BITCAST6]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BITCAST7]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST8:%[0-9]+]]:_(<4 x f32>) = G_BITCAST 
[[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST8]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_c_d_1d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: 
[[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[COPY15]](f32) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST2]](i32) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST5]](i32) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.1d), 15, [[BITCAST6]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BITCAST7]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST8:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST8]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_c_d_1d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), 
[[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX12-NEXT: 
[[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[COPY15]](f32) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST2]](i32) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST5]](i32) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.1d), 15, [[BITCAST6]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BITCAST7]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST8:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST8]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -514,127 +634,163 @@ define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inr ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: 
[[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) - ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(f32) = COPY $vgpr5 + ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(f32) = COPY $vgpr6 + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: 
[[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX10-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX10-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[COPY17]](f32) + ; GFX10-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[COPY18]](f32) + ; GFX10-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST4]](i32) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[BITCAST6]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST7]](i16), [[BITCAST8]](i16) + ; GFX10-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST9]](i32) + ; GFX10-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST10]](i32) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.2d), 15, [[BITCAST11]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BITCAST12]](<2 x i16>), [[BITCAST13]](<2 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST14:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST14]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_c_d_2d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 
- ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) - ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(f32) = COPY $vgpr5 + ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(f32) = COPY $vgpr6 + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX11-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST 
[[BITCAST3]](f16) + ; GFX11-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[COPY17]](f32) + ; GFX11-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[COPY18]](f32) + ; GFX11-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST4]](i32) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[BITCAST6]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST7]](i16), [[BITCAST8]](i16) + ; GFX11-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST9]](i32) + ; GFX11-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST10]](i32) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.2d), 15, [[BITCAST11]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BITCAST12]](<2 x i16>), [[BITCAST13]](<2 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST14:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST14]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_c_d_2d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX12-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX12-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = 
G_BITCAST [[COPY12]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) - ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) - ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX12-NEXT: [[COPY17:%[0-9]+]]:_(f32) = COPY $vgpr5 + ; GFX12-NEXT: [[COPY18:%[0-9]+]]:_(f32) = COPY $vgpr6 + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX12-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = 
G_BITCAST [[BITCAST3]](f16) + ; GFX12-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[COPY17]](f32) + ; GFX12-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[COPY18]](f32) + ; GFX12-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST4]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[BITCAST6]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST7]](i16), [[BITCAST8]](i16) + ; GFX12-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST9]](i32) + ; GFX12-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST10]](i32) + ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x i16>), [[BITCAST13]](<2 x i16>) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.2d), 15, [[BITCAST11]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[CONCAT_VECTORS]](<4 x i16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST14:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST14]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -646,111 +802,132 @@ define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: 
[[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY14]](s32) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY14]](f32) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[COPY15]](f32) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST2]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST4]](i32) + ; GFX10-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST5]](i32) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.1d), 15, [[BUILD_VECTOR2]](<2 x 
i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BITCAST6]](<2 x i16>), [[BITCAST7]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST8:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST8]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_d_cl_1d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY14]](s32) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) 
= COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY14]](f32) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[COPY15]](f32) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST2]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST4]](i32) + ; GFX11-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST5]](i32) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.1d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BITCAST6]](<2 x i16>), [[BITCAST7]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST8:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST8]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_d_cl_1d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: 
[[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY14]](s32) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX12-NEXT: 
[[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY14]](f32) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[COPY15]](f32) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST2]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST4]](i32) + ; GFX12-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST5]](i32) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.1d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BITCAST6]](<2 x i16>), [[BITCAST7]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST8:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST8]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -762,127 +939,163 @@ define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - 
; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY16]](s32) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) - ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(f32) = COPY $vgpr4 + ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(f32) = COPY $vgpr5 + ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(f32) = COPY 
$vgpr6 + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX10-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX10-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[COPY16]](f32) + ; GFX10-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[COPY17]](f32) + ; GFX10-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[COPY18]](f32) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[BITCAST5]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[BITCAST7]](i16) + ; GFX10-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST8]](i32) + ; GFX10-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST9]](i32) + ; GFX10-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST10]](i32) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.2d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BITCAST11]](<2 x i16>), [[BITCAST12]](<2 x i16>), [[BITCAST13]](<2 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST14:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST14]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_d_cl_2d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC 
[[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY16]](s32) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) - ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(f32) = COPY $vgpr4 + ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(f32) = COPY $vgpr5 + ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(f32) = COPY $vgpr6 + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: 
[[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX11-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX11-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[COPY16]](f32) + ; GFX11-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[COPY17]](f32) + ; GFX11-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[COPY18]](f32) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[BITCAST5]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[BITCAST7]](i16) + ; GFX11-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST8]](i32) + ; GFX11-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST9]](i32) + ; GFX11-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST10]](i32) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.2d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BITCAST11]](<2 x i16>), [[BITCAST12]](<2 x i16>), [[BITCAST13]](<2 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST14:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST14]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_d_cl_2d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: 
[[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX12-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX12-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY16]](s32) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) - ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) - ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(f32) = COPY $vgpr4 + ; GFX12-NEXT: [[COPY17:%[0-9]+]]:_(f32) = COPY $vgpr5 + ; GFX12-NEXT: [[COPY18:%[0-9]+]]:_(f32) = COPY $vgpr6 + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + 
; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX12-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX12-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[COPY16]](f32) + ; GFX12-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[COPY17]](f32) + ; GFX12-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[COPY18]](f32) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[BITCAST5]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[BITCAST7]](i16) + ; GFX12-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST8]](i32) + ; GFX12-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST9]](i32) + ; GFX12-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST10]](i32) + ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x i16>), [[BITCAST13]](<2 x i16>) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.2d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BITCAST11]](<2 x i16>), [[CONCAT_VECTORS]](<4 x i16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST14:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST14]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -894,118 +1107,142 @@ define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 
- ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32) - ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY16]](s32) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(f32) = COPY $vgpr4 + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[COPY15]](f32) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[COPY16]](f32) + ; GFX10-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 
x i16>) = G_BITCAST [[BITCAST2]](i32) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST5]](i32) + ; GFX10-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST6]](i32) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.1d), 15, [[BITCAST7]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BITCAST8]](<2 x i16>), [[BITCAST9]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST10:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST10]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_c_d_cl_1d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32) - ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY16]](s32) - ; 
GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(f32) = COPY $vgpr4 + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[COPY15]](f32) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[COPY16]](f32) + ; GFX11-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST2]](i32) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST5]](i32) + ; GFX11-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST6]](i32) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.1d), 15, [[BITCAST7]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BITCAST8]](<2 x i16>), [[BITCAST9]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x f32>), 
addrspace 8) + ; GFX11-NEXT: [[BITCAST10:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST10]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_c_d_cl_1d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32) - ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY16]](s32) - ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[CONCAT_VECTORS]](<4 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; 
GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(f32) = COPY $vgpr4 + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[COPY15]](f32) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[COPY16]](f32) + ; GFX12-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST2]](i32) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST5]](i32) + ; GFX12-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST6]](i32) + ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x i16>), [[BITCAST9]](<2 x i16>) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.1d), 15, [[BITCAST7]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[CONCAT_VECTORS]](<4 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST10:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST10]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32 15, 
float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -1017,135 +1254,174 @@ define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) - ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) - ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY19]](s32) - ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.2d), 15, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: 
$vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(f32) = COPY $vgpr5 + ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(f32) = COPY $vgpr6 + ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(f32) = COPY $vgpr7 + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX10-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX10-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[COPY17]](f32) + ; GFX10-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[COPY18]](f32) + ; GFX10-NEXT: [[BITCAST11:%[0-9]+]]:_(i32) = G_BITCAST [[COPY19]](f32) + ; GFX10-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST4]](i32) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[BITCAST6]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST7]](i16), [[BITCAST8]](i16) + ; GFX10-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST9]](i32) + ; GFX10-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST10]](i32) + ; GFX10-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST11]](i32) + ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x i16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BITCAST13]](<2 x i16>), [[BITCAST14]](<2 x i16>), [[BITCAST15]](<2 x i16>) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD 
intrinsic(@llvm.amdgcn.image.sample.c.d.cl.2d), 15, [[CONCAT_VECTORS]](<12 x i16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST16:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST16]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_c_d_cl_2d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX11-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) - ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) - ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY19]](s32) - ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD 
intrinsic(@llvm.amdgcn.image.sample.c.d.cl.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(f32) = COPY $vgpr5 + ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(f32) = COPY $vgpr6 + ; GFX11-NEXT: [[COPY19:%[0-9]+]]:_(f32) = COPY $vgpr7 + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX11-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX11-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[COPY17]](f32) + ; GFX11-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[COPY18]](f32) + ; GFX11-NEXT: [[BITCAST11:%[0-9]+]]:_(i32) = G_BITCAST [[COPY19]](f32) + ; GFX11-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST4]](i32) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[BITCAST6]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST7]](i16), 
[[BITCAST8]](i16) + ; GFX11-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST9]](i32) + ; GFX11-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST10]](i32) + ; GFX11-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST11]](i32) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST14]](<2 x i16>), [[BITCAST15]](<2 x i16>) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.2d), 15, [[BITCAST12]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BITCAST13]](<2 x i16>), [[CONCAT_VECTORS]](<4 x i16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST16:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST16]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_c_d_cl_2d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX12-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX12-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX12-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: 
[[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) - ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) - ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY19]](s32) - ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX12-NEXT: [[COPY17:%[0-9]+]]:_(f32) = COPY $vgpr5 + ; GFX12-NEXT: [[COPY18:%[0-9]+]]:_(f32) = COPY $vgpr6 + ; GFX12-NEXT: [[COPY19:%[0-9]+]]:_(f32) = COPY $vgpr7 + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX12-NEXT: 
[[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX12-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[COPY17]](f32) + ; GFX12-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[COPY18]](f32) + ; GFX12-NEXT: [[BITCAST11:%[0-9]+]]:_(i32) = G_BITCAST [[COPY19]](f32) + ; GFX12-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST4]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[BITCAST6]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST7]](i16), [[BITCAST8]](i16) + ; GFX12-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST9]](i32) + ; GFX12-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST10]](i32) + ; GFX12-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST11]](i32) + ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST13]](<2 x i16>), [[BITCAST14]](<2 x i16>), [[BITCAST15]](<2 x i16>) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.2d), 15, [[BITCAST12]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[CONCAT_VECTORS]](<6 x i16>), $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST16:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST16]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -1157,105 +1433,123 @@ define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inre ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: 
[[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY14]](s32) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY14]](f32) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST2]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST4]](i32) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.1d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BITCAST5]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 
0, 2 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_cd_1d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY14]](s32) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: 
[[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY14]](f32) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST2]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST4]](i32) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.1d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BITCAST5]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_cd_1d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: 
[[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY14]](s32) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY14]](f32) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST2]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST4]](i32) + ; GFX12-NEXT: 
[[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.1d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BITCAST5]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST6]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -1267,120 +1561,153 @@ define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inre ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY16]](s32) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), 
[[BITCAST1]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(f32) = COPY $vgpr4 + ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(f32) = COPY $vgpr5 + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX10-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX10-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[COPY16]](f32) + ; GFX10-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[COPY17]](f32) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[BITCAST5]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[BITCAST7]](i16) + ; GFX10-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST8]](i32) + ; GFX10-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST9]](i32) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.2d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BITCAST10]](<2 x i16>), [[BITCAST11]](<2 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 2 
:: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST12:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST12]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_cd_2d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY16]](s32) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: 
[[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(f32) = COPY $vgpr4 + ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(f32) = COPY $vgpr5 + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX11-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX11-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[COPY16]](f32) + ; GFX11-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[COPY17]](f32) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[BITCAST5]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[BITCAST7]](i16) + ; GFX11-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST8]](i32) + ; GFX11-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST9]](i32) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.2d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BITCAST10]](<2 x i16>), [[BITCAST11]](<2 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST12:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST12]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG 
implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_cd_2d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX12-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY16]](s32) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), 
[[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(f32) = COPY $vgpr4 + ; GFX12-NEXT: [[COPY17:%[0-9]+]]:_(f32) = COPY $vgpr5 + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX12-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX12-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[COPY16]](f32) + ; GFX12-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[COPY17]](f32) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[BITCAST5]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[BITCAST7]](i16) + ; GFX12-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST8]](i32) + ; GFX12-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST9]](i32) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.2d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BITCAST10]](<2 x i16>), [[BITCAST11]](<2 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST12:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST12]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -1392,111 +1719,132 @@ define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, 
$sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), 
[[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[COPY15]](f32) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST2]](i32) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST5]](i32) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.1d), 15, [[BITCAST6]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BITCAST7]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST8:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST8]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_c_cd_1d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; 
GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[COPY15]](f32) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST2]](i32) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST5]](i32) + ; GFX11-NEXT: 
[[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.1d), 15, [[BITCAST6]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BITCAST7]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST8:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST8]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_c_cd_1d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = 
COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[COPY15]](f32) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST2]](i32) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST5]](i32) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.1d), 15, [[BITCAST6]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BITCAST7]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST8:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST8]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -1508,127 +1856,163 @@ define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, 
$sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) - ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), 
[[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(f32) = COPY $vgpr5 + ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(f32) = COPY $vgpr6 + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX10-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX10-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[COPY17]](f32) + ; GFX10-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[COPY18]](f32) + ; GFX10-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST4]](i32) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[BITCAST6]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST7]](i16), [[BITCAST8]](i16) + ; GFX10-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST9]](i32) + ; GFX10-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST10]](i32) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.2d), 15, [[BITCAST11]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BITCAST12]](<2 x i16>), [[BITCAST13]](<2 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST14:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST14]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_c_cd_2d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, 
$vgpr4, $vgpr5, $vgpr6 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) - ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; 
GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(f32) = COPY $vgpr5 + ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(f32) = COPY $vgpr6 + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX11-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX11-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[COPY17]](f32) + ; GFX11-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[COPY18]](f32) + ; GFX11-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST4]](i32) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[BITCAST6]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST7]](i16), [[BITCAST8]](i16) + ; GFX11-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST9]](i32) + ; GFX11-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST10]](i32) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.2d), 15, [[BITCAST11]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BITCAST12]](<2 x i16>), [[BITCAST13]](<2 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST14:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST14]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_c_cd_2d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; 
GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX12-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX12-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) - ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) - ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; 
GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX12-NEXT: [[COPY17:%[0-9]+]]:_(f32) = COPY $vgpr5 + ; GFX12-NEXT: [[COPY18:%[0-9]+]]:_(f32) = COPY $vgpr6 + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX12-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX12-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[COPY17]](f32) + ; GFX12-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[COPY18]](f32) + ; GFX12-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST4]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[BITCAST6]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST7]](i16), [[BITCAST8]](i16) + ; GFX12-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST9]](i32) + ; GFX12-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST10]](i32) + ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x i16>), [[BITCAST13]](<2 x i16>) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.2d), 15, [[BITCAST11]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[CONCAT_VECTORS]](<4 x i16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST14:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST14]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 
0, i32 0, i32 0) @@ -1640,111 +2024,132 @@ define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> i ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY14]](s32) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY 
$sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY14]](f32) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[COPY15]](f32) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST2]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST4]](i32) + ; GFX10-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST5]](i32) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.1d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BITCAST6]](<2 x i16>), [[BITCAST7]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST8:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST8]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_cd_cl_1d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR 
[[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY14]](s32) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY14]](f32) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[COPY15]](f32) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST2]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x 
i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST4]](i32) + ; GFX11-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST5]](i32) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.1d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BITCAST6]](<2 x i16>), [[BITCAST7]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST8:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST8]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_cd_cl_1d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY14]](s32) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), 
[[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(f32) = COPY $vgpr2 + ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY14]](f32) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[COPY15]](f32) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST2]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST4]](i32) + ; GFX12-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST5]](i32) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.1d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BITCAST6]](<2 x i16>), [[BITCAST7]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST8:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST8]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float 
%clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -1756,127 +2161,163 @@ define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> i ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY16]](s32) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) - ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: 
[[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(f32) = COPY $vgpr4 + ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(f32) = COPY $vgpr5 + ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(f32) = COPY $vgpr6 + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX10-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX10-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[COPY16]](f32) + ; GFX10-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[COPY17]](f32) + ; GFX10-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[COPY18]](f32) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[BITCAST5]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[BITCAST7]](i16) + ; GFX10-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST8]](i32) + ; GFX10-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST9]](i32) + ; GFX10-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST10]](i32) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.2d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BITCAST11]](<2 x i16>), [[BITCAST12]](<2 x i16>), [[BITCAST13]](<2 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST14:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST14]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, 
implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_cd_cl_2d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY16]](s32) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) - ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; 
GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(f32) = COPY $vgpr4 + ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(f32) = COPY $vgpr5 + ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(f32) = COPY $vgpr6 + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX11-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX11-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[COPY16]](f32) + ; GFX11-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[COPY17]](f32) + ; GFX11-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[COPY18]](f32) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[BITCAST5]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[BITCAST7]](i16) + ; GFX11-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST8]](i32) + ; GFX11-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST9]](i32) + ; GFX11-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST10]](i32) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.2d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BITCAST11]](<2 x i16>), [[BITCAST12]](<2 x i16>), [[BITCAST13]](<2 x i16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST14:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST14]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_cd_cl_2d ; GFX12: bb.1.main_body: ; 
GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX12-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX12-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY16]](s32) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) - ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) - ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 
+ ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(f32) = COPY $vgpr4 + ; GFX12-NEXT: [[COPY17:%[0-9]+]]:_(f32) = COPY $vgpr5 + ; GFX12-NEXT: [[COPY18:%[0-9]+]]:_(f32) = COPY $vgpr6 + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX12-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX12-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[COPY16]](f32) + ; GFX12-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[COPY17]](f32) + ; GFX12-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[COPY18]](f32) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[BITCAST5]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST6]](i16), [[BITCAST7]](i16) + ; GFX12-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST8]](i32) + ; GFX12-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST9]](i32) + ; GFX12-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST10]](i32) + ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x i16>), [[BITCAST13]](<2 x i16>) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.2d), 15, [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BITCAST11]](<2 x i16>), [[CONCAT_VECTORS]](<4 x i16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST14:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST14]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, 
implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -1888,118 +2329,142 @@ define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32) - ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY16]](s32) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) 
= COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(f32) = COPY $vgpr4 + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[COPY15]](f32) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[COPY16]](f32) + ; GFX10-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST2]](i32) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[DEF]](i16) + ; GFX10-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST5]](i32) + ; GFX10-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST6]](i32) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.1d), 15, [[BITCAST7]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BITCAST8]](<2 x i16>), [[BITCAST9]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST10:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST10]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_c_cd_cl_1d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = 
COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32) - ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY16]](s32) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC 
[[COPY13]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(f32) = COPY $vgpr4 + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[COPY15]](f32) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[COPY16]](f32) + ; GFX11-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST2]](i32) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[DEF]](i16) + ; GFX11-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST5]](i32) + ; GFX11-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST6]](i32) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.1d), 15, [[BITCAST7]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BITCAST8]](<2 x i16>), [[BITCAST9]](<2 x i16>), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST10:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST10]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_c_cd_cl_1d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 
- ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32) - ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY16]](s32) - ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[CONCAT_VECTORS]](<4 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(f32) = COPY $vgpr3 + ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(f32) = COPY $vgpr4 + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[COPY15]](f32) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST 
[[COPY16]](f32) + ; GFX12-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST2]](i32) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST3]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST4]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST5]](i32) + ; GFX12-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST6]](i32) + ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x i16>), [[BITCAST9]](<2 x i16>) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.1d), 15, [[BITCAST7]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[CONCAT_VECTORS]](<4 x i16>), $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST10:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST10]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -2011,135 +2476,174 @@ define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; 
GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) - ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) - ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY19]](s32) - ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.2d), 15, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = 
G_BITCAST [[TRUNC3]](i16) + ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(f32) = COPY $vgpr5 + ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(f32) = COPY $vgpr6 + ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(f32) = COPY $vgpr7 + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX10-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX10-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[COPY17]](f32) + ; GFX10-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[COPY18]](f32) + ; GFX10-NEXT: [[BITCAST11:%[0-9]+]]:_(i32) = G_BITCAST [[COPY19]](f32) + ; GFX10-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST4]](i32) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[BITCAST6]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST7]](i16), [[BITCAST8]](i16) + ; GFX10-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST9]](i32) + ; GFX10-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST10]](i32) + ; GFX10-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST11]](i32) + ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x i16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BITCAST13]](<2 x i16>), [[BITCAST14]](<2 x i16>), [[BITCAST15]](<2 x i16>) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.2d), 15, [[CONCAT_VECTORS]](<12 x i16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST16:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST16]](<4 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX11-LABEL: name: sample_c_cd_cl_2d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: 
[[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX11-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) - ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) - ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY19]](s32) - ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST 
[[TRUNC]](i16) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(f32) = COPY $vgpr5 + ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(f32) = COPY $vgpr6 + ; GFX11-NEXT: [[COPY19:%[0-9]+]]:_(f32) = COPY $vgpr7 + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX11-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX11-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[COPY17]](f32) + ; GFX11-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[COPY18]](f32) + ; GFX11-NEXT: [[BITCAST11:%[0-9]+]]:_(i32) = G_BITCAST [[COPY19]](f32) + ; GFX11-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST4]](i32) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[BITCAST6]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST7]](i16), [[BITCAST8]](i16) + ; GFX11-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST9]](i32) + ; GFX11-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST10]](i32) + ; GFX11-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST11]](i32) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST14]](<2 x i16>), [[BITCAST15]](<2 x i16>) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.2d), 15, [[BITCAST12]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BITCAST13]](<2 x i16>), [[CONCAT_VECTORS]](<4 x i16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST16:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST16]](<4 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: sample_c_cd_cl_2d ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: 
[[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX12-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX12-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX12-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) - ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) - ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY19]](s32) - ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; 
GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(f32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX12-NEXT: [[COPY17:%[0-9]+]]:_(f32) = COPY $vgpr5 + ; GFX12-NEXT: [[COPY18:%[0-9]+]]:_(f32) = COPY $vgpr6 + ; GFX12-NEXT: [[COPY19:%[0-9]+]]:_(f32) = COPY $vgpr7 + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY12]](f32) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX12-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX12-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[COPY17]](f32) + ; GFX12-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[COPY18]](f32) + ; GFX12-NEXT: [[BITCAST11:%[0-9]+]]:_(i32) = G_BITCAST [[COPY19]](f32) + ; GFX12-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST4]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[BITCAST6]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST7]](i16), [[BITCAST8]](i16) + ; GFX12-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST9]](i32) + ; GFX12-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST10]](i32) + ; GFX12-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST11]](i32) + ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST13]](<2 x i16>), [[BITCAST14]](<2 x i16>), [[BITCAST15]](<2 x i16>) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.2d), 15, [[BITCAST12]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[CONCAT_VECTORS]](<6 x i16>), $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST16:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST16]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = 
call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -2151,129 +2655,168 @@ define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) - ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY19]](s32) - ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY20]](s32) - ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 4, [[CONCAT_VECTORS]](<14 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (s32), 
addrspace 8) - ; GFX10-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY17]](i32) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(f32) = COPY $vgpr6 + ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(f32) = COPY $vgpr7 + ; GFX10-NEXT: [[COPY20:%[0-9]+]]:_(f32) = COPY $vgpr8 + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](f32) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX10-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX10-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[COPY18]](f32) + ; GFX10-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[COPY19]](f32) + ; GFX10-NEXT: [[BITCAST11:%[0-9]+]]:_(i32) = G_BITCAST [[COPY20]](f32) + ; GFX10-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[COPY12]](i32) + ; GFX10-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST4]](i32) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[BITCAST6]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST7]](i16), [[BITCAST8]](i16) + ; GFX10-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST9]](i32) + ; GFX10-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST10]](i32) + ; GFX10-NEXT: [[BITCAST16:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST11]](i32) + ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<14 x i16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x i16>), [[BITCAST13]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BITCAST14]](<2 x 
i16>), [[BITCAST15]](<2 x i16>), [[BITCAST16]](<2 x i16>) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 4, [[CONCAT_VECTORS]](<14 x i16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 2 :: (dereferenceable load (f32), addrspace 8) + ; GFX10-NEXT: [[BITCAST17:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX10-NEXT: $vgpr0 = COPY [[BITCAST17]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX11-LABEL: name: sample_c_d_o_2darray_V1 ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX11-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX11-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) - ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY19]](s32) - ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY20]](s32) - ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 4, 
[[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (s32), addrspace 8) - ; GFX11-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY17]](i32) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(f32) = COPY $vgpr6 + ; GFX11-NEXT: [[COPY19:%[0-9]+]]:_(f32) = COPY $vgpr7 + ; GFX11-NEXT: [[COPY20:%[0-9]+]]:_(f32) = COPY $vgpr8 + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](f32) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX11-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX11-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[COPY18]](f32) + ; GFX11-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[COPY19]](f32) + ; GFX11-NEXT: [[BITCAST11:%[0-9]+]]:_(i32) = G_BITCAST [[COPY20]](f32) + ; GFX11-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[COPY12]](i32) + ; GFX11-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST4]](i32) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[BITCAST6]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST7]](i16), [[BITCAST8]](i16) + ; GFX11-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST9]](i32) + ; GFX11-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST10]](i32) + ; GFX11-NEXT: 
[[BITCAST16:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST11]](i32) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST14]](<2 x i16>), [[BITCAST15]](<2 x i16>), [[BITCAST16]](<2 x i16>) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 4, [[BITCAST12]](<2 x i16>), [[BITCAST13]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[CONCAT_VECTORS]](<6 x i16>), $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 2 :: (dereferenceable load (f32), addrspace 8) + ; GFX11-NEXT: [[BITCAST17:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX11-NEXT: $vgpr0 = COPY [[BITCAST17]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX12-LABEL: name: sample_c_d_o_2darray_V1 ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX12-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX12-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX12-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX12-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) - ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY19]](s32) - ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY20]](s32) - ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x 
s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 4, [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (s32), addrspace 8) - ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX12-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY17]](i32) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX12-NEXT: [[COPY18:%[0-9]+]]:_(f32) = COPY $vgpr6 + ; GFX12-NEXT: [[COPY19:%[0-9]+]]:_(f32) = COPY $vgpr7 + ; GFX12-NEXT: [[COPY20:%[0-9]+]]:_(f32) = COPY $vgpr8 + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](f32) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX12-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX12-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[COPY18]](f32) + ; GFX12-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[COPY19]](f32) + ; GFX12-NEXT: [[BITCAST11:%[0-9]+]]:_(i32) = G_BITCAST [[COPY20]](f32) + ; GFX12-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[COPY12]](i32) + ; GFX12-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST4]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[BITCAST6]](i16) + ; GFX12-NEXT: 
[[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST7]](i16), [[BITCAST8]](i16) + ; GFX12-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST9]](i32) + ; GFX12-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST10]](i32) + ; GFX12-NEXT: [[BITCAST16:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST11]](i32) + ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR3]](<2 x i16>), [[BITCAST14]](<2 x i16>), [[BITCAST15]](<2 x i16>), [[BITCAST16]](<2 x i16>) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 4, [[BITCAST12]](<2 x i16>), [[BITCAST13]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[CONCAT_VECTORS]](<8 x i16>), $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 2 :: (dereferenceable load (f32), addrspace 8) + ; GFX12-NEXT: [[BITCAST17:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](i32) + ; GFX12-NEXT: $vgpr0 = COPY [[BITCAST17]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f16.f32.f32(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -2285,135 +2828,174 @@ define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST 
[[COPY12]](s32) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) - ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY19]](s32) - ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY20]](s32) - ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 6, [[CONCAT_VECTORS]](<14 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<2 x s32>), addrspace 8) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY17]](i32) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(f32) = COPY $vgpr6 + ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(f32) = COPY $vgpr7 + ; GFX10-NEXT: [[COPY20:%[0-9]+]]:_(f32) = COPY $vgpr8 + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](f32) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = 
G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX10-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX10-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX10-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[COPY18]](f32) + ; GFX10-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[COPY19]](f32) + ; GFX10-NEXT: [[BITCAST11:%[0-9]+]]:_(i32) = G_BITCAST [[COPY20]](f32) + ; GFX10-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[COPY12]](i32) + ; GFX10-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST4]](i32) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[BITCAST6]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST7]](i16), [[BITCAST8]](i16) + ; GFX10-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST9]](i32) + ; GFX10-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST10]](i32) + ; GFX10-NEXT: [[BITCAST16:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST11]](i32) + ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<14 x i16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x i16>), [[BITCAST13]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BITCAST14]](<2 x i16>), [[BITCAST15]](<2 x i16>), [[BITCAST16]](<2 x i16>) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 6, [[CONCAT_VECTORS]](<14 x i16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 2 :: (dereferenceable load (<2 x f32>), addrspace 8) + ; GFX10-NEXT: [[BITCAST17:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST17]](<2 x f32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](f32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; ; GFX11-LABEL: name: sample_c_d_o_2darray_V2 ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: 
[[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX11-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX11-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) - ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY19]](s32) - ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY20]](s32) - ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 6, [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<2 x s32>), addrspace 8) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; 
GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY17]](i32) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(f32) = COPY $vgpr6 + ; GFX11-NEXT: [[COPY19:%[0-9]+]]:_(f32) = COPY $vgpr7 + ; GFX11-NEXT: [[COPY20:%[0-9]+]]:_(f32) = COPY $vgpr8 + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](f32) + ; GFX11-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX11-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX11-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX11-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX11-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[COPY18]](f32) + ; GFX11-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[COPY19]](f32) + ; GFX11-NEXT: [[BITCAST11:%[0-9]+]]:_(i32) = G_BITCAST [[COPY20]](f32) + ; GFX11-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[COPY12]](i32) + ; GFX11-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST4]](i32) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[BITCAST6]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST7]](i16), [[BITCAST8]](i16) + ; GFX11-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST9]](i32) + ; GFX11-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST10]](i32) + ; GFX11-NEXT: [[BITCAST16:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST11]](i32) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST14]](<2 x i16>), [[BITCAST15]](<2 x i16>), [[BITCAST16]](<2 x i16>) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 6, [[BITCAST12]](<2 x i16>), [[BITCAST13]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[CONCAT_VECTORS]](<6 x i16>), $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 2 :: (dereferenceable load (<2 x f32>), addrspace 8) + ; GFX11-NEXT: [[BITCAST17:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x i32>) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST17]](<2 x f32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](f32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; ; GFX12-LABEL: name: sample_c_d_o_2darray_V2 ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), 
[[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX12-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX12-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX12-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX12-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) - ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY19]](s32) - ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY20]](s32) - ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 6, [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<2 x s32>), addrspace 8) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $sgpr10 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $sgpr11 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $sgpr12 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $sgpr13 + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), 
[[COPY10]](i32), [[COPY11]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:_(f32) = COPY $vgpr1 + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX12-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY16]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX12-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY17]](i32) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX12-NEXT: [[COPY18:%[0-9]+]]:_(f32) = COPY $vgpr6 + ; GFX12-NEXT: [[COPY19:%[0-9]+]]:_(f32) = COPY $vgpr7 + ; GFX12-NEXT: [[COPY20:%[0-9]+]]:_(f32) = COPY $vgpr8 + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY13]](f32) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX12-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX12-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[COPY18]](f32) + ; GFX12-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[COPY19]](f32) + ; GFX12-NEXT: [[BITCAST11:%[0-9]+]]:_(i32) = G_BITCAST [[COPY20]](f32) + ; GFX12-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[COPY12]](i32) + ; GFX12-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST4]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST5]](i16), [[BITCAST6]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST7]](i16), [[BITCAST8]](i16) + ; GFX12-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST9]](i32) + ; GFX12-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST10]](i32) + ; GFX12-NEXT: [[BITCAST16:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST11]](i32) + ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR3]](<2 x i16>), [[BITCAST14]](<2 x i16>), [[BITCAST15]](<2 x i16>), [[BITCAST16]](<2 x i16>) + ; GFX12-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 6, [[BITCAST12]](<2 x i16>), [[BITCAST13]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[CONCAT_VECTORS]](<8 x i16>), $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 2 :: (dereferenceable load (<2 x f32>), addrspace 8) + ; GFX12-NEXT: [[BITCAST17:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x i32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST17]](<2 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 main_body: %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll index 4c15ad0355781..874dd8c6ed652 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll @@ -11,103 +11,113 @@ define amdgpu_ps void @image_store_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, ha ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; UNPACKED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (s16), addrspace 8) + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; UNPACKED-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY10]](i32) + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; UNPACKED-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[BITCAST1]](i16), 1, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 0 :: (dereferenceable store (f16), addrspace 8) ; UNPACKED-NEXT: S_ENDPGM 0 ; ; GFX81-LABEL: name: image_store_f16 ; GFX81: bb.1 (%ir-block.0): ; GFX81-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; GFX81-NEXT: {{ $}} - ; GFX81-NEXT: [[COPY:%[0-9]+]]:_(s32) 
= COPY $sgpr2 - ; GFX81-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX81-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX81-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX81-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX81-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX81-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX81-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX81-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX81-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX81-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX81-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX81-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX81-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX81-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (s16), addrspace 8) + ; GFX81-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX81-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX81-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX81-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX81-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX81-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX81-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX81-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX81-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX81-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX81-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX81-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX81-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY10]](i32) + ; GFX81-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX81-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX81-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; GFX81-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[BITCAST1]](i16), 1, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 0 :: (dereferenceable store (f16), addrspace 8) ; GFX81-NEXT: S_ENDPGM 0 ; ; GFX9-LABEL: name: image_store_f16 ; GFX9: bb.1 (%ir-block.0): ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: 
[[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (s16), addrspace 8) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY10]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[BITCAST1]](i16), 1, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 0 :: (dereferenceable store (f16), addrspace 8) ; GFX9-NEXT: S_ENDPGM 0 ; ; GFX10-LABEL: name: image_store_f16 ; GFX10: bb.1 (%ir-block.0): ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX10-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[COPY8]](s32), [[COPY9]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (s16), addrspace 8) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), 
[[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY10]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX10-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[BITCAST1]](i16), 1, [[COPY8]](i32), [[COPY9]](i32), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 0 :: (dereferenceable store (f16), addrspace 8) ; GFX10-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: image_store_f16 ; GFX12: bb.1 (%ir-block.0): ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX12-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[COPY8]](s32), [[COPY9]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (s16), addrspace 8) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY10]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[BITCAST1]](i16), 1, [[COPY8]](i32), [[COPY9]](i32), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 0 :: (dereferenceable store (f16), addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.image.store.2d.f16.i32(half %data, i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret void @@ -118,105 +128,110 @@ define amdgpu_ps void @image_store_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, ; UNPACKED: bb.1 (%ir-block.0): ; 
UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) - ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; UNPACKED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32) - ; UNPACKED-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<2 x s32>), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<2 x s16>), addrspace 8) + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:_(<2 x f16>) = COPY $vgpr2 + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[COPY10]](<2 x f16>) + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST]](<2 x i16>) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; UNPACKED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[BITCAST1]](i32), [[LSHR]](i32) + ; UNPACKED-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<2 x i32>), 3, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 0 :: (dereferenceable store (<2 x f16>), addrspace 8) ; UNPACKED-NEXT: S_ENDPGM 0 ; ; GFX81-LABEL: name: image_store_v2f16 ; GFX81: bb.1 (%ir-block.0): ; GFX81-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; GFX81-NEXT: {{ $}} - ; GFX81-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY 
$sgpr2 - ; GFX81-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX81-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX81-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX81-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX81-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX81-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX81-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX81-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX81-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX81-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX81-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX81-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX81-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) - ; GFX81-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX81-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[DEF]](s32) - ; GFX81-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<2 x s32>), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<2 x s16>), addrspace 8) + ; GFX81-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX81-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX81-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX81-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX81-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX81-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX81-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX81-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX81-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX81-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX81-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX81-NEXT: [[COPY10:%[0-9]+]]:_(<2 x f16>) = COPY $vgpr2 + ; GFX81-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[COPY10]](<2 x f16>) + ; GFX81-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; GFX81-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST]](<2 x i16>) + ; GFX81-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX81-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[BITCAST1]](i32), [[DEF]](i32) + ; GFX81-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<2 x i32>), 3, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 0 :: (dereferenceable store (<2 x f16>), addrspace 8) ; GFX81-NEXT: S_ENDPGM 0 ; ; GFX9-LABEL: name: image_store_v2f16 ; GFX9: bb.1 (%ir-block.0): ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR 
[[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[COPY10]](<2 x s16>), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<2 x s16>), addrspace 8) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(<2 x f16>) = COPY $vgpr2 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[COPY10]](<2 x f16>) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[BITCAST]](<2 x i16>), 3, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 0 :: (dereferenceable store (<2 x f16>), addrspace 8) ; GFX9-NEXT: S_ENDPGM 0 ; ; GFX10-LABEL: name: image_store_v2f16 ; GFX10: bb.1 (%ir-block.0): ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX10-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[COPY10]](<2 x s16>), 3, [[COPY8]](s32), [[COPY9]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<2 x s16>), addrspace 8) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = 
COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(<2 x f16>) = COPY $vgpr2 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[COPY10]](<2 x f16>) + ; GFX10-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[BITCAST]](<2 x i16>), 3, [[COPY8]](i32), [[COPY9]](i32), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 0 :: (dereferenceable store (<2 x f16>), addrspace 8) ; GFX10-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: image_store_v2f16 ; GFX12: bb.1 (%ir-block.0): ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX12-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[COPY10]](<2 x s16>), 3, [[COPY8]](s32), [[COPY9]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<2 x s16>), addrspace 8) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(<2 x f16>) = COPY $vgpr2 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[COPY10]](<2 x f16>) + ; GFX12-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[BITCAST]](<2 x i16>), 3, [[COPY8]](i32), [[COPY9]](i32), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 0 :: (dereferenceable store (<2 x f16>), addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.image.store.2d.v2f16.i32(<2 x half> %in, i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret void @@ -227,129 +242,169 @@ define amdgpu_ps void @image_store_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: 
[[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) - ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY11]](<2 x s16>) - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32) - ; UNPACKED-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<3 x s32>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<3 x s16>), align 8, addrspace 8) + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:_(<2 x f16>) = COPY $vgpr2 + ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:_(<2 x f16>) = COPY $vgpr3 + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST %36(i16) + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %42(i16) + ; UNPACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %37(i16) + ; UNPACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[COPY11]](<2 x f16>) + ; UNPACKED-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; UNPACKED-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[COPY10]](<2 x f16>) + ; UNPACKED-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST5]](<2 x i16>) + ; UNPACKED-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST6]](i32) + ; UNPACKED-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST6]], [[C]](i32) + ; UNPACKED-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; UNPACKED-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST 
[[BITCAST]](f16) + ; UNPACKED-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; UNPACKED-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; UNPACKED-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST7]](i16) + ; UNPACKED-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST8]](i16) + ; UNPACKED-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST9]](i16) + ; UNPACKED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32), [[ANYEXT2]](i32) + ; UNPACKED-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<3 x i32>), 7, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 0 :: (dereferenceable store (<3 x f16>), align 8, addrspace 8) ; UNPACKED-NEXT: S_ENDPGM 0 ; ; GFX81-LABEL: name: image_store_v3f16 ; GFX81: bb.1 (%ir-block.0): ; GFX81-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX81-NEXT: {{ $}} - ; GFX81-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX81-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX81-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX81-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX81-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX81-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX81-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX81-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX81-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX81-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX81-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX81-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX81-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX81-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) - ; GFX81-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX81-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX81-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY11]](<2 x s16>) - ; GFX81-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX81-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX81-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; GFX81-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX81-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX81-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX81-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; GFX81-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX81-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) - ; GFX81-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] - ; GFX81-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX81-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] - ; GFX81-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX81-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; GFX81-NEXT: [[BITCAST5:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>) - ; GFX81-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[BITCAST5]](<3 x s32>), 7, 
[[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<3 x s16>), align 8, addrspace 8) + ; GFX81-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX81-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX81-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX81-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX81-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX81-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX81-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX81-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX81-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX81-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX81-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX81-NEXT: [[COPY10:%[0-9]+]]:_(<2 x f16>) = COPY $vgpr2 + ; GFX81-NEXT: [[COPY11:%[0-9]+]]:_(<2 x f16>) = COPY $vgpr3 + ; GFX81-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST %35(i16) + ; GFX81-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %41(i16) + ; GFX81-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %36(i16) + ; GFX81-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[COPY11]](<2 x f16>) + ; GFX81-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; GFX81-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; GFX81-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX81-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[COPY10]](<2 x f16>) + ; GFX81-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST5]](<2 x i16>) + ; GFX81-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST6]](i32) + ; GFX81-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST6]], [[C]](i32) + ; GFX81-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX81-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX81-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX81-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX81-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; GFX81-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST7]](i16) + ; GFX81-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST8]](i16) + ; GFX81-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; GFX81-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; GFX81-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX81-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST9]](i16) + ; GFX81-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX81-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[C1]], [[C]](i32) + ; GFX81-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX81-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; GFX81-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[C1]], [[SHL1]] + ; GFX81-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; GFX81-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x i16>), [[BITCAST11]](<2 x i16>), [[BITCAST12]](<2 x i16>) + ; GFX81-NEXT: [[BITCAST13:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[CONCAT_VECTORS]](<6 x i16>) + ; GFX81-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[BITCAST13]](<3 x i32>), 7, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 0 :: (dereferenceable store (<3 x f16>), align 8, addrspace 8) ; GFX81-NEXT: S_ENDPGM 0 ; ; GFX9-LABEL: name: 
image_store_v3f16 ; GFX9: bb.1 (%ir-block.0): ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[CONCAT_VECTORS]](<4 x s16>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<3 x s16>), align 8, addrspace 8) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(<2 x f16>) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(<2 x f16>) = COPY $vgpr3 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST %34(i16) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %40(i16) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %35(i16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[COPY11]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[COPY10]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST5]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST6]](i32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST6]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX9-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; 
GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST7]](i16), [[BITCAST8]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST9]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>) + ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[CONCAT_VECTORS]](<4 x i16>), 7, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 0 :: (dereferenceable store (<3 x f16>), align 8, addrspace 8) ; GFX9-NEXT: S_ENDPGM 0 ; - ; GFX10-LABEL: name: image_store_v3f16 - ; GFX10: bb.1 (%ir-block.0): - ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>) - ; GFX10-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[CONCAT_VECTORS]](<4 x s16>), 7, [[COPY8]](s32), [[COPY9]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<3 x s16>), align 8, addrspace 8) - ; GFX10-NEXT: S_ENDPGM 0 - ; ; GFX12-LABEL: name: image_store_v3f16 ; GFX12: bb.1 (%ir-block.0): ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>) - ; GFX12-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[CONCAT_VECTORS]](<4 x s16>), 7, [[COPY8]](s32), [[COPY9]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 
:: (dereferenceable store (<3 x s16>), align 8, addrspace 8) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(<2 x f16>) = COPY $vgpr2 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(<2 x f16>) = COPY $vgpr3 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST %32(i16) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %38(i16) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %33(i16) + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[COPY11]](<2 x f16>) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[COPY10]](<2 x f16>) + ; GFX12-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST5]](<2 x i16>) + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST6]](i32) + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST6]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX12-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX12-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST7]](i16), [[BITCAST8]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST9]](i16), [[DEF]](i16) + ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX12-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[CONCAT_VECTORS]](<4 x i16>), 7, [[COPY8]](i32), [[COPY9]](i32), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 0 :: (dereferenceable store (<3 x f16>), align 8, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.image.store.2d.v3f16.i32(<3 x half> %in, i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret void @@ -360,117 +415,123 @@ define amdgpu_ps void @image_store_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY 
$sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) - ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY11]](<2 x s16>) - ; UNPACKED-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; UNPACKED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32), [[LSHR1]](s32) - ; UNPACKED-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<4 x s32>), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<4 x s16>), addrspace 8) + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:_(<2 x f16>) = COPY $vgpr2 + ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:_(<2 x f16>) = COPY $vgpr3 + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[COPY10]](<2 x f16>) + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[COPY11]](<2 x f16>) + ; UNPACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST]](<2 x i16>) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; UNPACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST1]](<2 x i16>) + ; UNPACKED-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; UNPACKED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[BITCAST2]](i32), [[LSHR]](i32), [[BITCAST3]](i32), [[LSHR1]](i32) + ; UNPACKED-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<4 x i32>), 15, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 0 :: (dereferenceable store (<4 x f16>), addrspace 8) ; UNPACKED-NEXT: S_ENDPGM 0 ; ; GFX81-LABEL: name: image_store_v4f16 ; GFX81: bb.1 (%ir-block.0): ; GFX81-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX81-NEXT: {{ $}} - ; GFX81-NEXT: 
[[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX81-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX81-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX81-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX81-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX81-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX81-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX81-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX81-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX81-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX81-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX81-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX81-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX81-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>) - ; GFX81-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX81-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<4 x s16>) - ; GFX81-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<2 x s32>) - ; GFX81-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX81-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[DEF]](s32), [[DEF]](s32) - ; GFX81-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<4 x s32>), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<4 x s16>), addrspace 8) + ; GFX81-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX81-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX81-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX81-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX81-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX81-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX81-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX81-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX81-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX81-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX81-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX81-NEXT: [[COPY10:%[0-9]+]]:_(<2 x f16>) = COPY $vgpr2 + ; GFX81-NEXT: [[COPY11:%[0-9]+]]:_(<2 x f16>) = COPY $vgpr3 + ; GFX81-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[COPY10]](<2 x f16>), [[COPY11]](<2 x f16>) + ; GFX81-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x f16>) + ; GFX81-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; GFX81-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BITCAST]](<4 x i16>) + ; GFX81-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x i32>) + ; GFX81-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX81-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[DEF]](i32), [[DEF]](i32) + ; GFX81-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<4 x i32>), 15, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 0 :: (dereferenceable store (<4 x f16>), addrspace 8) ; GFX81-NEXT: S_ENDPGM 0 ; ; GFX9-LABEL: name: 
image_store_v4f16 ; GFX9: bb.1 (%ir-block.0): ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[CONCAT_VECTORS]](<4 x s16>), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<4 x s16>), addrspace 8) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(<2 x f16>) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(<2 x f16>) = COPY $vgpr3 + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[COPY10]](<2 x f16>), [[COPY11]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x f16>) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[BITCAST]](<4 x i16>), 15, [[BUILD_VECTOR1]](<2 x i32>), $noreg, [[BUILD_VECTOR]](<8 x i32>), 0, 0, 0 :: (dereferenceable store (<4 x f16>), addrspace 8) ; GFX9-NEXT: S_ENDPGM 0 ; ; GFX10-LABEL: name: image_store_v4f16 ; GFX10: bb.1 (%ir-block.0): ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: 
[[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>) - ; GFX10-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[CONCAT_VECTORS]](<4 x s16>), 15, [[COPY8]](s32), [[COPY9]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<4 x s16>), addrspace 8) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(<2 x f16>) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(<2 x f16>) = COPY $vgpr3 + ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[COPY10]](<2 x f16>), [[COPY11]](<2 x f16>) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x f16>) + ; GFX10-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[BITCAST]](<4 x i16>), 15, [[COPY8]](i32), [[COPY9]](i32), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 0 :: (dereferenceable store (<4 x f16>), addrspace 8) ; GFX10-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: image_store_v4f16 ; GFX12: bb.1 (%ir-block.0): ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>) - ; GFX12-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[CONCAT_VECTORS]](<4 x s16>), 15, [[COPY8]](s32), [[COPY9]](s32), [[BUILD_VECTOR]](<8 x 
s32>), 0, 0, 0 :: (dereferenceable store (<4 x s16>), addrspace 8) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr7 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr8 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $sgpr9 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:_(<2 x f16>) = COPY $vgpr2 + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:_(<2 x f16>) = COPY $vgpr3 + ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[COPY10]](<2 x f16>), [[COPY11]](<2 x f16>) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x f16>) + ; GFX12-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[BITCAST]](<4 x i16>), 15, [[COPY8]](i32), [[COPY9]](i32), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 0 :: (dereferenceable store (<4 x f16>), addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.image.store.2d.v4f16.i32(<4 x half> %in, i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll index 074272f7bed86..6b0694453b8f5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll @@ -1405,11 +1405,16 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(ptr addrspace(1) %out, [ ; GFX10_W32-LABEL: test_div_fmas_f32_i1_phi_vcc: ; GFX10_W32: ; %bb.0: ; %entry ; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x28 -; GFX10_W32-NEXT: v_lshlrev_b32_e32 v1, 2, v0 -; GFX10_W32-NEXT: s_mov_b32 vcc_lo, 0 +; GFX10_W32-NEXT: v_lshlrev_b32_e32 v3, 2, v0 +; GFX10_W32-NEXT: v_mov_b32_e32 v4, 0 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) -; GFX10_W32-NEXT: global_load_dwordx3 v[1:3], v1, s[0:1] +; GFX10_W32-NEXT: v_mov_b32_e32 v2, s1 +; GFX10_W32-NEXT: v_mov_b32_e32 v1, s0 ; GFX10_W32-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 +; GFX10_W32-NEXT: v_add_co_u32 v1, vcc_lo, v1, v3 +; GFX10_W32-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, 0, v2, vcc_lo +; GFX10_W32-NEXT: s_mov_b32 vcc_lo, 0 +; GFX10_W32-NEXT: flat_load_dwordx3 v[1:3], v[1:2] ; GFX10_W32-NEXT: s_and_saveexec_b32 s1, s0 ; GFX10_W32-NEXT: s_cbranch_execz .LBB13_2 ; GFX10_W32-NEXT: ; %bb.1: ; %bb @@ -1425,23 +1430,28 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(ptr addrspace(1) %out, [ ; GFX10_W32-NEXT: s_and_b32 s0, exec_lo, s0 ; GFX10_W32-NEXT: s_or_b32 vcc_lo, s2, s0 ; GFX10_W32-NEXT: .LBB13_2: ; %exit +; GFX10_W32-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10_W32-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX10_W32-NEXT: s_waitcnt vmcnt(0) ; GFX10_W32-NEXT: v_div_fmas_f32 v0, v1, v2, v3 -; GFX10_W32-NEXT: v_mov_b32_e32 v1, 0 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) -; GFX10_W32-NEXT: global_store_dword v1, v0, s[0:1] offset:8 +; GFX10_W32-NEXT: global_store_dword v4, v0, s[0:1] offset:8 ; GFX10_W32-NEXT: s_endpgm ; ; GFX10_W64-LABEL: test_div_fmas_f32_i1_phi_vcc: ; 
GFX10_W64: ; %bb.0: ; %entry ; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x28 -; GFX10_W64-NEXT: v_lshlrev_b32_e32 v1, 2, v0 -; GFX10_W64-NEXT: s_mov_b64 vcc, 0 +; GFX10_W64-NEXT: v_lshlrev_b32_e32 v3, 2, v0 +; GFX10_W64-NEXT: v_mov_b32_e32 v4, 0 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) -; GFX10_W64-NEXT: global_load_dwordx3 v[1:3], v1, s[0:1] +; GFX10_W64-NEXT: v_mov_b32_e32 v2, s1 +; GFX10_W64-NEXT: v_mov_b32_e32 v1, s0 ; GFX10_W64-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v0 +; GFX10_W64-NEXT: v_add_co_u32 v1, vcc, v1, v3 +; GFX10_W64-NEXT: v_add_co_ci_u32_e32 v2, vcc, 0, v2, vcc +; GFX10_W64-NEXT: s_mov_b64 vcc, 0 +; GFX10_W64-NEXT: flat_load_dwordx3 v[1:3], v[1:2] ; GFX10_W64-NEXT: s_and_saveexec_b64 s[2:3], s[0:1] ; GFX10_W64-NEXT: s_cbranch_execz .LBB13_2 ; GFX10_W64-NEXT: ; %bb.1: ; %bb @@ -1457,25 +1467,28 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(ptr addrspace(1) %out, [ ; GFX10_W64-NEXT: s_and_b64 s[0:1], exec, s[0:1] ; GFX10_W64-NEXT: s_or_b64 vcc, s[6:7], s[0:1] ; GFX10_W64-NEXT: .LBB13_2: ; %exit +; GFX10_W64-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10_W64-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX10_W64-NEXT: s_waitcnt vmcnt(0) ; GFX10_W64-NEXT: v_div_fmas_f32 v0, v1, v2, v3 -; GFX10_W64-NEXT: v_mov_b32_e32 v1, 0 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) -; GFX10_W64-NEXT: global_store_dword v1, v0, s[0:1] offset:8 +; GFX10_W64-NEXT: global_store_dword v4, v0, s[0:1] offset:8 ; GFX10_W64-NEXT: s_endpgm ; ; GFX11_W32-LABEL: test_div_fmas_f32_i1_phi_vcc: ; GFX11_W32: ; %bb.0: ; %entry ; GFX11_W32-NEXT: s_load_b64 s[0:1], s[4:5], 0x28 -; GFX11_W32-NEXT: v_and_b32_e32 v3, 0x3ff, v0 -; GFX11_W32-NEXT: s_mov_b32 vcc_lo, 0 -; GFX11_W32-NEXT: v_lshlrev_b32_e32 v0, 2, v3 +; GFX11_W32-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_and_b32 v4, 0x3ff, v0 ; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0) -; GFX11_W32-NEXT: global_load_b96 v[0:2], v0, s[0:1] +; GFX11_W32-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11_W32-NEXT: v_lshlrev_b32_e32 v2, 2, v4 ; GFX11_W32-NEXT: s_mov_b32 s1, exec_lo -; GFX11_W32-NEXT: v_cmpx_eq_u32_e32 0, v3 +; GFX11_W32-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 +; GFX11_W32-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo +; GFX11_W32-NEXT: s_mov_b32 vcc_lo, 0 +; GFX11_W32-NEXT: flat_load_b96 v[0:2], v[0:1] +; GFX11_W32-NEXT: v_cmpx_eq_u32_e32 0, v4 ; GFX11_W32-NEXT: s_cbranch_execz .LBB13_2 ; GFX11_W32-NEXT: ; %bb.1: ; %bb ; GFX11_W32-NEXT: s_load_b64 s[2:3], s[4:5], 0x50 @@ -1494,21 +1507,26 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(ptr addrspace(1) %out, [ ; GFX11_W32-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; GFX11_W32-NEXT: s_waitcnt vmcnt(0) ; GFX11_W32-NEXT: v_div_fmas_f32 v0, v0, v1, v2 -; GFX11_W32-NEXT: v_mov_b32_e32 v1, 0 ; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0) -; GFX11_W32-NEXT: global_store_b32 v1, v0, s[0:1] offset:8 +; GFX11_W32-NEXT: global_store_b32 v3, v0, s[0:1] offset:8 ; GFX11_W32-NEXT: s_endpgm ; ; GFX11_W64-LABEL: test_div_fmas_f32_i1_phi_vcc: ; GFX11_W64: ; %bb.0: ; %entry ; GFX11_W64-NEXT: s_load_b64 s[0:1], s[4:5], 0x28 -; GFX11_W64-NEXT: v_and_b32_e32 v3, 0x3ff, v0 -; GFX11_W64-NEXT: s_mov_b64 vcc, 0 +; GFX11_W64-NEXT: v_and_b32_e32 v4, 0x3ff, v0 +; GFX11_W64-NEXT: v_mov_b32_e32 v3, 0 ; GFX11_W64-NEXT: s_mov_b64 s[2:3], exec -; GFX11_W64-NEXT: v_lshlrev_b32_e32 v0, 2, v3 +; GFX11_W64-NEXT: v_lshlrev_b32_e32 v2, 2, v4 ; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0) -; GFX11_W64-NEXT: global_load_b96 v[0:2], v0, s[0:1] -; GFX11_W64-NEXT: v_cmpx_eq_u32_e32 0, v3 +; GFX11_W64-NEXT: 
v_mov_b32_e32 v0, s0 +; GFX11_W64-NEXT: v_mov_b32_e32 v1, s1 +; GFX11_W64-NEXT: v_add_co_u32 v0, vcc, v0, v2 +; GFX11_W64-NEXT: v_add_co_ci_u32_e32 v1, vcc, 0, v1, vcc +; GFX11_W64-NEXT: s_mov_b64 vcc, 0 +; GFX11_W64-NEXT: s_waitcnt_depctr 0xfffe +; GFX11_W64-NEXT: flat_load_b96 v[0:2], v[0:1] +; GFX11_W64-NEXT: v_cmpx_eq_u32_e32 0, v4 ; GFX11_W64-NEXT: s_cbranch_execz .LBB13_2 ; GFX11_W64-NEXT: ; %bb.1: ; %bb ; GFX11_W64-NEXT: s_load_b64 s[0:1], s[4:5], 0x50 @@ -1527,9 +1545,8 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(ptr addrspace(1) %out, [ ; GFX11_W64-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; GFX11_W64-NEXT: s_waitcnt vmcnt(0) ; GFX11_W64-NEXT: v_div_fmas_f32 v0, v0, v1, v2 -; GFX11_W64-NEXT: v_mov_b32_e32 v1, 0 ; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0) -; GFX11_W64-NEXT: global_store_b32 v1, v0, s[0:1] offset:8 +; GFX11_W64-NEXT: global_store_b32 v3, v0, s[0:1] offset:8 ; GFX11_W64-NEXT: s_endpgm entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll index 19b0057d69b69..89fbe84b04930 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll @@ -622,10 +622,13 @@ define amdgpu_ps <3 x half> @load_1d_v3f16_xyz(<8 x i32> inreg %rsrc, i32 %s) { ; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s8 ; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s9 ; GFX8-UNPACKED-NEXT: image_load v[0:2], v0, s[0:7] dmask:0x7 unorm d16 +; GFX8-UNPACKED-NEXT: s_and_b32 s0, 0xffff, s0 +; GFX8-UNPACKED-NEXT: s_lshl_b32 s0, s0, 16 +; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v3, s0 ; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0) -; GFX8-UNPACKED-NEXT: v_and_b32_e32 v3, 0xffff, v1 -; GFX8-UNPACKED-NEXT: v_and_b32_e32 v1, 0xffff, v2 -; GFX8-UNPACKED-NEXT: v_lshlrev_b32_e32 v2, 16, v3 +; GFX8-UNPACKED-NEXT: v_and_b32_e32 v4, 0xffff, v1 +; GFX8-UNPACKED-NEXT: v_or_b32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GFX8-UNPACKED-NEXT: v_lshlrev_b32_e32 v2, 16, v4 ; GFX8-UNPACKED-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX8-UNPACKED-NEXT: ; return to shader part epilog ; @@ -640,8 +643,14 @@ define amdgpu_ps <3 x half> @load_1d_v3f16_xyz(<8 x i32> inreg %rsrc, i32 %s) { ; GFX8-PACKED-NEXT: s_mov_b32 s6, s8 ; GFX8-PACKED-NEXT: s_mov_b32 s7, s9 ; GFX8-PACKED-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x7 unorm d16 +; GFX8-PACKED-NEXT: s_and_b32 s0, 0xffff, s0 +; GFX8-PACKED-NEXT: s_lshl_b32 s0, s0, 16 +; GFX8-PACKED-NEXT: v_mov_b32_e32 v2, s0 ; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0) -; GFX8-PACKED-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX8-PACKED-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX8-PACKED-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GFX8-PACKED-NEXT: v_lshlrev_b32_e32 v2, 16, v3 +; GFX8-PACKED-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX8-PACKED-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: load_1d_v3f16_xyz: @@ -655,7 +664,9 @@ define amdgpu_ps <3 x half> @load_1d_v3f16_xyz(<8 x i32> inreg %rsrc, i32 %s) { ; GFX9-NEXT: s_mov_b32 s6, s8 ; GFX9-NEXT: s_mov_b32 s7, s9 ; GFX9-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x7 unorm d16 +; GFX9-NEXT: s_mov_b32 s0, 0xffff ; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_bfi_b32 v0, s0, v0, v0 ; GFX9-NEXT: ; return to shader part epilog ; ; 
GFX10PLUS-LABEL: load_1d_v3f16_xyz: @@ -670,6 +681,7 @@ define amdgpu_ps <3 x half> @load_1d_v3f16_xyz(<8 x i32> inreg %rsrc, i32 %s) { ; GFX10PLUS-NEXT: s_mov_b32 s7, s9 ; GFX10PLUS-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm d16 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) +; GFX10PLUS-NEXT: v_bfi_b32 v0, 0xffff, v0, v0 ; GFX10PLUS-NEXT: ; return to shader part epilog ; ; GFX12-LABEL: load_1d_v3f16_xyz: @@ -684,6 +696,7 @@ define amdgpu_ps <3 x half> @load_1d_v3f16_xyz(<8 x i32> inreg %rsrc, i32 %s) { ; GFX12-NEXT: s_mov_b32 s7, s9 ; GFX12-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D d16 ; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: v_bfi_b32 v0, 0xffff, v0, v0 ; GFX12-NEXT: ; return to shader part epilog %v = call <3 x half> @llvm.amdgcn.image.load.1d.v3f16.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) ret <3 x half> %v diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll index b26ddbdd7a342..063f85286feac 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll @@ -51,13 +51,13 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_a16(i32 %node_ptr, float % ; GFX10-LABEL: image_bvh_intersect_ray_a16: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_lshrrev_b32_e32 v9, 16, v5 +; GFX10-NEXT: v_and_b32_e32 v5, 0xffff, v5 +; GFX10-NEXT: v_and_b32_e32 v6, 0xffff, v6 ; GFX10-NEXT: v_and_b32_e32 v10, 0xffff, v7 ; GFX10-NEXT: v_and_b32_e32 v8, 0xffff, v8 -; GFX10-NEXT: v_lshlrev_b32_e32 v9, 16, v9 -; GFX10-NEXT: v_lshlrev_b32_e32 v10, 16, v10 +; GFX10-NEXT: v_lshl_or_b32 v5, v9, 16, v5 +; GFX10-NEXT: v_lshl_or_b32 v6, v10, 16, v6 ; GFX10-NEXT: v_alignbit_b32 v7, v8, v7, 16 -; GFX10-NEXT: v_and_or_b32 v5, 0xffff, v5, v9 -; GFX10-NEXT: v_and_or_b32 v6, 0xffff, v6, v10 ; GFX10-NEXT: image_bvh_intersect_ray v[0:3], v[0:7], s[0:3] a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog @@ -114,13 +114,13 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_a16(i64 %node_ptr, float ; GFX10-LABEL: image_bvh64_intersect_ray_a16: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_lshrrev_b32_e32 v10, 16, v6 +; GFX10-NEXT: v_and_b32_e32 v6, 0xffff, v6 +; GFX10-NEXT: v_and_b32_e32 v7, 0xffff, v7 ; GFX10-NEXT: v_and_b32_e32 v11, 0xffff, v8 ; GFX10-NEXT: v_and_b32_e32 v9, 0xffff, v9 -; GFX10-NEXT: v_lshlrev_b32_e32 v10, 16, v10 -; GFX10-NEXT: v_lshlrev_b32_e32 v11, 16, v11 +; GFX10-NEXT: v_lshl_or_b32 v6, v10, 16, v6 +; GFX10-NEXT: v_lshl_or_b32 v7, v11, 16, v7 ; GFX10-NEXT: v_alignbit_b32 v8, v9, v8, 16 -; GFX10-NEXT: v_and_or_b32 v6, 0xffff, v6, v10 -; GFX10-NEXT: v_and_or_b32 v7, 0xffff, v7, v11 ; GFX10-NEXT: image_bvh64_intersect_ray v[0:3], v[0:8], s[0:3] a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog @@ -259,34 +259,33 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_vgpr_descr(i32 %node_ptr, define amdgpu_ps <4 x float> @image_bvh_intersect_ray_a16_vgpr_descr(i32 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x half> %ray_dir, <3 x half> %ray_inv_dir, <4 x i32> %tdescr) { ; GFX1030-LABEL: image_bvh_intersect_ray_a16_vgpr_descr: ; GFX1030: ; %bb.0: -; GFX1030-NEXT: v_mov_b32_e32 v18, v0 -; GFX1030-NEXT: v_mov_b32_e32 v19, v1 +; GFX1030-NEXT: v_mov_b32_e32 v19, v0 +; GFX1030-NEXT: v_mov_b32_e32 v20, v1 +; GFX1030-NEXT: v_mov_b32_e32 v21, v2 +; GFX1030-NEXT: v_mov_b32_e32 v22, v3 ; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, 
v5 -; GFX1030-NEXT: v_and_b32_e32 v1, 0xffff, v7 -; GFX1030-NEXT: v_mov_b32_e32 v20, v2 -; GFX1030-NEXT: v_and_b32_e32 v2, 0xffff, v8 -; GFX1030-NEXT: v_mov_b32_e32 v21, v3 -; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX1030-NEXT: v_mov_b32_e32 v22, v4 -; GFX1030-NEXT: v_mov_b32_e32 v16, v9 -; GFX1030-NEXT: v_mov_b32_e32 v17, v10 -; GFX1030-NEXT: v_and_or_b32 v23, 0xffff, v5, v0 -; GFX1030-NEXT: v_and_or_b32 v24, 0xffff, v6, v1 -; GFX1030-NEXT: v_alignbit_b32 v25, v2, v7, 16 +; GFX1030-NEXT: v_and_b32_e32 v1, 0xffff, v5 +; GFX1030-NEXT: v_and_b32_e32 v2, 0xffff, v6 +; GFX1030-NEXT: v_and_b32_e32 v3, 0xffff, v7 +; GFX1030-NEXT: v_and_b32_e32 v8, 0xffff, v8 +; GFX1030-NEXT: v_mov_b32_e32 v23, v4 +; GFX1030-NEXT: v_mov_b32_e32 v17, v9 +; GFX1030-NEXT: v_mov_b32_e32 v18, v10 +; GFX1030-NEXT: v_lshl_or_b32 v24, v0, 16, v1 +; GFX1030-NEXT: v_lshl_or_b32 v25, v3, 16, v2 +; GFX1030-NEXT: v_alignbit_b32 v26, v8, v7, 16 ; GFX1030-NEXT: s_mov_b32 s1, exec_lo ; GFX1030-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1 -; GFX1030-NEXT: v_readfirstlane_b32 s4, v16 -; GFX1030-NEXT: v_readfirstlane_b32 s5, v17 +; GFX1030-NEXT: v_readfirstlane_b32 s4, v17 +; GFX1030-NEXT: v_readfirstlane_b32 s5, v18 ; GFX1030-NEXT: v_readfirstlane_b32 s6, v11 ; GFX1030-NEXT: v_readfirstlane_b32 s7, v12 -; GFX1030-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[16:17] +; GFX1030-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[17:18] ; GFX1030-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[11:12] ; GFX1030-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX1030-NEXT: s_and_saveexec_b32 s0, s0 -; GFX1030-NEXT: image_bvh_intersect_ray v[0:3], v[18:25], s[4:7] a16 -; GFX1030-NEXT: ; implicit-def: $vgpr16 -; GFX1030-NEXT: ; implicit-def: $vgpr18 +; GFX1030-NEXT: image_bvh_intersect_ray v[0:3], v[19:26], s[4:7] a16 +; GFX1030-NEXT: ; implicit-def: $vgpr17 ; GFX1030-NEXT: ; implicit-def: $vgpr19 ; GFX1030-NEXT: ; implicit-def: $vgpr20 ; GFX1030-NEXT: ; implicit-def: $vgpr21 @@ -294,6 +293,7 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_a16_vgpr_descr(i32 %node_p ; GFX1030-NEXT: ; implicit-def: $vgpr23 ; GFX1030-NEXT: ; implicit-def: $vgpr24 ; GFX1030-NEXT: ; implicit-def: $vgpr25 +; GFX1030-NEXT: ; implicit-def: $vgpr26 ; GFX1030-NEXT: ; implicit-def: $vgpr9_vgpr10_vgpr11_vgpr12 ; GFX1030-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX1030-NEXT: s_cbranch_execnz .LBB7_1 @@ -307,14 +307,14 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_a16_vgpr_descr(i32 %node_p ; GFX1013-NEXT: v_mov_b32_e32 v17, v9 ; GFX1013-NEXT: v_mov_b32_e32 v18, v10 ; GFX1013-NEXT: v_lshrrev_b32_e32 v9, 16, v5 +; GFX1013-NEXT: v_and_b32_e32 v5, 0xffff, v5 +; GFX1013-NEXT: v_and_b32_e32 v6, 0xffff, v6 ; GFX1013-NEXT: v_and_b32_e32 v10, 0xffff, v7 ; GFX1013-NEXT: v_and_b32_e32 v8, 0xffff, v8 ; GFX1013-NEXT: s_mov_b32 s1, exec_lo -; GFX1013-NEXT: v_lshlrev_b32_e32 v9, 16, v9 -; GFX1013-NEXT: v_lshlrev_b32_e32 v10, 16, v10 +; GFX1013-NEXT: v_lshl_or_b32 v5, v9, 16, v5 +; GFX1013-NEXT: v_lshl_or_b32 v6, v10, 16, v6 ; GFX1013-NEXT: v_alignbit_b32 v7, v8, v7, 16 -; GFX1013-NEXT: v_and_or_b32 v5, 0xffff, v5, v9 -; GFX1013-NEXT: v_and_or_b32 v6, 0xffff, v6, v10 ; GFX1013-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1 ; GFX1013-NEXT: v_readfirstlane_b32 s4, v17 ; GFX1013-NEXT: v_readfirstlane_b32 s5, v18 @@ -501,35 +501,34 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_vgpr_descr(i64 %node_ptr define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_a16_vgpr_descr(i64 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x 
half> %ray_dir, <3 x half> %ray_inv_dir, <4 x i32> %tdescr) { ; GFX1030-LABEL: image_bvh64_intersect_ray_a16_vgpr_descr: ; GFX1030: ; %bb.0: -; GFX1030-NEXT: v_mov_b32_e32 v19, v0 -; GFX1030-NEXT: v_mov_b32_e32 v20, v1 +; GFX1030-NEXT: v_mov_b32_e32 v20, v0 +; GFX1030-NEXT: v_mov_b32_e32 v21, v1 +; GFX1030-NEXT: v_mov_b32_e32 v22, v2 +; GFX1030-NEXT: v_mov_b32_e32 v23, v3 ; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v6 -; GFX1030-NEXT: v_and_b32_e32 v1, 0xffff, v8 -; GFX1030-NEXT: v_mov_b32_e32 v21, v2 -; GFX1030-NEXT: v_and_b32_e32 v2, 0xffff, v9 -; GFX1030-NEXT: v_mov_b32_e32 v22, v3 -; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX1030-NEXT: v_mov_b32_e32 v23, v4 -; GFX1030-NEXT: v_mov_b32_e32 v24, v5 -; GFX1030-NEXT: v_mov_b32_e32 v17, v10 -; GFX1030-NEXT: v_mov_b32_e32 v18, v11 -; GFX1030-NEXT: v_and_or_b32 v25, 0xffff, v6, v0 -; GFX1030-NEXT: v_and_or_b32 v26, 0xffff, v7, v1 -; GFX1030-NEXT: v_alignbit_b32 v27, v2, v8, 16 +; GFX1030-NEXT: v_and_b32_e32 v1, 0xffff, v6 +; GFX1030-NEXT: v_and_b32_e32 v2, 0xffff, v7 +; GFX1030-NEXT: v_and_b32_e32 v3, 0xffff, v8 +; GFX1030-NEXT: v_and_b32_e32 v9, 0xffff, v9 +; GFX1030-NEXT: v_mov_b32_e32 v24, v4 +; GFX1030-NEXT: v_mov_b32_e32 v25, v5 +; GFX1030-NEXT: v_mov_b32_e32 v18, v10 +; GFX1030-NEXT: v_mov_b32_e32 v19, v11 +; GFX1030-NEXT: v_lshl_or_b32 v26, v0, 16, v1 +; GFX1030-NEXT: v_lshl_or_b32 v27, v3, 16, v2 +; GFX1030-NEXT: v_alignbit_b32 v28, v9, v8, 16 ; GFX1030-NEXT: s_mov_b32 s1, exec_lo ; GFX1030-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1 -; GFX1030-NEXT: v_readfirstlane_b32 s4, v17 -; GFX1030-NEXT: v_readfirstlane_b32 s5, v18 +; GFX1030-NEXT: v_readfirstlane_b32 s4, v18 +; GFX1030-NEXT: v_readfirstlane_b32 s5, v19 ; GFX1030-NEXT: v_readfirstlane_b32 s6, v12 ; GFX1030-NEXT: v_readfirstlane_b32 s7, v13 -; GFX1030-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[17:18] +; GFX1030-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[18:19] ; GFX1030-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[12:13] ; GFX1030-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX1030-NEXT: s_and_saveexec_b32 s0, s0 -; GFX1030-NEXT: image_bvh64_intersect_ray v[0:3], v[19:27], s[4:7] a16 -; GFX1030-NEXT: ; implicit-def: $vgpr17 -; GFX1030-NEXT: ; implicit-def: $vgpr19 +; GFX1030-NEXT: image_bvh64_intersect_ray v[0:3], v[20:28], s[4:7] a16 +; GFX1030-NEXT: ; implicit-def: $vgpr18 ; GFX1030-NEXT: ; implicit-def: $vgpr20 ; GFX1030-NEXT: ; implicit-def: $vgpr21 ; GFX1030-NEXT: ; implicit-def: $vgpr22 @@ -538,6 +537,7 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_a16_vgpr_descr(i64 %node ; GFX1030-NEXT: ; implicit-def: $vgpr25 ; GFX1030-NEXT: ; implicit-def: $vgpr26 ; GFX1030-NEXT: ; implicit-def: $vgpr27 +; GFX1030-NEXT: ; implicit-def: $vgpr28 ; GFX1030-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13 ; GFX1030-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX1030-NEXT: s_cbranch_execnz .LBB9_1 @@ -551,14 +551,14 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_a16_vgpr_descr(i64 %node ; GFX1013-NEXT: v_mov_b32_e32 v18, v10 ; GFX1013-NEXT: v_mov_b32_e32 v19, v11 ; GFX1013-NEXT: v_lshrrev_b32_e32 v10, 16, v6 +; GFX1013-NEXT: v_and_b32_e32 v6, 0xffff, v6 +; GFX1013-NEXT: v_and_b32_e32 v7, 0xffff, v7 ; GFX1013-NEXT: v_and_b32_e32 v11, 0xffff, v8 ; GFX1013-NEXT: v_and_b32_e32 v9, 0xffff, v9 ; GFX1013-NEXT: s_mov_b32 s1, exec_lo -; GFX1013-NEXT: v_lshlrev_b32_e32 v10, 16, v10 -; GFX1013-NEXT: v_lshlrev_b32_e32 v11, 16, v11 +; GFX1013-NEXT: v_lshl_or_b32 v6, v10, 16, v6 +; GFX1013-NEXT: v_lshl_or_b32 v7, v11, 16, v7 ; GFX1013-NEXT: 
v_alignbit_b32 v8, v9, v8, 16 -; GFX1013-NEXT: v_and_or_b32 v6, 0xffff, v6, v10 -; GFX1013-NEXT: v_and_or_b32 v7, 0xffff, v7, v11 ; GFX1013-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1 ; GFX1013-NEXT: v_readfirstlane_b32 s4, v18 ; GFX1013-NEXT: v_readfirstlane_b32 s5, v19 @@ -798,24 +798,26 @@ define amdgpu_kernel void @image_bvh_intersect_ray_a16_nsa_reassign(ptr %p_node_ ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 ; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX11-NEXT: s_mov_b32 s8, 0x42004600 -; GFX11-NEXT: s_mov_b32 s9, 0x44004700 -; GFX11-NEXT: s_mov_b32 s10, 0x45004800 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: s_movk_i32 s8, 0x4600 +; GFX11-NEXT: s_movk_i32 s9, 0x4700 +; GFX11-NEXT: s_pack_ll_b32_b16 s8, s8, 0x4200 +; GFX11-NEXT: s_pack_ll_b32_b16 s9, s9, 0x4400 ; GFX11-NEXT: v_lshlrev_b32_e32 v4, 2, v0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-NEXT: s_mov_b32 s2, 2.0 ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: s_movk_i32 s3, 0x4800 ; GFX11-NEXT: s_mov_b32 s0, 0 ; GFX11-NEXT: s_mov_b32 s1, 1.0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v4 ; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX11-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4 ; GFX11-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo ; GFX11-NEXT: flat_load_b32 v6, v[0:1] ; GFX11-NEXT: flat_load_b32 v7, v[2:3] +; GFX11-NEXT: s_pack_ll_b32_b16 s10, s3, 0x4500 ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s8 ; GFX11-NEXT: v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2 ; GFX11-NEXT: v_dual_mov_b32 v4, s9 :: v_dual_mov_b32 v5, s10 @@ -958,25 +960,27 @@ define amdgpu_kernel void @image_bvh64_intersect_ray_a16_nsa_reassign(ptr %p_ray ; GFX11-NEXT: s_load_b64 s[6:7], s[4:5], 0x24 ; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x34 ; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX11-NEXT: s_movk_i32 s8, 0x4600 +; GFX11-NEXT: s_movk_i32 s9, 0x4700 ; GFX11-NEXT: s_mov_b32 s5, 1.0 ; GFX11-NEXT: s_mov_b32 s4, 0 -; GFX11-NEXT: s_mov_b32 s8, 0x42004600 -; GFX11-NEXT: s_mov_b32 s9, 0x44004700 ; GFX11-NEXT: v_lshlrev_b32_e32 v2, 2, v0 -; GFX11-NEXT: s_mov_b32 s10, 0x45004800 -; GFX11-NEXT: v_mov_b32_e32 v6, 0xb36211c6 -; GFX11-NEXT: v_bfrev_b32_e32 v7, 4.0 -; GFX11-NEXT: v_mov_b32_e32 v3, s8 -; GFX11-NEXT: v_dual_mov_b32 v5, s10 :: v_dual_mov_b32 v4, s9 +; GFX11-NEXT: s_pack_ll_b32_b16 s8, s8, 0x4200 +; GFX11-NEXT: s_pack_ll_b32_b16 s9, s9, 0x4400 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7 +; GFX11-NEXT: s_movk_i32 s7, 0x4800 ; GFX11-NEXT: s_mov_b32 s6, 2.0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: s_pack_ll_b32_b16 s10, s7, 0x4500 +; GFX11-NEXT: v_mov_b32_e32 v3, s8 ; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 ; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo +; GFX11-NEXT: v_dual_mov_b32 v6, 0xb36211c6 :: v_dual_mov_b32 v5, s10 +; GFX11-NEXT: v_bfrev_b32_e32 v7, 4.0 ; GFX11-NEXT: flat_load_b32 v8, v[0:1] ; GFX11-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5 ; GFX11-NEXT: v_mov_b32_e32 v2, s6 +; GFX11-NEXT: v_mov_b32_e32 v4, s9 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-NEXT: image_bvh64_intersect_ray v[0:3], [v[6:7], v8, v[0:2], v[3:5]], s[0:3] a16 ; GFX11-NEXT: s_waitcnt vmcnt(0) diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.make.buffer.rsrc.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.make.buffer.rsrc.ll index 3bf55599385e1..9b94f3ee10ef3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.make.buffer.rsrc.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.make.buffer.rsrc.ll @@ -40,7 +40,7 @@ define amdgpu_ps float @read_raw_buffer(ptr addrspace(1) inreg %p) { ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_1]], implicit-def dead $scc ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1, [[S_MOV_B32_]], %subreg.sub2, [[S_MOV_B32_]], %subreg.sub3 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 4, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 4, 0, 0, implicit $exec :: (dereferenceable load (f32) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %rsrc = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p1(ptr addrspace(1) %p, i16 0, i32 0, i32 0) @@ -158,7 +158,7 @@ define amdgpu_ps float @general_case_load(ptr inreg %p, i16 inreg %stride, i32 i ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[S_OR_B32_]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]] - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_IDXEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %rsrc = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr %p, i16 %stride, i32 %numVals, i32 %flags) @@ -209,7 +209,7 @@ define amdgpu_ps float @general_case_load_with_waterfall(ptr %p, i16 %stride, i3 ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN [[COPY7]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN [[COPY7]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -237,7 +237,7 @@ define amdgpu_ps float @read_buffer_fat_ptr_p0(ptr inreg %p) { ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_1]], implicit-def dead $scc ; CHECK-NEXT: 
[[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1, [[S_MOV_B32_]], %subreg.sub2, [[S_MOV_B32_]], %subreg.sub3 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.ptr, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32) from %ir.ptr, align 1, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ptr = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p0(ptr %p, i16 0, i32 0, i32 0) @@ -256,7 +256,7 @@ define amdgpu_ps float @read_buffer_fat_ptr_p1(ptr addrspace(1) inreg %p) { ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_1]], implicit-def dead $scc ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1, [[S_MOV_B32_]], %subreg.sub2, [[S_MOV_B32_]], %subreg.sub3 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.ptr, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32) from %ir.ptr, align 1, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ptr = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) %p, i16 0, i32 0, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.add.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.add.ll index 835fb468bfea4..c914d632a5476 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.add.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.add.ll @@ -16,7 +16,7 @@ define amdgpu_ps float @raw_buffer_atomic_add_i32__vgpr_val__sgpr_rsrc__vgpr_vof ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (i32), align 1, addrspace 8) ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_OFFEN_RTN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -32,7 +32,7 @@ define amdgpu_ps float @raw_buffer_atomic_add_i32__vgpr_val__sgpr_rsrc__vgpr_vof ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; 
GFX12-NEXT: [[BUFFER_ATOMIC_ADD_VBUFFER_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_VBUFFER_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_ATOMIC_ADD_VBUFFER_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_VBUFFER_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (i32), align 1, addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_VBUFFER_OFFEN_RTN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32 %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -53,7 +53,7 @@ define amdgpu_ps float @raw_buffer_atomic_add_i32_noret__vgpr_val__sgpr_rsrc__vg ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (i32), align 1, addrspace 8) ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_OFFEN_RTN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -69,7 +69,7 @@ define amdgpu_ps float @raw_buffer_atomic_add_i32_noret__vgpr_val__sgpr_rsrc__vg ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: [[BUFFER_ATOMIC_ADD_VBUFFER_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_VBUFFER_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_ATOMIC_ADD_VBUFFER_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_VBUFFER_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (i32), align 1, addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_VBUFFER_OFFEN_RTN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32 %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -92,7 +92,7 @@ define amdgpu_ps <2 x float> @raw_buffer_atomic_add_i64__vgpr_val__sgpr_rsrc__vg ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: [[BUFFER_ATOMIC_ADD_X2_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_OFFEN_RTN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_ATOMIC_ADD_X2_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_OFFEN_RTN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 
1, implicit $exec :: (volatile dereferenceable load store (i64), align 1, addrspace 8) ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_X2_OFFEN_RTN]].sub0 ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_X2_OFFEN_RTN]].sub1 ; GFX8-NEXT: $vgpr0 = COPY [[COPY8]] @@ -113,7 +113,7 @@ define amdgpu_ps <2 x float> @raw_buffer_atomic_add_i64__vgpr_val__sgpr_rsrc__vg ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: [[BUFFER_ATOMIC_ADD_X2_VBUFFER_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_VBUFFER_OFFEN_RTN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_ATOMIC_ADD_X2_VBUFFER_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_VBUFFER_OFFEN_RTN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (i64), align 1, addrspace 8) ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_X2_VBUFFER_OFFEN_RTN]].sub0 ; GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_X2_VBUFFER_OFFEN_RTN]].sub1 ; GFX12-NEXT: $vgpr0 = COPY [[COPY8]] @@ -139,7 +139,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_i64_noret__vgpr_val__sgpr_rsrc__vgp ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: BUFFER_ATOMIC_ADD_X2_OFFEN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_ATOMIC_ADD_X2_OFFEN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (i64), align 1, addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_buffer_atomic_add_i64_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset @@ -156,7 +156,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_i64_noret__vgpr_val__sgpr_rsrc__vgp ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: BUFFER_ATOMIC_ADD_X2_VBUFFER_OFFEN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_ATOMIC_ADD_X2_VBUFFER_OFFEN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (i64), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 %ret = call i64 @llvm.amdgcn.raw.buffer.atomic.add.i64(i64 %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -204,7 +204,7 @@ define amdgpu_ps float @raw_buffer_atomic_add_i32__sgpr_val__vgpr_rsrc__sgpr_vof ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY7]], [[COPY8]], 
[[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (i32), align 1, addrspace 8) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -257,7 +257,7 @@ define amdgpu_ps float @raw_buffer_atomic_add_i32__sgpr_val__vgpr_rsrc__sgpr_vof ; GFX12-NEXT: bb.3: ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[BUFFER_ATOMIC_ADD_VBUFFER_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_VBUFFER_OFFEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_ATOMIC_ADD_VBUFFER_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_VBUFFER_OFFEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (i32), align 1, addrspace 8) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -316,7 +316,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_i32_noret__sgpr_val__vgpr_rsrc__sgp ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: BUFFER_ATOMIC_ADD_OFFEN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_ATOMIC_ADD_OFFEN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (i32), align 1, addrspace 8) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -368,7 +368,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_i32_noret__sgpr_val__vgpr_rsrc__sgp ; GFX12-NEXT: bb.3: ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: BUFFER_ATOMIC_ADD_VBUFFER_OFFEN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_ATOMIC_ADD_VBUFFER_OFFEN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (i32), align 1, addrspace 8) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -396,7 +396,7 @@ define amdgpu_ps float @raw_buffer_atomic_add_i32__vgpr_val__sgpr_rsrc__vgpr_vof ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 1, implicit $exec :: (volatile 
dereferenceable load store (s32), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (i32), align 1, addrspace 8) ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_OFFEN_RTN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -412,7 +412,7 @@ define amdgpu_ps float @raw_buffer_atomic_add_i32__vgpr_val__sgpr_rsrc__vgpr_vof ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: [[BUFFER_ATOMIC_ADD_VBUFFER_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_VBUFFER_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_ATOMIC_ADD_VBUFFER_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_VBUFFER_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (i32), align 1, addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_VBUFFER_OFFEN_RTN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %voffset = add i32 %voffset.base, 4095 @@ -435,7 +435,7 @@ define amdgpu_ps float @raw_buffer_atomic_add_i32__vgpr_val__sgpr_rsrc__vgpr_vof ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 3, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 3, implicit $exec :: (volatile dereferenceable load store (i32), align 1, addrspace 8) ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_OFFEN_RTN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -451,7 +451,7 @@ define amdgpu_ps float @raw_buffer_atomic_add_i32__vgpr_val__sgpr_rsrc__vgpr_vof ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: [[BUFFER_ATOMIC_ADD_VBUFFER_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_VBUFFER_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 3, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_ATOMIC_ADD_VBUFFER_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_VBUFFER_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 3, implicit $exec :: (volatile dereferenceable load store (i32), align 1, addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_VBUFFER_OFFEN_RTN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32 %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 2) diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.cmpswap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.cmpswap.ll index 4973129ed3370..fb8ba59de36fc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.cmpswap.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.cmpswap.ll @@ -19,7 +19,7 @@ define amdgpu_ps float @raw_buffer_atomic_cmpswap_i32__vgpr_val__vgpr_cmp__sgpr_ ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX8-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (i32), align 1, addrspace 8) ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0 ; GFX8-NEXT: $vgpr0 = COPY [[COPY8]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -38,7 +38,7 @@ define amdgpu_ps float @raw_buffer_atomic_cmpswap_i32__vgpr_val__vgpr_cmp__sgpr_ ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX12-NEXT: [[BUFFER_ATOMIC_CMPSWAP_VBUFFER_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_VBUFFER_OFFEN_RTN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_ATOMIC_CMPSWAP_VBUFFER_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_VBUFFER_OFFEN_RTN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (i32), align 1, addrspace 8) ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_VBUFFER_OFFEN_RTN]].sub0 ; GFX12-NEXT: $vgpr0 = COPY [[COPY8]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -63,7 +63,7 @@ define amdgpu_ps void @raw_buffer_atomic_cmpswap_i32_noret__vgpr_val__vgpr_cmp__ ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX8-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (i32), align 1, addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_buffer_atomic_cmpswap_i32_noret__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset @@ -80,7 +80,7 @@ define amdgpu_ps void @raw_buffer_atomic_cmpswap_i32_noret__vgpr_val__vgpr_cmp__ ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; 
GFX12-NEXT: BUFFER_ATOMIC_CMPSWAP_VBUFFER_OFFEN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_ATOMIC_CMPSWAP_VBUFFER_OFFEN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (i32), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 %ret = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32 %val, i32 %cmp, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -131,7 +131,7 @@ define amdgpu_ps float @raw_buffer_atomic_cmpswap_i32__sgpr_val__sgpr_cmp__vgpr_ ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 - ; GFX8-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE2]], [[COPY10]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE2]], [[COPY10]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (i32), align 1, addrspace 8) ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0 ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec @@ -188,7 +188,7 @@ define amdgpu_ps float @raw_buffer_atomic_cmpswap_i32__sgpr_val__sgpr_cmp__vgpr_ ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 - ; GFX12-NEXT: [[BUFFER_ATOMIC_CMPSWAP_VBUFFER_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_VBUFFER_OFFEN_RTN [[REG_SEQUENCE2]], [[COPY10]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_ATOMIC_CMPSWAP_VBUFFER_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_VBUFFER_OFFEN_RTN [[REG_SEQUENCE2]], [[COPY10]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (i32), align 1, addrspace 8) ; GFX12-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_VBUFFER_OFFEN_RTN]].sub0 ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec @@ -251,7 +251,7 @@ define amdgpu_ps void @raw_buffer_atomic_cmpswap_i32_noret__sgpr_val__sgpr_cmp__ ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 - ; GFX8-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE2]], [[COPY10]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE2]], [[COPY10]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (i32), align 1, addrspace 8) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: 
SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -306,7 +306,7 @@ define amdgpu_ps void @raw_buffer_atomic_cmpswap_i32_noret__sgpr_val__sgpr_cmp__ ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 - ; GFX12-NEXT: BUFFER_ATOMIC_CMPSWAP_VBUFFER_OFFEN [[REG_SEQUENCE2]], [[COPY10]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_ATOMIC_CMPSWAP_VBUFFER_OFFEN [[REG_SEQUENCE2]], [[COPY10]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (i32), align 1, addrspace 8) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -336,7 +336,7 @@ define amdgpu_ps float @raw_buffer_atomic_cmpswap_i32__vgpr_val__vgpr_cmp__sgpr_ ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX8-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (i32), align 1, addrspace 8) ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0 ; GFX8-NEXT: $vgpr0 = COPY [[COPY8]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -355,7 +355,7 @@ define amdgpu_ps float @raw_buffer_atomic_cmpswap_i32__vgpr_val__vgpr_cmp__sgpr_ ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX12-NEXT: [[BUFFER_ATOMIC_CMPSWAP_VBUFFER_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_VBUFFER_OFFEN_RTN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_ATOMIC_CMPSWAP_VBUFFER_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_VBUFFER_OFFEN_RTN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (i32), align 1, addrspace 8) ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_VBUFFER_OFFEN_RTN]].sub0 ; GFX12-NEXT: $vgpr0 = COPY [[COPY8]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -385,7 +385,7 @@ define amdgpu_ps double @raw_buffer_atomic_cmpswap_i64__vgpr_val__vgpr_cmp__sgpr ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX8-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX8-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN [[REG_SEQUENCE3]], [[COPY8]], [[REG_SEQUENCE2]], [[COPY9]], 0, 1, implicit $exec :: (volatile 
dereferenceable load store (s64), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN [[REG_SEQUENCE3]], [[COPY8]], [[REG_SEQUENCE2]], [[COPY9]], 0, 1, implicit $exec :: (volatile dereferenceable load store (i64), align 1, addrspace 8) ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN]].sub0_sub1 ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY10]].sub0 ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY10]].sub1 @@ -413,7 +413,7 @@ define amdgpu_ps double @raw_buffer_atomic_cmpswap_i64__vgpr_val__vgpr_cmp__sgpr ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX12-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX12-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_VBUFFER_OFFEN_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_VBUFFER_OFFEN_RTN [[REG_SEQUENCE3]], [[COPY8]], [[REG_SEQUENCE2]], [[COPY9]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_VBUFFER_OFFEN_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_VBUFFER_OFFEN_RTN [[REG_SEQUENCE3]], [[COPY8]], [[REG_SEQUENCE2]], [[COPY9]], 0, 1, implicit $exec :: (volatile dereferenceable load store (i64), align 1, addrspace 8) ; GFX12-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_VBUFFER_OFFEN_RTN]].sub0_sub1 ; GFX12-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY10]].sub0 ; GFX12-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY10]].sub1 @@ -447,7 +447,7 @@ define amdgpu_ps void @raw_buffer_atomic_cmpswap_i64_noret__vgpr_val__vgpr_cmp__ ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX8-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX8-NEXT: BUFFER_ATOMIC_CMPSWAP_X2_OFFEN [[REG_SEQUENCE3]], [[COPY8]], [[REG_SEQUENCE2]], [[COPY9]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_ATOMIC_CMPSWAP_X2_OFFEN [[REG_SEQUENCE3]], [[COPY8]], [[REG_SEQUENCE2]], [[COPY9]], 0, 0, implicit $exec :: (volatile dereferenceable load store (i64), align 1, addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_buffer_atomic_cmpswap_i64_noret__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset @@ -468,7 +468,7 @@ define amdgpu_ps void @raw_buffer_atomic_cmpswap_i64_noret__vgpr_val__vgpr_cmp__ ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX12-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX12-NEXT: BUFFER_ATOMIC_CMPSWAP_X2_VBUFFER_OFFEN [[REG_SEQUENCE3]], [[COPY8]], [[REG_SEQUENCE2]], [[COPY9]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_ATOMIC_CMPSWAP_X2_VBUFFER_OFFEN [[REG_SEQUENCE3]], [[COPY8]], [[REG_SEQUENCE2]], [[COPY9]], 0, 0, implicit $exec :: (volatile dereferenceable load store (i64), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 %ret = call i64 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i64(i64 %val, i64 %cmp, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -523,7 +523,7 @@ define amdgpu_ps double 
@raw_buffer_atomic_cmpswap_i64__sgpr_val__sgpr_cmp__vgpr ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY10]], %subreg.sub0_sub1, [[COPY11]], %subreg.sub2_sub3 - ; GFX8-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN [[REG_SEQUENCE4]], [[COPY12]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN [[REG_SEQUENCE4]], [[COPY12]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (i64), align 1, addrspace 8) ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN]].sub0_sub1 ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec @@ -589,7 +589,7 @@ define amdgpu_ps double @raw_buffer_atomic_cmpswap_i64__sgpr_val__sgpr_cmp__vgpr ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY10]], %subreg.sub0_sub1, [[COPY11]], %subreg.sub2_sub3 - ; GFX12-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_VBUFFER_OFFEN_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_VBUFFER_OFFEN_RTN [[REG_SEQUENCE4]], [[COPY12]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_VBUFFER_OFFEN_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_VBUFFER_OFFEN_RTN [[REG_SEQUENCE4]], [[COPY12]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (i64), align 1, addrspace 8) ; GFX12-NEXT: [[COPY17:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_VBUFFER_OFFEN_RTN]].sub0_sub1 ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec @@ -661,7 +661,7 @@ define amdgpu_ps void @raw_buffer_atomic_cmpswap_i64_noret__sgpr_val__sgpr_cmp__ ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY10]], %subreg.sub0_sub1, [[COPY11]], %subreg.sub2_sub3 - ; GFX8-NEXT: BUFFER_ATOMIC_CMPSWAP_X2_OFFEN [[REG_SEQUENCE4]], [[COPY12]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_ATOMIC_CMPSWAP_X2_OFFEN [[REG_SEQUENCE4]], [[COPY12]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (i64), align 1, addrspace 8) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -720,7 +720,7 @@ define amdgpu_ps void @raw_buffer_atomic_cmpswap_i64_noret__sgpr_val__sgpr_cmp__ ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY10]], %subreg.sub0_sub1, [[COPY11]], %subreg.sub2_sub3 - ; GFX12-NEXT: BUFFER_ATOMIC_CMPSWAP_X2_VBUFFER_OFFEN [[REG_SEQUENCE4]], [[COPY12]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: 
(volatile dereferenceable load store (s64), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_ATOMIC_CMPSWAP_X2_VBUFFER_OFFEN [[REG_SEQUENCE4]], [[COPY12]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (i64), align 1, addrspace 8) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -754,7 +754,7 @@ define amdgpu_ps double @raw_buffer_atomic_cmpswap_i64__vgpr_val__vgpr_cmp__sgpr ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX8-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX8-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN [[REG_SEQUENCE3]], [[COPY8]], [[REG_SEQUENCE2]], [[COPY9]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN [[REG_SEQUENCE3]], [[COPY8]], [[REG_SEQUENCE2]], [[COPY9]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (i64), align 1, addrspace 8) ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN]].sub0_sub1 ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY10]].sub0 ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY10]].sub1 @@ -782,7 +782,7 @@ define amdgpu_ps double @raw_buffer_atomic_cmpswap_i64__vgpr_val__vgpr_cmp__sgpr ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX12-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX12-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_VBUFFER_OFFEN_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_VBUFFER_OFFEN_RTN [[REG_SEQUENCE3]], [[COPY8]], [[REG_SEQUENCE2]], [[COPY9]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_VBUFFER_OFFEN_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_VBUFFER_OFFEN_RTN [[REG_SEQUENCE3]], [[COPY8]], [[REG_SEQUENCE2]], [[COPY9]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (i64), align 1, addrspace 8) ; GFX12-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_VBUFFER_OFFEN_RTN]].sub0_sub1 ; GFX12-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY10]].sub0 ; GFX12-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY10]].sub1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd-with-ret.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd-with-ret.ll index d59c5a6a2609c..f763b41871987 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd-with-ret.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd-with-ret.ll @@ -4,7 +4,7 @@ declare float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i32 immarg) declare <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i32 immarg) -; GFX908: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32) = G_AMDGPU_BUFFER_ATOMIC_FADD %{{[0-9]+}}:vgpr, %{{[0-9]+}}:sgpr(<4 x s32>), %{{[0-9]+}}:vgpr(s32), %{{[0-9]+}}:vgpr, %{{[0-9]+}}:sgpr, 0, 0, 0 :: (volatile dereferenceable load store (s32), 
align 1, addrspace 8) (in function: buffer_atomic_add_f32_rtn) +; GFX908: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(f32) = G_AMDGPU_BUFFER_ATOMIC_FADD %{{[0-9]+}}:vgpr, %{{[0-9]+}}:sgpr(<4 x i32>), %{{[0-9]+}}:vgpr(i32), %{{[0-9]+}}:vgpr, %{{[0-9]+}}:sgpr, 0, 0, 0 :: (volatile dereferenceable load store (f32), align 1, addrspace 8) (in function: buffer_atomic_add_f32_rtn) ; GFX90A-LABEL: {{^}}buffer_atomic_add_f32_rtn: ; GFX90A: buffer_atomic_add_f32 v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9:]+}}], s{{[0-9]+}} offen glc diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll index b1846b8dbebc9..5fab1254ab3a9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll @@ -16,7 +16,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgp ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (f32), align 1, addrspace 8) ; GFX908-NEXT: S_ENDPGM 0 ; ; GFX90A-LABEL: name: raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset @@ -31,7 +31,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgp ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (f32), align 1, addrspace 8) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -50,7 +50,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgp ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (f32), align 1, addrspace 8) ; GFX908-NEXT: S_ENDPGM 0 ; ; GFX90A-LABEL: name: raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset_plus4095__sgpr_soffset @@ 
-65,7 +65,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgp ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (f32), align 1, addrspace 8) ; GFX90A-NEXT: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4095 %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -84,7 +84,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgp ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (f32), align 1, addrspace 8) ; GFX908-NEXT: S_ENDPGM 0 ; ; GFX90A-LABEL: name: raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset_4095__sgpr_soffset @@ -98,7 +98,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgp ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (f32), align 1, addrspace 8) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0) ret void @@ -117,7 +117,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_v ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (f32), align 1, addrspace 8) ; GFX908-NEXT: S_ENDPGM 0 ; ; GFX90A-LABEL: name: raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset @@ -131,7 +131,7 @@ 
define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_v ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (f32), align 1, addrspace 8) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0) ret void @@ -179,7 +179,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__sgp ; GFX908-NEXT: bb.3: ; GFX908-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (f32), align 1, addrspace 8) ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX908-NEXT: {{ $}} @@ -231,7 +231,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__sgp ; GFX90A-NEXT: bb.3: ; GFX90A-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (f32), align 1, addrspace 8) ; GFX90A-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX90A-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX90A-NEXT: {{ $}} @@ -286,7 +286,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__0_v ; GFX908-NEXT: bb.3: ; GFX908-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (f32), align 1, addrspace 8) ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX908-NEXT: {{ $}} @@ -336,7 +336,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__0_v ; GFX90A-NEXT: bb.3: ; GFX90A-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), 
align 1, addrspace 8) + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (f32), align 1, addrspace 8) ; GFX90A-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX90A-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX90A-NEXT: {{ $}} @@ -364,7 +364,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgp ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (f32), align 1, addrspace 8) ; GFX908-NEXT: S_ENDPGM 0 ; ; GFX90A-LABEL: name: raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_voffset_add4095 @@ -379,7 +379,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgp ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (f32), align 1, addrspace 8) ; GFX90A-NEXT: S_ENDPGM 0 %voffset = add i32 %voffset.base, 4095 %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -400,7 +400,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgp ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 2, implicit $exec :: (volatile dereferenceable load store (f32), align 1, addrspace 8) ; GFX908-NEXT: S_ENDPGM 0 ; ; GFX90A-LABEL: name: raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc @@ -415,7 +415,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgp ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], 
[[COPY6]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 2, implicit $exec :: (volatile dereferenceable load store (f32), align 1, addrspace 8) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 2) ret void @@ -434,7 +434,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__v ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8) + ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x f16>), align 1, addrspace 8) ; GFX908-NEXT: S_ENDPGM 0 ; ; GFX90A-LABEL: name: raw_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset @@ -449,7 +449,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__v ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8) + ; GFX90A-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x f16>), align 1, addrspace 8) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -467,7 +467,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__0 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8) + ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x f16>), align 1, addrspace 8) ; GFX908-NEXT: S_ENDPGM 0 ; ; GFX90A-LABEL: name: raw_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset @@ -481,7 +481,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__0 ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A-NEXT: 
BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8) + ; GFX90A-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x f16>), align 1, addrspace 8) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0) ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.f16.ll index 1977712c56e36..51ac6faa6ace5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.f16.ll @@ -16,7 +16,7 @@ define amdgpu_ps half @raw_buffer_load_format_f16__sgpr_rsrc__vgpr_voffset__sgpr ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8) + ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (f16), align 1, addrspace 8) ; PACKED-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_OFFEN]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -31,7 +31,7 @@ define amdgpu_ps half @raw_buffer_load_format_f16__sgpr_rsrc__vgpr_voffset__sgpr ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8) + ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (f16), align 1, addrspace 8) ; UNPACKED-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]] ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -46,7 +46,7 @@ define amdgpu_ps half @raw_buffer_load_format_f16__sgpr_rsrc__vgpr_voffset__sgpr ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 
0, implicit $exec :: (dereferenceable load (f16), align 1, addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_VBUFFER_OFFEN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call half @llvm.amdgcn.raw.buffer.load.format.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -65,7 +65,7 @@ define amdgpu_ps <2 x half> @raw_buffer_load_format_v2f16__sgpr_rsrc__vgpr_voffs ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XY_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_XY_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>), align 1, addrspace 8) + ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XY_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_XY_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x f16>), align 1, addrspace 8) ; PACKED-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_XY_OFFEN]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -80,7 +80,7 @@ define amdgpu_ps <2 x half> @raw_buffer_load_format_v2f16__sgpr_rsrc__vgpr_voffs ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>), align 1, addrspace 8) + ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x f16>), align 1, addrspace 8) ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN]].sub0 ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN]].sub1 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 @@ -106,7 +106,7 @@ define amdgpu_ps <2 x half> @raw_buffer_load_format_v2f16__sgpr_rsrc__vgpr_voffs ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_D16_XY_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_XY_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_D16_XY_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_XY_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x f16>), align 1, addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_XY_VBUFFER_OFFEN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call <2 x half> @llvm.amdgcn.raw.buffer.load.format.v2f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -131,7 +131,7 @@ define amdgpu_ps <4 x half> 
@raw_buffer_load_format_v4f16__sgpr_rsrc__vgpr_voffs ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 8) + ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x f16>), align 1, addrspace 8) ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub0 ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub1 ; PACKED-NEXT: $vgpr0 = COPY [[COPY6]] @@ -149,7 +149,7 @@ define amdgpu_ps <4 x half> @raw_buffer_load_format_v4f16__sgpr_rsrc__vgpr_voffs ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 8) + ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x f16>), align 1, addrspace 8) ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub0 ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub1 ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub2 @@ -185,7 +185,7 @@ define amdgpu_ps <4 x half> @raw_buffer_load_format_v4f16__sgpr_rsrc__vgpr_voffs ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_VBUFFER_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_VBUFFER_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x f16>), align 1, addrspace 8) ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_VBUFFER_OFFEN]].sub0 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_VBUFFER_OFFEN]].sub1 ; GFX12-NEXT: $vgpr0 = COPY [[COPY6]] @@ -235,7 +235,7 @@ define amdgpu_ps half @raw_buffer_load_format_f16__vgpr_rsrc__sgpr_voffset__vgpr ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = 
BUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8) + ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (f16), align 1, addrspace 8) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -286,7 +286,7 @@ define amdgpu_ps half @raw_buffer_load_format_f16__vgpr_rsrc__sgpr_voffset__vgpr ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8) + ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (f16), align 1, addrspace 8) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -337,7 +337,7 @@ define amdgpu_ps half @raw_buffer_load_format_f16__vgpr_rsrc__sgpr_voffset__vgpr ; GFX12-NEXT: bb.3: ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_VBUFFER_OFFEN [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_VBUFFER_OFFEN [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (f16), align 1, addrspace 8) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -365,7 +365,7 @@ define amdgpu_ps <4 x half> @raw_buffer_load_format_v4f16__sgpr_rsrc__vgpr_voffs ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 8) + ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable load (<4 x f16>), align 1, addrspace 8) ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub0 ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub1 ; PACKED-NEXT: $vgpr0 = COPY [[COPY6]] @@ -383,7 +383,7 @@ define amdgpu_ps <4 x half> @raw_buffer_load_format_v4f16__sgpr_rsrc__vgpr_voffs ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = 
REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 8) + ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable load (<4 x f16>), align 1, addrspace 8) ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub0 ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub1 ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub2 @@ -419,7 +419,7 @@ define amdgpu_ps <4 x half> @raw_buffer_load_format_v4f16__sgpr_rsrc__vgpr_voffs ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_VBUFFER_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_VBUFFER_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable load (<4 x f16>), align 1, addrspace 8) ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_VBUFFER_OFFEN]].sub0 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_VBUFFER_OFFEN]].sub1 ; GFX12-NEXT: $vgpr0 = COPY [[COPY6]] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.ll index f0983501df293..453f9736d9654 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.ll @@ -15,7 +15,7 @@ define amdgpu_ps float @raw_buffer_load_format_f32__sgpr_rsrc__vgpr_voffset__sgp ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: [[BUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_OFFEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -30,7 +30,7 @@ define amdgpu_ps float @raw_buffer_load_format_f32__sgpr_rsrc__vgpr_voffset__sgp ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], 
%subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_X_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_X_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_VBUFFER_OFFEN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -49,7 +49,7 @@ define amdgpu_ps <2 x float> @raw_buffer_load_format_v2f32__sgpr_rsrc__vgpr_voff ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: [[BUFFER_LOAD_FORMAT_XY_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_XY_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_FORMAT_XY_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_XY_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x f32>), align 1, addrspace 8) ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XY_OFFEN]].sub0 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XY_OFFEN]].sub1 ; GFX8-NEXT: $vgpr0 = COPY [[COPY6]] @@ -67,7 +67,7 @@ define amdgpu_ps <2 x float> @raw_buffer_load_format_v2f32__sgpr_rsrc__vgpr_voff ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_XY_VBUFFER_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_XY_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_XY_VBUFFER_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_XY_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x f32>), align 1, addrspace 8) ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XY_VBUFFER_OFFEN]].sub0 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XY_VBUFFER_OFFEN]].sub1 ; GFX12-NEXT: $vgpr0 = COPY [[COPY6]] @@ -89,7 +89,7 @@ define amdgpu_ps <3 x float> @raw_buffer_load_format_v3f32__sgpr_rsrc__vgpr_voff ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: [[BUFFER_LOAD_FORMAT_XYZ_OFFEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_FORMAT_XYZ_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8) + ; GFX8-NEXT: 
[[BUFFER_LOAD_FORMAT_XYZ_OFFEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_FORMAT_XYZ_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x f32>), align 1, addrspace 8) ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub0 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub1 ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub2 @@ -109,7 +109,7 @@ define amdgpu_ps <3 x float> @raw_buffer_load_format_v3f32__sgpr_rsrc__vgpr_voff ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_XYZ_VBUFFER_OFFEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_FORMAT_XYZ_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_XYZ_VBUFFER_OFFEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_FORMAT_XYZ_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x f32>), align 1, addrspace 8) ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_VBUFFER_OFFEN]].sub0 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_VBUFFER_OFFEN]].sub1 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_VBUFFER_OFFEN]].sub2 @@ -133,7 +133,7 @@ define amdgpu_ps <4 x float> @raw_buffer_load_format_v4f32__sgpr_rsrc__vgpr_voff ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x f32>), align 1, addrspace 8) ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub0 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub1 ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub2 @@ -155,7 +155,7 @@ define amdgpu_ps <4 x float> @raw_buffer_load_format_v4f32__sgpr_rsrc__vgpr_voff ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_VBUFFER_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_VBUFFER_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x f32>), align 1, addrspace 8) ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY 
[[BUFFER_LOAD_FORMAT_XYZW_VBUFFER_OFFEN]].sub0 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_VBUFFER_OFFEN]].sub1 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_VBUFFER_OFFEN]].sub2 @@ -181,7 +181,7 @@ define amdgpu_ps <4 x i32> @raw_buffer_load_format_v4f32__sgpr_rsrc__vgpr_voffse ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x i32>), align 1, addrspace 8) ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub0 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub1 ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub2 @@ -207,7 +207,7 @@ define amdgpu_ps <4 x i32> @raw_buffer_load_format_v4f32__sgpr_rsrc__vgpr_voffse ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_VBUFFER_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_VBUFFER_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x i32>), align 1, addrspace 8) ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_VBUFFER_OFFEN]].sub0 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_VBUFFER_OFFEN]].sub1 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_VBUFFER_OFFEN]].sub2 @@ -265,7 +265,7 @@ define amdgpu_ps float @raw_buffer_load_format_f32__vgpr_rsrc__sgpr_voffset__vgp ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -316,7 +316,7 @@ define amdgpu_ps float @raw_buffer_load_format_f32__vgpr_rsrc__sgpr_voffset__vgp ; GFX12-NEXT: bb.3: ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_X_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = 
BUFFER_LOAD_FORMAT_X_VBUFFER_OFFEN [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_X_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_VBUFFER_OFFEN [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -344,7 +344,7 @@ define amdgpu_ps <4 x float> @raw_buffer_load_format_v4f32__sgpr_rsrc__vgpr_voff ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable load (<4 x f32>), align 1, addrspace 8) ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub0 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub1 ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub2 @@ -366,7 +366,7 @@ define amdgpu_ps <4 x float> @raw_buffer_load_format_v4f32__sgpr_rsrc__vgpr_voff ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_VBUFFER_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_VBUFFER_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable load (<4 x f32>), align 1, addrspace 8) ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_VBUFFER_OFFEN]].sub0 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_VBUFFER_OFFEN]].sub1 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_VBUFFER_OFFEN]].sub2 @@ -393,7 +393,7 @@ define amdgpu_ps <4 x i32> @raw_buffer_load_format_v4f32__sgpr_rsrc__vgpr_voffse ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: 
(dereferenceable load (<4 x i32>), align 1, addrspace 8)
 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub0
 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub1
 ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub2
@@ -419,7 +419,7 @@ define amdgpu_ps <4 x i32> @raw_buffer_load_format_v4f32__sgpr_rsrc__vgpr_voffse
 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_VBUFFER_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
+ ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_VBUFFER_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable load (<4 x i32>), align 1, addrspace 8)
 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_VBUFFER_OFFEN]].sub0
 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_VBUFFER_OFFEN]].sub1
 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_VBUFFER_OFFEN]].sub2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll
index 8e167b9df5749..eba92eefb8d47 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll
@@ -16,7 +16,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8)
 ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
 ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
 ;
@@ -31,7 +31,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
+ ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8)
 ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN]]
 ; GFX12-NEXT:
SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -52,7 +52,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__sgpr_voffset__sgpr_soffs ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr7 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY6]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY6]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -68,7 +68,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__sgpr_voffset__sgpr_soffs ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr7 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] - ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[COPY6]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[COPY6]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -111,7 +111,7 @@ define amdgpu_ps float @raw_buffer_load_f32__vgpr_rsrc__vgpr_voffset__sgpr_soffs ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -158,7 +158,7 @@ define amdgpu_ps float @raw_buffer_load_f32__vgpr_rsrc__vgpr_voffset__sgpr_soffs ; GFX12-NEXT: bb.3: ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -213,7 +213,7 @@ define amdgpu_ps float @raw_buffer_load_f32__vgpr_rsrc__vgpr_voffset__vgpr_soffs ; GFX8-NEXT: bb.3: ; GFX8-NEXT: 
successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -263,7 +263,7 @@ define amdgpu_ps float @raw_buffer_load_f32__vgpr_rsrc__vgpr_voffset__vgpr_soffs ; GFX12-NEXT: bb.3: ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -292,7 +292,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -307,7 +307,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 1) @@ -327,7 +327,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; GFX8-NEXT: 
[[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 2, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 2, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -342,7 +342,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 2, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 2, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 2) @@ -362,7 +362,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 4, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 4, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -377,7 +377,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 4, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 4, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN]] ; 
GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 4) @@ -397,7 +397,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 6, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 6, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -412,7 +412,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 6, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 6, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 6) @@ -432,7 +432,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 5, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 5, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -447,7 +447,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 5, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX12-NEXT: 
[[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 5, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 5) @@ -467,7 +467,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 7, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 7, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -482,7 +482,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 7, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 7, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 7) @@ -502,7 +502,7 @@ define amdgpu_ps <2 x float> @raw_buffer_load_v2f32__sgpr_rsrc__vgpr_voffset__sg ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x f32>), align 1, addrspace 8) ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1 ; GFX8-NEXT: $vgpr0 = COPY [[COPY6]] @@ -520,7 +520,7 @@ define amdgpu_ps <2 x float> @raw_buffer_load_v2f32__sgpr_rsrc__vgpr_voffset__sg ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], 
%subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX2_VBUFFER_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX2_VBUFFER_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x f32>), align 1, addrspace 8) ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_VBUFFER_OFFEN]].sub0 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_VBUFFER_OFFEN]].sub1 ; GFX12-NEXT: $vgpr0 = COPY [[COPY6]] @@ -542,7 +542,7 @@ define amdgpu_ps <3 x float> @raw_buffer_load_v3f32__sgpr_rsrc__vgpr_voffset__sg ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX3_OFFEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX3_OFFEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x f32>), align 1, addrspace 8) ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFEN]].sub0 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFEN]].sub1 ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFEN]].sub2 @@ -562,7 +562,7 @@ define amdgpu_ps <3 x float> @raw_buffer_load_v3f32__sgpr_rsrc__vgpr_voffset__sg ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX3_VBUFFER_OFFEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX3_VBUFFER_OFFEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x f32>), align 1, addrspace 8) ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_VBUFFER_OFFEN]].sub0 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_VBUFFER_OFFEN]].sub1 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_VBUFFER_OFFEN]].sub2 @@ -586,7 +586,7 @@ define amdgpu_ps <4 x float> @raw_buffer_load_v4f32__sgpr_rsrc__vgpr_voffset__sg ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + ; 
GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x f32>), align 1, addrspace 8) ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 @@ -608,7 +608,7 @@ define amdgpu_ps <4 x float> @raw_buffer_load_v4f32__sgpr_rsrc__vgpr_voffset__sg ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x f32>), align 1, addrspace 8) ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN]].sub0 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN]].sub1 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN]].sub2 @@ -634,7 +634,7 @@ define amdgpu_ps half @raw_buffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffse ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (f16), align 1, addrspace 8) ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_OFFEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -649,7 +649,7 @@ define amdgpu_ps half @raw_buffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffse ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: [[BUFFER_LOAD_USHORT_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_USHORT_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (f16), align 1, addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_VBUFFER_OFFEN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call half @llvm.amdgcn.raw.buffer.load.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -668,7 +668,7 @@ define amdgpu_ps <2 x half> 
@raw_buffer_load_v2f16__sgpr_rsrc__vgpr_voffset__sgp ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x f16>), align 1, addrspace 8) ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -683,7 +683,7 @@ define amdgpu_ps <2 x half> @raw_buffer_load_v2f16__sgpr_rsrc__vgpr_voffset__sgp ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x f16>), align 1, addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call <2 x half> @llvm.amdgcn.raw.buffer.load.v2f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -708,7 +708,7 @@ define amdgpu_ps <4 x half> @raw_buffer_load_v4f16__sgpr_rsrc__vgpr_voffset__sgp ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x f16>), align 1, addrspace 8) ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1 ; GFX8-NEXT: $vgpr0 = COPY [[COPY6]] @@ -726,7 +726,7 @@ define amdgpu_ps <4 x half> @raw_buffer_load_v4f16__sgpr_rsrc__vgpr_voffset__sgp ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX2_VBUFFER_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX2_VBUFFER_OFFEN:%[0-9]+]]:vreg_64 = 
BUFFER_LOAD_DWORDX2_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x f16>), align 1, addrspace 8) ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_VBUFFER_OFFEN]].sub0 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_VBUFFER_OFFEN]].sub1 ; GFX12-NEXT: $vgpr0 = COPY [[COPY6]] @@ -748,7 +748,7 @@ define amdgpu_ps float @raw_buffer_load_i8__sgpr_rsrc__vgpr_voffset__sgpr_soffse ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (i8), addrspace 8) ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -763,7 +763,7 @@ define amdgpu_ps float @raw_buffer_load_i8__sgpr_rsrc__vgpr_voffset__sgpr_soffse ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: [[BUFFER_LOAD_UBYTE_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_UBYTE_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (i8), addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_VBUFFER_OFFEN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -784,7 +784,7 @@ define amdgpu_ps float @raw_buffer_load_i8__sgpr_rsrc__vgpr_voffset__sgpr_soffse ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: [[BUFFER_LOAD_SBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_SBYTE_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_SBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_SBYTE_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (i8), addrspace 8) ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_SBYTE_OFFEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -799,7 +799,7 @@ define amdgpu_ps float @raw_buffer_load_i8__sgpr_rsrc__vgpr_voffset__sgpr_soffse ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: [[BUFFER_LOAD_SBYTE_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = 
BUFFER_LOAD_SBYTE_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_SBYTE_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_SBYTE_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (i8), addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_SBYTE_VBUFFER_OFFEN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -820,7 +820,7 @@ define amdgpu_ps float @raw_buffer_load_i16__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (i16), align 1, addrspace 8) ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_OFFEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -835,7 +835,7 @@ define amdgpu_ps float @raw_buffer_load_i16__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: [[BUFFER_LOAD_USHORT_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_USHORT_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (i16), align 1, addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_VBUFFER_OFFEN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call i16 @llvm.amdgcn.raw.buffer.load.i16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -856,7 +856,7 @@ define amdgpu_ps float @raw_buffer_load_i16__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: [[BUFFER_LOAD_SSHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_SSHORT_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_SSHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_SSHORT_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (i16), align 1, addrspace 8) ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_SSHORT_OFFEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -871,7 +871,7 @@ define amdgpu_ps float @raw_buffer_load_i16__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, 
[[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: [[BUFFER_LOAD_SSHORT_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_SSHORT_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_SSHORT_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_SSHORT_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (i16), align 1, addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_SSHORT_VBUFFER_OFFEN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call i16 @llvm.amdgcn.raw.buffer.load.i16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -916,7 +916,7 @@ define amdgpu_ps half @raw_buffer_load_f16__vgpr_rsrc__vgpr_voffset__sgpr_soffse ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (f16), align 1, addrspace 8) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -963,7 +963,7 @@ define amdgpu_ps half @raw_buffer_load_f16__vgpr_rsrc__vgpr_voffset__sgpr_soffse ; GFX12-NEXT: bb.3: ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[BUFFER_LOAD_USHORT_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_USHORT_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (f16), align 1, addrspace 8) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -1015,7 +1015,7 @@ define amdgpu_ps float @raw_buffer_load_i8__vgpr_rsrc__vgpr_voffset__sgpr_soffse ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (i8), addrspace 8) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -1062,7 +1062,7 @@ define amdgpu_ps float @raw_buffer_load_i8__vgpr_rsrc__vgpr_voffset__sgpr_soffse ; GFX12-NEXT: bb.3: ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[BUFFER_LOAD_UBYTE_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[COPY5]], 0, 0, 0, implicit 
$exec :: (dereferenceable load (s8), addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_UBYTE_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (i8), addrspace 8) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -1091,7 +1091,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vdpr_voffset__sgpr_soffs ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE]], [[COPY4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE]], [[COPY4]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -1105,7 +1105,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vdpr_voffset__sgpr_soffs ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFSET [[REG_SEQUENCE]], [[COPY4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFSET [[REG_SEQUENCE]], [[COPY4]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_VBUFFER_OFFSET]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 0, i32 %soffset, i32 0) @@ -1123,7 +1123,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE]], [[COPY4]], 4095, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE]], [[COPY4]], 4095, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -1137,7 +1137,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; 
GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFSET [[REG_SEQUENCE]], [[COPY4]], 4095, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFSET [[REG_SEQUENCE]], [[COPY4]], 4095, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_VBUFFER_OFFSET]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0) @@ -1157,7 +1157,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE]], [[COPY4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE]], [[COPY4]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -1171,7 +1171,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFSET [[REG_SEQUENCE]], [[COPY4]], 4096, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFSET [[REG_SEQUENCE]], [[COPY4]], 4096, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_VBUFFER_OFFSET]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 4096, i32 %soffset, i32 0) @@ -1190,7 +1190,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 16, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 16, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -1205,7 +1205,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], 
%subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 16, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 16, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %voffset = add i32 %voffset.base, 16 @@ -1225,7 +1225,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -1240,7 +1240,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %voffset = add i32 %voffset.base, 4095 @@ -1263,7 +1263,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -1278,7 +1278,7 @@ define amdgpu_ps 
float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4096, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4096, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %voffset = add i32 %voffset.base, 4096 @@ -1298,7 +1298,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -1313,7 +1313,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 - ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 4095, i32 0) @@ -1332,7 +1332,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], 
[[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -1347,7 +1347,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 4096, i32 0) @@ -1368,7 +1368,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 ; GFX8-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def dead $scc - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -1385,7 +1385,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 ; GFX12-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def dead $scc - ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 16 @@ -1407,7 +1407,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 ; GFX8-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def dead $scc - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, 
addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -1424,7 +1424,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 ; GFX12-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def dead $scc - ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4095 @@ -1446,7 +1446,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; GFX8-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def dead $scc - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -1463,7 +1463,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; GFX12-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def dead $scc - ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4096 @@ -1509,7 +1509,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_ADD_I32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], 
[[REG_SEQUENCE1]], [[S_ADD_I32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -1558,7 +1558,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; GFX12-NEXT: bb.3: ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_ADD_I32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_ADD_I32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -1614,7 +1614,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE1]], [[COPY5]], 904, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE1]], [[COPY5]], 904, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -1661,7 +1661,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; GFX12-NEXT: bb.3: ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[COPY5]], 5000, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[COPY5]], 5000, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.tfe.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.tfe.ll index 8eb05bb9565f3..372327fb77cb0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.tfe.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.tfe.ll @@ -24,7 +24,7 @@ define amdgpu_ps void @raw_buffer_load_i8_tfe(<4 x i32> inreg %rsrc, ptr addrspa ; GFX67-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX67-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX67-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX67-NEXT: [[BUFFER_LOAD_UBYTE_TFE_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_UBYTE_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: 
(dereferenceable load (s8), addrspace 8) + ; GFX67-NEXT: [[BUFFER_LOAD_UBYTE_TFE_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_UBYTE_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (i8), addrspace 8) ; GFX67-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_UBYTE_TFE_OFFSET]].sub0 ; GFX67-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_UBYTE_TFE_OFFSET]].sub1 ; GFX67-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 @@ -32,13 +32,13 @@ define amdgpu_ps void @raw_buffer_load_i8_tfe(<4 x i32> inreg %rsrc, ptr addrspa ; GFX67-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_2]], %subreg.sub1 ; GFX67-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX67-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE3]], %subreg.sub2_sub3 - ; GFX67-NEXT: BUFFER_STORE_BYTE_ADDR64 [[COPY8]], [[REG_SEQUENCE1]], [[REG_SEQUENCE4]], 0, 0, 0, 0, implicit $exec :: (store (s8) into %ir.data_addr, addrspace 1) + ; GFX67-NEXT: BUFFER_STORE_BYTE_ADDR64 [[COPY8]], [[REG_SEQUENCE1]], [[REG_SEQUENCE4]], 0, 0, 0, 0, implicit $exec :: (store (i8) into %ir.data_addr, addrspace 1) ; GFX67-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX67-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX67-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_3]], %subreg.sub0, [[S_MOV_B32_4]], %subreg.sub1 ; GFX67-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX67-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_1]], %subreg.sub0_sub1, [[REG_SEQUENCE5]], %subreg.sub2_sub3 - ; GFX67-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY9]], [[REG_SEQUENCE2]], [[REG_SEQUENCE6]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) + ; GFX67-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY9]], [[REG_SEQUENCE2]], [[REG_SEQUENCE6]], 0, 0, 0, 0, implicit $exec :: (store (i32) into %ir.tfe_addr, addrspace 1) ; GFX67-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: raw_buffer_load_i8_tfe @@ -57,11 +57,11 @@ define amdgpu_ps void @raw_buffer_load_i8_tfe(<4 x i32> inreg %rsrc, ptr addrspa ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[BUFFER_LOAD_UBYTE_TFE_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_UBYTE_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_UBYTE_TFE_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_UBYTE_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (i8), addrspace 8) ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_UBYTE_TFE_OFFSET]].sub0 ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_UBYTE_TFE_OFFSET]].sub1 - ; GFX8-NEXT: FLAT_STORE_BYTE [[REG_SEQUENCE1]], [[COPY8]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8) into %ir.data_addr, addrspace 1) - ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY9]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.tfe_addr, addrspace 1) + ; GFX8-NEXT: FLAT_STORE_BYTE [[REG_SEQUENCE1]], [[COPY8]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i8) into %ir.data_addr, addrspace 1) + ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY9]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32) into %ir.tfe_addr, addrspace 1) ; 
GFX8-NEXT: S_ENDPGM 0 ; ; GFX910-LABEL: name: raw_buffer_load_i8_tfe @@ -80,11 +80,11 @@ define amdgpu_ps void @raw_buffer_load_i8_tfe(<4 x i32> inreg %rsrc, ptr addrspa ; GFX910-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX910-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX910-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX910-NEXT: [[BUFFER_LOAD_UBYTE_TFE_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_UBYTE_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 8) + ; GFX910-NEXT: [[BUFFER_LOAD_UBYTE_TFE_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_UBYTE_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (i8), addrspace 8) ; GFX910-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_UBYTE_TFE_OFFSET]].sub0 ; GFX910-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_UBYTE_TFE_OFFSET]].sub1 - ; GFX910-NEXT: GLOBAL_STORE_BYTE [[REG_SEQUENCE1]], [[COPY8]], 0, 0, implicit $exec :: (store (s8) into %ir.data_addr, addrspace 1) - ; GFX910-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY9]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) + ; GFX910-NEXT: GLOBAL_STORE_BYTE [[REG_SEQUENCE1]], [[COPY8]], 0, 0, implicit $exec :: (store (i8) into %ir.data_addr, addrspace 1) + ; GFX910-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY9]], 0, 0, implicit $exec :: (store (i32) into %ir.tfe_addr, addrspace 1) ; GFX910-NEXT: S_ENDPGM 0 ; ; GFX11-LABEL: name: raw_buffer_load_i8_tfe @@ -103,11 +103,11 @@ define amdgpu_ps void @raw_buffer_load_i8_tfe(<4 x i32> inreg %rsrc, ptr addrspa ; GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX11-NEXT: [[BUFFER_LOAD_UBYTE_TFE_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_UBYTE_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 8) + ; GFX11-NEXT: [[BUFFER_LOAD_UBYTE_TFE_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_UBYTE_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (i8), addrspace 8) ; GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_UBYTE_TFE_OFFSET]].sub0 ; GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_UBYTE_TFE_OFFSET]].sub1 - ; GFX11-NEXT: GLOBAL_STORE_BYTE [[REG_SEQUENCE1]], [[COPY8]], 0, 0, implicit $exec :: (store (s8) into %ir.data_addr, addrspace 1) - ; GFX11-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY9]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) + ; GFX11-NEXT: GLOBAL_STORE_BYTE [[REG_SEQUENCE1]], [[COPY8]], 0, 0, implicit $exec :: (store (i8) into %ir.data_addr, addrspace 1) + ; GFX11-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY9]], 0, 0, implicit $exec :: (store (i32) into %ir.tfe_addr, addrspace 1) ; GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_buffer_load_i8_tfe @@ -125,11 +125,11 @@ define amdgpu_ps void @raw_buffer_load_i8_tfe(<4 x i32> inreg %rsrc, ptr addrspa ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX12-NEXT: [[BUFFER_LOAD_UBYTE_TFE_VBUFFER_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_UBYTE_TFE_VBUFFER_OFFSET [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, 
implicit $exec :: (dereferenceable load (s8), addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_UBYTE_TFE_VBUFFER_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_UBYTE_TFE_VBUFFER_OFFSET [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (i8), addrspace 8) ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_UBYTE_TFE_VBUFFER_OFFSET]].sub0 ; GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_UBYTE_TFE_VBUFFER_OFFSET]].sub1 - ; GFX12-NEXT: GLOBAL_STORE_BYTE [[REG_SEQUENCE1]], [[COPY8]], 0, 0, implicit $exec :: (store (s8) into %ir.data_addr, addrspace 1) - ; GFX12-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY9]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) + ; GFX12-NEXT: GLOBAL_STORE_BYTE [[REG_SEQUENCE1]], [[COPY8]], 0, 0, implicit $exec :: (store (i8) into %ir.data_addr, addrspace 1) + ; GFX12-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY9]], 0, 0, implicit $exec :: (store (i32) into %ir.tfe_addr, addrspace 1) ; GFX12-NEXT: S_ENDPGM 0 %res = call { i8, i32 } @llvm.amdgcn.raw.buffer.load.sl_i8i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0) %data = extractvalue { i8, i32 } %res, 0 @@ -156,7 +156,7 @@ define amdgpu_ps void @raw_buffer_load_i16_tfe(<4 x i32> inreg %rsrc, ptr addrsp ; GFX67-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX67-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX67-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX67-NEXT: [[BUFFER_LOAD_USHORT_TFE_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_USHORT_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8) + ; GFX67-NEXT: [[BUFFER_LOAD_USHORT_TFE_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_USHORT_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (i16), align 1, addrspace 8) ; GFX67-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_OFFSET]].sub0 ; GFX67-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_OFFSET]].sub1 ; GFX67-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 @@ -164,13 +164,13 @@ define amdgpu_ps void @raw_buffer_load_i16_tfe(<4 x i32> inreg %rsrc, ptr addrsp ; GFX67-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_2]], %subreg.sub1 ; GFX67-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX67-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE3]], %subreg.sub2_sub3 - ; GFX67-NEXT: BUFFER_STORE_SHORT_ADDR64 [[COPY8]], [[REG_SEQUENCE1]], [[REG_SEQUENCE4]], 0, 0, 0, 0, implicit $exec :: (store (s16) into %ir.data_addr, addrspace 1) + ; GFX67-NEXT: BUFFER_STORE_SHORT_ADDR64 [[COPY8]], [[REG_SEQUENCE1]], [[REG_SEQUENCE4]], 0, 0, 0, 0, implicit $exec :: (store (i16) into %ir.data_addr, addrspace 1) ; GFX67-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX67-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX67-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_3]], %subreg.sub0, [[S_MOV_B32_4]], %subreg.sub1 ; GFX67-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX67-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_1]], %subreg.sub0_sub1, [[REG_SEQUENCE5]], %subreg.sub2_sub3 - ; GFX67-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY9]], [[REG_SEQUENCE2]], [[REG_SEQUENCE6]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) + ; GFX67-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY9]], 
[[REG_SEQUENCE2]], [[REG_SEQUENCE6]], 0, 0, 0, 0, implicit $exec :: (store (i32) into %ir.tfe_addr, addrspace 1) ; GFX67-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: raw_buffer_load_i16_tfe @@ -189,11 +189,11 @@ define amdgpu_ps void @raw_buffer_load_i16_tfe(<4 x i32> inreg %rsrc, ptr addrsp ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[BUFFER_LOAD_USHORT_TFE_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_USHORT_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_USHORT_TFE_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_USHORT_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (i16), align 1, addrspace 8) ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_OFFSET]].sub0 ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_OFFSET]].sub1 - ; GFX8-NEXT: FLAT_STORE_SHORT [[REG_SEQUENCE1]], [[COPY8]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16) into %ir.data_addr, addrspace 1) - ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY9]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.tfe_addr, addrspace 1) + ; GFX8-NEXT: FLAT_STORE_SHORT [[REG_SEQUENCE1]], [[COPY8]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i16) into %ir.data_addr, addrspace 1) + ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY9]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32) into %ir.tfe_addr, addrspace 1) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX910-LABEL: name: raw_buffer_load_i16_tfe @@ -212,11 +212,11 @@ define amdgpu_ps void @raw_buffer_load_i16_tfe(<4 x i32> inreg %rsrc, ptr addrsp ; GFX910-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX910-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX910-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX910-NEXT: [[BUFFER_LOAD_USHORT_TFE_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_USHORT_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8) + ; GFX910-NEXT: [[BUFFER_LOAD_USHORT_TFE_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_USHORT_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (i16), align 1, addrspace 8) ; GFX910-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_OFFSET]].sub0 ; GFX910-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_OFFSET]].sub1 - ; GFX910-NEXT: GLOBAL_STORE_SHORT [[REG_SEQUENCE1]], [[COPY8]], 0, 0, implicit $exec :: (store (s16) into %ir.data_addr, addrspace 1) - ; GFX910-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY9]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) + ; GFX910-NEXT: GLOBAL_STORE_SHORT [[REG_SEQUENCE1]], [[COPY8]], 0, 0, implicit $exec :: (store (i16) into %ir.data_addr, addrspace 1) + ; GFX910-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY9]], 0, 0, implicit $exec :: (store (i32) into %ir.tfe_addr, addrspace 1) ; GFX910-NEXT: S_ENDPGM 0 ; ; GFX11-LABEL: name: raw_buffer_load_i16_tfe @@ -235,11 +235,11 @@ define amdgpu_ps void @raw_buffer_load_i16_tfe(<4 x i32> inreg %rsrc, ptr addrsp ; GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], 
%subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX11-NEXT: [[BUFFER_LOAD_USHORT_TFE_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_USHORT_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8) + ; GFX11-NEXT: [[BUFFER_LOAD_USHORT_TFE_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_USHORT_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (i16), align 1, addrspace 8) ; GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_OFFSET]].sub0 ; GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_OFFSET]].sub1 - ; GFX11-NEXT: GLOBAL_STORE_SHORT [[REG_SEQUENCE1]], [[COPY8]], 0, 0, implicit $exec :: (store (s16) into %ir.data_addr, addrspace 1) - ; GFX11-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY9]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) + ; GFX11-NEXT: GLOBAL_STORE_SHORT [[REG_SEQUENCE1]], [[COPY8]], 0, 0, implicit $exec :: (store (i16) into %ir.data_addr, addrspace 1) + ; GFX11-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY9]], 0, 0, implicit $exec :: (store (i32) into %ir.tfe_addr, addrspace 1) ; GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_buffer_load_i16_tfe @@ -257,11 +257,11 @@ define amdgpu_ps void @raw_buffer_load_i16_tfe(<4 x i32> inreg %rsrc, ptr addrsp ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX12-NEXT: [[BUFFER_LOAD_USHORT_TFE_VBUFFER_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_USHORT_TFE_VBUFFER_OFFSET [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_USHORT_TFE_VBUFFER_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_USHORT_TFE_VBUFFER_OFFSET [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (i16), align 1, addrspace 8) ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_VBUFFER_OFFSET]].sub0 ; GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_VBUFFER_OFFSET]].sub1 - ; GFX12-NEXT: GLOBAL_STORE_SHORT [[REG_SEQUENCE1]], [[COPY8]], 0, 0, implicit $exec :: (store (s16) into %ir.data_addr, addrspace 1) - ; GFX12-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY9]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) + ; GFX12-NEXT: GLOBAL_STORE_SHORT [[REG_SEQUENCE1]], [[COPY8]], 0, 0, implicit $exec :: (store (i16) into %ir.data_addr, addrspace 1) + ; GFX12-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY9]], 0, 0, implicit $exec :: (store (i32) into %ir.tfe_addr, addrspace 1) ; GFX12-NEXT: S_ENDPGM 0 %res = call { i16, i32 } @llvm.amdgcn.raw.buffer.load.sl_i16i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0) %data = extractvalue { i16, i32 } %res, 0 @@ -288,7 +288,7 @@ define amdgpu_ps void @raw_buffer_load_f16_tfe(<4 x i32> inreg %rsrc, ptr addrsp ; GFX67-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX67-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX67-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX67-NEXT: [[BUFFER_LOAD_USHORT_TFE_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_USHORT_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8) + ; GFX67-NEXT: 
[[BUFFER_LOAD_USHORT_TFE_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_USHORT_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (f16), align 1, addrspace 8) ; GFX67-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_OFFSET]].sub0 ; GFX67-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_OFFSET]].sub1 ; GFX67-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 @@ -296,13 +296,13 @@ define amdgpu_ps void @raw_buffer_load_f16_tfe(<4 x i32> inreg %rsrc, ptr addrsp ; GFX67-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_2]], %subreg.sub1 ; GFX67-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX67-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE3]], %subreg.sub2_sub3 - ; GFX67-NEXT: BUFFER_STORE_SHORT_ADDR64 [[COPY8]], [[REG_SEQUENCE1]], [[REG_SEQUENCE4]], 0, 0, 0, 0, implicit $exec :: (store (s16) into %ir.data_addr, addrspace 1) + ; GFX67-NEXT: BUFFER_STORE_SHORT_ADDR64 [[COPY8]], [[REG_SEQUENCE1]], [[REG_SEQUENCE4]], 0, 0, 0, 0, implicit $exec :: (store (i16) into %ir.data_addr, addrspace 1) ; GFX67-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX67-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX67-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_3]], %subreg.sub0, [[S_MOV_B32_4]], %subreg.sub1 ; GFX67-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX67-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_1]], %subreg.sub0_sub1, [[REG_SEQUENCE5]], %subreg.sub2_sub3 - ; GFX67-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY9]], [[REG_SEQUENCE2]], [[REG_SEQUENCE6]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) + ; GFX67-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY9]], [[REG_SEQUENCE2]], [[REG_SEQUENCE6]], 0, 0, 0, 0, implicit $exec :: (store (i32) into %ir.tfe_addr, addrspace 1) ; GFX67-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: raw_buffer_load_f16_tfe @@ -321,11 +321,11 @@ define amdgpu_ps void @raw_buffer_load_f16_tfe(<4 x i32> inreg %rsrc, ptr addrsp ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[BUFFER_LOAD_USHORT_TFE_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_USHORT_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_USHORT_TFE_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_USHORT_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (f16), align 1, addrspace 8) ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_OFFSET]].sub0 ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_OFFSET]].sub1 - ; GFX8-NEXT: FLAT_STORE_SHORT [[REG_SEQUENCE1]], [[COPY8]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16) into %ir.data_addr, addrspace 1) - ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY9]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.tfe_addr, addrspace 1) + ; GFX8-NEXT: FLAT_STORE_SHORT [[REG_SEQUENCE1]], [[COPY8]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i16) into %ir.data_addr, addrspace 1) + ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY9]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32) into %ir.tfe_addr, addrspace 1) ; 
GFX8-NEXT: S_ENDPGM 0 ; ; GFX910-LABEL: name: raw_buffer_load_f16_tfe @@ -344,11 +344,11 @@ define amdgpu_ps void @raw_buffer_load_f16_tfe(<4 x i32> inreg %rsrc, ptr addrsp ; GFX910-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX910-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX910-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX910-NEXT: [[BUFFER_LOAD_USHORT_TFE_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_USHORT_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8) + ; GFX910-NEXT: [[BUFFER_LOAD_USHORT_TFE_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_USHORT_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (f16), align 1, addrspace 8) ; GFX910-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_OFFSET]].sub0 ; GFX910-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_OFFSET]].sub1 - ; GFX910-NEXT: GLOBAL_STORE_SHORT [[REG_SEQUENCE1]], [[COPY8]], 0, 0, implicit $exec :: (store (s16) into %ir.data_addr, addrspace 1) - ; GFX910-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY9]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) + ; GFX910-NEXT: GLOBAL_STORE_SHORT [[REG_SEQUENCE1]], [[COPY8]], 0, 0, implicit $exec :: (store (i16) into %ir.data_addr, addrspace 1) + ; GFX910-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY9]], 0, 0, implicit $exec :: (store (i32) into %ir.tfe_addr, addrspace 1) ; GFX910-NEXT: S_ENDPGM 0 ; ; GFX11-LABEL: name: raw_buffer_load_f16_tfe @@ -367,11 +367,11 @@ define amdgpu_ps void @raw_buffer_load_f16_tfe(<4 x i32> inreg %rsrc, ptr addrsp ; GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX11-NEXT: [[BUFFER_LOAD_USHORT_TFE_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_USHORT_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8) + ; GFX11-NEXT: [[BUFFER_LOAD_USHORT_TFE_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_USHORT_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (f16), align 1, addrspace 8) ; GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_OFFSET]].sub0 ; GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_OFFSET]].sub1 - ; GFX11-NEXT: GLOBAL_STORE_SHORT [[REG_SEQUENCE1]], [[COPY8]], 0, 0, implicit $exec :: (store (s16) into %ir.data_addr, addrspace 1) - ; GFX11-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY9]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) + ; GFX11-NEXT: GLOBAL_STORE_SHORT [[REG_SEQUENCE1]], [[COPY8]], 0, 0, implicit $exec :: (store (i16) into %ir.data_addr, addrspace 1) + ; GFX11-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY9]], 0, 0, implicit $exec :: (store (i32) into %ir.tfe_addr, addrspace 1) ; GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_buffer_load_f16_tfe @@ -389,11 +389,11 @@ define amdgpu_ps void @raw_buffer_load_f16_tfe(<4 x i32> inreg %rsrc, ptr addrsp ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX12-NEXT: [[BUFFER_LOAD_USHORT_TFE_VBUFFER_OFFSET:%[0-9]+]]:vreg_64 = 
BUFFER_LOAD_USHORT_TFE_VBUFFER_OFFSET [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_USHORT_TFE_VBUFFER_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_USHORT_TFE_VBUFFER_OFFSET [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (f16), align 1, addrspace 8) ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_VBUFFER_OFFSET]].sub0 ; GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_VBUFFER_OFFSET]].sub1 - ; GFX12-NEXT: GLOBAL_STORE_SHORT [[REG_SEQUENCE1]], [[COPY8]], 0, 0, implicit $exec :: (store (s16) into %ir.data_addr, addrspace 1) - ; GFX12-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY9]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) + ; GFX12-NEXT: GLOBAL_STORE_SHORT [[REG_SEQUENCE1]], [[COPY8]], 0, 0, implicit $exec :: (store (i16) into %ir.data_addr, addrspace 1) + ; GFX12-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY9]], 0, 0, implicit $exec :: (store (i32) into %ir.tfe_addr, addrspace 1) ; GFX12-NEXT: S_ENDPGM 0 %res = call { half, i32 } @llvm.amdgcn.raw.buffer.load.sl_f16i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0) %data = extractvalue { half, i32 } %res, 0 @@ -420,7 +420,7 @@ define amdgpu_ps void @raw_buffer_load_i32_tfe(<4 x i32> inreg %rsrc, ptr addrsp ; GFX67-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX67-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX67-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX67-NEXT: [[BUFFER_LOAD_DWORD_TFE_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORD_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX67-NEXT: [[BUFFER_LOAD_DWORD_TFE_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORD_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (i32), align 1, addrspace 8) ; GFX67-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORD_TFE_OFFSET]].sub0 ; GFX67-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORD_TFE_OFFSET]].sub1 ; GFX67-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 @@ -428,13 +428,13 @@ define amdgpu_ps void @raw_buffer_load_i32_tfe(<4 x i32> inreg %rsrc, ptr addrsp ; GFX67-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_2]], %subreg.sub1 ; GFX67-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX67-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE3]], %subreg.sub2_sub3 - ; GFX67-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY8]], [[REG_SEQUENCE1]], [[REG_SEQUENCE4]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.data_addr, addrspace 1) + ; GFX67-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY8]], [[REG_SEQUENCE1]], [[REG_SEQUENCE4]], 0, 0, 0, 0, implicit $exec :: (store (i32) into %ir.data_addr, addrspace 1) ; GFX67-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX67-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX67-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_3]], %subreg.sub0, [[S_MOV_B32_4]], %subreg.sub1 ; GFX67-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX67-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_1]], %subreg.sub0_sub1, [[REG_SEQUENCE5]], %subreg.sub2_sub3 - ; GFX67-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY9]], [[REG_SEQUENCE2]], [[REG_SEQUENCE6]], 0, 0, 0, 0, implicit 
$exec :: (store (s32) into %ir.tfe_addr, addrspace 1) + ; GFX67-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY9]], [[REG_SEQUENCE2]], [[REG_SEQUENCE6]], 0, 0, 0, 0, implicit $exec :: (store (i32) into %ir.tfe_addr, addrspace 1) ; GFX67-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: raw_buffer_load_i32_tfe @@ -453,11 +453,11 @@ define amdgpu_ps void @raw_buffer_load_i32_tfe(<4 x i32> inreg %rsrc, ptr addrsp ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_TFE_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORD_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_TFE_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORD_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (i32), align 1, addrspace 8) ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORD_TFE_OFFSET]].sub0 ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORD_TFE_OFFSET]].sub1 - ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY8]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.data_addr, addrspace 1) - ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY9]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.tfe_addr, addrspace 1) + ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY8]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32) into %ir.data_addr, addrspace 1) + ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY9]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32) into %ir.tfe_addr, addrspace 1) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX910-LABEL: name: raw_buffer_load_i32_tfe @@ -476,11 +476,11 @@ define amdgpu_ps void @raw_buffer_load_i32_tfe(<4 x i32> inreg %rsrc, ptr addrsp ; GFX910-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX910-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX910-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX910-NEXT: [[BUFFER_LOAD_DWORD_TFE_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORD_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX910-NEXT: [[BUFFER_LOAD_DWORD_TFE_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORD_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (i32), align 1, addrspace 8) ; GFX910-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORD_TFE_OFFSET]].sub0 ; GFX910-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORD_TFE_OFFSET]].sub1 - ; GFX910-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE1]], [[COPY8]], 0, 0, implicit $exec :: (store (s32) into %ir.data_addr, addrspace 1) - ; GFX910-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY9]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) + ; GFX910-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE1]], [[COPY8]], 0, 0, implicit $exec :: (store (i32) into %ir.data_addr, addrspace 1) + ; GFX910-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY9]], 0, 0, implicit $exec :: (store (i32) into %ir.tfe_addr, addrspace 1) ; GFX910-NEXT: S_ENDPGM 0 ; ; GFX11-LABEL: name: raw_buffer_load_i32_tfe @@ -499,11 +499,11 @@ define amdgpu_ps void @raw_buffer_load_i32_tfe(<4 x i32> inreg %rsrc, ptr addrsp ; GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 
= COPY $vgpr3 ; GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX11-NEXT: [[BUFFER_LOAD_DWORD_TFE_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORD_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX11-NEXT: [[BUFFER_LOAD_DWORD_TFE_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORD_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (i32), align 1, addrspace 8) ; GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORD_TFE_OFFSET]].sub0 ; GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORD_TFE_OFFSET]].sub1 - ; GFX11-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE1]], [[COPY8]], 0, 0, implicit $exec :: (store (s32) into %ir.data_addr, addrspace 1) - ; GFX11-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY9]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) + ; GFX11-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE1]], [[COPY8]], 0, 0, implicit $exec :: (store (i32) into %ir.data_addr, addrspace 1) + ; GFX11-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY9]], 0, 0, implicit $exec :: (store (i32) into %ir.tfe_addr, addrspace 1) ; GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_buffer_load_i32_tfe @@ -521,11 +521,11 @@ define amdgpu_ps void @raw_buffer_load_i32_tfe(<4 x i32> inreg %rsrc, ptr addrsp ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_TFE_VBUFFER_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORD_TFE_VBUFFER_OFFSET [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_TFE_VBUFFER_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORD_TFE_VBUFFER_OFFSET [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (i32), align 1, addrspace 8) ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORD_TFE_VBUFFER_OFFSET]].sub0 ; GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORD_TFE_VBUFFER_OFFSET]].sub1 - ; GFX12-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE1]], [[COPY8]], 0, 0, implicit $exec :: (store (s32) into %ir.data_addr, addrspace 1) - ; GFX12-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY9]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) + ; GFX12-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE1]], [[COPY8]], 0, 0, implicit $exec :: (store (i32) into %ir.data_addr, addrspace 1) + ; GFX12-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY9]], 0, 0, implicit $exec :: (store (i32) into %ir.tfe_addr, addrspace 1) ; GFX12-NEXT: S_ENDPGM 0 %res = call { i32, i32 } @llvm.amdgcn.raw.buffer.load.sl_i32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0) %data = extractvalue { i32, i32 } %res, 0 @@ -552,7 +552,7 @@ define amdgpu_ps void @raw_buffer_load_v2i32_tfe(<4 x i32> inreg %rsrc, ptr addr ; GFX67-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX67-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX67-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX67-NEXT: [[BUFFER_LOAD_DWORDX2_TFE_OFFSET:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX2_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x 
s32>), align 1, addrspace 8) + ; GFX67-NEXT: [[BUFFER_LOAD_DWORDX2_TFE_OFFSET:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX2_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x i32>), align 1, addrspace 8) ; GFX67-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_OFFSET]].sub0 ; GFX67-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_OFFSET]].sub1 ; GFX67-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_OFFSET]].sub2 @@ -562,13 +562,13 @@ define amdgpu_ps void @raw_buffer_load_v2i32_tfe(<4 x i32> inreg %rsrc, ptr addr ; GFX67-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_2]], %subreg.sub1 ; GFX67-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX67-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE4]], %subreg.sub2_sub3 - ; GFX67-NEXT: BUFFER_STORE_DWORDX2_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE1]], [[REG_SEQUENCE5]], 0, 0, 0, 0, implicit $exec :: (store (<2 x s32>) into %ir.data_addr, addrspace 1) + ; GFX67-NEXT: BUFFER_STORE_DWORDX2_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE1]], [[REG_SEQUENCE5]], 0, 0, 0, 0, implicit $exec :: (store (<2 x i32>) into %ir.data_addr, addrspace 1) ; GFX67-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX67-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX67-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_3]], %subreg.sub0, [[S_MOV_B32_4]], %subreg.sub1 ; GFX67-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX67-NEXT: [[REG_SEQUENCE7:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_1]], %subreg.sub0_sub1, [[REG_SEQUENCE6]], %subreg.sub2_sub3 - ; GFX67-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY10]], [[REG_SEQUENCE2]], [[REG_SEQUENCE7]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) + ; GFX67-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY10]], [[REG_SEQUENCE2]], [[REG_SEQUENCE7]], 0, 0, 0, 0, implicit $exec :: (store (i32) into %ir.tfe_addr, addrspace 1) ; GFX67-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: raw_buffer_load_v2i32_tfe @@ -587,13 +587,13 @@ define amdgpu_ps void @raw_buffer_load_v2i32_tfe(<4 x i32> inreg %rsrc, ptr addr ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX2_TFE_OFFSET:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX2_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX2_TFE_OFFSET:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX2_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x i32>), align 1, addrspace 8) ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_OFFSET]].sub0 ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_OFFSET]].sub1 ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_OFFSET]].sub2 ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 - ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s32>) into %ir.data_addr, addrspace 1) - ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec, implicit $flat_scr :: 
(store (s32) into %ir.tfe_addr, addrspace 1) + ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x i32>) into %ir.data_addr, addrspace 1) + ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32) into %ir.tfe_addr, addrspace 1) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX910-LABEL: name: raw_buffer_load_v2i32_tfe @@ -612,13 +612,13 @@ define amdgpu_ps void @raw_buffer_load_v2i32_tfe(<4 x i32> inreg %rsrc, ptr addr ; GFX910-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX910-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX910-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX910-NEXT: [[BUFFER_LOAD_DWORDX2_TFE_OFFSET:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX2_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8) + ; GFX910-NEXT: [[BUFFER_LOAD_DWORDX2_TFE_OFFSET:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX2_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x i32>), align 1, addrspace 8) ; GFX910-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_OFFSET]].sub0 ; GFX910-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_OFFSET]].sub1 ; GFX910-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_OFFSET]].sub2 ; GFX910-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 - ; GFX910-NEXT: GLOBAL_STORE_DWORDX2 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<2 x s32>) into %ir.data_addr, addrspace 1) - ; GFX910-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) + ; GFX910-NEXT: GLOBAL_STORE_DWORDX2 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<2 x i32>) into %ir.data_addr, addrspace 1) + ; GFX910-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (store (i32) into %ir.tfe_addr, addrspace 1) ; GFX910-NEXT: S_ENDPGM 0 ; ; GFX11-LABEL: name: raw_buffer_load_v2i32_tfe @@ -637,13 +637,13 @@ define amdgpu_ps void @raw_buffer_load_v2i32_tfe(<4 x i32> inreg %rsrc, ptr addr ; GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX11-NEXT: [[BUFFER_LOAD_DWORDX2_TFE_OFFSET:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX2_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8) + ; GFX11-NEXT: [[BUFFER_LOAD_DWORDX2_TFE_OFFSET:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX2_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x i32>), align 1, addrspace 8) ; GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_OFFSET]].sub0 ; GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_OFFSET]].sub1 ; GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_OFFSET]].sub2 ; GFX11-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 - ; GFX11-NEXT: GLOBAL_STORE_DWORDX2 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<2 x s32>) into %ir.data_addr, addrspace 1) - ; GFX11-NEXT: 
GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) + ; GFX11-NEXT: GLOBAL_STORE_DWORDX2 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<2 x i32>) into %ir.data_addr, addrspace 1) + ; GFX11-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (store (i32) into %ir.tfe_addr, addrspace 1) ; GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_buffer_load_v2i32_tfe @@ -661,13 +661,13 @@ define amdgpu_ps void @raw_buffer_load_v2i32_tfe(<4 x i32> inreg %rsrc, ptr addr ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX2_TFE_VBUFFER_OFFSET:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX2_TFE_VBUFFER_OFFSET [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX2_TFE_VBUFFER_OFFSET:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX2_TFE_VBUFFER_OFFSET [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x i32>), align 1, addrspace 8) ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_VBUFFER_OFFSET]].sub0 ; GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_VBUFFER_OFFSET]].sub1 ; GFX12-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_VBUFFER_OFFSET]].sub2 ; GFX12-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 - ; GFX12-NEXT: GLOBAL_STORE_DWORDX2 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<2 x s32>) into %ir.data_addr, addrspace 1) - ; GFX12-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) + ; GFX12-NEXT: GLOBAL_STORE_DWORDX2 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<2 x i32>) into %ir.data_addr, addrspace 1) + ; GFX12-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (store (i32) into %ir.tfe_addr, addrspace 1) ; GFX12-NEXT: S_ENDPGM 0 %res = call { <2 x i32>, i32 } @llvm.amdgcn.raw.buffer.load.sl_v2i32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0) %data = extractvalue { <2 x i32>, i32 } %res, 0 @@ -694,7 +694,7 @@ define amdgpu_ps void @raw_buffer_load_v2f32_tfe(<4 x i32> inreg %rsrc, ptr addr ; GFX67-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX67-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX67-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX67-NEXT: [[BUFFER_LOAD_DWORDX2_TFE_OFFSET:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX2_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8) + ; GFX67-NEXT: [[BUFFER_LOAD_DWORDX2_TFE_OFFSET:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX2_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x f32>), align 1, addrspace 8) ; GFX67-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_OFFSET]].sub0 ; GFX67-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_OFFSET]].sub1 ; GFX67-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_OFFSET]].sub2 @@ -704,13 +704,13 @@ define amdgpu_ps void @raw_buffer_load_v2f32_tfe(<4 x i32> inreg %rsrc, ptr addr ; 
GFX67-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_2]], %subreg.sub1 ; GFX67-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX67-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE4]], %subreg.sub2_sub3 - ; GFX67-NEXT: BUFFER_STORE_DWORDX2_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE1]], [[REG_SEQUENCE5]], 0, 0, 0, 0, implicit $exec :: (store (<2 x s32>) into %ir.data_addr, addrspace 1) + ; GFX67-NEXT: BUFFER_STORE_DWORDX2_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE1]], [[REG_SEQUENCE5]], 0, 0, 0, 0, implicit $exec :: (store (<2 x f32>) into %ir.data_addr, addrspace 1) ; GFX67-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX67-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX67-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_3]], %subreg.sub0, [[S_MOV_B32_4]], %subreg.sub1 ; GFX67-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX67-NEXT: [[REG_SEQUENCE7:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_1]], %subreg.sub0_sub1, [[REG_SEQUENCE6]], %subreg.sub2_sub3 - ; GFX67-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY10]], [[REG_SEQUENCE2]], [[REG_SEQUENCE7]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) + ; GFX67-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY10]], [[REG_SEQUENCE2]], [[REG_SEQUENCE7]], 0, 0, 0, 0, implicit $exec :: (store (i32) into %ir.tfe_addr, addrspace 1) ; GFX67-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: raw_buffer_load_v2f32_tfe @@ -729,13 +729,13 @@ define amdgpu_ps void @raw_buffer_load_v2f32_tfe(<4 x i32> inreg %rsrc, ptr addr ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX2_TFE_OFFSET:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX2_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX2_TFE_OFFSET:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX2_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x f32>), align 1, addrspace 8) ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_OFFSET]].sub0 ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_OFFSET]].sub1 ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_OFFSET]].sub2 ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 - ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s32>) into %ir.data_addr, addrspace 1) - ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.tfe_addr, addrspace 1) + ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x f32>) into %ir.data_addr, addrspace 1) + ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32) into %ir.tfe_addr, addrspace 1) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX910-LABEL: name: raw_buffer_load_v2f32_tfe @@ -754,13 +754,13 @@ define amdgpu_ps void @raw_buffer_load_v2f32_tfe(<4 x i32> inreg %rsrc, ptr addr ; GFX910-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX910-NEXT: 
[[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX910-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX910-NEXT: [[BUFFER_LOAD_DWORDX2_TFE_OFFSET:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX2_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8) + ; GFX910-NEXT: [[BUFFER_LOAD_DWORDX2_TFE_OFFSET:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX2_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x f32>), align 1, addrspace 8) ; GFX910-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_OFFSET]].sub0 ; GFX910-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_OFFSET]].sub1 ; GFX910-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_OFFSET]].sub2 ; GFX910-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 - ; GFX910-NEXT: GLOBAL_STORE_DWORDX2 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<2 x s32>) into %ir.data_addr, addrspace 1) - ; GFX910-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) + ; GFX910-NEXT: GLOBAL_STORE_DWORDX2 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<2 x f32>) into %ir.data_addr, addrspace 1) + ; GFX910-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (store (i32) into %ir.tfe_addr, addrspace 1) ; GFX910-NEXT: S_ENDPGM 0 ; ; GFX11-LABEL: name: raw_buffer_load_v2f32_tfe @@ -779,13 +779,13 @@ define amdgpu_ps void @raw_buffer_load_v2f32_tfe(<4 x i32> inreg %rsrc, ptr addr ; GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX11-NEXT: [[BUFFER_LOAD_DWORDX2_TFE_OFFSET:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX2_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8) + ; GFX11-NEXT: [[BUFFER_LOAD_DWORDX2_TFE_OFFSET:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX2_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x f32>), align 1, addrspace 8) ; GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_OFFSET]].sub0 ; GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_OFFSET]].sub1 ; GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_OFFSET]].sub2 ; GFX11-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 - ; GFX11-NEXT: GLOBAL_STORE_DWORDX2 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<2 x s32>) into %ir.data_addr, addrspace 1) - ; GFX11-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) + ; GFX11-NEXT: GLOBAL_STORE_DWORDX2 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<2 x f32>) into %ir.data_addr, addrspace 1) + ; GFX11-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (store (i32) into %ir.tfe_addr, addrspace 1) ; GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_buffer_load_v2f32_tfe @@ -803,13 +803,13 @@ define amdgpu_ps void @raw_buffer_load_v2f32_tfe(<4 x i32> inreg %rsrc, ptr addr ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = 
COPY $vgpr2 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX2_TFE_VBUFFER_OFFSET:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX2_TFE_VBUFFER_OFFSET [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX2_TFE_VBUFFER_OFFSET:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX2_TFE_VBUFFER_OFFSET [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x f32>), align 1, addrspace 8) ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_VBUFFER_OFFSET]].sub0 ; GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_VBUFFER_OFFSET]].sub1 ; GFX12-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_VBUFFER_OFFSET]].sub2 ; GFX12-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 - ; GFX12-NEXT: GLOBAL_STORE_DWORDX2 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<2 x s32>) into %ir.data_addr, addrspace 1) - ; GFX12-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) + ; GFX12-NEXT: GLOBAL_STORE_DWORDX2 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<2 x f32>) into %ir.data_addr, addrspace 1) + ; GFX12-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (store (i32) into %ir.tfe_addr, addrspace 1) ; GFX12-NEXT: S_ENDPGM 0 %res = call { <2 x float>, i32 } @llvm.amdgcn.raw.buffer.load.sl_v2f32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0) %data = extractvalue { <2 x float>, i32 } %res, 0 @@ -836,7 +836,7 @@ define amdgpu_ps void @raw_buffer_load_v3i32_tfe(<4 x i32> inreg %rsrc, ptr addr ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX3_TFE_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX3_TFE_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x i32>), align 1, addrspace 8) ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub0 ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub1 ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub2 @@ -848,19 +848,19 @@ define amdgpu_ps void @raw_buffer_load_v3i32_tfe(<4 x i32> inreg %rsrc, ptr addr ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_2]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE3]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_STORE_DWORDX2_ADDR64 [[COPY12]], [[REG_SEQUENCE1]], [[REG_SEQUENCE4]], 0, 0, 0, 0, implicit $exec :: (store (<2 x s32>) into %ir.data_addr, align 16, addrspace 1) + ; GFX6-NEXT: BUFFER_STORE_DWORDX2_ADDR64 [[COPY12]], [[REG_SEQUENCE1]], [[REG_SEQUENCE4]], 0, 0, 0, 0, implicit $exec :: (store (<2 x 
i32>) into %ir.data_addr, align 16, addrspace 1) ; GFX6-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_3]], %subreg.sub0, [[S_MOV_B32_4]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_1]], %subreg.sub0_sub1, [[REG_SEQUENCE5]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY10]], [[REG_SEQUENCE1]], [[REG_SEQUENCE6]], 0, 8, 0, 0, implicit $exec :: (store (s32) into %ir.data_addr + 8, align 8, basealign 16, addrspace 1) + ; GFX6-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY10]], [[REG_SEQUENCE1]], [[REG_SEQUENCE6]], 0, 8, 0, 0, implicit $exec :: (store (i32) into %ir.data_addr + 8, align 8, basealign 16, addrspace 1) ; GFX6-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_6:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE7:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_5]], %subreg.sub0, [[S_MOV_B32_6]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_2:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE8:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_2]], %subreg.sub0_sub1, [[REG_SEQUENCE7]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY11]], [[REG_SEQUENCE2]], [[REG_SEQUENCE8]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) + ; GFX6-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY11]], [[REG_SEQUENCE2]], [[REG_SEQUENCE8]], 0, 0, 0, 0, implicit $exec :: (store (i32) into %ir.tfe_addr, addrspace 1) ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX7-LABEL: name: raw_buffer_load_v3i32_tfe @@ -879,7 +879,7 @@ define amdgpu_ps void @raw_buffer_load_v3i32_tfe(<4 x i32> inreg %rsrc, ptr addr ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX3_TFE_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX3_TFE_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x i32>), align 1, addrspace 8) ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub0 ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub1 ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub2 @@ -890,13 +890,13 @@ define amdgpu_ps void @raw_buffer_load_v3i32_tfe(<4 x i32> inreg %rsrc, ptr addr ; GFX7-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_2]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE4]], %subreg.sub2_sub3 - ; GFX7-NEXT: BUFFER_STORE_DWORDX3_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE1]], [[REG_SEQUENCE5]], 0, 0, 0, 0, implicit $exec :: (store (<3 x s32>) into %ir.data_addr, align 16, addrspace 1) + ; GFX7-NEXT: BUFFER_STORE_DWORDX3_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE1]], [[REG_SEQUENCE5]], 0, 0, 0, 0, implicit $exec :: (store (<3 x i32>) into %ir.data_addr, align 16, addrspace 1) ; GFX7-NEXT: 
[[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_3]], %subreg.sub0, [[S_MOV_B32_4]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE7:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_1]], %subreg.sub0_sub1, [[REG_SEQUENCE6]], %subreg.sub2_sub3 - ; GFX7-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY11]], [[REG_SEQUENCE2]], [[REG_SEQUENCE7]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) + ; GFX7-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY11]], [[REG_SEQUENCE2]], [[REG_SEQUENCE7]], 0, 0, 0, 0, implicit $exec :: (store (i32) into %ir.tfe_addr, addrspace 1) ; GFX7-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: raw_buffer_load_v3i32_tfe @@ -915,14 +915,14 @@ define amdgpu_ps void @raw_buffer_load_v3i32_tfe(<4 x i32> inreg %rsrc, ptr addr ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX3_TFE_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX3_TFE_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x i32>), align 1, addrspace 8) ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub0 ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub1 ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub2 ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub3 ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1, [[COPY10]], %subreg.sub2 - ; GFX8-NEXT: FLAT_STORE_DWORDX3 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>) into %ir.data_addr, align 16, addrspace 1) - ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY11]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.tfe_addr, addrspace 1) + ; GFX8-NEXT: FLAT_STORE_DWORDX3 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x i32>) into %ir.data_addr, align 16, addrspace 1) + ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY11]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32) into %ir.tfe_addr, addrspace 1) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX910-LABEL: name: raw_buffer_load_v3i32_tfe @@ -941,14 +941,14 @@ define amdgpu_ps void @raw_buffer_load_v3i32_tfe(<4 x i32> inreg %rsrc, ptr addr ; GFX910-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX910-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX910-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX910-NEXT: [[BUFFER_LOAD_DWORDX3_TFE_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8) + ; GFX910-NEXT: [[BUFFER_LOAD_DWORDX3_TFE_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x 
i32>), align 1, addrspace 8) ; GFX910-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub0 ; GFX910-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub1 ; GFX910-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub2 ; GFX910-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub3 ; GFX910-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1, [[COPY10]], %subreg.sub2 - ; GFX910-NEXT: GLOBAL_STORE_DWORDX3 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<3 x s32>) into %ir.data_addr, align 16, addrspace 1) - ; GFX910-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY11]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) + ; GFX910-NEXT: GLOBAL_STORE_DWORDX3 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<3 x i32>) into %ir.data_addr, align 16, addrspace 1) + ; GFX910-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY11]], 0, 0, implicit $exec :: (store (i32) into %ir.tfe_addr, addrspace 1) ; GFX910-NEXT: S_ENDPGM 0 ; ; GFX11-LABEL: name: raw_buffer_load_v3i32_tfe @@ -967,14 +967,14 @@ define amdgpu_ps void @raw_buffer_load_v3i32_tfe(<4 x i32> inreg %rsrc, ptr addr ; GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX11-NEXT: [[BUFFER_LOAD_DWORDX3_TFE_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8) + ; GFX11-NEXT: [[BUFFER_LOAD_DWORDX3_TFE_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x i32>), align 1, addrspace 8) ; GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub0 ; GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub1 ; GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub2 ; GFX11-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub3 ; GFX11-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1, [[COPY10]], %subreg.sub2 - ; GFX11-NEXT: GLOBAL_STORE_DWORDX3 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<3 x s32>) into %ir.data_addr, align 16, addrspace 1) - ; GFX11-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY11]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) + ; GFX11-NEXT: GLOBAL_STORE_DWORDX3 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<3 x i32>) into %ir.data_addr, align 16, addrspace 1) + ; GFX11-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY11]], 0, 0, implicit $exec :: (store (i32) into %ir.tfe_addr, addrspace 1) ; GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_buffer_load_v3i32_tfe @@ -992,14 +992,14 @@ define amdgpu_ps void @raw_buffer_load_v3i32_tfe(<4 x i32> inreg %rsrc, ptr addr ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX3_TFE_VBUFFER_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_VBUFFER_OFFSET 
[[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX3_TFE_VBUFFER_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_VBUFFER_OFFSET [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x i32>), align 1, addrspace 8) ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_VBUFFER_OFFSET]].sub0 ; GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_VBUFFER_OFFSET]].sub1 ; GFX12-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_VBUFFER_OFFSET]].sub2 ; GFX12-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_VBUFFER_OFFSET]].sub3 ; GFX12-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1, [[COPY10]], %subreg.sub2 - ; GFX12-NEXT: GLOBAL_STORE_DWORDX3 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<3 x s32>) into %ir.data_addr, align 16, addrspace 1) - ; GFX12-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY11]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) + ; GFX12-NEXT: GLOBAL_STORE_DWORDX3 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<3 x i32>) into %ir.data_addr, align 16, addrspace 1) + ; GFX12-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY11]], 0, 0, implicit $exec :: (store (i32) into %ir.tfe_addr, addrspace 1) ; GFX12-NEXT: S_ENDPGM 0 %res = call { <3 x i32>, i32 } @llvm.amdgcn.raw.buffer.load.sl_v3i32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0) %data = extractvalue { <3 x i32>, i32 } %res, 0 @@ -1026,31 +1026,30 @@ define amdgpu_ps void @raw_buffer_load_v3f32_tfe(<4 x i32> inreg %rsrc, ptr addr ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX3_TFE_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX3_TFE_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x f32>), align 1, addrspace 8) ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub0 ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub1 ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub2 ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub3 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub0_sub1 - ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub2_sub3 + ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 - ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_2]], %subreg.sub1 + ; GFX6-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_2]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 - ; GFX6-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE 
[[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE3]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_STORE_DWORDX2_ADDR64 [[COPY12]], [[REG_SEQUENCE1]], [[REG_SEQUENCE4]], 0, 0, 0, 0, implicit $exec :: (store (<2 x s32>) into %ir.data_addr, align 16, addrspace 1) + ; GFX6-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE4]], %subreg.sub2_sub3 + ; GFX6-NEXT: BUFFER_STORE_DWORDX2_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE1]], [[REG_SEQUENCE5]], 0, 0, 0, 0, implicit $exec :: (store (<2 x f32>) into %ir.data_addr, align 16, addrspace 1) ; GFX6-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 - ; GFX6-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_3]], %subreg.sub0, [[S_MOV_B32_4]], %subreg.sub1 + ; GFX6-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_3]], %subreg.sub0, [[S_MOV_B32_4]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0 - ; GFX6-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_1]], %subreg.sub0_sub1, [[REG_SEQUENCE5]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY10]], [[REG_SEQUENCE1]], [[REG_SEQUENCE6]], 0, 8, 0, 0, implicit $exec :: (store (s32) into %ir.data_addr + 8, align 8, basealign 16, addrspace 1) + ; GFX6-NEXT: [[REG_SEQUENCE7:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_1]], %subreg.sub0_sub1, [[REG_SEQUENCE6]], %subreg.sub2_sub3 + ; GFX6-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY10]], [[REG_SEQUENCE1]], [[REG_SEQUENCE7]], 0, 8, 0, 0, implicit $exec :: (store (f32) into %ir.data_addr + 8, align 8, basealign 16, addrspace 1) ; GFX6-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_6:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 - ; GFX6-NEXT: [[REG_SEQUENCE7:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_5]], %subreg.sub0, [[S_MOV_B32_6]], %subreg.sub1 + ; GFX6-NEXT: [[REG_SEQUENCE8:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_5]], %subreg.sub0, [[S_MOV_B32_6]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_2:%[0-9]+]]:sreg_64 = S_MOV_B64 0 - ; GFX6-NEXT: [[REG_SEQUENCE8:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_2]], %subreg.sub0_sub1, [[REG_SEQUENCE7]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY11]], [[REG_SEQUENCE2]], [[REG_SEQUENCE8]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) + ; GFX6-NEXT: [[REG_SEQUENCE9:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_2]], %subreg.sub0_sub1, [[REG_SEQUENCE8]], %subreg.sub2_sub3 + ; GFX6-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY11]], [[REG_SEQUENCE2]], [[REG_SEQUENCE9]], 0, 0, 0, 0, implicit $exec :: (store (i32) into %ir.tfe_addr, addrspace 1) ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX7-LABEL: name: raw_buffer_load_v3f32_tfe @@ -1069,7 +1068,7 @@ define amdgpu_ps void @raw_buffer_load_v3f32_tfe(<4 x i32> inreg %rsrc, ptr addr ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX3_TFE_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX3_TFE_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x f32>), align 1, addrspace 8) ; 
GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub0 ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub1 ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub2 @@ -1080,13 +1079,13 @@ define amdgpu_ps void @raw_buffer_load_v3f32_tfe(<4 x i32> inreg %rsrc, ptr addr ; GFX7-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_2]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE4]], %subreg.sub2_sub3 - ; GFX7-NEXT: BUFFER_STORE_DWORDX3_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE1]], [[REG_SEQUENCE5]], 0, 0, 0, 0, implicit $exec :: (store (<3 x s32>) into %ir.data_addr, align 16, addrspace 1) + ; GFX7-NEXT: BUFFER_STORE_DWORDX3_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE1]], [[REG_SEQUENCE5]], 0, 0, 0, 0, implicit $exec :: (store (<3 x f32>) into %ir.data_addr, align 16, addrspace 1) ; GFX7-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_3]], %subreg.sub0, [[S_MOV_B32_4]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE7:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_1]], %subreg.sub0_sub1, [[REG_SEQUENCE6]], %subreg.sub2_sub3 - ; GFX7-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY11]], [[REG_SEQUENCE2]], [[REG_SEQUENCE7]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) + ; GFX7-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY11]], [[REG_SEQUENCE2]], [[REG_SEQUENCE7]], 0, 0, 0, 0, implicit $exec :: (store (i32) into %ir.tfe_addr, addrspace 1) ; GFX7-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: raw_buffer_load_v3f32_tfe @@ -1105,14 +1104,14 @@ define amdgpu_ps void @raw_buffer_load_v3f32_tfe(<4 x i32> inreg %rsrc, ptr addr ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX3_TFE_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX3_TFE_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x f32>), align 1, addrspace 8) ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub0 ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub1 ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub2 ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub3 ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1, [[COPY10]], %subreg.sub2 - ; GFX8-NEXT: FLAT_STORE_DWORDX3 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>) into %ir.data_addr, align 16, addrspace 1) - ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY11]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.tfe_addr, addrspace 1) + ; GFX8-NEXT: FLAT_STORE_DWORDX3 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 
0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>) into %ir.data_addr, align 16, addrspace 1) - ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY11]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.tfe_addr, addrspace 1) + ; GFX8-NEXT: FLAT_STORE_DWORDX3 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x f32>) into %ir.data_addr, align 16, addrspace 1) + ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY11]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32) into %ir.tfe_addr, addrspace 1) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX910-LABEL: name: raw_buffer_load_v3f32_tfe @@ -1131,14 +1130,14 @@ define amdgpu_ps void @raw_buffer_load_v3f32_tfe(<4 x i32> inreg %rsrc, ptr addr ; GFX910-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX910-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX910-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX910-NEXT: [[BUFFER_LOAD_DWORDX3_TFE_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8) + ; GFX910-NEXT: [[BUFFER_LOAD_DWORDX3_TFE_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x f32>), align 1, addrspace 8) ; GFX910-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub0 ; GFX910-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub1 ; GFX910-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub2 ; GFX910-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub3 ; GFX910-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1, [[COPY10]], %subreg.sub2 - ; GFX910-NEXT: GLOBAL_STORE_DWORDX3 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<3 x s32>) into %ir.data_addr, align 16, addrspace 1) - ; GFX910-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY11]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) + ; GFX910-NEXT: GLOBAL_STORE_DWORDX3 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<3 x f32>) into %ir.data_addr, align 16, addrspace 1) + ; GFX910-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY11]], 0, 0, implicit $exec :: (store (i32) into %ir.tfe_addr, addrspace 1) ; GFX910-NEXT: S_ENDPGM 0 ; ; GFX11-LABEL: name: raw_buffer_load_v3f32_tfe @@ -1157,14 +1156,14 @@ define amdgpu_ps void @raw_buffer_load_v3f32_tfe(<4 x i32> inreg %rsrc, ptr addr ; GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX11-NEXT: [[BUFFER_LOAD_DWORDX3_TFE_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8) + ; GFX11-NEXT: [[BUFFER_LOAD_DWORDX3_TFE_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x f32>), align 1, addrspace 8) ; GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub0 ; GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub1 ; GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub2 ; GFX11-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub3 ; GFX11-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1, [[COPY10]], %subreg.sub2 - ; 
GFX11-NEXT: GLOBAL_STORE_DWORDX3 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<3 x s32>) into %ir.data_addr, align 16, addrspace 1) - ; GFX11-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY11]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) + ; GFX11-NEXT: GLOBAL_STORE_DWORDX3 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<3 x f32>) into %ir.data_addr, align 16, addrspace 1) + ; GFX11-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY11]], 0, 0, implicit $exec :: (store (i32) into %ir.tfe_addr, addrspace 1) ; GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_buffer_load_v3f32_tfe @@ -1182,14 +1181,14 @@ define amdgpu_ps void @raw_buffer_load_v3f32_tfe(<4 x i32> inreg %rsrc, ptr addr ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX3_TFE_VBUFFER_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_VBUFFER_OFFSET [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX3_TFE_VBUFFER_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_VBUFFER_OFFSET [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x f32>), align 1, addrspace 8) ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_VBUFFER_OFFSET]].sub0 ; GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_VBUFFER_OFFSET]].sub1 ; GFX12-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_VBUFFER_OFFSET]].sub2 ; GFX12-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_VBUFFER_OFFSET]].sub3 ; GFX12-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1, [[COPY10]], %subreg.sub2 - ; GFX12-NEXT: GLOBAL_STORE_DWORDX3 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<3 x s32>) into %ir.data_addr, align 16, addrspace 1) - ; GFX12-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY11]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) + ; GFX12-NEXT: GLOBAL_STORE_DWORDX3 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<3 x f32>) into %ir.data_addr, align 16, addrspace 1) + ; GFX12-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY11]], 0, 0, implicit $exec :: (store (i32) into %ir.tfe_addr, addrspace 1) ; GFX12-NEXT: S_ENDPGM 0 %res = call { <3 x float>, i32 } @llvm.amdgcn.raw.buffer.load.sl_v3f32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0) %data = extractvalue { <3 x float>, i32 } %res, 0 @@ -1216,7 +1215,7 @@ define amdgpu_ps void @raw_buffer_load_v4i32_tfe(<4 x i32> inreg %rsrc, ptr addr ; GFX67-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX67-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX67-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX67-NEXT: [[BUFFER_LOAD_DWORDX4_TFE_OFFSET:%[0-9]+]]:vreg_160 = BUFFER_LOAD_DWORDX4_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + ; GFX67-NEXT: [[BUFFER_LOAD_DWORDX4_TFE_OFFSET:%[0-9]+]]:vreg_160 = BUFFER_LOAD_DWORDX4_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x i32>), align 1, addrspace 8) ; 
GFX67-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub0 ; GFX67-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub1 ; GFX67-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub2 @@ -1228,13 +1227,13 @@ define amdgpu_ps void @raw_buffer_load_v4i32_tfe(<4 x i32> inreg %rsrc, ptr addr ; GFX67-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_2]], %subreg.sub1 ; GFX67-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX67-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE4]], %subreg.sub2_sub3 - ; GFX67-NEXT: BUFFER_STORE_DWORDX4_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE1]], [[REG_SEQUENCE5]], 0, 0, 0, 0, implicit $exec :: (store (<4 x s32>) into %ir.data_addr, addrspace 1) + ; GFX67-NEXT: BUFFER_STORE_DWORDX4_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE1]], [[REG_SEQUENCE5]], 0, 0, 0, 0, implicit $exec :: (store (<4 x i32>) into %ir.data_addr, addrspace 1) ; GFX67-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX67-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX67-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_3]], %subreg.sub0, [[S_MOV_B32_4]], %subreg.sub1 ; GFX67-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX67-NEXT: [[REG_SEQUENCE7:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_1]], %subreg.sub0_sub1, [[REG_SEQUENCE6]], %subreg.sub2_sub3 - ; GFX67-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY12]], [[REG_SEQUENCE2]], [[REG_SEQUENCE7]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) + ; GFX67-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY12]], [[REG_SEQUENCE2]], [[REG_SEQUENCE7]], 0, 0, 0, 0, implicit $exec :: (store (i32) into %ir.tfe_addr, addrspace 1) ; GFX67-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: raw_buffer_load_v4i32_tfe @@ -1253,15 +1252,15 @@ define amdgpu_ps void @raw_buffer_load_v4i32_tfe(<4 x i32> inreg %rsrc, ptr addr ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_TFE_OFFSET:%[0-9]+]]:vreg_160 = BUFFER_LOAD_DWORDX4_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_TFE_OFFSET:%[0-9]+]]:vreg_160 = BUFFER_LOAD_DWORDX4_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x i32>), align 1, addrspace 8) ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub0 ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub1 ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub2 ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub3 ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub4 ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1, [[COPY10]], %subreg.sub2, [[COPY11]], %subreg.sub3 - ; GFX8-NEXT: FLAT_STORE_DWORDX4 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>) into %ir.data_addr, addrspace 1) - ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY12]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into 
%ir.tfe_addr, addrspace 1) + ; GFX8-NEXT: FLAT_STORE_DWORDX4 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x i32>) into %ir.data_addr, addrspace 1) + ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY12]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32) into %ir.tfe_addr, addrspace 1) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX910-LABEL: name: raw_buffer_load_v4i32_tfe @@ -1280,15 +1279,15 @@ define amdgpu_ps void @raw_buffer_load_v4i32_tfe(<4 x i32> inreg %rsrc, ptr addr ; GFX910-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX910-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX910-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX910-NEXT: [[BUFFER_LOAD_DWORDX4_TFE_OFFSET:%[0-9]+]]:vreg_160 = BUFFER_LOAD_DWORDX4_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + ; GFX910-NEXT: [[BUFFER_LOAD_DWORDX4_TFE_OFFSET:%[0-9]+]]:vreg_160 = BUFFER_LOAD_DWORDX4_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x i32>), align 1, addrspace 8) ; GFX910-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub0 ; GFX910-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub1 ; GFX910-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub2 ; GFX910-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub3 ; GFX910-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub4 ; GFX910-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1, [[COPY10]], %subreg.sub2, [[COPY11]], %subreg.sub3 - ; GFX910-NEXT: GLOBAL_STORE_DWORDX4 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<4 x s32>) into %ir.data_addr, addrspace 1) - ; GFX910-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY12]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) + ; GFX910-NEXT: GLOBAL_STORE_DWORDX4 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<4 x i32>) into %ir.data_addr, addrspace 1) + ; GFX910-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY12]], 0, 0, implicit $exec :: (store (i32) into %ir.tfe_addr, addrspace 1) ; GFX910-NEXT: S_ENDPGM 0 ; ; GFX11-LABEL: name: raw_buffer_load_v4i32_tfe @@ -1307,15 +1306,15 @@ define amdgpu_ps void @raw_buffer_load_v4i32_tfe(<4 x i32> inreg %rsrc, ptr addr ; GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX11-NEXT: [[BUFFER_LOAD_DWORDX4_TFE_OFFSET:%[0-9]+]]:vreg_160 = BUFFER_LOAD_DWORDX4_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + ; GFX11-NEXT: [[BUFFER_LOAD_DWORDX4_TFE_OFFSET:%[0-9]+]]:vreg_160 = BUFFER_LOAD_DWORDX4_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x i32>), align 1, addrspace 8) ; GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub0 ; GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub1 ; GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub2 ; GFX11-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY 
[[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub3 ; GFX11-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub4 ; GFX11-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1, [[COPY10]], %subreg.sub2, [[COPY11]], %subreg.sub3 - ; GFX11-NEXT: GLOBAL_STORE_DWORDX4 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<4 x s32>) into %ir.data_addr, addrspace 1) - ; GFX11-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY12]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) + ; GFX11-NEXT: GLOBAL_STORE_DWORDX4 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<4 x i32>) into %ir.data_addr, addrspace 1) + ; GFX11-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY12]], 0, 0, implicit $exec :: (store (i32) into %ir.tfe_addr, addrspace 1) ; GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_buffer_load_v4i32_tfe @@ -1333,15 +1332,15 @@ define amdgpu_ps void @raw_buffer_load_v4i32_tfe(<4 x i32> inreg %rsrc, ptr addr ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_OFFSET:%[0-9]+]]:vreg_160 = BUFFER_LOAD_DWORDX4_TFE_VBUFFER_OFFSET [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_OFFSET:%[0-9]+]]:vreg_160 = BUFFER_LOAD_DWORDX4_TFE_VBUFFER_OFFSET [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x i32>), align 1, addrspace 8) ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_OFFSET]].sub0 ; GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_OFFSET]].sub1 ; GFX12-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_OFFSET]].sub2 ; GFX12-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_OFFSET]].sub3 ; GFX12-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_OFFSET]].sub4 ; GFX12-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1, [[COPY10]], %subreg.sub2, [[COPY11]], %subreg.sub3 - ; GFX12-NEXT: GLOBAL_STORE_DWORDX4 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<4 x s32>) into %ir.data_addr, addrspace 1) - ; GFX12-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY12]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) + ; GFX12-NEXT: GLOBAL_STORE_DWORDX4 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<4 x i32>) into %ir.data_addr, addrspace 1) + ; GFX12-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY12]], 0, 0, implicit $exec :: (store (i32) into %ir.tfe_addr, addrspace 1) ; GFX12-NEXT: S_ENDPGM 0 %res = call { <4 x i32>, i32 } @llvm.amdgcn.raw.buffer.load.sl_v4i32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0) %data = extractvalue { <4 x i32>, i32 } %res, 0 @@ -1368,7 +1367,7 @@ define amdgpu_ps void @raw_buffer_load_v4f32_tfe(<4 x i32> inreg %rsrc, ptr addr ; GFX67-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX67-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX67-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX67-NEXT: [[BUFFER_LOAD_DWORDX4_TFE_OFFSET:%[0-9]+]]:vreg_160 = BUFFER_LOAD_DWORDX4_TFE_OFFSET 
[[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + ; GFX67-NEXT: [[BUFFER_LOAD_DWORDX4_TFE_OFFSET:%[0-9]+]]:vreg_160 = BUFFER_LOAD_DWORDX4_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x f32>), align 1, addrspace 8) ; GFX67-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub0 ; GFX67-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub1 ; GFX67-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub2 @@ -1380,13 +1379,13 @@ define amdgpu_ps void @raw_buffer_load_v4f32_tfe(<4 x i32> inreg %rsrc, ptr addr ; GFX67-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_2]], %subreg.sub1 ; GFX67-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX67-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE4]], %subreg.sub2_sub3 - ; GFX67-NEXT: BUFFER_STORE_DWORDX4_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE1]], [[REG_SEQUENCE5]], 0, 0, 0, 0, implicit $exec :: (store (<4 x s32>) into %ir.data_addr, addrspace 1) + ; GFX67-NEXT: BUFFER_STORE_DWORDX4_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE1]], [[REG_SEQUENCE5]], 0, 0, 0, 0, implicit $exec :: (store (<4 x f32>) into %ir.data_addr, addrspace 1) ; GFX67-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX67-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX67-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_3]], %subreg.sub0, [[S_MOV_B32_4]], %subreg.sub1 ; GFX67-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX67-NEXT: [[REG_SEQUENCE7:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_1]], %subreg.sub0_sub1, [[REG_SEQUENCE6]], %subreg.sub2_sub3 - ; GFX67-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY12]], [[REG_SEQUENCE2]], [[REG_SEQUENCE7]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) + ; GFX67-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY12]], [[REG_SEQUENCE2]], [[REG_SEQUENCE7]], 0, 0, 0, 0, implicit $exec :: (store (i32) into %ir.tfe_addr, addrspace 1) ; GFX67-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: raw_buffer_load_v4f32_tfe @@ -1405,15 +1404,15 @@ define amdgpu_ps void @raw_buffer_load_v4f32_tfe(<4 x i32> inreg %rsrc, ptr addr ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_TFE_OFFSET:%[0-9]+]]:vreg_160 = BUFFER_LOAD_DWORDX4_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_TFE_OFFSET:%[0-9]+]]:vreg_160 = BUFFER_LOAD_DWORDX4_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x f32>), align 1, addrspace 8) ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub0 ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub1 ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub2 ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub3 ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub4 ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1, 
[[COPY10]], %subreg.sub2, [[COPY11]], %subreg.sub3 - ; GFX8-NEXT: FLAT_STORE_DWORDX4 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>) into %ir.data_addr, addrspace 1) - ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY12]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.tfe_addr, addrspace 1) + ; GFX8-NEXT: FLAT_STORE_DWORDX4 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x f32>) into %ir.data_addr, addrspace 1) + ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY12]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32) into %ir.tfe_addr, addrspace 1) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX910-LABEL: name: raw_buffer_load_v4f32_tfe @@ -1432,15 +1431,15 @@ define amdgpu_ps void @raw_buffer_load_v4f32_tfe(<4 x i32> inreg %rsrc, ptr addr ; GFX910-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX910-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX910-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX910-NEXT: [[BUFFER_LOAD_DWORDX4_TFE_OFFSET:%[0-9]+]]:vreg_160 = BUFFER_LOAD_DWORDX4_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + ; GFX910-NEXT: [[BUFFER_LOAD_DWORDX4_TFE_OFFSET:%[0-9]+]]:vreg_160 = BUFFER_LOAD_DWORDX4_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x f32>), align 1, addrspace 8) ; GFX910-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub0 ; GFX910-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub1 ; GFX910-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub2 ; GFX910-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub3 ; GFX910-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub4 ; GFX910-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1, [[COPY10]], %subreg.sub2, [[COPY11]], %subreg.sub3 - ; GFX910-NEXT: GLOBAL_STORE_DWORDX4 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<4 x s32>) into %ir.data_addr, addrspace 1) - ; GFX910-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY12]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) + ; GFX910-NEXT: GLOBAL_STORE_DWORDX4 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<4 x f32>) into %ir.data_addr, addrspace 1) + ; GFX910-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY12]], 0, 0, implicit $exec :: (store (i32) into %ir.tfe_addr, addrspace 1) ; GFX910-NEXT: S_ENDPGM 0 ; ; GFX11-LABEL: name: raw_buffer_load_v4f32_tfe @@ -1459,15 +1458,15 @@ define amdgpu_ps void @raw_buffer_load_v4f32_tfe(<4 x i32> inreg %rsrc, ptr addr ; GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX11-NEXT: [[BUFFER_LOAD_DWORDX4_TFE_OFFSET:%[0-9]+]]:vreg_160 = BUFFER_LOAD_DWORDX4_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + ; GFX11-NEXT: [[BUFFER_LOAD_DWORDX4_TFE_OFFSET:%[0-9]+]]:vreg_160 = BUFFER_LOAD_DWORDX4_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x f32>), align 1, 
addrspace 8) ; GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub0 ; GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub1 ; GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub2 ; GFX11-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub3 ; GFX11-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub4 ; GFX11-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1, [[COPY10]], %subreg.sub2, [[COPY11]], %subreg.sub3 - ; GFX11-NEXT: GLOBAL_STORE_DWORDX4 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<4 x s32>) into %ir.data_addr, addrspace 1) - ; GFX11-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY12]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) + ; GFX11-NEXT: GLOBAL_STORE_DWORDX4 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<4 x f32>) into %ir.data_addr, addrspace 1) + ; GFX11-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY12]], 0, 0, implicit $exec :: (store (i32) into %ir.tfe_addr, addrspace 1) ; GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_buffer_load_v4f32_tfe @@ -1485,15 +1484,15 @@ define amdgpu_ps void @raw_buffer_load_v4f32_tfe(<4 x i32> inreg %rsrc, ptr addr ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_OFFSET:%[0-9]+]]:vreg_160 = BUFFER_LOAD_DWORDX4_TFE_VBUFFER_OFFSET [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_OFFSET:%[0-9]+]]:vreg_160 = BUFFER_LOAD_DWORDX4_TFE_VBUFFER_OFFSET [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x f32>), align 1, addrspace 8) ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_OFFSET]].sub0 ; GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_OFFSET]].sub1 ; GFX12-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_OFFSET]].sub2 ; GFX12-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_OFFSET]].sub3 ; GFX12-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_OFFSET]].sub4 ; GFX12-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1, [[COPY10]], %subreg.sub2, [[COPY11]], %subreg.sub3 - ; GFX12-NEXT: GLOBAL_STORE_DWORDX4 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<4 x s32>) into %ir.data_addr, addrspace 1) - ; GFX12-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY12]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) + ; GFX12-NEXT: GLOBAL_STORE_DWORDX4 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<4 x f32>) into %ir.data_addr, addrspace 1) + ; GFX12-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY12]], 0, 0, implicit $exec :: (store (i32) into %ir.tfe_addr, addrspace 1) ; GFX12-NEXT: S_ENDPGM 0 %res = call { <4 x float>, i32 } @llvm.amdgcn.raw.buffer.load.sl_v4f32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0) %data = extractvalue { <4 x float>, i32 } %res, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll index 7398e347e3397..954e70f75967e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll @@ -16,7 +16,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 8) + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (f16), align 1, addrspace 8) ; UNPACKED-NEXT: S_ENDPGM 0 ; ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f16 @@ -31,7 +31,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 8) + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (f16), align 1, addrspace 8) ; PACKED-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f16 @@ -46,7 +46,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: BUFFER_STORE_FORMAT_D16_X_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_FORMAT_D16_X_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (f16), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -64,7 +64,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_409 ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_gfx80_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 8) + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_gfx80_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable store (f16), align 1, addrspace 8) ; UNPACKED-NEXT: S_ENDPGM 0 ; ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_4095__sgpr_soffset_f16 @@ -78,7 +78,7 @@ define amdgpu_ps void 
@raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_409 ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 8) + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable store (f16), align 1, addrspace 8) ; PACKED-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_4095__sgpr_soffset_f16 @@ -92,7 +92,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_409 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: BUFFER_STORE_FORMAT_D16_X_VBUFFER_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_FORMAT_D16_X_VBUFFER_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable store (f16), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.f16(half %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0) ret void @@ -115,7 +115,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY7]], [[COPY4]], implicit $exec ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 8) + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x f16>), align 1, addrspace 8) ; UNPACKED-NEXT: S_ENDPGM 0 ; ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16 @@ -130,7 +130,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 8) + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x f16>), align 1, addrspace 8) ; PACKED-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16 @@ -145,7 +145,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; GFX12-NEXT: 
[[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: BUFFER_STORE_FORMAT_D16_XY_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_FORMAT_D16_XY_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x f16>), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -171,7 +171,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY9]], [[COPY5]], implicit $exec ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], %subreg.sub3 - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 8) + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x f16>), align 1, addrspace 8) ; UNPACKED-NEXT: S_ENDPGM 0 ; ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16 @@ -188,7 +188,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; PACKED-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 8) + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x f16>), align 1, addrspace 8) ; PACKED-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16 @@ -205,7 +205,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_VBUFFER_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_VBUFFER_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x f16>), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -255,7 +255,7 @@ define amdgpu_ps void 
@raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE2]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 8) + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE2]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x f16>), align 1, addrspace 8) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -304,7 +304,7 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE2]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 8) + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE2]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x f16>), align 1, addrspace 8) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -353,7 +353,7 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse ; GFX12-NEXT: bb.3: ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_VBUFFER_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE2]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_VBUFFER_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE2]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x f16>), align 1, addrspace 8) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -385,7 +385,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY6]], [[COPY4]], implicit $exec ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 8) + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x f16>), align 1, addrspace 8) ; UNPACKED-NEXT: S_ENDPGM 0 ; ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4095 @@ -400,7 +400,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED-NEXT: 
[[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 8) + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x f16>), align 1, addrspace 8) ; PACKED-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4095 @@ -415,7 +415,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 - ; GFX12-NEXT: BUFFER_STORE_FORMAT_D16_XY_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_FORMAT_D16_XY_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x f16>), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 4095, i32 0) ret void @@ -438,7 +438,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY6]], [[COPY4]], implicit $exec ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 8) + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x f16>), align 1, addrspace 8) ; UNPACKED-NEXT: S_ENDPGM 0 ; ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4096 @@ -453,7 +453,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 8) + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x f16>), align 1, addrspace 8) ; PACKED-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4096 @@ -468,7 +468,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX12-NEXT: BUFFER_STORE_FORMAT_D16_XY_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], 
[[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_FORMAT_D16_XY_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x f16>), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 4096, i32 0) ret void @@ -491,7 +491,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY7]], [[COPY4]], implicit $exec ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 8) + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, implicit $exec :: (dereferenceable store (<2 x f16>), align 1, addrspace 8) ; UNPACKED-NEXT: S_ENDPGM 0 ; ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_16 @@ -506,7 +506,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 8) + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, implicit $exec :: (dereferenceable store (<2 x f16>), align 1, addrspace 8) ; PACKED-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_16 @@ -521,7 +521,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: BUFFER_STORE_FORMAT_D16_XY_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_FORMAT_D16_XY_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, implicit $exec :: (dereferenceable store (<2 x f16>), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 %voffset.add = add i32 %voffset, 16 call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -545,7 +545,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY7]], [[COPY4]], implicit $exec ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 - ; UNPACKED-NEXT: 
BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 8) + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable store (<2 x f16>), align 1, addrspace 8) ; UNPACKED-NEXT: S_ENDPGM 0 ; ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_4095 @@ -560,7 +560,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 8) + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable store (<2 x f16>), align 1, addrspace 8) ; PACKED-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_4095 @@ -575,7 +575,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: BUFFER_STORE_FORMAT_D16_XY_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_FORMAT_D16_XY_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable store (<2 x f16>), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4095 call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -602,7 +602,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY8]], [[COPY4]], implicit $exec ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 8) + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x f16>), align 1, addrspace 8) ; UNPACKED-NEXT: S_ENDPGM 0 ; ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_4096 @@ -620,7 +620,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; PACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; PACKED-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 
[[COPY5]], [[COPY7]], 0, implicit $exec - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 8) + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x f16>), align 1, addrspace 8) ; PACKED-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_4096 @@ -635,7 +635,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: BUFFER_STORE_FORMAT_D16_XY_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4096, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_FORMAT_D16_XY_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4096, 0, 0, implicit $exec :: (dereferenceable store (<2 x f16>), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4096 call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -690,7 +690,7 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE2]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 8) + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE2]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x f16>), align 1, addrspace 8) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -742,7 +742,7 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE2]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 8) + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE2]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x f16>), align 1, addrspace 8) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -791,7 +791,7 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse ; GFX12-NEXT: bb.3: ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_VBUFFER_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE2]], [[COPY7]], 4096, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 8) + ; GFX12-NEXT: 
BUFFER_STORE_FORMAT_D16_XYZW_VBUFFER_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE2]], [[COPY7]], 4096, 0, 0, implicit $exec :: (dereferenceable store (<4 x f16>), align 1, addrspace 8) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll index 28de527ba7f2a..80529afd3a170 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll @@ -16,7 +16,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: BUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f32 @@ -31,7 +31,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: BUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -49,7 +49,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_409 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: BUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_4095__sgpr_soffset_f32 @@ -63,7 +63,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_409 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: BUFFER_STORE_FORMAT_X_VBUFFER_OFFSET_exact [[COPY4]], 
[[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_FORMAT_X_VBUFFER_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.f32(float %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0) ret void @@ -84,7 +84,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x f32>), align 1, addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32 @@ -101,7 +101,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: BUFFER_STORE_FORMAT_XY_VBUFFER_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_FORMAT_XY_VBUFFER_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x f32>), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -123,7 +123,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: BUFFER_STORE_FORMAT_XYZ_OFFEN_exact [[REG_SEQUENCE1]], [[COPY7]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, implicit $exec :: (dereferenceable store (<3 x s32>), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_FORMAT_XYZ_OFFEN_exact [[REG_SEQUENCE1]], [[COPY7]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, implicit $exec :: (dereferenceable store (<3 x f32>), align 1, addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v3f32 @@ -141,7 +141,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: BUFFER_STORE_FORMAT_XYZ_VBUFFER_OFFEN_exact [[REG_SEQUENCE1]], [[COPY7]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, implicit $exec :: (dereferenceable store (<3 x s32>), align 1, 
addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_FORMAT_XYZ_VBUFFER_OFFEN_exact [[REG_SEQUENCE1]], [[COPY7]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, implicit $exec :: (dereferenceable store (<3 x f32>), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.v3f32(<3 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -164,7 +164,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX8-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE]], [[COPY9]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE]], [[COPY9]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x f32>), align 1, addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f32 @@ -183,7 +183,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX12-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: BUFFER_STORE_FORMAT_XYZW_VBUFFER_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE]], [[COPY9]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_FORMAT_XYZW_VBUFFER_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE]], [[COPY9]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x f32>), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -229,7 +229,7 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE2]], [[COPY9]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE2]], [[COPY9]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x f32>), align 1, addrspace 8) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -280,7 +280,7 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse ; GFX12-NEXT: bb.3: ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: BUFFER_STORE_FORMAT_XYZW_VBUFFER_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE2]], [[COPY9]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_FORMAT_XYZW_VBUFFER_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE2]], [[COPY9]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x f32>), 
align 1, addrspace 8) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -310,7 +310,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 - ; GFX8-NEXT: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x f32>), align 1, addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_soffset4095 @@ -327,7 +327,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 - ; GFX12-NEXT: BUFFER_STORE_FORMAT_XY_VBUFFER_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_FORMAT_XY_VBUFFER_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x f32>), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 4095, i32 0) ret void @@ -348,7 +348,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX8-NEXT: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x f32>), align 1, addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_soffset4096 @@ -365,7 +365,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX12-NEXT: BUFFER_STORE_FORMAT_XY_VBUFFER_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_FORMAT_XY_VBUFFER_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x f32>), align 1, addrspace 
8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 4096, i32 0) ret void @@ -386,7 +386,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 16, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 16, 0, 0, implicit $exec :: (dereferenceable store (<2 x f32>), align 1, addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_voffset_add_16 @@ -403,7 +403,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: BUFFER_STORE_FORMAT_XY_VBUFFER_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 16, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_FORMAT_XY_VBUFFER_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 16, 0, 0, implicit $exec :: (dereferenceable store (<2 x f32>), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 %voffset.add = add i32 %voffset, 16 call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -425,7 +425,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 4095, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 4095, 0, 0, implicit $exec :: (dereferenceable store (<2 x f32>), align 1, addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_voffset_add_4095 @@ -442,7 +442,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: BUFFER_STORE_FORMAT_XY_VBUFFER_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 4095, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_FORMAT_XY_VBUFFER_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 4095, 0, 0, implicit $exec :: (dereferenceable store (<2 x f32>), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 %voffset.add = add 
i32 %voffset, 4095 call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -467,7 +467,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec - ; GFX8-NEXT: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x f32>), align 1, addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_voffset_add_4096 @@ -484,7 +484,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: BUFFER_STORE_FORMAT_XY_VBUFFER_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 4096, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_FORMAT_XY_VBUFFER_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 4096, 0, 0, implicit $exec :: (dereferenceable store (<2 x f32>), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4096 call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -536,7 +536,7 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE2]], [[COPY9]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE2]], [[COPY9]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x f32>), align 1, addrspace 8) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -587,7 +587,7 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse ; GFX12-NEXT: bb.3: ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: BUFFER_STORE_FORMAT_XYZW_VBUFFER_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE2]], [[COPY9]], 4096, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_FORMAT_XYZW_VBUFFER_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE2]], [[COPY9]], 4096, 0, 0, implicit $exec :: (dereferenceable store (<4 x f32>), align 1, addrspace 8) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; 
GFX12-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll index 8160ba4932055..3918110508e76 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll @@ -17,7 +17,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset @@ -32,7 +32,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: BUFFER_STORE_DWORD_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_DWORD_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -54,7 +54,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__sgpr_val__sgpr_voffset__sgpr ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr8 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY7]], [[COPY8]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY7]], [[COPY8]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_buffer_store__sgpr_rsrc__sgpr_val__sgpr_voffset__sgpr_soffset @@ -71,7 +71,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__sgpr_val__sgpr_voffset__sgpr ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr8 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX12-NEXT: BUFFER_STORE_DWORD_VBUFFER_OFFEN_exact [[COPY7]], [[COPY8]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_DWORD_VBUFFER_OFFEN_exact [[COPY7]], [[COPY8]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -114,7 +114,7 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; 
GFX8-NEXT: {{ $}} - ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -161,7 +161,7 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; GFX12-NEXT: bb.3: ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: BUFFER_STORE_DWORD_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_DWORD_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -204,7 +204,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__vgpr ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[V_READFIRSTLANE_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[V_READFIRSTLANE_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -242,7 +242,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__vgpr ; GFX12-NEXT: bb.3: ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: BUFFER_STORE_DWORD_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[V_READFIRSTLANE_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_DWORD_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[V_READFIRSTLANE_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -297,7 +297,7 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__vgpr ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -347,7 
+347,7 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__vgpr ; GFX12-NEXT: bb.3: ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: BUFFER_STORE_DWORD_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_DWORD_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -375,7 +375,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_glc @@ -390,7 +390,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: BUFFER_STORE_DWORD_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_DWORD_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 1) ret void @@ -409,7 +409,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 2, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 2, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_slc @@ -424,7 +424,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: BUFFER_STORE_DWORD_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 2, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_DWORD_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 
0, 2, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 2) ret void @@ -443,7 +443,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 3, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 3, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_glc_slc @@ -458,7 +458,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: BUFFER_STORE_DWORD_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 3, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_DWORD_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 3, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 3) ret void @@ -477,7 +477,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 4, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 4, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_dlc @@ -492,7 +492,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: BUFFER_STORE_DWORD_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 4, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_DWORD_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 4, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 4) ret void @@ -511,7 +511,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], 
[[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 6, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 6, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_slc_dlc @@ -526,7 +526,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: BUFFER_STORE_DWORD_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 6, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_DWORD_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 6, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 6) ret void @@ -545,7 +545,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 5, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 5, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_glc_dlc @@ -560,7 +560,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: BUFFER_STORE_DWORD_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 5, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_DWORD_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 5, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 5) ret void @@ -579,7 +579,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 7, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 7, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_glc_slc_dlc @@ -594,7 +594,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; GFX12-NEXT: 
[[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: BUFFER_STORE_DWORD_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 7, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_DWORD_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 7, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 7) ret void @@ -615,7 +615,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x f32>), align 1, addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32 @@ -632,7 +632,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: BUFFER_STORE_DWORDX2_VBUFFER_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_DWORDX2_VBUFFER_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x f32>), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -654,7 +654,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: BUFFER_STORE_DWORDX3_OFFEN_exact [[REG_SEQUENCE1]], [[COPY7]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, implicit $exec :: (dereferenceable store (<3 x s32>), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_DWORDX3_OFFEN_exact [[REG_SEQUENCE1]], [[COPY7]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, implicit $exec :: (dereferenceable store (<3 x f32>), align 1, addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v3f32 @@ -672,7 +672,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; 
GFX12-NEXT: BUFFER_STORE_DWORDX3_VBUFFER_OFFEN_exact [[REG_SEQUENCE1]], [[COPY7]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, implicit $exec :: (dereferenceable store (<3 x s32>), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_DWORDX3_VBUFFER_OFFEN_exact [[REG_SEQUENCE1]], [[COPY7]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, implicit $exec :: (dereferenceable store (<3 x f32>), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.v3f32(<3 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -695,7 +695,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX8-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: BUFFER_STORE_DWORDX4_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE]], [[COPY9]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_DWORDX4_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE]], [[COPY9]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x f32>), align 1, addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f32 @@ -714,7 +714,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX12-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: BUFFER_STORE_DWORDX4_VBUFFER_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE]], [[COPY9]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_DWORDX4_VBUFFER_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE]], [[COPY9]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x f32>), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -733,7 +733,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: BUFFER_STORE_BYTE_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_BYTE_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (i8), addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_i8 @@ -748,7 +748,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: BUFFER_STORE_BYTE_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_BYTE_VBUFFER_OFFEN_exact [[COPY4]], 
[[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (i8), addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 %val.trunc = trunc i32 %val to i8 call void @llvm.amdgcn.raw.buffer.store.i8(i8 %val.trunc, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -768,7 +768,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: BUFFER_STORE_SHORT_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_SHORT_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (i16), align 1, addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_i16 @@ -783,7 +783,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: BUFFER_STORE_SHORT_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_SHORT_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (i16), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 %val.trunc = trunc i32 %val to i16 call void @llvm.amdgcn.raw.buffer.store.i16(i16 %val.trunc, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -803,7 +803,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: BUFFER_STORE_SHORT_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_SHORT_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (f16), align 1, addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f16 @@ -818,7 +818,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: BUFFER_STORE_SHORT_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_SHORT_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (f16), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -837,7 +837,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-NEXT: 
[[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x f16>), align 1, addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16 @@ -852,7 +852,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: BUFFER_STORE_DWORD_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_DWORD_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x f16>), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -873,7 +873,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x f16>), align 1, addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16 @@ -890,7 +890,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: BUFFER_STORE_DWORDX2_VBUFFER_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_DWORDX2_VBUFFER_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x f16>), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -934,7 +934,7 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE2]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE2]], [[COPY7]], 0, 0, 0, implicit $exec :: 
(dereferenceable store (<4 x f16>), align 1, addrspace 8) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -983,7 +983,7 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; GFX12-NEXT: bb.3: ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: BUFFER_STORE_DWORDX2_VBUFFER_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE2]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_DWORDX2_VBUFFER_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE2]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x f16>), align 1, addrspace 8) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -1010,7 +1010,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__sgpr_soffset_f32_v ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__sgpr_soffset_f32_voffset4095 @@ -1024,7 +1024,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__sgpr_soffset_f32_v ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: BUFFER_STORE_DWORD_VBUFFER_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_DWORD_VBUFFER_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0) ret void @@ -1044,7 +1044,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__sgpr_soffset_f32_v ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY6]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY6]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__sgpr_soffset_f32_voffset4096 @@ -1058,7 +1058,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__sgpr_soffset_f32_v ; GFX12-NEXT: 
[[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: BUFFER_STORE_DWORD_VBUFFER_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4096, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_DWORD_VBUFFER_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4096, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 4096, i32 %soffset, i32 0) ret void @@ -1077,7 +1077,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f32_voffset_add_16 @@ -1092,7 +1092,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: BUFFER_STORE_DWORD_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_DWORD_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 %voffset.add = add i32 %voffset, 16 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -1112,7 +1112,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f32_voffset_add_4095 @@ -1127,7 +1127,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: BUFFER_STORE_DWORD_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX12-NEXT: 
BUFFER_STORE_DWORD_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4095 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -1150,7 +1150,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec - ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f32_voffset_add_4096 @@ -1165,7 +1165,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: BUFFER_STORE_DWORD_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4096, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_DWORD_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4096, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4096 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -1185,7 +1185,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 - ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x f16>), align 1, addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4095 @@ -1200,7 +1200,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 - ; GFX12-NEXT: BUFFER_STORE_DWORD_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_DWORD_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x f16>), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void 
@llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 4095, i32 0) ret void @@ -1219,7 +1219,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x f16>), align 1, addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4096 @@ -1234,7 +1234,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX12-NEXT: BUFFER_STORE_DWORD_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_DWORD_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x f16>), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 4096, i32 0) ret void @@ -1253,7 +1253,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, implicit $exec :: (dereferenceable store (<2 x f16>), align 1, addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset_add_16 @@ -1268,7 +1268,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: BUFFER_STORE_DWORD_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_DWORD_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, implicit $exec :: (dereferenceable store (<2 x f16>), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 %voffset.add = add i32 %voffset, 16 call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -1288,7 +1288,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; 
GFX8-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable store (<2 x f16>), align 1, addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset_add_4095 @@ -1303,7 +1303,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: BUFFER_STORE_DWORD_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_DWORD_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable store (<2 x f16>), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4095 call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -1326,7 +1326,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec - ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x f16>), align 1, addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset_add_4096 @@ -1341,7 +1341,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: BUFFER_STORE_DWORD_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4096, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_DWORD_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4096, 0, 0, implicit $exec :: (dereferenceable store (<2 x f16>), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4096 call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -1388,7 +1388,7 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE1]], [[COPY6]], 904, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[V_ADD_CO_U32_e64_]], 
[[REG_SEQUENCE1]], [[COPY6]], 904, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -1435,7 +1435,7 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; GFX12-NEXT: bb.3: ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: BUFFER_STORE_DWORD_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 5000, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_DWORD_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 5000, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -1489,7 +1489,7 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__5000_voffset__sgpr ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY5]], 904, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY5]], 904, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -1535,7 +1535,7 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__5000_voffset__sgpr ; GFX12-NEXT: bb.3: ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: BUFFER_STORE_DWORD_VBUFFER_OFFSET_exact [[COPY4]], [[REG_SEQUENCE1]], [[COPY5]], 5000, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_DWORD_VBUFFER_OFFSET_exact [[COPY4]], [[REG_SEQUENCE1]], [[COPY5]], 5000, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.atomic.add.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.atomic.add.ll index d7844c52a51af..c16a95dd1046b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.atomic.add.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.atomic.add.ll @@ -15,7 +15,7 @@ define amdgpu_ps float @raw_ptr_buffer_atomic_add_i32__vgpr_val__sgpr_rsrc__vgpr ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = 
BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (i32) on %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_OFFEN_RTN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32 %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -36,7 +36,7 @@ define amdgpu_ps float @raw_ptr_buffer_atomic_add_i32_noret__vgpr_val__sgpr_rsrc ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (i32) on %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_OFFEN_RTN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32 %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -59,7 +59,7 @@ define amdgpu_ps <2 x float> @raw_ptr_buffer_atomic_add_i64__vgpr_val__sgpr_rsrc ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_X2_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_OFFEN_RTN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_X2_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_OFFEN_RTN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (i64) on %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_X2_OFFEN_RTN]].sub0 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_X2_OFFEN_RTN]].sub1 ; CHECK-NEXT: $vgpr0 = COPY [[COPY8]] @@ -85,7 +85,7 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_add_i64_noret__vgpr_val__sgpr_rsrc_ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; CHECK-NEXT: BUFFER_ATOMIC_ADD_X2_OFFEN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_ATOMIC_ADD_X2_OFFEN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (i64) on %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 %ret = call i64 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i64(i64 %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) 
ret void @@ -133,7 +133,7 @@ define amdgpu_ps float @raw_ptr_buffer_atomic_add_i32__sgpr_val__vgpr_rsrc__sgpr ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (i32) on %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -192,7 +192,7 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_add_i32_noret__sgpr_val__vgpr_rsrc_ ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: BUFFER_ATOMIC_ADD_OFFEN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_ATOMIC_ADD_OFFEN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (i32) on %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -220,7 +220,7 @@ define amdgpu_ps float @raw_ptr_buffer_atomic_add_i32__vgpr_val__sgpr_rsrc__vgpr ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (i32) on %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_OFFEN_RTN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %voffset = add i32 %voffset.base, 4095 @@ -243,7 +243,7 @@ define amdgpu_ps float @raw_ptr_buffer_atomic_add_i32__vgpr_val__sgpr_rsrc__vgpr ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 3, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 3, implicit $exec :: (volatile dereferenceable load store (i32) on %ir.rsrc, 
align 1, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_OFFEN_RTN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32 %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 2) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.ll index 3852a02cbf360..6f5970d588afc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.ll @@ -17,7 +17,7 @@ define amdgpu_ps float @raw_ptr_buffer_atomic_cmpswap_i32__vgpr_val__vgpr_cmp__s ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (i32) on %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0 ; CHECK-NEXT: $vgpr0 = COPY [[COPY8]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -42,7 +42,7 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_cmpswap_i32_noret__vgpr_val__vgpr_c ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (i32) on %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 %ret = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32 %val, i32 %cmp, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -93,7 +93,7 @@ define amdgpu_ps float @raw_ptr_buffer_atomic_cmpswap_i32__sgpr_val__sgpr_cmp__v ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE2]], [[COPY10]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE2]], [[COPY10]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, 
implicit $exec :: (volatile dereferenceable load store (i32) on %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0 ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec @@ -156,7 +156,7 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_cmpswap_i32_noret__sgpr_val__sgpr_c ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE2]], [[COPY10]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE2]], [[COPY10]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (i32) on %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -186,7 +186,7 @@ define amdgpu_ps float @raw_ptr_buffer_atomic_cmpswap_i32__vgpr_val__vgpr_cmp__s ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (i32) on %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0 ; CHECK-NEXT: $vgpr0 = COPY [[COPY8]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -216,7 +216,7 @@ define amdgpu_ps double @raw_ptr_buffer_atomic_cmpswap_i64__vgpr_val__vgpr_cmp__ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN [[REG_SEQUENCE3]], [[COPY8]], [[REG_SEQUENCE2]], [[COPY9]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN [[REG_SEQUENCE3]], [[COPY8]], [[REG_SEQUENCE2]], [[COPY9]], 0, 1, implicit $exec :: (volatile dereferenceable load store (i64) on %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN]].sub0_sub1 ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY10]].sub0 ; CHECK-NEXT: 
[[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY10]].sub1 @@ -250,7 +250,7 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_cmpswap_i64_noret__vgpr_val__vgpr_c ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; CHECK-NEXT: BUFFER_ATOMIC_CMPSWAP_X2_OFFEN [[REG_SEQUENCE3]], [[COPY8]], [[REG_SEQUENCE2]], [[COPY9]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_ATOMIC_CMPSWAP_X2_OFFEN [[REG_SEQUENCE3]], [[COPY8]], [[REG_SEQUENCE2]], [[COPY9]], 0, 0, implicit $exec :: (volatile dereferenceable load store (i64) on %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 %ret = call i64 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i64(i64 %val, i64 %cmp, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -305,7 +305,7 @@ define amdgpu_ps double @raw_ptr_buffer_atomic_cmpswap_i64__sgpr_val__sgpr_cmp__ ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY10]], %subreg.sub0_sub1, [[COPY11]], %subreg.sub2_sub3 - ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN [[REG_SEQUENCE4]], [[COPY12]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN [[REG_SEQUENCE4]], [[COPY12]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (i64) on %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN]].sub0_sub1 ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec @@ -377,7 +377,7 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_cmpswap_i64_noret__sgpr_val__sgpr_c ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY10]], %subreg.sub0_sub1, [[COPY11]], %subreg.sub2_sub3 - ; CHECK-NEXT: BUFFER_ATOMIC_CMPSWAP_X2_OFFEN [[REG_SEQUENCE4]], [[COPY12]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_ATOMIC_CMPSWAP_X2_OFFEN [[REG_SEQUENCE4]], [[COPY12]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (i64) on %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -411,7 +411,7 @@ define amdgpu_ps double @raw_ptr_buffer_atomic_cmpswap_i64__vgpr_val__vgpr_cmp__ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE]], 
%subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN [[REG_SEQUENCE3]], [[COPY8]], [[REG_SEQUENCE2]], [[COPY9]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN [[REG_SEQUENCE3]], [[COPY8]], [[REG_SEQUENCE2]], [[COPY9]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (i64) on %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN]].sub0_sub1 ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY10]].sub0 ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY10]].sub1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.atomic.fadd-with-ret.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.atomic.fadd-with-ret.ll index 798a3ee1d75fd..be614dc9588ab 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.atomic.fadd-with-ret.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.atomic.fadd-with-ret.ll @@ -4,7 +4,7 @@ declare float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float, ptr addrspace(8), i32, i32, i32 immarg) declare <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half>, ptr addrspace(8), i32, i32, i32 immarg) -; GFX908: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32) = G_AMDGPU_BUFFER_ATOMIC_FADD %{{[0-9]+}}:vgpr, %{{[0-9]+}}:sgpr(<4 x s32>), %{{[0-9]+}}:vgpr(s32), %{{[0-9]+}}:vgpr, %{{[0-9]+}}:sgpr, 0, 0, 0 :: (volatile dereferenceable load store (s32) on %ir.rsrc.load, align 1, addrspace 8) (in function: buffer_atomic_add_f32_rtn) +; GFX908: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(f32) = G_AMDGPU_BUFFER_ATOMIC_FADD %{{[0-9]+}}:vgpr, %{{[0-9]+}}:sgpr(<4 x i32>), %{{[0-9]+}}:vgpr(i32), %{{[0-9]+}}:vgpr, %{{[0-9]+}}:sgpr, 0, 0, 0 :: (volatile dereferenceable load store (f32) on %ir.rsrc.load, align 1, addrspace 8) (in function: buffer_atomic_add_f32_rtn) ; GFX90A-LABEL: {{^}}buffer_atomic_add_f32_rtn: ; GFX90A: buffer_atomic_add_f32 v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9:]+}}], s{{[0-9]+}} offen glc diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.atomic.fadd.ll index 42c0749af5f6e..3fcbcdbce0b63 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.atomic.fadd.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.atomic.fadd.ll @@ -16,7 +16,7 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc_ ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (f32) on %ir.rsrc, align 1, addrspace 8) ; GFX908-NEXT: S_ENDPGM 0 ; ; GFX90A-LABEL: name: 
raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset @@ -31,7 +31,7 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc_ ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (f32) on %ir.rsrc, align 1, addrspace 8) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -50,7 +50,7 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc_ ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (f32) on %ir.rsrc, align 1, addrspace 8) ; GFX908-NEXT: S_ENDPGM 0 ; ; GFX90A-LABEL: name: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset_plus4095__sgpr_soffset @@ -65,7 +65,7 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc_ ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (f32) on %ir.rsrc, align 1, addrspace 8) ; GFX90A-NEXT: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4095 %ret = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -84,7 +84,7 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc_ ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 
4095, 0, implicit $exec :: (volatile dereferenceable load store (f32) on %ir.rsrc, align 1, addrspace 8) ; GFX908-NEXT: S_ENDPGM 0 ; ; GFX90A-LABEL: name: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset_4095__sgpr_soffset @@ -98,7 +98,7 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc_ ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (f32) on %ir.rsrc, align 1, addrspace 8) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 4095, i32 %soffset, i32 0) ret void @@ -117,7 +117,7 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc_ ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (f32) on %ir.rsrc, align 1, addrspace 8) ; GFX908-NEXT: S_ENDPGM 0 ; ; GFX90A-LABEL: name: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset @@ -131,7 +131,7 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc_ ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (f32) on %ir.rsrc, align 1, addrspace 8) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 0, i32 %soffset, i32 0) ret void @@ -179,7 +179,7 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc_ ; GFX908-NEXT: bb.3: ; GFX908-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (f32) 
on %ir.rsrc, align 1, addrspace 8) ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX908-NEXT: {{ $}} @@ -231,7 +231,7 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc_ ; GFX90A-NEXT: bb.3: ; GFX90A-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (f32) on %ir.rsrc, align 1, addrspace 8) ; GFX90A-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX90A-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX90A-NEXT: {{ $}} @@ -286,7 +286,7 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc_ ; GFX908-NEXT: bb.3: ; GFX908-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (f32) on %ir.rsrc, align 1, addrspace 8) ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX908-NEXT: {{ $}} @@ -336,7 +336,7 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc_ ; GFX90A-NEXT: bb.3: ; GFX90A-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (f32) on %ir.rsrc, align 1, addrspace 8) ; GFX90A-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX90A-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX90A-NEXT: {{ $}} @@ -364,7 +364,7 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc_ ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (f32) on %ir.rsrc, align 1, addrspace 8) ; GFX908-NEXT: S_ENDPGM 0 ; ; GFX90A-LABEL: name: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_voffset_add4095 @@ -379,7 +379,7 @@ define amdgpu_ps void 
@raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc_ ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (f32) on %ir.rsrc, align 1, addrspace 8) ; GFX90A-NEXT: S_ENDPGM 0 %voffset = add i32 %voffset.base, 4095 %ret = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -400,7 +400,7 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc_ ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 2, implicit $exec :: (volatile dereferenceable load store (f32) on %ir.rsrc, align 1, addrspace 8) ; GFX908-NEXT: S_ENDPGM 0 ; ; GFX90A-LABEL: name: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc @@ -415,7 +415,7 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc_ ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 2, implicit $exec :: (volatile dereferenceable load store (f32) on %ir.rsrc, align 1, addrspace 8) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 2) ret void @@ -434,7 +434,7 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsr ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8) + ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x f16>) on %ir.rsrc, align 1, 
addrspace 8) ; GFX908-NEXT: S_ENDPGM 0 ; ; GFX90A-LABEL: name: raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset @@ -449,7 +449,7 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsr ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x f16>) on %ir.rsrc, align 1, addrspace 8) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -467,7 +467,7 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsr ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8) + ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x f16>) on %ir.rsrc, align 1, addrspace 8) ; GFX908-NEXT: S_ENDPGM 0 ; ; GFX90A-LABEL: name: raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset @@ -481,7 +481,7 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsr ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x f16>) on %ir.rsrc, align 1, addrspace 8) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 0, i32 %soffset, i32 0) ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.load.format.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.load.format.f16.ll index cf059da089e50..296f212aa2f4b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.load.format.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.load.format.f16.ll @@ -15,7 +15,7 @@ define amdgpu_ps half @raw_ptr_buffer_load_format_f16__sgpr_rsrc__vgpr_voffset__ ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY 
$sgpr6 ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16) from %ir.rsrc, align 1, addrspace 8) + ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (f16) from %ir.rsrc, align 1, addrspace 8) ; PACKED-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_OFFEN]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -30,7 +30,7 @@ define amdgpu_ps half @raw_ptr_buffer_load_format_f16__sgpr_rsrc__vgpr_voffset__ ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16) from %ir.rsrc, align 1, addrspace 8) + ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (f16) from %ir.rsrc, align 1, addrspace 8) ; UNPACKED-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]] ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call half @llvm.amdgcn.raw.ptr.buffer.load.format.f16(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -49,7 +49,7 @@ define amdgpu_ps <2 x half> @raw_ptr_buffer_load_format_v2f16__sgpr_rsrc__vgpr_v ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XY_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_XY_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>) from %ir.rsrc, align 1, addrspace 8) + ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XY_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_XY_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x f16>) from %ir.rsrc, align 1, addrspace 8) ; PACKED-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_XY_OFFEN]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -64,7 +64,7 @@ define amdgpu_ps <2 x half> @raw_ptr_buffer_load_format_v2f16__sgpr_rsrc__vgpr_v ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>) from %ir.rsrc, align 1, addrspace 8) + ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN:%[0-9]+]]:vreg_64 = 
BUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x f16>) from %ir.rsrc, align 1, addrspace 8) ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN]].sub0 ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN]].sub1 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 @@ -100,7 +100,7 @@ define amdgpu_ps <4 x half> @raw_ptr_buffer_load_format_v4f16__sgpr_rsrc__vgpr_v ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>) from %ir.rsrc, align 1, addrspace 8) + ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x f16>) from %ir.rsrc, align 1, addrspace 8) ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub0 ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub1 ; PACKED-NEXT: $vgpr0 = COPY [[COPY6]] @@ -118,7 +118,7 @@ define amdgpu_ps <4 x half> @raw_ptr_buffer_load_format_v4f16__sgpr_rsrc__vgpr_v ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>) from %ir.rsrc, align 1, addrspace 8) + ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x f16>) from %ir.rsrc, align 1, addrspace 8) ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub0 ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub1 ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub2 @@ -186,7 +186,7 @@ define amdgpu_ps half @raw_ptr_buffer_load_format_f16__vgpr_rsrc__sgpr_voffset__ ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16) from %ir.rsrc, align 1, addrspace 8) + ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (f16) from %ir.rsrc, align 1, addrspace 8) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, 
implicit $exec ; PACKED-NEXT: {{ $}} @@ -237,7 +237,7 @@ define amdgpu_ps half @raw_ptr_buffer_load_format_f16__vgpr_rsrc__sgpr_voffset__ ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16) from %ir.rsrc, align 1, addrspace 8) + ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (f16) from %ir.rsrc, align 1, addrspace 8) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -265,7 +265,7 @@ define amdgpu_ps <4 x half> @raw_ptr_buffer_load_format_v4f16__sgpr_rsrc__vgpr_v ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>) from %ir.rsrc, align 1, addrspace 8) + ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable load (<4 x f16>) from %ir.rsrc, align 1, addrspace 8) ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub0 ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub1 ; PACKED-NEXT: $vgpr0 = COPY [[COPY6]] @@ -283,7 +283,7 @@ define amdgpu_ps <4 x half> @raw_ptr_buffer_load_format_v4f16__sgpr_rsrc__vgpr_v ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>) from %ir.rsrc, align 1, addrspace 8) + ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable load (<4 x f16>) from %ir.rsrc, align 1, addrspace 8) ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub0 ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub1 ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.load.format.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.load.format.ll index d9c61674d2df5..be22f172ccc7b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.load.format.ll +++ 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.load.format.ll @@ -14,7 +14,7 @@ define amdgpu_ps float @raw_ptr_buffer_load_format_f32__sgpr_rsrc__vgpr_voffset_ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.ptr.buffer.load.format.f32(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -33,7 +33,7 @@ define amdgpu_ps <2 x float> @raw_ptr_buffer_load_format_v2f32__sgpr_rsrc__vgpr_ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_XY_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_XY_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_XY_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_XY_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x f32>) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XY_OFFEN]].sub0 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XY_OFFEN]].sub1 ; CHECK-NEXT: $vgpr0 = COPY [[COPY6]] @@ -55,7 +55,7 @@ define amdgpu_ps <3 x float> @raw_ptr_buffer_load_format_v3f32__sgpr_rsrc__vgpr_ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_XYZ_OFFEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_FORMAT_XYZ_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_XYZ_OFFEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_FORMAT_XYZ_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x f32>) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub0 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub1 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub2 @@ -79,7 +79,7 @@ define amdgpu_ps <4 x float> @raw_ptr_buffer_load_format_v4f32__sgpr_rsrc__vgpr_ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, 
[[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x f32>) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub0 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub1 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub2 @@ -133,7 +133,7 @@ define amdgpu_ps float @raw_ptr_buffer_load_format_f32__vgpr_rsrc__sgpr_voffset_ ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -161,7 +161,7 @@ define amdgpu_ps <4 x float> @raw_ptr_buffer_load_format_v4f32__sgpr_rsrc__vgpr_ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable load (<4 x f32>) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub0 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub1 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.load.ll index 06259815a9223..036a8c51c246a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.load.ll @@ -15,7 +15,7 @@ define amdgpu_ps float @raw_ptr_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_s ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = 
BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -36,7 +36,7 @@ define amdgpu_ps float @raw_ptr_buffer_load_f32__sgpr_rsrc__sgpr_voffset__sgpr_s ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr7 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY6]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY6]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -79,7 +79,7 @@ define amdgpu_ps float @raw_ptr_buffer_load_f32__vgpr_rsrc__vgpr_voffset__sgpr_s ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -134,7 +134,7 @@ define amdgpu_ps float @raw_ptr_buffer_load_f32__vgpr_rsrc__vgpr_voffset__vgpr_s ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -163,7 +163,7 @@ define amdgpu_ps float @raw_ptr_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_s ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], 
%subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, 0, implicit $exec :: (dereferenceable load (f32) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 1) @@ -183,7 +183,7 @@ define amdgpu_ps float @raw_ptr_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_s ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 2, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 2, 0, implicit $exec :: (dereferenceable load (f32) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 2) @@ -203,7 +203,7 @@ define amdgpu_ps float @raw_ptr_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_s ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 4, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 4, 0, implicit $exec :: (dereferenceable load (f32) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 4) @@ -223,7 +223,7 @@ define amdgpu_ps float @raw_ptr_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_s ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 6, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 6, 0, implicit $exec :: (dereferenceable 
load (f32) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 6) @@ -243,7 +243,7 @@ define amdgpu_ps float @raw_ptr_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_s ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 5, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 5, 0, implicit $exec :: (dereferenceable load (f32) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 5) @@ -263,7 +263,7 @@ define amdgpu_ps float @raw_ptr_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_s ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 7, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 7, 0, implicit $exec :: (dereferenceable load (f32) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 7) @@ -282,7 +282,7 @@ define amdgpu_ps float @raw_ptr_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_s ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (volatile dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (volatile dereferenceable load (f32) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 -2147483648) @@ -302,7 +302,7 @@ define amdgpu_ps <2 x float> @raw_ptr_buffer_load_v2f32__sgpr_rsrc__vgpr_voffset ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: 
[[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x f32>) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1 ; CHECK-NEXT: $vgpr0 = COPY [[COPY6]] @@ -324,7 +324,7 @@ define amdgpu_ps <3 x float> @raw_ptr_buffer_load_v3f32__sgpr_rsrc__vgpr_voffset ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX3_OFFEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX3_OFFEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x f32>) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFEN]].sub0 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFEN]].sub1 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFEN]].sub2 @@ -348,7 +348,7 @@ define amdgpu_ps <4 x float> @raw_ptr_buffer_load_v4f32__sgpr_rsrc__vgpr_voffset ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x f32>) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 @@ -374,7 +374,7 @@ define amdgpu_ps half @raw_ptr_buffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_so ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load 
(s16) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (f16) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call half @llvm.amdgcn.raw.ptr.buffer.load.f16(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -393,7 +393,7 @@ define amdgpu_ps <2 x half> @raw_ptr_buffer_load_v2f16__sgpr_rsrc__vgpr_voffset_ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x f16>) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.load.v2f16(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -418,7 +418,7 @@ define amdgpu_ps <4 x half> @raw_ptr_buffer_load_v4f16__sgpr_rsrc__vgpr_voffset_ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x f16>) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1 ; CHECK-NEXT: $vgpr0 = COPY [[COPY6]] @@ -440,7 +440,7 @@ define amdgpu_ps float @raw_ptr_buffer_load_i8__sgpr_rsrc__vgpr_voffset__sgpr_so ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s8) from %ir.rsrc, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (i8) from %ir.rsrc, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 
0) @@ -461,7 +461,7 @@ define amdgpu_ps float @raw_ptr_buffer_load_i8__sgpr_rsrc__vgpr_voffset__sgpr_so ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[BUFFER_LOAD_SBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_SBYTE_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s8) from %ir.rsrc, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_SBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_SBYTE_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (i8) from %ir.rsrc, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_SBYTE_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -482,7 +482,7 @@ define amdgpu_ps float @raw_ptr_buffer_load_i16__sgpr_rsrc__vgpr_voffset__sgpr_s ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (i16) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -503,7 +503,7 @@ define amdgpu_ps float @raw_ptr_buffer_load_i16__sgpr_rsrc__vgpr_voffset__sgpr_s ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[BUFFER_LOAD_SSHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_SSHORT_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_SSHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_SSHORT_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (i16) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_SSHORT_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -548,7 +548,7 @@ define amdgpu_ps half @raw_ptr_buffer_load_f16__vgpr_rsrc__vgpr_voffset__sgpr_so ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = 
BUFFER_LOAD_USHORT_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (f16) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -600,7 +600,7 @@ define amdgpu_ps float @raw_ptr_buffer_load_i8__vgpr_rsrc__vgpr_voffset__sgpr_so ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s8) from %ir.rsrc, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (i8) from %ir.rsrc, addrspace 8) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -629,7 +629,7 @@ define amdgpu_ps float @raw_ptr_buffer_load_f32__sgpr_rsrc__vdpr_voffset__sgpr_s ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE]], [[COPY4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE]], [[COPY4]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 0, i32 %soffset, i32 0) @@ -647,7 +647,7 @@ define amdgpu_ps float @raw_ptr_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_s ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE]], [[COPY4]], 4095, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE]], [[COPY4]], 4095, 0, 0, implicit $exec :: (dereferenceable load (f32) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 4095, i32 %soffset, i32 0) @@ -667,7 +667,7 @@ define amdgpu_ps float @raw_ptr_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_s ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK-NEXT: 
[[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE]], [[COPY4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE]], [[COPY4]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 4096, i32 %soffset, i32 0) @@ -686,7 +686,7 @@ define amdgpu_ps float @raw_ptr_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_s ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 16, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 16, 0, 0, implicit $exec :: (dereferenceable load (f32) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %voffset = add i32 %voffset.base, 16 @@ -706,7 +706,7 @@ define amdgpu_ps float @raw_ptr_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_s ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable load (f32) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %voffset = add i32 %voffset.base, 4095 @@ -729,7 +729,7 @@ define amdgpu_ps float @raw_ptr_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_s ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: 
SI_RETURN_TO_EPILOG implicit $vgpr0 %voffset = add i32 %voffset.base, 4096 @@ -749,7 +749,7 @@ define amdgpu_ps float @raw_ptr_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_s ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %voffset, i32 4095, i32 0) @@ -768,7 +768,7 @@ define amdgpu_ps float @raw_ptr_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_s ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %voffset, i32 4096, i32 0) @@ -789,7 +789,7 @@ define amdgpu_ps float @raw_ptr_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_s ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def dead $scc ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 16 @@ -811,7 +811,7 @@ define amdgpu_ps float @raw_ptr_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_s ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def dead $scc ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, 
[[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4095 @@ -833,7 +833,7 @@ define amdgpu_ps float @raw_ptr_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_s ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def dead $scc ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4096 @@ -879,7 +879,7 @@ define amdgpu_ps float @raw_ptr_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_s ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_ADD_I32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_ADD_I32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -935,7 +935,7 @@ define amdgpu_ps float @raw_ptr_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_s ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE1]], [[COPY5]], 904, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE1]], [[COPY5]], 904, 0, 0, implicit $exec :: (dereferenceable load (f32) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.store.format.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.store.format.f16.ll index fa0af33281ed4..14119b3a31d98 100644 --- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.store.format.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.store.format.f16.ll @@ -15,7 +15,7 @@ define amdgpu_ps void @raw_ptr_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_vo ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (s16) into %ir.rsrc, align 1, addrspace 8) + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (f16) into %ir.rsrc, align 1, addrspace 8) ; UNPACKED-NEXT: S_ENDPGM 0 ; ; PACKED-LABEL: name: raw_ptr_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f16 @@ -30,7 +30,7 @@ define amdgpu_ps void @raw_ptr_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_vo ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (s16) into %ir.rsrc, align 1, addrspace 8) + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (f16) into %ir.rsrc, align 1, addrspace 8) ; PACKED-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.ptr.buffer.store.format.f16(half %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -48,7 +48,7 @@ define amdgpu_ps void @raw_ptr_buffer_store_format__sgpr_rsrc__vgpr_val__voffset ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_gfx80_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable store (s16) into %ir.rsrc, align 1, addrspace 8) + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_gfx80_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable store (f16) into %ir.rsrc, align 1, addrspace 8) ; UNPACKED-NEXT: S_ENDPGM 0 ; ; PACKED-LABEL: name: raw_ptr_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_4095__sgpr_soffset_f16 @@ -62,7 +62,7 @@ define amdgpu_ps void @raw_ptr_buffer_store_format__sgpr_rsrc__vgpr_val__voffset ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable store (s16) into %ir.rsrc, align 1, addrspace 8) + ; PACKED-NEXT: 
BUFFER_STORE_FORMAT_D16_X_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable store (f16) into %ir.rsrc, align 1, addrspace 8) ; PACKED-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.ptr.buffer.store.format.f16(half %val, ptr addrspace(8) %rsrc, i32 4095, i32 %soffset, i32 0) ret void @@ -85,7 +85,7 @@ define amdgpu_ps void @raw_ptr_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_vo ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY7]], [[COPY4]], implicit $exec ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>) into %ir.rsrc, align 1, addrspace 8) + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x f16>) into %ir.rsrc, align 1, addrspace 8) ; UNPACKED-NEXT: S_ENDPGM 0 ; ; PACKED-LABEL: name: raw_ptr_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16 @@ -100,7 +100,7 @@ define amdgpu_ps void @raw_ptr_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_vo ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>) into %ir.rsrc, align 1, addrspace 8) + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x f16>) into %ir.rsrc, align 1, addrspace 8) ; PACKED-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.ptr.buffer.store.format.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -126,7 +126,7 @@ define amdgpu_ps void @raw_ptr_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_vo ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY9]], [[COPY5]], implicit $exec ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], %subreg.sub3 ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>) into %ir.rsrc, align 1, addrspace 8) + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x f16>) into %ir.rsrc, align 1, addrspace 8) ; UNPACKED-NEXT: S_ENDPGM 0 ; ; PACKED-LABEL: name: raw_ptr_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16 @@ -143,7 +143,7 @@ define 
amdgpu_ps void @raw_ptr_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_vo ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; PACKED-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>) into %ir.rsrc, align 1, addrspace 8) + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x f16>) into %ir.rsrc, align 1, addrspace 8) ; PACKED-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.ptr.buffer.store.format.v4f16(<4 x half> %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -193,7 +193,7 @@ define amdgpu_ps void @raw_ptr_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_vo ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE2]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>) into %ir.rsrc, align 1, addrspace 8) + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE2]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x f16>) into %ir.rsrc, align 1, addrspace 8) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -242,7 +242,7 @@ define amdgpu_ps void @raw_ptr_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_vo ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE2]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>) into %ir.rsrc, align 1, addrspace 8) + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE2]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x f16>) into %ir.rsrc, align 1, addrspace 8) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -274,7 +274,7 @@ define amdgpu_ps void @raw_ptr_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_vo ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY6]], [[COPY4]], implicit $exec ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE]], [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>) into %ir.rsrc, align 1, addrspace 8) + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE]], [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x f16>) into %ir.rsrc, align 1, addrspace 8) ; 
UNPACKED-NEXT: S_ENDPGM 0 ; ; PACKED-LABEL: name: raw_ptr_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4095 @@ -289,7 +289,7 @@ define amdgpu_ps void @raw_ptr_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_vo ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>) into %ir.rsrc, align 1, addrspace 8) + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x f16>) into %ir.rsrc, align 1, addrspace 8) ; PACKED-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.ptr.buffer.store.format.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 4095, i32 0) ret void @@ -312,7 +312,7 @@ define amdgpu_ps void @raw_ptr_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_vo ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY6]], [[COPY4]], implicit $exec ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE]], [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>) into %ir.rsrc, align 1, addrspace 8) + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE]], [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x f16>) into %ir.rsrc, align 1, addrspace 8) ; UNPACKED-NEXT: S_ENDPGM 0 ; ; PACKED-LABEL: name: raw_ptr_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4096 @@ -327,7 +327,7 @@ define amdgpu_ps void @raw_ptr_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_vo ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>) into %ir.rsrc, align 1, addrspace 8) + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x f16>) into %ir.rsrc, align 1, addrspace 8) ; PACKED-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.ptr.buffer.store.format.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 4096, i32 0) ret void @@ -350,7 +350,7 @@ define amdgpu_ps void @raw_ptr_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_vo ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY7]], [[COPY4]], implicit $exec ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 ; UNPACKED-NEXT: 
[[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 16, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>) into %ir.rsrc, align 1, addrspace 8) + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 16, 0, 0, implicit $exec :: (dereferenceable store (<2 x f16>) into %ir.rsrc, align 1, addrspace 8) ; UNPACKED-NEXT: S_ENDPGM 0 ; ; PACKED-LABEL: name: raw_ptr_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_16 @@ -365,7 +365,7 @@ define amdgpu_ps void @raw_ptr_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_vo ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>) into %ir.rsrc, align 1, addrspace 8) + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, implicit $exec :: (dereferenceable store (<2 x f16>) into %ir.rsrc, align 1, addrspace 8) ; PACKED-NEXT: S_ENDPGM 0 %voffset.add = add i32 %voffset, 16 call void @llvm.amdgcn.raw.ptr.buffer.store.format.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -389,7 +389,7 @@ define amdgpu_ps void @raw_ptr_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_vo ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY7]], [[COPY4]], implicit $exec ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>) into %ir.rsrc, align 1, addrspace 8) + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable store (<2 x f16>) into %ir.rsrc, align 1, addrspace 8) ; UNPACKED-NEXT: S_ENDPGM 0 ; ; PACKED-LABEL: name: raw_ptr_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_4095 @@ -404,7 +404,7 @@ define amdgpu_ps void @raw_ptr_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_vo ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>) into %ir.rsrc, align 1, addrspace 8) + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, implicit $exec 
:: (dereferenceable store (<2 x f16>) into %ir.rsrc, align 1, addrspace 8) ; PACKED-NEXT: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4095 call void @llvm.amdgcn.raw.ptr.buffer.store.format.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -431,7 +431,7 @@ define amdgpu_ps void @raw_ptr_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_vo ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY8]], [[COPY4]], implicit $exec ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE1]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>) into %ir.rsrc, align 1, addrspace 8) + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE1]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x f16>) into %ir.rsrc, align 1, addrspace 8) ; UNPACKED-NEXT: S_ENDPGM 0 ; ; PACKED-LABEL: name: raw_ptr_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_4096 @@ -449,7 +449,7 @@ define amdgpu_ps void @raw_ptr_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_vo ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; PACKED-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>) into %ir.rsrc, align 1, addrspace 8) + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x f16>) into %ir.rsrc, align 1, addrspace 8) ; PACKED-NEXT: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4096 call void @llvm.amdgcn.raw.ptr.buffer.store.format.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -504,7 +504,7 @@ define amdgpu_ps void @raw_ptr_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_vo ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE2]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>) into %ir.rsrc, align 1, addrspace 8) + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE2]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x f16>) into %ir.rsrc, align 1, addrspace 8) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -556,7 +556,7 @@ define amdgpu_ps void @raw_ptr_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_vo ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), 
%bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE2]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>) into %ir.rsrc, align 1, addrspace 8) + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE2]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x f16>) into %ir.rsrc, align 1, addrspace 8) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.store.format.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.store.format.f32.ll index fb974a835164a..4c59edd289bdc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.store.format.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.store.format.f32.ll @@ -15,7 +15,7 @@ define amdgpu_ps void @raw_ptr_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_vo ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: BUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (f32) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.ptr.buffer.store.format.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -33,7 +33,7 @@ define amdgpu_ps void @raw_ptr_buffer_store_format__sgpr_rsrc__vgpr_val__voffset ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: BUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable store (f32) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.ptr.buffer.store.format.f32(float %val, ptr addrspace(8) %rsrc, i32 4095, i32 %soffset, i32 0) ret void @@ -54,7 +54,7 @@ define amdgpu_ps void @raw_ptr_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_vo ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], 
[[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x f32>) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.ptr.buffer.store.format.v2f32(<2 x float> %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -76,7 +76,7 @@ define amdgpu_ps void @raw_ptr_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_vo ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: BUFFER_STORE_FORMAT_XYZ_OFFEN_exact [[REG_SEQUENCE]], [[COPY7]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, implicit $exec :: (dereferenceable store (<3 x s32>) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_FORMAT_XYZ_OFFEN_exact [[REG_SEQUENCE]], [[COPY7]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, implicit $exec :: (dereferenceable store (<3 x f32>) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.ptr.buffer.store.format.v3f32(<3 x float> %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -99,7 +99,7 @@ define amdgpu_ps void @raw_ptr_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_vo ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE]], [[COPY8]], [[REG_SEQUENCE1]], [[COPY9]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE]], [[COPY8]], [[REG_SEQUENCE1]], [[COPY9]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x f32>) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.ptr.buffer.store.format.v4f32(<4 x float> %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -145,7 +145,7 @@ define amdgpu_ps void @raw_ptr_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_vo ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE]], [[COPY8]], [[REG_SEQUENCE2]], [[COPY9]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE]], [[COPY8]], [[REG_SEQUENCE2]], [[COPY9]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x f32>) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -175,7 +175,7 @@ define amdgpu_ps void @raw_ptr_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_vo ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>) into %ir.rsrc, 
align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x f32>) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.ptr.buffer.store.format.v2f32(<2 x float> %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 4095, i32 0) ret void @@ -196,7 +196,7 @@ define amdgpu_ps void @raw_ptr_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_vo ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x f32>) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.ptr.buffer.store.format.v2f32(<2 x float> %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 4096, i32 0) ret void @@ -217,7 +217,7 @@ define amdgpu_ps void @raw_ptr_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_vo ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 16, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 16, 0, 0, implicit $exec :: (dereferenceable store (<2 x f32>) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 %voffset.add = add i32 %voffset, 16 call void @llvm.amdgcn.raw.ptr.buffer.store.format.v2f32(<2 x float> %val, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -239,7 +239,7 @@ define amdgpu_ps void @raw_ptr_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_vo ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 4095, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 4095, 0, 0, implicit $exec :: (dereferenceable store (<2 x f32>) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4095 call void @llvm.amdgcn.raw.ptr.buffer.store.format.v2f32(<2 x float> %val, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -264,7 +264,7 @@ define amdgpu_ps void @raw_ptr_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_vo ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; CHECK-NEXT: 
[[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE1]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE1]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x f32>) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4096 call void @llvm.amdgcn.raw.ptr.buffer.store.format.v2f32(<2 x float> %val, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -316,7 +316,7 @@ define amdgpu_ps void @raw_ptr_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_vo ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE2]], [[COPY9]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE2]], [[COPY9]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x f32>) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.store.ll index ec0bd1f9ca4ea..10628e448cb6a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.store.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.store.ll @@ -16,7 +16,7 @@ define amdgpu_ps void @raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (f32) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -38,7 +38,7 @@ define amdgpu_ps void @raw_ptr_buffer_store__sgpr_rsrc__sgpr_val__sgpr_voffset__ ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY7]], [[COPY8]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into 
%ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY7]], [[COPY8]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (f32) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -81,7 +81,7 @@ define amdgpu_ps void @raw_ptr_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__ ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (f32) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -124,7 +124,7 @@ define amdgpu_ps void @raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__ ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[V_READFIRSTLANE_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[V_READFIRSTLANE_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (f32) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -179,7 +179,7 @@ define amdgpu_ps void @raw_ptr_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__ ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable store (f32) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -207,7 +207,7 @@ define amdgpu_ps void @raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, implicit $exec :: (dereferenceable store (f32) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %val, ptr addrspace(8) %rsrc, i32 
%voffset, i32 %soffset, i32 1) ret void @@ -226,7 +226,7 @@ define amdgpu_ps void @raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 2, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 2, 0, implicit $exec :: (dereferenceable store (f32) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 2) ret void @@ -245,7 +245,7 @@ define amdgpu_ps void @raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 3, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 3, 0, implicit $exec :: (dereferenceable store (f32) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 3) ret void @@ -264,7 +264,7 @@ define amdgpu_ps void @raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 4, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 4, 0, implicit $exec :: (dereferenceable store (f32) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 4) ret void @@ -283,7 +283,7 @@ define amdgpu_ps void @raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 6, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 6, 0, implicit $exec :: (dereferenceable store (f32) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 call void 
@llvm.amdgcn.raw.ptr.buffer.store.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 6) ret void @@ -302,7 +302,7 @@ define amdgpu_ps void @raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 5, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 5, 0, implicit $exec :: (dereferenceable store (f32) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 5) ret void @@ -321,7 +321,7 @@ define amdgpu_ps void @raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 7, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 7, 0, implicit $exec :: (dereferenceable store (f32) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 7) ret void @@ -340,7 +340,7 @@ define amdgpu_ps void @raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (volatile dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (volatile dereferenceable store (f32) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 -2147483648) ret void @@ -361,7 +361,7 @@ define amdgpu_ps void @raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 0, 
0, implicit $exec :: (dereferenceable store (<2 x f32>) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.ptr.buffer.store.v2f32(<2 x float> %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -383,7 +383,7 @@ define amdgpu_ps void @raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: BUFFER_STORE_DWORDX3_OFFEN_exact [[REG_SEQUENCE]], [[COPY7]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, implicit $exec :: (dereferenceable store (<3 x s32>) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_DWORDX3_OFFEN_exact [[REG_SEQUENCE]], [[COPY7]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, implicit $exec :: (dereferenceable store (<3 x f32>) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.ptr.buffer.store.v3f32(<3 x float> %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -406,7 +406,7 @@ define amdgpu_ps void @raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFEN_exact [[REG_SEQUENCE]], [[COPY8]], [[REG_SEQUENCE1]], [[COPY9]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFEN_exact [[REG_SEQUENCE]], [[COPY8]], [[REG_SEQUENCE1]], [[COPY9]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x f32>) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -425,7 +425,7 @@ define amdgpu_ps void @raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: BUFFER_STORE_BYTE_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (s8) into %ir.rsrc, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_BYTE_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (i8) into %ir.rsrc, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 %val.trunc = trunc i32 %val to i8 call void @llvm.amdgcn.raw.ptr.buffer.store.i8(i8 %val.trunc, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -445,7 +445,7 @@ define amdgpu_ps void @raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: BUFFER_STORE_SHORT_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: 
(dereferenceable store (s16) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_SHORT_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (i16) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 %val.trunc = trunc i32 %val to i16 call void @llvm.amdgcn.raw.ptr.buffer.store.i16(i16 %val.trunc, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -465,7 +465,7 @@ define amdgpu_ps void @raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: BUFFER_STORE_SHORT_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (s16) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_SHORT_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (f16) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.ptr.buffer.store.f16(half %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -484,7 +484,7 @@ define amdgpu_ps void @raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x f16>) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.ptr.buffer.store.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -505,7 +505,7 @@ define amdgpu_ps void @raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x f16>) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.ptr.buffer.store.v4f16(<4 x half> %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -549,7 +549,7 @@ define amdgpu_ps void @raw_ptr_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__ ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE2]], [[COPY7]], 0, 0, 0, implicit $exec :: 
(dereferenceable store (<4 x s16>) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE2]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x f16>) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -576,7 +576,7 @@ define amdgpu_ps void @raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__sgpr_soffset_f ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable store (f32) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %val, ptr addrspace(8) %rsrc, i32 4095, i32 %soffset, i32 0) ret void @@ -596,7 +596,7 @@ define amdgpu_ps void @raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__sgpr_soffset_f ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY6]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY6]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable store (f32) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %val, ptr addrspace(8) %rsrc, i32 4096, i32 %soffset, i32 0) ret void @@ -615,7 +615,7 @@ define amdgpu_ps void @raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, implicit $exec :: (dereferenceable store (f32) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 %voffset.add = add i32 %voffset, 16 call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -635,7 +635,7 @@ define amdgpu_ps void @raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - 
; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable store (f32) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4095 call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -658,7 +658,7 @@ define amdgpu_ps void @raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (f32) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4096 call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -678,7 +678,7 @@ define amdgpu_ps void @raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x f16>) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.ptr.buffer.store.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 4095, i32 0) ret void @@ -697,7 +697,7 @@ define amdgpu_ps void @raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x f16>) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.ptr.buffer.store.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 4096, i32 0) ret void @@ -716,7 +716,7 @@ define 
amdgpu_ps void @raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, implicit $exec :: (dereferenceable store (<2 x f16>) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 %voffset.add = add i32 %voffset, 16 call void @llvm.amdgcn.raw.ptr.buffer.store.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -736,7 +736,7 @@ define amdgpu_ps void @raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable store (<2 x f16>) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4095 call void @llvm.amdgcn.raw.ptr.buffer.store.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -759,7 +759,7 @@ define amdgpu_ps void @raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x f16>) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4096 call void @llvm.amdgcn.raw.ptr.buffer.store.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -806,7 +806,7 @@ define amdgpu_ps void @raw_ptr_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__ ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE1]], [[COPY6]], 904, 0, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE1]], [[COPY6]], 904, 0, 0, implicit $exec :: (dereferenceable 
store (f32) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -860,7 +860,7 @@ define amdgpu_ps void @raw_ptr_buffer_store__vgpr_rsrc__vgpr_val__5000_voffset__ ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY5]], 904, 0, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY5]], 904, 0, 0, implicit $exec :: (dereferenceable store (f32) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.tbuffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.tbuffer.load.ll index 615543cf7ed51..541082e265693 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.tbuffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.tbuffer.load.ll @@ -14,7 +14,7 @@ define amdgpu_ps float @raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (f32) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.ptr.tbuffer.load.f32(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) @@ -33,7 +33,7 @@ define amdgpu_ps <2 x float> @raw_tbuffer_load_v2f32__sgpr_rsrc__vgpr_voffset__s ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_XY_OFFEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_XY_OFFEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<2 x f32>) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XY_OFFEN]].sub0 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XY_OFFEN]].sub1 ; CHECK-NEXT: $vgpr0 = COPY [[COPY6]] @@ -55,7 +55,7 @@ define amdgpu_ps <3 x float> @raw_tbuffer_load_v3f32__sgpr_rsrc__vgpr_voffset__s ; CHECK-NEXT: 
[[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN:%[0-9]+]]:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN:%[0-9]+]]:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<3 x f32>) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub0 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub1 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub2 @@ -79,7 +79,7 @@ define amdgpu_ps <4 x float> @raw_tbuffer_load_v4f32__sgpr_rsrc__vgpr_voffset__s ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<4 x f32>) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub0 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub1 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub2 @@ -132,7 +132,7 @@ define amdgpu_ps float @raw_tbuffer_load_f32__vgpr_rsrc__sgpr_voffset__vgpr_soff ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (f32) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -160,7 +160,7 @@ define amdgpu_ps float @raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, 
align 1, addrspace 8) + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 0, implicit $exec :: (dereferenceable load (f32) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.ptr.tbuffer.load.f32(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 1) @@ -179,7 +179,7 @@ define amdgpu_ps float @raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 2, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 2, 0, implicit $exec :: (dereferenceable load (f32) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.ptr.tbuffer.load.f32(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 2) @@ -198,7 +198,7 @@ define amdgpu_ps float @raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 3, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 3, 0, implicit $exec :: (dereferenceable load (f32) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.ptr.tbuffer.load.f32(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 3) @@ -217,7 +217,7 @@ define amdgpu_ps float @raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 4, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 4, 0, implicit $exec :: (dereferenceable load (f32) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 
%val = call float @llvm.amdgcn.raw.ptr.tbuffer.load.f32(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 4) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.tbuffer.store.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.tbuffer.store.f16.ll index 99bc50eaf3a06..fc61a8a973bdb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.tbuffer.store.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.tbuffer.store.f16.ll @@ -15,7 +15,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (s16) into %ir.rsrc, align 1, addrspace 8) + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (f16) into %ir.rsrc, align 1, addrspace 8) ; UNPACKED-NEXT: S_ENDPGM 0 ; ; PACKED-LABEL: name: raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset @@ -30,7 +30,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (s16) into %ir.rsrc, align 1, addrspace 8) + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (f16) into %ir.rsrc, align 1, addrspace 8) ; PACKED-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.ptr.tbuffer.store.f16(half %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) ret void @@ -53,7 +53,7 @@ define amdgpu_ps void @raw_tbuffer_store_v2f16__sgpr_rsrc__vgpr_voffset__sgpr_so ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY7]], [[COPY]], implicit $exec ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>) into %ir.rsrc, align 1, addrspace 8) + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (<2 x f16>) into %ir.rsrc, align 1, addrspace 8) ; UNPACKED-NEXT: S_ENDPGM 0 ; ; PACKED-LABEL: name: raw_tbuffer_store_v2f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset @@ -68,7 +68,7 @@ define amdgpu_ps void @raw_tbuffer_store_v2f16__sgpr_rsrc__vgpr_voffset__sgpr_so ; PACKED-NEXT: 
[[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>) into %ir.rsrc, align 1, addrspace 8) + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (<2 x f16>) into %ir.rsrc, align 1, addrspace 8) ; PACKED-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.ptr.tbuffer.store.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) ret void @@ -100,7 +100,7 @@ define amdgpu_ps void @raw_tbuffer_store_v4f16__sgpr_rsrc__vgpr_voffset__sgpr_so ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY9]], [[COPY1]], implicit $exec ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[COPY1]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], %subreg.sub3 ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>) into %ir.rsrc, align 1, addrspace 8) + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (<4 x f16>) into %ir.rsrc, align 1, addrspace 8) ; UNPACKED-NEXT: S_ENDPGM 0 ; ; PACKED-LABEL: name: raw_tbuffer_store_v4f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset @@ -117,7 +117,7 @@ define amdgpu_ps void @raw_tbuffer_store_v4f16__sgpr_rsrc__vgpr_voffset__sgpr_so ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; PACKED-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>) into %ir.rsrc, align 1, addrspace 8) + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (<4 x f16>) into %ir.rsrc, align 1, addrspace 8) ; PACKED-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.ptr.tbuffer.store.v4f16(<4 x half> %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) ret void @@ -160,7 +160,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__vgpr_voffset__sgpr_soff ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s16) into %ir.rsrc, align 1, addrspace 8) + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 94, 0, 0, implicit $exec :: 
(dereferenceable store (f16) into %ir.rsrc, align 1, addrspace 8) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -207,7 +207,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__vgpr_voffset__sgpr_soff ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s16) into %ir.rsrc, align 1, addrspace 8) + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (f16) into %ir.rsrc, align 1, addrspace 8) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -262,7 +262,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__vgpr_voffset__vgpr_soff ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (s16) into %ir.rsrc, align 1, addrspace 8) + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (f16) into %ir.rsrc, align 1, addrspace 8) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -312,7 +312,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__vgpr_voffset__vgpr_soff ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (s16) into %ir.rsrc, align 1, addrspace 8) + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (f16) into %ir.rsrc, align 1, addrspace 8) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -368,7 +368,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__sgpr_voffset__vgpr_soff ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (s16) into %ir.rsrc, align 1, addrspace 8) + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (f16) into %ir.rsrc, align 1, addrspace 8) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -419,7 +419,7 @@ define amdgpu_ps void 
@raw_tbuffer_store_f16__vgpr_rsrc__sgpr_voffset__vgpr_soff ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (s16) into %ir.rsrc, align 1, addrspace 8) + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (f16) into %ir.rsrc, align 1, addrspace 8) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -447,7 +447,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 0, implicit $exec :: (dereferenceable store (s16) into %ir.rsrc, align 1, addrspace 8) + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 0, implicit $exec :: (dereferenceable store (f16) into %ir.rsrc, align 1, addrspace 8) ; UNPACKED-NEXT: S_ENDPGM 0 ; ; PACKED-LABEL: name: raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_glc @@ -462,7 +462,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 0, implicit $exec :: (dereferenceable store (s16) into %ir.rsrc, align 1, addrspace 8) + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 0, implicit $exec :: (dereferenceable store (f16) into %ir.rsrc, align 1, addrspace 8) ; PACKED-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.ptr.tbuffer.store.f16(half %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 1) ret void @@ -481,7 +481,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 2, 0, implicit $exec :: (dereferenceable store (s16) into %ir.rsrc, align 1, addrspace 8) + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 2, 0, implicit $exec :: (dereferenceable store (f16) into %ir.rsrc, align 1, addrspace 8) ; UNPACKED-NEXT: S_ENDPGM 0 ; ; PACKED-LABEL: name: raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc @@ 
-496,7 +496,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 2, 0, implicit $exec :: (dereferenceable store (s16) into %ir.rsrc, align 1, addrspace 8) + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 2, 0, implicit $exec :: (dereferenceable store (f16) into %ir.rsrc, align 1, addrspace 8) ; PACKED-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.ptr.tbuffer.store.f16(half %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 2) ret void @@ -515,7 +515,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 3, 0, implicit $exec :: (dereferenceable store (s16) into %ir.rsrc, align 1, addrspace 8) + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 3, 0, implicit $exec :: (dereferenceable store (f16) into %ir.rsrc, align 1, addrspace 8) ; UNPACKED-NEXT: S_ENDPGM 0 ; ; PACKED-LABEL: name: raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc_glc @@ -530,7 +530,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 3, 0, implicit $exec :: (dereferenceable store (s16) into %ir.rsrc, align 1, addrspace 8) + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 3, 0, implicit $exec :: (dereferenceable store (f16) into %ir.rsrc, align 1, addrspace 8) ; PACKED-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.ptr.tbuffer.store.f16(half %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 3) ret void @@ -549,7 +549,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 4, 0, implicit $exec :: (dereferenceable store (s16) into %ir.rsrc, align 1, addrspace 8) + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 4, 0, implicit $exec :: 
(dereferenceable store (f16) into %ir.rsrc, align 1, addrspace 8) ; UNPACKED-NEXT: S_ENDPGM 0 ; ; PACKED-LABEL: name: raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_dlc @@ -564,7 +564,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 4, 0, implicit $exec :: (dereferenceable store (s16) into %ir.rsrc, align 1, addrspace 8) + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 4, 0, implicit $exec :: (dereferenceable store (f16) into %ir.rsrc, align 1, addrspace 8) ; PACKED-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.ptr.tbuffer.store.f16(half %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 4) ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.tbuffer.store.i8.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.tbuffer.store.i8.ll index cc70c27aa48f1..1dde4431e35de 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.tbuffer.store.i8.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.tbuffer.store.i8.ll @@ -15,7 +15,7 @@ define amdgpu_ps void @raw_tbuffer_store_i8__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s8) into %ir.rsrc, addrspace 8) + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (i8) into %ir.rsrc, addrspace 8) ; UNPACKED-NEXT: S_ENDPGM 0 ; ; PACKED-LABEL: name: raw_tbuffer_store_i8__sgpr_rsrc__vgpr_voffset__sgpr_soffset @@ -30,7 +30,7 @@ define amdgpu_ps void @raw_tbuffer_store_i8__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s8) into %ir.rsrc, addrspace 8) + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (i8) into %ir.rsrc, addrspace 8) ; PACKED-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.ptr.tbuffer.store.i8(i8 %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0) ret void @@ -73,7 +73,7 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__vgpr_voffset__sgpr_soffs ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], 
[[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s8) into %ir.rsrc, addrspace 8) + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (i8) into %ir.rsrc, addrspace 8) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -120,7 +120,7 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__vgpr_voffset__sgpr_soffs ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s8) into %ir.rsrc, addrspace 8) + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (i8) into %ir.rsrc, addrspace 8) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -175,7 +175,7 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__vgpr_voffset__vgpr_soffs ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s8) into %ir.rsrc, addrspace 8) + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (i8) into %ir.rsrc, addrspace 8) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -225,7 +225,7 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__vgpr_voffset__vgpr_soffs ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s8) into %ir.rsrc, addrspace 8) + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (i8) into %ir.rsrc, addrspace 8) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -281,7 +281,7 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__sgpr_voffset__vgpr_soffs ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s8) into %ir.rsrc, addrspace 8) + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (i8) into %ir.rsrc, addrspace 8) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; 
UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -332,7 +332,7 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__sgpr_voffset__vgpr_soffs ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s8) into %ir.rsrc, addrspace 8) + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (i8) into %ir.rsrc, addrspace 8) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.tbuffer.store.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.tbuffer.store.ll index 5092060602bd4..75bf469a016ad 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.tbuffer.store.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.tbuffer.store.ll @@ -16,7 +16,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (f32) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.ptr.tbuffer.store.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) ret void @@ -38,7 +38,7 @@ define amdgpu_ps void @raw_tbuffer_store_v2f32__sgpr_rsrc__vgpr_voffset__sgpr_so ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (<2 x f32>) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.ptr.tbuffer.store.v2f32(<2 x float> %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) ret void @@ -61,7 +61,7 @@ define amdgpu_ps void @raw_tbuffer_store_v3f32__sgpr_rsrc__vgpr_voffset__sgpr_so ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[COPY6]], %subreg.sub3 - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_XYZ_OFFEN_exact 
[[REG_SEQUENCE]], [[COPY7]], [[REG_SEQUENCE1]], [[COPY8]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (<3 x s32>) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_XYZ_OFFEN_exact [[REG_SEQUENCE]], [[COPY7]], [[REG_SEQUENCE1]], [[COPY8]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (<3 x f32>) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.ptr.tbuffer.store.v3f32(<3 x float> %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) ret void @@ -85,7 +85,7 @@ define amdgpu_ps void @raw_tbuffer_store_v4f32__sgpr_rsrc__vgpr_voffset__sgpr_so ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE]], [[COPY8]], [[REG_SEQUENCE1]], [[COPY9]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE]], [[COPY8]], [[REG_SEQUENCE1]], [[COPY9]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (<4 x f32>) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.ptr.tbuffer.store.v4f32(<4 x float> %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) ret void @@ -106,7 +106,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__sgpr_voffset__sgpr_soff ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr7 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (f32) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.ptr.tbuffer.store.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0) ret void @@ -149,7 +149,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__vgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 94, 1, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 94, 1, 0, implicit $exec :: (dereferenceable store (f32) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -204,7 +204,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__vgpr_rsrc__vgpr_voffset__vgpr_soff ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 
0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (f32) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -260,7 +260,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__vgpr_rsrc__sgpr_voffset__vgpr_soff ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (f32) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -289,7 +289,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 0, implicit $exec :: (dereferenceable store (f32) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.ptr.tbuffer.store.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 1) ret void @@ -309,7 +309,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 2, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 2, 0, implicit $exec :: (dereferenceable store (f32) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.ptr.tbuffer.store.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 2) ret void @@ -329,7 +329,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact 
[[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 3, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 3, 0, implicit $exec :: (dereferenceable store (f32) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.ptr.tbuffer.store.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 3) ret void @@ -349,7 +349,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 4, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 4, 0, implicit $exec :: (dereferenceable store (f32) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.ptr.tbuffer.store.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 4) ret void @@ -369,7 +369,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vdpr_voffset__sgpr_soff ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (f32) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.ptr.tbuffer.store.f32(float %val, ptr addrspace(8) %rsrc, i32 0, i32 %soffset, i32 94, i32 0) ret void @@ -387,7 +387,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 94, 0, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 94, 0, 0, implicit $exec :: (dereferenceable store (f32) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.ptr.tbuffer.store.f32(float %val, ptr addrspace(8) %rsrc, i32 4095, i32 %soffset, i32 94, i32 0) ret void @@ -407,7 +407,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, 
[[COPY4]], %subreg.sub3 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY6]], [[REG_SEQUENCE]], [[COPY5]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY6]], [[REG_SEQUENCE]], [[COPY5]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (f32) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.ptr.tbuffer.store.f32(float %val, ptr addrspace(8) %rsrc, i32 4096, i32 %soffset, i32 94, i32 0) ret void @@ -426,7 +426,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 94, 0, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 94, 0, 0, implicit $exec :: (dereferenceable store (f32) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 %voffset = add i32 %voffset.base, 16 call void @llvm.amdgcn.raw.ptr.tbuffer.store.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0) @@ -446,7 +446,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 94, 0, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 94, 0, 0, implicit $exec :: (dereferenceable store (f32) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 %voffset = add i32 %voffset.base, 4095 call void @llvm.amdgcn.raw.ptr.tbuffer.store.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0) @@ -469,7 +469,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; CHECK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[V_ADD_U32_e64_]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[V_ADD_U32_e64_]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (f32) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 %voffset = add i32 %voffset.base, 4096 call void @llvm.amdgcn.raw.ptr.tbuffer.store.f32(float %val, ptr addrspace(8) 
%rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0) @@ -489,7 +489,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (f32) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.ptr.tbuffer.store.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 4095, i32 94, i32 0) ret void @@ -508,7 +508,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (f32) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.ptr.tbuffer.store.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 4096, i32 94, i32 0) ret void @@ -529,7 +529,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY6]], [[S_MOV_B32_]], implicit-def dead $scc ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (f32) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 %soffset = add i32 %soffset.base, 16 call void @llvm.amdgcn.raw.ptr.tbuffer.store.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0) @@ -551,7 +551,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY6]], [[S_MOV_B32_]], implicit-def dead $scc ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store 
(s32) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (f32) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 %soffset = add i32 %soffset.base, 4095 call void @llvm.amdgcn.raw.ptr.tbuffer.store.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0) @@ -573,7 +573,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY6]], [[S_MOV_B32_]], implicit-def dead $scc ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (f32) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 %soffset = add i32 %soffset.base, 4096 call void @llvm.amdgcn.raw.ptr.tbuffer.store.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0) @@ -619,7 +619,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[S_ADD_I32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[S_ADD_I32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (f32) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -675,7 +675,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[V_ADD_U32_e64_]], [[REG_SEQUENCE1]], [[COPY6]], 904, 94, 0, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[V_ADD_U32_e64_]], [[REG_SEQUENCE1]], [[COPY6]], 904, 94, 0, 0, implicit $exec :: (dereferenceable store (f32) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.ll index f6670baea089c..13117261fa72f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.ll @@ -15,7 +15,7 @@ define amdgpu_ps float @raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; GFX10_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE 
[[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX10_GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10_GFX11-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX10_GFX11-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX10_GFX11-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX10_GFX11-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]] ; GFX10_GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -30,7 +30,7 @@ define amdgpu_ps float @raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: [[TBUFFER_LOAD_FORMAT_X_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX12-NEXT: [[TBUFFER_LOAD_FORMAT_X_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_VBUFFER_OFFEN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) @@ -49,7 +49,7 @@ define amdgpu_ps <2 x float> @raw_tbuffer_load_v2f32__sgpr_rsrc__vgpr_voffset__s ; GFX10_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX10_GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10_GFX11-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX10_GFX11-NEXT: [[TBUFFER_LOAD_FORMAT_XY_OFFEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8) + ; GFX10_GFX11-NEXT: [[TBUFFER_LOAD_FORMAT_XY_OFFEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<2 x f32>), align 1, addrspace 8) ; GFX10_GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XY_OFFEN]].sub0 ; GFX10_GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XY_OFFEN]].sub1 ; GFX10_GFX11-NEXT: $vgpr0 = COPY [[COPY6]] @@ -67,7 +67,7 @@ define amdgpu_ps <2 x float> @raw_tbuffer_load_v2f32__sgpr_rsrc__vgpr_voffset__s ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: [[TBUFFER_LOAD_FORMAT_XY_VBUFFER_OFFEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_XY_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8) + ; GFX12-NEXT: 
[[TBUFFER_LOAD_FORMAT_XY_VBUFFER_OFFEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_XY_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<2 x f32>), align 1, addrspace 8) ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XY_VBUFFER_OFFEN]].sub0 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XY_VBUFFER_OFFEN]].sub1 ; GFX12-NEXT: $vgpr0 = COPY [[COPY6]] @@ -89,7 +89,7 @@ define amdgpu_ps <3 x float> @raw_tbuffer_load_v3f32__sgpr_rsrc__vgpr_voffset__s ; GFX10_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX10_GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10_GFX11-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX10_GFX11-NEXT: [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN:%[0-9]+]]:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8) + ; GFX10_GFX11-NEXT: [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN:%[0-9]+]]:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<3 x f32>), align 1, addrspace 8) ; GFX10_GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub0 ; GFX10_GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub1 ; GFX10_GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub2 @@ -109,7 +109,7 @@ define amdgpu_ps <3 x float> @raw_tbuffer_load_v3f32__sgpr_rsrc__vgpr_voffset__s ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: [[TBUFFER_LOAD_FORMAT_XYZ_VBUFFER_OFFEN:%[0-9]+]]:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8) + ; GFX12-NEXT: [[TBUFFER_LOAD_FORMAT_XYZ_VBUFFER_OFFEN:%[0-9]+]]:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<3 x f32>), align 1, addrspace 8) ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_VBUFFER_OFFEN]].sub0 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_VBUFFER_OFFEN]].sub1 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_VBUFFER_OFFEN]].sub2 @@ -133,7 +133,7 @@ define amdgpu_ps <4 x float> @raw_tbuffer_load_v4f32__sgpr_rsrc__vgpr_voffset__s ; GFX10_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX10_GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10_GFX11-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX10_GFX11-NEXT: [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + ; GFX10_GFX11-NEXT: [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<4 x f32>), align 1, 
addrspace 8) ; GFX10_GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub0 ; GFX10_GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub1 ; GFX10_GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub2 @@ -155,7 +155,7 @@ define amdgpu_ps <4 x float> @raw_tbuffer_load_v4f32__sgpr_rsrc__vgpr_voffset__s ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: [[TBUFFER_LOAD_FORMAT_XYZW_VBUFFER_OFFEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + ; GFX12-NEXT: [[TBUFFER_LOAD_FORMAT_XYZW_VBUFFER_OFFEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<4 x f32>), align 1, addrspace 8) ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_VBUFFER_OFFEN]].sub0 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_VBUFFER_OFFEN]].sub1 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_VBUFFER_OFFEN]].sub2 @@ -208,7 +208,7 @@ define amdgpu_ps float @raw_tbuffer_load_f32__vgpr_rsrc__sgpr_voffset__vgpr_soff ; GFX10_GFX11-NEXT: bb.3: ; GFX10_GFX11-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX10_GFX11-NEXT: {{ $}} - ; GFX10_GFX11-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX10_GFX11-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX10_GFX11-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX10_GFX11-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX10_GFX11-NEXT: {{ $}} @@ -259,7 +259,7 @@ define amdgpu_ps float @raw_tbuffer_load_f32__vgpr_rsrc__sgpr_voffset__vgpr_soff ; GFX12-NEXT: bb.3: ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[TBUFFER_LOAD_FORMAT_X_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_VBUFFER_OFFEN [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX12-NEXT: [[TBUFFER_LOAD_FORMAT_X_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_VBUFFER_OFFEN [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -287,7 +287,7 @@ define amdgpu_ps float @raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; GFX10_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX10_GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10_GFX11-NEXT: 
[[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX10_GFX11-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX10_GFX11-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX10_GFX11-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]] ; GFX10_GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -302,7 +302,7 @@ define amdgpu_ps float @raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: [[TBUFFER_LOAD_FORMAT_X_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX12-NEXT: [[TBUFFER_LOAD_FORMAT_X_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_VBUFFER_OFFEN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 1) @@ -321,7 +321,7 @@ define amdgpu_ps float @raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; GFX10_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX10_GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10_GFX11-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX10_GFX11-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 2, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX10_GFX11-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 2, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX10_GFX11-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]] ; GFX10_GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -336,7 +336,7 @@ define amdgpu_ps float @raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: [[TBUFFER_LOAD_FORMAT_X_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 2, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX12-NEXT: [[TBUFFER_LOAD_FORMAT_X_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 2, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_VBUFFER_OFFEN]] ; GFX12-NEXT: 
SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 2) @@ -355,7 +355,7 @@ define amdgpu_ps float @raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; GFX10_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX10_GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10_GFX11-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX10_GFX11-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 3, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX10_GFX11-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 3, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX10_GFX11-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]] ; GFX10_GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -370,7 +370,7 @@ define amdgpu_ps float @raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: [[TBUFFER_LOAD_FORMAT_X_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 3, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX12-NEXT: [[TBUFFER_LOAD_FORMAT_X_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 3, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_VBUFFER_OFFEN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 3) @@ -389,7 +389,7 @@ define amdgpu_ps float @raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; GFX10_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX10_GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10_GFX11-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX10_GFX11-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 4, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX10_GFX11-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 4, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX10_GFX11-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]] ; GFX10_GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -404,7 +404,7 @@ define amdgpu_ps float @raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: 
[[TBUFFER_LOAD_FORMAT_X_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 4, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX12-NEXT: [[TBUFFER_LOAD_FORMAT_X_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 4, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_VBUFFER_OFFEN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 4) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.f16.ll index cb622d250df3c..d8620583e95c0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.f16.ll @@ -16,7 +16,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 8) + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (f16), align 1, addrspace 8) ; UNPACKED-NEXT: S_ENDPGM 0 ; ; PACKED-LABEL: name: raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset @@ -31,7 +31,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 8) + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (f16), align 1, addrspace 8) ; PACKED-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset @@ -46,7 +46,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: TBUFFER_STORE_FORMAT_D16_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 8) + ; GFX12-NEXT: TBUFFER_STORE_FORMAT_D16_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (f16), align 1, addrspace 8) ; 
GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) ret void @@ -69,7 +69,7 @@ define amdgpu_ps void @raw_tbuffer_store_v2f16__sgpr_rsrc__vgpr_voffset__sgpr_so ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY7]], [[COPY]], implicit $exec ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 8) + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (<2 x f16>), align 1, addrspace 8) ; UNPACKED-NEXT: S_ENDPGM 0 ; ; PACKED-LABEL: name: raw_tbuffer_store_v2f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset @@ -84,7 +84,7 @@ define amdgpu_ps void @raw_tbuffer_store_v2f16__sgpr_rsrc__vgpr_voffset__sgpr_so ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 8) + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (<2 x f16>), align 1, addrspace 8) ; PACKED-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_tbuffer_store_v2f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset @@ -99,7 +99,7 @@ define amdgpu_ps void @raw_tbuffer_store_v2f16__sgpr_rsrc__vgpr_voffset__sgpr_so ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: TBUFFER_STORE_FORMAT_D16_XY_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 8) + ; GFX12-NEXT: TBUFFER_STORE_FORMAT_D16_XY_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (<2 x f16>), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) ret void @@ -131,7 +131,7 @@ define amdgpu_ps void @raw_tbuffer_store_v4f16__sgpr_rsrc__vgpr_voffset__sgpr_so ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY9]], [[COPY1]], implicit $exec ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[COPY1]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], %subreg.sub3 - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store 
(<4 x s16>), align 1, addrspace 8) + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (<4 x f16>), align 1, addrspace 8) ; UNPACKED-NEXT: S_ENDPGM 0 ; ; PACKED-LABEL: name: raw_tbuffer_store_v4f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset @@ -148,7 +148,7 @@ define amdgpu_ps void @raw_tbuffer_store_v4f16__sgpr_rsrc__vgpr_voffset__sgpr_so ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; PACKED-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 8) + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (<4 x f16>), align 1, addrspace 8) ; PACKED-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_tbuffer_store_v4f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset @@ -165,7 +165,7 @@ define amdgpu_ps void @raw_tbuffer_store_v4f16__sgpr_rsrc__vgpr_voffset__sgpr_so ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: TBUFFER_STORE_FORMAT_D16_XYZW_VBUFFER_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 8) + ; GFX12-NEXT: TBUFFER_STORE_FORMAT_D16_XYZW_VBUFFER_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (<4 x f16>), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) ret void @@ -208,7 +208,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__vgpr_voffset__sgpr_soff ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 8) + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (f16), align 1, addrspace 8) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -255,7 +255,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__vgpr_voffset__sgpr_soff ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 8) + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 94, 0, 0, implicit $exec :: 
(dereferenceable store (f16), align 1, addrspace 8) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -302,7 +302,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__vgpr_voffset__sgpr_soff ; GFX12-NEXT: bb.3: ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: TBUFFER_STORE_FORMAT_D16_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 8) + ; GFX12-NEXT: TBUFFER_STORE_FORMAT_D16_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (f16), align 1, addrspace 8) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -357,7 +357,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__vgpr_voffset__vgpr_soff ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 8) + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (f16), align 1, addrspace 8) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -407,7 +407,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__vgpr_voffset__vgpr_soff ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 8) + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (f16), align 1, addrspace 8) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -457,7 +457,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__vgpr_voffset__vgpr_soff ; GFX12-NEXT: bb.3: ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: TBUFFER_STORE_FORMAT_D16_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 8) + ; GFX12-NEXT: TBUFFER_STORE_FORMAT_D16_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (f16), align 1, addrspace 8) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -513,7 +513,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__sgpr_voffset__vgpr_soff ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), 
%bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 8) + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (f16), align 1, addrspace 8) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -564,7 +564,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__sgpr_voffset__vgpr_soff ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 8) + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (f16), align 1, addrspace 8) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -615,7 +615,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__sgpr_voffset__vgpr_soff ; GFX12-NEXT: bb.3: ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: TBUFFER_STORE_FORMAT_D16_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 8) + ; GFX12-NEXT: TBUFFER_STORE_FORMAT_D16_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (f16), align 1, addrspace 8) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -643,7 +643,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 8) + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 0, implicit $exec :: (dereferenceable store (f16), align 1, addrspace 8) ; UNPACKED-NEXT: S_ENDPGM 0 ; ; PACKED-LABEL: name: raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_glc @@ -658,7 +658,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact 
[[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 8) + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 0, implicit $exec :: (dereferenceable store (f16), align 1, addrspace 8) ; PACKED-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_glc @@ -673,7 +673,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: TBUFFER_STORE_FORMAT_D16_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 8) + ; GFX12-NEXT: TBUFFER_STORE_FORMAT_D16_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 0, implicit $exec :: (dereferenceable store (f16), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 1) ret void @@ -692,7 +692,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 2, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 8) + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 2, 0, implicit $exec :: (dereferenceable store (f16), align 1, addrspace 8) ; UNPACKED-NEXT: S_ENDPGM 0 ; ; PACKED-LABEL: name: raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc @@ -707,7 +707,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 2, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 8) + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 2, 0, implicit $exec :: (dereferenceable store (f16), align 1, addrspace 8) ; PACKED-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc @@ -722,7 +722,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: TBUFFER_STORE_FORMAT_D16_X_VBUFFER_OFFEN_exact [[COPY]], 
[[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 2, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 8) + ; GFX12-NEXT: TBUFFER_STORE_FORMAT_D16_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 2, 0, implicit $exec :: (dereferenceable store (f16), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 2) ret void @@ -741,7 +741,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 3, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 8) + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 3, 0, implicit $exec :: (dereferenceable store (f16), align 1, addrspace 8) ; UNPACKED-NEXT: S_ENDPGM 0 ; ; PACKED-LABEL: name: raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc_glc @@ -756,7 +756,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 3, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 8) + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 3, 0, implicit $exec :: (dereferenceable store (f16), align 1, addrspace 8) ; PACKED-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc_glc @@ -771,7 +771,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: TBUFFER_STORE_FORMAT_D16_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 3, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 8) + ; GFX12-NEXT: TBUFFER_STORE_FORMAT_D16_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 3, 0, implicit $exec :: (dereferenceable store (f16), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 3) ret void @@ -790,7 +790,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: 
TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 4, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 8) + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 4, 0, implicit $exec :: (dereferenceable store (f16), align 1, addrspace 8) ; UNPACKED-NEXT: S_ENDPGM 0 ; ; PACKED-LABEL: name: raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_dlc @@ -805,7 +805,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 4, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 8) + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 4, 0, implicit $exec :: (dereferenceable store (f16), align 1, addrspace 8) ; PACKED-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_dlc @@ -820,7 +820,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: TBUFFER_STORE_FORMAT_D16_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 4, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 8) + ; GFX12-NEXT: TBUFFER_STORE_FORMAT_D16_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 4, 0, implicit $exec :: (dereferenceable store (f16), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 4) ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.i8.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.i8.ll index 1e61db7acff64..37e4ddb59cea3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.i8.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.i8.ll @@ -16,7 +16,7 @@ define amdgpu_ps void @raw_tbuffer_store_i8__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 8) + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (i8), addrspace 8) ; UNPACKED-NEXT: S_ENDPGM 0 ; ; PACKED-LABEL: name: raw_tbuffer_store_i8__sgpr_rsrc__vgpr_voffset__sgpr_soffset @@ -31,7 +31,7 @@ define amdgpu_ps void 
@raw_tbuffer_store_i8__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 8) + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (i8), addrspace 8) ; PACKED-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_tbuffer_store_i8__sgpr_rsrc__vgpr_voffset__sgpr_soffset @@ -46,7 +46,7 @@ define amdgpu_ps void @raw_tbuffer_store_i8__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 8) + ; GFX12-NEXT: TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (i8), addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.i8(i8 %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0) ret void @@ -89,7 +89,7 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__vgpr_voffset__sgpr_soffs ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 8) + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (i8), addrspace 8) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -136,7 +136,7 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__vgpr_voffset__sgpr_soffs ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 8) + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (i8), addrspace 8) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -183,7 +183,7 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__vgpr_voffset__sgpr_soffs ; GFX12-NEXT: bb.3: ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 8) + ; GFX12-NEXT: 
TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (i8), addrspace 8) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -238,7 +238,7 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__vgpr_voffset__vgpr_soffs ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 8) + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (i8), addrspace 8) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -288,7 +288,7 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__vgpr_voffset__vgpr_soffs ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 8) + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (i8), addrspace 8) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -338,7 +338,7 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__vgpr_voffset__vgpr_soffs ; GFX12-NEXT: bb.3: ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 8) + ; GFX12-NEXT: TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (i8), addrspace 8) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -394,7 +394,7 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__sgpr_voffset__vgpr_soffs ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 8) + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (i8), addrspace 8) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -445,7 +445,7 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__sgpr_voffset__vgpr_soffs ; PACKED-NEXT: bb.3: ; PACKED-NEXT: 
successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 8) + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (i8), addrspace 8) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -496,7 +496,7 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__sgpr_voffset__vgpr_soffs ; GFX12-NEXT: bb.3: ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 8) + ; GFX12-NEXT: TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (i8), addrspace 8) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.ll index 8d82772044794..d33c3db1efba1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.ll @@ -17,7 +17,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; GFX10_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX10_GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX10_GFX11-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX10_GFX11-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX10_GFX11-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX10_GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset @@ -32,7 +32,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX12-NEXT: TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) ret void @@ -54,7 +54,7 @@ define 
amdgpu_ps void @raw_tbuffer_store_v2f32__sgpr_rsrc__vgpr_voffset__sgpr_so ; GFX10_GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 ; GFX10_GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX10_GFX11-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX10_GFX11-NEXT: TBUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 8) + ; GFX10_GFX11-NEXT: TBUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (<2 x f32>), align 1, addrspace 8) ; GFX10_GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_tbuffer_store_v2f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset @@ -71,7 +71,7 @@ define amdgpu_ps void @raw_tbuffer_store_v2f32__sgpr_rsrc__vgpr_voffset__sgpr_so ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: TBUFFER_STORE_FORMAT_XY_VBUFFER_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 8) + ; GFX12-NEXT: TBUFFER_STORE_FORMAT_XY_VBUFFER_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (<2 x f32>), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) ret void @@ -94,7 +94,7 @@ define amdgpu_ps void @raw_tbuffer_store_v3f32__sgpr_rsrc__vgpr_voffset__sgpr_so ; GFX10_GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[COPY6]], %subreg.sub3 ; GFX10_GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX10_GFX11-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX10_GFX11-NEXT: TBUFFER_STORE_FORMAT_XYZ_OFFEN_exact [[REG_SEQUENCE]], [[COPY7]], [[REG_SEQUENCE1]], [[COPY8]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (<3 x s32>), align 1, addrspace 8) + ; GFX10_GFX11-NEXT: TBUFFER_STORE_FORMAT_XYZ_OFFEN_exact [[REG_SEQUENCE]], [[COPY7]], [[REG_SEQUENCE1]], [[COPY8]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (<3 x f32>), align 1, addrspace 8) ; GFX10_GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_tbuffer_store_v3f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset @@ -112,7 +112,7 @@ define amdgpu_ps void @raw_tbuffer_store_v3f32__sgpr_rsrc__vgpr_voffset__sgpr_so ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[COPY6]], %subreg.sub3 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: TBUFFER_STORE_FORMAT_XYZ_VBUFFER_OFFEN_exact [[REG_SEQUENCE]], [[COPY7]], [[REG_SEQUENCE1]], [[COPY8]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (<3 x s32>), align 1, addrspace 8) + ; GFX12-NEXT: TBUFFER_STORE_FORMAT_XYZ_VBUFFER_OFFEN_exact [[REG_SEQUENCE]], [[COPY7]], [[REG_SEQUENCE1]], [[COPY8]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (<3 x f32>), align 1, addrspace 8) ; 
GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.v3f32(<3 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) ret void @@ -136,7 +136,7 @@ define amdgpu_ps void @raw_tbuffer_store_v4f32__sgpr_rsrc__vgpr_voffset__sgpr_so ; GFX10_GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 ; GFX10_GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX10_GFX11-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX10_GFX11-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE]], [[COPY8]], [[REG_SEQUENCE1]], [[COPY9]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 8) + ; GFX10_GFX11-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE]], [[COPY8]], [[REG_SEQUENCE1]], [[COPY9]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (<4 x f32>), align 1, addrspace 8) ; GFX10_GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_tbuffer_store_v4f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset @@ -155,7 +155,7 @@ define amdgpu_ps void @raw_tbuffer_store_v4f32__sgpr_rsrc__vgpr_voffset__sgpr_so ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX12-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: TBUFFER_STORE_FORMAT_XYZW_VBUFFER_OFFEN_exact [[REG_SEQUENCE]], [[COPY8]], [[REG_SEQUENCE1]], [[COPY9]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 8) + ; GFX12-NEXT: TBUFFER_STORE_FORMAT_XYZW_VBUFFER_OFFEN_exact [[REG_SEQUENCE]], [[COPY8]], [[REG_SEQUENCE1]], [[COPY9]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (<4 x f32>), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) ret void @@ -176,7 +176,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__sgpr_voffset__sgpr_soff ; GFX10_GFX11-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX10_GFX11-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr7 ; GFX10_GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX10_GFX11-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX10_GFX11-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX10_GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__sgpr_voffset__sgpr_soffset @@ -192,7 +192,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__sgpr_voffset__sgpr_soff ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr7 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX12-NEXT: TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX12-NEXT: TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> 
%rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0) ret void @@ -235,7 +235,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__vgpr_rsrc__vgpr_voffset__sgpr_soff ; GFX10_GFX11-NEXT: bb.3: ; GFX10_GFX11-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX10_GFX11-NEXT: {{ $}} - ; GFX10_GFX11-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 94, 1, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX10_GFX11-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 94, 1, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX10_GFX11-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX10_GFX11-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX10_GFX11-NEXT: {{ $}} @@ -282,7 +282,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__vgpr_rsrc__vgpr_voffset__sgpr_soff ; GFX12-NEXT: bb.3: ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 94, 1, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX12-NEXT: TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 94, 1, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -337,7 +337,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__vgpr_rsrc__vgpr_voffset__vgpr_soff ; GFX10_GFX11-NEXT: bb.3: ; GFX10_GFX11-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX10_GFX11-NEXT: {{ $}} - ; GFX10_GFX11-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX10_GFX11-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX10_GFX11-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX10_GFX11-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX10_GFX11-NEXT: {{ $}} @@ -387,7 +387,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__vgpr_rsrc__vgpr_voffset__vgpr_soff ; GFX12-NEXT: bb.3: ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX12-NEXT: TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -443,7 +443,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__vgpr_rsrc__sgpr_voffset__vgpr_soff ; GFX10_GFX11-NEXT: bb.3: ; GFX10_GFX11-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX10_GFX11-NEXT: {{ $}} - ; GFX10_GFX11-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE1]], 
[[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX10_GFX11-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX10_GFX11-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX10_GFX11-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX10_GFX11-NEXT: {{ $}} @@ -494,7 +494,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__vgpr_rsrc__sgpr_voffset__vgpr_soff ; GFX12-NEXT: bb.3: ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX12-NEXT: TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -523,7 +523,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; GFX10_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX10_GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX10_GFX11-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX10_GFX11-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX10_GFX11-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX10_GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_glc @@ -538,7 +538,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX12-NEXT: TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 1) ret void @@ -558,7 +558,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; GFX10_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX10_GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX10_GFX11-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX10_GFX11-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], 
[[REG_SEQUENCE]], [[COPY6]], 0, 78, 2, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX10_GFX11-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 2, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX10_GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc @@ -573,7 +573,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 2, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX12-NEXT: TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 2, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 2) ret void @@ -593,7 +593,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; GFX10_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX10_GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX10_GFX11-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX10_GFX11-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 3, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX10_GFX11-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 3, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX10_GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc_glc @@ -608,7 +608,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 3, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX12-NEXT: TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 3, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 3) ret void @@ -628,7 +628,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; GFX10_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX10_GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX10_GFX11-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX10_GFX11-NEXT: 
TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 4, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX10_GFX11-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 4, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX10_GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_dlc @@ -643,7 +643,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 4, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX12-NEXT: TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 4, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 4) ret void @@ -663,7 +663,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vdpr_voffset__sgpr_soff ; GFX10_GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX10_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX10_GFX11-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX10_GFX11-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX10_GFX11-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX10_GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vdpr_voffset__sgpr_soffset__voffset0 @@ -677,7 +677,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vdpr_voffset__sgpr_soff ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: TBUFFER_STORE_FORMAT_X_VBUFFER_OFFSET_exact [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX12-NEXT: TBUFFER_STORE_FORMAT_X_VBUFFER_OFFSET_exact [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 94, i32 0) ret void @@ -695,7 +695,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; GFX10_GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX10_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX10_GFX11-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX10_GFX11-NEXT: 
TBUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX10_GFX11-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 94, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX10_GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset4095 @@ -709,7 +709,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: TBUFFER_STORE_FORMAT_X_VBUFFER_OFFSET_exact [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX12-NEXT: TBUFFER_STORE_FORMAT_X_VBUFFER_OFFSET_exact [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 94, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 94, i32 0) ret void @@ -729,7 +729,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; GFX10_GFX11-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX10_GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; GFX10_GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX10_GFX11-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY6]], [[REG_SEQUENCE]], [[COPY5]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX10_GFX11-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY6]], [[REG_SEQUENCE]], [[COPY5]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX10_GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset4096 @@ -743,7 +743,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: TBUFFER_STORE_FORMAT_X_VBUFFER_OFFSET_exact [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4096, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX12-NEXT: TBUFFER_STORE_FORMAT_X_VBUFFER_OFFSET_exact [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4096, 94, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 4096, i32 %soffset, i32 94, i32 0) ret void @@ -762,7 +762,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; GFX10_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX10_GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX10_GFX11-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX10_GFX11-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 
94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX10_GFX11-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 94, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX10_GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_voffset_add16 @@ -777,7 +777,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX12-NEXT: TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 94, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 %voffset = add i32 %voffset.base, 16 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0) @@ -797,7 +797,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; GFX10_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX10_GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX10_GFX11-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX10_GFX11-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX10_GFX11-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 94, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX10_GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset_add4095 @@ -812,7 +812,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX12-NEXT: TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 94, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 %voffset = add i32 %voffset.base, 4095 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0) @@ -835,7 +835,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; GFX10_GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; GFX10_GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GFX10_GFX11-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec - ; GFX10_GFX11-NEXT: 
TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[V_ADD_U32_e64_]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX10_GFX11-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[V_ADD_U32_e64_]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX10_GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset_add4096 @@ -850,7 +850,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4096, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX12-NEXT: TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4096, 94, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 %voffset = add i32 %voffset.base, 4096 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0) @@ -870,7 +870,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; GFX10_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX10_GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX10_GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 - ; GFX10_GFX11-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX10_GFX11-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX10_GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset4095 @@ -885,7 +885,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 - ; GFX12-NEXT: TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX12-NEXT: TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 4095, i32 94, i32 0) ret void @@ -904,7 +904,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; GFX10_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; 
GFX10_GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX10_GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX10_GFX11-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX10_GFX11-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX10_GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset4096 @@ -919,7 +919,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX12-NEXT: TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX12-NEXT: TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 4096, i32 94, i32 0) ret void @@ -940,7 +940,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; GFX10_GFX11-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX10_GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 ; GFX10_GFX11-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY6]], [[S_MOV_B32_]], implicit-def dead $scc - ; GFX10_GFX11-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX10_GFX11-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX10_GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset_add16 @@ -957,7 +957,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 ; GFX12-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY6]], [[S_MOV_B32_]], implicit-def dead $scc - ; GFX12-NEXT: TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX12-NEXT: TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 %soffset = add i32 %soffset.base, 16 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0) @@ -979,7 +979,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; GFX10_GFX11-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX10_GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 ; 
GFX10_GFX11-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY6]], [[S_MOV_B32_]], implicit-def dead $scc - ; GFX10_GFX11-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX10_GFX11-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX10_GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset_add4095 @@ -996,7 +996,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 ; GFX12-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY6]], [[S_MOV_B32_]], implicit-def dead $scc - ; GFX12-NEXT: TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX12-NEXT: TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 %soffset = add i32 %soffset.base, 4095 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0) @@ -1018,7 +1018,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; GFX10_GFX11-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX10_GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; GFX10_GFX11-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY6]], [[S_MOV_B32_]], implicit-def dead $scc - ; GFX10_GFX11-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX10_GFX11-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX10_GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset_add4096 @@ -1035,7 +1035,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; GFX12-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY6]], [[S_MOV_B32_]], implicit-def dead $scc - ; GFX12-NEXT: TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX12-NEXT: TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 %soffset = add i32 %soffset.base, 4096 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0) @@ -1081,7 +1081,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; GFX10_GFX11-NEXT: bb.3: ; GFX10_GFX11-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX10_GFX11-NEXT: {{ $}} - ; GFX10_GFX11-NEXT: 
TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[S_ADD_I32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX10_GFX11-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[S_ADD_I32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX10_GFX11-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX10_GFX11-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX10_GFX11-NEXT: {{ $}} @@ -1130,7 +1130,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; GFX12-NEXT: bb.3: ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[S_ADD_I32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX12-NEXT: TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[S_ADD_I32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -1186,7 +1186,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; GFX10_GFX11-NEXT: bb.3: ; GFX10_GFX11-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX10_GFX11-NEXT: {{ $}} - ; GFX10_GFX11-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[V_ADD_U32_e64_]], [[REG_SEQUENCE1]], [[COPY6]], 904, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX10_GFX11-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[V_ADD_U32_e64_]], [[REG_SEQUENCE1]], [[COPY6]], 904, 94, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX10_GFX11-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX10_GFX11-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX10_GFX11-NEXT: {{ $}} @@ -1233,7 +1233,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; GFX12-NEXT: bb.3: ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 5000, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX12-NEXT: TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 5000, 94, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll index daa1923fb0d58..837e26d638100 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll @@ -18,7 +18,7 @@ define amdgpu_ps i32 @s_buffer_load_i32(<4 x i32> inreg %rsrc, i32 inreg %soffse ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; 
GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (i32)) ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -34,7 +34,7 @@ define amdgpu_ps i32 @s_buffer_load_i32(<4 x i32> inreg %rsrc, i32 inreg %soffse ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (i32)) ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -50,7 +50,7 @@ define amdgpu_ps i32 @s_buffer_load_i32(<4 x i32> inreg %rsrc, i32 inreg %soffse ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (i32)) ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -66,7 +66,7 @@ define amdgpu_ps i32 @s_buffer_load_i32(<4 x i32> inreg %rsrc, i32 inreg %soffse ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load (s32)) + ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load (i32)) ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR_IMM]] ; GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; GFX12-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -86,7 +86,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_glc(<4 x i32> inreg 
%rsrc, i32 inreg %so ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1 :: (dereferenceable invariant load (i32)) ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -102,7 +102,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_glc(<4 x i32> inreg %rsrc, i32 inreg %so ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1 :: (dereferenceable invariant load (i32)) ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -118,7 +118,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_glc(<4 x i32> inreg %rsrc, i32 inreg %so ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1 :: (dereferenceable invariant load (i32)) ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -134,7 +134,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_glc(<4 x i32> inreg %rsrc, i32 inreg %so ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[REG_SEQUENCE]], [[COPY4]], 0, 1 :: (dereferenceable invariant load (s32)) + ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[REG_SEQUENCE]], [[COPY4]], 0, 1 :: (dereferenceable invariant load (i32)) ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR_IMM]] 
; GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; GFX12-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -154,7 +154,7 @@ define amdgpu_ps <2 x i32> @s_buffer_load_v2i32(<4 x i32> inreg %rsrc, i32 inreg ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s64)) + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (i64)) ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0 ; GFX6-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1 ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] @@ -175,7 +175,7 @@ define amdgpu_ps <2 x i32> @s_buffer_load_v2i32(<4 x i32> inreg %rsrc, i32 inreg ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s64)) + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (i64)) ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0 ; GFX7-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1 ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] @@ -196,7 +196,7 @@ define amdgpu_ps <2 x i32> @s_buffer_load_v2i32(<4 x i32> inreg %rsrc, i32 inreg ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s64)) + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (i64)) ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] @@ -217,7 +217,7 @@ define amdgpu_ps <2 x i32> @s_buffer_load_v2i32(<4 x i32> inreg %rsrc, i32 inreg ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX2_SGPR_IMM:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR_IMM [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load (s64)) + ; GFX12-NEXT: 
[[S_BUFFER_LOAD_DWORDX2_SGPR_IMM:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR_IMM [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load (i64)) ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR_IMM]].sub0 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR_IMM]].sub1 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] @@ -242,7 +242,7 @@ define amdgpu_ps <3 x i32> @s_buffer_load_v3i32(<4 x i32> inreg %rsrc, i32 inreg ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s96), align 16) + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (i96), align 16) ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub0 ; GFX6-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub1 ; GFX6-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub2 @@ -268,7 +268,7 @@ define amdgpu_ps <3 x i32> @s_buffer_load_v3i32(<4 x i32> inreg %rsrc, i32 inreg ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s96), align 16) + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (i96), align 16) ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub0 ; GFX7-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub1 ; GFX7-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub2 @@ -294,7 +294,7 @@ define amdgpu_ps <3 x i32> @s_buffer_load_v3i32(<4 x i32> inreg %rsrc, i32 inreg ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s96), align 16) + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (i96), align 16) ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub0 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub1 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub2 @@ -320,7 +320,7 @@ define amdgpu_ps <3 x i32> @s_buffer_load_v3i32(<4 x i32> inreg %rsrc, i32 inreg ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, 
[[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX3_SGPR_IMM:%[0-9]+]]:sgpr_96 = S_BUFFER_LOAD_DWORDX3_SGPR_IMM [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load (s96), align 16) + ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX3_SGPR_IMM:%[0-9]+]]:sgpr_96 = S_BUFFER_LOAD_DWORDX3_SGPR_IMM [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load (i96), align 16) ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX3_SGPR_IMM]].sub0 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX3_SGPR_IMM]].sub1 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX3_SGPR_IMM]].sub2 @@ -349,7 +349,7 @@ define amdgpu_ps <8 x i32> @s_buffer_load_v8i32(<4 x i32> inreg %rsrc, i32 inreg ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s256)) + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (i256)) ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0 ; GFX6-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1 ; GFX6-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2 @@ -394,7 +394,7 @@ define amdgpu_ps <8 x i32> @s_buffer_load_v8i32(<4 x i32> inreg %rsrc, i32 inreg ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s256)) + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (i256)) ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0 ; GFX7-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1 ; GFX7-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2 @@ -439,7 +439,7 @@ define amdgpu_ps <8 x i32> @s_buffer_load_v8i32(<4 x i32> inreg %rsrc, i32 inreg ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s256)) + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (i256)) ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY 
[[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2 @@ -484,7 +484,7 @@ define amdgpu_ps <8 x i32> @s_buffer_load_v8i32(<4 x i32> inreg %rsrc, i32 inreg ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX8_SGPR_IMM:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR_IMM [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load (s256)) + ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX8_SGPR_IMM:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR_IMM [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load (i256)) ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR_IMM]].sub0 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR_IMM]].sub1 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR_IMM]].sub2 @@ -533,7 +533,7 @@ define amdgpu_ps <16 x i32> @s_buffer_load_v16i32(<4 x i32> inreg %rsrc, i32 inr ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s512)) + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (i512)) ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0 ; GFX6-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1 ; GFX6-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2 @@ -610,7 +610,7 @@ define amdgpu_ps <16 x i32> @s_buffer_load_v16i32(<4 x i32> inreg %rsrc, i32 inr ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s512)) + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (i512)) ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0 ; GFX7-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1 ; GFX7-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2 @@ -687,7 +687,7 @@ define amdgpu_ps <16 x i32> @s_buffer_load_v16i32(<4 x i32> inreg %rsrc, i32 inr ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s512)) + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = 
S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (i512)) ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2 @@ -764,7 +764,7 @@ define amdgpu_ps <16 x i32> @s_buffer_load_v16i32(<4 x i32> inreg %rsrc, i32 inr ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX16_SGPR_IMM:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR_IMM [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load (s512)) + ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX16_SGPR_IMM:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR_IMM [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load (i512)) ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR_IMM]].sub0 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR_IMM]].sub1 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR_IMM]].sub2 @@ -845,7 +845,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1(<4 x i32> inreg %rsrc) { ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 - ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (i32)) ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -861,7 +861,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1(<4 x i32> inreg %rsrc) { ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 - ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (i32)) ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -876,7 +876,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1(<4 x i32> inreg %rsrc) { ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, 
[[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 0 :: (dereferenceable invariant load (i32)) ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -891,7 +891,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1(<4 x i32> inreg %rsrc) { ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 0 :: (dereferenceable invariant load (s32)) + ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 0 :: (dereferenceable invariant load (i32)) ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] ; GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX12-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -910,7 +910,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_glc_4(<4 x i32> inreg %rsrc) { ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 1 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 1 :: (dereferenceable invariant load (i32)) ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -925,7 +925,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_glc_4(<4 x i32> inreg %rsrc) { ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 1 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 1 :: (dereferenceable invariant load (i32)) ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -940,7 +940,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_glc_4(<4 x i32> inreg %rsrc) { ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY 
$sgpr5 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 4, 1 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 4, 1 :: (dereferenceable invariant load (i32)) ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -955,7 +955,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_glc_4(<4 x i32> inreg %rsrc) { ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 4, 1 :: (dereferenceable invariant load (s32)) + ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 4, 1 :: (dereferenceable invariant load (i32)) ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] ; GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX12-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -975,7 +975,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_255(<4 x i32> inreg %rsrc) { ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 255 - ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (i32)) ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -991,7 +991,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_255(<4 x i32> inreg %rsrc) { ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 255 - ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (i32)) ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ 
-1006,7 +1006,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_255(<4 x i32> inreg %rsrc) { ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0 :: (dereferenceable invariant load (i32)) ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1021,7 +1021,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_255(<4 x i32> inreg %rsrc) { ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0 :: (dereferenceable invariant load (s32)) + ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0 :: (dereferenceable invariant load (i32)) ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] ; GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX12-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1040,7 +1040,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_256(<4 x i32> inreg %rsrc) { ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 64, 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 64, 0 :: (dereferenceable invariant load (i32)) ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1055,7 +1055,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_256(<4 x i32> inreg %rsrc) { ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 64, 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 64, 0 :: (dereferenceable invariant load (i32)) ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] ; GFX7-NEXT: 
[[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1070,7 +1070,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_256(<4 x i32> inreg %rsrc) { ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 256, 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 256, 0 :: (dereferenceable invariant load (i32)) ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1085,7 +1085,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_256(<4 x i32> inreg %rsrc) { ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 256, 0 :: (dereferenceable invariant load (s32)) + ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 256, 0 :: (dereferenceable invariant load (i32)) ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] ; GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX12-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1104,7 +1104,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1020(<4 x i32> inreg %rsrc) { ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0 :: (dereferenceable invariant load (i32)) ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1119,7 +1119,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1020(<4 x i32> inreg %rsrc) { ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 
255, 0 :: (dereferenceable invariant load (i32)) ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1134,7 +1134,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1020(<4 x i32> inreg %rsrc) { ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1020, 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1020, 0 :: (dereferenceable invariant load (i32)) ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1149,7 +1149,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1020(<4 x i32> inreg %rsrc) { ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1020, 0 :: (dereferenceable invariant load (s32)) + ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1020, 0 :: (dereferenceable invariant load (i32)) ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] ; GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX12-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1169,7 +1169,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1023(<4 x i32> inreg %rsrc) { ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1023 - ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (i32)) ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1185,7 +1185,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1023(<4 x i32> inreg %rsrc) { ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1023 - ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = 
S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (i32)) ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1200,7 +1200,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1023(<4 x i32> inreg %rsrc) { ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1023, 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1023, 0 :: (dereferenceable invariant load (i32)) ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1215,7 +1215,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1023(<4 x i32> inreg %rsrc) { ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1023, 0 :: (dereferenceable invariant load (s32)) + ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1023, 0 :: (dereferenceable invariant load (i32)) ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] ; GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX12-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1235,7 +1235,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1024(<4 x i32> inreg %rsrc) { ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 - ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (i32)) ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1250,7 +1250,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1024(<4 x i32> inreg %rsrc) { ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX7-NEXT: 
[[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 256, 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 256, 0 :: (dereferenceable invariant load (i32)) ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1265,7 +1265,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1024(<4 x i32> inreg %rsrc) { ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1024, 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1024, 0 :: (dereferenceable invariant load (i32)) ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1280,7 +1280,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1024(<4 x i32> inreg %rsrc) { ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1024, 0 :: (dereferenceable invariant load (s32)) + ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1024, 0 :: (dereferenceable invariant load (i32)) ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] ; GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX12-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1300,7 +1300,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1025(<4 x i32> inreg %rsrc) { ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1025 - ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (i32)) ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1316,7 +1316,7 @@ 
define amdgpu_ps i32 @s_buffer_load_i32_offset_1025(<4 x i32> inreg %rsrc) { ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1025 - ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (i32)) ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1331,7 +1331,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1025(<4 x i32> inreg %rsrc) { ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1025, 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1025, 0 :: (dereferenceable invariant load (i32)) ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1346,7 +1346,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1025(<4 x i32> inreg %rsrc) { ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1025, 0 :: (dereferenceable invariant load (s32)) + ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1025, 0 :: (dereferenceable invariant load (i32)) ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] ; GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX12-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1366,7 +1366,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg1(<4 x i32> inreg %desc) { ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 - ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (i32)) ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY 
[[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1382,7 +1382,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg1(<4 x i32> inreg %desc) { ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 - ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (i32)) ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1398,7 +1398,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg1(<4 x i32> inreg %desc) { ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 - ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (i32)) ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1414,7 +1414,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg1(<4 x i32> inreg %desc) { ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 - ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load (s32)) + ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load (i32)) ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR_IMM]] ; GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX12-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1434,7 +1434,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg4(<4 x i32> inreg %desc) { ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -4 - ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR 
[[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (i32)) ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1449,7 +1449,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg4(<4 x i32> inreg %desc) { ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073741823, 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073741823, 0 :: (dereferenceable invariant load (i32)) ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1465,7 +1465,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg4(<4 x i32> inreg %desc) { ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -4 - ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (i32)) ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1481,7 +1481,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg4(<4 x i32> inreg %desc) { ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -4 - ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load (s32)) + ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load (i32)) ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR_IMM]] ; GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX12-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1501,7 +1501,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg8(<4 x i32> inreg %desc) { ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX6-NEXT: 
[[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -8 - ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (i32)) ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1516,7 +1516,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg8(<4 x i32> inreg %desc) { ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073741822, 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073741822, 0 :: (dereferenceable invariant load (i32)) ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1532,7 +1532,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg8(<4 x i32> inreg %desc) { ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -8 - ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (i32)) ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1548,7 +1548,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg8(<4 x i32> inreg %desc) { ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -8 - ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load (s32)) + ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load (i32)) ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR_IMM]] ; GFX12-NEXT: 
[[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX12-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1568,7 +1568,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit31(<4 x i32> inreg %desc) { ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 - ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (i32)) ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1583,7 +1583,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit31(<4 x i32> inreg %desc) { ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 536870912, 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 536870912, 0 :: (dereferenceable invariant load (i32)) ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1599,7 +1599,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit31(<4 x i32> inreg %desc) { ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 - ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (i32)) ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1615,7 +1615,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit31(<4 x i32> inreg %desc) { ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 - ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: 
(dereferenceable invariant load (s32)) + ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load (i32)) ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR_IMM]] ; GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX12-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1635,7 +1635,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_glc_bit30(<4 x i32> inreg %desc) ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741824 - ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 1 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 1 :: (dereferenceable invariant load (i32)) ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1650,7 +1650,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_glc_bit30(<4 x i32> inreg %desc) ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 268435456, 1 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 268435456, 1 :: (dereferenceable invariant load (i32)) ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1666,7 +1666,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_glc_bit30(<4 x i32> inreg %desc) ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741824 - ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 1 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 1 :: (dereferenceable invariant load (i32)) ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1682,7 +1682,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_glc_bit30(<4 x i32> inreg %desc) ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = 
REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741824 - ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 1 :: (dereferenceable invariant load (s32)) + ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 1 :: (dereferenceable invariant load (i32)) ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR_IMM]] ; GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX12-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1702,7 +1702,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit29(<4 x i32> inreg %desc) { ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 536870912 - ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (i32)) ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1717,7 +1717,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit29(<4 x i32> inreg %desc) { ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 134217728, 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 134217728, 0 :: (dereferenceable invariant load (i32)) ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1733,7 +1733,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit29(<4 x i32> inreg %desc) { ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 536870912 - ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (i32)) ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX8-NEXT: 
[[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1749,7 +1749,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit29(<4 x i32> inreg %desc) { ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 536870912 - ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load (s32)) + ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load (i32)) ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR_IMM]] ; GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX12-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1769,7 +1769,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit21(<4 x i32> inreg %desc) { ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2097152 - ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (i32)) ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1784,7 +1784,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit21(<4 x i32> inreg %desc) { ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 524288, 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 524288, 0 :: (dereferenceable invariant load (i32)) ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1800,7 +1800,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit21(<4 x i32> inreg %desc) { ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2097152 - ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 
:: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (i32)) ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1815,7 +1815,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit21(<4 x i32> inreg %desc) { ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 2097152, 0 :: (dereferenceable invariant load (s32)) + ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 2097152, 0 :: (dereferenceable invariant load (i32)) ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] ; GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX12-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1835,7 +1835,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit20(<4 x i32> inreg %desc) { ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576 - ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (i32)) ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1850,7 +1850,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit20(<4 x i32> inreg %desc) { ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 262144, 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 262144, 0 :: (dereferenceable invariant load (i32)) ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1866,7 +1866,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit20(<4 x i32> inreg %desc) { ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, 
[[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576 - ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (i32)) ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1881,7 +1881,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit20(<4 x i32> inreg %desc) { ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1048576, 0 :: (dereferenceable invariant load (s32)) + ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1048576, 0 :: (dereferenceable invariant load (i32)) ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] ; GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX12-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1901,7 +1901,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg_bit20(<4 x i32> inreg %desc) ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1048576 - ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (i32)) ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1916,7 +1916,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg_bit20(<4 x i32> inreg %desc) ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073479680, 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073479680, 0 :: (dereferenceable invariant load (i32)) ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ 
-1932,7 +1932,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg_bit20(<4 x i32> inreg %desc) ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1048576 - ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (i32)) ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1948,7 +1948,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg_bit20(<4 x i32> inreg %desc) ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1048576 - ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load (s32)) + ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load (i32)) ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR_IMM]] ; GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX12-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1968,7 +1968,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit19(<4 x i32> inreg %desc) { ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 524288 - ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (i32)) ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1983,7 +1983,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit19(<4 x i32> inreg %desc) { ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 131072, 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci 
[[REG_SEQUENCE]], 131072, 0 :: (dereferenceable invariant load (i32)) ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1998,7 +1998,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit19(<4 x i32> inreg %desc) { ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 524288, 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 524288, 0 :: (dereferenceable invariant load (i32)) ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -2013,7 +2013,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit19(<4 x i32> inreg %desc) { ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 524288, 0 :: (dereferenceable invariant load (s32)) + ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 524288, 0 :: (dereferenceable invariant load (i32)) ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] ; GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX12-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -2033,7 +2033,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg_bit19(<4 x i32> inreg %desc) ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -524288 - ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (i32)) ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -2048,7 +2048,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg_bit19(<4 x i32> inreg %desc) ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: 
[[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073610752, 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073610752, 0 :: (dereferenceable invariant load (i32)) ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -2064,7 +2064,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg_bit19(<4 x i32> inreg %desc) ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -524288 - ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (i32)) ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -2080,7 +2080,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg_bit19(<4 x i32> inreg %desc) ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -524288 - ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load (s32)) + ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load (i32)) ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR_IMM]] ; GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX12-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -2102,7 +2102,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (i32)) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -2117,7 +2117,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = 
REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (i32)) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -2132,7 +2132,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (i32)) ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -2146,7 +2146,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (i32)) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -2165,7 +2165,7 @@ define amdgpu_ps <2 x float> @s_buffer_load_v2f32_vgpr_offset(<4 x i32> inreg %r ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s64), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (i64), align 4) ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1 ; GFX6-NEXT: 
$vgpr0 = COPY [[COPY5]] @@ -2183,7 +2183,7 @@ define amdgpu_ps <2 x float> @s_buffer_load_v2f32_vgpr_offset(<4 x i32> inreg %r ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s64), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (i64), align 4) ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1 ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]] @@ -2201,7 +2201,7 @@ define amdgpu_ps <2 x float> @s_buffer_load_v2f32_vgpr_offset(<4 x i32> inreg %r ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s64), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (i64), align 4) ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1 ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]] @@ -2218,7 +2218,7 @@ define amdgpu_ps <2 x float> @s_buffer_load_v2f32_vgpr_offset(<4 x i32> inreg %r ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX2_VBUFFER_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s64), align 4) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX2_VBUFFER_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (i64), align 4) ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_VBUFFER_OFFEN]].sub0 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_VBUFFER_OFFEN]].sub1 ; GFX12-NEXT: $vgpr0 = COPY [[COPY5]] @@ -2240,7 +2240,7 @@ define amdgpu_ps <3 x float> @s_buffer_load_v3f32_vgpr_offset(<4 x i32> inreg %r ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 
0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 @@ -2261,7 +2261,7 @@ define amdgpu_ps <3 x float> @s_buffer_load_v3f32_vgpr_offset(<4 x i32> inreg %r ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 @@ -2282,7 +2282,7 @@ define amdgpu_ps <3 x float> @s_buffer_load_v3f32_vgpr_offset(<4 x i32> inreg %r ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 @@ -2302,7 +2302,7 @@ define amdgpu_ps <3 x float> @s_buffer_load_v3f32_vgpr_offset(<4 x i32> inreg %r ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX3_VBUFFER_OFFEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s96), align 4) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX3_VBUFFER_OFFEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (i96), align 4) ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_VBUFFER_OFFEN]].sub0 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY 
[[BUFFER_LOAD_DWORDX3_VBUFFER_OFFEN]].sub1 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_VBUFFER_OFFEN]].sub2 @@ -2326,7 +2326,7 @@ define amdgpu_ps <4 x float> @s_buffer_load_v4f32_vgpr_offset(<4 x i32> inreg %r ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 @@ -2348,7 +2348,7 @@ define amdgpu_ps <4 x float> @s_buffer_load_v4f32_vgpr_offset(<4 x i32> inreg %r ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 @@ -2370,7 +2370,7 @@ define amdgpu_ps <4 x float> @s_buffer_load_v4f32_vgpr_offset(<4 x i32> inreg %r ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 @@ -2391,7 +2391,7 @@ define amdgpu_ps <4 x float> @s_buffer_load_v4f32_vgpr_offset(<4 x i32> inreg %r ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], 
%subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN]].sub0 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN]].sub1 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN]].sub2 @@ -2417,8 +2417,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset(<4 x i32> inreg %r ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 @@ -2449,8 +2449,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset(<4 x i32> inreg %r ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) ; 
GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 @@ -2481,8 +2481,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset(<4 x i32> inreg %r ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 @@ -2512,8 +2512,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset(<4 x i32> inreg %r ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], $sgpr_null, 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], $sgpr_null, 16, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 @@ -2548,10 +2548,10 @@ define amdgpu_ps <16 x float> 
@s_buffer_load_v16f32_vgpr_offset(<4 x i32> inreg ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, implicit $exec :: (dereferenceable invariant load (i128) from unknown-address + 16, align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, implicit $exec :: (dereferenceable invariant load (i128) from unknown-address + 48, align 4) ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 @@ -2598,10 +2598,10 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset(<4 x i32> inreg ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, implicit $exec :: (dereferenceable 
invariant load (s128) from unknown-address + 16, align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, implicit $exec :: (dereferenceable invariant load (i128) from unknown-address + 16, align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, implicit $exec :: (dereferenceable invariant load (i128) from unknown-address + 48, align 4) ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 @@ -2648,10 +2648,10 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset(<4 x i32> inreg ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, implicit $exec :: (dereferenceable invariant load (i128) 
from unknown-address + 16, align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, implicit $exec :: (dereferenceable invariant load (i128) from unknown-address + 48, align 4) ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 @@ -2697,10 +2697,10 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset(<4 x i32> inreg ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], $sgpr_null, 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], $sgpr_null, 32, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], $sgpr_null, 48, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], $sgpr_null, 16, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], $sgpr_null, 32, 0, 0, implicit $exec :: (dereferenceable invariant load (i128) from unknown-address + 16, align 4) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], $sgpr_null, 48, 0, 0, implicit $exec :: (dereferenceable invariant load (i128) from unknown-address + 48, align 4) ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 @@ -2751,7 +2751,7 @@ define amdgpu_ps float 
@s_buffer_load_f32_vgpr_offset_add_4092(<4 x i32> inreg % ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, implicit $exec :: (dereferenceable invariant load (i32)) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -2766,7 +2766,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4092(<4 x i32> inreg % ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, implicit $exec :: (dereferenceable invariant load (i32)) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -2781,7 +2781,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4092(<4 x i32> inreg % ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, implicit $exec :: (dereferenceable invariant load (i32)) ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -2795,7 +2795,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4092(<4 x i32> inreg % ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], $sgpr_null, 4092, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], $sgpr_null, 4092, 0, 0, implicit $exec :: (dereferenceable invariant load (i32)) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4092 @@ -2815,7 
+2815,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4095(<4 x i32> inreg % ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, implicit $exec :: (dereferenceable invariant load (i32)) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -2830,7 +2830,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4095(<4 x i32> inreg % ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, implicit $exec :: (dereferenceable invariant load (i32)) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -2845,7 +2845,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4095(<4 x i32> inreg % ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, implicit $exec :: (dereferenceable invariant load (i32)) ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -2859,7 +2859,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4095(<4 x i32> inreg % ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], $sgpr_null, 4095, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], $sgpr_null, 4095, 0, 0, implicit $exec :: (dereferenceable invariant load (i32)) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 
%soffset.base, 4095 @@ -2879,7 +2879,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4096(<4 x i32> inreg % ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (i32)) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -2894,7 +2894,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4096(<4 x i32> inreg % ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (i32)) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -2909,7 +2909,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4096(<4 x i32> inreg % ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, implicit $exec :: (dereferenceable invariant load (i32)) ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -2923,7 +2923,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4096(<4 x i32> inreg % ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], $sgpr_null, 4096, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], $sgpr_null, 4096, 0, 0, implicit $exec :: (dereferenceable invariant load (i32)) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit 
$vgpr0 %soffset = add i32 %soffset.base, 4096 @@ -2944,8 +2944,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4064(<4 x i32> ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 @@ -2976,8 +2976,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4064(<4 x i32> ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 @@ -3008,8 +3008,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4064(<4 x i32> ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: 
[[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 @@ -3039,8 +3039,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4064(<4 x i32> ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], $sgpr_null, 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], $sgpr_null, 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], $sgpr_null, 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], $sgpr_null, 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 @@ -3077,8 +3077,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4068(<4 x i32> ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable 
invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 @@ -3109,8 +3109,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4068(<4 x i32> ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 @@ -3141,8 +3141,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4068(<4 x i32> ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) 
; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 @@ -3172,8 +3172,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4068(<4 x i32> ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], $sgpr_null, 4068, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], $sgpr_null, 4084, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], $sgpr_null, 4068, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], $sgpr_null, 4084, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 @@ -3209,10 +3209,10 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4032(<4 x i3 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], 
[[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (i128) from unknown-address + 16, align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (i128) from unknown-address + 48, align 4) ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 @@ -3259,10 +3259,10 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4032(<4 x i3 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (i128) from unknown-address + 16, align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (i128) from unknown-address + 48, align 4) ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 ; 
GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 @@ -3309,10 +3309,10 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4032(<4 x i3 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (i128) from unknown-address + 16, align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (i128) from unknown-address + 48, align 4) ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 @@ -3358,10 +3358,10 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4032(<4 x i3 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], $sgpr_null, 4032, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], $sgpr_null, 4048, 0, 0, implicit 
$exec :: (dereferenceable invariant load (s128), align 4) - ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], $sgpr_null, 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], $sgpr_null, 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], $sgpr_null, 4032, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], $sgpr_null, 4048, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], $sgpr_null, 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (i128) from unknown-address + 16, align 4) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], $sgpr_null, 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (i128) from unknown-address + 48, align 4) ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 @@ -3413,10 +3413,10 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4036(<4 x i3 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4036 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) + ; GFX6-NEXT: 
[[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, implicit $exec :: (dereferenceable invariant load (i128) from unknown-address + 16, align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, implicit $exec :: (dereferenceable invariant load (i128) from unknown-address + 48, align 4) ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 @@ -3463,10 +3463,10 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4036(<4 x i3 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4036 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, implicit $exec :: (dereferenceable invariant load (i128) from unknown-address + 16, align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, implicit $exec :: (dereferenceable invariant load (i128) from unknown-address + 48, align 4) ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, 
[[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 @@ -3513,10 +3513,10 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4036(<4 x i3 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (i128) from unknown-address + 16, align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (i128) from unknown-address + 48, align 4) ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 @@ -3562,10 +3562,10 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4036(<4 x i3 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], $sgpr_null, 4036, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN1:%[0-9]+]]:vreg_128 = 
BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], $sgpr_null, 4052, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], $sgpr_null, 4068, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], $sgpr_null, 4084, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], $sgpr_null, 4036, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], $sgpr_null, 4052, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], $sgpr_null, 4068, 0, 0, implicit $exec :: (dereferenceable invariant load (i128) from unknown-address + 16, align 4) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], $sgpr_null, 4084, 0, 0, implicit $exec :: (dereferenceable invariant load (i128) from unknown-address + 48, align 4) ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 @@ -3642,7 +3642,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg % ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (i32)) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -3690,7 +3690,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg % ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (i32)) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; 
GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -3738,7 +3738,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg % ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (i32)) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -3785,7 +3785,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg % ; GFX12-NEXT: bb.3: ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[COPY5]], [[REG_SEQUENCE1]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[COPY5]], [[REG_SEQUENCE1]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (i32)) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -3836,7 +3836,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> % ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4092, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4092, 0, 0, implicit $exec :: (dereferenceable invariant load (i32)) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -3882,7 +3882,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> % ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4092, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4092, 0, 0, implicit $exec :: (dereferenceable invariant load (i32)) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -3928,7 +3928,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> % ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4092, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = 
BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4092, 0, 0, implicit $exec :: (dereferenceable invariant load (i32)) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -3974,7 +3974,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> % ; GFX12-NEXT: bb.3: ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4092, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4092, 0, 0, implicit $exec :: (dereferenceable invariant load (i32)) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -4030,7 +4030,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> % ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (i32)) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -4080,7 +4080,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> % ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (i32)) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -4130,7 +4130,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> % ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (i32)) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -4176,7 +4176,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> % ; GFX12-NEXT: bb.3: ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX12-NEXT: {{ $}} - ; 
GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4096, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4096, 0, 0, implicit $exec :: (dereferenceable invariant load (i32)) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -4228,7 +4228,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc) ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4095, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4095, 0, 0, implicit $exec :: (dereferenceable invariant load (i32) from unknown-address + 4095, align 1) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -4274,7 +4274,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc) ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4095, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4095, 0, 0, implicit $exec :: (dereferenceable invariant load (i32) from unknown-address + 4095, align 1) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -4320,7 +4320,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc) ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4095, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4095, 0, 0, implicit $exec :: (dereferenceable invariant load (i32) from unknown-address + 4095, align 1) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -4365,7 +4365,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc) ; GFX12-NEXT: bb.3: ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFSET [[REG_SEQUENCE1]], $sgpr_null, 4095, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFSET:%[0-9]+]]:vgpr_32 = 
BUFFER_LOAD_DWORD_VBUFFER_OFFSET [[REG_SEQUENCE1]], $sgpr_null, 4095, 0, 0, implicit $exec :: (dereferenceable invariant load (i32) from unknown-address + 4095, align 1) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -4418,7 +4418,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc) ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (i32)) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -4466,7 +4466,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc) ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (i32)) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -4512,7 +4512,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc) ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4095, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4096) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4095, 0, 0, implicit $exec :: (dereferenceable invariant load (i32) from unknown-address + 4096) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -4557,7 +4557,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc) ; GFX12-NEXT: bb.3: ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFSET [[REG_SEQUENCE1]], $sgpr_null, 4096, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4096) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFSET [[REG_SEQUENCE1]], $sgpr_null, 4096, 0, 0, implicit $exec :: (dereferenceable invariant load (i32) from unknown-address + 4096) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -4609,8 +4609,8 @@ define amdgpu_ps <8 x float> 
@s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> % ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -4672,8 +4672,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> % ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -4735,8 +4735,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> % ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -4798,8 +4798,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x 
i32> % ; GFX12-NEXT: bb.3: ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -4872,8 +4872,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> % ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -4939,8 +4939,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> % ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP 
%bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -5006,8 +5006,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> % ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -5069,8 +5069,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> % ; GFX12-NEXT: bb.3: ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4068, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4084, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4068, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4084, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -5141,8 +5141,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> % ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load 
(i128), align 4) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -5208,8 +5208,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> % ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -5275,8 +5275,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> % ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -5338,8 +5338,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> % ; GFX12-NEXT: bb.3: ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4096, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4112, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4096, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFSET1:%[0-9]+]]:vreg_128 = 
BUFFER_LOAD_DWORDX4_VBUFFER_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4112, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -5407,8 +5407,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -5471,8 +5471,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -5535,8 +5535,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 936, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 952, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 936, 0, 0, implicit $exec :: 
(dereferenceable invariant load (i128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 952, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -5598,8 +5598,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GFX12-NEXT: bb.3: ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE1]], $sgpr_null, 5000, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE1]], $sgpr_null, 5016, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE1]], $sgpr_null, 5000, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE1]], $sgpr_null, 5016, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -5667,8 +5667,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -5731,8 +5731,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 
4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -5795,8 +5795,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -5858,8 +5858,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GFX12-NEXT: bb.3: ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE1]], $sgpr_null, 4076, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE1]], $sgpr_null, 4092, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE1]], $sgpr_null, 4076, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE1]], $sgpr_null, 4092, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -5927,8 +5927,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: 
[[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -5991,8 +5991,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -6055,8 +6055,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -6118,8 +6118,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GFX12-NEXT: bb.3: ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN [[COPY4]], 
[[REG_SEQUENCE1]], $sgpr_null, 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE1]], $sgpr_null, 4096, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE1]], $sgpr_null, 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE1]], $sgpr_null, 4096, 0, 0, implicit $exec :: (dereferenceable invariant load (i128), align 4) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -6186,8 +6186,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4 ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (i128) from unknown-address + 4064, align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (i128) from unknown-address + 4064, align 4) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -6249,8 +6249,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4 ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (i128) from unknown-address + 4064, align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (i128) from unknown-address + 4064, align 4) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; 
GFX7-NEXT: {{ $}} @@ -6312,8 +6312,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4 ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (i128) from unknown-address + 4064, align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (i128) from unknown-address + 4064, align 4) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -6374,8 +6374,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4 ; GFX12-NEXT: bb.3: ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFSET [[REG_SEQUENCE1]], $sgpr_null, 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) - ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFSET [[REG_SEQUENCE1]], $sgpr_null, 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFSET [[REG_SEQUENCE1]], $sgpr_null, 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (i128) from unknown-address + 4064, align 4) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_OFFSET [[REG_SEQUENCE1]], $sgpr_null, 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (i128) from unknown-address + 4064, align 4) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -6419,7 +6419,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr(<4 x i32> inreg % ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (i32)) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -6434,7 
+6434,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr(<4 x i32> inreg % ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (i32)) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -6449,7 +6449,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr(<4 x i32> inreg % ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (i32)) ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -6464,7 +6464,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr(<4 x i32> inreg % ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (i32)) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset = add i32 %offset.v, %offset.s @@ -6484,7 +6484,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr(<4 x i32> inreg % ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (i32)) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -6499,7 +6499,7 @@ define amdgpu_ps float 
@s_buffer_load_f32_offset_add_sgpr_vgpr(<4 x i32> inreg % ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (i32)) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -6514,7 +6514,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr(<4 x i32> inreg % ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (i32)) ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -6529,7 +6529,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr(<4 x i32> inreg % ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (i32)) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset = add i32 %offset.s, %offset.v @@ -6552,7 +6552,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr_imm(<4 x i32> inr ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (i32)) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -6570,7 +6570,7 @@ define amdgpu_ps float 
@s_buffer_load_f32_offset_add_vgpr_sgpr_imm(<4 x i32> inr ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (i32)) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -6588,7 +6588,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr_imm(<4 x i32> inr ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (i32)) ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -6605,7 +6605,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr_imm(<4 x i32> inr ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] ; GFX12-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec - ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[V_ADD_U32_e64_]], [[REG_SEQUENCE]], $sgpr_null, 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[V_ADD_U32_e64_]], [[REG_SEQUENCE]], $sgpr_null, 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (i32)) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset.base = add i32 %offset.v, %offset.s @@ -6629,7 +6629,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr_imm(<4 x i32> inr ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (i32)) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX6-NEXT: 
SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -6647,7 +6647,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr_imm(<4 x i32> inr ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (i32)) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -6665,7 +6665,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr_imm(<4 x i32> inr ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (i32)) ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -6682,7 +6682,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr_imm(<4 x i32> inr ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] ; GFX12-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec - ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[V_ADD_U32_e64_]], [[REG_SEQUENCE]], $sgpr_null, 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[V_ADD_U32_e64_]], [[REG_SEQUENCE]], $sgpr_null, 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (i32)) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset.base = add i32 %offset.s, %offset.v @@ -6707,7 +6707,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_sgpr_vgpr(<4 x i32> inr ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (i32)) ; 
GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -6725,7 +6725,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_sgpr_vgpr(<4 x i32> inr ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (i32)) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -6743,7 +6743,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_sgpr_vgpr(<4 x i32> inr ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (i32)) ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -6760,7 +6760,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_sgpr_vgpr(<4 x i32> inr ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] ; GFX12-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec - ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[V_ADD_U32_e64_]], [[REG_SEQUENCE]], $sgpr_null, 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[V_ADD_U32_e64_]], [[REG_SEQUENCE]], $sgpr_null, 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (i32)) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset.base = add i32 %offset.s, 1024 @@ -6784,7 +6784,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_vgpr_sgpr(<4 x i32> inr ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 
implicit $exec :: (dereferenceable invariant load (i32)) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -6802,7 +6802,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_vgpr_sgpr(<4 x i32> inr ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (i32)) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -6820,7 +6820,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_vgpr_sgpr(<4 x i32> inr ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (i32)) ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -6837,7 +6837,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_vgpr_sgpr(<4 x i32> inr ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] ; GFX12-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec - ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[V_ADD_U32_e64_]], [[REG_SEQUENCE]], $sgpr_null, 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN [[V_ADD_U32_e64_]], [[REG_SEQUENCE]], $sgpr_null, 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (i32)) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_VBUFFER_OFFEN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset.base = add i32 %offset.v, 1024 @@ -6859,7 +6859,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_or_vgpr_imm(<4 x i32> inreg %rs ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 ; GFX6-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY4]], [[S_MOV_B32_]], implicit-def dead $scc - ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_OR_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_OR_B32_]], 0 :: (dereferenceable invariant load (i32)) ; 
GFX6-NEXT: $vgpr0 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -6875,7 +6875,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_or_vgpr_imm(<4 x i32> inreg %rs ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 ; GFX7-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY4]], [[S_MOV_B32_]], implicit-def dead $scc - ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_OR_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_OR_B32_]], 0 :: (dereferenceable invariant load (i32)) ; GFX7-NEXT: $vgpr0 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -6891,7 +6891,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_or_vgpr_imm(<4 x i32> inreg %rs ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 ; GFX8-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY4]], [[S_MOV_B32_]], implicit-def dead $scc - ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_OR_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_OR_B32_]], 0 :: (dereferenceable invariant load (i32)) ; GFX8-NEXT: $vgpr0 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -6907,7 +6907,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_or_vgpr_imm(<4 x i32> inreg %rs ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 ; GFX12-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY4]], [[S_MOV_B32_]], implicit-def dead $scc - ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[REG_SEQUENCE]], [[S_OR_B32_]], 0, 0 :: (dereferenceable invariant load (s32)) + ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[REG_SEQUENCE]], [[S_OR_B32_]], 0, 0 :: (dereferenceable invariant load (i32)) ; GFX12-NEXT: $vgpr0 = COPY [[S_BUFFER_LOAD_DWORD_SGPR_IMM]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset = or i32 %offset.s, -2147483648 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot2.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot2.ll index e2dab03e410aa..1dcf33bd4f522 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot2.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot2.ll @@ -276,8 +276,7 @@ define i32 @v_sdot2_fnegv2f16_c(<2 x i16> %a, <2 x i16> %b, <2 x half> %c) { ; GFX906-LABEL: v_sdot2_fnegv2f16_c: ; GFX906: ; %bb.0: ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 -; GFX906-NEXT: v_dot2_i32_i16 v0, v0, v1, v2 +; GFX906-NEXT: v_dot2_i32_i16 v0, v0, v1, v2 neg_lo:[0,0,1] neg_hi:[0,0,1] ; GFX906-NEXT: s_setpc_b64 s[30:31] ; ; GFX908-LABEL: v_sdot2_fnegv2f16_c: @@ -291,8 +290,7 @@ define i32 @v_sdot2_fnegv2f16_c(<2 x i16> %a, <2 x i16> %b, <2 x half> %c) { ; GFX10-LABEL: v_sdot2_fnegv2f16_c: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 -; GFX10-NEXT: 
v_dot2_i32_i16 v0, v0, v1, v2 +; GFX10-NEXT: v_dot2_i32_i16 v0, v0, v1, v2 neg_lo:[0,0,1] neg_hi:[0,0,1] ; GFX10-NEXT: s_setpc_b64 s[30:31] %neg.c = fneg <2 x half> %c %cast.neg.c = bitcast <2 x half> %neg.c to i32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot4.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot4.ll index 06560afee3c9a..702f576490c2f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot4.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot4.ll @@ -112,8 +112,7 @@ define i32 @v_sdot4_fnegv2f16_a(<2 x half> %a, i32 %b, i32 %c) { ; GFX906-LABEL: v_sdot4_fnegv2f16_a: ; GFX906: ; %bb.0: ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 -; GFX906-NEXT: v_dot4_i32_i8 v0, v0, v1, v2 +; GFX906-NEXT: v_dot4_i32_i8 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0] ; GFX906-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_sdot4_fnegv2f16_a: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot8.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot8.ll index 0d729351f65a7..c67f2c1c64897 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot8.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot8.ll @@ -67,15 +67,13 @@ define i32 @v_sdot8_fnegv2f16_a(<2 x half> %a, i32 %b, i32 %c) { ; GFX906-LABEL: v_sdot8_fnegv2f16_a: ; GFX906: ; %bb.0: ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 -; GFX906-NEXT: v_dot8_i32_i4 v0, v0, v1, v2 +; GFX906-NEXT: v_dot8_i32_i4 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0] ; GFX906-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_sdot8_fnegv2f16_a: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 -; GFX10-NEXT: v_dot8_i32_i4 v0, v0, v1, v2 +; GFX10-NEXT: v_dot8_i32_i4 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0] ; GFX10-NEXT: s_setpc_b64 s[30:31] %neg.a = fneg <2 x half> %a %cast.neg.a = bitcast <2 x half> %neg.a to i32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.add.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.add.ll index abee7de151fd4..a57687fa1db8c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.add.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.add.ll @@ -18,7 +18,7 @@ define amdgpu_ps float @struct_buffer_atomic_add_i32__vgpr_val__sgpr_rsrc__vgpr_ ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX8-NEXT: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (i32), align 1, addrspace 8) ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_BOTHEN_RTN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -36,7 +36,7 @@ define amdgpu_ps float @struct_buffer_atomic_add_i32__vgpr_val__sgpr_rsrc__vgpr_ ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: 
[[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX12-NEXT: [[BUFFER_ATOMIC_ADD_VBUFFER_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_VBUFFER_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_ATOMIC_ADD_VBUFFER_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_VBUFFER_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (i32), align 1, addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_VBUFFER_BOTHEN_RTN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) @@ -59,7 +59,7 @@ define amdgpu_ps float @struct_buffer_atomic_add_i32_noret__vgpr_val__sgpr_rsrc_ ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX8-NEXT: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (i32), align 1, addrspace 8) ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_BOTHEN_RTN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -77,7 +77,7 @@ define amdgpu_ps float @struct_buffer_atomic_add_i32_noret__vgpr_val__sgpr_rsrc_ ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX12-NEXT: [[BUFFER_ATOMIC_ADD_VBUFFER_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_VBUFFER_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_ATOMIC_ADD_VBUFFER_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_VBUFFER_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (i32), align 1, addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_VBUFFER_BOTHEN_RTN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) @@ -102,7 +102,7 @@ define amdgpu_ps <2 x float> @struct_buffer_atomic_add_i64__vgpr_val__sgpr_rsrc_ ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX8-NEXT: [[BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN 
[[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 1, implicit $exec :: (volatile dereferenceable load store (i64), align 1, addrspace 8) ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN]].sub0 ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN]].sub1 ; GFX8-NEXT: $vgpr0 = COPY [[COPY9]] @@ -125,7 +125,7 @@ define amdgpu_ps <2 x float> @struct_buffer_atomic_add_i64__vgpr_val__sgpr_rsrc_ ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX12-NEXT: [[BUFFER_ATOMIC_ADD_X2_VBUFFER_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_VBUFFER_BOTHEN_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_ATOMIC_ADD_X2_VBUFFER_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_VBUFFER_BOTHEN_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 1, implicit $exec :: (volatile dereferenceable load store (i64), align 1, addrspace 8) ; GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_X2_VBUFFER_BOTHEN_RTN]].sub0 ; GFX12-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_X2_VBUFFER_BOTHEN_RTN]].sub1 ; GFX12-NEXT: $vgpr0 = COPY [[COPY9]] @@ -153,7 +153,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_i64_noret__vgpr_val__sgpr_rsrc__ ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX8-NEXT: BUFFER_ATOMIC_ADD_X2_BOTHEN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_ATOMIC_ADD_X2_BOTHEN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, implicit $exec :: (volatile dereferenceable load store (i64), align 1, addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: struct_buffer_atomic_add_i64_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset @@ -172,7 +172,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_i64_noret__vgpr_val__sgpr_rsrc__ ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX12-NEXT: BUFFER_ATOMIC_ADD_X2_VBUFFER_BOTHEN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_ATOMIC_ADD_X2_VBUFFER_BOTHEN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, implicit $exec :: (volatile dereferenceable load store (i64), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 %ret = call i64 @llvm.amdgcn.struct.buffer.atomic.add.i64(i64 %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -223,7 +223,7 @@ define amdgpu_ps float @struct_buffer_atomic_add_i32__sgpr_val__vgpr_rsrc__sgpr_ ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; GFX8-NEXT: 
[[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (i32), align 1, addrspace 8) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -279,7 +279,7 @@ define amdgpu_ps float @struct_buffer_atomic_add_i32__sgpr_val__vgpr_rsrc__sgpr_ ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; GFX12-NEXT: [[BUFFER_ATOMIC_ADD_VBUFFER_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_VBUFFER_BOTHEN_RTN [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_ATOMIC_ADD_VBUFFER_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_VBUFFER_BOTHEN_RTN [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (i32), align 1, addrspace 8) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -341,7 +341,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_i32_noret__sgpr_val__vgpr_rsrc__ ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; GFX8-NEXT: BUFFER_ATOMIC_ADD_BOTHEN [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_ATOMIC_ADD_BOTHEN [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (i32), align 1, addrspace 8) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -396,7 +396,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_i32_noret__sgpr_val__vgpr_rsrc__ ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; GFX12-NEXT: BUFFER_ATOMIC_ADD_VBUFFER_BOTHEN [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_ATOMIC_ADD_VBUFFER_BOTHEN [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (i32), align 1, addrspace 8) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -427,7 +427,7 @@ define amdgpu_ps float @struct_buffer_atomic_add_i32__vgpr_val__sgpr_rsrc__vgpr_ ; 
GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX8-NEXT: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 3, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 3, implicit $exec :: (volatile dereferenceable load store (i32), align 1, addrspace 8) ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_BOTHEN_RTN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -445,7 +445,7 @@ define amdgpu_ps float @struct_buffer_atomic_add_i32__vgpr_val__sgpr_rsrc__vgpr_ ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX12-NEXT: [[BUFFER_ATOMIC_ADD_VBUFFER_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_VBUFFER_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 3, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_ATOMIC_ADD_VBUFFER_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_VBUFFER_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 3, implicit $exec :: (volatile dereferenceable load store (i32), align 1, addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_VBUFFER_BOTHEN_RTN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.cmpswap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.cmpswap.ll index 1c00ffbf21abb..621e50b993d5d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.cmpswap.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.cmpswap.ll @@ -20,7 +20,7 @@ define amdgpu_ps float @struct_buffer_atomic_cmpswap_i32__vgpr_val__vgpr_cmp__sg ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX8-NEXT: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY8]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY8]], 0, 1, implicit $exec :: (volatile dereferenceable load store (i32), align 1, addrspace 8) ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN]].sub0 ; GFX8-NEXT: $vgpr0 = COPY [[COPY9]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -41,7 +41,7 @@ define amdgpu_ps float @struct_buffer_atomic_cmpswap_i32__vgpr_val__vgpr_cmp__sg ; GFX12-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: 
[[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX12-NEXT: [[BUFFER_ATOMIC_CMPSWAP_VBUFFER_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_VBUFFER_BOTHEN_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY8]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_ATOMIC_CMPSWAP_VBUFFER_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_VBUFFER_BOTHEN_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY8]], 0, 1, implicit $exec :: (volatile dereferenceable load store (i32), align 1, addrspace 8) ; GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_VBUFFER_BOTHEN_RTN]].sub0 ; GFX12-NEXT: $vgpr0 = COPY [[COPY9]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -68,7 +68,7 @@ define amdgpu_ps void @struct_buffer_atomic_cmpswap_noret_i32__vgpr_val__vgpr_cm ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX8-NEXT: BUFFER_ATOMIC_CMPSWAP_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_ATOMIC_CMPSWAP_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, implicit $exec :: (volatile dereferenceable load store (i32), align 1, addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: struct_buffer_atomic_cmpswap_noret_i32__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset @@ -87,7 +87,7 @@ define amdgpu_ps void @struct_buffer_atomic_cmpswap_noret_i32__vgpr_val__vgpr_cm ; GFX12-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX12-NEXT: BUFFER_ATOMIC_CMPSWAP_VBUFFER_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_ATOMIC_CMPSWAP_VBUFFER_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, implicit $exec :: (volatile dereferenceable load store (i32), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 %ret = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32 %val, i32 %cmp, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -141,7 +141,7 @@ define amdgpu_ps float @struct_buffer_atomic_cmpswap_i32__sgpr_val__sgpr_cmp__vg ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY11]], %subreg.sub0, [[COPY12]], %subreg.sub1 ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; GFX8-NEXT: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = 
BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (i32), align 1, addrspace 8) ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN]].sub0 ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec @@ -201,7 +201,7 @@ define amdgpu_ps float @struct_buffer_atomic_cmpswap_i32__sgpr_val__sgpr_cmp__vg ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY11]], %subreg.sub0, [[COPY12]], %subreg.sub1 ; GFX12-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; GFX12-NEXT: [[BUFFER_ATOMIC_CMPSWAP_VBUFFER_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_VBUFFER_BOTHEN_RTN [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_ATOMIC_CMPSWAP_VBUFFER_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_VBUFFER_BOTHEN_RTN [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (i32), align 1, addrspace 8) ; GFX12-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_VBUFFER_BOTHEN_RTN]].sub0 ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec @@ -267,7 +267,7 @@ define amdgpu_ps void @struct_buffer_atomic_cmpswap_i32_noret__sgpr_val__sgpr_cm ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY11]], %subreg.sub0, [[COPY12]], %subreg.sub1 ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; GFX8-NEXT: BUFFER_ATOMIC_CMPSWAP_BOTHEN [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_ATOMIC_CMPSWAP_BOTHEN [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (i32), align 1, addrspace 8) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -325,7 +325,7 @@ define amdgpu_ps void @struct_buffer_atomic_cmpswap_i32_noret__sgpr_val__sgpr_cm ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY11]], %subreg.sub0, [[COPY12]], %subreg.sub1 ; GFX12-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; GFX12-NEXT: BUFFER_ATOMIC_CMPSWAP_VBUFFER_BOTHEN [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_ATOMIC_CMPSWAP_VBUFFER_BOTHEN [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (i32), align 1, addrspace 8) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -357,7 +357,7 @@ 
define amdgpu_ps float @struct_buffer_atomic_cmpswap_i32__vgpr_val__vgpr_cmp__sg ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX8-NEXT: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY8]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY8]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (i32), align 1, addrspace 8) ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN]].sub0 ; GFX8-NEXT: $vgpr0 = COPY [[COPY9]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -378,7 +378,7 @@ define amdgpu_ps float @struct_buffer_atomic_cmpswap_i32__vgpr_val__vgpr_cmp__sg ; GFX12-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX12-NEXT: [[BUFFER_ATOMIC_CMPSWAP_VBUFFER_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_VBUFFER_BOTHEN_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY8]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_ATOMIC_CMPSWAP_VBUFFER_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_VBUFFER_BOTHEN_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY8]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (i32), align 1, addrspace 8) ; GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_VBUFFER_BOTHEN_RTN]].sub0 ; GFX12-NEXT: $vgpr0 = COPY [[COPY9]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -410,7 +410,7 @@ define amdgpu_ps double @struct_buffer_atomic_cmpswap_i64__vgpr_val__vgpr_cmp__s ; GFX8-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 ; GFX8-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX8-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_BOTHEN_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_BOTHEN_RTN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[COPY10]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_BOTHEN_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_BOTHEN_RTN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[COPY10]], 0, 1, implicit $exec :: (volatile dereferenceable load store (i64), align 1, addrspace 8) ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_BOTHEN_RTN]].sub0_sub1 ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY11]].sub0 ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY11]].sub1 @@ -440,7 +440,7 @@ define amdgpu_ps double @struct_buffer_atomic_cmpswap_i64__vgpr_val__vgpr_cmp__s ; GFX12-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = 
REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 ; GFX12-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX12-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_VBUFFER_BOTHEN_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_VBUFFER_BOTHEN_RTN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[COPY10]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_VBUFFER_BOTHEN_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_VBUFFER_BOTHEN_RTN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[COPY10]], 0, 1, implicit $exec :: (volatile dereferenceable load store (i64), align 1, addrspace 8) ; GFX12-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_VBUFFER_BOTHEN_RTN]].sub0_sub1 ; GFX12-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY11]].sub0 ; GFX12-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY11]].sub1 @@ -476,7 +476,7 @@ define amdgpu_ps void @struct_buffer_atomic_cmpswap_noret_i64__vgpr_val__vgpr_cm ; GFX8-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 ; GFX8-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX8-NEXT: BUFFER_ATOMIC_CMPSWAP_X2_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_ATOMIC_CMPSWAP_X2_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (volatile dereferenceable load store (i64), align 1, addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: struct_buffer_atomic_cmpswap_noret_i64__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset @@ -499,7 +499,7 @@ define amdgpu_ps void @struct_buffer_atomic_cmpswap_noret_i64__vgpr_val__vgpr_cm ; GFX12-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 ; GFX12-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX12-NEXT: BUFFER_ATOMIC_CMPSWAP_X2_VBUFFER_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_ATOMIC_CMPSWAP_X2_VBUFFER_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (volatile dereferenceable load store (i64), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 %ret = call i64 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i64(i64 %val, i64 %cmp, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -557,7 +557,7 @@ define amdgpu_ps double @struct_buffer_atomic_cmpswap_i64__sgpr_val__sgpr_cmp__v ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY13]], %subreg.sub0, [[COPY14]], %subreg.sub1 ; GFX8-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY11]], %subreg.sub0_sub1, [[COPY12]], %subreg.sub2_sub3 - ; GFX8-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_BOTHEN_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_BOTHEN_RTN [[REG_SEQUENCE5]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit 
$exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_BOTHEN_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_BOTHEN_RTN [[REG_SEQUENCE5]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (i64), align 1, addrspace 8) ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_BOTHEN_RTN]].sub0_sub1 ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec @@ -626,7 +626,7 @@ define amdgpu_ps double @struct_buffer_atomic_cmpswap_i64__sgpr_val__sgpr_cmp__v ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY13]], %subreg.sub0, [[COPY14]], %subreg.sub1 ; GFX12-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY11]], %subreg.sub0_sub1, [[COPY12]], %subreg.sub2_sub3 - ; GFX12-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_VBUFFER_BOTHEN_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_VBUFFER_BOTHEN_RTN [[REG_SEQUENCE5]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_VBUFFER_BOTHEN_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_VBUFFER_BOTHEN_RTN [[REG_SEQUENCE5]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (i64), align 1, addrspace 8) ; GFX12-NEXT: [[COPY19:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_VBUFFER_BOTHEN_RTN]].sub0_sub1 ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec @@ -701,7 +701,7 @@ define amdgpu_ps void @struct_buffer_atomic_cmpswap_i64_noret__sgpr_val__sgpr_cm ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY13]], %subreg.sub0, [[COPY14]], %subreg.sub1 ; GFX8-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY11]], %subreg.sub0_sub1, [[COPY12]], %subreg.sub2_sub3 - ; GFX8-NEXT: BUFFER_ATOMIC_CMPSWAP_X2_BOTHEN [[REG_SEQUENCE5]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_ATOMIC_CMPSWAP_X2_BOTHEN [[REG_SEQUENCE5]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (i64), align 1, addrspace 8) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -763,7 +763,7 @@ define amdgpu_ps void @struct_buffer_atomic_cmpswap_i64_noret__sgpr_val__sgpr_cm ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY13]], %subreg.sub0, [[COPY14]], %subreg.sub1 ; GFX12-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY11]], %subreg.sub0_sub1, [[COPY12]], %subreg.sub2_sub3 - ; GFX12-NEXT: BUFFER_ATOMIC_CMPSWAP_X2_VBUFFER_BOTHEN [[REG_SEQUENCE5]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_ATOMIC_CMPSWAP_X2_VBUFFER_BOTHEN [[REG_SEQUENCE5]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable 
load store (i64), align 1, addrspace 8) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -799,7 +799,7 @@ define amdgpu_ps double @struct_buffer_atomic_cmpswap_i64__vgpr_val__vgpr_cmp__s ; GFX8-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 ; GFX8-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX8-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_BOTHEN_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_BOTHEN_RTN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[COPY10]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_BOTHEN_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_BOTHEN_RTN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[COPY10]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (i64), align 1, addrspace 8) ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_BOTHEN_RTN]].sub0_sub1 ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY11]].sub0 ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY11]].sub1 @@ -829,7 +829,7 @@ define amdgpu_ps double @struct_buffer_atomic_cmpswap_i64__vgpr_val__vgpr_cmp__s ; GFX12-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 ; GFX12-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX12-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_VBUFFER_BOTHEN_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_VBUFFER_BOTHEN_RTN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[COPY10]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_VBUFFER_BOTHEN_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_VBUFFER_BOTHEN_RTN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[COPY10]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (i64), align 1, addrspace 8) ; GFX12-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_VBUFFER_BOTHEN_RTN]].sub0_sub1 ; GFX12-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY11]].sub0 ; GFX12-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY11]].sub1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd-with-ret.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd-with-ret.ll index 89daf3ae88cbc..6725704e75257 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd-with-ret.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd-with-ret.ll @@ -1,7 +1,7 @@ ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX90A %s ; RUN: not --crash llc -global-isel < %s -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs 2>&1 | FileCheck %s -check-prefix=GFX908 -; GFX908: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32) = G_AMDGPU_BUFFER_ATOMIC_FADD %{{[0-9]+}}:vgpr, %{{[0-9]+}}:sgpr(<4 x s32>), %{{[0-9]+}}:vgpr(s32), %{{[0-9]+}}:vgpr, %{{[0-9]+}}:sgpr, 0, 0, -1 :: (volatile dereferenceable load store 
(s32), align 1, addrspace 8) (in function: buffer_atomic_add_f32_rtn) +; GFX908: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(f32) = G_AMDGPU_BUFFER_ATOMIC_FADD %{{[0-9]+}}:vgpr, %{{[0-9]+}}:sgpr(<4 x i32>), %{{[0-9]+}}:vgpr(i32), %{{[0-9]+}}:vgpr, %{{[0-9]+}}:sgpr, 0, 0, -1 :: (volatile dereferenceable load store (f32), align 1, addrspace 8) (in function: buffer_atomic_add_f32_rtn) declare float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i32, i32 immarg) declare <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i32, i32 immarg) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll index c0027642655a6..b8a9ddc53b262 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll @@ -18,7 +18,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__ ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (f32), align 1, addrspace 8) ; GFX908-NEXT: S_ENDPGM 0 ; ; GFX90A-LABEL: name: struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset @@ -35,7 +35,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__ ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (f32), align 1, addrspace 8) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -56,7 +56,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__ ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (f32), align 1, addrspace 8) ; GFX908-NEXT: S_ENDPGM 0 ; ; GFX90A-LABEL: name: 
struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset_plus4095__sgpr_soffset @@ -73,7 +73,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__ ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (f32), align 1, addrspace 8) ; GFX90A-NEXT: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4095 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset.add, i32 %soffset, i32 0) @@ -93,7 +93,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__ ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (f32), align 1, addrspace 8) ; GFX908-NEXT: S_ENDPGM 0 ; ; GFX90A-LABEL: name: struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__4095_voffset__sgpr_soffset @@ -108,7 +108,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (f32), align 1, addrspace 8) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 4095, i32 %soffset, i32 0) ret void @@ -128,7 +128,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__ ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (f32), align 1, 
addrspace 8) ; GFX908-NEXT: S_ENDPGM 0 ; ; GFX90A-LABEL: name: struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset @@ -143,7 +143,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (f32), align 1, addrspace 8) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret void @@ -194,7 +194,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__ ; GFX908-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (f32), align 1, addrspace 8) ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX908-NEXT: {{ $}} @@ -249,7 +249,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__ ; GFX90A-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (f32), align 1, addrspace 8) ; GFX90A-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX90A-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX90A-NEXT: {{ $}} @@ -306,7 +306,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__ ; GFX908-NEXT: bb.3: ; GFX908-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (f32), align 1, addrspace 8) ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; 
GFX908-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX908-NEXT: {{ $}} @@ -358,7 +358,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__ ; GFX90A-NEXT: bb.3: ; GFX90A-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (f32), align 1, addrspace 8) ; GFX90A-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX90A-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX90A-NEXT: {{ $}} @@ -389,7 +389,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__ ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (f32), align 1, addrspace 8) ; GFX908-NEXT: S_ENDPGM 0 ; ; GFX90A-LABEL: name: struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc @@ -406,7 +406,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__ ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (f32), align 1, addrspace 8) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2) ret void @@ -425,7 +425,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__ ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 2, implicit $exec :: (volatile dereferenceable load store (f32), align 1, addrspace 8) ; GFX908-NEXT: S_ENDPGM 0 ; ; GFX90A-LABEL: name: struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset_slc @@ -440,7 +440,7 @@ define amdgpu_ps void 
@struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 2, implicit $exec :: (volatile dereferenceable load store (f32), align 1, addrspace 8) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 2) ret void @@ -461,7 +461,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8) + ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x f16>), align 1, addrspace 8) ; GFX908-NEXT: S_ENDPGM 0 ; ; GFX90A-LABEL: name: struct_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset @@ -478,7 +478,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8) + ; GFX90A-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x f16>), align 1, addrspace 8) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -497,7 +497,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8) + ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x f16>), align 1, addrspace 8) ; GFX908-NEXT: S_ENDPGM 0 ; ; GFX90A-LABEL: name: struct_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset @@ 
-512,7 +512,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8) + ; GFX90A-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x f16>), align 1, addrspace 8) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.f16.ll index 98a2780e03b81..d3a938043dfc1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.f16.ll @@ -17,7 +17,7 @@ define amdgpu_ps half @struct_buffer_load_format_f16__sgpr_rsrc__vgpr_vindex__vg ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8) + ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (f16), align 1, addrspace 8) ; UNPACKED-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]] ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -34,7 +34,7 @@ define amdgpu_ps half @struct_buffer_load_format_f16__sgpr_rsrc__vgpr_vindex__vg ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8) + ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (f16), align 1, addrspace 8) ; PACKED-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -51,7 +51,7 @@ define amdgpu_ps half @struct_buffer_load_format_f16__sgpr_rsrc__vgpr_vindex__vg ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX12-NEXT: 
[[BUFFER_LOAD_FORMAT_D16_X_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (f16), align 1, addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_VBUFFER_BOTHEN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call half @llvm.amdgcn.struct.buffer.load.format.f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) @@ -72,7 +72,7 @@ define amdgpu_ps <2 x half> @struct_buffer_load_format_v2f16__sgpr_rsrc__vgpr_vi ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>), align 1, addrspace 8) + ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x f16>), align 1, addrspace 8) ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN]].sub0 ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN]].sub1 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 @@ -100,7 +100,7 @@ define amdgpu_ps <2 x half> @struct_buffer_load_format_v2f16__sgpr_rsrc__vgpr_vi ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XY_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>), align 1, addrspace 8) + ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XY_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x f16>), align 1, addrspace 8) ; PACKED-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_XY_BOTHEN]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -117,7 +117,7 @@ define amdgpu_ps <2 x half> @struct_buffer_load_format_v2f16__sgpr_rsrc__vgpr_vi ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_D16_XY_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_XY_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_D16_XY_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_XY_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x f16>), align 1, addrspace 8) ; 
GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_XY_VBUFFER_BOTHEN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call <2 x half> @llvm.amdgcn.struct.buffer.load.format.v2f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) @@ -144,7 +144,7 @@ define amdgpu_ps <4 x half> @struct_buffer_load_format_v4f16__sgpr_rsrc__vgpr_vi ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 8) + ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x f16>), align 1, addrspace 8) ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub0 ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub1 ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub2 @@ -182,7 +182,7 @@ define amdgpu_ps <4 x half> @struct_buffer_load_format_v4f16__sgpr_rsrc__vgpr_vi ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 8) + ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x f16>), align 1, addrspace 8) ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub0 ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub1 ; PACKED-NEXT: $vgpr0 = COPY [[COPY7]] @@ -202,7 +202,7 @@ define amdgpu_ps <4 x half> @struct_buffer_load_format_v4f16__sgpr_rsrc__vgpr_vi ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_VBUFFER_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_VBUFFER_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x f16>), align 1, addrspace 8) ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_VBUFFER_BOTHEN]].sub0 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_VBUFFER_BOTHEN]].sub1 ; GFX12-NEXT: $vgpr0 = COPY [[COPY7]] @@ -255,7 +255,7 @@ define amdgpu_ps <4 x half> 
@struct_buffer_load_format_v4f16__vpr_rsrc__sgpr_vin ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 8) + ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x f16>), align 1, addrspace 8) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -330,7 +330,7 @@ define amdgpu_ps <4 x half> @struct_buffer_load_format_v4f16__vpr_rsrc__sgpr_vin ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 8) + ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x f16>), align 1, addrspace 8) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -387,7 +387,7 @@ define amdgpu_ps <4 x half> @struct_buffer_load_format_v4f16__vpr_rsrc__sgpr_vin ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_VBUFFER_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_VBUFFER_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_VBUFFER_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_VBUFFER_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x f16>), align 1, addrspace 8) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -420,7 +420,7 @@ define amdgpu_ps half @struct_buffer_load_format_f16__sgpr_rsrc__vgpr_vindex__vg ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8) + ; 
UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable load (f16), align 1, addrspace 8) ; UNPACKED-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]] ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -437,7 +437,7 @@ define amdgpu_ps half @struct_buffer_load_format_f16__sgpr_rsrc__vgpr_vindex__vg ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8) + ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable load (f16), align 1, addrspace 8) ; PACKED-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -454,7 +454,7 @@ define amdgpu_ps half @struct_buffer_load_format_f16__sgpr_rsrc__vgpr_vindex__vg ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable load (f16), align 1, addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_VBUFFER_BOTHEN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %voffset = add i32 %voffset.base, 4095 @@ -476,7 +476,7 @@ define amdgpu_ps half @struct_buffer_load_format_i16__sgpr_rsrc__vgpr_vindex__vg ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8) + ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (i16), align 1, addrspace 8) ; UNPACKED-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]] ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -493,7 +493,7 @@ define amdgpu_ps half @struct_buffer_load_format_i16__sgpr_rsrc__vgpr_vindex__vg ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; PACKED-NEXT: 
[[BUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8) + ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (i16), align 1, addrspace 8) ; PACKED-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -510,7 +510,7 @@ define amdgpu_ps half @struct_buffer_load_format_i16__sgpr_rsrc__vgpr_vindex__vg ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (i16), align 1, addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_VBUFFER_BOTHEN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call i16 @llvm.amdgcn.struct.buffer.load.format.i16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll index c2ab42b08f477..37c664c8f5df6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll @@ -17,7 +17,7 @@ define amdgpu_ps float @struct_buffer_load_format_f32__sgpr_rsrc__vgpr_vindex__v ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX8-NEXT: [[BUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_BOTHEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -34,7 +34,7 @@ define amdgpu_ps float @struct_buffer_load_format_f32__sgpr_rsrc__vgpr_vindex__v ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_X_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_X_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_VBUFFER_BOTHEN [[REG_SEQUENCE1]], 
[[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_VBUFFER_BOTHEN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) @@ -55,7 +55,7 @@ define amdgpu_ps <2 x float> @struct_buffer_load_format_v2f32__sgpr_rsrc__vgpr_v ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX8-NEXT: [[BUFFER_LOAD_FORMAT_XY_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_FORMAT_XY_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x f32>), align 1, addrspace 8) ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XY_BOTHEN]].sub0 ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XY_BOTHEN]].sub1 ; GFX8-NEXT: $vgpr0 = COPY [[COPY7]] @@ -75,7 +75,7 @@ define amdgpu_ps <2 x float> @struct_buffer_load_format_v2f32__sgpr_rsrc__vgpr_v ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_XY_VBUFFER_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_XY_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_XY_VBUFFER_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_XY_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x f32>), align 1, addrspace 8) ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XY_VBUFFER_BOTHEN]].sub0 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XY_VBUFFER_BOTHEN]].sub1 ; GFX12-NEXT: $vgpr0 = COPY [[COPY7]] @@ -99,7 +99,7 @@ define amdgpu_ps <3 x float> @struct_buffer_load_format_v3f32__sgpr_rsrc__vgpr_v ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX8-NEXT: [[BUFFER_LOAD_FORMAT_XYZ_BOTHEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_FORMAT_XYZ_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_FORMAT_XYZ_BOTHEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_FORMAT_XYZ_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x f32>), align 1, addrspace 8) ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub0 ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub1 ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub2 @@ -121,7 +121,7 @@ define amdgpu_ps <3 x float> @struct_buffer_load_format_v3f32__sgpr_rsrc__vgpr_v ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: 
[[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_XYZ_VBUFFER_BOTHEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_FORMAT_XYZ_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_XYZ_VBUFFER_BOTHEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_FORMAT_XYZ_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x f32>), align 1, addrspace 8) ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_VBUFFER_BOTHEN]].sub0 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_VBUFFER_BOTHEN]].sub1 ; GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_VBUFFER_BOTHEN]].sub2 @@ -147,7 +147,7 @@ define amdgpu_ps <4 x float> @struct_buffer_load_format_v4f32__sgpr_rsrc__vgpr_v ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX8-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x f32>), align 1, addrspace 8) ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub0 ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub1 ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub2 @@ -171,7 +171,7 @@ define amdgpu_ps <4 x float> @struct_buffer_load_format_v4f32__sgpr_rsrc__vgpr_v ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_VBUFFER_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_VBUFFER_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x f32>), align 1, addrspace 8) ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_VBUFFER_BOTHEN]].sub0 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_VBUFFER_BOTHEN]].sub1 ; GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_VBUFFER_BOTHEN]].sub2 @@ -228,7 +228,7 @@ define amdgpu_ps <4 x float> @struct_buffer_load_format_v4f32__vpr_rsrc__sgpr_vi ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; GFX8-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec 
:: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x f32>), align 1, addrspace 8) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -289,7 +289,7 @@ define amdgpu_ps <4 x float> @struct_buffer_load_format_v4f32__vpr_rsrc__sgpr_vi ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_VBUFFER_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_VBUFFER_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_VBUFFER_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_VBUFFER_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x f32>), align 1, addrspace 8) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -326,7 +326,7 @@ define amdgpu_ps float @struct_buffer_load_format_f32__sgpr_rsrc__vgpr_vindex__v ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX8-NEXT: [[BUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_BOTHEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -343,7 +343,7 @@ define amdgpu_ps float @struct_buffer_load_format_f32__sgpr_rsrc__vgpr_vindex__v ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_X_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_X_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_VBUFFER_BOTHEN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %voffset = add i32 %voffset.base, 4095 @@ -365,7 +365,7 @@ define amdgpu_ps float @struct_buffer_load_format_i32__sgpr_rsrc__vgpr_vindex__v ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = 
REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX8-NEXT: [[BUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (i32), align 1, addrspace 8) ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_BOTHEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -382,7 +382,7 @@ define amdgpu_ps float @struct_buffer_load_format_i32__sgpr_rsrc__vgpr_vindex__v ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_X_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_X_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (i32), align 1, addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_VBUFFER_BOTHEN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) @@ -408,15 +408,15 @@ define amdgpu_cs void @struct_buffer_load_format_v4i32_tfe(<4 x i32> inreg %rsrc ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX8-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_TFE_IDXEN:%[0-9]+]]:vreg_160 = BUFFER_LOAD_FORMAT_XYZW_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_TFE_IDXEN:%[0-9]+]]:vreg_160 = BUFFER_LOAD_FORMAT_XYZW_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x i32>), align 1, addrspace 8) ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_TFE_IDXEN]].sub0 ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_TFE_IDXEN]].sub1 ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_TFE_IDXEN]].sub2 ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_TFE_IDXEN]].sub3 ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_TFE_IDXEN]].sub4 ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2, [[COPY12]], %subreg.sub3 - ; GFX8-NEXT: FLAT_STORE_DWORDX4 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>) into %ir.value, addrspace 1) - ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY13]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.status, addrspace 1) + ; GFX8-NEXT: FLAT_STORE_DWORDX4 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x i32>) into %ir.value, addrspace 1) + ; GFX8-NEXT: 
FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY13]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32) into %ir.status, addrspace 1) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: struct_buffer_load_format_v4i32_tfe @@ -436,15 +436,15 @@ define amdgpu_cs void @struct_buffer_load_format_v4i32_tfe(<4 x i32> inreg %rsrc ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_TFE_VBUFFER_IDXEN:%[0-9]+]]:vreg_160 = BUFFER_LOAD_FORMAT_XYZW_TFE_VBUFFER_IDXEN [[COPY8]], [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_TFE_VBUFFER_IDXEN:%[0-9]+]]:vreg_160 = BUFFER_LOAD_FORMAT_XYZW_TFE_VBUFFER_IDXEN [[COPY8]], [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x i32>), align 1, addrspace 8) ; GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_TFE_VBUFFER_IDXEN]].sub0 ; GFX12-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_TFE_VBUFFER_IDXEN]].sub1 ; GFX12-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_TFE_VBUFFER_IDXEN]].sub2 ; GFX12-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_TFE_VBUFFER_IDXEN]].sub3 ; GFX12-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_TFE_VBUFFER_IDXEN]].sub4 ; GFX12-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2, [[COPY12]], %subreg.sub3 - ; GFX12-NEXT: GLOBAL_STORE_DWORDX4 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<4 x s32>) into %ir.value, addrspace 1) - ; GFX12-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY13]], 0, 0, implicit $exec :: (store (s32) into %ir.status, addrspace 1) + ; GFX12-NEXT: GLOBAL_STORE_DWORDX4 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<4 x i32>) into %ir.value, addrspace 1) + ; GFX12-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY13]], 0, 0, implicit $exec :: (store (i32) into %ir.status, addrspace 1) ; GFX12-NEXT: S_ENDPGM 0 %load = call { <4 x i32>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v4i32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) @@ -475,14 +475,14 @@ define amdgpu_cs void @struct_buffer_load_format_v3i32_tfe(<4 x i32> inreg %rsrc ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX8-NEXT: [[BUFFER_LOAD_FORMAT_XYZ_TFE_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZ_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_FORMAT_XYZ_TFE_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZ_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x i32>), align 1, addrspace 8) ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_TFE_IDXEN]].sub0 ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_TFE_IDXEN]].sub1 ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_TFE_IDXEN]].sub2 ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY 
[[BUFFER_LOAD_FORMAT_XYZ_TFE_IDXEN]].sub3 ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2 - ; GFX8-NEXT: FLAT_STORE_DWORDX3 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>) into %ir.value, align 16, addrspace 1) - ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY12]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.status, addrspace 1) + ; GFX8-NEXT: FLAT_STORE_DWORDX3 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x i32>) into %ir.value, align 16, addrspace 1) + ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY12]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32) into %ir.status, addrspace 1) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: struct_buffer_load_format_v3i32_tfe @@ -502,14 +502,14 @@ define amdgpu_cs void @struct_buffer_load_format_v3i32_tfe(<4 x i32> inreg %rsrc ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_XYZ_TFE_VBUFFER_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZ_TFE_VBUFFER_IDXEN [[COPY8]], [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_XYZ_TFE_VBUFFER_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZ_TFE_VBUFFER_IDXEN [[COPY8]], [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x i32>), align 1, addrspace 8) ; GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_TFE_VBUFFER_IDXEN]].sub0 ; GFX12-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_TFE_VBUFFER_IDXEN]].sub1 ; GFX12-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_TFE_VBUFFER_IDXEN]].sub2 ; GFX12-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_TFE_VBUFFER_IDXEN]].sub3 ; GFX12-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2 - ; GFX12-NEXT: GLOBAL_STORE_DWORDX3 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<3 x s32>) into %ir.value, align 16, addrspace 1) - ; GFX12-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY12]], 0, 0, implicit $exec :: (store (s32) into %ir.status, addrspace 1) + ; GFX12-NEXT: GLOBAL_STORE_DWORDX3 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<3 x i32>) into %ir.value, align 16, addrspace 1) + ; GFX12-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY12]], 0, 0, implicit $exec :: (store (i32) into %ir.status, addrspace 1) ; GFX12-NEXT: S_ENDPGM 0 %load = call { <3 x i32>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v3i32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) @@ -540,11 +540,11 @@ define amdgpu_cs void @struct_buffer_load_format_i32_tfe(<4 x i32> inreg %rsrc, ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX8-NEXT: [[BUFFER_LOAD_FORMAT_X_TFE_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_X_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 
8) + ; GFX8-NEXT: [[BUFFER_LOAD_FORMAT_X_TFE_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_X_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (i32), align 1, addrspace 8) ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_X_TFE_IDXEN]].sub0 ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_X_TFE_IDXEN]].sub1 - ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY9]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.value, addrspace 1) - ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.status, addrspace 1) + ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY9]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32) into %ir.value, addrspace 1) + ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32) into %ir.status, addrspace 1) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: struct_buffer_load_format_i32_tfe @@ -564,11 +564,11 @@ define amdgpu_cs void @struct_buffer_load_format_i32_tfe(<4 x i32> inreg %rsrc, ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_X_TFE_VBUFFER_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_X_TFE_VBUFFER_IDXEN [[COPY8]], [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_X_TFE_VBUFFER_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_X_TFE_VBUFFER_IDXEN [[COPY8]], [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (i32), align 1, addrspace 8) ; GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_X_TFE_VBUFFER_IDXEN]].sub0 ; GFX12-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_X_TFE_VBUFFER_IDXEN]].sub1 - ; GFX12-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE1]], [[COPY9]], 0, 0, implicit $exec :: (store (s32) into %ir.value, addrspace 1) - ; GFX12-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (store (s32) into %ir.status, addrspace 1) + ; GFX12-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE1]], [[COPY9]], 0, 0, implicit $exec :: (store (i32) into %ir.value, addrspace 1) + ; GFX12-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (store (i32) into %ir.status, addrspace 1) ; GFX12-NEXT: S_ENDPGM 0 %load = call { i32, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_i32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll index 588b0204619f0..c3fdd5493335c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll @@ -17,7 +17,7 @@ define amdgpu_ps float @struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vof ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 
implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -34,7 +34,7 @@ define amdgpu_ps float @struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vof ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_VBUFFER_BOTHEN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) @@ -56,7 +56,7 @@ define amdgpu_ps <2 x float> @struct_buffer_load_v2f32__sgpr_rsrc__vgpr_vindex__ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX2_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX2_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x f32>), align 1, addrspace 8) ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_BOTHEN]].sub0 ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_BOTHEN]].sub1 ; GFX8-NEXT: $vgpr0 = COPY [[COPY7]] @@ -76,7 +76,7 @@ define amdgpu_ps <2 x float> @struct_buffer_load_v2f32__sgpr_rsrc__vgpr_vindex__ ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX2_VBUFFER_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX2_VBUFFER_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x f32>), align 1, addrspace 8) ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_VBUFFER_BOTHEN]].sub0 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_VBUFFER_BOTHEN]].sub1 ; GFX12-NEXT: $vgpr0 = COPY [[COPY7]] @@ -101,7 +101,7 @@ define amdgpu_ps <3 x float> @struct_buffer_load_v3f32__sgpr_rsrc__vgpr_vindex__ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-NEXT: 
[[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX3_BOTHEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX3_BOTHEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x f32>), align 1, addrspace 8) ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_BOTHEN]].sub0 ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_BOTHEN]].sub1 ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_BOTHEN]].sub2 @@ -123,7 +123,7 @@ define amdgpu_ps <3 x float> @struct_buffer_load_v3f32__sgpr_rsrc__vgpr_vindex__ ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX3_VBUFFER_BOTHEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX3_VBUFFER_BOTHEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x f32>), align 1, addrspace 8) ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_VBUFFER_BOTHEN]].sub0 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_VBUFFER_BOTHEN]].sub1 ; GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_VBUFFER_BOTHEN]].sub2 @@ -150,7 +150,7 @@ define amdgpu_ps <4 x float> @struct_buffer_load_v4f32__sgpr_rsrc__vgpr_vindex__ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x f32>), align 1, addrspace 8) ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_BOTHEN]].sub0 ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_BOTHEN]].sub1 ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_BOTHEN]].sub2 @@ -174,7 +174,7 @@ define amdgpu_ps <4 x float> @struct_buffer_load_v4f32__sgpr_rsrc__vgpr_vindex__ ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + ; GFX12-NEXT: 
[[BUFFER_LOAD_DWORDX4_VBUFFER_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x f32>), align 1, addrspace 8) ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_VBUFFER_BOTHEN]].sub0 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_VBUFFER_BOTHEN]].sub1 ; GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_VBUFFER_BOTHEN]].sub2 @@ -204,7 +204,7 @@ define amdgpu_ps float @struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vof ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY4]], %subreg.sub1 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -222,7 +222,7 @@ define amdgpu_ps float @struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vof ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY4]], %subreg.sub1 - ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_VBUFFER_BOTHEN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 0, i32 %voffset, i32 %soffset, i32 0) @@ -244,7 +244,7 @@ define amdgpu_ps float @struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vof ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -261,7 +261,7 @@ define amdgpu_ps float @struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vof ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], 
%subreg.sub1 - ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_VBUFFER_BOTHEN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %voffset = add i32 %voffset.base, 4095 @@ -283,7 +283,7 @@ define amdgpu_ps float @struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vof ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 64 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -300,7 +300,7 @@ define amdgpu_ps float @struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vof ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 64 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_VBUFFER_BOTHEN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 64, i32 0) @@ -350,7 +350,7 @@ define amdgpu_ps float @struct_buffer_load_f32__vgpr_rsrc__sgpr_vindex__sgpr_vof ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -404,7 +404,7 @@ define amdgpu_ps float @struct_buffer_load_f32__vgpr_rsrc__sgpr_vindex__sgpr_vof ; GFX12-NEXT: 
successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -434,7 +434,7 @@ define amdgpu_ps float @struct_buffer_load_i8_zext__sgpr_rsrc__vgpr_vindex__vgpr ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX8-NEXT: [[BUFFER_LOAD_UBYTE_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_UBYTE_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (i8), addrspace 8) ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_BOTHEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -451,7 +451,7 @@ define amdgpu_ps float @struct_buffer_load_i8_zext__sgpr_rsrc__vgpr_vindex__vgpr ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX12-NEXT: [[BUFFER_LOAD_UBYTE_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_UBYTE_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (i8), addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_VBUFFER_BOTHEN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) @@ -474,7 +474,7 @@ define amdgpu_ps float @struct_buffer_load_i8_sext__sgpr_rsrc__vgpr_vindex__vgpr ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX8-NEXT: [[BUFFER_LOAD_SBYTE_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_SBYTE_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_SBYTE_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_SBYTE_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (i8), addrspace 8) ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_SBYTE_BOTHEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -491,7 +491,7 @@ define amdgpu_ps float 
@struct_buffer_load_i8_sext__sgpr_rsrc__vgpr_vindex__vgpr ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX12-NEXT: [[BUFFER_LOAD_SBYTE_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_SBYTE_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_SBYTE_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_SBYTE_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (i8), addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_SBYTE_VBUFFER_BOTHEN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) @@ -514,7 +514,7 @@ define amdgpu_ps float @struct_buffer_load_i8_sext_wrong_width(<4 x i32> inreg % ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX8-NEXT: [[BUFFER_LOAD_UBYTE_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_UBYTE_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (i8), addrspace 8) ; GFX8-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[BUFFER_LOAD_UBYTE_BOTHEN]], 0, 4, implicit $exec ; GFX8-NEXT: $vgpr0 = COPY [[V_BFE_I32_e64_]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -532,7 +532,7 @@ define amdgpu_ps float @struct_buffer_load_i8_sext_wrong_width(<4 x i32> inreg % ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX12-NEXT: [[BUFFER_LOAD_UBYTE_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_UBYTE_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (i8), addrspace 8) ; GFX12-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[BUFFER_LOAD_UBYTE_VBUFFER_BOTHEN]], 0, 4, implicit $exec ; GFX12-NEXT: $vgpr0 = COPY [[V_BFE_I32_e64_]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -557,7 +557,7 @@ define amdgpu_ps float @struct_buffer_load_i16_zext__sgpr_rsrc__vgpr_vindex__vgp ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX8-NEXT: [[BUFFER_LOAD_USHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_USHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit 
$exec :: (dereferenceable load (i16), align 1, addrspace 8) ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_BOTHEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -574,7 +574,7 @@ define amdgpu_ps float @struct_buffer_load_i16_zext__sgpr_rsrc__vgpr_vindex__vgp ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX12-NEXT: [[BUFFER_LOAD_USHORT_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_USHORT_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (i16), align 1, addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_VBUFFER_BOTHEN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call i16 @llvm.amdgcn.struct.buffer.load.i16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) @@ -597,7 +597,7 @@ define amdgpu_ps float @struct_buffer_load_i16_sext__sgpr_rsrc__vgpr_vindex__vgp ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX8-NEXT: [[BUFFER_LOAD_SSHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_SSHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_SSHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_SSHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (i16), align 1, addrspace 8) ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_SSHORT_BOTHEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -614,7 +614,7 @@ define amdgpu_ps float @struct_buffer_load_i16_sext__sgpr_rsrc__vgpr_vindex__vgp ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX12-NEXT: [[BUFFER_LOAD_SSHORT_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_SSHORT_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_SSHORT_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_SSHORT_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (i16), align 1, addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_SSHORT_VBUFFER_BOTHEN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call i16 @llvm.amdgcn.struct.buffer.load.i16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) @@ -637,7 +637,7 @@ define amdgpu_ps float @struct_buffer_load_i16_sext_wrong_width(<4 x i32> inreg ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX8-NEXT: [[BUFFER_LOAD_USHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: 
(dereferenceable load (s16), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_USHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (i16), align 1, addrspace 8) ; GFX8-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[BUFFER_LOAD_USHORT_BOTHEN]], 0, 8, implicit $exec ; GFX8-NEXT: $vgpr0 = COPY [[V_BFE_I32_e64_]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -655,7 +655,7 @@ define amdgpu_ps float @struct_buffer_load_i16_sext_wrong_width(<4 x i32> inreg ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX12-NEXT: [[BUFFER_LOAD_USHORT_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_USHORT_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (i16), align 1, addrspace 8) ; GFX12-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[BUFFER_LOAD_USHORT_VBUFFER_BOTHEN]], 0, 8, implicit $exec ; GFX12-NEXT: $vgpr0 = COPY [[V_BFE_I32_e64_]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -681,7 +681,7 @@ define amdgpu_ps half @struct_buffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_voff ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX8-NEXT: [[BUFFER_LOAD_USHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_USHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (f16), align 1, addrspace 8) ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_BOTHEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -698,7 +698,7 @@ define amdgpu_ps half @struct_buffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_voff ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX12-NEXT: [[BUFFER_LOAD_USHORT_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_USHORT_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (f16), align 1, addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_VBUFFER_BOTHEN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call half @llvm.amdgcn.struct.buffer.load.f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) @@ -720,7 +720,7 @@ define amdgpu_ps <2 x half> @struct_buffer_load_v2f16__sgpr_rsrc__vgpr_vindex__v ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = 
COPY $sgpr6 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x f16>), align 1, addrspace 8) ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -737,7 +737,7 @@ define amdgpu_ps <2 x half> @struct_buffer_load_v2f16__sgpr_rsrc__vgpr_vindex__v ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x f16>), align 1, addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_VBUFFER_BOTHEN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call <2 x half> @llvm.amdgcn.struct.buffer.load.v2f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) @@ -765,7 +765,7 @@ define amdgpu_ps <4 x half> @struct_buffer_load_v4f16__sgpr_rsrc__vgpr_vindex__v ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX2_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX2_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x f16>), align 1, addrspace 8) ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_BOTHEN]].sub0 ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_BOTHEN]].sub1 ; GFX8-NEXT: $vgpr0 = COPY [[COPY7]] @@ -785,7 +785,7 @@ define amdgpu_ps <4 x half> @struct_buffer_load_v4f16__sgpr_rsrc__vgpr_vindex__v ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX2_VBUFFER_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX2_VBUFFER_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x f16>), align 1, addrspace 8) ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY 
[[BUFFER_LOAD_DWORDX2_VBUFFER_BOTHEN]].sub0 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_VBUFFER_BOTHEN]].sub1 ; GFX12-NEXT: $vgpr0 = COPY [[COPY7]] @@ -810,7 +810,7 @@ define amdgpu_ps float @struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vof ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -827,7 +827,7 @@ define amdgpu_ps float @struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vof ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_VBUFFER_BOTHEN]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 1) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.tfe.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.tfe.ll index 62254af0a5930..8944b0ca14ee3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.tfe.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.tfe.ll @@ -7,281 +7,281 @@ ; RUN: llc -global-isel -mcpu=gfx1100 -mtriple=amdgcn-- -stop-after=instruction-select -verify-machineinstrs < %s | FileCheck %s -check-prefix=GFX11 ; RUN: llc -global-isel -mcpu=gfx1200 -mtriple=amdgcn-- -stop-after=instruction-select -verify-machineinstrs < %s | FileCheck %s -check-prefix=GFX12 -define amdgpu_ps void @raw_buffer_load_i8_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) { - ; GFX67-LABEL: name: raw_buffer_load_i8_tfe - ; GFX67: bb.1 (%ir-block.0): - ; GFX67-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX67-NEXT: {{ $}} - ; GFX67-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX67-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX67-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX67-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX67-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX67-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX67-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX67-NEXT: 
[[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX67-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX67-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX67-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX67-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX67-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX67-NEXT: [[BUFFER_LOAD_UBYTE_TFE_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_UBYTE_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 8) - ; GFX67-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_UBYTE_TFE_IDXEN]].sub0 - ; GFX67-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_UBYTE_TFE_IDXEN]].sub1 - ; GFX67-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX67-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 - ; GFX67-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_2]], %subreg.sub1 - ; GFX67-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 - ; GFX67-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE3]], %subreg.sub2_sub3 - ; GFX67-NEXT: BUFFER_STORE_BYTE_ADDR64 [[COPY9]], [[REG_SEQUENCE1]], [[REG_SEQUENCE4]], 0, 0, 0, 0, implicit $exec :: (store (s8) into %ir.data_addr, addrspace 1) - ; GFX67-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX67-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 - ; GFX67-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_3]], %subreg.sub0, [[S_MOV_B32_4]], %subreg.sub1 - ; GFX67-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0 - ; GFX67-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_1]], %subreg.sub0_sub1, [[REG_SEQUENCE5]], %subreg.sub2_sub3 - ; GFX67-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY10]], [[REG_SEQUENCE2]], [[REG_SEQUENCE6]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) - ; GFX67-NEXT: S_ENDPGM 0 - ; - ; GFX8-LABEL: name: raw_buffer_load_i8_tfe - ; GFX8: bb.1 (%ir-block.0): - ; GFX8-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX8-NEXT: [[BUFFER_LOAD_UBYTE_TFE_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_UBYTE_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 8) - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_UBYTE_TFE_IDXEN]].sub0 - ; GFX8-NEXT: 
[[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_UBYTE_TFE_IDXEN]].sub1 - ; GFX8-NEXT: FLAT_STORE_BYTE [[REG_SEQUENCE1]], [[COPY9]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8) into %ir.data_addr, addrspace 1) - ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.tfe_addr, addrspace 1) - ; GFX8-NEXT: S_ENDPGM 0 - ; - ; GFX910-LABEL: name: raw_buffer_load_i8_tfe - ; GFX910: bb.1 (%ir-block.0): - ; GFX910-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX910-NEXT: {{ $}} - ; GFX910-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX910-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX910-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX910-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX910-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX910-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX910-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX910-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX910-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX910-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX910-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX910-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX910-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX910-NEXT: [[BUFFER_LOAD_UBYTE_TFE_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_UBYTE_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 8) - ; GFX910-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_UBYTE_TFE_IDXEN]].sub0 - ; GFX910-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_UBYTE_TFE_IDXEN]].sub1 - ; GFX910-NEXT: GLOBAL_STORE_BYTE [[REG_SEQUENCE1]], [[COPY9]], 0, 0, implicit $exec :: (store (s8) into %ir.data_addr, addrspace 1) - ; GFX910-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) - ; GFX910-NEXT: S_ENDPGM 0 - ; - ; GFX11-LABEL: name: raw_buffer_load_i8_tfe - ; GFX11: bb.1 (%ir-block.0): - ; GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX11-NEXT: [[BUFFER_LOAD_UBYTE_TFE_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_UBYTE_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 
implicit $exec :: (dereferenceable load (s8), addrspace 8) - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_UBYTE_TFE_IDXEN]].sub0 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_UBYTE_TFE_IDXEN]].sub1 - ; GFX11-NEXT: GLOBAL_STORE_BYTE [[REG_SEQUENCE1]], [[COPY9]], 0, 0, implicit $exec :: (store (s8) into %ir.data_addr, addrspace 1) - ; GFX11-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) - ; GFX11-NEXT: S_ENDPGM 0 - ; - ; GFX12-LABEL: name: raw_buffer_load_i8_tfe - ; GFX12: bb.1 (%ir-block.0): - ; GFX12-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX12-NEXT: [[BUFFER_LOAD_UBYTE_TFE_VBUFFER_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_UBYTE_TFE_VBUFFER_IDXEN [[COPY8]], [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 8) - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_UBYTE_TFE_VBUFFER_IDXEN]].sub0 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_UBYTE_TFE_VBUFFER_IDXEN]].sub1 - ; GFX12-NEXT: GLOBAL_STORE_BYTE [[REG_SEQUENCE1]], [[COPY9]], 0, 0, implicit $exec :: (store (s8) into %ir.data_addr, addrspace 1) - ; GFX12-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) - ; GFX12-NEXT: S_ENDPGM 0 - %res = call { i8, i32 } @llvm.amdgcn.struct.buffer.load.sl_i8i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) - %data = extractvalue { i8, i32 } %res, 0 - store i8 %data, ptr addrspace(1) %data_addr - %tfe = extractvalue { i8, i32 } %res, 1 - store i32 %tfe, ptr addrspace(1) %tfe_addr - ret void -} +; define amdgpu_ps void @raw_buffer_load_i8_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) { +; ; GFX67-LABEL: name: raw_buffer_load_i8_tfe +; ; GFX67: bb.1 (%ir-block.0): +; ; GFX67-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 +; ; GFX67-NEXT: {{ $}} +; ; GFX67-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 +; ; GFX67-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 +; ; GFX67-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 +; ; GFX67-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 +; ; GFX67-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 +; ; GFX67-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +; ; GFX67-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 +; ; GFX67-NEXT: 
[[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 +; ; GFX67-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 +; ; GFX67-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 +; ; GFX67-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 +; ; GFX67-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX67-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] +; ; GFX67-NEXT: [[BUFFER_LOAD_UBYTE_TFE_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_UBYTE_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 8) +; ; GFX67-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_UBYTE_TFE_IDXEN]].sub0 +; ; GFX67-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_UBYTE_TFE_IDXEN]].sub1 +; ; GFX67-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX67-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 +; ; GFX67-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_2]], %subreg.sub1 +; ; GFX67-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 +; ; GFX67-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE3]], %subreg.sub2_sub3 +; ; GFX67-NEXT: BUFFER_STORE_BYTE_ADDR64 [[COPY9]], [[REG_SEQUENCE1]], [[REG_SEQUENCE4]], 0, 0, 0, 0, implicit $exec :: (store (s8) into %ir.data_addr, addrspace 1) +; ; GFX67-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX67-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 +; ; GFX67-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_3]], %subreg.sub0, [[S_MOV_B32_4]], %subreg.sub1 +; ; GFX67-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0 +; ; GFX67-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_1]], %subreg.sub0_sub1, [[REG_SEQUENCE5]], %subreg.sub2_sub3 +; ; GFX67-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY10]], [[REG_SEQUENCE2]], [[REG_SEQUENCE6]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) +; ; GFX67-NEXT: S_ENDPGM 0 +; ; +; ; GFX8-LABEL: name: raw_buffer_load_i8_tfe +; ; GFX8: bb.1 (%ir-block.0): +; ; GFX8-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 +; ; GFX8-NEXT: {{ $}} +; ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 +; ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 +; ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 +; ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 +; ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 +; ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +; ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 +; ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 +; ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 +; ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 +; ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 +; ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] +; ; GFX8-NEXT: [[BUFFER_LOAD_UBYTE_TFE_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_UBYTE_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 8) +; ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY 
[[BUFFER_LOAD_UBYTE_TFE_IDXEN]].sub0 +; ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_UBYTE_TFE_IDXEN]].sub1 +; ; GFX8-NEXT: FLAT_STORE_BYTE [[REG_SEQUENCE1]], [[COPY9]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8) into %ir.data_addr, addrspace 1) +; ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.tfe_addr, addrspace 1) +; ; GFX8-NEXT: S_ENDPGM 0 +; ; +; ; GFX910-LABEL: name: raw_buffer_load_i8_tfe +; ; GFX910: bb.1 (%ir-block.0): +; ; GFX910-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 +; ; GFX910-NEXT: {{ $}} +; ; GFX910-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 +; ; GFX910-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 +; ; GFX910-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 +; ; GFX910-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 +; ; GFX910-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 +; ; GFX910-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +; ; GFX910-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 +; ; GFX910-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 +; ; GFX910-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 +; ; GFX910-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 +; ; GFX910-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 +; ; GFX910-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX910-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] +; ; GFX910-NEXT: [[BUFFER_LOAD_UBYTE_TFE_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_UBYTE_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 8) +; ; GFX910-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_UBYTE_TFE_IDXEN]].sub0 +; ; GFX910-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_UBYTE_TFE_IDXEN]].sub1 +; ; GFX910-NEXT: GLOBAL_STORE_BYTE [[REG_SEQUENCE1]], [[COPY9]], 0, 0, implicit $exec :: (store (s8) into %ir.data_addr, addrspace 1) +; ; GFX910-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) +; ; GFX910-NEXT: S_ENDPGM 0 +; ; +; ; GFX11-LABEL: name: raw_buffer_load_i8_tfe +; ; GFX11: bb.1 (%ir-block.0): +; ; GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 +; ; GFX11-NEXT: {{ $}} +; ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 +; ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 +; ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 +; ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 +; ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 +; ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +; ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 +; ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 +; ; GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 +; ; GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 +; ; GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 +; ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] +; ; GFX11-NEXT: 
[[BUFFER_LOAD_UBYTE_TFE_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_UBYTE_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 8) +; ; GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_UBYTE_TFE_IDXEN]].sub0 +; ; GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_UBYTE_TFE_IDXEN]].sub1 +; ; GFX11-NEXT: GLOBAL_STORE_BYTE [[REG_SEQUENCE1]], [[COPY9]], 0, 0, implicit $exec :: (store (s8) into %ir.data_addr, addrspace 1) +; ; GFX11-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) +; ; GFX11-NEXT: S_ENDPGM 0 +; ; +; ; GFX12-LABEL: name: raw_buffer_load_i8_tfe +; ; GFX12: bb.1 (%ir-block.0): +; ; GFX12-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 +; ; GFX12-NEXT: {{ $}} +; ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 +; ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 +; ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 +; ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 +; ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 +; ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +; ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 +; ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 +; ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 +; ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 +; ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 +; ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] +; ; GFX12-NEXT: [[BUFFER_LOAD_UBYTE_TFE_VBUFFER_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_UBYTE_TFE_VBUFFER_IDXEN [[COPY8]], [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 8) +; ; GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_UBYTE_TFE_VBUFFER_IDXEN]].sub0 +; ; GFX12-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_UBYTE_TFE_VBUFFER_IDXEN]].sub1 +; ; GFX12-NEXT: GLOBAL_STORE_BYTE [[REG_SEQUENCE1]], [[COPY9]], 0, 0, implicit $exec :: (store (s8) into %ir.data_addr, addrspace 1) +; ; GFX12-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) +; ; GFX12-NEXT: S_ENDPGM 0 +; %res = call { i8, i32 } @llvm.amdgcn.struct.buffer.load.sl_i8i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) +; %data = extractvalue { i8, i32 } %res, 0 +; store i8 %data, ptr addrspace(1) %data_addr +; %tfe = extractvalue { i8, i32 } %res, 1 +; store i32 %tfe, ptr addrspace(1) %tfe_addr +; ret void +; } -define amdgpu_ps void @raw_buffer_load_i16_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) { - ; GFX67-LABEL: name: raw_buffer_load_i16_tfe - ; GFX67: bb.1 (%ir-block.0): - ; GFX67-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX67-NEXT: {{ $}} - ; GFX67-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX67-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX67-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX67-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX67-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], 
%subreg.sub3 - ; GFX67-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX67-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX67-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX67-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX67-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX67-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX67-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX67-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX67-NEXT: [[BUFFER_LOAD_USHORT_TFE_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_USHORT_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8) - ; GFX67-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_IDXEN]].sub0 - ; GFX67-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_IDXEN]].sub1 - ; GFX67-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX67-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 - ; GFX67-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_2]], %subreg.sub1 - ; GFX67-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 - ; GFX67-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE3]], %subreg.sub2_sub3 - ; GFX67-NEXT: BUFFER_STORE_SHORT_ADDR64 [[COPY9]], [[REG_SEQUENCE1]], [[REG_SEQUENCE4]], 0, 0, 0, 0, implicit $exec :: (store (s16) into %ir.data_addr, addrspace 1) - ; GFX67-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX67-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 - ; GFX67-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_3]], %subreg.sub0, [[S_MOV_B32_4]], %subreg.sub1 - ; GFX67-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0 - ; GFX67-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_1]], %subreg.sub0_sub1, [[REG_SEQUENCE5]], %subreg.sub2_sub3 - ; GFX67-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY10]], [[REG_SEQUENCE2]], [[REG_SEQUENCE6]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) - ; GFX67-NEXT: S_ENDPGM 0 - ; - ; GFX8-LABEL: name: raw_buffer_load_i16_tfe - ; GFX8: bb.1 (%ir-block.0): - ; GFX8-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX8-NEXT: [[BUFFER_LOAD_USHORT_TFE_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_USHORT_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit 
$exec :: (dereferenceable load (s16), align 1, addrspace 8) - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_IDXEN]].sub0 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_IDXEN]].sub1 - ; GFX8-NEXT: FLAT_STORE_SHORT [[REG_SEQUENCE1]], [[COPY9]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16) into %ir.data_addr, addrspace 1) - ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.tfe_addr, addrspace 1) - ; GFX8-NEXT: S_ENDPGM 0 - ; - ; GFX910-LABEL: name: raw_buffer_load_i16_tfe - ; GFX910: bb.1 (%ir-block.0): - ; GFX910-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX910-NEXT: {{ $}} - ; GFX910-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX910-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX910-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX910-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX910-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX910-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX910-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX910-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX910-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX910-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX910-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX910-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX910-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX910-NEXT: [[BUFFER_LOAD_USHORT_TFE_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_USHORT_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8) - ; GFX910-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_IDXEN]].sub0 - ; GFX910-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_IDXEN]].sub1 - ; GFX910-NEXT: GLOBAL_STORE_SHORT [[REG_SEQUENCE1]], [[COPY9]], 0, 0, implicit $exec :: (store (s16) into %ir.data_addr, addrspace 1) - ; GFX910-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) - ; GFX910-NEXT: S_ENDPGM 0 - ; - ; GFX11-LABEL: name: raw_buffer_load_i16_tfe - ; GFX11: bb.1 (%ir-block.0): - ; GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX11-NEXT: 
[[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX11-NEXT: [[BUFFER_LOAD_USHORT_TFE_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_USHORT_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8) - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_IDXEN]].sub0 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_IDXEN]].sub1 - ; GFX11-NEXT: GLOBAL_STORE_SHORT [[REG_SEQUENCE1]], [[COPY9]], 0, 0, implicit $exec :: (store (s16) into %ir.data_addr, addrspace 1) - ; GFX11-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) - ; GFX11-NEXT: S_ENDPGM 0 - ; - ; GFX12-LABEL: name: raw_buffer_load_i16_tfe - ; GFX12: bb.1 (%ir-block.0): - ; GFX12-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX12-NEXT: [[BUFFER_LOAD_USHORT_TFE_VBUFFER_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_USHORT_TFE_VBUFFER_IDXEN [[COPY8]], [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8) - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_VBUFFER_IDXEN]].sub0 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_VBUFFER_IDXEN]].sub1 - ; GFX12-NEXT: GLOBAL_STORE_SHORT [[REG_SEQUENCE1]], [[COPY9]], 0, 0, implicit $exec :: (store (s16) into %ir.data_addr, addrspace 1) - ; GFX12-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) - ; GFX12-NEXT: S_ENDPGM 0 - %res = call { i16, i32 } @llvm.amdgcn.struct.buffer.load.sl_i16i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) - %data = extractvalue { i16, i32 } %res, 0 - store i16 %data, ptr addrspace(1) %data_addr - %tfe = extractvalue { i16, i32 } %res, 1 - store i32 %tfe, ptr addrspace(1) %tfe_addr - ret void -} +; define amdgpu_ps void @raw_buffer_load_i16_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) { +; ; GFX67-LABEL: name: raw_buffer_load_i16_tfe +; ; GFX67: bb.1 (%ir-block.0): +; ; GFX67-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 +; ; GFX67-NEXT: {{ $}} +; ; GFX67-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 +; ; GFX67-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 +; ; GFX67-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 +; ; GFX67-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 +; ; GFX67-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], 
%subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 +; ; GFX67-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +; ; GFX67-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 +; ; GFX67-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 +; ; GFX67-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 +; ; GFX67-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 +; ; GFX67-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 +; ; GFX67-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX67-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] +; ; GFX67-NEXT: [[BUFFER_LOAD_USHORT_TFE_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_USHORT_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8) +; ; GFX67-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_IDXEN]].sub0 +; ; GFX67-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_IDXEN]].sub1 +; ; GFX67-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX67-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 +; ; GFX67-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_2]], %subreg.sub1 +; ; GFX67-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 +; ; GFX67-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE3]], %subreg.sub2_sub3 +; ; GFX67-NEXT: BUFFER_STORE_SHORT_ADDR64 [[COPY9]], [[REG_SEQUENCE1]], [[REG_SEQUENCE4]], 0, 0, 0, 0, implicit $exec :: (store (s16) into %ir.data_addr, addrspace 1) +; ; GFX67-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX67-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 +; ; GFX67-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_3]], %subreg.sub0, [[S_MOV_B32_4]], %subreg.sub1 +; ; GFX67-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0 +; ; GFX67-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_1]], %subreg.sub0_sub1, [[REG_SEQUENCE5]], %subreg.sub2_sub3 +; ; GFX67-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY10]], [[REG_SEQUENCE2]], [[REG_SEQUENCE6]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) +; ; GFX67-NEXT: S_ENDPGM 0 +; ; +; ; GFX8-LABEL: name: raw_buffer_load_i16_tfe +; ; GFX8: bb.1 (%ir-block.0): +; ; GFX8-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 +; ; GFX8-NEXT: {{ $}} +; ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 +; ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 +; ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 +; ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 +; ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 +; ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +; ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 +; ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 +; ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 +; ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 +; ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 +; ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] +; ; GFX8-NEXT: 
[[BUFFER_LOAD_USHORT_TFE_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_USHORT_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8) +; ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_IDXEN]].sub0 +; ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_IDXEN]].sub1 +; ; GFX8-NEXT: FLAT_STORE_SHORT [[REG_SEQUENCE1]], [[COPY9]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16) into %ir.data_addr, addrspace 1) +; ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.tfe_addr, addrspace 1) +; ; GFX8-NEXT: S_ENDPGM 0 +; ; +; ; GFX910-LABEL: name: raw_buffer_load_i16_tfe +; ; GFX910: bb.1 (%ir-block.0): +; ; GFX910-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 +; ; GFX910-NEXT: {{ $}} +; ; GFX910-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 +; ; GFX910-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 +; ; GFX910-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 +; ; GFX910-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 +; ; GFX910-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 +; ; GFX910-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +; ; GFX910-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 +; ; GFX910-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 +; ; GFX910-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 +; ; GFX910-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 +; ; GFX910-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 +; ; GFX910-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX910-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] +; ; GFX910-NEXT: [[BUFFER_LOAD_USHORT_TFE_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_USHORT_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8) +; ; GFX910-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_IDXEN]].sub0 +; ; GFX910-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_IDXEN]].sub1 +; ; GFX910-NEXT: GLOBAL_STORE_SHORT [[REG_SEQUENCE1]], [[COPY9]], 0, 0, implicit $exec :: (store (s16) into %ir.data_addr, addrspace 1) +; ; GFX910-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) +; ; GFX910-NEXT: S_ENDPGM 0 +; ; +; ; GFX11-LABEL: name: raw_buffer_load_i16_tfe +; ; GFX11: bb.1 (%ir-block.0): +; ; GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 +; ; GFX11-NEXT: {{ $}} +; ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 +; ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 +; ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 +; ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 +; ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 +; ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +; ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 +; ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 +; ; GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 +; ; GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 +; ; GFX11-NEXT: 
[[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 +; ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] +; ; GFX11-NEXT: [[BUFFER_LOAD_USHORT_TFE_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_USHORT_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8) +; ; GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_IDXEN]].sub0 +; ; GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_IDXEN]].sub1 +; ; GFX11-NEXT: GLOBAL_STORE_SHORT [[REG_SEQUENCE1]], [[COPY9]], 0, 0, implicit $exec :: (store (s16) into %ir.data_addr, addrspace 1) +; ; GFX11-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) +; ; GFX11-NEXT: S_ENDPGM 0 +; ; +; ; GFX12-LABEL: name: raw_buffer_load_i16_tfe +; ; GFX12: bb.1 (%ir-block.0): +; ; GFX12-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 +; ; GFX12-NEXT: {{ $}} +; ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 +; ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 +; ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 +; ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 +; ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 +; ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +; ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 +; ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 +; ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 +; ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 +; ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 +; ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] +; ; GFX12-NEXT: [[BUFFER_LOAD_USHORT_TFE_VBUFFER_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_USHORT_TFE_VBUFFER_IDXEN [[COPY8]], [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8) +; ; GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_VBUFFER_IDXEN]].sub0 +; ; GFX12-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_VBUFFER_IDXEN]].sub1 +; ; GFX12-NEXT: GLOBAL_STORE_SHORT [[REG_SEQUENCE1]], [[COPY9]], 0, 0, implicit $exec :: (store (s16) into %ir.data_addr, addrspace 1) +; ; GFX12-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) +; ; GFX12-NEXT: S_ENDPGM 0 +; %res = call { i16, i32 } @llvm.amdgcn.struct.buffer.load.sl_i16i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) +; %data = extractvalue { i16, i32 } %res, 0 +; store i16 %data, ptr addrspace(1) %data_addr +; %tfe = extractvalue { i16, i32 } %res, 1 +; store i32 %tfe, ptr addrspace(1) %tfe_addr +; ret void +; } define amdgpu_ps void @raw_buffer_load_f16_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) { ; GFX67-LABEL: name: raw_buffer_load_f16_tfe @@ -301,7 +301,7 @@ define amdgpu_ps void @raw_buffer_load_f16_tfe(<4 x i32> inreg %rsrc, ptr addrsp ; GFX67-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX67-NEXT: 
[[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX67-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX67-NEXT: [[BUFFER_LOAD_USHORT_TFE_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_USHORT_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8) + ; GFX67-NEXT: [[BUFFER_LOAD_USHORT_TFE_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_USHORT_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (f16), align 1, addrspace 8) ; GFX67-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_IDXEN]].sub0 ; GFX67-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_IDXEN]].sub1 ; GFX67-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 @@ -309,13 +309,13 @@ define amdgpu_ps void @raw_buffer_load_f16_tfe(<4 x i32> inreg %rsrc, ptr addrsp ; GFX67-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_2]], %subreg.sub1 ; GFX67-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX67-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE3]], %subreg.sub2_sub3 - ; GFX67-NEXT: BUFFER_STORE_SHORT_ADDR64 [[COPY9]], [[REG_SEQUENCE1]], [[REG_SEQUENCE4]], 0, 0, 0, 0, implicit $exec :: (store (s16) into %ir.data_addr, addrspace 1) + ; GFX67-NEXT: BUFFER_STORE_SHORT_ADDR64 [[COPY9]], [[REG_SEQUENCE1]], [[REG_SEQUENCE4]], 0, 0, 0, 0, implicit $exec :: (store (i16) into %ir.data_addr, addrspace 1) ; GFX67-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX67-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX67-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_3]], %subreg.sub0, [[S_MOV_B32_4]], %subreg.sub1 ; GFX67-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX67-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_1]], %subreg.sub0_sub1, [[REG_SEQUENCE5]], %subreg.sub2_sub3 - ; GFX67-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY10]], [[REG_SEQUENCE2]], [[REG_SEQUENCE6]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) + ; GFX67-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY10]], [[REG_SEQUENCE2]], [[REG_SEQUENCE6]], 0, 0, 0, 0, implicit $exec :: (store (i32) into %ir.tfe_addr, addrspace 1) ; GFX67-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: raw_buffer_load_f16_tfe @@ -335,11 +335,11 @@ define amdgpu_ps void @raw_buffer_load_f16_tfe(<4 x i32> inreg %rsrc, ptr addrsp ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX8-NEXT: [[BUFFER_LOAD_USHORT_TFE_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_USHORT_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8) + ; GFX8-NEXT: [[BUFFER_LOAD_USHORT_TFE_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_USHORT_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (f16), align 1, addrspace 8) ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_IDXEN]].sub0 ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_IDXEN]].sub1 - ; GFX8-NEXT: FLAT_STORE_SHORT [[REG_SEQUENCE1]], [[COPY9]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16) into %ir.data_addr, addrspace 1) - ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit 
$exec, implicit $flat_scr :: (store (s32) into %ir.tfe_addr, addrspace 1) + ; GFX8-NEXT: FLAT_STORE_SHORT [[REG_SEQUENCE1]], [[COPY9]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i16) into %ir.data_addr, addrspace 1) + ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32) into %ir.tfe_addr, addrspace 1) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX910-LABEL: name: raw_buffer_load_f16_tfe @@ -359,11 +359,11 @@ define amdgpu_ps void @raw_buffer_load_f16_tfe(<4 x i32> inreg %rsrc, ptr addrsp ; GFX910-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX910-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX910-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX910-NEXT: [[BUFFER_LOAD_USHORT_TFE_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_USHORT_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8) + ; GFX910-NEXT: [[BUFFER_LOAD_USHORT_TFE_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_USHORT_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (f16), align 1, addrspace 8) ; GFX910-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_IDXEN]].sub0 ; GFX910-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_IDXEN]].sub1 - ; GFX910-NEXT: GLOBAL_STORE_SHORT [[REG_SEQUENCE1]], [[COPY9]], 0, 0, implicit $exec :: (store (s16) into %ir.data_addr, addrspace 1) - ; GFX910-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) + ; GFX910-NEXT: GLOBAL_STORE_SHORT [[REG_SEQUENCE1]], [[COPY9]], 0, 0, implicit $exec :: (store (i16) into %ir.data_addr, addrspace 1) + ; GFX910-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (store (i32) into %ir.tfe_addr, addrspace 1) ; GFX910-NEXT: S_ENDPGM 0 ; ; GFX11-LABEL: name: raw_buffer_load_f16_tfe @@ -383,11 +383,11 @@ define amdgpu_ps void @raw_buffer_load_f16_tfe(<4 x i32> inreg %rsrc, ptr addrsp ; GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX11-NEXT: [[BUFFER_LOAD_USHORT_TFE_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_USHORT_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8) + ; GFX11-NEXT: [[BUFFER_LOAD_USHORT_TFE_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_USHORT_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (f16), align 1, addrspace 8) ; GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_IDXEN]].sub0 ; GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_IDXEN]].sub1 - ; GFX11-NEXT: GLOBAL_STORE_SHORT [[REG_SEQUENCE1]], [[COPY9]], 0, 0, implicit $exec :: (store (s16) into %ir.data_addr, addrspace 1) - ; GFX11-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) + ; GFX11-NEXT: GLOBAL_STORE_SHORT [[REG_SEQUENCE1]], [[COPY9]], 0, 0, implicit $exec :: (store (i16) into %ir.data_addr, addrspace 1) + ; GFX11-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (store (i32) into %ir.tfe_addr, addrspace 1) ; GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: 
raw_buffer_load_f16_tfe @@ -407,11 +407,11 @@ define amdgpu_ps void @raw_buffer_load_f16_tfe(<4 x i32> inreg %rsrc, ptr addrsp ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX12-NEXT: [[BUFFER_LOAD_USHORT_TFE_VBUFFER_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_USHORT_TFE_VBUFFER_IDXEN [[COPY8]], [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8) + ; GFX12-NEXT: [[BUFFER_LOAD_USHORT_TFE_VBUFFER_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_USHORT_TFE_VBUFFER_IDXEN [[COPY8]], [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (f16), align 1, addrspace 8) ; GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_VBUFFER_IDXEN]].sub0 ; GFX12-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_VBUFFER_IDXEN]].sub1 - ; GFX12-NEXT: GLOBAL_STORE_SHORT [[REG_SEQUENCE1]], [[COPY9]], 0, 0, implicit $exec :: (store (s16) into %ir.data_addr, addrspace 1) - ; GFX12-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) + ; GFX12-NEXT: GLOBAL_STORE_SHORT [[REG_SEQUENCE1]], [[COPY9]], 0, 0, implicit $exec :: (store (i16) into %ir.data_addr, addrspace 1) + ; GFX12-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (store (i32) into %ir.tfe_addr, addrspace 1) ; GFX12-NEXT: S_ENDPGM 0 %res = call { half, i32 } @llvm.amdgcn.struct.buffer.load.sl_f16i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) %data = extractvalue { half, i32 } %res, 0 @@ -421,1149 +421,1149 @@ define amdgpu_ps void @raw_buffer_load_f16_tfe(<4 x i32> inreg %rsrc, ptr addrsp ret void } -define amdgpu_ps void @raw_buffer_load_i32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) { - ; GFX67-LABEL: name: raw_buffer_load_i32_tfe - ; GFX67: bb.1 (%ir-block.0): - ; GFX67-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX67-NEXT: {{ $}} - ; GFX67-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX67-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX67-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX67-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX67-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX67-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX67-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX67-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX67-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX67-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX67-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX67-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX67-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX67-NEXT: [[BUFFER_LOAD_DWORD_TFE_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORD_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; GFX67-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORD_TFE_IDXEN]].sub0 - ; GFX67-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORD_TFE_IDXEN]].sub1 - ; GFX67-NEXT: 
[[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX67-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 - ; GFX67-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_2]], %subreg.sub1 - ; GFX67-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 - ; GFX67-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE3]], %subreg.sub2_sub3 - ; GFX67-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY9]], [[REG_SEQUENCE1]], [[REG_SEQUENCE4]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.data_addr, addrspace 1) - ; GFX67-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX67-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 - ; GFX67-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_3]], %subreg.sub0, [[S_MOV_B32_4]], %subreg.sub1 - ; GFX67-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0 - ; GFX67-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_1]], %subreg.sub0_sub1, [[REG_SEQUENCE5]], %subreg.sub2_sub3 - ; GFX67-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY10]], [[REG_SEQUENCE2]], [[REG_SEQUENCE6]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) - ; GFX67-NEXT: S_ENDPGM 0 - ; - ; GFX8-LABEL: name: raw_buffer_load_i32_tfe - ; GFX8: bb.1 (%ir-block.0): - ; GFX8-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_TFE_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORD_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORD_TFE_IDXEN]].sub0 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORD_TFE_IDXEN]].sub1 - ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY9]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.data_addr, addrspace 1) - ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.tfe_addr, addrspace 1) - ; GFX8-NEXT: S_ENDPGM 0 - ; - ; GFX910-LABEL: name: raw_buffer_load_i32_tfe - ; GFX910: bb.1 (%ir-block.0): - ; GFX910-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX910-NEXT: {{ $}} - ; GFX910-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX910-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX910-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX910-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX910-NEXT: 
[[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX910-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX910-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX910-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX910-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX910-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX910-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX910-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX910-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX910-NEXT: [[BUFFER_LOAD_DWORD_TFE_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORD_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; GFX910-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORD_TFE_IDXEN]].sub0 - ; GFX910-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORD_TFE_IDXEN]].sub1 - ; GFX910-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE1]], [[COPY9]], 0, 0, implicit $exec :: (store (s32) into %ir.data_addr, addrspace 1) - ; GFX910-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) - ; GFX910-NEXT: S_ENDPGM 0 - ; - ; GFX11-LABEL: name: raw_buffer_load_i32_tfe - ; GFX11: bb.1 (%ir-block.0): - ; GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX11-NEXT: [[BUFFER_LOAD_DWORD_TFE_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORD_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORD_TFE_IDXEN]].sub0 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORD_TFE_IDXEN]].sub1 - ; GFX11-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE1]], [[COPY9]], 0, 0, implicit $exec :: (store (s32) into %ir.data_addr, addrspace 1) - ; GFX11-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) - ; GFX11-NEXT: S_ENDPGM 0 - ; - ; GFX12-LABEL: name: raw_buffer_load_i32_tfe - ; GFX12: bb.1 (%ir-block.0): - ; GFX12-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX12-NEXT: 
[[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_TFE_VBUFFER_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORD_TFE_VBUFFER_IDXEN [[COPY8]], [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORD_TFE_VBUFFER_IDXEN]].sub0 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORD_TFE_VBUFFER_IDXEN]].sub1 - ; GFX12-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE1]], [[COPY9]], 0, 0, implicit $exec :: (store (s32) into %ir.data_addr, addrspace 1) - ; GFX12-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) - ; GFX12-NEXT: S_ENDPGM 0 - %res = call { i32, i32 } @llvm.amdgcn.struct.buffer.load.sl_i32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) - %data = extractvalue { i32, i32 } %res, 0 - store i32 %data, ptr addrspace(1) %data_addr - %tfe = extractvalue { i32, i32 } %res, 1 - store i32 %tfe, ptr addrspace(1) %tfe_addr - ret void -} +; define amdgpu_ps void @raw_buffer_load_i32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) { +; ; GFX67-LABEL: name: raw_buffer_load_i32_tfe +; ; GFX67: bb.1 (%ir-block.0): +; ; GFX67-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 +; ; GFX67-NEXT: {{ $}} +; ; GFX67-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 +; ; GFX67-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 +; ; GFX67-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 +; ; GFX67-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 +; ; GFX67-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 +; ; GFX67-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +; ; GFX67-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 +; ; GFX67-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 +; ; GFX67-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 +; ; GFX67-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 +; ; GFX67-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 +; ; GFX67-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX67-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] +; ; GFX67-NEXT: [[BUFFER_LOAD_DWORD_TFE_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORD_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) +; ; GFX67-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORD_TFE_IDXEN]].sub0 +; ; GFX67-NEXT: 
[[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORD_TFE_IDXEN]].sub1 +; ; GFX67-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX67-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 +; ; GFX67-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_2]], %subreg.sub1 +; ; GFX67-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 +; ; GFX67-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE3]], %subreg.sub2_sub3 +; ; GFX67-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY9]], [[REG_SEQUENCE1]], [[REG_SEQUENCE4]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.data_addr, addrspace 1) +; ; GFX67-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX67-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 +; ; GFX67-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_3]], %subreg.sub0, [[S_MOV_B32_4]], %subreg.sub1 +; ; GFX67-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0 +; ; GFX67-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_1]], %subreg.sub0_sub1, [[REG_SEQUENCE5]], %subreg.sub2_sub3 +; ; GFX67-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY10]], [[REG_SEQUENCE2]], [[REG_SEQUENCE6]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) +; ; GFX67-NEXT: S_ENDPGM 0 +; ; +; ; GFX8-LABEL: name: raw_buffer_load_i32_tfe +; ; GFX8: bb.1 (%ir-block.0): +; ; GFX8-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 +; ; GFX8-NEXT: {{ $}} +; ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 +; ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 +; ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 +; ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 +; ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 +; ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +; ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 +; ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 +; ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 +; ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 +; ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 +; ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] +; ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_TFE_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORD_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) +; ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORD_TFE_IDXEN]].sub0 +; ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORD_TFE_IDXEN]].sub1 +; ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY9]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.data_addr, addrspace 1) +; ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.tfe_addr, addrspace 1) +; ; GFX8-NEXT: S_ENDPGM 0 +; ; +; ; GFX910-LABEL: name: raw_buffer_load_i32_tfe +; ; GFX910: bb.1 (%ir-block.0): +; ; GFX910-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 +; ; GFX910-NEXT: {{ $}} +; ; GFX910-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 +; ; GFX910-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 +; ; 
GFX910-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 +; ; GFX910-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 +; ; GFX910-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 +; ; GFX910-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +; ; GFX910-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 +; ; GFX910-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 +; ; GFX910-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 +; ; GFX910-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 +; ; GFX910-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 +; ; GFX910-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX910-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] +; ; GFX910-NEXT: [[BUFFER_LOAD_DWORD_TFE_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORD_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) +; ; GFX910-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORD_TFE_IDXEN]].sub0 +; ; GFX910-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORD_TFE_IDXEN]].sub1 +; ; GFX910-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE1]], [[COPY9]], 0, 0, implicit $exec :: (store (s32) into %ir.data_addr, addrspace 1) +; ; GFX910-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) +; ; GFX910-NEXT: S_ENDPGM 0 +; ; +; ; GFX11-LABEL: name: raw_buffer_load_i32_tfe +; ; GFX11: bb.1 (%ir-block.0): +; ; GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 +; ; GFX11-NEXT: {{ $}} +; ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 +; ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 +; ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 +; ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 +; ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 +; ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +; ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 +; ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 +; ; GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 +; ; GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 +; ; GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 +; ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] +; ; GFX11-NEXT: [[BUFFER_LOAD_DWORD_TFE_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORD_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) +; ; GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORD_TFE_IDXEN]].sub0 +; ; GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORD_TFE_IDXEN]].sub1 +; ; GFX11-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE1]], [[COPY9]], 0, 0, implicit $exec :: (store (s32) into %ir.data_addr, addrspace 1) +; ; GFX11-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) +; ; GFX11-NEXT: S_ENDPGM 0 +; ; +; ; GFX12-LABEL: name: raw_buffer_load_i32_tfe +; ; GFX12: bb.1 (%ir-block.0): +; ; GFX12-NEXT: liveins: $sgpr0, 
$sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 +; ; GFX12-NEXT: {{ $}} +; ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 +; ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 +; ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 +; ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 +; ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 +; ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +; ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 +; ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 +; ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 +; ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 +; ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 +; ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] +; ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_TFE_VBUFFER_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORD_TFE_VBUFFER_IDXEN [[COPY8]], [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) +; ; GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORD_TFE_VBUFFER_IDXEN]].sub0 +; ; GFX12-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORD_TFE_VBUFFER_IDXEN]].sub1 +; ; GFX12-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE1]], [[COPY9]], 0, 0, implicit $exec :: (store (s32) into %ir.data_addr, addrspace 1) +; ; GFX12-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) +; ; GFX12-NEXT: S_ENDPGM 0 +; %res = call { i32, i32 } @llvm.amdgcn.struct.buffer.load.sl_i32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) +; %data = extractvalue { i32, i32 } %res, 0 +; store i32 %data, ptr addrspace(1) %data_addr +; %tfe = extractvalue { i32, i32 } %res, 1 +; store i32 %tfe, ptr addrspace(1) %tfe_addr +; ret void +; } -define amdgpu_ps void @raw_buffer_load_v2i32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) { - ; GFX67-LABEL: name: raw_buffer_load_v2i32_tfe - ; GFX67: bb.1 (%ir-block.0): - ; GFX67-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX67-NEXT: {{ $}} - ; GFX67-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX67-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX67-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX67-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX67-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX67-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX67-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX67-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX67-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX67-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX67-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX67-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX67-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX67-NEXT: [[BUFFER_LOAD_DWORDX2_TFE_IDXEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX2_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: 
(dereferenceable load (<2 x s32>), align 1, addrspace 8) - ; GFX67-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub0 - ; GFX67-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub1 - ; GFX67-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub2 - ; GFX67-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; GFX67-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX67-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 - ; GFX67-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_2]], %subreg.sub1 - ; GFX67-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 - ; GFX67-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE4]], %subreg.sub2_sub3 - ; GFX67-NEXT: BUFFER_STORE_DWORDX2_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE1]], [[REG_SEQUENCE5]], 0, 0, 0, 0, implicit $exec :: (store (<2 x s32>) into %ir.data_addr, addrspace 1) - ; GFX67-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX67-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 - ; GFX67-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_3]], %subreg.sub0, [[S_MOV_B32_4]], %subreg.sub1 - ; GFX67-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0 - ; GFX67-NEXT: [[REG_SEQUENCE7:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_1]], %subreg.sub0_sub1, [[REG_SEQUENCE6]], %subreg.sub2_sub3 - ; GFX67-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY11]], [[REG_SEQUENCE2]], [[REG_SEQUENCE7]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) - ; GFX67-NEXT: S_ENDPGM 0 - ; - ; GFX8-LABEL: name: raw_buffer_load_v2i32_tfe - ; GFX8: bb.1 (%ir-block.0): - ; GFX8-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX2_TFE_IDXEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX2_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8) - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub0 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub1 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub2 - ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit 
$exec, implicit $flat_scr :: (store (<2 x s32>) into %ir.data_addr, addrspace 1) - ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY11]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.tfe_addr, addrspace 1) - ; GFX8-NEXT: S_ENDPGM 0 - ; - ; GFX910-LABEL: name: raw_buffer_load_v2i32_tfe - ; GFX910: bb.1 (%ir-block.0): - ; GFX910-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX910-NEXT: {{ $}} - ; GFX910-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX910-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX910-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX910-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX910-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX910-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX910-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX910-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX910-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX910-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX910-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX910-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX910-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX910-NEXT: [[BUFFER_LOAD_DWORDX2_TFE_IDXEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX2_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8) - ; GFX910-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub0 - ; GFX910-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub1 - ; GFX910-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub2 - ; GFX910-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; GFX910-NEXT: GLOBAL_STORE_DWORDX2 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<2 x s32>) into %ir.data_addr, addrspace 1) - ; GFX910-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY11]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) - ; GFX910-NEXT: S_ENDPGM 0 - ; - ; GFX11-LABEL: name: raw_buffer_load_v2i32_tfe - ; GFX11: bb.1 (%ir-block.0): - ; GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX11-NEXT: 
[[BUFFER_LOAD_DWORDX2_TFE_IDXEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX2_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8) - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub0 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub1 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub2 - ; GFX11-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; GFX11-NEXT: GLOBAL_STORE_DWORDX2 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<2 x s32>) into %ir.data_addr, addrspace 1) - ; GFX11-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY11]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) - ; GFX11-NEXT: S_ENDPGM 0 - ; - ; GFX12-LABEL: name: raw_buffer_load_v2i32_tfe - ; GFX12: bb.1 (%ir-block.0): - ; GFX12-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX2_TFE_VBUFFER_IDXEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX2_TFE_VBUFFER_IDXEN [[COPY8]], [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8) - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_VBUFFER_IDXEN]].sub0 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_VBUFFER_IDXEN]].sub1 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_VBUFFER_IDXEN]].sub2 - ; GFX12-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; GFX12-NEXT: GLOBAL_STORE_DWORDX2 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<2 x s32>) into %ir.data_addr, addrspace 1) - ; GFX12-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY11]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) - ; GFX12-NEXT: S_ENDPGM 0 - %res = call { <2 x i32>, i32 } @llvm.amdgcn.struct.buffer.load.sl_v2i32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) - %data = extractvalue { <2 x i32>, i32 } %res, 0 - store <2 x i32> %data, ptr addrspace(1) %data_addr - %tfe = extractvalue { <2 x i32>, i32 } %res, 1 - store i32 %tfe, ptr addrspace(1) %tfe_addr - ret void -} +; define amdgpu_ps void @raw_buffer_load_v2i32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) { +; ; GFX67-LABEL: name: raw_buffer_load_v2i32_tfe +; ; GFX67: bb.1 
(%ir-block.0): +; ; GFX67-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 +; ; GFX67-NEXT: {{ $}} +; ; GFX67-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 +; ; GFX67-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 +; ; GFX67-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 +; ; GFX67-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 +; ; GFX67-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 +; ; GFX67-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +; ; GFX67-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 +; ; GFX67-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 +; ; GFX67-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 +; ; GFX67-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 +; ; GFX67-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 +; ; GFX67-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX67-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] +; ; GFX67-NEXT: [[BUFFER_LOAD_DWORDX2_TFE_IDXEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX2_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8) +; ; GFX67-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub0 +; ; GFX67-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub1 +; ; GFX67-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub2 +; ; GFX67-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 +; ; GFX67-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX67-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 +; ; GFX67-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_2]], %subreg.sub1 +; ; GFX67-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 +; ; GFX67-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE4]], %subreg.sub2_sub3 +; ; GFX67-NEXT: BUFFER_STORE_DWORDX2_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE1]], [[REG_SEQUENCE5]], 0, 0, 0, 0, implicit $exec :: (store (<2 x s32>) into %ir.data_addr, addrspace 1) +; ; GFX67-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX67-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 +; ; GFX67-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_3]], %subreg.sub0, [[S_MOV_B32_4]], %subreg.sub1 +; ; GFX67-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0 +; ; GFX67-NEXT: [[REG_SEQUENCE7:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_1]], %subreg.sub0_sub1, [[REG_SEQUENCE6]], %subreg.sub2_sub3 +; ; GFX67-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY11]], [[REG_SEQUENCE2]], [[REG_SEQUENCE7]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) +; ; GFX67-NEXT: S_ENDPGM 0 +; ; +; ; GFX8-LABEL: name: raw_buffer_load_v2i32_tfe +; ; GFX8: bb.1 (%ir-block.0): +; ; GFX8-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 +; ; GFX8-NEXT: {{ $}} +; ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 +; ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 +; ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 +; ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 +; ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, 
[[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 +; ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +; ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 +; ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 +; ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 +; ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 +; ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 +; ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] +; ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX2_TFE_IDXEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX2_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8) +; ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub0 +; ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub1 +; ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub2 +; ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 +; ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s32>) into %ir.data_addr, addrspace 1) +; ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY11]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.tfe_addr, addrspace 1) +; ; GFX8-NEXT: S_ENDPGM 0 +; ; +; ; GFX910-LABEL: name: raw_buffer_load_v2i32_tfe +; ; GFX910: bb.1 (%ir-block.0): +; ; GFX910-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 +; ; GFX910-NEXT: {{ $}} +; ; GFX910-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 +; ; GFX910-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 +; ; GFX910-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 +; ; GFX910-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 +; ; GFX910-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 +; ; GFX910-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +; ; GFX910-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 +; ; GFX910-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 +; ; GFX910-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 +; ; GFX910-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 +; ; GFX910-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 +; ; GFX910-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX910-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] +; ; GFX910-NEXT: [[BUFFER_LOAD_DWORDX2_TFE_IDXEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX2_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8) +; ; GFX910-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub0 +; ; GFX910-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub1 +; ; GFX910-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub2 +; ; GFX910-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 +; ; GFX910-NEXT: GLOBAL_STORE_DWORDX2 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<2 x s32>) into %ir.data_addr, addrspace 1) +; ; 
GFX910-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY11]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) +; ; GFX910-NEXT: S_ENDPGM 0 +; ; +; ; GFX11-LABEL: name: raw_buffer_load_v2i32_tfe +; ; GFX11: bb.1 (%ir-block.0): +; ; GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 +; ; GFX11-NEXT: {{ $}} +; ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 +; ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 +; ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 +; ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 +; ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 +; ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +; ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 +; ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 +; ; GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 +; ; GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 +; ; GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 +; ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] +; ; GFX11-NEXT: [[BUFFER_LOAD_DWORDX2_TFE_IDXEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX2_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8) +; ; GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub0 +; ; GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub1 +; ; GFX11-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub2 +; ; GFX11-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 +; ; GFX11-NEXT: GLOBAL_STORE_DWORDX2 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<2 x s32>) into %ir.data_addr, addrspace 1) +; ; GFX11-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY11]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) +; ; GFX11-NEXT: S_ENDPGM 0 +; ; +; ; GFX12-LABEL: name: raw_buffer_load_v2i32_tfe +; ; GFX12: bb.1 (%ir-block.0): +; ; GFX12-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 +; ; GFX12-NEXT: {{ $}} +; ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 +; ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 +; ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 +; ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 +; ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 +; ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +; ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 +; ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 +; ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 +; ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 +; ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 +; ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] +; ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX2_TFE_VBUFFER_IDXEN:%[0-9]+]]:vreg_96 = 
BUFFER_LOAD_DWORDX2_TFE_VBUFFER_IDXEN [[COPY8]], [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8) +; ; GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_VBUFFER_IDXEN]].sub0 +; ; GFX12-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_VBUFFER_IDXEN]].sub1 +; ; GFX12-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_VBUFFER_IDXEN]].sub2 +; ; GFX12-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 +; ; GFX12-NEXT: GLOBAL_STORE_DWORDX2 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<2 x s32>) into %ir.data_addr, addrspace 1) +; ; GFX12-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY11]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) +; ; GFX12-NEXT: S_ENDPGM 0 +; %res = call { <2 x i32>, i32 } @llvm.amdgcn.struct.buffer.load.sl_v2i32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) +; %data = extractvalue { <2 x i32>, i32 } %res, 0 +; store <2 x i32> %data, ptr addrspace(1) %data_addr +; %tfe = extractvalue { <2 x i32>, i32 } %res, 1 +; store i32 %tfe, ptr addrspace(1) %tfe_addr +; ret void +; } -define amdgpu_ps void @raw_buffer_load_v2f32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) { - ; GFX67-LABEL: name: raw_buffer_load_v2f32_tfe - ; GFX67: bb.1 (%ir-block.0): - ; GFX67-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX67-NEXT: {{ $}} - ; GFX67-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX67-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX67-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX67-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX67-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX67-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX67-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX67-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX67-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX67-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX67-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX67-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX67-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX67-NEXT: [[BUFFER_LOAD_DWORDX2_TFE_IDXEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX2_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8) - ; GFX67-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub0 - ; GFX67-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub1 - ; GFX67-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub2 - ; GFX67-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; GFX67-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX67-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 - ; GFX67-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_2]], %subreg.sub1 - ; GFX67-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 - ; GFX67-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, 
[[REG_SEQUENCE4]], %subreg.sub2_sub3 - ; GFX67-NEXT: BUFFER_STORE_DWORDX2_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE1]], [[REG_SEQUENCE5]], 0, 0, 0, 0, implicit $exec :: (store (<2 x s32>) into %ir.data_addr, addrspace 1) - ; GFX67-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX67-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 - ; GFX67-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_3]], %subreg.sub0, [[S_MOV_B32_4]], %subreg.sub1 - ; GFX67-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0 - ; GFX67-NEXT: [[REG_SEQUENCE7:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_1]], %subreg.sub0_sub1, [[REG_SEQUENCE6]], %subreg.sub2_sub3 - ; GFX67-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY11]], [[REG_SEQUENCE2]], [[REG_SEQUENCE7]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) - ; GFX67-NEXT: S_ENDPGM 0 - ; - ; GFX8-LABEL: name: raw_buffer_load_v2f32_tfe - ; GFX8: bb.1 (%ir-block.0): - ; GFX8-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX2_TFE_IDXEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX2_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8) - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub0 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub1 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub2 - ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s32>) into %ir.data_addr, addrspace 1) - ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY11]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.tfe_addr, addrspace 1) - ; GFX8-NEXT: S_ENDPGM 0 - ; - ; GFX910-LABEL: name: raw_buffer_load_v2f32_tfe - ; GFX910: bb.1 (%ir-block.0): - ; GFX910-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX910-NEXT: {{ $}} - ; GFX910-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX910-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX910-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX910-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX910-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; 
GFX910-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX910-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX910-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX910-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX910-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX910-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX910-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX910-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX910-NEXT: [[BUFFER_LOAD_DWORDX2_TFE_IDXEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX2_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8) - ; GFX910-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub0 - ; GFX910-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub1 - ; GFX910-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub2 - ; GFX910-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; GFX910-NEXT: GLOBAL_STORE_DWORDX2 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<2 x s32>) into %ir.data_addr, addrspace 1) - ; GFX910-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY11]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) - ; GFX910-NEXT: S_ENDPGM 0 - ; - ; GFX11-LABEL: name: raw_buffer_load_v2f32_tfe - ; GFX11: bb.1 (%ir-block.0): - ; GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX11-NEXT: [[BUFFER_LOAD_DWORDX2_TFE_IDXEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX2_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8) - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub0 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub1 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub2 - ; GFX11-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; GFX11-NEXT: GLOBAL_STORE_DWORDX2 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<2 x s32>) into %ir.data_addr, addrspace 1) - ; GFX11-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY11]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, 
addrspace 1) - ; GFX11-NEXT: S_ENDPGM 0 - ; - ; GFX12-LABEL: name: raw_buffer_load_v2f32_tfe - ; GFX12: bb.1 (%ir-block.0): - ; GFX12-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX2_TFE_VBUFFER_IDXEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX2_TFE_VBUFFER_IDXEN [[COPY8]], [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8) - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_VBUFFER_IDXEN]].sub0 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_VBUFFER_IDXEN]].sub1 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_VBUFFER_IDXEN]].sub2 - ; GFX12-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; GFX12-NEXT: GLOBAL_STORE_DWORDX2 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<2 x s32>) into %ir.data_addr, addrspace 1) - ; GFX12-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY11]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) - ; GFX12-NEXT: S_ENDPGM 0 - %res = call { <2 x float>, i32 } @llvm.amdgcn.struct.buffer.load.sl_v2f32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) - %data = extractvalue { <2 x float>, i32 } %res, 0 - store <2 x float> %data, ptr addrspace(1) %data_addr - %tfe = extractvalue { <2 x float>, i32 } %res, 1 - store i32 %tfe, ptr addrspace(1) %tfe_addr - ret void -} +; define amdgpu_ps void @raw_buffer_load_v2f32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) { +; ; GFX67-LABEL: name: raw_buffer_load_v2f32_tfe +; ; GFX67: bb.1 (%ir-block.0): +; ; GFX67-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 +; ; GFX67-NEXT: {{ $}} +; ; GFX67-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 +; ; GFX67-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 +; ; GFX67-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 +; ; GFX67-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 +; ; GFX67-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 +; ; GFX67-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +; ; GFX67-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 +; ; GFX67-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 +; ; GFX67-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 +; ; GFX67-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY 
$vgpr3 +; ; GFX67-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 +; ; GFX67-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX67-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] +; ; GFX67-NEXT: [[BUFFER_LOAD_DWORDX2_TFE_IDXEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX2_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8) +; ; GFX67-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub0 +; ; GFX67-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub1 +; ; GFX67-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub2 +; ; GFX67-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 +; ; GFX67-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX67-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 +; ; GFX67-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_2]], %subreg.sub1 +; ; GFX67-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 +; ; GFX67-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE4]], %subreg.sub2_sub3 +; ; GFX67-NEXT: BUFFER_STORE_DWORDX2_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE1]], [[REG_SEQUENCE5]], 0, 0, 0, 0, implicit $exec :: (store (<2 x s32>) into %ir.data_addr, addrspace 1) +; ; GFX67-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX67-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 +; ; GFX67-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_3]], %subreg.sub0, [[S_MOV_B32_4]], %subreg.sub1 +; ; GFX67-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0 +; ; GFX67-NEXT: [[REG_SEQUENCE7:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_1]], %subreg.sub0_sub1, [[REG_SEQUENCE6]], %subreg.sub2_sub3 +; ; GFX67-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY11]], [[REG_SEQUENCE2]], [[REG_SEQUENCE7]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) +; ; GFX67-NEXT: S_ENDPGM 0 +; ; +; ; GFX8-LABEL: name: raw_buffer_load_v2f32_tfe +; ; GFX8: bb.1 (%ir-block.0): +; ; GFX8-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 +; ; GFX8-NEXT: {{ $}} +; ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 +; ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 +; ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 +; ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 +; ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 +; ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +; ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 +; ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 +; ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 +; ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 +; ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 +; ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] +; ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX2_TFE_IDXEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX2_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8) +; ; 
GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub0 +; ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub1 +; ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub2 +; ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 +; ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s32>) into %ir.data_addr, addrspace 1) +; ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY11]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.tfe_addr, addrspace 1) +; ; GFX8-NEXT: S_ENDPGM 0 +; ; +; ; GFX910-LABEL: name: raw_buffer_load_v2f32_tfe +; ; GFX910: bb.1 (%ir-block.0): +; ; GFX910-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 +; ; GFX910-NEXT: {{ $}} +; ; GFX910-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 +; ; GFX910-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 +; ; GFX910-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 +; ; GFX910-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 +; ; GFX910-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 +; ; GFX910-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +; ; GFX910-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 +; ; GFX910-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 +; ; GFX910-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 +; ; GFX910-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 +; ; GFX910-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 +; ; GFX910-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX910-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] +; ; GFX910-NEXT: [[BUFFER_LOAD_DWORDX2_TFE_IDXEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX2_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8) +; ; GFX910-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub0 +; ; GFX910-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub1 +; ; GFX910-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub2 +; ; GFX910-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 +; ; GFX910-NEXT: GLOBAL_STORE_DWORDX2 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<2 x s32>) into %ir.data_addr, addrspace 1) +; ; GFX910-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY11]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) +; ; GFX910-NEXT: S_ENDPGM 0 +; ; +; ; GFX11-LABEL: name: raw_buffer_load_v2f32_tfe +; ; GFX11: bb.1 (%ir-block.0): +; ; GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 +; ; GFX11-NEXT: {{ $}} +; ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 +; ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 +; ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 +; ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 +; ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 +; ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +; ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY 
$vgpr1 +; ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 +; ; GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 +; ; GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 +; ; GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 +; ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] +; ; GFX11-NEXT: [[BUFFER_LOAD_DWORDX2_TFE_IDXEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX2_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8) +; ; GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub0 +; ; GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub1 +; ; GFX11-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub2 +; ; GFX11-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 +; ; GFX11-NEXT: GLOBAL_STORE_DWORDX2 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<2 x s32>) into %ir.data_addr, addrspace 1) +; ; GFX11-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY11]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) +; ; GFX11-NEXT: S_ENDPGM 0 +; ; +; ; GFX12-LABEL: name: raw_buffer_load_v2f32_tfe +; ; GFX12: bb.1 (%ir-block.0): +; ; GFX12-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 +; ; GFX12-NEXT: {{ $}} +; ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 +; ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 +; ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 +; ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 +; ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 +; ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +; ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 +; ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 +; ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 +; ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 +; ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 +; ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] +; ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX2_TFE_VBUFFER_IDXEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX2_TFE_VBUFFER_IDXEN [[COPY8]], [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8) +; ; GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_VBUFFER_IDXEN]].sub0 +; ; GFX12-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_VBUFFER_IDXEN]].sub1 +; ; GFX12-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_VBUFFER_IDXEN]].sub2 +; ; GFX12-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 +; ; GFX12-NEXT: GLOBAL_STORE_DWORDX2 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<2 x s32>) into %ir.data_addr, addrspace 1) +; ; GFX12-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY11]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) +; ; GFX12-NEXT: S_ENDPGM 0 +; 
%res = call { <2 x float>, i32 } @llvm.amdgcn.struct.buffer.load.sl_v2f32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) +; %data = extractvalue { <2 x float>, i32 } %res, 0 +; store <2 x float> %data, ptr addrspace(1) %data_addr +; %tfe = extractvalue { <2 x float>, i32 } %res, 1 +; store i32 %tfe, ptr addrspace(1) %tfe_addr +; ret void +; } -define amdgpu_ps void @raw_buffer_load_v3i32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) { - ; GFX6-LABEL: name: raw_buffer_load_v3i32_tfe - ; GFX6: bb.1 (%ir-block.0): - ; GFX6-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX3_TFE_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8) - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub0 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub1 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub2 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub3 - ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub0_sub1 - ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub2_sub3 - ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 - ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_2]], %subreg.sub1 - ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 - ; GFX6-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE3]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_STORE_DWORDX2_ADDR64 [[COPY13]], [[REG_SEQUENCE1]], [[REG_SEQUENCE4]], 0, 0, 0, 0, implicit $exec :: (store (<2 x s32>) into %ir.data_addr, align 16, addrspace 1) - ; GFX6-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 - ; GFX6-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_3]], %subreg.sub0, [[S_MOV_B32_4]], %subreg.sub1 - ; GFX6-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0 - ; GFX6-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_1]], %subreg.sub0_sub1, [[REG_SEQUENCE5]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY11]], [[REG_SEQUENCE1]], [[REG_SEQUENCE6]], 
0, 8, 0, 0, implicit $exec :: (store (s32) into %ir.data_addr + 8, align 8, basealign 16, addrspace 1) - ; GFX6-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6-NEXT: [[S_MOV_B32_6:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 - ; GFX6-NEXT: [[REG_SEQUENCE7:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_5]], %subreg.sub0, [[S_MOV_B32_6]], %subreg.sub1 - ; GFX6-NEXT: [[S_MOV_B64_2:%[0-9]+]]:sreg_64 = S_MOV_B64 0 - ; GFX6-NEXT: [[REG_SEQUENCE8:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_2]], %subreg.sub0_sub1, [[REG_SEQUENCE7]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY12]], [[REG_SEQUENCE2]], [[REG_SEQUENCE8]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) - ; GFX6-NEXT: S_ENDPGM 0 - ; - ; GFX7-LABEL: name: raw_buffer_load_v3i32_tfe - ; GFX7: bb.1 (%ir-block.0): - ; GFX7-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX3_TFE_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8) - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub0 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub1 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub2 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub3 - ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2 - ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 - ; GFX7-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_2]], %subreg.sub1 - ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 - ; GFX7-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE4]], %subreg.sub2_sub3 - ; GFX7-NEXT: BUFFER_STORE_DWORDX3_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE1]], [[REG_SEQUENCE5]], 0, 0, 0, 0, implicit $exec :: (store (<3 x s32>) into %ir.data_addr, align 16, addrspace 1) - ; GFX7-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 - ; GFX7-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_3]], %subreg.sub0, [[S_MOV_B32_4]], %subreg.sub1 - ; GFX7-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 
0 - ; GFX7-NEXT: [[REG_SEQUENCE7:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_1]], %subreg.sub0_sub1, [[REG_SEQUENCE6]], %subreg.sub2_sub3 - ; GFX7-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY12]], [[REG_SEQUENCE2]], [[REG_SEQUENCE7]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) - ; GFX7-NEXT: S_ENDPGM 0 - ; - ; GFX8-LABEL: name: raw_buffer_load_v3i32_tfe - ; GFX8: bb.1 (%ir-block.0): - ; GFX8-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX3_TFE_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8) - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub0 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub1 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub2 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub3 - ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2 - ; GFX8-NEXT: FLAT_STORE_DWORDX3 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>) into %ir.data_addr, align 16, addrspace 1) - ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY12]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.tfe_addr, addrspace 1) - ; GFX8-NEXT: S_ENDPGM 0 - ; - ; GFX910-LABEL: name: raw_buffer_load_v3i32_tfe - ; GFX910: bb.1 (%ir-block.0): - ; GFX910-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX910-NEXT: {{ $}} - ; GFX910-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX910-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX910-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX910-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX910-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX910-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX910-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX910-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX910-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX910-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX910-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE 
[[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX910-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX910-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX910-NEXT: [[BUFFER_LOAD_DWORDX3_TFE_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8) - ; GFX910-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub0 - ; GFX910-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub1 - ; GFX910-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub2 - ; GFX910-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub3 - ; GFX910-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2 - ; GFX910-NEXT: GLOBAL_STORE_DWORDX3 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<3 x s32>) into %ir.data_addr, align 16, addrspace 1) - ; GFX910-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY12]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) - ; GFX910-NEXT: S_ENDPGM 0 - ; - ; GFX11-LABEL: name: raw_buffer_load_v3i32_tfe - ; GFX11: bb.1 (%ir-block.0): - ; GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX11-NEXT: [[BUFFER_LOAD_DWORDX3_TFE_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8) - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub0 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub1 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub2 - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub3 - ; GFX11-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2 - ; GFX11-NEXT: GLOBAL_STORE_DWORDX3 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<3 x s32>) into %ir.data_addr, align 16, addrspace 1) - ; GFX11-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY12]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) - ; GFX11-NEXT: S_ENDPGM 0 - ; - ; GFX12-LABEL: name: raw_buffer_load_v3i32_tfe - ; GFX12: bb.1 (%ir-block.0): - ; GFX12-NEXT: liveins: $sgpr0, 
$sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX3_TFE_VBUFFER_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_VBUFFER_IDXEN [[COPY8]], [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8) - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_VBUFFER_IDXEN]].sub0 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_VBUFFER_IDXEN]].sub1 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_VBUFFER_IDXEN]].sub2 - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_VBUFFER_IDXEN]].sub3 - ; GFX12-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2 - ; GFX12-NEXT: GLOBAL_STORE_DWORDX3 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<3 x s32>) into %ir.data_addr, align 16, addrspace 1) - ; GFX12-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY12]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) - ; GFX12-NEXT: S_ENDPGM 0 - %res = call { <3 x i32>, i32 } @llvm.amdgcn.struct.buffer.load.sl_v3i32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) - %data = extractvalue { <3 x i32>, i32 } %res, 0 - store <3 x i32> %data, ptr addrspace(1) %data_addr - %tfe = extractvalue { <3 x i32>, i32 } %res, 1 - store i32 %tfe, ptr addrspace(1) %tfe_addr - ret void -} +; define amdgpu_ps void @raw_buffer_load_v3i32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) { +; ; GFX6-LABEL: name: raw_buffer_load_v3i32_tfe +; ; GFX6: bb.1 (%ir-block.0): +; ; GFX6-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 +; ; GFX6-NEXT: {{ $}} +; ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 +; ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 +; ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 +; ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 +; ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 +; ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +; ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 +; ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 +; ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 +; ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 +; ; GFX6-NEXT: 
[[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 +; ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] +; ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX3_TFE_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8) +; ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub0 +; ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub1 +; ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub2 +; ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub3 +; ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub0_sub1 +; ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub2_sub3 +; ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX6-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 +; ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_2]], %subreg.sub1 +; ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 +; ; GFX6-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE3]], %subreg.sub2_sub3 +; ; GFX6-NEXT: BUFFER_STORE_DWORDX2_ADDR64 [[COPY13]], [[REG_SEQUENCE1]], [[REG_SEQUENCE4]], 0, 0, 0, 0, implicit $exec :: (store (<2 x s32>) into %ir.data_addr, align 16, addrspace 1) +; ; GFX6-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX6-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 +; ; GFX6-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_3]], %subreg.sub0, [[S_MOV_B32_4]], %subreg.sub1 +; ; GFX6-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0 +; ; GFX6-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_1]], %subreg.sub0_sub1, [[REG_SEQUENCE5]], %subreg.sub2_sub3 +; ; GFX6-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY11]], [[REG_SEQUENCE1]], [[REG_SEQUENCE6]], 0, 8, 0, 0, implicit $exec :: (store (s32) into %ir.data_addr + 8, align 8, basealign 16, addrspace 1) +; ; GFX6-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX6-NEXT: [[S_MOV_B32_6:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 +; ; GFX6-NEXT: [[REG_SEQUENCE7:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_5]], %subreg.sub0, [[S_MOV_B32_6]], %subreg.sub1 +; ; GFX6-NEXT: [[S_MOV_B64_2:%[0-9]+]]:sreg_64 = S_MOV_B64 0 +; ; GFX6-NEXT: [[REG_SEQUENCE8:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_2]], %subreg.sub0_sub1, [[REG_SEQUENCE7]], %subreg.sub2_sub3 +; ; GFX6-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY12]], [[REG_SEQUENCE2]], [[REG_SEQUENCE8]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) +; ; GFX6-NEXT: S_ENDPGM 0 +; ; +; ; GFX7-LABEL: name: raw_buffer_load_v3i32_tfe +; ; GFX7: bb.1 (%ir-block.0): +; ; GFX7-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 +; ; GFX7-NEXT: {{ $}} +; ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 +; ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 +; ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 +; ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 +; ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 +; ; GFX7-NEXT: 
[[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +; ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 +; ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 +; ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 +; ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 +; ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 +; ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] +; ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX3_TFE_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8) +; ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub0 +; ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub1 +; ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub2 +; ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub3 +; ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2 +; ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX7-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 +; ; GFX7-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_2]], %subreg.sub1 +; ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 +; ; GFX7-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE4]], %subreg.sub2_sub3 +; ; GFX7-NEXT: BUFFER_STORE_DWORDX3_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE1]], [[REG_SEQUENCE5]], 0, 0, 0, 0, implicit $exec :: (store (<3 x s32>) into %ir.data_addr, align 16, addrspace 1) +; ; GFX7-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX7-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 +; ; GFX7-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_3]], %subreg.sub0, [[S_MOV_B32_4]], %subreg.sub1 +; ; GFX7-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0 +; ; GFX7-NEXT: [[REG_SEQUENCE7:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_1]], %subreg.sub0_sub1, [[REG_SEQUENCE6]], %subreg.sub2_sub3 +; ; GFX7-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY12]], [[REG_SEQUENCE2]], [[REG_SEQUENCE7]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) +; ; GFX7-NEXT: S_ENDPGM 0 +; ; +; ; GFX8-LABEL: name: raw_buffer_load_v3i32_tfe +; ; GFX8: bb.1 (%ir-block.0): +; ; GFX8-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 +; ; GFX8-NEXT: {{ $}} +; ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 +; ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 +; ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 +; ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 +; ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 +; ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +; ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 +; ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 +; ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 +; ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 +; ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = 
REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 +; ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] +; ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX3_TFE_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8) +; ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub0 +; ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub1 +; ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub2 +; ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub3 +; ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2 +; ; GFX8-NEXT: FLAT_STORE_DWORDX3 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>) into %ir.data_addr, align 16, addrspace 1) +; ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY12]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.tfe_addr, addrspace 1) +; ; GFX8-NEXT: S_ENDPGM 0 +; ; +; ; GFX910-LABEL: name: raw_buffer_load_v3i32_tfe +; ; GFX910: bb.1 (%ir-block.0): +; ; GFX910-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 +; ; GFX910-NEXT: {{ $}} +; ; GFX910-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 +; ; GFX910-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 +; ; GFX910-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 +; ; GFX910-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 +; ; GFX910-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 +; ; GFX910-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +; ; GFX910-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 +; ; GFX910-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 +; ; GFX910-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 +; ; GFX910-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 +; ; GFX910-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 +; ; GFX910-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX910-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] +; ; GFX910-NEXT: [[BUFFER_LOAD_DWORDX3_TFE_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8) +; ; GFX910-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub0 +; ; GFX910-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub1 +; ; GFX910-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub2 +; ; GFX910-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub3 +; ; GFX910-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2 +; ; GFX910-NEXT: GLOBAL_STORE_DWORDX3 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<3 x s32>) into %ir.data_addr, align 16, addrspace 1) +; ; GFX910-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY12]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) +; ; GFX910-NEXT: S_ENDPGM 0 +; ; +; ; GFX11-LABEL: 
name: raw_buffer_load_v3i32_tfe +; ; GFX11: bb.1 (%ir-block.0): +; ; GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 +; ; GFX11-NEXT: {{ $}} +; ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 +; ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 +; ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 +; ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 +; ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 +; ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +; ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 +; ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 +; ; GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 +; ; GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 +; ; GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 +; ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] +; ; GFX11-NEXT: [[BUFFER_LOAD_DWORDX3_TFE_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8) +; ; GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub0 +; ; GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub1 +; ; GFX11-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub2 +; ; GFX11-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub3 +; ; GFX11-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2 +; ; GFX11-NEXT: GLOBAL_STORE_DWORDX3 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<3 x s32>) into %ir.data_addr, align 16, addrspace 1) +; ; GFX11-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY12]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) +; ; GFX11-NEXT: S_ENDPGM 0 +; ; +; ; GFX12-LABEL: name: raw_buffer_load_v3i32_tfe +; ; GFX12: bb.1 (%ir-block.0): +; ; GFX12-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 +; ; GFX12-NEXT: {{ $}} +; ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 +; ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 +; ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 +; ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 +; ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 +; ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +; ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 +; ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 +; ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 +; ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 +; ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 +; ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] +; ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX3_TFE_VBUFFER_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_VBUFFER_IDXEN [[COPY8]], [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, 
implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8) +; ; GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_VBUFFER_IDXEN]].sub0 +; ; GFX12-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_VBUFFER_IDXEN]].sub1 +; ; GFX12-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_VBUFFER_IDXEN]].sub2 +; ; GFX12-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_VBUFFER_IDXEN]].sub3 +; ; GFX12-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2 +; ; GFX12-NEXT: GLOBAL_STORE_DWORDX3 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<3 x s32>) into %ir.data_addr, align 16, addrspace 1) +; ; GFX12-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY12]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) +; ; GFX12-NEXT: S_ENDPGM 0 +; %res = call { <3 x i32>, i32 } @llvm.amdgcn.struct.buffer.load.sl_v3i32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) +; %data = extractvalue { <3 x i32>, i32 } %res, 0 +; store <3 x i32> %data, ptr addrspace(1) %data_addr +; %tfe = extractvalue { <3 x i32>, i32 } %res, 1 +; store i32 %tfe, ptr addrspace(1) %tfe_addr +; ret void +; } -define amdgpu_ps void @raw_buffer_load_v3f32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) { - ; GFX6-LABEL: name: raw_buffer_load_v3f32_tfe - ; GFX6: bb.1 (%ir-block.0): - ; GFX6-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX3_TFE_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8) - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub0 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub1 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub2 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub3 - ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub0_sub1 - ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub2_sub3 - ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 - ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_2]], 
%subreg.sub1 - ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 - ; GFX6-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE3]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_STORE_DWORDX2_ADDR64 [[COPY13]], [[REG_SEQUENCE1]], [[REG_SEQUENCE4]], 0, 0, 0, 0, implicit $exec :: (store (<2 x s32>) into %ir.data_addr, align 16, addrspace 1) - ; GFX6-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 - ; GFX6-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_3]], %subreg.sub0, [[S_MOV_B32_4]], %subreg.sub1 - ; GFX6-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0 - ; GFX6-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_1]], %subreg.sub0_sub1, [[REG_SEQUENCE5]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY11]], [[REG_SEQUENCE1]], [[REG_SEQUENCE6]], 0, 8, 0, 0, implicit $exec :: (store (s32) into %ir.data_addr + 8, align 8, basealign 16, addrspace 1) - ; GFX6-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6-NEXT: [[S_MOV_B32_6:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 - ; GFX6-NEXT: [[REG_SEQUENCE7:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_5]], %subreg.sub0, [[S_MOV_B32_6]], %subreg.sub1 - ; GFX6-NEXT: [[S_MOV_B64_2:%[0-9]+]]:sreg_64 = S_MOV_B64 0 - ; GFX6-NEXT: [[REG_SEQUENCE8:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_2]], %subreg.sub0_sub1, [[REG_SEQUENCE7]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY12]], [[REG_SEQUENCE2]], [[REG_SEQUENCE8]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) - ; GFX6-NEXT: S_ENDPGM 0 - ; - ; GFX7-LABEL: name: raw_buffer_load_v3f32_tfe - ; GFX7: bb.1 (%ir-block.0): - ; GFX7-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX3_TFE_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8) - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub0 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub1 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub2 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub3 - ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2 - ; GFX7-NEXT: 
[[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 - ; GFX7-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_2]], %subreg.sub1 - ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 - ; GFX7-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE4]], %subreg.sub2_sub3 - ; GFX7-NEXT: BUFFER_STORE_DWORDX3_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE1]], [[REG_SEQUENCE5]], 0, 0, 0, 0, implicit $exec :: (store (<3 x s32>) into %ir.data_addr, align 16, addrspace 1) - ; GFX7-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 - ; GFX7-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_3]], %subreg.sub0, [[S_MOV_B32_4]], %subreg.sub1 - ; GFX7-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0 - ; GFX7-NEXT: [[REG_SEQUENCE7:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_1]], %subreg.sub0_sub1, [[REG_SEQUENCE6]], %subreg.sub2_sub3 - ; GFX7-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY12]], [[REG_SEQUENCE2]], [[REG_SEQUENCE7]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) - ; GFX7-NEXT: S_ENDPGM 0 - ; - ; GFX8-LABEL: name: raw_buffer_load_v3f32_tfe - ; GFX8: bb.1 (%ir-block.0): - ; GFX8-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX3_TFE_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8) - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub0 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub1 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub2 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub3 - ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2 - ; GFX8-NEXT: FLAT_STORE_DWORDX3 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>) into %ir.data_addr, align 16, addrspace 1) - ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY12]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.tfe_addr, addrspace 1) - ; GFX8-NEXT: S_ENDPGM 0 - ; - ; GFX910-LABEL: name: raw_buffer_load_v3f32_tfe - ; GFX910: bb.1 
(%ir-block.0): - ; GFX910-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX910-NEXT: {{ $}} - ; GFX910-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX910-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX910-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX910-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX910-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX910-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX910-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX910-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX910-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX910-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX910-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX910-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX910-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX910-NEXT: [[BUFFER_LOAD_DWORDX3_TFE_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8) - ; GFX910-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub0 - ; GFX910-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub1 - ; GFX910-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub2 - ; GFX910-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub3 - ; GFX910-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2 - ; GFX910-NEXT: GLOBAL_STORE_DWORDX3 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<3 x s32>) into %ir.data_addr, align 16, addrspace 1) - ; GFX910-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY12]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) - ; GFX910-NEXT: S_ENDPGM 0 - ; - ; GFX11-LABEL: name: raw_buffer_load_v3f32_tfe - ; GFX11: bb.1 (%ir-block.0): - ; GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX11-NEXT: [[BUFFER_LOAD_DWORDX3_TFE_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8) - ; 
GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub0 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub1 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub2 - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub3 - ; GFX11-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2 - ; GFX11-NEXT: GLOBAL_STORE_DWORDX3 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<3 x s32>) into %ir.data_addr, align 16, addrspace 1) - ; GFX11-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY12]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) - ; GFX11-NEXT: S_ENDPGM 0 - ; - ; GFX12-LABEL: name: raw_buffer_load_v3f32_tfe - ; GFX12: bb.1 (%ir-block.0): - ; GFX12-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX3_TFE_VBUFFER_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_VBUFFER_IDXEN [[COPY8]], [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8) - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_VBUFFER_IDXEN]].sub0 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_VBUFFER_IDXEN]].sub1 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_VBUFFER_IDXEN]].sub2 - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_VBUFFER_IDXEN]].sub3 - ; GFX12-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2 - ; GFX12-NEXT: GLOBAL_STORE_DWORDX3 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<3 x s32>) into %ir.data_addr, align 16, addrspace 1) - ; GFX12-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY12]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) - ; GFX12-NEXT: S_ENDPGM 0 - %res = call { <3 x float>, i32 } @llvm.amdgcn.struct.buffer.load.sl_v3f32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) - %data = extractvalue { <3 x float>, i32 } %res, 0 - store <3 x float> %data, ptr addrspace(1) %data_addr - %tfe = extractvalue { <3 x float>, i32 } %res, 1 - store i32 %tfe, ptr addrspace(1) %tfe_addr - ret void -} +; define amdgpu_ps void @raw_buffer_load_v3f32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) { +; ; 
GFX6-LABEL: name: raw_buffer_load_v3f32_tfe +; ; GFX6: bb.1 (%ir-block.0): +; ; GFX6-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 +; ; GFX6-NEXT: {{ $}} +; ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 +; ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 +; ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 +; ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 +; ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 +; ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +; ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 +; ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 +; ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 +; ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 +; ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 +; ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] +; ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX3_TFE_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8) +; ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub0 +; ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub1 +; ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub2 +; ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub3 +; ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub0_sub1 +; ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub2_sub3 +; ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX6-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 +; ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_2]], %subreg.sub1 +; ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 +; ; GFX6-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE3]], %subreg.sub2_sub3 +; ; GFX6-NEXT: BUFFER_STORE_DWORDX2_ADDR64 [[COPY13]], [[REG_SEQUENCE1]], [[REG_SEQUENCE4]], 0, 0, 0, 0, implicit $exec :: (store (<2 x s32>) into %ir.data_addr, align 16, addrspace 1) +; ; GFX6-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX6-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 +; ; GFX6-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_3]], %subreg.sub0, [[S_MOV_B32_4]], %subreg.sub1 +; ; GFX6-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0 +; ; GFX6-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_1]], %subreg.sub0_sub1, [[REG_SEQUENCE5]], %subreg.sub2_sub3 +; ; GFX6-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY11]], [[REG_SEQUENCE1]], [[REG_SEQUENCE6]], 0, 8, 0, 0, implicit $exec :: (store (s32) into %ir.data_addr + 8, align 8, basealign 16, addrspace 1) +; ; GFX6-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX6-NEXT: [[S_MOV_B32_6:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 +; ; GFX6-NEXT: [[REG_SEQUENCE7:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_5]], %subreg.sub0, [[S_MOV_B32_6]], %subreg.sub1 +; ; GFX6-NEXT: [[S_MOV_B64_2:%[0-9]+]]:sreg_64 = S_MOV_B64 0 +; ; GFX6-NEXT: 
[[REG_SEQUENCE8:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_2]], %subreg.sub0_sub1, [[REG_SEQUENCE7]], %subreg.sub2_sub3 +; ; GFX6-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY12]], [[REG_SEQUENCE2]], [[REG_SEQUENCE8]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) +; ; GFX6-NEXT: S_ENDPGM 0 +; ; +; ; GFX7-LABEL: name: raw_buffer_load_v3f32_tfe +; ; GFX7: bb.1 (%ir-block.0): +; ; GFX7-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 +; ; GFX7-NEXT: {{ $}} +; ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 +; ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 +; ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 +; ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 +; ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 +; ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +; ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 +; ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 +; ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 +; ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 +; ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 +; ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] +; ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX3_TFE_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8) +; ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub0 +; ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub1 +; ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub2 +; ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub3 +; ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2 +; ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX7-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 +; ; GFX7-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_2]], %subreg.sub1 +; ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 +; ; GFX7-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE4]], %subreg.sub2_sub3 +; ; GFX7-NEXT: BUFFER_STORE_DWORDX3_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE1]], [[REG_SEQUENCE5]], 0, 0, 0, 0, implicit $exec :: (store (<3 x s32>) into %ir.data_addr, align 16, addrspace 1) +; ; GFX7-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX7-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 +; ; GFX7-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_3]], %subreg.sub0, [[S_MOV_B32_4]], %subreg.sub1 +; ; GFX7-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0 +; ; GFX7-NEXT: [[REG_SEQUENCE7:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_1]], %subreg.sub0_sub1, [[REG_SEQUENCE6]], %subreg.sub2_sub3 +; ; GFX7-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY12]], [[REG_SEQUENCE2]], [[REG_SEQUENCE7]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) +; ; GFX7-NEXT: S_ENDPGM 0 +; ; +; ; GFX8-LABEL: name: raw_buffer_load_v3f32_tfe +; ; 
GFX8: bb.1 (%ir-block.0): +; ; GFX8-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 +; ; GFX8-NEXT: {{ $}} +; ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 +; ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 +; ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 +; ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 +; ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 +; ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +; ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 +; ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 +; ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 +; ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 +; ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 +; ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] +; ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX3_TFE_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8) +; ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub0 +; ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub1 +; ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub2 +; ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub3 +; ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2 +; ; GFX8-NEXT: FLAT_STORE_DWORDX3 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>) into %ir.data_addr, align 16, addrspace 1) +; ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY12]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.tfe_addr, addrspace 1) +; ; GFX8-NEXT: S_ENDPGM 0 +; ; +; ; GFX910-LABEL: name: raw_buffer_load_v3f32_tfe +; ; GFX910: bb.1 (%ir-block.0): +; ; GFX910-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 +; ; GFX910-NEXT: {{ $}} +; ; GFX910-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 +; ; GFX910-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 +; ; GFX910-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 +; ; GFX910-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 +; ; GFX910-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 +; ; GFX910-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +; ; GFX910-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 +; ; GFX910-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 +; ; GFX910-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 +; ; GFX910-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 +; ; GFX910-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 +; ; GFX910-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX910-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] +; ; GFX910-NEXT: [[BUFFER_LOAD_DWORDX3_TFE_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: 
(dereferenceable load (<3 x s32>), align 1, addrspace 8) +; ; GFX910-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub0 +; ; GFX910-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub1 +; ; GFX910-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub2 +; ; GFX910-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub3 +; ; GFX910-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2 +; ; GFX910-NEXT: GLOBAL_STORE_DWORDX3 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<3 x s32>) into %ir.data_addr, align 16, addrspace 1) +; ; GFX910-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY12]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) +; ; GFX910-NEXT: S_ENDPGM 0 +; ; +; ; GFX11-LABEL: name: raw_buffer_load_v3f32_tfe +; ; GFX11: bb.1 (%ir-block.0): +; ; GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 +; ; GFX11-NEXT: {{ $}} +; ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 +; ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 +; ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 +; ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 +; ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 +; ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +; ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 +; ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 +; ; GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 +; ; GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 +; ; GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 +; ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] +; ; GFX11-NEXT: [[BUFFER_LOAD_DWORDX3_TFE_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8) +; ; GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub0 +; ; GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub1 +; ; GFX11-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub2 +; ; GFX11-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub3 +; ; GFX11-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2 +; ; GFX11-NEXT: GLOBAL_STORE_DWORDX3 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<3 x s32>) into %ir.data_addr, align 16, addrspace 1) +; ; GFX11-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY12]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) +; ; GFX11-NEXT: S_ENDPGM 0 +; ; +; ; GFX12-LABEL: name: raw_buffer_load_v3f32_tfe +; ; GFX12: bb.1 (%ir-block.0): +; ; GFX12-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 +; ; GFX12-NEXT: {{ $}} +; ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 +; ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 +; ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 +; ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 +; 
; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 +; ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +; ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 +; ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 +; ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 +; ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 +; ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 +; ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] +; ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX3_TFE_VBUFFER_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_VBUFFER_IDXEN [[COPY8]], [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8) +; ; GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_VBUFFER_IDXEN]].sub0 +; ; GFX12-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_VBUFFER_IDXEN]].sub1 +; ; GFX12-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_VBUFFER_IDXEN]].sub2 +; ; GFX12-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_VBUFFER_IDXEN]].sub3 +; ; GFX12-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2 +; ; GFX12-NEXT: GLOBAL_STORE_DWORDX3 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<3 x s32>) into %ir.data_addr, align 16, addrspace 1) +; ; GFX12-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY12]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) +; ; GFX12-NEXT: S_ENDPGM 0 +; %res = call { <3 x float>, i32 } @llvm.amdgcn.struct.buffer.load.sl_v3f32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) +; %data = extractvalue { <3 x float>, i32 } %res, 0 +; store <3 x float> %data, ptr addrspace(1) %data_addr +; %tfe = extractvalue { <3 x float>, i32 } %res, 1 +; store i32 %tfe, ptr addrspace(1) %tfe_addr +; ret void +; } -define amdgpu_ps void @raw_buffer_load_v4i32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) { - ; GFX67-LABEL: name: raw_buffer_load_v4i32_tfe - ; GFX67: bb.1 (%ir-block.0): - ; GFX67-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX67-NEXT: {{ $}} - ; GFX67-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX67-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX67-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX67-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX67-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX67-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX67-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX67-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX67-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX67-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX67-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX67-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX67-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX67-NEXT: [[BUFFER_LOAD_DWORDX4_TFE_IDXEN:%[0-9]+]]:vreg_160 = 
BUFFER_LOAD_DWORDX4_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) - ; GFX67-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub0 - ; GFX67-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub1 - ; GFX67-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub2 - ; GFX67-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub3 - ; GFX67-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub4 - ; GFX67-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2, [[COPY12]], %subreg.sub3 - ; GFX67-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX67-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 - ; GFX67-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_2]], %subreg.sub1 - ; GFX67-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 - ; GFX67-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE4]], %subreg.sub2_sub3 - ; GFX67-NEXT: BUFFER_STORE_DWORDX4_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE1]], [[REG_SEQUENCE5]], 0, 0, 0, 0, implicit $exec :: (store (<4 x s32>) into %ir.data_addr, addrspace 1) - ; GFX67-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX67-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 - ; GFX67-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_3]], %subreg.sub0, [[S_MOV_B32_4]], %subreg.sub1 - ; GFX67-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0 - ; GFX67-NEXT: [[REG_SEQUENCE7:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_1]], %subreg.sub0_sub1, [[REG_SEQUENCE6]], %subreg.sub2_sub3 - ; GFX67-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY13]], [[REG_SEQUENCE2]], [[REG_SEQUENCE7]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) - ; GFX67-NEXT: S_ENDPGM 0 - ; - ; GFX8-LABEL: name: raw_buffer_load_v4i32_tfe - ; GFX8: bb.1 (%ir-block.0): - ; GFX8-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_TFE_IDXEN:%[0-9]+]]:vreg_160 = BUFFER_LOAD_DWORDX4_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub0 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = 
COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub1 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub2 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub3 - ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub4 - ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2, [[COPY12]], %subreg.sub3 - ; GFX8-NEXT: FLAT_STORE_DWORDX4 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>) into %ir.data_addr, addrspace 1) - ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY13]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.tfe_addr, addrspace 1) - ; GFX8-NEXT: S_ENDPGM 0 - ; - ; GFX910-LABEL: name: raw_buffer_load_v4i32_tfe - ; GFX910: bb.1 (%ir-block.0): - ; GFX910-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX910-NEXT: {{ $}} - ; GFX910-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX910-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX910-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX910-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX910-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX910-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX910-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX910-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX910-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX910-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX910-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX910-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX910-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX910-NEXT: [[BUFFER_LOAD_DWORDX4_TFE_IDXEN:%[0-9]+]]:vreg_160 = BUFFER_LOAD_DWORDX4_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) - ; GFX910-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub0 - ; GFX910-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub1 - ; GFX910-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub2 - ; GFX910-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub3 - ; GFX910-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub4 - ; GFX910-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2, [[COPY12]], %subreg.sub3 - ; GFX910-NEXT: GLOBAL_STORE_DWORDX4 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<4 x s32>) into %ir.data_addr, addrspace 1) - ; GFX910-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY13]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) - ; GFX910-NEXT: S_ENDPGM 0 - ; - ; GFX11-LABEL: name: raw_buffer_load_v4i32_tfe - ; GFX11: bb.1 (%ir-block.0): - ; GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX11-NEXT: 
[[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX11-NEXT: [[BUFFER_LOAD_DWORDX4_TFE_IDXEN:%[0-9]+]]:vreg_160 = BUFFER_LOAD_DWORDX4_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub0 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub1 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub2 - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub3 - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub4 - ; GFX11-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2, [[COPY12]], %subreg.sub3 - ; GFX11-NEXT: GLOBAL_STORE_DWORDX4 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<4 x s32>) into %ir.data_addr, addrspace 1) - ; GFX11-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY13]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) - ; GFX11-NEXT: S_ENDPGM 0 - ; - ; GFX12-LABEL: name: raw_buffer_load_v4i32_tfe - ; GFX12: bb.1 (%ir-block.0): - ; GFX12-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_IDXEN:%[0-9]+]]:vreg_160 = BUFFER_LOAD_DWORDX4_TFE_VBUFFER_IDXEN [[COPY8]], [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_IDXEN]].sub0 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_IDXEN]].sub1 - ; GFX12-NEXT: 
[[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_IDXEN]].sub2 - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_IDXEN]].sub3 - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_IDXEN]].sub4 - ; GFX12-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2, [[COPY12]], %subreg.sub3 - ; GFX12-NEXT: GLOBAL_STORE_DWORDX4 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<4 x s32>) into %ir.data_addr, addrspace 1) - ; GFX12-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY13]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) - ; GFX12-NEXT: S_ENDPGM 0 - %res = call { <4 x i32>, i32 } @llvm.amdgcn.struct.buffer.load.sl_v4i32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) - %data = extractvalue { <4 x i32>, i32 } %res, 0 - store <4 x i32> %data, ptr addrspace(1) %data_addr - %tfe = extractvalue { <4 x i32>, i32 } %res, 1 - store i32 %tfe, ptr addrspace(1) %tfe_addr - ret void -} +; define amdgpu_ps void @raw_buffer_load_v4i32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) { +; ; GFX67-LABEL: name: raw_buffer_load_v4i32_tfe +; ; GFX67: bb.1 (%ir-block.0): +; ; GFX67-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 +; ; GFX67-NEXT: {{ $}} +; ; GFX67-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 +; ; GFX67-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 +; ; GFX67-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 +; ; GFX67-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 +; ; GFX67-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 +; ; GFX67-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +; ; GFX67-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 +; ; GFX67-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 +; ; GFX67-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 +; ; GFX67-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 +; ; GFX67-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 +; ; GFX67-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX67-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] +; ; GFX67-NEXT: [[BUFFER_LOAD_DWORDX4_TFE_IDXEN:%[0-9]+]]:vreg_160 = BUFFER_LOAD_DWORDX4_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) +; ; GFX67-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub0 +; ; GFX67-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub1 +; ; GFX67-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub2 +; ; GFX67-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub3 +; ; GFX67-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub4 +; ; GFX67-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2, [[COPY12]], %subreg.sub3 +; ; GFX67-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX67-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 +; ; GFX67-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_2]], %subreg.sub1 +; ; GFX67-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = 
S_MOV_B64 0 +; ; GFX67-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE4]], %subreg.sub2_sub3 +; ; GFX67-NEXT: BUFFER_STORE_DWORDX4_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE1]], [[REG_SEQUENCE5]], 0, 0, 0, 0, implicit $exec :: (store (<4 x s32>) into %ir.data_addr, addrspace 1) +; ; GFX67-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX67-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 +; ; GFX67-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_3]], %subreg.sub0, [[S_MOV_B32_4]], %subreg.sub1 +; ; GFX67-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0 +; ; GFX67-NEXT: [[REG_SEQUENCE7:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_1]], %subreg.sub0_sub1, [[REG_SEQUENCE6]], %subreg.sub2_sub3 +; ; GFX67-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY13]], [[REG_SEQUENCE2]], [[REG_SEQUENCE7]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) +; ; GFX67-NEXT: S_ENDPGM 0 +; ; +; ; GFX8-LABEL: name: raw_buffer_load_v4i32_tfe +; ; GFX8: bb.1 (%ir-block.0): +; ; GFX8-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 +; ; GFX8-NEXT: {{ $}} +; ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 +; ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 +; ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 +; ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 +; ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 +; ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +; ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 +; ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 +; ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 +; ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 +; ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 +; ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] +; ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_TFE_IDXEN:%[0-9]+]]:vreg_160 = BUFFER_LOAD_DWORDX4_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) +; ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub0 +; ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub1 +; ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub2 +; ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub3 +; ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub4 +; ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2, [[COPY12]], %subreg.sub3 +; ; GFX8-NEXT: FLAT_STORE_DWORDX4 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>) into %ir.data_addr, addrspace 1) +; ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY13]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.tfe_addr, addrspace 1) +; ; GFX8-NEXT: S_ENDPGM 0 +; ; +; ; GFX910-LABEL: name: raw_buffer_load_v4i32_tfe +; ; GFX910: bb.1 (%ir-block.0): +; ; GFX910-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 +; ; GFX910-NEXT: {{ $}} +; ; 
GFX910-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 +; ; GFX910-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 +; ; GFX910-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 +; ; GFX910-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 +; ; GFX910-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 +; ; GFX910-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +; ; GFX910-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 +; ; GFX910-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 +; ; GFX910-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 +; ; GFX910-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 +; ; GFX910-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 +; ; GFX910-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX910-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] +; ; GFX910-NEXT: [[BUFFER_LOAD_DWORDX4_TFE_IDXEN:%[0-9]+]]:vreg_160 = BUFFER_LOAD_DWORDX4_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) +; ; GFX910-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub0 +; ; GFX910-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub1 +; ; GFX910-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub2 +; ; GFX910-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub3 +; ; GFX910-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub4 +; ; GFX910-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2, [[COPY12]], %subreg.sub3 +; ; GFX910-NEXT: GLOBAL_STORE_DWORDX4 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<4 x s32>) into %ir.data_addr, addrspace 1) +; ; GFX910-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY13]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) +; ; GFX910-NEXT: S_ENDPGM 0 +; ; +; ; GFX11-LABEL: name: raw_buffer_load_v4i32_tfe +; ; GFX11: bb.1 (%ir-block.0): +; ; GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 +; ; GFX11-NEXT: {{ $}} +; ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 +; ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 +; ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 +; ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 +; ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 +; ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +; ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 +; ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 +; ; GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 +; ; GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 +; ; GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 +; ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] +; ; GFX11-NEXT: [[BUFFER_LOAD_DWORDX4_TFE_IDXEN:%[0-9]+]]:vreg_160 = BUFFER_LOAD_DWORDX4_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, 
addrspace 8) +; ; GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub0 +; ; GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub1 +; ; GFX11-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub2 +; ; GFX11-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub3 +; ; GFX11-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub4 +; ; GFX11-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2, [[COPY12]], %subreg.sub3 +; ; GFX11-NEXT: GLOBAL_STORE_DWORDX4 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<4 x s32>) into %ir.data_addr, addrspace 1) +; ; GFX11-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY13]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) +; ; GFX11-NEXT: S_ENDPGM 0 +; ; +; ; GFX12-LABEL: name: raw_buffer_load_v4i32_tfe +; ; GFX12: bb.1 (%ir-block.0): +; ; GFX12-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 +; ; GFX12-NEXT: {{ $}} +; ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 +; ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 +; ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 +; ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 +; ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 +; ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +; ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 +; ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 +; ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 +; ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 +; ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 +; ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] +; ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_IDXEN:%[0-9]+]]:vreg_160 = BUFFER_LOAD_DWORDX4_TFE_VBUFFER_IDXEN [[COPY8]], [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) +; ; GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_IDXEN]].sub0 +; ; GFX12-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_IDXEN]].sub1 +; ; GFX12-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_IDXEN]].sub2 +; ; GFX12-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_IDXEN]].sub3 +; ; GFX12-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_IDXEN]].sub4 +; ; GFX12-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2, [[COPY12]], %subreg.sub3 +; ; GFX12-NEXT: GLOBAL_STORE_DWORDX4 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<4 x s32>) into %ir.data_addr, addrspace 1) +; ; GFX12-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY13]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) +; ; GFX12-NEXT: S_ENDPGM 0 +; %res = call { <4 x i32>, i32 } @llvm.amdgcn.struct.buffer.load.sl_v4i32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) +; %data = extractvalue { <4 x i32>, i32 } %res, 0 +; store <4 x i32> %data, ptr 
addrspace(1) %data_addr +; %tfe = extractvalue { <4 x i32>, i32 } %res, 1 +; store i32 %tfe, ptr addrspace(1) %tfe_addr +; ret void +; } -define amdgpu_ps void @raw_buffer_load_v4f32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) { - ; GFX67-LABEL: name: raw_buffer_load_v4f32_tfe - ; GFX67: bb.1 (%ir-block.0): - ; GFX67-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX67-NEXT: {{ $}} - ; GFX67-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX67-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX67-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX67-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX67-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX67-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX67-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX67-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX67-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX67-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX67-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX67-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX67-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX67-NEXT: [[BUFFER_LOAD_DWORDX4_TFE_IDXEN:%[0-9]+]]:vreg_160 = BUFFER_LOAD_DWORDX4_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) - ; GFX67-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub0 - ; GFX67-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub1 - ; GFX67-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub2 - ; GFX67-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub3 - ; GFX67-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub4 - ; GFX67-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2, [[COPY12]], %subreg.sub3 - ; GFX67-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX67-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 - ; GFX67-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_2]], %subreg.sub1 - ; GFX67-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 - ; GFX67-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE4]], %subreg.sub2_sub3 - ; GFX67-NEXT: BUFFER_STORE_DWORDX4_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE1]], [[REG_SEQUENCE5]], 0, 0, 0, 0, implicit $exec :: (store (<4 x s32>) into %ir.data_addr, addrspace 1) - ; GFX67-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX67-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 - ; GFX67-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_3]], %subreg.sub0, [[S_MOV_B32_4]], %subreg.sub1 - ; GFX67-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0 - ; GFX67-NEXT: [[REG_SEQUENCE7:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_1]], %subreg.sub0_sub1, [[REG_SEQUENCE6]], %subreg.sub2_sub3 - ; GFX67-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY13]], [[REG_SEQUENCE2]], [[REG_SEQUENCE7]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) - ; GFX67-NEXT: S_ENDPGM 0 - ; - 
; GFX8-LABEL: name: raw_buffer_load_v4f32_tfe - ; GFX8: bb.1 (%ir-block.0): - ; GFX8-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_TFE_IDXEN:%[0-9]+]]:vreg_160 = BUFFER_LOAD_DWORDX4_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub0 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub1 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub2 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub3 - ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub4 - ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2, [[COPY12]], %subreg.sub3 - ; GFX8-NEXT: FLAT_STORE_DWORDX4 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>) into %ir.data_addr, addrspace 1) - ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY13]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.tfe_addr, addrspace 1) - ; GFX8-NEXT: S_ENDPGM 0 - ; - ; GFX910-LABEL: name: raw_buffer_load_v4f32_tfe - ; GFX910: bb.1 (%ir-block.0): - ; GFX910-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX910-NEXT: {{ $}} - ; GFX910-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX910-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX910-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX910-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX910-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX910-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX910-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX910-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX910-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX910-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX910-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX910-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX910-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX910-NEXT: 
[[BUFFER_LOAD_DWORDX4_TFE_IDXEN:%[0-9]+]]:vreg_160 = BUFFER_LOAD_DWORDX4_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) - ; GFX910-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub0 - ; GFX910-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub1 - ; GFX910-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub2 - ; GFX910-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub3 - ; GFX910-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub4 - ; GFX910-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2, [[COPY12]], %subreg.sub3 - ; GFX910-NEXT: GLOBAL_STORE_DWORDX4 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<4 x s32>) into %ir.data_addr, addrspace 1) - ; GFX910-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY13]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) - ; GFX910-NEXT: S_ENDPGM 0 - ; - ; GFX11-LABEL: name: raw_buffer_load_v4f32_tfe - ; GFX11: bb.1 (%ir-block.0): - ; GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX11-NEXT: [[BUFFER_LOAD_DWORDX4_TFE_IDXEN:%[0-9]+]]:vreg_160 = BUFFER_LOAD_DWORDX4_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub0 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub1 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub2 - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub3 - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub4 - ; GFX11-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2, [[COPY12]], %subreg.sub3 - ; GFX11-NEXT: GLOBAL_STORE_DWORDX4 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<4 x s32>) into %ir.data_addr, addrspace 1) - ; GFX11-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY13]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) - ; GFX11-NEXT: S_ENDPGM 0 - ; - ; GFX12-LABEL: name: raw_buffer_load_v4f32_tfe - ; GFX12: bb.1 (%ir-block.0): - ; 
GFX12-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_IDXEN:%[0-9]+]]:vreg_160 = BUFFER_LOAD_DWORDX4_TFE_VBUFFER_IDXEN [[COPY8]], [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_IDXEN]].sub0 - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_IDXEN]].sub1 - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_IDXEN]].sub2 - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_IDXEN]].sub3 - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_IDXEN]].sub4 - ; GFX12-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2, [[COPY12]], %subreg.sub3 - ; GFX12-NEXT: GLOBAL_STORE_DWORDX4 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<4 x s32>) into %ir.data_addr, addrspace 1) - ; GFX12-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY13]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) - ; GFX12-NEXT: S_ENDPGM 0 - %res = call { <4 x float>, i32 } @llvm.amdgcn.struct.buffer.load.sl_v4f32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) - %data = extractvalue { <4 x float>, i32 } %res, 0 - store <4 x float> %data, ptr addrspace(1) %data_addr - %tfe = extractvalue { <4 x float>, i32 } %res, 1 - store i32 %tfe, ptr addrspace(1) %tfe_addr - ret void -} +; define amdgpu_ps void @raw_buffer_load_v4f32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) { +; ; GFX67-LABEL: name: raw_buffer_load_v4f32_tfe +; ; GFX67: bb.1 (%ir-block.0): +; ; GFX67-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 +; ; GFX67-NEXT: {{ $}} +; ; GFX67-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 +; ; GFX67-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 +; ; GFX67-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 +; ; GFX67-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 +; ; GFX67-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 +; ; GFX67-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +; ; GFX67-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 +; ; GFX67-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], 
%subreg.sub1 +; ; GFX67-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 +; ; GFX67-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 +; ; GFX67-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 +; ; GFX67-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX67-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] +; ; GFX67-NEXT: [[BUFFER_LOAD_DWORDX4_TFE_IDXEN:%[0-9]+]]:vreg_160 = BUFFER_LOAD_DWORDX4_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) +; ; GFX67-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub0 +; ; GFX67-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub1 +; ; GFX67-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub2 +; ; GFX67-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub3 +; ; GFX67-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub4 +; ; GFX67-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2, [[COPY12]], %subreg.sub3 +; ; GFX67-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX67-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 +; ; GFX67-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_2]], %subreg.sub1 +; ; GFX67-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 +; ; GFX67-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE4]], %subreg.sub2_sub3 +; ; GFX67-NEXT: BUFFER_STORE_DWORDX4_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE1]], [[REG_SEQUENCE5]], 0, 0, 0, 0, implicit $exec :: (store (<4 x s32>) into %ir.data_addr, addrspace 1) +; ; GFX67-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX67-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 +; ; GFX67-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_3]], %subreg.sub0, [[S_MOV_B32_4]], %subreg.sub1 +; ; GFX67-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0 +; ; GFX67-NEXT: [[REG_SEQUENCE7:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_1]], %subreg.sub0_sub1, [[REG_SEQUENCE6]], %subreg.sub2_sub3 +; ; GFX67-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY13]], [[REG_SEQUENCE2]], [[REG_SEQUENCE7]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) +; ; GFX67-NEXT: S_ENDPGM 0 +; ; +; ; GFX8-LABEL: name: raw_buffer_load_v4f32_tfe +; ; GFX8: bb.1 (%ir-block.0): +; ; GFX8-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 +; ; GFX8-NEXT: {{ $}} +; ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 +; ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 +; ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 +; ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 +; ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 +; ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +; ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 +; ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 +; ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 +; ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 +; ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 +; ; 
GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] +; ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_TFE_IDXEN:%[0-9]+]]:vreg_160 = BUFFER_LOAD_DWORDX4_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) +; ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub0 +; ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub1 +; ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub2 +; ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub3 +; ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub4 +; ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2, [[COPY12]], %subreg.sub3 +; ; GFX8-NEXT: FLAT_STORE_DWORDX4 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>) into %ir.data_addr, addrspace 1) +; ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY13]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.tfe_addr, addrspace 1) +; ; GFX8-NEXT: S_ENDPGM 0 +; ; +; ; GFX910-LABEL: name: raw_buffer_load_v4f32_tfe +; ; GFX910: bb.1 (%ir-block.0): +; ; GFX910-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 +; ; GFX910-NEXT: {{ $}} +; ; GFX910-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 +; ; GFX910-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 +; ; GFX910-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 +; ; GFX910-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 +; ; GFX910-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 +; ; GFX910-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +; ; GFX910-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 +; ; GFX910-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 +; ; GFX910-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 +; ; GFX910-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 +; ; GFX910-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 +; ; GFX910-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX910-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] +; ; GFX910-NEXT: [[BUFFER_LOAD_DWORDX4_TFE_IDXEN:%[0-9]+]]:vreg_160 = BUFFER_LOAD_DWORDX4_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) +; ; GFX910-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub0 +; ; GFX910-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub1 +; ; GFX910-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub2 +; ; GFX910-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub3 +; ; GFX910-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub4 +; ; GFX910-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2, [[COPY12]], %subreg.sub3 +; ; GFX910-NEXT: GLOBAL_STORE_DWORDX4 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<4 x s32>) into %ir.data_addr, addrspace 1) +; ; GFX910-NEXT: GLOBAL_STORE_DWORD 
[[REG_SEQUENCE2]], [[COPY13]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) +; ; GFX910-NEXT: S_ENDPGM 0 +; ; +; ; GFX11-LABEL: name: raw_buffer_load_v4f32_tfe +; ; GFX11: bb.1 (%ir-block.0): +; ; GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 +; ; GFX11-NEXT: {{ $}} +; ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 +; ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 +; ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 +; ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 +; ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 +; ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +; ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 +; ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 +; ; GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 +; ; GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 +; ; GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 +; ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] +; ; GFX11-NEXT: [[BUFFER_LOAD_DWORDX4_TFE_IDXEN:%[0-9]+]]:vreg_160 = BUFFER_LOAD_DWORDX4_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) +; ; GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub0 +; ; GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub1 +; ; GFX11-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub2 +; ; GFX11-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub3 +; ; GFX11-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub4 +; ; GFX11-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2, [[COPY12]], %subreg.sub3 +; ; GFX11-NEXT: GLOBAL_STORE_DWORDX4 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<4 x s32>) into %ir.data_addr, addrspace 1) +; ; GFX11-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY13]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) +; ; GFX11-NEXT: S_ENDPGM 0 +; ; +; ; GFX12-LABEL: name: raw_buffer_load_v4f32_tfe +; ; GFX12: bb.1 (%ir-block.0): +; ; GFX12-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 +; ; GFX12-NEXT: {{ $}} +; ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 +; ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 +; ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 +; ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 +; ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 +; ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +; ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 +; ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 +; ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 +; ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 +; ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 +; ; GFX12-NEXT: 
[[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] +; ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_IDXEN:%[0-9]+]]:vreg_160 = BUFFER_LOAD_DWORDX4_TFE_VBUFFER_IDXEN [[COPY8]], [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) +; ; GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_IDXEN]].sub0 +; ; GFX12-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_IDXEN]].sub1 +; ; GFX12-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_IDXEN]].sub2 +; ; GFX12-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_IDXEN]].sub3 +; ; GFX12-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_IDXEN]].sub4 +; ; GFX12-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2, [[COPY12]], %subreg.sub3 +; ; GFX12-NEXT: GLOBAL_STORE_DWORDX4 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<4 x s32>) into %ir.data_addr, addrspace 1) +; ; GFX12-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY13]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1) +; ; GFX12-NEXT: S_ENDPGM 0 +; %res = call { <4 x float>, i32 } @llvm.amdgcn.struct.buffer.load.sl_v4f32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) +; %data = extractvalue { <4 x float>, i32 } %res, 0 +; store <4 x float> %data, ptr addrspace(1) %data_addr +; %tfe = extractvalue { <4 x float>, i32 } %res, 1 +; store i32 %tfe, ptr addrspace(1) %tfe_addr +; ret void +; } declare { i8, i32 } @llvm.amdgcn.struct.buffer.load.sl_i8i32s(<4 x i32>, i32, i32, i32, i32) declare { i16, i32 } @llvm.amdgcn.struct.buffer.load.sl_i16i32s(<4 x i32>, i32, i32, i32, i32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f16.ll index 692381008e6fb..d4bc1b981cb93 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f16.ll @@ -18,7 +18,7 @@ define amdgpu_ps void @struct_buffer_store_format_f16__vgpr_val__sgpr_rsrc__vgpr ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_gfx80_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 8) + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_gfx80_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (f16), align 1, addrspace 8) ; UNPACKED-NEXT: S_ENDPGM 0 ; ; PACKED-LABEL: name: struct_buffer_store_format_f16__vgpr_val__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset @@ -35,7 +35,7 @@ define amdgpu_ps void @struct_buffer_store_format_f16__vgpr_val__sgpr_rsrc__vgpr ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; PACKED-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], 
[[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 8) + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (f16), align 1, addrspace 8) ; PACKED-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: struct_buffer_store_format_f16__vgpr_val__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset @@ -52,7 +52,7 @@ define amdgpu_ps void @struct_buffer_store_format_f16__vgpr_val__sgpr_rsrc__vgpr ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX12-NEXT: BUFFER_STORE_FORMAT_D16_X_VBUFFER_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_FORMAT_D16_X_VBUFFER_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (f16), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.format.f16(half %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -77,7 +77,7 @@ define amdgpu_ps void @struct_buffer_store_format_v2f16__vgpr_val__sgpr_rsrc__vg ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY8]], [[COPY]], implicit $exec ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_gfx80_BOTHEN_exact [[REG_SEQUENCE1]], [[REG_SEQUENCE2]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 8) + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_gfx80_BOTHEN_exact [[REG_SEQUENCE1]], [[REG_SEQUENCE2]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x f16>), align 1, addrspace 8) ; UNPACKED-NEXT: S_ENDPGM 0 ; ; PACKED-LABEL: name: struct_buffer_store_format_v2f16__vgpr_val__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset @@ -94,7 +94,7 @@ define amdgpu_ps void @struct_buffer_store_format_v2f16__vgpr_val__sgpr_rsrc__vg ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; PACKED-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 8) + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x f16>), align 1, addrspace 8) ; PACKED-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: struct_buffer_store_format_v2f16__vgpr_val__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset @@ -111,7 +111,7 @@ define amdgpu_ps void @struct_buffer_store_format_v2f16__vgpr_val__sgpr_rsrc__vg ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; 
GFX12-NEXT: BUFFER_STORE_FORMAT_D16_XY_VBUFFER_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_FORMAT_D16_XY_VBUFFER_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x f16>), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -145,7 +145,7 @@ define amdgpu_ps void @struct_buffer_store_format_v4f16__vgpr_val__sgpr_rsrc__vg ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY10]], [[COPY1]], implicit $exec ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[COPY1]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], %subreg.sub3 ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_BOTHEN_exact [[REG_SEQUENCE1]], [[REG_SEQUENCE2]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 8) + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_BOTHEN_exact [[REG_SEQUENCE1]], [[REG_SEQUENCE2]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x f16>), align 1, addrspace 8) ; UNPACKED-NEXT: S_ENDPGM 0 ; ; PACKED-LABEL: name: struct_buffer_store_format_v4f16__vgpr_val__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset @@ -164,7 +164,7 @@ define amdgpu_ps void @struct_buffer_store_format_v4f16__vgpr_val__sgpr_rsrc__vg ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; PACKED-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 8) + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x f16>), align 1, addrspace 8) ; PACKED-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: struct_buffer_store_format_v4f16__vgpr_val__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset @@ -183,7 +183,7 @@ define amdgpu_ps void @struct_buffer_store_format_v4f16__vgpr_val__sgpr_rsrc__vg ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX12-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_VBUFFER_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_VBUFFER_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x f16>), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.format.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -233,7 +233,7 @@ define amdgpu_ps void 
@struct_buffer_store_format_f16__sgpr_val__vgpr_rsrc__sgpr ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_gfx80_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 8) + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_gfx80_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable store (f16), align 1, addrspace 8) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -288,7 +288,7 @@ define amdgpu_ps void @struct_buffer_store_format_f16__sgpr_val__vgpr_rsrc__sgpr ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 8) + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable store (f16), align 1, addrspace 8) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -343,7 +343,7 @@ define amdgpu_ps void @struct_buffer_store_format_f16__sgpr_val__vgpr_rsrc__sgpr ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; GFX12-NEXT: BUFFER_STORE_FORMAT_D16_X_VBUFFER_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_FORMAT_D16_X_VBUFFER_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable store (f16), align 1, addrspace 8) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -373,7 +373,7 @@ define amdgpu_ps void @struct_buffer_store_format_i16__vgpr_val__sgpr_rsrc__vgpr ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_gfx80_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 8) + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_gfx80_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (i16), align 1, addrspace 8) ; UNPACKED-NEXT: S_ENDPGM 0 ; ; PACKED-LABEL: name: struct_buffer_store_format_i16__vgpr_val__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset @@ -390,7 
+390,7 @@ define amdgpu_ps void @struct_buffer_store_format_i16__vgpr_val__sgpr_rsrc__vgpr ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; PACKED-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 8) + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (i16), align 1, addrspace 8) ; PACKED-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: struct_buffer_store_format_i16__vgpr_val__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset @@ -407,7 +407,7 @@ define amdgpu_ps void @struct_buffer_store_format_i16__vgpr_val__sgpr_rsrc__vgpr ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX12-NEXT: BUFFER_STORE_FORMAT_D16_X_VBUFFER_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_FORMAT_D16_X_VBUFFER_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (i16), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.format.i16(i16 %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f32.ll index 8a104e1fbfc83..a58436c2e1a43 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f32.ll @@ -17,7 +17,7 @@ define amdgpu_ps void @struct_buffer_store_format_f32__vgpr_val__sgpr_rsrc__vgpr ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX8-NEXT: BUFFER_STORE_FORMAT_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_FORMAT_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: struct_buffer_store_format_f32__vgpr_val__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset @@ -34,7 +34,7 @@ define amdgpu_ps void @struct_buffer_store_format_f32__vgpr_val__sgpr_rsrc__vgpr ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX12-NEXT: BUFFER_STORE_FORMAT_X_VBUFFER_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_FORMAT_X_VBUFFER_BOTHEN_exact [[COPY]], 
[[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.format.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -57,7 +57,7 @@ define amdgpu_ps void @struct_buffer_store_format_v2f32__vgpr_val__sgpr_rsrc__vg ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX8-NEXT: BUFFER_STORE_FORMAT_XY_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_FORMAT_XY_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x f32>), align 1, addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: struct_buffer_store_format_v2f32__vgpr_val__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset @@ -76,7 +76,7 @@ define amdgpu_ps void @struct_buffer_store_format_v2f32__vgpr_val__sgpr_rsrc__vg ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX12-NEXT: BUFFER_STORE_FORMAT_XY_VBUFFER_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_FORMAT_XY_VBUFFER_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x f32>), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -100,7 +100,7 @@ define amdgpu_ps void @struct_buffer_store_format_v3f32__vgpr_val__sgpr_rsrc__vg ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX8-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; GFX8-NEXT: BUFFER_STORE_FORMAT_XYZ_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY9]], 0, 0, 0, implicit $exec :: (dereferenceable store (<3 x s32>), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_FORMAT_XYZ_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY9]], 0, 0, 0, implicit $exec :: (dereferenceable store (<3 x f32>), align 1, addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: struct_buffer_store_format_v3f32__vgpr_val__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset @@ -120,7 +120,7 @@ define amdgpu_ps void @struct_buffer_store_format_v3f32__vgpr_val__sgpr_rsrc__vg ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX12-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; GFX12-NEXT: BUFFER_STORE_FORMAT_XYZ_VBUFFER_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY9]], 0, 0, 0, implicit $exec :: (dereferenceable store (<3 x s32>), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_FORMAT_XYZ_VBUFFER_BOTHEN_exact [[REG_SEQUENCE]], 
[[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY9]], 0, 0, 0, implicit $exec :: (dereferenceable store (<3 x f32>), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.format.v3f32(<3 x float> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -145,7 +145,7 @@ define amdgpu_ps void @struct_buffer_store_format_v4f32__vgpr_val__sgpr_rsrc__vg ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; GFX8-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 - ; GFX8-NEXT: BUFFER_STORE_FORMAT_XYZW_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY10]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_FORMAT_XYZW_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY10]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x f32>), align 1, addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: struct_buffer_store_format_v4f32__vgpr_val__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset @@ -166,7 +166,7 @@ define amdgpu_ps void @struct_buffer_store_format_v4f32__vgpr_val__sgpr_rsrc__vg ; GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; GFX12-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 - ; GFX12-NEXT: BUFFER_STORE_FORMAT_XYZW_VBUFFER_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY10]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_FORMAT_XYZW_VBUFFER_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY10]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x f32>), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -216,7 +216,7 @@ define amdgpu_ps void @struct_buffer_store_format_f32__sgpr_val__vgpr_rsrc__sgpr ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; GFX8-NEXT: BUFFER_STORE_FORMAT_X_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_FORMAT_X_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -271,7 +271,7 @@ define amdgpu_ps void @struct_buffer_store_format_f32__sgpr_val__vgpr_rsrc__sgpr ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; GFX12-NEXT: BUFFER_STORE_FORMAT_X_VBUFFER_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_FORMAT_X_VBUFFER_BOTHEN_exact [[COPY8]], 
[[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -301,7 +301,7 @@ define amdgpu_ps void @struct_buffer_store_format_i32__vgpr_val__sgpr_rsrc__vgpr ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX8-NEXT: BUFFER_STORE_FORMAT_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_FORMAT_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (i32), align 1, addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: struct_buffer_store_format_i32__vgpr_val__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset @@ -318,7 +318,7 @@ define amdgpu_ps void @struct_buffer_store_format_i32__vgpr_val__sgpr_rsrc__vgpr ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX12-NEXT: BUFFER_STORE_FORMAT_X_VBUFFER_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_FORMAT_X_VBUFFER_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (i32), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.format.i32(i32 %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.ll index 210c3bb50cb15..5d9e1c1531d7b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.ll @@ -18,7 +18,7 @@ define amdgpu_ps void @struct_buffer_store_f32_sgpr_rsrc__vgpr_val__vgpr_vindex_ ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX8-NEXT: BUFFER_STORE_DWORD_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_DWORD_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: struct_buffer_store_f32_sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset @@ -35,7 +35,7 @@ define amdgpu_ps void @struct_buffer_store_f32_sgpr_rsrc__vgpr_val__vgpr_vindex_ ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX12-NEXT: 
BUFFER_STORE_DWORD_VBUFFER_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_DWORD_VBUFFER_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -58,7 +58,7 @@ define amdgpu_ps void @struct_buffer_store_v2f32_sgpr_rsrc__vgpr_val__vgpr_vinde ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX8-NEXT: BUFFER_STORE_DWORDX2_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_DWORDX2_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x f32>), align 1, addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: struct_buffer_store_v2f32_sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset @@ -77,7 +77,7 @@ define amdgpu_ps void @struct_buffer_store_v2f32_sgpr_rsrc__vgpr_val__vgpr_vinde ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX12-NEXT: BUFFER_STORE_DWORDX2_VBUFFER_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_DWORDX2_VBUFFER_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x f32>), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -101,7 +101,7 @@ define amdgpu_ps void @struct_buffer_store_v3f32_sgpr_rsrc__vgpr_val__vgpr_vinde ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX8-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; GFX8-NEXT: BUFFER_STORE_DWORDX3_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY9]], 0, 0, 0, implicit $exec :: (dereferenceable store (<3 x s32>), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_DWORDX3_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY9]], 0, 0, 0, implicit $exec :: (dereferenceable store (<3 x f32>), align 1, addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: struct_buffer_store_v3f32_sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset @@ -121,7 +121,7 @@ define amdgpu_ps void @struct_buffer_store_v3f32_sgpr_rsrc__vgpr_val__vgpr_vinde ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX12-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; GFX12-NEXT: BUFFER_STORE_DWORDX3_VBUFFER_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 
[[REG_SEQUENCE1]], [[COPY9]], 0, 0, 0, implicit $exec :: (dereferenceable store (<3 x s32>), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_DWORDX3_VBUFFER_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY9]], 0, 0, 0, implicit $exec :: (dereferenceable store (<3 x f32>), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.v3f32(<3 x float> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -146,7 +146,7 @@ define amdgpu_ps void @struct_buffer_store_v4f32_sgpr_rsrc__vgpr_val__vgpr_vinde ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; GFX8-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 - ; GFX8-NEXT: BUFFER_STORE_DWORDX4_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY10]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_DWORDX4_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY10]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x f32>), align 1, addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: struct_buffer_store_v4f32_sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset @@ -167,7 +167,7 @@ define amdgpu_ps void @struct_buffer_store_v4f32_sgpr_rsrc__vgpr_val__vgpr_vinde ; GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; GFX12-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 - ; GFX12-NEXT: BUFFER_STORE_DWORDX4_VBUFFER_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY10]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_DWORDX4_VBUFFER_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY10]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x f32>), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -221,7 +221,7 @@ define amdgpu_ps void @struct_buffer_store_v4f32_vgpr_rsrc__sgpr_val__sgpr_vinde ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY12]], %subreg.sub0, [[COPY13]], %subreg.sub1 - ; GFX8-NEXT: BUFFER_STORE_DWORDX4_BOTHEN_exact [[COPY11]], [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_DWORDX4_BOTHEN_exact [[COPY11]], [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x f32>), align 1, addrspace 8) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -280,7 +280,7 @@ define amdgpu_ps void @struct_buffer_store_v4f32_vgpr_rsrc__sgpr_val__sgpr_vinde ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY12]], %subreg.sub0, [[COPY13]], %subreg.sub1 - ; GFX12-NEXT: BUFFER_STORE_DWORDX4_VBUFFER_BOTHEN_exact [[COPY11]], [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], 
[[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_DWORDX4_VBUFFER_BOTHEN_exact [[COPY11]], [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x f32>), align 1, addrspace 8) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -310,7 +310,7 @@ define amdgpu_ps void @struct_buffer_store_i8_sgpr_rsrc__vgpr_val__vgpr_vindex__ ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX8-NEXT: BUFFER_STORE_BYTE_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_BYTE_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (i8), addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: struct_buffer_store_i8_sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset @@ -327,7 +327,7 @@ define amdgpu_ps void @struct_buffer_store_i8_sgpr_rsrc__vgpr_val__vgpr_vindex__ ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX12-NEXT: BUFFER_STORE_BYTE_VBUFFER_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_BYTE_VBUFFER_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (i8), addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 %val.trunc = trunc i32 %val to i8 call void @llvm.amdgcn.struct.buffer.store.i8(i8 %val.trunc, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) @@ -349,7 +349,7 @@ define amdgpu_ps void @struct_buffer_store_i16_sgpr_rsrc__vgpr_val__vgpr_vindex_ ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX8-NEXT: BUFFER_STORE_SHORT_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_SHORT_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (i16), align 1, addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: struct_buffer_store_i16_sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset @@ -366,7 +366,7 @@ define amdgpu_ps void @struct_buffer_store_i16_sgpr_rsrc__vgpr_val__vgpr_vindex_ ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX12-NEXT: BUFFER_STORE_SHORT_VBUFFER_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_SHORT_VBUFFER_BOTHEN_exact 
[[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (i16), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 %val.trunc = trunc i32 %val to i16 call void @llvm.amdgcn.struct.buffer.store.i16(i16 %val.trunc, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) @@ -388,7 +388,7 @@ define amdgpu_ps void @struct_buffer_store_f32_sgpr_rsrc__vgpr_val__vgpr_vindex_ ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX8-NEXT: BUFFER_STORE_DWORD_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_DWORD_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: struct_buffer_store_f32_sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset_glc @@ -405,7 +405,7 @@ define amdgpu_ps void @struct_buffer_store_f32_sgpr_rsrc__vgpr_val__vgpr_vindex_ ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX12-NEXT: BUFFER_STORE_DWORD_VBUFFER_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_DWORD_VBUFFER_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, 0, implicit $exec :: (dereferenceable store (f32), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 1) ret void @@ -426,7 +426,7 @@ define amdgpu_ps void @struct_buffer_store_v2f16_sgpr_rsrc__vgpr_val__vgpr_vinde ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX8-NEXT: BUFFER_STORE_DWORD_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_DWORD_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x f16>), align 1, addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: struct_buffer_store_v2f16_sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset @@ -443,7 +443,7 @@ define amdgpu_ps void @struct_buffer_store_v2f16_sgpr_rsrc__vgpr_val__vgpr_vinde ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX12-NEXT: BUFFER_STORE_DWORD_VBUFFER_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_DWORD_VBUFFER_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x f16>), align 1, 
addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -472,7 +472,7 @@ define amdgpu_ps void @struct_buffer_store_v4f16_sgpr_rsrc__vgpr_val__vgpr_vinde ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX8-NEXT: BUFFER_STORE_DWORDX2_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 8) + ; GFX8-NEXT: BUFFER_STORE_DWORDX2_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x f16>), align 1, addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: struct_buffer_store_v4f16_sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset @@ -491,7 +491,7 @@ define amdgpu_ps void @struct_buffer_store_v4f16_sgpr_rsrc__vgpr_val__vgpr_vinde ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX12-NEXT: BUFFER_STORE_DWORDX2_VBUFFER_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 8) + ; GFX12-NEXT: BUFFER_STORE_DWORDX2_VBUFFER_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x f16>), align 1, addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.atomic.add.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.atomic.add.ll index cc937f497d2ca..08fc5fe31b37a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.atomic.add.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.atomic.add.ll @@ -17,7 +17,7 @@ define amdgpu_ps float @struct_ptr_buffer_atomic_add_i32__vgpr_val__sgpr_rsrc__v ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (i32) on %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_BOTHEN_RTN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.add.i32(i32 %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) @@ -40,7 +40,7 @@ define 
amdgpu_ps float @struct_ptr_buffer_atomic_add_i32_noret__vgpr_val__sgpr_r ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (i32) on %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_BOTHEN_RTN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.add.i32(i32 %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) @@ -65,7 +65,7 @@ define amdgpu_ps <2 x float> @struct_ptr_buffer_atomic_add_i64__vgpr_val__sgpr_r ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 1, implicit $exec :: (volatile dereferenceable load store (i64) on %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN]].sub0 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN]].sub1 ; CHECK-NEXT: $vgpr0 = COPY [[COPY9]] @@ -93,7 +93,7 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_add_i64_noret__vgpr_val__sgpr_rs ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_ATOMIC_ADD_X2_BOTHEN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_ATOMIC_ADD_X2_BOTHEN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, implicit $exec :: (volatile dereferenceable load store (i64) on %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 %ret = call i64 @llvm.amdgcn.struct.ptr.buffer.atomic.add.i64(i64 %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -144,7 +144,7 @@ define amdgpu_ps float @struct_ptr_buffer_atomic_add_i32__sgpr_val__vgpr_rsrc__s ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; 
CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (i32) on %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -206,7 +206,7 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_add_i32_noret__sgpr_val__vgpr_rs ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_ATOMIC_ADD_BOTHEN [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_ATOMIC_ADD_BOTHEN [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (i32) on %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -237,7 +237,7 @@ define amdgpu_ps float @struct_ptr_buffer_atomic_add_i32__vgpr_val__sgpr_rsrc__v ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 3, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 3, implicit $exec :: (volatile dereferenceable load store (i32) on %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_BOTHEN_RTN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.add.i32(i32 %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.ll index fb67ddaa2fb40..7b3720e91cba2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.ll @@ -19,7 +19,7 @@ define amdgpu_ps float @struct_ptr_buffer_atomic_cmpswap_i32__vgpr_val__vgpr_cmp ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, 
[[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY8]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY8]], 0, 1, implicit $exec :: (volatile dereferenceable load store (i32) on %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN]].sub0 ; CHECK-NEXT: $vgpr0 = COPY [[COPY9]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -46,7 +46,7 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_cmpswap_noret_i32__vgpr_val__vgp ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_ATOMIC_CMPSWAP_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_ATOMIC_CMPSWAP_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, implicit $exec :: (volatile dereferenceable load store (i32) on %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 %ret = call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.i32(i32 %val, i32 %cmp, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -100,7 +100,7 @@ define amdgpu_ps float @struct_ptr_buffer_atomic_cmpswap_i32__sgpr_val__sgpr_cmp ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY11]], %subreg.sub0, [[COPY12]], %subreg.sub1 ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (i32) on %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN]].sub0 ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec @@ -166,7 +166,7 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_cmpswap_i32_noret__sgpr_val__sgp ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY11]], %subreg.sub0, [[COPY12]], %subreg.sub1 ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = 
REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_ATOMIC_CMPSWAP_BOTHEN [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_ATOMIC_CMPSWAP_BOTHEN [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (i32) on %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -198,7 +198,7 @@ define amdgpu_ps float @struct_ptr_buffer_atomic_cmpswap_i32__vgpr_val__vgpr_cmp ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY8]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY8]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (i32) on %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN]].sub0 ; CHECK-NEXT: $vgpr0 = COPY [[COPY9]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -230,7 +230,7 @@ define amdgpu_ps double @struct_ptr_buffer_atomic_cmpswap_i64__vgpr_val__vgpr_cm ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_BOTHEN_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_BOTHEN_RTN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[COPY10]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_BOTHEN_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_BOTHEN_RTN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[COPY10]], 0, 1, implicit $exec :: (volatile dereferenceable load store (i64) on %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_BOTHEN_RTN]].sub0_sub1 ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY11]].sub0 ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY11]].sub1 @@ -266,7 +266,7 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_cmpswap_noret_i64__vgpr_val__vgp ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], 
%subreg.sub0, [[COPY9]], %subreg.sub1 ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; CHECK-NEXT: BUFFER_ATOMIC_CMPSWAP_X2_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_ATOMIC_CMPSWAP_X2_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (volatile dereferenceable load store (i64) on %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 %ret = call i64 @llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.i64(i64 %val, i64 %cmp, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -324,7 +324,7 @@ define amdgpu_ps double @struct_ptr_buffer_atomic_cmpswap_i64__sgpr_val__sgpr_cm ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY13]], %subreg.sub0, [[COPY14]], %subreg.sub1 ; CHECK-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY11]], %subreg.sub0_sub1, [[COPY12]], %subreg.sub2_sub3 - ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_BOTHEN_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_BOTHEN_RTN [[REG_SEQUENCE5]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_BOTHEN_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_BOTHEN_RTN [[REG_SEQUENCE5]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (i64) on %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: [[COPY19:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_BOTHEN_RTN]].sub0_sub1 ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec @@ -399,7 +399,7 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_cmpswap_i64_noret__sgpr_val__sgp ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY13]], %subreg.sub0, [[COPY14]], %subreg.sub1 ; CHECK-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY11]], %subreg.sub0_sub1, [[COPY12]], %subreg.sub2_sub3 - ; CHECK-NEXT: BUFFER_ATOMIC_CMPSWAP_X2_BOTHEN [[REG_SEQUENCE5]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_ATOMIC_CMPSWAP_X2_BOTHEN [[REG_SEQUENCE5]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (i64) on %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -435,7 +435,7 @@ define amdgpu_ps double @struct_ptr_buffer_atomic_cmpswap_i64__vgpr_val__vgpr_cm ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; CHECK-NEXT: 
[[BUFFER_ATOMIC_CMPSWAP_X2_BOTHEN_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_BOTHEN_RTN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[COPY10]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_BOTHEN_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_BOTHEN_RTN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[COPY10]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (i64) on %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_BOTHEN_RTN]].sub0_sub1 ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY11]].sub0 ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY11]].sub1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.atomic.fadd-with-ret.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.atomic.fadd-with-ret.ll index 5b0b91f983fe6..e5b7c889dbb7f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.atomic.fadd-with-ret.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.atomic.fadd-with-ret.ll @@ -1,7 +1,7 @@ ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX90A %s ; RUN: not --crash llc -global-isel < %s -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs 2>&1 | FileCheck %s -check-prefix=GFX908 -; GFX908: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32) = G_AMDGPU_BUFFER_ATOMIC_FADD %{{[0-9]+}}:vgpr, %{{[0-9]+}}:sgpr(<4 x s32>), %{{[0-9]+}}:vgpr(s32), %{{[0-9]+}}:vgpr, %{{[0-9]+}}:sgpr, 0, 0, -1 :: (volatile dereferenceable load store (s32) on %ir.rsrc.load, align 1, addrspace 8) (in function: buffer_atomic_add_f32_rtn) +; GFX908: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(f32) = G_AMDGPU_BUFFER_ATOMIC_FADD %{{[0-9]+}}:vgpr, %{{[0-9]+}}:sgpr(<4 x i32>), %{{[0-9]+}}:vgpr(i32), %{{[0-9]+}}:vgpr, %{{[0-9]+}}:sgpr, 0, 0, -1 :: (volatile dereferenceable load store (f32) on %ir.rsrc.load, align 1, addrspace 8) (in function: buffer_atomic_add_f32_rtn) declare float @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f32(float, ptr addrspace(8), i32, i32, i32, i32 immarg) declare <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half>, ptr addrspace(8), i32, i32, i32, i32 immarg) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.atomic.fadd.ll index 18568aaa02634..7e0a5f687faf3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.atomic.fadd.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.atomic.fadd.ll @@ -18,7 +18,7 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rs ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 
implicit $exec :: (volatile dereferenceable load store (f32) on %ir.rsrc, align 1, addrspace 8) ; GFX908-NEXT: S_ENDPGM 0 ; ; GFX90A-LABEL: name: struct_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset @@ -35,7 +35,7 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rs ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (f32) on %ir.rsrc, align 1, addrspace 8) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -56,7 +56,7 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rs ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (f32) on %ir.rsrc, align 1, addrspace 8) ; GFX908-NEXT: S_ENDPGM 0 ; ; GFX90A-LABEL: name: struct_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset_plus4095__sgpr_soffset @@ -73,7 +73,7 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rs ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (f32) on %ir.rsrc, align 1, addrspace 8) ; GFX90A-NEXT: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4095 %ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset.add, i32 %soffset, i32 0) @@ -93,7 +93,7 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rs ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; 
GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (f32) on %ir.rsrc, align 1, addrspace 8) ; GFX908-NEXT: S_ENDPGM 0 ; ; GFX90A-LABEL: name: struct_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__4095_voffset__sgpr_soffset @@ -108,7 +108,7 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rs ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (f32) on %ir.rsrc, align 1, addrspace 8) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 4095, i32 %soffset, i32 0) ret void @@ -128,7 +128,7 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rs ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (f32) on %ir.rsrc, align 1, addrspace 8) ; GFX908-NEXT: S_ENDPGM 0 ; ; GFX90A-LABEL: name: struct_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset @@ -143,7 +143,7 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rs ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (f32) on %ir.rsrc, align 1, addrspace 8) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret void @@ -194,7 +194,7 @@ define amdgpu_ps void 
@struct_ptr_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rs ; GFX908-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (f32) on %ir.rsrc, align 1, addrspace 8) ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX908-NEXT: {{ $}} @@ -249,7 +249,7 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rs ; GFX90A-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (f32) on %ir.rsrc, align 1, addrspace 8) ; GFX90A-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX90A-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX90A-NEXT: {{ $}} @@ -306,7 +306,7 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rs ; GFX908-NEXT: bb.3: ; GFX908-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (f32) on %ir.rsrc, align 1, addrspace 8) ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX908-NEXT: {{ $}} @@ -358,7 +358,7 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rs ; GFX90A-NEXT: bb.3: ; GFX90A-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (f32) on %ir.rsrc, align 1, addrspace 8) ; GFX90A-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX90A-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX90A-NEXT: {{ $}} @@ -389,7 +389,7 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rs ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 
; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (f32) on %ir.rsrc, align 1, addrspace 8) ; GFX908-NEXT: S_ENDPGM 0 ; ; GFX90A-LABEL: name: struct_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc @@ -406,7 +406,7 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rs ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (f32) on %ir.rsrc, align 1, addrspace 8) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2) ret void @@ -425,7 +425,7 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rs ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 2, implicit $exec :: (volatile dereferenceable load store (f32) on %ir.rsrc, align 1, addrspace 8) ; GFX908-NEXT: S_ENDPGM 0 ; ; GFX90A-LABEL: name: struct_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset_slc @@ -440,7 +440,7 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rs ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 2, implicit $exec :: (volatile dereferenceable load store (f32) on %ir.rsrc, align 1, addrspace 8) ; GFX90A-NEXT: 
S_ENDPGM 0 %ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 2) ret void @@ -461,7 +461,7 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_ ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8) + ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x f16>) on %ir.rsrc, align 1, addrspace 8) ; GFX908-NEXT: S_ENDPGM 0 ; ; GFX90A-LABEL: name: struct_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset @@ -478,7 +478,7 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_ ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x f16>) on %ir.rsrc, align 1, addrspace 8) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -497,7 +497,7 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_ ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8) + ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x f16>) on %ir.rsrc, align 1, addrspace 8) ; GFX908-NEXT: S_ENDPGM 0 ; ; GFX90A-LABEL: name: struct_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset @@ -512,7 +512,7 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_ ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A-NEXT: 
BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x f16>) on %ir.rsrc, align 1, addrspace 8) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.load.format.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.load.format.f16.ll index bc4bd34985372..365d26c5bf537 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.load.format.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.load.format.f16.ll @@ -16,7 +16,7 @@ define amdgpu_ps half @struct_ptr_buffer_load_format_f16__sgpr_rsrc__vgpr_vindex ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16) from %ir.rsrc, align 1, addrspace 8) + ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (f16) from %ir.rsrc, align 1, addrspace 8) ; UNPACKED-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]] ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -33,7 +33,7 @@ define amdgpu_ps half @struct_ptr_buffer_load_format_f16__sgpr_rsrc__vgpr_vindex ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16) from %ir.rsrc, align 1, addrspace 8) + ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (f16) from %ir.rsrc, align 1, addrspace 8) ; PACKED-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call half @llvm.amdgcn.struct.ptr.buffer.load.format.f16(ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) @@ -54,7 +54,7 @@ define amdgpu_ps <2 x half> @struct_ptr_buffer_load_format_v2f16__sgpr_rsrc__vgp ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], 
%subreg.sub3 ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>) from %ir.rsrc, align 1, addrspace 8) + ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x f16>) from %ir.rsrc, align 1, addrspace 8) ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN]].sub0 ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN]].sub1 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 @@ -82,7 +82,7 @@ define amdgpu_ps <2 x half> @struct_ptr_buffer_load_format_v2f16__sgpr_rsrc__vgp ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XY_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>) from %ir.rsrc, align 1, addrspace 8) + ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XY_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x f16>) from %ir.rsrc, align 1, addrspace 8) ; PACKED-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_XY_BOTHEN]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f16(ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) @@ -109,7 +109,7 @@ define amdgpu_ps <4 x half> @struct_ptr_buffer_load_format_v4f16__sgpr_rsrc__vgp ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>) from %ir.rsrc, align 1, addrspace 8) + ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x f16>) from %ir.rsrc, align 1, addrspace 8) ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub0 ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub1 ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub2 @@ -147,7 +147,7 @@ define amdgpu_ps <4 x half> @struct_ptr_buffer_load_format_v4f16__sgpr_rsrc__vgp ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; 
PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>) from %ir.rsrc, align 1, addrspace 8)
+ ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x f16>) from %ir.rsrc, align 1, addrspace 8)
 ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub0
 ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub1
 ; PACKED-NEXT: $vgpr0 = COPY [[COPY7]]
@@ -200,7 +200,7 @@ define amdgpu_ps <4 x half> @struct_ptr_buffer_load_format_v4f16__vpr_rsrc__sgpr
 ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
 ; UNPACKED-NEXT: {{ $}}
 ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1
- ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>) from %ir.rsrc, align 1, addrspace 8)
+ ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x f16>) from %ir.rsrc, align 1, addrspace 8)
 ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
 ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
 ; UNPACKED-NEXT: {{ $}}
@@ -275,7 +275,7 @@ define amdgpu_ps <4 x half> @struct_ptr_buffer_load_format_v4f16__vpr_rsrc__sgpr
 ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
 ; PACKED-NEXT: {{ $}}
 ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1
- ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>) from %ir.rsrc, align 1, addrspace 8)
+ ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x f16>) from %ir.rsrc, align 1, addrspace 8)
 ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
 ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
 ; PACKED-NEXT: {{ $}}
@@ -308,7 +308,7 @@ define amdgpu_ps half @struct_ptr_buffer_load_format_f16__sgpr_rsrc__vgpr_vindex
 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable load (s16) from %ir.rsrc, align 1, addrspace 8)
+ ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable load (f16) from %ir.rsrc, align 1, addrspace 8)
 ; UNPACKED-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]]
 ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
 ;
@@ -325,7 +325,7 @@ define amdgpu_ps half @struct_ptr_buffer_load_format_f16__sgpr_rsrc__vgpr_vindex
 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable load (s16) from %ir.rsrc, align 1, addrspace 8)
+ ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable load (f16) from %ir.rsrc, align 1, addrspace 8)
 ; PACKED-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN]]
 ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
 %voffset = add i32 %voffset.base, 4095
@@ -347,7 +347,7 @@ define amdgpu_ps half @struct_ptr_buffer_load_format_i16__sgpr_rsrc__vgpr_vindex
 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16) from %ir.rsrc, align 1, addrspace 8)
+ ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (i16) from %ir.rsrc, align 1, addrspace 8)
 ; UNPACKED-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]]
 ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
 ;
@@ -364,7 +364,7 @@ define amdgpu_ps half @struct_ptr_buffer_load_format_i16__sgpr_rsrc__vgpr_vindex
 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16) from %ir.rsrc, align 1, addrspace 8)
+ ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (i16) from %ir.rsrc, align 1, addrspace 8)
 ; PACKED-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN]]
 ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
 %val = call i16 @llvm.amdgcn.struct.ptr.buffer.load.format.i16(ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.load.format.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.load.format.ll
index caaa76569ec89..0d1139a38c137 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.load.format.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.load.format.ll
@@ -16,7 +16,7 @@ define amdgpu_ps float @struct_ptr_buffer_load_format_f32__sgpr_rsrc__vgpr_vinde
 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8)
+ ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32) from %ir.rsrc, align 1, addrspace 8)
 ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_BOTHEN]]
 ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
 %val = call float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
@@ -37,7 +37,7 @@ define amdgpu_ps <2 x float> @struct_ptr_buffer_load_format_v2f32__sgpr_rsrc__vg
 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_XY_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>) from %ir.rsrc, align 1, addrspace 8)
+ ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_XY_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x f32>) from %ir.rsrc, align 1, addrspace 8)
 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XY_BOTHEN]].sub0
 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XY_BOTHEN]].sub1
 ; CHECK-NEXT: $vgpr0 = COPY [[COPY7]]
@@ -61,7 +61,7 @@ define amdgpu_ps <3 x float> @struct_ptr_buffer_load_format_v3f32__sgpr_rsrc__vg
 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; CHECK-NEXT:
[[BUFFER_LOAD_FORMAT_XYZ_BOTHEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_FORMAT_XYZ_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_XYZ_BOTHEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_FORMAT_XYZ_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x f32>) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub0 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub1 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub2 @@ -87,7 +87,7 @@ define amdgpu_ps <4 x float> @struct_ptr_buffer_load_format_v4f32__sgpr_rsrc__vg ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x f32>) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub0 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub1 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub2 @@ -144,7 +144,7 @@ define amdgpu_ps <4 x float> @struct_ptr_buffer_load_format_v4f32__vpr_rsrc__sgp ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x f32>) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -181,7 +181,7 @@ define amdgpu_ps float @struct_ptr_buffer_load_format_f32__sgpr_rsrc__vgpr_vinde ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, 
addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable load (f32) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_BOTHEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %voffset = add i32 %voffset.base, 4095 @@ -203,7 +203,7 @@ define amdgpu_ps float @struct_ptr_buffer_load_format_i32__sgpr_rsrc__vgpr_vinde ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (i32) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_BOTHEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call i32 @llvm.amdgcn.struct.ptr.buffer.load.format.i32(ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) @@ -229,15 +229,15 @@ define amdgpu_cs void @struct_ptr_buffer_load_format_v4i32_tfe(ptr addrspace(8) ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_TFE_IDXEN:%[0-9]+]]:vreg_160 = BUFFER_LOAD_FORMAT_XYZW_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE2]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_TFE_IDXEN:%[0-9]+]]:vreg_160 = BUFFER_LOAD_FORMAT_XYZW_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE2]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x i32>) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_TFE_IDXEN]].sub0 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_TFE_IDXEN]].sub1 ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_TFE_IDXEN]].sub2 ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_TFE_IDXEN]].sub3 ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_TFE_IDXEN]].sub4 ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2, [[COPY12]], %subreg.sub3 - ; CHECK-NEXT: FLAT_STORE_DWORDX4 [[REG_SEQUENCE]], [[REG_SEQUENCE3]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>) into %ir.value, addrspace 1) - ; CHECK-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY13]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.status, addrspace 1) + ; CHECK-NEXT: FLAT_STORE_DWORDX4 [[REG_SEQUENCE]], [[REG_SEQUENCE3]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x i32>) into %ir.value, addrspace 1) + ; CHECK-NEXT: 
FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY13]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32) into %ir.status, addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 %load = call { <4 x i32>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v4i32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0) @@ -268,14 +268,14 @@ define amdgpu_cs void @struct_ptr_buffer_load_format_v3i32_tfe(ptr addrspace(8) ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_XYZ_TFE_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZ_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE2]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_XYZ_TFE_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZ_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE2]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x i32>) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_TFE_IDXEN]].sub0 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_TFE_IDXEN]].sub1 ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_TFE_IDXEN]].sub2 ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_TFE_IDXEN]].sub3 ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2 - ; CHECK-NEXT: FLAT_STORE_DWORDX3 [[REG_SEQUENCE]], [[REG_SEQUENCE3]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>) into %ir.value, align 16, addrspace 1) - ; CHECK-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY12]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.status, addrspace 1) + ; CHECK-NEXT: FLAT_STORE_DWORDX3 [[REG_SEQUENCE]], [[REG_SEQUENCE3]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x i32>) into %ir.value, align 16, addrspace 1) + ; CHECK-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY12]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32) into %ir.status, addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 %load = call { <3 x i32>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v3i32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0) @@ -306,11 +306,11 @@ define amdgpu_cs void @struct_ptr_buffer_load_format_i32_tfe(ptr addrspace(8) in ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_TFE_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_X_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE2]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_TFE_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_X_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE2]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (i32) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_X_TFE_IDXEN]].sub0 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_X_TFE_IDXEN]].sub1 - ; CHECK-NEXT: 
FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY9]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.value, addrspace 1) - ; CHECK-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY10]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.status, addrspace 1) + ; CHECK-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY9]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32) into %ir.value, addrspace 1) + ; CHECK-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY10]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32) into %ir.status, addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 %load = call { i32, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_i32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.load.ll index 95789b5ac49ed..aac4f033b9921 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.load.ll @@ -16,7 +16,7 @@ define amdgpu_ps float @struct_ptr_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) @@ -38,7 +38,7 @@ define amdgpu_ps <2 x float> @struct_ptr_buffer_load_v2f32__sgpr_rsrc__vgpr_vind ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX2_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX2_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x f32>) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_BOTHEN]].sub0 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_BOTHEN]].sub1 ; CHECK-NEXT: $vgpr0 = COPY [[COPY7]] @@ -63,7 +63,7 @@ define amdgpu_ps <3 x float> @struct_ptr_buffer_load_v3f32__sgpr_rsrc__vgpr_vind ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], 
%subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX3_BOTHEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX3_BOTHEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x f32>) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_BOTHEN]].sub0 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_BOTHEN]].sub1 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_BOTHEN]].sub2 @@ -90,7 +90,7 @@ define amdgpu_ps <4 x float> @struct_ptr_buffer_load_v4f32__sgpr_rsrc__vgpr_vind ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX4_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX4_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x f32>) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_BOTHEN]].sub0 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_BOTHEN]].sub1 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_BOTHEN]].sub2 @@ -120,7 +120,7 @@ define amdgpu_ps float @struct_ptr_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY4]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 0, i32 %voffset, i32 %soffset, i32 0) @@ -142,7 +142,7 @@ define amdgpu_ps float @struct_ptr_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK-NEXT: 
[[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable load (f32) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %voffset = add i32 %voffset.base, 4095 @@ -164,7 +164,7 @@ define amdgpu_ps float @struct_ptr_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 64 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 64, i32 0) @@ -214,7 +214,7 @@ define amdgpu_ps float @struct_ptr_buffer_load_f32__vgpr_rsrc__sgpr_vindex__sgpr ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (f32) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -244,7 +244,7 @@ define amdgpu_ps float @struct_ptr_buffer_load_i8_zext__sgpr_rsrc__vgpr_vindex__ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_UBYTE_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s8) from %ir.rsrc, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_UBYTE_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_BOTHEN [[REG_SEQUENCE1]], 
[[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (i8) from %ir.rsrc, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_BOTHEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call i8 @llvm.amdgcn.struct.ptr.buffer.load.i8(ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) @@ -267,7 +267,7 @@ define amdgpu_ps float @struct_ptr_buffer_load_i8_sext__sgpr_rsrc__vgpr_vindex__ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_SBYTE_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_SBYTE_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s8) from %ir.rsrc, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_SBYTE_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_SBYTE_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (i8) from %ir.rsrc, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_SBYTE_BOTHEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call i8 @llvm.amdgcn.struct.ptr.buffer.load.i8(ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) @@ -290,7 +290,7 @@ define amdgpu_ps float @struct_ptr_buffer_load_i16_zext__sgpr_rsrc__vgpr_vindex_ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_USHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_USHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (i16) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_BOTHEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call i16 @llvm.amdgcn.struct.ptr.buffer.load.i16(ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) @@ -313,7 +313,7 @@ define amdgpu_ps float @struct_ptr_buffer_load_i16_sext__sgpr_rsrc__vgpr_vindex_ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_SSHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_SSHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_SSHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_SSHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (i16) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_SSHORT_BOTHEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit 
$vgpr0 %val = call i16 @llvm.amdgcn.struct.ptr.buffer.load.i16(ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) @@ -337,7 +337,7 @@ define amdgpu_ps half @struct_ptr_buffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_USHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_USHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (f16) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_BOTHEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call half @llvm.amdgcn.struct.ptr.buffer.load.f16(ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) @@ -359,7 +359,7 @@ define amdgpu_ps <2 x half> @struct_ptr_buffer_load_v2f16__sgpr_rsrc__vgpr_vinde ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x f16>) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.load.v2f16(ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) @@ -387,7 +387,7 @@ define amdgpu_ps <4 x half> @struct_ptr_buffer_load_v4f16__sgpr_rsrc__vgpr_vinde ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX2_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX2_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x f16>) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_BOTHEN]].sub0 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_BOTHEN]].sub1 ; CHECK-NEXT: $vgpr0 = COPY [[COPY7]] @@ -412,7 +412,7 @@ define amdgpu_ps float 
@struct_ptr_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, implicit $exec :: (dereferenceable load (f32) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 1) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.store.format.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.store.format.f16.ll index fe2b04841e332..b65f8f0e094bc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.store.format.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.store.format.f16.ll @@ -17,7 +17,7 @@ define amdgpu_ps void @struct_ptr_buffer_store_format_f16__vgpr_val__sgpr_rsrc__ ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_gfx80_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (s16) into %ir.rsrc, align 1, addrspace 8) + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_gfx80_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (f16) into %ir.rsrc, align 1, addrspace 8) ; UNPACKED-NEXT: S_ENDPGM 0 ; ; PACKED-LABEL: name: struct_ptr_buffer_store_format_f16__vgpr_val__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset @@ -34,7 +34,7 @@ define amdgpu_ps void @struct_ptr_buffer_store_format_f16__vgpr_val__sgpr_rsrc__ ; PACKED-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (s16) into %ir.rsrc, align 1, addrspace 8) + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (f16) into %ir.rsrc, align 1, addrspace 8) ; PACKED-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.ptr.buffer.store.format.f16(half %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -59,7 +59,7 @@ define 
amdgpu_ps void @struct_ptr_buffer_store_format_v2f16__vgpr_val__sgpr_rsrc ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_gfx80_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>) into %ir.rsrc, align 1, addrspace 8) + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_gfx80_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x f16>) into %ir.rsrc, align 1, addrspace 8) ; UNPACKED-NEXT: S_ENDPGM 0 ; ; PACKED-LABEL: name: struct_ptr_buffer_store_format_v2f16__vgpr_val__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset @@ -76,7 +76,7 @@ define amdgpu_ps void @struct_ptr_buffer_store_format_v2f16__vgpr_val__sgpr_rsrc ; PACKED-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>) into %ir.rsrc, align 1, addrspace 8) + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x f16>) into %ir.rsrc, align 1, addrspace 8) ; PACKED-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.ptr.buffer.store.format.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -110,7 +110,7 @@ define amdgpu_ps void @struct_ptr_buffer_store_format_v4f16__vgpr_val__sgpr_rsrc ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[COPY1]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], %subreg.sub3 ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>) into %ir.rsrc, align 1, addrspace 8) + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x f16>) into %ir.rsrc, align 1, addrspace 8) ; UNPACKED-NEXT: S_ENDPGM 0 ; ; PACKED-LABEL: name: struct_ptr_buffer_store_format_v4f16__vgpr_val__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset @@ -129,7 +129,7 @@ define amdgpu_ps void @struct_ptr_buffer_store_format_v4f16__vgpr_val__sgpr_rsrc ; PACKED-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE 
[[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>) into %ir.rsrc, align 1, addrspace 8) + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x f16>) into %ir.rsrc, align 1, addrspace 8) ; PACKED-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.ptr.buffer.store.format.v4f16(<4 x half> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -179,7 +179,7 @@ define amdgpu_ps void @struct_ptr_buffer_store_format_f16__sgpr_val__vgpr_rsrc__ ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_gfx80_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable store (s16) into %ir.rsrc, align 1, addrspace 8) + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_gfx80_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable store (f16) into %ir.rsrc, align 1, addrspace 8) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -234,7 +234,7 @@ define amdgpu_ps void @struct_ptr_buffer_store_format_f16__sgpr_val__vgpr_rsrc__ ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable store (s16) into %ir.rsrc, align 1, addrspace 8) + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable store (f16) into %ir.rsrc, align 1, addrspace 8) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -264,7 +264,7 @@ define amdgpu_ps void @struct_ptr_buffer_store_format_i16__vgpr_val__sgpr_rsrc__ ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_gfx80_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (s16) into %ir.rsrc, align 1, addrspace 8) + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_gfx80_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (i16) into %ir.rsrc, 
align 1, addrspace 8) ; UNPACKED-NEXT: S_ENDPGM 0 ; ; PACKED-LABEL: name: struct_ptr_buffer_store_format_i16__vgpr_val__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset @@ -281,7 +281,7 @@ define amdgpu_ps void @struct_ptr_buffer_store_format_i16__vgpr_val__sgpr_rsrc__ ; PACKED-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (s16) into %ir.rsrc, align 1, addrspace 8) + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (i16) into %ir.rsrc, align 1, addrspace 8) ; PACKED-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.ptr.buffer.store.format.i16(i16 %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.store.format.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.store.format.f32.ll index 3c5c337c6912b..f4a17883129e9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.store.format.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.store.format.f32.ll @@ -16,7 +16,7 @@ define amdgpu_ps void @struct_ptr_buffer_store_format_f32__vgpr_val__sgpr_rsrc__ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_STORE_FORMAT_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_FORMAT_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (f32) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.ptr.buffer.store.format.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -39,7 +39,7 @@ define amdgpu_ps void @struct_ptr_buffer_store_format_v2f32__vgpr_val__sgpr_rsrc ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_STORE_FORMAT_XY_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_FORMAT_XY_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x f32>) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 call void 
@llvm.amdgcn.struct.ptr.buffer.store.format.v2f32(<2 x float> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -63,7 +63,7 @@ define amdgpu_ps void @struct_ptr_buffer_store_format_v3f32__vgpr_val__sgpr_rsrc ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[COPY6]], %subreg.sub3 ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_STORE_FORMAT_XYZ_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY9]], 0, 0, 0, implicit $exec :: (dereferenceable store (<3 x s32>) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_FORMAT_XYZ_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY9]], 0, 0, 0, implicit $exec :: (dereferenceable store (<3 x f32>) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.ptr.buffer.store.format.v3f32(<3 x float> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -88,7 +88,7 @@ define amdgpu_ps void @struct_ptr_buffer_store_format_v4f32__vgpr_val__sgpr_rsrc ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_STORE_FORMAT_XYZW_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY10]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_FORMAT_XYZW_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY10]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x f32>) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.ptr.buffer.store.format.v4f32(<4 x float> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -138,7 +138,7 @@ define amdgpu_ps void @struct_ptr_buffer_store_format_f32__sgpr_val__vgpr_rsrc__ ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_STORE_FORMAT_X_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_FORMAT_X_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable store (f32) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -168,7 +168,7 @@ define amdgpu_ps void @struct_ptr_buffer_store_format_i32__vgpr_val__sgpr_rsrc__ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], 
%subreg.sub0, [[COPY6]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_STORE_FORMAT_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_FORMAT_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (i32) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.ptr.buffer.store.format.i32(i32 %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.store.ll index a18d0c2165275..4ffd027c2f07d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.store.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.store.ll @@ -17,7 +17,7 @@ define amdgpu_ps void @struct_ptr_buffer_store_f32_sgpr_rsrc__vgpr_val__vgpr_vin ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORD_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_DWORD_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (f32) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.ptr.buffer.store.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -40,7 +40,7 @@ define amdgpu_ps void @struct_ptr_buffer_store_v2f32_sgpr_rsrc__vgpr_val__vgpr_v ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX2_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_DWORDX2_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x f32>) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.ptr.buffer.store.v2f32(<2 x float> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -64,7 +64,7 @@ define amdgpu_ps void @struct_ptr_buffer_store_v3f32_sgpr_rsrc__vgpr_val__vgpr_v ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[COPY6]], %subreg.sub3 ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX3_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], 
[[COPY9]], 0, 0, 0, implicit $exec :: (dereferenceable store (<3 x s32>) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_DWORDX3_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY9]], 0, 0, 0, implicit $exec :: (dereferenceable store (<3 x f32>) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.ptr.buffer.store.v3f32(<3 x float> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -89,7 +89,7 @@ define amdgpu_ps void @struct_ptr_buffer_store_v4f32_sgpr_rsrc__vgpr_val__vgpr_v ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY10]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY10]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x f32>) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.ptr.buffer.store.v4f32(<4 x float> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -143,7 +143,7 @@ define amdgpu_ps void @struct_ptr_buffer_store_v4f32_vgpr_rsrc__sgpr_val__sgpr_v ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY12]], %subreg.sub0, [[COPY13]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_BOTHEN_exact [[COPY11]], [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_BOTHEN_exact [[COPY11]], [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x f32>) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -173,7 +173,7 @@ define amdgpu_ps void @struct_ptr_buffer_store_i8_sgpr_rsrc__vgpr_val__vgpr_vind ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_STORE_BYTE_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (s8) into %ir.rsrc, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_BYTE_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (i8) into %ir.rsrc, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 %val.trunc = trunc i32 %val to i8 call void @llvm.amdgcn.struct.ptr.buffer.store.i8(i8 %val.trunc, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) @@ -195,7 +195,7 @@ define amdgpu_ps void 
@struct_ptr_buffer_store_i16_sgpr_rsrc__vgpr_val__vgpr_vin ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_STORE_SHORT_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (s16) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_SHORT_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (i16) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 %val.trunc = trunc i32 %val to i16 call void @llvm.amdgcn.struct.ptr.buffer.store.i16(i16 %val.trunc, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) @@ -217,7 +217,7 @@ define amdgpu_ps void @struct_ptr_buffer_store_f32_sgpr_rsrc__vgpr_val__vgpr_vin ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORD_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_DWORD_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, 0, implicit $exec :: (dereferenceable store (f32) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.ptr.buffer.store.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 1) ret void @@ -238,7 +238,7 @@ define amdgpu_ps void @struct_ptr_buffer_store_v2f16_sgpr_rsrc__vgpr_val__vgpr_v ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORD_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_DWORD_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x f16>) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.ptr.buffer.store.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -267,7 +267,7 @@ define amdgpu_ps void @struct_ptr_buffer_store_v4f16_sgpr_rsrc__vgpr_val__vgpr_v ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX2_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, 
implicit $exec :: (dereferenceable store (<4 x s16>) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: BUFFER_STORE_DWORDX2_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x f16>) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.ptr.buffer.store.v4f16(<4 x half> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.tbuffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.tbuffer.load.ll index b08b46f20fc9e..17745a01341b2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.tbuffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.tbuffer.load.ll @@ -17,7 +17,7 @@ define amdgpu_ps float @struct_tbuffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vo ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (f32) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_BOTHEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.struct.ptr.tbuffer.load.f32(ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 78, i32 0) @@ -38,7 +38,7 @@ define amdgpu_ps <2 x float> @struct_tbuffer_load_v2f32__sgpr_rsrc__vgpr_vindex_ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_XY_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_XY_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<2 x f32>) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XY_BOTHEN]].sub0 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XY_BOTHEN]].sub1 ; CHECK-NEXT: $vgpr0 = COPY [[COPY7]] @@ -62,7 +62,7 @@ define amdgpu_ps <3 x float> @struct_tbuffer_load_v3f32__sgpr_rsrc__vgpr_vindex_ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], 
%subreg.sub1 - ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_XYZ_BOTHEN:%[0-9]+]]:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_XYZ_BOTHEN:%[0-9]+]]:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<3 x f32>) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub0 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub1 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub2 @@ -88,7 +88,7 @@ define amdgpu_ps <4 x float> @struct_tbuffer_load_v4f32__sgpr_rsrc__vgpr_vindex_ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<4 x f32>) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub0 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub1 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub2 @@ -117,7 +117,7 @@ define amdgpu_ps float @struct_tbuffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vo ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY4]], %subreg.sub1 - ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (f32) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_BOTHEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.struct.ptr.tbuffer.load.f32(ptr addrspace(8) %rsrc, i32 0, i32 %voffset, i32 %soffset, i32 78, i32 0) @@ -166,7 +166,7 @@ define amdgpu_ps <4 x float> @struct_tbuffer_load_v4f32__vgpr_rsrc__sgpr_vindex_ ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE2]], 
[[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<4 x f32>) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -203,7 +203,7 @@ define amdgpu_ps float @struct_tbuffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vo ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 78, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 78, 0, 0, implicit $exec :: (dereferenceable load (f32) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_BOTHEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %voffset = add i32 %voffset.base, 4095 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.ll index 23468c29ff79a..17224d48f1e97 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.ll @@ -18,7 +18,7 @@ define amdgpu_ps float @struct_tbuffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vo ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_BOTHEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -35,7 +35,7 @@ define amdgpu_ps float @struct_tbuffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vo ; CHECK-GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-GFX12-NEXT: [[TBUFFER_LOAD_FORMAT_X_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-GFX12-NEXT: [[TBUFFER_LOAD_FORMAT_X_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = 
TBUFFER_LOAD_FORMAT_X_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; CHECK-GFX12-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_VBUFFER_BOTHEN]] ; CHECK-GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 78, i32 0) @@ -56,7 +56,7 @@ define amdgpu_ps <2 x float> @struct_tbuffer_load_v2f32__sgpr_rsrc__vgpr_vindex_ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_XY_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8) + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_XY_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<2 x f32>), align 1, addrspace 8) ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XY_BOTHEN]].sub0 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XY_BOTHEN]].sub1 ; CHECK-NEXT: $vgpr0 = COPY [[COPY7]] @@ -76,7 +76,7 @@ define amdgpu_ps <2 x float> @struct_tbuffer_load_v2f32__sgpr_rsrc__vgpr_vindex_ ; CHECK-GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-GFX12-NEXT: [[TBUFFER_LOAD_FORMAT_XY_VBUFFER_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_XY_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8) + ; CHECK-GFX12-NEXT: [[TBUFFER_LOAD_FORMAT_XY_VBUFFER_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_XY_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<2 x f32>), align 1, addrspace 8) ; CHECK-GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XY_VBUFFER_BOTHEN]].sub0 ; CHECK-GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XY_VBUFFER_BOTHEN]].sub1 ; CHECK-GFX12-NEXT: $vgpr0 = COPY [[COPY7]] @@ -100,7 +100,7 @@ define amdgpu_ps <3 x float> @struct_tbuffer_load_v3f32__sgpr_rsrc__vgpr_vindex_ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_XYZ_BOTHEN:%[0-9]+]]:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8) + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_XYZ_BOTHEN:%[0-9]+]]:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<3 x f32>), align 1, addrspace 8) ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub0 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub1 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY 
[[TBUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub2 @@ -122,7 +122,7 @@ define amdgpu_ps <3 x float> @struct_tbuffer_load_v3f32__sgpr_rsrc__vgpr_vindex_ ; CHECK-GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-GFX12-NEXT: [[TBUFFER_LOAD_FORMAT_XYZ_VBUFFER_BOTHEN:%[0-9]+]]:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8) + ; CHECK-GFX12-NEXT: [[TBUFFER_LOAD_FORMAT_XYZ_VBUFFER_BOTHEN:%[0-9]+]]:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<3 x f32>), align 1, addrspace 8) ; CHECK-GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_VBUFFER_BOTHEN]].sub0 ; CHECK-GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_VBUFFER_BOTHEN]].sub1 ; CHECK-GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_VBUFFER_BOTHEN]].sub2 @@ -148,7 +148,7 @@ define amdgpu_ps <4 x float> @struct_tbuffer_load_v4f32__sgpr_rsrc__vgpr_vindex_ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<4 x f32>), align 1, addrspace 8) ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub0 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub1 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub2 @@ -172,7 +172,7 @@ define amdgpu_ps <4 x float> @struct_tbuffer_load_v4f32__sgpr_rsrc__vgpr_vindex_ ; CHECK-GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-GFX12-NEXT: [[TBUFFER_LOAD_FORMAT_XYZW_VBUFFER_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + ; CHECK-GFX12-NEXT: [[TBUFFER_LOAD_FORMAT_XYZW_VBUFFER_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<4 x f32>), align 1, addrspace 8) ; CHECK-GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_VBUFFER_BOTHEN]].sub0 ; CHECK-GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_VBUFFER_BOTHEN]].sub1 ; CHECK-GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_VBUFFER_BOTHEN]].sub2 @@ -201,7 +201,7 @@ define amdgpu_ps float @struct_tbuffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vo ; CHECK-NEXT: 
[[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY4]], %subreg.sub1 - ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_BOTHEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -219,7 +219,7 @@ define amdgpu_ps float @struct_tbuffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vo ; CHECK-GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; CHECK-GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; CHECK-GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY4]], %subreg.sub1 - ; CHECK-GFX12-NEXT: [[TBUFFER_LOAD_FORMAT_X_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-GFX12-NEXT: [[TBUFFER_LOAD_FORMAT_X_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; CHECK-GFX12-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_VBUFFER_BOTHEN]] ; CHECK-GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32> %rsrc, i32 0, i32 %voffset, i32 %soffset, i32 78, i32 0) @@ -268,7 +268,7 @@ define amdgpu_ps <4 x float> @struct_tbuffer_load_v4f32__vgpr_rsrc__sgpr_vindex_ ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<4 x f32>), align 1, addrspace 8) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -329,7 +329,7 @@ define amdgpu_ps <4 x float> @struct_tbuffer_load_v4f32__vgpr_rsrc__sgpr_vindex_ ; CHECK-GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-GFX12-NEXT: {{ $}} ; CHECK-GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; CHECK-GFX12-NEXT: [[TBUFFER_LOAD_FORMAT_XYZW_VBUFFER_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_VBUFFER_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + ; CHECK-GFX12-NEXT: [[TBUFFER_LOAD_FORMAT_XYZW_VBUFFER_BOTHEN:%[0-9]+]]:vreg_128 = 
TBUFFER_LOAD_FORMAT_XYZW_VBUFFER_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<4 x f32>), align 1, addrspace 8) ; CHECK-GFX12-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-GFX12-NEXT: {{ $}} @@ -366,7 +366,7 @@ define amdgpu_ps float @struct_tbuffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vo ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 78, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 78, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; CHECK-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_BOTHEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -383,7 +383,7 @@ define amdgpu_ps float @struct_tbuffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vo ; CHECK-GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-GFX12-NEXT: [[TBUFFER_LOAD_FORMAT_X_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 78, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-GFX12-NEXT: [[TBUFFER_LOAD_FORMAT_X_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 78, 0, 0, implicit $exec :: (dereferenceable load (f32), align 1, addrspace 8) ; CHECK-GFX12-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_VBUFFER_BOTHEN]] ; CHECK-GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %voffset = add i32 %voffset.base, 4095 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot2.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot2.ll index 3acff52874dd9..7758458db60bb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot2.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot2.ll @@ -262,22 +262,19 @@ define i32 @v_udot2_fnegv2f16_c(<2 x i16> %a, <2 x i16> %b, <2 x half> %c) { ; GFX906-LABEL: v_udot2_fnegv2f16_c: ; GFX906: ; %bb.0: ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 -; GFX906-NEXT: v_dot2_u32_u16 v0, v0, v1, v2 +; GFX906-NEXT: v_dot2_u32_u16 v0, v0, v1, v2 neg_lo:[0,0,1] neg_hi:[0,0,1] ; GFX906-NEXT: s_setpc_b64 s[30:31] ; ; GFX908-LABEL: v_udot2_fnegv2f16_c: ; GFX908: ; %bb.0: ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX908-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 -; GFX908-NEXT: v_dot2_u32_u16 v0, v0, v1, v2 +; GFX908-NEXT: v_dot2_u32_u16 v0, v0, v1, v2 neg_lo:[0,0,1] neg_hi:[0,0,1] ; GFX908-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_udot2_fnegv2f16_c: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 -; GFX10-NEXT: v_dot2_u32_u16 v0, v0, v1, v2 +; GFX10-NEXT: v_dot2_u32_u16 v0, v0, v1, 
v2 neg_lo:[0,0,1] neg_hi:[0,0,1] ; GFX10-NEXT: s_setpc_b64 s[30:31] %neg.c = fneg <2 x half> %c %cast.neg.c = bitcast <2 x half> %neg.c to i32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot4.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot4.ll index b14af9e043e09..dc079ae3ff96b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot4.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot4.ll @@ -132,15 +132,13 @@ define i32 @v_udot4_fnegv2f16_a(<2 x half> %a, i32 %b, i32 %c) { ; GFX906-LABEL: v_udot4_fnegv2f16_a: ; GFX906: ; %bb.0: ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 -; GFX906-NEXT: v_dot4_u32_u8 v0, v0, v1, v2 +; GFX906-NEXT: v_dot4_u32_u8 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0] ; GFX906-NEXT: s_setpc_b64 s[30:31] ; ; GFX10PLUS-LABEL: v_udot4_fnegv2f16_a: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 -; GFX10PLUS-NEXT: v_dot4_u32_u8 v0, v0, v1, v2 +; GFX10PLUS-NEXT: v_dot4_u32_u8 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0] ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %neg.a = fneg <2 x half> %a %cast.neg.a = bitcast <2 x half> %neg.a to i32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot8.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot8.ll index a664c8aa508ef..b33ef8a184f86 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot8.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot8.ll @@ -68,15 +68,13 @@ define i32 @v_udot8_fnegv2f16_a(<2 x half> %a, i32 %b, i32 %c) { ; GFX906-LABEL: v_udot8_fnegv2f16_a: ; GFX906: ; %bb.0: ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 -; GFX906-NEXT: v_dot8_u32_u4 v0, v0, v1, v2 +; GFX906-NEXT: v_dot8_u32_u4 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0] ; GFX906-NEXT: s_setpc_b64 s[30:31] ; ; GFX10PLUS-LABEL: v_udot8_fnegv2f16_a: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 -; GFX10PLUS-NEXT: v_dot8_u32_u4 v0, v0, v1, v2 +; GFX10PLUS-NEXT: v_dot8_u32_u4 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0] ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %neg.a = fneg <2 x half> %a %cast.neg.a = bitcast <2 x half> %neg.a to i32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-legalize-range-metadata.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-legalize-range-metadata.ll index b6b4301dadc7a..0c2f6ee0f8b3c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-legalize-range-metadata.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-legalize-range-metadata.ll @@ -10,20 +10,20 @@ define <4 x i8> @global_load_v4i8_align4__rangemd(ptr addrspace(1) %ptr) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p1) :: (load (s32) from %ir.ptr, addrspace 1) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR 
[[LOAD]], [[C2]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[LSHR1]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[LSHR2]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[MV]](p1) :: (load (i32) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C2]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[LSHR1]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[LSHR2]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %load = load <4 x i8>, ptr addrspace(1) %ptr, align 4, !range !0, !noundef !1 ret <4 x i8> %load @@ -35,17 +35,17 @@ define <3 x i8> @global_load_v3i8_align4__rangemd(ptr addrspace(1) %ptr) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p1) :: (load (s32) from %ir.ptr, addrspace 1) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[LSHR1]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[MV]](p1) :: (load (i32) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[LSHR1]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %load = load <3 x i8>, ptr addrspace(1) %ptr, align 4, !range !0, !noundef !1 ret <3 x i8> %load @@ -56,14 +56,14 @@ define <2 x i8> @global_load_v2i8_align2__rangemd(ptr addrspace(1) %ptr) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p1) :: (load (s16) from %ir.ptr, addrspace 1) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR 
[[LOAD]], [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[MV]](p1) :: (load (i16) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %load = load <2 x i8>, ptr addrspace(1) %ptr, align 2, !range !0, !noundef !1 ret <2 x i8> %load @@ -74,15 +74,15 @@ define <2 x i64> @global_load_v2i64_align16__rangemd(ptr addrspace(1) %ptr) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[MV]](p1) :: (load (<2 x s64>) from %ir.ptr, !range !2, addrspace 1) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[MV]](p1) :: (load (<2 x i64>) from %ir.ptr, !range !2, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<2 x i64>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %load = load <2 x i64>, ptr addrspace(1) %ptr, align 16, !range !2, !noundef !1 ret <2 x i64> %load @@ -94,16 +94,16 @@ define i128 @global_load_i128_align16__rangemd(ptr addrspace(1) %ptr) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[MV]](p1) :: (load (<4 x s32>) from %ir.ptr, addrspace 1) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s128) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD 
[[MV]](p1) :: (load (<4 x i32>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](i128) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %load = load i128, ptr addrspace(1) %ptr, align 16, !range !3, !noundef !1 ret i128 %load @@ -116,11 +116,11 @@ define i32 @global_sextload_i8_align1__rangemd(ptr addrspace(1) %ptr) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[MV]](p1) :: (load (s8) from %ir.ptr, !range !0, addrspace 1) - ; CHECK-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(i32) = G_SEXTLOAD [[MV]](p1) :: (load (i8) from %ir.ptr, !range !0, addrspace 1) + ; CHECK-NEXT: $vgpr0 = COPY [[SEXTLOAD]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %load = load i8, ptr addrspace(1) %ptr, align 1, !range !0, !noundef !1 %ext = sext i8 %load to i32 @@ -132,11 +132,11 @@ define i32 @global_zextload_i8_align1__rangemd(ptr addrspace(1) %ptr) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[MV]](p1) :: (load (s8) from %ir.ptr, !range !4, addrspace 1) - ; CHECK-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(i32) = G_SEXTLOAD [[MV]](p1) :: (load (i8) from %ir.ptr, !range !4, addrspace 1) + ; CHECK-NEXT: $vgpr0 = COPY [[SEXTLOAD]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %load = load i8, ptr addrspace(1) %ptr, align 1, !range !4, !noundef !1 %ext = sext i8 %load to i32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll index 784611cf68dd2..68ad67decf1dc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll @@ -654,6 +654,7 @@ define amdgpu_ps half @lshr_i16_sv(i16 inreg %value, i16 %amount) { ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX6-NEXT: s_and_b32 s0, s0, 0xffff ; GFX6-NEXT: v_lshr_b32_e32 v0, s0, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: lshr_i16_sv: @@ -681,6 +682,7 @@ define amdgpu_ps half @lshr_i16_vs(i16 %value, i16 inreg %amount) { ; GFX6-NEXT: s_and_b32 s0, s0, 0xffff ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX6-NEXT: v_lshrrev_b32_e32 v0, s0, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: ; 
return to shader part epilog ; ; GFX8-LABEL: lshr_i16_vs: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/mmra.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/mmra.ll index 44b12a9f6fe81..f211d6acf1553 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/mmra.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/mmra.ll @@ -13,10 +13,10 @@ define void @fence_loads(ptr %ptr) { ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 ; CHECK-NEXT: ATOMIC_FENCE 5, 1, mmra !0 - ; CHECK-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr, mmra !1 :: (load acquire (s8) from %ir.ptr, align 4) + ; CHECK-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr, mmra !1 :: (load acquire (i8) from %ir.ptr, align 4) ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK-NEXT: FLAT_STORE_BYTE [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr, mmra !2 :: (store release (s8) into %ir.ptr, align 4) + ; CHECK-NEXT: FLAT_STORE_BYTE [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr, mmra !2 :: (store release (i8) into %ir.ptr, align 4) ; CHECK-NEXT: SI_RETURN fence release, !mmra !0 %ld = load atomic i8, ptr %ptr acquire, align 4, !mmra !2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/promote-dependency-on-invariant-result.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/promote-dependency-on-invariant-result.ll index 090aa067a5260..9027658534fd8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/promote-dependency-on-invariant-result.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/promote-dependency-on-invariant-result.ll @@ -10,10 +10,10 @@ define void @use_invariant_promotable_lds(ptr addrspace(5) %arg, i32 %i) { ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p0) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[C]](s32), [[DEF]](p0) :: (store (s32) into %ir.tmp) + ; CHECK-NEXT: G_STORE [[C]](i32), [[DEF]](p0) :: (store (i32) into %ir.tmp) ; CHECK-NEXT: SI_RETURN bb: %tmp = call ptr @llvm.invariant.start.p5(i64 4, ptr addrspace(5) %arg) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll index 774b9c3972f9d..c326177e3984e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll @@ -8,40 +8,42 @@ define amdgpu_ps void @load_1d_vgpr_vaddr__sgpr_srsrc(<8 x i32> inreg %rsrc, i32 ; FAST: bb.1 (%ir-block.0): ; FAST-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; FAST-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; FAST-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 - ; FAST-NEXT: 
[[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8 - ; FAST-NEXT: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9 - ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; FAST-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; FAST-NEXT: [[COPY4:%[0-9]+]]:sgpr(i32) = COPY $sgpr6 + ; FAST-NEXT: [[COPY5:%[0-9]+]]:sgpr(i32) = COPY $sgpr7 + ; FAST-NEXT: [[COPY6:%[0-9]+]]:sgpr(i32) = COPY $sgpr8 + ; FAST-NEXT: [[COPY7:%[0-9]+]]:sgpr(i32) = COPY $sgpr9 + ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; FAST-NEXT: [[COPY8:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 ; FAST-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 8) + ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](i32), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 0 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; FAST-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) ; FAST-NEXT: [[COPY9:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) - ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY9]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; FAST-NEXT: G_STORE [[BITCAST]](<4 x f32>), [[COPY9]](p1) :: (store (<4 x f32>) into `ptr addrspace(1) undef`, addrspace 1) ; FAST-NEXT: S_ENDPGM 0 ; ; GREEDY-LABEL: name: load_1d_vgpr_vaddr__sgpr_srsrc ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8 - ; GREEDY-NEXT: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GREEDY-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(i32) = COPY $sgpr6 + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:sgpr(i32) = COPY $sgpr7 + ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:sgpr(i32) = COPY $sgpr8 + ; GREEDY-NEXT: [[COPY7:%[0-9]+]]:sgpr(i32) = COPY $sgpr9 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x i32>) = G_BUILD_VECTOR 
[[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GREEDY-NEXT: [[COPY8:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 8) + ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](i32), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 0 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GREEDY-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) ; GREEDY-NEXT: [[COPY9:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) - ; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY9]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; GREEDY-NEXT: G_STORE [[BITCAST]](<4 x f32>), [[COPY9]](p1) :: (store (<4 x f32>) into `ptr addrspace(1) undef`, addrspace 1) ; GREEDY-NEXT: S_ENDPGM 0 %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) undef @@ -54,42 +56,44 @@ define amdgpu_ps void @load_1d_sgpr_vaddr__sgpr_srsrc(<8 x i32> inreg %rsrc, i32 ; FAST: bb.1 (%ir-block.0): ; FAST-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; FAST-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; FAST-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 - ; FAST-NEXT: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8 - ; FAST-NEXT: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9 - ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; FAST-NEXT: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr10 + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; FAST-NEXT: [[COPY4:%[0-9]+]]:sgpr(i32) = COPY $sgpr6 + ; FAST-NEXT: [[COPY5:%[0-9]+]]:sgpr(i32) = COPY $sgpr7 + ; FAST-NEXT: [[COPY6:%[0-9]+]]:sgpr(i32) = COPY $sgpr8 + ; FAST-NEXT: [[COPY7:%[0-9]+]]:sgpr(i32) = COPY $sgpr9 + ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; FAST-NEXT: [[COPY8:%[0-9]+]]:sgpr(i32) = COPY $sgpr10 ; FAST-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; FAST-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[COPY8]](s32) - ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 8) + ; FAST-NEXT: [[COPY9:%[0-9]+]]:vgpr(i32) = COPY [[COPY8]](i32) + ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD 
intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](i32), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 0 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; FAST-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) ; FAST-NEXT: [[COPY10:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) - ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY10]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; FAST-NEXT: G_STORE [[BITCAST]](<4 x f32>), [[COPY10]](p1) :: (store (<4 x f32>) into `ptr addrspace(1) undef`, addrspace 1) ; FAST-NEXT: S_ENDPGM 0 ; ; GREEDY-LABEL: name: load_1d_sgpr_vaddr__sgpr_srsrc ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8 - ; GREEDY-NEXT: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GREEDY-NEXT: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr10 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(i32) = COPY $sgpr6 + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:sgpr(i32) = COPY $sgpr7 + ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:sgpr(i32) = COPY $sgpr8 + ; GREEDY-NEXT: [[COPY7:%[0-9]+]]:sgpr(i32) = COPY $sgpr9 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GREEDY-NEXT: [[COPY8:%[0-9]+]]:sgpr(i32) = COPY $sgpr10 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; GREEDY-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[COPY8]](s32) - ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 8) + ; GREEDY-NEXT: [[COPY9:%[0-9]+]]:vgpr(i32) = COPY [[COPY8]](i32) + ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](i32), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 0 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GREEDY-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) ; GREEDY-NEXT: [[COPY10:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) - ; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY10]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; GREEDY-NEXT: G_STORE [[BITCAST]](<4 x f32>), [[COPY10]](p1) :: (store (<4 x f32>) into `ptr addrspace(1) undef`, addrspace 1) ; GREEDY-NEXT: S_ENDPGM 0 %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) store <4 x float> %v, 
ptr addrspace(1) undef @@ -103,16 +107,16 @@ define amdgpu_ps void @load_1d_vgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 %s) { ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 - ; FAST-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6 - ; FAST-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7 - ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; FAST-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr8 + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr4 + ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY $vgpr5 + ; FAST-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY $vgpr6 + ; FAST-NEXT: [[COPY7:%[0-9]+]]:vgpr(i32) = COPY $vgpr7 + ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; FAST-NEXT: [[COPY8:%[0-9]+]]:vgpr(i32) = COPY $vgpr8 ; FAST-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; FAST-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; FAST-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec @@ -120,33 +124,33 @@ define amdgpu_ps void @load_1d_vgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 %s) { ; FAST-NEXT: bb.2: ; FAST-NEXT: successors: %bb.3(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %17, %bb.3 - ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32), [[UV4:%[0-9]+]]:vgpr_32(s32), [[UV5:%[0-9]+]]:vgpr_32(s32), [[UV6:%[0-9]+]]:vgpr_32(s32), [[UV7:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) - ; FAST-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV4]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV5]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV6]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV7]](s32), implicit $exec - ; FAST-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), 
[[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; FAST-NEXT: [[UV8:%[0-9]+]]:vgpr(s64), [[UV9:%[0-9]+]]:vgpr(s64), [[UV10:%[0-9]+]]:vgpr(s64), [[UV11:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) - ; FAST-NEXT: [[UV12:%[0-9]+]]:sgpr(s64), [[UV13:%[0-9]+]]:sgpr(s64), [[UV14:%[0-9]+]]:sgpr(s64), [[UV15:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<8 x s32>) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV12]](s64), [[UV8]] - ; FAST-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV13]](s64), [[UV9]] - ; FAST-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; FAST-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV14]](s64), [[UV10]] - ; FAST-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]] - ; FAST-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]] - ; FAST-NEXT: [[AND2:%[0-9]+]]:vcc(s1) = G_AND [[AND1]], [[ICMP3]] - ; FAST-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1) - ; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; FAST-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %18, %bb.3 + ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr_32(i32), [[UV1:%[0-9]+]]:vgpr_32(i32), [[UV2:%[0-9]+]]:vgpr_32(i32), [[UV3:%[0-9]+]]:vgpr_32(i32), [[UV4:%[0-9]+]]:vgpr_32(i32), [[UV5:%[0-9]+]]:vgpr_32(i32), [[UV6:%[0-9]+]]:vgpr_32(i32), [[UV7:%[0-9]+]]:vgpr_32(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x i32>) + ; FAST-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV]](i32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV1]](i32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV2]](i32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV3]](i32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV4]](i32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV5]](i32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV6]](i32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV7]](i32), implicit $exec + ; FAST-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<8 x i32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](i32), [[V_READFIRSTLANE_B32_1]](i32), [[V_READFIRSTLANE_B32_2]](i32), [[V_READFIRSTLANE_B32_3]](i32), [[V_READFIRSTLANE_B32_4]](i32), [[V_READFIRSTLANE_B32_5]](i32), [[V_READFIRSTLANE_B32_6]](i32), [[V_READFIRSTLANE_B32_7]](i32) + ; FAST-NEXT: [[UV8:%[0-9]+]]:vgpr(i64), [[UV9:%[0-9]+]]:vgpr(i64), [[UV10:%[0-9]+]]:vgpr(i64), [[UV11:%[0-9]+]]:vgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x i32>) + ; FAST-NEXT: [[UV12:%[0-9]+]]:sgpr(i64), [[UV13:%[0-9]+]]:sgpr(i64), [[UV14:%[0-9]+]]:sgpr(i64), [[UV15:%[0-9]+]]:sgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<8 x i32>) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV12]](i64), [[UV8]] + ; FAST-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV13]](i64), [[UV9]] + ; FAST-NEXT: 
[[AND:%[0-9]+]]:vcc(i1) = G_AND [[ICMP]], [[ICMP1]] + ; FAST-NEXT: [[ICMP2:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV14]](i64), [[UV10]] + ; FAST-NEXT: [[AND1:%[0-9]+]]:vcc(i1) = G_AND [[AND]], [[ICMP2]] + ; FAST-NEXT: [[ICMP3:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV15]](i64), [[UV11]] + ; FAST-NEXT: [[AND2:%[0-9]+]]:vcc(i1) = G_AND [[AND1]], [[ICMP3]] + ; FAST-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND2]](i1) + ; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](i64), implicit-def $exec, implicit-def $scc, implicit $exec ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.3: ; FAST-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 8) + ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](i32), [[BUILD_VECTOR1]](<8 x i32>), 0, 0, 0 :: (dereferenceable load (<4 x f32>), addrspace 8) ; FAST-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; FAST-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; FAST-NEXT: {{ $}} @@ -156,8 +160,9 @@ define amdgpu_ps void @load_1d_vgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 %s) { ; FAST-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.5: + ; FAST-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) ; FAST-NEXT: [[COPY9:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) - ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY9]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; FAST-NEXT: G_STORE [[BITCAST]](<4 x f32>), [[COPY9]](p1) :: (store (<4 x f32>) into `ptr addrspace(1) undef`, addrspace 1) ; FAST-NEXT: S_ENDPGM 0 ; ; GREEDY-LABEL: name: load_1d_vgpr_vaddr__vgpr_srsrc @@ -165,16 +170,16 @@ define amdgpu_ps void @load_1d_vgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 %s) { ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6 - ; GREEDY-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GREEDY-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr8 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr4 + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY $vgpr5 + ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY $vgpr6 + 
; GREEDY-NEXT: [[COPY7:%[0-9]+]]:vgpr(i32) = COPY $vgpr7 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GREEDY-NEXT: [[COPY8:%[0-9]+]]:vgpr(i32) = COPY $vgpr8 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec @@ -182,33 +187,33 @@ define amdgpu_ps void @load_1d_vgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 %s) { ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: successors: %bb.3(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %17, %bb.3 - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32), [[UV4:%[0-9]+]]:vgpr_32(s32), [[UV5:%[0-9]+]]:vgpr_32(s32), [[UV6:%[0-9]+]]:vgpr_32(s32), [[UV7:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV4]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV5]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV6]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV7]](s32), implicit $exec - ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; GREEDY-NEXT: [[UV8:%[0-9]+]]:vgpr(s64), [[UV9:%[0-9]+]]:vgpr(s64), [[UV10:%[0-9]+]]:vgpr(s64), [[UV11:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) - ; GREEDY-NEXT: [[UV12:%[0-9]+]]:sgpr(s64), [[UV13:%[0-9]+]]:sgpr(s64), [[UV14:%[0-9]+]]:sgpr(s64), [[UV15:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<8 x s32>) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV12]](s64), [[UV8]] - ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV13]](s64), [[UV9]] - ; GREEDY-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV14]](s64), [[UV10]] - ; GREEDY-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]] - ; GREEDY-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]] - ; GREEDY-NEXT: [[AND2:%[0-9]+]]:vcc(s1) = G_AND [[AND1]], [[ICMP3]] - ; GREEDY-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1) - ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, 
implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %18, %bb.3 + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(i32), [[UV1:%[0-9]+]]:vgpr_32(i32), [[UV2:%[0-9]+]]:vgpr_32(i32), [[UV3:%[0-9]+]]:vgpr_32(i32), [[UV4:%[0-9]+]]:vgpr_32(i32), [[UV5:%[0-9]+]]:vgpr_32(i32), [[UV6:%[0-9]+]]:vgpr_32(i32), [[UV7:%[0-9]+]]:vgpr_32(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x i32>) + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV]](i32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV1]](i32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV2]](i32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV3]](i32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV4]](i32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV5]](i32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV6]](i32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV7]](i32), implicit $exec + ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<8 x i32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](i32), [[V_READFIRSTLANE_B32_1]](i32), [[V_READFIRSTLANE_B32_2]](i32), [[V_READFIRSTLANE_B32_3]](i32), [[V_READFIRSTLANE_B32_4]](i32), [[V_READFIRSTLANE_B32_5]](i32), [[V_READFIRSTLANE_B32_6]](i32), [[V_READFIRSTLANE_B32_7]](i32) + ; GREEDY-NEXT: [[UV8:%[0-9]+]]:vgpr(i64), [[UV9:%[0-9]+]]:vgpr(i64), [[UV10:%[0-9]+]]:vgpr(i64), [[UV11:%[0-9]+]]:vgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x i32>) + ; GREEDY-NEXT: [[UV12:%[0-9]+]]:sgpr(i64), [[UV13:%[0-9]+]]:sgpr(i64), [[UV14:%[0-9]+]]:sgpr(i64), [[UV15:%[0-9]+]]:sgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<8 x i32>) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV12]](i64), [[UV8]] + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV13]](i64), [[UV9]] + ; GREEDY-NEXT: [[AND:%[0-9]+]]:vcc(i1) = G_AND [[ICMP]], [[ICMP1]] + ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV14]](i64), [[UV10]] + ; GREEDY-NEXT: [[AND1:%[0-9]+]]:vcc(i1) = G_AND [[AND]], [[ICMP2]] + ; GREEDY-NEXT: [[ICMP3:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV15]](i64), [[UV11]] + ; GREEDY-NEXT: [[AND2:%[0-9]+]]:vcc(i1) = G_AND [[AND1]], [[ICMP3]] + ; GREEDY-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND2]](i1) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](i64), implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 8) + ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](i32), [[BUILD_VECTOR1]](<8 x i32>), 0, 0, 0 :: (dereferenceable load (<4 x f32>), addrspace 8) ; 
GREEDY-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GREEDY-NEXT: {{ $}} @@ -218,8 +223,9 @@ define amdgpu_ps void @load_1d_vgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 %s) { ; GREEDY-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.5: + ; GREEDY-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) ; GREEDY-NEXT: [[COPY9:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) - ; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY9]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; GREEDY-NEXT: G_STORE [[BITCAST]](<4 x f32>), [[COPY9]](p1) :: (store (<4 x f32>) into `ptr addrspace(1) undef`, addrspace 1) ; GREEDY-NEXT: S_ENDPGM 0 %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) undef @@ -233,51 +239,51 @@ define amdgpu_ps void @load_1d_sgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 inreg ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 - ; FAST-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6 - ; FAST-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7 - ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; FAST-NEXT: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr4 + ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY $vgpr5 + ; FAST-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY $vgpr6 + ; FAST-NEXT: [[COPY7:%[0-9]+]]:vgpr(i32) = COPY $vgpr7 + ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; FAST-NEXT: [[COPY8:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 ; FAST-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; FAST-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[COPY8]](s32) + ; FAST-NEXT: [[COPY9:%[0-9]+]]:vgpr(i32) = COPY [[COPY8]](i32) ; FAST-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; FAST-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: ; FAST-NEXT: successors: %bb.3(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %18, %bb.3 - ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32), [[UV4:%[0-9]+]]:vgpr_32(s32), [[UV5:%[0-9]+]]:vgpr_32(s32), [[UV6:%[0-9]+]]:vgpr_32(s32), [[UV7:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) - ; FAST-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]](s32), 
implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV4]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV5]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV6]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV7]](s32), implicit $exec - ; FAST-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; FAST-NEXT: [[UV8:%[0-9]+]]:vgpr(s64), [[UV9:%[0-9]+]]:vgpr(s64), [[UV10:%[0-9]+]]:vgpr(s64), [[UV11:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) - ; FAST-NEXT: [[UV12:%[0-9]+]]:sgpr(s64), [[UV13:%[0-9]+]]:sgpr(s64), [[UV14:%[0-9]+]]:sgpr(s64), [[UV15:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<8 x s32>) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV12]](s64), [[UV8]] - ; FAST-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV13]](s64), [[UV9]] - ; FAST-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; FAST-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV14]](s64), [[UV10]] - ; FAST-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]] - ; FAST-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]] - ; FAST-NEXT: [[AND2:%[0-9]+]]:vcc(s1) = G_AND [[AND1]], [[ICMP3]] - ; FAST-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1) - ; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; FAST-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %19, %bb.3 + ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr_32(i32), [[UV1:%[0-9]+]]:vgpr_32(i32), [[UV2:%[0-9]+]]:vgpr_32(i32), [[UV3:%[0-9]+]]:vgpr_32(i32), [[UV4:%[0-9]+]]:vgpr_32(i32), [[UV5:%[0-9]+]]:vgpr_32(i32), [[UV6:%[0-9]+]]:vgpr_32(i32), [[UV7:%[0-9]+]]:vgpr_32(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x i32>) + ; FAST-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV]](i32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV1]](i32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV2]](i32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV3]](i32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV4]](i32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV5]](i32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV6]](i32), implicit 
$exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV7]](i32), implicit $exec + ; FAST-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<8 x i32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](i32), [[V_READFIRSTLANE_B32_1]](i32), [[V_READFIRSTLANE_B32_2]](i32), [[V_READFIRSTLANE_B32_3]](i32), [[V_READFIRSTLANE_B32_4]](i32), [[V_READFIRSTLANE_B32_5]](i32), [[V_READFIRSTLANE_B32_6]](i32), [[V_READFIRSTLANE_B32_7]](i32) + ; FAST-NEXT: [[UV8:%[0-9]+]]:vgpr(i64), [[UV9:%[0-9]+]]:vgpr(i64), [[UV10:%[0-9]+]]:vgpr(i64), [[UV11:%[0-9]+]]:vgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x i32>) + ; FAST-NEXT: [[UV12:%[0-9]+]]:sgpr(i64), [[UV13:%[0-9]+]]:sgpr(i64), [[UV14:%[0-9]+]]:sgpr(i64), [[UV15:%[0-9]+]]:sgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<8 x i32>) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV12]](i64), [[UV8]] + ; FAST-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV13]](i64), [[UV9]] + ; FAST-NEXT: [[AND:%[0-9]+]]:vcc(i1) = G_AND [[ICMP]], [[ICMP1]] + ; FAST-NEXT: [[ICMP2:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV14]](i64), [[UV10]] + ; FAST-NEXT: [[AND1:%[0-9]+]]:vcc(i1) = G_AND [[AND]], [[ICMP2]] + ; FAST-NEXT: [[ICMP3:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV15]](i64), [[UV11]] + ; FAST-NEXT: [[AND2:%[0-9]+]]:vcc(i1) = G_AND [[AND1]], [[ICMP3]] + ; FAST-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND2]](i1) + ; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](i64), implicit-def $exec, implicit-def $scc, implicit $exec ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.3: ; FAST-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 8) + ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](i32), [[BUILD_VECTOR1]](<8 x i32>), 0, 0, 0 :: (dereferenceable load (<4 x f32>), addrspace 8) ; FAST-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; FAST-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; FAST-NEXT: {{ $}} @@ -287,8 +293,9 @@ define amdgpu_ps void @load_1d_sgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 inreg ; FAST-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.5: + ; FAST-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) ; FAST-NEXT: [[COPY10:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) - ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY10]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; FAST-NEXT: G_STORE [[BITCAST]](<4 x f32>), [[COPY10]](p1) :: (store (<4 x f32>) into `ptr addrspace(1) undef`, addrspace 1) ; FAST-NEXT: S_ENDPGM 0 ; ; GREEDY-LABEL: name: load_1d_sgpr_vaddr__vgpr_srsrc @@ -296,51 +303,51 @@ define amdgpu_ps void @load_1d_sgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 inreg ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: 
[[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6 - ; GREEDY-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GREEDY-NEXT: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr4 + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY $vgpr5 + ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY $vgpr6 + ; GREEDY-NEXT: [[COPY7:%[0-9]+]]:vgpr(i32) = COPY $vgpr7 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GREEDY-NEXT: [[COPY8:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; GREEDY-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[COPY8]](s32) + ; GREEDY-NEXT: [[COPY9:%[0-9]+]]:vgpr(i32) = COPY [[COPY8]](i32) ; GREEDY-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: successors: %bb.3(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %18, %bb.3 - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32), [[UV4:%[0-9]+]]:vgpr_32(s32), [[UV5:%[0-9]+]]:vgpr_32(s32), [[UV6:%[0-9]+]]:vgpr_32(s32), [[UV7:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV4]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV5]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV6]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV7]](s32), implicit $exec - ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; GREEDY-NEXT: [[UV8:%[0-9]+]]:vgpr(s64), [[UV9:%[0-9]+]]:vgpr(s64), [[UV10:%[0-9]+]]:vgpr(s64), [[UV11:%[0-9]+]]:vgpr(s64) = 
G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) - ; GREEDY-NEXT: [[UV12:%[0-9]+]]:sgpr(s64), [[UV13:%[0-9]+]]:sgpr(s64), [[UV14:%[0-9]+]]:sgpr(s64), [[UV15:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<8 x s32>) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV12]](s64), [[UV8]] - ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV13]](s64), [[UV9]] - ; GREEDY-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV14]](s64), [[UV10]] - ; GREEDY-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]] - ; GREEDY-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]] - ; GREEDY-NEXT: [[AND2:%[0-9]+]]:vcc(s1) = G_AND [[AND1]], [[ICMP3]] - ; GREEDY-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1) - ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %19, %bb.3 + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(i32), [[UV1:%[0-9]+]]:vgpr_32(i32), [[UV2:%[0-9]+]]:vgpr_32(i32), [[UV3:%[0-9]+]]:vgpr_32(i32), [[UV4:%[0-9]+]]:vgpr_32(i32), [[UV5:%[0-9]+]]:vgpr_32(i32), [[UV6:%[0-9]+]]:vgpr_32(i32), [[UV7:%[0-9]+]]:vgpr_32(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x i32>) + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV]](i32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV1]](i32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV2]](i32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV3]](i32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV4]](i32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV5]](i32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV6]](i32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV7]](i32), implicit $exec + ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<8 x i32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](i32), [[V_READFIRSTLANE_B32_1]](i32), [[V_READFIRSTLANE_B32_2]](i32), [[V_READFIRSTLANE_B32_3]](i32), [[V_READFIRSTLANE_B32_4]](i32), [[V_READFIRSTLANE_B32_5]](i32), [[V_READFIRSTLANE_B32_6]](i32), [[V_READFIRSTLANE_B32_7]](i32) + ; GREEDY-NEXT: [[UV8:%[0-9]+]]:vgpr(i64), [[UV9:%[0-9]+]]:vgpr(i64), [[UV10:%[0-9]+]]:vgpr(i64), [[UV11:%[0-9]+]]:vgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x i32>) + ; GREEDY-NEXT: [[UV12:%[0-9]+]]:sgpr(i64), [[UV13:%[0-9]+]]:sgpr(i64), [[UV14:%[0-9]+]]:sgpr(i64), [[UV15:%[0-9]+]]:sgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<8 x i32>) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV12]](i64), [[UV8]] + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV13]](i64), [[UV9]] + ; GREEDY-NEXT: [[AND:%[0-9]+]]:vcc(i1) = G_AND [[ICMP]], [[ICMP1]] + ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV14]](i64), [[UV10]] + ; GREEDY-NEXT: [[AND1:%[0-9]+]]:vcc(i1) = G_AND [[AND]], [[ICMP2]] + ; GREEDY-NEXT: [[ICMP3:%[0-9]+]]:vcc(i1) 
= G_ICMP intpred(eq), [[UV15]](i64), [[UV11]] + ; GREEDY-NEXT: [[AND2:%[0-9]+]]:vcc(i1) = G_AND [[AND1]], [[ICMP3]] + ; GREEDY-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND2]](i1) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](i64), implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 8) + ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](i32), [[BUILD_VECTOR1]](<8 x i32>), 0, 0, 0 :: (dereferenceable load (<4 x f32>), addrspace 8) ; GREEDY-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GREEDY-NEXT: {{ $}} @@ -350,8 +357,9 @@ define amdgpu_ps void @load_1d_sgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 inreg ; GREEDY-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.5: + ; GREEDY-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) ; GREEDY-NEXT: [[COPY10:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) - ; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY10]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; GREEDY-NEXT: G_STORE [[BITCAST]](<4 x f32>), [[COPY10]](p1) :: (store (<4 x f32>) into `ptr addrspace(1) undef`, addrspace 1) ; GREEDY-NEXT: S_ENDPGM 0 %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) undef diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll index 3c4e8d411d9be..ea5b06bd197e5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll @@ -8,48 +8,52 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__sgpr_rsrc__sgpr_samp(<8 x i32> inre ; FAST: bb.1 (%ir-block.0): ; FAST-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; FAST-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; FAST-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 - ; FAST-NEXT: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8 - ; FAST-NEXT: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9 - ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; FAST-NEXT: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr10 - ; FAST-NEXT: [[COPY9:%[0-9]+]]:sgpr(s32) = COPY $sgpr11 - ; FAST-NEXT: [[COPY10:%[0-9]+]]:sgpr(s32) = COPY $sgpr12 - ; FAST-NEXT: 
[[COPY11:%[0-9]+]]:sgpr(s32) = COPY $sgpr13 - ; FAST-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; FAST-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; FAST-NEXT: [[COPY4:%[0-9]+]]:sgpr(i32) = COPY $sgpr6 + ; FAST-NEXT: [[COPY5:%[0-9]+]]:sgpr(i32) = COPY $sgpr7 + ; FAST-NEXT: [[COPY6:%[0-9]+]]:sgpr(i32) = COPY $sgpr8 + ; FAST-NEXT: [[COPY7:%[0-9]+]]:sgpr(i32) = COPY $sgpr9 + ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; FAST-NEXT: [[COPY8:%[0-9]+]]:sgpr(i32) = COPY $sgpr10 + ; FAST-NEXT: [[COPY9:%[0-9]+]]:sgpr(i32) = COPY $sgpr11 + ; FAST-NEXT: [[COPY10:%[0-9]+]]:sgpr(i32) = COPY $sgpr12 + ; FAST-NEXT: [[COPY11:%[0-9]+]]:sgpr(i32) = COPY $sgpr13 + ; FAST-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; FAST-NEXT: [[COPY12:%[0-9]+]]:vgpr(f32) = COPY $vgpr0 ; FAST-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; FAST-NEXT: [[BITCAST:%[0-9]+]]:vgpr(i32) = G_BITCAST [[COPY12]](f32) + ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[BITCAST]](i32), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; FAST-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; FAST-NEXT: G_STORE [[BITCAST1]](<4 x f32>), [[DEF]](p1) :: (store (<4 x f32>) into `ptr addrspace(1) undef`, addrspace 1) ; FAST-NEXT: S_ENDPGM 0 ; ; GREEDY-LABEL: name: sample_1d_vgpr_vaddr__sgpr_rsrc__sgpr_samp ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8 - ; GREEDY-NEXT: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GREEDY-NEXT: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr10 - ; GREEDY-NEXT: [[COPY9:%[0-9]+]]:sgpr(s32) = COPY $sgpr11 - ; GREEDY-NEXT: [[COPY10:%[0-9]+]]:sgpr(s32) = COPY $sgpr12 - ; GREEDY-NEXT: 
[[COPY11:%[0-9]+]]:sgpr(s32) = COPY $sgpr13 - ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GREEDY-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(i32) = COPY $sgpr6 + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:sgpr(i32) = COPY $sgpr7 + ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:sgpr(i32) = COPY $sgpr8 + ; GREEDY-NEXT: [[COPY7:%[0-9]+]]:sgpr(i32) = COPY $sgpr9 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GREEDY-NEXT: [[COPY8:%[0-9]+]]:sgpr(i32) = COPY $sgpr10 + ; GREEDY-NEXT: [[COPY9:%[0-9]+]]:sgpr(i32) = COPY $sgpr11 + ; GREEDY-NEXT: [[COPY10:%[0-9]+]]:sgpr(i32) = COPY $sgpr12 + ; GREEDY-NEXT: [[COPY11:%[0-9]+]]:sgpr(i32) = COPY $sgpr13 + ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GREEDY-NEXT: [[COPY12:%[0-9]+]]:vgpr(f32) = COPY $vgpr0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; GREEDY-NEXT: [[BITCAST:%[0-9]+]]:vgpr(i32) = G_BITCAST [[COPY12]](f32) + ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[BITCAST]](i32), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GREEDY-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GREEDY-NEXT: G_STORE [[BITCAST1]](<4 x f32>), [[DEF]](p1) :: (store (<4 x f32>) into `ptr addrspace(1) undef`, addrspace 1) ; GREEDY-NEXT: S_ENDPGM 0 %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) undef @@ -62,50 +66,54 @@ define amdgpu_ps void @sample_1d_sgpr_vaddr__sgpr_rsrc__sgpr_samp(<8 x i32> inre ; FAST: bb.1 (%ir-block.0): ; FAST-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; FAST-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; FAST-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 - ; FAST-NEXT: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8 - ; FAST-NEXT: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9 - ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), 
[[COPY6]](s32), [[COPY7]](s32) - ; FAST-NEXT: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr10 - ; FAST-NEXT: [[COPY9:%[0-9]+]]:sgpr(s32) = COPY $sgpr11 - ; FAST-NEXT: [[COPY10:%[0-9]+]]:sgpr(s32) = COPY $sgpr12 - ; FAST-NEXT: [[COPY11:%[0-9]+]]:sgpr(s32) = COPY $sgpr13 - ; FAST-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; FAST-NEXT: [[COPY12:%[0-9]+]]:sgpr(s32) = COPY $sgpr14 + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; FAST-NEXT: [[COPY4:%[0-9]+]]:sgpr(i32) = COPY $sgpr6 + ; FAST-NEXT: [[COPY5:%[0-9]+]]:sgpr(i32) = COPY $sgpr7 + ; FAST-NEXT: [[COPY6:%[0-9]+]]:sgpr(i32) = COPY $sgpr8 + ; FAST-NEXT: [[COPY7:%[0-9]+]]:sgpr(i32) = COPY $sgpr9 + ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; FAST-NEXT: [[COPY8:%[0-9]+]]:sgpr(i32) = COPY $sgpr10 + ; FAST-NEXT: [[COPY9:%[0-9]+]]:sgpr(i32) = COPY $sgpr11 + ; FAST-NEXT: [[COPY10:%[0-9]+]]:sgpr(i32) = COPY $sgpr12 + ; FAST-NEXT: [[COPY11:%[0-9]+]]:sgpr(i32) = COPY $sgpr13 + ; FAST-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; FAST-NEXT: [[COPY12:%[0-9]+]]:sgpr(f32) = COPY $sgpr14 ; FAST-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; FAST-NEXT: [[COPY13:%[0-9]+]]:vgpr(s32) = COPY [[COPY12]](s32) - ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY13]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; FAST-NEXT: [[BITCAST:%[0-9]+]]:sgpr(i32) = G_BITCAST [[COPY12]](f32) + ; FAST-NEXT: [[COPY13:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST]](i32) + ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY13]](i32), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; FAST-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; FAST-NEXT: G_STORE [[BITCAST1]](<4 x f32>), [[DEF]](p1) :: (store (<4 x f32>) into `ptr addrspace(1) undef`, addrspace 1) ; FAST-NEXT: S_ENDPGM 0 ; ; GREEDY-LABEL: name: sample_1d_sgpr_vaddr__sgpr_rsrc__sgpr_samp ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8 - ; GREEDY-NEXT: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = 
G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GREEDY-NEXT: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr10 - ; GREEDY-NEXT: [[COPY9:%[0-9]+]]:sgpr(s32) = COPY $sgpr11 - ; GREEDY-NEXT: [[COPY10:%[0-9]+]]:sgpr(s32) = COPY $sgpr12 - ; GREEDY-NEXT: [[COPY11:%[0-9]+]]:sgpr(s32) = COPY $sgpr13 - ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GREEDY-NEXT: [[COPY12:%[0-9]+]]:sgpr(s32) = COPY $sgpr14 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(i32) = COPY $sgpr6 + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:sgpr(i32) = COPY $sgpr7 + ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:sgpr(i32) = COPY $sgpr8 + ; GREEDY-NEXT: [[COPY7:%[0-9]+]]:sgpr(i32) = COPY $sgpr9 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GREEDY-NEXT: [[COPY8:%[0-9]+]]:sgpr(i32) = COPY $sgpr10 + ; GREEDY-NEXT: [[COPY9:%[0-9]+]]:sgpr(i32) = COPY $sgpr11 + ; GREEDY-NEXT: [[COPY10:%[0-9]+]]:sgpr(i32) = COPY $sgpr12 + ; GREEDY-NEXT: [[COPY11:%[0-9]+]]:sgpr(i32) = COPY $sgpr13 + ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GREEDY-NEXT: [[COPY12:%[0-9]+]]:sgpr(f32) = COPY $sgpr14 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; GREEDY-NEXT: [[COPY13:%[0-9]+]]:vgpr(s32) = COPY [[COPY12]](s32) - ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY13]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 8) - ; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; GREEDY-NEXT: [[BITCAST:%[0-9]+]]:sgpr(i32) = G_BITCAST [[COPY12]](f32) + ; GREEDY-NEXT: [[COPY13:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST]](i32) + ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY13]](i32), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x f32>), addrspace 8) + ; GREEDY-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GREEDY-NEXT: G_STORE [[BITCAST1]](<4 x f32>), [[DEF]](p1) :: (store (<4 x f32>) into `ptr addrspace(1) undef`, addrspace 1) ; GREEDY-NEXT: S_ENDPGM 0 %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) undef @@ -119,55 +127,56 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__sgpr_samp(<8 x i32> %rsr ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; FAST-NEXT: 
[[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 - ; FAST-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6 - ; FAST-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7 - ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; FAST-NEXT: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; FAST-NEXT: [[COPY9:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; FAST-NEXT: [[COPY10:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; FAST-NEXT: [[COPY11:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; FAST-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; FAST-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr8 + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr4 + ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY $vgpr5 + ; FAST-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY $vgpr6 + ; FAST-NEXT: [[COPY7:%[0-9]+]]:vgpr(i32) = COPY $vgpr7 + ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; FAST-NEXT: [[COPY8:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; FAST-NEXT: [[COPY9:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; FAST-NEXT: [[COPY10:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; FAST-NEXT: [[COPY11:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; FAST-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; FAST-NEXT: [[COPY12:%[0-9]+]]:vgpr(f32) = COPY $vgpr8 ; FAST-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF + ; FAST-NEXT: [[BITCAST:%[0-9]+]]:vgpr(i32) = G_BITCAST [[COPY12]](f32) ; FAST-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; FAST-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: ; FAST-NEXT: successors: %bb.3(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %22, %bb.3 - ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32), [[UV4:%[0-9]+]]:vgpr_32(s32), [[UV5:%[0-9]+]]:vgpr_32(s32), [[UV6:%[0-9]+]]:vgpr_32(s32), [[UV7:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) - ; FAST-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV4]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV5]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = 
V_READFIRSTLANE_B32 [[UV6]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV7]](s32), implicit $exec - ; FAST-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; FAST-NEXT: [[UV8:%[0-9]+]]:vgpr(s64), [[UV9:%[0-9]+]]:vgpr(s64), [[UV10:%[0-9]+]]:vgpr(s64), [[UV11:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) - ; FAST-NEXT: [[UV12:%[0-9]+]]:sgpr(s64), [[UV13:%[0-9]+]]:sgpr(s64), [[UV14:%[0-9]+]]:sgpr(s64), [[UV15:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<8 x s32>) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV12]](s64), [[UV8]] - ; FAST-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV13]](s64), [[UV9]] - ; FAST-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; FAST-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV14]](s64), [[UV10]] - ; FAST-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]] - ; FAST-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]] - ; FAST-NEXT: [[AND2:%[0-9]+]]:vcc(s1) = G_AND [[AND1]], [[ICMP3]] - ; FAST-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1) - ; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; FAST-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %24, %bb.3 + ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr_32(i32), [[UV1:%[0-9]+]]:vgpr_32(i32), [[UV2:%[0-9]+]]:vgpr_32(i32), [[UV3:%[0-9]+]]:vgpr_32(i32), [[UV4:%[0-9]+]]:vgpr_32(i32), [[UV5:%[0-9]+]]:vgpr_32(i32), [[UV6:%[0-9]+]]:vgpr_32(i32), [[UV7:%[0-9]+]]:vgpr_32(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x i32>) + ; FAST-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV]](i32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV1]](i32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV2]](i32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV3]](i32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV4]](i32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV5]](i32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV6]](i32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV7]](i32), implicit $exec + ; FAST-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<8 x i32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](i32), [[V_READFIRSTLANE_B32_1]](i32), [[V_READFIRSTLANE_B32_2]](i32), [[V_READFIRSTLANE_B32_3]](i32), [[V_READFIRSTLANE_B32_4]](i32), [[V_READFIRSTLANE_B32_5]](i32), [[V_READFIRSTLANE_B32_6]](i32), [[V_READFIRSTLANE_B32_7]](i32) + ; FAST-NEXT: [[UV8:%[0-9]+]]:vgpr(i64), [[UV9:%[0-9]+]]:vgpr(i64), [[UV10:%[0-9]+]]:vgpr(i64), [[UV11:%[0-9]+]]:vgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x i32>) + ; FAST-NEXT: [[UV12:%[0-9]+]]:sgpr(i64), 
[[UV13:%[0-9]+]]:sgpr(i64), [[UV14:%[0-9]+]]:sgpr(i64), [[UV15:%[0-9]+]]:sgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<8 x i32>) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV12]](i64), [[UV8]] + ; FAST-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV13]](i64), [[UV9]] + ; FAST-NEXT: [[AND:%[0-9]+]]:vcc(i1) = G_AND [[ICMP]], [[ICMP1]] + ; FAST-NEXT: [[ICMP2:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV14]](i64), [[UV10]] + ; FAST-NEXT: [[AND1:%[0-9]+]]:vcc(i1) = G_AND [[AND]], [[ICMP2]] + ; FAST-NEXT: [[ICMP3:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV15]](i64), [[UV11]] + ; FAST-NEXT: [[AND2:%[0-9]+]]:vcc(i1) = G_AND [[AND1]], [[ICMP3]] + ; FAST-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND2]](i1) + ; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](i64), implicit-def $exec, implicit-def $scc, implicit $exec ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.3: ; FAST-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 8) + ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[BITCAST]](i32), [[BUILD_VECTOR2]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x f32>), addrspace 8) ; FAST-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; FAST-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; FAST-NEXT: {{ $}} @@ -177,7 +186,8 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__sgpr_samp(<8 x i32> %rsr ; FAST-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.5: - ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; FAST-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; FAST-NEXT: G_STORE [[BITCAST1]](<4 x f32>), [[DEF]](p1) :: (store (<4 x f32>) into `ptr addrspace(1) undef`, addrspace 1) ; FAST-NEXT: S_ENDPGM 0 ; ; GREEDY-LABEL: name: sample_1d_vgpr_vaddr__vgpr_rsrc__sgpr_samp @@ -185,55 +195,56 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__sgpr_samp(<8 x i32> %rsr ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6 - ; GREEDY-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GREEDY-NEXT: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: 
[[COPY9:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY10:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY11:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GREEDY-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr8 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr4 + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY $vgpr5 + ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY $vgpr6 + ; GREEDY-NEXT: [[COPY7:%[0-9]+]]:vgpr(i32) = COPY $vgpr7 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GREEDY-NEXT: [[COPY8:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY9:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY10:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY11:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GREEDY-NEXT: [[COPY12:%[0-9]+]]:vgpr(f32) = COPY $vgpr8 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF + ; GREEDY-NEXT: [[BITCAST:%[0-9]+]]:vgpr(i32) = G_BITCAST [[COPY12]](f32) ; GREEDY-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: successors: %bb.3(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %22, %bb.3 - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32), [[UV4:%[0-9]+]]:vgpr_32(s32), [[UV5:%[0-9]+]]:vgpr_32(s32), [[UV6:%[0-9]+]]:vgpr_32(s32), [[UV7:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV4]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV5]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV6]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV7]](s32), implicit $exec - ; GREEDY-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; GREEDY-NEXT: [[UV8:%[0-9]+]]:vgpr(s64), 
[[UV9:%[0-9]+]]:vgpr(s64), [[UV10:%[0-9]+]]:vgpr(s64), [[UV11:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) - ; GREEDY-NEXT: [[UV12:%[0-9]+]]:sgpr(s64), [[UV13:%[0-9]+]]:sgpr(s64), [[UV14:%[0-9]+]]:sgpr(s64), [[UV15:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<8 x s32>) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV12]](s64), [[UV8]] - ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV13]](s64), [[UV9]] - ; GREEDY-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV14]](s64), [[UV10]] - ; GREEDY-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]] - ; GREEDY-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]] - ; GREEDY-NEXT: [[AND2:%[0-9]+]]:vcc(s1) = G_AND [[AND1]], [[ICMP3]] - ; GREEDY-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1) - ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %24, %bb.3 + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(i32), [[UV1:%[0-9]+]]:vgpr_32(i32), [[UV2:%[0-9]+]]:vgpr_32(i32), [[UV3:%[0-9]+]]:vgpr_32(i32), [[UV4:%[0-9]+]]:vgpr_32(i32), [[UV5:%[0-9]+]]:vgpr_32(i32), [[UV6:%[0-9]+]]:vgpr_32(i32), [[UV7:%[0-9]+]]:vgpr_32(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x i32>) + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV]](i32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV1]](i32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV2]](i32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV3]](i32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV4]](i32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV5]](i32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV6]](i32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV7]](i32), implicit $exec + ; GREEDY-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<8 x i32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](i32), [[V_READFIRSTLANE_B32_1]](i32), [[V_READFIRSTLANE_B32_2]](i32), [[V_READFIRSTLANE_B32_3]](i32), [[V_READFIRSTLANE_B32_4]](i32), [[V_READFIRSTLANE_B32_5]](i32), [[V_READFIRSTLANE_B32_6]](i32), [[V_READFIRSTLANE_B32_7]](i32) + ; GREEDY-NEXT: [[UV8:%[0-9]+]]:vgpr(i64), [[UV9:%[0-9]+]]:vgpr(i64), [[UV10:%[0-9]+]]:vgpr(i64), [[UV11:%[0-9]+]]:vgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x i32>) + ; GREEDY-NEXT: [[UV12:%[0-9]+]]:sgpr(i64), [[UV13:%[0-9]+]]:sgpr(i64), [[UV14:%[0-9]+]]:sgpr(i64), [[UV15:%[0-9]+]]:sgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<8 x i32>) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV12]](i64), [[UV8]] + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV13]](i64), [[UV9]] + ; GREEDY-NEXT: [[AND:%[0-9]+]]:vcc(i1) = G_AND [[ICMP]], [[ICMP1]] + ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV14]](i64), [[UV10]] + ; GREEDY-NEXT: 
[[AND1:%[0-9]+]]:vcc(i1) = G_AND [[AND]], [[ICMP2]] + ; GREEDY-NEXT: [[ICMP3:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV15]](i64), [[UV11]] + ; GREEDY-NEXT: [[AND2:%[0-9]+]]:vcc(i1) = G_AND [[AND1]], [[ICMP3]] + ; GREEDY-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND2]](i1) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](i64), implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 8) + ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[BITCAST]](i32), [[BUILD_VECTOR2]](<8 x i32>), [[BUILD_VECTOR1]](<4 x i32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x f32>), addrspace 8) ; GREEDY-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GREEDY-NEXT: {{ $}} @@ -243,7 +254,8 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__sgpr_samp(<8 x i32> %rsr ; GREEDY-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.5: - ; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; GREEDY-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GREEDY-NEXT: G_STORE [[BITCAST1]](<4 x f32>), [[DEF]](p1) :: (store (<4 x f32>) into `ptr addrspace(1) undef`, addrspace 1) ; GREEDY-NEXT: S_ENDPGM 0 %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) undef @@ -257,47 +269,48 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__sgpr_rsrc__vgpr_samp(<8 x i32> inre ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; FAST-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; FAST-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 - ; FAST-NEXT: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8 - ; FAST-NEXT: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9 - ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; FAST-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; FAST-NEXT: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; FAST-NEXT: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; FAST-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; FAST-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 + ; 
FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; FAST-NEXT: [[COPY4:%[0-9]+]]:sgpr(i32) = COPY $sgpr6 + ; FAST-NEXT: [[COPY5:%[0-9]+]]:sgpr(i32) = COPY $sgpr7 + ; FAST-NEXT: [[COPY6:%[0-9]+]]:sgpr(i32) = COPY $sgpr8 + ; FAST-NEXT: [[COPY7:%[0-9]+]]:sgpr(i32) = COPY $sgpr9 + ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; FAST-NEXT: [[COPY8:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY9:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; FAST-NEXT: [[COPY10:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; FAST-NEXT: [[COPY11:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; FAST-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:vgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; FAST-NEXT: [[COPY12:%[0-9]+]]:vgpr(f32) = COPY $vgpr4 ; FAST-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF + ; FAST-NEXT: [[BITCAST:%[0-9]+]]:vgpr(i32) = G_BITCAST [[COPY12]](f32) ; FAST-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; FAST-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: ; FAST-NEXT: successors: %bb.3(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %22, %bb.3 - ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; FAST-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec - ; FAST-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; FAST-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; FAST-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<4 x s32>) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; FAST-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; FAST-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; FAST-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; FAST-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %24, %bb.3 + ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr_32(i32), [[UV1:%[0-9]+]]:vgpr_32(i32), [[UV2:%[0-9]+]]:vgpr_32(i32), [[UV3:%[0-9]+]]:vgpr_32(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x i32>) + ; FAST-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV]](i32), implicit $exec + ; FAST-NEXT: 
[[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV1]](i32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV2]](i32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV3]](i32), implicit $exec + ; FAST-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](i32), [[V_READFIRSTLANE_B32_1]](i32), [[V_READFIRSTLANE_B32_2]](i32), [[V_READFIRSTLANE_B32_3]](i32) + ; FAST-NEXT: [[UV4:%[0-9]+]]:vgpr(i64), [[UV5:%[0-9]+]]:vgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x i32>) + ; FAST-NEXT: [[UV6:%[0-9]+]]:sgpr(i64), [[UV7:%[0-9]+]]:sgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<4 x i32>) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV6]](i64), [[UV4]] + ; FAST-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV7]](i64), [[UV5]] + ; FAST-NEXT: [[AND:%[0-9]+]]:vcc(i1) = G_AND [[ICMP]], [[ICMP1]] + ; FAST-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](i1) + ; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](i64), implicit-def $exec, implicit-def $scc, implicit $exec ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.3: ; FAST-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR2]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 8) + ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[BITCAST]](i32), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR2]](<4 x i32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x f32>), addrspace 8) ; FAST-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; FAST-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; FAST-NEXT: {{ $}} @@ -307,7 +320,8 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__sgpr_rsrc__vgpr_samp(<8 x i32> inre ; FAST-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.5: - ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; FAST-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; FAST-NEXT: G_STORE [[BITCAST1]](<4 x f32>), [[DEF]](p1) :: (store (<4 x f32>) into `ptr addrspace(1) undef`, addrspace 1) ; FAST-NEXT: S_ENDPGM 0 ; ; GREEDY-LABEL: name: sample_1d_vgpr_vaddr__sgpr_rsrc__vgpr_samp @@ -315,47 +329,48 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__sgpr_rsrc__vgpr_samp(<8 x i32> inre ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8 - ; GREEDY-NEXT: 
[[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GREEDY-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY-NEXT: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GREEDY-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(i32) = COPY $sgpr6 + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:sgpr(i32) = COPY $sgpr7 + ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:sgpr(i32) = COPY $sgpr8 + ; GREEDY-NEXT: [[COPY7:%[0-9]+]]:sgpr(i32) = COPY $sgpr9 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GREEDY-NEXT: [[COPY8:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY9:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GREEDY-NEXT: [[COPY10:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; GREEDY-NEXT: [[COPY11:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:vgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GREEDY-NEXT: [[COPY12:%[0-9]+]]:vgpr(f32) = COPY $vgpr4 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF + ; GREEDY-NEXT: [[BITCAST:%[0-9]+]]:vgpr(i32) = G_BITCAST [[COPY12]](f32) ; GREEDY-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: successors: %bb.3(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %22, %bb.3 - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec - ; GREEDY-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; GREEDY-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<4 x s32>) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; GREEDY-NEXT: 
[[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; GREEDY-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %24, %bb.3 + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(i32), [[UV1:%[0-9]+]]:vgpr_32(i32), [[UV2:%[0-9]+]]:vgpr_32(i32), [[UV3:%[0-9]+]]:vgpr_32(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x i32>) + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV]](i32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV1]](i32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV2]](i32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV3]](i32), implicit $exec + ; GREEDY-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](i32), [[V_READFIRSTLANE_B32_1]](i32), [[V_READFIRSTLANE_B32_2]](i32), [[V_READFIRSTLANE_B32_3]](i32) + ; GREEDY-NEXT: [[UV4:%[0-9]+]]:vgpr(i64), [[UV5:%[0-9]+]]:vgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x i32>) + ; GREEDY-NEXT: [[UV6:%[0-9]+]]:sgpr(i64), [[UV7:%[0-9]+]]:sgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<4 x i32>) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV6]](i64), [[UV4]] + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV7]](i64), [[UV5]] + ; GREEDY-NEXT: [[AND:%[0-9]+]]:vcc(i1) = G_AND [[ICMP]], [[ICMP1]] + ; GREEDY-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](i1) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](i64), implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR2]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 8) + ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[BITCAST]](i32), [[BUILD_VECTOR]](<8 x i32>), [[BUILD_VECTOR2]](<4 x i32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x f32>), addrspace 8) ; GREEDY-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GREEDY-NEXT: {{ $}} @@ -365,7 +380,8 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__sgpr_rsrc__vgpr_samp(<8 x i32> inre ; GREEDY-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.5: - ; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; GREEDY-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GREEDY-NEXT: G_STORE [[BITCAST1]](<4 x f32>), [[DEF]](p1) :: (store (<4 x f32>) into `ptr addrspace(1) undef`, addrspace 1) ; GREEDY-NEXT: 
S_ENDPGM 0 %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) undef @@ -379,67 +395,68 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__vgpr_samp(<8 x i32> %rsr ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 - ; FAST-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6 - ; FAST-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7 - ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; FAST-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr8 - ; FAST-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY $vgpr9 - ; FAST-NEXT: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY $vgpr10 - ; FAST-NEXT: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY $vgpr11 - ; FAST-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; FAST-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr12 + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr4 + ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY $vgpr5 + ; FAST-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY $vgpr6 + ; FAST-NEXT: [[COPY7:%[0-9]+]]:vgpr(i32) = COPY $vgpr7 + ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; FAST-NEXT: [[COPY8:%[0-9]+]]:vgpr(i32) = COPY $vgpr8 + ; FAST-NEXT: [[COPY9:%[0-9]+]]:vgpr(i32) = COPY $vgpr9 + ; FAST-NEXT: [[COPY10:%[0-9]+]]:vgpr(i32) = COPY $vgpr10 + ; FAST-NEXT: [[COPY11:%[0-9]+]]:vgpr(i32) = COPY $vgpr11 + ; FAST-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:vgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; FAST-NEXT: [[COPY12:%[0-9]+]]:vgpr(f32) = COPY $vgpr12 ; FAST-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF + ; FAST-NEXT: [[BITCAST:%[0-9]+]]:vgpr(i32) = G_BITCAST [[COPY12]](f32) ; FAST-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; FAST-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: ; FAST-NEXT: successors: %bb.3(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %22, %bb.3 - ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32), [[UV4:%[0-9]+]]:vgpr_32(s32), [[UV5:%[0-9]+]]:vgpr_32(s32), [[UV6:%[0-9]+]]:vgpr_32(s32), [[UV7:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) - ; FAST-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; FAST-NEXT: 
[[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV4]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV5]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV6]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV7]](s32), implicit $exec - ; FAST-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; FAST-NEXT: [[UV8:%[0-9]+]]:vgpr(s64), [[UV9:%[0-9]+]]:vgpr(s64), [[UV10:%[0-9]+]]:vgpr(s64), [[UV11:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) - ; FAST-NEXT: [[UV12:%[0-9]+]]:sgpr(s64), [[UV13:%[0-9]+]]:sgpr(s64), [[UV14:%[0-9]+]]:sgpr(s64), [[UV15:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<8 x s32>) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV12]](s64), [[UV8]] - ; FAST-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV13]](s64), [[UV9]] - ; FAST-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; FAST-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV14]](s64), [[UV10]] - ; FAST-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]] - ; FAST-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]] - ; FAST-NEXT: [[AND2:%[0-9]+]]:vcc(s1) = G_AND [[AND1]], [[ICMP3]] - ; FAST-NEXT: [[UV16:%[0-9]+]]:vgpr_32(s32), [[UV17:%[0-9]+]]:vgpr_32(s32), [[UV18:%[0-9]+]]:vgpr_32(s32), [[UV19:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; FAST-NEXT: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV16]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV17]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV18]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV19]](s32), implicit $exec - ; FAST-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_8]](s32), [[V_READFIRSTLANE_B32_9]](s32), [[V_READFIRSTLANE_B32_10]](s32), [[V_READFIRSTLANE_B32_11]](s32) - ; FAST-NEXT: [[UV20:%[0-9]+]]:vgpr(s64), [[UV21:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; FAST-NEXT: [[UV22:%[0-9]+]]:sgpr(s64), [[UV23:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR3]](<4 x s32>) - ; FAST-NEXT: [[ICMP4:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV22]](s64), [[UV20]] - ; FAST-NEXT: [[AND3:%[0-9]+]]:vcc(s1) = G_AND [[AND2]], [[ICMP4]] - ; FAST-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV23]](s64), [[UV21]] - ; FAST-NEXT: [[AND4:%[0-9]+]]:vcc(s1) = G_AND [[AND3]], [[ICMP5]] - ; FAST-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT 
intrinsic(@llvm.amdgcn.ballot), [[AND4]](s1) - ; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; FAST-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %24, %bb.3 + ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr_32(i32), [[UV1:%[0-9]+]]:vgpr_32(i32), [[UV2:%[0-9]+]]:vgpr_32(i32), [[UV3:%[0-9]+]]:vgpr_32(i32), [[UV4:%[0-9]+]]:vgpr_32(i32), [[UV5:%[0-9]+]]:vgpr_32(i32), [[UV6:%[0-9]+]]:vgpr_32(i32), [[UV7:%[0-9]+]]:vgpr_32(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x i32>) + ; FAST-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV]](i32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV1]](i32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV2]](i32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV3]](i32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV4]](i32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV5]](i32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV6]](i32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV7]](i32), implicit $exec + ; FAST-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<8 x i32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](i32), [[V_READFIRSTLANE_B32_1]](i32), [[V_READFIRSTLANE_B32_2]](i32), [[V_READFIRSTLANE_B32_3]](i32), [[V_READFIRSTLANE_B32_4]](i32), [[V_READFIRSTLANE_B32_5]](i32), [[V_READFIRSTLANE_B32_6]](i32), [[V_READFIRSTLANE_B32_7]](i32) + ; FAST-NEXT: [[UV8:%[0-9]+]]:vgpr(i64), [[UV9:%[0-9]+]]:vgpr(i64), [[UV10:%[0-9]+]]:vgpr(i64), [[UV11:%[0-9]+]]:vgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x i32>) + ; FAST-NEXT: [[UV12:%[0-9]+]]:sgpr(i64), [[UV13:%[0-9]+]]:sgpr(i64), [[UV14:%[0-9]+]]:sgpr(i64), [[UV15:%[0-9]+]]:sgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<8 x i32>) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV12]](i64), [[UV8]] + ; FAST-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV13]](i64), [[UV9]] + ; FAST-NEXT: [[AND:%[0-9]+]]:vcc(i1) = G_AND [[ICMP]], [[ICMP1]] + ; FAST-NEXT: [[ICMP2:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV14]](i64), [[UV10]] + ; FAST-NEXT: [[AND1:%[0-9]+]]:vcc(i1) = G_AND [[AND]], [[ICMP2]] + ; FAST-NEXT: [[ICMP3:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV15]](i64), [[UV11]] + ; FAST-NEXT: [[AND2:%[0-9]+]]:vcc(i1) = G_AND [[AND1]], [[ICMP3]] + ; FAST-NEXT: [[UV16:%[0-9]+]]:vgpr_32(i32), [[UV17:%[0-9]+]]:vgpr_32(i32), [[UV18:%[0-9]+]]:vgpr_32(i32), [[UV19:%[0-9]+]]:vgpr_32(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x i32>) + ; FAST-NEXT: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV16]](i32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV17]](i32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV18]](i32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV19]](i32), implicit $exec + ; FAST-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_8]](i32), [[V_READFIRSTLANE_B32_9]](i32), 
[[V_READFIRSTLANE_B32_10]](i32), [[V_READFIRSTLANE_B32_11]](i32) + ; FAST-NEXT: [[UV20:%[0-9]+]]:vgpr(i64), [[UV21:%[0-9]+]]:vgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x i32>) + ; FAST-NEXT: [[UV22:%[0-9]+]]:sgpr(i64), [[UV23:%[0-9]+]]:sgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR3]](<4 x i32>) + ; FAST-NEXT: [[ICMP4:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV22]](i64), [[UV20]] + ; FAST-NEXT: [[AND3:%[0-9]+]]:vcc(i1) = G_AND [[AND2]], [[ICMP4]] + ; FAST-NEXT: [[ICMP5:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV23]](i64), [[UV21]] + ; FAST-NEXT: [[AND4:%[0-9]+]]:vcc(i1) = G_AND [[AND3]], [[ICMP5]] + ; FAST-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND4]](i1) + ; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](i64), implicit-def $exec, implicit-def $scc, implicit $exec ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.3: ; FAST-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR3]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 8) + ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[BITCAST]](i32), [[BUILD_VECTOR2]](<8 x i32>), [[BUILD_VECTOR3]](<4 x i32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x f32>), addrspace 8) ; FAST-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; FAST-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; FAST-NEXT: {{ $}} @@ -449,7 +466,8 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__vgpr_samp(<8 x i32> %rsr ; FAST-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.5: - ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; FAST-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; FAST-NEXT: G_STORE [[BITCAST1]](<4 x f32>), [[DEF]](p1) :: (store (<4 x f32>) into `ptr addrspace(1) undef`, addrspace 1) ; FAST-NEXT: S_ENDPGM 0 ; ; GREEDY-LABEL: name: sample_1d_vgpr_vaddr__vgpr_rsrc__vgpr_samp @@ -457,67 +475,68 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__vgpr_samp(<8 x i32> %rsr ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6 - ; GREEDY-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GREEDY-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr8 - ; GREEDY-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY $vgpr9 - ; GREEDY-NEXT: [[COPY10:%[0-9]+]]:vgpr(s32) 
= COPY $vgpr10 - ; GREEDY-NEXT: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY $vgpr11 - ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GREEDY-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr12 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr4 + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY $vgpr5 + ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY $vgpr6 + ; GREEDY-NEXT: [[COPY7:%[0-9]+]]:vgpr(i32) = COPY $vgpr7 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GREEDY-NEXT: [[COPY8:%[0-9]+]]:vgpr(i32) = COPY $vgpr8 + ; GREEDY-NEXT: [[COPY9:%[0-9]+]]:vgpr(i32) = COPY $vgpr9 + ; GREEDY-NEXT: [[COPY10:%[0-9]+]]:vgpr(i32) = COPY $vgpr10 + ; GREEDY-NEXT: [[COPY11:%[0-9]+]]:vgpr(i32) = COPY $vgpr11 + ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:vgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GREEDY-NEXT: [[COPY12:%[0-9]+]]:vgpr(f32) = COPY $vgpr12 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF + ; GREEDY-NEXT: [[BITCAST:%[0-9]+]]:vgpr(i32) = G_BITCAST [[COPY12]](f32) ; GREEDY-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: successors: %bb.3(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %22, %bb.3 - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32), [[UV4:%[0-9]+]]:vgpr_32(s32), [[UV5:%[0-9]+]]:vgpr_32(s32), [[UV6:%[0-9]+]]:vgpr_32(s32), [[UV7:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV4]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV5]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV6]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV7]](s32), implicit $exec - ; GREEDY-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; GREEDY-NEXT: [[UV8:%[0-9]+]]:vgpr(s64), [[UV9:%[0-9]+]]:vgpr(s64), [[UV10:%[0-9]+]]:vgpr(s64), [[UV11:%[0-9]+]]:vgpr(s64) 
= G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) - ; GREEDY-NEXT: [[UV12:%[0-9]+]]:sgpr(s64), [[UV13:%[0-9]+]]:sgpr(s64), [[UV14:%[0-9]+]]:sgpr(s64), [[UV15:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<8 x s32>) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV12]](s64), [[UV8]] - ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV13]](s64), [[UV9]] - ; GREEDY-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV14]](s64), [[UV10]] - ; GREEDY-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]] - ; GREEDY-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]] - ; GREEDY-NEXT: [[AND2:%[0-9]+]]:vcc(s1) = G_AND [[AND1]], [[ICMP3]] - ; GREEDY-NEXT: [[UV16:%[0-9]+]]:vgpr_32(s32), [[UV17:%[0-9]+]]:vgpr_32(s32), [[UV18:%[0-9]+]]:vgpr_32(s32), [[UV19:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV16]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV17]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV18]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV19]](s32), implicit $exec - ; GREEDY-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_8]](s32), [[V_READFIRSTLANE_B32_9]](s32), [[V_READFIRSTLANE_B32_10]](s32), [[V_READFIRSTLANE_B32_11]](s32) - ; GREEDY-NEXT: [[UV20:%[0-9]+]]:vgpr(s64), [[UV21:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; GREEDY-NEXT: [[UV22:%[0-9]+]]:sgpr(s64), [[UV23:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR3]](<4 x s32>) - ; GREEDY-NEXT: [[ICMP4:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV22]](s64), [[UV20]] - ; GREEDY-NEXT: [[AND3:%[0-9]+]]:vcc(s1) = G_AND [[AND2]], [[ICMP4]] - ; GREEDY-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV23]](s64), [[UV21]] - ; GREEDY-NEXT: [[AND4:%[0-9]+]]:vcc(s1) = G_AND [[AND3]], [[ICMP5]] - ; GREEDY-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND4]](s1) - ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %24, %bb.3 + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(i32), [[UV1:%[0-9]+]]:vgpr_32(i32), [[UV2:%[0-9]+]]:vgpr_32(i32), [[UV3:%[0-9]+]]:vgpr_32(i32), [[UV4:%[0-9]+]]:vgpr_32(i32), [[UV5:%[0-9]+]]:vgpr_32(i32), [[UV6:%[0-9]+]]:vgpr_32(i32), [[UV7:%[0-9]+]]:vgpr_32(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x i32>) + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV]](i32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV1]](i32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV2]](i32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV3]](i32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV4]](i32), implicit $exec + ; GREEDY-NEXT: 
[[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV5]](i32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV6]](i32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV7]](i32), implicit $exec + ; GREEDY-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<8 x i32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](i32), [[V_READFIRSTLANE_B32_1]](i32), [[V_READFIRSTLANE_B32_2]](i32), [[V_READFIRSTLANE_B32_3]](i32), [[V_READFIRSTLANE_B32_4]](i32), [[V_READFIRSTLANE_B32_5]](i32), [[V_READFIRSTLANE_B32_6]](i32), [[V_READFIRSTLANE_B32_7]](i32) + ; GREEDY-NEXT: [[UV8:%[0-9]+]]:vgpr(i64), [[UV9:%[0-9]+]]:vgpr(i64), [[UV10:%[0-9]+]]:vgpr(i64), [[UV11:%[0-9]+]]:vgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x i32>) + ; GREEDY-NEXT: [[UV12:%[0-9]+]]:sgpr(i64), [[UV13:%[0-9]+]]:sgpr(i64), [[UV14:%[0-9]+]]:sgpr(i64), [[UV15:%[0-9]+]]:sgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<8 x i32>) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV12]](i64), [[UV8]] + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV13]](i64), [[UV9]] + ; GREEDY-NEXT: [[AND:%[0-9]+]]:vcc(i1) = G_AND [[ICMP]], [[ICMP1]] + ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV14]](i64), [[UV10]] + ; GREEDY-NEXT: [[AND1:%[0-9]+]]:vcc(i1) = G_AND [[AND]], [[ICMP2]] + ; GREEDY-NEXT: [[ICMP3:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV15]](i64), [[UV11]] + ; GREEDY-NEXT: [[AND2:%[0-9]+]]:vcc(i1) = G_AND [[AND1]], [[ICMP3]] + ; GREEDY-NEXT: [[UV16:%[0-9]+]]:vgpr_32(i32), [[UV17:%[0-9]+]]:vgpr_32(i32), [[UV18:%[0-9]+]]:vgpr_32(i32), [[UV19:%[0-9]+]]:vgpr_32(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x i32>) + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV16]](i32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV17]](i32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV18]](i32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV19]](i32), implicit $exec + ; GREEDY-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_8]](i32), [[V_READFIRSTLANE_B32_9]](i32), [[V_READFIRSTLANE_B32_10]](i32), [[V_READFIRSTLANE_B32_11]](i32) + ; GREEDY-NEXT: [[UV20:%[0-9]+]]:vgpr(i64), [[UV21:%[0-9]+]]:vgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x i32>) + ; GREEDY-NEXT: [[UV22:%[0-9]+]]:sgpr(i64), [[UV23:%[0-9]+]]:sgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR3]](<4 x i32>) + ; GREEDY-NEXT: [[ICMP4:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV22]](i64), [[UV20]] + ; GREEDY-NEXT: [[AND3:%[0-9]+]]:vcc(i1) = G_AND [[AND2]], [[ICMP4]] + ; GREEDY-NEXT: [[ICMP5:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV23]](i64), [[UV21]] + ; GREEDY-NEXT: [[AND4:%[0-9]+]]:vcc(i1) = G_AND [[AND3]], [[ICMP5]] + ; GREEDY-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND4]](i1) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](i64), implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = 
G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR3]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 8) + ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[BITCAST]](i32), [[BUILD_VECTOR2]](<8 x i32>), [[BUILD_VECTOR3]](<4 x i32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x f32>), addrspace 8) ; GREEDY-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GREEDY-NEXT: {{ $}} @@ -527,7 +546,8 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__vgpr_samp(<8 x i32> %rsr ; GREEDY-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.5: - ; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; GREEDY-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<4 x f32>) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + ; GREEDY-NEXT: G_STORE [[BITCAST1]](<4 x f32>), [[DEF]](p1) :: (store (<4 x f32>) into `ptr addrspace(1) undef`, addrspace 1) ; GREEDY-NEXT: S_ENDPGM 0 %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) undef diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll index 3df5a16fa74d8..4f712acc87821 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll @@ -8,17 +8,18 @@ define amdgpu_ps float @raw_buffer_load__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY6]](s32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(i32) = COPY $sgpr6 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; CHECK-NEXT: 
[[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(i32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[COPY6]](i32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable load (f32), align 1, addrspace 8) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[AMDGPU_BUFFER_LOAD]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret float %val @@ -30,18 +31,19 @@ define amdgpu_ps float @raw_buffer_load__sgpr_rsrc__sgpr_val__sgpr_voffset__sgpr ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY6]](s32), [[COPY7]], [[COPY5]], 0, 0, 0 :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(i32) = COPY $sgpr6 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(i32) = COPY $sgpr7 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(i32) = COPY [[COPY4]](i32) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(i32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[COPY6]](i32), [[COPY7]], [[COPY5]], 0, 0, 0 :: (dereferenceable load (f32), align 1, addrspace 8) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[AMDGPU_BUFFER_LOAD]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret float %val @@ -54,40 +56,40 @@ define amdgpu_ps float @raw_buffer_load__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; 
CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %16, %bb.3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(i32), [[UV1:%[0-9]+]]:vgpr_32(i32), [[UV2:%[0-9]+]]:vgpr_32(i32), [[UV3:%[0-9]+]]:vgpr_32(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV]](i32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV1]](i32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV2]](i32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV3]](i32), implicit $exec + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](i32), [[V_READFIRSTLANE_B32_1]](i32), 
[[V_READFIRSTLANE_B32_2]](i32), [[V_READFIRSTLANE_B32_3]](i32) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(i64), [[UV5:%[0-9]+]]:vgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(i64), [[UV7:%[0-9]+]]:sgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x i32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV6]](i64), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV7]](i64), [[UV5]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(i1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](i1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](i64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[COPY6]](s32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(i32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x i32>), [[COPY6]](i32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable load (f32), align 1, addrspace 8) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -97,7 +99,8 @@ define amdgpu_ps float @raw_buffer_load__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[AMDGPU_BUFFER_LOAD]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret float %val @@ -110,31 +113,31 @@ define amdgpu_ps float @raw_buffer_load__sgpr_rsrc__vgpr_val__vgpr_voffset__vgpr ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY 
[[C]](i32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY5]](s32), implicit $exec - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](s32), [[COPY5]] - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %16, %bb.3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[COPY5]](i32), implicit $exec + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](i32), [[COPY5]] + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](i1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](i64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY6]](s32), [[COPY4]], [[V_READFIRSTLANE_B32_]], 0, 0, 0 :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(i32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[COPY6]](i32), [[COPY4]], [[V_READFIRSTLANE_B32_]], 0, 0, 0 :: (dereferenceable load (f32), align 1, addrspace 8) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -144,7 +147,8 @@ define amdgpu_ps float @raw_buffer_load__sgpr_rsrc__vgpr_val__vgpr_voffset__vgpr ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[AMDGPU_BUFFER_LOAD]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret float %val @@ -157,43 +161,43 @@ define amdgpu_ps float @raw_buffer_load__vgpr_rsrc__vgpr_val__vgpr_voffset__vgpr ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; 
CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY5]](s32), implicit $exec - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_4]](s32), [[COPY5]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]] - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %16, %bb.3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(i32), [[UV1:%[0-9]+]]:vgpr_32(i32), [[UV2:%[0-9]+]]:vgpr_32(i32), [[UV3:%[0-9]+]]:vgpr_32(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV]](i32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV1]](i32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV2]](i32), implicit $exec + ; CHECK-NEXT: 
[[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV3]](i32), implicit $exec + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](i32), [[V_READFIRSTLANE_B32_1]](i32), [[V_READFIRSTLANE_B32_2]](i32), [[V_READFIRSTLANE_B32_3]](i32) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(i64), [[UV5:%[0-9]+]]:vgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(i64), [[UV7:%[0-9]+]]:sgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x i32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV6]](i64), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV7]](i64), [[UV5]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(i1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[COPY5]](i32), implicit $exec + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_4]](i32), [[COPY5]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vcc(i1) = G_AND [[AND]], [[ICMP2]] + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND1]](i1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](i64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[COPY6]](s32), [[COPY4]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0 :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(i32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x i32>), [[COPY6]](i32), [[COPY4]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0 :: (dereferenceable load (f32), align 1, addrspace 8) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -203,7 +207,8 @@ define amdgpu_ps float @raw_buffer_load__vgpr_rsrc__vgpr_val__vgpr_voffset__vgpr ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[AMDGPU_BUFFER_LOAD]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret float %val diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.ptr.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.ptr.buffer.load.ll index 840b1e8f914ef..02a049bb6e664 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.ptr.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.ptr.buffer.load.ll @@ -8,17 +8,18 @@ define amdgpu_ps float @raw_ptr_buffer_load__sgpr_rsrc__vgpr_val__vgpr_voffset__ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: 
[[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY6]](s32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(i32) = COPY $sgpr6 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(i32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[COPY6]](i32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable load (f32) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[AMDGPU_BUFFER_LOAD]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) ret float %val @@ -30,18 +31,19 @@ define amdgpu_ps float @raw_ptr_buffer_load__sgpr_rsrc__sgpr_val__sgpr_voffset__ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY6]](s32), [[COPY7]], [[COPY5]], 0, 0, 0 :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(i32) = COPY $sgpr6 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(i32) = COPY $sgpr7 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + 
; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(i32) = COPY [[COPY4]](i32) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(i32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[COPY6]](i32), [[COPY7]], [[COPY5]], 0, 0, 0 :: (dereferenceable load (f32) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[AMDGPU_BUFFER_LOAD]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) ret float %val @@ -54,40 +56,40 @@ define amdgpu_ps float @raw_ptr_buffer_load__vgpr_rsrc__vgpr_val__vgpr_voffset__ ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %20, %bb.3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; CHECK-NEXT: 
[[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %21, %bb.3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(i32), [[UV1:%[0-9]+]]:vgpr_32(i32), [[UV2:%[0-9]+]]:vgpr_32(i32), [[UV3:%[0-9]+]]:vgpr_32(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV]](i32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV1]](i32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV2]](i32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV3]](i32), implicit $exec + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](i32), [[V_READFIRSTLANE_B32_1]](i32), [[V_READFIRSTLANE_B32_2]](i32), [[V_READFIRSTLANE_B32_3]](i32) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(i64), [[UV5:%[0-9]+]]:vgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(i64), [[UV7:%[0-9]+]]:sgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x i32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV6]](i64), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV7]](i64), [[UV5]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(i1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](i1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](i64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[COPY6]](s32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(i32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x i32>), [[COPY6]](i32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable load (f32) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -97,7 +99,8 @@ define amdgpu_ps float @raw_ptr_buffer_load__vgpr_rsrc__vgpr_val__vgpr_voffset__ ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[AMDGPU_BUFFER_LOAD]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) ret float %val @@ -110,31 +113,31 @@ define amdgpu_ps float 
@raw_ptr_buffer_load__sgpr_rsrc__vgpr_val__vgpr_voffset__ ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %20, %bb.3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY5]](s32), implicit $exec - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](s32), [[COPY5]] - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %21, %bb.3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[COPY5]](i32), implicit $exec + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](i32), [[COPY5]] + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](i1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](i64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY6]](s32), [[COPY4]], [[V_READFIRSTLANE_B32_]], 0, 0, 0 :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(i32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[COPY6]](i32), [[COPY4]], [[V_READFIRSTLANE_B32_]], 0, 0, 0 :: (dereferenceable load (f32) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def 
$scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -144,7 +147,8 @@ define amdgpu_ps float @raw_ptr_buffer_load__sgpr_rsrc__vgpr_val__vgpr_voffset__ ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[AMDGPU_BUFFER_LOAD]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) ret float %val @@ -157,43 +161,43 @@ define amdgpu_ps float @raw_ptr_buffer_load__vgpr_rsrc__vgpr_val__vgpr_voffset__ ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %20, %bb.3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP 
intpred(eq), [[UV6]](s64), [[UV4]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY5]](s32), implicit $exec - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_4]](s32), [[COPY5]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]] - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %21, %bb.3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(i32), [[UV1:%[0-9]+]]:vgpr_32(i32), [[UV2:%[0-9]+]]:vgpr_32(i32), [[UV3:%[0-9]+]]:vgpr_32(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV]](i32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV1]](i32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV2]](i32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV3]](i32), implicit $exec + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](i32), [[V_READFIRSTLANE_B32_1]](i32), [[V_READFIRSTLANE_B32_2]](i32), [[V_READFIRSTLANE_B32_3]](i32) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(i64), [[UV5:%[0-9]+]]:vgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(i64), [[UV7:%[0-9]+]]:sgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x i32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV6]](i64), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV7]](i64), [[UV5]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(i1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[COPY5]](i32), implicit $exec + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_4]](i32), [[COPY5]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vcc(i1) = G_AND [[AND]], [[ICMP2]] + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND1]](i1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](i64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[COPY6]](s32), [[COPY4]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0 :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(i32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x i32>), [[COPY6]](i32), [[COPY4]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0 :: (dereferenceable load (f32) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; 
CHECK-NEXT: {{ $}} @@ -203,7 +207,8 @@ define amdgpu_ps float @raw_ptr_buffer_load__vgpr_rsrc__vgpr_val__vgpr_voffset__ ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[AMDGPU_BUFFER_LOAD]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) ret float %val diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll index c7e4cbd5d500b..462261c15a930 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll @@ -9,32 +9,32 @@ define amdgpu_ps i32 @s_buffer_load_i32(<4 x i32> inreg %rsrc, i32 inreg %soffse ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GFX7-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(s32) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[AMDGPU_S_BUFFER_LOAD]](s32) - ; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) - ; GFX7-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sgpr(i32) = COPY $sgpr6 + ; GFX7-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(i32) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[COPY4]](i32), 0 :: (dereferenceable invariant load (i32)) + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[AMDGPU_S_BUFFER_LOAD]](i32) + ; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](i32) + ; GFX7-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](i32) ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; ; GFX12-LABEL: name: s_buffer_load_i32 ; GFX12: bb.1 (%ir-block.0): ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GFX12-NEXT: 
[[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(s32) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s32)) - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[AMDGPU_S_BUFFER_LOAD]](s32) - ; GFX12-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) - ; GFX12-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sgpr(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(i32) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[COPY4]](i32), 0 :: (dereferenceable invariant load (i32)) + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[AMDGPU_S_BUFFER_LOAD]](i32) + ; GFX12-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](i32) + ; GFX12-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](i32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret i32 %val @@ -45,40 +45,40 @@ define amdgpu_ps <2 x i32> @s_buffer_load_v2i32(<4 x i32> inreg %rsrc, i32 inreg ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GFX7-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s64)) - ; GFX7-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<2 x s32>) - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) - ; GFX7-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; GFX7-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) - ; GFX7-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sgpr(i32) = COPY $sgpr6 + ; GFX7-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<2 x i32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[COPY4]](i32), 0 :: (dereferenceable invariant load (i64)) + ; GFX7-NEXT: [[UV:%[0-9]+]]:sgpr(i32), 
[[UV1:%[0-9]+]]:sgpr(i32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<2 x i32>) + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[UV]](i32) + ; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](i32) + ; GFX7-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](i32) + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY [[UV1]](i32) + ; GFX7-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](i32) + ; GFX7-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](i32) ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 ; ; GFX12-LABEL: name: s_buffer_load_v2i32 ; GFX12: bb.1 (%ir-block.0): ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s64)) - ; GFX12-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<2 x s32>) - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; GFX12-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) - ; GFX12-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; GFX12-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) - ; GFX12-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sgpr(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<2 x i32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[COPY4]](i32), 0 :: (dereferenceable invariant load (i64)) + ; GFX12-NEXT: [[UV:%[0-9]+]]:sgpr(i32), [[UV1:%[0-9]+]]:sgpr(i32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<2 x i32>) + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[UV]](i32) + ; GFX12-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](i32) + ; GFX12-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](i32) + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY [[UV1]](i32) + ; GFX12-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](i32) + ; GFX12-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](i32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 %val = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <2 x i32> %val @@ -89,46 +89,46 @@ define amdgpu_ps <3 x i32> 
@s_buffer_load_v3i32(<4 x i32> inreg %rsrc, i32 inreg ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GFX7-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s96), align 16) - ; GFX7-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>) - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) - ; GFX7-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; GFX7-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) - ; GFX7-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32) - ; GFX7-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32) - ; GFX7-NEXT: $sgpr2 = COPY [[INTRINSIC_CONVERGENT2]](s32) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sgpr(i32) = COPY $sgpr6 + ; GFX7-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<4 x i32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[COPY4]](i32), 0 :: (dereferenceable invariant load (i96), align 16) + ; GFX7-NEXT: [[UV:%[0-9]+]]:sgpr(i32), [[UV1:%[0-9]+]]:sgpr(i32), [[UV2:%[0-9]+]]:sgpr(i32), [[UV3:%[0-9]+]]:sgpr(i32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x i32>) + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[UV]](i32) + ; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](i32) + ; GFX7-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](i32) + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY [[UV1]](i32) + ; GFX7-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](i32) + ; GFX7-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](i32) + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr(i32) = COPY [[UV2]](i32) + ; GFX7-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](i32) + ; GFX7-NEXT: $sgpr2 = COPY [[INTRINSIC_CONVERGENT2]](i32) ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2 ; ; GFX12-LABEL: name: s_buffer_load_v3i32 ; GFX12: bb.1 (%ir-block.0): ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: 
[[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<3 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s96), align 16) - ; GFX12-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<3 x s32>) - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; GFX12-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) - ; GFX12-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; GFX12-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) - ; GFX12-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32) - ; GFX12-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32) - ; GFX12-NEXT: $sgpr2 = COPY [[INTRINSIC_CONVERGENT2]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sgpr(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<3 x i32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[COPY4]](i32), 0 :: (dereferenceable invariant load (i96), align 16) + ; GFX12-NEXT: [[UV:%[0-9]+]]:sgpr(i32), [[UV1:%[0-9]+]]:sgpr(i32), [[UV2:%[0-9]+]]:sgpr(i32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<3 x i32>) + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[UV]](i32) + ; GFX12-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](i32) + ; GFX12-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](i32) + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY [[UV1]](i32) + ; GFX12-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](i32) + ; GFX12-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](i32) + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr(i32) = COPY [[UV2]](i32) + ; GFX12-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](i32) + ; GFX12-NEXT: $sgpr2 = COPY [[INTRINSIC_CONVERGENT2]](i32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2 %val = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <3 x i32> %val @@ -139,76 +139,76 @@ define amdgpu_ps <8 x i32> @s_buffer_load_v8i32(<4 x i32> inreg %rsrc, i32 inreg ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 
- ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GFX7-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<8 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s256)) - ; GFX7-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32), [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32), [[UV6:%[0-9]+]]:sgpr(s32), [[UV7:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<8 x s32>) - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) - ; GFX7-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; GFX7-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) - ; GFX7-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32) - ; GFX7-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32) - ; GFX7-NEXT: $sgpr2 = COPY [[INTRINSIC_CONVERGENT2]](s32) - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[UV3]](s32) - ; GFX7-NEXT: [[INTRINSIC_CONVERGENT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY8]](s32) - ; GFX7-NEXT: $sgpr3 = COPY [[INTRINSIC_CONVERGENT3]](s32) - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[UV4]](s32) - ; GFX7-NEXT: [[INTRINSIC_CONVERGENT4:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](s32) - ; GFX7-NEXT: $sgpr4 = COPY [[INTRINSIC_CONVERGENT4]](s32) - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[UV5]](s32) - ; GFX7-NEXT: [[INTRINSIC_CONVERGENT5:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY10]](s32) - ; GFX7-NEXT: $sgpr5 = COPY [[INTRINSIC_CONVERGENT5]](s32) - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY [[UV6]](s32) - ; GFX7-NEXT: [[INTRINSIC_CONVERGENT6:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY11]](s32) - ; GFX7-NEXT: $sgpr6 = COPY [[INTRINSIC_CONVERGENT6]](s32) - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY [[UV7]](s32) - ; GFX7-NEXT: [[INTRINSIC_CONVERGENT7:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY12]](s32) - ; GFX7-NEXT: $sgpr7 = COPY [[INTRINSIC_CONVERGENT7]](s32) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sgpr(i32) = COPY $sgpr6 + ; GFX7-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<8 x i32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[COPY4]](i32), 0 :: (dereferenceable invariant load (i256)) + ; GFX7-NEXT: 
[[UV:%[0-9]+]]:sgpr(i32), [[UV1:%[0-9]+]]:sgpr(i32), [[UV2:%[0-9]+]]:sgpr(i32), [[UV3:%[0-9]+]]:sgpr(i32), [[UV4:%[0-9]+]]:sgpr(i32), [[UV5:%[0-9]+]]:sgpr(i32), [[UV6:%[0-9]+]]:sgpr(i32), [[UV7:%[0-9]+]]:sgpr(i32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<8 x i32>) + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[UV]](i32) + ; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](i32) + ; GFX7-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](i32) + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY [[UV1]](i32) + ; GFX7-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](i32) + ; GFX7-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](i32) + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr(i32) = COPY [[UV2]](i32) + ; GFX7-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](i32) + ; GFX7-NEXT: $sgpr2 = COPY [[INTRINSIC_CONVERGENT2]](i32) + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr(i32) = COPY [[UV3]](i32) + ; GFX7-NEXT: [[INTRINSIC_CONVERGENT3:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY8]](i32) + ; GFX7-NEXT: $sgpr3 = COPY [[INTRINSIC_CONVERGENT3]](i32) + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr(i32) = COPY [[UV4]](i32) + ; GFX7-NEXT: [[INTRINSIC_CONVERGENT4:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](i32) + ; GFX7-NEXT: $sgpr4 = COPY [[INTRINSIC_CONVERGENT4]](i32) + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr(i32) = COPY [[UV5]](i32) + ; GFX7-NEXT: [[INTRINSIC_CONVERGENT5:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY10]](i32) + ; GFX7-NEXT: $sgpr5 = COPY [[INTRINSIC_CONVERGENT5]](i32) + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr(i32) = COPY [[UV6]](i32) + ; GFX7-NEXT: [[INTRINSIC_CONVERGENT6:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY11]](i32) + ; GFX7-NEXT: $sgpr6 = COPY [[INTRINSIC_CONVERGENT6]](i32) + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr(i32) = COPY [[UV7]](i32) + ; GFX7-NEXT: [[INTRINSIC_CONVERGENT7:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY12]](i32) + ; GFX7-NEXT: $sgpr7 = COPY [[INTRINSIC_CONVERGENT7]](i32) ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7 ; ; GFX12-LABEL: name: s_buffer_load_v8i32 ; GFX12: bb.1 (%ir-block.0): ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<8 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s256)) - ; GFX12-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32), [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32), [[UV6:%[0-9]+]]:sgpr(s32), [[UV7:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES 
[[AMDGPU_S_BUFFER_LOAD]](<8 x s32>) - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; GFX12-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) - ; GFX12-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; GFX12-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) - ; GFX12-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32) - ; GFX12-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32) - ; GFX12-NEXT: $sgpr2 = COPY [[INTRINSIC_CONVERGENT2]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[UV3]](s32) - ; GFX12-NEXT: [[INTRINSIC_CONVERGENT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY8]](s32) - ; GFX12-NEXT: $sgpr3 = COPY [[INTRINSIC_CONVERGENT3]](s32) - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[UV4]](s32) - ; GFX12-NEXT: [[INTRINSIC_CONVERGENT4:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](s32) - ; GFX12-NEXT: $sgpr4 = COPY [[INTRINSIC_CONVERGENT4]](s32) - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[UV5]](s32) - ; GFX12-NEXT: [[INTRINSIC_CONVERGENT5:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY10]](s32) - ; GFX12-NEXT: $sgpr5 = COPY [[INTRINSIC_CONVERGENT5]](s32) - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY [[UV6]](s32) - ; GFX12-NEXT: [[INTRINSIC_CONVERGENT6:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY11]](s32) - ; GFX12-NEXT: $sgpr6 = COPY [[INTRINSIC_CONVERGENT6]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY [[UV7]](s32) - ; GFX12-NEXT: [[INTRINSIC_CONVERGENT7:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY12]](s32) - ; GFX12-NEXT: $sgpr7 = COPY [[INTRINSIC_CONVERGENT7]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sgpr(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<8 x i32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[COPY4]](i32), 0 :: (dereferenceable invariant load (i256)) + ; GFX12-NEXT: [[UV:%[0-9]+]]:sgpr(i32), [[UV1:%[0-9]+]]:sgpr(i32), [[UV2:%[0-9]+]]:sgpr(i32), [[UV3:%[0-9]+]]:sgpr(i32), [[UV4:%[0-9]+]]:sgpr(i32), [[UV5:%[0-9]+]]:sgpr(i32), [[UV6:%[0-9]+]]:sgpr(i32), [[UV7:%[0-9]+]]:sgpr(i32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<8 x i32>) + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[UV]](i32) + ; GFX12-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](i32) + ; GFX12-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](i32) + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY [[UV1]](i32) + ; GFX12-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](i32) + ; GFX12-NEXT: $sgpr1 = COPY 
[[INTRINSIC_CONVERGENT1]](i32) + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr(i32) = COPY [[UV2]](i32) + ; GFX12-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](i32) + ; GFX12-NEXT: $sgpr2 = COPY [[INTRINSIC_CONVERGENT2]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr(i32) = COPY [[UV3]](i32) + ; GFX12-NEXT: [[INTRINSIC_CONVERGENT3:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY8]](i32) + ; GFX12-NEXT: $sgpr3 = COPY [[INTRINSIC_CONVERGENT3]](i32) + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr(i32) = COPY [[UV4]](i32) + ; GFX12-NEXT: [[INTRINSIC_CONVERGENT4:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](i32) + ; GFX12-NEXT: $sgpr4 = COPY [[INTRINSIC_CONVERGENT4]](i32) + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:vgpr(i32) = COPY [[UV5]](i32) + ; GFX12-NEXT: [[INTRINSIC_CONVERGENT5:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY10]](i32) + ; GFX12-NEXT: $sgpr5 = COPY [[INTRINSIC_CONVERGENT5]](i32) + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:vgpr(i32) = COPY [[UV6]](i32) + ; GFX12-NEXT: [[INTRINSIC_CONVERGENT6:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY11]](i32) + ; GFX12-NEXT: $sgpr6 = COPY [[INTRINSIC_CONVERGENT6]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:vgpr(i32) = COPY [[UV7]](i32) + ; GFX12-NEXT: [[INTRINSIC_CONVERGENT7:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY12]](i32) + ; GFX12-NEXT: $sgpr7 = COPY [[INTRINSIC_CONVERGENT7]](i32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7 %val = call <8 x i32> @llvm.amdgcn.s.buffer.load.v8i32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <8 x i32> %val @@ -219,124 +219,124 @@ define amdgpu_ps <16 x i32> @s_buffer_load_v16i32(<4 x i32> inreg %rsrc, i32 inr ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GFX7-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<16 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s512)) - ; GFX7-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32), [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32), [[UV6:%[0-9]+]]:sgpr(s32), [[UV7:%[0-9]+]]:sgpr(s32), [[UV8:%[0-9]+]]:sgpr(s32), [[UV9:%[0-9]+]]:sgpr(s32), [[UV10:%[0-9]+]]:sgpr(s32), [[UV11:%[0-9]+]]:sgpr(s32), [[UV12:%[0-9]+]]:sgpr(s32), [[UV13:%[0-9]+]]:sgpr(s32), [[UV14:%[0-9]+]]:sgpr(s32), [[UV15:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<16 x s32>) - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) - ; GFX7-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; 
GFX7-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) - ; GFX7-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32) - ; GFX7-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32) - ; GFX7-NEXT: $sgpr2 = COPY [[INTRINSIC_CONVERGENT2]](s32) - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[UV3]](s32) - ; GFX7-NEXT: [[INTRINSIC_CONVERGENT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY8]](s32) - ; GFX7-NEXT: $sgpr3 = COPY [[INTRINSIC_CONVERGENT3]](s32) - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[UV4]](s32) - ; GFX7-NEXT: [[INTRINSIC_CONVERGENT4:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](s32) - ; GFX7-NEXT: $sgpr4 = COPY [[INTRINSIC_CONVERGENT4]](s32) - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[UV5]](s32) - ; GFX7-NEXT: [[INTRINSIC_CONVERGENT5:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY10]](s32) - ; GFX7-NEXT: $sgpr5 = COPY [[INTRINSIC_CONVERGENT5]](s32) - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY [[UV6]](s32) - ; GFX7-NEXT: [[INTRINSIC_CONVERGENT6:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY11]](s32) - ; GFX7-NEXT: $sgpr6 = COPY [[INTRINSIC_CONVERGENT6]](s32) - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY [[UV7]](s32) - ; GFX7-NEXT: [[INTRINSIC_CONVERGENT7:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY12]](s32) - ; GFX7-NEXT: $sgpr7 = COPY [[INTRINSIC_CONVERGENT7]](s32) - ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr(s32) = COPY [[UV8]](s32) - ; GFX7-NEXT: [[INTRINSIC_CONVERGENT8:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY13]](s32) - ; GFX7-NEXT: $sgpr8 = COPY [[INTRINSIC_CONVERGENT8]](s32) - ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr(s32) = COPY [[UV9]](s32) - ; GFX7-NEXT: [[INTRINSIC_CONVERGENT9:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY14]](s32) - ; GFX7-NEXT: $sgpr9 = COPY [[INTRINSIC_CONVERGENT9]](s32) - ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr(s32) = COPY [[UV10]](s32) - ; GFX7-NEXT: [[INTRINSIC_CONVERGENT10:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY15]](s32) - ; GFX7-NEXT: $sgpr10 = COPY [[INTRINSIC_CONVERGENT10]](s32) - ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr(s32) = COPY [[UV11]](s32) - ; GFX7-NEXT: [[INTRINSIC_CONVERGENT11:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY16]](s32) - ; GFX7-NEXT: $sgpr11 = COPY [[INTRINSIC_CONVERGENT11]](s32) - ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr(s32) = COPY [[UV12]](s32) - ; GFX7-NEXT: [[INTRINSIC_CONVERGENT12:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY17]](s32) - ; GFX7-NEXT: $sgpr12 = COPY [[INTRINSIC_CONVERGENT12]](s32) - ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr(s32) = COPY [[UV13]](s32) - ; GFX7-NEXT: [[INTRINSIC_CONVERGENT13:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY18]](s32) - ; GFX7-NEXT: $sgpr13 = COPY [[INTRINSIC_CONVERGENT13]](s32) - ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr(s32) = COPY [[UV14]](s32) - ; GFX7-NEXT: [[INTRINSIC_CONVERGENT14:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), 
[[COPY19]](s32) - ; GFX7-NEXT: $sgpr14 = COPY [[INTRINSIC_CONVERGENT14]](s32) - ; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr(s32) = COPY [[UV15]](s32) - ; GFX7-NEXT: [[INTRINSIC_CONVERGENT15:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY20]](s32) - ; GFX7-NEXT: $sgpr15 = COPY [[INTRINSIC_CONVERGENT15]](s32) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sgpr(i32) = COPY $sgpr6 + ; GFX7-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<16 x i32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[COPY4]](i32), 0 :: (dereferenceable invariant load (i512)) + ; GFX7-NEXT: [[UV:%[0-9]+]]:sgpr(i32), [[UV1:%[0-9]+]]:sgpr(i32), [[UV2:%[0-9]+]]:sgpr(i32), [[UV3:%[0-9]+]]:sgpr(i32), [[UV4:%[0-9]+]]:sgpr(i32), [[UV5:%[0-9]+]]:sgpr(i32), [[UV6:%[0-9]+]]:sgpr(i32), [[UV7:%[0-9]+]]:sgpr(i32), [[UV8:%[0-9]+]]:sgpr(i32), [[UV9:%[0-9]+]]:sgpr(i32), [[UV10:%[0-9]+]]:sgpr(i32), [[UV11:%[0-9]+]]:sgpr(i32), [[UV12:%[0-9]+]]:sgpr(i32), [[UV13:%[0-9]+]]:sgpr(i32), [[UV14:%[0-9]+]]:sgpr(i32), [[UV15:%[0-9]+]]:sgpr(i32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<16 x i32>) + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[UV]](i32) + ; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](i32) + ; GFX7-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](i32) + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY [[UV1]](i32) + ; GFX7-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](i32) + ; GFX7-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](i32) + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr(i32) = COPY [[UV2]](i32) + ; GFX7-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](i32) + ; GFX7-NEXT: $sgpr2 = COPY [[INTRINSIC_CONVERGENT2]](i32) + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr(i32) = COPY [[UV3]](i32) + ; GFX7-NEXT: [[INTRINSIC_CONVERGENT3:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY8]](i32) + ; GFX7-NEXT: $sgpr3 = COPY [[INTRINSIC_CONVERGENT3]](i32) + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr(i32) = COPY [[UV4]](i32) + ; GFX7-NEXT: [[INTRINSIC_CONVERGENT4:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](i32) + ; GFX7-NEXT: $sgpr4 = COPY [[INTRINSIC_CONVERGENT4]](i32) + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr(i32) = COPY [[UV5]](i32) + ; GFX7-NEXT: [[INTRINSIC_CONVERGENT5:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY10]](i32) + ; GFX7-NEXT: $sgpr5 = COPY [[INTRINSIC_CONVERGENT5]](i32) + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr(i32) = COPY [[UV6]](i32) + ; GFX7-NEXT: [[INTRINSIC_CONVERGENT6:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY11]](i32) + ; GFX7-NEXT: $sgpr6 = COPY [[INTRINSIC_CONVERGENT6]](i32) + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr(i32) = COPY [[UV7]](i32) + ; GFX7-NEXT: [[INTRINSIC_CONVERGENT7:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY12]](i32) + ; GFX7-NEXT: $sgpr7 = COPY [[INTRINSIC_CONVERGENT7]](i32) + ; 
GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr(i32) = COPY [[UV8]](i32) + ; GFX7-NEXT: [[INTRINSIC_CONVERGENT8:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY13]](i32) + ; GFX7-NEXT: $sgpr8 = COPY [[INTRINSIC_CONVERGENT8]](i32) + ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr(i32) = COPY [[UV9]](i32) + ; GFX7-NEXT: [[INTRINSIC_CONVERGENT9:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY14]](i32) + ; GFX7-NEXT: $sgpr9 = COPY [[INTRINSIC_CONVERGENT9]](i32) + ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr(i32) = COPY [[UV10]](i32) + ; GFX7-NEXT: [[INTRINSIC_CONVERGENT10:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY15]](i32) + ; GFX7-NEXT: $sgpr10 = COPY [[INTRINSIC_CONVERGENT10]](i32) + ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr(i32) = COPY [[UV11]](i32) + ; GFX7-NEXT: [[INTRINSIC_CONVERGENT11:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY16]](i32) + ; GFX7-NEXT: $sgpr11 = COPY [[INTRINSIC_CONVERGENT11]](i32) + ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr(i32) = COPY [[UV12]](i32) + ; GFX7-NEXT: [[INTRINSIC_CONVERGENT12:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY17]](i32) + ; GFX7-NEXT: $sgpr12 = COPY [[INTRINSIC_CONVERGENT12]](i32) + ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr(i32) = COPY [[UV13]](i32) + ; GFX7-NEXT: [[INTRINSIC_CONVERGENT13:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY18]](i32) + ; GFX7-NEXT: $sgpr13 = COPY [[INTRINSIC_CONVERGENT13]](i32) + ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr(i32) = COPY [[UV14]](i32) + ; GFX7-NEXT: [[INTRINSIC_CONVERGENT14:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY19]](i32) + ; GFX7-NEXT: $sgpr14 = COPY [[INTRINSIC_CONVERGENT14]](i32) + ; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr(i32) = COPY [[UV15]](i32) + ; GFX7-NEXT: [[INTRINSIC_CONVERGENT15:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY20]](i32) + ; GFX7-NEXT: $sgpr15 = COPY [[INTRINSIC_CONVERGENT15]](i32) ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15 ; ; GFX12-LABEL: name: s_buffer_load_v16i32 ; GFX12: bb.1 (%ir-block.0): ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<16 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s512)) - ; GFX12-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32), [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32), [[UV6:%[0-9]+]]:sgpr(s32), [[UV7:%[0-9]+]]:sgpr(s32), [[UV8:%[0-9]+]]:sgpr(s32), [[UV9:%[0-9]+]]:sgpr(s32), [[UV10:%[0-9]+]]:sgpr(s32), [[UV11:%[0-9]+]]:sgpr(s32), 
[[UV12:%[0-9]+]]:sgpr(s32), [[UV13:%[0-9]+]]:sgpr(s32), [[UV14:%[0-9]+]]:sgpr(s32), [[UV15:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<16 x s32>) - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; GFX12-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) - ; GFX12-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; GFX12-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) - ; GFX12-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32) - ; GFX12-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32) - ; GFX12-NEXT: $sgpr2 = COPY [[INTRINSIC_CONVERGENT2]](s32) - ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[UV3]](s32) - ; GFX12-NEXT: [[INTRINSIC_CONVERGENT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY8]](s32) - ; GFX12-NEXT: $sgpr3 = COPY [[INTRINSIC_CONVERGENT3]](s32) - ; GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[UV4]](s32) - ; GFX12-NEXT: [[INTRINSIC_CONVERGENT4:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](s32) - ; GFX12-NEXT: $sgpr4 = COPY [[INTRINSIC_CONVERGENT4]](s32) - ; GFX12-NEXT: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[UV5]](s32) - ; GFX12-NEXT: [[INTRINSIC_CONVERGENT5:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY10]](s32) - ; GFX12-NEXT: $sgpr5 = COPY [[INTRINSIC_CONVERGENT5]](s32) - ; GFX12-NEXT: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY [[UV6]](s32) - ; GFX12-NEXT: [[INTRINSIC_CONVERGENT6:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY11]](s32) - ; GFX12-NEXT: $sgpr6 = COPY [[INTRINSIC_CONVERGENT6]](s32) - ; GFX12-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY [[UV7]](s32) - ; GFX12-NEXT: [[INTRINSIC_CONVERGENT7:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY12]](s32) - ; GFX12-NEXT: $sgpr7 = COPY [[INTRINSIC_CONVERGENT7]](s32) - ; GFX12-NEXT: [[COPY13:%[0-9]+]]:vgpr(s32) = COPY [[UV8]](s32) - ; GFX12-NEXT: [[INTRINSIC_CONVERGENT8:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY13]](s32) - ; GFX12-NEXT: $sgpr8 = COPY [[INTRINSIC_CONVERGENT8]](s32) - ; GFX12-NEXT: [[COPY14:%[0-9]+]]:vgpr(s32) = COPY [[UV9]](s32) - ; GFX12-NEXT: [[INTRINSIC_CONVERGENT9:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY14]](s32) - ; GFX12-NEXT: $sgpr9 = COPY [[INTRINSIC_CONVERGENT9]](s32) - ; GFX12-NEXT: [[COPY15:%[0-9]+]]:vgpr(s32) = COPY [[UV10]](s32) - ; GFX12-NEXT: [[INTRINSIC_CONVERGENT10:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY15]](s32) - ; GFX12-NEXT: $sgpr10 = COPY [[INTRINSIC_CONVERGENT10]](s32) - ; GFX12-NEXT: [[COPY16:%[0-9]+]]:vgpr(s32) = COPY [[UV11]](s32) - ; GFX12-NEXT: [[INTRINSIC_CONVERGENT11:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY16]](s32) - ; GFX12-NEXT: $sgpr11 = COPY [[INTRINSIC_CONVERGENT11]](s32) - ; GFX12-NEXT: [[COPY17:%[0-9]+]]:vgpr(s32) = COPY [[UV12]](s32) - ; GFX12-NEXT: [[INTRINSIC_CONVERGENT12:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), 
[[COPY17]](s32) - ; GFX12-NEXT: $sgpr12 = COPY [[INTRINSIC_CONVERGENT12]](s32) - ; GFX12-NEXT: [[COPY18:%[0-9]+]]:vgpr(s32) = COPY [[UV13]](s32) - ; GFX12-NEXT: [[INTRINSIC_CONVERGENT13:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY18]](s32) - ; GFX12-NEXT: $sgpr13 = COPY [[INTRINSIC_CONVERGENT13]](s32) - ; GFX12-NEXT: [[COPY19:%[0-9]+]]:vgpr(s32) = COPY [[UV14]](s32) - ; GFX12-NEXT: [[INTRINSIC_CONVERGENT14:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY19]](s32) - ; GFX12-NEXT: $sgpr14 = COPY [[INTRINSIC_CONVERGENT14]](s32) - ; GFX12-NEXT: [[COPY20:%[0-9]+]]:vgpr(s32) = COPY [[UV15]](s32) - ; GFX12-NEXT: [[INTRINSIC_CONVERGENT15:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY20]](s32) - ; GFX12-NEXT: $sgpr15 = COPY [[INTRINSIC_CONVERGENT15]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sgpr(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<16 x i32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[COPY4]](i32), 0 :: (dereferenceable invariant load (i512)) + ; GFX12-NEXT: [[UV:%[0-9]+]]:sgpr(i32), [[UV1:%[0-9]+]]:sgpr(i32), [[UV2:%[0-9]+]]:sgpr(i32), [[UV3:%[0-9]+]]:sgpr(i32), [[UV4:%[0-9]+]]:sgpr(i32), [[UV5:%[0-9]+]]:sgpr(i32), [[UV6:%[0-9]+]]:sgpr(i32), [[UV7:%[0-9]+]]:sgpr(i32), [[UV8:%[0-9]+]]:sgpr(i32), [[UV9:%[0-9]+]]:sgpr(i32), [[UV10:%[0-9]+]]:sgpr(i32), [[UV11:%[0-9]+]]:sgpr(i32), [[UV12:%[0-9]+]]:sgpr(i32), [[UV13:%[0-9]+]]:sgpr(i32), [[UV14:%[0-9]+]]:sgpr(i32), [[UV15:%[0-9]+]]:sgpr(i32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<16 x i32>) + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[UV]](i32) + ; GFX12-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](i32) + ; GFX12-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](i32) + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY [[UV1]](i32) + ; GFX12-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](i32) + ; GFX12-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](i32) + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr(i32) = COPY [[UV2]](i32) + ; GFX12-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](i32) + ; GFX12-NEXT: $sgpr2 = COPY [[INTRINSIC_CONVERGENT2]](i32) + ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr(i32) = COPY [[UV3]](i32) + ; GFX12-NEXT: [[INTRINSIC_CONVERGENT3:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY8]](i32) + ; GFX12-NEXT: $sgpr3 = COPY [[INTRINSIC_CONVERGENT3]](i32) + ; GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr(i32) = COPY [[UV4]](i32) + ; GFX12-NEXT: [[INTRINSIC_CONVERGENT4:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](i32) + ; GFX12-NEXT: $sgpr4 = COPY [[INTRINSIC_CONVERGENT4]](i32) + ; GFX12-NEXT: [[COPY10:%[0-9]+]]:vgpr(i32) = COPY [[UV5]](i32) + ; GFX12-NEXT: [[INTRINSIC_CONVERGENT5:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY10]](i32) + ; GFX12-NEXT: $sgpr5 = COPY 
[[INTRINSIC_CONVERGENT5]](i32) + ; GFX12-NEXT: [[COPY11:%[0-9]+]]:vgpr(i32) = COPY [[UV6]](i32) + ; GFX12-NEXT: [[INTRINSIC_CONVERGENT6:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY11]](i32) + ; GFX12-NEXT: $sgpr6 = COPY [[INTRINSIC_CONVERGENT6]](i32) + ; GFX12-NEXT: [[COPY12:%[0-9]+]]:vgpr(i32) = COPY [[UV7]](i32) + ; GFX12-NEXT: [[INTRINSIC_CONVERGENT7:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY12]](i32) + ; GFX12-NEXT: $sgpr7 = COPY [[INTRINSIC_CONVERGENT7]](i32) + ; GFX12-NEXT: [[COPY13:%[0-9]+]]:vgpr(i32) = COPY [[UV8]](i32) + ; GFX12-NEXT: [[INTRINSIC_CONVERGENT8:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY13]](i32) + ; GFX12-NEXT: $sgpr8 = COPY [[INTRINSIC_CONVERGENT8]](i32) + ; GFX12-NEXT: [[COPY14:%[0-9]+]]:vgpr(i32) = COPY [[UV9]](i32) + ; GFX12-NEXT: [[INTRINSIC_CONVERGENT9:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY14]](i32) + ; GFX12-NEXT: $sgpr9 = COPY [[INTRINSIC_CONVERGENT9]](i32) + ; GFX12-NEXT: [[COPY15:%[0-9]+]]:vgpr(i32) = COPY [[UV10]](i32) + ; GFX12-NEXT: [[INTRINSIC_CONVERGENT10:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY15]](i32) + ; GFX12-NEXT: $sgpr10 = COPY [[INTRINSIC_CONVERGENT10]](i32) + ; GFX12-NEXT: [[COPY16:%[0-9]+]]:vgpr(i32) = COPY [[UV11]](i32) + ; GFX12-NEXT: [[INTRINSIC_CONVERGENT11:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY16]](i32) + ; GFX12-NEXT: $sgpr11 = COPY [[INTRINSIC_CONVERGENT11]](i32) + ; GFX12-NEXT: [[COPY17:%[0-9]+]]:vgpr(i32) = COPY [[UV12]](i32) + ; GFX12-NEXT: [[INTRINSIC_CONVERGENT12:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY17]](i32) + ; GFX12-NEXT: $sgpr12 = COPY [[INTRINSIC_CONVERGENT12]](i32) + ; GFX12-NEXT: [[COPY18:%[0-9]+]]:vgpr(i32) = COPY [[UV13]](i32) + ; GFX12-NEXT: [[INTRINSIC_CONVERGENT13:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY18]](i32) + ; GFX12-NEXT: $sgpr13 = COPY [[INTRINSIC_CONVERGENT13]](i32) + ; GFX12-NEXT: [[COPY19:%[0-9]+]]:vgpr(i32) = COPY [[UV14]](i32) + ; GFX12-NEXT: [[INTRINSIC_CONVERGENT14:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY19]](i32) + ; GFX12-NEXT: $sgpr14 = COPY [[INTRINSIC_CONVERGENT14]](i32) + ; GFX12-NEXT: [[COPY20:%[0-9]+]]:vgpr(i32) = COPY [[UV15]](i32) + ; GFX12-NEXT: [[INTRINSIC_CONVERGENT15:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY20]](i32) + ; GFX12-NEXT: $sgpr15 = COPY [[INTRINSIC_CONVERGENT15]](i32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15 %val = call <16 x i32> @llvm.amdgcn.s.buffer.load.v16i32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <16 x i32> %val @@ -348,32 +348,32 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX7-NEXT: 
[[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(f32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (i32)) + ; GFX7-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](f32) ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX12-LABEL: name: s_buffer_load_f32_vgpr_offset ; GFX12: bb.1 (%ir-block.0): ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(f32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (i32)) + ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret float %val @@ -384,36 +384,36 @@ define amdgpu_ps <2 x float> @s_buffer_load_v2f32_vgpr_offset(<4 x i32> inreg %r ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX7-NEXT: {{ $}} - ; 
GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s64), align 4) - ; GFX7-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<2 x s32>) - ; GFX7-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX7-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (i64), align 4) + ; GFX7-NEXT: [[UV:%[0-9]+]]:vgpr(f32), [[UV1:%[0-9]+]]:vgpr(f32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<2 x f32>) + ; GFX7-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX7-NEXT: $vgpr1 = COPY [[UV1]](f32) ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; ; GFX12-LABEL: name: s_buffer_load_v2f32_vgpr_offset ; GFX12: bb.1 (%ir-block.0): ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s64), align 4) - ; GFX12-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<2 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 
+ ; GFX12-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (i64), align 4) + ; GFX12-NEXT: [[UV:%[0-9]+]]:vgpr(f32), [[UV1:%[0-9]+]]:vgpr(f32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<2 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %val = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <2 x float> %val @@ -424,38 +424,38 @@ define amdgpu_ps <3 x float> @s_buffer_load_v3f32_vgpr_offset(<4 x i32> inreg %r ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x s32>) - ; GFX7-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX7-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX7-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX7-NEXT: [[UV:%[0-9]+]]:vgpr(f32), [[UV1:%[0-9]+]]:vgpr(f32), [[UV2:%[0-9]+]]:vgpr(f32), [[UV3:%[0-9]+]]:vgpr(f32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x f32>) + ; GFX7-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX7-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX7-NEXT: $vgpr2 = COPY [[UV2]](f32) ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; ; GFX12-LABEL: name: s_buffer_load_v3f32_vgpr_offset ; GFX12: bb.1 (%ir-block.0): ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), 
[[COPY3]](s32) - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<3 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s96), align 4) - ; GFX12-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<3 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<3 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (i96), align 4) + ; GFX12-NEXT: [[UV:%[0-9]+]]:vgpr(f32), [[UV1:%[0-9]+]]:vgpr(f32), [[UV2:%[0-9]+]]:vgpr(f32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<3 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <3 x float> %val @@ -466,40 +466,40 @@ define amdgpu_ps <4 x float> @s_buffer_load_v4f32_vgpr_offset(<4 x i32> inreg %r ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x s32>) - ; GFX7-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX7-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX7-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX7-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) 
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX7-NEXT: [[UV:%[0-9]+]]:vgpr(f32), [[UV1:%[0-9]+]]:vgpr(f32), [[UV2:%[0-9]+]]:vgpr(f32), [[UV3:%[0-9]+]]:vgpr(f32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x f32>) + ; GFX7-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX7-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX7-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX7-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: s_buffer_load_v4f32_vgpr_offset ; GFX12: bb.1 (%ir-block.0): ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX12-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX12-NEXT: [[UV:%[0-9]+]]:vgpr(f32), [[UV1:%[0-9]+]]:vgpr(f32), [[UV2:%[0-9]+]]:vgpr(f32), [[UV3:%[0-9]+]]:vgpr(f32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %val = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <4 x float> %val @@ -510,52 +510,52 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset(<4 x i32> inreg %r ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: 
$sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GFX7-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GFX7-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX7-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX7-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX7-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX7-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX7-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX7-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GFX7-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX7-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x f32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x f32>), [[AMDGPU_BUFFER_LOAD1]](<4 x f32>) + ; GFX7-NEXT: [[UV:%[0-9]+]]:vgpr(f32), [[UV1:%[0-9]+]]:vgpr(f32), [[UV2:%[0-9]+]]:vgpr(f32), [[UV3:%[0-9]+]]:vgpr(f32), [[UV4:%[0-9]+]]:vgpr(f32), [[UV5:%[0-9]+]]:vgpr(f32), [[UV6:%[0-9]+]]:vgpr(f32), [[UV7:%[0-9]+]]:vgpr(f32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x f32>) + ; GFX7-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX7-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX7-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX7-NEXT: $vgpr3 = COPY [[UV3]](f32) + ; GFX7-NEXT: $vgpr4 = COPY [[UV4]](f32) + ; GFX7-NEXT: $vgpr5 = COPY [[UV5]](f32) + ; GFX7-NEXT: $vgpr6 = COPY [[UV6]](f32) + ; GFX7-NEXT: $vgpr7 = COPY [[UV7]](f32) ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit 
$vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; ; GFX12-LABEL: name: s_buffer_load_v8f32_vgpr_offset ; GFX12: bb.1 (%ir-block.0): ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GFX12-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX12-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX12-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX12-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GFX12-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x f32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x f32>), [[AMDGPU_BUFFER_LOAD1]](<4 x f32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:vgpr(f32), [[UV1:%[0-9]+]]:vgpr(f32), [[UV2:%[0-9]+]]:vgpr(f32), [[UV3:%[0-9]+]]:vgpr(f32), [[UV4:%[0-9]+]]:vgpr(f32), [[UV5:%[0-9]+]]:vgpr(f32), [[UV6:%[0-9]+]]:vgpr(f32), [[UV7:%[0-9]+]]:vgpr(f32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) + ; 
GFX12-NEXT: $vgpr4 = COPY [[UV4]](f32) + ; GFX12-NEXT: $vgpr5 = COPY [[UV5]](f32) + ; GFX12-NEXT: $vgpr6 = COPY [[UV6]](f32) + ; GFX12-NEXT: $vgpr7 = COPY [[UV7]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <8 x float> %val @@ -566,72 +566,72 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset(<4 x i32> inreg ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) - ; GFX7-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>), [[AMDGPU_BUFFER_LOAD2]](<4 x s32>), [[AMDGPU_BUFFER_LOAD3]](<4 x s32>) - ; GFX7-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s32>) - ; GFX7-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX7-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX7-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX7-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX7-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX7-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX7-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GFX7-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; GFX7-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; GFX7-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; GFX7-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; GFX7-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; GFX7-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; GFX7-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; GFX7-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; GFX7-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX7-NEXT: 
[[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (i128) from unknown-address + 16, align 4) + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (i128) from unknown-address + 48, align 4) + ; GFX7-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x f32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x f32>), [[AMDGPU_BUFFER_LOAD1]](<4 x f32>), [[AMDGPU_BUFFER_LOAD2]](<4 x f32>), [[AMDGPU_BUFFER_LOAD3]](<4 x f32>) + ; GFX7-NEXT: [[UV:%[0-9]+]]:vgpr(f32), [[UV1:%[0-9]+]]:vgpr(f32), [[UV2:%[0-9]+]]:vgpr(f32), [[UV3:%[0-9]+]]:vgpr(f32), [[UV4:%[0-9]+]]:vgpr(f32), [[UV5:%[0-9]+]]:vgpr(f32), [[UV6:%[0-9]+]]:vgpr(f32), [[UV7:%[0-9]+]]:vgpr(f32), [[UV8:%[0-9]+]]:vgpr(f32), [[UV9:%[0-9]+]]:vgpr(f32), [[UV10:%[0-9]+]]:vgpr(f32), [[UV11:%[0-9]+]]:vgpr(f32), [[UV12:%[0-9]+]]:vgpr(f32), [[UV13:%[0-9]+]]:vgpr(f32), [[UV14:%[0-9]+]]:vgpr(f32), [[UV15:%[0-9]+]]:vgpr(f32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x f32>) + ; GFX7-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX7-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX7-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX7-NEXT: $vgpr3 = COPY [[UV3]](f32) + ; GFX7-NEXT: $vgpr4 = COPY [[UV4]](f32) + ; GFX7-NEXT: $vgpr5 = COPY [[UV5]](f32) + ; GFX7-NEXT: $vgpr6 = COPY [[UV6]](f32) + ; GFX7-NEXT: $vgpr7 = COPY [[UV7]](f32) + ; GFX7-NEXT: $vgpr8 = COPY [[UV8]](f32) + ; GFX7-NEXT: $vgpr9 = COPY [[UV9]](f32) + ; GFX7-NEXT: $vgpr10 = COPY [[UV10]](f32) + ; GFX7-NEXT: $vgpr11 = COPY [[UV11]](f32) + ; GFX7-NEXT: $vgpr12 = COPY [[UV12]](f32) + ; GFX7-NEXT: $vgpr13 = COPY [[UV13]](f32) + ; GFX7-NEXT: $vgpr14 = COPY [[UV14]](f32) + ; GFX7-NEXT: $vgpr15 = COPY [[UV15]](f32) ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 ; ; GFX12-LABEL: name: s_buffer_load_v16f32_vgpr_offset ; GFX12: bb.1 (%ir-block.0): ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), 
[[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) - ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>), [[AMDGPU_BUFFER_LOAD2]](<4 x s32>), [[AMDGPU_BUFFER_LOAD3]](<4 x s32>) - ; GFX12-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX12-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX12-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX12-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GFX12-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; GFX12-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; GFX12-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; GFX12-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; GFX12-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; GFX12-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; GFX12-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; GFX12-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; GFX12-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x f32>) = 
G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (i128) from unknown-address + 16, align 4) + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (i128) from unknown-address + 48, align 4) + ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x f32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x f32>), [[AMDGPU_BUFFER_LOAD1]](<4 x f32>), [[AMDGPU_BUFFER_LOAD2]](<4 x f32>), [[AMDGPU_BUFFER_LOAD3]](<4 x f32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:vgpr(f32), [[UV1:%[0-9]+]]:vgpr(f32), [[UV2:%[0-9]+]]:vgpr(f32), [[UV3:%[0-9]+]]:vgpr(f32), [[UV4:%[0-9]+]]:vgpr(f32), [[UV5:%[0-9]+]]:vgpr(f32), [[UV6:%[0-9]+]]:vgpr(f32), [[UV7:%[0-9]+]]:vgpr(f32), [[UV8:%[0-9]+]]:vgpr(f32), [[UV9:%[0-9]+]]:vgpr(f32), [[UV10:%[0-9]+]]:vgpr(f32), [[UV11:%[0-9]+]]:vgpr(f32), [[UV12:%[0-9]+]]:vgpr(f32), [[UV13:%[0-9]+]]:vgpr(f32), [[UV14:%[0-9]+]]:vgpr(f32), [[UV15:%[0-9]+]]:vgpr(f32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) + ; GFX12-NEXT: $vgpr4 = COPY [[UV4]](f32) + ; GFX12-NEXT: $vgpr5 = COPY [[UV5]](f32) + ; GFX12-NEXT: $vgpr6 = COPY [[UV6]](f32) + ; GFX12-NEXT: $vgpr7 = COPY [[UV7]](f32) + ; GFX12-NEXT: $vgpr8 = COPY [[UV8]](f32) + ; GFX12-NEXT: $vgpr9 = COPY [[UV9]](f32) + ; GFX12-NEXT: $vgpr10 = COPY [[UV10]](f32) + ; GFX12-NEXT: $vgpr11 = COPY [[UV11]](f32) + ; GFX12-NEXT: $vgpr12 = COPY [[UV12]](f32) + ; GFX12-NEXT: $vgpr13 = COPY [[UV13]](f32) + ; GFX12-NEXT: $vgpr14 = COPY [[UV14]](f32) + ; GFX12-NEXT: $vgpr15 = COPY [[UV15]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 %val = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <16 x float> %val @@ -642,36 +642,36 @@ define amdgpu_ps void @s_buffer_load_i96_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 ; GFX7-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD 
[[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s96) = G_TRUNC [[AMDGPU_BUFFER_LOAD]](s128) - ; GFX7-NEXT: G_STORE [[TRUNC]](s96), [[DEF]](p1) :: (store (s96) into `ptr addrspace(1) undef`, align 8, addrspace 1) + ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(i128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i96) = G_TRUNC [[AMDGPU_BUFFER_LOAD]](i128) + ; GFX7-NEXT: G_STORE [[TRUNC]](i96), [[DEF]](p1) :: (store (i96) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; GFX7-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: s_buffer_load_i96_vgpr_offset ; GFX12: bb.1 (%ir-block.0): ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 ; GFX12-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s96) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s96), align 4) + ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(i96) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (i96), align 4) ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) - ; GFX12-NEXT: G_STORE [[AMDGPU_BUFFER_LOAD]](s96), [[COPY5]](p1) :: (store (s96) into `ptr addrspace(1) undef`, align 8, addrspace 1) + ; GFX12-NEXT: G_STORE [[AMDGPU_BUFFER_LOAD]](i96), [[COPY5]](p1) :: (store (i96) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; GFX12-NEXT: S_ENDPGM 0 %val = call i96 @llvm.amdgcn.s.buffer.load.i96(<4 x i32> %rsrc, i32 %soffset, i32 0) store i96 %val, ptr addrspace(1) undef @@ -684,48 +684,48 @@ define amdgpu_ps void @s_buffer_load_i256_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = 
G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 ; GFX7-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[MV:%[0-9]+]]:vgpr(s256) = G_MERGE_VALUES [[AMDGPU_BUFFER_LOAD]](s128), [[AMDGPU_BUFFER_LOAD1]](s128) - ; GFX7-NEXT: [[UV:%[0-9]+]]:vgpr(s128), [[UV1:%[0-9]+]]:vgpr(s128) = G_UNMERGE_VALUES [[MV]](s256) - ; GFX7-NEXT: G_STORE [[UV]](s128), [[DEF]](p1) :: (store (s128) into `ptr addrspace(1) undef`, align 8, addrspace 1) - ; GFX7-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 - ; GFX7-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; GFX7-NEXT: G_STORE [[UV1]](s128), [[PTR_ADD]](p1) :: (store (s128) into `ptr addrspace(1) undef` + 16, align 8, addrspace 1) + ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(i128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(i128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX7-NEXT: [[MV:%[0-9]+]]:vgpr(i256) = G_MERGE_VALUES [[AMDGPU_BUFFER_LOAD]](i128), [[AMDGPU_BUFFER_LOAD1]](i128) + ; GFX7-NEXT: [[UV:%[0-9]+]]:vgpr(i128), [[UV1:%[0-9]+]]:vgpr(i128) = G_UNMERGE_VALUES [[MV]](i256) + ; GFX7-NEXT: G_STORE [[UV]](i128), [[DEF]](p1) :: (store (i128) into `ptr addrspace(1) undef`, align 8, addrspace 1) + ; GFX7-NEXT: [[C2:%[0-9]+]]:sgpr(i64) = G_CONSTANT i64 16 + ; GFX7-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](i64) + ; GFX7-NEXT: G_STORE [[UV1]](i128), [[PTR_ADD]](p1) :: (store (i128) into `ptr addrspace(1) undef` + 16, align 8, addrspace 1) ; GFX7-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: s_buffer_load_i256_vgpr_offset ; GFX12: bb.1 (%ir-block.0): ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX12-NEXT: 
[[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 ; GFX12-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX12-NEXT: [[MV:%[0-9]+]]:vgpr(s256) = G_MERGE_VALUES [[AMDGPU_BUFFER_LOAD]](s128), [[AMDGPU_BUFFER_LOAD1]](s128) - ; GFX12-NEXT: [[UV:%[0-9]+]]:vgpr(s128), [[UV1:%[0-9]+]]:vgpr(s128) = G_UNMERGE_VALUES [[MV]](s256) + ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(i128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(i128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX12-NEXT: [[MV:%[0-9]+]]:vgpr(i256) = G_MERGE_VALUES [[AMDGPU_BUFFER_LOAD]](i128), [[AMDGPU_BUFFER_LOAD1]](i128) + ; GFX12-NEXT: [[UV:%[0-9]+]]:vgpr(i128), [[UV1:%[0-9]+]]:vgpr(i128) = G_UNMERGE_VALUES [[MV]](i256) ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) - ; GFX12-NEXT: G_STORE [[UV]](s128), [[COPY5]](p1) :: (store (s128) into `ptr addrspace(1) undef`, align 8, addrspace 1) - ; GFX12-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 - ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) + ; GFX12-NEXT: G_STORE [[UV]](i128), [[COPY5]](p1) :: (store (i128) into `ptr addrspace(1) undef`, align 8, addrspace 1) + ; GFX12-NEXT: [[C2:%[0-9]+]]:sgpr(i64) = G_CONSTANT i64 16 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](i64) ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr(p1) = COPY [[PTR_ADD]](p1) - ; GFX12-NEXT: G_STORE [[UV1]](s128), [[COPY6]](p1) :: (store (s128) into `ptr addrspace(1) undef` + 16, align 8, addrspace 1) + ; GFX12-NEXT: G_STORE [[UV1]](i128), [[COPY6]](p1) :: (store (i128) into `ptr addrspace(1) undef` + 16, align 8, addrspace 1) ; GFX12-NEXT: S_ENDPGM 0 %val = call i256 @llvm.amdgcn.s.buffer.load.i256(<4 x i32> %rsrc, i32 %soffset, i32 0) store i256 %val, ptr addrspace(1) undef @@ -738,66 +738,66 @@ define amdgpu_ps void @s_buffer_load_i512_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX7-NEXT: 
[[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 ; GFX7-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) - ; GFX7-NEXT: [[MV:%[0-9]+]]:vgpr(s512) = G_MERGE_VALUES [[AMDGPU_BUFFER_LOAD]](s128), [[AMDGPU_BUFFER_LOAD1]](s128), [[AMDGPU_BUFFER_LOAD2]](s128), [[AMDGPU_BUFFER_LOAD3]](s128) - ; GFX7-NEXT: [[UV:%[0-9]+]]:vgpr(s128), [[UV1:%[0-9]+]]:vgpr(s128), [[UV2:%[0-9]+]]:vgpr(s128), [[UV3:%[0-9]+]]:vgpr(s128) = G_UNMERGE_VALUES [[MV]](s512) - ; GFX7-NEXT: G_STORE [[UV]](s128), [[DEF]](p1) :: (store (s128) into `ptr addrspace(1) undef`, align 8, addrspace 1) - ; GFX7-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 - ; GFX7-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; GFX7-NEXT: G_STORE [[UV1]](s128), [[PTR_ADD]](p1) :: (store (s128) into `ptr addrspace(1) undef` + 16, align 8, addrspace 1) - ; GFX7-NEXT: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 - ; GFX7-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) - ; GFX7-NEXT: G_STORE [[UV2]](s128), [[PTR_ADD1]](p1) :: (store (s128) into `ptr addrspace(1) undef` + 32, align 8, addrspace 1) - ; GFX7-NEXT: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 - ; GFX7-NEXT: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) - ; GFX7-NEXT: G_STORE [[UV3]](s128), [[PTR_ADD2]](p1) :: (store (s128) into `ptr addrspace(1) undef` + 48, align 8, addrspace 1) + ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(i128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(i128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(i128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (i128) from unknown-address + 16, align 4) + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(i128) 
= G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (i128) from unknown-address + 48, align 4) + ; GFX7-NEXT: [[MV:%[0-9]+]]:vgpr(i512) = G_MERGE_VALUES [[AMDGPU_BUFFER_LOAD]](i128), [[AMDGPU_BUFFER_LOAD1]](i128), [[AMDGPU_BUFFER_LOAD2]](i128), [[AMDGPU_BUFFER_LOAD3]](i128) + ; GFX7-NEXT: [[UV:%[0-9]+]]:vgpr(i128), [[UV1:%[0-9]+]]:vgpr(i128), [[UV2:%[0-9]+]]:vgpr(i128), [[UV3:%[0-9]+]]:vgpr(i128) = G_UNMERGE_VALUES [[MV]](i512) + ; GFX7-NEXT: G_STORE [[UV]](i128), [[DEF]](p1) :: (store (i128) into `ptr addrspace(1) undef`, align 8, addrspace 1) + ; GFX7-NEXT: [[C2:%[0-9]+]]:sgpr(i64) = G_CONSTANT i64 16 + ; GFX7-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](i64) + ; GFX7-NEXT: G_STORE [[UV1]](i128), [[PTR_ADD]](p1) :: (store (i128) into `ptr addrspace(1) undef` + 16, align 8, addrspace 1) + ; GFX7-NEXT: [[C3:%[0-9]+]]:sgpr(i64) = G_CONSTANT i64 32 + ; GFX7-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](i64) + ; GFX7-NEXT: G_STORE [[UV2]](i128), [[PTR_ADD1]](p1) :: (store (i128) into `ptr addrspace(1) undef` + 32, align 8, addrspace 1) + ; GFX7-NEXT: [[C4:%[0-9]+]]:sgpr(i64) = G_CONSTANT i64 48 + ; GFX7-NEXT: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](i64) + ; GFX7-NEXT: G_STORE [[UV3]](i128), [[PTR_ADD2]](p1) :: (store (i128) into `ptr addrspace(1) undef` + 48, align 8, addrspace 1) ; GFX7-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: s_buffer_load_i512_vgpr_offset ; GFX12: bb.1 (%ir-block.0): ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 ; GFX12-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) - ; GFX12-NEXT: [[MV:%[0-9]+]]:vgpr(s512) = 
G_MERGE_VALUES [[AMDGPU_BUFFER_LOAD]](s128), [[AMDGPU_BUFFER_LOAD1]](s128), [[AMDGPU_BUFFER_LOAD2]](s128), [[AMDGPU_BUFFER_LOAD3]](s128) - ; GFX12-NEXT: [[UV:%[0-9]+]]:vgpr(s128), [[UV1:%[0-9]+]]:vgpr(s128), [[UV2:%[0-9]+]]:vgpr(s128), [[UV3:%[0-9]+]]:vgpr(s128) = G_UNMERGE_VALUES [[MV]](s512) + ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(i128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(i128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(i128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (i128) from unknown-address + 16, align 4) + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(i128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (i128) from unknown-address + 48, align 4) + ; GFX12-NEXT: [[MV:%[0-9]+]]:vgpr(i512) = G_MERGE_VALUES [[AMDGPU_BUFFER_LOAD]](i128), [[AMDGPU_BUFFER_LOAD1]](i128), [[AMDGPU_BUFFER_LOAD2]](i128), [[AMDGPU_BUFFER_LOAD3]](i128) + ; GFX12-NEXT: [[UV:%[0-9]+]]:vgpr(i128), [[UV1:%[0-9]+]]:vgpr(i128), [[UV2:%[0-9]+]]:vgpr(i128), [[UV3:%[0-9]+]]:vgpr(i128) = G_UNMERGE_VALUES [[MV]](i512) ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) - ; GFX12-NEXT: G_STORE [[UV]](s128), [[COPY5]](p1) :: (store (s128) into `ptr addrspace(1) undef`, align 8, addrspace 1) - ; GFX12-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 - ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) + ; GFX12-NEXT: G_STORE [[UV]](i128), [[COPY5]](p1) :: (store (i128) into `ptr addrspace(1) undef`, align 8, addrspace 1) + ; GFX12-NEXT: [[C2:%[0-9]+]]:sgpr(i64) = G_CONSTANT i64 16 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](i64) ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr(p1) = COPY [[PTR_ADD]](p1) - ; GFX12-NEXT: G_STORE [[UV1]](s128), [[COPY6]](p1) :: (store (s128) into `ptr addrspace(1) undef` + 16, align 8, addrspace 1) - ; GFX12-NEXT: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 - ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) + ; GFX12-NEXT: G_STORE [[UV1]](i128), [[COPY6]](p1) :: (store (i128) into `ptr addrspace(1) undef` + 16, align 8, addrspace 1) + ; GFX12-NEXT: [[C3:%[0-9]+]]:sgpr(i64) = G_CONSTANT i64 32 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](i64) ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr(p1) = COPY [[PTR_ADD1]](p1) - ; GFX12-NEXT: G_STORE [[UV2]](s128), [[COPY7]](p1) :: (store (s128) into `ptr addrspace(1) undef` + 32, align 8, addrspace 1) - ; GFX12-NEXT: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 - ; GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) + ; GFX12-NEXT: G_STORE [[UV2]](i128), [[COPY7]](p1) :: (store (i128) into `ptr addrspace(1) undef` + 32, align 8, addrspace 1) + ; GFX12-NEXT: [[C4:%[0-9]+]]:sgpr(i64) = G_CONSTANT i64 48 + ; GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](i64) ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr(p1) = COPY [[PTR_ADD2]](p1) - ; GFX12-NEXT: G_STORE [[UV3]](s128), [[COPY8]](p1) :: (store (s128) into `ptr 
addrspace(1) undef` + 48, align 8, addrspace 1) + ; GFX12-NEXT: G_STORE [[UV3]](i128), [[COPY8]](p1) :: (store (i128) into `ptr addrspace(1) undef` + 48, align 8, addrspace 1) ; GFX12-NEXT: S_ENDPGM 0 %val = call i512 @llvm.amdgcn.s.buffer.load.i512(<4 x i32> %rsrc, i32 %soffset, i32 0) store i512 %val, ptr addrspace(1) undef @@ -810,48 +810,48 @@ define amdgpu_ps void @s_buffer_load_v16i16_vgpr_offset(<4 x i32> inreg %rsrc, i ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 ; GFX7-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s16>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<8 x s16>), [[AMDGPU_BUFFER_LOAD1]](<8 x s16>) - ; GFX7-NEXT: [[UV:%[0-9]+]]:vgpr(<8 x s16>), [[UV1:%[0-9]+]]:vgpr(<8 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s16>) - ; GFX7-NEXT: G_STORE [[UV]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `ptr addrspace(1) undef`, align 32, addrspace 1) - ; GFX7-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 - ; GFX7-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; GFX7-NEXT: G_STORE [[UV1]](<8 x s16>), [[PTR_ADD]](p1) :: (store (<8 x s16>) into `ptr addrspace(1) undef` + 16, basealign 32, addrspace 1) + ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<8 x i16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<8 x i16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX7-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x i16>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<8 x i16>), [[AMDGPU_BUFFER_LOAD1]](<8 x i16>) + ; GFX7-NEXT: [[UV:%[0-9]+]]:vgpr(<8 x i16>), [[UV1:%[0-9]+]]:vgpr(<8 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x i16>) + ; GFX7-NEXT: G_STORE [[UV]](<8 x i16>), [[DEF]](p1) :: (store (<8 x i16>) into `ptr addrspace(1) 
undef`, align 32, addrspace 1) + ; GFX7-NEXT: [[C2:%[0-9]+]]:sgpr(i64) = G_CONSTANT i64 16 + ; GFX7-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](i64) + ; GFX7-NEXT: G_STORE [[UV1]](<8 x i16>), [[PTR_ADD]](p1) :: (store (<8 x i16>) into `ptr addrspace(1) undef` + 16, basealign 32, addrspace 1) ; GFX7-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: s_buffer_load_v16i16_vgpr_offset ; GFX12: bb.1 (%ir-block.0): ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 ; GFX12-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s16>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<8 x s16>), [[AMDGPU_BUFFER_LOAD1]](<8 x s16>) - ; GFX12-NEXT: [[UV:%[0-9]+]]:vgpr(<8 x s16>), [[UV1:%[0-9]+]]:vgpr(<8 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s16>) + ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<8 x i16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<8 x i16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x i16>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<8 x i16>), [[AMDGPU_BUFFER_LOAD1]](<8 x i16>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:vgpr(<8 x i16>), [[UV1:%[0-9]+]]:vgpr(<8 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x i16>) ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) - ; GFX12-NEXT: G_STORE [[UV]](<8 x s16>), [[COPY5]](p1) :: (store (<8 x s16>) into `ptr addrspace(1) undef`, align 32, addrspace 1) - ; GFX12-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 - ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) + ; GFX12-NEXT: G_STORE [[UV]](<8 x i16>), [[COPY5]](p1) :: (store (<8 x i16>) into `ptr addrspace(1) undef`, align 32, addrspace 1) + ; GFX12-NEXT: [[C2:%[0-9]+]]:sgpr(i64) = G_CONSTANT 
i64 16 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](i64) ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr(p1) = COPY [[PTR_ADD]](p1) - ; GFX12-NEXT: G_STORE [[UV1]](<8 x s16>), [[COPY6]](p1) :: (store (<8 x s16>) into `ptr addrspace(1) undef` + 16, basealign 32, addrspace 1) + ; GFX12-NEXT: G_STORE [[UV1]](<8 x i16>), [[COPY6]](p1) :: (store (<8 x i16>) into `ptr addrspace(1) undef` + 16, basealign 32, addrspace 1) ; GFX12-NEXT: S_ENDPGM 0 %val = call <16 x i16> @llvm.amdgcn.s.buffer.load.v16i16(<4 x i32> %rsrc, i32 %soffset, i32 0) store <16 x i16> %val, ptr addrspace(1) undef @@ -864,66 +864,66 @@ define amdgpu_ps void @s_buffer_load_v32i16_vgpr_offset(<4 x i32> inreg %rsrc, i ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 ; GFX7-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) - ; GFX7-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<32 x s16>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<8 x s16>), [[AMDGPU_BUFFER_LOAD1]](<8 x s16>), [[AMDGPU_BUFFER_LOAD2]](<8 x s16>), [[AMDGPU_BUFFER_LOAD3]](<8 x s16>) - ; GFX7-NEXT: [[UV:%[0-9]+]]:vgpr(<8 x s16>), [[UV1:%[0-9]+]]:vgpr(<8 x s16>), [[UV2:%[0-9]+]]:vgpr(<8 x s16>), [[UV3:%[0-9]+]]:vgpr(<8 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<32 x s16>) - ; GFX7-NEXT: G_STORE [[UV]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `ptr addrspace(1) undef`, align 64, addrspace 1) - ; GFX7-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 - ; GFX7-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; GFX7-NEXT: G_STORE [[UV1]](<8 x s16>), [[PTR_ADD]](p1) :: (store (<8 x s16>) into `ptr addrspace(1) undef` + 16, basealign 64, addrspace 1) - ; GFX7-NEXT: 
[[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 - ; GFX7-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) - ; GFX7-NEXT: G_STORE [[UV2]](<8 x s16>), [[PTR_ADD1]](p1) :: (store (<8 x s16>) into `ptr addrspace(1) undef` + 32, align 32, basealign 64, addrspace 1) - ; GFX7-NEXT: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 - ; GFX7-NEXT: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) - ; GFX7-NEXT: G_STORE [[UV3]](<8 x s16>), [[PTR_ADD2]](p1) :: (store (<8 x s16>) into `ptr addrspace(1) undef` + 48, basealign 64, addrspace 1) + ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<8 x i16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<8 x i16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<8 x i16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (i128) from unknown-address + 16, align 4) + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<8 x i16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (i128) from unknown-address + 48, align 4) + ; GFX7-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<32 x i16>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<8 x i16>), [[AMDGPU_BUFFER_LOAD1]](<8 x i16>), [[AMDGPU_BUFFER_LOAD2]](<8 x i16>), [[AMDGPU_BUFFER_LOAD3]](<8 x i16>) + ; GFX7-NEXT: [[UV:%[0-9]+]]:vgpr(<8 x i16>), [[UV1:%[0-9]+]]:vgpr(<8 x i16>), [[UV2:%[0-9]+]]:vgpr(<8 x i16>), [[UV3:%[0-9]+]]:vgpr(<8 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<32 x i16>) + ; GFX7-NEXT: G_STORE [[UV]](<8 x i16>), [[DEF]](p1) :: (store (<8 x i16>) into `ptr addrspace(1) undef`, align 64, addrspace 1) + ; GFX7-NEXT: [[C2:%[0-9]+]]:sgpr(i64) = G_CONSTANT i64 16 + ; GFX7-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](i64) + ; GFX7-NEXT: G_STORE [[UV1]](<8 x i16>), [[PTR_ADD]](p1) :: (store (<8 x i16>) into `ptr addrspace(1) undef` + 16, basealign 64, addrspace 1) + ; GFX7-NEXT: [[C3:%[0-9]+]]:sgpr(i64) = G_CONSTANT i64 32 + ; GFX7-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](i64) + ; GFX7-NEXT: G_STORE [[UV2]](<8 x i16>), [[PTR_ADD1]](p1) :: (store (<8 x i16>) into `ptr addrspace(1) undef` + 32, align 32, basealign 64, addrspace 1) + ; GFX7-NEXT: [[C4:%[0-9]+]]:sgpr(i64) = G_CONSTANT i64 48 + ; GFX7-NEXT: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](i64) + ; GFX7-NEXT: G_STORE [[UV3]](<8 x i16>), [[PTR_ADD2]](p1) :: (store (<8 x i16>) into `ptr addrspace(1) undef` + 48, basealign 64, addrspace 1) ; GFX7-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: s_buffer_load_v32i16_vgpr_offset ; GFX12: bb.1 (%ir-block.0): ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX12-NEXT: 
[[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 ; GFX12-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) - ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<32 x s16>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<8 x s16>), [[AMDGPU_BUFFER_LOAD1]](<8 x s16>), [[AMDGPU_BUFFER_LOAD2]](<8 x s16>), [[AMDGPU_BUFFER_LOAD3]](<8 x s16>) - ; GFX12-NEXT: [[UV:%[0-9]+]]:vgpr(<8 x s16>), [[UV1:%[0-9]+]]:vgpr(<8 x s16>), [[UV2:%[0-9]+]]:vgpr(<8 x s16>), [[UV3:%[0-9]+]]:vgpr(<8 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<32 x s16>) + ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<8 x i16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<8 x i16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<8 x i16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (i128) from unknown-address + 16, align 4) + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<8 x i16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (i128) from unknown-address + 48, align 4) + ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<32 x i16>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<8 x i16>), [[AMDGPU_BUFFER_LOAD1]](<8 x i16>), [[AMDGPU_BUFFER_LOAD2]](<8 x i16>), [[AMDGPU_BUFFER_LOAD3]](<8 x i16>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:vgpr(<8 x i16>), [[UV1:%[0-9]+]]:vgpr(<8 x i16>), [[UV2:%[0-9]+]]:vgpr(<8 x i16>), [[UV3:%[0-9]+]]:vgpr(<8 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<32 x i16>) ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) - ; GFX12-NEXT: G_STORE [[UV]](<8 x s16>), [[COPY5]](p1) :: (store (<8 x s16>) into `ptr addrspace(1) undef`, align 64, addrspace 1) - 
; GFX12-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 - ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) + ; GFX12-NEXT: G_STORE [[UV]](<8 x i16>), [[COPY5]](p1) :: (store (<8 x i16>) into `ptr addrspace(1) undef`, align 64, addrspace 1) + ; GFX12-NEXT: [[C2:%[0-9]+]]:sgpr(i64) = G_CONSTANT i64 16 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](i64) ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr(p1) = COPY [[PTR_ADD]](p1) - ; GFX12-NEXT: G_STORE [[UV1]](<8 x s16>), [[COPY6]](p1) :: (store (<8 x s16>) into `ptr addrspace(1) undef` + 16, basealign 64, addrspace 1) - ; GFX12-NEXT: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 - ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) + ; GFX12-NEXT: G_STORE [[UV1]](<8 x i16>), [[COPY6]](p1) :: (store (<8 x i16>) into `ptr addrspace(1) undef` + 16, basealign 64, addrspace 1) + ; GFX12-NEXT: [[C3:%[0-9]+]]:sgpr(i64) = G_CONSTANT i64 32 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](i64) ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr(p1) = COPY [[PTR_ADD1]](p1) - ; GFX12-NEXT: G_STORE [[UV2]](<8 x s16>), [[COPY7]](p1) :: (store (<8 x s16>) into `ptr addrspace(1) undef` + 32, align 32, basealign 64, addrspace 1) - ; GFX12-NEXT: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 - ; GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) + ; GFX12-NEXT: G_STORE [[UV2]](<8 x i16>), [[COPY7]](p1) :: (store (<8 x i16>) into `ptr addrspace(1) undef` + 32, align 32, basealign 64, addrspace 1) + ; GFX12-NEXT: [[C4:%[0-9]+]]:sgpr(i64) = G_CONSTANT i64 48 + ; GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](i64) ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr(p1) = COPY [[PTR_ADD2]](p1) - ; GFX12-NEXT: G_STORE [[UV3]](<8 x s16>), [[COPY8]](p1) :: (store (<8 x s16>) into `ptr addrspace(1) undef` + 48, basealign 64, addrspace 1) + ; GFX12-NEXT: G_STORE [[UV3]](<8 x i16>), [[COPY8]](p1) :: (store (<8 x i16>) into `ptr addrspace(1) undef` + 48, basealign 64, addrspace 1) ; GFX12-NEXT: S_ENDPGM 0 %val = call <32 x i16> @llvm.amdgcn.s.buffer.load.v32i16(<4 x i32> %rsrc, i32 %soffset, i32 0) store <32 x i16> %val, ptr addrspace(1) undef @@ -936,48 +936,48 @@ define amdgpu_ps void @s_buffer_load_v4i64_vgpr_offset(<4 x i32> inreg %rsrc, i3 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 ; GFX7-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), 
[[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s64>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x s64>), [[AMDGPU_BUFFER_LOAD1]](<2 x s64>) - ; GFX7-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s64>), [[UV1:%[0-9]+]]:vgpr(<2 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s64>) - ; GFX7-NEXT: G_STORE [[UV]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `ptr addrspace(1) undef`, align 32, addrspace 1) - ; GFX7-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 - ; GFX7-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; GFX7-NEXT: G_STORE [[UV1]](<2 x s64>), [[PTR_ADD]](p1) :: (store (<2 x s64>) into `ptr addrspace(1) undef` + 16, basealign 32, addrspace 1) + ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x i64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x i64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX7-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x i64>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x i64>), [[AMDGPU_BUFFER_LOAD1]](<2 x i64>) + ; GFX7-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x i64>), [[UV1:%[0-9]+]]:vgpr(<2 x i64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x i64>) + ; GFX7-NEXT: G_STORE [[UV]](<2 x i64>), [[DEF]](p1) :: (store (<2 x i64>) into `ptr addrspace(1) undef`, align 32, addrspace 1) + ; GFX7-NEXT: [[C2:%[0-9]+]]:sgpr(i64) = G_CONSTANT i64 16 + ; GFX7-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](i64) + ; GFX7-NEXT: G_STORE [[UV1]](<2 x i64>), [[PTR_ADD]](p1) :: (store (<2 x i64>) into `ptr addrspace(1) undef` + 16, basealign 32, addrspace 1) ; GFX7-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: s_buffer_load_v4i64_vgpr_offset ; GFX12: bb.1 (%ir-block.0): ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 ; GFX12-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], 
[[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s64>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x s64>), [[AMDGPU_BUFFER_LOAD1]](<2 x s64>) - ; GFX12-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s64>), [[UV1:%[0-9]+]]:vgpr(<2 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s64>) + ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x i64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x i64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x i64>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x i64>), [[AMDGPU_BUFFER_LOAD1]](<2 x i64>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x i64>), [[UV1:%[0-9]+]]:vgpr(<2 x i64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x i64>) ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) - ; GFX12-NEXT: G_STORE [[UV]](<2 x s64>), [[COPY5]](p1) :: (store (<2 x s64>) into `ptr addrspace(1) undef`, align 32, addrspace 1) - ; GFX12-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 - ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) + ; GFX12-NEXT: G_STORE [[UV]](<2 x i64>), [[COPY5]](p1) :: (store (<2 x i64>) into `ptr addrspace(1) undef`, align 32, addrspace 1) + ; GFX12-NEXT: [[C2:%[0-9]+]]:sgpr(i64) = G_CONSTANT i64 16 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](i64) ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr(p1) = COPY [[PTR_ADD]](p1) - ; GFX12-NEXT: G_STORE [[UV1]](<2 x s64>), [[COPY6]](p1) :: (store (<2 x s64>) into `ptr addrspace(1) undef` + 16, basealign 32, addrspace 1) + ; GFX12-NEXT: G_STORE [[UV1]](<2 x i64>), [[COPY6]](p1) :: (store (<2 x i64>) into `ptr addrspace(1) undef` + 16, basealign 32, addrspace 1) ; GFX12-NEXT: S_ENDPGM 0 %val = call <4 x i64> @llvm.amdgcn.s.buffer.load.v4i64(<4 x i32> %rsrc, i32 %soffset, i32 0) store <4 x i64> %val, ptr addrspace(1) undef @@ -990,66 +990,66 @@ define amdgpu_ps void @s_buffer_load_v8i64_vgpr_offset(<4 x i32> inreg %rsrc, i3 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 ; GFX7-NEXT: 
[[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) - ; GFX7-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s64>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x s64>), [[AMDGPU_BUFFER_LOAD1]](<2 x s64>), [[AMDGPU_BUFFER_LOAD2]](<2 x s64>), [[AMDGPU_BUFFER_LOAD3]](<2 x s64>) - ; GFX7-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s64>), [[UV1:%[0-9]+]]:vgpr(<2 x s64>), [[UV2:%[0-9]+]]:vgpr(<2 x s64>), [[UV3:%[0-9]+]]:vgpr(<2 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s64>) - ; GFX7-NEXT: G_STORE [[UV]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `ptr addrspace(1) undef`, align 64, addrspace 1) - ; GFX7-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 - ; GFX7-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; GFX7-NEXT: G_STORE [[UV1]](<2 x s64>), [[PTR_ADD]](p1) :: (store (<2 x s64>) into `ptr addrspace(1) undef` + 16, basealign 64, addrspace 1) - ; GFX7-NEXT: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 - ; GFX7-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) - ; GFX7-NEXT: G_STORE [[UV2]](<2 x s64>), [[PTR_ADD1]](p1) :: (store (<2 x s64>) into `ptr addrspace(1) undef` + 32, align 32, basealign 64, addrspace 1) - ; GFX7-NEXT: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 - ; GFX7-NEXT: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) - ; GFX7-NEXT: G_STORE [[UV3]](<2 x s64>), [[PTR_ADD2]](p1) :: (store (<2 x s64>) into `ptr addrspace(1) undef` + 48, basealign 64, addrspace 1) + ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x i64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x i64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<2 x i64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (i128) from unknown-address + 16, align 4) + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x i64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (i128) from unknown-address + 48, align 4) + ; GFX7-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x i64>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x i64>), 
[[AMDGPU_BUFFER_LOAD1]](<2 x i64>), [[AMDGPU_BUFFER_LOAD2]](<2 x i64>), [[AMDGPU_BUFFER_LOAD3]](<2 x i64>) + ; GFX7-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x i64>), [[UV1:%[0-9]+]]:vgpr(<2 x i64>), [[UV2:%[0-9]+]]:vgpr(<2 x i64>), [[UV3:%[0-9]+]]:vgpr(<2 x i64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x i64>) + ; GFX7-NEXT: G_STORE [[UV]](<2 x i64>), [[DEF]](p1) :: (store (<2 x i64>) into `ptr addrspace(1) undef`, align 64, addrspace 1) + ; GFX7-NEXT: [[C2:%[0-9]+]]:sgpr(i64) = G_CONSTANT i64 16 + ; GFX7-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](i64) + ; GFX7-NEXT: G_STORE [[UV1]](<2 x i64>), [[PTR_ADD]](p1) :: (store (<2 x i64>) into `ptr addrspace(1) undef` + 16, basealign 64, addrspace 1) + ; GFX7-NEXT: [[C3:%[0-9]+]]:sgpr(i64) = G_CONSTANT i64 32 + ; GFX7-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](i64) + ; GFX7-NEXT: G_STORE [[UV2]](<2 x i64>), [[PTR_ADD1]](p1) :: (store (<2 x i64>) into `ptr addrspace(1) undef` + 32, align 32, basealign 64, addrspace 1) + ; GFX7-NEXT: [[C4:%[0-9]+]]:sgpr(i64) = G_CONSTANT i64 48 + ; GFX7-NEXT: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](i64) + ; GFX7-NEXT: G_STORE [[UV3]](<2 x i64>), [[PTR_ADD2]](p1) :: (store (<2 x i64>) into `ptr addrspace(1) undef` + 48, basealign 64, addrspace 1) ; GFX7-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: s_buffer_load_v8i64_vgpr_offset ; GFX12: bb.1 (%ir-block.0): ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 ; GFX12-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) - ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s64>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x s64>), [[AMDGPU_BUFFER_LOAD1]](<2 x s64>), 
[[AMDGPU_BUFFER_LOAD2]](<2 x s64>), [[AMDGPU_BUFFER_LOAD3]](<2 x s64>) - ; GFX12-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s64>), [[UV1:%[0-9]+]]:vgpr(<2 x s64>), [[UV2:%[0-9]+]]:vgpr(<2 x s64>), [[UV3:%[0-9]+]]:vgpr(<2 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s64>) + ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x i64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x i64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<2 x i64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (i128) from unknown-address + 16, align 4) + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x i64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (i128) from unknown-address + 48, align 4) + ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x i64>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x i64>), [[AMDGPU_BUFFER_LOAD1]](<2 x i64>), [[AMDGPU_BUFFER_LOAD2]](<2 x i64>), [[AMDGPU_BUFFER_LOAD3]](<2 x i64>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x i64>), [[UV1:%[0-9]+]]:vgpr(<2 x i64>), [[UV2:%[0-9]+]]:vgpr(<2 x i64>), [[UV3:%[0-9]+]]:vgpr(<2 x i64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x i64>) ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) - ; GFX12-NEXT: G_STORE [[UV]](<2 x s64>), [[COPY5]](p1) :: (store (<2 x s64>) into `ptr addrspace(1) undef`, align 64, addrspace 1) - ; GFX12-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 - ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) + ; GFX12-NEXT: G_STORE [[UV]](<2 x i64>), [[COPY5]](p1) :: (store (<2 x i64>) into `ptr addrspace(1) undef`, align 64, addrspace 1) + ; GFX12-NEXT: [[C2:%[0-9]+]]:sgpr(i64) = G_CONSTANT i64 16 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](i64) ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr(p1) = COPY [[PTR_ADD]](p1) - ; GFX12-NEXT: G_STORE [[UV1]](<2 x s64>), [[COPY6]](p1) :: (store (<2 x s64>) into `ptr addrspace(1) undef` + 16, basealign 64, addrspace 1) - ; GFX12-NEXT: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 - ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) + ; GFX12-NEXT: G_STORE [[UV1]](<2 x i64>), [[COPY6]](p1) :: (store (<2 x i64>) into `ptr addrspace(1) undef` + 16, basealign 64, addrspace 1) + ; GFX12-NEXT: [[C3:%[0-9]+]]:sgpr(i64) = G_CONSTANT i64 32 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](i64) ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr(p1) = COPY [[PTR_ADD1]](p1) - ; GFX12-NEXT: G_STORE [[UV2]](<2 x s64>), [[COPY7]](p1) :: (store (<2 x s64>) into `ptr addrspace(1) undef` + 32, align 32, basealign 64, addrspace 1) - ; GFX12-NEXT: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 - ; GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) + ; GFX12-NEXT: G_STORE [[UV2]](<2 x i64>), [[COPY7]](p1) :: (store (<2 x i64>) into `ptr addrspace(1) undef` + 32, align 32, basealign 64, addrspace 1) + ; GFX12-NEXT: [[C4:%[0-9]+]]:sgpr(i64) = G_CONSTANT i64 48 + ; GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], 
[[C4]](i64) ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr(p1) = COPY [[PTR_ADD2]](p1) - ; GFX12-NEXT: G_STORE [[UV3]](<2 x s64>), [[COPY8]](p1) :: (store (<2 x s64>) into `ptr addrspace(1) undef` + 48, basealign 64, addrspace 1) + ; GFX12-NEXT: G_STORE [[UV3]](<2 x i64>), [[COPY8]](p1) :: (store (<2 x i64>) into `ptr addrspace(1) undef` + 48, basealign 64, addrspace 1) ; GFX12-NEXT: S_ENDPGM 0 %val = call <8 x i64> @llvm.amdgcn.s.buffer.load.v8i64(<4 x i32> %rsrc, i32 %soffset, i32 0) store <8 x i64> %val, ptr addrspace(1) undef @@ -1062,22 +1062,22 @@ define amdgpu_ps void @s_buffer_load_v4p1_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 ; GFX7-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (i128), align 4) ; GFX7-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x p1>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x p1>), [[AMDGPU_BUFFER_LOAD1]](<2 x p1>) ; GFX7-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x p1>), [[UV1:%[0-9]+]]:vgpr(<2 x p1>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x p1>) ; GFX7-NEXT: G_STORE [[UV]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef`, align 32, addrspace 1) - ; GFX7-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 - ; GFX7-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) + ; GFX7-NEXT: [[C2:%[0-9]+]]:sgpr(i64) = G_CONSTANT i64 16 + ; GFX7-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](i64) ; GFX7-NEXT: G_STORE [[UV1]](<2 x p1>), [[PTR_ADD]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef` + 16, basealign 32, addrspace 1) ; GFX7-NEXT: S_ENDPGM 0 ; @@ -1085,23 +1085,23 @@ define 
amdgpu_ps void @s_buffer_load_v4p1_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; GFX12: bb.1 (%ir-block.0): ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 ; GFX12-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (i128), align 4) ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x p1>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x p1>), [[AMDGPU_BUFFER_LOAD1]](<2 x p1>) ; GFX12-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x p1>), [[UV1:%[0-9]+]]:vgpr(<2 x p1>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x p1>) ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) ; GFX12-NEXT: G_STORE [[UV]](<2 x p1>), [[COPY5]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef`, align 32, addrspace 1) - ; GFX12-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 - ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) + ; GFX12-NEXT: [[C2:%[0-9]+]]:sgpr(i64) = G_CONSTANT i64 16 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](i64) ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr(p1) = COPY [[PTR_ADD]](p1) ; GFX12-NEXT: G_STORE [[UV1]](<2 x p1>), [[COPY6]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef` + 16, basealign 32, addrspace 1) ; GFX12-NEXT: S_ENDPGM 0 @@ -1116,30 +1116,30 @@ define amdgpu_ps void @s_buffer_load_v8p1_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY 
$sgpr5 - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 ; GFX7-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (i128) from unknown-address + 16, align 4) + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (i128) from unknown-address + 48, align 4) ; GFX7-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x p1>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x p1>), [[AMDGPU_BUFFER_LOAD1]](<2 x p1>), [[AMDGPU_BUFFER_LOAD2]](<2 x p1>), [[AMDGPU_BUFFER_LOAD3]](<2 x p1>) ; GFX7-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x p1>), [[UV1:%[0-9]+]]:vgpr(<2 x p1>), [[UV2:%[0-9]+]]:vgpr(<2 x p1>), [[UV3:%[0-9]+]]:vgpr(<2 x p1>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x p1>) ; GFX7-NEXT: G_STORE [[UV]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef`, align 64, addrspace 1) - ; GFX7-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 - ; GFX7-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) + ; GFX7-NEXT: [[C2:%[0-9]+]]:sgpr(i64) = G_CONSTANT i64 16 + ; GFX7-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](i64) ; GFX7-NEXT: G_STORE [[UV1]](<2 x p1>), [[PTR_ADD]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) 
undef` + 16, basealign 64, addrspace 1) - ; GFX7-NEXT: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 - ; GFX7-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) + ; GFX7-NEXT: [[C3:%[0-9]+]]:sgpr(i64) = G_CONSTANT i64 32 + ; GFX7-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](i64) ; GFX7-NEXT: G_STORE [[UV2]](<2 x p1>), [[PTR_ADD1]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef` + 32, align 32, basealign 64, addrspace 1) - ; GFX7-NEXT: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 - ; GFX7-NEXT: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) + ; GFX7-NEXT: [[C4:%[0-9]+]]:sgpr(i64) = G_CONSTANT i64 48 + ; GFX7-NEXT: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](i64) ; GFX7-NEXT: G_STORE [[UV3]](<2 x p1>), [[PTR_ADD2]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef` + 48, basealign 64, addrspace 1) ; GFX7-NEXT: S_ENDPGM 0 ; @@ -1147,33 +1147,33 @@ define amdgpu_ps void @s_buffer_load_v8p1_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; GFX12: bb.1 (%ir-block.0): ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 ; GFX12-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (i128), 
align 4) + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (i128) from unknown-address + 16, align 4) + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (i128) from unknown-address + 48, align 4) ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x p1>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x p1>), [[AMDGPU_BUFFER_LOAD1]](<2 x p1>), [[AMDGPU_BUFFER_LOAD2]](<2 x p1>), [[AMDGPU_BUFFER_LOAD3]](<2 x p1>) ; GFX12-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x p1>), [[UV1:%[0-9]+]]:vgpr(<2 x p1>), [[UV2:%[0-9]+]]:vgpr(<2 x p1>), [[UV3:%[0-9]+]]:vgpr(<2 x p1>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x p1>) ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) ; GFX12-NEXT: G_STORE [[UV]](<2 x p1>), [[COPY5]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef`, align 64, addrspace 1) - ; GFX12-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 - ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) + ; GFX12-NEXT: [[C2:%[0-9]+]]:sgpr(i64) = G_CONSTANT i64 16 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](i64) ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr(p1) = COPY [[PTR_ADD]](p1) ; GFX12-NEXT: G_STORE [[UV1]](<2 x p1>), [[COPY6]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef` + 16, basealign 64, addrspace 1) - ; GFX12-NEXT: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 - ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) + ; GFX12-NEXT: [[C3:%[0-9]+]]:sgpr(i64) = G_CONSTANT i64 32 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](i64) ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr(p1) = COPY [[PTR_ADD1]](p1) ; GFX12-NEXT: G_STORE [[UV2]](<2 x p1>), [[COPY7]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef` + 32, align 32, basealign 64, addrspace 1) - ; GFX12-NEXT: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 - ; GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) + ; GFX12-NEXT: [[C4:%[0-9]+]]:sgpr(i64) = G_CONSTANT i64 48 + ; GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](i64) ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr(p1) = COPY [[PTR_ADD2]](p1) ; GFX12-NEXT: G_STORE [[UV3]](<2 x p1>), [[COPY8]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef` + 48, basealign 64, addrspace 1) ; GFX12-NEXT: S_ENDPGM 0 @@ -1187,38 +1187,38 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4092(<4 x i32> inreg % ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4092 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GFX7-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] - ; GFX7-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = 
G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4092, 0, 0 :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4092 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GFX7-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[COPY4]], [[COPY5]] + ; GFX7-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[C2:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(f32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C2]](i32), [[COPY4]], [[C1]], 4092, 0, 0 :: (dereferenceable invariant load (i32)) + ; GFX7-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](f32) ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX12-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4092 ; GFX12: bb.1 (%ir-block.0): ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4092 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GFX12-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] - ; GFX12-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4092, 0, 0 :: (dereferenceable invariant load (s32)) - ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4092 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GFX12-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[COPY4]], [[COPY5]] + ; GFX12-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[C2:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(f32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C2]](i32), [[COPY4]], [[C1]], 4092, 0, 0 :: (dereferenceable invariant load (i32)) + ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4092 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 
@@ -1230,38 +1230,38 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4095(<4 x i32> inreg % ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4095 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GFX7-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] - ; GFX7-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4095, 0, 0 :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4095 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GFX7-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[COPY4]], [[COPY5]] + ; GFX7-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[C2:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(f32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C2]](i32), [[COPY4]], [[C1]], 4095, 0, 0 :: (dereferenceable invariant load (i32)) + ; GFX7-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](f32) ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX12-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4095 ; GFX12: bb.1 (%ir-block.0): ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4095 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GFX12-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] - ; GFX12-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4095, 0, 0 :: (dereferenceable invariant load (s32)) - ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY 
$sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4095 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GFX12-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[COPY4]], [[COPY5]] + ; GFX12-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[C2:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(f32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C2]](i32), [[COPY4]], [[C1]], 4095, 0, 0 :: (dereferenceable invariant load (i32)) + ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4095 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -1273,37 +1273,37 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4096(<4 x i32> inreg % ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GFX7-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] - ; GFX7-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4096 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GFX7-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[COPY4]], [[COPY5]] + ; GFX7-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(f32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (i32)) + ; GFX7-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](f32) ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX12-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4096 ; GFX12: bb.1 (%ir-block.0): ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x 
s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GFX12-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] - ; GFX12-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4096, 0, 0 :: (dereferenceable invariant load (s32)) - ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4096 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GFX12-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[COPY4]], [[COPY5]] + ; GFX12-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[C2:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(f32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C2]](i32), [[COPY4]], [[C1]], 4096, 0, 0 :: (dereferenceable invariant load (i32)) + ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4096 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -1316,58 +1316,58 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4064(<4 x i32> ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4064 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GFX7-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] - ; GFX7-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4064, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4080, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GFX7-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), 
[[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GFX7-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX7-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX7-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX7-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX7-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX7-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX7-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GFX7-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4064 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GFX7-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[COPY4]], [[COPY5]] + ; GFX7-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[C2:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C2]](i32), [[COPY4]], [[C1]], 4064, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C2]](i32), [[COPY4]], [[C1]], 4080, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX7-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x f32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x f32>), [[AMDGPU_BUFFER_LOAD1]](<4 x f32>) + ; GFX7-NEXT: [[UV:%[0-9]+]]:vgpr(f32), [[UV1:%[0-9]+]]:vgpr(f32), [[UV2:%[0-9]+]]:vgpr(f32), [[UV3:%[0-9]+]]:vgpr(f32), [[UV4:%[0-9]+]]:vgpr(f32), [[UV5:%[0-9]+]]:vgpr(f32), [[UV6:%[0-9]+]]:vgpr(f32), [[UV7:%[0-9]+]]:vgpr(f32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x f32>) + ; GFX7-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX7-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX7-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX7-NEXT: $vgpr3 = COPY [[UV3]](f32) + ; GFX7-NEXT: $vgpr4 = COPY [[UV4]](f32) + ; GFX7-NEXT: $vgpr5 = COPY [[UV5]](f32) + ; GFX7-NEXT: $vgpr6 = COPY [[UV6]](f32) + ; GFX7-NEXT: $vgpr7 = COPY [[UV7]](f32) ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; ; GFX12-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4064 ; GFX12: bb.1 (%ir-block.0): ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4064 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GFX12-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] - ; GFX12-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD 
[[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4064, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4080, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GFX12-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX12-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX12-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX12-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GFX12-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4064 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GFX12-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[COPY4]], [[COPY5]] + ; GFX12-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[C2:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C2]](i32), [[COPY4]], [[C1]], 4064, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C2]](i32), [[COPY4]], [[C1]], 4080, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x f32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x f32>), [[AMDGPU_BUFFER_LOAD1]](<4 x f32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:vgpr(f32), [[UV1:%[0-9]+]]:vgpr(f32), [[UV2:%[0-9]+]]:vgpr(f32), [[UV3:%[0-9]+]]:vgpr(f32), [[UV4:%[0-9]+]]:vgpr(f32), [[UV5:%[0-9]+]]:vgpr(f32), [[UV6:%[0-9]+]]:vgpr(f32), [[UV7:%[0-9]+]]:vgpr(f32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) + ; GFX12-NEXT: $vgpr4 = COPY [[UV4]](f32) + ; GFX12-NEXT: $vgpr5 = COPY [[UV5]](f32) + ; GFX12-NEXT: $vgpr6 = COPY [[UV6]](f32) + ; GFX12-NEXT: $vgpr7 = COPY [[UV7]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4064 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -1380,57 +1380,57 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4068(<4 x i32> ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX7-NEXT: {{ $}} - 
; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4068 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GFX7-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] - ; GFX7-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GFX7-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GFX7-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX7-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX7-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX7-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX7-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX7-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX7-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GFX7-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4068 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GFX7-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[COPY4]], [[COPY5]] + ; GFX7-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX7-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x f32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x f32>), [[AMDGPU_BUFFER_LOAD1]](<4 x f32>) + ; GFX7-NEXT: [[UV:%[0-9]+]]:vgpr(f32), [[UV1:%[0-9]+]]:vgpr(f32), [[UV2:%[0-9]+]]:vgpr(f32), [[UV3:%[0-9]+]]:vgpr(f32), [[UV4:%[0-9]+]]:vgpr(f32), [[UV5:%[0-9]+]]:vgpr(f32), [[UV6:%[0-9]+]]:vgpr(f32), [[UV7:%[0-9]+]]:vgpr(f32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x f32>) + ; GFX7-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX7-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX7-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX7-NEXT: $vgpr3 = COPY [[UV3]](f32) + ; GFX7-NEXT: $vgpr4 = COPY 
[[UV4]](f32) + ; GFX7-NEXT: $vgpr5 = COPY [[UV5]](f32) + ; GFX7-NEXT: $vgpr6 = COPY [[UV6]](f32) + ; GFX7-NEXT: $vgpr7 = COPY [[UV7]](f32) ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; ; GFX12-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4068 ; GFX12: bb.1 (%ir-block.0): ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4068 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GFX12-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] - ; GFX12-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4068, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4084, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GFX12-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX12-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX12-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX12-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GFX12-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4068 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GFX12-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[COPY4]], [[COPY5]] + ; GFX12-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[C2:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C2]](i32), [[COPY4]], [[C1]], 4068, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C2]](i32), [[COPY4]], [[C1]], 4084, 0, 0 :: (dereferenceable invariant 
load (i128), align 4) + ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x f32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x f32>), [[AMDGPU_BUFFER_LOAD1]](<4 x f32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:vgpr(f32), [[UV1:%[0-9]+]]:vgpr(f32), [[UV2:%[0-9]+]]:vgpr(f32), [[UV3:%[0-9]+]]:vgpr(f32), [[UV4:%[0-9]+]]:vgpr(f32), [[UV5:%[0-9]+]]:vgpr(f32), [[UV6:%[0-9]+]]:vgpr(f32), [[UV7:%[0-9]+]]:vgpr(f32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) + ; GFX12-NEXT: $vgpr4 = COPY [[UV4]](f32) + ; GFX12-NEXT: $vgpr5 = COPY [[UV5]](f32) + ; GFX12-NEXT: $vgpr6 = COPY [[UV6]](f32) + ; GFX12-NEXT: $vgpr7 = COPY [[UV7]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4068 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -1442,78 +1442,78 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4032(<4 x i3 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4032 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GFX7-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] - ; GFX7-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4032, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4048, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4064, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4080, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) - ; GFX7-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>), [[AMDGPU_BUFFER_LOAD2]](<4 x s32>), [[AMDGPU_BUFFER_LOAD3]](<4 x s32>) - ; GFX7-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), 
[[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s32>) - ; GFX7-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX7-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX7-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX7-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX7-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX7-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX7-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GFX7-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; GFX7-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; GFX7-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; GFX7-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; GFX7-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; GFX7-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; GFX7-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; GFX7-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; GFX7-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4032 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GFX7-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[COPY4]], [[COPY5]] + ; GFX7-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[C2:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C2]](i32), [[COPY4]], [[C1]], 4032, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C2]](i32), [[COPY4]], [[C1]], 4048, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C2]](i32), [[COPY4]], [[C1]], 4064, 0, 0 :: (dereferenceable invariant load (i128) from unknown-address + 16, align 4) + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C2]](i32), [[COPY4]], [[C1]], 4080, 0, 0 :: (dereferenceable invariant load (i128) from unknown-address + 48, align 4) + ; GFX7-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x f32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x f32>), [[AMDGPU_BUFFER_LOAD1]](<4 x f32>), [[AMDGPU_BUFFER_LOAD2]](<4 x f32>), [[AMDGPU_BUFFER_LOAD3]](<4 x f32>) + ; GFX7-NEXT: [[UV:%[0-9]+]]:vgpr(f32), [[UV1:%[0-9]+]]:vgpr(f32), [[UV2:%[0-9]+]]:vgpr(f32), [[UV3:%[0-9]+]]:vgpr(f32), [[UV4:%[0-9]+]]:vgpr(f32), [[UV5:%[0-9]+]]:vgpr(f32), [[UV6:%[0-9]+]]:vgpr(f32), [[UV7:%[0-9]+]]:vgpr(f32), [[UV8:%[0-9]+]]:vgpr(f32), [[UV9:%[0-9]+]]:vgpr(f32), [[UV10:%[0-9]+]]:vgpr(f32), [[UV11:%[0-9]+]]:vgpr(f32), [[UV12:%[0-9]+]]:vgpr(f32), [[UV13:%[0-9]+]]:vgpr(f32), [[UV14:%[0-9]+]]:vgpr(f32), [[UV15:%[0-9]+]]:vgpr(f32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x f32>) + ; GFX7-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX7-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX7-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX7-NEXT: $vgpr3 = COPY [[UV3]](f32) + ; GFX7-NEXT: $vgpr4 = COPY [[UV4]](f32) + ; GFX7-NEXT: $vgpr5 = COPY [[UV5]](f32) + ; GFX7-NEXT: $vgpr6 = COPY [[UV6]](f32) + ; GFX7-NEXT: $vgpr7 = COPY [[UV7]](f32) + ; GFX7-NEXT: $vgpr8 = COPY [[UV8]](f32) + ; GFX7-NEXT: $vgpr9 = COPY [[UV9]](f32) + ; 
GFX7-NEXT: $vgpr10 = COPY [[UV10]](f32) + ; GFX7-NEXT: $vgpr11 = COPY [[UV11]](f32) + ; GFX7-NEXT: $vgpr12 = COPY [[UV12]](f32) + ; GFX7-NEXT: $vgpr13 = COPY [[UV13]](f32) + ; GFX7-NEXT: $vgpr14 = COPY [[UV14]](f32) + ; GFX7-NEXT: $vgpr15 = COPY [[UV15]](f32) ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 ; ; GFX12-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4032 ; GFX12: bb.1 (%ir-block.0): ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4032 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GFX12-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] - ; GFX12-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4032, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4048, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4064, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4080, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) - ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>), [[AMDGPU_BUFFER_LOAD2]](<4 x s32>), [[AMDGPU_BUFFER_LOAD3]](<4 x s32>) - ; GFX12-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX12-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX12-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX12-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GFX12-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; GFX12-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; GFX12-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; GFX12-NEXT: 
$vgpr10 = COPY [[UV10]](s32) - ; GFX12-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; GFX12-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; GFX12-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; GFX12-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; GFX12-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4032 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GFX12-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[COPY4]], [[COPY5]] + ; GFX12-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[C2:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C2]](i32), [[COPY4]], [[C1]], 4032, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C2]](i32), [[COPY4]], [[C1]], 4048, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C2]](i32), [[COPY4]], [[C1]], 4064, 0, 0 :: (dereferenceable invariant load (i128) from unknown-address + 16, align 4) + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C2]](i32), [[COPY4]], [[C1]], 4080, 0, 0 :: (dereferenceable invariant load (i128) from unknown-address + 48, align 4) + ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x f32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x f32>), [[AMDGPU_BUFFER_LOAD1]](<4 x f32>), [[AMDGPU_BUFFER_LOAD2]](<4 x f32>), [[AMDGPU_BUFFER_LOAD3]](<4 x f32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:vgpr(f32), [[UV1:%[0-9]+]]:vgpr(f32), [[UV2:%[0-9]+]]:vgpr(f32), [[UV3:%[0-9]+]]:vgpr(f32), [[UV4:%[0-9]+]]:vgpr(f32), [[UV5:%[0-9]+]]:vgpr(f32), [[UV6:%[0-9]+]]:vgpr(f32), [[UV7:%[0-9]+]]:vgpr(f32), [[UV8:%[0-9]+]]:vgpr(f32), [[UV9:%[0-9]+]]:vgpr(f32), [[UV10:%[0-9]+]]:vgpr(f32), [[UV11:%[0-9]+]]:vgpr(f32), [[UV12:%[0-9]+]]:vgpr(f32), [[UV13:%[0-9]+]]:vgpr(f32), [[UV14:%[0-9]+]]:vgpr(f32), [[UV15:%[0-9]+]]:vgpr(f32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) + ; GFX12-NEXT: $vgpr4 = COPY [[UV4]](f32) + ; GFX12-NEXT: $vgpr5 = COPY [[UV5]](f32) + ; GFX12-NEXT: $vgpr6 = COPY [[UV6]](f32) + ; GFX12-NEXT: $vgpr7 = COPY [[UV7]](f32) + ; GFX12-NEXT: $vgpr8 = COPY [[UV8]](f32) + ; GFX12-NEXT: $vgpr9 = COPY [[UV9]](f32) + ; GFX12-NEXT: $vgpr10 = COPY [[UV10]](f32) + ; GFX12-NEXT: $vgpr11 = COPY [[UV11]](f32) + ; GFX12-NEXT: $vgpr12 = COPY [[UV12]](f32) + ; GFX12-NEXT: $vgpr13 = COPY [[UV13]](f32) + ; GFX12-NEXT: $vgpr14 = COPY [[UV14]](f32) + ; GFX12-NEXT: $vgpr15 = COPY [[UV15]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, 
implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 %soffset = add i32 %soffset.base, 4032 %val = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -1525,77 +1525,77 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4036(<4 x i3 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4036 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GFX7-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] - ; GFX7-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) - ; GFX7-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>), [[AMDGPU_BUFFER_LOAD2]](<4 x s32>), [[AMDGPU_BUFFER_LOAD3]](<4 x s32>) - ; GFX7-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s32>) - ; GFX7-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX7-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX7-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX7-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX7-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX7-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX7-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GFX7-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; GFX7-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; GFX7-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; GFX7-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; GFX7-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; GFX7-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; GFX7-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; GFX7-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; GFX7-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; 
GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4036 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GFX7-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[COPY4]], [[COPY5]] + ; GFX7-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (i128) from unknown-address + 16, align 4) + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (i128) from unknown-address + 48, align 4) + ; GFX7-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x f32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x f32>), [[AMDGPU_BUFFER_LOAD1]](<4 x f32>), [[AMDGPU_BUFFER_LOAD2]](<4 x f32>), [[AMDGPU_BUFFER_LOAD3]](<4 x f32>) + ; GFX7-NEXT: [[UV:%[0-9]+]]:vgpr(f32), [[UV1:%[0-9]+]]:vgpr(f32), [[UV2:%[0-9]+]]:vgpr(f32), [[UV3:%[0-9]+]]:vgpr(f32), [[UV4:%[0-9]+]]:vgpr(f32), [[UV5:%[0-9]+]]:vgpr(f32), [[UV6:%[0-9]+]]:vgpr(f32), [[UV7:%[0-9]+]]:vgpr(f32), [[UV8:%[0-9]+]]:vgpr(f32), [[UV9:%[0-9]+]]:vgpr(f32), [[UV10:%[0-9]+]]:vgpr(f32), [[UV11:%[0-9]+]]:vgpr(f32), [[UV12:%[0-9]+]]:vgpr(f32), [[UV13:%[0-9]+]]:vgpr(f32), [[UV14:%[0-9]+]]:vgpr(f32), [[UV15:%[0-9]+]]:vgpr(f32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x f32>) + ; GFX7-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX7-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX7-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX7-NEXT: $vgpr3 = COPY [[UV3]](f32) + ; GFX7-NEXT: $vgpr4 = COPY [[UV4]](f32) + ; GFX7-NEXT: $vgpr5 = COPY [[UV5]](f32) + ; GFX7-NEXT: $vgpr6 = COPY [[UV6]](f32) + ; GFX7-NEXT: $vgpr7 = COPY [[UV7]](f32) + ; GFX7-NEXT: $vgpr8 = COPY [[UV8]](f32) + ; GFX7-NEXT: $vgpr9 = COPY [[UV9]](f32) + ; GFX7-NEXT: $vgpr10 = COPY [[UV10]](f32) + ; GFX7-NEXT: $vgpr11 = COPY [[UV11]](f32) + ; GFX7-NEXT: $vgpr12 = COPY [[UV12]](f32) + ; GFX7-NEXT: $vgpr13 = COPY [[UV13]](f32) + ; GFX7-NEXT: $vgpr14 = COPY [[UV14]](f32) + ; GFX7-NEXT: $vgpr15 = COPY [[UV15]](f32) ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 ; ; GFX12-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4036 ; GFX12: bb.1 (%ir-block.0): ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = 
G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4036 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GFX12-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] - ; GFX12-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4036, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4052, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4068, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4084, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) - ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>), [[AMDGPU_BUFFER_LOAD2]](<4 x s32>), [[AMDGPU_BUFFER_LOAD3]](<4 x s32>) - ; GFX12-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX12-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX12-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX12-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GFX12-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; GFX12-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; GFX12-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; GFX12-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; GFX12-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; GFX12-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; GFX12-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; GFX12-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; GFX12-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4036 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GFX12-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[COPY4]], [[COPY5]] + ; GFX12-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[C2:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: 
[[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C2]](i32), [[COPY4]], [[C1]], 4036, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C2]](i32), [[COPY4]], [[C1]], 4052, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C2]](i32), [[COPY4]], [[C1]], 4068, 0, 0 :: (dereferenceable invariant load (i128) from unknown-address + 16, align 4) + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C2]](i32), [[COPY4]], [[C1]], 4084, 0, 0 :: (dereferenceable invariant load (i128) from unknown-address + 48, align 4) + ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x f32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x f32>), [[AMDGPU_BUFFER_LOAD1]](<4 x f32>), [[AMDGPU_BUFFER_LOAD2]](<4 x f32>), [[AMDGPU_BUFFER_LOAD3]](<4 x f32>) + ; GFX12-NEXT: [[UV:%[0-9]+]]:vgpr(f32), [[UV1:%[0-9]+]]:vgpr(f32), [[UV2:%[0-9]+]]:vgpr(f32), [[UV3:%[0-9]+]]:vgpr(f32), [[UV4:%[0-9]+]]:vgpr(f32), [[UV5:%[0-9]+]]:vgpr(f32), [[UV6:%[0-9]+]]:vgpr(f32), [[UV7:%[0-9]+]]:vgpr(f32), [[UV8:%[0-9]+]]:vgpr(f32), [[UV9:%[0-9]+]]:vgpr(f32), [[UV10:%[0-9]+]]:vgpr(f32), [[UV11:%[0-9]+]]:vgpr(f32), [[UV12:%[0-9]+]]:vgpr(f32), [[UV13:%[0-9]+]]:vgpr(f32), [[UV14:%[0-9]+]]:vgpr(f32), [[UV15:%[0-9]+]]:vgpr(f32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](f32) + ; GFX12-NEXT: $vgpr4 = COPY [[UV4]](f32) + ; GFX12-NEXT: $vgpr5 = COPY [[UV5]](f32) + ; GFX12-NEXT: $vgpr6 = COPY [[UV6]](f32) + ; GFX12-NEXT: $vgpr7 = COPY [[UV7]](f32) + ; GFX12-NEXT: $vgpr8 = COPY [[UV8]](f32) + ; GFX12-NEXT: $vgpr9 = COPY [[UV9]](f32) + ; GFX12-NEXT: $vgpr10 = COPY [[UV10]](f32) + ; GFX12-NEXT: $vgpr11 = COPY [[UV11]](f32) + ; GFX12-NEXT: $vgpr12 = COPY [[UV12]](f32) + ; GFX12-NEXT: $vgpr13 = COPY [[UV13]](f32) + ; GFX12-NEXT: $vgpr14 = COPY [[UV14]](f32) + ; GFX12-NEXT: $vgpr15 = COPY [[UV15]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 %soffset = add i32 %soffset.base, 4036 %val = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -1608,38 +1608,38 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg % ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32) - ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = 
G_CONSTANT i32 0 + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[COPY4]](i32) + ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 ; GFX7-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: ; GFX7-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.3 - ; GFX7-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec - ; GFX7-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GFX7-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GFX7-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; GFX7-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; GFX7-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX7-NEXT: [[UV:%[0-9]+]]:vgpr_32(i32), [[UV1:%[0-9]+]]:vgpr_32(i32), [[UV2:%[0-9]+]]:vgpr_32(i32), [[UV3:%[0-9]+]]:vgpr_32(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV]](i32), implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV1]](i32), implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV2]](i32), implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV3]](i32), implicit $exec + ; GFX7-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](i32), [[V_READFIRSTLANE_B32_1]](i32), [[V_READFIRSTLANE_B32_2]](i32), [[V_READFIRSTLANE_B32_3]](i32) + ; GFX7-NEXT: [[UV4:%[0-9]+]]:vgpr(i64), [[UV5:%[0-9]+]]:vgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; GFX7-NEXT: [[UV6:%[0-9]+]]:sgpr(i64), [[UV7:%[0-9]+]]:sgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x 
i32>) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV6]](i64), [[UV4]] + ; GFX7-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV7]](i64), [[UV5]] + ; GFX7-NEXT: [[AND:%[0-9]+]]:vcc(i1) = G_AND [[ICMP]], [[ICMP1]] + ; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](i1) + ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](i64), implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4, %bb.2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY5]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(f32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x i32>), [[C1]](i32), [[COPY5]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (i32)) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -1647,45 +1647,45 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg % ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: - ; GFX7-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GFX7-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](f32) ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX12-LABEL: name: s_buffer_load_f32_vgpr_rsrc ; GFX12: bb.1 (%ir-block.0): ; GFX12-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32) - ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[COPY4]](i32) + ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 ; GFX12-NEXT: [[DEF:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: bb.2: ; GFX12-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[DEF]], %bb.1, %14, %bb.3 - ; GFX12-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; GFX12-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) 
= V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; GFX12-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; GFX12-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GFX12-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GFX12-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; GFX12-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; GFX12-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; GFX12-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; GFX12-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; GFX12-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[INTRINSIC_CONVERGENT]](s32), implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX12-NEXT: [[UV:%[0-9]+]]:vgpr_32(i32), [[UV1:%[0-9]+]]:vgpr_32(i32), [[UV2:%[0-9]+]]:vgpr_32(i32), [[UV3:%[0-9]+]]:vgpr_32(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV]](i32), implicit $exec + ; GFX12-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV1]](i32), implicit $exec + ; GFX12-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV2]](i32), implicit $exec + ; GFX12-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV3]](i32), implicit $exec + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](i32), [[V_READFIRSTLANE_B32_1]](i32), [[V_READFIRSTLANE_B32_2]](i32), [[V_READFIRSTLANE_B32_3]](i32) + ; GFX12-NEXT: [[UV4:%[0-9]+]]:vgpr(i64), [[UV5:%[0-9]+]]:vgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; GFX12-NEXT: [[UV6:%[0-9]+]]:sgpr(i64), [[UV7:%[0-9]+]]:sgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x i32>) + ; GFX12-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV6]](i64), [[UV4]] + ; GFX12-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV7]](i64), [[UV5]] + ; GFX12-NEXT: [[AND:%[0-9]+]]:vcc(i1) = G_AND [[ICMP]], [[ICMP1]] + ; GFX12-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](i1) + ; GFX12-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[INTRINSIC_CONVERGENT]](i32), implicit-def $exec, implicit-def $scc, implicit $exec ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: bb.3: ; GFX12-NEXT: successors: %bb.4, %bb.2 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY5]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(f32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x i32>), [[C1]](i32), [[COPY5]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (i32)) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec 
; GFX12-NEXT: {{ $}} @@ -1693,7 +1693,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg % ; GFX12-NEXT: $exec_lo = S_MOV_B32_term [[S_MOV_B32_]] ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: bb.5: - ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret float %val @@ -1705,39 +1705,39 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> % ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4092 - ; GFX7-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] - ; GFX7-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4092 + ; GFX7-NEXT: [[ADD:%[0-9]+]]:sgpr(i32) = G_ADD [[COPY4]], [[C]] + ; GFX7-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[C2:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 ; GFX7-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: ; GFX7-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %16, %bb.3 - ; GFX7-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec - ; GFX7-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GFX7-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GFX7-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; GFX7-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; GFX7-NEXT: 
[[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX7-NEXT: [[UV:%[0-9]+]]:vgpr_32(i32), [[UV1:%[0-9]+]]:vgpr_32(i32), [[UV2:%[0-9]+]]:vgpr_32(i32), [[UV3:%[0-9]+]]:vgpr_32(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV]](i32), implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV1]](i32), implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV2]](i32), implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV3]](i32), implicit $exec + ; GFX7-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](i32), [[V_READFIRSTLANE_B32_1]](i32), [[V_READFIRSTLANE_B32_2]](i32), [[V_READFIRSTLANE_B32_3]](i32) + ; GFX7-NEXT: [[UV4:%[0-9]+]]:vgpr(i64), [[UV5:%[0-9]+]]:vgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; GFX7-NEXT: [[UV6:%[0-9]+]]:sgpr(i64), [[UV7:%[0-9]+]]:sgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x i32>) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV6]](i64), [[UV4]] + ; GFX7-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV7]](i64), [[UV5]] + ; GFX7-NEXT: [[AND:%[0-9]+]]:vcc(i1) = G_AND [[ICMP]], [[ICMP1]] + ; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](i1) + ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](i64), implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4, %bb.2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4092, 0, 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(f32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x i32>), [[C2]](i32), [[C1]], [[COPY4]], 4092, 0, 0 :: (dereferenceable invariant load (i32)) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -1745,46 +1745,46 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> % ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: - ; GFX7-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GFX7-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](f32) ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX12-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4092 ; GFX12: bb.1 (%ir-block.0): ; GFX12-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX12-NEXT: 
[[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4092 - ; GFX12-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] - ; GFX12-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4092 + ; GFX12-NEXT: [[ADD:%[0-9]+]]:sgpr(i32) = G_ADD [[COPY4]], [[C]] + ; GFX12-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[C2:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 ; GFX12-NEXT: [[DEF:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: bb.2: ; GFX12-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[DEF]], %bb.1, %15, %bb.3 - ; GFX12-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; GFX12-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; GFX12-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; GFX12-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GFX12-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GFX12-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; GFX12-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; GFX12-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; GFX12-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; GFX12-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; GFX12-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[INTRINSIC_CONVERGENT]](s32), implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX12-NEXT: [[UV:%[0-9]+]]:vgpr_32(i32), [[UV1:%[0-9]+]]:vgpr_32(i32), [[UV2:%[0-9]+]]:vgpr_32(i32), [[UV3:%[0-9]+]]:vgpr_32(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV]](i32), implicit $exec + ; GFX12-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV1]](i32), implicit $exec + ; GFX12-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV2]](i32), implicit $exec + ; GFX12-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV3]](i32), implicit $exec + ; GFX12-NEXT: 
[[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](i32), [[V_READFIRSTLANE_B32_1]](i32), [[V_READFIRSTLANE_B32_2]](i32), [[V_READFIRSTLANE_B32_3]](i32) + ; GFX12-NEXT: [[UV4:%[0-9]+]]:vgpr(i64), [[UV5:%[0-9]+]]:vgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; GFX12-NEXT: [[UV6:%[0-9]+]]:sgpr(i64), [[UV7:%[0-9]+]]:sgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x i32>) + ; GFX12-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV6]](i64), [[UV4]] + ; GFX12-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV7]](i64), [[UV5]] + ; GFX12-NEXT: [[AND:%[0-9]+]]:vcc(i1) = G_AND [[ICMP]], [[ICMP1]] + ; GFX12-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](i1) + ; GFX12-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[INTRINSIC_CONVERGENT]](i32), implicit-def $exec, implicit-def $scc, implicit $exec ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: bb.3: ; GFX12-NEXT: successors: %bb.4, %bb.2 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4092, 0, 0 :: (dereferenceable invariant load (s32)) + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(f32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x i32>), [[C2]](i32), [[C1]], [[COPY4]], 4092, 0, 0 :: (dereferenceable invariant load (i32)) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -1792,7 +1792,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> % ; GFX12-NEXT: $exec_lo = S_MOV_B32_term [[S_MOV_B32_]] ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: bb.5: - ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4092 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -1805,40 +1805,40 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> % ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 - ; GFX7-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) - ; GFX7-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4096 + ; 
GFX7-NEXT: [[ADD:%[0-9]+]]:sgpr(i32) = G_ADD [[COPY4]], [[C]] + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[ADD]](i32) + ; GFX7-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[C2:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 ; GFX7-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: ; GFX7-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %17, %bb.3 - ; GFX7-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec - ; GFX7-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GFX7-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GFX7-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; GFX7-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; GFX7-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX7-NEXT: [[UV:%[0-9]+]]:vgpr_32(i32), [[UV1:%[0-9]+]]:vgpr_32(i32), [[UV2:%[0-9]+]]:vgpr_32(i32), [[UV3:%[0-9]+]]:vgpr_32(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV]](i32), implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV1]](i32), implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV2]](i32), implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV3]](i32), implicit $exec + ; GFX7-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](i32), [[V_READFIRSTLANE_B32_1]](i32), [[V_READFIRSTLANE_B32_2]](i32), [[V_READFIRSTLANE_B32_3]](i32) + ; GFX7-NEXT: [[UV4:%[0-9]+]]:vgpr(i64), [[UV5:%[0-9]+]]:vgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; GFX7-NEXT: [[UV6:%[0-9]+]]:sgpr(i64), [[UV7:%[0-9]+]]:sgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x i32>) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV6]](i64), [[UV4]] + ; GFX7-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV7]](i64), [[UV5]] + ; GFX7-NEXT: [[AND:%[0-9]+]]:vcc(i1) = G_AND [[ICMP]], [[ICMP1]] + ; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), 
[[AND]](i1) + ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](i64), implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4, %bb.2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(f32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x i32>), [[C2]](i32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (i32)) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -1846,46 +1846,46 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> % ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: - ; GFX7-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GFX7-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](f32) ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX12-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4096 ; GFX12: bb.1 (%ir-block.0): ; GFX12-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 - ; GFX12-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] - ; GFX12-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4096 + ; GFX12-NEXT: [[ADD:%[0-9]+]]:sgpr(i32) = G_ADD [[COPY4]], [[C]] + ; GFX12-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[C2:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 ; GFX12-NEXT: [[DEF:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: bb.2: ; GFX12-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[DEF]], %bb.1, %15, %bb.3 - ; GFX12-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; GFX12-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; GFX12-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; GFX12-NEXT: 
[[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GFX12-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GFX12-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; GFX12-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; GFX12-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; GFX12-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; GFX12-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; GFX12-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[INTRINSIC_CONVERGENT]](s32), implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX12-NEXT: [[UV:%[0-9]+]]:vgpr_32(i32), [[UV1:%[0-9]+]]:vgpr_32(i32), [[UV2:%[0-9]+]]:vgpr_32(i32), [[UV3:%[0-9]+]]:vgpr_32(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV]](i32), implicit $exec + ; GFX12-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV1]](i32), implicit $exec + ; GFX12-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV2]](i32), implicit $exec + ; GFX12-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV3]](i32), implicit $exec + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](i32), [[V_READFIRSTLANE_B32_1]](i32), [[V_READFIRSTLANE_B32_2]](i32), [[V_READFIRSTLANE_B32_3]](i32) + ; GFX12-NEXT: [[UV4:%[0-9]+]]:vgpr(i64), [[UV5:%[0-9]+]]:vgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; GFX12-NEXT: [[UV6:%[0-9]+]]:sgpr(i64), [[UV7:%[0-9]+]]:sgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x i32>) + ; GFX12-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV6]](i64), [[UV4]] + ; GFX12-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV7]](i64), [[UV5]] + ; GFX12-NEXT: [[AND:%[0-9]+]]:vcc(i1) = G_AND [[ICMP]], [[ICMP1]] + ; GFX12-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](i1) + ; GFX12-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[INTRINSIC_CONVERGENT]](i32), implicit-def $exec, implicit-def $scc, implicit $exec ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: bb.3: ; GFX12-NEXT: successors: %bb.4, %bb.2 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4096, 0, 0 :: (dereferenceable invariant load (s32)) + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(f32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x i32>), [[C2]](i32), [[C1]], [[COPY4]], 4096, 0, 0 :: (dereferenceable invariant load (i32)) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -1893,7 +1893,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> % ; GFX12-NEXT: $exec_lo = S_MOV_B32_term 
[[S_MOV_B32_]] ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: bb.5: - ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4096 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -1906,38 +1906,38 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc) ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4095 - ; GFX7-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4095 + ; GFX7-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[C3:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 ; GFX7-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: ; GFX7-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.3 - ; GFX7-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec - ; GFX7-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GFX7-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GFX7-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; GFX7-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; GFX7-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed 
[[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX7-NEXT: [[UV:%[0-9]+]]:vgpr_32(i32), [[UV1:%[0-9]+]]:vgpr_32(i32), [[UV2:%[0-9]+]]:vgpr_32(i32), [[UV3:%[0-9]+]]:vgpr_32(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV]](i32), implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV1]](i32), implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV2]](i32), implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV3]](i32), implicit $exec + ; GFX7-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](i32), [[V_READFIRSTLANE_B32_1]](i32), [[V_READFIRSTLANE_B32_2]](i32), [[V_READFIRSTLANE_B32_3]](i32) + ; GFX7-NEXT: [[UV4:%[0-9]+]]:vgpr(i64), [[UV5:%[0-9]+]]:vgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; GFX7-NEXT: [[UV6:%[0-9]+]]:sgpr(i64), [[UV7:%[0-9]+]]:sgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x i32>) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV6]](i64), [[UV4]] + ; GFX7-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV7]](i64), [[UV5]] + ; GFX7-NEXT: [[AND:%[0-9]+]]:vcc(i1) = G_AND [[ICMP]], [[ICMP1]] + ; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](i1) + ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](i64), implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4, %bb.2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C3]](s32), [[C1]], [[C2]], 4095, 0, 0 :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1) + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(f32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x i32>), [[C3]](i32), [[C1]], [[C2]], 4095, 0, 0 :: (dereferenceable invariant load (i32) from unknown-address + 4095, align 1) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -1945,45 +1945,45 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc) ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: - ; GFX7-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GFX7-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](f32) ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX12-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4095 ; GFX12: bb.1 (%ir-block.0): ; GFX12-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4095 - ; GFX12-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4095 + ; GFX12-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[C3:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 ; GFX12-NEXT: [[DEF:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: bb.2: ; GFX12-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[DEF]], %bb.1, %14, %bb.3 - ; GFX12-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; GFX12-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; GFX12-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; GFX12-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GFX12-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GFX12-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; GFX12-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; GFX12-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; GFX12-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; GFX12-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; GFX12-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[INTRINSIC_CONVERGENT]](s32), implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX12-NEXT: [[UV:%[0-9]+]]:vgpr_32(i32), [[UV1:%[0-9]+]]:vgpr_32(i32), [[UV2:%[0-9]+]]:vgpr_32(i32), [[UV3:%[0-9]+]]:vgpr_32(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV]](i32), implicit $exec + ; GFX12-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV1]](i32), implicit $exec + ; GFX12-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV2]](i32), implicit $exec + ; GFX12-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV3]](i32), implicit $exec + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](i32), [[V_READFIRSTLANE_B32_1]](i32), [[V_READFIRSTLANE_B32_2]](i32), [[V_READFIRSTLANE_B32_3]](i32) + ; GFX12-NEXT: [[UV4:%[0-9]+]]:vgpr(i64), [[UV5:%[0-9]+]]:vgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; GFX12-NEXT: [[UV6:%[0-9]+]]:sgpr(i64), [[UV7:%[0-9]+]]:sgpr(i64) = 
G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x i32>) + ; GFX12-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV6]](i64), [[UV4]] + ; GFX12-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV7]](i64), [[UV5]] + ; GFX12-NEXT: [[AND:%[0-9]+]]:vcc(i1) = G_AND [[ICMP]], [[ICMP1]] + ; GFX12-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](i1) + ; GFX12-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[INTRINSIC_CONVERGENT]](i32), implicit-def $exec, implicit-def $scc, implicit $exec ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: bb.3: ; GFX12-NEXT: successors: %bb.4, %bb.2 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C3]](s32), [[C1]], [[C2]], 4095, 0, 0 :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1) + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(f32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x i32>), [[C3]](i32), [[C1]], [[C2]], 4095, 0, 0 :: (dereferenceable invariant load (i32) from unknown-address + 4095, align 1) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -1991,7 +1991,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc) ; GFX12-NEXT: $exec_lo = S_MOV_B32_term [[S_MOV_B32_]] ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: bb.5: - ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 4095, i32 0) ret float %val @@ -2003,38 +2003,38 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc) ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GFX7-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4096 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GFX7-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[C2:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 ; GFX7-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: ; GFX7-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.3 - ; GFX7-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), 
[[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec - ; GFX7-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GFX7-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GFX7-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; GFX7-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; GFX7-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX7-NEXT: [[UV:%[0-9]+]]:vgpr_32(i32), [[UV1:%[0-9]+]]:vgpr_32(i32), [[UV2:%[0-9]+]]:vgpr_32(i32), [[UV3:%[0-9]+]]:vgpr_32(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV]](i32), implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV1]](i32), implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV2]](i32), implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV3]](i32), implicit $exec + ; GFX7-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](i32), [[V_READFIRSTLANE_B32_1]](i32), [[V_READFIRSTLANE_B32_2]](i32), [[V_READFIRSTLANE_B32_3]](i32) + ; GFX7-NEXT: [[UV4:%[0-9]+]]:vgpr(i64), [[UV5:%[0-9]+]]:vgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; GFX7-NEXT: [[UV6:%[0-9]+]]:sgpr(i64), [[UV7:%[0-9]+]]:sgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x i32>) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV6]](i64), [[UV4]] + ; GFX7-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV7]](i64), [[UV5]] + ; GFX7-NEXT: [[AND:%[0-9]+]]:vcc(i1) = G_AND [[ICMP]], [[ICMP1]] + ; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](i1) + ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](i64), implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4, %bb.2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(f32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x i32>), [[C2]](i32), 
[[COPY4]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (i32)) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -2042,45 +2042,45 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc) ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: - ; GFX7-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GFX7-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](f32) ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX12-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4096 ; GFX12: bb.1 (%ir-block.0): ; GFX12-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 - ; GFX12-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4096 + ; GFX12-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[C3:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 ; GFX12-NEXT: [[DEF:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: bb.2: ; GFX12-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[DEF]], %bb.1, %14, %bb.3 - ; GFX12-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; GFX12-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; GFX12-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; GFX12-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GFX12-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GFX12-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; GFX12-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; GFX12-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; GFX12-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], 
[[ICMP1]] - ; GFX12-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; GFX12-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[INTRINSIC_CONVERGENT]](s32), implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX12-NEXT: [[UV:%[0-9]+]]:vgpr_32(i32), [[UV1:%[0-9]+]]:vgpr_32(i32), [[UV2:%[0-9]+]]:vgpr_32(i32), [[UV3:%[0-9]+]]:vgpr_32(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV]](i32), implicit $exec + ; GFX12-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV1]](i32), implicit $exec + ; GFX12-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV2]](i32), implicit $exec + ; GFX12-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV3]](i32), implicit $exec + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](i32), [[V_READFIRSTLANE_B32_1]](i32), [[V_READFIRSTLANE_B32_2]](i32), [[V_READFIRSTLANE_B32_3]](i32) + ; GFX12-NEXT: [[UV4:%[0-9]+]]:vgpr(i64), [[UV5:%[0-9]+]]:vgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; GFX12-NEXT: [[UV6:%[0-9]+]]:sgpr(i64), [[UV7:%[0-9]+]]:sgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x i32>) + ; GFX12-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV6]](i64), [[UV4]] + ; GFX12-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV7]](i64), [[UV5]] + ; GFX12-NEXT: [[AND:%[0-9]+]]:vcc(i1) = G_AND [[ICMP]], [[ICMP1]] + ; GFX12-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](i1) + ; GFX12-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[INTRINSIC_CONVERGENT]](i32), implicit-def $exec, implicit-def $scc, implicit $exec ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: bb.3: ; GFX12-NEXT: successors: %bb.4, %bb.2 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C3]](s32), [[C1]], [[C2]], 4096, 0, 0 :: (dereferenceable invariant load (s32) from unknown-address + 4096) + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(f32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x i32>), [[C3]](i32), [[C1]], [[C2]], 4096, 0, 0 :: (dereferenceable invariant load (i32) from unknown-address + 4096) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -2088,7 +2088,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc) ; GFX12-NEXT: $exec_lo = S_MOV_B32_term [[S_MOV_B32_]] ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: bb.5: - ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 4096, i32 0) ret float %val @@ -2101,40 +2101,40 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> % ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GFX7-NEXT: 
[[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4064 - ; GFX7-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] - ; GFX7-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4064 + ; GFX7-NEXT: [[ADD:%[0-9]+]]:sgpr(i32) = G_ADD [[COPY4]], [[C]] + ; GFX7-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[C2:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 ; GFX7-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: ; GFX7-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.3 - ; GFX7-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec - ; GFX7-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GFX7-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GFX7-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; GFX7-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; GFX7-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX7-NEXT: [[UV:%[0-9]+]]:vgpr_32(i32), [[UV1:%[0-9]+]]:vgpr_32(i32), [[UV2:%[0-9]+]]:vgpr_32(i32), [[UV3:%[0-9]+]]:vgpr_32(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV]](i32), implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV1]](i32), implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV2]](i32), implicit $exec + ; GFX7-NEXT: 
[[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV3]](i32), implicit $exec + ; GFX7-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](i32), [[V_READFIRSTLANE_B32_1]](i32), [[V_READFIRSTLANE_B32_2]](i32), [[V_READFIRSTLANE_B32_3]](i32) + ; GFX7-NEXT: [[UV4:%[0-9]+]]:vgpr(i64), [[UV5:%[0-9]+]]:vgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; GFX7-NEXT: [[UV6:%[0-9]+]]:sgpr(i64), [[UV7:%[0-9]+]]:sgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x i32>) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV6]](i64), [[UV4]] + ; GFX7-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV7]](i64), [[UV5]] + ; GFX7-NEXT: [[AND:%[0-9]+]]:vcc(i1) = G_AND [[ICMP]], [[ICMP1]] + ; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](i1) + ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](i64), implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4, %bb.2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4064, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4080, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x i32>), [[C2]](i32), [[C1]], [[COPY4]], 4064, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x i32>), [[C2]](i32), [[C1]], [[COPY4]], 4080, 0, 0 :: (dereferenceable invariant load (i128), align 4) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -2142,56 +2142,56 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> % ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: - ; GFX7-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GFX7-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GFX7-NEXT: $vgpr0 = COPY [[UV8]](s32) - ; GFX7-NEXT: $vgpr1 = COPY [[UV9]](s32) - ; GFX7-NEXT: $vgpr2 = COPY [[UV10]](s32) - ; GFX7-NEXT: $vgpr3 = COPY [[UV11]](s32) - ; GFX7-NEXT: $vgpr4 = COPY [[UV12]](s32) - ; GFX7-NEXT: $vgpr5 = COPY [[UV13]](s32) - ; GFX7-NEXT: $vgpr6 = COPY [[UV14]](s32) - ; GFX7-NEXT: $vgpr7 = COPY [[UV15]](s32) + ; GFX7-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x f32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x f32>), [[AMDGPU_BUFFER_LOAD1]](<4 x f32>) + ; GFX7-NEXT: [[UV8:%[0-9]+]]:vgpr(f32), [[UV9:%[0-9]+]]:vgpr(f32), [[UV10:%[0-9]+]]:vgpr(f32), [[UV11:%[0-9]+]]:vgpr(f32), [[UV12:%[0-9]+]]:vgpr(f32), [[UV13:%[0-9]+]]:vgpr(f32), [[UV14:%[0-9]+]]:vgpr(f32), [[UV15:%[0-9]+]]:vgpr(f32) = G_UNMERGE_VALUES 
[[CONCAT_VECTORS]](<8 x f32>) + ; GFX7-NEXT: $vgpr0 = COPY [[UV8]](f32) + ; GFX7-NEXT: $vgpr1 = COPY [[UV9]](f32) + ; GFX7-NEXT: $vgpr2 = COPY [[UV10]](f32) + ; GFX7-NEXT: $vgpr3 = COPY [[UV11]](f32) + ; GFX7-NEXT: $vgpr4 = COPY [[UV12]](f32) + ; GFX7-NEXT: $vgpr5 = COPY [[UV13]](f32) + ; GFX7-NEXT: $vgpr6 = COPY [[UV14]](f32) + ; GFX7-NEXT: $vgpr7 = COPY [[UV15]](f32) ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; ; GFX12-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4064 ; GFX12: bb.1 (%ir-block.0): ; GFX12-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4064 - ; GFX12-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] - ; GFX12-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4064 + ; GFX12-NEXT: [[ADD:%[0-9]+]]:sgpr(i32) = G_ADD [[COPY4]], [[C]] + ; GFX12-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[C2:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 ; GFX12-NEXT: [[DEF:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: bb.2: ; GFX12-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[DEF]], %bb.1, %25, %bb.3 - ; GFX12-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; GFX12-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; GFX12-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; GFX12-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GFX12-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GFX12-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; GFX12-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; GFX12-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP 
intpred(eq), [[UV7]](s64), [[UV5]] - ; GFX12-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; GFX12-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; GFX12-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[INTRINSIC_CONVERGENT]](s32), implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX12-NEXT: [[UV:%[0-9]+]]:vgpr_32(i32), [[UV1:%[0-9]+]]:vgpr_32(i32), [[UV2:%[0-9]+]]:vgpr_32(i32), [[UV3:%[0-9]+]]:vgpr_32(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV]](i32), implicit $exec + ; GFX12-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV1]](i32), implicit $exec + ; GFX12-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV2]](i32), implicit $exec + ; GFX12-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV3]](i32), implicit $exec + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](i32), [[V_READFIRSTLANE_B32_1]](i32), [[V_READFIRSTLANE_B32_2]](i32), [[V_READFIRSTLANE_B32_3]](i32) + ; GFX12-NEXT: [[UV4:%[0-9]+]]:vgpr(i64), [[UV5:%[0-9]+]]:vgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; GFX12-NEXT: [[UV6:%[0-9]+]]:sgpr(i64), [[UV7:%[0-9]+]]:sgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x i32>) + ; GFX12-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV6]](i64), [[UV4]] + ; GFX12-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV7]](i64), [[UV5]] + ; GFX12-NEXT: [[AND:%[0-9]+]]:vcc(i1) = G_AND [[ICMP]], [[ICMP1]] + ; GFX12-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](i1) + ; GFX12-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[INTRINSIC_CONVERGENT]](i32), implicit-def $exec, implicit-def $scc, implicit $exec ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: bb.3: ; GFX12-NEXT: successors: %bb.4, %bb.2 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4064, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4080, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x i32>), [[C2]](i32), [[C1]], [[COPY4]], 4064, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x i32>), [[C2]](i32), [[C1]], [[COPY4]], 4080, 0, 0 :: (dereferenceable invariant load (i128), align 4) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -2199,16 +2199,16 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> % ; GFX12-NEXT: $exec_lo = S_MOV_B32_term [[S_MOV_B32_]] ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: bb.5: - ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GFX12-NEXT: 
[[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV8]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV9]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV10]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV11]](s32) - ; GFX12-NEXT: $vgpr4 = COPY [[UV12]](s32) - ; GFX12-NEXT: $vgpr5 = COPY [[UV13]](s32) - ; GFX12-NEXT: $vgpr6 = COPY [[UV14]](s32) - ; GFX12-NEXT: $vgpr7 = COPY [[UV15]](s32) + ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x f32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x f32>), [[AMDGPU_BUFFER_LOAD1]](<4 x f32>) + ; GFX12-NEXT: [[UV8:%[0-9]+]]:vgpr(f32), [[UV9:%[0-9]+]]:vgpr(f32), [[UV10:%[0-9]+]]:vgpr(f32), [[UV11:%[0-9]+]]:vgpr(f32), [[UV12:%[0-9]+]]:vgpr(f32), [[UV13:%[0-9]+]]:vgpr(f32), [[UV14:%[0-9]+]]:vgpr(f32), [[UV15:%[0-9]+]]:vgpr(f32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV8]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV9]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV10]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV11]](f32) + ; GFX12-NEXT: $vgpr4 = COPY [[UV12]](f32) + ; GFX12-NEXT: $vgpr5 = COPY [[UV13]](f32) + ; GFX12-NEXT: $vgpr6 = COPY [[UV14]](f32) + ; GFX12-NEXT: $vgpr7 = COPY [[UV15]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4064 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -2222,41 +2222,41 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> % ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4068 - ; GFX7-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) - ; GFX7-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4068 + ; GFX7-NEXT: [[ADD:%[0-9]+]]:sgpr(i32) = G_ADD [[COPY4]], [[C]] + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[ADD]](i32) + ; GFX7-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[C2:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 ; GFX7-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: ; GFX7-NEXT: 
[[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %27, %bb.3 - ; GFX7-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec - ; GFX7-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GFX7-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GFX7-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; GFX7-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; GFX7-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX7-NEXT: [[UV:%[0-9]+]]:vgpr_32(i32), [[UV1:%[0-9]+]]:vgpr_32(i32), [[UV2:%[0-9]+]]:vgpr_32(i32), [[UV3:%[0-9]+]]:vgpr_32(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV]](i32), implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV1]](i32), implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV2]](i32), implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV3]](i32), implicit $exec + ; GFX7-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](i32), [[V_READFIRSTLANE_B32_1]](i32), [[V_READFIRSTLANE_B32_2]](i32), [[V_READFIRSTLANE_B32_3]](i32) + ; GFX7-NEXT: [[UV4:%[0-9]+]]:vgpr(i64), [[UV5:%[0-9]+]]:vgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; GFX7-NEXT: [[UV6:%[0-9]+]]:sgpr(i64), [[UV7:%[0-9]+]]:sgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x i32>) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV6]](i64), [[UV4]] + ; GFX7-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV7]](i64), [[UV5]] + ; GFX7-NEXT: [[AND:%[0-9]+]]:vcc(i1) = G_AND [[ICMP]], [[ICMP1]] + ; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](i1) + ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](i64), implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4, %bb.2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 
0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x i32>), [[C2]](i32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x i32>), [[C2]](i32), [[COPY5]], [[C1]], 16, 0, 0 :: (dereferenceable invariant load (i128), align 4) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -2264,56 +2264,56 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> % ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: - ; GFX7-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GFX7-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GFX7-NEXT: $vgpr0 = COPY [[UV8]](s32) - ; GFX7-NEXT: $vgpr1 = COPY [[UV9]](s32) - ; GFX7-NEXT: $vgpr2 = COPY [[UV10]](s32) - ; GFX7-NEXT: $vgpr3 = COPY [[UV11]](s32) - ; GFX7-NEXT: $vgpr4 = COPY [[UV12]](s32) - ; GFX7-NEXT: $vgpr5 = COPY [[UV13]](s32) - ; GFX7-NEXT: $vgpr6 = COPY [[UV14]](s32) - ; GFX7-NEXT: $vgpr7 = COPY [[UV15]](s32) + ; GFX7-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x f32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x f32>), [[AMDGPU_BUFFER_LOAD1]](<4 x f32>) + ; GFX7-NEXT: [[UV8:%[0-9]+]]:vgpr(f32), [[UV9:%[0-9]+]]:vgpr(f32), [[UV10:%[0-9]+]]:vgpr(f32), [[UV11:%[0-9]+]]:vgpr(f32), [[UV12:%[0-9]+]]:vgpr(f32), [[UV13:%[0-9]+]]:vgpr(f32), [[UV14:%[0-9]+]]:vgpr(f32), [[UV15:%[0-9]+]]:vgpr(f32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x f32>) + ; GFX7-NEXT: $vgpr0 = COPY [[UV8]](f32) + ; GFX7-NEXT: $vgpr1 = COPY [[UV9]](f32) + ; GFX7-NEXT: $vgpr2 = COPY [[UV10]](f32) + ; GFX7-NEXT: $vgpr3 = COPY [[UV11]](f32) + ; GFX7-NEXT: $vgpr4 = COPY [[UV12]](f32) + ; GFX7-NEXT: $vgpr5 = COPY [[UV13]](f32) + ; GFX7-NEXT: $vgpr6 = COPY [[UV14]](f32) + ; GFX7-NEXT: $vgpr7 = COPY [[UV15]](f32) ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; ; GFX12-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4068 ; GFX12: bb.1 (%ir-block.0): ; GFX12-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4068 - ; GFX12-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] - ; GFX12-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; 
GFX12-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4068 + ; GFX12-NEXT: [[ADD:%[0-9]+]]:sgpr(i32) = G_ADD [[COPY4]], [[C]] + ; GFX12-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[C2:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 ; GFX12-NEXT: [[DEF:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: bb.2: ; GFX12-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[DEF]], %bb.1, %25, %bb.3 - ; GFX12-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; GFX12-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; GFX12-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; GFX12-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GFX12-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GFX12-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; GFX12-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; GFX12-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; GFX12-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; GFX12-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; GFX12-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[INTRINSIC_CONVERGENT]](s32), implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX12-NEXT: [[UV:%[0-9]+]]:vgpr_32(i32), [[UV1:%[0-9]+]]:vgpr_32(i32), [[UV2:%[0-9]+]]:vgpr_32(i32), [[UV3:%[0-9]+]]:vgpr_32(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV]](i32), implicit $exec + ; GFX12-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV1]](i32), implicit $exec + ; GFX12-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV2]](i32), implicit $exec + ; GFX12-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV3]](i32), implicit $exec + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](i32), [[V_READFIRSTLANE_B32_1]](i32), [[V_READFIRSTLANE_B32_2]](i32), [[V_READFIRSTLANE_B32_3]](i32) + ; GFX12-NEXT: [[UV4:%[0-9]+]]:vgpr(i64), 
[[UV5:%[0-9]+]]:vgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; GFX12-NEXT: [[UV6:%[0-9]+]]:sgpr(i64), [[UV7:%[0-9]+]]:sgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x i32>) + ; GFX12-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV6]](i64), [[UV4]] + ; GFX12-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV7]](i64), [[UV5]] + ; GFX12-NEXT: [[AND:%[0-9]+]]:vcc(i1) = G_AND [[ICMP]], [[ICMP1]] + ; GFX12-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](i1) + ; GFX12-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[INTRINSIC_CONVERGENT]](i32), implicit-def $exec, implicit-def $scc, implicit $exec ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: bb.3: ; GFX12-NEXT: successors: %bb.4, %bb.2 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4068, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4084, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x i32>), [[C2]](i32), [[C1]], [[COPY4]], 4068, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x i32>), [[C2]](i32), [[C1]], [[COPY4]], 4084, 0, 0 :: (dereferenceable invariant load (i128), align 4) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -2321,16 +2321,16 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> % ; GFX12-NEXT: $exec_lo = S_MOV_B32_term [[S_MOV_B32_]] ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: bb.5: - ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GFX12-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV8]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV9]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV10]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV11]](s32) - ; GFX12-NEXT: $vgpr4 = COPY [[UV12]](s32) - ; GFX12-NEXT: $vgpr5 = COPY [[UV13]](s32) - ; GFX12-NEXT: $vgpr6 = COPY [[UV14]](s32) - ; GFX12-NEXT: $vgpr7 = COPY [[UV15]](s32) + ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x f32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x f32>), [[AMDGPU_BUFFER_LOAD1]](<4 x f32>) + ; GFX12-NEXT: [[UV8:%[0-9]+]]:vgpr(f32), [[UV9:%[0-9]+]]:vgpr(f32), [[UV10:%[0-9]+]]:vgpr(f32), [[UV11:%[0-9]+]]:vgpr(f32), [[UV12:%[0-9]+]]:vgpr(f32), [[UV13:%[0-9]+]]:vgpr(f32), [[UV14:%[0-9]+]]:vgpr(f32), [[UV15:%[0-9]+]]:vgpr(f32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV8]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV9]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV10]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV11]](f32) + ; GFX12-NEXT: $vgpr4 = COPY [[UV12]](f32) + ; GFX12-NEXT: $vgpr5 = COPY [[UV13]](f32) + ; 
GFX12-NEXT: $vgpr6 = COPY [[UV14]](f32) + ; GFX12-NEXT: $vgpr7 = COPY [[UV15]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4068 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -2342,41 +2342,41 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> % ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 - ; GFX7-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) - ; GFX7-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4096 + ; GFX7-NEXT: [[ADD:%[0-9]+]]:sgpr(i32) = G_ADD [[COPY4]], [[C]] + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[ADD]](i32) + ; GFX7-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[C2:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 ; GFX7-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: ; GFX7-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %27, %bb.3 - ; GFX7-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec - ; GFX7-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GFX7-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GFX7-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; GFX7-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; 
GFX7-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX7-NEXT: [[UV:%[0-9]+]]:vgpr_32(i32), [[UV1:%[0-9]+]]:vgpr_32(i32), [[UV2:%[0-9]+]]:vgpr_32(i32), [[UV3:%[0-9]+]]:vgpr_32(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV]](i32), implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV1]](i32), implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV2]](i32), implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV3]](i32), implicit $exec + ; GFX7-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](i32), [[V_READFIRSTLANE_B32_1]](i32), [[V_READFIRSTLANE_B32_2]](i32), [[V_READFIRSTLANE_B32_3]](i32) + ; GFX7-NEXT: [[UV4:%[0-9]+]]:vgpr(i64), [[UV5:%[0-9]+]]:vgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; GFX7-NEXT: [[UV6:%[0-9]+]]:sgpr(i64), [[UV7:%[0-9]+]]:sgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x i32>) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV6]](i64), [[UV4]] + ; GFX7-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV7]](i64), [[UV5]] + ; GFX7-NEXT: [[AND:%[0-9]+]]:vcc(i1) = G_AND [[ICMP]], [[ICMP1]] + ; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](i1) + ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](i64), implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4, %bb.2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x i32>), [[C2]](i32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x i32>), [[C2]](i32), [[COPY5]], [[C1]], 16, 0, 0 :: (dereferenceable invariant load (i128), align 4) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -2384,56 +2384,56 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> % ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: - ; GFX7-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GFX7-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), 
[[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GFX7-NEXT: $vgpr0 = COPY [[UV8]](s32) - ; GFX7-NEXT: $vgpr1 = COPY [[UV9]](s32) - ; GFX7-NEXT: $vgpr2 = COPY [[UV10]](s32) - ; GFX7-NEXT: $vgpr3 = COPY [[UV11]](s32) - ; GFX7-NEXT: $vgpr4 = COPY [[UV12]](s32) - ; GFX7-NEXT: $vgpr5 = COPY [[UV13]](s32) - ; GFX7-NEXT: $vgpr6 = COPY [[UV14]](s32) - ; GFX7-NEXT: $vgpr7 = COPY [[UV15]](s32) + ; GFX7-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x f32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x f32>), [[AMDGPU_BUFFER_LOAD1]](<4 x f32>) + ; GFX7-NEXT: [[UV8:%[0-9]+]]:vgpr(f32), [[UV9:%[0-9]+]]:vgpr(f32), [[UV10:%[0-9]+]]:vgpr(f32), [[UV11:%[0-9]+]]:vgpr(f32), [[UV12:%[0-9]+]]:vgpr(f32), [[UV13:%[0-9]+]]:vgpr(f32), [[UV14:%[0-9]+]]:vgpr(f32), [[UV15:%[0-9]+]]:vgpr(f32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x f32>) + ; GFX7-NEXT: $vgpr0 = COPY [[UV8]](f32) + ; GFX7-NEXT: $vgpr1 = COPY [[UV9]](f32) + ; GFX7-NEXT: $vgpr2 = COPY [[UV10]](f32) + ; GFX7-NEXT: $vgpr3 = COPY [[UV11]](f32) + ; GFX7-NEXT: $vgpr4 = COPY [[UV12]](f32) + ; GFX7-NEXT: $vgpr5 = COPY [[UV13]](f32) + ; GFX7-NEXT: $vgpr6 = COPY [[UV14]](f32) + ; GFX7-NEXT: $vgpr7 = COPY [[UV15]](f32) ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; ; GFX12-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4096 ; GFX12: bb.1 (%ir-block.0): ; GFX12-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 - ; GFX12-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] - ; GFX12-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4096 + ; GFX12-NEXT: [[ADD:%[0-9]+]]:sgpr(i32) = G_ADD [[COPY4]], [[C]] + ; GFX12-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[C2:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 ; GFX12-NEXT: [[DEF:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: bb.2: ; GFX12-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[DEF]], %bb.1, %25, %bb.3 - ; GFX12-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; GFX12-NEXT: 
[[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; GFX12-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; GFX12-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GFX12-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GFX12-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; GFX12-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; GFX12-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; GFX12-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; GFX12-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; GFX12-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[INTRINSIC_CONVERGENT]](s32), implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX12-NEXT: [[UV:%[0-9]+]]:vgpr_32(i32), [[UV1:%[0-9]+]]:vgpr_32(i32), [[UV2:%[0-9]+]]:vgpr_32(i32), [[UV3:%[0-9]+]]:vgpr_32(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV]](i32), implicit $exec + ; GFX12-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV1]](i32), implicit $exec + ; GFX12-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV2]](i32), implicit $exec + ; GFX12-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV3]](i32), implicit $exec + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](i32), [[V_READFIRSTLANE_B32_1]](i32), [[V_READFIRSTLANE_B32_2]](i32), [[V_READFIRSTLANE_B32_3]](i32) + ; GFX12-NEXT: [[UV4:%[0-9]+]]:vgpr(i64), [[UV5:%[0-9]+]]:vgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; GFX12-NEXT: [[UV6:%[0-9]+]]:sgpr(i64), [[UV7:%[0-9]+]]:sgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x i32>) + ; GFX12-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV6]](i64), [[UV4]] + ; GFX12-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV7]](i64), [[UV5]] + ; GFX12-NEXT: [[AND:%[0-9]+]]:vcc(i1) = G_AND [[ICMP]], [[ICMP1]] + ; GFX12-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](i1) + ; GFX12-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[INTRINSIC_CONVERGENT]](i32), implicit-def $exec, implicit-def $scc, implicit $exec ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: bb.3: ; GFX12-NEXT: successors: %bb.4, %bb.2 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4096, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4112, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 
x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x i32>), [[C2]](i32), [[C1]], [[COPY4]], 4096, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x i32>), [[C2]](i32), [[C1]], [[COPY4]], 4112, 0, 0 :: (dereferenceable invariant load (i128), align 4) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -2441,16 +2441,16 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> % ; GFX12-NEXT: $exec_lo = S_MOV_B32_term [[S_MOV_B32_]] ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: bb.5: - ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GFX12-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV8]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV9]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV10]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV11]](s32) - ; GFX12-NEXT: $vgpr4 = COPY [[UV12]](s32) - ; GFX12-NEXT: $vgpr5 = COPY [[UV13]](s32) - ; GFX12-NEXT: $vgpr6 = COPY [[UV14]](s32) - ; GFX12-NEXT: $vgpr7 = COPY [[UV15]](s32) + ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x f32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x f32>), [[AMDGPU_BUFFER_LOAD1]](<4 x f32>) + ; GFX12-NEXT: [[UV8:%[0-9]+]]:vgpr(f32), [[UV9:%[0-9]+]]:vgpr(f32), [[UV10:%[0-9]+]]:vgpr(f32), [[UV11:%[0-9]+]]:vgpr(f32), [[UV12:%[0-9]+]]:vgpr(f32), [[UV13:%[0-9]+]]:vgpr(f32), [[UV14:%[0-9]+]]:vgpr(f32), [[UV15:%[0-9]+]]:vgpr(f32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV8]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV9]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV10]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV11]](f32) + ; GFX12-NEXT: $vgpr4 = COPY [[UV12]](f32) + ; GFX12-NEXT: $vgpr5 = COPY [[UV13]](f32) + ; GFX12-NEXT: $vgpr6 = COPY [[UV14]](f32) + ; GFX12-NEXT: $vgpr7 = COPY [[UV15]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4096 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -2462,40 +2462,40 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5000 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GFX7-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] - ; GFX7-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; 
GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr4 + ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 5000 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GFX7-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[COPY4]], [[COPY5]] + ; GFX7-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 ; GFX7-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: ; GFX7-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.3 - ; GFX7-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec - ; GFX7-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GFX7-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GFX7-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; GFX7-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; GFX7-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX7-NEXT: [[UV:%[0-9]+]]:vgpr_32(i32), [[UV1:%[0-9]+]]:vgpr_32(i32), [[UV2:%[0-9]+]]:vgpr_32(i32), [[UV3:%[0-9]+]]:vgpr_32(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV]](i32), implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV1]](i32), implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV2]](i32), implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV3]](i32), implicit $exec + ; GFX7-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](i32), [[V_READFIRSTLANE_B32_1]](i32), [[V_READFIRSTLANE_B32_2]](i32), [[V_READFIRSTLANE_B32_3]](i32) + ; GFX7-NEXT: [[UV4:%[0-9]+]]:vgpr(i64), [[UV5:%[0-9]+]]:vgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; GFX7-NEXT: [[UV6:%[0-9]+]]:sgpr(i64), [[UV7:%[0-9]+]]:sgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x i32>) + ; 
GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV6]](i64), [[UV4]] + ; GFX7-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV7]](i64), [[UV5]] + ; GFX7-NEXT: [[AND:%[0-9]+]]:vcc(i1) = G_AND [[ICMP]], [[ICMP1]] + ; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](i1) + ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](i64), implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4, %bb.2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (i128), align 4) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -2503,57 +2503,57 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: - ; GFX7-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GFX7-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GFX7-NEXT: $vgpr0 = COPY [[UV8]](s32) - ; GFX7-NEXT: $vgpr1 = COPY [[UV9]](s32) - ; GFX7-NEXT: $vgpr2 = COPY [[UV10]](s32) - ; GFX7-NEXT: $vgpr3 = COPY [[UV11]](s32) - ; GFX7-NEXT: $vgpr4 = COPY [[UV12]](s32) - ; GFX7-NEXT: $vgpr5 = COPY [[UV13]](s32) - ; GFX7-NEXT: $vgpr6 = COPY [[UV14]](s32) - ; GFX7-NEXT: $vgpr7 = COPY [[UV15]](s32) + ; GFX7-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x f32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x f32>), [[AMDGPU_BUFFER_LOAD1]](<4 x f32>) + ; GFX7-NEXT: [[UV8:%[0-9]+]]:vgpr(f32), [[UV9:%[0-9]+]]:vgpr(f32), [[UV10:%[0-9]+]]:vgpr(f32), [[UV11:%[0-9]+]]:vgpr(f32), [[UV12:%[0-9]+]]:vgpr(f32), [[UV13:%[0-9]+]]:vgpr(f32), [[UV14:%[0-9]+]]:vgpr(f32), [[UV15:%[0-9]+]]:vgpr(f32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x f32>) + ; GFX7-NEXT: $vgpr0 = COPY [[UV8]](f32) + ; GFX7-NEXT: $vgpr1 = COPY [[UV9]](f32) + ; GFX7-NEXT: $vgpr2 = COPY [[UV10]](f32) + ; GFX7-NEXT: $vgpr3 = COPY [[UV11]](f32) + ; GFX7-NEXT: $vgpr4 = COPY [[UV12]](f32) + ; GFX7-NEXT: $vgpr5 = COPY [[UV13]](f32) + ; GFX7-NEXT: $vgpr6 = COPY [[UV14]](f32) + ; GFX7-NEXT: $vgpr7 = COPY [[UV15]](f32) ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; ; GFX12-LABEL: 
name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GFX12: bb.1 (%ir-block.0): ; GFX12-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5000 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GFX12-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] - ; GFX12-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr4 + ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 5000 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GFX12-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[COPY4]], [[COPY5]] + ; GFX12-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[C2:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 ; GFX12-NEXT: [[DEF:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: bb.2: ; GFX12-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[DEF]], %bb.1, %26, %bb.3 - ; GFX12-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; GFX12-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; GFX12-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; GFX12-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GFX12-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GFX12-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; GFX12-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; GFX12-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; GFX12-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; GFX12-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; GFX12-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[INTRINSIC_CONVERGENT]](s32), implicit-def $exec, implicit-def $scc, implicit $exec 
+ ; GFX12-NEXT: [[UV:%[0-9]+]]:vgpr_32(i32), [[UV1:%[0-9]+]]:vgpr_32(i32), [[UV2:%[0-9]+]]:vgpr_32(i32), [[UV3:%[0-9]+]]:vgpr_32(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV]](i32), implicit $exec + ; GFX12-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV1]](i32), implicit $exec + ; GFX12-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV2]](i32), implicit $exec + ; GFX12-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV3]](i32), implicit $exec + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](i32), [[V_READFIRSTLANE_B32_1]](i32), [[V_READFIRSTLANE_B32_2]](i32), [[V_READFIRSTLANE_B32_3]](i32) + ; GFX12-NEXT: [[UV4:%[0-9]+]]:vgpr(i64), [[UV5:%[0-9]+]]:vgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; GFX12-NEXT: [[UV6:%[0-9]+]]:sgpr(i64), [[UV7:%[0-9]+]]:sgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x i32>) + ; GFX12-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV6]](i64), [[UV4]] + ; GFX12-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV7]](i64), [[UV5]] + ; GFX12-NEXT: [[AND:%[0-9]+]]:vcc(i1) = G_AND [[ICMP]], [[ICMP1]] + ; GFX12-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](i1) + ; GFX12-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[INTRINSIC_CONVERGENT]](i32), implicit-def $exec, implicit-def $scc, implicit $exec ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: bb.3: ; GFX12-NEXT: successors: %bb.4, %bb.2 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 5000, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 5016, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x i32>), [[C2]](i32), [[COPY4]], [[C1]], 5000, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x i32>), [[C2]](i32), [[COPY4]], [[C1]], 5016, 0, 0 :: (dereferenceable invariant load (i128), align 4) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -2561,16 +2561,16 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GFX12-NEXT: $exec_lo = S_MOV_B32_term [[S_MOV_B32_]] ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: bb.5: - ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GFX12-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV8]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV9]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV10]](s32) - ; GFX12-NEXT: $vgpr3 = COPY 
[[UV11]](s32) - ; GFX12-NEXT: $vgpr4 = COPY [[UV12]](s32) - ; GFX12-NEXT: $vgpr5 = COPY [[UV13]](s32) - ; GFX12-NEXT: $vgpr6 = COPY [[UV14]](s32) - ; GFX12-NEXT: $vgpr7 = COPY [[UV15]](s32) + ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x f32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x f32>), [[AMDGPU_BUFFER_LOAD1]](<4 x f32>) + ; GFX12-NEXT: [[UV8:%[0-9]+]]:vgpr(f32), [[UV9:%[0-9]+]]:vgpr(f32), [[UV10:%[0-9]+]]:vgpr(f32), [[UV11:%[0-9]+]]:vgpr(f32), [[UV12:%[0-9]+]]:vgpr(f32), [[UV13:%[0-9]+]]:vgpr(f32), [[UV14:%[0-9]+]]:vgpr(f32), [[UV15:%[0-9]+]]:vgpr(f32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV8]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV9]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV10]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV11]](f32) + ; GFX12-NEXT: $vgpr4 = COPY [[UV12]](f32) + ; GFX12-NEXT: $vgpr5 = COPY [[UV13]](f32) + ; GFX12-NEXT: $vgpr6 = COPY [[UV14]](f32) + ; GFX12-NEXT: $vgpr7 = COPY [[UV15]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %offset.base, 5000 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -2582,40 +2582,40 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4076 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GFX7-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] - ; GFX7-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr4 + ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4076 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GFX7-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[COPY4]], [[COPY5]] + ; GFX7-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 ; GFX7-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: ; GFX7-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.3 - ; GFX7-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = 
V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec - ; GFX7-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GFX7-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GFX7-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; GFX7-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; GFX7-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX7-NEXT: [[UV:%[0-9]+]]:vgpr_32(i32), [[UV1:%[0-9]+]]:vgpr_32(i32), [[UV2:%[0-9]+]]:vgpr_32(i32), [[UV3:%[0-9]+]]:vgpr_32(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV]](i32), implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV1]](i32), implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV2]](i32), implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV3]](i32), implicit $exec + ; GFX7-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](i32), [[V_READFIRSTLANE_B32_1]](i32), [[V_READFIRSTLANE_B32_2]](i32), [[V_READFIRSTLANE_B32_3]](i32) + ; GFX7-NEXT: [[UV4:%[0-9]+]]:vgpr(i64), [[UV5:%[0-9]+]]:vgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; GFX7-NEXT: [[UV6:%[0-9]+]]:sgpr(i64), [[UV7:%[0-9]+]]:sgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x i32>) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV6]](i64), [[UV4]] + ; GFX7-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV7]](i64), [[UV5]] + ; GFX7-NEXT: [[AND:%[0-9]+]]:vcc(i1) = G_AND [[ICMP]], [[ICMP1]] + ; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](i1) + ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](i64), implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4, %bb.2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x f32>) = 
G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (i128), align 4) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -2623,57 +2623,57 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: - ; GFX7-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GFX7-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GFX7-NEXT: $vgpr0 = COPY [[UV8]](s32) - ; GFX7-NEXT: $vgpr1 = COPY [[UV9]](s32) - ; GFX7-NEXT: $vgpr2 = COPY [[UV10]](s32) - ; GFX7-NEXT: $vgpr3 = COPY [[UV11]](s32) - ; GFX7-NEXT: $vgpr4 = COPY [[UV12]](s32) - ; GFX7-NEXT: $vgpr5 = COPY [[UV13]](s32) - ; GFX7-NEXT: $vgpr6 = COPY [[UV14]](s32) - ; GFX7-NEXT: $vgpr7 = COPY [[UV15]](s32) + ; GFX7-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x f32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x f32>), [[AMDGPU_BUFFER_LOAD1]](<4 x f32>) + ; GFX7-NEXT: [[UV8:%[0-9]+]]:vgpr(f32), [[UV9:%[0-9]+]]:vgpr(f32), [[UV10:%[0-9]+]]:vgpr(f32), [[UV11:%[0-9]+]]:vgpr(f32), [[UV12:%[0-9]+]]:vgpr(f32), [[UV13:%[0-9]+]]:vgpr(f32), [[UV14:%[0-9]+]]:vgpr(f32), [[UV15:%[0-9]+]]:vgpr(f32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x f32>) + ; GFX7-NEXT: $vgpr0 = COPY [[UV8]](f32) + ; GFX7-NEXT: $vgpr1 = COPY [[UV9]](f32) + ; GFX7-NEXT: $vgpr2 = COPY [[UV10]](f32) + ; GFX7-NEXT: $vgpr3 = COPY [[UV11]](f32) + ; GFX7-NEXT: $vgpr4 = COPY [[UV12]](f32) + ; GFX7-NEXT: $vgpr5 = COPY [[UV13]](f32) + ; GFX7-NEXT: $vgpr6 = COPY [[UV14]](f32) + ; GFX7-NEXT: $vgpr7 = COPY [[UV15]](f32) ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; ; GFX12-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GFX12: bb.1 (%ir-block.0): ; GFX12-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4076 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GFX12-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] - ; GFX12-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = 
COPY $vgpr4 + ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4076 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GFX12-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[COPY4]], [[COPY5]] + ; GFX12-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[C2:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 ; GFX12-NEXT: [[DEF:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: bb.2: ; GFX12-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[DEF]], %bb.1, %26, %bb.3 - ; GFX12-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; GFX12-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; GFX12-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; GFX12-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GFX12-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GFX12-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; GFX12-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; GFX12-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; GFX12-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; GFX12-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; GFX12-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[INTRINSIC_CONVERGENT]](s32), implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX12-NEXT: [[UV:%[0-9]+]]:vgpr_32(i32), [[UV1:%[0-9]+]]:vgpr_32(i32), [[UV2:%[0-9]+]]:vgpr_32(i32), [[UV3:%[0-9]+]]:vgpr_32(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV]](i32), implicit $exec + ; GFX12-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV1]](i32), implicit $exec + ; GFX12-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV2]](i32), implicit $exec + ; GFX12-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV3]](i32), implicit $exec + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](i32), [[V_READFIRSTLANE_B32_1]](i32), [[V_READFIRSTLANE_B32_2]](i32), [[V_READFIRSTLANE_B32_3]](i32) + ; GFX12-NEXT: [[UV4:%[0-9]+]]:vgpr(i64), [[UV5:%[0-9]+]]:vgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; GFX12-NEXT: [[UV6:%[0-9]+]]:sgpr(i64), [[UV7:%[0-9]+]]:sgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x i32>) + ; GFX12-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV6]](i64), [[UV4]] + ; GFX12-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV7]](i64), [[UV5]] + ; GFX12-NEXT: [[AND:%[0-9]+]]:vcc(i1) = G_AND [[ICMP]], 
[[ICMP1]] + ; GFX12-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](i1) + ; GFX12-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[INTRINSIC_CONVERGENT]](i32), implicit-def $exec, implicit-def $scc, implicit $exec ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: bb.3: ; GFX12-NEXT: successors: %bb.4, %bb.2 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4076, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4092, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x i32>), [[C2]](i32), [[COPY4]], [[C1]], 4076, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x i32>), [[C2]](i32), [[COPY4]], [[C1]], 4092, 0, 0 :: (dereferenceable invariant load (i128), align 4) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -2681,16 +2681,16 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GFX12-NEXT: $exec_lo = S_MOV_B32_term [[S_MOV_B32_]] ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: bb.5: - ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GFX12-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV8]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV9]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV10]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV11]](s32) - ; GFX12-NEXT: $vgpr4 = COPY [[UV12]](s32) - ; GFX12-NEXT: $vgpr5 = COPY [[UV13]](s32) - ; GFX12-NEXT: $vgpr6 = COPY [[UV14]](s32) - ; GFX12-NEXT: $vgpr7 = COPY [[UV15]](s32) + ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x f32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x f32>), [[AMDGPU_BUFFER_LOAD1]](<4 x f32>) + ; GFX12-NEXT: [[UV8:%[0-9]+]]:vgpr(f32), [[UV9:%[0-9]+]]:vgpr(f32), [[UV10:%[0-9]+]]:vgpr(f32), [[UV11:%[0-9]+]]:vgpr(f32), [[UV12:%[0-9]+]]:vgpr(f32), [[UV13:%[0-9]+]]:vgpr(f32), [[UV14:%[0-9]+]]:vgpr(f32), [[UV15:%[0-9]+]]:vgpr(f32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV8]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV9]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV10]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV11]](f32) + ; GFX12-NEXT: $vgpr4 = COPY [[UV12]](f32) + ; GFX12-NEXT: $vgpr5 = COPY [[UV13]](f32) + ; GFX12-NEXT: $vgpr6 = COPY [[UV14]](f32) + ; GFX12-NEXT: $vgpr7 = COPY [[UV15]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %offset.base, 4076 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -2702,40 +2702,40 @@ define 
amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4080 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GFX7-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] - ; GFX7-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr4 + ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4080 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GFX7-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[COPY4]], [[COPY5]] + ; GFX7-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 ; GFX7-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: ; GFX7-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.3 - ; GFX7-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec - ; GFX7-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GFX7-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GFX7-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; GFX7-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; GFX7-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX7-NEXT: [[UV:%[0-9]+]]:vgpr_32(i32), [[UV1:%[0-9]+]]:vgpr_32(i32), [[UV2:%[0-9]+]]:vgpr_32(i32), [[UV3:%[0-9]+]]:vgpr_32(i32) = G_UNMERGE_VALUES 
[[BUILD_VECTOR]](<4 x i32>) + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV]](i32), implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV1]](i32), implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV2]](i32), implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV3]](i32), implicit $exec + ; GFX7-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](i32), [[V_READFIRSTLANE_B32_1]](i32), [[V_READFIRSTLANE_B32_2]](i32), [[V_READFIRSTLANE_B32_3]](i32) + ; GFX7-NEXT: [[UV4:%[0-9]+]]:vgpr(i64), [[UV5:%[0-9]+]]:vgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; GFX7-NEXT: [[UV6:%[0-9]+]]:sgpr(i64), [[UV7:%[0-9]+]]:sgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x i32>) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV6]](i64), [[UV4]] + ; GFX7-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV7]](i64), [[UV5]] + ; GFX7-NEXT: [[AND:%[0-9]+]]:vcc(i1) = G_AND [[ICMP]], [[ICMP1]] + ; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](i1) + ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](i64), implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4, %bb.2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x i32>), [[C1]](i32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (i128), align 4) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -2743,57 +2743,57 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: - ; GFX7-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GFX7-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GFX7-NEXT: $vgpr0 = COPY [[UV8]](s32) - ; GFX7-NEXT: $vgpr1 = COPY [[UV9]](s32) - ; GFX7-NEXT: $vgpr2 = COPY [[UV10]](s32) - ; GFX7-NEXT: $vgpr3 = COPY [[UV11]](s32) - ; GFX7-NEXT: $vgpr4 = COPY [[UV12]](s32) - ; GFX7-NEXT: $vgpr5 = COPY [[UV13]](s32) - ; GFX7-NEXT: $vgpr6 = COPY [[UV14]](s32) - ; GFX7-NEXT: $vgpr7 = COPY [[UV15]](s32) + ; GFX7-NEXT: 
[[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x f32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x f32>), [[AMDGPU_BUFFER_LOAD1]](<4 x f32>) + ; GFX7-NEXT: [[UV8:%[0-9]+]]:vgpr(f32), [[UV9:%[0-9]+]]:vgpr(f32), [[UV10:%[0-9]+]]:vgpr(f32), [[UV11:%[0-9]+]]:vgpr(f32), [[UV12:%[0-9]+]]:vgpr(f32), [[UV13:%[0-9]+]]:vgpr(f32), [[UV14:%[0-9]+]]:vgpr(f32), [[UV15:%[0-9]+]]:vgpr(f32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x f32>) + ; GFX7-NEXT: $vgpr0 = COPY [[UV8]](f32) + ; GFX7-NEXT: $vgpr1 = COPY [[UV9]](f32) + ; GFX7-NEXT: $vgpr2 = COPY [[UV10]](f32) + ; GFX7-NEXT: $vgpr3 = COPY [[UV11]](f32) + ; GFX7-NEXT: $vgpr4 = COPY [[UV12]](f32) + ; GFX7-NEXT: $vgpr5 = COPY [[UV13]](f32) + ; GFX7-NEXT: $vgpr6 = COPY [[UV14]](f32) + ; GFX7-NEXT: $vgpr7 = COPY [[UV15]](f32) ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; ; GFX12-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GFX12: bb.1 (%ir-block.0): ; GFX12-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4080 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GFX12-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] - ; GFX12-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr4 + ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4080 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GFX12-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[COPY4]], [[COPY5]] + ; GFX12-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[C2:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 ; GFX12-NEXT: [[DEF:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: bb.2: ; GFX12-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[DEF]], %bb.1, %26, %bb.3 - ; GFX12-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; GFX12-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; GFX12-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; GFX12-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 
x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GFX12-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GFX12-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; GFX12-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; GFX12-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; GFX12-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; GFX12-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; GFX12-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[INTRINSIC_CONVERGENT]](s32), implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX12-NEXT: [[UV:%[0-9]+]]:vgpr_32(i32), [[UV1:%[0-9]+]]:vgpr_32(i32), [[UV2:%[0-9]+]]:vgpr_32(i32), [[UV3:%[0-9]+]]:vgpr_32(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV]](i32), implicit $exec + ; GFX12-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV1]](i32), implicit $exec + ; GFX12-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV2]](i32), implicit $exec + ; GFX12-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV3]](i32), implicit $exec + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](i32), [[V_READFIRSTLANE_B32_1]](i32), [[V_READFIRSTLANE_B32_2]](i32), [[V_READFIRSTLANE_B32_3]](i32) + ; GFX12-NEXT: [[UV4:%[0-9]+]]:vgpr(i64), [[UV5:%[0-9]+]]:vgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; GFX12-NEXT: [[UV6:%[0-9]+]]:sgpr(i64), [[UV7:%[0-9]+]]:sgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x i32>) + ; GFX12-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV6]](i64), [[UV4]] + ; GFX12-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV7]](i64), [[UV5]] + ; GFX12-NEXT: [[AND:%[0-9]+]]:vcc(i1) = G_AND [[ICMP]], [[ICMP1]] + ; GFX12-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](i1) + ; GFX12-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[INTRINSIC_CONVERGENT]](i32), implicit-def $exec, implicit-def $scc, implicit $exec ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: bb.3: ; GFX12-NEXT: successors: %bb.4, %bb.2 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4080, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4096, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x i32>), [[C2]](i32), [[COPY4]], [[C1]], 4080, 0, 0 :: (dereferenceable invariant load (i128), align 4) + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x i32>), [[C2]](i32), [[COPY4]], [[C1]], 4096, 0, 0 :: (dereferenceable invariant load (i128), align 4) ; GFX12-NEXT: 
$exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -2801,16 +2801,16 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GFX12-NEXT: $exec_lo = S_MOV_B32_term [[S_MOV_B32_]] ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: bb.5: - ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GFX12-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV8]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV9]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV10]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV11]](s32) - ; GFX12-NEXT: $vgpr4 = COPY [[UV12]](s32) - ; GFX12-NEXT: $vgpr5 = COPY [[UV13]](s32) - ; GFX12-NEXT: $vgpr6 = COPY [[UV14]](s32) - ; GFX12-NEXT: $vgpr7 = COPY [[UV15]](s32) + ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x f32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x f32>), [[AMDGPU_BUFFER_LOAD1]](<4 x f32>) + ; GFX12-NEXT: [[UV8:%[0-9]+]]:vgpr(f32), [[UV9:%[0-9]+]]:vgpr(f32), [[UV10:%[0-9]+]]:vgpr(f32), [[UV11:%[0-9]+]]:vgpr(f32), [[UV12:%[0-9]+]]:vgpr(f32), [[UV13:%[0-9]+]]:vgpr(f32), [[UV14:%[0-9]+]]:vgpr(f32), [[UV15:%[0-9]+]]:vgpr(f32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV8]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV9]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV10]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV11]](f32) + ; GFX12-NEXT: $vgpr4 = COPY [[UV12]](f32) + ; GFX12-NEXT: $vgpr5 = COPY [[UV13]](f32) + ; GFX12-NEXT: $vgpr6 = COPY [[UV14]](f32) + ; GFX12-NEXT: $vgpr7 = COPY [[UV15]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %offset.base, 4080 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -2822,39 +2822,39 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4064 - ; GFX7-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4064 + ; GFX7-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = 
G_CONSTANT i32 0 + ; GFX7-NEXT: [[C3:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 ; GFX7-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: ; GFX7-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.3 - ; GFX7-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec - ; GFX7-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GFX7-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GFX7-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; GFX7-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; GFX7-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX7-NEXT: [[UV:%[0-9]+]]:vgpr_32(i32), [[UV1:%[0-9]+]]:vgpr_32(i32), [[UV2:%[0-9]+]]:vgpr_32(i32), [[UV3:%[0-9]+]]:vgpr_32(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV]](i32), implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV1]](i32), implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV2]](i32), implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV3]](i32), implicit $exec + ; GFX7-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](i32), [[V_READFIRSTLANE_B32_1]](i32), [[V_READFIRSTLANE_B32_2]](i32), [[V_READFIRSTLANE_B32_3]](i32) + ; GFX7-NEXT: [[UV4:%[0-9]+]]:vgpr(i64), [[UV5:%[0-9]+]]:vgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; GFX7-NEXT: [[UV6:%[0-9]+]]:sgpr(i64), [[UV7:%[0-9]+]]:sgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x i32>) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV6]](i64), [[UV4]] + ; GFX7-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV7]](i64), [[UV5]] + ; GFX7-NEXT: [[AND:%[0-9]+]]:vcc(i1) = G_AND [[ICMP]], [[ICMP1]] + ; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](i1) + ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](i64), implicit-def $exec, implicit-def $scc, 
implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4, %bb.2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C3]](s32), [[C1]], [[C2]], 4064, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C3]](s32), [[C1]], [[C2]], 4080, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x i32>), [[C3]](i32), [[C1]], [[C2]], 4064, 0, 0 :: (dereferenceable invariant load (i128) from unknown-address + 4064, align 4) + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x i32>), [[C3]](i32), [[C1]], [[C2]], 4080, 0, 0 :: (dereferenceable invariant load (i128) from unknown-address + 4064, align 4) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -2862,55 +2862,55 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4 ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: - ; GFX7-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GFX7-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GFX7-NEXT: $vgpr0 = COPY [[UV8]](s32) - ; GFX7-NEXT: $vgpr1 = COPY [[UV9]](s32) - ; GFX7-NEXT: $vgpr2 = COPY [[UV10]](s32) - ; GFX7-NEXT: $vgpr3 = COPY [[UV11]](s32) - ; GFX7-NEXT: $vgpr4 = COPY [[UV12]](s32) - ; GFX7-NEXT: $vgpr5 = COPY [[UV13]](s32) - ; GFX7-NEXT: $vgpr6 = COPY [[UV14]](s32) - ; GFX7-NEXT: $vgpr7 = COPY [[UV15]](s32) + ; GFX7-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x f32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x f32>), [[AMDGPU_BUFFER_LOAD1]](<4 x f32>) + ; GFX7-NEXT: [[UV8:%[0-9]+]]:vgpr(f32), [[UV9:%[0-9]+]]:vgpr(f32), [[UV10:%[0-9]+]]:vgpr(f32), [[UV11:%[0-9]+]]:vgpr(f32), [[UV12:%[0-9]+]]:vgpr(f32), [[UV13:%[0-9]+]]:vgpr(f32), [[UV14:%[0-9]+]]:vgpr(f32), [[UV15:%[0-9]+]]:vgpr(f32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x f32>) + ; GFX7-NEXT: $vgpr0 = COPY [[UV8]](f32) + ; GFX7-NEXT: $vgpr1 = COPY [[UV9]](f32) + ; GFX7-NEXT: $vgpr2 = COPY [[UV10]](f32) + ; GFX7-NEXT: $vgpr3 = COPY [[UV11]](f32) + ; GFX7-NEXT: $vgpr4 = COPY [[UV12]](f32) + ; GFX7-NEXT: $vgpr5 = COPY [[UV13]](f32) + ; GFX7-NEXT: $vgpr6 = COPY [[UV14]](f32) + ; GFX7-NEXT: $vgpr7 = COPY [[UV15]](f32) ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; ; GFX12-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064 ; GFX12: bb.1 (%ir-block.0): ; GFX12-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GFX12-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4064 - ; GFX12-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4064 + ; GFX12-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[C3:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 ; GFX12-NEXT: [[DEF:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: bb.2: ; GFX12-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[DEF]], %bb.1, %25, %bb.3 - ; GFX12-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; GFX12-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; GFX12-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; GFX12-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GFX12-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GFX12-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; GFX12-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; GFX12-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; GFX12-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; GFX12-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; GFX12-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[INTRINSIC_CONVERGENT]](s32), implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX12-NEXT: [[UV:%[0-9]+]]:vgpr_32(i32), [[UV1:%[0-9]+]]:vgpr_32(i32), [[UV2:%[0-9]+]]:vgpr_32(i32), [[UV3:%[0-9]+]]:vgpr_32(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV]](i32), implicit $exec + ; GFX12-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV1]](i32), implicit $exec + ; GFX12-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV2]](i32), implicit $exec + ; GFX12-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV3]](i32), implicit $exec + ; GFX12-NEXT: 
[[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](i32), [[V_READFIRSTLANE_B32_1]](i32), [[V_READFIRSTLANE_B32_2]](i32), [[V_READFIRSTLANE_B32_3]](i32) + ; GFX12-NEXT: [[UV4:%[0-9]+]]:vgpr(i64), [[UV5:%[0-9]+]]:vgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; GFX12-NEXT: [[UV6:%[0-9]+]]:sgpr(i64), [[UV7:%[0-9]+]]:sgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x i32>) + ; GFX12-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV6]](i64), [[UV4]] + ; GFX12-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV7]](i64), [[UV5]] + ; GFX12-NEXT: [[AND:%[0-9]+]]:vcc(i1) = G_AND [[ICMP]], [[ICMP1]] + ; GFX12-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](i1) + ; GFX12-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[INTRINSIC_CONVERGENT]](i32), implicit-def $exec, implicit-def $scc, implicit $exec ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: bb.3: ; GFX12-NEXT: successors: %bb.4, %bb.2 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C3]](s32), [[C1]], [[C2]], 4064, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C3]](s32), [[C1]], [[C2]], 4080, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x i32>), [[C3]](i32), [[C1]], [[C2]], 4064, 0, 0 :: (dereferenceable invariant load (i128) from unknown-address + 4064, align 4) + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x f32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x i32>), [[C3]](i32), [[C1]], [[C2]], 4080, 0, 0 :: (dereferenceable invariant load (i128) from unknown-address + 4064, align 4) ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX12-NEXT: {{ $}} @@ -2918,16 +2918,16 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4 ; GFX12-NEXT: $exec_lo = S_MOV_B32_term [[S_MOV_B32_]] ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: bb.5: - ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GFX12-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV8]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV9]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV10]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV11]](s32) - ; GFX12-NEXT: $vgpr4 = COPY [[UV12]](s32) - ; GFX12-NEXT: $vgpr5 = COPY [[UV13]](s32) - ; GFX12-NEXT: $vgpr6 = COPY [[UV14]](s32) - ; GFX12-NEXT: $vgpr7 = COPY [[UV15]](s32) + ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x f32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x f32>), [[AMDGPU_BUFFER_LOAD1]](<4 x f32>) + ; GFX12-NEXT: [[UV8:%[0-9]+]]:vgpr(f32), [[UV9:%[0-9]+]]:vgpr(f32), [[UV10:%[0-9]+]]:vgpr(f32), [[UV11:%[0-9]+]]:vgpr(f32), [[UV12:%[0-9]+]]:vgpr(f32), [[UV13:%[0-9]+]]:vgpr(f32), [[UV14:%[0-9]+]]:vgpr(f32), 
[[UV15:%[0-9]+]]:vgpr(f32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x f32>) + ; GFX12-NEXT: $vgpr0 = COPY [[UV8]](f32) + ; GFX12-NEXT: $vgpr1 = COPY [[UV9]](f32) + ; GFX12-NEXT: $vgpr2 = COPY [[UV10]](f32) + ; GFX12-NEXT: $vgpr3 = COPY [[UV11]](f32) + ; GFX12-NEXT: $vgpr4 = COPY [[UV12]](f32) + ; GFX12-NEXT: $vgpr5 = COPY [[UV13]](f32) + ; GFX12-NEXT: $vgpr6 = COPY [[UV14]](f32) + ; GFX12-NEXT: $vgpr7 = COPY [[UV15]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 4064, i32 0) ret <8 x float> %val @@ -2938,36 +2938,36 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr(<4 x i32> inreg % ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; GFX7-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]] - ; GFX7-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sgpr(i32) = COPY $sgpr6 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY [[COPY5]](i32) + ; GFX7-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[COPY4]], [[COPY6]] + ; GFX7-NEXT: [[C:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(f32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C]](i32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load (i32)) + ; GFX7-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](f32) ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX12-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr ; GFX12: bb.1 (%ir-block.0): ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; 
GFX12-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]] - ; GFX12-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sgpr(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY [[COPY5]](i32) + ; GFX12-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[COPY4]], [[COPY6]] + ; GFX12-NEXT: [[C:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(f32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C]](i32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load (i32)) + ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset = add i32 %offset.v, %offset.s %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0) @@ -2979,36 +2979,36 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr(<4 x i32> inreg % ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; GFX7-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]] - ; GFX7-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sgpr(i32) = COPY $sgpr6 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY [[COPY5]](i32) + ; GFX7-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[COPY6]], [[COPY4]] + ; GFX7-NEXT: [[C:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(f32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C]](i32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load (i32)) + ; GFX7-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](f32) ; GFX7-NEXT: 
SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX12-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr ; GFX12: bb.1 (%ir-block.0): ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; GFX12-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]] - ; GFX12-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sgpr(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY [[COPY5]](i32) + ; GFX12-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[COPY6]], [[COPY4]] + ; GFX12-NEXT: [[C:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(f32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C]](i32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load (i32)) + ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset = add i32 %offset.s, %offset.v %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0) @@ -3020,44 +3020,44 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr_imm(<4 x i32> inr ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; GFX7-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]] - ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GFX7-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]] - ; GFX7-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 1024, 0, 0 :: 
(dereferenceable invariant load (s32)) - ; GFX7-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sgpr(i32) = COPY $sgpr6 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY [[COPY5]](i32) + ; GFX7-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[COPY4]], [[COPY6]] + ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1024 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GFX7-NEXT: [[ADD1:%[0-9]+]]:vgpr(i32) = G_ADD [[ADD]], [[COPY7]] + ; GFX7-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[C2:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(f32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C2]](i32), [[ADD]], [[C1]], 1024, 0, 0 :: (dereferenceable invariant load (i32)) + ; GFX7-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](f32) ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX12-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr_imm ; GFX12: bb.1 (%ir-block.0): ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; GFX12-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]] - ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GFX12-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]] - ; GFX12-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 1024, 0, 0 :: (dereferenceable invariant load (s32)) - ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sgpr(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY [[COPY5]](i32) + ; GFX12-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[COPY4]], [[COPY6]] + ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1024 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GFX12-NEXT: [[ADD1:%[0-9]+]]:vgpr(i32) = G_ADD [[ADD]], [[COPY7]] + ; GFX12-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 
0 + ; GFX12-NEXT: [[C2:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(f32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C2]](i32), [[ADD]], [[C1]], 1024, 0, 0 :: (dereferenceable invariant load (i32)) + ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset.base = add i32 %offset.v, %offset.s %offset = add i32 %offset.base, 1024 @@ -3070,44 +3070,44 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr_imm(<4 x i32> inr ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; GFX7-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]] - ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GFX7-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]] - ; GFX7-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 1024, 0, 0 :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sgpr(i32) = COPY $sgpr6 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY [[COPY5]](i32) + ; GFX7-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[COPY6]], [[COPY4]] + ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1024 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GFX7-NEXT: [[ADD1:%[0-9]+]]:vgpr(i32) = G_ADD [[ADD]], [[COPY7]] + ; GFX7-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[C2:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(f32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C2]](i32), [[ADD]], [[C1]], 1024, 0, 0 :: (dereferenceable invariant load (i32)) + ; GFX7-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](f32) ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX12-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr_imm ; GFX12: bb.1 (%ir-block.0): ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR 
[[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; GFX12-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]] - ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GFX12-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]] - ; GFX12-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 1024, 0, 0 :: (dereferenceable invariant load (s32)) - ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sgpr(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY [[COPY5]](i32) + ; GFX12-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[COPY6]], [[COPY4]] + ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1024 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GFX12-NEXT: [[ADD1:%[0-9]+]]:vgpr(i32) = G_ADD [[ADD]], [[COPY7]] + ; GFX12-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[C2:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(f32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C2]](i32), [[ADD]], [[C1]], 1024, 0, 0 :: (dereferenceable invariant load (i32)) + ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset.base = add i32 %offset.s, %offset.v %offset = add i32 %offset.base, 1024 @@ -3121,44 +3121,44 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_sgpr_vgpr(<4 x i32> inr ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; GFX7-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]] - ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GFX7-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]] - ; GFX7-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 1024, 0, 0 :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: $vgpr0 = 
COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sgpr(i32) = COPY $sgpr6 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY [[COPY5]](i32) + ; GFX7-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[COPY6]], [[COPY4]] + ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1024 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GFX7-NEXT: [[ADD1:%[0-9]+]]:vgpr(i32) = G_ADD [[ADD]], [[COPY7]] + ; GFX7-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[C2:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(f32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C2]](i32), [[ADD]], [[C1]], 1024, 0, 0 :: (dereferenceable invariant load (i32)) + ; GFX7-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](f32) ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX12-LABEL: name: s_buffer_load_f32_offset_add_imm_sgpr_vgpr ; GFX12: bb.1 (%ir-block.0): ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; GFX12-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]] - ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GFX12-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]] - ; GFX12-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 1024, 0, 0 :: (dereferenceable invariant load (s32)) - ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sgpr(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY [[COPY5]](i32) + ; GFX12-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[COPY6]], [[COPY4]] + ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1024 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GFX12-NEXT: [[ADD1:%[0-9]+]]:vgpr(i32) = G_ADD [[ADD]], [[COPY7]] + ; GFX12-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[C2:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 
+ ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(f32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C2]](i32), [[ADD]], [[C1]], 1024, 0, 0 :: (dereferenceable invariant load (i32)) + ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset.base = add i32 %offset.s, 1024 %offset = add i32 %offset.base, %offset.v @@ -3171,44 +3171,44 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_vgpr_sgpr(<4 x i32> inr ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; GFX7-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]] - ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GFX7-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]] - ; GFX7-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 1024, 0, 0 :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sgpr(i32) = COPY $sgpr6 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY [[COPY5]](i32) + ; GFX7-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[COPY4]], [[COPY6]] + ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1024 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GFX7-NEXT: [[ADD1:%[0-9]+]]:vgpr(i32) = G_ADD [[ADD]], [[COPY7]] + ; GFX7-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[C2:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(f32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C2]](i32), [[ADD]], [[C1]], 1024, 0, 0 :: (dereferenceable invariant load (i32)) + ; GFX7-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](f32) ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX12-LABEL: name: s_buffer_load_f32_offset_add_imm_vgpr_sgpr ; GFX12: bb.1 (%ir-block.0): ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; 
GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; GFX12-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]] - ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 - ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GFX12-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]] - ; GFX12-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 1024, 0, 0 :: (dereferenceable invariant load (s32)) - ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sgpr(i32) = COPY $sgpr6 + ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY [[COPY5]](i32) + ; GFX12-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[COPY4]], [[COPY6]] + ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1024 + ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GFX12-NEXT: [[ADD1:%[0-9]+]]:vgpr(i32) = G_ADD [[ADD]], [[COPY7]] + ; GFX12-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[C2:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(f32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C2]](i32), [[ADD]], [[C1]], 1024, 0, 0 :: (dereferenceable invariant load (i32)) + ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](f32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset.base = add i32 %offset.v, 1024 %offset = add i32 %offset.base, %offset.s diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll index 0df8e68e7093c..145e7de8f3648 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll @@ -8,16 +8,17 @@ define amdgpu_ps float @struct_buffer_load__sgpr_rsrc__vgpr_val__vgpr_vindex__vg ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), [[COPY5]], [[COPY6]], 0, 0, -1 :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; 
CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr(i32) = COPY $sgpr6 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(i32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[COPY4]](i32), [[COPY5]], [[COPY6]], 0, 0, -1 :: (dereferenceable load (f32), align 1, addrspace 8) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[AMDGPU_BUFFER_LOAD]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret float %val @@ -29,18 +30,19 @@ define amdgpu_ps float @struct_buffer_load__sgpr_rsrc__sgpr_val__sgpr_vindex__sg ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY7]](s32), [[COPY8]], [[COPY6]], 0, 0, -1 :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(i32) = COPY $sgpr6 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(i32) = COPY $sgpr7 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr(i32) = COPY $sgpr8 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(i32) = COPY [[COPY4]](i32) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr(i32) = COPY [[COPY5]](i32) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(i32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[COPY7]](i32), [[COPY8]], [[COPY6]], 0, 0, -1 :: (dereferenceable load (f32), align 1, addrspace 8) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[AMDGPU_BUFFER_LOAD]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret float %val @@ -53,39 +55,39 @@ define amdgpu_ps float @struct_buffer_load__vgpr_rsrc__vgpr_val__vgpr_vindex__vg ; CHECK-NEXT: 
successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %14, %bb.3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(i32), [[UV1:%[0-9]+]]:vgpr_32(i32), [[UV2:%[0-9]+]]:vgpr_32(i32), [[UV3:%[0-9]+]]:vgpr_32(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV]](i32), implicit $exec + ; CHECK-NEXT: 
[[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV1]](i32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV2]](i32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV3]](i32), implicit $exec + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](i32), [[V_READFIRSTLANE_B32_1]](i32), [[V_READFIRSTLANE_B32_2]](i32), [[V_READFIRSTLANE_B32_3]](i32) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(i64), [[UV5:%[0-9]+]]:vgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(i64), [[UV7:%[0-9]+]]:sgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x i32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV6]](i64), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV7]](i64), [[UV5]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(i1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](i1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](i64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[COPY4]](s32), [[COPY5]], [[COPY6]], 0, 0, -1 :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(i32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x i32>), [[COPY4]](i32), [[COPY5]], [[COPY6]], 0, 0, -1 :: (dereferenceable load (f32), align 1, addrspace 8) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -95,7 +97,8 @@ define amdgpu_ps float @struct_buffer_load__vgpr_rsrc__vgpr_val__vgpr_vindex__vg ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[AMDGPU_BUFFER_LOAD]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret float %val @@ -108,30 +111,30 @@ define amdgpu_ps float @struct_buffer_load__sgpr_rsrc__vgpr_val__vgpr_vindex_vgp ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY 
$sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %14, %bb.3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY6]](s32), implicit $exec - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](s32), [[COPY6]] - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[COPY6]](i32), implicit $exec + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](i32), [[COPY6]] + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](i1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](i64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), [[COPY5]], [[V_READFIRSTLANE_B32_]], 0, 0, -1 :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(i32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[COPY4]](i32), [[COPY5]], [[V_READFIRSTLANE_B32_]], 0, 0, -1 :: (dereferenceable load (f32), align 1, addrspace 8) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -141,7 +144,8 @@ define amdgpu_ps float @struct_buffer_load__sgpr_rsrc__vgpr_val__vgpr_vindex_vgp ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[AMDGPU_BUFFER_LOAD]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret float %val @@ -154,42 +158,42 @@ define amdgpu_ps float @struct_buffer_load__vgpr_rsrc__vgpr_val__vgpr_vindex__vg ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = 
COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr6 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %14, %bb.3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY6]](s32), implicit $exec - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_4]](s32), [[COPY6]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]] - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(i32), [[UV1:%[0-9]+]]:vgpr_32(i32), [[UV2:%[0-9]+]]:vgpr_32(i32), [[UV3:%[0-9]+]]:vgpr_32(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV]](i32), implicit $exec + ; 
CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV1]](i32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV2]](i32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV3]](i32), implicit $exec + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](i32), [[V_READFIRSTLANE_B32_1]](i32), [[V_READFIRSTLANE_B32_2]](i32), [[V_READFIRSTLANE_B32_3]](i32) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(i64), [[UV5:%[0-9]+]]:vgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(i64), [[UV7:%[0-9]+]]:sgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x i32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV6]](i64), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV7]](i64), [[UV5]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(i1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[COPY6]](i32), implicit $exec + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_4]](i32), [[COPY6]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vcc(i1) = G_AND [[AND]], [[ICMP2]] + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND1]](i1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](i64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[COPY4]](s32), [[COPY5]], [[V_READFIRSTLANE_B32_4]], 0, 0, -1 :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(i32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x i32>), [[COPY4]](i32), [[COPY5]], [[V_READFIRSTLANE_B32_4]], 0, 0, -1 :: (dereferenceable load (f32), align 1, addrspace 8) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -199,7 +203,8 @@ define amdgpu_ps float @struct_buffer_load__vgpr_rsrc__vgpr_val__vgpr_vindex__vg ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[AMDGPU_BUFFER_LOAD]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret float %val diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll index 4dc0778a08884..223900cbbdfd3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll @@ -8,16 +8,16 @@ define amdgpu_ps void @struct_buffer_store__sgpr_rsrc__vgpr_val__vgpr_vindex__vg ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; 
CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK-NEXT: G_AMDGPU_BUFFER_STORE [[COPY4]](s32), [[BUILD_VECTOR]](<4 x s32>), [[COPY5]](s32), [[COPY6]], [[COPY7]], 0, 0, -1 :: (dereferenceable store (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(f32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr(i32) = COPY $sgpr6 + ; CHECK-NEXT: G_AMDGPU_BUFFER_STORE [[COPY4]](f32), [[BUILD_VECTOR]](<4 x i32>), [[COPY5]](i32), [[COPY6]], [[COPY7]], 0, 0, -1 :: (dereferenceable store (f32), align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -29,19 +29,19 @@ define amdgpu_ps void @struct_buffer_store__sgpr_rsrc__sgpr_val__sgpr_vindex__sg ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32) - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[COPY6]](s32) - ; CHECK-NEXT: G_AMDGPU_BUFFER_STORE [[COPY8]](s32), [[BUILD_VECTOR]](<4 x s32>), [[COPY9]](s32), [[COPY10]], [[COPY7]], 0, 0, -1 :: (dereferenceable store (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(f32) = COPY $sgpr6 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(i32) = COPY $sgpr7 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr(i32) = COPY $sgpr8 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr(i32) = 
COPY $sgpr9 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr(f32) = COPY [[COPY4]](f32) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr(i32) = COPY [[COPY5]](i32) + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr(i32) = COPY [[COPY6]](i32) + ; CHECK-NEXT: G_AMDGPU_BUFFER_STORE [[COPY8]](f32), [[BUILD_VECTOR]](<4 x i32>), [[COPY9]](i32), [[COPY10]], [[COPY7]], 0, 0, -1 :: (dereferenceable store (f32), align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -54,15 +54,15 @@ define amdgpu_ps void @struct_buffer_store__vgpr_rsrc__vgpr_val__vgpr_vindex__vg ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(f32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} @@ -70,24 +70,24 @@ define amdgpu_ps void @struct_buffer_store__vgpr_rsrc__vgpr_val__vgpr_vindex__vg ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %14, %bb.3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), 
[[UV6]](s64), [[UV4]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(i32), [[UV1:%[0-9]+]]:vgpr_32(i32), [[UV2:%[0-9]+]]:vgpr_32(i32), [[UV3:%[0-9]+]]:vgpr_32(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV]](i32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV1]](i32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV2]](i32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV3]](i32), implicit $exec + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](i32), [[V_READFIRSTLANE_B32_1]](i32), [[V_READFIRSTLANE_B32_2]](i32), [[V_READFIRSTLANE_B32_3]](i32) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(i64), [[UV5:%[0-9]+]]:vgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(i64), [[UV7:%[0-9]+]]:sgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x i32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV6]](i64), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV7]](i64), [[UV5]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(i1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](i1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](i64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: G_AMDGPU_BUFFER_STORE [[COPY4]](s32), [[BUILD_VECTOR1]](<4 x s32>), [[COPY5]](s32), [[COPY6]], [[COPY7]], 0, 0, -1 :: (dereferenceable store (s32), align 1, addrspace 8) + ; CHECK-NEXT: G_AMDGPU_BUFFER_STORE [[COPY4]](f32), [[BUILD_VECTOR1]](<4 x i32>), [[COPY5]](i32), [[COPY6]], [[COPY7]], 0, 0, -1 :: (dereferenceable store (f32), align 1, addrspace 8) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -109,15 +109,15 @@ define amdgpu_ps void @struct_buffer_store__sgpr_rsrc__vgpr_val__vgpr_vindex__vg ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: 
[[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(f32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr3 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} @@ -125,15 +125,15 @@ define amdgpu_ps void @struct_buffer_store__sgpr_rsrc__vgpr_val__vgpr_vindex__vg ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %14, %bb.3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY7]](s32), implicit $exec - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](s32), [[COPY7]] - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[COPY7]](i32), implicit $exec + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](i32), [[COPY7]] + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](i1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](i64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: G_AMDGPU_BUFFER_STORE [[COPY4]](s32), [[BUILD_VECTOR]](<4 x s32>), [[COPY5]](s32), [[COPY6]], [[V_READFIRSTLANE_B32_]], 0, 0, -1 :: (dereferenceable store (s32), align 1, addrspace 8) + ; CHECK-NEXT: G_AMDGPU_BUFFER_STORE [[COPY4]](f32), [[BUILD_VECTOR]](<4 x i32>), [[COPY5]](i32), [[COPY6]], [[V_READFIRSTLANE_B32_]], 0, 0, -1 :: (dereferenceable store (f32), align 1, addrspace 8) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -155,15 +155,15 @@ define amdgpu_ps void @struct_buffer_store__vgpr_rsrc__vgpr_val__vgpr_vindex__vg ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; CHECK-NEXT: 
[[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(f32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} @@ -171,27 +171,27 @@ define amdgpu_ps void @struct_buffer_store__vgpr_rsrc__vgpr_val__vgpr_vindex__vg ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %14, %bb.3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY7]](s32), implicit $exec - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_4]](s32), [[COPY7]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]] - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(i32), [[UV1:%[0-9]+]]:vgpr_32(i32), [[UV2:%[0-9]+]]:vgpr_32(i32), [[UV3:%[0-9]+]]:vgpr_32(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV]](i32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV1]](i32), implicit $exec + ; CHECK-NEXT: 
[[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV2]](i32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV3]](i32), implicit $exec + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](i32), [[V_READFIRSTLANE_B32_1]](i32), [[V_READFIRSTLANE_B32_2]](i32), [[V_READFIRSTLANE_B32_3]](i32) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(i64), [[UV5:%[0-9]+]]:vgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(i64), [[UV7:%[0-9]+]]:sgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x i32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV6]](i64), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV7]](i64), [[UV5]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(i1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[COPY7]](i32), implicit $exec + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_4]](i32), [[COPY7]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vcc(i1) = G_AND [[AND]], [[ICMP2]] + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND1]](i1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](i64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: G_AMDGPU_BUFFER_STORE [[COPY4]](s32), [[BUILD_VECTOR1]](<4 x s32>), [[COPY5]](s32), [[COPY6]], [[V_READFIRSTLANE_B32_4]], 0, 0, -1 :: (dereferenceable store (s32), align 1, addrspace 8) + ; CHECK-NEXT: G_AMDGPU_BUFFER_STORE [[COPY4]](f32), [[BUILD_VECTOR1]](<4 x i32>), [[COPY5]](i32), [[COPY6]], [[V_READFIRSTLANE_B32_4]], 0, 0, -1 :: (dereferenceable store (f32), align 1, addrspace 8) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.ptr.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.ptr.buffer.load.ll index 9acc9d0be294e..ab90ab39259b9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.ptr.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.ptr.buffer.load.ll @@ -8,16 +8,17 @@ define amdgpu_ps float @struct_ptr_buffer_load__sgpr_rsrc__vgpr_val__vgpr_vindex ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), [[COPY5]], [[COPY6]], 0, 0, -1 :: (dereferenceable load (s32) from 
%ir.rsrc, align 1, addrspace 8) - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr(i32) = COPY $sgpr6 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(i32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[COPY4]](i32), [[COPY5]], [[COPY6]], 0, 0, -1 :: (dereferenceable load (f32) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[AMDGPU_BUFFER_LOAD]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret float %val @@ -29,18 +30,19 @@ define amdgpu_ps float @struct_ptr_buffer_load__sgpr_rsrc__sgpr_val__sgpr_vindex ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY7]](s32), [[COPY8]], [[COPY6]], 0, 0, -1 :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(i32) = COPY $sgpr6 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(i32) = COPY $sgpr7 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr(i32) = COPY $sgpr8 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(i32) = COPY [[COPY4]](i32) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr(i32) = COPY [[COPY5]](i32) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(i32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[COPY7]](i32), [[COPY8]], [[COPY6]], 0, 0, -1 :: (dereferenceable load (f32) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[AMDGPU_BUFFER_LOAD]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %vindex, i32 
%voffset, i32 %soffset, i32 0) ret float %val @@ -53,39 +55,39 @@ define amdgpu_ps float @struct_ptr_buffer_load__vgpr_rsrc__vgpr_val__vgpr_vindex ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %19, %bb.3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %20, %bb.3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(i32), [[UV1:%[0-9]+]]:vgpr_32(i32), [[UV2:%[0-9]+]]:vgpr_32(i32), [[UV3:%[0-9]+]]:vgpr_32(i32) = G_UNMERGE_VALUES 
[[BUILD_VECTOR]](<4 x i32>) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV]](i32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV1]](i32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV2]](i32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV3]](i32), implicit $exec + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](i32), [[V_READFIRSTLANE_B32_1]](i32), [[V_READFIRSTLANE_B32_2]](i32), [[V_READFIRSTLANE_B32_3]](i32) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(i64), [[UV5:%[0-9]+]]:vgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(i64), [[UV7:%[0-9]+]]:sgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x i32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV6]](i64), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV7]](i64), [[UV5]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(i1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](i1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](i64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[COPY4]](s32), [[COPY5]], [[COPY6]], 0, 0, -1 :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(i32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x i32>), [[COPY4]](i32), [[COPY5]], [[COPY6]], 0, 0, -1 :: (dereferenceable load (f32) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -95,7 +97,8 @@ define amdgpu_ps float @struct_ptr_buffer_load__vgpr_rsrc__vgpr_val__vgpr_vindex ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[AMDGPU_BUFFER_LOAD]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret float %val @@ -108,30 +111,30 @@ define amdgpu_ps float @struct_ptr_buffer_load__sgpr_rsrc__vgpr_val__vgpr_vindex ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), 
[[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %19, %bb.3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY6]](s32), implicit $exec - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](s32), [[COPY6]] - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %20, %bb.3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[COPY6]](i32), implicit $exec + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](i32), [[COPY6]] + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](i1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](i64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), [[COPY5]], [[V_READFIRSTLANE_B32_]], 0, 0, -1 :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(i32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[COPY4]](i32), [[COPY5]], [[V_READFIRSTLANE_B32_]], 0, 0, -1 :: (dereferenceable load (f32) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -141,7 +144,8 @@ define amdgpu_ps float @struct_ptr_buffer_load__sgpr_rsrc__vgpr_val__vgpr_vindex ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[AMDGPU_BUFFER_LOAD]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret float %val @@ -154,42 +158,42 @@ define amdgpu_ps float @struct_ptr_buffer_load__vgpr_rsrc__vgpr_val__vgpr_vindex ; CHECK-NEXT: successors: %bb.2(0x80000000) ; 
CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr6 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %19, %bb.3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY6]](s32), implicit $exec - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_4]](s32), [[COPY6]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]] - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %20, %bb.3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(i32), 
[[UV1:%[0-9]+]]:vgpr_32(i32), [[UV2:%[0-9]+]]:vgpr_32(i32), [[UV3:%[0-9]+]]:vgpr_32(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV]](i32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV1]](i32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV2]](i32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV3]](i32), implicit $exec + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](i32), [[V_READFIRSTLANE_B32_1]](i32), [[V_READFIRSTLANE_B32_2]](i32), [[V_READFIRSTLANE_B32_3]](i32) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(i64), [[UV5:%[0-9]+]]:vgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(i64), [[UV7:%[0-9]+]]:sgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x i32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV6]](i64), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV7]](i64), [[UV5]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(i1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[COPY6]](i32), implicit $exec + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_4]](i32), [[COPY6]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vcc(i1) = G_AND [[AND]], [[ICMP2]] + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND1]](i1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](i64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[COPY4]](s32), [[COPY5]], [[V_READFIRSTLANE_B32_4]], 0, 0, -1 :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(i32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x i32>), [[COPY4]](i32), [[COPY5]], [[V_READFIRSTLANE_B32_4]], 0, 0, -1 :: (dereferenceable load (f32) from %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -199,7 +203,8 @@ define amdgpu_ps float @struct_ptr_buffer_load__vgpr_rsrc__vgpr_val__vgpr_vindex ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[AMDGPU_BUFFER_LOAD]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](f32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret float %val diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.ptr.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.ptr.buffer.store.ll index 37880233b2733..dfd8db8d7ed3d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.ptr.buffer.store.ll +++ 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.ptr.buffer.store.ll @@ -8,16 +8,16 @@ define amdgpu_ps void @struct_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_vindex ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: G_AMDGPU_BUFFER_STORE [[COPY4]](s32), [[BUILD_VECTOR]](<4 x s32>), [[COPY5]](s32), [[COPY6]], [[COPY7]], 0, 0, -1 :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(f32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr(i32) = COPY $sgpr6 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: G_AMDGPU_BUFFER_STORE [[COPY4]](f32), [[BUILD_VECTOR]](<4 x i32>), [[COPY5]](i32), [[COPY6]], [[COPY7]], 0, 0, -1 :: (dereferenceable store (f32) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.ptr.buffer.store.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -29,19 +29,19 @@ define amdgpu_ps void @struct_ptr_buffer_store__sgpr_rsrc__sgpr_val__sgpr_vindex ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32) - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[COPY6]](s32) - ; CHECK-NEXT: G_AMDGPU_BUFFER_STORE [[COPY8]](s32), [[BUILD_VECTOR]](<4 x s32>), [[COPY9]](s32), [[COPY10]], [[COPY7]], 0, 0, -1 :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + 
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(f32) = COPY $sgpr6 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(i32) = COPY $sgpr7 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr(i32) = COPY $sgpr8 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr(i32) = COPY $sgpr9 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr(f32) = COPY [[COPY4]](f32) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr(i32) = COPY [[COPY5]](i32) + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr(i32) = COPY [[COPY6]](i32) + ; CHECK-NEXT: G_AMDGPU_BUFFER_STORE [[COPY8]](f32), [[BUILD_VECTOR]](<4 x i32>), [[COPY9]](i32), [[COPY10]], [[COPY7]], 0, 0, -1 :: (dereferenceable store (f32) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.ptr.buffer.store.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -54,15 +54,15 @@ define amdgpu_ps void @struct_ptr_buffer_store__vgpr_rsrc__vgpr_val__vgpr_vindex ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(f32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} @@ -70,24 +70,24 @@ define amdgpu_ps void @struct_ptr_buffer_store__vgpr_rsrc__vgpr_val__vgpr_vindex ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %19, %bb.3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), 
[[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(i32), [[UV1:%[0-9]+]]:vgpr_32(i32), [[UV2:%[0-9]+]]:vgpr_32(i32), [[UV3:%[0-9]+]]:vgpr_32(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV]](i32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV1]](i32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV2]](i32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV3]](i32), implicit $exec + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](i32), [[V_READFIRSTLANE_B32_1]](i32), [[V_READFIRSTLANE_B32_2]](i32), [[V_READFIRSTLANE_B32_3]](i32) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(i64), [[UV5:%[0-9]+]]:vgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(i64), [[UV7:%[0-9]+]]:sgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x i32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV6]](i64), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV7]](i64), [[UV5]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(i1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](i1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](i64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: G_AMDGPU_BUFFER_STORE [[COPY4]](s32), [[BUILD_VECTOR1]](<4 x s32>), [[COPY5]](s32), [[COPY6]], [[COPY7]], 0, 0, -1 :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: G_AMDGPU_BUFFER_STORE [[COPY4]](f32), [[BUILD_VECTOR1]](<4 x i32>), [[COPY5]](i32), [[COPY6]], [[COPY7]], 0, 0, -1 :: (dereferenceable store (f32) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -109,15 +109,15 @@ define amdgpu_ps void @struct_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_vindex ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: 
[[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(f32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} @@ -125,15 +125,15 @@ define amdgpu_ps void @struct_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_vindex ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %19, %bb.3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY7]](s32), implicit $exec - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](s32), [[COPY7]] - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[COPY7]](i32), implicit $exec + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](i32), [[COPY7]] + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](i1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](i64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: G_AMDGPU_BUFFER_STORE [[COPY4]](s32), [[BUILD_VECTOR]](<4 x s32>), [[COPY5]](s32), [[COPY6]], [[V_READFIRSTLANE_B32_]], 0, 0, -1 :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: G_AMDGPU_BUFFER_STORE [[COPY4]](f32), [[BUILD_VECTOR]](<4 x i32>), [[COPY5]](i32), [[COPY6]], [[V_READFIRSTLANE_B32_]], 0, 0, -1 :: (dereferenceable store (f32) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -155,15 +155,15 @@ define amdgpu_ps void @struct_ptr_buffer_store__vgpr_rsrc__vgpr_val__vgpr_vindex ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; 
CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(f32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr7 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} @@ -171,27 +171,27 @@ define amdgpu_ps void @struct_ptr_buffer_store__vgpr_rsrc__vgpr_val__vgpr_vindex ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %19, %bb.3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY7]](s32), implicit $exec - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_4]](s32), [[COPY7]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]] - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; 
CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(i32), [[UV1:%[0-9]+]]:vgpr_32(i32), [[UV2:%[0-9]+]]:vgpr_32(i32), [[UV3:%[0-9]+]]:vgpr_32(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV]](i32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV1]](i32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV2]](i32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV3]](i32), implicit $exec + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](i32), [[V_READFIRSTLANE_B32_1]](i32), [[V_READFIRSTLANE_B32_2]](i32), [[V_READFIRSTLANE_B32_3]](i32) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(i64), [[UV5:%[0-9]+]]:vgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(i64), [[UV7:%[0-9]+]]:sgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x i32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV6]](i64), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV7]](i64), [[UV5]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(i1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[COPY7]](i32), implicit $exec + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_4]](i32), [[COPY7]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vcc(i1) = G_AND [[AND]], [[ICMP2]] + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND1]](i1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](i64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: G_AMDGPU_BUFFER_STORE [[COPY4]](s32), [[BUILD_VECTOR1]](<4 x s32>), [[COPY5]](s32), [[COPY6]], [[V_READFIRSTLANE_B32_4]], 0, 0, -1 :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: G_AMDGPU_BUFFER_STORE [[COPY4]](f32), [[BUILD_VECTOR1]](<4 x i32>), [[COPY5]](i32), [[COPY6]], [[V_READFIRSTLANE_B32_4]], 0, 0, -1 :: (dereferenceable store (f32) into %ir.rsrc, align 1, addrspace 8) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll index 4bfd29430ff1e..f38cbc0febbde 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll @@ -2676,6 +2676,7 @@ define amdgpu_ps half @saddsat_i16_sv(i16 inreg %lhs, i16 %rhs) { ; GFX6-NEXT: v_min_i32_e32 v0, s1, v0 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, s0, v0 ; GFX6-NEXT: v_ashrrev_i32_e32 v0, 16, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: saddsat_i16_sv: @@ -2718,6 +2719,7 @@ define amdgpu_ps half @saddsat_i16_vs(i16 %lhs, i16 inreg %rhs) { ; GFX6-NEXT: v_min_i32_e32 v1, v2, v1 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; GFX6-NEXT: v_ashrrev_i32_e32 v0, 16, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: saddsat_i16_vs: 
@@ -5343,8 +5345,7 @@ define amdgpu_ps <4 x float> @saddsat_i128_sv(i128 inreg %lhs, i128 %rhs) { ; GFX6-NEXT: v_ashrrev_i32_e32 v3, 31, v5 ; GFX6-NEXT: v_cndmask_b32_e64 v2, v7, 0, vcc ; GFX6-NEXT: v_xor_b32_e32 v2, v2, v6 -; GFX6-NEXT: v_bfrev_b32_e32 v6, 1 -; GFX6-NEXT: v_add_i32_e32 v6, vcc, v3, v6 +; GFX6-NEXT: v_add_i32_e32 v6, vcc, 0x80000000, v3 ; GFX6-NEXT: v_and_b32_e32 v2, 1, v2 ; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc @@ -5374,8 +5375,7 @@ define amdgpu_ps <4 x float> @saddsat_i128_sv(i128 inreg %lhs, i128 %rhs) { ; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v5 ; GFX8-NEXT: v_cndmask_b32_e64 v2, v7, 0, vcc ; GFX8-NEXT: v_xor_b32_e32 v2, v2, v6 -; GFX8-NEXT: v_bfrev_b32_e32 v6, 1 -; GFX8-NEXT: v_add_u32_e32 v6, vcc, v3, v6 +; GFX8-NEXT: v_add_u32_e32 v6, vcc, 0x80000000, v3 ; GFX8-NEXT: v_and_b32_e32 v2, 1, v2 ; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc @@ -5491,10 +5491,9 @@ define amdgpu_ps <4 x float> @saddsat_i128_vs(i128 %lhs, i128 inreg %rhs) { ; GFX6-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc ; GFX6-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1] ; GFX6-NEXT: v_cmp_eq_u64_e64 s[0:1], s[2:3], 0 +; GFX6-NEXT: v_add_i32_e32 v3, vcc, 0x80000000, v2 ; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[0:1] ; GFX6-NEXT: v_xor_b32_e32 v0, v1, v0 -; GFX6-NEXT: v_bfrev_b32_e32 v1, 1 -; GFX6-NEXT: v_add_i32_e32 v3, vcc, v2, v1 ; GFX6-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc @@ -5527,8 +5526,7 @@ define amdgpu_ps <4 x float> @saddsat_i128_vs(i128 %lhs, i128 inreg %rhs) { ; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[0:1] ; GFX8-NEXT: v_xor_b32_e32 v0, v1, v0 ; GFX8-NEXT: v_ashrrev_i32_e32 v2, 31, v7 -; GFX8-NEXT: v_bfrev_b32_e32 v1, 1 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v2, v1 +; GFX8-NEXT: v_add_u32_e32 v3, vcc, 0x80000000, v2 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/select-to-fmin-fmax.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/select-to-fmin-fmax.ll index ee3bf96111994..ec3b41512a91c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/select-to-fmin-fmax.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/select-to-fmin-fmax.ll @@ -46,20 +46,19 @@ define <4 x half> @test_v4s16(<4 x half> %a) #0 { ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v4, 0 +; GCN-NEXT: v_lshrrev_b32_e32 v3, 16, v0 ; GCN-NEXT: v_cmp_gt_f16_e32 vcc, 0, v0 -; GCN-NEXT: v_lshrrev_b32_e32 v2, 16, v0 -; GCN-NEXT: v_cndmask_b32_e64 v5, v0, 0, vcc ; GCN-NEXT: v_cmp_lt_f16_sdwa s[4:5], v0, v4 src0_sel:WORD_1 src1_sel:DWORD +; GCN-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; GCN-NEXT: v_cndmask_b32_e64 v5, v0, 0, vcc +; GCN-NEXT: v_cndmask_b32_e64 v0, v3, 0, s[4:5] ; GCN-NEXT: v_cmp_gt_f16_e32 vcc, 0, v1 -; GCN-NEXT: v_lshrrev_b32_e32 v3, 16, v1 -; GCN-NEXT: v_cndmask_b32_e64 v0, v2, 0, s[4:5] -; GCN-NEXT: v_cndmask_b32_e64 v2, v1, 0, vcc ; GCN-NEXT: v_cmp_lt_f16_sdwa s[4:5], v1, v4 src0_sel:WORD_1 src1_sel:DWORD -; GCN-NEXT: v_cndmask_b32_e64 v1, v3, 0, s[4:5] -; GCN-NEXT: v_and_b32_e32 v3, 0xffff, v5 -; GCN-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GCN-NEXT: v_lshl_or_b32 v0, v0, 16, v3 -; GCN-NEXT: v_lshl_or_b32 v1, v1, 16, v2 +; GCN-NEXT: v_cndmask_b32_e64 v3, v1, 0, vcc +; GCN-NEXT: v_cndmask_b32_e64 v1, v2, 0, s[4:5] +; GCN-NEXT: s_mov_b32 s4, 0x5040100 +; GCN-NEXT: v_perm_b32 v0, v0, v5, s4 +; GCN-NEXT: 
v_perm_b32 v1, v1, v3, s4 ; GCN-NEXT: s_setpc_b64 s[30:31] entry: %fcmp = fcmp olt <4 x half> %a, zeroinitializer @@ -72,34 +71,31 @@ define <8 x half> @test_v8s16(<8 x half> %a) #0 { ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v8, 0 +; GCN-NEXT: v_lshrrev_b32_e32 v5, 16, v0 ; GCN-NEXT: v_cmp_gt_f16_e32 vcc, 0, v0 -; GCN-NEXT: v_lshrrev_b32_e32 v4, 16, v0 -; GCN-NEXT: v_cndmask_b32_e64 v9, v0, 0, vcc ; GCN-NEXT: v_cmp_lt_f16_sdwa s[4:5], v0, v8 src0_sel:WORD_1 src1_sel:DWORD +; GCN-NEXT: v_lshrrev_b32_e32 v4, 16, v1 +; GCN-NEXT: v_cndmask_b32_e64 v9, v0, 0, vcc +; GCN-NEXT: v_cndmask_b32_e64 v0, v5, 0, s[4:5] ; GCN-NEXT: v_cmp_gt_f16_e32 vcc, 0, v1 -; GCN-NEXT: v_lshrrev_b32_e32 v5, 16, v1 -; GCN-NEXT: v_cndmask_b32_e64 v0, v4, 0, s[4:5] -; GCN-NEXT: v_cndmask_b32_e64 v4, v1, 0, vcc ; GCN-NEXT: v_cmp_lt_f16_sdwa s[4:5], v1, v8 src0_sel:WORD_1 src1_sel:DWORD +; GCN-NEXT: v_lshrrev_b32_e32 v7, 16, v2 +; GCN-NEXT: v_cndmask_b32_e64 v5, v1, 0, vcc +; GCN-NEXT: v_cndmask_b32_e64 v1, v4, 0, s[4:5] ; GCN-NEXT: v_cmp_gt_f16_e32 vcc, 0, v2 -; GCN-NEXT: v_lshrrev_b32_e32 v6, 16, v2 -; GCN-NEXT: v_cndmask_b32_e64 v1, v5, 0, s[4:5] -; GCN-NEXT: v_cndmask_b32_e64 v5, v2, 0, vcc ; GCN-NEXT: v_cmp_lt_f16_sdwa s[4:5], v2, v8 src0_sel:WORD_1 src1_sel:DWORD +; GCN-NEXT: v_lshrrev_b32_e32 v6, 16, v3 +; GCN-NEXT: v_cndmask_b32_e64 v4, v2, 0, vcc +; GCN-NEXT: v_cndmask_b32_e64 v2, v7, 0, s[4:5] ; GCN-NEXT: v_cmp_gt_f16_e32 vcc, 0, v3 -; GCN-NEXT: v_and_b32_e32 v4, 0xffff, v4 -; GCN-NEXT: v_lshrrev_b32_e32 v7, 16, v3 -; GCN-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5] -; GCN-NEXT: v_cndmask_b32_e64 v6, v3, 0, vcc ; GCN-NEXT: v_cmp_lt_f16_sdwa s[4:5], v3, v8 src0_sel:WORD_1 src1_sel:DWORD -; GCN-NEXT: v_lshl_or_b32 v1, v1, 16, v4 -; GCN-NEXT: v_and_b32_e32 v4, 0xffff, v5 -; GCN-NEXT: v_cndmask_b32_e64 v3, v7, 0, s[4:5] -; GCN-NEXT: v_and_b32_e32 v7, 0xffff, v9 -; GCN-NEXT: v_lshl_or_b32 v2, v2, 16, v4 -; GCN-NEXT: v_and_b32_e32 v4, 0xffff, v6 -; GCN-NEXT: v_lshl_or_b32 v0, v0, 16, v7 -; GCN-NEXT: v_lshl_or_b32 v3, v3, 16, v4 +; GCN-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc +; GCN-NEXT: v_cndmask_b32_e64 v3, v6, 0, s[4:5] +; GCN-NEXT: s_mov_b32 s4, 0x5040100 +; GCN-NEXT: v_perm_b32 v0, v0, v9, s4 +; GCN-NEXT: v_perm_b32 v1, v1, v5, s4 +; GCN-NEXT: v_perm_b32 v2, v2, v4, s4 +; GCN-NEXT: v_perm_b32 v3, v3, v7, s4 ; GCN-NEXT: s_setpc_b64 s[30:31] entry: %fcmp = fcmp olt <8 x half> %a, zeroinitializer diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll index b12e915c7d21b..67157195a19ad 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll @@ -665,6 +665,7 @@ define amdgpu_ps half @shl_i16_sv(i16 inreg %value, i16 %amount) { ; GFX6: ; %bb.0: ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX6-NEXT: v_lshl_b32_e32 v0, s0, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: shl_i16_sv: @@ -691,6 +692,7 @@ define amdgpu_ps half @shl_i16_vs(i16 %value, i16 inreg %amount) { ; GFX6: ; %bb.0: ; GFX6-NEXT: s_and_b32 s0, s0, 0xffff ; GFX6-NEXT: v_lshlrev_b32_e32 v0, s0, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: shl_i16_vs: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll index 5673a6c6e869d..a33ed368e299a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll +++ 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll @@ -2679,6 +2679,7 @@ define amdgpu_ps half @ssubsat_i16_sv(i16 inreg %lhs, i16 %rhs) { ; GFX6-NEXT: v_min_i32_e32 v0, s2, v0 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s0, v0 ; GFX6-NEXT: v_ashrrev_i32_e32 v0, 16, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: ssubsat_i16_sv: @@ -2721,6 +2722,7 @@ define amdgpu_ps half @ssubsat_i16_vs(i16 %lhs, i16 inreg %rhs) { ; GFX6-NEXT: v_min_i32_e32 v1, v1, v2 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 ; GFX6-NEXT: v_ashrrev_i32_e32 v0, 16, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: ssubsat_i16_vs: @@ -5360,8 +5362,7 @@ define amdgpu_ps <4 x float> @ssubsat_i128_sv(i128 inreg %lhs, i128 %rhs) { ; GFX6-NEXT: v_ashrrev_i32_e32 v2, 31, v7 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc ; GFX6-NEXT: v_xor_b32_e32 v0, v0, v8 -; GFX6-NEXT: v_bfrev_b32_e32 v1, 1 -; GFX6-NEXT: v_add_i32_e32 v3, vcc, v2, v1 +; GFX6-NEXT: v_add_i32_e32 v3, vcc, 0x80000000, v2 ; GFX6-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc @@ -5393,8 +5394,7 @@ define amdgpu_ps <4 x float> @ssubsat_i128_sv(i128 inreg %lhs, i128 %rhs) { ; GFX8-NEXT: v_ashrrev_i32_e32 v2, 31, v7 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc ; GFX8-NEXT: v_xor_b32_e32 v0, v0, v8 -; GFX8-NEXT: v_bfrev_b32_e32 v1, 1 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v2, v1 +; GFX8-NEXT: v_add_u32_e32 v3, vcc, 0x80000000, v2 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc @@ -5519,8 +5519,7 @@ define amdgpu_ps <4 x float> @ssubsat_i128_vs(i128 %lhs, i128 inreg %rhs) { ; GFX6-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc ; GFX6-NEXT: v_xor_b32_e32 v0, v1, v0 ; GFX6-NEXT: v_ashrrev_i32_e32 v2, 31, v7 -; GFX6-NEXT: v_bfrev_b32_e32 v1, 1 -; GFX6-NEXT: v_add_i32_e32 v3, vcc, v2, v1 +; GFX6-NEXT: v_add_i32_e32 v3, vcc, 0x80000000, v2 ; GFX6-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc @@ -5555,8 +5554,7 @@ define amdgpu_ps <4 x float> @ssubsat_i128_vs(i128 %lhs, i128 inreg %rhs) { ; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc ; GFX8-NEXT: v_xor_b32_e32 v0, v1, v0 ; GFX8-NEXT: v_ashrrev_i32_e32 v2, 31, v7 -; GFX8-NEXT: v_bfrev_b32_e32 v1, 1 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v2, v1 +; GFX8-NEXT: v_add_u32_e32 v3, vcc, 0x80000000, v2 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/strict_fma.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/strict_fma.f16.ll index 15740ee5476e8..1ce7ddcf9b866 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/strict_fma.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/strict_fma.f16.ll @@ -22,11 +22,11 @@ define <2 x half> @v_constained_fma_v2f16_fpexcept_strict(<2 x half> %x, <2 x ha ; GFX8-LABEL: v_constained_fma_v2f16_fpexcept_strict: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0 -; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v1 -; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v2 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v1 +; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v0 ; GFX8-NEXT: v_fma_f16 v0, v0, v1, v2 -; GFX8-NEXT: v_fma_f16 v1, v3, v4, v5 +; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v2 +; GFX8-NEXT: v_fma_f16 v1, v4, v3, v1 ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1 
; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] @@ -50,9 +50,12 @@ define <3 x half> @v_constained_fma_v3f16_fpexcept_strict(<3 x half> %x, <3 x ha ; GFX8-NEXT: v_lshrrev_b32_e32 v8, 16, v4 ; GFX8-NEXT: v_fma_f16 v0, v0, v2, v4 ; GFX8-NEXT: v_fma_f16 v2, v6, v7, v8 -; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX8-NEXT: s_and_b32 s4, 0xffff, s4 ; GFX8-NEXT: v_fma_f16 v1, v1, v3, v5 +; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX8-NEXT: s_lshl_b32 s4, s4, 16 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX8-NEXT: v_or_b32_e32 v1, s4, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] %val = call <3 x half> @llvm.experimental.constrained.fma.v3f16(<3 x half> %x, <3 x half> %y, <3 x half> %z, metadata !"round.tonearest", metadata !"fpexcept.strict") ret <3 x half> %val @@ -69,16 +72,16 @@ define <4 x half> @v_constained_fma_v4f16_fpexcept_strict(<4 x half> %x, <4 x ha ; GFX8-LABEL: v_constained_fma_v4f16_fpexcept_strict: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_lshrrev_b32_e32 v6, 16, v0 -; GFX8-NEXT: v_lshrrev_b32_e32 v8, 16, v2 -; GFX8-NEXT: v_lshrrev_b32_e32 v10, 16, v4 -; GFX8-NEXT: v_lshrrev_b32_e32 v7, 16, v1 -; GFX8-NEXT: v_lshrrev_b32_e32 v9, 16, v3 -; GFX8-NEXT: v_lshrrev_b32_e32 v11, 16, v5 +; GFX8-NEXT: v_lshrrev_b32_e32 v7, 16, v0 +; GFX8-NEXT: v_lshrrev_b32_e32 v9, 16, v2 +; GFX8-NEXT: v_lshrrev_b32_e32 v11, 16, v4 +; GFX8-NEXT: v_lshrrev_b32_e32 v6, 16, v1 +; GFX8-NEXT: v_lshrrev_b32_e32 v8, 16, v3 +; GFX8-NEXT: v_lshrrev_b32_e32 v10, 16, v5 ; GFX8-NEXT: v_fma_f16 v0, v0, v2, v4 -; GFX8-NEXT: v_fma_f16 v2, v6, v8, v10 +; GFX8-NEXT: v_fma_f16 v2, v7, v9, v11 ; GFX8-NEXT: v_fma_f16 v1, v1, v3, v5 -; GFX8-NEXT: v_fma_f16 v3, v7, v9, v11 +; GFX8-NEXT: v_fma_f16 v3, v6, v8, v10 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v3 @@ -135,11 +138,11 @@ define <2 x half> @v_constained_fma_v2f16_fpexcept_strict_fneg_fneg(<2 x half> % ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 ; GFX8-NEXT: v_xor_b32_e32 v1, 0x80008000, v1 -; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0 -; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v1 -; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v2 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v1 +; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v0 ; GFX8-NEXT: v_fma_f16 v0, v0, v1, v2 -; GFX8-NEXT: v_fma_f16 v1, v3, v4, v5 +; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v2 +; GFX8-NEXT: v_fma_f16 v1, v4, v3, v1 ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll index d9158e3558395..49da5b949eacc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll @@ -2016,6 +2016,7 @@ define amdgpu_ps half @uaddsat_i16_sv(i16 inreg %lhs, i16 %rhs) { ; GFX6-NEXT: v_min_u32_e32 v0, s1, v0 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, s0, v0 ; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: uaddsat_i16_sv: @@ -2056,6 +2057,7 @@ define amdgpu_ps half @uaddsat_i16_vs(i16 %lhs, i16 inreg %rhs) { ; GFX6-NEXT: v_min_u32_e32 v1, s0, v1 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: uaddsat_i16_vs: diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/unsupported-load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/unsupported-load.ll index ac248f57eb01e..18688e40f7384 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/unsupported-load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/unsupported-load.ll @@ -1,6 +1,6 @@ ; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - < %s 2>&1 | FileCheck -check-prefix=GISEL-ERR %s -; GISEL-ERR: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32) = G_LOAD %{{[0-9]+}}:vgpr(p8) :: (load (s32) from %ir.rsrc, addrspace 8) +; GISEL-ERR: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(f32) = G_LOAD %{{[0-9]+}}:vgpr(p8) :: (load (f32) from %ir.rsrc, addrspace 8) define float @load_from_rsrc(ptr addrspace(8) %rsrc) { body: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/unsupported-ptr-add.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/unsupported-ptr-add.ll index 82a15f7497f52..9fc5f26a7a2c8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/unsupported-ptr-add.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/unsupported-ptr-add.ll @@ -1,6 +1,6 @@ ; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - < %s 2>&1 | FileCheck -check-prefix=GISEL-ERR %s -; GISEL-ERR: LLVM ERROR: unable to legalize instruction: %{{[0-9]+}}:_(p8) = G_PTR_ADD %{{[0-9]+}}:_, %{{[0-9]+}}:_(s128) +; GISEL-ERR: LLVM ERROR: unable to legalize instruction: %{{[0-9]+}}:_(p8) = G_PTR_ADD %{{[0-9]+}}:_, %{{[0-9]+}}:_(i128) define float @gep_on_rsrc(ptr addrspace(8) %rsrc) { diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll index 1fd139b06417f..50c6774a58cab 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll @@ -1931,6 +1931,7 @@ define amdgpu_ps half @usubsat_i16_sv(i16 inreg %lhs, i16 %rhs) { ; GFX6-NEXT: v_min_u32_e32 v0, s0, v0 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s0, v0 ; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: usubsat_i16_sv: @@ -1970,6 +1971,7 @@ define amdgpu_ps half @usubsat_i16_vs(i16 %lhs, i16 inreg %rhs) { ; GFX6-NEXT: v_min_u32_e32 v1, s0, v0 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 ; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: usubsat_i16_vs: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll index 9cf9839e69d5d..aee50dcfabcdd 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll @@ -410,16 +410,11 @@ define amdgpu_ps void @test_wmma_f16_16x16x16_f16_negC_pack(<8 x half> %A, <8 x ; GFX12-NEXT: flat_load_b128 v[12:15], v[8:9] ; GFX12-NEXT: flat_load_b128 v[16:19], v[8:9] offset:16 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x101 -; GFX12-NEXT: v_and_b32_e32 v8, 0xffff, v12 -; GFX12-NEXT: v_and_b32_e32 v9, 0xffff, v14 +; GFX12-NEXT: v_perm_b32 v12, v13, v12, 0x5040100 +; GFX12-NEXT: v_perm_b32 v13, v15, v14, 0x5040100 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: v_and_b32_e32 v14, 0xffff, v16 -; GFX12-NEXT: v_and_b32_e32 v16, 0xffff, v18 -; GFX12-NEXT: v_lshl_or_b32 v12, v13, 16, v8 -; GFX12-NEXT: v_lshl_or_b32 v13, v15, 16, v9 -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | 
instid1(VALU_DEP_4) -; GFX12-NEXT: v_lshl_or_b32 v14, v17, 16, v14 -; GFX12-NEXT: v_lshl_or_b32 v15, v19, 16, v16 +; GFX12-NEXT: v_perm_b32 v14, v17, v16, 0x5040100 +; GFX12-NEXT: v_perm_b32 v15, v19, v18, 0x5040100 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-NEXT: v_wmma_f16_16x16x16_f16 v[12:15], v[0:3], v[4:7], v[12:15] neg_lo:[0,0,1] ; GFX12-NEXT: global_store_b128 v[10:11], v[12:15], off diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll index be1761227f802..375331773fb4f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll @@ -370,11 +370,8 @@ define amdgpu_ps void @test_wmma_f16_16x16x16_f16_negC_pack(<4 x half> %A, <4 x ; GFX12: ; %bb.0: ; %bb ; GFX12-NEXT: flat_load_b128 v[8:11], v[4:5] ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: v_and_b32_e32 v4, 0xffff, v8 -; GFX12-NEXT: v_and_b32_e32 v5, 0xffff, v10 -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-NEXT: v_lshl_or_b32 v4, v9, 16, v4 -; GFX12-NEXT: v_lshl_or_b32 v5, v11, 16, v5 +; GFX12-NEXT: v_perm_b32 v4, v9, v8, 0x5040100 +; GFX12-NEXT: v_perm_b32 v5, v11, v10, 0x5040100 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-NEXT: v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], v[4:5] neg_lo:[0,0,1] ; GFX12-NEXT: global_store_b64 v[6:7], v[4:5], off From 055634d7f4fe9d81c95ff4d0cd720456069e34bf Mon Sep 17 00:00:00 2001 From: Tim Gymnich Date: Fri, 24 Jan 2025 10:47:04 +0000 Subject: [PATCH 16/16] update mir tests --- .../amdgpu-prelegalizer-combiner-crash.mir | 24 +- .../GlobalISel/artifact-combiner-anyext.mir | 58 +- .../artifact-combiner-build-vector.mir | 246 +- .../artifact-combiner-concat-vectors.mir | 54 +- ...artifact-combiner-cse-leaves-dead-cast.mir | 180 +- .../GlobalISel/artifact-combiner-extract.mir | 462 +- .../GlobalISel/artifact-combiner-sext.mir | 100 +- .../GlobalISel/artifact-combiner-trunc.mir | 184 +- .../artifact-combiner-unmerge-values.mir | 2193 +- .../GlobalISel/artifact-combiner-zext.mir | 310 +- ...egalization-artifact-combiner-dead-def.mir | 98 +- .../AMDGPU/GlobalISel/combine-add-nullptr.mir | 70 +- .../GlobalISel/combine-add-to-ptradd.mir | 126 +- .../combine-amdgpu-cvt-f32-ubyte.mir | 544 +- .../AMDGPU/GlobalISel/combine-ashr-narrow.mir | 176 +- .../GlobalISel/combine-ext-legalizer.mir | 72 +- .../combine-extract-vector-load.mir | 36 +- .../AMDGPU/GlobalISel/combine-fabs-fneg.mir | 98 +- .../GlobalISel/combine-fcanonicalize.mir | 473 +- .../GlobalISel/combine-fdiv-sqrt-to-rsq.mir | 680 +- .../combine-fma-add-mul-post-legalize.mir | 4909 +-- .../combine-fma-add-mul-pre-legalize.mir | 4350 +-- .../GlobalISel/combine-fma-unmerge-values.mir | 620 +- .../combine-fold-binop-into-select.mir | 1966 +- .../GlobalISel/combine-foldable-fneg.mir | 1099 +- .../GlobalISel/combine-fpneg-one-fneg.mir | 220 +- .../CodeGen/AMDGPU/GlobalISel/combine-fsh.mir | 280 +- .../AMDGPU/GlobalISel/combine-fsub-fneg.mir | 429 +- .../AMDGPU/GlobalISel/combine-itofp.mir | 292 +- .../AMDGPU/GlobalISel/combine-lshr-narrow.mir | 164 +- .../GlobalISel/combine-or-redundant.mir | 210 +- .../GlobalISel/combine-redundant-and.mir | 202 +- .../GlobalISel/combine-redundant-neg.mir | 188 +- .../CodeGen/AMDGPU/GlobalISel/combine-rot.mir | 244 +- .../CodeGen/AMDGPU/GlobalISel/combine-rsq.mir | 162 +- 
.../AMDGPU/GlobalISel/combine-sext-inreg.mir | 272 +- .../combine-shift-imm-chain-illegal-types.mir | 420 +- .../combine-shift-imm-chain-shlsat.mir | 238 +- .../combine-shift-of-shifted-logic-shlsat.mir | 440 +- .../AMDGPU/GlobalISel/combine-shifts.mir | 186 +- ...mbine-shl-from-extend-narrow.postlegal.mir | 436 +- ...ombine-shl-from-extend-narrow.prelegal.mir | 334 +- .../AMDGPU/GlobalISel/combine-shl-narrow.mir | 172 +- .../AMDGPU/GlobalISel/combine-trunc-shift.mir | 222 +- .../AMDGPU/GlobalISel/combine-urem-pow-2.mir | 252 +- .../AMDGPU/GlobalISel/combine-zext-trunc.mir | 216 +- .../GlobalISel/compute-num-sign-bits-med3.mir | 162 +- ...divergent-i1-phis-no-lane-mask-merging.mir | 746 +- ...ergence-divergent-i1-used-outside-loop.mir | 1414 +- .../GlobalISel/divergence-structurizer.mir | 1472 +- .../divergence-temporal-divergent-i1.mir | 472 +- .../divergence-temporal-divergent-reg.mir | 78 +- .../fmamix-constant-bus-violation.mir | 33 +- .../AMDGPU/GlobalISel/inst-select-abs.mir | 38 +- .../AMDGPU/GlobalISel/inst-select-add.mir | 52 +- .../AMDGPU/GlobalISel/inst-select-add.s16.mir | 48 +- .../GlobalISel/inst-select-amdgcn.class.mir | 60 +- .../inst-select-amdgcn.class.s16.mir | 39 +- .../GlobalISel/inst-select-amdgcn.cos.mir | 24 +- .../GlobalISel/inst-select-amdgcn.cos.s16.mir | 24 +- .../inst-select-amdgcn.cvt.pk.i16.mir | 24 +- .../inst-select-amdgcn.cvt.pk.u16.mir | 24 +- .../inst-select-amdgcn.cvt.pknorm.i16.mir | 30 +- .../inst-select-amdgcn.cvt.pknorm.u16.mir | 30 +- .../inst-select-amdgcn.cvt.pkrtz.mir | 45 +- .../inst-select-amdgcn.ds.swizzle.mir | 12 +- .../inst-select-amdgcn.exp.compr.mir | 15 +- .../GlobalISel/inst-select-amdgcn.exp.mir | 6 +- .../inst-select-amdgcn.fcmp.constants.w32.mir | 76 +- .../inst-select-amdgcn.fcmp.constants.w64.mir | 76 +- .../inst-select-amdgcn.fmad.ftz.mir | 118 +- .../GlobalISel/inst-select-amdgcn.fmed3.mir | 102 +- .../inst-select-amdgcn.fmed3.s16.mir | 44 +- .../GlobalISel/inst-select-amdgcn.fract.mir | 48 +- .../inst-select-amdgcn.fract.s16.mir | 24 +- .../inst-select-amdgcn.groupstaticsize.mir | 10 +- .../inst-select-amdgcn.mbcnt.lo.mir | 30 +- .../GlobalISel/inst-select-amdgcn.mul.u24.mir | 24 +- .../inst-select-amdgcn.mulhi.i24.mir | 24 +- .../inst-select-amdgcn.mulhi.u24.mir | 24 +- .../inst-select-amdgcn.rcp.legacy.mir | 34 +- .../GlobalISel/inst-select-amdgcn.rcp.mir | 48 +- .../GlobalISel/inst-select-amdgcn.rcp.s16.mir | 25 +- .../inst-select-amdgcn.readfirstlane.mir | 26 +- .../inst-select-amdgcn.reloc.constant.mir | 10 +- .../inst-select-amdgcn.rsq.clamp.mir | 35 +- .../inst-select-amdgcn.rsq.legacy.mir | 34 +- .../GlobalISel/inst-select-amdgcn.rsq.mir | 48 +- .../GlobalISel/inst-select-amdgcn.rsq.s16.mir | 24 +- .../inst-select-amdgcn.s.sendmsg.mir | 4 +- .../GlobalISel/inst-select-amdgcn.sffbh.mir | 18 +- .../GlobalISel/inst-select-amdgcn.sin.mir | 24 +- .../GlobalISel/inst-select-amdgcn.sin.s16.mir | 24 +- ...inst-select-amdgpu-atomic-cmpxchg-flat.mir | 148 +- ...st-select-amdgpu-atomic-cmpxchg-global.mir | 210 +- .../inst-select-amdgpu-ffbh-u32.mir | 18 +- .../inst-select-amdgpu-ffbl-b32.mir | 18 +- .../inst-select-amdgpu-wave-address.mir | 4 +- .../AMDGPU/GlobalISel/inst-select-and.mir | 198 +- .../AMDGPU/GlobalISel/inst-select-anyext.mir | 114 +- .../AMDGPU/GlobalISel/inst-select-ashr.mir | 64 +- .../GlobalISel/inst-select-ashr.s16.mir | 364 +- .../GlobalISel/inst-select-ashr.v2s16.mir | 54 +- .../inst-select-atomic-cmpxchg-local.mir | 86 +- .../inst-select-atomic-cmpxchg-region.mir | 86 +- .../inst-select-atomicrmw-add-flat.mir 
| 186 +- .../inst-select-atomicrmw-add-global.mir | 278 +- .../inst-select-atomicrmw-fadd-local.mir | 68 +- .../inst-select-atomicrmw-fadd-region.mir | 68 +- .../inst-select-atomicrmw-xchg-local.mir | 34 +- .../inst-select-atomicrmw-xchg-region.mir | 34 +- .../AMDGPU/GlobalISel/inst-select-bitcast.mir | 6 +- .../GlobalISel/inst-select-bitreverse.mir | 48 +- .../AMDGPU/GlobalISel/inst-select-br.mir | 3 + .../AMDGPU/GlobalISel/inst-select-brcond.mir | 145 +- .../AMDGPU/GlobalISel/inst-select-bswap.mir | 7 +- .../inst-select-build-vector-trunc.v2s16.mir | 312 +- .../GlobalISel/inst-select-build-vector.mir | 40 +- .../GlobalISel/inst-select-concat-vectors.mir | 328 +- .../GlobalISel/inst-select-constant.mir | 141 +- .../AMDGPU/GlobalISel/inst-select-copy.mir | 104 +- .../inst-select-ctlz-zero-undef.mir | 24 +- .../AMDGPU/GlobalISel/inst-select-ctpop.mir | 84 +- .../inst-select-cttz-zero-undef.mir | 24 +- .../inst-select-extract-vector-elt.mir | 276 +- .../AMDGPU/GlobalISel/inst-select-extract.mir | 148 +- .../AMDGPU/GlobalISel/inst-select-fabs.mir | 268 +- .../GlobalISel/inst-select-fadd.s16.mir | 166 +- .../GlobalISel/inst-select-fadd.s32.mir | 282 +- .../GlobalISel/inst-select-fadd.s64.mir | 279 +- .../GlobalISel/inst-select-fcanonicalize.mir | 132 +- .../AMDGPU/GlobalISel/inst-select-fceil.mir | 48 +- .../GlobalISel/inst-select-fceil.s16.mir | 62 +- .../AMDGPU/GlobalISel/inst-select-fcmp.mir | 677 +- .../inst-select-fcmp.s16.gfx11plus-fake16.mir | 252 +- .../inst-select-fcmp.s16.gfx11plus.mir | 252 +- .../GlobalISel/inst-select-fcmp.s16.mir | 297 +- .../GlobalISel/inst-select-fconstant.mir | 120 +- .../AMDGPU/GlobalISel/inst-select-fexp2.mir | 20 +- .../GlobalISel/inst-select-ffloor.s16.mir | 74 +- .../GlobalISel/inst-select-ffloor.s32.mir | 52 +- .../GlobalISel/inst-select-ffloor.s64.mir | 30 +- .../AMDGPU/GlobalISel/inst-select-fma.s32.mir | 188 +- .../GlobalISel/inst-select-fmad.s32.mir | 116 +- .../GlobalISel/inst-select-fmaxnum-ieee.mir | 148 +- .../inst-select-fmaxnum-ieee.s16.mir | 32 +- .../inst-select-fmaxnum-ieee.v2s16.mir | 15 +- .../AMDGPU/GlobalISel/inst-select-fmaxnum.mir | 148 +- .../GlobalISel/inst-select-fmaxnum.s16.mir | 32 +- .../GlobalISel/inst-select-fmaxnum.v2s16.mir | 15 +- .../GlobalISel/inst-select-fminnum-ieee.mir | 148 +- .../inst-select-fminnum-ieee.s16.mir | 32 +- .../inst-select-fminnum-ieee.v2s16.mir | 15 +- .../AMDGPU/GlobalISel/inst-select-fminnum.mir | 148 +- .../GlobalISel/inst-select-fminnum.s16.mir | 32 +- .../GlobalISel/inst-select-fminnum.v2s16.mir | 15 +- .../AMDGPU/GlobalISel/inst-select-fmul.mir | 242 +- .../GlobalISel/inst-select-fmul.v2s16.mir | 61 +- .../AMDGPU/GlobalISel/inst-select-fneg.mir | 610 +- .../AMDGPU/GlobalISel/inst-select-fptosi.mir | 87 +- .../AMDGPU/GlobalISel/inst-select-fptoui.mir | 92 +- .../GlobalISel/inst-select-fract.f64.mir | 162 +- .../GlobalISel/inst-select-frame-index.mir | 4 +- .../AMDGPU/GlobalISel/inst-select-freeze.mir | 205 +- .../AMDGPU/GlobalISel/inst-select-fshr.mir | 10 +- .../AMDGPU/GlobalISel/inst-select-i1-copy.mir | 69 +- .../AMDGPU/GlobalISel/inst-select-icmp.mir | 288 +- .../GlobalISel/inst-select-icmp.s16.mir | 97 +- .../GlobalISel/inst-select-icmp.s64.mir | 257 +- .../GlobalISel/inst-select-implicit-def.mir | 58 +- .../inst-select-insert-vector-elt.mir | 276 +- .../AMDGPU/GlobalISel/inst-select-insert.mir | 210 +- .../inst-select-intrinsic-trunc.mir | 48 +- .../inst-select-intrinsic-trunc.s16.mir | 50 +- .../GlobalISel/inst-select-inttoptr.mir | 10 +- .../inst-select-load-atomic-flat.mir | 149 +- 
.../inst-select-load-atomic-global.mir | 166 +- .../inst-select-load-atomic-local.mir | 112 +- .../GlobalISel/inst-select-load-constant.mir | 380 +- .../GlobalISel/inst-select-load-flat.mir | 600 +- ...st-select-load-global-old-legalization.mir | 664 +- .../inst-select-load-global-saddr.mir | 336 +- .../GlobalISel/inst-select-load-global.mir | 640 +- .../inst-select-load-global.s96.mir | 62 +- .../GlobalISel/inst-select-load-local-128.mir | 128 +- .../GlobalISel/inst-select-load-local.mir | 262 +- .../GlobalISel/inst-select-load-private.mir | 456 +- .../GlobalISel/inst-select-load-smrd.mir | 66 +- .../AMDGPU/GlobalISel/inst-select-lshr.mir | 65 +- .../GlobalISel/inst-select-lshr.s16.mir | 364 +- .../GlobalISel/inst-select-lshr.v2s16.mir | 54 +- .../GlobalISel/inst-select-mad_64_32.mir | 28 +- .../GlobalISel/inst-select-merge-values.mir | 244 +- .../AMDGPU/GlobalISel/inst-select-mul.mir | 32 +- .../AMDGPU/GlobalISel/inst-select-or.mir | 190 +- .../GlobalISel/inst-select-pattern-add3.mir | 76 +- .../GlobalISel/inst-select-pattern-and-or.mir | 50 +- .../GlobalISel/inst-select-pattern-or3.mir | 37 +- .../GlobalISel/inst-select-pattern-smed3.mir | 102 +- .../inst-select-pattern-smed3.s16.mir | 104 +- .../GlobalISel/inst-select-pattern-umed3.mir | 102 +- .../inst-select-pattern-umed3.s16.mir | 104 +- .../GlobalISel/inst-select-pattern-xor3.mir | 64 +- .../AMDGPU/GlobalISel/inst-select-phi.mir | 192 +- ...st-select-pseudo-scalar-transcendental.mir | 138 +- .../AMDGPU/GlobalISel/inst-select-ptr-add.mir | 60 +- .../AMDGPU/GlobalISel/inst-select-ptrmask.mir | 236 +- .../GlobalISel/inst-select-ptrtoint.mir | 20 +- .../GlobalISel/inst-select-returnaddress.mir | 18 +- .../AMDGPU/GlobalISel/inst-select-sbfx.mir | 20 +- .../inst-select-scalar-float-sop1.mir | 198 +- .../inst-select-scalar-float-sop2.mir | 233 +- .../inst-select-scalar-float-sopc.mir | 336 +- .../inst-select-scalar-packed.xfail.mir | 80 +- .../AMDGPU/GlobalISel/inst-select-select.mir | 304 +- .../GlobalISel/inst-select-sext-inreg.mir | 102 +- .../AMDGPU/GlobalISel/inst-select-sext.mir | 88 +- .../GlobalISel/inst-select-sextload-local.mir | 38 +- .../AMDGPU/GlobalISel/inst-select-shl.mir | 65 +- .../AMDGPU/GlobalISel/inst-select-shl.s16.mir | 364 +- .../GlobalISel/inst-select-shl.v2s16.mir | 54 +- .../AMDGPU/GlobalISel/inst-select-sitofp.mir | 44 +- .../AMDGPU/GlobalISel/inst-select-smax.mir | 32 +- .../AMDGPU/GlobalISel/inst-select-smin.mir | 32 +- .../AMDGPU/GlobalISel/inst-select-smulh.mir | 46 +- .../inst-select-stacksave-stackrestore.mir | 7 +- .../inst-select-store-atomic-flat.mir | 72 +- .../inst-select-store-atomic-local.mir | 98 +- .../GlobalISel/inst-select-store-flat.mir | 400 +- .../GlobalISel/inst-select-store-global.mir | 366 +- .../inst-select-store-global.s96.mir | 74 +- .../GlobalISel/inst-select-store-local.mir | 251 +- .../GlobalISel/inst-select-store-private.mir | 248 +- .../AMDGPU/GlobalISel/inst-select-sub.mir | 20 +- .../AMDGPU/GlobalISel/inst-select-trunc.mir | 142 +- .../GlobalISel/inst-select-trunc.v2s16.mir | 12 +- .../GlobalISel/inst-select-uadde.gfx10.mir | 40 +- .../AMDGPU/GlobalISel/inst-select-uadde.mir | 60 +- .../AMDGPU/GlobalISel/inst-select-uaddo.mir | 60 +- .../AMDGPU/GlobalISel/inst-select-ubfx.mir | 20 +- .../AMDGPU/GlobalISel/inst-select-uitofp.mir | 32 +- .../AMDGPU/GlobalISel/inst-select-umax.mir | 32 +- .../AMDGPU/GlobalISel/inst-select-umin.mir | 32 +- .../AMDGPU/GlobalISel/inst-select-umulh.mir | 46 +- .../GlobalISel/inst-select-unmerge-values.mir | 134 +- 
.../GlobalISel/inst-select-usube.gfx10.mir | 40 +- .../AMDGPU/GlobalISel/inst-select-usube.mir | 60 +- .../AMDGPU/GlobalISel/inst-select-usubo.mir | 60 +- .../AMDGPU/GlobalISel/inst-select-xor.mir | 190 +- .../AMDGPU/GlobalISel/inst-select-zext.mir | 88 +- .../GlobalISel/inst-select-zextload-local.mir | 38 +- .../AMDGPU/GlobalISel/legalize-abs.mir | 590 +- .../AMDGPU/GlobalISel/legalize-add.mir | 812 +- .../GlobalISel/legalize-addrspacecast.mir | 238 +- .../GlobalISel/legalize-amdgcn.if-invalid.mir | 76 +- .../GlobalISel/legalize-amdgcn.if.xfail.mir | 10 +- .../GlobalISel/legalize-amdgcn.rsq.clamp.mir | 66 +- .../legalize-amdgcn.wavefrontsize.mir | 13 +- .../legalize-amdgcn.workitem.id.mir | 88 +- .../AMDGPU/GlobalISel/legalize-and.mir | 856 +- .../AMDGPU/GlobalISel/legalize-anyext.mir | 648 +- .../AMDGPU/GlobalISel/legalize-ashr.mir | 3304 +- .../legalize-atomic-cmpxchg-with-success.mir | 116 +- .../GlobalISel/legalize-atomic-cmpxchg.mir | 80 +- .../GlobalISel/legalize-atomicrmw-add.mir | 32 +- .../GlobalISel/legalize-atomicrmw-and.mir | 32 +- .../legalize-atomicrmw-fadd-global.mir | 14 +- .../legalize-atomicrmw-fadd-local.mir | 16 +- .../GlobalISel/legalize-atomicrmw-max.mir | 32 +- .../GlobalISel/legalize-atomicrmw-min.mir | 32 +- .../GlobalISel/legalize-atomicrmw-nand.mir | 11 +- .../GlobalISel/legalize-atomicrmw-or.mir | 32 +- .../GlobalISel/legalize-atomicrmw-sub.mir | 32 +- .../GlobalISel/legalize-atomicrmw-umax.mir | 32 +- .../GlobalISel/legalize-atomicrmw-umin.mir | 32 +- .../legalize-atomicrmw-xchg-flat.mir | 18 +- .../GlobalISel/legalize-atomicrmw-xchg.mir | 32 +- .../GlobalISel/legalize-atomicrmw-xor.mir | 32 +- .../AMDGPU/GlobalISel/legalize-bitcast.mir | 4030 +-- .../AMDGPU/GlobalISel/legalize-bitreverse.mir | 150 +- .../AMDGPU/GlobalISel/legalize-block-addr.mir | 2 +- .../AMDGPU/GlobalISel/legalize-brcond.mir | 481 +- .../AMDGPU/GlobalISel/legalize-bswap.mir | 544 +- .../legalize-build-vector-splat.mir | 20 +- .../legalize-build-vector-trunc.mir | 37 +- .../GlobalISel/legalize-build-vector.mir | 944 +- .../GlobalISel/legalize-build-vector.s16.mir | 931 +- .../GlobalISel/legalize-concat-vectors.mir | 162 +- .../AMDGPU/GlobalISel/legalize-constant.mir | 94 +- .../GlobalISel/legalize-ctlz-zero-undef.mir | 198 +- .../AMDGPU/GlobalISel/legalize-ctlz.mir | 260 +- .../AMDGPU/GlobalISel/legalize-ctpop.mir | 302 +- .../GlobalISel/legalize-cttz-zero-undef.mir | 190 +- .../AMDGPU/GlobalISel/legalize-cttz.mir | 232 +- .../legalize-extract-vector-elt.mir | 1580 +- .../AMDGPU/GlobalISel/legalize-extract.mir | 968 +- .../legalize-extractelement-crash.mir | 24 +- .../AMDGPU/GlobalISel/legalize-fabs.mir | 571 +- .../AMDGPU/GlobalISel/legalize-fadd.mir | 1125 +- .../GlobalISel/legalize-fcanonicalize.mir | 690 +- .../AMDGPU/GlobalISel/legalize-fceil.mir | 593 +- .../AMDGPU/GlobalISel/legalize-fcmp-s32.mir | 48 +- .../AMDGPU/GlobalISel/legalize-fcmp.mir | 692 +- .../AMDGPU/GlobalISel/legalize-fconstant.mir | 34 +- .../AMDGPU/GlobalISel/legalize-fcopysign.mir | 1764 +- .../AMDGPU/GlobalISel/legalize-fcos.mir | 1044 +- .../AMDGPU/GlobalISel/legalize-fdiv.mir | 4770 +-- .../AMDGPU/GlobalISel/legalize-fexp.mir | 1170 +- .../AMDGPU/GlobalISel/legalize-fexp2.mir | 581 +- .../AMDGPU/GlobalISel/legalize-ffloor.mir | 872 +- .../AMDGPU/GlobalISel/legalize-flog.mir | 413 +- .../AMDGPU/GlobalISel/legalize-flog10.mir | 413 +- .../AMDGPU/GlobalISel/legalize-flog2.mir | 152 +- .../AMDGPU/GlobalISel/legalize-fma.mir | 1430 +- .../AMDGPU/GlobalISel/legalize-fmad.s16.mir | 2288 +- 
.../AMDGPU/GlobalISel/legalize-fmad.s32.mir | 1128 +- .../AMDGPU/GlobalISel/legalize-fmad.s64.mir | 144 +- .../AMDGPU/GlobalISel/legalize-fmaxnum.mir | 1757 +- .../AMDGPU/GlobalISel/legalize-fminnum.mir | 1757 +- .../AMDGPU/GlobalISel/legalize-fmul.mir | 1125 +- .../AMDGPU/GlobalISel/legalize-fneg.mir | 516 +- .../AMDGPU/GlobalISel/legalize-fpext.mir | 323 +- .../AMDGPU/GlobalISel/legalize-fpow.mir | 1068 +- .../AMDGPU/GlobalISel/legalize-fpowi.mir | 200 +- .../AMDGPU/GlobalISel/legalize-fptosi.mir | 1268 +- .../AMDGPU/GlobalISel/legalize-fptoui.mir | 1120 +- .../AMDGPU/GlobalISel/legalize-fptrunc.mir | 882 +- .../AMDGPU/GlobalISel/legalize-freeze.mir | 792 +- .../AMDGPU/GlobalISel/legalize-fshl.mir | 1870 +- .../AMDGPU/GlobalISel/legalize-fshr.mir | 2370 +- .../AMDGPU/GlobalISel/legalize-fsin.mir | 1044 +- .../AMDGPU/GlobalISel/legalize-fsqrt.mir | 1068 +- .../AMDGPU/GlobalISel/legalize-fsub.mir | 1254 +- .../AMDGPU/GlobalISel/legalize-icmp.mir | 1028 +- .../legalize-implicit-def-s1025.mir | 19 +- .../GlobalISel/legalize-implicit-def.mir | 514 +- .../GlobalISel/legalize-insert-vector-elt.mir | 1268 +- .../AMDGPU/GlobalISel/legalize-insert.mir | 1930 +- .../legalize-intrinsic-amdgcn-fdiv-fast.mir | 80 +- .../GlobalISel/legalize-intrinsic-round.mir | 2335 +- .../GlobalISel/legalize-intrinsic-trunc.mir | 555 +- .../AMDGPU/GlobalISel/legalize-inttoptr.mir | 146 +- .../AMDGPU/GlobalISel/legalize-jump-table.mir | 6 +- .../legalize-llvm.amdgcn.s.buffer.load.mir | 340 +- .../legalize-load-constant-32bit.mir | 60 +- .../GlobalISel/legalize-load-constant.mir | 10674 +++--- .../AMDGPU/GlobalISel/legalize-load-flat.mir | 17528 +++++----- .../GlobalISel/legalize-load-global.mir | 22160 ++++++------ .../AMDGPU/GlobalISel/legalize-load-local.mir | 23974 ++++++------- .../legalize-load-memory-metadata.mir | 44 +- .../GlobalISel/legalize-load-private.mir | 27888 ++++++++-------- .../AMDGPU/GlobalISel/legalize-lshr.mir | 3302 +- .../AMDGPU/GlobalISel/legalize-memcpy.mir | 34 +- .../GlobalISel/legalize-memcpyinline.mir | 34 +- .../AMDGPU/GlobalISel/legalize-memmove.mir | 34 +- .../AMDGPU/GlobalISel/legalize-memset.mir | 32 +- .../legalize-merge-values-build-vector.mir | 130 +- .../GlobalISel/legalize-merge-values.mir | 1578 +- .../AMDGPU/GlobalISel/legalize-mul.mir | 1018 +- .../CodeGen/AMDGPU/GlobalISel/legalize-or.mir | 766 +- .../AMDGPU/GlobalISel/legalize-phi.mir | 1746 +- .../AMDGPU/GlobalISel/legalize-ptr-add.mir | 108 +- .../AMDGPU/GlobalISel/legalize-ptrmask.mir | 154 +- .../AMDGPU/GlobalISel/legalize-ptrtoint.mir | 118 +- .../AMDGPU/GlobalISel/legalize-rotl-rotr.mir | 600 +- .../AMDGPU/GlobalISel/legalize-sadde.mir | 188 +- .../AMDGPU/GlobalISel/legalize-saddo.mir | 666 +- .../AMDGPU/GlobalISel/legalize-saddsat.mir | 1912 +- .../AMDGPU/GlobalISel/legalize-sbfx.mir | 112 +- .../AMDGPU/GlobalISel/legalize-sdiv.mir | 7791 ++--- .../AMDGPU/GlobalISel/legalize-select.mir | 1790 +- .../AMDGPU/GlobalISel/legalize-sext-inreg.mir | 1568 +- .../AMDGPU/GlobalISel/legalize-sext.mir | 762 +- .../legalize-sextload-constant-32bit.mir | 90 +- .../GlobalISel/legalize-sextload-flat.mir | 94 +- .../GlobalISel/legalize-sextload-global.mir | 341 +- .../GlobalISel/legalize-sextload-local.mir | 56 +- .../GlobalISel/legalize-sextload-private.mir | 56 +- .../AMDGPU/GlobalISel/legalize-shl.mir | 3094 +- .../GlobalISel/legalize-shuffle-vector.mir | 356 +- .../legalize-shuffle-vector.s16.mir | 1382 +- .../AMDGPU/GlobalISel/legalize-sitofp.mir | 904 +- .../AMDGPU/GlobalISel/legalize-smax.mir | 784 +- 
.../AMDGPU/GlobalISel/legalize-smin.mir | 784 +- .../AMDGPU/GlobalISel/legalize-smulh.mir | 664 +- .../AMDGPU/GlobalISel/legalize-smulo.mir | 814 +- .../AMDGPU/GlobalISel/legalize-srem.mir | 7415 ++-- .../AMDGPU/GlobalISel/legalize-sshlsat.mir | 1956 +- .../AMDGPU/GlobalISel/legalize-ssube.mir | 188 +- .../AMDGPU/GlobalISel/legalize-ssubo.mir | 666 +- .../AMDGPU/GlobalISel/legalize-ssubsat.mir | 1912 +- .../GlobalISel/legalize-store-global.mir | 12300 +++---- .../AMDGPU/GlobalISel/legalize-store.mir | 1382 +- .../GlobalISel/legalize-strict_fsub.mir | 48 +- .../AMDGPU/GlobalISel/legalize-sub.mir | 822 +- .../AMDGPU/GlobalISel/legalize-trap-gfx11.mir | 40 +- .../AMDGPU/GlobalISel/legalize-trap.mir | 22 +- .../AMDGPU/GlobalISel/legalize-trunc.mir | 440 +- .../AMDGPU/GlobalISel/legalize-uadde.mir | 190 +- .../AMDGPU/GlobalISel/legalize-uaddo.mir | 500 +- .../AMDGPU/GlobalISel/legalize-uaddsat.mir | 1242 +- .../AMDGPU/GlobalISel/legalize-ubfx.mir | 113 +- .../AMDGPU/GlobalISel/legalize-udiv.mir | 6472 ++-- .../AMDGPU/GlobalISel/legalize-uitofp.mir | 800 +- .../AMDGPU/GlobalISel/legalize-umax.mir | 746 +- .../AMDGPU/GlobalISel/legalize-umin.mir | 746 +- .../AMDGPU/GlobalISel/legalize-umulh.mir | 1186 +- .../AMDGPU/GlobalISel/legalize-umulo.mir | 1327 +- .../GlobalISel/legalize-unmerge-values.mir | 1432 +- .../AMDGPU/GlobalISel/legalize-urem.mir | 6176 ++-- .../AMDGPU/GlobalISel/legalize-ushlsat.mir | 1552 +- .../AMDGPU/GlobalISel/legalize-usube.mir | 190 +- .../AMDGPU/GlobalISel/legalize-usubo.mir | 500 +- .../AMDGPU/GlobalISel/legalize-usubsat.mir | 1190 +- .../GlobalISel/legalize-vector-args-gfx7.mir | 571 +- .../legalize-vector-args-gfx8-plus.mir | 726 +- .../AMDGPU/GlobalISel/legalize-xor.mir | 766 +- .../AMDGPU/GlobalISel/legalize-zext.mir | 798 +- .../legalize-zextload-constant-32bit.mir | 90 +- .../GlobalISel/legalize-zextload-flat.mir | 94 +- .../GlobalISel/legalize-zextload-global.mir | 341 +- .../GlobalISel/legalize-zextload-local.mir | 56 +- .../GlobalISel/legalize-zextload-private.mir | 56 +- .../llvm.amdgcn.image.atomic.dim.mir | 92 +- .../localizer-wrong-insert-point.mir | 5 +- .../madmix-constant-bus-violation.mir | 33 +- .../AMDGPU/GlobalISel/mul-known-bits.i64.mir | 332 +- ...tlz-from-umul-to-lshr-in-postlegalizer.mir | 16 +- .../AMDGPU/GlobalISel/no-legalize-atomic.mir | 20 +- ...alizer-combiner-buildvector-identities.mir | 142 +- .../postlegalizer-combiner-divrem.mir | 208 +- .../postlegalizer-combiner-fcanonicalize.mir | 78 +- .../postlegalizer-combiner-freeze.mir | 68 +- .../postlegalizer-combiner-reassoc.mir | 26 +- ...lizer-combiner-sextload-from-sextinreg.mir | 96 +- ...zer-combiner-trunc-bitcast-buildvector.mir | 210 +- .../postlegalizer-combiner-unmerge-undef.mir | 10 +- ...stlegalizer-combiner-zextload-from-and.mir | 120 +- .../GlobalISel/postlegalizercombiner-and.mir | 236 +- .../GlobalISel/postlegalizercombiner-ashr.mir | 126 +- .../postlegalizercombiner-load-and-mask.mir | 16 +- .../GlobalISel/postlegalizercombiner-lshr.mir | 126 +- .../GlobalISel/postlegalizercombiner-mul.mir | 56 +- .../GlobalISel/postlegalizercombiner-sbfx.mir | 140 +- .../postlegalizercombiner-select.mir | 80 +- .../GlobalISel/postlegalizercombiner-shl.mir | 126 +- .../GlobalISel/postlegalizercombiner-ubfx.mir | 132 +- .../prelegalizer-combiner-divrem.mir | 486 +- .../prelegalizer-combiner-fptrunc_fpext.mir | 76 +- ...relegalizer-combiner-redundant-bitcast.mir | 40 +- ...relegalizer-combiner-sext_inreg-to-and.mir | 90 +- .../regbankcombiner-clamp-fmed3-const.mir | 522 +- 
.../regbankcombiner-clamp-minmax-const.mir | 920 +- .../regbankcombiner-fmed3-minmax-const.mir | 752 +- .../regbankcombiner-redundant-and.mir | 20 +- .../GlobalISel/regbankcombiner-smed3.mir | 342 +- .../GlobalISel/regbankcombiner-umed3.mir | 342 +- .../AMDGPU/GlobalISel/regbankselect-abs.mir | 66 +- .../GlobalISel/regbankselect-add.s16.mir | 106 +- .../GlobalISel/regbankselect-add.s32.mir | 52 +- .../GlobalISel/regbankselect-add.v2s16.mir | 80 +- .../regbankselect-amdgcn-exp-compr.mir | 28 +- .../GlobalISel/regbankselect-amdgcn-exp.mir | 64 +- .../regbankselect-amdgcn-s-buffer-load.mir | 114 +- .../regbankselect-amdgcn.ballot.i64.mir | 56 +- .../GlobalISel/regbankselect-amdgcn.class.mir | 64 +- .../regbankselect-amdgcn.cvt.pkrtz.mir | 72 +- .../regbankselect-amdgcn.div.fmas.mir | 162 +- .../regbankselect-amdgcn.div.scale.mir | 72 +- .../regbankselect-amdgcn.ds.append.mir | 8 +- .../regbankselect-amdgcn.ds.bpermute.mir | 16 +- .../regbankselect-amdgcn.ds.consume.mir | 8 +- .../regbankselect-amdgcn.ds.gws.init.mir | 56 +- .../regbankselect-amdgcn.ds.gws.sema.v.mir | 18 +- .../regbankselect-amdgcn.ds.ordered.add.mir | 56 +- .../regbankselect-amdgcn.ds.ordered.swap.mir | 56 +- .../regbankselect-amdgcn.ds.permute.mir | 16 +- .../regbankselect-amdgcn.ds.swizzle.mir | 10 +- .../regbankselect-amdgcn.else.32.mir | 8 +- .../regbankselect-amdgcn.else.64.mir | 8 +- .../GlobalISel/regbankselect-amdgcn.fcmp.mir | 72 +- .../regbankselect-amdgcn.fmul.legacy.mir | 72 +- .../regbankselect-amdgcn.groupstaticsize.mir | 4 +- .../GlobalISel/regbankselect-amdgcn.icmp.mir | 56 +- .../regbankselect-amdgcn.interp.mov.mir | 24 +- .../regbankselect-amdgcn.interp.p1.f16.mir | 34 +- .../regbankselect-amdgcn.interp.p1.mir | 64 +- .../regbankselect-amdgcn.interp.p2.f16.mir | 50 +- .../regbankselect-amdgcn.interp.p2.mir | 50 +- ...gbankselect-amdgcn.kernarg.segment.ptr.mir | 2 +- .../GlobalISel/regbankselect-amdgcn.kill.mir | 60 +- .../regbankselect-amdgcn.lds.direct.load.mir | 18 +- .../regbankselect-amdgcn.lds.param.load.mir | 18 +- .../regbankselect-amdgcn.live.mask.mir | 8 +- .../regbankselect-amdgcn.mfma.gfx90a.mir | 301 +- .../regbankselect-amdgcn.mfma.gfx942.mir | 418 +- .../GlobalISel/regbankselect-amdgcn.mfma.mir | 1160 +- .../regbankselect-amdgcn.ps.live.mir | 9 +- .../regbankselect-amdgcn.readfirstlane.mir | 18 +- .../regbankselect-amdgcn.readlane.mir | 126 +- .../regbankselect-amdgcn.s.buffer.load.mir | 100 +- ...elect-amdgcn.s.get.waveid.in.workgroup.mir | 4 +- .../regbankselect-amdgcn.s.getpc.mir | 4 +- .../regbankselect-amdgcn.s.getreg.mir | 4 +- .../regbankselect-amdgcn.s.memrealtime.mir | 4 +- .../regbankselect-amdgcn.s.memtime.mir | 4 +- .../regbankselect-amdgcn.s.sendmsg.mir | 18 +- .../regbankselect-amdgcn.s.sendmsghalt.mir | 18 +- .../regbankselect-amdgcn.update.dpp.mir | 36 +- .../regbankselect-amdgcn.wqm.demote.mir | 60 +- .../GlobalISel/regbankselect-amdgcn.wqm.mir | 18 +- .../regbankselect-amdgcn.wqm.vote.mir | 50 +- .../regbankselect-amdgcn.writelane.mir | 90 +- .../GlobalISel/regbankselect-amdgcn.wwm.mir | 18 +- .../regbankselect-amdgpu-ffbh-u32.mir | 16 +- .../regbankselect-amdgpu-ffbl-b32.mir | 16 +- .../regbankselect-amdgpu-wave-address.mir | 4 +- .../GlobalISel/regbankselect-and-s1.mir | 316 +- .../AMDGPU/GlobalISel/regbankselect-and.mir | 629 +- .../GlobalISel/regbankselect-anyext.mir | 216 +- .../AMDGPU/GlobalISel/regbankselect-ashr.mir | 262 +- .../GlobalISel/regbankselect-assert-align.mir | 6 +- .../GlobalISel/regbankselect-assert-zext.mir | 60 +- .../regbankselect-atomic-cmpxchg.mir | 
48 +- .../regbankselect-atomicrmw-add.mir | 30 +- .../regbankselect-atomicrmw-and.mir | 30 +- .../regbankselect-atomicrmw-fadd.mir | 12 +- .../regbankselect-atomicrmw-max.mir | 30 +- .../regbankselect-atomicrmw-min.mir | 30 +- .../GlobalISel/regbankselect-atomicrmw-or.mir | 30 +- .../regbankselect-atomicrmw-sub.mir | 30 +- .../regbankselect-atomicrmw-umax.mir | 30 +- .../regbankselect-atomicrmw-umin.mir | 30 +- .../regbankselect-atomicrmw-xchg.mir | 30 +- .../regbankselect-atomicrmw-xor.mir | 30 +- .../GlobalISel/regbankselect-bitcast.mir | 16 +- .../GlobalISel/regbankselect-bitreverse.mir | 38 +- .../GlobalISel/regbankselect-block-addr.mir | 2 +- .../GlobalISel/regbankselect-brcond.mir | 143 +- .../AMDGPU/GlobalISel/regbankselect-bswap.mir | 18 +- .../GlobalISel/regbankselect-build-vector.mir | 300 +- .../regbankselect-concat-vector.mir | 144 +- .../GlobalISel/regbankselect-constant.mir | 18 +- .../AMDGPU/GlobalISel/regbankselect-copy.mir | 472 +- .../regbankselect-ctlz-zero-undef.mir | 58 +- .../AMDGPU/GlobalISel/regbankselect-ctpop.mir | 54 +- .../regbankselect-cttz-zero-undef.mir | 58 +- .../GlobalISel/regbankselect-default.mir | 26 +- .../regbankselect-dyn-stackalloc.mir | 422 +- .../regbankselect-extract-vector-elt.mir | 2070 +- .../GlobalISel/regbankselect-extract.mir | 44 +- .../AMDGPU/GlobalISel/regbankselect-fabs.mir | 32 +- .../AMDGPU/GlobalISel/regbankselect-fadd.mir | 72 +- .../regbankselect-fcanonicalize.mir | 34 +- .../AMDGPU/GlobalISel/regbankselect-fceil.mir | 22 +- .../AMDGPU/GlobalISel/regbankselect-fcmp.mir | 79 +- .../AMDGPU/GlobalISel/regbankselect-fexp2.mir | 22 +- .../AMDGPU/GlobalISel/regbankselect-flog2.mir | 22 +- .../AMDGPU/GlobalISel/regbankselect-fma.mir | 200 +- .../AMDGPU/GlobalISel/regbankselect-fmul.mir | 72 +- .../AMDGPU/GlobalISel/regbankselect-fneg.mir | 32 +- .../AMDGPU/GlobalISel/regbankselect-fpext.mir | 22 +- .../GlobalISel/regbankselect-fptosi.mir | 22 +- .../GlobalISel/regbankselect-fptoui.mir | 22 +- .../GlobalISel/regbankselect-fptrunc.mir | 22 +- .../GlobalISel/regbankselect-frame-index.mir | 4 +- .../GlobalISel/regbankselect-freeze.mir | 364 +- .../AMDGPU/GlobalISel/regbankselect-fshr.mir | 152 +- .../AMDGPU/GlobalISel/regbankselect-fsqrt.mir | 34 +- .../AMDGPU/GlobalISel/regbankselect-fsub.mir | 72 +- .../AMDGPU/GlobalISel/regbankselect-icmp.mir | 426 +- .../GlobalISel/regbankselect-icmp.s16.mir | 88 +- .../GlobalISel/regbankselect-illegal-copy.mir | 4 +- .../GlobalISel/regbankselect-implicit-def.mir | 164 +- .../regbankselect-insert-vector-elt.mir | 1099 +- .../GlobalISel/regbankselect-insert.mir | 136 +- .../regbankselect-intrinsic-trunc.mir | 22 +- .../GlobalISel/regbankselect-inttoptr.mir | 16 +- .../AMDGPU/GlobalISel/regbankselect-load.mir | 909 +- .../AMDGPU/GlobalISel/regbankselect-lshr.mir | 266 +- .../GlobalISel/regbankselect-mad_64_32.mir | 772 +- .../GlobalISel/regbankselect-merge-values.mir | 96 +- .../regbankselect-mui-regbanklegalize.mir | 1202 +- .../regbankselect-mui-regbankselect.mir | 1068 +- .../regbankselect-mui-salu-float.mir | 128 +- .../AMDGPU/GlobalISel/regbankselect-mui.mir | 1821 +- .../AMDGPU/GlobalISel/regbankselect-mul.mir | 152 +- .../AMDGPU/GlobalISel/regbankselect-or.mir | 842 +- .../GlobalISel/regbankselect-phi-s1.mir | 1952 +- .../AMDGPU/GlobalISel/regbankselect-phi.mir | 3022 +- ...ankselect-pseudo-scalar-transcendental.mir | 238 +- .../GlobalISel/regbankselect-ptr-add.mir | 44 +- .../GlobalISel/regbankselect-ptrmask.mir | 44 +- .../GlobalISel/regbankselect-ptrtoint.mir | 8 +- 
.../GlobalISel/regbankselect-reg-sequence.mir | 65 +- .../AMDGPU/GlobalISel/regbankselect-sadde.mir | 215 +- .../GlobalISel/regbankselect-salu-float.mir | 166 +- .../AMDGPU/GlobalISel/regbankselect-sbfx.mir | 368 +- .../GlobalISel/regbankselect-select.mir | 2320 +- .../GlobalISel/regbankselect-sext-inreg.mir | 190 +- .../AMDGPU/GlobalISel/regbankselect-sext.mir | 250 +- .../GlobalISel/regbankselect-sextload.mir | 24 +- .../AMDGPU/GlobalISel/regbankselect-shl.mir | 258 +- .../GlobalISel/regbankselect-sitofp.mir | 18 +- .../AMDGPU/GlobalISel/regbankselect-smax.mir | 232 +- .../AMDGPU/GlobalISel/regbankselect-smin.mir | 240 +- .../AMDGPU/GlobalISel/regbankselect-smulh.mir | 88 +- ...gbankselect-split-scalar-load-metadata.mir | 46 +- .../AMDGPU/GlobalISel/regbankselect-ssube.mir | 215 +- .../AMDGPU/GlobalISel/regbankselect-sub.mir | 52 +- .../AMDGPU/GlobalISel/regbankselect-trunc.mir | 48 +- .../AMDGPU/GlobalISel/regbankselect-uadde.mir | 215 +- .../AMDGPU/GlobalISel/regbankselect-uaddo.mir | 54 +- .../AMDGPU/GlobalISel/regbankselect-ubfx.mir | 368 +- .../GlobalISel/regbankselect-uitofp.mir | 18 +- .../AMDGPU/GlobalISel/regbankselect-umax.mir | 244 +- .../AMDGPU/GlobalISel/regbankselect-umin.mir | 244 +- .../AMDGPU/GlobalISel/regbankselect-umulh.mir | 88 +- .../regbankselect-uniform-load-noclobber.mir | 166 +- .../regbankselect-unmerge-values.mir | 48 +- .../AMDGPU/GlobalISel/regbankselect-usube.mir | 215 +- .../AMDGPU/GlobalISel/regbankselect-usubo.mir | 54 +- .../regbankselect-waterfall-agpr.mir | 96 +- .../regbankselect-widen-scalar-loads.mir | 418 +- .../AMDGPU/GlobalISel/regbankselect-xor.mir | 852 +- .../AMDGPU/GlobalISel/regbankselect-zext.mir | 244 +- .../GlobalISel/regbankselect-zextload.mir | 36 +- .../AMDGPU/GlobalISel/regbankselect.mir | 36 +- .../GlobalISel/twoaddr-extract-dyn-v7f64.mir | 92 +- 629 files changed, 180320 insertions(+), 168167 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/amdgpu-prelegalizer-combiner-crash.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/amdgpu-prelegalizer-combiner-crash.mir index 00050157e9799..8e0a33bc6e0b3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/amdgpu-prelegalizer-combiner-crash.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/amdgpu-prelegalizer-combiner-crash.mir @@ -10,17 +10,21 @@ body: | ; GCN-LABEL: name: non_inlineable_imm_splat ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GCN-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH4200 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) - ; GCN-NEXT: [[SUB:%[0-9]+]]:_(<2 x s16>) = G_SUB [[COPY]], [[BUILD_VECTOR]] - ; GCN-NEXT: $vgpr0 = COPY [[SUB]](<2 x s16>) + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GCN-NEXT: [[C:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH4200 + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(i16) = G_BITCAST [[C]](f16) + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[C]](f16) + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST]](i16), [[BITCAST1]](i16) + ; GCN-NEXT: [[SUB:%[0-9]+]]:_(<2 x i16>) = G_SUB [[COPY]], [[BUILD_VECTOR]] + ; GCN-NEXT: $vgpr0 = COPY [[SUB]](<2 x i16>) ; GCN-NEXT: SI_RETURN implicit $vgpr0 - %0:_(<2 x s16>) = COPY $vgpr0 - %2:_(s16) = G_FCONSTANT half 0xH4200 - %1:_(<2 x s16>) = G_BUILD_VECTOR %2(s16), %2(s16) - %3:_(<2 x s16>) = G_SUB %0, %1 - $vgpr0 = COPY %3(<2 x s16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(f16) = G_FCONSTANT half 0xH4200 + %2:_(i16) = G_BITCAST %1(f16) + %3:_(i16) = G_BITCAST %1(f16) 
+ %4:_(<2 x i16>) = G_BUILD_VECTOR %2(i16), %3(i16) + %5:_(<2 x i16>) = G_SUB %0, %4 + $vgpr0 = COPY %5(<2 x i16>) SI_RETURN implicit $vgpr0 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-anyext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-anyext.mir index 4fccde95f3da7..0abda8e188c65 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-anyext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-anyext.mir @@ -10,12 +10,12 @@ body: | ; CHECK-LABEL: name: test_anyext_trunc_v2s32_to_v2s16_to_v2s32 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s16>) = G_TRUNC %0 - %2:_(<2 x s32>) = G_ANYEXT %1 - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i16>) = G_TRUNC %0(<2 x i32>) + %2:_(<2 x i32>) = G_ANYEXT %1(<2 x i16>) + $vgpr0_vgpr1 = COPY %2(<2 x i32>) ... --- @@ -27,16 +27,16 @@ body: | ; CHECK-LABEL: name: test_anyext_trunc_v2s32_to_v2s16_to_v2s64 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV]](s32) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV1]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[ANYEXT]](s64), [[ANYEXT1]](s64) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s16>) = G_TRUNC %0 - %2:_(<2 x s64>) = G_ANYEXT %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[UV]](i32) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[UV1]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[ANYEXT]](i64), [[ANYEXT1]](i64) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i16>) = G_TRUNC %0(<2 x i32>) + %2:_(<2 x i64>) = G_ANYEXT %1(<2 x i16>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<2 x i64>) ... --- @@ -48,13 +48,13 @@ body: | ; CHECK-LABEL: name: test_anyext_trunc_v2s32_to_v2s8_to_v2s16 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[COPY]](<2 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[TRUNC]](<2 x s16>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s8>) = G_TRUNC %0 - %2:_(<2 x s16>) = G_ANYEXT %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x i16>) = G_TRUNC [[COPY]](<2 x i32>) + ; CHECK-NEXT: $vgpr0 = COPY [[TRUNC]](<2 x i16>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i8>) = G_TRUNC %0(<2 x i32>) + %2:_(<2 x i16>) = G_ANYEXT %1(<2 x i8>) + $vgpr0 = COPY %2(<2 x i16>) ... 
--- @@ -66,10 +66,10 @@ body: | ; CHECK-LABEL: name: test_anyext_trunc_v3s32_to_v3s16_to_v3s32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY]](<3 x s32>) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s16>) = G_TRUNC %0 - %2:_(<3 x s32>) = G_ANYEXT %1 - $vgpr0_vgpr1_vgpr2 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY]](<3 x i32>) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x i16>) = G_TRUNC %0(<3 x i32>) + %2:_(<3 x i32>) = G_ANYEXT %1(<3 x i16>) + $vgpr0_vgpr1_vgpr2 = COPY %2(<3 x i32>) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-build-vector.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-build-vector.mir index 01e4162f0d503..becf0701586ae 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-build-vector.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-build-vector.mir @@ -14,21 +14,21 @@ body: | ; GFX9-LABEL: name: revisit_build_vector_unmerge_user ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[UV]](s32) - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[UV1]](s32) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ZEXT]], [[C]](s32) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SHL]](s64), [[SHL1]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_CONSTANT i32 2 - %2:_(<2 x s32>) = G_BUILD_VECTOR %1, %1 - %3:_(<2 x s64>) = G_ZEXT %0 - %4:_(<2 x s64>) = G_SHL %3, %2 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %4 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[UV]](i32) + ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[UV1]](i32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[ZEXT]], [[C]](i32) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[ZEXT1]], [[C]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[SHL]](i64), [[SHL1]](i64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(i32) = G_CONSTANT i32 2 + %2:_(<2 x i32>) = G_BUILD_VECTOR %1(i32), %1(i32) + %3:_(<2 x i64>) = G_ZEXT %0(<2 x i32>) + %4:_(<2 x i64>) = G_SHL %3, %2(<2 x i32>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %4(<2 x i64>) ... 
--- @@ -40,12 +40,12 @@ body: | ; GFX9-LABEL: name: copy_scalar ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: $vgpr2_vgpr3 = COPY [[COPY]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32), %2:_(s32) = G_UNMERGE_VALUES %0(s64) - %3:_(s64) = G_MERGE_VALUES %1, %2 - $vgpr2_vgpr3= COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: $vgpr2_vgpr3 = COPY [[COPY]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32), %2:_(i32) = G_UNMERGE_VALUES %0(i64) + %3:_(i64) = G_MERGE_VALUES %1(i32), %2(i32) + $vgpr2_vgpr3 = COPY %3(i64) ... --- @@ -57,12 +57,12 @@ body: | ; GFX9-LABEL: name: copy_vector_using_elements ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: $vgpr2_vgpr3 = COPY [[COPY]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(s32), %2:_(s32) = G_UNMERGE_VALUES %0(<2 x s32>) - %3:_(<2 x s32>) = G_BUILD_VECTOR %1, %2 - $vgpr2_vgpr3= COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: $vgpr2_vgpr3 = COPY [[COPY]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(i32), %2:_(i32) = G_UNMERGE_VALUES %0(<2 x i32>) + %3:_(<2 x i32>) = G_BUILD_VECTOR %1(i32), %2(i32) + $vgpr2_vgpr3 = COPY %3(<2 x i32>) ... --- @@ -74,12 +74,12 @@ body: | ; GFX9-LABEL: name: copy_vector_using_subvectors ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: $vgpr2_vgpr3 = COPY [[COPY]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s16>), %2:_(<2 x s16>) = G_UNMERGE_VALUES %0(<4 x s16>) - %3:_(<4 x s16>) = G_CONCAT_VECTORS %1, %2 - $vgpr2_vgpr3= COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: $vgpr2_vgpr3 = COPY [[COPY]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i16>), %2:_(<2 x i16>) = G_UNMERGE_VALUES %0(<4 x i16>) + %3:_(<4 x i16>) = G_CONCAT_VECTORS %1(<2 x i16>), %2(<2 x i16>) + $vgpr2_vgpr3 = COPY %3(<4 x i16>) ... --- @@ -91,14 +91,14 @@ body: | ; GFX9-LABEL: name: shuffle_vector_elements ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV1]](s32), [[UV]](s32) - ; GFX9-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(s32), %2:_(s32) = G_UNMERGE_VALUES %0(<2 x s32>) - %3:_(<2 x s32>) = G_BUILD_VECTOR %2, %1 - $vgpr2_vgpr3= COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[UV1]](i32), [[UV]](i32) + ; GFX9-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(i32), %2:_(i32) = G_UNMERGE_VALUES %0(<2 x i32>) + %3:_(<2 x i32>) = G_BUILD_VECTOR %2(i32), %1(i32) + $vgpr2_vgpr3 = COPY %3(<2 x i32>) ... 
--- @@ -110,16 +110,16 @@ body: | ; GFX9-LABEL: name: insert_element ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3_vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[COPY1]](s32) - ; GFX9-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s32), %3:_(s32) = G_UNMERGE_VALUES %0(<2 x s32>) - %4:_(<2 x s32>) = G_BUILD_VECTOR %2, %1 - $vgpr2_vgpr3= COPY %4 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[COPY1]](i32) + ; GFX9-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(i32) = COPY $vgpr2 + %2:_(i32), %3:_(i32) = G_UNMERGE_VALUES %0(<2 x i32>) + %4:_(<2 x i32>) = G_BUILD_VECTOR %2(i32), %1(i32) + $vgpr2_vgpr3 = COPY %4(<2 x i32>) ... --- @@ -131,16 +131,16 @@ body: | ; GFX9-LABEL: name: unmerge_to_sub_vectors ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; GFX9-NEXT: $vgpr4_vgpr5 = COPY [[UV]](<2 x s32>) - ; GFX9-NEXT: $vgpr6_vgpr7 = COPY [[UV1]](<2 x s32>) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32), %2:_(s32), %3:_(s32), %4:_(s32) = G_UNMERGE_VALUES %0(<4 x s32>) - %5:_(<2 x s32>) = G_BUILD_VECTOR %1, %2 - %6:_(<2 x s32>) = G_BUILD_VECTOR %3, %4 - $vgpr4_vgpr5= COPY %5 - $vgpr6_vgpr7= COPY %6 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x i32>), [[UV1:%[0-9]+]]:_(<2 x i32>) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; GFX9-NEXT: $vgpr4_vgpr5 = COPY [[UV]](<2 x i32>) + ; GFX9-NEXT: $vgpr6_vgpr7 = COPY [[UV1]](<2 x i32>) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i32), %2:_(i32), %3:_(i32), %4:_(i32) = G_UNMERGE_VALUES %0(<4 x i32>) + %5:_(<2 x i32>) = G_BUILD_VECTOR %1(i32), %2(i32) + %6:_(<2 x i32>) = G_BUILD_VECTOR %3(i32), %4(i32) + $vgpr4_vgpr5 = COPY %5(<2 x i32>) + $vgpr6_vgpr7 = COPY %6(<2 x i32>) ... 
--- @@ -152,18 +152,18 @@ body: | ; GFX9-LABEL: name: cant_unmerge_to_sub_vectors ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV3]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV1]](s32), [[UV2]](s32) - ; GFX9-NEXT: $vgpr4_vgpr5 = COPY [[BUILD_VECTOR]](<2 x s32>) - ; GFX9-NEXT: $vgpr6_vgpr7 = COPY [[BUILD_VECTOR1]](<2 x s32>) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32), %2:_(s32), %3:_(s32), %4:_(s32) = G_UNMERGE_VALUES %0(<4 x s32>) - %5:_(<2 x s32>) = G_BUILD_VECTOR %1, %4 - %6:_(<2 x s32>) = G_BUILD_VECTOR %2, %3 - $vgpr4_vgpr5= COPY %5 - $vgpr6_vgpr7= COPY %6 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV3]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[UV1]](i32), [[UV2]](i32) + ; GFX9-NEXT: $vgpr4_vgpr5 = COPY [[BUILD_VECTOR]](<2 x i32>) + ; GFX9-NEXT: $vgpr6_vgpr7 = COPY [[BUILD_VECTOR1]](<2 x i32>) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i32), %2:_(i32), %3:_(i32), %4:_(i32) = G_UNMERGE_VALUES %0(<4 x i32>) + %5:_(<2 x i32>) = G_BUILD_VECTOR %1(i32), %4(i32) + %6:_(<2 x i32>) = G_BUILD_VECTOR %2(i32), %3(i32) + $vgpr4_vgpr5 = COPY %5(<2 x i32>) + $vgpr6_vgpr7 = COPY %6(<2 x i32>) ... --- @@ -175,16 +175,16 @@ body: | ; GFX9-LABEL: name: concat ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[COPY]](<2 x s32>), [[COPY1]](<2 x s32>) - ; GFX9-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(s32), %3:_(s32) = G_UNMERGE_VALUES %0(<2 x s32>) - %4:_(s32), %5:_(s32) = G_UNMERGE_VALUES %1(<2 x s32>) - %6:_(<4 x s32>) = G_BUILD_VECTOR %2, %3, %4, %5 - $vgpr4_vgpr5_vgpr6_vgpr7= COPY %6 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i32>) = G_CONCAT_VECTORS [[COPY]](<2 x i32>), [[COPY1]](<2 x i32>) + ; GFX9-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(i32), %3:_(i32) = G_UNMERGE_VALUES %0(<2 x i32>) + %4:_(i32), %5:_(i32) = G_UNMERGE_VALUES %1(<2 x i32>) + %6:_(<4 x i32>) = G_BUILD_VECTOR %2(i32), %3(i32), %4(i32), %5(i32) + $vgpr4_vgpr5_vgpr6_vgpr7 = COPY %6(<4 x i32>) ... 
--- @@ -196,13 +196,13 @@ body: | ; GFX9-LABEL: name: concat_same_vector ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[COPY]](<2 x s32>), [[COPY]](<2 x s32>) - ; GFX9-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5 = COPY [[CONCAT_VECTORS]](<4 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(s32), %2:_(s32) = G_UNMERGE_VALUES %0(<2 x s32>) - %3:_(<4 x s32>) = G_BUILD_VECTOR %1, %2, %1, %2 - $vgpr2_vgpr3_vgpr4_vgpr5= COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i32>) = G_CONCAT_VECTORS [[COPY]](<2 x i32>), [[COPY]](<2 x i32>) + ; GFX9-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5 = COPY [[CONCAT_VECTORS]](<4 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(i32), %2:_(i32) = G_UNMERGE_VALUES %0(<2 x i32>) + %3:_(<4 x i32>) = G_BUILD_VECTOR %1(i32), %2(i32), %1(i32), %2(i32) + $vgpr2_vgpr3_vgpr4_vgpr5 = COPY %3(<4 x i32>) ... --- @@ -214,18 +214,18 @@ body: | ; GFX9-LABEL: name: shuffle_not_concat ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV]](s32), [[UV1]](s32), [[UV3]](s32) - ; GFX9-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(s32), %3:_(s32) = G_UNMERGE_VALUES %0(<2 x s32>) - %4:_(s32), %5:_(s32) = G_UNMERGE_VALUES %1(<2 x s32>) - %6:_(<4 x s32>) = G_BUILD_VECTOR %4, %2, %3, %5 - $vgpr4_vgpr5_vgpr6_vgpr7= COPY %6 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV2]](i32), [[UV]](i32), [[UV1]](i32), [[UV3]](i32) + ; GFX9-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(i32), %3:_(i32) = G_UNMERGE_VALUES %0(<2 x i32>) + %4:_(i32), %5:_(i32) = G_UNMERGE_VALUES %1(<2 x i32>) + %6:_(<4 x i32>) = G_BUILD_VECTOR %4(i32), %2(i32), %3(i32), %5(i32) + $vgpr4_vgpr5_vgpr6_vgpr7 = COPY %6(<4 x i32>) ... 
--- @@ -237,20 +237,20 @@ body: | ; GFX9-LABEL: name: not_a_concat ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4, $vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[COPY2]](s32) - ; GFX9-NEXT: $vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 = COPY [[BUILD_VECTOR]](<5 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(s32) = COPY $vgpr4 - %3:_(s32), %4:_(s32) = G_UNMERGE_VALUES %0(<2 x s32>) - %5:_(s32), %6:_(s32) = G_UNMERGE_VALUES %1(<2 x s32>) - %7:_(<5 x s32>) = G_BUILD_VECTOR %3, %4, %5, %6, %2 - $vgpr5_vgpr6_vgpr7_vgpr8_vgpr9= COPY %7 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32), [[COPY2]](i32) + ; GFX9-NEXT: $vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 = COPY [[BUILD_VECTOR]](<5 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(i32) = COPY $vgpr4 + %3:_(i32), %4:_(i32) = G_UNMERGE_VALUES %0(<2 x i32>) + %5:_(i32), %6:_(i32) = G_UNMERGE_VALUES %1(<2 x i32>) + %7:_(<5 x i32>) = G_BUILD_VECTOR %3(i32), %4(i32), %5(i32), %6(i32), %2(i32) + $vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 = COPY %7(<5 x i32>) ... --- @@ -262,11 +262,11 @@ body: | ; GFX9-LABEL: name: value_finder_look_through_copy ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: $vgpr2_vgpr3 = COPY [[COPY]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(s32), %2:_(s32) = G_UNMERGE_VALUES %0(<2 x s32>) - %3:_(s32) = COPY %1 - %4:_(<2 x s32>) = G_BUILD_VECTOR %3, %2 - $vgpr2_vgpr3= COPY %4 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: $vgpr2_vgpr3 = COPY [[COPY]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(i32), %2:_(i32) = G_UNMERGE_VALUES %0(<2 x i32>) + %3:_(i32) = COPY %1(i32) + %4:_(<2 x i32>) = G_BUILD_VECTOR %3(i32), %2(i32) + $vgpr2_vgpr3 = COPY %4(<2 x i32>) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-concat-vectors.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-concat-vectors.mir index 341160b3e90f5..c95e8a74b43a2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-concat-vectors.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-concat-vectors.mir @@ -14,31 +14,31 @@ body: | ; GFX9-LABEL: name: revisit_concat_vectors_unmerge_user ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 2 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C3]](s16), [[C3]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C3]](s16), [[C3]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR2]], [[BUILD_VECTOR]](<2 x s16>) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR3]], [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[SHL]](<2 x s16>), [[SHL1]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(s32) = COPY $vgpr0 - %1:_(<4 x s8>) = G_BITCAST %0 - %2:_(s16) = G_CONSTANT i16 2 - %3:_(<4 x s16>) = G_BUILD_VECTOR %2, %2, %2, %2 - %4:_(<4 x s16>) = G_ANYEXT %1 - %5:_(<4 x s16>) = G_SHL %4, %3 - $vgpr0_vgpr1 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C1]](i32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C2]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 2 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[C3]](i16), [[C3]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[C3]](i16), [[C3]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x i16>) = G_SHL [[BUILD_VECTOR2]], [[BUILD_VECTOR]](<2 x i16>) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x i16>) = G_SHL 
[[BUILD_VECTOR3]], [[BUILD_VECTOR1]](<2 x i16>) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[SHL]](<2 x i16>), [[SHL1]](<2 x i16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) + %0:_(i32) = COPY $vgpr0 + %1:_(<4 x i8>) = G_BITCAST %0(i32) + %2:_(i16) = G_CONSTANT i16 2 + %3:_(<4 x i16>) = G_BUILD_VECTOR %2(i16), %2(i16), %2(i16), %2(i16) + %4:_(<4 x i16>) = G_ANYEXT %1(<4 x i8>) + %5:_(<4 x i16>) = G_SHL %4, %3(<4 x i16>) + $vgpr0_vgpr1 = COPY %5(<4 x i16>) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-cse-leaves-dead-cast.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-cse-leaves-dead-cast.mir index bd2f5181fd525..3039860dedef4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-cse-leaves-dead-cast.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-cse-leaves-dead-cast.mir @@ -15,100 +15,100 @@ body: | bb.0: ; CHECK-LABEL: name: artifact_combiner_sext_already_exists ; CHECK: %undef:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: %load:_(s32) = G_LOAD %undef(p4) :: (dereferenceable invariant load (s8), align 16, addrspace 4) - ; CHECK-NEXT: %unmerge3_0:_(s1) = G_TRUNC %load(s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR %load, [[C]](s32) - ; CHECK-NEXT: %unmerge3_1:_(s1) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR %load, [[C1]](s32) - ; CHECK-NEXT: %unmerge3_2:_(s1) = G_TRUNC [[LSHR1]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR %load, [[C2]](s32) - ; CHECK-NEXT: %unmerge3_3:_(s1) = G_TRUNC [[LSHR2]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR %load, [[C3]](s32) - ; CHECK-NEXT: %unmerge3_4:_(s1) = G_TRUNC [[LSHR3]](s32) - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR %load, [[C4]](s32) - ; CHECK-NEXT: %unmerge3_5:_(s1) = G_TRUNC [[LSHR4]](s32) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 - ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR %load, [[C5]](s32) - ; CHECK-NEXT: %unmerge3_6:_(s1) = G_TRUNC [[LSHR5]](s32) - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR %load, [[C6]](s32) - ; CHECK-NEXT: %unmerge3_7:_(s1) = G_TRUNC [[LSHR6]](s32) - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: %negone:_(s1) = G_CONSTANT i1 true - ; CHECK-NEXT: %and0:_(s1) = G_XOR %unmerge3_0, %negone - ; CHECK-NEXT: %and1:_(s1) = G_XOR %unmerge3_1, %negone - ; CHECK-NEXT: %and2:_(s1) = G_XOR %unmerge3_2, %negone - ; CHECK-NEXT: %and3:_(s1) = G_XOR %unmerge3_3, %negone - ; CHECK-NEXT: %and4:_(s1) = G_XOR %unmerge3_4, %negone - ; CHECK-NEXT: %and5:_(s1) = G_XOR %unmerge3_5, %negone - ; CHECK-NEXT: %and6:_(s1) = G_XOR %unmerge3_6, %negone - ; CHECK-NEXT: %and7:_(s1) = G_XOR %unmerge3_7, %negone - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT %and0(s1) - ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT]], [[C10]] - ; CHECK-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT %and1(s1) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SEXT1]], [[C10]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) 
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT %and2(s1) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SEXT2]], [[C10]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C8]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT %and3(s1) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SEXT3]], [[C10]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C9]](s32) - ; CHECK-NEXT: %merge0:_(s32) = G_OR [[OR1]], [[SHL2]] - ; CHECK-NEXT: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT %and4(s1) - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[SEXT4]], [[C10]] - ; CHECK-NEXT: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT %and5(s1) - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[SEXT5]], [[C10]] - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] - ; CHECK-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT %and6(s1) - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[SEXT6]], [[C10]] - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C8]](s32) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR2]], [[SHL4]] - ; CHECK-NEXT: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT %and7(s1) - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[SEXT7]], [[C10]] - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C9]](s32) - ; CHECK-NEXT: %merge1:_(s32) = G_OR [[OR3]], [[SHL5]] - ; CHECK-NEXT: %bv:_(<2 x s32>) = G_BUILD_VECTOR %merge0(s32), %merge1(s32) + ; CHECK-NEXT: %load:_(i32) = G_LOAD %undef(p4) :: (dereferenceable invariant load (i8), align 16, addrspace 4) + ; CHECK-NEXT: %unmerge3_0:_(i1) = G_TRUNC %load(i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR %load, [[C]](i32) + ; CHECK-NEXT: %unmerge3_1:_(i1) = G_TRUNC [[LSHR]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR %load, [[C1]](i32) + ; CHECK-NEXT: %unmerge3_2:_(i1) = G_TRUNC [[LSHR1]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR %load, [[C2]](i32) + ; CHECK-NEXT: %unmerge3_3:_(i1) = G_TRUNC [[LSHR2]](i32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR %load, [[C3]](i32) + ; CHECK-NEXT: %unmerge3_4:_(i1) = G_TRUNC [[LSHR3]](i32) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 5 + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR %load, [[C4]](i32) + ; CHECK-NEXT: %unmerge3_5:_(i1) = G_TRUNC [[LSHR4]](i32) + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 6 + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR %load, [[C5]](i32) + ; CHECK-NEXT: %unmerge3_6:_(i1) = G_TRUNC [[LSHR5]](i32) + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR %load, [[C6]](i32) + ; CHECK-NEXT: %unmerge3_7:_(i1) = G_TRUNC [[LSHR6]](i32) + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[C8:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[C9:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; CHECK-NEXT: %negone:_(i1) = G_CONSTANT i1 true + ; CHECK-NEXT: %and0:_(i1) = G_XOR %unmerge3_0, %negone + ; CHECK-NEXT: %and1:_(i1) = G_XOR %unmerge3_1, %negone + ; CHECK-NEXT: %and2:_(i1) = G_XOR %unmerge3_2, %negone + ; CHECK-NEXT: %and3:_(i1) = G_XOR %unmerge3_3, %negone + ; CHECK-NEXT: %and4:_(i1) = G_XOR %unmerge3_4, %negone + ; CHECK-NEXT: %and5:_(i1) = 
G_XOR %unmerge3_5, %negone + ; CHECK-NEXT: %and6:_(i1) = G_XOR %unmerge3_6, %negone + ; CHECK-NEXT: %and7:_(i1) = G_XOR %unmerge3_7, %negone + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT %and0(i1) + ; CHECK-NEXT: [[C10:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[SEXT]], [[C10]] + ; CHECK-NEXT: [[SEXT1:%[0-9]+]]:_(i32) = G_SEXT %and1(i1) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[SEXT1]], [[C10]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C7]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[SEXT2:%[0-9]+]]:_(i32) = G_SEXT %and2(i1) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[SEXT2]], [[C10]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND2]], [[C8]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[SEXT3:%[0-9]+]]:_(i32) = G_SEXT %and3(i1) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[SEXT3]], [[C10]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C9]](i32) + ; CHECK-NEXT: %merge0:_(i32) = G_OR [[OR1]], [[SHL2]] + ; CHECK-NEXT: [[SEXT4:%[0-9]+]]:_(i32) = G_SEXT %and4(i1) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[SEXT4]], [[C10]] + ; CHECK-NEXT: [[SEXT5:%[0-9]+]]:_(i32) = G_SEXT %and5(i1) + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[SEXT5]], [[C10]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[AND5]], [[C7]](i32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[AND4]], [[SHL3]] + ; CHECK-NEXT: [[SEXT6:%[0-9]+]]:_(i32) = G_SEXT %and6(i1) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(i32) = G_AND [[SEXT6]], [[C10]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[AND6]], [[C8]](i32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[OR2]], [[SHL4]] + ; CHECK-NEXT: [[SEXT7:%[0-9]+]]:_(i32) = G_SEXT %and7(i1) + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(i32) = G_AND [[SEXT7]], [[C10]] + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[AND7]], [[C9]](i32) + ; CHECK-NEXT: %merge1:_(i32) = G_OR [[OR3]], [[SHL5]] + ; CHECK-NEXT: %bv:_(<2 x i32>) = G_BUILD_VECTOR %merge0(i32), %merge1(i32) ; CHECK-NEXT: %null:_(p1) = G_CONSTANT i64 0 - ; CHECK-NEXT: G_STORE %bv(<2 x s32>), %null(p1) :: (store (<2 x s32>), addrspace 1) + ; CHECK-NEXT: G_STORE %bv(<2 x i32>), %null(p1) :: (store (<2 x i32>), addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 %undef:_(p4) = G_IMPLICIT_DEF - %load:_(s32) = G_LOAD %undef :: (dereferenceable invariant load (s8), align 16, addrspace 4) - %trunc:_(s8) = G_TRUNC %load - %unmerge3_0:_(s1), %unmerge3_1:_(s1), %unmerge3_2:_(s1), %unmerge3_3:_(s1), %unmerge3_4:_(s1), %unmerge3_5:_(s1), %unmerge3_6:_(s1), %unmerge3_7:_(s1) = G_UNMERGE_VALUES %trunc - %negone:_(s1) = G_CONSTANT i1 true - %and0:_(s1) = G_XOR %unmerge3_0, %negone - %and1:_(s1) = G_XOR %unmerge3_1, %negone - %and2:_(s1) = G_XOR %unmerge3_2, %negone - %and3:_(s1) = G_XOR %unmerge3_3, %negone - %and4:_(s1) = G_XOR %unmerge3_4, %negone - %and5:_(s1) = G_XOR %unmerge3_5, %negone - %and6:_(s1) = G_XOR %unmerge3_6, %negone - %and7:_(s1) = G_XOR %unmerge3_7, %negone - %boolvec:_(<8 x s1>) = G_BUILD_VECTOR %and0(s1), %and1(s1), %and2(s1), %and3(s1), %and4(s1), %and5(s1), %and6(s1), %and7(s1) - %sext:_(<8 x s8>) = G_SEXT %boolvec(<8 x s1>) - %sext_lo:_(<4 x s8>), %sext_hi:_(<4 x s8>) = G_UNMERGE_VALUES %sext(<8 x s8>) - %sext0:_(s8), %sext1:_(s8), %sext2:_(s8), %sext3:_(s8) = G_UNMERGE_VALUES %sext_lo(<4 x s8>) - %merge0:_(s32) = G_MERGE_VALUES %sext0(s8), %sext1(s8), %sext2(s8), %sext3(s8) - %sext4:_(s8) = G_SEXT %and4(s1) - %sext5:_(s8) = G_SEXT %and5(s1) - %sext6:_(s8) = 
G_SEXT %and6(s1) - %sext7:_(s8) = G_SEXT %and7(s1) - %merge1:_(s32) = G_MERGE_VALUES %sext4, %sext5, %sext6, %sext7 - %bv:_(<2 x s32>) = G_BUILD_VECTOR %merge0(s32), %merge1(s32) + %load:_(i32) = G_LOAD %undef(p4) :: (dereferenceable invariant load (i8), align 16, addrspace 4) + %trunc:_(i8) = G_TRUNC %load(i32) + %unmerge3_0:_(i1), %unmerge3_1:_(i1), %unmerge3_2:_(i1), %unmerge3_3:_(i1), %unmerge3_4:_(i1), %unmerge3_5:_(i1), %unmerge3_6:_(i1), %unmerge3_7:_(i1) = G_UNMERGE_VALUES %trunc(i8) + %negone:_(i1) = G_CONSTANT i1 true + %and0:_(i1) = G_XOR %unmerge3_0, %negone + %and1:_(i1) = G_XOR %unmerge3_1, %negone + %and2:_(i1) = G_XOR %unmerge3_2, %negone + %and3:_(i1) = G_XOR %unmerge3_3, %negone + %and4:_(i1) = G_XOR %unmerge3_4, %negone + %and5:_(i1) = G_XOR %unmerge3_5, %negone + %and6:_(i1) = G_XOR %unmerge3_6, %negone + %and7:_(i1) = G_XOR %unmerge3_7, %negone + %boolvec:_(<8 x i1>) = G_BUILD_VECTOR %and0(i1), %and1(i1), %and2(i1), %and3(i1), %and4(i1), %and5(i1), %and6(i1), %and7(i1) + %sext:_(<8 x i8>) = G_SEXT %boolvec(<8 x i1>) + %sext_lo:_(<4 x i8>), %sext_hi:_(<4 x i8>) = G_UNMERGE_VALUES %sext(<8 x i8>) + %sext0:_(i8), %sext1:_(i8), %sext2:_(i8), %sext3:_(i8) = G_UNMERGE_VALUES %sext_lo(<4 x i8>) + %merge0:_(i32) = G_MERGE_VALUES %sext0(i8), %sext1(i8), %sext2(i8), %sext3(i8) + %sext4:_(i8) = G_SEXT %and4(i1) + %sext5:_(i8) = G_SEXT %and5(i1) + %sext6:_(i8) = G_SEXT %and6(i1) + %sext7:_(i8) = G_SEXT %and7(i1) + %merge1:_(i32) = G_MERGE_VALUES %sext4(i8), %sext5(i8), %sext6(i8), %sext7(i8) + %bv:_(<2 x i32>) = G_BUILD_VECTOR %merge0(i32), %merge1(i32) %null:_(p1) = G_CONSTANT i64 0 - G_STORE %bv(<2 x s32>), %null :: (store (<2 x s32>), addrspace 1) + G_STORE %bv(<2 x i32>), %null(p1) :: (store (<2 x i32>), addrspace 1) S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-extract.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-extract.mir index b72abbf557b36..e03e9aee532da 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-extract.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-extract.mir @@ -7,14 +7,14 @@ name: extract_s32_merge_s64_s32_s32_offset0 body: | bb.0: ; CHECK-LABEL: name: extract_s32_merge_s64_s32_s32_offset0 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) - %0:_(s32) = G_CONSTANT i32 0 - %1:_(s32) = G_CONSTANT i32 1 - %2:_(s64) = G_MERGE_VALUES %0, %1 - %3:_(s32) = G_EXTRACT %2, 0 - $vgpr0 = COPY %3 + ; CHECK: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY [[C]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](i32) + %0:_(i32) = G_CONSTANT i32 0 + %1:_(i32) = G_CONSTANT i32 1 + %2:_(i64) = G_MERGE_VALUES %0(i32), %1(i32) + %3:_(i32) = G_EXTRACT %2(i64), 0 + $vgpr0 = COPY %3(i32) ... 
--- @@ -23,14 +23,14 @@ name: extract_s32_merge_s64_s32_s32_offset32 body: | bb.0: ; CHECK-LABEL: name: extract_s32_merge_s64_s32_s32_offset32 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) - %0:_(s32) = G_CONSTANT i32 0 - %1:_(s32) = G_CONSTANT i32 1 - %2:_(s64) = G_MERGE_VALUES %0, %1 - %3:_(s32) = G_EXTRACT %2, 32 - $vgpr0 = COPY %3 + ; CHECK: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY [[C]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](i32) + %0:_(i32) = G_CONSTANT i32 0 + %1:_(i32) = G_CONSTANT i32 1 + %2:_(i64) = G_MERGE_VALUES %0(i32), %1(i32) + %3:_(i32) = G_EXTRACT %2(i64), 32 + $vgpr0 = COPY %3(i32) ... --- @@ -39,14 +39,14 @@ name: extract_s64_merge_s128_s64_s64_offset0 body: | bb.0: ; CHECK-LABEL: name: extract_s64_merge_s128_s64_s64_offset0 - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[C]](s64) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](s64) - %0:_(s64) = G_CONSTANT i64 0 - %1:_(s64) = G_CONSTANT i64 1 - %2:_(s128) = G_MERGE_VALUES %0, %1 - %3:_(s64) = G_EXTRACT %2, 0 - $vgpr0_vgpr1 = COPY %3 + ; CHECK: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY [[C]](i64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](i64) + %0:_(i64) = G_CONSTANT i64 0 + %1:_(i64) = G_CONSTANT i64 1 + %2:_(i128) = G_MERGE_VALUES %0(i64), %1(i64) + %3:_(i64) = G_EXTRACT %2(i128), 0 + $vgpr0_vgpr1 = COPY %3(i64) ... --- @@ -55,14 +55,14 @@ name: extract_s64_merge_s128_s64_s64_offset64 body: | bb.0: ; CHECK-LABEL: name: extract_s64_merge_s128_s64_s64_offset64 - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[C]](s64) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](s64) - %0:_(s64) = G_CONSTANT i64 0 - %1:_(s64) = G_CONSTANT i64 1 - %2:_(s128) = G_MERGE_VALUES %0, %1 - %3:_(s64) = G_EXTRACT %2, 64 - $vgpr0_vgpr1 = COPY %3 + ; CHECK: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY [[C]](i64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](i64) + %0:_(i64) = G_CONSTANT i64 0 + %1:_(i64) = G_CONSTANT i64 1 + %2:_(i128) = G_MERGE_VALUES %0(i64), %1(i64) + %3:_(i64) = G_EXTRACT %2(i128), 64 + $vgpr0_vgpr1 = COPY %3(i64) ... --- @@ -71,14 +71,14 @@ name: extract_s32_merge_s128_s64_s64_offset0 body: | bb.0: ; CHECK-LABEL: name: extract_s32_merge_s128_s64_s64_offset0 - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[C]](s64), 0 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) - %0:_(s64) = G_CONSTANT i64 0 - %1:_(s64) = G_CONSTANT i64 1 - %2:_(s128) = G_MERGE_VALUES %0, %1 - %3:_(s32) = G_EXTRACT %2, 0 - $vgpr0 = COPY %3 + ; CHECK: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(i32) = G_EXTRACT [[C]](i64), 0 + ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](i32) + %0:_(i64) = G_CONSTANT i64 0 + %1:_(i64) = G_CONSTANT i64 1 + %2:_(i128) = G_MERGE_VALUES %0(i64), %1(i64) + %3:_(i32) = G_EXTRACT %2(i128), 0 + $vgpr0 = COPY %3(i32) ... 
--- @@ -87,14 +87,14 @@ name: extract_s32_merge_s128_s64_s64_offset32 body: | bb.0: ; CHECK-LABEL: name: extract_s32_merge_s128_s64_s64_offset32 - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[C]](s64), 32 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) - %0:_(s64) = G_CONSTANT i64 0 - %1:_(s64) = G_CONSTANT i64 1 - %2:_(s128) = G_MERGE_VALUES %0, %1 - %3:_(s32) = G_EXTRACT %2, 32 - $vgpr0 = COPY %3 + ; CHECK: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(i32) = G_EXTRACT [[C]](i64), 32 + ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](i32) + %0:_(i64) = G_CONSTANT i64 0 + %1:_(i64) = G_CONSTANT i64 1 + %2:_(i128) = G_MERGE_VALUES %0(i64), %1(i64) + %3:_(i32) = G_EXTRACT %2(i128), 32 + $vgpr0 = COPY %3(i32) ... --- @@ -103,14 +103,14 @@ name: extract_s32_merge_s128_s64_s64_offset64 body: | bb.0: ; CHECK-LABEL: name: extract_s32_merge_s128_s64_s64_offset64 - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[C]](s64), 0 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) - %0:_(s64) = G_CONSTANT i64 0 - %1:_(s64) = G_CONSTANT i64 1 - %2:_(s128) = G_MERGE_VALUES %0, %1 - %3:_(s32) = G_EXTRACT %2, 64 - $vgpr0 = COPY %3 + ; CHECK: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(i32) = G_EXTRACT [[C]](i64), 0 + ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](i32) + %0:_(i64) = G_CONSTANT i64 0 + %1:_(i64) = G_CONSTANT i64 1 + %2:_(i128) = G_MERGE_VALUES %0(i64), %1(i64) + %3:_(i32) = G_EXTRACT %2(i128), 64 + $vgpr0 = COPY %3(i32) ... --- @@ -119,14 +119,14 @@ name: extract_s32_merge_s128_s64_s64_offset96 body: | bb.0: ; CHECK-LABEL: name: extract_s32_merge_s128_s64_s64_offset96 - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[C]](s64), 32 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) - %0:_(s64) = G_CONSTANT i64 0 - %1:_(s64) = G_CONSTANT i64 1 - %2:_(s128) = G_MERGE_VALUES %0, %1 - %3:_(s32) = G_EXTRACT %2, 96 - $vgpr0 = COPY %3 + ; CHECK: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(i32) = G_EXTRACT [[C]](i64), 32 + ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](i32) + %0:_(i64) = G_CONSTANT i64 0 + %1:_(i64) = G_CONSTANT i64 1 + %2:_(i128) = G_MERGE_VALUES %0(i64), %1(i64) + %3:_(i32) = G_EXTRACT %2(i128), 96 + $vgpr0 = COPY %3(i32) ... # Destination size fits, but is skewed from the start of the register. @@ -136,16 +136,16 @@ name: extract_s16_merge_s128_s64_s64_offset18 body: | bb.0: ; CHECK-LABEL: name: extract_s16_merge_s128_s64_s64_offset18 - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[C]](s64), 18 - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s64) = G_CONSTANT i64 0 - %1:_(s64) = G_CONSTANT i64 1 - %2:_(s128) = G_MERGE_VALUES %0, %1 - %3:_(s16) = G_EXTRACT %2, 18 - %4:_(s32) = G_ANYEXT %3 - $vgpr0 = COPY %4 + ; CHECK: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(i16) = G_EXTRACT [[C]](i64), 18 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[EXTRACT]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i64) = G_CONSTANT i64 0 + %1:_(i64) = G_CONSTANT i64 1 + %2:_(i128) = G_MERGE_VALUES %0(i64), %1(i64) + %3:_(i16) = G_EXTRACT %2(i128), 18 + %4:_(i32) = G_ANYEXT %3(i16) + $vgpr0 = COPY %4(i32) ... # Destination size fits, but is skewed from the start of the register. 
@@ -155,16 +155,16 @@ name: extract_s16_merge_s128_s64_s64_offset82 body: | bb.0: ; CHECK-LABEL: name: extract_s16_merge_s128_s64_s64_offset82 - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[C]](s64), 18 - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s64) = G_CONSTANT i64 0 - %1:_(s64) = G_CONSTANT i64 1 - %2:_(s128) = G_MERGE_VALUES %0, %1 - %3:_(s16) = G_EXTRACT %2, 82 - %4:_(s32) = G_ANYEXT %3 - $vgpr0 = COPY %4 + ; CHECK: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(i16) = G_EXTRACT [[C]](i64), 18 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[EXTRACT]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i64) = G_CONSTANT i64 0 + %1:_(i64) = G_CONSTANT i64 1 + %2:_(i128) = G_MERGE_VALUES %0(i64), %1(i64) + %3:_(i16) = G_EXTRACT %2(i128), 82 + %4:_(i32) = G_ANYEXT %3(i16) + $vgpr0 = COPY %4(i32) ... @@ -175,16 +175,16 @@ name: extract_s64_merge_s128_s64_s64_offset32 body: | bb.0: ; CHECK-LABEL: name: extract_s64_merge_s128_s64_s64_offset32 - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C]](s64), [[C1]](s64) - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[MV]](s128), 32 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](s64) - %0:_(s64) = G_CONSTANT i64 0 - %1:_(s64) = G_CONSTANT i64 1 - %2:_(s128) = G_MERGE_VALUES %0, %1 - %3:_(s64) = G_EXTRACT %2, 32 - $vgpr0_vgpr1 = COPY %3 + ; CHECK: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[C]](i64), [[C1]](i64) + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(i64) = G_EXTRACT [[MV]](i128), 32 + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](i64) + %0:_(i64) = G_CONSTANT i64 0 + %1:_(i64) = G_CONSTANT i64 1 + %2:_(i128) = G_MERGE_VALUES %0(i64), %1(i64) + %3:_(i64) = G_EXTRACT %2(i128), 32 + $vgpr0_vgpr1 = COPY %3(i64) ... @@ -195,16 +195,16 @@ name: extract_s16_merge_s32_s32_offset1 body: | bb.0: ; CHECK-LABEL: name: extract_s16_merge_s32_s32_offset1 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C1]](s32) - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[MV]](s64), 1 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) - %0:_(s32) = G_CONSTANT i32 0 - %1:_(s32) = G_CONSTANT i32 1 - %2:_(s64) = G_MERGE_VALUES %0, %1 - %3:_(s32) = G_EXTRACT %2, 1 - $vgpr0 = COPY %3 + ; CHECK: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[C]](i32), [[C1]](i32) + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(i32) = G_EXTRACT [[MV]](i64), 1 + ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](i32) + %0:_(i32) = G_CONSTANT i32 0 + %1:_(i32) = G_CONSTANT i32 1 + %2:_(i64) = G_MERGE_VALUES %0(i32), %1(i32) + %3:_(i32) = G_EXTRACT %2(i64), 1 + $vgpr0 = COPY %3(i32) ... 
@@ -216,15 +216,15 @@ name: extract_s32_merge_s96_s32_s32_s32_offset0 body: | bb.0: ; CHECK-LABEL: name: extract_s32_merge_s96_s32_s32_s32_offset0 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) - %0:_(s32) = G_CONSTANT i32 0 - %1:_(s32) = G_CONSTANT i32 1 - %2:_(s32) = G_CONSTANT i32 1 - %3:_(s96) = G_MERGE_VALUES %0, %1, %2 - %4:_(s32) = G_EXTRACT %3, 0 - $vgpr0 = COPY %4 + ; CHECK: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY [[C]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](i32) + %0:_(i32) = G_CONSTANT i32 0 + %1:_(i32) = G_CONSTANT i32 1 + %2:_(i32) = G_CONSTANT i32 1 + %3:_(i96) = G_MERGE_VALUES %0(i32), %1(i32), %2(i32) + %4:_(i32) = G_EXTRACT %3(i96), 0 + $vgpr0 = COPY %4(i32) ... --- @@ -233,15 +233,15 @@ name: extract_s32_merge_s96_s32_s32_s32_offset64 body: | bb.0: ; CHECK-LABEL: name: extract_s32_merge_s96_s32_s32_s32_offset64 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) - %0:_(s32) = G_CONSTANT i32 0 - %1:_(s32) = G_CONSTANT i32 1 - %2:_(s32) = G_CONSTANT i32 1 - %3:_(s96) = G_MERGE_VALUES %0, %1, %2 - %4:_(s32) = G_EXTRACT %3, 64 - $vgpr0 = COPY %4 + ; CHECK: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY [[C]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](i32) + %0:_(i32) = G_CONSTANT i32 0 + %1:_(i32) = G_CONSTANT i32 1 + %2:_(i32) = G_CONSTANT i32 1 + %3:_(i96) = G_MERGE_VALUES %0(i32), %1(i32), %2(i32) + %4:_(i32) = G_EXTRACT %3(i96), 64 + $vgpr0 = COPY %4(i32) ... --- @@ -250,18 +250,18 @@ name: extract_s64_merge_s96_s32_s32_s32_offset0 body: | bb.0: ; CHECK-LABEL: name: extract_s64_merge_s96_s32_s32_s32_offset0 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[C]](s32), [[C1]](s32), [[C2]](s32) - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[MV]](s96), 0 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](s64) - %0:_(s32) = G_CONSTANT i32 0 - %1:_(s32) = G_CONSTANT i32 1 - %2:_(s32) = G_CONSTANT i32 1 - %3:_(s96) = G_MERGE_VALUES %0, %1, %2 - %4:_(s64) = G_EXTRACT %3, 0 - $vgpr0_vgpr1 = COPY %4 + ; CHECK: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i96) = G_MERGE_VALUES [[C]](i32), [[C1]](i32), [[C2]](i32) + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(i64) = G_EXTRACT [[MV]](i96), 0 + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](i64) + %0:_(i32) = G_CONSTANT i32 0 + %1:_(i32) = G_CONSTANT i32 1 + %2:_(i32) = G_CONSTANT i32 1 + %3:_(i96) = G_MERGE_VALUES %0(i32), %1(i32), %2(i32) + %4:_(i64) = G_EXTRACT %3(i96), 0 + $vgpr0_vgpr1 = COPY %4(i64) ... 
--- @@ -270,18 +270,18 @@ name: extract_s64_merge_s96_s32_s32_s32_offset32 body: | bb.0: ; CHECK-LABEL: name: extract_s64_merge_s96_s32_s32_s32_offset32 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[C]](s32), [[C1]](s32), [[C2]](s32) - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[MV]](s96), 32 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](s64) - %0:_(s32) = G_CONSTANT i32 0 - %1:_(s32) = G_CONSTANT i32 1 - %2:_(s32) = G_CONSTANT i32 1 - %3:_(s96) = G_MERGE_VALUES %0, %1, %2 - %4:_(s64) = G_EXTRACT %3, 32 - $vgpr0_vgpr1 = COPY %4 + ; CHECK: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i96) = G_MERGE_VALUES [[C]](i32), [[C1]](i32), [[C2]](i32) + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(i64) = G_EXTRACT [[MV]](i96), 32 + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](i64) + %0:_(i32) = G_CONSTANT i32 0 + %1:_(i32) = G_CONSTANT i32 1 + %2:_(i32) = G_CONSTANT i32 1 + %3:_(i96) = G_MERGE_VALUES %0(i32), %1(i32), %2(i32) + %4:_(i64) = G_EXTRACT %3(i96), 32 + $vgpr0_vgpr1 = COPY %4(i64) ... # Test build_vector sources @@ -291,14 +291,14 @@ name: extract_s64_build_vector_v2s64_s64_s64_offset0 body: | bb.0: ; CHECK-LABEL: name: extract_s64_build_vector_v2s64_s64_s64_offset0 - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[C]](s64) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](s64) - %0:_(s64) = G_CONSTANT i64 0 - %1:_(s64) = G_CONSTANT i64 1 - %2:_(<2 x s64>) = G_BUILD_VECTOR %0, %1 - %3:_(s64) = G_EXTRACT %2, 0 - $vgpr0_vgpr1 = COPY %3 + ; CHECK: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY [[C]](i64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](i64) + %0:_(i64) = G_CONSTANT i64 0 + %1:_(i64) = G_CONSTANT i64 1 + %2:_(<2 x i64>) = G_BUILD_VECTOR %0(i64), %1(i64) + %3:_(i64) = G_EXTRACT %2(<2 x i64>), 0 + $vgpr0_vgpr1 = COPY %3(i64) ... --- @@ -307,14 +307,14 @@ name: extract_s64_build_vector_v2s64_s64_s64_offset64 body: | bb.0: ; CHECK-LABEL: name: extract_s64_build_vector_v2s64_s64_s64_offset64 - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[C]](s64) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](s64) - %0:_(s64) = G_CONSTANT i64 0 - %1:_(s64) = G_CONSTANT i64 1 - %2:_(<2 x s64>) = G_BUILD_VECTOR %0, %1 - %3:_(s64) = G_EXTRACT %2, 64 - $vgpr0_vgpr1 = COPY %3 + ; CHECK: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY [[C]](i64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](i64) + %0:_(i64) = G_CONSTANT i64 0 + %1:_(i64) = G_CONSTANT i64 1 + %2:_(<2 x i64>) = G_BUILD_VECTOR %0(i64), %1(i64) + %3:_(i64) = G_EXTRACT %2(<2 x i64>), 64 + $vgpr0_vgpr1 = COPY %3(i64) ... 
--- @@ -323,21 +323,21 @@ name: extract_s64_build_vector_v2s64_s64_s64_offset32 body: | bb.0: ; CHECK-LABEL: name: extract_s64_build_vector_v2s64_s64_s64_offset32 - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C1]](s64) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<2 x s64>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[BITCAST]](s128) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[C2]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[C2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64) - %0:_(s64) = G_CONSTANT i64 0 - %1:_(s64) = G_CONSTANT i64 1 - %2:_(<2 x s64>) = G_BUILD_VECTOR %0, %1 - %3:_(s64) = G_EXTRACT %2, 32 - $vgpr0_vgpr1 = COPY %3 + ; CHECK: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[C]](i64), [[C1]](i64) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<2 x i64>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[BITCAST]](i128) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[C2]](i32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[C2]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[OR]](i64) + %0:_(i64) = G_CONSTANT i64 0 + %1:_(i64) = G_CONSTANT i64 1 + %2:_(<2 x i64>) = G_BUILD_VECTOR %0(i64), %1(i64) + %3:_(i64) = G_EXTRACT %2(<2 x i64>), 32 + $vgpr0_vgpr1 = COPY %3(i64) ... # Test extracting something smaller than the element size @@ -347,14 +347,14 @@ name: extract_s32_build_vector_v2s64_s64_s64_offset64 body: | bb.0: ; CHECK-LABEL: name: extract_s32_build_vector_v2s64_s64_s64_offset64 - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[C]](s64), 0 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) - %0:_(s64) = G_CONSTANT i64 0 - %1:_(s64) = G_CONSTANT i64 1 - %2:_(<2 x s64>) = G_BUILD_VECTOR %0, %1 - %3:_(s32) = G_EXTRACT %2, 64 - $vgpr0 = COPY %3 + ; CHECK: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(i32) = G_EXTRACT [[C]](i64), 0 + ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](i32) + %0:_(i64) = G_CONSTANT i64 0 + %1:_(i64) = G_CONSTANT i64 1 + %2:_(<2 x i64>) = G_BUILD_VECTOR %0(i64), %1(i64) + %3:_(i32) = G_EXTRACT %2(<2 x i64>), 64 + $vgpr0 = COPY %3(i32) ... 
@@ -368,14 +368,14 @@ body: | ; CHECK-LABEL: name: extract_v2s16_build_vector_v2s64_v2s16_v2s16_offset0 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<4 x s16>) = G_CONCAT_VECTORS %0, %1 - %3:_(<2 x s16>) = G_EXTRACT %2, 0 - $vgpr0 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY [[COPY]](<2 x i16>) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<4 x i16>) = G_CONCAT_VECTORS %0(<2 x i16>), %1(<2 x i16>) + %3:_(<2 x i16>) = G_EXTRACT %2(<4 x i16>), 0 + $vgpr0 = COPY %3(<2 x i16>) ... --- @@ -387,14 +387,14 @@ body: | ; CHECK-LABEL: name: extract_v2s16_build_vector_v2s64_v2s16_v2s16_offset32 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<4 x s16>) = G_CONCAT_VECTORS %0, %1 - %3:_(<2 x s16>) = G_EXTRACT %2, 32 - $vgpr0 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY [[COPY]](<2 x i16>) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<4 x i16>) = G_CONCAT_VECTORS %0(<2 x i16>), %1(<2 x i16>) + %3:_(<2 x i16>) = G_EXTRACT %2(<4 x i16>), 32 + $vgpr0 = COPY %3(<2 x i16>) ... # Test extracting only a single element, not a subvector @@ -407,15 +407,15 @@ body: | ; CHECK-LABEL: name: extract_s16_build_vector_v2s64_v2s16_v2s16_offset32 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](s32) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<4 x s16>) = G_CONCAT_VECTORS %0, %1 - %3:_(s16) = G_EXTRACT %2, 32 - %4:_(s32) = G_ANYEXT %3 - $vgpr0 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](i32) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<4 x i16>) = G_CONCAT_VECTORS %0(<2 x i16>), %1(<2 x i16>) + %3:_(i16) = G_EXTRACT %2(<4 x i16>), 32 + %4:_(i32) = G_ANYEXT %3(i16) + $vgpr0 = COPY %4(i32) ... 
--- @@ -427,17 +427,17 @@ body: | ; CHECK-LABEL: name: extract_s16_build_vector_v2s64_v2s16_v2s16_offset48 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<4 x s16>) = G_CONCAT_VECTORS %0, %1 - %3:_(s16) = G_EXTRACT %2, 48 - %4:_(s32) = G_ANYEXT %3 - $vgpr0 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](i32) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<4 x i16>) = G_CONCAT_VECTORS %0(<2 x i16>), %1(<2 x i16>) + %3:_(i16) = G_EXTRACT %2(<4 x i16>), 48 + %4:_(i32) = G_ANYEXT %3(i16) + $vgpr0 = COPY %4(i32) ... # Test extracting less than an element @@ -450,14 +450,14 @@ body: | ; CHECK-LABEL: name: extract_s8_build_vector_v2s64_v2s16_v2s16_offset48 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s8) = G_EXTRACT [[COPY]](<2 x s16>), 16 - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s8) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<4 x s16>) = G_CONCAT_VECTORS %0, %1 - %3:_(s8) = G_EXTRACT %2, 48 - %4:_(s32) = G_ANYEXT %3 - $vgpr0 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(i8) = G_EXTRACT [[COPY]](<2 x i16>), 16 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[EXTRACT]](i8) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<4 x i16>) = G_CONCAT_VECTORS %0(<2 x i16>), %1(<2 x i16>) + %3:_(i8) = G_EXTRACT %2(<4 x i16>), 48 + %4:_(i32) = G_ANYEXT %3(i8) + $vgpr0 = COPY %4(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-sext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-sext.mir index e5d020ef58785..daeccd79d1adf 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-sext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-sext.mir @@ -10,16 +10,16 @@ body: | ; CHECK-LABEL: name: test_sext_trunc_v2s32_to_v2s16_to_v2s32 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV]], 16 - ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV1]], 16 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s16>) = G_TRUNC %0 - %2:_(<2 x s32>) = G_SEXT %1 - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[UV]], 16 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[UV1]], 16 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SEXT_INREG]](i32), [[SEXT_INREG1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i16>) = G_TRUNC %0(<2 x i32>) + %2:_(<2 x i32>) = G_SEXT %1(<2 x i16>) + $vgpr0_vgpr1 = COPY %2(<2 x i32>) ... --- @@ -31,18 +31,18 @@ body: | ; CHECK-LABEL: name: test_sext_trunc_v2s32_to_v2s16_to_v2s64 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV]](s32) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV1]](s32) - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[ANYEXT]], 16 - ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[ANYEXT1]], 16 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SEXT_INREG]](s64), [[SEXT_INREG1]](s64) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s16>) = G_TRUNC %0 - %2:_(<2 x s64>) = G_SEXT %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[UV]](i32) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[UV1]](i32) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[ANYEXT]], 16 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i64) = G_SEXT_INREG [[ANYEXT1]], 16 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[SEXT_INREG]](i64), [[SEXT_INREG1]](i64) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i16>) = G_TRUNC %0(<2 x i32>) + %2:_(<2 x i64>) = G_SEXT %1(<2 x i16>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<2 x i64>) ... 
--- @@ -51,27 +51,25 @@ body: | bb.0: liveins: $vgpr0_vgpr1 - ; The G_SEXT_INREG doesn't lower here because G_TRUNC is both illegal and - ; unable to legalize. This prevents further legalization. ; CHECK-LABEL: name: test_sext_trunc_v2s32_to_v2s8_to_v2s16 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV]], 8 - ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV1]], 8 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG]], [[C]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG1]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s8>) = G_TRUNC %0 - %2:_(<2 x s16>) = G_SEXT %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[UV]], 8 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[UV1]], 8 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[SEXT_INREG]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[SEXT_INREG1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x i16>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i8>) = G_TRUNC %0(<2 x i32>) + %2:_(<2 x i16>) = G_SEXT %1(<2 x i8>) + $vgpr0 = COPY %2(<2 x i16>) ... 
--- @@ -83,15 +81,15 @@ body: | ; CHECK-LABEL: name: test_sext_trunc_v3s32_to_v3s16_to_v3s32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV]], 16 - ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV1]], 16 - ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV2]], 16 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32), [[SEXT_INREG2]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s16>) = G_TRUNC %0 - %2:_(<3 x s32>) = G_SEXT %1 - $vgpr0_vgpr1_vgpr2 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[UV]], 16 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[UV1]], 16 + ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[UV2]], 16 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[SEXT_INREG]](i32), [[SEXT_INREG1]](i32), [[SEXT_INREG2]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x i16>) = G_TRUNC %0(<3 x i32>) + %2:_(<3 x i32>) = G_SEXT %1(<3 x i16>) + $vgpr0_vgpr1_vgpr2 = COPY %2(<3 x i32>) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-trunc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-trunc.mir index ae683ec4d7998..3164937491840 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-trunc.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-trunc.mir @@ -6,17 +6,15 @@ name: trunc_s16_merge_s64_s32 body: | bb.0: - ; Test that trunc(merge) with trunc-size < merge-source-size creates a trunc - ; of the merge source ; CHECK-LABEL: name: trunc_s16_merge_s64_s32 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32) - %0:_(s32) = G_CONSTANT i32 0 - %1:_(s32) = G_CONSTANT i32 1 - %2:_(s64) = G_MERGE_VALUES %0, %1 - %3:_(s16) = G_TRUNC %2 - %4:_(s32) = G_ANYEXT %3 - $vgpr0 = COPY %4 + ; CHECK: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: $vgpr0 = COPY [[C]](i32) + %0:_(i32) = G_CONSTANT i32 0 + %1:_(i32) = G_CONSTANT i32 1 + %2:_(i64) = G_MERGE_VALUES %0(i32), %1(i32) + %3:_(i16) = G_TRUNC %2(i64) + %4:_(i32) = G_ANYEXT %3(i16) + $vgpr0 = COPY %4(i32) ... --- @@ -24,15 +22,14 @@ name: trunc_s32_merge_s64_s32 body: | bb.0: - ; Test that trunc(merge) with trunc-size == merge-source-size is eliminated ; CHECK-LABEL: name: trunc_s32_merge_s64_s32 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32) - %0:_(s32) = G_CONSTANT i32 0 - %1:_(s32) = G_CONSTANT i32 1 - %2:_(s64) = G_MERGE_VALUES %0, %1 - %3:_(s32) = G_TRUNC %2 - $vgpr0 = COPY %3 + ; CHECK: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: $vgpr0 = COPY [[C]](i32) + %0:_(i32) = G_CONSTANT i32 0 + %1:_(i32) = G_CONSTANT i32 1 + %2:_(i64) = G_MERGE_VALUES %0(i32), %1(i32) + %3:_(i32) = G_TRUNC %2(i64) + $vgpr0 = COPY %3(i32) ... 
--- @@ -40,18 +37,16 @@ name: trunc_s64_merge_s128_s32 body: | bb.0: - ; Test that trunc(merge) with trunc-size > merge-source-size combines to a - ; smaller merge ; CHECK-LABEL: name: trunc_s64_merge_s128_s32 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) - %0:_(s32) = G_CONSTANT i32 0 - %1:_(s32) = G_CONSTANT i32 1 - %2:_(s128) = G_MERGE_VALUES %0, %1, %0, %1 - %3:_(s64) = G_TRUNC %2 - $vgpr0_vgpr1 = COPY %3 + ; CHECK: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[C]](i32), [[C1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + %0:_(i32) = G_CONSTANT i32 0 + %1:_(i32) = G_CONSTANT i32 1 + %2:_(i128) = G_MERGE_VALUES %0(i32), %1(i32), %0(i32), %1(i32) + %3:_(i64) = G_TRUNC %2(i128) + $vgpr0_vgpr1 = COPY %3(i64) ... --- @@ -59,18 +54,17 @@ name: trunc_s32_merge_s128_p0 body: | bb.0: - ; Test that trunc(merge) with a non-scalar merge source is not combined ; CHECK-LABEL: name: trunc_s32_merge_s128_p0 ; CHECK: [[C:%[0-9]+]]:_(p0) = G_CONSTANT i64 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C]](p0), [[C1]](p0) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s128) - ; CHECK-NEXT: $vgpr0 = COPY [[TRUNC]](s32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[C]](p0), [[C1]](p0) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[MV]](i128) + ; CHECK-NEXT: $vgpr0 = COPY [[TRUNC]](i32) %0:_(p0) = G_CONSTANT i64 0 %1:_(p0) = G_CONSTANT i64 1 - %2:_(s128) = G_MERGE_VALUES %0, %1 - %3:_(s32) = G_TRUNC %2 - $vgpr0 = COPY %3 + %2:_(i128) = G_MERGE_VALUES %0(p0), %1(p0) + %3:_(i32) = G_TRUNC %2(i128) + $vgpr0 = COPY %3(i32) ... --- @@ -78,18 +72,17 @@ name: trunc_s64_merge_s128_p0 body: | bb.0: - ; Test that trunc(merge) with a non-scalar merge source is not combined ; CHECK-LABEL: name: trunc_s64_merge_s128_p0 ; CHECK: [[C:%[0-9]+]]:_(p0) = G_CONSTANT i64 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C]](p0), [[C1]](p0) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[MV]](s128) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[TRUNC]](s64) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[C]](p0), [[C1]](p0) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i64) = G_TRUNC [[MV]](i128) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[TRUNC]](i64) %0:_(p0) = G_CONSTANT i64 0 %1:_(p0) = G_CONSTANT i64 1 - %2:_(s128) = G_MERGE_VALUES %0, %1 - %3:_(s64) = G_TRUNC %2 - $vgpr0_vgpr1 = COPY %3 + %2:_(i128) = G_MERGE_VALUES %0(p0), %1(p0) + %3:_(i64) = G_TRUNC %2(i128) + $vgpr0_vgpr1 = COPY %3(i64) ... 
--- @@ -97,18 +90,17 @@ name: trunc_s128_merge_s192_p0 body: | bb.0: - ; Test that trunc(merge) with a non-scalar merge source is not combined ; CHECK-LABEL: name: trunc_s128_merge_s192_p0 ; CHECK: [[C:%[0-9]+]]:_(p0) = G_CONSTANT i64 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[C]](p0), [[C1]](p0), [[C]](p0) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s128) = G_TRUNC [[MV]](s192) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[TRUNC]](s128) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i192) = G_MERGE_VALUES [[C]](p0), [[C1]](p0), [[C]](p0) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i128) = G_TRUNC [[MV]](i192) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[TRUNC]](i128) %0:_(p0) = G_CONSTANT i64 0 %1:_(p0) = G_CONSTANT i64 1 - %2:_(s192) = G_MERGE_VALUES %0, %1, %0 - %3:_(s128) = G_TRUNC %2 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + %2:_(i192) = G_MERGE_VALUES %0(p0), %1(p0), %0(p0) + %3:_(i128) = G_TRUNC %2(i192) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3(i128) ... --- @@ -116,18 +108,15 @@ name: trunc_s68_merge_s128_s32 body: | bb.0: - ; Test that trunc(merge) with trunc-size > merge-source-size and - ; trunc-size % merge-source-size != 0 can be combined after the G_TRUNCs - ; have been combined. ; CHECK-LABEL: name: trunc_s68_merge_s128_s32 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32) - %0:_(s32) = G_CONSTANT i32 0 - %1:_(s32) = G_CONSTANT i32 1 - %2:_(s128) = G_MERGE_VALUES %0, %1, %0, %1 - %3:_(s68) = G_TRUNC %2 - %4:_(s32) = G_TRUNC %3 - $vgpr0 = COPY %4 + ; CHECK: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: $vgpr0 = COPY [[C]](i32) + %0:_(i32) = G_CONSTANT i32 0 + %1:_(i32) = G_CONSTANT i32 1 + %2:_(i128) = G_MERGE_VALUES %0(i32), %1(i32), %0(i32), %1(i32) + %3:_(i68) = G_TRUNC %2(i128) + %4:_(i32) = G_TRUNC %3(i68) + $vgpr0 = COPY %4(i32) ... --- @@ -135,15 +124,14 @@ name: trunc_trunc body: | bb.0: - ; Test that trunc(trunc) is combined to a single trunc ; CHECK-LABEL: name: trunc_trunc - ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[DEF]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[TRUNC]](s32) - %0:_(s64) = G_IMPLICIT_DEF - %1:_(s48) = G_TRUNC %0 - %2:_(s32) = G_TRUNC %1 - $vgpr0 = COPY %2 + ; CHECK: [[DEF:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[DEF]](i64) + ; CHECK-NEXT: $vgpr0 = COPY [[TRUNC]](i32) + %0:_(i64) = G_IMPLICIT_DEF + %1:_(i48) = G_TRUNC %0(i64) + %2:_(i32) = G_TRUNC %1(i48) + $vgpr0 = COPY %2(i32) ... --- @@ -151,14 +139,13 @@ name: trunc_sext body: | bb.0: - ; Test that trunc(sext) is replaced with sext source. ; CHECK-LABEL: name: trunc_sext - ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](s32) - %0:_(s32) = G_IMPLICIT_DEF - %1:_(s64) = G_SEXT %0 - %2:_(s32) = G_TRUNC %1 - $vgpr0 = COPY %2 + ; CHECK: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](i32) + %0:_(i32) = G_IMPLICIT_DEF + %1:_(i64) = G_SEXT %0(i32) + %2:_(i32) = G_TRUNC %1(i64) + $vgpr0 = COPY %2(i32) ... --- @@ -166,14 +153,13 @@ name: trunc_zext body: | bb.0: - ; Test that trunc(zext) is replaced with zext source. 
; CHECK-LABEL: name: trunc_zext - ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](s32) - %0:_(s32) = G_IMPLICIT_DEF - %1:_(s64) = G_ZEXT %0 - %2:_(s32) = G_TRUNC %1 - $vgpr0 = COPY %2 + ; CHECK: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](i32) + %0:_(i32) = G_IMPLICIT_DEF + %1:_(i64) = G_ZEXT %0(i32) + %2:_(i32) = G_TRUNC %1(i64) + $vgpr0 = COPY %2(i32) ... --- @@ -181,14 +167,13 @@ name: trunc_anyext body: | bb.0: - ; Test that trunc(anyext) is replaced with anyext source. ; CHECK-LABEL: name: trunc_anyext - ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](s32) - %0:_(s32) = G_IMPLICIT_DEF - %1:_(s64) = G_ANYEXT %0 - %2:_(s32) = G_TRUNC %1 - $vgpr0 = COPY %2 + ; CHECK: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](i32) + %0:_(i32) = G_IMPLICIT_DEF + %1:_(i64) = G_ANYEXT %0(i32) + %2:_(i32) = G_TRUNC %1(i64) + $vgpr0 = COPY %2(i32) ... --- @@ -196,15 +181,14 @@ name: trunc_ext body: | bb.0: - ; Test that trunc(sext (trunc (...))) is replaced with source. ; CHECK-LABEL: name: trunc_ext - ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](s32) - %0:_(s32) = G_IMPLICIT_DEF - %1:_(s64) = G_SEXT %0 - %2:_(s32) = G_TRUNC %1 - %3:_(s128) = G_ZEXT %2 - %4:_(s64) = G_TRUNC %3 - %5:_(s32) = G_TRUNC %4 - $vgpr0 = COPY %5 + ; CHECK: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](i32) + %0:_(i32) = G_IMPLICIT_DEF + %1:_(i64) = G_SEXT %0(i32) + %2:_(i32) = G_TRUNC %1(i64) + %3:_(i128) = G_ZEXT %2(i32) + %4:_(i64) = G_TRUNC %3(i128) + %5:_(i32) = G_TRUNC %4(i64) + $vgpr0 = COPY %5(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir index 8300b2bc05e96..c7a297a73013c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir @@ -6,33 +6,33 @@ name: test_unmerge_values_s1_trunc_v2s1_of_build_vector_v2s32 body: | bb.0: ; CHECK-LABEL: name: test_unmerge_values_s1_trunc_v2s1_of_build_vector_v2s32 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV2]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV3]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT]], 1 - ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT1]], 1 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %2:_(s32), %3:_(s32) = G_UNMERGE_VALUES %0(<2 x s32>) - %4:_(s32), %5:_(s32) = G_UNMERGE_VALUES %1(<2 x s32>) - %6:_(s1) = G_ICMP intpred(ne), %2(s32), %4 - %7:_(s1) = G_ICMP intpred(ne), %3(s32), %5 - %8:_(s32) = G_ANYEXT %6(s1) - %9:_(s32) = G_ANYEXT %7(s1) - %10:_(<2 x 
s32>) = G_BUILD_VECTOR %8, %9 - %11:_(<2 x s1>) = G_TRUNC %10(<2 x s32>) - %12:_(s1), %13:_(s1) = G_UNMERGE_VALUES %11 - %14:_(s32) = G_SEXT %12 - %15:_(s32) = G_SEXT %13 - %16:_(<2 x s32>) = G_BUILD_VECTOR %14, %15 - $vgpr0_vgpr1 = COPY %16 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV]](i32), [[UV2]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV1]](i32), [[UV3]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP]](i1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP1]](i1) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[ANYEXT]], 1 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[ANYEXT1]], 1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SEXT_INREG]](i32), [[SEXT_INREG1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %2:_(i32), %3:_(i32) = G_UNMERGE_VALUES %0(<2 x i32>) + %4:_(i32), %5:_(i32) = G_UNMERGE_VALUES %1(<2 x i32>) + %6:_(i1) = G_ICMP intpred(ne), %2(i32), %4 + %7:_(i1) = G_ICMP intpred(ne), %3(i32), %5 + %8:_(i32) = G_ANYEXT %6(i1) + %9:_(i32) = G_ANYEXT %7(i1) + %10:_(<2 x i32>) = G_BUILD_VECTOR %8(i32), %9(i32) + %11:_(<2 x i1>) = G_TRUNC %10(<2 x i32>) + %12:_(i1), %13:_(i1) = G_UNMERGE_VALUES %11(<2 x i1>) + %14:_(i32) = G_SEXT %12(i1) + %15:_(i32) = G_SEXT %13(i1) + %16:_(<2 x i32>) = G_BUILD_VECTOR %14(i32), %15(i32) + $vgpr0_vgpr1 = COPY %16(<2 x i32>) ... 
@@ -43,35 +43,35 @@ name: test_unmerge_values_s1_trunc_v2s1_of_build_vector_v2s32_extra_copies body: | bb.0: ; CHECK-LABEL: name: test_unmerge_values_s1_trunc_v2s1_of_build_vector_v2s32_extra_copies - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV2]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV3]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT]], 1 - ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT1]], 1 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %2:_(s32), %3:_(s32) = G_UNMERGE_VALUES %0(<2 x s32>) - %4:_(s32), %5:_(s32) = G_UNMERGE_VALUES %1(<2 x s32>) - %6:_(s1) = G_ICMP intpred(ne), %2(s32), %4 - %7:_(s1) = G_ICMP intpred(ne), %3(s32), %5 - %8:_(s32) = G_ANYEXT %6(s1) - %9:_(s32) = G_ANYEXT %7(s1) - %10:_(<2 x s32>) = G_BUILD_VECTOR %8, %9 - %11:_(<2 x s32>) = COPY %10 - %12:_(<2 x s1>) = G_TRUNC %11(<2 x s32>) - %13:_(<2 x s1>) = COPY %12 - %14:_(s1), %15:_(s1) = G_UNMERGE_VALUES %13 - %16:_(s32) = G_SEXT %14 - %17:_(s32) = G_SEXT %15 - %18:_(<2 x s32>) = G_BUILD_VECTOR %16, %17 - $vgpr0_vgpr1 = COPY %18 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV]](i32), [[UV2]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV1]](i32), [[UV3]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP]](i1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP1]](i1) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[ANYEXT]], 1 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[ANYEXT1]], 1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SEXT_INREG]](i32), [[SEXT_INREG1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %2:_(i32), %3:_(i32) = G_UNMERGE_VALUES %0(<2 x i32>) + %4:_(i32), %5:_(i32) = G_UNMERGE_VALUES %1(<2 x i32>) + %6:_(i1) = G_ICMP intpred(ne), %2(i32), %4 + %7:_(i1) = G_ICMP intpred(ne), %3(i32), %5 + %8:_(i32) = G_ANYEXT %6(i1) + %9:_(i32) = G_ANYEXT %7(i1) + %10:_(<2 x i32>) = G_BUILD_VECTOR %8(i32), %9(i32) + %11:_(<2 x i32>) = COPY %10(<2 x i32>) + %12:_(<2 x i1>) = G_TRUNC %11(<2 x i32>) + %13:_(<2 x i1>) = COPY %12(<2 x i1>) + %14:_(i1), %15:_(i1) = G_UNMERGE_VALUES %13(<2 x i1>) + %16:_(i32) = G_SEXT %14(i1) + %17:_(i32) = G_SEXT %15(i1) + %18:_(<2 x i32>) = G_BUILD_VECTOR %16(i32), %17(i32) + $vgpr0_vgpr1 = COPY %18(<2 x i32>) ... 
@@ -80,39 +80,31 @@ name: test_unmerge_values_s32_sext_v2s32_of_build_vector_v2s16 body: | bb.0: ; CHECK-LABEL: name: test_unmerge_values_s32_sext_v2s32_of_build_vector_v2s16 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV2]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV3]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[ICMP]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[ICMP1]](s1) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ANYEXT]](s16) - ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ANYEXT1]](s16) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16 - ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %2:_(s32), %3:_(s32) = G_UNMERGE_VALUES %0(<2 x s32>) - %4:_(s32), %5:_(s32) = G_UNMERGE_VALUES %1(<2 x s32>) - %6:_(s1) = G_ICMP intpred(ne), %2(s32), %4 - %7:_(s1) = G_ICMP intpred(ne), %3(s32), %5 - %8:_(s16) = G_ANYEXT %6 - %9:_(s16) = G_ANYEXT %7 - %10:_(<2 x s16>) = G_BUILD_VECTOR %8, %9 - %11:_(<2 x s32>) = G_SEXT %10 - %12:_(s32), %13:_(s32) = G_UNMERGE_VALUES %11 - %14:_(<2 x s32>) = G_BUILD_VECTOR %12, %13 - $vgpr0_vgpr1 = COPY %14 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV]](i32), [[UV2]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV1]](i32), [[UV3]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i16) = G_ANYEXT [[ICMP]](i1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i16) = G_ANYEXT [[ICMP1]](i1) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ANYEXT]](i16) + ; CHECK-NEXT: [[SEXT1:%[0-9]+]]:_(i32) = G_SEXT [[ANYEXT1]](i16) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SEXT]](i32), [[SEXT1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %2:_(i32), %3:_(i32) = G_UNMERGE_VALUES %0(<2 x i32>) + %4:_(i32), %5:_(i32) = G_UNMERGE_VALUES %1(<2 x i32>) + %6:_(i1) = G_ICMP intpred(ne), %2(i32), %4 + %7:_(i1) = G_ICMP intpred(ne), %3(i32), %5 + %8:_(i16) = G_ANYEXT %6(i1) + %9:_(i16) = G_ANYEXT %7(i1) + %10:_(<2 x i16>) = G_BUILD_VECTOR %8(i16), %9(i16) + %11:_(<2 x i32>) = 
G_SEXT %10(<2 x i16>) + %12:_(i32), %13:_(i32) = G_UNMERGE_VALUES %11(<2 x i32>) + %14:_(<2 x i32>) = G_BUILD_VECTOR %12(i32), %13(i32) + $vgpr0_vgpr1 = COPY %14(<2 x i32>) ... @@ -121,39 +113,31 @@ name: test_unmerge_values_s32_zext_v2s32_of_build_vector_v2s16 body: | bb.0: ; CHECK-LABEL: name: test_unmerge_values_s32_zext_v2s32_of_build_vector_v2s16 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV2]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV3]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[ICMP]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[ICMP1]](s1) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ANYEXT]](s16) - ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ANYEXT1]](s16) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[LSHR]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %2:_(s32), %3:_(s32) = G_UNMERGE_VALUES %0(<2 x s32>) - %4:_(s32), %5:_(s32) = G_UNMERGE_VALUES %1(<2 x s32>) - %6:_(s1) = G_ICMP intpred(ne), %2(s32), %4 - %7:_(s1) = G_ICMP intpred(ne), %3(s32), %5 - %8:_(s16) = G_ANYEXT %6(s1) - %9:_(s16) = G_ANYEXT %7(s1) - %10:_(<2 x s16>) = G_BUILD_VECTOR %8, %9 - %11:_(<2 x s32>) = G_ZEXT %10 - %12:_(s32), %13:_(s32) = G_UNMERGE_VALUES %11 - %14:_(<2 x s32>) = G_BUILD_VECTOR %12(s32), %13(s32) - $vgpr0_vgpr1 = COPY %14(<2 x s32>) + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV]](i32), [[UV2]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV1]](i32), [[UV3]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i16) = G_ANYEXT [[ICMP]](i1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i16) = G_ANYEXT [[ICMP1]](i1) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[ANYEXT]](i16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[ANYEXT1]](i16) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[ZEXT]](i32), [[ZEXT1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %2:_(i32), %3:_(i32) = G_UNMERGE_VALUES %0(<2 x i32>) + %4:_(i32), %5:_(i32) = G_UNMERGE_VALUES %1(<2 x i32>) + %6:_(i1) = G_ICMP intpred(ne), %2(i32), %4 + %7:_(i1) = 
G_ICMP intpred(ne), %3(i32), %5 + %8:_(i16) = G_ANYEXT %6(i1) + %9:_(i16) = G_ANYEXT %7(i1) + %10:_(<2 x i16>) = G_BUILD_VECTOR %8(i16), %9(i16) + %11:_(<2 x i32>) = G_ZEXT %10(<2 x i16>) + %12:_(i32), %13:_(i32) = G_UNMERGE_VALUES %11(<2 x i32>) + %14:_(<2 x i32>) = G_BUILD_VECTOR %12(i32), %13(i32) + $vgpr0_vgpr1 = COPY %14(<2 x i32>) ... @@ -162,37 +146,29 @@ name: test_unmerge_values_s32_anyext_v2s32_of_build_vector_v2s16 body: | bb.0: ; CHECK-LABEL: name: test_unmerge_values_s32_anyext_v2s32_of_build_vector_v2s16 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV2]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV3]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[ICMP]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[ICMP1]](s1) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ANYEXT]](s16) - ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ANYEXT1]](s16) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[BITCAST1]](s32), [[LSHR]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %2:_(s32), %3:_(s32) = G_UNMERGE_VALUES %0(<2 x s32>) - %4:_(s32), %5:_(s32) = G_UNMERGE_VALUES %1(<2 x s32>) - %6:_(s1) = G_ICMP intpred(ne), %2(s32), %4 - %7:_(s1) = G_ICMP intpred(ne), %3(s32), %5 - %8:_(s16) = G_ANYEXT %6(s1) - %9:_(s16) = G_ANYEXT %7(s1) - %10:_(<2 x s16>) = G_BUILD_VECTOR %8, %9 - %11:_(<2 x s32>) = G_ANYEXT %10 - %12:_(s32), %13:_(s32) = G_UNMERGE_VALUES %11 - %14:_(<2 x s32>) = G_BUILD_VECTOR %12, %13 - $vgpr0_vgpr1 = COPY %14 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV]](i32), [[UV2]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV1]](i32), [[UV3]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP]](i1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP1]](i1) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %2:_(i32), %3:_(i32) = G_UNMERGE_VALUES %0(<2 x i32>) + %4:_(i32), %5:_(i32) = G_UNMERGE_VALUES %1(<2 x i32>) + %6:_(i1) = G_ICMP intpred(ne), %2(i32), %4 + %7:_(i1) = G_ICMP intpred(ne), %3(i32), %5 + %8:_(i16) = G_ANYEXT %6(i1) + %9:_(i16) = G_ANYEXT %7(i1) + %10:_(<2 
x i16>) = G_BUILD_VECTOR %8(i16), %9(i16) + %11:_(<2 x i32>) = G_ANYEXT %10(<2 x i16>) + %12:_(i32), %13:_(i32) = G_UNMERGE_VALUES %11(<2 x i32>) + %14:_(<2 x i32>) = G_BUILD_VECTOR %12(i32), %13(i32) + $vgpr0_vgpr1 = COPY %14(<2 x i32>) ... @@ -202,33 +178,33 @@ name: test_unmerge_values_v2s16_zext_v4s32_of_build_vector_v4s16 body: | bb.0: ; CHECK-LABEL: name: test_unmerge_values_v2s16_zext_v4s32_of_build_vector_v4s16 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV2]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV3]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[ICMP]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[ICMP1]](s1) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ANYEXT]](s16) - ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ANYEXT1]](s16) - ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ANYEXT]](s16) - ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[ANYEXT1]](s16) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ZEXT]](s32), [[ZEXT1]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ZEXT2]](s32), [[ZEXT3]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<2 x s32>), implicit [[BUILD_VECTOR1]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %2:_(s32), %3:_(s32) = G_UNMERGE_VALUES %0(<2 x s32>) - %4:_(s32), %5:_(s32) = G_UNMERGE_VALUES %1(<2 x s32>) - %6:_(s1) = G_ICMP intpred(ne), %2(s32), %4 - %7:_(s1) = G_ICMP intpred(ne), %3(s32), %5 - %8:_(s16) = G_ANYEXT %6 - %9:_(s16) = G_ANYEXT %7 - %10:_(<4 x s16>) = G_BUILD_VECTOR %8, %9, %8, %9 - %11:_(<4 x s32>) = G_ZEXT %10 - %12:_(<2 x s32>), %13:_(<2 x s32>) = G_UNMERGE_VALUES %11 - S_ENDPGM 0, implicit %12, implicit %13 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV]](i32), [[UV2]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV1]](i32), [[UV3]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i16) = G_ANYEXT [[ICMP]](i1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i16) = G_ANYEXT [[ICMP1]](i1) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[ANYEXT]](i16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[ANYEXT1]](i16) + ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[ANYEXT]](i16) + ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[ANYEXT1]](i16) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[ZEXT]](i32), [[ZEXT1]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[ZEXT2]](i32), [[ZEXT3]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<2 x i32>), implicit [[BUILD_VECTOR1]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %2:_(i32), %3:_(i32) = G_UNMERGE_VALUES %0(<2 x i32>) + %4:_(i32), %5:_(i32) = G_UNMERGE_VALUES %1(<2 x i32>) + %6:_(i1) = G_ICMP 
intpred(ne), %2(i32), %4 + %7:_(i1) = G_ICMP intpred(ne), %3(i32), %5 + %8:_(i16) = G_ANYEXT %6(i1) + %9:_(i16) = G_ANYEXT %7(i1) + %10:_(<4 x i16>) = G_BUILD_VECTOR %8(i16), %9(i16), %8(i16), %9(i16) + %11:_(<4 x i32>) = G_ZEXT %10(<4 x i16>) + %12:_(<2 x i32>), %13:_(<2 x i32>) = G_UNMERGE_VALUES %11(<4 x i32>) + S_ENDPGM 0, implicit %12(<2 x i32>), implicit %13(<2 x i32>) ... @@ -237,31 +213,31 @@ name: test_unmerge_values_s1_trunc_v4s1_of_concat_vectors_v4s32_v2s32 body: | bb.0: ; CHECK-LABEL: name: test_unmerge_values_s1_trunc_v4s1_of_concat_vectors_v4s32_v2s32 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV]], 1 - ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV1]], 1 - ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV2]], 1 - ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV3]], 1 - ; CHECK-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[SEXT_INREG1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[SEXT_INREG2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[SEXT_INREG3]](s32) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<4 x s32>) = G_CONCAT_VECTORS %0, %1 - %3:_(<4 x s1>) = G_TRUNC %2 - %4:_(s1), %5:_(s1), %6:_(s1), %7:_(s1) = G_UNMERGE_VALUES %3 - %8:_(s32) = G_SEXT %4 - %9:_(s32) = G_SEXT %5 - %10:_(s32) = G_SEXT %6 - %11:_(s32) = G_SEXT %7 - $vgpr0 = COPY %8 - $vgpr1 = COPY %9 - $vgpr2 = COPY %10 - $vgpr3 = COPY %11 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[UV]], 1 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[UV1]], 1 + ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[UV2]], 1 + ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(i32) = G_SEXT_INREG [[UV3]], 1 + ; CHECK-NEXT: $vgpr0 = COPY [[SEXT_INREG]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[SEXT_INREG1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[SEXT_INREG2]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[SEXT_INREG3]](i32) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<4 x i32>) = G_CONCAT_VECTORS %0(<2 x i32>), %1(<2 x i32>) + %3:_(<4 x i1>) = G_TRUNC %2(<4 x i32>) + %4:_(i1), %5:_(i1), %6:_(i1), %7:_(i1) = G_UNMERGE_VALUES %3(<4 x i1>) + %8:_(i32) = G_SEXT %4(i1) + %9:_(i32) = G_SEXT %5(i1) + %10:_(i32) = G_SEXT %6(i1) + %11:_(i32) = G_SEXT %7(i1) + $vgpr0 = COPY %8(i32) + $vgpr1 = COPY %9(i32) + $vgpr2 = COPY %10(i32) + $vgpr3 = COPY %11(i32) ... 
--- @@ -269,23 +245,23 @@ name: test_unmerge_values_s16_of_concat_vectors_v2s16_v2s16 body: | bb.0: ; CHECK-LABEL: name: test_unmerge_values_s16_of_concat_vectors_v2s16_v2s16 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s16), implicit [[TRUNC1]](s16), implicit [[TRUNC2]](s16), implicit [[TRUNC3]](s16) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<4 x s16>) = G_CONCAT_VECTORS %0, %1 - %3:_(s16), %4:_(s16), %5:_(s16), %6:_(s16) = G_UNMERGE_VALUES %2 - S_ENDPGM 0, implicit %3, implicit %4, implicit %5, implicit %6 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](i16), implicit [[TRUNC1]](i16), implicit [[TRUNC2]](i16), implicit [[TRUNC3]](i16) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<4 x i16>) = G_CONCAT_VECTORS %0(<2 x i16>), %1(<2 x i16>) + %3:_(i16), %4:_(i16), %5:_(i16), %6:_(i16) = G_UNMERGE_VALUES %2(<4 x i16>) + S_ENDPGM 0, implicit %3(i16), implicit %4(i16), implicit %5(i16), implicit %6(i16) ... 
--- @@ -293,16 +269,16 @@ name: test_unmerge_values_s32_of_concat_vectors_v2s32_v2s32 body: | bb.0: ; CHECK-LABEL: name: test_unmerge_values_s32_of_concat_vectors_v2s32_v2s32 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr1_vgpr2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[UV]](s32), implicit [[UV1]](s32), implicit [[UV2]](s32), implicit [[UV3]](s32) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr1_vgpr2 - %2:_(<4 x s32>) = G_CONCAT_VECTORS %0, %1 - %3:_(s32), %4:_(s32), %5:_(s32), %6:_(s32) = G_UNMERGE_VALUES %2 - S_ENDPGM 0, implicit %3, implicit %4, implicit %5, implicit %6 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr1_vgpr2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[UV]](i32), implicit [[UV1]](i32), implicit [[UV2]](i32), implicit [[UV3]](i32) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr1_vgpr2 + %2:_(<4 x i32>) = G_CONCAT_VECTORS %0(<2 x i32>), %1(<2 x i32>) + %3:_(i32), %4:_(i32), %5:_(i32), %6:_(i32) = G_UNMERGE_VALUES %2(<4 x i32>) + S_ENDPGM 0, implicit %3(i32), implicit %4(i32), implicit %5(i32), implicit %6(i32) ... --- @@ -310,16 +286,16 @@ name: test_unmerge_values_s32_of_concat_vectors_v2s64_v2s64 body: | bb.0: ; CHECK-LABEL: name: test_unmerge_values_s32_of_concat_vectors_v2s64_v2s64 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[UV]](s32), implicit [[UV1]](s32), implicit [[UV2]](s32), implicit [[UV3]](s32), implicit [[UV4]](s32), implicit [[UV5]](s32), implicit [[UV6]](s32), implicit [[UV7]](s32) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - %2:_(<4 x s64>) = G_CONCAT_VECTORS %0, %1 - %3:_(s32), %4:_(s32), %5:_(s32), %6:_(s32), %7:_(s32), %8:_(s32), %9:_(s32), %10:_(s32) = G_UNMERGE_VALUES %2 - S_ENDPGM 0, implicit %3, implicit %4, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9, implicit %10 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i64>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[UV]](i32), implicit [[UV1]](i32), implicit [[UV2]](i32), implicit [[UV3]](i32), implicit [[UV4]](i32), implicit [[UV5]](i32), implicit [[UV6]](i32), implicit [[UV7]](i32) + %0:_(<2 x i64>) = COPY 
$vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(<4 x i64>) = G_CONCAT_VECTORS %0(<2 x i64>), %1(<2 x i64>) + %3:_(i32), %4:_(i32), %5:_(i32), %6:_(i32), %7:_(i32), %8:_(i32), %9:_(i32), %10:_(i32) = G_UNMERGE_VALUES %2(<4 x i64>) + S_ENDPGM 0, implicit %3(i32), implicit %4(i32), implicit %5(i32), implicit %6(i32), implicit %7(i32), implicit %8(i32), implicit %9(i32), implicit %10(i32) ... --- @@ -327,21 +303,21 @@ name: test_unmerge_values_s32_of_trunc_concat_vectors_v2s64_v2s64 body: | bb.0: ; CHECK-LABEL: name: test_unmerge_values_s32_of_trunc_concat_vectors_v2s64_v2s64 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV]](s64) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV1]](s64) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s32), implicit [[TRUNC1]](s32), implicit [[TRUNC2]](s32), implicit [[TRUNC3]](s32) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - %2:_(<4 x s64>) = G_CONCAT_VECTORS %0, %1 - %3:_(<4 x s32>) = G_TRUNC %2 - %4:_(s32), %5:_(s32), %6:_(s32), %7:_(s32) = G_UNMERGE_VALUES %3 - S_ENDPGM 0, implicit %4, implicit %5, implicit %6, implicit %7 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY1]](<2 x i64>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[UV]](i64) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i32) = G_TRUNC [[UV1]](i64) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i32) = G_TRUNC [[UV2]](i64) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(i32) = G_TRUNC [[UV3]](i64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](i32), implicit [[TRUNC1]](i32), implicit [[TRUNC2]](i32), implicit [[TRUNC3]](i32) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(<4 x i64>) = G_CONCAT_VECTORS %0(<2 x i64>), %1(<2 x i64>) + %3:_(<4 x i32>) = G_TRUNC %2(<4 x i64>) + %4:_(i32), %5:_(i32), %6:_(i32), %7:_(i32) = G_UNMERGE_VALUES %3(<4 x i32>) + S_ENDPGM 0, implicit %4(i32), implicit %5(i32), implicit %6(i32), implicit %7(i32) ... 
--- @@ -349,21 +325,21 @@ name: test_unmerge_values_s64_of_sext_concat_vectors_v2s32_v2s32 body: | bb.0: ; CHECK-LABEL: name: test_unmerge_values_s64_of_sext_concat_vectors_v2s32_v2s32 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[UV]](s32) - ; CHECK-NEXT: [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT [[UV1]](s32) - ; CHECK-NEXT: [[SEXT2:%[0-9]+]]:_(s64) = G_SEXT [[UV2]](s32) - ; CHECK-NEXT: [[SEXT3:%[0-9]+]]:_(s64) = G_SEXT [[UV3]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[SEXT]](s64), implicit [[SEXT1]](s64), implicit [[SEXT2]](s64), implicit [[SEXT3]](s64) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<4 x s32>) = G_CONCAT_VECTORS %0, %1 - %3:_(<4 x s64>) = G_SEXT %2 - %4:_(s64), %5:_(s64), %6:_(s64), %7:_(s64) = G_UNMERGE_VALUES %3 - S_ENDPGM 0, implicit %4, implicit %5, implicit %6, implicit %7 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(i64) = G_SEXT [[UV]](i32) + ; CHECK-NEXT: [[SEXT1:%[0-9]+]]:_(i64) = G_SEXT [[UV1]](i32) + ; CHECK-NEXT: [[SEXT2:%[0-9]+]]:_(i64) = G_SEXT [[UV2]](i32) + ; CHECK-NEXT: [[SEXT3:%[0-9]+]]:_(i64) = G_SEXT [[UV3]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[SEXT]](i64), implicit [[SEXT1]](i64), implicit [[SEXT2]](i64), implicit [[SEXT3]](i64) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<4 x i32>) = G_CONCAT_VECTORS %0(<2 x i32>), %1(<2 x i32>) + %3:_(<4 x i64>) = G_SEXT %2(<4 x i32>) + %4:_(i64), %5:_(i64), %6:_(i64), %7:_(i64) = G_UNMERGE_VALUES %3(<4 x i64>) + S_ENDPGM 0, implicit %4(i64), implicit %5(i64), implicit %6(i64), implicit %7(i64) ... 
--- @@ -371,21 +347,21 @@ name: test_unmerge_values_s64_of_zext_concat_vectors_v2s32_v2s32 body: | bb.0: ; CHECK-LABEL: name: test_unmerge_values_s64_of_zext_concat_vectors_v2s32_v2s32 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[UV]](s32) - ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[UV1]](s32) - ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s64) = G_ZEXT [[UV2]](s32) - ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s64) = G_ZEXT [[UV3]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[ZEXT]](s64), implicit [[ZEXT1]](s64), implicit [[ZEXT2]](s64), implicit [[ZEXT3]](s64) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<4 x s32>) = G_CONCAT_VECTORS %0, %1 - %3:_(<4 x s64>) = G_ZEXT %2 - %4:_(s64), %5:_(s64), %6:_(s64), %7:_(s64) = G_UNMERGE_VALUES %3 - S_ENDPGM 0, implicit %4, implicit %5, implicit %6, implicit %7 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[UV]](i32) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[UV1]](i32) + ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(i64) = G_ZEXT [[UV2]](i32) + ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(i64) = G_ZEXT [[UV3]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[ZEXT]](i64), implicit [[ZEXT1]](i64), implicit [[ZEXT2]](i64), implicit [[ZEXT3]](i64) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<4 x i32>) = G_CONCAT_VECTORS %0(<2 x i32>), %1(<2 x i32>) + %3:_(<4 x i64>) = G_ZEXT %2(<4 x i32>) + %4:_(i64), %5:_(i64), %6:_(i64), %7:_(i64) = G_UNMERGE_VALUES %3(<4 x i64>) + S_ENDPGM 0, implicit %4(i64), implicit %5(i64), implicit %6(i64), implicit %7(i64) ... 
--- @@ -393,21 +369,21 @@ name: test_unmerge_values_s64_of_anyext_concat_vectors_v2s32_v2s32 body: | bb.0: ; CHECK-LABEL: name: test_unmerge_values_s64_of_anyext_concat_vectors_v2s32_v2s32 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV]](s32) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV1]](s32) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV2]](s32) - ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[UV3]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[ANYEXT]](s64), implicit [[ANYEXT1]](s64), implicit [[ANYEXT2]](s64), implicit [[ANYEXT3]](s64) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<4 x s32>) = G_CONCAT_VECTORS %0, %1 - %3:_(<4 x s64>) = G_ANYEXT %2 - %4:_(s64), %5:_(s64), %6:_(s64), %7:_(s64) = G_UNMERGE_VALUES %3 - S_ENDPGM 0, implicit %4, implicit %5, implicit %6, implicit %7 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[UV]](i32) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[UV1]](i32) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(i64) = G_ANYEXT [[UV2]](i32) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(i64) = G_ANYEXT [[UV3]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[ANYEXT]](i64), implicit [[ANYEXT1]](i64), implicit [[ANYEXT2]](i64), implicit [[ANYEXT3]](i64) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<4 x i32>) = G_CONCAT_VECTORS %0(<2 x i32>), %1(<2 x i32>) + %3:_(<4 x i64>) = G_ANYEXT %2(<4 x i32>) + %4:_(i64), %5:_(i64), %6:_(i64), %7:_(i64) = G_UNMERGE_VALUES %3(<4 x i64>) + S_ENDPGM 0, implicit %4(i64), implicit %5(i64), implicit %6(i64), implicit %7(i64) ... 
--- @@ -415,36 +391,36 @@ name: test_unmerge_values_s8_of_trunc_v4s16_concat_vectors_v2s32_v2s32 body: | bb.0: ; CHECK-LABEL: name: test_unmerge_values_s8_of_trunc_v4s16_concat_vectors_v2s32_v2s32 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[COPY1]](<2 x s32>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[TRUNC]](<2 x s16>) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[BITCAST]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR1]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR2]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[TRUNC1]](<2 x s16>) - ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[BITCAST1]](s32) - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR3]](s32) - ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR4]](s32) - ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32) - ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR5]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC2]](s8), implicit [[TRUNC3]](s8), implicit [[TRUNC4]](s8), implicit [[TRUNC5]](s8), implicit [[TRUNC6]](s8), implicit [[TRUNC7]](s8), implicit [[TRUNC8]](s8), implicit [[TRUNC9]](s8) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<4 x s32>) = G_CONCAT_VECTORS %0, %1 - %3:_(<4 x s16>) = G_TRUNC %2 - %4:_(s8), %5:_(s8), %6:_(s8), %7:_(s8), %8:_(s8), %9:_(s8), %10:_(s8), %11:_(s8) = G_UNMERGE_VALUES %3 - S_ENDPGM 0, implicit %4, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9, implicit %10, implicit %11 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x i16>) = G_TRUNC [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x i16>) = G_TRUNC [[COPY1]](<2 x i32>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[TRUNC]](<2 x i16>) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i8) = G_TRUNC [[BITCAST]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(i8) = G_TRUNC [[LSHR]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C1]](i32) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(i8) = G_TRUNC [[LSHR1]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(i8) = G_TRUNC [[LSHR2]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[TRUNC1]](<2 x i16>) + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(i8) = G_TRUNC 
[[BITCAST1]](i32) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(i8) = G_TRUNC [[LSHR3]](i32) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(i8) = G_TRUNC [[LSHR4]](i32) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C2]](i32) + ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(i8) = G_TRUNC [[LSHR5]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC2]](i8), implicit [[TRUNC3]](i8), implicit [[TRUNC4]](i8), implicit [[TRUNC5]](i8), implicit [[TRUNC6]](i8), implicit [[TRUNC7]](i8), implicit [[TRUNC8]](i8), implicit [[TRUNC9]](i8) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<4 x i32>) = G_CONCAT_VECTORS %0(<2 x i32>), %1(<2 x i32>) + %3:_(<4 x i16>) = G_TRUNC %2(<4 x i32>) + %4:_(i8), %5:_(i8), %6:_(i8), %7:_(i8), %8:_(i8), %9:_(i8), %10:_(i8), %11:_(i8) = G_UNMERGE_VALUES %3(<4 x i16>) + S_ENDPGM 0, implicit %4(i8), implicit %5(i8), implicit %6(i8), implicit %7(i8), implicit %8(i8), implicit %9(i8), implicit %10(i8), implicit %11(i8) ... --- @@ -452,24 +428,24 @@ name: test_unmerge_values_s16_of_anyext_v4s64_concat_vectors_v2s32_v2s32 body: | bb.0: ; CHECK-LABEL: name: test_unmerge_values_s16_of_anyext_v4s64_concat_vectors_v2s32_v2s32 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV]](s32) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s16), implicit [[TRUNC1]](s16), implicit [[TRUNC2]](s16), implicit [[TRUNC3]](s16) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<4 x s32>) = G_CONCAT_VECTORS %0, %1 - %3:_(<4 x s64>) = G_ANYEXT %2 - %4:_(s16), %5:_(s16), %6:_(s16), %7:_(s16), %8:_(s16), %9:_(s16), %10:_(s16), %11:_(s16), %12:_(s16), %13:_(s16), %14:_(s16), %15:_(s16), %16:_(s16), %17:_(s16), %18:_(s16), %19:_(s16) = G_UNMERGE_VALUES %3 - S_ENDPGM 0, implicit %4, implicit %5, implicit %6, implicit %7 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[UV]](i32) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ANYEXT]](i64) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[UV]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[UV3]](i32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[UV3]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](i16), implicit [[TRUNC1]](i16), implicit [[TRUNC2]](i16), 
implicit [[TRUNC3]](i16) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<4 x i32>) = G_CONCAT_VECTORS %0(<2 x i32>), %1(<2 x i32>) + %3:_(<4 x i64>) = G_ANYEXT %2(<4 x i32>) + %4:_(i16), %5:_(i16), %6:_(i16), %7:_(i16), %8:_(i16), %9:_(i16), %10:_(i16), %11:_(i16), %12:_(i16), %13:_(i16), %14:_(i16), %15:_(i16), %16:_(i16), %17:_(i16), %18:_(i16), %19:_(i16) = G_UNMERGE_VALUES %3(<4 x i64>) + S_ENDPGM 0, implicit %4(i16), implicit %5(i16), implicit %6(i16), implicit %7(i16) ... # FIXME: Handle this @@ -478,27 +454,27 @@ name: test_unmerge_values_s32_of_concat_vectors_v4s32_v4s32 body: | bb.0: ; CHECK-LABEL: name: test_unmerge_values_s32_of_concat_vectors_v4s32_v4s32 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr6_vgpr7 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[COPY1]](<2 x s32>) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[COPY2]](<2 x s32>) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[COPY3]](<2 x s32>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[TRUNC]](<2 x s16>) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[TRUNC1]](<2 x s16>) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[TRUNC2]](<2 x s16>) - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[TRUNC3]](<2 x s16>) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST]](s32), implicit [[BITCAST1]](s32), implicit [[BITCAST2]](s32), implicit [[BITCAST3]](s32) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = COPY $vgpr4_vgpr5 - %3:_(<2 x s32>) = COPY $vgpr6_vgpr7 - %4:_(<8 x s32>) = G_CONCAT_VECTORS %0, %1, %2, %3 - %5:_(<8 x s16>) = G_TRUNC %4 - %6:_(s32), %7:_(s32), %8:_(s32), %9:_(s32) = G_UNMERGE_VALUES %5 - S_ENDPGM 0, implicit %6, implicit %7, implicit %8, implicit %9 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr4_vgpr5 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr6_vgpr7 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x i16>) = G_TRUNC [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x i16>) = G_TRUNC [[COPY1]](<2 x i32>) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<2 x i16>) = G_TRUNC [[COPY2]](<2 x i32>) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(<2 x i16>) = G_TRUNC [[COPY3]](<2 x i32>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[TRUNC]](<2 x i16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[TRUNC1]](<2 x i16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[TRUNC2]](<2 x i16>) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[TRUNC3]](<2 x i16>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST]](i32), implicit [[BITCAST1]](i32), implicit [[BITCAST2]](i32), implicit [[BITCAST3]](i32) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i32>) = COPY $vgpr4_vgpr5 + %3:_(<2 x i32>) = COPY $vgpr6_vgpr7 + %4:_(<8 x i32>) = G_CONCAT_VECTORS %0(<2 x i32>), %1(<2 x i32>), %2(<2 x i32>), %3(<2 x i32>) + %5:_(<8 x i16>) = G_TRUNC %4(<8 x i32>) + %6:_(i32), %7:_(i32), %8:_(i32), %9:_(i32) = G_UNMERGE_VALUES %5(<8 x i16>) + S_ENDPGM 0, implicit %6(i32), implicit %7(i32), implicit 
%8(i32), implicit %9(i32) ... --- @@ -506,25 +482,25 @@ name: test_unmerge_values_s64_of_build_vector_v4s32 body: | bb.0: ; CHECK-LABEL: name: test_unmerge_values_s64_of_build_vector_v4s32 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR1]](<2 x s32>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[TRUNC]](<2 x s16>) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[TRUNC1]](<2 x s16>) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST]](s32), implicit [[BITCAST1]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = COPY $vgpr3 - %4:_(<4 x s32>) = G_BUILD_VECTOR %0, %1, %2, %3 - %5:_(<4 x s16>) = G_TRUNC %4 - %6:_(s32), %7:_(s32) = G_UNMERGE_VALUES %5 - S_ENDPGM 0, implicit %6, implicit %7 + ; CHECK: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x i16>) = G_TRUNC [[BUILD_VECTOR]](<2 x i32>) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x i16>) = G_TRUNC [[BUILD_VECTOR1]](<2 x i32>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[TRUNC]](<2 x i16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[TRUNC1]](<2 x i16>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST]](i32), implicit [[BITCAST1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 + %4:_(<4 x i32>) = G_BUILD_VECTOR %0(i32), %1(i32), %2(i32), %3(i32) + %5:_(<4 x i16>) = G_TRUNC %4(<4 x i32>) + %6:_(i32), %7:_(i32) = G_UNMERGE_VALUES %5(<4 x i16>) + S_ENDPGM 0, implicit %6(i32), implicit %7(i32) ... 
# To properly simplify that one, we would need to insert bitcast @@ -539,25 +515,25 @@ name: test_unmerge_values_s128_of_zext_of_concat_vectors body: | bb.0: ; CHECK-LABEL: name: test_unmerge_values_s128_of_zext_of_concat_vectors - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[AND]](s32), [[LSHR]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[AND1]](s32), [[LSHR1]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s64), implicit [[MV1]](s64) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<4 x s16>) = G_CONCAT_VECTORS %0, %1 - %3:_(<4 x s32>) = G_ZEXT %2 - %4:_(s64), %5:_(s64) = G_UNMERGE_VALUES %3 - S_ENDPGM 0, implicit %4, implicit %5 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[AND]](i32), [[LSHR]](i32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[AND1]](i32), [[LSHR1]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](i64), implicit [[MV1]](i64) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<4 x i16>) = G_CONCAT_VECTORS %0(<2 x i16>), %1(<2 x i16>) + %3:_(<4 x i32>) = G_ZEXT %2(<4 x i16>) + %4:_(i64), %5:_(i64) = G_UNMERGE_VALUES %3(<4 x i32>) + S_ENDPGM 0, implicit %4(i64), implicit %5(i64) ... 
--- @@ -566,18 +542,18 @@ name: test_unmerge_values_v3s32_of_v12s32_concat_vectors_v4s32 body: | bb.0: ; CHECK-LABEL: name: test_unmerge_values_v3s32_of_v12s32_concat_vectors_v4s32 - ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[COPY]](<4 x s32>), [[COPY1]](<4 x s32>), [[COPY2]](<4 x s32>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>), [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[UV]](<3 x s32>), implicit [[UV1]](<3 x s32>), implicit [[UV2]](<3 x s32>), implicit [[UV3]](<3 x s32>) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - %2:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - %3:_(<12 x s32>) = G_CONCAT_VECTORS %0, %1, %2 - %4:_(<3 x s32>), %5:_(<3 x s32>), %6:_(<3 x s32>), %7:_(<3 x s32>) = G_UNMERGE_VALUES %3 - S_ENDPGM 0, implicit %4, implicit %5, implicit %6, implicit %7 + ; CHECK: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x i32>) = G_CONCAT_VECTORS [[COPY]](<4 x i32>), [[COPY1]](<4 x i32>), [[COPY2]](<4 x i32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x i32>), [[UV1:%[0-9]+]]:_(<3 x i32>), [[UV2:%[0-9]+]]:_(<3 x i32>), [[UV3:%[0-9]+]]:_(<3 x i32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x i32>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[UV]](<3 x i32>), implicit [[UV1]](<3 x i32>), implicit [[UV2]](<3 x i32>), implicit [[UV3]](<3 x i32>) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<4 x i32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(<4 x i32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + %3:_(<12 x i32>) = G_CONCAT_VECTORS %0(<4 x i32>), %1(<4 x i32>), %2(<4 x i32>) + %4:_(<3 x i32>), %5:_(<3 x i32>), %6:_(<3 x i32>), %7:_(<3 x i32>) = G_UNMERGE_VALUES %3(<12 x i32>) + S_ENDPGM 0, implicit %4(<3 x i32>), implicit %5(<3 x i32>), implicit %6(<3 x i32>), implicit %7(<3 x i32>) ... 
--- @@ -585,45 +561,45 @@ name: test_unmerge_values_v3s16_of_v12s16_concat_vectors_v4s16 body: | bb.0: ; CHECK-LABEL: name: test_unmerge_values_v3s16_of_v12s16_concat_vectors_v4s16 - ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LSHR1]](s32), [[BITCAST3]](s32), [[LSHR2]](s32) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST4]](s32), [[LSHR3]](s32), [[BITCAST5]](s32) - ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) - ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) - ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LSHR4]](s32), [[BITCAST7]](s32), [[LSHR5]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x s32>), implicit [[BUILD_VECTOR1]](<3 x s32>), implicit [[BUILD_VECTOR2]](<3 x s32>), implicit [[BUILD_VECTOR3]](<3 x s32>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 - %2:_(<4 x s16>) = COPY $vgpr4_vgpr5 - %3:_(<12 x s16>) = G_CONCAT_VECTORS %0, %1, %2 - %4:_(<3 x s16>), %5:_(<3 x s16>), %6:_(<3 x s16>), %7:_(<3 x s16>) = G_UNMERGE_VALUES %3 - %8:_(<3 x s32>) = G_ANYEXT %4 - %9:_(<3 x s32>) = G_ANYEXT %5 - %10:_(<3 x s32>) = G_ANYEXT %6 - %11:_(<3 x s32>) = G_ANYEXT %7 - S_ENDPGM 0, implicit %8, implicit %9, implicit %10, implicit %11 + ; CHECK: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr4_vgpr5 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 
x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[BITCAST]](i32), [[LSHR]](i32), [[BITCAST1]](i32) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LSHR1]](i32), [[BITCAST3]](i32), [[LSHR2]](i32) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[UV7]](<2 x i16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x i16>), [[UV9:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY2]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[UV8]](<2 x i16>) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[BITCAST4]](i32), [[LSHR3]](i32), [[BITCAST5]](i32) + ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<2 x i16>), [[UV11:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY2]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[UV10]](<2 x i16>) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST6]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[UV11]](<2 x i16>) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST7]], [[C]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LSHR4]](i32), [[BITCAST7]](i32), [[LSHR5]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x i32>), implicit [[BUILD_VECTOR1]](<3 x i32>), implicit [[BUILD_VECTOR2]](<3 x i32>), implicit [[BUILD_VECTOR3]](<3 x i32>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = COPY $vgpr2_vgpr3 + %2:_(<4 x i16>) = COPY $vgpr4_vgpr5 + %3:_(<12 x i16>) = G_CONCAT_VECTORS %0(<4 x i16>), %1(<4 x i16>), %2(<4 x i16>) + %4:_(<3 x i16>), %5:_(<3 x i16>), %6:_(<3 x i16>), %7:_(<3 x i16>) = G_UNMERGE_VALUES %3(<12 x i16>) + %8:_(<3 x i32>) = G_ANYEXT %4(<3 x i16>) + %9:_(<3 x i32>) = G_ANYEXT %5(<3 x i16>) + %10:_(<3 x i32>) = G_ANYEXT %6(<3 x i16>) + %11:_(<3 x i32>) = G_ANYEXT %7(<3 x i16>) + S_ENDPGM 0, implicit %8(<3 x i32>), implicit %9(<3 x i32>), implicit %10(<3 x i32>), implicit %11(<3 x i32>) ... 
--- @@ -636,41 +612,41 @@ body: | ; CHECK-LABEL: name: unmerge_v2s16_from_v4s16_sext_v4s8_concat_vectors_v2s8 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 - ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 8 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG]], [[C]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG1]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 8 - ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 8 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG2]], [[C]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG3]], [[C]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<2 x s16>), implicit [[BITCAST1]](<2 x s16>) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = COPY $vgpr3 - %4:_(s8) = G_TRUNC %0 - %5:_(s8) = G_TRUNC %1 - %6:_(s8) = G_TRUNC %2 - %7:_(s8) = G_TRUNC %3 - %8:_(<2 x s8>) = G_BUILD_VECTOR %4, %5 - %9:_(<2 x s8>) = G_BUILD_VECTOR %6, %7 - %10:_(<4 x s8>) = G_CONCAT_VECTORS %8, %9 - %11:_(<4 x s16>) = G_SEXT %10 - %12:_(<2 x s16>), %13:_(<2 x s16>) = G_UNMERGE_VALUES %11 - S_ENDPGM 0, implicit %12, implicit %13 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 8 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY1]], 8 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[SEXT_INREG]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[SEXT_INREG1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY2]], 8 + ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY3]], 8 + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[SEXT_INREG2]], [[C]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[SEXT_INREG3]], [[C]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C1]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<2 x i16>), implicit [[BITCAST1]](<2 x i16>) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = 
COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 + %4:_(i8) = G_TRUNC %0(i32) + %5:_(i8) = G_TRUNC %1(i32) + %6:_(i8) = G_TRUNC %2(i32) + %7:_(i8) = G_TRUNC %3(i32) + %8:_(<2 x i8>) = G_BUILD_VECTOR %4(i8), %5(i8) + %9:_(<2 x i8>) = G_BUILD_VECTOR %6(i8), %7(i8) + %10:_(<4 x i8>) = G_CONCAT_VECTORS %8(<2 x i8>), %9(<2 x i8>) + %11:_(<4 x i16>) = G_SEXT %10(<4 x i8>) + %12:_(<2 x i16>), %13:_(<2 x i16>) = G_UNMERGE_VALUES %11(<4 x i16>) + S_ENDPGM 0, implicit %12(<2 x i16>), implicit %13(<2 x i16>) ... --- @@ -683,67 +659,67 @@ body: | ; CHECK-LABEL: name: unmerge_v2s16_from_v8s16_sext_v8s8_concat_vectors_v4s8 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 - ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 8 - ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 8 - ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 8 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG]], [[C]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG1]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG2]], [[C]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG3]], [[C]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY4]], 8 - ; CHECK-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY5]], 8 - ; CHECK-NEXT: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY6]], 8 - ; CHECK-NEXT: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY7]], 8 - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG4]], [[C]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG5]], [[C]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG6]], [[C]] - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG7]], [[C]] - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C1]](s32) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<2 x s16>), implicit [[BITCAST1]](<2 x s16>), implicit [[BITCAST2]](<2 x s16>), implicit [[BITCAST3]](<2 x s16>) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = COPY $vgpr3 - %4:_(s32) = COPY $vgpr4 
- %5:_(s32) = COPY $vgpr5 - %6:_(s32) = COPY $vgpr6 - %7:_(s32) = COPY $vgpr7 - %8:_(s8) = G_TRUNC %0 - %9:_(s8) = G_TRUNC %1 - %10:_(s8) = G_TRUNC %2 - %11:_(s8) = G_TRUNC %3 - %12:_(s8) = G_TRUNC %4 - %13:_(s8) = G_TRUNC %5 - %14:_(s8) = G_TRUNC %6 - %15:_(s8) = G_TRUNC %7 - %16:_(<4 x s8>) = G_BUILD_VECTOR %8, %9, %10, %11 - %17:_(<4 x s8>) = G_BUILD_VECTOR %12, %13, %14, %15 - %18:_(<8 x s8>) = G_CONCAT_VECTORS %16, %17 - %19:_(<8 x s16>) = G_SEXT %18 - %20:_(<2 x s16>), %21:_(<2 x s16>), %22:_(<2 x s16>), %23:_(<2 x s16>) = G_UNMERGE_VALUES %19 - S_ENDPGM 0, implicit %20, implicit %21, implicit %22, implicit %23 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 8 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY1]], 8 + ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY2]], 8 + ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY3]], 8 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[SEXT_INREG]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[SEXT_INREG1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[SEXT_INREG2]], [[C]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[SEXT_INREG3]], [[C]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C1]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CHECK-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY4]], 8 + ; CHECK-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY5]], 8 + ; CHECK-NEXT: [[SEXT_INREG6:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY6]], 8 + ; CHECK-NEXT: [[SEXT_INREG7:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY7]], 8 + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[SEXT_INREG4]], [[C]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[SEXT_INREG5]], [[C]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND5]], [[C1]](i32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[AND4]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(i32) = G_AND [[SEXT_INREG6]], [[C]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(i32) = G_AND [[SEXT_INREG7]], [[C]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[AND7]], [[C1]](i32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[AND6]], [[SHL3]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR3]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<2 x i16>), implicit [[BITCAST1]](<2 x i16>), implicit [[BITCAST2]](<2 x i16>), implicit [[BITCAST3]](<2 x i16>) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 + %4:_(i32) = COPY $vgpr4 + %5:_(i32) = COPY $vgpr5 + %6:_(i32) = COPY $vgpr6 + %7:_(i32) = COPY $vgpr7 + 
%8:_(i8) = G_TRUNC %0(i32) + %9:_(i8) = G_TRUNC %1(i32) + %10:_(i8) = G_TRUNC %2(i32) + %11:_(i8) = G_TRUNC %3(i32) + %12:_(i8) = G_TRUNC %4(i32) + %13:_(i8) = G_TRUNC %5(i32) + %14:_(i8) = G_TRUNC %6(i32) + %15:_(i8) = G_TRUNC %7(i32) + %16:_(<4 x i8>) = G_BUILD_VECTOR %8(i8), %9(i8), %10(i8), %11(i8) + %17:_(<4 x i8>) = G_BUILD_VECTOR %12(i8), %13(i8), %14(i8), %15(i8) + %18:_(<8 x i8>) = G_CONCAT_VECTORS %16(<4 x i8>), %17(<4 x i8>) + %19:_(<8 x i16>) = G_SEXT %18(<8 x i8>) + %20:_(<2 x i16>), %21:_(<2 x i16>), %22:_(<2 x i16>), %23:_(<2 x i16>) = G_UNMERGE_VALUES %19(<8 x i16>) + S_ENDPGM 0, implicit %20(<2 x i16>), implicit %21(<2 x i16>), implicit %22(<2 x i16>), implicit %23(<2 x i16>) ... --- @@ -756,119 +732,119 @@ body: | ; CHECK-LABEL: name: unmerge_v2s16_from_v16s16_sext_v16s8_concat_vectors_v8s8 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 - ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 8 - ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 8 - ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 8 - ; CHECK-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY4]], 8 - ; CHECK-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY5]], 8 - ; CHECK-NEXT: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY6]], 8 - ; CHECK-NEXT: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY7]], 8 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG]], [[C]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG1]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG2]], [[C]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG3]], [[C]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG4]], [[C]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG5]], [[C]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR 
[[AND4]], [[SHL2]] - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG6]], [[C]] - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG7]], [[C]] - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C1]](s32) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CHECK-NEXT: [[SEXT_INREG8:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY8]], 8 - ; CHECK-NEXT: [[SEXT_INREG9:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY9]], 8 - ; CHECK-NEXT: [[SEXT_INREG10:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY10]], 8 - ; CHECK-NEXT: [[SEXT_INREG11:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY11]], 8 - ; CHECK-NEXT: [[SEXT_INREG12:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY12]], 8 - ; CHECK-NEXT: [[SEXT_INREG13:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY13]], 8 - ; CHECK-NEXT: [[SEXT_INREG14:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY14]], 8 - ; CHECK-NEXT: [[SEXT_INREG15:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY15]], 8 - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG8]], [[C]] - ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG9]], [[C]] - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C1]](s32) - ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]] - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG10]], [[C]] - ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG11]], [[C]] - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C1]](s32) - ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]] - ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG12]], [[C]] - ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG13]], [[C]] - ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C1]](s32) - ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL6]] - ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) - ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG14]], [[C]] - ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG15]], [[C]] - ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C1]](s32) - ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND14]], [[SHL7]] - ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<2 x s16>), implicit [[BITCAST1]](<2 x s16>), implicit [[BITCAST2]](<2 x s16>), implicit [[BITCAST3]](<2 x s16>), implicit [[BITCAST4]](<2 x s16>), implicit [[BITCAST5]](<2 x s16>), implicit [[BITCAST6]](<2 x s16>), implicit [[BITCAST7]](<2 x s16>) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = COPY $vgpr3 - %4:_(s32) = COPY $vgpr4 - %5:_(s32) = COPY $vgpr5 - %6:_(s32) = COPY $vgpr6 - %7:_(s32) = COPY $vgpr7 - %8:_(s32) = COPY $vgpr8 - %9:_(s32) = COPY $vgpr9 - %10:_(s32) = COPY $vgpr10 - %11:_(s32) = COPY $vgpr11 - %12:_(s32) = COPY $vgpr12 - %13:_(s32) = COPY $vgpr13 - %14:_(s32) = COPY $vgpr14 - %15:_(s32) = COPY $vgpr15 - %16:_(s8) = G_TRUNC %0 - %17:_(s8) = G_TRUNC %1 - %18:_(s8) = G_TRUNC %2 - %19:_(s8) = G_TRUNC %3 - %20:_(s8) = G_TRUNC %4 - %21:_(s8) = G_TRUNC %5 - %22:_(s8) = G_TRUNC %6 - %23:_(s8) = G_TRUNC %7 - %24:_(s8) = G_TRUNC %8 - %25:_(s8) = G_TRUNC %9 - %26:_(s8) = G_TRUNC %10 - %27:_(s8) = G_TRUNC %11 - %28:_(s8) = G_TRUNC %12 - %29:_(s8) = 
G_TRUNC %13 - %30:_(s8) = G_TRUNC %14 - %31:_(s8) = G_TRUNC %15 - %32:_(<8 x s8>) = G_BUILD_VECTOR %16, %17, %18, %19, %20, %21, %22, %23 - %33:_(<8 x s8>) = G_BUILD_VECTOR %24, %25, %26, %27, %28, %29, %30, %31 - %34:_(<16 x s8>) = G_CONCAT_VECTORS %32, %33 - %35:_(<16 x s16>) = G_SEXT %34 - %36:_(<2 x s16>), %37:_(<2 x s16>), %38:_(<2 x s16>), %39:_(<2 x s16>), %40:_(<2 x s16>), %41:_(<2 x s16>), %42:_(<2 x s16>), %43:_(<2 x s16>) = G_UNMERGE_VALUES %35 - S_ENDPGM 0, implicit %36, implicit %37, implicit %38, implicit %39, implicit %40, implicit %41, implicit %42, implicit %43 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 8 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY1]], 8 + ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY2]], 8 + ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY3]], 8 + ; CHECK-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY4]], 8 + ; CHECK-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY5]], 8 + ; CHECK-NEXT: [[SEXT_INREG6:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY6]], 8 + ; CHECK-NEXT: [[SEXT_INREG7:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY7]], 8 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[SEXT_INREG]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[SEXT_INREG1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[SEXT_INREG2]], [[C]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[SEXT_INREG3]], [[C]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C1]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[SEXT_INREG4]], [[C]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[SEXT_INREG5]], [[C]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND5]], [[C1]](i32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[AND4]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(i32) = G_AND [[SEXT_INREG6]], [[C]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(i32) = G_AND [[SEXT_INREG7]], [[C]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[AND7]], [[C1]](i32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[AND6]], [[SHL3]] + ; CHECK-NEXT: 
[[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR3]](i32) + ; CHECK-NEXT: [[SEXT_INREG8:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY8]], 8 + ; CHECK-NEXT: [[SEXT_INREG9:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY9]], 8 + ; CHECK-NEXT: [[SEXT_INREG10:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY10]], 8 + ; CHECK-NEXT: [[SEXT_INREG11:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY11]], 8 + ; CHECK-NEXT: [[SEXT_INREG12:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY12]], 8 + ; CHECK-NEXT: [[SEXT_INREG13:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY13]], 8 + ; CHECK-NEXT: [[SEXT_INREG14:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY14]], 8 + ; CHECK-NEXT: [[SEXT_INREG15:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY15]], 8 + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(i32) = G_AND [[SEXT_INREG8]], [[C]] + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(i32) = G_AND [[SEXT_INREG9]], [[C]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[AND9]], [[C1]](i32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[AND8]], [[SHL4]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR4]](i32) + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(i32) = G_AND [[SEXT_INREG10]], [[C]] + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(i32) = G_AND [[SEXT_INREG11]], [[C]] + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[AND11]], [[C1]](i32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[AND10]], [[SHL5]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR5]](i32) + ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(i32) = G_AND [[SEXT_INREG12]], [[C]] + ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(i32) = G_AND [[SEXT_INREG13]], [[C]] + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[AND13]], [[C1]](i32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[AND12]], [[SHL6]] + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR6]](i32) + ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(i32) = G_AND [[SEXT_INREG14]], [[C]] + ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(i32) = G_AND [[SEXT_INREG15]], [[C]] + ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[AND15]], [[C1]](i32) + ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[AND14]], [[SHL7]] + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR7]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<2 x i16>), implicit [[BITCAST1]](<2 x i16>), implicit [[BITCAST2]](<2 x i16>), implicit [[BITCAST3]](<2 x i16>), implicit [[BITCAST4]](<2 x i16>), implicit [[BITCAST5]](<2 x i16>), implicit [[BITCAST6]](<2 x i16>), implicit [[BITCAST7]](<2 x i16>) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 + %4:_(i32) = COPY $vgpr4 + %5:_(i32) = COPY $vgpr5 + %6:_(i32) = COPY $vgpr6 + %7:_(i32) = COPY $vgpr7 + %8:_(i32) = COPY $vgpr8 + %9:_(i32) = COPY $vgpr9 + %10:_(i32) = COPY $vgpr10 + %11:_(i32) = COPY $vgpr11 + %12:_(i32) = COPY $vgpr12 + %13:_(i32) = COPY $vgpr13 + %14:_(i32) = COPY $vgpr14 + %15:_(i32) = COPY $vgpr15 + %16:_(i8) = G_TRUNC %0(i32) + %17:_(i8) = G_TRUNC %1(i32) + %18:_(i8) = G_TRUNC %2(i32) + %19:_(i8) = G_TRUNC %3(i32) + %20:_(i8) = G_TRUNC %4(i32) + %21:_(i8) = G_TRUNC %5(i32) + %22:_(i8) = G_TRUNC %6(i32) + %23:_(i8) = G_TRUNC %7(i32) + %24:_(i8) = G_TRUNC %8(i32) + %25:_(i8) = G_TRUNC %9(i32) + %26:_(i8) = G_TRUNC %10(i32) + %27:_(i8) = G_TRUNC %11(i32) + %28:_(i8) = G_TRUNC %12(i32) + %29:_(i8) = G_TRUNC %13(i32) + %30:_(i8) = G_TRUNC %14(i32) + %31:_(i8) = G_TRUNC %15(i32) + %32:_(<8 x i8>) = G_BUILD_VECTOR %16(i8), %17(i8), %18(i8), %19(i8), %20(i8), %21(i8), %22(i8), %23(i8) + %33:_(<8 x i8>) = G_BUILD_VECTOR %24(i8), %25(i8), %26(i8), %27(i8), %28(i8), %29(i8), %30(i8), %31(i8) + %34:_(<16 x i8>) = 
G_CONCAT_VECTORS %32(<8 x i8>), %33(<8 x i8>) + %35:_(<16 x i16>) = G_SEXT %34(<16 x i8>) + %36:_(<2 x i16>), %37:_(<2 x i16>), %38:_(<2 x i16>), %39:_(<2 x i16>), %40:_(<2 x i16>), %41:_(<2 x i16>), %42:_(<2 x i16>), %43:_(<2 x i16>) = G_UNMERGE_VALUES %35(<16 x i16>) + S_ENDPGM 0, implicit %36(<2 x i16>), implicit %37(<2 x i16>), implicit %38(<2 x i16>), implicit %39(<2 x i16>), implicit %40(<2 x i16>), implicit %41(<2 x i16>), implicit %42(<2 x i16>), implicit %43(<2 x i16>) ... --- @@ -879,18 +855,18 @@ body: | ; CHECK-LABEL: name: test_unmerge_values_s32_trunc_s96_of_merge_values_s192_s64 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[UV]](s32), implicit [[UV1]](s32), implicit [[UV2]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = COPY $vgpr4_vgpr5 - %3:_(s192) = G_MERGE_VALUES %0, %1, %2 - %4:_(s96) = G_TRUNC %3 - %5:_(s32), %6:_(s32), %7:_(s32) = G_UNMERGE_VALUES %4 - S_ENDPGM 0, implicit %5, implicit %6, implicit %7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[UV]](i32), implicit [[UV1]](i32), implicit [[UV2]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64) = COPY $vgpr4_vgpr5 + %3:_(i192) = G_MERGE_VALUES %0(i64), %1(i64), %2(i64) + %4:_(i96) = G_TRUNC %3(i192) + %5:_(i32), %6:_(i32), %7:_(i32) = G_UNMERGE_VALUES %4(i96) + S_ENDPGM 0, implicit %5(i32), implicit %6(i32), implicit %7(i32) ... 
@@ -902,28 +878,28 @@ body: | ; CHECK-LABEL: name: test_unmerge_values_s16_trunc_s96_of_merge_values_s192_s64 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s16), implicit [[TRUNC1]](s16), implicit [[TRUNC2]](s16), implicit [[TRUNC3]](s16), implicit [[TRUNC4]](s16), implicit [[TRUNC5]](s16) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = COPY $vgpr4_vgpr5 - %3:_(s192) = G_MERGE_VALUES %0, %1, %2 - %4:_(s96) = G_TRUNC %3 - %5:_(s16), %6:_(s16), %7:_(s16), %8:_(s16), %9:_(s16), %10:_(s16) = G_UNMERGE_VALUES %4 - S_ENDPGM 0, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9, implicit %10 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[UV]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[UV1]](i32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[UV2]](i32) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[UV2]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](i16), implicit [[TRUNC1]](i16), implicit [[TRUNC2]](i16), implicit [[TRUNC3]](i16), implicit [[TRUNC4]](i16), implicit [[TRUNC5]](i16) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64) = COPY $vgpr4_vgpr5 + %3:_(i192) = G_MERGE_VALUES %0(i64), %1(i64), %2(i64) + %4:_(i96) = G_TRUNC %3(i192) + %5:_(i16), %6:_(i16), %7:_(i16), %8:_(i16), %9:_(i16), %10:_(i16) = G_UNMERGE_VALUES %4(i96) + S_ENDPGM 0, implicit %5(i16), implicit %6(i16), implicit %7(i16), implicit %8(i16), implicit %9(i16), implicit %10(i16) ... 
@@ -935,35 +911,35 @@ body: | ; CHECK-LABEL: name: test_unmerge_values_s16_trunc_s96_of_merge_values_s192_s32 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s192), implicit [[MV1]](s96), implicit [[TRUNC]](s16), implicit [[TRUNC1]](s16), implicit [[TRUNC2]](s16), implicit [[TRUNC3]](s16), implicit [[TRUNC4]](s16), implicit [[TRUNC5]](s16) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = COPY $vgpr3 - %4:_(s32) = COPY $vgpr4 - %5:_(s32) = COPY $vgpr5 - %6:_(s192) = G_MERGE_VALUES %0, %1, %2, %3, %4, %5 - %7:_(s96) = G_TRUNC %6 - %8:_(s16), %9:_(s16), %10:_(s16), %11:_(s16), %12:_(s16), %13:_(s16) = G_UNMERGE_VALUES %7 - S_ENDPGM 0, implicit %6, implicit %7, implicit %8, implicit %9, implicit %10, implicit %11, implicit %12, implicit %13 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i192) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i96) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY1]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](i192), implicit [[MV1]](i96), implicit [[TRUNC]](i16), implicit [[TRUNC1]](i16), implicit [[TRUNC2]](i16), implicit [[TRUNC3]](i16), 
implicit [[TRUNC4]](i16), implicit [[TRUNC5]](i16) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 + %4:_(i32) = COPY $vgpr4 + %5:_(i32) = COPY $vgpr5 + %6:_(i192) = G_MERGE_VALUES %0(i32), %1(i32), %2(i32), %3(i32), %4(i32), %5(i32) + %7:_(i96) = G_TRUNC %6(i192) + %8:_(i16), %9:_(i16), %10:_(i16), %11:_(i16), %12:_(i16), %13:_(i16) = G_UNMERGE_VALUES %7(i96) + S_ENDPGM 0, implicit %6(i192), implicit %7(i96), implicit %8(i16), implicit %9(i16), implicit %10(i16), implicit %11(i16), implicit %12(i16), implicit %13(i16) ... @@ -972,19 +948,19 @@ name: test_unmerge_values_s64_anyext_s128_of_merge_values_s64 body: | bb.0: ; CHECK-LABEL: name: test_unmerge_values_s64_anyext_s128_of_merge_values_s64 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) - ; CHECK-NEXT: $vgpr2_vgpr3 = COPY [[DEF]](s64) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s64) = G_MERGE_VALUES %0, %1 - %3:_(s128) = G_ANYEXT %2 - %4:_(s64), %5:_(s64) = G_UNMERGE_VALUES %3 - $vgpr0_vgpr1 = COPY %4 - $vgpr2_vgpr3 = COPY %5 + ; CHECK: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + ; CHECK-NEXT: $vgpr2_vgpr3 = COPY [[DEF]](i64) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i64) = G_MERGE_VALUES %0(i32), %1(i32) + %3:_(i128) = G_ANYEXT %2(i64) + %4:_(i64), %5:_(i64) = G_UNMERGE_VALUES %3(i128) + $vgpr0_vgpr1 = COPY %4(i64) + $vgpr2_vgpr3 = COPY %5(i64) ... @@ -993,17 +969,17 @@ name: test_unmerge_values_s32_trunc_s64_of_merge_values_s128 body: | bb.0: ; CHECK-LABEL: name: test_unmerge_values_s32_trunc_s64_of_merge_values_s128 - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s128) = G_MERGE_VALUES %0, %1 - %3:_(s64) = G_TRUNC %2 - %4:_(s32), %5:_(s32) = G_UNMERGE_VALUES %3 - $vgpr0 = COPY %4 - $vgpr1 = COPY %5 + ; CHECK: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i128) = G_MERGE_VALUES %0(i64), %1(i64) + %3:_(i64) = G_TRUNC %2(i128) + %4:_(i32), %5:_(i32) = G_UNMERGE_VALUES %3(i64) + $vgpr0 = COPY %4(i32) + $vgpr1 = COPY %5(i32) ... 
--- @@ -1015,17 +991,17 @@ body: | ; CHECK-LABEL: name: test_unmerge_values_s8_v4s8_trunc_v4s32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[UV1]](s32) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s32) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[UV3]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s8), implicit [[TRUNC1]](s8), implicit [[TRUNC2]](s8), implicit [[TRUNC3]](s8) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<4 x s8>) = G_TRUNC %0 - %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8) = G_UNMERGE_VALUES %1 - S_ENDPGM 0, implicit %2, implicit %3, implicit %4, implicit %5 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i8) = G_TRUNC [[UV]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i8) = G_TRUNC [[UV1]](i32) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i8) = G_TRUNC [[UV2]](i32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(i8) = G_TRUNC [[UV3]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](i8), implicit [[TRUNC1]](i8), implicit [[TRUNC2]](i8), implicit [[TRUNC3]](i8) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<4 x i8>) = G_TRUNC %0(<4 x i32>) + %2:_(i8), %3:_(i8), %4:_(i8), %5:_(i8) = G_UNMERGE_VALUES %1(<4 x i8>) + S_ENDPGM 0, implicit %2(i8), implicit %3(i8), implicit %4(i8), implicit %5(i8) ... @@ -1038,17 +1014,17 @@ body: | ; CHECK-LABEL: name: test_unmerge_values_v2s8_v4s8_trunc_v4s32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](<2 x s16>), implicit [[TRUNC1]](<2 x s16>) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<4 x s8>) = G_TRUNC %0 - %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %1 - %4:_(<2 x s16>) = G_ANYEXT %2 - %5:_(<2 x s16>) = G_ANYEXT %3 - S_ENDPGM 0, implicit %4, implicit %5 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i32>), [[UV1:%[0-9]+]]:_(<2 x i32>) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x i16>) = G_TRUNC [[UV]](<2 x i32>) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x i16>) = G_TRUNC [[UV1]](<2 x i32>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](<2 x i16>), implicit [[TRUNC1]](<2 x i16>) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<4 x i8>) = G_TRUNC %0(<4 x i32>) + %2:_(<2 x i8>), %3:_(<2 x i8>) = G_UNMERGE_VALUES %1(<4 x i8>) + %4:_(<2 x i16>) = G_ANYEXT %2(<2 x i8>) + %5:_(<2 x i16>) = G_ANYEXT %3(<2 x i8>) + S_ENDPGM 0, implicit %4(<2 x i16>), implicit %5(<2 x i16>) ... 
@@ -1061,15 +1037,15 @@ body: | ; CHECK-LABEL: name: test_unmerge_values_v4s8_v8s8_trunc_v8s32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[UV]](<4 x s32>) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[UV1]](<4 x s32>) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](<4 x s8>), implicit [[TRUNC1]](<4 x s8>) - %0:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - %1:_(<8 x s8>) = G_TRUNC %0 - %2:_(<4 x s8>), %3:_(<4 x s8>) = G_UNMERGE_VALUES %1 - S_ENDPGM 0, implicit %2, implicit %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x i32>), [[UV1:%[0-9]+]]:_(<4 x i32>) = G_UNMERGE_VALUES [[COPY]](<8 x i32>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x i8>) = G_TRUNC [[UV]](<4 x i32>) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x i8>) = G_TRUNC [[UV1]](<4 x i32>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](<4 x i8>), implicit [[TRUNC1]](<4 x i8>) + %0:_(<8 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(<8 x i8>) = G_TRUNC %0(<8 x i32>) + %2:_(<4 x i8>), %3:_(<4 x i8>) = G_UNMERGE_VALUES %1(<8 x i8>) + S_ENDPGM 0, implicit %2(<4 x i8>), implicit %3(<4 x i8>) ... @@ -1083,17 +1059,17 @@ body: | ; CHECK-LABEL: name: test_unmerge_values_s16_v4s16_trunc_v4s32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s16), implicit [[TRUNC1]](s16), implicit [[TRUNC2]](s16), implicit [[TRUNC3]](s16) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<4 x s16>) = G_TRUNC %0 - %2:_(s16), %3:_(s16), %4:_(s16), %5:_(s16) = G_UNMERGE_VALUES %1 - S_ENDPGM 0, implicit %2, implicit %3, implicit %4, implicit %5 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[UV]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[UV1]](i32) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[UV2]](i32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[UV3]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](i16), implicit [[TRUNC1]](i16), implicit [[TRUNC2]](i16), implicit [[TRUNC3]](i16) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<4 x i16>) = G_TRUNC %0(<4 x i32>) + %2:_(i16), %3:_(i16), %4:_(i16), %5:_(i16) = G_UNMERGE_VALUES %1(<4 x i16>) + S_ENDPGM 0, implicit %2(i16), implicit %3(i16), implicit %4(i16), implicit %5(i16) ... 
@@ -1106,15 +1082,15 @@ body: | ; CHECK-LABEL: name: test_unmerge_values_v2s16_v4s16_trunc_v4s32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](<2 x s16>), implicit [[TRUNC1]](<2 x s16>) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<4 x s16>) = G_TRUNC %0 - %2:_(<2 x s16>), %3:_(<2 x s16>) = G_UNMERGE_VALUES %1 - S_ENDPGM 0, implicit %2, implicit %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i32>), [[UV1:%[0-9]+]]:_(<2 x i32>) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x i16>) = G_TRUNC [[UV]](<2 x i32>) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x i16>) = G_TRUNC [[UV1]](<2 x i32>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](<2 x i16>), implicit [[TRUNC1]](<2 x i16>) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<4 x i16>) = G_TRUNC %0(<4 x i32>) + %2:_(<2 x i16>), %3:_(<2 x i16>) = G_UNMERGE_VALUES %1(<4 x i16>) + S_ENDPGM 0, implicit %2(<2 x i16>), implicit %3(<2 x i16>) ... @@ -1127,17 +1103,17 @@ body: | ; CHECK-LABEL: name: test_unmerge_values_v2s16_v8s16_trunc_v8s32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>), [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV2]](<2 x s32>) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV3]](<2 x s32>) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](<2 x s16>), implicit [[TRUNC1]](<2 x s16>), implicit [[TRUNC2]](<2 x s16>), implicit [[TRUNC3]](<2 x s16>) - %0:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - %1:_(<8 x s16>) = G_TRUNC %0 - %2:_(<2 x s16>), %3:_(<2 x s16>), %4:_(<2 x s16>), %5:_(<2 x s16>) = G_UNMERGE_VALUES %1 - S_ENDPGM 0, implicit %2, implicit %3, implicit %4, implicit %5 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i32>), [[UV1:%[0-9]+]]:_(<2 x i32>), [[UV2:%[0-9]+]]:_(<2 x i32>), [[UV3:%[0-9]+]]:_(<2 x i32>) = G_UNMERGE_VALUES [[COPY]](<8 x i32>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x i16>) = G_TRUNC [[UV]](<2 x i32>) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x i16>) = G_TRUNC [[UV1]](<2 x i32>) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<2 x i16>) = G_TRUNC [[UV2]](<2 x i32>) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(<2 x i16>) = G_TRUNC [[UV3]](<2 x i32>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](<2 x i16>), implicit [[TRUNC1]](<2 x i16>), implicit [[TRUNC2]](<2 x i16>), implicit [[TRUNC3]](<2 x i16>) + %0:_(<8 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(<8 x i16>) = G_TRUNC %0(<8 x i32>) + %2:_(<2 x i16>), %3:_(<2 x i16>), %4:_(<2 x i16>), %5:_(<2 x i16>) = G_UNMERGE_VALUES %1(<8 
x i16>) + S_ENDPGM 0, implicit %2(<2 x i16>), implicit %3(<2 x i16>), implicit %4(<2 x i16>), implicit %5(<2 x i16>) ... @@ -1150,20 +1126,20 @@ body: | ; CHECK-LABEL: name: test_unmerge_values_v4s16_v8s16_trunc_v8s32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>), [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s32>), [[UV5:%[0-9]+]]:_(<2 x s32>), [[UV6:%[0-9]+]]:_(<2 x s32>), [[UV7:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV6]](<2 x s32>) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV7]](<2 x s32>) - ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[TRUNC2]](<2 x s16>), [[TRUNC3]](<2 x s16>) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<4 x s16>), implicit [[CONCAT_VECTORS1]](<4 x s16>) - %0:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - %1:_(<8 x s16>) = G_TRUNC %0 - %2:_(<4 x s16>), %3:_(<4 x s16>) = G_UNMERGE_VALUES %1 - S_ENDPGM 0, implicit %2, implicit %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i32>), [[UV1:%[0-9]+]]:_(<2 x i32>), [[UV2:%[0-9]+]]:_(<2 x i32>), [[UV3:%[0-9]+]]:_(<2 x i32>) = G_UNMERGE_VALUES [[COPY]](<8 x i32>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x i16>) = G_TRUNC [[UV]](<2 x i32>) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x i16>) = G_TRUNC [[UV1]](<2 x i32>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x i16>), [[TRUNC1]](<2 x i16>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x i32>), [[UV5:%[0-9]+]]:_(<2 x i32>), [[UV6:%[0-9]+]]:_(<2 x i32>), [[UV7:%[0-9]+]]:_(<2 x i32>) = G_UNMERGE_VALUES [[COPY]](<8 x i32>) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<2 x i16>) = G_TRUNC [[UV6]](<2 x i32>) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(<2 x i16>) = G_TRUNC [[UV7]](<2 x i32>) + ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[TRUNC2]](<2 x i16>), [[TRUNC3]](<2 x i16>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<4 x i16>), implicit [[CONCAT_VECTORS1]](<4 x i16>) + %0:_(<8 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(<8 x i16>) = G_TRUNC %0(<8 x i32>) + %2:_(<4 x i16>), %3:_(<4 x i16>) = G_UNMERGE_VALUES %1(<8 x i16>) + S_ENDPGM 0, implicit %2(<4 x i16>), implicit %3(<4 x i16>) ... 
@@ -1176,22 +1152,22 @@ body: | ; CHECK-LABEL: name: test_unmerge_values_s8_v4s8_trunc_v4s16 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[BITCAST]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[BITCAST1]](s32) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR1]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s8), implicit [[TRUNC1]](s8), implicit [[TRUNC2]](s8), implicit [[TRUNC3]](s8) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s8>) = G_TRUNC %0 - %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8) = G_UNMERGE_VALUES %1 - S_ENDPGM 0, implicit %2, implicit %3, implicit %4, implicit %5 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i8) = G_TRUNC [[BITCAST]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i8) = G_TRUNC [[LSHR]](i32) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i8) = G_TRUNC [[BITCAST1]](i32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(i8) = G_TRUNC [[LSHR1]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](i8), implicit [[TRUNC1]](i8), implicit [[TRUNC2]](i8), implicit [[TRUNC3]](i8) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i8>) = G_TRUNC %0(<4 x i16>) + %2:_(i8), %3:_(i8), %4:_(i8), %5:_(i8) = G_UNMERGE_VALUES %1(<4 x i8>) + S_ENDPGM 0, implicit %2(i8), implicit %3(i8), implicit %4(i8), implicit %5(i8) ... 
@@ -1204,15 +1180,15 @@ body: | ; CHECK-LABEL: name: test_unmerge_values_v2s8_v4s8_trunc_v4s16 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[UV]](<2 x s16>), implicit [[UV1]](<2 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s8>) = G_TRUNC %0 - %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %1 - %4:_(<2 x s16>) = G_ANYEXT %2 - %5:_(<2 x s16>) = G_ANYEXT %3 - S_ENDPGM 0, implicit %4, implicit %5 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[UV]](<2 x i16>), implicit [[UV1]](<2 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i8>) = G_TRUNC %0(<4 x i16>) + %2:_(<2 x i8>), %3:_(<2 x i8>) = G_UNMERGE_VALUES %1(<4 x i8>) + %4:_(<2 x i16>) = G_ANYEXT %2(<2 x i8>) + %5:_(<2 x i16>) = G_ANYEXT %3(<2 x i8>) + S_ENDPGM 0, implicit %4(<2 x i16>), implicit %5(<2 x i16>) ... @@ -1225,17 +1201,17 @@ body: | ; CHECK-LABEL: name: test_unmerge_values_s32_v4s32_trunc_v4s64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV]](s64) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV1]](s64) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s32), implicit [[TRUNC1]](s32), implicit [[TRUNC2]](s32), implicit [[TRUNC3]](s32) - %0:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - %1:_(<4 x s32>) = G_TRUNC %0 - %2:_(s32), %3:_(s32), %4:_(s32), %5:_(s32) = G_UNMERGE_VALUES %1 - S_ENDPGM 0, implicit %2, implicit %3, implicit %4, implicit %5 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64), [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<4 x i64>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[UV]](i64) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i32) = G_TRUNC [[UV1]](i64) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i32) = G_TRUNC [[UV2]](i64) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(i32) = G_TRUNC [[UV3]](i64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](i32), implicit [[TRUNC1]](i32), implicit [[TRUNC2]](i32), implicit [[TRUNC3]](i32) + %0:_(<4 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(<4 x i32>) = G_TRUNC %0(<4 x i64>) + %2:_(i32), %3:_(i32), %4:_(i32), %5:_(i32) = G_UNMERGE_VALUES %1(<4 x i32>) + S_ENDPGM 0, implicit %2(i32), implicit %3(i32), implicit %4(i32), implicit %5(i32) ... 
@@ -1248,20 +1224,20 @@ body: | ; CHECK-LABEL: name: test_unmerge_values_v2s32_v4s32_trunc_v4s64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV]](s64) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV1]](s64) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[TRUNC]](s32), [[TRUNC1]](s32) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[UV6]](s64) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[UV7]](s64) - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[TRUNC2]](s32), [[TRUNC3]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<2 x s32>), implicit [[BUILD_VECTOR1]](<2 x s32>) - %0:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - %1:_(<4 x s32>) = G_TRUNC %0 - %2:_(<2 x s32>), %3:_(<2 x s32>) = G_UNMERGE_VALUES %1 - S_ENDPGM 0, implicit %2, implicit %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64), [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<4 x i64>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[UV]](i64) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i32) = G_TRUNC [[UV1]](i64) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[TRUNC]](i32), [[TRUNC1]](i32) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64), [[UV6:%[0-9]+]]:_(i64), [[UV7:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<4 x i64>) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i32) = G_TRUNC [[UV6]](i64) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(i32) = G_TRUNC [[UV7]](i64) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[TRUNC2]](i32), [[TRUNC3]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<2 x i32>), implicit [[BUILD_VECTOR1]](<2 x i32>) + %0:_(<4 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(<4 x i32>) = G_TRUNC %0(<4 x i64>) + %2:_(<2 x i32>), %3:_(<2 x i32>) = G_UNMERGE_VALUES %1(<4 x i32>) + S_ENDPGM 0, implicit %2(<2 x i32>), implicit %3(<2 x i32>) ... 
@@ -1274,17 +1250,17 @@ body: | ; CHECK-LABEL: name: test_unmerge_values_s16_v4s16_trunc_v4s64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s64) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s64) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s64) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s64) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s16), implicit [[TRUNC1]](s16), implicit [[TRUNC2]](s16), implicit [[TRUNC3]](s16) - %0:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - %1:_(<4 x s16>) = G_TRUNC %0 - %2:_(s16), %3:_(s16), %4:_(s16), %5:_(s16) = G_UNMERGE_VALUES %1 - S_ENDPGM 0, implicit %2, implicit %3, implicit %4, implicit %5 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64), [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<4 x i64>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[UV]](i64) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[UV1]](i64) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[UV2]](i64) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[UV3]](i64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](i16), implicit [[TRUNC1]](i16), implicit [[TRUNC2]](i16), implicit [[TRUNC3]](i16) + %0:_(<4 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(<4 x i16>) = G_TRUNC %0(<4 x i64>) + %2:_(i16), %3:_(i16), %4:_(i16), %5:_(i16) = G_UNMERGE_VALUES %1(<4 x i16>) + S_ENDPGM 0, implicit %2(i16), implicit %3(i16), implicit %4(i16), implicit %5(i16) ... 
@@ -1297,30 +1273,30 @@ body: | ; CHECK-LABEL: name: test_unmerge_values_v2s16_v4s16_trunc_v4s64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV1]](s64) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[UV6]](s64) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[TRUNC2]], [[C]] - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[UV7]](s64) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[TRUNC3]], [[C]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<2 x s16>), implicit [[BITCAST1]](<2 x s16>) - %0:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - %1:_(<4 x s16>) = G_TRUNC %0 - %2:_(<2 x s16>), %3:_(<2 x s16>) = G_UNMERGE_VALUES %1 - S_ENDPGM 0, implicit %2, implicit %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64), [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<4 x i64>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[UV]](i64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[TRUNC]], [[C]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i32) = G_TRUNC [[UV1]](i64) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[TRUNC1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64), [[UV6:%[0-9]+]]:_(i64), [[UV7:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<4 x i64>) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i32) = G_TRUNC [[UV6]](i64) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[TRUNC2]], [[C]] + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(i32) = G_TRUNC [[UV7]](i64) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[TRUNC3]], [[C]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C1]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<2 x i16>), implicit [[BITCAST1]](<2 x i16>) + %0:_(<4 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 
+ %1:_(<4 x i16>) = G_TRUNC %0(<4 x i64>) + %2:_(<2 x i16>), %3:_(<2 x i16>) = G_UNMERGE_VALUES %1(<4 x i16>) + S_ENDPGM 0, implicit %2(<2 x i16>), implicit %3(<2 x i16>) ... @@ -1333,29 +1309,29 @@ body: | ; CHECK-LABEL: name: test_unmerge_values_s16_from_v3s16_from_v6s16 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s16), implicit [[TRUNC1]](s16), implicit [[TRUNC2]](s16), implicit [[TRUNC3]](s16), implicit [[TRUNC4]](s16), implicit [[TRUNC5]](s16) - %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0 - %3:_(s16), %4:_(s16), %5:_(s16) = G_UNMERGE_VALUES %1 - %6:_(s16), %7:_(s16), %8:_(s16) = G_UNMERGE_VALUES %2 - S_ENDPGM 0, implicit %3, implicit %4, implicit %5, implicit %6, implicit %7, implicit %8 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV5]](<2 x i16>) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST3]](i32) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit 
[[TRUNC]](i16), implicit [[TRUNC1]](i16), implicit [[TRUNC2]](i16), implicit [[TRUNC3]](i16), implicit [[TRUNC4]](i16), implicit [[TRUNC5]](i16) + %0:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x i16>), %2:_(<3 x i16>) = G_UNMERGE_VALUES %0(<6 x i16>) + %3:_(i16), %4:_(i16), %5:_(i16) = G_UNMERGE_VALUES %1(<3 x i16>) + %6:_(i16), %7:_(i16), %8:_(i16) = G_UNMERGE_VALUES %2(<3 x i16>) + S_ENDPGM 0, implicit %3(i16), implicit %4(i16), implicit %5(i16), implicit %6(i16), implicit %7(i16), implicit %8(i16) ... @@ -1368,27 +1344,27 @@ body: | ; CHECK-LABEL: name: test_unmerge_values_s16_from_v3s16_from_v6s16_other_def_use ; CHECK: liveins: $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LSHR1]](s32), [[BITCAST3]](s32), [[LSHR2]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s16), implicit [[TRUNC1]](s16), implicit [[TRUNC2]](s16), implicit [[BUILD_VECTOR]](<3 x s32>) - %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0 - %3:_(s16), %4:_(s16), %5:_(s16) = G_UNMERGE_VALUES %1 - %6:_(<3 x s32>) = G_ANYEXT %2 - S_ENDPGM 0, implicit %3, implicit %4, implicit %5, implicit %6 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV5]](<2 x i16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = 
G_BUILD_VECTOR [[LSHR1]](i32), [[BITCAST3]](i32), [[LSHR2]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](i16), implicit [[TRUNC1]](i16), implicit [[TRUNC2]](i16), implicit [[BUILD_VECTOR]](<3 x i32>) + %0:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x i16>), %2:_(<3 x i16>) = G_UNMERGE_VALUES %0(<6 x i16>) + %3:_(i16), %4:_(i16), %5:_(i16) = G_UNMERGE_VALUES %1(<3 x i16>) + %6:_(<3 x i32>) = G_ANYEXT %2(<3 x i16>) + S_ENDPGM 0, implicit %3(i16), implicit %4(i16), implicit %5(i16), implicit %6(<3 x i32>) ... @@ -1401,34 +1377,34 @@ body: | ; CHECK-LABEL: name: test_unmerge_values_s32_from_sext_v2s64_from_v2s1 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY2]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY2]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP]](s1) - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[ANYEXT]], 1 - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP1]](s1) - ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[ANYEXT1]], 1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG1]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY2]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[COPY2]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[ICMP]](i1) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[ANYEXT]], 1 + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[ICMP1]](i1) + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i64) = G_SEXT_INREG [[ANYEXT1]], 1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SEXT_INREG]](i64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SEXT_INREG1]](i64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](i32) ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s1) = G_ICMP intpred(eq), %0, %2 - %4:_(s1) = G_ICMP intpred(eq), %1, %2 - %5:_(<2 x s1>) = G_BUILD_VECTOR %3, %4 - %6:_(<2 x s64>) = G_SEXT %5 - %7:_(s32), %8:_(s32), %9:_(s32), %10:_(s32) = G_UNMERGE_VALUES %6 - $vgpr0 = COPY %7 - $vgpr1 = COPY %8 - $vgpr2 = COPY %9 - $vgpr3 = COPY %10 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i1) = G_ICMP intpred(eq), %0(i32), %2 + %4:_(i1) = G_ICMP intpred(eq), %1(i32), %2 + %5:_(<2 x i1>) = G_BUILD_VECTOR %3(i1), %4(i1) + %6:_(<2 x i64>) = G_SEXT %5(<2 x i1>) + %7:_(i32), %8:_(i32), %9:_(i32), %10:_(i32) = G_UNMERGE_VALUES %6(<2 x i64>) + $vgpr0 = COPY %7(i32) + $vgpr1 = 
COPY %8(i32) + $vgpr2 = COPY %9(i32) + $vgpr3 = COPY %10(i32) S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ... @@ -1442,35 +1418,35 @@ body: | ; CHECK-LABEL: name: test_unmerge_values_s32_from_zext_v2s64_from_v2s1 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY2]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY2]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP]](s1) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]] - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP1]](s1) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT1]], [[C]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY2]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[COPY2]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[ICMP]](i1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[ANYEXT]], [[C]] + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[ICMP1]](i1) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[ANYEXT1]], [[C]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND]](i64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND1]](i64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](i32) ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s1) = G_ICMP intpred(eq), %0, %2 - %4:_(s1) = G_ICMP intpred(eq), %1, %2 - %5:_(<2 x s1>) = G_BUILD_VECTOR %3, %4 - %6:_(<2 x s64>) = G_ZEXT %5 - %7:_(s32), %8:_(s32), %9:_(s32), %10:_(s32) = G_UNMERGE_VALUES %6 - $vgpr0 = COPY %7 - $vgpr1 = COPY %8 - $vgpr2 = COPY %9 - $vgpr3 = COPY %10 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i1) = G_ICMP intpred(eq), %0(i32), %2 + %4:_(i1) = G_ICMP intpred(eq), %1(i32), %2 + %5:_(<2 x i1>) = G_BUILD_VECTOR %3(i1), %4(i1) + %6:_(<2 x i64>) = G_ZEXT %5(<2 x i1>) + %7:_(i32), %8:_(i32), %9:_(i32), %10:_(i32) = G_UNMERGE_VALUES %6(<2 x i64>) + $vgpr0 = COPY %7(i32) + $vgpr1 = COPY %8(i32) + $vgpr2 = COPY %9(i32) + $vgpr3 = COPY %10(i32) S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ... 
@@ -1484,34 +1460,34 @@ body: | ; CHECK-LABEL: name: test_unmerge_values_s32_from_anyext_v2s64_from_v2s1 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY2]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY2]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP]](s1) - ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP1]](s1) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT2]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT3]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT1]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY2]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[COPY2]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP]](i1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP1]](i1) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(i64) = G_ANYEXT [[ICMP]](i1) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(i64) = G_ANYEXT [[ICMP1]](i1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ANYEXT2]](i64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ANYEXT3]](i64) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT1]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](i32) ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s1) = G_ICMP intpred(eq), %0, %2 - %4:_(s1) = G_ICMP intpred(eq), %1, %2 - %5:_(<2 x s1>) = G_BUILD_VECTOR %3, %4 - %6:_(<2 x s64>) = G_ANYEXT %5 - %7:_(s32), %8:_(s32), %9:_(s32), %10:_(s32) = G_UNMERGE_VALUES %6 - $vgpr0 = COPY %7 - $vgpr1 = COPY %8 - $vgpr2 = COPY %9 - $vgpr3 = COPY %10 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i1) = G_ICMP intpred(eq), %0(i32), %2 + %4:_(i1) = G_ICMP intpred(eq), %1(i32), %2 + %5:_(<2 x i1>) = G_BUILD_VECTOR %3(i1), %4(i1) + %6:_(<2 x i64>) = G_ANYEXT %5(<2 x i1>) + %7:_(i32), %8:_(i32), %9:_(i32), %10:_(i32) = G_UNMERGE_VALUES %6(<2 x i64>) + $vgpr0 = COPY %7(i32) + $vgpr1 = COPY %8(i32) + $vgpr2 = COPY %9(i32) + $vgpr3 = COPY %10(i32) S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ... 
@@ -1525,46 +1501,46 @@ body: | ; CHECK-LABEL: name: test_unmerge_values_s32_from_sext_v3s64_from_v3s1 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]] - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[COPY3]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP]](s1) - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[ANYEXT]], 1 - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP1]](s1) - ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[ANYEXT1]], 1 - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP2]](s1) - ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s64) = G_SEXT_INREG [[ANYEXT2]], 1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG1]](s64) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG2]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY3]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[COPY3]] + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[COPY3]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[ICMP]](i1) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[ANYEXT]], 1 + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[ICMP1]](i1) + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i64) = G_SEXT_INREG [[ANYEXT1]], 1 + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(i64) = G_ANYEXT [[ICMP2]](i1) + ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i64) = G_SEXT_INREG [[ANYEXT2]], 1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SEXT_INREG]](i64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SEXT_INREG1]](i64) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SEXT_INREG2]](i64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](i32) ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = COPY $vgpr3 - %4:_(s1) = G_ICMP intpred(eq), %0, %3 - %5:_(s1) = G_ICMP intpred(eq), %1, %3 - %6:_(s1) = G_ICMP intpred(eq), %2, %3 - %7:_(<3 x s1>) = G_BUILD_VECTOR %4, %5, %6 - 
%8:_(<3 x s64>) = G_SEXT %7 - %9:_(s32), %10:_(s32), %11:_(s32), %12:_(s32), %13:_(s32), %14:_(s32) = G_UNMERGE_VALUES %8 - $vgpr0 = COPY %9 - $vgpr1 = COPY %10 - $vgpr2 = COPY %11 - $vgpr3 = COPY %12 - $vgpr4 = COPY %13 - $vgpr5 = COPY %14 - S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 , implicit $vgpr4, implicit $vgpr5 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 + %4:_(i1) = G_ICMP intpred(eq), %0(i32), %3 + %5:_(i1) = G_ICMP intpred(eq), %1(i32), %3 + %6:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + %7:_(<3 x i1>) = G_BUILD_VECTOR %4(i1), %5(i1), %6(i1) + %8:_(<3 x i64>) = G_SEXT %7(<3 x i1>) + %9:_(i32), %10:_(i32), %11:_(i32), %12:_(i32), %13:_(i32), %14:_(i32) = G_UNMERGE_VALUES %8(<3 x i64>) + $vgpr0 = COPY %9(i32) + $vgpr1 = COPY %10(i32) + $vgpr2 = COPY %11(i32) + $vgpr3 = COPY %12(i32) + $vgpr4 = COPY %13(i32) + $vgpr5 = COPY %14(i32) + S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 ... @@ -1574,20 +1550,17 @@ body: | bb.0: ; CHECK-LABEL: name: test_unmerge_values_look_through_scalar_to_vector_bitcast - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[MV]](s64) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<2 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s64) = G_MERGE_VALUES %0:_(s32), %1:_(s32) - %3:_(<2 x s32>) = G_BITCAST %2:_(s64) - %4:_(s32), %5:_(s32) = G_UNMERGE_VALUES %3:_(<2 x s32>) - $vgpr0 = COPY %4 - $vgpr1 = COPY %5 + ; CHECK: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[COPY1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i64) = G_MERGE_VALUES %0(i32), %1(i32) + %3:_(<2 x i32>) = G_BITCAST %2(i64) + %4:_(i32), %5:_(i32) = G_UNMERGE_VALUES %3(<2 x i32>) + $vgpr0 = COPY %4(i32) + $vgpr1 = COPY %5(i32) ... 
--- @@ -1596,18 +1569,18 @@ body: | bb.0: ; CHECK-LABEL: name: test_unmerge_values_look_through_vector_to_scalar_bitcast - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(<2 x s32>) = G_BUILD_VECTOR %0:_(s32), %1:_(s32) - %3:_(s64) = G_BITCAST %2:_(<2 x s32>) - %4:_(s32), %5:_(s32) = G_UNMERGE_VALUES %3:_(s64) - $vgpr0 = COPY %4 - $vgpr1 = COPY %5 + ; CHECK: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i64) = G_BITCAST [[BUILD_VECTOR]](<2 x i32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](i64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(<2 x i32>) = G_BUILD_VECTOR %0(i32), %1(i32) + %3:_(i64) = G_BITCAST %2(<2 x i32>) + %4:_(i32), %5:_(i32) = G_UNMERGE_VALUES %3(i64) + $vgpr0 = COPY %4(i32) + $vgpr1 = COPY %5(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir index 7334ae71d63cf..3f30ae23cf9fd 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir @@ -10,15 +10,15 @@ body: | ; CHECK-LABEL: name: test_zext_trunc_v2s32_to_v2s16_to_v2s32 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[COPY]], [[BUILD_VECTOR]] - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AND]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s16>) = G_TRUNC %0 - %2:_(<2 x s32>) = G_ZEXT %1 - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[C]](i32), [[C]](i32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x i32>) = G_AND [[COPY]], [[BUILD_VECTOR]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AND]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i16>) = G_TRUNC %0(<2 x i32>) + %2:_(<2 x i32>) = G_ZEXT %1(<2 x i16>) + $vgpr0_vgpr1 = COPY %2(<2 x i32>) ... 
--- @@ -30,19 +30,19 @@ body: | ; CHECK-LABEL: name: test_zext_trunc_v2s32_to_v2s16_to_v2s64 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV]](s32) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV1]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT1]], [[C]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[AND]](s64), [[AND1]](s64) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s16>) = G_TRUNC %0 - %2:_(<2 x s64>) = G_ZEXT %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[UV]](i32) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[UV1]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[ANYEXT]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[ANYEXT1]], [[C]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[AND]](i64), [[AND1]](i64) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i16>) = G_TRUNC %0(<2 x i32>) + %2:_(<2 x i64>) = G_ZEXT %1(<2 x i16>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<2 x i64>) ... --- @@ -54,20 +54,20 @@ body: | ; CHECK-LABEL: name: test_zext_trunc_v2s32_to_v2s8_to_v2s16 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL]] - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[TRUNC]], [[BITCAST]] - ; CHECK-NEXT: $vgpr0 = COPY [[AND]](<2 x s16>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s8>) = G_TRUNC %0 - %2:_(<2 x s16>) = G_ZEXT %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x i16>) = G_TRUNC [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY1]], [[SHL]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x i16>) = G_AND [[TRUNC]], [[BITCAST]] + ; CHECK-NEXT: $vgpr0 = COPY [[AND]](<2 x i16>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i8>) = G_TRUNC %0(<2 x i32>) + %2:_(<2 x i16>) = G_ZEXT %1(<2 x i8>) + $vgpr0 = COPY %2(<2 x i16>) ... 
--- @@ -79,20 +79,20 @@ body: | ; CHECK-LABEL: name: test_zext_trunc_v3s32_to_v3s16_to_v3s32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] - ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](<2 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[AND1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR2]](<3 x s32>) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s16>) = G_TRUNC %0 - %2:_(<3 x s32>) = G_ZEXT %1 - $vgpr0_vgpr1_vgpr2 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[C]](i32), [[C]](i32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x i32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[UV2]], [[C]] + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND]](<2 x i32>) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[UV3]](i32), [[UV4]](i32), [[AND1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR2]](<3 x i32>) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x i16>) = G_TRUNC %0(<3 x i32>) + %2:_(<3 x i32>) = G_ZEXT %1(<3 x i16>) + $vgpr0_vgpr1_vgpr2 = COPY %2(<3 x i32>) ... 
# Test for "Too many bits for uint64_t" assertion when combining @@ -106,20 +106,20 @@ body: | ; CHECK-LABEL: name: test_zext_128_trunc_s128_merge ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C1]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[AND]](s64), [[AND1]](s64) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr0_vgpr1 - %2:_(s128) = G_MERGE_VALUES %0, %1 - %3:_(s96) = G_TRUNC %2 - %4:_(s128) = G_ZEXT %3 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 -1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4294967295 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[AND]](i64), [[AND1]](i64) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr0_vgpr1 + %2:_(i128) = G_MERGE_VALUES %0(i64), %1(i64) + %3:_(i96) = G_TRUNC %2(i128) + %4:_(i128) = G_ZEXT %3(i96) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %4(i128) ... --- @@ -131,19 +131,19 @@ body: | ; CHECK-LABEL: name: test_zext_s8_to_s32_of_sext_s1_to_s8 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT]], [[C]] - ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s1) = G_ICMP intpred(eq), %0, %1 - %3:_(s8) = G_SEXT %2 - %4:_(s32) = G_ZEXT %3 - $vgpr0 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY1]] + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[SEXT]], [[C]] + ; CHECK-NEXT: $vgpr0 = COPY [[AND]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i1) = G_ICMP intpred(eq), %0(i32), %1 + %3:_(i8) = G_SEXT %2(i1) + %4:_(i32) = G_ZEXT %3(i8) + $vgpr0 = COPY %4(i32) ... 
--- @@ -155,19 +155,19 @@ body: | ; CHECK-LABEL: name: test_zext_s8_to_s32_of_sext_s1_to_s16 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT]], [[C]] - ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s1) = G_ICMP intpred(eq), %0, %1 - %3:_(s16) = G_SEXT %2 - %4:_(s32) = G_ZEXT %3 - $vgpr0 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY1]] + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[SEXT]], [[C]] + ; CHECK-NEXT: $vgpr0 = COPY [[AND]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i1) = G_ICMP intpred(eq), %0(i32), %1 + %3:_(i16) = G_SEXT %2(i1) + %4:_(i32) = G_ZEXT %3(i16) + $vgpr0 = COPY %4(i32) ... --- @@ -180,16 +180,16 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 8 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG]], [[C]] - ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LOAD]], 8 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[SEXT_INREG]], [[C]] + ; CHECK-NEXT: $vgpr0 = COPY [[AND]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s8) = G_LOAD %0 :: (load (s8), addrspace 1) - %2:_(s16) = G_SEXT %1 - %3:_(s32) = G_ZEXT %2 - $vgpr0 = COPY %3 + %1:_(i8) = G_LOAD %0(p1) :: (load (i8), addrspace 1) + %2:_(i16) = G_SEXT %1(i8) + %3:_(i32) = G_ZEXT %2(i16) + $vgpr0 = COPY %3(i32) ... 
--- @@ -201,27 +201,27 @@ body: | ; CHECK-LABEL: name: test_zext_v2s8_to_v2s32_of_sext_v2s1_to_v2s8 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV]](s32), [[UV2]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV1]](s32), [[UV3]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT]], 1 - ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT1]], 1 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AND]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s1>) = G_ICMP intpred(eq), %0, %1 - %3:_(<2 x s8>) = G_SEXT %2 - %4:_(<2 x s32>) = G_ZEXT %3 - $vgpr0_vgpr1 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[UV]](i32), [[UV2]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[UV1]](i32), [[UV3]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP]](i1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP1]](i1) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[ANYEXT]], 1 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[ANYEXT1]], 1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SEXT_INREG]](i32), [[SEXT_INREG1]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[C]](i32), [[C]](i32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x i32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AND]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i1>) = G_ICMP intpred(eq), %0(<2 x i32>), %1 + %3:_(<2 x i8>) = G_SEXT %2(<2 x i1>) + %4:_(<2 x i32>) = G_ZEXT %3(<2 x i8>) + $vgpr0_vgpr1 = COPY %4(<2 x i32>) ... 
--- @@ -233,27 +233,27 @@ body: | ; CHECK-LABEL: name: test_zext_v2s8_to_v2s32_of_sext_v2s1_to_v2s16 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV]](s32), [[UV2]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV1]](s32), [[UV3]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT]], 1 - ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT1]], 1 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AND]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s1>) = G_ICMP intpred(eq), %0, %1 - %3:_(<2 x s16>) = G_SEXT %2 - %4:_(<2 x s32>) = G_ZEXT %3 - $vgpr0_vgpr1 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[UV]](i32), [[UV2]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[UV1]](i32), [[UV3]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP]](i1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP1]](i1) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[ANYEXT]], 1 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[ANYEXT1]], 1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SEXT_INREG]](i32), [[SEXT_INREG1]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[C]](i32), [[C]](i32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x i32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AND]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i1>) = G_ICMP intpred(eq), %0(<2 x i32>), %1 + %3:_(<2 x i16>) = G_SEXT %2(<2 x i1>) + %4:_(<2 x i32>) = G_ZEXT %3(<2 x i16>) + $vgpr0_vgpr1 = COPY %4(<2 x i32>) ... 
--- @@ -266,19 +266,19 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 8 - ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 8 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AND]](<2 x s32>) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LOAD]], 8 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR]], 8 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SEXT_INREG]](i32), [[SEXT_INREG1]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[C1]](i32), [[C1]](i32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x i32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AND]](<2 x i32>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), addrspace 1) - %2:_(<2 x s16>) = G_SEXT %1 - %3:_(<2 x s32>) = G_ZEXT %2 - $vgpr0_vgpr1 = COPY %3 + %1:_(<2 x i8>) = G_LOAD %0(p1) :: (load (<2 x i8>), addrspace 1) + %2:_(<2 x i16>) = G_SEXT %1(<2 x i8>) + %3:_(<2 x i32>) = G_ZEXT %2(<2 x i16>) + $vgpr0_vgpr1 = COPY %3(<2 x i32>) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.mir index daf7b3a08dc6b..8d26b51ba9f40 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.mir @@ -28,30 +28,30 @@ body: | ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[MV]](p4) :: (load (<4 x s32>), align 4, addrspace 4) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) - ; GFX10-NEXT: G_STORE [[COPY4]](s32), [[COPY]](p5) :: (store (s32), align 8, addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD]](p5) :: (store (s32) into unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY1]](i32), [[COPY2]](i32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[MV]](p4) :: (load (<4 x i32>), align 4, addrspace 4) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<4 x i32>) + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[UV3]](i32) + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<4 x i32>) + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[UV6]](i32) + ; GFX10-NEXT: G_STORE [[COPY4]](i32), [[COPY]](p5) :: (store (i32), align 8, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: G_STORE [[COPY3]](i32), [[PTR_ADD]](p5) :: (store (i32) into unknown-address + 4, addrspace 5) %0:_(p5) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(p4) = G_MERGE_VALUES %1(s32), %2(s32) - %4:_(<4 x s32>) = G_IMPLICIT_DEF - %5:_(<4 x s32>) = G_LOAD %3(p4) :: (load (<4 x s32>), align 4, addrspace 4) - %6:_(s32) = G_CONSTANT i32 3 - %7:_(s32) = G_EXTRACT_VECTOR_ELT %5(<4 x s32>), %6(s32) - %8:_(<2 x s32>) = G_SHUFFLE_VECTOR %5(<4 x s32>), %4, shufflemask(2, undef) - %9:_(s32) = G_CONSTANT i32 1 - %10:_(<2 x s32>) = G_INSERT_VECTOR_ELT %8, %7(s32), %9(s32) - G_STORE %10(<2 x s32>), %0(p5) :: (store (<2 x s32>), addrspace 5) + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(p4) = G_MERGE_VALUES %1(i32), %2(i32) + %4:_(<4 x i32>) = G_IMPLICIT_DEF + %5:_(<4 x i32>) = G_LOAD %3(p4) :: (load (<4 x i32>), align 4, addrspace 4) + %6:_(i32) = G_CONSTANT i32 3 + %7:_(i32) = G_EXTRACT_VECTOR_ELT %5(<4 x i32>), %6(i32) + %8:_(<2 x i32>) = 
G_SHUFFLE_VECTOR %5(<4 x i32>), %4, shufflemask(2, undef) + %9:_(i32) = G_CONSTANT i32 1 + %10:_(<2 x i32>) = G_INSERT_VECTOR_ELT %8, %7(i32), %9(i32) + G_STORE %10(<2 x i32>), %0(p5) :: (store (<2 x i32>), addrspace 5) ... --- @@ -64,32 +64,32 @@ body: | ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[MV]](p4) :: (load (<4 x s32>), align 4, addrspace 4) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) - ; GFX10-NEXT: G_STORE [[COPY4]](s32), [[COPY]](p5) :: (store (s32), align 8, addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD]](p5) :: (store (s32) into unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY1]](i32), [[COPY2]](i32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[MV]](p4) :: (load (<4 x i32>), align 4, addrspace 4) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<4 x i32>) + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[UV3]](i32) + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<4 x i32>) + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[UV6]](i32) + ; GFX10-NEXT: G_STORE [[COPY4]](i32), [[COPY]](p5) :: (store (i32), align 8, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: G_STORE [[COPY3]](i32), [[PTR_ADD]](p5) :: (store (i32) into unknown-address + 4, addrspace 5) %0:_(p5) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(p4) = G_MERGE_VALUES %1(s32), %2(s32) - %4:_(<4 x s32>) = G_LOAD %3(p4) :: (load (<4 x s32>), align 4, addrspace 4) - %5:_(s32) = G_EXTRACT %4(<4 x s32>), 96 - %6:_(s32) = G_EXTRACT %4(<4 x s32>), 64 - %7:_(s32) = G_IMPLICIT_DEF - %8:_(<2 x s32>) = G_BUILD_VECTOR %6(s32), %7(s32) - %9:_(<2 x s32>) = G_INSERT %8, %5(s32), 32 - %deaf_def:_(s32), %11:_(s32) = G_UNMERGE_VALUES %9(<2 x s32>) - G_STORE %6(s32), %0(p5) :: (store (s32), align 8, addrspace 5) - %12:_(s32) = G_CONSTANT i32 4 - %13:_(p5) = G_PTR_ADD %0, %12(s32) - G_STORE %11(s32), %13(p5) :: (store (s32) into unknown-address + 4, addrspace 5) + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(p4) = G_MERGE_VALUES %1(i32), %2(i32) + %4:_(<4 x i32>) = G_LOAD %3(p4) :: (load (<4 x i32>), align 4, addrspace 4) + %5:_(i32) = G_EXTRACT %4(<4 x i32>), 96 + %6:_(i32) = G_EXTRACT %4(<4 x i32>), 64 + %7:_(i32) = G_IMPLICIT_DEF + %8:_(<2 x i32>) = G_BUILD_VECTOR %6(i32), %7(i32) + %9:_(<2 x i32>) = G_INSERT %8, 
%5(i32), 32 + %deaf_def:_(i32), %11:_(i32) = G_UNMERGE_VALUES %9(<2 x i32>) + G_STORE %6(i32), %0(p5) :: (store (i32), align 8, addrspace 5) + %12:_(i32) = G_CONSTANT i32 4 + %13:_(p5) = G_PTR_ADD %0, %12(i32) + G_STORE %11(i32), %13(p5) :: (store (i32) into unknown-address + 4, addrspace 5) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-add-nullptr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-add-nullptr.mir index 8c707349c9766..f93cfe3d3c478 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-add-nullptr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-add-nullptr.mir @@ -11,17 +11,17 @@ body: | ; CHECK-LABEL: name: add_nullptr_shl_add ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[SHL]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = G_CONSTANT i32 3 - %2:_(s32) = G_SHL %0, %1(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[C]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[SHL]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = G_CONSTANT i32 3 + %2:_(i32) = G_SHL %0, %1(i32) %3:_(p3) = G_CONSTANT i32 0 - %4:_(p3) = G_PTR_ADD %3, %2(s32) - %5:_(s32) = G_PTRTOINT %4(p3) - $vgpr0 = COPY %5(s32) + %4:_(p3) = G_PTR_ADD %3, %2(i32) + %5:_(i32) = G_PTRTOINT %4(p3) + $vgpr0 = COPY %5(i32) ... @@ -35,17 +35,17 @@ body: | ; CHECK-LABEL: name: add_nullptr_mul_add ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[MUL]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[MUL]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 %2:_(p3) = G_CONSTANT i32 0 - %3:_(s32) = G_MUL %0:_, %1:_ - %4:_(p3) = G_PTR_ADD %2:_, %3:_(s32) - %5:_(s32) = G_PTRTOINT %4:_(p3) - $vgpr0 = COPY %5:_(s32) + %3:_(i32) = G_MUL %0, %1 + %4:_(p3) = G_PTR_ADD %2, %3(i32) + %5:_(i32) = G_PTRTOINT %4(p3) + $vgpr0 = COPY %5(i32) ... 
@@ -59,21 +59,21 @@ body: | ; CHECK-LABEL: name: add_nullptr_vec_all_zero ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(<2 x s32>) = G_SHL [[COPY]], [[BUILD_VECTOR]](<2 x s32>) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[SHL]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s32) = COPY $vgpr3 - %3:_(<2 x s32>) = G_BUILD_VECTOR %1:_(s32), %2:_(s32) - %4:_(<2 x s32>) = G_SHL %0, %3(<2 x s32>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY1]](i32), [[COPY2]](i32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(<2 x i32>) = G_SHL [[COPY]], [[BUILD_VECTOR]](<2 x i32>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[SHL]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(i32) = COPY $vgpr2 + %2:_(i32) = COPY $vgpr3 + %3:_(<2 x i32>) = G_BUILD_VECTOR %1(i32), %2(i32) + %4:_(<2 x i32>) = G_SHL %0, %3(<2 x i32>) %5:_(p3) = G_CONSTANT i32 0 - %6:_(<2 x p3>) = G_BUILD_VECTOR %5:_(p3), %5:_(p3) - %7:_(<2 x p3>) = G_PTR_ADD %6, %4(<2 x s32>) - %8:_(<2 x s32>) = G_PTRTOINT %7(<2 x p3>) - $vgpr0_vgpr1 = COPY %8(<2 x s32>) + %6:_(<2 x p3>) = G_BUILD_VECTOR %5(p3), %5(p3) + %7:_(<2 x p3>) = G_PTR_ADD %6, %4(<2 x i32>) + %8:_(<2 x i32>) = G_PTRTOINT %7(<2 x p3>) + $vgpr0_vgpr1 = COPY %8(<2 x i32>) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-add-to-ptradd.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-add-to-ptradd.mir index 54eaf1eebe8ad..1ae19533ed8c1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-add-to-ptradd.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-add-to-ptradd.mir @@ -12,15 +12,15 @@ body: | ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[COPY1]](s64) - ; GCN-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[PTR_ADD]](p1) - ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[PTRTOINT]](s64) + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[COPY1]](i64) + ; GCN-NEXT: [[PTRTOINT:%[0-9]+]]:_(i64) = G_PTRTOINT [[PTR_ADD]](p1) + ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[PTRTOINT]](i64) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = G_PTRTOINT %0 - %3:_(s64) = G_ADD %2, %1 - $vgpr0_vgpr1 = COPY %3 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64) = G_PTRTOINT %0(p1) + %3:_(i64) = G_ADD %2, %1 + $vgpr0_vgpr1 = COPY %3(i64) ... 
@@ -35,15 +35,15 @@ body: | ; GCN: liveins: $vgpr0_vgpr1, $vgpr2 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p1) - ; GCN-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PTRTOINT]], [[COPY1]] - ; GCN-NEXT: $vgpr0 = COPY [[ADD]](s32) + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GCN-NEXT: [[PTRTOINT:%[0-9]+]]:_(i32) = G_PTRTOINT [[COPY]](p1) + ; GCN-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[PTRTOINT]], [[COPY1]] + ; GCN-NEXT: $vgpr0 = COPY [[ADD]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s32) = G_PTRTOINT %0 - %3:_(s32) = G_ADD %2, %1 - $vgpr0 = COPY %3 + %1:_(i32) = COPY $vgpr2 + %2:_(i32) = G_PTRTOINT %0(p1) + %3:_(i32) = G_ADD %2, %1 + $vgpr0 = COPY %3(i32) ... @@ -58,15 +58,15 @@ body: | ; GCN: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[COPY1]](s32) - ; GCN-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[PTR_ADD]](p3) - ; GCN-NEXT: $vgpr0 = COPY [[PTRTOINT]](s32) + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[COPY1]](i32) + ; GCN-NEXT: [[PTRTOINT:%[0-9]+]]:_(i32) = G_PTRTOINT [[PTR_ADD]](p3) + ; GCN-NEXT: $vgpr0 = COPY [[PTRTOINT]](i32) %0:_(p3) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_PTRTOINT %0 - %3:_(s32) = G_ADD %2, %1 - $vgpr0 = COPY %3 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_PTRTOINT %0(p3) + %3:_(i32) = G_ADD %2, %1 + $vgpr0 = COPY %3(i32) ... @@ -81,15 +81,15 @@ body: | ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[COPY1]](s64) + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[COPY1]](i64) ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[PTR_ADD]](p1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = G_PTRTOINT %0 - %3:_(s64) = G_ADD %2, %1 - %4:_(p1) = G_INTTOPTR %3 - $vgpr0_vgpr1 = COPY %4 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64) = G_PTRTOINT %0(p1) + %3:_(i64) = G_ADD %2, %1 + %4:_(p1) = G_INTTOPTR %3(i64) + $vgpr0_vgpr1 = COPY %4(p1) ... @@ -104,15 +104,15 @@ body: | ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p3>) = G_PTR_ADD [[COPY]], [[COPY1]](<2 x s32>) - ; GCN-NEXT: [[PTRTOINT:%[0-9]+]]:_(<2 x s32>) = G_PTRTOINT [[PTR_ADD]](<2 x p3>) - ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[PTRTOINT]](<2 x s32>) + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p3>) = G_PTR_ADD [[COPY]], [[COPY1]](<2 x i32>) + ; GCN-NEXT: [[PTRTOINT:%[0-9]+]]:_(<2 x i32>) = G_PTRTOINT [[PTR_ADD]](<2 x p3>) + ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[PTRTOINT]](<2 x i32>) %0:_(<2 x p3>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = G_PTRTOINT %0 - %3:_(<2 x s32>) = G_ADD %2, %1 - $vgpr0_vgpr1 = COPY %3 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i32>) = G_PTRTOINT %0(<2 x p3>) + %3:_(<2 x i32>) = G_ADD %2, %1 + $vgpr0_vgpr1 = COPY %3(<2 x i32>) ... 
@@ -127,15 +127,15 @@ body: | ; GCN: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<2 x p1>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; GCN-NEXT: [[PTRTOINT:%[0-9]+]]:_(<2 x s32>) = G_PTRTOINT [[COPY]](<2 x p1>) - ; GCN-NEXT: [[ADD:%[0-9]+]]:_(<2 x s32>) = G_ADD [[PTRTOINT]], [[COPY1]] - ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[ADD]](<2 x s32>) + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr4_vgpr5 + ; GCN-NEXT: [[PTRTOINT:%[0-9]+]]:_(<2 x i32>) = G_PTRTOINT [[COPY]](<2 x p1>) + ; GCN-NEXT: [[ADD:%[0-9]+]]:_(<2 x i32>) = G_ADD [[PTRTOINT]], [[COPY1]] + ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[ADD]](<2 x i32>) %0:_(<2 x p1>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s32>) = COPY $vgpr4_vgpr5 - %2:_(<2 x s32>) = G_PTRTOINT %0 - %3:_(<2 x s32>) = G_ADD %2, %1 - $vgpr0_vgpr1 = COPY %3 + %1:_(<2 x i32>) = COPY $vgpr4_vgpr5 + %2:_(<2 x i32>) = G_PTRTOINT %0(<2 x p1>) + %3:_(<2 x i32>) = G_ADD %2, %1 + $vgpr0_vgpr1 = COPY %3(<2 x i32>) ... @@ -150,15 +150,15 @@ body: | ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[COPY1]](s64) - ; GCN-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[PTR_ADD]](p1) - ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[PTRTOINT]](s64) + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[COPY1]](i64) + ; GCN-NEXT: [[PTRTOINT:%[0-9]+]]:_(i64) = G_PTRTOINT [[PTR_ADD]](p1) + ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[PTRTOINT]](i64) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = G_PTRTOINT %0 - %3:_(s64) = G_ADD %1, %2 - $vgpr0_vgpr1 = COPY %3 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64) = G_PTRTOINT %0(p1) + %3:_(i64) = G_ADD %1, %2 + $vgpr0_vgpr1 = COPY %3(i64) ... @@ -174,15 +174,15 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr2_vgpr3 - ; GCN-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p1) - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[PTRTOINT]](s64) - ; GCN-NEXT: [[PTRTOINT1:%[0-9]+]]:_(s64) = G_PTRTOINT [[PTR_ADD]](p1) - ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[PTRTOINT1]](s64) + ; GCN-NEXT: [[PTRTOINT:%[0-9]+]]:_(i64) = G_PTRTOINT [[COPY1]](p1) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[PTRTOINT]](i64) + ; GCN-NEXT: [[PTRTOINT1:%[0-9]+]]:_(i64) = G_PTRTOINT [[PTR_ADD]](p1) + ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[PTRTOINT1]](i64) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(p1) = COPY $vgpr2_vgpr3 - %2:_(s64) = G_PTRTOINT %0 - %3:_(s64) = G_PTRTOINT %1 - %4:_(s64) = G_ADD %2, %3 - $vgpr0_vgpr1 = COPY %4 + %2:_(i64) = G_PTRTOINT %0(p1) + %3:_(i64) = G_PTRTOINT %1(p1) + %4:_(i64) = G_ADD %2, %3 + $vgpr0_vgpr1 = COPY %4(i64) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-amdgpu-cvt-f32-ubyte.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-amdgpu-cvt-f32-ubyte.mir index 7893bfa1d38f0..331507c8f0d45 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-amdgpu-cvt-f32-ubyte.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-amdgpu-cvt-f32-ubyte.mir @@ -11,14 +11,14 @@ body: | ; CHECK-LABEL: name: cvt_f32_ubyte0_lshr_0 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %arg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 %arg - ; CHECK-NEXT: $vgpr0 = COPY %result(s32) - %arg:_(s32) = COPY $vgpr0 - %shiftamt:_(s32) = G_CONSTANT i32 0 - %shift:_(s32) = G_LSHR %arg, %shiftamt - %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 %shift - $vgpr0 = COPY %result + ; CHECK-NEXT: %arg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %result:_(i32) = G_AMDGPU_CVT_F32_UBYTE0 %arg + ; CHECK-NEXT: $vgpr0 = COPY %result(i32) + %arg:_(i32) = COPY $vgpr0 + %shiftamt:_(i32) = G_CONSTANT i32 0 + %shift:_(i32) = G_LSHR %arg, %shiftamt(i32) + %result:_(i32) = G_AMDGPU_CVT_F32_UBYTE0 %shift + $vgpr0 = COPY %result(i32) ... --- @@ -31,14 +31,14 @@ body: | ; CHECK-LABEL: name: cvt_f32_ubyte0_lshr_8 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %arg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE1 %arg - ; CHECK-NEXT: $vgpr0 = COPY %result(s32) - %arg:_(s32) = COPY $vgpr0 - %shiftamt:_(s32) = G_CONSTANT i32 8 - %shift:_(s32) = G_LSHR %arg, %shiftamt - %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 %shift - $vgpr0 = COPY %result + ; CHECK-NEXT: %arg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %result:_(i32) = G_AMDGPU_CVT_F32_UBYTE1 %arg + ; CHECK-NEXT: $vgpr0 = COPY %result(i32) + %arg:_(i32) = COPY $vgpr0 + %shiftamt:_(i32) = G_CONSTANT i32 8 + %shift:_(i32) = G_LSHR %arg, %shiftamt(i32) + %result:_(i32) = G_AMDGPU_CVT_F32_UBYTE0 %shift + $vgpr0 = COPY %result(i32) ... --- @@ -51,14 +51,14 @@ body: | ; CHECK-LABEL: name: cvt_f32_ubyte0_lshr_16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %arg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE2 %arg - ; CHECK-NEXT: $vgpr0 = COPY %result(s32) - %arg:_(s32) = COPY $vgpr0 - %shiftamt:_(s32) = G_CONSTANT i32 16 - %shift:_(s32) = G_LSHR %arg, %shiftamt - %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 %shift - $vgpr0 = COPY %result + ; CHECK-NEXT: %arg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %result:_(i32) = G_AMDGPU_CVT_F32_UBYTE2 %arg + ; CHECK-NEXT: $vgpr0 = COPY %result(i32) + %arg:_(i32) = COPY $vgpr0 + %shiftamt:_(i32) = G_CONSTANT i32 16 + %shift:_(i32) = G_LSHR %arg, %shiftamt(i32) + %result:_(i32) = G_AMDGPU_CVT_F32_UBYTE0 %shift + $vgpr0 = COPY %result(i32) ... --- @@ -71,14 +71,14 @@ body: | ; CHECK-LABEL: name: cvt_f32_ubyte0_lshr_24 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %arg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE3 %arg - ; CHECK-NEXT: $vgpr0 = COPY %result(s32) - %arg:_(s32) = COPY $vgpr0 - %shiftamt:_(s32) = G_CONSTANT i32 24 - %shift:_(s32) = G_LSHR %arg, %shiftamt - %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 %shift - $vgpr0 = COPY %result + ; CHECK-NEXT: %arg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %result:_(i32) = G_AMDGPU_CVT_F32_UBYTE3 %arg + ; CHECK-NEXT: $vgpr0 = COPY %result(i32) + %arg:_(i32) = COPY $vgpr0 + %shiftamt:_(i32) = G_CONSTANT i32 24 + %shift:_(i32) = G_LSHR %arg, %shiftamt(i32) + %result:_(i32) = G_AMDGPU_CVT_F32_UBYTE0 %shift + $vgpr0 = COPY %result(i32) ... 
--- @@ -91,14 +91,14 @@ body: | ; CHECK-LABEL: name: cvt_f32_ubyte1_lshr_8 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %arg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE2 %arg - ; CHECK-NEXT: $vgpr0 = COPY %result(s32) - %arg:_(s32) = COPY $vgpr0 - %shiftamt:_(s32) = G_CONSTANT i32 8 - %shift:_(s32) = G_LSHR %arg, %shiftamt - %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE1 %shift - $vgpr0 = COPY %result + ; CHECK-NEXT: %arg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %result:_(i32) = G_AMDGPU_CVT_F32_UBYTE2 %arg + ; CHECK-NEXT: $vgpr0 = COPY %result(i32) + %arg:_(i32) = COPY $vgpr0 + %shiftamt:_(i32) = G_CONSTANT i32 8 + %shift:_(i32) = G_LSHR %arg, %shiftamt(i32) + %result:_(i32) = G_AMDGPU_CVT_F32_UBYTE1 %shift + $vgpr0 = COPY %result(i32) ... --- @@ -111,14 +111,14 @@ body: | ; CHECK-LABEL: name: cvt_f32_ubyte1_lshr_16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %arg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE3 %arg - ; CHECK-NEXT: $vgpr0 = COPY %result(s32) - %arg:_(s32) = COPY $vgpr0 - %shiftamt:_(s32) = G_CONSTANT i32 16 - %shift:_(s32) = G_LSHR %arg, %shiftamt - %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE1 %shift - $vgpr0 = COPY %result + ; CHECK-NEXT: %arg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %result:_(i32) = G_AMDGPU_CVT_F32_UBYTE3 %arg + ; CHECK-NEXT: $vgpr0 = COPY %result(i32) + %arg:_(i32) = COPY $vgpr0 + %shiftamt:_(i32) = G_CONSTANT i32 16 + %shift:_(i32) = G_LSHR %arg, %shiftamt(i32) + %result:_(i32) = G_AMDGPU_CVT_F32_UBYTE1 %shift + $vgpr0 = COPY %result(i32) ... --- @@ -131,16 +131,16 @@ body: | ; CHECK-LABEL: name: cvt_f32_ubyte1_lshr_24 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %arg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %shiftamt:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: %shift:_(s32) = G_LSHR %arg, %shiftamt(s32) - ; CHECK-NEXT: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE1 %shift - ; CHECK-NEXT: $vgpr0 = COPY %result(s32) - %arg:_(s32) = COPY $vgpr0 - %shiftamt:_(s32) = G_CONSTANT i32 24 - %shift:_(s32) = G_LSHR %arg, %shiftamt - %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE1 %shift - $vgpr0 = COPY %result + ; CHECK-NEXT: %arg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %shiftamt:_(i32) = G_CONSTANT i32 24 + ; CHECK-NEXT: %shift:_(i32) = G_LSHR %arg, %shiftamt(i32) + ; CHECK-NEXT: %result:_(i32) = G_AMDGPU_CVT_F32_UBYTE1 %shift + ; CHECK-NEXT: $vgpr0 = COPY %result(i32) + %arg:_(i32) = COPY $vgpr0 + %shiftamt:_(i32) = G_CONSTANT i32 24 + %shift:_(i32) = G_LSHR %arg, %shiftamt(i32) + %result:_(i32) = G_AMDGPU_CVT_F32_UBYTE1 %shift + $vgpr0 = COPY %result(i32) ... --- @@ -153,14 +153,14 @@ body: | ; CHECK-LABEL: name: cvt_f32_ubyte2_lshr_8 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %arg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE3 %arg - ; CHECK-NEXT: $vgpr0 = COPY %result(s32) - %arg:_(s32) = COPY $vgpr0 - %shiftamt:_(s32) = G_CONSTANT i32 8 - %shift:_(s32) = G_LSHR %arg, %shiftamt - %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE2 %shift - $vgpr0 = COPY %result + ; CHECK-NEXT: %arg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %result:_(i32) = G_AMDGPU_CVT_F32_UBYTE3 %arg + ; CHECK-NEXT: $vgpr0 = COPY %result(i32) + %arg:_(i32) = COPY $vgpr0 + %shiftamt:_(i32) = G_CONSTANT i32 8 + %shift:_(i32) = G_LSHR %arg, %shiftamt(i32) + %result:_(i32) = G_AMDGPU_CVT_F32_UBYTE2 %shift + $vgpr0 = COPY %result(i32) ... 
--- @@ -173,16 +173,16 @@ body: | ; CHECK-LABEL: name: cvt_f32_ubyte2_lshr_16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %arg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %shiftamt:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: %shift:_(s32) = G_LSHR %arg, %shiftamt(s32) - ; CHECK-NEXT: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE2 %shift - ; CHECK-NEXT: $vgpr0 = COPY %result(s32) - %arg:_(s32) = COPY $vgpr0 - %shiftamt:_(s32) = G_CONSTANT i32 16 - %shift:_(s32) = G_LSHR %arg, %shiftamt - %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE2 %shift - $vgpr0 = COPY %result + ; CHECK-NEXT: %arg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %shiftamt:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: %shift:_(i32) = G_LSHR %arg, %shiftamt(i32) + ; CHECK-NEXT: %result:_(i32) = G_AMDGPU_CVT_F32_UBYTE2 %shift + ; CHECK-NEXT: $vgpr0 = COPY %result(i32) + %arg:_(i32) = COPY $vgpr0 + %shiftamt:_(i32) = G_CONSTANT i32 16 + %shift:_(i32) = G_LSHR %arg, %shiftamt(i32) + %result:_(i32) = G_AMDGPU_CVT_F32_UBYTE2 %shift + $vgpr0 = COPY %result(i32) ... --- @@ -195,16 +195,16 @@ body: | ; CHECK-LABEL: name: cvt_f32_ubyte2_lshr_24 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %arg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %shiftamt:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: %shift:_(s32) = G_LSHR %arg, %shiftamt(s32) - ; CHECK-NEXT: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE2 %shift - ; CHECK-NEXT: $vgpr0 = COPY %result(s32) - %arg:_(s32) = COPY $vgpr0 - %shiftamt:_(s32) = G_CONSTANT i32 24 - %shift:_(s32) = G_LSHR %arg, %shiftamt - %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE2 %shift - $vgpr0 = COPY %result + ; CHECK-NEXT: %arg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %shiftamt:_(i32) = G_CONSTANT i32 24 + ; CHECK-NEXT: %shift:_(i32) = G_LSHR %arg, %shiftamt(i32) + ; CHECK-NEXT: %result:_(i32) = G_AMDGPU_CVT_F32_UBYTE2 %shift + ; CHECK-NEXT: $vgpr0 = COPY %result(i32) + %arg:_(i32) = COPY $vgpr0 + %shiftamt:_(i32) = G_CONSTANT i32 24 + %shift:_(i32) = G_LSHR %arg, %shiftamt(i32) + %result:_(i32) = G_AMDGPU_CVT_F32_UBYTE2 %shift + $vgpr0 = COPY %result(i32) ... --- @@ -217,16 +217,16 @@ body: | ; CHECK-LABEL: name: cvt_f32_ubyte3_lshr_8 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %arg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %shiftamt:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: %shift:_(s32) = G_LSHR %arg, %shiftamt(s32) - ; CHECK-NEXT: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE3 %shift - ; CHECK-NEXT: $vgpr0 = COPY %result(s32) - %arg:_(s32) = COPY $vgpr0 - %shiftamt:_(s32) = G_CONSTANT i32 8 - %shift:_(s32) = G_LSHR %arg, %shiftamt - %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE3 %shift - $vgpr0 = COPY %result + ; CHECK-NEXT: %arg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %shiftamt:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: %shift:_(i32) = G_LSHR %arg, %shiftamt(i32) + ; CHECK-NEXT: %result:_(i32) = G_AMDGPU_CVT_F32_UBYTE3 %shift + ; CHECK-NEXT: $vgpr0 = COPY %result(i32) + %arg:_(i32) = COPY $vgpr0 + %shiftamt:_(i32) = G_CONSTANT i32 8 + %shift:_(i32) = G_LSHR %arg, %shiftamt(i32) + %result:_(i32) = G_AMDGPU_CVT_F32_UBYTE3 %shift + $vgpr0 = COPY %result(i32) ... 
--- @@ -239,16 +239,16 @@ body: | ; CHECK-LABEL: name: cvt_f32_ubyte0_zext_lshr_8 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %arg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE1 %arg - ; CHECK-NEXT: $vgpr0 = COPY %result(s32) - %arg:_(s32) = COPY $vgpr0 - %trunc:_(s16) = G_TRUNC %arg - %shiftamt:_(s32) = G_CONSTANT i32 8 - %shift:_(s16) = G_LSHR %trunc, %shiftamt - %zext:_(s32) = G_ZEXT %shift - %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 %zext - $vgpr0 = COPY %result + ; CHECK-NEXT: %arg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %result:_(i32) = G_AMDGPU_CVT_F32_UBYTE1 %arg + ; CHECK-NEXT: $vgpr0 = COPY %result(i32) + %arg:_(i32) = COPY $vgpr0 + %trunc:_(i16) = G_TRUNC %arg(i32) + %shiftamt:_(i32) = G_CONSTANT i32 8 + %shift:_(i16) = G_LSHR %trunc, %shiftamt(i32) + %zext:_(i32) = G_ZEXT %shift(i16) + %result:_(i32) = G_AMDGPU_CVT_F32_UBYTE0 %zext + $vgpr0 = COPY %result(i32) ... --- @@ -261,17 +261,17 @@ body: | ; CHECK-LABEL: name: cvt_f32_ubyte0_zext_lshr_16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %shift:_(s16) = G_IMPLICIT_DEF - ; CHECK-NEXT: %zext:_(s32) = G_ZEXT %shift(s16) - ; CHECK-NEXT: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 %zext - ; CHECK-NEXT: $vgpr0 = COPY %result(s32) - %arg:_(s32) = COPY $vgpr0 - %trunc:_(s16) = G_TRUNC %arg - %shiftamt:_(s32) = G_CONSTANT i32 16 - %shift:_(s16) = G_LSHR %trunc, %shiftamt - %zext:_(s32) = G_ZEXT %shift - %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 %zext - $vgpr0 = COPY %result + ; CHECK-NEXT: %shift:_(i16) = G_IMPLICIT_DEF + ; CHECK-NEXT: %zext:_(i32) = G_ZEXT %shift(i16) + ; CHECK-NEXT: %result:_(i32) = G_AMDGPU_CVT_F32_UBYTE0 %zext + ; CHECK-NEXT: $vgpr0 = COPY %result(i32) + %arg:_(i32) = COPY $vgpr0 + %trunc:_(i16) = G_TRUNC %arg(i32) + %shiftamt:_(i32) = G_CONSTANT i32 16 + %shift:_(i16) = G_LSHR %trunc, %shiftamt(i32) + %zext:_(i32) = G_ZEXT %shift(i16) + %result:_(i32) = G_AMDGPU_CVT_F32_UBYTE0 %zext + $vgpr0 = COPY %result(i32) ... --- @@ -284,17 +284,17 @@ body: | ; CHECK-LABEL: name: cvt_f32_ubyte0_zext_lshr_24 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %shift:_(s16) = G_IMPLICIT_DEF - ; CHECK-NEXT: %zext:_(s32) = G_ZEXT %shift(s16) - ; CHECK-NEXT: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 %zext - ; CHECK-NEXT: $vgpr0 = COPY %result(s32) - %arg:_(s32) = COPY $vgpr0 - %trunc:_(s16) = G_TRUNC %arg - %shiftamt:_(s32) = G_CONSTANT i32 24 - %shift:_(s16) = G_LSHR %trunc, %shiftamt - %zext:_(s32) = G_ZEXT %shift - %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 %zext - $vgpr0 = COPY %result + ; CHECK-NEXT: %shift:_(i16) = G_IMPLICIT_DEF + ; CHECK-NEXT: %zext:_(i32) = G_ZEXT %shift(i16) + ; CHECK-NEXT: %result:_(i32) = G_AMDGPU_CVT_F32_UBYTE0 %zext + ; CHECK-NEXT: $vgpr0 = COPY %result(i32) + %arg:_(i32) = COPY $vgpr0 + %trunc:_(i16) = G_TRUNC %arg(i32) + %shiftamt:_(i32) = G_CONSTANT i32 24 + %shift:_(i16) = G_LSHR %trunc, %shiftamt(i32) + %zext:_(i32) = G_ZEXT %shift(i16) + %result:_(i32) = G_AMDGPU_CVT_F32_UBYTE0 %zext + $vgpr0 = COPY %result(i32) ... 
--- @@ -307,16 +307,16 @@ body: | ; CHECK-LABEL: name: cvt_f32_ubyte1_zext_lshr_8 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %arg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE2 %arg - ; CHECK-NEXT: $vgpr0 = COPY %result(s32) - %arg:_(s32) = COPY $vgpr0 - %trunc:_(s16) = G_TRUNC %arg - %shiftamt:_(s32) = G_CONSTANT i32 8 - %shift:_(s16) = G_LSHR %trunc, %shiftamt - %zext:_(s32) = G_ZEXT %shift - %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE1 %zext - $vgpr0 = COPY %result + ; CHECK-NEXT: %arg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %result:_(i32) = G_AMDGPU_CVT_F32_UBYTE2 %arg + ; CHECK-NEXT: $vgpr0 = COPY %result(i32) + %arg:_(i32) = COPY $vgpr0 + %trunc:_(i16) = G_TRUNC %arg(i32) + %shiftamt:_(i32) = G_CONSTANT i32 8 + %shift:_(i16) = G_LSHR %trunc, %shiftamt(i32) + %zext:_(i32) = G_ZEXT %shift(i16) + %result:_(i32) = G_AMDGPU_CVT_F32_UBYTE1 %zext + $vgpr0 = COPY %result(i32) ... --- @@ -329,16 +329,16 @@ body: | ; CHECK-LABEL: name: cvt_f32_ubyte0_shl_8 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %arg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %shiftamt:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: %shift:_(s32) = G_SHL %arg, %shiftamt(s32) - ; CHECK-NEXT: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 %shift - ; CHECK-NEXT: $vgpr0 = COPY %result(s32) - %arg:_(s32) = COPY $vgpr0 - %shiftamt:_(s32) = G_CONSTANT i32 8 - %shift:_(s32) = G_SHL %arg, %shiftamt - %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 %shift - $vgpr0 = COPY %result + ; CHECK-NEXT: %arg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %shiftamt:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: %shift:_(i32) = G_SHL %arg, %shiftamt(i32) + ; CHECK-NEXT: %result:_(i32) = G_AMDGPU_CVT_F32_UBYTE0 %shift + ; CHECK-NEXT: $vgpr0 = COPY %result(i32) + %arg:_(i32) = COPY $vgpr0 + %shiftamt:_(i32) = G_CONSTANT i32 8 + %shift:_(i32) = G_SHL %arg, %shiftamt(i32) + %result:_(i32) = G_AMDGPU_CVT_F32_UBYTE0 %shift + $vgpr0 = COPY %result(i32) ... --- @@ -351,16 +351,16 @@ body: | ; CHECK-LABEL: name: cvt_f32_ubyte1_shl_8 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %arg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %shiftamt:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: %shift:_(s32) = G_SHL %arg, %shiftamt(s32) - ; CHECK-NEXT: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE1 %shift - ; CHECK-NEXT: $vgpr0 = COPY %result(s32) - %arg:_(s32) = COPY $vgpr0 - %shiftamt:_(s32) = G_CONSTANT i32 8 - %shift:_(s32) = G_SHL %arg, %shiftamt - %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE1 %shift - $vgpr0 = COPY %result + ; CHECK-NEXT: %arg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %shiftamt:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: %shift:_(i32) = G_SHL %arg, %shiftamt(i32) + ; CHECK-NEXT: %result:_(i32) = G_AMDGPU_CVT_F32_UBYTE1 %shift + ; CHECK-NEXT: $vgpr0 = COPY %result(i32) + %arg:_(i32) = COPY $vgpr0 + %shiftamt:_(i32) = G_CONSTANT i32 8 + %shift:_(i32) = G_SHL %arg, %shiftamt(i32) + %result:_(i32) = G_AMDGPU_CVT_F32_UBYTE1 %shift + $vgpr0 = COPY %result(i32) ... 
--- @@ -373,14 +373,14 @@ body: | ; CHECK-LABEL: name: cvt_f32_ubyte2_shl_8 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %arg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE1 %arg - ; CHECK-NEXT: $vgpr0 = COPY %result(s32) - %arg:_(s32) = COPY $vgpr0 - %shiftamt:_(s32) = G_CONSTANT i32 8 - %shift:_(s32) = G_SHL %arg, %shiftamt - %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE2 %shift - $vgpr0 = COPY %result + ; CHECK-NEXT: %arg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %result:_(i32) = G_AMDGPU_CVT_F32_UBYTE1 %arg + ; CHECK-NEXT: $vgpr0 = COPY %result(i32) + %arg:_(i32) = COPY $vgpr0 + %shiftamt:_(i32) = G_CONSTANT i32 8 + %shift:_(i32) = G_SHL %arg, %shiftamt(i32) + %result:_(i32) = G_AMDGPU_CVT_F32_UBYTE2 %shift + $vgpr0 = COPY %result(i32) ... --- @@ -393,14 +393,14 @@ body: | ; CHECK-LABEL: name: cvt_f32_ubyte3_shl_8 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %arg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE2 %arg - ; CHECK-NEXT: $vgpr0 = COPY %result(s32) - %arg:_(s32) = COPY $vgpr0 - %shiftamt:_(s32) = G_CONSTANT i32 8 - %shift:_(s32) = G_SHL %arg, %shiftamt - %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE3 %shift - $vgpr0 = COPY %result + ; CHECK-NEXT: %arg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %result:_(i32) = G_AMDGPU_CVT_F32_UBYTE2 %arg + ; CHECK-NEXT: $vgpr0 = COPY %result(i32) + %arg:_(i32) = COPY $vgpr0 + %shiftamt:_(i32) = G_CONSTANT i32 8 + %shift:_(i32) = G_SHL %arg, %shiftamt(i32) + %result:_(i32) = G_AMDGPU_CVT_F32_UBYTE3 %shift + $vgpr0 = COPY %result(i32) ... --- @@ -413,16 +413,16 @@ body: | ; CHECK-LABEL: name: cvt_f32_ubyte0_shl_16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %arg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %shiftamt:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: %shift:_(s32) = G_SHL %arg, %shiftamt(s32) - ; CHECK-NEXT: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 %shift - ; CHECK-NEXT: $vgpr0 = COPY %result(s32) - %arg:_(s32) = COPY $vgpr0 - %shiftamt:_(s32) = G_CONSTANT i32 16 - %shift:_(s32) = G_SHL %arg, %shiftamt - %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 %shift - $vgpr0 = COPY %result + ; CHECK-NEXT: %arg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %shiftamt:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: %shift:_(i32) = G_SHL %arg, %shiftamt(i32) + ; CHECK-NEXT: %result:_(i32) = G_AMDGPU_CVT_F32_UBYTE0 %shift + ; CHECK-NEXT: $vgpr0 = COPY %result(i32) + %arg:_(i32) = COPY $vgpr0 + %shiftamt:_(i32) = G_CONSTANT i32 16 + %shift:_(i32) = G_SHL %arg, %shiftamt(i32) + %result:_(i32) = G_AMDGPU_CVT_F32_UBYTE0 %shift + $vgpr0 = COPY %result(i32) ... 
--- @@ -435,16 +435,16 @@ body: | ; CHECK-LABEL: name: cvt_f32_ubyte1_shl_16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %arg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %shiftamt:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: %shift:_(s32) = G_SHL %arg, %shiftamt(s32) - ; CHECK-NEXT: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE1 %shift - ; CHECK-NEXT: $vgpr0 = COPY %result(s32) - %arg:_(s32) = COPY $vgpr0 - %shiftamt:_(s32) = G_CONSTANT i32 16 - %shift:_(s32) = G_SHL %arg, %shiftamt - %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE1 %shift - $vgpr0 = COPY %result + ; CHECK-NEXT: %arg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %shiftamt:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: %shift:_(i32) = G_SHL %arg, %shiftamt(i32) + ; CHECK-NEXT: %result:_(i32) = G_AMDGPU_CVT_F32_UBYTE1 %shift + ; CHECK-NEXT: $vgpr0 = COPY %result(i32) + %arg:_(i32) = COPY $vgpr0 + %shiftamt:_(i32) = G_CONSTANT i32 16 + %shift:_(i32) = G_SHL %arg, %shiftamt(i32) + %result:_(i32) = G_AMDGPU_CVT_F32_UBYTE1 %shift + $vgpr0 = COPY %result(i32) ... --- @@ -457,16 +457,16 @@ body: | ; CHECK-LABEL: name: cvt_f32_ubyte2_shl_16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %arg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %shiftamt:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: %shift:_(s32) = G_SHL %arg, %shiftamt(s32) - ; CHECK-NEXT: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE2 %shift - ; CHECK-NEXT: $vgpr0 = COPY %result(s32) - %arg:_(s32) = COPY $vgpr0 - %shiftamt:_(s32) = G_CONSTANT i32 16 - %shift:_(s32) = G_SHL %arg, %shiftamt - %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE2 %shift - $vgpr0 = COPY %result + ; CHECK-NEXT: %arg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %shiftamt:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: %shift:_(i32) = G_SHL %arg, %shiftamt(i32) + ; CHECK-NEXT: %result:_(i32) = G_AMDGPU_CVT_F32_UBYTE2 %shift + ; CHECK-NEXT: $vgpr0 = COPY %result(i32) + %arg:_(i32) = COPY $vgpr0 + %shiftamt:_(i32) = G_CONSTANT i32 16 + %shift:_(i32) = G_SHL %arg, %shiftamt(i32) + %result:_(i32) = G_AMDGPU_CVT_F32_UBYTE2 %shift + $vgpr0 = COPY %result(i32) ... --- @@ -479,14 +479,14 @@ body: | ; CHECK-LABEL: name: cvt_f32_ubyte3_shl_16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %arg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE1 %arg - ; CHECK-NEXT: $vgpr0 = COPY %result(s32) - %arg:_(s32) = COPY $vgpr0 - %shiftamt:_(s32) = G_CONSTANT i32 16 - %shift:_(s32) = G_SHL %arg, %shiftamt - %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE3 %shift - $vgpr0 = COPY %result + ; CHECK-NEXT: %arg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %result:_(i32) = G_AMDGPU_CVT_F32_UBYTE1 %arg + ; CHECK-NEXT: $vgpr0 = COPY %result(i32) + %arg:_(i32) = COPY $vgpr0 + %shiftamt:_(i32) = G_CONSTANT i32 16 + %shift:_(i32) = G_SHL %arg, %shiftamt(i32) + %result:_(i32) = G_AMDGPU_CVT_F32_UBYTE3 %shift + $vgpr0 = COPY %result(i32) ... 
--- @@ -499,16 +499,16 @@ body: | ; CHECK-LABEL: name: cvt_f32_ubyte0_shl_24 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %arg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %shiftamt:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: %shift:_(s32) = G_SHL %arg, %shiftamt(s32) - ; CHECK-NEXT: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 %shift - ; CHECK-NEXT: $vgpr0 = COPY %result(s32) - %arg:_(s32) = COPY $vgpr0 - %shiftamt:_(s32) = G_CONSTANT i32 24 - %shift:_(s32) = G_SHL %arg, %shiftamt - %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 %shift - $vgpr0 = COPY %result + ; CHECK-NEXT: %arg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %shiftamt:_(i32) = G_CONSTANT i32 24 + ; CHECK-NEXT: %shift:_(i32) = G_SHL %arg, %shiftamt(i32) + ; CHECK-NEXT: %result:_(i32) = G_AMDGPU_CVT_F32_UBYTE0 %shift + ; CHECK-NEXT: $vgpr0 = COPY %result(i32) + %arg:_(i32) = COPY $vgpr0 + %shiftamt:_(i32) = G_CONSTANT i32 24 + %shift:_(i32) = G_SHL %arg, %shiftamt(i32) + %result:_(i32) = G_AMDGPU_CVT_F32_UBYTE0 %shift + $vgpr0 = COPY %result(i32) ... --- @@ -521,16 +521,16 @@ body: | ; CHECK-LABEL: name: cvt_f32_ubyte1_shl_24 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %arg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %shiftamt:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: %shift:_(s32) = G_SHL %arg, %shiftamt(s32) - ; CHECK-NEXT: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE1 %shift - ; CHECK-NEXT: $vgpr0 = COPY %result(s32) - %arg:_(s32) = COPY $vgpr0 - %shiftamt:_(s32) = G_CONSTANT i32 24 - %shift:_(s32) = G_SHL %arg, %shiftamt - %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE1 %shift - $vgpr0 = COPY %result + ; CHECK-NEXT: %arg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %shiftamt:_(i32) = G_CONSTANT i32 24 + ; CHECK-NEXT: %shift:_(i32) = G_SHL %arg, %shiftamt(i32) + ; CHECK-NEXT: %result:_(i32) = G_AMDGPU_CVT_F32_UBYTE1 %shift + ; CHECK-NEXT: $vgpr0 = COPY %result(i32) + %arg:_(i32) = COPY $vgpr0 + %shiftamt:_(i32) = G_CONSTANT i32 24 + %shift:_(i32) = G_SHL %arg, %shiftamt(i32) + %result:_(i32) = G_AMDGPU_CVT_F32_UBYTE1 %shift + $vgpr0 = COPY %result(i32) ... --- @@ -543,16 +543,16 @@ body: | ; CHECK-LABEL: name: cvt_f32_ubyte2_shl_24 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %arg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %shiftamt:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: %shift:_(s32) = G_SHL %arg, %shiftamt(s32) - ; CHECK-NEXT: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE2 %shift - ; CHECK-NEXT: $vgpr0 = COPY %result(s32) - %arg:_(s32) = COPY $vgpr0 - %shiftamt:_(s32) = G_CONSTANT i32 24 - %shift:_(s32) = G_SHL %arg, %shiftamt - %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE2 %shift - $vgpr0 = COPY %result + ; CHECK-NEXT: %arg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %shiftamt:_(i32) = G_CONSTANT i32 24 + ; CHECK-NEXT: %shift:_(i32) = G_SHL %arg, %shiftamt(i32) + ; CHECK-NEXT: %result:_(i32) = G_AMDGPU_CVT_F32_UBYTE2 %shift + ; CHECK-NEXT: $vgpr0 = COPY %result(i32) + %arg:_(i32) = COPY $vgpr0 + %shiftamt:_(i32) = G_CONSTANT i32 24 + %shift:_(i32) = G_SHL %arg, %shiftamt(i32) + %result:_(i32) = G_AMDGPU_CVT_F32_UBYTE2 %shift + $vgpr0 = COPY %result(i32) ... 
--- @@ -565,16 +565,16 @@ body: | ; CHECK-LABEL: name: cvt_f32_ubyte3_shl_24 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %arg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %shiftamt:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: %shift:_(s32) = G_SHL %arg, %shiftamt(s32) - ; CHECK-NEXT: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE3 %shift - ; CHECK-NEXT: $vgpr0 = COPY %result(s32) - %arg:_(s32) = COPY $vgpr0 - %shiftamt:_(s32) = G_CONSTANT i32 24 - %shift:_(s32) = G_SHL %arg, %shiftamt - %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE3 %shift - $vgpr0 = COPY %result + ; CHECK-NEXT: %arg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %shiftamt:_(i32) = G_CONSTANT i32 24 + ; CHECK-NEXT: %shift:_(i32) = G_SHL %arg, %shiftamt(i32) + ; CHECK-NEXT: %result:_(i32) = G_AMDGPU_CVT_F32_UBYTE3 %shift + ; CHECK-NEXT: $vgpr0 = COPY %result(i32) + %arg:_(i32) = COPY $vgpr0 + %shiftamt:_(i32) = G_CONSTANT i32 24 + %shift:_(i32) = G_SHL %arg, %shiftamt(i32) + %result:_(i32) = G_AMDGPU_CVT_F32_UBYTE3 %shift + $vgpr0 = COPY %result(i32) ... # Shift amount is wrong @@ -588,16 +588,16 @@ body: | ; CHECK-LABEL: name: cvt_f32_ubyte1_shl_7 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %arg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %shiftamt:_(s32) = G_CONSTANT i32 7 - ; CHECK-NEXT: %shift:_(s32) = G_SHL %arg, %shiftamt(s32) - ; CHECK-NEXT: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE1 %shift - ; CHECK-NEXT: $vgpr0 = COPY %result(s32) - %arg:_(s32) = COPY $vgpr0 - %shiftamt:_(s32) = G_CONSTANT i32 7 - %shift:_(s32) = G_SHL %arg, %shiftamt - %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE1 %shift - $vgpr0 = COPY %result + ; CHECK-NEXT: %arg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %shiftamt:_(i32) = G_CONSTANT i32 7 + ; CHECK-NEXT: %shift:_(i32) = G_SHL %arg, %shiftamt(i32) + ; CHECK-NEXT: %result:_(i32) = G_AMDGPU_CVT_F32_UBYTE1 %shift + ; CHECK-NEXT: $vgpr0 = COPY %result(i32) + %arg:_(i32) = COPY $vgpr0 + %shiftamt:_(i32) = G_CONSTANT i32 7 + %shift:_(i32) = G_SHL %arg, %shiftamt(i32) + %result:_(i32) = G_AMDGPU_CVT_F32_UBYTE1 %shift + $vgpr0 = COPY %result(i32) ... --- @@ -610,14 +610,14 @@ body: | ; CHECK-LABEL: name: cvt_f32_ubyte3_shl_17 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %arg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %shiftamt:_(s32) = G_CONSTANT i32 17 - ; CHECK-NEXT: %shift:_(s32) = G_SHL %arg, %shiftamt(s32) - ; CHECK-NEXT: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE3 %shift - ; CHECK-NEXT: $vgpr0 = COPY %result(s32) - %arg:_(s32) = COPY $vgpr0 - %shiftamt:_(s32) = G_CONSTANT i32 17 - %shift:_(s32) = G_SHL %arg, %shiftamt - %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE3 %shift - $vgpr0 = COPY %result + ; CHECK-NEXT: %arg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %shiftamt:_(i32) = G_CONSTANT i32 17 + ; CHECK-NEXT: %shift:_(i32) = G_SHL %arg, %shiftamt(i32) + ; CHECK-NEXT: %result:_(i32) = G_AMDGPU_CVT_F32_UBYTE3 %shift + ; CHECK-NEXT: $vgpr0 = COPY %result(i32) + %arg:_(i32) = COPY $vgpr0 + %shiftamt:_(i32) = G_CONSTANT i32 17 + %shift:_(i32) = G_SHL %arg, %shiftamt(i32) + %result:_(i32) = G_AMDGPU_CVT_F32_UBYTE3 %shift + $vgpr0 = COPY %result(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-ashr-narrow.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-ashr-narrow.mir index 1eb0b7de0692e..b61f66718d1f5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-ashr-narrow.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-ashr-narrow.mir @@ -11,16 +11,16 @@ body: | ; CHECK-LABEL: name: narrow_ashr_s64_32_s64amt ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV1]](s32), [[ASHR]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_CONSTANT i64 32 - %2:_(s64) = G_ASHR %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[UV1]], [[C]](i32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV1]](i32), [[ASHR]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = G_CONSTANT i64 32 + %2:_(i64) = G_ASHR %0, %1(i64) + $vgpr0_vgpr1 = COPY %2(i64) ... --- @@ -33,16 +33,16 @@ body: | ; CHECK-LABEL: name: narrow_ashr_s64_32 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV1]](s32), [[ASHR]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_CONSTANT i32 32 - %2:_(s64) = G_ASHR %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[UV1]], [[C]](i32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV1]](i32), [[ASHR]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = G_CONSTANT i32 32 + %2:_(i64) = G_ASHR %0, %1(i32) + $vgpr0_vgpr1 = COPY %2(i64) ... 
--- @@ -55,18 +55,18 @@ body: | ; CHECK-LABEL: name: narrow_ashr_s64_33 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C1]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR1]](s32), [[ASHR]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_CONSTANT i32 33 - %2:_(s64) = G_ASHR %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[UV1]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[UV1]], [[C1]](i32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[ASHR1]](i32), [[ASHR]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = G_CONSTANT i32 33 + %2:_(i64) = G_ASHR %0, %1(i32) + $vgpr0_vgpr1 = COPY %2(i64) ... --- @@ -79,14 +79,14 @@ body: | ; CHECK-LABEL: name: narrow_ashr_s64_31 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ASHR]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_CONSTANT i32 31 - %2:_(s64) = G_ASHR %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[COPY]], [[C]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ASHR]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = G_CONSTANT i32 31 + %2:_(i64) = G_ASHR %0, %1(i32) + $vgpr0_vgpr1 = COPY %2(i64) ... --- @@ -99,16 +99,16 @@ body: | ; CHECK-LABEL: name: narrow_ashr_s64_63 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR]](s32), [[ASHR]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_CONSTANT i32 63 - %2:_(s64) = G_ASHR %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[UV1]], [[C]](i32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[ASHR]](i32), [[ASHR]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = G_CONSTANT i32 63 + %2:_(i64) = G_ASHR %0, %1(i32) + $vgpr0_vgpr1 = COPY %2(i64) ... 
--- @@ -121,12 +121,12 @@ body: | ; CHECK-LABEL: name: narrow_ashr_s64_64 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_CONSTANT i32 64 - %2:_(s64) = G_ASHR %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = G_CONSTANT i32 64 + %2:_(i64) = G_ASHR %0, %1(i32) + $vgpr0_vgpr1 = COPY %2(i64) ... --- @@ -139,12 +139,12 @@ body: | ; CHECK-LABEL: name: narrow_ashr_s64_65 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_CONSTANT i32 65 - %2:_(s64) = G_ASHR %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = G_CONSTANT i32 65 + %2:_(i64) = G_ASHR %0, %1(i32) + $vgpr0_vgpr1 = COPY %2(i64) ... --- @@ -157,14 +157,14 @@ body: | ; CHECK-LABEL: name: narrow_ashr_s32_16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[ASHR]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_CONSTANT i32 16 - %2:_(s32) = G_ASHR %0, %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[COPY]], [[C]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[ASHR]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_CONSTANT i32 16 + %2:_(i32) = G_ASHR %0, %1(i32) + $vgpr0 = COPY %2(i32) ... --- @@ -177,14 +177,14 @@ body: | ; CHECK-LABEL: name: narrow_ashr_s32_17 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[ASHR]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_CONSTANT i32 17 - %2:_(s32) = G_ASHR %0, %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 17 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[COPY]], [[C]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[ASHR]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_CONSTANT i32 17 + %2:_(i32) = G_ASHR %0, %1(i32) + $vgpr0 = COPY %2(i32) ... 
--- @@ -197,14 +197,14 @@ body: | ; CHECK-LABEL: name: narrow_ashr_v2s32_17 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(<2 x s32>) = G_ASHR [[COPY]], [[BUILD_VECTOR]](<2 x s32>) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ASHR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_CONSTANT i32 17 - %2:_(<2 x s32>) = G_BUILD_VECTOR %1, %1 - %3:_(<2 x s32>) = G_ASHR %0, %2 - $vgpr0_vgpr1 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 17 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[C]](i32), [[C]](i32) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(<2 x i32>) = G_ASHR [[COPY]], [[BUILD_VECTOR]](<2 x i32>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ASHR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(i32) = G_CONSTANT i32 17 + %2:_(<2 x i32>) = G_BUILD_VECTOR %1(i32), %1(i32) + %3:_(<2 x i32>) = G_ASHR %0, %2(<2 x i32>) + $vgpr0_vgpr1 = COPY %3(<2 x i32>) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-ext-legalizer.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-ext-legalizer.mir index cb732ace112d8..8eb0dabf64637 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-ext-legalizer.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-ext-legalizer.mir @@ -10,13 +10,13 @@ body: | ; CHECK-LABEL: name: test_sext_trunc_i64_i32_i64 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 32 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_TRUNC %0 - %2:_(s64) = G_SEXT %1 - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[COPY]], 32 + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = G_TRUNC %0(i64) + %2:_(i64) = G_SEXT %1(i32) + $vgpr0_vgpr1 = COPY %2(i64) ... --- @@ -28,14 +28,14 @@ body: | ; CHECK-LABEL: name: test_zext_trunc_i64_i32_i64 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AND]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_TRUNC %0 - %2:_(s64) = G_ZEXT %1 - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4294967295 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AND]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = G_TRUNC %0(i64) + %2:_(i64) = G_ZEXT %1(i32) + $vgpr0_vgpr1 = COPY %2(i64) ... 
--- @@ -47,13 +47,13 @@ body: | ; CHECK-LABEL: name: test_zext_zext_i32_i48_i64 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) - %0:_(s32) = COPY $vgpr0 - %1:_(s48) = G_ZEXT %0 - %2:_(s64) = G_ZEXT %1 - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[COPY]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](i64) + %0:_(i32) = COPY $vgpr0 + %1:_(i48) = G_ZEXT %0(i32) + %2:_(i64) = G_ZEXT %1(i48) + $vgpr0_vgpr1 = COPY %2(i64) ... --- @@ -65,13 +65,13 @@ body: | ; CHECK-LABEL: name: test_sext_zext_i32_i48_i64 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) - %0:_(s32) = COPY $vgpr0 - %1:_(s48) = G_ZEXT %0 - %2:_(s64) = G_SEXT %1 - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[COPY]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](i64) + %0:_(i32) = COPY $vgpr0 + %1:_(i48) = G_ZEXT %0(i32) + %2:_(i64) = G_SEXT %1(i48) + $vgpr0_vgpr1 = COPY %2(i64) ... --- @@ -83,11 +83,11 @@ body: | ; CHECK-LABEL: name: test_sext_sext_i32_i48_i64 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64) - %0:_(s32) = COPY $vgpr0 - %1:_(s48) = G_SEXT %0 - %2:_(s64) = G_SEXT %1 - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(i64) = G_SEXT [[COPY]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](i64) + %0:_(i32) = COPY $vgpr0 + %1:_(i48) = G_SEXT %0(i32) + %2:_(i64) = G_SEXT %1(i48) + $vgpr0_vgpr1 = COPY %2(i64) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-extract-vector-load.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-extract-vector-load.mir index 0a2b3da7f7d94..ca6b0cc2c02f9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-extract-vector-load.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-extract-vector-load.mir @@ -8,16 +8,16 @@ tracksRegLiveness: true body: | bb.0: ; CHECK-LABEL: name: test_ptradd_crash__offset_smaller - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[C]](s64) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[INTTOPTR]](p1) :: (load (s32), addrspace 1) - ; CHECK-NEXT: $sgpr0 = COPY [[LOAD]](s32) + ; CHECK: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[C]](i64) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[INTTOPTR]](p1) :: (load (i32), addrspace 1) + ; CHECK-NEXT: $sgpr0 = COPY [[LOAD]](i32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 - %1:_(p1) = G_CONSTANT i64 0 - %3:_(s32) = G_CONSTANT i32 3 - %0:_(<4 x s32>) = G_LOAD %1 :: (load (<4 x s32>) from `ptr addrspace(1) null`, addrspace 1) - %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %3 - $sgpr0 = COPY %2 + %0:_(p1) = G_CONSTANT i64 0 + %1:_(i32) = G_CONSTANT i32 3 + %2:_(<4 x i32>) = G_LOAD %0(p1) :: (load (<4 x i32>) from `ptr addrspace(1) null`, addrspace 1) + %3:_(i32) = G_EXTRACT_VECTOR_ELT %2(<4 x i32>), %1(i32) + $sgpr0 = COPY %3(i32) SI_RETURN_TO_EPILOG implicit $sgpr0 ... @@ -28,15 +28,15 @@ tracksRegLiveness: true body: | bb.0: ; CHECK-LABEL: name: test_ptradd_crash__offset_wider - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[C]](s64) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[INTTOPTR]](p1) :: (load (s32), addrspace 1) - ; CHECK-NEXT: $sgpr0 = COPY [[LOAD]](s32) + ; CHECK: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[C]](i64) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[INTTOPTR]](p1) :: (load (i32), addrspace 1) + ; CHECK-NEXT: $sgpr0 = COPY [[LOAD]](i32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 - %1:_(p1) = G_CONSTANT i64 0 - %3:_(s32) = G_CONSTANT i32 3 - %0:_(<4 x s32>) = G_LOAD %1 :: (load (<4 x s32>) from `ptr addrspace(1) null`, addrspace 1) - %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %3 - $sgpr0 = COPY %2 + %0:_(p1) = G_CONSTANT i64 0 + %1:_(i32) = G_CONSTANT i32 3 + %2:_(<4 x i32>) = G_LOAD %0(p1) :: (load (<4 x i32>) from `ptr addrspace(1) null`, addrspace 1) + %3:_(i32) = G_EXTRACT_VECTOR_ELT %2(<4 x i32>), %1(i32) + $sgpr0 = COPY %3(i32) SI_RETURN_TO_EPILOG implicit $sgpr0 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fabs-fneg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fabs-fneg.mir index 829d994a92297..41906cd8e771b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fabs-fneg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fabs-fneg.mir @@ -11,17 +11,21 @@ body: | ; CHECK-LABEL: name: test_f16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[TRUNC]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FABS]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0:_(s32) - %2:_(s16) = G_FNEG %1:_ - %3:_(s16) = G_FABS %2:_ - %4:_(s32) = G_ANYEXT %3:_(s16) - $vgpr0 = COPY %4:_(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; CHECK-NEXT: [[FABS:%[0-9]+]]:_(f16) = G_FABS [[BITCAST]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[FABS]](f16) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(f16) = G_BITCAST %1(i16) + %3:_(f16) = G_FNEG %2 + %4:_(f16) = G_FABS %3 + %5:_(i16) = G_BITCAST %4(f16) + %6:_(i32) = G_ANYEXT %5(i16) + $vgpr0 = COPY %6(i32) ... --- @@ -34,13 +38,17 @@ body: | ; CHECK-LABEL: name: test_f32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[COPY]] - ; CHECK-NEXT: $vgpr0 = COPY [[FABS]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_FNEG %0 - %2:_(s32) = G_FABS %1 - $vgpr0 = COPY %2(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[FABS:%[0-9]+]]:_(f32) = G_FABS [[BITCAST]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[FABS]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = G_FNEG %1 + %3:_(f32) = G_FABS %2 + %4:_(i32) = G_BITCAST %3(f32) + $vgpr0 = COPY %4(i32) ... --- @@ -53,13 +61,17 @@ body: | ; CHECK-LABEL: name: test_f64 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[FABS:%[0-9]+]]:_(s64) = G_FABS [[COPY]] - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[FABS]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_FNEG %0 - %2:_(s64) = G_FABS %1 - $vgpr0_vgpr1 = COPY %2(s64) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; CHECK-NEXT: [[FABS:%[0-9]+]]:_(f64) = G_FABS [[BITCAST]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[FABS]](f64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(f64) = G_BITCAST %0(i64) + %2:_(f64) = G_FNEG %1 + %3:_(f64) = G_FABS %2 + %4:_(i64) = G_BITCAST %3(f64) + $vgpr0_vgpr1 = COPY %4(i64) ... 
--- @@ -72,13 +84,17 @@ body: | ; CHECK-LABEL: name: test_v2f16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[FABS:%[0-9]+]]:_(<2 x s16>) = G_FABS [[COPY]] - ; CHECK-NEXT: $vgpr0 = COPY [[FABS]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = G_FNEG %0 - %2:_(<2 x s16>) = G_FABS %1 - $vgpr0 = COPY %2(<2 x s16>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[FABS:%[0-9]+]]:_(<2 x f16>) = G_FABS [[BITCAST]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[FABS]](<2 x f16>) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %2:_(<2 x f16>) = G_FNEG %1 + %3:_(<2 x f16>) = G_FABS %2 + %4:_(<2 x i16>) = G_BITCAST %3(<2 x f16>) + $vgpr0 = COPY %4(<2 x i16>) ... --- @@ -91,12 +107,16 @@ body: | ; CHECK-LABEL: name: test_v3f32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[FABS:%[0-9]+]]:_(<3 x s32>) = G_FABS [[COPY]] - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[FABS]](<3 x s32>) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s32>) = G_FNEG %0 - %2:_(<3 x s32>) = G_FABS %1 - $vgpr0_vgpr1_vgpr2 = COPY %2(<3 x s32>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY]](<3 x i32>) + ; CHECK-NEXT: [[FABS:%[0-9]+]]:_(<3 x f32>) = G_FABS [[BITCAST]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[FABS]](<3 x f32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST1]](<3 x i32>) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x f32>) = G_BITCAST %0(<3 x i32>) + %2:_(<3 x f32>) = G_FNEG %1 + %3:_(<3 x f32>) = G_FABS %2 + %4:_(<3 x i32>) = G_BITCAST %3(<3 x f32>) + $vgpr0_vgpr1_vgpr2 = COPY %4(<3 x i32>) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fcanonicalize.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fcanonicalize.mir index 020761352148f..9c418173bcf8c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fcanonicalize.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fcanonicalize.mir @@ -12,13 +12,17 @@ body: | ; CHECK-LABEL: name: test_fcanonicalize ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; CHECK-NEXT: $vgpr0 = COPY [[FCANONICALIZE]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_FCANONICALIZE %0 - %2:_(s32) = G_FCANONICALIZE %1 - $vgpr0 = COPY %2(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[FCANONICALIZE]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = G_FCANONICALIZE %1 + %3:_(f32) = G_FCANONICALIZE %2 + %4:_(i32) = G_BITCAST %3(f32) + $vgpr0 = COPY %4(i32) ... 
--- @@ -29,11 +33,13 @@ body: | bb.0: ; CHECK-LABEL: name: test_fconstant - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+10 - ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32) - %0:_(s32) = G_FCONSTANT float 1.0e10 - %1:_(s32) = G_FCANONICALIZE %0 - $vgpr0 = COPY %1(s32) + ; CHECK: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+10 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[C]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](i32) + %0:_(f32) = G_FCONSTANT float 1.000000e+10 + %1:_(f32) = G_FCANONICALIZE %0 + %2:_(i32) = G_BITCAST %1(f32) + $vgpr0 = COPY %2(i32) ... # FIXME: Mode fields are redundant and not considered. @@ -49,11 +55,13 @@ body: | bb.0: ; CHECK-LABEL: name: test_denormal_fconstant - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.618950e-319 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[C]](s64) - %0:_(s64) = G_FCONSTANT double 0x0000000000008000 - %1:_(s64) = G_FCANONICALIZE %0 - $vgpr0_vgpr1 = COPY %1(s64) + ; CHECK: [[C:%[0-9]+]]:_(f64) = G_FCONSTANT double 1.618950e-319 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i64) = G_BITCAST [[C]](f64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](i64) + %0:_(f64) = G_FCONSTANT double 1.618950e-319 + %1:_(f64) = G_FCANONICALIZE %0 + %2:_(i64) = G_BITCAST %1(f64) + $vgpr0_vgpr1 = COPY %2(i64) ... --- @@ -70,25 +78,33 @@ body: | ; CHECK-LABEL: name: test_fminnum_with_fminnum_argument_s32_ieee_mode_on ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; CHECK-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] - ; CHECK-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FMINNUM_IEEE]], [[FCANONICALIZE2]] - ; CHECK-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE1]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %7:_(s32) = G_FCANONICALIZE %0 - %8:_(s32) = G_FCANONICALIZE %1 - %2:_(s32) = G_FMINNUM_IEEE %7, %8 - %3:_(s32) = COPY $vgpr2 - %5:_(s32) = G_FCANONICALIZE %2 - %6:_(s32) = G_FCANONICALIZE %3 - %4:_(s32) = G_FMINNUM_IEEE %5, %6 - $vgpr0 = COPY %4(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST1]] + ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; CHECK-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST2]] + ; CHECK-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FMINNUM_IEEE]], [[FCANONICALIZE2]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMINNUM_IEEE1]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_FCANONICALIZE %2 + %4:_(f32) = G_BITCAST %1(i32) + %5:_(f32) = G_FCANONICALIZE %4 + 
%6:_(f32) = G_FMINNUM_IEEE %3, %5 + %7:_(i32) = COPY $vgpr2 + %8:_(f32) = G_FCANONICALIZE %6 + %9:_(f32) = G_BITCAST %7(i32) + %10:_(f32) = G_FCANONICALIZE %9 + %11:_(f32) = G_FMINNUM_IEEE %8, %10 + %12:_(i32) = G_BITCAST %11(f32) + $vgpr0 = COPY %12(i32) ... --- @@ -105,25 +121,33 @@ body: | ; CHECK-LABEL: name: test_fminnum_with_fmaxnum_argument_s32_ieee_mode_on ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; CHECK-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] - ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FMAXNUM_IEEE]], [[FCANONICALIZE2]] - ; CHECK-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %7:_(s32) = G_FCANONICALIZE %0 - %8:_(s32) = G_FCANONICALIZE %1 - %2:_(s32) = G_FMAXNUM_IEEE %7, %8 - %3:_(s32) = COPY $vgpr2 - %5:_(s32) = G_FCANONICALIZE %2 - %6:_(s32) = G_FCANONICALIZE %3 - %4:_(s32) = G_FMINNUM_IEEE %5, %6 - $vgpr0 = COPY %4(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST1]] + ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; CHECK-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST2]] + ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FMAXNUM_IEEE]], [[FCANONICALIZE2]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMINNUM_IEEE]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_FCANONICALIZE %2 + %4:_(f32) = G_BITCAST %1(i32) + %5:_(f32) = G_FCANONICALIZE %4 + %6:_(f32) = G_FMAXNUM_IEEE %3, %5 + %7:_(i32) = COPY $vgpr2 + %8:_(f32) = G_FCANONICALIZE %6 + %9:_(f32) = G_BITCAST %7(i32) + %10:_(f32) = G_FCANONICALIZE %9 + %11:_(f32) = G_FMINNUM_IEEE %8, %10 + %12:_(i32) = G_BITCAST %11(f32) + $vgpr0 = COPY %12(i32) ... 
--- @@ -140,25 +164,33 @@ body: | ; CHECK-LABEL: name: test_fmaxnum_with_fmaxnum_argument_s32_ieee_mode_on ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; CHECK-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] - ; CHECK-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FMAXNUM_IEEE]], [[FCANONICALIZE2]] - ; CHECK-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE1]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %7:_(s32) = G_FCANONICALIZE %0 - %8:_(s32) = G_FCANONICALIZE %1 - %2:_(s32) = G_FMAXNUM_IEEE %7, %8 - %3:_(s32) = COPY $vgpr2 - %5:_(s32) = G_FCANONICALIZE %2 - %6:_(s32) = G_FCANONICALIZE %3 - %4:_(s32) = G_FMAXNUM_IEEE %5, %6 - $vgpr0 = COPY %4(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST1]] + ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; CHECK-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST2]] + ; CHECK-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FMAXNUM_IEEE]], [[FCANONICALIZE2]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMAXNUM_IEEE1]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_FCANONICALIZE %2 + %4:_(f32) = G_BITCAST %1(i32) + %5:_(f32) = G_FCANONICALIZE %4 + %6:_(f32) = G_FMAXNUM_IEEE %3, %5 + %7:_(i32) = COPY $vgpr2 + %8:_(f32) = G_FCANONICALIZE %6 + %9:_(f32) = G_BITCAST %7(i32) + %10:_(f32) = G_FCANONICALIZE %9 + %11:_(f32) = G_FMAXNUM_IEEE %8, %10 + %12:_(i32) = G_BITCAST %11(f32) + $vgpr0 = COPY %12(i32) ... 
--- @@ -175,25 +207,33 @@ body: | ; CHECK-LABEL: name: test_fmaxnum_with_fminnum_argument_s32_ieee_mode_on ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; CHECK-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] - ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FMINNUM_IEEE]], [[FCANONICALIZE2]] - ; CHECK-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %7:_(s32) = G_FCANONICALIZE %0 - %8:_(s32) = G_FCANONICALIZE %1 - %2:_(s32) = G_FMINNUM_IEEE %7, %8 - %3:_(s32) = COPY $vgpr2 - %5:_(s32) = G_FCANONICALIZE %2 - %6:_(s32) = G_FCANONICALIZE %3 - %4:_(s32) = G_FMAXNUM_IEEE %5, %6 - $vgpr0 = COPY %4(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST1]] + ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; CHECK-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST2]] + ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FMINNUM_IEEE]], [[FCANONICALIZE2]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMAXNUM_IEEE]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_FCANONICALIZE %2 + %4:_(f32) = G_BITCAST %1(i32) + %5:_(f32) = G_FCANONICALIZE %4 + %6:_(f32) = G_FMINNUM_IEEE %3, %5 + %7:_(i32) = COPY $vgpr2 + %8:_(f32) = G_FCANONICALIZE %6 + %9:_(f32) = G_BITCAST %7(i32) + %10:_(f32) = G_FCANONICALIZE %9 + %11:_(f32) = G_FMAXNUM_IEEE %8, %10 + %12:_(i32) = G_BITCAST %11(f32) + $vgpr0 = COPY %12(i32) ... 
--- @@ -210,22 +250,28 @@ body: | ; CHECK-LABEL: name: test_multiple_uses ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; CHECK-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FMINNUM_IEEE]], [[FMINNUM_IEEE]] - ; CHECK-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %6:_(s32) = G_FCANONICALIZE %0 - %7:_(s32) = G_FCANONICALIZE %1 - %2:_(s32) = G_FMINNUM_IEEE %6, %7 - %4:_(s32) = G_FCANONICALIZE %2 - %5:_(s32) = G_FCANONICALIZE %2 - %3:_(s32) = G_FMAXNUM_IEEE %4, %5 - $vgpr0 = COPY %3(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST1]] + ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FMINNUM_IEEE]], [[FMINNUM_IEEE]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMAXNUM_IEEE]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_FCANONICALIZE %2 + %4:_(f32) = G_BITCAST %1(i32) + %5:_(f32) = G_FCANONICALIZE %4 + %6:_(f32) = G_FMINNUM_IEEE %3, %5 + %7:_(f32) = G_FCANONICALIZE %6 + %8:_(f32) = G_FCANONICALIZE %6 + %9:_(f32) = G_FMAXNUM_IEEE %7, %8 + %10:_(i32) = G_BITCAST %9(f32) + $vgpr0 = COPY %10(i32) ... 
--- @@ -242,40 +288,58 @@ body: | ; CHECK-LABEL: name: test_splat_padded_with_undef ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: %two:_(s16) = G_FCONSTANT half 0xH4000 - ; CHECK-NEXT: %two_s32:_(s32) = G_ANYEXT %two(s16) - ; CHECK-NEXT: %two_splat:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %two_s32(s32), %two_s32(s32) - ; CHECK-NEXT: %zero:_(s16) = G_FCONSTANT half 0xH0000 - ; CHECK-NEXT: %zero_s32:_(s32) = G_ANYEXT %zero(s16) - ; CHECK-NEXT: %undef:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: %zero_undef:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %zero_s32(s32), %undef(s32) - ; CHECK-NEXT: %one:_(s16) = G_FCONSTANT half 0xH3C00 - ; CHECK-NEXT: %one_s32:_(s32) = G_ANYEXT %one(s16) - ; CHECK-NEXT: %one_undef:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %one_s32(s32), %undef(s32) - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(<2 x s16>) = G_FMUL [[COPY]], %two_splat - ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FMUL]], %zero_undef - ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FMAXNUM_IEEE]], %one_undef - ; CHECK-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %two:_(s16) = G_FCONSTANT half 0xH4000 - %two_s32:_(s32) = G_ANYEXT %two(s16) - %two_splat:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %two_s32(s32), %two_s32(s32) - %zero:_(s16) = G_FCONSTANT half 0xH0000 - %zero_s32:_(s32) = G_ANYEXT %zero(s16) - %undef:_(s32) = G_IMPLICIT_DEF - %zero_undef:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %zero_s32(s32), %undef(s32) - %one:_(s16) = G_FCONSTANT half 0xH3C00 - %one_s32:_(s32) = G_ANYEXT %one(s16) - %one_undef:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %one_s32(s32), %undef(s32) - %4:_(<2 x s16>) = G_FMUL %0, %two_splat - %zero_undef_fcan:_(<2 x s16>) = G_FCANONICALIZE %zero_undef - %16:_(<2 x s16>) = G_FCANONICALIZE %4 - %8:_(<2 x s16>) = G_FMAXNUM_IEEE %zero_undef_fcan, %16 - %one_undef_fcan:_(<2 x s16>) = G_FCANONICALIZE %one_undef - %14:_(<2 x s16>) = G_FCANONICALIZE %8 - %11:_(<2 x s16>) = G_FMINNUM_IEEE %one_undef_fcan, %14 - $vgpr0 = COPY %11(<2 x s16>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: %two:_(f16) = G_FCONSTANT half 0xH4000 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i16) = G_BITCAST %two(f16) + ; CHECK-NEXT: %two_s32:_(i32) = G_ANYEXT [[BITCAST]](i16) + ; CHECK-NEXT: %two_splat:_(<2 x i16>) = G_BUILD_VECTOR_TRUNC %two_s32(i32), %two_s32(i32) + ; CHECK-NEXT: %zero:_(f16) = G_FCONSTANT half 0xH0000 + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST %zero(f16) + ; CHECK-NEXT: %zero_s32:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; CHECK-NEXT: %undef:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: %zero_undef:_(<2 x i16>) = G_BUILD_VECTOR_TRUNC %zero_s32(i32), %undef(i32) + ; CHECK-NEXT: %one:_(f16) = G_FCONSTANT half 0xH3C00 + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST %one(f16) + ; CHECK-NEXT: %one_s32:_(i32) = G_ANYEXT [[BITCAST2]](i16) + ; CHECK-NEXT: %one_undef:_(<2 x i16>) = G_BUILD_VECTOR_TRUNC %one_s32(i32), %undef(i32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x f16>) = G_BITCAST %two_splat(<2 x i16>) + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(<2 x f16>) = G_FMUL [[BITCAST3]], [[BITCAST4]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x f16>) = G_BITCAST %zero_undef(<2 x i16>) + ; CHECK-NEXT: %zero_undef_fcan:_(<2 x f16>) = G_FCANONICALIZE [[BITCAST5]] + ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x f16>) = G_FMAXNUM_IEEE %zero_undef_fcan, [[FMUL]] + ; CHECK-NEXT: 
[[BITCAST6:%[0-9]+]]:_(<2 x f16>) = G_BITCAST %one_undef(<2 x i16>) + ; CHECK-NEXT: %one_undef_fcan:_(<2 x f16>) = G_FCANONICALIZE [[BITCAST6]] + ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x f16>) = G_FMINNUM_IEEE %one_undef_fcan, [[FMAXNUM_IEEE]] + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[FMINNUM_IEEE]](<2 x f16>) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST7]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %two:_(f16) = G_FCONSTANT half 0xH4000 + %2:_(i16) = G_BITCAST %two(f16) + %two_s32:_(i32) = G_ANYEXT %2(i16) + %two_splat:_(<2 x i16>) = G_BUILD_VECTOR_TRUNC %two_s32(i32), %two_s32(i32) + %zero:_(f16) = G_FCONSTANT half 0xH0000 + %6:_(i16) = G_BITCAST %zero(f16) + %zero_s32:_(i32) = G_ANYEXT %6(i16) + %undef:_(i32) = G_IMPLICIT_DEF + %zero_undef:_(<2 x i16>) = G_BUILD_VECTOR_TRUNC %zero_s32(i32), %undef(i32) + %one:_(f16) = G_FCONSTANT half 0xH3C00 + %11:_(i16) = G_BITCAST %one(f16) + %one_s32:_(i32) = G_ANYEXT %11(i16) + %one_undef:_(<2 x i16>) = G_BUILD_VECTOR_TRUNC %one_s32(i32), %undef(i32) + %14:_(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %15:_(<2 x f16>) = G_BITCAST %two_splat(<2 x i16>) + %16:_(<2 x f16>) = G_FMUL %14, %15 + %17:_(<2 x f16>) = G_BITCAST %zero_undef(<2 x i16>) + %zero_undef_fcan:_(<2 x f16>) = G_FCANONICALIZE %17 + %19:_(<2 x f16>) = G_FCANONICALIZE %16 + %20:_(<2 x f16>) = G_FMAXNUM_IEEE %zero_undef_fcan, %19 + %21:_(<2 x f16>) = G_BITCAST %one_undef(<2 x i16>) + %one_undef_fcan:_(<2 x f16>) = G_FCANONICALIZE %21 + %23:_(<2 x f16>) = G_FCANONICALIZE %20 + %24:_(<2 x f16>) = G_FMINNUM_IEEE %one_undef_fcan, %23 + %25:_(<2 x i16>) = G_BITCAST %24(<2 x f16>) + $vgpr0 = COPY %25(<2 x i16>) ... --- @@ -292,41 +356,58 @@ body: | ; CHECK-LABEL: name: test_splat_SNaN_and_QNaN_padded_with_undef ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: %two:_(s16) = G_FCONSTANT half 0xH4000 - ; CHECK-NEXT: %two_s32:_(s32) = G_ANYEXT %two(s16) - ; CHECK-NEXT: %two_splat:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %two_s32(s32), %two_s32(s32) - ; CHECK-NEXT: %snan:_(s16) = G_FCONSTANT half 0xH7C01 - ; CHECK-NEXT: %snan_s32:_(s32) = G_ANYEXT %snan(s16) - ; CHECK-NEXT: %undef:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: %snan_undef:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %snan_s32(s32), %undef(s32) - ; CHECK-NEXT: %qnan:_(s16) = G_FCONSTANT half 0xH7E01 - ; CHECK-NEXT: %qnan_s32:_(s32) = G_ANYEXT %qnan(s16) - ; CHECK-NEXT: %qnan_undef:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %qnan_s32(s32), %undef(s32) - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(<2 x s16>) = G_FMUL [[COPY]], %two_splat - ; CHECK-NEXT: %snan_undef_fcan:_(<2 x s16>) = G_FCANONICALIZE %snan_undef - ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE %snan_undef_fcan, [[FMUL]] - ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FMAXNUM_IEEE]], %qnan_undef - ; CHECK-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %two:_(s16) = G_FCONSTANT half 0xH4000 - %two_s32:_(s32) = G_ANYEXT %two(s16) - %two_splat:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %two_s32(s32), %two_s32(s32) - %snan:_(s16) = G_FCONSTANT half 0xH7C01 - %snan_s32:_(s32) = G_ANYEXT %snan(s16) - %undef:_(s32) = G_IMPLICIT_DEF - %snan_undef:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %snan_s32(s32), %undef(s32) - %qnan:_(s16) = G_FCONSTANT half 0xH7E01 - %qnan_s32:_(s32) = G_ANYEXT %qnan(s16) - %qnan_undef:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %qnan_s32(s32), %undef(s32) - %4:_(<2 x s16>) = G_FMUL %0, %two_splat - %snan_undef_fcan:_(<2 x s16>) = 
G_FCANONICALIZE %snan_undef - %16:_(<2 x s16>) = G_FCANONICALIZE %4 - %8:_(<2 x s16>) = G_FMAXNUM_IEEE %snan_undef_fcan, %16 - %qnan_undef_fcan:_(<2 x s16>) = G_FCANONICALIZE %qnan_undef - %14:_(<2 x s16>) = G_FCANONICALIZE %8 - %11:_(<2 x s16>) = G_FMINNUM_IEEE %qnan_undef_fcan, %14 - $vgpr0 = COPY %11(<2 x s16>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: %two:_(f16) = G_FCONSTANT half 0xH4000 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i16) = G_BITCAST %two(f16) + ; CHECK-NEXT: %two_s32:_(i32) = G_ANYEXT [[BITCAST]](i16) + ; CHECK-NEXT: %two_splat:_(<2 x i16>) = G_BUILD_VECTOR_TRUNC %two_s32(i32), %two_s32(i32) + ; CHECK-NEXT: %snan:_(f16) = G_FCONSTANT half 0xH7C01 + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST %snan(f16) + ; CHECK-NEXT: %snan_s32:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; CHECK-NEXT: %undef:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: %snan_undef:_(<2 x i16>) = G_BUILD_VECTOR_TRUNC %snan_s32(i32), %undef(i32) + ; CHECK-NEXT: %qnan:_(f16) = G_FCONSTANT half 0xH7E01 + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST %qnan(f16) + ; CHECK-NEXT: %qnan_s32:_(i32) = G_ANYEXT [[BITCAST2]](i16) + ; CHECK-NEXT: %qnan_undef:_(<2 x i16>) = G_BUILD_VECTOR_TRUNC %qnan_s32(i32), %undef(i32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x f16>) = G_BITCAST %two_splat(<2 x i16>) + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(<2 x f16>) = G_FMUL [[BITCAST3]], [[BITCAST4]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x f16>) = G_BITCAST %snan_undef(<2 x i16>) + ; CHECK-NEXT: %snan_undef_fcan:_(<2 x f16>) = G_FCANONICALIZE [[BITCAST5]] + ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x f16>) = G_FMAXNUM_IEEE %snan_undef_fcan, [[FMUL]] + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x f16>) = G_BITCAST %qnan_undef(<2 x i16>) + ; CHECK-NEXT: %qnan_undef_fcan:_(<2 x f16>) = G_FCANONICALIZE [[BITCAST6]] + ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x f16>) = G_FMINNUM_IEEE %qnan_undef_fcan, [[FMAXNUM_IEEE]] + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[FMINNUM_IEEE]](<2 x f16>) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST7]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %two:_(f16) = G_FCONSTANT half 0xH4000 + %2:_(i16) = G_BITCAST %two(f16) + %two_s32:_(i32) = G_ANYEXT %2(i16) + %two_splat:_(<2 x i16>) = G_BUILD_VECTOR_TRUNC %two_s32(i32), %two_s32(i32) + %snan:_(f16) = G_FCONSTANT half 0xH7C01 + %6:_(i16) = G_BITCAST %snan(f16) + %snan_s32:_(i32) = G_ANYEXT %6(i16) + %undef:_(i32) = G_IMPLICIT_DEF + %snan_undef:_(<2 x i16>) = G_BUILD_VECTOR_TRUNC %snan_s32(i32), %undef(i32) + %qnan:_(f16) = G_FCONSTANT half 0xH7E01 + %11:_(i16) = G_BITCAST %qnan(f16) + %qnan_s32:_(i32) = G_ANYEXT %11(i16) + %qnan_undef:_(<2 x i16>) = G_BUILD_VECTOR_TRUNC %qnan_s32(i32), %undef(i32) + %14:_(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %15:_(<2 x f16>) = G_BITCAST %two_splat(<2 x i16>) + %16:_(<2 x f16>) = G_FMUL %14, %15 + %17:_(<2 x f16>) = G_BITCAST %snan_undef(<2 x i16>) + %snan_undef_fcan:_(<2 x f16>) = G_FCANONICALIZE %17 + %19:_(<2 x f16>) = G_FCANONICALIZE %16 + %20:_(<2 x f16>) = G_FMAXNUM_IEEE %snan_undef_fcan, %19 + %21:_(<2 x f16>) = G_BITCAST %qnan_undef(<2 x i16>) + %qnan_undef_fcan:_(<2 x f16>) = G_FCANONICALIZE %21 + %23:_(<2 x f16>) = G_FCANONICALIZE %20 + %24:_(<2 x f16>) = G_FMINNUM_IEEE %qnan_undef_fcan, %23 + %25:_(<2 x i16>) = G_BITCAST %24(<2 x f16>) + $vgpr0 = COPY %25(<2 x i16>) ... 
--- @@ -340,13 +421,17 @@ body: | ; CHECK-LABEL: name: test_fcanonicalize_log ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[COPY]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[INT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), %0 - %2:_(s32) = G_FCANONICALIZE %1 - $vgpr0 = COPY %2(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[BITCAST]](f32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[INT]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), %1(f32) + %3:_(f32) = G_FCANONICALIZE %2 + %4:_(i32) = G_BITCAST %3(f32) + $vgpr0 = COPY %4(i32) ... --- @@ -360,11 +445,15 @@ body: | ; CHECK-LABEL: name: test_fcanonicalize_exp2 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[COPY]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[INT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), %0 - %2:_(s32) = G_FCANONICALIZE %1 - $vgpr0 = COPY %2(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[BITCAST]](f32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[INT]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), %1(f32) + %3:_(f32) = G_FCANONICALIZE %2 + %4:_(i32) = G_BITCAST %3(f32) + $vgpr0 = COPY %4(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fdiv-sqrt-to-rsq.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fdiv-sqrt-to-rsq.mir index 6c5339e36c77f..f9bf3efe3a5f1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fdiv-sqrt-to-rsq.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fdiv-sqrt-to-rsq.mir @@ -11,18 +11,22 @@ body: | ; GCN-LABEL: name: rsq_f16 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: %x:_(s16) = G_TRUNC [[COPY]](s32) - ; GCN-NEXT: [[INT:%[0-9]+]]:_(s16) = contract G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %x(s16) - ; GCN-NEXT: %ext:_(s32) = G_ANYEXT [[INT]](s16) - ; GCN-NEXT: $vgpr0 = COPY %ext(s32) - %0:_(s32) = COPY $vgpr0 - %x:_(s16) = G_TRUNC %0:_(s32) - %sqrt:_(s16) = contract G_FSQRT %x - %one:_(s16) = G_FCONSTANT half 1.0 - %rsq:_(s16) = contract G_FDIV %one, %sqrt - %ext:_(s32) = G_ANYEXT %rsq:_(s16) - $vgpr0 = COPY %ext + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: %x:_(i16) = G_TRUNC [[COPY]](i32) + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST %x(i16) + ; GCN-NEXT: [[INT:%[0-9]+]]:_(f16) = contract G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), [[BITCAST]](f16) + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[INT]](f16) + ; GCN-NEXT: %ext:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; GCN-NEXT: $vgpr0 = COPY %ext(i32) + %0:_(i32) = COPY $vgpr0 + %x:_(i16) = G_TRUNC %0(i32) + %2:_(f16) = G_BITCAST %x(i16) + %sqrt:_(f16) = contract G_FSQRT %2 + %one:_(f16) = G_FCONSTANT half 0xH3C00 + %rsq:_(f16) = contract G_FDIV %one, %sqrt + %6:_(i16) = G_BITCAST %rsq(f16) + %ext:_(i32) = G_ANYEXT %6(i16) + $vgpr0 = COPY %ext(i32) ... @@ -36,20 +40,24 @@ body: | ; GCN-LABEL: name: rsq_f16_missing_contract0 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: %x:_(s16) = G_TRUNC [[COPY]](s32) - ; GCN-NEXT: %sqrt:_(s16) = G_FSQRT %x - ; GCN-NEXT: %one:_(s16) = G_FCONSTANT half 0xH3C00 - ; GCN-NEXT: %rsq:_(s16) = contract G_FDIV %one, %sqrt - ; GCN-NEXT: %ext:_(s32) = G_ANYEXT %rsq(s16) - ; GCN-NEXT: $vgpr0 = COPY %ext(s32) - %0:_(s32) = COPY $vgpr0 - %x:_(s16) = G_TRUNC %0:_(s32) - %sqrt:_(s16) = G_FSQRT %x - %one:_(s16) = G_FCONSTANT half 1.0 - %rsq:_(s16) = contract G_FDIV %one, %sqrt - %ext:_(s32) = G_ANYEXT %rsq:_(s16) - $vgpr0 = COPY %ext + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: %x:_(i16) = G_TRUNC [[COPY]](i32) + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST %x(i16) + ; GCN-NEXT: %sqrt:_(f16) = G_FSQRT [[BITCAST]] + ; GCN-NEXT: %one:_(f16) = G_FCONSTANT half 0xH3C00 + ; GCN-NEXT: %rsq:_(f16) = contract G_FDIV %one, %sqrt + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST %rsq(f16) + ; GCN-NEXT: %ext:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; GCN-NEXT: $vgpr0 = COPY %ext(i32) + %0:_(i32) = COPY $vgpr0 + %x:_(i16) = G_TRUNC %0(i32) + %2:_(f16) = G_BITCAST %x(i16) + %sqrt:_(f16) = G_FSQRT %2 + %one:_(f16) = G_FCONSTANT half 0xH3C00 + %rsq:_(f16) = contract G_FDIV %one, %sqrt + %6:_(i16) = G_BITCAST %rsq(f16) + %ext:_(i32) = G_ANYEXT %6(i16) + $vgpr0 = COPY %ext(i32) ... 
@@ -63,20 +71,24 @@ body: | ; GCN-LABEL: name: rsq_f16_missing_contract1 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: %x:_(s16) = G_TRUNC [[COPY]](s32) - ; GCN-NEXT: %sqrt:_(s16) = contract G_FSQRT %x - ; GCN-NEXT: %one:_(s16) = G_FCONSTANT half 0xH3C00 - ; GCN-NEXT: %rsq:_(s16) = G_FDIV %one, %sqrt - ; GCN-NEXT: %ext:_(s32) = G_ANYEXT %rsq(s16) - ; GCN-NEXT: $vgpr0 = COPY %ext(s32) - %0:_(s32) = COPY $vgpr0 - %x:_(s16) = G_TRUNC %0:_(s32) - %sqrt:_(s16) = contract G_FSQRT %x - %one:_(s16) = G_FCONSTANT half 1.0 - %rsq:_(s16) = G_FDIV %one, %sqrt - %ext:_(s32) = G_ANYEXT %rsq:_(s16) - $vgpr0 = COPY %ext + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: %x:_(i16) = G_TRUNC [[COPY]](i32) + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST %x(i16) + ; GCN-NEXT: %sqrt:_(f16) = contract G_FSQRT [[BITCAST]] + ; GCN-NEXT: %one:_(f16) = G_FCONSTANT half 0xH3C00 + ; GCN-NEXT: %rsq:_(f16) = G_FDIV %one, %sqrt + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST %rsq(f16) + ; GCN-NEXT: %ext:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; GCN-NEXT: $vgpr0 = COPY %ext(i32) + %0:_(i32) = COPY $vgpr0 + %x:_(i16) = G_TRUNC %0(i32) + %2:_(f16) = G_BITCAST %x(i16) + %sqrt:_(f16) = contract G_FSQRT %2 + %one:_(f16) = G_FCONSTANT half 0xH3C00 + %rsq:_(f16) = G_FDIV %one, %sqrt + %6:_(i16) = G_BITCAST %rsq(f16) + %ext:_(i32) = G_ANYEXT %6(i16) + $vgpr0 = COPY %ext(i32) ... @@ -90,19 +102,23 @@ body: | ; GCN-LABEL: name: neg_rsq_f16 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: %x:_(s16) = G_TRUNC [[COPY]](s32) - ; GCN-NEXT: [[INT:%[0-9]+]]:_(s16) = contract G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %x(s16) - ; GCN-NEXT: %rsq:_(s16) = contract G_FNEG [[INT]] - ; GCN-NEXT: %ext:_(s32) = G_ANYEXT %rsq(s16) - ; GCN-NEXT: $vgpr0 = COPY %ext(s32) - %0:_(s32) = COPY $vgpr0 - %x:_(s16) = G_TRUNC %0:_(s32) - %sqrt:_(s16) = contract G_FSQRT %x - %neg_one:_(s16) = G_FCONSTANT half -1.0 - %rsq:_(s16) = contract G_FDIV %neg_one, %sqrt - %ext:_(s32) = G_ANYEXT %rsq:_(s16) - $vgpr0 = COPY %ext + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: %x:_(i16) = G_TRUNC [[COPY]](i32) + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST %x(i16) + ; GCN-NEXT: [[INT:%[0-9]+]]:_(f16) = contract G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), [[BITCAST]](f16) + ; GCN-NEXT: %rsq:_(f16) = contract G_FNEG [[INT]] + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST %rsq(f16) + ; GCN-NEXT: %ext:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; GCN-NEXT: $vgpr0 = COPY %ext(i32) + %0:_(i32) = COPY $vgpr0 + %x:_(i16) = G_TRUNC %0(i32) + %2:_(f16) = G_BITCAST %x(i16) + %sqrt:_(f16) = contract G_FSQRT %2 + %neg_one:_(f16) = G_FCONSTANT half 0xHBC00 + %rsq:_(f16) = contract G_FDIV %neg_one, %sqrt + %6:_(i16) = G_BITCAST %rsq(f16) + %ext:_(i32) = G_ANYEXT %6(i16) + $vgpr0 = COPY %ext(i32) ... 
@@ -116,20 +132,24 @@ body: | ; GCN-LABEL: name: neg_rsq_f16_missing_contract0 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: %x:_(s16) = G_TRUNC [[COPY]](s32) - ; GCN-NEXT: %sqrt:_(s16) = G_FSQRT %x - ; GCN-NEXT: %neg_one:_(s16) = G_FCONSTANT half 0xHBC00 - ; GCN-NEXT: %rsq:_(s16) = contract G_FDIV %neg_one, %sqrt - ; GCN-NEXT: %ext:_(s32) = G_ANYEXT %rsq(s16) - ; GCN-NEXT: $vgpr0 = COPY %ext(s32) - %0:_(s32) = COPY $vgpr0 - %x:_(s16) = G_TRUNC %0:_(s32) - %sqrt:_(s16) = G_FSQRT %x - %neg_one:_(s16) = G_FCONSTANT half -1.0 - %rsq:_(s16) = contract G_FDIV %neg_one, %sqrt - %ext:_(s32) = G_ANYEXT %rsq:_(s16) - $vgpr0 = COPY %ext + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: %x:_(i16) = G_TRUNC [[COPY]](i32) + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST %x(i16) + ; GCN-NEXT: %sqrt:_(f16) = G_FSQRT [[BITCAST]] + ; GCN-NEXT: %neg_one:_(f16) = G_FCONSTANT half 0xHBC00 + ; GCN-NEXT: %rsq:_(f16) = contract G_FDIV %neg_one, %sqrt + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST %rsq(f16) + ; GCN-NEXT: %ext:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; GCN-NEXT: $vgpr0 = COPY %ext(i32) + %0:_(i32) = COPY $vgpr0 + %x:_(i16) = G_TRUNC %0(i32) + %2:_(f16) = G_BITCAST %x(i16) + %sqrt:_(f16) = G_FSQRT %2 + %neg_one:_(f16) = G_FCONSTANT half 0xHBC00 + %rsq:_(f16) = contract G_FDIV %neg_one, %sqrt + %6:_(i16) = G_BITCAST %rsq(f16) + %ext:_(i32) = G_ANYEXT %6(i16) + $vgpr0 = COPY %ext(i32) ... @@ -143,20 +163,24 @@ body: | ; GCN-LABEL: name: neg_rsq_f16_missing_contract1 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: %x:_(s16) = G_TRUNC [[COPY]](s32) - ; GCN-NEXT: %sqrt:_(s16) = contract G_FSQRT %x - ; GCN-NEXT: %neg_one:_(s16) = G_FCONSTANT half 0xHBC00 - ; GCN-NEXT: %rsq:_(s16) = G_FDIV %neg_one, %sqrt - ; GCN-NEXT: %ext:_(s32) = G_ANYEXT %rsq(s16) - ; GCN-NEXT: $vgpr0 = COPY %ext(s32) - %0:_(s32) = COPY $vgpr0 - %x:_(s16) = G_TRUNC %0:_(s32) - %sqrt:_(s16) = contract G_FSQRT %x - %neg_one:_(s16) = G_FCONSTANT half -1.0 - %rsq:_(s16) = G_FDIV %neg_one, %sqrt - %ext:_(s32) = G_ANYEXT %rsq:_(s16) - $vgpr0 = COPY %ext + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: %x:_(i16) = G_TRUNC [[COPY]](i32) + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST %x(i16) + ; GCN-NEXT: %sqrt:_(f16) = contract G_FSQRT [[BITCAST]] + ; GCN-NEXT: %neg_one:_(f16) = G_FCONSTANT half 0xHBC00 + ; GCN-NEXT: %rsq:_(f16) = G_FDIV %neg_one, %sqrt + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST %rsq(f16) + ; GCN-NEXT: %ext:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; GCN-NEXT: $vgpr0 = COPY %ext(i32) + %0:_(i32) = COPY $vgpr0 + %x:_(i16) = G_TRUNC %0(i32) + %2:_(f16) = G_BITCAST %x(i16) + %sqrt:_(f16) = contract G_FSQRT %2 + %neg_one:_(f16) = G_FCONSTANT half 0xHBC00 + %rsq:_(f16) = G_FDIV %neg_one, %sqrt + %6:_(i16) = G_BITCAST %rsq(f16) + %ext:_(i32) = G_ANYEXT %6(i16) + $vgpr0 = COPY %ext(i32) ... 
@@ -170,22 +194,28 @@ body: | ; GCN-LABEL: name: rsq_f16_multi_use ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: %x:_(s16) = G_TRUNC [[COPY]](s32) - ; GCN-NEXT: %sqrt:_(s16) = contract G_FSQRT %x - ; GCN-NEXT: %one:_(s16) = G_FCONSTANT half 0xH3C00 - ; GCN-NEXT: %rsq:_(s16) = contract G_FDIV %one, %sqrt - ; GCN-NEXT: %ext:_(s32) = G_ANYEXT %rsq(s16) - ; GCN-NEXT: $vgpr0 = COPY %ext(s32) - ; GCN-NEXT: S_ENDPGM 0, implicit %sqrt(s16) - %0:_(s32) = COPY $vgpr0 - %x:_(s16) = G_TRUNC %0:_(s32) - %sqrt:_(s16) = contract G_FSQRT %x - %one:_(s16) = G_FCONSTANT half 1.0 - %rsq:_(s16) = contract G_FDIV %one, %sqrt - %ext:_(s32) = G_ANYEXT %rsq:_(s16) - $vgpr0 = COPY %ext - S_ENDPGM 0, implicit %sqrt + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: %x:_(i16) = G_TRUNC [[COPY]](i32) + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST %x(i16) + ; GCN-NEXT: %sqrt:_(f16) = contract G_FSQRT [[BITCAST]] + ; GCN-NEXT: %one:_(f16) = G_FCONSTANT half 0xH3C00 + ; GCN-NEXT: %rsq:_(f16) = contract G_FDIV %one, %sqrt + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST %rsq(f16) + ; GCN-NEXT: %ext:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; GCN-NEXT: $vgpr0 = COPY %ext(i32) + ; GCN-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST %sqrt(f16) + ; GCN-NEXT: S_ENDPGM 0, implicit [[BITCAST2]](i16) + %0:_(i32) = COPY $vgpr0 + %x:_(i16) = G_TRUNC %0(i32) + %2:_(f16) = G_BITCAST %x(i16) + %sqrt:_(f16) = contract G_FSQRT %2 + %one:_(f16) = G_FCONSTANT half 0xH3C00 + %rsq:_(f16) = contract G_FDIV %one, %sqrt + %6:_(i16) = G_BITCAST %rsq(f16) + %ext:_(i32) = G_ANYEXT %6(i16) + $vgpr0 = COPY %ext(i32) + %8:_(i16) = G_BITCAST %sqrt(f16) + S_ENDPGM 0, implicit %8(i16) ... @@ -199,22 +229,28 @@ body: | ; GCN-LABEL: name: rsq_f16_multi_use_missing_contract0 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: %x:_(s16) = G_TRUNC [[COPY]](s32) - ; GCN-NEXT: %sqrt:_(s16) = G_FSQRT %x - ; GCN-NEXT: %one:_(s16) = G_FCONSTANT half 0xH3C00 - ; GCN-NEXT: %rsq:_(s16) = contract G_FDIV %one, %sqrt - ; GCN-NEXT: %ext:_(s32) = G_ANYEXT %rsq(s16) - ; GCN-NEXT: $vgpr0 = COPY %ext(s32) - ; GCN-NEXT: S_ENDPGM 0, implicit %sqrt(s16) - %0:_(s32) = COPY $vgpr0 - %x:_(s16) = G_TRUNC %0:_(s32) - %sqrt:_(s16) = G_FSQRT %x - %one:_(s16) = G_FCONSTANT half 1.0 - %rsq:_(s16) = contract G_FDIV %one, %sqrt - %ext:_(s32) = G_ANYEXT %rsq:_(s16) - $vgpr0 = COPY %ext - S_ENDPGM 0, implicit %sqrt + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: %x:_(i16) = G_TRUNC [[COPY]](i32) + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST %x(i16) + ; GCN-NEXT: %sqrt:_(f16) = G_FSQRT [[BITCAST]] + ; GCN-NEXT: %one:_(f16) = G_FCONSTANT half 0xH3C00 + ; GCN-NEXT: %rsq:_(f16) = contract G_FDIV %one, %sqrt + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST %rsq(f16) + ; GCN-NEXT: %ext:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; GCN-NEXT: $vgpr0 = COPY %ext(i32) + ; GCN-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST %sqrt(f16) + ; GCN-NEXT: S_ENDPGM 0, implicit [[BITCAST2]](i16) + %0:_(i32) = COPY $vgpr0 + %x:_(i16) = G_TRUNC %0(i32) + %2:_(f16) = G_BITCAST %x(i16) + %sqrt:_(f16) = G_FSQRT %2 + %one:_(f16) = G_FCONSTANT half 0xH3C00 + %rsq:_(f16) = contract G_FDIV %one, %sqrt + %6:_(i16) = G_BITCAST %rsq(f16) + %ext:_(i32) = G_ANYEXT %6(i16) + $vgpr0 = COPY %ext(i32) + %8:_(i16) = G_BITCAST %sqrt(f16) + S_ENDPGM 0, implicit %8(i16) ... 
@@ -228,22 +264,28 @@ body: | ; GCN-LABEL: name: rsq_f16_multi_use_missing_contract1 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: %x:_(s16) = G_TRUNC [[COPY]](s32) - ; GCN-NEXT: %sqrt:_(s16) = contract G_FSQRT %x - ; GCN-NEXT: %one:_(s16) = G_FCONSTANT half 0xH3C00 - ; GCN-NEXT: %rsq:_(s16) = G_FDIV %one, %sqrt - ; GCN-NEXT: %ext:_(s32) = G_ANYEXT %rsq(s16) - ; GCN-NEXT: $vgpr0 = COPY %ext(s32) - ; GCN-NEXT: S_ENDPGM 0, implicit %sqrt(s16) - %0:_(s32) = COPY $vgpr0 - %x:_(s16) = G_TRUNC %0:_(s32) - %sqrt:_(s16) = contract G_FSQRT %x - %one:_(s16) = G_FCONSTANT half 1.0 - %rsq:_(s16) = G_FDIV %one, %sqrt - %ext:_(s32) = G_ANYEXT %rsq:_(s16) - $vgpr0 = COPY %ext - S_ENDPGM 0, implicit %sqrt + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: %x:_(i16) = G_TRUNC [[COPY]](i32) + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST %x(i16) + ; GCN-NEXT: %sqrt:_(f16) = contract G_FSQRT [[BITCAST]] + ; GCN-NEXT: %one:_(f16) = G_FCONSTANT half 0xH3C00 + ; GCN-NEXT: %rsq:_(f16) = G_FDIV %one, %sqrt + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST %rsq(f16) + ; GCN-NEXT: %ext:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; GCN-NEXT: $vgpr0 = COPY %ext(i32) + ; GCN-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST %sqrt(f16) + ; GCN-NEXT: S_ENDPGM 0, implicit [[BITCAST2]](i16) + %0:_(i32) = COPY $vgpr0 + %x:_(i16) = G_TRUNC %0(i32) + %2:_(f16) = G_BITCAST %x(i16) + %sqrt:_(f16) = contract G_FSQRT %2 + %one:_(f16) = G_FCONSTANT half 0xH3C00 + %rsq:_(f16) = G_FDIV %one, %sqrt + %6:_(i16) = G_BITCAST %rsq(f16) + %ext:_(i32) = G_ANYEXT %6(i16) + $vgpr0 = COPY %ext(i32) + %8:_(i16) = G_BITCAST %sqrt(f16) + S_ENDPGM 0, implicit %8(i16) ... @@ -257,16 +299,20 @@ body: | ; GCN-LABEL: name: rsq_f32 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: %x:_(s32) = COPY $vgpr0 - ; GCN-NEXT: %sqrt:_(s32) = contract G_FSQRT %x - ; GCN-NEXT: %one:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GCN-NEXT: %rsq:_(s32) = contract G_FDIV %one, %sqrt - ; GCN-NEXT: $vgpr0 = COPY %rsq(s32) - %x:_(s32) = COPY $vgpr0 - %sqrt:_(s32) = contract G_FSQRT %x - %one:_(s32) = G_FCONSTANT float 1.0 - %rsq:_(s32) = contract G_FDIV %one, %sqrt - $vgpr0 = COPY %rsq + ; GCN-NEXT: %x:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST %x(i32) + ; GCN-NEXT: %sqrt:_(f32) = contract G_FSQRT [[BITCAST]] + ; GCN-NEXT: %one:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GCN-NEXT: %rsq:_(f32) = contract G_FDIV %one, %sqrt + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST %rsq(f32) + ; GCN-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %x:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %x(i32) + %sqrt:_(f32) = contract G_FSQRT %1 + %one:_(f32) = G_FCONSTANT float 1.000000e+00 + %rsq:_(f32) = contract G_FDIV %one, %sqrt + %5:_(i32) = G_BITCAST %rsq(f32) + $vgpr0 = COPY %5(i32) ... 
@@ -280,16 +326,20 @@ body: | ; GCN-LABEL: name: neg_rsq_f32 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: %x:_(s32) = COPY $vgpr0 - ; GCN-NEXT: %sqrt:_(s32) = contract G_FSQRT %x - ; GCN-NEXT: %neg_one:_(s32) = G_FCONSTANT float -1.000000e+00 - ; GCN-NEXT: %rsq:_(s32) = contract G_FDIV %neg_one, %sqrt - ; GCN-NEXT: $vgpr0 = COPY %rsq(s32) - %x:_(s32) = COPY $vgpr0 - %sqrt:_(s32) = contract G_FSQRT %x - %neg_one:_(s32) = G_FCONSTANT float -1.0 - %rsq:_(s32) = contract G_FDIV %neg_one, %sqrt - $vgpr0 = COPY %rsq + ; GCN-NEXT: %x:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST %x(i32) + ; GCN-NEXT: %sqrt:_(f32) = contract G_FSQRT [[BITCAST]] + ; GCN-NEXT: %neg_one:_(f32) = G_FCONSTANT float -1.000000e+00 + ; GCN-NEXT: %rsq:_(f32) = contract G_FDIV %neg_one, %sqrt + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST %rsq(f32) + ; GCN-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %x:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %x(i32) + %sqrt:_(f32) = contract G_FSQRT %1 + %neg_one:_(f32) = G_FCONSTANT float -1.000000e+00 + %rsq:_(f32) = contract G_FDIV %neg_one, %sqrt + %5:_(i32) = G_BITCAST %rsq(f32) + $vgpr0 = COPY %5(i32) ... @@ -303,16 +353,20 @@ body: | ; GCN-LABEL: name: afn_rsq_f32 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: %x:_(s32) = COPY $vgpr0 - ; GCN-NEXT: %sqrt:_(s32) = contract afn G_FSQRT %x - ; GCN-NEXT: %one:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GCN-NEXT: %rsq:_(s32) = contract afn G_FDIV %one, %sqrt - ; GCN-NEXT: $vgpr0 = COPY %rsq(s32) - %x:_(s32) = COPY $vgpr0 - %sqrt:_(s32) = contract afn G_FSQRT %x - %one:_(s32) = G_FCONSTANT float 1.0 - %rsq:_(s32) = contract afn G_FDIV %one, %sqrt - $vgpr0 = COPY %rsq + ; GCN-NEXT: %x:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST %x(i32) + ; GCN-NEXT: %sqrt:_(f32) = contract afn G_FSQRT [[BITCAST]] + ; GCN-NEXT: %one:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GCN-NEXT: %rsq:_(f32) = contract afn G_FDIV %one, %sqrt + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST %rsq(f32) + ; GCN-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %x:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %x(i32) + %sqrt:_(f32) = contract afn G_FSQRT %1 + %one:_(f32) = G_FCONSTANT float 1.000000e+00 + %rsq:_(f32) = contract afn G_FDIV %one, %sqrt + %5:_(i32) = G_BITCAST %rsq(f32) + $vgpr0 = COPY %5(i32) ... 
@@ -326,18 +380,22 @@ body: | ; GCN-LABEL: name: afn_rsq_f32_multi_use ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: %x:_(s32) = COPY $vgpr0 - ; GCN-NEXT: %sqrt:_(s32) = contract afn G_FSQRT %x - ; GCN-NEXT: %one:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GCN-NEXT: %rsq:_(s32) = contract afn G_FDIV %one, %sqrt - ; GCN-NEXT: %ret:_(s32) = G_FSUB %sqrt, %rsq - ; GCN-NEXT: $vgpr0 = COPY %ret(s32) - %x:_(s32) = COPY $vgpr0 - %sqrt:_(s32) = contract afn G_FSQRT %x - %one:_(s32) = G_FCONSTANT float 1.0 - %rsq:_(s32) = contract afn G_FDIV %one, %sqrt - %ret:_(s32) = G_FSUB %sqrt, %rsq - $vgpr0 = COPY %ret + ; GCN-NEXT: %x:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST %x(i32) + ; GCN-NEXT: %sqrt:_(f32) = contract afn G_FSQRT [[BITCAST]] + ; GCN-NEXT: %one:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GCN-NEXT: %rsq:_(f32) = contract afn G_FDIV %one, %sqrt + ; GCN-NEXT: %ret:_(f32) = G_FSUB %sqrt, %rsq + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST %ret(f32) + ; GCN-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %x:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %x(i32) + %sqrt:_(f32) = contract afn G_FSQRT %1 + %one:_(f32) = G_FCONSTANT float 1.000000e+00 + %rsq:_(f32) = contract afn G_FDIV %one, %sqrt + %ret:_(f32) = G_FSUB %sqrt, %rsq + %6:_(i32) = G_BITCAST %ret(f32) + $vgpr0 = COPY %6(i32) ... @@ -351,16 +409,20 @@ body: | ; GCN-LABEL: name: afn_neg_rsq_f32 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: %x:_(s32) = COPY $vgpr0 - ; GCN-NEXT: %sqrt:_(s32) = contract afn G_FSQRT %x - ; GCN-NEXT: %neg_one:_(s32) = G_FCONSTANT float -1.000000e+00 - ; GCN-NEXT: %rsq:_(s32) = contract afn G_FDIV %neg_one, %sqrt - ; GCN-NEXT: $vgpr0 = COPY %rsq(s32) - %x:_(s32) = COPY $vgpr0 - %sqrt:_(s32) = contract afn G_FSQRT %x - %neg_one:_(s32) = G_FCONSTANT float -1.0 - %rsq:_(s32) = contract afn G_FDIV %neg_one, %sqrt - $vgpr0 = COPY %rsq + ; GCN-NEXT: %x:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST %x(i32) + ; GCN-NEXT: %sqrt:_(f32) = contract afn G_FSQRT [[BITCAST]] + ; GCN-NEXT: %neg_one:_(f32) = G_FCONSTANT float -1.000000e+00 + ; GCN-NEXT: %rsq:_(f32) = contract afn G_FDIV %neg_one, %sqrt + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST %rsq(f32) + ; GCN-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %x:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %x(i32) + %sqrt:_(f32) = contract afn G_FSQRT %1 + %neg_one:_(f32) = G_FCONSTANT float -1.000000e+00 + %rsq:_(f32) = contract afn G_FDIV %neg_one, %sqrt + %5:_(i32) = G_BITCAST %rsq(f32) + $vgpr0 = COPY %5(i32) ... 
@@ -375,20 +437,24 @@ body: | ; GCN-LABEL: name: rsq_f64 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: %x:_(s64) = G_ANYEXT [[COPY]](s32) - ; GCN-NEXT: %sqrt:_(s64) = contract G_FSQRT %x - ; GCN-NEXT: %one:_(s64) = G_FCONSTANT double 1.000000e+00 - ; GCN-NEXT: %rsq:_(s64) = contract G_FDIV %one, %sqrt - ; GCN-NEXT: %ext:_(s32) = G_TRUNC %rsq(s64) - ; GCN-NEXT: $vgpr0 = COPY %ext(s32) - %0:_(s32) = COPY $vgpr0 - %x:_(s64) = G_ANYEXT %0:_(s32) - %sqrt:_(s64) = contract G_FSQRT %x - %one:_(s64) = G_FCONSTANT double 1.0 - %rsq:_(s64) = contract G_FDIV %one, %sqrt - %ext:_(s32) = G_TRUNC %rsq:_(s64) - $vgpr0 = COPY %ext + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: %x:_(i64) = G_ANYEXT [[COPY]](i32) + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST %x(i64) + ; GCN-NEXT: %sqrt:_(f64) = contract G_FSQRT [[BITCAST]] + ; GCN-NEXT: %one:_(f64) = G_FCONSTANT double 1.000000e+00 + ; GCN-NEXT: %rsq:_(f64) = contract G_FDIV %one, %sqrt + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST %rsq(f64) + ; GCN-NEXT: %ext:_(i32) = G_TRUNC [[BITCAST1]](i64) + ; GCN-NEXT: $vgpr0 = COPY %ext(i32) + %0:_(i32) = COPY $vgpr0 + %x:_(i64) = G_ANYEXT %0(i32) + %2:_(f64) = G_BITCAST %x(i64) + %sqrt:_(f64) = contract G_FSQRT %2 + %one:_(f64) = G_FCONSTANT double 1.000000e+00 + %rsq:_(f64) = contract G_FDIV %one, %sqrt + %6:_(i64) = G_BITCAST %rsq(f64) + %ext:_(i32) = G_TRUNC %6(i64) + $vgpr0 = COPY %ext(i32) ... @@ -402,20 +468,24 @@ body: | ; GCN-LABEL: name: neg_rsq_f64 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: %x:_(s64) = G_ANYEXT [[COPY]](s32) - ; GCN-NEXT: %sqrt:_(s64) = contract G_FSQRT %x - ; GCN-NEXT: %neg_one:_(s64) = G_FCONSTANT double -1.000000e+00 - ; GCN-NEXT: %rsq:_(s64) = contract G_FDIV %neg_one, %sqrt - ; GCN-NEXT: %ext:_(s32) = G_TRUNC %rsq(s64) - ; GCN-NEXT: $vgpr0 = COPY %ext(s32) - %0:_(s32) = COPY $vgpr0 - %x:_(s64) = G_ANYEXT %0:_(s32) - %sqrt:_(s64) = contract G_FSQRT %x - %neg_one:_(s64) = G_FCONSTANT double -1.0 - %rsq:_(s64) = contract G_FDIV %neg_one, %sqrt - %ext:_(s32) = G_TRUNC %rsq:_(s64) - $vgpr0 = COPY %ext + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: %x:_(i64) = G_ANYEXT [[COPY]](i32) + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST %x(i64) + ; GCN-NEXT: %sqrt:_(f64) = contract G_FSQRT [[BITCAST]] + ; GCN-NEXT: %neg_one:_(f64) = G_FCONSTANT double -1.000000e+00 + ; GCN-NEXT: %rsq:_(f64) = contract G_FDIV %neg_one, %sqrt + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST %rsq(f64) + ; GCN-NEXT: %ext:_(i32) = G_TRUNC [[BITCAST1]](i64) + ; GCN-NEXT: $vgpr0 = COPY %ext(i32) + %0:_(i32) = COPY $vgpr0 + %x:_(i64) = G_ANYEXT %0(i32) + %2:_(f64) = G_BITCAST %x(i64) + %sqrt:_(f64) = contract G_FSQRT %2 + %neg_one:_(f64) = G_FCONSTANT double -1.000000e+00 + %rsq:_(f64) = contract G_FDIV %neg_one, %sqrt + %6:_(i64) = G_BITCAST %rsq(f64) + %ext:_(i32) = G_TRUNC %6(i64) + $vgpr0 = COPY %ext(i32) ... 
@@ -429,20 +499,24 @@ body: | ; GCN-LABEL: name: afn_rsq_f64 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: %x:_(s64) = G_ANYEXT [[COPY]](s32) - ; GCN-NEXT: %sqrt:_(s64) = contract afn G_FSQRT %x - ; GCN-NEXT: %one:_(s64) = G_FCONSTANT double 1.000000e+00 - ; GCN-NEXT: %rsq:_(s64) = contract afn G_FDIV %one, %sqrt - ; GCN-NEXT: %ext:_(s32) = G_TRUNC %rsq(s64) - ; GCN-NEXT: $vgpr0 = COPY %ext(s32) - %0:_(s32) = COPY $vgpr0 - %x:_(s64) = G_ANYEXT %0:_(s32) - %sqrt:_(s64) = contract afn G_FSQRT %x - %one:_(s64) = G_FCONSTANT double 1.0 - %rsq:_(s64) = contract afn G_FDIV %one, %sqrt - %ext:_(s32) = G_TRUNC %rsq:_(s64) - $vgpr0 = COPY %ext + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: %x:_(i64) = G_ANYEXT [[COPY]](i32) + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST %x(i64) + ; GCN-NEXT: %sqrt:_(f64) = contract afn G_FSQRT [[BITCAST]] + ; GCN-NEXT: %one:_(f64) = G_FCONSTANT double 1.000000e+00 + ; GCN-NEXT: %rsq:_(f64) = contract afn G_FDIV %one, %sqrt + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST %rsq(f64) + ; GCN-NEXT: %ext:_(i32) = G_TRUNC [[BITCAST1]](i64) + ; GCN-NEXT: $vgpr0 = COPY %ext(i32) + %0:_(i32) = COPY $vgpr0 + %x:_(i64) = G_ANYEXT %0(i32) + %2:_(f64) = G_BITCAST %x(i64) + %sqrt:_(f64) = contract afn G_FSQRT %2 + %one:_(f64) = G_FCONSTANT double 1.000000e+00 + %rsq:_(f64) = contract afn G_FDIV %one, %sqrt + %6:_(i64) = G_BITCAST %rsq(f64) + %ext:_(i32) = G_TRUNC %6(i64) + $vgpr0 = COPY %ext(i32) ... @@ -456,20 +530,24 @@ body: | ; GCN-LABEL: name: afn_neg_rsq_f64 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: %x:_(s64) = G_ANYEXT [[COPY]](s32) - ; GCN-NEXT: %sqrt:_(s64) = contract afn G_FSQRT %x - ; GCN-NEXT: %neg_one:_(s64) = G_FCONSTANT double -1.000000e+00 - ; GCN-NEXT: %rsq:_(s64) = contract afn G_FDIV %neg_one, %sqrt - ; GCN-NEXT: %ext:_(s32) = G_TRUNC %rsq(s64) - ; GCN-NEXT: $vgpr0 = COPY %ext(s32) - %0:_(s32) = COPY $vgpr0 - %x:_(s64) = G_ANYEXT %0:_(s32) - %sqrt:_(s64) = contract afn G_FSQRT %x - %neg_one:_(s64) = G_FCONSTANT double -1.0 - %rsq:_(s64) = contract afn G_FDIV %neg_one, %sqrt - %ext:_(s32) = G_TRUNC %rsq:_(s64) - $vgpr0 = COPY %ext + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: %x:_(i64) = G_ANYEXT [[COPY]](i32) + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST %x(i64) + ; GCN-NEXT: %sqrt:_(f64) = contract afn G_FSQRT [[BITCAST]] + ; GCN-NEXT: %neg_one:_(f64) = G_FCONSTANT double -1.000000e+00 + ; GCN-NEXT: %rsq:_(f64) = contract afn G_FDIV %neg_one, %sqrt + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST %rsq(f64) + ; GCN-NEXT: %ext:_(i32) = G_TRUNC [[BITCAST1]](i64) + ; GCN-NEXT: $vgpr0 = COPY %ext(i32) + %0:_(i32) = COPY $vgpr0 + %x:_(i64) = G_ANYEXT %0(i32) + %2:_(f64) = G_BITCAST %x(i64) + %sqrt:_(f64) = contract afn G_FSQRT %2 + %neg_one:_(f64) = G_FCONSTANT double -1.000000e+00 + %rsq:_(f64) = contract afn G_FDIV %neg_one, %sqrt + %6:_(i64) = G_BITCAST %rsq(f64) + %ext:_(i32) = G_TRUNC %6(i64) + $vgpr0 = COPY %ext(i32) ... 
@@ -484,20 +562,24 @@ body: | ; GCN-LABEL: name: rsq_fract_num_f16 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: %x:_(s16) = G_TRUNC [[COPY]](s32) - ; GCN-NEXT: %fract:_(s16) = G_FCONSTANT half 0xH3800 - ; GCN-NEXT: [[INT:%[0-9]+]]:_(s16) = contract G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %x(s16) - ; GCN-NEXT: %rsq:_(s16) = contract G_FMUL [[INT]], %fract - ; GCN-NEXT: %ext:_(s32) = G_ANYEXT %rsq(s16) - ; GCN-NEXT: $vgpr0 = COPY %ext(s32) - %0:_(s32) = COPY $vgpr0 - %x:_(s16) = G_TRUNC %0:_(s32) - %sqrt:_(s16) = contract G_FSQRT %x - %fract:_(s16) = G_FCONSTANT half 0.5 - %rsq:_(s16) = contract G_FDIV %fract, %sqrt - %ext:_(s32) = G_ANYEXT %rsq:_(s16) - $vgpr0 = COPY %ext + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: %x:_(i16) = G_TRUNC [[COPY]](i32) + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST %x(i16) + ; GCN-NEXT: %fract:_(f16) = G_FCONSTANT half 0xH3800 + ; GCN-NEXT: [[INT:%[0-9]+]]:_(f16) = contract G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), [[BITCAST]](f16) + ; GCN-NEXT: %rsq:_(f16) = contract G_FMUL [[INT]], %fract + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST %rsq(f16) + ; GCN-NEXT: %ext:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; GCN-NEXT: $vgpr0 = COPY %ext(i32) + %0:_(i32) = COPY $vgpr0 + %x:_(i16) = G_TRUNC %0(i32) + %2:_(f16) = G_BITCAST %x(i16) + %sqrt:_(f16) = contract G_FSQRT %2 + %fract:_(f16) = G_FCONSTANT half 0xH3800 + %rsq:_(f16) = contract G_FDIV %fract, %sqrt + %6:_(i16) = G_BITCAST %rsq(f16) + %ext:_(i32) = G_ANYEXT %6(i16) + $vgpr0 = COPY %ext(i32) ... @@ -511,20 +593,24 @@ body: | ; GCN-LABEL: name: neg_rsq_fract_num_f16 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: %x:_(s16) = G_TRUNC [[COPY]](s32) - ; GCN-NEXT: %neg_fract:_(s16) = G_FCONSTANT half 0xHB800 - ; GCN-NEXT: [[INT:%[0-9]+]]:_(s16) = contract G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %x(s16) - ; GCN-NEXT: %rsq:_(s16) = contract G_FMUL [[INT]], %neg_fract - ; GCN-NEXT: %ext:_(s32) = G_ANYEXT %rsq(s16) - ; GCN-NEXT: $vgpr0 = COPY %ext(s32) - %0:_(s32) = COPY $vgpr0 - %x:_(s16) = G_TRUNC %0:_(s32) - %sqrt:_(s16) = contract G_FSQRT %x - %neg_fract:_(s16) = G_FCONSTANT half -0.5 - %rsq:_(s16) = contract G_FDIV %neg_fract, %sqrt - %ext:_(s32) = G_ANYEXT %rsq:_(s16) - $vgpr0 = COPY %ext + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: %x:_(i16) = G_TRUNC [[COPY]](i32) + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST %x(i16) + ; GCN-NEXT: %neg_fract:_(f16) = G_FCONSTANT half 0xHB800 + ; GCN-NEXT: [[INT:%[0-9]+]]:_(f16) = contract G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), [[BITCAST]](f16) + ; GCN-NEXT: %rsq:_(f16) = contract G_FMUL [[INT]], %neg_fract + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST %rsq(f16) + ; GCN-NEXT: %ext:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; GCN-NEXT: $vgpr0 = COPY %ext(i32) + %0:_(i32) = COPY $vgpr0 + %x:_(i16) = G_TRUNC %0(i32) + %2:_(f16) = G_BITCAST %x(i16) + %sqrt:_(f16) = contract G_FSQRT %2 + %neg_fract:_(f16) = G_FCONSTANT half 0xHB800 + %rsq:_(f16) = contract G_FDIV %neg_fract, %sqrt + %6:_(i16) = G_BITCAST %rsq(f16) + %ext:_(i32) = G_ANYEXT %6(i16) + $vgpr0 = COPY %ext(i32) ... 
@@ -539,20 +625,24 @@ body: | ; GCN-LABEL: name: rsq_large_num_f16 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: %x:_(s16) = G_TRUNC [[COPY]](s32) - ; GCN-NEXT: %ten:_(s16) = G_FCONSTANT half 0xH4900 - ; GCN-NEXT: [[INT:%[0-9]+]]:_(s16) = contract G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %x(s16) - ; GCN-NEXT: %rsq:_(s16) = contract G_FMUL [[INT]], %ten - ; GCN-NEXT: %ext:_(s32) = G_ANYEXT %rsq(s16) - ; GCN-NEXT: $vgpr0 = COPY %ext(s32) - %0:_(s32) = COPY $vgpr0 - %x:_(s16) = G_TRUNC %0:_(s32) - %sqrt:_(s16) = contract G_FSQRT %x - %ten:_(s16) = G_FCONSTANT half 10.0 - %rsq:_(s16) = contract G_FDIV %ten, %sqrt - %ext:_(s32) = G_ANYEXT %rsq:_(s16) - $vgpr0 = COPY %ext + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: %x:_(i16) = G_TRUNC [[COPY]](i32) + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST %x(i16) + ; GCN-NEXT: %ten:_(f16) = G_FCONSTANT half 0xH4900 + ; GCN-NEXT: [[INT:%[0-9]+]]:_(f16) = contract G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), [[BITCAST]](f16) + ; GCN-NEXT: %rsq:_(f16) = contract G_FMUL [[INT]], %ten + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST %rsq(f16) + ; GCN-NEXT: %ext:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; GCN-NEXT: $vgpr0 = COPY %ext(i32) + %0:_(i32) = COPY $vgpr0 + %x:_(i16) = G_TRUNC %0(i32) + %2:_(f16) = G_BITCAST %x(i16) + %sqrt:_(f16) = contract G_FSQRT %2 + %ten:_(f16) = G_FCONSTANT half 0xH4900 + %rsq:_(f16) = contract G_FDIV %ten, %sqrt + %6:_(i16) = G_BITCAST %rsq(f16) + %ext:_(i32) = G_ANYEXT %6(i16) + $vgpr0 = COPY %ext(i32) ... @@ -566,19 +656,23 @@ body: | ; GCN-LABEL: name: neg_rsq_large_num_f16 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: %x:_(s16) = G_TRUNC [[COPY]](s32) - ; GCN-NEXT: %neg_ten:_(s16) = G_FCONSTANT half 0xHC900 - ; GCN-NEXT: [[INT:%[0-9]+]]:_(s16) = contract G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %x(s16) - ; GCN-NEXT: %rsq:_(s16) = contract G_FMUL [[INT]], %neg_ten - ; GCN-NEXT: %ext:_(s32) = G_ANYEXT %rsq(s16) - ; GCN-NEXT: $vgpr0 = COPY %ext(s32) - %0:_(s32) = COPY $vgpr0 - %x:_(s16) = G_TRUNC %0:_(s32) - %sqrt:_(s16) = contract G_FSQRT %x - %neg_ten:_(s16) = G_FCONSTANT half -10.0 - %rsq:_(s16) = contract G_FDIV %neg_ten, %sqrt - %ext:_(s32) = G_ANYEXT %rsq:_(s16) - $vgpr0 = COPY %ext + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: %x:_(i16) = G_TRUNC [[COPY]](i32) + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST %x(i16) + ; GCN-NEXT: %neg_ten:_(f16) = G_FCONSTANT half 0xHC900 + ; GCN-NEXT: [[INT:%[0-9]+]]:_(f16) = contract G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), [[BITCAST]](f16) + ; GCN-NEXT: %rsq:_(f16) = contract G_FMUL [[INT]], %neg_ten + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST %rsq(f16) + ; GCN-NEXT: %ext:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; GCN-NEXT: $vgpr0 = COPY %ext(i32) + %0:_(i32) = COPY $vgpr0 + %x:_(i16) = G_TRUNC %0(i32) + %2:_(f16) = G_BITCAST %x(i16) + %sqrt:_(f16) = contract G_FSQRT %2 + %neg_ten:_(f16) = G_FCONSTANT half 0xHC900 + %rsq:_(f16) = contract G_FDIV %neg_ten, %sqrt + %6:_(i16) = G_BITCAST %rsq(f16) + %ext:_(i32) = G_ANYEXT %6(i16) + $vgpr0 = COPY %ext(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-post-legalize.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-post-legalize.mir index 789385dcbae82..fcba3c711f784 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-post-legalize.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-post-legalize.mir @@ -17,85 +17,128 @@ body: | ; GFX9-LABEL: name: test_f32_add_mul ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]] - ; GFX9-NEXT: $vgpr0 = COPY [[FADD]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[BITCAST2]] + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FADD]](f32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; ; GFX9-CONTRACT-LABEL: name: test_f32_add_mul ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-CONTRACT-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-CONTRACT-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX9-CONTRACT-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX9-CONTRACT-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMA]](f32) + ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; ; GFX9-DENORM-LABEL: name: test_f32_add_mul ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-DENORM-NEXT: {{ $}} - ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] - ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]] - ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[FADD]](s32) + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-DENORM-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; 
GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-DENORM-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[BITCAST2]] + ; GFX9-DENORM-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FADD]](f32) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; ; GFX9-UNSAFE-LABEL: name: test_f32_add_mul ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX9-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX9-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMA]](f32) + ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; ; GFX10-LABEL: name: test_f32_add_mul ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] - ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]] - ; GFX10-NEXT: $vgpr0 = COPY [[FADD]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[BITCAST2]] + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FADD]](f32) + ; GFX10-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; ; GFX10-CONTRACT-LABEL: name: test_f32_add_mul ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-CONTRACT-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX10-CONTRACT-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; 
GFX10-CONTRACT-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX10-CONTRACT-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMA]](f32) + ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; ; GFX10-DENORM-LABEL: name: test_f32_add_mul ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-DENORM-NEXT: {{ $}} - ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] - ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]] - ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[FADD]](s32) + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX10-DENORM-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-DENORM-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[BITCAST2]] + ; GFX10-DENORM-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FADD]](f32) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; ; GFX10-UNSAFE-LABEL: name: test_f32_add_mul ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX10-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX10-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX10-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMA]](f32) + ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %4:_(s32) = G_FMUL %0, %1 - %5:_(s32) = G_FADD %4, %2 - $vgpr0 = COPY %5(s32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(f32) = G_BITCAST %0(i32) + %4:_(f32) = G_BITCAST %1(i32) + %5:_(f32) = G_FMUL %3, %4 + %6:_(f32) = G_BITCAST %2(i32) + %7:_(f32) = G_FADD %5, %6 + %8:_(i32) = G_BITCAST %7(f32) + $vgpr0 = COPY %8(i32) S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ... 
@@ -108,85 +151,128 @@ body: | ; GFX9-LABEL: name: test_f32_add_mul_rhs ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY2]], [[FMUL]] - ; GFX9-NEXT: $vgpr0 = COPY [[FADD]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[BITCAST2]], [[FMUL]] + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FADD]](f32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; ; GFX9-CONTRACT-LABEL: name: test_f32_add_mul_rhs ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-CONTRACT-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-CONTRACT-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX9-CONTRACT-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX9-CONTRACT-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMA]](f32) + ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; ; GFX9-DENORM-LABEL: name: test_f32_add_mul_rhs ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-DENORM-NEXT: {{ $}} - ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] - ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY2]], [[FMUL]] - ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[FADD]](s32) + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-DENORM-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-DENORM-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[BITCAST2]], [[FMUL]] + ; GFX9-DENORM-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FADD]](f32) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) 
; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; ; GFX9-UNSAFE-LABEL: name: test_f32_add_mul_rhs ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX9-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX9-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMA]](f32) + ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; ; GFX10-LABEL: name: test_f32_add_mul_rhs ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] - ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY2]], [[FMUL]] - ; GFX10-NEXT: $vgpr0 = COPY [[FADD]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[BITCAST2]], [[FMUL]] + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FADD]](f32) + ; GFX10-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; ; GFX10-CONTRACT-LABEL: name: test_f32_add_mul_rhs ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-CONTRACT-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX10-CONTRACT-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX10-CONTRACT-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX10-CONTRACT-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMA]](f32) + ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return 
$sgpr30_sgpr31, implicit $vgpr0 + ; ; GFX10-DENORM-LABEL: name: test_f32_add_mul_rhs ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-DENORM-NEXT: {{ $}} - ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] - ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY2]], [[FMUL]] - ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[FADD]](s32) + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX10-DENORM-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-DENORM-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[BITCAST2]], [[FMUL]] + ; GFX10-DENORM-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FADD]](f32) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; ; GFX10-UNSAFE-LABEL: name: test_f32_add_mul_rhs ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX10-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX10-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX10-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMA]](f32) + ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %4:_(s32) = G_FMUL %0, %1 - %5:_(s32) = G_FADD %2, %4 - $vgpr0 = COPY %5(s32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(f32) = G_BITCAST %0(i32) + %4:_(f32) = G_BITCAST %1(i32) + %5:_(f32) = G_FMUL %3, %4 + %6:_(f32) = G_BITCAST %2(i32) + %7:_(f32) = G_FADD %6, %5 + %8:_(i32) = G_BITCAST %7(f32) + $vgpr0 = COPY %8(i32) S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ... 
@@ -199,114 +285,157 @@ body: | ; GFX9-LABEL: name: test_add_mul_multiple_defs_z ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV1]] - ; GFX9-NEXT: $vgpr0 = COPY [[FADD]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[MV]](p1) :: (load (<2 x i32>), addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<2 x i32>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[UV1]](i32) + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[BITCAST2]] + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FADD]](f32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) + ; ; GFX9-CONTRACT-LABEL: name: test_add_mul_multiple_defs_z ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-CONTRACT-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1) - ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]] - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX9-CONTRACT-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-CONTRACT-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX9-CONTRACT-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[MV]](p1) :: (load (<2 x i32>), addrspace 1) + ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<2 x i32>) + ; GFX9-CONTRACT-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[UV1]](i32) + ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX9-CONTRACT-NEXT: 
[[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMA]](f32) + ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) + ; ; GFX9-DENORM-LABEL: name: test_add_mul_multiple_defs_z ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-DENORM-NEXT: {{ $}} - ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-DENORM-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] - ; GFX9-DENORM-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1) - ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV1]] - ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[FADD]](s32) + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-DENORM-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX9-DENORM-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-DENORM-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-DENORM-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[MV]](p1) :: (load (<2 x i32>), addrspace 1) + ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<2 x i32>) + ; GFX9-DENORM-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[UV1]](i32) + ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[BITCAST2]] + ; GFX9-DENORM-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FADD]](f32) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) + ; ; GFX9-UNSAFE-LABEL: name: test_add_mul_multiple_defs_z ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-UNSAFE-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1) - ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]] - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX9-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX9-UNSAFE-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[MV]](p1) :: (load (<2 x i32>), addrspace 1) + ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = 
G_UNMERGE_VALUES [[LOAD]](<2 x i32>) + ; GFX9-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[UV1]](i32) + ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX9-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMA]](f32) + ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) + ; ; GFX10-LABEL: name: test_add_mul_multiple_defs_z ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV1]] - ; GFX10-NEXT: $vgpr0 = COPY [[FADD]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[MV]](p1) :: (load (<2 x i32>), addrspace 1) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<2 x i32>) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[UV1]](i32) + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[BITCAST2]] + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FADD]](f32) + ; GFX10-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) + ; ; GFX10-CONTRACT-LABEL: name: test_add_mul_multiple_defs_z ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-CONTRACT-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1) - ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]] - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-CONTRACT-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX10-CONTRACT-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX10-CONTRACT-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD 
[[MV]](p1) :: (load (<2 x i32>), addrspace 1) + ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<2 x i32>) + ; GFX10-CONTRACT-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[UV1]](i32) + ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX10-CONTRACT-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMA]](f32) + ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) + ; ; GFX10-DENORM-LABEL: name: test_add_mul_multiple_defs_z ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-DENORM-NEXT: {{ $}} - ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-DENORM-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] - ; GFX10-DENORM-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1) - ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV1]] - ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[FADD]](s32) + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-DENORM-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-DENORM-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX10-DENORM-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-DENORM-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[MV]](p1) :: (load (<2 x i32>), addrspace 1) + ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<2 x i32>) + ; GFX10-DENORM-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[UV1]](i32) + ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[BITCAST2]] + ; GFX10-DENORM-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FADD]](f32) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) + ; ; GFX10-UNSAFE-LABEL: name: test_add_mul_multiple_defs_z ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-UNSAFE-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1) - ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]] - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[FMA]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %4:_(s32) = COPY $vgpr2 - %5:_(s32) = COPY $vgpr3 - %2:_(p1) = G_MERGE_VALUES %4(s32), %5(s32) - %6:_(s32) = G_FMUL %0, %1 - %7:_(<2 x s32>) = G_LOAD %2(p1) :: (load (<2 x s32>), addrspace 1) - %12:_(s32), %13:_(s32) = 
G_UNMERGE_VALUES %7(<2 x s32>) - %8:_(s32) = COPY %13(s32) - %10:_(s32) = G_FADD %6, %8 - $vgpr0 = COPY %10(s32) + ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX10-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX10-UNSAFE-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[MV]](p1) :: (load (<2 x i32>), addrspace 1) + ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<2 x i32>) + ; GFX10-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[UV1]](i32) + ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX10-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMA]](f32) + ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 + %4:_(p1) = G_MERGE_VALUES %2(i32), %3(i32) + %5:_(f32) = G_BITCAST %0(i32) + %6:_(f32) = G_BITCAST %1(i32) + %7:_(f32) = G_FMUL %5, %6 + %8:_(<2 x i32>) = G_LOAD %4(p1) :: (load (<2 x i32>), addrspace 1) + %9:_(i32), %10:_(i32) = G_UNMERGE_VALUES %8(<2 x i32>) + %11:_(i32) = COPY %10(i32) + %12:_(f32) = G_BITCAST %11(i32) + %13:_(f32) = G_FADD %7, %12 + %14:_(i32) = G_BITCAST %13(f32) + $vgpr0 = COPY %14(i32) ... --- @@ -318,114 +447,157 @@ body: | ; GFX9-LABEL: name: test_add_mul_rhs_multiple_defs_z ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[UV1]], [[FMUL]] - ; GFX9-NEXT: $vgpr0 = COPY [[FADD]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[MV]](p1) :: (load (<2 x i32>), addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<2 x i32>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[UV1]](i32) + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[BITCAST2]], [[FMUL]] + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FADD]](f32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) + ; ; GFX9-CONTRACT-LABEL: name: test_add_mul_rhs_multiple_defs_z ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, 
$vgpr2, $vgpr3 ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-CONTRACT-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1) - ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]] - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX9-CONTRACT-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-CONTRACT-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX9-CONTRACT-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[MV]](p1) :: (load (<2 x i32>), addrspace 1) + ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<2 x i32>) + ; GFX9-CONTRACT-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[UV1]](i32) + ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX9-CONTRACT-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMA]](f32) + ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) + ; ; GFX9-DENORM-LABEL: name: test_add_mul_rhs_multiple_defs_z ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-DENORM-NEXT: {{ $}} - ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-DENORM-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] - ; GFX9-DENORM-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1) - ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[UV1]], [[FMUL]] - ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[FADD]](s32) + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-DENORM-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX9-DENORM-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-DENORM-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-DENORM-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[MV]](p1) :: (load (<2 x i32>), addrspace 1) + ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<2 x i32>) + ; GFX9-DENORM-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[UV1]](i32) + ; GFX9-DENORM-NEXT: 
[[FADD:%[0-9]+]]:_(f32) = G_FADD [[BITCAST2]], [[FMUL]] + ; GFX9-DENORM-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FADD]](f32) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) + ; ; GFX9-UNSAFE-LABEL: name: test_add_mul_rhs_multiple_defs_z ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-UNSAFE-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1) - ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]] - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX9-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX9-UNSAFE-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[MV]](p1) :: (load (<2 x i32>), addrspace 1) + ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<2 x i32>) + ; GFX9-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[UV1]](i32) + ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX9-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMA]](f32) + ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) + ; ; GFX10-LABEL: name: test_add_mul_rhs_multiple_defs_z ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[UV1]], [[FMUL]] - ; GFX10-NEXT: $vgpr0 = COPY [[FADD]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[MV]](p1) :: (load (<2 x i32>), addrspace 1) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<2 x i32>) + ; 
GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[UV1]](i32) + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[BITCAST2]], [[FMUL]] + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FADD]](f32) + ; GFX10-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) + ; ; GFX10-CONTRACT-LABEL: name: test_add_mul_rhs_multiple_defs_z ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-CONTRACT-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1) - ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]] - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-CONTRACT-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX10-CONTRACT-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX10-CONTRACT-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[MV]](p1) :: (load (<2 x i32>), addrspace 1) + ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<2 x i32>) + ; GFX10-CONTRACT-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[UV1]](i32) + ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX10-CONTRACT-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMA]](f32) + ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) + ; ; GFX10-DENORM-LABEL: name: test_add_mul_rhs_multiple_defs_z ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-DENORM-NEXT: {{ $}} - ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-DENORM-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] - ; GFX10-DENORM-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1) - ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[UV1]], [[FMUL]] - ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[FADD]](s32) + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-DENORM-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-DENORM-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX10-DENORM-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; 
GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-DENORM-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[MV]](p1) :: (load (<2 x i32>), addrspace 1) + ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<2 x i32>) + ; GFX10-DENORM-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[UV1]](i32) + ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[BITCAST2]], [[FMUL]] + ; GFX10-DENORM-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FADD]](f32) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) + ; ; GFX10-UNSAFE-LABEL: name: test_add_mul_rhs_multiple_defs_z ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-UNSAFE-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1) - ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]] - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[FMA]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %4:_(s32) = COPY $vgpr2 - %5:_(s32) = COPY $vgpr3 - %2:_(p1) = G_MERGE_VALUES %4(s32), %5(s32) - %6:_(s32) = G_FMUL %0, %1 - %7:_(<2 x s32>) = G_LOAD %2(p1) :: (load (<2 x s32>), addrspace 1) - %12:_(s32), %13:_(s32) = G_UNMERGE_VALUES %7(<2 x s32>) - %8:_(s32) = COPY %13(s32) - %10:_(s32) = G_FADD %8, %6 - $vgpr0 = COPY %10(s32) + ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX10-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX10-UNSAFE-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[MV]](p1) :: (load (<2 x i32>), addrspace 1) + ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<2 x i32>) + ; GFX10-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[UV1]](i32) + ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX10-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMA]](f32) + ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 + %4:_(p1) = G_MERGE_VALUES %2(i32), %3(i32) + %5:_(f32) = G_BITCAST %0(i32) + %6:_(f32) = G_BITCAST %1(i32) + %7:_(f32) = G_FMUL %5, %6 + %8:_(<2 x i32>) = G_LOAD %4(p1) :: (load (<2 x i32>), addrspace 1) + %9:_(i32), %10:_(i32) = G_UNMERGE_VALUES %8(<2 x i32>) + %11:_(i32) = COPY %10(i32) + %12:_(f32) = G_BITCAST %11(i32) + %13:_(f32) = G_FADD %12, %7 + %14:_(i32) = G_BITCAST %13(f32) + $vgpr0 = COPY %14(i32) ... 
--- @@ -437,121 +609,164 @@ body: | ; GFX9-LABEL: name: test_half_add_mul ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC1]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[FMUL]], [[TRUNC2]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(f16) = G_FADD [[FMUL]], [[BITCAST2]] + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[FADD]](f16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST3]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; ; GFX9-CONTRACT-LABEL: name: test_half_add_mul ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] - ; GFX9-CONTRACT-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-CONTRACT-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-CONTRACT-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-CONTRACT-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX9-CONTRACT-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-CONTRACT-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-CONTRACT-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(f16) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX9-CONTRACT-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[FMA]](f16) + ; GFX9-CONTRACT-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST3]](i16) + ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; ; GFX9-DENORM-LABEL: name: test_half_add_mul ; GFX9-DENORM: 
liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-DENORM-NEXT: {{ $}} - ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-DENORM-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-DENORM-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-DENORM-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC1]] - ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[FMUL]], [[TRUNC2]] - ; GFX9-DENORM-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) - ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX9-DENORM-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-DENORM-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-DENORM-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(f16) = G_FADD [[FMUL]], [[BITCAST2]] + ; GFX9-DENORM-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[FADD]](f16) + ; GFX9-DENORM-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST3]](i16) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; ; GFX9-UNSAFE-LABEL: name: test_half_add_mul ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] - ; GFX9-UNSAFE-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-UNSAFE-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-UNSAFE-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX9-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(f16) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX9-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[FMA]](f16) + ; GFX9-UNSAFE-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST3]](i16) + ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; ; GFX10-LABEL: 
name: test_half_add_mul ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC1]] - ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[FMUL]], [[TRUNC2]] - ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) - ; GFX10-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(f16) = G_FADD [[FMUL]], [[BITCAST2]] + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[FADD]](f16) + ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST3]](i16) + ; GFX10-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; ; GFX10-CONTRACT-LABEL: name: test_half_add_mul ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] - ; GFX10-CONTRACT-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-CONTRACT-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-CONTRACT-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-CONTRACT-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX10-CONTRACT-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-CONTRACT-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-CONTRACT-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(f16) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX10-CONTRACT-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[FMA]](f16) + ; GFX10-CONTRACT-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST3]](i16) + ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; ; GFX10-DENORM-LABEL: name: test_half_add_mul ; GFX10-DENORM: 
liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-DENORM-NEXT: {{ $}} - ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-DENORM-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-DENORM-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-DENORM-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC1]] - ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[FMUL]], [[TRUNC2]] - ; GFX10-DENORM-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) - ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX10-DENORM-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-DENORM-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-DENORM-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(f16) = G_FADD [[FMUL]], [[BITCAST2]] + ; GFX10-DENORM-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[FADD]](f16) + ; GFX10-DENORM-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST3]](i16) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; ; GFX10-UNSAFE-LABEL: name: test_half_add_mul ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] - ; GFX10-UNSAFE-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-UNSAFE-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-UNSAFE-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX10-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(f16) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX10-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[FMA]](f16) + ; GFX10-UNSAFE-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST3]](i16) + ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return 
$sgpr30_sgpr31, implicit $vgpr0 - %4:_(s32) = COPY $vgpr0 - %0:_(s16) = G_TRUNC %4(s32) - %5:_(s32) = COPY $vgpr1 - %1:_(s16) = G_TRUNC %5(s32) - %6:_(s32) = COPY $vgpr2 - %2:_(s16) = G_TRUNC %6(s32) - %7:_(s16) = G_FMUL %0, %1 - %8:_(s16) = G_FADD %7, %2 - %10:_(s32) = G_ANYEXT %8(s16) - $vgpr0 = COPY %10(s32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(i32) = COPY $vgpr1 + %3:_(i16) = G_TRUNC %2(i32) + %4:_(i32) = COPY $vgpr2 + %5:_(i16) = G_TRUNC %4(i32) + %6:_(f16) = G_BITCAST %1(i16) + %7:_(f16) = G_BITCAST %3(i16) + %8:_(f16) = G_FMUL %6, %7 + %9:_(f16) = G_BITCAST %5(i16) + %10:_(f16) = G_FADD %8, %9 + %11:_(i16) = G_BITCAST %10(f16) + %12:_(i32) = G_ANYEXT %11(i16) + $vgpr0 = COPY %12(i32) S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ... @@ -564,121 +779,164 @@ body: | ; GFX9-LABEL: name: test_half_add_mul_rhs ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC1]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[FMUL]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(f16) = G_FADD [[BITCAST2]], [[FMUL]] + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[FADD]](f16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST3]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; ; GFX9-CONTRACT-LABEL: name: test_half_add_mul_rhs ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] - ; GFX9-CONTRACT-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-CONTRACT-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-CONTRACT-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-CONTRACT-NEXT: 
[[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-CONTRACT-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX9-CONTRACT-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-CONTRACT-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-CONTRACT-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(f16) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX9-CONTRACT-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[FMA]](f16) + ; GFX9-CONTRACT-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST3]](i16) + ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; ; GFX9-DENORM-LABEL: name: test_half_add_mul_rhs ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-DENORM-NEXT: {{ $}} - ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-DENORM-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-DENORM-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-DENORM-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC1]] - ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[FMUL]] - ; GFX9-DENORM-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) - ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX9-DENORM-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-DENORM-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-DENORM-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(f16) = G_FADD [[BITCAST2]], [[FMUL]] + ; GFX9-DENORM-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[FADD]](f16) + ; GFX9-DENORM-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST3]](i16) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; ; GFX9-UNSAFE-LABEL: name: test_half_add_mul_rhs ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] - ; GFX9-UNSAFE-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-UNSAFE-NEXT: 
[[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-UNSAFE-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX9-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(f16) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX9-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[FMA]](f16) + ; GFX9-UNSAFE-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST3]](i16) + ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; ; GFX10-LABEL: name: test_half_add_mul_rhs ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC1]] - ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[FMUL]] - ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) - ; GFX10-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(f16) = G_FADD [[BITCAST2]], [[FMUL]] + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[FADD]](f16) + ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST3]](i16) + ; GFX10-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; ; GFX10-CONTRACT-LABEL: name: test_half_add_mul_rhs ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] - ; GFX10-CONTRACT-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-CONTRACT-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-CONTRACT-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; 
GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-CONTRACT-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX10-CONTRACT-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-CONTRACT-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-CONTRACT-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(f16) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX10-CONTRACT-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[FMA]](f16) + ; GFX10-CONTRACT-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST3]](i16) + ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; ; GFX10-DENORM-LABEL: name: test_half_add_mul_rhs ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-DENORM-NEXT: {{ $}} - ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-DENORM-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-DENORM-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-DENORM-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC1]] - ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[FMUL]] - ; GFX10-DENORM-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) - ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX10-DENORM-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-DENORM-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-DENORM-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(f16) = G_FADD [[BITCAST2]], [[FMUL]] + ; GFX10-DENORM-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[FADD]](f16) + ; GFX10-DENORM-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST3]](i16) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; ; GFX10-UNSAFE-LABEL: name: test_half_add_mul_rhs ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] - ; GFX10-UNSAFE-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; 
GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-UNSAFE-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-UNSAFE-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX10-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(f16) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX10-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[FMA]](f16) + ; GFX10-UNSAFE-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST3]](i16) + ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - %4:_(s32) = COPY $vgpr0 - %0:_(s16) = G_TRUNC %4(s32) - %5:_(s32) = COPY $vgpr1 - %1:_(s16) = G_TRUNC %5(s32) - %6:_(s32) = COPY $vgpr2 - %2:_(s16) = G_TRUNC %6(s32) - %7:_(s16) = G_FMUL %0, %1 - %8:_(s16) = G_FADD %2, %7 - %10:_(s32) = G_ANYEXT %8(s16) - $vgpr0 = COPY %10(s32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(i32) = COPY $vgpr1 + %3:_(i16) = G_TRUNC %2(i32) + %4:_(i32) = COPY $vgpr2 + %5:_(i16) = G_TRUNC %4(i32) + %6:_(f16) = G_BITCAST %1(i16) + %7:_(f16) = G_BITCAST %3(i16) + %8:_(f16) = G_FMUL %6, %7 + %9:_(f16) = G_BITCAST %5(i16) + %10:_(f16) = G_FADD %9, %8 + %11:_(i16) = G_BITCAST %10(f16) + %12:_(i32) = G_ANYEXT %11(i16) + $vgpr0 = COPY %12(i32) S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ... @@ -691,157 +949,200 @@ body: | ; GFX9-LABEL: name: test_double_add_mul ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[MV]], [[MV1]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[FMUL]], [[MV2]] - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[MV]](i64) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[MV1]](i64) + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f64) = G_BITCAST [[MV2]](i64) + ; GFX9-NEXT: 
[[FADD:%[0-9]+]]:_(f64) = G_FADD [[FMUL]], [[BITCAST2]] + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i64) = G_BITCAST [[FADD]](f64) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](i64) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](i32) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; ; GFX9-CONTRACT-LABEL: name: test_double_add_mul ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] - ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX9-CONTRACT-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[MV]](i64) + ; GFX9-CONTRACT-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[MV1]](i64) + ; GFX9-CONTRACT-NEXT: [[BITCAST2:%[0-9]+]]:_(f64) = G_BITCAST [[MV2]](i64) + ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(f64) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX9-CONTRACT-NEXT: [[BITCAST3:%[0-9]+]]:_(i64) = G_BITCAST [[FMA]](f64) + ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](i64) + ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](i32) ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; ; GFX9-DENORM-LABEL: name: test_double_add_mul ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-DENORM-NEXT: {{ $}} - ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; 
GFX9-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[MV]], [[MV1]] - ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[FMUL]], [[MV2]] - ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64) - ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-DENORM-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-DENORM-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX9-DENORM-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[MV]](i64) + ; GFX9-DENORM-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[MV1]](i64) + ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-DENORM-NEXT: [[BITCAST2:%[0-9]+]]:_(f64) = G_BITCAST [[MV2]](i64) + ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(f64) = G_FADD [[FMUL]], [[BITCAST2]] + ; GFX9-DENORM-NEXT: [[BITCAST3:%[0-9]+]]:_(i64) = G_BITCAST [[FADD]](f64) + ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](i64) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](i32) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; ; GFX9-UNSAFE-LABEL: name: test_double_add_mul ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] - ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX9-UNSAFE-NEXT: 
[[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[MV]](i64) + ; GFX9-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[MV1]](i64) + ; GFX9-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(f64) = G_BITCAST [[MV2]](i64) + ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(f64) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX9-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(i64) = G_BITCAST [[FMA]](f64) + ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](i64) + ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](i32) ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; ; GFX10-LABEL: name: test_double_add_mul ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[MV]], [[MV1]] - ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[FMUL]], [[MV2]] - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[MV]](i64) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[MV1]](i64) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f64) = G_BITCAST [[MV2]](i64) + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(f64) = G_FADD [[FMUL]], [[BITCAST2]] + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i64) = G_BITCAST [[FADD]](f64) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](i64) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](i32) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; ; GFX10-CONTRACT-LABEL: name: test_double_add_mul ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), 
[[COPY3]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] - ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX10-CONTRACT-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[MV]](i64) + ; GFX10-CONTRACT-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[MV1]](i64) + ; GFX10-CONTRACT-NEXT: [[BITCAST2:%[0-9]+]]:_(f64) = G_BITCAST [[MV2]](i64) + ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(f64) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX10-CONTRACT-NEXT: [[BITCAST3:%[0-9]+]]:_(i64) = G_BITCAST [[FMA]](f64) + ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](i64) + ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](i32) ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; ; GFX10-DENORM-LABEL: name: test_double_add_mul ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-DENORM-NEXT: {{ $}} - ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[MV]], [[MV1]] - ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[FMUL]], [[MV2]] - ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64) - ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-DENORM-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; 
GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-DENORM-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX10-DENORM-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[MV]](i64) + ; GFX10-DENORM-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[MV1]](i64) + ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-DENORM-NEXT: [[BITCAST2:%[0-9]+]]:_(f64) = G_BITCAST [[MV2]](i64) + ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(f64) = G_FADD [[FMUL]], [[BITCAST2]] + ; GFX10-DENORM-NEXT: [[BITCAST3:%[0-9]+]]:_(i64) = G_BITCAST [[FADD]](f64) + ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](i64) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](i32) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; ; GFX10-UNSAFE-LABEL: name: test_double_add_mul ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] - ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX10-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[MV]](i64) + ; GFX10-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[MV1]](i64) + ; GFX10-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(f64) = G_BITCAST [[MV2]](i64) + ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(f64) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX10-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(i64) = G_BITCAST [[FMA]](f64) + ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](i64) + ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](i32) ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - %4:_(s32) = COPY $vgpr0 - %5:_(s32) = COPY $vgpr1 - %0:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) - %6:_(s32) = COPY $vgpr2 - %7:_(s32) = COPY $vgpr3 - %1:_(s64) = G_MERGE_VALUES %6(s32), %7(s32) - %8:_(s32) = COPY $vgpr4 
- %9:_(s32) = COPY $vgpr5 - %2:_(s64) = G_MERGE_VALUES %8(s32), %9(s32) - %10:_(s64) = G_FMUL %0, %1 - %11:_(s64) = G_FADD %10, %2 - %13:_(s32), %14:_(s32) = G_UNMERGE_VALUES %11(s64) - $vgpr0 = COPY %13(s32) - $vgpr1 = COPY %14(s32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i64) = G_MERGE_VALUES %0(i32), %1(i32) + %3:_(i32) = COPY $vgpr2 + %4:_(i32) = COPY $vgpr3 + %5:_(i64) = G_MERGE_VALUES %3(i32), %4(i32) + %6:_(i32) = COPY $vgpr4 + %7:_(i32) = COPY $vgpr5 + %8:_(i64) = G_MERGE_VALUES %6(i32), %7(i32) + %9:_(f64) = G_BITCAST %2(i64) + %10:_(f64) = G_BITCAST %5(i64) + %11:_(f64) = G_FMUL %9, %10 + %12:_(f64) = G_BITCAST %8(i64) + %13:_(f64) = G_FADD %11, %12 + %14:_(i64) = G_BITCAST %13(f64) + %15:_(i32), %16:_(i32) = G_UNMERGE_VALUES %14(i64) + $vgpr0 = COPY %15(i32) + $vgpr1 = COPY %16(i32) S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ... @@ -854,157 +1155,200 @@ body: | ; GFX9-LABEL: name: test_double_add_mul_rhs ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[MV]], [[MV1]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[MV2]], [[FMUL]] - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[MV]](i64) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[MV1]](i64) + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f64) = G_BITCAST [[MV2]](i64) + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(f64) = G_FADD [[BITCAST2]], [[FMUL]] + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i64) = G_BITCAST [[FADD]](f64) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](i64) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](i32) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; ; GFX9-CONTRACT-LABEL: name: test_double_add_mul_rhs ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES 
[[COPY]](s32), [[COPY1]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] - ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX9-CONTRACT-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[MV]](i64) + ; GFX9-CONTRACT-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[MV1]](i64) + ; GFX9-CONTRACT-NEXT: [[BITCAST2:%[0-9]+]]:_(f64) = G_BITCAST [[MV2]](i64) + ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(f64) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX9-CONTRACT-NEXT: [[BITCAST3:%[0-9]+]]:_(i64) = G_BITCAST [[FMA]](f64) + ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](i64) + ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](i32) ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; ; GFX9-DENORM-LABEL: name: test_double_add_mul_rhs ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-DENORM-NEXT: {{ $}} - ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[MV]], [[MV1]] - ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[MV2]], [[FMUL]] - ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64) - ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: 
[[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-DENORM-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-DENORM-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX9-DENORM-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[MV]](i64) + ; GFX9-DENORM-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[MV1]](i64) + ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-DENORM-NEXT: [[BITCAST2:%[0-9]+]]:_(f64) = G_BITCAST [[MV2]](i64) + ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(f64) = G_FADD [[BITCAST2]], [[FMUL]] + ; GFX9-DENORM-NEXT: [[BITCAST3:%[0-9]+]]:_(i64) = G_BITCAST [[FADD]](f64) + ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](i64) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](i32) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; ; GFX9-UNSAFE-LABEL: name: test_double_add_mul_rhs ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] - ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX9-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[MV]](i64) + ; GFX9-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[MV1]](i64) + ; GFX9-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(f64) = G_BITCAST [[MV2]](i64) + ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(f64) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX9-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(i64) = G_BITCAST [[FMA]](f64) + ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](i64) + ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](i32) ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; ; GFX10-LABEL: name: test_double_add_mul_rhs ; GFX10: liveins: 
$vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[MV]], [[MV1]] - ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[MV2]], [[FMUL]] - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[MV]](i64) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[MV1]](i64) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f64) = G_BITCAST [[MV2]](i64) + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(f64) = G_FADD [[BITCAST2]], [[FMUL]] + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i64) = G_BITCAST [[FADD]](f64) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](i64) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](i32) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; ; GFX10-CONTRACT-LABEL: name: test_double_add_mul_rhs ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] - ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES 
[[COPY]](i32), [[COPY1]](i32) + ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX10-CONTRACT-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[MV]](i64) + ; GFX10-CONTRACT-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[MV1]](i64) + ; GFX10-CONTRACT-NEXT: [[BITCAST2:%[0-9]+]]:_(f64) = G_BITCAST [[MV2]](i64) + ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(f64) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX10-CONTRACT-NEXT: [[BITCAST3:%[0-9]+]]:_(i64) = G_BITCAST [[FMA]](f64) + ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](i64) + ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](i32) ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; ; GFX10-DENORM-LABEL: name: test_double_add_mul_rhs ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-DENORM-NEXT: {{ $}} - ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[MV]], [[MV1]] - ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[MV2]], [[FMUL]] - ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64) - ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-DENORM-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-DENORM-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX10-DENORM-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[MV]](i64) + ; GFX10-DENORM-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[MV1]](i64) + ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-DENORM-NEXT: [[BITCAST2:%[0-9]+]]:_(f64) = G_BITCAST [[MV2]](i64) + ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(f64) = G_FADD [[BITCAST2]], [[FMUL]] + ; GFX10-DENORM-NEXT: [[BITCAST3:%[0-9]+]]:_(i64) = G_BITCAST [[FADD]](f64) + ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES 
[[BITCAST3]](i64) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](i32) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; ; GFX10-UNSAFE-LABEL: name: test_double_add_mul_rhs ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] - ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX10-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[MV]](i64) + ; GFX10-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[MV1]](i64) + ; GFX10-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(f64) = G_BITCAST [[MV2]](i64) + ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(f64) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX10-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(i64) = G_BITCAST [[FMA]](f64) + ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](i64) + ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](i32) ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - %4:_(s32) = COPY $vgpr0 - %5:_(s32) = COPY $vgpr1 - %0:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) - %6:_(s32) = COPY $vgpr2 - %7:_(s32) = COPY $vgpr3 - %1:_(s64) = G_MERGE_VALUES %6(s32), %7(s32) - %8:_(s32) = COPY $vgpr4 - %9:_(s32) = COPY $vgpr5 - %2:_(s64) = G_MERGE_VALUES %8(s32), %9(s32) - %10:_(s64) = G_FMUL %0, %1 - %11:_(s64) = G_FADD %2, %10 - %13:_(s32), %14:_(s32) = G_UNMERGE_VALUES %11(s64) - $vgpr0 = COPY %13(s32) - $vgpr1 = COPY %14(s32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i64) = G_MERGE_VALUES %0(i32), %1(i32) + %3:_(i32) = COPY $vgpr2 + %4:_(i32) = COPY $vgpr3 + %5:_(i64) = G_MERGE_VALUES %3(i32), %4(i32) + %6:_(i32) = COPY $vgpr4 + %7:_(i32) = COPY $vgpr5 + %8:_(i64) = G_MERGE_VALUES %6(i32), %7(i32) + %9:_(f64) = G_BITCAST %2(i64) + %10:_(f64) = G_BITCAST %5(i64) + %11:_(f64) = G_FMUL %9, %10 + %12:_(f64) = G_BITCAST %8(i64) + %13:_(f64) = G_FADD %12, %11 + %14:_(i64) 
= G_BITCAST %13(f64) + %15:_(i32), %16:_(i32) = G_UNMERGE_VALUES %14(i64) + $vgpr0 = COPY %15(i32) + $vgpr1 = COPY %16(i32) S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ... @@ -1017,233 +1361,276 @@ body: | ; GFX9-LABEL: name: test_4xfloat_add_mul ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<4 x s32>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x i32>) + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(<4 x f32>) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[BUILD_VECTOR2]](<4 x i32>) + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<4 x f32>) = G_FADD [[FMUL]], [[BITCAST2]] + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[FADD]](<4 x f32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), 
[[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<4 x i32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](i32) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; ; GFX9-CONTRACT-LABEL: name: test_4xfloat_add_mul ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-CONTRACT-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX9-CONTRACT-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX9-CONTRACT-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX9-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<4 x s32>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] - ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX9-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX9-CONTRACT-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GFX9-CONTRACT-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GFX9-CONTRACT-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-CONTRACT-NEXT: 
[[BITCAST:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; GFX9-CONTRACT-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x i32>) + ; GFX9-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<4 x f32>) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-CONTRACT-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[BUILD_VECTOR2]](<4 x i32>) + ; GFX9-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<4 x f32>) = G_FADD [[FMUL]], [[BITCAST2]] + ; GFX9-CONTRACT-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[FADD]](<4 x f32>) + ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<4 x i32>) + ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX9-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; GFX9-CONTRACT-NEXT: $vgpr3 = COPY [[UV3]](i32) ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; ; GFX9-DENORM-LABEL: name: test_4xfloat_add_mul ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 ; GFX9-DENORM-NEXT: {{ $}} - ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-DENORM-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX9-DENORM-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX9-DENORM-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX9-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<4 x s32>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] - ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) - ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-DENORM-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX9-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; 
GFX9-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX9-DENORM-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GFX9-DENORM-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GFX9-DENORM-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-DENORM-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; GFX9-DENORM-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x i32>) + ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<4 x f32>) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-DENORM-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[BUILD_VECTOR2]](<4 x i32>) + ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<4 x f32>) = G_FADD [[FMUL]], [[BITCAST2]] + ; GFX9-DENORM-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[FADD]](<4 x f32>) + ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<4 x i32>) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX9-DENORM-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; GFX9-DENORM-NEXT: $vgpr3 = COPY [[UV3]](i32) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; ; GFX9-UNSAFE-LABEL: name: test_4xfloat_add_mul ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-UNSAFE-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX9-UNSAFE-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX9-UNSAFE-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX9-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<4 x s32>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] - ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY 
$vgpr1 + ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX9-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX9-UNSAFE-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GFX9-UNSAFE-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GFX9-UNSAFE-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; GFX9-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x i32>) + ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<4 x f32>) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[BUILD_VECTOR2]](<4 x i32>) + ; GFX9-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<4 x f32>) = G_FADD [[FMUL]], [[BITCAST2]] + ; GFX9-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[FADD]](<4 x f32>) + ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<4 x i32>) + ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX9-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; GFX9-UNSAFE-NEXT: $vgpr3 = COPY [[UV3]](i32) ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; ; GFX10-LABEL: name: test_4xfloat_add_mul ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(<4 x s32>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), 
[[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x i32>) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(<4 x f32>) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[BUILD_VECTOR2]](<4 x i32>) + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(<4 x f32>) = G_FADD [[FMUL]], [[BITCAST2]] + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[FADD]](<4 x f32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<4 x i32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](i32) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; ; GFX10-CONTRACT-LABEL: name: test_4xfloat_add_mul ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-CONTRACT-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX10-CONTRACT-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX10-CONTRACT-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; 
GFX10-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX10-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<4 x s32>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] - ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX10-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX10-CONTRACT-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GFX10-CONTRACT-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GFX10-CONTRACT-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-CONTRACT-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; GFX10-CONTRACT-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x i32>) + ; GFX10-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<4 x f32>) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-CONTRACT-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[BUILD_VECTOR2]](<4 x i32>) + ; GFX10-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<4 x f32>) = G_FADD [[FMUL]], [[BITCAST2]] + ; GFX10-CONTRACT-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[FADD]](<4 x f32>) + ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<4 x i32>) + ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX10-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; GFX10-CONTRACT-NEXT: $vgpr3 = COPY [[UV3]](i32) ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; ; GFX10-DENORM-LABEL: name: test_4xfloat_add_mul ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 ; GFX10-DENORM-NEXT: {{ $}} - ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; 
GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-DENORM-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX10-DENORM-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX10-DENORM-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX10-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<4 x s32>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] - ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) - ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-DENORM-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX10-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX10-DENORM-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GFX10-DENORM-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GFX10-DENORM-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-DENORM-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; GFX10-DENORM-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x i32>) + ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<4 x f32>) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-DENORM-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[BUILD_VECTOR2]](<4 x i32>) + ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<4 x f32>) = G_FADD [[FMUL]], [[BITCAST2]] + ; GFX10-DENORM-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[FADD]](<4 x f32>) + ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<4 x i32>) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX10-DENORM-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; GFX10-DENORM-NEXT: $vgpr3 = COPY [[UV3]](i32) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; ; 
GFX10-UNSAFE-LABEL: name: test_4xfloat_add_mul ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-UNSAFE-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX10-UNSAFE-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX10-UNSAFE-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX10-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<4 x s32>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] - ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX10-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX10-UNSAFE-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GFX10-UNSAFE-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GFX10-UNSAFE-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; GFX10-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x i32>) + ; GFX10-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<4 x f32>) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[BUILD_VECTOR2]](<4 x i32>) + ; GFX10-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<4 x f32>) = G_FADD [[FMUL]], [[BITCAST2]] + ; 
GFX10-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[FADD]](<4 x f32>) + ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<4 x i32>) + ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX10-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; GFX10-UNSAFE-NEXT: $vgpr3 = COPY [[UV3]](i32) ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 - %4:_(s32) = COPY $vgpr0 - %5:_(s32) = COPY $vgpr1 - %6:_(s32) = COPY $vgpr2 - %7:_(s32) = COPY $vgpr3 - %0:_(<4 x s32>) = G_BUILD_VECTOR %4(s32), %5(s32), %6(s32), %7(s32) - %8:_(s32) = COPY $vgpr4 - %9:_(s32) = COPY $vgpr5 - %10:_(s32) = COPY $vgpr6 - %11:_(s32) = COPY $vgpr7 - %1:_(<4 x s32>) = G_BUILD_VECTOR %8(s32), %9(s32), %10(s32), %11(s32) - %12:_(s32) = COPY $vgpr8 - %13:_(s32) = COPY $vgpr9 - %14:_(s32) = COPY $vgpr10 - %15:_(s32) = COPY $vgpr11 - %2:_(<4 x s32>) = G_BUILD_VECTOR %12(s32), %13(s32), %14(s32), %15(s32) - %16:_(<4 x s32>) = G_FMUL %0, %1 - %17:_(<4 x s32>) = G_FADD %16, %2 - %19:_(s32), %20:_(s32), %21:_(s32), %22:_(s32) = G_UNMERGE_VALUES %17(<4 x s32>) - $vgpr0 = COPY %19(s32) - $vgpr1 = COPY %20(s32) - $vgpr2 = COPY %21(s32) - $vgpr3 = COPY %22(s32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 + %4:_(<4 x i32>) = G_BUILD_VECTOR %0(i32), %1(i32), %2(i32), %3(i32) + %5:_(i32) = COPY $vgpr4 + %6:_(i32) = COPY $vgpr5 + %7:_(i32) = COPY $vgpr6 + %8:_(i32) = COPY $vgpr7 + %9:_(<4 x i32>) = G_BUILD_VECTOR %5(i32), %6(i32), %7(i32), %8(i32) + %10:_(i32) = COPY $vgpr8 + %11:_(i32) = COPY $vgpr9 + %12:_(i32) = COPY $vgpr10 + %13:_(i32) = COPY $vgpr11 + %14:_(<4 x i32>) = G_BUILD_VECTOR %10(i32), %11(i32), %12(i32), %13(i32) + %15:_(<4 x f32>) = G_BITCAST %4(<4 x i32>) + %16:_(<4 x f32>) = G_BITCAST %9(<4 x i32>) + %17:_(<4 x f32>) = G_FMUL %15, %16 + %18:_(<4 x f32>) = G_BITCAST %14(<4 x i32>) + %19:_(<4 x f32>) = G_FADD %17, %18 + %20:_(<4 x i32>) = G_BITCAST %19(<4 x f32>) + %21:_(i32), %22:_(i32), %23:_(i32), %24:_(i32) = G_UNMERGE_VALUES %20(<4 x i32>) + $vgpr0 = COPY %21(i32) + $vgpr1 = COPY %22(i32) + $vgpr2 = COPY %23(i32) + $vgpr3 = COPY %24(i32) S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ... 
@@ -1256,197 +1643,240 @@ body: | ; GFX9-LABEL: name: test_3xfloat_add_mul_rhs ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<3 x s32>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32) + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32) + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY6]](i32), [[COPY7]](i32), [[COPY8]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(<3 x f32>) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[BUILD_VECTOR2]](<3 x i32>) + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<3 x f32>) = G_FADD [[BITCAST2]], [[FMUL]] + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[FADD]](<3 x f32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<3 x i32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](i32) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; ; GFX9-CONTRACT-LABEL: name: test_3xfloat_add_mul_rhs ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; GFX9-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX9-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<3 x s32>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] - ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32) + ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32) + ; GFX9-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX9-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX9-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY6]](i32), [[COPY7]](i32), [[COPY8]](i32) + ; GFX9-CONTRACT-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX9-CONTRACT-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; GFX9-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<3 x f32>) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-CONTRACT-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[BUILD_VECTOR2]](<3 x i32>) + ; GFX9-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<3 x f32>) = G_FADD [[BITCAST2]], [[FMUL]] + ; GFX9-CONTRACT-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[FADD]](<3 x f32>) + ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<3 x i32>) + ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX9-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](i32) ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; ; GFX9-DENORM-LABEL: name: test_3xfloat_add_mul_rhs ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX9-DENORM-NEXT: {{ $}} - ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR 
[[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) - ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<3 x s32>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] - ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) - ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32) + ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32) + ; GFX9-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX9-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX9-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY6]](i32), [[COPY7]](i32), [[COPY8]](i32) + ; GFX9-DENORM-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX9-DENORM-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<3 x f32>) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-DENORM-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[BUILD_VECTOR2]](<3 x i32>) + ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<3 x f32>) = G_FADD [[BITCAST2]], [[FMUL]] + ; GFX9-DENORM-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[FADD]](<3 x f32>) + ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<3 x i32>) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX9-DENORM-NEXT: $vgpr2 = COPY [[UV2]](i32) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; ; GFX9-UNSAFE-LABEL: name: test_3xfloat_add_mul_rhs ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-UNSAFE-NEXT: 
[[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX9-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<3 x s32>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] - ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32) + ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32) + ; GFX9-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX9-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX9-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY6]](i32), [[COPY7]](i32), [[COPY8]](i32) + ; GFX9-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX9-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<3 x f32>) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[BUILD_VECTOR2]](<3 x i32>) + ; GFX9-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<3 x f32>) = G_FADD [[BITCAST2]], [[FMUL]] + ; GFX9-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[FADD]](<3 x f32>) + ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<3 x i32>) + ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX9-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](i32) ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; ; GFX10-LABEL: name: test_3xfloat_add_mul_rhs ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), 
[[COPY4]](s32), [[COPY5]](s32) - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(<3 x s32>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32) + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32) + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY6]](i32), [[COPY7]](i32), [[COPY8]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(<3 x f32>) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[BUILD_VECTOR2]](<3 x i32>) + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(<3 x f32>) = G_FADD [[BITCAST2]], [[FMUL]] + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[FADD]](<3 x f32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<3 x i32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](i32) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; ; GFX10-CONTRACT-LABEL: name: test_3xfloat_add_mul_rhs ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = 
G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; GFX10-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX10-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<3 x s32>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] - ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32) + ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32) + ; GFX10-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX10-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX10-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY6]](i32), [[COPY7]](i32), [[COPY8]](i32) + ; GFX10-CONTRACT-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX10-CONTRACT-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; GFX10-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<3 x f32>) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-CONTRACT-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[BUILD_VECTOR2]](<3 x i32>) + ; GFX10-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<3 x f32>) = G_FADD [[BITCAST2]], [[FMUL]] + ; GFX10-CONTRACT-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[FADD]](<3 x f32>) + ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<3 x i32>) + ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX10-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](i32) ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; ; GFX10-DENORM-LABEL: name: test_3xfloat_add_mul_rhs ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX10-DENORM-NEXT: {{ $}} - ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) - ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), 
[[COPY7]](s32), [[COPY8]](s32) - ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<3 x s32>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] - ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) - ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32) + ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32) + ; GFX10-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX10-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX10-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY6]](i32), [[COPY7]](i32), [[COPY8]](i32) + ; GFX10-DENORM-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX10-DENORM-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<3 x f32>) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-DENORM-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[BUILD_VECTOR2]](<3 x i32>) + ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<3 x f32>) = G_FADD [[BITCAST2]], [[FMUL]] + ; GFX10-DENORM-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[FADD]](<3 x f32>) + ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<3 x i32>) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX10-DENORM-NEXT: $vgpr2 = COPY [[UV2]](i32) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; ; GFX10-UNSAFE-LABEL: name: test_3xfloat_add_mul_rhs ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; GFX10-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s32>) = G_FMUL 
[[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX10-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<3 x s32>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] - ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32) + ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32) + ; GFX10-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX10-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX10-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY6]](i32), [[COPY7]](i32), [[COPY8]](i32) + ; GFX10-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX10-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; GFX10-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<3 x f32>) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[BUILD_VECTOR2]](<3 x i32>) + ; GFX10-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<3 x f32>) = G_FADD [[BITCAST2]], [[FMUL]] + ; GFX10-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[FADD]](<3 x f32>) + ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<3 x i32>) + ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX10-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](i32) ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 - %4:_(s32) = COPY $vgpr0 - %5:_(s32) = COPY $vgpr1 - %6:_(s32) = COPY $vgpr2 - %0:_(<3 x s32>) = G_BUILD_VECTOR %4(s32), %5(s32), %6(s32) - %7:_(s32) = COPY $vgpr3 - %8:_(s32) = COPY $vgpr4 - %9:_(s32) = COPY $vgpr5 - %1:_(<3 x s32>) = G_BUILD_VECTOR %7(s32), %8(s32), %9(s32) - %10:_(s32) = COPY $vgpr6 - %11:_(s32) = COPY $vgpr7 - %12:_(s32) = COPY $vgpr8 - %2:_(<3 x s32>) = G_BUILD_VECTOR %10(s32), %11(s32), %12(s32) - %13:_(<3 x s32>) = G_FMUL %0, %1 - %14:_(<3 x s32>) = G_FADD %2, %13 - %16:_(s32), %17:_(s32), %18:_(s32) = G_UNMERGE_VALUES %14(<3 x s32>) - $vgpr0 = COPY %16(s32) - $vgpr1 = COPY %17(s32) - $vgpr2 = COPY %18(s32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(<3 x i32>) = G_BUILD_VECTOR %0(i32), %1(i32), %2(i32) + %4:_(i32) = COPY $vgpr3 + %5:_(i32) = COPY $vgpr4 + %6:_(i32) = COPY $vgpr5 + %7:_(<3 x i32>) = G_BUILD_VECTOR %4(i32), %5(i32), %6(i32) + %8:_(i32) = COPY $vgpr6 + %9:_(i32) = COPY $vgpr7 + %10:_(i32) = COPY $vgpr8 + %11:_(<3 x i32>) = G_BUILD_VECTOR %8(i32), %9(i32), %10(i32) + %12:_(<3 x f32>) = G_BITCAST %3(<3 x i32>) + %13:_(<3 x f32>) = G_BITCAST %7(<3 x i32>) + %14:_(<3 x f32>) = G_FMUL %12, %13 + %15:_(<3 x f32>) = G_BITCAST %11(<3 x i32>) + %16:_(<3 x f32>) = G_FADD %15, %14 
+ %17:_(<3 x i32>) = G_BITCAST %16(<3 x f32>) + %18:_(i32), %19:_(i32), %20:_(i32) = G_UNMERGE_VALUES %17(<3 x i32>) + $vgpr0 = COPY %18(i32) + $vgpr1 = COPY %19(i32) + $vgpr2 = COPY %20(i32) S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ... @@ -1459,161 +1889,204 @@ body: | ; GFX9-LABEL: name: test_4xhalf_add_mul ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX9-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s16>) = G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<4 x s16>) = G_FADD [[FMUL]], [[CONCAT_VECTORS2]] - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[COPY]](<2 x i16>), [[COPY1]](<2 x i16>) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr3 + ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[COPY2]](<2 x i16>), [[COPY3]](<2 x i16>) + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX9-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[COPY4]](<2 x i16>), [[COPY5]](<2 x i16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x i16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[CONCAT_VECTORS1]](<4 x i16>) + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(<4 x f16>) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[CONCAT_VECTORS2]](<4 x i16>) + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<4 x f16>) = G_FADD [[FMUL]], [[BITCAST2]] + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[FADD]](<4 x f16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[BITCAST3]](<4 x i16>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](<2 x i16>) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](<2 x i16>) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; ; GFX9-CONTRACT-LABEL: name: test_4xhalf_add_mul ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; 
GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) - ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) - ; GFX9-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s16>) = G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] - ; GFX9-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<4 x s16>) = G_FADD [[FMUL]], [[CONCAT_VECTORS2]] - ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>) - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[COPY]](<2 x i16>), [[COPY1]](<2 x i16>) + ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr3 + ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[COPY2]](<2 x i16>), [[COPY3]](<2 x i16>) + ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[COPY4]](<2 x i16>), [[COPY5]](<2 x i16>) + ; GFX9-CONTRACT-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x i16>) + ; GFX9-CONTRACT-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[CONCAT_VECTORS1]](<4 x i16>) + ; GFX9-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<4 x f16>) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-CONTRACT-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[CONCAT_VECTORS2]](<4 x i16>) + ; GFX9-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<4 x f16>) = G_FADD [[FMUL]], [[BITCAST2]] + ; GFX9-CONTRACT-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[FADD]](<4 x f16>) + ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[BITCAST3]](<4 x i16>) + ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](<2 x i16>) + ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](<2 x i16>) ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; ; GFX9-DENORM-LABEL: name: test_4xhalf_add_mul ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-DENORM-NEXT: {{ $}} - ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) - ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) - ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) - ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s16>) = 
G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] - ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<4 x s16>) = G_FADD [[FMUL]], [[CONCAT_VECTORS2]] - ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>) - ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[COPY]](<2 x i16>), [[COPY1]](<2 x i16>) + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr3 + ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[COPY2]](<2 x i16>), [[COPY3]](<2 x i16>) + ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[COPY4]](<2 x i16>), [[COPY5]](<2 x i16>) + ; GFX9-DENORM-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x i16>) + ; GFX9-DENORM-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[CONCAT_VECTORS1]](<4 x i16>) + ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<4 x f16>) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-DENORM-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[CONCAT_VECTORS2]](<4 x i16>) + ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<4 x f16>) = G_FADD [[FMUL]], [[BITCAST2]] + ; GFX9-DENORM-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[FADD]](<4 x f16>) + ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[BITCAST3]](<4 x i16>) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](<2 x i16>) + ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](<2 x i16>) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; ; GFX9-UNSAFE-LABEL: name: test_4xhalf_add_mul ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s16>) = G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] - ; GFX9-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<4 x s16>) = G_FADD [[FMUL]], [[CONCAT_VECTORS2]] - ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = 
G_CONCAT_VECTORS [[COPY]](<2 x i16>), [[COPY1]](<2 x i16>) + ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr3 + ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[COPY2]](<2 x i16>), [[COPY3]](<2 x i16>) + ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[COPY4]](<2 x i16>), [[COPY5]](<2 x i16>) + ; GFX9-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x i16>) + ; GFX9-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[CONCAT_VECTORS1]](<4 x i16>) + ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<4 x f16>) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[CONCAT_VECTORS2]](<4 x i16>) + ; GFX9-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<4 x f16>) = G_FADD [[FMUL]], [[BITCAST2]] + ; GFX9-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[FADD]](<4 x f16>) + ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[BITCAST3]](<4 x i16>) + ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](<2 x i16>) + ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](<2 x i16>) ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; ; GFX10-LABEL: name: test_4xhalf_add_mul ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX10-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX10-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s16>) = G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] - ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(<4 x s16>) = G_FADD [[FMUL]], [[CONCAT_VECTORS2]] - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[COPY]](<2 x i16>), [[COPY1]](<2 x i16>) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr3 + ; GFX10-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[COPY2]](<2 x i16>), [[COPY3]](<2 x i16>) + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX10-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[COPY4]](<2 x i16>), [[COPY5]](<2 x i16>) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x i16>) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f16>) = 
G_BITCAST [[CONCAT_VECTORS1]](<4 x i16>) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(<4 x f16>) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[CONCAT_VECTORS2]](<4 x i16>) + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(<4 x f16>) = G_FADD [[FMUL]], [[BITCAST2]] + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[FADD]](<4 x f16>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[BITCAST3]](<4 x i16>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](<2 x i16>) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](<2 x i16>) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; ; GFX10-CONTRACT-LABEL: name: test_4xhalf_add_mul ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) - ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) - ; GFX10-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s16>) = G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] - ; GFX10-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<4 x s16>) = G_FADD [[FMUL]], [[CONCAT_VECTORS2]] - ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>) - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[COPY]](<2 x i16>), [[COPY1]](<2 x i16>) + ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr3 + ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[COPY2]](<2 x i16>), [[COPY3]](<2 x i16>) + ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[COPY4]](<2 x i16>), [[COPY5]](<2 x i16>) + ; GFX10-CONTRACT-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x i16>) + ; GFX10-CONTRACT-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[CONCAT_VECTORS1]](<4 x i16>) + ; GFX10-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<4 x f16>) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-CONTRACT-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[CONCAT_VECTORS2]](<4 x i16>) + ; GFX10-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<4 x f16>) = G_FADD [[FMUL]], [[BITCAST2]] + ; GFX10-CONTRACT-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[FADD]](<4 x f16>) + ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES 
[[BITCAST3]](<4 x i16>) + ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](<2 x i16>) + ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](<2 x i16>) ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; ; GFX10-DENORM-LABEL: name: test_4xhalf_add_mul ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-DENORM-NEXT: {{ $}} - ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) - ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) - ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) - ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s16>) = G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] - ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<4 x s16>) = G_FADD [[FMUL]], [[CONCAT_VECTORS2]] - ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>) - ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[COPY]](<2 x i16>), [[COPY1]](<2 x i16>) + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr3 + ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[COPY2]](<2 x i16>), [[COPY3]](<2 x i16>) + ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[COPY4]](<2 x i16>), [[COPY5]](<2 x i16>) + ; GFX10-DENORM-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x i16>) + ; GFX10-DENORM-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[CONCAT_VECTORS1]](<4 x i16>) + ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<4 x f16>) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-DENORM-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[CONCAT_VECTORS2]](<4 x i16>) + ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<4 x f16>) = G_FADD [[FMUL]], [[BITCAST2]] + ; GFX10-DENORM-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[FADD]](<4 x f16>) + ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[BITCAST3]](<4 x i16>) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](<2 x i16>) + ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](<2 x i16>) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; ; GFX10-UNSAFE-LABEL: name: test_4xhalf_add_mul ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; 
GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) - ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) - ; GFX10-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s16>) = G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] - ; GFX10-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<4 x s16>) = G_FADD [[FMUL]], [[CONCAT_VECTORS2]] - ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>) - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[COPY]](<2 x i16>), [[COPY1]](<2 x i16>) + ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr3 + ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[COPY2]](<2 x i16>), [[COPY3]](<2 x i16>) + ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[COPY4]](<2 x i16>), [[COPY5]](<2 x i16>) + ; GFX10-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x i16>) + ; GFX10-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[CONCAT_VECTORS1]](<4 x i16>) + ; GFX10-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<4 x f16>) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[CONCAT_VECTORS2]](<4 x i16>) + ; GFX10-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<4 x f16>) = G_FADD [[FMUL]], [[BITCAST2]] + ; GFX10-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[FADD]](<4 x f16>) + ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[BITCAST3]](<4 x i16>) + ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](<2 x i16>) + ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](<2 x i16>) ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - %4:_(<2 x s16>) = COPY $vgpr0 - %5:_(<2 x s16>) = COPY $vgpr1 - %0:_(<4 x s16>) = G_CONCAT_VECTORS %4(<2 x s16>), %5(<2 x s16>) - %6:_(<2 x s16>) = COPY $vgpr2 - %7:_(<2 x s16>) = COPY $vgpr3 - %1:_(<4 x s16>) = G_CONCAT_VECTORS %6(<2 x s16>), %7(<2 x s16>) - %8:_(<2 x s16>) = COPY $vgpr4 - %9:_(<2 x s16>) = COPY $vgpr5 - %2:_(<4 x s16>) = G_CONCAT_VECTORS %8(<2 x s16>), %9(<2 x s16>) - %10:_(<4 x s16>) = G_FMUL %0, %1 - %11:_(<4 x s16>) = G_FADD %10, %2 - %13:_(<2 x s16>), %14:_(<2 x s16>) = G_UNMERGE_VALUES %11(<4 x s16>) - $vgpr0 = COPY %13(<2 x s16>) - $vgpr1 = COPY %14(<2 x s16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<4 x i16>) = G_CONCAT_VECTORS %0(<2 x i16>), %1(<2 x i16>) + %3:_(<2 x i16>) = COPY $vgpr2 + %4:_(<2 x i16>) = COPY $vgpr3 + %5:_(<4 x i16>) = 
G_CONCAT_VECTORS %3(<2 x i16>), %4(<2 x i16>) + %6:_(<2 x i16>) = COPY $vgpr4 + %7:_(<2 x i16>) = COPY $vgpr5 + %8:_(<4 x i16>) = G_CONCAT_VECTORS %6(<2 x i16>), %7(<2 x i16>) + %9:_(<4 x f16>) = G_BITCAST %2(<4 x i16>) + %10:_(<4 x f16>) = G_BITCAST %5(<4 x i16>) + %11:_(<4 x f16>) = G_FMUL %9, %10 + %12:_(<4 x f16>) = G_BITCAST %8(<4 x i16>) + %13:_(<4 x f16>) = G_FADD %11, %12 + %14:_(<4 x i16>) = G_BITCAST %13(<4 x f16>) + %15:_(<2 x i16>), %16:_(<2 x i16>) = G_UNMERGE_VALUES %14(<4 x i16>) + $vgpr0 = COPY %15(<2 x i16>) + $vgpr1 = COPY %16(<2 x i16>) S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ... @@ -1626,222 +2099,258 @@ body: | ; GFX9-LABEL: name: test_3xhalf_add_mul_rhs ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX9-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>) - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = G_FMUL [[UV]], [[UV2]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<3 x s16>) = G_FADD [[UV4]], [[FMUL]] - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FADD]](<3 x s16>), [[DEF1]](<3 x s16>) - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>) - ; GFX9-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<2 x i16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[COPY]](<2 x i16>), [[COPY1]](<2 x i16>), [[DEF]](<2 x i16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<3 x i16>), [[UV1:%[0-9]+]]:_(<3 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x i16>) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr3 + ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[COPY2]](<2 x i16>), [[COPY3]](<2 x i16>), [[DEF]](<2 x i16>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<3 x i16>), [[UV3:%[0-9]+]]:_(<3 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x i16>) + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX9-NEXT: 
[[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[COPY4]](<2 x i16>), [[COPY5]](<2 x i16>), [[DEF]](<2 x i16>) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<3 x i16>), [[UV5:%[0-9]+]]:_(<3 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x i16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f16>) = G_BITCAST [[UV]](<3 x i16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f16>) = G_BITCAST [[UV2]](<3 x i16>) + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(<3 x f16>) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x f16>) = G_BITCAST [[UV4]](<3 x i16>) + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<3 x f16>) = G_FADD [[BITCAST2]], [[FMUL]] + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<3 x i16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x i16>) = G_BITCAST [[FADD]](<3 x f16>) + ; GFX9-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST3]](<3 x i16>), [[DEF1]](<3 x i16>) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>), [[UV8:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x i16>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV6]](<2 x i16>) + ; GFX9-NEXT: $vgpr1 = COPY [[UV7]](<2 x i16>) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; ; GFX9-CONTRACT-LABEL: name: test_3xhalf_add_mul_rhs ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-CONTRACT-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) - ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-CONTRACT-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>) - ; GFX9-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = G_FMUL [[UV]], [[UV2]] - ; GFX9-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<3 x s16>) = G_FADD [[UV4]], [[FMUL]] - ; GFX9-CONTRACT-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FADD]](<3 x s16>), [[DEF1]](<3 x s16>) - ; GFX9-CONTRACT-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>) - ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>) + ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-CONTRACT-NEXT: [[DEF:%[0-9]+]]:_(<2 x i16>) = G_IMPLICIT_DEF + 
; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[COPY]](<2 x i16>), [[COPY1]](<2 x i16>), [[DEF]](<2 x i16>) + ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(<3 x i16>), [[UV1:%[0-9]+]]:_(<3 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x i16>) + ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr3 + ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[COPY2]](<2 x i16>), [[COPY3]](<2 x i16>), [[DEF]](<2 x i16>) + ; GFX9-CONTRACT-NEXT: [[UV2:%[0-9]+]]:_(<3 x i16>), [[UV3:%[0-9]+]]:_(<3 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x i16>) + ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[COPY4]](<2 x i16>), [[COPY5]](<2 x i16>), [[DEF]](<2 x i16>) + ; GFX9-CONTRACT-NEXT: [[UV4:%[0-9]+]]:_(<3 x i16>), [[UV5:%[0-9]+]]:_(<3 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x i16>) + ; GFX9-CONTRACT-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f16>) = G_BITCAST [[UV]](<3 x i16>) + ; GFX9-CONTRACT-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f16>) = G_BITCAST [[UV2]](<3 x i16>) + ; GFX9-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<3 x f16>) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-CONTRACT-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x f16>) = G_BITCAST [[UV4]](<3 x i16>) + ; GFX9-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<3 x f16>) = G_FADD [[BITCAST2]], [[FMUL]] + ; GFX9-CONTRACT-NEXT: [[DEF1:%[0-9]+]]:_(<3 x i16>) = G_IMPLICIT_DEF + ; GFX9-CONTRACT-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x i16>) = G_BITCAST [[FADD]](<3 x f16>) + ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST3]](<3 x i16>), [[DEF1]](<3 x i16>) + ; GFX9-CONTRACT-NEXT: [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>), [[UV8:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x i16>) + ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV6]](<2 x i16>) + ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV7]](<2 x i16>) ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; ; GFX9-DENORM-LABEL: name: test_3xhalf_add_mul_rhs ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-DENORM-NEXT: {{ $}} - ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-DENORM-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-DENORM-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) - ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-DENORM-NEXT: 
[[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>) - ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = G_FMUL [[UV]], [[UV2]] - ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<3 x s16>) = G_FADD [[UV4]], [[FMUL]] - ; GFX9-DENORM-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FADD]](<3 x s16>), [[DEF1]](<3 x s16>) - ; GFX9-DENORM-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) - ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>) - ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>) + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[DEF:%[0-9]+]]:_(<2 x i16>) = G_IMPLICIT_DEF + ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[COPY]](<2 x i16>), [[COPY1]](<2 x i16>), [[DEF]](<2 x i16>) + ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(<3 x i16>), [[UV1:%[0-9]+]]:_(<3 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x i16>) + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr3 + ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[COPY2]](<2 x i16>), [[COPY3]](<2 x i16>), [[DEF]](<2 x i16>) + ; GFX9-DENORM-NEXT: [[UV2:%[0-9]+]]:_(<3 x i16>), [[UV3:%[0-9]+]]:_(<3 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x i16>) + ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[COPY4]](<2 x i16>), [[COPY5]](<2 x i16>), [[DEF]](<2 x i16>) + ; GFX9-DENORM-NEXT: [[UV4:%[0-9]+]]:_(<3 x i16>), [[UV5:%[0-9]+]]:_(<3 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x i16>) + ; GFX9-DENORM-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f16>) = G_BITCAST [[UV]](<3 x i16>) + ; GFX9-DENORM-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f16>) = G_BITCAST [[UV2]](<3 x i16>) + ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<3 x f16>) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-DENORM-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x f16>) = G_BITCAST [[UV4]](<3 x i16>) + ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<3 x f16>) = G_FADD [[BITCAST2]], [[FMUL]] + ; GFX9-DENORM-NEXT: [[DEF1:%[0-9]+]]:_(<3 x i16>) = G_IMPLICIT_DEF + ; GFX9-DENORM-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x i16>) = G_BITCAST [[FADD]](<3 x f16>) + ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST3]](<3 x i16>), [[DEF1]](<3 x i16>) + ; GFX9-DENORM-NEXT: [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>), [[UV8:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x i16>) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV6]](<2 x i16>) + ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV7]](<2 x i16>) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; ; GFX9-UNSAFE-LABEL: name: test_3xhalf_add_mul_rhs ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), 
[[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) - ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>) - ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = G_FMUL [[UV]], [[UV2]] - ; GFX9-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<3 x s16>) = G_FADD [[UV4]], [[FMUL]] - ; GFX9-UNSAFE-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FADD]](<3 x s16>), [[DEF1]](<3 x s16>) - ; GFX9-UNSAFE-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>) + ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-UNSAFE-NEXT: [[DEF:%[0-9]+]]:_(<2 x i16>) = G_IMPLICIT_DEF + ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[COPY]](<2 x i16>), [[COPY1]](<2 x i16>), [[DEF]](<2 x i16>) + ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(<3 x i16>), [[UV1:%[0-9]+]]:_(<3 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x i16>) + ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr3 + ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[COPY2]](<2 x i16>), [[COPY3]](<2 x i16>), [[DEF]](<2 x i16>) + ; GFX9-UNSAFE-NEXT: [[UV2:%[0-9]+]]:_(<3 x i16>), [[UV3:%[0-9]+]]:_(<3 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x i16>) + ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[COPY4]](<2 x i16>), [[COPY5]](<2 x i16>), [[DEF]](<2 x i16>) + ; GFX9-UNSAFE-NEXT: [[UV4:%[0-9]+]]:_(<3 x i16>), [[UV5:%[0-9]+]]:_(<3 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x i16>) + ; GFX9-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f16>) = G_BITCAST [[UV]](<3 x i16>) + ; GFX9-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f16>) = G_BITCAST [[UV2]](<3 x i16>) + ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<3 x f16>) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x f16>) = G_BITCAST [[UV4]](<3 x i16>) + ; GFX9-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<3 x f16>) = G_FADD [[BITCAST2]], [[FMUL]] + ; GFX9-UNSAFE-NEXT: [[DEF1:%[0-9]+]]:_(<3 x i16>) = G_IMPLICIT_DEF + ; GFX9-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x i16>) = G_BITCAST [[FADD]](<3 x f16>) + ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x i16>) = 
G_CONCAT_VECTORS [[BITCAST3]](<3 x i16>), [[DEF1]](<3 x i16>) + ; GFX9-UNSAFE-NEXT: [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>), [[UV8:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x i16>) + ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV6]](<2 x i16>) + ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV7]](<2 x i16>) ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; ; GFX10-LABEL: name: test_3xhalf_add_mul_rhs ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX10-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX10-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>) - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = G_FMUL [[UV]], [[UV2]] - ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(<3 x s16>) = G_FADD [[UV4]], [[FMUL]] - ; GFX10-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FADD]](<3 x s16>), [[DEF1]](<3 x s16>) - ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>) - ; GFX10-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<2 x i16>) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[COPY]](<2 x i16>), [[COPY1]](<2 x i16>), [[DEF]](<2 x i16>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<3 x i16>), [[UV1:%[0-9]+]]:_(<3 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x i16>) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr3 + ; GFX10-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[COPY2]](<2 x i16>), [[COPY3]](<2 x i16>), [[DEF]](<2 x i16>) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(<3 x i16>), [[UV3:%[0-9]+]]:_(<3 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x i16>) + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX10-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[COPY4]](<2 x i16>), [[COPY5]](<2 x i16>), [[DEF]](<2 x i16>) + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(<3 x i16>), [[UV5:%[0-9]+]]:_(<3 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x 
i16>) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f16>) = G_BITCAST [[UV]](<3 x i16>) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f16>) = G_BITCAST [[UV2]](<3 x i16>) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(<3 x f16>) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x f16>) = G_BITCAST [[UV4]](<3 x i16>) + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(<3 x f16>) = G_FADD [[BITCAST2]], [[FMUL]] + ; GFX10-NEXT: [[DEF1:%[0-9]+]]:_(<3 x i16>) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x i16>) = G_BITCAST [[FADD]](<3 x f16>) + ; GFX10-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST3]](<3 x i16>), [[DEF1]](<3 x i16>) + ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>), [[UV8:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x i16>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV6]](<2 x i16>) + ; GFX10-NEXT: $vgpr1 = COPY [[UV7]](<2 x i16>) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; ; GFX10-CONTRACT-LABEL: name: test_3xhalf_add_mul_rhs ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX10-CONTRACT-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) - ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX10-CONTRACT-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>) - ; GFX10-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = G_FMUL [[UV]], [[UV2]] - ; GFX10-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<3 x s16>) = G_FADD [[UV4]], [[FMUL]] - ; GFX10-CONTRACT-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FADD]](<3 x s16>), [[DEF1]](<3 x s16>) - ; GFX10-CONTRACT-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>) - ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>) + ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX10-CONTRACT-NEXT: [[DEF:%[0-9]+]]:_(<2 x i16>) = G_IMPLICIT_DEF + ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[COPY]](<2 x i16>), [[COPY1]](<2 x i16>), [[DEF]](<2 x i16>) + ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(<3 x i16>), 
[[UV1:%[0-9]+]]:_(<3 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x i16>) + ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr3 + ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[COPY2]](<2 x i16>), [[COPY3]](<2 x i16>), [[DEF]](<2 x i16>) + ; GFX10-CONTRACT-NEXT: [[UV2:%[0-9]+]]:_(<3 x i16>), [[UV3:%[0-9]+]]:_(<3 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x i16>) + ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[COPY4]](<2 x i16>), [[COPY5]](<2 x i16>), [[DEF]](<2 x i16>) + ; GFX10-CONTRACT-NEXT: [[UV4:%[0-9]+]]:_(<3 x i16>), [[UV5:%[0-9]+]]:_(<3 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x i16>) + ; GFX10-CONTRACT-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f16>) = G_BITCAST [[UV]](<3 x i16>) + ; GFX10-CONTRACT-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f16>) = G_BITCAST [[UV2]](<3 x i16>) + ; GFX10-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<3 x f16>) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-CONTRACT-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x f16>) = G_BITCAST [[UV4]](<3 x i16>) + ; GFX10-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<3 x f16>) = G_FADD [[BITCAST2]], [[FMUL]] + ; GFX10-CONTRACT-NEXT: [[DEF1:%[0-9]+]]:_(<3 x i16>) = G_IMPLICIT_DEF + ; GFX10-CONTRACT-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x i16>) = G_BITCAST [[FADD]](<3 x f16>) + ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST3]](<3 x i16>), [[DEF1]](<3 x i16>) + ; GFX10-CONTRACT-NEXT: [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>), [[UV8:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x i16>) + ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV6]](<2 x i16>) + ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV7]](<2 x i16>) ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; ; GFX10-DENORM-LABEL: name: test_3xhalf_add_mul_rhs ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-DENORM-NEXT: {{ $}} - ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX10-DENORM-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX10-DENORM-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) - ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX10-DENORM-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>) - ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = G_FMUL 
[[UV]], [[UV2]] - ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<3 x s16>) = G_FADD [[UV4]], [[FMUL]] - ; GFX10-DENORM-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FADD]](<3 x s16>), [[DEF1]](<3 x s16>) - ; GFX10-DENORM-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) - ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>) - ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>) + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[DEF:%[0-9]+]]:_(<2 x i16>) = G_IMPLICIT_DEF + ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[COPY]](<2 x i16>), [[COPY1]](<2 x i16>), [[DEF]](<2 x i16>) + ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(<3 x i16>), [[UV1:%[0-9]+]]:_(<3 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x i16>) + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr3 + ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[COPY2]](<2 x i16>), [[COPY3]](<2 x i16>), [[DEF]](<2 x i16>) + ; GFX10-DENORM-NEXT: [[UV2:%[0-9]+]]:_(<3 x i16>), [[UV3:%[0-9]+]]:_(<3 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x i16>) + ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[COPY4]](<2 x i16>), [[COPY5]](<2 x i16>), [[DEF]](<2 x i16>) + ; GFX10-DENORM-NEXT: [[UV4:%[0-9]+]]:_(<3 x i16>), [[UV5:%[0-9]+]]:_(<3 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x i16>) + ; GFX10-DENORM-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f16>) = G_BITCAST [[UV]](<3 x i16>) + ; GFX10-DENORM-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f16>) = G_BITCAST [[UV2]](<3 x i16>) + ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<3 x f16>) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-DENORM-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x f16>) = G_BITCAST [[UV4]](<3 x i16>) + ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<3 x f16>) = G_FADD [[BITCAST2]], [[FMUL]] + ; GFX10-DENORM-NEXT: [[DEF1:%[0-9]+]]:_(<3 x i16>) = G_IMPLICIT_DEF + ; GFX10-DENORM-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x i16>) = G_BITCAST [[FADD]](<3 x f16>) + ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST3]](<3 x i16>), [[DEF1]](<3 x i16>) + ; GFX10-DENORM-NEXT: [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>), [[UV8:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x i16>) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV6]](<2 x i16>) + ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV7]](<2 x i16>) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; ; GFX10-UNSAFE-LABEL: name: test_3xhalf_add_mul_rhs ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = 
G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX10-UNSAFE-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) - ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX10-UNSAFE-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>) - ; GFX10-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = G_FMUL [[UV]], [[UV2]] - ; GFX10-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<3 x s16>) = G_FADD [[UV4]], [[FMUL]] - ; GFX10-UNSAFE-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FADD]](<3 x s16>), [[DEF1]](<3 x s16>) - ; GFX10-UNSAFE-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>) - ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>) + ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX10-UNSAFE-NEXT: [[DEF:%[0-9]+]]:_(<2 x i16>) = G_IMPLICIT_DEF + ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[COPY]](<2 x i16>), [[COPY1]](<2 x i16>), [[DEF]](<2 x i16>) + ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(<3 x i16>), [[UV1:%[0-9]+]]:_(<3 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x i16>) + ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr3 + ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[COPY2]](<2 x i16>), [[COPY3]](<2 x i16>), [[DEF]](<2 x i16>) + ; GFX10-UNSAFE-NEXT: [[UV2:%[0-9]+]]:_(<3 x i16>), [[UV3:%[0-9]+]]:_(<3 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x i16>) + ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[COPY4]](<2 x i16>), [[COPY5]](<2 x i16>), [[DEF]](<2 x i16>) + ; GFX10-UNSAFE-NEXT: [[UV4:%[0-9]+]]:_(<3 x i16>), [[UV5:%[0-9]+]]:_(<3 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x i16>) + ; GFX10-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f16>) = G_BITCAST [[UV]](<3 x i16>) + ; GFX10-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f16>) = G_BITCAST [[UV2]](<3 x i16>) + ; GFX10-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<3 x f16>) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x f16>) = G_BITCAST [[UV4]](<3 x i16>) + ; GFX10-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<3 x f16>) = G_FADD [[BITCAST2]], [[FMUL]] + ; GFX10-UNSAFE-NEXT: [[DEF1:%[0-9]+]]:_(<3 x i16>) = G_IMPLICIT_DEF + ; GFX10-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x i16>) = G_BITCAST [[FADD]](<3 x f16>) + ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST3]](<3 x i16>), [[DEF1]](<3 x i16>) + ; GFX10-UNSAFE-NEXT: 
[[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>), [[UV8:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x i16>) + ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV6]](<2 x i16>) + ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV7]](<2 x i16>) ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - %4:_(<2 x s16>) = COPY $vgpr0 - %5:_(<2 x s16>) = COPY $vgpr1 - %10:_(<2 x s16>) = G_IMPLICIT_DEF - %11:_(<6 x s16>) = G_CONCAT_VECTORS %4(<2 x s16>), %5(<2 x s16>), %10(<2 x s16>) - %0:_(<3 x s16>), %12:_(<3 x s16>) = G_UNMERGE_VALUES %11(<6 x s16>) - %6:_(<2 x s16>) = COPY $vgpr2 - %7:_(<2 x s16>) = COPY $vgpr3 - %13:_(<6 x s16>) = G_CONCAT_VECTORS %6(<2 x s16>), %7(<2 x s16>), %10(<2 x s16>) - %1:_(<3 x s16>), %14:_(<3 x s16>) = G_UNMERGE_VALUES %13(<6 x s16>) - %8:_(<2 x s16>) = COPY $vgpr4 - %9:_(<2 x s16>) = COPY $vgpr5 - %15:_(<6 x s16>) = G_CONCAT_VECTORS %8(<2 x s16>), %9(<2 x s16>), %10(<2 x s16>) - %2:_(<3 x s16>), %16:_(<3 x s16>) = G_UNMERGE_VALUES %15(<6 x s16>) - %17:_(<3 x s16>) = G_FMUL %0, %1 - %18:_(<3 x s16>) = G_FADD %2, %17 - %22:_(<3 x s16>) = G_IMPLICIT_DEF - %23:_(<6 x s16>) = G_CONCAT_VECTORS %18(<3 x s16>), %22(<3 x s16>) - %20:_(<2 x s16>), %21:_(<2 x s16>), %24:_(<2 x s16>) = G_UNMERGE_VALUES %23(<6 x s16>) - $vgpr0 = COPY %20(<2 x s16>) - $vgpr1 = COPY %21(<2 x s16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = G_IMPLICIT_DEF + %3:_(<6 x i16>) = G_CONCAT_VECTORS %0(<2 x i16>), %1(<2 x i16>), %2(<2 x i16>) + %4:_(<3 x i16>), %5:_(<3 x i16>) = G_UNMERGE_VALUES %3(<6 x i16>) + %6:_(<2 x i16>) = COPY $vgpr2 + %7:_(<2 x i16>) = COPY $vgpr3 + %8:_(<6 x i16>) = G_CONCAT_VECTORS %6(<2 x i16>), %7(<2 x i16>), %2(<2 x i16>) + %9:_(<3 x i16>), %10:_(<3 x i16>) = G_UNMERGE_VALUES %8(<6 x i16>) + %11:_(<2 x i16>) = COPY $vgpr4 + %12:_(<2 x i16>) = COPY $vgpr5 + %13:_(<6 x i16>) = G_CONCAT_VECTORS %11(<2 x i16>), %12(<2 x i16>), %2(<2 x i16>) + %14:_(<3 x i16>), %15:_(<3 x i16>) = G_UNMERGE_VALUES %13(<6 x i16>) + %16:_(<3 x f16>) = G_BITCAST %4(<3 x i16>) + %17:_(<3 x f16>) = G_BITCAST %9(<3 x i16>) + %18:_(<3 x f16>) = G_FMUL %16, %17 + %19:_(<3 x f16>) = G_BITCAST %14(<3 x i16>) + %20:_(<3 x f16>) = G_FADD %19, %18 + %21:_(<3 x i16>) = G_IMPLICIT_DEF + %22:_(<3 x i16>) = G_BITCAST %20(<3 x f16>) + %23:_(<6 x i16>) = G_CONCAT_VECTORS %22(<3 x i16>), %21(<3 x i16>) + %24:_(<2 x i16>), %25:_(<2 x i16>), %26:_(<2 x i16>) = G_UNMERGE_VALUES %23(<6 x i16>) + $vgpr0 = COPY %24(<2 x i16>) + $vgpr1 = COPY %25(<2 x i16>) S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ... 
@@ -1854,485 +2363,528 @@ body: | ; GFX9-LABEL: name: test_4xdouble_add_mul ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX9-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) - ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX9-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GFX9-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GFX9-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GFX9-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GFX9-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GFX9-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GFX9-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX9-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) - ; GFX9-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) - ; GFX9-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<4 x s64>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY 
[[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX9-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX9-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX9-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GFX9-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64), [[MV3]](i64) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY8]](i32), [[COPY9]](i32) + ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-NEXT: [[MV6:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY12]](i32), [[COPY13]](i32) + ; GFX9-NEXT: [[MV7:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY14]](i32), [[COPY15]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV4]](i64), [[MV5]](i64), [[MV6]](i64), [[MV7]](i64) + ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr17 + ; GFX9-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr18 + ; GFX9-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr19 + ; GFX9-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr20 + ; GFX9-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr21 + ; GFX9-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr22 + ; GFX9-NEXT: [[COPY23:%[0-9]+]]:_(i32) = COPY $vgpr23 + ; GFX9-NEXT: [[MV8:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY16]](i32), [[COPY17]](i32) + ; GFX9-NEXT: [[MV9:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY18]](i32), [[COPY19]](i32) + ; GFX9-NEXT: [[MV10:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY20]](i32), [[COPY21]](i32) + ; GFX9-NEXT: [[MV11:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY22]](i32), [[COPY23]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV8]](i64), [[MV9]](i64), [[MV10]](i64), [[MV11]](i64) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f64>) = G_BITCAST [[BUILD_VECTOR]](<4 x i64>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f64>) = G_BITCAST [[BUILD_VECTOR1]](<4 x i64>) + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(<4 x f64>) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f64>) = G_BITCAST [[BUILD_VECTOR2]](<4 x i64>) + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<4 x f64>) = G_FADD [[FMUL]], [[BITCAST2]] + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i64>) = G_BITCAST [[FADD]](<4 x f64>) + ; GFX9-NEXT: 
[[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<4 x i64>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; GFX9-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; GFX9-NEXT: $vgpr5 = COPY [[UV5]](i32) + ; GFX9-NEXT: $vgpr6 = COPY [[UV6]](i32) + ; GFX9-NEXT: $vgpr7 = COPY [[UV7]](i32) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; ; GFX9-CONTRACT-LABEL: name: test_4xdouble_add_mul ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-CONTRACT-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) - ; GFX9-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-CONTRACT-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX9-CONTRACT-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX9-CONTRACT-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX9-CONTRACT-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX9-CONTRACT-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX9-CONTRACT-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX9-CONTRACT-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX9-CONTRACT-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX9-CONTRACT-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-CONTRACT-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX9-CONTRACT-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) - ; GFX9-CONTRACT-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX9-CONTRACT-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX9-CONTRACT-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GFX9-CONTRACT-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GFX9-CONTRACT-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GFX9-CONTRACT-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GFX9-CONTRACT-NEXT: 
[[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GFX9-CONTRACT-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GFX9-CONTRACT-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX9-CONTRACT-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) - ; GFX9-CONTRACT-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) - ; GFX9-CONTRACT-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) - ; GFX9-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX9-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<4 x s64>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] - ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>) - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX9-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX9-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX9-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX9-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX9-CONTRACT-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64), [[MV3]](i64) + ; GFX9-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX9-CONTRACT-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GFX9-CONTRACT-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GFX9-CONTRACT-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GFX9-CONTRACT-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; GFX9-CONTRACT-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; GFX9-CONTRACT-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; GFX9-CONTRACT-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; GFX9-CONTRACT-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY8]](i32), [[COPY9]](i32) + ; GFX9-CONTRACT-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-CONTRACT-NEXT: [[MV6:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY12]](i32), [[COPY13]](i32) + ; GFX9-CONTRACT-NEXT: [[MV7:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY14]](i32), [[COPY15]](i32) + ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV4]](i64), [[MV5]](i64), [[MV6]](i64), [[MV7]](i64) + ; 
GFX9-CONTRACT-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; GFX9-CONTRACT-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr17 + ; GFX9-CONTRACT-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr18 + ; GFX9-CONTRACT-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr19 + ; GFX9-CONTRACT-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr20 + ; GFX9-CONTRACT-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr21 + ; GFX9-CONTRACT-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr22 + ; GFX9-CONTRACT-NEXT: [[COPY23:%[0-9]+]]:_(i32) = COPY $vgpr23 + ; GFX9-CONTRACT-NEXT: [[MV8:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY16]](i32), [[COPY17]](i32) + ; GFX9-CONTRACT-NEXT: [[MV9:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY18]](i32), [[COPY19]](i32) + ; GFX9-CONTRACT-NEXT: [[MV10:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY20]](i32), [[COPY21]](i32) + ; GFX9-CONTRACT-NEXT: [[MV11:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY22]](i32), [[COPY23]](i32) + ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV8]](i64), [[MV9]](i64), [[MV10]](i64), [[MV11]](i64) + ; GFX9-CONTRACT-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f64>) = G_BITCAST [[BUILD_VECTOR]](<4 x i64>) + ; GFX9-CONTRACT-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f64>) = G_BITCAST [[BUILD_VECTOR1]](<4 x i64>) + ; GFX9-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<4 x f64>) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-CONTRACT-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f64>) = G_BITCAST [[BUILD_VECTOR2]](<4 x i64>) + ; GFX9-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<4 x f64>) = G_FADD [[FMUL]], [[BITCAST2]] + ; GFX9-CONTRACT-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i64>) = G_BITCAST [[FADD]](<4 x f64>) + ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<4 x i64>) + ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX9-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; GFX9-CONTRACT-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; GFX9-CONTRACT-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; GFX9-CONTRACT-NEXT: $vgpr5 = COPY [[UV5]](i32) + ; GFX9-CONTRACT-NEXT: $vgpr6 = COPY [[UV6]](i32) + ; GFX9-CONTRACT-NEXT: $vgpr7 = COPY [[UV7]](i32) ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; ; GFX9-DENORM-LABEL: name: test_4xdouble_add_mul ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 ; GFX9-DENORM-NEXT: {{ $}} - ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-DENORM-NEXT: 
[[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) - ; GFX9-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-DENORM-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX9-DENORM-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX9-DENORM-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX9-DENORM-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX9-DENORM-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX9-DENORM-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX9-DENORM-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX9-DENORM-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX9-DENORM-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-DENORM-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX9-DENORM-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX9-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) - ; GFX9-DENORM-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX9-DENORM-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX9-DENORM-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GFX9-DENORM-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GFX9-DENORM-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GFX9-DENORM-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GFX9-DENORM-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GFX9-DENORM-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GFX9-DENORM-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX9-DENORM-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) - ; GFX9-DENORM-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) - ; GFX9-DENORM-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) - ; GFX9-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) - ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<4 x s64>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] - ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>) - ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-DENORM-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX9-DENORM-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX9-DENORM-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX9-DENORM-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GFX9-DENORM-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX9-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; 
GFX9-DENORM-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX9-DENORM-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX9-DENORM-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX9-DENORM-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64), [[MV3]](i64) + ; GFX9-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX9-DENORM-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GFX9-DENORM-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GFX9-DENORM-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GFX9-DENORM-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; GFX9-DENORM-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; GFX9-DENORM-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; GFX9-DENORM-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; GFX9-DENORM-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY8]](i32), [[COPY9]](i32) + ; GFX9-DENORM-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-DENORM-NEXT: [[MV6:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY12]](i32), [[COPY13]](i32) + ; GFX9-DENORM-NEXT: [[MV7:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY14]](i32), [[COPY15]](i32) + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV4]](i64), [[MV5]](i64), [[MV6]](i64), [[MV7]](i64) + ; GFX9-DENORM-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; GFX9-DENORM-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr17 + ; GFX9-DENORM-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr18 + ; GFX9-DENORM-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr19 + ; GFX9-DENORM-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr20 + ; GFX9-DENORM-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr21 + ; GFX9-DENORM-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr22 + ; GFX9-DENORM-NEXT: [[COPY23:%[0-9]+]]:_(i32) = COPY $vgpr23 + ; GFX9-DENORM-NEXT: [[MV8:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY16]](i32), [[COPY17]](i32) + ; GFX9-DENORM-NEXT: [[MV9:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY18]](i32), [[COPY19]](i32) + ; GFX9-DENORM-NEXT: [[MV10:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY20]](i32), [[COPY21]](i32) + ; GFX9-DENORM-NEXT: [[MV11:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY22]](i32), [[COPY23]](i32) + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV8]](i64), [[MV9]](i64), [[MV10]](i64), [[MV11]](i64) + ; GFX9-DENORM-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f64>) = G_BITCAST [[BUILD_VECTOR]](<4 x i64>) + ; GFX9-DENORM-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f64>) = G_BITCAST [[BUILD_VECTOR1]](<4 x i64>) + ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<4 x f64>) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-DENORM-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f64>) = G_BITCAST [[BUILD_VECTOR2]](<4 x i64>) + ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<4 x f64>) = G_FADD [[FMUL]], [[BITCAST2]] + ; GFX9-DENORM-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i64>) = G_BITCAST [[FADD]](<4 x f64>) + ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<4 x i64>) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX9-DENORM-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; GFX9-DENORM-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; GFX9-DENORM-NEXT: 
$vgpr4 = COPY [[UV4]](i32) + ; GFX9-DENORM-NEXT: $vgpr5 = COPY [[UV5]](i32) + ; GFX9-DENORM-NEXT: $vgpr6 = COPY [[UV6]](i32) + ; GFX9-DENORM-NEXT: $vgpr7 = COPY [[UV7]](i32) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; ; GFX9-UNSAFE-LABEL: name: test_4xdouble_add_mul ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-UNSAFE-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) - ; GFX9-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-UNSAFE-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX9-UNSAFE-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX9-UNSAFE-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX9-UNSAFE-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX9-UNSAFE-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX9-UNSAFE-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX9-UNSAFE-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX9-UNSAFE-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX9-UNSAFE-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-UNSAFE-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX9-UNSAFE-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) - ; GFX9-UNSAFE-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX9-UNSAFE-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX9-UNSAFE-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GFX9-UNSAFE-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GFX9-UNSAFE-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GFX9-UNSAFE-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GFX9-UNSAFE-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GFX9-UNSAFE-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GFX9-UNSAFE-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX9-UNSAFE-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) - ; GFX9-UNSAFE-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) - ; GFX9-UNSAFE-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), 
[[COPY23]](s32) - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) - ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX9-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<4 x s64>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] - ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX9-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX9-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX9-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX9-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX9-UNSAFE-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64), [[MV3]](i64) + ; GFX9-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX9-UNSAFE-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GFX9-UNSAFE-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GFX9-UNSAFE-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GFX9-UNSAFE-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; GFX9-UNSAFE-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; GFX9-UNSAFE-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; GFX9-UNSAFE-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; GFX9-UNSAFE-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY8]](i32), [[COPY9]](i32) + ; GFX9-UNSAFE-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-UNSAFE-NEXT: [[MV6:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY12]](i32), [[COPY13]](i32) + ; GFX9-UNSAFE-NEXT: [[MV7:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY14]](i32), [[COPY15]](i32) + ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV4]](i64), [[MV5]](i64), [[MV6]](i64), [[MV7]](i64) + ; GFX9-UNSAFE-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; GFX9-UNSAFE-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr17 + ; GFX9-UNSAFE-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr18 + ; GFX9-UNSAFE-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr19 + ; GFX9-UNSAFE-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr20 + ; GFX9-UNSAFE-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr21 + ; GFX9-UNSAFE-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr22 + ; GFX9-UNSAFE-NEXT: [[COPY23:%[0-9]+]]:_(i32) = COPY $vgpr23 + ; GFX9-UNSAFE-NEXT: [[MV8:%[0-9]+]]:_(i64) = G_MERGE_VALUES 
[[COPY16]](i32), [[COPY17]](i32) + ; GFX9-UNSAFE-NEXT: [[MV9:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY18]](i32), [[COPY19]](i32) + ; GFX9-UNSAFE-NEXT: [[MV10:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY20]](i32), [[COPY21]](i32) + ; GFX9-UNSAFE-NEXT: [[MV11:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY22]](i32), [[COPY23]](i32) + ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV8]](i64), [[MV9]](i64), [[MV10]](i64), [[MV11]](i64) + ; GFX9-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f64>) = G_BITCAST [[BUILD_VECTOR]](<4 x i64>) + ; GFX9-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f64>) = G_BITCAST [[BUILD_VECTOR1]](<4 x i64>) + ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<4 x f64>) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f64>) = G_BITCAST [[BUILD_VECTOR2]](<4 x i64>) + ; GFX9-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<4 x f64>) = G_FADD [[FMUL]], [[BITCAST2]] + ; GFX9-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i64>) = G_BITCAST [[FADD]](<4 x f64>) + ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<4 x i64>) + ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX9-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; GFX9-UNSAFE-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; GFX9-UNSAFE-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; GFX9-UNSAFE-NEXT: $vgpr5 = COPY [[UV5]](i32) + ; GFX9-UNSAFE-NEXT: $vgpr6 = COPY [[UV6]](i32) + ; GFX9-UNSAFE-NEXT: $vgpr7 = COPY [[UV7]](i32) ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; ; GFX10-LABEL: name: test_4xdouble_add_mul ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES 
[[COPY8]](s32), [[COPY9]](s32) - ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX10-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GFX10-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GFX10-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GFX10-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GFX10-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GFX10-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX10-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) - ; GFX10-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) - ; GFX10-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(<4 x s64>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX10-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX10-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX10-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GFX10-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64), [[MV3]](i64) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES 
[[COPY8]](i32), [[COPY9]](i32) + ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[MV6:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY12]](i32), [[COPY13]](i32) + ; GFX10-NEXT: [[MV7:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY14]](i32), [[COPY15]](i32) + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV4]](i64), [[MV5]](i64), [[MV6]](i64), [[MV7]](i64) + ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr17 + ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr18 + ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr19 + ; GFX10-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr20 + ; GFX10-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr21 + ; GFX10-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr22 + ; GFX10-NEXT: [[COPY23:%[0-9]+]]:_(i32) = COPY $vgpr23 + ; GFX10-NEXT: [[MV8:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY16]](i32), [[COPY17]](i32) + ; GFX10-NEXT: [[MV9:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY18]](i32), [[COPY19]](i32) + ; GFX10-NEXT: [[MV10:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY20]](i32), [[COPY21]](i32) + ; GFX10-NEXT: [[MV11:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY22]](i32), [[COPY23]](i32) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV8]](i64), [[MV9]](i64), [[MV10]](i64), [[MV11]](i64) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f64>) = G_BITCAST [[BUILD_VECTOR]](<4 x i64>) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f64>) = G_BITCAST [[BUILD_VECTOR1]](<4 x i64>) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(<4 x f64>) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f64>) = G_BITCAST [[BUILD_VECTOR2]](<4 x i64>) + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(<4 x f64>) = G_FADD [[FMUL]], [[BITCAST2]] + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i64>) = G_BITCAST [[FADD]](<4 x f64>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<4 x i64>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; GFX10-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; GFX10-NEXT: $vgpr5 = COPY [[UV5]](i32) + ; GFX10-NEXT: $vgpr6 = COPY [[UV6]](i32) + ; GFX10-NEXT: $vgpr7 = COPY [[UV7]](i32) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; ; GFX10-CONTRACT-LABEL: name: test_4xdouble_add_mul ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES 
[[COPY]](s32), [[COPY1]](s32) - ; GFX10-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-CONTRACT-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) - ; GFX10-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-CONTRACT-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX10-CONTRACT-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX10-CONTRACT-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX10-CONTRACT-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX10-CONTRACT-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX10-CONTRACT-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX10-CONTRACT-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX10-CONTRACT-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX10-CONTRACT-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-CONTRACT-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX10-CONTRACT-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) - ; GFX10-CONTRACT-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX10-CONTRACT-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX10-CONTRACT-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GFX10-CONTRACT-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GFX10-CONTRACT-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GFX10-CONTRACT-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GFX10-CONTRACT-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GFX10-CONTRACT-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GFX10-CONTRACT-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX10-CONTRACT-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) - ; GFX10-CONTRACT-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) - ; GFX10-CONTRACT-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) - ; GFX10-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX10-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<4 x s64>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] - ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>) - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; 
GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX10-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX10-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX10-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX10-CONTRACT-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64), [[MV3]](i64) + ; GFX10-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX10-CONTRACT-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GFX10-CONTRACT-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GFX10-CONTRACT-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GFX10-CONTRACT-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; GFX10-CONTRACT-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; GFX10-CONTRACT-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; GFX10-CONTRACT-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; GFX10-CONTRACT-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY8]](i32), [[COPY9]](i32) + ; GFX10-CONTRACT-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-CONTRACT-NEXT: [[MV6:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY12]](i32), [[COPY13]](i32) + ; GFX10-CONTRACT-NEXT: [[MV7:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY14]](i32), [[COPY15]](i32) + ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV4]](i64), [[MV5]](i64), [[MV6]](i64), [[MV7]](i64) + ; GFX10-CONTRACT-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; GFX10-CONTRACT-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr17 + ; GFX10-CONTRACT-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr18 + ; GFX10-CONTRACT-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr19 + ; GFX10-CONTRACT-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr20 + ; GFX10-CONTRACT-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr21 + ; GFX10-CONTRACT-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr22 + ; GFX10-CONTRACT-NEXT: [[COPY23:%[0-9]+]]:_(i32) = COPY $vgpr23 + ; GFX10-CONTRACT-NEXT: [[MV8:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY16]](i32), [[COPY17]](i32) + ; GFX10-CONTRACT-NEXT: [[MV9:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY18]](i32), [[COPY19]](i32) + ; GFX10-CONTRACT-NEXT: [[MV10:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY20]](i32), [[COPY21]](i32) + ; GFX10-CONTRACT-NEXT: [[MV11:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY22]](i32), [[COPY23]](i32) + ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV8]](i64), [[MV9]](i64), [[MV10]](i64), [[MV11]](i64) + ; GFX10-CONTRACT-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f64>) = G_BITCAST [[BUILD_VECTOR]](<4 x i64>) + ; GFX10-CONTRACT-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f64>) = G_BITCAST [[BUILD_VECTOR1]](<4 x i64>) + ; GFX10-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<4 x f64>) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-CONTRACT-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f64>) = G_BITCAST [[BUILD_VECTOR2]](<4 x i64>) + ; GFX10-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<4 x f64>) = G_FADD [[FMUL]], [[BITCAST2]] + ; GFX10-CONTRACT-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i64>) = G_BITCAST 
[[FADD]](<4 x f64>) + ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<4 x i64>) + ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX10-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; GFX10-CONTRACT-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; GFX10-CONTRACT-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; GFX10-CONTRACT-NEXT: $vgpr5 = COPY [[UV5]](i32) + ; GFX10-CONTRACT-NEXT: $vgpr6 = COPY [[UV6]](i32) + ; GFX10-CONTRACT-NEXT: $vgpr7 = COPY [[UV7]](i32) ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; ; GFX10-DENORM-LABEL: name: test_4xdouble_add_mul ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 ; GFX10-DENORM-NEXT: {{ $}} - ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-DENORM-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) - ; GFX10-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-DENORM-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX10-DENORM-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX10-DENORM-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX10-DENORM-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX10-DENORM-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX10-DENORM-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX10-DENORM-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX10-DENORM-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX10-DENORM-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-DENORM-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX10-DENORM-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX10-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) - ; GFX10-DENORM-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX10-DENORM-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX10-DENORM-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GFX10-DENORM-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GFX10-DENORM-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; 
GFX10-DENORM-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GFX10-DENORM-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GFX10-DENORM-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GFX10-DENORM-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX10-DENORM-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) - ; GFX10-DENORM-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) - ; GFX10-DENORM-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) - ; GFX10-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) - ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<4 x s64>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] - ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>) - ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-DENORM-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX10-DENORM-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX10-DENORM-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX10-DENORM-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GFX10-DENORM-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX10-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX10-DENORM-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX10-DENORM-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-DENORM-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX10-DENORM-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64), [[MV3]](i64) + ; GFX10-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX10-DENORM-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GFX10-DENORM-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GFX10-DENORM-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GFX10-DENORM-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; GFX10-DENORM-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; GFX10-DENORM-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; GFX10-DENORM-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; GFX10-DENORM-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY8]](i32), [[COPY9]](i32) + ; GFX10-DENORM-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-DENORM-NEXT: [[MV6:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY12]](i32), [[COPY13]](i32) + ; GFX10-DENORM-NEXT: [[MV7:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY14]](i32), [[COPY15]](i32) + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV4]](i64), [[MV5]](i64), 
[[MV6]](i64), [[MV7]](i64) + ; GFX10-DENORM-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; GFX10-DENORM-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr17 + ; GFX10-DENORM-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr18 + ; GFX10-DENORM-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr19 + ; GFX10-DENORM-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr20 + ; GFX10-DENORM-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr21 + ; GFX10-DENORM-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr22 + ; GFX10-DENORM-NEXT: [[COPY23:%[0-9]+]]:_(i32) = COPY $vgpr23 + ; GFX10-DENORM-NEXT: [[MV8:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY16]](i32), [[COPY17]](i32) + ; GFX10-DENORM-NEXT: [[MV9:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY18]](i32), [[COPY19]](i32) + ; GFX10-DENORM-NEXT: [[MV10:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY20]](i32), [[COPY21]](i32) + ; GFX10-DENORM-NEXT: [[MV11:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY22]](i32), [[COPY23]](i32) + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV8]](i64), [[MV9]](i64), [[MV10]](i64), [[MV11]](i64) + ; GFX10-DENORM-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f64>) = G_BITCAST [[BUILD_VECTOR]](<4 x i64>) + ; GFX10-DENORM-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f64>) = G_BITCAST [[BUILD_VECTOR1]](<4 x i64>) + ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<4 x f64>) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-DENORM-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f64>) = G_BITCAST [[BUILD_VECTOR2]](<4 x i64>) + ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<4 x f64>) = G_FADD [[FMUL]], [[BITCAST2]] + ; GFX10-DENORM-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i64>) = G_BITCAST [[FADD]](<4 x f64>) + ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<4 x i64>) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX10-DENORM-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; GFX10-DENORM-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; GFX10-DENORM-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; GFX10-DENORM-NEXT: $vgpr5 = COPY [[UV5]](i32) + ; GFX10-DENORM-NEXT: $vgpr6 = COPY [[UV6]](i32) + ; GFX10-DENORM-NEXT: $vgpr7 = COPY [[UV7]](i32) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; ; GFX10-UNSAFE-LABEL: name: test_4xdouble_add_mul ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; 
GFX10-UNSAFE-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) - ; GFX10-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-UNSAFE-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX10-UNSAFE-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX10-UNSAFE-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX10-UNSAFE-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX10-UNSAFE-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX10-UNSAFE-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX10-UNSAFE-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX10-UNSAFE-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX10-UNSAFE-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-UNSAFE-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX10-UNSAFE-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) - ; GFX10-UNSAFE-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX10-UNSAFE-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX10-UNSAFE-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GFX10-UNSAFE-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GFX10-UNSAFE-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GFX10-UNSAFE-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GFX10-UNSAFE-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GFX10-UNSAFE-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GFX10-UNSAFE-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX10-UNSAFE-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) - ; GFX10-UNSAFE-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) - ; GFX10-UNSAFE-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) - ; GFX10-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX10-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<4 x s64>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] - ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>) - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + 
; GFX10-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX10-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX10-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX10-UNSAFE-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64), [[MV3]](i64) + ; GFX10-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX10-UNSAFE-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GFX10-UNSAFE-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GFX10-UNSAFE-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GFX10-UNSAFE-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; GFX10-UNSAFE-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; GFX10-UNSAFE-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; GFX10-UNSAFE-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; GFX10-UNSAFE-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY8]](i32), [[COPY9]](i32) + ; GFX10-UNSAFE-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-UNSAFE-NEXT: [[MV6:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY12]](i32), [[COPY13]](i32) + ; GFX10-UNSAFE-NEXT: [[MV7:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY14]](i32), [[COPY15]](i32) + ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV4]](i64), [[MV5]](i64), [[MV6]](i64), [[MV7]](i64) + ; GFX10-UNSAFE-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; GFX10-UNSAFE-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr17 + ; GFX10-UNSAFE-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr18 + ; GFX10-UNSAFE-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr19 + ; GFX10-UNSAFE-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr20 + ; GFX10-UNSAFE-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr21 + ; GFX10-UNSAFE-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr22 + ; GFX10-UNSAFE-NEXT: [[COPY23:%[0-9]+]]:_(i32) = COPY $vgpr23 + ; GFX10-UNSAFE-NEXT: [[MV8:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY16]](i32), [[COPY17]](i32) + ; GFX10-UNSAFE-NEXT: [[MV9:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY18]](i32), [[COPY19]](i32) + ; GFX10-UNSAFE-NEXT: [[MV10:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY20]](i32), [[COPY21]](i32) + ; GFX10-UNSAFE-NEXT: [[MV11:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY22]](i32), [[COPY23]](i32) + ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV8]](i64), [[MV9]](i64), [[MV10]](i64), [[MV11]](i64) + ; GFX10-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f64>) = G_BITCAST [[BUILD_VECTOR]](<4 x i64>) + ; GFX10-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f64>) = G_BITCAST [[BUILD_VECTOR1]](<4 x i64>) + ; GFX10-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<4 x f64>) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f64>) = G_BITCAST [[BUILD_VECTOR2]](<4 x i64>) + ; GFX10-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<4 x f64>) = G_FADD [[FMUL]], [[BITCAST2]] + ; GFX10-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i64>) = G_BITCAST [[FADD]](<4 x f64>) + ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<4 x i64>) + ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; 
GFX10-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; GFX10-UNSAFE-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; GFX10-UNSAFE-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; GFX10-UNSAFE-NEXT: $vgpr5 = COPY [[UV5]](i32) + ; GFX10-UNSAFE-NEXT: $vgpr6 = COPY [[UV6]](i32) + ; GFX10-UNSAFE-NEXT: $vgpr7 = COPY [[UV7]](i32) ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 - %4:_(s32) = COPY $vgpr0 - %5:_(s32) = COPY $vgpr1 - %6:_(s32) = COPY $vgpr2 - %7:_(s32) = COPY $vgpr3 - %8:_(s32) = COPY $vgpr4 - %9:_(s32) = COPY $vgpr5 - %10:_(s32) = COPY $vgpr6 - %11:_(s32) = COPY $vgpr7 - %28:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) - %29:_(s64) = G_MERGE_VALUES %6(s32), %7(s32) - %30:_(s64) = G_MERGE_VALUES %8(s32), %9(s32) - %31:_(s64) = G_MERGE_VALUES %10(s32), %11(s32) - %0:_(<4 x s64>) = G_BUILD_VECTOR %28(s64), %29(s64), %30(s64), %31(s64) - %12:_(s32) = COPY $vgpr8 - %13:_(s32) = COPY $vgpr9 - %14:_(s32) = COPY $vgpr10 - %15:_(s32) = COPY $vgpr11 - %16:_(s32) = COPY $vgpr12 - %17:_(s32) = COPY $vgpr13 - %18:_(s32) = COPY $vgpr14 - %19:_(s32) = COPY $vgpr15 - %32:_(s64) = G_MERGE_VALUES %12(s32), %13(s32) - %33:_(s64) = G_MERGE_VALUES %14(s32), %15(s32) - %34:_(s64) = G_MERGE_VALUES %16(s32), %17(s32) - %35:_(s64) = G_MERGE_VALUES %18(s32), %19(s32) - %1:_(<4 x s64>) = G_BUILD_VECTOR %32(s64), %33(s64), %34(s64), %35(s64) - %20:_(s32) = COPY $vgpr16 - %21:_(s32) = COPY $vgpr17 - %22:_(s32) = COPY $vgpr18 - %23:_(s32) = COPY $vgpr19 - %24:_(s32) = COPY $vgpr20 - %25:_(s32) = COPY $vgpr21 - %26:_(s32) = COPY $vgpr22 - %27:_(s32) = COPY $vgpr23 - %36:_(s64) = G_MERGE_VALUES %20(s32), %21(s32) - %37:_(s64) = G_MERGE_VALUES %22(s32), %23(s32) - %38:_(s64) = G_MERGE_VALUES %24(s32), %25(s32) - %39:_(s64) = G_MERGE_VALUES %26(s32), %27(s32) - %2:_(<4 x s64>) = G_BUILD_VECTOR %36(s64), %37(s64), %38(s64), %39(s64) - %40:_(<4 x s64>) = G_FMUL %0, %1 - %41:_(<4 x s64>) = G_FADD %40, %2 - %43:_(s32), %44:_(s32), %45:_(s32), %46:_(s32), %47:_(s32), %48:_(s32), %49:_(s32), %50:_(s32) = G_UNMERGE_VALUES %41(<4 x s64>) - $vgpr0 = COPY %43(s32) - $vgpr1 = COPY %44(s32) - $vgpr2 = COPY %45(s32) - $vgpr3 = COPY %46(s32) - $vgpr4 = COPY %47(s32) - $vgpr5 = COPY %48(s32) - $vgpr6 = COPY %49(s32) - $vgpr7 = COPY %50(s32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 + %4:_(i32) = COPY $vgpr4 + %5:_(i32) = COPY $vgpr5 + %6:_(i32) = COPY $vgpr6 + %7:_(i32) = COPY $vgpr7 + %8:_(i64) = G_MERGE_VALUES %0(i32), %1(i32) + %9:_(i64) = G_MERGE_VALUES %2(i32), %3(i32) + %10:_(i64) = G_MERGE_VALUES %4(i32), %5(i32) + %11:_(i64) = G_MERGE_VALUES %6(i32), %7(i32) + %12:_(<4 x i64>) = G_BUILD_VECTOR %8(i64), %9(i64), %10(i64), %11(i64) + %13:_(i32) = COPY $vgpr8 + %14:_(i32) = COPY $vgpr9 + %15:_(i32) = COPY $vgpr10 + %16:_(i32) = COPY $vgpr11 + %17:_(i32) = COPY $vgpr12 + %18:_(i32) = COPY $vgpr13 + %19:_(i32) = COPY $vgpr14 + %20:_(i32) = COPY $vgpr15 + %21:_(i64) = G_MERGE_VALUES %13(i32), %14(i32) + %22:_(i64) = G_MERGE_VALUES %15(i32), %16(i32) + %23:_(i64) = G_MERGE_VALUES %17(i32), %18(i32) + %24:_(i64) = G_MERGE_VALUES %19(i32), %20(i32) + %25:_(<4 x i64>) = G_BUILD_VECTOR %21(i64), %22(i64), %23(i64), %24(i64) + %26:_(i32) = COPY $vgpr16 + %27:_(i32) = COPY $vgpr17 + %28:_(i32) = COPY $vgpr18 + %29:_(i32) = COPY $vgpr19 + %30:_(i32) = COPY $vgpr20 + %31:_(i32) = COPY $vgpr21 + %32:_(i32) = COPY $vgpr22 + %33:_(i32) = COPY $vgpr23 + %34:_(i64) = 
G_MERGE_VALUES %26(i32), %27(i32) + %35:_(i64) = G_MERGE_VALUES %28(i32), %29(i32) + %36:_(i64) = G_MERGE_VALUES %30(i32), %31(i32) + %37:_(i64) = G_MERGE_VALUES %32(i32), %33(i32) + %38:_(<4 x i64>) = G_BUILD_VECTOR %34(i64), %35(i64), %36(i64), %37(i64) + %39:_(<4 x f64>) = G_BITCAST %12(<4 x i64>) + %40:_(<4 x f64>) = G_BITCAST %25(<4 x i64>) + %41:_(<4 x f64>) = G_FMUL %39, %40 + %42:_(<4 x f64>) = G_BITCAST %38(<4 x i64>) + %43:_(<4 x f64>) = G_FADD %41, %42 + %44:_(<4 x i64>) = G_BITCAST %43(<4 x f64>) + %45:_(i32), %46:_(i32), %47:_(i32), %48:_(i32), %49:_(i32), %50:_(i32), %51:_(i32), %52:_(i32) = G_UNMERGE_VALUES %44(<4 x i64>) + $vgpr0 = COPY %45(i32) + $vgpr1 = COPY %46(i32) + $vgpr2 = COPY %47(i32) + $vgpr3 = COPY %48(i32) + $vgpr4 = COPY %49(i32) + $vgpr5 = COPY %50(i32) + $vgpr6 = COPY %51(i32) + $vgpr7 = COPY %52(i32) S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ... @@ -2345,385 +2897,428 @@ body: | ; GFX9-LABEL: name: test_3xdouble_add_mul_rhs ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX9-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX9-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX9-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<3 x s64>) = G_FADD [[BUILD_VECTOR2]], 
[[FMUL]] - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX9-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX9-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64) + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY8]](i32), [[COPY9]](i32) + ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i64>) = G_BUILD_VECTOR [[MV3]](i64), [[MV4]](i64), [[MV5]](i64) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr17 + ; GFX9-NEXT: [[MV6:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY12]](i32), [[COPY13]](i32) + ; GFX9-NEXT: [[MV7:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY14]](i32), [[COPY15]](i32) + ; GFX9-NEXT: [[MV8:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY16]](i32), [[COPY17]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x i64>) = G_BUILD_VECTOR [[MV6]](i64), [[MV7]](i64), [[MV8]](i64) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f64>) = G_BITCAST [[BUILD_VECTOR]](<3 x i64>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f64>) = G_BITCAST [[BUILD_VECTOR1]](<3 x i64>) + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(<3 x f64>) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x f64>) = G_BITCAST [[BUILD_VECTOR2]](<3 x i64>) + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<3 x f64>) = G_FADD [[BITCAST2]], [[FMUL]] + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x i64>) = G_BITCAST [[FADD]](<3 x f64>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<3 x i64>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; GFX9-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; GFX9-NEXT: $vgpr5 = COPY [[UV5]](i32) ; GFX9-NEXT: S_SETPC_B64_return 
$sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; ; GFX9-CONTRACT-LABEL: name: test_3xdouble_add_mul_rhs ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) - ; GFX9-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-CONTRACT-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX9-CONTRACT-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX9-CONTRACT-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX9-CONTRACT-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-CONTRACT-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX9-CONTRACT-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64) - ; GFX9-CONTRACT-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX9-CONTRACT-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX9-CONTRACT-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX9-CONTRACT-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX9-CONTRACT-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX9-CONTRACT-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX9-CONTRACT-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX9-CONTRACT-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX9-CONTRACT-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) - ; GFX9-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX9-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<3 x s64>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] - ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>) - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-CONTRACT-NEXT: 
[[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX9-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX9-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64) + ; GFX9-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX9-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX9-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX9-CONTRACT-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GFX9-CONTRACT-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GFX9-CONTRACT-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GFX9-CONTRACT-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-CONTRACT-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY8]](i32), [[COPY9]](i32) + ; GFX9-CONTRACT-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i64>) = G_BUILD_VECTOR [[MV3]](i64), [[MV4]](i64), [[MV5]](i64) + ; GFX9-CONTRACT-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; GFX9-CONTRACT-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; GFX9-CONTRACT-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; GFX9-CONTRACT-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; GFX9-CONTRACT-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; GFX9-CONTRACT-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr17 + ; GFX9-CONTRACT-NEXT: [[MV6:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY12]](i32), [[COPY13]](i32) + ; GFX9-CONTRACT-NEXT: [[MV7:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY14]](i32), [[COPY15]](i32) + ; GFX9-CONTRACT-NEXT: [[MV8:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY16]](i32), [[COPY17]](i32) + ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x i64>) = G_BUILD_VECTOR [[MV6]](i64), [[MV7]](i64), [[MV8]](i64) + ; GFX9-CONTRACT-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f64>) = G_BITCAST [[BUILD_VECTOR]](<3 x i64>) + ; GFX9-CONTRACT-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f64>) = G_BITCAST [[BUILD_VECTOR1]](<3 x i64>) + ; GFX9-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<3 x f64>) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-CONTRACT-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x f64>) = G_BITCAST [[BUILD_VECTOR2]](<3 x i64>) + ; GFX9-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<3 x f64>) = G_FADD [[BITCAST2]], [[FMUL]] + ; GFX9-CONTRACT-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x i64>) = G_BITCAST [[FADD]](<3 x f64>) + ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<3 x i64>) + ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX9-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; GFX9-CONTRACT-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; GFX9-CONTRACT-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; GFX9-CONTRACT-NEXT: $vgpr5 = COPY [[UV5]](i32) ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; ; GFX9-DENORM-LABEL: name: 
test_3xdouble_add_mul_rhs ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 ; GFX9-DENORM-NEXT: {{ $}} - ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) - ; GFX9-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-DENORM-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX9-DENORM-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX9-DENORM-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX9-DENORM-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-DENORM-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX9-DENORM-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64) - ; GFX9-DENORM-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX9-DENORM-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX9-DENORM-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX9-DENORM-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX9-DENORM-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX9-DENORM-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX9-DENORM-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX9-DENORM-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX9-DENORM-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX9-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) - ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<3 x s64>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] - ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>) - ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-DENORM-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX9-DENORM-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX9-DENORM-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-DENORM-NEXT: 
[[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-DENORM-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX9-DENORM-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX9-DENORM-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64) + ; GFX9-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX9-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX9-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX9-DENORM-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GFX9-DENORM-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GFX9-DENORM-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GFX9-DENORM-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-DENORM-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY8]](i32), [[COPY9]](i32) + ; GFX9-DENORM-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i64>) = G_BUILD_VECTOR [[MV3]](i64), [[MV4]](i64), [[MV5]](i64) + ; GFX9-DENORM-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; GFX9-DENORM-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; GFX9-DENORM-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; GFX9-DENORM-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; GFX9-DENORM-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; GFX9-DENORM-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr17 + ; GFX9-DENORM-NEXT: [[MV6:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY12]](i32), [[COPY13]](i32) + ; GFX9-DENORM-NEXT: [[MV7:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY14]](i32), [[COPY15]](i32) + ; GFX9-DENORM-NEXT: [[MV8:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY16]](i32), [[COPY17]](i32) + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x i64>) = G_BUILD_VECTOR [[MV6]](i64), [[MV7]](i64), [[MV8]](i64) + ; GFX9-DENORM-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f64>) = G_BITCAST [[BUILD_VECTOR]](<3 x i64>) + ; GFX9-DENORM-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f64>) = G_BITCAST [[BUILD_VECTOR1]](<3 x i64>) + ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<3 x f64>) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-DENORM-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x f64>) = G_BITCAST [[BUILD_VECTOR2]](<3 x i64>) + ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<3 x f64>) = G_FADD [[BITCAST2]], [[FMUL]] + ; GFX9-DENORM-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x i64>) = G_BITCAST [[FADD]](<3 x f64>) + ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<3 x i64>) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX9-DENORM-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; GFX9-DENORM-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; GFX9-DENORM-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; GFX9-DENORM-NEXT: $vgpr5 = COPY [[UV5]](i32) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; ; GFX9-UNSAFE-LABEL: name: test_3xdouble_add_mul_rhs ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) 
= COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) - ; GFX9-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-UNSAFE-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX9-UNSAFE-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX9-UNSAFE-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX9-UNSAFE-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-UNSAFE-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX9-UNSAFE-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64) - ; GFX9-UNSAFE-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX9-UNSAFE-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX9-UNSAFE-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX9-UNSAFE-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX9-UNSAFE-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX9-UNSAFE-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX9-UNSAFE-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX9-UNSAFE-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX9-UNSAFE-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) - ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX9-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<3 x s64>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] - ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX9-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX9-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX9-UNSAFE-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<3 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64) + ; GFX9-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX9-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX9-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX9-UNSAFE-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GFX9-UNSAFE-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GFX9-UNSAFE-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GFX9-UNSAFE-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-UNSAFE-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY8]](i32), [[COPY9]](i32) + ; GFX9-UNSAFE-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i64>) = G_BUILD_VECTOR [[MV3]](i64), [[MV4]](i64), [[MV5]](i64) + ; GFX9-UNSAFE-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; GFX9-UNSAFE-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; GFX9-UNSAFE-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; GFX9-UNSAFE-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; GFX9-UNSAFE-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; GFX9-UNSAFE-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr17 + ; GFX9-UNSAFE-NEXT: [[MV6:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY12]](i32), [[COPY13]](i32) + ; GFX9-UNSAFE-NEXT: [[MV7:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY14]](i32), [[COPY15]](i32) + ; GFX9-UNSAFE-NEXT: [[MV8:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY16]](i32), [[COPY17]](i32) + ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x i64>) = G_BUILD_VECTOR [[MV6]](i64), [[MV7]](i64), [[MV8]](i64) + ; GFX9-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f64>) = G_BITCAST [[BUILD_VECTOR]](<3 x i64>) + ; GFX9-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f64>) = G_BITCAST [[BUILD_VECTOR1]](<3 x i64>) + ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<3 x f64>) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x f64>) = G_BITCAST [[BUILD_VECTOR2]](<3 x i64>) + ; GFX9-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<3 x f64>) = G_FADD [[BITCAST2]], [[FMUL]] + ; GFX9-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x i64>) = G_BITCAST [[FADD]](<3 x f64>) + ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<3 x i64>) + ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX9-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; GFX9-UNSAFE-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; GFX9-UNSAFE-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; GFX9-UNSAFE-NEXT: $vgpr5 = COPY [[UV5]](i32) ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; ; GFX10-LABEL: name: test_3xdouble_add_mul_rhs ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = 
G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX10-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX10-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX10-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(<3 x s64>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX10-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX10-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64) + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY8]](i32), [[COPY9]](i32) + ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(i64) = 
G_MERGE_VALUES [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i64>) = G_BUILD_VECTOR [[MV3]](i64), [[MV4]](i64), [[MV5]](i64) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr17 + ; GFX10-NEXT: [[MV6:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY12]](i32), [[COPY13]](i32) + ; GFX10-NEXT: [[MV7:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY14]](i32), [[COPY15]](i32) + ; GFX10-NEXT: [[MV8:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY16]](i32), [[COPY17]](i32) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x i64>) = G_BUILD_VECTOR [[MV6]](i64), [[MV7]](i64), [[MV8]](i64) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f64>) = G_BITCAST [[BUILD_VECTOR]](<3 x i64>) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f64>) = G_BITCAST [[BUILD_VECTOR1]](<3 x i64>) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(<3 x f64>) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x f64>) = G_BITCAST [[BUILD_VECTOR2]](<3 x i64>) + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(<3 x f64>) = G_FADD [[BITCAST2]], [[FMUL]] + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x i64>) = G_BITCAST [[FADD]](<3 x f64>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<3 x i64>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; GFX10-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; GFX10-NEXT: $vgpr5 = COPY [[UV5]](i32) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; ; GFX10-CONTRACT-LABEL: name: test_3xdouble_add_mul_rhs ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) - ; GFX10-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-CONTRACT-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX10-CONTRACT-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX10-CONTRACT-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX10-CONTRACT-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; 
GFX10-CONTRACT-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX10-CONTRACT-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64) - ; GFX10-CONTRACT-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX10-CONTRACT-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX10-CONTRACT-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX10-CONTRACT-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX10-CONTRACT-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX10-CONTRACT-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX10-CONTRACT-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX10-CONTRACT-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX10-CONTRACT-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) - ; GFX10-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX10-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<3 x s64>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] - ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>) - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX10-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64) + ; GFX10-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX10-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX10-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX10-CONTRACT-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GFX10-CONTRACT-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GFX10-CONTRACT-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GFX10-CONTRACT-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-CONTRACT-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY8]](i32), [[COPY9]](i32) + ; GFX10-CONTRACT-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i64>) = G_BUILD_VECTOR [[MV3]](i64), [[MV4]](i64), [[MV5]](i64) + ; GFX10-CONTRACT-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; GFX10-CONTRACT-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 
+ ; GFX10-CONTRACT-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; GFX10-CONTRACT-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; GFX10-CONTRACT-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; GFX10-CONTRACT-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr17 + ; GFX10-CONTRACT-NEXT: [[MV6:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY12]](i32), [[COPY13]](i32) + ; GFX10-CONTRACT-NEXT: [[MV7:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY14]](i32), [[COPY15]](i32) + ; GFX10-CONTRACT-NEXT: [[MV8:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY16]](i32), [[COPY17]](i32) + ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x i64>) = G_BUILD_VECTOR [[MV6]](i64), [[MV7]](i64), [[MV8]](i64) + ; GFX10-CONTRACT-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f64>) = G_BITCAST [[BUILD_VECTOR]](<3 x i64>) + ; GFX10-CONTRACT-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f64>) = G_BITCAST [[BUILD_VECTOR1]](<3 x i64>) + ; GFX10-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<3 x f64>) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-CONTRACT-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x f64>) = G_BITCAST [[BUILD_VECTOR2]](<3 x i64>) + ; GFX10-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<3 x f64>) = G_FADD [[BITCAST2]], [[FMUL]] + ; GFX10-CONTRACT-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x i64>) = G_BITCAST [[FADD]](<3 x f64>) + ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<3 x i64>) + ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX10-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; GFX10-CONTRACT-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; GFX10-CONTRACT-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; GFX10-CONTRACT-NEXT: $vgpr5 = COPY [[UV5]](i32) ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; ; GFX10-DENORM-LABEL: name: test_3xdouble_add_mul_rhs ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 ; GFX10-DENORM-NEXT: {{ $}} - ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) - ; GFX10-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-DENORM-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX10-DENORM-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX10-DENORM-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX10-DENORM-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-DENORM-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX10-DENORM-NEXT: 
[[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64) - ; GFX10-DENORM-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX10-DENORM-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX10-DENORM-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX10-DENORM-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX10-DENORM-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX10-DENORM-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX10-DENORM-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX10-DENORM-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX10-DENORM-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX10-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) - ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<3 x s64>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] - ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>) - ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-DENORM-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX10-DENORM-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX10-DENORM-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-DENORM-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX10-DENORM-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-DENORM-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64) + ; GFX10-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX10-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX10-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX10-DENORM-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GFX10-DENORM-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GFX10-DENORM-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GFX10-DENORM-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-DENORM-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY8]](i32), [[COPY9]](i32) + ; GFX10-DENORM-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i64>) = G_BUILD_VECTOR [[MV3]](i64), [[MV4]](i64), [[MV5]](i64) + ; GFX10-DENORM-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; GFX10-DENORM-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; GFX10-DENORM-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; GFX10-DENORM-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; GFX10-DENORM-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; 
GFX10-DENORM-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr17 + ; GFX10-DENORM-NEXT: [[MV6:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY12]](i32), [[COPY13]](i32) + ; GFX10-DENORM-NEXT: [[MV7:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY14]](i32), [[COPY15]](i32) + ; GFX10-DENORM-NEXT: [[MV8:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY16]](i32), [[COPY17]](i32) + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x i64>) = G_BUILD_VECTOR [[MV6]](i64), [[MV7]](i64), [[MV8]](i64) + ; GFX10-DENORM-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f64>) = G_BITCAST [[BUILD_VECTOR]](<3 x i64>) + ; GFX10-DENORM-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f64>) = G_BITCAST [[BUILD_VECTOR1]](<3 x i64>) + ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<3 x f64>) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-DENORM-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x f64>) = G_BITCAST [[BUILD_VECTOR2]](<3 x i64>) + ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<3 x f64>) = G_FADD [[BITCAST2]], [[FMUL]] + ; GFX10-DENORM-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x i64>) = G_BITCAST [[FADD]](<3 x f64>) + ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<3 x i64>) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX10-DENORM-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; GFX10-DENORM-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; GFX10-DENORM-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; GFX10-DENORM-NEXT: $vgpr5 = COPY [[UV5]](i32) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; ; GFX10-UNSAFE-LABEL: name: test_3xdouble_add_mul_rhs ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) - ; GFX10-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-UNSAFE-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX10-UNSAFE-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX10-UNSAFE-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX10-UNSAFE-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-UNSAFE-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX10-UNSAFE-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64) - ; GFX10-UNSAFE-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY 
$vgpr12 - ; GFX10-UNSAFE-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX10-UNSAFE-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX10-UNSAFE-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX10-UNSAFE-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX10-UNSAFE-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX10-UNSAFE-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX10-UNSAFE-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX10-UNSAFE-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) - ; GFX10-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX10-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<3 x s64>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] - ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>) - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX10-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64) + ; GFX10-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX10-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX10-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX10-UNSAFE-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GFX10-UNSAFE-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GFX10-UNSAFE-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GFX10-UNSAFE-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-UNSAFE-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY8]](i32), [[COPY9]](i32) + ; GFX10-UNSAFE-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i64>) = G_BUILD_VECTOR [[MV3]](i64), [[MV4]](i64), [[MV5]](i64) + ; GFX10-UNSAFE-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; GFX10-UNSAFE-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; GFX10-UNSAFE-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; GFX10-UNSAFE-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; GFX10-UNSAFE-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; GFX10-UNSAFE-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr17 + ; GFX10-UNSAFE-NEXT: [[MV6:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY12]](i32), [[COPY13]](i32) + ; GFX10-UNSAFE-NEXT: [[MV7:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY14]](i32), [[COPY15]](i32) + 
; GFX10-UNSAFE-NEXT: [[MV8:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY16]](i32), [[COPY17]](i32) + ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x i64>) = G_BUILD_VECTOR [[MV6]](i64), [[MV7]](i64), [[MV8]](i64) + ; GFX10-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f64>) = G_BITCAST [[BUILD_VECTOR]](<3 x i64>) + ; GFX10-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f64>) = G_BITCAST [[BUILD_VECTOR1]](<3 x i64>) + ; GFX10-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<3 x f64>) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x f64>) = G_BITCAST [[BUILD_VECTOR2]](<3 x i64>) + ; GFX10-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<3 x f64>) = G_FADD [[BITCAST2]], [[FMUL]] + ; GFX10-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x i64>) = G_BITCAST [[FADD]](<3 x f64>) + ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<3 x i64>) + ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX10-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; GFX10-UNSAFE-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; GFX10-UNSAFE-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; GFX10-UNSAFE-NEXT: $vgpr5 = COPY [[UV5]](i32) ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 - %4:_(s32) = COPY $vgpr0 - %5:_(s32) = COPY $vgpr1 - %6:_(s32) = COPY $vgpr2 - %7:_(s32) = COPY $vgpr3 - %8:_(s32) = COPY $vgpr4 - %9:_(s32) = COPY $vgpr5 - %22:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) - %23:_(s64) = G_MERGE_VALUES %6(s32), %7(s32) - %24:_(s64) = G_MERGE_VALUES %8(s32), %9(s32) - %0:_(<3 x s64>) = G_BUILD_VECTOR %22(s64), %23(s64), %24(s64) - %10:_(s32) = COPY $vgpr6 - %11:_(s32) = COPY $vgpr7 - %12:_(s32) = COPY $vgpr8 - %13:_(s32) = COPY $vgpr9 - %14:_(s32) = COPY $vgpr10 - %15:_(s32) = COPY $vgpr11 - %25:_(s64) = G_MERGE_VALUES %10(s32), %11(s32) - %26:_(s64) = G_MERGE_VALUES %12(s32), %13(s32) - %27:_(s64) = G_MERGE_VALUES %14(s32), %15(s32) - %1:_(<3 x s64>) = G_BUILD_VECTOR %25(s64), %26(s64), %27(s64) - %16:_(s32) = COPY $vgpr12 - %17:_(s32) = COPY $vgpr13 - %18:_(s32) = COPY $vgpr14 - %19:_(s32) = COPY $vgpr15 - %20:_(s32) = COPY $vgpr16 - %21:_(s32) = COPY $vgpr17 - %28:_(s64) = G_MERGE_VALUES %16(s32), %17(s32) - %29:_(s64) = G_MERGE_VALUES %18(s32), %19(s32) - %30:_(s64) = G_MERGE_VALUES %20(s32), %21(s32) - %2:_(<3 x s64>) = G_BUILD_VECTOR %28(s64), %29(s64), %30(s64) - %31:_(<3 x s64>) = G_FMUL %0, %1 - %32:_(<3 x s64>) = G_FADD %2, %31 - %34:_(s32), %35:_(s32), %36:_(s32), %37:_(s32), %38:_(s32), %39:_(s32) = G_UNMERGE_VALUES %32(<3 x s64>) - $vgpr0 = COPY %34(s32) - $vgpr1 = COPY %35(s32) - $vgpr2 = COPY %36(s32) - $vgpr3 = COPY %37(s32) - $vgpr4 = COPY %38(s32) - $vgpr5 = COPY %39(s32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 + %4:_(i32) = COPY $vgpr4 + %5:_(i32) = COPY $vgpr5 + %6:_(i64) = G_MERGE_VALUES %0(i32), %1(i32) + %7:_(i64) = G_MERGE_VALUES %2(i32), %3(i32) + %8:_(i64) = G_MERGE_VALUES %4(i32), %5(i32) + %9:_(<3 x i64>) = G_BUILD_VECTOR %6(i64), %7(i64), %8(i64) + %10:_(i32) = COPY $vgpr6 + %11:_(i32) = COPY $vgpr7 + %12:_(i32) = COPY $vgpr8 + %13:_(i32) = COPY $vgpr9 + %14:_(i32) = COPY $vgpr10 + %15:_(i32) = COPY $vgpr11 + %16:_(i64) = G_MERGE_VALUES %10(i32), %11(i32) + %17:_(i64) = G_MERGE_VALUES %12(i32), %13(i32) + %18:_(i64) = G_MERGE_VALUES %14(i32), %15(i32) + %19:_(<3 x 
i64>) = G_BUILD_VECTOR %16(i64), %17(i64), %18(i64) + %20:_(i32) = COPY $vgpr12 + %21:_(i32) = COPY $vgpr13 + %22:_(i32) = COPY $vgpr14 + %23:_(i32) = COPY $vgpr15 + %24:_(i32) = COPY $vgpr16 + %25:_(i32) = COPY $vgpr17 + %26:_(i64) = G_MERGE_VALUES %20(i32), %21(i32) + %27:_(i64) = G_MERGE_VALUES %22(i32), %23(i32) + %28:_(i64) = G_MERGE_VALUES %24(i32), %25(i32) + %29:_(<3 x i64>) = G_BUILD_VECTOR %26(i64), %27(i64), %28(i64) + %30:_(<3 x f64>) = G_BITCAST %9(<3 x i64>) + %31:_(<3 x f64>) = G_BITCAST %19(<3 x i64>) + %32:_(<3 x f64>) = G_FMUL %30, %31 + %33:_(<3 x f64>) = G_BITCAST %29(<3 x i64>) + %34:_(<3 x f64>) = G_FADD %33, %32 + %35:_(<3 x i64>) = G_BITCAST %34(<3 x f64>) + %36:_(i32), %37:_(i32), %38:_(i32), %39:_(i32), %40:_(i32), %41:_(i32) = G_UNMERGE_VALUES %35(<3 x i64>) + $vgpr0 = COPY %36(i32) + $vgpr1 = COPY %37(i32) + $vgpr2 = COPY %38(i32) + $vgpr3 = COPY %39(i32) + $vgpr4 = COPY %40(i32) + $vgpr5 = COPY %41(i32) S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-pre-legalize.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-pre-legalize.mir index 42e53bedb8d85..e91c17e8ce4ce 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-pre-legalize.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-pre-legalize.mir @@ -17,92 +17,128 @@ body: | ; GFX9-LABEL: name: test_f32_add_mul ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = reassoc G_FMUL [[COPY]], [[COPY1]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = reassoc G_FADD [[FMUL]], [[COPY2]] - ; GFX9-NEXT: $vgpr0 = COPY [[FADD]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = reassoc G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(f32) = reassoc G_FADD [[FMUL]], [[BITCAST2]] + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FADD]](f32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; ; GFX9-CONTRACT-LABEL: name: test_f32_add_mul ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-CONTRACT-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-CONTRACT-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX9-CONTRACT-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; GFX9-CONTRACT-NEXT: 
[[FMA:%[0-9]+]]:_(f32) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX9-CONTRACT-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMA]](f32) + ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; ; GFX9-DENORM-LABEL: name: test_f32_add_mul ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-DENORM-NEXT: {{ $}} - ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(s32) = reassoc G_FMUL [[COPY]], [[COPY1]] - ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s32) = reassoc G_FADD [[FMUL]], [[COPY2]] - ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[FADD]](s32) + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-DENORM-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(f32) = reassoc G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-DENORM-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(f32) = reassoc G_FADD [[FMUL]], [[BITCAST2]] + ; GFX9-DENORM-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FADD]](f32) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; ; GFX9-UNSAFE-LABEL: name: test_f32_add_mul ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX9-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX9-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMA]](f32) + ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; ; GFX10-LABEL: name: test_f32_add_mul ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = reassoc G_FMUL [[COPY]], [[COPY1]] - ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = reassoc G_FADD [[FMUL]], [[COPY2]] - ; GFX10-NEXT: $vgpr0 = COPY [[FADD]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f32) = reassoc G_FMUL 
[[BITCAST]], [[BITCAST1]] + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(f32) = reassoc G_FADD [[FMUL]], [[BITCAST2]] + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FADD]](f32) + ; GFX10-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; ; GFX10-CONTRACT-LABEL: name: test_f32_add_mul ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-CONTRACT-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX10-CONTRACT-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX10-CONTRACT-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX10-CONTRACT-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMA]](f32) + ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; ; GFX10-DENORM-LABEL: name: test_f32_add_mul ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-DENORM-NEXT: {{ $}} - ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(s32) = reassoc G_FMUL [[COPY]], [[COPY1]] - ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s32) = reassoc G_FADD [[FMUL]], [[COPY2]] - ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[FADD]](s32) + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX10-DENORM-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(f32) = reassoc G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-DENORM-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(f32) = reassoc G_FADD [[FMUL]], [[BITCAST2]] + ; GFX10-DENORM-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FADD]](f32) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; ; GFX10-UNSAFE-LABEL: name: test_f32_add_mul ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-UNSAFE-NEXT: 
[[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX10-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX10-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX10-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMA]](f32) + ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %4:_(s32) = reassoc G_FMUL %0, %1 - %5:_(s32) = reassoc G_FADD %4, %2 - $vgpr0 = COPY %5(s32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(f32) = G_BITCAST %0(i32) + %4:_(f32) = G_BITCAST %1(i32) + %5:_(f32) = reassoc G_FMUL %3, %4 + %6:_(f32) = G_BITCAST %2(i32) + %7:_(f32) = reassoc G_FADD %5, %6 + %8:_(i32) = G_BITCAST %7(f32) + $vgpr0 = COPY %8(i32) S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ... @@ -115,92 +151,128 @@ body: | ; GFX9-LABEL: name: test_f32_add_mul_rhs ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = reassoc G_FMUL [[COPY]], [[COPY1]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = reassoc G_FADD [[COPY2]], [[FMUL]] - ; GFX9-NEXT: $vgpr0 = COPY [[FADD]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = reassoc G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(f32) = reassoc G_FADD [[BITCAST2]], [[FMUL]] + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FADD]](f32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; ; GFX9-CONTRACT-LABEL: name: test_f32_add_mul_rhs ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-CONTRACT-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-CONTRACT-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX9-CONTRACT-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX9-CONTRACT-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMA]](f32) + ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; ; GFX9-DENORM-LABEL: name: test_f32_add_mul_rhs ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-DENORM-NEXT: {{ $}} - ; 
GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(s32) = reassoc G_FMUL [[COPY]], [[COPY1]] - ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s32) = reassoc G_FADD [[COPY2]], [[FMUL]] - ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[FADD]](s32) + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-DENORM-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(f32) = reassoc G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-DENORM-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(f32) = reassoc G_FADD [[BITCAST2]], [[FMUL]] + ; GFX9-DENORM-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FADD]](f32) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; ; GFX9-UNSAFE-LABEL: name: test_f32_add_mul_rhs ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX9-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX9-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMA]](f32) + ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; ; GFX10-LABEL: name: test_f32_add_mul_rhs ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = reassoc G_FMUL [[COPY]], [[COPY1]] - ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = reassoc G_FADD [[COPY2]], [[FMUL]] - ; GFX10-NEXT: $vgpr0 = COPY [[FADD]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f32) = reassoc G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(f32) = reassoc G_FADD [[BITCAST2]], [[FMUL]] + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FADD]](f32) + ; GFX10-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; ; GFX10-CONTRACT-LABEL: name: 
test_f32_add_mul_rhs ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-CONTRACT-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX10-CONTRACT-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX10-CONTRACT-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX10-CONTRACT-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMA]](f32) + ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; ; GFX10-DENORM-LABEL: name: test_f32_add_mul_rhs ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-DENORM-NEXT: {{ $}} - ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(s32) = reassoc G_FMUL [[COPY]], [[COPY1]] - ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s32) = reassoc G_FADD [[COPY2]], [[FMUL]] - ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[FADD]](s32) + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX10-DENORM-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(f32) = reassoc G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-DENORM-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(f32) = reassoc G_FADD [[BITCAST2]], [[FMUL]] + ; GFX10-DENORM-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FADD]](f32) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; ; GFX10-UNSAFE-LABEL: name: test_f32_add_mul_rhs ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX10-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX10-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX10-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMA]](f32) + ; GFX10-UNSAFE-NEXT: 
$vgpr0 = COPY [[BITCAST3]](i32) ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %4:_(s32) = reassoc G_FMUL %0, %1 - %5:_(s32) = reassoc G_FADD %2, %4 - $vgpr0 = COPY %5(s32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(f32) = G_BITCAST %0(i32) + %4:_(f32) = G_BITCAST %1(i32) + %5:_(f32) = reassoc G_FMUL %3, %4 + %6:_(f32) = G_BITCAST %2(i32) + %7:_(f32) = reassoc G_FADD %6, %5 + %8:_(i32) = G_BITCAST %7(f32) + $vgpr0 = COPY %8(i32) S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ... @@ -213,128 +285,164 @@ body: | ; GFX9-LABEL: name: test_half_add_mul ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s16) = reassoc G_FMUL [[TRUNC]], [[TRUNC1]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s16) = reassoc G_FADD [[FMUL]], [[TRUNC2]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f16) = reassoc G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(f16) = reassoc G_FADD [[FMUL]], [[BITCAST2]] + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[FADD]](f16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST3]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; ; GFX9-CONTRACT-LABEL: name: test_half_add_mul ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] - ; GFX9-CONTRACT-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-CONTRACT-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-CONTRACT-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-CONTRACT-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX9-CONTRACT-NEXT: 
[[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-CONTRACT-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-CONTRACT-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(f16) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX9-CONTRACT-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[FMA]](f16) + ; GFX9-CONTRACT-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST3]](i16) + ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; ; GFX9-DENORM-LABEL: name: test_half_add_mul ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-DENORM-NEXT: {{ $}} - ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-DENORM-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-DENORM-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-DENORM-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(s16) = reassoc G_FMUL [[TRUNC]], [[TRUNC1]] - ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s16) = reassoc G_FADD [[FMUL]], [[TRUNC2]] - ; GFX9-DENORM-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) - ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX9-DENORM-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-DENORM-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(f16) = reassoc G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-DENORM-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(f16) = reassoc G_FADD [[FMUL]], [[BITCAST2]] + ; GFX9-DENORM-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[FADD]](f16) + ; GFX9-DENORM-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST3]](i16) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; ; GFX9-UNSAFE-LABEL: name: test_half_add_mul ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] - ; GFX9-UNSAFE-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-UNSAFE-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; 
GFX9-UNSAFE-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX9-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(f16) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX9-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[FMA]](f16) + ; GFX9-UNSAFE-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST3]](i16) + ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; ; GFX10-LABEL: name: test_half_add_mul ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s16) = reassoc G_FMUL [[TRUNC]], [[TRUNC1]] - ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s16) = reassoc G_FADD [[FMUL]], [[TRUNC2]] - ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) - ; GFX10-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f16) = reassoc G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(f16) = reassoc G_FADD [[FMUL]], [[BITCAST2]] + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[FADD]](f16) + ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST3]](i16) + ; GFX10-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; ; GFX10-CONTRACT-LABEL: name: test_half_add_mul ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] - ; GFX10-CONTRACT-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-CONTRACT-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-CONTRACT-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-CONTRACT-NEXT: 
[[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX10-CONTRACT-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-CONTRACT-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-CONTRACT-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(f16) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX10-CONTRACT-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[FMA]](f16) + ; GFX10-CONTRACT-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST3]](i16) + ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; ; GFX10-DENORM-LABEL: name: test_half_add_mul ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-DENORM-NEXT: {{ $}} - ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-DENORM-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-DENORM-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-DENORM-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(s16) = reassoc G_FMUL [[TRUNC]], [[TRUNC1]] - ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s16) = reassoc G_FADD [[FMUL]], [[TRUNC2]] - ; GFX10-DENORM-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) - ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX10-DENORM-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-DENORM-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(f16) = reassoc G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-DENORM-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(f16) = reassoc G_FADD [[FMUL]], [[BITCAST2]] + ; GFX10-DENORM-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[FADD]](f16) + ; GFX10-DENORM-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST3]](i16) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; ; GFX10-UNSAFE-LABEL: name: test_half_add_mul ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] - ; GFX10-UNSAFE-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; 
GFX10-UNSAFE-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-UNSAFE-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX10-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(f16) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX10-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[FMA]](f16) + ; GFX10-UNSAFE-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST3]](i16) + ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - %4:_(s32) = COPY $vgpr0 - %0:_(s16) = G_TRUNC %4(s32) - %5:_(s32) = COPY $vgpr1 - %1:_(s16) = G_TRUNC %5(s32) - %6:_(s32) = COPY $vgpr2 - %2:_(s16) = G_TRUNC %6(s32) - %7:_(s16) = reassoc G_FMUL %0, %1 - %8:_(s16) = reassoc G_FADD %7, %2 - %10:_(s32) = G_ANYEXT %8(s16) - $vgpr0 = COPY %10(s32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(i32) = COPY $vgpr1 + %3:_(i16) = G_TRUNC %2(i32) + %4:_(i32) = COPY $vgpr2 + %5:_(i16) = G_TRUNC %4(i32) + %6:_(f16) = G_BITCAST %1(i16) + %7:_(f16) = G_BITCAST %3(i16) + %8:_(f16) = reassoc G_FMUL %6, %7 + %9:_(f16) = G_BITCAST %5(i16) + %10:_(f16) = reassoc G_FADD %8, %9 + %11:_(i16) = G_BITCAST %10(f16) + %12:_(i32) = G_ANYEXT %11(i16) + $vgpr0 = COPY %12(i32) S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ... @@ -347,128 +455,164 @@ body: | ; GFX9-LABEL: name: test_half_add_mul_rhs ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s16) = reassoc G_FMUL [[TRUNC]], [[TRUNC1]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s16) = reassoc G_FADD [[TRUNC2]], [[FMUL]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f16) = reassoc G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(f16) = reassoc G_FADD [[BITCAST2]], [[FMUL]] + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[FADD]](f16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST3]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; ; GFX9-CONTRACT-LABEL: name: test_half_add_mul_rhs ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: 
[[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] - ; GFX9-CONTRACT-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-CONTRACT-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-CONTRACT-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-CONTRACT-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX9-CONTRACT-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-CONTRACT-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-CONTRACT-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(f16) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX9-CONTRACT-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[FMA]](f16) + ; GFX9-CONTRACT-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST3]](i16) + ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; ; GFX9-DENORM-LABEL: name: test_half_add_mul_rhs ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-DENORM-NEXT: {{ $}} - ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-DENORM-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-DENORM-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-DENORM-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(s16) = reassoc G_FMUL [[TRUNC]], [[TRUNC1]] - ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s16) = reassoc G_FADD [[TRUNC2]], [[FMUL]] - ; GFX9-DENORM-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) - ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX9-DENORM-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-DENORM-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(f16) = reassoc G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-DENORM-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(f16) = reassoc G_FADD [[BITCAST2]], [[FMUL]] + ; GFX9-DENORM-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[FADD]](f16) + ; GFX9-DENORM-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST3]](i16) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; ; GFX9-UNSAFE-LABEL: name: test_half_add_mul_rhs ; GFX9-UNSAFE: liveins: $vgpr0, 
$vgpr1, $vgpr2 ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] - ; GFX9-UNSAFE-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-UNSAFE-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-UNSAFE-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX9-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(f16) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX9-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[FMA]](f16) + ; GFX9-UNSAFE-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST3]](i16) + ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; ; GFX10-LABEL: name: test_half_add_mul_rhs ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s16) = reassoc G_FMUL [[TRUNC]], [[TRUNC1]] - ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s16) = reassoc G_FADD [[TRUNC2]], [[FMUL]] - ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) - ; GFX10-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f16) = reassoc G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(f16) = reassoc G_FADD [[BITCAST2]], [[FMUL]] + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[FADD]](f16) + ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST3]](i16) + ; GFX10-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; ; GFX10-CONTRACT-LABEL: name: test_half_add_mul_rhs ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: 
[[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] - ; GFX10-CONTRACT-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-CONTRACT-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-CONTRACT-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-CONTRACT-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX10-CONTRACT-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-CONTRACT-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-CONTRACT-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(f16) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX10-CONTRACT-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[FMA]](f16) + ; GFX10-CONTRACT-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST3]](i16) + ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; ; GFX10-DENORM-LABEL: name: test_half_add_mul_rhs ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-DENORM-NEXT: {{ $}} - ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-DENORM-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-DENORM-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-DENORM-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(s16) = reassoc G_FMUL [[TRUNC]], [[TRUNC1]] - ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s16) = reassoc G_FADD [[TRUNC2]], [[FMUL]] - ; GFX10-DENORM-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) - ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX10-DENORM-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-DENORM-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(f16) = reassoc G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-DENORM-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(f16) = reassoc G_FADD [[BITCAST2]], [[FMUL]] + ; GFX10-DENORM-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[FADD]](f16) + ; GFX10-DENORM-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST3]](i16) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; GFX10-DENORM-NEXT: S_SETPC_B64_return 
$sgpr30_sgpr31, implicit $vgpr0 ; ; GFX10-UNSAFE-LABEL: name: test_half_add_mul_rhs ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] - ; GFX10-UNSAFE-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-UNSAFE-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-UNSAFE-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX10-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(f16) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX10-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[FMA]](f16) + ; GFX10-UNSAFE-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST3]](i16) + ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - %4:_(s32) = COPY $vgpr0 - %0:_(s16) = G_TRUNC %4(s32) - %5:_(s32) = COPY $vgpr1 - %1:_(s16) = G_TRUNC %5(s32) - %6:_(s32) = COPY $vgpr2 - %2:_(s16) = G_TRUNC %6(s32) - %7:_(s16) = reassoc G_FMUL %0, %1 - %8:_(s16) = reassoc G_FADD %2, %7 - %10:_(s32) = G_ANYEXT %8(s16) - $vgpr0 = COPY %10(s32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(i32) = COPY $vgpr1 + %3:_(i16) = G_TRUNC %2(i32) + %4:_(i32) = COPY $vgpr2 + %5:_(i16) = G_TRUNC %4(i32) + %6:_(f16) = G_BITCAST %1(i16) + %7:_(f16) = G_BITCAST %3(i16) + %8:_(f16) = reassoc G_FMUL %6, %7 + %9:_(f16) = G_BITCAST %5(i16) + %10:_(f16) = reassoc G_FADD %9, %8 + %11:_(i16) = G_BITCAST %10(f16) + %12:_(i32) = G_ANYEXT %11(i16) + $vgpr0 = COPY %12(i32) S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ... 
@@ -481,164 +625,200 @@ body: | ; GFX9-LABEL: name: test_double_add_mul ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s64) = reassoc G_FMUL [[MV]], [[MV1]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s64) = reassoc G_FADD [[FMUL]], [[MV2]] - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[MV]](i64) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[MV1]](i64) + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f64) = reassoc G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f64) = G_BITCAST [[MV2]](i64) + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(f64) = reassoc G_FADD [[FMUL]], [[BITCAST2]] + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i64) = G_BITCAST [[FADD]](f64) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](i64) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](i32) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; ; GFX9-CONTRACT-LABEL: name: test_double_add_mul ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] - ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + 
; GFX9-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX9-CONTRACT-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[MV]](i64) + ; GFX9-CONTRACT-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[MV1]](i64) + ; GFX9-CONTRACT-NEXT: [[BITCAST2:%[0-9]+]]:_(f64) = G_BITCAST [[MV2]](i64) + ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(f64) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX9-CONTRACT-NEXT: [[BITCAST3:%[0-9]+]]:_(i64) = G_BITCAST [[FMA]](f64) + ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](i64) + ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](i32) ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; ; GFX9-DENORM-LABEL: name: test_double_add_mul ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-DENORM-NEXT: {{ $}} - ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(s64) = reassoc G_FMUL [[MV]], [[MV1]] - ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s64) = reassoc G_FADD [[FMUL]], [[MV2]] - ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64) - ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-DENORM-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-DENORM-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX9-DENORM-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[MV]](i64) + ; GFX9-DENORM-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[MV1]](i64) + ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(f64) = reassoc G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-DENORM-NEXT: [[BITCAST2:%[0-9]+]]:_(f64) = G_BITCAST [[MV2]](i64) + ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(f64) = reassoc G_FADD [[FMUL]], [[BITCAST2]] + ; GFX9-DENORM-NEXT: [[BITCAST3:%[0-9]+]]:_(i64) = G_BITCAST [[FADD]](f64) + ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(i32), 
[[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](i64) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](i32) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; ; GFX9-UNSAFE-LABEL: name: test_double_add_mul ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] - ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX9-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[MV]](i64) + ; GFX9-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[MV1]](i64) + ; GFX9-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(f64) = G_BITCAST [[MV2]](i64) + ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(f64) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX9-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(i64) = G_BITCAST [[FMA]](f64) + ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](i64) + ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](i32) ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; ; GFX10-LABEL: name: test_double_add_mul ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s64) = reassoc G_FMUL [[MV]], [[MV1]] - ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s64) = reassoc G_FADD [[FMUL]], [[MV2]] - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), 
[[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[MV]](i64) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[MV1]](i64) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f64) = reassoc G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f64) = G_BITCAST [[MV2]](i64) + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(f64) = reassoc G_FADD [[FMUL]], [[BITCAST2]] + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i64) = G_BITCAST [[FADD]](f64) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](i64) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](i32) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; ; GFX10-CONTRACT-LABEL: name: test_double_add_mul ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] - ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX10-CONTRACT-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[MV]](i64) + ; GFX10-CONTRACT-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[MV1]](i64) + ; GFX10-CONTRACT-NEXT: [[BITCAST2:%[0-9]+]]:_(f64) = G_BITCAST [[MV2]](i64) + ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(f64) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; 
GFX10-CONTRACT-NEXT: [[BITCAST3:%[0-9]+]]:_(i64) = G_BITCAST [[FMA]](f64) + ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](i64) + ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](i32) ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; ; GFX10-DENORM-LABEL: name: test_double_add_mul ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-DENORM-NEXT: {{ $}} - ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(s64) = reassoc G_FMUL [[MV]], [[MV1]] - ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s64) = reassoc G_FADD [[FMUL]], [[MV2]] - ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64) - ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-DENORM-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-DENORM-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX10-DENORM-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[MV]](i64) + ; GFX10-DENORM-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[MV1]](i64) + ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(f64) = reassoc G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-DENORM-NEXT: [[BITCAST2:%[0-9]+]]:_(f64) = G_BITCAST [[MV2]](i64) + ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(f64) = reassoc G_FADD [[FMUL]], [[BITCAST2]] + ; GFX10-DENORM-NEXT: [[BITCAST3:%[0-9]+]]:_(i64) = G_BITCAST [[FADD]](f64) + ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](i64) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](i32) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; ; GFX10-UNSAFE-LABEL: name: test_double_add_mul ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; 
GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] - ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX10-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[MV]](i64) + ; GFX10-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[MV1]](i64) + ; GFX10-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(f64) = G_BITCAST [[MV2]](i64) + ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(f64) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX10-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(i64) = G_BITCAST [[FMA]](f64) + ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](i64) + ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](i32) ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - %4:_(s32) = COPY $vgpr0 - %5:_(s32) = COPY $vgpr1 - %0:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) - %6:_(s32) = COPY $vgpr2 - %7:_(s32) = COPY $vgpr3 - %1:_(s64) = G_MERGE_VALUES %6(s32), %7(s32) - %8:_(s32) = COPY $vgpr4 - %9:_(s32) = COPY $vgpr5 - %2:_(s64) = G_MERGE_VALUES %8(s32), %9(s32) - %10:_(s64) = reassoc G_FMUL %0, %1 - %11:_(s64) = reassoc G_FADD %10, %2 - %13:_(s32), %14:_(s32) = G_UNMERGE_VALUES %11(s64) - $vgpr0 = COPY %13(s32) - $vgpr1 = COPY %14(s32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i64) = G_MERGE_VALUES %0(i32), %1(i32) + %3:_(i32) = COPY $vgpr2 + %4:_(i32) = COPY $vgpr3 + %5:_(i64) = G_MERGE_VALUES %3(i32), %4(i32) + %6:_(i32) = COPY $vgpr4 + %7:_(i32) = COPY $vgpr5 + %8:_(i64) = G_MERGE_VALUES %6(i32), %7(i32) + %9:_(f64) = G_BITCAST %2(i64) + %10:_(f64) = G_BITCAST %5(i64) + %11:_(f64) = reassoc G_FMUL %9, %10 + %12:_(f64) = G_BITCAST %8(i64) + %13:_(f64) = reassoc G_FADD %11, %12 + %14:_(i64) = G_BITCAST %13(f64) + %15:_(i32), %16:_(i32) = G_UNMERGE_VALUES %14(i64) + $vgpr0 = COPY %15(i32) + $vgpr1 = COPY %16(i32) S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ... 
@@ -651,164 +831,200 @@ body: | ; GFX9-LABEL: name: test_double_add_mul_rhs ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s64) = reassoc G_FMUL [[MV]], [[MV1]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s64) = reassoc G_FADD [[MV2]], [[FMUL]] - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[MV]](i64) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[MV1]](i64) + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f64) = reassoc G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f64) = G_BITCAST [[MV2]](i64) + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(f64) = reassoc G_FADD [[BITCAST2]], [[FMUL]] + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i64) = G_BITCAST [[FADD]](f64) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](i64) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](i32) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; ; GFX9-CONTRACT-LABEL: name: test_double_add_mul_rhs ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] - ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY 
$vgpr1 + ; GFX9-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX9-CONTRACT-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[MV]](i64) + ; GFX9-CONTRACT-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[MV1]](i64) + ; GFX9-CONTRACT-NEXT: [[BITCAST2:%[0-9]+]]:_(f64) = G_BITCAST [[MV2]](i64) + ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(f64) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX9-CONTRACT-NEXT: [[BITCAST3:%[0-9]+]]:_(i64) = G_BITCAST [[FMA]](f64) + ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](i64) + ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](i32) ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; ; GFX9-DENORM-LABEL: name: test_double_add_mul_rhs ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-DENORM-NEXT: {{ $}} - ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(s64) = reassoc G_FMUL [[MV]], [[MV1]] - ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s64) = reassoc G_FADD [[MV2]], [[FMUL]] - ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64) - ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-DENORM-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-DENORM-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX9-DENORM-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[MV]](i64) + ; GFX9-DENORM-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[MV1]](i64) + ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(f64) = reassoc G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-DENORM-NEXT: [[BITCAST2:%[0-9]+]]:_(f64) = G_BITCAST [[MV2]](i64) + ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(f64) = reassoc G_FADD [[BITCAST2]], [[FMUL]] + ; GFX9-DENORM-NEXT: [[BITCAST3:%[0-9]+]]:_(i64) = G_BITCAST [[FADD]](f64) + ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(i32), 
[[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](i64) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](i32) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; ; GFX9-UNSAFE-LABEL: name: test_double_add_mul_rhs ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] - ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX9-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[MV]](i64) + ; GFX9-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[MV1]](i64) + ; GFX9-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(f64) = G_BITCAST [[MV2]](i64) + ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(f64) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX9-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(i64) = G_BITCAST [[FMA]](f64) + ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](i64) + ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](i32) ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; ; GFX10-LABEL: name: test_double_add_mul_rhs ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s64) = reassoc G_FMUL [[MV]], [[MV1]] - ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s64) = reassoc G_FADD [[MV2]], [[FMUL]] - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), 
[[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[MV]](i64) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[MV1]](i64) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f64) = reassoc G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f64) = G_BITCAST [[MV2]](i64) + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(f64) = reassoc G_FADD [[BITCAST2]], [[FMUL]] + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i64) = G_BITCAST [[FADD]](f64) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](i64) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](i32) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; ; GFX10-CONTRACT-LABEL: name: test_double_add_mul_rhs ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] - ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX10-CONTRACT-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[MV]](i64) + ; GFX10-CONTRACT-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[MV1]](i64) + ; GFX10-CONTRACT-NEXT: [[BITCAST2:%[0-9]+]]:_(f64) = G_BITCAST [[MV2]](i64) + ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(f64) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; 
GFX10-CONTRACT-NEXT: [[BITCAST3:%[0-9]+]]:_(i64) = G_BITCAST [[FMA]](f64) + ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](i64) + ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](i32) ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; ; GFX10-DENORM-LABEL: name: test_double_add_mul_rhs ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-DENORM-NEXT: {{ $}} - ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(s64) = reassoc G_FMUL [[MV]], [[MV1]] - ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s64) = reassoc G_FADD [[MV2]], [[FMUL]] - ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64) - ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-DENORM-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-DENORM-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX10-DENORM-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[MV]](i64) + ; GFX10-DENORM-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[MV1]](i64) + ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(f64) = reassoc G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-DENORM-NEXT: [[BITCAST2:%[0-9]+]]:_(f64) = G_BITCAST [[MV2]](i64) + ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(f64) = reassoc G_FADD [[BITCAST2]], [[FMUL]] + ; GFX10-DENORM-NEXT: [[BITCAST3:%[0-9]+]]:_(i64) = G_BITCAST [[FADD]](f64) + ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](i64) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](i32) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; ; GFX10-UNSAFE-LABEL: name: test_double_add_mul_rhs ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), 
[[COPY3]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] - ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX10-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[MV]](i64) + ; GFX10-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[MV1]](i64) + ; GFX10-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(f64) = G_BITCAST [[MV2]](i64) + ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(f64) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX10-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(i64) = G_BITCAST [[FMA]](f64) + ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](i64) + ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](i32) ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - %4:_(s32) = COPY $vgpr0 - %5:_(s32) = COPY $vgpr1 - %0:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) - %6:_(s32) = COPY $vgpr2 - %7:_(s32) = COPY $vgpr3 - %1:_(s64) = G_MERGE_VALUES %6(s32), %7(s32) - %8:_(s32) = COPY $vgpr4 - %9:_(s32) = COPY $vgpr5 - %2:_(s64) = G_MERGE_VALUES %8(s32), %9(s32) - %10:_(s64) = reassoc G_FMUL %0, %1 - %11:_(s64) = reassoc G_FADD %2, %10 - %13:_(s32), %14:_(s32) = G_UNMERGE_VALUES %11(s64) - $vgpr0 = COPY %13(s32) - $vgpr1 = COPY %14(s32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i64) = G_MERGE_VALUES %0(i32), %1(i32) + %3:_(i32) = COPY $vgpr2 + %4:_(i32) = COPY $vgpr3 + %5:_(i64) = G_MERGE_VALUES %3(i32), %4(i32) + %6:_(i32) = COPY $vgpr4 + %7:_(i32) = COPY $vgpr5 + %8:_(i64) = G_MERGE_VALUES %6(i32), %7(i32) + %9:_(f64) = G_BITCAST %2(i64) + %10:_(f64) = G_BITCAST %5(i64) + %11:_(f64) = reassoc G_FMUL %9, %10 + %12:_(f64) = G_BITCAST %8(i64) + %13:_(f64) = reassoc G_FADD %12, %11 + %14:_(i64) = G_BITCAST %13(f64) + %15:_(i32), %16:_(i32) = G_UNMERGE_VALUES %14(i64) + $vgpr0 = COPY %15(i32) + $vgpr1 = COPY %16(i32) S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ... 
@@ -821,236 +1037,272 @@ body: | ; GFX9-LABEL: name: test_4xfloat_add_mul ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s32>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<4 x s32>) = reassoc G_FADD [[FMUL]], [[BUILD_VECTOR2]] - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x i32>) + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(<4 x f32>) = reassoc G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[BUILD_VECTOR2]](<4 x i32>) + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<4 x f32>) = reassoc G_FADD [[FMUL]], [[BITCAST2]] + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[FADD]](<4 x f32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<4 x i32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX9-NEXT: $vgpr1 
= COPY [[UV1]](i32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](i32) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX9-CONTRACT-LABEL: name: test_4xfloat_add_mul ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11 ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-CONTRACT-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX9-CONTRACT-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX9-CONTRACT-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<4 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] - ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s32>) - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX9-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX9-CONTRACT-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GFX9-CONTRACT-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GFX9-CONTRACT-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-CONTRACT-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; GFX9-CONTRACT-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x i32>) + ; GFX9-CONTRACT-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f32>) = G_BITCAST 
[[BUILD_VECTOR2]](<4 x i32>) + ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<4 x f32>) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX9-CONTRACT-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[FMA]](<4 x f32>) + ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<4 x i32>) + ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX9-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; GFX9-CONTRACT-NEXT: $vgpr3 = COPY [[UV3]](i32) ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX9-DENORM-LABEL: name: test_4xfloat_add_mul ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11 ; GFX9-DENORM-NEXT: {{ $}} - ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-DENORM-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX9-DENORM-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX9-DENORM-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX9-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s32>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<4 x s32>) = reassoc G_FADD [[FMUL]], [[BUILD_VECTOR2]] - ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) - ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-DENORM-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX9-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX9-DENORM-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GFX9-DENORM-NEXT: [[COPY10:%[0-9]+]]:_(i32) = 
COPY $vgpr10 + ; GFX9-DENORM-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-DENORM-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; GFX9-DENORM-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x i32>) + ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<4 x f32>) = reassoc G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-DENORM-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[BUILD_VECTOR2]](<4 x i32>) + ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<4 x f32>) = reassoc G_FADD [[FMUL]], [[BITCAST2]] + ; GFX9-DENORM-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[FADD]](<4 x f32>) + ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<4 x i32>) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX9-DENORM-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; GFX9-DENORM-NEXT: $vgpr3 = COPY [[UV3]](i32) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX9-UNSAFE-LABEL: name: test_4xfloat_add_mul ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11 ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-UNSAFE-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX9-UNSAFE-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX9-UNSAFE-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<4 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] - ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s32>) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-UNSAFE-NEXT: 
[[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX9-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX9-UNSAFE-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GFX9-UNSAFE-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GFX9-UNSAFE-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; GFX9-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x i32>) + ; GFX9-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[BUILD_VECTOR2]](<4 x i32>) + ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<4 x f32>) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX9-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[FMA]](<4 x f32>) + ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<4 x i32>) + ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX9-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; GFX9-UNSAFE-NEXT: $vgpr3 = COPY [[UV3]](i32) ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10-LABEL: name: test_4xfloat_add_mul ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s32>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(<4 x s32>) = reassoc G_FADD [[FMUL]], [[BUILD_VECTOR2]] - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = 
COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x i32>) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(<4 x f32>) = reassoc G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[BUILD_VECTOR2]](<4 x i32>) + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(<4 x f32>) = reassoc G_FADD [[FMUL]], [[BITCAST2]] + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[FADD]](<4 x f32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<4 x i32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](i32) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10-CONTRACT-LABEL: name: test_4xfloat_add_mul ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11 ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-CONTRACT-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX10-CONTRACT-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX10-CONTRACT-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<4 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] - ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES 
[[FMA]](<4 x s32>) - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX10-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX10-CONTRACT-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GFX10-CONTRACT-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GFX10-CONTRACT-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-CONTRACT-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; GFX10-CONTRACT-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x i32>) + ; GFX10-CONTRACT-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[BUILD_VECTOR2]](<4 x i32>) + ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<4 x f32>) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX10-CONTRACT-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[FMA]](<4 x f32>) + ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<4 x i32>) + ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX10-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; GFX10-CONTRACT-NEXT: $vgpr3 = COPY [[UV3]](i32) ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10-DENORM-LABEL: name: test_4xfloat_add_mul ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11 ; GFX10-DENORM-NEXT: {{ $}} - ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-DENORM-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX10-DENORM-NEXT: 
[[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX10-DENORM-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX10-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s32>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<4 x s32>) = reassoc G_FADD [[FMUL]], [[BUILD_VECTOR2]] - ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) - ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-DENORM-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX10-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX10-DENORM-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GFX10-DENORM-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GFX10-DENORM-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-DENORM-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; GFX10-DENORM-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x i32>) + ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<4 x f32>) = reassoc G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-DENORM-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[BUILD_VECTOR2]](<4 x i32>) + ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<4 x f32>) = reassoc G_FADD [[FMUL]], [[BITCAST2]] + ; GFX10-DENORM-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[FADD]](<4 x f32>) + ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<4 x i32>) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX10-DENORM-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; GFX10-DENORM-NEXT: $vgpr3 = COPY [[UV3]](i32) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10-UNSAFE-LABEL: name: test_4xfloat_add_mul ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11 ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = 
G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-UNSAFE-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX10-UNSAFE-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX10-UNSAFE-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<4 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] - ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s32>) - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX10-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX10-UNSAFE-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GFX10-UNSAFE-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GFX10-UNSAFE-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; GFX10-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[BUILD_VECTOR1]](<4 x i32>) + ; GFX10-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[BUILD_VECTOR2]](<4 x i32>) + ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<4 x f32>) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX10-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[FMA]](<4 x f32>) + ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<4 x i32>) + ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX10-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; GFX10-UNSAFE-NEXT: $vgpr3 = COPY [[UV3]](i32) ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 - %4:_(s32) = COPY $vgpr0 - %5:_(s32) = COPY $vgpr1 - %6:_(s32) = COPY $vgpr2 - 
%7:_(s32) = COPY $vgpr3 - %0:_(<4 x s32>) = G_BUILD_VECTOR %4(s32), %5(s32), %6(s32), %7(s32) - %8:_(s32) = COPY $vgpr4 - %9:_(s32) = COPY $vgpr5 - %10:_(s32) = COPY $vgpr6 - %11:_(s32) = COPY $vgpr7 - %1:_(<4 x s32>) = G_BUILD_VECTOR %8(s32), %9(s32), %10(s32), %11(s32) - %12:_(s32) = COPY $vgpr8 - %13:_(s32) = COPY $vgpr9 - %14:_(s32) = COPY $vgpr10 - %15:_(s32) = COPY $vgpr11 - %2:_(<4 x s32>) = G_BUILD_VECTOR %12(s32), %13(s32), %14(s32), %15(s32) - %16:_(<4 x s32>) = reassoc G_FMUL %0, %1 - %17:_(<4 x s32>) = reassoc G_FADD %16, %2 - %19:_(s32), %20:_(s32), %21:_(s32), %22:_(s32) = G_UNMERGE_VALUES %17(<4 x s32>) - $vgpr0 = COPY %19(s32) - $vgpr1 = COPY %20(s32) - $vgpr2 = COPY %21(s32) - $vgpr3 = COPY %22(s32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 + %4:_(<4 x i32>) = G_BUILD_VECTOR %0(i32), %1(i32), %2(i32), %3(i32) + %5:_(i32) = COPY $vgpr4 + %6:_(i32) = COPY $vgpr5 + %7:_(i32) = COPY $vgpr6 + %8:_(i32) = COPY $vgpr7 + %9:_(<4 x i32>) = G_BUILD_VECTOR %5(i32), %6(i32), %7(i32), %8(i32) + %10:_(i32) = COPY $vgpr8 + %11:_(i32) = COPY $vgpr9 + %12:_(i32) = COPY $vgpr10 + %13:_(i32) = COPY $vgpr11 + %14:_(<4 x i32>) = G_BUILD_VECTOR %10(i32), %11(i32), %12(i32), %13(i32) + %15:_(<4 x f32>) = G_BITCAST %4(<4 x i32>) + %16:_(<4 x f32>) = G_BITCAST %9(<4 x i32>) + %17:_(<4 x f32>) = reassoc G_FMUL %15, %16 + %18:_(<4 x f32>) = G_BITCAST %14(<4 x i32>) + %19:_(<4 x f32>) = reassoc G_FADD %17, %18 + %20:_(<4 x i32>) = G_BITCAST %19(<4 x f32>) + %21:_(i32), %22:_(i32), %23:_(i32), %24:_(i32) = G_UNMERGE_VALUES %20(<4 x i32>) + $vgpr0 = COPY %21(i32) + $vgpr1 = COPY %22(i32) + $vgpr2 = COPY %23(i32) + $vgpr3 = COPY %24(i32) S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ... 
@@ -1063,200 +1315,236 @@ body: | ; GFX9-LABEL: name: test_3xfloat_add_mul_rhs ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s32>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<3 x s32>) = reassoc G_FADD [[BUILD_VECTOR2]], [[FMUL]] - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32) + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32) + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY6]](i32), [[COPY7]](i32), [[COPY8]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(<3 x f32>) = reassoc G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[BUILD_VECTOR2]](<3 x i32>) + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<3 x f32>) = reassoc G_FADD [[BITCAST2]], [[FMUL]] + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[FADD]](<3 x f32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<3 x i32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](i32) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; ; GFX9-CONTRACT-LABEL: name: test_3xfloat_add_mul_rhs ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; 
GFX9-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<3 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] - ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s32>) - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32) + ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32) + ; GFX9-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX9-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX9-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY6]](i32), [[COPY7]](i32), [[COPY8]](i32) + ; GFX9-CONTRACT-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX9-CONTRACT-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; GFX9-CONTRACT-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[BUILD_VECTOR2]](<3 x i32>) + ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<3 x f32>) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX9-CONTRACT-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[FMA]](<3 x f32>) + ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<3 x i32>) + ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX9-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](i32) ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; ; GFX9-DENORM-LABEL: name: test_3xfloat_add_mul_rhs ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX9-DENORM-NEXT: {{ $}} - ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) - ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-DENORM-NEXT: 
[[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s32>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<3 x s32>) = reassoc G_FADD [[BUILD_VECTOR2]], [[FMUL]] - ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) - ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32) + ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32) + ; GFX9-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX9-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX9-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY6]](i32), [[COPY7]](i32), [[COPY8]](i32) + ; GFX9-DENORM-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX9-DENORM-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<3 x f32>) = reassoc G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-DENORM-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[BUILD_VECTOR2]](<3 x i32>) + ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<3 x f32>) = reassoc G_FADD [[BITCAST2]], [[FMUL]] + ; GFX9-DENORM-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[FADD]](<3 x f32>) + ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<3 x i32>) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX9-DENORM-NEXT: $vgpr2 = COPY [[UV2]](i32) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; ; GFX9-UNSAFE-LABEL: name: test_3xfloat_add_mul_rhs ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; 
GFX9-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<3 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] - ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s32>) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32) + ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32) + ; GFX9-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX9-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX9-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY6]](i32), [[COPY7]](i32), [[COPY8]](i32) + ; GFX9-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX9-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; GFX9-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[BUILD_VECTOR2]](<3 x i32>) + ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<3 x f32>) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX9-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[FMA]](<3 x f32>) + ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<3 x i32>) + ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX9-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](i32) ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; ; GFX10-LABEL: name: test_3xfloat_add_mul_rhs ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = 
G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s32>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(<3 x s32>) = reassoc G_FADD [[BUILD_VECTOR2]], [[FMUL]] - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32) + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32) + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY6]](i32), [[COPY7]](i32), [[COPY8]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(<3 x f32>) = reassoc G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[BUILD_VECTOR2]](<3 x i32>) + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(<3 x f32>) = reassoc G_FADD [[BITCAST2]], [[FMUL]] + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[FADD]](<3 x f32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<3 x i32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](i32) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; ; GFX10-CONTRACT-LABEL: name: test_3xfloat_add_mul_rhs ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<3 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] - ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), 
[[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s32>) - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32) + ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32) + ; GFX10-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX10-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX10-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY6]](i32), [[COPY7]](i32), [[COPY8]](i32) + ; GFX10-CONTRACT-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX10-CONTRACT-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; GFX10-CONTRACT-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[BUILD_VECTOR2]](<3 x i32>) + ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<3 x f32>) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX10-CONTRACT-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[FMA]](<3 x f32>) + ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<3 x i32>) + ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX10-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](i32) ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; ; GFX10-DENORM-LABEL: name: test_3xfloat_add_mul_rhs ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX10-DENORM-NEXT: {{ $}} - ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) - ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s32>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<3 x s32>) = reassoc G_FADD [[BUILD_VECTOR2]], [[FMUL]] - ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) - ; 
GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32) + ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32) + ; GFX10-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX10-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX10-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY6]](i32), [[COPY7]](i32), [[COPY8]](i32) + ; GFX10-DENORM-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX10-DENORM-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<3 x f32>) = reassoc G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-DENORM-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[BUILD_VECTOR2]](<3 x i32>) + ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<3 x f32>) = reassoc G_FADD [[BITCAST2]], [[FMUL]] + ; GFX10-DENORM-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[FADD]](<3 x f32>) + ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<3 x i32>) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX10-DENORM-NEXT: $vgpr2 = COPY [[UV2]](i32) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; ; GFX10-UNSAFE-LABEL: name: test_3xfloat_add_mul_rhs ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<3 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] - ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s32>) - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; 
GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32) + ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32) + ; GFX10-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX10-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX10-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY6]](i32), [[COPY7]](i32), [[COPY8]](i32) + ; GFX10-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX10-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; GFX10-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[BUILD_VECTOR2]](<3 x i32>) + ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<3 x f32>) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX10-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[FMA]](<3 x f32>) + ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<3 x i32>) + ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX10-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](i32) ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 - %4:_(s32) = COPY $vgpr0 - %5:_(s32) = COPY $vgpr1 - %6:_(s32) = COPY $vgpr2 - %0:_(<3 x s32>) = G_BUILD_VECTOR %4(s32), %5(s32), %6(s32) - %7:_(s32) = COPY $vgpr3 - %8:_(s32) = COPY $vgpr4 - %9:_(s32) = COPY $vgpr5 - %1:_(<3 x s32>) = G_BUILD_VECTOR %7(s32), %8(s32), %9(s32) - %10:_(s32) = COPY $vgpr6 - %11:_(s32) = COPY $vgpr7 - %12:_(s32) = COPY $vgpr8 - %2:_(<3 x s32>) = G_BUILD_VECTOR %10(s32), %11(s32), %12(s32) - %13:_(<3 x s32>) = reassoc G_FMUL %0, %1 - %14:_(<3 x s32>) = reassoc G_FADD %2, %13 - %16:_(s32), %17:_(s32), %18:_(s32) = G_UNMERGE_VALUES %14(<3 x s32>) - $vgpr0 = COPY %16(s32) - $vgpr1 = COPY %17(s32) - $vgpr2 = COPY %18(s32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(<3 x i32>) = G_BUILD_VECTOR %0(i32), %1(i32), %2(i32) + %4:_(i32) = COPY $vgpr3 + %5:_(i32) = COPY $vgpr4 + %6:_(i32) = COPY $vgpr5 + %7:_(<3 x i32>) = G_BUILD_VECTOR %4(i32), %5(i32), %6(i32) + %8:_(i32) = COPY $vgpr6 + %9:_(i32) = COPY $vgpr7 + %10:_(i32) = COPY $vgpr8 + %11:_(<3 x i32>) = G_BUILD_VECTOR %8(i32), %9(i32), %10(i32) + %12:_(<3 x f32>) = G_BITCAST %3(<3 x i32>) + %13:_(<3 x f32>) = G_BITCAST %7(<3 x i32>) + %14:_(<3 x f32>) = reassoc G_FMUL %12, %13 + %15:_(<3 x f32>) = G_BITCAST %11(<3 x i32>) + %16:_(<3 x f32>) = reassoc G_FADD %15, %14 + %17:_(<3 x i32>) = G_BITCAST %16(<3 x f32>) + %18:_(i32), %19:_(i32), %20:_(i32) = G_UNMERGE_VALUES %17(<3 x i32>) + $vgpr0 = COPY %18(i32) + $vgpr1 = COPY %19(i32) + $vgpr2 = COPY %20(i32) S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ... 
@@ -1269,164 +1557,200 @@ body: | ; GFX9-LABEL: name: test_4xhalf_add_mul ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX9-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s16>) = reassoc G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<4 x s16>) = reassoc G_FADD [[FMUL]], [[CONCAT_VECTORS2]] - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[COPY]](<2 x i16>), [[COPY1]](<2 x i16>) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr3 + ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[COPY2]](<2 x i16>), [[COPY3]](<2 x i16>) + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX9-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[COPY4]](<2 x i16>), [[COPY5]](<2 x i16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x i16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[CONCAT_VECTORS1]](<4 x i16>) + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(<4 x f16>) = reassoc G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[CONCAT_VECTORS2]](<4 x i16>) + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<4 x f16>) = reassoc G_FADD [[FMUL]], [[BITCAST2]] + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[FADD]](<4 x f16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[BITCAST3]](<4 x i16>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](<2 x i16>) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](<2 x i16>) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; ; GFX9-CONTRACT-LABEL: name: test_4xhalf_add_mul ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) - ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; 
GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<4 x s16>) = G_FMA [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]], [[CONCAT_VECTORS2]] - ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FMA]](<4 x s16>) - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[COPY]](<2 x i16>), [[COPY1]](<2 x i16>) + ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr3 + ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[COPY2]](<2 x i16>), [[COPY3]](<2 x i16>) + ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[COPY4]](<2 x i16>), [[COPY5]](<2 x i16>) + ; GFX9-CONTRACT-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x i16>) + ; GFX9-CONTRACT-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[CONCAT_VECTORS1]](<4 x i16>) + ; GFX9-CONTRACT-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[CONCAT_VECTORS2]](<4 x i16>) + ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<4 x f16>) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX9-CONTRACT-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[FMA]](<4 x f16>) + ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[BITCAST3]](<4 x i16>) + ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](<2 x i16>) + ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](<2 x i16>) ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; ; GFX9-DENORM-LABEL: name: test_4xhalf_add_mul ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-DENORM-NEXT: {{ $}} - ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) - ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) - ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) - ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s16>) = reassoc G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] - ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<4 x s16>) = reassoc G_FADD [[FMUL]], [[CONCAT_VECTORS2]] - ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>) - ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; GFX9-DENORM-NEXT: 
[[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[COPY]](<2 x i16>), [[COPY1]](<2 x i16>) + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr3 + ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[COPY2]](<2 x i16>), [[COPY3]](<2 x i16>) + ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[COPY4]](<2 x i16>), [[COPY5]](<2 x i16>) + ; GFX9-DENORM-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x i16>) + ; GFX9-DENORM-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[CONCAT_VECTORS1]](<4 x i16>) + ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<4 x f16>) = reassoc G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-DENORM-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[CONCAT_VECTORS2]](<4 x i16>) + ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<4 x f16>) = reassoc G_FADD [[FMUL]], [[BITCAST2]] + ; GFX9-DENORM-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[FADD]](<4 x f16>) + ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[BITCAST3]](<4 x i16>) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](<2 x i16>) + ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](<2 x i16>) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; ; GFX9-UNSAFE-LABEL: name: test_4xhalf_add_mul ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<4 x s16>) = G_FMA [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]], [[CONCAT_VECTORS2]] - ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FMA]](<4 x s16>) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[COPY]](<2 x i16>), [[COPY1]](<2 x i16>) + ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr3 + ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[COPY2]](<2 x i16>), [[COPY3]](<2 x i16>) + ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(<2 x i16>) = COPY 
$vgpr5 + ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[COPY4]](<2 x i16>), [[COPY5]](<2 x i16>) + ; GFX9-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x i16>) + ; GFX9-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[CONCAT_VECTORS1]](<4 x i16>) + ; GFX9-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[CONCAT_VECTORS2]](<4 x i16>) + ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<4 x f16>) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX9-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[FMA]](<4 x f16>) + ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[BITCAST3]](<4 x i16>) + ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](<2 x i16>) + ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](<2 x i16>) ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; ; GFX10-LABEL: name: test_4xhalf_add_mul ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX10-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX10-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s16>) = reassoc G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] - ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(<4 x s16>) = reassoc G_FADD [[FMUL]], [[CONCAT_VECTORS2]] - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[COPY]](<2 x i16>), [[COPY1]](<2 x i16>) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr3 + ; GFX10-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[COPY2]](<2 x i16>), [[COPY3]](<2 x i16>) + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX10-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[COPY4]](<2 x i16>), [[COPY5]](<2 x i16>) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x i16>) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[CONCAT_VECTORS1]](<4 x i16>) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(<4 x f16>) = reassoc G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[CONCAT_VECTORS2]](<4 x i16>) + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(<4 x f16>) = reassoc G_FADD [[FMUL]], [[BITCAST2]] + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[FADD]](<4 x f16>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[BITCAST3]](<4 x 
i16>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](<2 x i16>) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](<2 x i16>) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; ; GFX10-CONTRACT-LABEL: name: test_4xhalf_add_mul ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) - ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<4 x s16>) = G_FMA [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]], [[CONCAT_VECTORS2]] - ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FMA]](<4 x s16>) - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[COPY]](<2 x i16>), [[COPY1]](<2 x i16>) + ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr3 + ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[COPY2]](<2 x i16>), [[COPY3]](<2 x i16>) + ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[COPY4]](<2 x i16>), [[COPY5]](<2 x i16>) + ; GFX10-CONTRACT-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x i16>) + ; GFX10-CONTRACT-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[CONCAT_VECTORS1]](<4 x i16>) + ; GFX10-CONTRACT-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[CONCAT_VECTORS2]](<4 x i16>) + ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<4 x f16>) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX10-CONTRACT-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[FMA]](<4 x f16>) + ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[BITCAST3]](<4 x i16>) + ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](<2 x i16>) + ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](<2 x i16>) ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; ; GFX10-DENORM-LABEL: name: test_4xhalf_add_mul ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-DENORM-NEXT: {{ $}} - ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) - ; 
GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) - ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) - ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s16>) = reassoc G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] - ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<4 x s16>) = reassoc G_FADD [[FMUL]], [[CONCAT_VECTORS2]] - ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>) - ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[COPY]](<2 x i16>), [[COPY1]](<2 x i16>) + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr3 + ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[COPY2]](<2 x i16>), [[COPY3]](<2 x i16>) + ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[COPY4]](<2 x i16>), [[COPY5]](<2 x i16>) + ; GFX10-DENORM-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x i16>) + ; GFX10-DENORM-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[CONCAT_VECTORS1]](<4 x i16>) + ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<4 x f16>) = reassoc G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-DENORM-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[CONCAT_VECTORS2]](<4 x i16>) + ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<4 x f16>) = reassoc G_FADD [[FMUL]], [[BITCAST2]] + ; GFX10-DENORM-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[FADD]](<4 x f16>) + ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[BITCAST3]](<4 x i16>) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](<2 x i16>) + ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](<2 x i16>) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; ; GFX10-UNSAFE-LABEL: name: test_4xhalf_add_mul ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) - ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x 
s16>), [[COPY5]](<2 x s16>) - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<4 x s16>) = G_FMA [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]], [[CONCAT_VECTORS2]] - ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FMA]](<4 x s16>) - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[COPY]](<2 x i16>), [[COPY1]](<2 x i16>) + ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr3 + ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[COPY2]](<2 x i16>), [[COPY3]](<2 x i16>) + ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[COPY4]](<2 x i16>), [[COPY5]](<2 x i16>) + ; GFX10-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x i16>) + ; GFX10-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[CONCAT_VECTORS1]](<4 x i16>) + ; GFX10-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[CONCAT_VECTORS2]](<4 x i16>) + ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<4 x f16>) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX10-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[FMA]](<4 x f16>) + ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[BITCAST3]](<4 x i16>) + ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](<2 x i16>) + ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](<2 x i16>) ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - %4:_(<2 x s16>) = COPY $vgpr0 - %5:_(<2 x s16>) = COPY $vgpr1 - %0:_(<4 x s16>) = G_CONCAT_VECTORS %4(<2 x s16>), %5(<2 x s16>) - %6:_(<2 x s16>) = COPY $vgpr2 - %7:_(<2 x s16>) = COPY $vgpr3 - %1:_(<4 x s16>) = G_CONCAT_VECTORS %6(<2 x s16>), %7(<2 x s16>) - %8:_(<2 x s16>) = COPY $vgpr4 - %9:_(<2 x s16>) = COPY $vgpr5 - %2:_(<4 x s16>) = G_CONCAT_VECTORS %8(<2 x s16>), %9(<2 x s16>) - %10:_(<4 x s16>) = reassoc G_FMUL %0, %1 - %11:_(<4 x s16>) = reassoc G_FADD %10, %2 - %13:_(<2 x s16>), %14:_(<2 x s16>) = G_UNMERGE_VALUES %11(<4 x s16>) - $vgpr0 = COPY %13(<2 x s16>) - $vgpr1 = COPY %14(<2 x s16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<4 x i16>) = G_CONCAT_VECTORS %0(<2 x i16>), %1(<2 x i16>) + %3:_(<2 x i16>) = COPY $vgpr2 + %4:_(<2 x i16>) = COPY $vgpr3 + %5:_(<4 x i16>) = G_CONCAT_VECTORS %3(<2 x i16>), %4(<2 x i16>) + %6:_(<2 x i16>) = COPY $vgpr4 + %7:_(<2 x i16>) = COPY $vgpr5 + %8:_(<4 x i16>) = G_CONCAT_VECTORS %6(<2 x i16>), %7(<2 x i16>) + %9:_(<4 x f16>) = G_BITCAST %2(<4 x i16>) + %10:_(<4 x f16>) = G_BITCAST %5(<4 x i16>) + %11:_(<4 x f16>) = reassoc G_FMUL %9, %10 + %12:_(<4 x f16>) = G_BITCAST %8(<4 x i16>) + %13:_(<4 x f16>) = reassoc G_FADD %11, %12 + %14:_(<4 x i16>) = G_BITCAST %13(<4 x f16>) + %15:_(<2 x i16>), %16:_(<2 x i16>) = G_UNMERGE_VALUES %14(<4 x i16>) + $vgpr0 = COPY %15(<2 x i16>) + $vgpr1 = COPY %16(<2 x i16>) S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ... 
@@ -1439,218 +1763,254 @@ body: | ; GFX9-LABEL: name: test_3xhalf_add_mul_rhs ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX9-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>) - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = reassoc G_FMUL [[UV]], [[UV2]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<3 x s16>) = reassoc G_FADD [[UV4]], [[FMUL]] - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FADD]](<3 x s16>), [[DEF1]](<3 x s16>) - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>) - ; GFX9-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<2 x i16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[COPY]](<2 x i16>), [[COPY1]](<2 x i16>), [[DEF]](<2 x i16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<3 x i16>), [[UV1:%[0-9]+]]:_(<3 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x i16>) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr3 + ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[COPY2]](<2 x i16>), [[COPY3]](<2 x i16>), [[DEF]](<2 x i16>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<3 x i16>), [[UV3:%[0-9]+]]:_(<3 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x i16>) + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX9-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[COPY4]](<2 x i16>), [[COPY5]](<2 x i16>), [[DEF]](<2 x i16>) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<3 x i16>), [[UV5:%[0-9]+]]:_(<3 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x i16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f16>) = G_BITCAST [[UV]](<3 x i16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f16>) = G_BITCAST [[UV2]](<3 x i16>) + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(<3 x f16>) = reassoc G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x f16>) = G_BITCAST [[UV4]](<3 x i16>) + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<3 x f16>) = reassoc G_FADD [[BITCAST2]], 
[[FMUL]] + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<3 x i16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x i16>) = G_BITCAST [[FADD]](<3 x f16>) + ; GFX9-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST3]](<3 x i16>), [[DEF1]](<3 x i16>) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>), [[UV8:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x i16>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV6]](<2 x i16>) + ; GFX9-NEXT: $vgpr1 = COPY [[UV7]](<2 x i16>) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; ; GFX9-CONTRACT-LABEL: name: test_3xhalf_add_mul_rhs ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-CONTRACT-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) - ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-CONTRACT-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>) - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<3 x s16>) = G_FMA [[UV]], [[UV2]], [[UV4]] - ; GFX9-CONTRACT-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FMA]](<3 x s16>), [[DEF1]](<3 x s16>) - ; GFX9-CONTRACT-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>) - ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>) + ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-CONTRACT-NEXT: [[DEF:%[0-9]+]]:_(<2 x i16>) = G_IMPLICIT_DEF + ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[COPY]](<2 x i16>), [[COPY1]](<2 x i16>), [[DEF]](<2 x i16>) + ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(<3 x i16>), [[UV1:%[0-9]+]]:_(<3 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x i16>) + ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr3 + ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[COPY2]](<2 x i16>), [[COPY3]](<2 x i16>), [[DEF]](<2 x i16>) + ; GFX9-CONTRACT-NEXT: [[UV2:%[0-9]+]]:_(<3 x i16>), [[UV3:%[0-9]+]]:_(<3 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x i16>) + ; 
GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[COPY4]](<2 x i16>), [[COPY5]](<2 x i16>), [[DEF]](<2 x i16>) + ; GFX9-CONTRACT-NEXT: [[UV4:%[0-9]+]]:_(<3 x i16>), [[UV5:%[0-9]+]]:_(<3 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x i16>) + ; GFX9-CONTRACT-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f16>) = G_BITCAST [[UV]](<3 x i16>) + ; GFX9-CONTRACT-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f16>) = G_BITCAST [[UV2]](<3 x i16>) + ; GFX9-CONTRACT-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x f16>) = G_BITCAST [[UV4]](<3 x i16>) + ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<3 x f16>) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX9-CONTRACT-NEXT: [[DEF1:%[0-9]+]]:_(<3 x i16>) = G_IMPLICIT_DEF + ; GFX9-CONTRACT-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x i16>) = G_BITCAST [[FMA]](<3 x f16>) + ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST3]](<3 x i16>), [[DEF1]](<3 x i16>) + ; GFX9-CONTRACT-NEXT: [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>), [[UV8:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x i16>) + ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV6]](<2 x i16>) + ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV7]](<2 x i16>) ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; ; GFX9-DENORM-LABEL: name: test_3xhalf_add_mul_rhs ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-DENORM-NEXT: {{ $}} - ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-DENORM-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-DENORM-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) - ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-DENORM-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>) - ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = reassoc G_FMUL [[UV]], [[UV2]] - ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<3 x s16>) = reassoc G_FADD [[UV4]], [[FMUL]] - ; GFX9-DENORM-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FADD]](<3 x s16>), [[DEF1]](<3 x s16>) - ; GFX9-DENORM-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) - ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>) - ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>) + ; GFX9-DENORM-NEXT: 
[[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[DEF:%[0-9]+]]:_(<2 x i16>) = G_IMPLICIT_DEF + ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[COPY]](<2 x i16>), [[COPY1]](<2 x i16>), [[DEF]](<2 x i16>) + ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(<3 x i16>), [[UV1:%[0-9]+]]:_(<3 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x i16>) + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr3 + ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[COPY2]](<2 x i16>), [[COPY3]](<2 x i16>), [[DEF]](<2 x i16>) + ; GFX9-DENORM-NEXT: [[UV2:%[0-9]+]]:_(<3 x i16>), [[UV3:%[0-9]+]]:_(<3 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x i16>) + ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[COPY4]](<2 x i16>), [[COPY5]](<2 x i16>), [[DEF]](<2 x i16>) + ; GFX9-DENORM-NEXT: [[UV4:%[0-9]+]]:_(<3 x i16>), [[UV5:%[0-9]+]]:_(<3 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x i16>) + ; GFX9-DENORM-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f16>) = G_BITCAST [[UV]](<3 x i16>) + ; GFX9-DENORM-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f16>) = G_BITCAST [[UV2]](<3 x i16>) + ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<3 x f16>) = reassoc G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-DENORM-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x f16>) = G_BITCAST [[UV4]](<3 x i16>) + ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<3 x f16>) = reassoc G_FADD [[BITCAST2]], [[FMUL]] + ; GFX9-DENORM-NEXT: [[DEF1:%[0-9]+]]:_(<3 x i16>) = G_IMPLICIT_DEF + ; GFX9-DENORM-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x i16>) = G_BITCAST [[FADD]](<3 x f16>) + ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST3]](<3 x i16>), [[DEF1]](<3 x i16>) + ; GFX9-DENORM-NEXT: [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>), [[UV8:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x i16>) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV6]](<2 x i16>) + ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV7]](<2 x i16>) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; ; GFX9-UNSAFE-LABEL: name: test_3xhalf_add_mul_rhs ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) - ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX9-UNSAFE-NEXT: 
[[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>) - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<3 x s16>) = G_FMA [[UV]], [[UV2]], [[UV4]] - ; GFX9-UNSAFE-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FMA]](<3 x s16>), [[DEF1]](<3 x s16>) - ; GFX9-UNSAFE-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>) + ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-UNSAFE-NEXT: [[DEF:%[0-9]+]]:_(<2 x i16>) = G_IMPLICIT_DEF + ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[COPY]](<2 x i16>), [[COPY1]](<2 x i16>), [[DEF]](<2 x i16>) + ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(<3 x i16>), [[UV1:%[0-9]+]]:_(<3 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x i16>) + ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr3 + ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[COPY2]](<2 x i16>), [[COPY3]](<2 x i16>), [[DEF]](<2 x i16>) + ; GFX9-UNSAFE-NEXT: [[UV2:%[0-9]+]]:_(<3 x i16>), [[UV3:%[0-9]+]]:_(<3 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x i16>) + ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[COPY4]](<2 x i16>), [[COPY5]](<2 x i16>), [[DEF]](<2 x i16>) + ; GFX9-UNSAFE-NEXT: [[UV4:%[0-9]+]]:_(<3 x i16>), [[UV5:%[0-9]+]]:_(<3 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x i16>) + ; GFX9-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f16>) = G_BITCAST [[UV]](<3 x i16>) + ; GFX9-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f16>) = G_BITCAST [[UV2]](<3 x i16>) + ; GFX9-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x f16>) = G_BITCAST [[UV4]](<3 x i16>) + ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<3 x f16>) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX9-UNSAFE-NEXT: [[DEF1:%[0-9]+]]:_(<3 x i16>) = G_IMPLICIT_DEF + ; GFX9-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x i16>) = G_BITCAST [[FMA]](<3 x f16>) + ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST3]](<3 x i16>), [[DEF1]](<3 x i16>) + ; GFX9-UNSAFE-NEXT: [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>), [[UV8:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x i16>) + ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV6]](<2 x i16>) + ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV7]](<2 x i16>) ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; ; GFX10-LABEL: name: test_3xhalf_add_mul_rhs ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) - ; 
GFX10-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX10-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX10-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>) - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = reassoc G_FMUL [[UV]], [[UV2]] - ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(<3 x s16>) = reassoc G_FADD [[UV4]], [[FMUL]] - ; GFX10-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FADD]](<3 x s16>), [[DEF1]](<3 x s16>) - ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>) - ; GFX10-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<2 x i16>) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[COPY]](<2 x i16>), [[COPY1]](<2 x i16>), [[DEF]](<2 x i16>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<3 x i16>), [[UV1:%[0-9]+]]:_(<3 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x i16>) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr3 + ; GFX10-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[COPY2]](<2 x i16>), [[COPY3]](<2 x i16>), [[DEF]](<2 x i16>) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(<3 x i16>), [[UV3:%[0-9]+]]:_(<3 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x i16>) + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX10-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[COPY4]](<2 x i16>), [[COPY5]](<2 x i16>), [[DEF]](<2 x i16>) + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(<3 x i16>), [[UV5:%[0-9]+]]:_(<3 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x i16>) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f16>) = G_BITCAST [[UV]](<3 x i16>) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f16>) = G_BITCAST [[UV2]](<3 x i16>) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(<3 x f16>) = reassoc G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x f16>) = G_BITCAST [[UV4]](<3 x i16>) + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(<3 x f16>) = reassoc G_FADD [[BITCAST2]], [[FMUL]] + ; GFX10-NEXT: [[DEF1:%[0-9]+]]:_(<3 x i16>) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x i16>) = G_BITCAST [[FADD]](<3 x f16>) + ; GFX10-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST3]](<3 x i16>), [[DEF1]](<3 x i16>) + ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>), [[UV8:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x i16>) + ; GFX10-NEXT: 
$vgpr0 = COPY [[UV6]](<2 x i16>) + ; GFX10-NEXT: $vgpr1 = COPY [[UV7]](<2 x i16>) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; ; GFX10-CONTRACT-LABEL: name: test_3xhalf_add_mul_rhs ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX10-CONTRACT-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) - ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX10-CONTRACT-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>) - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<3 x s16>) = G_FMA [[UV]], [[UV2]], [[UV4]] - ; GFX10-CONTRACT-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FMA]](<3 x s16>), [[DEF1]](<3 x s16>) - ; GFX10-CONTRACT-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>) - ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>) + ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX10-CONTRACT-NEXT: [[DEF:%[0-9]+]]:_(<2 x i16>) = G_IMPLICIT_DEF + ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[COPY]](<2 x i16>), [[COPY1]](<2 x i16>), [[DEF]](<2 x i16>) + ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(<3 x i16>), [[UV1:%[0-9]+]]:_(<3 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x i16>) + ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr3 + ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[COPY2]](<2 x i16>), [[COPY3]](<2 x i16>), [[DEF]](<2 x i16>) + ; GFX10-CONTRACT-NEXT: [[UV2:%[0-9]+]]:_(<3 x i16>), [[UV3:%[0-9]+]]:_(<3 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x i16>) + ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[COPY4]](<2 x i16>), [[COPY5]](<2 x i16>), [[DEF]](<2 x i16>) + ; GFX10-CONTRACT-NEXT: [[UV4:%[0-9]+]]:_(<3 x i16>), [[UV5:%[0-9]+]]:_(<3 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x i16>) + 
; GFX10-CONTRACT-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f16>) = G_BITCAST [[UV]](<3 x i16>) + ; GFX10-CONTRACT-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f16>) = G_BITCAST [[UV2]](<3 x i16>) + ; GFX10-CONTRACT-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x f16>) = G_BITCAST [[UV4]](<3 x i16>) + ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<3 x f16>) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX10-CONTRACT-NEXT: [[DEF1:%[0-9]+]]:_(<3 x i16>) = G_IMPLICIT_DEF + ; GFX10-CONTRACT-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x i16>) = G_BITCAST [[FMA]](<3 x f16>) + ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST3]](<3 x i16>), [[DEF1]](<3 x i16>) + ; GFX10-CONTRACT-NEXT: [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>), [[UV8:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x i16>) + ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV6]](<2 x i16>) + ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV7]](<2 x i16>) ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; ; GFX10-DENORM-LABEL: name: test_3xhalf_add_mul_rhs ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-DENORM-NEXT: {{ $}} - ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX10-DENORM-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX10-DENORM-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) - ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX10-DENORM-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>) - ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = reassoc G_FMUL [[UV]], [[UV2]] - ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<3 x s16>) = reassoc G_FADD [[UV4]], [[FMUL]] - ; GFX10-DENORM-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FADD]](<3 x s16>), [[DEF1]](<3 x s16>) - ; GFX10-DENORM-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) - ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>) - ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>) + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[DEF:%[0-9]+]]:_(<2 x i16>) = G_IMPLICIT_DEF + ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[COPY]](<2 x i16>), [[COPY1]](<2 x i16>), [[DEF]](<2 x i16>) + ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(<3 x i16>), [[UV1:%[0-9]+]]:_(<3 
x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x i16>) + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr3 + ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[COPY2]](<2 x i16>), [[COPY3]](<2 x i16>), [[DEF]](<2 x i16>) + ; GFX10-DENORM-NEXT: [[UV2:%[0-9]+]]:_(<3 x i16>), [[UV3:%[0-9]+]]:_(<3 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x i16>) + ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[COPY4]](<2 x i16>), [[COPY5]](<2 x i16>), [[DEF]](<2 x i16>) + ; GFX10-DENORM-NEXT: [[UV4:%[0-9]+]]:_(<3 x i16>), [[UV5:%[0-9]+]]:_(<3 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x i16>) + ; GFX10-DENORM-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f16>) = G_BITCAST [[UV]](<3 x i16>) + ; GFX10-DENORM-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f16>) = G_BITCAST [[UV2]](<3 x i16>) + ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<3 x f16>) = reassoc G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-DENORM-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x f16>) = G_BITCAST [[UV4]](<3 x i16>) + ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<3 x f16>) = reassoc G_FADD [[BITCAST2]], [[FMUL]] + ; GFX10-DENORM-NEXT: [[DEF1:%[0-9]+]]:_(<3 x i16>) = G_IMPLICIT_DEF + ; GFX10-DENORM-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x i16>) = G_BITCAST [[FADD]](<3 x f16>) + ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST3]](<3 x i16>), [[DEF1]](<3 x i16>) + ; GFX10-DENORM-NEXT: [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>), [[UV8:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x i16>) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV6]](<2 x i16>) + ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV7]](<2 x i16>) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; ; GFX10-UNSAFE-LABEL: name: test_3xhalf_add_mul_rhs ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX10-UNSAFE-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) - ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX10-UNSAFE-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>) - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<3 x s16>) = G_FMA [[UV]], [[UV2]], [[UV4]] - ; GFX10-UNSAFE-NEXT: 
[[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FMA]](<3 x s16>), [[DEF1]](<3 x s16>) - ; GFX10-UNSAFE-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>) - ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>) + ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX10-UNSAFE-NEXT: [[DEF:%[0-9]+]]:_(<2 x i16>) = G_IMPLICIT_DEF + ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[COPY]](<2 x i16>), [[COPY1]](<2 x i16>), [[DEF]](<2 x i16>) + ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(<3 x i16>), [[UV1:%[0-9]+]]:_(<3 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x i16>) + ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr3 + ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[COPY2]](<2 x i16>), [[COPY3]](<2 x i16>), [[DEF]](<2 x i16>) + ; GFX10-UNSAFE-NEXT: [[UV2:%[0-9]+]]:_(<3 x i16>), [[UV3:%[0-9]+]]:_(<3 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x i16>) + ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[COPY4]](<2 x i16>), [[COPY5]](<2 x i16>), [[DEF]](<2 x i16>) + ; GFX10-UNSAFE-NEXT: [[UV4:%[0-9]+]]:_(<3 x i16>), [[UV5:%[0-9]+]]:_(<3 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x i16>) + ; GFX10-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f16>) = G_BITCAST [[UV]](<3 x i16>) + ; GFX10-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f16>) = G_BITCAST [[UV2]](<3 x i16>) + ; GFX10-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x f16>) = G_BITCAST [[UV4]](<3 x i16>) + ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<3 x f16>) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX10-UNSAFE-NEXT: [[DEF1:%[0-9]+]]:_(<3 x i16>) = G_IMPLICIT_DEF + ; GFX10-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x i16>) = G_BITCAST [[FMA]](<3 x f16>) + ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST3]](<3 x i16>), [[DEF1]](<3 x i16>) + ; GFX10-UNSAFE-NEXT: [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>), [[UV8:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x i16>) + ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV6]](<2 x i16>) + ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV7]](<2 x i16>) ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - %4:_(<2 x s16>) = COPY $vgpr0 - %5:_(<2 x s16>) = COPY $vgpr1 - %10:_(<2 x s16>) = G_IMPLICIT_DEF - %11:_(<6 x s16>) = G_CONCAT_VECTORS %4(<2 x s16>), %5(<2 x s16>), %10(<2 x s16>) - %0:_(<3 x s16>), %12:_(<3 x s16>) = G_UNMERGE_VALUES %11(<6 x s16>) - %6:_(<2 x s16>) = COPY $vgpr2 - %7:_(<2 x s16>) = COPY $vgpr3 - %13:_(<6 x s16>) = G_CONCAT_VECTORS %6(<2 x s16>), %7(<2 x s16>), %10(<2 x s16>) - %1:_(<3 x s16>), %14:_(<3 x s16>) = G_UNMERGE_VALUES %13(<6 x s16>) - %8:_(<2 x s16>) = COPY $vgpr4 - %9:_(<2 x s16>) = COPY $vgpr5 - %15:_(<6 x s16>) = G_CONCAT_VECTORS %8(<2 x s16>), %9(<2 x s16>), %10(<2 x s16>) - %2:_(<3 x s16>), %16:_(<3 x s16>) = G_UNMERGE_VALUES %15(<6 x s16>) - %17:_(<3 x s16>) = reassoc G_FMUL %0, %1 - %18:_(<3 x s16>) = reassoc G_FADD %2, %17 - %22:_(<3 x s16>) 
= G_IMPLICIT_DEF - %23:_(<6 x s16>) = G_CONCAT_VECTORS %18(<3 x s16>), %22(<3 x s16>) - %20:_(<2 x s16>), %21:_(<2 x s16>), %24:_(<2 x s16>) = G_UNMERGE_VALUES %23(<6 x s16>) - $vgpr0 = COPY %20(<2 x s16>) - $vgpr1 = COPY %21(<2 x s16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = G_IMPLICIT_DEF + %3:_(<6 x i16>) = G_CONCAT_VECTORS %0(<2 x i16>), %1(<2 x i16>), %2(<2 x i16>) + %4:_(<3 x i16>), %5:_(<3 x i16>) = G_UNMERGE_VALUES %3(<6 x i16>) + %6:_(<2 x i16>) = COPY $vgpr2 + %7:_(<2 x i16>) = COPY $vgpr3 + %8:_(<6 x i16>) = G_CONCAT_VECTORS %6(<2 x i16>), %7(<2 x i16>), %2(<2 x i16>) + %9:_(<3 x i16>), %10:_(<3 x i16>) = G_UNMERGE_VALUES %8(<6 x i16>) + %11:_(<2 x i16>) = COPY $vgpr4 + %12:_(<2 x i16>) = COPY $vgpr5 + %13:_(<6 x i16>) = G_CONCAT_VECTORS %11(<2 x i16>), %12(<2 x i16>), %2(<2 x i16>) + %14:_(<3 x i16>), %15:_(<3 x i16>) = G_UNMERGE_VALUES %13(<6 x i16>) + %16:_(<3 x f16>) = G_BITCAST %4(<3 x i16>) + %17:_(<3 x f16>) = G_BITCAST %9(<3 x i16>) + %18:_(<3 x f16>) = reassoc G_FMUL %16, %17 + %19:_(<3 x f16>) = G_BITCAST %14(<3 x i16>) + %20:_(<3 x f16>) = reassoc G_FADD %19, %18 + %21:_(<3 x i16>) = G_IMPLICIT_DEF + %22:_(<3 x i16>) = G_BITCAST %20(<3 x f16>) + %23:_(<6 x i16>) = G_CONCAT_VECTORS %22(<3 x i16>), %21(<3 x i16>) + %24:_(<2 x i16>), %25:_(<2 x i16>), %26:_(<2 x i16>) = G_UNMERGE_VALUES %23(<6 x i16>) + $vgpr0 = COPY %24(<2 x i16>) + $vgpr1 = COPY %25(<2 x i16>) S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ... @@ -1663,488 +2023,524 @@ body: | ; GFX9-LABEL: name: test_4xdouble_add_mul ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX9-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX9-NEXT: 
[[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) - ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX9-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GFX9-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GFX9-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GFX9-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GFX9-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GFX9-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GFX9-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX9-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) - ; GFX9-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) - ; GFX9-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s64>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<4 x s64>) = reassoc G_FADD [[FMUL]], [[BUILD_VECTOR2]] - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX9-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX9-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX9-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GFX9-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64), [[MV3]](i64) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY8]](i32), [[COPY9]](i32) + ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-NEXT: [[MV6:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY12]](i32), [[COPY13]](i32) + ; GFX9-NEXT: [[MV7:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY14]](i32), [[COPY15]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i64>) = 
G_BUILD_VECTOR [[MV4]](i64), [[MV5]](i64), [[MV6]](i64), [[MV7]](i64) + ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr17 + ; GFX9-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr18 + ; GFX9-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr19 + ; GFX9-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr20 + ; GFX9-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr21 + ; GFX9-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr22 + ; GFX9-NEXT: [[COPY23:%[0-9]+]]:_(i32) = COPY $vgpr23 + ; GFX9-NEXT: [[MV8:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY16]](i32), [[COPY17]](i32) + ; GFX9-NEXT: [[MV9:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY18]](i32), [[COPY19]](i32) + ; GFX9-NEXT: [[MV10:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY20]](i32), [[COPY21]](i32) + ; GFX9-NEXT: [[MV11:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY22]](i32), [[COPY23]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV8]](i64), [[MV9]](i64), [[MV10]](i64), [[MV11]](i64) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f64>) = G_BITCAST [[BUILD_VECTOR]](<4 x i64>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f64>) = G_BITCAST [[BUILD_VECTOR1]](<4 x i64>) + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(<4 x f64>) = reassoc G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f64>) = G_BITCAST [[BUILD_VECTOR2]](<4 x i64>) + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<4 x f64>) = reassoc G_FADD [[FMUL]], [[BITCAST2]] + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i64>) = G_BITCAST [[FADD]](<4 x f64>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<4 x i64>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; GFX9-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; GFX9-NEXT: $vgpr5 = COPY [[UV5]](i32) + ; GFX9-NEXT: $vgpr6 = COPY [[UV6]](i32) + ; GFX9-NEXT: $vgpr7 = COPY [[UV7]](i32) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; ; GFX9-CONTRACT-LABEL: name: test_4xdouble_add_mul ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-CONTRACT-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = 
G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) - ; GFX9-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-CONTRACT-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX9-CONTRACT-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX9-CONTRACT-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX9-CONTRACT-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX9-CONTRACT-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX9-CONTRACT-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX9-CONTRACT-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX9-CONTRACT-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX9-CONTRACT-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-CONTRACT-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX9-CONTRACT-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) - ; GFX9-CONTRACT-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX9-CONTRACT-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX9-CONTRACT-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GFX9-CONTRACT-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GFX9-CONTRACT-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GFX9-CONTRACT-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GFX9-CONTRACT-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GFX9-CONTRACT-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GFX9-CONTRACT-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX9-CONTRACT-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) - ; GFX9-CONTRACT-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) - ; GFX9-CONTRACT-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<4 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] - ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s64>) - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX9-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX9-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX9-CONTRACT-NEXT: 
[[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX9-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX9-CONTRACT-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64), [[MV3]](i64) + ; GFX9-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX9-CONTRACT-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GFX9-CONTRACT-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GFX9-CONTRACT-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GFX9-CONTRACT-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; GFX9-CONTRACT-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; GFX9-CONTRACT-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; GFX9-CONTRACT-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; GFX9-CONTRACT-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY8]](i32), [[COPY9]](i32) + ; GFX9-CONTRACT-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-CONTRACT-NEXT: [[MV6:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY12]](i32), [[COPY13]](i32) + ; GFX9-CONTRACT-NEXT: [[MV7:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY14]](i32), [[COPY15]](i32) + ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV4]](i64), [[MV5]](i64), [[MV6]](i64), [[MV7]](i64) + ; GFX9-CONTRACT-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; GFX9-CONTRACT-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr17 + ; GFX9-CONTRACT-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr18 + ; GFX9-CONTRACT-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr19 + ; GFX9-CONTRACT-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr20 + ; GFX9-CONTRACT-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr21 + ; GFX9-CONTRACT-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr22 + ; GFX9-CONTRACT-NEXT: [[COPY23:%[0-9]+]]:_(i32) = COPY $vgpr23 + ; GFX9-CONTRACT-NEXT: [[MV8:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY16]](i32), [[COPY17]](i32) + ; GFX9-CONTRACT-NEXT: [[MV9:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY18]](i32), [[COPY19]](i32) + ; GFX9-CONTRACT-NEXT: [[MV10:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY20]](i32), [[COPY21]](i32) + ; GFX9-CONTRACT-NEXT: [[MV11:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY22]](i32), [[COPY23]](i32) + ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV8]](i64), [[MV9]](i64), [[MV10]](i64), [[MV11]](i64) + ; GFX9-CONTRACT-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f64>) = G_BITCAST [[BUILD_VECTOR]](<4 x i64>) + ; GFX9-CONTRACT-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f64>) = G_BITCAST [[BUILD_VECTOR1]](<4 x i64>) + ; GFX9-CONTRACT-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f64>) = G_BITCAST [[BUILD_VECTOR2]](<4 x i64>) + ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<4 x f64>) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX9-CONTRACT-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i64>) = G_BITCAST [[FMA]](<4 x f64>) + ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<4 x i64>) + ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX9-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; GFX9-CONTRACT-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; GFX9-CONTRACT-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; GFX9-CONTRACT-NEXT: $vgpr5 = COPY [[UV5]](i32) + ; GFX9-CONTRACT-NEXT: 
$vgpr6 = COPY [[UV6]](i32) + ; GFX9-CONTRACT-NEXT: $vgpr7 = COPY [[UV7]](i32) ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; ; GFX9-DENORM-LABEL: name: test_4xdouble_add_mul ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 ; GFX9-DENORM-NEXT: {{ $}} - ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-DENORM-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) - ; GFX9-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-DENORM-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX9-DENORM-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX9-DENORM-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX9-DENORM-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX9-DENORM-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX9-DENORM-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX9-DENORM-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX9-DENORM-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX9-DENORM-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-DENORM-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX9-DENORM-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX9-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) - ; GFX9-DENORM-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX9-DENORM-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX9-DENORM-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GFX9-DENORM-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GFX9-DENORM-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GFX9-DENORM-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GFX9-DENORM-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GFX9-DENORM-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GFX9-DENORM-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX9-DENORM-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) - ; GFX9-DENORM-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) - ; GFX9-DENORM-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) - ; GFX9-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), 
[[MV9]](s64), [[MV10]](s64), [[MV11]](s64) - ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s64>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<4 x s64>) = reassoc G_FADD [[FMUL]], [[BUILD_VECTOR2]] - ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>) - ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-DENORM-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX9-DENORM-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX9-DENORM-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX9-DENORM-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GFX9-DENORM-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX9-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX9-DENORM-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX9-DENORM-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX9-DENORM-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX9-DENORM-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64), [[MV3]](i64) + ; GFX9-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX9-DENORM-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GFX9-DENORM-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GFX9-DENORM-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GFX9-DENORM-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; GFX9-DENORM-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; GFX9-DENORM-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; GFX9-DENORM-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; GFX9-DENORM-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY8]](i32), [[COPY9]](i32) + ; GFX9-DENORM-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-DENORM-NEXT: [[MV6:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY12]](i32), [[COPY13]](i32) + ; GFX9-DENORM-NEXT: [[MV7:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY14]](i32), [[COPY15]](i32) + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV4]](i64), [[MV5]](i64), [[MV6]](i64), [[MV7]](i64) + ; GFX9-DENORM-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; GFX9-DENORM-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr17 + ; GFX9-DENORM-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr18 + ; GFX9-DENORM-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr19 + ; GFX9-DENORM-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr20 + ; GFX9-DENORM-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr21 + ; GFX9-DENORM-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr22 + ; GFX9-DENORM-NEXT: [[COPY23:%[0-9]+]]:_(i32) = COPY $vgpr23 + ; GFX9-DENORM-NEXT: [[MV8:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY16]](i32), [[COPY17]](i32) + ; GFX9-DENORM-NEXT: [[MV9:%[0-9]+]]:_(i64) = G_MERGE_VALUES 
[[COPY18]](i32), [[COPY19]](i32) + ; GFX9-DENORM-NEXT: [[MV10:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY20]](i32), [[COPY21]](i32) + ; GFX9-DENORM-NEXT: [[MV11:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY22]](i32), [[COPY23]](i32) + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV8]](i64), [[MV9]](i64), [[MV10]](i64), [[MV11]](i64) + ; GFX9-DENORM-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f64>) = G_BITCAST [[BUILD_VECTOR]](<4 x i64>) + ; GFX9-DENORM-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f64>) = G_BITCAST [[BUILD_VECTOR1]](<4 x i64>) + ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<4 x f64>) = reassoc G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-DENORM-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f64>) = G_BITCAST [[BUILD_VECTOR2]](<4 x i64>) + ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<4 x f64>) = reassoc G_FADD [[FMUL]], [[BITCAST2]] + ; GFX9-DENORM-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i64>) = G_BITCAST [[FADD]](<4 x f64>) + ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<4 x i64>) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX9-DENORM-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; GFX9-DENORM-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; GFX9-DENORM-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; GFX9-DENORM-NEXT: $vgpr5 = COPY [[UV5]](i32) + ; GFX9-DENORM-NEXT: $vgpr6 = COPY [[UV6]](i32) + ; GFX9-DENORM-NEXT: $vgpr7 = COPY [[UV7]](i32) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; ; GFX9-UNSAFE-LABEL: name: test_4xdouble_add_mul ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-UNSAFE-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) - ; GFX9-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-UNSAFE-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX9-UNSAFE-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX9-UNSAFE-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX9-UNSAFE-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX9-UNSAFE-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX9-UNSAFE-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX9-UNSAFE-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 
- ; GFX9-UNSAFE-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX9-UNSAFE-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-UNSAFE-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX9-UNSAFE-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) - ; GFX9-UNSAFE-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX9-UNSAFE-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX9-UNSAFE-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GFX9-UNSAFE-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GFX9-UNSAFE-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GFX9-UNSAFE-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GFX9-UNSAFE-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GFX9-UNSAFE-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GFX9-UNSAFE-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX9-UNSAFE-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) - ; GFX9-UNSAFE-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) - ; GFX9-UNSAFE-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<4 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] - ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s64>) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX9-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX9-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX9-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX9-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX9-UNSAFE-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64), [[MV3]](i64) + ; GFX9-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX9-UNSAFE-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GFX9-UNSAFE-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GFX9-UNSAFE-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GFX9-UNSAFE-NEXT: 
[[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; GFX9-UNSAFE-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; GFX9-UNSAFE-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; GFX9-UNSAFE-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; GFX9-UNSAFE-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY8]](i32), [[COPY9]](i32) + ; GFX9-UNSAFE-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-UNSAFE-NEXT: [[MV6:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY12]](i32), [[COPY13]](i32) + ; GFX9-UNSAFE-NEXT: [[MV7:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY14]](i32), [[COPY15]](i32) + ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV4]](i64), [[MV5]](i64), [[MV6]](i64), [[MV7]](i64) + ; GFX9-UNSAFE-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; GFX9-UNSAFE-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr17 + ; GFX9-UNSAFE-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr18 + ; GFX9-UNSAFE-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr19 + ; GFX9-UNSAFE-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr20 + ; GFX9-UNSAFE-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr21 + ; GFX9-UNSAFE-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr22 + ; GFX9-UNSAFE-NEXT: [[COPY23:%[0-9]+]]:_(i32) = COPY $vgpr23 + ; GFX9-UNSAFE-NEXT: [[MV8:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY16]](i32), [[COPY17]](i32) + ; GFX9-UNSAFE-NEXT: [[MV9:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY18]](i32), [[COPY19]](i32) + ; GFX9-UNSAFE-NEXT: [[MV10:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY20]](i32), [[COPY21]](i32) + ; GFX9-UNSAFE-NEXT: [[MV11:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY22]](i32), [[COPY23]](i32) + ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV8]](i64), [[MV9]](i64), [[MV10]](i64), [[MV11]](i64) + ; GFX9-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f64>) = G_BITCAST [[BUILD_VECTOR]](<4 x i64>) + ; GFX9-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f64>) = G_BITCAST [[BUILD_VECTOR1]](<4 x i64>) + ; GFX9-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f64>) = G_BITCAST [[BUILD_VECTOR2]](<4 x i64>) + ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<4 x f64>) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX9-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i64>) = G_BITCAST [[FMA]](<4 x f64>) + ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<4 x i64>) + ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX9-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; GFX9-UNSAFE-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; GFX9-UNSAFE-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; GFX9-UNSAFE-NEXT: $vgpr5 = COPY [[UV5]](i32) + ; GFX9-UNSAFE-NEXT: $vgpr6 = COPY [[UV6]](i32) + ; GFX9-UNSAFE-NEXT: $vgpr7 = COPY [[UV7]](i32) ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; ; GFX10-LABEL: name: test_4xdouble_add_mul ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: 
[[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX10-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GFX10-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GFX10-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GFX10-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GFX10-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GFX10-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX10-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) - ; GFX10-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) - ; GFX10-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s64>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(<4 x s64>) = reassoc G_FADD [[FMUL]], [[BUILD_VECTOR2]] - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX10-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX10-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX10-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GFX10-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; 
GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64), [[MV3]](i64) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY8]](i32), [[COPY9]](i32) + ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[MV6:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY12]](i32), [[COPY13]](i32) + ; GFX10-NEXT: [[MV7:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY14]](i32), [[COPY15]](i32) + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV4]](i64), [[MV5]](i64), [[MV6]](i64), [[MV7]](i64) + ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr17 + ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr18 + ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr19 + ; GFX10-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr20 + ; GFX10-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr21 + ; GFX10-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr22 + ; GFX10-NEXT: [[COPY23:%[0-9]+]]:_(i32) = COPY $vgpr23 + ; GFX10-NEXT: [[MV8:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY16]](i32), [[COPY17]](i32) + ; GFX10-NEXT: [[MV9:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY18]](i32), [[COPY19]](i32) + ; GFX10-NEXT: [[MV10:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY20]](i32), [[COPY21]](i32) + ; GFX10-NEXT: [[MV11:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY22]](i32), [[COPY23]](i32) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV8]](i64), [[MV9]](i64), [[MV10]](i64), [[MV11]](i64) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f64>) = G_BITCAST [[BUILD_VECTOR]](<4 x i64>) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f64>) = G_BITCAST [[BUILD_VECTOR1]](<4 x i64>) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(<4 x f64>) = reassoc G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f64>) = G_BITCAST [[BUILD_VECTOR2]](<4 x i64>) + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(<4 x f64>) = reassoc G_FADD [[FMUL]], [[BITCAST2]] + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i64>) = G_BITCAST [[FADD]](<4 x f64>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<4 x i64>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; GFX10-NEXT: $vgpr3 = COPY 
[[UV3]](i32) + ; GFX10-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; GFX10-NEXT: $vgpr5 = COPY [[UV5]](i32) + ; GFX10-NEXT: $vgpr6 = COPY [[UV6]](i32) + ; GFX10-NEXT: $vgpr7 = COPY [[UV7]](i32) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; ; GFX10-CONTRACT-LABEL: name: test_4xdouble_add_mul ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-CONTRACT-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) - ; GFX10-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-CONTRACT-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX10-CONTRACT-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX10-CONTRACT-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX10-CONTRACT-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX10-CONTRACT-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX10-CONTRACT-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX10-CONTRACT-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX10-CONTRACT-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX10-CONTRACT-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-CONTRACT-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX10-CONTRACT-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) - ; GFX10-CONTRACT-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX10-CONTRACT-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX10-CONTRACT-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GFX10-CONTRACT-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GFX10-CONTRACT-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GFX10-CONTRACT-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GFX10-CONTRACT-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GFX10-CONTRACT-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GFX10-CONTRACT-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX10-CONTRACT-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) - ; GFX10-CONTRACT-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES 
[[COPY20]](s32), [[COPY21]](s32) - ; GFX10-CONTRACT-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<4 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] - ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s64>) - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX10-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX10-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX10-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX10-CONTRACT-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64), [[MV3]](i64) + ; GFX10-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX10-CONTRACT-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GFX10-CONTRACT-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GFX10-CONTRACT-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GFX10-CONTRACT-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; GFX10-CONTRACT-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; GFX10-CONTRACT-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; GFX10-CONTRACT-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; GFX10-CONTRACT-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY8]](i32), [[COPY9]](i32) + ; GFX10-CONTRACT-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-CONTRACT-NEXT: [[MV6:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY12]](i32), [[COPY13]](i32) + ; GFX10-CONTRACT-NEXT: [[MV7:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY14]](i32), [[COPY15]](i32) + ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV4]](i64), [[MV5]](i64), [[MV6]](i64), [[MV7]](i64) + ; GFX10-CONTRACT-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; GFX10-CONTRACT-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr17 + ; GFX10-CONTRACT-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr18 + ; GFX10-CONTRACT-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr19 + ; GFX10-CONTRACT-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr20 + ; GFX10-CONTRACT-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr21 + ; GFX10-CONTRACT-NEXT: 
[[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr22 + ; GFX10-CONTRACT-NEXT: [[COPY23:%[0-9]+]]:_(i32) = COPY $vgpr23 + ; GFX10-CONTRACT-NEXT: [[MV8:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY16]](i32), [[COPY17]](i32) + ; GFX10-CONTRACT-NEXT: [[MV9:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY18]](i32), [[COPY19]](i32) + ; GFX10-CONTRACT-NEXT: [[MV10:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY20]](i32), [[COPY21]](i32) + ; GFX10-CONTRACT-NEXT: [[MV11:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY22]](i32), [[COPY23]](i32) + ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV8]](i64), [[MV9]](i64), [[MV10]](i64), [[MV11]](i64) + ; GFX10-CONTRACT-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f64>) = G_BITCAST [[BUILD_VECTOR]](<4 x i64>) + ; GFX10-CONTRACT-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f64>) = G_BITCAST [[BUILD_VECTOR1]](<4 x i64>) + ; GFX10-CONTRACT-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f64>) = G_BITCAST [[BUILD_VECTOR2]](<4 x i64>) + ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<4 x f64>) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX10-CONTRACT-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i64>) = G_BITCAST [[FMA]](<4 x f64>) + ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<4 x i64>) + ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX10-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; GFX10-CONTRACT-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; GFX10-CONTRACT-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; GFX10-CONTRACT-NEXT: $vgpr5 = COPY [[UV5]](i32) + ; GFX10-CONTRACT-NEXT: $vgpr6 = COPY [[UV6]](i32) + ; GFX10-CONTRACT-NEXT: $vgpr7 = COPY [[UV7]](i32) ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; ; GFX10-DENORM-LABEL: name: test_4xdouble_add_mul ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 ; GFX10-DENORM-NEXT: {{ $}} - ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-DENORM-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) - ; GFX10-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-DENORM-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX10-DENORM-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX10-DENORM-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY 
$vgpr11 - ; GFX10-DENORM-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX10-DENORM-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX10-DENORM-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX10-DENORM-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX10-DENORM-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX10-DENORM-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-DENORM-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX10-DENORM-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX10-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) - ; GFX10-DENORM-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX10-DENORM-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX10-DENORM-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GFX10-DENORM-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GFX10-DENORM-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GFX10-DENORM-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GFX10-DENORM-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GFX10-DENORM-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GFX10-DENORM-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX10-DENORM-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) - ; GFX10-DENORM-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) - ; GFX10-DENORM-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) - ; GFX10-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) - ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s64>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<4 x s64>) = reassoc G_FADD [[FMUL]], [[BUILD_VECTOR2]] - ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>) - ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-DENORM-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX10-DENORM-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX10-DENORM-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX10-DENORM-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GFX10-DENORM-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX10-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX10-DENORM-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX10-DENORM-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-DENORM-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX10-DENORM-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY6]](i32), [[COPY7]](i32) + ; 
GFX10-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64), [[MV3]](i64) + ; GFX10-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX10-DENORM-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GFX10-DENORM-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GFX10-DENORM-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GFX10-DENORM-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; GFX10-DENORM-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; GFX10-DENORM-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; GFX10-DENORM-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; GFX10-DENORM-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY8]](i32), [[COPY9]](i32) + ; GFX10-DENORM-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-DENORM-NEXT: [[MV6:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY12]](i32), [[COPY13]](i32) + ; GFX10-DENORM-NEXT: [[MV7:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY14]](i32), [[COPY15]](i32) + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV4]](i64), [[MV5]](i64), [[MV6]](i64), [[MV7]](i64) + ; GFX10-DENORM-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; GFX10-DENORM-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr17 + ; GFX10-DENORM-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr18 + ; GFX10-DENORM-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr19 + ; GFX10-DENORM-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr20 + ; GFX10-DENORM-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr21 + ; GFX10-DENORM-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr22 + ; GFX10-DENORM-NEXT: [[COPY23:%[0-9]+]]:_(i32) = COPY $vgpr23 + ; GFX10-DENORM-NEXT: [[MV8:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY16]](i32), [[COPY17]](i32) + ; GFX10-DENORM-NEXT: [[MV9:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY18]](i32), [[COPY19]](i32) + ; GFX10-DENORM-NEXT: [[MV10:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY20]](i32), [[COPY21]](i32) + ; GFX10-DENORM-NEXT: [[MV11:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY22]](i32), [[COPY23]](i32) + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV8]](i64), [[MV9]](i64), [[MV10]](i64), [[MV11]](i64) + ; GFX10-DENORM-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f64>) = G_BITCAST [[BUILD_VECTOR]](<4 x i64>) + ; GFX10-DENORM-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f64>) = G_BITCAST [[BUILD_VECTOR1]](<4 x i64>) + ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<4 x f64>) = reassoc G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-DENORM-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f64>) = G_BITCAST [[BUILD_VECTOR2]](<4 x i64>) + ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<4 x f64>) = reassoc G_FADD [[FMUL]], [[BITCAST2]] + ; GFX10-DENORM-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i64>) = G_BITCAST [[FADD]](<4 x f64>) + ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<4 x i64>) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX10-DENORM-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; GFX10-DENORM-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; GFX10-DENORM-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; GFX10-DENORM-NEXT: $vgpr5 = COPY [[UV5]](i32) + ; GFX10-DENORM-NEXT: $vgpr6 = COPY [[UV6]](i32) + ; GFX10-DENORM-NEXT: $vgpr7 = COPY [[UV7]](i32) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, 
implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; ; GFX10-UNSAFE-LABEL: name: test_4xdouble_add_mul ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-UNSAFE-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) - ; GFX10-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-UNSAFE-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX10-UNSAFE-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX10-UNSAFE-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX10-UNSAFE-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX10-UNSAFE-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX10-UNSAFE-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX10-UNSAFE-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX10-UNSAFE-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX10-UNSAFE-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-UNSAFE-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX10-UNSAFE-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) - ; GFX10-UNSAFE-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX10-UNSAFE-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX10-UNSAFE-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GFX10-UNSAFE-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GFX10-UNSAFE-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GFX10-UNSAFE-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GFX10-UNSAFE-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GFX10-UNSAFE-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GFX10-UNSAFE-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX10-UNSAFE-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) - ; GFX10-UNSAFE-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) - ; GFX10-UNSAFE-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<4 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] - ; 
GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s64>) - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX10-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX10-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX10-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX10-UNSAFE-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64), [[MV3]](i64) + ; GFX10-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX10-UNSAFE-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GFX10-UNSAFE-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GFX10-UNSAFE-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GFX10-UNSAFE-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; GFX10-UNSAFE-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; GFX10-UNSAFE-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; GFX10-UNSAFE-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; GFX10-UNSAFE-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY8]](i32), [[COPY9]](i32) + ; GFX10-UNSAFE-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-UNSAFE-NEXT: [[MV6:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY12]](i32), [[COPY13]](i32) + ; GFX10-UNSAFE-NEXT: [[MV7:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY14]](i32), [[COPY15]](i32) + ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV4]](i64), [[MV5]](i64), [[MV6]](i64), [[MV7]](i64) + ; GFX10-UNSAFE-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; GFX10-UNSAFE-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr17 + ; GFX10-UNSAFE-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr18 + ; GFX10-UNSAFE-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr19 + ; GFX10-UNSAFE-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr20 + ; GFX10-UNSAFE-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr21 + ; GFX10-UNSAFE-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr22 + ; GFX10-UNSAFE-NEXT: [[COPY23:%[0-9]+]]:_(i32) = COPY $vgpr23 + ; GFX10-UNSAFE-NEXT: [[MV8:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY16]](i32), [[COPY17]](i32) + ; GFX10-UNSAFE-NEXT: [[MV9:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY18]](i32), [[COPY19]](i32) + ; GFX10-UNSAFE-NEXT: [[MV10:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY20]](i32), [[COPY21]](i32) + ; GFX10-UNSAFE-NEXT: [[MV11:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY22]](i32), 
[[COPY23]](i32) + ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV8]](i64), [[MV9]](i64), [[MV10]](i64), [[MV11]](i64) + ; GFX10-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f64>) = G_BITCAST [[BUILD_VECTOR]](<4 x i64>) + ; GFX10-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f64>) = G_BITCAST [[BUILD_VECTOR1]](<4 x i64>) + ; GFX10-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f64>) = G_BITCAST [[BUILD_VECTOR2]](<4 x i64>) + ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<4 x f64>) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX10-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i64>) = G_BITCAST [[FMA]](<4 x f64>) + ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<4 x i64>) + ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX10-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; GFX10-UNSAFE-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; GFX10-UNSAFE-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; GFX10-UNSAFE-NEXT: $vgpr5 = COPY [[UV5]](i32) + ; GFX10-UNSAFE-NEXT: $vgpr6 = COPY [[UV6]](i32) + ; GFX10-UNSAFE-NEXT: $vgpr7 = COPY [[UV7]](i32) ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 - %4:_(s32) = COPY $vgpr0 - %5:_(s32) = COPY $vgpr1 - %6:_(s32) = COPY $vgpr2 - %7:_(s32) = COPY $vgpr3 - %8:_(s32) = COPY $vgpr4 - %9:_(s32) = COPY $vgpr5 - %10:_(s32) = COPY $vgpr6 - %11:_(s32) = COPY $vgpr7 - %28:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) - %29:_(s64) = G_MERGE_VALUES %6(s32), %7(s32) - %30:_(s64) = G_MERGE_VALUES %8(s32), %9(s32) - %31:_(s64) = G_MERGE_VALUES %10(s32), %11(s32) - %0:_(<4 x s64>) = G_BUILD_VECTOR %28(s64), %29(s64), %30(s64), %31(s64) - %12:_(s32) = COPY $vgpr8 - %13:_(s32) = COPY $vgpr9 - %14:_(s32) = COPY $vgpr10 - %15:_(s32) = COPY $vgpr11 - %16:_(s32) = COPY $vgpr12 - %17:_(s32) = COPY $vgpr13 - %18:_(s32) = COPY $vgpr14 - %19:_(s32) = COPY $vgpr15 - %32:_(s64) = G_MERGE_VALUES %12(s32), %13(s32) - %33:_(s64) = G_MERGE_VALUES %14(s32), %15(s32) - %34:_(s64) = G_MERGE_VALUES %16(s32), %17(s32) - %35:_(s64) = G_MERGE_VALUES %18(s32), %19(s32) - %1:_(<4 x s64>) = G_BUILD_VECTOR %32(s64), %33(s64), %34(s64), %35(s64) - %20:_(s32) = COPY $vgpr16 - %21:_(s32) = COPY $vgpr17 - %22:_(s32) = COPY $vgpr18 - %23:_(s32) = COPY $vgpr19 - %24:_(s32) = COPY $vgpr20 - %25:_(s32) = COPY $vgpr21 - %26:_(s32) = COPY $vgpr22 - %27:_(s32) = COPY $vgpr23 - %36:_(s64) = G_MERGE_VALUES %20(s32), %21(s32) - %37:_(s64) = G_MERGE_VALUES %22(s32), %23(s32) - %38:_(s64) = G_MERGE_VALUES %24(s32), %25(s32) - %39:_(s64) = G_MERGE_VALUES %26(s32), %27(s32) - %2:_(<4 x s64>) = G_BUILD_VECTOR %36(s64), %37(s64), %38(s64), %39(s64) - %40:_(<4 x s64>) = reassoc G_FMUL %0, %1 - %41:_(<4 x s64>) = reassoc G_FADD %40, %2 - %43:_(s32), %44:_(s32), %45:_(s32), %46:_(s32), %47:_(s32), %48:_(s32), %49:_(s32), %50:_(s32) = G_UNMERGE_VALUES %41(<4 x s64>) - $vgpr0 = COPY %43(s32) - $vgpr1 = COPY %44(s32) - $vgpr2 = COPY %45(s32) - $vgpr3 = COPY %46(s32) - $vgpr4 = COPY %47(s32) - $vgpr5 = COPY %48(s32) - $vgpr6 = COPY %49(s32) - $vgpr7 = COPY %50(s32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 + %4:_(i32) = COPY $vgpr4 + %5:_(i32) = COPY $vgpr5 + %6:_(i32) = COPY $vgpr6 + %7:_(i32) = 
COPY $vgpr7 + %8:_(i64) = G_MERGE_VALUES %0(i32), %1(i32) + %9:_(i64) = G_MERGE_VALUES %2(i32), %3(i32) + %10:_(i64) = G_MERGE_VALUES %4(i32), %5(i32) + %11:_(i64) = G_MERGE_VALUES %6(i32), %7(i32) + %12:_(<4 x i64>) = G_BUILD_VECTOR %8(i64), %9(i64), %10(i64), %11(i64) + %13:_(i32) = COPY $vgpr8 + %14:_(i32) = COPY $vgpr9 + %15:_(i32) = COPY $vgpr10 + %16:_(i32) = COPY $vgpr11 + %17:_(i32) = COPY $vgpr12 + %18:_(i32) = COPY $vgpr13 + %19:_(i32) = COPY $vgpr14 + %20:_(i32) = COPY $vgpr15 + %21:_(i64) = G_MERGE_VALUES %13(i32), %14(i32) + %22:_(i64) = G_MERGE_VALUES %15(i32), %16(i32) + %23:_(i64) = G_MERGE_VALUES %17(i32), %18(i32) + %24:_(i64) = G_MERGE_VALUES %19(i32), %20(i32) + %25:_(<4 x i64>) = G_BUILD_VECTOR %21(i64), %22(i64), %23(i64), %24(i64) + %26:_(i32) = COPY $vgpr16 + %27:_(i32) = COPY $vgpr17 + %28:_(i32) = COPY $vgpr18 + %29:_(i32) = COPY $vgpr19 + %30:_(i32) = COPY $vgpr20 + %31:_(i32) = COPY $vgpr21 + %32:_(i32) = COPY $vgpr22 + %33:_(i32) = COPY $vgpr23 + %34:_(i64) = G_MERGE_VALUES %26(i32), %27(i32) + %35:_(i64) = G_MERGE_VALUES %28(i32), %29(i32) + %36:_(i64) = G_MERGE_VALUES %30(i32), %31(i32) + %37:_(i64) = G_MERGE_VALUES %32(i32), %33(i32) + %38:_(<4 x i64>) = G_BUILD_VECTOR %34(i64), %35(i64), %36(i64), %37(i64) + %39:_(<4 x f64>) = G_BITCAST %12(<4 x i64>) + %40:_(<4 x f64>) = G_BITCAST %25(<4 x i64>) + %41:_(<4 x f64>) = reassoc G_FMUL %39, %40 + %42:_(<4 x f64>) = G_BITCAST %38(<4 x i64>) + %43:_(<4 x f64>) = reassoc G_FADD %41, %42 + %44:_(<4 x i64>) = G_BITCAST %43(<4 x f64>) + %45:_(i32), %46:_(i32), %47:_(i32), %48:_(i32), %49:_(i32), %50:_(i32), %51:_(i32), %52:_(i32) = G_UNMERGE_VALUES %44(<4 x i64>) + $vgpr0 = COPY %45(i32) + $vgpr1 = COPY %46(i32) + $vgpr2 = COPY %47(i32) + $vgpr3 = COPY %48(i32) + $vgpr4 = COPY %49(i32) + $vgpr5 = COPY %50(i32) + $vgpr6 = COPY %51(i32) + $vgpr7 = COPY %52(i32) S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ... 
@@ -2157,388 +2553,424 @@ body: | ; GFX9-LABEL: name: test_3xdouble_add_mul_rhs ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX9-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX9-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX9-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s64>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<3 x s64>) = reassoc G_FADD [[BUILD_VECTOR2]], [[FMUL]] - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX9-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX9-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES 
[[COPY4]](i32), [[COPY5]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64) + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY8]](i32), [[COPY9]](i32) + ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i64>) = G_BUILD_VECTOR [[MV3]](i64), [[MV4]](i64), [[MV5]](i64) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr17 + ; GFX9-NEXT: [[MV6:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY12]](i32), [[COPY13]](i32) + ; GFX9-NEXT: [[MV7:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY14]](i32), [[COPY15]](i32) + ; GFX9-NEXT: [[MV8:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY16]](i32), [[COPY17]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x i64>) = G_BUILD_VECTOR [[MV6]](i64), [[MV7]](i64), [[MV8]](i64) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f64>) = G_BITCAST [[BUILD_VECTOR]](<3 x i64>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f64>) = G_BITCAST [[BUILD_VECTOR1]](<3 x i64>) + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(<3 x f64>) = reassoc G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x f64>) = G_BITCAST [[BUILD_VECTOR2]](<3 x i64>) + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<3 x f64>) = reassoc G_FADD [[BITCAST2]], [[FMUL]] + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x i64>) = G_BITCAST [[FADD]](<3 x f64>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<3 x i64>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; GFX9-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; GFX9-NEXT: $vgpr5 = COPY [[UV5]](i32) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 ; ; GFX9-CONTRACT-LABEL: name: test_3xdouble_add_mul_rhs ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = 
G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) - ; GFX9-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-CONTRACT-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX9-CONTRACT-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX9-CONTRACT-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX9-CONTRACT-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-CONTRACT-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX9-CONTRACT-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64) - ; GFX9-CONTRACT-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX9-CONTRACT-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX9-CONTRACT-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX9-CONTRACT-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX9-CONTRACT-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX9-CONTRACT-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX9-CONTRACT-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX9-CONTRACT-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX9-CONTRACT-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<3 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] - ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s64>) - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX9-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX9-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64) + ; GFX9-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX9-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX9-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX9-CONTRACT-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GFX9-CONTRACT-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GFX9-CONTRACT-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; 
GFX9-CONTRACT-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-CONTRACT-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY8]](i32), [[COPY9]](i32) + ; GFX9-CONTRACT-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i64>) = G_BUILD_VECTOR [[MV3]](i64), [[MV4]](i64), [[MV5]](i64) + ; GFX9-CONTRACT-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; GFX9-CONTRACT-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; GFX9-CONTRACT-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; GFX9-CONTRACT-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; GFX9-CONTRACT-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; GFX9-CONTRACT-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr17 + ; GFX9-CONTRACT-NEXT: [[MV6:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY12]](i32), [[COPY13]](i32) + ; GFX9-CONTRACT-NEXT: [[MV7:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY14]](i32), [[COPY15]](i32) + ; GFX9-CONTRACT-NEXT: [[MV8:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY16]](i32), [[COPY17]](i32) + ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x i64>) = G_BUILD_VECTOR [[MV6]](i64), [[MV7]](i64), [[MV8]](i64) + ; GFX9-CONTRACT-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f64>) = G_BITCAST [[BUILD_VECTOR]](<3 x i64>) + ; GFX9-CONTRACT-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f64>) = G_BITCAST [[BUILD_VECTOR1]](<3 x i64>) + ; GFX9-CONTRACT-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x f64>) = G_BITCAST [[BUILD_VECTOR2]](<3 x i64>) + ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<3 x f64>) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX9-CONTRACT-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x i64>) = G_BITCAST [[FMA]](<3 x f64>) + ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<3 x i64>) + ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX9-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; GFX9-CONTRACT-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; GFX9-CONTRACT-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; GFX9-CONTRACT-NEXT: $vgpr5 = COPY [[UV5]](i32) ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 ; ; GFX9-DENORM-LABEL: name: test_3xdouble_add_mul_rhs ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 ; GFX9-DENORM-NEXT: {{ $}} - ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) - ; GFX9-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY 
$vgpr8 - ; GFX9-DENORM-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX9-DENORM-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX9-DENORM-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX9-DENORM-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-DENORM-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX9-DENORM-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64) - ; GFX9-DENORM-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX9-DENORM-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX9-DENORM-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX9-DENORM-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX9-DENORM-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX9-DENORM-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX9-DENORM-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX9-DENORM-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX9-DENORM-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX9-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) - ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s64>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<3 x s64>) = reassoc G_FADD [[BUILD_VECTOR2]], [[FMUL]] - ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>) - ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-DENORM-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX9-DENORM-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX9-DENORM-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-DENORM-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX9-DENORM-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX9-DENORM-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64) + ; GFX9-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX9-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX9-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX9-DENORM-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GFX9-DENORM-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GFX9-DENORM-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GFX9-DENORM-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-DENORM-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY8]](i32), [[COPY9]](i32) + ; GFX9-DENORM-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i64>) = 
G_BUILD_VECTOR [[MV3]](i64), [[MV4]](i64), [[MV5]](i64) + ; GFX9-DENORM-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; GFX9-DENORM-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; GFX9-DENORM-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; GFX9-DENORM-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; GFX9-DENORM-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; GFX9-DENORM-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr17 + ; GFX9-DENORM-NEXT: [[MV6:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY12]](i32), [[COPY13]](i32) + ; GFX9-DENORM-NEXT: [[MV7:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY14]](i32), [[COPY15]](i32) + ; GFX9-DENORM-NEXT: [[MV8:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY16]](i32), [[COPY17]](i32) + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x i64>) = G_BUILD_VECTOR [[MV6]](i64), [[MV7]](i64), [[MV8]](i64) + ; GFX9-DENORM-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f64>) = G_BITCAST [[BUILD_VECTOR]](<3 x i64>) + ; GFX9-DENORM-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f64>) = G_BITCAST [[BUILD_VECTOR1]](<3 x i64>) + ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<3 x f64>) = reassoc G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9-DENORM-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x f64>) = G_BITCAST [[BUILD_VECTOR2]](<3 x i64>) + ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<3 x f64>) = reassoc G_FADD [[BITCAST2]], [[FMUL]] + ; GFX9-DENORM-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x i64>) = G_BITCAST [[FADD]](<3 x f64>) + ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<3 x i64>) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX9-DENORM-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; GFX9-DENORM-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; GFX9-DENORM-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; GFX9-DENORM-NEXT: $vgpr5 = COPY [[UV5]](i32) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 ; ; GFX9-UNSAFE-LABEL: name: test_3xdouble_add_mul_rhs ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) - ; GFX9-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-UNSAFE-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX9-UNSAFE-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX9-UNSAFE-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX9-UNSAFE-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; 
GFX9-UNSAFE-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX9-UNSAFE-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64) - ; GFX9-UNSAFE-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX9-UNSAFE-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX9-UNSAFE-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX9-UNSAFE-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX9-UNSAFE-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX9-UNSAFE-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX9-UNSAFE-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX9-UNSAFE-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX9-UNSAFE-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<3 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] - ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s64>) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX9-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX9-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64) + ; GFX9-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX9-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX9-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX9-UNSAFE-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GFX9-UNSAFE-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GFX9-UNSAFE-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GFX9-UNSAFE-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY6]](i32), [[COPY7]](i32) + ; GFX9-UNSAFE-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY8]](i32), [[COPY9]](i32) + ; GFX9-UNSAFE-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY10]](i32), [[COPY11]](i32) + ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i64>) = G_BUILD_VECTOR [[MV3]](i64), [[MV4]](i64), [[MV5]](i64) + ; GFX9-UNSAFE-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; GFX9-UNSAFE-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; GFX9-UNSAFE-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; GFX9-UNSAFE-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; GFX9-UNSAFE-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; 
GFX9-UNSAFE-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr17 + ; GFX9-UNSAFE-NEXT: [[MV6:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY12]](i32), [[COPY13]](i32) + ; GFX9-UNSAFE-NEXT: [[MV7:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY14]](i32), [[COPY15]](i32) + ; GFX9-UNSAFE-NEXT: [[MV8:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY16]](i32), [[COPY17]](i32) + ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x i64>) = G_BUILD_VECTOR [[MV6]](i64), [[MV7]](i64), [[MV8]](i64) + ; GFX9-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f64>) = G_BITCAST [[BUILD_VECTOR]](<3 x i64>) + ; GFX9-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f64>) = G_BITCAST [[BUILD_VECTOR1]](<3 x i64>) + ; GFX9-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x f64>) = G_BITCAST [[BUILD_VECTOR2]](<3 x i64>) + ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<3 x f64>) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX9-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x i64>) = G_BITCAST [[FMA]](<3 x f64>) + ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<3 x i64>) + ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX9-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; GFX9-UNSAFE-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; GFX9-UNSAFE-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; GFX9-UNSAFE-NEXT: $vgpr5 = COPY [[UV5]](i32) ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 ; ; GFX10-LABEL: name: test_3xdouble_add_mul_rhs ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX10-NEXT: 
[[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX10-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX10-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX10-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s64>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(<3 x s64>) = reassoc G_FADD [[BUILD_VECTOR2]], [[FMUL]] - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX10-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX10-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64) + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY8]](i32), [[COPY9]](i32) + ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i64>) = G_BUILD_VECTOR [[MV3]](i64), [[MV4]](i64), [[MV5]](i64) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr17 + ; GFX10-NEXT: [[MV6:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY12]](i32), [[COPY13]](i32) + ; GFX10-NEXT: [[MV7:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY14]](i32), [[COPY15]](i32) + ; GFX10-NEXT: [[MV8:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY16]](i32), [[COPY17]](i32) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x i64>) = G_BUILD_VECTOR [[MV6]](i64), [[MV7]](i64), [[MV8]](i64) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f64>) = G_BITCAST [[BUILD_VECTOR]](<3 x i64>) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f64>) = G_BITCAST [[BUILD_VECTOR1]](<3 x i64>) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(<3 x f64>) = reassoc G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x f64>) = G_BITCAST [[BUILD_VECTOR2]](<3 x 
i64>) + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(<3 x f64>) = reassoc G_FADD [[BITCAST2]], [[FMUL]] + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x i64>) = G_BITCAST [[FADD]](<3 x f64>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<3 x i64>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; GFX10-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; GFX10-NEXT: $vgpr5 = COPY [[UV5]](i32) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 ; ; GFX10-CONTRACT-LABEL: name: test_3xdouble_add_mul_rhs ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) - ; GFX10-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-CONTRACT-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX10-CONTRACT-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX10-CONTRACT-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX10-CONTRACT-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-CONTRACT-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX10-CONTRACT-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64) - ; GFX10-CONTRACT-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX10-CONTRACT-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX10-CONTRACT-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX10-CONTRACT-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX10-CONTRACT-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX10-CONTRACT-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX10-CONTRACT-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX10-CONTRACT-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX10-CONTRACT-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<3 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], 
[[BUILD_VECTOR2]] - ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s64>) - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX10-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64) + ; GFX10-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX10-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX10-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX10-CONTRACT-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GFX10-CONTRACT-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GFX10-CONTRACT-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GFX10-CONTRACT-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-CONTRACT-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY8]](i32), [[COPY9]](i32) + ; GFX10-CONTRACT-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i64>) = G_BUILD_VECTOR [[MV3]](i64), [[MV4]](i64), [[MV5]](i64) + ; GFX10-CONTRACT-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; GFX10-CONTRACT-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; GFX10-CONTRACT-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; GFX10-CONTRACT-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; GFX10-CONTRACT-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; GFX10-CONTRACT-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr17 + ; GFX10-CONTRACT-NEXT: [[MV6:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY12]](i32), [[COPY13]](i32) + ; GFX10-CONTRACT-NEXT: [[MV7:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY14]](i32), [[COPY15]](i32) + ; GFX10-CONTRACT-NEXT: [[MV8:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY16]](i32), [[COPY17]](i32) + ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x i64>) = G_BUILD_VECTOR [[MV6]](i64), [[MV7]](i64), [[MV8]](i64) + ; GFX10-CONTRACT-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f64>) = G_BITCAST [[BUILD_VECTOR]](<3 x i64>) + ; GFX10-CONTRACT-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f64>) = G_BITCAST [[BUILD_VECTOR1]](<3 x i64>) + ; GFX10-CONTRACT-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x f64>) = G_BITCAST [[BUILD_VECTOR2]](<3 x i64>) + ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<3 x f64>) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX10-CONTRACT-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x i64>) = G_BITCAST [[FMA]](<3 x f64>) + ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), 
[[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<3 x i64>) + ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX10-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; GFX10-CONTRACT-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; GFX10-CONTRACT-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; GFX10-CONTRACT-NEXT: $vgpr5 = COPY [[UV5]](i32) ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 ; ; GFX10-DENORM-LABEL: name: test_3xdouble_add_mul_rhs ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 ; GFX10-DENORM-NEXT: {{ $}} - ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) - ; GFX10-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-DENORM-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX10-DENORM-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX10-DENORM-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX10-DENORM-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-DENORM-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX10-DENORM-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64) - ; GFX10-DENORM-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX10-DENORM-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX10-DENORM-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX10-DENORM-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX10-DENORM-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX10-DENORM-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX10-DENORM-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX10-DENORM-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX10-DENORM-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX10-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) - ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s64>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<3 x s64>) = reassoc G_FADD [[BUILD_VECTOR2]], [[FMUL]] - ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES 
[[FADD]](<3 x s64>) - ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-DENORM-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX10-DENORM-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX10-DENORM-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-DENORM-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX10-DENORM-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-DENORM-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64) + ; GFX10-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX10-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX10-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX10-DENORM-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GFX10-DENORM-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GFX10-DENORM-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GFX10-DENORM-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-DENORM-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY8]](i32), [[COPY9]](i32) + ; GFX10-DENORM-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i64>) = G_BUILD_VECTOR [[MV3]](i64), [[MV4]](i64), [[MV5]](i64) + ; GFX10-DENORM-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; GFX10-DENORM-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; GFX10-DENORM-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; GFX10-DENORM-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; GFX10-DENORM-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; GFX10-DENORM-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr17 + ; GFX10-DENORM-NEXT: [[MV6:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY12]](i32), [[COPY13]](i32) + ; GFX10-DENORM-NEXT: [[MV7:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY14]](i32), [[COPY15]](i32) + ; GFX10-DENORM-NEXT: [[MV8:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY16]](i32), [[COPY17]](i32) + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x i64>) = G_BUILD_VECTOR [[MV6]](i64), [[MV7]](i64), [[MV8]](i64) + ; GFX10-DENORM-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f64>) = G_BITCAST [[BUILD_VECTOR]](<3 x i64>) + ; GFX10-DENORM-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f64>) = G_BITCAST [[BUILD_VECTOR1]](<3 x i64>) + ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<3 x f64>) = reassoc G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-DENORM-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x f64>) = G_BITCAST [[BUILD_VECTOR2]](<3 x i64>) + ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<3 x f64>) = reassoc G_FADD [[BITCAST2]], [[FMUL]] + ; GFX10-DENORM-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x i64>) = G_BITCAST [[FADD]](<3 x f64>) + ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<3 x i64>) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; 
GFX10-DENORM-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; GFX10-DENORM-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; GFX10-DENORM-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; GFX10-DENORM-NEXT: $vgpr5 = COPY [[UV5]](i32) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 ; ; GFX10-UNSAFE-LABEL: name: test_3xdouble_add_mul_rhs ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) - ; GFX10-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-UNSAFE-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX10-UNSAFE-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX10-UNSAFE-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX10-UNSAFE-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-UNSAFE-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX10-UNSAFE-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64) - ; GFX10-UNSAFE-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX10-UNSAFE-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX10-UNSAFE-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX10-UNSAFE-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX10-UNSAFE-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX10-UNSAFE-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX10-UNSAFE-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX10-UNSAFE-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX10-UNSAFE-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<3 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] - ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s64>) - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr5 = 
COPY [[UV5]](s32) + ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX10-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64) + ; GFX10-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX10-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX10-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX10-UNSAFE-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GFX10-UNSAFE-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GFX10-UNSAFE-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GFX10-UNSAFE-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-UNSAFE-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY8]](i32), [[COPY9]](i32) + ; GFX10-UNSAFE-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY10]](i32), [[COPY11]](i32) + ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i64>) = G_BUILD_VECTOR [[MV3]](i64), [[MV4]](i64), [[MV5]](i64) + ; GFX10-UNSAFE-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; GFX10-UNSAFE-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; GFX10-UNSAFE-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; GFX10-UNSAFE-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; GFX10-UNSAFE-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; GFX10-UNSAFE-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr17 + ; GFX10-UNSAFE-NEXT: [[MV6:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY12]](i32), [[COPY13]](i32) + ; GFX10-UNSAFE-NEXT: [[MV7:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY14]](i32), [[COPY15]](i32) + ; GFX10-UNSAFE-NEXT: [[MV8:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY16]](i32), [[COPY17]](i32) + ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x i64>) = G_BUILD_VECTOR [[MV6]](i64), [[MV7]](i64), [[MV8]](i64) + ; GFX10-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f64>) = G_BITCAST [[BUILD_VECTOR]](<3 x i64>) + ; GFX10-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f64>) = G_BITCAST [[BUILD_VECTOR1]](<3 x i64>) + ; GFX10-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x f64>) = G_BITCAST [[BUILD_VECTOR2]](<3 x i64>) + ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<3 x f64>) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX10-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x i64>) = G_BITCAST [[FMA]](<3 x f64>) + ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<3 x i64>) + ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](i32) + ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; GFX10-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; GFX10-UNSAFE-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; GFX10-UNSAFE-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; GFX10-UNSAFE-NEXT: $vgpr5 = COPY [[UV5]](i32) ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 - %4:_(s32) = COPY $vgpr0 - %5:_(s32) = COPY $vgpr1 
- %6:_(s32) = COPY $vgpr2 - %7:_(s32) = COPY $vgpr3 - %8:_(s32) = COPY $vgpr4 - %9:_(s32) = COPY $vgpr5 - %22:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) - %23:_(s64) = G_MERGE_VALUES %6(s32), %7(s32) - %24:_(s64) = G_MERGE_VALUES %8(s32), %9(s32) - %0:_(<3 x s64>) = G_BUILD_VECTOR %22(s64), %23(s64), %24(s64) - %10:_(s32) = COPY $vgpr6 - %11:_(s32) = COPY $vgpr7 - %12:_(s32) = COPY $vgpr8 - %13:_(s32) = COPY $vgpr9 - %14:_(s32) = COPY $vgpr10 - %15:_(s32) = COPY $vgpr11 - %25:_(s64) = G_MERGE_VALUES %10(s32), %11(s32) - %26:_(s64) = G_MERGE_VALUES %12(s32), %13(s32) - %27:_(s64) = G_MERGE_VALUES %14(s32), %15(s32) - %1:_(<3 x s64>) = G_BUILD_VECTOR %25(s64), %26(s64), %27(s64) - %16:_(s32) = COPY $vgpr12 - %17:_(s32) = COPY $vgpr13 - %18:_(s32) = COPY $vgpr14 - %19:_(s32) = COPY $vgpr15 - %20:_(s32) = COPY $vgpr16 - %21:_(s32) = COPY $vgpr17 - %28:_(s64) = G_MERGE_VALUES %16(s32), %17(s32) - %29:_(s64) = G_MERGE_VALUES %18(s32), %19(s32) - %30:_(s64) = G_MERGE_VALUES %20(s32), %21(s32) - %2:_(<3 x s64>) = G_BUILD_VECTOR %28(s64), %29(s64), %30(s64) - %31:_(<3 x s64>) = reassoc G_FMUL %0, %1 - %32:_(<3 x s64>) = reassoc G_FADD %2, %31 - %34:_(s32), %35:_(s32), %36:_(s32), %37:_(s32), %38:_(s32), %39:_(s32) = G_UNMERGE_VALUES %32(<3 x s64>) - $vgpr0 = COPY %34(s32) - $vgpr1 = COPY %35(s32) - $vgpr2 = COPY %36(s32) - $vgpr3 = COPY %37(s32) - $vgpr4 = COPY %38(s32) - $vgpr5 = COPY %39(s32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 + %4:_(i32) = COPY $vgpr4 + %5:_(i32) = COPY $vgpr5 + %6:_(i64) = G_MERGE_VALUES %0(i32), %1(i32) + %7:_(i64) = G_MERGE_VALUES %2(i32), %3(i32) + %8:_(i64) = G_MERGE_VALUES %4(i32), %5(i32) + %9:_(<3 x i64>) = G_BUILD_VECTOR %6(i64), %7(i64), %8(i64) + %10:_(i32) = COPY $vgpr6 + %11:_(i32) = COPY $vgpr7 + %12:_(i32) = COPY $vgpr8 + %13:_(i32) = COPY $vgpr9 + %14:_(i32) = COPY $vgpr10 + %15:_(i32) = COPY $vgpr11 + %16:_(i64) = G_MERGE_VALUES %10(i32), %11(i32) + %17:_(i64) = G_MERGE_VALUES %12(i32), %13(i32) + %18:_(i64) = G_MERGE_VALUES %14(i32), %15(i32) + %19:_(<3 x i64>) = G_BUILD_VECTOR %16(i64), %17(i64), %18(i64) + %20:_(i32) = COPY $vgpr12 + %21:_(i32) = COPY $vgpr13 + %22:_(i32) = COPY $vgpr14 + %23:_(i32) = COPY $vgpr15 + %24:_(i32) = COPY $vgpr16 + %25:_(i32) = COPY $vgpr17 + %26:_(i64) = G_MERGE_VALUES %20(i32), %21(i32) + %27:_(i64) = G_MERGE_VALUES %22(i32), %23(i32) + %28:_(i64) = G_MERGE_VALUES %24(i32), %25(i32) + %29:_(<3 x i64>) = G_BUILD_VECTOR %26(i64), %27(i64), %28(i64) + %30:_(<3 x f64>) = G_BITCAST %9(<3 x i64>) + %31:_(<3 x f64>) = G_BITCAST %19(<3 x i64>) + %32:_(<3 x f64>) = reassoc G_FMUL %30, %31 + %33:_(<3 x f64>) = G_BITCAST %29(<3 x i64>) + %34:_(<3 x f64>) = reassoc G_FADD %33, %32 + %35:_(<3 x i64>) = G_BITCAST %34(<3 x f64>) + %36:_(i32), %37:_(i32), %38:_(i32), %39:_(i32), %40:_(i32), %41:_(i32) = G_UNMERGE_VALUES %35(<3 x i64>) + $vgpr0 = COPY %36(i32) + $vgpr1 = COPY %37(i32) + $vgpr2 = COPY %38(i32) + $vgpr3 = COPY %39(i32) + $vgpr4 = COPY %40(i32) + $vgpr5 = COPY %41(i32) S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-unmerge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-unmerge-values.mir index 2845a632a84b3..2dd12ad7feb54 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-unmerge-values.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-unmerge-values.mir @@ -12,21 +12,29 @@ body: | ; GFX10-LABEL: name: test_f32_add_mul ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1) - ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>) - ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], %el1 - ; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: %vec:_(<2 x i32>) = G_LOAD %ptr(p1) :: (load (<2 x i32>), addrspace 1) + ; GFX10-NEXT: %el0:_(i32), %el1:_(i32) = G_UNMERGE_VALUES %vec(<2 x i32>) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST %el1(i32) + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMA]](f32) + ; GFX10-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 %ptr:_(p1) = COPY $vgpr2_vgpr3 - %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1) - %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>) - %6:_(s32) = G_FMUL %0, %1 - %7:_(s32) = G_FADD %6, %el1 - $vgpr0 = COPY %7(s32) + %vec:_(<2 x i32>) = G_LOAD %ptr(p1) :: (load (<2 x i32>), addrspace 1) + %el0:_(i32), %el1:_(i32) = G_UNMERGE_VALUES %vec(<2 x i32>) + %6:_(f32) = G_BITCAST %0(i32) + %7:_(f32) = G_BITCAST %1(i32) + %8:_(f32) = G_FMUL %6, %7 + %9:_(f32) = G_BITCAST %el1(i32) + %10:_(f32) = G_FADD %8, %9 + %11:_(i32) = G_BITCAST %10(f32) + $vgpr0 = COPY %11(i32) ... 
--- @@ -42,21 +50,29 @@ body: | ; GFX10-LABEL: name: test_f32_add_mul_rhs ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1) - ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>) - ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], %el1 - ; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: %vec:_(<2 x i32>) = G_LOAD %ptr(p1) :: (load (<2 x i32>), addrspace 1) + ; GFX10-NEXT: %el0:_(i32), %el1:_(i32) = G_UNMERGE_VALUES %vec(<2 x i32>) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST %el1(i32) + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMA]](f32) + ; GFX10-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 %ptr:_(p1) = COPY $vgpr2_vgpr3 - %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1) - %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>) - %6:_(s32) = G_FMUL %0, %1 - %7:_(s32) = G_FADD %el1, %6 - $vgpr0 = COPY %7(s32) + %vec:_(<2 x i32>) = G_LOAD %ptr(p1) :: (load (<2 x i32>), addrspace 1) + %el0:_(i32), %el1:_(i32) = G_UNMERGE_VALUES %vec(<2 x i32>) + %6:_(f32) = G_BITCAST %0(i32) + %7:_(f32) = G_BITCAST %1(i32) + %8:_(f32) = G_FMUL %6, %7 + %9:_(f32) = G_BITCAST %el1(i32) + %10:_(f32) = G_FADD %9, %8 + %11:_(i32) = G_BITCAST %10(f32) + $vgpr0 = COPY %11(i32) ... 
--- @@ -72,28 +88,36 @@ body: | ; GFX10-LABEL: name: test_f16_f32_add_ext_mul ; GFX10: liveins: $sgpr0, $sgpr1, $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1) - ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>) - ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], %el1 - ; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s16) = G_TRUNC %0(s32) - %2:_(s32) = COPY $sgpr1 - %3:_(s16) = G_TRUNC %2(s32) + ; GFX10-NEXT: %vec:_(<2 x i32>) = G_LOAD %ptr(p1) :: (load (<2 x i32>), addrspace 1) + ; GFX10-NEXT: %el0:_(i32), %el1:_(i32) = G_UNMERGE_VALUES %vec(<2 x i32>) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST %el1(i32) + ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST1]](f16) + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[FPEXT]], [[FPEXT1]], [[BITCAST2]] + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMA]](f32) + ; GFX10-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(i32) = COPY $sgpr1 + %3:_(i16) = G_TRUNC %2(i32) %ptr:_(p1) = COPY $vgpr0_vgpr1 - %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1) - %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>) - %8:_(s16) = nnan ninf nsz arcp contract afn reassoc G_FMUL %1, %3 - %9:_(s32) = G_FPEXT %8(s16) - %10:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FADD %9, %el1 - $vgpr0 = COPY %10(s32) + %vec:_(<2 x i32>) = G_LOAD %ptr(p1) :: (load (<2 x i32>), addrspace 1) + %el0:_(i32), %el1:_(i32) = G_UNMERGE_VALUES %vec(<2 x i32>) + %8:_(f16) = G_BITCAST %1(i16) + %9:_(f16) = G_BITCAST %3(i16) + %10:_(f16) = nnan ninf nsz arcp contract afn reassoc G_FMUL %8, %9 + %11:_(f32) = G_FPEXT %10(f16) + %12:_(f32) = G_BITCAST %el1(i32) + %13:_(f32) = nnan ninf nsz arcp contract afn reassoc G_FADD %11, %12 + %14:_(i32) = G_BITCAST %13(f32) + $vgpr0 = COPY %14(i32) ... 
--- @@ -109,28 +133,36 @@ body: | ; GFX10-LABEL: name: test_f16_f32_add_ext_mul_rhs ; GFX10: liveins: $sgpr0, $sgpr1, $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1) - ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>) - ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], %el1 - ; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s16) = G_TRUNC %0(s32) - %2:_(s32) = COPY $sgpr1 - %3:_(s16) = G_TRUNC %2(s32) + ; GFX10-NEXT: %vec:_(<2 x i32>) = G_LOAD %ptr(p1) :: (load (<2 x i32>), addrspace 1) + ; GFX10-NEXT: %el0:_(i32), %el1:_(i32) = G_UNMERGE_VALUES %vec(<2 x i32>) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST %el1(i32) + ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST1]](f16) + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[FPEXT]], [[FPEXT1]], [[BITCAST2]] + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMA]](f32) + ; GFX10-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(i32) = COPY $sgpr1 + %3:_(i16) = G_TRUNC %2(i32) %ptr:_(p1) = COPY $vgpr0_vgpr1 - %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1) - %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>) - %8:_(s16) = nnan ninf nsz arcp contract afn reassoc G_FMUL %1, %3 - %9:_(s32) = G_FPEXT %8(s16) - %10:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FADD %el1, %9 - $vgpr0 = COPY %10(s32) + %vec:_(<2 x i32>) = G_LOAD %ptr(p1) :: (load (<2 x i32>), addrspace 1) + %el0:_(i32), %el1:_(i32) = G_UNMERGE_VALUES %vec(<2 x i32>) + %8:_(f16) = G_BITCAST %1(i16) + %9:_(f16) = G_BITCAST %3(i16) + %10:_(f16) = nnan ninf nsz arcp contract afn reassoc G_FMUL %8, %9 + %11:_(f32) = G_FPEXT %10(f16) + %12:_(f32) = G_BITCAST %el1(i32) + %13:_(f32) = nnan ninf nsz arcp contract afn reassoc G_FADD %12, %11 + %14:_(i32) = G_BITCAST %13(f32) + $vgpr0 = COPY %14(i32) ... 
--- @@ -142,27 +174,39 @@ body: | ; GFX10-LABEL: name: test_f32_add_fma_mul ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4_vgpr5 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr4_vgpr5 - ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1) - ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>) - ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY2]], [[COPY3]], %el1 - ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[FMA]] - ; GFX10-NEXT: $vgpr0 = COPY [[FMA1]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = COPY $vgpr3 + ; GFX10-NEXT: %vec:_(<2 x i32>) = G_LOAD %ptr(p1) :: (load (<2 x i32>), addrspace 1) + ; GFX10-NEXT: %el0:_(i32), %el1:_(i32) = G_UNMERGE_VALUES %vec(<2 x i32>) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY3]](i32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(f32) = G_BITCAST %el1(i32) + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST4]] + ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[BITCAST2]], [[BITCAST3]], [[FMA]] + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[FMA1]](f32) + ; GFX10-NEXT: $vgpr0 = COPY [[BITCAST5]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 %ptr:_(p1) = COPY $vgpr4_vgpr5 - %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1) - %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>) - %8:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FMUL %2, %3 - %9:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FMA %0, %1, %8 - %10:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FADD %9, %el1 - $vgpr0 = COPY %10(s32) + %vec:_(<2 x i32>) = G_LOAD %ptr(p1) :: (load (<2 x i32>), addrspace 1) + %el0:_(i32), %el1:_(i32) = G_UNMERGE_VALUES %vec(<2 x i32>) + %8:_(f32) = G_BITCAST %2(i32) + %9:_(f32) = G_BITCAST %3(i32) + %10:_(f32) = nnan ninf nsz arcp contract afn reassoc G_FMUL %8, %9 + %11:_(f32) = G_BITCAST %0(i32) + %12:_(f32) = G_BITCAST %1(i32) + %13:_(f32) = nnan ninf nsz arcp contract afn reassoc G_FMA %11, %12, %10 + %14:_(f32) = G_BITCAST %el1(i32) + %15:_(f32) = nnan ninf nsz arcp contract afn reassoc G_FADD %13, %14 + %16:_(i32) = G_BITCAST %15(f32) + $vgpr0 = COPY %16(i32) ... 
--- @@ -174,27 +218,39 @@ body: | ; GFX10-LABEL: name: test_f32_add_fma_mul_rhs ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4_vgpr5 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr4_vgpr5 - ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1) - ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>) - ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY2]], [[COPY3]], %el1 - ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[FMA]] - ; GFX10-NEXT: $vgpr0 = COPY [[FMA1]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = COPY $vgpr3 + ; GFX10-NEXT: %vec:_(<2 x i32>) = G_LOAD %ptr(p1) :: (load (<2 x i32>), addrspace 1) + ; GFX10-NEXT: %el0:_(i32), %el1:_(i32) = G_UNMERGE_VALUES %vec(<2 x i32>) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY3]](i32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(f32) = G_BITCAST %el1(i32) + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST4]] + ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[BITCAST2]], [[BITCAST3]], [[FMA]] + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[FMA1]](f32) + ; GFX10-NEXT: $vgpr0 = COPY [[BITCAST5]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 %ptr:_(p1) = COPY $vgpr4_vgpr5 - %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1) - %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>) - %8:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FMUL %2, %3 - %9:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FMA %0, %1, %8 - %10:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FADD %el1, %9 - $vgpr0 = COPY %10(s32) + %vec:_(<2 x i32>) = G_LOAD %ptr(p1) :: (load (<2 x i32>), addrspace 1) + %el0:_(i32), %el1:_(i32) = G_UNMERGE_VALUES %vec(<2 x i32>) + %8:_(f32) = G_BITCAST %2(i32) + %9:_(f32) = G_BITCAST %3(i32) + %10:_(f32) = nnan ninf nsz arcp contract afn reassoc G_FMUL %8, %9 + %11:_(f32) = G_BITCAST %0(i32) + %12:_(f32) = G_BITCAST %1(i32) + %13:_(f32) = nnan ninf nsz arcp contract afn reassoc G_FMA %11, %12, %10 + %14:_(f32) = G_BITCAST %el1(i32) + %15:_(f32) = nnan ninf nsz arcp contract afn reassoc G_FADD %14, %13 + %16:_(i32) = G_BITCAST %15(f32) + $vgpr0 = COPY %16(i32) ... 
--- @@ -210,34 +266,46 @@ body: | ; GFX10-LABEL: name: test_f16_f32_add_fma_ext_mul ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4, $vgpr5 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1) - ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>) - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], %el1 - ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[FMA]] - ; GFX10-NEXT: $vgpr0 = COPY [[FMA1]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: %vec:_(<2 x i32>) = G_LOAD %ptr(p1) :: (load (<2 x i32>), addrspace 1) + ; GFX10-NEXT: %el0:_(i32), %el1:_(i32) = G_UNMERGE_VALUES %vec(<2 x i32>) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY3]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(f32) = G_BITCAST %el1(i32) + ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST1]](f16) + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[FPEXT]], [[FPEXT1]], [[BITCAST4]] + ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[BITCAST2]], [[BITCAST3]], [[FMA]] + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[FMA1]](f32) + ; GFX10-NEXT: $vgpr0 = COPY [[BITCAST5]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 %ptr:_(p1) = COPY $vgpr2_vgpr3 - %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1) - %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>) - %6:_(s32) = COPY $vgpr4 - %7:_(s16) = G_TRUNC %6(s32) - %8:_(s32) = COPY $vgpr5 - %9:_(s16) = G_TRUNC %8(s32) - %10:_(s16) = G_FMUL %7, %9 - %11:_(s32) = G_FPEXT %10(s16) - %12:_(s32) = G_FMA %0, %1, %11 - %13:_(s32) = G_FADD %12, %el1 - $vgpr0 = COPY %13(s32) + %vec:_(<2 x i32>) = G_LOAD %ptr(p1) :: (load (<2 x i32>), addrspace 1) + %el0:_(i32), %el1:_(i32) = G_UNMERGE_VALUES %vec(<2 x i32>) + %6:_(i32) = COPY $vgpr4 + %7:_(i16) = G_TRUNC %6(i32) + %8:_(i32) = COPY $vgpr5 + %9:_(i16) = G_TRUNC %8(i32) + %10:_(f16) = G_BITCAST %7(i16) + %11:_(f16) = G_BITCAST %9(i16) + %12:_(f16) = G_FMUL %10, %11 + %13:_(f32) = G_FPEXT %12(f16) + %14:_(f32) = G_BITCAST %0(i32) + %15:_(f32) = G_BITCAST %1(i32) + %16:_(f32) = G_FMA %14, %15, %13 + %17:_(f32) = G_BITCAST %el1(i32) + %18:_(f32) = G_FADD %16, %17 + %19:_(i32) = G_BITCAST %18(f32) + $vgpr0 = COPY %19(i32) ... 
--- @@ -253,41 +321,53 @@ body: | ; GFX10-LABEL: name: test_f16_f32_add_ext_fma_mul ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4, $vgpr5 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1) - ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>) - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; GFX10-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; GFX10-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT2]], [[FPEXT3]], %el1 - ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], [[FMA]] - ; GFX10-NEXT: $vgpr0 = COPY [[FMA1]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0(s32) - %2:_(s32) = COPY $vgpr1 - %3:_(s16) = G_TRUNC %2(s32) + ; GFX10-NEXT: %vec:_(<2 x i32>) = G_LOAD %ptr(p1) :: (load (<2 x i32>), addrspace 1) + ; GFX10-NEXT: %el0:_(i32), %el1:_(i32) = G_UNMERGE_VALUES %vec(<2 x i32>) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY3]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(f32) = G_BITCAST %el1(i32) + ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST3]](f16) + ; GFX10-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; GFX10-NEXT: [[FPEXT3:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST1]](f16) + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[FPEXT2]], [[FPEXT3]], [[BITCAST4]] + ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[FPEXT]], [[FPEXT1]], [[FMA]] + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[FMA1]](f32) + ; GFX10-NEXT: $vgpr0 = COPY [[BITCAST5]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(i32) = COPY $vgpr1 + %3:_(i16) = G_TRUNC %2(i32) %ptr:_(p1) = COPY $vgpr2_vgpr3 - %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1) - %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>) - %8:_(s32) = COPY $vgpr4 - %9:_(s16) = G_TRUNC %8(s32) - %10:_(s32) = COPY $vgpr5 - %11:_(s16) = G_TRUNC %10(s32) - %12:_(s16) = G_FMUL %9, %11 - %13:_(s16) = G_FMUL %1, %3 - %14:_(s16) = G_FADD %13, %12 - %15:_(s32) = G_FPEXT %14(s16) - %16:_(s32) = G_FADD %15, %el1 - 
$vgpr0 = COPY %16(s32) + %vec:_(<2 x i32>) = G_LOAD %ptr(p1) :: (load (<2 x i32>), addrspace 1) + %el0:_(i32), %el1:_(i32) = G_UNMERGE_VALUES %vec(<2 x i32>) + %8:_(i32) = COPY $vgpr4 + %9:_(i16) = G_TRUNC %8(i32) + %10:_(i32) = COPY $vgpr5 + %11:_(i16) = G_TRUNC %10(i32) + %12:_(f16) = G_BITCAST %9(i16) + %13:_(f16) = G_BITCAST %11(i16) + %14:_(f16) = G_FMUL %12, %13 + %15:_(f16) = G_BITCAST %1(i16) + %16:_(f16) = G_BITCAST %3(i16) + %17:_(f16) = G_FMUL %15, %16 + %18:_(f16) = G_FADD %17, %14 + %19:_(f32) = G_FPEXT %18(f16) + %20:_(f32) = G_BITCAST %el1(i32) + %21:_(f32) = G_FADD %19, %20 + %22:_(i32) = G_BITCAST %21(f32) + $vgpr0 = COPY %22(i32) ... --- @@ -304,33 +384,45 @@ body: | ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1) - ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>) - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], %el1 - ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[FMA]] - ; GFX10-NEXT: $vgpr0 = COPY [[FMA1]](s32) + ; GFX10-NEXT: %vec:_(<2 x i32>) = G_LOAD %ptr(p1) :: (load (<2 x i32>), addrspace 1) + ; GFX10-NEXT: %el0:_(i32), %el1:_(i32) = G_UNMERGE_VALUES %vec(<2 x i32>) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY3]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(f32) = G_BITCAST %el1(i32) + ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST1]](f16) + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[FPEXT]], [[FPEXT1]], [[BITCAST4]] + ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[BITCAST2]], [[BITCAST3]], [[FMA]] + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[FMA1]](f32) + ; GFX10-NEXT: $vgpr0 = COPY [[BITCAST5]](i32) %ptr:_(p1) = COPY $vgpr0_vgpr1 - %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1) - %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>) - %4:_(s32) = COPY $vgpr2 - %5:_(s32) = COPY $vgpr3 - %6:_(s32) = COPY $vgpr4 - %7:_(s16) = G_TRUNC %6(s32) - %8:_(s32) = COPY $vgpr5 - %9:_(s16) = G_TRUNC %8(s32) - %10:_(s16) = G_FMUL %7, %9 - %11:_(s32) = G_FPEXT %10(s16) - %12:_(s32) = G_FMA %4, %5, %11 - %13:_(s32) = G_FADD %el1, %12 - $vgpr0 = COPY %13(s32) + %vec:_(<2 x i32>) = G_LOAD %ptr(p1) :: (load (<2 x i32>), addrspace 1) + %el0:_(i32), %el1:_(i32) = G_UNMERGE_VALUES 
%vec(<2 x i32>) + %4:_(i32) = COPY $vgpr2 + %5:_(i32) = COPY $vgpr3 + %6:_(i32) = COPY $vgpr4 + %7:_(i16) = G_TRUNC %6(i32) + %8:_(i32) = COPY $vgpr5 + %9:_(i16) = G_TRUNC %8(i32) + %10:_(f16) = G_BITCAST %7(i16) + %11:_(f16) = G_BITCAST %9(i16) + %12:_(f16) = G_FMUL %10, %11 + %13:_(f32) = G_FPEXT %12(f16) + %14:_(f32) = G_BITCAST %4(i32) + %15:_(f32) = G_BITCAST %5(i32) + %16:_(f32) = G_FMA %14, %15, %13 + %17:_(f32) = G_BITCAST %el1(i32) + %18:_(f32) = G_FADD %17, %16 + %19:_(i32) = G_BITCAST %18(f32) + $vgpr0 = COPY %19(i32) ... --- @@ -347,40 +439,52 @@ body: | ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1) - ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>) - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; GFX10-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; GFX10-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT2]], [[FPEXT3]], %el1 - ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], [[FMA]] - ; GFX10-NEXT: $vgpr0 = COPY [[FMA1]](s32) + ; GFX10-NEXT: %vec:_(<2 x i32>) = G_LOAD %ptr(p1) :: (load (<2 x i32>), addrspace 1) + ; GFX10-NEXT: %el0:_(i32), %el1:_(i32) = G_UNMERGE_VALUES %vec(<2 x i32>) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY3]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(f32) = G_BITCAST %el1(i32) + ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST3]](f16) + ; GFX10-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; GFX10-NEXT: [[FPEXT3:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST1]](f16) + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[FPEXT2]], [[FPEXT3]], [[BITCAST4]] + ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[FPEXT]], [[FPEXT1]], [[FMA]] + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[FMA1]](f32) + ; GFX10-NEXT: $vgpr0 = COPY [[BITCAST5]](i32) %ptr:_(p1) = COPY $vgpr0_vgpr1 - %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1) - %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>) - 
%4:_(s32) = COPY $vgpr2 - %5:_(s16) = G_TRUNC %4(s32) - %6:_(s32) = COPY $vgpr3 - %7:_(s16) = G_TRUNC %6(s32) - %8:_(s32) = COPY $vgpr4 - %9:_(s16) = G_TRUNC %8(s32) - %10:_(s32) = COPY $vgpr5 - %11:_(s16) = G_TRUNC %10(s32) - %12:_(s16) = G_FMUL %9, %11 - %13:_(s16) = G_FMUL %5, %7 - %14:_(s16) = G_FADD %13, %12 - %15:_(s32) = G_FPEXT %14(s16) - %16:_(s32) = G_FADD %el1, %15 - $vgpr0 = COPY %16(s32) + %vec:_(<2 x i32>) = G_LOAD %ptr(p1) :: (load (<2 x i32>), addrspace 1) + %el0:_(i32), %el1:_(i32) = G_UNMERGE_VALUES %vec(<2 x i32>) + %4:_(i32) = COPY $vgpr2 + %5:_(i16) = G_TRUNC %4(i32) + %6:_(i32) = COPY $vgpr3 + %7:_(i16) = G_TRUNC %6(i32) + %8:_(i32) = COPY $vgpr4 + %9:_(i16) = G_TRUNC %8(i32) + %10:_(i32) = COPY $vgpr5 + %11:_(i16) = G_TRUNC %10(i32) + %12:_(f16) = G_BITCAST %9(i16) + %13:_(f16) = G_BITCAST %11(i16) + %14:_(f16) = G_FMUL %12, %13 + %15:_(f16) = G_BITCAST %5(i16) + %16:_(f16) = G_BITCAST %7(i16) + %17:_(f16) = G_FMUL %15, %16 + %18:_(f16) = G_FADD %17, %14 + %19:_(f32) = G_FPEXT %18(f16) + %20:_(f32) = G_BITCAST %el1(i32) + %21:_(f32) = G_FADD %20, %19 + %22:_(i32) = G_BITCAST %21(f32) + $vgpr0 = COPY %22(i32) ... --- @@ -396,22 +500,30 @@ body: | ; GFX10-LABEL: name: test_f32_sub_mul ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1) - ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>) - ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG %el1 - ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[FNEG]] - ; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: %vec:_(<2 x i32>) = G_LOAD %ptr(p1) :: (load (<2 x i32>), addrspace 1) + ; GFX10-NEXT: %el0:_(i32), %el1:_(i32) = G_UNMERGE_VALUES %vec(<2 x i32>) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST %el1(i32) + ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[BITCAST2]] + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[BITCAST]], [[BITCAST1]], [[FNEG]] + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMA]](f32) + ; GFX10-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 %ptr:_(p1) = COPY $vgpr0_vgpr1 - %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1) - %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>) - %6:_(s32) = G_FMUL %0, %1 - %7:_(s32) = G_FSUB %6, %el1 - $vgpr0 = COPY %7(s32) + %vec:_(<2 x i32>) = G_LOAD %ptr(p1) :: (load (<2 x i32>), addrspace 1) + %el0:_(i32), %el1:_(i32) = G_UNMERGE_VALUES %vec(<2 x i32>) + %6:_(f32) = G_BITCAST %0(i32) + %7:_(f32) = G_BITCAST %1(i32) + %8:_(f32) = G_FMUL %6, %7 + %9:_(f32) = G_BITCAST %el1(i32) + %10:_(f32) = G_FSUB %8, %9 + %11:_(i32) = G_BITCAST %10(f32) + $vgpr0 = COPY %11(i32) ... 
--- @@ -427,20 +539,28 @@ body: | ; GFX10-LABEL: name: test_f32_sub_mul_rhs ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1) - ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>) - ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]] - ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[COPY1]], %el1 - ; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: %vec:_(<2 x i32>) = G_LOAD %ptr(p1) :: (load (<2 x i32>), addrspace 1) + ; GFX10-NEXT: %el0:_(i32), %el1:_(i32) = G_UNMERGE_VALUES %vec(<2 x i32>) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST %el1(i32) + ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[BITCAST]] + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[BITCAST1]], [[BITCAST2]] + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMA]](f32) + ; GFX10-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 %ptr:_(p1) = COPY $vgpr2_vgpr3 - %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1) - %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>) - %6:_(s32) = G_FMUL %0, %1 - %7:_(s32) = G_FSUB %el1, %6 - $vgpr0 = COPY %7(s32) + %vec:_(<2 x i32>) = G_LOAD %ptr(p1) :: (load (<2 x i32>), addrspace 1) + %el0:_(i32), %el1:_(i32) = G_UNMERGE_VALUES %vec(<2 x i32>) + %6:_(f32) = G_BITCAST %0(i32) + %7:_(f32) = G_BITCAST %1(i32) + %8:_(f32) = G_FMUL %6, %7 + %9:_(f32) = G_BITCAST %el1(i32) + %10:_(f32) = G_FSUB %9, %8 + %11:_(i32) = G_BITCAST %10(f32) + $vgpr0 = COPY %11(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fold-binop-into-select.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fold-binop-into-select.mir index 96a776f6fbb69..716085082b7e9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fold-binop-into-select.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fold-binop-into-select.mir @@ -10,24 +10,24 @@ body: | ; CHECK-LABEL: name: no_fold_add_into_select_s32_0_multi_use ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero - ; CHECK-NEXT: %ten:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: %twenty:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: %select:_(s32) = G_SELECT %cond(s1), %ten, %twenty - ; CHECK-NEXT: %thirty:_(s32) = G_CONSTANT i32 30 - ; CHECK-NEXT: %add:_(s32) = G_ADD %select, %thirty - ; CHECK-NEXT: S_ENDPGM 0, implicit %add(s32), implicit %select(s32) - %reg:_(s32) = COPY $vgpr0 - %zero:_(s32) = G_CONSTANT i32 0 - %cond:_(s1) = G_ICMP intpred(eq), %reg, %zero - %ten:_(s32) = G_CONSTANT i32 10 - %twenty:_(s32) = G_CONSTANT i32 20 - %select:_(s32) = G_SELECT %cond, %ten, %twenty - %thirty:_(s32) = G_CONSTANT i32 30 - %add:_(s32) = G_ADD %select, %thirty - S_ENDPGM 0, implicit %add, implicit %select + ; CHECK-NEXT: %reg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %zero:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + ; CHECK-NEXT: %ten:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: %twenty:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: %select:_(i32) = G_SELECT %cond(i1), %ten, %twenty + ; CHECK-NEXT: %thirty:_(i32) = G_CONSTANT i32 30 + ; CHECK-NEXT: %add:_(i32) = G_ADD %select, %thirty + ; CHECK-NEXT: S_ENDPGM 0, implicit %add(i32), implicit %select(i32) + %reg:_(i32) = COPY $vgpr0 + %zero:_(i32) = G_CONSTANT i32 0 + %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + %ten:_(i32) = G_CONSTANT i32 10 + %twenty:_(i32) = G_CONSTANT i32 20 + %select:_(i32) = G_SELECT %cond(i1), %ten, %twenty + %thirty:_(i32) = G_CONSTANT i32 30 + %add:_(i32) = G_ADD %select, %thirty + S_ENDPGM 0, implicit %add(i32), implicit %select(i32) ... 
--- @@ -39,24 +39,24 @@ body: | ; CHECK-LABEL: name: no_fold_add_into_select_s32_1_multi_use ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero - ; CHECK-NEXT: %ten:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: %twenty:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: %select:_(s32) = G_SELECT %cond(s1), %ten, %twenty - ; CHECK-NEXT: %thirty:_(s32) = G_CONSTANT i32 30 - ; CHECK-NEXT: %add:_(s32) = G_ADD %select, %thirty - ; CHECK-NEXT: S_ENDPGM 0, implicit %add(s32), implicit %select(s32) - %reg:_(s32) = COPY $vgpr0 - %zero:_(s32) = G_CONSTANT i32 0 - %cond:_(s1) = G_ICMP intpred(eq), %reg, %zero - %ten:_(s32) = G_CONSTANT i32 10 - %twenty:_(s32) = G_CONSTANT i32 20 - %select:_(s32) = G_SELECT %cond, %ten, %twenty - %thirty:_(s32) = G_CONSTANT i32 30 - %add:_(s32) = G_ADD %thirty, %select - S_ENDPGM 0, implicit %add, implicit %select + ; CHECK-NEXT: %reg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %zero:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + ; CHECK-NEXT: %ten:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: %twenty:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: %select:_(i32) = G_SELECT %cond(i1), %ten, %twenty + ; CHECK-NEXT: %thirty:_(i32) = G_CONSTANT i32 30 + ; CHECK-NEXT: %add:_(i32) = G_ADD %select, %thirty + ; CHECK-NEXT: S_ENDPGM 0, implicit %add(i32), implicit %select(i32) + %reg:_(i32) = COPY $vgpr0 + %zero:_(i32) = G_CONSTANT i32 0 + %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + %ten:_(i32) = G_CONSTANT i32 10 + %twenty:_(i32) = G_CONSTANT i32 20 + %select:_(i32) = G_SELECT %cond(i1), %ten, %twenty + %thirty:_(i32) = G_CONSTANT i32 30 + %add:_(i32) = G_ADD %thirty, %select + S_ENDPGM 0, implicit %add(i32), implicit %select(i32) ... 
--- @@ -68,25 +68,25 @@ body: | ; CHECK-LABEL: name: no_fold_sub_into_select_s32_nonconst_rhs ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %variable:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero - ; CHECK-NEXT: %ten:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: %twenty:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: %select:_(s32) = G_SELECT %cond(s1), %ten, %twenty - ; CHECK-NEXT: %sub:_(s32) = G_SUB %select, %variable - ; CHECK-NEXT: S_ENDPGM 0, implicit %sub(s32) - %reg:_(s32) = COPY $vgpr0 - %variable:_(s32) = COPY $vgpr0 - %zero:_(s32) = G_CONSTANT i32 0 - %cond:_(s1) = G_ICMP intpred(eq), %reg, %zero - %ten:_(s32) = G_CONSTANT i32 10 - %twenty:_(s32) = G_CONSTANT i32 20 - %select:_(s32) = G_SELECT %cond, %ten, %twenty - %thirty:_(s32) = G_CONSTANT i32 30 - %sub:_(s32) = G_SUB %select, %variable - S_ENDPGM 0, implicit %sub + ; CHECK-NEXT: %reg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %variable:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %zero:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + ; CHECK-NEXT: %ten:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: %twenty:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: %select:_(i32) = G_SELECT %cond(i1), %ten, %twenty + ; CHECK-NEXT: %sub:_(i32) = G_SUB %select, %variable + ; CHECK-NEXT: S_ENDPGM 0, implicit %sub(i32) + %reg:_(i32) = COPY $vgpr0 + %variable:_(i32) = COPY $vgpr0 + %zero:_(i32) = G_CONSTANT i32 0 + %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + %ten:_(i32) = G_CONSTANT i32 10 + %twenty:_(i32) = G_CONSTANT i32 20 + %select:_(i32) = G_SELECT %cond(i1), %ten, %twenty + %thirty:_(i32) = G_CONSTANT i32 30 + %sub:_(i32) = G_SUB %select, %variable + S_ENDPGM 0, implicit %sub(i32) ... 
--- @@ -98,25 +98,25 @@ body: | ; CHECK-LABEL: name: no_fold_sub_into_select_s32_nonconst_lhs ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %variable:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero - ; CHECK-NEXT: %ten:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: %twenty:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: %select:_(s32) = G_SELECT %cond(s1), %ten, %twenty - ; CHECK-NEXT: %sub:_(s32) = G_SUB %variable, %select - ; CHECK-NEXT: S_ENDPGM 0, implicit %sub(s32) - %reg:_(s32) = COPY $vgpr0 - %variable:_(s32) = COPY $vgpr0 - %zero:_(s32) = G_CONSTANT i32 0 - %cond:_(s1) = G_ICMP intpred(eq), %reg, %zero - %ten:_(s32) = G_CONSTANT i32 10 - %twenty:_(s32) = G_CONSTANT i32 20 - %select:_(s32) = G_SELECT %cond, %ten, %twenty - %thirty:_(s32) = G_CONSTANT i32 30 - %sub:_(s32) = G_SUB %variable, %select - S_ENDPGM 0, implicit %sub + ; CHECK-NEXT: %reg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %variable:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %zero:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + ; CHECK-NEXT: %ten:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: %twenty:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: %select:_(i32) = G_SELECT %cond(i1), %ten, %twenty + ; CHECK-NEXT: %sub:_(i32) = G_SUB %variable, %select + ; CHECK-NEXT: S_ENDPGM 0, implicit %sub(i32) + %reg:_(i32) = COPY $vgpr0 + %variable:_(i32) = COPY $vgpr0 + %zero:_(i32) = G_CONSTANT i32 0 + %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + %ten:_(i32) = G_CONSTANT i32 10 + %twenty:_(i32) = G_CONSTANT i32 20 + %select:_(i32) = G_SELECT %cond(i1), %ten, %twenty + %thirty:_(i32) = G_CONSTANT i32 30 + %sub:_(i32) = G_SUB %variable, %select + S_ENDPGM 0, implicit %sub(i32) ... --- @@ -128,22 +128,22 @@ body: | ; CHECK-LABEL: name: fold_add_into_select_s32_0 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 40 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 50 - ; CHECK-NEXT: %add:_(s32) = G_SELECT %cond(s1), [[C]], [[C1]] - ; CHECK-NEXT: S_ENDPGM 0, implicit %add(s32) - %reg:_(s32) = COPY $vgpr0 - %zero:_(s32) = G_CONSTANT i32 0 - %cond:_(s1) = G_ICMP intpred(eq), %reg, %zero - %ten:_(s32) = G_CONSTANT i32 10 - %twenty:_(s32) = G_CONSTANT i32 20 - %select:_(s32) = G_SELECT %cond, %ten, %twenty - %thirty:_(s32) = G_CONSTANT i32 30 - %add:_(s32) = G_ADD %select, %thirty - S_ENDPGM 0, implicit %add + ; CHECK-NEXT: %reg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %zero:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 40 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 50 + ; CHECK-NEXT: %add:_(i32) = G_SELECT %cond(i1), [[C]], [[C1]] + ; CHECK-NEXT: S_ENDPGM 0, implicit %add(i32) + %reg:_(i32) = COPY $vgpr0 + %zero:_(i32) = G_CONSTANT i32 0 + %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + %ten:_(i32) = G_CONSTANT i32 10 + %twenty:_(i32) = G_CONSTANT i32 20 + %select:_(i32) = G_SELECT %cond(i1), %ten, %twenty + %thirty:_(i32) = G_CONSTANT i32 30 + %add:_(i32) = G_ADD %select, %thirty + S_ENDPGM 0, implicit %add(i32) ... 
--- @@ -155,22 +155,22 @@ body: | ; CHECK-LABEL: name: fold_add_into_select_s32_1 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 40 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 50 - ; CHECK-NEXT: %add:_(s32) = G_SELECT %cond(s1), [[C]], [[C1]] - ; CHECK-NEXT: S_ENDPGM 0, implicit %add(s32) - %reg:_(s32) = COPY $vgpr0 - %zero:_(s32) = G_CONSTANT i32 0 - %cond:_(s1) = G_ICMP intpred(eq), %reg, %zero - %ten:_(s32) = G_CONSTANT i32 10 - %twenty:_(s32) = G_CONSTANT i32 20 - %select:_(s32) = G_SELECT %cond, %ten, %twenty - %thirty:_(s32) = G_CONSTANT i32 30 - %add:_(s32) = G_ADD %thirty, %select - S_ENDPGM 0, implicit %add + ; CHECK-NEXT: %reg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %zero:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 40 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 50 + ; CHECK-NEXT: %add:_(i32) = G_SELECT %cond(i1), [[C]], [[C1]] + ; CHECK-NEXT: S_ENDPGM 0, implicit %add(i32) + %reg:_(i32) = COPY $vgpr0 + %zero:_(i32) = G_CONSTANT i32 0 + %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + %ten:_(i32) = G_CONSTANT i32 10 + %twenty:_(i32) = G_CONSTANT i32 20 + %select:_(i32) = G_SELECT %cond(i1), %ten, %twenty + %thirty:_(i32) = G_CONSTANT i32 30 + %add:_(i32) = G_ADD %thirty, %select + S_ENDPGM 0, implicit %add(i32) ... --- @@ -182,27 +182,27 @@ body: | ; CHECK-LABEL: name: fold_add_into_select_v2s32_splat ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %reg0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: %reg1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: %cond:_(<2 x s1>) = G_ICMP intpred(eq), %reg0(<2 x s32>), %reg1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 40 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 50 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32) - ; CHECK-NEXT: %add:_(<2 x s32>) = G_SELECT %cond(<2 x s1>), [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; CHECK-NEXT: S_ENDPGM 0, implicit %add(<2 x s32>) - %reg0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %reg1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %cond:_(<2 x s1>) = G_ICMP intpred(eq), %reg0, %reg1 - %ten:_(s32) = G_CONSTANT i32 10 - %twenty:_(s32) = G_CONSTANT i32 20 - %ten_vec:_(<2 x s32>) = G_BUILD_VECTOR %ten, %ten - %twenty_vec:_(<2 x s32>) = G_BUILD_VECTOR %twenty, %twenty - %select:_(<2 x s32>) = G_SELECT %cond, %ten_vec, %twenty_vec - %thirty:_(s32) = G_CONSTANT i32 30 - %thirty_vec:_(<2 x s32>) = G_BUILD_VECTOR %thirty, %thirty - %add:_(<2 x s32>) = G_ADD %select, %thirty_vec - S_ENDPGM 0, implicit %add + ; CHECK-NEXT: %reg0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: %reg1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: %cond:_(<2 x i1>) = G_ICMP intpred(eq), %reg0(<2 x i32>), %reg1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 40 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[C]](i32), [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 50 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[C1]](i32), [[C1]](i32) + ; CHECK-NEXT: %add:_(<2 x i32>) = G_SELECT %cond(<2 x i1>), [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; CHECK-NEXT: 
S_ENDPGM 0, implicit %add(<2 x i32>) + %reg0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %reg1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %cond:_(<2 x i1>) = G_ICMP intpred(eq), %reg0(<2 x i32>), %reg1 + %ten:_(i32) = G_CONSTANT i32 10 + %twenty:_(i32) = G_CONSTANT i32 20 + %ten_vec:_(<2 x i32>) = G_BUILD_VECTOR %ten(i32), %ten(i32) + %twenty_vec:_(<2 x i32>) = G_BUILD_VECTOR %twenty(i32), %twenty(i32) + %select:_(<2 x i32>) = G_SELECT %cond(<2 x i1>), %ten_vec, %twenty_vec + %thirty:_(i32) = G_CONSTANT i32 30 + %thirty_vec:_(<2 x i32>) = G_BUILD_VECTOR %thirty(i32), %thirty(i32) + %add:_(<2 x i32>) = G_ADD %select, %thirty_vec + S_ENDPGM 0, implicit %add(<2 x i32>) ... --- @@ -214,29 +214,29 @@ body: | ; CHECK-LABEL: name: fold_add_into_select_v2s32_nonsplat ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %reg0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: %reg1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: %cond:_(<2 x s1>) = G_ICMP intpred(eq), %reg0(<2 x s32>), %reg1 - ; CHECK-NEXT: %twenty:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: %thirty:_(s32) = G_CONSTANT i32 30 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 40 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), %thirty(s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 50 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C1]](s32), %twenty(s32) - ; CHECK-NEXT: %add:_(<2 x s32>) = G_SELECT %cond(<2 x s1>), [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; CHECK-NEXT: S_ENDPGM 0, implicit %add(<2 x s32>) - %reg0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %reg1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %cond:_(<2 x s1>) = G_ICMP intpred(eq), %reg0, %reg1 - %ten:_(s32) = G_CONSTANT i32 10 - %twenty:_(s32) = G_CONSTANT i32 20 - %const_vec0:_(<2 x s32>) = G_BUILD_VECTOR %ten, %twenty - %const_vec1:_(<2 x s32>) = G_BUILD_VECTOR %twenty, %ten - %select:_(<2 x s32>) = G_SELECT %cond, %const_vec0, %const_vec1 - %thirty:_(s32) = G_CONSTANT i32 30 - %const_vec3:_(<2 x s32>) = G_BUILD_VECTOR %thirty, %ten - %add:_(<2 x s32>) = G_ADD %select, %const_vec3 - S_ENDPGM 0, implicit %add + ; CHECK-NEXT: %reg0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: %reg1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: %cond:_(<2 x i1>) = G_ICMP intpred(eq), %reg0(<2 x i32>), %reg1 + ; CHECK-NEXT: %twenty:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: %thirty:_(i32) = G_CONSTANT i32 30 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 40 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[C]](i32), %thirty(i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 50 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[C1]](i32), %twenty(i32) + ; CHECK-NEXT: %add:_(<2 x i32>) = G_SELECT %cond(<2 x i1>), [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; CHECK-NEXT: S_ENDPGM 0, implicit %add(<2 x i32>) + %reg0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %reg1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %cond:_(<2 x i1>) = G_ICMP intpred(eq), %reg0(<2 x i32>), %reg1 + %ten:_(i32) = G_CONSTANT i32 10 + %twenty:_(i32) = G_CONSTANT i32 20 + %const_vec0:_(<2 x i32>) = G_BUILD_VECTOR %ten(i32), %twenty(i32) + %const_vec1:_(<2 x i32>) = G_BUILD_VECTOR %twenty(i32), %ten(i32) + %select:_(<2 x i32>) = G_SELECT %cond(<2 x i1>), %const_vec0, %const_vec1 + %thirty:_(i32) = G_CONSTANT i32 30 + %const_vec3:_(<2 x i32>) = G_BUILD_VECTOR %thirty(i32), %ten(i32) + %add:_(<2 x i32>) = G_ADD %select, %const_vec3 + S_ENDPGM 0, implicit %add(<2 x i32>) ... 
--- @@ -248,32 +248,32 @@ body: | ; CHECK-LABEL: name: fold_add_into_select_v2s32_nonsplat_undef ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %reg0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: %reg1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: %cond:_(<2 x s1>) = G_ICMP intpred(eq), %reg0(<2 x s32>), %reg1 - ; CHECK-NEXT: %twenty:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: %undef:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: %const_vec0:_(<2 x s32>) = G_BUILD_VECTOR %undef(s32), %twenty(s32) - ; CHECK-NEXT: %const_vec1:_(<2 x s32>) = G_BUILD_VECTOR %twenty(s32), %undef(s32) - ; CHECK-NEXT: %thirty:_(s32) = G_CONSTANT i32 30 - ; CHECK-NEXT: %const_vec3:_(<2 x s32>) = G_BUILD_VECTOR %thirty(s32), %undef(s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(<2 x s32>) = G_ADD %const_vec0, %const_vec3 - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(<2 x s32>) = G_ADD %const_vec1, %const_vec3 - ; CHECK-NEXT: %add:_(<2 x s32>) = G_SELECT %cond(<2 x s1>), [[ADD]], [[ADD1]] - ; CHECK-NEXT: S_ENDPGM 0, implicit %add(<2 x s32>) - %reg0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %reg1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %cond:_(<2 x s1>) = G_ICMP intpred(eq), %reg0, %reg1 - %ten:_(s32) = G_CONSTANT i32 10 - %twenty:_(s32) = G_CONSTANT i32 20 - %undef:_(s32) = G_IMPLICIT_DEF - %const_vec0:_(<2 x s32>) = G_BUILD_VECTOR %undef, %twenty - %const_vec1:_(<2 x s32>) = G_BUILD_VECTOR %twenty, %undef - %select:_(<2 x s32>) = G_SELECT %cond, %const_vec0, %const_vec1 - %thirty:_(s32) = G_CONSTANT i32 30 - %const_vec3:_(<2 x s32>) = G_BUILD_VECTOR %thirty, %undef - %add:_(<2 x s32>) = G_ADD %select, %const_vec3 - S_ENDPGM 0, implicit %add + ; CHECK-NEXT: %reg0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: %reg1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: %cond:_(<2 x i1>) = G_ICMP intpred(eq), %reg0(<2 x i32>), %reg1 + ; CHECK-NEXT: %twenty:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: %undef:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: %const_vec0:_(<2 x i32>) = G_BUILD_VECTOR %undef(i32), %twenty(i32) + ; CHECK-NEXT: %const_vec1:_(<2 x i32>) = G_BUILD_VECTOR %twenty(i32), %undef(i32) + ; CHECK-NEXT: %thirty:_(i32) = G_CONSTANT i32 30 + ; CHECK-NEXT: %const_vec3:_(<2 x i32>) = G_BUILD_VECTOR %thirty(i32), %undef(i32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(<2 x i32>) = G_ADD %const_vec0, %const_vec3 + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(<2 x i32>) = G_ADD %const_vec1, %const_vec3 + ; CHECK-NEXT: %add:_(<2 x i32>) = G_SELECT %cond(<2 x i1>), [[ADD]], [[ADD1]] + ; CHECK-NEXT: S_ENDPGM 0, implicit %add(<2 x i32>) + %reg0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %reg1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %cond:_(<2 x i1>) = G_ICMP intpred(eq), %reg0(<2 x i32>), %reg1 + %ten:_(i32) = G_CONSTANT i32 10 + %twenty:_(i32) = G_CONSTANT i32 20 + %undef:_(i32) = G_IMPLICIT_DEF + %const_vec0:_(<2 x i32>) = G_BUILD_VECTOR %undef(i32), %twenty(i32) + %const_vec1:_(<2 x i32>) = G_BUILD_VECTOR %twenty(i32), %undef(i32) + %select:_(<2 x i32>) = G_SELECT %cond(<2 x i1>), %const_vec0, %const_vec1 + %thirty:_(i32) = G_CONSTANT i32 30 + %const_vec3:_(<2 x i32>) = G_BUILD_VECTOR %thirty(i32), %undef(i32) + %add:_(<2 x i32>) = G_ADD %select, %const_vec3 + S_ENDPGM 0, implicit %add(<2 x i32>) ... 
--- @@ -285,22 +285,22 @@ body: | ; CHECK-LABEL: name: fold_sub_into_select_s32_0 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -20 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -10 - ; CHECK-NEXT: %sub:_(s32) = G_SELECT %cond(s1), [[C]], [[C1]] - ; CHECK-NEXT: S_ENDPGM 0, implicit %sub(s32) - %reg:_(s32) = COPY $vgpr0 - %zero:_(s32) = G_CONSTANT i32 0 - %cond:_(s1) = G_ICMP intpred(eq), %reg, %zero - %ten:_(s32) = G_CONSTANT i32 10 - %twenty:_(s32) = G_CONSTANT i32 20 - %select:_(s32) = G_SELECT %cond, %ten, %twenty - %thirty:_(s32) = G_CONSTANT i32 30 - %sub:_(s32) = G_SUB %select, %thirty - S_ENDPGM 0, implicit %sub + ; CHECK-NEXT: %reg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %zero:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 -20 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 -10 + ; CHECK-NEXT: %sub:_(i32) = G_SELECT %cond(i1), [[C]], [[C1]] + ; CHECK-NEXT: S_ENDPGM 0, implicit %sub(i32) + %reg:_(i32) = COPY $vgpr0 + %zero:_(i32) = G_CONSTANT i32 0 + %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + %ten:_(i32) = G_CONSTANT i32 10 + %twenty:_(i32) = G_CONSTANT i32 20 + %select:_(i32) = G_SELECT %cond(i1), %ten, %twenty + %thirty:_(i32) = G_CONSTANT i32 30 + %sub:_(i32) = G_SUB %select, %thirty + S_ENDPGM 0, implicit %sub(i32) ... --- @@ -312,22 +312,22 @@ body: | ; CHECK-LABEL: name: fold_sub_into_select_s32_1 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero - ; CHECK-NEXT: %ten:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: %twenty:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: %sub:_(s32) = G_SELECT %cond(s1), %twenty, %ten - ; CHECK-NEXT: S_ENDPGM 0, implicit %sub(s32) - %reg:_(s32) = COPY $vgpr0 - %zero:_(s32) = G_CONSTANT i32 0 - %cond:_(s1) = G_ICMP intpred(eq), %reg, %zero - %ten:_(s32) = G_CONSTANT i32 10 - %twenty:_(s32) = G_CONSTANT i32 20 - %select:_(s32) = G_SELECT %cond, %ten, %twenty - %thirty:_(s32) = G_CONSTANT i32 30 - %sub:_(s32) = G_SUB %thirty, %select - S_ENDPGM 0, implicit %sub + ; CHECK-NEXT: %reg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %zero:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + ; CHECK-NEXT: %ten:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: %twenty:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: %sub:_(i32) = G_SELECT %cond(i1), %twenty, %ten + ; CHECK-NEXT: S_ENDPGM 0, implicit %sub(i32) + %reg:_(i32) = COPY $vgpr0 + %zero:_(i32) = G_CONSTANT i32 0 + %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + %ten:_(i32) = G_CONSTANT i32 10 + %twenty:_(i32) = G_CONSTANT i32 20 + %select:_(i32) = G_SELECT %cond(i1), %ten, %twenty + %thirty:_(i32) = G_CONSTANT i32 30 + %sub:_(i32) = G_SUB %thirty, %select + S_ENDPGM 0, implicit %sub(i32) ... 
--- @@ -339,22 +339,22 @@ body: | ; CHECK-LABEL: name: fold_ptr_add_into_select_p3_0 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero + ; CHECK-NEXT: %reg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %zero:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero ; CHECK-NEXT: [[C:%[0-9]+]]:_(p3) = G_CONSTANT i32 40 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(p3) = G_CONSTANT i32 50 - ; CHECK-NEXT: %ptr_add:_(p3) = G_SELECT %cond(s1), [[C]], [[C1]] + ; CHECK-NEXT: %ptr_add:_(p3) = G_SELECT %cond(i1), [[C]], [[C1]] ; CHECK-NEXT: S_ENDPGM 0, implicit %ptr_add(p3) - %reg:_(s32) = COPY $vgpr0 - %zero:_(s32) = G_CONSTANT i32 0 - %cond:_(s1) = G_ICMP intpred(eq), %reg, %zero + %reg:_(i32) = COPY $vgpr0 + %zero:_(i32) = G_CONSTANT i32 0 + %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero %ten:_(p3) = G_CONSTANT i32 10 %twenty:_(p3) = G_CONSTANT i32 20 - %select:_(p3) = G_SELECT %cond, %ten, %twenty - %thirty:_(s32) = G_CONSTANT i32 30 - %ptr_add:_(p3) = G_PTR_ADD %select, %thirty - S_ENDPGM 0, implicit %ptr_add + %select:_(p3) = G_SELECT %cond(i1), %ten, %twenty + %thirty:_(i32) = G_CONSTANT i32 30 + %ptr_add:_(p3) = G_PTR_ADD %select, %thirty(i32) + S_ENDPGM 0, implicit %ptr_add(p3) ... --- @@ -366,22 +366,22 @@ body: | ; CHECK-LABEL: name: fold_ptr_add_into_select_p3_1 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero + ; CHECK-NEXT: %reg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %zero:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero ; CHECK-NEXT: [[C:%[0-9]+]]:_(p3) = G_CONSTANT i32 40 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(p3) = G_CONSTANT i32 50 - ; CHECK-NEXT: %ptr_add:_(p3) = G_SELECT %cond(s1), [[C]], [[C1]] + ; CHECK-NEXT: %ptr_add:_(p3) = G_SELECT %cond(i1), [[C]], [[C1]] ; CHECK-NEXT: S_ENDPGM 0, implicit %ptr_add(p3) - %reg:_(s32) = COPY $vgpr0 - %zero:_(s32) = G_CONSTANT i32 0 - %cond:_(s1) = G_ICMP intpred(eq), %reg, %zero - %ten:_(s32) = G_CONSTANT i32 10 - %twenty:_(s32) = G_CONSTANT i32 20 - %select:_(s32) = G_SELECT %cond, %ten, %twenty + %reg:_(i32) = COPY $vgpr0 + %zero:_(i32) = G_CONSTANT i32 0 + %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + %ten:_(i32) = G_CONSTANT i32 10 + %twenty:_(i32) = G_CONSTANT i32 20 + %select:_(i32) = G_SELECT %cond(i1), %ten, %twenty %thirty:_(p3) = G_CONSTANT i32 30 - %ptr_add:_(p3) = G_PTR_ADD %thirty, %select - S_ENDPGM 0, implicit %ptr_add + %ptr_add:_(p3) = G_PTR_ADD %thirty, %select(i32) + S_ENDPGM 0, implicit %ptr_add(p3) ... 
--- @@ -393,22 +393,22 @@ body: | ; CHECK-LABEL: name: fold_shl_into_select_s64_0 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1280 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2560 - ; CHECK-NEXT: %shl:_(s64) = G_SELECT %cond(s1), [[C]], [[C1]] - ; CHECK-NEXT: S_ENDPGM 0, implicit %shl(s64) - %reg:_(s32) = COPY $vgpr0 - %zero:_(s32) = G_CONSTANT i32 0 - %cond:_(s1) = G_ICMP intpred(eq), %reg, %zero - %ten:_(s64) = G_CONSTANT i64 10 - %twenty:_(s64) = G_CONSTANT i64 20 - %select:_(s64) = G_SELECT %cond, %ten, %twenty - %seven:_(s32) = G_CONSTANT i32 7 - %shl:_(s64) = G_SHL %select, %seven - S_ENDPGM 0, implicit %shl + ; CHECK-NEXT: %reg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %zero:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1280 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2560 + ; CHECK-NEXT: %shl:_(i64) = G_SELECT %cond(i1), [[C]], [[C1]] + ; CHECK-NEXT: S_ENDPGM 0, implicit %shl(i64) + %reg:_(i32) = COPY $vgpr0 + %zero:_(i32) = G_CONSTANT i32 0 + %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + %ten:_(i64) = G_CONSTANT i64 10 + %twenty:_(i64) = G_CONSTANT i64 20 + %select:_(i64) = G_SELECT %cond(i1), %ten, %twenty + %seven:_(i32) = G_CONSTANT i32 7 + %shl:_(i64) = G_SHL %select, %seven(i32) + S_ENDPGM 0, implicit %shl(i64) ... --- @@ -420,22 +420,22 @@ body: | ; CHECK-LABEL: name: fold_shl_into_select_s64_1 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8192 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 524288 - ; CHECK-NEXT: %shl:_(s64) = G_SELECT %cond(s1), [[C]], [[C1]] - ; CHECK-NEXT: S_ENDPGM 0, implicit %shl(s64) - %reg:_(s32) = COPY $vgpr0 - %zero:_(s32) = G_CONSTANT i32 0 - %cond:_(s1) = G_ICMP intpred(eq), %reg, %zero - %ten:_(s32) = G_CONSTANT i32 10 - %sixteen:_(s32) = G_CONSTANT i32 16 - %select:_(s32) = G_SELECT %cond, %ten, %sixteen - %eight:_(s64) = G_CONSTANT i64 8 - %shl:_(s64) = G_SHL %eight, %select - S_ENDPGM 0, implicit %shl + ; CHECK-NEXT: %reg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %zero:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 8192 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 524288 + ; CHECK-NEXT: %shl:_(i64) = G_SELECT %cond(i1), [[C]], [[C1]] + ; CHECK-NEXT: S_ENDPGM 0, implicit %shl(i64) + %reg:_(i32) = COPY $vgpr0 + %zero:_(i32) = G_CONSTANT i32 0 + %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + %ten:_(i32) = G_CONSTANT i32 10 + %sixteen:_(i32) = G_CONSTANT i32 16 + %select:_(i32) = G_SELECT %cond(i1), %ten, %sixteen + %eight:_(i64) = G_CONSTANT i64 8 + %shl:_(i64) = G_SHL %eight, %select(i32) + S_ENDPGM 0, implicit %shl(i64) ... 
--- @@ -447,21 +447,21 @@ body: | ; CHECK-LABEL: name: fold_and_variable_into_select_zero_neg1_s32 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %variable:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(ne), %reg(s32), %zero - ; CHECK-NEXT: %select:_(s32) = G_SEXT %cond(s1) - ; CHECK-NEXT: %and:_(s32) = G_AND %select, %variable - ; CHECK-NEXT: S_ENDPGM 0, implicit %and(s32) - %reg:_(s32) = COPY $vgpr0 - %variable:_(s32) = COPY $vgpr0 - %zero:_(s32) = G_CONSTANT i32 0 - %cond:_(s1) = G_ICMP intpred(eq), %reg, %zero - %neg1:_(s32) = G_CONSTANT i32 -1 - %select:_(s32) = G_SELECT %cond, %zero, %neg1 - %and:_(s32) = G_AND %select, %variable - S_ENDPGM 0, implicit %and + ; CHECK-NEXT: %reg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %variable:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %zero:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %cond:_(i1) = G_ICMP intpred(ne), %reg(i32), %zero + ; CHECK-NEXT: %select:_(i32) = G_SEXT %cond(i1) + ; CHECK-NEXT: %and:_(i32) = G_AND %select, %variable + ; CHECK-NEXT: S_ENDPGM 0, implicit %and(i32) + %reg:_(i32) = COPY $vgpr0 + %variable:_(i32) = COPY $vgpr0 + %zero:_(i32) = G_CONSTANT i32 0 + %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + %neg1:_(i32) = G_CONSTANT i32 -1 + %select:_(i32) = G_SELECT %cond(i1), %zero, %neg1 + %and:_(i32) = G_AND %select, %variable + S_ENDPGM 0, implicit %and(i32) ... --- @@ -473,21 +473,21 @@ body: | ; CHECK-LABEL: name: fold_and_variable_into_select_neg1_zero_s32 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %variable:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero - ; CHECK-NEXT: %select:_(s32) = G_SEXT %cond(s1) - ; CHECK-NEXT: %and:_(s32) = G_AND %select, %variable - ; CHECK-NEXT: S_ENDPGM 0, implicit %and(s32) - %reg:_(s32) = COPY $vgpr0 - %variable:_(s32) = COPY $vgpr0 - %zero:_(s32) = G_CONSTANT i32 0 - %cond:_(s1) = G_ICMP intpred(eq), %reg, %zero - %neg1:_(s32) = G_CONSTANT i32 -1 - %select:_(s32) = G_SELECT %cond, %neg1, %zero - %and:_(s32) = G_AND %select, %variable - S_ENDPGM 0, implicit %and + ; CHECK-NEXT: %reg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %variable:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %zero:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + ; CHECK-NEXT: %select:_(i32) = G_SEXT %cond(i1) + ; CHECK-NEXT: %and:_(i32) = G_AND %select, %variable + ; CHECK-NEXT: S_ENDPGM 0, implicit %and(i32) + %reg:_(i32) = COPY $vgpr0 + %variable:_(i32) = COPY $vgpr0 + %zero:_(i32) = G_CONSTANT i32 0 + %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + %neg1:_(i32) = G_CONSTANT i32 -1 + %select:_(i32) = G_SELECT %cond(i1), %neg1, %zero + %and:_(i32) = G_AND %select, %variable + S_ENDPGM 0, implicit %and(i32) ... 
--- @@ -499,21 +499,21 @@ body: | ; CHECK-LABEL: name: fold_or_variable_into_select_zero_neg1_s32 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %variable:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(ne), %reg(s32), %zero - ; CHECK-NEXT: %select:_(s32) = G_SEXT %cond(s1) - ; CHECK-NEXT: %or:_(s32) = G_OR %select, %variable - ; CHECK-NEXT: S_ENDPGM 0, implicit %or(s32) - %reg:_(s32) = COPY $vgpr0 - %variable:_(s32) = COPY $vgpr0 - %zero:_(s32) = G_CONSTANT i32 0 - %cond:_(s1) = G_ICMP intpred(eq), %reg, %zero - %neg1:_(s32) = G_CONSTANT i32 -1 - %select:_(s32) = G_SELECT %cond, %zero, %neg1 - %or:_(s32) = G_OR %select, %variable - S_ENDPGM 0, implicit %or + ; CHECK-NEXT: %reg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %variable:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %zero:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %cond:_(i1) = G_ICMP intpred(ne), %reg(i32), %zero + ; CHECK-NEXT: %select:_(i32) = G_SEXT %cond(i1) + ; CHECK-NEXT: %or:_(i32) = G_OR %select, %variable + ; CHECK-NEXT: S_ENDPGM 0, implicit %or(i32) + %reg:_(i32) = COPY $vgpr0 + %variable:_(i32) = COPY $vgpr0 + %zero:_(i32) = G_CONSTANT i32 0 + %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + %neg1:_(i32) = G_CONSTANT i32 -1 + %select:_(i32) = G_SELECT %cond(i1), %zero, %neg1 + %or:_(i32) = G_OR %select, %variable + S_ENDPGM 0, implicit %or(i32) ... --- @@ -525,21 +525,21 @@ body: | ; CHECK-LABEL: name: fold_or_variable_into_select_neg1_zero_s32 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %variable:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero - ; CHECK-NEXT: %select:_(s32) = G_SEXT %cond(s1) - ; CHECK-NEXT: %or:_(s32) = G_OR %select, %variable - ; CHECK-NEXT: S_ENDPGM 0, implicit %or(s32) - %reg:_(s32) = COPY $vgpr0 - %variable:_(s32) = COPY $vgpr0 - %zero:_(s32) = G_CONSTANT i32 0 - %cond:_(s1) = G_ICMP intpred(eq), %reg, %zero - %neg1:_(s32) = G_CONSTANT i32 -1 - %select:_(s32) = G_SELECT %cond, %neg1, %zero - %or:_(s32) = G_OR %select, %variable - S_ENDPGM 0, implicit %or + ; CHECK-NEXT: %reg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %variable:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %zero:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + ; CHECK-NEXT: %select:_(i32) = G_SEXT %cond(i1) + ; CHECK-NEXT: %or:_(i32) = G_OR %select, %variable + ; CHECK-NEXT: S_ENDPGM 0, implicit %or(i32) + %reg:_(i32) = COPY $vgpr0 + %variable:_(i32) = COPY $vgpr0 + %zero:_(i32) = G_CONSTANT i32 0 + %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + %neg1:_(i32) = G_CONSTANT i32 -1 + %select:_(i32) = G_SELECT %cond(i1), %neg1, %zero + %or:_(i32) = G_OR %select, %variable + S_ENDPGM 0, implicit %or(i32) ... 
--- @@ -551,24 +551,24 @@ body: | ; CHECK-LABEL: name: fold_and_variable_into_select_undef_neg1_s32 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %variable:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero - ; CHECK-NEXT: %undef:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: %neg1:_(s32) = G_CONSTANT i32 -1 - ; CHECK-NEXT: %select:_(s32) = G_SELECT %cond(s1), %undef, %neg1 - ; CHECK-NEXT: %and:_(s32) = G_AND %select, %variable - ; CHECK-NEXT: S_ENDPGM 0, implicit %and(s32) - %reg:_(s32) = COPY $vgpr0 - %variable:_(s32) = COPY $vgpr0 - %zero:_(s32) = G_CONSTANT i32 0 - %cond:_(s1) = G_ICMP intpred(eq), %reg, %zero - %undef:_(s32) = G_IMPLICIT_DEF - %neg1:_(s32) = G_CONSTANT i32 -1 - %select:_(s32) = G_SELECT %cond, %undef, %neg1 - %and:_(s32) = G_AND %select, %variable - S_ENDPGM 0, implicit %and + ; CHECK-NEXT: %reg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %variable:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %zero:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + ; CHECK-NEXT: %undef:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: %neg1:_(i32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: %select:_(i32) = G_SELECT %cond(i1), %undef, %neg1 + ; CHECK-NEXT: %and:_(i32) = G_AND %select, %variable + ; CHECK-NEXT: S_ENDPGM 0, implicit %and(i32) + %reg:_(i32) = COPY $vgpr0 + %variable:_(i32) = COPY $vgpr0 + %zero:_(i32) = G_CONSTANT i32 0 + %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + %undef:_(i32) = G_IMPLICIT_DEF + %neg1:_(i32) = G_CONSTANT i32 -1 + %select:_(i32) = G_SELECT %cond(i1), %undef, %neg1 + %and:_(i32) = G_AND %select, %variable + S_ENDPGM 0, implicit %and(i32) ... --- @@ -580,22 +580,22 @@ body: | ; CHECK-LABEL: name: fold_and_variable_into_select_undef_zero_s32 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %variable:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero - ; CHECK-NEXT: %undef:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: %select:_(s32) = G_SELECT %cond(s1), %undef, %zero - ; CHECK-NEXT: %and:_(s32) = G_AND %select, %variable - ; CHECK-NEXT: S_ENDPGM 0, implicit %and(s32) - %reg:_(s32) = COPY $vgpr0 - %variable:_(s32) = COPY $vgpr0 - %zero:_(s32) = G_CONSTANT i32 0 - %cond:_(s1) = G_ICMP intpred(eq), %reg, %zero - %undef:_(s32) = G_IMPLICIT_DEF - %select:_(s32) = G_SELECT %cond, %undef, %zero - %and:_(s32) = G_AND %select, %variable - S_ENDPGM 0, implicit %and + ; CHECK-NEXT: %reg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %variable:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %zero:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + ; CHECK-NEXT: %undef:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: %select:_(i32) = G_SELECT %cond(i1), %undef, %zero + ; CHECK-NEXT: %and:_(i32) = G_AND %select, %variable + ; CHECK-NEXT: S_ENDPGM 0, implicit %and(i32) + %reg:_(i32) = COPY $vgpr0 + %variable:_(i32) = COPY $vgpr0 + %zero:_(i32) = G_CONSTANT i32 0 + %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + %undef:_(i32) = G_IMPLICIT_DEF + %select:_(i32) = G_SELECT %cond(i1), %undef, %zero + %and:_(i32) = G_AND %select, %variable + S_ENDPGM 0, implicit %and(i32) ... 
--- @@ -607,24 +607,26 @@ body: | ; CHECK-LABEL: name: fold_or_variable_into_select_neg1_fpzero_s32 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %variable:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero - ; CHECK-NEXT: %neg1:_(s32) = G_CONSTANT i32 -1 - ; CHECK-NEXT: %fpzero:_(s32) = G_FCONSTANT float 0.000000e+00 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR %fpzero, %variable - ; CHECK-NEXT: %or:_(s32) = G_SELECT %cond(s1), %neg1, [[OR]] - ; CHECK-NEXT: S_ENDPGM 0, implicit %or(s32) - %reg:_(s32) = COPY $vgpr0 - %variable:_(s32) = COPY $vgpr0 - %zero:_(s32) = G_CONSTANT i32 0 - %cond:_(s1) = G_ICMP intpred(eq), %reg, %zero - %neg1:_(s32) = G_CONSTANT i32 -1 - %fpzero:_(s32) = G_FCONSTANT float 0.0 - %select:_(s32) = G_SELECT %cond, %neg1, %fpzero - %or:_(s32) = G_OR %select, %variable - S_ENDPGM 0, implicit %or + ; CHECK-NEXT: %reg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %variable:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %zero:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + ; CHECK-NEXT: %neg1:_(i32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: %fpzero:_(f32) = G_FCONSTANT float 0.000000e+00 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST %fpzero(f32) + ; CHECK-NEXT: %select:_(i32) = G_SELECT %cond(i1), %neg1, [[BITCAST]] + ; CHECK-NEXT: %or:_(i32) = G_OR %select, %variable + ; CHECK-NEXT: S_ENDPGM 0, implicit %or(i32) + %reg:_(i32) = COPY $vgpr0 + %variable:_(i32) = COPY $vgpr0 + %zero:_(i32) = G_CONSTANT i32 0 + %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + %neg1:_(i32) = G_CONSTANT i32 -1 + %fpzero:_(f32) = G_FCONSTANT float 0.000000e+00 + %6:_(i32) = G_BITCAST %fpzero(f32) + %select:_(i32) = G_SELECT %cond(i1), %neg1, %6 + %or:_(i32) = G_OR %select, %variable + S_ENDPGM 0, implicit %or(i32) ... 
--- @@ -636,24 +638,26 @@ body: | ; CHECK-LABEL: name: no_fold_or_variable_into_select_neg1_fpnegzero_s32 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %variable:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero - ; CHECK-NEXT: %neg1:_(s32) = G_CONSTANT i32 -1 - ; CHECK-NEXT: %fpzero:_(s32) = G_FCONSTANT float -0.000000e+00 - ; CHECK-NEXT: %select:_(s32) = G_SELECT %cond(s1), %neg1, %fpzero - ; CHECK-NEXT: %or:_(s32) = G_OR %select, %variable - ; CHECK-NEXT: S_ENDPGM 0, implicit %or(s32) - %reg:_(s32) = COPY $vgpr0 - %variable:_(s32) = COPY $vgpr0 - %zero:_(s32) = G_CONSTANT i32 0 - %cond:_(s1) = G_ICMP intpred(eq), %reg, %zero - %neg1:_(s32) = G_CONSTANT i32 -1 - %fpzero:_(s32) = G_FCONSTANT float -0.0 - %select:_(s32) = G_SELECT %cond, %neg1, %fpzero - %or:_(s32) = G_OR %select, %variable - S_ENDPGM 0, implicit %or + ; CHECK-NEXT: %reg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %variable:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %zero:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + ; CHECK-NEXT: %neg1:_(i32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: %fpzero:_(f32) = G_FCONSTANT float -0.000000e+00 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST %fpzero(f32) + ; CHECK-NEXT: %select:_(i32) = G_SELECT %cond(i1), %neg1, [[BITCAST]] + ; CHECK-NEXT: %or:_(i32) = G_OR %select, %variable + ; CHECK-NEXT: S_ENDPGM 0, implicit %or(i32) + %reg:_(i32) = COPY $vgpr0 + %variable:_(i32) = COPY $vgpr0 + %zero:_(i32) = G_CONSTANT i32 0 + %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + %neg1:_(i32) = G_CONSTANT i32 -1 + %fpzero:_(f32) = G_FCONSTANT float -0.000000e+00 + %6:_(i32) = G_BITCAST %fpzero(f32) + %select:_(i32) = G_SELECT %cond(i1), %neg1, %6 + %or:_(i32) = G_OR %select, %variable + S_ENDPGM 0, implicit %or(i32) ... 
--- @@ -665,24 +669,24 @@ body: | ; CHECK-LABEL: name: no_fold_or_variable_into_select_neg1_other_const_s32 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %variable:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero - ; CHECK-NEXT: %otherconst:_(s32) = G_CONSTANT i32 123 - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT %cond(s1) - ; CHECK-NEXT: %select:_(s32) = G_OR [[SEXT]], %otherconst - ; CHECK-NEXT: %or:_(s32) = G_OR %select, %variable - ; CHECK-NEXT: S_ENDPGM 0, implicit %or(s32) - %reg:_(s32) = COPY $vgpr0 - %variable:_(s32) = COPY $vgpr0 - %zero:_(s32) = G_CONSTANT i32 0 - %cond:_(s1) = G_ICMP intpred(eq), %reg, %zero - %neg1:_(s32) = G_CONSTANT i32 -1 - %otherconst:_(s32) = G_CONSTANT i32 123 - %select:_(s32) = G_SELECT %cond, %neg1, %otherconst - %or:_(s32) = G_OR %select, %variable - S_ENDPGM 0, implicit %or + ; CHECK-NEXT: %reg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %variable:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %zero:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + ; CHECK-NEXT: %otherconst:_(i32) = G_CONSTANT i32 123 + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT %cond(i1) + ; CHECK-NEXT: %select:_(i32) = G_OR [[SEXT]], %otherconst + ; CHECK-NEXT: %or:_(i32) = G_OR %select, %variable + ; CHECK-NEXT: S_ENDPGM 0, implicit %or(i32) + %reg:_(i32) = COPY $vgpr0 + %variable:_(i32) = COPY $vgpr0 + %zero:_(i32) = G_CONSTANT i32 0 + %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + %neg1:_(i32) = G_CONSTANT i32 -1 + %otherconst:_(i32) = G_CONSTANT i32 123 + %select:_(i32) = G_SELECT %cond(i1), %neg1, %otherconst + %or:_(i32) = G_OR %select, %variable + S_ENDPGM 0, implicit %or(i32) ... --- @@ -694,22 +698,22 @@ body: | ; CHECK-LABEL: name: fold_xor_into_select_s32_0 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero - ; CHECK-NEXT: %ten:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: %twenty:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: %xor:_(s32) = G_SELECT %cond(s1), %twenty, %ten - ; CHECK-NEXT: S_ENDPGM 0, implicit %xor(s32) - %reg:_(s32) = COPY $vgpr0 - %zero:_(s32) = G_CONSTANT i32 0 - %cond:_(s1) = G_ICMP intpred(eq), %reg, %zero - %ten:_(s32) = G_CONSTANT i32 10 - %twenty:_(s32) = G_CONSTANT i32 20 - %select:_(s32) = G_SELECT %cond, %ten, %twenty - %thirty:_(s32) = G_CONSTANT i32 30 - %xor:_(s32) = G_XOR %select, %thirty - S_ENDPGM 0, implicit %xor + ; CHECK-NEXT: %reg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %zero:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + ; CHECK-NEXT: %ten:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: %twenty:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: %xor:_(i32) = G_SELECT %cond(i1), %twenty, %ten + ; CHECK-NEXT: S_ENDPGM 0, implicit %xor(i32) + %reg:_(i32) = COPY $vgpr0 + %zero:_(i32) = G_CONSTANT i32 0 + %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + %ten:_(i32) = G_CONSTANT i32 10 + %twenty:_(i32) = G_CONSTANT i32 20 + %select:_(i32) = G_SELECT %cond(i1), %ten, %twenty + %thirty:_(i32) = G_CONSTANT i32 30 + %xor:_(i32) = G_XOR %select, %thirty + S_ENDPGM 0, implicit %xor(i32) ... 
--- @@ -721,22 +725,22 @@ body: | ; CHECK-LABEL: name: fold_sdiv_into_select_s32_0 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 50 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 21 - ; CHECK-NEXT: %sdiv:_(s32) = G_SELECT %cond(s1), [[C]], [[C1]] - ; CHECK-NEXT: S_ENDPGM 0, implicit %sdiv(s32) - %reg:_(s32) = COPY $vgpr0 - %zero:_(s32) = G_CONSTANT i32 0 - %cond:_(s1) = G_ICMP intpred(eq), %reg, %zero - %hundred:_(s32) = G_CONSTANT i32 100 - %fortytwo:_(s32) = G_CONSTANT i32 42 - %select:_(s32) = G_SELECT %cond, %hundred, %fortytwo - %two:_(s32) = G_CONSTANT i32 2 - %sdiv:_(s32) = G_SDIV %select, %two - S_ENDPGM 0, implicit %sdiv + ; CHECK-NEXT: %reg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %zero:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 50 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 21 + ; CHECK-NEXT: %sdiv:_(i32) = G_SELECT %cond(i1), [[C]], [[C1]] + ; CHECK-NEXT: S_ENDPGM 0, implicit %sdiv(i32) + %reg:_(i32) = COPY $vgpr0 + %zero:_(i32) = G_CONSTANT i32 0 + %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + %hundred:_(i32) = G_CONSTANT i32 100 + %fortytwo:_(i32) = G_CONSTANT i32 42 + %select:_(i32) = G_SELECT %cond(i1), %hundred, %fortytwo + %two:_(i32) = G_CONSTANT i32 2 + %sdiv:_(i32) = G_SDIV %select, %two + S_ENDPGM 0, implicit %sdiv(i32) ... --- @@ -748,20 +752,20 @@ body: | ; CHECK-LABEL: name: fold_srem_into_select_s32_0 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero - ; CHECK-NEXT: %srem:_(s32) = G_ZEXT %cond(s1) - ; CHECK-NEXT: S_ENDPGM 0, implicit %srem(s32) - %reg:_(s32) = COPY $vgpr0 - %zero:_(s32) = G_CONSTANT i32 0 - %cond:_(s1) = G_ICMP intpred(eq), %reg, %zero - %hundred:_(s32) = G_CONSTANT i32 100 - %fortytwo:_(s32) = G_CONSTANT i32 42 - %select:_(s32) = G_SELECT %cond, %hundred, %fortytwo - %three:_(s32) = G_CONSTANT i32 3 - %srem:_(s32) = G_SREM %select, %three - S_ENDPGM 0, implicit %srem + ; CHECK-NEXT: %reg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %zero:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + ; CHECK-NEXT: %srem:_(i32) = G_ZEXT %cond(i1) + ; CHECK-NEXT: S_ENDPGM 0, implicit %srem(i32) + %reg:_(i32) = COPY $vgpr0 + %zero:_(i32) = G_CONSTANT i32 0 + %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + %hundred:_(i32) = G_CONSTANT i32 100 + %fortytwo:_(i32) = G_CONSTANT i32 42 + %select:_(i32) = G_SELECT %cond(i1), %hundred, %fortytwo + %three:_(i32) = G_CONSTANT i32 3 + %srem:_(i32) = G_SREM %select, %three + S_ENDPGM 0, implicit %srem(i32) ... 
--- @@ -773,22 +777,22 @@ body: | ; CHECK-LABEL: name: fold_udiv_into_select_s32_0 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 50 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 21 - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT %cond(s1), [[C]], [[C1]] - ; CHECK-NEXT: S_ENDPGM 0, implicit [[SELECT]](s32) - %reg:_(s32) = COPY $vgpr0 - %zero:_(s32) = G_CONSTANT i32 0 - %cond:_(s1) = G_ICMP intpred(eq), %reg, %zero - %hundred:_(s32) = G_CONSTANT i32 100 - %fortytwo:_(s32) = G_CONSTANT i32 42 - %select:_(s32) = G_SELECT %cond, %hundred, %fortytwo - %two:_(s32) = G_CONSTANT i32 2 - %udiv:_(s32) = G_UDIV %select, %two - S_ENDPGM 0, implicit %udiv + ; CHECK-NEXT: %reg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %zero:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 50 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 21 + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT %cond(i1), [[C]], [[C1]] + ; CHECK-NEXT: S_ENDPGM 0, implicit [[SELECT]](i32) + %reg:_(i32) = COPY $vgpr0 + %zero:_(i32) = G_CONSTANT i32 0 + %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + %hundred:_(i32) = G_CONSTANT i32 100 + %fortytwo:_(i32) = G_CONSTANT i32 42 + %select:_(i32) = G_SELECT %cond(i1), %hundred, %fortytwo + %two:_(i32) = G_CONSTANT i32 2 + %udiv:_(i32) = G_UDIV %select, %two + S_ENDPGM 0, implicit %udiv(i32) ... --- @@ -800,20 +804,20 @@ body: | ; CHECK-LABEL: name: fold_urem_into_select_s32_0 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero - ; CHECK-NEXT: %udiv:_(s32) = G_ZEXT %cond(s1) - ; CHECK-NEXT: S_ENDPGM 0, implicit %udiv(s32) - %reg:_(s32) = COPY $vgpr0 - %zero:_(s32) = G_CONSTANT i32 0 - %cond:_(s1) = G_ICMP intpred(eq), %reg, %zero - %hundred:_(s32) = G_CONSTANT i32 100 - %fortytwo:_(s32) = G_CONSTANT i32 42 - %select:_(s32) = G_SELECT %cond, %hundred, %fortytwo - %three:_(s32) = G_CONSTANT i32 3 - %udiv:_(s32) = G_UREM %select, %three - S_ENDPGM 0, implicit %udiv + ; CHECK-NEXT: %reg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %zero:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + ; CHECK-NEXT: %udiv:_(i32) = G_ZEXT %cond(i1) + ; CHECK-NEXT: S_ENDPGM 0, implicit %udiv(i32) + %reg:_(i32) = COPY $vgpr0 + %zero:_(i32) = G_CONSTANT i32 0 + %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + %hundred:_(i32) = G_CONSTANT i32 100 + %fortytwo:_(i32) = G_CONSTANT i32 42 + %select:_(i32) = G_SELECT %cond(i1), %hundred, %fortytwo + %three:_(i32) = G_CONSTANT i32 3 + %udiv:_(i32) = G_UREM %select, %three + S_ENDPGM 0, implicit %udiv(i32) ... 
--- @@ -825,22 +829,22 @@ body: | ; CHECK-LABEL: name: fold_lshr_into_select_s64_0 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 - ; CHECK-NEXT: %lshr:_(s64) = G_SELECT %cond(s1), [[C]], [[C1]] - ; CHECK-NEXT: S_ENDPGM 0, implicit %lshr(s64) - %reg:_(s32) = COPY $vgpr0 - %zero:_(s32) = G_CONSTANT i32 0 - %cond:_(s1) = G_ICMP intpred(eq), %reg, %zero - %ten:_(s64) = G_CONSTANT i64 10 - %twenty:_(s64) = G_CONSTANT i64 20 - %select:_(s64) = G_SELECT %cond, %ten, %twenty - %two:_(s32) = G_CONSTANT i32 2 - %lshr:_(s64) = G_LSHR %select, %two - S_ENDPGM 0, implicit %lshr + ; CHECK-NEXT: %reg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %zero:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 5 + ; CHECK-NEXT: %lshr:_(i64) = G_SELECT %cond(i1), [[C]], [[C1]] + ; CHECK-NEXT: S_ENDPGM 0, implicit %lshr(i64) + %reg:_(i32) = COPY $vgpr0 + %zero:_(i32) = G_CONSTANT i32 0 + %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + %ten:_(i64) = G_CONSTANT i64 10 + %twenty:_(i64) = G_CONSTANT i64 20 + %select:_(i64) = G_SELECT %cond(i1), %ten, %twenty + %two:_(i32) = G_CONSTANT i32 2 + %lshr:_(i64) = G_LSHR %select, %two(i32) + S_ENDPGM 0, implicit %lshr(i64) ... --- @@ -852,22 +856,22 @@ body: | ; CHECK-LABEL: name: fold_lshr_into_select_s64_1 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1012 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 253 - ; CHECK-NEXT: %lshr:_(s64) = G_SELECT %cond(s1), [[C]], [[C1]] - ; CHECK-NEXT: S_ENDPGM 0, implicit %lshr(s64) - %reg:_(s32) = COPY $vgpr0 - %zero:_(s32) = G_CONSTANT i32 0 - %cond:_(s1) = G_ICMP intpred(eq), %reg, %zero - %three:_(s32) = G_CONSTANT i32 3 - %five:_(s32) = G_CONSTANT i32 5 - %select:_(s32) = G_SELECT %cond, %three, %five - %val:_(s64) = G_CONSTANT i64 8096 - %lshr:_(s64) = G_LSHR %val, %select - S_ENDPGM 0, implicit %lshr + ; CHECK-NEXT: %reg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %zero:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1012 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 253 + ; CHECK-NEXT: %lshr:_(i64) = G_SELECT %cond(i1), [[C]], [[C1]] + ; CHECK-NEXT: S_ENDPGM 0, implicit %lshr(i64) + %reg:_(i32) = COPY $vgpr0 + %zero:_(i32) = G_CONSTANT i32 0 + %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + %three:_(i32) = G_CONSTANT i32 3 + %five:_(i32) = G_CONSTANT i32 5 + %select:_(i32) = G_SELECT %cond(i1), %three, %five + %val:_(i64) = G_CONSTANT i64 8096 + %lshr:_(i64) = G_LSHR %val, %select(i32) + S_ENDPGM 0, implicit %lshr(i64) ... 
--- @@ -879,22 +883,22 @@ body: | ; CHECK-LABEL: name: fold_ashr_into_select_s64_0 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 - ; CHECK-NEXT: %ashr:_(s64) = G_SELECT %cond(s1), [[C]], [[C1]] - ; CHECK-NEXT: S_ENDPGM 0, implicit %ashr(s64) - %reg:_(s32) = COPY $vgpr0 - %zero:_(s32) = G_CONSTANT i32 0 - %cond:_(s1) = G_ICMP intpred(eq), %reg, %zero - %ten:_(s64) = G_CONSTANT i64 10 - %twenty:_(s64) = G_CONSTANT i64 20 - %select:_(s64) = G_SELECT %cond, %ten, %twenty - %two:_(s32) = G_CONSTANT i32 2 - %ashr:_(s64) = G_ASHR %select, %two - S_ENDPGM 0, implicit %ashr + ; CHECK-NEXT: %reg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %zero:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 5 + ; CHECK-NEXT: %ashr:_(i64) = G_SELECT %cond(i1), [[C]], [[C1]] + ; CHECK-NEXT: S_ENDPGM 0, implicit %ashr(i64) + %reg:_(i32) = COPY $vgpr0 + %zero:_(i32) = G_CONSTANT i32 0 + %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + %ten:_(i64) = G_CONSTANT i64 10 + %twenty:_(i64) = G_CONSTANT i64 20 + %select:_(i64) = G_SELECT %cond(i1), %ten, %twenty + %two:_(i32) = G_CONSTANT i32 2 + %ashr:_(i64) = G_ASHR %select, %two(i32) + S_ENDPGM 0, implicit %ashr(i64) ... --- @@ -906,22 +910,22 @@ body: | ; CHECK-LABEL: name: fold_ashr_into_select_s64_1 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1012 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -253 - ; CHECK-NEXT: %ashr:_(s64) = G_SELECT %cond(s1), [[C]], [[C1]] - ; CHECK-NEXT: S_ENDPGM 0, implicit %ashr(s64) - %reg:_(s32) = COPY $vgpr0 - %zero:_(s32) = G_CONSTANT i32 0 - %cond:_(s1) = G_ICMP intpred(eq), %reg, %zero - %three:_(s32) = G_CONSTANT i32 3 - %five:_(s32) = G_CONSTANT i32 5 - %select:_(s32) = G_SELECT %cond, %three, %five - %val:_(s64) = G_CONSTANT i64 -8096 - %ashr:_(s64) = G_ASHR %val, %select - S_ENDPGM 0, implicit %ashr + ; CHECK-NEXT: %reg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %zero:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 -1012 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 -253 + ; CHECK-NEXT: %ashr:_(i64) = G_SELECT %cond(i1), [[C]], [[C1]] + ; CHECK-NEXT: S_ENDPGM 0, implicit %ashr(i64) + %reg:_(i32) = COPY $vgpr0 + %zero:_(i32) = G_CONSTANT i32 0 + %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + %three:_(i32) = G_CONSTANT i32 3 + %five:_(i32) = G_CONSTANT i32 5 + %select:_(i32) = G_SELECT %cond(i1), %three, %five + %val:_(i64) = G_CONSTANT i64 -8096 + %ashr:_(i64) = G_ASHR %val, %select(i32) + S_ENDPGM 0, implicit %ashr(i64) ... 
--- @@ -933,22 +937,22 @@ body: | ; CHECK-LABEL: name: fold_smin_into_select_s32_0 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero - ; CHECK-NEXT: %ten:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: %twenty:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: %select:_(s32) = G_SELECT %cond(s1), %ten, %twenty - ; CHECK-NEXT: S_ENDPGM 0, implicit %select(s32) - %reg:_(s32) = COPY $vgpr0 - %zero:_(s32) = G_CONSTANT i32 0 - %cond:_(s1) = G_ICMP intpred(eq), %reg, %zero - %ten:_(s32) = G_CONSTANT i32 10 - %twenty:_(s32) = G_CONSTANT i32 20 - %select:_(s32) = G_SELECT %cond, %ten, %twenty - %thirty:_(s32) = G_CONSTANT i32 30 - %smin:_(s32) = G_SMIN %select, %thirty - S_ENDPGM 0, implicit %smin + ; CHECK-NEXT: %reg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %zero:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + ; CHECK-NEXT: %ten:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: %twenty:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: %select:_(i32) = G_SELECT %cond(i1), %ten, %twenty + ; CHECK-NEXT: S_ENDPGM 0, implicit %select(i32) + %reg:_(i32) = COPY $vgpr0 + %zero:_(i32) = G_CONSTANT i32 0 + %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + %ten:_(i32) = G_CONSTANT i32 10 + %twenty:_(i32) = G_CONSTANT i32 20 + %select:_(i32) = G_SELECT %cond(i1), %ten, %twenty + %thirty:_(i32) = G_CONSTANT i32 30 + %smin:_(i32) = G_SMIN %select, %thirty + S_ENDPGM 0, implicit %smin(i32) ... --- @@ -960,17 +964,17 @@ body: | ; CHECK-LABEL: name: fold_smax_into_select_s32_0 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %thirty:_(s32) = G_CONSTANT i32 30 - ; CHECK-NEXT: S_ENDPGM 0, implicit %thirty(s32) - %reg:_(s32) = COPY $vgpr0 - %zero:_(s32) = G_CONSTANT i32 0 - %cond:_(s1) = G_ICMP intpred(eq), %reg, %zero - %ten:_(s32) = G_CONSTANT i32 10 - %twenty:_(s32) = G_CONSTANT i32 20 - %select:_(s32) = G_SELECT %cond, %ten, %twenty - %thirty:_(s32) = G_CONSTANT i32 30 - %smax:_(s32) = G_SMAX %select, %thirty - S_ENDPGM 0, implicit %smax + ; CHECK-NEXT: %thirty:_(i32) = G_CONSTANT i32 30 + ; CHECK-NEXT: S_ENDPGM 0, implicit %thirty(i32) + %reg:_(i32) = COPY $vgpr0 + %zero:_(i32) = G_CONSTANT i32 0 + %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + %ten:_(i32) = G_CONSTANT i32 10 + %twenty:_(i32) = G_CONSTANT i32 20 + %select:_(i32) = G_SELECT %cond(i1), %ten, %twenty + %thirty:_(i32) = G_CONSTANT i32 30 + %smax:_(i32) = G_SMAX %select, %thirty + S_ENDPGM 0, implicit %smax(i32) ... 
--- @@ -982,22 +986,22 @@ body: | ; CHECK-LABEL: name: fold_umin_into_select_s32_0 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero - ; CHECK-NEXT: %ten:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: %twenty:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: %select:_(s32) = G_SELECT %cond(s1), %ten, %twenty - ; CHECK-NEXT: S_ENDPGM 0, implicit %select(s32) - %reg:_(s32) = COPY $vgpr0 - %zero:_(s32) = G_CONSTANT i32 0 - %cond:_(s1) = G_ICMP intpred(eq), %reg, %zero - %ten:_(s32) = G_CONSTANT i32 10 - %twenty:_(s32) = G_CONSTANT i32 20 - %select:_(s32) = G_SELECT %cond, %ten, %twenty - %thirty:_(s32) = G_CONSTANT i32 30 - %umin:_(s32) = G_UMIN %select, %thirty - S_ENDPGM 0, implicit %umin + ; CHECK-NEXT: %reg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %zero:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + ; CHECK-NEXT: %ten:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: %twenty:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: %select:_(i32) = G_SELECT %cond(i1), %ten, %twenty + ; CHECK-NEXT: S_ENDPGM 0, implicit %select(i32) + %reg:_(i32) = COPY $vgpr0 + %zero:_(i32) = G_CONSTANT i32 0 + %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + %ten:_(i32) = G_CONSTANT i32 10 + %twenty:_(i32) = G_CONSTANT i32 20 + %select:_(i32) = G_SELECT %cond(i1), %ten, %twenty + %thirty:_(i32) = G_CONSTANT i32 30 + %umin:_(i32) = G_UMIN %select, %thirty + S_ENDPGM 0, implicit %umin(i32) ... --- @@ -1009,17 +1013,17 @@ body: | ; CHECK-LABEL: name: fold_umax_into_select_s32_0 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %thirty:_(s32) = G_CONSTANT i32 30 - ; CHECK-NEXT: S_ENDPGM 0, implicit %thirty(s32) - %reg:_(s32) = COPY $vgpr0 - %zero:_(s32) = G_CONSTANT i32 0 - %cond:_(s1) = G_ICMP intpred(eq), %reg, %zero - %ten:_(s32) = G_CONSTANT i32 10 - %twenty:_(s32) = G_CONSTANT i32 20 - %select:_(s32) = G_SELECT %cond, %ten, %twenty - %thirty:_(s32) = G_CONSTANT i32 30 - %umax:_(s32) = G_UMAX %select, %thirty - S_ENDPGM 0, implicit %umax + ; CHECK-NEXT: %thirty:_(i32) = G_CONSTANT i32 30 + ; CHECK-NEXT: S_ENDPGM 0, implicit %thirty(i32) + %reg:_(i32) = COPY $vgpr0 + %zero:_(i32) = G_CONSTANT i32 0 + %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + %ten:_(i32) = G_CONSTANT i32 10 + %twenty:_(i32) = G_CONSTANT i32 20 + %select:_(i32) = G_SELECT %cond(i1), %ten, %twenty + %thirty:_(i32) = G_CONSTANT i32 30 + %umax:_(i32) = G_UMAX %select, %thirty + S_ENDPGM 0, implicit %umax(i32) ... 
--- @@ -1031,22 +1035,32 @@ body: | ; CHECK-LABEL: name: fold_fadd_into_select_s32_0 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.800000e+01 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 2.000000e+01 - ; CHECK-NEXT: %fadd:_(s32) = nnan G_SELECT %cond(s1), [[C]], [[C1]] - ; CHECK-NEXT: S_ENDPGM 0, implicit %fadd(s32) - %reg:_(s32) = COPY $vgpr0 - %zero:_(s32) = G_CONSTANT i32 0 - %cond:_(s1) = G_ICMP intpred(eq), %reg, %zero - %two:_(s32) = G_FCONSTANT float 2.0 - %four:_(s32) = G_FCONSTANT float 4.0 - %select:_(s32) = G_SELECT %cond, %two, %four - %sixteen:_(s32) = G_FCONSTANT float 16.0 - %fadd:_(s32) = nnan G_FADD %select, %sixteen - S_ENDPGM 0, implicit %fadd + ; CHECK-NEXT: %reg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %zero:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + ; CHECK-NEXT: %two:_(f32) = G_FCONSTANT float 2.000000e+00 + ; CHECK-NEXT: %four:_(f32) = G_FCONSTANT float 4.000000e+00 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST %two(f32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST %four(f32) + ; CHECK-NEXT: %select:_(i32) = G_SELECT %cond(i1), [[BITCAST]], [[BITCAST1]] + ; CHECK-NEXT: %sixteen:_(f32) = G_FCONSTANT float 1.600000e+01 + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST %select(i32) + ; CHECK-NEXT: %fadd:_(f32) = nnan G_FADD [[BITCAST2]], %sixteen + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST %fadd(f32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST3]](i32) + %reg:_(i32) = COPY $vgpr0 + %zero:_(i32) = G_CONSTANT i32 0 + %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + %two:_(f32) = G_FCONSTANT float 2.000000e+00 + %four:_(f32) = G_FCONSTANT float 4.000000e+00 + %5:_(i32) = G_BITCAST %two(f32) + %6:_(i32) = G_BITCAST %four(f32) + %select:_(i32) = G_SELECT %cond(i1), %5, %6 + %sixteen:_(f32) = G_FCONSTANT float 1.600000e+01 + %9:_(f32) = G_BITCAST %select(i32) + %fadd:_(f32) = nnan G_FADD %9, %sixteen + %11:_(i32) = G_BITCAST %fadd(f32) + S_ENDPGM 0, implicit %11(i32) ... 
--- @@ -1058,22 +1072,32 @@ body: | ; CHECK-LABEL: name: fold_fadd_into_select_s32_1 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.800000e+01 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 2.000000e+01 - ; CHECK-NEXT: %fadd:_(s32) = nnan G_SELECT %cond(s1), [[C]], [[C1]] - ; CHECK-NEXT: S_ENDPGM 0, implicit %fadd(s32) - %reg:_(s32) = COPY $vgpr0 - %zero:_(s32) = G_CONSTANT i32 0 - %cond:_(s1) = G_ICMP intpred(eq), %reg, %zero - %two:_(s32) = G_FCONSTANT float 2.0 - %four:_(s32) = G_FCONSTANT float 4.0 - %select:_(s32) = G_SELECT %cond, %two, %four - %sixteen:_(s32) = G_FCONSTANT float 16.0 - %fadd:_(s32) = nnan G_FADD %sixteen, %select - S_ENDPGM 0, implicit %fadd + ; CHECK-NEXT: %reg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %zero:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + ; CHECK-NEXT: %two:_(f32) = G_FCONSTANT float 2.000000e+00 + ; CHECK-NEXT: %four:_(f32) = G_FCONSTANT float 4.000000e+00 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST %two(f32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST %four(f32) + ; CHECK-NEXT: %select:_(i32) = G_SELECT %cond(i1), [[BITCAST]], [[BITCAST1]] + ; CHECK-NEXT: %sixteen:_(f32) = G_FCONSTANT float 1.600000e+01 + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST %select(i32) + ; CHECK-NEXT: %fadd:_(f32) = nnan G_FADD [[BITCAST2]], %sixteen + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST %fadd(f32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST3]](i32) + %reg:_(i32) = COPY $vgpr0 + %zero:_(i32) = G_CONSTANT i32 0 + %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + %two:_(f32) = G_FCONSTANT float 2.000000e+00 + %four:_(f32) = G_FCONSTANT float 4.000000e+00 + %5:_(i32) = G_BITCAST %two(f32) + %6:_(i32) = G_BITCAST %four(f32) + %select:_(i32) = G_SELECT %cond(i1), %5, %6 + %sixteen:_(f32) = G_FCONSTANT float 1.600000e+01 + %9:_(f32) = G_BITCAST %select(i32) + %fadd:_(f32) = nnan G_FADD %sixteen, %9 + %11:_(i32) = G_BITCAST %fadd(f32) + S_ENDPGM 0, implicit %11(i32) ... 
--- @@ -1085,22 +1109,32 @@ body: | ; CHECK-LABEL: name: fold_fsub_into_select_s32_0 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.400000e+01 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.200000e+01 - ; CHECK-NEXT: %fsub:_(s32) = nnan G_SELECT %cond(s1), [[C]], [[C1]] - ; CHECK-NEXT: S_ENDPGM 0, implicit %fsub(s32) - %reg:_(s32) = COPY $vgpr0 - %zero:_(s32) = G_CONSTANT i32 0 - %cond:_(s1) = G_ICMP intpred(eq), %reg, %zero - %two:_(s32) = G_FCONSTANT float 2.0 - %four:_(s32) = G_FCONSTANT float 4.0 - %select:_(s32) = G_SELECT %cond, %two, %four - %sixteen:_(s32) = G_FCONSTANT float 16.0 - %fsub:_(s32) = nnan G_FSUB %select, %sixteen - S_ENDPGM 0, implicit %fsub + ; CHECK-NEXT: %reg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %zero:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + ; CHECK-NEXT: %two:_(f32) = G_FCONSTANT float 2.000000e+00 + ; CHECK-NEXT: %four:_(f32) = G_FCONSTANT float 4.000000e+00 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST %two(f32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST %four(f32) + ; CHECK-NEXT: %select:_(i32) = G_SELECT %cond(i1), [[BITCAST]], [[BITCAST1]] + ; CHECK-NEXT: %sixteen:_(f32) = G_FCONSTANT float 1.600000e+01 + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST %select(i32) + ; CHECK-NEXT: %fsub:_(f32) = nnan G_FSUB [[BITCAST2]], %sixteen + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST %fsub(f32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST3]](i32) + %reg:_(i32) = COPY $vgpr0 + %zero:_(i32) = G_CONSTANT i32 0 + %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + %two:_(f32) = G_FCONSTANT float 2.000000e+00 + %four:_(f32) = G_FCONSTANT float 4.000000e+00 + %5:_(i32) = G_BITCAST %two(f32) + %6:_(i32) = G_BITCAST %four(f32) + %select:_(i32) = G_SELECT %cond(i1), %5, %6 + %sixteen:_(f32) = G_FCONSTANT float 1.600000e+01 + %9:_(f32) = G_BITCAST %select(i32) + %fsub:_(f32) = nnan G_FSUB %9, %sixteen + %11:_(i32) = G_BITCAST %fsub(f32) + S_ENDPGM 0, implicit %11(i32) ... 
--- @@ -1112,22 +1146,32 @@ body: | ; CHECK-LABEL: name: fold_fmul_into_select_s32_0 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 3.200000e+01 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01 - ; CHECK-NEXT: %fmul:_(s32) = nnan G_SELECT %cond(s1), [[C]], [[C1]] - ; CHECK-NEXT: S_ENDPGM 0, implicit %fmul(s32) - %reg:_(s32) = COPY $vgpr0 - %zero:_(s32) = G_CONSTANT i32 0 - %cond:_(s1) = G_ICMP intpred(eq), %reg, %zero - %two:_(s32) = G_FCONSTANT float 2.0 - %four:_(s32) = G_FCONSTANT float 4.0 - %select:_(s32) = G_SELECT %cond, %two, %four - %sixteen:_(s32) = G_FCONSTANT float 16.0 - %fmul:_(s32) = nnan G_FMUL %select, %sixteen - S_ENDPGM 0, implicit %fmul + ; CHECK-NEXT: %reg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %zero:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + ; CHECK-NEXT: %two:_(f32) = G_FCONSTANT float 2.000000e+00 + ; CHECK-NEXT: %four:_(f32) = G_FCONSTANT float 4.000000e+00 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST %two(f32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST %four(f32) + ; CHECK-NEXT: %select:_(i32) = G_SELECT %cond(i1), [[BITCAST]], [[BITCAST1]] + ; CHECK-NEXT: %sixteen:_(f32) = G_FCONSTANT float 1.600000e+01 + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST %select(i32) + ; CHECK-NEXT: %fmul:_(f32) = nnan G_FMUL [[BITCAST2]], %sixteen + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST %fmul(f32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST3]](i32) + %reg:_(i32) = COPY $vgpr0 + %zero:_(i32) = G_CONSTANT i32 0 + %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + %two:_(f32) = G_FCONSTANT float 2.000000e+00 + %four:_(f32) = G_FCONSTANT float 4.000000e+00 + %5:_(i32) = G_BITCAST %two(f32) + %6:_(i32) = G_BITCAST %four(f32) + %select:_(i32) = G_SELECT %cond(i1), %5, %6 + %sixteen:_(f32) = G_FCONSTANT float 1.600000e+01 + %9:_(f32) = G_BITCAST %select(i32) + %fmul:_(f32) = nnan G_FMUL %9, %sixteen + %11:_(i32) = G_BITCAST %fmul(f32) + S_ENDPGM 0, implicit %11(i32) ... 
--- @@ -1139,22 +1183,32 @@ body: | ; CHECK-LABEL: name: fold_fdiv_into_select_s32_0 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.250000e-01 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 2.500000e-01 - ; CHECK-NEXT: %fdiv:_(s32) = nnan G_SELECT %cond(s1), [[C]], [[C1]] - ; CHECK-NEXT: S_ENDPGM 0, implicit %fdiv(s32) - %reg:_(s32) = COPY $vgpr0 - %zero:_(s32) = G_CONSTANT i32 0 - %cond:_(s1) = G_ICMP intpred(eq), %reg, %zero - %two:_(s32) = G_FCONSTANT float 2.0 - %four:_(s32) = G_FCONSTANT float 4.0 - %select:_(s32) = G_SELECT %cond, %two, %four - %sixteen:_(s32) = G_FCONSTANT float 16.0 - %fdiv:_(s32) = nnan G_FDIV %select, %sixteen - S_ENDPGM 0, implicit %fdiv + ; CHECK-NEXT: %reg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %zero:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + ; CHECK-NEXT: %two:_(f32) = G_FCONSTANT float 2.000000e+00 + ; CHECK-NEXT: %four:_(f32) = G_FCONSTANT float 4.000000e+00 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST %two(f32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST %four(f32) + ; CHECK-NEXT: %select:_(i32) = G_SELECT %cond(i1), [[BITCAST]], [[BITCAST1]] + ; CHECK-NEXT: %sixteen:_(f32) = G_FCONSTANT float 1.600000e+01 + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST %select(i32) + ; CHECK-NEXT: %fdiv:_(f32) = nnan G_FDIV [[BITCAST2]], %sixteen + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST %fdiv(f32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST3]](i32) + %reg:_(i32) = COPY $vgpr0 + %zero:_(i32) = G_CONSTANT i32 0 + %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + %two:_(f32) = G_FCONSTANT float 2.000000e+00 + %four:_(f32) = G_FCONSTANT float 4.000000e+00 + %5:_(i32) = G_BITCAST %two(f32) + %6:_(i32) = G_BITCAST %four(f32) + %select:_(i32) = G_SELECT %cond(i1), %5, %6 + %sixteen:_(f32) = G_FCONSTANT float 1.600000e+01 + %9:_(f32) = G_BITCAST %select(i32) + %fdiv:_(f32) = nnan G_FDIV %9, %sixteen + %11:_(i32) = G_BITCAST %fdiv(f32) + S_ENDPGM 0, implicit %11(i32) ... 
--- @@ -1166,22 +1220,32 @@ body: | ; CHECK-LABEL: name: fold_frem_into_select_s32_0 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero - ; CHECK-NEXT: %two:_(s32) = G_FCONSTANT float 2.000000e+00 - ; CHECK-NEXT: %four:_(s32) = G_FCONSTANT float 4.000000e+00 - ; CHECK-NEXT: %frem:_(s32) = nnan G_SELECT %cond(s1), %two, %four - ; CHECK-NEXT: S_ENDPGM 0, implicit %frem(s32) - %reg:_(s32) = COPY $vgpr0 - %zero:_(s32) = G_CONSTANT i32 0 - %cond:_(s1) = G_ICMP intpred(eq), %reg, %zero - %two:_(s32) = G_FCONSTANT float 2.0 - %four:_(s32) = G_FCONSTANT float 4.0 - %select:_(s32) = G_SELECT %cond, %two, %four - %sixteen:_(s32) = G_FCONSTANT float 16.0 - %frem:_(s32) = nnan G_FREM %select, %sixteen - S_ENDPGM 0, implicit %frem + ; CHECK-NEXT: %reg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %zero:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + ; CHECK-NEXT: %two:_(f32) = G_FCONSTANT float 2.000000e+00 + ; CHECK-NEXT: %four:_(f32) = G_FCONSTANT float 4.000000e+00 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST %two(f32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST %four(f32) + ; CHECK-NEXT: %select:_(i32) = G_SELECT %cond(i1), [[BITCAST]], [[BITCAST1]] + ; CHECK-NEXT: %sixteen:_(f32) = G_FCONSTANT float 1.600000e+01 + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST %select(i32) + ; CHECK-NEXT: %frem:_(f32) = nnan G_FREM [[BITCAST2]], %sixteen + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST %frem(f32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST3]](i32) + %reg:_(i32) = COPY $vgpr0 + %zero:_(i32) = G_CONSTANT i32 0 + %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + %two:_(f32) = G_FCONSTANT float 2.000000e+00 + %four:_(f32) = G_FCONSTANT float 4.000000e+00 + %5:_(i32) = G_BITCAST %two(f32) + %6:_(i32) = G_BITCAST %four(f32) + %select:_(i32) = G_SELECT %cond(i1), %5, %6 + %sixteen:_(f32) = G_FCONSTANT float 1.600000e+01 + %9:_(f32) = G_BITCAST %select(i32) + %frem:_(f32) = nnan G_FREM %9, %sixteen + %11:_(i32) = G_BITCAST %frem(f32) + S_ENDPGM 0, implicit %11(i32) ... 
--- @@ -1193,24 +1257,32 @@ body: | ; CHECK-LABEL: name: fold_fpow_into_select_s32_0 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero - ; CHECK-NEXT: %two:_(s32) = G_FCONSTANT float 2.000000e+00 - ; CHECK-NEXT: %four:_(s32) = G_FCONSTANT float 4.000000e+00 - ; CHECK-NEXT: %select:_(s32) = G_SELECT %cond(s1), %two, %four - ; CHECK-NEXT: %sixteen:_(s32) = G_FCONSTANT float 1.600000e+01 - ; CHECK-NEXT: %fpow:_(s32) = nnan G_FPOW %select, %sixteen - ; CHECK-NEXT: S_ENDPGM 0, implicit %fpow(s32) - %reg:_(s32) = COPY $vgpr0 - %zero:_(s32) = G_CONSTANT i32 0 - %cond:_(s1) = G_ICMP intpred(eq), %reg, %zero - %two:_(s32) = G_FCONSTANT float 2.0 - %four:_(s32) = G_FCONSTANT float 4.0 - %select:_(s32) = G_SELECT %cond, %two, %four - %sixteen:_(s32) = G_FCONSTANT float 16.0 - %fpow:_(s32) = nnan G_FPOW %select, %sixteen - S_ENDPGM 0, implicit %fpow + ; CHECK-NEXT: %reg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %zero:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + ; CHECK-NEXT: %two:_(f32) = G_FCONSTANT float 2.000000e+00 + ; CHECK-NEXT: %four:_(f32) = G_FCONSTANT float 4.000000e+00 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST %two(f32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST %four(f32) + ; CHECK-NEXT: %select:_(i32) = G_SELECT %cond(i1), [[BITCAST]], [[BITCAST1]] + ; CHECK-NEXT: %sixteen:_(f32) = G_FCONSTANT float 1.600000e+01 + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST %select(i32) + ; CHECK-NEXT: %fpow:_(f32) = nnan G_FPOW [[BITCAST2]], %sixteen + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST %fpow(f32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST3]](i32) + %reg:_(i32) = COPY $vgpr0 + %zero:_(i32) = G_CONSTANT i32 0 + %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + %two:_(f32) = G_FCONSTANT float 2.000000e+00 + %four:_(f32) = G_FCONSTANT float 4.000000e+00 + %5:_(i32) = G_BITCAST %two(f32) + %6:_(i32) = G_BITCAST %four(f32) + %select:_(i32) = G_SELECT %cond(i1), %5, %6 + %sixteen:_(f32) = G_FCONSTANT float 1.600000e+01 + %9:_(f32) = G_BITCAST %select(i32) + %fpow:_(f32) = nnan G_FPOW %9, %sixteen + %11:_(i32) = G_BITCAST %fpow(f32) + S_ENDPGM 0, implicit %11(i32) ... 
--- @@ -1222,22 +1294,32 @@ body: | ; CHECK-LABEL: name: fold_fminnum_into_select_s32_0 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero - ; CHECK-NEXT: %two:_(s32) = G_FCONSTANT float 2.000000e+00 - ; CHECK-NEXT: %four:_(s32) = G_FCONSTANT float 4.000000e+00 - ; CHECK-NEXT: %fminnum:_(s32) = nnan G_SELECT %cond(s1), %two, %four - ; CHECK-NEXT: S_ENDPGM 0, implicit %fminnum(s32) - %reg:_(s32) = COPY $vgpr0 - %zero:_(s32) = G_CONSTANT i32 0 - %cond:_(s1) = G_ICMP intpred(eq), %reg, %zero - %two:_(s32) = G_FCONSTANT float 2.0 - %four:_(s32) = G_FCONSTANT float 4.0 - %select:_(s32) = G_SELECT %cond, %two, %four - %sixteen:_(s32) = G_FCONSTANT float 16.0 - %fminnum:_(s32) = nnan G_FMINNUM %select, %sixteen - S_ENDPGM 0, implicit %fminnum + ; CHECK-NEXT: %reg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %zero:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + ; CHECK-NEXT: %two:_(f32) = G_FCONSTANT float 2.000000e+00 + ; CHECK-NEXT: %four:_(f32) = G_FCONSTANT float 4.000000e+00 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST %two(f32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST %four(f32) + ; CHECK-NEXT: %select:_(i32) = G_SELECT %cond(i1), [[BITCAST]], [[BITCAST1]] + ; CHECK-NEXT: %sixteen:_(f32) = G_FCONSTANT float 1.600000e+01 + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST %select(i32) + ; CHECK-NEXT: %fminnum:_(f32) = nnan G_FMINNUM [[BITCAST2]], %sixteen + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST %fminnum(f32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST3]](i32) + %reg:_(i32) = COPY $vgpr0 + %zero:_(i32) = G_CONSTANT i32 0 + %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + %two:_(f32) = G_FCONSTANT float 2.000000e+00 + %four:_(f32) = G_FCONSTANT float 4.000000e+00 + %5:_(i32) = G_BITCAST %two(f32) + %6:_(i32) = G_BITCAST %four(f32) + %select:_(i32) = G_SELECT %cond(i1), %5, %6 + %sixteen:_(f32) = G_FCONSTANT float 1.600000e+01 + %9:_(f32) = G_BITCAST %select(i32) + %fminnum:_(f32) = nnan G_FMINNUM %9, %sixteen + %11:_(i32) = G_BITCAST %fminnum(f32) + S_ENDPGM 0, implicit %11(i32) ... 
--- @@ -1249,24 +1331,32 @@ body: | ; CHECK-LABEL: name: fold_fminnum_ieee_into_select_s32_0 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero - ; CHECK-NEXT: %two:_(s32) = G_FCONSTANT float 2.000000e+00 - ; CHECK-NEXT: %four:_(s32) = G_FCONSTANT float 4.000000e+00 - ; CHECK-NEXT: %select:_(s32) = G_SELECT %cond(s1), %two, %four - ; CHECK-NEXT: %sixteen:_(s32) = G_FCONSTANT float 1.600000e+01 - ; CHECK-NEXT: %fminnum_ieee:_(s32) = nnan G_FMINNUM_IEEE %select, %sixteen - ; CHECK-NEXT: S_ENDPGM 0, implicit %fminnum_ieee(s32) - %reg:_(s32) = COPY $vgpr0 - %zero:_(s32) = G_CONSTANT i32 0 - %cond:_(s1) = G_ICMP intpred(eq), %reg, %zero - %two:_(s32) = G_FCONSTANT float 2.0 - %four:_(s32) = G_FCONSTANT float 4.0 - %select:_(s32) = G_SELECT %cond, %two, %four - %sixteen:_(s32) = G_FCONSTANT float 16.0 - %fminnum_ieee:_(s32) = nnan G_FMINNUM_IEEE %select, %sixteen - S_ENDPGM 0, implicit %fminnum_ieee + ; CHECK-NEXT: %reg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %zero:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + ; CHECK-NEXT: %two:_(f32) = G_FCONSTANT float 2.000000e+00 + ; CHECK-NEXT: %four:_(f32) = G_FCONSTANT float 4.000000e+00 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST %two(f32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST %four(f32) + ; CHECK-NEXT: %select:_(i32) = G_SELECT %cond(i1), [[BITCAST]], [[BITCAST1]] + ; CHECK-NEXT: %sixteen:_(f32) = G_FCONSTANT float 1.600000e+01 + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST %select(i32) + ; CHECK-NEXT: %fminnum_ieee:_(f32) = nnan G_FMINNUM_IEEE [[BITCAST2]], %sixteen + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST %fminnum_ieee(f32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST3]](i32) + %reg:_(i32) = COPY $vgpr0 + %zero:_(i32) = G_CONSTANT i32 0 + %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + %two:_(f32) = G_FCONSTANT float 2.000000e+00 + %four:_(f32) = G_FCONSTANT float 4.000000e+00 + %5:_(i32) = G_BITCAST %two(f32) + %6:_(i32) = G_BITCAST %four(f32) + %select:_(i32) = G_SELECT %cond(i1), %5, %6 + %sixteen:_(f32) = G_FCONSTANT float 1.600000e+01 + %9:_(f32) = G_BITCAST %select(i32) + %fminnum_ieee:_(f32) = nnan G_FMINNUM_IEEE %9, %sixteen + %11:_(i32) = G_BITCAST %fminnum_ieee(f32) + S_ENDPGM 0, implicit %11(i32) ... 
--- @@ -1278,22 +1368,32 @@ body: | ; CHECK-LABEL: name: fold_fmaxnum_into_select_s32_0 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero - ; CHECK-NEXT: %four:_(s32) = G_FCONSTANT float 4.000000e+00 - ; CHECK-NEXT: %three:_(s32) = G_FCONSTANT float 3.000000e+00 - ; CHECK-NEXT: %fmaxnum:_(s32) = nnan G_SELECT %cond(s1), %three, %four - ; CHECK-NEXT: S_ENDPGM 0, implicit %fmaxnum(s32) - %reg:_(s32) = COPY $vgpr0 - %zero:_(s32) = G_CONSTANT i32 0 - %cond:_(s1) = G_ICMP intpred(eq), %reg, %zero - %two:_(s32) = G_FCONSTANT float 2.0 - %four:_(s32) = G_FCONSTANT float 4.0 - %select:_(s32) = G_SELECT %cond, %two, %four - %three:_(s32) = G_FCONSTANT float 3.0 - %fmaxnum:_(s32) = nnan G_FMAXNUM %select, %three - S_ENDPGM 0, implicit %fmaxnum + ; CHECK-NEXT: %reg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %zero:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + ; CHECK-NEXT: %two:_(f32) = G_FCONSTANT float 2.000000e+00 + ; CHECK-NEXT: %four:_(f32) = G_FCONSTANT float 4.000000e+00 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST %two(f32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST %four(f32) + ; CHECK-NEXT: %select:_(i32) = G_SELECT %cond(i1), [[BITCAST]], [[BITCAST1]] + ; CHECK-NEXT: %three:_(f32) = G_FCONSTANT float 3.000000e+00 + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST %select(i32) + ; CHECK-NEXT: %fmaxnum:_(f32) = nnan G_FMAXNUM [[BITCAST2]], %three + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST %fmaxnum(f32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST3]](i32) + %reg:_(i32) = COPY $vgpr0 + %zero:_(i32) = G_CONSTANT i32 0 + %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + %two:_(f32) = G_FCONSTANT float 2.000000e+00 + %four:_(f32) = G_FCONSTANT float 4.000000e+00 + %5:_(i32) = G_BITCAST %two(f32) + %6:_(i32) = G_BITCAST %four(f32) + %select:_(i32) = G_SELECT %cond(i1), %5, %6 + %three:_(f32) = G_FCONSTANT float 3.000000e+00 + %9:_(f32) = G_BITCAST %select(i32) + %fmaxnum:_(f32) = nnan G_FMAXNUM %9, %three + %11:_(i32) = G_BITCAST %fmaxnum(f32) + S_ENDPGM 0, implicit %11(i32) ... 
--- @@ -1305,24 +1405,32 @@ body: | ; CHECK-LABEL: name: fold_fmaxnum_ieee_into_select_s32_0 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero - ; CHECK-NEXT: %two:_(s32) = G_FCONSTANT float 2.000000e+00 - ; CHECK-NEXT: %four:_(s32) = G_FCONSTANT float 4.000000e+00 - ; CHECK-NEXT: %select:_(s32) = G_SELECT %cond(s1), %two, %four - ; CHECK-NEXT: %three:_(s32) = G_FCONSTANT float 3.000000e+00 - ; CHECK-NEXT: %fmaxnum_ieee:_(s32) = nnan G_FMAXNUM_IEEE %select, %three - ; CHECK-NEXT: S_ENDPGM 0, implicit %fmaxnum_ieee(s32) - %reg:_(s32) = COPY $vgpr0 - %zero:_(s32) = G_CONSTANT i32 0 - %cond:_(s1) = G_ICMP intpred(eq), %reg, %zero - %two:_(s32) = G_FCONSTANT float 2.0 - %four:_(s32) = G_FCONSTANT float 4.0 - %select:_(s32) = G_SELECT %cond, %two, %four - %three:_(s32) = G_FCONSTANT float 3.0 - %fmaxnum_ieee:_(s32) = nnan G_FMAXNUM_IEEE %select, %three - S_ENDPGM 0, implicit %fmaxnum_ieee + ; CHECK-NEXT: %reg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %zero:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + ; CHECK-NEXT: %two:_(f32) = G_FCONSTANT float 2.000000e+00 + ; CHECK-NEXT: %four:_(f32) = G_FCONSTANT float 4.000000e+00 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST %two(f32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST %four(f32) + ; CHECK-NEXT: %select:_(i32) = G_SELECT %cond(i1), [[BITCAST]], [[BITCAST1]] + ; CHECK-NEXT: %three:_(f32) = G_FCONSTANT float 3.000000e+00 + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST %select(i32) + ; CHECK-NEXT: %fmaxnum_ieee:_(f32) = nnan G_FMAXNUM_IEEE [[BITCAST2]], %three + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST %fmaxnum_ieee(f32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST3]](i32) + %reg:_(i32) = COPY $vgpr0 + %zero:_(i32) = G_CONSTANT i32 0 + %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + %two:_(f32) = G_FCONSTANT float 2.000000e+00 + %four:_(f32) = G_FCONSTANT float 4.000000e+00 + %5:_(i32) = G_BITCAST %two(f32) + %6:_(i32) = G_BITCAST %four(f32) + %select:_(i32) = G_SELECT %cond(i1), %5, %6 + %three:_(f32) = G_FCONSTANT float 3.000000e+00 + %9:_(f32) = G_BITCAST %select(i32) + %fmaxnum_ieee:_(f32) = nnan G_FMAXNUM_IEEE %9, %three + %11:_(i32) = G_BITCAST %fmaxnum_ieee(f32) + S_ENDPGM 0, implicit %11(i32) ... 
--- @@ -1334,22 +1442,32 @@ body: | ; CHECK-LABEL: name: fold_fminimum_into_select_s32_0 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero - ; CHECK-NEXT: %two:_(s32) = G_FCONSTANT float 2.000000e+00 - ; CHECK-NEXT: %four:_(s32) = G_FCONSTANT float 4.000000e+00 - ; CHECK-NEXT: %fminimum:_(s32) = nnan G_SELECT %cond(s1), %two, %four - ; CHECK-NEXT: S_ENDPGM 0, implicit %fminimum(s32) - %reg:_(s32) = COPY $vgpr0 - %zero:_(s32) = G_CONSTANT i32 0 - %cond:_(s1) = G_ICMP intpred(eq), %reg, %zero - %two:_(s32) = G_FCONSTANT float 2.0 - %four:_(s32) = G_FCONSTANT float 4.0 - %select:_(s32) = G_SELECT %cond, %two, %four - %sixteen:_(s32) = G_FCONSTANT float 16.0 - %fminimum:_(s32) = nnan G_FMINIMUM %select, %sixteen - S_ENDPGM 0, implicit %fminimum + ; CHECK-NEXT: %reg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %zero:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + ; CHECK-NEXT: %two:_(f32) = G_FCONSTANT float 2.000000e+00 + ; CHECK-NEXT: %four:_(f32) = G_FCONSTANT float 4.000000e+00 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST %two(f32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST %four(f32) + ; CHECK-NEXT: %select:_(i32) = G_SELECT %cond(i1), [[BITCAST]], [[BITCAST1]] + ; CHECK-NEXT: %sixteen:_(f32) = G_FCONSTANT float 1.600000e+01 + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST %select(i32) + ; CHECK-NEXT: %fminimum:_(f32) = nnan G_FMINIMUM [[BITCAST2]], %sixteen + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST %fminimum(f32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST3]](i32) + %reg:_(i32) = COPY $vgpr0 + %zero:_(i32) = G_CONSTANT i32 0 + %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + %two:_(f32) = G_FCONSTANT float 2.000000e+00 + %four:_(f32) = G_FCONSTANT float 4.000000e+00 + %5:_(i32) = G_BITCAST %two(f32) + %6:_(i32) = G_BITCAST %four(f32) + %select:_(i32) = G_SELECT %cond(i1), %5, %6 + %sixteen:_(f32) = G_FCONSTANT float 1.600000e+01 + %9:_(f32) = G_BITCAST %select(i32) + %fminimum:_(f32) = nnan G_FMINIMUM %9, %sixteen + %11:_(i32) = G_BITCAST %fminimum(f32) + S_ENDPGM 0, implicit %11(i32) ... 
--- @@ -1361,22 +1479,32 @@ body: | ; CHECK-LABEL: name: fold_fmaximum_into_select_s32_0 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero - ; CHECK-NEXT: %four:_(s32) = G_FCONSTANT float 4.000000e+00 - ; CHECK-NEXT: %three:_(s32) = G_FCONSTANT float 3.000000e+00 - ; CHECK-NEXT: %fmaximum:_(s32) = nnan G_SELECT %cond(s1), %three, %four - ; CHECK-NEXT: S_ENDPGM 0, implicit %fmaximum(s32) - %reg:_(s32) = COPY $vgpr0 - %zero:_(s32) = G_CONSTANT i32 0 - %cond:_(s1) = G_ICMP intpred(eq), %reg, %zero - %two:_(s32) = G_FCONSTANT float 2.0 - %four:_(s32) = G_FCONSTANT float 4.0 - %select:_(s32) = G_SELECT %cond, %two, %four - %three:_(s32) = G_FCONSTANT float 3.0 - %fmaximum:_(s32) = nnan G_FMAXIMUM %select, %three - S_ENDPGM 0, implicit %fmaximum + ; CHECK-NEXT: %reg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %zero:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + ; CHECK-NEXT: %two:_(f32) = G_FCONSTANT float 2.000000e+00 + ; CHECK-NEXT: %four:_(f32) = G_FCONSTANT float 4.000000e+00 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST %two(f32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST %four(f32) + ; CHECK-NEXT: %select:_(i32) = G_SELECT %cond(i1), [[BITCAST]], [[BITCAST1]] + ; CHECK-NEXT: %three:_(f32) = G_FCONSTANT float 3.000000e+00 + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST %select(i32) + ; CHECK-NEXT: %fmaximum:_(f32) = nnan G_FMAXIMUM [[BITCAST2]], %three + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST %fmaximum(f32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST3]](i32) + %reg:_(i32) = COPY $vgpr0 + %zero:_(i32) = G_CONSTANT i32 0 + %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + %two:_(f32) = G_FCONSTANT float 2.000000e+00 + %four:_(f32) = G_FCONSTANT float 4.000000e+00 + %5:_(i32) = G_BITCAST %two(f32) + %6:_(i32) = G_BITCAST %four(f32) + %select:_(i32) = G_SELECT %cond(i1), %5, %6 + %three:_(f32) = G_FCONSTANT float 3.000000e+00 + %9:_(f32) = G_BITCAST %select(i32) + %fmaximum:_(f32) = nnan G_FMAXIMUM %9, %three + %11:_(i32) = G_BITCAST %fmaximum(f32) + S_ENDPGM 0, implicit %11(i32) ... 
--- @@ -1388,24 +1516,32 @@ body: | ; CHECK-LABEL: name: fold_fcopysign_into_select_s32_0 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero - ; CHECK-NEXT: %two:_(s32) = G_FCONSTANT float 2.000000e+00 - ; CHECK-NEXT: %four:_(s32) = G_FCONSTANT float 4.000000e+00 - ; CHECK-NEXT: %select:_(s32) = G_SELECT %cond(s1), %two, %four - ; CHECK-NEXT: %sixteen:_(s32) = G_FCONSTANT float 1.600000e+01 - ; CHECK-NEXT: %fcopysign:_(s32) = nnan G_FCOPYSIGN %select, %sixteen(s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit %fcopysign(s32) - %reg:_(s32) = COPY $vgpr0 - %zero:_(s32) = G_CONSTANT i32 0 - %cond:_(s1) = G_ICMP intpred(eq), %reg, %zero - %two:_(s32) = G_FCONSTANT float 2.0 - %four:_(s32) = G_FCONSTANT float 4.0 - %select:_(s32) = G_SELECT %cond, %two, %four - %sixteen:_(s32) = G_FCONSTANT float 16.0 - %fcopysign:_(s32) = nnan G_FCOPYSIGN %select, %sixteen - S_ENDPGM 0, implicit %fcopysign + ; CHECK-NEXT: %reg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %zero:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + ; CHECK-NEXT: %two:_(f32) = G_FCONSTANT float 2.000000e+00 + ; CHECK-NEXT: %four:_(f32) = G_FCONSTANT float 4.000000e+00 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST %two(f32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST %four(f32) + ; CHECK-NEXT: %select:_(i32) = G_SELECT %cond(i1), [[BITCAST]], [[BITCAST1]] + ; CHECK-NEXT: %sixteen:_(f32) = G_FCONSTANT float 1.600000e+01 + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST %select(i32) + ; CHECK-NEXT: %fcopysign:_(f32) = nnan G_FCOPYSIGN [[BITCAST2]], %sixteen(f32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST %fcopysign(f32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST3]](i32) + %reg:_(i32) = COPY $vgpr0 + %zero:_(i32) = G_CONSTANT i32 0 + %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + %two:_(f32) = G_FCONSTANT float 2.000000e+00 + %four:_(f32) = G_FCONSTANT float 4.000000e+00 + %5:_(i32) = G_BITCAST %two(f32) + %6:_(i32) = G_BITCAST %four(f32) + %select:_(i32) = G_SELECT %cond(i1), %5, %6 + %sixteen:_(f32) = G_FCONSTANT float 1.600000e+01 + %9:_(f32) = G_BITCAST %select(i32) + %fcopysign:_(f32) = nnan G_FCOPYSIGN %9, %sixteen(f32) + %11:_(i32) = G_BITCAST %fcopysign(f32) + S_ENDPGM 0, implicit %11(i32) ... 
--- @@ -1417,24 +1553,32 @@ body: | ; CHECK-LABEL: name: fold_fcopysign_into_select_s32_s64_0 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero - ; CHECK-NEXT: %two:_(s32) = G_FCONSTANT float 2.000000e+00 - ; CHECK-NEXT: %four:_(s32) = G_FCONSTANT float 4.000000e+00 - ; CHECK-NEXT: %select:_(s32) = G_SELECT %cond(s1), %two, %four - ; CHECK-NEXT: %sixteen:_(s64) = G_FCONSTANT double 1.600000e+01 - ; CHECK-NEXT: %fcopysign:_(s32) = nnan G_FCOPYSIGN %select, %sixteen(s64) - ; CHECK-NEXT: S_ENDPGM 0, implicit %fcopysign(s32) - %reg:_(s32) = COPY $vgpr0 - %zero:_(s32) = G_CONSTANT i32 0 - %cond:_(s1) = G_ICMP intpred(eq), %reg, %zero - %two:_(s32) = G_FCONSTANT float 2.0 - %four:_(s32) = G_FCONSTANT float 4.0 - %select:_(s32) = G_SELECT %cond, %two, %four - %sixteen:_(s64) = G_FCONSTANT double 16.0 - %fcopysign:_(s32) = nnan G_FCOPYSIGN %select, %sixteen - S_ENDPGM 0, implicit %fcopysign + ; CHECK-NEXT: %reg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %zero:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + ; CHECK-NEXT: %two:_(f32) = G_FCONSTANT float 2.000000e+00 + ; CHECK-NEXT: %four:_(f32) = G_FCONSTANT float 4.000000e+00 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST %two(f32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST %four(f32) + ; CHECK-NEXT: %select:_(i32) = G_SELECT %cond(i1), [[BITCAST]], [[BITCAST1]] + ; CHECK-NEXT: %sixteen:_(f64) = G_FCONSTANT double 1.600000e+01 + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST %select(i32) + ; CHECK-NEXT: %fcopysign:_(f32) = nnan G_FCOPYSIGN [[BITCAST2]], %sixteen(f64) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST %fcopysign(f32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST3]](i32) + %reg:_(i32) = COPY $vgpr0 + %zero:_(i32) = G_CONSTANT i32 0 + %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + %two:_(f32) = G_FCONSTANT float 2.000000e+00 + %four:_(f32) = G_FCONSTANT float 4.000000e+00 + %5:_(i32) = G_BITCAST %two(f32) + %6:_(i32) = G_BITCAST %four(f32) + %select:_(i32) = G_SELECT %cond(i1), %5, %6 + %sixteen:_(f64) = G_FCONSTANT double 1.600000e+01 + %9:_(f32) = G_BITCAST %select(i32) + %fcopysign:_(f32) = nnan G_FCOPYSIGN %9, %sixteen(f64) + %11:_(i32) = G_BITCAST %fcopysign(f32) + S_ENDPGM 0, implicit %11(i32) ... 
--- @@ -1446,24 +1590,32 @@ body: | ; CHECK-LABEL: name: fold_fcopysign_into_select_s64_s32_0 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero - ; CHECK-NEXT: %two:_(s64) = G_FCONSTANT double 2.000000e+00 - ; CHECK-NEXT: %four:_(s64) = G_FCONSTANT double 4.000000e+00 - ; CHECK-NEXT: %select:_(s64) = G_SELECT %cond(s1), %two, %four - ; CHECK-NEXT: %sixteen:_(s32) = G_FCONSTANT float 1.600000e+01 - ; CHECK-NEXT: %fcopysign:_(s64) = nnan G_FCOPYSIGN %select, %sixteen(s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit %fcopysign(s64) - %reg:_(s32) = COPY $vgpr0 - %zero:_(s32) = G_CONSTANT i32 0 - %cond:_(s1) = G_ICMP intpred(eq), %reg, %zero - %two:_(s64) = G_FCONSTANT double 2.0 - %four:_(s64) = G_FCONSTANT double 4.0 - %select:_(s64) = G_SELECT %cond, %two, %four - %sixteen:_(s32) = G_FCONSTANT float 16.0 - %fcopysign:_(s64) = nnan G_FCOPYSIGN %select, %sixteen - S_ENDPGM 0, implicit %fcopysign + ; CHECK-NEXT: %reg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %zero:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + ; CHECK-NEXT: %two:_(f64) = G_FCONSTANT double 2.000000e+00 + ; CHECK-NEXT: %four:_(f64) = G_FCONSTANT double 4.000000e+00 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i64) = G_BITCAST %two(f64) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST %four(f64) + ; CHECK-NEXT: %select:_(i64) = G_SELECT %cond(i1), [[BITCAST]], [[BITCAST1]] + ; CHECK-NEXT: %sixteen:_(f32) = G_FCONSTANT float 1.600000e+01 + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(f64) = G_BITCAST %select(i64) + ; CHECK-NEXT: %fcopysign:_(f64) = nnan G_FCOPYSIGN [[BITCAST2]], %sixteen(f32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i64) = G_BITCAST %fcopysign(f64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST3]](i64) + %reg:_(i32) = COPY $vgpr0 + %zero:_(i32) = G_CONSTANT i32 0 + %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + %two:_(f64) = G_FCONSTANT double 2.000000e+00 + %four:_(f64) = G_FCONSTANT double 4.000000e+00 + %5:_(i64) = G_BITCAST %two(f64) + %6:_(i64) = G_BITCAST %four(f64) + %select:_(i64) = G_SELECT %cond(i1), %5, %6 + %sixteen:_(f32) = G_FCONSTANT float 1.600000e+01 + %9:_(f64) = G_BITCAST %select(i64) + %fcopysign:_(f64) = nnan G_FCOPYSIGN %9, %sixteen(f32) + %11:_(i64) = G_BITCAST %fcopysign(f64) + S_ENDPGM 0, implicit %11(i64) ... # Test handling of intermediate copy between add and select. 
@@ -1476,23 +1628,23 @@ body: | ; CHECK-LABEL: name: fold_add_copy_into_select_s32_0 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 40 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 50 - ; CHECK-NEXT: %add:_(s32) = G_SELECT %cond(s1), [[C]], [[C1]] - ; CHECK-NEXT: S_ENDPGM 0, implicit %add(s32) - %reg:_(s32) = COPY $vgpr0 - %zero:_(s32) = G_CONSTANT i32 0 - %cond:_(s1) = G_ICMP intpred(eq), %reg, %zero - %ten:_(s32) = G_CONSTANT i32 10 - %twenty:_(s32) = G_CONSTANT i32 20 - %select:_(s32) = G_SELECT %cond, %ten, %twenty - %thirty:_(s32) = G_CONSTANT i32 30 - %copy_select:_(s32) = COPY %select - %add:_(s32) = G_ADD %copy_select, %thirty - S_ENDPGM 0, implicit %add + ; CHECK-NEXT: %reg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %zero:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 40 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 50 + ; CHECK-NEXT: %add:_(i32) = G_SELECT %cond(i1), [[C]], [[C1]] + ; CHECK-NEXT: S_ENDPGM 0, implicit %add(i32) + %reg:_(i32) = COPY $vgpr0 + %zero:_(i32) = G_CONSTANT i32 0 + %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + %ten:_(i32) = G_CONSTANT i32 10 + %twenty:_(i32) = G_CONSTANT i32 20 + %select:_(i32) = G_SELECT %cond(i1), %ten, %twenty + %thirty:_(i32) = G_CONSTANT i32 30 + %copy_select:_(i32) = COPY %select(i32) + %add:_(i32) = G_ADD %copy_select, %thirty + S_ENDPGM 0, implicit %add(i32) ... --- @@ -1504,21 +1656,21 @@ body: | ; CHECK-LABEL: name: fold_add_copy_into_select_s32_1 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 40 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 50 - ; CHECK-NEXT: %add:_(s32) = G_SELECT %cond(s1), [[C]], [[C1]] - ; CHECK-NEXT: S_ENDPGM 0, implicit %add(s32) - %reg:_(s32) = COPY $vgpr0 - %zero:_(s32) = G_CONSTANT i32 0 - %cond:_(s1) = G_ICMP intpred(eq), %reg, %zero - %ten:_(s32) = G_CONSTANT i32 10 - %twenty:_(s32) = G_CONSTANT i32 20 - %select:_(s32) = G_SELECT %cond, %ten, %twenty - %thirty:_(s32) = G_CONSTANT i32 30 - %copy_select:_(s32) = COPY %select - %add:_(s32) = G_ADD %thirty, %copy_select - S_ENDPGM 0, implicit %add + ; CHECK-NEXT: %reg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %zero:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 40 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 50 + ; CHECK-NEXT: %add:_(i32) = G_SELECT %cond(i1), [[C]], [[C1]] + ; CHECK-NEXT: S_ENDPGM 0, implicit %add(i32) + %reg:_(i32) = COPY $vgpr0 + %zero:_(i32) = G_CONSTANT i32 0 + %cond:_(i1) = G_ICMP intpred(eq), %reg(i32), %zero + %ten:_(i32) = G_CONSTANT i32 10 + %twenty:_(i32) = G_CONSTANT i32 20 + %select:_(i32) = G_SELECT %cond(i1), %ten, %twenty + %thirty:_(i32) = G_CONSTANT i32 30 + %copy_select:_(i32) = COPY %select(i32) + %add:_(i32) = G_ADD %thirty, %copy_select + S_ENDPGM 0, implicit %add(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-foldable-fneg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-foldable-fneg.mir index 99170d3276cc2..c0a9032228c4e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-foldable-fneg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-foldable-fneg.mir @@ -11,17 +11,23 @@ body: | ; CHECK-LABEL: name: test_fminnum ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]] - ; CHECK-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[COPY1]] - ; CHECK-NEXT: [[FMAXNUM:%[0-9]+]]:_(s32) = G_FMAXNUM [[FNEG]], [[FNEG1]] - ; CHECK-NEXT: $vgpr0 = COPY [[FMAXNUM]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_FMINNUM %0, %1 - %3:_(s32) = G_FNEG %2 - $vgpr0 = COPY %3(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[BITCAST]] + ; CHECK-NEXT: [[FNEG1:%[0-9]+]]:_(f32) = G_FNEG [[BITCAST1]] + ; CHECK-NEXT: [[FMAXNUM:%[0-9]+]]:_(f32) = G_FMAXNUM [[FNEG]], [[FNEG1]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMAXNUM]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32) = G_FMINNUM %2, %3 + %5:_(f32) = G_FNEG %4 + %6:_(i32) = G_BITCAST %5(f32) + $vgpr0 = COPY %6(i32) ... --- @@ -33,17 +39,23 @@ body: | ; CHECK-LABEL: name: test_fmaxnum ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]] - ; CHECK-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[COPY1]] - ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:_(s32) = G_FMINNUM [[FNEG]], [[FNEG1]] - ; CHECK-NEXT: $vgpr0 = COPY [[FMINNUM]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_FMAXNUM %0, %1 - %3:_(s32) = G_FNEG %2 - $vgpr0 = COPY %3(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[BITCAST]] + ; CHECK-NEXT: [[FNEG1:%[0-9]+]]:_(f32) = G_FNEG [[BITCAST1]] + ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:_(f32) = G_FMINNUM [[FNEG]], [[FNEG1]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMINNUM]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32) = G_FMAXNUM %2, %3 + %5:_(f32) = G_FNEG %4 + %6:_(i32) = G_BITCAST %5(f32) + $vgpr0 = COPY %6(i32) ... 
--- @@ -55,17 +67,23 @@ body: | ; CHECK-LABEL: name: test_fminnum_ieee ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]] - ; CHECK-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[COPY1]] - ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FNEG]], [[FNEG1]] - ; CHECK-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_FMINNUM_IEEE %0, %1 - %3:_(s32) = G_FNEG %2 - $vgpr0 = COPY %3(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[BITCAST]] + ; CHECK-NEXT: [[FNEG1:%[0-9]+]]:_(f32) = G_FNEG [[BITCAST1]] + ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FNEG]], [[FNEG1]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMAXNUM_IEEE]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32) = G_FMINNUM_IEEE %2, %3 + %5:_(f32) = G_FNEG %4 + %6:_(i32) = G_BITCAST %5(f32) + $vgpr0 = COPY %6(i32) ... --- @@ -77,17 +95,23 @@ body: | ; CHECK-LABEL: name: test_fmaxnum_ieee ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]] - ; CHECK-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[COPY1]] - ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FNEG]], [[FNEG1]] - ; CHECK-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_FMAXNUM_IEEE %0, %1 - %3:_(s32) = G_FNEG %2 - $vgpr0 = COPY %3(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[BITCAST]] + ; CHECK-NEXT: [[FNEG1:%[0-9]+]]:_(f32) = G_FNEG [[BITCAST1]] + ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FNEG]], [[FNEG1]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMINNUM_IEEE]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32) = G_FMAXNUM_IEEE %2, %3 + %5:_(f32) = G_FNEG %4 + %6:_(i32) = G_BITCAST %5(f32) + $vgpr0 = COPY %6(i32) ... 
--- @@ -99,17 +123,20 @@ body: | ; CHECK-LABEL: name: test_amdgpu_fmin_legacy ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]] - ; CHECK-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[COPY1]] - ; CHECK-NEXT: [[AMDGPU_FMAX_LEGACY:%[0-9]+]]:_(s32) = G_AMDGPU_FMAX_LEGACY [[FNEG]], [[FNEG1]] - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMAX_LEGACY]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_AMDGPU_FMIN_LEGACY %0, %1 - %3:_(s32) = G_FNEG %2 - $vgpr0 = COPY %3(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[AMDGPU_FMIN_LEGACY:%[0-9]+]]:_(i32) = G_AMDGPU_FMIN_LEGACY [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_FMIN_LEGACY]](i32) + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[BITCAST]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[FNEG]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_AMDGPU_FMIN_LEGACY %0, %1 + %3:_(f32) = G_BITCAST %2(i32) + %4:_(f32) = G_FNEG %3 + %5:_(i32) = G_BITCAST %4(f32) + $vgpr0 = COPY %5(i32) ... --- @@ -121,17 +148,20 @@ body: | ; CHECK-LABEL: name: test_amdgpu_fmax_legacy ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]] - ; CHECK-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[COPY1]] - ; CHECK-NEXT: [[AMDGPU_FMIN_LEGACY:%[0-9]+]]:_(s32) = G_AMDGPU_FMIN_LEGACY [[FNEG]], [[FNEG1]] - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMIN_LEGACY]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_AMDGPU_FMAX_LEGACY %0, %1 - %3:_(s32) = G_FNEG %2 - $vgpr0 = COPY %3(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[AMDGPU_FMAX_LEGACY:%[0-9]+]]:_(i32) = G_AMDGPU_FMAX_LEGACY [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_FMAX_LEGACY]](i32) + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[BITCAST]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[FNEG]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_AMDGPU_FMAX_LEGACY %0, %1 + %3:_(f32) = G_BITCAST %2(i32) + %4:_(f32) = G_FNEG %3 + %5:_(i32) = G_BITCAST %4(f32) + $vgpr0 = COPY %5(i32) ... 
--- @@ -143,16 +173,22 @@ body: | ; CHECK-LABEL: name: test_fadd ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]] - ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = nsz G_FSUB [[FNEG]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[FSUB]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = nsz G_FADD %0, %1 - %3:_(s32) = G_FNEG %2 - $vgpr0 = COPY %3(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[BITCAST]] + ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(f32) = nsz G_FSUB [[FNEG]], [[BITCAST1]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FSUB]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32) = nsz G_FADD %2, %3 + %5:_(f32) = G_FNEG %4 + %6:_(i32) = G_BITCAST %5(f32) + $vgpr0 = COPY %6(i32) ... --- @@ -164,15 +200,21 @@ body: | ; CHECK-LABEL: name: test_fsub ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = nsz G_FSUB [[COPY1]], [[COPY]] - ; CHECK-NEXT: $vgpr0 = COPY [[FSUB]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = nsz G_FSUB %0, %1 - %3:_(s32) = G_FNEG %2 - $vgpr0 = COPY %3(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(f32) = nsz G_FSUB [[BITCAST1]], [[BITCAST]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FSUB]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32) = nsz G_FSUB %2, %3 + %5:_(f32) = G_FNEG %4 + %6:_(i32) = G_BITCAST %5(f32) + $vgpr0 = COPY %6(i32) ... 
---
@@ -184,19 +226,22 @@ body: |
; CHECK-LABEL: name: test_fma
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY1]]
- ; CHECK-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[COPY2]]
- ; CHECK-NEXT: [[FMA:%[0-9]+]]:_(s32) = nsz G_FMA [[COPY]], [[FNEG]], [[FNEG1]]
- ; CHECK-NEXT: $vgpr0 = COPY [[FMA]](s32)
- %0:_(s32) = COPY $vgpr0
- %1:_(s32) = COPY $vgpr1
- %2:_(s32) = COPY $vgpr2
- %3:_(s32) = nsz G_FMA %0, %1, %2
- %4:_(s32) = G_FNEG %3
- $vgpr0 = COPY %4(s32)
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32)
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32)
+ ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32)
+ ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[BITCAST1]]
+ ; CHECK-NEXT: [[FNEG1:%[0-9]+]]:_(f32) = G_FNEG [[BITCAST2]]
+ ; CHECK-NEXT: [[FMA:%[0-9]+]]:_(f32) = nsz G_FMA [[BITCAST]], [[FNEG]], [[FNEG1]]
+ ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMA]](f32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST3]](i32)
+ %0:_(i32) = COPY $vgpr0
+ %1:_(i32) = COPY $vgpr1
+ %2:_(i32) = COPY $vgpr2
+ %3:_(f32) = G_BITCAST %0(i32)
+ %4:_(f32) = G_BITCAST %1(i32)
+ %5:_(f32) = G_BITCAST %2(i32)
+ %6:_(f32) = nsz G_FMA %3, %4, %5
+ %7:_(f32) = G_FNEG %6
+ %8:_(i32) = G_BITCAST %7(f32)
+ $vgpr0 = COPY %8(i32)
...
---
@@ -208,19 +253,27 @@ body: |
; CHECK-LABEL: name: test_fmad
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY1]]
- ; CHECK-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[COPY2]]
- ; CHECK-NEXT: [[FMAD:%[0-9]+]]:_(s32) = nsz G_FMAD [[COPY]], [[FNEG]], [[FNEG1]]
- ; CHECK-NEXT: $vgpr0 = COPY [[FMAD]](s32)
- %0:_(s32) = COPY $vgpr0
- %1:_(s32) = COPY $vgpr1
- %2:_(s32) = COPY $vgpr2
- %3:_(s32) = nsz G_FMAD %0, %1, %2
- %4:_(s32) = G_FNEG %3
- $vgpr0 = COPY %4(s32)
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32)
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32)
+ ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32)
+ ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[BITCAST1]]
+ ; CHECK-NEXT: [[FNEG1:%[0-9]+]]:_(f32) = G_FNEG [[BITCAST2]]
+ ; CHECK-NEXT: [[FMAD:%[0-9]+]]:_(f32) = nsz G_FMAD [[BITCAST]], [[FNEG]], [[FNEG1]]
+ ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMAD]](f32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST3]](i32)
+ %0:_(i32) = COPY $vgpr0
+ %1:_(i32) = COPY $vgpr1
+ %2:_(i32) = COPY $vgpr2
+ %3:_(f32) = G_BITCAST %0(i32)
+ %4:_(f32) = G_BITCAST %1(i32)
+ %5:_(f32) = G_BITCAST %2(i32)
+ %6:_(f32) = nsz G_FMAD %3, %4, %5
+ %7:_(f32) = G_FNEG %6
+ %8:_(i32) = G_BITCAST %7(f32)
+ $vgpr0 = COPY %8(i32)
...
--- @@ -232,16 +285,22 @@ body: | ; CHECK-LABEL: name: test_fmul ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY1]] - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[FNEG]] - ; CHECK-NEXT: $vgpr0 = COPY [[FMUL]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_FMUL %0, %1 - %3:_(s32) = G_FNEG %2 - $vgpr0 = COPY %3(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[BITCAST1]] + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST]], [[FNEG]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMUL]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32) = G_FMUL %2, %3 + %5:_(f32) = G_FNEG %4 + %6:_(i32) = G_BITCAST %5(f32) + $vgpr0 = COPY %6(i32) ... --- @@ -253,16 +312,20 @@ body: | ; CHECK-LABEL: name: test_fpext ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC]] - ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[FPEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0(s32) - %2:_(s32) = G_FPEXT %1(s16) - %3:_(s32) = G_FNEG %2 - $vgpr0 = COPY %3(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(f16) = G_FNEG [[BITCAST]] + ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[FNEG]](f16) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[FPEXT]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(f16) = G_BITCAST %1(i16) + %3:_(f32) = G_FPEXT %2(f16) + %4:_(f32) = G_FNEG %3 + %5:_(i32) = G_BITCAST %4(f32) + $vgpr0 = COPY %5(i32) ... --- @@ -274,14 +337,18 @@ body: | ; CHECK-LABEL: name: test_intrinsic_trunc ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]] - ; CHECK-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FNEG]] - ; CHECK-NEXT: $vgpr0 = COPY [[INTRINSIC_TRUNC]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_INTRINSIC_TRUNC %0 - %2:_(s32) = G_FNEG %1 - $vgpr0 = COPY %2(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[BITCAST]] + ; CHECK-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FNEG]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[INTRINSIC_TRUNC]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = G_INTRINSIC_TRUNC %1 + %3:_(f32) = G_FNEG %2 + %4:_(i32) = G_BITCAST %3(f32) + $vgpr0 = COPY %4(i32) ... 
--- @@ -293,14 +360,18 @@ body: | ; CHECK-LABEL: name: test_frint ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]] - ; CHECK-NEXT: [[FRINT:%[0-9]+]]:_(s32) = G_FRINT [[FNEG]] - ; CHECK-NEXT: $vgpr0 = COPY [[FRINT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_FRINT %0 - %2:_(s32) = G_FNEG %1 - $vgpr0 = COPY %2(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[BITCAST]] + ; CHECK-NEXT: [[FRINT:%[0-9]+]]:_(f32) = G_FRINT [[FNEG]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[FRINT]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = G_FRINT %1 + %3:_(f32) = G_FNEG %2 + %4:_(i32) = G_BITCAST %3(f32) + $vgpr0 = COPY %4(i32) ... --- @@ -312,14 +383,18 @@ body: | ; CHECK-LABEL: name: test_fnearbyint ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]] - ; CHECK-NEXT: [[FNEARBYINT:%[0-9]+]]:_(s32) = G_FNEARBYINT [[FNEG]] - ; CHECK-NEXT: $vgpr0 = COPY [[FNEARBYINT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_FNEARBYINT %0 - %2:_(s32) = G_FNEG %1 - $vgpr0 = COPY %2(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[BITCAST]] + ; CHECK-NEXT: [[FNEARBYINT:%[0-9]+]]:_(f32) = G_FNEARBYINT [[FNEG]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[FNEARBYINT]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = G_FNEARBYINT %1 + %3:_(f32) = G_FNEG %2 + %4:_(i32) = G_BITCAST %3(f32) + $vgpr0 = COPY %4(i32) ... --- @@ -331,14 +406,18 @@ body: | ; CHECK-LABEL: name: test_intrinsic_round ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]] - ; CHECK-NEXT: [[INTRINSIC_ROUND:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUND [[FNEG]] - ; CHECK-NEXT: $vgpr0 = COPY [[INTRINSIC_ROUND]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_INTRINSIC_ROUND %0 - %2:_(s32) = G_FNEG %1 - $vgpr0 = COPY %2(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[BITCAST]] + ; CHECK-NEXT: [[INTRINSIC_ROUND:%[0-9]+]]:_(f32) = G_INTRINSIC_ROUND [[FNEG]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[INTRINSIC_ROUND]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = G_INTRINSIC_ROUND %1 + %3:_(f32) = G_FNEG %2 + %4:_(i32) = G_BITCAST %3(f32) + $vgpr0 = COPY %4(i32) ... 
--- @@ -350,14 +429,18 @@ body: | ; CHECK-LABEL: name: test_intrinsic_roundeven ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]] - ; CHECK-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[FNEG]] - ; CHECK-NEXT: $vgpr0 = COPY [[INTRINSIC_ROUNDEVEN]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_INTRINSIC_ROUNDEVEN %0 - %2:_(s32) = G_FNEG %1 - $vgpr0 = COPY %2(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[BITCAST]] + ; CHECK-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(f32) = G_INTRINSIC_ROUNDEVEN [[FNEG]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[INTRINSIC_ROUNDEVEN]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = G_INTRINSIC_ROUNDEVEN %1 + %3:_(f32) = G_FNEG %2 + %4:_(i32) = G_BITCAST %3(f32) + $vgpr0 = COPY %4(i32) ... --- @@ -369,14 +452,18 @@ body: | ; CHECK-LABEL: name: test_fsin ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]] - ; CHECK-NEXT: [[FSIN:%[0-9]+]]:_(s32) = G_FSIN [[FNEG]] - ; CHECK-NEXT: $vgpr0 = COPY [[FSIN]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_FSIN %0 - %2:_(s32) = G_FNEG %1 - $vgpr0 = COPY %2(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[BITCAST]] + ; CHECK-NEXT: [[FSIN:%[0-9]+]]:_(f32) = G_FSIN [[FNEG]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[FSIN]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = G_FSIN %1 + %3:_(f32) = G_FNEG %2 + %4:_(i32) = G_BITCAST %3(f32) + $vgpr0 = COPY %4(i32) ... --- @@ -388,14 +475,18 @@ body: | ; CHECK-LABEL: name: test_fcanonicalize ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]] - ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FNEG]] - ; CHECK-NEXT: $vgpr0 = COPY [[FCANONICALIZE]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_FCANONICALIZE %0 - %2:_(s32) = G_FNEG %1 - $vgpr0 = COPY %2(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[BITCAST]] + ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[FNEG]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[FCANONICALIZE]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = G_FCANONICALIZE %1 + %3:_(f32) = G_FNEG %2 + %4:_(i32) = G_BITCAST %3(f32) + $vgpr0 = COPY %4(i32) ... 
--- @@ -407,14 +498,18 @@ body: | ; CHECK-LABEL: name: test_amdgcn_rcp_iflag ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]] - ; CHECK-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FNEG]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_RCP_IFLAG]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_AMDGPU_RCP_IFLAG %0 - %2:_(s32) = G_FNEG %1 - $vgpr0 = COPY %2(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(i32) = G_AMDGPU_RCP_IFLAG [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[AMDGPU_RCP_IFLAG]](i32) + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[BITCAST]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[FNEG]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_AMDGPU_RCP_IFLAG %0(i32) + %2:_(f32) = G_BITCAST %1(i32) + %3:_(f32) = G_FNEG %2 + %4:_(i32) = G_BITCAST %3(f32) + $vgpr0 = COPY %4(i32) ... --- @@ -425,14 +520,18 @@ body: | ; CHECK-LABEL: name: test_fptrunc ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY]] - ; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:_(s32) = G_FPTRUNC [[FNEG]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[FPTRUNC]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_FPTRUNC %0:_(s64) - %2:_(s32) = G_FNEG %1:_ - $vgpr0 = COPY %2:_(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(f64) = G_FNEG [[BITCAST]] + ; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:_(f32) = G_FPTRUNC [[FNEG]](f64) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[FPTRUNC]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(f64) = G_BITCAST %0(i64) + %2:_(f32) = G_FPTRUNC %1(f64) + %3:_(f32) = G_FNEG %2 + %4:_(i32) = G_BITCAST %3(f32) + $vgpr0 = COPY %4(i32) ... --- @@ -444,14 +543,18 @@ body: | ; CHECK-LABEL: name: test_amdgcn_rcp ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]] - ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FNEG]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[INT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %0(s32) - %2:_(s32) = G_FNEG %1 - $vgpr0 = COPY %2(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[BITCAST]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FNEG]](f32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[INT]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %1(f32) + %3:_(f32) = G_FNEG %2 + %4:_(i32) = G_BITCAST %3(f32) + $vgpr0 = COPY %4(i32) ... 
--- @@ -463,14 +566,18 @@ body: | ; CHECK-LABEL: name: test_amdgcn_rcp_legacy ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]] - ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp.legacy), [[FNEG]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[INT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp.legacy), %0(s32) - %2:_(s32) = G_FNEG %1 - $vgpr0 = COPY %2(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[BITCAST]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp.legacy), [[FNEG]](f32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[INT]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp.legacy), %1(f32) + %3:_(f32) = G_FNEG %2 + %4:_(i32) = G_BITCAST %3(f32) + $vgpr0 = COPY %4(i32) ... --- @@ -482,14 +589,18 @@ body: | ; CHECK-LABEL: name: test_amdgcn_sin ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]] - ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FNEG]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[INT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), %0(s32) - %2:_(s32) = G_FNEG %1 - $vgpr0 = COPY %2(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[BITCAST]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FNEG]](f32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[INT]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), %1(f32) + %3:_(f32) = G_FNEG %2 + %4:_(i32) = G_BITCAST %3(f32) + $vgpr0 = COPY %4(i32) ... 
--- @@ -501,16 +612,22 @@ body: | ; CHECK-LABEL: name: test_fmul_legacy ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY1]] - ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[COPY]](s32), [[FNEG]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[INT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), %0(s32), %1(s32) - %3:_(s32) = G_FNEG %2 - $vgpr0 = COPY %3(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[BITCAST1]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[BITCAST]](f32), [[FNEG]](f32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[INT]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), %2(f32), %3(f32) + %5:_(f32) = G_FNEG %4 + %6:_(i32) = G_BITCAST %5(f32) + $vgpr0 = COPY %6(i32) ... --- @@ -522,20 +639,28 @@ body: | ; CHECK-LABEL: name: test_fmed3 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]] - ; CHECK-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[COPY1]] - ; CHECK-NEXT: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[COPY2]] - ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), [[FNEG]](s32), [[FNEG1]](s32), [[FNEG2]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[INT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %0(s32), %1(s32), %2(s32) - %4:_(s32) = G_FNEG %3 - $vgpr0 = COPY %4(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[BITCAST]] + ; CHECK-NEXT: [[FNEG1:%[0-9]+]]:_(f32) = G_FNEG [[BITCAST1]] + ; CHECK-NEXT: [[FNEG2:%[0-9]+]]:_(f32) = G_FNEG [[BITCAST2]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), [[FNEG]](f32), [[FNEG1]](f32), [[FNEG2]](f32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[INT]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(f32) = G_BITCAST %0(i32) + %4:_(f32) = G_BITCAST %1(i32) + %5:_(f32) = G_BITCAST %2(i32) + %6:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %3(f32), %4(f32), %5(f32) + %7:_(f32) = G_FNEG %6 + %8:_(i32) = G_BITCAST %7(f32) + $vgpr0 = COPY %8(i32) ... 
--- @@ -547,19 +672,27 @@ body: | ; CHECK-LABEL: name: test_amdgcn_fma_legacy ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY1]] - ; CHECK-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[COPY2]] - ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = nsz G_INTRINSIC intrinsic(@llvm.amdgcn.fma.legacy), [[COPY]](s32), [[FNEG]](s32), [[FNEG1]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[INT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = nsz G_INTRINSIC intrinsic(@llvm.amdgcn.fma.legacy), %0(s32), %1(s32), %2(s32) - %4:_(s32) = G_FNEG %3 - $vgpr0 = COPY %4(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[BITCAST1]] + ; CHECK-NEXT: [[FNEG1:%[0-9]+]]:_(f32) = G_FNEG [[BITCAST2]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(f32) = nsz G_INTRINSIC intrinsic(@llvm.amdgcn.fma.legacy), [[BITCAST]](f32), [[FNEG]](f32), [[FNEG1]](f32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[INT]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(f32) = G_BITCAST %0(i32) + %4:_(f32) = G_BITCAST %1(i32) + %5:_(f32) = G_BITCAST %2(i32) + %6:_(f32) = nsz G_INTRINSIC intrinsic(@llvm.amdgcn.fma.legacy), %3(f32), %4(f32), %5(f32) + %7:_(f32) = G_FNEG %6 + %8:_(i32) = G_BITCAST %7(f32) + $vgpr0 = COPY %8(i32) ... @@ -573,16 +706,22 @@ body: | ; CHECK-LABEL: name: test_fadd_sz ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FADD]] - ; CHECK-NEXT: $vgpr0 = COPY [[FNEG]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_FADD %0, %1 - %3:_(s32) = G_FNEG %2 - $vgpr0 = COPY %3(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[BITCAST]], [[BITCAST1]] + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[FADD]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FNEG]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32) = G_FADD %2, %3 + %5:_(f32) = G_FNEG %4 + %6:_(i32) = G_BITCAST %5(f32) + $vgpr0 = COPY %6(i32) ... 
--- @@ -594,16 +733,22 @@ body: | ; CHECK-LABEL: name: test_fsub_sz ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FSUB]] - ; CHECK-NEXT: $vgpr0 = COPY [[FNEG]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_FSUB %0, %1 - %3:_(s32) = G_FNEG %2 - $vgpr0 = COPY %3(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(f32) = G_FSUB [[BITCAST]], [[BITCAST1]] + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[FSUB]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FNEG]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32) = G_FSUB %2, %3 + %5:_(f32) = G_FNEG %4 + %6:_(i32) = G_BITCAST %5(f32) + $vgpr0 = COPY %6(i32) ... --- @@ -615,18 +760,26 @@ body: | ; CHECK-LABEL: name: test_fma_sz ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FMA]] - ; CHECK-NEXT: $vgpr0 = COPY [[FNEG]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = G_FMA %0, %1, %2 - %4:_(s32) = G_FNEG %3 - $vgpr0 = COPY %4(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; CHECK-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[FMA]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FNEG]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(f32) = G_BITCAST %0(i32) + %4:_(f32) = G_BITCAST %1(i32) + %5:_(f32) = G_BITCAST %2(i32) + %6:_(f32) = G_FMA %3, %4, %5 + %7:_(f32) = G_FNEG %6 + %8:_(i32) = G_BITCAST %7(f32) + $vgpr0 = COPY %8(i32) ... 
--- @@ -638,18 +791,26 @@ body: | ; CHECK-LABEL: name: test_fmad_sz ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[COPY]], [[COPY1]], [[COPY2]] - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FMAD]] - ; CHECK-NEXT: $vgpr0 = COPY [[FNEG]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = G_FMAD %0, %1, %2 - %4:_(s32) = G_FNEG %3 - $vgpr0 = COPY %4(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; CHECK-NEXT: [[FMAD:%[0-9]+]]:_(f32) = G_FMAD [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[FMAD]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FNEG]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(f32) = G_BITCAST %0(i32) + %4:_(f32) = G_BITCAST %1(i32) + %5:_(f32) = G_BITCAST %2(i32) + %6:_(f32) = G_FMAD %3, %4, %5 + %7:_(f32) = G_FNEG %6 + %8:_(i32) = G_BITCAST %7(f32) + $vgpr0 = COPY %8(i32) ... --- @@ -661,18 +822,26 @@ body: | ; CHECK-LABEL: name: test_amdgcn_fma_legacy_sz ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fma.legacy), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; CHECK-NEXT: $vgpr0 = COPY [[FNEG]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fma.legacy), %0(s32), %1(s32), %2(s32) - %4:_(s32) = G_FNEG %3 - $vgpr0 = COPY %4(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fma.legacy), [[BITCAST]](f32), [[BITCAST1]](f32), [[BITCAST2]](f32) + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[INT]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FNEG]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(f32) = G_BITCAST %0(i32) + %4:_(f32) = G_BITCAST %1(i32) + %5:_(f32) = G_BITCAST %2(i32) + %6:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fma.legacy), %3(f32), %4(f32), %5(f32) + %7:_(f32) = G_FNEG %6 + %8:_(i32) = G_BITCAST %7(f32) + $vgpr0 = COPY %8(i32) ... 
@@ -685,16 +854,20 @@ body: | ; CHECK-LABEL: name: test_fminnum_zero ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:_(s32) = G_FMINNUM [[COPY]], [[C]] - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FMINNUM]] - ; CHECK-NEXT: $vgpr0 = COPY [[FNEG]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_FCONSTANT float 0.000000e+00 - %2:_(s32) = G_FMINNUM %0:_, %1:_ - %3:_(s32) = G_FNEG %2:_ - $vgpr0 = COPY %3:_(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:_(f32) = G_FMINNUM [[BITCAST]], [[C]] + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[FMINNUM]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[FNEG]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_FCONSTANT float 0.000000e+00 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_FMINNUM %2, %1 + %4:_(f32) = G_FNEG %3 + %5:_(i32) = G_BITCAST %4(f32) + $vgpr0 = COPY %5(i32) ... @@ -707,20 +880,24 @@ body: | ; CHECK-LABEL: name: test_fminnum_inv2pi_half ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3118 - ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:_(s16) = G_FMINNUM [[TRUNC]], [[C]] - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[FMINNUM]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FNEG]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0:_(s32) - %2:_(s16) = G_FCONSTANT half 0xH3118 - %3:_(s16) = G_FMINNUM %1:_, %2:_ - %4:_(s16) = G_FNEG %3:_ - %5:_(s32) = G_ANYEXT %4:_(s16) - $vgpr0 = COPY %5:_(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH3118 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:_(f16) = G_FMINNUM [[BITCAST]], [[C]] + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(f16) = G_FNEG [[FMINNUM]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[FNEG]](f16) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(f16) = G_FCONSTANT half 0xH3118 + %3:_(f16) = G_BITCAST %1(i16) + %4:_(f16) = G_FMINNUM %3, %2 + %5:_(f16) = G_FNEG %4 + %6:_(i16) = G_BITCAST %5(f16) + %7:_(i32) = G_ANYEXT %6(i16) + $vgpr0 = COPY %7(i32) ... 
--- @@ -731,16 +908,20 @@ body: | ; CHECK-LABEL: name: test_fminnum_inv2pi_float ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FC45F3060000000 - ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:_(s32) = G_FMINNUM [[COPY]], [[C]] - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FMINNUM]] - ; CHECK-NEXT: $vgpr0 = COPY [[FNEG]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_FCONSTANT float 0x3FC45F3060000000 - %2:_(s32) = G_FMINNUM %0:_, %1:_ - %3:_(s32) = G_FNEG %2:_ - $vgpr0 = COPY %3:_(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FC45F3060000000 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:_(f32) = G_FMINNUM [[BITCAST]], [[C]] + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[FMINNUM]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[FNEG]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_FCONSTANT float 0x3FC45F3060000000 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_FMINNUM %2, %1 + %4:_(f32) = G_FNEG %3 + %5:_(i32) = G_BITCAST %4(f32) + $vgpr0 = COPY %5(i32) ... --- @@ -751,16 +932,20 @@ body: | ; CHECK-LABEL: name: test_fminnum_inv2pi_double ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3FC45F306DC9C882 - ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:_(s64) = G_FMINNUM [[COPY]], [[C]] - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[FMINNUM]] - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[FNEG]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_FCONSTANT double 0x3FC45F306DC9C882 - %2:_(s64) = G_FMINNUM %0:_, %1:_ - %3:_(s64) = G_FNEG %2:_ - $vgpr0_vgpr1 = COPY %3:_(s64) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(f64) = G_FCONSTANT double 0x3FC45F306DC9C882 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:_(f64) = G_FMINNUM [[BITCAST]], [[C]] + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(f64) = G_FNEG [[FMINNUM]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[FNEG]](f64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(f64) = G_FCONSTANT double 0x3FC45F306DC9C882 + %2:_(f64) = G_BITCAST %0(i64) + %3:_(f64) = G_FMINNUM %2, %1 + %4:_(f64) = G_FNEG %3 + %5:_(i64) = G_BITCAST %4(f64) + $vgpr0_vgpr1 = COPY %5(i64) ... 
@@ -774,24 +959,36 @@ body: | ; CHECK-LABEL: name: test_use_both ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FMUL]] - ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FNEG]], [[COPY2]] - ; CHECK-NEXT: $vgpr0 = COPY [[FMUL]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[FNEG]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[FMUL1]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = G_FMUL %0, %1 - %4:_(s32) = G_FNEG %3 - %5:_(s32) = G_FMUL %4, %2 - $vgpr0 = COPY %3:_(s32) - $vgpr1 = COPY %4:_(s32) - $vgpr2 = COPY %5:_(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[FMUL]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[FNEG]], [[BITCAST2]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMUL]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[FNEG]](f32) + ; CHECK-NEXT: $vgpr1 = COPY [[BITCAST4]](i32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[FMUL1]](f32) + ; CHECK-NEXT: $vgpr2 = COPY [[BITCAST5]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(f32) = G_BITCAST %0(i32) + %4:_(f32) = G_BITCAST %1(i32) + %5:_(f32) = G_FMUL %3, %4 + %6:_(f32) = G_FNEG %5 + %7:_(f32) = G_BITCAST %2(i32) + %8:_(f32) = G_FMUL %6, %7 + %9:_(i32) = G_BITCAST %5(f32) + $vgpr0 = COPY %9(i32) + %10:_(i32) = G_BITCAST %6(f32) + $vgpr1 = COPY %10(i32) + %11:_(i32) = G_BITCAST %8(f32) + $vgpr2 = COPY %11(i32) ... 
@@ -805,18 +1002,26 @@ body: | ; CHECK-LABEL: name: test_use_both2 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FMUL]] - ; CHECK-NEXT: $vgpr0 = COPY [[FMUL]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[FNEG]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_FMUL %0, %1 - %3:_(s32) = G_FNEG %2 - $vgpr0 = COPY %2:_(s32) - $vgpr1 = COPY %3:_(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[FMUL]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMUL]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FNEG]](f32) + ; CHECK-NEXT: $vgpr1 = COPY [[BITCAST3]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32) = G_FMUL %2, %3 + %5:_(f32) = G_FNEG %4 + %6:_(i32) = G_BITCAST %4(f32) + $vgpr0 = COPY %6(i32) + %7:_(i32) = G_BITCAST %5(f32) + $vgpr1 = COPY %7(i32) ... @@ -829,30 +1034,44 @@ body: | ; CHECK-LABEL: name: multiple_uses_of_fneg ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY1]] - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[FNEG]] - ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[COPY2]] - ; CHECK-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[COPY3]] - ; CHECK-NEXT: $vgpr0 = COPY [[FMUL]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[FMUL1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[FMUL2]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = COPY $vgpr3 - - %4:_(s32) = G_FMUL %0, %1 - %5:_(s32) = G_FNEG %4 - %6:_(s32) = G_FMUL %5, %2 - %7:_(s32) = G_FMUL %5, %3 - - $vgpr0 = COPY %5:_(s32) - $vgpr1 = COPY %6:_(s32) - $vgpr2 = COPY %7:_(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[BITCAST1]] + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST]], [[FNEG]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[FMUL]], [[BITCAST2]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(f32) = G_BITCAST [[COPY3]](i32) + ; CHECK-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FMUL]], [[BITCAST3]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[FMUL]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST4]](i32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[FMUL1]](f32) + ; CHECK-NEXT: $vgpr1 = 
COPY [[BITCAST5]](i32) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[FMUL2]](f32) + ; CHECK-NEXT: $vgpr2 = COPY [[BITCAST6]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 + %4:_(f32) = G_BITCAST %0(i32) + %5:_(f32) = G_BITCAST %1(i32) + %6:_(f32) = G_FMUL %4, %5 + %7:_(f32) = G_FNEG %6 + %8:_(f32) = G_BITCAST %2(i32) + %9:_(f32) = G_FMUL %7, %8 + %10:_(f32) = G_BITCAST %3(i32) + %11:_(f32) = G_FMUL %7, %10 + %12:_(i32) = G_BITCAST %7(f32) + $vgpr0 = COPY %12(i32) + %13:_(i32) = G_BITCAST %9(f32) + $vgpr1 = COPY %13(i32) + %14:_(i32) = G_BITCAST %11(f32) + $vgpr2 = COPY %14(i32) + + ... @@ -866,22 +1085,32 @@ body: | ; CHECK-LABEL: name: fneg_src_has_multiple_uses ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY1]] - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[FNEG]] - ; CHECK-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[FMUL]] - ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FNEG1]], [[COPY2]] - ; CHECK-NEXT: $vgpr0 = COPY [[FMUL1]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[FMUL]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = G_FMUL %0:_, %1:_ - %4:_(s32) = G_FMUL %3:_, %2:_ - %5:_(s32) = G_FNEG %3:_ - $vgpr0 = COPY %4:_(s32) - $vgpr1 = COPY %5:_(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[BITCAST1]] + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST]], [[FNEG]] + ; CHECK-NEXT: [[FNEG1:%[0-9]+]]:_(f32) = G_FNEG [[FMUL]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[FNEG1]], [[BITCAST2]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMUL1]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[FMUL]](f32) + ; CHECK-NEXT: $vgpr1 = COPY [[BITCAST4]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(f32) = G_BITCAST %0(i32) + %4:_(f32) = G_BITCAST %1(i32) + %5:_(f32) = G_FMUL %3, %4 + %6:_(f32) = G_BITCAST %2(i32) + %7:_(f32) = G_FMUL %5, %6 + %8:_(f32) = G_FNEG %5 + %9:_(i32) = G_BITCAST %7(f32) + $vgpr0 = COPY %9(i32) + %10:_(i32) = G_BITCAST %8(f32) + $vgpr1 = COPY %10(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fpneg-one-fneg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fpneg-one-fneg.mir index bdfc7c2b25c28..857a74f19e2ec 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fpneg-one-fneg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fpneg-one-fneg.mir @@ -11,17 +11,21 @@ body: | ; CHECK-LABEL: name: test_neg_one_f16_sgpr ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; CHECK-NEXT: %x:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: %d:_(s16) = G_FNEG %x - ; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %d(s16) - ; CHECK-NEXT: $sgpr0 = COPY %ext(s32) - %0:_(s32) = COPY $sgpr0 - %x:_(s16) = G_TRUNC %0:_(s32) - %y:_(s16) = G_FCONSTANT half -1.0 - %d:_(s16) = G_FMUL %x, %y - %ext:_(s32) = G_ANYEXT %d:_(s16) - $sgpr0 = COPY %ext + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; CHECK-NEXT: %x:_(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST %x(i16) + ; CHECK-NEXT: %d:_(f16) = G_FNEG [[BITCAST]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST %d(f16) + ; CHECK-NEXT: %ext:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; CHECK-NEXT: $sgpr0 = COPY %ext(i32) + %0:_(i32) = COPY $sgpr0 + %x:_(i16) = G_TRUNC %0(i32) + %y:_(f16) = G_FCONSTANT half 0xHBC00 + %3:_(f16) = G_BITCAST %x(i16) + %d:_(f16) = G_FMUL %3, %y + %5:_(i16) = G_BITCAST %d(f16) + %ext:_(i32) = G_ANYEXT %5(i16) + $sgpr0 = COPY %ext(i32) ... @@ -34,13 +38,17 @@ body: | ; CHECK-LABEL: name: test_neg_one_f32_sgpr ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]] - ; CHECK-NEXT: $sgpr0 = COPY [[FNEG]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = G_FCONSTANT float -1.0 - %2:_(s32) = G_FMUL %0, %1 - $sgpr0 = COPY %2(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[BITCAST]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[FNEG]](f32) + ; CHECK-NEXT: $sgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(f32) = G_FCONSTANT float -1.000000e+00 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_FMUL %2, %1 + %4:_(i32) = G_BITCAST %3(f32) + $sgpr0 = COPY %4(i32) ... @@ -54,17 +62,21 @@ body: | ; CHECK-LABEL: name: test_neg_one_f64_sgpr ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; CHECK-NEXT: %x:_(s64) = G_ANYEXT [[COPY]](s32) - ; CHECK-NEXT: %d:_(s64) = G_FNEG %x - ; CHECK-NEXT: %ext:_(s32) = G_TRUNC %d(s64) - ; CHECK-NEXT: $sgpr0 = COPY %ext(s32) - %0:_(s32) = COPY $sgpr0 - %x:_(s64) = G_ANYEXT %0:_(s32) - %y:_(s64) = G_FCONSTANT double -1.0 - %d:_(s64) = G_FMUL %x, %y - %ext:_(s32) = G_TRUNC %d:_(s64) - $sgpr0 = COPY %ext + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; CHECK-NEXT: %x:_(i64) = G_ANYEXT [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST %x(i64) + ; CHECK-NEXT: %d:_(f64) = G_FNEG [[BITCAST]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST %d(f64) + ; CHECK-NEXT: %ext:_(i32) = G_TRUNC [[BITCAST1]](i64) + ; CHECK-NEXT: $sgpr0 = COPY %ext(i32) + %0:_(i32) = COPY $sgpr0 + %x:_(i64) = G_ANYEXT %0(i32) + %y:_(f64) = G_FCONSTANT double -1.000000e+00 + %3:_(f64) = G_BITCAST %x(i64) + %d:_(f64) = G_FMUL %3, %y + %5:_(i64) = G_BITCAST %d(f64) + %ext:_(i32) = G_TRUNC %5(i64) + $sgpr0 = COPY %ext(i32) ... 
@@ -77,14 +89,18 @@ body: | ; CHECK-LABEL: name: test_neg_ten_f32_sgpr ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.000000e+01 - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[C]] - ; CHECK-NEXT: $sgpr0 = COPY [[FMUL]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = G_FCONSTANT float -10.0 - %2:_(s32) = G_FMUL %0, %1 - $sgpr0 = COPY %2(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float -1.000000e+01 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST]], [[C]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[FMUL]](f32) + ; CHECK-NEXT: $sgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(f32) = G_FCONSTANT float -1.000000e+01 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_FMUL %2, %1 + %4:_(i32) = G_BITCAST %3(f32) + $sgpr0 = COPY %4(i32) ... @@ -97,14 +113,18 @@ body: | ; CHECK-LABEL: name: test_neg_fract_f32_sgpr ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -5.000000e-01 - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[C]] - ; CHECK-NEXT: $sgpr0 = COPY [[FMUL]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = G_FCONSTANT float -0.5 - %2:_(s32) = G_FMUL %0, %1 - $sgpr0 = COPY %2(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float -5.000000e-01 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST]], [[C]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[FMUL]](f32) + ; CHECK-NEXT: $sgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(f32) = G_FCONSTANT float -5.000000e-01 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_FMUL %2, %1 + %4:_(i32) = G_BITCAST %3(f32) + $sgpr0 = COPY %4(i32) ... @@ -118,17 +138,21 @@ body: | ; CHECK-LABEL: name: test_neg_one_f16_vgpr ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %x:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: %d:_(s16) = G_FNEG %x - ; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %d(s16) - ; CHECK-NEXT: $vgpr0 = COPY %ext(s32) - %0:_(s32) = COPY $vgpr0 - %x:_(s16) = G_TRUNC %0:_(s32) - %y:_(s16) = G_FCONSTANT half -1.0 - %d:_(s16) = G_FMUL %x, %y - %ext:_(s32) = G_ANYEXT %d:_(s16) - $vgpr0 = COPY %ext + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %x:_(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST %x(i16) + ; CHECK-NEXT: %d:_(f16) = G_FNEG [[BITCAST]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST %d(f16) + ; CHECK-NEXT: %ext:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; CHECK-NEXT: $vgpr0 = COPY %ext(i32) + %0:_(i32) = COPY $vgpr0 + %x:_(i16) = G_TRUNC %0(i32) + %y:_(f16) = G_FCONSTANT half 0xHBC00 + %3:_(f16) = G_BITCAST %x(i16) + %d:_(f16) = G_FMUL %3, %y + %5:_(i16) = G_BITCAST %d(f16) + %ext:_(i32) = G_ANYEXT %5(i16) + $vgpr0 = COPY %ext(i32) ... 
@@ -141,13 +165,17 @@ body: | ; CHECK-LABEL: name: test_neg_one_f32_vgpr ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]] - ; CHECK-NEXT: $vgpr0 = COPY [[FNEG]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_FCONSTANT float -1.0 - %2:_(s32) = G_FMUL %0, %1 - $vgpr0 = COPY %2(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[BITCAST]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[FNEG]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_FCONSTANT float -1.000000e+00 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_FMUL %2, %1 + %4:_(i32) = G_BITCAST %3(f32) + $vgpr0 = COPY %4(i32) ... @@ -161,17 +189,21 @@ body: | ; CHECK-LABEL: name: test_neg_one_f64_vgpr ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %x:_(s64) = G_ANYEXT [[COPY]](s32) - ; CHECK-NEXT: %d:_(s64) = G_FNEG %x - ; CHECK-NEXT: %ext:_(s32) = G_TRUNC %d(s64) - ; CHECK-NEXT: $vgpr0 = COPY %ext(s32) - %0:_(s32) = COPY $vgpr0 - %x:_(s64) = G_ANYEXT %0:_(s32) - %y:_(s64) = G_FCONSTANT double -1.0 - %d:_(s64) = G_FMUL %x, %y - %ext:_(s32) = G_TRUNC %d:_(s64) - $vgpr0 = COPY %ext + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %x:_(i64) = G_ANYEXT [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST %x(i64) + ; CHECK-NEXT: %d:_(f64) = G_FNEG [[BITCAST]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST %d(f64) + ; CHECK-NEXT: %ext:_(i32) = G_TRUNC [[BITCAST1]](i64) + ; CHECK-NEXT: $vgpr0 = COPY %ext(i32) + %0:_(i32) = COPY $vgpr0 + %x:_(i64) = G_ANYEXT %0(i32) + %y:_(f64) = G_FCONSTANT double -1.000000e+00 + %3:_(f64) = G_BITCAST %x(i64) + %d:_(f64) = G_FMUL %3, %y + %5:_(i64) = G_BITCAST %d(f64) + %ext:_(i32) = G_TRUNC %5(i64) + $vgpr0 = COPY %ext(i32) ... @@ -184,14 +216,18 @@ body: | ; CHECK-LABEL: name: test_neg_ten_f32_vgpr ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.000000e+01 - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[C]] - ; CHECK-NEXT: $vgpr0 = COPY [[FMUL]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_FCONSTANT float -10.0 - %2:_(s32) = G_FMUL %0, %1 - $vgpr0 = COPY %2(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float -1.000000e+01 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST]], [[C]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[FMUL]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_FCONSTANT float -1.000000e+01 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_FMUL %2, %1 + %4:_(i32) = G_BITCAST %3(f32) + $vgpr0 = COPY %4(i32) ... 
@@ -204,13 +240,17 @@ body: | ; CHECK-LABEL: name: test_neg_fract_f32_vgpr ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -5.000000e-01 - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[C]] - ; CHECK-NEXT: $vgpr0 = COPY [[FMUL]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_FCONSTANT float -0.5 - %2:_(s32) = G_FMUL %0, %1 - $vgpr0 = COPY %2(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float -5.000000e-01 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST]], [[C]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[FMUL]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_FCONSTANT float -5.000000e-01 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_FMUL %2, %1 + %4:_(i32) = G_BITCAST %3(f32) + $vgpr0 = COPY %4(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fsh.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fsh.mir index 4574d95e4eb81..d39cb0c4e1d64 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fsh.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fsh.mir @@ -11,20 +11,20 @@ body: | ; CHECK-LABEL: name: fshl_i32 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %a:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %b:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: %amt:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: %or:_(s32) = G_FSHL %a, %b, %amt(s32) - ; CHECK-NEXT: $vgpr3 = COPY %or(s32) - %a:_(s32) = COPY $vgpr0 - %b:_(s32) = COPY $vgpr1 - %amt:_(s32) = COPY $vgpr2 - %bw:_(s32) = G_CONSTANT i32 32 - %shl:_(s32) = G_SHL %a, %amt - %sub:_(s32) = G_SUB %bw, %amt - %lshr:_(s32) = G_LSHR %b, %sub - %or:_(s32) = G_OR %shl, %lshr - $vgpr3 = COPY %or + ; CHECK-NEXT: %a:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %b:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: %amt:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: %or:_(i32) = G_FSHL %a, %b, %amt(i32) + ; CHECK-NEXT: $vgpr3 = COPY %or(i32) + %a:_(i32) = COPY $vgpr0 + %b:_(i32) = COPY $vgpr1 + %amt:_(i32) = COPY $vgpr2 + %bw:_(i32) = G_CONSTANT i32 32 + %shl:_(i32) = G_SHL %a, %amt(i32) + %sub:_(i32) = G_SUB %bw, %amt + %lshr:_(i32) = G_LSHR %b, %sub(i32) + %or:_(i32) = G_OR %shl, %lshr + $vgpr3 = COPY %or(i32) ... 
--- @@ -37,21 +37,21 @@ body: | ; CHECK-LABEL: name: fshl_v2i32 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %a:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: %b:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: %amt:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: %or:_(<2 x s32>) = G_FSHL %a, %b, %amt(<2 x s32>) - ; CHECK-NEXT: $vgpr6_vgpr7 = COPY %or(<2 x s32>) - %a:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %b:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %amt:_(<2 x s32>) = COPY $vgpr4_vgpr5 - %scalar_bw:_(s32) = G_CONSTANT i32 32 - %bw:_(<2 x s32>) = G_BUILD_VECTOR %scalar_bw, %scalar_bw - %shl:_(<2 x s32>) = G_SHL %a, %amt - %sub:_(<2 x s32>) = G_SUB %bw, %amt - %lshr:_(<2 x s32>) = G_LSHR %b, %sub - %or:_(<2 x s32>) = G_OR %shl, %lshr - $vgpr6_vgpr7 = COPY %or + ; CHECK-NEXT: %a:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: %b:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: %amt:_(<2 x i32>) = COPY $vgpr4_vgpr5 + ; CHECK-NEXT: %or:_(<2 x i32>) = G_FSHL %a, %b, %amt(<2 x i32>) + ; CHECK-NEXT: $vgpr6_vgpr7 = COPY %or(<2 x i32>) + %a:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %b:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %amt:_(<2 x i32>) = COPY $vgpr4_vgpr5 + %scalar_bw:_(i32) = G_CONSTANT i32 32 + %bw:_(<2 x i32>) = G_BUILD_VECTOR %scalar_bw(i32), %scalar_bw(i32) + %shl:_(<2 x i32>) = G_SHL %a, %amt(<2 x i32>) + %sub:_(<2 x i32>) = G_SUB %bw, %amt + %lshr:_(<2 x i32>) = G_LSHR %b, %sub(<2 x i32>) + %or:_(<2 x i32>) = G_OR %shl, %lshr + $vgpr6_vgpr7 = COPY %or(<2 x i32>) ... --- @@ -64,20 +64,20 @@ body: | ; CHECK-LABEL: name: fshl_commute_i32 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %a:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %b:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: %amt:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: %or:_(s32) = G_FSHL %a, %b, %amt(s32) - ; CHECK-NEXT: $vgpr3 = COPY %or(s32) - %a:_(s32) = COPY $vgpr0 - %b:_(s32) = COPY $vgpr1 - %amt:_(s32) = COPY $vgpr2 - %bw:_(s32) = G_CONSTANT i32 32 - %shl:_(s32) = G_SHL %a, %amt - %sub:_(s32) = G_SUB %bw, %amt - %lshr:_(s32) = G_LSHR %b, %sub - %or:_(s32) = G_OR %lshr, %shl - $vgpr3 = COPY %or + ; CHECK-NEXT: %a:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %b:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: %amt:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: %or:_(i32) = G_FSHL %a, %b, %amt(i32) + ; CHECK-NEXT: $vgpr3 = COPY %or(i32) + %a:_(i32) = COPY $vgpr0 + %b:_(i32) = COPY $vgpr1 + %amt:_(i32) = COPY $vgpr2 + %bw:_(i32) = G_CONSTANT i32 32 + %shl:_(i32) = G_SHL %a, %amt(i32) + %sub:_(i32) = G_SUB %bw, %amt + %lshr:_(i32) = G_LSHR %b, %sub(i32) + %or:_(i32) = G_OR %lshr, %shl + $vgpr3 = COPY %or(i32) ... 
--- @@ -90,20 +90,20 @@ body: | ; CHECK-LABEL: name: fshr_i32 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %a:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %b:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: %amt:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: %or:_(s32) = G_FSHR %a, %b, %amt(s32) - ; CHECK-NEXT: $vgpr3 = COPY %or(s32) - %a:_(s32) = COPY $vgpr0 - %b:_(s32) = COPY $vgpr1 - %amt:_(s32) = COPY $vgpr2 - %bw:_(s32) = G_CONSTANT i32 32 - %lshr:_(s32) = G_LSHR %b, %amt - %sub:_(s32) = G_SUB %bw, %amt - %shl:_(s32) = G_SHL %a, %sub - %or:_(s32) = G_OR %shl, %lshr - $vgpr3 = COPY %or + ; CHECK-NEXT: %a:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %b:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: %amt:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: %or:_(i32) = G_FSHR %a, %b, %amt(i32) + ; CHECK-NEXT: $vgpr3 = COPY %or(i32) + %a:_(i32) = COPY $vgpr0 + %b:_(i32) = COPY $vgpr1 + %amt:_(i32) = COPY $vgpr2 + %bw:_(i32) = G_CONSTANT i32 32 + %lshr:_(i32) = G_LSHR %b, %amt(i32) + %sub:_(i32) = G_SUB %bw, %amt + %shl:_(i32) = G_SHL %a, %sub(i32) + %or:_(i32) = G_OR %shl, %lshr + $vgpr3 = COPY %or(i32) ... --- @@ -116,19 +116,19 @@ body: | ; CHECK-LABEL: name: fsh_i32_const ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %a:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %b:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: %amt1:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: %or:_(s32) = G_FSHR %a, %b, %amt1(s32) - ; CHECK-NEXT: $vgpr2 = COPY %or(s32) - %a:_(s32) = COPY $vgpr0 - %b:_(s32) = COPY $vgpr1 - %amt0:_(s32) = G_CONSTANT i32 20 - %amt1:_(s32) = G_CONSTANT i32 12 - %shl:_(s32) = G_SHL %a, %amt0 - %lshr:_(s32) = G_LSHR %b, %amt1 - %or:_(s32) = G_OR %shl, %lshr - $vgpr2 = COPY %or + ; CHECK-NEXT: %a:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %b:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: %amt1:_(i32) = G_CONSTANT i32 12 + ; CHECK-NEXT: %or:_(i32) = G_FSHR %a, %b, %amt1(i32) + ; CHECK-NEXT: $vgpr2 = COPY %or(i32) + %a:_(i32) = COPY $vgpr0 + %b:_(i32) = COPY $vgpr1 + %amt0:_(i32) = G_CONSTANT i32 20 + %amt1:_(i32) = G_CONSTANT i32 12 + %shl:_(i32) = G_SHL %a, %amt0(i32) + %lshr:_(i32) = G_LSHR %b, %amt1(i32) + %or:_(i32) = G_OR %shl, %lshr + $vgpr2 = COPY %or(i32) ... 
--- @@ -141,22 +141,22 @@ body: | ; CHECK-LABEL: name: fsh_v2i32_const ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %a:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: %b:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: %scalar_amt1:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: %amt1:_(<2 x s32>) = G_BUILD_VECTOR %scalar_amt1(s32), %scalar_amt1(s32) - ; CHECK-NEXT: %or:_(<2 x s32>) = G_FSHR %a, %b, %amt1(<2 x s32>) - ; CHECK-NEXT: $vgpr4_vgpr5 = COPY %or(<2 x s32>) - %a:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %b:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %scalar_amt0:_(s32) = G_CONSTANT i32 20 - %amt0:_(<2 x s32>) = G_BUILD_VECTOR %scalar_amt0, %scalar_amt0 - %scalar_amt1:_(s32) = G_CONSTANT i32 12 - %amt1:_(<2 x s32>) = G_BUILD_VECTOR %scalar_amt1, %scalar_amt1 - %shl:_(<2 x s32>) = G_SHL %a, %amt0 - %lshr:_(<2 x s32>) = G_LSHR %b, %amt1 - %or:_(<2 x s32>) = G_OR %shl, %lshr - $vgpr4_vgpr5 = COPY %or + ; CHECK-NEXT: %a:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: %b:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: %scalar_amt1:_(i32) = G_CONSTANT i32 12 + ; CHECK-NEXT: %amt1:_(<2 x i32>) = G_BUILD_VECTOR %scalar_amt1(i32), %scalar_amt1(i32) + ; CHECK-NEXT: %or:_(<2 x i32>) = G_FSHR %a, %b, %amt1(<2 x i32>) + ; CHECK-NEXT: $vgpr4_vgpr5 = COPY %or(<2 x i32>) + %a:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %b:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %scalar_amt0:_(i32) = G_CONSTANT i32 20 + %amt0:_(<2 x i32>) = G_BUILD_VECTOR %scalar_amt0(i32), %scalar_amt0(i32) + %scalar_amt1:_(i32) = G_CONSTANT i32 12 + %amt1:_(<2 x i32>) = G_BUILD_VECTOR %scalar_amt1(i32), %scalar_amt1(i32) + %shl:_(<2 x i32>) = G_SHL %a, %amt0(<2 x i32>) + %lshr:_(<2 x i32>) = G_LSHR %b, %amt1(<2 x i32>) + %or:_(<2 x i32>) = G_OR %shl, %lshr + $vgpr4_vgpr5 = COPY %or(<2 x i32>) ... --- @@ -169,22 +169,22 @@ body: | ; CHECK-LABEL: name: fsh_i32_bad_const ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %a:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %b:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: %amt0:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: %amt1:_(s32) = G_CONSTANT i32 11 - ; CHECK-NEXT: %shl:_(s32) = G_SHL %a, %amt0(s32) - ; CHECK-NEXT: %lshr:_(s32) = G_LSHR %b, %amt1(s32) - ; CHECK-NEXT: %or:_(s32) = G_OR %shl, %lshr - ; CHECK-NEXT: $vgpr2 = COPY %or(s32) - %a:_(s32) = COPY $vgpr0 - %b:_(s32) = COPY $vgpr1 - %amt0:_(s32) = G_CONSTANT i32 20 - %amt1:_(s32) = G_CONSTANT i32 11 - %shl:_(s32) = G_SHL %a, %amt0 - %lshr:_(s32) = G_LSHR %b, %amt1 - %or:_(s32) = G_OR %shl, %lshr - $vgpr2 = COPY %or + ; CHECK-NEXT: %a:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %b:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: %amt0:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: %amt1:_(i32) = G_CONSTANT i32 11 + ; CHECK-NEXT: %shl:_(i32) = G_SHL %a, %amt0(i32) + ; CHECK-NEXT: %lshr:_(i32) = G_LSHR %b, %amt1(i32) + ; CHECK-NEXT: %or:_(i32) = G_OR %shl, %lshr + ; CHECK-NEXT: $vgpr2 = COPY %or(i32) + %a:_(i32) = COPY $vgpr0 + %b:_(i32) = COPY $vgpr1 + %amt0:_(i32) = G_CONSTANT i32 20 + %amt1:_(i32) = G_CONSTANT i32 11 + %shl:_(i32) = G_SHL %a, %amt0(i32) + %lshr:_(i32) = G_LSHR %b, %amt1(i32) + %or:_(i32) = G_OR %shl, %lshr + $vgpr2 = COPY %or(i32) ... 
--- @@ -197,24 +197,24 @@ body: | ; CHECK-LABEL: name: fshl_i32_bad_bw ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %a:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %b:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: %amt:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: %bw:_(s32) = G_CONSTANT i32 31 - ; CHECK-NEXT: %shl:_(s32) = G_SHL %a, %amt(s32) - ; CHECK-NEXT: %sub:_(s32) = G_SUB %bw, %amt - ; CHECK-NEXT: %lshr:_(s32) = G_LSHR %b, %sub(s32) - ; CHECK-NEXT: %or:_(s32) = G_OR %shl, %lshr - ; CHECK-NEXT: $vgpr3 = COPY %or(s32) - %a:_(s32) = COPY $vgpr0 - %b:_(s32) = COPY $vgpr1 - %amt:_(s32) = COPY $vgpr2 - %bw:_(s32) = G_CONSTANT i32 31 - %shl:_(s32) = G_SHL %a, %amt - %sub:_(s32) = G_SUB %bw, %amt - %lshr:_(s32) = G_LSHR %b, %sub - %or:_(s32) = G_OR %shl, %lshr - $vgpr3 = COPY %or + ; CHECK-NEXT: %a:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %b:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: %amt:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: %bw:_(i32) = G_CONSTANT i32 31 + ; CHECK-NEXT: %shl:_(i32) = G_SHL %a, %amt(i32) + ; CHECK-NEXT: %sub:_(i32) = G_SUB %bw, %amt + ; CHECK-NEXT: %lshr:_(i32) = G_LSHR %b, %sub(i32) + ; CHECK-NEXT: %or:_(i32) = G_OR %shl, %lshr + ; CHECK-NEXT: $vgpr3 = COPY %or(i32) + %a:_(i32) = COPY $vgpr0 + %b:_(i32) = COPY $vgpr1 + %amt:_(i32) = COPY $vgpr2 + %bw:_(i32) = G_CONSTANT i32 31 + %shl:_(i32) = G_SHL %a, %amt(i32) + %sub:_(i32) = G_SUB %bw, %amt + %lshr:_(i32) = G_LSHR %b, %sub(i32) + %or:_(i32) = G_OR %shl, %lshr + $vgpr3 = COPY %or(i32) ... --- @@ -227,24 +227,24 @@ body: | ; CHECK-LABEL: name: fshl_i32_bad_amt_reg ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %a:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %b:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: %amt:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: %amt1:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: %bw:_(s32) = G_CONSTANT i32 32 - ; CHECK-NEXT: %shl:_(s32) = G_SHL %a, %amt(s32) - ; CHECK-NEXT: %sub:_(s32) = G_SUB %bw, %amt1 - ; CHECK-NEXT: %lshr:_(s32) = G_LSHR %b, %sub(s32) - ; CHECK-NEXT: %or:_(s32) = G_OR %shl, %lshr - ; CHECK-NEXT: $vgpr4 = COPY %or(s32) - %a:_(s32) = COPY $vgpr0 - %b:_(s32) = COPY $vgpr1 - %amt:_(s32) = COPY $vgpr2 - %amt1:_(s32) = COPY $vgpr3 - %bw:_(s32) = G_CONSTANT i32 32 - %shl:_(s32) = G_SHL %a, %amt - %sub:_(s32) = G_SUB %bw, %amt1 - %lshr:_(s32) = G_LSHR %b, %sub - %or:_(s32) = G_OR %shl, %lshr - $vgpr4 = COPY %or + ; CHECK-NEXT: %a:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %b:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: %amt:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: %amt1:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: %bw:_(i32) = G_CONSTANT i32 32 + ; CHECK-NEXT: %shl:_(i32) = G_SHL %a, %amt(i32) + ; CHECK-NEXT: %sub:_(i32) = G_SUB %bw, %amt1 + ; CHECK-NEXT: %lshr:_(i32) = G_LSHR %b, %sub(i32) + ; CHECK-NEXT: %or:_(i32) = G_OR %shl, %lshr + ; CHECK-NEXT: $vgpr4 = COPY %or(i32) + %a:_(i32) = COPY $vgpr0 + %b:_(i32) = COPY $vgpr1 + %amt:_(i32) = COPY $vgpr2 + %amt1:_(i32) = COPY $vgpr3 + %bw:_(i32) = G_CONSTANT i32 32 + %shl:_(i32) = G_SHL %a, %amt(i32) + %sub:_(i32) = G_SUB %bw, %amt1 + %lshr:_(i32) = G_LSHR %b, %sub(i32) + %or:_(i32) = G_OR %shl, %lshr + $vgpr4 = COPY %or(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fsub-fneg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fsub-fneg.mir index 7bd51b87fbea4..0620685828dcd 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fsub-fneg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fsub-fneg.mir @@ -11,18 +11,22 @@ body: | ; CHECK-LABEL: name: test_f16_poszero_nsz ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %input:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG %input - ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[FNEG]] - ; CHECK-NEXT: %res:_(s32) = G_ANYEXT [[FCANONICALIZE]](s16) - ; CHECK-NEXT: $vgpr0 = COPY %res(s32) - %0:_(s32) = COPY $vgpr0 - %input:_(s16) = G_TRUNC %0 - %cst:_(s16) = G_FCONSTANT half 0.0 - %sub:_(s16) = nsz G_FSUB %cst, %input - %res:_(s32) = G_ANYEXT %sub - $vgpr0 = COPY %res + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %input:_(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST %input(i16) + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(f16) = G_FNEG [[BITCAST]] + ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[FNEG]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[FCANONICALIZE]](f16) + ; CHECK-NEXT: %res:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; CHECK-NEXT: $vgpr0 = COPY %res(i32) + %0:_(i32) = COPY $vgpr0 + %input:_(i16) = G_TRUNC %0(i32) + %cst:_(f16) = G_FCONSTANT half 0xH0000 + %3:_(f16) = G_BITCAST %input(i16) + %sub:_(f16) = nsz G_FSUB %cst, %3 + %5:_(i16) = G_BITCAST %sub(f16) + %res:_(i32) = G_ANYEXT %5(i16) + $vgpr0 = COPY %res(i32) ... --- @@ -35,18 +39,22 @@ body: | ; CHECK-LABEL: name: test_f16_poszero_nonsz_nofold ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %input:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: %cst:_(s16) = G_FCONSTANT half 0xH0000 - ; CHECK-NEXT: %sub:_(s16) = G_FSUB %cst, %input - ; CHECK-NEXT: %res:_(s32) = G_ANYEXT %sub(s16) - ; CHECK-NEXT: $vgpr0 = COPY %res(s32) - %0:_(s32) = COPY $vgpr0 - %input:_(s16) = G_TRUNC %0 - %cst:_(s16) = G_FCONSTANT half 0.0 - %sub:_(s16) = G_FSUB %cst, %input - %res:_(s32) = G_ANYEXT %sub - $vgpr0 = COPY %res + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %input:_(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: %cst:_(f16) = G_FCONSTANT half 0xH0000 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST %input(i16) + ; CHECK-NEXT: %sub:_(f16) = G_FSUB %cst, [[BITCAST]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST %sub(f16) + ; CHECK-NEXT: %res:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; CHECK-NEXT: $vgpr0 = COPY %res(i32) + %0:_(i32) = COPY $vgpr0 + %input:_(i16) = G_TRUNC %0(i32) + %cst:_(f16) = G_FCONSTANT half 0xH0000 + %3:_(f16) = G_BITCAST %input(i16) + %sub:_(f16) = G_FSUB %cst, %3 + %5:_(i16) = G_BITCAST %sub(f16) + %res:_(i32) = G_ANYEXT %5(i16) + $vgpr0 = COPY %res(i32) ... 
--- @@ -59,18 +67,22 @@ body: | ; CHECK-LABEL: name: test_f16_negzero ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %input:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG %input - ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[FNEG]] - ; CHECK-NEXT: %res:_(s32) = G_ANYEXT [[FCANONICALIZE]](s16) - ; CHECK-NEXT: $vgpr0 = COPY %res(s32) - %0:_(s32) = COPY $vgpr0 - %input:_(s16) = G_TRUNC %0 - %cst:_(s16) = G_FCONSTANT half -0.0 - %sub:_(s16) = G_FSUB %cst, %input - %res:_(s32) = G_ANYEXT %sub - $vgpr0 = COPY %res + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %input:_(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST %input(i16) + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(f16) = G_FNEG [[BITCAST]] + ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[FNEG]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[FCANONICALIZE]](f16) + ; CHECK-NEXT: %res:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; CHECK-NEXT: $vgpr0 = COPY %res(i32) + %0:_(i32) = COPY $vgpr0 + %input:_(i16) = G_TRUNC %0(i32) + %cst:_(f16) = G_FCONSTANT half 0xH8000 + %3:_(f16) = G_BITCAST %input(i16) + %sub:_(f16) = G_FSUB %cst, %3 + %5:_(i16) = G_BITCAST %sub(f16) + %res:_(i32) = G_ANYEXT %5(i16) + $vgpr0 = COPY %res(i32) ... --- @@ -83,14 +95,18 @@ body: | ; CHECK-LABEL: name: test_f32_poszero_nsz ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %input:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG %input - ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FNEG]] - ; CHECK-NEXT: $vgpr0 = COPY [[FCANONICALIZE]](s32) - %input:_(s32) = COPY $vgpr0 - %cst:_(s32) = G_FCONSTANT float 0.0 - %sub:_(s32) = nsz G_FSUB %cst, %input - $vgpr0 = COPY %sub + ; CHECK-NEXT: %input:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST %input(i32) + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[BITCAST]] + ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[FNEG]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[FCANONICALIZE]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %input:_(i32) = COPY $vgpr0 + %cst:_(f32) = G_FCONSTANT float 0.000000e+00 + %2:_(f32) = G_BITCAST %input(i32) + %sub:_(f32) = nsz G_FSUB %cst, %2 + %4:_(i32) = G_BITCAST %sub(f32) + $vgpr0 = COPY %4(i32) ... --- @@ -103,14 +119,18 @@ body: | ; CHECK-LABEL: name: test_f32_poszero_nonsz_nofold ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %input:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %cst:_(s32) = G_FCONSTANT float 0.000000e+00 - ; CHECK-NEXT: %sub:_(s32) = G_FSUB %cst, %input - ; CHECK-NEXT: $vgpr0 = COPY %sub(s32) - %input:_(s32) = COPY $vgpr0 - %cst:_(s32) = G_FCONSTANT float 0.0 - %sub:_(s32) = G_FSUB %cst, %input - $vgpr0 = COPY %sub + ; CHECK-NEXT: %input:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %cst:_(f32) = G_FCONSTANT float 0.000000e+00 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST %input(i32) + ; CHECK-NEXT: %sub:_(f32) = G_FSUB %cst, [[BITCAST]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST %sub(f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %input:_(i32) = COPY $vgpr0 + %cst:_(f32) = G_FCONSTANT float 0.000000e+00 + %2:_(f32) = G_BITCAST %input(i32) + %sub:_(f32) = G_FSUB %cst, %2 + %4:_(i32) = G_BITCAST %sub(f32) + $vgpr0 = COPY %4(i32) ... 
--- @@ -123,14 +143,18 @@ body: | ; CHECK-LABEL: name: test_f32_negzero ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %input:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG %input - ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FNEG]] - ; CHECK-NEXT: $vgpr0 = COPY [[FCANONICALIZE]](s32) - %input:_(s32) = COPY $vgpr0 - %cst:_(s32) = G_FCONSTANT float -0.0 - %sub:_(s32) = G_FSUB %cst, %input - $vgpr0 = COPY %sub + ; CHECK-NEXT: %input:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST %input(i32) + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[BITCAST]] + ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[FNEG]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[FCANONICALIZE]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %input:_(i32) = COPY $vgpr0 + %cst:_(f32) = G_FCONSTANT float -0.000000e+00 + %2:_(f32) = G_BITCAST %input(i32) + %sub:_(f32) = G_FSUB %cst, %2 + %4:_(i32) = G_BITCAST %sub(f32) + $vgpr0 = COPY %4(i32) ... --- @@ -143,14 +167,18 @@ body: | ; CHECK-LABEL: name: test_f64_poszero_nsz ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %input:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG %input - ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[FNEG]] - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[FCANONICALIZE]](s64) - %input:_(s64) = COPY $vgpr0_vgpr1 - %cst:_(s64) = G_FCONSTANT double 0.0 - %sub:_(s64) = nsz G_FSUB %cst, %input - $vgpr0_vgpr1 = COPY %sub + ; CHECK-NEXT: %input:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST %input(i64) + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(f64) = G_FNEG [[BITCAST]] + ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f64) = G_FCANONICALIZE [[FNEG]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[FCANONICALIZE]](f64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](i64) + %input:_(i64) = COPY $vgpr0_vgpr1 + %cst:_(f64) = G_FCONSTANT double 0.000000e+00 + %2:_(f64) = G_BITCAST %input(i64) + %sub:_(f64) = nsz G_FSUB %cst, %2 + %4:_(i64) = G_BITCAST %sub(f64) + $vgpr0_vgpr1 = COPY %4(i64) ... --- @@ -163,14 +191,18 @@ body: | ; CHECK-LABEL: name: test_f64_poszero_nonsz_nofold ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %input:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: %cst:_(s64) = G_FCONSTANT double 0.000000e+00 - ; CHECK-NEXT: %sub:_(s64) = G_FSUB %cst, %input - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %sub(s64) - %input:_(s64) = COPY $vgpr0_vgpr1 - %cst:_(s64) = G_FCONSTANT double 0.0 - %sub:_(s64) = G_FSUB %cst, %input - $vgpr0_vgpr1 = COPY %sub + ; CHECK-NEXT: %input:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: %cst:_(f64) = G_FCONSTANT double 0.000000e+00 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST %input(i64) + ; CHECK-NEXT: %sub:_(f64) = G_FSUB %cst, [[BITCAST]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST %sub(f64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](i64) + %input:_(i64) = COPY $vgpr0_vgpr1 + %cst:_(f64) = G_FCONSTANT double 0.000000e+00 + %2:_(f64) = G_BITCAST %input(i64) + %sub:_(f64) = G_FSUB %cst, %2 + %4:_(i64) = G_BITCAST %sub(f64) + $vgpr0_vgpr1 = COPY %4(i64) ... 
--- @@ -183,14 +215,18 @@ body: | ; CHECK-LABEL: name: test_f64_negzero ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %input:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG %input - ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[FNEG]] - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[FCANONICALIZE]](s64) - %input:_(s64) = COPY $vgpr0_vgpr1 - %cst:_(s64) = G_FCONSTANT double -0.0 - %sub:_(s64) = G_FSUB %cst, %input - $vgpr0_vgpr1 = COPY %sub + ; CHECK-NEXT: %input:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST %input(i64) + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(f64) = G_FNEG [[BITCAST]] + ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f64) = G_FCANONICALIZE [[FNEG]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[FCANONICALIZE]](f64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](i64) + %input:_(i64) = COPY $vgpr0_vgpr1 + %cst:_(f64) = G_FCONSTANT double -0.000000e+00 + %2:_(f64) = G_BITCAST %input(i64) + %sub:_(f64) = G_FSUB %cst, %2 + %4:_(i64) = G_BITCAST %sub(f64) + $vgpr0_vgpr1 = COPY %4(i64) ... --- @@ -203,15 +239,19 @@ body: | ; CHECK-LABEL: name: test_v4f16_poszero_nsz ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %input:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(<4 x s16>) = G_FNEG %input - ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<4 x s16>) = G_FCANONICALIZE [[FNEG]] - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[FCANONICALIZE]](<4 x s16>) - %input:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %cst:_(s16) = G_FCONSTANT half 0.0 - %veccst:_(<4 x s16>) = G_BUILD_VECTOR %cst, %cst, %cst, %cst - %sub:_(<4 x s16>) = nsz G_FSUB %veccst, %input - $vgpr0_vgpr1 = COPY %sub + ; CHECK-NEXT: %input:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST %input(<4 x i16>) + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(<4 x f16>) = G_FNEG [[BITCAST]] + ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<4 x f16>) = G_FCANONICALIZE [[FNEG]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[FCANONICALIZE]](<4 x f16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](<4 x i16>) + %input:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %cst:_(f16) = G_FCONSTANT half 0xH0000 + %veccst:_(<4 x f16>) = G_BUILD_VECTOR %cst(f16), %cst(f16), %cst(f16), %cst(f16) + %3:_(<4 x f16>) = G_BITCAST %input(<4 x i16>) + %sub:_(<4 x f16>) = nsz G_FSUB %veccst, %3 + %5:_(<4 x i16>) = G_BITCAST %sub(<4 x f16>) + $vgpr0_vgpr1 = COPY %5(<4 x i16>) ... 
--- @@ -224,16 +264,20 @@ body: | ; CHECK-LABEL: name: test_v4f16_poszero_nonsz_nofold ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %input:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: %cst:_(s16) = G_FCONSTANT half 0xH0000 - ; CHECK-NEXT: %veccst:_(<4 x s16>) = G_BUILD_VECTOR %cst(s16), %cst(s16), %cst(s16), %cst(s16) - ; CHECK-NEXT: %sub:_(<4 x s16>) = G_FSUB %veccst, %input - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %sub(<4 x s16>) - %input:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %cst:_(s16) = G_FCONSTANT half 0.0 - %veccst:_(<4 x s16>) = G_BUILD_VECTOR %cst, %cst, %cst, %cst - %sub:_(<4 x s16>) = G_FSUB %veccst, %input - $vgpr0_vgpr1 = COPY %sub + ; CHECK-NEXT: %input:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: %cst:_(f16) = G_FCONSTANT half 0xH0000 + ; CHECK-NEXT: %veccst:_(<4 x f16>) = G_BUILD_VECTOR %cst(f16), %cst(f16), %cst(f16), %cst(f16) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST %input(<4 x i16>) + ; CHECK-NEXT: %sub:_(<4 x f16>) = G_FSUB %veccst, [[BITCAST]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x i16>) = G_BITCAST %sub(<4 x f16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](<4 x i16>) + %input:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %cst:_(f16) = G_FCONSTANT half 0xH0000 + %veccst:_(<4 x f16>) = G_BUILD_VECTOR %cst(f16), %cst(f16), %cst(f16), %cst(f16) + %3:_(<4 x f16>) = G_BITCAST %input(<4 x i16>) + %sub:_(<4 x f16>) = G_FSUB %veccst, %3 + %5:_(<4 x i16>) = G_BITCAST %sub(<4 x f16>) + $vgpr0_vgpr1 = COPY %5(<4 x i16>) ... --- @@ -246,15 +290,19 @@ body: | ; CHECK-LABEL: name: test_v4f16_negzero ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %input:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(<4 x s16>) = G_FNEG %input - ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<4 x s16>) = G_FCANONICALIZE [[FNEG]] - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[FCANONICALIZE]](<4 x s16>) - %input:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %cst:_(s16) = G_FCONSTANT half -0.0 - %veccst:_(<4 x s16>) = G_BUILD_VECTOR %cst, %cst, %cst, %cst - %sub:_(<4 x s16>) = G_FSUB %veccst, %input - $vgpr0_vgpr1 = COPY %sub + ; CHECK-NEXT: %input:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST %input(<4 x i16>) + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(<4 x f16>) = G_FNEG [[BITCAST]] + ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<4 x f16>) = G_FCANONICALIZE [[FNEG]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[FCANONICALIZE]](<4 x f16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](<4 x i16>) + %input:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %cst:_(f16) = G_FCONSTANT half 0xH8000 + %veccst:_(<4 x f16>) = G_BUILD_VECTOR %cst(f16), %cst(f16), %cst(f16), %cst(f16) + %3:_(<4 x f16>) = G_BITCAST %input(<4 x i16>) + %sub:_(<4 x f16>) = G_FSUB %veccst, %3 + %5:_(<4 x i16>) = G_BITCAST %sub(<4 x f16>) + $vgpr0_vgpr1 = COPY %5(<4 x i16>) ... 
--- @@ -267,15 +315,19 @@ body: | ; CHECK-LABEL: name: test_v4f32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %input:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(<4 x s32>) = G_FNEG %input - ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<4 x s32>) = G_FCANONICALIZE [[FNEG]] - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FCANONICALIZE]](<4 x s32>) - %input:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %cst:_(s32) = G_FCONSTANT float 0.0 - %veccst:_(<4 x s32>) = G_BUILD_VECTOR %cst, %cst, %cst, %cst - %sub:_(<4 x s32>) = nsz G_FSUB %veccst, %input - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %sub + ; CHECK-NEXT: %input:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f32>) = G_BITCAST %input(<4 x i32>) + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(<4 x f32>) = G_FNEG [[BITCAST]] + ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<4 x f32>) = G_FCANONICALIZE [[FNEG]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[FCANONICALIZE]](<4 x f32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST1]](<4 x i32>) + %input:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %cst:_(f32) = G_FCONSTANT float 0.000000e+00 + %veccst:_(<4 x f32>) = G_BUILD_VECTOR %cst(f32), %cst(f32), %cst(f32), %cst(f32) + %3:_(<4 x f32>) = G_BITCAST %input(<4 x i32>) + %sub:_(<4 x f32>) = nsz G_FSUB %veccst, %3 + %5:_(<4 x i32>) = G_BITCAST %sub(<4 x f32>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %5(<4 x i32>) ... --- @@ -288,15 +340,19 @@ body: | ; CHECK-LABEL: name: test_v4f32_negzero ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %input:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(<4 x s32>) = G_FNEG %input - ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<4 x s32>) = G_FCANONICALIZE [[FNEG]] - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FCANONICALIZE]](<4 x s32>) - %input:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %cst:_(s32) = G_FCONSTANT float -0.0 - %veccst:_(<4 x s32>) = G_BUILD_VECTOR %cst, %cst, %cst, %cst - %sub:_(<4 x s32>) = G_FSUB %veccst, %input - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %sub + ; CHECK-NEXT: %input:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f32>) = G_BITCAST %input(<4 x i32>) + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(<4 x f32>) = G_FNEG [[BITCAST]] + ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<4 x f32>) = G_FCANONICALIZE [[FNEG]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[FCANONICALIZE]](<4 x f32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST1]](<4 x i32>) + %input:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %cst:_(f32) = G_FCONSTANT float -0.000000e+00 + %veccst:_(<4 x f32>) = G_BUILD_VECTOR %cst(f32), %cst(f32), %cst(f32), %cst(f32) + %3:_(<4 x f32>) = G_BITCAST %input(<4 x i32>) + %sub:_(<4 x f32>) = G_FSUB %veccst, %3 + %5:_(<4 x i32>) = G_BITCAST %sub(<4 x f32>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %5(<4 x i32>) ... 
--- @@ -309,16 +365,30 @@ body: | ; CHECK-LABEL: name: test_v4f32_negzero_undef_elt ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %input:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(<4 x s32>) = G_FNEG %input - ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<4 x s32>) = G_FCANONICALIZE [[FNEG]] - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FCANONICALIZE]](<4 x s32>) - %input:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %cst:_(s32) = G_FCONSTANT float -0.0 - %undef:_(s32) = G_IMPLICIT_DEF - %veccst:_(<4 x s32>) = G_BUILD_VECTOR %cst, %undef, %cst, %cst - %sub:_(<4 x s32>) = G_FSUB %veccst, %input - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %sub + ; CHECK-NEXT: %input:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: %cst:_(f32) = G_FCONSTANT float -0.000000e+00 + ; CHECK-NEXT: %undef:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST %cst(f32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST %cst(f32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST %cst(f32) + ; CHECK-NEXT: %veccst:_(<4 x i32>) = G_BUILD_VECTOR [[BITCAST]](i32), %undef(i32), [[BITCAST1]](i32), [[BITCAST2]](i32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x f32>) = G_BITCAST %veccst(<4 x i32>) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x f32>) = G_BITCAST %input(<4 x i32>) + ; CHECK-NEXT: %sub:_(<4 x f32>) = G_FSUB [[BITCAST3]], [[BITCAST4]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<4 x i32>) = G_BITCAST %sub(<4 x f32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST5]](<4 x i32>) + %input:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %cst:_(f32) = G_FCONSTANT float -0.000000e+00 + %undef:_(i32) = G_IMPLICIT_DEF + %3:_(i32) = G_BITCAST %cst(f32) + %4:_(i32) = G_BITCAST %cst(f32) + %5:_(i32) = G_BITCAST %cst(f32) + %veccst:_(<4 x i32>) = G_BUILD_VECTOR %3(i32), %undef(i32), %4(i32), %5(i32) + %7:_(<4 x f32>) = G_BITCAST %veccst(<4 x i32>) + %8:_(<4 x f32>) = G_BITCAST %input(<4 x i32>) + %sub:_(<4 x f32>) = G_FSUB %7, %8 + %10:_(<4 x i32>) = G_BITCAST %sub(<4 x f32>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %10(<4 x i32>) ... 
--- @@ -331,16 +401,30 @@ body: | ; CHECK-LABEL: name: test_v4f32_poszero_undef_elt ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %input:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(<4 x s32>) = G_FNEG %input - ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<4 x s32>) = G_FCANONICALIZE [[FNEG]] - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FCANONICALIZE]](<4 x s32>) - %input:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %cst:_(s32) = G_FCONSTANT float 0.0 - %undef:_(s32) = G_IMPLICIT_DEF - %veccst:_(<4 x s32>) = G_BUILD_VECTOR %cst, %undef, %cst, %cst - %sub:_(<4 x s32>) = nsz G_FSUB %veccst, %input - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %sub + ; CHECK-NEXT: %input:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: %cst:_(f32) = G_FCONSTANT float 0.000000e+00 + ; CHECK-NEXT: %undef:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST %cst(f32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST %cst(f32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST %cst(f32) + ; CHECK-NEXT: %veccst:_(<4 x i32>) = G_BUILD_VECTOR [[BITCAST]](i32), %undef(i32), [[BITCAST1]](i32), [[BITCAST2]](i32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x f32>) = G_BITCAST %veccst(<4 x i32>) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x f32>) = G_BITCAST %input(<4 x i32>) + ; CHECK-NEXT: %sub:_(<4 x f32>) = nsz G_FSUB [[BITCAST3]], [[BITCAST4]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<4 x i32>) = G_BITCAST %sub(<4 x f32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST5]](<4 x i32>) + %input:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %cst:_(f32) = G_FCONSTANT float 0.000000e+00 + %undef:_(i32) = G_IMPLICIT_DEF + %3:_(i32) = G_BITCAST %cst(f32) + %4:_(i32) = G_BITCAST %cst(f32) + %5:_(i32) = G_BITCAST %cst(f32) + %veccst:_(<4 x i32>) = G_BUILD_VECTOR %3(i32), %undef(i32), %4(i32), %5(i32) + %7:_(<4 x f32>) = G_BITCAST %veccst(<4 x i32>) + %8:_(<4 x f32>) = G_BITCAST %input(<4 x i32>) + %sub:_(<4 x f32>) = nsz G_FSUB %7, %8 + %10:_(<4 x i32>) = G_BITCAST %sub(<4 x f32>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %10(<4 x i32>) ... 
--- @@ -353,15 +437,19 @@ body: | ; CHECK-LABEL: name: test_v2f64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %input:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(<2 x s64>) = G_FNEG %input - ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s64>) = G_FCANONICALIZE [[FNEG]] - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FCANONICALIZE]](<2 x s64>) - %input:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %cst:_(s64) = G_FCONSTANT double 0.0 - %veccst:_(<2 x s64>) = G_BUILD_VECTOR %cst, %cst - %sub:_(<2 x s64>) = nsz G_FSUB %veccst, %input - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %sub + ; CHECK-NEXT: %input:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST %input(<2 x i64>) + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(<2 x f64>) = G_FNEG [[BITCAST]] + ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x f64>) = G_FCANONICALIZE [[FNEG]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[FCANONICALIZE]](<2 x f64>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST1]](<2 x i64>) + %input:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %cst:_(f64) = G_FCONSTANT double 0.000000e+00 + %veccst:_(<2 x f64>) = G_BUILD_VECTOR %cst(f64), %cst(f64) + %3:_(<2 x f64>) = G_BITCAST %input(<2 x i64>) + %sub:_(<2 x f64>) = nsz G_FSUB %veccst, %3 + %5:_(<2 x i64>) = G_BITCAST %sub(<2 x f64>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %5(<2 x i64>) ... --- @@ -374,14 +462,17 @@ body: | ; CHECK-LABEL: name: test_v2f64_negzero ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %input:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(<2 x s64>) = G_FNEG %input - ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s64>) = G_FCANONICALIZE [[FNEG]] - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FCANONICALIZE]](<2 x s64>) - %input:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %cst:_(s64) = G_FCONSTANT double -0.0 - %veccst:_(<2 x s64>) = G_BUILD_VECTOR %cst, %cst - %sub:_(<2 x s64>) = G_FSUB %veccst, %input - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %sub + ; CHECK-NEXT: %input:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST %input(<2 x i64>) + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(<2 x f64>) = G_FNEG [[BITCAST]] + ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x f64>) = G_FCANONICALIZE [[FNEG]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[FCANONICALIZE]](<2 x f64>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST1]](<2 x i64>) + %input:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %cst:_(f64) = G_FCONSTANT double -0.000000e+00 + %veccst:_(<2 x f64>) = G_BUILD_VECTOR %cst(f64), %cst(f64) + %3:_(<2 x f64>) = G_BITCAST %input(<2 x i64>) + %sub:_(<2 x f64>) = G_FSUB %veccst, %3 + %5:_(<2 x i64>) = G_BITCAST %sub(<2 x f64>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %5(<2 x i64>) ... 
- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-itofp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-itofp.mir index d6135d86022be..f67204dfa85b8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-itofp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-itofp.mir @@ -11,16 +11,18 @@ body: | ; CHECK-LABEL: name: uitofp_char_to_f32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; CHECK-NEXT: [[AMDGPU_CVT_F32_UBYTE0_:%[0-9]+]]:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 [[AND]] - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CVT_F32_UBYTE0_]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_CONSTANT i32 255 - %2:_(s32) = G_AND %0, %1 - %3:_(s32) = G_UITOFP %2 - $vgpr0 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[AMDGPU_CVT_F32_UBYTE0_:%[0-9]+]]:_(f32) = G_AMDGPU_CVT_F32_UBYTE0 [[AND]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[AMDGPU_CVT_F32_UBYTE0_]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_CONSTANT i32 255 + %2:_(i32) = G_AND %0, %1 + %3:_(f32) = G_UITOFP %2(i32) + %4:_(i32) = G_BITCAST %3(f32) + $vgpr0 = COPY %4(i32) ... --- @@ -33,16 +35,18 @@ body: | ; CHECK-LABEL: name: uitofp_too_many_bits_to_f32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[UITOFP]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_CONSTANT i32 256 - %2:_(s32) = G_AND %0, %1 - %3:_(s32) = G_UITOFP %2 - $vgpr0 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 256 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[AND]](i32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UITOFP]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_CONSTANT i32 256 + %2:_(i32) = G_AND %0, %1 + %3:_(f32) = G_UITOFP %2(i32) + %4:_(i32) = G_BITCAST %3(f32) + $vgpr0 = COPY %4(i32) ... 
--- @@ -55,16 +59,18 @@ body: | ; CHECK-LABEL: name: sitofp_char_to_f32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; CHECK-NEXT: [[AMDGPU_CVT_F32_UBYTE0_:%[0-9]+]]:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 [[AND]] - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CVT_F32_UBYTE0_]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_CONSTANT i32 255 - %2:_(s32) = G_AND %0, %1 - %3:_(s32) = G_SITOFP %2 - $vgpr0 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[AMDGPU_CVT_F32_UBYTE0_:%[0-9]+]]:_(f32) = G_AMDGPU_CVT_F32_UBYTE0 [[AND]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[AMDGPU_CVT_F32_UBYTE0_]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_CONSTANT i32 255 + %2:_(i32) = G_AND %0, %1 + %3:_(f32) = G_SITOFP %2(i32) + %4:_(i32) = G_BITCAST %3(f32) + $vgpr0 = COPY %4(i32) ... --- @@ -77,16 +83,18 @@ body: | ; CHECK-LABEL: name: sitofp_bits127_to_f32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; CHECK-NEXT: [[AMDGPU_CVT_F32_UBYTE0_:%[0-9]+]]:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 [[AND]] - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CVT_F32_UBYTE0_]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_CONSTANT i32 127 - %2:_(s32) = G_AND %0, %1 - %3:_(s32) = G_SITOFP %2 - $vgpr0 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 127 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[AMDGPU_CVT_F32_UBYTE0_:%[0-9]+]]:_(f32) = G_AMDGPU_CVT_F32_UBYTE0 [[AND]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[AMDGPU_CVT_F32_UBYTE0_]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_CONSTANT i32 127 + %2:_(i32) = G_AND %0, %1 + %3:_(f32) = G_SITOFP %2(i32) + %4:_(i32) = G_BITCAST %3(f32) + $vgpr0 = COPY %4(i32) ... --- @@ -99,16 +107,18 @@ body: | ; CHECK-LABEL: name: sitofp_bits128_to_f32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; CHECK-NEXT: [[AMDGPU_CVT_F32_UBYTE0_:%[0-9]+]]:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 [[AND]] - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CVT_F32_UBYTE0_]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_CONSTANT i32 128 - %2:_(s32) = G_AND %0, %1 - %3:_(s32) = G_SITOFP %2 - $vgpr0 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 128 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[AMDGPU_CVT_F32_UBYTE0_:%[0-9]+]]:_(f32) = G_AMDGPU_CVT_F32_UBYTE0 [[AND]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[AMDGPU_CVT_F32_UBYTE0_]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_CONSTANT i32 128 + %2:_(i32) = G_AND %0, %1 + %3:_(f32) = G_SITOFP %2(i32) + %4:_(i32) = G_BITCAST %3(f32) + $vgpr0 = COPY %4(i32) ... 
--- name: sitofp_too_many_bits_to_f32 @@ -120,16 +130,18 @@ body: | ; CHECK-LABEL: name: sitofp_too_many_bits_to_f32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[AND]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[SITOFP]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_CONSTANT i32 256 - %2:_(s32) = G_AND %0, %1 - %3:_(s32) = G_SITOFP %2 - $vgpr0 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 256 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:_(f32) = G_SITOFP [[AND]](i32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[SITOFP]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_CONSTANT i32 256 + %2:_(i32) = G_AND %0, %1 + %3:_(f32) = G_SITOFP %2(i32) + %4:_(i32) = G_BITCAST %3(f32) + $vgpr0 = COPY %4(i32) ... --- @@ -142,19 +154,21 @@ body: | ; CHECK-LABEL: name: uitofp_char_to_f16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; CHECK-NEXT: [[AMDGPU_CVT_F32_UBYTE0_:%[0-9]+]]:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 [[AND]] - ; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[AMDGPU_CVT_F32_UBYTE0_]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_CONSTANT i32 255 - %2:_(s32) = G_AND %0, %1 - %3:_(s16) = G_UITOFP %2 - %4:_(s32) = G_ANYEXT %3 - $vgpr0 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[AMDGPU_CVT_F32_UBYTE0_:%[0-9]+]]:_(f32) = G_AMDGPU_CVT_F32_UBYTE0 [[AND]] + ; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[AMDGPU_CVT_F32_UBYTE0_]](f32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_CONSTANT i32 255 + %2:_(i32) = G_AND %0, %1 + %3:_(f16) = G_UITOFP %2(i32) + %4:_(i16) = G_BITCAST %3(f16) + %5:_(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... 
--- @@ -167,19 +181,21 @@ body: | ; CHECK-LABEL: name: sitofp_char_to_f16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; CHECK-NEXT: [[AMDGPU_CVT_F32_UBYTE0_:%[0-9]+]]:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 [[AND]] - ; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[AMDGPU_CVT_F32_UBYTE0_]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_CONSTANT i32 255 - %2:_(s32) = G_AND %0, %1 - %3:_(s16) = G_SITOFP %2 - %4:_(s32) = G_ANYEXT %3 - $vgpr0 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[AMDGPU_CVT_F32_UBYTE0_:%[0-9]+]]:_(f32) = G_AMDGPU_CVT_F32_UBYTE0 [[AND]] + ; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[AMDGPU_CVT_F32_UBYTE0_]](f32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_CONSTANT i32 255 + %2:_(i32) = G_AND %0, %1 + %3:_(f16) = G_SITOFP %2(i32) + %4:_(i16) = G_BITCAST %3(f16) + %5:_(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... --- @@ -192,17 +208,19 @@ body: | ; CHECK-LABEL: name: uitofp_s64_char_to_f32 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] - ; CHECK-NEXT: [[AMDGPU_CVT_F32_UBYTE0_:%[0-9]+]]:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 [[AND]] - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CVT_F32_UBYTE0_]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_CONSTANT i64 255 - %2:_(s64) = G_AND %0, %1 - %3:_(s32) = G_UITOFP %2 - $vgpr0 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY]](i64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[TRUNC]], [[C]] + ; CHECK-NEXT: [[AMDGPU_CVT_F32_UBYTE0_:%[0-9]+]]:_(f32) = G_AMDGPU_CVT_F32_UBYTE0 [[AND]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[AMDGPU_CVT_F32_UBYTE0_]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = G_CONSTANT i64 255 + %2:_(i64) = G_AND %0, %1 + %3:_(f32) = G_UITOFP %2(i64) + %4:_(i32) = G_BITCAST %3(f32) + $vgpr0 = COPY %4(i32) ... 
--- @@ -215,17 +233,19 @@ body: | ; CHECK-LABEL: name: sitofp_s64_char_to_f32 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] - ; CHECK-NEXT: [[AMDGPU_CVT_F32_UBYTE0_:%[0-9]+]]:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 [[AND]] - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CVT_F32_UBYTE0_]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_CONSTANT i64 255 - %2:_(s64) = G_AND %0, %1 - %3:_(s32) = G_SITOFP %2 - $vgpr0 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY]](i64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[TRUNC]], [[C]] + ; CHECK-NEXT: [[AMDGPU_CVT_F32_UBYTE0_:%[0-9]+]]:_(f32) = G_AMDGPU_CVT_F32_UBYTE0 [[AND]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[AMDGPU_CVT_F32_UBYTE0_]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = G_CONSTANT i64 255 + %2:_(i64) = G_AND %0, %1 + %3:_(f32) = G_SITOFP %2(i64) + %4:_(i32) = G_BITCAST %3(f32) + $vgpr0 = COPY %4(i32) ... --- @@ -238,19 +258,21 @@ body: | ; CHECK-LABEL: name: uitofp_s16_char_to_f32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AND]](s16) - ; CHECK-NEXT: [[AMDGPU_CVT_F32_UBYTE0_:%[0-9]+]]:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 [[ANYEXT]] - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CVT_F32_UBYTE0_]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s16) = G_CONSTANT i16 255 - %3:_(s16) = G_AND %1, %2 - %4:_(s32) = G_UITOFP %3 - $vgpr0 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[AND]](i16) + ; CHECK-NEXT: [[AMDGPU_CVT_F32_UBYTE0_:%[0-9]+]]:_(f32) = G_AMDGPU_CVT_F32_UBYTE0 [[ANYEXT]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[AMDGPU_CVT_F32_UBYTE0_]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(i16) = G_CONSTANT i16 255 + %3:_(i16) = G_AND %1, %2 + %4:_(f32) = G_UITOFP %3(i16) + %5:_(i32) = G_BITCAST %4(f32) + $vgpr0 = COPY %5(i32) ... 
--- @@ -263,17 +285,19 @@ body: | ; CHECK-LABEL: name: sitofp_s16_char_to_f32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AND]](s16) - ; CHECK-NEXT: [[AMDGPU_CVT_F32_UBYTE0_:%[0-9]+]]:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 [[ANYEXT]] - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CVT_F32_UBYTE0_]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s16) = G_CONSTANT i16 255 - %3:_(s16) = G_AND %1, %2 - %4:_(s32) = G_SITOFP %3 - $vgpr0 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[AND]](i16) + ; CHECK-NEXT: [[AMDGPU_CVT_F32_UBYTE0_:%[0-9]+]]:_(f32) = G_AMDGPU_CVT_F32_UBYTE0 [[ANYEXT]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[AMDGPU_CVT_F32_UBYTE0_]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(i16) = G_CONSTANT i16 255 + %3:_(i16) = G_AND %1, %2 + %4:_(f32) = G_SITOFP %3(i16) + %5:_(i32) = G_BITCAST %4(f32) + $vgpr0 = COPY %5(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-lshr-narrow.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-lshr-narrow.mir index 17537f1d9a067..2919ad1dabd8f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-lshr-narrow.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-lshr-narrow.mir @@ -11,14 +11,14 @@ body: | ; CHECK-LABEL: name: narrow_lshr_s64_32_s64amt ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[UV1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_CONSTANT i64 32 - %2:_(s64) = G_LSHR %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[UV1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = G_CONSTANT i64 32 + %2:_(i64) = G_LSHR %0, %1(i64) + $vgpr0_vgpr1 = COPY %2(i64) ... 
--- @@ -31,14 +31,14 @@ body: | ; CHECK-LABEL: name: narrow_lshr_s64_32 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[UV1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_CONSTANT i32 32 - %2:_(s64) = G_LSHR %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[UV1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = G_CONSTANT i32 32 + %2:_(i64) = G_LSHR %0, %1(i32) + $vgpr0_vgpr1 = COPY %2(i64) ... --- @@ -51,16 +51,16 @@ body: | ; CHECK-LABEL: name: narrow_lshr_s64_33 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LSHR]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_CONSTANT i32 33 - %2:_(s64) = G_LSHR %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C]](i32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[LSHR]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = G_CONSTANT i32 33 + %2:_(i64) = G_LSHR %0, %1(i32) + $vgpr0_vgpr1 = COPY %2(i64) ... --- @@ -73,14 +73,14 @@ body: | ; CHECK-LABEL: name: narrow_lshr_s64_31 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[LSHR]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_CONSTANT i32 31 - %2:_(s64) = G_LSHR %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[COPY]], [[C]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[LSHR]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = G_CONSTANT i32 31 + %2:_(i64) = G_LSHR %0, %1(i32) + $vgpr0_vgpr1 = COPY %2(i64) ... 
--- @@ -93,16 +93,16 @@ body: | ; CHECK-LABEL: name: narrow_lshr_s64_63 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LSHR]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_CONSTANT i32 63 - %2:_(s64) = G_LSHR %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C]](i32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[LSHR]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = G_CONSTANT i32 63 + %2:_(i64) = G_LSHR %0, %1(i32) + $vgpr0_vgpr1 = COPY %2(i64) ... --- @@ -115,12 +115,12 @@ body: | ; CHECK-LABEL: name: narrow_lshr_s64_64 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_CONSTANT i32 64 - %2:_(s64) = G_LSHR %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = G_CONSTANT i32 64 + %2:_(i64) = G_LSHR %0, %1(i32) + $vgpr0_vgpr1 = COPY %2(i64) ... --- @@ -133,12 +133,12 @@ body: | ; CHECK-LABEL: name: narrow_lshr_s64_65 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_CONSTANT i32 65 - %2:_(s64) = G_LSHR %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = G_CONSTANT i32 65 + %2:_(i64) = G_LSHR %0, %1(i32) + $vgpr0_vgpr1 = COPY %2(i64) ... --- @@ -151,14 +151,14 @@ body: | ; CHECK-LABEL: name: narrow_lshr_s32_16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_CONSTANT i32 16 - %2:_(s32) = G_LSHR %0, %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_CONSTANT i32 16 + %2:_(i32) = G_LSHR %0, %1(i32) + $vgpr0 = COPY %2(i32) ... 
--- @@ -171,14 +171,14 @@ body: | ; CHECK-LABEL: name: narrow_lshr_s32_17 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_CONSTANT i32 17 - %2:_(s32) = G_LSHR %0, %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 17 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_CONSTANT i32 17 + %2:_(i32) = G_LSHR %0, %1(i32) + $vgpr0 = COPY %2(i32) ... --- @@ -191,14 +191,14 @@ body: | ; CHECK-LABEL: name: narrow_lshr_v2s32_17 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(<2 x s32>) = G_LSHR [[COPY]], [[BUILD_VECTOR]](<2 x s32>) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[LSHR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_CONSTANT i32 17 - %2:_(<2 x s32>) = G_BUILD_VECTOR %1, %1 - %3:_(<2 x s32>) = G_LSHR %0, %2 - $vgpr0_vgpr1 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 17 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[C]](i32), [[C]](i32) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(<2 x i32>) = G_LSHR [[COPY]], [[BUILD_VECTOR]](<2 x i32>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[LSHR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(i32) = G_CONSTANT i32 17 + %2:_(<2 x i32>) = G_BUILD_VECTOR %1(i32), %1(i32) + %3:_(<2 x i32>) = G_LSHR %0, %2(<2 x i32>) + $vgpr0_vgpr1 = COPY %3(<2 x i32>) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-or-redundant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-or-redundant.mir index fbe1b778c7bd8..39225e95943f4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-or-redundant.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-or-redundant.mir @@ -7,13 +7,13 @@ tracksRegLiveness: true body: | bb.0: ; CHECK-LABEL: name: test_const_const_1 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: $sgpr0 = COPY [[C]](s32) + ; CHECK: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; CHECK-NEXT: $sgpr0 = COPY [[C]](i32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 - %0:_(s32) = G_CONSTANT i32 255 - %1:_(s32) = G_CONSTANT i32 15 - %2:_(s32) = G_OR %0(s32), %1(s32) - $sgpr0 = COPY %2(s32) + %0:_(i32) = G_CONSTANT i32 255 + %1:_(i32) = G_CONSTANT i32 15 + %2:_(i32) = G_OR %0, %1 + $sgpr0 = COPY %2(i32) SI_RETURN_TO_EPILOG implicit $sgpr0 ... 
@@ -23,13 +23,13 @@ tracksRegLiveness: true body: | bb.0: ; CHECK-LABEL: name: test_const_const_2 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32) + ; CHECK: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; CHECK-NEXT: $vgpr0 = COPY [[C]](i32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 - %0:_(s32) = G_CONSTANT i32 15 - %1:_(s32) = G_CONSTANT i32 255 - %2:_(s32) = G_OR %0(s32), %1(s32) - $vgpr0 = COPY %2(s32) + %0:_(i32) = G_CONSTANT i32 15 + %1:_(i32) = G_CONSTANT i32 255 + %2:_(i32) = G_OR %0, %1 + $vgpr0 = COPY %2(i32) SI_RETURN_TO_EPILOG implicit $vgpr0 ... @@ -39,13 +39,13 @@ tracksRegLiveness: true body: | bb.0: ; CHECK-LABEL: name: test_const_const_3 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1431655765 - ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32) + ; CHECK: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1431655765 + ; CHECK-NEXT: $vgpr0 = COPY [[C]](i32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 - %0:_(s32) = G_CONSTANT i32 1431655765 - %1:_(s32) = G_CONSTANT i32 1145324612 - %2:_(s32) = G_OR %1(s32), %0(s32) - $vgpr0 = COPY %2(s32) + %0:_(i32) = G_CONSTANT i32 1431655765 + %1:_(i32) = G_CONSTANT i32 1145324612 + %2:_(i32) = G_OR %1, %0 + $vgpr0 = COPY %2(i32) SI_RETURN_TO_EPILOG implicit $vgpr0 ... @@ -59,17 +59,17 @@ body: | ; CHECK-LABEL: name: test_or_or ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[C]] - ; CHECK-NEXT: $vgpr0 = COPY [[OR]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY]], [[C]] + ; CHECK-NEXT: $vgpr0 = COPY [[OR]](i32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_CONSTANT i32 255 - %2:_(s32) = G_CONSTANT i32 15 - %3:_(s32) = G_OR %0, %1(s32) - %4:_(s32) = G_OR %3, %2 - $vgpr0 = COPY %4(s32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_CONSTANT i32 255 + %2:_(i32) = G_CONSTANT i32 15 + %3:_(i32) = G_OR %0, %1 + %4:_(i32) = G_OR %3, %2 + $vgpr0 = COPY %4(i32) SI_RETURN_TO_EPILOG implicit $vgpr0 ... 
@@ -83,21 +83,21 @@ body: | ; CHECK-LABEL: name: test_shl_xor_or ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[SHL]], [[C1]] - ; CHECK-NEXT: $sgpr0 = COPY [[XOR]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 5 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[C]](i32) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[SHL]], [[C1]] + ; CHECK-NEXT: $sgpr0 = COPY [[XOR]](i32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = G_CONSTANT i32 5 - %2:_(s32) = G_CONSTANT i32 -1 - %3:_(s32) = G_CONSTANT i32 31 - %4:_(s32) = G_SHL %0, %1(s32) - %5:_(s32) = G_XOR %4(s32), %2(s32) - %6:_(s32) = G_OR %5(s32), %3(s32) - $sgpr0 = COPY %6(s32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = G_CONSTANT i32 5 + %2:_(i32) = G_CONSTANT i32 -1 + %3:_(i32) = G_CONSTANT i32 31 + %4:_(i32) = G_SHL %0, %1(i32) + %5:_(i32) = G_XOR %4, %2 + %6:_(i32) = G_OR %5, %3 + $sgpr0 = COPY %6(i32) SI_RETURN_TO_EPILOG implicit $sgpr0 ... @@ -111,21 +111,21 @@ body: | ; CHECK-LABEL: name: test_lshr_xor_or ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[LSHR]], [[C1]] - ; CHECK-NEXT: $vgpr0 = COPY [[XOR]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 5 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[LSHR]], [[C1]] + ; CHECK-NEXT: $vgpr0 = COPY [[XOR]](i32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_CONSTANT i32 5 - %2:_(s32) = G_CONSTANT i32 -1 - %3:_(s32) = G_CONSTANT i32 4160749568 - %4:_(s32) = G_LSHR %0, %1(s32) - %5:_(s32) = G_XOR %4(s32), %2(s32) - %6:_(s32) = G_OR %5(s32), %3(s32) - $vgpr0 = COPY %6(s32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_CONSTANT i32 5 + %2:_(i32) = G_CONSTANT i32 -1 + %3:_(i32) = G_CONSTANT i32 -134217728 + %4:_(i32) = G_LSHR %0, %1(i32) + %5:_(i32) = G_XOR %4, %2 + %6:_(i32) = G_OR %5, %3 + $vgpr0 = COPY %6(i32) SI_RETURN_TO_EPILOG implicit $vgpr0 ... 
@@ -139,23 +139,23 @@ body: | ; CHECK-LABEL: name: test_or_non_const ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[LSHR]], [[C1]] - ; CHECK-NEXT: $sgpr0 = COPY [[XOR]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[LSHR]], [[C1]] + ; CHECK-NEXT: $sgpr0 = COPY [[XOR]](i32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_CONSTANT i32 16 - %3:_(s32) = G_CONSTANT i32 -1 - %4:_(s32) = G_CONSTANT i32 4294901760 - %5:_(s32) = G_LSHR %0, %2(s32) - %6:_(s32) = G_XOR %5, %3(s32) - %7:_(s32) = G_AND %1, %4(s32) - %8:_(s32) = G_OR %6, %7 - $sgpr0 = COPY %8(s32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_CONSTANT i32 16 + %3:_(i32) = G_CONSTANT i32 -1 + %4:_(i32) = G_CONSTANT i32 -65536 + %5:_(i32) = G_LSHR %0, %2(i32) + %6:_(i32) = G_XOR %5, %3 + %7:_(i32) = G_AND %1, %4 + %8:_(i32) = G_OR %6, %7 + $sgpr0 = COPY %8(i32) SI_RETURN_TO_EPILOG implicit $sgpr0 ... --- @@ -164,16 +164,16 @@ tracksRegLiveness: true body: | bb.0: ; CHECK-LABEL: name: vector_const_splat_const_splat - ; CHECK: %mask:_(s16) = G_CONSTANT i16 255 - ; CHECK-NEXT: %c2:_(<2 x s16>) = G_BUILD_VECTOR %mask(s16), %mask(s16) - ; CHECK-NEXT: $vgpr0 = COPY %c2(<2 x s16>) + ; CHECK: %mask:_(i16) = G_CONSTANT i16 255 + ; CHECK-NEXT: %c2:_(<2 x i16>) = G_BUILD_VECTOR %mask(i16), %mask(i16) + ; CHECK-NEXT: $vgpr0 = COPY %c2(<2 x i16>) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 - %mask:_(s16) = G_CONSTANT i16 255 - %fifteen:_(s16) = G_CONSTANT i16 15 - %c1:_(<2 x s16>) = G_BUILD_VECTOR %fifteen, %fifteen - %c2:_(<2 x s16>) = G_BUILD_VECTOR %mask, %mask - %and:_(<2 x s16>) = G_OR %c1(<2 x s16>), %c2(<2 x s16>) - $vgpr0 = COPY %and(<2 x s16>) + %mask:_(i16) = G_CONSTANT i16 255 + %fifteen:_(i16) = G_CONSTANT i16 15 + %c1:_(<2 x i16>) = G_BUILD_VECTOR %fifteen(i16), %fifteen(i16) + %c2:_(<2 x i16>) = G_BUILD_VECTOR %mask(i16), %mask(i16) + %and:_(<2 x i16>) = G_OR %c1, %c2 + $vgpr0 = COPY %and(<2 x i16>) SI_RETURN_TO_EPILOG implicit $vgpr0 ... 
--- @@ -182,17 +182,17 @@ tracksRegLiveness: true body: | bb.0: ; CHECK-LABEL: name: vector_const_valid_not_splat - ; CHECK: %mask:_(s16) = G_CONSTANT i16 255 - ; CHECK-NEXT: %c2:_(<2 x s16>) = G_BUILD_VECTOR %mask(s16), %mask(s16) - ; CHECK-NEXT: $vgpr0 = COPY %c2(<2 x s16>) + ; CHECK: %mask:_(i16) = G_CONSTANT i16 255 + ; CHECK-NEXT: %c2:_(<2 x i16>) = G_BUILD_VECTOR %mask(i16), %mask(i16) + ; CHECK-NEXT: $vgpr0 = COPY %c2(<2 x i16>) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 - %fifteen:_(s16) = G_CONSTANT i16 15 - %sixteen:_(s16) = G_CONSTANT i16 16 - %mask:_(s16) = G_CONSTANT i16 255 - %c1:_(<2 x s16>) = G_BUILD_VECTOR %fifteen, %sixteen - %c2:_(<2 x s16>) = G_BUILD_VECTOR %mask, %mask - %and:_(<2 x s16>) = G_OR %c1(<2 x s16>), %c2(<2 x s16>) - $vgpr0 = COPY %and(<2 x s16>) + %fifteen:_(i16) = G_CONSTANT i16 15 + %sixteen:_(i16) = G_CONSTANT i16 16 + %mask:_(i16) = G_CONSTANT i16 255 + %c1:_(<2 x i16>) = G_BUILD_VECTOR %fifteen(i16), %sixteen(i16) + %c2:_(<2 x i16>) = G_BUILD_VECTOR %mask(i16), %mask(i16) + %and:_(<2 x i16>) = G_OR %c1, %c2 + $vgpr0 = COPY %and(<2 x i16>) SI_RETURN_TO_EPILOG implicit $vgpr0 ... --- @@ -201,20 +201,20 @@ tracksRegLiveness: true body: | bb.0: ; CHECK-LABEL: name: vector_dont_combine_const_too_wide - ; CHECK: %fifteen:_(s16) = G_CONSTANT i16 15 - ; CHECK-NEXT: %too_wide:_(s16) = G_CONSTANT i16 257 - ; CHECK-NEXT: %mask:_(s16) = G_CONSTANT i16 255 - ; CHECK-NEXT: %c1:_(<2 x s16>) = G_BUILD_VECTOR %fifteen(s16), %too_wide(s16) - ; CHECK-NEXT: %c2:_(<2 x s16>) = G_BUILD_VECTOR %mask(s16), %mask(s16) - ; CHECK-NEXT: %and:_(<2 x s16>) = G_OR %c1, %c2 - ; CHECK-NEXT: $vgpr0 = COPY %and(<2 x s16>) + ; CHECK: %fifteen:_(i16) = G_CONSTANT i16 15 + ; CHECK-NEXT: %too_wide:_(i16) = G_CONSTANT i16 257 + ; CHECK-NEXT: %mask:_(i16) = G_CONSTANT i16 255 + ; CHECK-NEXT: %c1:_(<2 x i16>) = G_BUILD_VECTOR %fifteen(i16), %too_wide(i16) + ; CHECK-NEXT: %c2:_(<2 x i16>) = G_BUILD_VECTOR %mask(i16), %mask(i16) + ; CHECK-NEXT: %and:_(<2 x i16>) = G_OR %c1, %c2 + ; CHECK-NEXT: $vgpr0 = COPY %and(<2 x i16>) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 - %fifteen:_(s16) = G_CONSTANT i16 15 - %too_wide:_(s16) = G_CONSTANT i16 257 - %mask:_(s16) = G_CONSTANT i16 255 - %c1:_(<2 x s16>) = G_BUILD_VECTOR %fifteen, %too_wide - %c2:_(<2 x s16>) = G_BUILD_VECTOR %mask, %mask - %and:_(<2 x s16>) = G_OR %c1(<2 x s16>), %c2(<2 x s16>) - $vgpr0 = COPY %and(<2 x s16>) + %fifteen:_(i16) = G_CONSTANT i16 15 + %too_wide:_(i16) = G_CONSTANT i16 257 + %mask:_(i16) = G_CONSTANT i16 255 + %c1:_(<2 x i16>) = G_BUILD_VECTOR %fifteen(i16), %too_wide(i16) + %c2:_(<2 x i16>) = G_BUILD_VECTOR %mask(i16), %mask(i16) + %and:_(<2 x i16>) = G_OR %c1, %c2 + $vgpr0 = COPY %and(<2 x i16>) SI_RETURN_TO_EPILOG implicit $vgpr0 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-redundant-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-redundant-and.mir index 79c1470f94cec..6443302fd37b6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-redundant-and.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-redundant-and.mir @@ -7,13 +7,13 @@ tracksRegLiveness: true body: | bb.0: ; CHECK-LABEL: name: test_const_const - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 - ; CHECK-NEXT: $sgpr0 = COPY [[C]](s32) + ; CHECK: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 15 + ; CHECK-NEXT: $sgpr0 = COPY [[C]](i32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 - %0:_(s32) = G_CONSTANT i32 15 - %1:_(s32) = G_CONSTANT i32 255 - %2:_(s32) = G_AND %0(s32), %1(s32) - $sgpr0 = COPY %2(s32) + %0:_(i32) = G_CONSTANT i32 15 + %1:_(i32) = G_CONSTANT i32 255 + %2:_(i32) = G_AND %0, %1 + $sgpr0 = COPY %2(i32) SI_RETURN_TO_EPILOG implicit $sgpr0 ... @@ -23,13 +23,13 @@ tracksRegLiveness: true body: | bb.0: ; CHECK-LABEL: name: test_const_const_2 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 - ; CHECK-NEXT: $sgpr0 = COPY [[C]](s32) + ; CHECK: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 15 + ; CHECK-NEXT: $sgpr0 = COPY [[C]](i32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 - %0:_(s32) = G_CONSTANT i32 255 - %1:_(s32) = G_CONSTANT i32 15 - %2:_(s32) = G_AND %0(s32), %1(s32) - $sgpr0 = COPY %2(s32) + %0:_(i32) = G_CONSTANT i32 255 + %1:_(i32) = G_CONSTANT i32 15 + %2:_(i32) = G_AND %0, %1 + $sgpr0 = COPY %2(i32) SI_RETURN_TO_EPILOG implicit $sgpr0 ... @@ -39,13 +39,13 @@ tracksRegLiveness: true body: | bb.0: ; CHECK-LABEL: name: test_const_const_3 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1431655766 - ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32) + ; CHECK: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1431655766 + ; CHECK-NEXT: $vgpr0 = COPY [[C]](i32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 - %0:_(s32) = G_CONSTANT i32 2863311530 - %1:_(s32) = G_CONSTANT i32 4008636142 - %2:_(s32) = G_AND %0(s32), %1(s32) - $vgpr0 = COPY %2(s32) + %0:_(i32) = G_CONSTANT i32 -1431655766 + %1:_(i32) = G_CONSTANT i32 -286331154 + %2:_(i32) = G_AND %0, %1 + $vgpr0 = COPY %2(i32) SI_RETURN_TO_EPILOG implicit $vgpr0 ... @@ -59,17 +59,17 @@ body: | ; CHECK-LABEL: name: test_and_and ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 15 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: $vgpr0 = COPY [[AND]](i32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_CONSTANT i32 15 - %2:_(s32) = G_CONSTANT i32 255 - %3:_(s32) = G_AND %0, %1(s32) - %4:_(s32) = G_AND %3, %2 - $vgpr0 = COPY %4(s32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_CONSTANT i32 15 + %2:_(i32) = G_CONSTANT i32 255 + %3:_(i32) = G_AND %0, %1 + %4:_(i32) = G_AND %3, %2 + $vgpr0 = COPY %4(i32) SI_RETURN_TO_EPILOG implicit $vgpr0 ... 
@@ -83,17 +83,17 @@ body: | ; CHECK-LABEL: name: test_shl_and ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[SHL]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 5 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[C]](i32) + ; CHECK-NEXT: $sgpr0 = COPY [[SHL]](i32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = G_CONSTANT i32 5 - %2:_(s32) = G_CONSTANT i32 4294967264 - %3:_(s32) = G_SHL %0, %1(s32) - %4:_(s32) = G_AND %3, %2 - $sgpr0 = COPY %4(s32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = G_CONSTANT i32 5 + %2:_(i32) = G_CONSTANT i32 -32 + %3:_(i32) = G_SHL %0, %1(i32) + %4:_(i32) = G_AND %3, %2 + $sgpr0 = COPY %4(i32) SI_RETURN_TO_EPILOG implicit $sgpr0 ... @@ -107,17 +107,17 @@ body: | ; CHECK-LABEL: name: test_lshr_and ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 5 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](i32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_CONSTANT i32 5 - %2:_(s32) = G_CONSTANT i32 134217727 - %3:_(s32) = G_LSHR %0, %1(s32) - %4:_(s32) = G_AND %3, %2 - $vgpr0 = COPY %4(s32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_CONSTANT i32 5 + %2:_(i32) = G_CONSTANT i32 134217727 + %3:_(i32) = G_LSHR %0, %1(i32) + %4:_(i32) = G_AND %3, %2 + $vgpr0 = COPY %4(i32) SI_RETURN_TO_EPILOG implicit $vgpr0 ... @@ -131,19 +131,19 @@ body: | ; CHECK-LABEL: name: test_and_non_const ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[LSHR]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; CHECK-NEXT: $sgpr0 = COPY [[LSHR]](i32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_CONSTANT i32 16 - %3:_(s32) = G_CONSTANT i32 65535 - %4:_(s32) = G_OR %1, %3 - %5:_(s32) = G_LSHR %0, %2(s32) - %6:_(s32) = G_AND %5, %4 - $sgpr0 = COPY %6(s32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_CONSTANT i32 16 + %3:_(i32) = G_CONSTANT i32 65535 + %4:_(i32) = G_OR %1, %3 + %5:_(i32) = G_LSHR %0, %2(i32) + %6:_(i32) = G_AND %5, %4 + $sgpr0 = COPY %6(i32) SI_RETURN_TO_EPILOG implicit $sgpr0 ... --- @@ -152,19 +152,19 @@ tracksRegLiveness: true body: | bb.0: ; CHECK-LABEL: name: test_sext_inreg - ; CHECK: %cst_1:_(s32) = G_CONSTANT i32 -5 - ; CHECK-NEXT: $sgpr0 = COPY %cst_1(s32) + ; CHECK: %cst_1:_(i32) = G_CONSTANT i32 -5 + ; CHECK-NEXT: $sgpr0 = COPY %cst_1(i32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 - %cst_1:_(s32) = G_CONSTANT i32 -5 + %cst_1:_(i32) = G_CONSTANT i32 -5 ; 000 ... 
1011 - %cst_11:_(s32) = G_CONSTANT i32 11 + %cst_11:_(i32) = G_CONSTANT i32 11 ; Sext from the 4th bit -> 111 ... 1011 = -5 - %sext_inreg_11:_(s32) = G_SEXT_INREG %cst_11, 4 + %sext_inreg_11:_(i32) = G_SEXT_INREG %cst_11, 4 - %and:_(s32) = G_AND %cst_1(s32), %sext_inreg_11(s32) - $sgpr0 = COPY %and(s32) + %and:_(i32) = G_AND %cst_1, %sext_inreg_11 + $sgpr0 = COPY %and(i32) SI_RETURN_TO_EPILOG implicit $sgpr0 ... --- @@ -173,16 +173,16 @@ tracksRegLiveness: true body: | bb.0: ; CHECK-LABEL: name: vector_const_splat_const_splat - ; CHECK: %fifteen:_(s16) = G_CONSTANT i16 15 - ; CHECK-NEXT: %c1:_(<2 x s16>) = G_BUILD_VECTOR %fifteen(s16), %fifteen(s16) - ; CHECK-NEXT: $vgpr0 = COPY %c1(<2 x s16>) + ; CHECK: %fifteen:_(i16) = G_CONSTANT i16 15 + ; CHECK-NEXT: %c1:_(<2 x i16>) = G_BUILD_VECTOR %fifteen(i16), %fifteen(i16) + ; CHECK-NEXT: $vgpr0 = COPY %c1(<2 x i16>) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 - %fifteen:_(s16) = G_CONSTANT i16 15 - %mask:_(s16) = G_CONSTANT i16 255 - %c1:_(<2 x s16>) = G_BUILD_VECTOR %fifteen, %fifteen - %c2:_(<2 x s16>) = G_BUILD_VECTOR %mask, %mask - %and:_(<2 x s16>) = G_AND %c1(<2 x s16>), %c2(<2 x s16>) - $vgpr0 = COPY %and(<2 x s16>) + %fifteen:_(i16) = G_CONSTANT i16 15 + %mask:_(i16) = G_CONSTANT i16 255 + %c1:_(<2 x i16>) = G_BUILD_VECTOR %fifteen(i16), %fifteen(i16) + %c2:_(<2 x i16>) = G_BUILD_VECTOR %mask(i16), %mask(i16) + %and:_(<2 x i16>) = G_AND %c1, %c2 + $vgpr0 = COPY %and(<2 x i16>) SI_RETURN_TO_EPILOG implicit $vgpr0 ... --- @@ -191,18 +191,18 @@ tracksRegLiveness: true body: | bb.0: ; CHECK-LABEL: name: vector_const_valid_not_splat - ; CHECK: %fifteen:_(s16) = G_CONSTANT i16 15 - ; CHECK-NEXT: %sixteen:_(s16) = G_CONSTANT i16 16 - ; CHECK-NEXT: %c1:_(<2 x s16>) = G_BUILD_VECTOR %fifteen(s16), %sixteen(s16) - ; CHECK-NEXT: $vgpr0 = COPY %c1(<2 x s16>) + ; CHECK: %fifteen:_(i16) = G_CONSTANT i16 15 + ; CHECK-NEXT: %sixteen:_(i16) = G_CONSTANT i16 16 + ; CHECK-NEXT: %c1:_(<2 x i16>) = G_BUILD_VECTOR %fifteen(i16), %sixteen(i16) + ; CHECK-NEXT: $vgpr0 = COPY %c1(<2 x i16>) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 - %fifteen:_(s16) = G_CONSTANT i16 15 - %sixteen:_(s16) = G_CONSTANT i16 16 - %mask:_(s16) = G_CONSTANT i16 255 - %c1:_(<2 x s16>) = G_BUILD_VECTOR %fifteen, %sixteen - %c2:_(<2 x s16>) = G_BUILD_VECTOR %mask, %mask - %and:_(<2 x s16>) = G_AND %c1(<2 x s16>), %c2(<2 x s16>) - $vgpr0 = COPY %and(<2 x s16>) + %fifteen:_(i16) = G_CONSTANT i16 15 + %sixteen:_(i16) = G_CONSTANT i16 16 + %mask:_(i16) = G_CONSTANT i16 255 + %c1:_(<2 x i16>) = G_BUILD_VECTOR %fifteen(i16), %sixteen(i16) + %c2:_(<2 x i16>) = G_BUILD_VECTOR %mask(i16), %mask(i16) + %and:_(<2 x i16>) = G_AND %c1, %c2 + $vgpr0 = COPY %and(<2 x i16>) SI_RETURN_TO_EPILOG implicit $vgpr0 ... 
--- @@ -211,20 +211,20 @@ tracksRegLiveness: true body: | bb.0: ; CHECK-LABEL: name: vector_dont_combine_const_too_wide - ; CHECK: %fifteen:_(s16) = G_CONSTANT i16 15 - ; CHECK-NEXT: %too_wide:_(s16) = G_CONSTANT i16 257 - ; CHECK-NEXT: %mask:_(s16) = G_CONSTANT i16 255 - ; CHECK-NEXT: %c1:_(<2 x s16>) = G_BUILD_VECTOR %fifteen(s16), %too_wide(s16) - ; CHECK-NEXT: %c2:_(<2 x s16>) = G_BUILD_VECTOR %mask(s16), %mask(s16) - ; CHECK-NEXT: %and:_(<2 x s16>) = G_AND %c1, %c2 - ; CHECK-NEXT: $vgpr0 = COPY %and(<2 x s16>) + ; CHECK: %fifteen:_(i16) = G_CONSTANT i16 15 + ; CHECK-NEXT: %too_wide:_(i16) = G_CONSTANT i16 257 + ; CHECK-NEXT: %mask:_(i16) = G_CONSTANT i16 255 + ; CHECK-NEXT: %c1:_(<2 x i16>) = G_BUILD_VECTOR %fifteen(i16), %too_wide(i16) + ; CHECK-NEXT: %c2:_(<2 x i16>) = G_BUILD_VECTOR %mask(i16), %mask(i16) + ; CHECK-NEXT: %and:_(<2 x i16>) = G_AND %c1, %c2 + ; CHECK-NEXT: $vgpr0 = COPY %and(<2 x i16>) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 - %fifteen:_(s16) = G_CONSTANT i16 15 - %too_wide:_(s16) = G_CONSTANT i16 257 - %mask:_(s16) = G_CONSTANT i16 255 - %c1:_(<2 x s16>) = G_BUILD_VECTOR %fifteen, %too_wide - %c2:_(<2 x s16>) = G_BUILD_VECTOR %mask, %mask - %and:_(<2 x s16>) = G_AND %c1(<2 x s16>), %c2(<2 x s16>) - $vgpr0 = COPY %and(<2 x s16>) + %fifteen:_(i16) = G_CONSTANT i16 15 + %too_wide:_(i16) = G_CONSTANT i16 257 + %mask:_(i16) = G_CONSTANT i16 255 + %c1:_(<2 x i16>) = G_BUILD_VECTOR %fifteen(i16), %too_wide(i16) + %c2:_(<2 x i16>) = G_BUILD_VECTOR %mask(i16), %mask(i16) + %and:_(<2 x i16>) = G_AND %c1, %c2 + $vgpr0 = COPY %and(<2 x i16>) SI_RETURN_TO_EPILOG implicit $vgpr0 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-redundant-neg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-redundant-neg.mir index b56808892d62e..b100ab0babee2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-redundant-neg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-redundant-neg.mir @@ -10,15 +10,21 @@ body: | ; CHECK-LABEL: name: test_add_rhs ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[FSUB]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_FNEG %1 - %3:_(s32) = G_FADD %0, %2 - $vgpr0 = COPY %3(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(f32) = G_FSUB [[BITCAST1]], [[BITCAST]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FSUB]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(f32) = G_BITCAST %1(i32) + %3:_(f32) = G_FNEG %2 + %4:_(f32) = G_BITCAST %0(i32) + %5:_(f32) = G_FADD %4, %3 + %6:_(i32) = G_BITCAST %5(f32) + $vgpr0 = COPY %6(i32) ... 
--- @@ -30,15 +36,21 @@ body: | ; CHECK-LABEL: name: test_add_lhs ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[COPY1]], [[COPY]] - ; CHECK-NEXT: $vgpr0 = COPY [[FSUB]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_FNEG %0 - %3:_(s32) = G_FADD %2, %1 - $vgpr0 = COPY %3(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(f32) = G_FSUB [[BITCAST1]], [[BITCAST]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FSUB]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_FNEG %2 + %4:_(f32) = G_BITCAST %1(i32) + %5:_(f32) = G_FADD %3, %4 + %6:_(i32) = G_BITCAST %5(f32) + $vgpr0 = COPY %6(i32) ... --- @@ -50,15 +62,21 @@ body: | ; CHECK-LABEL: name: test_sub ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[FADD]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_FNEG %1 - %3:_(s32) = G_FSUB %0, %2 - $vgpr0 = COPY %3(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[BITCAST1]], [[BITCAST]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FADD]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(f32) = G_BITCAST %1(i32) + %3:_(f32) = G_FNEG %2 + %4:_(f32) = G_BITCAST %0(i32) + %5:_(f32) = G_FSUB %4, %3 + %6:_(i32) = G_BITCAST %5(f32) + $vgpr0 = COPY %6(i32) ... --- @@ -70,16 +88,22 @@ body: | ; CHECK-LABEL: name: test_mul ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[FMUL]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_FNEG %0 - %3:_(s32) = G_FNEG %1 - %4:_(s32) = G_FMUL %2, %3 - $vgpr0 = COPY %4(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMUL]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_FNEG %2 + %4:_(f32) = G_BITCAST %1(i32) + %5:_(f32) = G_FNEG %4 + %6:_(f32) = G_FMUL %3, %5 + %7:_(i32) = G_BITCAST %6(f32) + $vgpr0 = COPY %7(i32) ... 
--- @@ -91,16 +115,22 @@ body: | ; CHECK-LABEL: name: test_div ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[FDIV:%[0-9]+]]:_(s32) = G_FDIV [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[FDIV]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_FNEG %0 - %3:_(s32) = G_FNEG %1 - %4:_(s32) = G_FDIV %2, %3 - $vgpr0 = COPY %4(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[FDIV:%[0-9]+]]:_(f32) = G_FDIV [[BITCAST]], [[BITCAST1]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FDIV]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_FNEG %2 + %4:_(f32) = G_BITCAST %1(i32) + %5:_(f32) = G_FNEG %4 + %6:_(f32) = G_FDIV %3, %5 + %7:_(i32) = G_BITCAST %6(f32) + $vgpr0 = COPY %7(i32) ... --- @@ -112,18 +142,26 @@ body: | ; CHECK-LABEL: name: test_fmad ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[COPY]], [[COPY1]], [[COPY2]] - ; CHECK-NEXT: $vgpr0 = COPY [[FMAD]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = G_FNEG %0 - %4:_(s32) = G_FNEG %1 - %5:_(s32) = G_FMAD %3, %4, %2 - $vgpr0 = COPY %5(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; CHECK-NEXT: [[FMAD:%[0-9]+]]:_(f32) = G_FMAD [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMAD]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(f32) = G_BITCAST %0(i32) + %4:_(f32) = G_FNEG %3 + %5:_(f32) = G_BITCAST %1(i32) + %6:_(f32) = G_FNEG %5 + %7:_(f32) = G_BITCAST %2(i32) + %8:_(f32) = G_FMAD %4, %6, %7 + %9:_(i32) = G_BITCAST %8(f32) + $vgpr0 = COPY %9(i32) ... 
--- @@ -135,17 +173,25 @@ body: | ; CHECK-LABEL: name: test_fma ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; CHECK-NEXT: $vgpr0 = COPY [[FMA]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = G_FNEG %0 - %4:_(s32) = G_FNEG %1 - %5:_(s32) = G_FMA %3, %4, %2 - $vgpr0 = COPY %5(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; CHECK-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMA]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(f32) = G_BITCAST %0(i32) + %4:_(f32) = G_FNEG %3 + %5:_(f32) = G_BITCAST %1(i32) + %6:_(f32) = G_FNEG %5 + %7:_(f32) = G_BITCAST %2(i32) + %8:_(f32) = G_FMA %4, %6, %7 + %9:_(i32) = G_BITCAST %8(f32) + $vgpr0 = COPY %9(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-rot.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-rot.mir index b22a59c3fef79..e92ffff9084b6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-rot.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-rot.mir @@ -11,18 +11,18 @@ body: | ; CHECK-LABEL: name: rotl_i32 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %a:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %amt:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: %or:_(s32) = G_ROTL %a, %amt(s32) - ; CHECK-NEXT: $vgpr2 = COPY %or(s32) - %a:_(s32) = COPY $vgpr0 - %amt:_(s32) = COPY $vgpr1 - %bw:_(s32) = G_CONSTANT i32 32 - %shl:_(s32) = G_SHL %a, %amt - %sub:_(s32) = G_SUB %bw, %amt - %lshr:_(s32) = G_LSHR %a, %sub - %or:_(s32) = G_OR %shl, %lshr - $vgpr2 = COPY %or + ; CHECK-NEXT: %a:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %amt:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: %or:_(i32) = G_ROTL %a, %amt(i32) + ; CHECK-NEXT: $vgpr2 = COPY %or(i32) + %a:_(i32) = COPY $vgpr0 + %amt:_(i32) = COPY $vgpr1 + %bw:_(i32) = G_CONSTANT i32 32 + %shl:_(i32) = G_SHL %a, %amt(i32) + %sub:_(i32) = G_SUB %bw, %amt + %lshr:_(i32) = G_LSHR %a, %sub(i32) + %or:_(i32) = G_OR %shl, %lshr + $vgpr2 = COPY %or(i32) ... 
--- @@ -35,19 +35,19 @@ body: | ; CHECK-LABEL: name: rotl_v2i32 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %a:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: %amt:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: %or:_(<2 x s32>) = G_ROTL %a, %amt(<2 x s32>) - ; CHECK-NEXT: $vgpr4_vgpr5 = COPY %or(<2 x s32>) - %a:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %amt:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %scalar_bw:_(s32) = G_CONSTANT i32 32 - %bw:_(<2 x s32>) = G_BUILD_VECTOR %scalar_bw, %scalar_bw - %shl:_(<2 x s32>) = G_SHL %a, %amt - %sub:_(<2 x s32>) = G_SUB %bw, %amt - %lshr:_(<2 x s32>) = G_LSHR %a, %sub - %or:_(<2 x s32>) = G_OR %shl, %lshr - $vgpr4_vgpr5 = COPY %or + ; CHECK-NEXT: %a:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: %amt:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: %or:_(<2 x i32>) = G_ROTL %a, %amt(<2 x i32>) + ; CHECK-NEXT: $vgpr4_vgpr5 = COPY %or(<2 x i32>) + %a:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %amt:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %scalar_bw:_(i32) = G_CONSTANT i32 32 + %bw:_(<2 x i32>) = G_BUILD_VECTOR %scalar_bw(i32), %scalar_bw(i32) + %shl:_(<2 x i32>) = G_SHL %a, %amt(<2 x i32>) + %sub:_(<2 x i32>) = G_SUB %bw, %amt + %lshr:_(<2 x i32>) = G_LSHR %a, %sub(<2 x i32>) + %or:_(<2 x i32>) = G_OR %shl, %lshr + $vgpr4_vgpr5 = COPY %or(<2 x i32>) ... --- @@ -60,18 +60,18 @@ body: | ; CHECK-LABEL: name: rotl_commute_i32 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %a:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %amt:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: %or:_(s32) = G_ROTL %a, %amt(s32) - ; CHECK-NEXT: $vgpr2 = COPY %or(s32) - %a:_(s32) = COPY $vgpr0 - %amt:_(s32) = COPY $vgpr1 - %bw:_(s32) = G_CONSTANT i32 32 - %shl:_(s32) = G_SHL %a, %amt - %sub:_(s32) = G_SUB %bw, %amt - %lshr:_(s32) = G_LSHR %a, %sub - %or:_(s32) = G_OR %lshr, %shl - $vgpr2 = COPY %or + ; CHECK-NEXT: %a:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %amt:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: %or:_(i32) = G_ROTL %a, %amt(i32) + ; CHECK-NEXT: $vgpr2 = COPY %or(i32) + %a:_(i32) = COPY $vgpr0 + %amt:_(i32) = COPY $vgpr1 + %bw:_(i32) = G_CONSTANT i32 32 + %shl:_(i32) = G_SHL %a, %amt(i32) + %sub:_(i32) = G_SUB %bw, %amt + %lshr:_(i32) = G_LSHR %a, %sub(i32) + %or:_(i32) = G_OR %lshr, %shl + $vgpr2 = COPY %or(i32) ... --- @@ -84,18 +84,18 @@ body: | ; CHECK-LABEL: name: rotr_i32 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %a:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %amt:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: %or:_(s32) = G_ROTR %a, %amt(s32) - ; CHECK-NEXT: $vgpr2 = COPY %or(s32) - %a:_(s32) = COPY $vgpr0 - %amt:_(s32) = COPY $vgpr1 - %bw:_(s32) = G_CONSTANT i32 32 - %lshr:_(s32) = G_LSHR %a, %amt - %sub:_(s32) = G_SUB %bw, %amt - %shl:_(s32) = G_SHL %a, %sub - %or:_(s32) = G_OR %shl, %lshr - $vgpr2 = COPY %or + ; CHECK-NEXT: %a:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %amt:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: %or:_(i32) = G_ROTR %a, %amt(i32) + ; CHECK-NEXT: $vgpr2 = COPY %or(i32) + %a:_(i32) = COPY $vgpr0 + %amt:_(i32) = COPY $vgpr1 + %bw:_(i32) = G_CONSTANT i32 32 + %lshr:_(i32) = G_LSHR %a, %amt(i32) + %sub:_(i32) = G_SUB %bw, %amt + %shl:_(i32) = G_SHL %a, %sub(i32) + %or:_(i32) = G_OR %shl, %lshr + $vgpr2 = COPY %or(i32) ... 
--- @@ -108,17 +108,17 @@ body: | ; CHECK-LABEL: name: rot_i32_const ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %a:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %amt1:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: %or:_(s32) = G_ROTR %a, %amt1(s32) - ; CHECK-NEXT: $vgpr1 = COPY %or(s32) - %a:_(s32) = COPY $vgpr0 - %amt0:_(s32) = G_CONSTANT i32 20 - %amt1:_(s32) = G_CONSTANT i32 12 - %shl:_(s32) = G_SHL %a, %amt0 - %lshr:_(s32) = G_LSHR %a, %amt1 - %or:_(s32) = G_OR %shl, %lshr - $vgpr1 = COPY %or + ; CHECK-NEXT: %a:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %amt1:_(i32) = G_CONSTANT i32 12 + ; CHECK-NEXT: %or:_(i32) = G_ROTR %a, %amt1(i32) + ; CHECK-NEXT: $vgpr1 = COPY %or(i32) + %a:_(i32) = COPY $vgpr0 + %amt0:_(i32) = G_CONSTANT i32 20 + %amt1:_(i32) = G_CONSTANT i32 12 + %shl:_(i32) = G_SHL %a, %amt0(i32) + %lshr:_(i32) = G_LSHR %a, %amt1(i32) + %or:_(i32) = G_OR %shl, %lshr + $vgpr1 = COPY %or(i32) ... --- @@ -131,20 +131,20 @@ body: | ; CHECK-LABEL: name: rot_v2i32_const ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %a:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: %scalar_amt1:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: %amt1:_(<2 x s32>) = G_BUILD_VECTOR %scalar_amt1(s32), %scalar_amt1(s32) - ; CHECK-NEXT: %or:_(<2 x s32>) = G_ROTR %a, %amt1(<2 x s32>) - ; CHECK-NEXT: $vgpr2_vgpr3 = COPY %or(<2 x s32>) - %a:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %scalar_amt0:_(s32) = G_CONSTANT i32 20 - %amt0:_(<2 x s32>) = G_BUILD_VECTOR %scalar_amt0, %scalar_amt0 - %scalar_amt1:_(s32) = G_CONSTANT i32 12 - %amt1:_(<2 x s32>) = G_BUILD_VECTOR %scalar_amt1, %scalar_amt1 - %shl:_(<2 x s32>) = G_SHL %a, %amt0 - %lshr:_(<2 x s32>) = G_LSHR %a, %amt1 - %or:_(<2 x s32>) = G_OR %shl, %lshr - $vgpr2_vgpr3 = COPY %or + ; CHECK-NEXT: %a:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: %scalar_amt1:_(i32) = G_CONSTANT i32 12 + ; CHECK-NEXT: %amt1:_(<2 x i32>) = G_BUILD_VECTOR %scalar_amt1(i32), %scalar_amt1(i32) + ; CHECK-NEXT: %or:_(<2 x i32>) = G_ROTR %a, %amt1(<2 x i32>) + ; CHECK-NEXT: $vgpr2_vgpr3 = COPY %or(<2 x i32>) + %a:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %scalar_amt0:_(i32) = G_CONSTANT i32 20 + %amt0:_(<2 x i32>) = G_BUILD_VECTOR %scalar_amt0(i32), %scalar_amt0(i32) + %scalar_amt1:_(i32) = G_CONSTANT i32 12 + %amt1:_(<2 x i32>) = G_BUILD_VECTOR %scalar_amt1(i32), %scalar_amt1(i32) + %shl:_(<2 x i32>) = G_SHL %a, %amt0(<2 x i32>) + %lshr:_(<2 x i32>) = G_LSHR %a, %amt1(<2 x i32>) + %or:_(<2 x i32>) = G_OR %shl, %lshr + $vgpr2_vgpr3 = COPY %or(<2 x i32>) ... 
--- @@ -157,20 +157,20 @@ body: | ; CHECK-LABEL: name: rot_i32_bad_const ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %a:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %amt0:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: %amt1:_(s32) = G_CONSTANT i32 11 - ; CHECK-NEXT: %shl:_(s32) = G_SHL %a, %amt0(s32) - ; CHECK-NEXT: %lshr:_(s32) = G_LSHR %a, %amt1(s32) - ; CHECK-NEXT: %or:_(s32) = G_OR %shl, %lshr - ; CHECK-NEXT: $vgpr1 = COPY %or(s32) - %a:_(s32) = COPY $vgpr0 - %amt0:_(s32) = G_CONSTANT i32 20 - %amt1:_(s32) = G_CONSTANT i32 11 - %shl:_(s32) = G_SHL %a, %amt0 - %lshr:_(s32) = G_LSHR %a, %amt1 - %or:_(s32) = G_OR %shl, %lshr - $vgpr1 = COPY %or + ; CHECK-NEXT: %a:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %amt0:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: %amt1:_(i32) = G_CONSTANT i32 11 + ; CHECK-NEXT: %shl:_(i32) = G_SHL %a, %amt0(i32) + ; CHECK-NEXT: %lshr:_(i32) = G_LSHR %a, %amt1(i32) + ; CHECK-NEXT: %or:_(i32) = G_OR %shl, %lshr + ; CHECK-NEXT: $vgpr1 = COPY %or(i32) + %a:_(i32) = COPY $vgpr0 + %amt0:_(i32) = G_CONSTANT i32 20 + %amt1:_(i32) = G_CONSTANT i32 11 + %shl:_(i32) = G_SHL %a, %amt0(i32) + %lshr:_(i32) = G_LSHR %a, %amt1(i32) + %or:_(i32) = G_OR %shl, %lshr + $vgpr1 = COPY %or(i32) ... @@ -184,22 +184,22 @@ body: | ; CHECK-LABEL: name: rotl_i32_bad_bw ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %a:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %amt:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: %bw:_(s32) = G_CONSTANT i32 31 - ; CHECK-NEXT: %shl:_(s32) = G_SHL %a, %amt(s32) - ; CHECK-NEXT: %sub:_(s32) = G_SUB %bw, %amt - ; CHECK-NEXT: %lshr:_(s32) = G_LSHR %a, %sub(s32) - ; CHECK-NEXT: %or:_(s32) = G_OR %shl, %lshr - ; CHECK-NEXT: $vgpr2 = COPY %or(s32) - %a:_(s32) = COPY $vgpr0 - %amt:_(s32) = COPY $vgpr1 - %bw:_(s32) = G_CONSTANT i32 31 - %shl:_(s32) = G_SHL %a, %amt - %sub:_(s32) = G_SUB %bw, %amt - %lshr:_(s32) = G_LSHR %a, %sub - %or:_(s32) = G_OR %shl, %lshr - $vgpr2 = COPY %or + ; CHECK-NEXT: %a:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %amt:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: %bw:_(i32) = G_CONSTANT i32 31 + ; CHECK-NEXT: %shl:_(i32) = G_SHL %a, %amt(i32) + ; CHECK-NEXT: %sub:_(i32) = G_SUB %bw, %amt + ; CHECK-NEXT: %lshr:_(i32) = G_LSHR %a, %sub(i32) + ; CHECK-NEXT: %or:_(i32) = G_OR %shl, %lshr + ; CHECK-NEXT: $vgpr2 = COPY %or(i32) + %a:_(i32) = COPY $vgpr0 + %amt:_(i32) = COPY $vgpr1 + %bw:_(i32) = G_CONSTANT i32 31 + %shl:_(i32) = G_SHL %a, %amt(i32) + %sub:_(i32) = G_SUB %bw, %amt + %lshr:_(i32) = G_LSHR %a, %sub(i32) + %or:_(i32) = G_OR %shl, %lshr + $vgpr2 = COPY %or(i32) ... 
--- @@ -212,22 +212,22 @@ body: | ; CHECK-LABEL: name: rotl_i32_bad_amt_reg ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %a:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %amt:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: %amt1:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: %bw:_(s32) = G_CONSTANT i32 32 - ; CHECK-NEXT: %shl:_(s32) = G_SHL %a, %amt(s32) - ; CHECK-NEXT: %sub:_(s32) = G_SUB %bw, %amt1 - ; CHECK-NEXT: %lshr:_(s32) = G_LSHR %a, %sub(s32) - ; CHECK-NEXT: %or:_(s32) = G_OR %shl, %lshr - ; CHECK-NEXT: $vgpr3 = COPY %or(s32) - %a:_(s32) = COPY $vgpr0 - %amt:_(s32) = COPY $vgpr1 - %amt1:_(s32) = COPY $vgpr2 - %bw:_(s32) = G_CONSTANT i32 32 - %shl:_(s32) = G_SHL %a, %amt - %sub:_(s32) = G_SUB %bw, %amt1 - %lshr:_(s32) = G_LSHR %a, %sub - %or:_(s32) = G_OR %shl, %lshr - $vgpr3 = COPY %or + ; CHECK-NEXT: %a:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %amt:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: %amt1:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: %bw:_(i32) = G_CONSTANT i32 32 + ; CHECK-NEXT: %shl:_(i32) = G_SHL %a, %amt(i32) + ; CHECK-NEXT: %sub:_(i32) = G_SUB %bw, %amt1 + ; CHECK-NEXT: %lshr:_(i32) = G_LSHR %a, %sub(i32) + ; CHECK-NEXT: %or:_(i32) = G_OR %shl, %lshr + ; CHECK-NEXT: $vgpr3 = COPY %or(i32) + %a:_(i32) = COPY $vgpr0 + %amt:_(i32) = COPY $vgpr1 + %amt1:_(i32) = COPY $vgpr2 + %bw:_(i32) = G_CONSTANT i32 32 + %shl:_(i32) = G_SHL %a, %amt(i32) + %sub:_(i32) = G_SUB %bw, %amt1 + %lshr:_(i32) = G_LSHR %a, %sub(i32) + %or:_(i32) = G_OR %shl, %lshr + $vgpr3 = COPY %or(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-rsq.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-rsq.mir index a0ba67f6df0a1..ac9afb9477717 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-rsq.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-rsq.mir @@ -10,15 +10,19 @@ body: | ; GCN-LABEL: name: rcp_sqrt_test_f32 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; GCN-NEXT: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[COPY]] - ; GCN-NEXT: [[INT:%[0-9]+]]:_(s32) = afn G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FSQRT]](s32) - ; GCN-NEXT: $vgpr0 = COPY [[INT]](s32) + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GCN-NEXT: [[FSQRT:%[0-9]+]]:_(f32) = G_FSQRT [[BITCAST]] + ; GCN-NEXT: [[INT:%[0-9]+]]:_(f32) = afn G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FSQRT]](f32) + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[INT]](f32) + ; GCN-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 - %0:_(s32) = COPY $sgpr0 - %2:_(s32) = G_FSQRT %0:_ - %3:_(s32) = afn G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %2:_(s32) - $vgpr0 = COPY %3:_(s32) + %0:_(i32) = COPY $sgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = G_FSQRT %1 + %3:_(f32) = afn G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %2(f32) + %4:_(i32) = G_BITCAST %3(f32) + $vgpr0 = COPY %4(i32) SI_RETURN_TO_EPILOG implicit $vgpr0 ... 
@@ -32,14 +36,18 @@ body: | ; GCN-LABEL: name: contract_afn_rcp_contract_sqrt_test_f32 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; GCN-NEXT: [[INT:%[0-9]+]]:_(s32) = contract afn G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), [[COPY]](s32) - ; GCN-NEXT: $vgpr0 = COPY [[INT]](s32) + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GCN-NEXT: [[INT:%[0-9]+]]:_(f32) = contract afn G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), [[BITCAST]](f32) + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[INT]](f32) + ; GCN-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 - %0:_(s32) = COPY $sgpr0 - %2:_(s32) = contract G_FSQRT %0:_ - %3:_(s32) = contract afn G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %2:_(s32) - $vgpr0 = COPY %3:_(s32) + %0:_(i32) = COPY $sgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = contract G_FSQRT %1 + %3:_(f32) = contract afn G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %2(f32) + %4:_(i32) = G_BITCAST %3(f32) + $vgpr0 = COPY %4(i32) SI_RETURN_TO_EPILOG implicit $vgpr0 ... @@ -53,15 +61,19 @@ body: | ; GCN-LABEL: name: sqrt_rcp_test_f32 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; GCN-NEXT: [[INT:%[0-9]+]]:_(s32) = afn G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY]](s32) - ; GCN-NEXT: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[INT]] - ; GCN-NEXT: $vgpr0 = COPY [[FSQRT]](s32) + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GCN-NEXT: [[INT:%[0-9]+]]:_(f32) = afn G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[BITCAST]](f32) + ; GCN-NEXT: [[FSQRT:%[0-9]+]]:_(f32) = G_FSQRT [[INT]] + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[FSQRT]](f32) + ; GCN-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 - %0:_(s32) = COPY $sgpr0 - %2:_(s32) = afn G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %0:_(s32) - %3:_(s32) = G_FSQRT %2:_ - $vgpr0 = COPY %3:_(s32) + %0:_(i32) = COPY $sgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = afn G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %1(f32) + %3:_(f32) = G_FSQRT %2 + %4:_(i32) = G_BITCAST %3(f32) + $vgpr0 = COPY %4(i32) SI_RETURN_TO_EPILOG implicit $vgpr0 ... 
@@ -75,15 +87,19 @@ body: | ; GCN-LABEL: name: afn_rcp_afn_amdgcn_sqrt_test_f32 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; GCN-NEXT: [[INT:%[0-9]+]]:_(s32) = afn G_INTRINSIC intrinsic(@llvm.amdgcn.sqrt), [[COPY]](s32) - ; GCN-NEXT: [[INT1:%[0-9]+]]:_(s32) = afn G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; GCN-NEXT: $vgpr0 = COPY [[INT1]](s32) + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GCN-NEXT: [[INT:%[0-9]+]]:_(f32) = afn G_INTRINSIC intrinsic(@llvm.amdgcn.sqrt), [[BITCAST]](f32) + ; GCN-NEXT: [[INT1:%[0-9]+]]:_(f32) = afn G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](f32) + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[INT1]](f32) + ; GCN-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = afn G_INTRINSIC intrinsic(@llvm.amdgcn.sqrt), %0:_(s32) - %2:_(s32) = afn G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %1:_(s32) - $vgpr0 = COPY %2 + %0:_(i32) = COPY $sgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = afn G_INTRINSIC intrinsic(@llvm.amdgcn.sqrt), %1(f32) + %3:_(f32) = afn G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %2(f32) + %4:_(i32) = G_BITCAST %3(f32) + $vgpr0 = COPY %4(i32) SI_RETURN_TO_EPILOG implicit $vgpr0 ... @@ -97,15 +113,19 @@ body: | ; GCN-LABEL: name: afn_contract_rcp_afn_contract_amdgcn_sqrt_test_f32 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; GCN-NEXT: [[INT:%[0-9]+]]:_(s32) = contract afn G_INTRINSIC intrinsic(@llvm.amdgcn.sqrt), [[COPY]](s32) - ; GCN-NEXT: [[INT1:%[0-9]+]]:_(s32) = contract afn G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; GCN-NEXT: $vgpr0 = COPY [[INT1]](s32) + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GCN-NEXT: [[INT:%[0-9]+]]:_(f32) = contract afn G_INTRINSIC intrinsic(@llvm.amdgcn.sqrt), [[BITCAST]](f32) + ; GCN-NEXT: [[INT1:%[0-9]+]]:_(f32) = contract afn G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](f32) + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[INT1]](f32) + ; GCN-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = afn contract G_INTRINSIC intrinsic(@llvm.amdgcn.sqrt), %0:_(s32) - %2:_(s32) = afn contract G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %1:_(s32) - $vgpr0 = COPY %2 + %0:_(i32) = COPY $sgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = contract afn G_INTRINSIC intrinsic(@llvm.amdgcn.sqrt), %1(f32) + %3:_(f32) = contract afn G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %2(f32) + %4:_(i32) = G_BITCAST %3(f32) + $vgpr0 = COPY %4(i32) SI_RETURN_TO_EPILOG implicit $vgpr0 ... 
@@ -119,21 +139,25 @@ body: | ; GCN-LABEL: name: rsq_test_f16 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GCN-NEXT: %sqrt:_(s16) = G_FSQRT [[TRUNC]] - ; GCN-NEXT: %one:_(s16) = contract G_FCONSTANT half 0xH3C00 - ; GCN-NEXT: %rsq:_(s16) = contract G_FDIV %one, %sqrt - ; GCN-NEXT: %ext:_(s32) = G_ANYEXT %rsq(s16) - ; GCN-NEXT: $vgpr0 = COPY %ext(s32) + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GCN-NEXT: %sqrt:_(f16) = G_FSQRT [[BITCAST]] + ; GCN-NEXT: %one:_(f16) = contract G_FCONSTANT half 0xH3C00 + ; GCN-NEXT: %rsq:_(f16) = contract G_FDIV %one, %sqrt + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST %rsq(f16) + ; GCN-NEXT: %ext:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; GCN-NEXT: $vgpr0 = COPY %ext(i32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 - %0:_(s32) = COPY $sgpr0 - %1:_(s16) = G_TRUNC %0 - %sqrt:_(s16) = G_FSQRT %1:_ - %one:_(s16) = contract G_FCONSTANT half 1.0 - %rsq:_(s16) = contract G_FDIV %one, %sqrt - %ext:_(s32) = G_ANYEXT %rsq - $vgpr0 = COPY %ext + %0:_(i32) = COPY $sgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(f16) = G_BITCAST %1(i16) + %sqrt:_(f16) = G_FSQRT %2 + %one:_(f16) = contract G_FCONSTANT half 0xH3C00 + %rsq:_(f16) = contract G_FDIV %one, %sqrt + %6:_(i16) = G_BITCAST %rsq(f16) + %ext:_(i32) = G_ANYEXT %6(i16) + $vgpr0 = COPY %ext(i32) SI_RETURN_TO_EPILOG implicit $vgpr0 ... @@ -147,21 +171,25 @@ body: | ; GCN-LABEL: name: neg_rsq_test_f16 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GCN-NEXT: %sqrt:_(s16) = G_FSQRT [[TRUNC]] - ; GCN-NEXT: %one:_(s16) = contract G_FCONSTANT half 0xHBC00 - ; GCN-NEXT: %rsq:_(s16) = contract G_FDIV %one, %sqrt - ; GCN-NEXT: %ext:_(s32) = G_ANYEXT %rsq(s16) - ; GCN-NEXT: $vgpr0 = COPY %ext(s32) + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GCN-NEXT: %sqrt:_(f16) = G_FSQRT [[BITCAST]] + ; GCN-NEXT: %one:_(f16) = contract G_FCONSTANT half 0xHBC00 + ; GCN-NEXT: %rsq:_(f16) = contract G_FDIV %one, %sqrt + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST %rsq(f16) + ; GCN-NEXT: %ext:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; GCN-NEXT: $vgpr0 = COPY %ext(i32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 - %0:_(s32) = COPY $sgpr0 - %1:_(s16) = G_TRUNC %0 - %sqrt:_(s16) = G_FSQRT %1:_ - %one:_(s16) = contract G_FCONSTANT half -1.0 - %rsq:_(s16) = contract G_FDIV %one, %sqrt - %ext:_(s32) = G_ANYEXT %rsq - $vgpr0 = COPY %ext + %0:_(i32) = COPY $sgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(f16) = G_BITCAST %1(i16) + %sqrt:_(f16) = G_FSQRT %2 + %one:_(f16) = contract G_FCONSTANT half 0xHBC00 + %rsq:_(f16) = contract G_FDIV %one, %sqrt + %6:_(i16) = G_BITCAST %rsq(f16) + %ext:_(i32) = G_ANYEXT %6(i16) + $vgpr0 = COPY %ext(i32) SI_RETURN_TO_EPILOG implicit $vgpr0 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-sext-inreg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-sext-inreg.mir index 81fd3016e6988..e9b1249e876cb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-sext-inreg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-sext-inreg.mir @@ -12,13 +12,13 @@ body: | ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GCN-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SEXTLOAD]], 7 - ; GCN-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32) + ; GCN-NEXT: [[SEXTLOAD:%[0-9]+]]:_(i32) = G_SEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GCN-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[SEXTLOAD]], 7 + ; GCN-NEXT: $vgpr0 = COPY [[SEXT_INREG]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_SEXTLOAD %0 :: (load (s8), addrspace 1) - %2:_(s32) = G_SEXT_INREG %1, 7 - $vgpr0 = COPY %2 + %1:_(i32) = G_SEXTLOAD %0(p1) :: (load (i8), addrspace 1) + %2:_(i32) = G_SEXT_INREG %1, 7 + $vgpr0 = COPY %2(i32) ... @@ -33,12 +33,12 @@ body: | ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GCN-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32) + ; GCN-NEXT: [[SEXTLOAD:%[0-9]+]]:_(i32) = G_SEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GCN-NEXT: $vgpr0 = COPY [[SEXTLOAD]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_SEXTLOAD %0 :: (load (s8), addrspace 1) - %2:_(s32) = G_SEXT_INREG %1, 8 - $vgpr0 = COPY %2 + %1:_(i32) = G_SEXTLOAD %0(p1) :: (load (i8), addrspace 1) + %2:_(i32) = G_SEXT_INREG %1, 8 + $vgpr0 = COPY %2(i32) ... @@ -53,12 +53,12 @@ body: | ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GCN-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32) + ; GCN-NEXT: [[SEXTLOAD:%[0-9]+]]:_(i32) = G_SEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GCN-NEXT: $vgpr0 = COPY [[SEXTLOAD]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_SEXTLOAD %0 :: (load (s8), addrspace 1) - %2:_(s32) = G_SEXT_INREG %1, 9 - $vgpr0 = COPY %2 + %1:_(i32) = G_SEXTLOAD %0(p1) :: (load (i8), addrspace 1) + %2:_(i32) = G_SEXT_INREG %1, 9 + $vgpr0 = COPY %2(i32) ... @@ -73,14 +73,14 @@ body: | ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GCN-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SEXTLOAD]], 7 - ; GCN-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32) + ; GCN-NEXT: [[SEXTLOAD:%[0-9]+]]:_(i32) = G_SEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GCN-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[SEXTLOAD]], 7 + ; GCN-NEXT: $vgpr0 = COPY [[SEXT_INREG]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s8) = G_LOAD %0 :: (load (s8), addrspace 1) - %2:_(s32) = G_SEXT %1 - %3:_(s32) = G_SEXT_INREG %2, 7 - $vgpr0 = COPY %3 + %1:_(i8) = G_LOAD %0(p1) :: (load (i8), addrspace 1) + %2:_(i32) = G_SEXT %1(i8) + %3:_(i32) = G_SEXT_INREG %2, 7 + $vgpr0 = COPY %3(i32) ... 
@@ -95,13 +95,13 @@ body: | ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GCN-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32) + ; GCN-NEXT: [[SEXTLOAD:%[0-9]+]]:_(i32) = G_SEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GCN-NEXT: $vgpr0 = COPY [[SEXTLOAD]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s8) = G_LOAD %0 :: (load (s8), addrspace 1) - %2:_(s32) = G_SEXT %1 - %3:_(s32) = G_SEXT_INREG %2, 8 - $vgpr0 = COPY %3 + %1:_(i8) = G_LOAD %0(p1) :: (load (i8), addrspace 1) + %2:_(i32) = G_SEXT %1(i8) + %3:_(i32) = G_SEXT_INREG %2, 8 + $vgpr0 = COPY %3(i32) ... @@ -116,13 +116,13 @@ body: | ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GCN-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32) + ; GCN-NEXT: [[SEXTLOAD:%[0-9]+]]:_(i32) = G_SEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GCN-NEXT: $vgpr0 = COPY [[SEXTLOAD]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s8) = G_LOAD %0 :: (load (s8), addrspace 1) - %2:_(s32) = G_SEXT %1 - %3:_(s32) = G_SEXT_INREG %2, 9 - $vgpr0 = COPY %3 + %1:_(i8) = G_LOAD %0(p1) :: (load (i8), addrspace 1) + %2:_(i32) = G_SEXT %1(i8) + %3:_(i32) = G_SEXT_INREG %2, 9 + $vgpr0 = COPY %3(i32) ... @@ -137,15 +137,15 @@ body: | ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[COPY]](p1) :: (load (<2 x s8>), addrspace 1) - ; GCN-NEXT: [[SEXT:%[0-9]+]]:_(<2 x s32>) = G_SEXT [[LOAD]](<2 x s8>) - ; GCN-NEXT: [[SEXT_INREG:%[0-9]+]]:_(<2 x s32>) = G_SEXT_INREG [[SEXT]], 7 - ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](<2 x s32>) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i8>) = G_LOAD [[COPY]](p1) :: (load (<2 x i8>), addrspace 1) + ; GCN-NEXT: [[SEXT:%[0-9]+]]:_(<2 x i32>) = G_SEXT [[LOAD]](<2 x i8>) + ; GCN-NEXT: [[SEXT_INREG:%[0-9]+]]:_(<2 x i32>) = G_SEXT_INREG [[SEXT]], 7 + ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](<2 x i32>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), addrspace 1) - %2:_(<2 x s32>) = G_SEXT %1 - %3:_(<2 x s32>) = G_SEXT_INREG %2, 7 - $vgpr0_vgpr1 = COPY %3 + %1:_(<2 x i8>) = G_LOAD %0(p1) :: (load (<2 x i8>), addrspace 1) + %2:_(<2 x i32>) = G_SEXT %1(<2 x i8>) + %3:_(<2 x i32>) = G_SEXT_INREG %2, 7 + $vgpr0_vgpr1 = COPY %3(<2 x i32>) ... 
@@ -160,14 +160,14 @@ body: | ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[COPY]](p1) :: (load (<2 x s8>), addrspace 1) - ; GCN-NEXT: [[SEXT:%[0-9]+]]:_(<2 x s32>) = G_SEXT [[LOAD]](<2 x s8>) - ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](<2 x s32>) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i8>) = G_LOAD [[COPY]](p1) :: (load (<2 x i8>), addrspace 1) + ; GCN-NEXT: [[SEXT:%[0-9]+]]:_(<2 x i32>) = G_SEXT [[LOAD]](<2 x i8>) + ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](<2 x i32>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), addrspace 1) - %2:_(<2 x s32>) = G_SEXT %1 - %3:_(<2 x s32>) = G_SEXT_INREG %2, 8 - $vgpr0_vgpr1 = COPY %3 + %1:_(<2 x i8>) = G_LOAD %0(p1) :: (load (<2 x i8>), addrspace 1) + %2:_(<2 x i32>) = G_SEXT %1(<2 x i8>) + %3:_(<2 x i32>) = G_SEXT_INREG %2, 8 + $vgpr0_vgpr1 = COPY %3(<2 x i32>) ... @@ -182,14 +182,14 @@ body: | ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[COPY]](p1) :: (load (<2 x s8>), addrspace 1) - ; GCN-NEXT: [[SEXT:%[0-9]+]]:_(<2 x s32>) = G_SEXT [[LOAD]](<2 x s8>) - ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](<2 x s32>) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i8>) = G_LOAD [[COPY]](p1) :: (load (<2 x i8>), addrspace 1) + ; GCN-NEXT: [[SEXT:%[0-9]+]]:_(<2 x i32>) = G_SEXT [[LOAD]](<2 x i8>) + ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](<2 x i32>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), addrspace 1) - %2:_(<2 x s32>) = G_SEXT %1 - %3:_(<2 x s32>) = G_SEXT_INREG %2, 9 - $vgpr0_vgpr1 = COPY %3 + %1:_(<2 x i8>) = G_LOAD %0(p1) :: (load (<2 x i8>), addrspace 1) + %2:_(<2 x i32>) = G_SEXT %1(<2 x i8>) + %3:_(<2 x i32>) = G_SEXT_INREG %2, 9 + $vgpr0_vgpr1 = COPY %3(<2 x i32>) ... @@ -204,13 +204,13 @@ body: | ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GCN-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ZEXTLOAD]], 7 - ; GCN-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32) + ; GCN-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GCN-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[ZEXTLOAD]], 7 + ; GCN-NEXT: $vgpr0 = COPY [[SEXT_INREG]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_ZEXTLOAD %0 :: (load (s8), addrspace 1) - %2:_(s32) = G_SEXT_INREG %1, 7 - $vgpr0 = COPY %2 + %1:_(i32) = G_ZEXTLOAD %0(p1) :: (load (i8), addrspace 1) + %2:_(i32) = G_SEXT_INREG %1, 7 + $vgpr0 = COPY %2(i32) ... 
@@ -225,13 +225,13 @@ body: | ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GCN-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ZEXTLOAD]], 8 - ; GCN-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32) + ; GCN-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GCN-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[ZEXTLOAD]], 8 + ; GCN-NEXT: $vgpr0 = COPY [[SEXT_INREG]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_ZEXTLOAD %0 :: (load (s8), addrspace 1) - %2:_(s32) = G_SEXT_INREG %1, 8 - $vgpr0 = COPY %2 + %1:_(i32) = G_ZEXTLOAD %0(p1) :: (load (i8), addrspace 1) + %2:_(i32) = G_SEXT_INREG %1, 8 + $vgpr0 = COPY %2(i32) ... @@ -246,12 +246,12 @@ body: | ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GCN-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32) + ; GCN-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GCN-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_ZEXTLOAD %0 :: (load (s8), addrspace 1) - %2:_(s32) = G_SEXT_INREG %1, 9 - $vgpr0 = COPY %2 + %1:_(i32) = G_ZEXTLOAD %0(p1) :: (load (i8), addrspace 1) + %2:_(i32) = G_SEXT_INREG %1, 9 + $vgpr0 = COPY %2(i32) ... @@ -267,23 +267,23 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr2_vgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GCN-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[COPY3]] - ; GCN-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GCN-NEXT: [[SEXTLOAD1:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY1]](p1) :: (load (s8), addrspace 1) - ; GCN-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SEXTLOAD]], [[SEXTLOAD1]] - ; GCN-NEXT: $vgpr0 = COPY [[SELECT]](s32) + ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GCN-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[COPY3]] + ; GCN-NEXT: [[SEXTLOAD:%[0-9]+]]:_(i32) = G_SEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GCN-NEXT: [[SEXTLOAD1:%[0-9]+]]:_(i32) = G_SEXTLOAD [[COPY1]](p1) :: (load (i8), addrspace 1) + ; GCN-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SEXTLOAD]], [[SEXTLOAD1]] + ; GCN-NEXT: $vgpr0 = COPY [[SELECT]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(p1) = COPY $vgpr2_vgpr3 - %2:_(s32) = COPY $vgpr4 - %3:_(s32) = COPY $vgpr5 - %4:_(s1) = G_ICMP intpred(eq), %2, %3 - %5:_(s32) = G_SEXTLOAD %0 :: (load (s8), addrspace 1) - %6:_(s32) = G_SEXTLOAD %1 :: (load (s8), addrspace 1) - %7:_(s32) = G_SELECT %4, %5, %6 - %8:_(s32) = G_SEXT_INREG %7, 8 - $vgpr0 = COPY %8 + %2:_(i32) = COPY $vgpr4 + %3:_(i32) = COPY $vgpr5 + %4:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + %5:_(i32) = G_SEXTLOAD %0(p1) :: (load (i8), addrspace 1) + %6:_(i32) = G_SEXTLOAD %1(p1) :: (load (i8), addrspace 1) + %7:_(i32) = G_SELECT %4(i1), %5, %6 + %8:_(i32) = G_SEXT_INREG %7, 8 + $vgpr0 = COPY %8(i32) ... 
@@ -299,24 +299,24 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr2_vgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GCN-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[COPY3]] - ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; GCN-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY1]](p1) :: (load (s8), addrspace 1) - ; GCN-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[LOAD]], [[SEXTLOAD]] - ; GCN-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SELECT]], 8 - ; GCN-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32) + ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GCN-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[COPY3]] + ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; GCN-NEXT: [[SEXTLOAD:%[0-9]+]]:_(i32) = G_SEXTLOAD [[COPY1]](p1) :: (load (i8), addrspace 1) + ; GCN-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[LOAD]], [[SEXTLOAD]] + ; GCN-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[SELECT]], 8 + ; GCN-NEXT: $vgpr0 = COPY [[SEXT_INREG]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(p1) = COPY $vgpr2_vgpr3 - %2:_(s32) = COPY $vgpr4 - %3:_(s32) = COPY $vgpr5 - %4:_(s1) = G_ICMP intpred(eq), %2, %3 - %5:_(s32) = G_LOAD %0 :: (load (s32), addrspace 1) - %6:_(s32) = G_SEXTLOAD %1 :: (load (s8), addrspace 1) - %7:_(s32) = G_SELECT %4, %5, %6 - %8:_(s32) = G_SEXT_INREG %7, 8 - $vgpr0 = COPY %8 + %2:_(i32) = COPY $vgpr4 + %3:_(i32) = COPY $vgpr5 + %4:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + %5:_(i32) = G_LOAD %0(p1) :: (load (i32), addrspace 1) + %6:_(i32) = G_SEXTLOAD %1(p1) :: (load (i8), addrspace 1) + %7:_(i32) = G_SELECT %4(i1), %5, %6 + %8:_(i32) = G_SEXT_INREG %7, 8 + $vgpr0 = COPY %8(i32) ... 
@@ -332,24 +332,24 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr2_vgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GCN-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[COPY3]] - ; GCN-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p1) :: (load (s32), addrspace 1) - ; GCN-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SEXTLOAD]], [[LOAD]] - ; GCN-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SELECT]], 8 - ; GCN-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32) + ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GCN-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[COPY3]] + ; GCN-NEXT: [[SEXTLOAD:%[0-9]+]]:_(i32) = G_SEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY1]](p1) :: (load (i32), addrspace 1) + ; GCN-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SEXTLOAD]], [[LOAD]] + ; GCN-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[SELECT]], 8 + ; GCN-NEXT: $vgpr0 = COPY [[SEXT_INREG]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(p1) = COPY $vgpr2_vgpr3 - %2:_(s32) = COPY $vgpr4 - %3:_(s32) = COPY $vgpr5 - %4:_(s1) = G_ICMP intpred(eq), %2, %3 - %5:_(s32) = G_SEXTLOAD %0 :: (load (s8), addrspace 1) - %6:_(s32) = G_LOAD %1 :: (load (s32), addrspace 1) - %7:_(s32) = G_SELECT %4, %5, %6 - %8:_(s32) = G_SEXT_INREG %7, 8 - $vgpr0 = COPY %8 + %2:_(i32) = COPY $vgpr4 + %3:_(i32) = COPY $vgpr5 + %4:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + %5:_(i32) = G_SEXTLOAD %0(p1) :: (load (i8), addrspace 1) + %6:_(i32) = G_LOAD %1(p1) :: (load (i32), addrspace 1) + %7:_(i32) = G_SELECT %4(i1), %5, %6 + %8:_(i32) = G_SEXT_INREG %7, 8 + $vgpr0 = COPY %8(i32) ... --- @@ -361,13 +361,13 @@ body: | ; GCN-LABEL: name: assert_sext_s8 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: %copy:_(s32) = COPY $vgpr0 - ; GCN-NEXT: %assert_sext:_(s32) = G_ASSERT_SEXT %copy, 8 - ; GCN-NEXT: $vgpr0 = COPY %assert_sext(s32) - %copy:_(s32) = COPY $vgpr0 - %assert_sext:_(s32) = G_ASSERT_SEXT %copy, 8 - %sext_inreg:_(s32) = G_SEXT_INREG %assert_sext, 8 - $vgpr0 = COPY %sext_inreg + ; GCN-NEXT: %copy:_(i32) = COPY $vgpr0 + ; GCN-NEXT: %assert_sext:_(i32) = G_ASSERT_SEXT %copy, 8 + ; GCN-NEXT: $vgpr0 = COPY %assert_sext(i32) + %copy:_(i32) = COPY $vgpr0 + %assert_sext:_(i32) = G_ASSERT_SEXT %copy, 8 + %sext_inreg:_(i32) = G_SEXT_INREG %assert_sext, 8 + $vgpr0 = COPY %sext_inreg(i32) ... 
--- @@ -379,12 +379,12 @@ body: | ; GCN-LABEL: name: sext_inreg_s7_assert_sext_s8 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: %copy:_(s32) = COPY $vgpr0 - ; GCN-NEXT: %assert_sext:_(s32) = G_ASSERT_SEXT %copy, 8 - ; GCN-NEXT: %sext_inreg:_(s32) = G_SEXT_INREG %assert_sext, 7 - ; GCN-NEXT: $vgpr0 = COPY %sext_inreg(s32) - %copy:_(s32) = COPY $vgpr0 - %assert_sext:_(s32) = G_ASSERT_SEXT %copy, 8 - %sext_inreg:_(s32) = G_SEXT_INREG %assert_sext, 7 - $vgpr0 = COPY %sext_inreg + ; GCN-NEXT: %copy:_(i32) = COPY $vgpr0 + ; GCN-NEXT: %assert_sext:_(i32) = G_ASSERT_SEXT %copy, 8 + ; GCN-NEXT: %sext_inreg:_(i32) = G_SEXT_INREG %assert_sext, 7 + ; GCN-NEXT: $vgpr0 = COPY %sext_inreg(i32) + %copy:_(i32) = COPY $vgpr0 + %assert_sext:_(i32) = G_ASSERT_SEXT %copy, 8 + %sext_inreg:_(i32) = G_SEXT_INREG %assert_sext, 7 + $vgpr0 = COPY %sext_inreg(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-imm-chain-illegal-types.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-imm-chain-illegal-types.mir index 8a4c19b6d58a7..4917d16b73536 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-imm-chain-illegal-types.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-imm-chain-illegal-types.mir @@ -11,28 +11,28 @@ body: | ; CHECK-LABEL: name: test_ashr_i44 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s44) = G_TRUNC [[MV]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s44) = G_CONSTANT i44 43 - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s44) = G_ASHR [[TRUNC]], [[C]](s44) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[ASHR]](s44) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i44) = G_TRUNC [[MV]](i64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i44) = G_CONSTANT i44 43 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(i44) = G_ASHR [[TRUNC]], [[C]](i44) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[ASHR]](i44) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ANYEXT]](i64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = COPY $vgpr1 - %3:_(s64) = G_MERGE_VALUES %1(s32), %2(s32) - %4:_(s44) = G_TRUNC %3(s64) - %5:_(s44) = G_CONSTANT i44 22 - %6:_(s44) = G_ASHR %4, %5(s44) - %7:_(s44) = G_ASHR %6, %5(s44) - %8:_(s64) = G_ANYEXT %7(s44) - %9:_(s32), %10:_(s32) = G_UNMERGE_VALUES %8(s64) - $vgpr0 = COPY %9(s32) - $vgpr1 = COPY %10(s32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i64) = G_MERGE_VALUES %0(i32), %1(i32) + %3:_(i44) = G_TRUNC %2(i64) + %4:_(i44) = G_CONSTANT i44 22 + %5:_(i44) = G_ASHR %3, %4(i44) + %6:_(i44) = G_ASHR %5, %4(i44) + %7:_(i64) = G_ANYEXT %6(i44) + %8:_(i32), %9:_(i32) = G_UNMERGE_VALUES %7(i64) + $vgpr0 = COPY %8(i32) + $vgpr1 = COPY %9(i32) SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ... 
@@ -46,29 +46,29 @@ body: | ; CHECK-LABEL: name: test_ashr_i55 ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s55) = G_TRUNC [[MV]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s55) = G_CONSTANT i55 53 - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s55) = G_ASHR [[TRUNC]], [[C]](s55) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[ASHR]](s55) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64) - ; CHECK-NEXT: $sgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $sgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i55) = G_TRUNC [[MV]](i64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i55) = G_CONSTANT i55 53 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(i55) = G_ASHR [[TRUNC]], [[C]](i55) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[ASHR]](i55) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ANYEXT]](i64) + ; CHECK-NEXT: $sgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $sgpr1 = COPY [[UV1]](i32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 - %1:_(s32) = COPY $sgpr0 - %2:_(s32) = COPY $sgpr1 - %3:_(s64) = G_MERGE_VALUES %1(s32), %2(s32) - %4:_(s55) = G_TRUNC %3(s64) - %5:_(s55) = G_CONSTANT i55 50 - %7:_(s55) = G_CONSTANT i55 3 - %6:_(s55) = G_ASHR %4, %5(s55) - %8:_(s55) = G_ASHR %6, %7(s55) - %9:_(s64) = G_ANYEXT %8(s55) - %10:_(s32), %11:_(s32) = G_UNMERGE_VALUES %9(s64) - $sgpr0 = COPY %10(s32) - $sgpr1 = COPY %11(s32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i64) = G_MERGE_VALUES %0(i32), %1(i32) + %3:_(i55) = G_TRUNC %2(i64) + %4:_(i55) = G_CONSTANT i55 50 + %5:_(i55) = G_CONSTANT i55 3 + %6:_(i55) = G_ASHR %3, %4(i55) + %7:_(i55) = G_ASHR %6, %5(i55) + %8:_(i64) = G_ANYEXT %7(i55) + %9:_(i32), %10:_(i32) = G_UNMERGE_VALUES %8(i64) + $sgpr0 = COPY %9(i32) + $sgpr1 = COPY %10(i32) SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 ... @@ -82,21 +82,21 @@ body: | ; CHECK-LABEL: name: test_lshr_i44 ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: $sgpr0 = COPY [[C]](s32) - ; CHECK-NEXT: $sgpr1 = COPY [[C]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: $sgpr0 = COPY [[C]](i32) + ; CHECK-NEXT: $sgpr1 = COPY [[C]](i32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 - %1:_(s32) = COPY $sgpr0 - %2:_(s32) = COPY $sgpr1 - %3:_(s64) = G_MERGE_VALUES %1(s32), %2(s32) - %4:_(s44) = G_TRUNC %3(s64) - %5:_(s44) = G_CONSTANT i44 22 - %6:_(s44) = G_LSHR %4, %5(s44) - %7:_(s44) = G_LSHR %6, %5(s44) - %8:_(s64) = G_ANYEXT %7(s44) - %9:_(s32), %10:_(s32) = G_UNMERGE_VALUES %8(s64) - $sgpr0 = COPY %9(s32) - $sgpr1 = COPY %10(s32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i64) = G_MERGE_VALUES %0(i32), %1(i32) + %3:_(i44) = G_TRUNC %2(i64) + %4:_(i44) = G_CONSTANT i44 22 + %5:_(i44) = G_LSHR %3, %4(i44) + %6:_(i44) = G_LSHR %5, %4(i44) + %7:_(i64) = G_ANYEXT %6(i44) + %8:_(i32), %9:_(i32) = G_UNMERGE_VALUES %7(i64) + $sgpr0 = COPY %8(i32) + $sgpr1 = COPY %9(i32) SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 ... 
@@ -110,29 +110,29 @@ body: | ; CHECK-LABEL: name: test_lshr_i55 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s55) = G_TRUNC [[MV]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s55) = G_CONSTANT i55 53 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s55) = G_LSHR [[TRUNC]], [[C]](s55) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LSHR]](s55) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i55) = G_TRUNC [[MV]](i64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i55) = G_CONSTANT i55 53 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i55) = G_LSHR [[TRUNC]], [[C]](i55) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LSHR]](i55) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ANYEXT]](i64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = COPY $vgpr1 - %3:_(s64) = G_MERGE_VALUES %1(s32), %2(s32) - %4:_(s55) = G_TRUNC %3(s64) - %5:_(s55) = G_CONSTANT i55 50 - %7:_(s55) = G_CONSTANT i55 3 - %6:_(s55) = G_LSHR %4, %5(s55) - %8:_(s55) = G_LSHR %6, %7(s55) - %9:_(s64) = G_ANYEXT %8(s55) - %10:_(s32), %11:_(s32) = G_UNMERGE_VALUES %9(s64) - $vgpr0 = COPY %10(s32) - $vgpr1 = COPY %11(s32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i64) = G_MERGE_VALUES %0(i32), %1(i32) + %3:_(i55) = G_TRUNC %2(i64) + %4:_(i55) = G_CONSTANT i55 50 + %5:_(i55) = G_CONSTANT i55 3 + %6:_(i55) = G_LSHR %3, %4(i55) + %7:_(i55) = G_LSHR %6, %5(i55) + %8:_(i64) = G_ANYEXT %7(i55) + %9:_(i32), %10:_(i32) = G_UNMERGE_VALUES %8(i64) + $vgpr0 = COPY %9(i32) + $vgpr1 = COPY %10(i32) SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ... @@ -146,21 +146,21 @@ body: | ; CHECK-LABEL: name: test_shl_i44 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[C]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: $vgpr0 = COPY [[C]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[C]](i32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = COPY $vgpr1 - %3:_(s64) = G_MERGE_VALUES %1(s32), %2(s32) - %4:_(s44) = G_TRUNC %3(s64) - %5:_(s44) = G_CONSTANT i44 22 - %6:_(s44) = G_SHL %4, %5(s44) - %7:_(s44) = G_SHL %6, %5(s44) - %8:_(s64) = G_ANYEXT %7(s44) - %9:_(s32), %10:_(s32) = G_UNMERGE_VALUES %8(s64) - $vgpr0 = COPY %9(s32) - $vgpr1 = COPY %10(s32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i64) = G_MERGE_VALUES %0(i32), %1(i32) + %3:_(i44) = G_TRUNC %2(i64) + %4:_(i44) = G_CONSTANT i44 22 + %5:_(i44) = G_SHL %3, %4(i44) + %6:_(i44) = G_SHL %5, %4(i44) + %7:_(i64) = G_ANYEXT %6(i44) + %8:_(i32), %9:_(i32) = G_UNMERGE_VALUES %7(i64) + $vgpr0 = COPY %8(i32) + $vgpr1 = COPY %9(i32) SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ... 
@@ -174,29 +174,29 @@ body: | ; CHECK-LABEL: name: test_shl_i55 ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s55) = G_TRUNC [[MV]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s55) = G_CONSTANT i55 53 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s55) = G_SHL [[TRUNC]], [[C]](s55) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[SHL]](s55) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64) - ; CHECK-NEXT: $sgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $sgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i55) = G_TRUNC [[MV]](i64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i55) = G_CONSTANT i55 53 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i55) = G_SHL [[TRUNC]], [[C]](i55) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[SHL]](i55) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ANYEXT]](i64) + ; CHECK-NEXT: $sgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $sgpr1 = COPY [[UV1]](i32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 - %1:_(s32) = COPY $sgpr0 - %2:_(s32) = COPY $sgpr1 - %3:_(s64) = G_MERGE_VALUES %1(s32), %2(s32) - %4:_(s55) = G_TRUNC %3(s64) - %5:_(s55) = G_CONSTANT i55 50 - %7:_(s55) = G_CONSTANT i55 3 - %6:_(s55) = G_SHL %4, %5(s55) - %8:_(s55) = G_SHL %6, %7(s55) - %9:_(s64) = G_ANYEXT %8(s55) - %10:_(s32), %11:_(s32) = G_UNMERGE_VALUES %9(s64) - $sgpr0 = COPY %10(s32) - $sgpr1 = COPY %11(s32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i64) = G_MERGE_VALUES %0(i32), %1(i32) + %3:_(i55) = G_TRUNC %2(i64) + %4:_(i55) = G_CONSTANT i55 50 + %5:_(i55) = G_CONSTANT i55 3 + %6:_(i55) = G_SHL %3, %4(i55) + %7:_(i55) = G_SHL %6, %5(i55) + %8:_(i64) = G_ANYEXT %7(i55) + %9:_(i32), %10:_(i32) = G_UNMERGE_VALUES %8(i64) + $sgpr0 = COPY %9(i32) + $sgpr1 = COPY %10(i32) SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 ... 
@@ -210,32 +210,32 @@ body: | ; CHECK-LABEL: name: sshlsat_i44 ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s44) = G_TRUNC [[MV]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s44) = G_CONSTANT i44 43 - ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(s44) = G_SSHLSAT [[TRUNC]], [[C]](s44) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[SSHLSAT]](s44) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64) - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) - ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i44) = G_TRUNC [[MV]](i64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i44) = G_CONSTANT i44 43 + ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(i44) = G_SSHLSAT [[TRUNC]], [[C]](i44) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[SSHLSAT]](i44) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ANYEXT]](i64) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](i32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](i32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](i32) + ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](i32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 - %1:_(s32) = COPY $sgpr0 - %2:_(s32) = COPY $sgpr1 - %3:_(s64) = G_MERGE_VALUES %1(s32), %2(s32) - %0:_(s44) = G_TRUNC %3(s64) - %5:_(s44) = G_CONSTANT i44 22 - %6:_(s44) = G_SSHLSAT %0, %5(s44) - %7:_(s44) = G_SSHLSAT %6, %5(s44) - %8:_(s64) = G_ANYEXT %7(s44) - %9:_(s32), %10:_(s32) = G_UNMERGE_VALUES %8(s64) - %11:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %9(s32) - $sgpr0 = COPY %11(s32) - %12:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %10(s32) - $sgpr1 = COPY %12(s32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i64) = G_MERGE_VALUES %0(i32), %1(i32) + %3:_(i44) = G_TRUNC %2(i64) + %4:_(i44) = G_CONSTANT i44 22 + %5:_(i44) = G_SSHLSAT %3, %4(i44) + %6:_(i44) = G_SSHLSAT %5, %4(i44) + %7:_(i64) = G_ANYEXT %6(i44) + %8:_(i32), %9:_(i32) = G_UNMERGE_VALUES %7(i64) + %10:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %8(i32) + $sgpr0 = COPY %10(i32) + %11:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %9(i32) + $sgpr1 = COPY %11(i32) SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 ... 
@@ -249,33 +249,33 @@ body: | ; CHECK-LABEL: name: sshlsat_i55 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s55) = G_TRUNC [[MV]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s55) = G_CONSTANT i55 53 - ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(s55) = G_SSHLSAT [[TRUNC]], [[C]](s55) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[SSHLSAT]](s55) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64) - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i55) = G_TRUNC [[MV]](i64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i55) = G_CONSTANT i55 53 + ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(i55) = G_SSHLSAT [[TRUNC]], [[C]](i55) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[SSHLSAT]](i55) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ANYEXT]](i64) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[INTRINSIC_CONVERGENT]](i32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[INTRINSIC_CONVERGENT1]](i32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = COPY $vgpr1 - %3:_(s64) = G_MERGE_VALUES %1(s32), %2(s32) - %0:_(s55) = G_TRUNC %3(s64) - %5:_(s55) = G_CONSTANT i55 50 - %7:_(s55) = G_CONSTANT i55 3 - %6:_(s55) = G_SSHLSAT %0, %5(s55) - %8:_(s55) = G_SSHLSAT %6, %7(s55) - %9:_(s64) = G_ANYEXT %8(s55) - %10:_(s32), %11:_(s32) = G_UNMERGE_VALUES %9(s64) - %12:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %10(s32) - $vgpr0 = COPY %12(s32) - %13:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %11(s32) - $vgpr1 = COPY %13(s32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i64) = G_MERGE_VALUES %0(i32), %1(i32) + %3:_(i55) = G_TRUNC %2(i64) + %4:_(i55) = G_CONSTANT i55 50 + %5:_(i55) = G_CONSTANT i55 3 + %6:_(i55) = G_SSHLSAT %3, %4(i55) + %7:_(i55) = G_SSHLSAT %6, %5(i55) + %8:_(i64) = G_ANYEXT %7(i55) + %9:_(i32), %10:_(i32) = G_UNMERGE_VALUES %8(i64) + %11:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %9(i32) + $vgpr0 = COPY %11(i32) + %12:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %10(i32) + $vgpr1 = COPY %12(i32) SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ... 
@@ -289,29 +289,29 @@ body: | ; CHECK-LABEL: name: ushlsat_i44 ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s44) = G_TRUNC [[MV]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s44) = G_CONSTANT i44 22 - ; CHECK-NEXT: [[USHLSAT:%[0-9]+]]:_(s44) = G_USHLSAT [[TRUNC]], [[C]](s44) - ; CHECK-NEXT: [[USHLSAT1:%[0-9]+]]:_(s44) = G_USHLSAT [[USHLSAT]], [[C]](s44) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[USHLSAT1]](s44) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i44) = G_TRUNC [[MV]](i64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i44) = G_CONSTANT i44 22 + ; CHECK-NEXT: [[USHLSAT:%[0-9]+]]:_(i44) = G_USHLSAT [[TRUNC]], [[C]](i44) + ; CHECK-NEXT: [[USHLSAT1:%[0-9]+]]:_(i44) = G_USHLSAT [[USHLSAT]], [[C]](i44) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[USHLSAT1]](i44) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ANYEXT]](i64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - %2:_(s32) = COPY $vgpr0 - %3:_(s32) = COPY $vgpr1 - %4:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) - %0:_(s44) = G_TRUNC %4(s64) - %5:_(s44) = G_CONSTANT i44 22 - %6:_(s44) = G_USHLSAT %0, %5(s44) - %7:_(s44) = G_USHLSAT %6, %5(s44) - %9:_(s64) = G_ANYEXT %7(s44) - %10:_(s32), %11:_(s32) = G_UNMERGE_VALUES %9(s64) - $vgpr0 = COPY %10(s32) - $vgpr1 = COPY %11(s32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i64) = G_MERGE_VALUES %0(i32), %1(i32) + %3:_(i44) = G_TRUNC %2(i64) + %4:_(i44) = G_CONSTANT i44 22 + %5:_(i44) = G_USHLSAT %3, %4(i44) + %6:_(i44) = G_USHLSAT %5, %4(i44) + %7:_(i64) = G_ANYEXT %6(i44) + %8:_(i32), %9:_(i32) = G_UNMERGE_VALUES %7(i64) + $vgpr0 = COPY %8(i32) + $vgpr1 = COPY %9(i32) S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ... 
@@ -325,29 +325,29 @@ body: | ; CHECK-LABEL: name: ushlsat_i55 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s55) = G_TRUNC [[MV]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s55) = G_CONSTANT i55 53 - ; CHECK-NEXT: [[USHLSAT:%[0-9]+]]:_(s55) = G_USHLSAT [[TRUNC]], [[C]](s55) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[USHLSAT]](s55) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i55) = G_TRUNC [[MV]](i64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i55) = G_CONSTANT i55 53 + ; CHECK-NEXT: [[USHLSAT:%[0-9]+]]:_(i55) = G_USHLSAT [[TRUNC]], [[C]](i55) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[USHLSAT]](i55) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ANYEXT]](i64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - %2:_(s32) = COPY $vgpr0 - %3:_(s32) = COPY $vgpr1 - %4:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) - %0:_(s55) = G_TRUNC %4(s64) - %5:_(s55) = G_CONSTANT i55 50 - %7:_(s55) = G_CONSTANT i55 3 - %6:_(s55) = G_USHLSAT %0, %5(s55) - %8:_(s55) = G_USHLSAT %6, %7(s55) - %10:_(s64) = G_ANYEXT %8(s55) - %11:_(s32), %12:_(s32) = G_UNMERGE_VALUES %10(s64) - $vgpr0 = COPY %11(s32) - $vgpr1 = COPY %12(s32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i64) = G_MERGE_VALUES %0(i32), %1(i32) + %3:_(i55) = G_TRUNC %2(i64) + %4:_(i55) = G_CONSTANT i55 50 + %5:_(i55) = G_CONSTANT i55 3 + %6:_(i55) = G_USHLSAT %3, %4(i55) + %7:_(i55) = G_USHLSAT %6, %5(i55) + %8:_(i64) = G_ANYEXT %7(i55) + %9:_(i32), %10:_(i32) = G_UNMERGE_VALUES %8(i64) + $vgpr0 = COPY %9(i32) + $vgpr1 = COPY %10(i32) S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-imm-chain-shlsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-imm-chain-shlsat.mir index 4f38e39404c99..c4f0cd4b0772f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-imm-chain-shlsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-imm-chain-shlsat.mir @@ -11,16 +11,16 @@ body: | ; CHECK-LABEL: name: sshlsat_1 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(s32) = G_SSHLSAT [[COPY]], [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[SSHLSAT]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(i32) = G_SSHLSAT [[COPY]], [[C]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[SSHLSAT]](i32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 - %0:_(s32) = COPY $vgpr0 - %2:_(s32) = G_CONSTANT i32 2 - %3:_(s32) = G_SSHLSAT %0, %2(s32) - %4:_(s32) = G_SSHLSAT %3, %2(s32) - $vgpr0 = COPY %4(s32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_CONSTANT i32 2 + %2:_(i32) = G_SSHLSAT %0, %1(i32) + %3:_(i32) = G_SSHLSAT %2, %1(i32) + $vgpr0 = COPY %3(i32) SI_RETURN_TO_EPILOG implicit $vgpr0 ... @@ -34,23 +34,23 @@ body: | ; CHECK-LABEL: name: sshlsat_2 ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(s32) = G_SSHLSAT [[COPY]], [[C]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[SSHLSAT]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(i32) = G_SSHLSAT [[COPY]], [[C]](i32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[SSHLSAT]](i32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](i32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 - %0:_(s32) = COPY $sgpr0 - %2:_(s32) = G_CONSTANT i32 1 - %4:_(s32) = G_CONSTANT i32 2 - %6:_(s32) = G_CONSTANT i32 3 - %8:_(s32) = G_CONSTANT i32 4 - %3:_(s32) = G_SSHLSAT %0, %2(s32) - %5:_(s32) = G_SSHLSAT %3, %4(s32) - %7:_(s32) = G_SSHLSAT %5, %6(s32) - %9:_(s32) = G_SSHLSAT %7, %8(s32) - %10:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %9(s32) - $sgpr0 = COPY %10(s32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = G_CONSTANT i32 1 + %2:_(i32) = G_CONSTANT i32 2 + %3:_(i32) = G_CONSTANT i32 3 + %4:_(i32) = G_CONSTANT i32 4 + %5:_(i32) = G_SSHLSAT %0, %1(i32) + %6:_(i32) = G_SSHLSAT %5, %2(i32) + %7:_(i32) = G_SSHLSAT %6, %3(i32) + %8:_(i32) = G_SSHLSAT %7, %4(i32) + %9:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %8(i32) + $sgpr0 = COPY %9(i32) SI_RETURN_TO_EPILOG implicit $sgpr0 ... 
@@ -64,18 +64,18 @@ body: | ; CHECK-LABEL: name: sshlsat_i32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(s32) = G_SSHLSAT [[COPY]], [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[SSHLSAT]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(i32) = G_SSHLSAT [[COPY]], [[C]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[SSHLSAT]](i32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 - %0:_(s32) = COPY $vgpr0 - %2:_(s32) = G_CONSTANT i32 10 - %3:_(s32) = G_SSHLSAT %0, %2(s32) - %4:_(s32) = G_SSHLSAT %3, %2(s32) - %5:_(s32) = G_SSHLSAT %4, %2(s32) - %6:_(s32) = G_SSHLSAT %5, %2(s32) - $vgpr0 = COPY %6(s32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_CONSTANT i32 10 + %2:_(i32) = G_SSHLSAT %0, %1(i32) + %3:_(i32) = G_SSHLSAT %2, %1(i32) + %4:_(i32) = G_SSHLSAT %3, %1(i32) + %5:_(i32) = G_SSHLSAT %4, %1(i32) + $vgpr0 = COPY %5(i32) SI_RETURN_TO_EPILOG implicit $vgpr0 ... @@ -89,30 +89,30 @@ body: | ; CHECK-LABEL: name: sshlsat_i64 ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 62 - ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(s64) = G_SSHLSAT [[MV]], [[C]](s64) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SSHLSAT]](s64) - ; CHECK-NEXT: $sgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $sgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 62 + ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(i64) = G_SSHLSAT [[MV]], [[C]](i64) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SSHLSAT]](i64) + ; CHECK-NEXT: $sgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $sgpr1 = COPY [[UV1]](i32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 - %1:_(s32) = COPY $sgpr0 - %2:_(s32) = COPY $sgpr1 - %0:_(s64) = G_MERGE_VALUES %1(s32), %2(s32) - %4:_(s64) = G_CONSTANT i64 10 - %11:_(s64) = G_CONSTANT i64 2 - %5:_(s64) = G_SSHLSAT %0, %4(s64) - %6:_(s64) = G_SSHLSAT %5, %4(s64) - %7:_(s64) = G_SSHLSAT %6, %4(s64) - %8:_(s64) = G_SSHLSAT %7, %4(s64) - %9:_(s64) = G_SSHLSAT %8, %4(s64) - %10:_(s64) = G_SSHLSAT %9, %4(s64) - %12:_(s64) = G_SSHLSAT %10, %11(s64) - %13:_(s32), %14:_(s32) = G_UNMERGE_VALUES %12(s64) - $sgpr0 = COPY %13(s32) - $sgpr1 = COPY %14(s32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i64) = G_MERGE_VALUES %0(i32), %1(i32) + %3:_(i64) = G_CONSTANT i64 10 + %4:_(i64) = G_CONSTANT i64 2 + %5:_(i64) = G_SSHLSAT %2, %3(i64) + %6:_(i64) = G_SSHLSAT %5, %3(i64) + %7:_(i64) = G_SSHLSAT %6, %3(i64) + %8:_(i64) = G_SSHLSAT %7, %3(i64) + %9:_(i64) = G_SSHLSAT %8, %3(i64) + %10:_(i64) = G_SSHLSAT %9, %3(i64) + %11:_(i64) = G_SSHLSAT %10, %4(i64) + %12:_(i32), %13:_(i32) = G_UNMERGE_VALUES %11(i64) + $sgpr0 = COPY %12(i32) + $sgpr1 = COPY %13(i32) SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 ... 
@@ -126,16 +126,16 @@ body: | ; CHECK-LABEL: name: ushlsat_1 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[USHLSAT:%[0-9]+]]:_(s32) = G_USHLSAT [[COPY]], [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[USHLSAT]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[USHLSAT:%[0-9]+]]:_(i32) = G_USHLSAT [[COPY]], [[C]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[USHLSAT]](i32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 - %0:_(s32) = COPY $vgpr0 - %2:_(s32) = G_CONSTANT i32 2 - %3:_(s32) = G_USHLSAT %0, %2(s32) - %4:_(s32) = G_USHLSAT %3, %2(s32) - $vgpr0 = COPY %4(s32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_CONSTANT i32 2 + %2:_(i32) = G_USHLSAT %0, %1(i32) + %3:_(i32) = G_USHLSAT %2, %1(i32) + $vgpr0 = COPY %3(i32) SI_RETURN_TO_EPILOG implicit $vgpr0 ... @@ -149,21 +149,21 @@ body: | ; CHECK-LABEL: name: ushlsat_2 ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[USHLSAT:%[0-9]+]]:_(s32) = G_USHLSAT [[COPY]], [[C]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[USHLSAT]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[USHLSAT:%[0-9]+]]:_(i32) = G_USHLSAT [[COPY]], [[C]](i32) + ; CHECK-NEXT: $sgpr0 = COPY [[USHLSAT]](i32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 - %0:_(s32) = COPY $sgpr0 - %2:_(s32) = G_CONSTANT i32 1 - %4:_(s32) = G_CONSTANT i32 2 - %6:_(s32) = G_CONSTANT i32 3 - %8:_(s32) = G_CONSTANT i32 4 - %3:_(s32) = G_USHLSAT %0, %2(s32) - %5:_(s32) = G_USHLSAT %3, %4(s32) - %7:_(s32) = G_USHLSAT %5, %6(s32) - %9:_(s32) = G_USHLSAT %7, %8(s32) - $sgpr0 = COPY %9(s32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = G_CONSTANT i32 1 + %2:_(i32) = G_CONSTANT i32 2 + %3:_(i32) = G_CONSTANT i32 3 + %4:_(i32) = G_CONSTANT i32 4 + %5:_(i32) = G_USHLSAT %0, %1(i32) + %6:_(i32) = G_USHLSAT %5, %2(i32) + %7:_(i32) = G_USHLSAT %6, %3(i32) + %8:_(i32) = G_USHLSAT %7, %4(i32) + $sgpr0 = COPY %8(i32) SI_RETURN_TO_EPILOG implicit $sgpr0 ... 
@@ -177,20 +177,20 @@ body: | ; CHECK-LABEL: name: ushlsat_i32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 30 - ; CHECK-NEXT: [[USHLSAT:%[0-9]+]]:_(s32) = G_USHLSAT [[COPY]], [[C1]](s32) - ; CHECK-NEXT: [[USHLSAT1:%[0-9]+]]:_(s32) = G_USHLSAT [[USHLSAT]], [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[USHLSAT1]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 30 + ; CHECK-NEXT: [[USHLSAT:%[0-9]+]]:_(i32) = G_USHLSAT [[COPY]], [[C1]](i32) + ; CHECK-NEXT: [[USHLSAT1:%[0-9]+]]:_(i32) = G_USHLSAT [[USHLSAT]], [[C]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[USHLSAT1]](i32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 - %0:_(s32) = COPY $vgpr0 - %2:_(s32) = G_CONSTANT i32 10 - %3:_(s32) = G_USHLSAT %0, %2(s32) - %4:_(s32) = G_USHLSAT %3, %2(s32) - %5:_(s32) = G_USHLSAT %4, %2(s32) - %6:_(s32) = G_USHLSAT %5, %2(s32) - $vgpr0 = COPY %6(s32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_CONSTANT i32 10 + %2:_(i32) = G_USHLSAT %0, %1(i32) + %3:_(i32) = G_USHLSAT %2, %1(i32) + %4:_(i32) = G_USHLSAT %3, %1(i32) + %5:_(i32) = G_USHLSAT %4, %1(i32) + $vgpr0 = COPY %5(i32) SI_RETURN_TO_EPILOG implicit $vgpr0 ... @@ -204,31 +204,31 @@ body: | ; CHECK-LABEL: name: ushlsat_i64 ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 60 - ; CHECK-NEXT: [[USHLSAT:%[0-9]+]]:_(s64) = G_USHLSAT [[MV]], [[C1]](s64) - ; CHECK-NEXT: [[USHLSAT1:%[0-9]+]]:_(s64) = G_USHLSAT [[USHLSAT]], [[C]](s64) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[USHLSAT1]](s64) - ; CHECK-NEXT: $sgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $sgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 10 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 60 + ; CHECK-NEXT: [[USHLSAT:%[0-9]+]]:_(i64) = G_USHLSAT [[MV]], [[C1]](i64) + ; CHECK-NEXT: [[USHLSAT1:%[0-9]+]]:_(i64) = G_USHLSAT [[USHLSAT]], [[C]](i64) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[USHLSAT1]](i64) + ; CHECK-NEXT: $sgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $sgpr1 = COPY [[UV1]](i32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 - %1:_(s32) = COPY $sgpr0 - %2:_(s32) = COPY $sgpr1 - %0:_(s64) = G_MERGE_VALUES %1(s32), %2(s32) - %4:_(s64) = G_CONSTANT i64 10 - %5:_(s64) = G_USHLSAT %0, %4(s64) - %6:_(s64) = G_USHLSAT %5, %4(s64) - %7:_(s64) = G_USHLSAT %6, %4(s64) - %8:_(s64) = G_USHLSAT %7, %4(s64) - %9:_(s64) = G_USHLSAT %8, %4(s64) - %10:_(s64) = G_USHLSAT %9, %4(s64) - %11:_(s64) = G_USHLSAT %10, %4(s64) - %12:_(s32), %13:_(s32) = G_UNMERGE_VALUES %11(s64) - $sgpr0 = COPY %12(s32) - $sgpr1 = COPY %13(s32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i64) = G_MERGE_VALUES %0(i32), %1(i32) + %3:_(i64) = G_CONSTANT i64 10 + %4:_(i64) = G_USHLSAT %2, %3(i64) + %5:_(i64) = 
G_USHLSAT %4, %3(i64) + %6:_(i64) = G_USHLSAT %5, %3(i64) + %7:_(i64) = G_USHLSAT %6, %3(i64) + %8:_(i64) = G_USHLSAT %7, %3(i64) + %9:_(i64) = G_USHLSAT %8, %3(i64) + %10:_(i64) = G_USHLSAT %9, %3(i64) + %11:_(i32), %12:_(i32) = G_UNMERGE_VALUES %10(i64) + $sgpr0 = COPY %11(i32) + $sgpr1 = COPY %12(i32) SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-of-shifted-logic-shlsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-of-shifted-logic-shlsat.mir index 990d8302a37f0..7ae7c0d72debc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-of-shifted-logic-shlsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-of-shifted-logic-shlsat.mir @@ -11,22 +11,22 @@ body: | ; CHECK-LABEL: name: ushlsat_and_1 ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1073741820 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[USHLSAT:%[0-9]+]]:_(s32) = G_USHLSAT [[COPY]], [[C2]](s32) - ; CHECK-NEXT: [[USHLSAT1:%[0-9]+]]:_(s32) = G_USHLSAT [[C1]], [[C]](s32) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[USHLSAT]], [[USHLSAT1]] - ; CHECK-NEXT: $sgpr0 = COPY [[AND]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 1073741820 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[USHLSAT:%[0-9]+]]:_(i32) = G_USHLSAT [[COPY]], [[C2]](i32) + ; CHECK-NEXT: [[USHLSAT1:%[0-9]+]]:_(i32) = G_USHLSAT [[C1]], [[C]](i32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[USHLSAT]], [[USHLSAT1]] + ; CHECK-NEXT: $sgpr0 = COPY [[AND]](i32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 - %0:_(s32) = COPY $sgpr0 - %2:_(s32) = G_CONSTANT i32 2 - %4:_(s32) = G_CONSTANT i32 1073741820 - %3:_(s32) = G_USHLSAT %0, %2(s32) - %5:_(s32) = G_AND %3, %4 - %6:_(s32) = G_USHLSAT %5, %2(s32) - $sgpr0 = COPY %6(s32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = G_CONSTANT i32 2 + %2:_(i32) = G_CONSTANT i32 1073741820 + %3:_(i32) = G_USHLSAT %0, %1(i32) + %4:_(i32) = G_AND %3, %2 + %5:_(i32) = G_USHLSAT %4, %1(i32) + $sgpr0 = COPY %5(i32) SI_RETURN_TO_EPILOG implicit $sgpr0 ... 
@@ -40,23 +40,23 @@ body: | ; CHECK-LABEL: name: ushlsat_and_2 ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 536870880 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[USHLSAT:%[0-9]+]]:_(s32) = G_USHLSAT [[COPY]], [[C2]](s32) - ; CHECK-NEXT: [[USHLSAT1:%[0-9]+]]:_(s32) = G_USHLSAT [[C]], [[C1]](s32) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[USHLSAT]], [[USHLSAT1]] - ; CHECK-NEXT: $sgpr0 = COPY [[AND]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 536870880 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[USHLSAT:%[0-9]+]]:_(i32) = G_USHLSAT [[COPY]], [[C2]](i32) + ; CHECK-NEXT: [[USHLSAT1:%[0-9]+]]:_(i32) = G_USHLSAT [[C]], [[C1]](i32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[USHLSAT]], [[USHLSAT1]] + ; CHECK-NEXT: $sgpr0 = COPY [[AND]](i32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 - %0:_(s32) = COPY $sgpr0 - %2:_(s32) = G_CONSTANT i32 5 - %4:_(s32) = G_CONSTANT i32 536870880 - %6:_(s32) = G_CONSTANT i32 3 - %3:_(s32) = G_USHLSAT %0, %2(s32) - %5:_(s32) = G_AND %3, %4 - %7:_(s32) = G_USHLSAT %5, %6(s32) - $sgpr0 = COPY %7(s32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = G_CONSTANT i32 5 + %2:_(i32) = G_CONSTANT i32 536870880 + %3:_(i32) = G_CONSTANT i32 3 + %4:_(i32) = G_USHLSAT %0, %1(i32) + %5:_(i32) = G_AND %4, %2 + %6:_(i32) = G_USHLSAT %5, %3(i32) + $sgpr0 = COPY %6(i32) SI_RETURN_TO_EPILOG implicit $sgpr0 ... @@ -70,23 +70,23 @@ body: | ; CHECK-LABEL: name: ushlsat_and_3 ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65536 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 19 - ; CHECK-NEXT: [[USHLSAT:%[0-9]+]]:_(s32) = G_USHLSAT [[COPY]], [[C2]](s32) - ; CHECK-NEXT: [[USHLSAT1:%[0-9]+]]:_(s32) = G_USHLSAT [[C]], [[C1]](s32) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[USHLSAT]], [[USHLSAT1]] - ; CHECK-NEXT: $sgpr0 = COPY [[AND]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65536 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 19 + ; CHECK-NEXT: [[USHLSAT:%[0-9]+]]:_(i32) = G_USHLSAT [[COPY]], [[C2]](i32) + ; CHECK-NEXT: [[USHLSAT1:%[0-9]+]]:_(i32) = G_USHLSAT [[C]], [[C1]](i32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[USHLSAT]], [[USHLSAT1]] + ; CHECK-NEXT: $sgpr0 = COPY [[AND]](i32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 - %0:_(s32) = COPY $sgpr0 - %2:_(s32) = G_CONSTANT i32 3 - %4:_(s32) = G_CONSTANT i32 65536 - %6:_(s32) = G_CONSTANT i32 16 - %3:_(s32) = G_USHLSAT %0, %2(s32) - %5:_(s32) = G_AND %3, %4 - %7:_(s32) = G_USHLSAT %5, %6(s32) - $sgpr0 = COPY %7(s32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = G_CONSTANT i32 3 + %2:_(i32) = G_CONSTANT i32 65536 + %3:_(i32) = G_CONSTANT i32 16 + %4:_(i32) = G_USHLSAT %0, %1(i32) + %5:_(i32) = G_AND %4, %2 + %6:_(i32) = G_USHLSAT %5, %3(i32) + $sgpr0 = COPY %6(i32) SI_RETURN_TO_EPILOG implicit $sgpr0 ... 
@@ -100,22 +100,22 @@ body: | ; CHECK-LABEL: name: ushlsat_or_1 ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1073741821 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[USHLSAT:%[0-9]+]]:_(s32) = G_USHLSAT [[COPY]], [[C2]](s32) - ; CHECK-NEXT: [[USHLSAT1:%[0-9]+]]:_(s32) = G_USHLSAT [[C1]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[USHLSAT]], [[USHLSAT1]] - ; CHECK-NEXT: $sgpr0 = COPY [[OR]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1073741821 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[USHLSAT:%[0-9]+]]:_(i32) = G_USHLSAT [[COPY]], [[C2]](i32) + ; CHECK-NEXT: [[USHLSAT1:%[0-9]+]]:_(i32) = G_USHLSAT [[C1]], [[C]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[USHLSAT]], [[USHLSAT1]] + ; CHECK-NEXT: $sgpr0 = COPY [[OR]](i32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 - %0:_(s32) = COPY $sgpr0 - %2:_(s32) = G_CONSTANT i32 2 - %4:_(s32) = G_CONSTANT i32 -1073741821 - %3:_(s32) = G_USHLSAT %0, %2(s32) - %5:_(s32) = G_OR %3, %4 - %6:_(s32) = G_USHLSAT %5, %2(s32) - $sgpr0 = COPY %6(s32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = G_CONSTANT i32 2 + %2:_(i32) = G_CONSTANT i32 -1073741821 + %3:_(i32) = G_USHLSAT %0, %1(i32) + %4:_(i32) = G_OR %3, %2 + %5:_(i32) = G_USHLSAT %4, %1(i32) + $sgpr0 = COPY %5(i32) SI_RETURN_TO_EPILOG implicit $sgpr0 ... @@ -129,23 +129,23 @@ body: | ; CHECK-LABEL: name: ushlsat_or_2 ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -536870881 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[USHLSAT:%[0-9]+]]:_(s32) = G_USHLSAT [[COPY]], [[C2]](s32) - ; CHECK-NEXT: [[USHLSAT1:%[0-9]+]]:_(s32) = G_USHLSAT [[C]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[USHLSAT]], [[USHLSAT1]] - ; CHECK-NEXT: $sgpr0 = COPY [[OR]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 -536870881 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[USHLSAT:%[0-9]+]]:_(i32) = G_USHLSAT [[COPY]], [[C2]](i32) + ; CHECK-NEXT: [[USHLSAT1:%[0-9]+]]:_(i32) = G_USHLSAT [[C]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[USHLSAT]], [[USHLSAT1]] + ; CHECK-NEXT: $sgpr0 = COPY [[OR]](i32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 - %0:_(s32) = COPY $sgpr0 - %2:_(s32) = G_CONSTANT i32 5 - %4:_(s32) = G_CONSTANT i32 -536870881 - %6:_(s32) = G_CONSTANT i32 3 - %3:_(s32) = G_USHLSAT %0, %2(s32) - %5:_(s32) = G_OR %3, %4 - %7:_(s32) = G_USHLSAT %5, %6(s32) - $sgpr0 = COPY %7(s32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = G_CONSTANT i32 5 + %2:_(i32) = G_CONSTANT i32 -536870881 + %3:_(i32) = G_CONSTANT i32 3 + %4:_(i32) = G_USHLSAT %0, %1(i32) + %5:_(i32) = G_OR %4, %2 + %6:_(i32) = G_USHLSAT %5, %3(i32) + $sgpr0 = COPY %6(i32) SI_RETURN_TO_EPILOG implicit $sgpr0 ... 
@@ -159,23 +159,23 @@ body: | ; CHECK-LABEL: name: ushlsat_or_3 ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65536 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 19 - ; CHECK-NEXT: [[USHLSAT:%[0-9]+]]:_(s32) = G_USHLSAT [[COPY]], [[C2]](s32) - ; CHECK-NEXT: [[USHLSAT1:%[0-9]+]]:_(s32) = G_USHLSAT [[C]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[USHLSAT]], [[USHLSAT1]] - ; CHECK-NEXT: $sgpr0 = COPY [[OR]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65536 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 19 + ; CHECK-NEXT: [[USHLSAT:%[0-9]+]]:_(i32) = G_USHLSAT [[COPY]], [[C2]](i32) + ; CHECK-NEXT: [[USHLSAT1:%[0-9]+]]:_(i32) = G_USHLSAT [[C]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[USHLSAT]], [[USHLSAT1]] + ; CHECK-NEXT: $sgpr0 = COPY [[OR]](i32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 - %0:_(s32) = COPY $sgpr0 - %2:_(s32) = G_CONSTANT i32 3 - %4:_(s32) = G_CONSTANT i32 65536 - %6:_(s32) = G_CONSTANT i32 16 - %3:_(s32) = G_USHLSAT %0, %2(s32) - %5:_(s32) = G_OR %3, %4 - %7:_(s32) = G_USHLSAT %5, %6(s32) - $sgpr0 = COPY %7(s32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = G_CONSTANT i32 3 + %2:_(i32) = G_CONSTANT i32 65536 + %3:_(i32) = G_CONSTANT i32 16 + %4:_(i32) = G_USHLSAT %0, %1(i32) + %5:_(i32) = G_OR %4, %2 + %6:_(i32) = G_USHLSAT %5, %3(i32) + $sgpr0 = COPY %6(i32) SI_RETURN_TO_EPILOG implicit $sgpr0 ... @@ -189,23 +189,23 @@ body: | ; CHECK-LABEL: name: ushlsat_xor ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 43690 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[USHLSAT:%[0-9]+]]:_(s32) = G_USHLSAT [[COPY]], [[C2]](s32) - ; CHECK-NEXT: [[USHLSAT1:%[0-9]+]]:_(s32) = G_USHLSAT [[C]], [[C1]](s32) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[USHLSAT]], [[USHLSAT1]] - ; CHECK-NEXT: $sgpr0 = COPY [[XOR]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 43690 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 5 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[USHLSAT:%[0-9]+]]:_(i32) = G_USHLSAT [[COPY]], [[C2]](i32) + ; CHECK-NEXT: [[USHLSAT1:%[0-9]+]]:_(i32) = G_USHLSAT [[C]], [[C1]](i32) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[USHLSAT]], [[USHLSAT1]] + ; CHECK-NEXT: $sgpr0 = COPY [[XOR]](i32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 - %0:_(s32) = COPY $sgpr0 - %2:_(s32) = G_CONSTANT i32 3 - %4:_(s32) = G_CONSTANT i32 43690 - %6:_(s32) = G_CONSTANT i32 5 - %3:_(s32) = G_USHLSAT %0, %2(s32) - %5:_(s32) = G_XOR %3, %4 - %7:_(s32) = G_USHLSAT %5, %6(s32) - $sgpr0 = COPY %7(s32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = G_CONSTANT i32 3 + %2:_(i32) = G_CONSTANT i32 43690 + %3:_(i32) = G_CONSTANT i32 5 + %4:_(i32) = G_USHLSAT %0, %1(i32) + %5:_(i32) = G_XOR %4, %2 + %6:_(i32) = G_USHLSAT %5, %3(i32) + $sgpr0 = COPY %6(i32) SI_RETURN_TO_EPILOG implicit $sgpr0 ... 
@@ -219,22 +219,22 @@ body: | ; CHECK-LABEL: name: sshlsat_and_1 ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1073741820 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(s32) = G_SSHLSAT [[COPY]], [[C2]](s32) - ; CHECK-NEXT: [[SSHLSAT1:%[0-9]+]]:_(s32) = G_SSHLSAT [[C1]], [[C]](s32) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SSHLSAT]], [[SSHLSAT1]] - ; CHECK-NEXT: $sgpr0 = COPY [[AND]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 1073741820 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(i32) = G_SSHLSAT [[COPY]], [[C2]](i32) + ; CHECK-NEXT: [[SSHLSAT1:%[0-9]+]]:_(i32) = G_SSHLSAT [[C1]], [[C]](i32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[SSHLSAT]], [[SSHLSAT1]] + ; CHECK-NEXT: $sgpr0 = COPY [[AND]](i32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 - %0:_(s32) = COPY $sgpr0 - %2:_(s32) = G_CONSTANT i32 2 - %4:_(s32) = G_CONSTANT i32 1073741820 - %3:_(s32) = G_SSHLSAT %0, %2(s32) - %5:_(s32) = G_AND %3, %4 - %6:_(s32) = G_SSHLSAT %5, %2(s32) - $sgpr0 = COPY %6(s32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = G_CONSTANT i32 2 + %2:_(i32) = G_CONSTANT i32 1073741820 + %3:_(i32) = G_SSHLSAT %0, %1(i32) + %4:_(i32) = G_AND %3, %2 + %5:_(i32) = G_SSHLSAT %4, %1(i32) + $sgpr0 = COPY %5(i32) SI_RETURN_TO_EPILOG implicit $sgpr0 ... @@ -248,23 +248,23 @@ body: | ; CHECK-LABEL: name: sshlsat_and_2 ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 536870880 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(s32) = G_SSHLSAT [[COPY]], [[C2]](s32) - ; CHECK-NEXT: [[SSHLSAT1:%[0-9]+]]:_(s32) = G_SSHLSAT [[C]], [[C1]](s32) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SSHLSAT]], [[SSHLSAT1]] - ; CHECK-NEXT: $sgpr0 = COPY [[AND]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 536870880 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(i32) = G_SSHLSAT [[COPY]], [[C2]](i32) + ; CHECK-NEXT: [[SSHLSAT1:%[0-9]+]]:_(i32) = G_SSHLSAT [[C]], [[C1]](i32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[SSHLSAT]], [[SSHLSAT1]] + ; CHECK-NEXT: $sgpr0 = COPY [[AND]](i32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 - %0:_(s32) = COPY $sgpr0 - %2:_(s32) = G_CONSTANT i32 5 - %4:_(s32) = G_CONSTANT i32 536870880 - %6:_(s32) = G_CONSTANT i32 3 - %3:_(s32) = G_SSHLSAT %0, %2(s32) - %5:_(s32) = G_AND %3, %4 - %7:_(s32) = G_SSHLSAT %5, %6(s32) - $sgpr0 = COPY %7(s32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = G_CONSTANT i32 5 + %2:_(i32) = G_CONSTANT i32 536870880 + %3:_(i32) = G_CONSTANT i32 3 + %4:_(i32) = G_SSHLSAT %0, %1(i32) + %5:_(i32) = G_AND %4, %2 + %6:_(i32) = G_SSHLSAT %5, %3(i32) + $sgpr0 = COPY %6(i32) SI_RETURN_TO_EPILOG implicit $sgpr0 ... 
@@ -278,23 +278,23 @@ body: | ; CHECK-LABEL: name: sshlsat_and_3 ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65536 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 19 - ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(s32) = G_SSHLSAT [[COPY]], [[C2]](s32) - ; CHECK-NEXT: [[SSHLSAT1:%[0-9]+]]:_(s32) = G_SSHLSAT [[C]], [[C1]](s32) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SSHLSAT]], [[SSHLSAT1]] - ; CHECK-NEXT: $sgpr0 = COPY [[AND]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65536 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 19 + ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(i32) = G_SSHLSAT [[COPY]], [[C2]](i32) + ; CHECK-NEXT: [[SSHLSAT1:%[0-9]+]]:_(i32) = G_SSHLSAT [[C]], [[C1]](i32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[SSHLSAT]], [[SSHLSAT1]] + ; CHECK-NEXT: $sgpr0 = COPY [[AND]](i32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 - %0:_(s32) = COPY $sgpr0 - %2:_(s32) = G_CONSTANT i32 3 - %4:_(s32) = G_CONSTANT i32 65536 - %6:_(s32) = G_CONSTANT i32 16 - %3:_(s32) = G_SSHLSAT %0, %2(s32) - %5:_(s32) = G_AND %3, %4 - %7:_(s32) = G_SSHLSAT %5, %6(s32) - $sgpr0 = COPY %7(s32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = G_CONSTANT i32 3 + %2:_(i32) = G_CONSTANT i32 65536 + %3:_(i32) = G_CONSTANT i32 16 + %4:_(i32) = G_SSHLSAT %0, %1(i32) + %5:_(i32) = G_AND %4, %2 + %6:_(i32) = G_SSHLSAT %5, %3(i32) + $sgpr0 = COPY %6(i32) SI_RETURN_TO_EPILOG implicit $sgpr0 ... @@ -308,22 +308,22 @@ body: | ; CHECK-LABEL: name: sshlsat_or_1 ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1073741821 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(s32) = G_SSHLSAT [[COPY]], [[C2]](s32) - ; CHECK-NEXT: [[SSHLSAT1:%[0-9]+]]:_(s32) = G_SSHLSAT [[C1]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SSHLSAT]], [[SSHLSAT1]] - ; CHECK-NEXT: $sgpr0 = COPY [[OR]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1073741821 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(i32) = G_SSHLSAT [[COPY]], [[C2]](i32) + ; CHECK-NEXT: [[SSHLSAT1:%[0-9]+]]:_(i32) = G_SSHLSAT [[C1]], [[C]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SSHLSAT]], [[SSHLSAT1]] + ; CHECK-NEXT: $sgpr0 = COPY [[OR]](i32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 - %0:_(s32) = COPY $sgpr0 - %2:_(s32) = G_CONSTANT i32 2 - %4:_(s32) = G_CONSTANT i32 -1073741821 - %3:_(s32) = G_SSHLSAT %0, %2(s32) - %5:_(s32) = G_OR %3, %4 - %6:_(s32) = G_SSHLSAT %5, %2(s32) - $sgpr0 = COPY %6(s32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = G_CONSTANT i32 2 + %2:_(i32) = G_CONSTANT i32 -1073741821 + %3:_(i32) = G_SSHLSAT %0, %1(i32) + %4:_(i32) = G_OR %3, %2 + %5:_(i32) = G_SSHLSAT %4, %1(i32) + $sgpr0 = COPY %5(i32) SI_RETURN_TO_EPILOG implicit $sgpr0 ... 
@@ -337,23 +337,23 @@ body: | ; CHECK-LABEL: name: sshlsat_or_2 ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -536870881 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(s32) = G_SSHLSAT [[COPY]], [[C2]](s32) - ; CHECK-NEXT: [[SSHLSAT1:%[0-9]+]]:_(s32) = G_SSHLSAT [[C]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SSHLSAT]], [[SSHLSAT1]] - ; CHECK-NEXT: $sgpr0 = COPY [[OR]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 -536870881 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(i32) = G_SSHLSAT [[COPY]], [[C2]](i32) + ; CHECK-NEXT: [[SSHLSAT1:%[0-9]+]]:_(i32) = G_SSHLSAT [[C]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SSHLSAT]], [[SSHLSAT1]] + ; CHECK-NEXT: $sgpr0 = COPY [[OR]](i32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 - %0:_(s32) = COPY $sgpr0 - %2:_(s32) = G_CONSTANT i32 5 - %4:_(s32) = G_CONSTANT i32 -536870881 - %6:_(s32) = G_CONSTANT i32 3 - %3:_(s32) = G_SSHLSAT %0, %2(s32) - %5:_(s32) = G_OR %3, %4 - %7:_(s32) = G_SSHLSAT %5, %6(s32) - $sgpr0 = COPY %7(s32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = G_CONSTANT i32 5 + %2:_(i32) = G_CONSTANT i32 -536870881 + %3:_(i32) = G_CONSTANT i32 3 + %4:_(i32) = G_SSHLSAT %0, %1(i32) + %5:_(i32) = G_OR %4, %2 + %6:_(i32) = G_SSHLSAT %5, %3(i32) + $sgpr0 = COPY %6(i32) SI_RETURN_TO_EPILOG implicit $sgpr0 ... @@ -367,23 +367,23 @@ body: | ; CHECK-LABEL: name: sshlsat_or_3 ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65536 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 19 - ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(s32) = G_SSHLSAT [[COPY]], [[C2]](s32) - ; CHECK-NEXT: [[SSHLSAT1:%[0-9]+]]:_(s32) = G_SSHLSAT [[C]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SSHLSAT]], [[SSHLSAT1]] - ; CHECK-NEXT: $sgpr0 = COPY [[OR]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65536 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 19 + ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(i32) = G_SSHLSAT [[COPY]], [[C2]](i32) + ; CHECK-NEXT: [[SSHLSAT1:%[0-9]+]]:_(i32) = G_SSHLSAT [[C]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SSHLSAT]], [[SSHLSAT1]] + ; CHECK-NEXT: $sgpr0 = COPY [[OR]](i32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 - %0:_(s32) = COPY $sgpr0 - %2:_(s32) = G_CONSTANT i32 3 - %4:_(s32) = G_CONSTANT i32 65536 - %6:_(s32) = G_CONSTANT i32 16 - %3:_(s32) = G_SSHLSAT %0, %2(s32) - %5:_(s32) = G_OR %3, %4 - %7:_(s32) = G_SSHLSAT %5, %6(s32) - $sgpr0 = COPY %7(s32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = G_CONSTANT i32 3 + %2:_(i32) = G_CONSTANT i32 65536 + %3:_(i32) = G_CONSTANT i32 16 + %4:_(i32) = G_SSHLSAT %0, %1(i32) + %5:_(i32) = G_OR %4, %2 + %6:_(i32) = G_SSHLSAT %5, %3(i32) + $sgpr0 = COPY %6(i32) SI_RETURN_TO_EPILOG implicit $sgpr0 ... 
@@ -397,23 +397,23 @@ body: | ; CHECK-LABEL: name: sshlsat_xor ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 43690 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(s32) = G_SSHLSAT [[COPY]], [[C2]](s32) - ; CHECK-NEXT: [[SSHLSAT1:%[0-9]+]]:_(s32) = G_SSHLSAT [[C]], [[C1]](s32) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[SSHLSAT]], [[SSHLSAT1]] - ; CHECK-NEXT: $sgpr0 = COPY [[XOR]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 43690 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 5 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(i32) = G_SSHLSAT [[COPY]], [[C2]](i32) + ; CHECK-NEXT: [[SSHLSAT1:%[0-9]+]]:_(i32) = G_SSHLSAT [[C]], [[C1]](i32) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[SSHLSAT]], [[SSHLSAT1]] + ; CHECK-NEXT: $sgpr0 = COPY [[XOR]](i32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 - %0:_(s32) = COPY $sgpr0 - %2:_(s32) = G_CONSTANT i32 3 - %4:_(s32) = G_CONSTANT i32 43690 - %6:_(s32) = G_CONSTANT i32 5 - %3:_(s32) = G_SSHLSAT %0, %2(s32) - %5:_(s32) = G_XOR %3, %4 - %7:_(s32) = G_SSHLSAT %5, %6(s32) - $sgpr0 = COPY %7(s32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = G_CONSTANT i32 3 + %2:_(i32) = G_CONSTANT i32 43690 + %3:_(i32) = G_CONSTANT i32 5 + %4:_(i32) = G_SSHLSAT %0, %1(i32) + %5:_(i32) = G_XOR %4, %2 + %6:_(i32) = G_SSHLSAT %5, %3(i32) + $sgpr0 = COPY %6(i32) SI_RETURN_TO_EPILOG implicit $sgpr0 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shifts.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shifts.mir index fd794bd7d9cf9..182dd5c7ba9eb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shifts.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shifts.mir @@ -13,19 +13,19 @@ body: | ; CHECK-LABEL: name: combine_ashr ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr31, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: G_STORE [[C]](s32), [[MV]](p0) :: (store (s32)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: G_STORE [[C]](i32), [[MV]](p0) :: (store (i32)) ; CHECK-NEXT: SI_RETURN - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(p0) = G_MERGE_VALUES %0(s32), %1(s32) - %3:_(s32) = G_CONSTANT i32 10 - %4:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.x) - %5:_(s32) = G_ASHR %4, %3(s32) - G_STORE %5(s32), %2(p0) :: (store (s32)) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(p0) = G_MERGE_VALUES %0(i32), %1(i32) + %3:_(i32) = G_CONSTANT i32 10 + %4:_(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.x) + %5:_(i32) = G_ASHR %4, %3(i32) + G_STORE %5(i32), %2(p0) :: (store (i32)) SI_RETURN ... 
@@ -41,19 +41,19 @@ body: | ; CHECK-LABEL: name: combine_lshr ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr31, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: G_STORE [[C]](s32), [[MV]](p0) :: (store (s32)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: G_STORE [[C]](i32), [[MV]](p0) :: (store (i32)) ; CHECK-NEXT: SI_RETURN - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(p0) = G_MERGE_VALUES %0(s32), %1(s32) - %3:_(s32) = G_CONSTANT i32 10 - %4:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.x) - %5:_(s32) = G_LSHR %4, %3(s32) - G_STORE %5(s32), %2(p0) :: (store (s32)) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(p0) = G_MERGE_VALUES %0(i32), %1(i32) + %3:_(i32) = G_CONSTANT i32 10 + %4:_(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.x) + %5:_(i32) = G_LSHR %4, %3(i32) + G_STORE %5(i32), %2(p0) :: (store (i32)) SI_RETURN ... @@ -69,19 +69,19 @@ body: | ; CHECK-LABEL: name: combine_shl ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr31, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: G_STORE [[C]](s32), [[MV]](p0) :: (store (s32)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: G_STORE [[C]](i32), [[MV]](p0) :: (store (i32)) ; CHECK-NEXT: SI_RETURN - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(p0) = G_MERGE_VALUES %0(s32), %1(s32) - %3:_(s32) = G_CONSTANT i32 16 - %4:_(s32) = G_CONSTANT i32 4294901760 - %5:_(s32) = G_SHL %4, %3(s32) - G_STORE %5(s32), %2(p0) :: (store (s32)) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(p0) = G_MERGE_VALUES %0(i32), %1(i32) + %3:_(i32) = G_CONSTANT i32 16 + %4:_(i32) = G_CONSTANT i32 -65536 + %5:_(i32) = G_SHL %4, %3(i32) + G_STORE %5(i32), %2(p0) :: (store (i32)) SI_RETURN ... 
@@ -97,19 +97,19 @@ body: | ; CHECK-LABEL: name: combine_ashr2 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr31, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 -1 - ; CHECK-NEXT: G_STORE [[C]](s8), [[MV]](p0) :: (store (s8)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i8) = G_CONSTANT i8 -1 + ; CHECK-NEXT: G_STORE [[C]](i8), [[MV]](p0) :: (store (i8)) ; CHECK-NEXT: SI_RETURN - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(p0) = G_MERGE_VALUES %0(s32), %1(s32) - %3:_(s32) = G_CONSTANT i32 1 - %4:_(s8) = G_CONSTANT i8 -2 - %5:_(s8) = G_ASHR %4, %3(s32) - G_STORE %5(s8), %2(p0) :: (store (s8)) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(p0) = G_MERGE_VALUES %0(i32), %1(i32) + %3:_(i32) = G_CONSTANT i32 1 + %4:_(i8) = G_CONSTANT i8 -2 + %5:_(i8) = G_ASHR %4, %3(i32) + G_STORE %5(i8), %2(p0) :: (store (i8)) SI_RETURN ... @@ -125,22 +125,22 @@ body: | ; CHECK-LABEL: name: combine_vector_lshr ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr31, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[C]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: $vgpr0 = COPY [[C]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[C]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 - %0:_(<2 x s32>) = G_IMPLICIT_DEF - %1:_(s32) = G_CONSTANT i32 511 - %2:_(s32) = G_CONSTANT i32 0 - %3:_(s32) = G_CONSTANT i32 1 - %4:_(s32) = G_CONSTANT i32 9 - %5:_(<2 x s32>) = G_BUILD_VECTOR %4(s32), %4(s32) - %6:_(<2 x s32>) = G_INSERT_VECTOR_ELT %0, %1(s32), %2(s32) - %7:_(<2 x s32>) = G_INSERT_VECTOR_ELT %6, %1(s32), %3(s32) - %8:_(<2 x s32>) = G_LSHR %7, %5(<2 x s32>) - %9:_(s32), %10:_(s32) = G_UNMERGE_VALUES %8(<2 x s32>) - $vgpr0 = COPY %9(s32) - $vgpr1 = COPY %10(s32) + %0:_(<2 x i32>) = G_IMPLICIT_DEF + %1:_(i32) = G_CONSTANT i32 511 + %2:_(i32) = G_CONSTANT i32 0 + %3:_(i32) = G_CONSTANT i32 1 + %4:_(i32) = G_CONSTANT i32 9 + %5:_(<2 x i32>) = G_BUILD_VECTOR %4(i32), %4(i32) + %6:_(<2 x i32>) = G_INSERT_VECTOR_ELT %0, %1(i32), %2(i32) + %7:_(<2 x i32>) = G_INSERT_VECTOR_ELT %6, %1(i32), %3(i32) + %8:_(<2 x i32>) = G_LSHR %7, %5(<2 x i32>) + %9:_(i32), %10:_(i32) = G_UNMERGE_VALUES %8(<2 x i32>) + $vgpr0 = COPY %9(i32) + $vgpr1 = COPY %10(i32) SI_RETURN implicit $vgpr0, implicit $vgpr1 ... 
@@ -156,22 +156,22 @@ body: | ; CHECK-LABEL: name: combine_vector_shl ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr31, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[C]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: $vgpr0 = COPY [[C]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[C]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 - %0:_(<2 x s32>) = G_IMPLICIT_DEF - %1:_(s32) = G_CONSTANT i32 4294901760 - %2:_(s32) = G_CONSTANT i32 0 - %3:_(s32) = G_CONSTANT i32 1 - %4:_(s32) = G_CONSTANT i32 16 - %5:_(<2 x s32>) = G_BUILD_VECTOR %4(s32), %4(s32) - %6:_(<2 x s32>) = G_INSERT_VECTOR_ELT %0, %1(s32), %2(s32) - %7:_(<2 x s32>) = G_INSERT_VECTOR_ELT %6, %1(s32), %3(s32) - %8:_(<2 x s32>) = G_SHL %7, %5(<2 x s32>) - %9:_(s32), %10:_(s32) = G_UNMERGE_VALUES %8(<2 x s32>) - $vgpr0 = COPY %9(s32) - $vgpr1 = COPY %10(s32) + %0:_(<2 x i32>) = G_IMPLICIT_DEF + %1:_(i32) = G_CONSTANT i32 -65536 + %2:_(i32) = G_CONSTANT i32 0 + %3:_(i32) = G_CONSTANT i32 1 + %4:_(i32) = G_CONSTANT i32 16 + %5:_(<2 x i32>) = G_BUILD_VECTOR %4(i32), %4(i32) + %6:_(<2 x i32>) = G_INSERT_VECTOR_ELT %0, %1(i32), %2(i32) + %7:_(<2 x i32>) = G_INSERT_VECTOR_ELT %6, %1(i32), %3(i32) + %8:_(<2 x i32>) = G_SHL %7, %5(<2 x i32>) + %9:_(i32), %10:_(i32) = G_UNMERGE_VALUES %8(<2 x i32>) + $vgpr0 = COPY %9(i32) + $vgpr1 = COPY %10(i32) SI_RETURN implicit $vgpr0, implicit $vgpr1 ... @@ -187,22 +187,22 @@ body: | ; CHECK-LABEL: name: combine_vector_ashr ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr31, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[C]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: $vgpr0 = COPY [[C]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[C]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 - %0:_(<2 x s32>) = G_IMPLICIT_DEF - %1:_(s32) = G_CONSTANT i32 -1 - %2:_(s32) = G_CONSTANT i32 0 - %3:_(s32) = G_CONSTANT i32 1 - %4:_(s32) = G_CONSTANT i32 1 - %5:_(<2 x s32>) = G_BUILD_VECTOR %4(s32), %4(s32) - %6:_(<2 x s32>) = G_INSERT_VECTOR_ELT %0, %1(s32), %2(s32) - %7:_(<2 x s32>) = G_INSERT_VECTOR_ELT %6, %1(s32), %3(s32) - %8:_(<2 x s32>) = G_ASHR %7, %5(<2 x s32>) - %9:_(s32), %10:_(s32) = G_UNMERGE_VALUES %8(<2 x s32>) - $vgpr0 = COPY %9(s32) - $vgpr1 = COPY %10(s32) + %0:_(<2 x i32>) = G_IMPLICIT_DEF + %1:_(i32) = G_CONSTANT i32 -1 + %2:_(i32) = G_CONSTANT i32 0 + %3:_(i32) = G_CONSTANT i32 1 + %4:_(i32) = G_CONSTANT i32 1 + %5:_(<2 x i32>) = G_BUILD_VECTOR %4(i32), %4(i32) + %6:_(<2 x i32>) = G_INSERT_VECTOR_ELT %0, %1(i32), %2(i32) + %7:_(<2 x i32>) = G_INSERT_VECTOR_ELT %6, %1(i32), %3(i32) + %8:_(<2 x i32>) = G_ASHR %7, %5(<2 x i32>) + %9:_(i32), %10:_(i32) = G_UNMERGE_VALUES %8(<2 x i32>) + $vgpr0 = COPY %9(i32) + $vgpr1 = COPY %10(i32) SI_RETURN implicit $vgpr0, implicit $vgpr1 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.postlegal.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.postlegal.mir index a8cd974b01ab4..fe9a32a6d7d08 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.postlegal.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.postlegal.mir @@ -14,25 +14,25 @@ body: | ; GFX6-LABEL: name: shl_s64_by_2_from_anyext_s32 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s32) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SHL]](s64) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[COPY]](i32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C]](i32) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SHL]](i64) ; ; GFX9-LABEL: name: shl_s64_by_2_from_anyext_s32 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SHL]](s64) - %0:_(s32) = COPY $vgpr0 - %1:_(s64) = G_ANYEXT %0 - %2:_(s32) = G_CONSTANT i32 2 - %3:_(s64) = G_SHL %1, %2 - $vgpr0_vgpr1 = COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[COPY]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SHL]](i64) + %0:_(i32) = COPY $vgpr0 + %1:_(i64) = G_ANYEXT %0(i32) + %2:_(i32) = G_CONSTANT i32 2 + %3:_(i64) = G_SHL %1, %2(i32) + $vgpr0_vgpr1 = COPY %3(i64) ... 
# Can't narrow this; need known bits @@ -47,25 +47,25 @@ body: | ; GFX6-LABEL: name: shl_s64_by_2_from_sext_s32 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY]](s32) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SHL]](s64) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[SEXT:%[0-9]+]]:_(i64) = G_SEXT [[COPY]](i32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[SEXT]], [[C]](i32) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SHL]](i64) ; ; GFX9-LABEL: name: shl_s64_by_2_from_sext_s32 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SHL]](s64) - %0:_(s32) = COPY $vgpr0 - %1:_(s64) = G_SEXT %0 - %2:_(s32) = G_CONSTANT i32 2 - %3:_(s64) = G_SHL %1, %2 - $vgpr0_vgpr1 = COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(i64) = G_SEXT [[COPY]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[SEXT]], [[C]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SHL]](i64) + %0:_(i32) = COPY $vgpr0 + %1:_(i64) = G_SEXT %0(i32) + %2:_(i32) = G_CONSTANT i32 2 + %3:_(i64) = G_SHL %1, %2(i32) + $vgpr0_vgpr1 = COPY %3(i64) ... # Can't narrow this; need known bits @@ -80,25 +80,25 @@ body: | ; GFX6-LABEL: name: shl_s64_by_2_from_zext_s32 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ZEXT]], [[C]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SHL]](s64) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[COPY]](i32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[ZEXT]], [[C]](i32) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SHL]](i64) ; ; GFX9-LABEL: name: shl_s64_by_2_from_zext_s32 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ZEXT]], [[C]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SHL]](s64) - %0:_(s32) = COPY $vgpr0 - %1:_(s64) = G_ZEXT %0 - %2:_(s32) = G_CONSTANT i32 2 - %3:_(s64) = G_SHL %1, %2 - $vgpr0_vgpr1 = COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[COPY]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[ZEXT]], [[C]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SHL]](i64) + %0:_(i32) = COPY $vgpr0 + %1:_(i64) = G_ZEXT %0(i32) + %2:_(i32) = G_CONSTANT i32 2 + %3:_(i64) = G_SHL %1, %2(i32) + $vgpr0_vgpr1 = COPY %3(i64) ... 
--- @@ -112,31 +112,31 @@ body: | ; GFX6-LABEL: name: narrow_shl_s64_by_2_from_anyext_s32 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: %narrow:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: %masklow30:_(s32) = G_CONSTANT i32 1073741823 - ; GFX6-NEXT: %masked:_(s32) = G_AND %narrow, %masklow30 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL %masked, [[C]](s32) - ; GFX6-NEXT: %shl:_(s64) = G_ZEXT [[SHL]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY %shl(s64) + ; GFX6-NEXT: %narrow:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: %masklow30:_(i32) = G_CONSTANT i32 1073741823 + ; GFX6-NEXT: %masked:_(i32) = G_AND %narrow, %masklow30 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL %masked, [[C]](i32) + ; GFX6-NEXT: %shl:_(i64) = G_ZEXT [[SHL]](i32) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY %shl(i64) ; ; GFX9-LABEL: name: narrow_shl_s64_by_2_from_anyext_s32 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: %narrow:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: %masklow30:_(s32) = G_CONSTANT i32 1073741823 - ; GFX9-NEXT: %masked:_(s32) = G_AND %narrow, %masklow30 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL %masked, [[C]](s32) - ; GFX9-NEXT: %shl:_(s64) = G_ZEXT [[SHL]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY %shl(s64) - %narrow:_(s32) = COPY $vgpr0 - %masklow30:_(s32) = G_CONSTANT i32 1073741823 - %masked:_(s32) = G_AND %narrow, %masklow30 - %extend:_(s64) = G_ANYEXT %masked - %shiftamt:_(s32) = G_CONSTANT i32 2 - %shl:_(s64) = G_SHL %extend, %shiftamt - $vgpr0_vgpr1 = COPY %shl + ; GFX9-NEXT: %narrow:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: %masklow30:_(i32) = G_CONSTANT i32 1073741823 + ; GFX9-NEXT: %masked:_(i32) = G_AND %narrow, %masklow30 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL %masked, [[C]](i32) + ; GFX9-NEXT: %shl:_(i64) = G_ZEXT [[SHL]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY %shl(i64) + %narrow:_(i32) = COPY $vgpr0 + %masklow30:_(i32) = G_CONSTANT i32 1073741823 + %masked:_(i32) = G_AND %narrow, %masklow30 + %extend:_(i64) = G_ANYEXT %masked(i32) + %shiftamt:_(i32) = G_CONSTANT i32 2 + %shl:_(i64) = G_SHL %extend, %shiftamt(i32) + $vgpr0_vgpr1 = COPY %shl(i64) ... 
--- @@ -150,31 +150,31 @@ body: | ; GFX6-LABEL: name: narrow_shl_s64_by_2_from_zext_s32 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: %narrow:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: %masklow30:_(s32) = G_CONSTANT i32 1073741823 - ; GFX6-NEXT: %masked:_(s32) = G_AND %narrow, %masklow30 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL %masked, [[C]](s32) - ; GFX6-NEXT: %shl:_(s64) = G_ZEXT [[SHL]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY %shl(s64) + ; GFX6-NEXT: %narrow:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: %masklow30:_(i32) = G_CONSTANT i32 1073741823 + ; GFX6-NEXT: %masked:_(i32) = G_AND %narrow, %masklow30 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL %masked, [[C]](i32) + ; GFX6-NEXT: %shl:_(i64) = G_ZEXT [[SHL]](i32) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY %shl(i64) ; ; GFX9-LABEL: name: narrow_shl_s64_by_2_from_zext_s32 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: %narrow:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: %masklow30:_(s32) = G_CONSTANT i32 1073741823 - ; GFX9-NEXT: %masked:_(s32) = G_AND %narrow, %masklow30 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL %masked, [[C]](s32) - ; GFX9-NEXT: %shl:_(s64) = G_ZEXT [[SHL]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY %shl(s64) - %narrow:_(s32) = COPY $vgpr0 - %masklow30:_(s32) = G_CONSTANT i32 1073741823 - %masked:_(s32) = G_AND %narrow, %masklow30 - %extend:_(s64) = G_ZEXT %masked - %shiftamt:_(s32) = G_CONSTANT i32 2 - %shl:_(s64) = G_SHL %extend, %shiftamt - $vgpr0_vgpr1 = COPY %shl + ; GFX9-NEXT: %narrow:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: %masklow30:_(i32) = G_CONSTANT i32 1073741823 + ; GFX9-NEXT: %masked:_(i32) = G_AND %narrow, %masklow30 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL %masked, [[C]](i32) + ; GFX9-NEXT: %shl:_(i64) = G_ZEXT [[SHL]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY %shl(i64) + %narrow:_(i32) = COPY $vgpr0 + %masklow30:_(i32) = G_CONSTANT i32 1073741823 + %masked:_(i32) = G_AND %narrow, %masklow30 + %extend:_(i64) = G_ZEXT %masked(i32) + %shiftamt:_(i32) = G_CONSTANT i32 2 + %shl:_(i64) = G_SHL %extend, %shiftamt(i32) + $vgpr0_vgpr1 = COPY %shl(i64) ... 
--- @@ -188,31 +188,31 @@ body: | ; GFX6-LABEL: name: narrow_shl_s64_by_2_from_sext_s32 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: %narrow:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: %masklow30:_(s32) = G_CONSTANT i32 1073741823 - ; GFX6-NEXT: %masked:_(s32) = G_AND %narrow, %masklow30 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL %masked, [[C]](s32) - ; GFX6-NEXT: %shl:_(s64) = G_ZEXT [[SHL]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY %shl(s64) + ; GFX6-NEXT: %narrow:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: %masklow30:_(i32) = G_CONSTANT i32 1073741823 + ; GFX6-NEXT: %masked:_(i32) = G_AND %narrow, %masklow30 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL %masked, [[C]](i32) + ; GFX6-NEXT: %shl:_(i64) = G_ZEXT [[SHL]](i32) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY %shl(i64) ; ; GFX9-LABEL: name: narrow_shl_s64_by_2_from_sext_s32 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: %narrow:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: %masklow30:_(s32) = G_CONSTANT i32 1073741823 - ; GFX9-NEXT: %masked:_(s32) = G_AND %narrow, %masklow30 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL %masked, [[C]](s32) - ; GFX9-NEXT: %shl:_(s64) = G_ZEXT [[SHL]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY %shl(s64) - %narrow:_(s32) = COPY $vgpr0 - %masklow30:_(s32) = G_CONSTANT i32 1073741823 - %masked:_(s32) = G_AND %narrow, %masklow30 - %extend:_(s64) = G_SEXT %masked - %shiftamt:_(s32) = G_CONSTANT i32 2 - %shl:_(s64) = G_SHL %extend, %shiftamt - $vgpr0_vgpr1 = COPY %shl + ; GFX9-NEXT: %narrow:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: %masklow30:_(i32) = G_CONSTANT i32 1073741823 + ; GFX9-NEXT: %masked:_(i32) = G_AND %narrow, %masklow30 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL %masked, [[C]](i32) + ; GFX9-NEXT: %shl:_(i64) = G_ZEXT [[SHL]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY %shl(i64) + %narrow:_(i32) = COPY $vgpr0 + %masklow30:_(i32) = G_CONSTANT i32 1073741823 + %masked:_(i32) = G_AND %narrow, %masklow30 + %extend:_(i64) = G_SEXT %masked(i32) + %shiftamt:_(i32) = G_CONSTANT i32 2 + %shl:_(i64) = G_SHL %extend, %shiftamt(i32) + $vgpr0_vgpr1 = COPY %shl(i64) ... 
--- @@ -226,32 +226,32 @@ body: | ; GFX6-LABEL: name: narrow_shl_s64_by_2_from_zext_s32_lookthrough_amount ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: %narrow:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: %masklow30:_(s32) = G_CONSTANT i32 1073741823 - ; GFX6-NEXT: %masked:_(s32) = G_AND %narrow, %masklow30 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL %masked, [[C]](s32) - ; GFX6-NEXT: %shl:_(s64) = G_ZEXT [[SHL]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY %shl(s64) + ; GFX6-NEXT: %narrow:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: %masklow30:_(i32) = G_CONSTANT i32 1073741823 + ; GFX6-NEXT: %masked:_(i32) = G_AND %narrow, %masklow30 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL %masked, [[C]](i32) + ; GFX6-NEXT: %shl:_(i64) = G_ZEXT [[SHL]](i32) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY %shl(i64) ; ; GFX9-LABEL: name: narrow_shl_s64_by_2_from_zext_s32_lookthrough_amount ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: %narrow:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: %masklow30:_(s32) = G_CONSTANT i32 1073741823 - ; GFX9-NEXT: %masked:_(s32) = G_AND %narrow, %masklow30 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL %masked, [[C]](s32) - ; GFX9-NEXT: %shl:_(s64) = G_ZEXT [[SHL]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY %shl(s64) - %narrow:_(s32) = COPY $vgpr0 - %masklow30:_(s32) = G_CONSTANT i32 1073741823 - %masked:_(s32) = G_AND %narrow, %masklow30 - %extend:_(s64) = G_ZEXT %masked - %shiftamt64:_(s64) = G_CONSTANT i64 2 - %shiftamt:_(s32) = G_TRUNC %shiftamt64 - %shl:_(s64) = G_SHL %extend, %shiftamt - $vgpr0_vgpr1 = COPY %shl + ; GFX9-NEXT: %narrow:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: %masklow30:_(i32) = G_CONSTANT i32 1073741823 + ; GFX9-NEXT: %masked:_(i32) = G_AND %narrow, %masklow30 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL %masked, [[C]](i32) + ; GFX9-NEXT: %shl:_(i64) = G_ZEXT [[SHL]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY %shl(i64) + %narrow:_(i32) = COPY $vgpr0 + %masklow30:_(i32) = G_CONSTANT i32 1073741823 + %masked:_(i32) = G_AND %narrow, %masklow30 + %extend:_(i64) = G_ZEXT %masked(i32) + %shiftamt64:_(i64) = G_CONSTANT i64 2 + %shiftamt:_(i32) = G_TRUNC %shiftamt64(i64) + %shl:_(i64) = G_SHL %extend, %shiftamt(i32) + $vgpr0_vgpr1 = COPY %shl(i64) ... 
# Can't introduce a 16-bit shift before gfx8 @@ -266,34 +266,34 @@ body: | ; GFX6-LABEL: name: narrow_shl_s32_by_2_from_zext_s16 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: %argument:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: %narrow:_(s16) = G_TRUNC %argument(s32) - ; GFX6-NEXT: %masklow14:_(s16) = G_CONSTANT i16 16383 - ; GFX6-NEXT: %masked:_(s16) = G_AND %narrow, %masklow14 - ; GFX6-NEXT: %extend:_(s32) = G_ZEXT %masked(s16) - ; GFX6-NEXT: %shiftamt:_(s32) = G_CONSTANT i32 2 - ; GFX6-NEXT: %shl:_(s32) = G_SHL %extend, %shiftamt(s32) - ; GFX6-NEXT: $vgpr0 = COPY %shl(s32) + ; GFX6-NEXT: %argument:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: %narrow:_(i16) = G_TRUNC %argument(i32) + ; GFX6-NEXT: %masklow14:_(i16) = G_CONSTANT i16 16383 + ; GFX6-NEXT: %masked:_(i16) = G_AND %narrow, %masklow14 + ; GFX6-NEXT: %extend:_(i32) = G_ZEXT %masked(i16) + ; GFX6-NEXT: %shiftamt:_(i32) = G_CONSTANT i32 2 + ; GFX6-NEXT: %shl:_(i32) = G_SHL %extend, %shiftamt(i32) + ; GFX6-NEXT: $vgpr0 = COPY %shl(i32) ; ; GFX9-LABEL: name: narrow_shl_s32_by_2_from_zext_s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: %argument:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: %narrow:_(s16) = G_TRUNC %argument(s32) - ; GFX9-NEXT: %masklow14:_(s16) = G_CONSTANT i16 16383 - ; GFX9-NEXT: %masked:_(s16) = G_AND %narrow, %masklow14 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 2 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL %masked, [[C]](s16) - ; GFX9-NEXT: %shl:_(s32) = G_ZEXT [[SHL]](s16) - ; GFX9-NEXT: $vgpr0 = COPY %shl(s32) - %argument:_(s32) = COPY $vgpr0 - %narrow:_(s16) = G_TRUNC %argument - %masklow14:_(s16) = G_CONSTANT i16 16383 - %masked:_(s16) = G_AND %narrow, %masklow14 - %extend:_(s32) = G_ZEXT %masked - %shiftamt:_(s32) = G_CONSTANT i32 2 - %shl:_(s32) = G_SHL %extend, %shiftamt - $vgpr0 = COPY %shl + ; GFX9-NEXT: %argument:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: %narrow:_(i16) = G_TRUNC %argument(i32) + ; GFX9-NEXT: %masklow14:_(i16) = G_CONSTANT i16 16383 + ; GFX9-NEXT: %masked:_(i16) = G_AND %narrow, %masklow14 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 2 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL %masked, [[C]](i16) + ; GFX9-NEXT: %shl:_(i32) = G_ZEXT [[SHL]](i16) + ; GFX9-NEXT: $vgpr0 = COPY %shl(i32) + %argument:_(i32) = COPY $vgpr0 + %narrow:_(i16) = G_TRUNC %argument(i32) + %masklow14:_(i16) = G_CONSTANT i16 16383 + %masked:_(i16) = G_AND %narrow, %masklow14 + %extend:_(i32) = G_ZEXT %masked(i16) + %shiftamt:_(i32) = G_CONSTANT i32 2 + %shl:_(i32) = G_SHL %extend, %shiftamt(i32) + $vgpr0 = COPY %shl(i32) ... 
--- @@ -307,34 +307,34 @@ body: | ; GFX6-LABEL: name: narrow_shl_s64_by_2_from_zext_s16 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: %argument:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: %narrow:_(s16) = G_TRUNC %argument(s32) - ; GFX6-NEXT: %masklow14:_(s16) = G_CONSTANT i16 16383 - ; GFX6-NEXT: %masked:_(s16) = G_AND %narrow, %masklow14 - ; GFX6-NEXT: %extend:_(s64) = G_ZEXT %masked(s16) - ; GFX6-NEXT: %shiftamt:_(s32) = G_CONSTANT i32 2 - ; GFX6-NEXT: %shl:_(s64) = G_SHL %extend, %shiftamt(s32) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY %shl(s64) + ; GFX6-NEXT: %argument:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: %narrow:_(i16) = G_TRUNC %argument(i32) + ; GFX6-NEXT: %masklow14:_(i16) = G_CONSTANT i16 16383 + ; GFX6-NEXT: %masked:_(i16) = G_AND %narrow, %masklow14 + ; GFX6-NEXT: %extend:_(i64) = G_ZEXT %masked(i16) + ; GFX6-NEXT: %shiftamt:_(i32) = G_CONSTANT i32 2 + ; GFX6-NEXT: %shl:_(i64) = G_SHL %extend, %shiftamt(i32) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY %shl(i64) ; ; GFX9-LABEL: name: narrow_shl_s64_by_2_from_zext_s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: %argument:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: %narrow:_(s16) = G_TRUNC %argument(s32) - ; GFX9-NEXT: %masklow14:_(s16) = G_CONSTANT i16 16383 - ; GFX9-NEXT: %masked:_(s16) = G_AND %narrow, %masklow14 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 2 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL %masked, [[C]](s16) - ; GFX9-NEXT: %shl:_(s64) = G_ZEXT [[SHL]](s16) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY %shl(s64) - %argument:_(s32) = COPY $vgpr0 - %narrow:_(s16) = G_TRUNC %argument - %masklow14:_(s16) = G_CONSTANT i16 16383 - %masked:_(s16) = G_AND %narrow, %masklow14 - %extend:_(s64) = G_ZEXT %masked - %shiftamt:_(s32) = G_CONSTANT i32 2 - %shl:_(s64) = G_SHL %extend, %shiftamt - $vgpr0_vgpr1 = COPY %shl + ; GFX9-NEXT: %argument:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: %narrow:_(i16) = G_TRUNC %argument(i32) + ; GFX9-NEXT: %masklow14:_(i16) = G_CONSTANT i16 16383 + ; GFX9-NEXT: %masked:_(i16) = G_AND %narrow, %masklow14 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 2 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL %masked, [[C]](i16) + ; GFX9-NEXT: %shl:_(i64) = G_ZEXT [[SHL]](i16) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY %shl(i64) + %argument:_(i32) = COPY $vgpr0 + %narrow:_(i16) = G_TRUNC %argument(i32) + %masklow14:_(i16) = G_CONSTANT i16 16383 + %masked:_(i16) = G_AND %narrow, %masklow14 + %extend:_(i64) = G_ZEXT %masked(i16) + %shiftamt:_(i32) = G_CONSTANT i32 2 + %shl:_(i64) = G_SHL %extend, %shiftamt(i32) + $vgpr0_vgpr1 = COPY %shl(i64) ... --- @@ -348,19 +348,19 @@ body: | ; GFX6-LABEL: name: do_not_shl_s32_zero_by_16_from_zext_s16 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: %extend:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: $vgpr0 = COPY %extend(s32) + ; GFX6-NEXT: %extend:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: $vgpr0 = COPY %extend(i32) ; ; GFX9-LABEL: name: do_not_shl_s32_zero_by_16_from_zext_s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: %extend:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: $vgpr0 = COPY %extend(s32) - %zero:_(s16) = G_CONSTANT i16 0 - %extend:_(s32) = G_ZEXT %zero:_(s16) - %shiftamt:_(s16) = G_CONSTANT i16 16 - %shl:_(s32) = G_SHL %extend, %shiftamt(s16) - $vgpr0 = COPY %shl + ; GFX9-NEXT: %extend:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: $vgpr0 = COPY %extend(i32) + %zero:_(i16) = G_CONSTANT i16 0 + %extend:_(i32) = G_ZEXT %zero(i16) + %shiftamt:_(i16) = G_CONSTANT i16 16 + %shl:_(i32) = G_SHL %extend, %shiftamt(i16) + $vgpr0 = COPY %shl(i32) ... 
--- @@ -374,23 +374,23 @@ body: | ; GFX6-LABEL: name: do_not_shl_v2s32_zero_by_16_from_zext_v2s16 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: %6:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: %shl:_(<2 x s32>) = G_BUILD_VECTOR %6(s32), %6(s32) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY %shl(<2 x s32>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: %shl:_(<2 x i32>) = G_BUILD_VECTOR [[C]](i32), [[C]](i32) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY %shl(<2 x i32>) ; ; GFX9-LABEL: name: do_not_shl_v2s32_zero_by_16_from_zext_v2s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: %6:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: %shl:_(<2 x s32>) = G_BUILD_VECTOR %6(s32), %6(s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY %shl(<2 x s32>) - %zero:_(s16) = G_CONSTANT i16 0 - %zerovector:_(<2 x s16>) = G_BUILD_VECTOR %zero, %zero:_(s16) - %shiftamt:_(s16) = G_CONSTANT i16 16 - %shiftamtvector:_(<2 x s16>) = G_BUILD_VECTOR %shiftamt, %shiftamt:_(s16) - %extend:_(<2 x s32>) = G_ZEXT %zerovector:_(<2 x s16>) - %shl:_(<2 x s32>) = G_SHL %extend, %shiftamtvector - $vgpr0_vgpr1 = COPY %shl + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: %shl:_(<2 x i32>) = G_BUILD_VECTOR [[C]](i32), [[C]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY %shl(<2 x i32>) + %zero:_(i16) = G_CONSTANT i16 0 + %zerovector:_(<2 x i16>) = G_BUILD_VECTOR %zero(i16), %zero(i16) + %shiftamt:_(i16) = G_CONSTANT i16 16 + %shiftamtvector:_(<2 x i16>) = G_BUILD_VECTOR %shiftamt(i16), %shiftamt(i16) + %extend:_(<2 x i32>) = G_ZEXT %zerovector(<2 x i16>) + %shl:_(<2 x i32>) = G_SHL %extend, %shiftamtvector(<2 x i16>) + $vgpr0_vgpr1 = COPY %shl(<2 x i32>) ... --- @@ -404,26 +404,26 @@ body: | ; GFX6-LABEL: name: do_not_shl_s32_by_16_from_zext_s16 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: %argument:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: %truncate:_(s16) = G_TRUNC %argument(s32) - ; GFX6-NEXT: %shiftamt:_(s16) = G_CONSTANT i16 16 - ; GFX6-NEXT: %extend:_(s32) = G_ZEXT %truncate(s16) - ; GFX6-NEXT: %shl:_(s32) = G_SHL %extend, %shiftamt(s16) - ; GFX6-NEXT: $vgpr0 = COPY %shl(s32) + ; GFX6-NEXT: %argument:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: %truncate:_(i16) = G_TRUNC %argument(i32) + ; GFX6-NEXT: %shiftamt:_(i16) = G_CONSTANT i16 16 + ; GFX6-NEXT: %extend:_(i32) = G_ZEXT %truncate(i16) + ; GFX6-NEXT: %shl:_(i32) = G_SHL %extend, %shiftamt(i16) + ; GFX6-NEXT: $vgpr0 = COPY %shl(i32) ; ; GFX9-LABEL: name: do_not_shl_s32_by_16_from_zext_s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: %argument:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: %truncate:_(s16) = G_TRUNC %argument(s32) - ; GFX9-NEXT: %shiftamt:_(s16) = G_CONSTANT i16 16 - ; GFX9-NEXT: %extend:_(s32) = G_ZEXT %truncate(s16) - ; GFX9-NEXT: %shl:_(s32) = G_SHL %extend, %shiftamt(s16) - ; GFX9-NEXT: $vgpr0 = COPY %shl(s32) - %argument:_(s32) = COPY $vgpr0 - %truncate:_(s16) = G_TRUNC %argument:_(s32) - %shiftamt:_(s16) = G_CONSTANT i16 16 - %extend:_(s32) = G_ZEXT %truncate:_(s16) - %shl:_(s32) = G_SHL %extend, %shiftamt(s16) - $vgpr0 = COPY %shl + ; GFX9-NEXT: %argument:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: %truncate:_(i16) = G_TRUNC %argument(i32) + ; GFX9-NEXT: %shiftamt:_(i16) = G_CONSTANT i16 16 + ; GFX9-NEXT: %extend:_(i32) = G_ZEXT %truncate(i16) + ; GFX9-NEXT: %shl:_(i32) = G_SHL %extend, %shiftamt(i16) + ; GFX9-NEXT: $vgpr0 = COPY %shl(i32) + %argument:_(i32) = COPY $vgpr0 + %truncate:_(i16) = G_TRUNC %argument(i32) + %shiftamt:_(i16) = G_CONSTANT i16 16 + %extend:_(i32) = G_ZEXT %truncate(i16) + %shl:_(i32) = G_SHL %extend, 
%shiftamt(i16) + $vgpr0 = COPY %shl(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.prelegal.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.prelegal.mir index 3780542cd8799..fde06a347c31e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.prelegal.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.prelegal.mir @@ -12,34 +12,34 @@ body: | ; GFX6-LABEL: name: narrow_shl_s32_by_2_from_zext_s16 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: %argument:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: %narrow:_(s16) = G_TRUNC %argument(s32) - ; GFX6-NEXT: %masklow14:_(s16) = G_CONSTANT i16 16383 - ; GFX6-NEXT: %masked:_(s16) = G_AND %narrow, %masklow14 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 2 - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL %masked, [[C]](s16) - ; GFX6-NEXT: %shl:_(s32) = G_ZEXT [[SHL]](s16) - ; GFX6-NEXT: $vgpr0 = COPY %shl(s32) + ; GFX6-NEXT: %argument:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: %narrow:_(i16) = G_TRUNC %argument(i32) + ; GFX6-NEXT: %masklow14:_(i16) = G_CONSTANT i16 16383 + ; GFX6-NEXT: %masked:_(i16) = G_AND %narrow, %masklow14 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 2 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL %masked, [[C]](i16) + ; GFX6-NEXT: %shl:_(i32) = G_ZEXT [[SHL]](i16) + ; GFX6-NEXT: $vgpr0 = COPY %shl(i32) ; ; GFX9-LABEL: name: narrow_shl_s32_by_2_from_zext_s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: %argument:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: %narrow:_(s16) = G_TRUNC %argument(s32) - ; GFX9-NEXT: %masklow14:_(s16) = G_CONSTANT i16 16383 - ; GFX9-NEXT: %masked:_(s16) = G_AND %narrow, %masklow14 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 2 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL %masked, [[C]](s16) - ; GFX9-NEXT: %shl:_(s32) = G_ZEXT [[SHL]](s16) - ; GFX9-NEXT: $vgpr0 = COPY %shl(s32) - %argument:_(s32) = COPY $vgpr0 - %narrow:_(s16) = G_TRUNC %argument - %masklow14:_(s16) = G_CONSTANT i16 16383 - %masked:_(s16) = G_AND %narrow, %masklow14 - %extend:_(s32) = G_ZEXT %masked - %shiftamt:_(s32) = G_CONSTANT i32 2 - %shl:_(s32) = G_SHL %extend, %shiftamt - $vgpr0 = COPY %shl + ; GFX9-NEXT: %argument:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: %narrow:_(i16) = G_TRUNC %argument(i32) + ; GFX9-NEXT: %masklow14:_(i16) = G_CONSTANT i16 16383 + ; GFX9-NEXT: %masked:_(i16) = G_AND %narrow, %masklow14 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 2 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL %masked, [[C]](i16) + ; GFX9-NEXT: %shl:_(i32) = G_ZEXT [[SHL]](i16) + ; GFX9-NEXT: $vgpr0 = COPY %shl(i32) + %argument:_(i32) = COPY $vgpr0 + %narrow:_(i16) = G_TRUNC %argument(i32) + %masklow14:_(i16) = G_CONSTANT i16 16383 + %masked:_(i16) = G_AND %narrow, %masklow14 + %extend:_(i32) = G_ZEXT %masked(i16) + %shiftamt:_(i32) = G_CONSTANT i32 2 + %shl:_(i32) = G_SHL %extend, %shiftamt(i32) + $vgpr0 = COPY %shl(i32) ... 
--- @@ -52,34 +52,34 @@ body: | ; GFX6-LABEL: name: narrow_shl_s64_by_2_from_zext_s16 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: %argument:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: %narrow:_(s16) = G_TRUNC %argument(s32) - ; GFX6-NEXT: %masklow14:_(s16) = G_CONSTANT i16 16383 - ; GFX6-NEXT: %masked:_(s16) = G_AND %narrow, %masklow14 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 2 - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL %masked, [[C]](s16) - ; GFX6-NEXT: %shl:_(s64) = G_ZEXT [[SHL]](s16) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY %shl(s64) + ; GFX6-NEXT: %argument:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: %narrow:_(i16) = G_TRUNC %argument(i32) + ; GFX6-NEXT: %masklow14:_(i16) = G_CONSTANT i16 16383 + ; GFX6-NEXT: %masked:_(i16) = G_AND %narrow, %masklow14 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 2 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL %masked, [[C]](i16) + ; GFX6-NEXT: %shl:_(i64) = G_ZEXT [[SHL]](i16) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY %shl(i64) ; ; GFX9-LABEL: name: narrow_shl_s64_by_2_from_zext_s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: %argument:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: %narrow:_(s16) = G_TRUNC %argument(s32) - ; GFX9-NEXT: %masklow14:_(s16) = G_CONSTANT i16 16383 - ; GFX9-NEXT: %masked:_(s16) = G_AND %narrow, %masklow14 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 2 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL %masked, [[C]](s16) - ; GFX9-NEXT: %shl:_(s64) = G_ZEXT [[SHL]](s16) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY %shl(s64) - %argument:_(s32) = COPY $vgpr0 - %narrow:_(s16) = G_TRUNC %argument - %masklow14:_(s16) = G_CONSTANT i16 16383 - %masked:_(s16) = G_AND %narrow, %masklow14 - %extend:_(s64) = G_ZEXT %masked - %shiftamt:_(s32) = G_CONSTANT i32 2 - %shl:_(s64) = G_SHL %extend, %shiftamt - $vgpr0_vgpr1 = COPY %shl + ; GFX9-NEXT: %argument:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: %narrow:_(i16) = G_TRUNC %argument(i32) + ; GFX9-NEXT: %masklow14:_(i16) = G_CONSTANT i16 16383 + ; GFX9-NEXT: %masked:_(i16) = G_AND %narrow, %masklow14 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 2 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL %masked, [[C]](i16) + ; GFX9-NEXT: %shl:_(i64) = G_ZEXT [[SHL]](i16) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY %shl(i64) + %argument:_(i32) = COPY $vgpr0 + %narrow:_(i16) = G_TRUNC %argument(i32) + %masklow14:_(i16) = G_CONSTANT i16 16383 + %masked:_(i16) = G_AND %narrow, %masklow14 + %extend:_(i64) = G_ZEXT %masked(i16) + %shiftamt:_(i32) = G_CONSTANT i32 2 + %shl:_(i64) = G_SHL %extend, %shiftamt(i32) + $vgpr0_vgpr1 = COPY %shl(i64) ... 
--- @@ -92,35 +92,35 @@ body: | ; GFX6-LABEL: name: narrow_shl_s16_by_2_from_zext_s8 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: %argument:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: %narrow:_(s8) = G_TRUNC %argument(s32) - ; GFX6-NEXT: %masklow6:_(s8) = G_CONSTANT i8 63 - ; GFX6-NEXT: %masked:_(s8) = G_AND %narrow, %masklow6 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 2 - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s8) = G_SHL %masked, [[C]](s8) - ; GFX6-NEXT: %result:_(s32) = G_ZEXT [[SHL]](s8) - ; GFX6-NEXT: $vgpr0 = COPY %result(s32) + ; GFX6-NEXT: %argument:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: %narrow:_(i8) = G_TRUNC %argument(i32) + ; GFX6-NEXT: %masklow6:_(i8) = G_CONSTANT i8 63 + ; GFX6-NEXT: %masked:_(i8) = G_AND %narrow, %masklow6 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i8) = G_CONSTANT i8 2 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i8) = G_SHL %masked, [[C]](i8) + ; GFX6-NEXT: %result:_(i32) = G_ZEXT [[SHL]](i8) + ; GFX6-NEXT: $vgpr0 = COPY %result(i32) ; ; GFX9-LABEL: name: narrow_shl_s16_by_2_from_zext_s8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: %argument:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: %narrow:_(s8) = G_TRUNC %argument(s32) - ; GFX9-NEXT: %masklow6:_(s8) = G_CONSTANT i8 63 - ; GFX9-NEXT: %masked:_(s8) = G_AND %narrow, %masklow6 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 2 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s8) = G_SHL %masked, [[C]](s8) - ; GFX9-NEXT: %result:_(s32) = G_ZEXT [[SHL]](s8) - ; GFX9-NEXT: $vgpr0 = COPY %result(s32) - %argument:_(s32) = COPY $vgpr0 - %narrow:_(s8) = G_TRUNC %argument - %masklow6:_(s8) = G_CONSTANT i8 63 - %masked:_(s8) = G_AND %narrow, %masklow6 - %extend:_(s16) = G_ZEXT %masked - %shiftamt:_(s16) = G_CONSTANT i16 2 - %shl:_(s16) = G_SHL %extend, %shiftamt - %result:_(s32) = G_ANYEXT %shl - $vgpr0 = COPY %result + ; GFX9-NEXT: %argument:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: %narrow:_(i8) = G_TRUNC %argument(i32) + ; GFX9-NEXT: %masklow6:_(i8) = G_CONSTANT i8 63 + ; GFX9-NEXT: %masked:_(i8) = G_AND %narrow, %masklow6 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i8) = G_CONSTANT i8 2 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i8) = G_SHL %masked, [[C]](i8) + ; GFX9-NEXT: %result:_(i32) = G_ZEXT [[SHL]](i8) + ; GFX9-NEXT: $vgpr0 = COPY %result(i32) + %argument:_(i32) = COPY $vgpr0 + %narrow:_(i8) = G_TRUNC %argument(i32) + %masklow6:_(i8) = G_CONSTANT i8 63 + %masked:_(i8) = G_AND %narrow, %masklow6 + %extend:_(i16) = G_ZEXT %masked(i8) + %shiftamt:_(i16) = G_CONSTANT i16 2 + %shl:_(i16) = G_SHL %extend, %shiftamt(i16) + %result:_(i32) = G_ANYEXT %shl(i16) + $vgpr0 = COPY %result(i32) ... 
--- @@ -133,37 +133,37 @@ body: | ; GFX6-LABEL: name: narrow_shl_v2s32_by_2_from_zext_v2s16 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: %narrow:_(<2 x s16>) = COPY $vgpr0 - ; GFX6-NEXT: %masklow14:_(s16) = G_CONSTANT i16 16383 - ; GFX6-NEXT: %masklow14vec:_(<2 x s16>) = G_BUILD_VECTOR %masklow14(s16), %masklow14(s16) - ; GFX6-NEXT: %masked:_(<2 x s16>) = G_AND %narrow, %masklow14vec - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 2 - ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL %masked, [[BUILD_VECTOR]](<2 x s16>) - ; GFX6-NEXT: %shl:_(<2 x s32>) = G_ZEXT [[SHL]](<2 x s16>) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY %shl(<2 x s32>) + ; GFX6-NEXT: %narrow:_(<2 x i16>) = COPY $vgpr0 + ; GFX6-NEXT: %masklow14:_(i16) = G_CONSTANT i16 16383 + ; GFX6-NEXT: %masklow14vec:_(<2 x i16>) = G_BUILD_VECTOR %masklow14(i16), %masklow14(i16) + ; GFX6-NEXT: %masked:_(<2 x i16>) = G_AND %narrow, %masklow14vec + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 2 + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[C]](i16), [[C]](i16) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(<2 x i16>) = G_SHL %masked, [[BUILD_VECTOR]](<2 x i16>) + ; GFX6-NEXT: %shl:_(<2 x i32>) = G_ZEXT [[SHL]](<2 x i16>) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY %shl(<2 x i32>) ; ; GFX9-LABEL: name: narrow_shl_v2s32_by_2_from_zext_v2s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: %narrow:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: %masklow14:_(s16) = G_CONSTANT i16 16383 - ; GFX9-NEXT: %masklow14vec:_(<2 x s16>) = G_BUILD_VECTOR %masklow14(s16), %masklow14(s16) - ; GFX9-NEXT: %masked:_(<2 x s16>) = G_AND %narrow, %masklow14vec - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 2 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL %masked, [[BUILD_VECTOR]](<2 x s16>) - ; GFX9-NEXT: %shl:_(<2 x s32>) = G_ZEXT [[SHL]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY %shl(<2 x s32>) - %narrow:_(<2 x s16>) = COPY $vgpr0 - %masklow14:_(s16) = G_CONSTANT i16 16383 - %masklow14vec:_(<2 x s16>) = G_BUILD_VECTOR %masklow14, %masklow14 - %masked:_(<2 x s16>) = G_AND %narrow, %masklow14vec - %extend:_(<2 x s32>) = G_ZEXT %masked - %shiftamt:_(s32) = G_CONSTANT i32 2 - %shiftamtvec:_(<2 x s32>) = G_BUILD_VECTOR %shiftamt, %shiftamt - %shl:_(<2 x s32>) = G_SHL %extend, %shiftamtvec - $vgpr0_vgpr1 = COPY %shl + ; GFX9-NEXT: %narrow:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: %masklow14:_(i16) = G_CONSTANT i16 16383 + ; GFX9-NEXT: %masklow14vec:_(<2 x i16>) = G_BUILD_VECTOR %masklow14(i16), %masklow14(i16) + ; GFX9-NEXT: %masked:_(<2 x i16>) = G_AND %narrow, %masklow14vec + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 2 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[C]](i16), [[C]](i16) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x i16>) = G_SHL %masked, [[BUILD_VECTOR]](<2 x i16>) + ; GFX9-NEXT: %shl:_(<2 x i32>) = G_ZEXT [[SHL]](<2 x i16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY %shl(<2 x i32>) + %narrow:_(<2 x i16>) = COPY $vgpr0 + %masklow14:_(i16) = G_CONSTANT i16 16383 + %masklow14vec:_(<2 x i16>) = G_BUILD_VECTOR %masklow14(i16), %masklow14(i16) + %masked:_(<2 x i16>) = G_AND %narrow, %masklow14vec + %extend:_(<2 x i32>) = G_ZEXT %masked(<2 x i16>) + %shiftamt:_(i32) = G_CONSTANT i32 2 + %shiftamtvec:_(<2 x i32>) = G_BUILD_VECTOR %shiftamt(i32), %shiftamt(i32) + %shl:_(<2 x i32>) = G_SHL %extend, %shiftamtvec(<2 x 
i32>) + $vgpr0_vgpr1 = COPY %shl(<2 x i32>) ... --- @@ -176,37 +176,37 @@ body: | ; GFX6-LABEL: name: narrow_shl_v2s64_by_2_from_anyext_v2s32 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: %narrow:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: %masklow30:_(s32) = G_CONSTANT i32 1073741823 - ; GFX6-NEXT: %masklow30vec:_(<2 x s32>) = G_BUILD_VECTOR %masklow30(s32), %masklow30(s32) - ; GFX6-NEXT: %masked:_(<2 x s32>) = G_AND %narrow, %masklow30vec - ; GFX6-NEXT: %shiftamt:_(s32) = G_CONSTANT i32 2 - ; GFX6-NEXT: %shiftamtvec:_(<2 x s32>) = G_BUILD_VECTOR %shiftamt(s32), %shiftamt(s32) - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(<2 x s32>) = G_SHL %masked, %shiftamtvec(<2 x s32>) - ; GFX6-NEXT: %shl:_(<2 x s64>) = G_ZEXT [[SHL]](<2 x s32>) - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %shl(<2 x s64>) + ; GFX6-NEXT: %narrow:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: %masklow30:_(i32) = G_CONSTANT i32 1073741823 + ; GFX6-NEXT: %masklow30vec:_(<2 x i32>) = G_BUILD_VECTOR %masklow30(i32), %masklow30(i32) + ; GFX6-NEXT: %masked:_(<2 x i32>) = G_AND %narrow, %masklow30vec + ; GFX6-NEXT: %shiftamt:_(i32) = G_CONSTANT i32 2 + ; GFX6-NEXT: %shiftamtvec:_(<2 x i32>) = G_BUILD_VECTOR %shiftamt(i32), %shiftamt(i32) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(<2 x i32>) = G_SHL %masked, %shiftamtvec(<2 x i32>) + ; GFX6-NEXT: %shl:_(<2 x i64>) = G_ZEXT [[SHL]](<2 x i32>) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %shl(<2 x i64>) ; ; GFX9-LABEL: name: narrow_shl_v2s64_by_2_from_anyext_v2s32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: %narrow:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: %masklow30:_(s32) = G_CONSTANT i32 1073741823 - ; GFX9-NEXT: %masklow30vec:_(<2 x s32>) = G_BUILD_VECTOR %masklow30(s32), %masklow30(s32) - ; GFX9-NEXT: %masked:_(<2 x s32>) = G_AND %narrow, %masklow30vec - ; GFX9-NEXT: %shiftamt:_(s32) = G_CONSTANT i32 2 - ; GFX9-NEXT: %shiftamtvec:_(<2 x s32>) = G_BUILD_VECTOR %shiftamt(s32), %shiftamt(s32) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s32>) = G_SHL %masked, %shiftamtvec(<2 x s32>) - ; GFX9-NEXT: %shl:_(<2 x s64>) = G_ZEXT [[SHL]](<2 x s32>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %shl(<2 x s64>) - %narrow:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %masklow30:_(s32) = G_CONSTANT i32 1073741823 - %masklow30vec:_(<2 x s32>) = G_BUILD_VECTOR %masklow30, %masklow30 - %masked:_(<2 x s32>) = G_AND %narrow, %masklow30vec - %extend:_(<2 x s64>) = G_ANYEXT %masked - %shiftamt:_(s32) = G_CONSTANT i32 2 - %shiftamtvec:_(<2 x s32>) = G_BUILD_VECTOR %shiftamt, %shiftamt - %shl:_(<2 x s64>) = G_SHL %extend, %shiftamtvec - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %shl + ; GFX9-NEXT: %narrow:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: %masklow30:_(i32) = G_CONSTANT i32 1073741823 + ; GFX9-NEXT: %masklow30vec:_(<2 x i32>) = G_BUILD_VECTOR %masklow30(i32), %masklow30(i32) + ; GFX9-NEXT: %masked:_(<2 x i32>) = G_AND %narrow, %masklow30vec + ; GFX9-NEXT: %shiftamt:_(i32) = G_CONSTANT i32 2 + ; GFX9-NEXT: %shiftamtvec:_(<2 x i32>) = G_BUILD_VECTOR %shiftamt(i32), %shiftamt(i32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x i32>) = G_SHL %masked, %shiftamtvec(<2 x i32>) + ; GFX9-NEXT: %shl:_(<2 x i64>) = G_ZEXT [[SHL]](<2 x i32>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %shl(<2 x i64>) + %narrow:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %masklow30:_(i32) = G_CONSTANT i32 1073741823 + %masklow30vec:_(<2 x i32>) = G_BUILD_VECTOR %masklow30(i32), %masklow30(i32) + %masked:_(<2 x i32>) = G_AND %narrow, %masklow30vec + %extend:_(<2 x i64>) = G_ANYEXT %masked(<2 x i32>) + %shiftamt:_(i32) = 
G_CONSTANT i32 2 + %shiftamtvec:_(<2 x i32>) = G_BUILD_VECTOR %shiftamt(i32), %shiftamt(i32) + %shl:_(<2 x i64>) = G_SHL %extend, %shiftamtvec(<2 x i32>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %shl(<2 x i64>) ... --- @@ -220,19 +220,19 @@ body: | ; GFX6-LABEL: name: do_not_shl_s32_zero_by_16_from_zext_s16 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: %extend:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: $vgpr0 = COPY %extend(s32) + ; GFX6-NEXT: %extend:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: $vgpr0 = COPY %extend(i32) ; ; GFX9-LABEL: name: do_not_shl_s32_zero_by_16_from_zext_s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: %extend:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: $vgpr0 = COPY %extend(s32) - %zero:_(s16) = G_CONSTANT i16 0 - %extend:_(s32) = G_ZEXT %zero:_(s16) - %shiftamt:_(s16) = G_CONSTANT i16 16 - %shl:_(s32) = G_SHL %extend, %shiftamt(s16) - $vgpr0 = COPY %shl + ; GFX9-NEXT: %extend:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: $vgpr0 = COPY %extend(i32) + %zero:_(i16) = G_CONSTANT i16 0 + %extend:_(i32) = G_ZEXT %zero(i16) + %shiftamt:_(i16) = G_CONSTANT i16 16 + %shl:_(i32) = G_SHL %extend, %shiftamt(i16) + $vgpr0 = COPY %shl(i32) ... --- @@ -246,23 +246,23 @@ body: | ; GFX6-LABEL: name: do_not_shl_v2s32_zero_by_16_from_zext_v2s16 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: %6:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: %shl:_(<2 x s32>) = G_BUILD_VECTOR %6(s32), %6(s32) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY %shl(<2 x s32>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: %shl:_(<2 x i32>) = G_BUILD_VECTOR [[C]](i32), [[C]](i32) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY %shl(<2 x i32>) ; ; GFX9-LABEL: name: do_not_shl_v2s32_zero_by_16_from_zext_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: %6:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: %shl:_(<2 x s32>) = G_BUILD_VECTOR %6(s32), %6(s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY %shl(<2 x s32>) - %zero:_(s16) = G_CONSTANT i16 0 - %zerovector:_(<2 x s16>) = G_BUILD_VECTOR %zero, %zero:_(s16) - %shiftamt:_(s16) = G_CONSTANT i16 16 - %shiftamtvector:_(<2 x s16>) = G_BUILD_VECTOR %shiftamt, %shiftamt:_(s16) - %extend:_(<2 x s32>) = G_ZEXT %zerovector:_(<2 x s16>) - %shl:_(<2 x s32>) = G_SHL %extend, %shiftamtvector - $vgpr0_vgpr1 = COPY %shl + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: %shl:_(<2 x i32>) = G_BUILD_VECTOR [[C]](i32), [[C]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY %shl(<2 x i32>) + %zero:_(i16) = G_CONSTANT i16 0 + %zerovector:_(<2 x i16>) = G_BUILD_VECTOR %zero(i16), %zero(i16) + %shiftamt:_(i16) = G_CONSTANT i16 16 + %shiftamtvector:_(<2 x i16>) = G_BUILD_VECTOR %shiftamt(i16), %shiftamt(i16) + %extend:_(<2 x i32>) = G_ZEXT %zerovector(<2 x i16>) + %shl:_(<2 x i32>) = G_SHL %extend, %shiftamtvector(<2 x i16>) + $vgpr0_vgpr1 = COPY %shl(<2 x i32>) ... 
--- @@ -276,26 +276,26 @@ body: | ; GFX6-LABEL: name: do_not_shl_s32_by_16_from_zext_s16 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: %argument:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: %truncate:_(s16) = G_TRUNC %argument(s32) - ; GFX6-NEXT: %shiftamt:_(s16) = G_CONSTANT i16 16 - ; GFX6-NEXT: %extend:_(s32) = G_ZEXT %truncate(s16) - ; GFX6-NEXT: %shl:_(s32) = G_SHL %extend, %shiftamt(s16) - ; GFX6-NEXT: $vgpr0 = COPY %shl(s32) + ; GFX6-NEXT: %argument:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: %truncate:_(i16) = G_TRUNC %argument(i32) + ; GFX6-NEXT: %shiftamt:_(i16) = G_CONSTANT i16 16 + ; GFX6-NEXT: %extend:_(i32) = G_ZEXT %truncate(i16) + ; GFX6-NEXT: %shl:_(i32) = G_SHL %extend, %shiftamt(i16) + ; GFX6-NEXT: $vgpr0 = COPY %shl(i32) ; ; GFX9-LABEL: name: do_not_shl_s32_by_16_from_zext_s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: %argument:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: %truncate:_(s16) = G_TRUNC %argument(s32) - ; GFX9-NEXT: %shiftamt:_(s16) = G_CONSTANT i16 16 - ; GFX9-NEXT: %extend:_(s32) = G_ZEXT %truncate(s16) - ; GFX9-NEXT: %shl:_(s32) = G_SHL %extend, %shiftamt(s16) - ; GFX9-NEXT: $vgpr0 = COPY %shl(s32) - %argument:_(s32) = COPY $vgpr0 - %truncate:_(s16) = G_TRUNC %argument:_(s32) - %shiftamt:_(s16) = G_CONSTANT i16 16 - %extend:_(s32) = G_ZEXT %truncate:_(s16) - %shl:_(s32) = G_SHL %extend, %shiftamt(s16) - $vgpr0 = COPY %shl + ; GFX9-NEXT: %argument:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: %truncate:_(i16) = G_TRUNC %argument(i32) + ; GFX9-NEXT: %shiftamt:_(i16) = G_CONSTANT i16 16 + ; GFX9-NEXT: %extend:_(i32) = G_ZEXT %truncate(i16) + ; GFX9-NEXT: %shl:_(i32) = G_SHL %extend, %shiftamt(i16) + ; GFX9-NEXT: $vgpr0 = COPY %shl(i32) + %argument:_(i32) = COPY $vgpr0 + %truncate:_(i16) = G_TRUNC %argument(i32) + %shiftamt:_(i16) = G_CONSTANT i16 16 + %extend:_(i32) = G_ZEXT %truncate(i16) + %shl:_(i32) = G_SHL %extend, %shiftamt(i16) + $vgpr0 = COPY %shl(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-narrow.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-narrow.mir index f939742ecba61..e8dbdab549a1b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-narrow.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-narrow.mir @@ -12,15 +12,15 @@ body: | ; CHECK-LABEL: name: narrow_shl_s64_32_s64amt ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[TRUNC]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_CONSTANT i64 32 - %2:_(s64) = G_SHL %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY]](i64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[C]](i32), [[TRUNC]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = G_CONSTANT i64 32 + %2:_(i64) = G_SHL %0, %1(i64) + $vgpr0_vgpr1 = COPY %2(i64) ... 
--- @@ -33,15 +33,15 @@ body: | ; CHECK-LABEL: name: narrow_shl_s64_32 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[TRUNC]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_CONSTANT i32 32 - %2:_(s64) = G_SHL %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY]](i64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[C]](i32), [[TRUNC]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = G_CONSTANT i32 32 + %2:_(i64) = G_SHL %0, %1(i32) + $vgpr0_vgpr1 = COPY %2(i64) ... --- @@ -54,17 +54,17 @@ body: | ; CHECK-LABEL: name: narrow_shl_s64_33 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[TRUNC]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C1]](s32), [[SHL]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_CONSTANT i32 33 - %2:_(s64) = G_SHL %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY]](i64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[TRUNC]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[C1]](i32), [[SHL]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = G_CONSTANT i32 33 + %2:_(i64) = G_SHL %0, %1(i32) + $vgpr0_vgpr1 = COPY %2(i64) ... --- @@ -77,14 +77,14 @@ body: | ; CHECK-LABEL: name: narrow_shl_s64_31 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[SHL]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_CONSTANT i32 31 - %2:_(s64) = G_SHL %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[COPY]], [[C]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[SHL]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = G_CONSTANT i32 31 + %2:_(i64) = G_SHL %0, %1(i32) + $vgpr0_vgpr1 = COPY %2(i64) ... 
--- @@ -97,17 +97,17 @@ body: | ; CHECK-LABEL: name: narrow_shl_s64_63 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[TRUNC]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C1]](s32), [[SHL]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_CONSTANT i32 63 - %2:_(s64) = G_SHL %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY]](i64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[TRUNC]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[C1]](i32), [[SHL]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = G_CONSTANT i32 63 + %2:_(i64) = G_SHL %0, %1(i32) + $vgpr0_vgpr1 = COPY %2(i64) ... --- @@ -120,12 +120,12 @@ body: | ; CHECK-LABEL: name: narrow_shl_s64_64 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_CONSTANT i32 64 - %2:_(s64) = G_SHL %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = G_CONSTANT i32 64 + %2:_(i64) = G_SHL %0, %1(i32) + $vgpr0_vgpr1 = COPY %2(i64) ... --- @@ -138,12 +138,12 @@ body: | ; CHECK-LABEL: name: narrow_shl_s64_65 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_CONSTANT i32 65 - %2:_(s64) = G_SHL %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = G_CONSTANT i32 65 + %2:_(i64) = G_SHL %0, %1(i32) + $vgpr0_vgpr1 = COPY %2(i64) ... --- @@ -156,14 +156,14 @@ body: | ; CHECK-LABEL: name: narrow_shl_s32_16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[SHL]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_CONSTANT i32 16 - %2:_(s32) = G_SHL %0, %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[C]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[SHL]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_CONSTANT i32 16 + %2:_(i32) = G_SHL %0, %1(i32) + $vgpr0 = COPY %2(i32) ... 
--- @@ -176,14 +176,14 @@ body: | ; CHECK-LABEL: name: narrow_shl_s32_17 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[SHL]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_CONSTANT i32 17 - %2:_(s32) = G_SHL %0, %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 17 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[C]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[SHL]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_CONSTANT i32 17 + %2:_(i32) = G_SHL %0, %1(i32) + $vgpr0 = COPY %2(i32) ... --- @@ -196,14 +196,14 @@ body: | ; CHECK-LABEL: name: narrow_shl_v2s32_17 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(<2 x s32>) = G_SHL [[COPY]], [[BUILD_VECTOR]](<2 x s32>) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[SHL]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_CONSTANT i32 17 - %2:_(<2 x s32>) = G_BUILD_VECTOR %1, %1 - %3:_(<2 x s32>) = G_SHL %0, %2 - $vgpr0_vgpr1 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 17 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[C]](i32), [[C]](i32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(<2 x i32>) = G_SHL [[COPY]], [[BUILD_VECTOR]](<2 x i32>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[SHL]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(i32) = G_CONSTANT i32 17 + %2:_(<2 x i32>) = G_BUILD_VECTOR %1(i32), %1(i32) + %3:_(<2 x i32>) = G_SHL %0, %2(<2 x i32>) + $vgpr0_vgpr1 = COPY %3(<2 x i32>) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-trunc-shift.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-trunc-shift.mir index df7fc56799137..e1d04d52b2c7c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-trunc-shift.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-trunc-shift.mir @@ -12,16 +12,16 @@ body: | ; CHECK-LABEL: name: trunc_s32_shl_s64_5 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[TRUNC]], [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[SHL]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_CONSTANT i32 1 - %2:_(s64) = G_SHL %0:_, %1 - %3:_(s32) = G_TRUNC %2 - $vgpr0 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY]](i64) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[TRUNC]], [[C]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[SHL]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = G_CONSTANT i32 1 + %2:_(i64) = G_SHL %0, %1(i32) + %3:_(i32) = G_TRUNC %2(i64) + $vgpr0 = COPY %3(i32) ... 
--- @@ -35,16 +35,16 @@ body: | ; CHECK-LABEL: name: trunc_s16_shl_s32_5 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s16) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_CONSTANT i32 1 - %2:_(s32) = G_SHL %0:_, %1 - %3:_(s16) = G_TRUNC %2 - S_ENDPGM 0, implicit %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[SHL]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](i16) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_CONSTANT i32 1 + %2:_(i32) = G_SHL %0, %1(i32) + %3:_(i16) = G_TRUNC %2(i32) + S_ENDPGM 0, implicit %3(i16) ... @@ -59,16 +59,16 @@ body: | ; CHECK-LABEL: name: trunc_s16_shl_s64_5 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s64) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s16) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_CONSTANT i32 1 - %2:_(s64) = G_SHL %0:_, %1 - %3:_(s16) = G_TRUNC %2 - S_ENDPGM 0, implicit %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[COPY]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[SHL]](i64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](i16) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = G_CONSTANT i32 1 + %2:_(i64) = G_SHL %0, %1(i32) + %3:_(i16) = G_TRUNC %2(i64) + S_ENDPGM 0, implicit %3(i16) ... @@ -81,21 +81,21 @@ body: | ; CHECK-LABEL: name: s16_trunc_s64_lshr_16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %amt:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], %amt(s32) - ; CHECK-NEXT: %trunc:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: %foo:_(s16) = G_CONSTANT i16 55 - ; CHECK-NEXT: %keep:_(s32) = G_MERGE_VALUES %trunc(s16), %foo(s16) - ; CHECK-NEXT: $vgpr0 = COPY %keep(s32) - %0:_(s32) = COPY $vgpr0 - %src:_(s64) = G_ZEXT %0 - %amt:_(s32) = G_CONSTANT i32 16 - %shift:_(s64) = G_LSHR %src, %amt - %trunc:_(s16) = G_TRUNC %shift - %foo:_(s16) = G_CONSTANT i16 55 - %keep:_(s32) = G_MERGE_VALUES %trunc, %foo - $vgpr0 = COPY %keep + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %amt:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], %amt(i32) + ; CHECK-NEXT: %trunc:_(i16) = G_TRUNC [[LSHR]](i32) + ; CHECK-NEXT: %foo:_(i16) = G_CONSTANT i16 55 + ; CHECK-NEXT: %keep:_(i32) = G_MERGE_VALUES %trunc(i16), %foo(i16) + ; CHECK-NEXT: $vgpr0 = COPY %keep(i32) + %0:_(i32) = COPY $vgpr0 + %src:_(i64) = G_ZEXT %0(i32) + %amt:_(i32) = G_CONSTANT i32 16 + %shift:_(i64) = G_LSHR %src, %amt(i32) + %trunc:_(i16) = G_TRUNC %shift(i64) + %foo:_(i16) = G_CONSTANT i16 55 + %keep:_(i32) = G_MERGE_VALUES %trunc(i16), %foo(i16) + $vgpr0 = COPY %keep(i32) ... 
--- @@ -107,21 +107,21 @@ body: | ; CHECK-LABEL: name: s16_trunc_s64_ashr_16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %amt:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], %amt(s32) - ; CHECK-NEXT: %trunc:_(s16) = G_TRUNC [[ASHR]](s32) - ; CHECK-NEXT: %foo:_(s16) = G_CONSTANT i16 55 - ; CHECK-NEXT: %keep:_(s32) = G_MERGE_VALUES %trunc(s16), %foo(s16) - ; CHECK-NEXT: $vgpr0 = COPY %keep(s32) - %0:_(s32) = COPY $vgpr0 - %src:_(s64) = G_ZEXT %0 - %amt:_(s32) = G_CONSTANT i32 16 - %shift:_(s64) = G_ASHR %src, %amt - %trunc:_(s16) = G_TRUNC %shift - %foo:_(s16) = G_CONSTANT i16 55 - %keep:_(s32) = G_MERGE_VALUES %trunc, %foo - $vgpr0 = COPY %keep + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %amt:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[COPY]], %amt(i32) + ; CHECK-NEXT: %trunc:_(i16) = G_TRUNC [[ASHR]](i32) + ; CHECK-NEXT: %foo:_(i16) = G_CONSTANT i16 55 + ; CHECK-NEXT: %keep:_(i32) = G_MERGE_VALUES %trunc(i16), %foo(i16) + ; CHECK-NEXT: $vgpr0 = COPY %keep(i32) + %0:_(i32) = COPY $vgpr0 + %src:_(i64) = G_ZEXT %0(i32) + %amt:_(i32) = G_CONSTANT i32 16 + %shift:_(i64) = G_ASHR %src, %amt(i32) + %trunc:_(i16) = G_TRUNC %shift(i64) + %foo:_(i16) = G_CONSTANT i16 55 + %keep:_(i32) = G_MERGE_VALUES %trunc(i16), %foo(i16) + $vgpr0 = COPY %keep(i32) ... --- @@ -133,22 +133,22 @@ body: | ; CHECK-LABEL: name: s16_trunc_s64_lshr_17_nofold ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %src:_(s64) = G_ZEXT [[COPY]](s32) - ; CHECK-NEXT: %amt:_(s32) = G_CONSTANT i32 17 - ; CHECK-NEXT: %shift:_(s64) = G_LSHR %src, %amt(s32) - ; CHECK-NEXT: %trunc:_(s16) = G_TRUNC %shift(s64) - ; CHECK-NEXT: %foo:_(s16) = G_CONSTANT i16 55 - ; CHECK-NEXT: %keep:_(s32) = G_MERGE_VALUES %trunc(s16), %foo(s16) - ; CHECK-NEXT: $vgpr0 = COPY %keep(s32) - %0:_(s32) = COPY $vgpr0 - %src:_(s64) = G_ZEXT %0 - %amt:_(s32) = G_CONSTANT i32 17 - %shift:_(s64) = G_LSHR %src, %amt - %trunc:_(s16) = G_TRUNC %shift - %foo:_(s16) = G_CONSTANT i16 55 - %keep:_(s32) = G_MERGE_VALUES %trunc, %foo - $vgpr0 = COPY %keep + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %src:_(i64) = G_ZEXT [[COPY]](i32) + ; CHECK-NEXT: %amt:_(i32) = G_CONSTANT i32 17 + ; CHECK-NEXT: %shift:_(i64) = G_LSHR %src, %amt(i32) + ; CHECK-NEXT: %trunc:_(i16) = G_TRUNC %shift(i64) + ; CHECK-NEXT: %foo:_(i16) = G_CONSTANT i16 55 + ; CHECK-NEXT: %keep:_(i32) = G_MERGE_VALUES %trunc(i16), %foo(i16) + ; CHECK-NEXT: $vgpr0 = COPY %keep(i32) + %0:_(i32) = COPY $vgpr0 + %src:_(i64) = G_ZEXT %0(i32) + %amt:_(i32) = G_CONSTANT i32 17 + %shift:_(i64) = G_LSHR %src, %amt(i32) + %trunc:_(i16) = G_TRUNC %shift(i64) + %foo:_(i16) = G_CONSTANT i16 55 + %keep:_(i32) = G_MERGE_VALUES %trunc(i16), %foo(i16) + $vgpr0 = COPY %keep(i32) ... 
--- @@ -160,23 +160,23 @@ body: | ; CHECK-LABEL: name: s26_trunc_s64_lshr_6 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %amt:_(s32) = G_CONSTANT i32 6 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], %amt(s32) - ; CHECK-NEXT: %trunc:_(s26) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: %foo:_(s26) = G_CONSTANT i26 55 - ; CHECK-NEXT: %keep0:_(s26) = G_ADD %trunc, %foo - ; CHECK-NEXT: %keep1:_(s32) = G_ANYEXT %keep0(s26) - ; CHECK-NEXT: $vgpr0 = COPY %keep1(s32) - %0:_(s32) = COPY $vgpr0 - %src:_(s64) = G_ZEXT %0 - %amt:_(s32) = G_CONSTANT i32 6 - %shift:_(s64) = G_LSHR %src, %amt - %trunc:_(s26) = G_TRUNC %shift - %foo:_(s26) = G_CONSTANT i26 55 - %keep0:_(s26) = G_ADD %trunc, %foo - %keep1:_(s32) = G_ANYEXT %keep0 - $vgpr0 = COPY %keep1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %amt:_(i32) = G_CONSTANT i32 6 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], %amt(i32) + ; CHECK-NEXT: %trunc:_(i26) = G_TRUNC [[LSHR]](i32) + ; CHECK-NEXT: %foo:_(i26) = G_CONSTANT i26 55 + ; CHECK-NEXT: %keep0:_(i26) = G_ADD %trunc, %foo + ; CHECK-NEXT: %keep1:_(i32) = G_ANYEXT %keep0(i26) + ; CHECK-NEXT: $vgpr0 = COPY %keep1(i32) + %0:_(i32) = COPY $vgpr0 + %src:_(i64) = G_ZEXT %0(i32) + %amt:_(i32) = G_CONSTANT i32 6 + %shift:_(i64) = G_LSHR %src, %amt(i32) + %trunc:_(i26) = G_TRUNC %shift(i64) + %foo:_(i26) = G_CONSTANT i26 55 + %keep0:_(i26) = G_ADD %trunc, %foo + %keep1:_(i32) = G_ANYEXT %keep0(i26) + $vgpr0 = COPY %keep1(i32) ... --- @@ -188,22 +188,22 @@ body: | ; CHECK-LABEL: name: s26_trunc_s64_lshr_7_nofold ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %src:_(s64) = G_ZEXT [[COPY]](s32) - ; CHECK-NEXT: %amt:_(s32) = G_CONSTANT i32 7 - ; CHECK-NEXT: %shift:_(s64) = G_LSHR %src, %amt(s32) - ; CHECK-NEXT: %trunc:_(s26) = G_TRUNC %shift(s64) - ; CHECK-NEXT: %foo:_(s26) = G_CONSTANT i26 55 - ; CHECK-NEXT: %keep0:_(s26) = G_ADD %trunc, %foo - ; CHECK-NEXT: %keep1:_(s32) = G_ANYEXT %keep0(s26) - ; CHECK-NEXT: $vgpr0 = COPY %keep1(s32) - %0:_(s32) = COPY $vgpr0 - %src:_(s64) = G_ZEXT %0 - %amt:_(s32) = G_CONSTANT i32 7 - %shift:_(s64) = G_LSHR %src, %amt - %trunc:_(s26) = G_TRUNC %shift - %foo:_(s26) = G_CONSTANT i26 55 - %keep0:_(s26) = G_ADD %trunc, %foo - %keep1:_(s32) = G_ANYEXT %keep0 - $vgpr0 = COPY %keep1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %src:_(i64) = G_ZEXT [[COPY]](i32) + ; CHECK-NEXT: %amt:_(i32) = G_CONSTANT i32 7 + ; CHECK-NEXT: %shift:_(i64) = G_LSHR %src, %amt(i32) + ; CHECK-NEXT: %trunc:_(i26) = G_TRUNC %shift(i64) + ; CHECK-NEXT: %foo:_(i26) = G_CONSTANT i26 55 + ; CHECK-NEXT: %keep0:_(i26) = G_ADD %trunc, %foo + ; CHECK-NEXT: %keep1:_(i32) = G_ANYEXT %keep0(i26) + ; CHECK-NEXT: $vgpr0 = COPY %keep1(i32) + %0:_(i32) = COPY $vgpr0 + %src:_(i64) = G_ZEXT %0(i32) + %amt:_(i32) = G_CONSTANT i32 7 + %shift:_(i64) = G_LSHR %src, %amt(i32) + %trunc:_(i26) = G_TRUNC %shift(i64) + %foo:_(i26) = G_CONSTANT i26 55 + %keep0:_(i26) = G_ADD %trunc, %foo + %keep1:_(i32) = G_ANYEXT %keep0(i26) + $vgpr0 = COPY %keep1(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-urem-pow-2.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-urem-pow-2.mir index 4f0cb877ced77..cdb72b5ee364b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-urem-pow-2.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-urem-pow-2.mir @@ -11,14 +11,14 @@ body: | ; GCN-LABEL: name: urem_s32_var_const0 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: %var:_(s32) = COPY $vgpr0 - ; GCN-NEXT: %const:_(s32) = G_CONSTANT i32 0 - ; GCN-NEXT: %rem:_(s32) = G_UREM %var, %const - ; GCN-NEXT: $vgpr0 = COPY %rem(s32) - %var:_(s32) = COPY $vgpr0 - %const:_(s32) = G_CONSTANT i32 0 - %rem:_(s32) = G_UREM %var, %const - $vgpr0 = COPY %rem + ; GCN-NEXT: %var:_(i32) = COPY $vgpr0 + ; GCN-NEXT: %const:_(i32) = G_CONSTANT i32 0 + ; GCN-NEXT: %rem:_(i32) = G_UREM %var, %const + ; GCN-NEXT: $vgpr0 = COPY %rem(i32) + %var:_(i32) = COPY $vgpr0 + %const:_(i32) = G_CONSTANT i32 0 + %rem:_(i32) = G_UREM %var, %const + $vgpr0 = COPY %rem(i32) ... --- @@ -31,12 +31,12 @@ body: | ; GCN-LABEL: name: urem_s32_var_const1 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GCN-NEXT: $vgpr0 = COPY [[C]](s32) - %var:_(s32) = COPY $vgpr0 - %const:_(s32) = G_CONSTANT i32 1 - %rem:_(s32) = G_UREM %var, %const - $vgpr0 = COPY %rem + ; GCN-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GCN-NEXT: $vgpr0 = COPY [[C]](i32) + %var:_(i32) = COPY $vgpr0 + %const:_(i32) = G_CONSTANT i32 1 + %rem:_(i32) = G_UREM %var, %const + $vgpr0 = COPY %rem(i32) ... --- @@ -49,14 +49,14 @@ body: | ; GCN-LABEL: name: urem_s32_var_const2 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: %var:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GCN-NEXT: %rem:_(s32) = G_AND %var, [[C]] - ; GCN-NEXT: $vgpr0 = COPY %rem(s32) - %var:_(s32) = COPY $vgpr0 - %const:_(s32) = G_CONSTANT i32 2 - %rem:_(s32) = G_UREM %var, %const - $vgpr0 = COPY %rem + ; GCN-NEXT: %var:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GCN-NEXT: %rem:_(i32) = G_AND %var, [[C]] + ; GCN-NEXT: $vgpr0 = COPY %rem(i32) + %var:_(i32) = COPY $vgpr0 + %const:_(i32) = G_CONSTANT i32 2 + %rem:_(i32) = G_UREM %var, %const + $vgpr0 = COPY %rem(i32) ... 
--- @@ -69,20 +69,20 @@ body: | ; GCN-LABEL: name: urem_s32_var_shl1 ; GCN: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: %var:_(s32) = COPY $vgpr0 - ; GCN-NEXT: %shift_amt:_(s32) = COPY $vgpr1 - ; GCN-NEXT: %one:_(s32) = G_CONSTANT i32 1 - ; GCN-NEXT: %one_bit:_(s32) = G_SHL %one, %shift_amt(s32) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GCN-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD %one_bit, [[C]] - ; GCN-NEXT: %rem:_(s32) = G_AND %var, [[ADD]] - ; GCN-NEXT: $vgpr0 = COPY %rem(s32) - %var:_(s32) = COPY $vgpr0 - %shift_amt:_(s32) = COPY $vgpr1 - %one:_(s32) = G_CONSTANT i32 1 - %one_bit:_(s32) = G_SHL %one, %shift_amt - %rem:_(s32) = G_UREM %var, %one_bit - $vgpr0 = COPY %rem + ; GCN-NEXT: %var:_(i32) = COPY $vgpr0 + ; GCN-NEXT: %shift_amt:_(i32) = COPY $vgpr1 + ; GCN-NEXT: %one:_(i32) = G_CONSTANT i32 1 + ; GCN-NEXT: %one_bit:_(i32) = G_SHL %one, %shift_amt(i32) + ; GCN-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; GCN-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD %one_bit, [[C]] + ; GCN-NEXT: %rem:_(i32) = G_AND %var, [[ADD]] + ; GCN-NEXT: $vgpr0 = COPY %rem(i32) + %var:_(i32) = COPY $vgpr0 + %shift_amt:_(i32) = COPY $vgpr1 + %one:_(i32) = G_CONSTANT i32 1 + %one_bit:_(i32) = G_SHL %one, %shift_amt(i32) + %rem:_(i32) = G_UREM %var, %one_bit + $vgpr0 = COPY %rem(i32) ... --- @@ -95,20 +95,20 @@ body: | ; GCN-LABEL: name: urem_s64_var_shl1 ; GCN: liveins: $vgpr0_vgpr1, $vgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: %var:_(s64) = COPY $vgpr0_vgpr1 - ; GCN-NEXT: %shiftamt:_(s32) = COPY $vgpr2 - ; GCN-NEXT: %one:_(s64) = G_CONSTANT i64 1 - ; GCN-NEXT: %one_bit:_(s64) = G_SHL %one, %shiftamt(s32) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; GCN-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD %one_bit, [[C]] - ; GCN-NEXT: %rem:_(s64) = G_AND %var, [[ADD]] - ; GCN-NEXT: $vgpr0_vgpr1 = COPY %rem(s64) - %var:_(s64) = COPY $vgpr0_vgpr1 - %shiftamt:_(s32) = COPY $vgpr2 - %one:_(s64) = G_CONSTANT i64 1 - %one_bit:_(s64) = G_SHL %one, %shiftamt - %rem:_(s64) = G_UREM %var, %one_bit - $vgpr0_vgpr1 = COPY %rem + ; GCN-NEXT: %var:_(i64) = COPY $vgpr0_vgpr1 + ; GCN-NEXT: %shiftamt:_(i32) = COPY $vgpr2 + ; GCN-NEXT: %one:_(i64) = G_CONSTANT i64 1 + ; GCN-NEXT: %one_bit:_(i64) = G_SHL %one, %shiftamt(i32) + ; GCN-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 -1 + ; GCN-NEXT: [[ADD:%[0-9]+]]:_(i64) = G_ADD %one_bit, [[C]] + ; GCN-NEXT: %rem:_(i64) = G_AND %var, [[ADD]] + ; GCN-NEXT: $vgpr0_vgpr1 = COPY %rem(i64) + %var:_(i64) = COPY $vgpr0_vgpr1 + %shiftamt:_(i32) = COPY $vgpr2 + %one:_(i64) = G_CONSTANT i64 1 + %one_bit:_(i64) = G_SHL %one, %shiftamt(i32) + %rem:_(i64) = G_UREM %var, %one_bit + $vgpr0_vgpr1 = COPY %rem(i64) ... 
--- @@ -121,20 +121,20 @@ body: | ; GCN-LABEL: name: urem_v2s32_var_shl1 ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: %var:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GCN-NEXT: %shift_amt:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GCN-NEXT: %one:_(s32) = G_CONSTANT i32 1 - ; GCN-NEXT: %one_vec:_(<2 x s32>) = G_BUILD_VECTOR %one(s32), %one(s32) - ; GCN-NEXT: %one_bit:_(<2 x s32>) = G_SHL %one_vec, %shift_amt(<2 x s32>) - ; GCN-NEXT: %rem:_(<2 x s32>) = G_UREM %var, %one_bit - ; GCN-NEXT: $vgpr0_vgpr1 = COPY %rem(<2 x s32>) - %var:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %shift_amt:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %one:_(s32) = G_CONSTANT i32 1 - %one_vec:_(<2 x s32>) = G_BUILD_VECTOR %one, %one - %one_bit:_(<2 x s32>) = G_SHL %one_vec, %shift_amt - %rem:_(<2 x s32>) = G_UREM %var, %one_bit - $vgpr0_vgpr1 = COPY %rem + ; GCN-NEXT: %var:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GCN-NEXT: %shift_amt:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GCN-NEXT: %one:_(i32) = G_CONSTANT i32 1 + ; GCN-NEXT: %one_vec:_(<2 x i32>) = G_BUILD_VECTOR %one(i32), %one(i32) + ; GCN-NEXT: %one_bit:_(<2 x i32>) = G_SHL %one_vec, %shift_amt(<2 x i32>) + ; GCN-NEXT: %rem:_(<2 x i32>) = G_UREM %var, %one_bit + ; GCN-NEXT: $vgpr0_vgpr1 = COPY %rem(<2 x i32>) + %var:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %shift_amt:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %one:_(i32) = G_CONSTANT i32 1 + %one_vec:_(<2 x i32>) = G_BUILD_VECTOR %one(i32), %one(i32) + %one_bit:_(<2 x i32>) = G_SHL %one_vec, %shift_amt(<2 x i32>) + %rem:_(<2 x i32>) = G_UREM %var, %one_bit + $vgpr0_vgpr1 = COPY %rem(<2 x i32>) ... --- @@ -147,20 +147,20 @@ body: | ; GCN-LABEL: name: urem_v2s16_var_const4_build_vector_trunc ; GCN: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: %var:_(<2 x s16>) = COPY $vgpr0 - ; GCN-NEXT: %four:_(s32) = G_CONSTANT i32 4 - ; GCN-NEXT: %four_vec:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %four(s32), %four(s32) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) - ; GCN-NEXT: [[ADD:%[0-9]+]]:_(<2 x s16>) = G_ADD %four_vec, [[BUILD_VECTOR]] - ; GCN-NEXT: %rem:_(<2 x s16>) = G_AND %var, [[ADD]] - ; GCN-NEXT: $vgpr0 = COPY %rem(<2 x s16>) - %var:_(<2 x s16>) = COPY $vgpr0 - %shift_amt:_(s32) = COPY $vgpr1 - %four:_(s32) = G_CONSTANT i32 4 - %four_vec:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %four, %four - %rem:_(<2 x s16>) = G_UREM %var, %four_vec - $vgpr0 = COPY %rem + ; GCN-NEXT: %var:_(<2 x i16>) = COPY $vgpr0 + ; GCN-NEXT: %four:_(i32) = G_CONSTANT i32 4 + ; GCN-NEXT: %four_vec:_(<2 x i16>) = G_BUILD_VECTOR_TRUNC %four(i32), %four(i32) + ; GCN-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 -1 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[C]](i16), [[C]](i16) + ; GCN-NEXT: [[ADD:%[0-9]+]]:_(<2 x i16>) = G_ADD %four_vec, [[BUILD_VECTOR]] + ; GCN-NEXT: %rem:_(<2 x i16>) = G_AND %var, [[ADD]] + ; GCN-NEXT: $vgpr0 = COPY %rem(<2 x i16>) + %var:_(<2 x i16>) = COPY $vgpr0 + %shift_amt:_(i32) = COPY $vgpr1 + %four:_(i32) = G_CONSTANT i32 4 + %four_vec:_(<2 x i16>) = G_BUILD_VECTOR_TRUNC %four(i32), %four(i32) + %rem:_(<2 x i16>) = G_UREM %var, %four_vec + $vgpr0 = COPY %rem(<2 x i16>) ... 
# The shl is a known power of two, but we do not know if the final @@ -175,22 +175,22 @@ body: | ; GCN-LABEL: name: urem_v2s16_var_nonconst_build_vector_trunc ; GCN: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: %var:_(<2 x s16>) = COPY $vgpr0 - ; GCN-NEXT: %shift_amt:_(s32) = COPY $vgpr1 - ; GCN-NEXT: %two:_(s32) = G_CONSTANT i32 2 - ; GCN-NEXT: %four:_(s32) = G_CONSTANT i32 4 - ; GCN-NEXT: %shift:_(s32) = G_SHL %two, %shift_amt(s32) - ; GCN-NEXT: %four_vec:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %four(s32), %shift(s32) - ; GCN-NEXT: %rem:_(<2 x s16>) = G_UREM %var, %four_vec - ; GCN-NEXT: $vgpr0 = COPY %rem(<2 x s16>) - %var:_(<2 x s16>) = COPY $vgpr0 - %shift_amt:_(s32) = COPY $vgpr1 - %two:_(s32) = G_CONSTANT i32 2 - %four:_(s32) = G_CONSTANT i32 4 - %shift:_(s32) = G_SHL %two, %shift_amt - %four_vec:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %four, %shift - %rem:_(<2 x s16>) = G_UREM %var, %four_vec - $vgpr0 = COPY %rem + ; GCN-NEXT: %var:_(<2 x i16>) = COPY $vgpr0 + ; GCN-NEXT: %shift_amt:_(i32) = COPY $vgpr1 + ; GCN-NEXT: %two:_(i32) = G_CONSTANT i32 2 + ; GCN-NEXT: %four:_(i32) = G_CONSTANT i32 4 + ; GCN-NEXT: %shift:_(i32) = G_SHL %two, %shift_amt(i32) + ; GCN-NEXT: %four_vec:_(<2 x i16>) = G_BUILD_VECTOR_TRUNC %four(i32), %shift(i32) + ; GCN-NEXT: %rem:_(<2 x i16>) = G_UREM %var, %four_vec + ; GCN-NEXT: $vgpr0 = COPY %rem(<2 x i16>) + %var:_(<2 x i16>) = COPY $vgpr0 + %shift_amt:_(i32) = COPY $vgpr1 + %two:_(i32) = G_CONSTANT i32 2 + %four:_(i32) = G_CONSTANT i32 4 + %shift:_(i32) = G_SHL %two, %shift_amt(i32) + %four_vec:_(<2 x i16>) = G_BUILD_VECTOR_TRUNC %four(i32), %shift(i32) + %rem:_(<2 x i16>) = G_UREM %var, %four_vec + $vgpr0 = COPY %rem(<2 x i16>) ... --- @@ -203,16 +203,16 @@ body: | ; GCN-LABEL: name: v_urem_v2i32_pow2k_denom ; GCN: liveins: $vgpr0_vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: %var:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4095 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32) - ; GCN-NEXT: %rem:_(<2 x s32>) = G_AND %var, [[BUILD_VECTOR]] - ; GCN-NEXT: $vgpr0_vgpr1 = COPY %rem(<2 x s32>) - %var:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %pow2:_(s32) = G_CONSTANT i32 4096 - %pow2_vec:_(<2 x s32>) = G_BUILD_VECTOR %pow2(s32), %pow2(s32) - %rem:_(<2 x s32>) = G_UREM %var, %pow2_vec - $vgpr0_vgpr1 = COPY %rem + ; GCN-NEXT: %var:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4095 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[C]](i32), [[C]](i32) + ; GCN-NEXT: %rem:_(<2 x i32>) = G_AND %var, [[BUILD_VECTOR]] + ; GCN-NEXT: $vgpr0_vgpr1 = COPY %rem(<2 x i32>) + %var:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %pow2:_(i32) = G_CONSTANT i32 4096 + %pow2_vec:_(<2 x i32>) = G_BUILD_VECTOR %pow2(i32), %pow2(i32) + %rem:_(<2 x i32>) = G_UREM %var, %pow2_vec + $vgpr0_vgpr1 = COPY %rem(<2 x i32>) ... 
--- @@ -225,18 +225,18 @@ body: | ; GCN-LABEL: name: v_urem_v2i32_pow2k_not_splat_denom ; GCN: liveins: $vgpr0_vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: %var:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4095 - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2047 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32) - ; GCN-NEXT: %rem:_(<2 x s32>) = G_AND %var, [[BUILD_VECTOR]] - ; GCN-NEXT: $vgpr0_vgpr1 = COPY %rem(<2 x s32>) - %var:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %pow2_1:_(s32) = G_CONSTANT i32 4096 - %pow2_2:_(s32) = G_CONSTANT i32 2048 - %pow2_vec:_(<2 x s32>) = G_BUILD_VECTOR %pow2_1(s32), %pow2_2(s32) - %rem:_(<2 x s32>) = G_UREM %var, %pow2_vec - $vgpr0_vgpr1 = COPY %rem + ; GCN-NEXT: %var:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4095 + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 2047 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[C]](i32), [[C1]](i32) + ; GCN-NEXT: %rem:_(<2 x i32>) = G_AND %var, [[BUILD_VECTOR]] + ; GCN-NEXT: $vgpr0_vgpr1 = COPY %rem(<2 x i32>) + %var:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %pow2_1:_(i32) = G_CONSTANT i32 4096 + %pow2_2:_(i32) = G_CONSTANT i32 2048 + %pow2_vec:_(<2 x i32>) = G_BUILD_VECTOR %pow2_1(i32), %pow2_2(i32) + %rem:_(<2 x i32>) = G_UREM %var, %pow2_vec + $vgpr0_vgpr1 = COPY %rem(<2 x i32>) ... --- @@ -249,14 +249,14 @@ body: | ; GCN-LABEL: name: v_urem_v2i64_pow2k_denom ; GCN: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: %var:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4095 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64) - ; GCN-NEXT: %rem:_(<2 x s64>) = G_AND %var, [[BUILD_VECTOR]] - ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %rem(<2 x s64>) - %var:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %pow2:_(s64) = G_CONSTANT i64 4096 - %pow2_vec:_(<2 x s64>) = G_BUILD_VECTOR %pow2(s64), %pow2(s64) - %rem:_(<2 x s64>) = G_UREM %var, %pow2_vec - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %rem + ; GCN-NEXT: %var:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GCN-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4095 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[C]](i64), [[C]](i64) + ; GCN-NEXT: %rem:_(<2 x i64>) = G_AND %var, [[BUILD_VECTOR]] + ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %rem(<2 x i64>) + %var:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %pow2:_(i64) = G_CONSTANT i64 4096 + %pow2_vec:_(<2 x i64>) = G_BUILD_VECTOR %pow2(i64), %pow2(i64) + %rem:_(<2 x i64>) = G_UREM %var, %pow2_vec + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %rem(<2 x i64>) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-zext-trunc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-zext-trunc.mir index 3423af64162e5..a1e995002bb30 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-zext-trunc.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-zext-trunc.mir @@ -11,18 +11,18 @@ body: | ; GCN-LABEL: name: zext_trunc_s32_s16_s32 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: %var:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC %var(s32) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 16383 - ; GCN-NEXT: %trunc:_(s16) = G_AND [[TRUNC]], [[C]] - ; GCN-NEXT: %zext:_(s32) = G_ZEXT %trunc(s16) - ; GCN-NEXT: $vgpr0 = COPY %zext(s32) - %var:_(s32) = COPY $vgpr0 - %c3FFF:_(s32) = G_CONSTANT i32 16383 - %low_bits:_(s32) = G_AND %var, %c3FFF - %trunc:_(s16) = G_TRUNC %low_bits(s32) - %zext:_(s32) = G_ZEXT %trunc(s16) - $vgpr0 = COPY %zext(s32) + ; GCN-NEXT: %var:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC %var(i32) + ; GCN-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 16383 + ; GCN-NEXT: %trunc:_(i16) = G_AND [[TRUNC]], [[C]] + ; GCN-NEXT: %zext:_(i32) = G_ZEXT %trunc(i16) + ; GCN-NEXT: $vgpr0 = COPY %zext(i32) + %var:_(i32) = COPY $vgpr0 + %c3FFF:_(i32) = G_CONSTANT i32 16383 + %low_bits:_(i32) = G_AND %var, %c3FFF + %trunc:_(i16) = G_TRUNC %low_bits(i32) + %zext:_(i32) = G_ZEXT %trunc(i16) + $vgpr0 = COPY %zext(i32) ... --- @@ -35,16 +35,16 @@ body: | ; GCN-LABEL: name: zext_trunc_s32_s16_s32_unknown_high_bits ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: %var:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC %var(s32) - ; GCN-NEXT: %zext:_(s32) = G_ZEXT [[TRUNC]](s16) - ; GCN-NEXT: $vgpr0 = COPY %zext(s32) - %var:_(s32) = COPY $vgpr0 - %cFFFFF:_(s32) = G_CONSTANT i32 1048575 - %low_bits:_(s32) = G_AND %var, %cFFFFF - %trunc:_(s16) = G_TRUNC %low_bits(s32) - %zext:_(s32) = G_ZEXT %trunc(s16) - $vgpr0 = COPY %zext(s32) + ; GCN-NEXT: %var:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC %var(i32) + ; GCN-NEXT: %zext:_(i32) = G_ZEXT [[TRUNC]](i16) + ; GCN-NEXT: $vgpr0 = COPY %zext(i32) + %var:_(i32) = COPY $vgpr0 + %cFFFFF:_(i32) = G_CONSTANT i32 1048575 + %low_bits:_(i32) = G_AND %var, %cFFFFF + %trunc:_(i16) = G_TRUNC %low_bits(i32) + %zext:_(i32) = G_ZEXT %trunc(i16) + $vgpr0 = COPY %zext(i32) ... --- @@ -57,18 +57,18 @@ body: | ; GCN-LABEL: name: zext_trunc_s64_s16_s32 ; GCN: liveins: $vgpr0_vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: %var:_(s64) = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC %var(s64) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 16383 - ; GCN-NEXT: %trunc:_(s16) = G_AND [[TRUNC]], [[C]] - ; GCN-NEXT: %zext:_(s32) = G_ZEXT %trunc(s16) - ; GCN-NEXT: $vgpr0 = COPY %zext(s32) - %var:_(s64) = COPY $vgpr0_vgpr1 - %c3FFF:_(s64) = G_CONSTANT i64 16383 - %low_bits:_(s64) = G_AND %var, %c3FFF - %trunc:_(s16) = G_TRUNC %low_bits(s64) - %zext:_(s32) = G_ZEXT %trunc(s16) - $vgpr0 = COPY %zext(s32) + ; GCN-NEXT: %var:_(i64) = COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC %var(i64) + ; GCN-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 16383 + ; GCN-NEXT: %trunc:_(i16) = G_AND [[TRUNC]], [[C]] + ; GCN-NEXT: %zext:_(i32) = G_ZEXT %trunc(i16) + ; GCN-NEXT: $vgpr0 = COPY %zext(i32) + %var:_(i64) = COPY $vgpr0_vgpr1 + %c3FFF:_(i64) = G_CONSTANT i64 16383 + %low_bits:_(i64) = G_AND %var, %c3FFF + %trunc:_(i16) = G_TRUNC %low_bits(i64) + %zext:_(i32) = G_ZEXT %trunc(i16) + $vgpr0 = COPY %zext(i32) ... 
--- @@ -81,18 +81,18 @@ body: | ; GCN-LABEL: name: zext_trunc_s32_s16_s64 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: %var:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC %var(s32) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 16383 - ; GCN-NEXT: %trunc:_(s16) = G_AND [[TRUNC]], [[C]] - ; GCN-NEXT: %zext:_(s64) = G_ZEXT %trunc(s16) - ; GCN-NEXT: $vgpr0_vgpr1 = COPY %zext(s64) - %var:_(s32) = COPY $vgpr0 - %c3FFF:_(s32) = G_CONSTANT i32 16383 - %low_bits:_(s32) = G_AND %var, %c3FFF - %trunc:_(s16) = G_TRUNC %low_bits(s32) - %zext:_(s64) = G_ZEXT %trunc(s16) - $vgpr0_vgpr1 = COPY %zext(s64) + ; GCN-NEXT: %var:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC %var(i32) + ; GCN-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 16383 + ; GCN-NEXT: %trunc:_(i16) = G_AND [[TRUNC]], [[C]] + ; GCN-NEXT: %zext:_(i64) = G_ZEXT %trunc(i16) + ; GCN-NEXT: $vgpr0_vgpr1 = COPY %zext(i64) + %var:_(i32) = COPY $vgpr0 + %c3FFF:_(i32) = G_CONSTANT i32 16383 + %low_bits:_(i32) = G_AND %var, %c3FFF + %trunc:_(i16) = G_TRUNC %low_bits(i32) + %zext:_(i64) = G_ZEXT %trunc(i16) + $vgpr0_vgpr1 = COPY %zext(i64) ... --- @@ -105,20 +105,20 @@ body: | ; GCN-LABEL: name: zext_trunc_v2s32_v2s16_v2s32 ; GCN: liveins: $vgpr0_vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: %var:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GCN-NEXT: %c3FFF:_(s32) = G_CONSTANT i32 16383 - ; GCN-NEXT: %c7FFF:_(s32) = G_CONSTANT i32 32767 - ; GCN-NEXT: %c:_(<2 x s32>) = G_BUILD_VECTOR %c3FFF(s32), %c7FFF(s32) - ; GCN-NEXT: %low_bits:_(<2 x s32>) = G_AND %var, %c - ; GCN-NEXT: $vgpr0_vgpr1 = COPY %low_bits(<2 x s32>) - %var:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %c3FFF:_(s32) = G_CONSTANT i32 16383 - %c7FFF:_(s32) = G_CONSTANT i32 32767 - %c:_(<2 x s32>) = G_BUILD_VECTOR %c3FFF(s32), %c7FFF(s32) - %low_bits:_(<2 x s32>) = G_AND %var, %c - %trunc:_(<2 x s16>) = G_TRUNC %low_bits(<2 x s32>) - %zext:_(<2 x s32>) = G_ZEXT %trunc(<2 x s16>) - $vgpr0_vgpr1 = COPY %zext(<2 x s32>) + ; GCN-NEXT: %var:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GCN-NEXT: %c3FFF:_(i32) = G_CONSTANT i32 16383 + ; GCN-NEXT: %c7FFF:_(i32) = G_CONSTANT i32 32767 + ; GCN-NEXT: %c:_(<2 x i32>) = G_BUILD_VECTOR %c3FFF(i32), %c7FFF(i32) + ; GCN-NEXT: %low_bits:_(<2 x i32>) = G_AND %var, %c + ; GCN-NEXT: $vgpr0_vgpr1 = COPY %low_bits(<2 x i32>) + %var:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %c3FFF:_(i32) = G_CONSTANT i32 16383 + %c7FFF:_(i32) = G_CONSTANT i32 32767 + %c:_(<2 x i32>) = G_BUILD_VECTOR %c3FFF(i32), %c7FFF(i32) + %low_bits:_(<2 x i32>) = G_AND %var, %c + %trunc:_(<2 x i16>) = G_TRUNC %low_bits(<2 x i32>) + %zext:_(<2 x i32>) = G_ZEXT %trunc(<2 x i16>) + $vgpr0_vgpr1 = COPY %zext(<2 x i32>) ... 
--- @@ -131,22 +131,22 @@ body: | ; GCN-LABEL: name: zext_trunc_v2s32_v2s16_v2s32_unknown_high_bits ; GCN: liveins: $vgpr0_vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: %var:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GCN-NEXT: %cFFFFF:_(s32) = G_CONSTANT i32 1048575 - ; GCN-NEXT: %c7FFF:_(s32) = G_CONSTANT i32 32767 - ; GCN-NEXT: %c:_(<2 x s32>) = G_BUILD_VECTOR %cFFFFF(s32), %c7FFF(s32) - ; GCN-NEXT: %low_bits:_(<2 x s32>) = G_AND %var, %c - ; GCN-NEXT: %trunc:_(<2 x s16>) = G_TRUNC %low_bits(<2 x s32>) - ; GCN-NEXT: %zext:_(<2 x s32>) = G_ZEXT %trunc(<2 x s16>) - ; GCN-NEXT: $vgpr0_vgpr1 = COPY %zext(<2 x s32>) - %var:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %cFFFFF:_(s32) = G_CONSTANT i32 1048575 - %c7FFF:_(s32) = G_CONSTANT i32 32767 - %c:_(<2 x s32>) = G_BUILD_VECTOR %cFFFFF(s32), %c7FFF(s32) - %low_bits:_(<2 x s32>) = G_AND %var, %c - %trunc:_(<2 x s16>) = G_TRUNC %low_bits(<2 x s32>) - %zext:_(<2 x s32>) = G_ZEXT %trunc(<2 x s16>) - $vgpr0_vgpr1 = COPY %zext(<2 x s32>) + ; GCN-NEXT: %var:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GCN-NEXT: %cFFFFF:_(i32) = G_CONSTANT i32 1048575 + ; GCN-NEXT: %c7FFF:_(i32) = G_CONSTANT i32 32767 + ; GCN-NEXT: %c:_(<2 x i32>) = G_BUILD_VECTOR %cFFFFF(i32), %c7FFF(i32) + ; GCN-NEXT: %low_bits:_(<2 x i32>) = G_AND %var, %c + ; GCN-NEXT: %trunc:_(<2 x i16>) = G_TRUNC %low_bits(<2 x i32>) + ; GCN-NEXT: %zext:_(<2 x i32>) = G_ZEXT %trunc(<2 x i16>) + ; GCN-NEXT: $vgpr0_vgpr1 = COPY %zext(<2 x i32>) + %var:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %cFFFFF:_(i32) = G_CONSTANT i32 1048575 + %c7FFF:_(i32) = G_CONSTANT i32 32767 + %c:_(<2 x i32>) = G_BUILD_VECTOR %cFFFFF(i32), %c7FFF(i32) + %low_bits:_(<2 x i32>) = G_AND %var, %c + %trunc:_(<2 x i16>) = G_TRUNC %low_bits(<2 x i32>) + %zext:_(<2 x i32>) = G_ZEXT %trunc(<2 x i16>) + $vgpr0_vgpr1 = COPY %zext(<2 x i32>) ... 
--- @@ -159,22 +159,22 @@ body: | ; GCN-LABEL: name: zext_trunc_v2s64_v2s16_v2s32 ; GCN: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: %var:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GCN-NEXT: %c3FFF:_(s64) = G_CONSTANT i64 16383 - ; GCN-NEXT: %c7FFF:_(s64) = G_CONSTANT i64 32767 - ; GCN-NEXT: %c:_(<2 x s64>) = G_BUILD_VECTOR %c3FFF(s64), %c7FFF(s64) - ; GCN-NEXT: %low_bits:_(<2 x s64>) = G_AND %var, %c - ; GCN-NEXT: %trunc:_(<2 x s16>) = G_TRUNC %low_bits(<2 x s64>) - ; GCN-NEXT: %zext:_(<2 x s32>) = G_ZEXT %trunc(<2 x s16>) - ; GCN-NEXT: $vgpr0_vgpr1 = COPY %zext(<2 x s32>) - %var:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %c3FFF:_(s64) = G_CONSTANT i64 16383 - %c7FFF:_(s64) = G_CONSTANT i64 32767 - %c:_(<2 x s64>) = G_BUILD_VECTOR %c3FFF(s64), %c7FFF(s64) - %low_bits:_(<2 x s64>) = G_AND %var, %c - %trunc:_(<2 x s16>) = G_TRUNC %low_bits(<2 x s64>) - %zext:_(<2 x s32>) = G_ZEXT %trunc(<2 x s16>) - $vgpr0_vgpr1 = COPY %zext(<2 x s32>) + ; GCN-NEXT: %var:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GCN-NEXT: %c3FFF:_(i64) = G_CONSTANT i64 16383 + ; GCN-NEXT: %c7FFF:_(i64) = G_CONSTANT i64 32767 + ; GCN-NEXT: %c:_(<2 x i64>) = G_BUILD_VECTOR %c3FFF(i64), %c7FFF(i64) + ; GCN-NEXT: %low_bits:_(<2 x i64>) = G_AND %var, %c + ; GCN-NEXT: %trunc:_(<2 x i16>) = G_TRUNC %low_bits(<2 x i64>) + ; GCN-NEXT: %zext:_(<2 x i32>) = G_ZEXT %trunc(<2 x i16>) + ; GCN-NEXT: $vgpr0_vgpr1 = COPY %zext(<2 x i32>) + %var:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %c3FFF:_(i64) = G_CONSTANT i64 16383 + %c7FFF:_(i64) = G_CONSTANT i64 32767 + %c:_(<2 x i64>) = G_BUILD_VECTOR %c3FFF(i64), %c7FFF(i64) + %low_bits:_(<2 x i64>) = G_AND %var, %c + %trunc:_(<2 x i16>) = G_TRUNC %low_bits(<2 x i64>) + %zext:_(<2 x i32>) = G_ZEXT %trunc(<2 x i16>) + $vgpr0_vgpr1 = COPY %zext(<2 x i32>) ... 
--- @@ -187,20 +187,20 @@ body: | ; GCN-LABEL: name: zext_trunc_v2s32_v2s16_v2s64 ; GCN: liveins: $vgpr0_vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: %var:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GCN-NEXT: %c3FFF:_(s32) = G_CONSTANT i32 16383 - ; GCN-NEXT: %c7FFF:_(s32) = G_CONSTANT i32 32767 - ; GCN-NEXT: %c:_(<2 x s32>) = G_BUILD_VECTOR %c3FFF(s32), %c7FFF(s32) - ; GCN-NEXT: %low_bits:_(<2 x s32>) = G_AND %var, %c - ; GCN-NEXT: %trunc:_(<2 x s16>) = G_TRUNC %low_bits(<2 x s32>) - ; GCN-NEXT: %zext:_(<2 x s64>) = G_ZEXT %trunc(<2 x s16>) - ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %zext(<2 x s64>) - %var:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %c3FFF:_(s32) = G_CONSTANT i32 16383 - %c7FFF:_(s32) = G_CONSTANT i32 32767 - %c:_(<2 x s32>) = G_BUILD_VECTOR %c3FFF(s32), %c7FFF(s32) - %low_bits:_(<2 x s32>) = G_AND %var, %c - %trunc:_(<2 x s16>) = G_TRUNC %low_bits(<2 x s32>) - %zext:_(<2 x s64>) = G_ZEXT %trunc(<2 x s16>) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %zext(<2 x s64>) + ; GCN-NEXT: %var:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GCN-NEXT: %c3FFF:_(i32) = G_CONSTANT i32 16383 + ; GCN-NEXT: %c7FFF:_(i32) = G_CONSTANT i32 32767 + ; GCN-NEXT: %c:_(<2 x i32>) = G_BUILD_VECTOR %c3FFF(i32), %c7FFF(i32) + ; GCN-NEXT: %low_bits:_(<2 x i32>) = G_AND %var, %c + ; GCN-NEXT: %trunc:_(<2 x i16>) = G_TRUNC %low_bits(<2 x i32>) + ; GCN-NEXT: %zext:_(<2 x i64>) = G_ZEXT %trunc(<2 x i16>) + ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %zext(<2 x i64>) + %var:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %c3FFF:_(i32) = G_CONSTANT i32 16383 + %c7FFF:_(i32) = G_CONSTANT i32 32767 + %c:_(<2 x i32>) = G_BUILD_VECTOR %c3FFF(i32), %c7FFF(i32) + %low_bits:_(<2 x i32>) = G_AND %var, %c + %trunc:_(<2 x i16>) = G_TRUNC %low_bits(<2 x i32>) + %zext:_(<2 x i64>) = G_ZEXT %trunc(<2 x i16>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %zext(<2 x i64>) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/compute-num-sign-bits-med3.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/compute-num-sign-bits-med3.mir index 0c0f96a808a96..b15de42d58d13 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/compute-num-sign-bits-med3.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/compute-num-sign-bits-med3.mir @@ -12,19 +12,19 @@ body: | ; CHECK-LABEL: name: known_sign_bits_smed3_0 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %val:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %val0:_(s32) = G_SEXT_INREG %val, 8 - ; CHECK-NEXT: %val1:_(s32) = G_CONSTANT i32 -255 - ; CHECK-NEXT: %val2:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: %smed3:_(s32) = G_AMDGPU_SMED3 %val0, %val1, %val2 - ; CHECK-NEXT: $vgpr0 = COPY %smed3(s32) - %val:_(s32) = COPY $vgpr0 - %val0:_(s32) = G_SEXT_INREG %val, 8 - %val1:_(s32) = G_CONSTANT i32 -255 - %val2:_(s32) = G_CONSTANT i32 255 - %smed3:_(s32) = G_AMDGPU_SMED3 %val0, %val1, %val2 - %inreg:_(s32) = G_SEXT_INREG %smed3, 9 - $vgpr0 = COPY %inreg + ; CHECK-NEXT: %val:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %val0:_(i32) = G_SEXT_INREG %val, 8 + ; CHECK-NEXT: %val1:_(i32) = G_CONSTANT i32 -255 + ; CHECK-NEXT: %val2:_(i32) = G_CONSTANT i32 255 + ; CHECK-NEXT: %smed3:_(i32) = G_AMDGPU_SMED3 %val0, %val1, %val2 + ; CHECK-NEXT: $vgpr0 = COPY %smed3(i32) + %val:_(i32) = COPY $vgpr0 + %val0:_(i32) = G_SEXT_INREG %val, 8 + %val1:_(i32) = G_CONSTANT i32 -255 + %val2:_(i32) = G_CONSTANT i32 255 + %smed3:_(i32) = G_AMDGPU_SMED3 %val0, %val1, %val2 + %inreg:_(i32) = G_SEXT_INREG %smed3, 9 + $vgpr0 = COPY %inreg(i32) ... 
@@ -39,19 +39,19 @@ body: | ; CHECK-LABEL: name: known_sign_bits_smed3_1 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %val:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %val0:_(s32) = G_SEXT_INREG %val, 8 - ; CHECK-NEXT: %val1:_(s32) = G_CONSTANT i32 -255 - ; CHECK-NEXT: %val2:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: %smed3:_(s32) = G_AMDGPU_SMED3 %val1, %val0, %val2 - ; CHECK-NEXT: $vgpr0 = COPY %smed3(s32) - %val:_(s32) = COPY $vgpr0 - %val0:_(s32) = G_SEXT_INREG %val, 8 - %val1:_(s32) = G_CONSTANT i32 -255 - %val2:_(s32) = G_CONSTANT i32 255 - %smed3:_(s32) = G_AMDGPU_SMED3 %val1, %val0, %val2 - %inreg:_(s32) = G_SEXT_INREG %smed3, 9 - $vgpr0 = COPY %inreg + ; CHECK-NEXT: %val:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %val0:_(i32) = G_SEXT_INREG %val, 8 + ; CHECK-NEXT: %val1:_(i32) = G_CONSTANT i32 -255 + ; CHECK-NEXT: %val2:_(i32) = G_CONSTANT i32 255 + ; CHECK-NEXT: %smed3:_(i32) = G_AMDGPU_SMED3 %val1, %val0, %val2 + ; CHECK-NEXT: $vgpr0 = COPY %smed3(i32) + %val:_(i32) = COPY $vgpr0 + %val0:_(i32) = G_SEXT_INREG %val, 8 + %val1:_(i32) = G_CONSTANT i32 -255 + %val2:_(i32) = G_CONSTANT i32 255 + %smed3:_(i32) = G_AMDGPU_SMED3 %val1, %val0, %val2 + %inreg:_(i32) = G_SEXT_INREG %smed3, 9 + $vgpr0 = COPY %inreg(i32) ... @@ -66,19 +66,19 @@ body: | ; CHECK-LABEL: name: known_sign_bits_smed3_2 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %val:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %val0:_(s32) = G_SEXT_INREG %val, 8 - ; CHECK-NEXT: %val1:_(s32) = G_CONSTANT i32 -256 - ; CHECK-NEXT: %val2:_(s32) = G_CONSTANT i32 128 - ; CHECK-NEXT: %smed3:_(s32) = G_AMDGPU_SMED3 %val1, %val2, %val0 - ; CHECK-NEXT: $vgpr0 = COPY %smed3(s32) - %val:_(s32) = COPY $vgpr0 - %val0:_(s32) = G_SEXT_INREG %val, 8 - %val1:_(s32) = G_CONSTANT i32 -256 - %val2:_(s32) = G_CONSTANT i32 128 - %smed3:_(s32) = G_AMDGPU_SMED3 %val1, %val2, %val0 - %inreg:_(s32) = G_SEXT_INREG %smed3, 9 - $vgpr0 = COPY %inreg + ; CHECK-NEXT: %val:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %val0:_(i32) = G_SEXT_INREG %val, 8 + ; CHECK-NEXT: %val1:_(i32) = G_CONSTANT i32 -256 + ; CHECK-NEXT: %val2:_(i32) = G_CONSTANT i32 128 + ; CHECK-NEXT: %smed3:_(i32) = G_AMDGPU_SMED3 %val1, %val2, %val0 + ; CHECK-NEXT: $vgpr0 = COPY %smed3(i32) + %val:_(i32) = COPY $vgpr0 + %val0:_(i32) = G_SEXT_INREG %val, 8 + %val1:_(i32) = G_CONSTANT i32 -256 + %val2:_(i32) = G_CONSTANT i32 128 + %smed3:_(i32) = G_AMDGPU_SMED3 %val1, %val2, %val0 + %inreg:_(i32) = G_SEXT_INREG %smed3, 9 + $vgpr0 = COPY %inreg(i32) ... 
@@ -93,20 +93,20 @@ body: | ; CHECK-LABEL: name: not_enough_sign_bits_smed3_0 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %val:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %val0:_(s32) = G_SEXT_INREG %val, 8 - ; CHECK-NEXT: %val1:_(s32) = G_SEXT_INREG %val, 9 - ; CHECK-NEXT: %val2:_(s32) = G_SEXT_INREG %val, 9 - ; CHECK-NEXT: %smed3:_(s32) = G_AMDGPU_SMED3 %val0, %val1, %val2 - ; CHECK-NEXT: %inreg:_(s32) = G_SEXT_INREG %smed3, 8 - ; CHECK-NEXT: $vgpr0 = COPY %inreg(s32) - %val:_(s32) = COPY $vgpr0 - %val0:_(s32) = G_SEXT_INREG %val, 8 - %val1:_(s32) = G_SEXT_INREG %val, 9 - %val2:_(s32) = G_SEXT_INREG %val, 9 - %smed3:_(s32) = G_AMDGPU_SMED3 %val0, %val1, %val2 - %inreg:_(s32) = G_SEXT_INREG %smed3, 8 - $vgpr0 = COPY %inreg + ; CHECK-NEXT: %val:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %val0:_(i32) = G_SEXT_INREG %val, 8 + ; CHECK-NEXT: %val1:_(i32) = G_SEXT_INREG %val, 9 + ; CHECK-NEXT: %val2:_(i32) = G_SEXT_INREG %val, 9 + ; CHECK-NEXT: %smed3:_(i32) = G_AMDGPU_SMED3 %val0, %val1, %val2 + ; CHECK-NEXT: %inreg:_(i32) = G_SEXT_INREG %smed3, 8 + ; CHECK-NEXT: $vgpr0 = COPY %inreg(i32) + %val:_(i32) = COPY $vgpr0 + %val0:_(i32) = G_SEXT_INREG %val, 8 + %val1:_(i32) = G_SEXT_INREG %val, 9 + %val2:_(i32) = G_SEXT_INREG %val, 9 + %smed3:_(i32) = G_AMDGPU_SMED3 %val0, %val1, %val2 + %inreg:_(i32) = G_SEXT_INREG %smed3, 8 + $vgpr0 = COPY %inreg(i32) ... @@ -121,20 +121,20 @@ body: | ; CHECK-LABEL: name: not_enough_sign_bits_smed3_1 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %val:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %val0:_(s32) = G_SEXT_INREG %val, 9 - ; CHECK-NEXT: %val1:_(s32) = G_SEXT_INREG %val, 8 - ; CHECK-NEXT: %val2:_(s32) = G_SEXT_INREG %val, 9 - ; CHECK-NEXT: %smed3:_(s32) = G_AMDGPU_SMED3 %val0, %val1, %val2 - ; CHECK-NEXT: %inreg:_(s32) = G_SEXT_INREG %smed3, 8 - ; CHECK-NEXT: $vgpr0 = COPY %inreg(s32) - %val:_(s32) = COPY $vgpr0 - %val0:_(s32) = G_SEXT_INREG %val, 9 - %val1:_(s32) = G_SEXT_INREG %val, 8 - %val2:_(s32) = G_SEXT_INREG %val, 9 - %smed3:_(s32) = G_AMDGPU_SMED3 %val0, %val1, %val2 - %inreg:_(s32) = G_SEXT_INREG %smed3, 8 - $vgpr0 = COPY %inreg + ; CHECK-NEXT: %val:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %val0:_(i32) = G_SEXT_INREG %val, 9 + ; CHECK-NEXT: %val1:_(i32) = G_SEXT_INREG %val, 8 + ; CHECK-NEXT: %val2:_(i32) = G_SEXT_INREG %val, 9 + ; CHECK-NEXT: %smed3:_(i32) = G_AMDGPU_SMED3 %val0, %val1, %val2 + ; CHECK-NEXT: %inreg:_(i32) = G_SEXT_INREG %smed3, 8 + ; CHECK-NEXT: $vgpr0 = COPY %inreg(i32) + %val:_(i32) = COPY $vgpr0 + %val0:_(i32) = G_SEXT_INREG %val, 9 + %val1:_(i32) = G_SEXT_INREG %val, 8 + %val2:_(i32) = G_SEXT_INREG %val, 9 + %smed3:_(i32) = G_AMDGPU_SMED3 %val0, %val1, %val2 + %inreg:_(i32) = G_SEXT_INREG %smed3, 8 + $vgpr0 = COPY %inreg(i32) ... 
@@ -149,19 +149,19 @@ body: | ; CHECK-LABEL: name: not_enough_sign_bits_smed3_2 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %val:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %val0:_(s32) = G_SEXT_INREG %val, 8 - ; CHECK-NEXT: %val1:_(s32) = G_SEXT_INREG %val, 8 - ; CHECK-NEXT: %val2:_(s32) = G_SEXT_INREG %val, 9 - ; CHECK-NEXT: %smed3:_(s32) = G_AMDGPU_SMED3 %val0, %val1, %val2 - ; CHECK-NEXT: %inreg:_(s32) = G_SEXT_INREG %smed3, 8 - ; CHECK-NEXT: $vgpr0 = COPY %inreg(s32) - %val:_(s32) = COPY $vgpr0 - %val0:_(s32) = G_SEXT_INREG %val, 8 - %val1:_(s32) = G_SEXT_INREG %val, 8 - %val2:_(s32) = G_SEXT_INREG %val, 9 - %smed3:_(s32) = G_AMDGPU_SMED3 %val0, %val1, %val2 - %inreg:_(s32) = G_SEXT_INREG %smed3, 8 - $vgpr0 = COPY %inreg + ; CHECK-NEXT: %val:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %val0:_(i32) = G_SEXT_INREG %val, 8 + ; CHECK-NEXT: %val1:_(i32) = G_SEXT_INREG %val, 8 + ; CHECK-NEXT: %val2:_(i32) = G_SEXT_INREG %val, 9 + ; CHECK-NEXT: %smed3:_(i32) = G_AMDGPU_SMED3 %val0, %val1, %val2 + ; CHECK-NEXT: %inreg:_(i32) = G_SEXT_INREG %smed3, 8 + ; CHECK-NEXT: $vgpr0 = COPY %inreg(i32) + %val:_(i32) = COPY $vgpr0 + %val0:_(i32) = G_SEXT_INREG %val, 8 + %val1:_(i32) = G_SEXT_INREG %val, 8 + %val2:_(i32) = G_SEXT_INREG %val, 9 + %smed3:_(i32) = G_AMDGPU_SMED3 %val0, %val1, %val2 + %inreg:_(i32) = G_SEXT_INREG %smed3, 8 + $vgpr0 = COPY %inreg(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.mir index 6594d7f504212..b5acd8d2ec5e5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.mir @@ -21,94 +21,94 @@ body: | ; GFX10-NEXT: successors: %bb.1(0x30000000), %bb.2(0x50000000) ; GFX10-NEXT: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY2]](s32), [[C]] - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY3]](s32), [[C1]] - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP]](s1) - ; GFX10-NEXT: G_BRCOND [[ICMP1]](s1), %bb.2 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 6 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[COPY2]](i32), [[C]] + ; GFX10-NEXT: 
[[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[COPY3]](i32), [[C1]] + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(i1) = COPY [[ICMP]](i1) + ; GFX10-NEXT: G_BRCOND [[ICMP1]](i1), %bb.2 ; GFX10-NEXT: G_BR %bb.1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.1: ; GFX10-NEXT: successors: %bb.3(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 123 - ; GFX10-NEXT: G_STORE [[C2]](s32), [[MV1]](p1) :: (store (s32), addrspace 1) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 123 + ; GFX10-NEXT: G_STORE [[C2]](i32), [[MV1]](p1) :: (store (i32), addrspace 1) ; GFX10-NEXT: G_BR %bb.3 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.2: ; GFX10-NEXT: successors: %bb.4(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[COPY6]](s1), %bb.0, %20(s1), %bb.3 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1) + ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(i1) = PHI [[COPY6]](i1), %bb.0, %20(i1), %bb.3 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(i1) = COPY [[PHI]](i1) ; GFX10-NEXT: G_BR %bb.4 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.3: ; GFX10-NEXT: successors: %bb.2(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY2]](s32), [[C3]] - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1) + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[COPY2]](i32), [[C3]] + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(i1) = COPY [[ICMP2]](i1) ; GFX10-NEXT: G_BR %bb.2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.4: - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY7]](s1), [[C5]], [[C4]] - ; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[COPY7]](i1), [[C5]], [[C4]] + ; GFX10-NEXT: G_STORE [[SELECT]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; GFX10-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1(0x30000000), %bb.2(0x50000000) liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(p1) = G_MERGE_VALUES %0(s32), %1(s32) - %3:_(s32) = COPY $vgpr2 - %4:_(s32) = COPY $sgpr0 - %5:_(s32) = COPY $vgpr3 - %6:_(s32) = COPY $vgpr4 - %7:_(p1) = G_MERGE_VALUES %5(s32), %6(s32) - %8:_(s32) = G_CONSTANT i32 6 - %9:_(s1) = G_ICMP intpred(uge), %3(s32), %8 - %10:_(s32) = G_CONSTANT i32 0 - %11:_(s1) = G_ICMP intpred(ne), %4(s32), %10 - G_BRCOND %11(s1), %bb.2 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(p1) = G_MERGE_VALUES %0(i32), %1(i32) + %3:_(i32) = COPY $vgpr2 + %4:_(i32) = COPY $sgpr0 + %5:_(i32) = COPY $vgpr3 + %6:_(i32) = COPY $vgpr4 + %7:_(p1) = G_MERGE_VALUES %5(i32), %6(i32) + %8:_(i32) = G_CONSTANT i32 6 + %9:_(i1) = G_ICMP intpred(uge), %3(i32), %8 + %10:_(i32) = G_CONSTANT i32 0 + %11:_(i1) = G_ICMP intpred(ne), %4(i32), %10 + G_BRCOND %11(i1), %bb.2 G_BR %bb.1 bb.1: successors: %bb.3(0x80000000) - %12:_(s32) = G_CONSTANT i32 123 - G_STORE %12(s32), %7(p1) :: (store (s32), addrspace 1) + %12:_(i32) = G_CONSTANT i32 123 + G_STORE %12(i32), %7(p1) :: (store (i32), addrspace 1) G_BR %bb.3 bb.2: successors: %bb.4(0x80000000) - %13:_(s1) = G_PHI %14(s1), 
%bb.3, %9(s1), %bb.0 + %13:_(i1) = G_PHI %14(i1), %bb.3, %9(i1), %bb.0 G_BR %bb.4 bb.3: successors: %bb.2(0x80000000) - %15:_(s32) = G_CONSTANT i32 1 - %14:_(s1) = G_ICMP intpred(ult), %3(s32), %15 + %15:_(i32) = G_CONSTANT i32 1 + %14:_(i1) = G_ICMP intpred(ult), %3(i32), %15 G_BR %bb.2 bb.4: - %16:_(s32) = G_CONSTANT i32 2 - %17:_(s32) = G_CONSTANT i32 1 - %18:_(s32) = G_SELECT %13(s1), %17, %16 - G_STORE %18(s32), %2(p1) :: (store (s32), addrspace 1) + %16:_(i32) = G_CONSTANT i32 2 + %17:_(i32) = G_CONSTANT i32 1 + %18:_(i32) = G_SELECT %13(i1), %17, %16 + G_STORE %18(i32), %2(p1) :: (store (i32), addrspace 1) S_ENDPGM 0 ... @@ -122,66 +122,66 @@ body: | ; GFX10-NEXT: successors: %bb.1(0x30000000), %bb.2(0x50000000) ; GFX10-NEXT: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY2]](s32), [[C]] - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY3]](s32), [[C1]] - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP]](s1) - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[COPY4]](s1) - ; GFX10-NEXT: G_BRCOND [[ICMP1]](s1), %bb.2 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 6 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[COPY2]](i32), [[C]] + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[COPY3]](i32), [[C1]] + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(i1) = COPY [[ICMP]](i1) + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(i1) = COPY [[COPY4]](i1) + ; GFX10-NEXT: G_BRCOND [[ICMP1]](i1), %bb.2 ; GFX10-NEXT: G_BR %bb.1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.1: ; GFX10-NEXT: successors: %bb.2(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY2]](s32), [[C2]] - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1) - ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY5]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY6]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[COPY2]](i32), [[C2]] + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(i1) = COPY [[ICMP2]](i1) + ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(i1) = S_ANDN2_B32 [[COPY5]](i1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(i1) = S_AND_B32 $exec_lo, [[COPY6]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(i1) = S_OR_B32 [[S_ANDN2_B32_]](i1), [[S_AND_B32_]](i1), implicit-def 
$scc ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.2: - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[COPY4]](s1), %bb.0, [[S_OR_B32_]](s1), %bb.1 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1) - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY7]](s1), [[C4]], [[C3]] - ; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(i1) = PHI [[COPY4]](i1), %bb.0, [[S_OR_B32_]](i1), %bb.1 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(i1) = COPY [[PHI]](i1) + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[COPY7]](i1), [[C4]], [[C3]] + ; GFX10-NEXT: G_STORE [[SELECT]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; GFX10-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1(0x30000000), %bb.2(0x50000000) liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(p1) = G_MERGE_VALUES %0(s32), %1(s32) - %3:_(s32) = COPY $vgpr2 - %4:_(s32) = COPY $sgpr0 - %5:_(s32) = G_CONSTANT i32 6 - %6:_(s1) = G_ICMP intpred(uge), %3(s32), %5 - %7:_(s32) = G_CONSTANT i32 0 - %8:_(s1) = G_ICMP intpred(ne), %4(s32), %7 - G_BRCOND %8(s1), %bb.2 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(p1) = G_MERGE_VALUES %0(i32), %1(i32) + %3:_(i32) = COPY $vgpr2 + %4:_(i32) = COPY $sgpr0 + %5:_(i32) = G_CONSTANT i32 6 + %6:_(i1) = G_ICMP intpred(uge), %3(i32), %5 + %7:_(i32) = G_CONSTANT i32 0 + %8:_(i1) = G_ICMP intpred(ne), %4(i32), %7 + G_BRCOND %8(i1), %bb.2 G_BR %bb.1 bb.1: successors: %bb.2(0x80000000) - %9:_(s32) = G_CONSTANT i32 1 - %10:_(s1) = G_ICMP intpred(ult), %3(s32), %9 + %9:_(i32) = G_CONSTANT i32 1 + %10:_(i1) = G_ICMP intpred(ult), %3(i32), %9 bb.2: - %11:_(s1) = G_PHI %6(s1), %bb.0, %10(s1), %bb.1 - %12:_(s32) = G_CONSTANT i32 2 - %13:_(s32) = G_CONSTANT i32 1 - %14:_(s32) = G_SELECT %11(s1), %13, %12 - G_STORE %14(s32), %2(p1) :: (store (s32), addrspace 1) + %11:_(i1) = G_PHI %6(i1), %bb.0, %10(i1), %bb.1 + %12:_(i32) = G_CONSTANT i32 2 + %13:_(i32) = G_CONSTANT i32 1 + %14:_(i32) = G_SELECT %11(i1), %13, %12 + G_STORE %14(i32), %2(p1) :: (store (i32), addrspace 1) S_ENDPGM 0 ... 
@@ -195,80 +195,86 @@ body: | ; GFX10-NEXT: successors: %bb.1(0x80000000) ; GFX10-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](i32), [[COPY2]](i32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i1) = G_CONSTANT i1 true + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(i1) = IMPLICIT_DEF ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.1: ; GFX10-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF]](s1), %bb.0, %22(s1), %bb.1 - ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C1]](s32), %bb.0 - ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %9(s32), %bb.1 - ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI [[C]](s1), %bb.0, %11(s1), %bb.1 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI3]], [[C2]] - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1) - ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI2]](s32) - ; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]] - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C3]] - ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI1]](s32) - ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY3]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY4]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc - ; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(i1) = PHI [[DEF]](i1), %bb.0, %25(i1), %bb.1 + ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(i32) = G_PHI %7(i32), %bb.1, [[C1]](i32), %bb.0 + ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(i32) = G_PHI [[C1]](i32), %bb.0, %9(i32), %bb.1 + ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(i1) = G_PHI [[C]](i1), %bb.0, %11(i1), %bb.1 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32(i1) = COPY [[PHI]](i1) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i1) = G_CONSTANT i1 true + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(i1) = G_XOR [[PHI3]], [[C2]] + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(i1) = COPY [[XOR]](i1) + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[PHI2]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(ogt), [[UITOFP]](f32), [[BITCAST]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[PHI2]], [[C3]] + ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(i32) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.if.break), [[FCMP]](i1), [[PHI1]](i32) + ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(i1) = S_ANDN2_B32 [[COPY3]](i1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(i1) = S_AND_B32 $exec_lo, [[COPY4]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(i1) = S_OR_B32 [[S_ANDN2_B32_]](i1), [[S_AND_B32_]](i1), implicit-def $scc + ; GFX10-NEXT: SI_LOOP [[INT]](i32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.2: - ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1) - ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI4]](s32) - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY5]](s1), [[C5]], [[C4]] - ; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p0) :: (store (s32)) + ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(i32) = G_PHI [[INT]](i32), %bb.1 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(i1) = COPY [[S_OR_B32_]](i1) + ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI4]](i32) + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[C5]](f32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[C4]](f32) + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[COPY5]](i1), [[BITCAST1]], [[BITCAST2]] + ; GFX10-NEXT: G_STORE [[SELECT]](i32), [[MV]](p0) :: (store (i32)) ; GFX10-NEXT: SI_RETURN bb.0: successors: %bb.1(0x80000000) liveins: $vgpr0, $vgpr1, $vgpr2 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(p0) = G_MERGE_VALUES %1(s32), %2(s32) - %4:_(s1) = G_CONSTANT i1 true - %5:_(s32) = G_CONSTANT i32 0 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(p0) = G_MERGE_VALUES %1(i32), %2(i32) + %4:_(i1) = G_CONSTANT i1 true + %5:_(i32) = G_CONSTANT i32 0 bb.1: successors: %bb.2(0x04000000), %bb.1(0x7c000000) - %6:_(s32) = G_PHI %7(s32), %bb.1, %5(s32), %bb.0 - %8:_(s32) = G_PHI %5(s32), %bb.0, %9(s32), %bb.1 - %10:_(s1) = G_PHI %4(s1), %bb.0, %11(s1), %bb.1 - %12:_(s1) = G_CONSTANT i1 true - %11:_(s1) = G_XOR %10, %12 - %13:_(s32) = G_UITOFP %8(s32) - %14:_(s1) = G_FCMP floatpred(ogt), %13(s32), %0 - %15:_(s32) = G_CONSTANT i32 1 - %9:_(s32) = G_ADD %8, %15 - %7:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %14(s1), %6(s32) - SI_LOOP %7(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + %6:_(i32) = G_PHI %7(i32), %bb.1, %5(i32), %bb.0 + %8:_(i32) = G_PHI %5(i32), %bb.0, %9(i32), %bb.1 + %10:_(i1) = G_PHI %4(i1), %bb.0, %11(i1), %bb.1 + %12:_(i1) = G_CONSTANT i1 true + %11:_(i1) = G_XOR %10, %12 + %13:_(f32) = G_UITOFP %8(i32) + %14:_(f32) = G_BITCAST %0(i32) + %15:_(i1) = G_FCMP floatpred(ogt), %13(f32), %14 + %16:_(i32) = G_CONSTANT i32 1 + %9:_(i32) = G_ADD %8, %16 + %7:sreg_32_xm0_xexec(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %15(i1), %6(i32) + SI_LOOP %7(i32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.2 bb.2: - %16:_(s1) = G_PHI %11(s1), %bb.1 - %17:_(s32) = G_PHI %7(s32), %bb.1 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %17(s32) - %18:_(s32) = G_FCONSTANT 
float 0.000000e+00 - %19:_(s32) = G_FCONSTANT float 1.000000e+00 - %20:_(s32) = G_SELECT %16(s1), %19, %18 - G_STORE %20(s32), %3(p0) :: (store (s32)) + %17:_(i1) = G_PHI %11(i1), %bb.1 + %18:_(i32) = G_PHI %7(i32), %bb.1 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %18(i32) + %19:_(f32) = G_FCONSTANT float 0.000000e+00 + %20:_(f32) = G_FCONSTANT float 1.000000e+00 + %21:_(i32) = G_BITCAST %20(f32) + %22:_(i32) = G_BITCAST %19(f32) + %23:_(i32) = G_SELECT %17(i1), %21, %22 + G_STORE %23(i32), %3(p0) :: (store (i32)) SI_RETURN ... @@ -282,165 +288,173 @@ body: | ; GFX10-NEXT: successors: %bb.1(0x80000000) ; GFX10-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[COPY1]](s32), [[C1]] - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[FCMP]](s1) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(ogt), [[BITCAST]](f32), [[C1]] + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(i1) = COPY [[FCMP]](i1) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(i1) = IMPLICIT_DEF ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.1: ; GFX10-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF]](s1), %bb.0, %42(s1), %bb.5 - ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI [[COPY8]](s1), %bb.0, %39(s1), %bb.5 - ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI %15(s32), %bb.5, [[C]](s32), %bb.0 - ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %17(s32), %bb.5 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1) - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: 
[[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1000 - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sle), [[PHI3]](s32), [[C3]] - ; GFX10-NEXT: G_BRCOND [[ICMP]](s1), %bb.4 + ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(i1) = PHI [[DEF]](i1), %bb.0, %46(i1), %bb.5 + ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(i1) = PHI [[COPY8]](i1), %bb.0, %43(i1), %bb.5 + ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(i32) = G_PHI %16(i32), %bb.5, [[C]](i32), %bb.0 + ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(i32) = G_PHI [[C]](i32), %bb.0, %18(i32), %bb.5 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(i1) = COPY [[PHI]](i1) + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(i1) = COPY [[PHI1]](i1) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i1) = G_CONSTANT i1 true + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 1000 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(sle), [[PHI3]](i32), [[C3]] + ; GFX10-NEXT: G_BRCOND [[ICMP]](i1), %bb.4 ; GFX10-NEXT: G_BR %bb.2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.2: ; GFX10-NEXT: successors: %bb.3(0x40000000), %bb.5(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s1) = G_PHI %24(s1), %bb.4, [[C2]](s1), %bb.1 - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI4]], [[C4]] - ; GFX10-NEXT: G_BRCOND [[XOR]](s1), %bb.5 + ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(i1) = G_PHI %25(i1), %bb.4, [[C2]](i1), %bb.1 + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(i1) = G_CONSTANT i1 true + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(i1) = G_XOR [[PHI4]], [[C4]] + ; GFX10-NEXT: G_BRCOND [[XOR]](i1), %bb.5 ; GFX10-NEXT: G_BR %bb.3 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.3: ; GFX10-NEXT: successors: %bb.5(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1000 - ; GFX10-NEXT: G_STORE [[C5]](s32), [[MV1]](p0) :: (store (s32)) + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 1000 + ; GFX10-NEXT: G_STORE [[C5]](i32), [[MV1]](p0) :: (store (i32)) ; GFX10-NEXT: G_BR %bb.5 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.4: ; GFX10-NEXT: successors: %bb.2(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s1) = G_CONSTANT i1 false - ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 1000 - ; GFX10-NEXT: G_STORE [[C7]](s32), [[MV2]](p0) :: (store (s32)) + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(i1) = G_CONSTANT i1 false + ; GFX10-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 1000 + ; GFX10-NEXT: G_STORE [[C7]](i32), [[MV2]](p0) :: (store (i32)) ; GFX10-NEXT: G_BR %bb.2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.5: ; GFX10-NEXT: successors: %bb.6(0x04000000), %bb.1(0x7c000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[C8:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[COPY10]], [[C8]] - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[XOR1]](s1) - ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI3]](s32) - ; GFX10-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]] - ; GFX10-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI3]], [[C9]] - ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP1]](s1), [[PHI2]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[XOR1]](s1) - ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY9]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY11]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), 
[[S_AND_B32_]](s1), implicit-def $scc - ; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: [[C8:%[0-9]+]]:_(i1) = G_CONSTANT i1 true + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(i1) = G_XOR [[COPY10]], [[C8]] + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(i1) = COPY [[XOR1]](i1) + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[PHI3]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX10-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(ogt), [[UITOFP]](f32), [[BITCAST1]] + ; GFX10-NEXT: [[C9:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[PHI3]], [[C9]] + ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP1]](i1), [[PHI2]](i32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(i1) = COPY [[XOR1]](i1) + ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(i1) = S_ANDN2_B32 [[COPY9]](i1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(i1) = S_AND_B32 $exec_lo, [[COPY11]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(i1) = S_OR_B32 [[S_ANDN2_B32_]](i1), [[S_AND_B32_]](i1), implicit-def $scc + ; GFX10-NEXT: SI_LOOP [[INT]](i32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.6 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.6: - ; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.5 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1) - ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI5]](s32) - ; GFX10-NEXT: [[C10:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX10-NEXT: [[C11:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY13]](s1), [[C11]], [[C10]] - ; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p0) :: (store (s32)) + ; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(i32) = G_PHI [[INT]](i32), %bb.5 + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(i1) = COPY [[S_OR_B32_]](i1) + ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI5]](i32) + ; GFX10-NEXT: [[C10:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX10-NEXT: [[C11:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[C11]](f32) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[C10]](f32) + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[COPY13]](i1), [[BITCAST2]], [[BITCAST3]] + ; GFX10-NEXT: G_STORE [[SELECT]](i32), [[MV]](p0) :: (store (i32)) ; GFX10-NEXT: SI_RETURN bb.0: successors: %bb.1(0x80000000) liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = COPY $vgpr3 - %4:_(p0) = G_MERGE_VALUES %2(s32), %3(s32) - %5:_(s32) = COPY $vgpr4 - %6:_(s32) = COPY $vgpr5 - %7:_(p0) = G_MERGE_VALUES %5(s32), %6(s32) - %8:_(s32) = COPY $vgpr6 - %9:_(s32) = COPY $vgpr7 - %10:_(p0) = G_MERGE_VALUES %8(s32), %9(s32) - %11:_(s32) = G_CONSTANT i32 0 - %12:_(s32) = G_FCONSTANT float 1.000000e+00 - %13:_(s1) = G_FCMP floatpred(ogt), %1(s32), %12 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 + %4:_(p0) = G_MERGE_VALUES %2(i32), %3(i32) + %5:_(i32) = COPY $vgpr4 + %6:_(i32) = COPY $vgpr5 + %7:_(p0) = G_MERGE_VALUES %5(i32), %6(i32) + %8:_(i32) = COPY $vgpr6 + %9:_(i32) = COPY $vgpr7 + %10:_(p0) = G_MERGE_VALUES %8(i32), %9(i32) + %11:_(i32) = 
G_CONSTANT i32 0 + %12:_(f32) = G_FCONSTANT float 1.000000e+00 + %13:_(f32) = G_BITCAST %1(i32) + %14:_(i1) = G_FCMP floatpred(ogt), %13(f32), %12 bb.1: successors: %bb.4(0x40000000), %bb.2(0x40000000) + %15:_(i32) = G_PHI %16(i32), %bb.5, %11(i32), %bb.0 + %17:_(i32) = G_PHI %11(i32), %bb.0, %18(i32), %bb.5 + %19:_(i1) = G_PHI %14(i1), %bb.0, %20(i1), %bb.5 + %21:_(i1) = G_CONSTANT i1 true + %22:_(i32) = G_CONSTANT i32 1000 + %23:_(i1) = G_ICMP intpred(sle), %17(i32), %22 + G_BRCOND %23(i1), %bb.4 - %14:_(s32) = G_PHI %15(s32), %bb.5, %11(s32), %bb.0 - %16:_(s32) = G_PHI %11(s32), %bb.0, %17(s32), %bb.5 - %18:_(s1) = G_PHI %13(s1), %bb.0, %19(s1), %bb.5 - %20:_(s1) = G_CONSTANT i1 true - %21:_(s32) = G_CONSTANT i32 1000 - %22:_(s1) = G_ICMP intpred(sle), %16(s32), %21 - G_BRCOND %22(s1), %bb.4 G_BR %bb.2 bb.2: successors: %bb.3(0x40000000), %bb.5(0x40000000) + %24:_(i1) = G_PHI %25(i1), %bb.4, %21(i1), %bb.1 + %26:_(i1) = G_CONSTANT i1 true + %27:_(i1) = G_XOR %24, %26 + G_BRCOND %27(i1), %bb.5 - %23:_(s1) = G_PHI %24(s1), %bb.4, %20(s1), %bb.1 - %25:_(s1) = G_CONSTANT i1 true - %26:_(s1) = G_XOR %23, %25 - G_BRCOND %26(s1), %bb.5 G_BR %bb.3 bb.3: successors: %bb.5(0x80000000) - %27:_(s32) = G_CONSTANT i32 1000 - G_STORE %27(s32), %7(p0) :: (store (s32)) + %28:_(i32) = G_CONSTANT i32 1000 + G_STORE %28(i32), %7(p0) :: (store (i32)) G_BR %bb.5 bb.4: successors: %bb.2(0x80000000) - %24:_(s1) = G_CONSTANT i1 false - %28:_(s32) = G_CONSTANT i32 1000 - G_STORE %28(s32), %10(p0) :: (store (s32)) + %25:_(i1) = G_CONSTANT i1 false + %29:_(i32) = G_CONSTANT i32 1000 + G_STORE %29(i32), %10(p0) :: (store (i32)) G_BR %bb.2 bb.5: successors: %bb.6(0x04000000), %bb.1(0x7c000000) - %29:_(s1) = G_CONSTANT i1 true - %19:_(s1) = G_XOR %18, %29 - %30:_(s32) = G_UITOFP %16(s32) - %31:_(s1) = G_FCMP floatpred(ogt), %30(s32), %0 - %32:_(s32) = G_CONSTANT i32 1 - %17:_(s32) = G_ADD %16, %32 - %15:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %31(s1), %14(s32) - SI_LOOP %15(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + %30:_(i1) = G_CONSTANT i1 true + %20:_(i1) = G_XOR %19, %30 + %31:_(f32) = G_UITOFP %17(i32) + %32:_(f32) = G_BITCAST %0(i32) + %33:_(i1) = G_FCMP floatpred(ogt), %31(f32), %32 + %34:_(i32) = G_CONSTANT i32 1 + %18:_(i32) = G_ADD %17, %34 + %16:sreg_32_xm0_xexec(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %33(i1), %15(i32) + SI_LOOP %16(i32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.6 bb.6: - %33:_(s1) = G_PHI %19(s1), %bb.5 - %34:_(s32) = G_PHI %15(s32), %bb.5 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %34(s32) - %35:_(s32) = G_FCONSTANT float 0.000000e+00 - %36:_(s32) = G_FCONSTANT float 1.000000e+00 - %37:_(s32) = G_SELECT %33(s1), %36, %35 - G_STORE %37(s32), %4(p0) :: (store (s32)) + %35:_(i1) = G_PHI %20(i1), %bb.5 + %36:_(i32) = G_PHI %16(i32), %bb.5 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %36(i32) + %37:_(f32) = G_FCONSTANT float 0.000000e+00 + %38:_(f32) = G_FCONSTANT float 1.000000e+00 + %39:_(i32) = G_BITCAST %38(f32) + %40:_(i32) = G_BITCAST %37(f32) + %41:_(i32) = G_SELECT %35(i1), %39, %40 + G_STORE %41(i32), %4(p0) :: (store (i32)) SI_RETURN ... 
@@ -454,187 +468,187 @@ body: | ; GFX10-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GFX10-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.getpc) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -4294967296 - ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[INT]], [[C]] - ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32) - ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[AND]], [[ZEXT]] - ; GFX10-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR]](s64) - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[INTTOPTR]](p4) :: (load (<8 x s32>)) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[LOAD]](<8 x s32>) - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s128) = G_TRUNC [[BITCAST]](s256) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[TRUNC]](s128) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX10-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mbcnt.lo), [[C2]](s32), [[C1]](s32) - ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mbcnt.hi), [[C2]](s32), [[INT1]](s32) - ; GFX10-NEXT: [[FREEZE:%[0-9]+]]:_(s32) = G_FREEZE [[INT2]] - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[FREEZE]], [[C3]](s32) - ; GFX10-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_BUFFER_LOAD [[BITCAST1]](<4 x s32>), [[C1]](s32), [[SHL]], [[C1]], 0, 0, 0 :: (load (s32), align 1, addrspace 8) - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AMDGPU_BUFFER_LOAD]](s32), [[C1]] - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[FREEZE]], [[C4]] - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[AND1]](s32) - ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[TRUNC1]], [[C5]] - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32(s1) = COPY [[C5]](s1) - ; GFX10-NEXT: G_BRCOND [[XOR]](s1), %bb.2 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[INT:%[0-9]+]]:_(i64) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.getpc) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 -4294967296 + ; GFX10-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[INT]], [[C]] + ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[COPY]](i32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[AND]], [[ZEXT]] + ; GFX10-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR]](i64) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<8 x i32>) = G_LOAD [[INTTOPTR]](p4) :: (load (<8 x i32>)) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i256) = G_BITCAST [[LOAD]](<8 x i32>) + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i128) = G_TRUNC [[BITCAST]](i256) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[TRUNC]](i128) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; GFX10-NEXT: [[INT1:%[0-9]+]]:_(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mbcnt.lo), [[C2]](i32), [[C1]](i32) + ; 
GFX10-NEXT: [[INT2:%[0-9]+]]:_(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mbcnt.hi), [[C2]](i32), [[INT1]](i32) + ; GFX10-NEXT: [[FREEZE:%[0-9]+]]:_(i32) = G_FREEZE [[INT2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[FREEZE]], [[C3]](i32) + ; GFX10-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_BUFFER_LOAD [[BITCAST1]](<4 x i32>), [[C1]](i32), [[SHL]], [[C1]], 0, 0, 0 :: (load (i32), align 1, addrspace 8) + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[AMDGPU_BUFFER_LOAD]](i32), [[C1]] + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[FREEZE]], [[C4]] + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i1) = G_TRUNC [[AND1]](i32) + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(i1) = G_CONSTANT i1 true + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(i1) = G_XOR [[TRUNC1]], [[C5]] + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32(i1) = COPY [[C5]](i1) + ; GFX10-NEXT: G_BRCOND [[XOR]](i1), %bb.2 ; GFX10-NEXT: G_BR %bb.1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.1: ; GFX10-NEXT: successors: %bb.3(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 ; GFX10-NEXT: G_BR %bb.3 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.2: ; GFX10-NEXT: successors: %bb.5(0x40000000), %bb.6(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[COPY3]](s1), %bb.0, %58(s1), %bb.4 - ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %30(s32), %bb.4, [[DEF]](s32), %bb.0 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1) - ; GFX10-NEXT: G_BRCOND [[COPY4]](s1), %bb.5 + ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(i1) = PHI [[COPY3]](i1), %bb.0, %58(i1), %bb.4 + ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(i32) = G_PHI %30(i32), %bb.4, [[DEF]](i32), %bb.0 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(i1) = COPY [[PHI]](i1) + ; GFX10-NEXT: G_BRCOND [[COPY4]](i1), %bb.5 ; GFX10-NEXT: G_BR %bb.6 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.3: ; GFX10-NEXT: successors: %bb.4(0x04000000), %bb.3(0x7c000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI %34(s32), %bb.3, [[C6]](s32), %bb.1 - ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI %36(s32), %bb.3, [[FREEZE]](s32), %bb.1 - ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI %38(s32), %bb.3, [[C6]](s32), %bb.1 - ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:_(s32) = G_AMDGPU_BUFFER_LOAD [[BITCAST1]](<4 x s32>), [[C7]](s32), [[PHI2]], [[C7]], 0, 0, 0 :: (load (s32), align 1, addrspace 8) - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[AMDGPU_BUFFER_LOAD1]], [[PHI4]] - ; GFX10-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI3]], [[C8]] - ; GFX10-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C9]] - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ADD1]](s32), [[C7]] - ; GFX10-NEXT: G_BRCOND [[ICMP1]](s1), %bb.3 + ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(i32) = G_PHI %34(i32), %bb.3, [[C6]](i32), %bb.1 + ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(i32) = G_PHI %36(i32), %bb.3, [[FREEZE]](i32), %bb.1 + ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(i32) = G_PHI %38(i32), %bb.3, [[C6]](i32), %bb.1 + ; GFX10-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:_(i32) = G_AMDGPU_BUFFER_LOAD [[BITCAST1]](<4 x i32>), [[C7]](i32), [[PHI2]], [[C7]], 0, 0, 0 :: (load (i32), align 1, addrspace 8) + ; 
GFX10-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[AMDGPU_BUFFER_LOAD1]], [[PHI4]] + ; GFX10-NEXT: [[C8:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[PHI3]], [[C8]] + ; GFX10-NEXT: [[C9:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[PHI2]], [[C9]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[ADD1]](i32), [[C7]] + ; GFX10-NEXT: G_BRCOND [[ICMP1]](i1), %bb.3 ; GFX10-NEXT: G_BR %bb.4 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.4: ; GFX10-NEXT: successors: %bb.2(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(s32) = G_PHI [[ADD]](s32), %bb.3 - ; GFX10-NEXT: [[C10:%[0-9]+]]:_(s1) = G_CONSTANT i1 false - ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[PHI5]](s32), [[AMDGPU_BUFFER_LOAD]] - ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s1) = G_OR [[ICMP]], [[ICMP2]] - ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s1) - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[C10]](s1) + ; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(i32) = G_PHI [[ADD]](i32), %bb.3 + ; GFX10-NEXT: [[C10:%[0-9]+]]:_(i1) = G_CONSTANT i1 false + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[PHI5]](i32), [[AMDGPU_BUFFER_LOAD]] + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(i1) = G_OR [[ICMP]], [[ICMP2]] + ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i1) + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(i1) = COPY [[C10]](i1) ; GFX10-NEXT: G_BR %bb.2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.5: ; GFX10-NEXT: successors: %bb.6(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) - ; GFX10-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[C11]] + ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[ICMP]](i1) + ; GFX10-NEXT: [[C11:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[C11]] ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.6: - ; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(s32) = G_PHI [[PHI1]](s32), %bb.2, [[OR2]](s32), %bb.5 - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[LOAD]](<8 x s32>) - ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[COPY2]], [[COPY1]] - ; GFX10-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ADD3]], [[C12]](s32) - ; GFX10-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: G_AMDGPU_BUFFER_STORE [[PHI6]](s32), [[UV1]](<4 x s32>), [[C13]](s32), [[SHL1]], [[C13]], 0, 0, 0 :: (store (s32), align 1, addrspace 8) + ; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(i32) = G_PHI [[PHI1]](i32), %bb.2, [[OR2]](i32), %bb.5 + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<4 x i32>), [[UV1:%[0-9]+]]:_(<4 x i32>) = G_UNMERGE_VALUES [[LOAD]](<8 x i32>) + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[COPY2]], [[COPY1]] + ; GFX10-NEXT: [[C12:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ADD3]], [[C12]](i32) + ; GFX10-NEXT: [[C13:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: G_AMDGPU_BUFFER_STORE [[PHI6]](i32), [[UV1]](<4 x i32>), [[C13]](i32), [[SHL1]], [[C13]], 0, 0, 0 :: (store (i32), align 1, addrspace 8) ; GFX10-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $vgpr0 - %3:_(s32) = G_IMPLICIT_DEF - %4:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.getpc) - %5:_(s64) = G_CONSTANT i64 -4294967296 - %6:_(s64) = 
G_AND %4, %5 - %7:_(s64) = G_ZEXT %0(s32) - %8:_(s64) = G_OR %6, %7 - %9:_(p4) = G_INTTOPTR %8(s64) - %10:_(<8 x s32>) = G_LOAD %9(p4) :: (load (<8 x s32>)) - %11:_(s256) = G_BITCAST %10(<8 x s32>) - %12:_(s128) = G_TRUNC %11(s256) - %13:_(<4 x s32>) = G_BITCAST %12(s128) - %15:_(s32) = G_CONSTANT i32 0 - %14:_(s32) = G_CONSTANT i32 -1 - %16:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mbcnt.lo), %14(s32), %15(s32) - %17:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mbcnt.hi), %14(s32), %16(s32) - %18:_(s32) = G_FREEZE %17 - %19:_(s32) = G_CONSTANT i32 2 - %20:_(s32) = G_SHL %18, %19(s32) - %21:_(s32) = G_AMDGPU_BUFFER_LOAD %13(<4 x s32>), %15(s32), %20, %15, 0, 0, 0 :: (load (s32), align 1, addrspace 8) - %22:_(s1) = G_ICMP intpred(eq), %21(s32), %15 - %23:_(s32) = G_CONSTANT i32 1 - %24:_(s32) = G_AND %18, %23 - %25:_(s1) = G_TRUNC %24(s32) - %26:_(s1) = G_CONSTANT i1 true - %27:_(s1) = G_XOR %25, %26 - G_BRCOND %27(s1), %bb.2 + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $vgpr0 + %3:_(i32) = G_IMPLICIT_DEF + %4:_(i64) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.getpc) + %5:_(i64) = G_CONSTANT i64 -4294967296 + %6:_(i64) = G_AND %4, %5 + %7:_(i64) = G_ZEXT %0(i32) + %8:_(i64) = G_OR %6, %7 + %9:_(p4) = G_INTTOPTR %8(i64) + %10:_(<8 x i32>) = G_LOAD %9(p4) :: (load (<8 x i32>)) + %11:_(i256) = G_BITCAST %10(<8 x i32>) + %12:_(i128) = G_TRUNC %11(i256) + %13:_(<4 x i32>) = G_BITCAST %12(i128) + %14:_(i32) = G_CONSTANT i32 0 + %15:_(i32) = G_CONSTANT i32 -1 + %16:_(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mbcnt.lo), %15(i32), %14(i32) + %17:_(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mbcnt.hi), %15(i32), %16(i32) + %18:_(i32) = G_FREEZE %17 + %19:_(i32) = G_CONSTANT i32 2 + %20:_(i32) = G_SHL %18, %19(i32) + %21:_(i32) = G_AMDGPU_BUFFER_LOAD %13(<4 x i32>), %14(i32), %20, %14, 0, 0, 0 :: (load (i32), align 1, addrspace 8) + %22:_(i1) = G_ICMP intpred(eq), %21(i32), %14 + %23:_(i32) = G_CONSTANT i32 1 + %24:_(i32) = G_AND %18, %23 + %25:_(i1) = G_TRUNC %24(i32) + %26:_(i1) = G_CONSTANT i1 true + %27:_(i1) = G_XOR %25, %26 + G_BRCOND %27(i1), %bb.2 G_BR %bb.1 bb.1: successors: %bb.3(0x80000000) - %28:_(s32) = G_CONSTANT i32 0 + %28:_(i32) = G_CONSTANT i32 0 G_BR %bb.3 bb.2: successors: %bb.5(0x40000000), %bb.6(0x40000000) - %29:_(s32) = G_PHI %30(s32), %bb.4, %3(s32), %bb.0 - %31:_(s1) = G_PHI %32(s1), %bb.4, %26(s1), %bb.0 - G_BRCOND %31(s1), %bb.5 + %29:_(i32) = G_PHI %30(i32), %bb.4, %3(i32), %bb.0 + %31:_(i1) = G_PHI %32(i1), %bb.4, %26(i1), %bb.0 + G_BRCOND %31(i1), %bb.5 G_BR %bb.6 bb.3: successors: %bb.4(0x04000000), %bb.3(0x7c000000) - %33:_(s32) = G_PHI %34(s32), %bb.3, %28(s32), %bb.1 - %35:_(s32) = G_PHI %36(s32), %bb.3, %18(s32), %bb.1 - %37:_(s32) = G_PHI %38(s32), %bb.3, %28(s32), %bb.1 - %39:_(s32) = G_CONSTANT i32 0 - %40:_(s32) = G_AMDGPU_BUFFER_LOAD %13(<4 x s32>), %39(s32), %33, %39, 0, 0, 0 :: (load (s32), align 1, addrspace 8) - %38:_(s32) = G_ADD %40, %37 - %41:_(s32) = G_CONSTANT i32 -1 - %36:_(s32) = G_ADD %35, %41 - %42:_(s32) = G_CONSTANT i32 4 - %34:_(s32) = G_ADD %33, %42 - %43:_(s1) = G_ICMP intpred(ne), %36(s32), %39 - G_BRCOND %43(s1), %bb.3 + %33:_(i32) = G_PHI %34(i32), %bb.3, %28(i32), %bb.1 + %35:_(i32) = G_PHI %36(i32), %bb.3, %18(i32), %bb.1 + %37:_(i32) = G_PHI %38(i32), %bb.3, %28(i32), %bb.1 + %39:_(i32) = G_CONSTANT i32 0 + %40:_(i32) = G_AMDGPU_BUFFER_LOAD %13(<4 x i32>), %39(i32), %33, %39, 0, 0, 0 :: (load (i32), align 1, addrspace 8) + %38:_(i32) = G_ADD %40, %37 + %41:_(i32) = G_CONSTANT i32 -1 + %36:_(i32) = G_ADD %35, %41 + 
%42:_(i32) = G_CONSTANT i32 4 + %34:_(i32) = G_ADD %33, %42 + %43:_(i1) = G_ICMP intpred(ne), %36(i32), %39 + G_BRCOND %43(i1), %bb.3 G_BR %bb.4 bb.4: successors: %bb.2(0x80000000) - %44:_(s32) = G_PHI %38(s32), %bb.3 - %32:_(s1) = G_CONSTANT i1 false - %45:_(s1) = G_ICMP intpred(eq), %44(s32), %21 - %46:_(s1) = G_OR %22, %45 - %30:_(s32) = G_ZEXT %46(s1) + %44:_(i32) = G_PHI %38(i32), %bb.3 + %32:_(i1) = G_CONSTANT i1 false + %45:_(i1) = G_ICMP intpred(eq), %44(i32), %21 + %46:_(i1) = G_OR %22, %45 + %30:_(i32) = G_ZEXT %46(i1) G_BR %bb.2 bb.5: successors: %bb.6(0x80000000) - %47:_(s32) = G_ZEXT %22(s1) - %48:_(s32) = G_CONSTANT i32 2 - %49:_(s32) = G_OR %47, %48 + %47:_(i32) = G_ZEXT %22(i1) + %48:_(i32) = G_CONSTANT i32 2 + %49:_(i32) = G_OR %47, %48 bb.6: - %50:_(s32) = G_PHI %29(s32), %bb.2, %49(s32), %bb.5 - %51:_(<4 x s32>), %52:_(<4 x s32>) = G_UNMERGE_VALUES %10(<8 x s32>) - %53:_(s32) = G_ADD %2, %1 - %54:_(s32) = G_CONSTANT i32 2 - %55:_(s32) = G_SHL %53, %54(s32) - %56:_(s32) = G_CONSTANT i32 0 - G_AMDGPU_BUFFER_STORE %50(s32), %52(<4 x s32>), %56(s32), %55, %56, 0, 0, 0 :: (store (s32), align 1, addrspace 8) + %50:_(i32) = G_PHI %29(i32), %bb.2, %49(i32), %bb.5 + %51:_(<4 x i32>), %52:_(<4 x i32>) = G_UNMERGE_VALUES %10(<8 x i32>) + %53:_(i32) = G_ADD %2, %1 + %54:_(i32) = G_CONSTANT i32 2 + %55:_(i32) = G_SHL %53, %54(i32) + %56:_(i32) = G_CONSTANT i32 0 + G_AMDGPU_BUFFER_STORE %50(i32), %52(<4 x i32>), %56(i32), %55, %56, 0, 0, 0 :: (store (i32), align 1, addrspace 8) S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.mir index 5bbe3e4886899..5606c2aacb649 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.mir @@ -11,97 +11,107 @@ body: | ; GFX10-NEXT: successors: %bb.1(0x80000000) ; GFX10-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[COPY1]](s32), [[C1]] - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[FCMP]](s1) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[DEF]](s1) - ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY5]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY4]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc - ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: 
[[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(ogt), [[BITCAST]](f32), [[C1]] + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(i1) = COPY [[FCMP]](i1) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(i1) = IMPLICIT_DEF + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(i1) = COPY [[DEF]](i1) + ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(i1) = S_ANDN2_B32 [[COPY5]](i1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(i1) = S_AND_B32 $exec_lo, [[COPY4]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(i1) = S_OR_B32 [[S_ANDN2_B32_]](i1), [[S_AND_B32_]](i1), implicit-def $scc + ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(i1) = IMPLICIT_DEF ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.1: ; GFX10-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %36(s1), %bb.1 - ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.0, %24(s1), %bb.1 - ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI %9(s32), %bb.1, [[C]](s32), %bb.0 - ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %11(s32), %bb.1 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1) - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[COPY7]](s1) - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[COPY7]], [[C2]] - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1) - ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI3]](s32) - ; GFX10-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]] - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI3]], [[C3]] - ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP1]](s1), [[PHI2]](s32) - ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY9]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY10]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY8]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc - ; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(i1) = PHI [[DEF1]](i1), %bb.0, %40(i1), %bb.1 + ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(i1) = PHI [[S_OR_B32_]](i1), %bb.0, %28(i1), %bb.1 + ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(i32) = G_PHI %10(i32), %bb.1, [[C]](i32), %bb.0 + ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(i32) = G_PHI [[C]](i32), %bb.0, %12(i32), %bb.1 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(i1) = COPY [[PHI]](i1) + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(i1) = COPY [[PHI1]](i1) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(i1) = COPY [[COPY7]](i1) + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(i1) = COPY [[PHI1]](i1) + ; GFX10-NEXT: 
[[C2:%[0-9]+]]:_(i1) = G_CONSTANT i1 true + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(i1) = G_XOR [[COPY7]], [[C2]] + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(i1) = COPY [[XOR]](i1) + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[PHI3]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX10-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(ogt), [[UITOFP]](f32), [[BITCAST1]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[PHI3]], [[C3]] + ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP1]](i1), [[PHI2]](i32) + ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(i1) = S_ANDN2_B32 [[COPY9]](i1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(i1) = S_AND_B32 $exec_lo, [[COPY10]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(i1) = S_OR_B32 [[S_ANDN2_B32_1]](i1), [[S_AND_B32_1]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(i1) = S_ANDN2_B32 [[COPY6]](i1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(i1) = S_AND_B32 $exec_lo, [[COPY8]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(i1) = S_OR_B32 [[S_ANDN2_B32_2]](i1), [[S_AND_B32_2]](i1), implicit-def $scc + ; GFX10-NEXT: SI_LOOP [[INT]](i32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.2: - ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.1 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_2]](s1) - ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI4]](s32) - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY11]](s1), [[C5]], [[C4]] - ; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p0) :: (store (s32)) + ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(i32) = G_PHI [[INT]](i32), %bb.1 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(i1) = COPY [[S_OR_B32_2]](i1) + ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI4]](i32) + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[C5]](f32) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[C4]](f32) + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[COPY11]](i1), [[BITCAST2]], [[BITCAST3]] + ; GFX10-NEXT: G_STORE [[SELECT]](i32), [[MV]](p0) :: (store (i32)) ; GFX10-NEXT: SI_RETURN bb.0: successors: %bb.1(0x80000000) liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = COPY $vgpr3 - %4:_(p0) = G_MERGE_VALUES %2(s32), %3(s32) - %5:_(s32) = G_CONSTANT i32 0 - %6:_(s32) = G_FCONSTANT float 1.000000e+00 - %7:_(s1) = G_FCMP floatpred(ogt), %1(s32), %6 - + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 + %4:_(p0) = G_MERGE_VALUES %2(i32), %3(i32) + %5:_(i32) = G_CONSTANT i32 0 + %6:_(f32) = G_FCONSTANT float 1.000000e+00 + %7:_(f32) = G_BITCAST %1(i32) + %8:_(i1) = G_FCMP floatpred(ogt), %7(f32), %6 bb.1: successors: %bb.2(0x04000000), %bb.1(0x7c000000) - - %8:_(s32) = G_PHI %9(s32), %bb.1, %5(s32), %bb.0 - %10:_(s32) = G_PHI %5(s32), %bb.0, %11(s32), %bb.1 - 
%12:_(s1) = G_PHI %7(s1), %bb.0, %13(s1), %bb.1 - %14:_(s1) = G_CONSTANT i1 true - %13:_(s1) = G_XOR %12, %14 - %15:_(s32) = G_UITOFP %10(s32) - %16:_(s1) = G_FCMP floatpred(ogt), %15(s32), %0 - %17:_(s32) = G_CONSTANT i32 1 - %11:_(s32) = G_ADD %10, %17 - %9:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %16(s1), %8(s32) - SI_LOOP %9(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + %9:_(i32) = G_PHI %10(i32), %bb.1, %5(i32), %bb.0 + %11:_(i32) = G_PHI %5(i32), %bb.0, %12(i32), %bb.1 + %13:_(i1) = G_PHI %8(i1), %bb.0, %14(i1), %bb.1 + %15:_(i1) = G_CONSTANT i1 true + %14:_(i1) = G_XOR %13, %15 + %16:_(f32) = G_UITOFP %11(i32) + %17:_(f32) = G_BITCAST %0(i32) + %18:_(i1) = G_FCMP floatpred(ogt), %16(f32), %17 + %19:_(i32) = G_CONSTANT i32 1 + %12:_(i32) = G_ADD %11, %19 + %10:sreg_32_xm0_xexec(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %18(i1), %9(i32) + SI_LOOP %10(i32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.2 bb.2: - %18:_(s1) = G_PHI %12(s1), %bb.1 - %19:_(s32) = G_PHI %9(s32), %bb.1 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %19(s32) - %20:_(s32) = G_FCONSTANT float 0.000000e+00 - %21:_(s32) = G_FCONSTANT float 1.000000e+00 - %22:_(s32) = G_SELECT %18(s1), %21, %20 - G_STORE %22(s32), %4(p0) :: (store (s32)) + %20:_(i1) = G_PHI %13(i1), %bb.1 + %21:_(i32) = G_PHI %10(i32), %bb.1 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %21(i32) + %22:_(f32) = G_FCONSTANT float 0.000000e+00 + %23:_(f32) = G_FCONSTANT float 1.000000e+00 + %24:_(i32) = G_BITCAST %23(f32) + %25:_(i32) = G_BITCAST %22(f32) + %26:_(i32) = G_SELECT %20(i1), %24, %25 + G_STORE %26(i32), %4(p0) :: (store (i32)) SI_RETURN + + + + ... --- @@ -114,128 +124,140 @@ body: | ; GFX10-NEXT: successors: %bb.1(0x80000000) ; GFX10-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[C1]](s1) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[DEF]](s1) - ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY5]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY4]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc - ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i1) = G_CONSTANT 
i1 true + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec(i1) = COPY [[C1]](i1) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32_xm0_xexec(i1) = IMPLICIT_DEF + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec(i1) = COPY [[DEF]](i1) + ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32_xm0_xexec(i1) = S_ANDN2_B32 [[COPY5]](i1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec(i1) = S_AND_B32 $exec_lo, [[COPY4]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec(i1) = S_OR_B32 [[S_ANDN2_B32_]](i1), [[S_AND_B32_]](i1), implicit-def $scc + ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(i1) = IMPLICIT_DEF ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.1: ; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %41(s1), %bb.3 - ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[S_OR_B32_]](s1), %bb.0, %27(s1), %bb.3 - ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %9(s32), %bb.3 + ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(i1) = PHI [[DEF1]](i1), %bb.0, %43(i1), %bb.3 + ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32_xm0_xexec(i1) = PHI [[S_OR_B32_]](i1), %bb.0, %29(i1), %bb.3 + ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(i32) = G_PHI [[C]](i32), %bb.0, %9(i32), %bb.3 ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(p1) = G_PHI [[MV]](p1), %bb.0, %11(p1), %bb.3 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1) - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI1]](s1) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[COPY7]](s1) - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI1]](s1) - ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY8]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_1]](s1) - ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY7]](s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(i1) = COPY [[PHI]](i1) + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec(i1) = COPY [[PHI1]](i1) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(i1) = COPY [[COPY7]](i1) + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32_xm0_xexec(i1) = COPY [[PHI1]](i1) + ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(i1) = S_ANDN2_B32 [[COPY6]](i1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(i1) = S_AND_B32 $exec_lo, [[COPY8]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(i1) = S_OR_B32 [[S_ANDN2_B32_1]](i1), [[S_AND_B32_1]](i1), implicit-def $scc + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(i1) = COPY [[S_OR_B32_1]](i1) + ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(i32) = SI_IF [[COPY7]](i1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.2: ; GFX10-NEXT: successors: %bb.3(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PHI3]](p1) :: (load (s32), addrspace 1) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[LOAD]](s32), [[C2]] - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP]](s1) - ; GFX10-NEXT: 
[[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY10]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY11]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PHI3]](p1) :: (load (i32), addrspace 1) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[LOAD]](i32), [[C2]] + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(i1) = COPY [[ICMP]](i1) + ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(i1) = S_ANDN2_B32 [[COPY10]](i1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(i1) = S_AND_B32 $exec_lo, [[COPY11]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(i1) = S_OR_B32 [[S_ANDN2_B32_2]](i1), [[S_AND_B32_2]](i1), implicit-def $scc ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.3: ; GFX10-NEXT: successors: %bb.4(0x04000000), %bb.1(0x7c000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI4:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_1]](s1), %bb.1, [[S_OR_B32_2]](s1), %bb.2 - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[PHI4]](s1) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[COPY12]](s1) - ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32) - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[PHI3]], [[C3]](s64) - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = nsw G_ADD [[PHI2]], [[C4]] - ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sge), [[ADD]](s32), [[C5]] - ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY9]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY13]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_3]](s1), [[S_AND_B32_3]](s1), implicit-def $scc - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[COPY7]](s1) - ; GFX10-NEXT: G_BRCOND [[ICMP1]](s1), %bb.1 + ; GFX10-NEXT: [[PHI4:%[0-9]+]]:sreg_32(i1) = PHI [[S_OR_B32_1]](i1), %bb.1, [[S_OR_B32_2]](i1), %bb.2 + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(i1) = COPY [[PHI4]](i1) + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32_xm0_xexec(i1) = COPY [[COPY12]](i1) + ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](i32) + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[PHI3]], [[C3]](i64) + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(i32) = nsw G_ADD [[PHI2]], [[C4]] + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(sge), [[ADD]](i32), [[C5]] + ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32_xm0_xexec(i1) = S_ANDN2_B32 [[COPY9]](i1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32_xm0_xexec(i1) = S_AND_B32 $exec_lo, [[COPY13]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32_xm0_xexec(i1) = S_OR_B32 [[S_ANDN2_B32_3]](i1), [[S_AND_B32_3]](i1), implicit-def $scc + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(i1) = COPY [[COPY7]](i1) + ; GFX10-NEXT: G_BRCOND [[ICMP1]](i1), %bb.1 ; 
GFX10-NEXT: G_BR %bb.4 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.4: - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[COPY14]](s1) - ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY15]](s1), [[C7]], [[C6]] - ; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV1]](p0) :: (store (s32)) + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(i1) = COPY [[COPY14]](i1) + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX10-NEXT: [[C7:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[C7]](f32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[C6]](f32) + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[COPY15]](i1), [[BITCAST]], [[BITCAST1]] + ; GFX10-NEXT: G_STORE [[SELECT]](i32), [[MV1]](p0) :: (store (i32)) ; GFX10-NEXT: SI_RETURN bb.0: successors: %bb.1(0x80000000) liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 - %0:_(s32) = COPY $vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(p1) = G_MERGE_VALUES %0(s32), %1(s32) - %3:_(s32) = COPY $vgpr3 - %4:_(s32) = COPY $vgpr4 - %5:_(p0) = G_MERGE_VALUES %3(s32), %4(s32) - %6:_(s32) = G_CONSTANT i32 -1 - %7:_(s1) = G_CONSTANT i1 true + %0:_(i32) = COPY $vgpr1 + %1:_(i32) = COPY $vgpr2 + %2:_(p1) = G_MERGE_VALUES %0(i32), %1(i32) + %3:_(i32) = COPY $vgpr3 + %4:_(i32) = COPY $vgpr4 + %5:_(p0) = G_MERGE_VALUES %3(i32), %4(i32) + %6:_(i32) = G_CONSTANT i32 -1 + %7:_(i1) = G_CONSTANT i1 true bb.1: successors: %bb.2(0x40000000), %bb.3(0x40000000) - %8:_(s32) = G_PHI %6(s32), %bb.0, %9(s32), %bb.3 + %8:_(i32) = G_PHI %6(i32), %bb.0, %9(i32), %bb.3 %10:_(p1) = G_PHI %2(p1), %bb.0, %11(p1), %bb.3 - %12:sreg_32_xm0_xexec(s1) = G_PHI %7(s1), %bb.0, %13(s1), %bb.3 - %14:sreg_32_xm0_xexec(s32) = SI_IF %12(s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec + %12:sreg_32_xm0_xexec(i1) = G_PHI %7(i1), %bb.0, %13(i1), %bb.3 + %14:sreg_32_xm0_xexec(i32) = SI_IF %12(i1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.2 bb.2: successors: %bb.3(0x80000000) - %15:_(s32) = G_LOAD %10(p1) :: (load (s32), addrspace 1) - %16:_(s32) = G_CONSTANT i32 0 - %17:_(s1) = G_ICMP intpred(eq), %15(s32), %16 + %15:_(i32) = G_LOAD %10(p1) :: (load (i32), addrspace 1) + %16:_(i32) = G_CONSTANT i32 0 + %17:_(i1) = G_ICMP intpred(eq), %15(i32), %16 bb.3: successors: %bb.4(0x04000000), %bb.1(0x7c000000) - %13:_(s1) = G_PHI %17(s1), %bb.2, %12(s1), %bb.1 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %14(s32) - %18:_(s64) = G_CONSTANT i64 4 - %11:_(p1) = G_PTR_ADD %10, %18(s64) - %19:_(s32) = G_CONSTANT i32 1 - %9:_(s32) = nsw G_ADD %8, %19 - %20:_(s32) = G_CONSTANT i32 10 - %21:_(s1) = G_ICMP intpred(sge), %9(s32), %20 - G_BRCOND %21(s1), %bb.1 + %13:_(i1) = G_PHI %17(i1), %bb.2, %12(i1), %bb.1 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %14(i32) + %18:_(i64) = G_CONSTANT i64 4 + %11:_(p1) = G_PTR_ADD %10, %18(i64) + %19:_(i32) = G_CONSTANT i32 1 + %9:_(i32) = nsw G_ADD %8, %19 + %20:_(i32) = G_CONSTANT i32 10 + %21:_(i1) = G_ICMP intpred(sge), %9(i32), %20 + G_BRCOND %21(i1), %bb.1 G_BR %bb.4 bb.4: - %22:_(s1) = G_PHI %12(s1), %bb.3 - %23:_(s32) = G_FCONSTANT float 0.000000e+00 - %24:_(s32) = G_FCONSTANT float 1.000000e+00 - %25:_(s32) = G_SELECT %22(s1), %24, %23 - G_STORE %25(s32), %5(p0) :: (store (s32)) + %22:_(i1) = G_PHI %12(i1), %bb.3 + %23:_(f32) = G_FCONSTANT float 0.000000e+00 + %24:_(f32) = 
G_FCONSTANT float 1.000000e+00 + %25:_(i32) = G_BITCAST %24(f32) + %26:_(i32) = G_BITCAST %23(f32) + %27:_(i32) = G_SELECT %22(i1), %25, %26 + G_STORE %27(i32), %5(p0) :: (store (i32)) SI_RETURN + + + + + + + + ... --- @@ -248,88 +270,100 @@ body: | ; GFX10-NEXT: successors: %bb.1(0x80000000) ; GFX10-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[COPY1]](s32), [[C1]] - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[FCMP]](s1) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(ogt), [[BITCAST]](f32), [[C1]] + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(i1) = COPY [[FCMP]](i1) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(i1) = IMPLICIT_DEF ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.1: ; GFX10-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF]](s1), %bb.0, %27(s1), %bb.1 - ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI [[COPY4]](s1), %bb.0, %24(s1), %bb.1 - ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI %9(s32), %bb.1, [[C]](s32), %bb.0 - ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %11(s32), %bb.1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1) - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[COPY6]], [[C2]] - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1) - ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI3]](s32) - ; GFX10-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]] - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI3]], [[C3]] - ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP1]](s1), [[PHI2]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1) - ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY5]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY7]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc - ; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(i1) = PHI [[DEF]](i1), %bb.0, %31(i1), %bb.1 + ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(i1) = 
PHI [[COPY4]](i1), %bb.0, %28(i1), %bb.1 + ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(i32) = G_PHI %10(i32), %bb.1, [[C]](i32), %bb.0 + ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(i32) = G_PHI [[C]](i32), %bb.0, %12(i32), %bb.1 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(i1) = COPY [[PHI]](i1) + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(i1) = COPY [[PHI1]](i1) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i1) = G_CONSTANT i1 true + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(i1) = G_XOR [[COPY6]], [[C2]] + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(i1) = COPY [[XOR]](i1) + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[PHI3]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX10-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(ogt), [[UITOFP]](f32), [[BITCAST1]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[PHI3]], [[C3]] + ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP1]](i1), [[PHI2]](i32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(i1) = COPY [[XOR]](i1) + ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(i1) = S_ANDN2_B32 [[COPY5]](i1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(i1) = S_AND_B32 $exec_lo, [[COPY7]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(i1) = S_OR_B32 [[S_ANDN2_B32_]](i1), [[S_AND_B32_]](i1), implicit-def $scc + ; GFX10-NEXT: SI_LOOP [[INT]](i32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.2: - ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.1 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1) - ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI4]](s32) - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY9]](s1), [[C5]], [[C4]] - ; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p0) :: (store (s32)) + ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(i32) = G_PHI [[INT]](i32), %bb.1 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(i1) = COPY [[S_OR_B32_]](i1) + ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI4]](i32) + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[C5]](f32) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[C4]](f32) + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[COPY9]](i1), [[BITCAST2]], [[BITCAST3]] + ; GFX10-NEXT: G_STORE [[SELECT]](i32), [[MV]](p0) :: (store (i32)) ; GFX10-NEXT: SI_RETURN bb.0: successors: %bb.1(0x80000000) liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = COPY $vgpr3 - %4:_(p0) = G_MERGE_VALUES %2(s32), %3(s32) - %5:_(s32) = G_CONSTANT i32 0 - %6:_(s32) = G_FCONSTANT float 1.000000e+00 - %7:_(s1) = G_FCMP floatpred(ogt), %1(s32), %6 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 + %4:_(p0) = G_MERGE_VALUES %2(i32), %3(i32) + %5:_(i32) = G_CONSTANT i32 0 + %6:_(f32) = G_FCONSTANT float 1.000000e+00 + %7:_(f32) = G_BITCAST %1(i32) + %8:_(i1) = G_FCMP floatpred(ogt), %7(f32), %6 bb.1: successors: %bb.2(0x04000000), %bb.1(0x7c000000) + %9:_(i32) = G_PHI %10(i32), %bb.1, %5(i32), %bb.0 
+ %11:_(i32) = G_PHI %5(i32), %bb.0, %12(i32), %bb.1 + %13:_(i1) = G_PHI %8(i1), %bb.0, %14(i1), %bb.1 + %15:_(i1) = G_CONSTANT i1 true + %14:_(i1) = G_XOR %13, %15 + %16:_(f32) = G_UITOFP %11(i32) + %17:_(f32) = G_BITCAST %0(i32) + %18:_(i1) = G_FCMP floatpred(ogt), %16(f32), %17 + %19:_(i32) = G_CONSTANT i32 1 + %12:_(i32) = G_ADD %11, %19 + %10:sreg_32_xm0_xexec(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %18(i1), %9(i32) + SI_LOOP %10(i32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec - %8:_(s32) = G_PHI %9(s32), %bb.1, %5(s32), %bb.0 - %10:_(s32) = G_PHI %5(s32), %bb.0, %11(s32), %bb.1 - %12:_(s1) = G_PHI %7(s1), %bb.0, %13(s1), %bb.1 - %14:_(s1) = G_CONSTANT i1 true - %13:_(s1) = G_XOR %12, %14 - %15:_(s32) = G_UITOFP %10(s32) - %16:_(s1) = G_FCMP floatpred(ogt), %15(s32), %0 - %17:_(s32) = G_CONSTANT i32 1 - %11:_(s32) = G_ADD %10, %17 - %9:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %16(s1), %8(s32) - SI_LOOP %9(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.2 bb.2: - %18:_(s1) = G_PHI %13(s1), %bb.1 - %19:_(s32) = G_PHI %9(s32), %bb.1 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %19(s32) - %20:_(s32) = G_FCONSTANT float 0.000000e+00 - %21:_(s32) = G_FCONSTANT float 1.000000e+00 - %22:_(s32) = G_SELECT %18(s1), %21, %20 - G_STORE %22(s32), %4(p0) :: (store (s32)) + %20:_(i1) = G_PHI %14(i1), %bb.1 + %21:_(i32) = G_PHI %10(i32), %bb.1 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %21(i32) + %22:_(f32) = G_FCONSTANT float 0.000000e+00 + %23:_(f32) = G_FCONSTANT float 1.000000e+00 + %24:_(i32) = G_BITCAST %23(f32) + %25:_(i32) = G_BITCAST %22(f32) + %26:_(i32) = G_SELECT %20(i1), %24, %25 + G_STORE %26(i32), %4(p0) :: (store (i32)) SI_RETURN + + + + ... 
--- @@ -342,216 +376,232 @@ body: | ; GFX10-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GFX10-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[C1]](s1) - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[COPY5]](s1) - ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](i32), [[COPY2]](i32) + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY3]](i32), [[COPY4]](i32) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(i1) = G_ICMP intpred(eq), [[COPY]](i32), [[C]] + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i1) = G_CONSTANT i1 true + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(i1) = COPY [[C1]](i1) + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec(i1) = COPY [[COPY5]](i1) + ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(i32) = SI_IF [[ICMP]](i1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.1: ; GFX10-NEXT: successors: %bb.3(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF - ; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF - ; GFX10-NEXT: [[DEF3:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(i1) = IMPLICIT_DEF + ; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32(i1) = IMPLICIT_DEF + ; GFX10-NEXT: [[DEF3:%[0-9]+]]:sreg_32(i1) = IMPLICIT_DEF ; GFX10-NEXT: G_BR %bb.3 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.2: ; GFX10-NEXT: successors: %bb.5(0x40000000), %bb.6(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[COPY5]](s1), %bb.0, %40(s1), %bb.8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI]](s1) - ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32) - ; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY7]](s1), %bb.6, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec(i1) = PHI [[COPY5]](i1), %bb.0, %40(i1), %bb.8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec(i1) = COPY [[PHI]](i1) + ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS 
intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](i32) + ; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(i32) = SI_IF [[COPY7]](i1), %bb.6, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.5 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.3: ; GFX10-NEXT: successors: %bb.4(0x40000000), %bb.7(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI [[DEF3]](s1), %bb.1, %73(s1), %bb.7 - ; GFX10-NEXT: [[PHI2:%[0-9]+]]:sreg_32(s1) = PHI [[DEF2]](s1), %bb.1, %62(s1), %bb.7 - ; GFX10-NEXT: [[PHI3:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.1, %49(s1), %bb.7 - ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[C2]](s32), %bb.1, %17(s32), %bb.7 - ; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(s32) = G_PHI %19(s32), %bb.7, [[C2]](s32), %bb.1 - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1) - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[PHI2]](s1) - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]](s1) - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1) - ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI5]](s32) - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C4]](s32) - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[SHL]](s64) - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1) - ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C5]] - ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY10]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY12]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1) - ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY9]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY11]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_1]](s1) - ; GFX10-NEXT: [[SI_IF2:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP1]](s1), %bb.7, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(i1) = PHI [[DEF3]](i1), %bb.1, %73(i1), %bb.7 + ; GFX10-NEXT: [[PHI2:%[0-9]+]]:sreg_32(i1) = PHI [[DEF2]](i1), %bb.1, %62(i1), %bb.7 + ; GFX10-NEXT: [[PHI3:%[0-9]+]]:sreg_32(i1) = PHI [[DEF1]](i1), %bb.1, %49(i1), %bb.7 + ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(i32) = G_PHI [[C2]](i32), %bb.1, %17(i32), %bb.7 + ; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(i32) = G_PHI %19(i32), %bb.7, [[C2]](i32), %bb.1 + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(i1) = COPY [[PHI1]](i1) + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(i1) = COPY [[PHI2]](i1) + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(i1) = COPY [[PHI3]](i1) + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i1) = G_CONSTANT i1 true + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(i1) = COPY [[C3]](i1) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(i1) = COPY [[C3]](i1) + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(i64) = G_SEXT [[PHI5]](i32) + ; GFX10-NEXT: 
[[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[SEXT]], [[C4]](i32) + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[SHL]](i64) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i32), addrspace 1) + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(i1) = G_ICMP intpred(ne), [[LOAD]](i32), [[C5]] + ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(i1) = S_ANDN2_B32 [[COPY10]](i1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(i1) = S_AND_B32 $exec_lo, [[COPY12]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(i1) = S_OR_B32 [[S_ANDN2_B32_]](i1), [[S_AND_B32_]](i1), implicit-def $scc + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(i1) = COPY [[S_OR_B32_]](i1) + ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(i1) = S_ANDN2_B32 [[COPY9]](i1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(i1) = S_AND_B32 $exec_lo, [[COPY11]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(i1) = S_OR_B32 [[S_ANDN2_B32_1]](i1), [[S_AND_B32_1]](i1), implicit-def $scc + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(i1) = COPY [[S_OR_B32_1]](i1) + ; GFX10-NEXT: [[SI_IF2:%[0-9]+]]:sreg_32_xm0_xexec(i32) = SI_IF [[ICMP1]](i1), %bb.7, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.4 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.4: ; GFX10-NEXT: successors: %bb.7(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s1) = G_CONSTANT i1 false - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[C6]](s1) - ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI5]], [[C7]] - ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[PHI5]](s32), [[COPY]] - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1) - ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY13]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY15]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY14]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY16]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_3]](s1), [[S_AND_B32_3]](s1), implicit-def $scc + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(i1) = G_CONSTANT i1 false + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(i1) = COPY [[C6]](i1) + ; GFX10-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[PHI5]], [[C7]] + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[PHI5]](i32), [[COPY]] + ; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32(i1) = COPY [[ICMP2]](i1) + ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(i1) = S_ANDN2_B32 [[COPY13]](i1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(i1) = S_AND_B32 $exec_lo, [[COPY15]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(i1) = S_OR_B32 [[S_ANDN2_B32_2]](i1), [[S_AND_B32_2]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(i1) = S_ANDN2_B32 [[COPY14]](i1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(i1) = S_AND_B32 $exec_lo, 
[[COPY16]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32(i1) = S_OR_B32 [[S_ANDN2_B32_3]](i1), [[S_AND_B32_3]](i1), implicit-def $scc ; GFX10-NEXT: G_BR %bb.7 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.5: ; GFX10-NEXT: successors: %bb.6(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; GFX10-NEXT: G_STORE [[C8]](s32), [[MV1]](p0) :: (store (s32)) + ; GFX10-NEXT: [[C8:%[0-9]+]]:_(i32) = G_CONSTANT i32 5 + ; GFX10-NEXT: G_STORE [[C8]](i32), [[MV1]](p0) :: (store (i32)) ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.6: - ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF1]](s32) + ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF1]](i32) ; GFX10-NEXT: SI_RETURN ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.7: ; GFX10-NEXT: successors: %bb.8(0x04000000), %bb.3(0x7c000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI6:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_1]](s1), %bb.3, [[S_OR_B32_3]](s1), %bb.4 - ; GFX10-NEXT: [[PHI7:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.3, [[S_OR_B32_2]](s1), %bb.4 - ; GFX10-NEXT: [[PHI8:%[0-9]+]]:_(s32) = G_PHI [[ADD]](s32), %bb.4, [[DEF]](s32), %bb.3 - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:sreg_32(s1) = COPY [[PHI6]](s1) - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:sreg_32(s1) = COPY [[PHI7]](s1) - ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF2]](s32) - ; GFX10-NEXT: [[C9:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[COPY18]], [[C9]] - ; GFX10-NEXT: [[COPY19:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1) - ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY17]](s1), [[PHI4]](s32) - ; GFX10-NEXT: [[S_ANDN2_B32_4:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_4:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY19]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_4:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_4]](s1), [[S_AND_B32_4]](s1), implicit-def $scc - ; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: [[PHI6:%[0-9]+]]:sreg_32(i1) = PHI [[S_OR_B32_1]](i1), %bb.3, [[S_OR_B32_3]](i1), %bb.4 + ; GFX10-NEXT: [[PHI7:%[0-9]+]]:sreg_32(i1) = PHI [[S_OR_B32_]](i1), %bb.3, [[S_OR_B32_2]](i1), %bb.4 + ; GFX10-NEXT: [[PHI8:%[0-9]+]]:_(i32) = G_PHI [[ADD]](i32), %bb.4, [[DEF]](i32), %bb.3 + ; GFX10-NEXT: [[COPY17:%[0-9]+]]:sreg_32(i1) = COPY [[PHI6]](i1) + ; GFX10-NEXT: [[COPY18:%[0-9]+]]:sreg_32(i1) = COPY [[PHI7]](i1) + ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF2]](i32) + ; GFX10-NEXT: [[C9:%[0-9]+]]:_(i1) = G_CONSTANT i1 true + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(i1) = G_XOR [[COPY18]], [[C9]] + ; GFX10-NEXT: [[COPY19:%[0-9]+]]:sreg_32(i1) = COPY [[XOR]](i1) + ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY17]](i1), [[PHI4]](i32) + ; GFX10-NEXT: [[S_ANDN2_B32_4:%[0-9]+]]:sreg_32(i1) = S_ANDN2_B32 [[COPY8]](i1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_4:%[0-9]+]]:sreg_32(i1) = S_AND_B32 $exec_lo, [[COPY19]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_4:%[0-9]+]]:sreg_32(i1) = S_OR_B32 [[S_ANDN2_B32_4]](i1), [[S_AND_B32_4]](i1), implicit-def $scc + ; GFX10-NEXT: SI_LOOP [[INT]](i32), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.8 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.8: ; 
GFX10-NEXT: successors: %bb.2(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI9:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.7 - ; GFX10-NEXT: [[COPY20:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_4]](s1) - ; GFX10-NEXT: [[COPY21:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[COPY20]](s1) - ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI9]](s32) - ; GFX10-NEXT: [[S_ANDN2_B32_5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY21]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_5]](s1), [[S_AND_B32_5]](s1), implicit-def $scc + ; GFX10-NEXT: [[PHI9:%[0-9]+]]:_(i32) = G_PHI [[INT]](i32), %bb.7 + ; GFX10-NEXT: [[COPY20:%[0-9]+]]:sreg_32(i1) = COPY [[S_OR_B32_4]](i1) + ; GFX10-NEXT: [[COPY21:%[0-9]+]]:sreg_32_xm0_xexec(i1) = COPY [[COPY20]](i1) + ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI9]](i32) + ; GFX10-NEXT: [[S_ANDN2_B32_5:%[0-9]+]]:sreg_32_xm0_xexec(i1) = S_ANDN2_B32 [[COPY6]](i1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_5:%[0-9]+]]:sreg_32_xm0_xexec(i1) = S_AND_B32 $exec_lo, [[COPY21]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_5:%[0-9]+]]:sreg_32_xm0_xexec(i1) = S_OR_B32 [[S_ANDN2_B32_5]](i1), [[S_AND_B32_5]](i1), implicit-def $scc ; GFX10-NEXT: G_BR %bb.2 bb.0: successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(p1) = G_MERGE_VALUES %1(s32), %2(s32) - %4:_(s32) = COPY $vgpr3 - %5:_(s32) = COPY $vgpr4 - %6:_(p0) = G_MERGE_VALUES %4(s32), %5(s32) - %7:_(s32) = G_IMPLICIT_DEF - %8:_(s32) = G_CONSTANT i32 0 - %9:sreg_32_xm0_xexec(s1) = G_ICMP intpred(eq), %0(s32), %8 - %10:_(s1) = G_CONSTANT i1 true - %11:sreg_32_xm0_xexec(s32) = SI_IF %9(s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(p1) = G_MERGE_VALUES %1(i32), %2(i32) + %4:_(i32) = COPY $vgpr3 + %5:_(i32) = COPY $vgpr4 + %6:_(p0) = G_MERGE_VALUES %4(i32), %5(i32) + %7:_(i32) = G_IMPLICIT_DEF + %8:_(i32) = G_CONSTANT i32 0 + %9:sreg_32_xm0_xexec(i1) = G_ICMP intpred(eq), %0(i32), %8 + %10:_(i1) = G_CONSTANT i1 true + %11:sreg_32_xm0_xexec(i32) = SI_IF %9(i1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.1 bb.1: successors: %bb.3(0x80000000) - %12:_(s32) = G_CONSTANT i32 0 + %12:_(i32) = G_CONSTANT i32 0 G_BR %bb.3 bb.2: successors: %bb.5(0x40000000), %bb.6(0x40000000) - %13:sreg_32_xm0_xexec(s1) = G_PHI %14(s1), %bb.8, %10(s1), %bb.0 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %11(s32) - %15:sreg_32_xm0_xexec(s32) = SI_IF %13(s1), %bb.6, implicit-def $exec, implicit-def $scc, implicit $exec + %13:sreg_32_xm0_xexec(i1) = G_PHI %14(i1), %bb.8, %10(i1), %bb.0 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %11(i32) + %15:sreg_32_xm0_xexec(i32) = SI_IF %13(i1), %bb.6, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.5 bb.3: successors: %bb.4(0x40000000), %bb.7(0x40000000) - %16:_(s32) = G_PHI %12(s32), %bb.1, %17(s32), %bb.7 - %18:_(s32) = G_PHI %19(s32), %bb.7, %12(s32), %bb.1 - %20:_(s1) = G_CONSTANT i1 true - %21:_(s64) = G_SEXT %18(s32) - %22:_(s32) = G_CONSTANT i32 2 - %23:_(s64) = G_SHL %21, %22(s32) - %24:_(p1) = G_PTR_ADD %3, %23(s64) - %25:_(s32) = G_LOAD %24(p1) :: 
(load (s32), addrspace 1) - %26:_(s32) = G_CONSTANT i32 0 - %27:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %25(s32), %26 - %28:sreg_32_xm0_xexec(s32) = SI_IF %27(s1), %bb.7, implicit-def $exec, implicit-def $scc, implicit $exec + %16:_(i32) = G_PHI %12(i32), %bb.1, %17(i32), %bb.7 + %18:_(i32) = G_PHI %19(i32), %bb.7, %12(i32), %bb.1 + %20:_(i1) = G_CONSTANT i1 true + %21:_(i64) = G_SEXT %18(i32) + %22:_(i32) = G_CONSTANT i32 2 + %23:_(i64) = G_SHL %21, %22(i32) + %24:_(p1) = G_PTR_ADD %3, %23(i64) + %25:_(i32) = G_LOAD %24(p1) :: (load (i32), addrspace 1) + %26:_(i32) = G_CONSTANT i32 0 + %27:sreg_32_xm0_xexec(i1) = G_ICMP intpred(ne), %25(i32), %26 + %28:sreg_32_xm0_xexec(i32) = SI_IF %27(i1), %bb.7, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.4 bb.4: successors: %bb.7(0x80000000) - %29:_(s1) = G_CONSTANT i1 false - %30:_(s32) = G_CONSTANT i32 1 - %31:_(s32) = G_ADD %18, %30 - %32:_(s1) = G_ICMP intpred(slt), %18(s32), %0 + %29:_(i1) = G_CONSTANT i1 false + %30:_(i32) = G_CONSTANT i32 1 + %31:_(i32) = G_ADD %18, %30 + %32:_(i1) = G_ICMP intpred(slt), %18(i32), %0 G_BR %bb.7 bb.5: successors: %bb.6(0x80000000) - %33:_(s32) = G_CONSTANT i32 5 - G_STORE %33(s32), %6(p0) :: (store (s32)) + %33:_(i32) = G_CONSTANT i32 5 + G_STORE %33(i32), %6(p0) :: (store (i32)) bb.6: - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %15(s32) + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %15(i32) SI_RETURN bb.7: successors: %bb.8(0x04000000), %bb.3(0x7c000000) - %19:_(s32) = G_PHI %31(s32), %bb.4, %7(s32), %bb.3 - %34:_(s1) = G_PHI %29(s1), %bb.4, %20(s1), %bb.3 - %35:_(s1) = G_PHI %32(s1), %bb.4, %20(s1), %bb.3 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %28(s32) - %36:_(s1) = G_CONSTANT i1 true - %37:_(s1) = G_XOR %34, %36 - %17:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %35(s1), %16(s32) - SI_LOOP %17(s32), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec + %19:_(i32) = G_PHI %31(i32), %bb.4, %7(i32), %bb.3 + %34:_(i1) = G_PHI %29(i1), %bb.4, %20(i1), %bb.3 + %35:_(i1) = G_PHI %32(i1), %bb.4, %20(i1), %bb.3 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %28(i32) + %36:_(i1) = G_CONSTANT i1 true + %37:_(i1) = G_XOR %34, %36 + %17:sreg_32_xm0_xexec(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %35(i1), %16(i32) + SI_LOOP %17(i32), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.8 bb.8: successors: %bb.2(0x80000000) - %14:_(s1) = G_PHI %37(s1), %bb.7 - %38:_(s32) = G_PHI %17(s32), %bb.7 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %38(s32) + %14:_(i1) = G_PHI %37(i1), %bb.7 + %38:_(i32) = G_PHI %17(i32), %bb.7 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %38(i32) G_BR %bb.2 + + + + + + + + + + + + + + + + ... 
--- @@ -564,178 +614,196 @@ body: | ; GFX10-NEXT: successors: %bb.1(0x80000000) ; GFX10-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32_xm0_xexec(i1) = IMPLICIT_DEF ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.1: ; GFX10-NEXT: successors: %bb.2(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[DEF1]](s1), %bb.0, %39(s1), %bb.6 - ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %11(s32), %bb.6, [[C]](s32), %bb.0 - ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %13(s32), %bb.6 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI]](s1) + ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec(i1) = PHI [[DEF1]](i1), %bb.0, %39(i1), %bb.6 + ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(i32) = G_PHI %11(i32), %bb.6, [[C]](i32), %bb.0 + ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(i32) = G_PHI [[C]](i32), %bb.0, %13(i32), %bb.6 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec(i1) = COPY [[PHI]](i1) ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.2: ; GFX10-NEXT: successors: %bb.3(0x40000000), %bb.4(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[PHI2]] - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[ICMP]](s1) - ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(i1) = G_ICMP intpred(eq), [[COPY]](i32), [[PHI2]] + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec(i1) = COPY [[ICMP]](i1) + ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(i32) = SI_IF [[ICMP]](i1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.3 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.3: ; GFX10-NEXT: successors: %bb.4(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI2]](s32) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C1]](s32) - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[SHL]](s64) - ; GFX10-NEXT: G_STORE [[PHI2]](s32), 
[[PTR_ADD]](p1) :: (store (s32), addrspace 1) + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(i64) = G_SEXT [[PHI2]](i32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[SEXT]], [[C1]](i32) + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[SHL]](i64) + ; GFX10-NEXT: G_STORE [[PHI2]](i32), [[PTR_ADD]](p1) :: (store (i32), addrspace 1) ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.4: ; GFX10-NEXT: successors: %bb.5(0x40000000), %bb.6(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32) - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[PHI2]] - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[C2]](s1) - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[COPY8]](s1) - ; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP1]](s1), %bb.6, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i1) = G_CONSTANT i1 true + ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](i32) + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(i1) = G_ICMP intpred(ne), [[COPY1]](i32), [[PHI2]] + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(i1) = COPY [[C2]](i1) + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(i1) = COPY [[COPY8]](i1) + ; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(i32) = SI_IF [[ICMP1]](i1), %bb.6, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.5 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.5: ; GFX10-NEXT: successors: %bb.6(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 false - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1) - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C4]] - ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY9]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY10]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i1) = G_CONSTANT i1 false + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(i1) = COPY [[C3]](i1) + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[PHI2]], [[C4]] + ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(i1) = S_ANDN2_B32 [[COPY9]](i1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(i1) = S_AND_B32 $exec_lo, [[COPY10]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(i1) = S_OR_B32 [[S_ANDN2_B32_]](i1), [[S_AND_B32_]](i1), implicit-def $scc ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.6: ; GFX10-NEXT: successors: %bb.7(0x04000000), %bb.1(0x7c000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI3:%[0-9]+]]:sreg_32(s1) = PHI [[COPY8]](s1), %bb.4, [[S_OR_B32_]](s1), %bb.5 - ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[ADD]](s32), %bb.5, [[DEF]](s32), %bb.4 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]](s1) - ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF1]](s32) - ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY11]](s1), [[PHI1]](s32) - ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 
[[COPY6]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY7]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc - ; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: [[PHI3:%[0-9]+]]:sreg_32(i1) = PHI [[COPY8]](i1), %bb.4, [[S_OR_B32_]](i1), %bb.5 + ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(i32) = G_PHI [[ADD]](i32), %bb.5, [[DEF]](i32), %bb.4 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(i1) = COPY [[PHI3]](i1) + ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF1]](i32) + ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY11]](i1), [[PHI1]](i32) + ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32_xm0_xexec(i1) = S_ANDN2_B32 [[COPY6]](i1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec(i1) = S_AND_B32 $exec_lo, [[COPY7]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32_xm0_xexec(i1) = S_OR_B32 [[S_ANDN2_B32_1]](i1), [[S_AND_B32_1]](i1), implicit-def $scc + ; GFX10-NEXT: SI_LOOP [[INT]](i32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.7 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.7: ; GFX10-NEXT: successors: %bb.8(0x40000000), %bb.9(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.6 - ; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(s32) = G_PHI [[PHI2]](s32), %bb.6 - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[S_OR_B32_1]](s1) - ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI5]](s32) - ; GFX10-NEXT: [[SI_IF2:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY12]](s1), %bb.9, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(i32) = G_PHI [[INT]](i32), %bb.6 + ; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(i32) = G_PHI [[PHI2]](i32), %bb.6 + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32_xm0_xexec(i1) = COPY [[S_OR_B32_1]](i1) + ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI5]](i32) + ; GFX10-NEXT: [[SI_IF2:%[0-9]+]]:sreg_32_xm0_xexec(i32) = SI_IF [[COPY12]](i1), %bb.9, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.8 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.8: ; GFX10-NEXT: successors: %bb.9(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: G_STORE [[PHI6]](s32), [[MV1]](p1) :: (store (s32), addrspace 1) + ; GFX10-NEXT: G_STORE [[PHI6]](i32), [[MV1]](p1) :: (store (i32), addrspace 1) ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.9: - ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF2]](s32) + ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF2]](i32) ; GFX10-NEXT: SI_RETURN bb.0: successors: %bb.1(0x80000000) liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = COPY $vgpr3 - %4:_(p1) = G_MERGE_VALUES %2(s32), %3(s32) - %5:_(s32) = COPY $vgpr6 - %6:_(s32) = COPY $vgpr7 - %7:_(p1) = G_MERGE_VALUES %5(s32), %6(s32) - %8:_(s32) = G_CONSTANT i32 0 - %9:_(s32) = G_IMPLICIT_DEF + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 + %4:_(p1) = G_MERGE_VALUES %2(i32), %3(i32) + %5:_(i32) = COPY $vgpr6 + %6:_(i32) = 
COPY $vgpr7 + %7:_(p1) = G_MERGE_VALUES %5(i32), %6(i32) + %8:_(i32) = G_CONSTANT i32 0 + %9:_(i32) = G_IMPLICIT_DEF bb.1: successors: %bb.2(0x80000000) - %10:_(s32) = G_PHI %11(s32), %bb.6, %8(s32), %bb.0 - %12:_(s32) = G_PHI %8(s32), %bb.0, %13(s32), %bb.6 + %10:_(i32) = G_PHI %11(i32), %bb.6, %8(i32), %bb.0 + %12:_(i32) = G_PHI %8(i32), %bb.0, %13(i32), %bb.6 bb.2: successors: %bb.3(0x40000000), %bb.4(0x40000000) - %14:sreg_32_xm0_xexec(s1) = G_ICMP intpred(eq), %0(s32), %12 - %15:sreg_32_xm0_xexec(s32) = SI_IF %14(s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec + %14:sreg_32_xm0_xexec(i1) = G_ICMP intpred(eq), %0(i32), %12 + %15:sreg_32_xm0_xexec(i32) = SI_IF %14(i1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.3 bb.3: successors: %bb.4(0x80000000) - %16:_(s64) = G_SEXT %12(s32) - %17:_(s32) = G_CONSTANT i32 2 - %18:_(s64) = G_SHL %16, %17(s32) - %19:_(p1) = G_PTR_ADD %4, %18(s64) - G_STORE %12(s32), %19(p1) :: (store (s32), addrspace 1) + %16:_(i64) = G_SEXT %12(i32) + %17:_(i32) = G_CONSTANT i32 2 + %18:_(i64) = G_SHL %16, %17(i32) + %19:_(p1) = G_PTR_ADD %4, %18(i64) + G_STORE %12(i32), %19(p1) :: (store (i32), addrspace 1) bb.4: successors: %bb.5(0x40000000), %bb.6(0x40000000) - %20:_(s1) = G_CONSTANT i1 true - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %15(s32) - %21:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %1(s32), %12 - %22:sreg_32_xm0_xexec(s32) = SI_IF %21(s1), %bb.6, implicit-def $exec, implicit-def $scc, implicit $exec + %20:_(i1) = G_CONSTANT i1 true + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %15(i32) + %21:sreg_32_xm0_xexec(i1) = G_ICMP intpred(ne), %1(i32), %12 + %22:sreg_32_xm0_xexec(i32) = SI_IF %21(i1), %bb.6, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.5 bb.5: successors: %bb.6(0x80000000) - %23:_(s1) = G_CONSTANT i1 false - %24:_(s32) = G_CONSTANT i32 1 - %25:_(s32) = G_ADD %12, %24 + %23:_(i1) = G_CONSTANT i1 false + %24:_(i32) = G_CONSTANT i32 1 + %25:_(i32) = G_ADD %12, %24 bb.6: successors: %bb.7(0x04000000), %bb.1(0x7c000000) - %13:_(s32) = G_PHI %25(s32), %bb.5, %9(s32), %bb.4 - %26:_(s1) = G_PHI %23(s1), %bb.5, %20(s1), %bb.4 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %22(s32) - %11:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %26(s1), %10(s32) - SI_LOOP %11(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + %13:_(i32) = G_PHI %25(i32), %bb.5, %9(i32), %bb.4 + %26:_(i1) = G_PHI %23(i1), %bb.5, %20(i1), %bb.4 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %22(i32) + %11:sreg_32_xm0_xexec(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %26(i1), %10(i32) + SI_LOOP %11(i32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.7 bb.7: successors: %bb.8(0x40000000), %bb.9(0x40000000) - %27:_(s32) = G_PHI %11(s32), %bb.6 - %28:sreg_32_xm0_xexec(s1) = G_PHI %14(s1), %bb.6 - %29:_(s32) = G_PHI %12(s32), %bb.6 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %27(s32) - %30:sreg_32_xm0_xexec(s32) = SI_IF %28(s1), %bb.9, implicit-def $exec, implicit-def $scc, implicit $exec + %27:_(i32) = G_PHI %11(i32), %bb.6 + %28:sreg_32_xm0_xexec(i1) = G_PHI %14(i1), %bb.6 + %29:_(i32) = G_PHI %12(i32), %bb.6 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %27(i32) + %30:sreg_32_xm0_xexec(i32) = SI_IF %28(i1), %bb.9, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.8 bb.8: successors: %bb.9(0x80000000) - G_STORE %29(s32), %7(p1) :: (store (s32), 
addrspace 1) + G_STORE %29(i32), %7(p1) :: (store (i32), addrspace 1) bb.9: - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %30(s32) + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %30(i32) SI_RETURN + + + + + + + + + + + + + + + + + + ... --- @@ -748,143 +816,155 @@ body: | ; GFX10-NEXT: successors: %bb.1(0x80000000) ; GFX10-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[C1]](s1) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF - ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](i32), [[COPY2]](i32) + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY3]](i32), [[COPY4]](i32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i1) = G_CONSTANT i1 true + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(i1) = COPY [[C1]](i1) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(i1) = IMPLICIT_DEF + ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(i1) = IMPLICIT_DEF ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.1: ; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %54(s1), %bb.3 - ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI [[DEF]](s1), %bb.0, %43(s1), %bb.3 - ; GFX10-NEXT: [[PHI2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[COPY5]](s1), %bb.0, %33(s1), %bb.3 - ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI %10(s32), %bb.3, [[C]](s32), %bb.0 - ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %12(s32), %bb.3 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1) - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI2]](s1) - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[COPY8]](s1) - ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY7]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY9]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1) - ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY8]](s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(i1) = PHI [[DEF1]](i1), %bb.0, %56(i1), %bb.3 + ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(i1) = PHI [[DEF]](i1), %bb.0, %45(i1), %bb.3 + ; GFX10-NEXT: [[PHI2:%[0-9]+]]:sreg_32_xm0_xexec(i1) = PHI [[COPY5]](i1), %bb.0, 
%35(i1), %bb.3 + ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(i32) = G_PHI %10(i32), %bb.3, [[C]](i32), %bb.0 + ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(i32) = G_PHI [[C]](i32), %bb.0, %12(i32), %bb.3 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(i1) = COPY [[PHI]](i1) + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(i1) = COPY [[PHI1]](i1) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32_xm0_xexec(i1) = COPY [[PHI2]](i1) + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(i1) = COPY [[COPY8]](i1) + ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(i1) = S_ANDN2_B32 [[COPY7]](i1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(i1) = S_AND_B32 $exec_lo, [[COPY9]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(i1) = S_OR_B32 [[S_ANDN2_B32_]](i1), [[S_AND_B32_]](i1), implicit-def $scc + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(i1) = COPY [[S_OR_B32_]](i1) + ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(i32) = SI_IF [[COPY8]](i1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.2: ; GFX10-NEXT: successors: %bb.3(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI4]](s32) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C2]](s32) - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[SHL]](s64) - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1) - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[LOAD]](s32), [[C3]] - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP]](s1) - ; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF - ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY10]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY11]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(i64) = G_SEXT [[PHI4]](i32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[SEXT]], [[C2]](i32) + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[SHL]](i64) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i32), addrspace 1) + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[LOAD]](i32), [[C3]] + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(i1) = COPY [[ICMP]](i1) + ; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32_xm0_xexec(i1) = IMPLICIT_DEF + ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(i1) = S_ANDN2_B32 [[COPY10]](i1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(i1) = S_AND_B32 $exec_lo, [[COPY11]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(i1) = S_OR_B32 [[S_ANDN2_B32_1]](i1), [[S_AND_B32_1]](i1), implicit-def $scc ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.3: ; GFX10-NEXT: successors: %bb.4(0x04000000), %bb.1(0x7c000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.1, [[S_OR_B32_1]](s1), %bb.2 - ; GFX10-NEXT: [[PHI6:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[PHI2]](s1), %bb.1, [[DEF2]](s1), %bb.2 - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[PHI5]](s1) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY 
[[PHI6]](s1) - ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32) - ; GFX10-NEXT: [[FREEZE:%[0-9]+]]:_(s1) = G_FREEZE [[COPY12]] - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[FREEZE]](s1) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[FREEZE]](s1) - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI4]], [[C4]] - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[PHI4]](s32), [[COPY]] - ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[ICMP1]](s1), [[PHI3]](s32) - ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY13]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY15]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY14]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_3]](s1), [[S_AND_B32_3]](s1), implicit-def $scc - ; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32(i1) = PHI [[S_OR_B32_]](i1), %bb.1, [[S_OR_B32_1]](i1), %bb.2 + ; GFX10-NEXT: [[PHI6:%[0-9]+]]:sreg_32_xm0_xexec(i1) = PHI [[PHI2]](i1), %bb.1, [[DEF2]](i1), %bb.2 + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(i1) = COPY [[PHI5]](i1) + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32_xm0_xexec(i1) = COPY [[PHI6]](i1) + ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](i32) + ; GFX10-NEXT: [[FREEZE:%[0-9]+]]:_(i1) = G_FREEZE [[COPY12]] + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(i1) = COPY [[FREEZE]](i1) + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32_xm0_xexec(i1) = COPY [[FREEZE]](i1) + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[PHI4]], [[C4]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[PHI4]](i32), [[COPY]] + ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[ICMP1]](i1), [[PHI3]](i32) + ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(i1) = S_ANDN2_B32 [[COPY13]](i1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(i1) = S_AND_B32 $exec_lo, [[COPY15]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(i1) = S_OR_B32 [[S_ANDN2_B32_2]](i1), [[S_AND_B32_2]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(i1) = S_ANDN2_B32 [[COPY6]](i1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(i1) = S_AND_B32 $exec_lo, [[COPY14]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32(i1) = S_OR_B32 [[S_ANDN2_B32_3]](i1), [[S_AND_B32_3]](i1), implicit-def $scc + ; GFX10-NEXT: SI_LOOP [[INT]](i32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.4 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.4: - ; GFX10-NEXT: [[PHI7:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.3 - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_3]](s1) - ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS 
intrinsic(@llvm.amdgcn.end.cf), [[PHI7]](s32) - ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY16]](s1), [[C6]], [[C5]] - ; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV1]](p0) :: (store (s32)) + ; GFX10-NEXT: [[PHI7:%[0-9]+]]:_(i32) = G_PHI [[INT]](i32), %bb.3 + ; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32(i1) = COPY [[S_OR_B32_3]](i1) + ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI7]](i32) + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[C6]](f32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[C5]](f32) + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[COPY16]](i1), [[BITCAST]], [[BITCAST1]] + ; GFX10-NEXT: G_STORE [[SELECT]](i32), [[MV1]](p0) :: (store (i32)) ; GFX10-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1(0x80000000) liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(p1) = G_MERGE_VALUES %1(s32), %2(s32) - %4:_(s32) = COPY $vgpr3 - %5:_(s32) = COPY $vgpr4 - %6:_(p0) = G_MERGE_VALUES %4(s32), %5(s32) - %7:_(s32) = G_CONSTANT i32 0 - %8:_(s1) = G_CONSTANT i1 true + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(p1) = G_MERGE_VALUES %1(i32), %2(i32) + %4:_(i32) = COPY $vgpr3 + %5:_(i32) = COPY $vgpr4 + %6:_(p0) = G_MERGE_VALUES %4(i32), %5(i32) + %7:_(i32) = G_CONSTANT i32 0 + %8:_(i1) = G_CONSTANT i1 true bb.1: successors: %bb.2(0x40000000), %bb.3(0x40000000) - %9:_(s32) = G_PHI %10(s32), %bb.3, %7(s32), %bb.0 - %11:_(s32) = G_PHI %7(s32), %bb.0, %12(s32), %bb.3 - %13:sreg_32_xm0_xexec(s1) = G_PHI %8(s1), %bb.0, %14(s1), %bb.3 - %15:sreg_32_xm0_xexec(s32) = SI_IF %13(s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec + %9:_(i32) = G_PHI %10(i32), %bb.3, %7(i32), %bb.0 + %11:_(i32) = G_PHI %7(i32), %bb.0, %12(i32), %bb.3 + %13:sreg_32_xm0_xexec(i1) = G_PHI %8(i1), %bb.0, %14(i1), %bb.3 + %15:sreg_32_xm0_xexec(i32) = SI_IF %13(i1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.2 bb.2: successors: %bb.3(0x80000000) - %16:_(s64) = G_SEXT %11(s32) - %17:_(s32) = G_CONSTANT i32 2 - %18:_(s64) = G_SHL %16, %17(s32) - %19:_(p1) = G_PTR_ADD %3, %18(s64) - %20:_(s32) = G_LOAD %19(p1) :: (load (s32), addrspace 1) - %21:_(s32) = G_CONSTANT i32 0 - %22:_(s1) = G_ICMP intpred(eq), %20(s32), %21 + %16:_(i64) = G_SEXT %11(i32) + %17:_(i32) = G_CONSTANT i32 2 + %18:_(i64) = G_SHL %16, %17(i32) + %19:_(p1) = G_PTR_ADD %3, %18(i64) + %20:_(i32) = G_LOAD %19(p1) :: (load (i32), addrspace 1) + %21:_(i32) = G_CONSTANT i32 0 + %22:_(i1) = G_ICMP intpred(eq), %20(i32), %21 bb.3: successors: %bb.4(0x04000000), %bb.1(0x7c000000) - %23:_(s1) = G_PHI %22(s1), %bb.2, %13(s1), %bb.1 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %15(s32) - %14:_(s1) = G_FREEZE %23 - %24:_(s32) = G_CONSTANT i32 1 - %12:_(s32) = G_ADD %11, %24 - %25:_(s1) = G_ICMP intpred(slt), %11(s32), %0 - %10:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %25(s1), %9(s32) - SI_LOOP %10(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + %23:_(i1) = G_PHI %22(i1), %bb.2, %13(i1), %bb.1 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %15(i32) + %14:_(i1) = G_FREEZE %23 + %24:_(i32) = 
G_CONSTANT i32 1 + %12:_(i32) = G_ADD %11, %24 + %25:_(i1) = G_ICMP intpred(slt), %11(i32), %0 + %10:sreg_32_xm0_xexec(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %25(i1), %9(i32) + SI_LOOP %10(i32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.4 bb.4: - %26:_(s1) = G_PHI %14(s1), %bb.3 - %27:_(s32) = G_PHI %10(s32), %bb.3 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %27(s32) - %28:_(s32) = G_FCONSTANT float 0.000000e+00 - %29:_(s32) = G_FCONSTANT float 1.000000e+00 - %30:_(s32) = G_SELECT %26(s1), %29, %28 - G_STORE %30(s32), %6(p0) :: (store (s32)) + %26:_(i1) = G_PHI %14(i1), %bb.3 + %27:_(i32) = G_PHI %10(i32), %bb.3 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %27(i32) + %28:_(f32) = G_FCONSTANT float 0.000000e+00 + %29:_(f32) = G_FCONSTANT float 1.000000e+00 + %30:_(i32) = G_BITCAST %29(f32) + %31:_(i32) = G_BITCAST %28(f32) + %32:_(i32) = G_SELECT %26(i1), %30, %31 + G_STORE %32(i32), %6(p0) :: (store (i32)) S_ENDPGM 0 + + + + + + + + ... --- @@ -897,189 +977,201 @@ body: | ; GFX10-NEXT: successors: %bb.1(0x80000000) ; GFX10-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF - ; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF - ; GFX10-NEXT: [[DEF3:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(i1) = IMPLICIT_DEF + ; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32(i1) = IMPLICIT_DEF + ; GFX10-NEXT: [[DEF3:%[0-9]+]]:sreg_32_xm0_xexec(i1) = IMPLICIT_DEF ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.1: ; GFX10-NEXT: successors: %bb.3(0x40000000), %bb.5(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[DEF3]](s1), %bb.0, %67(s1), %bb.5 - ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI [[DEF2]](s1), %bb.0, %56(s1), %bb.5 - ; GFX10-NEXT: [[PHI2:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %43(s1), %bb.5 - ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI %12(s32), %bb.5, [[C]](s32), %bb.0 - ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %14(s32), %bb.5 - ; GFX10-NEXT: 
[[COPY6:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI]](s1) - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[PHI2]](s1) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[C1]](s1) - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[C1]](s1) - ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI4]](s32) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C2]](s32) - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV1]], [[SHL]](s64) - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1) - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C3]] - ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY10]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1) - ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY7]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY9]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_1]](s1) - ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec(i1) = PHI [[DEF3]](i1), %bb.0, %67(i1), %bb.5 + ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(i1) = PHI [[DEF2]](i1), %bb.0, %56(i1), %bb.5 + ; GFX10-NEXT: [[PHI2:%[0-9]+]]:sreg_32(i1) = PHI [[DEF1]](i1), %bb.0, %43(i1), %bb.5 + ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(i32) = G_PHI %12(i32), %bb.5, [[C]](i32), %bb.0 + ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(i32) = G_PHI [[C]](i32), %bb.0, %14(i32), %bb.5 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec(i1) = COPY [[PHI]](i1) + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(i1) = COPY [[PHI1]](i1) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(i1) = COPY [[PHI2]](i1) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i1) = G_CONSTANT i1 true + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(i1) = COPY [[C1]](i1) + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(i1) = COPY [[C1]](i1) + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(i64) = G_SEXT [[PHI4]](i32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[SEXT]], [[C2]](i32) + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV1]], [[SHL]](i64) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i32), addrspace 1) + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(i1) = G_ICMP intpred(ne), [[LOAD]](i32), [[C3]] + ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(i1) = S_ANDN2_B32 [[COPY8]](i1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(i1) = S_AND_B32 $exec_lo, [[COPY10]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(i1) = S_OR_B32 [[S_ANDN2_B32_]](i1), [[S_AND_B32_]](i1), implicit-def $scc + ; 
GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(i1) = COPY [[S_OR_B32_]](i1) + ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(i1) = S_ANDN2_B32 [[COPY7]](i1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(i1) = S_AND_B32 $exec_lo, [[COPY9]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(i1) = S_OR_B32 [[S_ANDN2_B32_1]](i1), [[S_AND_B32_1]](i1), implicit-def $scc + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(i1) = COPY [[S_OR_B32_1]](i1) + ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(i32) = SI_IF [[ICMP]](i1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.3 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.2: ; GFX10-NEXT: successors: %bb.4(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GFX10-NEXT: G_STORE [[C4]](s32), [[MV2]](p1) :: (store (s32), addrspace 1) + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GFX10-NEXT: G_STORE [[C4]](i32), [[MV2]](p1) :: (store (i32), addrspace 1) ; GFX10-NEXT: G_BR %bb.4 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.3: ; GFX10-NEXT: successors: %bb.5(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s1) = G_CONSTANT i1 false - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[C5]](s1) - ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C6]](s32) - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[SHL1]](s64) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s32), addrspace 1) - ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD1]], [[C7]] - ; GFX10-NEXT: G_STORE [[ADD]](s32), [[PTR_ADD1]](p1) :: (store (s32), addrspace 1) - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI4]], [[C7]] - ; GFX10-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 100 - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI4]](s32), [[C8]] - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP1]](s1) - ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY11]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY13]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY12]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY14]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_3]](s1), [[S_AND_B32_3]](s1), implicit-def $scc + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(i1) = G_CONSTANT i1 false + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(i1) = COPY [[C5]](i1) + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[SEXT]], [[C6]](i32) + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[SHL1]](i64) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i32), addrspace 1) + ; GFX10-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[LOAD1]], [[C7]] + ; GFX10-NEXT: G_STORE [[ADD]](i32), [[PTR_ADD1]](p1) :: (store (i32), addrspace 1) + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[PHI4]], [[C7]] + ; GFX10-NEXT: [[C8:%[0-9]+]]:_(i32) = G_CONSTANT i32 100 + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = 
G_ICMP intpred(ult), [[PHI4]](i32), [[C8]] + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(i1) = COPY [[ICMP1]](i1) + ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(i1) = S_ANDN2_B32 [[COPY11]](i1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(i1) = S_AND_B32 $exec_lo, [[COPY13]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(i1) = S_OR_B32 [[S_ANDN2_B32_2]](i1), [[S_AND_B32_2]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(i1) = S_ANDN2_B32 [[COPY12]](i1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(i1) = S_AND_B32 $exec_lo, [[COPY14]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32(i1) = S_OR_B32 [[S_ANDN2_B32_3]](i1), [[S_AND_B32_3]](i1), implicit-def $scc ; GFX10-NEXT: G_BR %bb.5 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.4: - ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %35(s32) + ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %35(i32) ; GFX10-NEXT: S_ENDPGM 0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.5: ; GFX10-NEXT: successors: %bb.6(0x04000000), %bb.1(0x7c000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_1]](s1), %bb.1, [[S_OR_B32_3]](s1), %bb.3 - ; GFX10-NEXT: [[PHI6:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.1, [[S_OR_B32_2]](s1), %bb.3 - ; GFX10-NEXT: [[PHI7:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.3, [[DEF]](s32), %bb.1 - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[PHI5]](s1) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32(s1) = COPY [[PHI6]](s1) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[COPY16]](s1) - ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32) - ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY15]](s1), [[PHI3]](s32) - ; GFX10-NEXT: [[S_ANDN2_B32_4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY17]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_4]](s1), [[S_AND_B32_4]](s1), implicit-def $scc - ; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32(i1) = PHI [[S_OR_B32_1]](i1), %bb.1, [[S_OR_B32_3]](i1), %bb.3 + ; GFX10-NEXT: [[PHI6:%[0-9]+]]:sreg_32(i1) = PHI [[S_OR_B32_]](i1), %bb.1, [[S_OR_B32_2]](i1), %bb.3 + ; GFX10-NEXT: [[PHI7:%[0-9]+]]:_(i32) = G_PHI [[ADD1]](i32), %bb.3, [[DEF]](i32), %bb.1 + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(i1) = COPY [[PHI5]](i1) + ; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32(i1) = COPY [[PHI6]](i1) + ; GFX10-NEXT: [[COPY17:%[0-9]+]]:sreg_32_xm0_xexec(i1) = COPY [[COPY16]](i1) + ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](i32) + ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY15]](i1), [[PHI3]](i32) + ; GFX10-NEXT: [[S_ANDN2_B32_4:%[0-9]+]]:sreg_32_xm0_xexec(i1) = S_ANDN2_B32 [[COPY6]](i1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_4:%[0-9]+]]:sreg_32_xm0_xexec(i1) = S_AND_B32 $exec_lo, [[COPY17]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_4:%[0-9]+]]:sreg_32_xm0_xexec(i1) = S_OR_B32 [[S_ANDN2_B32_4]](i1), [[S_AND_B32_4]](i1), implicit-def $scc + ; GFX10-NEXT: SI_LOOP [[INT]](i32), %bb.1, implicit-def 
$exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.6 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.6: ; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI8:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.5 - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[S_OR_B32_4]](s1) - ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI8]](s32) - ; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY18]](s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: [[PHI8:%[0-9]+]]:_(i32) = G_PHI [[INT]](i32), %bb.5 + ; GFX10-NEXT: [[COPY18:%[0-9]+]]:sreg_32_xm0_xexec(i1) = COPY [[S_OR_B32_4]](i1) + ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI8]](i32) + ; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(i32) = SI_IF [[COPY18]](i1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.2 bb.0: successors: %bb.1(0x80000000) liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(p1) = G_MERGE_VALUES %0(s32), %1(s32) - %3:_(s32) = COPY $vgpr2 - %4:_(s32) = COPY $vgpr3 - %5:_(p1) = G_MERGE_VALUES %3(s32), %4(s32) - %6:_(s32) = COPY $vgpr4 - %7:_(s32) = COPY $vgpr5 - %8:_(p1) = G_MERGE_VALUES %6(s32), %7(s32) - %9:_(s32) = G_CONSTANT i32 0 - %10:_(s32) = G_IMPLICIT_DEF + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(p1) = G_MERGE_VALUES %0(i32), %1(i32) + %3:_(i32) = COPY $vgpr2 + %4:_(i32) = COPY $vgpr3 + %5:_(p1) = G_MERGE_VALUES %3(i32), %4(i32) + %6:_(i32) = COPY $vgpr4 + %7:_(i32) = COPY $vgpr5 + %8:_(p1) = G_MERGE_VALUES %6(i32), %7(i32) + %9:_(i32) = G_CONSTANT i32 0 + %10:_(i32) = G_IMPLICIT_DEF bb.1: successors: %bb.3(0x40000000), %bb.5(0x40000000) - %11:_(s32) = G_PHI %12(s32), %bb.5, %9(s32), %bb.0 - %13:_(s32) = G_PHI %9(s32), %bb.0, %14(s32), %bb.5 - %15:_(s1) = G_CONSTANT i1 true - %16:_(s64) = G_SEXT %13(s32) - %17:_(s32) = G_CONSTANT i32 2 - %18:_(s64) = G_SHL %16, %17(s32) - %19:_(p1) = G_PTR_ADD %5, %18(s64) - %20:_(s32) = G_LOAD %19(p1) :: (load (s32), addrspace 1) - %21:_(s32) = G_CONSTANT i32 0 - %22:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %20(s32), %21 - %23:sreg_32_xm0_xexec(s32) = SI_IF %22(s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec + %11:_(i32) = G_PHI %12(i32), %bb.5, %9(i32), %bb.0 + %13:_(i32) = G_PHI %9(i32), %bb.0, %14(i32), %bb.5 + %15:_(i1) = G_CONSTANT i1 true + %16:_(i64) = G_SEXT %13(i32) + %17:_(i32) = G_CONSTANT i32 2 + %18:_(i64) = G_SHL %16, %17(i32) + %19:_(p1) = G_PTR_ADD %5, %18(i64) + %20:_(i32) = G_LOAD %19(p1) :: (load (i32), addrspace 1) + %21:_(i32) = G_CONSTANT i32 0 + %22:sreg_32_xm0_xexec(i1) = G_ICMP intpred(ne), %20(i32), %21 + %23:sreg_32_xm0_xexec(i32) = SI_IF %22(i1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.3 bb.2: successors: %bb.4(0x80000000) - %24:_(s32) = G_CONSTANT i32 10 - G_STORE %24(s32), %8(p1) :: (store (s32), addrspace 1) + %24:_(i32) = G_CONSTANT i32 10 + G_STORE %24(i32), %8(p1) :: (store (i32), addrspace 1) G_BR %bb.4 bb.3: successors: %bb.5(0x80000000) - %25:_(s1) = G_CONSTANT i1 false - %26:_(s32) = G_CONSTANT i32 2 - %27:_(s64) = G_SHL %16, %26(s32) - %28:_(p1) = G_PTR_ADD %2, %27(s64) - %29:_(s32) = G_LOAD %28(p1) :: (load (s32), addrspace 1) - %30:_(s32) = G_CONSTANT i32 1 - %31:_(s32) = G_ADD %29, %30 - G_STORE %31(s32), %28(p1) :: (store (s32), addrspace 1) - %32:_(s32) = G_ADD %13, %30 - %33:_(s32) = G_CONSTANT 
i32 100 - %34:_(s1) = G_ICMP intpred(ult), %13(s32), %33 + %25:_(i1) = G_CONSTANT i1 false + %26:_(i32) = G_CONSTANT i32 2 + %27:_(i64) = G_SHL %16, %26(i32) + %28:_(p1) = G_PTR_ADD %2, %27(i64) + %29:_(i32) = G_LOAD %28(p1) :: (load (i32), addrspace 1) + %30:_(i32) = G_CONSTANT i32 1 + %31:_(i32) = G_ADD %29, %30 + G_STORE %31(i32), %28(p1) :: (store (i32), addrspace 1) + %32:_(i32) = G_ADD %13, %30 + %33:_(i32) = G_CONSTANT i32 100 + %34:_(i1) = G_ICMP intpred(ult), %13(i32), %33 G_BR %bb.5 bb.4: - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %35(s32) + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %35(i32) S_ENDPGM 0 bb.5: successors: %bb.6(0x04000000), %bb.1(0x7c000000) - %14:_(s32) = G_PHI %32(s32), %bb.3, %10(s32), %bb.1 - %36:_(s1) = G_PHI %25(s1), %bb.3, %15(s1), %bb.1 - %37:_(s1) = G_PHI %34(s1), %bb.3, %15(s1), %bb.1 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %23(s32) - %12:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %37(s1), %11(s32) - SI_LOOP %12(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + %14:_(i32) = G_PHI %32(i32), %bb.3, %10(i32), %bb.1 + %36:_(i1) = G_PHI %25(i1), %bb.3, %15(i1), %bb.1 + %37:_(i1) = G_PHI %34(i1), %bb.3, %15(i1), %bb.1 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %23(i32) + %12:sreg_32_xm0_xexec(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %37(i1), %11(i32) + SI_LOOP %12(i32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.6 bb.6: successors: %bb.2(0x40000000), %bb.4(0x40000000) - %38:sreg_32_xm0_xexec(s1) = G_PHI %36(s1), %bb.5 - %39:_(s32) = G_PHI %12(s32), %bb.5 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %39(s32) - %35:sreg_32_xm0_xexec(s32) = SI_IF %38(s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec + %38:sreg_32_xm0_xexec(i1) = G_PHI %36(i1), %bb.5 + %39:_(i32) = G_PHI %12(i32), %bb.5 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %39(i32) + %35:sreg_32_xm0_xexec(i32) = SI_IF %38(i1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.2 + + + + + + + + + + + + ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.mir index 39ebf66411cc6..bd37e402aa4c1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.mir @@ -11,69 +11,73 @@ body: | ; GFX10-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GFX10-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY2]](s32), [[C]] - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(eq), [[COPY3]](s32), [[C1]] - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP]](s1) - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[COPY4]](s1) - ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP1]](s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 6 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[COPY2]](i32), [[C]] + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(i1) = G_ICMP intpred(eq), [[COPY3]](i32), [[C1]] + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(i1) = COPY [[ICMP]](i1) + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(i1) = COPY [[COPY4]](i1) + ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(i32) = SI_IF [[ICMP1]](i1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.1: ; GFX10-NEXT: successors: %bb.2(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY2]](s32), [[C2]] - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1) - ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY5]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY6]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[COPY2]](i32), [[C2]] + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(i1) = COPY [[ICMP2]](i1) + ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(i1) = S_ANDN2_B32 [[COPY5]](i1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(i1) = S_AND_B32 $exec_lo, [[COPY6]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(i1) = S_OR_B32 [[S_ANDN2_B32_]](i1), [[S_AND_B32_]](i1), implicit-def $scc ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.2: - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = 
PHI [[COPY4]](s1), %bb.0, [[S_OR_B32_]](s1), %bb.1 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1) - ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32) - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY7]](s1), [[C4]], [[C3]] - ; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(i1) = PHI [[COPY4]](i1), %bb.0, [[S_OR_B32_]](i1), %bb.1 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(i1) = COPY [[PHI]](i1) + ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](i32) + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[COPY7]](i1), [[C4]], [[C3]] + ; GFX10-NEXT: G_STORE [[SELECT]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; GFX10-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(p1) = G_MERGE_VALUES %0(s32), %1(s32) - %3:_(s32) = COPY $vgpr2 - %4:_(s32) = COPY $vgpr3 - %5:_(s32) = G_CONSTANT i32 6 - %6:_(s1) = G_ICMP intpred(uge), %3(s32), %5 - %7:_(s32) = G_CONSTANT i32 0 - %8:sreg_32_xm0_xexec(s1) = G_ICMP intpred(eq), %4(s32), %7 - %9:sreg_32_xm0_xexec(s32) = SI_IF %8(s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(p1) = G_MERGE_VALUES %0(i32), %1(i32) + %3:_(i32) = COPY $vgpr2 + %4:_(i32) = COPY $vgpr3 + %5:_(i32) = G_CONSTANT i32 6 + %6:_(i1) = G_ICMP intpred(uge), %3(i32), %5 + %7:_(i32) = G_CONSTANT i32 0 + %8:sreg_32_xm0_xexec(i1) = G_ICMP intpred(eq), %4(i32), %7 + %9:sreg_32_xm0_xexec(i32) = SI_IF %8(i1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.1 bb.1: successors: %bb.2(0x80000000) - %10:_(s32) = G_CONSTANT i32 1 - %11:_(s1) = G_ICMP intpred(ult), %3(s32), %10 + %10:_(i32) = G_CONSTANT i32 1 + %11:_(i1) = G_ICMP intpred(ult), %3(i32), %10 bb.2: - %12:_(s1) = G_PHI %6(s1), %bb.0, %11(s1), %bb.1 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %9(s32) - %13:_(s32) = G_CONSTANT i32 2 - %14:_(s32) = G_CONSTANT i32 1 - %15:_(s32) = G_SELECT %12(s1), %14, %13 - G_STORE %15(s32), %2(p1) :: (store (s32), addrspace 1) + %12:_(i1) = G_PHI %6(i1), %bb.0, %11(i1), %bb.1 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %9(i32) + %13:_(i32) = G_CONSTANT i32 2 + %14:_(i32) = G_CONSTANT i32 1 + %15:_(i32) = G_SELECT %12(i1), %14, %13 + G_STORE %15(i32), %2(p1) :: (store (i32), addrspace 1) S_ENDPGM 0 + + + + ... 
--- @@ -86,104 +90,112 @@ body: | ; GFX10-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) ; GFX10-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s1) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[COPY3]](s32), [[C]] - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[DEF]](s1) - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[COPY4]](s1) - ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(i1) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(i1) = G_ICMP intpred(ne), [[COPY3]](i32), [[C]] + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(i1) = COPY [[DEF]](i1) + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(i1) = COPY [[COPY4]](i1) + ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(i32) = SI_IF [[ICMP]](i1), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.3 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.1: ; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[COPY4]](s1), %bb.0, %20(s1), %bb.3 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1) - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[COPY6]](s1) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[COPY7]](s1) - ; GFX10-NEXT: [[SI_ELSE:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_ELSE [[SI_IF]](s32), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(i1) = PHI [[COPY4]](i1), %bb.0, %20(i1), %bb.3 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(i1) = COPY [[PHI]](i1) + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(i1) = COPY [[COPY6]](i1) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(i1) = COPY [[COPY7]](i1) + ; GFX10-NEXT: [[SI_ELSE:%[0-9]+]]:sreg_32_xm0_xexec(i32) = SI_ELSE [[SI_IF]](i32), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.2: ; GFX10-NEXT: successors: %bb.4(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY2]](s32), [[C1]] - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP1]](s1) - ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY9]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), 
[[COPY2]](i32), [[C1]] + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(i1) = COPY [[ICMP1]](i1) + ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(i1) = S_ANDN2_B32 [[COPY8]](i1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(i1) = S_AND_B32 $exec_lo, [[COPY9]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(i1) = S_OR_B32 [[S_ANDN2_B32_]](i1), [[S_AND_B32_]](i1), implicit-def $scc ; GFX10-NEXT: G_BR %bb.4 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.3: ; GFX10-NEXT: successors: %bb.1(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY2]](s32), [[C2]] - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1) - ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY5]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY10]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[COPY2]](i32), [[C2]] + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(i1) = COPY [[ICMP2]](i1) + ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(i1) = S_ANDN2_B32 [[COPY5]](i1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(i1) = S_AND_B32 $exec_lo, [[COPY10]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(i1) = S_OR_B32 [[S_ANDN2_B32_1]](i1), [[S_AND_B32_1]](i1), implicit-def $scc ; GFX10-NEXT: G_BR %bb.1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.4: - ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI [[COPY7]](s1), %bb.1, [[S_OR_B32_]](s1), %bb.2 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1) - ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_ELSE]](s32) - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY11]](s1), [[C3]], [[C4]] - ; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(i1) = PHI [[COPY7]](i1), %bb.1, [[S_OR_B32_]](i1), %bb.2 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(i1) = COPY [[PHI1]](i1) + ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_ELSE]](i32) + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[COPY11]](i1), [[C3]], [[C4]] + ; GFX10-NEXT: G_STORE [[SELECT]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; GFX10-NEXT: S_ENDPGM 0 bb.0: successors: %bb.3(0x40000000), %bb.1(0x40000000) liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(p1) = G_MERGE_VALUES %0(s32), %1(s32) - %3:_(s32) = COPY $vgpr2 - %4:_(s32) = COPY $vgpr3 - %5:_(s1) = G_IMPLICIT_DEF - %6:_(s32) = G_CONSTANT i32 0 - %7:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %4(s32), %6 - %8:sreg_32_xm0_xexec(s32) = SI_IF %7(s1), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(p1) = G_MERGE_VALUES %0(i32), %1(i32) + %3:_(i32) = COPY $vgpr2 + %4:_(i32) = COPY $vgpr3 + %5:_(i1) = G_IMPLICIT_DEF + %6:_(i32) = G_CONSTANT i32 0 + %7:sreg_32_xm0_xexec(i1) = G_ICMP 
intpred(ne), %4(i32), %6 + %8:sreg_32_xm0_xexec(i32) = SI_IF %7(i1), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.3 bb.1: successors: %bb.2(0x40000000), %bb.4(0x40000000) - %9:_(s1) = G_PHI %10(s1), %bb.3, %5(s1), %bb.0 - %11:sreg_32_xm0_xexec(s32) = SI_ELSE %8(s32), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec + %9:_(i1) = G_PHI %10(i1), %bb.3, %5(i1), %bb.0 + %11:sreg_32_xm0_xexec(i32) = SI_ELSE %8(i32), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.2 bb.2: successors: %bb.4(0x80000000) - %12:_(s32) = G_CONSTANT i32 1 - %13:_(s1) = G_ICMP intpred(uge), %3(s32), %12 + %12:_(i32) = G_CONSTANT i32 1 + %13:_(i1) = G_ICMP intpred(uge), %3(i32), %12 G_BR %bb.4 bb.3: successors: %bb.1(0x80000000) - %14:_(s32) = G_CONSTANT i32 2 - %10:_(s1) = G_ICMP intpred(ult), %3(s32), %14 + %14:_(i32) = G_CONSTANT i32 2 + %10:_(i1) = G_ICMP intpred(ult), %3(i32), %14 G_BR %bb.1 bb.4: - %15:_(s1) = G_PHI %9(s1), %bb.1, %13(s1), %bb.2 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %11(s32) - %16:_(s32) = G_CONSTANT i32 1 - %17:_(s32) = G_CONSTANT i32 2 - %18:_(s32) = G_SELECT %15(s1), %16, %17 - G_STORE %18(s32), %2(p1) :: (store (s32), addrspace 1) + %15:_(i1) = G_PHI %9(i1), %bb.1, %13(i1), %bb.2 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %11(i32) + %16:_(i32) = G_CONSTANT i32 1 + %17:_(i32) = G_CONSTANT i32 2 + %18:_(i32) = G_SELECT %15(i1), %16, %17 + G_STORE %18(i32), %2(p1) :: (store (i32), addrspace 1) S_ENDPGM 0 + + + + + + + + ... --- @@ -196,129 +208,137 @@ body: | ; GFX10-NEXT: successors: %bb.1(0x80000000) ; GFX10-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(i1) = IMPLICIT_DEF ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.1: ; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %35(s1), %bb.3 - ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %9(s32), %bb.3, [[C]](s32), %bb.0 - ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %11(s32), %bb.3 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[C1]](s1) - ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI2]](s32) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL 
[[SEXT]], [[C2]](s32) - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV1]], [[SHL]](s64) - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1) - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C3]] - ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY4]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY5]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1) - ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(i1) = PHI [[DEF1]](i1), %bb.0, %35(i1), %bb.3 + ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(i32) = G_PHI %9(i32), %bb.3, [[C]](i32), %bb.0 + ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(i32) = G_PHI [[C]](i32), %bb.0, %11(i32), %bb.3 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(i1) = COPY [[PHI]](i1) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i1) = G_CONSTANT i1 true + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(i1) = COPY [[C1]](i1) + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(i64) = G_SEXT [[PHI2]](i32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[SEXT]], [[C2]](i32) + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV1]], [[SHL]](i64) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i32), addrspace 1) + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(i1) = G_ICMP intpred(ne), [[LOAD]](i32), [[C3]] + ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(i1) = S_ANDN2_B32 [[COPY4]](i1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(i1) = S_AND_B32 $exec_lo, [[COPY5]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(i1) = S_OR_B32 [[S_ANDN2_B32_]](i1), [[S_AND_B32_]](i1), implicit-def $scc + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(i1) = COPY [[S_OR_B32_]](i1) + ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(i32) = SI_IF [[ICMP]](i1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.2: ; GFX10-NEXT: successors: %bb.3(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C4]](s32) - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[SHL1]](s64) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s32), addrspace 1) - ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD1]], [[C5]] - ; GFX10-NEXT: G_STORE [[ADD]](s32), [[PTR_ADD1]](p1) :: (store (s32), addrspace 1) - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C5]] - ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 100 - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI2]](s32), [[C6]] - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP1]](s1) - ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY7]](s1), implicit-def $scc - ; GFX10-NEXT: 
[[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[SEXT]], [[C4]](i32) + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[SHL1]](i64) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i32), addrspace 1) + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[LOAD1]], [[C5]] + ; GFX10-NEXT: G_STORE [[ADD]](i32), [[PTR_ADD1]](p1) :: (store (i32), addrspace 1) + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[PHI2]], [[C5]] + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 100 + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[PHI2]](i32), [[C6]] + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(i1) = COPY [[ICMP1]](i1) + ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(i1) = S_ANDN2_B32 [[COPY6]](i1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(i1) = S_AND_B32 $exec_lo, [[COPY7]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(i1) = S_OR_B32 [[S_ANDN2_B32_1]](i1), [[S_AND_B32_1]](i1), implicit-def $scc ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.3: ; GFX10-NEXT: successors: %bb.4(0x04000000), %bb.1(0x7c000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI3:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.1, [[S_OR_B32_1]](s1), %bb.2 - ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.2, [[DEF]](s32), %bb.1 - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]](s1) - ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32) - ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY8]](s1), [[PHI1]](s32) - ; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: [[PHI3:%[0-9]+]]:sreg_32(i1) = PHI [[S_OR_B32_]](i1), %bb.1, [[S_OR_B32_1]](i1), %bb.2 + ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(i32) = G_PHI [[ADD1]](i32), %bb.2, [[DEF]](i32), %bb.1 + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(i1) = COPY [[PHI3]](i1) + ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](i32) + ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY8]](i1), [[PHI1]](i32) + ; GFX10-NEXT: SI_LOOP [[INT]](i32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.4 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.4: - ; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.3 - ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI5]](s32) + ; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(i32) = G_PHI [[INT]](i32), %bb.3 + ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI5]](i32) ; GFX10-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1(0x80000000) liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(p1) = G_MERGE_VALUES %0(s32), %1(s32) - %3:_(s32) = COPY $vgpr2 - %4:_(s32) = COPY $vgpr3 - %5:_(p1) = G_MERGE_VALUES %3(s32), %4(s32) - %6:_(s32) = G_CONSTANT i32 0 - %7:_(s32) = G_IMPLICIT_DEF + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(p1) = G_MERGE_VALUES %0(i32), %1(i32) + %3:_(i32) = COPY $vgpr2 + %4:_(i32) = COPY $vgpr3 + %5:_(p1) = G_MERGE_VALUES %3(i32), %4(i32) + %6:_(i32) = G_CONSTANT i32 0 + %7:_(i32) = G_IMPLICIT_DEF bb.1: successors: %bb.2(0x40000000), 
%bb.3(0x40000000) - %8:_(s32) = G_PHI %9(s32), %bb.3, %6(s32), %bb.0 - %10:_(s32) = G_PHI %6(s32), %bb.0, %11(s32), %bb.3 - %12:_(s1) = G_CONSTANT i1 true - %13:_(s64) = G_SEXT %10(s32) - %14:_(s32) = G_CONSTANT i32 2 - %15:_(s64) = G_SHL %13, %14(s32) - %16:_(p1) = G_PTR_ADD %5, %15(s64) - %17:_(s32) = G_LOAD %16(p1) :: (load (s32), addrspace 1) - %18:_(s32) = G_CONSTANT i32 0 - %19:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %17(s32), %18 - %20:sreg_32_xm0_xexec(s32) = SI_IF %19(s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec + %8:_(i32) = G_PHI %9(i32), %bb.3, %6(i32), %bb.0 + %10:_(i32) = G_PHI %6(i32), %bb.0, %11(i32), %bb.3 + %12:_(i1) = G_CONSTANT i1 true + %13:_(i64) = G_SEXT %10(i32) + %14:_(i32) = G_CONSTANT i32 2 + %15:_(i64) = G_SHL %13, %14(i32) + %16:_(p1) = G_PTR_ADD %5, %15(i64) + %17:_(i32) = G_LOAD %16(p1) :: (load (i32), addrspace 1) + %18:_(i32) = G_CONSTANT i32 0 + %19:sreg_32_xm0_xexec(i1) = G_ICMP intpred(ne), %17(i32), %18 + %20:sreg_32_xm0_xexec(i32) = SI_IF %19(i1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.2 bb.2: successors: %bb.3(0x80000000) - %21:_(s32) = G_CONSTANT i32 2 - %22:_(s64) = G_SHL %13, %21(s32) - %23:_(p1) = G_PTR_ADD %2, %22(s64) - %24:_(s32) = G_LOAD %23(p1) :: (load (s32), addrspace 1) - %25:_(s32) = G_CONSTANT i32 1 - %26:_(s32) = G_ADD %24, %25 - G_STORE %26(s32), %23(p1) :: (store (s32), addrspace 1) - %27:_(s32) = G_ADD %10, %25 - %28:_(s32) = G_CONSTANT i32 100 - %29:_(s1) = G_ICMP intpred(ult), %10(s32), %28 + %21:_(i32) = G_CONSTANT i32 2 + %22:_(i64) = G_SHL %13, %21(i32) + %23:_(p1) = G_PTR_ADD %2, %22(i64) + %24:_(i32) = G_LOAD %23(p1) :: (load (i32), addrspace 1) + %25:_(i32) = G_CONSTANT i32 1 + %26:_(i32) = G_ADD %24, %25 + G_STORE %26(i32), %23(p1) :: (store (i32), addrspace 1) + %27:_(i32) = G_ADD %10, %25 + %28:_(i32) = G_CONSTANT i32 100 + %29:_(i1) = G_ICMP intpred(ult), %10(i32), %28 bb.3: successors: %bb.4(0x04000000), %bb.1(0x7c000000) - %11:_(s32) = G_PHI %27(s32), %bb.2, %7(s32), %bb.1 - %30:_(s1) = G_PHI %29(s1), %bb.2, %12(s1), %bb.1 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %20(s32) - %9:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %30(s1), %8(s32) - SI_LOOP %9(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + %11:_(i32) = G_PHI %27(i32), %bb.2, %7(i32), %bb.1 + %30:_(i1) = G_PHI %29(i1), %bb.2, %12(i1), %bb.1 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %20(i32) + %9:sreg_32_xm0_xexec(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %30(i1), %8(i32) + SI_LOOP %9(i32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.4 bb.4: - %31:_(s32) = G_PHI %9(s32), %bb.3 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %31(s32) + %31:_(i32) = G_PHI %9(i32), %bb.3 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %31(i32) S_ENDPGM 0 + + + + + + + + ... 
--- @@ -331,184 +351,196 @@ body: | ; GFX10-NEXT: successors: %bb.1(0x80000000) ; GFX10-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(i1) = IMPLICIT_DEF ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.1: ; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %48(s1), %bb.3 - ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %12(s32), %bb.3, [[C]](s32), %bb.0 - ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %14(s32), %bb.3 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[C1]](s1) - ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI2]](s32) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C2]](s32) - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV1]], [[SHL]](s64) - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1) - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C3]] - ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY7]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1) - ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(i1) = PHI [[DEF1]](i1), %bb.0, %48(i1), %bb.3 + ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(i32) = G_PHI %12(i32), %bb.3, [[C]](i32), %bb.0 + ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(i32) = G_PHI [[C]](i32), %bb.0, 
%14(i32), %bb.3 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(i1) = COPY [[PHI]](i1) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i1) = G_CONSTANT i1 true + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(i1) = COPY [[C1]](i1) + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(i64) = G_SEXT [[PHI2]](i32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[SEXT]], [[C2]](i32) + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV1]], [[SHL]](i64) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i32), addrspace 1) + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(i1) = G_ICMP intpred(ne), [[LOAD]](i32), [[C3]] + ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(i1) = S_ANDN2_B32 [[COPY6]](i1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(i1) = S_AND_B32 $exec_lo, [[COPY7]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(i1) = S_OR_B32 [[S_ANDN2_B32_]](i1), [[S_AND_B32_]](i1), implicit-def $scc + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(i1) = COPY [[S_OR_B32_]](i1) + ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(i32) = SI_IF [[ICMP]](i1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.2: ; GFX10-NEXT: successors: %bb.4(0x40000000), %bb.5(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C5]](s32) - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV2]], [[SHL1]](s64) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s32), addrspace 1) - ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD1]](s32), [[C6]] - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[C4]](s1) - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[COPY9]](s1) - ; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP1]](s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(i1) = G_CONSTANT i1 true + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[SEXT]], [[C5]](i32) + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV2]], [[SHL1]](i64) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i32), addrspace 1) + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(i1) = G_ICMP intpred(ne), [[LOAD1]](i32), [[C6]] + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(i1) = COPY [[C4]](i1) + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(i1) = COPY [[COPY9]](i1) + ; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(i32) = SI_IF [[ICMP1]](i1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.4 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.3: ; GFX10-NEXT: successors: %bb.6(0x04000000), %bb.1(0x7c000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI3:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.1, %47(s1), %bb.5 - ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI %32(s32), %bb.5, [[DEF]](s32), %bb.1 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]](s1) - ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32) - ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.if.break), [[COPY11]](s1), [[PHI1]](s32) - ; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: [[PHI3:%[0-9]+]]:sreg_32(i1) = PHI [[S_OR_B32_]](i1), %bb.1, %47(i1), %bb.5 + ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(i32) = G_PHI %32(i32), %bb.5, [[DEF]](i32), %bb.1 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(i1) = COPY [[PHI3]](i1) + ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](i32) + ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY11]](i1), [[PHI1]](i32) + ; GFX10-NEXT: SI_LOOP [[INT]](i32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.6 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.4: ; GFX10-NEXT: successors: %bb.5(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C7]](s32) - ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[SHL2]](s64) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s32), addrspace 1) - ; GFX10-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD2]], [[C8]] - ; GFX10-NEXT: G_STORE [[ADD]](s32), [[PTR_ADD2]](p1) :: (store (s32), addrspace 1) - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C8]] - ; GFX10-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 100 - ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI2]](s32), [[C9]] - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1) - ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY10]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY12]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc + ; GFX10-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[SEXT]], [[C7]](i32) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[SHL2]](i64) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i32), addrspace 1) + ; GFX10-NEXT: [[C8:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[LOAD2]], [[C8]] + ; GFX10-NEXT: G_STORE [[ADD]](i32), [[PTR_ADD2]](p1) :: (store (i32), addrspace 1) + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[PHI2]], [[C8]] + ; GFX10-NEXT: [[C9:%[0-9]+]]:_(i32) = G_CONSTANT i32 100 + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[PHI2]](i32), [[C9]] + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(i1) = COPY [[ICMP2]](i1) + ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(i1) = S_ANDN2_B32 [[COPY10]](i1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(i1) = S_AND_B32 $exec_lo, [[COPY12]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(i1) = S_OR_B32 [[S_ANDN2_B32_1]](i1), [[S_AND_B32_1]](i1), implicit-def $scc ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.5: ; GFX10-NEXT: successors: %bb.3(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32(s1) = PHI [[COPY9]](s1), %bb.2, [[S_OR_B32_1]](s1), %bb.4 - ; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.4, [[DEF]](s32), %bb.2 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[PHI5]](s1) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[COPY13]](s1) - ; 
GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF1]](s32) - ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY14]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc + ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32(i1) = PHI [[COPY9]](i1), %bb.2, [[S_OR_B32_1]](i1), %bb.4 + ; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(i32) = G_PHI [[ADD1]](i32), %bb.4, [[DEF]](i32), %bb.2 + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(i1) = COPY [[PHI5]](i1) + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(i1) = COPY [[COPY13]](i1) + ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF1]](i32) + ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(i1) = S_ANDN2_B32 [[COPY8]](i1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(i1) = S_AND_B32 $exec_lo, [[COPY14]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(i1) = S_OR_B32 [[S_ANDN2_B32_2]](i1), [[S_AND_B32_2]](i1), implicit-def $scc ; GFX10-NEXT: G_BR %bb.3 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.6: - ; GFX10-NEXT: [[PHI7:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.3 - ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI7]](s32) + ; GFX10-NEXT: [[PHI7:%[0-9]+]]:_(i32) = G_PHI [[INT]](i32), %bb.3 + ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI7]](i32) ; GFX10-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1(0x80000000) liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(p1) = G_MERGE_VALUES %0(s32), %1(s32) - %3:_(s32) = COPY $vgpr2 - %4:_(s32) = COPY $vgpr3 - %5:_(p1) = G_MERGE_VALUES %3(s32), %4(s32) - %6:_(s32) = COPY $vgpr4 - %7:_(s32) = COPY $vgpr5 - %8:_(p1) = G_MERGE_VALUES %6(s32), %7(s32) - %9:_(s32) = G_CONSTANT i32 0 - %10:_(s32) = G_IMPLICIT_DEF + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(p1) = G_MERGE_VALUES %0(i32), %1(i32) + %3:_(i32) = COPY $vgpr2 + %4:_(i32) = COPY $vgpr3 + %5:_(p1) = G_MERGE_VALUES %3(i32), %4(i32) + %6:_(i32) = COPY $vgpr4 + %7:_(i32) = COPY $vgpr5 + %8:_(p1) = G_MERGE_VALUES %6(i32), %7(i32) + %9:_(i32) = G_CONSTANT i32 0 + %10:_(i32) = G_IMPLICIT_DEF bb.1: successors: %bb.2(0x40000000), %bb.3(0x40000000) - %11:_(s32) = G_PHI %12(s32), %bb.3, %9(s32), %bb.0 - %13:_(s32) = G_PHI %9(s32), %bb.0, %14(s32), %bb.3 - %15:_(s1) = G_CONSTANT i1 true - %16:_(s64) = G_SEXT %13(s32) - %17:_(s32) = G_CONSTANT i32 2 - %18:_(s64) = G_SHL %16, %17(s32) - %19:_(p1) = G_PTR_ADD %5, %18(s64) - %20:_(s32) = G_LOAD %19(p1) :: (load (s32), addrspace 1) - %21:_(s32) = G_CONSTANT i32 0 - %22:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %20(s32), %21 - %23:sreg_32_xm0_xexec(s32) = SI_IF %22(s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec + %11:_(i32) = G_PHI %12(i32), %bb.3, %9(i32), %bb.0 + %13:_(i32) = G_PHI %9(i32), %bb.0, %14(i32), %bb.3 + %15:_(i1) = G_CONSTANT i1 true + %16:_(i64) = G_SEXT %13(i32) + %17:_(i32) = G_CONSTANT i32 2 + %18:_(i64) = G_SHL %16, %17(i32) + %19:_(p1) = G_PTR_ADD %5, %18(i64) + %20:_(i32) = G_LOAD %19(p1) :: (load (i32), addrspace 1) + %21:_(i32) = G_CONSTANT i32 0 + %22:sreg_32_xm0_xexec(i1) = G_ICMP intpred(ne), %20(i32), %21 + %23:sreg_32_xm0_xexec(i32) = SI_IF %22(i1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.2 bb.2: successors: 
%bb.4(0x40000000), %bb.5(0x40000000) - %24:_(s1) = G_CONSTANT i1 true - %25:_(s32) = G_CONSTANT i32 2 - %26:_(s64) = G_SHL %16, %25(s32) - %27:_(p1) = G_PTR_ADD %8, %26(s64) - %28:_(s32) = G_LOAD %27(p1) :: (load (s32), addrspace 1) - %29:_(s32) = G_CONSTANT i32 0 - %30:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %28(s32), %29 - %31:sreg_32_xm0_xexec(s32) = SI_IF %30(s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec + %24:_(i1) = G_CONSTANT i1 true + %25:_(i32) = G_CONSTANT i32 2 + %26:_(i64) = G_SHL %16, %25(i32) + %27:_(p1) = G_PTR_ADD %8, %26(i64) + %28:_(i32) = G_LOAD %27(p1) :: (load (i32), addrspace 1) + %29:_(i32) = G_CONSTANT i32 0 + %30:sreg_32_xm0_xexec(i1) = G_ICMP intpred(ne), %28(i32), %29 + %31:sreg_32_xm0_xexec(i32) = SI_IF %30(i1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.4 bb.3: successors: %bb.6(0x04000000), %bb.1(0x7c000000) - %14:_(s32) = G_PHI %32(s32), %bb.5, %10(s32), %bb.1 - %33:_(s1) = G_PHI %34(s1), %bb.5, %15(s1), %bb.1 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %23(s32) - %12:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %33(s1), %11(s32) - SI_LOOP %12(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + %14:_(i32) = G_PHI %32(i32), %bb.5, %10(i32), %bb.1 + %33:_(i1) = G_PHI %34(i1), %bb.5, %15(i1), %bb.1 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %23(i32) + %12:sreg_32_xm0_xexec(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %33(i1), %11(i32) + SI_LOOP %12(i32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.6 bb.4: successors: %bb.5(0x80000000) - %35:_(s32) = G_CONSTANT i32 2 - %36:_(s64) = G_SHL %16, %35(s32) - %37:_(p1) = G_PTR_ADD %2, %36(s64) - %38:_(s32) = G_LOAD %37(p1) :: (load (s32), addrspace 1) - %39:_(s32) = G_CONSTANT i32 1 - %40:_(s32) = G_ADD %38, %39 - G_STORE %40(s32), %37(p1) :: (store (s32), addrspace 1) - %41:_(s32) = G_ADD %13, %39 - %42:_(s32) = G_CONSTANT i32 100 - %43:_(s1) = G_ICMP intpred(ult), %13(s32), %42 + %35:_(i32) = G_CONSTANT i32 2 + %36:_(i64) = G_SHL %16, %35(i32) + %37:_(p1) = G_PTR_ADD %2, %36(i64) + %38:_(i32) = G_LOAD %37(p1) :: (load (i32), addrspace 1) + %39:_(i32) = G_CONSTANT i32 1 + %40:_(i32) = G_ADD %38, %39 + G_STORE %40(i32), %37(p1) :: (store (i32), addrspace 1) + %41:_(i32) = G_ADD %13, %39 + %42:_(i32) = G_CONSTANT i32 100 + %43:_(i1) = G_ICMP intpred(ult), %13(i32), %42 bb.5: successors: %bb.3(0x80000000) - %32:_(s32) = G_PHI %41(s32), %bb.4, %10(s32), %bb.2 - %34:_(s1) = G_PHI %43(s1), %bb.4, %24(s1), %bb.2 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %31(s32) + %32:_(i32) = G_PHI %41(i32), %bb.4, %10(i32), %bb.2 + %34:_(i1) = G_PHI %43(i1), %bb.4, %24(i1), %bb.2 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %31(i32) G_BR %bb.3 bb.6: - %44:_(s32) = G_PHI %12(s32), %bb.3 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %44(s32) + %44:_(i32) = G_PHI %12(i32), %bb.3 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %44(i32) S_ENDPGM 0 + + + + + + + + + + + + ... 
--- @@ -521,239 +553,255 @@ body: | ; GFX10-NEXT: successors: %bb.1(0x80000000) ; GFX10-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY6]](i32), [[COPY7]](i32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(i1) = IMPLICIT_DEF ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.1: ; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %61(s1), %bb.3 - ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %15(s32), %bb.3, [[C]](s32), %bb.0 - ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %17(s32), %bb.3 - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[C1]](s1) - ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI2]](s32) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C2]](s32) - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV1]], [[SHL]](s64) - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1) - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C3]] - ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY9]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc - ; GFX10-NEXT: 
[[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1) - ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(i1) = PHI [[DEF1]](i1), %bb.0, %61(i1), %bb.3 + ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(i32) = G_PHI %15(i32), %bb.3, [[C]](i32), %bb.0 + ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(i32) = G_PHI [[C]](i32), %bb.0, %17(i32), %bb.3 + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(i1) = COPY [[PHI]](i1) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i1) = G_CONSTANT i1 true + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(i1) = COPY [[C1]](i1) + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(i64) = G_SEXT [[PHI2]](i32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[SEXT]], [[C2]](i32) + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV1]], [[SHL]](i64) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i32), addrspace 1) + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(i1) = G_ICMP intpred(ne), [[LOAD]](i32), [[C3]] + ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(i1) = S_ANDN2_B32 [[COPY8]](i1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(i1) = S_AND_B32 $exec_lo, [[COPY9]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(i1) = S_OR_B32 [[S_ANDN2_B32_]](i1), [[S_AND_B32_]](i1), implicit-def $scc + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(i1) = COPY [[S_OR_B32_]](i1) + ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(i32) = SI_IF [[ICMP]](i1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.2: ; GFX10-NEXT: successors: %bb.4(0x40000000), %bb.5(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C5]](s32) - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV2]], [[SHL1]](s64) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s32), addrspace 1) - ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD1]](s32), [[C6]] - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[C4]](s1) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[COPY11]](s1) - ; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP1]](s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(i1) = G_CONSTANT i1 true + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[SEXT]], [[C5]](i32) + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV2]], [[SHL1]](i64) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i32), addrspace 1) + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(i1) = G_ICMP intpred(ne), [[LOAD1]](i32), [[C6]] + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(i1) = COPY [[C4]](i1) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(i1) = COPY [[COPY11]](i1) + ; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(i32) = SI_IF [[ICMP1]](i1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.4 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.3: ; GFX10-NEXT: successors: %bb.8(0x04000000), 
%bb.1(0x7c000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI3:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.1, %60(s1), %bb.5 - ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI %35(s32), %bb.5, [[DEF]](s32), %bb.1 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]](s1) - ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32) - ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY13]](s1), [[PHI1]](s32) - ; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: [[PHI3:%[0-9]+]]:sreg_32(i1) = PHI [[S_OR_B32_]](i1), %bb.1, %60(i1), %bb.5 + ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(i32) = G_PHI %35(i32), %bb.5, [[DEF]](i32), %bb.1 + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(i1) = COPY [[PHI3]](i1) + ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](i32) + ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY13]](i1), [[PHI1]](i32) + ; GFX10-NEXT: SI_LOOP [[INT]](i32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.8 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.4: ; GFX10-NEXT: successors: %bb.6(0x40000000), %bb.7(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C8]](s32) - ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV3]], [[SHL2]](s64) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s32), addrspace 1) - ; GFX10-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD2]](s32), [[C9]] - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[C7]](s1) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[COPY14]](s1) - ; GFX10-NEXT: [[SI_IF2:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP2]](s1), %bb.7, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: [[C7:%[0-9]+]]:_(i1) = G_CONSTANT i1 true + ; GFX10-NEXT: [[C8:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[SEXT]], [[C8]](i32) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV3]], [[SHL2]](i64) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i32), addrspace 1) + ; GFX10-NEXT: [[C9:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:sreg_32_xm0_xexec(i1) = G_ICMP intpred(ne), [[LOAD2]](i32), [[C9]] + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(i1) = COPY [[C7]](i1) + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(i1) = COPY [[COPY14]](i1) + ; GFX10-NEXT: [[SI_IF2:%[0-9]+]]:sreg_32_xm0_xexec(i32) = SI_IF [[ICMP2]](i1), %bb.7, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.6 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.5: ; GFX10-NEXT: successors: %bb.3(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32(s1) = PHI [[COPY11]](s1), %bb.2, %72(s1), %bb.7 - ; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(s32) = G_PHI %46(s32), %bb.7, [[DEF]](s32), %bb.2 - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32(s1) = COPY [[PHI5]](s1) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:sreg_32(s1) = COPY [[COPY16]](s1) - ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF1]](s32) - ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY10]](s1), $exec_lo, implicit-def $scc - ; 
GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY17]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc + ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32(i1) = PHI [[COPY11]](i1), %bb.2, %72(i1), %bb.7 + ; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(i32) = G_PHI %46(i32), %bb.7, [[DEF]](i32), %bb.2 + ; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32(i1) = COPY [[PHI5]](i1) + ; GFX10-NEXT: [[COPY17:%[0-9]+]]:sreg_32(i1) = COPY [[COPY16]](i1) + ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF1]](i32) + ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(i1) = S_ANDN2_B32 [[COPY10]](i1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(i1) = S_AND_B32 $exec_lo, [[COPY17]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(i1) = S_OR_B32 [[S_ANDN2_B32_1]](i1), [[S_AND_B32_1]](i1), implicit-def $scc ; GFX10-NEXT: G_BR %bb.3 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.6: ; GFX10-NEXT: successors: %bb.7(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C10]](s32) - ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[SHL3]](s64) - ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s32), addrspace 1) - ; GFX10-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD3]], [[C11]] - ; GFX10-NEXT: G_STORE [[ADD]](s32), [[PTR_ADD3]](p1) :: (store (s32), addrspace 1) - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C11]] - ; GFX10-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 100 - ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI2]](s32), [[C12]] - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP3]](s1) - ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY15]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY18]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc + ; GFX10-NEXT: [[C10:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(i64) = G_SHL [[SEXT]], [[C10]](i32) + ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[SHL3]](i64) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p1) :: (load (i32), addrspace 1) + ; GFX10-NEXT: [[C11:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[LOAD3]], [[C11]] + ; GFX10-NEXT: G_STORE [[ADD]](i32), [[PTR_ADD3]](p1) :: (store (i32), addrspace 1) + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[PHI2]], [[C11]] + ; GFX10-NEXT: [[C12:%[0-9]+]]:_(i32) = G_CONSTANT i32 100 + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[PHI2]](i32), [[C12]] + ; GFX10-NEXT: [[COPY18:%[0-9]+]]:sreg_32(i1) = COPY [[ICMP3]](i1) + ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(i1) = S_ANDN2_B32 [[COPY15]](i1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(i1) = S_AND_B32 $exec_lo, [[COPY18]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(i1) = S_OR_B32 [[S_ANDN2_B32_2]](i1), [[S_AND_B32_2]](i1), implicit-def $scc ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.7: ; GFX10-NEXT: successors: %bb.5(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI7:%[0-9]+]]:sreg_32(s1) = PHI [[COPY14]](s1), %bb.4, 
[[S_OR_B32_2]](s1), %bb.6 - ; GFX10-NEXT: [[PHI8:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.6, [[DEF]](s32), %bb.4 - ; GFX10-NEXT: [[COPY19:%[0-9]+]]:sreg_32(s1) = COPY [[PHI7]](s1) - ; GFX10-NEXT: [[COPY20:%[0-9]+]]:sreg_32(s1) = COPY [[COPY19]](s1) - ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF2]](s32) - ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY12]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY20]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_3]](s1), [[S_AND_B32_3]](s1), implicit-def $scc + ; GFX10-NEXT: [[PHI7:%[0-9]+]]:sreg_32(i1) = PHI [[COPY14]](i1), %bb.4, [[S_OR_B32_2]](i1), %bb.6 + ; GFX10-NEXT: [[PHI8:%[0-9]+]]:_(i32) = G_PHI [[ADD1]](i32), %bb.6, [[DEF]](i32), %bb.4 + ; GFX10-NEXT: [[COPY19:%[0-9]+]]:sreg_32(i1) = COPY [[PHI7]](i1) + ; GFX10-NEXT: [[COPY20:%[0-9]+]]:sreg_32(i1) = COPY [[COPY19]](i1) + ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF2]](i32) + ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(i1) = S_ANDN2_B32 [[COPY12]](i1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(i1) = S_AND_B32 $exec_lo, [[COPY20]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32(i1) = S_OR_B32 [[S_ANDN2_B32_3]](i1), [[S_AND_B32_3]](i1), implicit-def $scc ; GFX10-NEXT: G_BR %bb.5 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.8: - ; GFX10-NEXT: [[PHI9:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.3 - ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI9]](s32) + ; GFX10-NEXT: [[PHI9:%[0-9]+]]:_(i32) = G_PHI [[INT]](i32), %bb.3 + ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI9]](i32) ; GFX10-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1(0x80000000) liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(p1) = G_MERGE_VALUES %0(s32), %1(s32) - %3:_(s32) = COPY $vgpr2 - %4:_(s32) = COPY $vgpr3 - %5:_(p1) = G_MERGE_VALUES %3(s32), %4(s32) - %6:_(s32) = COPY $vgpr4 - %7:_(s32) = COPY $vgpr5 - %8:_(p1) = G_MERGE_VALUES %6(s32), %7(s32) - %9:_(s32) = COPY $vgpr6 - %10:_(s32) = COPY $vgpr7 - %11:_(p1) = G_MERGE_VALUES %9(s32), %10(s32) - %12:_(s32) = G_CONSTANT i32 0 - %13:_(s32) = G_IMPLICIT_DEF + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(p1) = G_MERGE_VALUES %0(i32), %1(i32) + %3:_(i32) = COPY $vgpr2 + %4:_(i32) = COPY $vgpr3 + %5:_(p1) = G_MERGE_VALUES %3(i32), %4(i32) + %6:_(i32) = COPY $vgpr4 + %7:_(i32) = COPY $vgpr5 + %8:_(p1) = G_MERGE_VALUES %6(i32), %7(i32) + %9:_(i32) = COPY $vgpr6 + %10:_(i32) = COPY $vgpr7 + %11:_(p1) = G_MERGE_VALUES %9(i32), %10(i32) + %12:_(i32) = G_CONSTANT i32 0 + %13:_(i32) = G_IMPLICIT_DEF bb.1: successors: %bb.2(0x40000000), %bb.3(0x40000000) - %14:_(s32) = G_PHI %15(s32), %bb.3, %12(s32), %bb.0 - %16:_(s32) = G_PHI %12(s32), %bb.0, %17(s32), %bb.3 - %18:_(s1) = G_CONSTANT i1 true - %19:_(s64) = G_SEXT %16(s32) - %20:_(s32) = G_CONSTANT i32 2 - %21:_(s64) = G_SHL %19, %20(s32) - %22:_(p1) = G_PTR_ADD %5, %21(s64) - %23:_(s32) = G_LOAD %22(p1) :: (load (s32), addrspace 1) - %24:_(s32) = G_CONSTANT i32 0 - %25:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %23(s32), %24 - %26:sreg_32_xm0_xexec(s32) = SI_IF %25(s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec + %14:_(i32) = G_PHI %15(i32), %bb.3, %12(i32), %bb.0 + %16:_(i32) = G_PHI %12(i32), %bb.0, 
%17(i32), %bb.3 + %18:_(i1) = G_CONSTANT i1 true + %19:_(i64) = G_SEXT %16(i32) + %20:_(i32) = G_CONSTANT i32 2 + %21:_(i64) = G_SHL %19, %20(i32) + %22:_(p1) = G_PTR_ADD %5, %21(i64) + %23:_(i32) = G_LOAD %22(p1) :: (load (i32), addrspace 1) + %24:_(i32) = G_CONSTANT i32 0 + %25:sreg_32_xm0_xexec(i1) = G_ICMP intpred(ne), %23(i32), %24 + %26:sreg_32_xm0_xexec(i32) = SI_IF %25(i1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.2 bb.2: successors: %bb.4(0x40000000), %bb.5(0x40000000) - %27:_(s1) = G_CONSTANT i1 true - %28:_(s32) = G_CONSTANT i32 2 - %29:_(s64) = G_SHL %19, %28(s32) - %30:_(p1) = G_PTR_ADD %8, %29(s64) - %31:_(s32) = G_LOAD %30(p1) :: (load (s32), addrspace 1) - %32:_(s32) = G_CONSTANT i32 0 - %33:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %31(s32), %32 - %34:sreg_32_xm0_xexec(s32) = SI_IF %33(s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec + %27:_(i1) = G_CONSTANT i1 true + %28:_(i32) = G_CONSTANT i32 2 + %29:_(i64) = G_SHL %19, %28(i32) + %30:_(p1) = G_PTR_ADD %8, %29(i64) + %31:_(i32) = G_LOAD %30(p1) :: (load (i32), addrspace 1) + %32:_(i32) = G_CONSTANT i32 0 + %33:sreg_32_xm0_xexec(i1) = G_ICMP intpred(ne), %31(i32), %32 + %34:sreg_32_xm0_xexec(i32) = SI_IF %33(i1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.4 bb.3: successors: %bb.8(0x04000000), %bb.1(0x7c000000) - %17:_(s32) = G_PHI %35(s32), %bb.5, %13(s32), %bb.1 - %36:_(s1) = G_PHI %37(s1), %bb.5, %18(s1), %bb.1 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %26(s32) - %15:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %36(s1), %14(s32) - SI_LOOP %15(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + %17:_(i32) = G_PHI %35(i32), %bb.5, %13(i32), %bb.1 + %36:_(i1) = G_PHI %37(i1), %bb.5, %18(i1), %bb.1 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %26(i32) + %15:sreg_32_xm0_xexec(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %36(i1), %14(i32) + SI_LOOP %15(i32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.8 bb.4: successors: %bb.6(0x40000000), %bb.7(0x40000000) - %38:_(s1) = G_CONSTANT i1 true - %39:_(s32) = G_CONSTANT i32 2 - %40:_(s64) = G_SHL %19, %39(s32) - %41:_(p1) = G_PTR_ADD %11, %40(s64) - %42:_(s32) = G_LOAD %41(p1) :: (load (s32), addrspace 1) - %43:_(s32) = G_CONSTANT i32 0 - %44:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %42(s32), %43 - %45:sreg_32_xm0_xexec(s32) = SI_IF %44(s1), %bb.7, implicit-def $exec, implicit-def $scc, implicit $exec + %38:_(i1) = G_CONSTANT i1 true + %39:_(i32) = G_CONSTANT i32 2 + %40:_(i64) = G_SHL %19, %39(i32) + %41:_(p1) = G_PTR_ADD %11, %40(i64) + %42:_(i32) = G_LOAD %41(p1) :: (load (i32), addrspace 1) + %43:_(i32) = G_CONSTANT i32 0 + %44:sreg_32_xm0_xexec(i1) = G_ICMP intpred(ne), %42(i32), %43 + %45:sreg_32_xm0_xexec(i32) = SI_IF %44(i1), %bb.7, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.6 bb.5: successors: %bb.3(0x80000000) - %35:_(s32) = G_PHI %46(s32), %bb.7, %13(s32), %bb.2 - %37:_(s1) = G_PHI %47(s1), %bb.7, %27(s1), %bb.2 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %34(s32) + %35:_(i32) = G_PHI %46(i32), %bb.7, %13(i32), %bb.2 + %37:_(i1) = G_PHI %47(i1), %bb.7, %27(i1), %bb.2 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %34(i32) G_BR %bb.3 bb.6: successors: %bb.7(0x80000000) - %48:_(s32) = G_CONSTANT i32 2 - %49:_(s64) = G_SHL %19, %48(s32) - %50:_(p1) = G_PTR_ADD %2, %49(s64) - %51:_(s32) = G_LOAD %50(p1) :: (load (s32), addrspace 1) - 
%52:_(s32) = G_CONSTANT i32 1 - %53:_(s32) = G_ADD %51, %52 - G_STORE %53(s32), %50(p1) :: (store (s32), addrspace 1) - %54:_(s32) = G_ADD %16, %52 - %55:_(s32) = G_CONSTANT i32 100 - %56:_(s1) = G_ICMP intpred(ult), %16(s32), %55 + %48:_(i32) = G_CONSTANT i32 2 + %49:_(i64) = G_SHL %19, %48(i32) + %50:_(p1) = G_PTR_ADD %2, %49(i64) + %51:_(i32) = G_LOAD %50(p1) :: (load (i32), addrspace 1) + %52:_(i32) = G_CONSTANT i32 1 + %53:_(i32) = G_ADD %51, %52 + G_STORE %53(i32), %50(p1) :: (store (i32), addrspace 1) + %54:_(i32) = G_ADD %16, %52 + %55:_(i32) = G_CONSTANT i32 100 + %56:_(i1) = G_ICMP intpred(ult), %16(i32), %55 bb.7: successors: %bb.5(0x80000000) - %46:_(s32) = G_PHI %54(s32), %bb.6, %13(s32), %bb.4 - %47:_(s1) = G_PHI %56(s1), %bb.6, %38(s1), %bb.4 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %45(s32) + %46:_(i32) = G_PHI %54(i32), %bb.6, %13(i32), %bb.4 + %47:_(i1) = G_PHI %56(i1), %bb.6, %38(i1), %bb.4 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %45(i32) G_BR %bb.5 bb.8: - %57:_(s32) = G_PHI %15(s32), %bb.3 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %57(s32) + %57:_(i32) = G_PHI %15(i32), %bb.3 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %57(i32) S_ENDPGM 0 + + + + + + + + + + + + + + + + ... --- @@ -766,191 +814,203 @@ body: | ; GFX10-NEXT: successors: %bb.1(0x80000000) ; GFX10-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF - ; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF - ; GFX10-NEXT: [[DEF3:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(i1) = IMPLICIT_DEF + ; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32(i1) = IMPLICIT_DEF + ; GFX10-NEXT: [[DEF3:%[0-9]+]]:sreg_32_xm0_xexec(i1) = IMPLICIT_DEF ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.1: ; GFX10-NEXT: successors: %bb.3(0x40000000), %bb.5(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[DEF3]](s1), %bb.0, %67(s1), %bb.5 - ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI [[DEF2]](s1), %bb.0, %56(s1), %bb.5 - ; 
GFX10-NEXT: [[PHI2:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %43(s1), %bb.5 - ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI %12(s32), %bb.5, [[C]](s32), %bb.0 - ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %14(s32), %bb.5 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI]](s1) - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[PHI2]](s1) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[C1]](s1) - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[C1]](s1) - ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI4]](s32) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C2]](s32) - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV1]], [[SHL]](s64) - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1) - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C3]] - ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY10]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1) - ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY7]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY9]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_1]](s1) - ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec(i1) = PHI [[DEF3]](i1), %bb.0, %67(i1), %bb.5 + ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(i1) = PHI [[DEF2]](i1), %bb.0, %56(i1), %bb.5 + ; GFX10-NEXT: [[PHI2:%[0-9]+]]:sreg_32(i1) = PHI [[DEF1]](i1), %bb.0, %43(i1), %bb.5 + ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(i32) = G_PHI %12(i32), %bb.5, [[C]](i32), %bb.0 + ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(i32) = G_PHI [[C]](i32), %bb.0, %14(i32), %bb.5 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec(i1) = COPY [[PHI]](i1) + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(i1) = COPY [[PHI1]](i1) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(i1) = COPY [[PHI2]](i1) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i1) = G_CONSTANT i1 true + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(i1) = COPY [[C1]](i1) + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(i1) = COPY [[C1]](i1) + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(i64) = G_SEXT [[PHI4]](i32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[SEXT]], [[C2]](i32) + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV1]], [[SHL]](i64) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i32), addrspace 1) + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(i1) = G_ICMP intpred(ne), [[LOAD]](i32), [[C3]] + ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(i1) = S_ANDN2_B32 [[COPY8]](i1), 
$exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(i1) = S_AND_B32 $exec_lo, [[COPY10]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(i1) = S_OR_B32 [[S_ANDN2_B32_]](i1), [[S_AND_B32_]](i1), implicit-def $scc + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(i1) = COPY [[S_OR_B32_]](i1) + ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(i1) = S_ANDN2_B32 [[COPY7]](i1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(i1) = S_AND_B32 $exec_lo, [[COPY9]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(i1) = S_OR_B32 [[S_ANDN2_B32_1]](i1), [[S_AND_B32_1]](i1), implicit-def $scc + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(i1) = COPY [[S_OR_B32_1]](i1) + ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(i32) = SI_IF [[ICMP]](i1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.3 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.2: ; GFX10-NEXT: successors: %bb.4(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GFX10-NEXT: G_STORE [[C4]](s32), [[MV2]](p1) :: (store (s32), addrspace 1) + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GFX10-NEXT: G_STORE [[C4]](i32), [[MV2]](p1) :: (store (i32), addrspace 1) ; GFX10-NEXT: G_BR %bb.4 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.3: ; GFX10-NEXT: successors: %bb.5(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s1) = G_CONSTANT i1 false - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[C5]](s1) - ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C6]](s32) - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[SHL1]](s64) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s32), addrspace 1) - ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD1]], [[C7]] - ; GFX10-NEXT: G_STORE [[ADD]](s32), [[PTR_ADD1]](p1) :: (store (s32), addrspace 1) - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI4]], [[C7]] - ; GFX10-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 100 - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI4]](s32), [[C8]] - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP1]](s1) - ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY11]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY13]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY12]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY14]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_3]](s1), [[S_AND_B32_3]](s1), implicit-def $scc + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(i1) = G_CONSTANT i1 false + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(i1) = COPY [[C5]](i1) + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[SEXT]], [[C6]](i32) + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[SHL1]](i64) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i32), addrspace 1) + ; GFX10-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[LOAD1]], 
[[C7]] + ; GFX10-NEXT: G_STORE [[ADD]](i32), [[PTR_ADD1]](p1) :: (store (i32), addrspace 1) + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[PHI4]], [[C7]] + ; GFX10-NEXT: [[C8:%[0-9]+]]:_(i32) = G_CONSTANT i32 100 + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[PHI4]](i32), [[C8]] + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(i1) = COPY [[ICMP1]](i1) + ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(i1) = S_ANDN2_B32 [[COPY11]](i1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(i1) = S_AND_B32 $exec_lo, [[COPY13]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(i1) = S_OR_B32 [[S_ANDN2_B32_2]](i1), [[S_AND_B32_2]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(i1) = S_ANDN2_B32 [[COPY12]](i1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(i1) = S_AND_B32 $exec_lo, [[COPY14]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32(i1) = S_OR_B32 [[S_ANDN2_B32_3]](i1), [[S_AND_B32_3]](i1), implicit-def $scc ; GFX10-NEXT: G_BR %bb.5 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.4: - ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %35(s32) + ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %35(i32) ; GFX10-NEXT: S_ENDPGM 0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.5: ; GFX10-NEXT: successors: %bb.6(0x04000000), %bb.1(0x7c000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_1]](s1), %bb.1, [[S_OR_B32_3]](s1), %bb.3 - ; GFX10-NEXT: [[PHI6:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.1, [[S_OR_B32_2]](s1), %bb.3 - ; GFX10-NEXT: [[PHI7:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.3, [[DEF]](s32), %bb.1 - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[PHI5]](s1) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32(s1) = COPY [[PHI6]](s1) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[COPY16]](s1) - ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32) - ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY15]](s1), [[PHI3]](s32) - ; GFX10-NEXT: [[S_ANDN2_B32_4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY17]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_4]](s1), [[S_AND_B32_4]](s1), implicit-def $scc - ; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32(i1) = PHI [[S_OR_B32_1]](i1), %bb.1, [[S_OR_B32_3]](i1), %bb.3 + ; GFX10-NEXT: [[PHI6:%[0-9]+]]:sreg_32(i1) = PHI [[S_OR_B32_]](i1), %bb.1, [[S_OR_B32_2]](i1), %bb.3 + ; GFX10-NEXT: [[PHI7:%[0-9]+]]:_(i32) = G_PHI [[ADD1]](i32), %bb.3, [[DEF]](i32), %bb.1 + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(i1) = COPY [[PHI5]](i1) + ; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32(i1) = COPY [[PHI6]](i1) + ; GFX10-NEXT: [[COPY17:%[0-9]+]]:sreg_32_xm0_xexec(i1) = COPY [[COPY16]](i1) + ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](i32) + ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY15]](i1), [[PHI3]](i32) + ; GFX10-NEXT: [[S_ANDN2_B32_4:%[0-9]+]]:sreg_32_xm0_xexec(i1) = S_ANDN2_B32 [[COPY6]](i1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: 
[[S_AND_B32_4:%[0-9]+]]:sreg_32_xm0_xexec(i1) = S_AND_B32 $exec_lo, [[COPY17]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_4:%[0-9]+]]:sreg_32_xm0_xexec(i1) = S_OR_B32 [[S_ANDN2_B32_4]](i1), [[S_AND_B32_4]](i1), implicit-def $scc + ; GFX10-NEXT: SI_LOOP [[INT]](i32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.6 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.6: ; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI8:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.5 - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[S_OR_B32_4]](s1) - ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI8]](s32) - ; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY18]](s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: [[PHI8:%[0-9]+]]:_(i32) = G_PHI [[INT]](i32), %bb.5 + ; GFX10-NEXT: [[COPY18:%[0-9]+]]:sreg_32_xm0_xexec(i1) = COPY [[S_OR_B32_4]](i1) + ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI8]](i32) + ; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(i32) = SI_IF [[COPY18]](i1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.2 bb.0: successors: %bb.1(0x80000000) liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(p1) = G_MERGE_VALUES %0(s32), %1(s32) - %3:_(s32) = COPY $vgpr2 - %4:_(s32) = COPY $vgpr3 - %5:_(p1) = G_MERGE_VALUES %3(s32), %4(s32) - %6:_(s32) = COPY $vgpr4 - %7:_(s32) = COPY $vgpr5 - %8:_(p1) = G_MERGE_VALUES %6(s32), %7(s32) - %9:_(s32) = G_CONSTANT i32 0 - %10:_(s32) = G_IMPLICIT_DEF + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(p1) = G_MERGE_VALUES %0(i32), %1(i32) + %3:_(i32) = COPY $vgpr2 + %4:_(i32) = COPY $vgpr3 + %5:_(p1) = G_MERGE_VALUES %3(i32), %4(i32) + %6:_(i32) = COPY $vgpr4 + %7:_(i32) = COPY $vgpr5 + %8:_(p1) = G_MERGE_VALUES %6(i32), %7(i32) + %9:_(i32) = G_CONSTANT i32 0 + %10:_(i32) = G_IMPLICIT_DEF bb.1: successors: %bb.3(0x40000000), %bb.5(0x40000000) - %11:_(s32) = G_PHI %12(s32), %bb.5, %9(s32), %bb.0 - %13:_(s32) = G_PHI %9(s32), %bb.0, %14(s32), %bb.5 - %15:_(s1) = G_CONSTANT i1 true - %16:_(s64) = G_SEXT %13(s32) - %17:_(s32) = G_CONSTANT i32 2 - %18:_(s64) = G_SHL %16, %17(s32) - %19:_(p1) = G_PTR_ADD %5, %18(s64) - %20:_(s32) = G_LOAD %19(p1) :: (load (s32), addrspace 1) - %21:_(s32) = G_CONSTANT i32 0 - %22:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %20(s32), %21 - %23:sreg_32_xm0_xexec(s32) = SI_IF %22(s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec + %11:_(i32) = G_PHI %12(i32), %bb.5, %9(i32), %bb.0 + %13:_(i32) = G_PHI %9(i32), %bb.0, %14(i32), %bb.5 + %15:_(i1) = G_CONSTANT i1 true + %16:_(i64) = G_SEXT %13(i32) + %17:_(i32) = G_CONSTANT i32 2 + %18:_(i64) = G_SHL %16, %17(i32) + %19:_(p1) = G_PTR_ADD %5, %18(i64) + %20:_(i32) = G_LOAD %19(p1) :: (load (i32), addrspace 1) + %21:_(i32) = G_CONSTANT i32 0 + %22:sreg_32_xm0_xexec(i1) = G_ICMP intpred(ne), %20(i32), %21 + %23:sreg_32_xm0_xexec(i32) = SI_IF %22(i1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.3 bb.2: successors: %bb.4(0x80000000) - %24:_(s32) = G_CONSTANT i32 10 - G_STORE %24(s32), %8(p1) :: (store (s32), addrspace 1) + %24:_(i32) = G_CONSTANT i32 10 + G_STORE %24(i32), %8(p1) :: (store (i32), addrspace 1) G_BR %bb.4 bb.3: successors: %bb.5(0x80000000) - %25:_(s1) = G_CONSTANT i1 false - %26:_(s32) = G_CONSTANT i32 2 - %27:_(s64) = 
G_SHL %16, %26(s32) - %28:_(p1) = G_PTR_ADD %2, %27(s64) - %29:_(s32) = G_LOAD %28(p1) :: (load (s32), addrspace 1) - %30:_(s32) = G_CONSTANT i32 1 - %31:_(s32) = G_ADD %29, %30 - G_STORE %31(s32), %28(p1) :: (store (s32), addrspace 1) - %32:_(s32) = G_ADD %13, %30 - %33:_(s32) = G_CONSTANT i32 100 - %34:_(s1) = G_ICMP intpred(ult), %13(s32), %33 + %25:_(i1) = G_CONSTANT i1 false + %26:_(i32) = G_CONSTANT i32 2 + %27:_(i64) = G_SHL %16, %26(i32) + %28:_(p1) = G_PTR_ADD %2, %27(i64) + %29:_(i32) = G_LOAD %28(p1) :: (load (i32), addrspace 1) + %30:_(i32) = G_CONSTANT i32 1 + %31:_(i32) = G_ADD %29, %30 + G_STORE %31(i32), %28(p1) :: (store (i32), addrspace 1) + %32:_(i32) = G_ADD %13, %30 + %33:_(i32) = G_CONSTANT i32 100 + %34:_(i1) = G_ICMP intpred(ult), %13(i32), %33 G_BR %bb.5 bb.4: - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %35(s32) + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %35(i32) S_ENDPGM 0 bb.5: successors: %bb.6(0x04000000), %bb.1(0x7c000000) - %14:_(s32) = G_PHI %32(s32), %bb.3, %10(s32), %bb.1 - %36:_(s1) = G_PHI %25(s1), %bb.3, %15(s1), %bb.1 - %37:_(s1) = G_PHI %34(s1), %bb.3, %15(s1), %bb.1 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %23(s32) - %12:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %37(s1), %11(s32) - SI_LOOP %12(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + %14:_(i32) = G_PHI %32(i32), %bb.3, %10(i32), %bb.1 + %36:_(i1) = G_PHI %25(i1), %bb.3, %15(i1), %bb.1 + %37:_(i1) = G_PHI %34(i1), %bb.3, %15(i1), %bb.1 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %23(i32) + %12:sreg_32_xm0_xexec(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %37(i1), %11(i32) + SI_LOOP %12(i32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.6 bb.6: successors: %bb.2(0x40000000), %bb.4(0x40000000) - %38:sreg_32_xm0_xexec(s1) = G_PHI %36(s1), %bb.5 - %39:_(s32) = G_PHI %12(s32), %bb.5 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %39(s32) - %35:sreg_32_xm0_xexec(s32) = SI_IF %38(s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec + %38:sreg_32_xm0_xexec(i1) = G_PHI %36(i1), %bb.5 + %39:_(i32) = G_PHI %12(i32), %bb.5 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %39(i32) + %35:sreg_32_xm0_xexec(i32) = SI_IF %38(i1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.2 + + + + + + + + + + + + ... 
--- @@ -963,191 +1023,205 @@ body: | ; GFX10-NEXT: successors: %bb.7(0x80000000) ; GFX10-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s1) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY4]](s32), [[COPY1]] - ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF - ; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP]](s1) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(i1) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(sgt), [[COPY4]](i32), [[COPY1]] + ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(i1) = IMPLICIT_DEF + ; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32(i1) = IMPLICIT_DEF + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(i1) = COPY [[ICMP]](i1) ; GFX10-NEXT: G_BR %bb.7 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.1: ; GFX10-NEXT: successors: %bb.3(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sle), [[COPY4]](s32), [[COPY]] + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(sle), [[COPY4]](i32), [[COPY]] ; GFX10-NEXT: G_BR %bb.3 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.2: ; GFX10-NEXT: successors: %bb.4(0x40000000), %bb.7(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI %53(s1), %bb.6, %57(s1), %bb.7 - ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI %35(s1), %bb.6, %34(s1), %bb.7 - ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s1) = G_PHI %12(s1), %bb.6, [[DEF]](s1), %bb.7 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI2]](s1) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI]](s1) - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1) - ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %15(s32) - ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY9]](s1), %17(s32) - ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY7]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[S_OR_B32_]](s1) - ; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.7, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec(i1) = PHI %53(i1), %bb.6, %57(i1), %bb.7 + ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(i1) = PHI %35(i1), 
%bb.6, %34(i1), %bb.7 + ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(i1) = G_PHI %12(i1), %bb.6, [[DEF]](i1), %bb.7 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec(i1) = COPY [[PHI2]](i1) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32_xm0_xexec(i1) = COPY [[PHI]](i1) + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(i1) = COPY [[PHI1]](i1) + ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %15(i32) + ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY9]](i1), %17(i32) + ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32_xm0_xexec(i1) = S_ANDN2_B32 [[COPY8]](i1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec(i1) = S_AND_B32 $exec_lo, [[COPY7]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec(i1) = S_OR_B32 [[S_ANDN2_B32_]](i1), [[S_AND_B32_]](i1), implicit-def $scc + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32_xm0_xexec(i1) = COPY [[S_OR_B32_]](i1) + ; GFX10-NEXT: SI_LOOP [[INT]](i32), %bb.7, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.4 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.3: ; GFX10-NEXT: successors: %bb.6(0x04000000), %bb.3(0x7c000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.1, %19(s32), %bb.3 - ; GFX10-NEXT: [[INT1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[ICMP1]](s1), [[PHI3]](s32) - ; GFX10-NEXT: SI_LOOP [[INT1]](s32), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(i32) = G_PHI [[C1]](i32), %bb.1, %19(i32), %bb.3 + ; GFX10-NEXT: [[INT1:%[0-9]+]]:sreg_32_xm0_xexec(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[ICMP1]](i1), [[PHI3]](i32) + ; GFX10-NEXT: SI_LOOP [[INT1]](i32), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.6 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.4: ; GFX10-NEXT: successors: %bb.5(0x04000000), %bb.7(0x7c000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32) - ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY5]](s32), [[COPY]] - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[C2]](s1) - ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP]], [[C2]] - ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP2]], [[XOR]] - ; GFX10-NEXT: [[INT2:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[OR]](s1), %25(s32) - ; GFX10-NEXT: [[DEF3:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF - ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 %49(s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY11]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY10]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY12]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc - ; GFX10-NEXT: SI_LOOP [[INT2]](s32), %bb.7, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: 
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](i32) + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(sgt), [[COPY5]](i32), [[COPY]] + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(i1) = COPY [[ICMP2]](i1) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i1) = G_CONSTANT i1 true + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32_xm0_xexec(i1) = COPY [[C2]](i1) + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(i1) = G_XOR [[ICMP]], [[C2]] + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(i1) = G_OR [[ICMP2]], [[XOR]] + ; GFX10-NEXT: [[INT2:%[0-9]+]]:sreg_32_xm0_xexec(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[OR]](i1), %25(i32) + ; GFX10-NEXT: [[DEF3:%[0-9]+]]:sreg_32(i1) = IMPLICIT_DEF + ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(i1) = S_ANDN2_B32 %49(i1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(i1) = S_AND_B32 $exec_lo, [[COPY11]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(i1) = S_OR_B32 [[S_ANDN2_B32_1]](i1), [[S_AND_B32_1]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(i1) = S_ANDN2_B32 [[COPY10]](i1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(i1) = S_AND_B32 $exec_lo, [[COPY12]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(i1) = S_OR_B32 [[S_ANDN2_B32_2]](i1), [[S_AND_B32_2]](i1), implicit-def $scc + ; GFX10-NEXT: SI_LOOP [[INT2]](i32), %bb.7, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.5 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.5: - ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[INT2]](s32), %bb.4 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_1]](s1) - ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI4]](s32) - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY13]](s1), [[COPY3]], [[COPY2]] - ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[SELECT]](s32) - ; GFX10-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) + ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(i32) = G_PHI [[INT2]](i32), %bb.4 + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(i1) = COPY [[S_OR_B32_1]](i1) + ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI4]](i32) + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[COPY13]](i1), [[COPY3]], [[COPY2]] + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[SELECT]](i32) + ; GFX10-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](i32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.6: ; GFX10-NEXT: successors: %bb.2(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(s32) = G_PHI [[INT1]](s32), %bb.3 - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 false - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1) - ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI5]](s32) - ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 %42(s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY14]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_3]](s1), [[S_AND_B32_3]](s1), implicit-def $scc - ; GFX10-NEXT: [[DEF4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF + ; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(i32) = G_PHI [[INT1]](i32), %bb.3 + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i1) = G_CONSTANT i1 
false + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(i1) = COPY [[C3]](i1) + ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI5]](i32) + ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(i1) = S_ANDN2_B32 %42(i1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(i1) = S_AND_B32 $exec_lo, [[COPY14]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32(i1) = S_OR_B32 [[S_ANDN2_B32_3]](i1), [[S_AND_B32_3]](i1), implicit-def $scc + ; GFX10-NEXT: [[DEF4:%[0-9]+]]:sreg_32_xm0_xexec(i1) = IMPLICIT_DEF ; GFX10-NEXT: G_BR %bb.2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.7: ; GFX10-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI6:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[COPY6]](s1), %bb.0, [[S_OR_B32_]](s1), %bb.2, [[S_OR_B32_2]](s1), %bb.4 - ; GFX10-NEXT: [[PHI7:%[0-9]+]]:sreg_32(s1) = PHI [[DEF2]](s1), %bb.0, [[PHI7]](s1), %bb.2, [[S_OR_B32_1]](s1), %bb.4 - ; GFX10-NEXT: [[PHI8:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, [[PHI1]](s1), %bb.2, [[DEF3]](s1), %bb.4 - ; GFX10-NEXT: [[PHI9:%[0-9]+]]:_(s32) = G_PHI [[INT2]](s32), %bb.4, [[PHI9]](s32), %bb.2, [[C]](s32), %bb.0 - ; GFX10-NEXT: [[PHI10:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.4, [[INT]](s32), %bb.2, [[C]](s32), %bb.0 - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI6]](s1) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32(s1) = COPY [[PHI7]](s1) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:sreg_32(s1) = COPY [[PHI8]](s1) - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:sreg_32(s1) = COPY [[C4]](s1) - ; GFX10-NEXT: [[S_ANDN2_B32_4:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY17]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_4:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY18]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_4:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_4]](s1), [[S_AND_B32_4]](s1), implicit-def $scc - ; GFX10-NEXT: [[COPY19:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_4]](s1) - ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY15]](s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: [[PHI6:%[0-9]+]]:sreg_32_xm0_xexec(i1) = PHI [[COPY6]](i1), %bb.0, [[S_OR_B32_]](i1), %bb.2, [[S_OR_B32_2]](i1), %bb.4 + ; GFX10-NEXT: [[PHI7:%[0-9]+]]:sreg_32(i1) = PHI [[DEF2]](i1), %bb.0, [[PHI7]](i1), %bb.2, [[S_OR_B32_1]](i1), %bb.4 + ; GFX10-NEXT: [[PHI8:%[0-9]+]]:sreg_32(i1) = PHI [[DEF1]](i1), %bb.0, [[PHI1]](i1), %bb.2, [[DEF3]](i1), %bb.4 + ; GFX10-NEXT: [[PHI9:%[0-9]+]]:_(i32) = G_PHI [[INT2]](i32), %bb.4, [[PHI9]](i32), %bb.2, [[C]](i32), %bb.0 + ; GFX10-NEXT: [[PHI10:%[0-9]+]]:_(i32) = G_PHI [[C]](i32), %bb.4, [[INT]](i32), %bb.2, [[C]](i32), %bb.0 + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32_xm0_xexec(i1) = COPY [[PHI6]](i1) + ; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32(i1) = COPY [[PHI7]](i1) + ; GFX10-NEXT: [[COPY17:%[0-9]+]]:sreg_32(i1) = COPY [[PHI8]](i1) + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(i1) = G_CONSTANT i1 true + ; GFX10-NEXT: [[COPY18:%[0-9]+]]:sreg_32(i1) = COPY [[C4]](i1) + ; GFX10-NEXT: [[S_ANDN2_B32_4:%[0-9]+]]:sreg_32(i1) = S_ANDN2_B32 [[COPY17]](i1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_4:%[0-9]+]]:sreg_32(i1) = S_AND_B32 $exec_lo, [[COPY18]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_4:%[0-9]+]]:sreg_32(i1) = S_OR_B32 [[S_ANDN2_B32_4]](i1), [[S_AND_B32_4]](i1), implicit-def $scc + ; GFX10-NEXT: [[COPY19:%[0-9]+]]:sreg_32(i1) = COPY [[S_OR_B32_4]](i1) + ; GFX10-NEXT: 
[[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(i32) = SI_IF [[COPY15]](i1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.1 bb.0: successors: %bb.7(0x80000000) liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = COPY $vgpr3 - %4:_(s32) = COPY $vgpr4 - %5:_(s32) = COPY $vgpr5 - %6:_(s32) = G_CONSTANT i32 0 - %7:_(s1) = G_IMPLICIT_DEF - %8:_(s1) = G_ICMP intpred(sgt), %4(s32), %1 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 + %4:_(i32) = COPY $vgpr4 + %5:_(i32) = COPY $vgpr5 + %6:_(i32) = G_CONSTANT i32 0 + %7:_(i1) = G_IMPLICIT_DEF + %8:_(i1) = G_ICMP intpred(sgt), %4(i32), %1 G_BR %bb.7 bb.1: successors: %bb.3(0x80000000) - %9:_(s32) = G_CONSTANT i32 0 - %10:_(s1) = G_ICMP intpred(sle), %4(s32), %0 + %9:_(i32) = G_CONSTANT i32 0 + %10:_(i1) = G_ICMP intpred(sle), %4(i32), %0 G_BR %bb.3 bb.2: successors: %bb.4(0x40000000), %bb.7(0x40000000) - %11:_(s1) = G_PHI %12(s1), %bb.6, %7(s1), %bb.7 - %13:_(s1) = G_PHI %12(s1), %bb.6, %14(s1), %bb.7 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %15(s32) - %16:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %13(s1), %17(s32) - SI_LOOP %16(s32), %bb.7, implicit-def $exec, implicit-def $scc, implicit $exec + %11:_(i1) = G_PHI %12(i1), %bb.6, %7(i1), %bb.7 + %13:_(i1) = G_PHI %12(i1), %bb.6, %14(i1), %bb.7 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %15(i32) + %16:sreg_32_xm0_xexec(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %13(i1), %17(i32) + SI_LOOP %16(i32), %bb.7, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.4 bb.3: successors: %bb.6(0x04000000), %bb.3(0x7c000000) - %18:_(s32) = G_PHI %9(s32), %bb.1, %19(s32), %bb.3 - %19:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %10(s1), %18(s32) - SI_LOOP %19(s32), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec + %18:_(i32) = G_PHI %9(i32), %bb.1, %19(i32), %bb.3 + %19:sreg_32_xm0_xexec(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %10(i1), %18(i32) + SI_LOOP %19(i32), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.6 bb.4: successors: %bb.5(0x04000000), %bb.7(0x7c000000) - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %16(s32) - %20:_(s1) = G_ICMP intpred(sgt), %5(s32), %0 - %21:_(s1) = G_CONSTANT i1 true - %22:_(s1) = G_XOR %8, %21 - %23:_(s1) = G_OR %20, %22 - %24:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %23(s1), %25(s32) - SI_LOOP %24(s32), %bb.7, implicit-def $exec, implicit-def $scc, implicit $exec + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %16(i32) + %20:_(i1) = G_ICMP intpred(sgt), %5(i32), %0 + %21:_(i1) = G_CONSTANT i1 true + %22:_(i1) = G_XOR %8, %21 + %23:_(i1) = G_OR %20, %22 + %24:sreg_32_xm0_xexec(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %23(i1), %25(i32) + SI_LOOP %24(i32), %bb.7, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.5 bb.5: - %26:_(s1) = G_PHI %20(s1), %bb.4 - %27:_(s32) = G_PHI %24(s32), %bb.4 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %27(s32) - %28:_(s32) = G_SELECT %26(s1), %3, %2 - %29:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %28(s32) - $sgpr0 = COPY %29(s32) + %26:_(i1) = G_PHI %20(i1), %bb.4 + %27:_(i32) = G_PHI %24(i32), %bb.4 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %27(i32) + %28:_(i32) = 
G_SELECT %26(i1), %3, %2 + %29:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %28(i32) + $sgpr0 = COPY %29(i32) SI_RETURN_TO_EPILOG implicit $sgpr0 bb.6: successors: %bb.2(0x80000000) - %30:_(s32) = G_PHI %19(s32), %bb.3 - %12:_(s1) = G_CONSTANT i1 false - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %30(s32) + %30:_(i32) = G_PHI %19(i32), %bb.3 + %12:_(i1) = G_CONSTANT i1 false + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %30(i32) G_BR %bb.2 bb.7: successors: %bb.1(0x40000000), %bb.2(0x40000000) - %25:_(s32) = G_PHI %24(s32), %bb.4, %25(s32), %bb.2, %6(s32), %bb.0 - %17:_(s32) = G_PHI %6(s32), %bb.4, %16(s32), %bb.2, %6(s32), %bb.0 - %31:sreg_32_xm0_xexec(s1) = G_PHI %8(s1), %bb.0, %11(s1), %bb.2, %21(s1), %bb.4 - %14:_(s1) = G_CONSTANT i1 true - %15:sreg_32_xm0_xexec(s32) = SI_IF %31(s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec + %25:_(i32) = G_PHI %24(i32), %bb.4, %25(i32), %bb.2, %6(i32), %bb.0 + %17:_(i32) = G_PHI %6(i32), %bb.4, %16(i32), %bb.2, %6(i32), %bb.0 + %31:sreg_32_xm0_xexec(i1) = G_PHI %8(i1), %bb.0, %11(i1), %bb.2, %21(i1), %bb.4 + %14:_(i1) = G_CONSTANT i1 true + %15:sreg_32_xm0_xexec(i32) = SI_IF %31(i1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.1 + + + + + + + + + + + + + + ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.mir index fb436623bed2d..426860a53af6b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.mir @@ -11,81 +11,91 @@ body: | ; GFX10-NEXT: successors: %bb.1(0x80000000) ; GFX10-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](i32), [[COPY2]](i32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i1) = G_CONSTANT i1 true + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(i1) = IMPLICIT_DEF ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.1: ; GFX10-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF]](s1), %bb.0, %22(s1), %bb.1 - ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C1]](s32), %bb.0 - ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %9(s32), %bb.1 - ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI [[C]](s1), %bb.0, %11(s1), %bb.1 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]](s1) - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI3]], [[C2]] - ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI2]](s32) - ; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]] - ; GFX10-NEXT: 
[[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C3]] - ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI1]](s32) - ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY4]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY3]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc - ; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(i1) = PHI [[DEF]](i1), %bb.0, %25(i1), %bb.1 + ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(i32) = G_PHI %7(i32), %bb.1, [[C1]](i32), %bb.0 + ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(i32) = G_PHI [[C1]](i32), %bb.0, %9(i32), %bb.1 + ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(i1) = G_PHI [[C]](i1), %bb.0, %11(i1), %bb.1 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32(i1) = COPY [[PHI3]](i1) + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(i1) = COPY [[PHI]](i1) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i1) = G_CONSTANT i1 true + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(i1) = G_XOR [[PHI3]], [[C2]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[PHI2]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(ogt), [[UITOFP]](f32), [[BITCAST]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[PHI2]], [[C3]] + ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP]](i1), [[PHI1]](i32) + ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(i1) = S_ANDN2_B32 [[COPY4]](i1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(i1) = S_AND_B32 $exec_lo, [[COPY3]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(i1) = S_OR_B32 [[S_ANDN2_B32_]](i1), [[S_AND_B32_]](i1), implicit-def $scc + ; GFX10-NEXT: SI_LOOP [[INT]](i32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.2: - ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1) - ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI4]](s32) - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY5]](s1), [[C5]], [[C4]] - ; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p0) :: (store (s32)) + ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(i32) = G_PHI [[INT]](i32), %bb.1 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(i1) = COPY [[S_OR_B32_]](i1) + ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI4]](i32) + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[C5]](f32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[C4]](f32) + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[COPY5]](i1), [[BITCAST1]], [[BITCAST2]] + ; GFX10-NEXT: G_STORE [[SELECT]](i32), [[MV]](p0) :: (store (i32)) ; GFX10-NEXT: SI_RETURN bb.0: successors: %bb.1(0x80000000) liveins: $vgpr0, $vgpr1, $vgpr2 - %0:_(s32) = COPY $vgpr0 - 
%1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(p0) = G_MERGE_VALUES %1(s32), %2(s32) - %4:_(s1) = G_CONSTANT i1 true - %5:_(s32) = G_CONSTANT i32 0 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(p0) = G_MERGE_VALUES %1(i32), %2(i32) + %4:_(i1) = G_CONSTANT i1 true + %5:_(i32) = G_CONSTANT i32 0 bb.1: successors: %bb.2(0x04000000), %bb.1(0x7c000000) - %6:_(s32) = G_PHI %7(s32), %bb.1, %5(s32), %bb.0 - %8:_(s32) = G_PHI %5(s32), %bb.0, %9(s32), %bb.1 - %10:_(s1) = G_PHI %4(s1), %bb.0, %11(s1), %bb.1 - %12:_(s1) = G_CONSTANT i1 true - %11:_(s1) = G_XOR %10, %12 - %13:_(s32) = G_UITOFP %8(s32) - %14:_(s1) = G_FCMP floatpred(ogt), %13(s32), %0 - %15:_(s32) = G_CONSTANT i32 1 - %9:_(s32) = G_ADD %8, %15 - %7:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %14(s1), %6(s32) - SI_LOOP %7(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + %6:_(i32) = G_PHI %7(i32), %bb.1, %5(i32), %bb.0 + %8:_(i32) = G_PHI %5(i32), %bb.0, %9(i32), %bb.1 + %10:_(i1) = G_PHI %4(i1), %bb.0, %11(i1), %bb.1 + %12:_(i1) = G_CONSTANT i1 true + %11:_(i1) = G_XOR %10, %12 + %13:_(f32) = G_UITOFP %8(i32) + %14:_(f32) = G_BITCAST %0(i32) + %15:_(i1) = G_FCMP floatpred(ogt), %13(f32), %14 + %16:_(i32) = G_CONSTANT i32 1 + %9:_(i32) = G_ADD %8, %16 + %7:sreg_32_xm0_xexec(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %15(i1), %6(i32) + SI_LOOP %7(i32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.2 bb.2: - %16:_(s1) = G_PHI %10(s1), %bb.1 - %17:_(s32) = G_PHI %7(s32), %bb.1 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %17(s32) - %18:_(s32) = G_FCONSTANT float 0.000000e+00 - %19:_(s32) = G_FCONSTANT float 1.000000e+00 - %20:_(s32) = G_SELECT %16(s1), %19, %18 - G_STORE %20(s32), %3(p0) :: (store (s32)) + %17:_(i1) = G_PHI %10(i1), %bb.1 + %18:_(i32) = G_PHI %7(i32), %bb.1 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %18(i32) + %19:_(f32) = G_FCONSTANT float 0.000000e+00 + %20:_(f32) = G_FCONSTANT float 1.000000e+00 + %21:_(i32) = G_BITCAST %20(f32) + %22:_(i32) = G_BITCAST %19(f32) + %23:_(i32) = G_SELECT %17(i1), %21, %22 + G_STORE %23(i32), %3(p0) :: (store (i32)) SI_RETURN + + + + ... 
--- @@ -98,81 +108,91 @@ body: | ; GFX10-NEXT: successors: %bb.1(0x80000000) ; GFX10-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](i32), [[COPY2]](i32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i1) = G_CONSTANT i1 true + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(i1) = IMPLICIT_DEF ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.1: ; GFX10-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF]](s1), %bb.0, %22(s1), %bb.1 - ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C1]](s32), %bb.0 - ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %9(s32), %bb.1 - ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI [[C]](s1), %bb.0, %11(s1), %bb.1 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI3]], [[C2]] - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1) - ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI2]](s32) - ; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]] - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C3]] - ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI1]](s32) - ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY3]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY4]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc - ; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(i1) = PHI [[DEF]](i1), %bb.0, %25(i1), %bb.1 + ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(i32) = G_PHI %7(i32), %bb.1, [[C1]](i32), %bb.0 + ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(i32) = G_PHI [[C1]](i32), %bb.0, %9(i32), %bb.1 + ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(i1) = G_PHI [[C]](i1), %bb.0, %11(i1), %bb.1 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32(i1) = COPY [[PHI]](i1) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i1) = G_CONSTANT i1 true + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(i1) = G_XOR [[PHI3]], [[C2]] + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(i1) = COPY [[XOR]](i1) + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[PHI2]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(ogt), [[UITOFP]](f32), [[BITCAST]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[PHI2]], [[C3]] + ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(i32) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.if.break), [[FCMP]](i1), [[PHI1]](i32) + ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(i1) = S_ANDN2_B32 [[COPY3]](i1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(i1) = S_AND_B32 $exec_lo, [[COPY4]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(i1) = S_OR_B32 [[S_ANDN2_B32_]](i1), [[S_AND_B32_]](i1), implicit-def $scc + ; GFX10-NEXT: SI_LOOP [[INT]](i32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.2: - ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1) - ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI4]](s32) - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY5]](s1), [[C5]], [[C4]] - ; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p0) :: (store (s32)) + ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(i32) = G_PHI [[INT]](i32), %bb.1 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(i1) = COPY [[S_OR_B32_]](i1) + ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI4]](i32) + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[C5]](f32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[C4]](f32) + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[COPY5]](i1), [[BITCAST1]], [[BITCAST2]] + ; GFX10-NEXT: G_STORE [[SELECT]](i32), [[MV]](p0) :: (store (i32)) ; GFX10-NEXT: SI_RETURN bb.0: successors: %bb.1(0x80000000) liveins: $vgpr0, $vgpr1, $vgpr2 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(p0) = G_MERGE_VALUES %1(s32), %2(s32) - %4:_(s1) = G_CONSTANT i1 true - %5:_(s32) = G_CONSTANT i32 0 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(p0) = G_MERGE_VALUES %1(i32), %2(i32) + %4:_(i1) = G_CONSTANT i1 true + %5:_(i32) = G_CONSTANT i32 0 bb.1: successors: %bb.2(0x04000000), %bb.1(0x7c000000) - %6:_(s32) = G_PHI %7(s32), %bb.1, %5(s32), %bb.0 - %8:_(s32) = G_PHI %5(s32), %bb.0, %9(s32), %bb.1 - %10:_(s1) = G_PHI %4(s1), %bb.0, %11(s1), %bb.1 - %12:_(s1) = G_CONSTANT i1 true - %11:_(s1) = G_XOR %10, %12 - %13:_(s32) = G_UITOFP %8(s32) - %14:_(s1) = G_FCMP floatpred(ogt), %13(s32), %0 - %15:_(s32) = G_CONSTANT i32 1 - %9:_(s32) = G_ADD %8, %15 - %7:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %14(s1), %6(s32) - SI_LOOP %7(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + %6:_(i32) = G_PHI %7(i32), %bb.1, %5(i32), %bb.0 + %8:_(i32) = G_PHI %5(i32), %bb.0, %9(i32), %bb.1 + %10:_(i1) = G_PHI %4(i1), %bb.0, %11(i1), %bb.1 + %12:_(i1) = G_CONSTANT i1 true + %11:_(i1) = G_XOR %10, %12 + %13:_(f32) = G_UITOFP %8(i32) + %14:_(f32) = G_BITCAST %0(i32) + %15:_(i1) = G_FCMP floatpred(ogt), %13(f32), %14 + %16:_(i32) = G_CONSTANT i32 1 + %9:_(i32) = G_ADD %8, %16 + %7:sreg_32_xm0_xexec(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %15(i1), %6(i32) + SI_LOOP %7(i32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.2 bb.2: - %16:_(s1) = G_PHI %11(s1), %bb.1 - %17:_(s32) = G_PHI %7(s32), %bb.1 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %17(s32) - %18:_(s32) = G_FCONSTANT 
float 0.000000e+00 - %19:_(s32) = G_FCONSTANT float 1.000000e+00 - %20:_(s32) = G_SELECT %16(s1), %19, %18 - G_STORE %20(s32), %3(p0) :: (store (s32)) + %17:_(i1) = G_PHI %11(i1), %bb.1 + %18:_(i32) = G_PHI %7(i32), %bb.1 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %18(i32) + %19:_(f32) = G_FCONSTANT float 0.000000e+00 + %20:_(f32) = G_FCONSTANT float 1.000000e+00 + %21:_(i32) = G_BITCAST %20(f32) + %22:_(i32) = G_BITCAST %19(f32) + %23:_(i32) = G_SELECT %17(i1), %21, %22 + G_STORE %23(i32), %3(p0) :: (store (i32)) SI_RETURN + + + + ... --- @@ -185,174 +205,186 @@ body: | ; GFX10-NEXT: successors: %bb.1(0x80000000) ; GFX10-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY5]](s32), [[COPY6]](s32) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF - ; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $sgpr1 + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY3]](i32), [[COPY4]](i32) + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY5]](i32), [[COPY6]](i32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(i1) = IMPLICIT_DEF + ; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32_xm0_xexec(i1) = IMPLICIT_DEF ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.1: ; GFX10-NEXT: successors: %bb.3(0x50000000), %bb.5(0x30000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[DEF2]](s1), %bb.0, %53(s1), %bb.5 - ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %42(s1), %bb.5 - ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI %13(s32), %bb.5, [[C]](s32), %bb.0 - ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %15(s32), %bb.5 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI]](s1) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[C1]](s1) - ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI3]](s32) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C2]](s32) - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV1]], [[SHL]](s64) - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD 
[[PTR_ADD]](p1) :: (load (s32), addrspace 1) - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C3]] - ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY9]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1) - ; GFX10-NEXT: G_BRCOND [[ICMP]](s1), %bb.3 + ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec(i1) = PHI [[DEF2]](i1), %bb.0, %53(i1), %bb.5 + ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(i1) = PHI [[DEF1]](i1), %bb.0, %42(i1), %bb.5 + ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(i32) = G_PHI %13(i32), %bb.5, [[C]](i32), %bb.0 + ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(i32) = G_PHI [[C]](i32), %bb.0, %15(i32), %bb.5 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec(i1) = COPY [[PHI]](i1) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(i1) = COPY [[PHI1]](i1) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i1) = G_CONSTANT i1 true + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(i1) = COPY [[C1]](i1) + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(i64) = G_SEXT [[PHI3]](i32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[SEXT]], [[C2]](i32) + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV1]], [[SHL]](i64) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i32), addrspace 1) + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[LOAD]](i32), [[C3]] + ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(i1) = S_ANDN2_B32 [[COPY8]](i1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(i1) = S_AND_B32 $exec_lo, [[COPY9]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(i1) = S_OR_B32 [[S_ANDN2_B32_]](i1), [[S_AND_B32_]](i1), implicit-def $scc + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(i1) = COPY [[S_OR_B32_]](i1) + ; GFX10-NEXT: G_BRCOND [[ICMP]](i1), %bb.3 ; GFX10-NEXT: G_BR %bb.5 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.2: ; GFX10-NEXT: successors: %bb.4(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GFX10-NEXT: G_STORE [[C4]](s32), [[MV2]](p1) :: (store (s32), addrspace 1) + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GFX10-NEXT: G_STORE [[C4]](i32), [[MV2]](p1) :: (store (i32), addrspace 1) ; GFX10-NEXT: G_BR %bb.4 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.3: ; GFX10-NEXT: successors: %bb.5(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s1) = G_CONSTANT i1 false - ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C6]](s32) - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[SHL1]](s64) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s32), addrspace 1) - ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD1]], [[C7]] - ; GFX10-NEXT: G_STORE [[ADD]](s32), [[PTR_ADD1]](p1) :: (store (s32), addrspace 1) - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI3]], [[C7]] - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI3]](s32), [[COPY2]] - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP1]](s1) - ; GFX10-NEXT: 
[[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY10]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY11]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(i1) = G_CONSTANT i1 false + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[SEXT]], [[C6]](i32) + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[SHL1]](i64) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i32), addrspace 1) + ; GFX10-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[LOAD1]], [[C7]] + ; GFX10-NEXT: G_STORE [[ADD]](i32), [[PTR_ADD1]](p1) :: (store (i32), addrspace 1) + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[PHI3]], [[C7]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[PHI3]](i32), [[COPY2]] + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(i1) = COPY [[ICMP1]](i1) + ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(i1) = S_ANDN2_B32 [[COPY10]](i1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(i1) = S_AND_B32 $exec_lo, [[COPY11]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(i1) = S_OR_B32 [[S_ANDN2_B32_1]](i1), [[S_AND_B32_1]](i1), implicit-def $scc ; GFX10-NEXT: G_BR %bb.5 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.4: - ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %34(s32) + ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %34(i32) ; GFX10-NEXT: S_ENDPGM 0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.5: ; GFX10-NEXT: successors: %bb.6(0x04000000), %bb.1(0x7c000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI4:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.1, [[S_OR_B32_1]](s1), %bb.3 - ; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.3, [[DEF]](s32), %bb.1 - ; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(s1) = G_PHI [[C5]](s1), %bb.3, [[C1]](s1), %bb.1 - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI6]](s1) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[PHI4]](s1) - ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY13]](s1), [[PHI2]](s32) - ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY7]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY12]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc - ; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: [[PHI4:%[0-9]+]]:sreg_32(i1) = PHI [[S_OR_B32_]](i1), %bb.1, [[S_OR_B32_1]](i1), %bb.3 + ; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(i32) = G_PHI [[ADD1]](i32), %bb.3, [[DEF]](i32), %bb.1 + ; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(i1) = G_PHI [[C5]](i1), %bb.3, [[C1]](i1), %bb.1 + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32_xm0_xexec(i1) = COPY [[PHI6]](i1) + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(i1) = COPY [[PHI4]](i1) + ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY13]](i1), [[PHI2]](i32) + ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(i1) = S_ANDN2_B32 [[COPY7]](i1), $exec_lo, implicit-def $scc + ; 
GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(i1) = S_AND_B32 $exec_lo, [[COPY12]](i1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(i1) = S_OR_B32 [[S_ANDN2_B32_2]](i1), [[S_AND_B32_2]](i1), implicit-def $scc + ; GFX10-NEXT: SI_LOOP [[INT]](i32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.6 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.6: ; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI7:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.5 - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[S_OR_B32_2]](s1) - ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI7]](s32) - ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY14]](s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: [[PHI7:%[0-9]+]]:_(i32) = G_PHI [[INT]](i32), %bb.5 + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32_xm0_xexec(i1) = COPY [[S_OR_B32_2]](i1) + ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI7]](i32) + ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(i32) = SI_IF [[COPY14]](i1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.2 bb.0: successors: %bb.1(0x80000000) liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(p1) = G_MERGE_VALUES %0(s32), %1(s32) - %3:_(s32) = COPY $vgpr2 - %4:_(s32) = COPY $sgpr0 - %5:_(s32) = COPY $sgpr1 - %6:_(p1) = G_MERGE_VALUES %4(s32), %5(s32) - %7:_(s32) = COPY $sgpr2 - %8:_(s32) = COPY $sgpr3 - %9:_(p1) = G_MERGE_VALUES %7(s32), %8(s32) - %10:_(s32) = G_CONSTANT i32 0 - %11:_(s32) = G_IMPLICIT_DEF + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(p1) = G_MERGE_VALUES %0(i32), %1(i32) + %3:_(i32) = COPY $vgpr2 + %4:_(i32) = COPY $sgpr0 + %5:_(i32) = COPY $sgpr1 + %6:_(p1) = G_MERGE_VALUES %4(i32), %5(i32) + %7:_(i32) = COPY $sgpr2 + %8:_(i32) = COPY $sgpr3 + %9:_(p1) = G_MERGE_VALUES %7(i32), %8(i32) + %10:_(i32) = G_CONSTANT i32 0 + %11:_(i32) = G_IMPLICIT_DEF bb.1: successors: %bb.3(0x50000000), %bb.5(0x30000000) - %12:_(s32) = G_PHI %13(s32), %bb.5, %10(s32), %bb.0 - %14:_(s32) = G_PHI %10(s32), %bb.0, %15(s32), %bb.5 - %16:_(s1) = G_CONSTANT i1 true - %17:_(s64) = G_SEXT %14(s32) - %18:_(s32) = G_CONSTANT i32 2 - %19:_(s64) = G_SHL %17, %18(s32) - %20:_(p1) = G_PTR_ADD %6, %19(s64) - %21:_(s32) = G_LOAD %20(p1) :: (load (s32), addrspace 1) - %22:_(s32) = G_CONSTANT i32 0 - %23:_(s1) = G_ICMP intpred(ne), %21(s32), %22 - G_BRCOND %23(s1), %bb.3 + %12:_(i32) = G_PHI %13(i32), %bb.5, %10(i32), %bb.0 + %14:_(i32) = G_PHI %10(i32), %bb.0, %15(i32), %bb.5 + %16:_(i1) = G_CONSTANT i1 true + %17:_(i64) = G_SEXT %14(i32) + %18:_(i32) = G_CONSTANT i32 2 + %19:_(i64) = G_SHL %17, %18(i32) + %20:_(p1) = G_PTR_ADD %6, %19(i64) + %21:_(i32) = G_LOAD %20(p1) :: (load (i32), addrspace 1) + %22:_(i32) = G_CONSTANT i32 0 + %23:_(i1) = G_ICMP intpred(ne), %21(i32), %22 + G_BRCOND %23(i1), %bb.3 G_BR %bb.5 bb.2: successors: %bb.4(0x80000000) - %24:_(s32) = G_CONSTANT i32 10 - G_STORE %24(s32), %9(p1) :: (store (s32), addrspace 1) + %24:_(i32) = G_CONSTANT i32 10 + G_STORE %24(i32), %9(p1) :: (store (i32), addrspace 1) G_BR %bb.4 bb.3: successors: %bb.5(0x80000000) - %25:_(s1) = G_CONSTANT i1 false - %26:_(s32) = G_CONSTANT i32 2 - %27:_(s64) = G_SHL %17, %26(s32) - %28:_(p1) = G_PTR_ADD %2, %27(s64) - %29:_(s32) = G_LOAD %28(p1) :: (load (s32), addrspace 1) - 
%30:_(s32) = G_CONSTANT i32 1 - %31:_(s32) = G_ADD %29, %30 - G_STORE %31(s32), %28(p1) :: (store (s32), addrspace 1) - %32:_(s32) = G_ADD %14, %30 - %33:_(s1) = G_ICMP intpred(ult), %14(s32), %3 + %25:_(i1) = G_CONSTANT i1 false + %26:_(i32) = G_CONSTANT i32 2 + %27:_(i64) = G_SHL %17, %26(i32) + %28:_(p1) = G_PTR_ADD %2, %27(i64) + %29:_(i32) = G_LOAD %28(p1) :: (load (i32), addrspace 1) + %30:_(i32) = G_CONSTANT i32 1 + %31:_(i32) = G_ADD %29, %30 + G_STORE %31(i32), %28(p1) :: (store (i32), addrspace 1) + %32:_(i32) = G_ADD %14, %30 + %33:_(i1) = G_ICMP intpred(ult), %14(i32), %3 G_BR %bb.5 bb.4: - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %34(s32) + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %34(i32) S_ENDPGM 0 bb.5: successors: %bb.6(0x04000000), %bb.1(0x7c000000) - %15:_(s32) = G_PHI %32(s32), %bb.3, %11(s32), %bb.1 - %35:_(s1) = G_PHI %25(s1), %bb.3, %16(s1), %bb.1 - %36:_(s1) = G_PHI %33(s1), %bb.3, %16(s1), %bb.1 - %13:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %36(s1), %12(s32) - SI_LOOP %13(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + %15:_(i32) = G_PHI %32(i32), %bb.3, %11(i32), %bb.1 + %35:_(i1) = G_PHI %25(i1), %bb.3, %16(i1), %bb.1 + %36:_(i1) = G_PHI %33(i1), %bb.3, %16(i1), %bb.1 + %13:sreg_32_xm0_xexec(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %36(i1), %12(i32) + SI_LOOP %13(i32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.6 bb.6: successors: %bb.2(0x40000000), %bb.4(0x40000000) - %37:sreg_32_xm0_xexec(s1) = G_PHI %35(s1), %bb.5 - %38:_(s32) = G_PHI %13(s32), %bb.5 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %38(s32) - %34:sreg_32_xm0_xexec(s32) = SI_IF %37(s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec + %37:sreg_32_xm0_xexec(i1) = G_PHI %35(i1), %bb.5 + %38:_(i32) = G_PHI %13(i32), %bb.5 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %38(i32) + %34:sreg_32_xm0_xexec(i32) = SI_IF %37(i1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.2 + + + + + + + + + + + + ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-reg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-reg.mir index d1b473f2f41d8..6d763f530433b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-reg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-reg.mir @@ -11,60 +11,66 @@ body: | ; GFX10-NEXT: successors: %bb.1(0x80000000) ; GFX10-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](i32), [[COPY2]](i32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.1: ; GFX10-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C]](s32), %bb.0 - ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %9(s32), %bb.1 - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI1]], [[C2]] - ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[ADD]](s32) - ; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]] - ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI]](s32) - ; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: [[PHI:%[0-9]+]]:_(i32) = G_PHI %7(i32), %bb.1, [[C]](i32), %bb.0 + ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(i32) = G_PHI [[C1]](i32), %bb.0, %9(i32), %bb.1 + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[PHI1]], [[C2]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[ADD]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(ogt), [[UITOFP]](f32), [[BITCAST]] + ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP]](i1), [[PHI]](i32) + ; GFX10-NEXT: SI_LOOP [[INT]](i32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.2: - ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[ADD]](s32), %bb.1 - ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.1 - ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI3]](s32) - ; GFX10-NEXT: G_STORE [[PHI2]](s32), [[MV]](p0) :: (store (s32)) + ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(i32) = G_PHI [[ADD]](i32), %bb.1 + ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(i32) = G_PHI [[INT]](i32), %bb.1 + ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI3]](i32) + ; GFX10-NEXT: G_STORE [[PHI2]](i32), [[MV]](p0) :: (store (i32)) ; GFX10-NEXT: SI_RETURN bb.0: successors: %bb.1(0x80000000) liveins: $vgpr0, $vgpr1, $vgpr2 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY 
$vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(p0) = G_MERGE_VALUES %1(s32), %2(s32) - %4:_(s32) = G_CONSTANT i32 0 - %5:_(s32) = G_CONSTANT i32 -1 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(p0) = G_MERGE_VALUES %1(i32), %2(i32) + %4:_(i32) = G_CONSTANT i32 0 + %5:_(i32) = G_CONSTANT i32 -1 bb.1: successors: %bb.2(0x04000000), %bb.1(0x7c000000) - %6:_(s32) = G_PHI %7(s32), %bb.1, %4(s32), %bb.0 - %8:_(s32) = G_PHI %5(s32), %bb.0, %9(s32), %bb.1 - %10:_(s32) = G_CONSTANT i32 1 - %9:_(s32) = G_ADD %8, %10 - %11:_(s32) = G_UITOFP %9(s32) - %12:_(s1) = G_FCMP floatpred(ogt), %11(s32), %0 - %7:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %12(s1), %6(s32) - SI_LOOP %7(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + %6:_(i32) = G_PHI %7(i32), %bb.1, %4(i32), %bb.0 + %8:_(i32) = G_PHI %5(i32), %bb.0, %9(i32), %bb.1 + %10:_(i32) = G_CONSTANT i32 1 + %9:_(i32) = G_ADD %8, %10 + %11:_(f32) = G_UITOFP %9(i32) + %12:_(f32) = G_BITCAST %0(i32) + %13:_(i1) = G_FCMP floatpred(ogt), %11(f32), %12 + %7:sreg_32_xm0_xexec(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %13(i1), %6(i32) + SI_LOOP %7(i32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.2 bb.2: - %13:_(s32) = G_PHI %9(s32), %bb.1 - %14:_(s32) = G_PHI %7(s32), %bb.1 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %14(s32) - G_STORE %13(s32), %3(p0) :: (store (s32)) + %14:_(i32) = G_PHI %9(i32), %bb.1 + %15:_(i32) = G_PHI %7(i32), %bb.1 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %15(i32) + G_STORE %14(i32), %3(p0) :: (store (i32)) SI_RETURN + + + + ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmamix-constant-bus-violation.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmamix-constant-bus-violation.mir index 3f16aca150671..419392184fc92 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmamix-constant-bus-violation.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmamix-constant-bus-violation.mir @@ -23,20 +23,23 @@ body: | ; GFX9-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[V_FMA_MIX_F32_]], implicit $exec ; GFX9-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = G_CONSTANT i32 16 - %3:sgpr(s32) = G_LSHR %0:sgpr, %2:sgpr(s32) - %4:sgpr(s16) = G_TRUNC %3:sgpr(s32) - %5:sgpr(s32) = G_LSHR %1:sgpr, %2:sgpr(s32) - %6:sgpr(s16) = G_TRUNC %5:sgpr(s32) - %7:vgpr(s16) = COPY %4:sgpr(s16) - %8:vgpr(s32) = G_FPEXT %7:vgpr(s16) - %9:vgpr(s16) = COPY %6:sgpr(s16) - %10:vgpr(s32) = G_FPEXT %9:vgpr(s16) - %11:vgpr(s32) = G_FNEG %10:vgpr - %12:vgpr(s32) = G_FMA %11:vgpr, %10:vgpr, %8:vgpr - %13:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %12:vgpr(s32) - $sgpr0 = COPY %13:sgpr(s32) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i32) = G_CONSTANT i32 16 + %3:sgpr(i32) = G_LSHR %0, %2(i32) + %4:sgpr(i16) = G_TRUNC %3(i32) + %5:sgpr(i32) = G_LSHR %1, %2(i32) + %6:sgpr(i16) = G_TRUNC %5(i32) + %7:vgpr(i16) = COPY %4(i16) + %8:vgpr(f16) = G_BITCAST %7(i16) + %9:vgpr(f32) = G_FPEXT %8(f16) + %10:vgpr(i16) = COPY %6(i16) + %11:vgpr(f16) = G_BITCAST %10(i16) + %12:vgpr(f32) = G_FPEXT %11(f16) + %13:vgpr(f32) = G_FNEG %12 + %14:vgpr(f32) = G_FMA %13, %12, %9 + %15:vgpr(i32) = G_BITCAST %14(f32) + %16:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %15(i32) + $sgpr0 = COPY %16(i32) SI_RETURN_TO_EPILOG implicit 
$sgpr0 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-abs.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-abs.mir index 581cf6c7fcdf2..b4757652e7495 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-abs.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-abs.mir @@ -27,11 +27,11 @@ body: | ; GFX9-NEXT: %src0:sreg_32 = COPY $sgpr0 ; GFX9-NEXT: %smax:sreg_32 = S_ABS_I32 %src0, implicit-def dead $scc ; GFX9-NEXT: S_ENDPGM 0, implicit %smax - %src0:sgpr(s32) = COPY $sgpr0 - %zero:sgpr(s32) = G_CONSTANT i32 0 - %ineg:sgpr(s32) = G_SUB %zero, %src0 - %smax:sgpr(s32) = G_SMAX %src0, %ineg - S_ENDPGM 0, implicit %smax + %src0:sgpr(i32) = COPY $sgpr0 + %zero:sgpr(i32) = G_CONSTANT i32 0 + %ineg:sgpr(i32) = G_SUB %zero, %src0 + %smax:sgpr(i32) = G_SMAX %src0, %ineg + S_ENDPGM 0, implicit %smax(i32) ... --- @@ -57,11 +57,11 @@ body: | ; GFX9-NEXT: %src0:sreg_32 = COPY $sgpr0 ; GFX9-NEXT: %smax:sreg_32 = S_ABS_I32 %src0, implicit-def dead $scc ; GFX9-NEXT: S_ENDPGM 0, implicit %smax - %src0:sgpr(s32) = COPY $sgpr0 - %zero:sgpr(s32) = G_CONSTANT i32 0 - %ineg:sgpr(s32) = G_SUB %zero, %src0 - %smax:sgpr(s32) = G_SMAX %ineg, %src0 - S_ENDPGM 0, implicit %smax + %src0:sgpr(i32) = COPY $sgpr0 + %zero:sgpr(i32) = G_CONSTANT i32 0 + %ineg:sgpr(i32) = G_SUB %zero, %src0 + %smax:sgpr(i32) = G_SMAX %ineg, %src0 + S_ENDPGM 0, implicit %smax(i32) ... --- @@ -91,11 +91,11 @@ body: | ; GFX9-NEXT: %ineg:vgpr_32 = V_SUB_U32_e64 %zero, %src0, 0, implicit $exec ; GFX9-NEXT: %smax:vgpr_32 = V_MAX_I32_e64 %src0, %ineg, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit %smax - %src0:vgpr(s32) = COPY $vgpr0 - %zero:vgpr(s32) = G_CONSTANT i32 0 - %ineg:vgpr(s32) = G_SUB %zero, %src0 - %smax:vgpr(s32) = G_SMAX %src0, %ineg - S_ENDPGM 0, implicit %smax + %src0:vgpr(i32) = COPY $vgpr0 + %zero:vgpr(i32) = G_CONSTANT i32 0 + %ineg:vgpr(i32) = G_SUB %zero, %src0 + %smax:vgpr(i32) = G_SMAX %src0, %ineg + S_ENDPGM 0, implicit %smax(i32) ... # FIXME: Violates constant bus restriction @@ -108,9 +108,9 @@ body: | # bb.0: # liveins: $sgpr0 -# %src0:sgpr(s32) = COPY $sgpr0 -# %zero:sgpr(s32) = G_CONSTANT i32 0 -# %ineg:sgpr(s32) = G_SUB %zero, %src0 -# %smax:vgpr(s32) = G_SMAX %src0, %ineg +# %src0:sgpr(i32) = COPY $sgpr0 +# %zero:sgpr(i32) = G_CONSTANT i32 0 +# %ineg:sgpr(i32) = G_SUB %zero, %src0 +# %smax:vgpr(i32) = G_SMAX %src0, %ineg # S_ENDPGM 0, implicit %smax # ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir index 2087d45cdf3d9..4aade136103bd 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir @@ -38,22 +38,22 @@ body: | ; GFX9-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[S_ADD_I32_]], [[V_ADD_U32_e64_]], 0, implicit $exec ; GFX9-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_ADD_U32_e64_1]], [[COPY2]], 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_ADD_I32_]], implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:vgpr(s32) = COPY $vgpr0 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:vgpr(i32) = COPY $vgpr0 %3:vgpr(p1) = COPY $vgpr3_vgpr4 - %4:sgpr(s32) = G_CONSTANT i32 1 - %5:sgpr(s32) = G_CONSTANT i32 4096 + %4:sgpr(i32) = G_CONSTANT i32 1 + %5:sgpr(i32) = G_CONSTANT i32 4096 + %6:sgpr(i32) = G_ADD %0, %1 + %7:vgpr(i32) = G_ADD %2, %6 + %8:vgpr(i32) = G_ADD %6, %7 + %9:vgpr(i32) = G_ADD %8, %2 + S_ENDPGM 0, implicit %6(i32), implicit %7(i32), implicit %8(i32), implicit %9(i32) - %6:sgpr(s32) = G_ADD %0, %1 - %7:vgpr(s32) = G_ADD %2, %6 - %8:vgpr(s32) = G_ADD %6, %7 - %9:vgpr(s32) = G_ADD %8, %2 - S_ENDPGM 0, implicit %6, implicit %7, implicit %8, implicit %9 ... @@ -80,10 +80,10 @@ body: | ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX9-NEXT: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[COPY]], 64, implicit-def dead $scc ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_SUB_I32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = G_CONSTANT i32 -64 - %2:sgpr(s32) = G_ADD %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = G_CONSTANT i32 -64 + %2:sgpr(i32) = G_ADD %0, %1 + S_ENDPGM 0, implicit %2(i32) ... @@ -110,10 +110,10 @@ body: | ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[V_SUB_U32_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[COPY]], 64, 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_SUB_U32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = G_CONSTANT i32 -64 - %2:vgpr(s32) = G_ADD %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = G_CONSTANT i32 -64 + %2:vgpr(i32) = G_ADD %0, %1 + S_ENDPGM 0, implicit %2(i32) ... @@ -142,10 +142,10 @@ body: | ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 ; GFX9-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], [[S_MOV_B32_]], implicit-def dead $scc ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_ADD_I32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = G_CONSTANT i32 16 - %2:sgpr(s32) = G_ADD %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = G_CONSTANT i32 16 + %2:sgpr(i32) = G_ADD %0, %1 + S_ENDPGM 0, implicit %2(i32) ... @@ -174,9 +174,9 @@ body: | ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 16, implicit $exec ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ADD_U32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = G_CONSTANT i32 16 - %2:vgpr(s32) = G_ADD %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = G_CONSTANT i32 16 + %2:vgpr(i32) = G_ADD %0, %1 + S_ENDPGM 0, implicit %2(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.s16.mir index 1971cd80d5686..151bb131270b0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.s16.mir @@ -39,12 +39,12 @@ body: | ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[V_ADD_NC_U16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_NC_U16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_ADD_NC_U16_fake16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vgpr(s16) = G_ADD %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %4:vgpr(i16) = G_ADD %2, %3 + S_ENDPGM 0, implicit %4(i16) ... @@ -85,13 +85,13 @@ body: | ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 ; GFX11-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[V_ADD_NC_U16_fake16_e64_]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vgpr(s16) = G_ADD %2, %3 - %5:vgpr(s32) = G_ZEXT %4 - S_ENDPGM 0, implicit %5 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %4:vgpr(i16) = G_ADD %2, %3 + %5:vgpr(i32) = G_ZEXT %4(i16) + S_ENDPGM 0, implicit %5(i32) ... @@ -125,11 +125,11 @@ body: | ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[V_SUB_NC_U16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_NC_U16_fake16_e64 0, [[COPY]], 0, 64, 0, 0, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_SUB_NC_U16_fake16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s16) = G_TRUNC %0 - %2:vgpr(s16) = G_CONSTANT i16 -64 - %3:vgpr(s16) = G_ADD %1, %2 - S_ENDPGM 0, implicit %3 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i16) = G_TRUNC %0(i32) + %2:vgpr(i16) = G_CONSTANT i16 -64 + %3:vgpr(i16) = G_ADD %1, %2 + S_ENDPGM 0, implicit %3(i16) ... @@ -167,11 +167,11 @@ body: | ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 ; GFX11-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[V_SUB_NC_U16_fake16_e64_]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s16) = G_TRUNC %0 - %2:vgpr(s16) = G_CONSTANT i16 -64 - %3:vgpr(s16) = G_ADD %1, %2 - %4:vgpr(s32) = G_ZEXT %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i16) = G_TRUNC %0(i32) + %2:vgpr(i16) = G_CONSTANT i16 -64 + %3:vgpr(i16) = G_ADD %1, %2 + %4:vgpr(i32) = G_ZEXT %3(i16) + S_ENDPGM 0, implicit %4(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.class.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.class.mir index 46c801b5738e1..5e1e63f502e26 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.class.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.class.mir @@ -18,6 +18,7 @@ body: | ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE64-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F32_e64 0, [[COPY]], [[COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F32_e64_]] + ; ; WAVE32-LABEL: name: class_s32_vcc_sv ; WAVE32: liveins: $sgpr0, $vgpr0 ; WAVE32-NEXT: {{ $}} @@ -25,10 +26,11 @@ body: | ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE32-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F32_e64 0, [[COPY]], [[COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F32_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:sgpr(f32) = G_BITCAST %0(i32) + %3:vcc(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %2(f32), %1(i32) + S_ENDPGM 0, implicit %3(i1) ... --- @@ -47,6 +49,7 @@ body: | ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; WAVE64-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F32_e64 0, [[COPY]], [[COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F32_e64_]] + ; ; WAVE32-LABEL: name: class_s32_vcc_vs ; WAVE32: liveins: $sgpr0, $vgpr0 ; WAVE32-NEXT: {{ $}} @@ -54,10 +57,11 @@ body: | ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; WAVE32-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F32_e64 0, [[COPY]], [[COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr(s32) = COPY $sgpr0 - %2:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = COPY $sgpr0 + %2:vgpr(f32) = G_BITCAST %0(i32) + %3:vcc(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %2(f32), %1(i32) + S_ENDPGM 0, implicit %3(i1) ... --- @@ -76,6 +80,7 @@ body: | ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; WAVE64-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F32_e64 0, [[COPY]], [[COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F32_e64_]] + ; ; WAVE32-LABEL: name: class_s32_vcc_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} @@ -83,10 +88,11 @@ body: | ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; WAVE32-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F32_e64 0, [[COPY]], [[COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(f32) = G_BITCAST %0(i32) + %3:vcc(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %2(f32), %1(i32) + S_ENDPGM 0, implicit %3(i1) ... 
--- @@ -105,6 +111,7 @@ body: | ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE64-NEXT: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F64_e64 0, [[COPY]], [[COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F64_e64_]] + ; ; WAVE32-LABEL: name: class_s64_vcc_sv ; WAVE32: liveins: $sgpr0_sgpr1, $vgpr0 ; WAVE32-NEXT: {{ $}} @@ -112,10 +119,11 @@ body: | ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE32-NEXT: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F64_e64 0, [[COPY]], [[COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F64_e64_]] - %0:sgpr(s64) = COPY $sgpr0_sgpr1 - %1:vgpr(s32) = COPY $vgpr0 - %2:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i64) = COPY $sgpr0_sgpr1 + %1:vgpr(i32) = COPY $vgpr0 + %2:sgpr(f64) = G_BITCAST %0(i64) + %3:vcc(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %2(f64), %1(i32) + S_ENDPGM 0, implicit %3(i1) ... --- @@ -135,6 +143,7 @@ body: | ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; WAVE64-NEXT: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F64_e64 0, [[COPY]], [[COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F64_e64_]] + ; ; WAVE32-LABEL: name: class_s64_vcc_vs ; WAVE32: liveins: $sgpr0_sgpr1, $vgpr0 ; WAVE32-NEXT: {{ $}} @@ -142,10 +151,11 @@ body: | ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; WAVE32-NEXT: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F64_e64 0, [[COPY]], [[COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F64_e64_]] - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:sgpr(s32) = COPY $sgpr0 - %2:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:sgpr(i32) = COPY $sgpr0 + %2:vgpr(f64) = G_BITCAST %0(i64) + %3:vcc(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %2(f64), %1(i32) + S_ENDPGM 0, implicit %3(i1) ... --- @@ -165,6 +175,7 @@ body: | ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; WAVE64-NEXT: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F64_e64 0, [[COPY]], [[COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F64_e64_]] + ; ; WAVE32-LABEL: name: class_s64_vcc_vv ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2 ; WAVE32-NEXT: {{ $}} @@ -172,8 +183,9 @@ body: | ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; WAVE32-NEXT: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F64_e64 0, [[COPY]], [[COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F64_e64_]] - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - %2:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(i32) = COPY $vgpr2 + %2:vgpr(f64) = G_BITCAST %0(i64) + %3:vcc(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %2(f64), %1(i32) + S_ENDPGM 0, implicit %3(i1) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.class.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.class.s16.mir index 3ca3928fbfad3..c0ad6fbd7b59c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.class.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.class.s16.mir @@ -5,9 +5,9 @@ # RUN: llc -mtriple=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=2 -pass-remarks-missed='gisel*' %s -o /dev/null 2>&1 | FileCheck -check-prefix=SI-ERR %s # SI-ERR-NOT: remark -# SI-ERR: remark: :0:0: cannot select: %3:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %2:sgpr(s16), %1:vgpr(s32) (in function: class_s16_vcc_sv) -# SI-ERR-NEXT: remark: :0:0: cannot select: %3:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %2:vgpr(s16), %1:sgpr(s32) (in function: class_s16_vcc_vs) -# SI-ERR-NEXT: remark: :0:0: cannot select: %3:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %2:vgpr(s16), %1:vgpr(s32) (in function: class_s16_vcc_vv) +# SI-ERR: remark: :0:0: cannot select: %4:vcc(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %3:sgpr(f16), %1:vgpr(i32) (in function: class_s16_vcc_sv) +# SI-ERR-NEXT: remark: :0:0: cannot select: %4:vcc(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %3:vgpr(f16), %1:sgpr(i32) (in function: class_s16_vcc_vs) +# SI-ERR-NEXT: remark: :0:0: cannot select: %4:vcc(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %3:vgpr(f16), %1:vgpr(i32) (in function: class_s16_vcc_vv) # SI-ERR-NOT: remark --- @@ -33,11 +33,12 @@ body: | ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE64-NEXT: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F16_e64 0, [[COPY]], [[COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F16_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:sgpr(s16) = G_TRUNC %0 - %4:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %2, %1 - S_ENDPGM 0, implicit %4 + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:sgpr(i16) = G_TRUNC %0(i32) + %3:sgpr(f16) = G_BITCAST %2(i16) + %4:vcc(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %3(f16), %1(i32) + S_ENDPGM 0, implicit %4(i1) ... --- @@ -63,11 +64,12 @@ body: | ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; WAVE64-NEXT: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F16_e64 0, [[COPY]], [[COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr(s32) = COPY $sgpr0 - %2:vgpr(s16) = G_TRUNC %0 - %4:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %2, %1 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = COPY $sgpr0 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(f16) = G_BITCAST %2(i16) + %4:vcc(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %3(f16), %1(i32) + S_ENDPGM 0, implicit %4(i1) ... 
--- @@ -93,9 +95,10 @@ body: | ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; WAVE64-NEXT: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F16_e64 0, [[COPY]], [[COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %4:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %2, %1 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(f16) = G_BITCAST %2(i16) + %4:vcc(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %3(f16), %1(i32) + S_ENDPGM 0, implicit %4(i1) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cos.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cos.mir index 20e205f3a4262..7ee1d1cf62c3d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cos.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cos.mir @@ -16,11 +16,13 @@ body: | ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: %1:vgpr_32 = nofpexcept V_COS_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %1 - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[V_COS_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_COS_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_COS_F32_e64_]] + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(f32) = G_BITCAST %0(i32) + %2:vgpr(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), %1(f32) + %3:vgpr(i32) = G_BITCAST %2(f32) + S_ENDPGM 0, implicit %3(i32) ... --- @@ -37,9 +39,11 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: %1:vgpr_32 = nofpexcept V_COS_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %1 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[V_COS_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_COS_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_COS_F32_e64_]] + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(f32) = G_BITCAST %0(i32) + %2:vgpr(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), %1(f32) + %3:vgpr(i32) = G_BITCAST %2(f32) + S_ENDPGM 0, implicit %3(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cos.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cos.s16.mir index d6b8603bc2aec..e43bd320a0264 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cos.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cos.s16.mir @@ -2,8 +2,8 @@ # RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s # RUN: llc -mtriple=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=2 -pass-remarks-missed='gisel*' %s -o /dev/null 2>&1 | FileCheck -check-prefix=SI-ERR %s -# SI-ERR: remark: :0:0: cannot select: %2:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), %1:sgpr(s16) (in function: cos_s16_vs) -# SI-ERR: remark: :0:0: cannot select: %2:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), %1:vgpr(s16) (in function: cos_s16_vv) +# SI-ERR: remark: :0:0: cannot select: %3:vgpr_32(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), %2:sgpr(f16) (in function: cos_s16_vs) +# SI-ERR: remark: :0:0: cannot select: %3:vgpr_32(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), %2:vgpr(f16) (in function: cos_s16_vv) --- name: cos_s16_vs @@ -21,10 +21,12 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; CHECK-NEXT: [[V_COS_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_COS_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_COS_F16_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0 - %2:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(f16) = G_BITCAST %1(i16) + %3:vgpr(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), %2(f16) + %4:vgpr(i16) = G_BITCAST %3(f16) + S_ENDPGM 0, implicit %4(i16) ... --- @@ -43,8 +45,10 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[V_COS_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_COS_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_COS_F16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s16) = G_TRUNC %0 - %2:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i16) = G_TRUNC %0(i32) + %2:vgpr(f16) = G_BITCAST %1(i16) + %3:vgpr(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), %2(f16) + %4:vgpr(i16) = G_BITCAST %3(f16) + S_ENDPGM 0, implicit %4(i16) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pk.i16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pk.i16.mir index 14d48033980c6..3834ced5c6a84 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pk.i16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pk.i16.mir @@ -18,10 +18,10 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN-NEXT: [[V_CVT_PK_I16_I32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PK_I16_I32_e64 [[COPY]], [[COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_PK_I16_I32_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(<2 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pk.i16), %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:vgpr(<2 x i16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pk.i16), %0(i32), %1(i32) + S_ENDPGM 0, implicit %2(<2 x i16>) ... 
--- @@ -41,10 +41,10 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GCN-NEXT: [[V_CVT_PK_I16_I32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PK_I16_I32_e64 [[COPY]], [[COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_PK_I16_I32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr(s32) = COPY $sgpr0 - %2:vgpr(<2 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pk.i16), %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = COPY $sgpr0 + %2:vgpr(<2 x i16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pk.i16), %0(i32), %1(i32) + S_ENDPGM 0, implicit %2(<2 x i16>) ... --- @@ -63,8 +63,8 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GCN-NEXT: [[V_CVT_PK_I16_I32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PK_I16_I32_e64 [[COPY]], [[COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_PK_I16_I32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(<2 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pk.i16), %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(<2 x i16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pk.i16), %0(i32), %1(i32) + S_ENDPGM 0, implicit %2(<2 x i16>) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pk.u16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pk.u16.mir index 3ed82570ac309..66d9894519a3e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pk.u16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pk.u16.mir @@ -18,10 +18,10 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN-NEXT: [[V_CVT_PK_U16_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PK_U16_U32_e64 [[COPY]], [[COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_PK_U16_U32_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(<2 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pk.u16), %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:vgpr(<2 x i16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pk.u16), %0(i32), %1(i32) + S_ENDPGM 0, implicit %2(<2 x i16>) ... --- @@ -41,10 +41,10 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GCN-NEXT: [[V_CVT_PK_U16_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PK_U16_U32_e64 [[COPY]], [[COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_PK_U16_U32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr(s32) = COPY $sgpr0 - %2:vgpr(<2 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pk.u16), %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = COPY $sgpr0 + %2:vgpr(<2 x i16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pk.u16), %0(i32), %1(i32) + S_ENDPGM 0, implicit %2(<2 x i16>) ... --- @@ -63,8 +63,8 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GCN-NEXT: [[V_CVT_PK_U16_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PK_U16_U32_e64 [[COPY]], [[COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_PK_U16_U32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(<2 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pk.u16), %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(<2 x i16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pk.u16), %0(i32), %1(i32) + S_ENDPGM 0, implicit %2(<2 x i16>) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pknorm.i16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pknorm.i16.mir index dd185faf672f4..bb19a63f63df4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pknorm.i16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pknorm.i16.mir @@ -18,10 +18,12 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN-NEXT: [[V_CVT_PKNORM_I16_F32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PKNORM_I16_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_PKNORM_I16_F32_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(<2 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pknorm.i16), %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:sgpr(f32) = G_BITCAST %0(i32) + %3:vgpr(f32) = G_BITCAST %1(i32) + %4:vgpr(<2 x i16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pknorm.i16), %2(f32), %3(f32) + S_ENDPGM 0, implicit %4(<2 x i16>) ... --- @@ -41,10 +43,12 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GCN-NEXT: [[V_CVT_PKNORM_I16_F32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PKNORM_I16_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_PKNORM_I16_F32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr(s32) = COPY $sgpr0 - %2:vgpr(<2 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pknorm.i16), %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = COPY $sgpr0 + %2:vgpr(f32) = G_BITCAST %0(i32) + %3:sgpr(f32) = G_BITCAST %1(i32) + %4:vgpr(<2 x i16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pknorm.i16), %2(f32), %3(f32) + S_ENDPGM 0, implicit %4(<2 x i16>) ... --- @@ -63,8 +67,10 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GCN-NEXT: [[V_CVT_PKNORM_I16_F32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PKNORM_I16_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_PKNORM_I16_F32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(<2 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pknorm.i16), %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(f32) = G_BITCAST %0(i32) + %3:vgpr(f32) = G_BITCAST %1(i32) + %4:vgpr(<2 x i16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pknorm.i16), %2(f32), %3(f32) + S_ENDPGM 0, implicit %4(<2 x i16>) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pknorm.u16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pknorm.u16.mir index 499d108cb5cbc..67f3a3a3bf233 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pknorm.u16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pknorm.u16.mir @@ -18,10 +18,12 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN-NEXT: [[V_CVT_PKNORM_U16_F32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PKNORM_U16_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_PKNORM_U16_F32_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(<2 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pknorm.u16), %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:sgpr(f32) = G_BITCAST %0(i32) + %3:vgpr(f32) = G_BITCAST %1(i32) + %4:vgpr(<2 x i16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pknorm.u16), %2(f32), %3(f32) + S_ENDPGM 0, implicit %4(<2 x i16>) ... 
--- @@ -41,10 +43,12 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GCN-NEXT: [[V_CVT_PKNORM_U16_F32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PKNORM_U16_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_PKNORM_U16_F32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr(s32) = COPY $sgpr0 - %2:vgpr(<2 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pknorm.u16), %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = COPY $sgpr0 + %2:vgpr(f32) = G_BITCAST %0(i32) + %3:sgpr(f32) = G_BITCAST %1(i32) + %4:vgpr(<2 x i16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pknorm.u16), %2(f32), %3(f32) + S_ENDPGM 0, implicit %4(<2 x i16>) ... --- @@ -63,8 +67,10 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GCN-NEXT: [[V_CVT_PKNORM_U16_F32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PKNORM_U16_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_PKNORM_U16_F32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(<2 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pknorm.u16), %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(f32) = G_BITCAST %0(i32) + %3:vgpr(f32) = G_BITCAST %1(i32) + %4:vgpr(<2 x i16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pknorm.u16), %2(f32), %3(f32) + S_ENDPGM 0, implicit %4(<2 x i16>) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pkrtz.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pkrtz.mir index 2b1c6d04cb5ed..704d4f0173537 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pkrtz.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pkrtz.mir @@ -16,12 +16,15 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_PKRTZ_F16_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_ENDPGM 0, implicit %2 - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(<2 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), %0, %1 - S_ENDPGM 0, implicit %2 + ; GCN-NEXT: [[V_CVT_PKRTZ_F16_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_PKRTZ_F16_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_PKRTZ_F16_F32_e64_]] + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:sgpr(f32) = G_BITCAST %0(i32) + %3:vgpr(f32) = G_BITCAST %1(i32) + %4:vgpr(<2 x f16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), %2(f32), %3(f32) + %5:vgpr(<2 x i16>) = G_BITCAST %4(<2 x f16>) + S_ENDPGM 0, implicit %5(<2 x i16>) ... 
--- @@ -39,12 +42,15 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_PKRTZ_F16_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_ENDPGM 0, implicit %2 - %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr(s32) = COPY $sgpr0 - %2:vgpr(<2 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), %0, %1 - S_ENDPGM 0, implicit %2 + ; GCN-NEXT: [[V_CVT_PKRTZ_F16_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_PKRTZ_F16_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_PKRTZ_F16_F32_e64_]] + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = COPY $sgpr0 + %2:vgpr(f32) = G_BITCAST %0(i32) + %3:sgpr(f32) = G_BITCAST %1(i32) + %4:vgpr(<2 x f16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), %2(f32), %3(f32) + %5:vgpr(<2 x i16>) = G_BITCAST %4(<2 x f16>) + S_ENDPGM 0, implicit %5(<2 x i16>) ... --- @@ -61,10 +67,13 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_PKRTZ_F16_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_ENDPGM 0, implicit %2 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(<2 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), %0, %1 - S_ENDPGM 0, implicit %2 + ; GCN-NEXT: [[V_CVT_PKRTZ_F16_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_PKRTZ_F16_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_PKRTZ_F16_F32_e64_]] + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(f32) = G_BITCAST %0(i32) + %3:vgpr(f32) = G_BITCAST %1(i32) + %4:vgpr(<2 x f16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), %2(f32), %3(f32) + %5:vgpr(<2 x i16>) = G_BITCAST %4(<2 x f16>) + S_ENDPGM 0, implicit %5(<2 x i16>) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.ds.swizzle.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.ds.swizzle.mir index 5fa996ae03aca..9e22378090555 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.ds.swizzle.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.ds.swizzle.mir @@ -18,9 +18,9 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[DS_SWIZZLE_B32_:%[0-9]+]]:vgpr_32 = DS_SWIZZLE_B32 [[COPY]], 0, 0, implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[DS_SWIZZLE_B32_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ds.swizzle), %0, 0 - S_ENDPGM 0, implicit %1 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ds.swizzle), %0(i32), 0 + S_ENDPGM 0, implicit %1(i32) ... @@ -41,8 +41,8 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[DS_SWIZZLE_B32_:%[0-9]+]]:vgpr_32 = DS_SWIZZLE_B32 [[COPY]], 65535, 0, implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[DS_SWIZZLE_B32_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ds.swizzle), %0, 65535 - S_ENDPGM 0, implicit %1 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ds.swizzle), %0(i32), 65535 + S_ENDPGM 0, implicit %1(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.exp.compr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.exp.compr.mir index bda7e42ad7c70..64267069fd175 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.exp.compr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.exp.compr.mir @@ -12,18 +12,11 @@ regBankSelected: true body: | bb.0: liveins: $vgpr0 - %0:vgpr(s32) = COPY $vgpr0 - - %5:vgpr(<2 x s16>) = G_BITCAST %0(s32) - ; CHECK: [[UNDEF0:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK: [[UNDEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK: EXP 1, %0, %0, [[UNDEF1]], [[UNDEF0]], 0, 1, 15, implicit $exec - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.compr), 1, 15, %5:vgpr(<2 x s16>), %5:vgpr(<2 x s16>), 0, 0 - - ; CHECK: [[UNDEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK: [[UNDEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK: EXP_DONE 1, %0, %0, [[UNDEF3]], [[UNDEF2]], 0, 1, 15, implicit $exec - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.compr), 1, 15, %5:vgpr(<2 x s16>), %5:vgpr(<2 x s16>), -1, 0 - + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(<2 x i16>) = G_BITCAST %0(i32) + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.compr), 1, 15, %1(<2 x i16>), %1(<2 x i16>), 0, 0 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.compr), 1, 15, %1(<2 x i16>), %1(<2 x i16>), -1, 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.exp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.exp.mir index d5fbbdafd2403..6842603fb89d7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.exp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.exp.mir @@ -10,12 +10,12 @@ regBankSelected: true body: | bb.0: liveins: $vgpr0 - %0:vgpr(s32) = COPY $vgpr0 + %0:vgpr(i32) = COPY $vgpr0 ; CHECK: EXP 1, %0, %0, %0, %0, 0, 0, 15, implicit $exec - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp),1, 15, %0:vgpr(s32), %0:vgpr(s32), %0:vgpr(s32), %0:vgpr(s32), 0, 0 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp),1, 15, %0:vgpr(i32), %0:vgpr(i32), %0:vgpr(i32), %0:vgpr(i32), 0, 0 ; CHECK: EXP_DONE 1, %0, %0, %0, %0, 0, 0, 15, implicit $exec - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 1, 15, %0:vgpr(s32), %0:vgpr(s32), %0:vgpr(s32), %0:vgpr(s32), -1, 0 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 1, 15, %0:vgpr(i32), %0:vgpr(i32), %0:vgpr(i32), %0:vgpr(i32), -1, 0 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir index 49383135ab0c5..c0af14226562f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir @@ -30,12 +30,14 @@ body: | ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: [[V_CMP_F_F16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_F_F16_fake16_e64 0, [[V_CVT_F16_F32_fake16_e64_]], 0, [[V_CVT_F16_F32_fake16_e64_1]], 0, implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F16_fake16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_FPTRUNC %0 - %3:vgpr(s16) = G_FPTRUNC %1 - %4:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %2, %3, 0 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(f32) = G_BITCAST %0(i32) + %3:vgpr(f16) = G_FPTRUNC %2(f32) + %4:vgpr(f32) = G_BITCAST %1(i32) + %5:vgpr(f16) = G_FPTRUNC %4(f32) + %6:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %3(f16), %5(f16), 0 + S_ENDPGM 0, implicit %6(i32) ... --- @@ -66,12 +68,14 @@ body: | ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: [[V_CMP_TRU_F16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_TRU_F16_fake16_e64 0, [[V_CVT_F16_F32_fake16_e64_]], 0, [[V_CVT_F16_F32_fake16_e64_1]], 0, implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F16_fake16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_FPTRUNC %0 - %3:vgpr(s16) = G_FPTRUNC %1 - %4:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %2, %3, 15 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(f32) = G_BITCAST %0(i32) + %3:vgpr(f16) = G_FPTRUNC %2(f32) + %4:vgpr(f32) = G_BITCAST %1(i32) + %5:vgpr(f16) = G_FPTRUNC %4(f32) + %6:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %3(f16), %5(f16), 15 + S_ENDPGM 0, implicit %6(i32) ... --- @@ -90,10 +94,12 @@ body: | ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[V_CMP_F_F32_e64_:%[0-9]+]]:sreg_32 = V_CMP_F_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %4:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %0, %1, 0 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(f32) = G_BITCAST %0(i32) + %3:vgpr(f32) = G_BITCAST %1(i32) + %4:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %2(f32), %3(f32), 0 + S_ENDPGM 0, implicit %4(i32) ... 
--- @@ -112,10 +118,12 @@ body: | ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[V_CMP_TRU_F32_e64_:%[0-9]+]]:sreg_32 = V_CMP_TRU_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %4:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %0, %1, 15 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(f32) = G_BITCAST %0(i32) + %3:vgpr(f32) = G_BITCAST %1(i32) + %4:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %2(f32), %3(f32), 15 + S_ENDPGM 0, implicit %4(i32) ... --- @@ -136,12 +144,14 @@ body: | ; GFX11-NEXT: [[V_CVT_F64_F32_e64_1:%[0-9]+]]:vreg_64 = nofpexcept V_CVT_F64_F32_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: [[V_CMP_F_F64_e64_:%[0-9]+]]:sreg_32 = V_CMP_F_F64_e64 0, [[V_CVT_F64_F32_e64_]], 0, [[V_CVT_F64_F32_e64_1]], 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F64_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s64) = G_FPEXT %0 - %3:vgpr(s64) = G_FPEXT %1 - %4:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %2, %3, 0 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(f32) = G_BITCAST %0(i32) + %3:vgpr(f64) = G_FPEXT %2(f32) + %4:vgpr(f32) = G_BITCAST %1(i32) + %5:vgpr(f64) = G_FPEXT %4(f32) + %6:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %3(f64), %5(f64), 0 + S_ENDPGM 0, implicit %6(i32) ... --- @@ -162,10 +172,12 @@ body: | ; GFX11-NEXT: [[V_CVT_F64_F32_e64_1:%[0-9]+]]:vreg_64 = nofpexcept V_CVT_F64_F32_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: [[V_CMP_TRU_F64_e64_:%[0-9]+]]:sreg_32 = V_CMP_TRU_F64_e64 0, [[V_CVT_F64_F32_e64_]], 0, [[V_CVT_F64_F32_e64_1]], 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F64_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s64) = G_FPEXT %0 - %3:vgpr(s64) = G_FPEXT %1 - %4:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %2, %3, 15 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(f32) = G_BITCAST %0(i32) + %3:vgpr(f64) = G_FPEXT %2(f32) + %4:vgpr(f32) = G_BITCAST %1(i32) + %5:vgpr(f64) = G_FPEXT %4(f32) + %6:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %3(f64), %5(f64), 15 + S_ENDPGM 0, implicit %6(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir index 828eb5d3fb40a..28dfa544cf9eb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir @@ -30,12 +30,14 @@ body: | ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: [[V_CMP_F_F16_fake16_e64_:%[0-9]+]]:sreg_64 = V_CMP_F_F16_fake16_e64 0, [[V_CVT_F16_F32_fake16_e64_]], 0, [[V_CVT_F16_F32_fake16_e64_1]], 0, implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F16_fake16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_FPTRUNC %0 - %3:vgpr(s16) = G_FPTRUNC %1 - %4:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %2, %3, 0 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(f32) = G_BITCAST %0(i32) + %3:vgpr(f16) = G_FPTRUNC %2(f32) + %4:vgpr(f32) = G_BITCAST %1(i32) + %5:vgpr(f16) = G_FPTRUNC %4(f32) + %6:sgpr(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %3(f16), %5(f16), 0 + S_ENDPGM 0, implicit %6(i64) ... --- @@ -66,12 +68,14 @@ body: | ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: [[V_CMP_TRU_F16_fake16_e64_:%[0-9]+]]:sreg_64 = V_CMP_TRU_F16_fake16_e64 0, [[V_CVT_F16_F32_fake16_e64_]], 0, [[V_CVT_F16_F32_fake16_e64_1]], 0, implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F16_fake16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_FPTRUNC %0 - %3:vgpr(s16) = G_FPTRUNC %1 - %4:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %2, %3, 15 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(f32) = G_BITCAST %0(i32) + %3:vgpr(f16) = G_FPTRUNC %2(f32) + %4:vgpr(f32) = G_BITCAST %1(i32) + %5:vgpr(f16) = G_FPTRUNC %4(f32) + %6:sgpr(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %3(f16), %5(f16), 15 + S_ENDPGM 0, implicit %6(i64) ... --- @@ -90,10 +94,12 @@ body: | ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[V_CMP_F_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_F_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %4:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %0, %1, 0 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(f32) = G_BITCAST %0(i32) + %3:vgpr(f32) = G_BITCAST %1(i32) + %4:sgpr(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %2(f32), %3(f32), 0 + S_ENDPGM 0, implicit %4(i64) ... 
--- @@ -112,10 +118,12 @@ body: | ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[V_CMP_TRU_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_TRU_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %4:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %0, %1, 15 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(f32) = G_BITCAST %0(i32) + %3:vgpr(f32) = G_BITCAST %1(i32) + %4:sgpr(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %2(f32), %3(f32), 15 + S_ENDPGM 0, implicit %4(i64) ... --- @@ -136,12 +144,14 @@ body: | ; GFX11-NEXT: [[V_CVT_F64_F32_e64_1:%[0-9]+]]:vreg_64 = nofpexcept V_CVT_F64_F32_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: [[V_CMP_F_F64_e64_:%[0-9]+]]:sreg_64 = V_CMP_F_F64_e64 0, [[V_CVT_F64_F32_e64_]], 0, [[V_CVT_F64_F32_e64_1]], 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F64_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s64) = G_FPEXT %0 - %3:vgpr(s64) = G_FPEXT %1 - %4:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %2, %3, 0 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(f32) = G_BITCAST %0(i32) + %3:vgpr(f64) = G_FPEXT %2(f32) + %4:vgpr(f32) = G_BITCAST %1(i32) + %5:vgpr(f64) = G_FPEXT %4(f32) + %6:sgpr(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %3(f64), %5(f64), 0 + S_ENDPGM 0, implicit %6(i64) ... --- @@ -162,10 +172,12 @@ body: | ; GFX11-NEXT: [[V_CVT_F64_F32_e64_1:%[0-9]+]]:vreg_64 = nofpexcept V_CVT_F64_F32_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: [[V_CMP_TRU_F64_e64_:%[0-9]+]]:sreg_64 = V_CMP_TRU_F64_e64 0, [[V_CVT_F64_F32_e64_]], 0, [[V_CVT_F64_F32_e64_1]], 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F64_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s64) = G_FPEXT %0 - %3:vgpr(s64) = G_FPEXT %1 - %4:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %2, %3, 15 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(f32) = G_BITCAST %0(i32) + %3:vgpr(f64) = G_FPEXT %2(f32) + %4:vgpr(f32) = G_BITCAST %1(i32) + %5:vgpr(f64) = G_FPEXT %4(f32) + %6:sgpr(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %3(f64), %5(f64), 15 + S_ENDPGM 0, implicit %6(i64) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmad.ftz.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmad.ftz.mir index 944488bf09661..c4fa8c1787f90 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmad.ftz.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmad.ftz.mir @@ -22,11 +22,15 @@ body: | ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GCN-NEXT: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MAC_F32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %0, %1, %2 - S_ENDPGM 0, implicit %3 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(f32) = G_BITCAST %0(i32) + %4:vgpr(f32) = G_BITCAST %1(i32) + %5:vgpr(f32) = G_BITCAST %2(i32) + %6:vgpr(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %3(f32), %4(f32), %5(f32) + %7:vgpr(i32) = G_BITCAST %6(f32) + S_ENDPGM 0, implicit %7(i32) ... --- @@ -47,11 +51,15 @@ body: | ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GCN-NEXT: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MAC_F32_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(s32) = COPY $vgpr1 - %3:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %0, %1, %2 - S_ENDPGM 0, implicit %3 + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:vgpr(i32) = COPY $vgpr1 + %3:sgpr(f32) = G_BITCAST %0(i32) + %4:vgpr(f32) = G_BITCAST %1(i32) + %5:vgpr(f32) = G_BITCAST %2(i32) + %6:vgpr(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %3(f32), %4(f32), %5(f32) + %7:vgpr(i32) = G_BITCAST %6(f32) + S_ENDPGM 0, implicit %7(i32) ... --- @@ -72,11 +80,15 @@ body: | ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GCN-NEXT: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MAC_F32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr(s32) = COPY $sgpr0 - %2:vgpr(s32) = COPY $vgpr1 - %3:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %0, %1, %2 - S_ENDPGM 0, implicit %3 + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = COPY $sgpr0 + %2:vgpr(i32) = COPY $vgpr1 + %3:vgpr(f32) = G_BITCAST %0(i32) + %4:sgpr(f32) = G_BITCAST %1(i32) + %5:vgpr(f32) = G_BITCAST %2(i32) + %6:vgpr(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %3(f32), %4(f32), %5(f32) + %7:vgpr(i32) = G_BITCAST %6(f32) + S_ENDPGM 0, implicit %7(i32) ... 
--- @@ -98,11 +110,15 @@ body: | ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] ; GCN-NEXT: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MAC_F32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:sgpr(s32) = COPY $sgpr0 - %3:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %0, %1, %2 - S_ENDPGM 0, implicit %3 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:sgpr(i32) = COPY $sgpr0 + %3:vgpr(f32) = G_BITCAST %0(i32) + %4:vgpr(f32) = G_BITCAST %1(i32) + %5:sgpr(f32) = G_BITCAST %2(i32) + %6:vgpr(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %3(f32), %4(f32), %5(f32) + %7:vgpr(i32) = G_BITCAST %6(f32) + S_ENDPGM 0, implicit %7(i32) ... @@ -124,10 +140,14 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN-NEXT: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[COPY]], 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MAC_F32_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %0, %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:sgpr(f32) = G_BITCAST %0(i32) + %3:sgpr(f32) = G_BITCAST %0(i32) + %4:vgpr(f32) = G_BITCAST %1(i32) + %5:vgpr(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %2(f32), %3(f32), %4(f32) + %6:vgpr(i32) = G_BITCAST %5(f32) + S_ENDPGM 0, implicit %6(i32) ... --- @@ -148,10 +168,14 @@ body: | ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; GCN-NEXT: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MAC_F32_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %0, %1, %0 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:sgpr(f32) = G_BITCAST %0(i32) + %3:vgpr(f32) = G_BITCAST %1(i32) + %4:sgpr(f32) = G_BITCAST %0(i32) + %5:vgpr(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %2(f32), %3(f32), %4(f32) + %6:vgpr(i32) = G_BITCAST %5(f32) + S_ENDPGM 0, implicit %6(i32) ... --- @@ -172,10 +196,14 @@ body: | ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; GCN-NEXT: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MAC_F32_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %1, %0, %0 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:vgpr(f32) = G_BITCAST %1(i32) + %3:sgpr(f32) = G_BITCAST %0(i32) + %4:sgpr(f32) = G_BITCAST %0(i32) + %5:vgpr(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %2(f32), %3(f32), %4(f32) + %6:vgpr(i32) = G_BITCAST %5(f32) + S_ENDPGM 0, implicit %6(i32) ... 
--- @@ -195,9 +223,13 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; GCN-NEXT: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[COPY]], 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MAC_F32_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %0, %0, %0 - S_ENDPGM 0, implicit %1 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(f32) = G_BITCAST %0(i32) + %2:sgpr(f32) = G_BITCAST %0(i32) + %3:sgpr(f32) = G_BITCAST %0(i32) + %4:vgpr(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %1(f32), %2(f32), %3(f32) + %5:vgpr(i32) = G_BITCAST %4(f32) + S_ENDPGM 0, implicit %5(i32) ... @@ -237,10 +269,14 @@ body: | ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GCN-NEXT: [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F32_e64 0, [[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MAD_F32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = G_FNEG %2 - %4:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %0, %1, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(f32) = G_BITCAST %2(i32) + %4:vgpr(f32) = G_FNEG %3 + %5:vgpr(f32) = G_BITCAST %0(i32) + %6:vgpr(f32) = G_BITCAST %1(i32) + %7:vgpr(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %5(f32), %6(f32), %4(f32) + %8:vgpr(i32) = G_BITCAST %7(f32) + S_ENDPGM 0, implicit %8(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmed3.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmed3.mir index 69217b7d6896a..811f6b0c97f23 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmed3.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmed3.mir @@ -19,11 +19,15 @@ body: | ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GCN-NEXT: [[V_MED3_F32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MED3_F32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %0, %1, %2 - S_ENDPGM 0, implicit %3 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(f32) = G_BITCAST %0(i32) + %4:vgpr(f32) = G_BITCAST %1(i32) + %5:vgpr(f32) = G_BITCAST %2(i32) + %6:vgpr(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %3(f32), %4(f32), %5(f32) + %7:vgpr(i32) = G_BITCAST %6(f32) + S_ENDPGM 0, implicit %7(i32) ... --- @@ -44,11 +48,15 @@ body: | ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GCN-NEXT: [[V_MED3_F32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MED3_F32_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(s32) = COPY $vgpr1 - %3:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %0, %1, %2 - S_ENDPGM 0, implicit %3 + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:vgpr(i32) = COPY $vgpr1 + %3:sgpr(f32) = G_BITCAST %0(i32) + %4:vgpr(f32) = G_BITCAST %1(i32) + %5:vgpr(f32) = G_BITCAST %2(i32) + %6:vgpr(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %3(f32), %4(f32), %5(f32) + %7:vgpr(i32) = G_BITCAST %6(f32) + S_ENDPGM 0, implicit %7(i32) ... 
--- @@ -69,11 +77,15 @@ body: | ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GCN-NEXT: [[V_MED3_F32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MED3_F32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr(s32) = COPY $sgpr0 - %2:vgpr(s32) = COPY $vgpr1 - %3:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %0, %1, %2 - S_ENDPGM 0, implicit %3 + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = COPY $sgpr0 + %2:vgpr(i32) = COPY $vgpr1 + %3:vgpr(f32) = G_BITCAST %0(i32) + %4:sgpr(f32) = G_BITCAST %1(i32) + %5:vgpr(f32) = G_BITCAST %2(i32) + %6:vgpr(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %3(f32), %4(f32), %5(f32) + %7:vgpr(i32) = G_BITCAST %6(f32) + S_ENDPGM 0, implicit %7(i32) ... --- @@ -94,11 +106,15 @@ body: | ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GCN-NEXT: [[V_MED3_F32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MED3_F32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:sgpr(s32) = COPY $sgpr0 - %3:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %0, %1, %2 - S_ENDPGM 0, implicit %3 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:sgpr(i32) = COPY $sgpr0 + %3:vgpr(f32) = G_BITCAST %0(i32) + %4:vgpr(f32) = G_BITCAST %1(i32) + %5:sgpr(f32) = G_BITCAST %2(i32) + %6:vgpr(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %3(f32), %4(f32), %5(f32) + %7:vgpr(i32) = G_BITCAST %6(f32) + S_ENDPGM 0, implicit %7(i32) ... @@ -120,10 +136,14 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN-NEXT: [[V_MED3_F32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_F32_e64 0, [[COPY]], 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MED3_F32_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %0, %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:sgpr(f32) = G_BITCAST %0(i32) + %3:sgpr(f32) = G_BITCAST %0(i32) + %4:vgpr(f32) = G_BITCAST %1(i32) + %5:vgpr(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %2(f32), %3(f32), %4(f32) + %6:vgpr(i32) = G_BITCAST %5(f32) + S_ENDPGM 0, implicit %6(i32) ... --- @@ -143,10 +163,14 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN-NEXT: [[V_MED3_F32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MED3_F32_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %0, %1, %0 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:sgpr(f32) = G_BITCAST %0(i32) + %3:vgpr(f32) = G_BITCAST %1(i32) + %4:sgpr(f32) = G_BITCAST %0(i32) + %5:vgpr(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %2(f32), %3(f32), %4(f32) + %6:vgpr(i32) = G_BITCAST %5(f32) + S_ENDPGM 0, implicit %6(i32) ... 
--- @@ -166,10 +190,14 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN-NEXT: [[V_MED3_F32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MED3_F32_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %1, %0, %0 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:vgpr(f32) = G_BITCAST %1(i32) + %3:sgpr(f32) = G_BITCAST %0(i32) + %4:sgpr(f32) = G_BITCAST %0(i32) + %5:vgpr(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %2(f32), %3(f32), %4(f32) + %6:vgpr(i32) = G_BITCAST %5(f32) + S_ENDPGM 0, implicit %6(i32) ... --- @@ -188,9 +216,13 @@ body: | ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GCN-NEXT: [[V_MED3_F32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_F32_e64 0, [[COPY]], 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MED3_F32_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %0, %0, %0 - S_ENDPGM 0, implicit %1 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(f32) = G_BITCAST %0(i32) + %2:sgpr(f32) = G_BITCAST %0(i32) + %3:sgpr(f32) = G_BITCAST %0(i32) + %4:vgpr(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %1(f32), %2(f32), %3(f32) + %5:vgpr(i32) = G_BITCAST %4(f32) + S_ENDPGM 0, implicit %5(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmed3.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmed3.s16.mir index e2d2f11630475..91fa13d263f14 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmed3.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmed3.s16.mir @@ -3,8 +3,8 @@ # RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=2 -pass-remarks-missed='gisel*' %s -o /dev/null 2>&1 | FileCheck -check-prefix=VI-ERR %s # VI-ERR-NOT: remark -# VI-ERR: remark: :0:0: cannot select: %6:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %3:vgpr(s16), %4:vgpr(s16), %5:vgpr(s16) (in function: fmed3_s16_vvvv) -# VI-ERR-NEXT: remark: :0:0: cannot select: %6:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %3:sgpr(s16), %4:vgpr(s16), %5:vgpr(s16) (in function: fmed3_s16_vsvv) +# VI-ERR: remark: :0:0: cannot select: %9:vgpr_32(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %6:vgpr(f16), %7:vgpr(f16), %8:vgpr(f16) (in function: fmed3_s16_vvvv) +# VI-ERR-NEXT: remark: :0:0: cannot select: %9:vgpr_32(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %6:sgpr(f16), %7:vgpr(f16), %8:vgpr(f16) (in function: fmed3_s16_vsvv) # VI-ERR-NOT: remark --- name: fmed3_s16_vvvv @@ -24,14 +24,18 @@ body: | ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GCN-NEXT: [[V_MED3_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MED3_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MED3_F16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s16) = G_TRUNC %0 - %4:vgpr(s16) = G_TRUNC %1 - %5:vgpr(s16) = G_TRUNC %2 - %6:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %3, %4, %5 - S_ENDPGM 0, implicit %6 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(i16) = G_TRUNC %0(i32) + %4:vgpr(i16) = G_TRUNC %1(i32) + %5:vgpr(i16) = G_TRUNC %2(i32) + %6:vgpr(f16) = G_BITCAST %3(i16) + 
%7:vgpr(f16) = G_BITCAST %4(i16) + %8:vgpr(f16) = G_BITCAST %5(i16) + %9:vgpr(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %6(f16), %7(f16), %8(f16) + %10:vgpr(i16) = G_BITCAST %9(f16) + S_ENDPGM 0, implicit %10(i16) ... --- @@ -52,12 +56,16 @@ body: | ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GCN-NEXT: [[V_MED3_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MED3_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MED3_F16_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(s32) = COPY $vgpr1 - %3:sgpr(s16) = G_TRUNC %0 - %4:vgpr(s16) = G_TRUNC %1 - %5:vgpr(s16) = G_TRUNC %2 - %6:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %3, %4, %5 - S_ENDPGM 0, implicit %6 + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:vgpr(i32) = COPY $vgpr1 + %3:sgpr(i16) = G_TRUNC %0(i32) + %4:vgpr(i16) = G_TRUNC %1(i32) + %5:vgpr(i16) = G_TRUNC %2(i32) + %6:sgpr(f16) = G_BITCAST %3(i16) + %7:vgpr(f16) = G_BITCAST %4(i16) + %8:vgpr(f16) = G_BITCAST %5(i16) + %9:vgpr(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %6(f16), %7(f16), %8(f16) + %10:vgpr(i16) = G_BITCAST %9(f16) + S_ENDPGM 0, implicit %10(i16) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fract.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fract.mir index 9e5951663a9e4..2c1de35a40423 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fract.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fract.mir @@ -16,11 +16,13 @@ body: | ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: %1:vgpr_32 = nofpexcept V_FRACT_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %1 - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[V_FRACT_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FRACT_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_FRACT_F32_e64_]] + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(f32) = G_BITCAST %0(i32) + %2:vgpr(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), %1(f32) + %3:vgpr(i32) = G_BITCAST %2(f32) + S_ENDPGM 0, implicit %3(i32) ... --- @@ -37,11 +39,13 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: %1:vgpr_32 = nofpexcept V_FRACT_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %1 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[V_FRACT_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FRACT_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_FRACT_F32_e64_]] + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(f32) = G_BITCAST %0(i32) + %2:vgpr(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), %1(f32) + %3:vgpr(i32) = G_BITCAST %2(f32) + S_ENDPGM 0, implicit %3(i32) ... 
--- @@ -58,11 +62,13 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: %1:vreg_64 = nofpexcept V_FRACT_F64_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %1 - %0:sgpr(s64) = COPY $sgpr0_sgpr1 - %1:vgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[V_FRACT_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_FRACT_F64_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_FRACT_F64_e64_]] + %0:sgpr(i64) = COPY $sgpr0_sgpr1 + %1:sgpr(f64) = G_BITCAST %0(i64) + %2:vgpr(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), %1(f64) + %3:vgpr(i64) = G_BITCAST %2(f64) + S_ENDPGM 0, implicit %3(i64) ... --- @@ -79,9 +85,11 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: %1:vreg_64 = nofpexcept V_FRACT_F64_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %1 - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[V_FRACT_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_FRACT_F64_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_FRACT_F64_e64_]] + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(f64) = G_BITCAST %0(i64) + %2:vgpr(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), %1(f64) + %3:vgpr(i64) = G_BITCAST %2(f64) + S_ENDPGM 0, implicit %3(i64) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fract.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fract.s16.mir index 9feb4d831e077..e6fd37b2c87b4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fract.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fract.s16.mir @@ -2,8 +2,8 @@ # RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s # RUN: llc -mtriple=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=2 -pass-remarks-missed='gisel*' %s -o /dev/null 2>&1 | FileCheck -check-prefix=SI-ERR %s -# SI-ERR: remark: :0:0: cannot select: %2:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), %1:sgpr(s16) (in function: fract_s16_vs) -# SI-ERR: remark: :0:0: cannot select: %2:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), %1:vgpr(s16) (in function: fract_s16_vv) +# SI-ERR: remark: :0:0: cannot select: %3:vgpr_32(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), %2:sgpr(f16) (in function: fract_s16_vs) +# SI-ERR: remark: :0:0: cannot select: %3:vgpr_32(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), %2:vgpr(f16) (in function: fract_s16_vv) --- @@ -22,10 +22,12 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; CHECK-NEXT: [[V_FRACT_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FRACT_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_FRACT_F16_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0 - %2:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(f16) = G_BITCAST %1(i16) + %3:vgpr(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), %2(f16) + %4:vgpr(i16) = G_BITCAST %3(f16) + S_ENDPGM 0, implicit %4(i16) ... 
--- @@ -44,8 +46,10 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[V_FRACT_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FRACT_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_FRACT_F16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s16) = G_TRUNC %0 - %2:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i16) = G_TRUNC %0(i32) + %2:vgpr(f16) = G_BITCAST %1(i16) + %3:vgpr(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), %2(f16) + %4:vgpr(i16) = G_BITCAST %3(f16) + S_ENDPGM 0, implicit %4(i16) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.groupstaticsize.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.groupstaticsize.mir index dc6104c9cc30a..15d7cd46d5d78 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.groupstaticsize.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.groupstaticsize.mir @@ -17,11 +17,12 @@ body: | ; HSAPAL-LABEL: name: groupstaticsize_v ; HSAPAL: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; HSAPAL-NEXT: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]] + ; ; MESA-LABEL: name: groupstaticsize_v ; MESA: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 target-flags(amdgpu-abs32-lo) @llvm.amdgcn.groupstaticsize, implicit $exec ; MESA-NEXT: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]] - %0:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.groupstaticsize) - S_ENDPGM 0, implicit %0 + %0:vgpr(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.groupstaticsize) + S_ENDPGM 0, implicit %0(i32) ... --- @@ -38,9 +39,10 @@ body: | ; HSAPAL-LABEL: name: groupstaticsize_s ; HSAPAL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 ; HSAPAL-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_]] + ; ; MESA-LABEL: name: groupstaticsize_s ; MESA: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @llvm.amdgcn.groupstaticsize ; MESA-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_]] - %0:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.groupstaticsize) - S_ENDPGM 0, implicit %0 + %0:sgpr(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.groupstaticsize) + S_ENDPGM 0, implicit %0(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.mbcnt.lo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.mbcnt.lo.mir index ad96afa1b86c7..578fca266a26a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.mbcnt.lo.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.mbcnt.lo.mir @@ -10,9 +10,9 @@ # body: | # bb.0: # liveins: $sgpr0, $sgpr1 -# %0:sgpr(s32) = COPY $sgpr0 -# %1:sgpr(s32) = COPY $sgpr1 -# %2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mbcnt.lo), %0, %1 +# %0:sgpr(i32) = COPY $sgpr0 +# %1:sgpr(i32) = COPY $sgpr1 +# %2:vgpr(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mbcnt.lo), %0, %1 # S_ENDPGM 0, implicit %2 # ... @@ -31,10 +31,10 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN-NEXT: [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 [[COPY]], [[COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MBCNT_LO_U32_B32_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mbcnt.lo), %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:vgpr(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mbcnt.lo), %0(i32), %1(i32) + S_ENDPGM 0, implicit %2(i32) ... 
--- @@ -52,10 +52,10 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GCN-NEXT: [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 [[COPY]], [[COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MBCNT_LO_U32_B32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr(s32) = COPY $sgpr0 - %2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mbcnt.lo), %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = COPY $sgpr0 + %2:vgpr(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mbcnt.lo), %0(i32), %1(i32) + S_ENDPGM 0, implicit %2(i32) ... --- @@ -73,8 +73,8 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GCN-NEXT: [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 [[COPY]], [[COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MBCNT_LO_U32_B32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mbcnt.lo), %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mbcnt.lo), %0(i32), %1(i32) + S_ENDPGM 0, implicit %2(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.mul.u24.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.mul.u24.mir index 299863866d8ac..50dc9f3124e22 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.mul.u24.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.mul.u24.mir @@ -17,10 +17,10 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN-NEXT: [[V_MUL_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_U32_U24_e64 [[COPY]], [[COPY1]], 0, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MUL_U32_U24_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mul.u24), %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:vgpr(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mul.u24), %0(i32), %1(i32) + S_ENDPGM 0, implicit %2(i32) ... --- @@ -39,10 +39,10 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GCN-NEXT: [[V_MUL_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_U32_U24_e64 [[COPY]], [[COPY1]], 0, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MUL_U32_U24_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr(s32) = COPY $sgpr0 - %2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mul.u24), %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = COPY $sgpr0 + %2:vgpr(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mul.u24), %0(i32), %1(i32) + S_ENDPGM 0, implicit %2(i32) ... --- @@ -61,8 +61,8 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GCN-NEXT: [[V_MUL_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_U32_U24_e64 [[COPY]], [[COPY1]], 0, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MUL_U32_U24_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mul.u24), %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mul.u24), %0(i32), %1(i32) + S_ENDPGM 0, implicit %2(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.mulhi.i24.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.mulhi.i24.mir index ce0835ceb3df1..c710f413b1b9f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.mulhi.i24.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.mulhi.i24.mir @@ -17,10 +17,10 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[V_MUL_HI_I32_I24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_I24_e64 [[COPY]], [[COPY1]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_I32_I24_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mulhi.i24), %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:vgpr(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mulhi.i24), %0(i32), %1(i32) + S_ENDPGM 0, implicit %2(i32) ... --- @@ -39,10 +39,10 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; CHECK-NEXT: [[V_MUL_HI_I32_I24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_I24_e64 [[COPY]], [[COPY1]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_I32_I24_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr(s32) = COPY $sgpr0 - %2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mulhi.i24), %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = COPY $sgpr0 + %2:vgpr(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mulhi.i24), %0(i32), %1(i32) + S_ENDPGM 0, implicit %2(i32) ... --- @@ -61,8 +61,8 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[V_MUL_HI_I32_I24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_I24_e64 [[COPY]], [[COPY1]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_I32_I24_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mulhi.i24), %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mulhi.i24), %0(i32), %1(i32) + S_ENDPGM 0, implicit %2(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.mulhi.u24.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.mulhi.u24.mir index 56d7ce5dedba7..d0c177c0a1f66 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.mulhi.u24.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.mulhi.u24.mir @@ -17,10 +17,10 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[V_MUL_HI_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_U24_e64 [[COPY]], [[COPY1]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_U32_U24_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mulhi.u24), %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:vgpr(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mulhi.u24), %0(i32), %1(i32) + S_ENDPGM 0, implicit %2(i32) ... 
--- @@ -39,10 +39,10 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; CHECK-NEXT: [[V_MUL_HI_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_U24_e64 [[COPY]], [[COPY1]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_U32_U24_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr(s32) = COPY $sgpr0 - %2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mulhi.u24), %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = COPY $sgpr0 + %2:vgpr(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mulhi.u24), %0(i32), %1(i32) + S_ENDPGM 0, implicit %2(i32) ... --- @@ -61,8 +61,8 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[V_MUL_HI_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_U24_e64 [[COPY]], [[COPY1]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_U32_U24_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mulhi.u24), %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mulhi.u24), %0(i32), %1(i32) + S_ENDPGM 0, implicit %2(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rcp.legacy.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rcp.legacy.mir index 9862d69e520c0..bddfae71f962d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rcp.legacy.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rcp.legacy.mir @@ -4,8 +4,8 @@ # RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=2 -pass-remarks-missed='gisel*' %s -o /dev/null 2>&1 | FileCheck -check-prefix=VI-ERR %s # RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=2 -pass-remarks-missed='gisel*' %s -o /dev/null 2>&1 | FileCheck -check-prefix=VI-ERR %s -# VI-ERR: remark: :0:0: cannot select: %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp.legacy), %0:sgpr(s32) (in function: rcp_legacy_s32_vs) -# VI-ERR-NEXT: remark: :0:0: cannot select: %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp.legacy), %0:vgpr(s32) (in function: rcp_legacy_s32_vv) +# VI-ERR: remark: :0:0: cannot select: %2:vgpr_32(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp.legacy), %1:sgpr(f32) (in function: rcp_legacy_s32_vs) +# VI-ERR-NEXT: remark: :0:0: cannot select: %2:vgpr_32(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp.legacy), %1:vgpr(f32) (in function: rcp_legacy_s32_vv) --- name: rcp_legacy_s32_vs @@ -19,12 +19,15 @@ body: | ; CHECK-LABEL: name: rcp_legacy_s32_vs ; CHECK: liveins: $sgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK: %1:vgpr_32 = nofpexcept V_RCP_LEGACY_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK: S_ENDPGM 0, implicit %1 - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp.legacy), %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; CHECK-NEXT: [[V_RCP_LEGACY_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_RCP_LEGACY_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_RCP_LEGACY_F32_e64_]] + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(f32) = G_BITCAST %0(i32) + %2:vgpr(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp.legacy), %1(f32) + %3:vgpr(i32) = G_BITCAST %2(f32) + S_ENDPGM 0, implicit %3(i32) ... 
--- @@ -39,10 +42,13 @@ body: | ; CHECK-LABEL: name: rcp_legacy_s32_vv ; CHECK: liveins: $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK: %1:vgpr_32 = nofpexcept V_RCP_LEGACY_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK: S_ENDPGM 0, implicit %1 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp.legacy), %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[V_RCP_LEGACY_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_RCP_LEGACY_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_RCP_LEGACY_F32_e64_]] + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(f32) = G_BITCAST %0(i32) + %2:vgpr(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp.legacy), %1(f32) + %3:vgpr(i32) = G_BITCAST %2(f32) + S_ENDPGM 0, implicit %3(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rcp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rcp.mir index 860cfa94bea63..2d68dd01cae96 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rcp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rcp.mir @@ -16,11 +16,13 @@ body: | ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: %1:vgpr_32 = nofpexcept V_RCP_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %1 - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[V_RCP_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_RCP_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_RCP_F32_e64_]] + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(f32) = G_BITCAST %0(i32) + %2:vgpr(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %1(f32) + %3:vgpr(i32) = G_BITCAST %2(f32) + S_ENDPGM 0, implicit %3(i32) ... --- @@ -37,11 +39,13 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: %1:vgpr_32 = nofpexcept V_RCP_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %1 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[V_RCP_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_RCP_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_RCP_F32_e64_]] + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(f32) = G_BITCAST %0(i32) + %2:vgpr(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %1(f32) + %3:vgpr(i32) = G_BITCAST %2(f32) + S_ENDPGM 0, implicit %3(i32) ... 
--- @@ -58,11 +62,13 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: %1:vreg_64 = nofpexcept V_RCP_F64_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %1 - %0:sgpr(s64) = COPY $sgpr0_sgpr1 - %1:vgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[V_RCP_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_RCP_F64_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_RCP_F64_e64_]] + %0:sgpr(i64) = COPY $sgpr0_sgpr1 + %1:sgpr(f64) = G_BITCAST %0(i64) + %2:vgpr(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %1(f64) + %3:vgpr(i64) = G_BITCAST %2(f64) + S_ENDPGM 0, implicit %3(i64) ... --- @@ -79,9 +85,11 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: %1:vreg_64 = nofpexcept V_RCP_F64_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %1 - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[V_RCP_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_RCP_F64_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_RCP_F64_e64_]] + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(f64) = G_BITCAST %0(i64) + %2:vgpr(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %1(f64) + %3:vgpr(i64) = G_BITCAST %2(f64) + S_ENDPGM 0, implicit %3(i64) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rcp.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rcp.s16.mir index f9ec4364fd6ff..788c6344a5696 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rcp.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rcp.s16.mir @@ -2,9 +2,8 @@ # RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s # RUN: llc -mtriple=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=2 -pass-remarks-missed='gisel*' %s -o /dev/null 2>&1 | FileCheck -check-prefix=SI-ERR %s -# SI-ERR: remark: :0:0: cannot select: %2:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %1:sgpr(s16) (in function: rcp_s16_vs) -# SI-ERR: remark: :0:0: cannot select: %2:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %1:vgpr(s16) (in function: rcp_s16_vv) - +# SI-ERR: remark: :0:0: cannot select: %3:vgpr_32(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %2:sgpr(f16) (in function: rcp_s16_vs) +# SI-ERR: remark: :0:0: cannot select: %3:vgpr_32(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %2:vgpr(f16) (in function: rcp_s16_vv) --- name: rcp_s16_vs legalized: true @@ -21,10 +20,12 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; CHECK-NEXT: [[V_RCP_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_RCP_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_RCP_F16_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0 - %2:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(f16) = G_BITCAST %1(i16) + %3:vgpr(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %2(f16) + %4:vgpr(i16) = G_BITCAST %3(f16) + S_ENDPGM 0, implicit %4(i16) ... 
--- @@ -43,8 +44,10 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[V_RCP_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_RCP_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_RCP_F16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s16) = G_TRUNC %0 - %2:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i16) = G_TRUNC %0(i32) + %2:vgpr(f16) = G_BITCAST %1(i16) + %3:vgpr(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %2(f16) + %4:vgpr(i16) = G_BITCAST %3(f16) + S_ENDPGM 0, implicit %4(i16) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.readfirstlane.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.readfirstlane.mir index 00d8abb0c2c32..ea29efe1d9535 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.readfirstlane.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.readfirstlane.mir @@ -2,7 +2,7 @@ # RUN: llc -mtriple=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=2 -pass-remarks-missed='gisel*' %s -o - 2> %t | FileCheck -check-prefix=GCN %s # RUN: FileCheck -check-prefix=ERR %s < %t -# ERR: remark: :0:0: cannot select: %1:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %0:sgpr(s32) (in function: readfirstlane_s) +# ERR: remark: :0:0: cannot select: %1:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %0:sgpr(i32) (in function: readfirstlane_s) --- name: readfirstlane_v @@ -19,9 +19,9 @@ body: | ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_READFIRSTLANE_B32_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %0 - S_ENDPGM 0, implicit %1 + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %0(i32) + S_ENDPGM 0, implicit %1(i32) ... --- @@ -38,9 +38,9 @@ body: | ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY [[V_MOV_B32_e32_]] ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 [[COPY]] ; GCN-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_]] - %0:vgpr(s32) = G_CONSTANT i32 123 - %1:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %0 - S_ENDPGM 0, implicit %1 + %0:vgpr(i32) = G_CONSTANT i32 123 + %1:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %0(i32) + S_ENDPGM 0, implicit %1(i32) ... # Make sure this fails to select @@ -56,10 +56,10 @@ body: | ; GCN-LABEL: name: readfirstlane_s ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GCN-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](s32) - ; GCN-NEXT: S_ENDPGM 0, implicit [[INT]](s32) - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %0 - S_ENDPGM 0, implicit %1 + ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GCN-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](i32) + ; GCN-NEXT: S_ENDPGM 0, implicit [[INTRINSIC_CONVERGENT]](i32) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %0(i32) + S_ENDPGM 0, implicit %1(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.reloc.constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.reloc.constant.mir index 6a89085d020b6..28dba6f6b7611 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.reloc.constant.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.reloc.constant.mir @@ -24,8 +24,8 @@ body: | ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @arst ; GCN-NEXT: $sgpr0 = COPY [[S_MOV_B32_]] ; GCN-NEXT: S_ENDPGM 0, implicit $sgpr0 - %0:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.reloc.constant), !0 - $sgpr0 = COPY %0 + %0:sgpr(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.reloc.constant), !0 + $sgpr0 = COPY %0(i32) S_ENDPGM 0, implicit $sgpr0 ... @@ -38,13 +38,13 @@ tracksRegLiveness: true body: | bb.0: + ; GCN-LABEL: name: reloc_constant_vgpr32 ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 target-flags(amdgpu-abs32-lo) @arst, implicit $exec ; GCN-NEXT: $vgpr0 = COPY [[V_MOV_B32_e32_]] ; GCN-NEXT: S_ENDPGM 0, implicit $vgpr0 - %0:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.reloc.constant), !0 - $vgpr0 = COPY %0 + %0:vgpr(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.reloc.constant), !0 + $vgpr0 = COPY %0(i32) S_ENDPGM 0, implicit $vgpr0 ... - diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.clamp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.clamp.mir index ebe238aae019f..255cb9798b254 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.clamp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.clamp.mir @@ -4,9 +4,8 @@ # RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=2 -pass-remarks-missed='gisel*' %s -o /dev/null 2>&1 | FileCheck -check-prefix=VI-ERR %s # RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=2 -pass-remarks-missed='gisel*' %s -o /dev/null 2>&1 | FileCheck -check-prefix=VI-ERR %s -# VI-ERR: remark: :0:0: cannot select: %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq.clamp), %0:sgpr(s32) (in function: rsq_clamp_s32_vs) -# VI-ERR-NEXT: remark: :0:0: cannot select: %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq.clamp), %0:vgpr(s32) (in function: rsq_clamp_s32_vv) - +# VI-ERR: remark: :0:0: cannot select: %2:vgpr_32(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq.clamp), %1:sgpr(f32) (in function: rsq_clamp_s32_vs) +# VI-ERR-NEXT: remark: :0:0: cannot select: %2:vgpr_32(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq.clamp), %1:vgpr(f32) (in function: rsq_clamp_s32_vv) --- name: rsq_clamp_s32_vs legalized: true @@ -19,12 +18,15 @@ body: | ; CHECK-LABEL: name: rsq_clamp_s32_vs ; CHECK: liveins: $sgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK: %1:vgpr_32 = nofpexcept V_RSQ_CLAMP_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK: S_ENDPGM 0, implicit %1 - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq.clamp), %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; CHECK-NEXT: [[V_RSQ_CLAMP_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_RSQ_CLAMP_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_RSQ_CLAMP_F32_e64_]] + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(f32) = G_BITCAST %0(i32) + %2:vgpr(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq.clamp), %1(f32) + %3:vgpr(i32) = G_BITCAST %2(f32) 
+ S_ENDPGM 0, implicit %3(i32) ... --- @@ -39,10 +41,13 @@ body: | ; CHECK-LABEL: name: rsq_clamp_s32_vv ; CHECK: liveins: $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK: %1:vgpr_32 = nofpexcept V_RSQ_CLAMP_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK: S_ENDPGM 0, implicit %1 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq.clamp), %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[V_RSQ_CLAMP_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_RSQ_CLAMP_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_RSQ_CLAMP_F32_e64_]] + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(f32) = G_BITCAST %0(i32) + %2:vgpr(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq.clamp), %1(f32) + %3:vgpr(i32) = G_BITCAST %2(f32) + S_ENDPGM 0, implicit %3(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.legacy.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.legacy.mir index 7fd3909405bca..641d3ed51fc61 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.legacy.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.legacy.mir @@ -4,8 +4,8 @@ # RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=2 -pass-remarks-missed='gisel*' %s -o /dev/null 2>&1 | FileCheck -check-prefix=VI-ERR %s # RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=2 -pass-remarks-missed='gisel*' %s -o /dev/null 2>&1 | FileCheck -check-prefix=VI-ERR %s -# VI-ERR: remark: :0:0: cannot select: %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq.legacy), %0:sgpr(s32) (in function: rsq_legacy_s32_vs) -# VI-ERR-NEXT: remark: :0:0: cannot select: %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq.legacy), %0:vgpr(s32) (in function: rsq_legacy_s32_vv) +# VI-ERR: remark: :0:0: cannot select: %2:vgpr_32(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq.legacy), %1:sgpr(f32) (in function: rsq_legacy_s32_vs) +# VI-ERR-NEXT: remark: :0:0: cannot select: %2:vgpr_32(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq.legacy), %1:vgpr(f32) (in function: rsq_legacy_s32_vv) --- name: rsq_legacy_s32_vs @@ -19,12 +19,15 @@ body: | ; CHECK-LABEL: name: rsq_legacy_s32_vs ; CHECK: liveins: $sgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK: %1:vgpr_32 = nofpexcept V_RSQ_LEGACY_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK: S_ENDPGM 0, implicit %1 - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq.legacy), %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; CHECK-NEXT: [[V_RSQ_LEGACY_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_RSQ_LEGACY_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_RSQ_LEGACY_F32_e64_]] + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(f32) = G_BITCAST %0(i32) + %2:vgpr(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq.legacy), %1(f32) + %3:vgpr(i32) = G_BITCAST %2(f32) + S_ENDPGM 0, implicit %3(i32) ... 
--- @@ -39,10 +42,13 @@ body: | ; CHECK-LABEL: name: rsq_legacy_s32_vv ; CHECK: liveins: $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK: %1:vgpr_32 = nofpexcept V_RSQ_LEGACY_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK: S_ENDPGM 0, implicit %1 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq.legacy), %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[V_RSQ_LEGACY_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_RSQ_LEGACY_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_RSQ_LEGACY_F32_e64_]] + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(f32) = G_BITCAST %0(i32) + %2:vgpr(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq.legacy), %1(f32) + %3:vgpr(i32) = G_BITCAST %2(f32) + S_ENDPGM 0, implicit %3(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.mir index 97342187bee17..924da5353305c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.mir @@ -16,11 +16,13 @@ body: | ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: %1:vgpr_32 = nofpexcept V_RSQ_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %1 - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[V_RSQ_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_RSQ_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_RSQ_F32_e64_]] + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(f32) = G_BITCAST %0(i32) + %2:vgpr(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %1(f32) + %3:vgpr(i32) = G_BITCAST %2(f32) + S_ENDPGM 0, implicit %3(i32) ... --- @@ -37,11 +39,13 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: %1:vgpr_32 = nofpexcept V_RSQ_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %1 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[V_RSQ_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_RSQ_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_RSQ_F32_e64_]] + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(f32) = G_BITCAST %0(i32) + %2:vgpr(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %1(f32) + %3:vgpr(i32) = G_BITCAST %2(f32) + S_ENDPGM 0, implicit %3(i32) ... 
--- @@ -58,11 +62,13 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: %1:vreg_64 = nofpexcept V_RSQ_F64_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %1 - %0:sgpr(s64) = COPY $sgpr0_sgpr1 - %1:vgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[V_RSQ_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_RSQ_F64_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_RSQ_F64_e64_]] + %0:sgpr(i64) = COPY $sgpr0_sgpr1 + %1:sgpr(f64) = G_BITCAST %0(i64) + %2:vgpr(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %1(f64) + %3:vgpr(i64) = G_BITCAST %2(f64) + S_ENDPGM 0, implicit %3(i64) ... --- @@ -79,9 +85,11 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: %1:vreg_64 = nofpexcept V_RSQ_F64_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %1 - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[V_RSQ_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_RSQ_F64_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_RSQ_F64_e64_]] + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(f64) = G_BITCAST %0(i64) + %2:vgpr(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %1(f64) + %3:vgpr(i64) = G_BITCAST %2(f64) + S_ENDPGM 0, implicit %3(i64) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.s16.mir index ac1ff73ce8024..84d0ce3e98b71 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.s16.mir @@ -2,8 +2,8 @@ # RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s # RUN: llc -mtriple=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=2 -pass-remarks-missed='gisel*' %s -o /dev/null 2>&1 | FileCheck -check-prefix=SI-ERR %s -# SI-ERR: remark: :0:0: cannot select: %2:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %1:sgpr(s16) (in function: rsq_s16_vs) -# SI-ERR: remark: :0:0: cannot select: %2:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %1:vgpr(s16) (in function: rsq_s16_vv) +# SI-ERR: remark: :0:0: cannot select: %3:vgpr_32(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %2:sgpr(f16) (in function: rsq_s16_vs) +# SI-ERR: remark: :0:0: cannot select: %3:vgpr_32(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %2:vgpr(f16) (in function: rsq_s16_vv) --- name: rsq_s16_vs @@ -21,10 +21,12 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; CHECK-NEXT: [[V_RSQ_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_RSQ_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_RSQ_F16_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0 - %2:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(f16) = G_BITCAST %1(i16) + %3:vgpr(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %2(f16) + %4:vgpr(i16) = G_BITCAST %3(f16) + S_ENDPGM 0, implicit %4(i16) ... 
--- @@ -43,8 +45,10 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[V_RSQ_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_RSQ_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_RSQ_F16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s16) = G_TRUNC %0 - %2:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i16) = G_TRUNC %0(i32) + %2:vgpr(f16) = G_BITCAST %1(i16) + %3:vgpr(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %2(f16) + %4:vgpr(i16) = G_BITCAST %3(f16) + S_ENDPGM 0, implicit %4(i16) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.s.sendmsg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.s.sendmsg.mir index 402f4db861e93..3a105ead531cb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.s.sendmsg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.s.sendmsg.mir @@ -18,8 +18,8 @@ body: | ; GCN-NEXT: $m0 = COPY [[COPY]] ; GCN-NEXT: S_SENDMSG 1, implicit $exec, implicit $m0 ; GCN-NEXT: S_ENDPGM 0 - %0:sgpr(s32) = COPY $sgpr0 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsg), 1, %0(s32) + %0:sgpr(i32) = COPY $sgpr0 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsg), 1, %0(i32) S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.sffbh.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.sffbh.mir index 0c7b8d33f1018..694ac62c76d32 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.sffbh.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.sffbh.mir @@ -17,9 +17,9 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; CHECK-NEXT: [[S_FLBIT_I32_:%[0-9]+]]:sreg_32 = S_FLBIT_I32 [[COPY]] ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_FLBIT_I32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), %0 - S_ENDPGM 0, implicit %1 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), %0(i32) + S_ENDPGM 0, implicit %1(i32) ... --- @@ -38,9 +38,9 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; CHECK-NEXT: [[V_FFBH_I32_e64_:%[0-9]+]]:vgpr_32 = V_FFBH_I32_e64 [[COPY]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_FFBH_I32_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), %0 - S_ENDPGM 0, implicit %1 + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), %0(i32) + S_ENDPGM 0, implicit %1(i32) ... --- @@ -59,7 +59,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[V_FFBH_I32_e64_:%[0-9]+]]:vgpr_32 = V_FFBH_I32_e64 [[COPY]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_FFBH_I32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), %0 - S_ENDPGM 0, implicit %1 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), %0(i32) + S_ENDPGM 0, implicit %1(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.sin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.sin.mir index 9218ce39a784f..482d032fbbadb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.sin.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.sin.mir @@ -16,11 +16,13 @@ body: | ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: %1:vgpr_32 = nofpexcept V_SIN_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %1 - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[V_SIN_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_SIN_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_SIN_F32_e64_]] + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(f32) = G_BITCAST %0(i32) + %2:vgpr(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), %1(f32) + %3:vgpr(i32) = G_BITCAST %2(f32) + S_ENDPGM 0, implicit %3(i32) ... --- @@ -37,9 +39,11 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: %1:vgpr_32 = nofpexcept V_SIN_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %1 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[V_SIN_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_SIN_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_SIN_F32_e64_]] + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(f32) = G_BITCAST %0(i32) + %2:vgpr(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), %1(f32) + %3:vgpr(i32) = G_BITCAST %2(f32) + S_ENDPGM 0, implicit %3(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.sin.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.sin.s16.mir index 0a9792f1807c5..0c81ba0bdeadb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.sin.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.sin.s16.mir @@ -2,8 +2,8 @@ # RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s # RUN: llc -mtriple=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=2 -pass-remarks-missed='gisel*' %s -o /dev/null 2>&1 | FileCheck -check-prefix=SI-ERR %s -# SI-ERR: remark: :0:0: cannot select: %2:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), %1:sgpr(s16) (in function: sin_s16_vs) -# SI-ERR: remark: :0:0: cannot select: %2:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), %1:vgpr(s16) (in function: sin_s16_vv) +# SI-ERR: remark: :0:0: cannot select: %3:vgpr_32(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), %2:sgpr(f16) (in function: sin_s16_vs) +# SI-ERR: remark: :0:0: cannot select: %3:vgpr_32(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), %2:vgpr(f16) (in function: sin_s16_vv) --- name: sin_s16_vs @@ -21,10 +21,12 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; CHECK-NEXT: [[V_SIN_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_SIN_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_SIN_F16_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0 - %2:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(f16) = G_BITCAST %1(i16) + %3:vgpr(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), %2(f16) + %4:vgpr(i16) = G_BITCAST %3(f16) + S_ENDPGM 0, implicit %4(i16) ... --- @@ -43,8 +45,10 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[V_SIN_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_SIN_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_SIN_F16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s16) = G_TRUNC %0 - %2:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i16) = G_TRUNC %0(i32) + %2:vgpr(f16) = G_BITCAST %1(i16) + %3:vgpr(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), %2(f16) + %4:vgpr(i16) = G_BITCAST %3(f16) + S_ENDPGM 0, implicit %4(i16) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-flat.mir index 85d852fc779b2..3cd8dfdc9130d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-flat.mir @@ -23,7 +23,7 @@ body: | ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX7-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX7-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32)) ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat @@ -33,7 +33,7 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX9-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX9-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32)) ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat @@ -43,7 +43,7 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX10-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX10-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; ; GFX11-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat @@ -53,7 +53,7 @@ body: | ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX11-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX11-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32)) ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; ; GFX12-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat @@ -63,14 +63,14 @@ body: | ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX12-NEXT: 
[[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX12-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32)) ; GFX12-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s32) = COPY $vgpr3 - %3:vgpr(<2 x s32>) = G_BUILD_VECTOR %1, %2 - %4:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG %0, %3 :: (load store seq_cst (s32), addrspace 0) - $vgpr0 = COPY %4 + %1:vgpr(i32) = COPY $vgpr2 + %2:vgpr(i32) = COPY $vgpr3 + %3:vgpr(<2 x i32>) = G_BUILD_VECTOR %1(i32), %2(i32) + %4:vgpr(i32) = G_AMDGPU_ATOMIC_CMPXCHG %0(p0), %3 :: (load store seq_cst (i32)) + $vgpr0 = COPY %4(i32) ... @@ -98,7 +98,7 @@ body: | ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX7-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32)) ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gep4 @@ -108,7 +108,7 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX9-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX9-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32)) ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gep4 @@ -126,7 +126,7 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX10-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32)) ; GFX10-NEXT: $vgpr0 = COPY 
[[FLAT_ATOMIC_CMPSWAP_RTN]] ; ; GFX11-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gep4 @@ -136,7 +136,7 @@ body: | ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX11-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX11-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32)) ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; ; GFX12-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gep4 @@ -146,16 +146,16 @@ body: | ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX12-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX12-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32)) ; GFX12-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s32) = COPY $vgpr3 - %3:vgpr(<2 x s32>) = G_BUILD_VECTOR %1, %2 - %4:vgpr(s64) = G_CONSTANT i64 4 - %5:vgpr(p0) = G_PTR_ADD %0, %4 - %6:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG %5, %3 :: (load store seq_cst (s32), addrspace 0) - $vgpr0 = COPY %6 + %1:vgpr(i32) = COPY $vgpr2 + %2:vgpr(i32) = COPY $vgpr3 + %3:vgpr(<2 x i32>) = G_BUILD_VECTOR %1(i32), %2(i32) + %4:vgpr(i64) = G_CONSTANT i64 4 + %5:vgpr(p0) = G_PTR_ADD %0, %4(i64) + %6:vgpr(i32) = G_AMDGPU_ATOMIC_CMPXCHG %5(p0), %3 :: (load store seq_cst (i32)) + $vgpr0 = COPY %6(i32) ... 
@@ -175,7 +175,7 @@ body: | ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX7-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i64)) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] ; ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat @@ -185,7 +185,7 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX9-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX9-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i64)) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] ; ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat @@ -195,7 +195,7 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX10-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX10-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i64)) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] ; ; GFX11-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat @@ -205,7 +205,7 @@ body: | ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX11-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX11-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i64)) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] ; ; GFX12-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat @@ -215,14 +215,14 @@ body: | ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX12-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX12-NEXT: 
[[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i64)) ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = COPY $vgpr2_vgpr3 - %2:vgpr(s64) = COPY $vgpr4_vgpr5 - %3:vgpr(<2 x s64>) = G_BUILD_VECTOR %1, %2 - %4:vgpr(s64) = G_AMDGPU_ATOMIC_CMPXCHG %0, %3 :: (load store seq_cst (s64), addrspace 0) - $vgpr0_vgpr1 = COPY %4 + %1:vgpr(i64) = COPY $vgpr2_vgpr3 + %2:vgpr(i64) = COPY $vgpr4_vgpr5 + %3:vgpr(<2 x i64>) = G_BUILD_VECTOR %1(i64), %2(i64) + %4:vgpr(i64) = G_AMDGPU_ATOMIC_CMPXCHG %0(p0), %3 :: (load store seq_cst (i64)) + $vgpr0_vgpr1 = COPY %4(i64) ... @@ -250,7 +250,7 @@ body: | ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX7-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i64)) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] ; ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_gep4 @@ -260,7 +260,7 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX9-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX9-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i64)) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] ; ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_gep4 @@ -278,7 +278,7 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX10-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i64)) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] ; ; GFX11-LABEL: name: 
amdgpu_atomic_cmpxchg_s64_flat_gep4 @@ -288,7 +288,7 @@ body: | ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX11-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX11-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i64)) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] ; ; GFX12-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_gep4 @@ -298,16 +298,16 @@ body: | ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX12-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX12-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i64)) ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = COPY $vgpr2_vgpr3 - %2:vgpr(s64) = COPY $vgpr4_vgpr5 - %3:vgpr(<2 x s64>) = G_BUILD_VECTOR %1, %2 - %4:vgpr(s64) = G_CONSTANT i64 4 - %5:vgpr(p0) = G_PTR_ADD %0, %4 - %6:vgpr(s64) = G_AMDGPU_ATOMIC_CMPXCHG %5, %3 :: (load store seq_cst (s64), addrspace 0) - $vgpr0_vgpr1 = COPY %6 + %1:vgpr(i64) = COPY $vgpr2_vgpr3 + %2:vgpr(i64) = COPY $vgpr4_vgpr5 + %3:vgpr(<2 x i64>) = G_BUILD_VECTOR %1(i64), %2(i64) + %4:vgpr(i64) = G_CONSTANT i64 4 + %5:vgpr(p0) = G_PTR_ADD %0, %4(i64) + %6:vgpr(i64) = G_AMDGPU_ATOMIC_CMPXCHG %5(p0), %3 :: (load store seq_cst (i64)) + $vgpr0_vgpr1 = COPY %6(i64) ... 
@@ -335,7 +335,7 @@ body: | ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX7-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32)) ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gepm4 @@ -353,7 +353,7 @@ body: | ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX9-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32)) ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gepm4 @@ -371,7 +371,7 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX10-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; ; GFX11-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gepm4 @@ -389,7 +389,7 @@ body: | ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX11-NEXT: 
[[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX11-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32)) ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; ; GFX12-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gepm4 @@ -399,16 +399,16 @@ body: | ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX12-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], -4, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX12-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], -4, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32)) ; GFX12-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s32) = COPY $vgpr3 - %3:vgpr(<2 x s32>) = G_BUILD_VECTOR %1, %2 - %4:vgpr(s64) = G_CONSTANT i64 -4 - %5:vgpr(p0) = G_PTR_ADD %0, %4 - %6:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG %5, %3 :: (load store seq_cst (s32), addrspace 0) - $vgpr0 = COPY %6 + %1:vgpr(i32) = COPY $vgpr2 + %2:vgpr(i32) = COPY $vgpr3 + %3:vgpr(<2 x i32>) = G_BUILD_VECTOR %1(i32), %2(i32) + %4:vgpr(i64) = G_CONSTANT i64 -4 + %5:vgpr(p0) = G_PTR_ADD %0, %4(i64) + %6:vgpr(i32) = G_AMDGPU_ATOMIC_CMPXCHG %5(p0), %3 :: (load store seq_cst (i32)) + $vgpr0 = COPY %6(i32) ... @@ -428,7 +428,7 @@ body: | ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX7-NEXT: FLAT_ATOMIC_CMPSWAP [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX7-NEXT: FLAT_ATOMIC_CMPSWAP [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32)) ; ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 @@ -437,7 +437,7 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX9-NEXT: FLAT_ATOMIC_CMPSWAP [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX9-NEXT: FLAT_ATOMIC_CMPSWAP [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32)) ; ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 @@ -446,7 +446,7 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX10-NEXT: FLAT_ATOMIC_CMPSWAP [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX10-NEXT: FLAT_ATOMIC_CMPSWAP [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32)) ; ; GFX11-LABEL: name: 
amdgpu_atomic_cmpxchg_s32_flat_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 @@ -455,7 +455,7 @@ body: | ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX11-NEXT: FLAT_ATOMIC_CMPSWAP [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX11-NEXT: FLAT_ATOMIC_CMPSWAP [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32)) ; ; GFX12-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_nortn ; GFX12: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 @@ -464,12 +464,12 @@ body: | ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX12-NEXT: FLAT_ATOMIC_CMPSWAP [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX12-NEXT: FLAT_ATOMIC_CMPSWAP [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32)) %0:vgpr(p0) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s32) = COPY $vgpr3 - %3:vgpr(<2 x s32>) = G_BUILD_VECTOR %1, %2 - %4:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG %0, %3 :: (load store seq_cst (s32), addrspace 0) + %1:vgpr(i32) = COPY $vgpr2 + %2:vgpr(i32) = COPY $vgpr3 + %3:vgpr(<2 x i32>) = G_BUILD_VECTOR %1(i32), %2(i32) + %4:vgpr(i32) = G_AMDGPU_ATOMIC_CMPXCHG %0(p0), %3 :: (load store seq_cst (i32)) ... @@ -489,7 +489,7 @@ body: | ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX7-NEXT: FLAT_ATOMIC_CMPSWAP_X2 [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX7-NEXT: FLAT_ATOMIC_CMPSWAP_X2 [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (i64)) ; ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 @@ -498,7 +498,7 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX9-NEXT: FLAT_ATOMIC_CMPSWAP_X2 [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX9-NEXT: FLAT_ATOMIC_CMPSWAP_X2 [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (i64)) ; ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 @@ -507,7 +507,7 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX10-NEXT: FLAT_ATOMIC_CMPSWAP_X2 [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX10-NEXT: FLAT_ATOMIC_CMPSWAP_X2 [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (i64)) ; ; GFX11-LABEL: name: 
amdgpu_atomic_cmpxchg_s64_flat_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 @@ -516,7 +516,7 @@ body: | ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX11-NEXT: FLAT_ATOMIC_CMPSWAP_X2 [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX11-NEXT: FLAT_ATOMIC_CMPSWAP_X2 [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (i64)) ; ; GFX12-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_nortn ; GFX12: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 @@ -525,11 +525,11 @@ body: | ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX12-NEXT: FLAT_ATOMIC_CMPSWAP_X2 [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX12-NEXT: FLAT_ATOMIC_CMPSWAP_X2 [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (i64)) %0:vgpr(p0) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = COPY $vgpr2_vgpr3 - %2:vgpr(s64) = COPY $vgpr4_vgpr5 - %3:vgpr(<2 x s64>) = G_BUILD_VECTOR %1, %2 - %4:vgpr(s64) = G_AMDGPU_ATOMIC_CMPXCHG %0, %3 :: (load store seq_cst (s64), addrspace 0) + %1:vgpr(i64) = COPY $vgpr2_vgpr3 + %2:vgpr(i64) = COPY $vgpr4_vgpr5 + %3:vgpr(<2 x i64>) = G_BUILD_VECTOR %1(i64), %2(i64) + %4:vgpr(i64) = G_AMDGPU_ATOMIC_CMPXCHG %0(p0), %3 :: (load store seq_cst (i64)) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-global.mir index 09eb77fcbdd9d..cafda790d2965 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-global.mir @@ -29,7 +29,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX6-NEXT: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 ; GFX6-NEXT: $vgpr0 = COPY [[COPY3]] ; @@ -45,7 +45,7 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, 
implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX7-NEXT: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 ; GFX7-NEXT: $vgpr0 = COPY [[COPY3]] ; @@ -56,7 +56,7 @@ body: | ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s32_global @@ -66,7 +66,7 @@ body: | ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX8-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32), addrspace 1) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global @@ -76,7 +76,7 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX9-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]] ; ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global @@ -86,14 +86,14 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s32) = COPY $vgpr3 - %3:vgpr(<2 x s32>) = G_BUILD_VECTOR %1, %2 - %4:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG %0, %3 :: (load store seq_cst (s32), addrspace 1) - $vgpr0 = COPY %4 
+ %1:vgpr(i32) = COPY $vgpr2 + %2:vgpr(i32) = COPY $vgpr3 + %3:vgpr(<2 x i32>) = G_BUILD_VECTOR %1(i32), %2(i32) + %4:vgpr(i32) = G_AMDGPU_ATOMIC_CMPXCHG %0(p1), %3 :: (load store seq_cst (i32), addrspace 1) + $vgpr0 = COPY %4(i32) ... @@ -118,7 +118,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 4, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX6-NEXT: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 4, 1, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 ; GFX6-NEXT: $vgpr0 = COPY [[COPY3]] ; @@ -134,7 +134,7 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 4, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX7-NEXT: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 4, 1, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 ; GFX7-NEXT: $vgpr0 = COPY [[COPY3]] ; @@ -153,7 +153,7 @@ body: | ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gep4 @@ -171,7 +171,7 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE 
[[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX8-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32), addrspace 1) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gep4 @@ -181,7 +181,7 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX9-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]] ; ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gep4 @@ -191,16 +191,16 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s32) = COPY $vgpr3 - %3:vgpr(<2 x s32>) = G_BUILD_VECTOR %1, %2 - %4:vgpr(s64) = G_CONSTANT i64 4 - %5:vgpr(p1) = G_PTR_ADD %0, %4 - %6:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG %5, %3 :: (load store seq_cst (s32), addrspace 1) - $vgpr0 = COPY %6 + %1:vgpr(i32) = COPY $vgpr2 + %2:vgpr(i32) = COPY $vgpr3 + %3:vgpr(<2 x i32>) = G_BUILD_VECTOR %1(i32), %2(i32) + %4:vgpr(i64) = G_CONSTANT i64 4 + %5:vgpr(p1) = G_PTR_ADD %0, %4(i64) + %6:vgpr(i32) = G_AMDGPU_ATOMIC_CMPXCHG %5(p1), %3 :: (load store seq_cst (i32), addrspace 1) + $vgpr0 = COPY %6(i32) ... 
@@ -225,7 +225,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX6-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst (i64), addrspace 1) ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1 ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]] ; @@ -241,7 +241,7 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX7-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst (i64), addrspace 1) ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1 ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]] ; @@ -252,7 +252,7 @@ body: | ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX7-FLAT-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i64), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] ; ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s64_global @@ -262,7 +262,7 @@ body: | ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX8-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) + ; GFX8-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i64), addrspace 1) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] ; ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_global @@ -272,7 +272,7 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY 
$vgpr2_vgpr3 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX9-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst (i64), addrspace 1) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN]] ; ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_global @@ -282,14 +282,14 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX10-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst (i64), addrspace 1) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = COPY $vgpr2_vgpr3 - %2:vgpr(s64) = COPY $vgpr4_vgpr5 - %3:vgpr(<2 x s64>) = G_BUILD_VECTOR %1, %2 - %4:vgpr(s64) = G_AMDGPU_ATOMIC_CMPXCHG %0, %3 :: (load store seq_cst (s64), addrspace 1) - $vgpr0_vgpr1 = COPY %4 + %1:vgpr(i64) = COPY $vgpr2_vgpr3 + %2:vgpr(i64) = COPY $vgpr4_vgpr5 + %3:vgpr(<2 x i64>) = G_BUILD_VECTOR %1(i64), %2(i64) + %4:vgpr(i64) = G_AMDGPU_ATOMIC_CMPXCHG %0(p1), %3 :: (load store seq_cst (i64), addrspace 1) + $vgpr0_vgpr1 = COPY %4(i64) ... 
@@ -314,7 +314,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 4, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX6-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 4, 1, implicit $exec :: (load store seq_cst (i64), addrspace 1) ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1 ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]] ; @@ -330,7 +330,7 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 4, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX7-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 4, 1, implicit $exec :: (load store seq_cst (i64), addrspace 1) ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1 ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]] ; @@ -349,7 +349,7 @@ body: | ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i64), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] ; ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_gep4 @@ -367,7 +367,7 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN 
[[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) + ; GFX8-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i64), addrspace 1) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] ; ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_gep4 @@ -377,7 +377,7 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX9-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec :: (load store seq_cst (i64), addrspace 1) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN]] ; ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_gep4 @@ -387,16 +387,16 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX10-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec :: (load store seq_cst (i64), addrspace 1) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = COPY $vgpr2_vgpr3 - %2:vgpr(s64) = COPY $vgpr4_vgpr5 - %3:vgpr(<2 x s64>) = G_BUILD_VECTOR %1, %2 - %4:vgpr(s64) = G_CONSTANT i64 4 - %5:vgpr(p1) = G_PTR_ADD %0, %4 - %6:vgpr(s64) = G_AMDGPU_ATOMIC_CMPXCHG %5, %3 :: (load store seq_cst (s64), addrspace 1) - $vgpr0_vgpr1 = COPY %6 + %1:vgpr(i64) = COPY $vgpr2_vgpr3 + %2:vgpr(i64) = COPY $vgpr4_vgpr5 + %3:vgpr(<2 x i64>) = G_BUILD_VECTOR %1(i64), %2(i64) + %4:vgpr(i64) = G_CONSTANT i64 4 + %5:vgpr(p1) = G_PTR_ADD %0, %4(i64) + %6:vgpr(i64) = G_AMDGPU_ATOMIC_CMPXCHG %5(p1), %3 :: (load store seq_cst (i64), addrspace 1) + $vgpr0_vgpr1 = COPY %6(i64) ... 
@@ -429,7 +429,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX6-NEXT: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 1, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 ; GFX6-NEXT: $vgpr0 = COPY [[COPY7]] ; @@ -453,7 +453,7 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX7-NEXT: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 1, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 ; GFX7-NEXT: $vgpr0 = COPY [[COPY7]] ; @@ -472,7 +472,7 @@ body: | ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gepm4 @@ -490,7 +490,7 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec, 
implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX8-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32), addrspace 1) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gepm4 @@ -500,7 +500,7 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX9-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], -4, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], -4, 1, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]] ; ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gepm4 @@ -510,16 +510,16 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], -4, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], -4, 1, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s32) = COPY $vgpr3 - %3:vgpr(<2 x s32>) = G_BUILD_VECTOR %1, %2 - %4:vgpr(s64) = G_CONSTANT i64 -4 - %5:vgpr(p1) = G_PTR_ADD %0, %4 - %6:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG %5, %3 :: (load store seq_cst (s32), addrspace 1) - $vgpr0 = COPY %6 + %1:vgpr(i32) = COPY $vgpr2 + %2:vgpr(i32) = COPY $vgpr3 + %3:vgpr(<2 x i32>) = G_BUILD_VECTOR %1(i32), %2(i32) + %4:vgpr(i64) = G_CONSTANT i64 -4 + %5:vgpr(p1) = G_PTR_ADD %0, %4(i64) + %6:vgpr(i32) = G_AMDGPU_ATOMIC_CMPXCHG %5(p1), %3 :: (load store seq_cst (i32), addrspace 1) + $vgpr0 = COPY %6(i32) ... 
@@ -544,7 +544,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_ATOMIC_CMPSWAP_ADDR64 [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX6-NEXT: BUFFER_ATOMIC_CMPSWAP_ADDR64 [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 0, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 @@ -558,7 +558,7 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX7-NEXT: BUFFER_ATOMIC_CMPSWAP_ADDR64 [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX7-NEXT: BUFFER_ATOMIC_CMPSWAP_ADDR64 [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 0, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; ; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_nortn ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 @@ -567,7 +567,7 @@ body: | ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX7-FLAT-NEXT: FLAT_ATOMIC_CMPSWAP [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX7-FLAT-NEXT: FLAT_ATOMIC_CMPSWAP [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32), addrspace 1) ; ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_nortn ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 @@ -576,7 +576,7 @@ body: | ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX8-NEXT: FLAT_ATOMIC_CMPSWAP [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX8-NEXT: FLAT_ATOMIC_CMPSWAP [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32), addrspace 1) ; ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 @@ -585,7 +585,7 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX9-NEXT: GLOBAL_ATOMIC_CMPSWAP [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX9-NEXT: GLOBAL_ATOMIC_CMPSWAP [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 @@ -594,12 +594,12 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; 
GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX10-NEXT: GLOBAL_ATOMIC_CMPSWAP [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX10-NEXT: GLOBAL_ATOMIC_CMPSWAP [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store seq_cst (i32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s32) = COPY $vgpr3 - %3:vgpr(<2 x s32>) = G_BUILD_VECTOR %1, %2 - %4:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG %0, %3 :: (load store seq_cst (s32), addrspace 1) + %1:vgpr(i32) = COPY $vgpr2 + %2:vgpr(i32) = COPY $vgpr3 + %3:vgpr(<2 x i32>) = G_BUILD_VECTOR %1(i32), %2(i32) + %4:vgpr(i32) = G_AMDGPU_ATOMIC_CMPXCHG %0(p1), %3 :: (load store seq_cst (i32), addrspace 1) ... @@ -624,7 +624,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_ATOMIC_CMPSWAP_X2_ADDR64 [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX6-NEXT: BUFFER_ATOMIC_CMPSWAP_X2_ADDR64 [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 0, implicit $exec :: (load store seq_cst (i64), addrspace 1) ; ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 @@ -638,7 +638,7 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX7-NEXT: BUFFER_ATOMIC_CMPSWAP_X2_ADDR64 [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX7-NEXT: BUFFER_ATOMIC_CMPSWAP_X2_ADDR64 [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 0, implicit $exec :: (load store seq_cst (i64), addrspace 1) ; ; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_nortn ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 @@ -647,7 +647,7 @@ body: | ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX7-FLAT-NEXT: FLAT_ATOMIC_CMPSWAP_X2 [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) + ; GFX7-FLAT-NEXT: FLAT_ATOMIC_CMPSWAP_X2 [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (i64), addrspace 1) ; ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_nortn ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 @@ -656,7 +656,7 @@ body: | ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX8-NEXT: FLAT_ATOMIC_CMPSWAP_X2 [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), 
addrspace 1) + ; GFX8-NEXT: FLAT_ATOMIC_CMPSWAP_X2 [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (i64), addrspace 1) ; ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 @@ -665,7 +665,7 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX9-NEXT: GLOBAL_ATOMIC_CMPSWAP_X2 [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX9-NEXT: GLOBAL_ATOMIC_CMPSWAP_X2 [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store seq_cst (i64), addrspace 1) ; ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 @@ -674,12 +674,12 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX10-NEXT: GLOBAL_ATOMIC_CMPSWAP_X2 [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX10-NEXT: GLOBAL_ATOMIC_CMPSWAP_X2 [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store seq_cst (i64), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = COPY $vgpr2_vgpr3 - %2:vgpr(s64) = COPY $vgpr4_vgpr5 - %3:vgpr(<2 x s64>) = G_BUILD_VECTOR %1, %2 - %4:vgpr(s64) = G_AMDGPU_ATOMIC_CMPXCHG %0, %3 :: (load store seq_cst (s64), addrspace 1) + %1:vgpr(i64) = COPY $vgpr2_vgpr3 + %2:vgpr(i64) = COPY $vgpr4_vgpr5 + %3:vgpr(<2 x i64>) = G_BUILD_VECTOR %1(i64), %2(i64) + %4:vgpr(i64) = G_AMDGPU_ATOMIC_CMPXCHG %0(p1), %3 :: (load store seq_cst (i64), addrspace 1) ... 
@@ -703,7 +703,7 @@ body: | ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX6-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN]].sub0 ; GFX6-NEXT: $vgpr0 = COPY [[COPY3]] ; @@ -718,7 +718,7 @@ body: | ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX7-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN]].sub0 ; GFX7-NEXT: $vgpr0 = COPY [[COPY3]] ; @@ -730,7 +730,7 @@ body: | ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[COPY]] - ; GFX7-FLAT-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY3]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY3]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr @@ -741,7 +741,7 @@ body: | ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[COPY]] - ; GFX8-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY3]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX8-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY3]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32), addrspace 1) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr @@ -752,7 +752,7 @@ body: | ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 ; GFX9-NEXT: 
[[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX9-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN]] ; ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr @@ -763,14 +763,14 @@ body: | ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX10-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 - %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s32) = COPY $vgpr3 - %3:vgpr(<2 x s32>) = G_BUILD_VECTOR %1, %2 - %4:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG %0, %3 :: (load store seq_cst (s32), addrspace 1) - $vgpr0 = COPY %4 + %1:vgpr(i32) = COPY $vgpr2 + %2:vgpr(i32) = COPY $vgpr3 + %3:vgpr(<2 x i32>) = G_BUILD_VECTOR %1(i32), %2(i32) + %4:vgpr(i32) = G_AMDGPU_ATOMIC_CMPXCHG %0(p1), %3 :: (load store seq_cst (i32), addrspace 1) + $vgpr0 = COPY %4(i32) ... 
@@ -794,7 +794,7 @@ body: | ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 4095, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX6-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 4095, 1, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN]].sub0 ; GFX6-NEXT: $vgpr0 = COPY [[COPY3]] ; @@ -809,7 +809,7 @@ body: | ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 4095, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX7-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 4095, 1, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN]].sub0 ; GFX7-NEXT: $vgpr0 = COPY [[COPY3]] ; @@ -829,7 +829,7 @@ body: | ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 ; GFX7-FLAT-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] - ; GFX7-FLAT-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY7]], [[REG_SEQUENCE1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY7]], [[REG_SEQUENCE1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr_offset_4095 @@ -848,7 +848,7 @@ body: | ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] - ; GFX8-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY7]], [[REG_SEQUENCE1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX8-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY7]], [[REG_SEQUENCE1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32), addrspace 1) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr_offset_4095 @@ -859,7 +859,7 @@ body: | ; 
GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX9-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY]], 4095, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY]], 4095, 1, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN]] ; ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr_offset_4095 @@ -870,15 +870,15 @@ body: | ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec - ; GFX10-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY]], 2047, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY]], 2047, 1, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 - %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s32) = COPY $vgpr3 - %3:sgpr(s64) = G_CONSTANT i64 4095 - %4:sgpr(p1) = G_PTR_ADD %0, %3 - %5:vgpr(<2 x s32>) = G_BUILD_VECTOR %1, %2 - %6:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG %4, %5 :: (load store seq_cst (s32), addrspace 1) - $vgpr0 = COPY %6 + %1:vgpr(i32) = COPY $vgpr2 + %2:vgpr(i32) = COPY $vgpr3 + %3:sgpr(i64) = G_CONSTANT i64 4095 + %4:sgpr(p1) = G_PTR_ADD %0, %3(i64) + %5:vgpr(<2 x i32>) = G_BUILD_VECTOR %1(i32), %2(i32) + %6:vgpr(i32) = G_AMDGPU_ATOMIC_CMPXCHG %4(p1), %5 :: (load store seq_cst (i32), addrspace 1) + $vgpr0 = COPY %6(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-ffbh-u32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-ffbh-u32.mir index f4e0c69dfb85b..59b63ea16970a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-ffbh-u32.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-ffbh-u32.mir @@ -18,9 +18,9 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; CHECK-NEXT: [[S_FLBIT_I32_B32_:%[0-9]+]]:sreg_32 = S_FLBIT_I32_B32 [[COPY]] ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_FLBIT_I32_B32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = G_AMDGPU_FFBH_U32 %0 - S_ENDPGM 0, implicit %1 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = G_AMDGPU_FFBH_U32 %0(i32) + S_ENDPGM 0, implicit %1(i32) ... @@ -41,9 +41,9 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[V_FFBH_U32_e64_:%[0-9]+]]:vgpr_32 = V_FFBH_U32_e64 [[COPY]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_FFBH_U32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = G_AMDGPU_FFBH_U32 %0 - S_ENDPGM 0, implicit %1 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = G_AMDGPU_FFBH_U32 %0(i32) + S_ENDPGM 0, implicit %1(i32) ... 
@@ -64,8 +64,8 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; CHECK-NEXT: [[V_FFBH_U32_e64_:%[0-9]+]]:vgpr_32 = V_FFBH_U32_e64 [[COPY]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_FFBH_U32_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = G_AMDGPU_FFBH_U32 %0 - S_ENDPGM 0, implicit %1 + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = G_AMDGPU_FFBH_U32 %0(i32) + S_ENDPGM 0, implicit %1(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-ffbl-b32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-ffbl-b32.mir index bf2f9367ae8e9..88eab9138f0df 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-ffbl-b32.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-ffbl-b32.mir @@ -18,9 +18,9 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; CHECK-NEXT: [[S_FF1_I32_B32_:%[0-9]+]]:sreg_32 = S_FF1_I32_B32 [[COPY]] ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_FF1_I32_B32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = G_AMDGPU_FFBL_B32 %0 - S_ENDPGM 0, implicit %1 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = G_AMDGPU_FFBL_B32 %0(i32) + S_ENDPGM 0, implicit %1(i32) ... @@ -41,9 +41,9 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[V_FFBL_B32_e64_:%[0-9]+]]:vgpr_32 = V_FFBL_B32_e64 [[COPY]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_FFBL_B32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = G_AMDGPU_FFBL_B32 %0 - S_ENDPGM 0, implicit %1 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = G_AMDGPU_FFBL_B32 %0(i32) + S_ENDPGM 0, implicit %1(i32) ... @@ -64,8 +64,8 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; CHECK-NEXT: [[V_FFBL_B32_e64_:%[0-9]+]]:vgpr_32 = V_FFBL_B32_e64 [[COPY]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_FFBL_B32_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = G_AMDGPU_FFBL_B32 %0 - S_ENDPGM 0, implicit %1 + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = G_AMDGPU_FFBL_B32 %0(i32) + S_ENDPGM 0, implicit %1(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-wave-address.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-wave-address.mir index 18fb4f7415cc0..cceb13e2af84b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-wave-address.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-wave-address.mir @@ -19,7 +19,7 @@ body: | ; WAVE64: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_LSHR_B32_]] %0:sgpr(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 - S_ENDPGM 0, implicit %0 + S_ENDPGM 0, implicit %0(p5) ... --- @@ -39,5 +39,5 @@ body: | ; WAVE64: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]] %0:vgpr(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 - S_ENDPGM 0, implicit %0 + S_ENDPGM 0, implicit %0(p5) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-and.mir index cbf82daca0d2a..22ce86ae7b702 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-and.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-and.mir @@ -35,13 +35,13 @@ body: | ; WAVE32-NEXT: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = G_CONSTANT i32 0 - %3:vcc(s1) = G_ICMP intpred(eq), %0, %2 - %4:vcc(s1) = G_ICMP intpred(eq), %1, %2 - %5:vcc(s1) = G_AND %3, %4 - S_ENDPGM 0, implicit %5 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = G_CONSTANT i32 0 + %3:vcc(i1) = G_ICMP intpred(eq), %0(i32), %2 + %4:vcc(i1) = G_ICMP intpred(eq), %1(i32), %2 + %5:vcc(i1) = G_AND %3, %4 + S_ENDPGM 0, implicit %5(i1) ... # Should fail to select @@ -70,12 +70,12 @@ body: | ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[COPY1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s1) = G_TRUNC %0 - %3:sgpr(s1) = G_TRUNC %1 - %4:sgpr(s1) = G_AND %2, %3 - S_ENDPGM 0, implicit %4 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i1) = G_TRUNC %0(i32) + %3:sgpr(i1) = G_TRUNC %1(i32) + %4:sgpr(i1) = G_AND %2, %3 + S_ENDPGM 0, implicit %4(i1) ... --- @@ -103,12 +103,12 @@ body: | ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[COPY1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s16) = G_TRUNC %0 - %3:sgpr(s16) = G_TRUNC %1 - %4:sgpr(s16) = G_AND %2, %3 - S_ENDPGM 0, implicit %4 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i16) = G_TRUNC %0(i32) + %3:sgpr(i16) = G_TRUNC %1(i32) + %4:sgpr(i16) = G_AND %2, %3 + S_ENDPGM 0, implicit %4(i16) ... --- @@ -136,12 +136,12 @@ body: | ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; WAVE32-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vgpr(s16) = G_AND %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %4:vgpr(i16) = G_AND %2, %3 + S_ENDPGM 0, implicit %4(i16) ... --- @@ -169,10 +169,10 @@ body: | ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[COPY1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = G_AND %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i32) = G_AND %0, %1 + S_ENDPGM 0, implicit %2(i32) ... 
--- @@ -200,10 +200,10 @@ body: | ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 ; WAVE32-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[COPY1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]] - %0:sgpr(s64) = COPY $sgpr0_sgpr1 - %1:sgpr(s64) = COPY $sgpr2_sgpr3 - %2:sgpr(s64) = G_AND %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i64) = COPY $sgpr0_sgpr1 + %1:sgpr(i64) = COPY $sgpr2_sgpr3 + %2:sgpr(i64) = G_AND %0, %1 + S_ENDPGM 0, implicit %2(i64) ... --- @@ -231,10 +231,10 @@ body: | ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[COPY1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] - %0:sgpr(<2 x s16>) = COPY $sgpr0 - %1:sgpr(<2 x s16>) = COPY $sgpr1 - %2:sgpr(<2 x s16>) = G_AND %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(<2 x i16>) = COPY $sgpr0 + %1:sgpr(<2 x i16>) = COPY $sgpr1 + %2:sgpr(<2 x i16>) = G_AND %0, %1 + S_ENDPGM 0, implicit %2(<2 x i16>) ... --- @@ -262,10 +262,10 @@ body: | ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 ; WAVE32-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[COPY1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]] - %0:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1 - %1:sgpr(<2 x s32>) = COPY $sgpr2_sgpr3 - %2:sgpr(<2 x s32>) = G_AND %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(<2 x i32>) = COPY $sgpr0_sgpr1 + %1:sgpr(<2 x i32>) = COPY $sgpr2_sgpr3 + %2:sgpr(<2 x i32>) = G_AND %0, %1 + S_ENDPGM 0, implicit %2(<2 x i32>) ... --- @@ -293,10 +293,10 @@ body: | ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 ; WAVE32-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[COPY1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]] - %0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 - %1:sgpr(<4 x s16>) = COPY $sgpr2_sgpr3 - %2:sgpr(<4 x s16>) = G_AND %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(<4 x i16>) = COPY $sgpr0_sgpr1 + %1:sgpr(<4 x i16>) = COPY $sgpr2_sgpr3 + %2:sgpr(<4 x i16>) = G_AND %0, %1 + S_ENDPGM 0, implicit %2(<4 x i16>) ... --- @@ -324,10 +324,10 @@ body: | ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; WAVE32-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = G_AND %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = G_AND %0, %1 + S_ENDPGM 0, implicit %2(i32) ... --- @@ -355,10 +355,10 @@ body: | ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; WAVE32-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] - %0:vgpr(<2 x s16>) = COPY $vgpr0 - %1:vgpr(<2 x s16>) = COPY $vgpr1 - %2:vgpr(<2 x s16>) = G_AND %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(<2 x i16>) = COPY $vgpr0 + %1:vgpr(<2 x i16>) = COPY $vgpr1 + %2:vgpr(<2 x i16>) = G_AND %0, %1 + S_ENDPGM 0, implicit %2(<2 x i16>) ... 
@@ -376,22 +376,22 @@ body: | ; WAVE64-LABEL: name: and_s64_vgpr_vgpr_vgpr ; WAVE64: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; WAVE64-NEXT: [[AND:%[0-9]+]]:vgpr(s64) = G_AND [[COPY]], [[COPY1]] - ; WAVE64-NEXT: S_ENDPGM 0, implicit [[AND]](s64) + ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr2_vgpr3 + ; WAVE64-NEXT: [[AND:%[0-9]+]]:vgpr(i64) = G_AND [[COPY]], [[COPY1]] + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[AND]](i64) ; ; WAVE32-LABEL: name: and_s64_vgpr_vgpr_vgpr ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; WAVE32-NEXT: [[AND:%[0-9]+]]:vgpr(s64) = G_AND [[COPY]], [[COPY1]] - ; WAVE32-NEXT: S_ENDPGM 0, implicit [[AND]](s64) - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = COPY $vgpr2_vgpr3 - %2:vgpr(s64) = G_AND %0, %1 - S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr2_vgpr3 + ; WAVE32-NEXT: [[AND:%[0-9]+]]:vgpr(i64) = G_AND [[COPY]], [[COPY1]] + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[AND]](i64) + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(i64) = COPY $vgpr2_vgpr3 + %2:vgpr(i64) = G_AND %0, %1 + S_ENDPGM 0, implicit %2(i64) ... --- @@ -427,14 +427,14 @@ body: | ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_1]], implicit $exec ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s1) = G_TRUNC %0 - %3:vgpr(s1) = G_TRUNC %1 - %4:vcc(s1) = COPY %2 - %5:vcc(s1) = COPY %3 - %6:vcc(s1) = G_AND %4, %5 - S_ENDPGM 0, implicit %6 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i1) = G_TRUNC %0(i32) + %3:vgpr(i1) = G_TRUNC %1(i32) + %4:vcc(i1) = COPY %2(i1) + %5:vcc(i1) = COPY %3(i1) + %6:vcc(i1) = G_AND %4, %5 + S_ENDPGM 0, implicit %6(i1) ... # The selector for the copy of the and result may constrain the result @@ -475,17 +475,17 @@ body: | ; WAVE32-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_AND_B32_1]] ; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY1]] - %1:vgpr(s32) = COPY $vgpr0 - %0:vgpr(s1) = G_TRUNC %1(s32) - %sgpr0:sgpr(s32) = COPY $sgpr0 - %2:sgpr(s1) = G_TRUNC %sgpr0 - %6:sgpr(s32) = G_CONSTANT i32 0 - %7:sgpr(p1) = G_IMPLICIT_DEF - %9:vcc(s1) = COPY %0(s1) - %10:vcc(s1) = COPY %2(s1) - %8:vcc(s1) = G_AND %9, %10 - %3:sreg_32_xm0(s1) = COPY %8(s1) - S_ENDPGM 0, implicit %3 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i1) = G_TRUNC %0(i32) + %sgpr0:sgpr(i32) = COPY $sgpr0 + %3:sgpr(i1) = G_TRUNC %sgpr0(i32) + %4:sgpr(i32) = G_CONSTANT i32 0 + %5:sgpr(f64) = G_IMPLICIT_DEF + %6:vcc(i1) = COPY %1(i1) + %7:vcc(i1) = COPY %3(i1) + %8:vcc(i1) = G_AND %6, %7 + %9:sreg_32_xm0(i1) = COPY %8(i1) + S_ENDPGM 0, implicit %9(i1) ... 
@@ -523,17 +523,17 @@ body: | ; WAVE32-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY [[S_AND_B32_1]] ; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY1]] - %1:vgpr(s32) = COPY $vgpr0 - %0:vgpr(s1) = G_TRUNC %1(s32) - %sgpr0:sgpr(s32) = COPY $sgpr0 - %2:sgpr(s1) = G_TRUNC %sgpr0 - %6:sgpr(s32) = G_CONSTANT i32 0 - %7:sgpr(p1) = G_IMPLICIT_DEF - %9:vcc(s1) = COPY %0(s1) - %10:vcc(s1) = COPY %2(s1) - %8:vcc(s1) = G_AND %9, %10 - %3:sreg_64_xexec(s1) = COPY %8(s1) - S_ENDPGM 0, implicit %3 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i1) = G_TRUNC %0(i32) + %sgpr0:sgpr(i32) = COPY $sgpr0 + %3:sgpr(i1) = G_TRUNC %sgpr0(i32) + %4:sgpr(i32) = G_CONSTANT i32 0 + %5:sgpr(f64) = G_IMPLICIT_DEF + %6:vcc(i1) = COPY %1(i1) + %7:vcc(i1) = COPY %3(i1) + %8:vcc(i1) = G_AND %6, %7 + %9:sreg_64_xexec(i1) = COPY %8(i1) + S_ENDPGM 0, implicit %9(i1) ... @@ -562,8 +562,8 @@ body: | ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[COPY1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sreg_32(s32) = G_AND %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sreg_32(i32) = G_AND %0, %1 + S_ENDPGM 0, implicit %2(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-anyext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-anyext.mir index c80a690e24537..ded4f516f1629 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-anyext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-anyext.mir @@ -17,10 +17,10 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GCN-NEXT: $sgpr0 = COPY [[COPY]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0 - %2:sgpr(s32) = G_ANYEXT %1 - $sgpr0 = COPY %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(i32) = G_ANYEXT %1(i16) + $sgpr0 = COPY %2(i32) ... @@ -40,9 +40,9 @@ body: | ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32_xm0 = IMPLICIT_DEF ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s64) = G_ANYEXT %0 - S_ENDPGM 0, implicit %1 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i64) = G_ANYEXT %0(i32) + S_ENDPGM 0, implicit %1(i64) ... @@ -62,10 +62,10 @@ body: | ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0 - %2:sgpr(s64) = G_ANYEXT %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(i64) = G_ANYEXT %1(i16) + S_ENDPGM 0, implicit %2(i64) ... @@ -85,9 +85,9 @@ body: | ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s64) = G_ANYEXT %0 - S_ENDPGM 0, implicit %1 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i64) = G_ANYEXT %0(i32) + S_ENDPGM 0, implicit %1(i64) ... 
@@ -100,9 +100,9 @@ body: | bb.0: liveins: $vgpr0 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s16) = G_TRUNC %0 - %2:vgpr(s64) = G_ANYEXT %1 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i16) = G_TRUNC %0 + %2:vgpr(i64) = G_ANYEXT %1 S_ENDPGM 0, implicit %2 ... @@ -117,9 +117,9 @@ body: | # bb.0: # liveins: $vgpr0 -# %0:vgpr(s32) = COPY $vgpr0 -# %1:vcc(s1) = G_ICMP intpred(eq), %0, %0 -# %2:vgpr(s32) = G_ANYEXT %1 +# %0:vgpr(i32) = COPY $vgpr0 +# %1:vcc(i1) = G_ICMP intpred(eq), %0, %0 +# %2:vgpr(i32) = G_ANYEXT %1 # $vgpr0 = COPY %2 # ... @@ -132,18 +132,10 @@ body: | bb.0: liveins: $sgpr0 - ; GCN-LABEL: name: anyext_sgpr_s1_to_sgpr_s16 - ; GCN: liveins: $sgpr0 - ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 - ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_MOV_B32_]], [[COPY]], implicit-def dead $scc - ; GCN-NEXT: $sgpr0 = COPY [[S_AND_B32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s1) = G_TRUNC %0 - %2:sgpr(s16) = G_ANYEXT %1 - %3:sgpr(s32) = G_ZEXT %2 - $sgpr0 = COPY %3 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i16) = G_TRUNC %0(i32) + %2:vgpr(i64) = G_ANYEXT %1(i16) + S_ENDPGM 0, implicit %2(i64) ... --- @@ -160,10 +152,10 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GCN-NEXT: $sgpr0 = COPY [[COPY]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s1) = G_TRUNC %0 - %2:sgpr(s32) = G_ANYEXT %1 - $sgpr0 = COPY %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i1) = G_TRUNC %0(i32) + %2:sgpr(i32) = G_ANYEXT %1(i1) + $sgpr0 = COPY %2(i32) ... --- @@ -182,10 +174,10 @@ body: | ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[REG_SEQUENCE]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s1) = G_TRUNC %0 - %2:sgpr(s64) = G_ANYEXT %1 - $sgpr0_sgpr1 = COPY %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i1) = G_TRUNC %0(i32) + %2:sgpr(i64) = G_ANYEXT %1(i1) + $sgpr0_sgpr1 = COPY %2(i64) ... --- @@ -204,11 +196,11 @@ body: | ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec ; GCN-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s1) = G_TRUNC %0 - %2:vgpr(s16) = G_ANYEXT %1 - %3:vgpr(s32) = G_ZEXT %2 - $vgpr0 = COPY %3 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i1) = G_TRUNC %0(i32) + %2:vgpr(i16) = G_ANYEXT %1(i1) + %3:vgpr(i32) = G_ZEXT %2(i16) + $vgpr0 = COPY %3(i32) ... --- @@ -225,10 +217,10 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN-NEXT: $vgpr0 = COPY [[COPY]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s1) = G_TRUNC %0 - %2:vgpr(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i1) = G_TRUNC %0(i32) + %2:vgpr(i32) = G_ANYEXT %1(i1) + $vgpr0 = COPY %2(i32) ... --- @@ -245,10 +237,10 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GCN-NEXT: $sgpr0 = COPY [[COPY]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s1) = G_TRUNC %0 - %2:sgpr(s32) = G_ANYEXT %1 - $sgpr0 = COPY %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i1) = G_TRUNC %0(i32) + %2:sgpr(i32) = G_ANYEXT %1(i1) + $sgpr0 = COPY %2(i32) ... 
--- @@ -265,10 +257,10 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN-NEXT: $vgpr0 = COPY [[COPY]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s16) = G_TRUNC %0 - %2:vgpr(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i16) = G_TRUNC %0(i32) + %2:vgpr(i32) = G_ANYEXT %1(i16) + $vgpr0 = COPY %2(i32) ... @@ -288,8 +280,8 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GCN-NEXT: $sgpr0 = COPY [[COPY]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sreg_32(s1) = G_TRUNC %0 - %2:sgpr(s32) = G_ANYEXT %1 - $sgpr0 = COPY %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:sreg_32(i1) = G_TRUNC %0(i32) + %2:sgpr(i32) = G_ANYEXT %1(i1) + $sgpr0 = COPY %2(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.mir index 21c784b641544..63ac5444ea402 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.mir @@ -53,10 +53,10 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GFX10-NEXT: [[S_ASHR_I32_:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[COPY]], [[COPY1]], implicit-def dead $scc ; GFX10-NEXT: S_ENDPGM 0, implicit [[S_ASHR_I32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = G_ASHR %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i32) = G_ASHR %0, %1(i32) + S_ENDPGM 0, implicit %2(i32) ... --- @@ -106,10 +106,10 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(s32) = G_ASHR %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:vgpr(i32) = G_ASHR %0, %1(i32) + S_ENDPGM 0, implicit %2(i32) ... --- @@ -159,10 +159,10 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX10-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr(s32) = COPY $sgpr0 - %2:vgpr(s32) = G_ASHR %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = COPY $sgpr0 + %2:vgpr(i32) = G_ASHR %0, %1(i32) + S_ENDPGM 0, implicit %2(i32) ... --- @@ -212,10 +212,10 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX10-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = G_ASHR %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = G_ASHR %0, %1(i32) + S_ENDPGM 0, implicit %2(i32) ... --- @@ -265,10 +265,10 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; GFX10-NEXT: [[S_ASHR_I64_:%[0-9]+]]:sreg_64 = S_ASHR_I64 [[COPY]], [[COPY1]], implicit-def dead $scc ; GFX10-NEXT: S_ENDPGM 0, implicit [[S_ASHR_I64_]] - %0:sgpr(s64) = COPY $sgpr0_sgpr1 - %1:sgpr(s32) = COPY $sgpr2 - %2:sgpr(s64) = G_ASHR %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i64) = COPY $sgpr0_sgpr1 + %1:sgpr(i32) = COPY $sgpr2 + %2:sgpr(i64) = G_ASHR %0, %1(i32) + S_ENDPGM 0, implicit %2(i64) ... 
--- @@ -318,10 +318,10 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: [[V_ASHRREV_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I64_e64_]] - %0:sgpr(s64) = COPY $sgpr0_sgpr1 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(s64) = G_ASHR %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i64) = COPY $sgpr0_sgpr1 + %1:vgpr(i32) = COPY $vgpr0 + %2:vgpr(i64) = G_ASHR %0, %1(i32) + S_ENDPGM 0, implicit %2(i64) ... --- @@ -371,10 +371,10 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX10-NEXT: [[V_ASHRREV_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I64_e64_]] - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:sgpr(s32) = COPY $sgpr0 - %2:vgpr(s64) = G_ASHR %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:sgpr(i32) = COPY $sgpr0 + %2:vgpr(i64) = G_ASHR %0, %1(i32) + S_ENDPGM 0, implicit %2(i64) ... --- @@ -424,8 +424,8 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX10-NEXT: [[V_ASHRREV_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I64_e64_]] - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s64) = G_ASHR %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(i32) = COPY $vgpr2 + %2:vgpr(i64) = G_ASHR %0, %1(i32) + S_ENDPGM 0, implicit %2(i64) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir index 95d2bae98df2e..66ac7c0a5be80 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir @@ -13,11 +13,11 @@ # RUN: FileCheck --check-prefix=ERR %s < %t # ERR-NOT: remark -# ERR: remark: :0:0: cannot select: %4:sgpr(s16) = G_ASHR %2:sgpr, %3:sgpr(s16) (in function: ashr_s16_s16_ss) -# ERR-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_ASHR %2:vgpr, %1:vgpr(s32) (in function: ashr_s16_s32_vv) -# ERR-NEXT: remark: :0:0: cannot select: %3:sgpr(s16) = G_ASHR %2:sgpr, %1:sgpr(s32) (in function: ashr_s16_s32_ss) -# ERR-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_ASHR %2:sgpr, %1:vgpr(s32) (in function: ashr_s16_s32_sv) -# ERR-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_ASHR %2:vgpr, %1:sgpr(s32) (in function: ashr_s16_s32_vs) +# ERR: remark: :0:0: cannot select: %4:sgpr(i16) = G_ASHR %2:sgpr, %3:sgpr(i16) (in function: ashr_s16_s16_ss) +# ERR-NEXT: remark: :0:0: cannot select: %3:vgpr(i16) = G_ASHR %2:vgpr, %1:vgpr(i32) (in function: ashr_s16_s32_vv) +# ERR-NEXT: remark: :0:0: cannot select: %3:sgpr(i16) = G_ASHR %2:sgpr, %1:sgpr(i32) (in function: ashr_s16_s32_ss) +# ERR-NEXT: remark: :0:0: cannot select: %3:vgpr(i16) = G_ASHR %2:sgpr, %1:vgpr(i32) (in function: ashr_s16_s32_sv) +# ERR-NEXT: remark: :0:0: cannot select: %3:vgpr(i16) = G_ASHR %2:vgpr, %1:sgpr(i32) (in function: ashr_s16_s32_vs) # ERR-NOT: remark --- @@ -32,45 +32,48 @@ body: | ; GFX8-LABEL: name: ashr_s16_s16_ss ; GFX8: liveins: $sgpr0, $sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:sgpr(s16) = G_ASHR [[TRUNC]], [[TRUNC1]](s16) - ; 
GFX8-NEXT: S_ENDPGM 0, implicit [[ASHR]](s16) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY1]](i32) + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:sgpr(i16) = G_ASHR [[TRUNC]], [[TRUNC1]](i16) + ; GFX8-NEXT: S_ENDPGM 0, implicit [[ASHR]](i16) + ; ; GFX9-LABEL: name: ashr_s16_s16_ss ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:sgpr(s16) = G_ASHR [[TRUNC]], [[TRUNC1]](s16) - ; GFX9-NEXT: S_ENDPGM 0, implicit [[ASHR]](s16) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:sgpr(i16) = G_ASHR [[TRUNC]], [[TRUNC1]](i16) + ; GFX9-NEXT: S_ENDPGM 0, implicit [[ASHR]](i16) + ; ; GFX10-LABEL: name: ashr_s16_s16_ss ; GFX10: liveins: $sgpr0, $sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) - ; GFX10-NEXT: [[ASHR:%[0-9]+]]:sgpr(s16) = G_ASHR [[TRUNC]], [[TRUNC1]](s16) - ; GFX10-NEXT: S_ENDPGM 0, implicit [[ASHR]](s16) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY1]](i32) + ; GFX10-NEXT: [[ASHR:%[0-9]+]]:sgpr(i16) = G_ASHR [[TRUNC]], [[TRUNC1]](i16) + ; GFX10-NEXT: S_ENDPGM 0, implicit [[ASHR]](i16) + ; ; GFX11-LABEL: name: ashr_s16_s16_ss ; GFX11: liveins: $sgpr0, $sgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) - ; GFX11-NEXT: [[ASHR:%[0-9]+]]:sgpr(s16) = G_ASHR [[TRUNC]], [[TRUNC1]](s16) - ; GFX11-NEXT: S_ENDPGM 0, implicit [[ASHR]](s16) - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s16) = G_TRUNC %0 - %3:sgpr(s16) = G_TRUNC %1 - %4:sgpr(s16) = G_ASHR %2, %3 - S_ENDPGM 0, implicit %4 + ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY1]](i32) + ; GFX11-NEXT: [[ASHR:%[0-9]+]]:sgpr(i16) = G_ASHR [[TRUNC]], [[TRUNC1]](i16) + ; GFX11-NEXT: S_ENDPGM 0, implicit [[ASHR]](i16) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i16) = G_TRUNC %0(i32) + %3:sgpr(i16) = G_TRUNC %1(i32) + %4:sgpr(i16) = G_ASHR %2, %3(i16) + S_ENDPGM 0, implicit %4(i16) ... 
--- @@ -88,6 +91,7 @@ body: | ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX8-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]] + ; ; GFX9-LABEL: name: ashr_s16_s16_vs ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} @@ -95,6 +99,7 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX9-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]] + ; ; GFX10-LABEL: name: ashr_s16_s16_vs ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} @@ -102,6 +107,7 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX10-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]] + ; ; GFX11-LABEL: name: ashr_s16_s16_vs ; GFX11: liveins: $sgpr0, $vgpr0 ; GFX11-NEXT: {{ $}} @@ -109,12 +115,12 @@ body: | ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX11-NEXT: [[V_ASHRREV_I16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_fake16_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_fake16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr(s32) = COPY $sgpr0 - %2:vgpr(s16) = G_TRUNC %0 - %3:sgpr(s16) = G_TRUNC %1 - %4:vgpr(s16) = G_ASHR %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = COPY $sgpr0 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:sgpr(i16) = G_TRUNC %1(i32) + %4:vgpr(i16) = G_ASHR %2, %3(i16) + S_ENDPGM 0, implicit %4(i16) ... --- @@ -129,40 +135,43 @@ body: | ; GFX8-LABEL: name: ashr_s16_s32_vv ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) - ; GFX8-NEXT: S_ENDPGM 0, implicit [[ASHR]](s16) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:vgpr(i16) = G_ASHR [[TRUNC]], [[COPY1]](i32) + ; GFX8-NEXT: S_ENDPGM 0, implicit [[ASHR]](i16) + ; ; GFX9-LABEL: name: ashr_s16_s32_vv ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) - ; GFX9-NEXT: S_ENDPGM 0, implicit [[ASHR]](s16) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:vgpr(i16) = G_ASHR [[TRUNC]], [[COPY1]](i32) + ; GFX9-NEXT: S_ENDPGM 0, implicit [[ASHR]](i16) + ; ; GFX10-LABEL: name: ashr_s16_s32_vv ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX10-NEXT: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) - ; GFX10-NEXT: S_ENDPGM 0, implicit [[ASHR]](s16) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = 
COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX10-NEXT: [[ASHR:%[0-9]+]]:vgpr(i16) = G_ASHR [[TRUNC]], [[COPY1]](i32) + ; GFX10-NEXT: S_ENDPGM 0, implicit [[ASHR]](i16) + ; ; GFX11-LABEL: name: ashr_s16_s32_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX11-NEXT: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) - ; GFX11-NEXT: S_ENDPGM 0, implicit [[ASHR]](s16) - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_ASHR %2, %1 - S_ENDPGM 0, implicit %3 + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX11-NEXT: [[ASHR:%[0-9]+]]:vgpr(i16) = G_ASHR [[TRUNC]], [[COPY1]](i32) + ; GFX11-NEXT: S_ENDPGM 0, implicit [[ASHR]](i16) + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_ASHR %2, %1(i32) + S_ENDPGM 0, implicit %3(i16) ... --- @@ -181,6 +190,7 @@ body: | ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]] + ; ; GFX9-LABEL: name: ashr_s16_s16_vv ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -188,6 +198,7 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]] + ; ; GFX10-LABEL: name: ashr_s16_s16_vv ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} @@ -195,6 +206,7 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX10-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]] + ; ; GFX11-LABEL: name: ashr_s16_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} @@ -202,12 +214,12 @@ body: | ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[V_ASHRREV_I16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_fake16_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_fake16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vgpr(s16) = G_ASHR %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %4:vgpr(i16) = G_ASHR %2, %3(i16) + S_ENDPGM 0, implicit %4(i16) ... 
--- @@ -226,6 +238,7 @@ body: | ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]] + ; ; GFX9-LABEL: name: ashr_s16_s16_vv_zext_to_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -233,6 +246,7 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]] + ; ; GFX10-LABEL: name: ashr_s16_s16_vv_zext_to_s32 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} @@ -242,6 +256,7 @@ body: | ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 ; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[V_ASHRREV_I16_e64_]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] + ; ; GFX11-LABEL: name: ashr_s16_s16_vv_zext_to_s32 ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} @@ -251,13 +266,13 @@ body: | ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 ; GFX11-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[V_ASHRREV_I16_fake16_e64_]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vgpr(s16) = G_ASHR %2, %3 - %5:vgpr(s32) = G_ZEXT %4 - S_ENDPGM 0, implicit %5 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %4:vgpr(i16) = G_ASHR %2, %3(i16) + %5:vgpr(i32) = G_ZEXT %4(i16) + S_ENDPGM 0, implicit %5(i32) ... --- @@ -278,6 +293,7 @@ body: | ; GFX8-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ASHRREV_I16_e64_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1 ; GFX8-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; ; GFX9-LABEL: name: ashr_s16_vv_zext_to_s64 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -287,6 +303,7 @@ body: | ; GFX9-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ASHRREV_I16_e64_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1 ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; ; GFX10-LABEL: name: ashr_s16_vv_zext_to_s64 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} @@ -299,6 +316,7 @@ body: | ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1 ; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; ; GFX11-LABEL: name: ashr_s16_vv_zext_to_s64 ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} @@ -311,13 +329,13 @@ body: | ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1 ; GFX11-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vgpr(s16) = G_ASHR %2, %3 - %5:vgpr(s64) = G_ZEXT %4 - S_ENDPGM 0, implicit %5 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + 
%3:vgpr(i16) = G_TRUNC %1(i32) + %4:vgpr(i16) = G_ASHR %2, %3(i16) + %5:vgpr(i64) = G_ZEXT %4(i16) + S_ENDPGM 0, implicit %5(i64) ... --- @@ -332,40 +350,43 @@ body: | ; GFX8-LABEL: name: ashr_s16_s32_ss ; GFX8: liveins: $sgpr0, $sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:sgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) - ; GFX8-NEXT: S_ENDPGM 0, implicit [[ASHR]](s16) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:sgpr(i16) = G_ASHR [[TRUNC]], [[COPY1]](i32) + ; GFX8-NEXT: S_ENDPGM 0, implicit [[ASHR]](i16) + ; ; GFX9-LABEL: name: ashr_s16_s32_ss ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:sgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) - ; GFX9-NEXT: S_ENDPGM 0, implicit [[ASHR]](s16) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:sgpr(i16) = G_ASHR [[TRUNC]], [[COPY1]](i32) + ; GFX9-NEXT: S_ENDPGM 0, implicit [[ASHR]](i16) + ; ; GFX10-LABEL: name: ashr_s16_s32_ss ; GFX10: liveins: $sgpr0, $sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX10-NEXT: [[ASHR:%[0-9]+]]:sgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) - ; GFX10-NEXT: S_ENDPGM 0, implicit [[ASHR]](s16) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX10-NEXT: [[ASHR:%[0-9]+]]:sgpr(i16) = G_ASHR [[TRUNC]], [[COPY1]](i32) + ; GFX10-NEXT: S_ENDPGM 0, implicit [[ASHR]](i16) + ; ; GFX11-LABEL: name: ashr_s16_s32_ss ; GFX11: liveins: $sgpr0, $sgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX11-NEXT: [[ASHR:%[0-9]+]]:sgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) - ; GFX11-NEXT: S_ENDPGM 0, implicit [[ASHR]](s16) - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s16) = G_TRUNC %0 - %3:sgpr(s16) = G_ASHR %2, %1 - S_ENDPGM 0, implicit %3 + ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX11-NEXT: [[ASHR:%[0-9]+]]:sgpr(i16) = G_ASHR [[TRUNC]], [[COPY1]](i32) + ; GFX11-NEXT: S_ENDPGM 0, implicit [[ASHR]](i16) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i16) = G_TRUNC %0(i32) + %3:sgpr(i16) = G_ASHR %2, %1(i32) + S_ENDPGM 0, implicit %3(i16) ... 
--- @@ -379,40 +400,43 @@ body: | ; GFX8-LABEL: name: ashr_s16_s32_sv ; GFX8: liveins: $sgpr0, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) - ; GFX8-NEXT: S_ENDPGM 0, implicit [[ASHR]](s16) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:vgpr(i16) = G_ASHR [[TRUNC]], [[COPY1]](i32) + ; GFX8-NEXT: S_ENDPGM 0, implicit [[ASHR]](i16) + ; ; GFX9-LABEL: name: ashr_s16_s32_sv ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) - ; GFX9-NEXT: S_ENDPGM 0, implicit [[ASHR]](s16) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:vgpr(i16) = G_ASHR [[TRUNC]], [[COPY1]](i32) + ; GFX9-NEXT: S_ENDPGM 0, implicit [[ASHR]](i16) + ; ; GFX10-LABEL: name: ashr_s16_s32_sv ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX10-NEXT: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) - ; GFX10-NEXT: S_ENDPGM 0, implicit [[ASHR]](s16) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX10-NEXT: [[ASHR:%[0-9]+]]:vgpr(i16) = G_ASHR [[TRUNC]], [[COPY1]](i32) + ; GFX10-NEXT: S_ENDPGM 0, implicit [[ASHR]](i16) + ; ; GFX11-LABEL: name: ashr_s16_s32_sv ; GFX11: liveins: $sgpr0, $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX11-NEXT: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) - ; GFX11-NEXT: S_ENDPGM 0, implicit [[ASHR]](s16) - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:sgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_ASHR %2, %1 - S_ENDPGM 0, implicit %3 + ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX11-NEXT: [[ASHR:%[0-9]+]]:vgpr(i16) = G_ASHR [[TRUNC]], [[COPY1]](i32) + ; GFX11-NEXT: S_ENDPGM 0, implicit [[ASHR]](i16) + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:sgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_ASHR %2, %1(i32) + S_ENDPGM 0, implicit %3(i16) ... 
--- @@ -430,6 +454,7 @@ body: | ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]] + ; ; GFX9-LABEL: name: ashr_s16_s16_sv ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} @@ -437,6 +462,7 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]] + ; ; GFX10-LABEL: name: ashr_s16_s16_sv ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} @@ -444,6 +470,7 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]] + ; ; GFX11-LABEL: name: ashr_s16_s16_sv ; GFX11: liveins: $sgpr0, $vgpr0 ; GFX11-NEXT: {{ $}} @@ -451,12 +478,12 @@ body: | ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[V_ASHRREV_I16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_fake16_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_fake16_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:sgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vgpr(s16) = G_ASHR %2, %3 - S_ENDPGM 0, implicit %4 + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:sgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %4:vgpr(i16) = G_ASHR %2, %3(i16) + S_ENDPGM 0, implicit %4(i16) ... --- @@ -470,38 +497,41 @@ body: | ; GFX8-LABEL: name: ashr_s16_s32_vs ; GFX8: liveins: $sgpr0, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) - ; GFX8-NEXT: S_ENDPGM 0, implicit [[ASHR]](s16) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:vgpr(i16) = G_ASHR [[TRUNC]], [[COPY1]](i32) + ; GFX8-NEXT: S_ENDPGM 0, implicit [[ASHR]](i16) + ; ; GFX9-LABEL: name: ashr_s16_s32_vs ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) - ; GFX9-NEXT: S_ENDPGM 0, implicit [[ASHR]](s16) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:vgpr(i16) = G_ASHR [[TRUNC]], [[COPY1]](i32) + ; GFX9-NEXT: S_ENDPGM 0, implicit [[ASHR]](i16) + ; ; GFX10-LABEL: name: ashr_s16_s32_vs ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX10-NEXT: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) - ; GFX10-NEXT: S_ENDPGM 0, implicit [[ASHR]](s16) + ; GFX10-NEXT: 
[[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX10-NEXT: [[ASHR:%[0-9]+]]:vgpr(i16) = G_ASHR [[TRUNC]], [[COPY1]](i32) + ; GFX10-NEXT: S_ENDPGM 0, implicit [[ASHR]](i16) + ; ; GFX11-LABEL: name: ashr_s16_s32_vs ; GFX11: liveins: $sgpr0, $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX11-NEXT: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) - ; GFX11-NEXT: S_ENDPGM 0, implicit [[ASHR]](s16) - %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr(s32) = COPY $sgpr0 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_ASHR %2, %1 - S_ENDPGM 0, implicit %3 + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX11-NEXT: [[ASHR:%[0-9]+]]:vgpr(i16) = G_ASHR [[TRUNC]], [[COPY1]](i32) + ; GFX11-NEXT: S_ENDPGM 0, implicit [[ASHR]](i16) + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = COPY $sgpr0 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_ASHR %2, %1(i32) + S_ENDPGM 0, implicit %3(i16) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.v2s16.mir index e924694a9f52b..29f227a370827 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.v2s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.v2s16.mir @@ -7,7 +7,7 @@ # RUN: FileCheck -check-prefixes=ERR-GFX910,ERR %s < %t # ERR-NOT: remark -# ERR-GFX910: remark: :0:0: cannot select: %2:sgpr(<2 x s16>) = G_ASHR %0:sgpr, %1:sgpr(<2 x s16>) (in function: ashr_v2s16_ss) +# ERR-GFX910: remark: :0:0: cannot select: %2:sgpr(<2 x i16>) = G_ASHR %0:sgpr, %1:sgpr(<2 x i16>) (in function: ashr_v2s16_ss) # ERR-NOT: remark --- @@ -21,21 +21,22 @@ body: | ; GFX9-LABEL: name: ashr_v2s16_ss ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1 - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:sgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>) - ; GFX9-NEXT: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr1 + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:sgpr(<2 x i16>) = G_ASHR [[COPY]], [[COPY1]](<2 x i16>) + ; GFX9-NEXT: S_ENDPGM 0, implicit [[ASHR]](<2 x i16>) + ; ; GFX10-LABEL: name: ashr_v2s16_ss ; GFX10: liveins: $sgpr0, $sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1 - ; GFX10-NEXT: [[ASHR:%[0-9]+]]:sgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>) - ; GFX10-NEXT: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>) - %0:sgpr(<2 x s16>) = COPY $sgpr0 - %1:sgpr(<2 x s16>) = COPY $sgpr1 - %2:sgpr(<2 x s16>) = G_ASHR %0, %1 - S_ENDPGM 0, implicit %2 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr1 + ; GFX10-NEXT: [[ASHR:%[0-9]+]]:sgpr(<2 x i16>) = G_ASHR [[COPY]], [[COPY1]](<2 x i16>) + ; GFX10-NEXT: S_ENDPGM 0, implicit [[ASHR]](<2 x i16>) + %0:sgpr(<2 x i16>) = COPY $sgpr0 + %1:sgpr(<2 x i16>) = COPY $sgpr1 + %2:sgpr(<2 x i16>) = G_ASHR %0, %1(<2 x i16>) + 
S_ENDPGM 0, implicit %2(<2 x i16>) ... --- @@ -53,6 +54,7 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[V_PK_ASHRREV_I16_:%[0-9]+]]:vgpr_32 = V_PK_ASHRREV_I16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_PK_ASHRREV_I16_]] + ; ; GFX10-LABEL: name: ashr_v2s16_sv ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} @@ -60,10 +62,10 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: [[V_PK_ASHRREV_I16_:%[0-9]+]]:vgpr_32 = V_PK_ASHRREV_I16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_PK_ASHRREV_I16_]] - %0:sgpr(<2 x s16>) = COPY $sgpr0 - %1:vgpr(<2 x s16>) = COPY $vgpr0 - %2:vgpr(<2 x s16>) = G_ASHR %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(<2 x i16>) = COPY $sgpr0 + %1:vgpr(<2 x i16>) = COPY $vgpr0 + %2:vgpr(<2 x i16>) = G_ASHR %0, %1(<2 x i16>) + S_ENDPGM 0, implicit %2(<2 x i16>) ... --- @@ -81,6 +83,7 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX9-NEXT: [[V_PK_ASHRREV_I16_:%[0-9]+]]:vgpr_32 = V_PK_ASHRREV_I16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_PK_ASHRREV_I16_]] + ; ; GFX10-LABEL: name: ashr_v2s16_vs ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} @@ -88,10 +91,10 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX10-NEXT: [[V_PK_ASHRREV_I16_:%[0-9]+]]:vgpr_32 = V_PK_ASHRREV_I16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_PK_ASHRREV_I16_]] - %0:vgpr(<2 x s16>) = COPY $vgpr0 - %1:sgpr(<2 x s16>) = COPY $sgpr0 - %2:vgpr(<2 x s16>) = G_ASHR %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(<2 x i16>) = COPY $vgpr0 + %1:sgpr(<2 x i16>) = COPY $sgpr0 + %2:vgpr(<2 x i16>) = G_ASHR %0, %1(<2 x i16>) + S_ENDPGM 0, implicit %2(<2 x i16>) ... --- @@ -109,6 +112,7 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9-NEXT: [[V_PK_ASHRREV_I16_:%[0-9]+]]:vgpr_32 = V_PK_ASHRREV_I16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_PK_ASHRREV_I16_]] + ; ; GFX10-LABEL: name: ashr_v2s16_vv ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} @@ -116,8 +120,8 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX10-NEXT: [[V_PK_ASHRREV_I16_:%[0-9]+]]:vgpr_32 = V_PK_ASHRREV_I16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_PK_ASHRREV_I16_]] - %0:vgpr(<2 x s16>) = COPY $vgpr0 - %1:vgpr(<2 x s16>) = COPY $vgpr1 - %2:vgpr(<2 x s16>) = G_ASHR %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(<2 x i16>) = COPY $vgpr0 + %1:vgpr(<2 x i16>) = COPY $vgpr1 + %2:vgpr(<2 x i16>) = G_ASHR %0, %1(<2 x i16>) + S_ENDPGM 0, implicit %2(<2 x i16>) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomic-cmpxchg-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomic-cmpxchg-local.mir index e90e7d95b4c1e..0f2d257a082ea 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomic-cmpxchg-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomic-cmpxchg-local.mir @@ -23,8 +23,9 @@ body: | ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[COPY]], [[COPY1]], [[COPY2]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) + ; GFX6-NEXT: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[COPY]], [[COPY1]], [[COPY2]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (i32), addrspace 3) ; GFX6-NEXT: $vgpr0 = COPY [[DS_CMPST_RTN_B32_]] + ; ; GFX7-LABEL: name: atomic_cmpxchg_s32_local ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} @@ -32,29 +33,31 @@ body: | ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[COPY]], [[COPY1]], [[COPY2]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) + ; GFX7-NEXT: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[COPY]], [[COPY1]], [[COPY2]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (i32), addrspace 3) ; GFX7-NEXT: $vgpr0 = COPY [[DS_CMPST_RTN_B32_]] + ; ; GFX9-LABEL: name: atomic_cmpxchg_s32_local ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[DS_CMPST_RTN_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32_gfx9 [[COPY]], [[COPY1]], [[COPY2]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3) + ; GFX9-NEXT: [[DS_CMPST_RTN_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32_gfx9 [[COPY]], [[COPY1]], [[COPY2]], 0, 0, implicit $exec :: (load store seq_cst (i32), addrspace 3) ; GFX9-NEXT: $vgpr0 = COPY [[DS_CMPST_RTN_B32_gfx9_]] + ; ; GFX11-LABEL: name: atomic_cmpxchg_s32_local ; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[DS_CMPSTORE_RTN_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_CMPSTORE_RTN_B32_gfx9 [[COPY]], [[COPY2]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3) + ; GFX11-NEXT: [[DS_CMPSTORE_RTN_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_CMPSTORE_RTN_B32_gfx9 [[COPY]], [[COPY2]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (i32), addrspace 3) ; GFX11-NEXT: $vgpr0 = COPY [[DS_CMPSTORE_RTN_B32_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst (s32), addrspace 3) - $vgpr0 = COPY %3 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(i32) = G_ATOMIC_CMPXCHG %0(p3), %1, %2 :: (load store seq_cst (i32), addrspace 3) + $vgpr0 = COPY %3(i32) ... 
@@ -74,10 +77,11 @@ body: | ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec - ; GFX6-NEXT: %4:vgpr_32, dead %6:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 %4, [[COPY1]], [[COPY2]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) + ; GFX6-NEXT: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[V_ADD_CO_U32_e64_]], [[COPY1]], [[COPY2]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (i32), addrspace 3) ; GFX6-NEXT: $vgpr0 = COPY [[DS_CMPST_RTN_B32_]] + ; ; GFX7-LABEL: name: atomic_cmpxchg_s32_local_gep4 ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} @@ -85,31 +89,33 @@ body: | ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[COPY]], [[COPY1]], [[COPY2]], 4, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) + ; GFX7-NEXT: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[COPY]], [[COPY1]], [[COPY2]], 4, 0, implicit $m0, implicit $exec :: (load store seq_cst (i32), addrspace 3) ; GFX7-NEXT: $vgpr0 = COPY [[DS_CMPST_RTN_B32_]] + ; ; GFX9-LABEL: name: atomic_cmpxchg_s32_local_gep4 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[DS_CMPST_RTN_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32_gfx9 [[COPY]], [[COPY1]], [[COPY2]], 4, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3) + ; GFX9-NEXT: [[DS_CMPST_RTN_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32_gfx9 [[COPY]], [[COPY1]], [[COPY2]], 4, 0, implicit $exec :: (load store seq_cst (i32), addrspace 3) ; GFX9-NEXT: $vgpr0 = COPY [[DS_CMPST_RTN_B32_gfx9_]] + ; ; GFX11-LABEL: name: atomic_cmpxchg_s32_local_gep4 ; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[DS_CMPSTORE_RTN_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_CMPSTORE_RTN_B32_gfx9 [[COPY]], [[COPY2]], [[COPY1]], 4, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3) + ; GFX11-NEXT: [[DS_CMPSTORE_RTN_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_CMPSTORE_RTN_B32_gfx9 [[COPY]], [[COPY2]], [[COPY1]], 4, 0, implicit $exec :: (load store seq_cst (i32), addrspace 3) ; GFX11-NEXT: $vgpr0 = COPY [[DS_CMPSTORE_RTN_B32_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = G_CONSTANT i32 4 - %4:vgpr(p3) = G_PTR_ADD %0, %3 - %5:vgpr(s32) = G_ATOMIC_CMPXCHG %4, %1, %2 :: (load store seq_cst (s32), addrspace 3) - $vgpr0 = COPY %5 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(i32) = G_CONSTANT i32 4 + %4:vgpr(p3) = G_PTR_ADD %0, %3(i32) + %5:vgpr(i32) = G_ATOMIC_CMPXCHG %4(p3), %1, %2 :: (load store seq_cst (i32), addrspace 3) + $vgpr0 = COPY %5(i32) ... 
@@ -129,8 +135,9 @@ body: | ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[COPY]], [[COPY1]], [[COPY2]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 3) + ; GFX6-NEXT: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[COPY]], [[COPY1]], [[COPY2]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (i64), addrspace 3) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[DS_CMPST_RTN_B64_]] + ; ; GFX7-LABEL: name: atomic_cmpxchg_s64_local ; GFX7: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 ; GFX7-NEXT: {{ $}} @@ -138,29 +145,31 @@ body: | ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[COPY]], [[COPY1]], [[COPY2]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 3) + ; GFX7-NEXT: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[COPY]], [[COPY1]], [[COPY2]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (i64), addrspace 3) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_CMPST_RTN_B64_]] + ; ; GFX9-LABEL: name: atomic_cmpxchg_s64_local ; GFX9: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; GFX9-NEXT: [[DS_CMPST_RTN_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64_gfx9 [[COPY]], [[COPY1]], [[COPY2]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 3) + ; GFX9-NEXT: [[DS_CMPST_RTN_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64_gfx9 [[COPY]], [[COPY1]], [[COPY2]], 0, 0, implicit $exec :: (load store seq_cst (i64), addrspace 3) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_CMPST_RTN_B64_gfx9_]] + ; ; GFX11-LABEL: name: atomic_cmpxchg_s64_local ; GFX11: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; GFX11-NEXT: [[DS_CMPSTORE_RTN_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_CMPSTORE_RTN_B64_gfx9 [[COPY]], [[COPY2]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 3) + ; GFX11-NEXT: [[DS_CMPSTORE_RTN_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_CMPSTORE_RTN_B64_gfx9 [[COPY]], [[COPY2]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (i64), addrspace 3) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[DS_CMPSTORE_RTN_B64_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(s64) = COPY $vgpr1_vgpr2 - %2:vgpr(s64) = COPY $vgpr3_vgpr4 - %3:vgpr(s64) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst (s64), addrspace 3) - $vgpr0_vgpr1 = COPY %3 + %1:vgpr(i64) = COPY $vgpr1_vgpr2 + %2:vgpr(i64) = COPY $vgpr3_vgpr4 + %3:vgpr(i64) = G_ATOMIC_CMPXCHG %0(p3), %1, %2 :: (load store seq_cst (i64), addrspace 3) + $vgpr0_vgpr1 = COPY %3(i64) ... 
@@ -180,8 +189,9 @@ body: | ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[COPY]], [[COPY1]], [[COPY2]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 3) + ; GFX6-NEXT: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[COPY]], [[COPY1]], [[COPY2]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (i64), addrspace 3) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[DS_CMPST_RTN_B64_]] + ; ; GFX7-LABEL: name: atomic_cmpxchg_s64_local_gep4 ; GFX7: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 ; GFX7-NEXT: {{ $}} @@ -189,30 +199,32 @@ body: | ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[COPY]], [[COPY1]], [[COPY2]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 3) + ; GFX7-NEXT: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[COPY]], [[COPY1]], [[COPY2]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (i64), addrspace 3) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_CMPST_RTN_B64_]] + ; ; GFX9-LABEL: name: atomic_cmpxchg_s64_local_gep4 ; GFX9: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; GFX9-NEXT: [[DS_CMPST_RTN_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64_gfx9 [[COPY]], [[COPY1]], [[COPY2]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 3) + ; GFX9-NEXT: [[DS_CMPST_RTN_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64_gfx9 [[COPY]], [[COPY1]], [[COPY2]], 0, 0, implicit $exec :: (load store seq_cst (i64), addrspace 3) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_CMPST_RTN_B64_gfx9_]] + ; ; GFX11-LABEL: name: atomic_cmpxchg_s64_local_gep4 ; GFX11: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; GFX11-NEXT: [[DS_CMPSTORE_RTN_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_CMPSTORE_RTN_B64_gfx9 [[COPY]], [[COPY2]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 3) + ; GFX11-NEXT: [[DS_CMPSTORE_RTN_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_CMPSTORE_RTN_B64_gfx9 [[COPY]], [[COPY2]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (i64), addrspace 3) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[DS_CMPSTORE_RTN_B64_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(s64) = COPY $vgpr1_vgpr2 - %2:vgpr(s64) = COPY $vgpr3_vgpr4 - %3:vgpr(s32) = G_CONSTANT i32 4 - %4:vgpr(p3) = G_PTR_ADD %0, %3 - %5:vgpr(s64) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst (s64), addrspace 3) - $vgpr0_vgpr1 = COPY %5 + %1:vgpr(i64) = COPY $vgpr1_vgpr2 + %2:vgpr(i64) = COPY $vgpr3_vgpr4 + %3:vgpr(i32) = G_CONSTANT i32 4 + %4:vgpr(p3) = G_PTR_ADD %0, %3(i32) + %5:vgpr(i64) = G_ATOMIC_CMPXCHG %0(p3), %1, %2 :: (load store seq_cst (i64), addrspace 3) + $vgpr0_vgpr1 = COPY %5(i64) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomic-cmpxchg-region.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomic-cmpxchg-region.mir index faad869f67a62..c637dd652d3e6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomic-cmpxchg-region.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomic-cmpxchg-region.mir @@ -23,8 +23,9 @@ body: | ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[COPY]], [[COPY1]], [[COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX6-NEXT: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[COPY]], [[COPY1]], [[COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (i32), addrspace 2) ; GFX6-NEXT: $vgpr0 = COPY [[DS_CMPST_RTN_B32_]] + ; ; GFX7-LABEL: name: atomic_cmpxchg_s32_region ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} @@ -32,29 +33,31 @@ body: | ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[COPY]], [[COPY1]], [[COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX7-NEXT: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[COPY]], [[COPY1]], [[COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (i32), addrspace 2) ; GFX7-NEXT: $vgpr0 = COPY [[DS_CMPST_RTN_B32_]] + ; ; GFX9-LABEL: name: atomic_cmpxchg_s32_region ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[COPY]], [[COPY1]], [[COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX9-NEXT: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[COPY]], [[COPY1]], [[COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (i32), addrspace 2) ; GFX9-NEXT: $vgpr0 = COPY [[DS_CMPST_RTN_B32_]] + ; ; GFX11-LABEL: name: atomic_cmpxchg_s32_region ; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[DS_CMPSTORE_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPSTORE_RTN_B32 [[COPY]], [[COPY2]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX11-NEXT: [[DS_CMPSTORE_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPSTORE_RTN_B32 [[COPY]], [[COPY2]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (i32), addrspace 2) ; GFX11-NEXT: $vgpr0 = COPY [[DS_CMPSTORE_RTN_B32_]] %0:vgpr(p2) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst (s32), addrspace 2) - $vgpr0 = COPY %3 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(i32) = G_ATOMIC_CMPXCHG %0(p2), %1, %2 :: (load store seq_cst (i32), addrspace 2) + $vgpr0 = COPY %3(i32) ... 
@@ -74,10 +77,11 @@ body: | ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec - ; GFX6-NEXT: %4:vgpr_32, dead %6:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 %4, [[COPY1]], [[COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX6-NEXT: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[V_ADD_CO_U32_e64_]], [[COPY1]], [[COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (i32), addrspace 2) ; GFX6-NEXT: $vgpr0 = COPY [[DS_CMPST_RTN_B32_]] + ; ; GFX7-LABEL: name: atomic_cmpxchg_s32_region_gep4 ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} @@ -85,31 +89,33 @@ body: | ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[COPY]], [[COPY1]], [[COPY2]], 4, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX7-NEXT: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[COPY]], [[COPY1]], [[COPY2]], 4, 1, implicit $m0, implicit $exec :: (load store seq_cst (i32), addrspace 2) ; GFX7-NEXT: $vgpr0 = COPY [[DS_CMPST_RTN_B32_]] + ; ; GFX9-LABEL: name: atomic_cmpxchg_s32_region_gep4 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[COPY]], [[COPY1]], [[COPY2]], 4, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX9-NEXT: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[COPY]], [[COPY1]], [[COPY2]], 4, 1, implicit $m0, implicit $exec :: (load store seq_cst (i32), addrspace 2) ; GFX9-NEXT: $vgpr0 = COPY [[DS_CMPST_RTN_B32_]] + ; ; GFX11-LABEL: name: atomic_cmpxchg_s32_region_gep4 ; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[DS_CMPSTORE_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPSTORE_RTN_B32 [[COPY]], [[COPY2]], [[COPY1]], 4, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX11-NEXT: [[DS_CMPSTORE_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPSTORE_RTN_B32 [[COPY]], [[COPY2]], [[COPY1]], 4, 1, implicit $m0, implicit $exec :: (load store seq_cst (i32), addrspace 2) ; GFX11-NEXT: $vgpr0 = COPY [[DS_CMPSTORE_RTN_B32_]] %0:vgpr(p2) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = G_CONSTANT i32 4 - %4:vgpr(p2) = G_PTR_ADD %0, %3 - %5:vgpr(s32) = G_ATOMIC_CMPXCHG %4, %1, %2 :: (load store seq_cst (s32), addrspace 2) - $vgpr0 = COPY %5 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(i32) = G_CONSTANT i32 4 + %4:vgpr(p2) = G_PTR_ADD %0, %3(i32) + %5:vgpr(i32) = G_ATOMIC_CMPXCHG %4(p2), %1, %2 :: (load store seq_cst (i32), addrspace 2) + $vgpr0 = COPY %5(i32) ... 
@@ -129,8 +135,9 @@ body: | ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[COPY]], [[COPY1]], [[COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 2) + ; GFX6-NEXT: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[COPY]], [[COPY1]], [[COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (i64), addrspace 2) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[DS_CMPST_RTN_B64_]] + ; ; GFX7-LABEL: name: atomic_cmpxchg_s64_region ; GFX7: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 ; GFX7-NEXT: {{ $}} @@ -138,29 +145,31 @@ body: | ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[COPY]], [[COPY1]], [[COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 2) + ; GFX7-NEXT: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[COPY]], [[COPY1]], [[COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (i64), addrspace 2) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_CMPST_RTN_B64_]] + ; ; GFX9-LABEL: name: atomic_cmpxchg_s64_region ; GFX9: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; GFX9-NEXT: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[COPY]], [[COPY1]], [[COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 2) + ; GFX9-NEXT: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[COPY]], [[COPY1]], [[COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (i64), addrspace 2) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_CMPST_RTN_B64_]] + ; ; GFX11-LABEL: name: atomic_cmpxchg_s64_region ; GFX11: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; GFX11-NEXT: [[DS_CMPSTORE_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPSTORE_RTN_B64 [[COPY]], [[COPY2]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 2) + ; GFX11-NEXT: [[DS_CMPSTORE_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPSTORE_RTN_B64 [[COPY]], [[COPY2]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (i64), addrspace 2) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[DS_CMPSTORE_RTN_B64_]] %0:vgpr(p2) = COPY $vgpr0 - %1:vgpr(s64) = COPY $vgpr1_vgpr2 - %2:vgpr(s64) = COPY $vgpr3_vgpr4 - %3:vgpr(s64) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst (s64), addrspace 2) - $vgpr0_vgpr1 = COPY %3 + %1:vgpr(i64) = COPY $vgpr1_vgpr2 + %2:vgpr(i64) = COPY $vgpr3_vgpr4 + %3:vgpr(i64) = G_ATOMIC_CMPXCHG %0(p2), %1, %2 :: (load store seq_cst (i64), addrspace 2) + $vgpr0_vgpr1 = COPY %3(i64) ... 
@@ -180,8 +189,9 @@ body: | ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[COPY]], [[COPY1]], [[COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 2) + ; GFX6-NEXT: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[COPY]], [[COPY1]], [[COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (i64), addrspace 2) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[DS_CMPST_RTN_B64_]] + ; ; GFX7-LABEL: name: atomic_cmpxchg_s64_region_gep4 ; GFX7: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 ; GFX7-NEXT: {{ $}} @@ -189,30 +199,32 @@ body: | ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[COPY]], [[COPY1]], [[COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 2) + ; GFX7-NEXT: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[COPY]], [[COPY1]], [[COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (i64), addrspace 2) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_CMPST_RTN_B64_]] + ; ; GFX9-LABEL: name: atomic_cmpxchg_s64_region_gep4 ; GFX9: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; GFX9-NEXT: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[COPY]], [[COPY1]], [[COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 2) + ; GFX9-NEXT: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[COPY]], [[COPY1]], [[COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (i64), addrspace 2) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_CMPST_RTN_B64_]] + ; ; GFX11-LABEL: name: atomic_cmpxchg_s64_region_gep4 ; GFX11: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; GFX11-NEXT: [[DS_CMPSTORE_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPSTORE_RTN_B64 [[COPY]], [[COPY2]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 2) + ; GFX11-NEXT: [[DS_CMPSTORE_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPSTORE_RTN_B64 [[COPY]], [[COPY2]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (i64), addrspace 2) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[DS_CMPSTORE_RTN_B64_]] %0:vgpr(p2) = COPY $vgpr0 - %1:vgpr(s64) = COPY $vgpr1_vgpr2 - %2:vgpr(s64) = COPY $vgpr3_vgpr4 - %3:vgpr(s32) = G_CONSTANT i32 4 - %4:vgpr(p2) = G_PTR_ADD %0, %3 - %5:vgpr(s64) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst (s64), addrspace 2) - $vgpr0_vgpr1 = COPY %5 + %1:vgpr(i64) = COPY $vgpr1_vgpr2 + %2:vgpr(i64) = COPY $vgpr3_vgpr4 + %3:vgpr(i32) = G_CONSTANT i32 4 + %4:vgpr(p2) = G_PTR_ADD %0, %3(i32) + %5:vgpr(i64) = G_ATOMIC_CMPXCHG %0(p2), %1, %2 :: (load store seq_cst (i64), addrspace 2) + $vgpr0_vgpr1 = COPY %5(i64) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-flat.mir index dc317a8413cd5..15466d1f827dd 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-flat.mir @@ -20,12 +20,12 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GCN-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32)) ; GCN-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 0) - $vgpr0 = COPY %2 + %1:vgpr(i32) = COPY $vgpr2 + %2:vgpr(i32) = G_ATOMICRMW_ADD %0(p0), %1 :: (load store seq_cst (i32)) + $vgpr0 = COPY %2(i32) ... @@ -43,10 +43,10 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN-NEXT: FLAT_ATOMIC_ADD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GCN-NEXT: FLAT_ATOMIC_ADD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32)) %0:vgpr(p0) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 0) + %1:vgpr(i32) = COPY $vgpr2 + %2:vgpr(i32) = G_ATOMICRMW_ADD %0(p0), %1 :: (load store seq_cst (i32)) ... 
@@ -72,7 +72,7 @@ body: | ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32)) ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] ; ; LARGE_IOFFSET-LABEL: name: flat_atomicrmw_add_s32_offset2047 @@ -80,7 +80,7 @@ body: | ; LARGE_IOFFSET-NEXT: {{ $}} ; LARGE_IOFFSET-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; LARGE_IOFFSET-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; LARGE_IOFFSET-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; LARGE_IOFFSET-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32)) ; LARGE_IOFFSET-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] ; ; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset2047 @@ -96,14 +96,14 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s64) = G_CONSTANT i64 2047 - %3:vgpr(p0) = G_PTR_ADD %0, %2 - %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 0) - $vgpr0 = COPY %4 + %1:vgpr(i32) = COPY $vgpr2 + %2:vgpr(i64) = G_CONSTANT i64 2047 + %3:vgpr(p0) = G_PTR_ADD %0, %2(i64) + %4:vgpr(i32) = G_ATOMICRMW_ADD %3(p0), %1 :: (load store seq_cst (i32)) + $vgpr0 = COPY %4(i32) ... 
@@ -129,14 +129,14 @@ body: | ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32)) ; ; LARGE_IOFFSET-LABEL: name: flat_atomicrmw_add_s32_offset2047_nortn ; LARGE_IOFFSET: liveins: $vgpr0_vgpr1, $vgpr2 ; LARGE_IOFFSET-NEXT: {{ $}} ; LARGE_IOFFSET-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; LARGE_IOFFSET-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; LARGE_IOFFSET-NEXT: FLAT_ATOMIC_ADD [[COPY]], [[COPY1]], 2047, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; LARGE_IOFFSET-NEXT: FLAT_ATOMIC_ADD [[COPY]], [[COPY1]], 2047, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32)) ; ; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset2047_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 @@ -151,12 +151,12 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX10-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32)) %0:vgpr(p0) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s64) = G_CONSTANT i64 2047 - %3:vgpr(p0) = G_PTR_ADD %0, %2 - %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 0) + %1:vgpr(i32) = COPY $vgpr2 + %2:vgpr(i64) = G_CONSTANT i64 2047 + %3:vgpr(p0) = G_PTR_ADD %0, %2(i64) + %4:vgpr(i32) = G_ATOMICRMW_ADD %3(p0), %1 :: (load store seq_cst (i32)) ... 
@@ -182,7 +182,7 @@ body: | ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32)) ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] ; ; LARGE_IOFFSET-LABEL: name: flat_atomicrmw_add_s32_offset2048 @@ -190,7 +190,7 @@ body: | ; LARGE_IOFFSET-NEXT: {{ $}} ; LARGE_IOFFSET-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; LARGE_IOFFSET-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; LARGE_IOFFSET-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; LARGE_IOFFSET-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32)) ; LARGE_IOFFSET-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] ; ; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset2048 @@ -206,14 +206,14 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s64) = G_CONSTANT i64 2048 - %3:vgpr(p0) = G_PTR_ADD %0, %2 - %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 0) - $vgpr0 = COPY %4 + %1:vgpr(i32) = COPY $vgpr2 + %2:vgpr(i64) = G_CONSTANT i64 2048 + %3:vgpr(p0) = G_PTR_ADD %0, %2(i64) + %4:vgpr(i32) = G_ATOMICRMW_ADD %3(p0), %1 :: (load store seq_cst (i32)) + $vgpr0 = COPY %4(i32) ... 
@@ -239,14 +239,14 @@ body: | ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32)) ; ; LARGE_IOFFSET-LABEL: name: flat_atomicrmw_add_s32_offset2048_nortn ; LARGE_IOFFSET: liveins: $vgpr0_vgpr1, $vgpr2 ; LARGE_IOFFSET-NEXT: {{ $}} ; LARGE_IOFFSET-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; LARGE_IOFFSET-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; LARGE_IOFFSET-NEXT: FLAT_ATOMIC_ADD [[COPY]], [[COPY1]], 2048, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; LARGE_IOFFSET-NEXT: FLAT_ATOMIC_ADD [[COPY]], [[COPY1]], 2048, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32)) ; ; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset2048_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 @@ -261,12 +261,12 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX10-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32)) %0:vgpr(p0) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s64) = G_CONSTANT i64 2048 - %3:vgpr(p0) = G_PTR_ADD %0, %2 - %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 0) + %1:vgpr(i32) = COPY $vgpr2 + %2:vgpr(i64) = G_CONSTANT i64 2048 + %3:vgpr(p0) = G_PTR_ADD %0, %2(i64) + %4:vgpr(i32) = G_ATOMICRMW_ADD %3(p0), %1 :: (load store seq_cst (i32)) ... 
@@ -292,7 +292,7 @@ body: | ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32)) ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] ; ; LARGE_IOFFSET-LABEL: name: flat_atomicrmw_add_s32_offset4095 @@ -300,7 +300,7 @@ body: | ; LARGE_IOFFSET-NEXT: {{ $}} ; LARGE_IOFFSET-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; LARGE_IOFFSET-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; LARGE_IOFFSET-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; LARGE_IOFFSET-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32)) ; LARGE_IOFFSET-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] ; ; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset4095 @@ -316,14 +316,14 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s64) = G_CONSTANT i64 4095 - %3:vgpr(p0) = G_PTR_ADD %0, %2 - %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 0) - $vgpr0 = COPY %4 + %1:vgpr(i32) = COPY $vgpr2 + %2:vgpr(i64) = G_CONSTANT i64 4095 + %3:vgpr(p0) = G_PTR_ADD %0, %2(i64) + %4:vgpr(i32) = G_ATOMICRMW_ADD %3(p0), %1 :: (load store seq_cst (i32)) + $vgpr0 = COPY %4(i32) ... 
@@ -349,14 +349,14 @@ body: | ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32)) ; ; LARGE_IOFFSET-LABEL: name: flat_atomicrmw_add_s32_offset4095_nortn ; LARGE_IOFFSET: liveins: $vgpr0_vgpr1, $vgpr2 ; LARGE_IOFFSET-NEXT: {{ $}} ; LARGE_IOFFSET-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; LARGE_IOFFSET-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; LARGE_IOFFSET-NEXT: FLAT_ATOMIC_ADD [[COPY]], [[COPY1]], 4095, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; LARGE_IOFFSET-NEXT: FLAT_ATOMIC_ADD [[COPY]], [[COPY1]], 4095, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32)) ; ; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset4095_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 @@ -371,12 +371,12 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX10-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32)) %0:vgpr(p0) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s64) = G_CONSTANT i64 4095 - %3:vgpr(p0) = G_PTR_ADD %0, %2 - %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 0) + %1:vgpr(i32) = COPY $vgpr2 + %2:vgpr(i64) = G_CONSTANT i64 4095 + %3:vgpr(p0) = G_PTR_ADD %0, %2(i64) + %4:vgpr(i32) = G_ATOMICRMW_ADD %3(p0), %1 :: (load store seq_cst (i32)) ... 
@@ -402,7 +402,7 @@ body: | ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32)) ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] ; ; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset4097 @@ -418,7 +418,7 @@ body: | ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX9-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32)) ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] ; ; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset4097 @@ -434,7 +434,7 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] ; ; GFX11-LABEL: name: flat_atomicrmw_add_s32_offset4097 @@ -450,7 +450,7 @@ body: | ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec, 
implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32)) ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] ; ; GFX12-LABEL: name: flat_atomicrmw_add_s32_offset4097 @@ -458,14 +458,14 @@ body: | ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX12-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4097, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX12-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4097, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32)) ; GFX12-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s64) = G_CONSTANT i64 4097 - %3:vgpr(p0) = G_PTR_ADD %0, %2 - %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 0) - $vgpr0 = COPY %4 + %1:vgpr(i32) = COPY $vgpr2 + %2:vgpr(i64) = G_CONSTANT i64 4097 + %3:vgpr(p0) = G_PTR_ADD %0, %2(i64) + %4:vgpr(i32) = G_ATOMICRMW_ADD %3(p0), %1 :: (load store seq_cst (i32)) + $vgpr0 = COPY %4(i32) ... @@ -491,7 +491,7 @@ body: | ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32)) ; ; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset4097_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 @@ -506,7 +506,7 @@ body: | ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX9-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32)) ; ; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset4097_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 @@ -521,7 +521,7 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, 
[[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX10-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32)) ; ; GFX11-LABEL: name: flat_atomicrmw_add_s32_offset4097_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 @@ -536,19 +536,19 @@ body: | ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX11-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX11-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32)) ; ; GFX12-LABEL: name: flat_atomicrmw_add_s32_offset4097_nortn ; GFX12: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX12-NEXT: FLAT_ATOMIC_ADD [[COPY]], [[COPY1]], 4097, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX12-NEXT: FLAT_ATOMIC_ADD [[COPY]], [[COPY1]], 4097, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32)) %0:vgpr(p0) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s64) = G_CONSTANT i64 4097 - %3:vgpr(p0) = G_PTR_ADD %0, %2 - %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 0) + %1:vgpr(i32) = COPY $vgpr2 + %2:vgpr(i64) = G_CONSTANT i64 4097 + %3:vgpr(p0) = G_PTR_ADD %0, %2(i64) + %4:vgpr(i32) = G_ATOMICRMW_ADD %3(p0), %1 :: (load store seq_cst (i32)) ... @@ -566,12 +566,12 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GCN-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GCN-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i64)) ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = COPY $vgpr2_vgpr3 - %2:vgpr(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s64), addrspace 0) - $vgpr0_vgpr1 = COPY %2 + %1:vgpr(i64) = COPY $vgpr2_vgpr3 + %2:vgpr(i64) = G_ATOMICRMW_ADD %0(p0), %1 :: (load store seq_cst (i64)) + $vgpr0_vgpr1 = COPY %2(i64) ... 
@@ -589,10 +589,10 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GCN-NEXT: FLAT_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GCN-NEXT: FLAT_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (i64)) %0:vgpr(p0) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = COPY $vgpr2_vgpr3 - %2:vgpr(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s64), addrspace 0) + %1:vgpr(i64) = COPY $vgpr2_vgpr3 + %2:vgpr(i64) = G_ATOMICRMW_ADD %0(p0), %1 :: (load store seq_cst (i64)) ...
@@ -618,7 +618,7 @@ body: | ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i64)) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]] ; ; LARGE_IOFFSET-LABEL: name: flat_atomicrmw_add_s64_offset4095 @@ -626,7 +626,7 @@ body: | ; LARGE_IOFFSET-NEXT: {{ $}} ; LARGE_IOFFSET-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; LARGE_IOFFSET-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; LARGE_IOFFSET-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; LARGE_IOFFSET-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i64)) ; LARGE_IOFFSET-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]] ; ; GFX10-LABEL: name: flat_atomicrmw_add_s64_offset4095 @@ -642,14 +642,14 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i64)) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = COPY $vgpr2_vgpr3 - %2:vgpr(s64) = G_CONSTANT i64 4095 - %3:vgpr(p0) = G_PTR_ADD %0, %2 - %4:vgpr(s64) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s64), addrspace 0) - $vgpr0_vgpr1 = COPY %4 + %1:vgpr(i64) = COPY $vgpr2_vgpr3 + %2:vgpr(i64) = G_CONSTANT i64 4095 + %3:vgpr(p0) = G_PTR_ADD %0, %2(i64) + %4:vgpr(i64) = G_ATOMICRMW_ADD %3(p0), %1 :: (load store seq_cst (i64)) + $vgpr0_vgpr1 = COPY %4(i64) ...
@@ -675,14 +675,14 @@ body: | ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: FLAT_ATOMIC_ADD_X2 [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX7-NEXT: FLAT_ATOMIC_ADD_X2 [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (i64)) ; ; LARGE_IOFFSET-LABEL: name: flat_atomicrmw_add_s64_offset4095_nortn ; LARGE_IOFFSET: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; LARGE_IOFFSET-NEXT: {{ $}} ; LARGE_IOFFSET-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; LARGE_IOFFSET-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; LARGE_IOFFSET-NEXT: FLAT_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 4095, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; LARGE_IOFFSET-NEXT: FLAT_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 4095, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (i64)) ; ; GFX10-LABEL: name: flat_atomicrmw_add_s64_offset4095_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -697,11 +697,11 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: FLAT_ATOMIC_ADD_X2 [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX10-NEXT: FLAT_ATOMIC_ADD_X2 [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (i64)) %0:vgpr(p0) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = COPY $vgpr2_vgpr3 - %2:vgpr(s64) = G_CONSTANT i64 4095 - %3:vgpr(p0) = G_PTR_ADD %0, %2 - %4:vgpr(s64) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s64), addrspace 0) + %1:vgpr(i64) = COPY $vgpr2_vgpr3 + %2:vgpr(i64) = G_CONSTANT i64 4095 + %3:vgpr(p0) = G_PTR_ADD %0, %2(i64) + %4:vgpr(i64) = G_ATOMICRMW_ADD %3(p0), %1 :: (load store seq_cst (i64)) ...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-global.mir index 0d1508900a98a..3aff95639cc1e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-global.mir @@ -26,7 +26,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX6-NEXT: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 1, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_ADDR64_RTN]] ; ; GFX7-LABEL: name: global_atomicrmw_add_s32 @@ -34,7 +34,7 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32), addrspace 1) ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] ; ; GFX9-LABEL: name: global_atomicrmw_add_s32 @@ -42,7 +42,7 @@ body: | ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] ; ; GFX10-LABEL: name: global_atomicrmw_add_s32 @@ -50,7 +50,7 @@ body: | ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] ; ; GFX11-LABEL: name: global_atomicrmw_add_s32 @@ -58,7 +58,7 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY 
[[GLOBAL_ATOMIC_ADD_RTN]] ; ; GFX12-LABEL: name: global_atomicrmw_add_s32 @@ -66,12 +66,12 @@ body: | ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX12-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 1) - $vgpr0 = COPY %2 + %1:vgpr(i32) = COPY $vgpr2 + %2:vgpr(i32) = G_ATOMICRMW_ADD %0(p1), %1 :: (load store seq_cst (i32), addrspace 1) + $vgpr0 = COPY %2(i32) ... @@ -94,45 +94,45 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_ATOMIC_ADD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX6-NEXT: BUFFER_ATOMIC_ADD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; ; GFX7-LABEL: name: global_atomicrmw_add_s32_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: FLAT_ATOMIC_ADD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX7-NEXT: FLAT_ATOMIC_ADD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32), addrspace 1) ; ; GFX9-LABEL: name: global_atomicrmw_add_s32_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX9-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; ; GFX10-LABEL: name: global_atomicrmw_add_s32_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX10-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; ; GFX11-LABEL: name: global_atomicrmw_add_s32_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX11-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; ; GFX12-LABEL: name: global_atomicrmw_add_s32_nortn ; GFX12: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX12-NEXT: 
{{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX12-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX12-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (i32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 1) + %1:vgpr(i32) = COPY $vgpr2 + %2:vgpr(i32) = G_ATOMICRMW_ADD %0(p1), %1 :: (load store seq_cst (i32), addrspace 1) ... @@ -155,7 +155,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX6-NEXT: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 1, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_ADDR64_RTN]] ; ; GFX7-LABEL: name: global_atomicrmw_add_s32_offset2047 @@ -171,7 +171,7 @@ body: | ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32), addrspace 1) ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] ; ; GFX9-LABEL: name: global_atomicrmw_add_s32_offset2047 @@ -179,7 +179,7 @@ body: | ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] ; ; GFX10-LABEL: name: global_atomicrmw_add_s32_offset2047 @@ -187,7 +187,7 @@ body: | ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec :: 
(load store seq_cst (i32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] ; ; GFX11-LABEL: name: global_atomicrmw_add_s32_offset2047 @@ -195,7 +195,7 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] ; ; GFX12-LABEL: name: global_atomicrmw_add_s32_offset2047 @@ -203,14 +203,14 @@ body: | ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX12-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s64) = G_CONSTANT i64 2047 - %3:vgpr(p1) = G_PTR_ADD %0, %2 - %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 1) - $vgpr0 = COPY %4 + %1:vgpr(i32) = COPY $vgpr2 + %2:vgpr(i64) = G_CONSTANT i64 2047 + %3:vgpr(p1) = G_PTR_ADD %0, %2(i64) + %4:vgpr(i32) = G_ATOMICRMW_ADD %3(p1), %1 :: (load store seq_cst (i32), addrspace 1) + $vgpr0 = COPY %4(i32) ... @@ -233,7 +233,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_ATOMIC_ADD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX6-NEXT: BUFFER_ATOMIC_ADD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; ; GFX7-LABEL: name: global_atomicrmw_add_s32_offset2047_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 @@ -248,40 +248,40 @@ body: | ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32), addrspace 1) ; ; GFX9-LABEL: name: global_atomicrmw_add_s32_offset2047_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; 
GFX9-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 2047, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX9-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 2047, 0, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; ; GFX10-LABEL: name: global_atomicrmw_add_s32_offset2047_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 2047, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX10-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 2047, 0, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; ; GFX11-LABEL: name: global_atomicrmw_add_s32_offset2047_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 2047, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX11-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 2047, 0, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; ; GFX12-LABEL: name: global_atomicrmw_add_s32_offset2047_nortn ; GFX12: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX12-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 2047, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX12-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 2047, 0, implicit $exec :: (load store seq_cst (i32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s64) = G_CONSTANT i64 2047 - %3:vgpr(p1) = G_PTR_ADD %0, %2 - %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 1) + %1:vgpr(i32) = COPY $vgpr2 + %2:vgpr(i64) = G_CONSTANT i64 2047 + %3:vgpr(p1) = G_PTR_ADD %0, %2(i64) + %4:vgpr(i32) = G_ATOMICRMW_ADD %3(p1), %1 :: (load store seq_cst (i32), addrspace 1) ... 
@@ -304,7 +304,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX6-NEXT: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 1, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_ADDR64_RTN]] ; ; GFX7-LABEL: name: global_atomicrmw_add_s32_offset2048 @@ -320,7 +320,7 @@ body: | ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32), addrspace 1) ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] ; ; GFX9-LABEL: name: global_atomicrmw_add_s32_offset2048 @@ -328,7 +328,7 @@ body: | ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] ; ; GFX10-LABEL: name: global_atomicrmw_add_s32_offset2048 @@ -344,7 +344,7 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] ; ; GFX11-LABEL: name: global_atomicrmw_add_s32_offset2048 @@ -352,7 +352,7 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; 
GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] ; ; GFX12-LABEL: name: global_atomicrmw_add_s32_offset2048 @@ -360,14 +360,14 @@ body: | ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX12-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s64) = G_CONSTANT i64 2048 - %3:vgpr(p1) = G_PTR_ADD %0, %2 - %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 1) - $vgpr0 = COPY %4 + %1:vgpr(i32) = COPY $vgpr2 + %2:vgpr(i64) = G_CONSTANT i64 2048 + %3:vgpr(p1) = G_PTR_ADD %0, %2(i64) + %4:vgpr(i32) = G_ATOMICRMW_ADD %3(p1), %1 :: (load store seq_cst (i32), addrspace 1) + $vgpr0 = COPY %4(i32) ... @@ -390,7 +390,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_ATOMIC_ADD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX6-NEXT: BUFFER_ATOMIC_ADD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 0, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; ; GFX7-LABEL: name: global_atomicrmw_add_s32_offset2048_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 @@ -405,14 +405,14 @@ body: | ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32), addrspace 1) ; ; GFX9-LABEL: name: global_atomicrmw_add_s32_offset2048_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 2048, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX9-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 2048, 0, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; ; GFX10-LABEL: name: 
global_atomicrmw_add_s32_offset2048_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 @@ -427,26 +427,26 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: GLOBAL_ATOMIC_ADD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX10-NEXT: GLOBAL_ATOMIC_ADD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; ; GFX11-LABEL: name: global_atomicrmw_add_s32_offset2048_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 2048, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX11-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 2048, 0, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; ; GFX12-LABEL: name: global_atomicrmw_add_s32_offset2048_nortn ; GFX12: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX12-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 2048, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX12-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 2048, 0, implicit $exec :: (load store seq_cst (i32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s64) = G_CONSTANT i64 2048 - %3:vgpr(p1) = G_PTR_ADD %0, %2 - %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 1) + %1:vgpr(i32) = COPY $vgpr2 + %2:vgpr(i64) = G_CONSTANT i64 2048 + %3:vgpr(p1) = G_PTR_ADD %0, %2(i64) + %4:vgpr(i32) = G_ATOMICRMW_ADD %3(p1), %1 :: (load store seq_cst (i32), addrspace 1) ... 
@@ -469,7 +469,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX6-NEXT: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 1, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_ADDR64_RTN]] ; ; GFX7-LABEL: name: global_atomicrmw_add_s32_offset4095 @@ -485,7 +485,7 @@ body: | ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32), addrspace 1) ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] ; ; GFX9-LABEL: name: global_atomicrmw_add_s32_offset4095 @@ -493,7 +493,7 @@ body: | ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] ; ; GFX10-LABEL: name: global_atomicrmw_add_s32_offset4095 @@ -509,7 +509,7 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] ; ; GFX11-LABEL: name: global_atomicrmw_add_s32_offset4095 @@ -517,7 +517,7 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; 
GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] ; ; GFX12-LABEL: name: global_atomicrmw_add_s32_offset4095 @@ -525,14 +525,14 @@ body: | ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX12-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s64) = G_CONSTANT i64 4095 - %3:vgpr(p1) = G_PTR_ADD %0, %2 - %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 1) - $vgpr0 = COPY %4 + %1:vgpr(i32) = COPY $vgpr2 + %2:vgpr(i64) = G_CONSTANT i64 4095 + %3:vgpr(p1) = G_PTR_ADD %0, %2(i64) + %4:vgpr(i32) = G_ATOMICRMW_ADD %3(p1), %1 :: (load store seq_cst (i32), addrspace 1) + $vgpr0 = COPY %4(i32) ... @@ -555,7 +555,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_ATOMIC_ADD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX6-NEXT: BUFFER_ATOMIC_ADD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; ; GFX7-LABEL: name: global_atomicrmw_add_s32_offset4095_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 @@ -570,14 +570,14 @@ body: | ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32), addrspace 1) ; ; GFX9-LABEL: name: global_atomicrmw_add_s32_offset4095_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 4095, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX9-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 4095, 0, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; ; GFX10-LABEL: name: 
global_atomicrmw_add_s32_offset4095_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 @@ -592,26 +592,26 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: GLOBAL_ATOMIC_ADD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX10-NEXT: GLOBAL_ATOMIC_ADD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; ; GFX11-LABEL: name: global_atomicrmw_add_s32_offset4095_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 4095, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX11-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 4095, 0, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; ; GFX12-LABEL: name: global_atomicrmw_add_s32_offset4095_nortn ; GFX12: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX12-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 4095, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX12-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 4095, 0, implicit $exec :: (load store seq_cst (i32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s64) = G_CONSTANT i64 4095 - %3:vgpr(p1) = G_PTR_ADD %0, %2 - %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 1) + %1:vgpr(i32) = COPY $vgpr2 + %2:vgpr(i64) = G_CONSTANT i64 4095 + %3:vgpr(p1) = G_PTR_ADD %0, %2(i64) + %4:vgpr(i32) = G_ATOMICRMW_ADD %3(p1), %1 :: (load store seq_cst (i32), addrspace 1) ... 
@@ -635,7 +635,7 @@ body: | ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX6-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 4097 - ; GFX6-NEXT: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX6-NEXT: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 1, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_ADDR64_RTN]] ; ; GFX7-LABEL: name: global_atomicrmw_add_s32_offset4097 @@ -651,7 +651,7 @@ body: | ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32), addrspace 1) ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] ; ; GFX9-LABEL: name: global_atomicrmw_add_s32_offset4097 @@ -667,7 +667,7 @@ body: | ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] ; ; GFX10-LABEL: name: global_atomicrmw_add_s32_offset4097 @@ -683,7 +683,7 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN 
[[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] ; ; GFX11-LABEL: name: global_atomicrmw_add_s32_offset4097 @@ -699,7 +699,7 @@ body: | ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] ; ; GFX12-LABEL: name: global_atomicrmw_add_s32_offset4097 @@ -707,14 +707,14 @@ body: | ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX12-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4097, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4097, 1, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s64) = G_CONSTANT i64 4097 - %3:vgpr(p1) = G_PTR_ADD %0, %2 - %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 1) - $vgpr0 = COPY %4 + %1:vgpr(i32) = COPY $vgpr2 + %2:vgpr(i64) = G_CONSTANT i64 4097 + %3:vgpr(p1) = G_PTR_ADD %0, %2(i64) + %4:vgpr(i32) = G_ATOMICRMW_ADD %3(p1), %1 :: (load store seq_cst (i32), addrspace 1) + $vgpr0 = COPY %4(i32) ... 
@@ -738,7 +738,7 @@ body: | ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX6-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 4097 - ; GFX6-NEXT: BUFFER_ATOMIC_ADD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX6-NEXT: BUFFER_ATOMIC_ADD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; ; GFX7-LABEL: name: global_atomicrmw_add_s32_offset4097_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 @@ -753,7 +753,7 @@ body: | ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (i32), addrspace 1) ; ; GFX9-LABEL: name: global_atomicrmw_add_s32_offset4097_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 @@ -768,7 +768,7 @@ body: | ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: GLOBAL_ATOMIC_ADD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX9-NEXT: GLOBAL_ATOMIC_ADD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; ; GFX10-LABEL: name: global_atomicrmw_add_s32_offset4097_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 @@ -783,7 +783,7 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: GLOBAL_ATOMIC_ADD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX10-NEXT: GLOBAL_ATOMIC_ADD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; ; GFX11-LABEL: name: global_atomicrmw_add_s32_offset4097_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 @@ -798,19 +798,19 @@ body: | ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX11-NEXT: 
[[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX11-NEXT: GLOBAL_ATOMIC_ADD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX11-NEXT: GLOBAL_ATOMIC_ADD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (i32), addrspace 1) ; ; GFX12-LABEL: name: global_atomicrmw_add_s32_offset4097_nortn ; GFX12: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX12-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 4097, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX12-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 4097, 0, implicit $exec :: (load store seq_cst (i32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s64) = G_CONSTANT i64 4097 - %3:vgpr(p1) = G_PTR_ADD %0, %2 - %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 1) + %1:vgpr(i32) = COPY $vgpr2 + %2:vgpr(i64) = G_CONSTANT i64 4097 + %3:vgpr(p1) = G_PTR_ADD %0, %2(i64) + %4:vgpr(i32) = G_ATOMICRMW_ADD %3(p1), %1 :: (load store seq_cst (i32), addrspace 1) ... @@ -833,7 +833,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX6-NEXT: [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 1, implicit $exec :: (load store seq_cst (i64), addrspace 1) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN]] ; ; GFX7-LABEL: name: global_atomicrmw_add_s64 @@ -841,7 +841,7 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) + ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i64), addrspace 1) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]] ; ; GFX9-LABEL: name: global_atomicrmw_add_s64 @@ -849,7 +849,7 @@ body: | ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (i64), addrspace 1) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]] ; ; 
GFX10-LABEL: name: global_atomicrmw_add_s64 @@ -857,7 +857,7 @@ body: | ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (i64), addrspace 1) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]] ; ; GFX11-LABEL: name: global_atomicrmw_add_s64 @@ -865,7 +865,7 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (i64), addrspace 1) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]] ; ; GFX12-LABEL: name: global_atomicrmw_add_s64 @@ -873,12 +873,12 @@ body: | ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX12-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (i64), addrspace 1) ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = COPY $vgpr2_vgpr3 - %2:vgpr(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s64), addrspace 1) - $vgpr0_vgpr1 = COPY %2 + %1:vgpr(i64) = COPY $vgpr2_vgpr3 + %2:vgpr(i64) = G_ATOMICRMW_ADD %0(p1), %1 :: (load store seq_cst (i64), addrspace 1) + $vgpr0_vgpr1 = COPY %2(i64) ... 
@@ -901,45 +901,45 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_ATOMIC_ADD_X2_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX6-NEXT: BUFFER_ATOMIC_ADD_X2_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, implicit $exec :: (load store seq_cst (i64), addrspace 1) ; ; GFX7-LABEL: name: global_atomicrmw_add_s64_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: FLAT_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) + ; GFX7-NEXT: FLAT_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (i64), addrspace 1) ; ; GFX9-LABEL: name: global_atomicrmw_add_s64_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: GLOBAL_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX9-NEXT: GLOBAL_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (i64), addrspace 1) ; ; GFX10-LABEL: name: global_atomicrmw_add_s64_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: GLOBAL_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX10-NEXT: GLOBAL_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (i64), addrspace 1) ; ; GFX11-LABEL: name: global_atomicrmw_add_s64_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX11-NEXT: GLOBAL_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX11-NEXT: GLOBAL_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (i64), addrspace 1) ; ; GFX12-LABEL: name: global_atomicrmw_add_s64_nortn ; GFX12: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX12-NEXT: GLOBAL_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX12-NEXT: GLOBAL_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (i64), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = COPY $vgpr2_vgpr3 - %2:vgpr(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s64), addrspace 1) + %1:vgpr(i64) = COPY $vgpr2_vgpr3 + %2:vgpr(i64) = G_ATOMICRMW_ADD %0(p1), %1 :: (load store seq_cst (i64), addrspace 1) ... 
@@ -962,7 +962,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX6-NEXT: [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 1, implicit $exec :: (load store seq_cst (i64), addrspace 1) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN]] ; ; GFX7-LABEL: name: global_atomicrmw_add_s64_offset4095 @@ -978,7 +978,7 @@ body: | ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) + ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (i64), addrspace 1) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]] ; ; GFX9-LABEL: name: global_atomicrmw_add_s64_offset4095 @@ -986,7 +986,7 @@ body: | ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec :: (load store seq_cst (i64), addrspace 1) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]] ; ; GFX10-LABEL: name: global_atomicrmw_add_s64_offset4095 @@ -1002,7 +1002,7 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (i64), addrspace 1) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]] ; ; GFX11-LABEL: name: global_atomicrmw_add_s64_offset4095 @@ -1010,7 
+1010,7 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec :: (load store seq_cst (i64), addrspace 1) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]] ; ; GFX12-LABEL: name: global_atomicrmw_add_s64_offset4095 @@ -1018,14 +1018,14 @@ body: | ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX12-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec :: (load store seq_cst (i64), addrspace 1) ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = COPY $vgpr2_vgpr3 - %2:vgpr(s64) = G_CONSTANT i64 4095 - %3:vgpr(p1) = G_PTR_ADD %0, %2 - %4:vgpr(s64) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s64), addrspace 1) - $vgpr0_vgpr1 = COPY %4 + %1:vgpr(i64) = COPY $vgpr2_vgpr3 + %2:vgpr(i64) = G_CONSTANT i64 4095 + %3:vgpr(p1) = G_PTR_ADD %0, %2(i64) + %4:vgpr(i64) = G_ATOMICRMW_ADD %3(p1), %1 :: (load store seq_cst (i64), addrspace 1) + $vgpr0_vgpr1 = COPY %4(i64) ... @@ -1048,7 +1048,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_ATOMIC_ADD_X2_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX6-NEXT: BUFFER_ATOMIC_ADD_X2_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, implicit $exec :: (load store seq_cst (i64), addrspace 1) ; ; GFX7-LABEL: name: global_atomicrmw_add_s64_offset4095_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -1063,14 +1063,14 @@ body: | ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: FLAT_ATOMIC_ADD_X2 [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) + ; GFX7-NEXT: FLAT_ATOMIC_ADD_X2 [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (i64), addrspace 1) ; ; GFX9-LABEL: name: global_atomicrmw_add_s64_offset4095_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: GLOBAL_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 
4095, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX9-NEXT: GLOBAL_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 4095, 0, implicit $exec :: (load store seq_cst (i64), addrspace 1) ; ; GFX10-LABEL: name: global_atomicrmw_add_s64_offset4095_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -1085,25 +1085,25 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: GLOBAL_ATOMIC_ADD_X2 [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX10-NEXT: GLOBAL_ATOMIC_ADD_X2 [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (i64), addrspace 1) ; ; GFX11-LABEL: name: global_atomicrmw_add_s64_offset4095_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX11-NEXT: GLOBAL_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 4095, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX11-NEXT: GLOBAL_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 4095, 0, implicit $exec :: (load store seq_cst (i64), addrspace 1) ; ; GFX12-LABEL: name: global_atomicrmw_add_s64_offset4095_nortn ; GFX12: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX12-NEXT: GLOBAL_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 4095, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX12-NEXT: GLOBAL_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 4095, 0, implicit $exec :: (load store seq_cst (i64), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = COPY $vgpr2_vgpr3 - %2:vgpr(s64) = G_CONSTANT i64 4095 - %3:vgpr(p1) = G_PTR_ADD %0, %2 - %4:vgpr(s64) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s64), addrspace 1) + %1:vgpr(i64) = COPY $vgpr2_vgpr3 + %2:vgpr(i64) = G_CONSTANT i64 4095 + %3:vgpr(p1) = G_PTR_ADD %0, %2(i64) + %4:vgpr(i64) = G_ATOMICRMW_ADD %3(p1), %1 :: (load store seq_cst (i64), addrspace 1) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-local.mir
index 0386343b08ce6..138f9943fd659 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-local.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-local.mir
@@ -23,27 +23,33 @@ body: |
     ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX8-NEXT: $m0 = S_MOV_B32 -1
-    ; GFX8-NEXT: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3)
+    ; GFX8-NEXT: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (f32), addrspace 3)
     ; GFX8-NEXT: $vgpr0 = COPY [[DS_ADD_RTN_F32_]]
+    ;
     ; GFX9-LABEL: name: atomicrmw_fadd_s32_local
     ; GFX9: liveins: $vgpr0, $vgpr1
     ; GFX9-NEXT: {{ $}}
     ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-    ; GFX9-NEXT: [[DS_ADD_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3)
+    ; GFX9-NEXT: [[DS_ADD_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (f32), addrspace 3)
     ; GFX9-NEXT: $vgpr0 = COPY [[DS_ADD_RTN_F32_gfx9_]]
+    ;
     ; GFX6-LABEL: name: atomicrmw_fadd_s32_local
     ; GFX6: liveins: $vgpr0, $vgpr1
     ; GFX6-NEXT: {{ $}}
     ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
-    ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+    ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1
+    ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32)
     ; GFX6-NEXT: $m0 = S_MOV_B32 -1
-    ; GFX6-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr_32(s32) = G_ATOMICRMW_FADD [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
-    ; GFX6-NEXT: $vgpr0 = COPY [[ATOMICRMW_FADD]](s32)
+    ; GFX6-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr_32(f32) = G_ATOMICRMW_FADD [[COPY]](p3), [[BITCAST]] :: (load store seq_cst (f32), addrspace 3)
+    ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY [[ATOMICRMW_FADD]](f32)
+    ; GFX6-NEXT: $vgpr0 = COPY [[COPY2]](i32)
     %0:vgpr(p3) = COPY $vgpr0
-    %1:vgpr(s32) = COPY $vgpr1
-    %2:vgpr(s32) = G_ATOMICRMW_FADD %0(p3), %1 :: (load store seq_cst (s32), addrspace 3)
-    $vgpr0 = COPY %2
+    %1:vgpr(i32) = COPY $vgpr1
+    %2:vgpr(f32) = G_BITCAST %1(i32)
+    %3:vgpr(f32) = G_ATOMICRMW_FADD %0(p3), %2 :: (load store seq_cst (f32), addrspace 3)
+    %4:vgpr(i32) = G_BITCAST %3(f32)
+    $vgpr0 = COPY %4(i32)
 ...
@@ -62,23 +68,27 @@ body: |
     ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX8-NEXT: $m0 = S_MOV_B32 -1
-    ; GFX8-NEXT: DS_ADD_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3)
+    ; GFX8-NEXT: DS_ADD_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (f32), addrspace 3)
+    ;
     ; GFX9-LABEL: name: atomicrmw_fadd_s32_local_noret
     ; GFX9: liveins: $vgpr0, $vgpr1
     ; GFX9-NEXT: {{ $}}
     ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-    ; GFX9-NEXT: DS_ADD_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3)
+    ; GFX9-NEXT: DS_ADD_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (f32), addrspace 3)
+    ;
     ; GFX6-LABEL: name: atomicrmw_fadd_s32_local_noret
     ; GFX6: liveins: $vgpr0, $vgpr1
     ; GFX6-NEXT: {{ $}}
     ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
-    ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+    ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1
+    ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32)
     ; GFX6-NEXT: $m0 = S_MOV_B32 -1
-    ; GFX6-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_FADD [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
+    ; GFX6-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(f32) = G_ATOMICRMW_FADD [[COPY]](p3), [[BITCAST]] :: (load store seq_cst (f32), addrspace 3)
     %0:vgpr(p3) = COPY $vgpr0
-    %1:vgpr(s32) = COPY $vgpr1
-    %2:vgpr(s32) = G_ATOMICRMW_FADD %0(p3), %1 :: (load store seq_cst (s32), addrspace 3)
+    %1:vgpr(i32) = COPY $vgpr1
+    %2:vgpr(f32) = G_BITCAST %1(i32)
+    %3:vgpr(f32) = G_ATOMICRMW_FADD %0(p3), %2 :: (load store seq_cst (f32), addrspace 3)
 ...
@@ -97,30 +107,36 @@ body: |
     ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX8-NEXT: $m0 = S_MOV_B32 -1
-    ; GFX8-NEXT: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 4, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3)
+    ; GFX8-NEXT: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 4, 0, implicit $m0, implicit $exec :: (load store seq_cst (f32), addrspace 3)
     ; GFX8-NEXT: $vgpr0 = COPY [[DS_ADD_RTN_F32_]]
+    ;
     ; GFX9-LABEL: name: atomicrmw_fadd_s32_local_gep4
     ; GFX9: liveins: $vgpr0, $vgpr1
     ; GFX9-NEXT: {{ $}}
     ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-    ; GFX9-NEXT: [[DS_ADD_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32_gfx9 [[COPY]], [[COPY1]], 4, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3)
+    ; GFX9-NEXT: [[DS_ADD_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32_gfx9 [[COPY]], [[COPY1]], 4, 0, implicit $exec :: (load store seq_cst (f32), addrspace 3)
     ; GFX9-NEXT: $vgpr0 = COPY [[DS_ADD_RTN_F32_gfx9_]]
+    ;
     ; GFX6-LABEL: name: atomicrmw_fadd_s32_local_gep4
     ; GFX6: liveins: $vgpr0, $vgpr1
     ; GFX6-NEXT: {{ $}}
     ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
-    ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
-    ; GFX6-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 4
-    ; GFX6-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1
+    ; GFX6-NEXT: [[C:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 4
+    ; GFX6-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p3) = G_PTR_ADD [[COPY]], [[C]](i32)
+    ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32)
     ; GFX6-NEXT: $m0 = S_MOV_B32 -1
-    ; GFX6-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr_32(s32) = G_ATOMICRMW_FADD [[PTR_ADD]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
-    ; GFX6-NEXT: $vgpr0 = COPY [[ATOMICRMW_FADD]](s32)
+    ; GFX6-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr_32(f32) = G_ATOMICRMW_FADD [[PTR_ADD]](p3), [[BITCAST]] :: (load store seq_cst (f32), addrspace 3)
+    ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY [[ATOMICRMW_FADD]](f32)
+    ; GFX6-NEXT: $vgpr0 = COPY [[COPY2]](i32)
     %0:vgpr(p3) = COPY $vgpr0
-    %1:vgpr(s32) = COPY $vgpr1
-    %2:vgpr(s32) = G_CONSTANT i32 4
-    %3:vgpr(p3) = G_PTR_ADD %0, %2
-    %4:vgpr(s32) = G_ATOMICRMW_FADD %3(p3), %1 :: (load store seq_cst (s32), addrspace 3)
-    $vgpr0 = COPY %4
+    %1:vgpr(i32) = COPY $vgpr1
+    %2:vgpr(i32) = G_CONSTANT i32 4
+    %3:vgpr(p3) = G_PTR_ADD %0, %2(i32)
+    %4:vgpr(f32) = G_BITCAST %1(i32)
+    %5:vgpr(f32) = G_ATOMICRMW_FADD %3(p3), %4 :: (load store seq_cst (f32), addrspace 3)
+    %6:vgpr(i32) = G_BITCAST %5(f32)
+    $vgpr0 = COPY %6(i32)
 ...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-region.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-region.mir index 75d65f1f6893c..f2f528dd1e117 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-region.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-region.mir @@ -23,27 +23,33 @@ body: | ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-NEXT: $m0 = S_MOV_B32 -1 - ; GFX8-NEXT: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX8-NEXT: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (f32), addrspace 2) ; GFX8-NEXT: $vgpr0 = COPY [[DS_ADD_RTN_F32_]] + ; ; GFX9-LABEL: name: atomicrmw_fadd_s32_region ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX9-NEXT: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (f32), addrspace 2) ; GFX9-NEXT: $vgpr0 = COPY [[DS_ADD_RTN_F32_]] + ; ; GFX6-LABEL: name: atomicrmw_fadd_s32_region ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p2) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr_32(s32) = G_ATOMICRMW_FADD [[COPY]](p2), [[COPY1]] :: (load store seq_cst (s32), addrspace 2) - ; GFX6-NEXT: $vgpr0 = COPY [[ATOMICRMW_FADD]](s32) + ; GFX6-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr_32(f32) = G_ATOMICRMW_FADD [[COPY]](p2), [[BITCAST]] :: (load store seq_cst (f32), addrspace 2) + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY [[ATOMICRMW_FADD]](f32) + ; GFX6-NEXT: $vgpr0 = COPY [[COPY2]](i32) %0:vgpr(p2) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = G_ATOMICRMW_FADD %0(p2), %1 :: (load store seq_cst (s32), addrspace 2) - $vgpr0 = COPY %2 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(f32) = G_BITCAST %1(i32) + %3:vgpr(f32) = G_ATOMICRMW_FADD %0(p2), %2 :: (load store seq_cst (f32), addrspace 2) + %4:vgpr(i32) = G_BITCAST %3(f32) + $vgpr0 = COPY %4(i32) ... 
@@ -62,23 +68,27 @@ body: | ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-NEXT: $m0 = S_MOV_B32 -1 - ; GFX8-NEXT: DS_ADD_F32 [[COPY]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX8-NEXT: DS_ADD_F32 [[COPY]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (f32), addrspace 2) + ; ; GFX9-LABEL: name: atomicrmw_fadd_s32_region_noret ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: DS_ADD_F32 [[COPY]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX9-NEXT: DS_ADD_F32 [[COPY]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (f32), addrspace 2) + ; ; GFX6-LABEL: name: atomicrmw_fadd_s32_region_noret ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p2) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_FADD [[COPY]](p2), [[COPY1]] :: (load store seq_cst (s32), addrspace 2) + ; GFX6-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(f32) = G_ATOMICRMW_FADD [[COPY]](p2), [[BITCAST]] :: (load store seq_cst (f32), addrspace 2) %0:vgpr(p2) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = G_ATOMICRMW_FADD %0(p2), %1 :: (load store seq_cst (s32), addrspace 2) + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(f32) = G_BITCAST %1(i32) + %3:vgpr(f32) = G_ATOMICRMW_FADD %0(p2), %2 :: (load store seq_cst (f32), addrspace 2) ... 
@@ -97,30 +107,36 @@ body: | ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-NEXT: $m0 = S_MOV_B32 -1 - ; GFX8-NEXT: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 4, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX8-NEXT: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 4, 1, implicit $m0, implicit $exec :: (load store seq_cst (f32), addrspace 2) ; GFX8-NEXT: $vgpr0 = COPY [[DS_ADD_RTN_F32_]] + ; ; GFX9-LABEL: name: atomicrmw_fadd_s32_region_gep4 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 4, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX9-NEXT: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 4, 1, implicit $m0, implicit $exec :: (load store seq_cst (f32), addrspace 2) ; GFX9-NEXT: $vgpr0 = COPY [[DS_ADD_RTN_F32_]] + ; ; GFX6-LABEL: name: atomicrmw_fadd_s32_region_gep4 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p2) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 4 - ; GFX6-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p2) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 4 + ; GFX6-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p2) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr_32(s32) = G_ATOMICRMW_FADD [[PTR_ADD]](p2), [[COPY1]] :: (load store seq_cst (s32), addrspace 2) - ; GFX6-NEXT: $vgpr0 = COPY [[ATOMICRMW_FADD]](s32) + ; GFX6-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr_32(f32) = G_ATOMICRMW_FADD [[PTR_ADD]](p2), [[BITCAST]] :: (load store seq_cst (f32), addrspace 2) + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY [[ATOMICRMW_FADD]](f32) + ; GFX6-NEXT: $vgpr0 = COPY [[COPY2]](i32) %0:vgpr(p2) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = G_CONSTANT i32 4 - %3:vgpr(p2) = G_PTR_ADD %0, %2 - %4:vgpr(s32) = G_ATOMICRMW_FADD %3(p2), %1 :: (load store seq_cst (s32), addrspace 2) - $vgpr0 = COPY %4 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = G_CONSTANT i32 4 + %3:vgpr(p2) = G_PTR_ADD %0, %2(i32) + %4:vgpr(f32) = G_BITCAST %1(i32) + %5:vgpr(f32) = G_ATOMICRMW_FADD %3(p2), %4 :: (load store seq_cst (f32), addrspace 2) + %6:vgpr(i32) = G_BITCAST %5(f32) + $vgpr0 = COPY %6(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-xchg-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-xchg-local.mir index f9fc72a65dbd9..ea05b809b26d7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-xchg-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-xchg-local.mir @@ -22,27 +22,29 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) + ; GFX6-NEXT: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (i32), addrspace 3) ; GFX6-NEXT: $vgpr0 = COPY [[DS_WRXCHG_RTN_B32_]] + ; ; GFX7-LABEL: name: atomicrmw_xchg_s32_local ; GFX7: liveins: $vgpr0, $vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) + ; GFX7-NEXT: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (i32), addrspace 3) ; GFX7-NEXT: $vgpr0 = COPY [[DS_WRXCHG_RTN_B32_]] + ; ; GFX9-LABEL: name: atomicrmw_xchg_s32_local ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[DS_WRXCHG_RTN_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3) + ; GFX9-NEXT: [[DS_WRXCHG_RTN_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (i32), addrspace 3) ; GFX9-NEXT: $vgpr0 = COPY [[DS_WRXCHG_RTN_B32_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = G_ATOMICRMW_XCHG %0(p3), %1 :: (load store seq_cst (s32), addrspace 3) - $vgpr0 = COPY %2 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = G_ATOMICRMW_XCHG %0(p3), %1 :: (load store seq_cst (i32), addrspace 3) + $vgpr0 = COPY %2(i32) ... 
@@ -61,30 +63,32 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec - ; GFX6-NEXT: %3:vgpr_32, dead %5:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 %3, [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) + ; GFX6-NEXT: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[V_ADD_CO_U32_e64_]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (i32), addrspace 3) ; GFX6-NEXT: $vgpr0 = COPY [[DS_WRXCHG_RTN_B32_]] + ; ; GFX7-LABEL: name: atomicrmw_xchg_s32_local_gep4 ; GFX7: liveins: $vgpr0, $vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[COPY]], [[COPY1]], 4, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) + ; GFX7-NEXT: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[COPY]], [[COPY1]], 4, 0, implicit $m0, implicit $exec :: (load store seq_cst (i32), addrspace 3) ; GFX7-NEXT: $vgpr0 = COPY [[DS_WRXCHG_RTN_B32_]] + ; ; GFX9-LABEL: name: atomicrmw_xchg_s32_local_gep4 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[DS_WRXCHG_RTN_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32_gfx9 [[COPY]], [[COPY1]], 4, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3) + ; GFX9-NEXT: [[DS_WRXCHG_RTN_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32_gfx9 [[COPY]], [[COPY1]], 4, 0, implicit $exec :: (load store seq_cst (i32), addrspace 3) ; GFX9-NEXT: $vgpr0 = COPY [[DS_WRXCHG_RTN_B32_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = G_CONSTANT i32 4 - %3:vgpr(p3) = G_PTR_ADD %0, %2 - %4:vgpr(s32) = G_ATOMICRMW_XCHG %3(p3), %1 :: (load store seq_cst (s32), addrspace 3) - $vgpr0 = COPY %4 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = G_CONSTANT i32 4 + %3:vgpr(p3) = G_PTR_ADD %0, %2(i32) + %4:vgpr(i32) = G_ATOMICRMW_XCHG %3(p3), %1 :: (load store seq_cst (i32), addrspace 3) + $vgpr0 = COPY %4(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-xchg-region.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-xchg-region.mir index 551261ee8a76b..d040e96b05a79 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-xchg-region.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-xchg-region.mir @@ -22,27 +22,29 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[COPY]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX6-NEXT: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[COPY]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (i32), addrspace 2) ; GFX6-NEXT: $vgpr0 = COPY [[DS_WRXCHG_RTN_B32_]] + ; ; GFX7-LABEL: name: atomicrmw_xchg_s32_region ; GFX7: liveins: $vgpr0, $vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[COPY]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX7-NEXT: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[COPY]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (i32), addrspace 2) ; GFX7-NEXT: $vgpr0 = COPY [[DS_WRXCHG_RTN_B32_]] + ; ; GFX9-LABEL: name: atomicrmw_xchg_s32_region ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[COPY]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX9-NEXT: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[COPY]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (i32), addrspace 2) ; GFX9-NEXT: $vgpr0 = COPY [[DS_WRXCHG_RTN_B32_]] %0:vgpr(p2) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = G_ATOMICRMW_XCHG %0(p2), %1 :: (load store seq_cst (s32), addrspace 2) - $vgpr0 = COPY %2 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = G_ATOMICRMW_XCHG %0(p2), %1 :: (load store seq_cst (i32), addrspace 2) + $vgpr0 = COPY %2(i32) ... 
@@ -61,30 +63,32 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec - ; GFX6-NEXT: %3:vgpr_32, dead %5:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 %3, [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX6-NEXT: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[V_ADD_CO_U32_e64_]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (i32), addrspace 2) ; GFX6-NEXT: $vgpr0 = COPY [[DS_WRXCHG_RTN_B32_]] + ; ; GFX7-LABEL: name: atomicrmw_xchg_s32_region_gep4 ; GFX7: liveins: $vgpr0, $vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[COPY]], [[COPY1]], 4, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX7-NEXT: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[COPY]], [[COPY1]], 4, 1, implicit $m0, implicit $exec :: (load store seq_cst (i32), addrspace 2) ; GFX7-NEXT: $vgpr0 = COPY [[DS_WRXCHG_RTN_B32_]] + ; ; GFX9-LABEL: name: atomicrmw_xchg_s32_region_gep4 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[COPY]], [[COPY1]], 4, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX9-NEXT: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[COPY]], [[COPY1]], 4, 1, implicit $m0, implicit $exec :: (load store seq_cst (i32), addrspace 2) ; GFX9-NEXT: $vgpr0 = COPY [[DS_WRXCHG_RTN_B32_]] %0:vgpr(p2) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = G_CONSTANT i32 4 - %3:vgpr(p2) = G_PTR_ADD %0, %2 - %4:vgpr(s32) = G_ATOMICRMW_XCHG %3(p2), %1 :: (load store seq_cst (s32), addrspace 2) - $vgpr0 = COPY %4 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = G_CONSTANT i32 4 + %3:vgpr(p2) = G_PTR_ADD %0, %2(i32) + %4:vgpr(i32) = G_ATOMICRMW_XCHG %3(p2), %1 :: (load store seq_cst (i32), addrspace 2) + $vgpr0 = COPY %4(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-bitcast.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-bitcast.mir index 04f7176ebf68c..9870909f58e8a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-bitcast.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-bitcast.mir @@ -14,8 +14,8 @@ tracksRegLiveness: true body: | bb.0: liveins: $vgpr0 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(<2 x s16>) = G_BITCAST %0 - %2:vgpr(s32) = G_BITCAST %1 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(<2 x i16>) = G_BITCAST %0 + %2:vgpr(i32) = G_BITCAST %1 S_ENDPGM 0, implicit %2 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-bitreverse.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-bitreverse.mir index 5d9c2b7b1150c..40878b2eddad0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-bitreverse.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-bitreverse.mir @@ -15,9 +15,9 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; CHECK-NEXT: [[S_BREV_B32_:%[0-9]+]]:sreg_32 = S_BREV_B32 [[COPY]] ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_BREV_B32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = G_BITREVERSE %0 - S_ENDPGM 0, implicit %1 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = G_BITREVERSE %0 + S_ENDPGM 0, implicit %1(i32) ... --- @@ -34,9 +34,9 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[V_BFREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_BFREV_B32_e64 [[COPY]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BFREV_B32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = G_BITREVERSE %0 - S_ENDPGM 0, implicit %1 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = G_BITREVERSE %0 + S_ENDPGM 0, implicit %1(i32) ... --- @@ -53,9 +53,9 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; CHECK-NEXT: [[V_BFREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_BFREV_B32_e64 [[COPY]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BFREV_B32_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = G_BITREVERSE %0 - S_ENDPGM 0, implicit %1 + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = G_BITREVERSE %0 + S_ENDPGM 0, implicit %1(i32) ... --- @@ -72,9 +72,9 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[S_BREV_B64_:%[0-9]+]]:sreg_64 = S_BREV_B64 [[COPY]] ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_BREV_B64_]] - %0:sgpr(s64) = COPY $sgpr0_sgpr1 - %1:sgpr(s64) = G_BITREVERSE %0 - S_ENDPGM 0, implicit %1 + %0:sgpr(i64) = COPY $sgpr0_sgpr1 + %1:sgpr(i64) = G_BITREVERSE %0 + S_ENDPGM 0, implicit %1(i64) ... --- @@ -95,12 +95,12 @@ body: | ; CHECK-NEXT: [[V_BFREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_BFREV_B32_e64 [[COPY1]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_BFREV_B32_e64_]], %subreg.sub0, [[V_BFREV_B32_e64_1]], %subreg.sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %2:vgpr(s32), %3:vgpr(s32) = G_UNMERGE_VALUES %0(s64) - %4:vgpr(s32) = G_BITREVERSE %3 - %5:vgpr(s32) = G_BITREVERSE %2 - %1:vgpr(s64) = G_MERGE_VALUES %4(s32), %5(s32) - S_ENDPGM 0, implicit %1 + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(i32), %2:vgpr(i32) = G_UNMERGE_VALUES %0(i64) + %3:vgpr(i32) = G_BITREVERSE %2 + %4:vgpr(i32) = G_BITREVERSE %1 + %5:vgpr(i64) = G_MERGE_VALUES %3(i32), %4(i32) + S_ENDPGM 0, implicit %5(i64) ... --- @@ -121,10 +121,10 @@ body: | ; CHECK-NEXT: [[V_BFREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_BFREV_B32_e64 [[COPY1]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_BFREV_B32_e64_]], %subreg.sub0, [[V_BFREV_B32_e64_1]], %subreg.sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] - %0:sgpr(s64) = COPY $sgpr0_sgpr1 - %2:sgpr(s32), %3:sgpr(s32) = G_UNMERGE_VALUES %0(s64) - %4:vgpr(s32) = G_BITREVERSE %3 - %5:vgpr(s32) = G_BITREVERSE %2 - %1:vgpr(s64) = G_MERGE_VALUES %4(s32), %5(s32) - S_ENDPGM 0, implicit %1 + %0:sgpr(i64) = COPY $sgpr0_sgpr1 + %1:sgpr(i32), %2:sgpr(i32) = G_UNMERGE_VALUES %0(i64) + %3:vgpr(i32) = G_BITREVERSE %2 + %4:vgpr(i32) = G_BITREVERSE %1 + %5:vgpr(i64) = G_MERGE_VALUES %3(i32), %4(i32) + S_ENDPGM 0, implicit %5(i64) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-br.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-br.mir index 95b25012c64d1..19ae0fb226b18 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-br.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-br.mir @@ -16,8 +16,11 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: bb.0: + successors: %bb.1(0x80000000) + G_BR %bb.1 bb.1: + ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-brcond.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-brcond.mir index ecb07f79e9fd1..a5fa35d2d7dbc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-brcond.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-brcond.mir @@ -3,8 +3,8 @@ # RUN: FileCheck -check-prefix=ERR %s < %t # ERR-NOT: remark: -# ERR: remark: :0:0: cannot select: G_BRCOND %1:sgpr(s1), %bb.1 (in function: brcond_sgpr) -# ERR-NEXT: remark: :0:0: cannot select: G_BRCOND %1:vgpr(s1), %bb.1 (in function: brcond_vgpr) +# ERR: remark: :0:0: cannot select: G_BRCOND %1:sgpr(i1), %bb.1 (in function: brcond_sgpr) +# ERR-NEXT: remark: :0:0: cannot select: G_BRCOND %1:vgpr(i1), %bb.1 (in function: brcond_vgpr) # ERR-NOT: remark: --- @@ -28,15 +28,18 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: bb.0: + successors: %bb.1(0x80000000) liveins: $sgpr0, $sgpr1 - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = G_ICMP intpred(eq), %0, %1 - G_BRCOND %2, %bb.1 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i32) = G_ICMP intpred(eq), %0(i32), %1 + G_BRCOND %2(i32), %bb.1 bb.1: + + ... --- @@ -57,13 +60,16 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: bb.0: + successors: %bb.1(0x80000000) liveins: $sgpr0, $sgpr1 - %0:sgpr(s32) = G_IMPLICIT_DEF - G_BRCOND %0, %bb.1 + %0:sgpr(i32) = G_IMPLICIT_DEF + G_BRCOND %0(i32), %bb.1 bb.1: + + ... --- @@ -91,18 +97,23 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.2: bb.0: + successors: %bb.1(0x80000000) liveins: $sgpr0, $sgpr1 - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = G_ICMP intpred(eq), %0, %1 - G_BRCOND %2, %bb.1 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i32) = G_ICMP intpred(eq), %0(i32), %1 + G_BRCOND %2(i32), %bb.1 G_BR %bb.1 bb.1: + successors: %bb.2(0x80000000) bb.2: + + + ... --- @@ -125,15 +136,18 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: bb.0: + successors: %bb.1(0x80000000) liveins: $vgpr0, $vgpr1 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vcc(s1) = G_ICMP intpred(eq), %0, %1 - G_BRCOND %2, %bb.1 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vcc(i1) = G_ICMP intpred(eq), %0(i32), %1 + G_BRCOND %2(i1), %bb.1 bb.1: + + ... # Don't try to select this. @@ -149,20 +163,23 @@ body: | ; GCN-NEXT: successors: %bb.1(0x80000000) ; GCN-NEXT: liveins: $sgpr0, $sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GCN-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; GCN-NEXT: G_BRCOND [[TRUNC]](s1), %bb.1 + ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GCN-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY]](i32) + ; GCN-NEXT: G_BRCOND [[TRUNC]](i1), %bb.1 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: bb.0: + successors: %bb.1(0x80000000) liveins: $sgpr0, $sgpr1 - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s1) = G_TRUNC %0 - G_BRCOND %1, %bb.1 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i1) = G_TRUNC %0(i32) + G_BRCOND %1(i1), %bb.1 bb.1: + + ... # Don't try to select this. 
@@ -178,20 +195,23 @@ body: | ; GCN-NEXT: successors: %bb.1(0x80000000) ; GCN-NEXT: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GCN-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; GCN-NEXT: G_BRCOND [[TRUNC]](s1), %bb.1 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GCN-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY]](i32) + ; GCN-NEXT: G_BRCOND [[TRUNC]](i1), %bb.1 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: bb.0: + successors: %bb.1(0x80000000) liveins: $vgpr0, $vgpr1 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s1) = G_TRUNC %0 - G_BRCOND %1, %bb.1 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i1) = G_TRUNC %0(i32) + G_BRCOND %1(i1), %bb.1 bb.1: + + ... --- @@ -214,15 +234,19 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: bb.0: + successors: %bb.1(0x80000000) liveins: $vgpr0, $vgpr1 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %0:vgpr(s32), %1:vgpr(s32) - G_BRCOND %2(s1), %bb.1 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %3:vgpr(f32) = G_BITCAST %0(i32) + %2:vcc(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %3(f32), %1(i32) + G_BRCOND %2(i1), %bb.1 bb.1: + + ... --- @@ -249,19 +273,24 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: bb.0: + successors: %bb.1(0x80000000) liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = COPY $vgpr3 - %4:vcc(s1) = G_ICMP intpred(eq), %0, %1 - %5:vcc(s1) = G_FCMP floatpred(oeq), %2, %3 - %6:vcc(s1) = G_AND %4, %5 - G_BRCOND %6(s1), %bb.1 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(i32) = COPY $vgpr3 + %4:vcc(i1) = G_ICMP intpred(eq), %0(i32), %1 + %7:vgpr(f32) = G_BITCAST %2(i32) + %8:vgpr(f32) = G_BITCAST %3(i32) + %5:vcc(i1) = G_FCMP floatpred(oeq), %7(f32), %8 + %6:vcc(i1) = G_AND %4, %5 + G_BRCOND %6(i1), %bb.1 bb.1: + + ... --- @@ -289,19 +318,22 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: bb.0: + successors: %bb.1(0x80000000) liveins: $sgpr0, $vgpr0, $vgpr1 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:sgpr(s32) = COPY $sgpr0 - %3:sgpr(s1) = G_TRUNC %2(s32) - %4:vcc(s1) = COPY %3(s1) - %5:vcc(s1) = G_ICMP intpred(eq), %0, %1 - %6:vcc(s1) = G_AND %5, %4 - G_BRCOND %6(s1), %bb.1 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:sgpr(i32) = COPY $sgpr0 + %3:sgpr(i1) = G_TRUNC %2(i32) + %4:vcc(i1) = COPY %3(i1) + %5:vcc(i1) = G_ICMP intpred(eq), %0(i32), %1 + %6:vcc(i1) = G_AND %5, %4 + G_BRCOND %6(i1), %bb.1 bb.1: + + ... --- @@ -327,16 +359,19 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: bb.0: + successors: %bb.1(0x80000000) liveins: $vgpr0, $vgpr1 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vcc(s1) = G_ICMP intpred(eq), %0, %1 - %3:sgpr(s1) = G_CONSTANT i1 true - %4:vcc(s1) = COPY %3(s1) - %5:vcc(s1) = G_XOR %2, %4 - G_BRCOND %5(s1), %bb.1 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vcc(i1) = G_ICMP intpred(eq), %0(i32), %1 + %3:sgpr(i1) = G_CONSTANT i1 true + %4:vcc(i1) = COPY %3(i1) + %5:vcc(i1) = G_XOR %2, %4 + G_BRCOND %5(i1), %bb.1 bb.1: + + ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-bswap.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-bswap.mir index 5b8c2840b0156..cbc57c3fd95b1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-bswap.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-bswap.mir @@ -19,6 +19,7 @@ body: | ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16711935 ; GFX7-NEXT: [[V_BFI_B32_e64_:%[0-9]+]]:vgpr_32 = V_BFI_B32_e64 [[S_MOV_B32_]], [[V_ALIGNBIT_B32_e64_1]], [[V_ALIGNBIT_B32_e64_]], implicit $exec ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_BFI_B32_e64_]] + ; ; GFX8-LABEL: name: bswap_i32_vv ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} @@ -26,7 +27,7 @@ body: | ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 66051 ; GFX8-NEXT: [[V_PERM_B32_e64_:%[0-9]+]]:vgpr_32 = V_PERM_B32_e64 0, [[COPY]], [[S_MOV_B32_]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_PERM_B32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = G_BSWAP %0 - S_ENDPGM 0, implicit %1 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = G_BSWAP %0 + S_ENDPGM 0, implicit %1(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector-trunc.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector-trunc.v2s16.mir index 3b4f66b82193f..1a22ef7d441f8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector-trunc.v2s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector-trunc.v2s16.mir @@ -19,10 +19,10 @@ body: | ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[COPY]], [[COPY1]] ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC %0(i32), %1(i32) + S_ENDPGM 0, implicit %2(<2 x i16>) ... --- @@ -42,12 +42,12 @@ body: | ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GFX9PLUS-NEXT: [[S_PACK_LH_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LH_B32_B16 [[COPY]], [[COPY1]] ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_PACK_LH_B32_B16_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = G_CONSTANT i32 16 - %3:sgpr(s32) = G_LSHR %1, %2 - %4:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %3 - S_ENDPGM 0, implicit %4 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i32) = G_CONSTANT i32 16 + %3:sgpr(i32) = G_LSHR %1, %2(i32) + %4:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC %0(i32), %3(i32) + S_ENDPGM 0, implicit %4(<2 x i16>) ... # s_pack_hl_b32 was introduced in GFX11 @@ -78,12 +78,12 @@ body: | ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GFX11-NEXT: [[S_PACK_HL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_HL_B32_B16 [[COPY1]], [[COPY]] ; GFX11-NEXT: S_ENDPGM 0, implicit [[S_PACK_HL_B32_B16_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = G_CONSTANT i32 16 - %3:sgpr(s32) = G_LSHR %1, %2 - %4:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %3, %0 - S_ENDPGM 0, implicit %4 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i32) = G_CONSTANT i32 16 + %3:sgpr(i32) = G_LSHR %1, %2(i32) + %4:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC %3(i32), %0(i32) + S_ENDPGM 0, implicit %4(<2 x i16>) ... 
--- @@ -103,13 +103,13 @@ body: | ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GFX9PLUS-NEXT: [[S_PACK_HH_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_HH_B32_B16 [[COPY]], [[COPY1]] ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_PACK_HH_B32_B16_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = G_CONSTANT i32 16 - %3:sgpr(s32) = G_LSHR %0, %2 - %4:sgpr(s32) = G_LSHR %1, %2 - %5:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %3, %4 - S_ENDPGM 0, implicit %5 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i32) = G_CONSTANT i32 16 + %3:sgpr(i32) = G_LSHR %0, %2(i32) + %4:sgpr(i32) = G_LSHR %1, %2(i32) + %5:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC %3(i32), %4(i32) + S_ENDPGM 0, implicit %5(<2 x i16>) ... # TODO: Should this use an and instead? @@ -130,10 +130,10 @@ body: | ; GFX9PLUS-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[COPY]], [[S_MOV_B32_]] ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = G_CONSTANT i32 0 - %2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = G_CONSTANT i32 0 + %2:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC %0(i32), %1(i32) + S_ENDPGM 0, implicit %2(<2 x i16>) ... --- @@ -153,10 +153,10 @@ body: | ; GFX9PLUS-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[S_MOV_B32_]], [[COPY]] ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = G_CONSTANT i32 0 - %2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %1, %0 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = G_CONSTANT i32 0 + %2:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC %1(i32), %0(i32) + S_ENDPGM 0, implicit %2(<2 x i16>) ... --- @@ -174,10 +174,10 @@ body: | ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[COPY]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = G_IMPLICIT_DEF - %2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = G_IMPLICIT_DEF + %2:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC %0(i32), %1(i32) + S_ENDPGM 0, implicit %2(<2 x i16>) ... --- @@ -197,10 +197,10 @@ body: | ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[DEF]], [[COPY]] ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = G_IMPLICIT_DEF - %2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %1, %0 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = G_IMPLICIT_DEF + %2:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC %1(i32), %0(i32) + S_ENDPGM 0, implicit %2(<2 x i16>) ... --- @@ -220,10 +220,10 @@ body: | ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[DEF]], [[COPY]] ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]] - %0:sgpr(s32) = G_IMPLICIT_DEF - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = G_IMPLICIT_DEF + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC %0(i32), %1(i32) + S_ENDPGM 0, implicit %2(<2 x i16>) ... 
--- @@ -241,10 +241,10 @@ body: | ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[COPY]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = G_IMPLICIT_DEF - %2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = G_IMPLICIT_DEF + %2:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC %0(i32), %1(i32) + S_ENDPGM 0, implicit %2(<2 x i16>) ... --- @@ -264,10 +264,10 @@ body: | ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[S_MOV_B32_]], [[COPY]] ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]] - %0:sgpr(s32) = G_CONSTANT i32 0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = G_CONSTANT i32 0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC %0(i32), %1(i32) + S_ENDPGM 0, implicit %2(<2 x i16>) ... --- @@ -287,10 +287,10 @@ body: | ; GFX9PLUS-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[COPY]], [[S_MOV_B32_]] ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = G_CONSTANT i32 0 - %2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = G_CONSTANT i32 0 + %2:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC %0(i32), %1(i32) + S_ENDPGM 0, implicit %2(<2 x i16>) ... --- @@ -309,12 +309,12 @@ body: | ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX9PLUS-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], 16, implicit-def dead $scc ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_LSHR_B32_]] - %0:sgpr(s32) = G_CONSTANT i32 0 - %1:sgpr(s32) = COPY $sgpr0 - %2:sgpr(s32) = G_CONSTANT i32 16 - %3:sgpr(s32) = G_LSHR %1, %2 - %4:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %3, %0 - S_ENDPGM 0, implicit %4 + %0:sgpr(i32) = G_CONSTANT i32 0 + %1:sgpr(i32) = COPY $sgpr0 + %2:sgpr(i32) = G_CONSTANT i32 16 + %3:sgpr(i32) = G_LSHR %1, %2(i32) + %4:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC %3(i32), %0(i32) + S_ENDPGM 0, implicit %4(<2 x i16>) ... # Don't use pack since it would duplicate the shift use @@ -337,12 +337,12 @@ body: | ; GFX9PLUS-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def dead $scc ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[COPY]], [[S_LSHR_B32_]] ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]], implicit [[S_LSHR_B32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = G_CONSTANT i32 16 - %3:sgpr(s32) = G_LSHR %1, %2 - %4:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %3 - S_ENDPGM 0, implicit %4, implicit %3 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i32) = G_CONSTANT i32 16 + %3:sgpr(i32) = G_LSHR %1, %2(i32) + %4:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC %0(i32), %3(i32) + S_ENDPGM 0, implicit %4(<2 x i16>), implicit %3(i32) ... 
--- @@ -364,13 +364,13 @@ body: | ; GFX9PLUS-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def dead $scc ; GFX9PLUS-NEXT: [[S_PACK_LH_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LH_B32_B16 [[S_LSHR_B32_]], [[COPY1]] ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_PACK_LH_B32_B16_]], implicit [[S_LSHR_B32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = G_CONSTANT i32 16 - %3:sgpr(s32) = G_LSHR %0, %2 - %4:sgpr(s32) = G_LSHR %1, %2 - %5:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %3, %4 - S_ENDPGM 0, implicit %5, implicit %3 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i32) = G_CONSTANT i32 16 + %3:sgpr(i32) = G_LSHR %0, %2(i32) + %4:sgpr(i32) = G_LSHR %1, %2(i32) + %5:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC %3(i32), %4(i32) + S_ENDPGM 0, implicit %5(<2 x i16>), implicit %3(i32) ... --- @@ -403,13 +403,13 @@ body: | ; GFX11-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def dead $scc ; GFX11-NEXT: [[S_PACK_HL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_HL_B32_B16 [[COPY]], [[S_LSHR_B32_]] ; GFX11-NEXT: S_ENDPGM 0, implicit [[S_PACK_HL_B32_B16_]], implicit [[S_LSHR_B32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = G_CONSTANT i32 16 - %3:sgpr(s32) = G_LSHR %0, %2 - %4:sgpr(s32) = G_LSHR %1, %2 - %5:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %3, %4 - S_ENDPGM 0, implicit %5, implicit %4 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i32) = G_CONSTANT i32 16 + %3:sgpr(i32) = G_LSHR %0, %2(i32) + %4:sgpr(i32) = G_LSHR %1, %2(i32) + %5:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC %3(i32), %4(i32) + S_ENDPGM 0, implicit %5(<2 x i16>), implicit %4(i32) ... --- @@ -431,12 +431,12 @@ body: | ; GFX9PLUS-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def dead $scc ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[COPY]], [[S_LSHR_B32_]] ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = G_CONSTANT i32 15 - %3:sgpr(s32) = G_LSHR %1, %2 - %4:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %3 - S_ENDPGM 0, implicit %4 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i32) = G_CONSTANT i32 15 + %3:sgpr(i32) = G_LSHR %1, %2(i32) + %4:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC %0(i32), %3(i32) + S_ENDPGM 0, implicit %4(<2 x i16>) ... --- @@ -459,13 +459,13 @@ body: | ; GFX9PLUS-NEXT: [[S_LSHR_B32_1:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def dead $scc ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[S_LSHR_B32_]], [[S_LSHR_B32_1]] ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = G_CONSTANT i32 15 - %3:sgpr(s32) = G_LSHR %0, %2 - %4:sgpr(s32) = G_LSHR %1, %2 - %5:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %3, %4 - S_ENDPGM 0, implicit %5 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i32) = G_CONSTANT i32 15 + %3:sgpr(i32) = G_LSHR %0, %2(i32) + %4:sgpr(i32) = G_LSHR %1, %2(i32) + %5:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC %3(i32), %4(i32) + S_ENDPGM 0, implicit %5(<2 x i16>) ... 
--- @@ -480,10 +480,10 @@ body: | ; GFX9PLUS-LABEL: name: test_build_vector_trunc_s_v2s16_constant_constant ; GFX9PLUS: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 29884539 ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_]] - %0:sgpr(s32) = G_CONSTANT i32 123 - %1:sgpr(s32) = G_CONSTANT i32 456 - %2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = G_CONSTANT i32 123 + %1:sgpr(i32) = G_CONSTANT i32 456 + %2:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC %0(i32), %1(i32) + S_ENDPGM 0, implicit %2(<2 x i16>) ... --- @@ -498,10 +498,10 @@ body: | ; GFX9PLUS-LABEL: name: test_build_vector_trunc_s_v2s16_constant_impdef ; GFX9PLUS: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 123 ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_]] - %0:sgpr(s32) = G_CONSTANT i32 123 - %1:sgpr(s32) = G_IMPLICIT_DEF - %2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = G_CONSTANT i32 123 + %1:sgpr(i32) = G_IMPLICIT_DEF + %2:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC %0(i32), %1(i32) + S_ENDPGM 0, implicit %2(<2 x i16>) ... --- @@ -518,10 +518,10 @@ body: | ; GFX9PLUS-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 123 ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[DEF]], [[S_MOV_B32_]] ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]] - %0:sgpr(s32) = G_IMPLICIT_DEF - %1:sgpr(s32) = G_CONSTANT i32 123 - %2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = G_IMPLICIT_DEF + %1:sgpr(i32) = G_CONSTANT i32 123 + %2:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC %0(i32), %1(i32) + S_ENDPGM 0, implicit %2(<2 x i16>) ... --- @@ -536,10 +536,10 @@ body: | ; GFX9PLUS-LABEL: name: test_build_vector_trunc_s_v2s16_impdef_impdef ; GFX9PLUS: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[DEF]] - %0:sgpr(s32) = G_IMPLICIT_DEF - %1:sgpr(s32) = G_IMPLICIT_DEF - %2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = G_IMPLICIT_DEF + %1:sgpr(i32) = G_IMPLICIT_DEF + %2:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC %0(i32), %1(i32) + S_ENDPGM 0, implicit %2(<2 x i16>) ... --- @@ -554,12 +554,12 @@ body: | ; GFX9PLUS-LABEL: name: test_build_vector_trunc_s_v2s16_zext_constant_zext_constant ; GFX9PLUS: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 29884539 ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_]] - %0:sgpr(s16) = G_CONSTANT i16 123 - %1:sgpr(s16) = G_CONSTANT i16 456 - %2:sgpr(s32) = G_ZEXT %0 - %3:sgpr(s32) = G_ZEXT %1 - %4:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %2, %3 - S_ENDPGM 0, implicit %4 + %0:sgpr(i16) = G_CONSTANT i16 123 + %1:sgpr(i16) = G_CONSTANT i16 456 + %2:sgpr(i32) = G_ZEXT %0(i16) + %3:sgpr(i32) = G_ZEXT %1(i16) + %4:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC %2(i32), %3(i32) + S_ENDPGM 0, implicit %4(<2 x i16>) ... 
--- @@ -580,12 +580,12 @@ body: | ; GFX9PLUS-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_MOV_B32_2]], [[S_MOV_B32_]], implicit-def dead $scc ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[S_AND_B32_]], [[S_AND_B32_1]] ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]] - %0:sgpr(s16) = G_IMPLICIT_DEF - %1:sgpr(s16) = G_CONSTANT i16 123 - %2:sgpr(s32) = G_ZEXT %0 - %3:sgpr(s32) = G_ZEXT %1 - %4:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %2, %3 - S_ENDPGM 0, implicit %4 + %0:sgpr(i16) = G_IMPLICIT_DEF + %1:sgpr(i16) = G_CONSTANT i16 123 + %2:sgpr(i32) = G_ZEXT %0(i16) + %3:sgpr(i32) = G_ZEXT %1(i16) + %4:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC %2(i32), %3(i32) + S_ENDPGM 0, implicit %4(<2 x i16>) ... --- @@ -600,12 +600,12 @@ body: | ; GFX9PLUS-LABEL: name: test_build_vector_trunc_s_v2s16_sext_constant_sext_constant ; GFX9PLUS: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294836208 ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_]] - %0:sgpr(s16) = G_CONSTANT i16 -16 - %1:sgpr(s16) = G_CONSTANT i16 -3 - %2:sgpr(s32) = G_SEXT %0 - %3:sgpr(s32) = G_SEXT %1 - %4:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %2, %3 - S_ENDPGM 0, implicit %4 + %0:sgpr(i16) = G_CONSTANT i16 -16 + %1:sgpr(i16) = G_CONSTANT i16 -3 + %2:sgpr(i32) = G_SEXT %0(i16) + %3:sgpr(i32) = G_SEXT %1(i16) + %4:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC %2(i32), %3(i32) + S_ENDPGM 0, implicit %4(<2 x i16>) ... --- @@ -620,12 +620,12 @@ body: | ; GFX9PLUS-LABEL: name: test_build_vector_trunc_s_v2s16_anyext_constant_anyext_constant ; GFX9PLUS: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 29884539 ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_]] - %0:sgpr(s16) = G_CONSTANT i16 123 - %1:sgpr(s16) = G_CONSTANT i16 456 - %2:sgpr(s32) = G_ANYEXT %0 - %3:sgpr(s32) = G_ANYEXT %1 - %4:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %2, %3 - S_ENDPGM 0, implicit %4 + %0:sgpr(i16) = G_CONSTANT i16 123 + %1:sgpr(i16) = G_CONSTANT i16 456 + %2:sgpr(i32) = G_ANYEXT %0(i16) + %3:sgpr(i32) = G_ANYEXT %1(i16) + %4:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC %2(i32), %3(i32) + S_ENDPGM 0, implicit %4(<2 x i16>) ... --- @@ -642,12 +642,12 @@ body: | ; GFX9PLUS-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 123 ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[DEF]], [[S_MOV_B32_]] ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]] - %0:sgpr(s16) = G_IMPLICIT_DEF - %1:sgpr(s16) = G_CONSTANT i16 123 - %2:sgpr(s32) = G_ANYEXT %0 - %3:sgpr(s32) = G_ANYEXT %1 - %4:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %2, %3 - S_ENDPGM 0, implicit %4 + %0:sgpr(i16) = G_IMPLICIT_DEF + %1:sgpr(i16) = G_CONSTANT i16 123 + %2:sgpr(i32) = G_ANYEXT %0(i16) + %3:sgpr(i32) = G_ANYEXT %1(i16) + %4:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC %2(i32), %3(i32) + S_ENDPGM 0, implicit %4(<2 x i16>) ... --- @@ -667,10 +667,10 @@ body: | ; GFX9PLUS-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 456 ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[COPY]], [[S_MOV_B32_]] ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = G_CONSTANT i32 456 - %2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = G_CONSTANT i32 456 + %2:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC %0(i32), %1(i32) + S_ENDPGM 0, implicit %2(<2 x i16>) ... 
--- @@ -690,10 +690,10 @@ body: | ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[S_MOV_B32_]], [[COPY]] ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]] - %0:sgpr(s32) = G_CONSTANT i32 456 - %1:sgpr(s32) = COPY $sgpr0 - %2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = G_CONSTANT i32 456 + %1:sgpr(i32) = COPY $sgpr0 + %2:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC %0(i32), %1(i32) + S_ENDPGM 0, implicit %2(<2 x i16>) ... --- @@ -713,10 +713,10 @@ body: | ; GFX9PLUS-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[COPY]], [[S_MOV_B32_]] ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = G_CONSTANT i32 0 - %2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = G_CONSTANT i32 0 + %2:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC %0(i32), %1(i32) + S_ENDPGM 0, implicit %2(<2 x i16>) ... --- @@ -736,8 +736,8 @@ body: | ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[S_MOV_B32_]], [[COPY]] ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]] - %0:sgpr(s32) = G_CONSTANT i32 0 - %1:sgpr(s32) = COPY $sgpr0 - %2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = G_CONSTANT i32 0 + %1:sgpr(i32) = COPY $sgpr0 + %2:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC %0(i32), %1(i32) + S_ENDPGM 0, implicit %2(<2 x i16>) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector.mir index f4531bc83877d..495ed1eb79d58 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector.mir @@ -18,10 +18,10 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(<2 x s32>) = G_BUILD_VECTOR %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(<2 x i32>) = G_BUILD_VECTOR %0(i32), %1(i32) + S_ENDPGM 0, implicit %2(<2 x i32>) ... --- @@ -41,10 +41,10 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(<2 x s32>) = G_BUILD_VECTOR %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:vgpr(<2 x i32>) = G_BUILD_VECTOR %0(i32), %1(i32) + S_ENDPGM 0, implicit %2(<2 x i32>) ... 
--- @@ -64,10 +64,10 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] - %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr(s32) = COPY $sgpr0 - %2:vgpr(<2 x s32>) = G_BUILD_VECTOR %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = COPY $sgpr0 + %2:vgpr(<2 x i32>) = G_BUILD_VECTOR %0(i32), %1(i32) + S_ENDPGM 0, implicit %2(<2 x i32>) ... --- @@ -87,10 +87,10 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(<2 x s32>) = G_BUILD_VECTOR %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(<2 x i32>) = G_BUILD_VECTOR %0(i32), %1(i32) + S_ENDPGM 0, implicit %2(<2 x i32>) ... --- @@ -110,8 +110,8 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3 ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[REG_SEQUENCE]] - %0:sgpr(s64) = COPY $sgpr0_sgpr1 - %1:sgpr(s64) = COPY $sgpr2_sgpr3 - %4:sgpr(<2 x s64>) = G_BUILD_VECTOR %0, %1 - $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %4 + %0:sgpr(i64) = COPY $sgpr0_sgpr1 + %1:sgpr(i64) = COPY $sgpr2_sgpr3 + %2:sgpr(<2 x i64>) = G_BUILD_VECTOR %0(i64), %1(i64) + $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %2(<2 x i64>) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-concat-vectors.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-concat-vectors.mir index d6a433ae00076..c72a2ce9ce383 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-concat-vectors.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-concat-vectors.mir @@ -18,10 +18,10 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[REG_SEQUENCE]] - %0:vgpr(<2 x s16>) = COPY $vgpr0 - %1:vgpr(<2 x s16>) = COPY $vgpr1 - %2:vgpr(<4 x s16>) = G_CONCAT_VECTORS %0, %1 - $vgpr0_vgpr1 = COPY %2 + %0:vgpr(<2 x i16>) = COPY $vgpr0 + %1:vgpr(<2 x i16>) = COPY $vgpr1 + %2:vgpr(<4 x i16>) = G_CONCAT_VECTORS %0(<2 x i16>), %1(<2 x i16>) + $vgpr0_vgpr1 = COPY %2(<4 x i16>) ... --- @@ -40,10 +40,10 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[REG_SEQUENCE]] - %0:sgpr(<2 x s16>) = COPY $sgpr0 - %1:vgpr(<2 x s16>) = COPY $vgpr1 - %2:vgpr(<4 x s16>) = G_CONCAT_VECTORS %0, %1 - $vgpr0_vgpr1 = COPY %2 + %0:sgpr(<2 x i16>) = COPY $sgpr0 + %1:vgpr(<2 x i16>) = COPY $vgpr1 + %2:vgpr(<4 x i16>) = G_CONCAT_VECTORS %0(<2 x i16>), %1(<2 x i16>) + $vgpr0_vgpr1 = COPY %2(<4 x i16>) ... 
--- @@ -62,10 +62,10 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[REG_SEQUENCE]] - %0:vgpr(<2 x s16>) = COPY $vgpr0 - %1:sgpr(<2 x s16>) = COPY $sgpr0 - %2:vgpr(<4 x s16>) = G_CONCAT_VECTORS %0, %1 - $vgpr0_vgpr1 = COPY %2 + %0:vgpr(<2 x i16>) = COPY $vgpr0 + %1:sgpr(<2 x i16>) = COPY $sgpr0 + %2:vgpr(<4 x i16>) = G_CONCAT_VECTORS %0(<2 x i16>), %1(<2 x i16>) + $vgpr0_vgpr1 = COPY %2(<4 x i16>) ... --- @@ -84,10 +84,10 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[REG_SEQUENCE]] - %0:sgpr(<2 x s16>) = COPY $sgpr0 - %1:sgpr(<2 x s16>) = COPY $sgpr1 - %2:sgpr(<4 x s16>) = G_CONCAT_VECTORS %0, %1 - $sgpr0_sgpr1 = COPY %2 + %0:sgpr(<2 x i16>) = COPY $sgpr0 + %1:sgpr(<2 x i16>) = COPY $sgpr1 + %2:sgpr(<4 x i16>) = G_CONCAT_VECTORS %0(<2 x i16>), %1(<2 x i16>) + $sgpr0_sgpr1 = COPY %2(<4 x i16>) ... --- @@ -107,11 +107,11 @@ body: | ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_96 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2 ; GCN-NEXT: $sgpr0_sgpr1_sgpr2 = COPY [[REG_SEQUENCE]] - %0:sgpr(<2 x s16>) = COPY $sgpr0 - %1:sgpr(<2 x s16>) = COPY $sgpr1 - %2:sgpr(<2 x s16>) = COPY $sgpr2 - %3:sgpr(<6 x s16>) = G_CONCAT_VECTORS %0, %1, %2 - $sgpr0_sgpr1_sgpr2 = COPY %3 + %0:sgpr(<2 x i16>) = COPY $sgpr0 + %1:sgpr(<2 x i16>) = COPY $sgpr1 + %2:sgpr(<2 x i16>) = COPY $sgpr2 + %3:sgpr(<6 x i16>) = G_CONCAT_VECTORS %0(<2 x i16>), %1(<2 x i16>), %2(<2 x i16>) + $sgpr0_sgpr1_sgpr2 = COPY %3(<6 x i16>) ... --- @@ -131,11 +131,11 @@ body: | ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2 ; GCN-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[REG_SEQUENCE]] - %0:vgpr(<2 x s16>) = COPY $vgpr0 - %1:vgpr(<2 x s16>) = COPY $vgpr1 - %2:vgpr(<2 x s16>) = COPY $vgpr2 - %3:vgpr(<6 x s16>) = G_CONCAT_VECTORS %0, %1, %2 - $vgpr0_vgpr1_vgpr2 = COPY %3 + %0:vgpr(<2 x i16>) = COPY $vgpr0 + %1:vgpr(<2 x i16>) = COPY $vgpr1 + %2:vgpr(<2 x i16>) = COPY $vgpr2 + %3:vgpr(<6 x i16>) = G_CONCAT_VECTORS %0(<2 x i16>), %1(<2 x i16>), %2(<2 x i16>) + $vgpr0_vgpr1_vgpr2 = COPY %3(<6 x i16>) ... --- @@ -156,12 +156,12 @@ body: | ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[REG_SEQUENCE]] - %0:sgpr(<2 x s16>) = COPY $sgpr0 - %1:sgpr(<2 x s16>) = COPY $sgpr1 - %2:sgpr(<2 x s16>) = COPY $sgpr2 - %3:sgpr(<2 x s16>) = COPY $sgpr3 - %4:sgpr(<8 x s16>) = G_CONCAT_VECTORS %0, %1, %2, %3 - $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %4 + %0:sgpr(<2 x i16>) = COPY $sgpr0 + %1:sgpr(<2 x i16>) = COPY $sgpr1 + %2:sgpr(<2 x i16>) = COPY $sgpr2 + %3:sgpr(<2 x i16>) = COPY $sgpr3 + %4:sgpr(<8 x i16>) = G_CONCAT_VECTORS %0(<2 x i16>), %1(<2 x i16>), %2(<2 x i16>), %3(<2 x i16>) + $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %4(<8 x i16>) ... 
--- @@ -182,12 +182,12 @@ body: | ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[REG_SEQUENCE]] - %0:vgpr(<2 x s16>) = COPY $vgpr0 - %1:vgpr(<2 x s16>) = COPY $vgpr1 - %2:vgpr(<2 x s16>) = COPY $vgpr2 - %3:vgpr(<2 x s16>) = COPY $vgpr3 - %4:vgpr(<8 x s16>) = G_CONCAT_VECTORS %0, %1, %2, %3 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %4 + %0:vgpr(<2 x i16>) = COPY $vgpr0 + %1:vgpr(<2 x i16>) = COPY $vgpr1 + %2:vgpr(<2 x i16>) = COPY $vgpr2 + %3:vgpr(<2 x i16>) = COPY $vgpr3 + %4:vgpr(<8 x i16>) = G_CONCAT_VECTORS %0(<2 x i16>), %1(<2 x i16>), %2(<2 x i16>), %3(<2 x i16>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %4(<8 x i16>) ... --- @@ -206,10 +206,10 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3 ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[REG_SEQUENCE]] - %0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 - %1:sgpr(<4 x s16>) = COPY $sgpr2_sgpr3 - %2:sgpr(<8 x s16>) = G_CONCAT_VECTORS %0, %1 - $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %2 + %0:sgpr(<4 x i16>) = COPY $sgpr0_sgpr1 + %1:sgpr(<4 x i16>) = COPY $sgpr2_sgpr3 + %2:sgpr(<8 x i16>) = G_CONCAT_VECTORS %0(<4 x i16>), %1(<4 x i16>) + $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %2(<8 x i16>) ... --- @@ -228,10 +228,10 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3 ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[REG_SEQUENCE]] - %0:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3 - %2:vgpr(<8 x s16>) = G_CONCAT_VECTORS %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + %0:vgpr(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:vgpr(<4 x i16>) = COPY $vgpr2_vgpr3 + %2:vgpr(<8 x i16>) = G_CONCAT_VECTORS %0(<4 x i16>), %1(<4 x i16>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<8 x i16>) ... --- @@ -253,13 +253,13 @@ body: | ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_160 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3, [[COPY4]], %subreg.sub4 ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 = COPY [[REG_SEQUENCE]] - %0:sgpr(<2 x s16>) = COPY $sgpr0 - %1:sgpr(<2 x s16>) = COPY $sgpr1 - %2:sgpr(<2 x s16>) = COPY $sgpr2 - %3:sgpr(<2 x s16>) = COPY $sgpr3 - %4:sgpr(<2 x s16>) = COPY $sgpr4 - %5:sgpr(<10 x s16>) = G_CONCAT_VECTORS %0, %1, %2, %3, %4 - $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 = COPY %5 + %0:sgpr(<2 x i16>) = COPY $sgpr0 + %1:sgpr(<2 x i16>) = COPY $sgpr1 + %2:sgpr(<2 x i16>) = COPY $sgpr2 + %3:sgpr(<2 x i16>) = COPY $sgpr3 + %4:sgpr(<2 x i16>) = COPY $sgpr4 + %5:sgpr(<10 x i16>) = G_CONCAT_VECTORS %0(<2 x i16>), %1(<2 x i16>), %2(<2 x i16>), %3(<2 x i16>), %4(<2 x i16>) + $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 = COPY %5(<10 x i16>) ... 
--- @@ -281,13 +281,13 @@ body: | ; GCN-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_160 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3, [[COPY4]], %subreg.sub4 ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY [[REG_SEQUENCE]] - %0:vgpr(<2 x s16>) = COPY $vgpr0 - %1:vgpr(<2 x s16>) = COPY $vgpr1 - %2:vgpr(<2 x s16>) = COPY $vgpr2 - %3:vgpr(<2 x s16>) = COPY $vgpr3 - %4:vgpr(<2 x s16>) = COPY $vgpr4 - %5:vgpr(<10 x s16>) = G_CONCAT_VECTORS %0, %1, %2, %3, %4 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY %5 + %0:vgpr(<2 x i16>) = COPY $vgpr0 + %1:vgpr(<2 x i16>) = COPY $vgpr1 + %2:vgpr(<2 x i16>) = COPY $vgpr2 + %3:vgpr(<2 x i16>) = COPY $vgpr3 + %4:vgpr(<2 x i16>) = COPY $vgpr4 + %5:vgpr(<10 x i16>) = G_CONCAT_VECTORS %0(<2 x i16>), %1(<2 x i16>), %2(<2 x i16>), %3(<2 x i16>), %4(<2 x i16>) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY %5(<10 x i16>) ... --- @@ -307,11 +307,11 @@ body: | ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_192 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3, [[COPY2]], %subreg.sub4_sub5 ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] - %0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 - %1:sgpr(<4 x s16>) = COPY $sgpr2_sgpr3 - %2:sgpr(<4 x s16>) = COPY $sgpr4_sgpr5 - %3:sgpr(<12 x s16>) = G_CONCAT_VECTORS %0, %1, %2 - S_ENDPGM 0, implicit %3 + %0:sgpr(<4 x i16>) = COPY $sgpr0_sgpr1 + %1:sgpr(<4 x i16>) = COPY $sgpr2_sgpr3 + %2:sgpr(<4 x i16>) = COPY $sgpr4_sgpr5 + %3:sgpr(<12 x i16>) = G_CONCAT_VECTORS %0(<4 x i16>), %1(<4 x i16>), %2(<4 x i16>) + S_ENDPGM 0, implicit %3(<12 x i16>) ... --- @@ -331,11 +331,11 @@ body: | ; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_192 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3, [[COPY2]], %subreg.sub4_sub5 ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] - %0:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3 - %2:vgpr(<4 x s16>) = COPY $vgpr4_vgpr5 - %3:vgpr(<12 x s16>) = G_CONCAT_VECTORS %0, %1, %2 - S_ENDPGM 0, implicit %3 + %0:vgpr(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:vgpr(<4 x i16>) = COPY $vgpr2_vgpr3 + %2:vgpr(<4 x i16>) = COPY $vgpr4_vgpr5 + %3:vgpr(<12 x i16>) = G_CONCAT_VECTORS %0(<4 x i16>), %1(<4 x i16>), %2(<4 x i16>) + S_ENDPGM 0, implicit %3(<12 x i16>) ... --- @@ -356,12 +356,12 @@ body: | ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_256 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3, [[COPY2]], %subreg.sub4_sub5, [[COPY3]], %subreg.sub6_sub7 ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[REG_SEQUENCE]] - %0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 - %1:sgpr(<4 x s16>) = COPY $sgpr2_sgpr3 - %2:sgpr(<4 x s16>) = COPY $sgpr4_sgpr5 - %3:sgpr(<4 x s16>) = COPY $sgpr6_sgpr7 - %4:sgpr(<16 x s16>) = G_CONCAT_VECTORS %0, %1, %2, %3 - $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY %4 + %0:sgpr(<4 x i16>) = COPY $sgpr0_sgpr1 + %1:sgpr(<4 x i16>) = COPY $sgpr2_sgpr3 + %2:sgpr(<4 x i16>) = COPY $sgpr4_sgpr5 + %3:sgpr(<4 x i16>) = COPY $sgpr6_sgpr7 + %4:sgpr(<16 x i16>) = G_CONCAT_VECTORS %0(<4 x i16>), %1(<4 x i16>), %2(<4 x i16>), %3(<4 x i16>) + $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY %4(<16 x i16>) ... 
--- @@ -380,10 +380,10 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY $sgpr4_sgpr5_sgpr6_sgpr7 ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_256 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2_sub3, [[COPY1]], %subreg.sub4_sub5_sub6_sub7 ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[REG_SEQUENCE]] - %0:sgpr(<8 x s16>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %1:sgpr(<8 x s16>) = COPY $sgpr4_sgpr5_sgpr6_sgpr7 - %2:sgpr(<16 x s16>) = G_CONCAT_VECTORS %0, %1 - $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY %2 + %0:sgpr(<8 x i16>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %1:sgpr(<8 x i16>) = COPY $sgpr4_sgpr5_sgpr6_sgpr7 + %2:sgpr(<16 x i16>) = G_CONCAT_VECTORS %0(<8 x i16>), %1(<8 x i16>) + $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY %2(<16 x i16>) ... --- @@ -402,10 +402,10 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_256 = COPY $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7, [[COPY1]], %subreg.sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15 ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[REG_SEQUENCE]] - %0:sgpr(<16 x s16>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, - %1:sgpr(<16 x s16>) = COPY $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - %4:sgpr(<32 x s16>) = G_CONCAT_VECTORS %0, %1 - $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %4 + %0:sgpr(<16 x i16>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + %1:sgpr(<16 x i16>) = COPY $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + %2:sgpr(<32 x i16>) = G_CONCAT_VECTORS %0(<16 x i16>), %1(<16 x i16>) + $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %2(<32 x i16>) ... 
--- @@ -430,16 +430,16 @@ body: | ; GCN-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY $sgpr14_sgpr15 ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3, [[COPY2]], %subreg.sub4_sub5, [[COPY3]], %subreg.sub6_sub7, [[COPY4]], %subreg.sub8_sub9, [[COPY5]], %subreg.sub10_sub11, [[COPY6]], %subreg.sub12_sub13, [[COPY7]], %subreg.sub14_sub15 ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[REG_SEQUENCE]] - %0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 - %1:sgpr(<4 x s16>) = COPY $sgpr2_sgpr3 - %2:sgpr(<4 x s16>) = COPY $sgpr4_sgpr5 - %3:sgpr(<4 x s16>) = COPY $sgpr6_sgpr7 - %4:sgpr(<4 x s16>) = COPY $sgpr8_sgpr9 - %5:sgpr(<4 x s16>) = COPY $sgpr10_sgpr11 - %6:sgpr(<4 x s16>) = COPY $sgpr12_sgpr13 - %7:sgpr(<4 x s16>) = COPY $sgpr14_sgpr15 - %8:sgpr(<32 x s16>) = G_CONCAT_VECTORS %0, %1, %2, %3, %4, %5, %6, %7 - $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %8 + %0:sgpr(<4 x i16>) = COPY $sgpr0_sgpr1 + %1:sgpr(<4 x i16>) = COPY $sgpr2_sgpr3 + %2:sgpr(<4 x i16>) = COPY $sgpr4_sgpr5 + %3:sgpr(<4 x i16>) = COPY $sgpr6_sgpr7 + %4:sgpr(<4 x i16>) = COPY $sgpr8_sgpr9 + %5:sgpr(<4 x i16>) = COPY $sgpr10_sgpr11 + %6:sgpr(<4 x i16>) = COPY $sgpr12_sgpr13 + %7:sgpr(<4 x i16>) = COPY $sgpr14_sgpr15 + %8:sgpr(<32 x i16>) = G_CONCAT_VECTORS %0(<4 x i16>), %1(<4 x i16>), %2(<4 x i16>), %3(<4 x i16>), %4(<4 x i16>), %5(<4 x i16>), %6(<4 x i16>), %7(<4 x i16>) + $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %8(<32 x i16>) ... --- @@ -464,16 +464,16 @@ body: | ; GCN-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY $vgpr14_vgpr15 ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3, [[COPY2]], %subreg.sub4_sub5, [[COPY3]], %subreg.sub6_sub7, [[COPY4]], %subreg.sub8_sub9, [[COPY5]], %subreg.sub10_sub11, [[COPY6]], %subreg.sub12_sub13, [[COPY7]], %subreg.sub14_sub15 ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[REG_SEQUENCE]] - %0:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3 - %2:vgpr(<4 x s16>) = COPY $vgpr4_vgpr5 - %3:vgpr(<4 x s16>) = COPY $vgpr6_vgpr7 - %4:vgpr(<4 x s16>) = COPY $vgpr8_vgpr9 - %5:vgpr(<4 x s16>) = COPY $vgpr10_vgpr11 - %6:vgpr(<4 x s16>) = COPY $vgpr12_vgpr13 - %7:vgpr(<4 x s16>) = COPY $vgpr14_vgpr15 - %8:vgpr(<32 x s16>) = G_CONCAT_VECTORS %0, %1, %2, %3, %4, %5, %6, %7 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %8 + %0:vgpr(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:vgpr(<4 x i16>) = COPY $vgpr2_vgpr3 + %2:vgpr(<4 x i16>) = COPY $vgpr4_vgpr5 + %3:vgpr(<4 x i16>) = COPY $vgpr6_vgpr7 + %4:vgpr(<4 x i16>) = COPY $vgpr8_vgpr9 + %5:vgpr(<4 x i16>) = COPY $vgpr10_vgpr11 + %6:vgpr(<4 x i16>) = COPY $vgpr12_vgpr13 + %7:vgpr(<4 x i16>) = COPY $vgpr14_vgpr15 + %8:vgpr(<32 x i16>) = G_CONCAT_VECTORS %0(<4 x i16>), %1(<4 x i16>), %2(<4 x i16>), %3(<4 x i16>), %4(<4 x i16>), %5(<4 x i16>), %6(<4 x i16>), %7(<4 x i16>) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %8(<32 x i16>) ... 
@@ -497,10 +497,10 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3 ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[REG_SEQUENCE]] - %0:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1 - %1:sgpr(<2 x s32>) = COPY $sgpr2_sgpr3 - %4:sgpr(<4 x s32>) = G_CONCAT_VECTORS %0, %1 - $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %4 + %0:sgpr(<2 x i32>) = COPY $sgpr0_sgpr1 + %1:sgpr(<2 x i32>) = COPY $sgpr2_sgpr3 + %2:sgpr(<4 x i32>) = G_CONCAT_VECTORS %0(<2 x i32>), %1(<2 x i32>) + $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %2(<4 x i32>) ... --- @@ -519,10 +519,10 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3 ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[REG_SEQUENCE]] - %0:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:vgpr(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:vgpr(<4 x s32>) = G_CONCAT_VECTORS %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + %0:vgpr(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:vgpr(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:vgpr(<4 x i32>) = G_CONCAT_VECTORS %0(<2 x i32>), %1(<2 x i32>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<4 x i32>) ... --- @@ -543,12 +543,12 @@ body: | ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_256 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3, [[COPY2]], %subreg.sub4_sub5, [[COPY3]], %subreg.sub6_sub7 ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[REG_SEQUENCE]] - %0:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1 - %1:sgpr(<2 x s32>) = COPY $sgpr2_sgpr3 - %2:sgpr(<2 x s32>) = COPY $sgpr4_sgpr5 - %3:sgpr(<2 x s32>) = COPY $sgpr6_sgpr7 - %4:sgpr(<8 x s32>) = G_CONCAT_VECTORS %0, %1, %2, %3 - $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY %4 + %0:sgpr(<2 x i32>) = COPY $sgpr0_sgpr1 + %1:sgpr(<2 x i32>) = COPY $sgpr2_sgpr3 + %2:sgpr(<2 x i32>) = COPY $sgpr4_sgpr5 + %3:sgpr(<2 x i32>) = COPY $sgpr6_sgpr7 + %4:sgpr(<8 x i32>) = G_CONCAT_VECTORS %0(<2 x i32>), %1(<2 x i32>), %2(<2 x i32>), %3(<2 x i32>) + $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY %4(<8 x i32>) ... --- @@ -568,10 +568,10 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY $sgpr4_sgpr5_sgpr6_sgpr7 ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_256 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2_sub3, [[COPY1]], %subreg.sub4_sub5_sub6_sub7 ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[REG_SEQUENCE]] - %0:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %1:sgpr(<4 x s32>) = COPY $sgpr4_sgpr5_sgpr6_sgpr7 - %2:sgpr(<8 x s32>) = G_CONCAT_VECTORS %0, %1 - $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY %2 + %0:sgpr(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %1:sgpr(<4 x i32>) = COPY $sgpr4_sgpr5_sgpr6_sgpr7 + %2:sgpr(<8 x i32>) = G_CONCAT_VECTORS %0(<4 x i32>), %1(<4 x i32>) + $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY %2(<8 x i32>) ... 
--- @@ -590,10 +590,10 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_256 = COPY $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7, [[COPY1]], %subreg.sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15 ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[REG_SEQUENCE]] - %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, - %1:sgpr(<8 x s32>) = COPY $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - %4:sgpr(<16 x s32>) = G_CONCAT_VECTORS %0, %1 - $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %4 + %0:sgpr(<8 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + %1:sgpr(<8 x i32>) = COPY $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + %2:sgpr(<16 x i32>) = G_CONCAT_VECTORS %0(<8 x i32>), %1(<8 x i32>) + $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %2(<16 x i32>) ... --- @@ -618,16 +618,16 @@ body: | ; GCN-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY $vgpr14_vgpr15 ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3, [[COPY2]], %subreg.sub4_sub5, [[COPY3]], %subreg.sub6_sub7, [[COPY4]], %subreg.sub8_sub9, [[COPY5]], %subreg.sub10_sub11, [[COPY6]], %subreg.sub12_sub13, [[COPY7]], %subreg.sub14_sub15 ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[REG_SEQUENCE]] - %0:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:vgpr(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:vgpr(<2 x s32>) = COPY $vgpr4_vgpr5 - %3:vgpr(<2 x s32>) = COPY $vgpr6_vgpr7 - %4:vgpr(<2 x s32>) = COPY $vgpr8_vgpr9 - %5:vgpr(<2 x s32>) = COPY $vgpr10_vgpr11 - %6:vgpr(<2 x s32>) = COPY $vgpr12_vgpr13 - %7:vgpr(<2 x s32>) = COPY $vgpr14_vgpr15 - %8:vgpr(<16 x s32>) = G_CONCAT_VECTORS %0, %1, %2, %3, %4, %5, %6, %7 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %8 + %0:vgpr(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:vgpr(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:vgpr(<2 x i32>) = COPY $vgpr4_vgpr5 + %3:vgpr(<2 x i32>) = COPY $vgpr6_vgpr7 + %4:vgpr(<2 x i32>) = COPY $vgpr8_vgpr9 + %5:vgpr(<2 x i32>) = COPY $vgpr10_vgpr11 + %6:vgpr(<2 x i32>) = COPY $vgpr12_vgpr13 + %7:vgpr(<2 x i32>) = COPY $vgpr14_vgpr15 + %8:vgpr(<16 x i32>) = G_CONCAT_VECTORS %0(<2 x i32>), %1(<2 x i32>), %2(<2 x i32>), %3(<2 x i32>), %4(<2 x i32>), %5(<2 x i32>), %6(<2 x i32>), %7(<2 x i32>) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %8(<16 x i32>) ... 
--- @@ -646,10 +646,10 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_512 = COPY $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_1024 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15, [[COPY1]], %subreg.sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31 ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = COPY [[REG_SEQUENCE]] - %0:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - %1:sgpr(<16 x s32>) = COPY $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 - %2:sgpr(<32 x s32>) = G_CONCAT_VECTORS %0, %1 - $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = COPY %2 + %0:sgpr(<16 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + %1:sgpr(<16 x i32>) = COPY $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + %2:sgpr(<32 x i32>) = G_CONCAT_VECTORS %0(<16 x i32>), %1(<16 x i32>) + $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = COPY %2(<32 x i32>) ... --- @@ -668,10 +668,10 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY $sgpr4_sgpr5_sgpr6_sgpr7 ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_256 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2_sub3, [[COPY1]], %subreg.sub4_sub5_sub6_sub7 ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[REG_SEQUENCE]] - %0:sgpr(<2 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %1:sgpr(<2 x s64>) = COPY $sgpr4_sgpr5_sgpr6_sgpr7 - %3:sgpr(<4 x s64>) = G_CONCAT_VECTORS %0, %1 - $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY %3 + %0:sgpr(<2 x i64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %1:sgpr(<2 x i64>) = COPY $sgpr4_sgpr5_sgpr6_sgpr7 + %2:sgpr(<4 x i64>) = G_CONCAT_VECTORS %0(<2 x i64>), %1(<2 x i64>) + $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY %2(<4 x i64>) ... --- @@ -690,10 +690,10 @@ body: | ; GCN-NEXT: [[DEF1:%[0-9]+]]:sgpr_192 = IMPLICIT_DEF ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_384 = REG_SEQUENCE [[DEF]], %subreg.sub0_sub1_sub2_sub3_sub4_sub5, [[DEF1]], %subreg.sub6_sub7_sub8_sub9_sub10_sub11 ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] - %0:sgpr(<3 x s64>) = G_IMPLICIT_DEF - %1:sgpr(<3 x s64>) = G_IMPLICIT_DEF - %2:sgpr(<6 x s64>) = G_CONCAT_VECTORS %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(<3 x i64>) = G_IMPLICIT_DEF + %1:sgpr(<3 x i64>) = G_IMPLICIT_DEF + %2:sgpr(<6 x i64>) = G_CONCAT_VECTORS %0(<3 x i64>), %1(<3 x i64>) + S_ENDPGM 0, implicit %2(<6 x i64>) ... 
--- @@ -712,10 +712,10 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_256 = COPY $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7, [[COPY1]], %subreg.sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15 ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[REG_SEQUENCE]] - %0:sgpr(<4 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, - %1:sgpr(<4 x s64>) = COPY $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - %4:sgpr(<8 x s64>) = G_CONCAT_VECTORS %0, %1 - $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %4 + %0:sgpr(<4 x i64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + %1:sgpr(<4 x i64>) = COPY $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + %2:sgpr(<8 x i64>) = G_CONCAT_VECTORS %0(<4 x i64>), %1(<4 x i64>) + $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %2(<8 x i64>) ... --- @@ -736,12 +736,12 @@ body: | ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15 ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2_sub3, [[COPY1]], %subreg.sub4_sub5_sub6_sub7, [[COPY2]], %subreg.sub8_sub9_sub10_sub11, [[COPY3]], %subreg.sub12_sub13_sub14_sub15 ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[REG_SEQUENCE]] - %0:sgpr(<2 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %1:sgpr(<2 x s64>) = COPY $sgpr4_sgpr5_sgpr6_sgpr7 - %2:sgpr(<2 x s64>) = COPY $sgpr8_sgpr9_sgpr10_sgpr11 - %3:sgpr(<2 x s64>) = COPY $sgpr12_sgpr13_sgpr14_sgpr15 - %4:sgpr(<8 x s64>) = G_CONCAT_VECTORS %0, %1, %2, %3 - $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %4 + %0:sgpr(<2 x i64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %1:sgpr(<2 x i64>) = COPY $sgpr4_sgpr5_sgpr6_sgpr7 + %2:sgpr(<2 x i64>) = COPY $sgpr8_sgpr9_sgpr10_sgpr11 + %3:sgpr(<2 x i64>) = COPY $sgpr12_sgpr13_sgpr14_sgpr15 + %4:sgpr(<8 x i64>) = G_CONCAT_VECTORS %0(<2 x i64>), %1(<2 x i64>), %2(<2 x i64>), %3(<2 x i64>) + $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %4(<8 x i64>) ... --- @@ -762,8 +762,8 @@ body: | ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[REG_SEQUENCE]] %0:sgpr(<2 x p1>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(<2 x p1>) = COPY $sgpr4_sgpr5_sgpr6_sgpr7 - %3:sgpr(<4 x p1>) = G_CONCAT_VECTORS %0, %1 - $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY %3 + %2:sgpr(<4 x p1>) = G_CONCAT_VECTORS %0(<2 x p1>), %1(<2 x p1>) + $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY %2(<4 x p1>) ... --- @@ -784,8 +784,8 @@ body: | ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[REG_SEQUENCE]] %0:sgpr(<2 x p3>) = COPY $sgpr0_sgpr1 %1:sgpr(<2 x p3>) = COPY $sgpr2_sgpr3 - %2:sgpr(<4 x p3>) = G_CONCAT_VECTORS %0, %1 - $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %2 + %2:sgpr(<4 x p3>) = G_CONCAT_VECTORS %0(<2 x p3>), %1(<2 x p3>) + $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %2(<4 x p3>) ... 
--- @@ -810,6 +810,6 @@ body: | %1:sgpr(<2 x p3>) = COPY $sgpr2_sgpr3 %2:sgpr(<2 x p3>) = COPY $sgpr4_sgpr5 %3:sgpr(<2 x p3>) = COPY $sgpr6_sgpr7 - %4:sgpr(<8 x p3>) = G_CONCAT_VECTORS %0, %1, %2, %3 - $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY %4 + %4:sgpr(<8 x p3>) = G_CONCAT_VECTORS %0(<2 x p3>), %1(<2 x p3>), %2(<2 x p3>), %3(<2 x p3>) + $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY %4(<8 x p3>) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-constant.mir index 2368ea38e2d2b..3facfd64791c4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-constant.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-constant.mir @@ -26,12 +26,12 @@ body: | ; WAVE32-NEXT: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -54, implicit $exec ; WAVE32-NEXT: [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 27, implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]], implicit [[V_MOV_B32_e32_1]], implicit [[V_MOV_B32_e32_2]], implicit [[V_MOV_B32_e32_3]], implicit [[V_MOV_B32_e32_4]] - %0:vgpr(s32) = G_CONSTANT i32 0 - %1:vgpr(s32) = G_CONSTANT i32 1 - %2:vgpr(s32) = G_CONSTANT i32 -1 - %3:vgpr(s32) = G_CONSTANT i32 -54 - %4:vgpr(s32) = G_CONSTANT i32 27 - S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2, implicit %3, implicit %4 + %0:vgpr(i32) = G_CONSTANT i32 0 + %1:vgpr(i32) = G_CONSTANT i32 1 + %2:vgpr(i32) = G_CONSTANT i32 -1 + %3:vgpr(i32) = G_CONSTANT i32 -54 + %4:vgpr(i32) = G_CONSTANT i32 27 + S_ENDPGM 0, implicit %0(i32), implicit %1(i32), implicit %2(i32), implicit %3(i32), implicit %4(i32) ... --- @@ -57,12 +57,12 @@ body: | ; WAVE32-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -54 ; WAVE32-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 27 ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]], implicit [[S_MOV_B32_4]] - %0:sgpr(s32) = G_CONSTANT i32 0 - %1:sgpr(s32) = G_CONSTANT i32 1 - %2:sgpr(s32) = G_CONSTANT i32 -1 - %3:sgpr(s32) = G_CONSTANT i32 -54 - %4:sgpr(s32) = G_CONSTANT i32 27 - S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2, implicit %3, implicit %4 + %0:sgpr(i32) = G_CONSTANT i32 0 + %1:sgpr(i32) = G_CONSTANT i32 1 + %2:sgpr(i32) = G_CONSTANT i32 -1 + %3:sgpr(i32) = G_CONSTANT i32 -54 + %4:sgpr(i32) = G_CONSTANT i32 27 + S_ENDPGM 0, implicit %0(i32), implicit %1(i32), implicit %2(i32), implicit %3(i32), implicit %4(i32) ... --- @@ -88,12 +88,12 @@ body: | ; WAVE32-NEXT: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -54, implicit $exec ; WAVE32-NEXT: [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 27, implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]], implicit [[V_MOV_B32_e32_1]], implicit [[V_MOV_B32_e32_2]], implicit [[V_MOV_B32_e32_3]], implicit [[V_MOV_B32_e32_4]] - %0:vgpr(s16) = G_CONSTANT i16 0 - %1:vgpr(s16) = G_CONSTANT i16 1 - %2:vgpr(s16) = G_CONSTANT i16 -1 - %3:vgpr(s16) = G_CONSTANT i16 -54 - %4:vgpr(s16) = G_CONSTANT i16 27 - S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2, implicit %3, implicit %4 + %0:vgpr(i16) = G_CONSTANT i16 0 + %1:vgpr(i16) = G_CONSTANT i16 1 + %2:vgpr(i16) = G_CONSTANT i16 -1 + %3:vgpr(i16) = G_CONSTANT i16 -54 + %4:vgpr(i16) = G_CONSTANT i16 27 + S_ENDPGM 0, implicit %0(i16), implicit %1(i16), implicit %2(i16), implicit %3(i16), implicit %4(i16) ... 
--- @@ -119,12 +119,12 @@ body: | ; WAVE32-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -54 ; WAVE32-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 27 ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]], implicit [[S_MOV_B32_4]] - %0:sgpr(s16) = G_CONSTANT i16 0 - %1:sgpr(s16) = G_CONSTANT i16 1 - %2:sgpr(s16) = G_CONSTANT i16 -1 - %3:sgpr(s16) = G_CONSTANT i16 -54 - %4:sgpr(s16) = G_CONSTANT i16 27 - S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2, implicit %3, implicit %4 + %0:sgpr(i16) = G_CONSTANT i16 0 + %1:sgpr(i16) = G_CONSTANT i16 1 + %2:sgpr(i16) = G_CONSTANT i16 -1 + %3:sgpr(i16) = G_CONSTANT i16 -54 + %4:sgpr(i16) = G_CONSTANT i16 27 + S_ENDPGM 0, implicit %0(i16), implicit %1(i16), implicit %2(i16), implicit %3(i16), implicit %4(i16) ... --- @@ -156,15 +156,15 @@ body: | ; WAVE32-NEXT: [[V_MOV_B6:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4294967296, implicit $exec ; WAVE32-NEXT: [[V_MOV_B7:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -68719453481, implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_MOV_B]], implicit [[V_MOV_B1]], implicit [[V_MOV_B2]], implicit [[V_MOV_B3]], implicit [[V_MOV_B4]], implicit [[V_MOV_B5]], implicit [[V_MOV_B6]], implicit [[V_MOV_B7]] - %0:vgpr(s64) = G_CONSTANT i64 0 - %1:vgpr(s64) = G_CONSTANT i64 1 - %2:vgpr(s64) = G_CONSTANT i64 -1 - %3:vgpr(s64) = G_CONSTANT i64 -54 - %4:vgpr(s64) = G_CONSTANT i64 27 - %5:vgpr(s64) = G_CONSTANT i64 4294967295 - %6:vgpr(s64) = G_CONSTANT i64 4294967296 - %7:vgpr(s64) = G_CONSTANT i64 18446744004990098135 - S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2, implicit %3, implicit %4, implicit %5, implicit %6, implicit %7 + %0:vgpr(i64) = G_CONSTANT i64 0 + %1:vgpr(i64) = G_CONSTANT i64 1 + %2:vgpr(i64) = G_CONSTANT i64 -1 + %3:vgpr(i64) = G_CONSTANT i64 -54 + %4:vgpr(i64) = G_CONSTANT i64 27 + %5:vgpr(i64) = G_CONSTANT i64 4294967295 + %6:vgpr(i64) = G_CONSTANT i64 4294967296 + %7:vgpr(i64) = G_CONSTANT i64 -68719453481 + S_ENDPGM 0, implicit %0(i64), implicit %1(i64), implicit %2(i64), implicit %3(i64), implicit %4(i64), implicit %5(i64), implicit %6(i64), implicit %7(i64) ... --- @@ -196,15 +196,15 @@ body: | ; WAVE32-NEXT: [[S_MOV_B2:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294967296 ; WAVE32-NEXT: [[S_MOV_B3:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -68719453481 ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_MOV_B64_]], implicit [[S_MOV_B64_1]], implicit [[S_MOV_B64_2]], implicit [[S_MOV_B]], implicit [[S_MOV_B64_3]], implicit [[S_MOV_B1]], implicit [[S_MOV_B2]], implicit [[S_MOV_B3]] - %0:sgpr(s64) = G_CONSTANT i64 0 - %1:sgpr(s64) = G_CONSTANT i64 1 - %2:sgpr(s64) = G_CONSTANT i64 -1 - %3:sgpr(s64) = G_CONSTANT i64 -54 - %4:sgpr(s64) = G_CONSTANT i64 27 - %5:sgpr(s64) = G_CONSTANT i64 4294967295 - %6:sgpr(s64) = G_CONSTANT i64 4294967296 - %7:sgpr(s64) = G_CONSTANT i64 18446744004990098135 - S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2, implicit %3, implicit %4, implicit %5, implicit %6, implicit %7 + %0:sgpr(i64) = G_CONSTANT i64 0 + %1:sgpr(i64) = G_CONSTANT i64 1 + %2:sgpr(i64) = G_CONSTANT i64 -1 + %3:sgpr(i64) = G_CONSTANT i64 -54 + %4:sgpr(i64) = G_CONSTANT i64 27 + %5:sgpr(i64) = G_CONSTANT i64 4294967295 + %6:sgpr(i64) = G_CONSTANT i64 4294967296 + %7:sgpr(i64) = G_CONSTANT i64 -68719453481 + S_ENDPGM 0, implicit %0(i64), implicit %1(i64), implicit %2(i64), implicit %3(i64), implicit %4(i64), implicit %5(i64), implicit %6(i64), implicit %7(i64) ... 
--- @@ -225,9 +225,9 @@ body: | ; WAVE32: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 -1 ; WAVE32-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 0 ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]] - %0:vcc(s1) = G_CONSTANT i1 true - %1:vcc(s1) = G_CONSTANT i1 false - S_ENDPGM 0 , implicit %0 , implicit %1 + %0:vcc(i1) = G_CONSTANT i1 true + %1:vcc(i1) = G_CONSTANT i1 false + S_ENDPGM 0, implicit %0(i1), implicit %1(i1) ... @@ -260,7 +260,7 @@ body: | %2:sgpr(p3) = G_CONSTANT i32 -1 %3:sgpr(p3) = G_CONSTANT i32 -54 %4:sgpr(p3) = G_CONSTANT i32 27 - S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2, implicit %3, implicit %4 + S_ENDPGM 0, implicit %0(p3), implicit %1(p3), implicit %2(p3), implicit %3(p3), implicit %4(p3) ... --- @@ -291,7 +291,7 @@ body: | %2:vgpr(p3) = G_CONSTANT i32 -1 %3:vgpr(p3) = G_CONSTANT i32 -54 %4:vgpr(p3) = G_CONSTANT i32 27 - S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2, implicit %3, implicit %4 + S_ENDPGM 0, implicit %0(p3), implicit %1(p3), implicit %2(p3), implicit %3(p3), implicit %4(p3) ... --- @@ -323,7 +323,7 @@ body: | %2:sgpr(p2) = G_CONSTANT i32 -1 %3:sgpr(p2) = G_CONSTANT i32 -54 %4:sgpr(p2) = G_CONSTANT i32 27 - S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2, implicit %3, implicit %4 + S_ENDPGM 0, implicit %0(p2), implicit %1(p2), implicit %2(p2), implicit %3(p2), implicit %4(p2) ... --- @@ -354,7 +354,7 @@ body: | %2:vgpr(p2) = G_CONSTANT i32 -1 %3:vgpr(p2) = G_CONSTANT i32 -54 %4:vgpr(p2) = G_CONSTANT i32 27 - S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2, implicit %3, implicit %4 + S_ENDPGM 0, implicit %0(p2), implicit %1(p2), implicit %2(p2), implicit %3(p2), implicit %4(p2) ... --- @@ -386,7 +386,7 @@ body: | %2:sgpr(p5) = G_CONSTANT i32 -1 %3:sgpr(p5) = G_CONSTANT i32 -54 %4:sgpr(p5) = G_CONSTANT i32 27 - S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2, implicit %3, implicit %4 + S_ENDPGM 0, implicit %0(p5), implicit %1(p5), implicit %2(p5), implicit %3(p5), implicit %4(p5) ... --- @@ -417,7 +417,7 @@ body: | %2:vgpr(p5) = G_CONSTANT i32 -1 %3:vgpr(p5) = G_CONSTANT i32 -54 %4:vgpr(p5) = G_CONSTANT i32 27 - S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2, implicit %3, implicit %4 + S_ENDPGM 0, implicit %0(p5), implicit %1(p5), implicit %2(p5), implicit %3(p5), implicit %4(p5) ... --- @@ -449,7 +449,7 @@ body: | %2:sgpr(p6) = G_CONSTANT i32 -1 %3:sgpr(p6) = G_CONSTANT i32 -54 %4:sgpr(p6) = G_CONSTANT i32 27 - S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2, implicit %3, implicit %4 + S_ENDPGM 0, implicit %0(p6), implicit %1(p6), implicit %2(p6), implicit %3(p6), implicit %4(p6) ... --- @@ -480,7 +480,7 @@ body: | %2:vgpr(p6) = G_CONSTANT i32 -1 %3:vgpr(p6) = G_CONSTANT i32 -54 %4:vgpr(p6) = G_CONSTANT i32 27 - S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2, implicit %3, implicit %4 + S_ENDPGM 0, implicit %0(p6), implicit %1(p6), implicit %2(p6), implicit %3(p6), implicit %4(p6) ... --- @@ -519,8 +519,8 @@ body: | %4:sgpr(p1) = G_CONSTANT i64 27 %5:sgpr(p1) = G_CONSTANT i64 4294967295 %6:sgpr(p1) = G_CONSTANT i64 4294967296 - %7:sgpr(p1) = G_CONSTANT i64 18446744004990098135 - S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2, implicit %3, implicit %4, implicit %5, implicit %6, implicit %7 + %7:sgpr(p1) = G_CONSTANT i64 -68719453481 + S_ENDPGM 0, implicit %0(p1), implicit %1(p1), implicit %2(p1), implicit %3(p1), implicit %4(p1), implicit %5(p1), implicit %6(p1), implicit %7(p1) ... 
--- @@ -559,8 +559,8 @@ body: | %4:vgpr(p1) = G_CONSTANT i64 27 %5:vgpr(p1) = G_CONSTANT i64 4294967295 %6:vgpr(p1) = G_CONSTANT i64 4294967296 - %7:vgpr(p1) = G_CONSTANT i64 18446744004990098135 - S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2, implicit %3, implicit %4, implicit %5, implicit %6, implicit %7 + %7:vgpr(p1) = G_CONSTANT i64 -68719453481 + S_ENDPGM 0, implicit %0(p1), implicit %1(p1), implicit %2(p1), implicit %3(p1), implicit %4(p1), implicit %5(p1), implicit %6(p1), implicit %7(p1) ... --- @@ -599,8 +599,8 @@ body: | %4:sgpr(p0) = G_CONSTANT i64 27 %5:sgpr(p0) = G_CONSTANT i64 4294967295 %6:sgpr(p0) = G_CONSTANT i64 4294967296 - %7:sgpr(p0) = G_CONSTANT i64 18446744004990098135 - S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2, implicit %3, implicit %4, implicit %5, implicit %6, implicit %7 + %7:sgpr(p0) = G_CONSTANT i64 -68719453481 + S_ENDPGM 0, implicit %0(p0), implicit %1(p0), implicit %2(p0), implicit %3(p0), implicit %4(p0), implicit %5(p0), implicit %6(p0), implicit %7(p0) ... --- @@ -639,8 +639,8 @@ body: | %4:vgpr(p0) = G_CONSTANT i64 27 %5:vgpr(p0) = G_CONSTANT i64 4294967295 %6:vgpr(p0) = G_CONSTANT i64 4294967296 - %7:vgpr(p0) = G_CONSTANT i64 18446744004990098135 - S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2, implicit %3, implicit %4, implicit %5, implicit %6, implicit %7 + %7:vgpr(p0) = G_CONSTANT i64 -68719453481 + S_ENDPGM 0, implicit %0(p0), implicit %1(p0), implicit %2(p0), implicit %3(p0), implicit %4(p0), implicit %5(p0), implicit %6(p0), implicit %7(p0) ... --- name: constant_s_p4 @@ -678,8 +678,8 @@ body: | %4:sgpr(p4) = G_CONSTANT i64 27 %5:sgpr(p4) = G_CONSTANT i64 4294967295 %6:sgpr(p4) = G_CONSTANT i64 4294967296 - %7:sgpr(p4) = G_CONSTANT i64 18446744004990098135 - S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2, implicit %3, implicit %4, implicit %5, implicit %6, implicit %7 + %7:sgpr(p4) = G_CONSTANT i64 -68719453481 + S_ENDPGM 0, implicit %0(p4), implicit %1(p4), implicit %2(p4), implicit %3(p4), implicit %4(p4), implicit %5(p4), implicit %6(p4), implicit %7(p4) ... --- @@ -718,8 +718,8 @@ body: | %4:vgpr(p4) = G_CONSTANT i64 27 %5:vgpr(p4) = G_CONSTANT i64 4294967295 %6:vgpr(p4) = G_CONSTANT i64 4294967296 - %7:vgpr(p4) = G_CONSTANT i64 18446744004990098135 - S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2, implicit %3, implicit %4, implicit %5, implicit %6, implicit %7 + %7:vgpr(p4) = G_CONSTANT i64 -68719453481 + S_ENDPGM 0, implicit %0(p4), implicit %1(p4), implicit %2(p4), implicit %3(p4), implicit %4(p4), implicit %5(p4), implicit %6(p4), implicit %7(p4) ... --- @@ -760,13 +760,18 @@ body: | ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: bb.2: bb.0: - %0:sgpr(s1) = G_CONSTANT i1 true - %1:sgpr(s32) = G_ZEXT %0 - G_BRCOND %1, %bb.1 + successors: %bb.1(0x40000000), %bb.2(0x40000000) + + %0:sgpr(i1) = G_CONSTANT i1 true + %1:sgpr(i32) = G_ZEXT %0(i1) + G_BRCOND %1(i32), %bb.1 G_BR %bb.2 bb.1: + successors: %bb.2(0x80000000) bb.2: + + ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir index e07d635855cfe..3c19da3c998af 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir @@ -20,7 +20,7 @@ body: | ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY [[COPY]] ; WAVE64-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; WAVE64-NEXT: FLAT_STORE_DWORD [[COPY1]], [[DEF]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; WAVE64-NEXT: FLAT_STORE_DWORD [[COPY1]], [[DEF]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32), addrspace 1) ; ; WAVE32-LABEL: name: copy ; WAVE32: liveins: $sgpr2_sgpr3 @@ -28,11 +28,11 @@ body: | ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr2_sgpr3 ; WAVE32-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; WAVE32-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; WAVE32-NEXT: GLOBAL_STORE_DWORD_SADDR [[V_MOV_B32_e32_]], [[DEF]], [[COPY]], 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; WAVE32-NEXT: GLOBAL_STORE_DWORD_SADDR [[V_MOV_B32_e32_]], [[DEF]], [[COPY]], 0, 0, implicit $exec :: (store (i32), addrspace 1) %0:sgpr(p1) = COPY $sgpr2_sgpr3 - %1:vgpr(p1) = COPY %0 - %2:vgpr(s32) = G_IMPLICIT_DEF - G_STORE %2, %1 :: (store (s32), addrspace 1) + %1:vgpr(p1) = COPY %0(p1) + %2:vgpr(i32) = G_IMPLICIT_DEF + G_STORE %2(i32), %1(p1) :: (store (i32), addrspace 1) ... --- @@ -54,7 +54,7 @@ body: | ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY3]], implicit-def dead $scc ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE64-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec - ; WAVE64-NEXT: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; WAVE64-NEXT: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32), addrspace 1) ; ; WAVE32-LABEL: name: copy_vcc_bank_sgpr_bank ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $scc @@ -66,14 +66,14 @@ body: | ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY3]], implicit-def dead $scc ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE32-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec - ; WAVE32-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; WAVE32-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec :: (store (i32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s32) = COPY $vgpr3 - %3:sgpr(s1) = COPY $scc - %4:vcc(s1) = COPY %3 - %5:vgpr(s32) = G_SELECT %4, %1, %2 - G_STORE %5, %0 :: (store (s32), addrspace 1) + %1:vgpr(i32) = COPY $vgpr2 + %2:vgpr(i32) = COPY $vgpr3 + %3:sgpr(i1) = COPY $scc + %4:vcc(i1) = COPY %3(i1) + %5:vgpr(i32) = G_SELECT %4(i1), %1, %2 + G_STORE %5(i32), %0(p1) :: (store (i32), addrspace 1) ... 
--- @@ -98,7 +98,7 @@ body: | ; WAVE64-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY3]], implicit-def dead $scc ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_1]], implicit $exec ; WAVE64-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_1]], implicit $exec - ; WAVE64-NEXT: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; WAVE64-NEXT: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32), addrspace 1) ; ; WAVE32-LABEL: name: copy_vcc_bank_sgpr_bank_2_uses ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $scc @@ -112,16 +112,16 @@ body: | ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY3]], implicit-def dead $scc ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE32-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec - ; WAVE32-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_1]], 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; WAVE32-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_1]], 0, 0, implicit $exec :: (store (i32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s32) = COPY $vgpr3 - %3:sgpr(s1) = COPY $scc - %4:vcc(s1) = COPY %3 - %5:vgpr(s32) = G_SELECT %4, %1, %2 - %6:vcc(s1) = COPY %3 - %7:vgpr(s32) = G_SELECT %6, %1, %5 - G_STORE %7, %0 :: (store (s32), addrspace 1) + %1:vgpr(i32) = COPY $vgpr2 + %2:vgpr(i32) = COPY $vgpr3 + %3:sgpr(i1) = COPY $scc + %4:vcc(i1) = COPY %3(i1) + %5:vgpr(i32) = G_SELECT %4(i1), %1, %2 + %6:vcc(i1) = COPY %3(i1) + %7:vgpr(i32) = G_SELECT %6(i1), %1, %5 + G_STORE %7(i32), %0(p1) :: (store (i32), addrspace 1) ... --- @@ -142,7 +142,7 @@ body: | ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY $scc ; WAVE64-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[COPY3]], implicit $exec - ; WAVE64-NEXT: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; WAVE64-NEXT: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32), addrspace 1) ; ; WAVE32-LABEL: name: copy_vcc_bank_scc_physreg ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $scc @@ -152,13 +152,13 @@ body: | ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xm0_xexec = COPY $scc ; WAVE32-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[COPY3]], implicit $exec - ; WAVE32-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; WAVE32-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec :: (store (i32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s32) = COPY $vgpr3 - %3:vcc(s1) = COPY $scc - %5:vgpr(s32) = G_SELECT %3, %1, %2 - G_STORE %5, %0 :: (store (s32), addrspace 1) + %1:vgpr(i32) = COPY $vgpr2 + %2:vgpr(i32) = COPY $vgpr3 + %3:vcc(i1) = COPY $scc + %4:vgpr(i32) = G_SELECT %3(i1), %1, %2 + G_STORE %4(i32), %0(p1) :: (store (i32), addrspace 1) ... 
--- @@ -266,10 +266,10 @@ body: | ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY]] ; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY1]] - %0:sgpr(s32) = COPY $sgpr0 - %1:vcc(s1) = G_TRUNC %0 - %2:vcc(s1) = COPY %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:vcc(i1) = G_TRUNC %0(i32) + %2:vcc(i1) = COPY %1(i1) + S_ENDPGM 0, implicit %2(i1) ... @@ -296,8 +296,8 @@ body: | ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; WAVE32-NEXT: $vcc = COPY [[COPY]] ; WAVE32-NEXT: S_ENDPGM 0, implicit $vcc_lo - %0:sgpr(s64) = COPY $sgpr0_sgpr1 - $vcc = COPY %0 + %0:sgpr(i64) = COPY $sgpr0_sgpr1 + $vcc = COPY %0(i64) S_ENDPGM 0, implicit $vcc ... @@ -325,8 +325,8 @@ body: | ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; WAVE32-NEXT: $vcc_lo = COPY [[COPY]] ; WAVE32-NEXT: S_ENDPGM 0, implicit $vcc_lo - %0:sgpr(s32) = COPY $sgpr0 - $vcc_lo = COPY %0 + %0:sgpr(i32) = COPY $sgpr0 + $vcc_lo = COPY %0(i32) S_ENDPGM 0, implicit $vcc ... @@ -352,8 +352,8 @@ body: | ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $vcc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY]] - %0:sgpr(s64) = COPY $vcc - S_ENDPGM 0, implicit %0 + %0:sgpr(i64) = COPY $vcc + S_ENDPGM 0, implicit %0(i64) ... @@ -378,8 +378,8 @@ body: | ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $vcc_lo ; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY]] - %0:sgpr(s32) = COPY $vcc_lo - S_ENDPGM 0, implicit %0 + %0:sgpr(i32) = COPY $vcc_lo + S_ENDPGM 0, implicit %0(i32) ... @@ -410,10 +410,10 @@ body: | ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY1]], implicit-def dead $scc ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U32_e64_]] - %0:sgpr(s64) = COPY $sgpr0_sgpr1 - %1:sgpr(s1) = G_TRUNC %0(s64) - %2:vcc(s1) = COPY %1(s1) - S_ENDPGM 0, implicit %2 + %0:sgpr(i64) = COPY $sgpr0_sgpr1 + %1:sgpr(i1) = G_TRUNC %0(i64) + %2:vcc(i1) = COPY %1(i1) + S_ENDPGM 0, implicit %2(i1) ... @@ -438,9 +438,9 @@ body: | ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_]] - %0:sgpr(s1) = G_CONSTANT i1 false - %1:vcc(s1) = COPY %0(s1) - S_ENDPGM 0, implicit %1 + %0:sgpr(i1) = G_CONSTANT i1 false + %1:vcc(i1) = COPY %0(i1) + S_ENDPGM 0, implicit %1(i1) ... @@ -465,8 +465,8 @@ body: | ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_]] - %0:sgpr(s1) = G_CONSTANT i1 true - %1:vcc(s1) = COPY %0(s1) - S_ENDPGM 0, implicit %1 + %0:sgpr(i1) = G_CONSTANT i1 true + %1:vcc(i1) = COPY %0(i1) + S_ENDPGM 0, implicit %1(i1) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ctlz-zero-undef.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ctlz-zero-undef.mir index 6820077ad4870..47d1580b7d3ed 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ctlz-zero-undef.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ctlz-zero-undef.mir @@ -17,9 +17,9 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; CHECK-NEXT: [[S_FLBIT_I32_B32_:%[0-9]+]]:sreg_32 = S_FLBIT_I32_B32 [[COPY]] ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_FLBIT_I32_B32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = G_CTLZ_ZERO_UNDEF %0 - S_ENDPGM 0, implicit %1 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = G_CTLZ_ZERO_UNDEF %0(i32) + S_ENDPGM 0, implicit %1(i32) ... 
--- @@ -38,9 +38,9 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; CHECK-NEXT: [[V_FFBH_U32_e64_:%[0-9]+]]:vgpr_32 = V_FFBH_U32_e64 [[COPY]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_FFBH_U32_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = G_CTLZ_ZERO_UNDEF %0 - S_ENDPGM 0, implicit %1 + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = G_CTLZ_ZERO_UNDEF %0(i32) + S_ENDPGM 0, implicit %1(i32) ... --- @@ -59,9 +59,9 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[V_FFBH_U32_e64_:%[0-9]+]]:vgpr_32 = V_FFBH_U32_e64 [[COPY]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_FFBH_U32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = G_CTLZ_ZERO_UNDEF %0 - S_ENDPGM 0, implicit %1 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = G_CTLZ_ZERO_UNDEF %0(i32) + S_ENDPGM 0, implicit %1(i32) ... --- @@ -80,7 +80,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[S_FLBIT_I32_B64_:%[0-9]+]]:sreg_32 = S_FLBIT_I32_B64 [[COPY]] ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_FLBIT_I32_B64_]] - %0:sgpr(s64) = COPY $sgpr0_sgpr1 - %1:sgpr(s32) = G_CTLZ_ZERO_UNDEF %0 - S_ENDPGM 0, implicit %1 + %0:sgpr(i64) = COPY $sgpr0_sgpr1 + %1:sgpr(i32) = G_CTLZ_ZERO_UNDEF %0(i64) + S_ENDPGM 0, implicit %1(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ctpop.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ctpop.mir index 3a2ed71e4d224..54a6a716de12e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ctpop.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ctpop.mir @@ -17,9 +17,9 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; CHECK-NEXT: [[S_BCNT1_I32_B32_:%[0-9]+]]:sreg_32 = S_BCNT1_I32_B32 [[COPY]], implicit-def dead $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_BCNT1_I32_B32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = G_CTPOP %0 - S_ENDPGM 0, implicit %1 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = G_CTPOP %0(i32) + S_ENDPGM 0, implicit %1(i32) ... --- @@ -38,9 +38,9 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY]], 0, implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BCNT_U32_B32_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = G_CTPOP %0 - S_ENDPGM 0, implicit %1 + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = G_CTPOP %0(i32) + S_ENDPGM 0, implicit %1(i32) ... --- @@ -59,9 +59,9 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY]], 0, implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BCNT_U32_B32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = G_CTPOP %0 - S_ENDPGM 0, implicit %1 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = G_CTPOP %0(i32) + S_ENDPGM 0, implicit %1(i32) ... --- @@ -81,11 +81,11 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY]], [[COPY1]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BCNT_U32_B32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = G_CTPOP %0 - %3:vgpr(s32) = G_ADD %2, %1 - S_ENDPGM 0, implicit %3 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = G_CTPOP %0(i32) + %3:vgpr(i32) = G_ADD %2, %1 + S_ENDPGM 0, implicit %3(i32) ... 
--- @@ -105,11 +105,11 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY]], [[COPY1]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BCNT_U32_B32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = G_CTPOP %0 - %3:vgpr(s32) = G_ADD %1, %2 - S_ENDPGM 0, implicit %3 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = G_CTPOP %0(i32) + %3:vgpr(i32) = G_ADD %1, %2 + S_ENDPGM 0, implicit %3(i32) ... # Test add+ctpop pattern with all scalars. This should stay scalar. @@ -131,11 +131,11 @@ body: | ; CHECK-NEXT: [[S_BCNT1_I32_B32_:%[0-9]+]]:sreg_32 = S_BCNT1_I32_B32 [[COPY]], implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BCNT1_I32_B32_]], [[COPY1]], implicit-def dead $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_ADD_I32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = G_CTPOP %0 - %3:sgpr(s32) = G_ADD %2, %1 - S_ENDPGM 0, implicit %3 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i32) = G_CTPOP %0(i32) + %3:sgpr(i32) = G_ADD %2, %1 + S_ENDPGM 0, implicit %3(i32) ... --- @@ -155,11 +155,11 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY]], [[COPY1]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BCNT_U32_B32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr(s32) = COPY $sgpr0 - %2:vgpr(s32) = G_CTPOP %0 - %3:vgpr(s32) = G_ADD %2, %1 - S_ENDPGM 0, implicit %3 + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = COPY $sgpr0 + %2:vgpr(i32) = G_CTPOP %0(i32) + %3:vgpr(i32) = G_ADD %2, %1 + S_ENDPGM 0, implicit %3(i32) ... # SGPR->VGPR ctpop with VALU add @@ -180,11 +180,11 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY1]], [[COPY]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BCNT_U32_B32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr(s32) = COPY $sgpr0 - %2:vgpr(s32) = G_CTPOP %1 - %3:vgpr(s32) = G_ADD %2, %0 - S_ENDPGM 0, implicit %3 + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = COPY $sgpr0 + %2:vgpr(i32) = G_CTPOP %1(i32) + %3:vgpr(i32) = G_ADD %2, %0 + S_ENDPGM 0, implicit %3(i32) ... # Scalar ctpop with VALU add @@ -205,11 +205,11 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY]], [[COPY1]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BCNT_U32_B32_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:sgpr(s32) = G_CTPOP %0 - %3:vgpr(s32) = G_ADD %2, %1 - S_ENDPGM 0, implicit %3 + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:sgpr(i32) = G_CTPOP %0(i32) + %3:vgpr(i32) = G_ADD %2, %1 + S_ENDPGM 0, implicit %3(i32) ... --- @@ -228,7 +228,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[S_BCNT1_I32_B64_:%[0-9]+]]:sreg_32 = S_BCNT1_I32_B64 [[COPY]], implicit-def dead $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_BCNT1_I32_B64_]] - %0:sgpr(s64) = COPY $sgpr0_sgpr1 - %1:sgpr(s32) = G_CTPOP %0 - S_ENDPGM 0, implicit %1 + %0:sgpr(i64) = COPY $sgpr0_sgpr1 + %1:sgpr(i32) = G_CTPOP %0(i64) + S_ENDPGM 0, implicit %1(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-cttz-zero-undef.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-cttz-zero-undef.mir index a0d5db38813a3..ac7e292cfed1d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-cttz-zero-undef.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-cttz-zero-undef.mir @@ -17,9 +17,9 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; CHECK-NEXT: [[S_FF1_I32_B32_:%[0-9]+]]:sreg_32 = S_FF1_I32_B32 [[COPY]] ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_FF1_I32_B32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = G_CTTZ_ZERO_UNDEF %0 - S_ENDPGM 0, implicit %1 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = G_CTTZ_ZERO_UNDEF %0(i32) + S_ENDPGM 0, implicit %1(i32) ... --- @@ -38,9 +38,9 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; CHECK-NEXT: [[V_FFBL_B32_e64_:%[0-9]+]]:vgpr_32 = V_FFBL_B32_e64 [[COPY]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_FFBL_B32_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = G_CTTZ_ZERO_UNDEF %0 - S_ENDPGM 0, implicit %1 + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = G_CTTZ_ZERO_UNDEF %0(i32) + S_ENDPGM 0, implicit %1(i32) ... --- @@ -59,9 +59,9 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[V_FFBL_B32_e64_:%[0-9]+]]:vgpr_32 = V_FFBL_B32_e64 [[COPY]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_FFBL_B32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = G_CTTZ_ZERO_UNDEF %0 - S_ENDPGM 0, implicit %1 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = G_CTTZ_ZERO_UNDEF %0(i32) + S_ENDPGM 0, implicit %1(i32) ... --- @@ -80,7 +80,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[S_FF1_I32_B64_:%[0-9]+]]:sreg_32 = S_FF1_I32_B64 [[COPY]] ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_FF1_I32_B64_]] - %0:sgpr(s64) = COPY $sgpr0_sgpr1 - %1:sgpr(s32) = G_CTTZ_ZERO_UNDEF %0 - S_ENDPGM 0, implicit %1 + %0:sgpr(i64) = COPY $sgpr0_sgpr1 + %1:sgpr(i32) = G_CTTZ_ZERO_UNDEF %0(i64) + S_ENDPGM 0, implicit %1(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract-vector-elt.mir index b59c98fde4f34..d20b925d0334e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract-vector-elt.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract-vector-elt.mir @@ -32,10 +32,10 @@ body: | ; GPRIDX-NEXT: $m0 = COPY [[COPY1]] ; GPRIDX-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] - %0:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1 - %1:sgpr(s32) = COPY $sgpr2 - %2:sgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(<2 x i32>) = COPY $sgpr0_sgpr1 + %1:sgpr(i32) = COPY $sgpr2 + %2:sgpr(i32) = G_EXTRACT_VECTOR_ELT %0(<2 x i32>), %1(i32) + S_ENDPGM 0, implicit %2(i32) ... --- @@ -64,10 +64,10 @@ body: | ; GPRIDX-NEXT: $m0 = COPY [[COPY1]] ; GPRIDX-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] - %0:sgpr(<3 x s32>) = COPY $sgpr0_sgpr1_sgpr2 - %1:sgpr(s32) = COPY $sgpr2 - %2:sgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(<3 x i32>) = COPY $sgpr0_sgpr1_sgpr2 + %1:sgpr(i32) = COPY $sgpr2 + %2:sgpr(i32) = G_EXTRACT_VECTOR_ELT %0(<3 x i32>), %1(i32) + S_ENDPGM 0, implicit %2(i32) ... 
--- @@ -96,10 +96,10 @@ body: | ; GPRIDX-NEXT: $m0 = COPY [[COPY1]] ; GPRIDX-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] - %0:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %1:sgpr(s32) = COPY $sgpr4 - %2:sgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %1:sgpr(i32) = COPY $sgpr4 + %2:sgpr(i32) = G_EXTRACT_VECTOR_ELT %0(<4 x i32>), %1(i32) + S_ENDPGM 0, implicit %2(i32) ... --- @@ -128,10 +128,10 @@ body: | ; GPRIDX-NEXT: $m0 = COPY [[COPY1]] ; GPRIDX-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] - %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - %1:sgpr(s32) = COPY $sgpr8 - %2:sgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(<8 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + %1:sgpr(i32) = COPY $sgpr8 + %2:sgpr(i32) = G_EXTRACT_VECTOR_ELT %0(<8 x i32>), %1(i32) + S_ENDPGM 0, implicit %2(i32) ... --- @@ -160,10 +160,10 @@ body: | ; GPRIDX-NEXT: $m0 = COPY [[COPY1]] ; GPRIDX-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] - %0:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - %1:sgpr(s32) = COPY $sgpr8 - %2:sgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(<16 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + %1:sgpr(i32) = COPY $sgpr8 + %2:sgpr(i32) = G_EXTRACT_VECTOR_ELT %0(<16 x i32>), %1(i32) + S_ENDPGM 0, implicit %2(i32) ... --- @@ -192,10 +192,10 @@ body: | ; GPRIDX-NEXT: $m0 = COPY [[COPY1]] ; GPRIDX-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] - %0:sgpr(<32 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 - %1:sgpr(s32) = COPY $sgpr40 - %2:sgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(<32 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + %1:sgpr(i32) = COPY $sgpr40 + %2:sgpr(i32) = G_EXTRACT_VECTOR_ELT %0(<32 x i32>), %1(i32) + S_ENDPGM 0, implicit %2(i32) ... --- @@ -224,10 +224,10 @@ body: | ; GPRIDX-NEXT: $m0 = COPY [[COPY1]] ; GPRIDX-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] - %0:sgpr(<2 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %1:sgpr(s32) = COPY $sgpr4 - %2:sgpr(s64) = G_EXTRACT_VECTOR_ELT %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(<2 x i64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %1:sgpr(i32) = COPY $sgpr4 + %2:sgpr(i64) = G_EXTRACT_VECTOR_ELT %0(<2 x i64>), %1(i32) + S_ENDPGM 0, implicit %2(i64) ... 
--- @@ -256,10 +256,10 @@ body: | ; GPRIDX-NEXT: $m0 = COPY [[COPY1]] ; GPRIDX-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] - %0:sgpr(<4 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - %1:sgpr(s32) = COPY $sgpr8 - %2:sgpr(s64) = G_EXTRACT_VECTOR_ELT %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(<4 x i64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + %1:sgpr(i32) = COPY $sgpr8 + %2:sgpr(i64) = G_EXTRACT_VECTOR_ELT %0(<4 x i64>), %1(i32) + S_ENDPGM 0, implicit %2(i64) ... --- @@ -288,10 +288,10 @@ body: | ; GPRIDX-NEXT: $m0 = COPY [[COPY1]] ; GPRIDX-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] - %0:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - %1:sgpr(s32) = COPY $sgpr8 - %2:sgpr(s64) = G_EXTRACT_VECTOR_ELT %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(<8 x i64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + %1:sgpr(i32) = COPY $sgpr8 + %2:sgpr(i64) = G_EXTRACT_VECTOR_ELT %0(<8 x i64>), %1(i32) + S_ENDPGM 0, implicit %2(i64) ... --- @@ -320,10 +320,10 @@ body: | ; GPRIDX-NEXT: $m0 = COPY [[COPY1]] ; GPRIDX-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] - %0:sgpr(<16 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 - %1:sgpr(s32) = COPY $sgpr40 - %2:sgpr(s64) = G_EXTRACT_VECTOR_ELT %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(<16 x i64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + %1:sgpr(i32) = COPY $sgpr40 + %2:sgpr(i64) = G_EXTRACT_VECTOR_ELT %0(<16 x i64>), %1(i32) + S_ENDPGM 0, implicit %2(i64) ... --- @@ -352,12 +352,12 @@ body: | ; GPRIDX-NEXT: $m0 = COPY [[COPY1]] ; GPRIDX-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub1, implicit $m0, implicit [[COPY]] ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] - %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - %1:sgpr(s32) = COPY $sgpr8 - %2:sgpr(s32) = G_CONSTANT i32 1 - %3:sgpr(s32) = G_ADD %1, %2 - %4:sgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %3 - S_ENDPGM 0, implicit %4 + %0:sgpr(<8 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + %1:sgpr(i32) = COPY $sgpr8 + %2:sgpr(i32) = G_CONSTANT i32 1 + %3:sgpr(i32) = G_ADD %1, %2 + %4:sgpr(i32) = G_EXTRACT_VECTOR_ELT %0(<8 x i32>), %3(i32) + S_ENDPGM 0, implicit %4(i32) ... 
--- @@ -390,12 +390,12 @@ body: | ; GPRIDX-NEXT: $m0 = COPY [[S_ADD_I32_]] ; GPRIDX-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] - %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - %1:sgpr(s32) = COPY $sgpr8 - %2:sgpr(s32) = G_CONSTANT i32 -1 - %3:sgpr(s32) = G_ADD %1, %2 - %4:sgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %3 - S_ENDPGM 0, implicit %4 + %0:sgpr(<8 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + %1:sgpr(i32) = COPY $sgpr8 + %2:sgpr(i32) = G_CONSTANT i32 -1 + %3:sgpr(i32) = G_ADD %1, %2 + %4:sgpr(i32) = G_EXTRACT_VECTOR_ELT %0(<8 x i32>), %3(i32) + S_ENDPGM 0, implicit %4(i32) ... --- @@ -424,12 +424,12 @@ body: | ; GPRIDX-NEXT: $m0 = COPY [[COPY1]] ; GPRIDX-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub7, implicit $m0, implicit [[COPY]] ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] - %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - %1:sgpr(s32) = COPY $sgpr8 - %2:sgpr(s32) = G_CONSTANT i32 7 - %3:sgpr(s32) = G_ADD %1, %2 - %4:sgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %3 - S_ENDPGM 0, implicit %4 + %0:sgpr(<8 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + %1:sgpr(i32) = COPY $sgpr8 + %2:sgpr(i32) = G_CONSTANT i32 7 + %3:sgpr(i32) = G_ADD %1, %2 + %4:sgpr(i32) = G_EXTRACT_VECTOR_ELT %0(<8 x i32>), %3(i32) + S_ENDPGM 0, implicit %4(i32) ... --- @@ -462,12 +462,12 @@ body: | ; GPRIDX-NEXT: $m0 = COPY [[S_ADD_I32_]] ; GPRIDX-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] - %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - %1:sgpr(s32) = COPY $sgpr8 - %2:sgpr(s32) = G_CONSTANT i32 8 - %3:sgpr(s32) = G_ADD %1, %2 - %4:sgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %3 - S_ENDPGM 0, implicit %4 + %0:sgpr(<8 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + %1:sgpr(i32) = COPY $sgpr8 + %2:sgpr(i32) = G_CONSTANT i32 8 + %3:sgpr(i32) = G_ADD %1, %2 + %4:sgpr(i32) = G_EXTRACT_VECTOR_ELT %0(<8 x i32>), %3(i32) + S_ENDPGM 0, implicit %4(i32) ... --- @@ -496,12 +496,12 @@ body: | ; GPRIDX-NEXT: $m0 = COPY [[COPY1]] ; GPRIDX-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub2_sub3, implicit $m0, implicit [[COPY]] ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] - %0:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - %1:sgpr(s32) = COPY $sgpr8 - %2:sgpr(s32) = G_CONSTANT i32 1 - %3:sgpr(s32) = G_ADD %1, %2 - %4:sgpr(s64) = G_EXTRACT_VECTOR_ELT %0, %3 - S_ENDPGM 0, implicit %4 + %0:sgpr(<8 x i64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + %1:sgpr(i32) = COPY $sgpr8 + %2:sgpr(i32) = G_CONSTANT i32 1 + %3:sgpr(i32) = G_ADD %1, %2 + %4:sgpr(i64) = G_EXTRACT_VECTOR_ELT %0(<8 x i64>), %3(i32) + S_ENDPGM 0, implicit %4(i64) ... 
--- @@ -530,12 +530,12 @@ body: | ; GPRIDX-NEXT: $m0 = COPY [[COPY1]] ; GPRIDX-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub4_sub5, implicit $m0, implicit [[COPY]] ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] - %0:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - %1:sgpr(s32) = COPY $sgpr8 - %2:sgpr(s32) = G_CONSTANT i32 2 - %3:sgpr(s32) = G_ADD %1, %2 - %4:sgpr(s64) = G_EXTRACT_VECTOR_ELT %0, %3 - S_ENDPGM 0, implicit %4 + %0:sgpr(<8 x i64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + %1:sgpr(i32) = COPY $sgpr8 + %2:sgpr(i32) = G_CONSTANT i32 2 + %3:sgpr(i32) = G_ADD %1, %2 + %4:sgpr(i64) = G_EXTRACT_VECTOR_ELT %0(<8 x i64>), %3(i32) + S_ENDPGM 0, implicit %4(i64) ... --- @@ -568,12 +568,12 @@ body: | ; GPRIDX-NEXT: $m0 = COPY [[S_ADD_I32_]] ; GPRIDX-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] - %0:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - %1:sgpr(s32) = COPY $sgpr8 - %2:sgpr(s32) = G_CONSTANT i32 -1 - %3:sgpr(s32) = G_ADD %1, %2 - %4:sgpr(s64) = G_EXTRACT_VECTOR_ELT %0, %3 - S_ENDPGM 0, implicit %4 + %0:sgpr(<8 x i64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + %1:sgpr(i32) = COPY $sgpr8 + %2:sgpr(i32) = G_CONSTANT i32 -1 + %3:sgpr(i32) = G_ADD %1, %2 + %4:sgpr(i64) = G_EXTRACT_VECTOR_ELT %0(<8 x i64>), %3(i32) + S_ENDPGM 0, implicit %4(i64) ... --- @@ -601,10 +601,10 @@ body: | ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; GPRIDX-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V2_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V2 [[COPY]], [[COPY1]], 3, implicit-def $m0, implicit $m0, implicit $exec ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_READ_GPR_IDX_B32_V2_]] - %0:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:sgpr(s32) = COPY $sgpr2 - %2:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:sgpr(i32) = COPY $sgpr2 + %2:vgpr(i32) = G_EXTRACT_VECTOR_ELT %0(<2 x i32>), %1(i32) + S_ENDPGM 0, implicit %2(i32) ... --- @@ -632,10 +632,10 @@ body: | ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; GPRIDX-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V3_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V3 [[COPY]], [[COPY1]], 3, implicit-def $m0, implicit $m0, implicit $exec ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_READ_GPR_IDX_B32_V3_]] - %0:vgpr(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:sgpr(s32) = COPY $sgpr2 - %2:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:sgpr(i32) = COPY $sgpr2 + %2:vgpr(i32) = G_EXTRACT_VECTOR_ELT %0(<3 x i32>), %1(i32) + S_ENDPGM 0, implicit %2(i32) ... 
--- @@ -663,10 +663,10 @@ body: | ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GPRIDX-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V4_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V4 [[COPY]], [[COPY1]], 3, implicit-def $m0, implicit $m0, implicit $exec ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_READ_GPR_IDX_B32_V4_]] - %0:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:sgpr(s32) = COPY $sgpr4 - %2:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:sgpr(i32) = COPY $sgpr4 + %2:vgpr(i32) = G_EXTRACT_VECTOR_ELT %0(<4 x i32>), %1(i32) + S_ENDPGM 0, implicit %2(i32) ... --- @@ -694,10 +694,10 @@ body: | ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 ; GPRIDX-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V8_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V8 [[COPY]], [[COPY1]], 3, implicit-def $m0, implicit $m0, implicit $exec ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_READ_GPR_IDX_B32_V8_]] - %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - %1:sgpr(s32) = COPY $sgpr8 - %2:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(<8 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:sgpr(i32) = COPY $sgpr8 + %2:vgpr(i32) = G_EXTRACT_VECTOR_ELT %0(<8 x i32>), %1(i32) + S_ENDPGM 0, implicit %2(i32) ... --- @@ -725,10 +725,10 @@ body: | ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 ; GPRIDX-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V16_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V16 [[COPY]], [[COPY1]], 3, implicit-def $m0, implicit $m0, implicit $exec ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_READ_GPR_IDX_B32_V16_]] - %0:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - %1:sgpr(s32) = COPY $sgpr8 - %2:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(<16 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:sgpr(i32) = COPY $sgpr8 + %2:vgpr(i32) = G_EXTRACT_VECTOR_ELT %0(<16 x i32>), %1(i32) + S_ENDPGM 0, implicit %2(i32) ... --- @@ -756,10 +756,10 @@ body: | ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr40 ; GPRIDX-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V32_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V32 [[COPY]], [[COPY1]], 3, implicit-def $m0, implicit $m0, implicit $exec ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_READ_GPR_IDX_B32_V32_]] - %0:vgpr(<32 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - %1:sgpr(s32) = COPY $sgpr40 - %2:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(<32 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + %1:sgpr(i32) = COPY $sgpr40 + %2:vgpr(i32) = G_EXTRACT_VECTOR_ELT %0(<32 x i32>), %1(i32) + S_ENDPGM 0, implicit %2(i32) ... 
--- @@ -787,12 +787,12 @@ body: | ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 ; GPRIDX-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V8_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V8 [[COPY]], [[COPY1]], 11, implicit-def $m0, implicit $m0, implicit $exec ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_READ_GPR_IDX_B32_V8_]] - %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - %1:sgpr(s32) = COPY $sgpr8 - %2:sgpr(s32) = G_CONSTANT i32 1 - %3:sgpr(s32) = G_ADD %1, %2 - %4:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(<8 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:sgpr(i32) = COPY $sgpr8 + %2:sgpr(i32) = G_CONSTANT i32 1 + %3:sgpr(i32) = G_ADD %1, %2 + %4:vgpr(i32) = G_EXTRACT_VECTOR_ELT %0(<8 x i32>), %3(i32) + S_ENDPGM 0, implicit %4(i32) ... --- @@ -824,12 +824,12 @@ body: | ; GPRIDX-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def dead $scc ; GPRIDX-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V8_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V8 [[COPY]], [[S_ADD_I32_]], 3, implicit-def $m0, implicit $m0, implicit $exec ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_READ_GPR_IDX_B32_V8_]] - %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - %1:sgpr(s32) = COPY $sgpr8 - %2:sgpr(s32) = G_CONSTANT i32 -1 - %3:sgpr(s32) = G_ADD %1, %2 - %4:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(<8 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:sgpr(i32) = COPY $sgpr8 + %2:sgpr(i32) = G_CONSTANT i32 -1 + %3:sgpr(i32) = G_ADD %1, %2 + %4:vgpr(i32) = G_EXTRACT_VECTOR_ELT %0(<8 x i32>), %3(i32) + S_ENDPGM 0, implicit %4(i32) ... --- @@ -857,12 +857,12 @@ body: | ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 ; GPRIDX-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V8_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V8 [[COPY]], [[COPY1]], 71, implicit-def $m0, implicit $m0, implicit $exec ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_READ_GPR_IDX_B32_V8_]] - %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - %1:sgpr(s32) = COPY $sgpr8 - %2:sgpr(s32) = G_CONSTANT i32 7 - %3:sgpr(s32) = G_ADD %1, %2 - %4:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(<8 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:sgpr(i32) = COPY $sgpr8 + %2:sgpr(i32) = G_CONSTANT i32 7 + %3:sgpr(i32) = G_ADD %1, %2 + %4:vgpr(i32) = G_EXTRACT_VECTOR_ELT %0(<8 x i32>), %3(i32) + S_ENDPGM 0, implicit %4(i32) ... --- @@ -894,12 +894,12 @@ body: | ; GPRIDX-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def dead $scc ; GPRIDX-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V8_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V8 [[COPY]], [[S_ADD_I32_]], 3, implicit-def $m0, implicit $m0, implicit $exec ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_READ_GPR_IDX_B32_V8_]] - %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - %1:sgpr(s32) = COPY $sgpr8 - %2:sgpr(s32) = G_CONSTANT i32 8 - %3:sgpr(s32) = G_ADD %1, %2 - %4:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(<8 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:sgpr(i32) = COPY $sgpr8 + %2:sgpr(i32) = G_CONSTANT i32 8 + %3:sgpr(i32) = G_ADD %1, %2 + %4:vgpr(i32) = G_EXTRACT_VECTOR_ELT %0(<8 x i32>), %3(i32) + S_ENDPGM 0, implicit %4(i32) ... 
--- @@ -928,10 +928,10 @@ body: | ; GPRIDX-NEXT: $m0 = COPY [[S_MOV_B32_]] ; GPRIDX-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] - %0:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %1:sgpr(s32) = G_CONSTANT i32 0 - %2:sgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %1:sgpr(i32) = G_CONSTANT i32 0 + %2:sgpr(i32) = G_EXTRACT_VECTOR_ELT %0(<4 x i32>), %1(i32) + S_ENDPGM 0, implicit %2(i32) ... --- @@ -959,8 +959,8 @@ body: | ; GPRIDX-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GPRIDX-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V4_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V4 [[COPY]], [[S_MOV_B32_]], 3, implicit-def $m0, implicit $m0, implicit $exec ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_READ_GPR_IDX_B32_V4_]] - %0:vgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %1:sgpr(s32) = G_CONSTANT i32 0 - %2:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %1:sgpr(i32) = G_CONSTANT i32 0 + %2:vgpr(i32) = G_EXTRACT_VECTOR_ELT %0(<4 x i32>), %1(i32) + S_ENDPGM 0, implicit %2(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract.mir index b8ef754d5de5e..dce3fa0a151eb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract.mir @@ -42,39 +42,39 @@ body: | ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]] ; CHECK-NEXT: $sgpr15 = COPY [[COPY15]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15 - %0:sgpr(s512) = G_IMPLICIT_DEF - %1:sgpr(s32) = G_EXTRACT %0:sgpr(s512), 0 - %2:sgpr(s32) = G_EXTRACT %0:sgpr(s512), 32 - %3:sgpr(s32) = G_EXTRACT %0:sgpr(s512), 64 - %4:sgpr(s32) = G_EXTRACT %0:sgpr(s512), 96 - %5:sgpr(s32) = G_EXTRACT %0:sgpr(s512), 128 - %6:sgpr(s32) = G_EXTRACT %0:sgpr(s512), 160 - %7:sgpr(s32) = G_EXTRACT %0:sgpr(s512), 192 - %8:sgpr(s32) = G_EXTRACT %0:sgpr(s512), 224 - %9:sgpr(s32) = G_EXTRACT %0:sgpr(s512), 256 - %10:sgpr(s32) = G_EXTRACT %0:sgpr(s512), 288 - %11:sgpr(s32) = G_EXTRACT %0:sgpr(s512), 320 - %12:sgpr(s32) = G_EXTRACT %0:sgpr(s512), 352 - %13:sgpr(s32) = G_EXTRACT %0:sgpr(s512), 384 - %14:sgpr(s32) = G_EXTRACT %0:sgpr(s512), 416 - %15:sgpr(s32) = G_EXTRACT %0:sgpr(s512), 448 - %16:sgpr(s32) = G_EXTRACT %0:sgpr(s512), 480 - $sgpr0 = COPY %1:sgpr(s32) - $sgpr1 = COPY %2:sgpr(s32) - $sgpr2 = COPY %3:sgpr(s32) - $sgpr3 = COPY %4:sgpr(s32) - $sgpr4 = COPY %5:sgpr(s32) - $sgpr5 = COPY %6:sgpr(s32) - $sgpr6 = COPY %7:sgpr(s32) - $sgpr7 = COPY %8:sgpr(s32) - $sgpr8 = COPY %9:sgpr(s32) - $sgpr9 = COPY %10:sgpr(s32) - $sgpr10 = COPY %11:sgpr(s32) - $sgpr11 = COPY %12:sgpr(s32) - $sgpr12 = COPY %13:sgpr(s32) - $sgpr13 = COPY %14:sgpr(s32) - $sgpr14 = COPY %15:sgpr(s32) - $sgpr15 = COPY %16:sgpr(s32) + %0:sgpr(i512) = G_IMPLICIT_DEF + %1:sgpr(i32) = G_EXTRACT %0(i512), 0 + %2:sgpr(i32) = G_EXTRACT %0(i512), 32 + %3:sgpr(i32) = G_EXTRACT %0(i512), 64 + %4:sgpr(i32) = G_EXTRACT %0(i512), 96 + %5:sgpr(i32) = G_EXTRACT %0(i512), 128 + %6:sgpr(i32) = G_EXTRACT %0(i512), 160 + %7:sgpr(i32) = G_EXTRACT %0(i512), 192 + %8:sgpr(i32) = G_EXTRACT %0(i512), 224 + %9:sgpr(i32) = G_EXTRACT %0(i512), 256 + %10:sgpr(i32) = G_EXTRACT %0(i512), 288 + 
%11:sgpr(i32) = G_EXTRACT %0(i512), 320 + %12:sgpr(i32) = G_EXTRACT %0(i512), 352 + %13:sgpr(i32) = G_EXTRACT %0(i512), 384 + %14:sgpr(i32) = G_EXTRACT %0(i512), 416 + %15:sgpr(i32) = G_EXTRACT %0(i512), 448 + %16:sgpr(i32) = G_EXTRACT %0(i512), 480 + $sgpr0 = COPY %1(i32) + $sgpr1 = COPY %2(i32) + $sgpr2 = COPY %3(i32) + $sgpr3 = COPY %4(i32) + $sgpr4 = COPY %5(i32) + $sgpr5 = COPY %6(i32) + $sgpr6 = COPY %7(i32) + $sgpr7 = COPY %8(i32) + $sgpr8 = COPY %9(i32) + $sgpr9 = COPY %10(i32) + $sgpr10 = COPY %11(i32) + $sgpr11 = COPY %12(i32) + $sgpr12 = COPY %13(i32) + $sgpr13 = COPY %14(i32) + $sgpr14 = COPY %15(i32) + $sgpr15 = COPY %16(i32) SI_RETURN_TO_EPILOG $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15 ... @@ -120,42 +120,42 @@ body: | ; CHECK-NEXT: [[COPY30:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub30 ; CHECK-NEXT: [[COPY31:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub31 ; CHECK-NEXT: S_ENDPGM 0, implicit [[DEF]], implicit [[COPY]], implicit [[COPY1]], implicit [[COPY2]], implicit [[COPY3]], implicit [[COPY4]], implicit [[COPY5]], implicit [[COPY6]], implicit [[COPY7]], implicit [[COPY8]], implicit [[COPY9]], implicit [[COPY10]], implicit [[COPY11]], implicit [[COPY12]], implicit [[COPY13]], implicit [[COPY14]], implicit [[COPY15]], implicit [[COPY16]], implicit [[COPY17]], implicit [[COPY18]], implicit [[COPY19]], implicit [[COPY20]], implicit [[COPY21]], implicit [[COPY22]], implicit [[COPY23]], implicit [[COPY24]], implicit [[COPY25]], implicit [[COPY26]], implicit [[COPY27]], implicit [[COPY28]], implicit [[COPY29]], implicit [[COPY30]], implicit [[COPY31]] - %0:sgpr(s1024) = G_IMPLICIT_DEF - %1:sgpr(s32) = G_EXTRACT %0:sgpr, 0 - %2:sgpr(s32) = G_EXTRACT %0:sgpr, 32 - %3:sgpr(s32) = G_EXTRACT %0:sgpr, 64 - %4:sgpr(s32) = G_EXTRACT %0:sgpr, 96 - %5:sgpr(s32) = G_EXTRACT %0:sgpr, 128 - %6:sgpr(s32) = G_EXTRACT %0:sgpr, 160 - %7:sgpr(s32) = G_EXTRACT %0:sgpr, 192 - %8:sgpr(s32) = G_EXTRACT %0:sgpr, 224 - %9:sgpr(s32) = G_EXTRACT %0:sgpr, 256 - %10:sgpr(s32) = G_EXTRACT %0:sgpr, 288 - %11:sgpr(s32) = G_EXTRACT %0:sgpr, 320 - %12:sgpr(s32) = G_EXTRACT %0:sgpr, 352 - %13:sgpr(s32) = G_EXTRACT %0:sgpr, 384 - %14:sgpr(s32) = G_EXTRACT %0:sgpr, 416 - %15:sgpr(s32) = G_EXTRACT %0:sgpr, 448 - %16:sgpr(s32) = G_EXTRACT %0:sgpr, 480 + %0:sgpr(i1024) = G_IMPLICIT_DEF + %1:sgpr(i32) = G_EXTRACT %0(i1024), 0 + %2:sgpr(i32) = G_EXTRACT %0(i1024), 32 + %3:sgpr(i32) = G_EXTRACT %0(i1024), 64 + %4:sgpr(i32) = G_EXTRACT %0(i1024), 96 + %5:sgpr(i32) = G_EXTRACT %0(i1024), 128 + %6:sgpr(i32) = G_EXTRACT %0(i1024), 160 + %7:sgpr(i32) = G_EXTRACT %0(i1024), 192 + %8:sgpr(i32) = G_EXTRACT %0(i1024), 224 + %9:sgpr(i32) = G_EXTRACT %0(i1024), 256 + %10:sgpr(i32) = G_EXTRACT %0(i1024), 288 + %11:sgpr(i32) = G_EXTRACT %0(i1024), 320 + %12:sgpr(i32) = G_EXTRACT %0(i1024), 352 + %13:sgpr(i32) = G_EXTRACT %0(i1024), 384 + %14:sgpr(i32) = G_EXTRACT %0(i1024), 416 + %15:sgpr(i32) = G_EXTRACT %0(i1024), 448 + %16:sgpr(i32) = G_EXTRACT %0(i1024), 480 + %17:sgpr(i32) = G_EXTRACT %0(i1024), 512 + %18:sgpr(i32) = G_EXTRACT %0(i1024), 544 + %19:sgpr(i32) = G_EXTRACT %0(i1024), 576 + %20:sgpr(i32) = G_EXTRACT %0(i1024), 608 + %21:sgpr(i32) = G_EXTRACT %0(i1024), 640 + %22:sgpr(i32) = G_EXTRACT %0(i1024), 672 + %23:sgpr(i32) = G_EXTRACT %0(i1024), 704 + %24:sgpr(i32) = G_EXTRACT %0(i1024), 736 + %25:sgpr(i32) = G_EXTRACT %0(i1024), 768 + %26:sgpr(i32) = G_EXTRACT %0(i1024), 800 + %27:sgpr(i32) = G_EXTRACT %0(i1024), 832 + %28:sgpr(i32) = G_EXTRACT 
%0(i1024), 864 + %29:sgpr(i32) = G_EXTRACT %0(i1024), 896 + %30:sgpr(i32) = G_EXTRACT %0(i1024), 928 + %31:sgpr(i32) = G_EXTRACT %0(i1024), 960 + %32:sgpr(i32) = G_EXTRACT %0(i1024), 992 + S_ENDPGM 0, implicit %0(i1024), implicit %1(i32), implicit %2(i32), implicit %3(i32), implicit %4(i32), implicit %5(i32), implicit %6(i32), implicit %7(i32), implicit %8(i32), implicit %9(i32), implicit %10(i32), implicit %11(i32), implicit %12(i32), implicit %13(i32), implicit %14(i32), implicit %15(i32), implicit %16(i32), implicit %17(i32), implicit %18(i32), implicit %19(i32), implicit %20(i32), implicit %21(i32), implicit %22(i32), implicit %23(i32), implicit %24(i32), implicit %25(i32), implicit %26(i32), implicit %27(i32), implicit %28(i32), implicit %29(i32), implicit %30(i32), implicit %31(i32), implicit %32(i32) - %17:sgpr(s32) = G_EXTRACT %0:sgpr, 512 - %18:sgpr(s32) = G_EXTRACT %0:sgpr, 544 - %19:sgpr(s32) = G_EXTRACT %0:sgpr, 576 - %20:sgpr(s32) = G_EXTRACT %0:sgpr, 608 - %21:sgpr(s32) = G_EXTRACT %0:sgpr, 640 - %22:sgpr(s32) = G_EXTRACT %0:sgpr, 672 - %23:sgpr(s32) = G_EXTRACT %0:sgpr, 704 - %24:sgpr(s32) = G_EXTRACT %0:sgpr, 736 - %25:sgpr(s32) = G_EXTRACT %0:sgpr, 768 - %26:sgpr(s32) = G_EXTRACT %0:sgpr, 800 - %27:sgpr(s32) = G_EXTRACT %0:sgpr, 832 - %28:sgpr(s32) = G_EXTRACT %0:sgpr, 864 - %29:sgpr(s32) = G_EXTRACT %0:sgpr, 896 - %30:sgpr(s32) = G_EXTRACT %0:sgpr, 928 - %31:sgpr(s32) = G_EXTRACT %0:sgpr, 960 - %32:sgpr(s32) = G_EXTRACT %0:sgpr, 992 - S_ENDPGM 0, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14, implicit %15, implicit %16, implicit %17, implicit %18, implicit %19, implicit %20, implicit %21, implicit %22, implicit %23, implicit %24, implicit %25, implicit %26, implicit %27, implicit %28, implicit %29, implicit %30, implicit %31, implicit %32 ... # TODO: Handle offset 32 @@ -171,10 +171,10 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY [[DEF]].sub0_sub1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[DEF]].sub2_sub3 ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY]], implicit [[COPY1]] - %0:sgpr(s128) = G_IMPLICIT_DEF - %1:sgpr(s64) = G_EXTRACT %0, 0 - %2:sgpr(s64) = G_EXTRACT %0, 64 - S_ENDPGM 0, implicit %1, implicit %2 + %0:sgpr(i128) = G_IMPLICIT_DEF + %1:sgpr(i64) = G_EXTRACT %0(i128), 0 + %2:sgpr(i64) = G_EXTRACT %0(i128), 64 + S_ENDPGM 0, implicit %1(i64), implicit %2(i64) ... @@ -192,8 +192,8 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_96 = COPY [[COPY]].sub0_sub1_sub2 ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY1]] - %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %1:sgpr(s96) = G_EXTRACT %0, 0 - S_ENDPGM 0, implicit %1 + %0:sgpr(i128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %1:sgpr(i96) = G_EXTRACT %0(i128), 0 + S_ENDPGM 0, implicit %1(i96) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fabs.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fabs.mir index ca75fd207607a..226d21bd60a47 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fabs.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fabs.mir @@ -45,9 +45,11 @@ body: | ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def dead $scc ; GFX10-NEXT: $sgpr0 = COPY [[S_AND_B32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = G_FABS %0 - $sgpr0 = COPY %1 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(f32) = G_BITCAST %0(i32) + %2:sgpr(f32) = G_FABS %1 + %3:sgpr(i32) = G_BITCAST %2(f32) + $sgpr0 = COPY %3(i32) ... --- @@ -90,9 +92,11 @@ body: | ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 ; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec ; GFX10-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = G_FABS %0 - $vgpr0 = COPY %1 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(f32) = G_BITCAST %0(i32) + %2:vgpr(f32) = G_FABS %1 + %3:vgpr(i32) = G_BITCAST %2(f32) + $vgpr0 = COPY %3(i32) ... --- @@ -107,33 +111,43 @@ body: | ; SI-LABEL: name: fabs_s32_vs ; SI: liveins: $sgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; SI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(s32) = G_FABS [[COPY]] - ; SI-NEXT: $vgpr0 = COPY [[FABS]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(f32) = G_FABS [[BITCAST]] + ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY [[FABS]](f32) + ; SI-NEXT: $vgpr0 = COPY [[COPY1]](i32) ; ; VI-LABEL: name: fabs_s32_vs ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; VI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(s32) = G_FABS [[COPY]] - ; VI-NEXT: $vgpr0 = COPY [[FABS]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(f32) = G_FABS [[BITCAST]] + ; VI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY [[FABS]](f32) + ; VI-NEXT: $vgpr0 = COPY [[COPY1]](i32) ; ; GFX9-LABEL: name: fabs_s32_vs ; GFX9: liveins: $sgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX9-NEXT: [[FABS:%[0-9]+]]:vgpr_32(s32) = G_FABS [[COPY]] - ; GFX9-NEXT: $vgpr0 = COPY [[FABS]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[FABS:%[0-9]+]]:vgpr_32(f32) = G_FABS [[BITCAST]] + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY [[FABS]](f32) + ; GFX9-NEXT: $vgpr0 = COPY [[COPY1]](i32) ; ; GFX10-LABEL: name: fabs_s32_vs ; GFX10: liveins: $sgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX10-NEXT: [[FABS:%[0-9]+]]:vgpr_32(s32) = G_FABS [[COPY]] - ; GFX10-NEXT: $vgpr0 = COPY [[FABS]](s32) - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = G_FABS %0 - $vgpr0 = COPY %1 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; GFX10-NEXT: [[FABS:%[0-9]+]]:vgpr_32(f32) = G_FABS [[BITCAST]] + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY [[FABS]](f32) + ; GFX10-NEXT: $vgpr0 = COPY [[COPY1]](i32) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(f32) = 
G_BITCAST %0(i32) + %2:vgpr(f32) = G_FABS %1 + %3:vgpr(i32) = G_BITCAST %2(f32) + $vgpr0 = COPY %3(i32) ... --- @@ -176,9 +190,11 @@ body: | ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147450879 ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def dead $scc ; GFX10-NEXT: $sgpr0 = COPY [[S_AND_B32_]] - %0:sgpr(<2 x s16>) = COPY $sgpr0 - %1:sgpr(<2 x s16>) = G_FABS %0 - $sgpr0 = COPY %1 + %0:sgpr(<2 x i16>) = COPY $sgpr0 + %1:sgpr(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %2:sgpr(<2 x f16>) = G_FABS %1 + %3:sgpr(<2 x i16>) = G_BITCAST %2(<2 x f16>) + $sgpr0 = COPY %3(<2 x i16>) ... --- @@ -221,11 +237,13 @@ body: | ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32767 ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def dead $scc ; GFX10-NEXT: $sgpr0 = COPY [[S_AND_B32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0 - %2:sgpr(s16) = G_FABS %1 - %3:sgpr(s32) = G_ANYEXT %2 - $sgpr0 = COPY %3 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(f16) = G_BITCAST %1(i16) + %3:sgpr(f16) = G_FABS %2 + %4:sgpr(i16) = G_BITCAST %3(f16) + %5:sgpr(i32) = G_ANYEXT %4(i16) + $sgpr0 = COPY %5(i32) ... --- @@ -268,11 +286,13 @@ body: | ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32767 ; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec ; GFX10-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s16) = G_TRUNC %0 - %2:vgpr(s16) = G_FABS %1 - %3:vgpr(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i16) = G_TRUNC %0(i32) + %2:vgpr(f16) = G_BITCAST %1(i16) + %3:vgpr(f16) = G_FABS %2 + %4:vgpr(i16) = G_BITCAST %3(f16) + %5:vgpr(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... 
--- @@ -288,43 +308,53 @@ body: | ; SI-LABEL: name: fabs_s16_vs ; SI: liveins: $sgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; SI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(s16) = G_FABS [[TRUNC]] - ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]](s16) - ; SI-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f16) = G_BITCAST [[TRUNC]](i16) + ; SI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(f16) = G_FABS [[BITCAST]] + ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i16) = COPY [[FABS]](f16) + ; SI-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY [[COPY1]](i16) + ; SI-NEXT: $vgpr0 = COPY [[COPY2]](i32) ; ; VI-LABEL: name: fabs_s16_vs ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(s16) = G_FABS [[TRUNC]] - ; VI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]](s16) - ; VI-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f16) = G_BITCAST [[TRUNC]](i16) + ; VI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(f16) = G_FABS [[BITCAST]] + ; VI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i16) = COPY [[FABS]](f16) + ; VI-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY [[COPY1]](i16) + ; VI-NEXT: $vgpr0 = COPY [[COPY2]](i32) ; ; GFX9-LABEL: name: fabs_s16_vs ; GFX9: liveins: $sgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[FABS:%[0-9]+]]:vgpr_32(s16) = G_FABS [[TRUNC]] - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[FABS:%[0-9]+]]:vgpr_32(f16) = G_FABS [[BITCAST]] + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i16) = COPY [[FABS]](f16) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY [[COPY1]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[COPY2]](i32) ; ; GFX10-LABEL: name: fabs_s16_vs ; GFX10: liveins: $sgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX10-NEXT: [[FABS:%[0-9]+]]:vgpr_32(s16) = G_FABS [[TRUNC]] - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]](s16) - ; GFX10-NEXT: $vgpr0 = COPY [[COPY1]](s32) - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0 - %2:vgpr(s16) = G_FABS %1 - %3:vgpr(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[FABS:%[0-9]+]]:vgpr_32(f16) = G_FABS [[BITCAST]] + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i16) = COPY [[FABS]](f16) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY [[COPY1]](i16) + ; GFX10-NEXT: $vgpr0 = COPY [[COPY2]](i32) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(f16) = G_BITCAST %1(i16) + %3:vgpr(f16) = G_FABS %2 + %4:vgpr(i16) = G_BITCAST %3(f16) + 
%5:vgpr(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... --- @@ -367,9 +397,11 @@ body: | ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147450879 ; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec ; GFX10-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]] - %0:vgpr(<2 x s16>) = COPY $vgpr0 - %1:vgpr(<2 x s16>) = G_FABS %0 - $vgpr0 = COPY %1 + %0:vgpr(<2 x i16>) = COPY $vgpr0 + %1:vgpr(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %2:vgpr(<2 x f16>) = G_FABS %1 + %3:vgpr(<2 x i16>) = G_BITCAST %2(<2 x f16>) + $vgpr0 = COPY %3(<2 x i16>) ... --- @@ -384,33 +416,43 @@ body: | ; SI-LABEL: name: fabs_v2s16_vs ; SI: liveins: $sgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; SI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FABS [[COPY]] - ; SI-NEXT: $vgpr0 = COPY [[FABS]](<2 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr0 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:sgpr(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; SI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(<2 x f16>) = G_FABS [[BITCAST]] + ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(<2 x i16>) = COPY [[FABS]](<2 x f16>) + ; SI-NEXT: $vgpr0 = COPY [[COPY1]](<2 x i16>) ; ; VI-LABEL: name: fabs_v2s16_vs ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; VI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FABS [[COPY]] - ; VI-NEXT: $vgpr0 = COPY [[FABS]](<2 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr0 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:sgpr(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; VI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(<2 x f16>) = G_FABS [[BITCAST]] + ; VI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(<2 x i16>) = COPY [[FABS]](<2 x f16>) + ; VI-NEXT: $vgpr0 = COPY [[COPY1]](<2 x i16>) ; ; GFX9-LABEL: name: fabs_v2s16_vs ; GFX9: liveins: $sgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; GFX9-NEXT: [[FABS:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FABS [[COPY]] - ; GFX9-NEXT: $vgpr0 = COPY [[FABS]](<2 x s16>) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:sgpr(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX9-NEXT: [[FABS:%[0-9]+]]:vgpr_32(<2 x f16>) = G_FABS [[BITCAST]] + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(<2 x i16>) = COPY [[FABS]](<2 x f16>) + ; GFX9-NEXT: $vgpr0 = COPY [[COPY1]](<2 x i16>) ; ; GFX10-LABEL: name: fabs_v2s16_vs ; GFX10: liveins: $sgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; GFX10-NEXT: [[FABS:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FABS [[COPY]] - ; GFX10-NEXT: $vgpr0 = COPY [[FABS]](<2 x s16>) - %0:sgpr(<2 x s16>) = COPY $sgpr0 - %1:vgpr(<2 x s16>) = G_FABS %0 - $vgpr0 = COPY %1 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr0 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:sgpr(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX10-NEXT: [[FABS:%[0-9]+]]:vgpr_32(<2 x f16>) = G_FABS [[BITCAST]] + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(<2 x i16>) = COPY [[FABS]](<2 x f16>) + ; GFX10-NEXT: $vgpr0 = COPY [[COPY1]](<2 x i16>) + %0:sgpr(<2 x i16>) = COPY $sgpr0 + %1:sgpr(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %2:vgpr(<2 x f16>) = G_FABS %1 + %3:vgpr(<2 x i16>) = G_BITCAST %2(<2 x f16>) + $vgpr0 = COPY %3(<2 x i16>) ... 
--- @@ -469,9 +511,11 @@ body: | ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[COPY]].sub0 ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 ; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] - %0:sgpr(s64) = COPY $sgpr0_sgpr1 - %1:sgpr(s64) = G_FABS %0 - S_ENDPGM 0, implicit %1 + %0:sgpr(i64) = COPY $sgpr0_sgpr1 + %1:sgpr(f64) = G_BITCAST %0(i64) + %2:sgpr(f64) = G_FABS %1 + %3:sgpr(i64) = G_BITCAST %2(f64) + S_ENDPGM 0, implicit %3(i64) ... --- @@ -526,9 +570,11 @@ body: | ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_AND_B32_e64_]], %subreg.sub1 ; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_FABS %0 - S_ENDPGM 0, implicit %1 + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(f64) = G_BITCAST %0(i64) + %2:vgpr(f64) = G_FABS %1 + %3:vgpr(i64) = G_BITCAST %2(f64) + S_ENDPGM 0, implicit %3(i64) ... --- @@ -543,33 +589,43 @@ body: | ; SI-LABEL: name: fabs_s64_vs ; SI: liveins: $sgpr0_sgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; SI-NEXT: [[FABS:%[0-9]+]]:vgpr(s64) = G_FABS [[COPY]] - ; SI-NEXT: S_ENDPGM 0, implicit [[FABS]](s64) + ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f64) = G_BITCAST [[COPY]](i64) + ; SI-NEXT: [[FABS:%[0-9]+]]:vreg_64(f64) = G_FABS [[BITCAST]] + ; SI-NEXT: [[COPY1:%[0-9]+]]:vreg_64(i64) = COPY [[FABS]](f64) + ; SI-NEXT: S_ENDPGM 0, implicit [[COPY1]](i64) ; ; VI-LABEL: name: fabs_s64_vs ; VI: liveins: $sgpr0_sgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; VI-NEXT: [[FABS:%[0-9]+]]:vgpr(s64) = G_FABS [[COPY]] - ; VI-NEXT: S_ENDPGM 0, implicit [[FABS]](s64) + ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f64) = G_BITCAST [[COPY]](i64) + ; VI-NEXT: [[FABS:%[0-9]+]]:vreg_64(f64) = G_FABS [[BITCAST]] + ; VI-NEXT: [[COPY1:%[0-9]+]]:vreg_64(i64) = COPY [[FABS]](f64) + ; VI-NEXT: S_ENDPGM 0, implicit [[COPY1]](i64) ; ; GFX9-LABEL: name: fabs_s64_vs ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[FABS:%[0-9]+]]:vgpr(s64) = G_FABS [[COPY]] - ; GFX9-NEXT: S_ENDPGM 0, implicit [[FABS]](s64) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f64) = G_BITCAST [[COPY]](i64) + ; GFX9-NEXT: [[FABS:%[0-9]+]]:vreg_64(f64) = G_FABS [[BITCAST]] + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64(i64) = COPY [[FABS]](f64) + ; GFX9-NEXT: S_ENDPGM 0, implicit [[COPY1]](i64) ; ; GFX10-LABEL: name: fabs_s64_vs ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[FABS:%[0-9]+]]:vgpr(s64) = G_FABS [[COPY]] - ; GFX10-NEXT: S_ENDPGM 0, implicit [[FABS]](s64) - %0:sgpr(s64) = COPY $sgpr0_sgpr1 - %1:vgpr(s64) = G_FABS %0 - S_ENDPGM 0, implicit %1 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f64) = G_BITCAST [[COPY]](i64) + ; GFX10-NEXT: [[FABS:%[0-9]+]]:vreg_64(f64) = G_FABS [[BITCAST]] + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64(i64) = COPY [[FABS]](f64) + ; GFX10-NEXT: S_ENDPGM 0, implicit [[COPY1]](i64) + %0:sgpr(i64) = COPY $sgpr0_sgpr1 + %1:sgpr(f64) = G_BITCAST %0(i64) + %2:vgpr(f64) = G_FABS %1 + %3:vgpr(i64) = 
G_BITCAST %2(f64) + S_ENDPGM 0, implicit %3(i64) ... # Make sure the source register is constrained @@ -625,9 +681,11 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub0 ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[V_AND_B32_e64_]], %subreg.sub1 ; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] - %0:vgpr(s64) = IMPLICIT_DEF - %1:vgpr(s64) = G_FABS %0:vgpr(s64) - S_ENDPGM 0, implicit %1 + %0:vgpr(i64) = IMPLICIT_DEF + %1:vgpr(f64) = G_BITCAST %0(i64) + %2:vgpr(f64) = G_FABS %1 + %3:vgpr(i64) = G_BITCAST %2(f64) + S_ENDPGM 0, implicit %3(i64) ... --- @@ -686,7 +744,9 @@ body: | ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[DEF]].sub0 ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 ; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] - %0:sgpr(s64) = IMPLICIT_DEF - %1:sgpr(s64) = G_FABS %0:sgpr(s64) - S_ENDPGM 0, implicit %1 + %0:sgpr(i64) = IMPLICIT_DEF + %1:sgpr(f64) = G_BITCAST %0(i64) + %2:sgpr(f64) = G_FABS %1 + %3:sgpr(i64) = G_BITCAST %2(f64) + S_ENDPGM 0, implicit %3(i64) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fadd.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fadd.s16.mir index 189749a70d9b1..713898b8aa3c9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fadd.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fadd.s16.mir @@ -15,14 +15,17 @@ body: | ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: %4:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %4 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vgpr(s16) = G_FADD %2, %3 - S_ENDPGM 0, implicit %4 + ; GFX8-NEXT: [[V_ADD_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ADD_F16_e64_]] + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %4:vgpr(f16) = G_BITCAST %2(i16) + %5:vgpr(f16) = G_BITCAST %3(i16) + %6:vgpr(f16) = G_FADD %4, %5 + %7:vgpr(i16) = G_BITCAST %6(f16) + S_ENDPGM 0, implicit %7(i16) ... @@ -40,14 +43,17 @@ body: | ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: %4:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %4 - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:sgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vgpr(s16) = G_FADD %2, %3 - S_ENDPGM 0, implicit %4 + ; GFX8-NEXT: [[V_ADD_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ADD_F16_e64_]] + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:sgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %4:sgpr(f16) = G_BITCAST %2(i16) + %5:vgpr(f16) = G_BITCAST %3(i16) + %6:vgpr(f16) = G_FADD %4, %5 + %7:vgpr(i16) = G_BITCAST %6(f16) + S_ENDPGM 0, implicit %7(i16) ... 
@@ -65,14 +71,17 @@ body: | ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: %4:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %4 - %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr(s32) = COPY $sgpr0 - %2:vgpr(s16) = G_TRUNC %0 - %3:sgpr(s16) = G_TRUNC %1 - %4:vgpr(s16) = G_FADD %2, %3 - S_ENDPGM 0, implicit %4 + ; GFX8-NEXT: [[V_ADD_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ADD_F16_e64_]] + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = COPY $sgpr0 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:sgpr(i16) = G_TRUNC %1(i32) + %4:vgpr(f16) = G_BITCAST %2(i16) + %5:sgpr(f16) = G_BITCAST %3(i16) + %6:vgpr(f16) = G_FADD %4, %5 + %7:vgpr(i16) = G_BITCAST %6(f16) + S_ENDPGM 0, implicit %7(i16) ... @@ -90,15 +99,18 @@ body: | ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: %5:vgpr_32 = nofpexcept V_ADD_F16_e64 2, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %5 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vgpr(s16) = G_FABS %2 - %5:vgpr(s16) = G_FADD %4, %3 - S_ENDPGM 0, implicit %5 + ; GFX8-NEXT: [[V_ADD_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F16_e64 2, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ADD_F16_e64_]] + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %4:vgpr(f16) = G_BITCAST %2(i16) + %5:vgpr(f16) = G_FABS %4 + %6:vgpr(f16) = G_BITCAST %3(i16) + %7:vgpr(f16) = G_FADD %5, %6 + %8:vgpr(i16) = G_BITCAST %7(f16) + S_ENDPGM 0, implicit %8(i16) ... @@ -116,15 +128,18 @@ body: | ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: %5:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[COPY]], 2, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %5 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vgpr(s16) = G_FABS %3 - %5:vgpr(s16) = G_FADD %2, %4 - S_ENDPGM 0, implicit %5 + ; GFX8-NEXT: [[V_ADD_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[COPY]], 2, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ADD_F16_e64_]] + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %4:vgpr(f16) = G_BITCAST %3(i16) + %5:vgpr(f16) = G_FABS %4 + %6:vgpr(f16) = G_BITCAST %2(i16) + %7:vgpr(f16) = G_FADD %6, %5 + %8:vgpr(i16) = G_BITCAST %7(f16) + S_ENDPGM 0, implicit %8(i16) ... 
@@ -142,16 +157,19 @@ body: | ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: %6:vgpr_32 = nofpexcept V_ADD_F16_e64 3, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %6 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vgpr(s16) = G_FABS %2 - %5:vgpr(s16) = G_FNEG %4 - %6:vgpr(s16) = G_FADD %5, %3 - S_ENDPGM 0, implicit %6 + ; GFX8-NEXT: [[V_ADD_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F16_e64 3, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ADD_F16_e64_]] + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %4:vgpr(f16) = G_BITCAST %2(i16) + %5:vgpr(f16) = G_FABS %4 + %6:vgpr(f16) = G_FNEG %5 + %7:vgpr(f16) = G_BITCAST %3(i16) + %8:vgpr(f16) = G_FADD %6, %7 + %9:vgpr(i16) = G_BITCAST %8(f16) + S_ENDPGM 0, implicit %9(i16) ... @@ -169,16 +187,19 @@ body: | ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: %6:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[COPY]], 3, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %6 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vgpr(s16) = G_FABS %3 - %5:vgpr(s16) = G_FNEG %4 - %6:vgpr(s16) = G_FADD %2, %5 - S_ENDPGM 0, implicit %6 + ; GFX8-NEXT: [[V_ADD_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[COPY]], 3, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ADD_F16_e64_]] + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %4:vgpr(f16) = G_BITCAST %3(i16) + %5:vgpr(f16) = G_FABS %4 + %6:vgpr(f16) = G_FNEG %5 + %7:vgpr(f16) = G_BITCAST %2(i16) + %8:vgpr(f16) = G_FADD %7, %6 + %9:vgpr(i16) = G_BITCAST %8(f16) + S_ENDPGM 0, implicit %9(i16) ... @@ -196,14 +217,17 @@ body: | ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: %5:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %5 - %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr(s32) = COPY $sgpr0 - %2:vgpr(s16) = G_TRUNC %0 - %3:sgpr(s16) = G_TRUNC %1 - %4:sgpr(s16) = G_FNEG %3 - %5:vgpr(s16) = G_FADD %2, %4 - S_ENDPGM 0, implicit %5 + ; GFX8-NEXT: [[V_ADD_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ADD_F16_e64_]] + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = COPY $sgpr0 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:sgpr(i16) = G_TRUNC %1(i32) + %4:sgpr(f16) = G_BITCAST %3(i16) + %5:sgpr(f16) = G_FNEG %4 + %6:vgpr(f16) = G_BITCAST %2(i16) + %7:vgpr(f16) = G_FADD %6, %5 + %8:vgpr(i16) = G_BITCAST %7(f16) + S_ENDPGM 0, implicit %8(i16) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fadd.s32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fadd.s32.mir index 63e469621edef..27f494b1688c3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fadd.s32.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fadd.s32.mir @@ -15,12 +15,15 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: %2:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %2 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = G_FADD %0, %1 - S_ENDPGM 0, implicit %2 + ; GFX6-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F32_e64_]] + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(f32) = G_BITCAST %0(i32) + %3:vgpr(f32) = G_BITCAST %1(i32) + %4:vgpr(f32) = G_FADD %2, %3 + %5:vgpr(i32) = G_BITCAST %4(f32) + S_ENDPGM 0, implicit %5(i32) ... @@ -38,12 +41,15 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: %2:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %2 - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(s32) = G_FADD %0, %1 - S_ENDPGM 0, implicit %2 + ; GFX6-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F32_e64_]] + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:sgpr(f32) = G_BITCAST %0(i32) + %3:vgpr(f32) = G_BITCAST %1(i32) + %4:vgpr(f32) = G_FADD %2, %3 + %5:vgpr(i32) = G_BITCAST %4(f32) + S_ENDPGM 0, implicit %5(i32) ... @@ -61,12 +67,15 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: %2:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %2 - %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr(s32) = COPY $sgpr0 - %2:vgpr(s32) = G_FADD %0, %1 - S_ENDPGM 0, implicit %2 + ; GFX6-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F32_e64_]] + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = COPY $sgpr0 + %2:vgpr(f32) = G_BITCAST %0(i32) + %3:sgpr(f32) = G_BITCAST %1(i32) + %4:vgpr(f32) = G_FADD %2, %3 + %5:vgpr(i32) = G_BITCAST %4(f32) + S_ENDPGM 0, implicit %5(i32) ... 
@@ -84,13 +93,16 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: %3:vgpr_32 = nofpexcept V_ADD_F32_e64 2, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %3 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = G_FABS %0 - %3:vgpr(s32) = G_FADD %2, %1 - S_ENDPGM 0, implicit %3 + ; GFX6-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 2, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F32_e64_]] + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(f32) = G_BITCAST %0(i32) + %3:vgpr(f32) = G_FABS %2 + %4:vgpr(f32) = G_BITCAST %1(i32) + %5:vgpr(f32) = G_FADD %3, %4 + %6:vgpr(i32) = G_BITCAST %5(f32) + S_ENDPGM 0, implicit %6(i32) ... @@ -107,13 +119,16 @@ body: | ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: %3:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY]], 2, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %3 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = G_FABS %1 - %3:vgpr(s32) = G_FADD %1, %2 - S_ENDPGM 0, implicit %3 + ; GFX6-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY]], 2, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F32_e64_]] + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(f32) = G_BITCAST %1(i32) + %3:vgpr(f32) = G_FABS %2 + %4:vgpr(f32) = G_BITCAST %1(i32) + %5:vgpr(f32) = G_FADD %4, %3 + %6:vgpr(i32) = G_BITCAST %5(f32) + S_ENDPGM 0, implicit %6(i32) ... @@ -131,14 +146,17 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: %4:vgpr_32 = nofpexcept V_ADD_F32_e64 3, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %4 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = G_FABS %0 - %3:vgpr(s32) = G_FNEG %2 - %4:vgpr(s32) = G_FADD %3, %1 - S_ENDPGM 0, implicit %4 + ; GFX6-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 3, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F32_e64_]] + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(f32) = G_BITCAST %0(i32) + %3:vgpr(f32) = G_FABS %2 + %4:vgpr(f32) = G_FNEG %3 + %5:vgpr(f32) = G_BITCAST %1(i32) + %6:vgpr(f32) = G_FADD %4, %5 + %7:vgpr(i32) = G_BITCAST %6(f32) + S_ENDPGM 0, implicit %7(i32) ... 
@@ -155,14 +173,17 @@ body: | ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: %4:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY]], 3, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %4 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = G_FABS %1 - %3:vgpr(s32) = G_FNEG %2 - %4:vgpr(s32) = G_FADD %1, %3 - S_ENDPGM 0, implicit %4 + ; GFX6-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY]], 3, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F32_e64_]] + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(f32) = G_BITCAST %1(i32) + %3:vgpr(f32) = G_FABS %2 + %4:vgpr(f32) = G_FNEG %3 + %5:vgpr(f32) = G_BITCAST %1(i32) + %6:vgpr(f32) = G_FADD %5, %4 + %7:vgpr(i32) = G_BITCAST %6(f32) + S_ENDPGM 0, implicit %7(i32) ... @@ -181,15 +202,22 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] - ; GFX6-NEXT: %4:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY]], 1, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %4 - %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr(s32) = COPY $sgpr0 - %2:sgpr(s32) = G_FNEG %1 - %3:vgpr(s32) = COPY %2 - %4:vgpr(s32) = G_FADD %0, %3 - S_ENDPGM 0, implicit %4 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; GFX6-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def dead $scc + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_XOR_B32_]] + ; GFX6-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F32_e64_]] + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = COPY $sgpr0 + %2:sgpr(f32) = G_BITCAST %1(i32) + %3:sgpr(f32) = G_FNEG %2 + %4:sgpr(i32) = G_BITCAST %3(f32) + %5:vgpr(i32) = COPY %4(i32) + %6:vgpr(f32) = G_BITCAST %0(i32) + %7:vgpr(f32) = G_BITCAST %5(i32) + %8:vgpr(f32) = G_FADD %6, %7 + %9:vgpr(i32) = G_BITCAST %8(f32) + S_ENDPGM 0, implicit %9(i32) ... 
@@ -209,16 +237,27 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: %6:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY]], 3, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %6 - %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr(s32) = COPY $sgpr0 - %2:sgpr(s32) = G_FABS %1 - %3:sgpr(s32) = COPY %2 - %4:sgpr(s32) = G_FNEG %3 - %5:sgpr(s32) = COPY %4 - %6:vgpr(s32) = G_FADD %0, %5 - S_ENDPGM 0, implicit %6 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 + ; GFX6-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def dead $scc + ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; GFX6-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[S_AND_B32_]], [[S_MOV_B32_1]], implicit-def dead $scc + ; GFX6-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY]], 0, [[S_XOR_B32_]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F32_e64_]] + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = COPY $sgpr0 + %2:sgpr(f32) = G_BITCAST %1(i32) + %3:sgpr(f32) = G_FABS %2 + %4:sgpr(i32) = G_BITCAST %3(f32) + %5:sgpr(i32) = COPY %4(i32) + %6:sgpr(f32) = G_BITCAST %5(i32) + %7:sgpr(f32) = G_FNEG %6 + %8:sgpr(i32) = G_BITCAST %7(f32) + %9:sgpr(i32) = COPY %8(i32) + %10:vgpr(f32) = G_BITCAST %0(i32) + %11:sgpr(f32) = G_BITCAST %9(i32) + %12:vgpr(f32) = G_FADD %10, %11 + %13:vgpr(i32) = G_BITCAST %12(f32) + S_ENDPGM 0, implicit %13(i32) ... @@ -240,18 +279,29 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] - ; GFX6-NEXT: %6:vgpr_32 = nofpexcept V_ADD_F32_e64 2, [[COPY2]], 2, [[COPY3]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %6 - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = G_FABS %0 - %3:sgpr(s32) = G_FABS %1 - %4:vgpr(s32) = COPY %2 - %5:vgpr(s32) = COPY %3 - %6:vgpr(s32) = G_FADD %4, %5 - S_ENDPGM 0, implicit %6 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 + ; GFX6-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def dead $scc + ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 + ; GFX6-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_1]], implicit-def dead $scc + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_AND_B32_]] + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[S_AND_B32_1]] + ; GFX6-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY2]], 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F32_e64_]] + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(f32) = G_BITCAST %0(i32) + %3:sgpr(f32) = G_FABS %2 + %4:sgpr(f32) = G_BITCAST %1(i32) + %5:sgpr(f32) = G_FABS %4 + %6:sgpr(i32) = G_BITCAST %3(f32) + %7:vgpr(i32) = COPY %6(i32) + %8:sgpr(i32) = G_BITCAST %5(f32) + %9:vgpr(i32) = COPY %8(i32) + %10:vgpr(f32) = G_BITCAST %7(i32) + %11:vgpr(f32) = G_BITCAST %9(i32) + %12:vgpr(f32) = G_FADD %10, %11 + %13:vgpr(i32) = G_BITCAST %12(f32) + S_ENDPGM 0, implicit %13(i32) ... 
@@ -269,18 +319,29 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] - ; GFX6-NEXT: %6:vgpr_32 = nofpexcept V_ADD_F32_e64 1, [[COPY2]], 1, [[COPY3]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %6 - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = G_FNEG %0 - %3:sgpr(s32) = G_FNEG %1 - %4:vgpr(s32) = COPY %2 - %5:vgpr(s32) = COPY %3 - %6:vgpr(s32) = G_FADD %4, %5 - S_ENDPGM 0, implicit %6 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; GFX6-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def dead $scc + ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; GFX6-NEXT: [[S_XOR_B32_1:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY1]], [[S_MOV_B32_1]], implicit-def dead $scc + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_XOR_B32_]] + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[S_XOR_B32_1]] + ; GFX6-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY2]], 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F32_e64_]] + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(f32) = G_BITCAST %0(i32) + %3:sgpr(f32) = G_FNEG %2 + %4:sgpr(f32) = G_BITCAST %1(i32) + %5:sgpr(f32) = G_FNEG %4 + %6:sgpr(i32) = G_BITCAST %3(f32) + %7:vgpr(i32) = COPY %6(i32) + %8:sgpr(i32) = G_BITCAST %5(f32) + %9:vgpr(i32) = COPY %8(i32) + %10:vgpr(f32) = G_BITCAST %7(i32) + %11:vgpr(f32) = G_BITCAST %9(i32) + %12:vgpr(f32) = G_FADD %10, %11 + %13:vgpr(i32) = G_BITCAST %12(f32) + S_ENDPGM 0, implicit %13(i32) ... 
@@ -298,19 +359,30 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] - ; GFX6-NEXT: %8:vgpr_32 = nofpexcept V_ADD_F32_e64 3, [[COPY2]], 3, [[COPY3]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %8 - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = G_FABS %0 - %3:sgpr(s32) = G_FABS %1 - %4:sgpr(s32) = G_FNEG %2 - %5:sgpr(s32) = G_FNEG %3 - %6:vgpr(s32) = COPY %4 - %7:vgpr(s32) = COPY %5 - %8:vgpr(s32) = G_FADD %6, %7 - S_ENDPGM 0, implicit %8 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; GFX6-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def dead $scc + ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; GFX6-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY1]], [[S_MOV_B32_1]], implicit-def dead $scc + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_OR_B32_]] + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[S_OR_B32_1]] + ; GFX6-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY2]], 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F32_e64_]] + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(f32) = G_BITCAST %0(i32) + %3:sgpr(f32) = G_FABS %2 + %4:sgpr(f32) = G_BITCAST %1(i32) + %5:sgpr(f32) = G_FABS %4 + %6:sgpr(f32) = G_FNEG %3 + %7:sgpr(f32) = G_FNEG %5 + %8:sgpr(i32) = G_BITCAST %6(f32) + %9:vgpr(i32) = COPY %8(i32) + %10:sgpr(i32) = G_BITCAST %7(f32) + %11:vgpr(i32) = COPY %10(i32) + %12:vgpr(f32) = G_BITCAST %9(i32) + %13:vgpr(f32) = G_BITCAST %11(i32) + %14:vgpr(f32) = G_FADD %12, %13 + %15:vgpr(i32) = G_BITCAST %14(f32) + S_ENDPGM 0, implicit %15(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fadd.s64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fadd.s64.mir index 476c5001dcb21..927415274bc8e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fadd.s64.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fadd.s64.mir @@ -15,12 +15,15 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: %2:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %2 - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = COPY $vgpr2_vgpr3 - %2:vgpr(s64) = G_FADD %0, %1 - S_ENDPGM 0, implicit %2 + ; GFX6-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F64_e64_]] + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(i64) = COPY $vgpr2_vgpr3 + %2:vgpr(f64) = G_BITCAST %0(i64) + %3:vgpr(f64) = G_BITCAST %1(i64) + %4:vgpr(f64) = G_FADD %2, %3 + %5:vgpr(i64) = G_BITCAST %4(f64) + S_ENDPGM 0, implicit %5(i64) ... 
@@ -38,12 +41,15 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: %2:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %2 - %0:sgpr(s64) = COPY $sgpr0_sgpr1 - %1:vgpr(s64) = COPY $vgpr0_vgpr1 - %2:vgpr(s64) = G_FADD %0, %1 - S_ENDPGM 0, implicit %2 + ; GFX6-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F64_e64_]] + %0:sgpr(i64) = COPY $sgpr0_sgpr1 + %1:vgpr(i64) = COPY $vgpr0_vgpr1 + %2:sgpr(f64) = G_BITCAST %0(i64) + %3:vgpr(f64) = G_BITCAST %1(i64) + %4:vgpr(f64) = G_FADD %2, %3 + %5:vgpr(i64) = G_BITCAST %4(f64) + S_ENDPGM 0, implicit %5(i64) ... @@ -61,12 +67,15 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: %2:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %2 - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:sgpr(s64) = COPY $sgpr0_sgpr1 - %2:vgpr(s64) = G_FADD %0, %1 - S_ENDPGM 0, implicit %2 + ; GFX6-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F64_e64_]] + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:sgpr(i64) = COPY $sgpr0_sgpr1 + %2:vgpr(f64) = G_BITCAST %0(i64) + %3:sgpr(f64) = G_BITCAST %1(i64) + %4:vgpr(f64) = G_FADD %2, %3 + %5:vgpr(i64) = G_BITCAST %4(f64) + S_ENDPGM 0, implicit %5(i64) ... @@ -84,13 +93,16 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: %3:vreg_64 = nofpexcept V_ADD_F64_e64 2, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %3 - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = COPY $vgpr2_vgpr3 - %2:vgpr(s64) = G_FABS %0 - %3:vgpr(s64) = G_FADD %2, %1 - S_ENDPGM 0, implicit %3 + ; GFX6-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 2, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F64_e64_]] + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(i64) = COPY $vgpr2_vgpr3 + %2:vgpr(f64) = G_BITCAST %0(i64) + %3:vgpr(f64) = G_FABS %2 + %4:vgpr(f64) = G_BITCAST %1(i64) + %5:vgpr(f64) = G_FADD %3, %4 + %6:vgpr(i64) = G_BITCAST %5(f64) + S_ENDPGM 0, implicit %6(i64) ... 
@@ -107,13 +119,16 @@ body: | ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: %3:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[COPY]], 2, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %3 - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = COPY $vgpr2_vgpr3 - %2:vgpr(s64) = G_FABS %1 - %3:vgpr(s64) = G_FADD %1, %2 - S_ENDPGM 0, implicit %3 + ; GFX6-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[COPY]], 2, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F64_e64_]] + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(i64) = COPY $vgpr2_vgpr3 + %2:vgpr(f64) = G_BITCAST %1(i64) + %3:vgpr(f64) = G_FABS %2 + %4:vgpr(f64) = G_BITCAST %1(i64) + %5:vgpr(f64) = G_FADD %4, %3 + %6:vgpr(i64) = G_BITCAST %5(f64) + S_ENDPGM 0, implicit %6(i64) ... @@ -131,14 +146,17 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: %4:vreg_64 = nofpexcept V_ADD_F64_e64 3, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %4 - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = COPY $vgpr2_vgpr3 - %2:vgpr(s64) = G_FABS %0 - %3:vgpr(s64) = G_FNEG %2 - %4:vgpr(s64) = G_FADD %3, %1 - S_ENDPGM 0, implicit %4 + ; GFX6-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 3, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F64_e64_]] + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(i64) = COPY $vgpr2_vgpr3 + %2:vgpr(f64) = G_BITCAST %0(i64) + %3:vgpr(f64) = G_FABS %2 + %4:vgpr(f64) = G_FNEG %3 + %5:vgpr(f64) = G_BITCAST %1(i64) + %6:vgpr(f64) = G_FADD %4, %5 + %7:vgpr(i64) = G_BITCAST %6(f64) + S_ENDPGM 0, implicit %7(i64) ... @@ -155,14 +173,17 @@ body: | ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: %4:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[COPY]], 3, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %4 - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = COPY $vgpr0_vgpr1 - %2:vgpr(s64) = G_FABS %1 - %3:vgpr(s64) = G_FNEG %2 - %4:vgpr(s64) = G_FADD %1, %3 - S_ENDPGM 0, implicit %4 + ; GFX6-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[COPY]], 3, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F64_e64_]] + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(i64) = COPY $vgpr0_vgpr1 + %2:vgpr(f64) = G_BITCAST %1(i64) + %3:vgpr(f64) = G_FABS %2 + %4:vgpr(f64) = G_FNEG %3 + %5:vgpr(f64) = G_BITCAST %1(i64) + %6:vgpr(f64) = G_FADD %5, %4 + %7:vgpr(i64) = G_BITCAST %6(f64) + S_ENDPGM 0, implicit %7(i64) ... 
@@ -182,15 +203,26 @@ body: |
 ; GFX6-NEXT: {{ $}}
 ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[COPY1]]
- ; GFX6-NEXT: %4:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[COPY]], 1, [[COPY2]], 0, 0, implicit $mode, implicit $exec
- ; GFX6-NEXT: S_ENDPGM 0, implicit %4
- %0:vgpr(s64) = COPY $vgpr0_vgpr1
- %1:sgpr(s64) = COPY $sgpr0_sgpr1
- %2:sgpr(s64) = G_FNEG %1
- %3:vgpr(s64) = COPY %2
- %4:vgpr(s64) = G_FADD %0, %3
- S_ENDPGM 0, implicit %4
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY [[COPY1]].sub1
+ ; GFX6-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def dead $scc
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY [[S_XOR_B32_]]
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[COPY1]].sub0
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+ ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
+ ; GFX6-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[COPY]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec
+ ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F64_e64_]]
+ %0:vgpr(i64) = COPY $vgpr0_vgpr1
+ %1:sgpr(i64) = COPY $sgpr0_sgpr1
+ %2:sgpr(f64) = G_BITCAST %1(i64)
+ %3:sgpr(f64) = G_FNEG %2
+ %4:sgpr(i64) = G_BITCAST %3(f64)
+ %5:vgpr(i64) = COPY %4(i64)
+ %6:vgpr(f64) = G_BITCAST %0(i64)
+ %7:vgpr(f64) = G_BITCAST %5(i64)
+ %8:vgpr(f64) = G_FADD %6, %7
+ %9:vgpr(i64) = G_BITCAST %8(f64)
+ S_ENDPGM 0, implicit %9(i64)
 ...

@@ -212,18 +244,37 @@ body: |
 ; GFX6-NEXT: {{ $}}
 ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3
- ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[COPY]]
- ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[COPY1]]
- ; GFX6-NEXT: %6:vreg_64 = nofpexcept V_ADD_F64_e64 2, [[COPY2]], 2, [[COPY3]], 0, 0, implicit $mode, implicit $exec
- ; GFX6-NEXT: S_ENDPGM 0, implicit %6
- %0:sgpr(s64) = COPY $sgpr0_sgpr1
- %1:sgpr(s64) = COPY $sgpr2_sgpr3
- %2:sgpr(s64) = G_FABS %0
- %3:sgpr(s64) = G_FABS %1
- %4:vgpr(s64) = COPY %2
- %5:vgpr(s64) = COPY %3
- %6:vgpr(s64) = G_FADD %4, %5
- S_ENDPGM 0, implicit %6
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY [[COPY]].sub1
+ ; GFX6-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def dead $scc
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY [[S_AND_B32_]]
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[COPY]].sub0
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+ ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
+ ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0 = COPY [[COPY1]].sub1
+ ; GFX6-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY5]], [[S_MOV_B32_1]], implicit-def dead $scc
+ ; GFX6-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0 = COPY [[S_AND_B32_1]]
+ ; GFX6-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[COPY1]].sub0
+ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
+ ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
+ ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
+ ; GFX6-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[COPY8]], 0, [[COPY9]], 0, 0, implicit $mode, implicit $exec
+ ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F64_e64_]]
+ %0:sgpr(i64) = COPY $sgpr0_sgpr1
+ %1:sgpr(i64) = COPY $sgpr2_sgpr3
+ %2:sgpr(f64) = G_BITCAST %0(i64)
+ %3:sgpr(f64) = G_FABS %2
+ %4:sgpr(f64) = G_BITCAST %1(i64)
+ %5:sgpr(f64) = G_FABS %4
+ %6:sgpr(i64) = G_BITCAST %3(f64)
+ %7:vgpr(i64) = COPY %6(i64)
+ %8:sgpr(i64) = G_BITCAST %5(f64)
+ %9:vgpr(i64) = COPY %8(i64)
+ %10:vgpr(f64) = G_BITCAST %7(i64)
+ %11:vgpr(f64) = G_BITCAST %9(i64)
+ %12:vgpr(f64) = G_FADD %10, %11
+ %13:vgpr(i64) = G_BITCAST %12(f64)
+ S_ENDPGM 0, implicit %13(i64)
 ...

@@ -241,18 +292,37 @@ body: |
 ; GFX6-NEXT: {{ $}}
 ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3
- ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[COPY]]
- ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[COPY1]]
- ; GFX6-NEXT: %6:vreg_64 = nofpexcept V_ADD_F64_e64 1, [[COPY2]], 1, [[COPY3]], 0, 0, implicit $mode, implicit $exec
- ; GFX6-NEXT: S_ENDPGM 0, implicit %6
- %0:sgpr(s64) = COPY $sgpr0_sgpr1
- %1:sgpr(s64) = COPY $sgpr2_sgpr3
- %2:sgpr(s64) = G_FNEG %0
- %3:sgpr(s64) = G_FNEG %1
- %4:vgpr(s64) = COPY %2
- %5:vgpr(s64) = COPY %3
- %6:vgpr(s64) = G_FADD %4, %5
- S_ENDPGM 0, implicit %6
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY [[COPY]].sub1
+ ; GFX6-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def dead $scc
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY [[S_XOR_B32_]]
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[COPY]].sub0
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+ ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+ ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0 = COPY [[COPY1]].sub1
+ ; GFX6-NEXT: [[S_XOR_B32_1:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY5]], [[S_MOV_B32_1]], implicit-def dead $scc
+ ; GFX6-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0 = COPY [[S_XOR_B32_1]]
+ ; GFX6-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[COPY1]].sub0
+ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
+ ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
+ ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
+ ; GFX6-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[COPY8]], 0, [[COPY9]], 0, 0, implicit $mode, implicit $exec
+ ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F64_e64_]]
+ %0:sgpr(i64) = COPY $sgpr0_sgpr1
+ %1:sgpr(i64) = COPY $sgpr2_sgpr3
+ %2:sgpr(f64) = G_BITCAST %0(i64)
+ %3:sgpr(f64) = G_FNEG %2
+ %4:sgpr(f64) = G_BITCAST %1(i64)
+ %5:sgpr(f64) = G_FNEG %4
+ %6:sgpr(i64) = G_BITCAST %3(f64)
+ %7:vgpr(i64) = COPY %6(i64)
+ %8:sgpr(i64) = G_BITCAST %5(f64)
+ %9:vgpr(i64) = COPY %8(i64)
+ %10:vgpr(f64) = G_BITCAST %7(i64)
+ %11:vgpr(f64) = G_BITCAST %9(i64)
+ %12:vgpr(f64) = G_FADD %10, %11
+ %13:vgpr(i64) = G_BITCAST %12(f64)
+ S_ENDPGM 0, implicit %13(i64)
 ...
@@ -270,19 +340,38 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[COPY]] - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[COPY1]] - ; GFX6-NEXT: %8:vreg_64 = nofpexcept V_ADD_F64_e64 3, [[COPY2]], 3, [[COPY3]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %8 - %0:sgpr(s64) = COPY $sgpr0_sgpr1 - %1:sgpr(s64) = COPY $sgpr2_sgpr3 - %2:sgpr(s64) = G_FABS %0 - %3:sgpr(s64) = G_FABS %1 - %4:sgpr(s64) = G_FNEG %2 - %5:sgpr(s64) = G_FNEG %3 - %6:vgpr(s64) = COPY %4 - %7:vgpr(s64) = COPY %5 - %8:vgpr(s64) = G_FADD %6, %7 - S_ENDPGM 0, implicit %8 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY [[COPY]].sub1 + ; GFX6-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def dead $scc + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY [[S_OR_B32_]] + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[COPY]].sub0 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0 = COPY [[COPY1]].sub1 + ; GFX6-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY5]], [[S_MOV_B32_1]], implicit-def dead $scc + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0 = COPY [[S_OR_B32_1]] + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[COPY1]].sub0 + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] + ; GFX6-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[COPY8]], 0, [[COPY9]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F64_e64_]] + %0:sgpr(i64) = COPY $sgpr0_sgpr1 + %1:sgpr(i64) = COPY $sgpr2_sgpr3 + %2:sgpr(f64) = G_BITCAST %0(i64) + %3:sgpr(f64) = G_FABS %2 + %4:sgpr(f64) = G_BITCAST %1(i64) + %5:sgpr(f64) = G_FABS %4 + %6:sgpr(f64) = G_FNEG %3 + %7:sgpr(f64) = G_FNEG %5 + %8:sgpr(i64) = G_BITCAST %6(f64) + %9:vgpr(i64) = COPY %8(i64) + %10:sgpr(i64) = G_BITCAST %7(f64) + %11:vgpr(i64) = COPY %10(i64) + %12:vgpr(f64) = G_BITCAST %9(i64) + %13:vgpr(f64) = G_BITCAST %11(i64) + %14:vgpr(f64) = G_FADD %12, %13 + %15:vgpr(i64) = G_BITCAST %14(f64) + S_ENDPGM 0, implicit %15(i64) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcanonicalize.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcanonicalize.mir index d32634806f7bd..ef18d1bd5374d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcanonicalize.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcanonicalize.mir @@ -53,10 +53,12 @@ body: | ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-FAKE16-NEXT: [[V_MAX_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_fake16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_MAX_F16_fake16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s16) = G_TRUNC %0 - %2:vgpr(s16) = G_FCANONICALIZE %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i16) = G_TRUNC %0(i32) + %2:vgpr(f16) = G_BITCAST %1(i16) + %3:vgpr(f16) = G_FCANONICALIZE %2 + %4:vgpr(i16) = G_BITCAST %3(f16) + S_ENDPGM 0, implicit %4(i16) ... --- @@ -107,10 +109,12 @@ body: | ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-FAKE16-NEXT: [[V_MAX_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_fake16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_MAX_F16_fake16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s16) = G_TRUNC %0 - %2:vgpr(s16) = G_FCANONICALIZE %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i16) = G_TRUNC %0(i32) + %2:vgpr(f16) = G_BITCAST %1(i16) + %3:vgpr(f16) = G_FCANONICALIZE %2 + %4:vgpr(i16) = G_BITCAST %3(f16) + S_ENDPGM 0, implicit %4(i16) ... --- @@ -154,9 +158,11 @@ body: | ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = G_FCANONICALIZE %0 - S_ENDPGM 0, implicit %1 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(f32) = G_BITCAST %0(i32) + %2:vgpr(f32) = G_FCANONICALIZE %1 + %3:vgpr(i32) = G_BITCAST %2(f32) + S_ENDPGM 0, implicit %3(i32) ... --- @@ -200,9 +206,11 @@ body: | ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = G_FCANONICALIZE %0 - S_ENDPGM 0, implicit %1 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(f32) = G_BITCAST %0(i32) + %2:vgpr(f32) = G_FCANONICALIZE %1 + %3:vgpr(i32) = G_BITCAST %2(f32) + S_ENDPGM 0, implicit %3(i32) ... --- @@ -246,9 +254,11 @@ body: | ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[V_PK_MAX_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_PK_MAX_F16_]] - %0:vgpr(<2 x s16>) = COPY $vgpr0 - %1:vgpr(<2 x s16>) = G_FCANONICALIZE %0 - S_ENDPGM 0, implicit %1 + %0:vgpr(<2 x i16>) = COPY $vgpr0 + %1:vgpr(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %2:vgpr(<2 x f16>) = G_FCANONICALIZE %1 + %3:vgpr(<2 x i16>) = G_BITCAST %2(<2 x f16>) + S_ENDPGM 0, implicit %3(<2 x i16>) ... 
--- @@ -292,9 +302,11 @@ body: | ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[V_PK_MAX_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_PK_MAX_F16_]] - %0:vgpr(<2 x s16>) = COPY $vgpr0 - %1:vgpr(<2 x s16>) = G_FCANONICALIZE %0 - S_ENDPGM 0, implicit %1 + %0:vgpr(<2 x i16>) = COPY $vgpr0 + %1:vgpr(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %2:vgpr(<2 x f16>) = G_FCANONICALIZE %1 + %3:vgpr(<2 x i16>) = G_BITCAST %2(<2 x f16>) + S_ENDPGM 0, implicit %3(<2 x i16>) ... --- @@ -338,9 +350,11 @@ body: | ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[V_MAX_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAX_F64_e64_]] - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_FCANONICALIZE %0 - S_ENDPGM 0, implicit %1 + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(f64) = G_BITCAST %0(i64) + %2:vgpr(f64) = G_FCANONICALIZE %1 + %3:vgpr(i64) = G_BITCAST %2(f64) + S_ENDPGM 0, implicit %3(i64) ... --- @@ -384,9 +398,11 @@ body: | ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[V_MAX_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAX_F64_e64_]] - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_FCANONICALIZE %0 - S_ENDPGM 0, implicit %1 + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(f64) = G_BITCAST %0(i64) + %2:vgpr(f64) = G_FCANONICALIZE %1 + %3:vgpr(i64) = G_BITCAST %2(f64) + S_ENDPGM 0, implicit %3(i64) ... --- @@ -429,10 +445,12 @@ body: | ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[COPY]], 2, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = G_FABS %0 - %2:vgpr(s32) = G_FCANONICALIZE %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(f32) = G_BITCAST %0(i32) + %2:vgpr(f32) = G_FABS %1 + %3:vgpr(f32) = G_FCANONICALIZE %2 + %4:vgpr(i32) = G_BITCAST %3(f32) + S_ENDPGM 0, implicit %4(i32) ... @@ -476,10 +494,12 @@ body: | ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[COPY]], 2, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = G_FABS %0 - %2:vgpr(s32) = G_FCANONICALIZE %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(f32) = G_BITCAST %0(i32) + %2:vgpr(f32) = G_FABS %1 + %3:vgpr(f32) = G_FCANONICALIZE %2 + %4:vgpr(i32) = G_BITCAST %3(f32) + S_ENDPGM 0, implicit %4(i32) ... --- @@ -522,10 +542,12 @@ body: | ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 1, [[COPY]], 1, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = G_FNEG %0 - %2:vgpr(s32) = G_FCANONICALIZE %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(f32) = G_BITCAST %0(i32) + %2:vgpr(f32) = G_FNEG %1 + %3:vgpr(f32) = G_FCANONICALIZE %2 + %4:vgpr(i32) = G_BITCAST %3(f32) + S_ENDPGM 0, implicit %4(i32) ... 
--- @@ -568,10 +590,12 @@ body: | ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 1, [[COPY]], 1, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = G_FNEG %0 - %2:vgpr(s32) = G_FCANONICALIZE %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(f32) = G_BITCAST %0(i32) + %2:vgpr(f32) = G_FNEG %1 + %3:vgpr(f32) = G_FCANONICALIZE %2 + %4:vgpr(i32) = G_BITCAST %3(f32) + S_ENDPGM 0, implicit %4(i32) ... --- @@ -622,11 +646,13 @@ body: | ; GFX11-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec ; GFX11-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[V_XOR_B32_e64_]], 2, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = G_FNEG %0 - %2:vgpr(s32) = G_FABS %1 - %3:vgpr(s32) = G_FCANONICALIZE %2 - S_ENDPGM 0, implicit %3 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(f32) = G_BITCAST %0(i32) + %2:vgpr(f32) = G_FNEG %1 + %3:vgpr(f32) = G_FABS %2 + %4:vgpr(f32) = G_FCANONICALIZE %3 + %5:vgpr(i32) = G_BITCAST %4(f32) + S_ENDPGM 0, implicit %5(i32) ... --- @@ -677,9 +703,11 @@ body: | ; GFX11-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec ; GFX11-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[V_XOR_B32_e64_]], 2, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = G_FNEG %0 - %2:vgpr(s32) = G_FABS %1 - %3:vgpr(s32) = G_FCANONICALIZE %2 - S_ENDPGM 0, implicit %3 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(f32) = G_BITCAST %0(i32) + %2:vgpr(f32) = G_FNEG %1 + %3:vgpr(f32) = G_FABS %2 + %4:vgpr(f32) = G_FCANONICALIZE %3 + %5:vgpr(i32) = G_BITCAST %4(f32) + S_ENDPGM 0, implicit %5(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fceil.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fceil.mir index ebddc77e8c099..91104c26c5059 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fceil.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fceil.mir @@ -15,11 +15,13 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: %1:vgpr_32 = nofpexcept V_CEIL_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: $vgpr0 = COPY %1 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = G_FCEIL %0 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[V_CEIL_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CEIL_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: $vgpr0 = COPY [[V_CEIL_F32_e64_]] + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(f32) = G_BITCAST %0(i32) + %2:vgpr(f32) = G_FCEIL %1 + %3:vgpr(i32) = G_BITCAST %2(f32) + $vgpr0 = COPY %3(i32) ... 
--- @@ -36,11 +38,13 @@ body: | ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: %1:vgpr_32 = nofpexcept V_CEIL_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: $vgpr0 = COPY %1 - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = G_FCEIL %0 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[V_CEIL_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CEIL_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: $vgpr0 = COPY [[V_CEIL_F32_e64_]] + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(f32) = G_BITCAST %0(i32) + %2:vgpr(f32) = G_FCEIL %1 + %3:vgpr(i32) = G_BITCAST %2(f32) + $vgpr0 = COPY %3(i32) ... --- @@ -57,11 +61,13 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: %1:vreg_64 = nofpexcept V_CEIL_F64_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %1 - %0:sgpr(s64) = COPY $sgpr0_sgpr1 - %1:vgpr(s64) = G_FCEIL %0 - $vgpr0_vgpr1 = COPY %1 + ; CHECK-NEXT: [[V_CEIL_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_CEIL_F64_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[V_CEIL_F64_e64_]] + %0:sgpr(i64) = COPY $sgpr0_sgpr1 + %1:sgpr(f64) = G_BITCAST %0(i64) + %2:vgpr(f64) = G_FCEIL %1 + %3:vgpr(i64) = G_BITCAST %2(f64) + $vgpr0_vgpr1 = COPY %3(i64) ... --- @@ -78,9 +84,11 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: %1:vreg_64 = nofpexcept V_CEIL_F64_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %1 - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_FCEIL %0 - $vgpr0_vgpr1 = COPY %1 + ; CHECK-NEXT: [[V_CEIL_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_CEIL_F64_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[V_CEIL_F64_e64_]] + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(f64) = G_BITCAST %0(i64) + %2:vgpr(f64) = G_FCEIL %1 + %3:vgpr(i64) = G_BITCAST %2(f64) + $vgpr0_vgpr1 = COPY %3(i64) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fceil.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fceil.s16.mir index df2f390124ebd..4bfd4fd8316b3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fceil.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fceil.s16.mir @@ -16,16 +16,20 @@ body: | ; GCN-LABEL: name: fceil_s16_ss ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GCN-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GCN-NEXT: [[FCEIL:%[0-9]+]]:sreg_32(s16) = G_FCEIL [[TRUNC]] - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32(s32) = COPY [[FCEIL]](s16) - ; GCN-NEXT: $sgpr0 = COPY [[COPY1]](s32) - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0 - %2:sgpr(s16) = G_FCEIL %1 - %3:sgpr(s32) = G_ANYEXT %2 - $sgpr0 = COPY %3 + ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GCN-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f16) = G_BITCAST [[TRUNC]](i16) + ; GCN-NEXT: [[FCEIL:%[0-9]+]]:sreg_32(f16) = G_FCEIL [[BITCAST]] + ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32(i16) = COPY [[FCEIL]](f16) + ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32(i32) = COPY [[COPY1]](i16) + ; GCN-NEXT: $sgpr0 = COPY [[COPY2]](i32) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(f16) = G_BITCAST %1(i16) + %3:sgpr(f16) = G_FCEIL %2 + %4:sgpr(i16) = G_BITCAST %3(f16) + %5:sgpr(i32) = G_ANYEXT %4(i16) + $sgpr0 = COPY %5(i32) ... --- @@ -61,11 +65,13 @@ body: | ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-FAKE16-NEXT: [[V_CEIL_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CEIL_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CEIL_F16_fake16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s16) = G_TRUNC %0 - %2:vgpr(s16) = G_FCEIL %1 - %3:vgpr(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i16) = G_TRUNC %0(i32) + %2:vgpr(f16) = G_BITCAST %1(i16) + %3:vgpr(f16) = G_FCEIL %2 + %4:vgpr(i16) = G_BITCAST %3(f16) + %5:vgpr(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... --- @@ -100,11 +106,13 @@ body: | ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX11-FAKE16-NEXT: [[V_CEIL_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CEIL_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CEIL_F16_fake16_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0 - %2:vgpr(s16) = G_FCEIL %1 - %3:vgpr(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(f16) = G_BITCAST %1(i16) + %3:vgpr(f16) = G_FCEIL %2 + %4:vgpr(i16) = G_BITCAST %3(f16) + %5:vgpr(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... --- @@ -140,10 +148,12 @@ body: | ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-FAKE16-NEXT: [[V_CEIL_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CEIL_F16_fake16_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CEIL_F16_fake16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s16) = G_TRUNC %0 - %2:vgpr(s16) = G_FNEG %1 - %3:vgpr(s16) = G_FCEIL %2 - %4:vgpr(s32) = G_ANYEXT %3 - $vgpr0 = COPY %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i16) = G_TRUNC %0(i32) + %2:vgpr(f16) = G_BITCAST %1(i16) + %3:vgpr(f16) = G_FNEG %2 + %4:vgpr(f16) = G_FCEIL %3 + %5:vgpr(i16) = G_BITCAST %4(f16) + %6:vgpr(i32) = G_ANYEXT %5(i16) + $vgpr0 = COPY %6(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.mir index 15f7a2202b71e..7ba68b2d5c6df 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.mir @@ -14,21 +14,28 @@ body: | ; WAVE64-LABEL: name: fcmp_false_s32_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; WAVE64-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(false), [[COPY]](s32), [[COPY1]] - ; WAVE64-NEXT: S_ENDPGM 0, implicit [[FCMP]](s1) + ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; WAVE64-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; WAVE64-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; WAVE64-NEXT: [[FCMP:%[0-9]+]]:vcc(i1) = G_FCMP floatpred(false), [[BITCAST]](f32), [[BITCAST1]] + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[FCMP]](i1) + ; ; WAVE32-LABEL: name: fcmp_false_s32_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; WAVE32-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(false), [[COPY]](s32), [[COPY1]] - ; WAVE32-NEXT: S_ENDPGM 0, implicit [[FCMP]](s1) - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vcc(s1) = G_FCMP floatpred(false), %0, %1 - S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; WAVE32-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; WAVE32-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; WAVE32-NEXT: [[FCMP:%[0-9]+]]:vcc(i1) = G_FCMP floatpred(false), [[BITCAST]](f32), [[BITCAST1]] + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[FCMP]](i1) + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(f32) = G_BITCAST %0(i32) + %3:vgpr(f32) = G_BITCAST %1(i32) + %4:vcc(i1) = G_FCMP floatpred(false), %2(f32), %3 + S_ENDPGM 0, implicit %4(i1) ... 
--- @@ -44,19 +51,22 @@ body: | ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_EQ_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[V_CMP_EQ_F32_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_EQ_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_F32_e64_]] + ; ; WAVE32-LABEL: name: fcmp_oeq_s32_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_EQ_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vcc(s1) = G_FCMP floatpred(oeq), %0, %1 - S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[V_CMP_EQ_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_EQ_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_F32_e64_]] + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(f32) = G_BITCAST %0(i32) + %3:vgpr(f32) = G_BITCAST %1(i32) + %4:vcc(i1) = G_FCMP floatpred(oeq), %2(f32), %3 + S_ENDPGM 0, implicit %4(i1) ... --- @@ -72,19 +82,22 @@ body: | ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_GT_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[V_CMP_GT_F32_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_GT_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_GT_F32_e64_]] + ; ; WAVE32-LABEL: name: fcmp_ogt_s32_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_GT_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vcc(s1) = G_FCMP floatpred(ogt), %0, %1 - S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[V_CMP_GT_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_GT_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_GT_F32_e64_]] + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(f32) = G_BITCAST %0(i32) + %3:vgpr(f32) = G_BITCAST %1(i32) + %4:vcc(i1) = G_FCMP floatpred(ogt), %2(f32), %3 + S_ENDPGM 0, implicit %4(i1) ... 
--- @@ -100,19 +113,22 @@ body: | ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_GE_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[V_CMP_GE_F32_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_GE_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_GE_F32_e64_]] + ; ; WAVE32-LABEL: name: fcmp_oge_s32_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_GE_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vcc(s1) = G_FCMP floatpred(oge), %0, %1 - S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[V_CMP_GE_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_GE_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_GE_F32_e64_]] + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(f32) = G_BITCAST %0(i32) + %3:vgpr(f32) = G_BITCAST %1(i32) + %4:vcc(i1) = G_FCMP floatpred(oge), %2(f32), %3 + S_ENDPGM 0, implicit %4(i1) ... --- @@ -128,19 +144,22 @@ body: | ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_LT_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[V_CMP_LT_F32_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_LT_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_F32_e64_]] + ; ; WAVE32-LABEL: name: fcmp_olt_s32_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_LT_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vcc(s1) = G_FCMP floatpred(olt), %0, %1 - S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[V_CMP_LT_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LT_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_F32_e64_]] + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(f32) = G_BITCAST %0(i32) + %3:vgpr(f32) = G_BITCAST %1(i32) + %4:vcc(i1) = G_FCMP floatpred(olt), %2(f32), %3 + S_ENDPGM 0, implicit %4(i1) ... 
--- @@ -156,19 +175,22 @@ body: | ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_LE_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[V_CMP_LE_F32_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_LE_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_F32_e64_]] + ; ; WAVE32-LABEL: name: fcmp_ole_s32_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_LE_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vcc(s1) = G_FCMP floatpred(ole), %0, %1 - S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[V_CMP_LE_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LE_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_F32_e64_]] + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(f32) = G_BITCAST %0(i32) + %3:vgpr(f32) = G_BITCAST %1(i32) + %4:vcc(i1) = G_FCMP floatpred(ole), %2(f32), %3 + S_ENDPGM 0, implicit %4(i1) ... --- @@ -184,19 +206,22 @@ body: | ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_LG_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[V_CMP_LG_F32_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_LG_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LG_F32_e64_]] + ; ; WAVE32-LABEL: name: fcmp_one_s32_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_LG_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vcc(s1) = G_FCMP floatpred(one), %0, %1 - S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[V_CMP_LG_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LG_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LG_F32_e64_]] + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(f32) = G_BITCAST %0(i32) + %3:vgpr(f32) = G_BITCAST %1(i32) + %4:vcc(i1) = G_FCMP floatpred(one), %2(f32), %3 + S_ENDPGM 0, implicit %4(i1) ... 
--- @@ -212,19 +237,22 @@ body: | ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_O_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[V_CMP_O_F32_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_O_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_O_F32_e64_]] + ; ; WAVE32-LABEL: name: fcmp_ord_s32_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_O_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vcc(s1) = G_FCMP floatpred(ord), %0, %1 - S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[V_CMP_O_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_O_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_O_F32_e64_]] + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(f32) = G_BITCAST %0(i32) + %3:vgpr(f32) = G_BITCAST %1(i32) + %4:vcc(i1) = G_FCMP floatpred(ord), %2(f32), %3 + S_ENDPGM 0, implicit %4(i1) ... --- @@ -240,19 +268,22 @@ body: | ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_U_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[V_CMP_U_F32_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_U_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_U_F32_e64_]] + ; ; WAVE32-LABEL: name: fcmp_uno_s32_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_U_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vcc(s1) = G_FCMP floatpred(uno), %0, %1 - S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[V_CMP_U_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_U_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_U_F32_e64_]] + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(f32) = G_BITCAST %0(i32) + %3:vgpr(f32) = G_BITCAST %1(i32) + %4:vcc(i1) = G_FCMP floatpred(uno), %2(f32), %3 + S_ENDPGM 0, implicit %4(i1) ... 
--- @@ -268,19 +299,22 @@ body: | ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_NLG_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[V_CMP_NLG_F32_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_NLG_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLG_F32_e64_]] + ; ; WAVE32-LABEL: name: fcmp_ueq_s32_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_NLG_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vcc(s1) = G_FCMP floatpred(ueq), %0, %1 - S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[V_CMP_NLG_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NLG_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLG_F32_e64_]] + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(f32) = G_BITCAST %0(i32) + %3:vgpr(f32) = G_BITCAST %1(i32) + %4:vcc(i1) = G_FCMP floatpred(ueq), %2(f32), %3 + S_ENDPGM 0, implicit %4(i1) ... --- @@ -296,19 +330,22 @@ body: | ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_NLE_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[V_CMP_NLE_F32_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_NLE_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLE_F32_e64_]] + ; ; WAVE32-LABEL: name: fcmp_ugt_s32_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_NLE_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vcc(s1) = G_FCMP floatpred(ugt), %0, %1 - S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[V_CMP_NLE_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NLE_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLE_F32_e64_]] + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(f32) = G_BITCAST %0(i32) + %3:vgpr(f32) = G_BITCAST %1(i32) + %4:vcc(i1) = G_FCMP floatpred(ugt), %2(f32), %3 + S_ENDPGM 0, implicit %4(i1) ... 
--- @@ -324,19 +361,22 @@ body: | ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_NLT_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[V_CMP_NLT_F32_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_NLT_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLT_F32_e64_]] + ; ; WAVE32-LABEL: name: fcmp_uge_s32_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_NLT_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vcc(s1) = G_FCMP floatpred(uge), %0, %1 - S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[V_CMP_NLT_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NLT_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLT_F32_e64_]] + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(f32) = G_BITCAST %0(i32) + %3:vgpr(f32) = G_BITCAST %1(i32) + %4:vcc(i1) = G_FCMP floatpred(uge), %2(f32), %3 + S_ENDPGM 0, implicit %4(i1) ... --- @@ -352,19 +392,22 @@ body: | ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_NGE_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[V_CMP_NGE_F32_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_NGE_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_NGE_F32_e64_]] + ; ; WAVE32-LABEL: name: fcmp_ult_s32_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_NGE_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vcc(s1) = G_FCMP floatpred(ult), %0, %1 - S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[V_CMP_NGE_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NGE_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NGE_F32_e64_]] + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(f32) = G_BITCAST %0(i32) + %3:vgpr(f32) = G_BITCAST %1(i32) + %4:vcc(i1) = G_FCMP floatpred(ult), %2(f32), %3 + S_ENDPGM 0, implicit %4(i1) ... 
--- @@ -380,19 +423,22 @@ body: | ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_NGT_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[V_CMP_NGT_F32_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_NGT_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_NGT_F32_e64_]] + ; ; WAVE32-LABEL: name: fcmp_ule_s32_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_NGT_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vcc(s1) = G_FCMP floatpred(ule), %0, %1 - S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[V_CMP_NGT_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NGT_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NGT_F32_e64_]] + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(f32) = G_BITCAST %0(i32) + %3:vgpr(f32) = G_BITCAST %1(i32) + %4:vcc(i1) = G_FCMP floatpred(ule), %2(f32), %3 + S_ENDPGM 0, implicit %4(i1) ... --- @@ -408,19 +454,22 @@ body: | ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_NEQ_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[V_CMP_NEQ_F32_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_NEQ_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_NEQ_F32_e64_]] + ; ; WAVE32-LABEL: name: fcmp_une_s32_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_NEQ_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vcc(s1) = G_FCMP floatpred(une), %0, %1 - S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[V_CMP_NEQ_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NEQ_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NEQ_F32_e64_]] + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(f32) = G_BITCAST %0(i32) + %3:vgpr(f32) = G_BITCAST %1(i32) + %4:vcc(i1) = G_FCMP floatpred(une), %2(f32), %3 + S_ENDPGM 0, implicit %4(i1) ... 
--- @@ -434,21 +483,28 @@ body: | ; WAVE64-LABEL: name: fcmp_true_s32_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; WAVE64-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(true), [[COPY]](s32), [[COPY1]] - ; WAVE64-NEXT: S_ENDPGM 0, implicit [[FCMP]](s1) + ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; WAVE64-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; WAVE64-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; WAVE64-NEXT: [[FCMP:%[0-9]+]]:vcc(i1) = G_FCMP floatpred(true), [[BITCAST]](f32), [[BITCAST1]] + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[FCMP]](i1) + ; ; WAVE32-LABEL: name: fcmp_true_s32_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; WAVE32-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(true), [[COPY]](s32), [[COPY1]] - ; WAVE32-NEXT: S_ENDPGM 0, implicit [[FCMP]](s1) - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vcc(s1) = G_FCMP floatpred(true), %0, %1 - S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; WAVE32-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; WAVE32-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; WAVE32-NEXT: [[FCMP:%[0-9]+]]:vcc(i1) = G_FCMP floatpred(true), [[BITCAST]](f32), [[BITCAST1]] + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[FCMP]](i1) + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(f32) = G_BITCAST %0(i32) + %3:vgpr(f32) = G_BITCAST %1(i32) + %4:vcc(i1) = G_FCMP floatpred(true), %2(f32), %3 + S_ENDPGM 0, implicit %4(i1) ... 
--- @@ -462,21 +518,28 @@ body: | ; WAVE64-LABEL: name: fcmp_false_s64_vv ; WAVE64: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; WAVE64-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(false), [[COPY]](s64), [[COPY1]] - ; WAVE64-NEXT: S_ENDPGM 0, implicit [[FCMP]](s1) + ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr2_vgpr3 + ; WAVE64-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f64) = G_BITCAST [[COPY]](i64) + ; WAVE64-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f64) = G_BITCAST [[COPY1]](i64) + ; WAVE64-NEXT: [[FCMP:%[0-9]+]]:vcc(i1) = G_FCMP floatpred(false), [[BITCAST]](f64), [[BITCAST1]] + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[FCMP]](i1) + ; ; WAVE32-LABEL: name: fcmp_false_s64_vv ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; WAVE32-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(false), [[COPY]](s64), [[COPY1]] - ; WAVE32-NEXT: S_ENDPGM 0, implicit [[FCMP]](s1) - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = COPY $vgpr2_vgpr3 - %2:vcc(s1) = G_FCMP floatpred(false), %0, %1 - S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr2_vgpr3 + ; WAVE32-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f64) = G_BITCAST [[COPY]](i64) + ; WAVE32-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f64) = G_BITCAST [[COPY1]](i64) + ; WAVE32-NEXT: [[FCMP:%[0-9]+]]:vcc(i1) = G_FCMP floatpred(false), [[BITCAST]](f64), [[BITCAST1]] + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[FCMP]](i1) + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(i64) = COPY $vgpr2_vgpr3 + %2:vgpr(f64) = G_BITCAST %0(i64) + %3:vgpr(f64) = G_BITCAST %1(i64) + %4:vcc(i1) = G_FCMP floatpred(false), %2(f64), %3 + S_ENDPGM 0, implicit %4(i1) ... 
--- @@ -492,19 +555,22 @@ body: | ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_EQ_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[V_CMP_EQ_F64_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_EQ_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_F64_e64_]] + ; ; WAVE32-LABEL: name: fcmp_oeq_s64_vv ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_EQ_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = COPY $vgpr2_vgpr3 - %2:vcc(s1) = G_FCMP floatpred(oeq), %0, %1 - S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[V_CMP_EQ_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_EQ_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_F64_e64_]] + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(i64) = COPY $vgpr2_vgpr3 + %2:vgpr(f64) = G_BITCAST %0(i64) + %3:vgpr(f64) = G_BITCAST %1(i64) + %4:vcc(i1) = G_FCMP floatpred(oeq), %2(f64), %3 + S_ENDPGM 0, implicit %4(i1) ... --- @@ -520,19 +586,22 @@ body: | ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_GT_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[V_CMP_GT_F64_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_GT_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_GT_F64_e64_]] + ; ; WAVE32-LABEL: name: fcmp_ogt_s64_vv ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_GT_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = COPY $vgpr2_vgpr3 - %2:vcc(s1) = G_FCMP floatpred(ogt), %0, %1 - S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[V_CMP_GT_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_GT_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_GT_F64_e64_]] + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(i64) = COPY $vgpr2_vgpr3 + %2:vgpr(f64) = G_BITCAST %0(i64) + %3:vgpr(f64) = G_BITCAST %1(i64) + %4:vcc(i1) = G_FCMP floatpred(ogt), %2(f64), %3 + S_ENDPGM 0, implicit %4(i1) ... 
--- @@ -548,19 +617,22 @@ body: | ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_GE_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[V_CMP_GE_F64_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_GE_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_GE_F64_e64_]] + ; ; WAVE32-LABEL: name: fcmp_oge_s64_vv ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_GE_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = COPY $vgpr2_vgpr3 - %2:vcc(s1) = G_FCMP floatpred(oge), %0, %1 - S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[V_CMP_GE_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_GE_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_GE_F64_e64_]] + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(i64) = COPY $vgpr2_vgpr3 + %2:vgpr(f64) = G_BITCAST %0(i64) + %3:vgpr(f64) = G_BITCAST %1(i64) + %4:vcc(i1) = G_FCMP floatpred(oge), %2(f64), %3 + S_ENDPGM 0, implicit %4(i1) ... --- @@ -576,19 +648,22 @@ body: | ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_LT_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[V_CMP_LT_F64_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_LT_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_F64_e64_]] + ; ; WAVE32-LABEL: name: fcmp_olt_s64_vv ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_LT_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = COPY $vgpr2_vgpr3 - %2:vcc(s1) = G_FCMP floatpred(olt), %0, %1 - S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[V_CMP_LT_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LT_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_F64_e64_]] + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(i64) = COPY $vgpr2_vgpr3 + %2:vgpr(f64) = G_BITCAST %0(i64) + %3:vgpr(f64) = G_BITCAST %1(i64) + %4:vcc(i1) = G_FCMP floatpred(olt), %2(f64), %3 + S_ENDPGM 0, implicit %4(i1) ... 
--- @@ -604,19 +679,22 @@ body: | ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_LE_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[V_CMP_LE_F64_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_LE_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_F64_e64_]] + ; ; WAVE32-LABEL: name: fcmp_ole_s64_vv ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_LE_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = COPY $vgpr2_vgpr3 - %2:vcc(s1) = G_FCMP floatpred(ole), %0, %1 - S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[V_CMP_LE_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LE_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_F64_e64_]] + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(i64) = COPY $vgpr2_vgpr3 + %2:vgpr(f64) = G_BITCAST %0(i64) + %3:vgpr(f64) = G_BITCAST %1(i64) + %4:vcc(i1) = G_FCMP floatpred(ole), %2(f64), %3 + S_ENDPGM 0, implicit %4(i1) ... --- @@ -632,19 +710,22 @@ body: | ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_LG_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[V_CMP_LG_F64_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_LG_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LG_F64_e64_]] + ; ; WAVE32-LABEL: name: fcmp_one_s64_vv ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_LG_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = COPY $vgpr2_vgpr3 - %2:vcc(s1) = G_FCMP floatpred(one), %0, %1 - S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[V_CMP_LG_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LG_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LG_F64_e64_]] + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(i64) = COPY $vgpr2_vgpr3 + %2:vgpr(f64) = G_BITCAST %0(i64) + %3:vgpr(f64) = G_BITCAST %1(i64) + %4:vcc(i1) = G_FCMP floatpred(one), %2(f64), %3 + S_ENDPGM 0, implicit %4(i1) ... 
--- @@ -660,19 +741,22 @@ body: | ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_O_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[V_CMP_O_F64_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_O_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_O_F64_e64_]] + ; ; WAVE32-LABEL: name: fcmp_ord_s64_vv ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_O_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = COPY $vgpr2_vgpr3 - %2:vcc(s1) = G_FCMP floatpred(ord), %0, %1 - S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[V_CMP_O_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_O_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_O_F64_e64_]] + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(i64) = COPY $vgpr2_vgpr3 + %2:vgpr(f64) = G_BITCAST %0(i64) + %3:vgpr(f64) = G_BITCAST %1(i64) + %4:vcc(i1) = G_FCMP floatpred(ord), %2(f64), %3 + S_ENDPGM 0, implicit %4(i1) ... --- @@ -688,19 +772,22 @@ body: | ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_U_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[V_CMP_U_F64_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_U_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_U_F64_e64_]] + ; ; WAVE32-LABEL: name: fcmp_uno_s64_vv ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_U_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = COPY $vgpr2_vgpr3 - %2:vcc(s1) = G_FCMP floatpred(uno), %0, %1 - S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[V_CMP_U_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_U_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_U_F64_e64_]] + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(i64) = COPY $vgpr2_vgpr3 + %2:vgpr(f64) = G_BITCAST %0(i64) + %3:vgpr(f64) = G_BITCAST %1(i64) + %4:vcc(i1) = G_FCMP floatpred(uno), %2(f64), %3 + S_ENDPGM 0, implicit %4(i1) ... 
--- @@ -716,19 +803,22 @@ body: | ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_NLG_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[V_CMP_NLG_F64_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_NLG_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLG_F64_e64_]] + ; ; WAVE32-LABEL: name: fcmp_ueq_s64_vv ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_NLG_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = COPY $vgpr2_vgpr3 - %2:vcc(s1) = G_FCMP floatpred(ueq), %0, %1 - S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[V_CMP_NLG_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NLG_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLG_F64_e64_]] + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(i64) = COPY $vgpr2_vgpr3 + %2:vgpr(f64) = G_BITCAST %0(i64) + %3:vgpr(f64) = G_BITCAST %1(i64) + %4:vcc(i1) = G_FCMP floatpred(ueq), %2(f64), %3 + S_ENDPGM 0, implicit %4(i1) ... --- @@ -744,19 +834,22 @@ body: | ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_NLE_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[V_CMP_NLE_F64_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_NLE_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLE_F64_e64_]] + ; ; WAVE32-LABEL: name: fcmp_ugt_s64_vv ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_NLE_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = COPY $vgpr2_vgpr3 - %2:vcc(s1) = G_FCMP floatpred(ugt), %0, %1 - S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[V_CMP_NLE_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NLE_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLE_F64_e64_]] + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(i64) = COPY $vgpr2_vgpr3 + %2:vgpr(f64) = G_BITCAST %0(i64) + %3:vgpr(f64) = G_BITCAST %1(i64) + %4:vcc(i1) = G_FCMP floatpred(ugt), %2(f64), %3 + S_ENDPGM 0, implicit %4(i1) ... 
--- @@ -772,19 +865,22 @@ body: | ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_NLT_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[V_CMP_NLT_F64_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_NLT_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLT_F64_e64_]] + ; ; WAVE32-LABEL: name: fcmp_uge_s64_vv ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_NLT_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = COPY $vgpr2_vgpr3 - %2:vcc(s1) = G_FCMP floatpred(uge), %0, %1 - S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[V_CMP_NLT_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NLT_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLT_F64_e64_]] + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(i64) = COPY $vgpr2_vgpr3 + %2:vgpr(f64) = G_BITCAST %0(i64) + %3:vgpr(f64) = G_BITCAST %1(i64) + %4:vcc(i1) = G_FCMP floatpred(uge), %2(f64), %3 + S_ENDPGM 0, implicit %4(i1) ... --- @@ -800,19 +896,22 @@ body: | ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_NGE_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[V_CMP_NGE_F64_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_NGE_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_NGE_F64_e64_]] + ; ; WAVE32-LABEL: name: fcmp_ult_s64_vv ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_NGE_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = COPY $vgpr2_vgpr3 - %2:vcc(s1) = G_FCMP floatpred(ult), %0, %1 - S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[V_CMP_NGE_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NGE_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NGE_F64_e64_]] + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(i64) = COPY $vgpr2_vgpr3 + %2:vgpr(f64) = G_BITCAST %0(i64) + %3:vgpr(f64) = G_BITCAST %1(i64) + %4:vcc(i1) = G_FCMP floatpred(ult), %2(f64), %3 + S_ENDPGM 0, implicit %4(i1) ... 
--- @@ -828,19 +927,22 @@ body: | ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_NGT_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[V_CMP_NGT_F64_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_NGT_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_NGT_F64_e64_]] + ; ; WAVE32-LABEL: name: fcmp_ule_s64_vv ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_NGT_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = COPY $vgpr2_vgpr3 - %2:vcc(s1) = G_FCMP floatpred(ule), %0, %1 - S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[V_CMP_NGT_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NGT_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NGT_F64_e64_]] + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(i64) = COPY $vgpr2_vgpr3 + %2:vgpr(f64) = G_BITCAST %0(i64) + %3:vgpr(f64) = G_BITCAST %1(i64) + %4:vcc(i1) = G_FCMP floatpred(ule), %2(f64), %3 + S_ENDPGM 0, implicit %4(i1) ... --- @@ -856,19 +958,22 @@ body: | ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_NEQ_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[V_CMP_NEQ_F64_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_NEQ_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_NEQ_F64_e64_]] + ; ; WAVE32-LABEL: name: fcmp_une_s64_vv ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_NEQ_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = COPY $vgpr2_vgpr3 - %2:vcc(s1) = G_FCMP floatpred(une), %0, %1 - S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[V_CMP_NEQ_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NEQ_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NEQ_F64_e64_]] + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(i64) = COPY $vgpr2_vgpr3 + %2:vgpr(f64) = G_BITCAST %0(i64) + %3:vgpr(f64) = G_BITCAST %1(i64) + %4:vcc(i1) = G_FCMP floatpred(une), %2(f64), %3 + S_ENDPGM 0, implicit %4(i1) ... 
--- @@ -882,21 +987,28 @@ body: | ; WAVE64-LABEL: name: fcmp_true_s64_vv ; WAVE64: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; WAVE64-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(true), [[COPY]](s64), [[COPY1]] - ; WAVE64-NEXT: S_ENDPGM 0, implicit [[FCMP]](s1) + ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr2_vgpr3 + ; WAVE64-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f64) = G_BITCAST [[COPY]](i64) + ; WAVE64-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f64) = G_BITCAST [[COPY1]](i64) + ; WAVE64-NEXT: [[FCMP:%[0-9]+]]:vcc(i1) = G_FCMP floatpred(true), [[BITCAST]](f64), [[BITCAST1]] + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[FCMP]](i1) + ; ; WAVE32-LABEL: name: fcmp_true_s64_vv ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; WAVE32-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(true), [[COPY]](s64), [[COPY1]] - ; WAVE32-NEXT: S_ENDPGM 0, implicit [[FCMP]](s1) - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = COPY $vgpr2_vgpr3 - %2:vcc(s1) = G_FCMP floatpred(true), %0, %1 - S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr2_vgpr3 + ; WAVE32-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f64) = G_BITCAST [[COPY]](i64) + ; WAVE32-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f64) = G_BITCAST [[COPY1]](i64) + ; WAVE32-NEXT: [[FCMP:%[0-9]+]]:vcc(i1) = G_FCMP floatpred(true), [[BITCAST]](f64), [[BITCAST1]] + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[FCMP]](i1) + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(i64) = COPY $vgpr2_vgpr3 + %2:vgpr(f64) = G_BITCAST %0(i64) + %3:vgpr(f64) = G_BITCAST %1(i64) + %4:vcc(i1) = G_FCMP floatpred(true), %2(f64), %3 + S_ENDPGM 0, implicit %4(i1) ... 
--- @@ -912,20 +1024,23 @@ body: | ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_EQ_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], %2, implicit $exec + ; WAVE64-NEXT: [[V_CMP_EQ_F32_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_EQ_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_CMP_EQ_F32_e64_]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]] + ; ; WAVE32-LABEL: name: fcmp_oeq_s32_vv_select_user ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_EQ_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], %2, implicit $exec + ; WAVE32-NEXT: [[V_CMP_EQ_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_EQ_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_CMP_EQ_F32_e64_]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vcc(s1) = G_FCMP floatpred(oeq), %0, %1 - %3:vgpr(s32) = G_SELECT %2, %0, %1 - S_ENDPGM 0, implicit %3 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(f32) = G_BITCAST %0(i32) + %3:vgpr(f32) = G_BITCAST %1(i32) + %4:vcc(i1) = G_FCMP floatpred(oeq), %2(f32), %3 + %5:vgpr(i32) = G_SELECT %4(i1), %0, %1 + S_ENDPGM 0, implicit %5(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.s16.gfx11plus-fake16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.s16.gfx11plus-fake16.mir index 23da26d96b629..ed474a9f23731 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.s16.gfx11plus-fake16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.s16.gfx11plus-fake16.mir @@ -12,18 +12,22 @@ body: | ; GFX11-LABEL: name: fcmp_false_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) - ; GFX11-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(false), [[TRUNC]](s16), [[TRUNC1]] - ; GFX11-NEXT: S_ENDPGM 0, implicit [[FCMP]](s1) - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vcc(s1) = G_FCMP floatpred(false), %2, %3 - S_ENDPGM 0, implicit %4 + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY1]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[FCMP:%[0-9]+]]:vcc(i1) = G_FCMP floatpred(false), [[BITCAST]](f16), [[BITCAST1]] + ; GFX11-NEXT: S_ENDPGM 0, implicit [[FCMP]](i1) + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %5:vgpr(f16) = G_BITCAST %2(i16) + %6:vgpr(f16) = G_BITCAST %3(i16) + %4:vcc(i1) = G_FCMP floatpred(false), %5(f16), %6 + S_ENDPGM 0, implicit %4(i1) ... --- @@ -41,12 +45,14 @@ body: | ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[V_CMP_EQ_F16_fake16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_EQ_F16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_F16_fake16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vcc(s1) = G_FCMP floatpred(oeq), %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %5:vgpr(f16) = G_BITCAST %2(i16) + %6:vgpr(f16) = G_BITCAST %3(i16) + %4:vcc(i1) = G_FCMP floatpred(oeq), %5(f16), %6 + S_ENDPGM 0, implicit %4(i1) ... --- @@ -64,12 +70,14 @@ body: | ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[V_CMP_GT_F16_fake16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_GT_F16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_GT_F16_fake16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vcc(s1) = G_FCMP floatpred(ogt), %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %5:vgpr(f16) = G_BITCAST %2(i16) + %6:vgpr(f16) = G_BITCAST %3(i16) + %4:vcc(i1) = G_FCMP floatpred(ogt), %5(f16), %6 + S_ENDPGM 0, implicit %4(i1) ... 
--- @@ -87,12 +95,14 @@ body: | ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[V_CMP_GE_F16_fake16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_GE_F16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_GE_F16_fake16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vcc(s1) = G_FCMP floatpred(oge), %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %5:vgpr(f16) = G_BITCAST %2(i16) + %6:vgpr(f16) = G_BITCAST %3(i16) + %4:vcc(i1) = G_FCMP floatpred(oge), %5(f16), %6 + S_ENDPGM 0, implicit %4(i1) ... --- @@ -110,12 +120,14 @@ body: | ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[V_CMP_LT_F16_fake16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LT_F16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_F16_fake16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vcc(s1) = G_FCMP floatpred(olt), %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %5:vgpr(f16) = G_BITCAST %2(i16) + %6:vgpr(f16) = G_BITCAST %3(i16) + %4:vcc(i1) = G_FCMP floatpred(olt), %5(f16), %6 + S_ENDPGM 0, implicit %4(i1) ... --- @@ -133,12 +145,14 @@ body: | ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[V_CMP_LE_F16_fake16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LE_F16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_F16_fake16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vcc(s1) = G_FCMP floatpred(ole), %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %5:vgpr(f16) = G_BITCAST %2(i16) + %6:vgpr(f16) = G_BITCAST %3(i16) + %4:vcc(i1) = G_FCMP floatpred(ole), %5(f16), %6 + S_ENDPGM 0, implicit %4(i1) ... --- name: fcmp_one_s16_vv @@ -155,12 +169,14 @@ body: | ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[V_CMP_LG_F16_fake16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LG_F16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LG_F16_fake16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vcc(s1) = G_FCMP floatpred(one), %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %5:vgpr(f16) = G_BITCAST %2(i16) + %6:vgpr(f16) = G_BITCAST %3(i16) + %4:vcc(i1) = G_FCMP floatpred(one), %5(f16), %6 + S_ENDPGM 0, implicit %4(i1) ... 
--- @@ -178,12 +194,14 @@ body: | ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[V_CMP_LG_F16_fake16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LG_F16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LG_F16_fake16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vcc(s1) = G_FCMP floatpred(one), %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %5:vgpr(f16) = G_BITCAST %2(i16) + %6:vgpr(f16) = G_BITCAST %3(i16) + %4:vcc(i1) = G_FCMP floatpred(one), %5(f16), %6 + S_ENDPGM 0, implicit %4(i1) ... --- @@ -201,12 +219,14 @@ body: | ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[V_CMP_U_F16_fake16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_U_F16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_U_F16_fake16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vcc(s1) = G_FCMP floatpred(uno), %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %5:vgpr(f16) = G_BITCAST %2(i16) + %6:vgpr(f16) = G_BITCAST %3(i16) + %4:vcc(i1) = G_FCMP floatpred(uno), %5(f16), %6 + S_ENDPGM 0, implicit %4(i1) ... --- @@ -224,12 +244,14 @@ body: | ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[V_CMP_NLG_F16_fake16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NLG_F16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLG_F16_fake16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vcc(s1) = G_FCMP floatpred(ueq), %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %5:vgpr(f16) = G_BITCAST %2(i16) + %6:vgpr(f16) = G_BITCAST %3(i16) + %4:vcc(i1) = G_FCMP floatpred(ueq), %5(f16), %6 + S_ENDPGM 0, implicit %4(i1) ... --- @@ -247,12 +269,14 @@ body: | ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[V_CMP_NLE_F16_fake16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NLE_F16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLE_F16_fake16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vcc(s1) = G_FCMP floatpred(ugt), %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %5:vgpr(f16) = G_BITCAST %2(i16) + %6:vgpr(f16) = G_BITCAST %3(i16) + %4:vcc(i1) = G_FCMP floatpred(ugt), %5(f16), %6 + S_ENDPGM 0, implicit %4(i1) ... 
--- @@ -270,12 +294,14 @@ body: | ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[V_CMP_NLT_F16_fake16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NLT_F16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLT_F16_fake16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vcc(s1) = G_FCMP floatpred(uge), %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %5:vgpr(f16) = G_BITCAST %2(i16) + %6:vgpr(f16) = G_BITCAST %3(i16) + %4:vcc(i1) = G_FCMP floatpred(uge), %5(f16), %6 + S_ENDPGM 0, implicit %4(i1) ... --- @@ -293,12 +319,14 @@ body: | ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[V_CMP_NGE_F16_fake16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NGE_F16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_NGE_F16_fake16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vcc(s1) = G_FCMP floatpred(ult), %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %5:vgpr(f16) = G_BITCAST %2(i16) + %6:vgpr(f16) = G_BITCAST %3(i16) + %4:vcc(i1) = G_FCMP floatpred(ult), %5(f16), %6 + S_ENDPGM 0, implicit %4(i1) ... --- @@ -316,12 +344,14 @@ body: | ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[V_CMP_NGT_F16_fake16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NGT_F16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_NGT_F16_fake16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vcc(s1) = G_FCMP floatpred(ule), %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %5:vgpr(f16) = G_BITCAST %2(i16) + %6:vgpr(f16) = G_BITCAST %3(i16) + %4:vcc(i1) = G_FCMP floatpred(ule), %5(f16), %6 + S_ENDPGM 0, implicit %4(i1) ... --- @@ -339,12 +369,14 @@ body: | ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[V_CMP_NEQ_F16_fake16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NEQ_F16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_NEQ_F16_fake16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vcc(s1) = G_FCMP floatpred(une), %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %5:vgpr(f16) = G_BITCAST %2(i16) + %6:vgpr(f16) = G_BITCAST %3(i16) + %4:vcc(i1) = G_FCMP floatpred(une), %5(f16), %6 + S_ENDPGM 0, implicit %4(i1) ... 
--- @@ -358,17 +390,21 @@ body: | ; GFX11-LABEL: name: fcmp_true_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) - ; GFX11-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(true), [[TRUNC]](s16), [[TRUNC1]] - ; GFX11-NEXT: S_ENDPGM 0, implicit [[FCMP]](s1) - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vcc(s1) = G_FCMP floatpred(true), %2, %3 - S_ENDPGM 0, implicit %4 + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY1]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[FCMP:%[0-9]+]]:vcc(i1) = G_FCMP floatpred(true), [[BITCAST]](f16), [[BITCAST1]] + ; GFX11-NEXT: S_ENDPGM 0, implicit [[FCMP]](i1) + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %5:vgpr(f16) = G_BITCAST %2(i16) + %6:vgpr(f16) = G_BITCAST %3(i16) + %4:vcc(i1) = G_FCMP floatpred(true), %5(f16), %6 + S_ENDPGM 0, implicit %4(i1) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.s16.gfx11plus.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.s16.gfx11plus.mir index a7140e6a74fd4..b240f5a50a3f3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.s16.gfx11plus.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.s16.gfx11plus.mir @@ -12,18 +12,22 @@ body: | ; GFX11-LABEL: name: fcmp_false_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) - ; GFX11-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(false), [[TRUNC]](s16), [[TRUNC1]] - ; GFX11-NEXT: S_ENDPGM 0, implicit [[FCMP]](s1) - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vcc(s1) = G_FCMP floatpred(false), %2, %3 - S_ENDPGM 0, implicit %4 + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY1]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[FCMP:%[0-9]+]]:vcc(i1) = G_FCMP floatpred(false), [[BITCAST]](f16), [[BITCAST1]] + ; GFX11-NEXT: S_ENDPGM 0, implicit [[FCMP]](i1) + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %5:vgpr(f16) = G_BITCAST %2(i16) + %6:vgpr(f16) = G_BITCAST %3(i16) + %4:vcc(i1) = G_FCMP floatpred(false), %5(f16), %6 + S_ENDPGM 0, implicit %4(i1) ... 
--- @@ -43,12 +47,14 @@ body: | ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16 ; GFX11-NEXT: [[V_CMP_EQ_F16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_EQ_F16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_F16_t16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vcc(s1) = G_FCMP floatpred(oeq), %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %5:vgpr(f16) = G_BITCAST %2(i16) + %6:vgpr(f16) = G_BITCAST %3(i16) + %4:vcc(i1) = G_FCMP floatpred(oeq), %5(f16), %6 + S_ENDPGM 0, implicit %4(i1) ... --- @@ -68,12 +74,14 @@ body: | ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16 ; GFX11-NEXT: [[V_CMP_GT_F16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_GT_F16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_GT_F16_t16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vcc(s1) = G_FCMP floatpred(ogt), %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %5:vgpr(f16) = G_BITCAST %2(i16) + %6:vgpr(f16) = G_BITCAST %3(i16) + %4:vcc(i1) = G_FCMP floatpred(ogt), %5(f16), %6 + S_ENDPGM 0, implicit %4(i1) ... --- @@ -93,12 +101,14 @@ body: | ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16 ; GFX11-NEXT: [[V_CMP_GE_F16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_GE_F16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_GE_F16_t16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vcc(s1) = G_FCMP floatpred(oge), %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %5:vgpr(f16) = G_BITCAST %2(i16) + %6:vgpr(f16) = G_BITCAST %3(i16) + %4:vcc(i1) = G_FCMP floatpred(oge), %5(f16), %6 + S_ENDPGM 0, implicit %4(i1) ... --- @@ -118,12 +128,14 @@ body: | ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16 ; GFX11-NEXT: [[V_CMP_LT_F16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LT_F16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_F16_t16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vcc(s1) = G_FCMP floatpred(olt), %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %5:vgpr(f16) = G_BITCAST %2(i16) + %6:vgpr(f16) = G_BITCAST %3(i16) + %4:vcc(i1) = G_FCMP floatpred(olt), %5(f16), %6 + S_ENDPGM 0, implicit %4(i1) ... 
--- @@ -143,12 +155,14 @@ body: | ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16 ; GFX11-NEXT: [[V_CMP_LE_F16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LE_F16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_F16_t16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vcc(s1) = G_FCMP floatpred(ole), %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %5:vgpr(f16) = G_BITCAST %2(i16) + %6:vgpr(f16) = G_BITCAST %3(i16) + %4:vcc(i1) = G_FCMP floatpred(ole), %5(f16), %6 + S_ENDPGM 0, implicit %4(i1) ... --- name: fcmp_one_s16_vv @@ -167,12 +181,14 @@ body: | ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16 ; GFX11-NEXT: [[V_CMP_LG_F16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LG_F16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LG_F16_t16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vcc(s1) = G_FCMP floatpred(one), %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %5:vgpr(f16) = G_BITCAST %2(i16) + %6:vgpr(f16) = G_BITCAST %3(i16) + %4:vcc(i1) = G_FCMP floatpred(one), %5(f16), %6 + S_ENDPGM 0, implicit %4(i1) ... --- @@ -192,12 +208,14 @@ body: | ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16 ; GFX11-NEXT: [[V_CMP_LG_F16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LG_F16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LG_F16_t16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vcc(s1) = G_FCMP floatpred(one), %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %5:vgpr(f16) = G_BITCAST %2(i16) + %6:vgpr(f16) = G_BITCAST %3(i16) + %4:vcc(i1) = G_FCMP floatpred(one), %5(f16), %6 + S_ENDPGM 0, implicit %4(i1) ... --- @@ -217,12 +235,14 @@ body: | ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16 ; GFX11-NEXT: [[V_CMP_U_F16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_U_F16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_U_F16_t16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vcc(s1) = G_FCMP floatpred(uno), %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %5:vgpr(f16) = G_BITCAST %2(i16) + %6:vgpr(f16) = G_BITCAST %3(i16) + %4:vcc(i1) = G_FCMP floatpred(uno), %5(f16), %6 + S_ENDPGM 0, implicit %4(i1) ... 
--- @@ -242,12 +262,14 @@ body: | ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16 ; GFX11-NEXT: [[V_CMP_NLG_F16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NLG_F16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLG_F16_t16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vcc(s1) = G_FCMP floatpred(ueq), %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %5:vgpr(f16) = G_BITCAST %2(i16) + %6:vgpr(f16) = G_BITCAST %3(i16) + %4:vcc(i1) = G_FCMP floatpred(ueq), %5(f16), %6 + S_ENDPGM 0, implicit %4(i1) ... --- @@ -267,12 +289,14 @@ body: | ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16 ; GFX11-NEXT: [[V_CMP_NLE_F16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NLE_F16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLE_F16_t16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vcc(s1) = G_FCMP floatpred(ugt), %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %5:vgpr(f16) = G_BITCAST %2(i16) + %6:vgpr(f16) = G_BITCAST %3(i16) + %4:vcc(i1) = G_FCMP floatpred(ugt), %5(f16), %6 + S_ENDPGM 0, implicit %4(i1) ... --- @@ -292,12 +316,14 @@ body: | ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16 ; GFX11-NEXT: [[V_CMP_NLT_F16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NLT_F16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLT_F16_t16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vcc(s1) = G_FCMP floatpred(uge), %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %5:vgpr(f16) = G_BITCAST %2(i16) + %6:vgpr(f16) = G_BITCAST %3(i16) + %4:vcc(i1) = G_FCMP floatpred(uge), %5(f16), %6 + S_ENDPGM 0, implicit %4(i1) ... --- @@ -317,12 +343,14 @@ body: | ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16 ; GFX11-NEXT: [[V_CMP_NGE_F16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NGE_F16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_NGE_F16_t16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vcc(s1) = G_FCMP floatpred(ult), %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %5:vgpr(f16) = G_BITCAST %2(i16) + %6:vgpr(f16) = G_BITCAST %3(i16) + %4:vcc(i1) = G_FCMP floatpred(ult), %5(f16), %6 + S_ENDPGM 0, implicit %4(i1) ... 
--- @@ -342,12 +370,14 @@ body: | ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16 ; GFX11-NEXT: [[V_CMP_NGT_F16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NGT_F16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_NGT_F16_t16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vcc(s1) = G_FCMP floatpred(ule), %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %5:vgpr(f16) = G_BITCAST %2(i16) + %6:vgpr(f16) = G_BITCAST %3(i16) + %4:vcc(i1) = G_FCMP floatpred(ule), %5(f16), %6 + S_ENDPGM 0, implicit %4(i1) ... --- @@ -367,12 +397,14 @@ body: | ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16 ; GFX11-NEXT: [[V_CMP_NEQ_F16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NEQ_F16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_NEQ_F16_t16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vcc(s1) = G_FCMP floatpred(une), %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %5:vgpr(f16) = G_BITCAST %2(i16) + %6:vgpr(f16) = G_BITCAST %3(i16) + %4:vcc(i1) = G_FCMP floatpred(une), %5(f16), %6 + S_ENDPGM 0, implicit %4(i1) ... --- @@ -386,17 +418,21 @@ body: | ; GFX11-LABEL: name: fcmp_true_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) - ; GFX11-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(true), [[TRUNC]](s16), [[TRUNC1]] - ; GFX11-NEXT: S_ENDPGM 0, implicit [[FCMP]](s1) - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vcc(s1) = G_FCMP floatpred(true), %2, %3 - S_ENDPGM 0, implicit %4 + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY1]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX11-NEXT: [[FCMP:%[0-9]+]]:vcc(i1) = G_FCMP floatpred(true), [[BITCAST]](f16), [[BITCAST1]] + ; GFX11-NEXT: S_ENDPGM 0, implicit [[FCMP]](i1) + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %5:vgpr(f16) = G_BITCAST %2(i16) + %6:vgpr(f16) = G_BITCAST %3(i16) + %4:vcc(i1) = G_FCMP floatpred(true), %5(f16), %6 + S_ENDPGM 0, implicit %4(i1) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.s16.mir index 85b1d402146ce..71f7195a85c23 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.s16.mir @@ -13,29 +13,34 @@ body: | ; WAVE64-LABEL: name: fcmp_false_s16_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; WAVE64-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; WAVE64-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) - ; WAVE64-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(false), [[TRUNC]](s16), [[TRUNC1]] - ; WAVE64-NEXT: S_ENDPGM 0, implicit [[FCMP]](s1) + ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; WAVE64-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY]](i32) + ; WAVE64-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY1]](i32) + ; WAVE64-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f16) = G_BITCAST [[TRUNC]](i16) + ; WAVE64-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f16) = G_BITCAST [[TRUNC1]](i16) + ; WAVE64-NEXT: [[FCMP:%[0-9]+]]:vcc(i1) = G_FCMP floatpred(false), [[BITCAST]](f16), [[BITCAST1]] + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[FCMP]](i1) ; ; WAVE32-LABEL: name: fcmp_false_s16_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; WAVE32-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; WAVE32-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) - ; WAVE32-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(false), [[TRUNC]](s16), [[TRUNC1]] - ; WAVE32-NEXT: S_ENDPGM 0, implicit [[FCMP]](s1) - ; - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vcc(s1) = G_FCMP floatpred(false), %2, %3 - S_ENDPGM 0, implicit %4 + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; WAVE32-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY]](i32) + ; WAVE32-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY1]](i32) + ; WAVE32-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f16) = G_BITCAST [[TRUNC]](i16) + ; WAVE32-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f16) = G_BITCAST [[TRUNC1]](i16) + ; WAVE32-NEXT: [[FCMP:%[0-9]+]]:vcc(i1) = G_FCMP floatpred(false), [[BITCAST]](f16), [[BITCAST1]] + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[FCMP]](i1) + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %4:vgpr(f16) = G_BITCAST %2(i16) + %5:vgpr(f16) = G_BITCAST %3(i16) + %6:vcc(i1) = G_FCMP floatpred(false), %4(f16), %5 + S_ENDPGM 0, implicit %6(i1) ... 
--- @@ -61,13 +66,14 @@ body: | ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; WAVE32-NEXT: [[V_CMP_EQ_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_EQ_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_F16_e64_]] - ; - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vcc(s1) = G_FCMP floatpred(oeq), %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %4:vgpr(f16) = G_BITCAST %2(i16) + %5:vgpr(f16) = G_BITCAST %3(i16) + %6:vcc(i1) = G_FCMP floatpred(oeq), %4(f16), %5 + S_ENDPGM 0, implicit %6(i1) ... --- @@ -93,13 +99,14 @@ body: | ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; WAVE32-NEXT: [[V_CMP_GT_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_GT_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_GT_F16_e64_]] - ; - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vcc(s1) = G_FCMP floatpred(ogt), %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %4:vgpr(f16) = G_BITCAST %2(i16) + %5:vgpr(f16) = G_BITCAST %3(i16) + %6:vcc(i1) = G_FCMP floatpred(ogt), %4(f16), %5 + S_ENDPGM 0, implicit %6(i1) ... --- @@ -125,13 +132,14 @@ body: | ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; WAVE32-NEXT: [[V_CMP_GE_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_GE_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_GE_F16_e64_]] - ; - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vcc(s1) = G_FCMP floatpred(oge), %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %4:vgpr(f16) = G_BITCAST %2(i16) + %5:vgpr(f16) = G_BITCAST %3(i16) + %6:vcc(i1) = G_FCMP floatpred(oge), %4(f16), %5 + S_ENDPGM 0, implicit %6(i1) ... --- @@ -157,13 +165,14 @@ body: | ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; WAVE32-NEXT: [[V_CMP_LT_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LT_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_F16_e64_]] - ; - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vcc(s1) = G_FCMP floatpred(olt), %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %4:vgpr(f16) = G_BITCAST %2(i16) + %5:vgpr(f16) = G_BITCAST %3(i16) + %6:vcc(i1) = G_FCMP floatpred(olt), %4(f16), %5 + S_ENDPGM 0, implicit %6(i1) ... 
--- @@ -189,13 +198,14 @@ body: | ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; WAVE32-NEXT: [[V_CMP_LE_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LE_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_F16_e64_]] - ; - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vcc(s1) = G_FCMP floatpred(ole), %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %4:vgpr(f16) = G_BITCAST %2(i16) + %5:vgpr(f16) = G_BITCAST %3(i16) + %6:vcc(i1) = G_FCMP floatpred(ole), %4(f16), %5 + S_ENDPGM 0, implicit %6(i1) ... --- name: fcmp_one_s16_vv @@ -220,13 +230,14 @@ body: | ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; WAVE32-NEXT: [[V_CMP_LG_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LG_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LG_F16_e64_]] - ; - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vcc(s1) = G_FCMP floatpred(one), %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %4:vgpr(f16) = G_BITCAST %2(i16) + %5:vgpr(f16) = G_BITCAST %3(i16) + %6:vcc(i1) = G_FCMP floatpred(one), %4(f16), %5 + S_ENDPGM 0, implicit %6(i1) ... --- @@ -252,13 +263,14 @@ body: | ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; WAVE32-NEXT: [[V_CMP_LG_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LG_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LG_F16_e64_]] - ; - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vcc(s1) = G_FCMP floatpred(one), %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %4:vgpr(f16) = G_BITCAST %2(i16) + %5:vgpr(f16) = G_BITCAST %3(i16) + %6:vcc(i1) = G_FCMP floatpred(one), %4(f16), %5 + S_ENDPGM 0, implicit %6(i1) ... --- @@ -284,13 +296,14 @@ body: | ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; WAVE32-NEXT: [[V_CMP_U_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_U_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_U_F16_e64_]] - ; - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vcc(s1) = G_FCMP floatpred(uno), %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %4:vgpr(f16) = G_BITCAST %2(i16) + %5:vgpr(f16) = G_BITCAST %3(i16) + %6:vcc(i1) = G_FCMP floatpred(uno), %4(f16), %5 + S_ENDPGM 0, implicit %6(i1) ... 
--- @@ -316,13 +329,14 @@ body: | ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; WAVE32-NEXT: [[V_CMP_NLG_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NLG_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLG_F16_e64_]] - ; - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vcc(s1) = G_FCMP floatpred(ueq), %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %4:vgpr(f16) = G_BITCAST %2(i16) + %5:vgpr(f16) = G_BITCAST %3(i16) + %6:vcc(i1) = G_FCMP floatpred(ueq), %4(f16), %5 + S_ENDPGM 0, implicit %6(i1) ... --- @@ -348,13 +362,14 @@ body: | ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; WAVE32-NEXT: [[V_CMP_NLE_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NLE_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLE_F16_e64_]] - ; - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vcc(s1) = G_FCMP floatpred(ugt), %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %4:vgpr(f16) = G_BITCAST %2(i16) + %5:vgpr(f16) = G_BITCAST %3(i16) + %6:vcc(i1) = G_FCMP floatpred(ugt), %4(f16), %5 + S_ENDPGM 0, implicit %6(i1) ... --- @@ -380,13 +395,14 @@ body: | ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; WAVE32-NEXT: [[V_CMP_NLT_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NLT_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLT_F16_e64_]] - ; - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vcc(s1) = G_FCMP floatpred(uge), %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %4:vgpr(f16) = G_BITCAST %2(i16) + %5:vgpr(f16) = G_BITCAST %3(i16) + %6:vcc(i1) = G_FCMP floatpred(uge), %4(f16), %5 + S_ENDPGM 0, implicit %6(i1) ... --- @@ -412,13 +428,14 @@ body: | ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; WAVE32-NEXT: [[V_CMP_NGE_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NGE_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NGE_F16_e64_]] - ; - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vcc(s1) = G_FCMP floatpred(ult), %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %4:vgpr(f16) = G_BITCAST %2(i16) + %5:vgpr(f16) = G_BITCAST %3(i16) + %6:vcc(i1) = G_FCMP floatpred(ult), %4(f16), %5 + S_ENDPGM 0, implicit %6(i1) ... 
--- @@ -444,13 +461,14 @@ body: | ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; WAVE32-NEXT: [[V_CMP_NGT_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NGT_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NGT_F16_e64_]] - ; - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vcc(s1) = G_FCMP floatpred(ule), %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %4:vgpr(f16) = G_BITCAST %2(i16) + %5:vgpr(f16) = G_BITCAST %3(i16) + %6:vcc(i1) = G_FCMP floatpred(ule), %4(f16), %5 + S_ENDPGM 0, implicit %6(i1) ... --- @@ -476,13 +494,14 @@ body: | ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; WAVE32-NEXT: [[V_CMP_NEQ_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NEQ_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NEQ_F16_e64_]] - ; - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vcc(s1) = G_FCMP floatpred(une), %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %4:vgpr(f16) = G_BITCAST %2(i16) + %5:vgpr(f16) = G_BITCAST %3(i16) + %6:vcc(i1) = G_FCMP floatpred(une), %4(f16), %5 + S_ENDPGM 0, implicit %6(i1) ... --- @@ -496,28 +515,32 @@ body: | ; WAVE64-LABEL: name: fcmp_true_s16_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; WAVE64-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; WAVE64-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) - ; WAVE64-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(true), [[TRUNC]](s16), [[TRUNC1]] - ; WAVE64-NEXT: S_ENDPGM 0, implicit [[FCMP]](s1) + ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; WAVE64-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY]](i32) + ; WAVE64-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY1]](i32) + ; WAVE64-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f16) = G_BITCAST [[TRUNC]](i16) + ; WAVE64-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f16) = G_BITCAST [[TRUNC1]](i16) + ; WAVE64-NEXT: [[FCMP:%[0-9]+]]:vcc(i1) = G_FCMP floatpred(true), [[BITCAST]](f16), [[BITCAST1]] + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[FCMP]](i1) ; ; WAVE32-LABEL: name: fcmp_true_s16_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; WAVE32-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; WAVE32-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) - ; WAVE32-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(true), [[TRUNC]](s16), [[TRUNC1]] - ; WAVE32-NEXT: S_ENDPGM 0, implicit [[FCMP]](s1) - ; - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vcc(s1) = G_FCMP floatpred(true), %2, %3 - S_ENDPGM 0, implicit %4 + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; WAVE32-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY]](i32) + ; WAVE32-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(i16) = 
G_TRUNC [[COPY1]](i32) + ; WAVE32-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f16) = G_BITCAST [[TRUNC]](i16) + ; WAVE32-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f16) = G_BITCAST [[TRUNC1]](i16) + ; WAVE32-NEXT: [[FCMP:%[0-9]+]]:vcc(i1) = G_FCMP floatpred(true), [[BITCAST]](f16), [[BITCAST1]] + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[FCMP]](i1) + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %4:vgpr(f16) = G_BITCAST %2(i16) + %5:vgpr(f16) = G_BITCAST %3(i16) + %6:vcc(i1) = G_FCMP floatpred(true), %4(f16), %5 + S_ENDPGM 0, implicit %6(i1) ... - diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fconstant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fconstant.mir index 13e29f15504be..e8cd3f6d4216e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fconstant.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fconstant.mir @@ -17,13 +17,17 @@ body: | ; GCN-NEXT: $vgpr0 = COPY [[V_MOV_B32_e32_]] ; GCN-NEXT: $vgpr1 = COPY [[V_MOV_B32_e32_1]] ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MOV_B32_e32_2]], implicit [[V_MOV_B32_e32_3]] - %0:vgpr(s32) = G_FCONSTANT float 1.0 - %1:vgpr(s32) = G_FCONSTANT float 8.0 - %2:vgpr(s32) = G_FCONSTANT float 1.0 - %3:vgpr(s32) = G_FCONSTANT float 8.0 - $vgpr0 = COPY %0 - $vgpr1 = COPY %1 - S_ENDPGM 0, implicit %2 , implicit %3 + %0:vgpr(f32) = G_FCONSTANT float 1.000000e+00 + %1:vgpr(f32) = G_FCONSTANT float 8.000000e+00 + %2:vgpr(f32) = G_FCONSTANT float 1.000000e+00 + %3:vgpr(f32) = G_FCONSTANT float 8.000000e+00 + %4:vgpr(i32) = G_BITCAST %0(f32) + $vgpr0 = COPY %4(i32) + %5:vgpr(i32) = G_BITCAST %1(f32) + $vgpr1 = COPY %5(i32) + %6:vgpr(i32) = G_BITCAST %2(f32) + %7:vgpr(i32) = G_BITCAST %3(f32) + S_ENDPGM 0, implicit %6(i32), implicit %7(i32) ... --- @@ -42,13 +46,17 @@ body: | ; GCN-NEXT: $sgpr0 = COPY [[S_MOV_B32_]] ; GCN-NEXT: $sgpr1 = COPY [[S_MOV_B32_1]] ; GCN-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]] - %0:sgpr(s32) = G_FCONSTANT float 1.0 - %1:sgpr(s32) = G_FCONSTANT float 8.0 - %2:sgpr(s32) = G_FCONSTANT float -1.0 - %3:sgpr(s32) = G_FCONSTANT float -8.0 - $sgpr0 = COPY %0 - $sgpr1 = COPY %1 - S_ENDPGM 0, implicit %2 , implicit %3 + %0:sgpr(f32) = G_FCONSTANT float 1.000000e+00 + %1:sgpr(f32) = G_FCONSTANT float 8.000000e+00 + %2:sgpr(f32) = G_FCONSTANT float -1.000000e+00 + %3:sgpr(f32) = G_FCONSTANT float -8.000000e+00 + %4:sgpr(i32) = G_BITCAST %0(f32) + $sgpr0 = COPY %4(i32) + %5:sgpr(i32) = G_BITCAST %1(f32) + $sgpr1 = COPY %5(i32) + %6:sgpr(i32) = G_BITCAST %2(f32) + %7:sgpr(i32) = G_BITCAST %3(f32) + S_ENDPGM 0, implicit %6(i32), implicit %7(i32) ... @@ -68,13 +76,17 @@ body: | ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[V_MOV_B]] ; GCN-NEXT: $vgpr2_vgpr3 = COPY [[V_MOV_B1]] ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MOV_B2]], implicit [[V_MOV_B3]] - %0:vgpr(s64) = G_FCONSTANT double 1.0 - %1:vgpr(s64) = G_FCONSTANT double 8.0 - %2:vgpr(s64) = G_FCONSTANT double -2.0 - %3:vgpr(s64) = G_FCONSTANT double 10.0 - $vgpr0_vgpr1 = COPY %0 - $vgpr2_vgpr3 = COPY %1 - S_ENDPGM 0, implicit %2 , implicit %3 + %0:vgpr(f64) = G_FCONSTANT double 1.000000e+00 + %1:vgpr(f64) = G_FCONSTANT double 8.000000e+00 + %2:vgpr(f64) = G_FCONSTANT double -2.000000e+00 + %3:vgpr(f64) = G_FCONSTANT double 1.000000e+01 + %4:vgpr(i64) = G_BITCAST %0(f64) + $vgpr0_vgpr1 = COPY %4(i64) + %5:vgpr(i64) = G_BITCAST %1(f64) + $vgpr2_vgpr3 = COPY %5(i64) + %6:vgpr(i64) = G_BITCAST %2(f64) + %7:vgpr(i64) = G_BITCAST %3(f64) + S_ENDPGM 0, implicit %6(i64), implicit %7(i64) ... 
@@ -94,13 +106,19 @@ body: | ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[S_MOV_B64_]] ; GCN-NEXT: $sgpr2_sgpr3 = COPY [[S_MOV_B]] ; GCN-NEXT: S_ENDPGM 0, implicit [[S_MOV_B64_]], implicit [[S_MOV_B]], implicit [[S_MOV_B64_1]], implicit [[S_MOV_B1]] - %0:sgpr(s64) = G_FCONSTANT double 1.0 - %1:sgpr(s64) = G_FCONSTANT double 8.0 - %2:sgpr(s64) = G_FCONSTANT double -2.0 - %3:sgpr(s64) = G_FCONSTANT double -10.0 - $sgpr0_sgpr1 = COPY %0 - $sgpr2_sgpr3 = COPY %1 - S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2 , implicit %3 + %0:sgpr(f64) = G_FCONSTANT double 1.000000e+00 + %1:sgpr(f64) = G_FCONSTANT double 8.000000e+00 + %2:sgpr(f64) = G_FCONSTANT double -2.000000e+00 + %3:sgpr(f64) = G_FCONSTANT double -1.000000e+01 + %4:sgpr(i64) = G_BITCAST %0(f64) + $sgpr0_sgpr1 = COPY %4(i64) + %5:sgpr(i64) = G_BITCAST %1(f64) + $sgpr2_sgpr3 = COPY %5(i64) + %6:sgpr(i64) = G_BITCAST %0(f64) + %7:sgpr(i64) = G_BITCAST %1(f64) + %8:sgpr(i64) = G_BITCAST %2(f64) + %9:sgpr(i64) = G_BITCAST %3(f64) + S_ENDPGM 0, implicit %6(i64), implicit %7(i64), implicit %8(i64), implicit %9(i64) ... --- @@ -119,16 +137,20 @@ body: | ; GCN-NEXT: $vgpr0 = COPY [[V_MOV_B32_e32_]] ; GCN-NEXT: $vgpr1 = COPY [[V_MOV_B32_e32_1]] ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MOV_B32_e32_2]], implicit [[V_MOV_B32_e32_3]] - %0:vgpr(s16) = G_FCONSTANT half 1.0 - %1:vgpr(s16) = G_FCONSTANT half 8.0 - %2:vgpr(s32) = G_ANYEXT %0 - %3:vgpr(s32) = G_ANYEXT %1 + %0:vgpr(f16) = G_FCONSTANT half 0xH3C00 + %1:vgpr(f16) = G_FCONSTANT half 0xH4800 + %2:vgpr(i16) = G_BITCAST %0(f16) + %3:vgpr(i32) = G_ANYEXT %2(i16) + %4:vgpr(i16) = G_BITCAST %1(f16) + %5:vgpr(i32) = G_ANYEXT %4(i16) + %6:vgpr(f16) = G_FCONSTANT half 0xH3C00 + %7:vgpr(f16) = G_FCONSTANT half 0xH4800 + $vgpr0 = COPY %3(i32) + $vgpr1 = COPY %5(i32) + %8:vgpr(i16) = G_BITCAST %6(f16) + %9:vgpr(i16) = G_BITCAST %7(f16) + S_ENDPGM 0, implicit %8(i16), implicit %9(i16) - %4:vgpr(s16) = G_FCONSTANT half 1.0 - %5:vgpr(s16) = G_FCONSTANT half 8.0 - $vgpr0 = COPY %2 - $vgpr1 = COPY %3 - S_ENDPGM 0, implicit %4, implicit %5 ... @@ -150,15 +172,19 @@ body: | ; GCN-NEXT: $sgpr0 = COPY [[COPY]] ; GCN-NEXT: $sgpr1 = COPY [[COPY1]] ; GCN-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]] - %0:sgpr(s16) = G_FCONSTANT half 1.0 - %1:sgpr(s16) = G_FCONSTANT half 8.0 - %2:vgpr(s32) = G_ANYEXT %0 - %3:vgpr(s32) = G_ANYEXT %1 - - %4:sgpr(s16) = G_FCONSTANT half 1.0 - %5:sgpr(s16) = G_FCONSTANT half 8.0 - $sgpr0 = COPY %2 - $sgpr1 = COPY %3 - S_ENDPGM 0, implicit %4, implicit %5 + %0:sgpr(f16) = G_FCONSTANT half 0xH3C00 + %1:sgpr(f16) = G_FCONSTANT half 0xH4800 + %2:sgpr(i16) = G_BITCAST %0(f16) + %3:vgpr(i32) = G_ANYEXT %2(i16) + %4:sgpr(i16) = G_BITCAST %1(f16) + %5:vgpr(i32) = G_ANYEXT %4(i16) + %6:sgpr(f16) = G_FCONSTANT half 0xH3C00 + %7:sgpr(f16) = G_FCONSTANT half 0xH4800 + $sgpr0 = COPY %3(i32) + $sgpr1 = COPY %5(i32) + %8:sgpr(i16) = G_BITCAST %6(f16) + %9:sgpr(i16) = G_BITCAST %7(f16) + S_ENDPGM 0, implicit %8(i16), implicit %9(i16) + ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fexp2.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fexp2.mir index 36ca2e5fb1f85..ec84b0eb0d254 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fexp2.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fexp2.mir @@ -17,10 +17,12 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; CHECK-NEXT: [[V_EXP_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_EXP_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_EXP_F16_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0 - %2:vgpr(s16) = G_FEXP2 %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(f16) = G_BITCAST %1(i16) + %3:vgpr(f16) = G_FEXP2 %2 + %4:vgpr(i16) = G_BITCAST %3(f16) + S_ENDPGM 0, implicit %4(i16) ... --- @@ -39,8 +41,10 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[V_EXP_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_EXP_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_EXP_F16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s16) = G_TRUNC %0 - %2:vgpr(s16) = G_FEXP2 %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i16) = G_TRUNC %0(i32) + %2:vgpr(f16) = G_BITCAST %1(i16) + %3:vgpr(f16) = G_FEXP2 %2 + %4:vgpr(i16) = G_BITCAST %3(f16) + S_ENDPGM 0, implicit %4(i16) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s16.mir index df62806b61918..05334805aa2b5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s16.mir @@ -16,25 +16,31 @@ body: | ; VI-LABEL: name: ffloor_s16_ss ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[FFLOOR:%[0-9]+]]:sreg_32(s16) = G_FFLOOR [[TRUNC]] - ; VI-NEXT: [[COPY1:%[0-9]+]]:sreg_32(s32) = COPY [[FFLOOR]](s16) - ; VI-NEXT: $sgpr0 = COPY [[COPY1]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f16) = G_BITCAST [[TRUNC]](i16) + ; VI-NEXT: [[FFLOOR:%[0-9]+]]:sreg_32(f16) = G_FFLOOR [[BITCAST]] + ; VI-NEXT: [[COPY1:%[0-9]+]]:sreg_32(i16) = COPY [[FFLOOR]](f16) + ; VI-NEXT: [[COPY2:%[0-9]+]]:sreg_32(i32) = COPY [[COPY1]](i16) + ; VI-NEXT: $sgpr0 = COPY [[COPY2]](i32) ; ; GCN-LABEL: name: ffloor_s16_ss ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GCN-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GCN-NEXT: [[FFLOOR:%[0-9]+]]:sreg_32(s16) = G_FFLOOR [[TRUNC]] - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32(s32) = COPY [[FFLOOR]](s16) - ; GCN-NEXT: $sgpr0 = COPY [[COPY1]](s32) - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0 - %2:sgpr(s16) = G_FFLOOR %1 - %3:sgpr(s32) = G_ANYEXT %2 - $sgpr0 = COPY %3 + ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GCN-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f16) = G_BITCAST [[TRUNC]](i16) + ; GCN-NEXT: [[FFLOOR:%[0-9]+]]:sreg_32(f16) = G_FFLOOR [[BITCAST]] + ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32(i16) = COPY [[FFLOOR]](f16) + ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32(i32) = COPY [[COPY1]](i16) + ; GCN-NEXT: $sgpr0 = COPY [[COPY2]](i32) + %0:sgpr(i32) = 
COPY $sgpr0 + %1:sgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(f16) = G_BITCAST %1(i16) + %3:sgpr(f16) = G_FFLOOR %2 + %4:sgpr(i16) = G_BITCAST %3(f16) + %5:sgpr(i32) = G_ANYEXT %4(i16) + $sgpr0 = COPY %5(i32) ... --- @@ -70,11 +76,13 @@ body: | ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-FAKE16-NEXT: [[V_FLOOR_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FLOOR_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_FLOOR_F16_fake16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s16) = G_TRUNC %0 - %2:vgpr(s16) = G_FFLOOR %1 - %3:vgpr(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i16) = G_TRUNC %0(i32) + %2:vgpr(f16) = G_BITCAST %1(i16) + %3:vgpr(f16) = G_FFLOOR %2 + %4:vgpr(i16) = G_BITCAST %3(f16) + %5:vgpr(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... --- @@ -109,11 +117,13 @@ body: | ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX11-FAKE16-NEXT: [[V_FLOOR_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FLOOR_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_FLOOR_F16_fake16_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0 - %2:vgpr(s16) = G_FFLOOR %1 - %3:vgpr(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(f16) = G_BITCAST %1(i16) + %3:vgpr(f16) = G_FFLOOR %2 + %4:vgpr(i16) = G_BITCAST %3(f16) + %5:vgpr(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... --- @@ -149,10 +159,12 @@ body: | ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-FAKE16-NEXT: [[V_FLOOR_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FLOOR_F16_fake16_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_FLOOR_F16_fake16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s16) = G_TRUNC %0 - %2:vgpr(s16) = G_FNEG %1 - %3:vgpr(s16) = G_FFLOOR %2 - %4:vgpr(s32) = G_ANYEXT %3 - $vgpr0 = COPY %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i16) = G_TRUNC %0(i32) + %2:vgpr(f16) = G_BITCAST %1(i16) + %3:vgpr(f16) = G_FNEG %2 + %4:vgpr(f16) = G_FFLOOR %3 + %5:vgpr(i16) = G_BITCAST %4(f16) + %6:vgpr(i32) = G_ANYEXT %5(i16) + $vgpr0 = COPY %6(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s32.mir index 7d701e8f9b449..eac1fd79f35f4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s32.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s32.mir @@ -15,11 +15,13 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: %1:vgpr_32 = nofpexcept V_FLOOR_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: $vgpr0 = COPY %1 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = G_FFLOOR %0 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[V_FLOOR_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FLOOR_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: $vgpr0 = COPY [[V_FLOOR_F32_e64_]] + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(f32) = G_BITCAST %0(i32) + %2:vgpr(f32) = G_FFLOOR %1 + %3:vgpr(i32) = G_BITCAST %2(f32) + $vgpr0 = COPY %3(i32) ... 
--- @@ -36,11 +38,13 @@ body: | ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: %1:vgpr_32 = nofpexcept V_FLOOR_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: $vgpr0 = COPY %1 - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = G_FFLOOR %0 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[V_FLOOR_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FLOOR_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: $vgpr0 = COPY [[V_FLOOR_F32_e64_]] + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(f32) = G_BITCAST %0(i32) + %2:vgpr(f32) = G_FFLOOR %1 + %3:vgpr(i32) = G_BITCAST %2(f32) + $vgpr0 = COPY %3(i32) ... --- @@ -57,12 +61,14 @@ body: | ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: %2:vgpr_32 = nofpexcept V_FLOOR_F32_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: $vgpr0 = COPY %2 - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = G_FNEG %0 - %2:vgpr(s32) = G_FFLOOR %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[V_FLOOR_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FLOOR_F32_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: $vgpr0 = COPY [[V_FLOOR_F32_e64_]] + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(f32) = G_BITCAST %0(i32) + %2:sgpr(f32) = G_FNEG %1 + %3:vgpr(f32) = G_FFLOOR %2 + %4:vgpr(i32) = G_BITCAST %3(f32) + $vgpr0 = COPY %4(i32) ... --- @@ -78,10 +84,12 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: %2:vgpr_32 = nofpexcept V_FLOOR_F32_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: $vgpr0 = COPY %2 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = G_FNEG %0 - %2:vgpr(s32) = G_FFLOOR %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[V_FLOOR_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FLOOR_F32_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: $vgpr0 = COPY [[V_FLOOR_F32_e64_]] + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(f32) = G_BITCAST %0(i32) + %2:vgpr(f32) = G_FNEG %1 + %3:vgpr(f32) = G_FFLOOR %2 + %4:vgpr(i32) = G_BITCAST %3(f32) + $vgpr0 = COPY %4(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s64.mir index d67c924050f45..949d38f80ecb8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s64.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s64.mir @@ -15,11 +15,13 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: %1:vreg_64 = nofpexcept V_FLOOR_F64_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %1 - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_FFLOOR %0 - $vgpr0_vgpr1 = COPY %1 + ; CHECK-NEXT: [[V_FLOOR_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_FLOOR_F64_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[V_FLOOR_F64_e64_]] + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(f64) = G_BITCAST %0(i64) + %2:vgpr(f64) = G_FFLOOR %1 + %3:vgpr(i64) = G_BITCAST %2(f64) + $vgpr0_vgpr1 = COPY %3(i64) ... # FIXME: Constant bus restriction @@ -33,8 +35,8 @@ body: | # bb.0: # liveins: $sgpr0_sgpr1 -# %0:sgpr(s64) = COPY $sgpr0_sgpr1 -# %1:vgpr(s64) = G_FFLOOR %0 +# %0:sgpr(i64) = COPY $sgpr0_sgpr1 +# %1:vgpr(i64) = G_FFLOOR %0 # $vgpr0_vgpr1 = COPY %1 # ... 
@@ -52,10 +54,12 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: %2:vreg_64 = nofpexcept V_FLOOR_F64_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %2 - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_FNEG %0 - %2:vgpr(s64) = G_FFLOOR %1 - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[V_FLOOR_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_FLOOR_F64_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[V_FLOOR_F64_e64_]] + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(f64) = G_BITCAST %0(i64) + %2:vgpr(f64) = G_FNEG %1 + %3:vgpr(f64) = G_FFLOOR %2 + %4:vgpr(i64) = G_BITCAST %3(f64) + $vgpr0_vgpr1 = COPY %4(i64) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fma.s32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fma.s32.mir index 0b086a2d39d43..a0e209580c02c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fma.s32.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fma.s32.mir @@ -20,29 +20,35 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: %3:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %3 + ; GFX6-NEXT: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_FMA_F32_e64_]] + ; ; GFX9-DL-LABEL: name: fma_f32 ; GFX9-DL: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-DL-NEXT: {{ $}} ; GFX9-DL-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-DL-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9-DL-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-DL-NEXT: %3:vgpr_32 = nofpexcept V_FMAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX9-DL-NEXT: S_ENDPGM 0, implicit %3 + ; GFX9-DL-NEXT: [[V_FMAC_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX9-DL-NEXT: S_ENDPGM 0, implicit [[V_FMAC_F32_e64_]] + ; ; GFX10-LABEL: name: fma_f32 ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: %3:vgpr_32 = nofpexcept V_FMAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: S_ENDPGM 0, implicit %3 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = G_FMA %0, %1, %2 - S_ENDPGM 0, implicit %3 + ; GFX10-NEXT: [[V_FMAC_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_FMAC_F32_e64_]] + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(f32) = G_BITCAST %0(i32) + %4:vgpr(f32) = G_BITCAST %1(i32) + %5:vgpr(f32) = G_BITCAST %2(i32) + %6:vgpr(f32) = G_FMA %3, %4, %5 + %7:vgpr(i32) = G_BITCAST %6(f32) + S_ENDPGM 0, implicit %7(i32) ... 
@@ -62,30 +68,36 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: %4:vgpr_32 = nofpexcept V_FMA_F32_e64 1, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %4 + ; GFX6-NEXT: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 1, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_FMA_F32_e64_]] + ; ; GFX9-DL-LABEL: name: fma_f32_fneg_src0 ; GFX9-DL: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-DL-NEXT: {{ $}} ; GFX9-DL-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-DL-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9-DL-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-DL-NEXT: %4:vgpr_32 = nofpexcept V_FMA_F32_e64 1, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX9-DL-NEXT: S_ENDPGM 0, implicit %4 + ; GFX9-DL-NEXT: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 1, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX9-DL-NEXT: S_ENDPGM 0, implicit [[V_FMA_F32_e64_]] + ; ; GFX10-LABEL: name: fma_f32_fneg_src0 ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: %4:vgpr_32 = nofpexcept V_FMA_F32_e64 1, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: S_ENDPGM 0, implicit %4 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = G_FNEG %0 - %4:vgpr(s32) = G_FMA %3, %1, %2 - S_ENDPGM 0, implicit %4 + ; GFX10-NEXT: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 1, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_FMA_F32_e64_]] + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(f32) = G_BITCAST %0(i32) + %4:vgpr(f32) = G_FNEG %3 + %5:vgpr(f32) = G_BITCAST %1(i32) + %6:vgpr(f32) = G_BITCAST %2(i32) + %7:vgpr(f32) = G_FMA %4, %5, %6 + %8:vgpr(i32) = G_BITCAST %7(f32) + S_ENDPGM 0, implicit %8(i32) ... 
@@ -105,30 +117,36 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: %4:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[COPY]], 1, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %4 + ; GFX6-NEXT: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[COPY]], 1, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_FMA_F32_e64_]] + ; ; GFX9-DL-LABEL: name: fma_f32_fneg_src1 ; GFX9-DL: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-DL-NEXT: {{ $}} ; GFX9-DL-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-DL-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9-DL-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-DL-NEXT: %4:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[COPY]], 1, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX9-DL-NEXT: S_ENDPGM 0, implicit %4 + ; GFX9-DL-NEXT: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[COPY]], 1, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX9-DL-NEXT: S_ENDPGM 0, implicit [[V_FMA_F32_e64_]] + ; ; GFX10-LABEL: name: fma_f32_fneg_src1 ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: %4:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[COPY]], 1, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: S_ENDPGM 0, implicit %4 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = G_FNEG %1 - %4:vgpr(s32) = G_FMA %0, %3, %2 - S_ENDPGM 0, implicit %4 + ; GFX10-NEXT: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[COPY]], 1, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_FMA_F32_e64_]] + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(f32) = G_BITCAST %1(i32) + %4:vgpr(f32) = G_FNEG %3 + %5:vgpr(f32) = G_BITCAST %0(i32) + %6:vgpr(f32) = G_BITCAST %2(i32) + %7:vgpr(f32) = G_FMA %5, %4, %6 + %8:vgpr(i32) = G_BITCAST %7(f32) + S_ENDPGM 0, implicit %8(i32) ... 
@@ -148,30 +166,36 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: %4:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %4 + ; GFX6-NEXT: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_FMA_F32_e64_]] + ; ; GFX9-DL-LABEL: name: fma_f32_fneg_src2 ; GFX9-DL: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-DL-NEXT: {{ $}} ; GFX9-DL-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-DL-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9-DL-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-DL-NEXT: %4:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX9-DL-NEXT: S_ENDPGM 0, implicit %4 + ; GFX9-DL-NEXT: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX9-DL-NEXT: S_ENDPGM 0, implicit [[V_FMA_F32_e64_]] + ; ; GFX10-LABEL: name: fma_f32_fneg_src2 ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: %4:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: S_ENDPGM 0, implicit %4 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = G_FNEG %2 - %4:vgpr(s32) = G_FMA %0, %1, %3 - S_ENDPGM 0, implicit %4 + ; GFX10-NEXT: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_FMA_F32_e64_]] + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(f32) = G_BITCAST %2(i32) + %4:vgpr(f32) = G_FNEG %3 + %5:vgpr(f32) = G_BITCAST %0(i32) + %6:vgpr(f32) = G_BITCAST %1(i32) + %7:vgpr(f32) = G_FMA %5, %6, %4 + %8:vgpr(i32) = G_BITCAST %7(f32) + S_ENDPGM 0, implicit %8(i32) ... 
@@ -191,30 +215,36 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: %4:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[COPY]], 0, [[COPY1]], 2, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %4 + ; GFX6-NEXT: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[COPY]], 0, [[COPY1]], 2, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_FMA_F32_e64_]] + ; ; GFX9-DL-LABEL: name: fma_f32_fabs_src2 ; GFX9-DL: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-DL-NEXT: {{ $}} ; GFX9-DL-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-DL-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9-DL-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-DL-NEXT: %4:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[COPY]], 0, [[COPY1]], 2, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX9-DL-NEXT: S_ENDPGM 0, implicit %4 + ; GFX9-DL-NEXT: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[COPY]], 0, [[COPY1]], 2, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX9-DL-NEXT: S_ENDPGM 0, implicit [[V_FMA_F32_e64_]] + ; ; GFX10-LABEL: name: fma_f32_fabs_src2 ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: %4:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[COPY]], 0, [[COPY1]], 2, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: S_ENDPGM 0, implicit %4 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = G_FABS %2 - %4:vgpr(s32) = G_FMA %0, %1, %3 - S_ENDPGM 0, implicit %4 + ; GFX10-NEXT: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[COPY]], 0, [[COPY1]], 2, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_FMA_F32_e64_]] + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(f32) = G_BITCAST %2(i32) + %4:vgpr(f32) = G_FABS %3 + %5:vgpr(f32) = G_BITCAST %0(i32) + %6:vgpr(f32) = G_BITCAST %1(i32) + %7:vgpr(f32) = G_FMA %5, %6, %4 + %8:vgpr(i32) = G_BITCAST %7(f32) + S_ENDPGM 0, implicit %8(i32) ... 
@@ -234,30 +264,44 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: %5:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %5 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; GFX6-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY2]], implicit $exec + ; GFX6-NEXT: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_FMA_F32_e64_]] + ; ; GFX9-DL-LABEL: name: fma_f32_copy_fneg_src2 ; GFX9-DL: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-DL-NEXT: {{ $}} ; GFX9-DL-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-DL-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9-DL-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-DL-NEXT: %5:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX9-DL-NEXT: S_ENDPGM 0, implicit %5 + ; GFX9-DL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; GFX9-DL-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY2]], implicit $exec + ; GFX9-DL-NEXT: [[V_FMAC_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GFX9-DL-NEXT: S_ENDPGM 0, implicit [[V_FMAC_F32_e64_]] + ; ; GFX10-LABEL: name: fma_f32_copy_fneg_src2 ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: %5:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: S_ENDPGM 0, implicit %5 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = G_FNEG %2 - %4:vgpr(s32) = COPY %3 - %5:vgpr(s32) = G_FMA %0, %1, %4 - S_ENDPGM 0, implicit %5 + ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY2]], implicit $exec + ; GFX10-NEXT: [[V_FMAC_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_FMAC_F32_e64_]] + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(f32) = G_BITCAST %2(i32) + %4:vgpr(f32) = G_FNEG %3 + %5:vgpr(i32) = G_BITCAST %4(f32) + %6:vgpr(i32) = COPY %5(i32) + %7:vgpr(f32) = G_BITCAST %0(i32) + %8:vgpr(f32) = G_BITCAST %1(i32) + %9:vgpr(f32) = G_BITCAST %6(i32) + %10:vgpr(f32) = G_FMA %7, %8, %9 + %11:vgpr(i32) = G_BITCAST %10(f32) + S_ENDPGM 0, implicit %11(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmad.s32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmad.s32.mir index 5f346559d6ad9..80fbd2e14d9c5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmad.s32.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmad.s32.mir @@ -20,6 +20,7 @@ body: | ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX6-NEXT: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MAC_F32_e64_]] + ; ; GFX10-LABEL: name: fmad_f32 ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} @@ -28,11 +29,15 @@ body: | ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX10-NEXT: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MAC_F32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = G_FMAD %0, %1, %2 - S_ENDPGM 0, implicit %3 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(f32) = G_BITCAST %0(i32) + %4:vgpr(f32) = G_BITCAST %1(i32) + %5:vgpr(f32) = G_BITCAST %2(i32) + %6:vgpr(f32) = G_FMAD %3, %4, %5 + %7:vgpr(i32) = G_BITCAST %6(f32) + S_ENDPGM 0, implicit %7(i32) ... @@ -54,6 +59,7 @@ body: | ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX6-NEXT: [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F32_e64 1, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MAD_F32_e64_]] + ; ; GFX10-LABEL: name: fmad_f32_fneg_src0 ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} @@ -62,12 +68,16 @@ body: | ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX10-NEXT: [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F32_e64 1, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MAD_F32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = G_FNEG %0 - %4:vgpr(s32) = G_FMAD %3, %1, %2 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(f32) = G_BITCAST %0(i32) + %4:vgpr(f32) = G_FNEG %3 + %5:vgpr(f32) = G_BITCAST %1(i32) + %6:vgpr(f32) = G_BITCAST %2(i32) + %7:vgpr(f32) = G_FMAD %4, %5, %6 + %8:vgpr(i32) = G_BITCAST %7(f32) + S_ENDPGM 0, implicit %8(i32) ... 
@@ -89,6 +99,7 @@ body: | ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX6-NEXT: [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F32_e64 0, [[COPY]], 1, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MAD_F32_e64_]] + ; ; GFX10-LABEL: name: fmad_f32_fneg_src1 ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} @@ -97,12 +108,16 @@ body: | ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX10-NEXT: [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F32_e64 0, [[COPY]], 1, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MAD_F32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = G_FNEG %1 - %4:vgpr(s32) = G_FMAD %0, %3, %2 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(f32) = G_BITCAST %1(i32) + %4:vgpr(f32) = G_FNEG %3 + %5:vgpr(f32) = G_BITCAST %0(i32) + %6:vgpr(f32) = G_BITCAST %2(i32) + %7:vgpr(f32) = G_FMAD %5, %4, %6 + %8:vgpr(i32) = G_BITCAST %7(f32) + S_ENDPGM 0, implicit %8(i32) ... @@ -124,6 +139,7 @@ body: | ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX6-NEXT: [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F32_e64 0, [[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MAD_F32_e64_]] + ; ; GFX10-LABEL: name: fmad_f32_fneg_src2 ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} @@ -132,12 +148,16 @@ body: | ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX10-NEXT: [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F32_e64 0, [[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MAD_F32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = G_FNEG %2 - %4:vgpr(s32) = G_FMAD %0, %1, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(f32) = G_BITCAST %2(i32) + %4:vgpr(f32) = G_FNEG %3 + %5:vgpr(f32) = G_BITCAST %0(i32) + %6:vgpr(f32) = G_BITCAST %1(i32) + %7:vgpr(f32) = G_FMAD %5, %6, %4 + %8:vgpr(i32) = G_BITCAST %7(f32) + S_ENDPGM 0, implicit %8(i32) ... @@ -159,6 +179,7 @@ body: | ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX6-NEXT: [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F32_e64 0, [[COPY]], 0, [[COPY1]], 2, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MAD_F32_e64_]] + ; ; GFX10-LABEL: name: fmad_f32_fabs_src2 ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} @@ -167,12 +188,16 @@ body: | ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX10-NEXT: [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F32_e64 0, [[COPY]], 0, [[COPY1]], 2, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MAD_F32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = G_FABS %2 - %4:vgpr(s32) = G_FMAD %0, %1, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(f32) = G_BITCAST %2(i32) + %4:vgpr(f32) = G_FABS %3 + %5:vgpr(f32) = G_BITCAST %0(i32) + %6:vgpr(f32) = G_BITCAST %1(i32) + %7:vgpr(f32) = G_FMAD %5, %6, %4 + %8:vgpr(i32) = G_BITCAST %7(f32) + S_ENDPGM 0, implicit %8(i32) ... 
@@ -192,22 +217,33 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F32_e64 0, [[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MAD_F32_e64_]] + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; GFX6-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY2]], implicit $exec + ; GFX6-NEXT: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MAC_F32_e64_]] + ; ; GFX10-LABEL: name: fmad_f32_copy_fneg_src2 ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F32_e64 0, [[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MAD_F32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = G_FNEG %2 - %4:vgpr(s32) = COPY %3 - %5:vgpr(s32) = G_FMAD %0, %1, %4 - S_ENDPGM 0, implicit %5 + ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY2]], implicit $exec + ; GFX10-NEXT: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MAC_F32_e64_]] + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(f32) = G_BITCAST %2(i32) + %4:vgpr(f32) = G_FNEG %3 + %5:vgpr(i32) = G_BITCAST %4(f32) + %6:vgpr(i32) = COPY %5(i32) + %7:vgpr(f32) = G_BITCAST %0(i32) + %8:vgpr(f32) = G_BITCAST %1(i32) + %9:vgpr(f32) = G_BITCAST %6(i32) + %10:vgpr(f32) = G_FMAD %7, %8, %9 + %11:vgpr(i32) = G_BITCAST %10(f32) + S_ENDPGM 0, implicit %11(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.mir index f3021ca75aed3..d2379191025f6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.mir @@ -23,50 +23,58 @@ body: | ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_64 = COPY $sgpr10_sgpr11 ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY $vgpr10_vgpr11 ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY $vgpr12_vgpr13 - ; GFX7-NEXT: %7:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %8:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %9:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: %10:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %11:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %12:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: S_ENDPGM 0, implicit %10, implicit %11, implicit %12 - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(s32) = COPY $vgpr1 + ; GFX7-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MAX_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MAX_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], [[V_MAX_F32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (f32), addrspace 1) + ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], [[V_MAX_F32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (f32), addrspace 1) + ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], [[V_MAX_F32_e64_2]], 0, 0, implicit $exec, implicit $flat_scr :: (store (f32), addrspace 1) + ; GFX7-NEXT: [[V_MAX_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MAX_F64_e64_1:%[0-9]+]]:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MAX_F64_e64_2:%[0-9]+]]:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_MAX_F64_e64_]], implicit [[V_MAX_F64_e64_1]], implicit [[V_MAX_F64_e64_2]] + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:vgpr(i32) = COPY $vgpr1 %3:vgpr(p1) = COPY $vgpr3_vgpr4 + %4:sgpr(i64) = COPY $sgpr10_sgpr11 + %5:vgpr(i64) = COPY $vgpr10_vgpr11 + %6:vgpr(i64) = COPY $vgpr12_vgpr13 + %7:vgpr(f32) = G_BITCAST %1(i32) + %8:sgpr(f32) = G_BITCAST %0(i32) + %9:vgpr(f32) = G_FMAXNUM_IEEE %7, %8 + %10:sgpr(f32) = G_BITCAST %0(i32) + %11:vgpr(f32) = G_BITCAST %1(i32) + 
%12:vgpr(f32) = G_FMAXNUM_IEEE %10, %11 + %13:vgpr(f32) = G_BITCAST %1(i32) + %14:vgpr(f32) = G_BITCAST %2(i32) + %15:vgpr(f32) = G_FMAXNUM_IEEE %13, %14 + G_STORE %9(f32), %3(p1) :: (store (f32), addrspace 1) + G_STORE %12(f32), %3(p1) :: (store (f32), addrspace 1) + G_STORE %15(f32), %3(p1) :: (store (f32), addrspace 1) + %16:sgpr(f64) = G_BITCAST %4(i64) + %17:vgpr(f64) = G_BITCAST %5(i64) + %18:vgpr(f64) = G_FMAXNUM_IEEE %16, %17 + %19:vgpr(f64) = G_BITCAST %5(i64) + %20:sgpr(f64) = G_BITCAST %4(i64) + %21:vgpr(f64) = G_FMAXNUM_IEEE %19, %20 + %22:vgpr(f64) = G_BITCAST %5(i64) + %23:vgpr(f64) = G_BITCAST %6(i64) + %24:vgpr(f64) = G_FMAXNUM_IEEE %22, %23 + %25:vgpr(i64) = G_BITCAST %18(f64) + %26:vgpr(i64) = G_BITCAST %21(f64) + %27:vgpr(i64) = G_BITCAST %24(f64) + S_ENDPGM 0, implicit %25(i64), implicit %26(i64), implicit %27(i64) - %10:sgpr(s64) = COPY $sgpr10_sgpr11 - %11:vgpr(s64) = COPY $vgpr10_vgpr11 - %12:vgpr(s64) = COPY $vgpr12_vgpr13 - ; maxnum_ieee vs - %4:vgpr(s32) = G_FMAXNUM_IEEE %1, %0 - ; maxnum_ieee sv - %5:vgpr(s32) = G_FMAXNUM_IEEE %0, %1 - ; maxnum_ieee vv - %6:vgpr(s32) = G_FMAXNUM_IEEE %1, %2 - G_STORE %4, %3 :: (store (s32), addrspace 1) - G_STORE %5, %3 :: (store (s32), addrspace 1) - G_STORE %6, %3 :: (store (s32), addrspace 1) - ; 64-bit - ; maxnum_ieee vs - %14:vgpr(s64) = G_FMAXNUM_IEEE %10, %11 - ; maxnum_ieee sv - %15:vgpr(s64) = G_FMAXNUM_IEEE %11, %10 - ; maxnum_ieee vv - %16:vgpr(s64) = G_FMAXNUM_IEEE %11, %12 - S_ENDPGM 0, implicit %14, implicit %15, implicit %16 ... @@ -93,48 +101,56 @@ body: | ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_64 = COPY $sgpr10_sgpr11 ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY $vgpr10_vgpr11 ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY $vgpr12_vgpr13 - ; GFX7-NEXT: %7:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %8:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %9:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: %10:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %11:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %12:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: S_ENDPGM 0, implicit %10, implicit %11, implicit %12 - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(s32) = COPY $vgpr1 + ; GFX7-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MAX_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MAX_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], [[V_MAX_F32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (f32), addrspace 1) + ; GFX7-NEXT: FLAT_STORE_DWORD 
[[COPY3]], [[V_MAX_F32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (f32), addrspace 1) + ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], [[V_MAX_F32_e64_2]], 0, 0, implicit $exec, implicit $flat_scr :: (store (f32), addrspace 1) + ; GFX7-NEXT: [[V_MAX_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MAX_F64_e64_1:%[0-9]+]]:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MAX_F64_e64_2:%[0-9]+]]:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_MAX_F64_e64_]], implicit [[V_MAX_F64_e64_1]], implicit [[V_MAX_F64_e64_2]] + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:vgpr(i32) = COPY $vgpr1 %3:vgpr(p1) = COPY $vgpr3_vgpr4 + %4:sgpr(i64) = COPY $sgpr10_sgpr11 + %5:vgpr(i64) = COPY $vgpr10_vgpr11 + %6:vgpr(i64) = COPY $vgpr12_vgpr13 + %7:vgpr(f32) = G_BITCAST %1(i32) + %8:sgpr(f32) = G_BITCAST %0(i32) + %9:vgpr(f32) = G_FMAXNUM_IEEE %7, %8 + %10:sgpr(f32) = G_BITCAST %0(i32) + %11:vgpr(f32) = G_BITCAST %1(i32) + %12:vgpr(f32) = G_FMAXNUM_IEEE %10, %11 + %13:vgpr(f32) = G_BITCAST %1(i32) + %14:vgpr(f32) = G_BITCAST %2(i32) + %15:vgpr(f32) = G_FMAXNUM_IEEE %13, %14 + G_STORE %9(f32), %3(p1) :: (store (f32), addrspace 1) + G_STORE %12(f32), %3(p1) :: (store (f32), addrspace 1) + G_STORE %15(f32), %3(p1) :: (store (f32), addrspace 1) + %16:sgpr(f64) = G_BITCAST %4(i64) + %17:vgpr(f64) = G_BITCAST %5(i64) + %18:vgpr(f64) = G_FMAXNUM_IEEE %16, %17 + %19:vgpr(f64) = G_BITCAST %5(i64) + %20:sgpr(f64) = G_BITCAST %4(i64) + %21:vgpr(f64) = G_FMAXNUM_IEEE %19, %20 + %22:vgpr(f64) = G_BITCAST %5(i64) + %23:vgpr(f64) = G_BITCAST %6(i64) + %24:vgpr(f64) = G_FMAXNUM_IEEE %22, %23 + %25:vgpr(i64) = G_BITCAST %18(f64) + %26:vgpr(i64) = G_BITCAST %21(f64) + %27:vgpr(i64) = G_BITCAST %24(f64) + S_ENDPGM 0, implicit %25(i64), implicit %26(i64), implicit %27(i64) - %10:sgpr(s64) = COPY $sgpr10_sgpr11 - %11:vgpr(s64) = COPY $vgpr10_vgpr11 - %12:vgpr(s64) = COPY $vgpr12_vgpr13 - ; maxnum_ieee vs - %4:vgpr(s32) = G_FMAXNUM_IEEE %1, %0 - ; maxnum_ieee sv - %5:vgpr(s32) = G_FMAXNUM_IEEE %0, %1 - ; maxnum_ieee vv - %6:vgpr(s32) = G_FMAXNUM_IEEE %1, %2 - G_STORE %4, %3 :: (store (s32), addrspace 1) - G_STORE %5, %3 :: (store (s32), addrspace 1) - G_STORE %6, %3 :: (store (s32), addrspace 1) - ; 64-bit - ; maxnum_ieee vs - %14:vgpr(s64) = G_FMAXNUM_IEEE %10, %11 - ; maxnum_ieee sv - %15:vgpr(s64) = G_FMAXNUM_IEEE %11, %10 - ; maxnum_ieee vv - %16:vgpr(s64) = G_FMAXNUM_IEEE %11, %12 - S_ENDPGM 0, implicit %14, implicit %15, implicit %16 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.s16.mir index 17ff289f89607..782f19ce7faec 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.s16.mir @@ -28,12 +28,15 @@ body: | ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[V_MAX_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAX_F16_fake16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vgpr(s16) = G_FMAXNUM_IEEE %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %4:vgpr(f16) = G_BITCAST %2(i16) + %5:vgpr(f16) = G_BITCAST %3(i16) + %6:vgpr(f16) = G_FMAXNUM_IEEE %4, %5 + %7:vgpr(i16) = G_BITCAST %6(f16) + S_ENDPGM 0, implicit %7(i16) ... --- @@ -60,11 +63,14 @@ body: | ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[V_MAX_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_fake16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAX_F16_fake16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vgpr(s16) = G_FNEG %3 - %5:vgpr(s16) = G_FMAXNUM_IEEE %2, %4 - S_ENDPGM 0, implicit %5 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %4:vgpr(f16) = G_BITCAST %3(i16) + %5:vgpr(f16) = G_FNEG %4 + %6:vgpr(f16) = G_BITCAST %2(i16) + %7:vgpr(f16) = G_FMAXNUM_IEEE %6, %5 + %8:vgpr(i16) = G_BITCAST %7(f16) + S_ENDPGM 0, implicit %8(i16) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.v2s16.mir index 10bd10f9cd3c5..bf791b22512e6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.v2s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.v2s16.mir @@ -17,10 +17,13 @@ body: | ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: %2:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[COPY]], 8, [[COPY1]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX9-NEXT: S_ENDPGM 0, implicit %2 - %0:vgpr(<2 x s16>) = COPY $vgpr0 - %1:vgpr(<2 x s16>) = COPY $vgpr1 - %2:vgpr(<2 x s16>) = G_FMAXNUM_IEEE %0, %1 - S_ENDPGM 0, implicit %2 + ; GFX9-NEXT: [[V_PK_MAX_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[COPY]], 8, [[COPY1]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_PK_MAX_F16_]] + %0:vgpr(<2 x i16>) = COPY $vgpr0 + %1:vgpr(<2 x i16>) = COPY $vgpr1 + %2:vgpr(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %3:vgpr(<2 x f16>) = G_BITCAST %1(<2 x i16>) + %4:vgpr(<2 x f16>) = G_FMAXNUM_IEEE %2, %3 + %5:vgpr(<2 x i16>) = G_BITCAST %4(<2 x f16>) + S_ENDPGM 0, implicit %5(<2 x i16>) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.mir index 467c2914d2b4b..26a0f1b56ea50 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.mir @@ -24,50 +24,58 @@ body: | ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_64 = COPY $sgpr10_sgpr11 ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY $vgpr10_vgpr11 ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY $vgpr12_vgpr13 - ; GFX7-NEXT: %7:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %8:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %9:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: %10:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %11:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %12:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: S_ENDPGM 0, implicit %10, implicit %11, implicit %12 - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(s32) = COPY $vgpr1 + ; GFX7-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MAX_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MAX_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], [[V_MAX_F32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (f32), addrspace 1) + ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], [[V_MAX_F32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (f32), addrspace 1) + ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], [[V_MAX_F32_e64_2]], 0, 0, implicit $exec, implicit $flat_scr :: (store (f32), addrspace 1) + ; GFX7-NEXT: [[V_MAX_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MAX_F64_e64_1:%[0-9]+]]:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MAX_F64_e64_2:%[0-9]+]]:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_MAX_F64_e64_]], implicit [[V_MAX_F64_e64_1]], implicit [[V_MAX_F64_e64_2]] + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:vgpr(i32) = COPY $vgpr1 %3:vgpr(p1) = COPY $vgpr3_vgpr4 + %4:sgpr(i64) = COPY $sgpr10_sgpr11 + %5:vgpr(i64) = COPY $vgpr10_vgpr11 + %6:vgpr(i64) = COPY $vgpr12_vgpr13 + %7:vgpr(f32) = G_BITCAST %1(i32) + %8:sgpr(f32) = G_BITCAST %0(i32) + %9:vgpr(f32) = G_FMAXNUM %7, %8 + %10:sgpr(f32) = G_BITCAST %0(i32) + %11:vgpr(f32) = G_BITCAST %1(i32) + %12:vgpr(f32) = G_FMAXNUM %10, %11 
+ %13:vgpr(f32) = G_BITCAST %1(i32) + %14:vgpr(f32) = G_BITCAST %2(i32) + %15:vgpr(f32) = G_FMAXNUM %13, %14 + G_STORE %9(f32), %3(p1) :: (store (f32), addrspace 1) + G_STORE %12(f32), %3(p1) :: (store (f32), addrspace 1) + G_STORE %15(f32), %3(p1) :: (store (f32), addrspace 1) + %16:sgpr(f64) = G_BITCAST %4(i64) + %17:vgpr(f64) = G_BITCAST %5(i64) + %18:vgpr(f64) = G_FMAXNUM %16, %17 + %19:vgpr(f64) = G_BITCAST %5(i64) + %20:sgpr(f64) = G_BITCAST %4(i64) + %21:vgpr(f64) = G_FMAXNUM %19, %20 + %22:vgpr(f64) = G_BITCAST %5(i64) + %23:vgpr(f64) = G_BITCAST %6(i64) + %24:vgpr(f64) = G_FMAXNUM %22, %23 + %25:vgpr(i64) = G_BITCAST %18(f64) + %26:vgpr(i64) = G_BITCAST %21(f64) + %27:vgpr(i64) = G_BITCAST %24(f64) + S_ENDPGM 0, implicit %25(i64), implicit %26(i64), implicit %27(i64) - %10:sgpr(s64) = COPY $sgpr10_sgpr11 - %11:vgpr(s64) = COPY $vgpr10_vgpr11 - %12:vgpr(s64) = COPY $vgpr12_vgpr13 - ; maxnum vs - %4:vgpr(s32) = G_FMAXNUM %1, %0 - ; maxnum sv - %5:vgpr(s32) = G_FMAXNUM %0, %1 - ; maxnum vv - %6:vgpr(s32) = G_FMAXNUM %1, %2 - G_STORE %4, %3 :: (store (s32), addrspace 1) - G_STORE %5, %3 :: (store (s32), addrspace 1) - G_STORE %6, %3 :: (store (s32), addrspace 1) - ; 64-bit - ; maxnum vs - %14:vgpr(s64) = G_FMAXNUM %10, %11 - ; maxnum sv - %15:vgpr(s64) = G_FMAXNUM %11, %10 - ; maxnum vv - %16:vgpr(s64) = G_FMAXNUM %11, %12 - S_ENDPGM 0, implicit %14, implicit %15, implicit %16 ... --- @@ -92,48 +100,56 @@ body: | ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_64 = COPY $sgpr10_sgpr11 ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY $vgpr10_vgpr11 ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY $vgpr12_vgpr13 - ; GFX7-NEXT: %7:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %8:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %9:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: %10:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %11:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %12:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: S_ENDPGM 0, implicit %10, implicit %11, implicit %12 - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(s32) = COPY $vgpr1 + ; GFX7-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MAX_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MAX_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], [[V_MAX_F32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (f32), addrspace 1) + ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], [[V_MAX_F32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (f32), addrspace 1) + ; GFX7-NEXT: 
FLAT_STORE_DWORD [[COPY3]], [[V_MAX_F32_e64_2]], 0, 0, implicit $exec, implicit $flat_scr :: (store (f32), addrspace 1) + ; GFX7-NEXT: [[V_MAX_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MAX_F64_e64_1:%[0-9]+]]:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MAX_F64_e64_2:%[0-9]+]]:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_MAX_F64_e64_]], implicit [[V_MAX_F64_e64_1]], implicit [[V_MAX_F64_e64_2]] + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:vgpr(i32) = COPY $vgpr1 %3:vgpr(p1) = COPY $vgpr3_vgpr4 + %4:sgpr(i64) = COPY $sgpr10_sgpr11 + %5:vgpr(i64) = COPY $vgpr10_vgpr11 + %6:vgpr(i64) = COPY $vgpr12_vgpr13 + %7:vgpr(f32) = G_BITCAST %1(i32) + %8:sgpr(f32) = G_BITCAST %0(i32) + %9:vgpr(f32) = G_FMAXNUM %7, %8 + %10:sgpr(f32) = G_BITCAST %0(i32) + %11:vgpr(f32) = G_BITCAST %1(i32) + %12:vgpr(f32) = G_FMAXNUM %10, %11 + %13:vgpr(f32) = G_BITCAST %1(i32) + %14:vgpr(f32) = G_BITCAST %2(i32) + %15:vgpr(f32) = G_FMAXNUM %13, %14 + G_STORE %9(f32), %3(p1) :: (store (f32), addrspace 1) + G_STORE %12(f32), %3(p1) :: (store (f32), addrspace 1) + G_STORE %15(f32), %3(p1) :: (store (f32), addrspace 1) + %16:sgpr(f64) = G_BITCAST %4(i64) + %17:vgpr(f64) = G_BITCAST %5(i64) + %18:vgpr(f64) = G_FMAXNUM %16, %17 + %19:vgpr(f64) = G_BITCAST %5(i64) + %20:sgpr(f64) = G_BITCAST %4(i64) + %21:vgpr(f64) = G_FMAXNUM %19, %20 + %22:vgpr(f64) = G_BITCAST %5(i64) + %23:vgpr(f64) = G_BITCAST %6(i64) + %24:vgpr(f64) = G_FMAXNUM %22, %23 + %25:vgpr(i64) = G_BITCAST %18(f64) + %26:vgpr(i64) = G_BITCAST %21(f64) + %27:vgpr(i64) = G_BITCAST %24(f64) + S_ENDPGM 0, implicit %25(i64), implicit %26(i64), implicit %27(i64) - %10:sgpr(s64) = COPY $sgpr10_sgpr11 - %11:vgpr(s64) = COPY $vgpr10_vgpr11 - %12:vgpr(s64) = COPY $vgpr12_vgpr13 - ; maxnum vs - %4:vgpr(s32) = G_FMAXNUM %1, %0 - ; maxnum sv - %5:vgpr(s32) = G_FMAXNUM %0, %1 - ; maxnum vv - %6:vgpr(s32) = G_FMAXNUM %1, %2 - G_STORE %4, %3 :: (store (s32), addrspace 1) - G_STORE %5, %3 :: (store (s32), addrspace 1) - G_STORE %6, %3 :: (store (s32), addrspace 1) - ; 64-bit - ; maxnum vs - %14:vgpr(s64) = G_FMAXNUM %10, %11 - ; maxnum sv - %15:vgpr(s64) = G_FMAXNUM %11, %10 - ; maxnum vv - %16:vgpr(s64) = G_FMAXNUM %11, %12 - S_ENDPGM 0, implicit %14, implicit %15, implicit %16 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.s16.mir index fd0aeb07b49ca..0ba6aff1a5be3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.s16.mir @@ -28,12 +28,15 @@ body: | ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[V_MAX_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAX_F16_fake16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vgpr(s16) = G_FMAXNUM %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %4:vgpr(f16) = G_BITCAST %2(i16) + %5:vgpr(f16) = G_BITCAST %3(i16) + %6:vgpr(f16) = G_FMAXNUM %4, %5 + %7:vgpr(i16) = G_BITCAST %6(f16) + S_ENDPGM 0, implicit %7(i16) ... --- @@ -60,11 +63,14 @@ body: | ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[V_MAX_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_fake16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAX_F16_fake16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vgpr(s16) = G_FNEG %3 - %5:vgpr(s16) = G_FMAXNUM %2, %4 - S_ENDPGM 0, implicit %5 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %4:vgpr(f16) = G_BITCAST %3(i16) + %5:vgpr(f16) = G_FNEG %4 + %6:vgpr(f16) = G_BITCAST %2(i16) + %7:vgpr(f16) = G_FMAXNUM %6, %5 + %8:vgpr(i16) = G_BITCAST %7(f16) + S_ENDPGM 0, implicit %8(i16) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.v2s16.mir index 977e7de4bf818..cb0d77334cb88 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.v2s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.v2s16.mir @@ -19,10 +19,13 @@ body: | ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: %2:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[COPY]], 8, [[COPY1]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX9-NEXT: S_ENDPGM 0, implicit %2 - %0:vgpr(<2 x s16>) = COPY $vgpr0 - %1:vgpr(<2 x s16>) = COPY $vgpr1 - %2:vgpr(<2 x s16>) = G_FMAXNUM %0, %1 - S_ENDPGM 0, implicit %2 + ; GFX9-NEXT: [[V_PK_MAX_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[COPY]], 8, [[COPY1]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_PK_MAX_F16_]] + %0:vgpr(<2 x i16>) = COPY $vgpr0 + %1:vgpr(<2 x i16>) = COPY $vgpr1 + %2:vgpr(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %3:vgpr(<2 x f16>) = G_BITCAST %1(<2 x i16>) + %4:vgpr(<2 x f16>) = G_FMAXNUM %2, %3 + %5:vgpr(<2 x i16>) = G_BITCAST %4(<2 x f16>) + S_ENDPGM 0, implicit %5(<2 x i16>) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.mir index 3728907c43e7f..b9179b6a8bb50 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.mir @@ -23,50 +23,58 @@ body: | ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_64 = COPY $sgpr10_sgpr11 ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY $vgpr10_vgpr11 ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY $vgpr12_vgpr13 - ; GFX7-NEXT: %7:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %8:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %9:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: %10:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %11:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %12:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: S_ENDPGM 0, implicit %10, implicit %11, implicit %12 - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(s32) = COPY $vgpr1 + ; GFX7-NEXT: [[V_MIN_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MIN_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MIN_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], [[V_MIN_F32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (f32), addrspace 1) + ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], [[V_MIN_F32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (f32), addrspace 1) + ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], [[V_MIN_F32_e64_2]], 0, 0, implicit $exec, implicit $flat_scr :: (store (f32), addrspace 1) + ; GFX7-NEXT: [[V_MIN_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MIN_F64_e64_1:%[0-9]+]]:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MIN_F64_e64_2:%[0-9]+]]:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_MIN_F64_e64_]], implicit [[V_MIN_F64_e64_1]], implicit [[V_MIN_F64_e64_2]] + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:vgpr(i32) = COPY $vgpr1 %3:vgpr(p1) = COPY $vgpr3_vgpr4 + %4:sgpr(i64) = COPY $sgpr10_sgpr11 + %5:vgpr(i64) = COPY $vgpr10_vgpr11 + %6:vgpr(i64) = COPY $vgpr12_vgpr13 + %7:vgpr(f32) = G_BITCAST %1(i32) + %8:sgpr(f32) = G_BITCAST %0(i32) + %9:vgpr(f32) = G_FMINNUM_IEEE %7, %8 + %10:sgpr(f32) = G_BITCAST %0(i32) + %11:vgpr(f32) = G_BITCAST %1(i32) + 
%12:vgpr(f32) = G_FMINNUM_IEEE %10, %11 + %13:vgpr(f32) = G_BITCAST %1(i32) + %14:vgpr(f32) = G_BITCAST %2(i32) + %15:vgpr(f32) = G_FMINNUM_IEEE %13, %14 + G_STORE %9(f32), %3(p1) :: (store (f32), addrspace 1) + G_STORE %12(f32), %3(p1) :: (store (f32), addrspace 1) + G_STORE %15(f32), %3(p1) :: (store (f32), addrspace 1) + %16:sgpr(f64) = G_BITCAST %4(i64) + %17:vgpr(f64) = G_BITCAST %5(i64) + %18:vgpr(f64) = G_FMINNUM_IEEE %16, %17 + %19:vgpr(f64) = G_BITCAST %5(i64) + %20:sgpr(f64) = G_BITCAST %4(i64) + %21:vgpr(f64) = G_FMINNUM_IEEE %19, %20 + %22:vgpr(f64) = G_BITCAST %5(i64) + %23:vgpr(f64) = G_BITCAST %6(i64) + %24:vgpr(f64) = G_FMINNUM_IEEE %22, %23 + %25:vgpr(i64) = G_BITCAST %18(f64) + %26:vgpr(i64) = G_BITCAST %21(f64) + %27:vgpr(i64) = G_BITCAST %24(f64) + S_ENDPGM 0, implicit %25(i64), implicit %26(i64), implicit %27(i64) - %10:sgpr(s64) = COPY $sgpr10_sgpr11 - %11:vgpr(s64) = COPY $vgpr10_vgpr11 - %12:vgpr(s64) = COPY $vgpr12_vgpr13 - ; minnum_ieee vs - %4:vgpr(s32) = G_FMINNUM_IEEE %1, %0 - ; minnum_ieee sv - %5:vgpr(s32) = G_FMINNUM_IEEE %0, %1 - ; minnum_ieee vv - %6:vgpr(s32) = G_FMINNUM_IEEE %1, %2 - G_STORE %4, %3 :: (store (s32), addrspace 1) - G_STORE %5, %3 :: (store (s32), addrspace 1) - G_STORE %6, %3 :: (store (s32), addrspace 1) - ; 64-bit - ; minnum_ieee vs - %14:vgpr(s64) = G_FMINNUM_IEEE %10, %11 - ; minnum_ieee sv - %15:vgpr(s64) = G_FMINNUM_IEEE %11, %10 - ; minnum_ieee vv - %16:vgpr(s64) = G_FMINNUM_IEEE %11, %12 - S_ENDPGM 0, implicit %14, implicit %15, implicit %16 ... # FIXME: Ideally this would fail to select with ieee mode disabled @@ -93,48 +101,56 @@ body: | ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_64 = COPY $sgpr10_sgpr11 ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY $vgpr10_vgpr11 ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY $vgpr12_vgpr13 - ; GFX7-NEXT: %7:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %8:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %9:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: %10:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %11:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %12:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: S_ENDPGM 0, implicit %10, implicit %11, implicit %12 - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(s32) = COPY $vgpr1 + ; GFX7-NEXT: [[V_MIN_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MIN_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MIN_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], [[V_MIN_F32_e64_]], 0, 0, implicit $exec, implicit 
$flat_scr :: (store (f32), addrspace 1) + ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], [[V_MIN_F32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (f32), addrspace 1) + ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], [[V_MIN_F32_e64_2]], 0, 0, implicit $exec, implicit $flat_scr :: (store (f32), addrspace 1) + ; GFX7-NEXT: [[V_MIN_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MIN_F64_e64_1:%[0-9]+]]:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MIN_F64_e64_2:%[0-9]+]]:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_MIN_F64_e64_]], implicit [[V_MIN_F64_e64_1]], implicit [[V_MIN_F64_e64_2]] + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:vgpr(i32) = COPY $vgpr1 %3:vgpr(p1) = COPY $vgpr3_vgpr4 + %4:sgpr(i64) = COPY $sgpr10_sgpr11 + %5:vgpr(i64) = COPY $vgpr10_vgpr11 + %6:vgpr(i64) = COPY $vgpr12_vgpr13 + %7:vgpr(f32) = G_BITCAST %1(i32) + %8:sgpr(f32) = G_BITCAST %0(i32) + %9:vgpr(f32) = G_FMINNUM_IEEE %7, %8 + %10:sgpr(f32) = G_BITCAST %0(i32) + %11:vgpr(f32) = G_BITCAST %1(i32) + %12:vgpr(f32) = G_FMINNUM_IEEE %10, %11 + %13:vgpr(f32) = G_BITCAST %1(i32) + %14:vgpr(f32) = G_BITCAST %2(i32) + %15:vgpr(f32) = G_FMINNUM_IEEE %13, %14 + G_STORE %9(f32), %3(p1) :: (store (f32), addrspace 1) + G_STORE %12(f32), %3(p1) :: (store (f32), addrspace 1) + G_STORE %15(f32), %3(p1) :: (store (f32), addrspace 1) + %16:sgpr(f64) = G_BITCAST %4(i64) + %17:vgpr(f64) = G_BITCAST %5(i64) + %18:vgpr(f64) = G_FMINNUM_IEEE %16, %17 + %19:vgpr(f64) = G_BITCAST %5(i64) + %20:sgpr(f64) = G_BITCAST %4(i64) + %21:vgpr(f64) = G_FMINNUM_IEEE %19, %20 + %22:vgpr(f64) = G_BITCAST %5(i64) + %23:vgpr(f64) = G_BITCAST %6(i64) + %24:vgpr(f64) = G_FMINNUM_IEEE %22, %23 + %25:vgpr(i64) = G_BITCAST %18(f64) + %26:vgpr(i64) = G_BITCAST %21(f64) + %27:vgpr(i64) = G_BITCAST %24(f64) + S_ENDPGM 0, implicit %25(i64), implicit %26(i64), implicit %27(i64) - %10:sgpr(s64) = COPY $sgpr10_sgpr11 - %11:vgpr(s64) = COPY $vgpr10_vgpr11 - %12:vgpr(s64) = COPY $vgpr12_vgpr13 - ; minnum_ieee vs - %4:vgpr(s32) = G_FMINNUM_IEEE %1, %0 - ; minnum_ieee sv - %5:vgpr(s32) = G_FMINNUM_IEEE %0, %1 - ; minnum_ieee vv - %6:vgpr(s32) = G_FMINNUM_IEEE %1, %2 - G_STORE %4, %3 :: (store (s32), addrspace 1) - G_STORE %5, %3 :: (store (s32), addrspace 1) - G_STORE %6, %3 :: (store (s32), addrspace 1) - ; 64-bit - ; minnum_ieee vs - %14:vgpr(s64) = G_FMINNUM_IEEE %10, %11 - ; minnum_ieee sv - %15:vgpr(s64) = G_FMINNUM_IEEE %11, %10 - ; minnum_ieee vv - %16:vgpr(s64) = G_FMINNUM_IEEE %11, %12 - S_ENDPGM 0, implicit %14, implicit %15, implicit %16 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.s16.mir index d2d9c7edc30ac..dbe8d52c17279 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.s16.mir @@ -28,12 +28,15 @@ body: | ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[V_MIN_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MIN_F16_fake16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vgpr(s16) = G_FMINNUM_IEEE %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %4:vgpr(f16) = G_BITCAST %2(i16) + %5:vgpr(f16) = G_BITCAST %3(i16) + %6:vgpr(f16) = G_FMINNUM_IEEE %4, %5 + %7:vgpr(i16) = G_BITCAST %6(f16) + S_ENDPGM 0, implicit %7(i16) ... --- @@ -60,11 +63,14 @@ body: | ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[V_MIN_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F16_fake16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MIN_F16_fake16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vgpr(s16) = G_FNEG %3 - %5:vgpr(s16) = G_FMINNUM_IEEE %2, %4 - S_ENDPGM 0, implicit %5 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %4:vgpr(f16) = G_BITCAST %3(i16) + %5:vgpr(f16) = G_FNEG %4 + %6:vgpr(f16) = G_BITCAST %2(i16) + %7:vgpr(f16) = G_FMINNUM_IEEE %6, %5 + %8:vgpr(i16) = G_BITCAST %7(f16) + S_ENDPGM 0, implicit %8(i16) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.v2s16.mir index c9188387a988e..25e847107bf49 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.v2s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.v2s16.mir @@ -17,10 +17,13 @@ body: | ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: %2:vgpr_32 = nofpexcept V_PK_MIN_F16 8, [[COPY]], 8, [[COPY1]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX9-NEXT: S_ENDPGM 0, implicit %2 - %0:vgpr(<2 x s16>) = COPY $vgpr0 - %1:vgpr(<2 x s16>) = COPY $vgpr1 - %2:vgpr(<2 x s16>) = G_FMINNUM_IEEE %0, %1 - S_ENDPGM 0, implicit %2 + ; GFX9-NEXT: [[V_PK_MIN_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MIN_F16 8, [[COPY]], 8, [[COPY1]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_PK_MIN_F16_]] + %0:vgpr(<2 x i16>) = COPY $vgpr0 + %1:vgpr(<2 x i16>) = COPY $vgpr1 + %2:vgpr(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %3:vgpr(<2 x f16>) = G_BITCAST %1(<2 x i16>) + %4:vgpr(<2 x f16>) = G_FMINNUM_IEEE %2, %3 + %5:vgpr(<2 x i16>) = G_BITCAST %4(<2 x f16>) + S_ENDPGM 0, implicit %5(<2 x i16>) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.mir index bf9752b512632..153d154edcc65 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.mir @@ -24,50 +24,58 @@ body: | ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_64 = COPY $sgpr10_sgpr11 ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY $vgpr10_vgpr11 ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY $vgpr12_vgpr13 - ; GFX7-NEXT: %7:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %8:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %9:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: %10:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %11:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %12:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: S_ENDPGM 0, implicit %10, implicit %11, implicit %12 - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(s32) = COPY $vgpr1 + ; GFX7-NEXT: [[V_MIN_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MIN_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MIN_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], [[V_MIN_F32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (f32), addrspace 1) + ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], [[V_MIN_F32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (f32), addrspace 1) + ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], [[V_MIN_F32_e64_2]], 0, 0, implicit $exec, implicit $flat_scr :: (store (f32), addrspace 1) + ; GFX7-NEXT: [[V_MIN_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MIN_F64_e64_1:%[0-9]+]]:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MIN_F64_e64_2:%[0-9]+]]:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_MIN_F64_e64_]], implicit [[V_MIN_F64_e64_1]], implicit [[V_MIN_F64_e64_2]] + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:vgpr(i32) = COPY $vgpr1 %3:vgpr(p1) = COPY $vgpr3_vgpr4 + %4:sgpr(i64) = COPY $sgpr10_sgpr11 + %5:vgpr(i64) = COPY $vgpr10_vgpr11 + %6:vgpr(i64) = COPY $vgpr12_vgpr13 + %7:vgpr(f32) = G_BITCAST %1(i32) + %8:sgpr(f32) = G_BITCAST %0(i32) + %9:vgpr(f32) = G_FMINNUM %7, %8 + %10:sgpr(f32) = G_BITCAST %0(i32) + %11:vgpr(f32) = G_BITCAST %1(i32) + %12:vgpr(f32) = G_FMINNUM %10, %11 
+ %13:vgpr(f32) = G_BITCAST %1(i32) + %14:vgpr(f32) = G_BITCAST %2(i32) + %15:vgpr(f32) = G_FMINNUM %13, %14 + G_STORE %9(f32), %3(p1) :: (store (f32), addrspace 1) + G_STORE %12(f32), %3(p1) :: (store (f32), addrspace 1) + G_STORE %15(f32), %3(p1) :: (store (f32), addrspace 1) + %16:sgpr(f64) = G_BITCAST %4(i64) + %17:vgpr(f64) = G_BITCAST %5(i64) + %18:vgpr(f64) = G_FMINNUM %16, %17 + %19:vgpr(f64) = G_BITCAST %5(i64) + %20:sgpr(f64) = G_BITCAST %4(i64) + %21:vgpr(f64) = G_FMINNUM %19, %20 + %22:vgpr(f64) = G_BITCAST %5(i64) + %23:vgpr(f64) = G_BITCAST %6(i64) + %24:vgpr(f64) = G_FMINNUM %22, %23 + %25:vgpr(i64) = G_BITCAST %18(f64) + %26:vgpr(i64) = G_BITCAST %21(f64) + %27:vgpr(i64) = G_BITCAST %24(f64) + S_ENDPGM 0, implicit %25(i64), implicit %26(i64), implicit %27(i64) - %10:sgpr(s64) = COPY $sgpr10_sgpr11 - %11:vgpr(s64) = COPY $vgpr10_vgpr11 - %12:vgpr(s64) = COPY $vgpr12_vgpr13 - ; minnum vs - %4:vgpr(s32) = G_FMINNUM %1, %0 - ; minnum sv - %5:vgpr(s32) = G_FMINNUM %0, %1 - ; minnum vv - %6:vgpr(s32) = G_FMINNUM %1, %2 - G_STORE %4, %3 :: (store (s32), addrspace 1) - G_STORE %5, %3 :: (store (s32), addrspace 1) - G_STORE %6, %3 :: (store (s32), addrspace 1) - ; 64-bit - ; minnum vs - %14:vgpr(s64) = G_FMINNUM %10, %11 - ; minnum sv - %15:vgpr(s64) = G_FMINNUM %11, %10 - ; minnum vv - %16:vgpr(s64) = G_FMINNUM %11, %12 - S_ENDPGM 0, implicit %14, implicit %15, implicit %16 ... --- @@ -92,48 +100,56 @@ body: | ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_64 = COPY $sgpr10_sgpr11 ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY $vgpr10_vgpr11 ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY $vgpr12_vgpr13 - ; GFX7-NEXT: %7:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %8:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %9:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: %10:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %11:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %12:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: S_ENDPGM 0, implicit %10, implicit %11, implicit %12 - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(s32) = COPY $vgpr1 + ; GFX7-NEXT: [[V_MIN_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MIN_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MIN_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], [[V_MIN_F32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (f32), addrspace 1) + ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], [[V_MIN_F32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (f32), addrspace 1) + ; GFX7-NEXT: 
FLAT_STORE_DWORD [[COPY3]], [[V_MIN_F32_e64_2]], 0, 0, implicit $exec, implicit $flat_scr :: (store (f32), addrspace 1) + ; GFX7-NEXT: [[V_MIN_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MIN_F64_e64_1:%[0-9]+]]:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MIN_F64_e64_2:%[0-9]+]]:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_MIN_F64_e64_]], implicit [[V_MIN_F64_e64_1]], implicit [[V_MIN_F64_e64_2]] + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:vgpr(i32) = COPY $vgpr1 %3:vgpr(p1) = COPY $vgpr3_vgpr4 + %4:sgpr(i64) = COPY $sgpr10_sgpr11 + %5:vgpr(i64) = COPY $vgpr10_vgpr11 + %6:vgpr(i64) = COPY $vgpr12_vgpr13 + %7:vgpr(f32) = G_BITCAST %1(i32) + %8:sgpr(f32) = G_BITCAST %0(i32) + %9:vgpr(f32) = G_FMINNUM %7, %8 + %10:sgpr(f32) = G_BITCAST %0(i32) + %11:vgpr(f32) = G_BITCAST %1(i32) + %12:vgpr(f32) = G_FMINNUM %10, %11 + %13:vgpr(f32) = G_BITCAST %1(i32) + %14:vgpr(f32) = G_BITCAST %2(i32) + %15:vgpr(f32) = G_FMINNUM %13, %14 + G_STORE %9(f32), %3(p1) :: (store (f32), addrspace 1) + G_STORE %12(f32), %3(p1) :: (store (f32), addrspace 1) + G_STORE %15(f32), %3(p1) :: (store (f32), addrspace 1) + %16:sgpr(f64) = G_BITCAST %4(i64) + %17:vgpr(f64) = G_BITCAST %5(i64) + %18:vgpr(f64) = G_FMINNUM %16, %17 + %19:vgpr(f64) = G_BITCAST %5(i64) + %20:sgpr(f64) = G_BITCAST %4(i64) + %21:vgpr(f64) = G_FMINNUM %19, %20 + %22:vgpr(f64) = G_BITCAST %5(i64) + %23:vgpr(f64) = G_BITCAST %6(i64) + %24:vgpr(f64) = G_FMINNUM %22, %23 + %25:vgpr(i64) = G_BITCAST %18(f64) + %26:vgpr(i64) = G_BITCAST %21(f64) + %27:vgpr(i64) = G_BITCAST %24(f64) + S_ENDPGM 0, implicit %25(i64), implicit %26(i64), implicit %27(i64) - %10:sgpr(s64) = COPY $sgpr10_sgpr11 - %11:vgpr(s64) = COPY $vgpr10_vgpr11 - %12:vgpr(s64) = COPY $vgpr12_vgpr13 - ; minnum vs - %4:vgpr(s32) = G_FMINNUM %1, %0 - ; minnum sv - %5:vgpr(s32) = G_FMINNUM %0, %1 - ; minnum vv - %6:vgpr(s32) = G_FMINNUM %1, %2 - G_STORE %4, %3 :: (store (s32), addrspace 1) - G_STORE %5, %3 :: (store (s32), addrspace 1) - G_STORE %6, %3 :: (store (s32), addrspace 1) - ; 64-bit - ; minnum vs - %14:vgpr(s64) = G_FMINNUM %10, %11 - ; minnum sv - %15:vgpr(s64) = G_FMINNUM %11, %10 - ; minnum vv - %16:vgpr(s64) = G_FMINNUM %11, %12 - S_ENDPGM 0, implicit %14, implicit %15, implicit %16 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.s16.mir index 96285c6c13e86..8a2afaa2ffbda 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.s16.mir @@ -28,12 +28,15 @@ body: | ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[V_MIN_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MIN_F16_fake16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vgpr(s16) = G_FMINNUM %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %4:vgpr(f16) = G_BITCAST %2(i16) + %5:vgpr(f16) = G_BITCAST %3(i16) + %6:vgpr(f16) = G_FMINNUM %4, %5 + %7:vgpr(i16) = G_BITCAST %6(f16) + S_ENDPGM 0, implicit %7(i16) ... --- @@ -60,11 +63,14 @@ body: | ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[V_MIN_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F16_fake16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MIN_F16_fake16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vgpr(s16) = G_FNEG %3 - %5:vgpr(s16) = G_FMINNUM %2, %4 - S_ENDPGM 0, implicit %5 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %4:vgpr(f16) = G_BITCAST %3(i16) + %5:vgpr(f16) = G_FNEG %4 + %6:vgpr(f16) = G_BITCAST %2(i16) + %7:vgpr(f16) = G_FMINNUM %6, %5 + %8:vgpr(i16) = G_BITCAST %7(f16) + S_ENDPGM 0, implicit %8(i16) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.v2s16.mir index ee8ec4a2deda7..6983b9f845ae6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.v2s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.v2s16.mir @@ -17,10 +17,13 @@ body: | ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: %2:vgpr_32 = nofpexcept V_PK_MIN_F16 8, [[COPY]], 8, [[COPY1]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX9-NEXT: S_ENDPGM 0, implicit %2 - %0:vgpr(<2 x s16>) = COPY $vgpr0 - %1:vgpr(<2 x s16>) = COPY $vgpr1 - %2:vgpr(<2 x s16>) = G_FMINNUM %0, %1 - S_ENDPGM 0, implicit %2 + ; GFX9-NEXT: [[V_PK_MIN_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MIN_F16 8, [[COPY]], 8, [[COPY1]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_PK_MIN_F16_]] + %0:vgpr(<2 x i16>) = COPY $vgpr0 + %1:vgpr(<2 x i16>) = COPY $vgpr1 + %2:vgpr(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %3:vgpr(<2 x f16>) = G_BITCAST %1(<2 x i16>) + %4:vgpr(<2 x f16>) = G_FMINNUM %2, %3 + %5:vgpr(<2 x i16>) = G_BITCAST %4(<2 x f16>) + S_ENDPGM 0, implicit %5(<2 x i16>) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.mir index cd804ba135155..c98c78b61e1df 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.mir @@ -20,23 +20,29 @@ body: | ; GCN-NEXT: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: [[V_MUL_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: [[V_MUL_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: FLAT_STORE_DWORD [[COPY3]], [[V_MUL_F32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GCN-NEXT: FLAT_STORE_DWORD [[COPY3]], [[V_MUL_F32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GCN-NEXT: FLAT_STORE_DWORD [[COPY3]], [[V_MUL_F32_e64_2]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(s32) = COPY $vgpr1 + ; GCN-NEXT: FLAT_STORE_DWORD [[COPY3]], [[V_MUL_F32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (f32), addrspace 1) + ; GCN-NEXT: FLAT_STORE_DWORD [[COPY3]], [[V_MUL_F32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (f32), addrspace 1) + ; GCN-NEXT: FLAT_STORE_DWORD [[COPY3]], [[V_MUL_F32_e64_2]], 0, 0, implicit $exec, implicit $flat_scr :: (store (f32), addrspace 1) + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:vgpr(i32) = COPY $vgpr1 %3:vgpr(p1) = COPY $vgpr3_vgpr4 + %4:vgpr(f32) = G_BITCAST %1(i32) + %5:sgpr(f32) = G_BITCAST %0(i32) + %6:vgpr(f32) = G_FMUL %4, %5 + %7:sgpr(f32) = G_BITCAST %0(i32) + %8:vgpr(f32) = G_BITCAST %1(i32) + %9:vgpr(f32) = G_FMUL %7, %8 + %10:vgpr(f32) = G_BITCAST %1(i32) + %11:vgpr(f32) = G_BITCAST %2(i32) + %12:vgpr(f32) = G_FMUL %10, %11 + G_STORE %6(f32), %3(p1) :: (store (f32), addrspace 1) + G_STORE %9(f32), %3(p1) :: (store (f32), addrspace 1) + G_STORE %12(f32), %3(p1) :: (store (f32), addrspace 1) - %4:vgpr(s32) = G_FMUL %1, %0 - %5:vgpr(s32) = G_FMUL %0, %1 - %6:vgpr(s32) = G_FMUL %1, %2 - G_STORE %4, %3 :: (store (s32), addrspace 1) - G_STORE %5, %3 :: (store (s32), addrspace 1) - G_STORE %6, %3 :: (store (s32), addrspace 1) ... 
--- @@ -58,17 +64,26 @@ body: | ; GCN-NEXT: [[V_MUL_F64_e64_1:%[0-9]+]]:vreg_64 = nofpexcept V_MUL_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: [[V_MUL_F64_e64_2:%[0-9]+]]:vreg_64 = nofpexcept V_MUL_F64_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MUL_F64_e64_]], implicit [[V_MUL_F64_e64_1]], implicit [[V_MUL_F64_e64_2]] - %0:sgpr(s64) = COPY $sgpr0_sgpr1 - %1:vgpr(s64) = COPY $vgpr0_vgpr1 - %2:vgpr(s64) = COPY $vgpr2_vgpr3 + %0:sgpr(i64) = COPY $sgpr0_sgpr1 + %1:vgpr(i64) = COPY $vgpr0_vgpr1 + %2:vgpr(i64) = COPY $vgpr2_vgpr3 %3:vgpr(p1) = COPY $vgpr4_vgpr5 + %4:vgpr(f64) = G_BITCAST %1(i64) + %5:sgpr(f64) = G_BITCAST %0(i64) + %6:vgpr(f64) = G_FMUL %4, %5 + %7:sgpr(f64) = G_BITCAST %0(i64) + %8:vgpr(f64) = G_BITCAST %1(i64) + %9:vgpr(f64) = G_FMUL %7, %8 + %10:vgpr(f64) = G_BITCAST %1(i64) + %11:vgpr(f64) = G_BITCAST %2(i64) + %12:vgpr(f64) = G_FMUL %10, %11 + %13:vgpr(i64) = G_BITCAST %6(f64) + %14:vgpr(i64) = G_BITCAST %9(f64) + %15:vgpr(i64) = G_BITCAST %12(f64) + S_ENDPGM 0, implicit %13(i64), implicit %14(i64), implicit %15(i64) - %4:vgpr(s64) = G_FMUL %1, %0 - %5:vgpr(s64) = G_FMUL %0, %1 - %6:vgpr(s64) = G_FMUL %1, %2 - S_ENDPGM 0, implicit %4, implicit %5, implicit %6 ... @@ -90,22 +105,31 @@ body: | ; GCN-NEXT: [[V_MUL_F16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: [[V_MUL_F16_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MUL_F16_e64_]], implicit [[V_MUL_F16_e64_1]], implicit [[V_MUL_F16_e64_2]] - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(s32) = COPY $vgpr1 + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:vgpr(i32) = COPY $vgpr1 %3:vgpr(p1) = COPY $vgpr3_vgpr4 + %4:sgpr(i16) = G_TRUNC %0(i32) + %5:vgpr(i16) = G_TRUNC %1(i32) + %6:vgpr(i16) = G_TRUNC %2(i32) + %7:sgpr(f16) = G_BITCAST %4(i16) + %8:sgpr(f16) = G_BITCAST %4(i16) + %9:vgpr(f16) = G_FMUL %7, %8 + %10:sgpr(f16) = G_BITCAST %4(i16) + %11:sgpr(f16) = G_BITCAST %4(i16) + %12:vgpr(f16) = G_FMUL %10, %11 + %13:sgpr(f16) = G_BITCAST %4(i16) + %14:vgpr(f16) = G_BITCAST %5(i16) + %15:vgpr(f16) = G_FMUL %13, %14 + %16:vgpr(i16) = G_BITCAST %9(f16) + %17:vgpr(i16) = G_BITCAST %12(f16) + %18:vgpr(i16) = G_BITCAST %15(f16) + S_ENDPGM 0, implicit %16(i16), implicit %17(i16), implicit %18(i16) - %4:sgpr(s16) = G_TRUNC %0 - %5:vgpr(s16) = G_TRUNC %1 - %6:vgpr(s16) = G_TRUNC %2 - %8:vgpr(s16) = G_FMUL %4, %4 - %9:vgpr(s16) = G_FMUL %4, %4 - %10:vgpr(s16) = G_FMUL %4, %5 - S_ENDPGM 0, implicit %8, implicit %9, implicit %10 ... 
--- @@ -132,57 +156,65 @@ body: | ; GCN-NEXT: [[V_MUL_F32_e64_7:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[COPY]], 3, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: [[V_MUL_F32_e64_8:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 3, [[COPY]], 3, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: [[V_MUL_F32_e64_9:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 3, [[COPY]], 1, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: FLAT_STORE_DWORD [[COPY1]], [[V_MUL_F32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GCN-NEXT: FLAT_STORE_DWORD [[COPY1]], [[V_MUL_F32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GCN-NEXT: FLAT_STORE_DWORD [[COPY1]], [[V_MUL_F32_e64_2]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GCN-NEXT: FLAT_STORE_DWORD [[COPY1]], [[V_MUL_F32_e64_3]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GCN-NEXT: FLAT_STORE_DWORD [[COPY1]], [[V_MUL_F32_e64_4]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GCN-NEXT: FLAT_STORE_DWORD [[COPY1]], [[V_MUL_F32_e64_5]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GCN-NEXT: FLAT_STORE_DWORD [[COPY1]], [[V_MUL_F32_e64_6]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GCN-NEXT: FLAT_STORE_DWORD [[COPY1]], [[V_MUL_F32_e64_7]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GCN-NEXT: FLAT_STORE_DWORD [[COPY1]], [[V_MUL_F32_e64_8]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GCN-NEXT: FLAT_STORE_DWORD [[COPY1]], [[V_MUL_F32_e64_9]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 + ; GCN-NEXT: FLAT_STORE_DWORD [[COPY1]], [[V_MUL_F32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (f32), addrspace 1) + ; GCN-NEXT: FLAT_STORE_DWORD [[COPY1]], [[V_MUL_F32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (f32), addrspace 1) + ; GCN-NEXT: FLAT_STORE_DWORD [[COPY1]], [[V_MUL_F32_e64_2]], 0, 0, implicit $exec, implicit $flat_scr :: (store (f32), addrspace 1) + ; GCN-NEXT: FLAT_STORE_DWORD [[COPY1]], [[V_MUL_F32_e64_3]], 0, 0, implicit $exec, implicit $flat_scr :: (store (f32), addrspace 1) + ; GCN-NEXT: FLAT_STORE_DWORD [[COPY1]], [[V_MUL_F32_e64_4]], 0, 0, implicit $exec, implicit $flat_scr :: (store (f32), addrspace 1) + ; GCN-NEXT: FLAT_STORE_DWORD [[COPY1]], [[V_MUL_F32_e64_5]], 0, 0, implicit $exec, implicit $flat_scr :: (store (f32), addrspace 1) + ; GCN-NEXT: FLAT_STORE_DWORD [[COPY1]], [[V_MUL_F32_e64_6]], 0, 0, implicit $exec, implicit $flat_scr :: (store (f32), addrspace 1) + ; GCN-NEXT: FLAT_STORE_DWORD [[COPY1]], [[V_MUL_F32_e64_7]], 0, 0, implicit $exec, implicit $flat_scr :: (store (f32), addrspace 1) + ; GCN-NEXT: FLAT_STORE_DWORD [[COPY1]], [[V_MUL_F32_e64_8]], 0, 0, implicit $exec, implicit $flat_scr :: (store (f32), addrspace 1) + ; GCN-NEXT: FLAT_STORE_DWORD [[COPY1]], [[V_MUL_F32_e64_9]], 0, 0, implicit $exec, implicit $flat_scr :: (store (f32), addrspace 1) + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 %2:vgpr(p1) = COPY $vgpr2_vgpr3 + %3:vgpr(f32) = G_BITCAST %0(i32) + %4:vgpr(f32) = G_FABS %3 + %5:vgpr(f32) = G_BITCAST %0(i32) + %6:vgpr(f32) = G_FNEG %5 + %7:vgpr(f32) = G_FNEG %4 + %8:vgpr(f32) = G_BITCAST %0(i32) + %9:vgpr(f32) = G_FMUL %4, %8 + %10:vgpr(f32) = G_BITCAST %0(i32) + %11:vgpr(f32) = G_FMUL 
%10, %4 + %12:vgpr(f32) = G_FMUL %4, %4 + %13:vgpr(f32) = G_BITCAST %0(i32) + %14:vgpr(f32) = G_FMUL %6, %13 + %15:vgpr(f32) = G_BITCAST %0(i32) + %16:vgpr(f32) = G_FMUL %15, %6 + %17:vgpr(f32) = G_FMUL %6, %6 + %18:vgpr(f32) = G_BITCAST %0(i32) + %19:vgpr(f32) = G_FMUL %7, %18 + %20:vgpr(f32) = G_BITCAST %0(i32) + %21:vgpr(f32) = G_FMUL %20, %7 + %22:vgpr(f32) = G_FMUL %7, %7 + %23:vgpr(f32) = G_FMUL %7, %6 + G_STORE %9(f32), %2(p1) :: (store (f32), addrspace 1) + G_STORE %11(f32), %2(p1) :: (store (f32), addrspace 1) + G_STORE %12(f32), %2(p1) :: (store (f32), addrspace 1) + G_STORE %14(f32), %2(p1) :: (store (f32), addrspace 1) + G_STORE %16(f32), %2(p1) :: (store (f32), addrspace 1) + G_STORE %17(f32), %2(p1) :: (store (f32), addrspace 1) + G_STORE %19(f32), %2(p1) :: (store (f32), addrspace 1) + G_STORE %21(f32), %2(p1) :: (store (f32), addrspace 1) + G_STORE %22(f32), %2(p1) :: (store (f32), addrspace 1) + G_STORE %23(f32), %2(p1) :: (store (f32), addrspace 1) - %3:vgpr(s32) = G_FABS %0 - %4:vgpr(s32) = G_FNEG %0 - %5:vgpr(s32) = G_FNEG %3 - %6:vgpr(s32) = G_FMUL %3, %0 - %7:vgpr(s32) = G_FMUL %0, %3 - %8:vgpr(s32) = G_FMUL %3, %3 - %9:vgpr(s32) = G_FMUL %4, %0 - %10:vgpr(s32) = G_FMUL %0, %4 - %11:vgpr(s32) = G_FMUL %4, %4 - %12:vgpr(s32) = G_FMUL %5, %0 - %13:vgpr(s32) = G_FMUL %0, %5 - %14:vgpr(s32) = G_FMUL %5, %5 - %15:vgpr(s32) = G_FMUL %5, %4 - G_STORE %6, %2 :: (store (s32), addrspace 1) - G_STORE %7, %2 :: (store (s32), addrspace 1) - G_STORE %8, %2 :: (store (s32), addrspace 1) - G_STORE %9, %2 :: (store (s32), addrspace 1) - G_STORE %10, %2 :: (store (s32), addrspace 1) - G_STORE %11, %2 :: (store (s32), addrspace 1) - G_STORE %12, %2 :: (store (s32), addrspace 1) - G_STORE %13, %2 :: (store (s32), addrspace 1) - G_STORE %14, %2 :: (store (s32), addrspace 1) - G_STORE %15, %2 :: (store (s32), addrspace 1) ... @@ -202,10 +234,12 @@ body: | ; GCN-NEXT: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[COPY]], 0, [[V_MOV_B32_e32_]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: $vgpr0 = COPY [[V_MUL_F32_e64_]] ; GCN-NEXT: SI_RETURN implicit $vgpr0 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = G_FCONSTANT float 1.600000e+01 - %2:vgpr(s32) = G_FMUL %0, %1 - $vgpr0 = COPY %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(f32) = G_FCONSTANT float 1.600000e+01 + %2:vgpr(f32) = G_BITCAST %0(i32) + %3:vgpr(f32) = G_FMUL %2, %1 + %4:vgpr(i32) = G_BITCAST %3(f32) + $vgpr0 = COPY %4(i32) SI_RETURN implicit $vgpr0 ... @@ -226,10 +260,12 @@ body: | ; GCN-NEXT: [[V_LDEXP_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_LDEXP_F64_e64 0, [[COPY]], 0, [[S_MOV_B32_]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[V_LDEXP_F64_e64_]] ; GCN-NEXT: SI_RETURN implicit $vgpr0_vgpr1 - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_FCONSTANT double 1.600000e+01 - %2:vgpr(s64) = G_FMUL %0, %1 - $vgpr0_vgpr1 = COPY %2 + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(f64) = G_FCONSTANT double 1.600000e+01 + %2:vgpr(f64) = G_BITCAST %0(i64) + %3:vgpr(f64) = G_FMUL %2, %1 + %4:vgpr(i64) = G_BITCAST %3(f64) + $vgpr0_vgpr1 = COPY %4(i64) SI_RETURN implicit $vgpr0_vgpr1 ... 
@@ -250,10 +286,12 @@ body: | ; GCN-NEXT: [[V_LDEXP_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_LDEXP_F64_e64 0, [[COPY]], 0, [[S_MOV_B32_]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[V_LDEXP_F64_e64_]] ; GCN-NEXT: SI_RETURN implicit $vgpr0_vgpr1 - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_FCONSTANT double 36893488147419103232.0 - %2:vgpr(s64) = G_FMUL %0, %1 - $vgpr0_vgpr1 = COPY %2 + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(f64) = G_FCONSTANT double 0x4400000000000000 + %2:vgpr(f64) = G_BITCAST %0(i64) + %3:vgpr(f64) = G_FMUL %2, %1 + %4:vgpr(i64) = G_BITCAST %3(f64) + $vgpr0_vgpr1 = COPY %4(i64) SI_RETURN implicit $vgpr0_vgpr1 ... @@ -274,10 +312,12 @@ body: | ; GCN-NEXT: [[V_LDEXP_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_LDEXP_F64_e64 1, [[COPY]], 0, [[S_MOV_B32_]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[V_LDEXP_F64_e64_]] ; GCN-NEXT: SI_RETURN implicit $vgpr0_vgpr1 - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_FCONSTANT double -16.0 - %2:vgpr(s64) = G_FMUL %0, %1 - $vgpr0_vgpr1 = COPY %2 + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(f64) = G_FCONSTANT double -1.600000e+01 + %2:vgpr(f64) = G_BITCAST %0(i64) + %3:vgpr(f64) = G_FMUL %2, %1 + %4:vgpr(i64) = G_BITCAST %3(f64) + $vgpr0_vgpr1 = COPY %4(i64) SI_RETURN implicit $vgpr0_vgpr1 ... @@ -298,11 +338,13 @@ body: | ; GCN-NEXT: [[V_LDEXP_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_LDEXP_F64_e64 3, [[COPY]], 0, [[S_MOV_B32_]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[V_LDEXP_F64_e64_]] ; GCN-NEXT: SI_RETURN implicit $vgpr0_vgpr1 - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_FABS %0 - %2:vgpr(s64) = G_FCONSTANT double -16.0 - %3:vgpr(s64) = G_FMUL %1, %2 - $vgpr0_vgpr1 = COPY %3 + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(f64) = G_BITCAST %0(i64) + %2:vgpr(f64) = G_FABS %1 + %3:vgpr(f64) = G_FCONSTANT double -1.600000e+01 + %4:vgpr(f64) = G_FMUL %2, %3 + %5:vgpr(i64) = G_BITCAST %4(f64) + $vgpr0_vgpr1 = COPY %5(i64) SI_RETURN implicit $vgpr0_vgpr1 ... @@ -328,12 +370,14 @@ body: | ; GCN-NEXT: [[V_LDEXP_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_LDEXP_F64_e64 1, [[REG_SEQUENCE]], 0, [[S_MOV_B32_1]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[V_LDEXP_F64_e64_]] ; GCN-NEXT: SI_RETURN implicit $vgpr0_vgpr1 - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_FABS %0 - %2:vgpr(s64) = G_FNEG %1 - %3:vgpr(s64) = G_FCONSTANT double -16.0 - %4:vgpr(s64) = G_FMUL %2, %3 - $vgpr0_vgpr1 = COPY %4 + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(f64) = G_BITCAST %0(i64) + %2:vgpr(f64) = G_FABS %1 + %3:vgpr(f64) = G_FNEG %2 + %4:vgpr(f64) = G_FCONSTANT double -1.600000e+01 + %5:vgpr(f64) = G_FMUL %3, %4 + %6:vgpr(i64) = G_BITCAST %5(f64) + $vgpr0_vgpr1 = COPY %6(i64) SI_RETURN implicit $vgpr0_vgpr1 ... 
@@ -354,13 +398,15 @@ body: | ; GCN-NEXT: [[V_MUL_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_MUL_F64_e64 3, [[COPY]], 1, [[V_MOV_B]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[V_MUL_F64_e64_]] ; GCN-NEXT: SI_RETURN implicit $vgpr0_vgpr1 - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_FABS %0 - %2:vgpr(s64) = G_FNEG %1 - %3:vgpr(s64) = G_FCONSTANT double -16.0 - %4:vgpr(s64) = G_FNEG %3 - %5:vgpr(s64) = G_FMUL %2, %4 - $vgpr0_vgpr1 = COPY %5 + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(f64) = G_BITCAST %0(i64) + %2:vgpr(f64) = G_FABS %1 + %3:vgpr(f64) = G_FNEG %2 + %4:vgpr(f64) = G_FCONSTANT double -1.600000e+01 + %5:vgpr(f64) = G_FNEG %4 + %6:vgpr(f64) = G_FMUL %3, %5 + %7:vgpr(i64) = G_BITCAST %6(f64) + $vgpr0_vgpr1 = COPY %7(i64) SI_RETURN implicit $vgpr0_vgpr1 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.v2s16.mir index 873d03ceebe28..6b66fd245a2ba 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.v2s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.v2s16.mir @@ -17,12 +17,15 @@ body: | ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: %2:vgpr_32 = nofpexcept V_PK_MUL_F16 8, [[COPY]], 8, [[COPY1]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX9-NEXT: S_ENDPGM 0, implicit %2 - %0:vgpr(<2 x s16>) = COPY $vgpr0 - %1:vgpr(<2 x s16>) = COPY $vgpr1 - %2:vgpr(<2 x s16>) = G_FMUL %0, %1 - S_ENDPGM 0, implicit %2 + ; GFX9-NEXT: [[V_PK_MUL_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MUL_F16 8, [[COPY]], 8, [[COPY1]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_PK_MUL_F16_]] + %0:vgpr(<2 x i16>) = COPY $vgpr0 + %1:vgpr(<2 x i16>) = COPY $vgpr1 + %2:vgpr(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %3:vgpr(<2 x f16>) = G_BITCAST %1(<2 x i16>) + %4:vgpr(<2 x f16>) = G_FMUL %2, %3 + %5:vgpr(<2 x i16>) = G_BITCAST %4(<2 x f16>) + S_ENDPGM 0, implicit %5(<2 x i16>) ... --- @@ -39,14 +42,17 @@ body: | ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: %4:vgpr_32 = nofpexcept V_PK_MUL_F16 11, [[COPY]], 11, [[COPY1]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX9-NEXT: S_ENDPGM 0, implicit %4 - %0:vgpr(<2 x s16>) = COPY $vgpr0 - %1:vgpr(<2 x s16>) = COPY $vgpr1 - %2:vgpr(<2 x s16>) = G_FNEG %0 - %3:vgpr(<2 x s16>) = G_FNEG %1 - %4:vgpr(<2 x s16>) = G_FMUL %2, %3 - S_ENDPGM 0, implicit %4 + ; GFX9-NEXT: [[V_PK_MUL_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MUL_F16 11, [[COPY]], 11, [[COPY1]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_PK_MUL_F16_]] + %0:vgpr(<2 x i16>) = COPY $vgpr0 + %1:vgpr(<2 x i16>) = COPY $vgpr1 + %2:vgpr(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %3:vgpr(<2 x f16>) = G_FNEG %2 + %4:vgpr(<2 x f16>) = G_BITCAST %1(<2 x i16>) + %5:vgpr(<2 x f16>) = G_FNEG %4 + %6:vgpr(<2 x f16>) = G_FMUL %3, %5 + %7:vgpr(<2 x i16>) = G_BITCAST %6(<2 x f16>) + S_ENDPGM 0, implicit %7(<2 x i16>) ... 
--- @@ -68,15 +74,20 @@ body: | ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY1]], implicit $exec ; GFX9-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 65535, [[V_XOR_B32_e64_]], implicit $exec ; GFX9-NEXT: [[V_LSHL_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 [[COPY2]], 16, [[V_AND_B32_e32_]], implicit $exec - ; GFX9-NEXT: %7:vgpr_32 = nofpexcept V_PK_MUL_F16 8, [[V_LSHL_OR_B32_e64_]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX9-NEXT: S_ENDPGM 0, implicit %7 - %0:vgpr(<2 x s16>) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s16) = G_TRUNC %1 - %4:vgpr(s16) = G_FNEG %3 - %5:vgpr(s32) = G_ANYEXT %4 - %6:vgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %5, %2 - %7:vgpr(<2 x s16>) = G_FMUL %6, %0 - S_ENDPGM 0, implicit %7 + ; GFX9-NEXT: [[V_PK_MUL_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MUL_F16 8, [[V_LSHL_OR_B32_e64_]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_PK_MUL_F16_]] + %0:vgpr(<2 x i16>) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(i16) = G_TRUNC %1(i32) + %4:vgpr(f16) = G_BITCAST %3(i16) + %5:vgpr(f16) = G_FNEG %4 + %6:vgpr(i16) = G_BITCAST %5(f16) + %7:vgpr(i32) = G_ANYEXT %6(i16) + %8:vgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC %7(i32), %2(i32) + %9:vgpr(<2 x f16>) = G_BITCAST %8(<2 x i16>) + %10:vgpr(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %11:vgpr(<2 x f16>) = G_FMUL %9, %10 + %12:vgpr(<2 x i16>) = G_BITCAST %11(<2 x f16>) + S_ENDPGM 0, implicit %12(<2 x i16>) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fneg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fneg.mir index acda00231ec61..55c337992f85c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fneg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fneg.mir @@ -45,9 +45,11 @@ body: | ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 ; GFX10-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def dead $scc ; GFX10-NEXT: $sgpr0 = COPY [[S_XOR_B32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = G_FNEG %0 - $sgpr0 = COPY %1 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(f32) = G_BITCAST %0(i32) + %2:sgpr(f32) = G_FNEG %1 + %3:sgpr(i32) = G_BITCAST %2(f32) + $sgpr0 = COPY %3(i32) ... --- @@ -90,9 +92,11 @@ body: | ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec ; GFX10-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = G_FNEG %0 - $vgpr0 = COPY %1 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(f32) = G_BITCAST %0(i32) + %2:vgpr(f32) = G_FNEG %1 + %3:vgpr(i32) = G_BITCAST %2(f32) + $vgpr0 = COPY %3(i32) ... 
--- @@ -107,33 +111,43 @@ body: | ; SI-LABEL: name: fneg_s32_vs ; SI: liveins: $sgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; SI-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(s32) = G_FNEG [[COPY]] - ; SI-NEXT: $vgpr0 = COPY [[FNEG]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(f32) = G_FNEG [[BITCAST]] + ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY [[FNEG]](f32) + ; SI-NEXT: $vgpr0 = COPY [[COPY1]](i32) ; ; VI-LABEL: name: fneg_s32_vs ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; VI-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(s32) = G_FNEG [[COPY]] - ; VI-NEXT: $vgpr0 = COPY [[FNEG]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(f32) = G_FNEG [[BITCAST]] + ; VI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY [[FNEG]](f32) + ; VI-NEXT: $vgpr0 = COPY [[COPY1]](i32) ; ; GFX9-LABEL: name: fneg_s32_vs ; GFX9: liveins: $sgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX9-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(s32) = G_FNEG [[COPY]] - ; GFX9-NEXT: $vgpr0 = COPY [[FNEG]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(f32) = G_FNEG [[BITCAST]] + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY [[FNEG]](f32) + ; GFX9-NEXT: $vgpr0 = COPY [[COPY1]](i32) ; ; GFX10-LABEL: name: fneg_s32_vs ; GFX10: liveins: $sgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX10-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(s32) = G_FNEG [[COPY]] - ; GFX10-NEXT: $vgpr0 = COPY [[FNEG]](s32) - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = G_FNEG %0 - $vgpr0 = COPY %1 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; GFX10-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(f32) = G_FNEG [[BITCAST]] + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY [[FNEG]](f32) + ; GFX10-NEXT: $vgpr0 = COPY [[COPY1]](i32) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(f32) = G_BITCAST %0(i32) + %2:vgpr(f32) = G_FNEG %1 + %3:vgpr(i32) = G_BITCAST %2(f32) + $vgpr0 = COPY %3(i32) ... --- @@ -176,11 +190,13 @@ body: | ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 ; GFX10-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def dead $scc ; GFX10-NEXT: $sgpr0 = COPY [[S_XOR_B32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0 - %2:sgpr(s16) = G_FNEG %1 - %3:sgpr(s32) = G_ANYEXT %2 - $sgpr0 = COPY %3 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(f16) = G_BITCAST %1(i16) + %3:sgpr(f16) = G_FNEG %2 + %4:sgpr(i16) = G_BITCAST %3(f16) + %5:sgpr(i32) = G_ANYEXT %4(i16) + $sgpr0 = COPY %5(i32) ... 
--- @@ -223,11 +239,13 @@ body: | ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec ; GFX10-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s16) = G_TRUNC %0 - %2:vgpr(s16) = G_FNEG %1 - %3:vgpr(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i16) = G_TRUNC %0(i32) + %2:vgpr(f16) = G_BITCAST %1(i16) + %3:vgpr(f16) = G_FNEG %2 + %4:vgpr(i16) = G_BITCAST %3(f16) + %5:vgpr(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... --- @@ -243,43 +261,53 @@ body: | ; SI-LABEL: name: fneg_s16_vs ; SI: liveins: $sgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; SI-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(s16) = G_FNEG [[TRUNC]] - ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FNEG]](s16) - ; SI-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f16) = G_BITCAST [[TRUNC]](i16) + ; SI-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(f16) = G_FNEG [[BITCAST]] + ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i16) = COPY [[FNEG]](f16) + ; SI-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY [[COPY1]](i16) + ; SI-NEXT: $vgpr0 = COPY [[COPY2]](i32) ; ; VI-LABEL: name: fneg_s16_vs ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(s16) = G_FNEG [[TRUNC]] - ; VI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FNEG]](s16) - ; VI-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f16) = G_BITCAST [[TRUNC]](i16) + ; VI-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(f16) = G_FNEG [[BITCAST]] + ; VI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i16) = COPY [[FNEG]](f16) + ; VI-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY [[COPY1]](i16) + ; VI-NEXT: $vgpr0 = COPY [[COPY2]](i32) ; ; GFX9-LABEL: name: fneg_s16_vs ; GFX9: liveins: $sgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(s16) = G_FNEG [[TRUNC]] - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FNEG]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(f16) = G_FNEG [[BITCAST]] + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i16) = COPY [[FNEG]](f16) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY [[COPY1]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[COPY2]](i32) ; ; GFX10-LABEL: name: fneg_s16_vs ; GFX10: liveins: $sgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX10-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(s16) = G_FNEG [[TRUNC]] - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FNEG]](s16) - ; GFX10-NEXT: $vgpr0 = COPY [[COPY1]](s32) - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0 - %2:vgpr(s16) = G_FNEG %1 - %3:vgpr(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; GFX10-NEXT: 
[[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(f16) = G_FNEG [[BITCAST]] + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i16) = COPY [[FNEG]](f16) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY [[COPY1]](i16) + ; GFX10-NEXT: $vgpr0 = COPY [[COPY2]](i32) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(f16) = G_BITCAST %1(i16) + %3:vgpr(f16) = G_FNEG %2 + %4:vgpr(i16) = G_BITCAST %3(f16) + %5:vgpr(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... --- @@ -322,9 +350,11 @@ body: | ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 ; GFX10-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def dead $scc ; GFX10-NEXT: $sgpr0 = COPY [[S_XOR_B32_]] - %0:sgpr(<2 x s16>) = COPY $sgpr0 - %1:sgpr(<2 x s16>) = G_FNEG %0 - $sgpr0 = COPY %1 + %0:sgpr(<2 x i16>) = COPY $sgpr0 + %1:sgpr(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %2:sgpr(<2 x f16>) = G_FNEG %1 + %3:sgpr(<2 x i16>) = G_BITCAST %2(<2 x f16>) + $sgpr0 = COPY %3(<2 x i16>) ... --- @@ -367,9 +397,11 @@ body: | ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec ; GFX10-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] - %0:vgpr(<2 x s16>) = COPY $vgpr0 - %1:vgpr(<2 x s16>) = G_FNEG %0 - $vgpr0 = COPY %1 + %0:vgpr(<2 x i16>) = COPY $vgpr0 + %1:vgpr(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %2:vgpr(<2 x f16>) = G_FNEG %1 + %3:vgpr(<2 x i16>) = G_BITCAST %2(<2 x f16>) + $vgpr0 = COPY %3(<2 x i16>) ... --- @@ -384,33 +416,43 @@ body: | ; SI-LABEL: name: fneg_v2s16_vs ; SI: liveins: $sgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; SI-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FNEG [[COPY]] - ; SI-NEXT: $vgpr0 = COPY [[FNEG]](<2 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr0 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:sgpr(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; SI-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(<2 x f16>) = G_FNEG [[BITCAST]] + ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(<2 x i16>) = COPY [[FNEG]](<2 x f16>) + ; SI-NEXT: $vgpr0 = COPY [[COPY1]](<2 x i16>) ; ; VI-LABEL: name: fneg_v2s16_vs ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; VI-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FNEG [[COPY]] - ; VI-NEXT: $vgpr0 = COPY [[FNEG]](<2 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr0 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:sgpr(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; VI-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(<2 x f16>) = G_FNEG [[BITCAST]] + ; VI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(<2 x i16>) = COPY [[FNEG]](<2 x f16>) + ; VI-NEXT: $vgpr0 = COPY [[COPY1]](<2 x i16>) ; ; GFX9-LABEL: name: fneg_v2s16_vs ; GFX9: liveins: $sgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; GFX9-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FNEG [[COPY]] - ; GFX9-NEXT: $vgpr0 = COPY [[FNEG]](<2 x s16>) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:sgpr(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(<2 x f16>) = G_FNEG [[BITCAST]] + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(<2 x i16>) = COPY [[FNEG]](<2 x f16>) + ; GFX9-NEXT: $vgpr0 = COPY [[COPY1]](<2 x i16>) ; ; GFX10-LABEL: name: 
fneg_v2s16_vs ; GFX10: liveins: $sgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; GFX10-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FNEG [[COPY]] - ; GFX10-NEXT: $vgpr0 = COPY [[FNEG]](<2 x s16>) - %0:sgpr(<2 x s16>) = COPY $sgpr0 - %1:vgpr(<2 x s16>) = G_FNEG %0 - $vgpr0 = COPY %1 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr0 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:sgpr(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX10-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(<2 x f16>) = G_FNEG [[BITCAST]] + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(<2 x i16>) = COPY [[FNEG]](<2 x f16>) + ; GFX10-NEXT: $vgpr0 = COPY [[COPY1]](<2 x i16>) + %0:sgpr(<2 x i16>) = COPY $sgpr0 + %1:sgpr(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %2:vgpr(<2 x f16>) = G_FNEG %1 + %3:vgpr(<2 x i16>) = G_BITCAST %2(<2 x f16>) + $vgpr0 = COPY %3(<2 x i16>) ... --- @@ -469,9 +511,11 @@ body: | ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[COPY]].sub0 ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 ; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] - %0:sgpr(s64) = COPY $sgpr0_sgpr1 - %1:sgpr(s64) = G_FNEG %0 - S_ENDPGM 0, implicit %1 + %0:sgpr(i64) = COPY $sgpr0_sgpr1 + %1:sgpr(f64) = G_BITCAST %0(i64) + %2:sgpr(f64) = G_FNEG %1 + %3:sgpr(i64) = G_BITCAST %2(f64) + S_ENDPGM 0, implicit %3(i64) ... --- @@ -526,9 +570,11 @@ body: | ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_XOR_B32_e64_]], %subreg.sub1 ; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_FNEG %0 - S_ENDPGM 0, implicit %1 + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(f64) = G_BITCAST %0(i64) + %2:vgpr(f64) = G_FNEG %1 + %3:vgpr(i64) = G_BITCAST %2(f64) + S_ENDPGM 0, implicit %3(i64) ... 
--- @@ -543,33 +589,43 @@ body: | ; SI-LABEL: name: fneg_s64_vs ; SI: liveins: $sgpr0_sgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; SI-NEXT: [[FNEG:%[0-9]+]]:vgpr(s64) = G_FNEG [[COPY]] - ; SI-NEXT: S_ENDPGM 0, implicit [[FNEG]](s64) + ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f64) = G_BITCAST [[COPY]](i64) + ; SI-NEXT: [[FNEG:%[0-9]+]]:vreg_64(f64) = G_FNEG [[BITCAST]] + ; SI-NEXT: [[COPY1:%[0-9]+]]:vreg_64(i64) = COPY [[FNEG]](f64) + ; SI-NEXT: S_ENDPGM 0, implicit [[COPY1]](i64) ; ; VI-LABEL: name: fneg_s64_vs ; VI: liveins: $sgpr0_sgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; VI-NEXT: [[FNEG:%[0-9]+]]:vgpr(s64) = G_FNEG [[COPY]] - ; VI-NEXT: S_ENDPGM 0, implicit [[FNEG]](s64) + ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f64) = G_BITCAST [[COPY]](i64) + ; VI-NEXT: [[FNEG:%[0-9]+]]:vreg_64(f64) = G_FNEG [[BITCAST]] + ; VI-NEXT: [[COPY1:%[0-9]+]]:vreg_64(i64) = COPY [[FNEG]](f64) + ; VI-NEXT: S_ENDPGM 0, implicit [[COPY1]](i64) ; ; GFX9-LABEL: name: fneg_s64_vs ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[FNEG:%[0-9]+]]:vgpr(s64) = G_FNEG [[COPY]] - ; GFX9-NEXT: S_ENDPGM 0, implicit [[FNEG]](s64) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f64) = G_BITCAST [[COPY]](i64) + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:vreg_64(f64) = G_FNEG [[BITCAST]] + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64(i64) = COPY [[FNEG]](f64) + ; GFX9-NEXT: S_ENDPGM 0, implicit [[COPY1]](i64) ; ; GFX10-LABEL: name: fneg_s64_vs ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[FNEG:%[0-9]+]]:vgpr(s64) = G_FNEG [[COPY]] - ; GFX10-NEXT: S_ENDPGM 0, implicit [[FNEG]](s64) - %0:sgpr(s64) = COPY $sgpr0_sgpr1 - %1:vgpr(s64) = G_FNEG %0 - S_ENDPGM 0, implicit %1 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f64) = G_BITCAST [[COPY]](i64) + ; GFX10-NEXT: [[FNEG:%[0-9]+]]:vreg_64(f64) = G_FNEG [[BITCAST]] + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64(i64) = COPY [[FNEG]](f64) + ; GFX10-NEXT: S_ENDPGM 0, implicit [[COPY1]](i64) + %0:sgpr(i64) = COPY $sgpr0_sgpr1 + %1:sgpr(f64) = G_BITCAST %0(i64) + %2:vgpr(f64) = G_FNEG %1 + %3:vgpr(i64) = G_BITCAST %2(f64) + S_ENDPGM 0, implicit %3(i64) ... @@ -613,10 +669,12 @@ body: | ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def dead $scc ; GFX10-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = G_FABS %0 - %2:sgpr(s32) = G_FNEG %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(f32) = G_BITCAST %0(i32) + %2:sgpr(f32) = G_FABS %1 + %3:sgpr(f32) = G_FNEG %2 + %4:sgpr(i32) = G_BITCAST %3(f32) + S_ENDPGM 0, implicit %4(i32) ... 
--- @@ -659,10 +717,13 @@ body: | ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = G_FABS %0 - %2:vgpr(s32) = G_FNEG %0 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(f32) = G_BITCAST %0(i32) + %2:vgpr(f32) = G_FABS %1 + %3:vgpr(f32) = G_BITCAST %0(i32) + %4:vgpr(f32) = G_FNEG %3 + %5:vgpr(i32) = G_BITCAST %4(f32) + S_ENDPGM 0, implicit %5(i32) ... --- @@ -677,42 +738,52 @@ body: | ; SI-LABEL: name: fneg_fabs_s32_vs ; SI: liveins: $sgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; SI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(s32) = G_FABS [[COPY]] - ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s16) = S_MOV_B32 2147483648 - ; SI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(s32) = V_XOR_B32_e64 [[S_MOV_B32_]](s16), [[FABS]](s32), implicit $exec - ; SI-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(f32) = G_FABS [[BITCAST]] + ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(i16) = S_MOV_B32 2147483648 + ; SI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(f32) = V_XOR_B32_e64 [[S_MOV_B32_]](i16), [[FABS]](f32), implicit $exec + ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY [[V_XOR_B32_e64_]](f32) + ; SI-NEXT: S_ENDPGM 0, implicit [[COPY1]](i32) ; ; VI-LABEL: name: fneg_fabs_s32_vs ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; VI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(s32) = G_FABS [[COPY]] - ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s16) = S_MOV_B32 2147483648 - ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(s32) = V_XOR_B32_e64 [[S_MOV_B32_]](s16), [[FABS]](s32), implicit $exec - ; VI-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(f32) = G_FABS [[BITCAST]] + ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(i16) = S_MOV_B32 2147483648 + ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(f32) = V_XOR_B32_e64 [[S_MOV_B32_]](i16), [[FABS]](f32), implicit $exec + ; VI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY [[V_XOR_B32_e64_]](f32) + ; VI-NEXT: S_ENDPGM 0, implicit [[COPY1]](i32) ; ; GFX9-LABEL: name: fneg_fabs_s32_vs ; GFX9: liveins: $sgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX9-NEXT: [[FABS:%[0-9]+]]:vgpr_32(s32) = G_FABS [[COPY]] - ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s16) = S_MOV_B32 2147483648 - ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(s32) = V_XOR_B32_e64 [[S_MOV_B32_]](s16), [[FABS]](s32), implicit $exec - ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[FABS:%[0-9]+]]:vgpr_32(f32) = G_FABS [[BITCAST]] + ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(i16) = S_MOV_B32 2147483648 + ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(f32) = V_XOR_B32_e64 [[S_MOV_B32_]](i16), [[FABS]](f32), implicit $exec + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY [[V_XOR_B32_e64_]](f32) + ; GFX9-NEXT: S_ENDPGM 0, implicit [[COPY1]](i32) ; ; GFX10-LABEL: name: fneg_fabs_s32_vs ; GFX10: liveins: 
$sgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX10-NEXT: [[FABS:%[0-9]+]]:vgpr_32(s32) = G_FABS [[COPY]] - ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s16) = S_MOV_B32 2147483648 - ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(s32) = V_XOR_B32_e64 [[S_MOV_B32_]](s16), [[FABS]](s32), implicit $exec - ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]](s32) - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = G_FABS %0 - %2:vgpr(s32) = G_FNEG %1 - S_ENDPGM 0, implicit %2 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; GFX10-NEXT: [[FABS:%[0-9]+]]:vgpr_32(f32) = G_FABS [[BITCAST]] + ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(i16) = S_MOV_B32 2147483648 + ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(f32) = V_XOR_B32_e64 [[S_MOV_B32_]](i16), [[FABS]](f32), implicit $exec + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY [[V_XOR_B32_e64_]](f32) + ; GFX10-NEXT: S_ENDPGM 0, implicit [[COPY1]](i32) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(f32) = G_BITCAST %0(i32) + %2:vgpr(f32) = G_FABS %1 + %3:vgpr(f32) = G_FNEG %2 + %4:vgpr(i32) = G_BITCAST %3(f32) + S_ENDPGM 0, implicit %4(i32) ... --- @@ -755,12 +826,14 @@ body: | ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def dead $scc ; GFX10-NEXT: $sgpr0 = COPY [[S_OR_B32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0 - %2:sgpr(s16) = G_FABS %1 - %3:sgpr(s16) = G_FNEG %2 - %4:sgpr(s32) = G_ANYEXT %3 - $sgpr0 = COPY %4 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(f16) = G_BITCAST %1(i16) + %3:sgpr(f16) = G_FABS %2 + %4:sgpr(f16) = G_FNEG %3 + %5:sgpr(i16) = G_BITCAST %4(f16) + %6:sgpr(i32) = G_ANYEXT %5(i16) + $sgpr0 = COPY %6(i32) ... --- @@ -807,12 +880,14 @@ body: | ; GFX10-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[V_OR_B32_e64_]] ; GFX10-NEXT: $vgpr0 = COPY [[COPY1]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s16) = G_TRUNC %0 - %2:vgpr(s16) = G_FABS %1 - %3:vgpr(s16) = G_FNEG %2 - %4:sgpr(s32) = G_ANYEXT %3 - $vgpr0 = COPY %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i16) = G_TRUNC %0(i32) + %2:vgpr(f16) = G_BITCAST %1(i16) + %3:vgpr(f16) = G_FABS %2 + %4:vgpr(f16) = G_FNEG %3 + %5:vgpr(i16) = G_BITCAST %4(f16) + %6:sgpr(i32) = G_ANYEXT %5(i16) + $vgpr0 = COPY %6(i32) ... 
--- @@ -828,48 +903,58 @@ body: | ; SI-LABEL: name: fneg_fabs_s16_vs ; SI: liveins: $sgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; SI-NEXT: [[FNEG:%[0-9]+]]:sgpr(s16) = G_FNEG [[TRUNC]] - ; SI-NEXT: [[FNEG1:%[0-9]+]]:vgpr_32(s16) = G_FNEG [[FNEG]] - ; SI-NEXT: [[COPY1:%[0-9]+]]:sreg_32(s32) = COPY [[FNEG1]](s16) - ; SI-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f16) = G_BITCAST [[TRUNC]](i16) + ; SI-NEXT: [[FNEG:%[0-9]+]]:sgpr(f16) = G_FNEG [[BITCAST]] + ; SI-NEXT: [[FNEG1:%[0-9]+]]:vgpr_32(f16) = G_FNEG [[FNEG]] + ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i16) = COPY [[FNEG1]](f16) + ; SI-NEXT: [[COPY2:%[0-9]+]]:sreg_32(i32) = COPY [[COPY1]](i16) + ; SI-NEXT: $vgpr0 = COPY [[COPY2]](i32) ; ; VI-LABEL: name: fneg_fabs_s16_vs ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[FNEG:%[0-9]+]]:sgpr(s16) = G_FNEG [[TRUNC]] - ; VI-NEXT: [[FNEG1:%[0-9]+]]:vgpr_32(s16) = G_FNEG [[FNEG]] - ; VI-NEXT: [[COPY1:%[0-9]+]]:sreg_32(s32) = COPY [[FNEG1]](s16) - ; VI-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f16) = G_BITCAST [[TRUNC]](i16) + ; VI-NEXT: [[FNEG:%[0-9]+]]:sgpr(f16) = G_FNEG [[BITCAST]] + ; VI-NEXT: [[FNEG1:%[0-9]+]]:vgpr_32(f16) = G_FNEG [[FNEG]] + ; VI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i16) = COPY [[FNEG1]](f16) + ; VI-NEXT: [[COPY2:%[0-9]+]]:sreg_32(i32) = COPY [[COPY1]](i16) + ; VI-NEXT: $vgpr0 = COPY [[COPY2]](i32) ; ; GFX9-LABEL: name: fneg_fabs_s16_vs ; GFX9: liveins: $sgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[FNEG:%[0-9]+]]:sgpr(s16) = G_FNEG [[TRUNC]] - ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:vgpr_32(s16) = G_FNEG [[FNEG]] - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32(s32) = COPY [[FNEG1]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:sgpr(f16) = G_FNEG [[BITCAST]] + ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:vgpr_32(f16) = G_FNEG [[FNEG]] + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i16) = COPY [[FNEG1]](f16) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32(i32) = COPY [[COPY1]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[COPY2]](i32) ; ; GFX10-LABEL: name: fneg_fabs_s16_vs ; GFX10: liveins: $sgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX10-NEXT: [[FNEG:%[0-9]+]]:sgpr(s16) = G_FNEG [[TRUNC]] - ; GFX10-NEXT: [[FNEG1:%[0-9]+]]:vgpr_32(s16) = G_FNEG [[FNEG]] - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32(s32) = COPY [[FNEG1]](s16) - ; GFX10-NEXT: $vgpr0 = COPY [[COPY1]](s32) - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0 - %2:sgpr(s16) = G_FNEG %1 - %3:vgpr(s16) = G_FNEG %2 - %4:sgpr(s32) = G_ANYEXT %3 - $vgpr0 = COPY %4 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX10-NEXT: 
[[BITCAST:%[0-9]+]]:sgpr(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[FNEG:%[0-9]+]]:sgpr(f16) = G_FNEG [[BITCAST]] + ; GFX10-NEXT: [[FNEG1:%[0-9]+]]:vgpr_32(f16) = G_FNEG [[FNEG]] + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i16) = COPY [[FNEG1]](f16) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32(i32) = COPY [[COPY1]](i16) + ; GFX10-NEXT: $vgpr0 = COPY [[COPY2]](i32) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(f16) = G_BITCAST %1(i16) + %3:sgpr(f16) = G_FNEG %2 + %4:vgpr(f16) = G_FNEG %3 + %5:vgpr(i16) = G_BITCAST %4(f16) + %6:sgpr(i32) = G_ANYEXT %5(i16) + $vgpr0 = COPY %6(i32) ... --- @@ -912,10 +997,12 @@ body: | ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def dead $scc ; GFX10-NEXT: $sgpr0 = COPY [[S_OR_B32_]] - %0:sgpr(<2 x s16>) = COPY $sgpr0 - %1:sgpr(<2 x s16>) = G_FABS %0 - %2:sgpr(<2 x s16>) = G_FNEG %1 - $sgpr0 = COPY %2 + %0:sgpr(<2 x i16>) = COPY $sgpr0 + %1:sgpr(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %2:sgpr(<2 x f16>) = G_FABS %1 + %3:sgpr(<2 x f16>) = G_FNEG %2 + %4:sgpr(<2 x i16>) = G_BITCAST %3(<2 x f16>) + $sgpr0 = COPY %4(<2 x i16>) ... --- @@ -958,10 +1045,13 @@ body: | ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec ; GFX10-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] - %0:vgpr(<2 x s16>) = COPY $vgpr0 - %1:vgpr(<2 x s16>) = G_FABS %0 - %2:vgpr(<2 x s16>) = G_FNEG %0 - $vgpr0 = COPY %2 + %0:vgpr(<2 x i16>) = COPY $vgpr0 + %1:vgpr(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %2:vgpr(<2 x f16>) = G_FABS %1 + %3:vgpr(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %4:vgpr(<2 x f16>) = G_FNEG %3 + %5:vgpr(<2 x i16>) = G_BITCAST %4(<2 x f16>) + $vgpr0 = COPY %5(<2 x i16>) ... 
--- @@ -976,42 +1066,52 @@ body: | ; SI-LABEL: name: fneg_fabs_v2s16_vs ; SI: liveins: $sgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; SI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FABS [[COPY]] - ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s16) = S_MOV_B32 2147516416 - ; SI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(<2 x s16>) = V_XOR_B32_e64 [[S_MOV_B32_]](s16), [[FABS]](<2 x s16>), implicit $exec - ; SI-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]](<2 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr0 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:sgpr(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; SI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(<2 x f16>) = G_FABS [[BITCAST]] + ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(i16) = S_MOV_B32 2147516416 + ; SI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(<2 x f16>) = V_XOR_B32_e64 [[S_MOV_B32_]](i16), [[FABS]](<2 x f16>), implicit $exec + ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(<2 x i16>) = COPY [[V_XOR_B32_e64_]](<2 x f16>) + ; SI-NEXT: $vgpr0 = COPY [[COPY1]](<2 x i16>) ; ; VI-LABEL: name: fneg_fabs_v2s16_vs ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; VI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FABS [[COPY]] - ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s16) = S_MOV_B32 2147516416 - ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(<2 x s16>) = V_XOR_B32_e64 [[S_MOV_B32_]](s16), [[FABS]](<2 x s16>), implicit $exec - ; VI-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]](<2 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr0 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:sgpr(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; VI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(<2 x f16>) = G_FABS [[BITCAST]] + ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(i16) = S_MOV_B32 2147516416 + ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(<2 x f16>) = V_XOR_B32_e64 [[S_MOV_B32_]](i16), [[FABS]](<2 x f16>), implicit $exec + ; VI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(<2 x i16>) = COPY [[V_XOR_B32_e64_]](<2 x f16>) + ; VI-NEXT: $vgpr0 = COPY [[COPY1]](<2 x i16>) ; ; GFX9-LABEL: name: fneg_fabs_v2s16_vs ; GFX9: liveins: $sgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; GFX9-NEXT: [[FABS:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FABS [[COPY]] - ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s16) = S_MOV_B32 2147516416 - ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(<2 x s16>) = V_XOR_B32_e64 [[S_MOV_B32_]](s16), [[FABS]](<2 x s16>), implicit $exec - ; GFX9-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]](<2 x s16>) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:sgpr(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX9-NEXT: [[FABS:%[0-9]+]]:vgpr_32(<2 x f16>) = G_FABS [[BITCAST]] + ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(i16) = S_MOV_B32 2147516416 + ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(<2 x f16>) = V_XOR_B32_e64 [[S_MOV_B32_]](i16), [[FABS]](<2 x f16>), implicit $exec + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(<2 x i16>) = COPY [[V_XOR_B32_e64_]](<2 x f16>) + ; GFX9-NEXT: $vgpr0 = COPY [[COPY1]](<2 x i16>) ; ; GFX10-LABEL: name: fneg_fabs_v2s16_vs ; GFX10: liveins: $sgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; GFX10-NEXT: [[FABS:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FABS [[COPY]] - ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s16) = S_MOV_B32 2147516416 - ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(<2 x s16>) = V_XOR_B32_e64 [[S_MOV_B32_]](s16), [[FABS]](<2 x s16>), implicit $exec - ; 
GFX10-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]](<2 x s16>) - %0:sgpr(<2 x s16>) = COPY $sgpr0 - %1:vgpr(<2 x s16>) = G_FABS %0 - %2:vgpr(<2 x s16>) = G_FNEG %1 - $vgpr0 = COPY %2 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr0 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:sgpr(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX10-NEXT: [[FABS:%[0-9]+]]:vgpr_32(<2 x f16>) = G_FABS [[BITCAST]] + ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(i16) = S_MOV_B32 2147516416 + ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(<2 x f16>) = V_XOR_B32_e64 [[S_MOV_B32_]](i16), [[FABS]](<2 x f16>), implicit $exec + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(<2 x i16>) = COPY [[V_XOR_B32_e64_]](<2 x f16>) + ; GFX10-NEXT: $vgpr0 = COPY [[COPY1]](<2 x i16>) + %0:sgpr(<2 x i16>) = COPY $sgpr0 + %1:sgpr(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %2:vgpr(<2 x f16>) = G_FABS %1 + %3:vgpr(<2 x f16>) = G_FNEG %2 + %4:vgpr(<2 x i16>) = G_BITCAST %3(<2 x f16>) + $vgpr0 = COPY %4(<2 x i16>) ... --- @@ -1070,10 +1170,12 @@ body: | ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[COPY]].sub0 ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 ; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] - %0:sgpr(s64) = COPY $sgpr0_sgpr1 - %1:sgpr(s64) = G_FABS %0 - %2:sgpr(s64) = G_FNEG %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i64) = COPY $sgpr0_sgpr1 + %1:sgpr(f64) = G_BITCAST %0(i64) + %2:sgpr(f64) = G_FABS %1 + %3:sgpr(f64) = G_FNEG %2 + %4:sgpr(i64) = G_BITCAST %3(f64) + S_ENDPGM 0, implicit %4(i64) ... --- @@ -1128,10 +1230,12 @@ body: | ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_OR_B32_e64_]], %subreg.sub1 ; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_FABS %0 - %2:vgpr(s64) = G_FNEG %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(f64) = G_BITCAST %0(i64) + %2:vgpr(f64) = G_FABS %1 + %3:vgpr(f64) = G_FNEG %2 + %4:vgpr(i64) = G_BITCAST %3(f64) + S_ENDPGM 0, implicit %4(i64) ... 
--- @@ -1146,52 +1250,62 @@ body: | ; SI-LABEL: name: fneg_fabs_s64_vs ; SI: liveins: $sgpr0_sgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; SI-NEXT: [[FABS:%[0-9]+]]:vreg_64(s64) = G_FABS [[COPY]] - ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]].sub1(s64) - ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 2147483648 - ; SI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(s16) = V_XOR_B32_e64 [[S_MOV_B32_]](s32), [[COPY1]](s32), implicit $exec - ; SI-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]].sub0(s64) - ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64(s64) = REG_SEQUENCE [[COPY2]](s32), %subreg.sub0, [[V_XOR_B32_e64_]](s16), %subreg.sub1 - ; SI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]](s64) + ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f64) = G_BITCAST [[COPY]](i64) + ; SI-NEXT: [[FABS:%[0-9]+]]:vreg_64(f64) = G_FABS [[BITCAST]] + ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY [[FABS]].sub1(f64) + ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(i32) = S_MOV_B32 2147483648 + ; SI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(i16) = V_XOR_B32_e64 [[S_MOV_B32_]](i32), [[COPY1]](i32), implicit $exec + ; SI-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY [[FABS]].sub0(f64) + ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64(f64) = REG_SEQUENCE [[COPY2]](i32), %subreg.sub0, [[V_XOR_B32_e64_]](i16), %subreg.sub1 + ; SI-NEXT: [[COPY3:%[0-9]+]]:vreg_64(i64) = COPY [[REG_SEQUENCE]](f64) + ; SI-NEXT: S_ENDPGM 0, implicit [[COPY3]](i64) ; ; VI-LABEL: name: fneg_fabs_s64_vs ; VI: liveins: $sgpr0_sgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; VI-NEXT: [[FABS:%[0-9]+]]:vreg_64(s64) = G_FABS [[COPY]] - ; VI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]].sub1(s64) - ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 2147483648 - ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(s16) = V_XOR_B32_e64 [[S_MOV_B32_]](s32), [[COPY1]](s32), implicit $exec - ; VI-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]].sub0(s64) - ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64(s64) = REG_SEQUENCE [[COPY2]](s32), %subreg.sub0, [[V_XOR_B32_e64_]](s16), %subreg.sub1 - ; VI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]](s64) + ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f64) = G_BITCAST [[COPY]](i64) + ; VI-NEXT: [[FABS:%[0-9]+]]:vreg_64(f64) = G_FABS [[BITCAST]] + ; VI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY [[FABS]].sub1(f64) + ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(i32) = S_MOV_B32 2147483648 + ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(i16) = V_XOR_B32_e64 [[S_MOV_B32_]](i32), [[COPY1]](i32), implicit $exec + ; VI-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY [[FABS]].sub0(f64) + ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64(f64) = REG_SEQUENCE [[COPY2]](i32), %subreg.sub0, [[V_XOR_B32_e64_]](i16), %subreg.sub1 + ; VI-NEXT: [[COPY3:%[0-9]+]]:vreg_64(i64) = COPY [[REG_SEQUENCE]](f64) + ; VI-NEXT: S_ENDPGM 0, implicit [[COPY3]](i64) ; ; GFX9-LABEL: name: fneg_fabs_s64_vs ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[FABS:%[0-9]+]]:vreg_64(s64) = G_FABS [[COPY]] - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]].sub1(s64) - ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 2147483648 - ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(s16) = V_XOR_B32_e64 [[S_MOV_B32_]](s32), [[COPY1]](s32), implicit $exec - ; GFX9-NEXT: 
[[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]].sub0(s64) - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64(s64) = REG_SEQUENCE [[COPY2]](s32), %subreg.sub0, [[V_XOR_B32_e64_]](s16), %subreg.sub1 - ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]](s64) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f64) = G_BITCAST [[COPY]](i64) + ; GFX9-NEXT: [[FABS:%[0-9]+]]:vreg_64(f64) = G_FABS [[BITCAST]] + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY [[FABS]].sub1(f64) + ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(i32) = S_MOV_B32 2147483648 + ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(i16) = V_XOR_B32_e64 [[S_MOV_B32_]](i32), [[COPY1]](i32), implicit $exec + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY [[FABS]].sub0(f64) + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64(f64) = REG_SEQUENCE [[COPY2]](i32), %subreg.sub0, [[V_XOR_B32_e64_]](i16), %subreg.sub1 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vreg_64(i64) = COPY [[REG_SEQUENCE]](f64) + ; GFX9-NEXT: S_ENDPGM 0, implicit [[COPY3]](i64) ; ; GFX10-LABEL: name: fneg_fabs_s64_vs ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[FABS:%[0-9]+]]:vreg_64(s64) = G_FABS [[COPY]] - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]].sub1(s64) - ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 2147483648 - ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(s16) = V_XOR_B32_e64 [[S_MOV_B32_]](s32), [[COPY1]](s32), implicit $exec - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]].sub0(s64) - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64(s64) = REG_SEQUENCE [[COPY2]](s32), %subreg.sub0, [[V_XOR_B32_e64_]](s16), %subreg.sub1 - ; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]](s64) - %0:sgpr(s64) = COPY $sgpr0_sgpr1 - %1:vgpr(s64) = G_FABS %0 - %2:vgpr(s64) = G_FNEG %1 - S_ENDPGM 0, implicit %2 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f64) = G_BITCAST [[COPY]](i64) + ; GFX10-NEXT: [[FABS:%[0-9]+]]:vreg_64(f64) = G_FABS [[BITCAST]] + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY [[FABS]].sub1(f64) + ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(i32) = S_MOV_B32 2147483648 + ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(i16) = V_XOR_B32_e64 [[S_MOV_B32_]](i32), [[COPY1]](i32), implicit $exec + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY [[FABS]].sub0(f64) + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64(f64) = REG_SEQUENCE [[COPY2]](i32), %subreg.sub0, [[V_XOR_B32_e64_]](i16), %subreg.sub1 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vreg_64(i64) = COPY [[REG_SEQUENCE]](f64) + ; GFX10-NEXT: S_ENDPGM 0, implicit [[COPY3]](i64) + %0:sgpr(i64) = COPY $sgpr0_sgpr1 + %1:sgpr(f64) = G_BITCAST %0(i64) + %2:vgpr(f64) = G_FABS %1 + %3:vgpr(f64) = G_FNEG %2 + %4:vgpr(i64) = G_BITCAST %3(f64) + S_ENDPGM 0, implicit %4(i64) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir index 03cb907f82a16..1b223afc7c2ec 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir @@ -34,9 +34,10 @@ body: | ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[V_CVT_I32_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: $vgpr0 = COPY [[V_CVT_I32_F32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = G_FPTOSI %0 - $vgpr0 = COPY %1 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(f32) = G_BITCAST %0(i32) + %2:vgpr(i32) = G_FPTOSI %1(f32) + $vgpr0 = COPY %2(i32) ... --- @@ -69,9 +70,10 @@ body: | ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX11-NEXT: [[V_CVT_I32_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: $vgpr0 = COPY [[V_CVT_I32_F32_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = G_FPTOSI %0 - $vgpr0 = COPY %1 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(f32) = G_BITCAST %0(i32) + %2:vgpr(i32) = G_FPTOSI %1(f32) + $vgpr0 = COPY %2(i32) ... --- @@ -104,10 +106,11 @@ body: | ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[V_CVT_I32_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: $vgpr0 = COPY [[V_CVT_I32_F32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = G_FNEG %0 - %2:vgpr(s32) = G_FPTOSI %1 - $vgpr0 = COPY %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(f32) = G_BITCAST %0(i32) + %2:vgpr(f32) = G_FNEG %1 + %3:vgpr(i32) = G_FPTOSI %2(f32) + $vgpr0 = COPY %3(i32) ... --- @@ -152,10 +155,11 @@ body: | ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CVT_I32_F32_e32_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s16) = G_TRUNC %0 - %2:vgpr(s32) = G_FPTOSI %1 - $vgpr0 = COPY %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i16) = G_TRUNC %0(i32) + %2:vgpr(f16) = G_BITCAST %1(i16) + %3:vgpr(i32) = G_FPTOSI %2(f16) + $vgpr0 = COPY %3(i32) ... --- @@ -199,10 +203,11 @@ body: | ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CVT_I32_F32_e32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0 - %2:vgpr(s32) = G_FPTOSI %1 - $vgpr0 = COPY %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(f16) = G_BITCAST %1(i16) + %3:vgpr(i32) = G_FPTOSI %2(f16) + $vgpr0 = COPY %3(i32) ... 
--- @@ -254,11 +259,12 @@ body: | ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CVT_I32_F32_e32_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s16) = G_TRUNC %0 - %2:vgpr(s16) = G_FNEG %1 - %3:vgpr(s32) = G_FPTOSI %2 - $vgpr0 = COPY %3 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i16) = G_TRUNC %0(i32) + %2:vgpr(f16) = G_BITCAST %1(i16) + %3:vgpr(f16) = G_FNEG %2 + %4:vgpr(i32) = G_FPTOSI %3(f16) + $vgpr0 = COPY %4(i32) ... --- @@ -304,11 +310,12 @@ body: | ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CVT_I32_F32_e32_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s16) = G_TRUNC %0 - %2:vgpr(s32) = G_FPTOSI %1 - %3:vgpr(s1) = G_TRUNC %2 - S_ENDPGM 0, implicit %3 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i16) = G_TRUNC %0(i32) + %2:vgpr(f16) = G_BITCAST %1(i16) + %3:vgpr(i32) = G_FPTOSI %2(f16) + %4:vgpr(i1) = G_TRUNC %3(i32) + S_ENDPGM 0, implicit %4(i1) ... --- @@ -353,11 +360,12 @@ body: | ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CVT_I32_F32_e32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0 - %2:vgpr(s32) = G_FPTOSI %1 - %3:vgpr(s1) = G_TRUNC %2 - S_ENDPGM 0, implicit %3 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(f16) = G_BITCAST %1(i16) + %3:vgpr(i32) = G_FPTOSI %2(f16) + %4:vgpr(i1) = G_TRUNC %3(i32) + S_ENDPGM 0, implicit %4(i1) ... --- @@ -410,10 +418,11 @@ body: | ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CVT_I32_F32_e32_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s16) = G_TRUNC %0 - %2:vgpr(s16) = G_FNEG %1 - %3:vgpr(s32) = G_FPTOSI %2 - %4:vgpr(s1) = G_TRUNC %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i16) = G_TRUNC %0(i32) + %2:vgpr(f16) = G_BITCAST %1(i16) + %3:vgpr(f16) = G_FNEG %2 + %4:vgpr(i32) = G_FPTOSI %3(f16) + %5:vgpr(i1) = G_TRUNC %4(i32) + S_ENDPGM 0, implicit %5(i1) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir index 521a0e8a2a796..0930f18c25829 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir @@ -22,8 +22,8 @@ body: | ; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 ; GCN-NEXT: [[V_CVT_U32_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: [[V_CVT_U32_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: FLAT_STORE_DWORD [[COPY2]], [[V_CVT_U32_F32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GCN-NEXT: FLAT_STORE_DWORD [[COPY2]], [[V_CVT_U32_F32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GCN-NEXT: FLAT_STORE_DWORD [[COPY2]], [[V_CVT_U32_F32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32), addrspace 1) + ; GCN-NEXT: FLAT_STORE_DWORD [[COPY2]], [[V_CVT_U32_F32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32), addrspace 1) ; ; VI-LABEL: name: fptoui ; VI: liveins: $sgpr0, $vgpr0, $vgpr3_vgpr4 @@ -33,8 +33,8 @@ body: | ; VI-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 ; VI-NEXT: [[V_CVT_U32_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; VI-NEXT: [[V_CVT_U32_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; VI-NEXT: FLAT_STORE_DWORD [[COPY2]], [[V_CVT_U32_F32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; VI-NEXT: FLAT_STORE_DWORD [[COPY2]], [[V_CVT_U32_F32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; VI-NEXT: FLAT_STORE_DWORD [[COPY2]], [[V_CVT_U32_F32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32), addrspace 1) + ; VI-NEXT: FLAT_STORE_DWORD [[COPY2]], [[V_CVT_U32_F32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32), addrspace 1) ; ; GFX11-LABEL: name: fptoui ; GFX11: liveins: $sgpr0, $vgpr0, $vgpr3_vgpr4 @@ -44,20 +44,22 @@ body: | ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 ; GFX11-NEXT: [[V_CVT_U32_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: [[V_CVT_U32_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: GLOBAL_STORE_DWORD [[COPY2]], [[V_CVT_U32_F32_e64_]], 0, 0, implicit $exec :: (store (s32), addrspace 1) - ; GFX11-NEXT: GLOBAL_STORE_DWORD [[COPY2]], [[V_CVT_U32_F32_e64_1]], 0, 0, implicit $exec :: (store (s32), addrspace 1) - %0:sgpr(s32) = COPY $sgpr0 + ; GFX11-NEXT: GLOBAL_STORE_DWORD [[COPY2]], [[V_CVT_U32_F32_e64_]], 0, 0, implicit $exec :: (store (i32), addrspace 1) + ; GFX11-NEXT: GLOBAL_STORE_DWORD [[COPY2]], [[V_CVT_U32_F32_e64_1]], 0, 0, implicit $exec :: (store (i32), addrspace 1) + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:vgpr(p1) = COPY $vgpr3_vgpr4 + %3:sgpr(f32) = G_BITCAST %0(i32) + %4:vgpr(i32) = G_FPTOUI %3(f32) + %5:vgpr(f32) = G_BITCAST %1(i32) + %6:vgpr(i32) = G_FPTOUI %5(f32) + G_STORE %4(i32), %2(p1) :: (store (i32), addrspace 1) + G_STORE %6(i32), %2(p1) :: (store (i32), addrspace 1) - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(p1) = COPY $vgpr3_vgpr4 - %3:vgpr(s32) = G_FPTOUI %0 - 
%4:vgpr(s32) = G_FPTOUI %1 - G_STORE %3, %2 :: (store (s32), addrspace 1) - G_STORE %4, %2 :: (store (s32), addrspace 1) ... --- @@ -102,10 +104,11 @@ body: | ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CVT_U32_F32_e32_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s16) = G_TRUNC %0 - %2:vgpr(s32) = G_FPTOUI %1 - $vgpr0 = COPY %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i16) = G_TRUNC %0(i32) + %2:vgpr(f16) = G_BITCAST %1(i16) + %3:vgpr(i32) = G_FPTOUI %2(f16) + $vgpr0 = COPY %3(i32) ... --- @@ -149,10 +152,11 @@ body: | ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CVT_U32_F32_e32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0 - %2:vgpr(s32) = G_FPTOUI %1 - $vgpr0 = COPY %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(f16) = G_BITCAST %1(i16) + %3:vgpr(i32) = G_FPTOUI %2(f16) + $vgpr0 = COPY %3(i32) ... --- @@ -204,11 +208,12 @@ body: | ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CVT_U32_F32_e32_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s16) = G_TRUNC %0 - %2:vgpr(s16) = G_FNEG %1 - %3:vgpr(s32) = G_FPTOUI %2 - $vgpr0 = COPY %3 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i16) = G_TRUNC %0(i32) + %2:vgpr(f16) = G_BITCAST %1(i16) + %3:vgpr(f16) = G_FNEG %2 + %4:vgpr(i32) = G_FPTOUI %3(f16) + $vgpr0 = COPY %4(i32) ... --- @@ -254,11 +259,12 @@ body: | ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CVT_U32_F32_e32_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s16) = G_TRUNC %0 - %2:vgpr(s32) = G_FPTOUI %1 - %3:vgpr(s1) = G_TRUNC %2 - S_ENDPGM 0, implicit %3 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i16) = G_TRUNC %0(i32) + %2:vgpr(f16) = G_BITCAST %1(i16) + %3:vgpr(i32) = G_FPTOUI %2(f16) + %4:vgpr(i1) = G_TRUNC %3(i32) + S_ENDPGM 0, implicit %4(i1) ... 
--- @@ -303,11 +309,12 @@ body: | ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CVT_U32_F32_e32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0 - %2:vgpr(s32) = G_FPTOUI %1 - %3:vgpr(s1) = G_TRUNC %2 - S_ENDPGM 0, implicit %3 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(f16) = G_BITCAST %1(i16) + %3:vgpr(i32) = G_FPTOUI %2(f16) + %4:vgpr(i1) = G_TRUNC %3(i32) + S_ENDPGM 0, implicit %4(i1) ... --- @@ -360,10 +367,11 @@ body: | ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CVT_U32_F32_e32_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s16) = G_TRUNC %0 - %2:vgpr(s16) = G_FNEG %1 - %3:vgpr(s32) = G_FPTOUI %2 - %4:vgpr(s1) = G_TRUNC %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i16) = G_TRUNC %0(i32) + %2:vgpr(f16) = G_BITCAST %1(i16) + %3:vgpr(f16) = G_FNEG %2 + %4:vgpr(i32) = G_FPTOUI %3(f16) + %5:vgpr(i1) = G_TRUNC %4(i32) + S_ENDPGM 0, implicit %5(i1) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir index 52b1beb0b0594..c44d4db2b2d04 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir @@ -16,57 +16,73 @@ body: | ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: early-clobber %3:sgpr_128 = S_LOAD_DWORDX4_IMM_ec [[COPY]], 36, 0 :: (dereferenceable invariant load (<2 x s64>), align 4, addrspace 4) + ; GFX10-NEXT: early-clobber %3:sgpr_128 = S_LOAD_DWORDX4_IMM_ec [[COPY]], 36, 0 :: (dereferenceable invariant load (<2 x i64>), align 4, addrspace 4) ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY %3.sub0_sub1 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY %3.sub2_sub3 - ; GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY2]], 0, 0 :: (load (s64), addrspace 1) + ; GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY2]], 0, 0 :: (load (f64), addrspace 1) ; GFX10-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -9223372036854775808 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[S_MOV_B]] - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[S_LOAD_DWORDX2_IMM]] - ; GFX10-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[COPY3]], 1, [[COPY4]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY [[S_LOAD_DWORDX2_IMM]].sub1 + ; GFX10-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY3]], [[S_MOV_B32_]], implicit-def dead $scc + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0 = COPY [[S_XOR_B32_]] + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[S_LOAD_DWORDX2_IMM]].sub0 + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY5]], 
%subreg.sub0, [[COPY4]], %subreg.sub1 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[S_MOV_B]] + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] + ; GFX10-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[COPY6]], 0, [[COPY7]], 0, 0, implicit $mode, implicit $exec ; GFX10-NEXT: [[V_FLOOR_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_FLOOR_F64_e64 0, [[V_ADD_F64_e64_]], 0, 0, implicit $mode, implicit $exec ; GFX10-NEXT: [[V_ADD_F64_e64_1:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_]], 1, [[V_FLOOR_F64_e64_]], 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_64_xexec_xnull = COPY [[COPY1]] + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_64_xexec_xnull = COPY [[COPY1]] ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX10-NEXT: GLOBAL_STORE_DWORDX2_SADDR [[V_MOV_B32_e32_]], [[V_ADD_F64_e64_1]], [[COPY5]], 0, 0, implicit $exec :: (store (s64), addrspace 1) + ; GFX10-NEXT: GLOBAL_STORE_DWORDX2_SADDR [[V_MOV_B32_e32_]], [[V_ADD_F64_e64_1]], [[COPY8]], 0, 0, implicit $exec :: (store (f64), addrspace 1) ; GFX10-NEXT: S_ENDPGM 0 ; ; GFX11-LABEL: name: fract_f64_neg ; GFX11: liveins: $sgpr0_sgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX11-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 36, 0 :: (dereferenceable invariant load (<2 x s64>), align 4, addrspace 4) + ; GFX11-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 36, 0 :: (dereferenceable invariant load (<2 x i64>), align 4, addrspace 4) ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[S_LOAD_DWORDX4_IMM]].sub0_sub1 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY [[S_LOAD_DWORDX4_IMM]].sub2_sub3 - ; GFX11-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY2]], 0, 0 :: (load (s64), addrspace 1) + ; GFX11-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY2]], 0, 0 :: (load (f64), addrspace 1) ; GFX11-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -9223372036854775808 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[S_MOV_B]] - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[S_LOAD_DWORDX2_IMM]] - ; GFX11-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[COPY3]], 1, [[COPY4]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY [[S_LOAD_DWORDX2_IMM]].sub1 + ; GFX11-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY3]], [[S_MOV_B32_]], implicit-def dead $scc + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0 = COPY [[S_XOR_B32_]] + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[S_LOAD_DWORDX2_IMM]].sub0 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[S_MOV_B]] + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] + ; GFX11-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[COPY6]], 0, [[COPY7]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: [[V_FLOOR_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_FLOOR_F64_e64 0, [[V_ADD_F64_e64_]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: [[V_ADD_F64_e64_1:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_]], 1, [[V_FLOOR_F64_e64_]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: 
[[COPY5:%[0-9]+]]:sreg_64_xexec_xnull = COPY [[COPY1]] + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:sreg_64_xexec_xnull = COPY [[COPY1]] ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX11-NEXT: GLOBAL_STORE_DWORDX2_SADDR [[V_MOV_B32_e32_]], [[V_ADD_F64_e64_1]], [[COPY5]], 0, 0, implicit $exec :: (store (s64), addrspace 1) + ; GFX11-NEXT: GLOBAL_STORE_DWORDX2_SADDR [[V_MOV_B32_e32_]], [[V_ADD_F64_e64_1]], [[COPY8]], 0, 0, implicit $exec :: (store (f64), addrspace 1) ; GFX11-NEXT: S_ENDPGM 0 - %2:sgpr(p4) = COPY $sgpr0_sgpr1 - %7:sgpr(s64) = G_CONSTANT i64 36 - %8:sgpr(p4) = G_PTR_ADD %2, %7(s64) - %9:sgpr(<2 x s64>) = G_LOAD %8(p4) :: (dereferenceable invariant load (<2 x s64>), align 4, addrspace 4) - %10:sgpr(s64), %13:sgpr(s64) = G_UNMERGE_VALUES %9(<2 x s64>) - %15:sgpr(p1) = G_INTTOPTR %13(s64) - %18:sgpr(s64) = G_LOAD %15(p1) :: (load (s64), addrspace 1) - %19:sgpr(s64) = G_FCONSTANT double -0.000000e+00 - %24:sgpr(s64) = G_FNEG %18 - %25:vgpr(s64) = COPY %19(s64) - %26:vgpr(s64) = COPY %24(s64) - %20:vgpr(s64) = G_FADD %25, %26 - %21:vgpr(s64) = G_FFLOOR %20 - %23:vgpr(s64) = G_FNEG %21 - %22:vgpr(s64) = G_FADD %20, %23 - %12:sgpr(p1) = G_INTTOPTR %10(s64) - %27:vgpr(p1) = COPY %12(p1) - G_STORE %22(s64), %27(p1) :: (store (s64), addrspace 1) + %0:sgpr(p4) = COPY $sgpr0_sgpr1 + %1:sgpr(i64) = G_CONSTANT i64 36 + %2:sgpr(p4) = G_PTR_ADD %0, %1(i64) + %3:sgpr(<2 x i64>) = G_LOAD %2(p4) :: (dereferenceable invariant load (<2 x i64>), align 4, addrspace 4) + %4:sgpr(i64), %5:sgpr(i64) = G_UNMERGE_VALUES %3(<2 x i64>) + %6:sgpr(p1) = G_INTTOPTR %5(i64) + %7:sgpr(f64) = G_LOAD %6(p1) :: (load (f64), addrspace 1) + %8:sgpr(f64) = G_FCONSTANT double -0.000000e+00 + %9:sgpr(f64) = G_FNEG %7 + %10:sgpr(i64) = G_BITCAST %8(f64) + %11:vgpr(i64) = COPY %10(i64) + %12:sgpr(i64) = G_BITCAST %9(f64) + %13:vgpr(i64) = COPY %12(i64) + %14:vgpr(f64) = G_BITCAST %11(i64) + %15:vgpr(f64) = G_BITCAST %13(i64) + %16:vgpr(f64) = G_FADD %14, %15 + %17:vgpr(f64) = G_FFLOOR %16 + %18:vgpr(f64) = G_FNEG %17 + %19:vgpr(f64) = G_FADD %16, %18 + %20:sgpr(p1) = G_INTTOPTR %4(i64) + %21:vgpr(p1) = COPY %20(p1) + G_STORE %19(f64), %21(p1) :: (store (f64), addrspace 1) S_ENDPGM 0 ... 
@@ -84,57 +100,73 @@ body: | ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: early-clobber %3:sgpr_128 = S_LOAD_DWORDX4_IMM_ec [[COPY]], 36, 0 :: (dereferenceable invariant load (<2 x s64>), align 4, addrspace 4) + ; GFX10-NEXT: early-clobber %3:sgpr_128 = S_LOAD_DWORDX4_IMM_ec [[COPY]], 36, 0 :: (dereferenceable invariant load (<2 x i64>), align 4, addrspace 4) ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY %3.sub0_sub1 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY %3.sub2_sub3 - ; GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY2]], 0, 0 :: (load (s64), addrspace 1) + ; GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY2]], 0, 0 :: (load (f64), addrspace 1) ; GFX10-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -9223372036854775808 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[S_MOV_B]] - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[S_LOAD_DWORDX2_IMM]] - ; GFX10-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[COPY3]], 3, [[COPY4]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY [[S_LOAD_DWORDX2_IMM]].sub1 + ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY3]], [[S_MOV_B32_]], implicit-def dead $scc + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0 = COPY [[S_OR_B32_]] + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[S_LOAD_DWORDX2_IMM]].sub0 + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[S_MOV_B]] + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] + ; GFX10-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[COPY6]], 0, [[COPY7]], 0, 0, implicit $mode, implicit $exec ; GFX10-NEXT: [[V_FLOOR_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_FLOOR_F64_e64 0, [[V_ADD_F64_e64_]], 0, 0, implicit $mode, implicit $exec ; GFX10-NEXT: [[V_ADD_F64_e64_1:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_]], 1, [[V_FLOOR_F64_e64_]], 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_64_xexec_xnull = COPY [[COPY1]] + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_64_xexec_xnull = COPY [[COPY1]] ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX10-NEXT: GLOBAL_STORE_DWORDX2_SADDR [[V_MOV_B32_e32_]], [[V_ADD_F64_e64_1]], [[COPY5]], 0, 0, implicit $exec :: (store (s64), addrspace 1) + ; GFX10-NEXT: GLOBAL_STORE_DWORDX2_SADDR [[V_MOV_B32_e32_]], [[V_ADD_F64_e64_1]], [[COPY8]], 0, 0, implicit $exec :: (store (f64), addrspace 1) ; GFX10-NEXT: S_ENDPGM 0 ; ; GFX11-LABEL: name: fract_f64_neg_abs ; GFX11: liveins: $sgpr0_sgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX11-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 36, 0 :: (dereferenceable invariant load (<2 x s64>), align 4, addrspace 4) + ; GFX11-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 36, 0 :: (dereferenceable invariant load (<2 x i64>), align 4, addrspace 4) ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[S_LOAD_DWORDX4_IMM]].sub0_sub1 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY [[S_LOAD_DWORDX4_IMM]].sub2_sub3 - ; GFX11-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM 
[[COPY2]], 0, 0 :: (load (s64), addrspace 1) + ; GFX11-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY2]], 0, 0 :: (load (f64), addrspace 1) ; GFX11-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -9223372036854775808 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[S_MOV_B]] - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[S_LOAD_DWORDX2_IMM]] - ; GFX11-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[COPY3]], 3, [[COPY4]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY [[S_LOAD_DWORDX2_IMM]].sub1 + ; GFX11-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY3]], [[S_MOV_B32_]], implicit-def dead $scc + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0 = COPY [[S_OR_B32_]] + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[S_LOAD_DWORDX2_IMM]].sub0 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[S_MOV_B]] + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] + ; GFX11-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[COPY6]], 0, [[COPY7]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: [[V_FLOOR_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_FLOOR_F64_e64 0, [[V_ADD_F64_e64_]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: [[V_ADD_F64_e64_1:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_]], 1, [[V_FLOOR_F64_e64_]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:sreg_64_xexec_xnull = COPY [[COPY1]] + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:sreg_64_xexec_xnull = COPY [[COPY1]] ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX11-NEXT: GLOBAL_STORE_DWORDX2_SADDR [[V_MOV_B32_e32_]], [[V_ADD_F64_e64_1]], [[COPY5]], 0, 0, implicit $exec :: (store (s64), addrspace 1) + ; GFX11-NEXT: GLOBAL_STORE_DWORDX2_SADDR [[V_MOV_B32_e32_]], [[V_ADD_F64_e64_1]], [[COPY8]], 0, 0, implicit $exec :: (store (f64), addrspace 1) ; GFX11-NEXT: S_ENDPGM 0 - %2:sgpr(p4) = COPY $sgpr0_sgpr1 - %7:sgpr(s64) = G_CONSTANT i64 36 - %8:sgpr(p4) = G_PTR_ADD %2, %7(s64) - %9:sgpr(<2 x s64>) = G_LOAD %8(p4) :: (dereferenceable invariant load (<2 x s64>), align 4, addrspace 4) - %10:sgpr(s64), %13:sgpr(s64) = G_UNMERGE_VALUES %9(<2 x s64>) - %15:sgpr(p1) = G_INTTOPTR %13(s64) - %18:sgpr(s64) = G_LOAD %15(p1) :: (load (s64), addrspace 1) - %19:sgpr(s64) = G_FABS %18 - %20:sgpr(s64) = G_FCONSTANT double -0.000000e+00 - %25:sgpr(s64) = G_FNEG %19 - %26:vgpr(s64) = COPY %20(s64) - %27:vgpr(s64) = COPY %25(s64) - %21:vgpr(s64) = G_FADD %26, %27 - %22:vgpr(s64) = G_FFLOOR %21 - %24:vgpr(s64) = G_FNEG %22 - %23:vgpr(s64) = G_FADD %21, %24 - %12:sgpr(p1) = G_INTTOPTR %10(s64) - %28:vgpr(p1) = COPY %12(p1) - G_STORE %23(s64), %28(p1) :: (store (s64), addrspace 1) + %0:sgpr(p4) = COPY $sgpr0_sgpr1 + %1:sgpr(i64) = G_CONSTANT i64 36 + %2:sgpr(p4) = G_PTR_ADD %0, %1(i64) + %3:sgpr(<2 x i64>) = G_LOAD %2(p4) :: (dereferenceable invariant load (<2 x i64>), align 4, addrspace 4) + %4:sgpr(i64), %5:sgpr(i64) = G_UNMERGE_VALUES %3(<2 x i64>) + %6:sgpr(p1) = G_INTTOPTR %5(i64) + %7:sgpr(f64) = G_LOAD %6(p1) :: (load (f64), addrspace 1) + %8:sgpr(f64) = G_FABS %7 + %9:sgpr(f64) = G_FCONSTANT double -0.000000e+00 + %10:sgpr(f64) = G_FNEG %8 + %11:sgpr(i64) = G_BITCAST %9(f64) + %12:vgpr(i64) = COPY %11(i64) + %13:sgpr(i64) = 
G_BITCAST %10(f64) + %14:vgpr(i64) = COPY %13(i64) + %15:vgpr(f64) = G_BITCAST %12(i64) + %16:vgpr(f64) = G_BITCAST %14(i64) + %17:vgpr(f64) = G_FADD %15, %16 + %18:vgpr(f64) = G_FFLOOR %17 + %19:vgpr(f64) = G_FNEG %18 + %20:vgpr(f64) = G_FADD %17, %19 + %21:sgpr(p1) = G_INTTOPTR %4(i64) + %22:vgpr(p1) = COPY %21(p1) + G_STORE %20(f64), %22(p1) :: (store (f64), addrspace 1) S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-frame-index.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-frame-index.mir index a5c35b043d451..f057adcd5f866 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-frame-index.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-frame-index.mir @@ -15,7 +15,7 @@ body: | ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0 ; GCN-NEXT: $sgpr0 = COPY [[S_MOV_B32_]] %0:sgpr(p5) = G_FRAME_INDEX %stack.0 - $sgpr0 = COPY %0 + $sgpr0 = COPY %0(p5) ... @@ -33,6 +33,6 @@ body: | ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec ; GCN-NEXT: $vgpr0 = COPY [[V_MOV_B32_e32_]] %0:vgpr(p5) = G_FRAME_INDEX %stack.0 - $vgpr0 = COPY %0 + $vgpr0 = COPY %0(p5) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-freeze.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-freeze.mir index 7afc2b9015c3c..7f93d16b23e5b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-freeze.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-freeze.mir @@ -18,16 +18,17 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: $vgpr0 = COPY [[COPY]] + ; ; GFX10-LABEL: name: test_freeze_s1_vgpr_to_vgpr ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: $vgpr0 = COPY [[COPY]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s1) = G_TRUNC %0(s32) - %2:vgpr(s1) = G_FREEZE %1 - %3:vgpr(s32) = G_ANYEXT %2(s1) - $vgpr0 = COPY %3(s32) + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i1) = G_TRUNC %0(i32) + %2:vgpr(i1) = G_FREEZE %1 + %3:vgpr(i32) = G_ANYEXT %2(i1) + $vgpr0 = COPY %3(i32) ... @@ -45,16 +46,17 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: $agpr0 = COPY [[COPY]] + ; ; GFX10-LABEL: name: test_freeze_s1_vgpr_to_agpr ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: $agpr0 = COPY [[COPY]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s1) = G_TRUNC %0(s32) - %2:vgpr(s1) = G_FREEZE %1 - %3:vgpr(s32) = G_ANYEXT %2(s1) - $agpr0 = COPY %3(s32) + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i1) = G_TRUNC %0(i32) + %2:vgpr(i1) = G_FREEZE %1 + %3:vgpr(i32) = G_ANYEXT %2(i1) + $agpr0 = COPY %3(i32) ... 
@@ -75,6 +77,7 @@ body: | ; GFX6-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_64_xexec = COPY [[V_CMP_EQ_U32_e64_]] ; GFX6-NEXT: S_ENDPGM 0, implicit [[COPY2]] + ; ; GFX10-LABEL: name: test_freeze_s1_vcc ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} @@ -83,11 +86,11 @@ body: | ; GFX10-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[V_CMP_EQ_U32_e64_]] ; GFX10-NEXT: S_ENDPGM 0, implicit [[COPY2]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vcc(s1) = G_ICMP intpred(eq), %0(s32), %1 - %3:vcc(s1) = G_FREEZE %2 - S_ENDPGM 0, implicit %3(s1) + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vcc(i1) = G_ICMP intpred(eq), %0(i32), %1 + %3:vcc(i1) = G_FREEZE %2 + S_ENDPGM 0, implicit %3(i1) ... @@ -105,16 +108,17 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: $vgpr0 = COPY [[COPY]] + ; ; GFX10-LABEL: name: test_freeze_s16_vgpr_to_vgpr ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: $vgpr0 = COPY [[COPY]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s16) = G_TRUNC %0(s32) - %2:vgpr(s16) = G_FREEZE %1 - %3:vgpr(s32) = G_ANYEXT %2(s16) - $vgpr0 = COPY %3(s32) + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i16) = G_TRUNC %0(i32) + %2:vgpr(i16) = G_FREEZE %1 + %3:vgpr(i32) = G_ANYEXT %2(i16) + $vgpr0 = COPY %3(i32) ... @@ -132,14 +136,15 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: $vgpr0 = COPY [[COPY]] + ; ; GFX10-LABEL: name: test_freeze_s32_vgpr_to_vgpr ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: $vgpr0 = COPY [[COPY]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = G_FREEZE %0 - $vgpr0 = COPY %1(s32) + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = G_FREEZE %0 + $vgpr0 = COPY %1(i32) ... @@ -157,14 +162,15 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX6-NEXT: $sgpr0 = COPY [[COPY]] + ; ; GFX10-LABEL: name: test_freeze_s32_sgpr_to_sgpr ; GFX10: liveins: $sgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX10-NEXT: $sgpr0 = COPY [[COPY]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = G_FREEZE %0 - $sgpr0 = COPY %1(s32) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = G_FREEZE %0 + $sgpr0 = COPY %1(i32) ... @@ -182,14 +188,15 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX6-NEXT: $vgpr0 = COPY [[COPY]] + ; ; GFX10-LABEL: name: test_freeze_s32_sgpr_to_vgpr ; GFX10: liveins: $sgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX10-NEXT: $vgpr0 = COPY [[COPY]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = G_FREEZE %0 - $vgpr0 = COPY %1(s32) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = G_FREEZE %0 + $vgpr0 = COPY %1(i32) ... @@ -207,14 +214,15 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: $agpr0 = COPY [[COPY]] + ; ; GFX10-LABEL: name: test_freeze_s32_vgpr_to_agpr ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: $agpr0 = COPY [[COPY]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = G_FREEZE %0 - $agpr0 = COPY %1(s32) + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = G_FREEZE %0 + $agpr0 = COPY %1(i32) ... 
@@ -232,14 +240,15 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX6-NEXT: $agpr0 = COPY [[COPY]] + ; ; GFX10-LABEL: name: test_freeze_s32_sgpr_to_agpr ; GFX10: liveins: $sgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX10-NEXT: $agpr0 = COPY [[COPY]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = G_FREEZE %0 - $agpr0 = COPY %1(s32) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = G_FREEZE %0 + $agpr0 = COPY %1(i32) ... @@ -257,14 +266,15 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:agpr_32 = COPY $agpr0 ; GFX6-NEXT: $vgpr0 = COPY [[COPY]] + ; ; GFX10-LABEL: name: test_freeze_s32_agpr_to_vgpr ; GFX10: liveins: $agpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:agpr_32 = COPY $agpr0 ; GFX10-NEXT: $vgpr0 = COPY [[COPY]] - %0:agpr(s32) = COPY $agpr0 - %1:agpr(s32) = G_FREEZE %0 - $vgpr0 = COPY %1(s32) + %0:agpr(i32) = COPY $agpr0 + %1:agpr(i32) = G_FREEZE %0 + $vgpr0 = COPY %1(i32) ... @@ -282,14 +292,15 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:agpr_32 = COPY $agpr0 ; GFX6-NEXT: $agpr0 = COPY [[COPY]] + ; ; GFX10-LABEL: name: test_freeze_s32_agpr_to_agpr ; GFX10: liveins: $agpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:agpr_32 = COPY $agpr0 ; GFX10-NEXT: $agpr0 = COPY [[COPY]] - %0:agpr(s32) = COPY $agpr0 - %1:agpr(s32) = G_FREEZE %0 - $agpr0 = COPY %1(s32) + %0:agpr(i32) = COPY $agpr0 + %1:agpr(i32) = G_FREEZE %0 + $agpr0 = COPY %1(i32) ... @@ -307,14 +318,15 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[COPY]] + ; ; GFX10-LABEL: name: test_freeze_s64 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[COPY]] - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_FREEZE %0 - $vgpr0_vgpr1 = COPY %1(s64) + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(i64) = G_FREEZE %0 + $vgpr0_vgpr1 = COPY %1(i64) ... @@ -332,14 +344,15 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]] + ; ; GFX10-LABEL: name: test_freeze_s128 ; GFX10: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]] - %0:vgpr(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:vgpr(s128) = G_FREEZE %0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(s128) + %0:vgpr(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:vgpr(i128) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(i128) ... @@ -357,14 +370,15 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[COPY]] + ; ; GFX10-LABEL: name: test_freeze_256 ; GFX10: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[COPY]] - %0:vgpr(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - %1:vgpr(s256) = G_FREEZE %0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1(s256) + %0:vgpr(i256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:vgpr(i256) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1(i256) ... 
@@ -382,14 +396,15 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_512 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[COPY]] + ; ; GFX10-LABEL: name: test_freeze_s512 ; GFX10: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_512 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[COPY]] - %0:vgpr(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - %1:vgpr(s512) = G_FREEZE %0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1(s512) + %0:vgpr(i512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:vgpr(i512) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1(i512) ... @@ -407,14 +422,15 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[COPY]] + ; ; GFX10-LABEL: name: test_freeze_v2s32 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[COPY]] - %0:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:vgpr(<2 x s32>) = G_FREEZE %0 - $vgpr0_vgpr1 = COPY %1(<2 x s32>) + %0:vgpr(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:vgpr(<2 x i32>) = G_FREEZE %0 + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... @@ -432,14 +448,15 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_96 = COPY $vgpr0_vgpr1_vgpr2 ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY]] + ; ; GFX10-LABEL: name: test_freeze_v3s32 ; GFX10: liveins: $vgpr0_vgpr1_vgpr2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_96 = COPY $vgpr0_vgpr1_vgpr2 ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY]] - %0:vgpr(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:vgpr(<3 x s32>) = G_FREEZE %0 - $vgpr0_vgpr1_vgpr2 = COPY %1(<3 x s32>) + %0:vgpr(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:vgpr(<3 x i32>) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2 = COPY %1(<3 x i32>) ... @@ -457,14 +474,15 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]] + ; ; GFX10-LABEL: name: test_freeze_v4s32 ; GFX10: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]] - %0:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:vgpr(<4 x s32>) = G_FREEZE %0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<4 x s32>) + %0:vgpr(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:vgpr(<4 x i32>) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<4 x i32>) ... 
@@ -482,14 +500,15 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_160 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY [[COPY]] + ; ; GFX10-LABEL: name: test_freeze_v5s32 ; GFX10: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_160 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY [[COPY]] - %0:vgpr(<5 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - %1:vgpr(<5 x s32>) = G_FREEZE %0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY %1(<5 x s32>) + %0:vgpr(<5 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + %1:vgpr(<5 x i32>) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY %1(<5 x i32>) ... @@ -507,14 +526,15 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[COPY]] + ; ; GFX10-LABEL: name: test_freeze_v8s32 ; GFX10: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[COPY]] - %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - %1:vgpr(<8 x s32>) = G_FREEZE %0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1(<8 x s32>) + %0:vgpr(<8 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:vgpr(<8 x i32>) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1(<8 x i32>) ... @@ -532,14 +552,15 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_512 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[COPY]] + ; ; GFX10-LABEL: name: test_freeze_v16s32 ; GFX10: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_512 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[COPY]] - %0:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - %1:vgpr(<16 x s32>) = G_FREEZE %0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1(<16 x s32>) + %0:vgpr(<16 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:vgpr(<16 x i32>) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1(<16 x i32>) ... @@ -557,14 +578,15 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: $vgpr0 = COPY [[COPY]] + ; ; GFX10-LABEL: name: test_freeze_v2s16 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: $vgpr0 = COPY [[COPY]] - %0:vgpr(<2 x s16>) = COPY $vgpr0 - %1:vgpr(<2 x s16>) = G_FREEZE %0 - $vgpr0 = COPY %1(<2 x s16>) + %0:vgpr(<2 x i16>) = COPY $vgpr0 + %1:vgpr(<2 x i16>) = G_FREEZE %0 + $vgpr0 = COPY %1(<2 x i16>) ... 
@@ -582,14 +604,15 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[COPY]] + ; ; GFX10-LABEL: name: test_freeze_v4s16 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[COPY]] - %0:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:vgpr(<4 x s16>) = G_FREEZE %0 - $vgpr0_vgpr1 = COPY %1(<4 x s16>) + %0:vgpr(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:vgpr(<4 x i16>) = G_FREEZE %0 + $vgpr0_vgpr1 = COPY %1(<4 x i16>) ... @@ -607,14 +630,15 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_96 = COPY $vgpr0_vgpr1_vgpr2 ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY]] + ; ; GFX10-LABEL: name: test_freeze_v6s16 ; GFX10: liveins: $vgpr0_vgpr1_vgpr2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_96 = COPY $vgpr0_vgpr1_vgpr2 ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY]] - %0:vgpr(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - %1:vgpr(<6 x s16>) = G_FREEZE %0 - $vgpr0_vgpr1_vgpr2 = COPY %1(<6 x s16>) + %0:vgpr(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + %1:vgpr(<6 x i16>) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2 = COPY %1(<6 x i16>) ... @@ -632,14 +656,15 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]] + ; ; GFX10-LABEL: name: test_freeze_v8s16 ; GFX10: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]] - %0:vgpr(<8 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:vgpr(<8 x s16>) = G_FREEZE %0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<8 x s16>) + %0:vgpr(<8 x i16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:vgpr(<8 x i16>) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<8 x i16>) ... @@ -657,14 +682,15 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]] + ; ; GFX10-LABEL: name: test_freeze_v2s64 ; GFX10: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]] - %0:vgpr(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:vgpr(<2 x s64>) = G_FREEZE %0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x s64>) + %0:vgpr(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:vgpr(<2 x i64>) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x i64>) ... 
@@ -682,6 +708,7 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[COPY]] + ; ; GFX10-LABEL: name: test_freeze_p0 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} @@ -707,6 +734,7 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[COPY]] + ; ; GFX10-LABEL: name: test_freeze_p1 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} @@ -732,6 +760,7 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: $vgpr0 = COPY [[COPY]] + ; ; GFX10-LABEL: name: test_freeze_p2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -757,6 +786,7 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: $vgpr0 = COPY [[COPY]] + ; ; GFX10-LABEL: name: test_freeze_p3 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -782,6 +812,7 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[COPY]] + ; ; GFX10-LABEL: name: test_freeze_p4 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} @@ -807,6 +838,7 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: $vgpr0 = COPY [[COPY]] + ; ; GFX10-LABEL: name: test_freeze_p5 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -832,6 +864,7 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[COPY]] + ; ; GFX10-LABEL: name: test_freeze_p999 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fshr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fshr.mir index 1fb67fe17cb0a..357562f8cacd3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fshr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fshr.mir @@ -32,10 +32,10 @@ body: | ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX11-NEXT: [[V_ALIGNBIT_B32_fake16_e64_:%[0-9]+]]:vgpr_32 = V_ALIGNBIT_B32_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_ALIGNBIT_B32_fake16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = G_FSHR %0, %1, %2 - S_ENDPGM 0, implicit %3 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(i32) = G_FSHR %0, %1, %2(i32) + S_ENDPGM 0, implicit %3(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-i1-copy.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-i1-copy.mir index 0d7548892addb..ad160a85d3c91 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-i1-copy.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-i1-copy.mir @@ -49,20 +49,24 @@ body: | ; WAVE32-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[DEF]] ; WAVE32-NEXT: EXP_DONE 0, [[V_CNDMASK_B32_e64_]], [[V_CNDMASK_B32_e64_1]], [[COPY6]], [[COPY7]], -1, 0, 15, implicit $exec ; WAVE32-NEXT: S_ENDPGM 0 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = COPY $vgpr3 - %4:vgpr(s32) = COPY $vgpr4 - %5:sgpr(s32) = G_IMPLICIT_DEF - %6:sgpr(s32) = G_CONSTANT i32 2 - %7:vgpr(s32) = COPY %6(s32) - %8:vcc(s1) = G_ICMP intpred(eq), %0(s32), %7 - %9:vgpr(s32) = G_SELECT %8(s1), %1, %2 - %10:vgpr(s32) = G_SELECT %8(s1), %3, %4 - %11:vgpr(s32) = COPY %5(s32) - %12:vgpr(s32) = COPY %5(s32) - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 15, %9(s32), %10(s32), %11(s32), %12(s32), -1, -1 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(i32) = COPY $vgpr3 + %4:vgpr(i32) = COPY $vgpr4 + %5:sgpr(i32) = G_IMPLICIT_DEF + %6:sgpr(i32) = G_CONSTANT i32 2 + %7:vgpr(i32) = COPY %6(i32) + %8:vcc(i1) = G_ICMP intpred(eq), %0(i32), %7 + %9:vgpr(i32) = G_SELECT %8(i1), %1, %2 + %10:vgpr(i32) = G_SELECT %8(i1), %3, %4 + %11:vgpr(i32) = COPY %5(i32) + %12:vgpr(i32) = COPY %5(i32) + %13:vgpr(f32) = G_BITCAST %9(i32) + %14:vgpr(f32) = G_BITCAST %10(i32) + %15:vgpr(f32) = G_BITCAST %11(i32) + %16:vgpr(f32) = G_BITCAST %12(i32) + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 15, %13(f32), %14(f32), %15(f32), %16(f32), -1, -1 S_ENDPGM 0 ... @@ -121,22 +125,25 @@ body: | ; WAVE32-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[DEF]] ; WAVE32-NEXT: EXP_DONE 0, [[V_CNDMASK_B32_e64_]], [[V_CNDMASK_B32_e64_1]], [[COPY6]], [[COPY7]], -1, 0, 15, implicit $exec ; WAVE32-NEXT: S_ENDPGM 0 - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(s32) = COPY $vgpr1 - %3:vgpr(s32) = COPY $vgpr2 - %4:vgpr(s32) = COPY $vgpr3 - %5:sgpr(s32) = G_IMPLICIT_DEF - %6:sgpr(s32) = G_CONSTANT i32 2 - %7:sgpr(s32) = G_ICMP intpred(eq), %0(s32), %6 - %8:sgpr(s1) = G_TRUNC %7(s32) - %9:vcc(s1) = COPY %8(s1) - %10:vgpr(s32) = G_SELECT %9(s1), %1, %2 - %11:vcc(s1) = COPY %8(s1) - %12:vgpr(s32) = G_SELECT %11(s1), %3, %4 - %13:vgpr(s32) = COPY %5(s32) - %14:vgpr(s32) = COPY %5(s32) - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 15, %10(s32), %12(s32), %13(s32), %14(s32), -1, -1 + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:vgpr(i32) = COPY $vgpr1 + %3:vgpr(i32) = COPY $vgpr2 + %4:vgpr(i32) = COPY $vgpr3 + %5:sgpr(i32) = G_IMPLICIT_DEF + %6:sgpr(i32) = G_CONSTANT i32 2 + %7:sgpr(i32) = G_ICMP intpred(eq), %0(i32), %6 + %8:sgpr(i1) = G_TRUNC %7(i32) + %9:vcc(i1) = COPY %8(i1) + %10:vgpr(i32) = G_SELECT %9(i1), %1, %2 + %11:vcc(i1) = COPY %8(i1) + %12:vgpr(i32) = G_SELECT %11(i1), %3, %4 + %13:vgpr(i32) = COPY %5(i32) + %14:vgpr(i32) = COPY %5(i32) + %15:vgpr(f32) = G_BITCAST %10(i32) + %16:vgpr(f32) = G_BITCAST %12(i32) + %17:vgpr(f32) = G_BITCAST %13(i32) + %18:vgpr(f32) = G_BITCAST %14(i32) + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 15, %15(f32), %16(f32), %17(f32), %18(f32), -1, -1 S_ENDPGM 0 ... 
- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.mir index 90335b7e56734..4ec42a29a82d9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.mir @@ -31,22 +31,22 @@ body: | liveins: $vgpr0_vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7 %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:sgpr(s32) = COPY $sgpr0 - %2:sgpr(s32) = COPY $sgpr1 - %3:sgpr(s32) = COPY $sgpr2 - %4:sgpr(s32) = COPY $sgpr3 - %5:sgpr(s32) = COPY $sgpr4 - %6:sgpr(s32) = COPY $sgpr5 - %7:sgpr(s32) = COPY $sgpr6 - %8:sgpr(s32) = COPY $sgpr7 - %9:sgpr(s32) = G_ICMP intpred(ne), %1, %2 - %10:sgpr(s32) = G_ICMP intpred(ne), %5, %6 - %11:sgpr(s32) = G_SELECT %9, %7, %8 - %12:sgpr(s32) = G_SELECT %10, %3, %4 - %13:vgpr(s32) = COPY %11 - G_STORE %13, %0 :: (volatile store (s32), addrspace 1) - %14:vgpr(s32) = COPY %12 - G_STORE %14, %0 :: (volatile store (s32), addrspace 1) + %1:sgpr(i32) = COPY $sgpr0 + %2:sgpr(i32) = COPY $sgpr1 + %3:sgpr(i32) = COPY $sgpr2 + %4:sgpr(i32) = COPY $sgpr3 + %5:sgpr(i32) = COPY $sgpr4 + %6:sgpr(i32) = COPY $sgpr5 + %7:sgpr(i32) = COPY $sgpr6 + %8:sgpr(i32) = COPY $sgpr7 + %9:sgpr(i32) = G_ICMP intpred(ne), %1, %2 + %10:sgpr(i32) = G_ICMP intpred(ne), %5, %6 + %11:sgpr(i32) = G_SELECT %9, %7, %8 + %12:sgpr(i32) = G_SELECT %10, %3, %4 + %13:vgpr(i32) = COPY %11 + G_STORE %13, %0 :: (volatile store (i32), addrspace 1) + %14:vgpr(i32) = COPY %12 + G_STORE %14, %0 :: (volatile store (i32), addrspace 1) ... --- @@ -71,50 +71,50 @@ body: | liveins: $vgpr0_vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3 %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:sgpr(s32) = COPY $sgpr0 - %2:sgpr(s32) = COPY $sgpr1 - %3:sgpr(s32) = COPY $sgpr2 - %4:sgpr(s32) = COPY $sgpr3 - %5:sgpr(s32) = G_ICMP intpred(ne), %1, %2 - %6:sgpr(s32) = G_ICMP intpred(eq), %1, %2 - %7:sgpr(s32) = G_ICMP intpred(sgt), %1, %2 - %8:sgpr(s32) = G_ICMP intpred(sge), %1, %2 - %9:sgpr(s32) = G_ICMP intpred(slt), %1, %2 - %10:sgpr(s32) = G_ICMP intpred(sle), %1, %2 - %11:sgpr(s32) = G_ICMP intpred(ugt), %1, %2 - %12:sgpr(s32) = G_ICMP intpred(uge), %1, %2 - %13:sgpr(s32) = G_ICMP intpred(ult), %1, %2 - %14:sgpr(s32) = G_ICMP intpred(ule), %1, %2 - %15:sgpr(s32) = G_SELECT %5, %3, %4 - %16:sgpr(s32) = G_SELECT %6, %3, %4 - %17:sgpr(s32) = G_SELECT %7, %3, %4 - %18:sgpr(s32) = G_SELECT %8, %3, %4 - %19:sgpr(s32) = G_SELECT %9, %3, %4 - %20:sgpr(s32) = G_SELECT %10, %3, %4 - %21:sgpr(s32) = G_SELECT %11, %3, %4 - %22:sgpr(s32) = G_SELECT %12, %3, %4 - %23:sgpr(s32) = G_SELECT %13, %3, %4 - %24:sgpr(s32) = G_SELECT %14, %3, %4 - %25:vgpr(s32) = COPY %15 - G_STORE %25, %0 :: (volatile store (s32), addrspace 1) - %26:vgpr(s32) = COPY %16 - G_STORE %26, %0 :: (volatile store (s32), addrspace 1) - %27:vgpr(s32) = COPY %17 - G_STORE %27, %0 :: (volatile store (s32), addrspace 1) - %28:vgpr(s32) = COPY %18 - G_STORE %28, %0 :: (volatile store (s32), addrspace 1) - %29:vgpr(s32) = COPY %19 - G_STORE %29, %0 :: (volatile store (s32), addrspace 1) - %30:vgpr(s32) = COPY %20 - G_STORE %30, %0 :: (volatile store (s32), addrspace 1) - %31:vgpr(s32) = COPY %21 - G_STORE %31, %0 :: (volatile store (s32), addrspace 1) - %32:vgpr(s32) = COPY %22 - G_STORE %32, %0 :: (volatile store (s32), addrspace 1) - %33:vgpr(s32) = COPY %23 - G_STORE %33, %0 :: (volatile store (s32), addrspace 1) - %34:vgpr(s32) = COPY %24 - G_STORE %34, %0 :: (volatile store (s32), addrspace 1) + %1:sgpr(i32) = COPY $sgpr0 + %2:sgpr(i32) = COPY $sgpr1 + 
%3:sgpr(i32) = COPY $sgpr2 + %4:sgpr(i32) = COPY $sgpr3 + %5:sgpr(i32) = G_ICMP intpred(ne), %1, %2 + %6:sgpr(i32) = G_ICMP intpred(eq), %1, %2 + %7:sgpr(i32) = G_ICMP intpred(sgt), %1, %2 + %8:sgpr(i32) = G_ICMP intpred(sge), %1, %2 + %9:sgpr(i32) = G_ICMP intpred(slt), %1, %2 + %10:sgpr(i32) = G_ICMP intpred(sle), %1, %2 + %11:sgpr(i32) = G_ICMP intpred(ugt), %1, %2 + %12:sgpr(i32) = G_ICMP intpred(uge), %1, %2 + %13:sgpr(i32) = G_ICMP intpred(ult), %1, %2 + %14:sgpr(i32) = G_ICMP intpred(ule), %1, %2 + %15:sgpr(i32) = G_SELECT %5, %3, %4 + %16:sgpr(i32) = G_SELECT %6, %3, %4 + %17:sgpr(i32) = G_SELECT %7, %3, %4 + %18:sgpr(i32) = G_SELECT %8, %3, %4 + %19:sgpr(i32) = G_SELECT %9, %3, %4 + %20:sgpr(i32) = G_SELECT %10, %3, %4 + %21:sgpr(i32) = G_SELECT %11, %3, %4 + %22:sgpr(i32) = G_SELECT %12, %3, %4 + %23:sgpr(i32) = G_SELECT %13, %3, %4 + %24:sgpr(i32) = G_SELECT %14, %3, %4 + %25:vgpr(i32) = COPY %15 + G_STORE %25, %0 :: (volatile store (i32), addrspace 1) + %26:vgpr(i32) = COPY %16 + G_STORE %26, %0 :: (volatile store (i32), addrspace 1) + %27:vgpr(i32) = COPY %17 + G_STORE %27, %0 :: (volatile store (i32), addrspace 1) + %28:vgpr(i32) = COPY %18 + G_STORE %28, %0 :: (volatile store (i32), addrspace 1) + %29:vgpr(i32) = COPY %19 + G_STORE %29, %0 :: (volatile store (i32), addrspace 1) + %30:vgpr(i32) = COPY %20 + G_STORE %30, %0 :: (volatile store (i32), addrspace 1) + %31:vgpr(i32) = COPY %21 + G_STORE %31, %0 :: (volatile store (i32), addrspace 1) + %32:vgpr(i32) = COPY %22 + G_STORE %32, %0 :: (volatile store (i32), addrspace 1) + %33:vgpr(i32) = COPY %23 + G_STORE %33, %0 :: (volatile store (i32), addrspace 1) + %34:vgpr(i32) = COPY %24 + G_STORE %34, %0 :: (volatile store (i32), addrspace 1) ... --- @@ -141,20 +141,20 @@ body: | liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9 %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s32) = COPY $vgpr3 - %3:vgpr(s32) = COPY $vgpr4 - %4:vgpr(s32) = COPY $vgpr5 - %5:vgpr(s32) = COPY $vgpr6 - %6:vgpr(s32) = COPY $vgpr7 - %7:vgpr(s32) = COPY $vgpr8 - %8:vgpr(s32) = COPY $vgpr9 - %9:vcc(s1) = G_ICMP intpred(ne), %1, %2 - %10:vcc(s1) = G_ICMP intpred(ne), %5, %6 - %11:vgpr(s32) = G_SELECT %9, %7, %8 - %12:vgpr(s32) = G_SELECT %10, %3, %4 - G_STORE %11, %0 :: (volatile store (s32), addrspace 1) - G_STORE %12, %0 :: (volatile store (s32), addrspace 1) + %1:vgpr(i32) = COPY $vgpr2 + %2:vgpr(i32) = COPY $vgpr3 + %3:vgpr(i32) = COPY $vgpr4 + %4:vgpr(i32) = COPY $vgpr5 + %5:vgpr(i32) = COPY $vgpr6 + %6:vgpr(i32) = COPY $vgpr7 + %7:vgpr(i32) = COPY $vgpr8 + %8:vgpr(i32) = COPY $vgpr9 + %9:vcc(i1) = G_ICMP intpred(ne), %1, %2 + %10:vcc(i1) = G_ICMP intpred(ne), %5, %6 + %11:vgpr(i32) = G_SELECT %9, %7, %8 + %12:vgpr(i32) = G_SELECT %10, %3, %4 + G_STORE %11, %0 :: (volatile store (i32), addrspace 1) + G_STORE %12, %0 :: (volatile store (i32), addrspace 1) ... 
--- name: icmp_s32_valu @@ -178,40 +178,40 @@ body: | liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s32) = COPY $vgpr3 - %3:vgpr(s32) = COPY $vgpr4 - %4:vgpr(s32) = COPY $vgpr5 - %5:vcc(s1) = G_ICMP intpred(ne), %1, %2 - %6:vcc(s1) = G_ICMP intpred(eq), %1, %2 - %7:vcc(s1) = G_ICMP intpred(sgt), %1, %2 - %8:vcc(s1) = G_ICMP intpred(sge), %1, %2 - %9:vcc(s1) = G_ICMP intpred(slt), %1, %2 - %10:vcc(s1) = G_ICMP intpred(sle), %1, %2 - %11:vcc(s1) = G_ICMP intpred(ugt), %1, %2 - %12:vcc(s1) = G_ICMP intpred(uge), %1, %2 - %13:vcc(s1) = G_ICMP intpred(ult), %1, %2 - %14:vcc(s1) = G_ICMP intpred(ule), %1, %2 - %15:vgpr(s32) = G_SELECT %5, %3, %4 - %16:vgpr(s32) = G_SELECT %6, %3, %4 - %17:vgpr(s32) = G_SELECT %7, %3, %4 - %18:vgpr(s32) = G_SELECT %8, %3, %4 - %19:vgpr(s32) = G_SELECT %9, %3, %4 - %20:vgpr(s32) = G_SELECT %10, %3, %4 - %21:vgpr(s32) = G_SELECT %11, %3, %4 - %22:vgpr(s32) = G_SELECT %12, %3, %4 - %23:vgpr(s32) = G_SELECT %13, %3, %4 - %24:vgpr(s32) = G_SELECT %14, %3, %4 - G_STORE %15, %0 :: (volatile store (s32), addrspace 1) - G_STORE %16, %0 :: (volatile store (s32), addrspace 1) - G_STORE %17, %0 :: (volatile store (s32), addrspace 1) - G_STORE %18, %0 :: (volatile store (s32), addrspace 1) - G_STORE %19, %0 :: (volatile store (s32), addrspace 1) - G_STORE %20, %0 :: (volatile store (s32), addrspace 1) - G_STORE %21, %0 :: (volatile store (s32), addrspace 1) - G_STORE %22, %0 :: (volatile store (s32), addrspace 1) - G_STORE %23, %0 :: (volatile store (s32), addrspace 1) - G_STORE %24, %0 :: (volatile store (s32), addrspace 1) + %1:vgpr(i32) = COPY $vgpr2 + %2:vgpr(i32) = COPY $vgpr3 + %3:vgpr(i32) = COPY $vgpr4 + %4:vgpr(i32) = COPY $vgpr5 + %5:vcc(i1) = G_ICMP intpred(ne), %1, %2 + %6:vcc(i1) = G_ICMP intpred(eq), %1, %2 + %7:vcc(i1) = G_ICMP intpred(sgt), %1, %2 + %8:vcc(i1) = G_ICMP intpred(sge), %1, %2 + %9:vcc(i1) = G_ICMP intpred(slt), %1, %2 + %10:vcc(i1) = G_ICMP intpred(sle), %1, %2 + %11:vcc(i1) = G_ICMP intpred(ugt), %1, %2 + %12:vcc(i1) = G_ICMP intpred(uge), %1, %2 + %13:vcc(i1) = G_ICMP intpred(ult), %1, %2 + %14:vcc(i1) = G_ICMP intpred(ule), %1, %2 + %15:vgpr(i32) = G_SELECT %5, %3, %4 + %16:vgpr(i32) = G_SELECT %6, %3, %4 + %17:vgpr(i32) = G_SELECT %7, %3, %4 + %18:vgpr(i32) = G_SELECT %8, %3, %4 + %19:vgpr(i32) = G_SELECT %9, %3, %4 + %20:vgpr(i32) = G_SELECT %10, %3, %4 + %21:vgpr(i32) = G_SELECT %11, %3, %4 + %22:vgpr(i32) = G_SELECT %12, %3, %4 + %23:vgpr(i32) = G_SELECT %13, %3, %4 + %24:vgpr(i32) = G_SELECT %14, %3, %4 + G_STORE %15, %0 :: (volatile store (i32), addrspace 1) + G_STORE %16, %0 :: (volatile store (i32), addrspace 1) + G_STORE %17, %0 :: (volatile store (i32), addrspace 1) + G_STORE %18, %0 :: (volatile store (i32), addrspace 1) + G_STORE %19, %0 :: (volatile store (i32), addrspace 1) + G_STORE %20, %0 :: (volatile store (i32), addrspace 1) + G_STORE %21, %0 :: (volatile store (i32), addrspace 1) + G_STORE %22, %0 :: (volatile store (i32), addrspace 1) + G_STORE %23, %0 :: (volatile store (i32), addrspace 1) + G_STORE %24, %0 :: (volatile store (i32), addrspace 1) ... 
--- @@ -229,13 +229,13 @@ body: | liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s32) = COPY $vgpr3 - %3:vgpr(s32) = COPY $vgpr4 - %4:vgpr(s32) = COPY $vgpr5 - %5:vcc(s1) = G_ICMP intpred(ne), %1, %2 - %6:vgpr(s32) = G_SELECT %5, %3, %4 - G_STORE %6, %0 :: (store (s32), addrspace 1) + %1:vgpr(i32) = COPY $vgpr2 + %2:vgpr(i32) = COPY $vgpr3 + %3:vgpr(i32) = COPY $vgpr4 + %4:vgpr(i32) = COPY $vgpr5 + %5:vcc(i1) = G_ICMP intpred(ne), %1, %2 + %6:vgpr(i32) = G_SELECT %5, %3, %4 + G_STORE %6, %0 :: (store (i32), addrspace 1) ... --- @@ -253,13 +253,13 @@ body: | liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr0 %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s32) = COPY $vgpr3 - %3:vgpr(s32) = COPY $vgpr4 - %4:sgpr(s32) = COPY $sgpr0 - %5:vcc(s1) = G_ICMP intpred(ne), %1, %4 - %6:vgpr(s32) = G_SELECT %5, %2, %3 - G_STORE %6, %0 :: (store (s32), addrspace 1) + %1:vgpr(i32) = COPY $vgpr2 + %2:vgpr(i32) = COPY $vgpr3 + %3:vgpr(i32) = COPY $vgpr4 + %4:sgpr(i32) = COPY $sgpr0 + %5:vcc(i1) = G_ICMP intpred(ne), %1, %4 + %6:vgpr(i32) = G_SELECT %5, %2, %3 + G_STORE %6, %0 :: (store (i32), addrspace 1) ... --- @@ -277,13 +277,13 @@ body: | liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr0 %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s32) = COPY $vgpr3 - %3:vgpr(s32) = COPY $vgpr4 - %4:sgpr(s32) = COPY $sgpr0 - %5:vcc(s1) = G_ICMP intpred(ne), %4, %1 - %6:vgpr(s32) = G_SELECT %5, %2, %3 - G_STORE %6, %0 :: (store (s32), addrspace 1) + %1:vgpr(i32) = COPY $vgpr2 + %2:vgpr(i32) = COPY $vgpr3 + %3:vgpr(i32) = COPY $vgpr4 + %4:sgpr(i32) = COPY $sgpr0 + %5:vcc(i1) = G_ICMP intpred(ne), %4, %1 + %6:vgpr(i32) = G_SELECT %5, %2, %3 + G_STORE %6, %0 :: (store (i32), addrspace 1) ... --- @@ -301,13 +301,13 @@ body: | liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr0 %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s32) = COPY $vgpr3 - %3:vgpr(s32) = COPY $vgpr4 - %4:sgpr(s32) = COPY $sgpr0 - %5:vcc(s1) = G_ICMP intpred(ne), %4, %1 - %6:vgpr(s32) = G_SELECT %5, %2, %3 - G_STORE %6, %0 :: (store (s32), addrspace 1) + %1:vgpr(i32) = COPY $vgpr2 + %2:vgpr(i32) = COPY $vgpr3 + %3:vgpr(i32) = COPY $vgpr4 + %4:sgpr(i32) = COPY $sgpr0 + %5:vcc(i1) = G_ICMP intpred(ne), %4, %1 + %6:vgpr(i32) = G_SELECT %5, %2, %3 + G_STORE %6, %0 :: (store (i32), addrspace 1) ... --- @@ -326,8 +326,8 @@ body: | %0:sgpr(p3) = COPY $sgpr0 %1:sgpr(p3) = COPY $sgpr1 - %2:sgpr(s32) = G_ICMP intpred(eq), %0, %1 - %3:sgpr(s32) = G_ICMP intpred(ne), %0, %1 + %2:sgpr(i32) = G_ICMP intpred(eq), %0, %1 + %3:sgpr(i32) = G_ICMP intpred(ne), %0, %1 S_ENDPGM 0, implicit %2, implicit %3 ... @@ -348,12 +348,12 @@ body: | %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(p3) = COPY $vgpr1 - %2:vcc(s1) = G_ICMP intpred(eq), %0, %1 - %3:vcc(s1) = G_ICMP intpred(ne), %0, %1 - %4:vgpr(s1) = COPY %2 - %5:vgpr(s1) = COPY %3 - %6:vgpr(s32) = G_SEXT %4 - %7:vgpr(s32) = G_SEXT %5 + %2:vcc(i1) = G_ICMP intpred(eq), %0, %1 + %3:vcc(i1) = G_ICMP intpred(ne), %0, %1 + %4:vgpr(i1) = COPY %2 + %5:vgpr(i1) = COPY %3 + %6:vgpr(i32) = G_SEXT %4 + %7:vgpr(i32) = G_SEXT %5 S_ENDPGM 0, implicit %6, implicit %7 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir index d45bc31a12729..038c53fafc710 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir @@ -36,12 +36,12 @@ body: | ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[V_CMP_EQ_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_fake16_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:sgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vcc(s1) = G_ICMP intpred(eq), %2, %3 - S_ENDPGM 0, implicit %4 + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:sgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %4:vcc(i1) = G_ICMP intpred(eq), %2(i16), %3 + S_ENDPGM 0, implicit %4(i1) ... --- @@ -77,12 +77,12 @@ body: | ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX11-NEXT: [[V_CMP_EQ_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_fake16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr(s32) = COPY $sgpr0 - %2:vgpr(s16) = G_TRUNC %0 - %3:sgpr(s16) = G_TRUNC %1 - %4:vcc(s1) = G_ICMP intpred(eq), %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = COPY $sgpr0 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:sgpr(i16) = G_TRUNC %1(i32) + %4:vcc(i1) = G_ICMP intpred(eq), %2(i16), %3 + S_ENDPGM 0, implicit %4(i1) ... --- @@ -118,12 +118,12 @@ body: | ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[V_CMP_EQ_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_fake16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vcc(s1) = G_ICMP intpred(eq), %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %4:vcc(i1) = G_ICMP intpred(eq), %2(i16), %3 + S_ENDPGM 0, implicit %4(i1) ... --- @@ -159,12 +159,12 @@ body: | ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[V_CMP_NE_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_fake16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vcc(s1) = G_ICMP intpred(ne), %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %4:vcc(i1) = G_ICMP intpred(ne), %2(i16), %3 + S_ENDPGM 0, implicit %4(i1) ... 
--- @@ -200,12 +200,12 @@ body: | ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[V_CMP_LT_I16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_fake16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vcc(s1) = G_ICMP intpred(slt), %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %4:vcc(i1) = G_ICMP intpred(slt), %2(i16), %3 + S_ENDPGM 0, implicit %4(i1) ... --- @@ -241,12 +241,12 @@ body: | ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[V_CMP_LE_I16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_I16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_fake16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vcc(s1) = G_ICMP intpred(sle), %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %4:vcc(i1) = G_ICMP intpred(sle), %2(i16), %3 + S_ENDPGM 0, implicit %4(i1) ... --- @@ -282,12 +282,12 @@ body: | ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[V_CMP_LT_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_U16_fake16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vcc(s1) = G_ICMP intpred(ult), %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %4:vcc(i1) = G_ICMP intpred(ult), %2(i16), %3 + S_ENDPGM 0, implicit %4(i1) ... --- @@ -323,11 +323,10 @@ body: | ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[V_CMP_LE_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_U16_fake16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vcc(s1) = G_ICMP intpred(ule), %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %4:vcc(i1) = G_ICMP intpred(ule), %2(i16), %3 + S_ENDPGM 0, implicit %4(i1) ... 
- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s64.mir index 5c78c52246b25..5bf257e871723 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s64.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s64.mir @@ -8,14 +8,14 @@ # ERR-NOT: remark -# GFX6-ERR: remark: :0:0: cannot select: %2:sgpr(s32) = G_ICMP intpred(eq), %0:sgpr(s64), %1:sgpr (in function: icmp_eq_s64_ss) -# GFX6-ERR-NEXT: remark: :0:0: cannot select: %2:sgpr(s32) = G_ICMP intpred(ne), %0:sgpr(s64), %1:sgpr (in function: icmp_ne_s64_ss) -# GFX6-ERR-NEXT: remark: :0:0: cannot select: %2:sgpr(s32) = G_ICMP intpred(slt), %0:sgpr(s64), %1:sgpr (in function: icmp_slt_s64_ss) -# GFX6-ERR-NEXT: remark: :0:0: cannot select: %2:sgpr(s32) = G_ICMP intpred(eq), %0:sgpr(p0), %1:sgpr (in function: icmp_eq_p0_ss) -# GFX6-ERR-NEXT: remark: :0:0: cannot select: %2:sgpr(s32) = G_ICMP intpred(eq), %0:sgpr(p1), %1:sgpr (in function: icmp_eq_p1_ss) -# GFX6-ERR-NEXT: remark: :0:0: cannot select: %2:sgpr(s32) = G_ICMP intpred(eq), %0:sgpr(p999), %1:sgpr (in function: icmp_eq_p999_ss) +# GFX6-ERR: remark: :0:0: cannot select: %2:sgpr(i32) = G_ICMP intpred(eq), %0:sgpr(i64), %1:sgpr (in function: icmp_eq_s64_ss) +# GFX6-ERR-NEXT: remark: :0:0: cannot select: %2:sgpr(i32) = G_ICMP intpred(ne), %0:sgpr(i64), %1:sgpr (in function: icmp_ne_s64_ss) +# GFX6-ERR-NEXT: remark: :0:0: cannot select: %2:sgpr(i32) = G_ICMP intpred(slt), %0:sgpr(i64), %1:sgpr (in function: icmp_slt_s64_ss) +# GFX6-ERR-NEXT: remark: :0:0: cannot select: %2:sgpr(i32) = G_ICMP intpred(eq), %0:sgpr(p0), %1:sgpr (in function: icmp_eq_p0_ss) +# GFX6-ERR-NEXT: remark: :0:0: cannot select: %2:sgpr(i32) = G_ICMP intpred(eq), %0:sgpr(p1), %1:sgpr (in function: icmp_eq_p1_ss) +# GFX6-ERR-NEXT: remark: :0:0: cannot select: %2:sgpr(i32) = G_ICMP intpred(eq), %0:sgpr(p999), %1:sgpr (in function: icmp_eq_p999_ss) -# GFX8-ERR: remark: :0:0: cannot select: %2:sgpr(s32) = G_ICMP intpred(slt), %0:sgpr(s64), %1:sgpr (in function: icmp_slt_s64_ss) +# GFX8-ERR: remark: :0:0: cannot select: %2:sgpr(i32) = G_ICMP intpred(slt), %0:sgpr(i64), %1:sgpr (in function: icmp_slt_s64_ss) # ERR-NOT: remark @@ -36,17 +36,18 @@ body: | ; GFX8-NEXT: S_CMP_EQ_U64 [[COPY]], [[COPY1]], implicit-def $scc ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc ; GFX8-NEXT: S_ENDPGM 0, implicit [[COPY2]] + ; ; GFX6-LABEL: name: icmp_eq_s64_ss ; GFX6: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s64), [[COPY1]] - ; GFX6-NEXT: S_ENDPGM 0, implicit [[ICMP]](s32) - %0:sgpr(s64) = COPY $sgpr0_sgpr1 - %1:sgpr(s64) = COPY $sgpr2_sgpr3 - %2:sgpr(s32) = G_ICMP intpred(eq), %0, %1 - S_ENDPGM 0, implicit %2 + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sgpr(i64) = COPY $sgpr2_sgpr3 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY]](i64), [[COPY1]] + ; GFX6-NEXT: S_ENDPGM 0, implicit [[ICMP]](i32) + %0:sgpr(i64) = COPY $sgpr0_sgpr1 + %1:sgpr(i64) = COPY $sgpr2_sgpr3 + %2:sgpr(i32) = G_ICMP intpred(eq), %0(i64), %1 + S_ENDPGM 0, implicit %2(i32) ... 
@@ -67,17 +68,18 @@ body: | ; GFX8-NEXT: S_CMP_LG_U64 [[COPY]], [[COPY1]], implicit-def $scc ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc ; GFX8-NEXT: S_ENDPGM 0, implicit [[COPY2]] + ; ; GFX6-LABEL: name: icmp_ne_s64_ss ; GFX6: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s64), [[COPY1]] - ; GFX6-NEXT: S_ENDPGM 0, implicit [[ICMP]](s32) - %0:sgpr(s64) = COPY $sgpr0_sgpr1 - %1:sgpr(s64) = COPY $sgpr2_sgpr3 - %2:sgpr(s32) = G_ICMP intpred(ne), %0, %1 - S_ENDPGM 0, implicit %2 + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sgpr(i64) = COPY $sgpr2_sgpr3 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ne), [[COPY]](i64), [[COPY1]] + ; GFX6-NEXT: S_ENDPGM 0, implicit [[ICMP]](i32) + %0:sgpr(i64) = COPY $sgpr0_sgpr1 + %1:sgpr(i64) = COPY $sgpr2_sgpr3 + %2:sgpr(i32) = G_ICMP intpred(ne), %0(i64), %1 + S_ENDPGM 0, implicit %2(i32) ... @@ -93,21 +95,22 @@ body: | ; GFX8-LABEL: name: icmp_slt_s64_ss ; GFX8: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[COPY]](s64), [[COPY1]] - ; GFX8-NEXT: S_ENDPGM 0, implicit [[ICMP]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(i64) = COPY $sgpr2_sgpr3 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(slt), [[COPY]](i64), [[COPY1]] + ; GFX8-NEXT: S_ENDPGM 0, implicit [[ICMP]](i32) + ; ; GFX6-LABEL: name: icmp_slt_s64_ss ; GFX6: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[COPY]](s64), [[COPY1]] - ; GFX6-NEXT: S_ENDPGM 0, implicit [[ICMP]](s32) - %0:sgpr(s64) = COPY $sgpr0_sgpr1 - %1:sgpr(s64) = COPY $sgpr2_sgpr3 - %2:sgpr(s32) = G_ICMP intpred(slt), %0, %1 - S_ENDPGM 0, implicit %2 + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sgpr(i64) = COPY $sgpr2_sgpr3 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(slt), [[COPY]](i64), [[COPY1]] + ; GFX6-NEXT: S_ENDPGM 0, implicit [[ICMP]](i32) + %0:sgpr(i64) = COPY $sgpr0_sgpr1 + %1:sgpr(i64) = COPY $sgpr2_sgpr3 + %2:sgpr(i32) = G_ICMP intpred(slt), %0(i64), %1 + S_ENDPGM 0, implicit %2(i32) ... 
@@ -129,6 +132,7 @@ body: | ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_EQ_U64_e64_]] ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] + ; ; GFX6-LABEL: name: icmp_eq_s64_vv ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} @@ -138,12 +142,12 @@ body: | ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_EQ_U64_e64_]] ; GFX6-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = COPY $vgpr2_vgpr3 - %2:vcc(s1) = G_ICMP intpred(eq), %0, %1 - %3:vgpr(s1) = COPY %2 - %4:vgpr(s32) = G_ZEXT %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(i64) = COPY $vgpr2_vgpr3 + %2:vcc(i1) = G_ICMP intpred(eq), %0(i64), %1 + %3:vgpr(i1) = COPY %2(i1) + %4:vgpr(i32) = G_ZEXT %3(i1) + S_ENDPGM 0, implicit %4(i32) ... @@ -165,6 +169,7 @@ body: | ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_NE_U64_e64_]] ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] + ; ; GFX6-LABEL: name: icmp_ne_s64_vv ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} @@ -174,12 +179,12 @@ body: | ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_NE_U64_e64_]] ; GFX6-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = COPY $vgpr2_vgpr3 - %2:vcc(s1) = G_ICMP intpred(ne), %0, %1 - %3:vgpr(s1) = COPY %2 - %4:vgpr(s32) = G_ZEXT %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(i64) = COPY $vgpr2_vgpr3 + %2:vcc(i1) = G_ICMP intpred(ne), %0(i64), %1 + %3:vgpr(i1) = COPY %2(i1) + %4:vgpr(i32) = G_ZEXT %3(i1) + S_ENDPGM 0, implicit %4(i32) ... @@ -201,6 +206,7 @@ body: | ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_GT_I64_e64_]] ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] + ; ; GFX6-LABEL: name: icmp_sgt_s64_vv ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} @@ -210,12 +216,12 @@ body: | ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_GT_I64_e64_]] ; GFX6-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = COPY $vgpr2_vgpr3 - %2:vcc(s1) = G_ICMP intpred(sgt), %0, %1 - %3:vgpr(s1) = COPY %2 - %4:vgpr(s32) = G_ZEXT %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(i64) = COPY $vgpr2_vgpr3 + %2:vcc(i1) = G_ICMP intpred(sgt), %0(i64), %1 + %3:vgpr(i1) = COPY %2(i1) + %4:vgpr(i32) = G_ZEXT %3(i1) + S_ENDPGM 0, implicit %4(i32) ... 
@@ -237,6 +243,7 @@ body: | ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_GE_I64_e64_]] ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] + ; ; GFX6-LABEL: name: icmp_sge_s64_vv ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} @@ -246,12 +253,12 @@ body: | ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_GE_I64_e64_]] ; GFX6-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = COPY $vgpr2_vgpr3 - %2:vcc(s1) = G_ICMP intpred(sge), %0, %1 - %3:vgpr(s1) = COPY %2 - %4:vgpr(s32) = G_ZEXT %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(i64) = COPY $vgpr2_vgpr3 + %2:vcc(i1) = G_ICMP intpred(sge), %0(i64), %1 + %3:vgpr(i1) = COPY %2(i1) + %4:vgpr(i32) = G_ZEXT %3(i1) + S_ENDPGM 0, implicit %4(i32) ... @@ -273,6 +280,7 @@ body: | ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_LT_I64_e64_]] ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] + ; ; GFX6-LABEL: name: icmp_slt_s64_vv ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} @@ -282,12 +290,12 @@ body: | ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_LT_I64_e64_]] ; GFX6-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = COPY $vgpr2_vgpr3 - %2:vcc(s1) = G_ICMP intpred(slt), %0, %1 - %3:vgpr(s1) = COPY %2 - %4:vgpr(s32) = G_ZEXT %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(i64) = COPY $vgpr2_vgpr3 + %2:vcc(i1) = G_ICMP intpred(slt), %0(i64), %1 + %3:vgpr(i1) = COPY %2(i1) + %4:vgpr(i32) = G_ZEXT %3(i1) + S_ENDPGM 0, implicit %4(i32) ... @@ -309,6 +317,7 @@ body: | ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_LE_I64_e64_]] ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] + ; ; GFX6-LABEL: name: icmp_sle_s64_vv ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} @@ -318,12 +327,12 @@ body: | ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_LE_I64_e64_]] ; GFX6-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = COPY $vgpr2_vgpr3 - %2:vcc(s1) = G_ICMP intpred(sle), %0, %1 - %3:vgpr(s1) = COPY %2 - %4:vgpr(s32) = G_ZEXT %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(i64) = COPY $vgpr2_vgpr3 + %2:vcc(i1) = G_ICMP intpred(sle), %0(i64), %1 + %3:vgpr(i1) = COPY %2(i1) + %4:vgpr(i32) = G_ZEXT %3(i1) + S_ENDPGM 0, implicit %4(i32) ... 
@@ -345,6 +354,7 @@ body: | ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_GT_U64_e64_]] ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] + ; ; GFX6-LABEL: name: icmp_ugt_s64_vv ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} @@ -354,12 +364,12 @@ body: | ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_GT_U64_e64_]] ; GFX6-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = COPY $vgpr2_vgpr3 - %2:vcc(s1) = G_ICMP intpred(ugt), %0, %1 - %3:vgpr(s1) = COPY %2 - %4:vgpr(s32) = G_ZEXT %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(i64) = COPY $vgpr2_vgpr3 + %2:vcc(i1) = G_ICMP intpred(ugt), %0(i64), %1 + %3:vgpr(i1) = COPY %2(i1) + %4:vgpr(i32) = G_ZEXT %3(i1) + S_ENDPGM 0, implicit %4(i32) ... @@ -381,6 +391,7 @@ body: | ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_GE_U64_e64_]] ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] + ; ; GFX6-LABEL: name: icmp_uge_s64_vv ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} @@ -390,12 +401,12 @@ body: | ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_GE_U64_e64_]] ; GFX6-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = COPY $vgpr2_vgpr3 - %2:vcc(s1) = G_ICMP intpred(uge), %0, %1 - %3:vgpr(s1) = COPY %2 - %4:vgpr(s32) = G_ZEXT %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(i64) = COPY $vgpr2_vgpr3 + %2:vcc(i1) = G_ICMP intpred(uge), %0(i64), %1 + %3:vgpr(i1) = COPY %2(i1) + %4:vgpr(i32) = G_ZEXT %3(i1) + S_ENDPGM 0, implicit %4(i32) ... @@ -417,6 +428,7 @@ body: | ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_LT_U64_e64_]] ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] + ; ; GFX6-LABEL: name: icmp_ult_s64_vv ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} @@ -426,12 +438,12 @@ body: | ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_LT_U64_e64_]] ; GFX6-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = COPY $vgpr2_vgpr3 - %2:vcc(s1) = G_ICMP intpred(ult), %0, %1 - %3:vgpr(s1) = COPY %2 - %4:vgpr(s32) = G_ZEXT %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(i64) = COPY $vgpr2_vgpr3 + %2:vcc(i1) = G_ICMP intpred(ult), %0(i64), %1 + %3:vgpr(i1) = COPY %2(i1) + %4:vgpr(i32) = G_ZEXT %3(i1) + S_ENDPGM 0, implicit %4(i32) ... 
@@ -453,6 +465,7 @@ body: | ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_LE_U64_e64_]] ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] + ; ; GFX6-LABEL: name: icmp_ule_s64_vv ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} @@ -462,12 +475,12 @@ body: | ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_LE_U64_e64_]] ; GFX6-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = COPY $vgpr2_vgpr3 - %2:vcc(s1) = G_ICMP intpred(ule), %0, %1 - %3:vgpr(s1) = COPY %2 - %4:vgpr(s32) = G_ZEXT %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(i64) = COPY $vgpr2_vgpr3 + %2:vcc(i1) = G_ICMP intpred(ule), %0(i64), %1 + %3:vgpr(i1) = COPY %2(i1) + %4:vgpr(i32) = G_ZEXT %3(i1) + S_ENDPGM 0, implicit %4(i32) ... @@ -488,17 +501,18 @@ body: | ; GFX8-NEXT: S_CMP_EQ_U64 [[COPY]], [[COPY1]], implicit-def $scc ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc ; GFX8-NEXT: S_ENDPGM 0, implicit [[COPY2]] + ; ; GFX6-LABEL: name: icmp_eq_p0_ss ; GFX6: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sgpr(p0) = COPY $sgpr2_sgpr3 - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](p0), [[COPY1]] - ; GFX6-NEXT: S_ENDPGM 0, implicit [[ICMP]](s32) + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY]](p0), [[COPY1]] + ; GFX6-NEXT: S_ENDPGM 0, implicit [[ICMP]](i32) %0:sgpr(p0) = COPY $sgpr0_sgpr1 %1:sgpr(p0) = COPY $sgpr2_sgpr3 - %2:sgpr(s32) = G_ICMP intpred(eq), %0, %1 - S_ENDPGM 0, implicit %2 + %2:sgpr(i32) = G_ICMP intpred(eq), %0(p0), %1 + S_ENDPGM 0, implicit %2(i32) ... --- @@ -518,17 +532,18 @@ body: | ; GFX8-NEXT: S_CMP_EQ_U64 [[COPY]], [[COPY1]], implicit-def $scc ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc ; GFX8-NEXT: S_ENDPGM 0, implicit [[COPY2]] + ; ; GFX6-LABEL: name: icmp_eq_p1_ss ; GFX6: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sgpr(p1) = COPY $sgpr2_sgpr3 - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](p1), [[COPY1]] - ; GFX6-NEXT: S_ENDPGM 0, implicit [[ICMP]](s32) + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY]](p1), [[COPY1]] + ; GFX6-NEXT: S_ENDPGM 0, implicit [[ICMP]](i32) %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:sgpr(p1) = COPY $sgpr2_sgpr3 - %2:sgpr(s32) = G_ICMP intpred(eq), %0, %1 - S_ENDPGM 0, implicit %2 + %2:sgpr(i32) = G_ICMP intpred(eq), %0(p1), %1 + S_ENDPGM 0, implicit %2(i32) ... 
@@ -549,17 +564,18 @@ body: | ; GFX8-NEXT: S_CMP_EQ_U64 [[COPY]], [[COPY1]], implicit-def $scc ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc ; GFX8-NEXT: S_ENDPGM 0, implicit [[COPY2]] + ; ; GFX6-LABEL: name: icmp_eq_p999_ss ; GFX6: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr(p999) = COPY $sgpr0_sgpr1 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sgpr(p999) = COPY $sgpr2_sgpr3 - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](p999), [[COPY1]] - ; GFX6-NEXT: S_ENDPGM 0, implicit [[ICMP]](s32) + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY]](p999), [[COPY1]] + ; GFX6-NEXT: S_ENDPGM 0, implicit [[ICMP]](i32) %0:sgpr(p999) = COPY $sgpr0_sgpr1 %1:sgpr(p999) = COPY $sgpr2_sgpr3 - %2:sgpr(s32) = G_ICMP intpred(eq), %0, %1 - S_ENDPGM 0, implicit %2 + %2:sgpr(i32) = G_ICMP intpred(eq), %0(p999), %1 + S_ENDPGM 0, implicit %2(i32) ... @@ -581,6 +597,7 @@ body: | ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_EQ_U64_e64_]] ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] + ; ; GFX6-LABEL: name: icmp_eq_p0_vv ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} @@ -592,10 +609,10 @@ body: | ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(p0) = COPY $vgpr2_vgpr3 - %2:vcc(s1) = G_ICMP intpred(eq), %0, %1 - %3:vgpr(s1) = COPY %2 - %4:vgpr(s32) = G_ZEXT %3 - S_ENDPGM 0, implicit %4 + %2:vcc(i1) = G_ICMP intpred(eq), %0(p0), %1 + %3:vgpr(i1) = COPY %2(i1) + %4:vgpr(i32) = G_ZEXT %3(i1) + S_ENDPGM 0, implicit %4(i32) ... @@ -617,6 +634,7 @@ body: | ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_EQ_U64_e64_]] ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] + ; ; GFX6-LABEL: name: icmp_eq_p1_vv ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} @@ -628,10 +646,10 @@ body: | ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(p1) = COPY $vgpr2_vgpr3 - %2:vcc(s1) = G_ICMP intpred(eq), %0, %1 - %3:vgpr(s1) = COPY %2 - %4:vgpr(s32) = G_ZEXT %3 - S_ENDPGM 0, implicit %4 + %2:vcc(i1) = G_ICMP intpred(eq), %0(p1), %1 + %3:vgpr(i1) = COPY %2(i1) + %4:vgpr(i32) = G_ZEXT %3(i1) + S_ENDPGM 0, implicit %4(i32) ... @@ -653,6 +671,7 @@ body: | ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_EQ_U64_e64_]] ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] + ; ; GFX6-LABEL: name: icmp_eq_p999_vv ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} @@ -664,9 +683,9 @@ body: | ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] %0:vgpr(p999) = COPY $vgpr0_vgpr1 %1:vgpr(p999) = COPY $vgpr2_vgpr3 - %2:vcc(s1) = G_ICMP intpred(eq), %0, %1 - %3:vgpr(s1) = COPY %2 - %4:vgpr(s32) = G_ZEXT %3 - S_ENDPGM 0, implicit %4 + %2:vcc(i1) = G_ICMP intpred(eq), %0(p999), %1 + %3:vgpr(i1) = COPY %2(i1) + %4:vgpr(i32) = G_ZEXT %3(i1) + S_ENDPGM 0, implicit %4(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-implicit-def.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-implicit-def.mir index d371c56f55b86..a2b680878b086 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-implicit-def.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-implicit-def.mir @@ -12,8 +12,8 @@ body: | ; GCN-LABEL: name: implicit_def_s32_sgpr ; GCN: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF ; GCN-NEXT: S_ENDPGM 0, implicit [[DEF]] - %0:sgpr(s32) = G_IMPLICIT_DEF - S_ENDPGM 0, implicit %0 + %0:sgpr(i32) = G_IMPLICIT_DEF + S_ENDPGM 0, implicit %0(i32) ... --- @@ -26,8 +26,8 @@ body: | ; GCN-LABEL: name: implicit_def_s32_vgpr ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: S_ENDPGM 0, implicit [[DEF]] - %0:vgpr(s32) = G_IMPLICIT_DEF - S_ENDPGM 0, implicit %0 + %0:vgpr(i32) = G_IMPLICIT_DEF + S_ENDPGM 0, implicit %0(i32) ... --- @@ -41,8 +41,8 @@ body: | ; GCN-LABEL: name: implicit_def_s64_sgpr ; GCN: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF ; GCN-NEXT: S_ENDPGM 0, implicit [[DEF]] - %0:sgpr(s64) = G_IMPLICIT_DEF - S_ENDPGM 0, implicit %0 + %0:sgpr(i64) = G_IMPLICIT_DEF + S_ENDPGM 0, implicit %0(i64) ... --- @@ -56,8 +56,8 @@ body: | ; GCN-LABEL: name: implicit_def_s64_vgpr ; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF ; GCN-NEXT: S_ENDPGM 0, implicit [[DEF]] - %0:vgpr(s64) = G_IMPLICIT_DEF - S_ENDPGM 0, implicit %0 + %0:vgpr(i64) = G_IMPLICIT_DEF + S_ENDPGM 0, implicit %0(i64) ... --- @@ -71,7 +71,7 @@ body: | ; GCN: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF ; GCN-NEXT: S_ENDPGM 0, implicit [[DEF]] %0:sgpr(p0) = G_IMPLICIT_DEF - S_ENDPGM 0, implicit %0 + S_ENDPGM 0, implicit %0(p0) ... --- @@ -85,7 +85,7 @@ body: | ; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF ; GCN-NEXT: S_ENDPGM 0, implicit [[DEF]] %0:vgpr(p0) = G_IMPLICIT_DEF - S_ENDPGM 0, implicit %0 + S_ENDPGM 0, implicit %0(p0) ... --- @@ -99,10 +99,10 @@ body: | ; GCN-LABEL: name: implicit_def_p1_vgpr ; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec - ; GCN-NEXT: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GCN-NEXT: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32), addrspace 1) %0:vgpr(p1) = G_IMPLICIT_DEF - %1:vgpr(s32) = G_CONSTANT i32 4 - G_STORE %1, %0 :: (store (s32), addrspace 1) + %1:vgpr(i32) = G_CONSTANT i32 4 + G_STORE %1(i32), %0(p1) :: (store (i32), addrspace 1) ... --- @@ -117,10 +117,10 @@ body: | ; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec ; GCN-NEXT: $m0 = S_MOV_B32 -1 - ; GCN-NEXT: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GCN-NEXT: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32), addrspace 1) %0:vgpr(p3) = G_IMPLICIT_DEF - %1:vgpr(s32) = G_CONSTANT i32 4 - G_STORE %1, %0 :: (store (s32), addrspace 1) + %1:vgpr(i32) = G_CONSTANT i32 4 + G_STORE %1(i32), %0(p3) :: (store (i32), addrspace 1) ... 
--- @@ -134,10 +134,10 @@ body: | ; GCN-LABEL: name: implicit_def_p4_vgpr ; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec - ; GCN-NEXT: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GCN-NEXT: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32), addrspace 1) %0:vgpr(p4) = G_IMPLICIT_DEF - %1:vgpr(s32) = G_CONSTANT i32 4 - G_STORE %1, %0 :: (store (s32), addrspace 1) + %1:vgpr(i32) = G_CONSTANT i32 4 + G_STORE %1(i32), %0(p4) :: (store (i32), addrspace 1) ... --- @@ -151,8 +151,8 @@ body: | ; GCN-LABEL: name: implicit_def_s1_vgpr ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: S_ENDPGM 0, implicit [[DEF]] - %0:vgpr(s1) = G_IMPLICIT_DEF - S_ENDPGM 0, implicit %0 + %0:vgpr(i1) = G_IMPLICIT_DEF + S_ENDPGM 0, implicit %0(i1) ... --- @@ -166,8 +166,8 @@ body: | ; GCN-LABEL: name: implicit_def_s1_sgpr ; GCN: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF ; GCN-NEXT: S_ENDPGM 0, implicit [[DEF]] - %0:sgpr(s1) = G_IMPLICIT_DEF - S_ENDPGM 0, implicit %0 + %0:sgpr(i1) = G_IMPLICIT_DEF + S_ENDPGM 0, implicit %0(i1) ... --- @@ -181,8 +181,8 @@ body: | ; GCN-LABEL: name: implicit_def_s1_vcc ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; GCN-NEXT: S_ENDPGM 0, implicit [[DEF]] - %0:vcc(s1) = G_IMPLICIT_DEF - S_ENDPGM 0, implicit %0 + %0:vcc(i1) = G_IMPLICIT_DEF + S_ENDPGM 0, implicit %0(i1) ... --- @@ -196,8 +196,8 @@ body: | ; GCN-LABEL: name: implicit_def_s1024_sgpr ; GCN: [[DEF:%[0-9]+]]:sgpr_1024 = IMPLICIT_DEF ; GCN-NEXT: S_ENDPGM 0, implicit [[DEF]] - %0:sgpr(s1024) = G_IMPLICIT_DEF - S_ENDPGM 0, implicit %0 + %0:sgpr(i1024) = G_IMPLICIT_DEF + S_ENDPGM 0, implicit %0(i1024) ... --- @@ -210,6 +210,6 @@ body: | ; GCN-LABEL: name: implicit_def_s1024_vgpr ; GCN: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; GCN-NEXT: S_ENDPGM 0, implicit [[DEF]] - %0:vgpr(s1024) = G_IMPLICIT_DEF - S_ENDPGM 0, implicit %0 + %0:vgpr(i1024) = G_IMPLICIT_DEF + S_ENDPGM 0, implicit %0(i1024) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert-vector-elt.mir index f863b0c4508ad..5e833dc499cf7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert-vector-elt.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert-vector-elt.mir @@ -32,11 +32,11 @@ body: | ; GPRIDX-NEXT: $m0 = COPY [[COPY2]] ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V2_:%[0-9]+]]:sreg_64 = S_INDIRECT_REG_WRITE_MOVREL_B32_V2 [[COPY]], [[COPY1]], 3, implicit $m0 ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V2_]] - %0:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1 - %1:sgpr(s32) = COPY $sgpr2 - %2:sgpr(s32) = COPY $sgpr3 - %3:sgpr(<2 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 - S_ENDPGM 0, implicit %3 + %0:sgpr(<2 x i32>) = COPY $sgpr0_sgpr1 + %1:sgpr(i32) = COPY $sgpr2 + %2:sgpr(i32) = COPY $sgpr3 + %3:sgpr(<2 x i32>) = G_INSERT_VECTOR_ELT %0, %1(i32), %2(i32) + S_ENDPGM 0, implicit %3(<2 x i32>) ... 
--- @@ -67,11 +67,11 @@ body: | ; GPRIDX-NEXT: $m0 = COPY [[COPY2]] ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V3_:%[0-9]+]]:sgpr_96 = S_INDIRECT_REG_WRITE_MOVREL_B32_V3 [[COPY]], [[COPY1]], 3, implicit $m0 ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V3_]] - %0:sgpr(<3 x s32>) = COPY $sgpr0_sgpr1_sgpr2 - %1:sgpr(s32) = COPY $sgpr3 - %2:sgpr(s32) = COPY $sgpr4 - %3:sgpr(<3 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 - S_ENDPGM 0, implicit %3 + %0:sgpr(<3 x i32>) = COPY $sgpr0_sgpr1_sgpr2 + %1:sgpr(i32) = COPY $sgpr3 + %2:sgpr(i32) = COPY $sgpr4 + %3:sgpr(<3 x i32>) = G_INSERT_VECTOR_ELT %0, %1(i32), %2(i32) + S_ENDPGM 0, implicit %3(<3 x i32>) ... --- @@ -102,11 +102,11 @@ body: | ; GPRIDX-NEXT: $m0 = COPY [[COPY2]] ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V4_:%[0-9]+]]:sgpr_128 = S_INDIRECT_REG_WRITE_MOVREL_B32_V4 [[COPY]], [[COPY1]], 3, implicit $m0 ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V4_]] - %0:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %1:sgpr(s32) = COPY $sgpr3 - %2:sgpr(s32) = COPY $sgpr4 - %3:sgpr(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 - S_ENDPGM 0, implicit %3 + %0:sgpr(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %1:sgpr(i32) = COPY $sgpr3 + %2:sgpr(i32) = COPY $sgpr4 + %3:sgpr(<4 x i32>) = G_INSERT_VECTOR_ELT %0, %1(i32), %2(i32) + S_ENDPGM 0, implicit %3(<4 x i32>) ... --- @@ -137,11 +137,11 @@ body: | ; GPRIDX-NEXT: $m0 = COPY [[COPY2]] ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V5_:%[0-9]+]]:sgpr_160 = S_INDIRECT_REG_WRITE_MOVREL_B32_V5 [[COPY]], [[COPY1]], 3, implicit $m0 ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V5_]] - %0:sgpr(<5 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 - %1:sgpr(s32) = COPY $sgpr5 - %2:sgpr(s32) = COPY $sgpr6 - %3:sgpr(<5 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 - S_ENDPGM 0, implicit %3 + %0:sgpr(<5 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 + %1:sgpr(i32) = COPY $sgpr5 + %2:sgpr(i32) = COPY $sgpr6 + %3:sgpr(<5 x i32>) = G_INSERT_VECTOR_ELT %0, %1(i32), %2(i32) + S_ENDPGM 0, implicit %3(<5 x i32>) ... --- @@ -172,11 +172,11 @@ body: | ; GPRIDX-NEXT: $m0 = COPY [[COPY2]] ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V8_:%[0-9]+]]:sgpr_256 = S_INDIRECT_REG_WRITE_MOVREL_B32_V8 [[COPY]], [[COPY1]], 3, implicit $m0 ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V8_]] - %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - %1:sgpr(s32) = COPY $sgpr8 - %2:sgpr(s32) = COPY $sgpr9 - %3:sgpr(<8 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 - S_ENDPGM 0, implicit %3 + %0:sgpr(<8 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + %1:sgpr(i32) = COPY $sgpr8 + %2:sgpr(i32) = COPY $sgpr9 + %3:sgpr(<8 x i32>) = G_INSERT_VECTOR_ELT %0, %1(i32), %2(i32) + S_ENDPGM 0, implicit %3(<8 x i32>) ... 
--- @@ -207,11 +207,11 @@ body: | ; GPRIDX-NEXT: $m0 = COPY [[COPY2]] ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V9_:%[0-9]+]]:sgpr_288 = S_INDIRECT_REG_WRITE_MOVREL_B32_V9 [[COPY]], [[COPY1]], 3, implicit $m0 ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V9_]] - %0:sgpr(<9 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8 - %1:sgpr(s32) = COPY $sgpr9 - %2:sgpr(s32) = COPY $sgpr10 - %3:sgpr(<9 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 - S_ENDPGM 0, implicit %3 + %0:sgpr(<9 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8 + %1:sgpr(i32) = COPY $sgpr9 + %2:sgpr(i32) = COPY $sgpr10 + %3:sgpr(<9 x i32>) = G_INSERT_VECTOR_ELT %0, %1(i32), %2(i32) + S_ENDPGM 0, implicit %3(<9 x i32>) ... --- @@ -242,11 +242,11 @@ body: | ; GPRIDX-NEXT: $m0 = COPY [[COPY2]] ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V10_:%[0-9]+]]:sgpr_320 = S_INDIRECT_REG_WRITE_MOVREL_B32_V10 [[COPY]], [[COPY1]], 3, implicit $m0 ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V10_]] - %0:sgpr(<10 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 - %1:sgpr(s32) = COPY $sgpr10 - %2:sgpr(s32) = COPY $sgpr11 - %3:sgpr(<10 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 - S_ENDPGM 0, implicit %3 + %0:sgpr(<10 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 + %1:sgpr(i32) = COPY $sgpr10 + %2:sgpr(i32) = COPY $sgpr11 + %3:sgpr(<10 x i32>) = G_INSERT_VECTOR_ELT %0, %1(i32), %2(i32) + S_ENDPGM 0, implicit %3(<10 x i32>) ... --- @@ -277,11 +277,11 @@ body: | ; GPRIDX-NEXT: $m0 = COPY [[COPY2]] ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V11_:%[0-9]+]]:sgpr_352 = S_INDIRECT_REG_WRITE_MOVREL_B32_V11 [[COPY]], [[COPY1]], 3, implicit $m0 ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V11_]] - %0:sgpr(<11 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 - %1:sgpr(s32) = COPY $sgpr11 - %2:sgpr(s32) = COPY $sgpr12 - %3:sgpr(<11 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 - S_ENDPGM 0, implicit %3 + %0:sgpr(<11 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 + %1:sgpr(i32) = COPY $sgpr11 + %2:sgpr(i32) = COPY $sgpr12 + %3:sgpr(<11 x i32>) = G_INSERT_VECTOR_ELT %0, %1(i32), %2(i32) + S_ENDPGM 0, implicit %3(<11 x i32>) ... --- @@ -312,11 +312,11 @@ body: | ; GPRIDX-NEXT: $m0 = COPY [[COPY2]] ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V12_:%[0-9]+]]:sgpr_384 = S_INDIRECT_REG_WRITE_MOVREL_B32_V12 [[COPY]], [[COPY1]], 3, implicit $m0 ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V12_]] - %0:sgpr(<12 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 - %1:sgpr(s32) = COPY $sgpr12 - %2:sgpr(s32) = COPY $sgpr13 - %3:sgpr(<12 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 - S_ENDPGM 0, implicit %3 + %0:sgpr(<12 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 + %1:sgpr(i32) = COPY $sgpr12 + %2:sgpr(i32) = COPY $sgpr13 + %3:sgpr(<12 x i32>) = G_INSERT_VECTOR_ELT %0, %1(i32), %2(i32) + S_ENDPGM 0, implicit %3(<12 x i32>) ... 
--- @@ -347,11 +347,11 @@ body: | ; GPRIDX-NEXT: $m0 = COPY [[COPY2]] ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V16_:%[0-9]+]]:sgpr_512 = S_INDIRECT_REG_WRITE_MOVREL_B32_V16 [[COPY]], [[COPY1]], 3, implicit $m0 ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V16_]] - %0:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - %1:sgpr(s32) = COPY $sgpr16 - %2:sgpr(s32) = COPY $sgpr17 - %3:sgpr(<16 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 - S_ENDPGM 0, implicit %3 + %0:sgpr(<16 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + %1:sgpr(i32) = COPY $sgpr16 + %2:sgpr(i32) = COPY $sgpr17 + %3:sgpr(<16 x i32>) = G_INSERT_VECTOR_ELT %0, %1(i32), %2(i32) + S_ENDPGM 0, implicit %3(<16 x i32>) ... --- @@ -382,11 +382,11 @@ body: | ; GPRIDX-NEXT: $m0 = COPY [[COPY2]] ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V32_:%[0-9]+]]:sgpr_1024 = S_INDIRECT_REG_WRITE_MOVREL_B32_V32 [[COPY]], [[COPY1]], 3, implicit $m0 ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V32_]] - %0:sgpr(<32 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 - %1:sgpr(s32) = COPY $sgpr40 - %2:sgpr(s32) = COPY $sgpr41 - %3:sgpr(<32 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 - S_ENDPGM 0, implicit %3 + %0:sgpr(<32 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + %1:sgpr(i32) = COPY $sgpr40 + %2:sgpr(i32) = COPY $sgpr41 + %3:sgpr(<32 x i32>) = G_INSERT_VECTOR_ELT %0, %1(i32), %2(i32) + S_ENDPGM 0, implicit %3(<32 x i32>) ... --- @@ -417,11 +417,11 @@ body: | ; GPRIDX-NEXT: $m0 = COPY [[COPY2]] ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B64_V2_:%[0-9]+]]:sgpr_128 = S_INDIRECT_REG_WRITE_MOVREL_B64_V2 [[COPY]], [[COPY1]], 4, implicit $m0 ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B64_V2_]] - %0:sgpr(<2 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %1:sgpr(s64) = COPY $sgpr4_sgpr5 - %2:sgpr(s32) = COPY $sgpr6 - %3:sgpr(<2 x s64>) = G_INSERT_VECTOR_ELT %0, %1, %2 - S_ENDPGM 0, implicit %3 + %0:sgpr(<2 x i64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %1:sgpr(i64) = COPY $sgpr4_sgpr5 + %2:sgpr(i32) = COPY $sgpr6 + %3:sgpr(<2 x i64>) = G_INSERT_VECTOR_ELT %0, %1(i64), %2(i32) + S_ENDPGM 0, implicit %3(<2 x i64>) ... --- @@ -452,11 +452,11 @@ body: | ; GPRIDX-NEXT: $m0 = COPY [[COPY2]] ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B64_V4_:%[0-9]+]]:sgpr_256 = S_INDIRECT_REG_WRITE_MOVREL_B64_V4 [[COPY]], [[COPY1]], 4, implicit $m0 ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B64_V4_]] - %0:sgpr(<4 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - %1:sgpr(s64) = COPY $sgpr8_sgpr9 - %2:sgpr(s32) = COPY $sgpr10 - %3:sgpr(<4 x s64>) = G_INSERT_VECTOR_ELT %0, %1, %2 - S_ENDPGM 0, implicit %3 + %0:sgpr(<4 x i64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + %1:sgpr(i64) = COPY $sgpr8_sgpr9 + %2:sgpr(i32) = COPY $sgpr10 + %3:sgpr(<4 x i64>) = G_INSERT_VECTOR_ELT %0, %1(i64), %2(i32) + S_ENDPGM 0, implicit %3(<4 x i64>) ... 
--- @@ -487,11 +487,11 @@ body: | ; GPRIDX-NEXT: $m0 = COPY [[COPY2]] ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B64_V8_:%[0-9]+]]:sgpr_512 = S_INDIRECT_REG_WRITE_MOVREL_B64_V8 [[COPY]], [[COPY1]], 4, implicit $m0 ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B64_V8_]] - %0:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - %1:sgpr(s64) = COPY $sgpr16_sgpr17 - %2:sgpr(s32) = COPY $sgpr18 - %3:sgpr(<8 x s64>) = G_INSERT_VECTOR_ELT %0, %1, %2 - S_ENDPGM 0, implicit %3 + %0:sgpr(<8 x i64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + %1:sgpr(i64) = COPY $sgpr16_sgpr17 + %2:sgpr(i32) = COPY $sgpr18 + %3:sgpr(<8 x i64>) = G_INSERT_VECTOR_ELT %0, %1(i64), %2(i32) + S_ENDPGM 0, implicit %3(<8 x i64>) ... --- @@ -522,11 +522,11 @@ body: | ; GPRIDX-NEXT: $m0 = COPY [[COPY2]] ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B64_V16_:%[0-9]+]]:sgpr_1024 = S_INDIRECT_REG_WRITE_MOVREL_B64_V16 [[COPY]], [[COPY1]], 4, implicit $m0 ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B64_V16_]] - %0:sgpr(<16 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 - %1:sgpr(s64) = COPY $sgpr40_sgpr41 - %2:sgpr(s32) = COPY $sgpr42 - %3:sgpr(<16 x s64>) = G_INSERT_VECTOR_ELT %0, %1, %2 - S_ENDPGM 0, implicit %3 + %0:sgpr(<16 x i64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + %1:sgpr(i64) = COPY $sgpr40_sgpr41 + %2:sgpr(i32) = COPY $sgpr42 + %3:sgpr(<16 x i64>) = G_INSERT_VECTOR_ELT %0, %1(i64), %2(i32) + S_ENDPGM 0, implicit %3(<16 x i64>) ... --- @@ -556,11 +556,11 @@ body: | ; GPRIDX-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; GPRIDX-NEXT: [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2_:%[0-9]+]]:vreg_64 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2 [[COPY]], [[COPY1]], [[COPY2]], 3, implicit-def $m0, implicit $m0, implicit $exec ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2_]] - %0:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - %2:sgpr(s32) = COPY $sgpr3 - %3:vgpr(<2 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 - S_ENDPGM 0, implicit %3 + %0:vgpr(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:vgpr(i32) = COPY $vgpr2 + %2:sgpr(i32) = COPY $sgpr3 + %3:vgpr(<2 x i32>) = G_INSERT_VECTOR_ELT %0, %1(i32), %2(i32) + S_ENDPGM 0, implicit %3(<2 x i32>) ... --- @@ -590,11 +590,11 @@ body: | ; GPRIDX-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GPRIDX-NEXT: [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3_:%[0-9]+]]:vreg_96 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3 [[COPY]], [[COPY1]], [[COPY2]], 3, implicit-def $m0, implicit $m0, implicit $exec ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3_]] - %0:vgpr(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:vgpr(s32) = COPY $vgpr3 - %2:sgpr(s32) = COPY $sgpr4 - %3:vgpr(<3 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 - S_ENDPGM 0, implicit %3 + %0:vgpr(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:vgpr(i32) = COPY $vgpr3 + %2:sgpr(i32) = COPY $sgpr4 + %3:vgpr(<3 x i32>) = G_INSERT_VECTOR_ELT %0, %1(i32), %2(i32) + S_ENDPGM 0, implicit %3(<3 x i32>) ... 
--- @@ -624,11 +624,11 @@ body: | ; GPRIDX-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GPRIDX-NEXT: [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4_:%[0-9]+]]:vreg_128 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4 [[COPY]], [[COPY1]], [[COPY2]], 3, implicit-def $m0, implicit $m0, implicit $exec ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4_]] - %0:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:vgpr(s32) = COPY $vgpr3 - %2:sgpr(s32) = COPY $sgpr4 - %3:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 - S_ENDPGM 0, implicit %3 + %0:vgpr(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:vgpr(i32) = COPY $vgpr3 + %2:sgpr(i32) = COPY $sgpr4 + %3:vgpr(<4 x i32>) = G_INSERT_VECTOR_ELT %0, %1(i32), %2(i32) + S_ENDPGM 0, implicit %3(<4 x i32>) ... --- @@ -658,11 +658,11 @@ body: | ; GPRIDX-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GPRIDX-NEXT: [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5_:%[0-9]+]]:vreg_160 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5 [[COPY]], [[COPY1]], [[COPY2]], 3, implicit-def $m0, implicit $m0, implicit $exec ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5_]] - %0:vgpr(<5 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - %1:vgpr(s32) = COPY $vgpr5 - %2:sgpr(s32) = COPY $sgpr6 - %3:vgpr(<5 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 - S_ENDPGM 0, implicit %3 + %0:vgpr(<5 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + %1:vgpr(i32) = COPY $vgpr5 + %2:sgpr(i32) = COPY $sgpr6 + %3:vgpr(<5 x i32>) = G_INSERT_VECTOR_ELT %0, %1(i32), %2(i32) + S_ENDPGM 0, implicit %3(<5 x i32>) ... --- @@ -692,11 +692,11 @@ body: | ; GPRIDX-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr9 ; GPRIDX-NEXT: [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8_:%[0-9]+]]:vreg_256 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8 [[COPY]], [[COPY1]], [[COPY2]], 3, implicit-def $m0, implicit $m0, implicit $exec ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8_]] - %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - %1:vgpr(s32) = COPY $vgpr8 - %2:sgpr(s32) = COPY $sgpr9 - %3:vgpr(<8 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 - S_ENDPGM 0, implicit %3 + %0:vgpr(<8 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:vgpr(i32) = COPY $vgpr8 + %2:sgpr(i32) = COPY $sgpr9 + %3:vgpr(<8 x i32>) = G_INSERT_VECTOR_ELT %0, %1(i32), %2(i32) + S_ENDPGM 0, implicit %3(<8 x i32>) ... --- @@ -726,13 +726,13 @@ body: | ; GPRIDX-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr9 ; GPRIDX-NEXT: [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8_:%[0-9]+]]:vreg_256 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8 [[COPY]], [[COPY1]], [[COPY2]], 11, implicit-def $m0, implicit $m0, implicit $exec ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8_]] - %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - %1:vgpr(s32) = COPY $vgpr8 - %2:sgpr(s32) = COPY $sgpr9 - %3:sgpr(s32) = G_CONSTANT i32 1 - %4:sgpr(s32) = G_ADD %2, %3 - %5:vgpr(<8 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %4 - S_ENDPGM 0, implicit %5 + %0:vgpr(<8 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:vgpr(i32) = COPY $vgpr8 + %2:sgpr(i32) = COPY $sgpr9 + %3:sgpr(i32) = G_CONSTANT i32 1 + %4:sgpr(i32) = G_ADD %2, %3 + %5:vgpr(<8 x i32>) = G_INSERT_VECTOR_ELT %0, %1(i32), %4(i32) + S_ENDPGM 0, implicit %5(<8 x i32>) ... 
--- @@ -766,13 +766,13 @@ body: | ; GPRIDX-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY2]], [[S_MOV_B32_]], implicit-def dead $scc ; GPRIDX-NEXT: [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8_:%[0-9]+]]:vreg_256 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8 [[COPY]], [[COPY1]], [[S_ADD_I32_]], 3, implicit-def $m0, implicit $m0, implicit $exec ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8_]] - %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - %1:vgpr(s32) = COPY $vgpr8 - %2:sgpr(s32) = COPY $sgpr9 - %3:sgpr(s32) = G_CONSTANT i32 8 - %4:sgpr(s32) = G_ADD %2, %3 - %5:vgpr(<8 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %4 - S_ENDPGM 0, implicit %5 + %0:vgpr(<8 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:vgpr(i32) = COPY $vgpr8 + %2:sgpr(i32) = COPY $sgpr9 + %3:sgpr(i32) = G_CONSTANT i32 8 + %4:sgpr(i32) = G_ADD %2, %3 + %5:vgpr(<8 x i32>) = G_INSERT_VECTOR_ELT %0, %1(i32), %4(i32) + S_ENDPGM 0, implicit %5(<8 x i32>) ... --- @@ -803,13 +803,13 @@ body: | ; GPRIDX-NEXT: $m0 = COPY [[COPY2]] ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V8_:%[0-9]+]]:sgpr_256 = S_INDIRECT_REG_WRITE_MOVREL_B32_V8 [[COPY]], [[COPY1]], 11, implicit $m0 ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V8_]] - %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - %1:sgpr(s32) = COPY $sgpr8 - %2:sgpr(s32) = COPY $sgpr9 - %3:sgpr(s32) = G_CONSTANT i32 1 - %4:sgpr(s32) = G_ADD %2, %3 - %5:sgpr(<8 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %4 - S_ENDPGM 0, implicit %5 + %0:sgpr(<8 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + %1:sgpr(i32) = COPY $sgpr8 + %2:sgpr(i32) = COPY $sgpr9 + %3:sgpr(i32) = G_CONSTANT i32 1 + %4:sgpr(i32) = G_ADD %2, %3 + %5:sgpr(<8 x i32>) = G_INSERT_VECTOR_ELT %0, %1(i32), %4(i32) + S_ENDPGM 0, implicit %5(<8 x i32>) ... --- @@ -844,13 +844,13 @@ body: | ; GPRIDX-NEXT: $m0 = COPY [[S_ADD_I32_]] ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V8_:%[0-9]+]]:sgpr_256 = S_INDIRECT_REG_WRITE_MOVREL_B32_V8 [[COPY]], [[COPY1]], 3, implicit $m0 ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V8_]] - %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - %1:sgpr(s32) = COPY $sgpr8 - %2:sgpr(s32) = COPY $sgpr9 - %3:sgpr(s32) = G_CONSTANT i32 8 - %4:sgpr(s32) = G_ADD %2, %3 - %5:sgpr(<8 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %4 - S_ENDPGM 0, implicit %5 + %0:sgpr(<8 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + %1:sgpr(i32) = COPY $sgpr8 + %2:sgpr(i32) = COPY $sgpr9 + %3:sgpr(i32) = G_CONSTANT i32 8 + %4:sgpr(i32) = G_ADD %2, %3 + %5:sgpr(<8 x i32>) = G_INSERT_VECTOR_ELT %0, %1(i32), %4(i32) + S_ENDPGM 0, implicit %5(<8 x i32>) ... # This should have been folded out in the legalizer, but make sure it @@ -883,11 +883,11 @@ body: | ; GPRIDX-NEXT: $m0 = COPY [[S_MOV_B32_]] ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V4_:%[0-9]+]]:sgpr_128 = S_INDIRECT_REG_WRITE_MOVREL_B32_V4 [[COPY]], [[COPY1]], 3, implicit $m0 ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V4_]] - %0:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %1:sgpr(s32) = COPY $sgpr4 - %2:sgpr(s32) = G_CONSTANT i32 0 - %3:sgpr(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 - S_ENDPGM 0, implicit %3 + %0:sgpr(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %1:sgpr(i32) = COPY $sgpr4 + %2:sgpr(i32) = G_CONSTANT i32 0 + %3:sgpr(<4 x i32>) = G_INSERT_VECTOR_ELT %0, %1(i32), %2(i32) + S_ENDPGM 0, implicit %3(<4 x i32>) ... 
--- @@ -917,9 +917,9 @@ body: | ; GPRIDX-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GPRIDX-NEXT: [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4_:%[0-9]+]]:vreg_128 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4 [[COPY]], [[COPY1]], [[S_MOV_B32_]], 3, implicit-def $m0, implicit $m0, implicit $exec ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4_]] - %0:vgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %1:sgpr(s32) = COPY $sgpr4 - %2:sgpr(s32) = G_CONSTANT i32 0 - %3:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 - S_ENDPGM 0, implicit %3 + %0:vgpr(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %1:sgpr(i32) = COPY $sgpr4 + %2:sgpr(i32) = G_CONSTANT i32 0 + %3:vgpr(<4 x i32>) = G_INSERT_VECTOR_ELT %0, %1(i32), %2(i32) + S_ENDPGM 0, implicit %3(<4 x i32>) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir index 0ae0b70b76d5a..891d393ca5c10 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir @@ -30,25 +30,25 @@ body: | ; CHECK-NEXT: [[INSERT_SUBREG15:%[0-9]+]]:sgpr_512 = INSERT_SUBREG [[INSERT_SUBREG14]], [[DEF1]], %subreg.sub15 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[INSERT_SUBREG15]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - %0:sgpr(s512) = G_IMPLICIT_DEF - %1:sgpr(s32) = G_IMPLICIT_DEF - %2:sgpr(s512) = G_INSERT %0:sgpr, %1:sgpr(s32), 0 - %3:sgpr(s512) = G_INSERT %2:sgpr, %1:sgpr(s32), 32 - %4:sgpr(s512) = G_INSERT %3:sgpr, %1:sgpr(s32), 64 - %5:sgpr(s512) = G_INSERT %4:sgpr, %1:sgpr(s32), 96 - %6:sgpr(s512) = G_INSERT %5:sgpr, %1:sgpr(s32), 128 - %7:sgpr(s512) = G_INSERT %6:sgpr, %1:sgpr(s32), 160 - %8:sgpr(s512) = G_INSERT %7:sgpr, %1:sgpr(s32), 192 - %9:sgpr(s512) = G_INSERT %8:sgpr, %1:sgpr(s32), 224 - %10:sgpr(s512) = G_INSERT %9:sgpr, %1:sgpr(s32), 256 - %11:sgpr(s512) = G_INSERT %10:sgpr, %1:sgpr(s32), 288 - %12:sgpr(s512) = G_INSERT %11:sgpr, %1:sgpr(s32), 320 - %13:sgpr(s512) = G_INSERT %12:sgpr, %1:sgpr(s32), 352 - %14:sgpr(s512) = G_INSERT %13:sgpr, %1:sgpr(s32), 384 - %15:sgpr(s512) = G_INSERT %14:sgpr, %1:sgpr(s32), 416 - %16:sgpr(s512) = G_INSERT %15:sgpr, %1:sgpr(s32), 448 - %17:sgpr(s512) = G_INSERT %16:sgpr, %1:sgpr(s32), 480 - $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %17:sgpr(s512) + %0:sgpr(i512) = G_IMPLICIT_DEF + %1:sgpr(i32) = G_IMPLICIT_DEF + %2:sgpr(i512) = G_INSERT %0, %1(i32), 0 + %3:sgpr(i512) = G_INSERT %2, %1(i32), 32 + %4:sgpr(i512) = G_INSERT %3, %1(i32), 64 + %5:sgpr(i512) = G_INSERT %4, %1(i32), 96 + %6:sgpr(i512) = G_INSERT %5, %1(i32), 128 + %7:sgpr(i512) = G_INSERT %6, %1(i32), 160 + %8:sgpr(i512) = G_INSERT %7, %1(i32), 192 + %9:sgpr(i512) = G_INSERT %8, %1(i32), 224 + %10:sgpr(i512) = G_INSERT %9, %1(i32), 256 + %11:sgpr(i512) = G_INSERT %10, %1(i32), 288 + %12:sgpr(i512) = G_INSERT %11, %1(i32), 320 + %13:sgpr(i512) = G_INSERT %12, %1(i32), 352 + %14:sgpr(i512) = G_INSERT %13, %1(i32), 384 + %15:sgpr(i512) = G_INSERT %14, %1(i32), 416 + %16:sgpr(i512) = G_INSERT %15, %1(i32), 448 + %17:sgpr(i512) = G_INSERT %16, %1(i32), 480 + $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %17(i512) SI_RETURN_TO_EPILOG 
$sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ... @@ -68,10 +68,10 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vreg_64 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s64) = G_INSERT %0, %1, 0 - S_ENDPGM 0, implicit %2 + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(i32) = COPY $vgpr2 + %2:vgpr(i64) = G_INSERT %0, %1(i32), 0 + S_ENDPGM 0, implicit %2(i64) ... --- @@ -90,10 +90,10 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vreg_64 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s64) = G_INSERT %0, %1, 32 - S_ENDPGM 0, implicit %2 + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(i32) = COPY $vgpr2 + %2:vgpr(i64) = G_INSERT %0, %1(i32), 32 + S_ENDPGM 0, implicit %2(i64) ... --- @@ -112,10 +112,10 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sreg_64 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] - %0:sgpr(s64) = COPY $sgpr0_sgpr1 - %1:sgpr(s32) = COPY $sgpr2 - %2:sgpr(s64) = G_INSERT %0, %1, 0 - S_ENDPGM 0, implicit %2 + %0:sgpr(i64) = COPY $sgpr0_sgpr1 + %1:sgpr(i32) = COPY $sgpr2 + %2:sgpr(i64) = G_INSERT %0, %1(i32), 0 + S_ENDPGM 0, implicit %2(i64) ... --- @@ -134,10 +134,10 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sreg_64 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] - %0:sgpr(s64) = COPY $sgpr0_sgpr1 - %1:sgpr(s32) = COPY $sgpr2 - %2:sgpr(s64) = G_INSERT %0, %1, 32 - S_ENDPGM 0, implicit %2 + %0:sgpr(i64) = COPY $sgpr0_sgpr1 + %1:sgpr(i32) = COPY $sgpr2 + %2:sgpr(i64) = G_INSERT %0, %1(i32), 32 + S_ENDPGM 0, implicit %2(i64) ... --- @@ -156,10 +156,10 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vreg_64 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] - %0:sgpr(s64) = COPY $sgpr0_sgpr1 - %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s64) = G_INSERT %0, %1, 32 - S_ENDPGM 0, implicit %2 + %0:sgpr(i64) = COPY $sgpr0_sgpr1 + %1:vgpr(i32) = COPY $vgpr2 + %2:vgpr(i64) = G_INSERT %0, %1(i32), 32 + S_ENDPGM 0, implicit %2(i64) ... --- @@ -178,10 +178,10 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vreg_64 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:sgpr(s32) = COPY $sgpr0 - %2:vgpr(s64) = G_INSERT %0, %1, 32 - S_ENDPGM 0, implicit %2 + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:sgpr(i32) = COPY $sgpr0 + %2:vgpr(i64) = G_INSERT %0, %1(i32), 32 + S_ENDPGM 0, implicit %2(i64) ... 
--- @@ -200,10 +200,10 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vreg_96 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0_sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] - %0:vgpr(s96) = COPY $vgpr0_vgpr1_vgpr2 - %1:vgpr(s64) = COPY $vgpr3_vgpr4 - %2:vgpr(s96) = G_INSERT %0, %1, 0 - S_ENDPGM 0, implicit %2 + %0:vgpr(i96) = COPY $vgpr0_vgpr1_vgpr2 + %1:vgpr(i64) = COPY $vgpr3_vgpr4 + %2:vgpr(i96) = G_INSERT %0, %1(i64), 0 + S_ENDPGM 0, implicit %2(i96) ... --- @@ -222,10 +222,10 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vreg_96 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1_sub2 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] - %0:vgpr(s96) = COPY $vgpr0_vgpr1_vgpr2 - %1:vgpr(s64) = COPY $vgpr3_vgpr4 - %2:vgpr(s96) = G_INSERT %0, %1, 32 - S_ENDPGM 0, implicit %2 + %0:vgpr(i96) = COPY $vgpr0_vgpr1_vgpr2 + %1:vgpr(i64) = COPY $vgpr3_vgpr4 + %2:vgpr(i96) = G_INSERT %0, %1(i64), 32 + S_ENDPGM 0, implicit %2(i96) ... --- @@ -244,10 +244,10 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_96 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0_sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] - %0:sgpr(s96) = COPY $sgpr0_sgpr1_sgpr2 - %1:sgpr(s64) = COPY $sgpr4_sgpr5 - %2:sgpr(s96) = G_INSERT %0, %1, 0 - S_ENDPGM 0, implicit %2 + %0:sgpr(i96) = COPY $sgpr0_sgpr1_sgpr2 + %1:sgpr(i64) = COPY $sgpr4_sgpr5 + %2:sgpr(i96) = G_INSERT %0, %1(i64), 0 + S_ENDPGM 0, implicit %2(i96) ... --- @@ -266,10 +266,10 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_128 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0_sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] - %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %1:sgpr(s64) = COPY $sgpr4_sgpr5 - %2:sgpr(s128) = G_INSERT %0, %1, 0 - S_ENDPGM 0, implicit %2 + %0:sgpr(i128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %1:sgpr(i64) = COPY $sgpr4_sgpr5 + %2:sgpr(i128) = G_INSERT %0, %1(i64), 0 + S_ENDPGM 0, implicit %2(i128) ... # --- @@ -281,9 +281,9 @@ body: | # body: | # bb.0: # liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5 -# %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 -# %1:sgpr(s64) = COPY $sgpr4_sgpr5 -# %2:sgpr(s128) = G_INSERT %0, %1, 32 +# %0:sgpr(i128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 +# %1:sgpr(i64) = COPY $sgpr4_sgpr5 +# %2:sgpr(i128) = G_INSERT %0, %1, 32 # S_ENDPGM 0, implicit %2 # ... @@ -303,10 +303,10 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_128 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub2_sub3 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] - %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %1:sgpr(s64) = COPY $sgpr4_sgpr5 - %2:sgpr(s128) = G_INSERT %0, %1, 64 - S_ENDPGM 0, implicit %2 + %0:sgpr(i128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %1:sgpr(i64) = COPY $sgpr4_sgpr5 + %2:sgpr(i128) = G_INSERT %0, %1(i64), 64 + S_ENDPGM 0, implicit %2(i128) ... 
--- @@ -325,10 +325,10 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr8_vgpr9 ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vreg_256 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub3_sub4 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] - %0:vgpr(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - %1:vgpr(s64) = COPY $vgpr8_vgpr9 - %2:vgpr(s256) = G_INSERT %0, %1, 96 - S_ENDPGM 0, implicit %2 + %0:vgpr(i256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:vgpr(i64) = COPY $vgpr8_vgpr9 + %2:vgpr(i256) = G_INSERT %0, %1(i64), 96 + S_ENDPGM 0, implicit %2(i256) ... --- @@ -347,10 +347,10 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_256 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub4_sub5 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] - %0:sgpr(s256) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - %1:sgpr(s64) = COPY $sgpr4_sgpr5 - %2:sgpr(s256) = G_INSERT %0, %1, 128 - S_ENDPGM 0, implicit %2 + %0:sgpr(i256) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + %1:sgpr(i64) = COPY $sgpr4_sgpr5 + %2:sgpr(i256) = G_INSERT %0, %1(i64), 128 + S_ENDPGM 0, implicit %2(i256) ... # --- @@ -362,9 +362,9 @@ body: | # body: | # bb.0: # liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9 -# %0:sgpr(s256) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 -# %1:sgpr(s64) = COPY $sgpr4_sgpr5 -# %2:sgpr(s256) = G_INSERT %0, %1, 160 +# %0:sgpr(i256) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 +# %1:sgpr(i64) = COPY $sgpr4_sgpr5 +# %2:sgpr(i256) = G_INSERT %0, %1, 160 # S_ENDPGM 0, implicit %2 # ... @@ -384,10 +384,10 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_96 = COPY $sgpr4_sgpr5_sgpr6 ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_128 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0_sub1_sub2 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] - %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %1:sgpr(s96) = COPY $sgpr4_sgpr5_sgpr6 - %2:sgpr(s128) = G_INSERT %0, %1, 0 - S_ENDPGM 0, implicit %2 + %0:sgpr(i128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %1:sgpr(i96) = COPY $sgpr4_sgpr5_sgpr6 + %2:sgpr(i128) = G_INSERT %0, %1(i96), 0 + S_ENDPGM 0, implicit %2(i128) ... --- @@ -406,10 +406,10 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_96 = COPY $sgpr8_sgpr9_sgpr10 ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_160 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0_sub1_sub2 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] - %0:sgpr(s160) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 - %1:sgpr(s96) = COPY $sgpr8_sgpr9_sgpr10 - %2:sgpr(s160) = G_INSERT %0, %1, 0 - S_ENDPGM 0, implicit %2 + %0:sgpr(i160) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 + %1:sgpr(i96) = COPY $sgpr8_sgpr9_sgpr10 + %2:sgpr(i160) = G_INSERT %0, %1(i96), 0 + S_ENDPGM 0, implicit %2(i160) ... --- @@ -429,10 +429,10 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY $sgpr8_sgpr9_sgpr10_sgpr11 ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_256 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0_sub1_sub2_sub3 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] - %0:sgpr(s256) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - %1:sgpr(s128) = COPY $sgpr8_sgpr9_sgpr10_sgpr11 - %2:sgpr(s256) = G_INSERT %0, %1, 0 - S_ENDPGM 0, implicit %2 + %0:sgpr(i256) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + %1:sgpr(i128) = COPY $sgpr8_sgpr9_sgpr10_sgpr11 + %2:sgpr(i256) = G_INSERT %0, %1(i128), 0 + S_ENDPGM 0, implicit %2(i256) ... 
--- @@ -452,10 +452,10 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr8_vgpr9_vgpr10_vgpr11 ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vreg_256 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1_sub2_sub3_sub4 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] - %0:vgpr(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - %1:vgpr(s128) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - %2:vgpr(s256) = G_INSERT %0, %1, 32 - S_ENDPGM 0, implicit %2 + %0:vgpr(i256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:vgpr(i128) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + %2:vgpr(i256) = G_INSERT %0, %1(i128), 32 + S_ENDPGM 0, implicit %2(i256) ... --- @@ -475,10 +475,10 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr8_vgpr9_vgpr10_vgpr11 ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vreg_256 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub2_sub3_sub4_sub5 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] - %0:vgpr(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - %1:vgpr(s128) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - %2:vgpr(s256) = G_INSERT %0, %1, 64 - S_ENDPGM 0, implicit %2 + %0:vgpr(i256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:vgpr(i128) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + %2:vgpr(i256) = G_INSERT %0, %1(i128), 64 + S_ENDPGM 0, implicit %2(i256) ... --- @@ -498,10 +498,10 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr8_vgpr9_vgpr10_vgpr11 ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vreg_256 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub3_sub4_sub5_sub6 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] - %0:vgpr(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - %1:vgpr(s128) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - %2:vgpr(s256) = G_INSERT %0, %1, 96 - S_ENDPGM 0, implicit %2 + %0:vgpr(i256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:vgpr(i128) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + %2:vgpr(i256) = G_INSERT %0, %1(i128), 96 + S_ENDPGM 0, implicit %2(i256) ... --- @@ -521,8 +521,8 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr8_vgpr9_vgpr10_vgpr11 ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vreg_256 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub4_sub5_sub6_sub7 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] - %0:vgpr(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - %1:vgpr(s128) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - %2:vgpr(s256) = G_INSERT %0, %1, 128 - S_ENDPGM 0, implicit %2 + %0:vgpr(i256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:vgpr(i128) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + %2:vgpr(i256) = G_INSERT %0, %1(i128), 128 + S_ENDPGM 0, implicit %2(i256) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-intrinsic-trunc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-intrinsic-trunc.mir index 96606e2653d8c..4d738d1b857fa 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-intrinsic-trunc.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-intrinsic-trunc.mir @@ -15,11 +15,13 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: %1:vgpr_32 = nofpexcept V_TRUNC_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: $vgpr0 = COPY %1 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = G_INTRINSIC_TRUNC %0 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[V_TRUNC_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_TRUNC_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: $vgpr0 = COPY [[V_TRUNC_F32_e64_]] + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(f32) = G_BITCAST %0(i32) + %2:vgpr(f32) = G_INTRINSIC_TRUNC %1 + %3:vgpr(i32) = G_BITCAST %2(f32) + $vgpr0 = COPY %3(i32) ... --- @@ -36,11 +38,13 @@ body: | ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: %1:vgpr_32 = nofpexcept V_TRUNC_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: $vgpr0 = COPY %1 - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = G_INTRINSIC_TRUNC %0 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[V_TRUNC_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_TRUNC_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: $vgpr0 = COPY [[V_TRUNC_F32_e64_]] + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(f32) = G_BITCAST %0(i32) + %2:vgpr(f32) = G_INTRINSIC_TRUNC %1 + %3:vgpr(i32) = G_BITCAST %2(f32) + $vgpr0 = COPY %3(i32) ... --- @@ -57,11 +61,13 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: %1:vreg_64 = nofpexcept V_TRUNC_F64_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %1 - %0:sgpr(s64) = COPY $sgpr0_sgpr1 - %1:vgpr(s64) = G_INTRINSIC_TRUNC %0 - $vgpr0_vgpr1 = COPY %1 + ; CHECK-NEXT: [[V_TRUNC_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_TRUNC_F64_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[V_TRUNC_F64_e64_]] + %0:sgpr(i64) = COPY $sgpr0_sgpr1 + %1:sgpr(f64) = G_BITCAST %0(i64) + %2:vgpr(f64) = G_INTRINSIC_TRUNC %1 + %3:vgpr(i64) = G_BITCAST %2(f64) + $vgpr0_vgpr1 = COPY %3(i64) ... --- @@ -78,9 +84,11 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: %1:vreg_64 = nofpexcept V_TRUNC_F64_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %1 - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_INTRINSIC_TRUNC %0 - $vgpr0_vgpr1 = COPY %1 + ; CHECK-NEXT: [[V_TRUNC_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_TRUNC_F64_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[V_TRUNC_F64_e64_]] + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(f64) = G_BITCAST %0(i64) + %2:vgpr(f64) = G_INTRINSIC_TRUNC %1 + %3:vgpr(i64) = G_BITCAST %2(f64) + $vgpr0_vgpr1 = COPY %3(i64) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-intrinsic-trunc.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-intrinsic-trunc.s16.mir index 3e352e1e14557..ba096597164ab 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-intrinsic-trunc.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-intrinsic-trunc.s16.mir @@ -15,13 +15,15 @@ body: | ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_TRUNC_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %2 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s16) = G_TRUNC %0 - %2:vgpr(s16) = G_INTRINSIC_TRUNC %1 - %3:vgpr(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; GCN-NEXT: [[V_TRUNC_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_TRUNC_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = COPY [[V_TRUNC_F16_e64_]] + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i16) = G_TRUNC %0(i32) + %2:vgpr(f16) = G_BITCAST %1(i16) + %3:vgpr(f16) = G_INTRINSIC_TRUNC %2 + %4:vgpr(i16) = G_BITCAST %3(f16) + %5:vgpr(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... --- @@ -38,13 +40,15 @@ body: | ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_TRUNC_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %2 - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0 - %2:vgpr(s16) = G_INTRINSIC_TRUNC %1 - %3:vgpr(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; GCN-NEXT: [[V_TRUNC_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_TRUNC_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = COPY [[V_TRUNC_F16_e64_]] + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(f16) = G_BITCAST %1(i16) + %3:vgpr(f16) = G_INTRINSIC_TRUNC %2 + %4:vgpr(i16) = G_BITCAST %3(f16) + %5:vgpr(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... --- @@ -61,12 +65,14 @@ body: | ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_TRUNC_F16_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %3 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s16) = G_TRUNC %0 - %2:vgpr(s16) = G_FNEG %1 - %3:vgpr(s16) = G_INTRINSIC_TRUNC %2 - %4:vgpr(s32) = G_ANYEXT %3 - $vgpr0 = COPY %4 + ; GCN-NEXT: [[V_TRUNC_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_TRUNC_F16_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = COPY [[V_TRUNC_F16_e64_]] + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i16) = G_TRUNC %0(i32) + %2:vgpr(f16) = G_BITCAST %1(i16) + %3:vgpr(f16) = G_FNEG %2 + %4:vgpr(f16) = G_INTRINSIC_TRUNC %3 + %5:vgpr(i16) = G_BITCAST %4(f16) + %6:vgpr(i32) = G_ANYEXT %5(i16) + $vgpr0 = COPY %6(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-inttoptr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-inttoptr.mir index 81fdf2fe0e78a..39b5a5bd2b8b1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-inttoptr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-inttoptr.mir @@ -17,11 +17,11 @@ regBankSelected: true body: | bb.0: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 - %0:sgpr(s64) = COPY $sgpr0_sgpr1 - %1:vgpr(s64) = COPY $vgpr0_vgpr1 + %0:sgpr(i64) = COPY $sgpr0_sgpr1 + %1:vgpr(i64) = COPY $vgpr0_vgpr1 %2:sgpr(p4) = G_INTTOPTR %0 - %3:sgpr(s32) = G_LOAD %2 :: (load (s32), addrspace 1) + %3:sgpr(i32) = G_LOAD %2 :: (load (i32), addrspace 1) %4:vgpr(p0) = G_INTTOPTR %1 - %5:vgpr(s32) = COPY %3 - G_STORE %5, %4 :: (store (s32), addrspace 1) + %5:vgpr(i32) = COPY %3 + G_STORE %5, %4 :: (store (i32), addrspace 1) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir index cebdffc74847c..1bff76714caa2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir @@ -20,39 +20,39 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) + ; GFX7-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (i32)) ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; ; GFX9-LABEL: name: load_atomic_flat_s32_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) + ; GFX9-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (i32)) ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; ; GFX10-LABEL: name: load_atomic_flat_s32_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) + ; GFX10-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (i32)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; ; GFX11-LABEL: name: load_atomic_flat_s32_seq_cst ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) + ; GFX11-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (i32)) ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; ; GFX12-LABEL: name: load_atomic_flat_s32_seq_cst ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) + ; GFX12-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD 
[[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (i32)) ; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = G_LOAD %0 :: (load seq_cst (s32), align 4, addrspace 0) - $vgpr0 = COPY %1 + %1:vgpr(i32) = G_LOAD %0(p0) :: (load seq_cst (i32)) + $vgpr0 = COPY %1(i32) ... @@ -71,39 +71,39 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s16>)) - ; GFX7-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x i16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x i16>)) + ; GFX7-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; ; GFX9-LABEL: name: load_atomic_flat_v2s16_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s16>)) - ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x i16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x i16>)) + ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; ; GFX10-LABEL: name: load_atomic_flat_v2s16_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s16>)) - ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x i16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x i16>)) + ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; ; GFX11-LABEL: name: load_atomic_flat_v2s16_seq_cst ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s16>)) - ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x i16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x i16>)) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; ; GFX12-LABEL: name: load_atomic_flat_v2s16_seq_cst ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s16>)) - ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x i16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x i16>)) + ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) %0:vgpr(p0) = COPY $vgpr0_vgpr1 - %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load seq_cst (<2 x s16>), align 4, addrspace 0) - $vgpr0 = COPY %1 + %1:vgpr(<2 x i16>) = G_LOAD %0(p0) :: (load seq_cst (<2 x i16>)) + $vgpr0 = COPY %1(<2 x i16>) ... @@ -153,8 +153,8 @@ body: | ; GFX12-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p0) :: (load seq_cst (p3)) ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](p3) %0:vgpr(p0) = COPY $vgpr0_vgpr1 - %1:vgpr(p3) = G_LOAD %0 :: (load seq_cst (p3), align 4, addrspace 0) - $vgpr0 = COPY %1 + %1:vgpr(p3) = G_LOAD %0(p0) :: (load seq_cst (p3)) + $vgpr0 = COPY %1(p3) ... 
@@ -173,39 +173,39 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s64)) + ; GFX7-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (i64)) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; ; GFX9-LABEL: name: load_atomic_flat_s64_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s64)) + ; GFX9-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (i64)) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; ; GFX10-LABEL: name: load_atomic_flat_s64_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s64)) + ; GFX10-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (i64)) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; ; GFX11-LABEL: name: load_atomic_flat_s64_seq_cst ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s64)) + ; GFX11-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (i64)) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; ; GFX12-LABEL: name: load_atomic_flat_s64_seq_cst ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s64)) + ; GFX12-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (i64)) ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_LOAD %0 :: (load seq_cst (s64), align 8, addrspace 0) - $vgpr0_vgpr1 = COPY %1 + %1:vgpr(i64) = G_LOAD %0(p0) :: (load seq_cst (i64)) + $vgpr0_vgpr1 = COPY %1(i64) ... 
@@ -224,39 +224,39 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s32>)) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x i32>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x i32>)) + ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX9-LABEL: name: load_atomic_flat_v2s32_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s32>)) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x i32>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x i32>)) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX10-LABEL: name: load_atomic_flat_v2s32_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s32>)) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x i32>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x i32>)) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX11-LABEL: name: load_atomic_flat_v2s32_seq_cst ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s32>)) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x i32>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x i32>)) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX12-LABEL: name: load_atomic_flat_v2s32_seq_cst ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s32>)) - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x i32>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x i32>)) + ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) %0:vgpr(p0) = COPY $vgpr0_vgpr1 - %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load seq_cst (<2 x s32>), align 8, addrspace 0) - $vgpr0_vgpr1 = COPY %1 + %1:vgpr(<2 x i32>) = G_LOAD %0(p0) :: (load seq_cst (<2 x i32>)) + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... 
@@ -275,39 +275,39 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<4 x s16>)) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x i16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<4 x i16>)) + ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; GFX9-LABEL: name: load_atomic_flat_v4s16_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<4 x s16>)) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x i16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<4 x i16>)) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; GFX10-LABEL: name: load_atomic_flat_v4s16_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<4 x s16>)) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x i16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<4 x i16>)) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; GFX11-LABEL: name: load_atomic_flat_v4s16_seq_cst ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<4 x s16>)) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x i16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<4 x i16>)) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; GFX12-LABEL: name: load_atomic_flat_v4s16_seq_cst ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<4 x s16>)) - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x i16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<4 x i16>)) + ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) %0:vgpr(p0) = COPY $vgpr0_vgpr1 - %1:vgpr(<4 x s16>) = G_LOAD %0 :: (load seq_cst (<4 x s16>), align 8, addrspace 0) - $vgpr0_vgpr1 = COPY %1 + %1:vgpr(<4 x i16>) = G_LOAD %0(p0) :: (load seq_cst (<4 x i16>)) + $vgpr0_vgpr1 = COPY %1(<4 x i16>) ... @@ -357,8 +357,8 @@ body: | ; GFX12-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p0) :: (load seq_cst (p1)) ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) %0:vgpr(p0) = COPY $vgpr0_vgpr1 - %1:vgpr(p1) = G_LOAD %0 :: (load seq_cst (p1), align 8, addrspace 0) - $vgpr0_vgpr1 = COPY %1 + %1:vgpr(p1) = G_LOAD %0(p0) :: (load seq_cst (p1)) + $vgpr0_vgpr1 = COPY %1(p1) ... @@ -408,8 +408,8 @@ body: | ; GFX12-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p0) :: (load seq_cst (p0)) ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p0) %0:vgpr(p0) = COPY $vgpr0_vgpr1 - %1:vgpr(p0) = G_LOAD %0 :: (load seq_cst (p0), align 8, addrspace 0) - $vgpr0_vgpr1 = COPY %1 + %1:vgpr(p0) = G_LOAD %0(p0) :: (load seq_cst (p0)) + $vgpr0_vgpr1 = COPY %1(p0) ... 
@@ -436,7 +436,7 @@ body: | ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) + ; GFX7-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (i32)) ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; ; GFX9-LABEL: name: load_atomic_flat_s32_seq_cst_gep_m2048 @@ -451,7 +451,7 @@ body: | ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) + ; GFX9-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (i32)) ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; ; GFX10-LABEL: name: load_atomic_flat_s32_seq_cst_gep_m2048 @@ -466,7 +466,7 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) + ; GFX10-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (i32)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; ; GFX11-LABEL: name: load_atomic_flat_s32_seq_cst_gep_m2048 @@ -481,20 +481,20 @@ body: | ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX11-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) + ; GFX11-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr 
:: (load seq_cst (i32)) ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; ; GFX12-LABEL: name: load_atomic_flat_s32_seq_cst_gep_m2048 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], -2048, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) + ; GFX12-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], -2048, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (i32)) ; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_CONSTANT i64 -2048 - %2:vgpr(p0) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load seq_cst (s32), align 4, addrspace 0) - $vgpr0 = COPY %3 + %1:vgpr(i64) = G_CONSTANT i64 -2048 + %2:vgpr(p0) = G_PTR_ADD %0, %1(i64) + %3:vgpr(i32) = G_LOAD %2(p0) :: (load seq_cst (i32)) + $vgpr0 = COPY %3(i32) ... @@ -521,14 +521,14 @@ body: | ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) + ; GFX7-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (i32)) ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; ; GFX9-LABEL: name: load_atomic_flat_s32_seq_cst_gep_4095 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 4095, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) + ; GFX9-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 4095, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (i32)) ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; ; GFX10-LABEL: name: load_atomic_flat_s32_seq_cst_gep_4095 @@ -543,27 +543,26 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) + ; GFX10-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (i32)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; ; GFX11-LABEL: name: load_atomic_flat_s32_seq_cst_gep_4095 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 4095, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 
(s32)) + ; GFX11-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 4095, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (i32)) ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; ; GFX12-LABEL: name: load_atomic_flat_s32_seq_cst_gep_4095 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 4095, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) + ; GFX12-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 4095, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (i32)) ; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_CONSTANT i64 4095 - %2:vgpr(p0) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load seq_cst (s32), align 4, addrspace 0) - $vgpr0 = COPY %3 + %1:vgpr(i64) = G_CONSTANT i64 4095 + %2:vgpr(p0) = G_PTR_ADD %0, %1(i64) + %3:vgpr(i32) = G_LOAD %2(p0) :: (load seq_cst (i32)) + $vgpr0 = COPY %3(i32) ... - diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-global.mir index eafc96dd32bdd..810225a5a1608 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-global.mir @@ -27,7 +27,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load seq_cst (i32), addrspace 1) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]] ; ; GFX7-LABEL: name: load_atomic_global_s32_seq_cst @@ -39,32 +39,32 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load seq_cst (i32), addrspace 1) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_atomic_global_s32_seq_cst ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32), addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (i32), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] 
; ; GFX9-LABEL: name: load_atomic_global_s32_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load seq_cst (i32), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] ; ; GFX10-LABEL: name: load_atomic_global_s32_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load seq_cst (i32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = G_LOAD %0 :: (load seq_cst (s32), align 4, addrspace 1) - $vgpr0 = COPY %1 + %1:vgpr(i32) = G_LOAD %0(p1) :: (load seq_cst (i32), addrspace 1) + $vgpr0 = COPY %1(i32) ... @@ -83,39 +83,39 @@ body: | ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s16>), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x i16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x i16>), addrspace 1) + ; GFX6-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; ; GFX7-LABEL: name: load_atomic_global_v2s16_seq_cst ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s16>), addrspace 1) - ; GFX7-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x i16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x i16>), addrspace 1) + ; GFX7-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; ; GFX7-FLAT-LABEL: name: load_atomic_global_v2s16_seq_cst ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s16>), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x i16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x i16>), addrspace 1) + ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; ; GFX9-LABEL: name: load_atomic_global_v2s16_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s16>), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x i16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x i16>), addrspace 1) + ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; ; GFX10-LABEL: name: load_atomic_global_v2s16_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s16>), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX10-NEXT: 
[[LOAD:%[0-9]+]]:vgpr_32(<2 x i16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x i16>), addrspace 1) + ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load seq_cst (<2 x s16>), align 4, addrspace 1) - $vgpr0 = COPY %1 + %1:vgpr(<2 x i16>) = G_LOAD %0(p1) :: (load seq_cst (<2 x i16>), addrspace 1) + $vgpr0 = COPY %1(<2 x i16>) ... @@ -165,8 +165,8 @@ body: | ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load seq_cst (p3), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](p3) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(p3) = G_LOAD %0 :: (load seq_cst (p3), align 4, addrspace 1) - $vgpr0 = COPY %1 + %1:vgpr(p3) = G_LOAD %0(p1) :: (load seq_cst (p3), addrspace 1) + $vgpr0 = COPY %1(p3) ... @@ -190,7 +190,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load seq_cst (s64), addrspace 1) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load seq_cst (i64), addrspace 1) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]] ; ; GFX7-LABEL: name: load_atomic_global_s64_seq_cst @@ -202,32 +202,32 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load seq_cst (s64), addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load seq_cst (i64), addrspace 1) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_atomic_global_s64_seq_cst ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s64), addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (i64), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; ; GFX9-LABEL: name: load_atomic_global_s64_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load seq_cst (s64), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load seq_cst (i64), addrspace 1) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] ; ; GFX10-LABEL: name: load_atomic_global_s64_seq_cst ; GFX10: liveins: 
$vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load seq_cst (s64), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load seq_cst (i64), addrspace 1) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_LOAD %0 :: (load seq_cst (s64), align 8, addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:vgpr(i64) = G_LOAD %0(p1) :: (load seq_cst (i64), addrspace 1) + $vgpr0_vgpr1 = COPY %1(i64) ... @@ -246,39 +246,39 @@ body: | ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s32>), addrspace 1) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x i32>), addrspace 1) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX7-LABEL: name: load_atomic_global_v2s32_seq_cst ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s32>), addrspace 1) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x i32>), addrspace 1) + ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX7-FLAT-LABEL: name: load_atomic_global_v2s32_seq_cst ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s32>), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x i32>), addrspace 1) + ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX9-LABEL: name: load_atomic_global_v2s32_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s32>), addrspace 1) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x i32>), addrspace 1) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX10-LABEL: name: load_atomic_global_v2s32_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s32>), addrspace 1) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x i32>), addrspace 1) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load seq_cst (<2 x s32>), align 8, addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:vgpr(<2 x i32>) = G_LOAD %0(p1) :: (load seq_cst (<2 x i32>), addrspace 1) + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... 
@@ -297,39 +297,39 @@ body: | ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<4 x s16>), addrspace 1) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x i16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<4 x i16>), addrspace 1) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; GFX7-LABEL: name: load_atomic_global_v4s16_seq_cst ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<4 x s16>), addrspace 1) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x i16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<4 x i16>), addrspace 1) + ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; GFX7-FLAT-LABEL: name: load_atomic_global_v4s16_seq_cst ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<4 x s16>), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x i16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<4 x i16>), addrspace 1) + ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; GFX9-LABEL: name: load_atomic_global_v4s16_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<4 x s16>), addrspace 1) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x i16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<4 x i16>), addrspace 1) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; GFX10-LABEL: name: load_atomic_global_v4s16_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<4 x s16>), addrspace 1) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x i16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<4 x i16>), addrspace 1) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<4 x s16>) = G_LOAD %0 :: (load seq_cst (<4 x s16>), align 8, addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:vgpr(<4 x i16>) = G_LOAD %0(p1) :: (load seq_cst (<4 x i16>), addrspace 1) + $vgpr0_vgpr1 = COPY %1(<4 x i16>) ... @@ -379,8 +379,8 @@ body: | ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load seq_cst (p1), addrspace 1) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(p1) = G_LOAD %0 :: (load seq_cst (p1), align 8, addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:vgpr(p1) = G_LOAD %0(p1) :: (load seq_cst (p1), addrspace 1) + $vgpr0_vgpr1 = COPY %1(p1) ... 
@@ -430,8 +430,8 @@ body: | ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p1) :: (load seq_cst (p0), addrspace 1) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p0) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(p0) = G_LOAD %0 :: (load seq_cst (p0), align 8, addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:vgpr(p0) = G_LOAD %0(p1) :: (load seq_cst (p0), addrspace 1) + $vgpr0_vgpr1 = COPY %1(p0) ... @@ -463,7 +463,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load seq_cst (i32), addrspace 1) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]] ; ; GFX7-LABEL: name: load_atomic_global_s32_seq_cst_gep_m2048 @@ -483,7 +483,7 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load seq_cst (i32), addrspace 1) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_atomic_global_s32_seq_cst_gep_m2048 @@ -498,27 +498,27 @@ body: | ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32), addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (i32), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; ; GFX9-LABEL: name: load_atomic_global_s32_seq_cst_gep_m2048 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], -2048, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], -2048, 0, implicit $exec :: (load seq_cst (i32), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] ; ; GFX10-LABEL: name: 
load_atomic_global_s32_seq_cst_gep_m2048 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], -2048, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], -2048, 0, implicit $exec :: (load seq_cst (i32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_CONSTANT i64 -2048 - %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load seq_cst (s32), align 4, addrspace 1) - $vgpr0 = COPY %3 + %1:vgpr(i64) = G_CONSTANT i64 -2048 + %2:vgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(i32) = G_LOAD %2(p1) :: (load seq_cst (i32), addrspace 1) + $vgpr0 = COPY %3(i32) ... @@ -542,7 +542,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, implicit $exec :: (load seq_cst (i32), addrspace 1) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]] ; ; GFX7-LABEL: name: load_atomic_global_s32_seq_cst_gep_4095 @@ -554,7 +554,7 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, implicit $exec :: (load seq_cst (i32), addrspace 1) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_atomic_global_s32_seq_cst_gep_4095 @@ -569,14 +569,14 @@ body: | ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32), addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (i32), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; ; GFX9-LABEL: name: load_atomic_global_s32_seq_cst_gep_4095 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; 
GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 4095, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 4095, 0, implicit $exec :: (load seq_cst (i32), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] ; ; GFX10-LABEL: name: load_atomic_global_s32_seq_cst_gep_4095 @@ -591,13 +591,13 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load seq_cst (i32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_CONSTANT i64 4095 - %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load seq_cst (s32), align 4, addrspace 1) - $vgpr0 = COPY %3 + %1:vgpr(i64) = G_CONSTANT i64 4095 + %2:vgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(i32) = G_LOAD %2(p1) :: (load seq_cst (i32), addrspace 1) + $vgpr0 = COPY %3(i32) ... @@ -629,7 +629,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load seq_cst (s64), addrspace 1) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load seq_cst (i64), addrspace 1) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]] ; ; GFX7-LABEL: name: load_atomic_global_s64_seq_cst_gep_m2048 @@ -649,7 +649,7 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load seq_cst (s64), addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load seq_cst (i64), addrspace 1) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_atomic_global_s64_seq_cst_gep_m2048 @@ -664,26 +664,26 @@ body: | ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = 
V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s64), addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (i64), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; ; GFX9-LABEL: name: load_atomic_global_s64_seq_cst_gep_m2048 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], -2048, 0, implicit $exec :: (load seq_cst (s64), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], -2048, 0, implicit $exec :: (load seq_cst (i64), addrspace 1) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] ; ; GFX10-LABEL: name: load_atomic_global_s64_seq_cst_gep_m2048 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], -2048, 0, implicit $exec :: (load seq_cst (s64), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], -2048, 0, implicit $exec :: (load seq_cst (i64), addrspace 1) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_CONSTANT i64 -2048 - %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s64) = G_LOAD %2 :: (load seq_cst (s64), align 8, addrspace 1) - $vgpr0_vgpr1 = COPY %3 + %1:vgpr(i64) = G_CONSTANT i64 -2048 + %2:vgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(i64) = G_LOAD %2(p1) :: (load seq_cst (i64), addrspace 1) + $vgpr0_vgpr1 = COPY %3(i64) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-local.mir index 5c2df3904b817..444fc0b3f3b78 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-local.mir @@ -22,24 +22,26 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst (s32), addrspace 3) + ; GFX6-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst (i32), addrspace 3) ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_B32_]] + ; ; GFX7-LABEL: name: load_atomic_local_s32_seq_cst ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst (s32), addrspace 3) + ; GFX7-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst (i32), addrspace 3) ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_B32_]] + ; ; GFX9-LABEL: name: load_atomic_local_s32_seq_cst ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 3) + ; GFX9-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load seq_cst (i32), addrspace 3) ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_B32_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(s32) = G_LOAD %0 :: (load seq_cst (s32), align 4, addrspace 3) - $vgpr0 = COPY %1 + %1:vgpr(i32) = G_LOAD %0(p3) :: (load seq_cst (i32), addrspace 3) + $vgpr0 = COPY %1(i32) ... 
@@ -59,24 +61,26 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<2 x s16>), addrspace 3) - ; GFX6-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x i16>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<2 x i16>), addrspace 3) + ; GFX6-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) + ; ; GFX7-LABEL: name: load_atomic_local_v2s16_seq_cst ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<2 x s16>), addrspace 3) - ; GFX7-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x i16>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<2 x i16>), addrspace 3) + ; GFX7-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) + ; ; GFX9-LABEL: name: load_atomic_local_v2s16_seq_cst ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<2 x s16>), addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x i16>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<2 x i16>), addrspace 3) + ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load seq_cst (<2 x s16>), align 4, addrspace 3) - $vgpr0 = COPY %1 + %1:vgpr(<2 x i16>) = G_LOAD %0(p3) :: (load seq_cst (<2 x i16>), addrspace 3) + $vgpr0 = COPY %1(<2 x i16>) ... @@ -98,6 +102,7 @@ body: | ; GFX6-NEXT: $m0 = S_MOV_B32 -1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p3) :: (load seq_cst (p3), addrspace 3) ; GFX6-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; ; GFX7-LABEL: name: load_atomic_local_p3_seq_cst ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} @@ -105,6 +110,7 @@ body: | ; GFX7-NEXT: $m0 = S_MOV_B32 -1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p3) :: (load seq_cst (p3), addrspace 3) ; GFX7-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; ; GFX9-LABEL: name: load_atomic_local_p3_seq_cst ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -112,8 +118,8 @@ body: | ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p3) :: (load seq_cst (p3), addrspace 3) ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](p3) %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(p3) = G_LOAD %0 :: (load seq_cst (p3), align 4, addrspace 3) - $vgpr0 = COPY %1 + %1:vgpr(p3) = G_LOAD %0(p3) :: (load seq_cst (p3), addrspace 3) + $vgpr0 = COPY %1(p3) ... 
@@ -133,24 +139,26 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst (s64), addrspace 3) + ; GFX6-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst (i64), addrspace 3) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] + ; ; GFX7-LABEL: name: load_atomic_local_s64_seq_cst ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst (s64), addrspace 3) + ; GFX7-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst (i64), addrspace 3) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] + ; ; GFX9-LABEL: name: load_atomic_local_s64_seq_cst ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load seq_cst (s64), addrspace 3) + ; GFX9-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load seq_cst (i64), addrspace 3) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(s64) = G_LOAD %0 :: (load seq_cst (s64), align 8, addrspace 3) - $vgpr0_vgpr1 = COPY %1 + %1:vgpr(i64) = G_LOAD %0(p3) :: (load seq_cst (i64), addrspace 3) + $vgpr0_vgpr1 = COPY %1(i64) ... @@ -170,24 +178,26 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<2 x s32>), addrspace 3) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<2 x i32>), addrspace 3) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) + ; ; GFX7-LABEL: name: load_atomic_local_v2s32_seq_cst ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<2 x s32>), addrspace 3) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<2 x i32>), addrspace 3) + ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) + ; ; GFX9-LABEL: name: load_atomic_local_v2s32_seq_cst ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<2 x s32>), addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<2 x i32>), addrspace 3) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load seq_cst (<2 x s32>), align 8, addrspace 3) - $vgpr0_vgpr1 = COPY %1 + %1:vgpr(<2 x i32>) = G_LOAD %0(p3) :: (load seq_cst (<2 x i32>), addrspace 3) + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... 
@@ -207,24 +217,26 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<4 x s16>), addrspace 3) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x i16>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<4 x i16>), addrspace 3) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) + ; ; GFX7-LABEL: name: load_atomic_local_v4s16_seq_cst ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<4 x s16>), addrspace 3) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x i16>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<4 x i16>), addrspace 3) + ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) + ; ; GFX9-LABEL: name: load_atomic_local_v4s16_seq_cst ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<4 x s16>), addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x i16>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<4 x i16>), addrspace 3) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(<4 x s16>) = G_LOAD %0 :: (load seq_cst (<4 x s16>), align 8, addrspace 3) - $vgpr0_vgpr1 = COPY %1 + %1:vgpr(<4 x i16>) = G_LOAD %0(p3) :: (load seq_cst (<4 x i16>), addrspace 3) + $vgpr0_vgpr1 = COPY %1(<4 x i16>) ... @@ -246,6 +258,7 @@ body: | ; GFX6-NEXT: $m0 = S_MOV_B32 -1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load seq_cst (p1), addrspace 3) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; ; GFX7-LABEL: name: load_atomic_local_p1_seq_cst ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} @@ -253,6 +266,7 @@ body: | ; GFX7-NEXT: $m0 = S_MOV_B32 -1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load seq_cst (p1), addrspace 3) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; ; GFX9-LABEL: name: load_atomic_local_p1_seq_cst ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -260,8 +274,8 @@ body: | ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load seq_cst (p1), addrspace 3) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(p1) = G_LOAD %0 :: (load seq_cst (p1), align 8, addrspace 3) - $vgpr0_vgpr1 = COPY %1 + %1:vgpr(p1) = G_LOAD %0(p3) :: (load seq_cst (p1), addrspace 3) + $vgpr0_vgpr1 = COPY %1(p1) ... 
@@ -283,6 +297,7 @@ body: | ; GFX6-NEXT: $m0 = S_MOV_B32 -1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p3) :: (load seq_cst (p0), addrspace 3) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p0) + ; ; GFX7-LABEL: name: load_atomic_local_p0_seq_cst ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} @@ -290,6 +305,7 @@ body: | ; GFX7-NEXT: $m0 = S_MOV_B32 -1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p3) :: (load seq_cst (p0), addrspace 3) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p0) + ; ; GFX9-LABEL: name: load_atomic_local_p0_seq_cst ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -297,8 +313,8 @@ body: | ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p3) :: (load seq_cst (p0), addrspace 3) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p0) %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(p0) = G_LOAD %0 :: (load seq_cst (p0), align 8, addrspace 3) - $vgpr0_vgpr1 = COPY %1 + %1:vgpr(p0) = G_LOAD %0(p3) :: (load seq_cst (p0), addrspace 3) + $vgpr0_vgpr1 = COPY %1(p0) ... @@ -320,25 +336,27 @@ body: | ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65535, implicit $exec ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[V_ADD_CO_U32_e64_]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst (s32), addrspace 3) + ; GFX6-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[V_ADD_CO_U32_e64_]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst (i32), addrspace 3) ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_B32_]] + ; ; GFX7-LABEL: name: load_atomic_local_s32_seq_cst_gep_65535 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 65535, 0, implicit $m0, implicit $exec :: (load seq_cst (s32), addrspace 3) + ; GFX7-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 65535, 0, implicit $m0, implicit $exec :: (load seq_cst (i32), addrspace 3) ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_B32_]] + ; ; GFX9-LABEL: name: load_atomic_local_s32_seq_cst_gep_65535 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 65535, 0, implicit $exec :: (load seq_cst (s32), addrspace 3) + ; GFX9-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 65535, 0, implicit $exec :: (load seq_cst (i32), addrspace 3) ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_B32_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(s32) = G_CONSTANT i32 65535 - %2:vgpr(p3) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load seq_cst (s32), align 4, addrspace 3) - $vgpr0 = COPY %3 + %1:vgpr(i32) = G_CONSTANT i32 65535 + %2:vgpr(p3) = G_PTR_ADD %0, %1(i32) + %3:vgpr(i32) = G_LOAD %2(p3) :: (load seq_cst (i32), addrspace 3) + $vgpr0 = COPY %3(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir index ada80da490fc5..6dc095ec7e240 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir @@ -21,39 +21,39 @@ body: | ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (s32), addrspace 4) + ; GFX6-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (i32), addrspace 4) ; GFX6-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] ; ; GFX7-LABEL: name: load_constant_s32_from_4 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (s32), addrspace 4) + ; GFX7-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (i32), addrspace 4) ; GFX7-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] ; ; GFX8-LABEL: name: load_constant_s32_from_4 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (s32), addrspace 4) + ; GFX8-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (i32), addrspace 4) ; GFX8-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] ; ; GFX10-LABEL: name: load_constant_s32_from_4 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (s32), addrspace 4) + ; GFX10-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (i32), addrspace 4) ; GFX10-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] ; ; GFX11-LABEL: name: load_constant_s32_from_4 ; GFX11: liveins: $sgpr0_sgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX11-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (s32), addrspace 4) + ; GFX11-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (i32), addrspace 4) ; GFX11-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 - %1:sgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 4) - $sgpr0 = COPY %1 + %1:sgpr(i32) = G_LOAD %0(p4) :: (load (i32), addrspace 4) + $sgpr0 = COPY %1(i32) ... 
@@ -73,39 +73,39 @@ body: | ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (<2 x s16>), addrspace 4) + ; GFX6-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (<2 x i16>), addrspace 4) ; GFX6-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] ; ; GFX7-LABEL: name: load_constant_v2s16_from_4 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (<2 x s16>), addrspace 4) + ; GFX7-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (<2 x i16>), addrspace 4) ; GFX7-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] ; ; GFX8-LABEL: name: load_constant_v2s16_from_4 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (<2 x s16>), addrspace 4) + ; GFX8-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (<2 x i16>), addrspace 4) ; GFX8-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] ; ; GFX10-LABEL: name: load_constant_v2s16_from_4 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (<2 x s16>), addrspace 4) + ; GFX10-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (<2 x i16>), addrspace 4) ; GFX10-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] ; ; GFX11-LABEL: name: load_constant_v2s16_from_4 ; GFX11: liveins: $sgpr0_sgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX11-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (<2 x s16>), addrspace 4) + ; GFX11-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (<2 x i16>), addrspace 4) ; GFX11-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 - %1:sgpr(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 4) - $sgpr0 = COPY %1 + %1:sgpr(<2 x i16>) = G_LOAD %0(p4) :: (load (<2 x i16>), addrspace 4) + $sgpr0 = COPY %1(<2 x i16>) ... 
@@ -124,39 +124,39 @@ body: | ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<2 x s32>), addrspace 4) + ; GFX6-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<2 x i32>), addrspace 4) ; GFX6-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] ; ; GFX7-LABEL: name: load_constant_v2s32 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<2 x s32>), addrspace 4) + ; GFX7-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<2 x i32>), addrspace 4) ; GFX7-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] ; ; GFX8-LABEL: name: load_constant_v2s32 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<2 x s32>), addrspace 4) + ; GFX8-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<2 x i32>), addrspace 4) ; GFX8-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] ; ; GFX10-LABEL: name: load_constant_v2s32 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<2 x s32>), addrspace 4) + ; GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<2 x i32>), addrspace 4) ; GFX10-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] ; ; GFX11-LABEL: name: load_constant_v2s32 ; GFX11: liveins: $sgpr0_sgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX11-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<2 x s32>), addrspace 4) + ; GFX11-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<2 x i32>), addrspace 4) ; GFX11-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 - %1:sgpr(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 8, addrspace 4) - $sgpr0_sgpr1 = COPY %1 + %1:sgpr(<2 x i32>) = G_LOAD %0(p4) :: (load (<2 x i32>), addrspace 4) + $sgpr0_sgpr1 = COPY %1(<2 x i32>) ... 
@@ -174,39 +174,39 @@ body: | ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<2 x s32>), align 4, addrspace 4) + ; GFX6-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<2 x i32>), align 4, addrspace 4) ; GFX6-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] ; ; GFX7-LABEL: name: load_constant_v2s32_align4 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<2 x s32>), align 4, addrspace 4) + ; GFX7-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<2 x i32>), align 4, addrspace 4) ; GFX7-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] ; ; GFX8-LABEL: name: load_constant_v2s32_align4 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<2 x s32>), align 4, addrspace 4) + ; GFX8-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<2 x i32>), align 4, addrspace 4) ; GFX8-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] ; ; GFX10-LABEL: name: load_constant_v2s32_align4 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: early-clobber %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM_ec [[COPY]], 0, 0 :: (load (<2 x s32>), align 4, addrspace 4) + ; GFX10-NEXT: early-clobber %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM_ec [[COPY]], 0, 0 :: (load (<2 x i32>), align 4, addrspace 4) ; GFX10-NEXT: $sgpr0_sgpr1 = COPY %1 ; ; GFX11-LABEL: name: load_constant_v2s32_align4 ; GFX11: liveins: $sgpr0_sgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX11-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<2 x s32>), align 4, addrspace 4) + ; GFX11-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<2 x i32>), align 4, addrspace 4) ; GFX11-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 - %1:sgpr(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 4, addrspace 4) - $sgpr0_sgpr1 = COPY %1 + %1:sgpr(<2 x i32>) = G_LOAD %0(p4) :: (load (<2 x i32>), align 4, addrspace 4) + $sgpr0_sgpr1 = COPY %1(<2 x i32>) ... 
@@ -224,39 +224,39 @@ body: | ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<4 x s16>), align 4, addrspace 4) + ; GFX6-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<4 x i16>), align 4, addrspace 4) ; GFX6-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] ; ; GFX7-LABEL: name: load_constant_v4s16_align4 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<4 x s16>), align 4, addrspace 4) + ; GFX7-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<4 x i16>), align 4, addrspace 4) ; GFX7-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] ; ; GFX8-LABEL: name: load_constant_v4s16_align4 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<4 x s16>), align 4, addrspace 4) + ; GFX8-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<4 x i16>), align 4, addrspace 4) ; GFX8-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] ; ; GFX10-LABEL: name: load_constant_v4s16_align4 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: early-clobber %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM_ec [[COPY]], 0, 0 :: (load (<4 x s16>), align 4, addrspace 4) + ; GFX10-NEXT: early-clobber %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM_ec [[COPY]], 0, 0 :: (load (<4 x i16>), align 4, addrspace 4) ; GFX10-NEXT: $sgpr0_sgpr1 = COPY %1 ; ; GFX11-LABEL: name: load_constant_v4s16_align4 ; GFX11: liveins: $sgpr0_sgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX11-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<4 x s16>), align 4, addrspace 4) + ; GFX11-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<4 x i16>), align 4, addrspace 4) ; GFX11-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 - %1:sgpr(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 4, addrspace 4) - $sgpr0_sgpr1 = COPY %1 + %1:sgpr(<4 x i16>) = G_LOAD %0(p4) :: (load (<4 x i16>), align 4, addrspace 4) + $sgpr0_sgpr1 = COPY %1(<4 x i16>) ... 
@@ -275,39 +275,39 @@ body: | ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<4 x s32>), align 4, addrspace 4) + ; GFX6-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<4 x i32>), align 4, addrspace 4) ; GFX6-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] ; ; GFX7-LABEL: name: load_constant_v4s32_align4 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<4 x s32>), align 4, addrspace 4) + ; GFX7-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<4 x i32>), align 4, addrspace 4) ; GFX7-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] ; ; GFX8-LABEL: name: load_constant_v4s32_align4 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<4 x s32>), align 4, addrspace 4) + ; GFX8-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<4 x i32>), align 4, addrspace 4) ; GFX8-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] ; ; GFX10-LABEL: name: load_constant_v4s32_align4 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: early-clobber %1:sgpr_128 = S_LOAD_DWORDX4_IMM_ec [[COPY]], 0, 0 :: (load (<4 x s32>), align 4, addrspace 4) + ; GFX10-NEXT: early-clobber %1:sgpr_128 = S_LOAD_DWORDX4_IMM_ec [[COPY]], 0, 0 :: (load (<4 x i32>), align 4, addrspace 4) ; GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %1 ; ; GFX11-LABEL: name: load_constant_v4s32_align4 ; GFX11: liveins: $sgpr0_sgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX11-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<4 x s32>), align 4, addrspace 4) + ; GFX11-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<4 x i32>), align 4, addrspace 4) ; GFX11-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 - %1:sgpr(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 4, addrspace 4) - $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %1 + %1:sgpr(<4 x i32>) = G_LOAD %0(p4) :: (load (<4 x i32>), align 4, addrspace 4) + $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %1(<4 x i32>) ... 
@@ -326,39 +326,39 @@ body: | ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (s64), addrspace 4) + ; GFX6-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (i64), addrspace 4) ; GFX6-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] ; ; GFX7-LABEL: name: load_constant_s64 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (s64), addrspace 4) + ; GFX7-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (i64), addrspace 4) ; GFX7-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] ; ; GFX8-LABEL: name: load_constant_s64 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (s64), addrspace 4) + ; GFX8-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (i64), addrspace 4) ; GFX8-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] ; ; GFX10-LABEL: name: load_constant_s64 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (s64), addrspace 4) + ; GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (i64), addrspace 4) ; GFX10-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] ; ; GFX11-LABEL: name: load_constant_s64 ; GFX11: liveins: $sgpr0_sgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX11-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (s64), addrspace 4) + ; GFX11-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (i64), addrspace 4) ; GFX11-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 - %1:sgpr(s64) = G_LOAD %0 :: (load (s64), align 8, addrspace 4) - $sgpr0_sgpr1 = COPY %1 + %1:sgpr(i64) = G_LOAD %0(p4) :: (load (i64), addrspace 4) + $sgpr0_sgpr1 = COPY %1(i64) ... 
@@ -377,39 +377,39 @@ body: | ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (s64), align 4, addrspace 4) + ; GFX6-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (i64), align 4, addrspace 4) ; GFX6-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] ; ; GFX7-LABEL: name: load_constant_s64_align4 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (s64), align 4, addrspace 4) + ; GFX7-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (i64), align 4, addrspace 4) ; GFX7-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] ; ; GFX8-LABEL: name: load_constant_s64_align4 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (s64), align 4, addrspace 4) + ; GFX8-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (i64), align 4, addrspace 4) ; GFX8-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] ; ; GFX10-LABEL: name: load_constant_s64_align4 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: early-clobber %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM_ec [[COPY]], 0, 0 :: (load (s64), align 4, addrspace 4) + ; GFX10-NEXT: early-clobber %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM_ec [[COPY]], 0, 0 :: (load (i64), align 4, addrspace 4) ; GFX10-NEXT: $sgpr0_sgpr1 = COPY %1 ; ; GFX11-LABEL: name: load_constant_s64_align4 ; GFX11: liveins: $sgpr0_sgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX11-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (s64), align 4, addrspace 4) + ; GFX11-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (i64), align 4, addrspace 4) ; GFX11-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 - %1:sgpr(s64) = G_LOAD %0 :: (load (s64), align 4, addrspace 4) - $sgpr0_sgpr1 = COPY %1 + %1:sgpr(i64) = G_LOAD %0(p4) :: (load (i64), align 4, addrspace 4) + $sgpr0_sgpr1 = COPY %1(i64) ... 
@@ -428,39 +428,39 @@ body: | ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<2 x s64>), align 4, addrspace 4) + ; GFX6-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<2 x i64>), align 4, addrspace 4) ; GFX6-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] ; ; GFX7-LABEL: name: load_constant_v2s64 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<2 x s64>), align 4, addrspace 4) + ; GFX7-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<2 x i64>), align 4, addrspace 4) ; GFX7-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] ; ; GFX8-LABEL: name: load_constant_v2s64 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<2 x s64>), align 4, addrspace 4) + ; GFX8-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<2 x i64>), align 4, addrspace 4) ; GFX8-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] ; ; GFX10-LABEL: name: load_constant_v2s64 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: early-clobber %1:sgpr_128 = S_LOAD_DWORDX4_IMM_ec [[COPY]], 0, 0 :: (load (<2 x s64>), align 4, addrspace 4) + ; GFX10-NEXT: early-clobber %1:sgpr_128 = S_LOAD_DWORDX4_IMM_ec [[COPY]], 0, 0 :: (load (<2 x i64>), align 4, addrspace 4) ; GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %1 ; ; GFX11-LABEL: name: load_constant_v2s64 ; GFX11: liveins: $sgpr0_sgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX11-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<2 x s64>), align 4, addrspace 4) + ; GFX11-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<2 x i64>), align 4, addrspace 4) ; GFX11-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 - %1:sgpr(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 4, addrspace 4) - $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %1 + %1:sgpr(<2 x i64>) = G_LOAD %0(p4) :: (load (<2 x i64>), align 4, addrspace 4) + $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %1(<2 x i64>) ... @@ -510,8 +510,8 @@ body: | ; GFX11-NEXT: [[LOAD:%[0-9]+]]:sgpr_128(<2 x p1>) = G_LOAD [[COPY]](p4) :: (load (<2 x p1>), align 4, addrspace 4) ; GFX11-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](<2 x p1>) %0:sgpr(p4) = COPY $sgpr0_sgpr1 - %1:sgpr(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 4, addrspace 4) - $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %1 + %1:sgpr(<2 x p1>) = G_LOAD %0(p4) :: (load (<2 x p1>), align 4, addrspace 4) + $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %1(<2 x p1>) ... 
@@ -530,39 +530,39 @@ body: | ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[LOAD:%[0-9]+]]:sgpr_128(s128) = G_LOAD [[COPY]](p4) :: (load (s128), align 4, addrspace 4) - ; GFX6-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](s128) + ; GFX6-NEXT: [[LOAD:%[0-9]+]]:sgpr_128(i128) = G_LOAD [[COPY]](p4) :: (load (i128), align 4, addrspace 4) + ; GFX6-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](i128) ; ; GFX7-LABEL: name: load_constant_s128_align4 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[LOAD:%[0-9]+]]:sgpr_128(s128) = G_LOAD [[COPY]](p4) :: (load (s128), align 4, addrspace 4) - ; GFX7-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](s128) + ; GFX7-NEXT: [[LOAD:%[0-9]+]]:sgpr_128(i128) = G_LOAD [[COPY]](p4) :: (load (i128), align 4, addrspace 4) + ; GFX7-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](i128) ; ; GFX8-LABEL: name: load_constant_s128_align4 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[LOAD:%[0-9]+]]:sgpr_128(s128) = G_LOAD [[COPY]](p4) :: (load (s128), align 4, addrspace 4) - ; GFX8-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](s128) + ; GFX8-NEXT: [[LOAD:%[0-9]+]]:sgpr_128(i128) = G_LOAD [[COPY]](p4) :: (load (i128), align 4, addrspace 4) + ; GFX8-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](i128) ; ; GFX10-LABEL: name: load_constant_s128_align4 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:sgpr_128(s128) = G_LOAD [[COPY]](p4) :: (load (s128), align 4, addrspace 4) - ; GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](s128) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:sgpr_128(i128) = G_LOAD [[COPY]](p4) :: (load (i128), align 4, addrspace 4) + ; GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](i128) ; ; GFX11-LABEL: name: load_constant_s128_align4 ; GFX11: liveins: $sgpr0_sgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:sgpr_128(s128) = G_LOAD [[COPY]](p4) :: (load (s128), align 4, addrspace 4) - ; GFX11-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](s128) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:sgpr_128(i128) = G_LOAD [[COPY]](p4) :: (load (i128), align 4, addrspace 4) + ; GFX11-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](i128) %0:sgpr(p4) = COPY $sgpr0_sgpr1 - %1:sgpr(s128) = G_LOAD %0 :: (load (s128), align 4, addrspace 4) - $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %1 + %1:sgpr(i128) = G_LOAD %0(p4) :: (load (i128), align 4, addrspace 4) + $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %1(i128) ... @@ -612,8 +612,8 @@ body: | ; GFX11-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (p3), addrspace 4) ; GFX11-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 - %1:sgpr(p3) = G_LOAD %0 :: (load (p3), align 4, addrspace 4) - $sgpr0 = COPY %1 + %1:sgpr(p3) = G_LOAD %0(p4) :: (load (p3), addrspace 4) + $sgpr0 = COPY %1(p3) ... @@ -663,8 +663,8 @@ body: | ; GFX11-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (p4), addrspace 4) ; GFX11-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 - %1:sgpr(p4) = G_LOAD %0 :: (load (p4), align 8, addrspace 4) - $sgpr0_sgpr1 = COPY %1 + %1:sgpr(p4) = G_LOAD %0(p4) :: (load (p4), addrspace 4) + $sgpr0_sgpr1 = COPY %1(p4) ... 
@@ -714,8 +714,8 @@ body: | ; GFX11-NEXT: [[LOAD:%[0-9]+]]:sreg_64(p999) = G_LOAD [[COPY]](p4) :: (load (p999), addrspace 4) ; GFX11-NEXT: $sgpr0_sgpr1 = COPY [[LOAD]](p999) %0:sgpr(p4) = COPY $sgpr0_sgpr1 - %1:sgpr(p999) = G_LOAD %0 :: (load (p999), align 8, addrspace 4) - $sgpr0_sgpr1 = COPY %1 + %1:sgpr(p999) = G_LOAD %0(p4) :: (load (p999), addrspace 4) + $sgpr0_sgpr1 = COPY %1(p999) ... @@ -765,8 +765,8 @@ body: | ; GFX11-NEXT: [[LOAD:%[0-9]+]]:sreg_64(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), addrspace 4) ; GFX11-NEXT: $sgpr0_sgpr1 = COPY [[LOAD]](<2 x p3>) %0:sgpr(p4) = COPY $sgpr0_sgpr1 - %1:sgpr(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 8, addrspace 4) - $sgpr0_sgpr1 = COPY %1 + %1:sgpr(<2 x p3>) = G_LOAD %0(p4) :: (load (<2 x p3>), addrspace 4) + $sgpr0_sgpr1 = COPY %1(<2 x p3>) ... @@ -785,39 +785,39 @@ body: | ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (<2 x s16>), addrspace 4) + ; GFX6-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (<2 x i16>), addrspace 4) ; GFX6-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] ; ; GFX7-LABEL: name: load_constant_v2s16 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (<2 x s16>), addrspace 4) + ; GFX7-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (<2 x i16>), addrspace 4) ; GFX7-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] ; ; GFX8-LABEL: name: load_constant_v2s16 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (<2 x s16>), addrspace 4) + ; GFX8-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (<2 x i16>), addrspace 4) ; GFX8-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] ; ; GFX10-LABEL: name: load_constant_v2s16 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (<2 x s16>), addrspace 4) + ; GFX10-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (<2 x i16>), addrspace 4) ; GFX10-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] ; ; GFX11-LABEL: name: load_constant_v2s16 ; GFX11: liveins: $sgpr0_sgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX11-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (<2 x s16>), addrspace 4) + ; GFX11-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (<2 x i16>), addrspace 4) ; GFX11-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 - %1:sgpr(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 4) - $sgpr0 = COPY %1 + %1:sgpr(<2 x i16>) = G_LOAD %0(p4) :: (load (<2 x i16>), addrspace 4) + $sgpr0 = COPY %1(<2 x i16>) ... 
@@ -836,39 +836,39 @@ body: | ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<4 x s16>), addrspace 4) + ; GFX6-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<4 x i16>), addrspace 4) ; GFX6-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] ; ; GFX7-LABEL: name: load_constant_v4s16 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<4 x s16>), addrspace 4) + ; GFX7-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<4 x i16>), addrspace 4) ; GFX7-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] ; ; GFX8-LABEL: name: load_constant_v4s16 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<4 x s16>), addrspace 4) + ; GFX8-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<4 x i16>), addrspace 4) ; GFX8-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] ; ; GFX10-LABEL: name: load_constant_v4s16 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<4 x s16>), addrspace 4) + ; GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<4 x i16>), addrspace 4) ; GFX10-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] ; ; GFX11-LABEL: name: load_constant_v4s16 ; GFX11: liveins: $sgpr0_sgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX11-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<4 x s16>), addrspace 4) + ; GFX11-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<4 x i16>), addrspace 4) ; GFX11-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 - %1:sgpr(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 8, addrspace 4) - $sgpr0_sgpr1 = COPY %1 + %1:sgpr(<4 x i16>) = G_LOAD %0(p4) :: (load (<4 x i16>), addrspace 4) + $sgpr0_sgpr1 = COPY %1(<4 x i16>) ... 
@@ -887,39 +887,39 @@ body: | ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<8 x s16>), align 4, addrspace 4) + ; GFX6-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<8 x i16>), align 4, addrspace 4) ; GFX6-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] ; ; GFX7-LABEL: name: load_constant_v8s16 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<8 x s16>), align 4, addrspace 4) + ; GFX7-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<8 x i16>), align 4, addrspace 4) ; GFX7-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] ; ; GFX8-LABEL: name: load_constant_v8s16 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<8 x s16>), align 4, addrspace 4) + ; GFX8-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<8 x i16>), align 4, addrspace 4) ; GFX8-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] ; ; GFX10-LABEL: name: load_constant_v8s16 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: early-clobber %1:sgpr_128 = S_LOAD_DWORDX4_IMM_ec [[COPY]], 0, 0 :: (load (<8 x s16>), align 4, addrspace 4) + ; GFX10-NEXT: early-clobber %1:sgpr_128 = S_LOAD_DWORDX4_IMM_ec [[COPY]], 0, 0 :: (load (<8 x i16>), align 4, addrspace 4) ; GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %1 ; ; GFX11-LABEL: name: load_constant_v8s16 ; GFX11: liveins: $sgpr0_sgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX11-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<8 x s16>), align 4, addrspace 4) + ; GFX11-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<8 x i16>), align 4, addrspace 4) ; GFX11-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 - %1:sgpr(<8 x s16>) = G_LOAD %0 :: (load (<8 x s16>), align 4, addrspace 4) - $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %1 + %1:sgpr(<8 x i16>) = G_LOAD %0(p4) :: (load (<8 x i16>), align 4, addrspace 4) + $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %1(<8 x i16>) ... 
@@ -938,39 +938,39 @@ body: | ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (load (<8 x s32>), align 4, addrspace 4) + ; GFX6-NEXT: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (load (<8 x i32>), align 4, addrspace 4) ; GFX6-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[S_LOAD_DWORDX8_IMM]] ; ; GFX7-LABEL: name: load_constant_v8s32 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (load (<8 x s32>), align 4, addrspace 4) + ; GFX7-NEXT: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (load (<8 x i32>), align 4, addrspace 4) ; GFX7-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[S_LOAD_DWORDX8_IMM]] ; ; GFX8-LABEL: name: load_constant_v8s32 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (load (<8 x s32>), align 4, addrspace 4) + ; GFX8-NEXT: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (load (<8 x i32>), align 4, addrspace 4) ; GFX8-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[S_LOAD_DWORDX8_IMM]] ; ; GFX10-LABEL: name: load_constant_v8s32 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: early-clobber %1:sgpr_256 = S_LOAD_DWORDX8_IMM_ec [[COPY]], 0, 0 :: (load (<8 x s32>), align 4, addrspace 4) + ; GFX10-NEXT: early-clobber %1:sgpr_256 = S_LOAD_DWORDX8_IMM_ec [[COPY]], 0, 0 :: (load (<8 x i32>), align 4, addrspace 4) ; GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY %1 ; ; GFX11-LABEL: name: load_constant_v8s32 ; GFX11: liveins: $sgpr0_sgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX11-NEXT: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (load (<8 x s32>), align 4, addrspace 4) + ; GFX11-NEXT: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (load (<8 x i32>), align 4, addrspace 4) ; GFX11-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[S_LOAD_DWORDX8_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 - %1:sgpr(<8 x s32>) = G_LOAD %0 :: (load (<8 x s32>), align 4, addrspace 4) - $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY %1 + %1:sgpr(<8 x i32>) = G_LOAD %0(p4) :: (load (<8 x i32>), align 4, addrspace 4) + $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY %1(<8 x i32>) ... 
@@ -989,39 +989,39 @@ body: | ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load (<16 x s32>), align 4, addrspace 4) + ; GFX6-NEXT: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load (<16 x i32>), align 4, addrspace 4) ; GFX6-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]] ; ; GFX7-LABEL: name: load_constant_v16s32 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load (<16 x s32>), align 4, addrspace 4) + ; GFX7-NEXT: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load (<16 x i32>), align 4, addrspace 4) ; GFX7-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]] ; ; GFX8-LABEL: name: load_constant_v16s32 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load (<16 x s32>), align 4, addrspace 4) + ; GFX8-NEXT: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load (<16 x i32>), align 4, addrspace 4) ; GFX8-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]] ; ; GFX10-LABEL: name: load_constant_v16s32 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: early-clobber %1:sgpr_512 = S_LOAD_DWORDX16_IMM_ec [[COPY]], 0, 0 :: (load (<16 x s32>), align 4, addrspace 4) + ; GFX10-NEXT: early-clobber %1:sgpr_512 = S_LOAD_DWORDX16_IMM_ec [[COPY]], 0, 0 :: (load (<16 x i32>), align 4, addrspace 4) ; GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %1 ; ; GFX11-LABEL: name: load_constant_v16s32 ; GFX11: liveins: $sgpr0_sgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX11-NEXT: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load (<16 x s32>), align 4, addrspace 4) + ; GFX11-NEXT: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load (<16 x i32>), align 4, addrspace 4) ; GFX11-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 - %1:sgpr(<16 x s32>) = G_LOAD %0 :: (load (<16 x s32>), align 4, addrspace 4) - $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %1 + %1:sgpr(<16 x i32>) = G_LOAD %0(p4) :: (load (<16 x i32>), align 4, addrspace 4) + $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %1(<16 x i32>) ... 
@@ -1040,39 +1040,39 @@ body: | ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load (<8 x s64>), align 4, addrspace 4) + ; GFX6-NEXT: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load (<8 x i64>), align 4, addrspace 4) ; GFX6-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]] ; ; GFX7-LABEL: name: load_constant_v8s64 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load (<8 x s64>), align 4, addrspace 4) + ; GFX7-NEXT: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load (<8 x i64>), align 4, addrspace 4) ; GFX7-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]] ; ; GFX8-LABEL: name: load_constant_v8s64 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load (<8 x s64>), align 4, addrspace 4) + ; GFX8-NEXT: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load (<8 x i64>), align 4, addrspace 4) ; GFX8-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]] ; ; GFX10-LABEL: name: load_constant_v8s64 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: early-clobber %1:sgpr_512 = S_LOAD_DWORDX16_IMM_ec [[COPY]], 0, 0 :: (load (<8 x s64>), align 4, addrspace 4) + ; GFX10-NEXT: early-clobber %1:sgpr_512 = S_LOAD_DWORDX16_IMM_ec [[COPY]], 0, 0 :: (load (<8 x i64>), align 4, addrspace 4) ; GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %1 ; ; GFX11-LABEL: name: load_constant_v8s64 ; GFX11: liveins: $sgpr0_sgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX11-NEXT: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load (<8 x s64>), align 4, addrspace 4) + ; GFX11-NEXT: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load (<8 x i64>), align 4, addrspace 4) ; GFX11-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 - %1:sgpr(<8 x s64>) = G_LOAD %0 :: (load (<8 x s64>), align 4, addrspace 4) - $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %1 + %1:sgpr(<8 x i64>) = G_LOAD %0(p4) :: (load (<8 x i64>), align 4, addrspace 4) + $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %1(<8 x i64>) ... 
@@ -1095,41 +1095,41 @@ body: | ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 255, 0 :: (load (s32), addrspace 4) + ; GFX6-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 255, 0 :: (load (i32), addrspace 4) ; GFX6-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] ; ; GFX7-LABEL: name: load_constant_s32_from_4_gep_1020 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 255, 0 :: (load (s32), addrspace 4) + ; GFX7-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 255, 0 :: (load (i32), addrspace 4) ; GFX7-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] ; ; GFX8-LABEL: name: load_constant_s32_from_4_gep_1020 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1020, 0 :: (load (s32), addrspace 4) + ; GFX8-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1020, 0 :: (load (i32), addrspace 4) ; GFX8-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] ; ; GFX10-LABEL: name: load_constant_s32_from_4_gep_1020 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1020, 0 :: (load (s32), addrspace 4) + ; GFX10-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1020, 0 :: (load (i32), addrspace 4) ; GFX10-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] ; ; GFX11-LABEL: name: load_constant_s32_from_4_gep_1020 ; GFX11: liveins: $sgpr0_sgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX11-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1020, 0 :: (load (s32), addrspace 4) + ; GFX11-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1020, 0 :: (load (i32), addrspace 4) ; GFX11-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 - %1:sgpr(s64) = G_CONSTANT i64 1020 - %2:sgpr(p4) = G_PTR_ADD %0, %1 - %3:sgpr(s32) = G_LOAD %2 :: (load (s32), align 4, addrspace 4) - $sgpr0 = COPY %3 + %1:sgpr(i64) = G_CONSTANT i64 1020 + %2:sgpr(p4) = G_PTR_ADD %0, %1(i64) + %3:sgpr(i32) = G_LOAD %2(p4) :: (load (i32), addrspace 4) + $sgpr0 = COPY %3(i32) ... 
@@ -1149,41 +1149,41 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 - ; GFX6-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) + ; GFX6-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load (i32), addrspace 4) ; GFX6-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]] ; ; GFX7-LABEL: name: load_constant_s32_from_4_gep_1024 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[S_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM_ci [[COPY]], 256, 0 :: (load (s32), addrspace 4) + ; GFX7-NEXT: [[S_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM_ci [[COPY]], 256, 0 :: (load (i32), addrspace 4) ; GFX7-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM_ci]] ; ; GFX8-LABEL: name: load_constant_s32_from_4_gep_1024 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1024, 0 :: (load (s32), addrspace 4) + ; GFX8-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1024, 0 :: (load (i32), addrspace 4) ; GFX8-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] ; ; GFX10-LABEL: name: load_constant_s32_from_4_gep_1024 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1024, 0 :: (load (s32), addrspace 4) + ; GFX10-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1024, 0 :: (load (i32), addrspace 4) ; GFX10-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] ; ; GFX11-LABEL: name: load_constant_s32_from_4_gep_1024 ; GFX11: liveins: $sgpr0_sgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX11-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1024, 0 :: (load (s32), addrspace 4) + ; GFX11-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1024, 0 :: (load (i32), addrspace 4) ; GFX11-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 - %1:sgpr(s64) = G_CONSTANT i64 1024 - %2:sgpr(p4) = G_PTR_ADD %0, %1 - %3:sgpr(s32) = G_LOAD %2 :: (load (s32), align 4, addrspace 4) - $sgpr0 = COPY %3 + %1:sgpr(i64) = G_CONSTANT i64 1024 + %2:sgpr(p4) = G_PTR_ADD %0, %1(i64) + %3:sgpr(i32) = G_LOAD %2(p4) :: (load (i32), addrspace 4) + $sgpr0 = COPY %3(i32) ... 
@@ -1203,7 +1203,7 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048575 - ; GFX6-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) + ; GFX6-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load (i32), addrspace 4) ; GFX6-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]] ; ; GFX7-LABEL: name: load_constant_s32_from_4_gep_1048575 @@ -1211,14 +1211,14 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048575 - ; GFX7-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) + ; GFX7-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load (i32), addrspace 4) ; GFX7-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]] ; ; GFX8-LABEL: name: load_constant_s32_from_4_gep_1048575 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1048575, 0 :: (load (s32), addrspace 4) + ; GFX8-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1048575, 0 :: (load (i32), addrspace 4) ; GFX8-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] ; ; GFX10-LABEL: name: load_constant_s32_from_4_gep_1048575 @@ -1226,7 +1226,7 @@ body: | ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048575 - ; GFX10-NEXT: [[S_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR_IMM [[COPY]], [[S_MOV_B32_]], 0, 0 :: (load (s32), addrspace 4) + ; GFX10-NEXT: [[S_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR_IMM [[COPY]], [[S_MOV_B32_]], 0, 0 :: (load (i32), addrspace 4) ; GFX10-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR_IMM]] ; ; GFX11-LABEL: name: load_constant_s32_from_4_gep_1048575 @@ -1234,13 +1234,13 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048575 - ; GFX11-NEXT: [[S_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR_IMM [[COPY]], [[S_MOV_B32_]], 0, 0 :: (load (s32), addrspace 4) + ; GFX11-NEXT: [[S_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR_IMM [[COPY]], [[S_MOV_B32_]], 0, 0 :: (load (i32), addrspace 4) ; GFX11-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 - %1:sgpr(s64) = G_CONSTANT i64 1048575 - %2:sgpr(p4) = G_PTR_ADD %0, %1 - %3:sgpr(s32) = G_LOAD %2 :: (load (s32), align 4, addrspace 4) - $sgpr0 = COPY %3 + %1:sgpr(i64) = G_CONSTANT i64 1048575 + %2:sgpr(p4) = G_PTR_ADD %0, %1(i64) + %3:sgpr(i32) = G_LOAD %2(p4) :: (load (i32), addrspace 4) + $sgpr0 = COPY %3(i32) ... 
@@ -1260,14 +1260,14 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576 - ; GFX6-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) + ; GFX6-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load (i32), addrspace 4) ; GFX6-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]] ; ; GFX7-LABEL: name: load_constant_s32_from_4_gep_1048576 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[S_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM_ci [[COPY]], 262144, 0 :: (load (s32), addrspace 4) + ; GFX7-NEXT: [[S_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM_ci [[COPY]], 262144, 0 :: (load (i32), addrspace 4) ; GFX7-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM_ci]] ; ; GFX8-LABEL: name: load_constant_s32_from_4_gep_1048576 @@ -1275,7 +1275,7 @@ body: | ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576 - ; GFX8-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) + ; GFX8-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load (i32), addrspace 4) ; GFX8-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]] ; ; GFX10-LABEL: name: load_constant_s32_from_4_gep_1048576 @@ -1283,7 +1283,7 @@ body: | ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576 - ; GFX10-NEXT: [[S_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR_IMM [[COPY]], [[S_MOV_B32_]], 0, 0 :: (load (s32), addrspace 4) + ; GFX10-NEXT: [[S_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR_IMM [[COPY]], [[S_MOV_B32_]], 0, 0 :: (load (i32), addrspace 4) ; GFX10-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR_IMM]] ; ; GFX11-LABEL: name: load_constant_s32_from_4_gep_1048576 @@ -1291,13 +1291,13 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576 - ; GFX11-NEXT: [[S_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR_IMM [[COPY]], [[S_MOV_B32_]], 0, 0 :: (load (s32), addrspace 4) + ; GFX11-NEXT: [[S_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR_IMM [[COPY]], [[S_MOV_B32_]], 0, 0 :: (load (i32), addrspace 4) ; GFX11-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 - %1:sgpr(s64) = G_CONSTANT i64 1048576 - %2:sgpr(p4) = G_PTR_ADD %0, %1 - %3:sgpr(s32) = G_LOAD %2 :: (load (s32), align 4, addrspace 4) - $sgpr0 = COPY %3 + %1:sgpr(i64) = G_CONSTANT i64 1048576 + %2:sgpr(p4) = G_PTR_ADD %0, %1(i64) + %3:sgpr(i32) = G_LOAD %2(p4) :: (load (i32), addrspace 4) + $sgpr0 = COPY %3(i32) ... 
@@ -1317,7 +1317,7 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741823 - ; GFX6-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) + ; GFX6-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load (i32), addrspace 4) ; GFX6-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]] ; ; GFX7-LABEL: name: load_constant_s32_from_4_gep_1073741823 @@ -1325,7 +1325,7 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741823 - ; GFX7-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) + ; GFX7-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load (i32), addrspace 4) ; GFX7-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]] ; ; GFX8-LABEL: name: load_constant_s32_from_4_gep_1073741823 @@ -1333,7 +1333,7 @@ body: | ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741823 - ; GFX8-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) + ; GFX8-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load (i32), addrspace 4) ; GFX8-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]] ; ; GFX10-LABEL: name: load_constant_s32_from_4_gep_1073741823 @@ -1341,7 +1341,7 @@ body: | ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741823 - ; GFX10-NEXT: [[S_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR_IMM [[COPY]], [[S_MOV_B32_]], 0, 0 :: (load (s32), addrspace 4) + ; GFX10-NEXT: [[S_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR_IMM [[COPY]], [[S_MOV_B32_]], 0, 0 :: (load (i32), addrspace 4) ; GFX10-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR_IMM]] ; ; GFX11-LABEL: name: load_constant_s32_from_4_gep_1073741823 @@ -1349,13 +1349,13 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741823 - ; GFX11-NEXT: [[S_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR_IMM [[COPY]], [[S_MOV_B32_]], 0, 0 :: (load (s32), addrspace 4) + ; GFX11-NEXT: [[S_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR_IMM [[COPY]], [[S_MOV_B32_]], 0, 0 :: (load (i32), addrspace 4) ; GFX11-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 - %1:sgpr(s64) = G_CONSTANT i64 1073741823 - %2:sgpr(p4) = G_PTR_ADD %0, %1 - %3:sgpr(s32) = G_LOAD %2 :: (load (s32), align 4, addrspace 4) - $sgpr0 = COPY %3 + %1:sgpr(i64) = G_CONSTANT i64 1073741823 + %2:sgpr(p4) = G_PTR_ADD %0, %1(i64) + %3:sgpr(i32) = G_LOAD %2(p4) :: (load (i32), addrspace 4) + $sgpr0 = COPY %3(i32) ... 
@@ -1382,7 +1382,7 @@ body: | ; GFX6-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc ; GFX6-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 - ; GFX6-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0 :: (load (s32), addrspace 4) + ; GFX6-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0 :: (load (i32), addrspace 4) ; GFX6-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] ; ; GFX7-LABEL: name: load_constant_s32_from_4_gep_negative_1 @@ -1397,7 +1397,7 @@ body: | ; GFX7-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc ; GFX7-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 - ; GFX7-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0 :: (load (s32), addrspace 4) + ; GFX7-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0 :: (load (i32), addrspace 4) ; GFX7-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] ; ; GFX8-LABEL: name: load_constant_s32_from_4_gep_negative_1 @@ -1412,7 +1412,7 @@ body: | ; GFX8-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc ; GFX8-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 - ; GFX8-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0 :: (load (s32), addrspace 4) + ; GFX8-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0 :: (load (i32), addrspace 4) ; GFX8-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] ; ; GFX10-LABEL: name: load_constant_s32_from_4_gep_negative_1 @@ -1427,7 +1427,7 @@ body: | ; GFX10-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc ; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 - ; GFX10-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0 :: (load (s32), addrspace 4) + ; GFX10-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0 :: (load (i32), addrspace 4) ; GFX10-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] ; ; GFX11-LABEL: name: load_constant_s32_from_4_gep_negative_1 @@ -1442,13 +1442,13 @@ body: | ; GFX11-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc ; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 - ; GFX11-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0 :: (load (s32), addrspace 4) + ; GFX11-NEXT: 
[[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0 :: (load (i32), addrspace 4) ; GFX11-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 - %1:sgpr(s64) = G_CONSTANT i64 -1 - %2:sgpr(p4) = G_PTR_ADD %0, %1 - %3:sgpr(s32) = G_LOAD %2 :: (load (s32), align 4, addrspace 4) - $sgpr0 = COPY %3 + %1:sgpr(i64) = G_CONSTANT i64 -1 + %2:sgpr(p4) = G_PTR_ADD %0, %1(i64) + %3:sgpr(i32) = G_LOAD %2(p4) :: (load (i32), addrspace 4) + $sgpr0 = COPY %3(i32) ... @@ -1475,7 +1475,7 @@ body: | ; GFX6-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc ; GFX6-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 - ; GFX6-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0 :: (load (s32), addrspace 4) + ; GFX6-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0 :: (load (i32), addrspace 4) ; GFX6-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] ; ; GFX7-LABEL: name: load_constant_s32_from_4_gep_negative_524288 @@ -1490,7 +1490,7 @@ body: | ; GFX7-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc ; GFX7-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 - ; GFX7-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0 :: (load (s32), addrspace 4) + ; GFX7-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0 :: (load (i32), addrspace 4) ; GFX7-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] ; ; GFX8-LABEL: name: load_constant_s32_from_4_gep_negative_524288 @@ -1505,7 +1505,7 @@ body: | ; GFX8-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc ; GFX8-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 - ; GFX8-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0 :: (load (s32), addrspace 4) + ; GFX8-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0 :: (load (i32), addrspace 4) ; GFX8-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] ; ; GFX10-LABEL: name: load_constant_s32_from_4_gep_negative_524288 @@ -1520,7 +1520,7 @@ body: | ; GFX10-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc ; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 - ; GFX10-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0 :: (load (s32), addrspace 4) + ; GFX10-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0 :: (load (i32), addrspace 4) ; GFX10-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] ; ; GFX11-LABEL: name: load_constant_s32_from_4_gep_negative_524288 @@ -1535,12 +1535,12 @@ 
body: | ; GFX11-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc ; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 - ; GFX11-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0 :: (load (s32), addrspace 4) + ; GFX11-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0 :: (load (i32), addrspace 4) ; GFX11-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 - %1:sgpr(s64) = G_CONSTANT i64 -524288 - %2:sgpr(p4) = G_PTR_ADD %0, %1 - %3:sgpr(s32) = G_LOAD %2 :: (load (s32), align 4, addrspace 4) - $sgpr0 = COPY %3 + %1:sgpr(i64) = G_CONSTANT i64 -524288 + %2:sgpr(p4) = G_PTR_ADD %0, %1(i64) + %3:sgpr(i32) = G_LOAD %2(p4) :: (load (i32), addrspace 4) + $sgpr0 = COPY %3(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir index e1325a0a0bc50..dd5834aa4b4c4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir @@ -22,46 +22,46 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32)) + ; GFX7-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i32)) ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; ; GFX8-LABEL: name: load_flat_s32_from_4 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32)) + ; GFX8-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i32)) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; ; GFX9-LABEL: name: load_flat_s32_from_4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32)) + ; GFX9-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i32)) ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; ; GFX10-LABEL: name: load_flat_s32_from_4 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32)) + ; GFX10-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i32)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; ; GFX11-LABEL: name: load_flat_s32_from_4 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32)) + ; GFX11-NEXT: 
[[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i32)) ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; ; GFX12-LABEL: name: load_flat_s32_from_4 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32)) + ; GFX12-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i32)) ; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 0) - $vgpr0 = COPY %1 + %1:vgpr(i32) = G_LOAD %0(p1) :: (load (i32)) + $vgpr0 = COPY %1(i32) ... @@ -80,46 +80,46 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16)) + ; GFX7-NEXT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i16)) ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_USHORT]] ; ; GFX8-LABEL: name: load_flat_s32_from_2 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16)) + ; GFX8-NEXT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i16)) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_USHORT]] ; ; GFX9-LABEL: name: load_flat_s32_from_2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16)) + ; GFX9-NEXT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i16)) ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_USHORT]] ; ; GFX10-LABEL: name: load_flat_s32_from_2 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16)) + ; GFX10-NEXT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i16)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_USHORT]] ; ; GFX11-LABEL: name: load_flat_s32_from_2 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16)) + ; GFX11-NEXT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i16)) ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_USHORT]] ; ; GFX12-LABEL: name: load_flat_s32_from_2 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16)) + ; GFX12-NEXT: 
[[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i16)) ; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_USHORT]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = G_LOAD %0 :: (load (s16), align 2, addrspace 0) - $vgpr0 = COPY %1 + %1:vgpr(i32) = G_LOAD %0(p1) :: (load (i16)) + $vgpr0 = COPY %1(i32) ... @@ -138,46 +138,46 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX8-LABEL: name: load_flat_s32_from_1 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX9-LABEL: name: load_flat_s32_from_1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX10-LABEL: name: load_flat_s32_from_1 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX11-LABEL: name: load_flat_s32_from_1 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_flat_s32_from_1 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX12-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = G_LOAD %0 :: (load (s8), align 1, addrspace 0) - $vgpr0 = COPY %1 + %1:vgpr(i32) = G_LOAD %0(p1) :: (load (i8)) + $vgpr0 = COPY %1(i32) ... 
@@ -196,46 +196,46 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>)) + ; GFX7-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x i32>)) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; ; GFX8-LABEL: name: load_flat_v2s32 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>)) + ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x i32>)) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; ; GFX9-LABEL: name: load_flat_v2s32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>)) + ; GFX9-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x i32>)) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; ; GFX10-LABEL: name: load_flat_v2s32 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>)) + ; GFX10-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x i32>)) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; ; GFX11-LABEL: name: load_flat_v2s32 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>)) + ; GFX11-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x i32>)) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; ; GFX12-LABEL: name: load_flat_v2s32 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>)) + ; GFX12-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x i32>)) ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 8, addrspace 0) - $vgpr0_vgpr1 = COPY %1 + %1:vgpr(<2 x i32>) = G_LOAD %0(p1) :: (load (<2 x i32>)) + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... 
@@ -254,46 +254,46 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<3 x s32>), align 4) + ; GFX7-NEXT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<3 x i32>), align 4) ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]] ; ; GFX8-LABEL: name: load_flat_v3s32 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<3 x s32>), align 4) + ; GFX8-NEXT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<3 x i32>), align 4) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]] ; ; GFX9-LABEL: name: load_flat_v3s32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<3 x s32>), align 4) + ; GFX9-NEXT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<3 x i32>), align 4) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]] ; ; GFX10-LABEL: name: load_flat_v3s32 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<3 x s32>), align 4) + ; GFX10-NEXT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<3 x i32>), align 4) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]] ; ; GFX11-LABEL: name: load_flat_v3s32 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<3 x s32>), align 4) + ; GFX11-NEXT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<3 x i32>), align 4) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]] ; ; GFX12-LABEL: name: load_flat_v3s32 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<3 x s32>), align 4) + ; GFX12-NEXT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<3 x i32>), align 4) ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 4, addrspace 0) - $vgpr0_vgpr1_vgpr2 = COPY %1 + %1:vgpr(<3 x i32>) = G_LOAD %0(p1) :: (load (<3 x i32>), align 4) + $vgpr0_vgpr1_vgpr2 = COPY %1(<3 x i32>) ... 
@@ -312,46 +312,46 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4) + ; GFX7-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x i32>), align 4) ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] ; ; GFX8-LABEL: name: load_flat_v4s32 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4) + ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x i32>), align 4) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] ; ; GFX9-LABEL: name: load_flat_v4s32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4) + ; GFX9-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x i32>), align 4) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] ; ; GFX10-LABEL: name: load_flat_v4s32 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4) + ; GFX10-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x i32>), align 4) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] ; ; GFX11-LABEL: name: load_flat_v4s32 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4) + ; GFX11-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x i32>), align 4) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] ; ; GFX12-LABEL: name: load_flat_v4s32 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4) + ; GFX12-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x i32>), align 4) ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 4, addrspace 0) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:vgpr(<4 x i32>) = G_LOAD %0(p1) :: (load (<4 x i32>), align 4) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<4 x i32>) ... 
@@ -370,46 +370,46 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64)) + ; GFX7-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i64)) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; ; GFX8-LABEL: name: load_flat_s64 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64)) + ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i64)) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; ; GFX9-LABEL: name: load_flat_s64 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64)) + ; GFX9-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i64)) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; ; GFX10-LABEL: name: load_flat_s64 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64)) + ; GFX10-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i64)) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; ; GFX11-LABEL: name: load_flat_s64 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64)) + ; GFX11-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i64)) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; ; GFX12-LABEL: name: load_flat_s64 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64)) + ; GFX12-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i64)) ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_LOAD %0 :: (load (s64), align 8, addrspace 0) - $vgpr0_vgpr1 = COPY %1 + %1:vgpr(i64) = G_LOAD %0(p1) :: (load (i64)) + $vgpr0_vgpr1 = COPY %1(i64) ... 
@@ -428,46 +428,46 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4) + ; GFX7-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x i64>), align 4) ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] ; ; GFX8-LABEL: name: load_flat_v2s64 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4) + ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x i64>), align 4) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] ; ; GFX9-LABEL: name: load_flat_v2s64 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4) + ; GFX9-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x i64>), align 4) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] ; ; GFX10-LABEL: name: load_flat_v2s64 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4) + ; GFX10-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x i64>), align 4) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] ; ; GFX11-LABEL: name: load_flat_v2s64 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4) + ; GFX11-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x i64>), align 4) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] ; ; GFX12-LABEL: name: load_flat_v2s64 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4) + ; GFX12-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x i64>), align 4) ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 4, addrspace 0) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:vgpr(<2 x i64>) = G_LOAD %0(p1) :: (load (<2 x i64>), align 4) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x i64>) ... 
@@ -524,8 +524,8 @@ body: | ; GFX12-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4) ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 4, addrspace 0) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:vgpr(<2 x p1>) = G_LOAD %0(p1) :: (load (<2 x p1>), align 4) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x p1>) ... @@ -544,46 +544,46 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load (s96), align 4) - ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) + ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_96(i96) = G_LOAD [[COPY]](p1) :: (load (i96), align 4) + ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](i96) ; ; GFX8-LABEL: name: load_flat_s96 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load (s96), align 4) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) + ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vreg_96(i96) = G_LOAD [[COPY]](p1) :: (load (i96), align 4) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](i96) ; ; GFX9-LABEL: name: load_flat_s96 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load (s96), align 4) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_96(i96) = G_LOAD [[COPY]](p1) :: (load (i96), align 4) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](i96) ; ; GFX10-LABEL: name: load_flat_s96 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load (s96), align 4) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_96(i96) = G_LOAD [[COPY]](p1) :: (load (i96), align 4) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](i96) ; ; GFX11-LABEL: name: load_flat_s96 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load (s96), align 4) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_96(i96) = G_LOAD [[COPY]](p1) :: (load (i96), align 4) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](i96) ; ; GFX12-LABEL: name: load_flat_s96 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load (s96), align 4) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:vreg_96(i96) = G_LOAD [[COPY]](p1) :: (load (i96), align 4) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](i96) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s96) = G_LOAD %0 :: (load (s96), align 4, addrspace 0) - $vgpr0_vgpr1_vgpr2 = COPY %1 + %1:vgpr(i96) = G_LOAD %0(p1) :: (load (i96), align 4) + $vgpr0_vgpr1_vgpr2 = COPY %1(i96) ... 
@@ -602,46 +602,46 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4) - ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_128(i128) = G_LOAD [[COPY]](p1) :: (load (i128), align 4) + ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](i128) ; ; GFX8-LABEL: name: load_flat_s128 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vreg_128(i128) = G_LOAD [[COPY]](p1) :: (load (i128), align 4) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](i128) ; ; GFX9-LABEL: name: load_flat_s128 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_128(i128) = G_LOAD [[COPY]](p1) :: (load (i128), align 4) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](i128) ; ; GFX10-LABEL: name: load_flat_s128 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_128(i128) = G_LOAD [[COPY]](p1) :: (load (i128), align 4) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](i128) ; ; GFX11-LABEL: name: load_flat_s128 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_128(i128) = G_LOAD [[COPY]](p1) :: (load (i128), align 4) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](i128) ; ; GFX12-LABEL: name: load_flat_s128 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:vreg_128(i128) = G_LOAD [[COPY]](p1) :: (load (i128), align 4) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](i128) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s128) = G_LOAD %0 :: (load (s128), align 4, addrspace 0) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:vgpr(i128) = G_LOAD %0(p1) :: (load (i128), align 4) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(i128) ... @@ -698,8 +698,8 @@ body: | ; GFX12-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p3)) ; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(p3) = G_LOAD %0 :: (load (p3), align 4, addrspace 0) - $vgpr0 = COPY %1 + %1:vgpr(p3) = G_LOAD %0(p1) :: (load (p3)) + $vgpr0 = COPY %1(p3) ... 
@@ -756,8 +756,8 @@ body: | ; GFX12-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p1)) ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(p1) = G_LOAD %0 :: (load (p1), align 8, addrspace 0) - $vgpr0_vgpr1 = COPY %1 + %1:vgpr(p1) = G_LOAD %0(p1) :: (load (p1)) + $vgpr0_vgpr1 = COPY %1(p1) ... @@ -814,8 +814,8 @@ body: | ; GFX12-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999)) ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(p999) = G_LOAD %0 :: (load (p999), align 8, addrspace 0) - $vgpr0_vgpr1 = COPY %1 + %1:vgpr(p999) = G_LOAD %0(p1) :: (load (p999)) + $vgpr0_vgpr1 = COPY %1(p999) ... @@ -872,8 +872,8 @@ body: | ; GFX12-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>)) ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 8, addrspace 0) - $vgpr0_vgpr1 = COPY %1 + %1:vgpr(<2 x p3>) = G_LOAD %0(p1) :: (load (<2 x p3>)) + $vgpr0_vgpr1 = COPY %1(<2 x p3>) ... @@ -892,46 +892,46 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>)) + ; GFX7-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x i16>)) ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; ; GFX8-LABEL: name: load_flat_v2s16 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>)) + ; GFX8-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x i16>)) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; ; GFX9-LABEL: name: load_flat_v2s16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>)) + ; GFX9-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x i16>)) ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; ; GFX10-LABEL: name: load_flat_v2s16 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>)) + ; GFX10-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x i16>)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; ; GFX11-LABEL: name: load_flat_v2s16 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>)) + ; GFX11-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x i16>)) ; GFX11-NEXT: 
$vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; ; GFX12-LABEL: name: load_flat_v2s16 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>)) + ; GFX12-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x i16>)) ; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 0) - $vgpr0 = COPY %1 + %1:vgpr(<2 x i16>) = G_LOAD %0(p1) :: (load (<2 x i16>)) + $vgpr0 = COPY %1(<2 x i16>) ... @@ -950,46 +950,46 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>)) + ; GFX7-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x i16>)) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; ; GFX8-LABEL: name: load_flat_v4s16 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>)) + ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x i16>)) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; ; GFX9-LABEL: name: load_flat_v4s16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>)) + ; GFX9-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x i16>)) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; ; GFX10-LABEL: name: load_flat_v4s16 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>)) + ; GFX10-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x i16>)) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; ; GFX11-LABEL: name: load_flat_v4s16 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>)) + ; GFX11-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x i16>)) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; ; GFX12-LABEL: name: load_flat_v4s16 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>)) + ; GFX12-NEXT: 
[[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x i16>)) ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 8, addrspace 0) - $vgpr0_vgpr1 = COPY %1 + %1:vgpr(<4 x i16>) = G_LOAD %0(p1) :: (load (<4 x i16>)) + $vgpr0_vgpr1 = COPY %1(<4 x i16>) ... @@ -1008,46 +1008,46 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 4) - ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) + ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_96(<6 x i16>) = G_LOAD [[COPY]](p1) :: (load (<6 x i16>), align 4) + ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x i16>) ; ; GFX8-LABEL: name: load_flat_v6s16 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 4) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) + ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vreg_96(<6 x i16>) = G_LOAD [[COPY]](p1) :: (load (<6 x i16>), align 4) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x i16>) ; ; GFX9-LABEL: name: load_flat_v6s16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 4) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_96(<6 x i16>) = G_LOAD [[COPY]](p1) :: (load (<6 x i16>), align 4) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x i16>) ; ; GFX10-LABEL: name: load_flat_v6s16 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 4) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_96(<6 x i16>) = G_LOAD [[COPY]](p1) :: (load (<6 x i16>), align 4) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x i16>) ; ; GFX11-LABEL: name: load_flat_v6s16 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 4) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_96(<6 x i16>) = G_LOAD [[COPY]](p1) :: (load (<6 x i16>), align 4) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x i16>) ; ; GFX12-LABEL: name: load_flat_v6s16 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 4) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:vreg_96(<6 x i16>) = G_LOAD [[COPY]](p1) :: (load (<6 x i16>), align 4) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x i16>) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<6 x s16>) = G_LOAD %0 :: (load (<6 x s16>), align 4, addrspace 0) - $vgpr0_vgpr1_vgpr2 = COPY %1 + %1:vgpr(<6 x i16>) = G_LOAD %0(p1) :: (load (<6 x i16>), align 4) + $vgpr0_vgpr1_vgpr2 = COPY %1(<6 x i16>) 
... @@ -1066,46 +1066,46 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4) + ; GFX7-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x i16>), align 4) ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] ; ; GFX8-LABEL: name: load_flat_v8s16 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4) + ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x i16>), align 4) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] ; ; GFX9-LABEL: name: load_flat_v8s16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4) + ; GFX9-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x i16>), align 4) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] ; ; GFX10-LABEL: name: load_flat_v8s16 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4) + ; GFX10-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x i16>), align 4) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] ; ; GFX11-LABEL: name: load_flat_v8s16 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4) + ; GFX11-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x i16>), align 4) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] ; ; GFX12-LABEL: name: load_flat_v8s16 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4) + ; GFX12-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x i16>), align 4) ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<8 x s16>) = G_LOAD %0 :: (load (<8 x s16>), align 4, addrspace 0) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:vgpr(<8 x i16>) = G_LOAD %0(p1) :: (load (<8 x i16>), align 4) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<8 x i16>) ... 
@@ -1136,7 +1136,7 @@ body: | ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX8-LABEL: name: load_flat_s32_from_1_gep_2047 @@ -1151,14 +1151,14 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX9-LABEL: name: load_flat_s32_from_1_gep_2047 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX10-LABEL: name: load_flat_s32_from_1_gep_2047 @@ -1173,27 +1173,27 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX11-LABEL: name: load_flat_s32_from_1_gep_2047 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec, implicit $flat_scr :: (load 
(i8)) ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_flat_s32_from_1_gep_2047 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX12-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_CONSTANT i64 2047 - %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 0) - $vgpr0 = COPY %3 + %1:vgpr(i64) = G_CONSTANT i64 2047 + %2:vgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(i32) = G_LOAD %2(p1) :: (load (i8)) + $vgpr0 = COPY %3(i32) ... @@ -1220,7 +1220,7 @@ body: | ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX8-LABEL: name: load_flat_s32_from_1_gep_2048 @@ -1235,14 +1235,14 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX9-LABEL: name: load_flat_s32_from_1_gep_2048 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 2048, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 2048, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX10-LABEL: name: load_flat_s32_from_1_gep_2048 @@ -1257,27 +1257,27 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: 
[[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX11-LABEL: name: load_flat_s32_from_1_gep_2048 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 2048, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 2048, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_flat_s32_from_1_gep_2048 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 2048, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX12-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 2048, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_CONSTANT i64 2048 - %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 0) - $vgpr0 = COPY %3 + %1:vgpr(i64) = G_CONSTANT i64 2048 + %2:vgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(i32) = G_LOAD %2(p1) :: (load (i8)) + $vgpr0 = COPY %3(i32) ... @@ -1304,7 +1304,7 @@ body: | ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX8-LABEL: name: load_flat_s32_from_1_gep_m2047 @@ -1319,7 +1319,7 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX9-LABEL: name: load_flat_s32_from_1_gep_m2047 @@ 
-1334,7 +1334,7 @@ body: | ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX10-LABEL: name: load_flat_s32_from_1_gep_m2047 @@ -1349,7 +1349,7 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX11-LABEL: name: load_flat_s32_from_1_gep_m2047 @@ -1364,20 +1364,20 @@ body: | ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_flat_s32_from_1_gep_m2047 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], -2047, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX12-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], -2047, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_CONSTANT i64 -2047 - %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 0) - $vgpr0 = COPY %3 + %1:vgpr(i64) = G_CONSTANT i64 -2047 + %2:vgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(i32) = G_LOAD %2(p1) :: (load (i8)) + $vgpr0 = COPY %3(i32) ... 
@@ -1404,7 +1404,7 @@ body: | ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX8-LABEL: name: load_flat_s32_from_1_gep_m2048 @@ -1419,7 +1419,7 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX9-LABEL: name: load_flat_s32_from_1_gep_m2048 @@ -1434,7 +1434,7 @@ body: | ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX10-LABEL: name: load_flat_s32_from_1_gep_m2048 @@ -1449,7 +1449,7 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX11-LABEL: name: 
load_flat_s32_from_1_gep_m2048 @@ -1464,20 +1464,20 @@ body: | ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_flat_s32_from_1_gep_m2048 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], -2048, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX12-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], -2048, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_CONSTANT i64 -2048 - %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 0) - $vgpr0 = COPY %3 + %1:vgpr(i64) = G_CONSTANT i64 -2048 + %2:vgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(i32) = G_LOAD %2(p1) :: (load (i8)) + $vgpr0 = COPY %3(i32) ... @@ -1504,7 +1504,7 @@ body: | ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX8-LABEL: name: load_flat_s32_from_1_gep_4095 @@ -1519,14 +1519,14 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX9-LABEL: name: load_flat_s32_from_1_gep_4095 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; 
GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 4095, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 4095, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX10-LABEL: name: load_flat_s32_from_1_gep_4095 @@ -1541,27 +1541,27 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX11-LABEL: name: load_flat_s32_from_1_gep_4095 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 4095, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 4095, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_flat_s32_from_1_gep_4095 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 4095, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX12-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 4095, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_CONSTANT i64 4095 - %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 0) - $vgpr0 = COPY %3 + %1:vgpr(i64) = G_CONSTANT i64 4095 + %2:vgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(i32) = G_LOAD %2(p1) :: (load (i8)) + $vgpr0 = COPY %3(i32) ... 
@@ -1588,7 +1588,7 @@ body: | ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX8-LABEL: name: load_flat_s32_from_1_gep_4096 @@ -1603,7 +1603,7 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX9-LABEL: name: load_flat_s32_from_1_gep_4096 @@ -1618,7 +1618,7 @@ body: | ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX10-LABEL: name: load_flat_s32_from_1_gep_4096 @@ -1633,7 +1633,7 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX11-LABEL: name: 
load_flat_s32_from_1_gep_4096 @@ -1648,20 +1648,20 @@ body: | ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_flat_s32_from_1_gep_4096 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 4096, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX12-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 4096, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_CONSTANT i64 4096 - %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 0) - $vgpr0 = COPY %3 + %1:vgpr(i64) = G_CONSTANT i64 4096 + %2:vgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(i32) = G_LOAD %2(p1) :: (load (i8)) + $vgpr0 = COPY %3(i32) ... @@ -1688,7 +1688,7 @@ body: | ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX8-LABEL: name: load_flat_s32_from_1_gep_m4095 @@ -1703,7 +1703,7 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX9-LABEL: name: load_flat_s32_from_1_gep_m4095 @@ -1718,7 +1718,7 @@ body: | ; GFX9-NEXT: 
[[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX10-LABEL: name: load_flat_s32_from_1_gep_m4095 @@ -1733,7 +1733,7 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX11-LABEL: name: load_flat_s32_from_1_gep_m4095 @@ -1748,20 +1748,20 @@ body: | ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_flat_s32_from_1_gep_m4095 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], -4095, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX12-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], -4095, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_CONSTANT i64 -4095 - %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 0) - $vgpr0 = COPY %3 + %1:vgpr(i64) = G_CONSTANT i64 -4095 + %2:vgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(i32) = G_LOAD %2(p1) :: (load (i8)) + $vgpr0 = COPY %3(i32) ... 
@@ -1788,7 +1788,7 @@ body: | ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX8-LABEL: name: load_flat_s32_from_1_gep_m4096 @@ -1803,7 +1803,7 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX9-LABEL: name: load_flat_s32_from_1_gep_m4096 @@ -1818,7 +1818,7 @@ body: | ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX10-LABEL: name: load_flat_s32_from_1_gep_m4096 @@ -1833,7 +1833,7 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX11-LABEL: name: 
load_flat_s32_from_1_gep_m4096 @@ -1848,20 +1848,20 @@ body: | ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_flat_s32_from_1_gep_m4096 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], -4096, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX12-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], -4096, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_CONSTANT i64 -4096 - %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 0) - $vgpr0 = COPY %3 + %1:vgpr(i64) = G_CONSTANT i64 -4096 + %2:vgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(i32) = G_LOAD %2(p1) :: (load (i8)) + $vgpr0 = COPY %3(i32) ... @@ -1888,7 +1888,7 @@ body: | ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX8-LABEL: name: load_flat_s32_from_1_gep_8191 @@ -1903,7 +1903,7 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX9-LABEL: name: load_flat_s32_from_1_gep_8191 @@ -1918,7 +1918,7 @@ body: | ; GFX9-NEXT: 
[[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX10-LABEL: name: load_flat_s32_from_1_gep_8191 @@ -1933,7 +1933,7 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX11-LABEL: name: load_flat_s32_from_1_gep_8191 @@ -1948,20 +1948,20 @@ body: | ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_flat_s32_from_1_gep_8191 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 8191, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX12-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 8191, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_CONSTANT i64 8191 - %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 0) - $vgpr0 = COPY %3 + %1:vgpr(i64) = G_CONSTANT i64 8191 + %2:vgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(i32) = G_LOAD %2(p1) :: (load (i8)) + $vgpr0 = COPY %3(i32) ... 
@@ -1988,7 +1988,7 @@ body: | ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX8-LABEL: name: load_flat_s32_from_1_gep_8192 @@ -2003,7 +2003,7 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX9-LABEL: name: load_flat_s32_from_1_gep_8192 @@ -2018,7 +2018,7 @@ body: | ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX10-LABEL: name: load_flat_s32_from_1_gep_8192 @@ -2033,7 +2033,7 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX11-LABEL: name: 
load_flat_s32_from_1_gep_8192 @@ -2048,20 +2048,20 @@ body: | ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_flat_s32_from_1_gep_8192 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 8192, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX12-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 8192, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_CONSTANT i64 8192 - %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 0) - $vgpr0 = COPY %3 + %1:vgpr(i64) = G_CONSTANT i64 8192 + %2:vgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(i32) = G_LOAD %2(p1) :: (load (i8)) + $vgpr0 = COPY %3(i32) ... @@ -2088,7 +2088,7 @@ body: | ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX8-LABEL: name: load_flat_s32_from_1_gep_m8191 @@ -2103,7 +2103,7 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX9-LABEL: name: load_flat_s32_from_1_gep_m8191 @@ -2118,7 +2118,7 @@ body: | ; GFX9-NEXT: 
[[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX10-LABEL: name: load_flat_s32_from_1_gep_m8191 @@ -2133,7 +2133,7 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX11-LABEL: name: load_flat_s32_from_1_gep_m8191 @@ -2148,20 +2148,20 @@ body: | ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_flat_s32_from_1_gep_m8191 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], -8191, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX12-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], -8191, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_CONSTANT i64 -8191 - %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 0) - $vgpr0 = COPY %3 + %1:vgpr(i64) = G_CONSTANT i64 -8191 + %2:vgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(i32) = G_LOAD %2(p1) :: (load (i8)) + $vgpr0 = COPY %3(i32) ... 
@@ -2188,7 +2188,7 @@ body: | ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX8-LABEL: name: load_flat_s32_from_1_gep_m8192 @@ -2203,7 +2203,7 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX9-LABEL: name: load_flat_s32_from_1_gep_m8192 @@ -2218,7 +2218,7 @@ body: | ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX10-LABEL: name: load_flat_s32_from_1_gep_m8192 @@ -2233,7 +2233,7 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX11-LABEL: name: 
load_flat_s32_from_1_gep_m8192 @@ -2248,20 +2248,20 @@ body: | ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_flat_s32_from_1_gep_m8192 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], -8192, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX12-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], -8192, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_CONSTANT i64 -8192 - %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 0) - $vgpr0 = COPY %3 + %1:vgpr(i64) = G_CONSTANT i64 -8192 + %2:vgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(i32) = G_LOAD %2(p1) :: (load (i8)) + $vgpr0 = COPY %3(i32) ... @@ -2288,7 +2288,7 @@ body: | ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX8-LABEL: name: load_flat_s32_from_1_gep_24bit_max @@ -2303,7 +2303,7 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX9-LABEL: name: load_flat_s32_from_1_gep_24bit_max @@ -2318,7 +2318,7 @@ body: | ; GFX9-NEXT: 
[[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX10-LABEL: name: load_flat_s32_from_1_gep_24bit_max @@ -2333,7 +2333,7 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX11-LABEL: name: load_flat_s32_from_1_gep_24bit_max @@ -2348,20 +2348,20 @@ body: | ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_flat_s32_from_1_gep_24bit_max ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 8388607, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX12-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 8388607, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_CONSTANT i64 8388607 - %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 0) - $vgpr0 = COPY %3 + %1:vgpr(i64) = G_CONSTANT i64 8388607 + %2:vgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(i32) = G_LOAD %2(p1) :: (load (i8)) + $vgpr0 = COPY %3(i32) ... 
@@ -2388,7 +2388,7 @@ body: | ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX8-LABEL: name: load_flat_s32_from_1_gep_2x_24bit_max @@ -2403,7 +2403,7 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX9-LABEL: name: load_flat_s32_from_1_gep_2x_24bit_max @@ -2418,7 +2418,7 @@ body: | ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX10-LABEL: name: load_flat_s32_from_1_gep_2x_24bit_max @@ -2433,7 +2433,7 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; 
GFX11-LABEL: name: load_flat_s32_from_1_gep_2x_24bit_max @@ -2448,7 +2448,7 @@ body: | ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_flat_s32_from_1_gep_2x_24bit_max @@ -2463,13 +2463,13 @@ body: | ; GFX12-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX12-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX12-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX12-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_CONSTANT i64 16777214 - %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 0) - $vgpr0 = COPY %3 + %1:vgpr(i64) = G_CONSTANT i64 16777214 + %2:vgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(i32) = G_LOAD %2(p1) :: (load (i8)) + $vgpr0 = COPY %3(i32) ... 
@@ -2496,7 +2496,7 @@ body: | ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX8-LABEL: name: load_flat_s32_from_1_gep_24bit_min @@ -2511,7 +2511,7 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX9-LABEL: name: load_flat_s32_from_1_gep_24bit_min @@ -2526,7 +2526,7 @@ body: | ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX10-LABEL: name: load_flat_s32_from_1_gep_24bit_min @@ -2541,7 +2541,7 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX11-LABEL: 
name: load_flat_s32_from_1_gep_24bit_min @@ -2556,20 +2556,20 @@ body: | ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_flat_s32_from_1_gep_24bit_min ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], -8388608, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX12-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], -8388608, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_CONSTANT i64 -8388608 - %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 0) - $vgpr0 = COPY %3 + %1:vgpr(i64) = G_CONSTANT i64 -8388608 + %2:vgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(i32) = G_LOAD %2(p1) :: (load (i8)) + $vgpr0 = COPY %3(i32) ... @@ -2596,7 +2596,7 @@ body: | ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX8-LABEL: name: load_flat_s32_from_1_gep_2x_24bit_min @@ -2611,7 +2611,7 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX9-LABEL: name: load_flat_s32_from_1_gep_2x_24bit_min @@ -2626,7 +2626,7 @@ 
body: | ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX10-LABEL: name: load_flat_s32_from_1_gep_2x_24bit_min @@ -2641,7 +2641,7 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX11-LABEL: name: load_flat_s32_from_1_gep_2x_24bit_min @@ -2656,7 +2656,7 @@ body: | ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_flat_s32_from_1_gep_2x_24bit_min @@ -2671,12 +2671,12 @@ body: | ; GFX12-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX12-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX12-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX12-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8)) ; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] 
%0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_CONSTANT i64 -16777215 - %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 0) - $vgpr0 = COPY %3 + %1:vgpr(i64) = G_CONSTANT i64 -16777215 + %2:vgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(i32) = G_LOAD %2(p1) :: (load (i8)) + $vgpr0 = COPY %3(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-old-legalization.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-old-legalization.mir index 864fa2ab438df..5beb0c1143fc5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-old-legalization.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-old-legalization.mir @@ -30,7 +30,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]] ; ; GFX7-LABEL: name: load_global_s32_from_4 @@ -42,53 +42,53 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_global_s32_from_4 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i32), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; ; GFX8-LABEL: name: load_global_s32_from_4 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 1) + ; GFX8-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i32), addrspace 1) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; ; GFX9-LABEL: name: load_global_s32_from_4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit 
$exec :: (load (s32), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] ; ; GFX10-LABEL: name: load_global_s32_from_4 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] ; ; GFX11-LABEL: name: load_global_s32_from_4 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] ; ; GFX12-LABEL: name: load_global_s32_from_4 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 1) - $vgpr0 = COPY %1 + %1:vgpr(i32) = G_LOAD %0(p1) :: (load (i32), addrspace 1) + $vgpr0 = COPY %1(i32) ... 
@@ -112,7 +112,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_USHORT_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (s16), addrspace 1) + ; GFX6-NEXT: [[BUFFER_LOAD_USHORT_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (i16), addrspace 1) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_ADDR64_]] ; ; GFX7-LABEL: name: load_global_s32_from_2 @@ -124,53 +124,53 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_USHORT_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (s16), addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_USHORT_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (i16), addrspace 1) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_global_s32_from_2 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16), addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i16), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_USHORT]] ; ; GFX8-LABEL: name: load_global_s32_from_2 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16), addrspace 1) + ; GFX8-NEXT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i16), addrspace 1) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_USHORT]] ; ; GFX9-LABEL: name: load_global_s32_from_2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[COPY]], 0, 0, implicit $exec :: (load (i16), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_USHORT]] ; ; GFX10-LABEL: name: load_global_s32_from_2 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[COPY]], 0, 0, implicit $exec :: (load (i16), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_USHORT]] ; ; 
GFX11-LABEL: name: load_global_s32_from_2 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[COPY]], 0, 0, implicit $exec :: (load (i16), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_USHORT]] ; ; GFX12-LABEL: name: load_global_s32_from_2 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[COPY]], 0, 0, implicit $exec :: (load (i16), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_USHORT]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = G_LOAD %0 :: (load (s16), align 2, addrspace 1) - $vgpr0 = COPY %1 + %1:vgpr(i32) = G_LOAD %0(p1) :: (load (i16), addrspace 1) + $vgpr0 = COPY %1(i32) ... @@ -194,7 +194,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-LABEL: name: load_global_s32_from_1 @@ -206,53 +206,53 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_global_s32_from_1 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX8-LABEL: name: load_global_s32_from_1 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX8-NEXT: 
[[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX9-LABEL: name: load_global_s32_from_1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX10-LABEL: name: load_global_s32_from_1 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX11-LABEL: name: load_global_s32_from_1 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_global_s32_from_1 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = G_LOAD %0 :: (load (s8), align 1, addrspace 1) - $vgpr0 = COPY %1 + %1:vgpr(i32) = G_LOAD %0(p1) :: (load (i8), addrspace 1) + $vgpr0 = COPY %1(i32) ... 
@@ -276,7 +276,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<2 x i32>), addrspace 1) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]] ; ; GFX7-LABEL: name: load_global_v2s32 @@ -288,53 +288,53 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<2 x i32>), addrspace 1) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_global_v2s32 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>), addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x i32>), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; ; GFX8-LABEL: name: load_global_v2s32 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>), addrspace 1) + ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x i32>), addrspace 1) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; ; GFX9-LABEL: name: load_global_v2s32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<2 x i32>), addrspace 1) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] ; ; GFX10-LABEL: name: load_global_v2s32 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, 
implicit $exec :: (load (<2 x i32>), addrspace 1) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] ; ; GFX11-LABEL: name: load_global_v2s32 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<2 x i32>), addrspace 1) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] ; ; GFX12-LABEL: name: load_global_v2s32 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<2 x i32>), addrspace 1) ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 8, addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:vgpr(<2 x i32>) = G_LOAD %0(p1) :: (load (<2 x i32>), addrspace 1) + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... @@ -358,7 +358,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<4 x i32>), align 4, addrspace 1) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUFFER_LOAD_DWORDX4_ADDR64_]] ; ; GFX7-LABEL: name: load_global_v4s32 @@ -370,53 +370,53 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<4 x i32>), align 4, addrspace 1) ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUFFER_LOAD_DWORDX4_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_global_v4s32 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4, addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x i32>), align 4, addrspace 1) ; GFX7-FLAT-NEXT: 
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] ; ; GFX8-LABEL: name: load_global_v4s32 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4, addrspace 1) + ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x i32>), align 4, addrspace 1) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] ; ; GFX9-LABEL: name: load_global_v4s32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x i32>), align 4, addrspace 1) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] ; ; GFX10-LABEL: name: load_global_v4s32 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x i32>), align 4, addrspace 1) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] ; ; GFX11-LABEL: name: load_global_v4s32 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x i32>), align 4, addrspace 1) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] ; ; GFX12-LABEL: name: load_global_v4s32 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x i32>), align 4, addrspace 1) ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 4, addrspace 1) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:vgpr(<4 x i32>) = G_LOAD %0(p1) :: (load (<4 x i32>), align 4, addrspace 1) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<4 x i32>) ... 
@@ -440,7 +440,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (s64), addrspace 1) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (i64), addrspace 1) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]] ; ; GFX7-LABEL: name: load_global_s64 @@ -452,53 +452,53 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (s64), addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (i64), addrspace 1) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_global_s64 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64), addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i64), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; ; GFX8-LABEL: name: load_global_s64 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64), addrspace 1) + ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i64), addrspace 1) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; ; GFX9-LABEL: name: load_global_s64 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (i64), addrspace 1) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] ; ; GFX10-LABEL: name: load_global_s64 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (i64), addrspace 1) ; GFX10-NEXT: $vgpr0_vgpr1 = 
COPY [[GLOBAL_LOAD_DWORDX2_]] ; ; GFX11-LABEL: name: load_global_s64 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (i64), addrspace 1) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] ; ; GFX12-LABEL: name: load_global_s64 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (i64), addrspace 1) ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_LOAD %0 :: (load (s64), align 8, addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:vgpr(i64) = G_LOAD %0(p1) :: (load (i64), addrspace 1) + $vgpr0_vgpr1 = COPY %1(i64) ... @@ -522,7 +522,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<2 x s64>), align 4, addrspace 1) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<2 x i64>), align 4, addrspace 1) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUFFER_LOAD_DWORDX4_ADDR64_]] ; ; GFX7-LABEL: name: load_global_v2s64 @@ -534,53 +534,53 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<2 x s64>), align 4, addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<2 x i64>), align 4, addrspace 1) ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUFFER_LOAD_DWORDX4_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_global_v2s64 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4, addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x i64>), align 4, addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] ; ; GFX8-LABEL: name: load_global_v2s64 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; 
GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4, addrspace 1) + ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x i64>), align 4, addrspace 1) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] ; ; GFX9-LABEL: name: load_global_v2s64 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s64>), align 4, addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<2 x i64>), align 4, addrspace 1) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] ; ; GFX10-LABEL: name: load_global_v2s64 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s64>), align 4, addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<2 x i64>), align 4, addrspace 1) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] ; ; GFX11-LABEL: name: load_global_v2s64 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s64>), align 4, addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<2 x i64>), align 4, addrspace 1) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] ; ; GFX12-LABEL: name: load_global_v2s64 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s64>), align 4, addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<2 x i64>), align 4, addrspace 1) ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 4, addrspace 1) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:vgpr(<2 x i64>) = G_LOAD %0(p1) :: (load (<2 x i64>), align 4, addrspace 1) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x i64>) ... 
@@ -604,7 +604,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<2 x s64>), align 4, addrspace 1) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<2 x i64>), align 4, addrspace 1) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUFFER_LOAD_DWORDX4_ADDR64_]] ; ; GFX7-LABEL: name: load_global_v2p1 @@ -616,54 +616,54 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<2 x s64>), align 4, addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<2 x i64>), align 4, addrspace 1) ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUFFER_LOAD_DWORDX4_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_global_v2p1 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4, addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x i64>), align 4, addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] ; ; GFX8-LABEL: name: load_global_v2p1 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4, addrspace 1) + ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x i64>), align 4, addrspace 1) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] ; ; GFX9-LABEL: name: load_global_v2p1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s64>), align 4, addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<2 x i64>), align 4, addrspace 1) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] ; ; GFX10-LABEL: name: load_global_v2p1 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 
[[COPY]], 0, 0, implicit $exec :: (load (<2 x s64>), align 4, addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<2 x i64>), align 4, addrspace 1) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] ; ; GFX11-LABEL: name: load_global_v2p1 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s64>), align 4, addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<2 x i64>), align 4, addrspace 1) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] ; ; GFX12-LABEL: name: load_global_v2p1 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s64>), align 4, addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<2 x i64>), align 4, addrspace 1) ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 4, addrspace 1) - %2:vgpr(<2 x p1>) = G_BITCAST %1(<2 x s64>) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + %1:vgpr(<2 x i64>) = G_LOAD %0(p1) :: (load (<2 x i64>), align 4, addrspace 1) + %2:vgpr(<2 x p1>) = G_BITCAST %1(<2 x i64>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<2 x p1>) ... @@ -687,7 +687,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<4 x i32>), align 4, addrspace 1) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUFFER_LOAD_DWORDX4_ADDR64_]] ; ; GFX7-LABEL: name: load_global_s128 @@ -699,54 +699,54 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<4 x i32>), align 4, addrspace 1) ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUFFER_LOAD_DWORDX4_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_global_s128 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; 
GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4, addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x i32>), align 4, addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] ; ; GFX8-LABEL: name: load_global_s128 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4, addrspace 1) + ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x i32>), align 4, addrspace 1) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] ; ; GFX9-LABEL: name: load_global_s128 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x i32>), align 4, addrspace 1) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] ; ; GFX10-LABEL: name: load_global_s128 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x i32>), align 4, addrspace 1) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] ; ; GFX11-LABEL: name: load_global_s128 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x i32>), align 4, addrspace 1) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] ; ; GFX12-LABEL: name: load_global_s128 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x i32>), align 4, addrspace 1) ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 4, addrspace 1) - %2:vgpr(s128) = G_BITCAST %1(<4 x s32>) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + %1:vgpr(<4 x i32>) = G_LOAD %0(p1) :: (load (<4 x i32>), align 4, addrspace 1) + %2:vgpr(i128) = G_BITCAST %1(<4 x i32>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(i128) ... 
@@ -827,8 +827,8 @@ body: | ; GFX12-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (p3), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(p3) = G_LOAD %0 :: (load (p3), align 4, addrspace 1) - $vgpr0 = COPY %1 + %1:vgpr(p3) = G_LOAD %0(p1) :: (load (p3), addrspace 1) + $vgpr0 = COPY %1(p3) ... @@ -909,8 +909,8 @@ body: | ; GFX12-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (p1), addrspace 1) ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(p1) = G_LOAD %0 :: (load (p1), align 8, addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:vgpr(p1) = G_LOAD %0(p1) :: (load (p1), addrspace 1) + $vgpr0_vgpr1 = COPY %1(p1) ... @@ -981,8 +981,8 @@ body: | ; GFX12-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999), addrspace 1) ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(p999) = G_LOAD %0 :: (load (p999), align 8, addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:vgpr(p999) = G_LOAD %0(p1) :: (load (p999), addrspace 1) + $vgpr0_vgpr1 = COPY %1(p999) ... @@ -1006,7 +1006,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<2 x i32>), addrspace 1) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]] ; ; GFX7-LABEL: name: load_global_v2p3 @@ -1018,54 +1018,54 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<2 x i32>), addrspace 1) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_global_v2p3 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>), addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x i32>), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; ; GFX8-LABEL: name: load_global_v2p3 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: 
[[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>), addrspace 1) + ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x i32>), addrspace 1) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; ; GFX9-LABEL: name: load_global_v2p3 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<2 x i32>), addrspace 1) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] ; ; GFX10-LABEL: name: load_global_v2p3 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<2 x i32>), addrspace 1) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] ; ; GFX11-LABEL: name: load_global_v2p3 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<2 x i32>), addrspace 1) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] ; ; GFX12-LABEL: name: load_global_v2p3 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<2 x i32>), addrspace 1) ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 8, addrspace 1) - %2:vgpr(<2 x p3>) = G_BITCAST %1(<2 x s32>) - $vgpr0_vgpr1 = COPY %2 + %1:vgpr(<2 x i32>) = G_LOAD %0(p1) :: (load (<2 x i32>), addrspace 1) + %2:vgpr(<2 x p3>) = G_BITCAST %1(<2 x i32>) + $vgpr0_vgpr1 = COPY %2(<2 x p3>) ... 
@@ -1089,7 +1089,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 1) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<2 x i16>), addrspace 1) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]] ; ; GFX7-LABEL: name: load_global_v2s16 @@ -1101,53 +1101,53 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<2 x i16>), addrspace 1) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_global_v2s16 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>), addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x i16>), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; ; GFX8-LABEL: name: load_global_v2s16 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>), addrspace 1) + ; GFX8-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x i16>), addrspace 1) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; ; GFX9-LABEL: name: load_global_v2s16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (<2 x i16>), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] ; ; GFX10-LABEL: name: load_global_v2s16 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (<2 x i16>), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY 
[[GLOBAL_LOAD_DWORD]] ; ; GFX11-LABEL: name: load_global_v2s16 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (<2 x i16>), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] ; ; GFX12-LABEL: name: load_global_v2s16 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (<2 x i16>), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 1) - $vgpr0 = COPY %1 + %1:vgpr(<2 x i16>) = G_LOAD %0(p1) :: (load (<2 x i16>), addrspace 1) + $vgpr0 = COPY %1(<2 x i16>) ... @@ -1171,7 +1171,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<4 x s16>), addrspace 1) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<4 x i16>), addrspace 1) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]] ; ; GFX7-LABEL: name: load_global_v4s16 @@ -1183,53 +1183,53 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<4 x s16>), addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<4 x i16>), addrspace 1) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_global_v4s16 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>), addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x i16>), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; ; GFX8-LABEL: name: load_global_v4s16 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: 
[[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>), addrspace 1) + ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x i16>), addrspace 1) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; ; GFX9-LABEL: name: load_global_v4s16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s16>), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<4 x i16>), addrspace 1) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] ; ; GFX10-LABEL: name: load_global_v4s16 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s16>), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<4 x i16>), addrspace 1) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] ; ; GFX11-LABEL: name: load_global_v4s16 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s16>), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<4 x i16>), addrspace 1) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] ; ; GFX12-LABEL: name: load_global_v4s16 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s16>), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<4 x i16>), addrspace 1) ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 8, addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:vgpr(<4 x i16>) = G_LOAD %0(p1) :: (load (<4 x i16>), addrspace 1) + $vgpr0_vgpr1 = COPY %1(<4 x i16>) ... 
@@ -1253,7 +1253,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<4 x i32>), align 4, addrspace 1) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUFFER_LOAD_DWORDX4_ADDR64_]] ; ; GFX7-LABEL: name: load_global_v8s16 @@ -1265,54 +1265,54 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<4 x i32>), align 4, addrspace 1) ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUFFER_LOAD_DWORDX4_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_global_v8s16 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4, addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x i32>), align 4, addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] ; ; GFX8-LABEL: name: load_global_v8s16 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4, addrspace 1) + ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x i32>), align 4, addrspace 1) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] ; ; GFX9-LABEL: name: load_global_v8s16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x i32>), align 4, addrspace 1) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] ; ; GFX10-LABEL: name: load_global_v8s16 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = 
GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x i32>), align 4, addrspace 1) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] ; ; GFX11-LABEL: name: load_global_v8s16 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x i32>), align 4, addrspace 1) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] ; ; GFX12-LABEL: name: load_global_v8s16 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x i32>), align 4, addrspace 1) ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 4, addrspace 1) - %2:vgpr(<8 x s16>) = G_BITCAST %1(<4 x s32>) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:vgpr(<4 x i32>) = G_LOAD %0(p1) :: (load (<4 x i32>), align 4, addrspace 1) + %2:vgpr(<8 x i16>) = G_BITCAST %1(<4 x i32>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<4 x i32>) ... @@ -1340,7 +1340,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-LABEL: name: load_global_s32_from_1_gep_2047 @@ -1352,7 +1352,7 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_2047 @@ -1367,7 +1367,7 @@ body: | ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, 
implicit $exec ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX8-LABEL: name: load_global_s32_from_1_gep_2047 @@ -1382,41 +1382,41 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX9-LABEL: name: load_global_s32_from_1_gep_2047 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX10-LABEL: name: load_global_s32_from_1_gep_2047 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX11-LABEL: name: load_global_s32_from_1_gep_2047 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_global_s32_from_1_gep_2047 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX12-NEXT: $vgpr0 = 
COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_CONSTANT i64 2047 - %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1) - $vgpr0 = COPY %3 + %1:vgpr(i64) = G_CONSTANT i64 2047 + %2:vgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(i32) = G_LOAD %2(p1) :: (load (i8), addrspace 1) + $vgpr0 = COPY %3(i32) ... @@ -1440,7 +1440,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-LABEL: name: load_global_s32_from_1_gep_2048 @@ -1452,7 +1452,7 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_2048 @@ -1467,7 +1467,7 @@ body: | ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX8-LABEL: name: load_global_s32_from_1_gep_2048 @@ -1482,14 +1482,14 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, 
implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX9-LABEL: name: load_global_s32_from_1_gep_2048 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2048, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2048, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX10-LABEL: name: load_global_s32_from_1_gep_2048 @@ -1504,27 +1504,27 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX11-LABEL: name: load_global_s32_from_1_gep_2048 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2048, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2048, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_global_s32_from_1_gep_2048 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2048, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2048, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_CONSTANT i64 2048 - %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1) - $vgpr0 = COPY %3 + %1:vgpr(i64) = G_CONSTANT i64 2048 + %2:vgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(i32) = G_LOAD %2(p1) :: (load (i8), addrspace 1) + $vgpr0 = COPY %3(i32) ... 
@@ -1556,7 +1556,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-LABEL: name: load_global_s32_from_1_gep_m2047 @@ -1576,7 +1576,7 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m2047 @@ -1591,7 +1591,7 @@ body: | ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX8-LABEL: name: load_global_s32_from_1_gep_m2047 @@ -1606,41 +1606,41 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX9-LABEL: name: load_global_s32_from_1_gep_m2047 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ 
$}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2047, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2047, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX10-LABEL: name: load_global_s32_from_1_gep_m2047 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2047, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2047, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX11-LABEL: name: load_global_s32_from_1_gep_m2047 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2047, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2047, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_global_s32_from_1_gep_m2047 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2047, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2047, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_CONSTANT i64 -2047 - %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1) - $vgpr0 = COPY %3 + %1:vgpr(i64) = G_CONSTANT i64 -2047 + %2:vgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(i32) = G_LOAD %2(p1) :: (load (i8), addrspace 1) + $vgpr0 = COPY %3(i32) ... 
@@ -1672,7 +1672,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-LABEL: name: load_global_s32_from_1_gep_m2048 @@ -1692,7 +1692,7 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m2048 @@ -1707,7 +1707,7 @@ body: | ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX8-LABEL: name: load_global_s32_from_1_gep_m2048 @@ -1722,41 +1722,41 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX9-LABEL: name: load_global_s32_from_1_gep_m2048 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ 
$}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2048, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2048, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX10-LABEL: name: load_global_s32_from_1_gep_m2048 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2048, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2048, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX11-LABEL: name: load_global_s32_from_1_gep_m2048 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2048, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2048, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_global_s32_from_1_gep_m2048 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2048, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2048, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_CONSTANT i64 -2048 - %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1) - $vgpr0 = COPY %3 + %1:vgpr(i64) = G_CONSTANT i64 -2048 + %2:vgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(i32) = G_LOAD %2(p1) :: (load (i8), addrspace 1) + $vgpr0 = COPY %3(i32) ... 
@@ -1780,7 +1780,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-LABEL: name: load_global_s32_from_1_gep_4095 @@ -1792,7 +1792,7 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_4095 @@ -1807,7 +1807,7 @@ body: | ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX8-LABEL: name: load_global_s32_from_1_gep_4095 @@ -1822,14 +1822,14 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX9-LABEL: name: load_global_s32_from_1_gep_4095 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: 
[[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 4095, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 4095, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX10-LABEL: name: load_global_s32_from_1_gep_4095 @@ -1844,27 +1844,27 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX11-LABEL: name: load_global_s32_from_1_gep_4095 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 4095, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 4095, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_global_s32_from_1_gep_4095 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 4095, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 4095, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_CONSTANT i64 4095 - %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1) - $vgpr0 = COPY %3 + %1:vgpr(i64) = G_CONSTANT i64 4095 + %2:vgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(i32) = G_LOAD %2(p1) :: (load (i8), addrspace 1) + $vgpr0 = COPY %3(i32) ... 
@@ -1889,7 +1889,7 @@ body: | ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX6-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-LABEL: name: load_global_s32_from_1_gep_4096 @@ -1902,7 +1902,7 @@ body: | ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX7-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_4096 @@ -1917,7 +1917,7 @@ body: | ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX8-LABEL: name: load_global_s32_from_1_gep_4096 @@ -1932,7 +1932,7 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX9-LABEL: name: load_global_s32_from_1_gep_4096 @@ -1947,7 +1947,7 @@ body: | ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 
[[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX10-LABEL: name: load_global_s32_from_1_gep_4096 @@ -1962,7 +1962,7 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX11-LABEL: name: load_global_s32_from_1_gep_4096 @@ -1977,20 +1977,20 @@ body: | ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_global_s32_from_1_gep_4096 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 4096, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 4096, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_CONSTANT i64 4096 - %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1) - $vgpr0 = COPY %3 + %1:vgpr(i64) = G_CONSTANT i64 4096 + %2:vgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(i32) = G_LOAD %2(p1) :: (load (i8), addrspace 1) + $vgpr0 = COPY %3(i32) ... 
@@ -2022,7 +2022,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-LABEL: name: load_global_s32_from_1_gep_m4095 @@ -2042,7 +2042,7 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m4095 @@ -2057,7 +2057,7 @@ body: | ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX8-LABEL: name: load_global_s32_from_1_gep_m4095 @@ -2072,14 +2072,14 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX9-LABEL: name: load_global_s32_from_1_gep_m4095 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ 
$}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4095, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4095, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX10-LABEL: name: load_global_s32_from_1_gep_m4095 @@ -2094,27 +2094,27 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX11-LABEL: name: load_global_s32_from_1_gep_m4095 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4095, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4095, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_global_s32_from_1_gep_m4095 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4095, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4095, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_CONSTANT i64 -4095 - %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1) - $vgpr0 = COPY %3 + %1:vgpr(i64) = G_CONSTANT i64 -4095 + %2:vgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(i32) = G_LOAD %2(p1) :: (load (i8), addrspace 1) + $vgpr0 = COPY %3(i32) ... 
@@ -2146,7 +2146,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-LABEL: name: load_global_s32_from_1_gep_m4096 @@ -2166,7 +2166,7 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m4096 @@ -2181,7 +2181,7 @@ body: | ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX8-LABEL: name: load_global_s32_from_1_gep_m4096 @@ -2196,14 +2196,14 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX9-LABEL: name: load_global_s32_from_1_gep_m4096 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ 
$}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4096, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4096, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX10-LABEL: name: load_global_s32_from_1_gep_m4096 @@ -2218,27 +2218,27 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX11-LABEL: name: load_global_s32_from_1_gep_m4096 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4096, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4096, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_global_s32_from_1_gep_m4096 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4096, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4096, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_CONSTANT i64 -4096 - %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1) - $vgpr0 = COPY %3 + %1:vgpr(i64) = G_CONSTANT i64 -4096 + %2:vgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(i32) = G_LOAD %2(p1) :: (load (i8), addrspace 1) + $vgpr0 = COPY %3(i32) ... 
@@ -2263,7 +2263,7 @@ body: | ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX6-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 8191 - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-LABEL: name: load_global_s32_from_1_gep_8191 @@ -2276,7 +2276,7 @@ body: | ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX7-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 8191 - ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_8191 @@ -2291,7 +2291,7 @@ body: | ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX8-LABEL: name: load_global_s32_from_1_gep_8191 @@ -2306,7 +2306,7 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX9-LABEL: name: load_global_s32_from_1_gep_8191 @@ -2321,7 +2321,7 @@ body: | ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 
[[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX10-LABEL: name: load_global_s32_from_1_gep_8191 @@ -2336,7 +2336,7 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX11-LABEL: name: load_global_s32_from_1_gep_8191 @@ -2351,20 +2351,20 @@ body: | ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_global_s32_from_1_gep_8191 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 8191, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 8191, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_CONSTANT i64 8191 - %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1) - $vgpr0 = COPY %3 + %1:vgpr(i64) = G_CONSTANT i64 8191 + %2:vgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(i32) = G_LOAD %2(p1) :: (load (i8), addrspace 1) + $vgpr0 = COPY %3(i32) ... 
@@ -2389,7 +2389,7 @@ body: | ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX6-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 8192 - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-LABEL: name: load_global_s32_from_1_gep_8192 @@ -2402,7 +2402,7 @@ body: | ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX7-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 8192 - ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_8192 @@ -2417,7 +2417,7 @@ body: | ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX8-LABEL: name: load_global_s32_from_1_gep_8192 @@ -2432,7 +2432,7 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX9-LABEL: name: load_global_s32_from_1_gep_8192 @@ -2447,7 +2447,7 @@ body: | ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 
[[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX10-LABEL: name: load_global_s32_from_1_gep_8192 @@ -2462,7 +2462,7 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX11-LABEL: name: load_global_s32_from_1_gep_8192 @@ -2477,20 +2477,20 @@ body: | ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_global_s32_from_1_gep_8192 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 8192, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 8192, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_CONSTANT i64 8192 - %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1) - $vgpr0 = COPY %3 + %1:vgpr(i64) = G_CONSTANT i64 8192 + %2:vgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(i32) = G_LOAD %2(p1) :: (load (i8), addrspace 1) + $vgpr0 = COPY %3(i32) ... 
@@ -2522,7 +2522,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-LABEL: name: load_global_s32_from_1_gep_m8191 @@ -2542,7 +2542,7 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m8191 @@ -2557,7 +2557,7 @@ body: | ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX8-LABEL: name: load_global_s32_from_1_gep_m8191 @@ -2572,7 +2572,7 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX9-LABEL: name: load_global_s32_from_1_gep_m8191 @@ -2587,7 +2587,7 @@ body: | ; GFX9-NEXT: 
[[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX10-LABEL: name: load_global_s32_from_1_gep_m8191 @@ -2602,7 +2602,7 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX11-LABEL: name: load_global_s32_from_1_gep_m8191 @@ -2617,20 +2617,20 @@ body: | ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_global_s32_from_1_gep_m8191 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -8191, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -8191, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_CONSTANT i64 -8191 - %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1) - $vgpr0 = COPY %3 + %1:vgpr(i64) = G_CONSTANT i64 -8191 + %2:vgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(i32) = G_LOAD %2(p1) :: (load (i8), addrspace 1) + $vgpr0 = COPY %3(i32) ... 
@@ -2662,7 +2662,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-LABEL: name: load_global_s32_from_1_gep_m8192 @@ -2682,7 +2682,7 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m8192 @@ -2697,7 +2697,7 @@ body: | ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX8-LABEL: name: load_global_s32_from_1_gep_m8192 @@ -2712,7 +2712,7 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX9-LABEL: name: load_global_s32_from_1_gep_m8192 @@ -2727,7 +2727,7 @@ body: | ; GFX9-NEXT: 
[[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX10-LABEL: name: load_global_s32_from_1_gep_m8192 @@ -2742,7 +2742,7 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX11-LABEL: name: load_global_s32_from_1_gep_m8192 @@ -2757,20 +2757,20 @@ body: | ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_global_s32_from_1_gep_m8192 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -8192, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -8192, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_CONSTANT i64 -8192 - %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1) - $vgpr0 = COPY %3 + %1:vgpr(i64) = G_CONSTANT i64 -8192 + %2:vgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(i32) = G_LOAD %2(p1) :: (load (i8), addrspace 1) + $vgpr0 = COPY %3(i32) ... 
@@ -2795,7 +2795,7 @@ body: | ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX6-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 8388607 - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-LABEL: name: load_global_s32_from_1_gep_24bit_max @@ -2808,7 +2808,7 @@ body: | ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX7-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 8388607 - ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_24bit_max @@ -2823,7 +2823,7 @@ body: | ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX8-LABEL: name: load_global_s32_from_1_gep_24bit_max @@ -2838,7 +2838,7 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX9-LABEL: name: load_global_s32_from_1_gep_24bit_max @@ -2853,7 +2853,7 @@ body: | ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, 
[[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX10-LABEL: name: load_global_s32_from_1_gep_24bit_max @@ -2868,7 +2868,7 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX11-LABEL: name: load_global_s32_from_1_gep_24bit_max @@ -2883,20 +2883,20 @@ body: | ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_global_s32_from_1_gep_24bit_max ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 8388607, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 8388607, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_CONSTANT i64 8388607 - %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1) - $vgpr0 = COPY %3 + %1:vgpr(i64) = G_CONSTANT i64 8388607 + %2:vgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(i32) = G_LOAD %2(p1) :: (load (i8), addrspace 1) + $vgpr0 = COPY %3(i32) ... 
@@ -2921,7 +2921,7 @@ body: | ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX6-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 16777214 - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-LABEL: name: load_global_s32_from_1_gep_2x_24bit_max @@ -2934,7 +2934,7 @@ body: | ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX7-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 16777214 - ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_2x_24bit_max @@ -2949,7 +2949,7 @@ body: | ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX8-LABEL: name: load_global_s32_from_1_gep_2x_24bit_max @@ -2964,7 +2964,7 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX9-LABEL: name: load_global_s32_from_1_gep_2x_24bit_max @@ -2979,7 +2979,7 @@ body: | ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, 
[[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX10-LABEL: name: load_global_s32_from_1_gep_2x_24bit_max @@ -2994,7 +2994,7 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX11-LABEL: name: load_global_s32_from_1_gep_2x_24bit_max @@ -3009,7 +3009,7 @@ body: | ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_global_s32_from_1_gep_2x_24bit_max @@ -3024,13 +3024,13 @@ body: | ; GFX12-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX12-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_CONSTANT i64 
16777214 - %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1) - $vgpr0 = COPY %3 + %1:vgpr(i64) = G_CONSTANT i64 16777214 + %2:vgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(i32) = G_LOAD %2(p1) :: (load (i8), addrspace 1) + $vgpr0 = COPY %3(i32) ... @@ -3062,7 +3062,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-LABEL: name: load_global_s32_from_1_gep_24bit_min @@ -3082,7 +3082,7 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_24bit_min @@ -3097,7 +3097,7 @@ body: | ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX8-LABEL: name: load_global_s32_from_1_gep_24bit_min @@ -3112,7 +3112,7 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; 
GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX9-LABEL: name: load_global_s32_from_1_gep_24bit_min @@ -3127,7 +3127,7 @@ body: | ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX10-LABEL: name: load_global_s32_from_1_gep_24bit_min @@ -3142,7 +3142,7 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX11-LABEL: name: load_global_s32_from_1_gep_24bit_min @@ -3157,20 +3157,20 @@ body: | ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_global_s32_from_1_gep_24bit_min ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -8388608, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -8388608, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_CONSTANT i64 -8388608 - %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load 
(s8), align 1, addrspace 1) - $vgpr0 = COPY %3 + %1:vgpr(i64) = G_CONSTANT i64 -8388608 + %2:vgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(i32) = G_LOAD %2(p1) :: (load (i8), addrspace 1) + $vgpr0 = COPY %3(i32) ... @@ -3202,7 +3202,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-LABEL: name: load_global_s32_from_1_gep_2x_24bit_min @@ -3222,7 +3222,7 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_2x_24bit_min @@ -3237,7 +3237,7 @@ body: | ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX8-LABEL: name: load_global_s32_from_1_gep_2x_24bit_min @@ -3252,7 +3252,7 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE 
[[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX9-LABEL: name: load_global_s32_from_1_gep_2x_24bit_min @@ -3267,7 +3267,7 @@ body: | ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX10-LABEL: name: load_global_s32_from_1_gep_2x_24bit_min @@ -3282,7 +3282,7 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX11-LABEL: name: load_global_s32_from_1_gep_2x_24bit_min @@ -3297,7 +3297,7 @@ body: | ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_global_s32_from_1_gep_2x_24bit_min @@ -3312,12 +3312,12 @@ body: | ; GFX12-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX12-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load 
(s8), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_CONSTANT i64 -16777215 - %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1) - $vgpr0 = COPY %3 + %1:vgpr(i64) = G_CONSTANT i64 -16777215 + %2:vgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(i32) = G_LOAD %2(p1) :: (load (i8), addrspace 1) + $vgpr0 = COPY %3(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-saddr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-saddr.mir index 65f6b8879e16f..a269b430c2abd 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-saddr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-saddr.mir @@ -19,7 +19,7 @@ body: | ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; ; GFX10-LABEL: name: load_global_s32_from_sgpr @@ -27,7 +27,7 @@ body: | ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; ; GFX11-LABEL: name: load_global_s32_from_sgpr @@ -35,7 +35,7 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; ; GFX12-LABEL: name: load_global_s32_from_sgpr @@ -43,12 +43,12 @@ body: | ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1 ; GFX12-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX12-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 - %1:vgpr(p1) = COPY %0 - %2:vgpr(s32) = G_LOAD %1 :: (load (s32), align 4, addrspace 1) - $vgpr0 = COPY %2 
+ %1:vgpr(p1) = COPY %0(p1) + %2:vgpr(i32) = G_LOAD %1(p1) :: (load (i32), addrspace 1) + $vgpr0 = COPY %2(i32) ... @@ -69,7 +69,7 @@ body: | ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; ; GFX10-LABEL: name: load_global_s32_from_sgpr_zext_vgpr @@ -77,7 +77,7 @@ body: | ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; ; GFX11-LABEL: name: load_global_s32_from_sgpr_zext_vgpr @@ -85,7 +85,7 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; ; GFX12-LABEL: name: load_global_s32_from_sgpr_zext_vgpr @@ -93,15 +93,15 @@ body: | ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX12-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(p1) = COPY %0 - %3:vgpr(s64) = G_ZEXT %1 - %4:vgpr(p1) = G_PTR_ADD %2, %3 - %5:vgpr(s32) = G_LOAD %4 :: (load (s32), align 4, addrspace 1) - $vgpr0 = COPY %5 + %1:vgpr(i32) = COPY $vgpr0 + %2:vgpr(p1) = COPY %0(p1) + %3:vgpr(i64) = G_ZEXT %1(i32) + %4:vgpr(p1) = G_PTR_ADD %2, %3(i64) + %5:vgpr(i32) = G_LOAD %4(p1) :: (load (i32), addrspace 1) + $vgpr0 = COPY %5(i32) ... 
@@ -122,7 +122,7 @@ body: | ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; ; GFX10-LABEL: name: load_global_s32_from_sgpr_merge_zext_vgpr @@ -130,7 +130,7 @@ body: | ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; ; GFX11-LABEL: name: load_global_s32_from_sgpr_merge_zext_vgpr @@ -138,7 +138,7 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; ; GFX12-LABEL: name: load_global_s32_from_sgpr_merge_zext_vgpr @@ -146,16 +146,16 @@ body: | ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX12-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(p1) = COPY %0 - %zero:vgpr(s32) = G_CONSTANT i32 0 - %3:vgpr(s64) = G_MERGE_VALUES %1, %zero - %4:vgpr(p1) = G_PTR_ADD %2, %3 - %5:vgpr(s32) = G_LOAD %4 :: (load (s32), align 4, addrspace 1) - $vgpr0 = COPY %5 + %1:vgpr(i32) = COPY $vgpr0 + %2:vgpr(p1) = COPY %0(p1) + %zero:vgpr(i32) = G_CONSTANT i32 0 + %4:vgpr(i64) = G_MERGE_VALUES %1(i32), %zero(i32) + %5:vgpr(p1) = G_PTR_ADD %2, %4(i64) + %6:vgpr(i32) = G_LOAD %5(p1) :: (load (i32), addrspace 1) + $vgpr0 = COPY %6(i32) ... 
@@ -185,7 +185,7 @@ body: | ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] ; ; GFX10-LABEL: name: load_global_s32_from_sgpr_merge_not_0_vgpr @@ -203,7 +203,7 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] ; ; GFX11-LABEL: name: load_global_s32_from_sgpr_merge_not_0_vgpr @@ -221,7 +221,7 @@ body: | ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] ; ; GFX12-LABEL: name: load_global_s32_from_sgpr_merge_not_0_vgpr @@ -239,16 +239,16 @@ body: | ; GFX12-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec ; GFX12-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX12-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX12-NEXT: 
$vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(p1) = COPY %0 - %notzero:vgpr(s32) = G_CONSTANT i32 1 - %3:vgpr(s64) = G_MERGE_VALUES %1, %notzero - %4:vgpr(p1) = G_PTR_ADD %2, %3 - %5:vgpr(s32) = G_LOAD %4 :: (load (s32), align 4, addrspace 1) - $vgpr0 = COPY %5 + %1:vgpr(i32) = COPY $vgpr0 + %2:vgpr(p1) = COPY %0(p1) + %notzero:vgpr(i32) = G_CONSTANT i32 1 + %4:vgpr(i64) = G_MERGE_VALUES %1(i32), %notzero(i32) + %5:vgpr(p1) = G_PTR_ADD %2, %4(i64) + %6:vgpr(i32) = G_LOAD %5(p1) :: (load (i32), addrspace 1) + $vgpr0 = COPY %6(i32) ... @@ -268,7 +268,7 @@ body: | ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 4095, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 4095, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; ; GFX10-LABEL: name: load_global_s32_from_sgpr_zext_vgpr_offset4095 @@ -294,7 +294,7 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY7]], [[COPY8]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_3:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY9]], [[COPY10]], killed [[V_ADD_CO_U32_e64_3]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_2]], %subreg.sub0, [[V_ADDC_U32_e64_2]], %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] ; ; GFX11-LABEL: name: load_global_s32_from_sgpr_zext_vgpr_offset4095 @@ -302,7 +302,7 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 4095, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 4095, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; ; GFX12-LABEL: name: load_global_s32_from_sgpr_zext_vgpr_offset4095 @@ -310,18 +310,18 @@ body: | ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX12-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 4095, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 4095, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(p1) = COPY %0 - %zero:vgpr(s32) = G_CONSTANT i32 0 - %zext:vgpr(s64) = G_MERGE_VALUES %1, %zero - %4:vgpr(p1) = G_PTR_ADD %2, %zext - %5:vgpr(s64) = G_CONSTANT i64 4095 - 
%6:vgpr(p1) = G_PTR_ADD %4, %5 - %7:vgpr(s32) = G_LOAD %6 :: (load (s32), align 4, addrspace 1) - $vgpr0 = COPY %7 + %1:vgpr(i32) = COPY $vgpr0 + %2:vgpr(p1) = COPY %0(p1) + %zero:vgpr(i32) = G_CONSTANT i32 0 + %zext:vgpr(i64) = G_MERGE_VALUES %1(i32), %zero(i32) + %5:vgpr(p1) = G_PTR_ADD %2, %zext(i64) + %6:vgpr(i64) = G_CONSTANT i64 4095 + %7:vgpr(p1) = G_PTR_ADD %5, %6(i64) + %8:vgpr(i32) = G_LOAD %7(p1) :: (load (i32), addrspace 1) + $vgpr0 = COPY %8(i32) ... @@ -341,7 +341,7 @@ body: | ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], -4096, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], -4096, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; ; GFX10-LABEL: name: load_global_s32_from_sgpr_zext_vgpr_offset_neg4096 @@ -367,7 +367,7 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY7]], [[COPY8]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_3:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY9]], [[COPY10]], killed [[V_ADD_CO_U32_e64_3]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_2]], %subreg.sub0, [[V_ADDC_U32_e64_2]], %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] ; ; GFX11-LABEL: name: load_global_s32_from_sgpr_zext_vgpr_offset_neg4096 @@ -375,7 +375,7 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], -4096, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], -4096, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; ; GFX12-LABEL: name: load_global_s32_from_sgpr_zext_vgpr_offset_neg4096 @@ -383,18 +383,18 @@ body: | ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX12-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], -4096, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], -4096, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(p1) = COPY %0 - %zero:vgpr(s32) = G_CONSTANT i32 0 - %zext:vgpr(s64) = G_MERGE_VALUES %1, %zero - %4:vgpr(p1) = G_PTR_ADD %2, %zext - %5:vgpr(s64) = G_CONSTANT i64 -4096 - %6:vgpr(p1) = G_PTR_ADD %4, %5 - %7:vgpr(s32) = G_LOAD %6 :: (load (s32), align 4, addrspace 1) - $vgpr0 = COPY %7 
+ %1:vgpr(i32) = COPY $vgpr0 + %2:vgpr(p1) = COPY %0(p1) + %zero:vgpr(i32) = G_CONSTANT i32 0 + %zext:vgpr(i64) = G_MERGE_VALUES %1(i32), %zero(i32) + %5:vgpr(p1) = G_PTR_ADD %2, %zext(i64) + %6:vgpr(i64) = G_CONSTANT i64 -4096 + %7:vgpr(p1) = G_PTR_ADD %5, %6(i64) + %8:vgpr(i32) = G_LOAD %7(p1) :: (load (i32), addrspace 1) + $vgpr0 = COPY %8(i32) ... --- @@ -412,7 +412,7 @@ body: | ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; ; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_4096 @@ -420,7 +420,7 @@ body: | ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; ; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_4096 @@ -428,7 +428,7 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; ; GFX12-LABEL: name: load_global_s32_from_sgpr_base_offset_4096 @@ -436,14 +436,14 @@ body: | ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1 ; GFX12-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX12-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 4096, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 4096, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 - %1:sgpr(s64) = G_CONSTANT i64 4096 - %2:sgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(p1) = COPY %2 - %4:vgpr(s32) = G_LOAD %3 :: (load (s32), align 4, addrspace 1) - $vgpr0 = COPY %4 + %1:sgpr(i64) = G_CONSTANT i64 4096 + %2:sgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(p1) = COPY %2(p1) + %4:vgpr(i32) = G_LOAD %3(p1) :: (load (i32), addrspace 1) + $vgpr0 = COPY %4(i32) ... 
@@ -462,7 +462,7 @@ body: | ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 1, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 1, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; ; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_4097 @@ -470,7 +470,7 @@ body: | ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 1, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 1, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; ; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_4097 @@ -478,7 +478,7 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 1, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 1, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; ; GFX12-LABEL: name: load_global_s32_from_sgpr_base_offset_4097 @@ -486,14 +486,14 @@ body: | ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1 ; GFX12-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX12-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 4097, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 4097, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 - %1:sgpr(s64) = G_CONSTANT i64 4097 - %2:sgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(p1) = COPY %2 - %4:vgpr(s32) = G_LOAD %3 :: (load (s32), align 4, addrspace 1) - $vgpr0 = COPY %4 + %1:sgpr(i64) = G_CONSTANT i64 4097 + %2:sgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(p1) = COPY %2(p1) + %4:vgpr(i32) = G_LOAD %3(p1) :: (load (i32), addrspace 1) + $vgpr0 = COPY %4(i32) ... 
@@ -520,7 +520,7 @@ body: | ; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[REG_SEQUENCE]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[REG_SEQUENCE]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; ; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_neg4097 @@ -536,7 +536,7 @@ body: | ; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] ; ; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_neg4097 @@ -552,7 +552,7 @@ body: | ; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] ; ; GFX12-LABEL: name: load_global_s32_from_sgpr_base_offset_neg4097 @@ -560,14 +560,14 @@ body: | ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1 ; GFX12-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX12-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], -4097, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], -4097, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 - %1:sgpr(s64) = G_CONSTANT i64 -4097 - %2:sgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(p1) = COPY %2 - %4:vgpr(s32) = G_LOAD %3 :: (load (s32), align 4, addrspace 1) - $vgpr0 = COPY %4 + %1:sgpr(i64) = G_CONSTANT i64 -4097 + %2:sgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(p1) = COPY %2(p1) + %4:vgpr(i32) = G_LOAD %3(p1) :: (load (i32), addrspace 1) + $vgpr0 = COPY %4(i32) ... 
@@ -586,7 +586,7 @@ body: | ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 2049, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 2049, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; ; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_2049 @@ -594,7 +594,7 @@ body: | ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 1, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 1, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; ; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_2049 @@ -602,7 +602,7 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 2049, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 2049, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; ; GFX12-LABEL: name: load_global_s32_from_sgpr_base_offset_2049 @@ -610,14 +610,14 @@ body: | ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1 ; GFX12-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX12-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 2049, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 2049, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 - %1:sgpr(s64) = G_CONSTANT i64 2049 - %2:sgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(p1) = COPY %2 - %4:vgpr(s32) = G_LOAD %3 :: (load (s32), align 4, addrspace 1) - $vgpr0 = COPY %4 + %1:sgpr(i64) = G_CONSTANT i64 2049 + %2:sgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(p1) = COPY %2(p1) + %4:vgpr(i32) = G_LOAD %3(p1) :: (load (i32), addrspace 1) + $vgpr0 = COPY %4(i32) ... 
@@ -636,7 +636,7 @@ body: | ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], -2049, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], -2049, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; ; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_neg2049 @@ -652,7 +652,7 @@ body: | ; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] ; ; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_neg2049 @@ -660,7 +660,7 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], -2049, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], -2049, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; ; GFX12-LABEL: name: load_global_s32_from_sgpr_base_offset_neg2049 @@ -668,14 +668,14 @@ body: | ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1 ; GFX12-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX12-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], -2049, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], -2049, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 - %1:sgpr(s64) = G_CONSTANT i64 -2049 - %2:sgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(p1) = COPY %2 - %4:vgpr(s32) = G_LOAD %3 :: (load (s32), align 4, addrspace 1) - $vgpr0 = COPY %4 + %1:sgpr(i64) = G_CONSTANT i64 -2049 + %2:sgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(p1) = COPY %2(p1) + %4:vgpr(i32) = G_LOAD %3(p1) :: (load (i32), addrspace 1) + $vgpr0 = COPY %4(i32) ... 
--- @@ -693,7 +693,7 @@ body: | ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 4095, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 4095, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; ; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_4294967295 @@ -701,7 +701,7 @@ body: | ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 2047, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 2047, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; ; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_4294967295 @@ -709,7 +709,7 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 4095, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 4095, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; ; GFX12-LABEL: name: load_global_s32_from_sgpr_base_offset_4294967295 @@ -717,14 +717,14 @@ body: | ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1 ; GFX12-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4286578688, implicit $exec - ; GFX12-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 8388607, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 8388607, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 - %1:sgpr(s64) = G_CONSTANT i64 4294967295 - %2:sgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(p1) = COPY %2 - %4:vgpr(s32) = G_LOAD %3 :: (load (s32), align 4, addrspace 1) - $vgpr0 = COPY %4 + %1:sgpr(i64) = G_CONSTANT i64 4294967295 + %2:sgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(p1) = COPY %2(p1) + %4:vgpr(i32) = G_LOAD %3(p1) :: (load (i32), addrspace 1) + $vgpr0 = COPY %4(i32) ... 
--- @@ -750,7 +750,7 @@ body: | ; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] ; ; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_4294967296 @@ -766,7 +766,7 @@ body: | ; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] ; ; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_4294967296 @@ -782,7 +782,7 @@ body: | ; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] ; ; GFX12-LABEL: name: load_global_s32_from_sgpr_base_offset_4294967296 @@ -798,14 +798,14 @@ body: | ; GFX12-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] - ; GFX12-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 - %1:sgpr(s64) = G_CONSTANT i64 4294967296 - %2:sgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(p1) = COPY %2 - %4:vgpr(s32) = G_LOAD %3 :: (load (s32), align 4, addrspace 1) - $vgpr0 = COPY %4 + %1:sgpr(i64) = G_CONSTANT i64 4294967296 + %2:sgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(p1) = COPY %2(p1) + %4:vgpr(i32) = G_LOAD %3(p1) :: (load (i32), addrspace 1) + $vgpr0 = COPY %4(i32) ... 
@@ -832,7 +832,7 @@ body: | ; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[REG_SEQUENCE]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[REG_SEQUENCE]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; ; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_4294971390 @@ -848,7 +848,7 @@ body: | ; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] ; ; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_4294971390 @@ -864,7 +864,7 @@ body: | ; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] ; ; GFX12-LABEL: name: load_global_s32_from_sgpr_base_offset_4294971390 @@ -880,14 +880,14 @@ body: | ; GFX12-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] - ; GFX12-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 - %1:sgpr(s64) = G_CONSTANT i64 4294971390 - %2:sgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(p1) = COPY %2 - %4:vgpr(s32) = G_LOAD %3 :: (load (s32), align 4, addrspace 1) - $vgpr0 = COPY %4 + %1:sgpr(i64) = G_CONSTANT i64 4294971390 + %2:sgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(p1) = COPY %2(p1) + %4:vgpr(i32) = G_LOAD %3(p1) :: (load (i32), addrspace 1) + $vgpr0 = COPY %4(i32) ... 
@@ -914,7 +914,7 @@ body: | ; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] ; ; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_neg4294967295 @@ -930,7 +930,7 @@ body: | ; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] ; ; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_neg4294967295 @@ -946,7 +946,7 @@ body: | ; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] ; ; GFX12-LABEL: name: load_global_s32_from_sgpr_base_offset_neg4294967295 @@ -962,14 +962,14 @@ body: | ; GFX12-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] - ; GFX12-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 - %1:sgpr(s64) = G_CONSTANT i64 -4294967295 - %2:sgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(p1) = COPY %2 - %4:vgpr(s32) = G_LOAD %3 :: (load (s32), align 4, addrspace 1) - $vgpr0 = COPY %4 + %1:sgpr(i64) = G_CONSTANT i64 -4294967295 + %2:sgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(p1) = COPY %2(p1) + %4:vgpr(i32) = G_LOAD %3(p1) :: (load (i32), addrspace 1) + $vgpr0 = COPY %4(i32) ... 
--- @@ -995,7 +995,7 @@ body: | ; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] ; ; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_neg4294967296 @@ -1011,7 +1011,7 @@ body: | ; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] ; ; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_neg4294967296 @@ -1027,7 +1027,7 @@ body: | ; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] ; ; GFX12-LABEL: name: load_global_s32_from_sgpr_base_offset_neg4294967296 @@ -1043,14 +1043,14 @@ body: | ; GFX12-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] - ; GFX12-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 - %1:sgpr(s64) = G_CONSTANT i64 -4294967296 - %2:sgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(p1) = COPY %2 - %4:vgpr(s32) = G_LOAD %3 :: (load (s32), align 4, addrspace 1) - $vgpr0 = COPY %4 + %1:sgpr(i64) = G_CONSTANT i64 -4294967296 + %2:sgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(p1) = COPY %2(p1) + %4:vgpr(i32) = G_LOAD %3(p1) :: (load (i32), addrspace 1) + $vgpr0 = COPY %4(i32) ... 
@@ -1065,30 +1065,30 @@ body: | ; GFX9-LABEL: name: load_global_s32_from_copy_undef_sgpr ; GFX9: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY [[DEF]] - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] ; ; GFX10-LABEL: name: load_global_s32_from_copy_undef_sgpr ; GFX10: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY [[DEF]] - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] ; ; GFX11-LABEL: name: load_global_s32_from_copy_undef_sgpr ; GFX11: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY [[DEF]] - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] ; ; GFX12-LABEL: name: load_global_s32_from_copy_undef_sgpr ; GFX12: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY [[DEF]] - ; GFX12-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] %0:sgpr(p1) = G_IMPLICIT_DEF - %1:vgpr(p1) = COPY %0 - %2:vgpr(s32) = G_LOAD %1 :: (load (s32), align 4, addrspace 1) - $vgpr0 = COPY %2 + %1:vgpr(p1) = COPY %0(p1) + %2:vgpr(i32) = G_LOAD %1(p1) :: (load (i32), addrspace 1) + $vgpr0 = COPY %2(i32) ... 
@@ -1102,25 +1102,25 @@ body: | bb.0: ; GFX9-LABEL: name: load_global_s32_from_undef_vgpr ; GFX9: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF]], 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] ; ; GFX10-LABEL: name: load_global_s32_from_undef_vgpr ; GFX10: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF]], 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] ; ; GFX11-LABEL: name: load_global_s32_from_undef_vgpr ; GFX11: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF]], 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] ; ; GFX12-LABEL: name: load_global_s32_from_undef_vgpr ; GFX12: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GFX12-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF]], 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] %0:vgpr(p1) = G_IMPLICIT_DEF - %1:vgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 1) - $vgpr0 = COPY %1 + %1:vgpr(i32) = G_LOAD %0(p1) :: (load (i32), addrspace 1) + $vgpr0 = COPY %1(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir index d67c2a694c1d0..51935d54e749d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir @@ -30,7 +30,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]] ; ; GFX7-LABEL: name: load_global_s32_from_4 @@ -42,53 +42,53 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_global_s32_from_4 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i32), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; ; GFX8-LABEL: name: load_global_s32_from_4 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 1) + ; GFX8-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i32), addrspace 1) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; ; GFX9-LABEL: name: load_global_s32_from_4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] ; ; GFX10-LABEL: name: load_global_s32_from_4 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: 
[[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] ; ; GFX11-LABEL: name: load_global_s32_from_4 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] ; ; GFX12-LABEL: name: load_global_s32_from_4 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (i32), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 1) - $vgpr0 = COPY %1 + %1:vgpr(i32) = G_LOAD %0(p1) :: (load (i32), addrspace 1) + $vgpr0 = COPY %1(i32) ... @@ -112,7 +112,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_USHORT_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (s16), addrspace 1) + ; GFX6-NEXT: [[BUFFER_LOAD_USHORT_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (i16), addrspace 1) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_ADDR64_]] ; ; GFX7-LABEL: name: load_global_s32_from_2 @@ -124,53 +124,53 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_USHORT_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (s16), addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_USHORT_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (i16), addrspace 1) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_global_s32_from_2 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16), addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i16), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_USHORT]] ; ; GFX8-LABEL: 
name: load_global_s32_from_2 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16), addrspace 1) + ; GFX8-NEXT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i16), addrspace 1) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_USHORT]] ; ; GFX9-LABEL: name: load_global_s32_from_2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[COPY]], 0, 0, implicit $exec :: (load (i16), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_USHORT]] ; ; GFX10-LABEL: name: load_global_s32_from_2 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[COPY]], 0, 0, implicit $exec :: (load (i16), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_USHORT]] ; ; GFX11-LABEL: name: load_global_s32_from_2 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[COPY]], 0, 0, implicit $exec :: (load (i16), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_USHORT]] ; ; GFX12-LABEL: name: load_global_s32_from_2 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[COPY]], 0, 0, implicit $exec :: (load (i16), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_USHORT]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = G_LOAD %0 :: (load (s16), align 2, addrspace 1) - $vgpr0 = COPY %1 + %1:vgpr(i32) = G_LOAD %0(p1) :: (load (i16), addrspace 1) + $vgpr0 = COPY %1(i32) ... 
@@ -194,7 +194,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-LABEL: name: load_global_s32_from_1 @@ -206,53 +206,53 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_global_s32_from_1 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX8-LABEL: name: load_global_s32_from_1 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX9-LABEL: name: load_global_s32_from_1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX10-LABEL: name: load_global_s32_from_1 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX11-LABEL: name: load_global_s32_from_1 ; GFX11: 
liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_global_s32_from_1 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = G_LOAD %0 :: (load (s8), align 1, addrspace 1) - $vgpr0 = COPY %1 + %1:vgpr(i32) = G_LOAD %0(p1) :: (load (i8), addrspace 1) + $vgpr0 = COPY %1(i32) ... @@ -276,7 +276,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<2 x i32>), addrspace 1) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]] ; ; GFX7-LABEL: name: load_global_v2s32 @@ -288,53 +288,53 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<2 x i32>), addrspace 1) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_global_v2s32 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>), addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x i32>), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; ; GFX8-LABEL: name: load_global_v2s32 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>), addrspace 1) + ; 
GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x i32>), addrspace 1) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; ; GFX9-LABEL: name: load_global_v2s32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<2 x i32>), addrspace 1) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] ; ; GFX10-LABEL: name: load_global_v2s32 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<2 x i32>), addrspace 1) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] ; ; GFX11-LABEL: name: load_global_v2s32 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<2 x i32>), addrspace 1) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] ; ; GFX12-LABEL: name: load_global_v2s32 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<2 x i32>), addrspace 1) ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 8, addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:vgpr(<2 x i32>) = G_LOAD %0(p1) :: (load (<2 x i32>), addrspace 1) + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... 
@@ -358,7 +358,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<4 x i32>), align 4, addrspace 1) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUFFER_LOAD_DWORDX4_ADDR64_]] ; ; GFX7-LABEL: name: load_global_v4s32 @@ -370,53 +370,53 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<4 x i32>), align 4, addrspace 1) ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUFFER_LOAD_DWORDX4_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_global_v4s32 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4, addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x i32>), align 4, addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] ; ; GFX8-LABEL: name: load_global_v4s32 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4, addrspace 1) + ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x i32>), align 4, addrspace 1) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] ; ; GFX9-LABEL: name: load_global_v4s32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x i32>), align 4, addrspace 1) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] ; ; GFX10-LABEL: name: load_global_v4s32 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 
[[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x i32>), align 4, addrspace 1) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] ; ; GFX11-LABEL: name: load_global_v4s32 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x i32>), align 4, addrspace 1) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] ; ; GFX12-LABEL: name: load_global_v4s32 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x i32>), align 4, addrspace 1) ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 4, addrspace 1) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:vgpr(<4 x i32>) = G_LOAD %0(p1) :: (load (<4 x i32>), align 4, addrspace 1) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<4 x i32>) ... @@ -440,7 +440,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (s64), addrspace 1) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (i64), addrspace 1) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]] ; ; GFX7-LABEL: name: load_global_s64 @@ -452,53 +452,53 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (s64), addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (i64), addrspace 1) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_global_s64 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64), addrspace 1) + ; GFX7-FLAT-NEXT: 
[[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i64), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; ; GFX8-LABEL: name: load_global_s64 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64), addrspace 1) + ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i64), addrspace 1) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; ; GFX9-LABEL: name: load_global_s64 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (i64), addrspace 1) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] ; ; GFX10-LABEL: name: load_global_s64 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (i64), addrspace 1) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] ; ; GFX11-LABEL: name: load_global_s64 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (i64), addrspace 1) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] ; ; GFX12-LABEL: name: load_global_s64 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (i64), addrspace 1) ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_LOAD %0 :: (load (s64), align 8, addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:vgpr(i64) = G_LOAD %0(p1) :: (load (i64), addrspace 1) + $vgpr0_vgpr1 = COPY %1(i64) ... 
@@ -522,7 +522,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<2 x s64>), align 4, addrspace 1) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<2 x i64>), align 4, addrspace 1) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUFFER_LOAD_DWORDX4_ADDR64_]] ; ; GFX7-LABEL: name: load_global_v2s64 @@ -534,53 +534,53 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<2 x s64>), align 4, addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<2 x i64>), align 4, addrspace 1) ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUFFER_LOAD_DWORDX4_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_global_v2s64 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4, addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x i64>), align 4, addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] ; ; GFX8-LABEL: name: load_global_v2s64 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4, addrspace 1) + ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x i64>), align 4, addrspace 1) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] ; ; GFX9-LABEL: name: load_global_v2s64 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s64>), align 4, addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<2 x i64>), align 4, addrspace 1) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] ; ; GFX10-LABEL: name: load_global_v2s64 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 
[[COPY]], 0, 0, implicit $exec :: (load (<2 x s64>), align 4, addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<2 x i64>), align 4, addrspace 1) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] ; ; GFX11-LABEL: name: load_global_v2s64 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s64>), align 4, addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<2 x i64>), align 4, addrspace 1) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] ; ; GFX12-LABEL: name: load_global_v2s64 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s64>), align 4, addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<2 x i64>), align 4, addrspace 1) ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 4, addrspace 1) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:vgpr(<2 x i64>) = G_LOAD %0(p1) :: (load (<2 x i64>), align 4, addrspace 1) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x i64>) ... @@ -651,8 +651,8 @@ body: | ; GFX12-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4, addrspace 1) ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 4, addrspace 1) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:vgpr(<2 x p1>) = G_LOAD %0(p1) :: (load (<2 x p1>), align 4, addrspace 1) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x p1>) ... 
@@ -671,60 +671,60 @@ body: | ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1) - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_128(i128) = G_LOAD [[COPY]](p1) :: (load (i128), align 4, addrspace 1) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](i128) ; ; GFX7-LABEL: name: load_global_s128 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1) - ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_128(i128) = G_LOAD [[COPY]](p1) :: (load (i128), align 4, addrspace 1) + ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](i128) ; ; GFX7-FLAT-LABEL: name: load_global_s128 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vreg_128(i128) = G_LOAD [[COPY]](p1) :: (load (i128), align 4, addrspace 1) + ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](i128) ; ; GFX8-LABEL: name: load_global_s128 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vreg_128(i128) = G_LOAD [[COPY]](p1) :: (load (i128), align 4, addrspace 1) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](i128) ; ; GFX9-LABEL: name: load_global_s128 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_128(i128) = G_LOAD [[COPY]](p1) :: (load (i128), align 4, addrspace 1) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](i128) ; ; GFX10-LABEL: name: load_global_s128 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_128(i128) = G_LOAD [[COPY]](p1) :: (load (i128), align 4, addrspace 1) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](i128) ; ; GFX11-LABEL: name: load_global_s128 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_128(i128) = G_LOAD [[COPY]](p1) :: (load (i128), align 4, addrspace 1) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](i128) ; ; GFX12-LABEL: name: load_global_s128 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} 
; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:vreg_128(i128) = G_LOAD [[COPY]](p1) :: (load (i128), align 4, addrspace 1) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](i128) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s128) = G_LOAD %0 :: (load (s128), align 4, addrspace 1) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:vgpr(i128) = G_LOAD %0(p1) :: (load (i128), align 4, addrspace 1) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(i128) ... @@ -805,8 +805,8 @@ body: | ; GFX12-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (p3), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(p3) = G_LOAD %0 :: (load (p3), align 4, addrspace 1) - $vgpr0 = COPY %1 + %1:vgpr(p3) = G_LOAD %0(p1) :: (load (p3), addrspace 1) + $vgpr0 = COPY %1(p3) ... @@ -887,8 +887,8 @@ body: | ; GFX12-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (p1), addrspace 1) ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(p1) = G_LOAD %0 :: (load (p1), align 8, addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:vgpr(p1) = G_LOAD %0(p1) :: (load (p1), addrspace 1) + $vgpr0_vgpr1 = COPY %1(p1) ... @@ -959,8 +959,8 @@ body: | ; GFX12-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999), addrspace 1) ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(p999) = G_LOAD %0 :: (load (p999), align 8, addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:vgpr(p999) = G_LOAD %0(p1) :: (load (p999), addrspace 1) + $vgpr0_vgpr1 = COPY %1(p999) ... @@ -1031,8 +1031,8 @@ body: | ; GFX12-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1) ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 8, addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:vgpr(<2 x p3>) = G_LOAD %0(p1) :: (load (<2 x p3>), addrspace 1) + $vgpr0_vgpr1 = COPY %1(<2 x p3>) ... 
@@ -1056,7 +1056,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 1) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<2 x i16>), addrspace 1) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]] ; ; GFX7-LABEL: name: load_global_v2s16 @@ -1068,53 +1068,53 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<2 x i16>), addrspace 1) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_global_v2s16 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>), addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x i16>), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; ; GFX8-LABEL: name: load_global_v2s16 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>), addrspace 1) + ; GFX8-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x i16>), addrspace 1) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; ; GFX9-LABEL: name: load_global_v2s16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (<2 x i16>), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] ; ; GFX10-LABEL: name: load_global_v2s16 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (<2 x i16>), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY 
[[GLOBAL_LOAD_DWORD]] ; ; GFX11-LABEL: name: load_global_v2s16 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (<2 x i16>), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] ; ; GFX12-LABEL: name: load_global_v2s16 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (<2 x i16>), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 1) - $vgpr0 = COPY %1 + %1:vgpr(<2 x i16>) = G_LOAD %0(p1) :: (load (<2 x i16>), addrspace 1) + $vgpr0 = COPY %1(<2 x i16>) ... @@ -1138,7 +1138,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<4 x s16>), addrspace 1) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<4 x i16>), addrspace 1) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]] ; ; GFX7-LABEL: name: load_global_v4s16 @@ -1150,53 +1150,53 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<4 x s16>), addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<4 x i16>), addrspace 1) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_global_v4s16 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>), addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x i16>), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; ; GFX8-LABEL: name: load_global_v4s16 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: 
[[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>), addrspace 1) + ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x i16>), addrspace 1) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; ; GFX9-LABEL: name: load_global_v4s16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s16>), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<4 x i16>), addrspace 1) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] ; ; GFX10-LABEL: name: load_global_v4s16 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s16>), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<4 x i16>), addrspace 1) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] ; ; GFX11-LABEL: name: load_global_v4s16 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s16>), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<4 x i16>), addrspace 1) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] ; ; GFX12-LABEL: name: load_global_v4s16 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s16>), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<4 x i16>), addrspace 1) ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 8, addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:vgpr(<4 x i16>) = G_LOAD %0(p1) :: (load (<4 x i16>), addrspace 1) + $vgpr0_vgpr1 = COPY %1(<4 x i16>) ... 
@@ -1220,7 +1220,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<8 x s16>), align 4, addrspace 1) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<8 x i16>), align 4, addrspace 1) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUFFER_LOAD_DWORDX4_ADDR64_]] ; ; GFX7-LABEL: name: load_global_v8s16 @@ -1232,53 +1232,53 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<8 x s16>), align 4, addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<8 x i16>), align 4, addrspace 1) ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUFFER_LOAD_DWORDX4_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_global_v8s16 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4, addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x i16>), align 4, addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] ; ; GFX8-LABEL: name: load_global_v8s16 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4, addrspace 1) + ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x i16>), align 4, addrspace 1) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] ; ; GFX9-LABEL: name: load_global_v8s16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<8 x s16>), align 4, addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<8 x i16>), align 4, addrspace 1) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] ; ; GFX10-LABEL: name: load_global_v8s16 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = 
GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<8 x s16>), align 4, addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<8 x i16>), align 4, addrspace 1) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] ; ; GFX11-LABEL: name: load_global_v8s16 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<8 x s16>), align 4, addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<8 x i16>), align 4, addrspace 1) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] ; ; GFX12-LABEL: name: load_global_v8s16 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<8 x s16>), align 4, addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<8 x i16>), align 4, addrspace 1) ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<8 x s16>) = G_LOAD %0 :: (load (<8 x s16>), align 4, addrspace 1) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:vgpr(<8 x i16>) = G_LOAD %0(p1) :: (load (<8 x i16>), align 4, addrspace 1) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<8 x i16>) ... @@ -1306,7 +1306,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-LABEL: name: load_global_s32_from_1_gep_2047 @@ -1318,7 +1318,7 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_2047 @@ -1333,7 +1333,7 @@ body: | ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead 
[[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX8-LABEL: name: load_global_s32_from_1_gep_2047 @@ -1348,41 +1348,41 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX9-LABEL: name: load_global_s32_from_1_gep_2047 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX10-LABEL: name: load_global_s32_from_1_gep_2047 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX11-LABEL: name: load_global_s32_from_1_gep_2047 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_global_s32_from_1_gep_2047 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = 
G_CONSTANT i64 2047 - %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1) - $vgpr0 = COPY %3 + %1:vgpr(i64) = G_CONSTANT i64 2047 + %2:vgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(i32) = G_LOAD %2(p1) :: (load (i8), addrspace 1) + $vgpr0 = COPY %3(i32) ... @@ -1406,7 +1406,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-LABEL: name: load_global_s32_from_1_gep_2048 @@ -1418,7 +1418,7 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_2048 @@ -1433,7 +1433,7 @@ body: | ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX8-LABEL: name: load_global_s32_from_1_gep_2048 @@ -1448,14 +1448,14 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX8-NEXT: 
[[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX9-LABEL: name: load_global_s32_from_1_gep_2048 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2048, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2048, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX10-LABEL: name: load_global_s32_from_1_gep_2048 @@ -1470,27 +1470,27 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX11-LABEL: name: load_global_s32_from_1_gep_2048 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2048, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2048, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_global_s32_from_1_gep_2048 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2048, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2048, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_CONSTANT i64 2048 - %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1) - $vgpr0 = COPY %3 + %1:vgpr(i64) = G_CONSTANT i64 2048 + %2:vgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(i32) = G_LOAD %2(p1) :: (load (i8), addrspace 1) + $vgpr0 = COPY %3(i32) ... 
@@ -1522,7 +1522,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-LABEL: name: load_global_s32_from_1_gep_m2047 @@ -1542,7 +1542,7 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m2047 @@ -1557,7 +1557,7 @@ body: | ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX8-LABEL: name: load_global_s32_from_1_gep_m2047 @@ -1572,41 +1572,41 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX9-LABEL: name: load_global_s32_from_1_gep_m2047 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ 
$}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2047, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2047, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX10-LABEL: name: load_global_s32_from_1_gep_m2047 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2047, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2047, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX11-LABEL: name: load_global_s32_from_1_gep_m2047 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2047, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2047, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_global_s32_from_1_gep_m2047 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2047, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2047, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_CONSTANT i64 -2047 - %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1) - $vgpr0 = COPY %3 + %1:vgpr(i64) = G_CONSTANT i64 -2047 + %2:vgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(i32) = G_LOAD %2(p1) :: (load (i8), addrspace 1) + $vgpr0 = COPY %3(i32) ... 
@@ -1638,7 +1638,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-LABEL: name: load_global_s32_from_1_gep_m2048 @@ -1658,7 +1658,7 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m2048 @@ -1673,7 +1673,7 @@ body: | ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX8-LABEL: name: load_global_s32_from_1_gep_m2048 @@ -1688,41 +1688,41 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX9-LABEL: name: load_global_s32_from_1_gep_m2048 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ 
$}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2048, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2048, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX10-LABEL: name: load_global_s32_from_1_gep_m2048 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2048, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2048, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX11-LABEL: name: load_global_s32_from_1_gep_m2048 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2048, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2048, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_global_s32_from_1_gep_m2048 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2048, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2048, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_CONSTANT i64 -2048 - %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1) - $vgpr0 = COPY %3 + %1:vgpr(i64) = G_CONSTANT i64 -2048 + %2:vgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(i32) = G_LOAD %2(p1) :: (load (i8), addrspace 1) + $vgpr0 = COPY %3(i32) ... 
@@ -1746,7 +1746,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-LABEL: name: load_global_s32_from_1_gep_4095 @@ -1758,7 +1758,7 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_4095 @@ -1773,7 +1773,7 @@ body: | ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX8-LABEL: name: load_global_s32_from_1_gep_4095 @@ -1788,14 +1788,14 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX9-LABEL: name: load_global_s32_from_1_gep_4095 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: 
[[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 4095, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 4095, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX10-LABEL: name: load_global_s32_from_1_gep_4095 @@ -1810,27 +1810,27 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX11-LABEL: name: load_global_s32_from_1_gep_4095 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 4095, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 4095, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_global_s32_from_1_gep_4095 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 4095, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 4095, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_CONSTANT i64 4095 - %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1) - $vgpr0 = COPY %3 + %1:vgpr(i64) = G_CONSTANT i64 4095 + %2:vgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(i32) = G_LOAD %2(p1) :: (load (i8), addrspace 1) + $vgpr0 = COPY %3(i32) ... 
@@ -1855,7 +1855,7 @@ body: | ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX6-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-LABEL: name: load_global_s32_from_1_gep_4096 @@ -1868,7 +1868,7 @@ body: | ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX7-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_4096 @@ -1883,7 +1883,7 @@ body: | ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX8-LABEL: name: load_global_s32_from_1_gep_4096 @@ -1898,7 +1898,7 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX9-LABEL: name: load_global_s32_from_1_gep_4096 @@ -1913,7 +1913,7 @@ body: | ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 
[[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX10-LABEL: name: load_global_s32_from_1_gep_4096 @@ -1928,7 +1928,7 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX11-LABEL: name: load_global_s32_from_1_gep_4096 @@ -1943,20 +1943,20 @@ body: | ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_global_s32_from_1_gep_4096 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 4096, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 4096, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_CONSTANT i64 4096 - %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1) - $vgpr0 = COPY %3 + %1:vgpr(i64) = G_CONSTANT i64 4096 + %2:vgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(i32) = G_LOAD %2(p1) :: (load (i8), addrspace 1) + $vgpr0 = COPY %3(i32) ... 
@@ -1988,7 +1988,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-LABEL: name: load_global_s32_from_1_gep_m4095 @@ -2008,7 +2008,7 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m4095 @@ -2023,7 +2023,7 @@ body: | ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX8-LABEL: name: load_global_s32_from_1_gep_m4095 @@ -2038,14 +2038,14 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX9-LABEL: name: load_global_s32_from_1_gep_m4095 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ 
$}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4095, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4095, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX10-LABEL: name: load_global_s32_from_1_gep_m4095 @@ -2060,27 +2060,27 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX11-LABEL: name: load_global_s32_from_1_gep_m4095 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4095, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4095, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_global_s32_from_1_gep_m4095 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4095, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4095, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_CONSTANT i64 -4095 - %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1) - $vgpr0 = COPY %3 + %1:vgpr(i64) = G_CONSTANT i64 -4095 + %2:vgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(i32) = G_LOAD %2(p1) :: (load (i8), addrspace 1) + $vgpr0 = COPY %3(i32) ... 
@@ -2112,7 +2112,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-LABEL: name: load_global_s32_from_1_gep_m4096 @@ -2132,7 +2132,7 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m4096 @@ -2147,7 +2147,7 @@ body: | ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX8-LABEL: name: load_global_s32_from_1_gep_m4096 @@ -2162,14 +2162,14 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX9-LABEL: name: load_global_s32_from_1_gep_m4096 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ 
$}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4096, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4096, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX10-LABEL: name: load_global_s32_from_1_gep_m4096 @@ -2184,27 +2184,27 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX11-LABEL: name: load_global_s32_from_1_gep_m4096 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4096, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4096, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_global_s32_from_1_gep_m4096 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4096, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4096, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_CONSTANT i64 -4096 - %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1) - $vgpr0 = COPY %3 + %1:vgpr(i64) = G_CONSTANT i64 -4096 + %2:vgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(i32) = G_LOAD %2(p1) :: (load (i8), addrspace 1) + $vgpr0 = COPY %3(i32) ... 
@@ -2229,7 +2229,7 @@ body: | ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX6-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 8191 - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-LABEL: name: load_global_s32_from_1_gep_8191 @@ -2242,7 +2242,7 @@ body: | ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX7-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 8191 - ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_8191 @@ -2257,7 +2257,7 @@ body: | ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX8-LABEL: name: load_global_s32_from_1_gep_8191 @@ -2272,7 +2272,7 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX9-LABEL: name: load_global_s32_from_1_gep_8191 @@ -2287,7 +2287,7 @@ body: | ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 
[[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX10-LABEL: name: load_global_s32_from_1_gep_8191 @@ -2302,7 +2302,7 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX11-LABEL: name: load_global_s32_from_1_gep_8191 @@ -2317,20 +2317,20 @@ body: | ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_global_s32_from_1_gep_8191 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 8191, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 8191, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_CONSTANT i64 8191 - %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1) - $vgpr0 = COPY %3 + %1:vgpr(i64) = G_CONSTANT i64 8191 + %2:vgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(i32) = G_LOAD %2(p1) :: (load (i8), addrspace 1) + $vgpr0 = COPY %3(i32) ... 
@@ -2355,7 +2355,7 @@ body: | ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX6-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 8192 - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-LABEL: name: load_global_s32_from_1_gep_8192 @@ -2368,7 +2368,7 @@ body: | ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX7-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 8192 - ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_8192 @@ -2383,7 +2383,7 @@ body: | ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX8-LABEL: name: load_global_s32_from_1_gep_8192 @@ -2398,7 +2398,7 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX9-LABEL: name: load_global_s32_from_1_gep_8192 @@ -2413,7 +2413,7 @@ body: | ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 
[[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX10-LABEL: name: load_global_s32_from_1_gep_8192 @@ -2428,7 +2428,7 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX11-LABEL: name: load_global_s32_from_1_gep_8192 @@ -2443,20 +2443,20 @@ body: | ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_global_s32_from_1_gep_8192 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 8192, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 8192, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_CONSTANT i64 8192 - %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1) - $vgpr0 = COPY %3 + %1:vgpr(i64) = G_CONSTANT i64 8192 + %2:vgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(i32) = G_LOAD %2(p1) :: (load (i8), addrspace 1) + $vgpr0 = COPY %3(i32) ... 
@@ -2488,7 +2488,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-LABEL: name: load_global_s32_from_1_gep_m8191 @@ -2508,7 +2508,7 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m8191 @@ -2523,7 +2523,7 @@ body: | ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX8-LABEL: name: load_global_s32_from_1_gep_m8191 @@ -2538,7 +2538,7 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX9-LABEL: name: load_global_s32_from_1_gep_m8191 @@ -2553,7 +2553,7 @@ body: | ; GFX9-NEXT: 
[[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX10-LABEL: name: load_global_s32_from_1_gep_m8191 @@ -2568,7 +2568,7 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX11-LABEL: name: load_global_s32_from_1_gep_m8191 @@ -2583,20 +2583,20 @@ body: | ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_global_s32_from_1_gep_m8191 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -8191, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -8191, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_CONSTANT i64 -8191 - %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1) - $vgpr0 = COPY %3 + %1:vgpr(i64) = G_CONSTANT i64 -8191 + %2:vgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(i32) = G_LOAD %2(p1) :: (load (i8), addrspace 1) + $vgpr0 = COPY %3(i32) ... 
@@ -2628,7 +2628,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-LABEL: name: load_global_s32_from_1_gep_m8192 @@ -2648,7 +2648,7 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m8192 @@ -2663,7 +2663,7 @@ body: | ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX8-LABEL: name: load_global_s32_from_1_gep_m8192 @@ -2678,7 +2678,7 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX9-LABEL: name: load_global_s32_from_1_gep_m8192 @@ -2693,7 +2693,7 @@ body: | ; GFX9-NEXT: 
[[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX10-LABEL: name: load_global_s32_from_1_gep_m8192 @@ -2708,7 +2708,7 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX11-LABEL: name: load_global_s32_from_1_gep_m8192 @@ -2723,20 +2723,20 @@ body: | ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_global_s32_from_1_gep_m8192 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -8192, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -8192, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_CONSTANT i64 -8192 - %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1) - $vgpr0 = COPY %3 + %1:vgpr(i64) = G_CONSTANT i64 -8192 + %2:vgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(i32) = G_LOAD %2(p1) :: (load (i8), addrspace 1) + $vgpr0 = COPY %3(i32) ... 
@@ -2761,7 +2761,7 @@ body: | ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX6-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 8388607 - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-LABEL: name: load_global_s32_from_1_gep_24bit_max @@ -2774,7 +2774,7 @@ body: | ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX7-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 8388607 - ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_24bit_max @@ -2789,7 +2789,7 @@ body: | ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX8-LABEL: name: load_global_s32_from_1_gep_24bit_max @@ -2804,7 +2804,7 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX9-LABEL: name: load_global_s32_from_1_gep_24bit_max @@ -2819,7 +2819,7 @@ body: | ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, 
[[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX10-LABEL: name: load_global_s32_from_1_gep_24bit_max @@ -2834,7 +2834,7 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX11-LABEL: name: load_global_s32_from_1_gep_24bit_max @@ -2849,20 +2849,20 @@ body: | ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_global_s32_from_1_gep_24bit_max ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 8388607, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 8388607, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_CONSTANT i64 8388607 - %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1) - $vgpr0 = COPY %3 + %1:vgpr(i64) = G_CONSTANT i64 8388607 + %2:vgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(i32) = G_LOAD %2(p1) :: (load (i8), addrspace 1) + $vgpr0 = COPY %3(i32) ... 
@@ -2887,7 +2887,7 @@ body: | ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX6-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 16777214 - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-LABEL: name: load_global_s32_from_1_gep_2x_24bit_max @@ -2900,7 +2900,7 @@ body: | ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX7-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 16777214 - ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_2x_24bit_max @@ -2915,7 +2915,7 @@ body: | ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX8-LABEL: name: load_global_s32_from_1_gep_2x_24bit_max @@ -2930,7 +2930,7 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX9-LABEL: name: load_global_s32_from_1_gep_2x_24bit_max @@ -2945,7 +2945,7 @@ body: | ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, 
[[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX10-LABEL: name: load_global_s32_from_1_gep_2x_24bit_max @@ -2960,7 +2960,7 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX11-LABEL: name: load_global_s32_from_1_gep_2x_24bit_max @@ -2975,7 +2975,7 @@ body: | ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_global_s32_from_1_gep_2x_24bit_max @@ -2990,13 +2990,13 @@ body: | ; GFX12-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX12-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_CONSTANT i64 
16777214 - %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1) - $vgpr0 = COPY %3 + %1:vgpr(i64) = G_CONSTANT i64 16777214 + %2:vgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(i32) = G_LOAD %2(p1) :: (load (i8), addrspace 1) + $vgpr0 = COPY %3(i32) ... @@ -3028,7 +3028,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-LABEL: name: load_global_s32_from_1_gep_24bit_min @@ -3048,7 +3048,7 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_24bit_min @@ -3063,7 +3063,7 @@ body: | ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX8-LABEL: name: load_global_s32_from_1_gep_24bit_min @@ -3078,7 +3078,7 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; 
GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX9-LABEL: name: load_global_s32_from_1_gep_24bit_min @@ -3093,7 +3093,7 @@ body: | ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX10-LABEL: name: load_global_s32_from_1_gep_24bit_min @@ -3108,7 +3108,7 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX11-LABEL: name: load_global_s32_from_1_gep_24bit_min @@ -3123,20 +3123,20 @@ body: | ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_global_s32_from_1_gep_24bit_min ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -8388608, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -8388608, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_CONSTANT i64 -8388608 - %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load 
(s8), align 1, addrspace 1) - $vgpr0 = COPY %3 + %1:vgpr(i64) = G_CONSTANT i64 -8388608 + %2:vgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(i32) = G_LOAD %2(p1) :: (load (i8), addrspace 1) + $vgpr0 = COPY %3(i32) ... @@ -3168,7 +3168,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-LABEL: name: load_global_s32_from_1_gep_2x_24bit_min @@ -3188,7 +3188,7 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_2x_24bit_min @@ -3203,7 +3203,7 @@ body: | ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX8-LABEL: name: load_global_s32_from_1_gep_2x_24bit_min @@ -3218,7 +3218,7 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE 
[[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 1) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX9-LABEL: name: load_global_s32_from_1_gep_2x_24bit_min @@ -3233,7 +3233,7 @@ body: | ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX10-LABEL: name: load_global_s32_from_1_gep_2x_24bit_min @@ -3248,7 +3248,7 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX11-LABEL: name: load_global_s32_from_1_gep_2x_24bit_min @@ -3263,7 +3263,7 @@ body: | ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_global_s32_from_1_gep_2x_24bit_min @@ -3278,12 +3278,12 @@ body: | ; GFX12-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX12-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load 
(s8), addrspace 1) + ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (i8), addrspace 1) ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_CONSTANT i64 -16777215 - %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1) - $vgpr0 = COPY %3 + %1:vgpr(i64) = G_CONSTANT i64 -16777215 + %2:vgpr(p1) = G_PTR_ADD %0, %1(i64) + %3:vgpr(i32) = G_LOAD %2(p1) :: (load (i8), addrspace 1) + $vgpr0 = COPY %3(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.s96.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.s96.mir index 8ac6254adfafe..11f9e21b8fbde 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.s96.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.s96.mir @@ -27,39 +27,39 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX3_ADDR64_:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<3 x s32>), align 4, addrspace 1) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX3_ADDR64_:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<3 x i32>), align 4, addrspace 1) ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUFFER_LOAD_DWORDX3_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_global_v3s32 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<3 x s32>), align 4, addrspace 1) + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<3 x i32>), align 4, addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]] ; ; GFX8-LABEL: name: load_global_v3s32 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<3 x s32>), align 4, addrspace 1) + ; GFX8-NEXT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<3 x i32>), align 4, addrspace 1) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]] ; ; GFX9-LABEL: name: load_global_v3s32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = GLOBAL_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec :: (load (<3 x s32>), align 4, addrspace 1) + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = GLOBAL_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec :: (load (<3 x i32>), align 4, addrspace 1) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[GLOBAL_LOAD_DWORDX3_]] ; ; GFX10-LABEL: name: load_global_v3s32 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: 
[[GLOBAL_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = GLOBAL_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec :: (load (<3 x s32>), align 4, addrspace 1) + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = GLOBAL_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec :: (load (<3 x i32>), align 4, addrspace 1) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[GLOBAL_LOAD_DWORDX3_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 4, addrspace 1) - $vgpr0_vgpr1_vgpr2 = COPY %1 + %1:vgpr(<3 x i32>) = G_LOAD %0(p1) :: (load (<3 x i32>), align 4, addrspace 1) + $vgpr0_vgpr1_vgpr2 = COPY %1(<3 x i32>) ... @@ -78,39 +78,39 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load (s96), align 4, addrspace 1) - ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) + ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_96(i96) = G_LOAD [[COPY]](p1) :: (load (i96), align 4, addrspace 1) + ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](i96) ; ; GFX7-FLAT-LABEL: name: load_global_s96 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load (s96), align 4, addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) + ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vreg_96(i96) = G_LOAD [[COPY]](p1) :: (load (i96), align 4, addrspace 1) + ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](i96) ; ; GFX8-LABEL: name: load_global_s96 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load (s96), align 4, addrspace 1) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) + ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vreg_96(i96) = G_LOAD [[COPY]](p1) :: (load (i96), align 4, addrspace 1) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](i96) ; ; GFX9-LABEL: name: load_global_s96 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load (s96), align 4, addrspace 1) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_96(i96) = G_LOAD [[COPY]](p1) :: (load (i96), align 4, addrspace 1) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](i96) ; ; GFX10-LABEL: name: load_global_s96 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load (s96), align 4, addrspace 1) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_96(i96) = G_LOAD [[COPY]](p1) :: (load (i96), align 4, addrspace 1) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](i96) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s96) = G_LOAD %0 :: (load (s96), align 4, addrspace 1) - $vgpr0_vgpr1_vgpr2 = COPY %1 + %1:vgpr(i96) = G_LOAD %0(p1) :: (load (i96), align 4, addrspace 1) + $vgpr0_vgpr1_vgpr2 = COPY %1(i96) ... 
@@ -129,38 +129,38 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 4, addrspace 1) - ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) + ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_96(<6 x i16>) = G_LOAD [[COPY]](p1) :: (load (<6 x i16>), align 4, addrspace 1) + ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x i16>) ; ; GFX7-FLAT-LABEL: name: load_global_v6s16 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 4, addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) + ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vreg_96(<6 x i16>) = G_LOAD [[COPY]](p1) :: (load (<6 x i16>), align 4, addrspace 1) + ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x i16>) ; ; GFX8-LABEL: name: load_global_v6s16 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 4, addrspace 1) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) + ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vreg_96(<6 x i16>) = G_LOAD [[COPY]](p1) :: (load (<6 x i16>), align 4, addrspace 1) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x i16>) ; ; GFX9-LABEL: name: load_global_v6s16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 4, addrspace 1) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_96(<6 x i16>) = G_LOAD [[COPY]](p1) :: (load (<6 x i16>), align 4, addrspace 1) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x i16>) ; ; GFX10-LABEL: name: load_global_v6s16 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 4, addrspace 1) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_96(<6 x i16>) = G_LOAD [[COPY]](p1) :: (load (<6 x i16>), align 4, addrspace 1) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x i16>) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<6 x s16>) = G_LOAD %0 :: (load (<6 x s16>), align 4, addrspace 1) - $vgpr0_vgpr1_vgpr2 = COPY %1 + %1:vgpr(<6 x i16>) = G_LOAD %0(p1) :: (load (<6 x i16>), align 4, addrspace 1) + $vgpr0_vgpr1_vgpr2 = COPY %1(<6 x i16>) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local-128.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local-128.mir index 94104885748a9..7608e7d0e6509 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local-128.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local-128.mir @@ -20,29 +20,32 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ_B128_:%[0-9]+]]:vreg_128 = DS_READ_B128 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<4 x s32>), addrspace 3) + ; GFX7-NEXT: [[DS_READ_B128_:%[0-9]+]]:vreg_128 = DS_READ_B128 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<4 x i32>), addrspace 3) ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ_B128_]] + ; ; GFX9-LABEL: name: load_local_v4s32_align16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128 = DS_READ_B128_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), addrspace 3) + ; GFX9-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128 = DS_READ_B128_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<4 x i32>), addrspace 3) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ_B128_gfx9_]] + ; ; GFX10-LABEL: name: load_local_v4s32_align16 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128 = DS_READ_B128_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), addrspace 3) + ; GFX10-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128 = DS_READ_B128_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<4 x i32>), addrspace 3) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ_B128_gfx9_]] + ; ; GFX11-LABEL: name: load_local_v4s32_align16 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128 = DS_READ_B128_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), addrspace 3) + ; GFX11-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128 = DS_READ_B128_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<4 x i32>), addrspace 3) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ_B128_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 16, addrspace 3) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:vgpr(<4 x i32>) = G_LOAD %0(p3) :: (load (<4 x i32>), addrspace 3) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<4 x i32>) ... 
@@ -62,29 +65,32 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ2_B64_:%[0-9]+]]:vreg_128 = DS_READ2_B64 [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) + ; GFX7-NEXT: [[DS_READ2_B64_:%[0-9]+]]:vreg_128 = DS_READ2_B64 [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load (<4 x i32>), align 8, addrspace 3) ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_]] + ; ; GFX9-LABEL: name: load_local_v4s32_align_8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) + ; GFX9-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<4 x i32>), align 8, addrspace 3) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]] + ; ; GFX10-LABEL: name: load_local_v4s32_align_8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) + ; GFX10-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<4 x i32>), align 8, addrspace 3) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]] + ; ; GFX11-LABEL: name: load_local_v4s32_align_8 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) + ; GFX11-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<4 x i32>), align 8, addrspace 3) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 8, addrspace 3) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:vgpr(<4 x i32>) = G_LOAD %0(p3) :: (load (<4 x i32>), align 8, addrspace 3) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<4 x i32>) ... 
@@ -104,31 +110,34 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ2_B64_:%[0-9]+]]:vreg_128 = DS_READ2_B64 [[COPY]], 50, 51, 0, implicit $m0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) + ; GFX7-NEXT: [[DS_READ2_B64_:%[0-9]+]]:vreg_128 = DS_READ2_B64 [[COPY]], 50, 51, 0, implicit $m0, implicit $exec :: (load (<4 x i32>), align 8, addrspace 3) ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_]] + ; ; GFX9-LABEL: name: load_local_v4s32_align_8_offset_160 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 50, 51, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) + ; GFX9-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 50, 51, 0, implicit $exec :: (load (<4 x i32>), align 8, addrspace 3) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]] + ; ; GFX10-LABEL: name: load_local_v4s32_align_8_offset_160 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 50, 51, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) + ; GFX10-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 50, 51, 0, implicit $exec :: (load (<4 x i32>), align 8, addrspace 3) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]] + ; ; GFX11-LABEL: name: load_local_v4s32_align_8_offset_160 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 50, 51, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) + ; GFX11-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 50, 51, 0, implicit $exec :: (load (<4 x i32>), align 8, addrspace 3) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(s32) = G_CONSTANT i32 400 - %2:vgpr(p3) = G_PTR_ADD %0, %1 - %3:vgpr(<4 x s32>) = G_LOAD %2 :: (load (<4 x s32>), align 8, addrspace 3) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + %1:vgpr(i32) = G_CONSTANT i32 400 + %2:vgpr(p3) = G_PTR_ADD %0, %1(i32) + %3:vgpr(<4 x i32>) = G_LOAD %2(p3) :: (load (<4 x i32>), align 8, addrspace 3) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3(<4 x i32>) ... 
@@ -150,37 +159,40 @@ body: | ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4000, implicit $exec ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ2_B64_:%[0-9]+]]:vreg_128 = DS_READ2_B64 [[V_ADD_CO_U32_e64_]], 0, 1, 0, implicit $m0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) + ; GFX7-NEXT: [[DS_READ2_B64_:%[0-9]+]]:vreg_128 = DS_READ2_B64 [[V_ADD_CO_U32_e64_]], 0, 1, 0, implicit $m0, implicit $exec :: (load (<4 x i32>), align 8, addrspace 3) ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_]] + ; ; GFX9-LABEL: name: load_local_v4s32_align_8_offset_320 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4000, implicit $exec ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX9-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[V_ADD_U32_e64_]], 0, 1, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) + ; GFX9-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[V_ADD_U32_e64_]], 0, 1, 0, implicit $exec :: (load (<4 x i32>), align 8, addrspace 3) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]] + ; ; GFX10-LABEL: name: load_local_v4s32_align_8_offset_320 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4000, implicit $exec ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX10-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[V_ADD_U32_e64_]], 0, 1, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) + ; GFX10-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[V_ADD_U32_e64_]], 0, 1, 0, implicit $exec :: (load (<4 x i32>), align 8, addrspace 3) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]] + ; ; GFX11-LABEL: name: load_local_v4s32_align_8_offset_320 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4000, implicit $exec ; GFX11-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX11-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[V_ADD_U32_e64_]], 0, 1, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) + ; GFX11-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[V_ADD_U32_e64_]], 0, 1, 0, implicit $exec :: (load (<4 x i32>), align 8, addrspace 3) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(s32) = G_CONSTANT i32 4000 - %2:vgpr(p3) = G_PTR_ADD %0, %1 - %3:vgpr(<4 x s32>) = G_LOAD %2 :: (load (<4 x s32>), align 8, addrspace 3) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + %1:vgpr(i32) = G_CONSTANT i32 4000 + %2:vgpr(p3) = G_PTR_ADD %0, %1(i32) + %3:vgpr(<4 x i32>) = G_LOAD %2(p3) :: (load (<4 x i32>), align 8, addrspace 3) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3(<4 x i32>) ... 
@@ -200,29 +212,32 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ2_B64_:%[0-9]+]]:vreg_128 = DS_READ2_B64 [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load (<2 x s64>), align 8, addrspace 3) + ; GFX7-NEXT: [[DS_READ2_B64_:%[0-9]+]]:vreg_128 = DS_READ2_B64 [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load (<2 x i64>), align 8, addrspace 3) ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_]] + ; ; GFX9-LABEL: name: load_local_v2s64 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<2 x s64>), align 8, addrspace 3) + ; GFX9-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<2 x i64>), align 8, addrspace 3) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]] + ; ; GFX10-LABEL: name: load_local_v2s64 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<2 x s64>), align 8, addrspace 3) + ; GFX10-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<2 x i64>), align 8, addrspace 3) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]] + ; ; GFX11-LABEL: name: load_local_v2s64 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<2 x s64>), align 8, addrspace 3) + ; GFX11-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<2 x i64>), align 8, addrspace 3) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 8, addrspace 3) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:vgpr(<2 x i64>) = G_LOAD %0(p3) :: (load (<2 x i64>), align 8, addrspace 3) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x i64>) ... 
@@ -244,18 +259,21 @@ body: | ; GFX7-NEXT: $m0 = S_MOV_B32 -1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p3) :: (load (<2 x p1>), align 8, addrspace 3) ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + ; ; GFX9-LABEL: name: load_local_v2p1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p3) :: (load (<2 x p1>), align 8, addrspace 3) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + ; ; GFX10-LABEL: name: load_local_v2p1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p3) :: (load (<2 x p1>), align 8, addrspace 3) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + ; ; GFX11-LABEL: name: load_local_v2p1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -263,8 +281,8 @@ body: | ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p3) :: (load (<2 x p1>), align 8, addrspace 3) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 8, addrspace 3) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:vgpr(<2 x p1>) = G_LOAD %0(p3) :: (load (<2 x p1>), align 8, addrspace 3) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x p1>) ... @@ -284,29 +302,32 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p3) :: (load (s128), align 8, addrspace 3) - ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_128(i128) = G_LOAD [[COPY]](p3) :: (load (i128), align 8, addrspace 3) + ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](i128) + ; ; GFX9-LABEL: name: load_local_s128 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p3) :: (load (s128), align 8, addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_128(i128) = G_LOAD [[COPY]](p3) :: (load (i128), align 8, addrspace 3) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](i128) + ; ; GFX10-LABEL: name: load_local_s128 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p3) :: (load (s128), align 8, addrspace 3) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_128(i128) = G_LOAD [[COPY]](p3) :: (load (i128), align 8, addrspace 3) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](i128) + ; ; GFX11-LABEL: name: load_local_s128 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p3) :: (load (s128), align 8, addrspace 3) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_128(i128) = G_LOAD [[COPY]](p3) :: (load (i128), align 8, addrspace 3) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](i128) %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(s128) = G_LOAD %0 :: (load (s128), align 8, addrspace 3) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:vgpr(i128) = G_LOAD %0(p3) :: (load (i128), align 8, addrspace 3) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(i128) ...
@@ -326,28 +347,31 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ2_B64_:%[0-9]+]]:vreg_128 = DS_READ2_B64 [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load (<8 x s16>), align 8, addrspace 3) + ; GFX7-NEXT: [[DS_READ2_B64_:%[0-9]+]]:vreg_128 = DS_READ2_B64 [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load (<8 x i16>), align 8, addrspace 3) ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_]] + ; ; GFX9-LABEL: name: load_local_v8s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<8 x s16>), align 8, addrspace 3) + ; GFX9-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<8 x i16>), align 8, addrspace 3) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]] + ; ; GFX10-LABEL: name: load_local_v8s16 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<8 x s16>), align 8, addrspace 3) + ; GFX10-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<8 x i16>), align 8, addrspace 3) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]] + ; ; GFX11-LABEL: name: load_local_v8s16 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<8 x s16>), align 8, addrspace 3) + ; GFX11-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<8 x i16>), align 8, addrspace 3) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(<8 x s16>) = G_LOAD %0 :: (load (<8 x s16>), align 8, addrspace 3) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:vgpr(<8 x i16>) = G_LOAD %0(p3) :: (load (<8 x i16>), align 8, addrspace 3) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<8 x i16>) ...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir index 59c57a5fefbed..0c9d54a7056ca 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir @@ -22,7 +22,7 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s32), addrspace 3) + ; GFX6-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (i32), addrspace 3) ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_B32_]] ; ; GFX7-LABEL: name: load_local_s32_from_4 @@ -30,25 +30,25 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s32), addrspace 3) + ; GFX7-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (i32), addrspace 3) ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_B32_]] ; ; GFX9-LABEL: name: load_local_s32_from_4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 3) + ; GFX9-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (i32), addrspace 3) ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_B32_gfx9_]] ; ; GFX10-LABEL: name: load_local_s32_from_4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 3) + ; GFX10-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (i32), addrspace 3) ; GFX10-NEXT: $vgpr0 = COPY [[DS_READ_B32_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 3) - $vgpr0 = COPY %1 + %1:vgpr(i32) = G_LOAD %0(p3) :: (load (i32), addrspace 3) + $vgpr0 = COPY %1(i32) ... 
@@ -68,7 +68,7 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_READ_U16_:%[0-9]+]]:vgpr_32 = DS_READ_U16 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s16), addrspace 3) + ; GFX6-NEXT: [[DS_READ_U16_:%[0-9]+]]:vgpr_32 = DS_READ_U16 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (i16), addrspace 3) ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_U16_]] ; ; GFX7-LABEL: name: load_local_s32_from_2 @@ -76,25 +76,25 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ_U16_:%[0-9]+]]:vgpr_32 = DS_READ_U16 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s16), addrspace 3) + ; GFX7-NEXT: [[DS_READ_U16_:%[0-9]+]]:vgpr_32 = DS_READ_U16 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (i16), addrspace 3) ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_U16_]] ; ; GFX9-LABEL: name: load_local_s32_from_2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ_U16_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 3) + ; GFX9-NEXT: [[DS_READ_U16_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (i16), addrspace 3) ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_U16_gfx9_]] ; ; GFX10-LABEL: name: load_local_s32_from_2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[DS_READ_U16_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 3) + ; GFX10-NEXT: [[DS_READ_U16_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (i16), addrspace 3) ; GFX10-NEXT: $vgpr0 = COPY [[DS_READ_U16_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(s32) = G_LOAD %0 :: (load (s16), align 2, addrspace 3) - $vgpr0 = COPY %1 + %1:vgpr(i32) = G_LOAD %0(p3) :: (load (i16), addrspace 3) + $vgpr0 = COPY %1(i32) ... 
@@ -117,7 +117,7 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX6-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (i8), addrspace 3) ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_U8_]] ; ; GFX7-LABEL: name: load_local_s32_from_1 @@ -125,25 +125,25 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX7-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (i8), addrspace 3) ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_U8_]] ; ; GFX9-LABEL: name: load_local_s32_from_1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 3) + ; GFX9-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (i8), addrspace 3) ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]] ; ; GFX10-LABEL: name: load_local_s32_from_1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 3) + ; GFX10-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (i8), addrspace 3) ; GFX10-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(s32) = G_LOAD %0 :: (load (s8), align 1, addrspace 3) - $vgpr0 = COPY %1 + %1:vgpr(i32) = G_LOAD %0(p3) :: (load (i8), addrspace 3) + $vgpr0 = COPY %1(i32) ... 
@@ -163,7 +163,7 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<2 x s32>), addrspace 3) + ; GFX6-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<2 x i32>), addrspace 3) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] ; ; GFX7-LABEL: name: load_local_v2s32 @@ -171,25 +171,25 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<2 x s32>), addrspace 3) + ; GFX7-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<2 x i32>), addrspace 3) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] ; ; GFX9-LABEL: name: load_local_v2s32 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 3) + ; GFX9-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<2 x i32>), addrspace 3) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]] ; ; GFX10-LABEL: name: load_local_v2s32 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 3) + ; GFX10-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<2 x i32>), addrspace 3) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 8, addrspace 3) - $vgpr0_vgpr1 = COPY %1 + %1:vgpr(<2 x i32>) = G_LOAD %0(p3) :: (load (<2 x i32>), addrspace 3) + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... 
@@ -209,33 +209,33 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), align 4, addrspace 3) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX7-LABEL: name: load_local_v2s32_align4 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load (<2 x s32>), align 4, addrspace 3) + ; GFX7-NEXT: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load (<2 x i32>), align 4, addrspace 3) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_]] ; ; GFX9-LABEL: name: load_local_v2s32_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<2 x s32>), align 4, addrspace 3) + ; GFX9-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<2 x i32>), align 4, addrspace 3) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]] ; ; GFX10-LABEL: name: load_local_v2s32_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<2 x s32>), align 4, addrspace 3) + ; GFX10-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<2 x i32>), align 4, addrspace 3) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 4, addrspace 3) - $vgpr0_vgpr1 = COPY %1 + %1:vgpr(<2 x i32>) = G_LOAD %0(p3) :: (load (<2 x i32>), align 4, addrspace 3) + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... 
@@ -255,7 +255,7 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s64), addrspace 3) + ; GFX6-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (i64), addrspace 3) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] ; ; GFX7-LABEL: name: load_local_s64 @@ -263,25 +263,25 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s64), addrspace 3) + ; GFX7-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (i64), addrspace 3) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] ; ; GFX9-LABEL: name: load_local_s64 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 3) + ; GFX9-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (i64), addrspace 3) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]] ; ; GFX10-LABEL: name: load_local_s64 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 3) + ; GFX10-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (i64), addrspace 3) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(s64) = G_LOAD %0 :: (load (s64), align 8, addrspace 3) - $vgpr0_vgpr1 = COPY %1 + %1:vgpr(i64) = G_LOAD %0(p3) :: (load (i64), addrspace 3) + $vgpr0_vgpr1 = COPY %1(i64) ... 
@@ -301,33 +301,33 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(i64) = G_LOAD [[COPY]](p3) :: (load (i64), align 4, addrspace 3) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; GFX7-LABEL: name: load_local_s64_align4 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load (s64), align 4, addrspace 3) + ; GFX7-NEXT: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load (i64), align 4, addrspace 3) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_]] ; ; GFX9-LABEL: name: load_local_s64_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (s64), align 4, addrspace 3) + ; GFX9-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (i64), align 4, addrspace 3) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]] ; ; GFX10-LABEL: name: load_local_s64_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (s64), align 4, addrspace 3) + ; GFX10-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (i64), align 4, addrspace 3) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(s64) = G_LOAD %0 :: (load (s64), align 4, addrspace 3) - $vgpr0_vgpr1 = COPY %1 + %1:vgpr(i64) = G_LOAD %0(p3) :: (load (i64), align 4, addrspace 3) + $vgpr0_vgpr1 = COPY %1(i64) ... @@ -372,8 +372,8 @@ body: | ; GFX10-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (p3), addrspace 3) ; GFX10-NEXT: $vgpr0 = COPY [[DS_READ_B32_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(p3) = G_LOAD %0 :: (load (p3), align 4, addrspace 3) - $vgpr0 = COPY %1 + %1:vgpr(p3) = G_LOAD %0(p3) :: (load (p3), addrspace 3) + $vgpr0 = COPY %1(p3) ... @@ -418,8 +418,8 @@ body: | ; GFX10-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (p5), addrspace 3) ; GFX10-NEXT: $vgpr0 = COPY [[DS_READ_B32_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(p5) = G_LOAD %0 :: (load (p5), align 4, addrspace 3) - $vgpr0 = COPY %1 + %1:vgpr(p5) = G_LOAD %0(p3) :: (load (p5), addrspace 3) + $vgpr0 = COPY %1(p5) ... @@ -464,8 +464,8 @@ body: | ; GFX10-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (p1), addrspace 3) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(p1) = G_LOAD %0 :: (load (p1), align 8, addrspace 3) - $vgpr0_vgpr1 = COPY %1 + %1:vgpr(p1) = G_LOAD %0(p3) :: (load (p1), addrspace 3) + $vgpr0_vgpr1 = COPY %1(p1) ... 
@@ -510,8 +510,8 @@ body: | ; GFX10-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (p1), align 4, addrspace 3) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(p1) = G_LOAD %0 :: (load (p1), align 4, addrspace 3) - $vgpr0_vgpr1 = COPY %1 + %1:vgpr(p1) = G_LOAD %0(p3) :: (load (p1), align 4, addrspace 3) + $vgpr0_vgpr1 = COPY %1(p1) ... @@ -556,8 +556,8 @@ body: | ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p3) :: (load (p999), addrspace 3) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999) %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(p999) = G_LOAD %0 :: (load (p999), align 8, addrspace 3) - $vgpr0_vgpr1 = COPY %1 + %1:vgpr(p999) = G_LOAD %0(p3) :: (load (p999), addrspace 3) + $vgpr0_vgpr1 = COPY %1(p999) ... @@ -577,7 +577,7 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<2 x s32>), addrspace 3) + ; GFX6-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<2 x i32>), addrspace 3) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] ; ; GFX7-LABEL: name: load_local_v2p3 @@ -585,26 +585,26 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<2 x s32>), addrspace 3) + ; GFX7-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<2 x i32>), addrspace 3) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] ; ; GFX9-LABEL: name: load_local_v2p3 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 3) + ; GFX9-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<2 x i32>), addrspace 3) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]] ; ; GFX10-LABEL: name: load_local_v2p3 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 3) + ; GFX10-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<2 x i32>), addrspace 3) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 8, addrspace 3) - %2:vgpr(<2 x p3>) = G_BITCAST %1(<2 x s32>) - $vgpr0_vgpr1 = COPY %2 + %1:vgpr(<2 x i32>) = G_LOAD %0(p3) :: (load (<2 x i32>), addrspace 3) + %2:vgpr(<2 x p3>) = G_BITCAST %1(<2 x i32>) + $vgpr0_vgpr1 = COPY %2(<2 x p3>) ... 
@@ -624,7 +624,7 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<2 x s16>), addrspace 3) + ; GFX6-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<2 x i16>), addrspace 3) ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_B32_]] ; ; GFX7-LABEL: name: load_local_v2s16 @@ -632,25 +632,25 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<2 x s16>), addrspace 3) + ; GFX7-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<2 x i16>), addrspace 3) ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_B32_]] ; ; GFX9-LABEL: name: load_local_v2s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 3) + ; GFX9-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<2 x i16>), addrspace 3) ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_B32_gfx9_]] ; ; GFX10-LABEL: name: load_local_v2s16 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 3) + ; GFX10-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<2 x i16>), addrspace 3) ; GFX10-NEXT: $vgpr0 = COPY [[DS_READ_B32_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 3) - $vgpr0 = COPY %1 + %1:vgpr(<2 x i16>) = G_LOAD %0(p3) :: (load (<2 x i16>), addrspace 3) + $vgpr0 = COPY %1(<2 x i16>) ... 
@@ -670,7 +670,7 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<4 x s16>), addrspace 3) + ; GFX6-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<4 x i16>), addrspace 3) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] ; ; GFX7-LABEL: name: load_local_v4s16 @@ -678,25 +678,25 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<4 x s16>), addrspace 3) + ; GFX7-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<4 x i16>), addrspace 3) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] ; ; GFX9-LABEL: name: load_local_v4s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s16>), addrspace 3) + ; GFX9-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<4 x i16>), addrspace 3) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]] ; ; GFX10-LABEL: name: load_local_v4s16 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s16>), addrspace 3) + ; GFX10-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<4 x i16>), addrspace 3) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 8, addrspace 3) - $vgpr0_vgpr1 = COPY %1 + %1:vgpr(<4 x i16>) = G_LOAD %0(p3) :: (load (<4 x i16>), addrspace 3) + $vgpr0_vgpr1 = COPY %1(<4 x i16>) ... 
@@ -742,7 +742,7 @@ body: | ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65535, implicit $exec ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[V_ADD_CO_U32_e64_]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX6-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[V_ADD_CO_U32_e64_]], 0, 0, implicit $m0, implicit $exec :: (load (i8), addrspace 3) ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_U8_]] ; ; GFX7-LABEL: name: load_local_s32_from_1_gep_65535 @@ -750,27 +750,27 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 65535, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX7-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 65535, 0, implicit $m0, implicit $exec :: (load (i8), addrspace 3) ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_U8_]] ; ; GFX9-LABEL: name: load_local_s32_from_1_gep_65535 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[COPY]], 65535, 0, implicit $exec :: (load (s8), addrspace 3) + ; GFX9-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[COPY]], 65535, 0, implicit $exec :: (load (i8), addrspace 3) ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]] ; ; GFX10-LABEL: name: load_local_s32_from_1_gep_65535 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[COPY]], 65535, 0, implicit $exec :: (load (s8), addrspace 3) + ; GFX10-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[COPY]], 65535, 0, implicit $exec :: (load (i8), addrspace 3) ; GFX10-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(s32) = G_CONSTANT i32 65535 - %2:vgpr(p3) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 3) - $vgpr0 = COPY %3 + %1:vgpr(i32) = G_CONSTANT i32 65535 + %2:vgpr(p3) = G_PTR_ADD %0, %1(i32) + %3:vgpr(i32) = G_LOAD %2(p3) :: (load (i8), addrspace 3) + $vgpr0 = COPY %3(i32) ... 
@@ -792,7 +792,7 @@ body: | ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec ; GFX6-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[V_AND_B32_e64_]], 65535, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX6-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[V_AND_B32_e64_]], 65535, 0, implicit $m0, implicit $exec :: (load (i8), addrspace 3) ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_U8_]] ; ; GFX7-LABEL: name: load_local_s32_from_1_gep_65535_known_bits_base_address @@ -802,7 +802,7 @@ body: | ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec ; GFX7-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[V_AND_B32_e64_]], 65535, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX7-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[V_AND_B32_e64_]], 65535, 0, implicit $m0, implicit $exec :: (load (i8), addrspace 3) ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_U8_]] ; ; GFX9-LABEL: name: load_local_s32_from_1_gep_65535_known_bits_base_address @@ -811,7 +811,7 @@ body: | ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec ; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec - ; GFX9-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[V_AND_B32_e64_]], 65535, 0, implicit $exec :: (load (s8), addrspace 3) + ; GFX9-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[V_AND_B32_e64_]], 65535, 0, implicit $exec :: (load (i8), addrspace 3) ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]] ; ; GFX10-LABEL: name: load_local_s32_from_1_gep_65535_known_bits_base_address @@ -820,16 +820,16 @@ body: | ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec ; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec - ; GFX10-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[V_AND_B32_e64_]], 65535, 0, implicit $exec :: (load (s8), addrspace 3) + ; GFX10-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[V_AND_B32_e64_]], 65535, 0, implicit $exec :: (load (i8), addrspace 3) ; GFX10-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = G_CONSTANT i32 2147483647 - %2:vgpr(s32) = G_AND %0, %1 - %3:vgpr(p3) = G_INTTOPTR %2 - %4:vgpr(s32) = G_CONSTANT i32 65535 - %5:vgpr(p3) = G_PTR_ADD %3, %4 - %6:vgpr(s32) = G_LOAD %5 :: (load (s8), align 1, addrspace 3) - $vgpr0 = COPY %6 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = G_CONSTANT i32 2147483647 + %2:vgpr(i32) = G_AND %0, %1 + %3:vgpr(p3) = G_INTTOPTR %2(i32) + %4:vgpr(i32) = G_CONSTANT i32 65535 + %5:vgpr(p3) = G_PTR_ADD %3, %4(i32) + %6:vgpr(i32) = G_LOAD %5(p3) :: (load (i8), addrspace 3) + $vgpr0 = COPY %6(i32) ... 
@@ -851,7 +851,7 @@ body: | ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65536, implicit $exec ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[V_ADD_CO_U32_e64_]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX6-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[V_ADD_CO_U32_e64_]], 0, 0, implicit $m0, implicit $exec :: (load (i8), addrspace 3) ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_U8_]] ; ; GFX7-LABEL: name: load_local_s32_from_1_gep_65536 @@ -861,7 +861,7 @@ body: | ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65536, implicit $exec ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[V_ADD_CO_U32_e64_]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX7-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[V_ADD_CO_U32_e64_]], 0, 0, implicit $m0, implicit $exec :: (load (i8), addrspace 3) ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_U8_]] ; ; GFX9-LABEL: name: load_local_s32_from_1_gep_65536 @@ -870,7 +870,7 @@ body: | ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65536, implicit $exec ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX9-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[V_ADD_U32_e64_]], 0, 0, implicit $exec :: (load (s8), addrspace 3) + ; GFX9-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[V_ADD_U32_e64_]], 0, 0, implicit $exec :: (load (i8), addrspace 3) ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]] ; ; GFX10-LABEL: name: load_local_s32_from_1_gep_65536 @@ -879,13 +879,13 @@ body: | ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65536, implicit $exec ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX10-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[V_ADD_U32_e64_]], 0, 0, implicit $exec :: (load (s8), addrspace 3) + ; GFX10-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[V_ADD_U32_e64_]], 0, 0, implicit $exec :: (load (i8), addrspace 3) ; GFX10-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(s32) = G_CONSTANT i32 65536 - %2:vgpr(p3) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 3) - $vgpr0 = COPY %3 + %1:vgpr(i32) = G_CONSTANT i32 65536 + %2:vgpr(p3) = G_PTR_ADD %0, %1(i32) + %3:vgpr(i32) = G_LOAD %2(p3) :: (load (i8), addrspace 3) + $vgpr0 = COPY %3(i32) ... 
@@ -907,7 +907,7 @@ body: | ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[V_ADD_CO_U32_e64_]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX6-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[V_ADD_CO_U32_e64_]], 0, 0, implicit $m0, implicit $exec :: (load (i8), addrspace 3) ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_U8_]] ; ; GFX7-LABEL: name: load_local_s32_from_1_gep_m1 @@ -917,7 +917,7 @@ body: | ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[V_ADD_CO_U32_e64_]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX7-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[V_ADD_CO_U32_e64_]], 0, 0, implicit $m0, implicit $exec :: (load (i8), addrspace 3) ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_U8_]] ; ; GFX9-LABEL: name: load_local_s32_from_1_gep_m1 @@ -926,7 +926,7 @@ body: | ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX9-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[V_ADD_U32_e64_]], 0, 0, implicit $exec :: (load (s8), addrspace 3) + ; GFX9-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[V_ADD_U32_e64_]], 0, 0, implicit $exec :: (load (i8), addrspace 3) ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]] ; ; GFX10-LABEL: name: load_local_s32_from_1_gep_m1 @@ -935,13 +935,13 @@ body: | ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX10-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[V_ADD_U32_e64_]], 0, 0, implicit $exec :: (load (s8), addrspace 3) + ; GFX10-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[V_ADD_U32_e64_]], 0, 0, implicit $exec :: (load (i8), addrspace 3) ; GFX10-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(s32) = G_CONSTANT i32 -1 - %2:vgpr(p3) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 3) - $vgpr0 = COPY %3 + %1:vgpr(i32) = G_CONSTANT i32 -1 + %2:vgpr(p3) = G_PTR_ADD %0, %1(i32) + %3:vgpr(i32) = G_LOAD %2(p3) :: (load (i8), addrspace 3) + $vgpr0 = COPY %3(i32) ... 
@@ -960,38 +960,38 @@ body: | ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 - ; GFX6-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1016 - ; GFX6-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX6-NEXT: [[C:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 1016 + ; GFX6-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p3) = G_PTR_ADD [[COPY]], [[C]](i32) ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64), align 4, addrspace 3) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(i64) = G_LOAD [[PTR_ADD]](p3) :: (load (i64), align 4, addrspace 3) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; GFX7-LABEL: name: load_local_s64_align4_from_1_gep_1016 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 [[COPY]], 254, 255, 0, implicit $m0, implicit $exec :: (load (s64), align 4, addrspace 3) + ; GFX7-NEXT: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 [[COPY]], 254, 255, 0, implicit $m0, implicit $exec :: (load (i64), align 4, addrspace 3) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_]] ; ; GFX9-LABEL: name: load_local_s64_align4_from_1_gep_1016 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 254, 255, 0, implicit $exec :: (load (s64), align 4, addrspace 3) + ; GFX9-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 254, 255, 0, implicit $exec :: (load (i64), align 4, addrspace 3) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]] ; ; GFX10-LABEL: name: load_local_s64_align4_from_1_gep_1016 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 254, 255, 0, implicit $exec :: (load (s64), align 4, addrspace 3) + ; GFX10-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 254, 255, 0, implicit $exec :: (load (i64), align 4, addrspace 3) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(s32) = G_CONSTANT i32 1016 - %2:vgpr(p3) = G_PTR_ADD %0, %1 - %3:vgpr(s64) = G_LOAD %2 :: (load (s64), align 4, addrspace 3) - $vgpr0_vgpr1 = COPY %3 + %1:vgpr(i32) = G_CONSTANT i32 1016 + %2:vgpr(p3) = G_PTR_ADD %0, %1(i32) + %3:vgpr(i64) = G_LOAD %2(p3) :: (load (i64), align 4, addrspace 3) + $vgpr0_vgpr1 = COPY %3(i64) ... 
@@ -1010,11 +1010,11 @@ body: | ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 - ; GFX6-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1020 - ; GFX6-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX6-NEXT: [[C:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 1020 + ; GFX6-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p3) = G_PTR_ADD [[COPY]], [[C]](i32) ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64), align 4, addrspace 3) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(i64) = G_LOAD [[PTR_ADD]](p3) :: (load (i64), align 4, addrspace 3) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; GFX7-LABEL: name: load_local_s64_align4_from_1_gep_1020 ; GFX7: liveins: $vgpr0_vgpr1 @@ -1023,7 +1023,7 @@ body: | ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1020, implicit $exec ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 [[V_ADD_CO_U32_e64_]], 0, 1, 0, implicit $m0, implicit $exec :: (load (s64), align 4, addrspace 3) + ; GFX7-NEXT: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 [[V_ADD_CO_U32_e64_]], 0, 1, 0, implicit $m0, implicit $exec :: (load (i64), align 4, addrspace 3) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_]] ; ; GFX9-LABEL: name: load_local_s64_align4_from_1_gep_1020 @@ -1032,7 +1032,7 @@ body: | ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1020, implicit $exec ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX9-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[V_ADD_U32_e64_]], 0, 1, 0, implicit $exec :: (load (s64), align 4, addrspace 3) + ; GFX9-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[V_ADD_U32_e64_]], 0, 1, 0, implicit $exec :: (load (i64), align 4, addrspace 3) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]] ; ; GFX10-LABEL: name: load_local_s64_align4_from_1_gep_1020 @@ -1041,12 +1041,12 @@ body: | ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1020, implicit $exec ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX10-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[V_ADD_U32_e64_]], 0, 1, 0, implicit $exec :: (load (s64), align 4, addrspace 3) + ; GFX10-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[V_ADD_U32_e64_]], 0, 1, 0, implicit $exec :: (load (i64), align 4, addrspace 3) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(s32) = G_CONSTANT i32 1020 - %2:vgpr(p3) = G_PTR_ADD %0, %1 - %3:vgpr(s64) = G_LOAD %2 :: (load (s64), align 4, addrspace 3) - $vgpr0_vgpr1 = COPY %3 + %1:vgpr(i32) = G_CONSTANT i32 1020 + %2:vgpr(p3) = G_PTR_ADD %0, %1(i32) + %3:vgpr(i64) = G_LOAD %2(p3) :: (load (i64), align 4, addrspace 3) + $vgpr0_vgpr1 = COPY %3(i64) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir index df148638e7005..1b736c40975b8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir @@ -24,32 +24,32 @@ body: | ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (i32), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; ; GFX9-LABEL: name: load_private_s32_from_4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) + ; GFX9-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (i32), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; ; GFX11-LABEL: name: load_private_s32_from_4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 5) + ; GFX11-NEXT: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i32), addrspace 5) ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_DWORD]] ; ; GFX12-LABEL: name: load_private_s32_from_4 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX12-NEXT: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 5) + ; GFX12-NEXT: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i32), addrspace 5) ; GFX12-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_DWORD]] %0:vgpr(p5) = COPY $vgpr0 - %1:vgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 5) - $vgpr0 = COPY %1 + %1:vgpr(i32) = G_LOAD %0(p5) :: (load (i32), addrspace 5) + $vgpr0 = COPY %1(i32) ... 
@@ -71,32 +71,32 @@ body: | ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s16), addrspace 5) + ; GFX6-NEXT: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (i16), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_OFFEN]] ; ; GFX9-LABEL: name: load_private_s32_from_2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s16), addrspace 5) + ; GFX9-NEXT: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (i16), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_OFFEN]] ; ; GFX11-LABEL: name: load_private_s32_from_2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[SCRATCH_LOAD_USHORT:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16), addrspace 5) + ; GFX11-NEXT: [[SCRATCH_LOAD_USHORT:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i16), addrspace 5) ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_USHORT]] ; ; GFX12-LABEL: name: load_private_s32_from_2 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX12-NEXT: [[SCRATCH_LOAD_USHORT:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16), addrspace 5) + ; GFX12-NEXT: [[SCRATCH_LOAD_USHORT:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i16), addrspace 5) ; GFX12-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_USHORT]] %0:vgpr(p5) = COPY $vgpr0 - %1:vgpr(s32) = G_LOAD %0 :: (load (s16), align 2, addrspace 5) - $vgpr0 = COPY %1 + %1:vgpr(i32) = G_LOAD %0(p5) :: (load (i16), addrspace 5) + $vgpr0 = COPY %1(i32) ... 
@@ -118,32 +118,32 @@ body: | ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; ; GFX9-LABEL: name: load_private_s32_from_1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; ; GFX11-LABEL: name: load_private_s32_from_1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 5) ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_private_s32_from_1 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX12-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX12-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 5) ; GFX12-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = COPY $vgpr0 - %1:vgpr(s32) = G_LOAD %0 :: (load (s8), align 1, addrspace 5) - $vgpr0 = COPY %1 + %1:vgpr(i32) = G_LOAD %0(p5) :: (load (i8), addrspace 5) + $vgpr0 = COPY %1(i32) ... @@ -189,8 +189,8 @@ body: | ; GFX12-NEXT: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p3), addrspace 5) ; GFX12-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_DWORD]] %0:vgpr(p5) = COPY $vgpr0 - %1:vgpr(p3) = G_LOAD %0 :: (load (p3), align 4, addrspace 5) - $vgpr0 = COPY %1 + %1:vgpr(p3) = G_LOAD %0(p5) :: (load (p3), addrspace 5) + $vgpr0 = COPY %1(p3) ... @@ -236,8 +236,8 @@ body: | ; GFX12-NEXT: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p5), addrspace 5) ; GFX12-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_DWORD]] %0:vgpr(p5) = COPY $vgpr0 - %1:vgpr(p5) = G_LOAD %0 :: (load (p5), align 4, addrspace 5) - $vgpr0 = COPY %1 + %1:vgpr(p5) = G_LOAD %0(p5) :: (load (p5), addrspace 5) + $vgpr0 = COPY %1(p5) ... 
@@ -260,32 +260,32 @@ body: | ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 5) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (<2 x i16>), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; ; GFX9-LABEL: name: load_private_v2s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 5) + ; GFX9-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (<2 x i16>), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; ; GFX11-LABEL: name: load_private_v2s16 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>), addrspace 5) + ; GFX11-NEXT: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x i16>), addrspace 5) ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_DWORD]] ; ; GFX12-LABEL: name: load_private_v2s16 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX12-NEXT: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>), addrspace 5) + ; GFX12-NEXT: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x i16>), addrspace 5) ; GFX12-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_DWORD]] %0:vgpr(p5) = COPY $vgpr0 - %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 5) - $vgpr0 = COPY %1 + %1:vgpr(<2 x i16>) = G_LOAD %0(p5) :: (load (<2 x i16>), addrspace 5) + $vgpr0 = COPY %1(<2 x i16>) ... 
@@ -313,14 +313,14 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; ; GFX9-LABEL: name: load_private_s32_from_1_gep_2047 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2047, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2047, 0, 0, implicit $exec :: (load (i8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; ; GFX11-LABEL: name: load_private_s32_from_1_gep_2047 @@ -329,20 +329,20 @@ body: | ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec ; GFX11-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 5) ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_private_s32_from_1_gep_2047 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX12-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX12-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 5) ; GFX12-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = COPY $vgpr0 - %1:vgpr(s32) = G_CONSTANT i32 2047 - %2:vgpr(p5) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 5) - $vgpr0 = COPY %3 + %1:vgpr(i32) = G_CONSTANT i32 2047 + %2:vgpr(p5) = G_PTR_ADD %0, %1(i32) + %3:vgpr(i32) = G_LOAD %2(p5) :: (load (i8), addrspace 5) + $vgpr0 = COPY %3(i32) ... 
@@ -366,7 +366,7 @@ body: | ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec ; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec - ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_AND_B32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2047, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_AND_B32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2047, 0, 0, implicit $exec :: (load (i8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; ; GFX11-LABEL: name: load_private_s32_from_1_gep_2047_known_bits @@ -375,7 +375,7 @@ body: | ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec ; GFX11-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec - ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_AND_B32_e64_]], 2047, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_AND_B32_e64_]], 2047, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 5) ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_private_s32_from_1_gep_2047_known_bits @@ -384,16 +384,16 @@ body: | ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec ; GFX12-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec - ; GFX12-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_AND_B32_e64_]], 2047, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX12-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_AND_B32_e64_]], 2047, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 5) ; GFX12-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = G_CONSTANT i32 2147483647 - %2:vgpr(s32) = G_AND %0, %1 - %3:vgpr(p5) = G_INTTOPTR %2 - %4:vgpr(s32) = G_CONSTANT i32 2047 - %5:vgpr(p5) = G_PTR_ADD %3, %4 - %6:vgpr(s32) = G_LOAD %5 :: (load (s8), align 1, addrspace 5) - $vgpr0 = COPY %6 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = G_CONSTANT i32 2147483647 + %2:vgpr(i32) = G_AND %0, %1 + %3:vgpr(p5) = G_INTTOPTR %2(i32) + %4:vgpr(i32) = G_CONSTANT i32 2047 + %5:vgpr(p5) = G_PTR_ADD %3, %4(i32) + %6:vgpr(i32) = G_LOAD %5(p5) :: (load (i8), addrspace 5) + $vgpr0 = COPY %6(i32) ... 
@@ -417,14 +417,14 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; ; GFX9-LABEL: name: load_private_s32_from_1_gep_2048 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2048, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2048, 0, 0, implicit $exec :: (load (i8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; ; GFX11-LABEL: name: load_private_s32_from_1_gep_2048 @@ -433,20 +433,20 @@ body: | ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec ; GFX11-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 5) ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_private_s32_from_1_gep_2048 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX12-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], 2048, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX12-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], 2048, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 5) ; GFX12-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = COPY $vgpr0 - %1:vgpr(s32) = G_CONSTANT i32 2048 - %2:vgpr(p5) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 5) - $vgpr0 = COPY %3 + %1:vgpr(i32) = G_CONSTANT i32 2048 + %2:vgpr(p5) = G_PTR_ADD %0, %1(i32) + %3:vgpr(i32) = G_LOAD %2(p5) :: (load (i8), addrspace 5) + $vgpr0 = COPY %3(i32) ... 
@@ -470,7 +470,7 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2047, implicit $exec ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; ; GFX9-LABEL: name: load_private_s32_from_1_gep_m2047 @@ -479,27 +479,27 @@ body: | ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2047, implicit $exec ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; ; GFX11-LABEL: name: load_private_s32_from_1_gep_m2047 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], -2047, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], -2047, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 5) ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_private_s32_from_1_gep_m2047 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX12-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], -2047, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX12-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], -2047, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 5) ; GFX12-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = COPY $vgpr0 - %1:vgpr(s32) = G_CONSTANT i32 -2047 - %2:vgpr(p5) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 5) - $vgpr0 = COPY %3 + %1:vgpr(i32) = G_CONSTANT i32 -2047 + %2:vgpr(p5) = G_PTR_ADD %0, %1(i32) + %3:vgpr(i32) = G_LOAD %2(p5) :: (load (i8), addrspace 5) + $vgpr0 = COPY %3(i32) ... 
@@ -523,7 +523,7 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2048, implicit $exec ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; ; GFX9-LABEL: name: load_private_s32_from_1_gep_m2048 @@ -532,27 +532,27 @@ body: | ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2048, implicit $exec ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; ; GFX11-LABEL: name: load_private_s32_from_1_gep_m2048 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], -2048, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], -2048, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 5) ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_private_s32_from_1_gep_m2048 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX12-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], -2048, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX12-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], -2048, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 5) ; GFX12-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = COPY $vgpr0 - %1:vgpr(s32) = G_CONSTANT i32 -2048 - %2:vgpr(p5) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 5) - $vgpr0 = COPY %3 + %1:vgpr(i32) = G_CONSTANT i32 -2048 + %2:vgpr(p5) = G_PTR_ADD %0, %1(i32) + %3:vgpr(i32) = G_LOAD %2(p5) :: (load (i8), addrspace 5) + $vgpr0 = COPY %3(i32) ... 
@@ -576,14 +576,14 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; ; GFX9-LABEL: name: load_private_s32_from_1_gep_4095 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (load (i8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; ; GFX11-LABEL: name: load_private_s32_from_1_gep_4095 @@ -592,20 +592,20 @@ body: | ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX11-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 5) ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_private_s32_from_1_gep_4095 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX12-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], 4095, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX12-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], 4095, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 5) ; GFX12-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = COPY $vgpr0 - %1:vgpr(s32) = G_CONSTANT i32 4095 - %2:vgpr(p5) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 5) - $vgpr0 = COPY %3 + %1:vgpr(i32) = G_CONSTANT i32 4095 + %2:vgpr(p5) = G_PTR_ADD %0, %1(i32) + %3:vgpr(i32) = G_LOAD %2(p5) :: (load (i8), addrspace 5) + $vgpr0 = COPY %3(i32) ... 
@@ -629,7 +629,7 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; ; GFX9-LABEL: name: load_private_s32_from_1_gep_4096 @@ -638,7 +638,7 @@ body: | ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; ; GFX11-LABEL: name: load_private_s32_from_1_gep_4096 @@ -647,20 +647,20 @@ body: | ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX11-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 5) ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_private_s32_from_1_gep_4096 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX12-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], 4096, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX12-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], 4096, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 5) ; GFX12-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = COPY $vgpr0 - %1:vgpr(s32) = G_CONSTANT i32 4096 - %2:vgpr(p5) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 5) - $vgpr0 = COPY %3 + %1:vgpr(i32) = G_CONSTANT i32 4096 + %2:vgpr(p5) = G_PTR_ADD %0, %1(i32) + %3:vgpr(i32) = G_LOAD %2(p5) :: (load (i8), addrspace 5) + $vgpr0 = COPY %3(i32) ... 
@@ -684,7 +684,7 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -4095, implicit $exec ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; ; GFX9-LABEL: name: load_private_s32_from_1_gep_m4095 @@ -693,27 +693,27 @@ body: | ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -4095, implicit $exec ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; ; GFX11-LABEL: name: load_private_s32_from_1_gep_m4095 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], -4095, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], -4095, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 5) ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_private_s32_from_1_gep_m4095 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX12-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], -4095, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX12-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], -4095, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 5) ; GFX12-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = COPY $vgpr0 - %1:vgpr(s32) = G_CONSTANT i32 -4095 - %2:vgpr(p5) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 5) - $vgpr0 = COPY %3 + %1:vgpr(i32) = G_CONSTANT i32 -4095 + %2:vgpr(p5) = G_PTR_ADD %0, %1(i32) + %3:vgpr(i32) = G_LOAD %2(p5) :: (load (i8), addrspace 5) + $vgpr0 = COPY %3(i32) ... 
@@ -737,7 +737,7 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -4096, implicit $exec ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; ; GFX9-LABEL: name: load_private_s32_from_1_gep_m4096 @@ -746,27 +746,27 @@ body: | ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -4096, implicit $exec ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; ; GFX11-LABEL: name: load_private_s32_from_1_gep_m4096 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], -4096, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], -4096, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 5) ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_private_s32_from_1_gep_m4096 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX12-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], -4096, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX12-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], -4096, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 5) ; GFX12-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = COPY $vgpr0 - %1:vgpr(s32) = G_CONSTANT i32 -4096 - %2:vgpr(p5) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 5) - $vgpr0 = COPY %3 + %1:vgpr(i32) = G_CONSTANT i32 -4096 + %2:vgpr(p5) = G_PTR_ADD %0, %1(i32) + %3:vgpr(i32) = G_LOAD %2(p5) :: (load (i8), addrspace 5) + $vgpr0 = COPY %3(i32) ... 
@@ -790,7 +790,7 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; ; GFX9-LABEL: name: load_private_s32_from_1_gep_8191 @@ -799,7 +799,7 @@ body: | ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; ; GFX11-LABEL: name: load_private_s32_from_1_gep_8191 @@ -808,20 +808,20 @@ body: | ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec ; GFX11-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 5) ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_private_s32_from_1_gep_8191 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX12-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], 8191, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX12-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], 8191, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 5) ; GFX12-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = COPY $vgpr0 - %1:vgpr(s32) = G_CONSTANT i32 8191 - %2:vgpr(p5) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 5) - $vgpr0 = COPY %3 + %1:vgpr(i32) = G_CONSTANT i32 8191 + %2:vgpr(p5) = G_PTR_ADD %0, %1(i32) + %3:vgpr(i32) = G_LOAD %2(p5) :: (load (i8), addrspace 5) + $vgpr0 = COPY %3(i32) ... 
@@ -845,7 +845,7 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; ; GFX9-LABEL: name: load_private_s32_from_1_gep_8192 @@ -854,7 +854,7 @@ body: | ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; ; GFX11-LABEL: name: load_private_s32_from_1_gep_8192 @@ -863,20 +863,20 @@ body: | ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec ; GFX11-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 5) ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_private_s32_from_1_gep_8192 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX12-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], 8192, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX12-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], 8192, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 5) ; GFX12-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = COPY $vgpr0 - %1:vgpr(s32) = G_CONSTANT i32 8192 - %2:vgpr(p5) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 5) - $vgpr0 = COPY %3 + %1:vgpr(i32) = G_CONSTANT i32 8192 + %2:vgpr(p5) = G_PTR_ADD %0, %1(i32) + %3:vgpr(i32) = G_LOAD %2(p5) :: (load (i8), addrspace 5) + $vgpr0 = COPY %3(i32) ... 
@@ -900,7 +900,7 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -8191, implicit $exec ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; ; GFX9-LABEL: name: load_private_s32_from_1_gep_m8191 @@ -909,7 +909,7 @@ body: | ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -8191, implicit $exec ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; ; GFX11-LABEL: name: load_private_s32_from_1_gep_m8191 @@ -918,20 +918,20 @@ body: | ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -8191, implicit $exec ; GFX11-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 5) ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_private_s32_from_1_gep_m8191 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX12-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], -8191, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX12-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], -8191, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 5) ; GFX12-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = COPY $vgpr0 - %1:vgpr(s32) = G_CONSTANT i32 -8191 - %2:vgpr(p5) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 5) - $vgpr0 = COPY %3 + %1:vgpr(i32) = G_CONSTANT i32 -8191 + %2:vgpr(p5) = G_PTR_ADD %0, %1(i32) + %3:vgpr(i32) = G_LOAD %2(p5) :: (load (i8), addrspace 5) + $vgpr0 = COPY %3(i32) ... 
@@ -955,7 +955,7 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -8192, implicit $exec ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; ; GFX9-LABEL: name: load_private_s32_from_1_gep_m8192 @@ -964,7 +964,7 @@ body: | ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -8192, implicit $exec ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; ; GFX11-LABEL: name: load_private_s32_from_1_gep_m8192 @@ -973,20 +973,20 @@ body: | ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -8192, implicit $exec ; GFX11-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 5) ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_private_s32_from_1_gep_m8192 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX12-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], -8192, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX12-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], -8192, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 5) ; GFX12-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = COPY $vgpr0 - %1:vgpr(s32) = G_CONSTANT i32 -8192 - %2:vgpr(p5) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 5) - $vgpr0 = COPY %3 + %1:vgpr(i32) = G_CONSTANT i32 -8192 + %2:vgpr(p5) = G_PTR_ADD %0, %1(i32) + %3:vgpr(i32) = G_LOAD %2(p5) :: (load (i8), addrspace 5) + $vgpr0 = COPY %3(i32) ... 
@@ -1010,7 +1010,7 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8388607, implicit $exec ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; ; GFX9-LABEL: name: load_private_s32_from_1_gep_24bit_max @@ -1019,7 +1019,7 @@ body: | ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8388607, implicit $exec ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; ; GFX11-LABEL: name: load_private_s32_from_1_gep_24bit_max @@ -1028,20 +1028,20 @@ body: | ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8388607, implicit $exec ; GFX11-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 5) ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_private_s32_from_1_gep_24bit_max ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX12-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], 8388607, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX12-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], 8388607, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 5) ; GFX12-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = COPY $vgpr0 - %1:vgpr(s32) = G_CONSTANT i32 8388607 - %2:vgpr(p5) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 5) - $vgpr0 = COPY %3 + %1:vgpr(i32) = G_CONSTANT i32 8388607 + %2:vgpr(p5) = G_PTR_ADD %0, %1(i32) + %3:vgpr(i32) = G_LOAD %2(p5) :: (load (i8), addrspace 5) + $vgpr0 = COPY %3(i32) ... 
@@ -1065,7 +1065,7 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 16777214, implicit $exec ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; ; GFX9-LABEL: name: load_private_s32_from_1_gep_2x_24bit_max @@ -1074,7 +1074,7 @@ body: | ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 16777214, implicit $exec ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; ; GFX11-LABEL: name: load_private_s32_from_1_gep_2x_24bit_max @@ -1083,7 +1083,7 @@ body: | ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 16777214, implicit $exec ; GFX11-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 5) ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_private_s32_from_1_gep_2x_24bit_max @@ -1092,13 +1092,13 @@ body: | ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 16777214, implicit $exec ; GFX12-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX12-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX12-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 5) ; GFX12-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = COPY $vgpr0 - %1:vgpr(s32) = G_CONSTANT i32 16777214 - %2:vgpr(p5) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 5) - $vgpr0 = COPY %3 + %1:vgpr(i32) = G_CONSTANT i32 16777214 + %2:vgpr(p5) = G_PTR_ADD %0, %1(i32) + %3:vgpr(i32) = G_LOAD %2(p5) :: (load (i8), addrspace 5) + $vgpr0 = COPY %3(i32) ... 
@@ -1122,7 +1122,7 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -8388608, implicit $exec ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; ; GFX9-LABEL: name: load_private_s32_from_1_gep_24bit_min @@ -1131,7 +1131,7 @@ body: | ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -8388608, implicit $exec ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; ; GFX11-LABEL: name: load_private_s32_from_1_gep_24bit_min @@ -1140,20 +1140,20 @@ body: | ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -8388608, implicit $exec ; GFX11-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 5) ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_private_s32_from_1_gep_24bit_min ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX12-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], -8388608, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX12-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], -8388608, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 5) ; GFX12-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = COPY $vgpr0 - %1:vgpr(s32) = G_CONSTANT i32 -8388608 - %2:vgpr(p5) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 5) - $vgpr0 = COPY %3 + %1:vgpr(i32) = G_CONSTANT i32 -8388608 + %2:vgpr(p5) = G_PTR_ADD %0, %1(i32) + %3:vgpr(i32) = G_LOAD %2(p5) :: (load (i8), addrspace 5) + $vgpr0 = COPY %3(i32) ... 
@@ -1177,7 +1177,7 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -16777215, implicit $exec ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; ; GFX9-LABEL: name: load_private_s32_from_1_gep_2x_24bit_min @@ -1186,7 +1186,7 @@ body: | ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -16777215, implicit $exec ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; ; GFX11-LABEL: name: load_private_s32_from_1_gep_2x_24bit_min @@ -1195,7 +1195,7 @@ body: | ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -16777215, implicit $exec ; GFX11-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 5) ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_private_s32_from_1_gep_2x_24bit_min @@ -1204,13 +1204,13 @@ body: | ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -16777215, implicit $exec ; GFX12-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX12-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX12-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 5) ; GFX12-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = COPY $vgpr0 - %1:vgpr(s32) = G_CONSTANT i32 -16777215 - %2:vgpr(p5) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 5) - $vgpr0 = COPY %3 + %1:vgpr(i32) = G_CONSTANT i32 -16777215 + %2:vgpr(p5) = G_PTR_ADD %0, %1(i32) + %3:vgpr(i32) = G_LOAD %2(p5) :: (load (i8), addrspace 5) + $vgpr0 = COPY %3(i32) ... 
@@ -1228,25 +1228,25 @@ body: | bb.0: ; GFX6-LABEL: name: load_private_s32_from_4_constant_0 - ; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) + ; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (i32), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] ; ; GFX9-LABEL: name: load_private_s32_from_4_constant_0 - ; GFX9: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) + ; GFX9: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (i32), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] ; ; GFX11-LABEL: name: load_private_s32_from_4_constant_0 ; GFX11: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX11-NEXT: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 5) + ; GFX11-NEXT: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i32), addrspace 5) ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_DWORD]] ; ; GFX12-LABEL: name: load_private_s32_from_4_constant_0 ; GFX12: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX12-NEXT: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 5) + ; GFX12-NEXT: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i32), addrspace 5) ; GFX12-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_DWORD]] %0:vgpr(p5) = G_CONSTANT i32 0 - %1:vgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 5) - $vgpr0 = COPY %1 + %1:vgpr(i32) = G_LOAD %0(p5) :: (load (i32), addrspace 5) + $vgpr0 = COPY %1(i32) ... 
@@ -1264,25 +1264,25 @@ body: | bb.0: ; GFX6-LABEL: name: load_private_s32_from_4_constant_sgpr_16 - ; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 16, 0, 0, implicit $exec :: (load (s32), addrspace 5) + ; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 16, 0, 0, implicit $exec :: (load (i32), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] ; ; GFX9-LABEL: name: load_private_s32_from_4_constant_sgpr_16 - ; GFX9: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 16, 0, 0, implicit $exec :: (load (s32), addrspace 5) + ; GFX9: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 16, 0, 0, implicit $exec :: (load (i32), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] ; ; GFX11-LABEL: name: load_private_s32_from_4_constant_sgpr_16 ; GFX11: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xexec_hi = S_MOV_B32 16 - ; GFX11-NEXT: [[SCRATCH_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD_SADDR [[S_MOV_B32_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 5) + ; GFX11-NEXT: [[SCRATCH_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD_SADDR [[S_MOV_B32_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i32), addrspace 5) ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_DWORD_SADDR]] ; ; GFX12-LABEL: name: load_private_s32_from_4_constant_sgpr_16 ; GFX12: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xexec_hi = S_MOV_B32 16 - ; GFX12-NEXT: [[SCRATCH_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD_SADDR [[S_MOV_B32_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 5) + ; GFX12-NEXT: [[SCRATCH_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD_SADDR [[S_MOV_B32_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i32), addrspace 5) ; GFX12-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_DWORD_SADDR]] %0:sgpr(p5) = G_CONSTANT i32 16 - %1:vgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 5) - $vgpr0 = COPY %1 + %1:vgpr(i32) = G_LOAD %0(p5) :: (load (i32), addrspace 5) + $vgpr0 = COPY %1(i32) ... 
@@ -1300,25 +1300,25 @@ body: | bb.0: ; GFX6-LABEL: name: load_private_s32_from_1_constant_4095 - ; GFX6: [[BUFFER_LOAD_UBYTE_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX6: [[BUFFER_LOAD_UBYTE_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (load (i8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFSET]] ; ; GFX9-LABEL: name: load_private_s32_from_1_constant_4095 - ; GFX9: [[BUFFER_LOAD_UBYTE_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX9: [[BUFFER_LOAD_UBYTE_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (load (i8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFSET]] ; ; GFX11-LABEL: name: load_private_s32_from_1_constant_4095 ; GFX11: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec - ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 5) ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_private_s32_from_1_constant_4095 ; GFX12: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec - ; GFX12-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX12-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 5) ; GFX12-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = G_CONSTANT i32 4095 - %1:vgpr(s32) = G_LOAD %0 :: (load (s8), align 1, addrspace 5) - $vgpr0 = COPY %1 + %1:vgpr(i32) = G_LOAD %0(p5) :: (load (i8), addrspace 5) + $vgpr0 = COPY %1(i32) ... 
@@ -1337,26 +1337,26 @@ body: | ; GFX6-LABEL: name: load_private_s32_from_1_constant_4096 ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; ; GFX9-LABEL: name: load_private_s32_from_1_constant_4096 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec - ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; ; GFX11-LABEL: name: load_private_s32_from_1_constant_4096 ; GFX11: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec - ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 5) ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] ; ; GFX12-LABEL: name: load_private_s32_from_1_constant_4096 ; GFX12: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec - ; GFX12-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX12-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 5) ; GFX12-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = G_CONSTANT i32 4096 - %1:vgpr(s32) = G_LOAD %0 :: (load (s8), align 1, addrspace 5) - $vgpr0 = COPY %1 + %1:vgpr(i32) = G_LOAD %0(p5) :: (load (i8), addrspace 5) + $vgpr0 = COPY %1(i32) ... 
@@ -1376,23 +1376,23 @@ body: | bb.0: ; GFX6-LABEL: name: load_private_s32_from_fi - ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) + ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (i32), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; ; GFX9-LABEL: name: load_private_s32_from_fi - ; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) + ; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (i32), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; ; GFX11-LABEL: name: load_private_s32_from_fi - ; GFX11: [[SCRATCH_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD_SADDR %stack.0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 5) + ; GFX11: [[SCRATCH_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD_SADDR %stack.0, 0, 0, implicit $exec, implicit $flat_scr :: (load (i32), addrspace 5) ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_DWORD_SADDR]] ; ; GFX12-LABEL: name: load_private_s32_from_fi - ; GFX12: [[SCRATCH_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD_SADDR %stack.0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 5) + ; GFX12: [[SCRATCH_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD_SADDR %stack.0, 0, 0, implicit $exec, implicit $flat_scr :: (load (i32), addrspace 5) ; GFX12-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_DWORD_SADDR]] %0:vgpr(p5) = G_FRAME_INDEX %stack.0 - %1:vgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 5) - $vgpr0 = COPY %1 + %1:vgpr(i32) = G_LOAD %0(p5) :: (load (i32), addrspace 5) + $vgpr0 = COPY %1(i32) ... 
@@ -1411,25 +1411,25 @@ body: | bb.0: ; GFX6-LABEL: name: load_private_s32_from_1_fi_offset_4095 - ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (load (i8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; ; GFX9-LABEL: name: load_private_s32_from_1_fi_offset_4095 - ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (load (i8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; ; GFX11-LABEL: name: load_private_s32_from_1_fi_offset_4095 - ; GFX11: [[SCRATCH_LOAD_UBYTE_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE_SADDR %stack.0, 4095, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX11: [[SCRATCH_LOAD_UBYTE_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE_SADDR %stack.0, 4095, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 5) ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE_SADDR]] ; ; GFX12-LABEL: name: load_private_s32_from_1_fi_offset_4095 - ; GFX12: [[SCRATCH_LOAD_UBYTE_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE_SADDR %stack.0, 4095, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX12: [[SCRATCH_LOAD_UBYTE_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE_SADDR %stack.0, 4095, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 5) ; GFX12-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE_SADDR]] %0:vgpr(p5) = G_FRAME_INDEX %stack.0 - %1:vgpr(s32) = G_CONSTANT i32 4095 - %2:vgpr(p5) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 5) - $vgpr0 = COPY %3 + %1:vgpr(i32) = G_CONSTANT i32 4095 + %2:vgpr(p5) = G_PTR_ADD %0, %1(i32) + %3:vgpr(i32) = G_LOAD %2(p5) :: (load (i8), addrspace 5) + $vgpr0 = COPY %3(i32) ... 
@@ -1449,26 +1449,26 @@ body: | bb.0: ; GFX6-LABEL: name: load_private_s32_from_1_fi_offset_sgpr_4095 - ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (load (i8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; ; GFX9-LABEL: name: load_private_s32_from_1_fi_offset_sgpr_4095 - ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (load (i8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; ; GFX11-LABEL: name: load_private_s32_from_1_fi_offset_sgpr_4095 - ; GFX11: [[SCRATCH_LOAD_UBYTE_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE_SADDR %stack.0, 4095, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX11: [[SCRATCH_LOAD_UBYTE_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE_SADDR %stack.0, 4095, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 5) ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE_SADDR]] ; ; GFX12-LABEL: name: load_private_s32_from_1_fi_offset_sgpr_4095 - ; GFX12: [[SCRATCH_LOAD_UBYTE_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE_SADDR %stack.0, 4095, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX12: [[SCRATCH_LOAD_UBYTE_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE_SADDR %stack.0, 4095, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 5) ; GFX12-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE_SADDR]] %0:vgpr(p5) = G_FRAME_INDEX %stack.0 - %1:sgpr(s32) = G_CONSTANT i32 4095 - %2:vgpr(s32) = COPY %1 - %3:vgpr(p5) = G_PTR_ADD %0, %2 - %4:vgpr(s32) = G_LOAD %3 :: (load (s8), align 1, addrspace 5) - $vgpr0 = COPY %4 + %1:sgpr(i32) = G_CONSTANT i32 4095 + %2:vgpr(i32) = COPY %1(i32) + %3:vgpr(p5) = G_PTR_ADD %0, %2(i32) + %4:vgpr(i32) = G_LOAD %3(p5) :: (load (i8), addrspace 5) + $vgpr0 = COPY %4(i32) ... 
@@ -1491,29 +1491,29 @@ body: | ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec ; GFX6-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, implicit $exec - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; ; GFX9-LABEL: name: load_private_s32_from_1_fi_offset_4096 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, implicit $exec - ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (i8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; ; GFX11-LABEL: name: load_private_s32_from_1_fi_offset_4096 ; GFX11: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec - ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE_SVS:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE_SVS [[V_MOV_B32_e32_]], %stack.0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE_SVS:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE_SVS [[V_MOV_B32_e32_]], %stack.0, 0, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 5) ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE_SVS]] ; ; GFX12-LABEL: name: load_private_s32_from_1_fi_offset_4096 - ; GFX12: [[SCRATCH_LOAD_UBYTE_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE_SADDR %stack.0, 4096, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX12: [[SCRATCH_LOAD_UBYTE_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE_SADDR %stack.0, 4096, 0, implicit $exec, implicit $flat_scr :: (load (i8), addrspace 5) ; GFX12-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE_SADDR]] %0:vgpr(p5) = G_FRAME_INDEX %stack.0 - %1:vgpr(s32) = G_CONSTANT i32 4096 - %2:vgpr(p5) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 5) - $vgpr0 = COPY %3 + %1:vgpr(i32) = G_CONSTANT i32 4096 + %2:vgpr(p5) = G_PTR_ADD %0, %1(i32) + %3:vgpr(i32) = G_LOAD %2(p5) :: (load (i8), addrspace 5) + $vgpr0 = COPY %3(i32) ... 
@@ -1533,25 +1533,25 @@ body: | ; GFX6-LABEL: name: load_private_s32_from_neg1 ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, -1, 0, 0, implicit $exec :: (load (s32), addrspace 5) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, -1, 0, 0, implicit $exec :: (load (i32), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; ; GFX9-LABEL: name: load_private_s32_from_neg1 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX9-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, -1, 0, 0, implicit $exec :: (load (s32), addrspace 5) + ; GFX9-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, -1, 0, 0, implicit $exec :: (load (i32), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; ; GFX11-LABEL: name: load_private_s32_from_neg1 ; GFX11: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX11-NEXT: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 5) + ; GFX11-NEXT: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i32), addrspace 5) ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_DWORD]] ; ; GFX12-LABEL: name: load_private_s32_from_neg1 ; GFX12: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX12-NEXT: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 5) + ; GFX12-NEXT: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (i32), addrspace 5) ; GFX12-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_DWORD]] %0:vgpr(p5) = G_CONSTANT i32 -1 - %1:vgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 5) - $vgpr0 = COPY %1 + %1:vgpr(i32) = G_LOAD %0(p5) :: (load (i32), addrspace 5) + $vgpr0 = COPY %1(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir index 6f971788727b2..f00e784c6b65d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir @@ -98,49 +98,49 @@ body: | %0:sgpr(p4) = COPY $sgpr0_sgpr1 - %1:sgpr(s64) = G_CONSTANT i64 4 + %1:sgpr(i64) = G_CONSTANT i64 4 %2:sgpr(p4) = G_PTR_ADD %0, %1 - %3:sgpr(s32) = G_LOAD %2 :: (load (s32) from %ir.const0, addrspace 4) + %3:sgpr(i32) = G_LOAD %2 :: (load (i32) from %ir.const0, addrspace 4) $sgpr0 = COPY %3 - %4:sgpr(s64) = G_CONSTANT i64 1020 + %4:sgpr(i64) = G_CONSTANT i64 1020 %5:sgpr(p4) = G_PTR_ADD %0, %4 - %6:sgpr(s32) = G_LOAD %5 :: (load (s32) from %ir.const0, addrspace 4) + %6:sgpr(i32) = G_LOAD %5 :: (load (i32) from %ir.const0, addrspace 4) $sgpr0 = COPY %6 - %7:sgpr(s64) = G_CONSTANT i64 1024 + %7:sgpr(i64) = G_CONSTANT i64 1024 %8:sgpr(p4) = G_PTR_ADD %0, %7 - %9:sgpr(s32) = G_LOAD %8 :: (load (s32) from %ir.const0, addrspace 4) + %9:sgpr(i32) = G_LOAD %8 :: (load (i32) from %ir.const0, addrspace 4) $sgpr0 = COPY %9 - %10:sgpr(s64) = G_CONSTANT i64 1048572 + %10:sgpr(i64) = G_CONSTANT i64 1048572 %11:sgpr(p4) = G_PTR_ADD %0, %10 - %12:sgpr(s32) = G_LOAD %11 :: (load (s32) from %ir.const0, addrspace 4) + %12:sgpr(i32) = G_LOAD %11 :: (load (i32) from %ir.const0, addrspace 4) $sgpr0 = COPY %12 - %13:sgpr(s64) = G_CONSTANT i64 1048576 + %13:sgpr(i64) = G_CONSTANT i64 1048576 %14:sgpr(p4) = G_PTR_ADD %0, %13 - %15:sgpr(s32) = G_LOAD %14 :: (load (s32) from %ir.const0, addrspace 4) + %15:sgpr(i32) = G_LOAD %14 :: (load (i32) from %ir.const0, addrspace 4) $sgpr0 = COPY %15 - %16:sgpr(s64) = G_CONSTANT i64 17179869180 + %16:sgpr(i64) = G_CONSTANT i64 17179869180 %17:sgpr(p4) = G_PTR_ADD %0, %16 - %18:sgpr(s32) = G_LOAD %17 :: (load (s32) from %ir.const0, addrspace 4) + %18:sgpr(i32) = G_LOAD %17 :: (load (i32) from %ir.const0, addrspace 4) $sgpr0 = COPY %18 - %19:sgpr(s64) = G_CONSTANT i64 17179869184 + %19:sgpr(i64) = G_CONSTANT i64 17179869184 %20:sgpr(p4) = G_PTR_ADD %0, %19 - %21:sgpr(s32) = G_LOAD %20 :: (load (s32) from %ir.const0, addrspace 4) + %21:sgpr(i32) = G_LOAD %20 :: (load (i32) from %ir.const0, addrspace 4) $sgpr0 = COPY %21 - %22:sgpr(s64) = G_CONSTANT i64 4294967292 + %22:sgpr(i64) = G_CONSTANT i64 4294967292 %23:sgpr(p4) = G_PTR_ADD %0, %22 - %24:sgpr(s32) = G_LOAD %23 :: (load (s32) from %ir.const0, addrspace 4) + %24:sgpr(i32) = G_LOAD %23 :: (load (i32) from %ir.const0, addrspace 4) $sgpr0 = COPY %24 - %25:sgpr(s64) = G_CONSTANT i64 4294967296 + %25:sgpr(i64) = G_CONSTANT i64 4294967296 %26:sgpr(p4) = G_PTR_ADD %0, %25 - %27:sgpr(s32) = G_LOAD %26 :: (load (s32) from %ir.const0, addrspace 4) + %27:sgpr(i32) = G_LOAD %26 :: (load (i32) from %ir.const0, addrspace 4) $sgpr0 = COPY %27 %28:sgpr(p0) = G_LOAD %0 :: (load (p0) from %ir.const0, addrspace 4) @@ -168,19 +168,19 @@ body: | ; CHECK: [[CONSTANT_PTR:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; CHECK: [[GLOBAL_PTR:%[0-9]+]]:sgpr(p1) = COPY $sgpr2_sgpr3 ; CHECK: s_load_dwordx8 [[CONSTANT_PTR]] - %2:sgpr(<8 x s32>) = G_LOAD %0 :: (load (<8 x s32>), addrspace 4) + %2:sgpr(<8 x i32>) = G_LOAD %0 :: (load (<8 x i32>), addrspace 4) $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY %2 ; CHECK: s_load_dwordx16 [[CONSTANT_PTR]] - %3:sgpr(<16 x s32>) = G_LOAD %0 :: (load (<16 x s32>), addrspace 4) + %3:sgpr(<16 x i32>) = G_LOAD %0 :: (load (<16 x i32>), addrspace 4) 
$sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %3 ; CHECK: s_load_dwordx8 [[GLOBAL_PTR]] - %4:sgpr(<8 x s32>) = G_LOAD %1 :: (load (<8 x s32>), addrspace 1) + %4:sgpr(<8 x i32>) = G_LOAD %1 :: (load (<8 x i32>), addrspace 1) $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY %4 ; CHECK s_load_dwordx16 [[GLOBAL_PTR]] - %5:sgpr(<16 x s32>) = G_LOAD %1 :: (load (<16 x s32>), addrspace 1) + %5:sgpr(<16 x i32>) = G_LOAD %1 :: (load (<16 x i32>), addrspace 1) $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %5 ... @@ -189,8 +189,8 @@ body: | # GCN-LABEL: name: constant_address_positive{{$}} # GCN: %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 44 -# VI: %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 64, 0 :: (dereferenceable invariant load (s32), addrspace 4) -# SICI: %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 16, 0 :: (dereferenceable invariant load (s32), addrspace 4) +# VI: %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 64, 0 :: (dereferenceable invariant load (i32), addrspace 4) +# SICI: %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 16, 0 :: (dereferenceable invariant load (i32), addrspace 4) --- @@ -202,9 +202,9 @@ body: | bb.0: liveins: $sgpr0_sgpr1, $vgpr2_vgpr3 %0:sgpr(p4) = G_CONSTANT i64 44 - %1:sgpr(s64) = G_CONSTANT i64 64 + %1:sgpr(i64) = G_CONSTANT i64 64 %2:sgpr(p4) = G_PTR_ADD %0, %1 - %3:sgpr(s32) = G_LOAD %2 :: (dereferenceable invariant load (s32), align 4, addrspace 4) + %3:sgpr(i32) = G_LOAD %2 :: (dereferenceable invariant load (i32), align 4, addrspace 4) S_ENDPGM 0, implicit %3 ... @@ -224,10 +224,10 @@ body: | bb.0: liveins: $sgpr0_sgpr1, $sgpr2 %0:sgpr(p4) = COPY $sgpr0_sgpr1 - %1:sgpr(s32) = COPY $sgpr2 - %2:sgpr(s64) = G_ZEXT %1:sgpr(s32) + %1:sgpr(i32) = COPY $sgpr2 + %2:sgpr(i64) = G_ZEXT %1:sgpr(i32) %4:sgpr(p4) = G_PTR_ADD %0, %2 - %5:sgpr(s32) = G_LOAD %4 :: (dereferenceable invariant load (s32), align 4, addrspace 4) + %5:sgpr(i32) = G_LOAD %4 :: (dereferenceable invariant load (i32), align 4, addrspace 4) S_ENDPGM 0, implicit %5 ... @@ -247,11 +247,11 @@ body: | bb.0: liveins: $sgpr0_sgpr1, $sgpr2 %0:sgpr(p4) = COPY $sgpr0_sgpr1 - %1:sgpr(s32) = COPY $sgpr2 - %2:sgpr(s64) = G_ZEXT %1:sgpr(s32) + %1:sgpr(i32) = COPY $sgpr2 + %2:sgpr(i64) = G_ZEXT %1:sgpr(i32) %4:sgpr(p4) = G_PTR_ADD %0, %2 - %5:sgpr(s64) = G_CONSTANT i64 16 + %5:sgpr(i64) = G_CONSTANT i64 16 %6:sgpr(p4) = G_PTR_ADD %4, %5 - %7:sgpr(s32) = G_LOAD %6 :: (dereferenceable invariant load (s32), align 4, addrspace 4) + %7:sgpr(i32) = G_LOAD %6 :: (dereferenceable invariant load (i32), align 4, addrspace 4) S_ENDPGM 0, implicit %7 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.mir index d18677d4adf69..59b8334f2e9c7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.mir @@ -53,10 +53,10 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GFX10-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], [[COPY1]], implicit-def dead $scc ; GFX10-NEXT: S_ENDPGM 0, implicit [[S_LSHR_B32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = G_LSHR %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i32) = G_LSHR %0, %1(i32) + S_ENDPGM 0, implicit %2(i32) ... 
--- @@ -106,10 +106,10 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(s32) = G_LSHR %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:vgpr(i32) = G_LSHR %0, %1(i32) + S_ENDPGM 0, implicit %2(i32) ... --- @@ -159,10 +159,10 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX10-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr(s32) = COPY $sgpr0 - %2:vgpr(s32) = G_LSHR %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = COPY $sgpr0 + %2:vgpr(i32) = G_LSHR %0, %1(i32) + S_ENDPGM 0, implicit %2(i32) ... --- @@ -212,10 +212,10 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX10-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = G_LSHR %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = G_LSHR %0, %1(i32) + S_ENDPGM 0, implicit %2(i32) ... --- @@ -265,10 +265,10 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; GFX10-NEXT: [[S_LSHR_B64_:%[0-9]+]]:sreg_64 = S_LSHR_B64 [[COPY]], [[COPY1]], implicit-def dead $scc ; GFX10-NEXT: S_ENDPGM 0, implicit [[S_LSHR_B64_]] - %0:sgpr(s64) = COPY $sgpr0_sgpr1 - %1:sgpr(s32) = COPY $sgpr2 - %2:sgpr(s64) = G_LSHR %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i64) = COPY $sgpr0_sgpr1 + %1:sgpr(i32) = COPY $sgpr2 + %2:sgpr(i64) = G_LSHR %0, %1(i32) + S_ENDPGM 0, implicit %2(i64) ... --- @@ -318,10 +318,10 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: [[V_LSHRREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B64_e64_]] - %0:sgpr(s64) = COPY $sgpr0_sgpr1 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(s64) = G_LSHR %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i64) = COPY $sgpr0_sgpr1 + %1:vgpr(i32) = COPY $vgpr0 + %2:vgpr(i64) = G_LSHR %0, %1(i32) + S_ENDPGM 0, implicit %2(i64) ... --- @@ -371,10 +371,10 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX10-NEXT: [[V_LSHRREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B64_e64_]] - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:sgpr(s32) = COPY $sgpr0 - %2:vgpr(s64) = G_LSHR %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:sgpr(i32) = COPY $sgpr0 + %2:vgpr(i64) = G_LSHR %0, %1(i32) + S_ENDPGM 0, implicit %2(i64) ... --- @@ -424,9 +424,8 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX10-NEXT: [[V_LSHRREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B64_e64_]] - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s64) = G_LSHR %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(i32) = COPY $vgpr2 + %2:vgpr(i64) = G_LSHR %0, %1(i32) + S_ENDPGM 0, implicit %2(i64) ... 
- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir index a96b574a64784..8b4884f623a12 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir @@ -11,11 +11,11 @@ # RUN: FileCheck --check-prefix=ERR %s < %t # ERR-NOT: remark -# ERR: remark: :0:0: cannot select: %4:sgpr(s16) = G_LSHR %2:sgpr, %3:sgpr(s16) (in function: lshr_s16_s16_ss) -# ERR-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_LSHR %2:vgpr, %1:vgpr(s32) (in function: lshr_s16_s32_vv) -# ERR-NEXT: remark: :0:0: cannot select: %3:sgpr(s16) = G_LSHR %2:sgpr, %1:sgpr(s32) (in function: lshr_s16_s32_ss) -# ERR-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_LSHR %2:sgpr, %1:vgpr(s32) (in function: lshr_s16_s32_sv) -# ERR-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_LSHR %2:vgpr, %1:sgpr(s32) (in function: lshr_s16_s32_vs) +# ERR: remark: :0:0: cannot select: %4:sgpr(i16) = G_LSHR %2:sgpr, %3:sgpr(i16) (in function: lshr_s16_s16_ss) +# ERR-NEXT: remark: :0:0: cannot select: %3:vgpr(i16) = G_LSHR %2:vgpr, %1:vgpr(i32) (in function: lshr_s16_s32_vv) +# ERR-NEXT: remark: :0:0: cannot select: %3:sgpr(i16) = G_LSHR %2:sgpr, %1:sgpr(i32) (in function: lshr_s16_s32_ss) +# ERR-NEXT: remark: :0:0: cannot select: %3:vgpr(i16) = G_LSHR %2:sgpr, %1:vgpr(i32) (in function: lshr_s16_s32_sv) +# ERR-NEXT: remark: :0:0: cannot select: %3:vgpr(i16) = G_LSHR %2:vgpr, %1:sgpr(i32) (in function: lshr_s16_s32_vs) # ERR-NOT: remark --- @@ -30,45 +30,48 @@ body: | ; GFX8-LABEL: name: lshr_s16_s16_ss ; GFX8: liveins: $sgpr0, $sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:sgpr(s16) = G_LSHR [[TRUNC]], [[TRUNC1]](s16) - ; GFX8-NEXT: S_ENDPGM 0, implicit [[LSHR]](s16) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY1]](i32) + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:sgpr(i16) = G_LSHR [[TRUNC]], [[TRUNC1]](i16) + ; GFX8-NEXT: S_ENDPGM 0, implicit [[LSHR]](i16) + ; ; GFX9-LABEL: name: lshr_s16_s16_ss ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:sgpr(s16) = G_LSHR [[TRUNC]], [[TRUNC1]](s16) - ; GFX9-NEXT: S_ENDPGM 0, implicit [[LSHR]](s16) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:sgpr(i16) = G_LSHR [[TRUNC]], [[TRUNC1]](i16) + ; GFX9-NEXT: S_ENDPGM 0, implicit [[LSHR]](i16) + ; ; GFX10-LABEL: name: lshr_s16_s16_ss ; GFX10: liveins: $sgpr0, $sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = 
G_TRUNC [[COPY]](s32) - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) - ; GFX10-NEXT: [[LSHR:%[0-9]+]]:sgpr(s16) = G_LSHR [[TRUNC]], [[TRUNC1]](s16) - ; GFX10-NEXT: S_ENDPGM 0, implicit [[LSHR]](s16) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY1]](i32) + ; GFX10-NEXT: [[LSHR:%[0-9]+]]:sgpr(i16) = G_LSHR [[TRUNC]], [[TRUNC1]](i16) + ; GFX10-NEXT: S_ENDPGM 0, implicit [[LSHR]](i16) + ; ; GFX11-LABEL: name: lshr_s16_s16_ss ; GFX11: liveins: $sgpr0, $sgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) - ; GFX11-NEXT: [[LSHR:%[0-9]+]]:sgpr(s16) = G_LSHR [[TRUNC]], [[TRUNC1]](s16) - ; GFX11-NEXT: S_ENDPGM 0, implicit [[LSHR]](s16) - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s16) = G_TRUNC %0 - %3:sgpr(s16) = G_TRUNC %1 - %4:sgpr(s16) = G_LSHR %2, %3 - S_ENDPGM 0, implicit %4 + ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY1]](i32) + ; GFX11-NEXT: [[LSHR:%[0-9]+]]:sgpr(i16) = G_LSHR [[TRUNC]], [[TRUNC1]](i16) + ; GFX11-NEXT: S_ENDPGM 0, implicit [[LSHR]](i16) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i16) = G_TRUNC %0(i32) + %3:sgpr(i16) = G_TRUNC %1(i32) + %4:sgpr(i16) = G_LSHR %2, %3(i16) + S_ENDPGM 0, implicit %4(i16) ... --- @@ -86,6 +89,7 @@ body: | ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX8-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]] + ; ; GFX9-LABEL: name: lshr_s16_s16_vs ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} @@ -93,6 +97,7 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX9-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]] + ; ; GFX10-LABEL: name: lshr_s16_s16_vs ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} @@ -100,6 +105,7 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX10-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]] + ; ; GFX11-LABEL: name: lshr_s16_s16_vs ; GFX11: liveins: $sgpr0, $vgpr0 ; GFX11-NEXT: {{ $}} @@ -107,12 +113,12 @@ body: | ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX11-NEXT: [[V_LSHRREV_B16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_fake16_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_fake16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr(s32) = COPY $sgpr0 - %2:vgpr(s16) = G_TRUNC %0 - %3:sgpr(s16) = G_TRUNC %1 - %4:vgpr(s16) = G_LSHR %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = COPY $sgpr0 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:sgpr(i16) = G_TRUNC %1(i32) + %4:vgpr(i16) = G_LSHR %2, %3(i16) + S_ENDPGM 0, implicit %4(i16) ... 
--- @@ -127,40 +133,43 @@ body: | ; GFX8-LABEL: name: lshr_s16_s32_vv ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) - ; GFX8-NEXT: S_ENDPGM 0, implicit [[LSHR]](s16) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:vgpr(i16) = G_LSHR [[TRUNC]], [[COPY1]](i32) + ; GFX8-NEXT: S_ENDPGM 0, implicit [[LSHR]](i16) + ; ; GFX9-LABEL: name: lshr_s16_s32_vv ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) - ; GFX9-NEXT: S_ENDPGM 0, implicit [[LSHR]](s16) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:vgpr(i16) = G_LSHR [[TRUNC]], [[COPY1]](i32) + ; GFX9-NEXT: S_ENDPGM 0, implicit [[LSHR]](i16) + ; ; GFX10-LABEL: name: lshr_s16_s32_vv ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX10-NEXT: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) - ; GFX10-NEXT: S_ENDPGM 0, implicit [[LSHR]](s16) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX10-NEXT: [[LSHR:%[0-9]+]]:vgpr(i16) = G_LSHR [[TRUNC]], [[COPY1]](i32) + ; GFX10-NEXT: S_ENDPGM 0, implicit [[LSHR]](i16) + ; ; GFX11-LABEL: name: lshr_s16_s32_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX11-NEXT: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) - ; GFX11-NEXT: S_ENDPGM 0, implicit [[LSHR]](s16) - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_LSHR %2, %1 - S_ENDPGM 0, implicit %3 + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX11-NEXT: [[LSHR:%[0-9]+]]:vgpr(i16) = G_LSHR [[TRUNC]], [[COPY1]](i32) + ; GFX11-NEXT: S_ENDPGM 0, implicit [[LSHR]](i16) + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_LSHR %2, %1(i32) + S_ENDPGM 0, implicit %3(i16) ... 
--- @@ -179,6 +188,7 @@ body: | ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]] + ; ; GFX9-LABEL: name: lshr_s16_s16_vv ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -186,6 +196,7 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]] + ; ; GFX10-LABEL: name: lshr_s16_s16_vv ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} @@ -193,6 +204,7 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX10-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]] + ; ; GFX11-LABEL: name: lshr_s16_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} @@ -200,12 +212,12 @@ body: | ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[V_LSHRREV_B16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_fake16_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_fake16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vgpr(s16) = G_LSHR %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %4:vgpr(i16) = G_LSHR %2, %3(i16) + S_ENDPGM 0, implicit %4(i16) ... --- @@ -224,6 +236,7 @@ body: | ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]] + ; ; GFX9-LABEL: name: lshr_s16_s16_vv_zext_to_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -231,6 +244,7 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]] + ; ; GFX10-LABEL: name: lshr_s16_s16_vv_zext_to_s32 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} @@ -240,6 +254,7 @@ body: | ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 ; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[V_LSHRREV_B16_e64_]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] + ; ; GFX11-LABEL: name: lshr_s16_s16_vv_zext_to_s32 ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} @@ -249,13 +264,13 @@ body: | ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 ; GFX11-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[V_LSHRREV_B16_fake16_e64_]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vgpr(s16) = G_LSHR %2, %3 - %5:vgpr(s32) = G_ZEXT %4 - S_ENDPGM 0, implicit %5 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %4:vgpr(i16) = G_LSHR %2, %3(i16) + %5:vgpr(i32) = G_ZEXT %4(i16) + S_ENDPGM 0, implicit %5(i32) ... 
--- @@ -276,6 +291,7 @@ body: | ; GFX8-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_LSHRREV_B16_e64_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1 ; GFX8-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; ; GFX9-LABEL: name: lshr_s16_vv_zext_to_s64 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -285,6 +301,7 @@ body: | ; GFX9-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_LSHRREV_B16_e64_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1 ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; ; GFX10-LABEL: name: lshr_s16_vv_zext_to_s64 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} @@ -297,6 +314,7 @@ body: | ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1 ; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; ; GFX11-LABEL: name: lshr_s16_vv_zext_to_s64 ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} @@ -309,13 +327,13 @@ body: | ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1 ; GFX11-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vgpr(s16) = G_LSHR %2, %3 - %5:vgpr(s64) = G_ZEXT %4 - S_ENDPGM 0, implicit %5 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %4:vgpr(i16) = G_LSHR %2, %3(i16) + %5:vgpr(i64) = G_ZEXT %4(i16) + S_ENDPGM 0, implicit %5(i64) ... 
--- @@ -330,40 +348,43 @@ body: | ; GFX8-LABEL: name: lshr_s16_s32_ss ; GFX8: liveins: $sgpr0, $sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:sgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) - ; GFX8-NEXT: S_ENDPGM 0, implicit [[LSHR]](s16) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:sgpr(i16) = G_LSHR [[TRUNC]], [[COPY1]](i32) + ; GFX8-NEXT: S_ENDPGM 0, implicit [[LSHR]](i16) + ; ; GFX9-LABEL: name: lshr_s16_s32_ss ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:sgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) - ; GFX9-NEXT: S_ENDPGM 0, implicit [[LSHR]](s16) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:sgpr(i16) = G_LSHR [[TRUNC]], [[COPY1]](i32) + ; GFX9-NEXT: S_ENDPGM 0, implicit [[LSHR]](i16) + ; ; GFX10-LABEL: name: lshr_s16_s32_ss ; GFX10: liveins: $sgpr0, $sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX10-NEXT: [[LSHR:%[0-9]+]]:sgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) - ; GFX10-NEXT: S_ENDPGM 0, implicit [[LSHR]](s16) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX10-NEXT: [[LSHR:%[0-9]+]]:sgpr(i16) = G_LSHR [[TRUNC]], [[COPY1]](i32) + ; GFX10-NEXT: S_ENDPGM 0, implicit [[LSHR]](i16) + ; ; GFX11-LABEL: name: lshr_s16_s32_ss ; GFX11: liveins: $sgpr0, $sgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX11-NEXT: [[LSHR:%[0-9]+]]:sgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) - ; GFX11-NEXT: S_ENDPGM 0, implicit [[LSHR]](s16) - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s16) = G_TRUNC %0 - %3:sgpr(s16) = G_LSHR %2, %1 - S_ENDPGM 0, implicit %3 + ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX11-NEXT: [[LSHR:%[0-9]+]]:sgpr(i16) = G_LSHR [[TRUNC]], [[COPY1]](i32) + ; GFX11-NEXT: S_ENDPGM 0, implicit [[LSHR]](i16) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i16) = G_TRUNC %0(i32) + %3:sgpr(i16) = G_LSHR %2, %1(i32) + S_ENDPGM 0, implicit %3(i16) ... 
--- @@ -377,40 +398,43 @@ body: | ; GFX8-LABEL: name: lshr_s16_s32_sv ; GFX8: liveins: $sgpr0, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) - ; GFX8-NEXT: S_ENDPGM 0, implicit [[LSHR]](s16) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:vgpr(i16) = G_LSHR [[TRUNC]], [[COPY1]](i32) + ; GFX8-NEXT: S_ENDPGM 0, implicit [[LSHR]](i16) + ; ; GFX9-LABEL: name: lshr_s16_s32_sv ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) - ; GFX9-NEXT: S_ENDPGM 0, implicit [[LSHR]](s16) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:vgpr(i16) = G_LSHR [[TRUNC]], [[COPY1]](i32) + ; GFX9-NEXT: S_ENDPGM 0, implicit [[LSHR]](i16) + ; ; GFX10-LABEL: name: lshr_s16_s32_sv ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX10-NEXT: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) - ; GFX10-NEXT: S_ENDPGM 0, implicit [[LSHR]](s16) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX10-NEXT: [[LSHR:%[0-9]+]]:vgpr(i16) = G_LSHR [[TRUNC]], [[COPY1]](i32) + ; GFX10-NEXT: S_ENDPGM 0, implicit [[LSHR]](i16) + ; ; GFX11-LABEL: name: lshr_s16_s32_sv ; GFX11: liveins: $sgpr0, $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX11-NEXT: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) - ; GFX11-NEXT: S_ENDPGM 0, implicit [[LSHR]](s16) - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:sgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_LSHR %2, %1 - S_ENDPGM 0, implicit %3 + ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX11-NEXT: [[LSHR:%[0-9]+]]:vgpr(i16) = G_LSHR [[TRUNC]], [[COPY1]](i32) + ; GFX11-NEXT: S_ENDPGM 0, implicit [[LSHR]](i16) + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:sgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_LSHR %2, %1(i32) + S_ENDPGM 0, implicit %3(i16) ... 
--- @@ -428,6 +452,7 @@ body: | ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]] + ; ; GFX9-LABEL: name: lshr_s16_s16_sv ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} @@ -435,6 +460,7 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]] + ; ; GFX10-LABEL: name: lshr_s16_s16_sv ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} @@ -442,6 +468,7 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]] + ; ; GFX11-LABEL: name: lshr_s16_s16_sv ; GFX11: liveins: $sgpr0, $vgpr0 ; GFX11-NEXT: {{ $}} @@ -449,12 +476,12 @@ body: | ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[V_LSHRREV_B16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_fake16_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_fake16_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:sgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vgpr(s16) = G_LSHR %2, %3 - S_ENDPGM 0, implicit %4 + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:sgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %4:vgpr(i16) = G_LSHR %2, %3(i16) + S_ENDPGM 0, implicit %4(i16) ... --- @@ -468,38 +495,41 @@ body: | ; GFX8-LABEL: name: lshr_s16_s32_vs ; GFX8: liveins: $sgpr0, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) - ; GFX8-NEXT: S_ENDPGM 0, implicit [[LSHR]](s16) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:vgpr(i16) = G_LSHR [[TRUNC]], [[COPY1]](i32) + ; GFX8-NEXT: S_ENDPGM 0, implicit [[LSHR]](i16) + ; ; GFX9-LABEL: name: lshr_s16_s32_vs ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) - ; GFX9-NEXT: S_ENDPGM 0, implicit [[LSHR]](s16) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:vgpr(i16) = G_LSHR [[TRUNC]], [[COPY1]](i32) + ; GFX9-NEXT: S_ENDPGM 0, implicit [[LSHR]](i16) + ; ; GFX10-LABEL: name: lshr_s16_s32_vs ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX10-NEXT: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) - ; GFX10-NEXT: S_ENDPGM 0, implicit [[LSHR]](s16) + ; GFX10-NEXT: 
[[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX10-NEXT: [[LSHR:%[0-9]+]]:vgpr(i16) = G_LSHR [[TRUNC]], [[COPY1]](i32) + ; GFX10-NEXT: S_ENDPGM 0, implicit [[LSHR]](i16) + ; ; GFX11-LABEL: name: lshr_s16_s32_vs ; GFX11: liveins: $sgpr0, $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX11-NEXT: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) - ; GFX11-NEXT: S_ENDPGM 0, implicit [[LSHR]](s16) - %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr(s32) = COPY $sgpr0 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_LSHR %2, %1 - S_ENDPGM 0, implicit %3 + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX11-NEXT: [[LSHR:%[0-9]+]]:vgpr(i16) = G_LSHR [[TRUNC]], [[COPY1]](i32) + ; GFX11-NEXT: S_ENDPGM 0, implicit [[LSHR]](i16) + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = COPY $sgpr0 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_LSHR %2, %1(i32) + S_ENDPGM 0, implicit %3(i16) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.v2s16.mir index 4fb8e6ab71a6a..066f90a7ec5b5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.v2s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.v2s16.mir @@ -7,7 +7,7 @@ # RUN: FileCheck -check-prefixes=ERR-GFX910,ERR %s < %t # ERR-NOT: remark -# ERR-GFX910: remark: :0:0: cannot select: %2:sgpr(<2 x s16>) = G_LSHR %0:sgpr, %1:sgpr(<2 x s16>) (in function: lshr_v2s16_ss) +# ERR-GFX910: remark: :0:0: cannot select: %2:sgpr(<2 x i16>) = G_LSHR %0:sgpr, %1:sgpr(<2 x i16>) (in function: lshr_v2s16_ss) # ERR-NOT: remark --- @@ -21,21 +21,22 @@ body: | ; GFX9-LABEL: name: lshr_v2s16_ss ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:sgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>) - ; GFX9-NEXT: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr1 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:sgpr(<2 x i16>) = G_LSHR [[COPY]], [[COPY1]](<2 x i16>) + ; GFX9-NEXT: S_ENDPGM 0, implicit [[LSHR]](<2 x i16>) + ; ; GFX10-LABEL: name: lshr_v2s16_ss ; GFX10: liveins: $sgpr0, $sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1 - ; GFX10-NEXT: [[LSHR:%[0-9]+]]:sgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>) - ; GFX10-NEXT: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>) - %0:sgpr(<2 x s16>) = COPY $sgpr0 - %1:sgpr(<2 x s16>) = COPY $sgpr1 - %2:sgpr(<2 x s16>) = G_LSHR %0, %1 - S_ENDPGM 0, implicit %2 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr1 + ; GFX10-NEXT: [[LSHR:%[0-9]+]]:sgpr(<2 x i16>) = G_LSHR [[COPY]], [[COPY1]](<2 x i16>) + ; GFX10-NEXT: S_ENDPGM 0, implicit [[LSHR]](<2 x i16>) + %0:sgpr(<2 x i16>) = COPY $sgpr0 + %1:sgpr(<2 x i16>) = COPY $sgpr1 + %2:sgpr(<2 x i16>) = G_LSHR %0, %1(<2 x i16>) + 
S_ENDPGM 0, implicit %2(<2 x i16>) ... --- @@ -53,6 +54,7 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[V_PK_LSHRREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHRREV_B16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_PK_LSHRREV_B16_]] + ; ; GFX10-LABEL: name: lshr_v2s16_sv ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} @@ -60,10 +62,10 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: [[V_PK_LSHRREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHRREV_B16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_PK_LSHRREV_B16_]] - %0:sgpr(<2 x s16>) = COPY $sgpr0 - %1:vgpr(<2 x s16>) = COPY $vgpr0 - %2:vgpr(<2 x s16>) = G_LSHR %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(<2 x i16>) = COPY $sgpr0 + %1:vgpr(<2 x i16>) = COPY $vgpr0 + %2:vgpr(<2 x i16>) = G_LSHR %0, %1(<2 x i16>) + S_ENDPGM 0, implicit %2(<2 x i16>) ... --- @@ -81,6 +83,7 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX9-NEXT: [[V_PK_LSHRREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHRREV_B16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_PK_LSHRREV_B16_]] + ; ; GFX10-LABEL: name: lshr_v2s16_vs ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} @@ -88,10 +91,10 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX10-NEXT: [[V_PK_LSHRREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHRREV_B16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_PK_LSHRREV_B16_]] - %0:vgpr(<2 x s16>) = COPY $vgpr0 - %1:sgpr(<2 x s16>) = COPY $sgpr0 - %2:vgpr(<2 x s16>) = G_LSHR %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(<2 x i16>) = COPY $vgpr0 + %1:sgpr(<2 x i16>) = COPY $sgpr0 + %2:vgpr(<2 x i16>) = G_LSHR %0, %1(<2 x i16>) + S_ENDPGM 0, implicit %2(<2 x i16>) ... --- @@ -109,6 +112,7 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9-NEXT: [[V_PK_LSHRREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHRREV_B16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_PK_LSHRREV_B16_]] + ; ; GFX10-LABEL: name: lshr_v2s16_vv ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} @@ -116,8 +120,8 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX10-NEXT: [[V_PK_LSHRREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHRREV_B16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_PK_LSHRREV_B16_]] - %0:vgpr(<2 x s16>) = COPY $vgpr0 - %1:vgpr(<2 x s16>) = COPY $vgpr1 - %2:vgpr(<2 x s16>) = G_LSHR %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(<2 x i16>) = COPY $vgpr0 + %1:vgpr(<2 x i16>) = COPY $vgpr1 + %2:vgpr(<2 x i16>) = G_LSHR %0, %1(<2 x i16>) + S_ENDPGM 0, implicit %2(<2 x i16>) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-mad_64_32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-mad_64_32.mir index 0e2ea18c74e4a..831c9b7edf3c8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-mad_64_32.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-mad_64_32.mir @@ -47,13 +47,13 @@ body: | ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3 ; GFX12-NEXT: [[V_MAD_U64_U32_e64_:%[0-9]+]]:vreg_64, [[V_MAD_U64_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_MAD_U64_U32_e64 [[COPY]], [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX12-NEXT: S_ENDPGM 0, implicit [[V_MAD_U64_U32_e64_]], implicit [[V_MAD_U64_U32_e64_1]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = COPY $vgpr3 - %4:vgpr(s64) = G_MERGE_VALUES %2, %3 - %5:vgpr(s64), %6:vcc(s1) = G_AMDGPU_MAD_U64_U32 %0, %1, %3 - S_ENDPGM 0, implicit %5, implicit %6 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(i32) = COPY $vgpr3 + %4:vgpr(i64) = G_MERGE_VALUES %2(i32), %3(i32) + %5:vgpr(i64), %6:vcc(i1) = G_AMDGPU_MAD_U64_U32 %0(i32), %1, %3 + S_ENDPGM 0, implicit %5(i64), implicit %6(i1) ... --- @@ -99,11 +99,11 @@ body: | ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3 ; GFX12-NEXT: [[V_MAD_I64_I32_e64_:%[0-9]+]]:vreg_64, [[V_MAD_I64_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_MAD_I64_I32_e64 [[COPY]], [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX12-NEXT: S_ENDPGM 0, implicit [[V_MAD_I64_I32_e64_]], implicit [[V_MAD_I64_I32_e64_1]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = COPY $vgpr3 - %4:vgpr(s64) = G_MERGE_VALUES %2, %3 - %5:vgpr(s64), %6:vcc(s1) = G_AMDGPU_MAD_I64_I32 %0, %1, %3 - S_ENDPGM 0, implicit %5, implicit %6 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(i32) = COPY $vgpr3 + %4:vgpr(i64) = G_MERGE_VALUES %2(i32), %3(i32) + %5:vgpr(i64), %6:vcc(i1) = G_AMDGPU_MAD_I64_I32 %0(i32), %1, %3 + S_ENDPGM 0, implicit %5(i64), implicit %6(i1) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-merge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-merge-values.mir index a030506f6af59..11143ef90ab39 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-merge-values.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-merge-values.mir @@ -18,10 +18,10 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s64) = G_MERGE_VALUES %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i64) = G_MERGE_VALUES %0(i32), %1(i32) + S_ENDPGM 0, implicit %2(i64) ... --- @@ -41,10 +41,10 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(s64) = G_MERGE_VALUES %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:vgpr(i64) = G_MERGE_VALUES %0(i32), %1(i32) + S_ENDPGM 0, implicit %2(i64) ... 
--- @@ -64,10 +64,10 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] - %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr(s32) = COPY $sgpr0 - %2:vgpr(s64) = G_MERGE_VALUES %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = COPY $sgpr0 + %2:vgpr(i64) = G_MERGE_VALUES %0(i32), %1(i32) + S_ENDPGM 0, implicit %2(i64) ... --- @@ -87,10 +87,10 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s64) = G_MERGE_VALUES %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i64) = G_MERGE_VALUES %0(i32), %1(i32) + S_ENDPGM 0, implicit %2(i64) ... --- @@ -110,11 +110,11 @@ body: | ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_96 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2 ; GCN-NEXT: $sgpr0_sgpr1_sgpr2 = COPY [[REG_SEQUENCE]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = COPY $sgpr2 - %3:sgpr(s96) = G_MERGE_VALUES %0, %1, %2 - $sgpr0_sgpr1_sgpr2 = COPY %3 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i32) = COPY $sgpr2 + %3:sgpr(i96) = G_MERGE_VALUES %0(i32), %1(i32), %2(i32) + $sgpr0_sgpr1_sgpr2 = COPY %3(i96) ... --- @@ -135,11 +135,11 @@ body: | ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2 ; GCN-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[REG_SEQUENCE]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s96) = G_MERGE_VALUES %0, %1, %2 - $vgpr0_vgpr1_vgpr2 = COPY %3 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(i96) = G_MERGE_VALUES %0(i32), %1(i32), %2(i32) + $vgpr0_vgpr1_vgpr2 = COPY %3(i96) ... --- @@ -161,12 +161,12 @@ body: | ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[REG_SEQUENCE]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = COPY $sgpr2 - %3:sgpr(s32) = COPY $sgpr3 - %4:sgpr(s128) = G_MERGE_VALUES %0, %1, %2, %3 - $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %4 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i32) = COPY $sgpr2 + %3:sgpr(i32) = COPY $sgpr3 + %4:sgpr(i128) = G_MERGE_VALUES %0(i32), %1(i32), %2(i32), %3(i32) + $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %4(i128) ... 
--- @@ -188,12 +188,12 @@ body: | ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[REG_SEQUENCE]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = COPY $vgpr3 - %4:vgpr(s128) = G_MERGE_VALUES %0, %1, %2, %3 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(i32) = COPY $vgpr3 + %4:vgpr(i128) = G_MERGE_VALUES %0(i32), %1(i32), %2(i32), %3(i32) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %4(i128) ... --- @@ -213,10 +213,10 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3 ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[REG_SEQUENCE]] - %0:sgpr(s64) = COPY $sgpr0_sgpr1 - %1:sgpr(s64) = COPY $sgpr2_sgpr3 - %4:sgpr(s128) = G_MERGE_VALUES %0, %1 - $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %4 + %0:sgpr(i64) = COPY $sgpr0_sgpr1 + %1:sgpr(i64) = COPY $sgpr2_sgpr3 + %2:sgpr(i128) = G_MERGE_VALUES %0(i64), %1(i64) + $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %2(i128) ... --- @@ -236,10 +236,10 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3 ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[REG_SEQUENCE]] - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = COPY $vgpr2_vgpr3 - %2:vgpr(s128) = G_MERGE_VALUES %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(i64) = COPY $vgpr2_vgpr3 + %2:vgpr(i128) = G_MERGE_VALUES %0(i64), %1(i64) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(i128) ... --- @@ -262,13 +262,13 @@ body: | ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_160 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3, [[COPY4]], %subreg.sub4 ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 = COPY [[REG_SEQUENCE]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = COPY $sgpr2 - %3:sgpr(s32) = COPY $sgpr3 - %4:sgpr(s32) = COPY $sgpr4 - %5:sgpr(s160) = G_MERGE_VALUES %0, %1, %2, %3, %4 - $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 = COPY %5 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i32) = COPY $sgpr2 + %3:sgpr(i32) = COPY $sgpr3 + %4:sgpr(i32) = COPY $sgpr4 + %5:sgpr(i160) = G_MERGE_VALUES %0(i32), %1(i32), %2(i32), %3(i32), %4(i32) + $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 = COPY %5(i160) ... 
--- @@ -291,13 +291,13 @@ body: | ; GCN-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_160 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3, [[COPY4]], %subreg.sub4 ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY [[REG_SEQUENCE]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = COPY $vgpr3 - %4:vgpr(s32) = COPY $vgpr4 - %5:vgpr(s160) = G_MERGE_VALUES %0, %1, %2, %3, %4 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY %5 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(i32) = COPY $vgpr3 + %4:vgpr(i32) = COPY $vgpr4 + %5:vgpr(i160) = G_MERGE_VALUES %0(i32), %1(i32), %2(i32), %3(i32), %4(i32) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY %5(i160) ... --- @@ -318,11 +318,11 @@ body: | ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_192 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3, [[COPY2]], %subreg.sub4_sub5 ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] - %0:sgpr(s64) = COPY $sgpr0_sgpr1 - %1:sgpr(s64) = COPY $sgpr2_sgpr3 - %2:sgpr(s64) = COPY $sgpr4_sgpr5 - %3:sgpr(s192) = G_MERGE_VALUES %0, %1, %2 - S_ENDPGM 0, implicit %3 + %0:sgpr(i64) = COPY $sgpr0_sgpr1 + %1:sgpr(i64) = COPY $sgpr2_sgpr3 + %2:sgpr(i64) = COPY $sgpr4_sgpr5 + %3:sgpr(i192) = G_MERGE_VALUES %0(i64), %1(i64), %2(i64) + S_ENDPGM 0, implicit %3(i192) ... --- @@ -343,11 +343,11 @@ body: | ; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_192 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3, [[COPY2]], %subreg.sub4_sub5 ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = COPY $vgpr2_vgpr3 - %2:vgpr(s64) = COPY $vgpr4_vgpr5 - %3:vgpr(s192) = G_MERGE_VALUES %0, %1, %2 - S_ENDPGM 0, implicit %3 + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(i64) = COPY $vgpr2_vgpr3 + %2:vgpr(i64) = COPY $vgpr4_vgpr5 + %3:vgpr(i192) = G_MERGE_VALUES %0(i64), %1(i64), %2(i64) + S_ENDPGM 0, implicit %3(i192) ... --- @@ -369,12 +369,12 @@ body: | ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_256 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3, [[COPY2]], %subreg.sub4_sub5, [[COPY3]], %subreg.sub6_sub7 ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[REG_SEQUENCE]] - %0:sgpr(s64) = COPY $sgpr0_sgpr1 - %1:sgpr(s64) = COPY $sgpr2_sgpr3 - %2:sgpr(s64) = COPY $sgpr4_sgpr5 - %3:sgpr(s64) = COPY $sgpr6_sgpr7 - %4:sgpr(s256) = G_MERGE_VALUES %0, %1, %2, %3 - $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY %4 + %0:sgpr(i64) = COPY $sgpr0_sgpr1 + %1:sgpr(i64) = COPY $sgpr2_sgpr3 + %2:sgpr(i64) = COPY $sgpr4_sgpr5 + %3:sgpr(i64) = COPY $sgpr6_sgpr7 + %4:sgpr(i256) = G_MERGE_VALUES %0(i64), %1(i64), %2(i64), %3(i64) + $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY %4(i256) ... 
--- @@ -394,10 +394,10 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY $sgpr4_sgpr5_sgpr6_sgpr7 ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_256 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2_sub3, [[COPY1]], %subreg.sub4_sub5_sub6_sub7 ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[REG_SEQUENCE]] - %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %1:sgpr(s128) = COPY $sgpr4_sgpr5_sgpr6_sgpr7 - %2:sgpr(s256) = G_MERGE_VALUES %0, %1 - $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY %2 + %0:sgpr(i128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %1:sgpr(i128) = COPY $sgpr4_sgpr5_sgpr6_sgpr7 + %2:sgpr(i256) = G_MERGE_VALUES %0(i128), %1(i128) + $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY %2(i256) ... --- @@ -417,10 +417,10 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_256 = COPY $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7, [[COPY1]], %subreg.sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15 ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[REG_SEQUENCE]] - %0:sgpr(s256) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, - %1:sgpr(s256) = COPY $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - %4:sgpr(s512) = G_MERGE_VALUES %0, %1 - $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %4 + %0:sgpr(i256) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + %1:sgpr(i256) = COPY $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + %2:sgpr(i512) = G_MERGE_VALUES %0(i256), %1(i256) + $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %2(i512) ... --- @@ -446,16 +446,16 @@ body: | ; GCN-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY $sgpr14_sgpr15 ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3, [[COPY2]], %subreg.sub4_sub5, [[COPY3]], %subreg.sub6_sub7, [[COPY4]], %subreg.sub8_sub9, [[COPY5]], %subreg.sub10_sub11, [[COPY6]], %subreg.sub12_sub13, [[COPY7]], %subreg.sub14_sub15 ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[REG_SEQUENCE]] - %0:sgpr(s64) = COPY $sgpr0_sgpr1 - %1:sgpr(s64) = COPY $sgpr2_sgpr3 - %2:sgpr(s64) = COPY $sgpr4_sgpr5 - %3:sgpr(s64) = COPY $sgpr6_sgpr7 - %4:sgpr(s64) = COPY $sgpr8_sgpr9 - %5:sgpr(s64) = COPY $sgpr10_sgpr11 - %6:sgpr(s64) = COPY $sgpr12_sgpr13 - %7:sgpr(s64) = COPY $sgpr14_sgpr15 - %8:sgpr(s512) = G_MERGE_VALUES %0, %1, %2, %3, %4, %5, %6, %7 - $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %8 + %0:sgpr(i64) = COPY $sgpr0_sgpr1 + %1:sgpr(i64) = COPY $sgpr2_sgpr3 + %2:sgpr(i64) = COPY $sgpr4_sgpr5 + %3:sgpr(i64) = COPY $sgpr6_sgpr7 + %4:sgpr(i64) = COPY $sgpr8_sgpr9 + %5:sgpr(i64) = COPY $sgpr10_sgpr11 + %6:sgpr(i64) = COPY $sgpr12_sgpr13 + %7:sgpr(i64) = COPY $sgpr14_sgpr15 + %8:sgpr(i512) = G_MERGE_VALUES %0(i64), %1(i64), %2(i64), %3(i64), %4(i64), %5(i64), %6(i64), %7(i64) + $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %8(i512) ... 
--- @@ -481,16 +481,16 @@ body: | ; GCN-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY $vgpr14_vgpr15 ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3, [[COPY2]], %subreg.sub4_sub5, [[COPY3]], %subreg.sub6_sub7, [[COPY4]], %subreg.sub8_sub9, [[COPY5]], %subreg.sub10_sub11, [[COPY6]], %subreg.sub12_sub13, [[COPY7]], %subreg.sub14_sub15 ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[REG_SEQUENCE]] - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = COPY $vgpr2_vgpr3 - %2:vgpr(s64) = COPY $vgpr4_vgpr5 - %3:vgpr(s64) = COPY $vgpr6_vgpr7 - %4:vgpr(s64) = COPY $vgpr8_vgpr9 - %5:vgpr(s64) = COPY $vgpr10_vgpr11 - %6:vgpr(s64) = COPY $vgpr12_vgpr13 - %7:vgpr(s64) = COPY $vgpr14_vgpr15 - %8:vgpr(s512) = G_MERGE_VALUES %0, %1, %2, %3, %4, %5, %6, %7 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %8 + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(i64) = COPY $vgpr2_vgpr3 + %2:vgpr(i64) = COPY $vgpr4_vgpr5 + %3:vgpr(i64) = COPY $vgpr6_vgpr7 + %4:vgpr(i64) = COPY $vgpr8_vgpr9 + %5:vgpr(i64) = COPY $vgpr10_vgpr11 + %6:vgpr(i64) = COPY $vgpr12_vgpr13 + %7:vgpr(i64) = COPY $vgpr14_vgpr15 + %8:vgpr(i512) = G_MERGE_VALUES %0(i64), %1(i64), %2(i64), %3(i64), %4(i64), %5(i64), %6(i64), %7(i64) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %8(i512) ... --- @@ -510,10 +510,10 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] - %0:vgpr_32(s32) = COPY $vgpr0 - %1:vgpr_32(s32) = COPY $vgpr1 - %2:vgpr(s64) = G_MERGE_VALUES %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr_32(i32) = COPY $vgpr0 + %1:vgpr_32(i32) = COPY $vgpr1 + %2:vgpr(i64) = G_MERGE_VALUES %0(i32), %1(i32) + S_ENDPGM 0, implicit %2(i64) ... --- @@ -533,10 +533,10 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vreg_64(s64) = G_MERGE_VALUES %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vreg_64(i64) = G_MERGE_VALUES %0(i32), %1(i32) + S_ENDPGM 0, implicit %2(i64) ... 
--- @@ -558,12 +558,12 @@ body: | ; GCN-NEXT: [[DEF1:%[0-9]+]]:sgpr_256 = IMPLICIT_DEF ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_1024 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7, [[DEF]], %subreg.sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15, [[COPY1]], %subreg.sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23, [[DEF1]], %subreg.sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31 ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] - %0:sgpr(s256) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, - %1:sgpr(s256) = G_IMPLICIT_DEF - %2:sgpr(s256) = COPY $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - %3:sgpr(s256) = G_IMPLICIT_DEF - %4:sgpr(s1024) = G_MERGE_VALUES %0, %1, %2, %3 - S_ENDPGM 0, implicit %4 + %0:sgpr(i256) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + %1:sgpr(i256) = G_IMPLICIT_DEF + %2:sgpr(i256) = COPY $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + %3:sgpr(i256) = G_IMPLICIT_DEF + %4:sgpr(i1024) = G_MERGE_VALUES %0(i256), %1(i256), %2(i256), %3(i256) + S_ENDPGM 0, implicit %4(i1024) ... --- @@ -584,9 +584,9 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_512 = COPY $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_1024 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15, [[COPY1]], %subreg.sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31 ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = COPY [[REG_SEQUENCE]] - %0:sgpr(s512) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - %1:sgpr(s512) = COPY $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 - %2:sgpr(s1024) = G_MERGE_VALUES %0, %1 - $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = COPY %2 + %0:sgpr(i512) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + %1:sgpr(i512) = COPY $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + %2:sgpr(i1024) = G_MERGE_VALUES %0(i512), %1(i512) + $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = COPY %2(i1024) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-mul.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-mul.mir index ea01058da4ae7..6e2ece42865c4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-mul.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-mul.mir @@ -16,10 +16,10 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GCN-NEXT: [[S_MUL_I32_:%[0-9]+]]:sreg_32 = S_MUL_I32 [[COPY]], [[COPY1]] ; GCN-NEXT: S_ENDPGM 0, implicit [[S_MUL_I32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = G_MUL %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i32) = G_MUL %0, %1 + S_ENDPGM 0, implicit %2(i32) ... --- @@ -37,10 +37,10 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[COPY]], [[COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(s32) = G_MUL %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:vgpr(i32) = G_MUL %0, %1 + S_ENDPGM 0, implicit %2(i32) ... --- @@ -58,10 +58,10 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GCN-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[COPY]], [[COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr(s32) = COPY $sgpr0 - %2:vgpr(s32) = G_MUL %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = COPY $sgpr0 + %2:vgpr(i32) = G_MUL %0, %1 + S_ENDPGM 0, implicit %2(i32) ... --- @@ -79,8 +79,8 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GCN-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[COPY]], [[COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = G_MUL %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = G_MUL %0, %1 + S_ENDPGM 0, implicit %2(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir index b9b024519f61e..80431ab174262 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir @@ -35,13 +35,13 @@ body: | ; WAVE32-NEXT: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec ; WAVE32-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = G_CONSTANT i32 0 - %3:vcc(s1) = G_ICMP intpred(eq), %0, %2 - %4:vcc(s1) = G_ICMP intpred(eq), %1, %2 - %5:vcc(s1) = G_OR %3, %4 - S_ENDPGM 0, implicit %5 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = G_CONSTANT i32 0 + %3:vcc(i1) = G_ICMP intpred(eq), %0(i32), %2 + %4:vcc(i1) = G_ICMP intpred(eq), %1(i32), %2 + %5:vcc(i1) = G_OR %3, %4 + S_ENDPGM 0, implicit %5(i1) ... 
# Should fail to select @@ -70,12 +70,12 @@ body: | ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; WAVE32-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[COPY1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s1) = G_TRUNC %0 - %3:sgpr(s1) = G_TRUNC %1 - %4:sgpr(s1) = G_OR %2, %3 - S_ENDPGM 0, implicit %4 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i1) = G_TRUNC %0(i32) + %3:sgpr(i1) = G_TRUNC %1(i32) + %4:sgpr(i1) = G_OR %2, %3 + S_ENDPGM 0, implicit %4(i1) ... --- @@ -103,12 +103,12 @@ body: | ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; WAVE32-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[COPY1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s16) = G_TRUNC %0 - %3:sgpr(s16) = G_TRUNC %1 - %4:sgpr(s16) = G_OR %2, %3 - S_ENDPGM 0, implicit %4 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i16) = G_TRUNC %0(i32) + %3:sgpr(i16) = G_TRUNC %1(i32) + %4:sgpr(i16) = G_OR %2, %3 + S_ENDPGM 0, implicit %4(i16) ... --- @@ -136,12 +136,12 @@ body: | ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; WAVE32-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[COPY]], [[COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_OR_B32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vgpr(s16) = G_OR %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %4:vgpr(i16) = G_OR %2, %3 + S_ENDPGM 0, implicit %4(i16) ... --- @@ -169,10 +169,10 @@ body: | ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; WAVE32-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[COPY1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = G_OR %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i32) = G_OR %0, %1 + S_ENDPGM 0, implicit %2(i32) ... --- @@ -200,10 +200,10 @@ body: | ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 ; WAVE32-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[COPY]], [[COPY1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_OR_B64_]] - %0:sgpr(s64) = COPY $sgpr0_sgpr1 - %1:sgpr(s64) = COPY $sgpr2_sgpr3 - %2:sgpr(s64) = G_OR %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i64) = COPY $sgpr0_sgpr1 + %1:sgpr(i64) = COPY $sgpr2_sgpr3 + %2:sgpr(i64) = G_OR %0, %1 + S_ENDPGM 0, implicit %2(i64) ... --- @@ -231,10 +231,10 @@ body: | ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; WAVE32-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[COPY1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]] - %0:sgpr(<2 x s16>) = COPY $sgpr0 - %1:sgpr(<2 x s16>) = COPY $sgpr1 - %2:sgpr(<2 x s16>) = G_OR %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(<2 x i16>) = COPY $sgpr0 + %1:sgpr(<2 x i16>) = COPY $sgpr1 + %2:sgpr(<2 x i16>) = G_OR %0, %1 + S_ENDPGM 0, implicit %2(<2 x i16>) ... 
--- @@ -262,10 +262,10 @@ body: | ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 ; WAVE32-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[COPY]], [[COPY1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_OR_B64_]] - %0:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1 - %1:sgpr(<2 x s32>) = COPY $sgpr2_sgpr3 - %2:sgpr(<2 x s32>) = G_OR %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(<2 x i32>) = COPY $sgpr0_sgpr1 + %1:sgpr(<2 x i32>) = COPY $sgpr2_sgpr3 + %2:sgpr(<2 x i32>) = G_OR %0, %1 + S_ENDPGM 0, implicit %2(<2 x i32>) ... --- @@ -293,10 +293,10 @@ body: | ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 ; WAVE32-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[COPY]], [[COPY1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_OR_B64_]] - %0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 - %1:sgpr(<4 x s16>) = COPY $sgpr2_sgpr3 - %2:sgpr(<4 x s16>) = G_OR %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(<4 x i16>) = COPY $sgpr0_sgpr1 + %1:sgpr(<4 x i16>) = COPY $sgpr2_sgpr3 + %2:sgpr(<4 x i16>) = G_OR %0, %1 + S_ENDPGM 0, implicit %2(<4 x i16>) ... --- @@ -324,10 +324,10 @@ body: | ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; WAVE32-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[COPY]], [[COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_OR_B32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = G_OR %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = G_OR %0, %1 + S_ENDPGM 0, implicit %2(i32) ... --- @@ -355,10 +355,10 @@ body: | ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; WAVE32-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[COPY]], [[COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_OR_B32_e64_]] - %0:vgpr(<2 x s16>) = COPY $vgpr0 - %1:vgpr(<2 x s16>) = COPY $vgpr1 - %2:vgpr(<2 x s16>) = G_OR %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(<2 x i16>) = COPY $vgpr0 + %1:vgpr(<2 x i16>) = COPY $vgpr1 + %2:vgpr(<2 x i16>) = G_OR %0, %1 + S_ENDPGM 0, implicit %2(<2 x i16>) ... 
@@ -376,22 +376,22 @@ body: | ; WAVE64-LABEL: name: or_s64_vgpr_vgpr_vgpr ; WAVE64: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; WAVE64-NEXT: [[OR:%[0-9]+]]:vgpr(s64) = G_OR [[COPY]], [[COPY1]] - ; WAVE64-NEXT: S_ENDPGM 0, implicit [[OR]](s64) + ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr2_vgpr3 + ; WAVE64-NEXT: [[OR:%[0-9]+]]:vgpr(i64) = G_OR [[COPY]], [[COPY1]] + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[OR]](i64) ; ; WAVE32-LABEL: name: or_s64_vgpr_vgpr_vgpr ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; WAVE32-NEXT: [[OR:%[0-9]+]]:vgpr(s64) = G_OR [[COPY]], [[COPY1]] - ; WAVE32-NEXT: S_ENDPGM 0, implicit [[OR]](s64) - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = COPY $vgpr2_vgpr3 - %2:vgpr(s64) = G_OR %0, %1 - S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr2_vgpr3 + ; WAVE32-NEXT: [[OR:%[0-9]+]]:vgpr(i64) = G_OR [[COPY]], [[COPY1]] + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[OR]](i64) + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(i64) = COPY $vgpr2_vgpr3 + %2:vgpr(i64) = G_OR %0, %1 + S_ENDPGM 0, implicit %2(i64) ... --- @@ -427,14 +427,14 @@ body: | ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_1]], implicit $exec ; WAVE32-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s1) = G_TRUNC %0 - %3:vgpr(s1) = G_TRUNC %1 - %4:vcc(s1) = COPY %2 - %5:vcc(s1) = COPY %3 - %6:vcc(s1) = G_OR %4, %5 - S_ENDPGM 0, implicit %6 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i1) = G_TRUNC %0(i32) + %3:vgpr(i1) = G_TRUNC %1(i32) + %4:vcc(i1) = COPY %2(i1) + %5:vcc(i1) = COPY %3(i1) + %6:vcc(i1) = G_OR %4, %5 + S_ENDPGM 0, implicit %6(i1) ... # The selector for the copy of the or result may constrain the result @@ -475,17 +475,17 @@ body: | ; WAVE32-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_OR_B32_]] ; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY1]] - %1:vgpr(s32) = COPY $vgpr0 - %0:vgpr(s1) = G_TRUNC %1(s32) - %sgpr0:sgpr(s32) = COPY $sgpr0 - %2:sgpr(s1) = G_TRUNC %sgpr0 - %6:sgpr(s32) = G_CONSTANT i32 0 - %7:sgpr(p1) = G_IMPLICIT_DEF - %9:vcc(s1) = COPY %0(s1) - %10:vcc(s1) = COPY %2(s1) - %8:vcc(s1) = G_OR %9, %10 - %3:sreg_32_xm0(s1) = COPY %8(s1) - S_ENDPGM 0, implicit %3 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i1) = G_TRUNC %0(i32) + %sgpr0:sgpr(i32) = COPY $sgpr0 + %3:sgpr(i1) = G_TRUNC %sgpr0(i32) + %4:sgpr(i32) = G_CONSTANT i32 0 + %5:sgpr(f64) = G_IMPLICIT_DEF + %6:vcc(i1) = COPY %1(i1) + %7:vcc(i1) = COPY %3(i1) + %8:vcc(i1) = G_OR %6, %7 + %9:sreg_32_xm0(i1) = COPY %8(i1) + S_ENDPGM 0, implicit %9(i1) ... 
@@ -523,16 +523,16 @@ body: | ; WAVE32-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY [[S_OR_B32_]] ; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY1]] - %1:vgpr(s32) = COPY $vgpr0 - %0:vgpr(s1) = G_TRUNC %1(s32) - %sgpr0:sgpr(s32) = COPY $sgpr0 - %2:sgpr(s1) = G_TRUNC %sgpr0 - %6:sgpr(s32) = G_CONSTANT i32 0 - %7:sgpr(p1) = G_IMPLICIT_DEF - %9:vcc(s1) = COPY %0(s1) - %10:vcc(s1) = COPY %2(s1) - %8:vcc(s1) = G_OR %9, %10 - %3:sreg_64_xexec(s1) = COPY %8(s1) - S_ENDPGM 0, implicit %3 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i1) = G_TRUNC %0(i32) + %sgpr0:sgpr(i32) = COPY $sgpr0 + %3:sgpr(i1) = G_TRUNC %sgpr0(i32) + %4:sgpr(i32) = G_CONSTANT i32 0 + %5:sgpr(f64) = G_IMPLICIT_DEF + %6:vcc(i1) = COPY %1(i1) + %7:vcc(i1) = COPY %3(i1) + %8:vcc(i1) = G_OR %6, %7 + %9:sreg_64_xexec(i1) = COPY %8(i1) + S_ENDPGM 0, implicit %9(i1) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-add3.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-add3.mir index 681c366aa3411..73e3db3c62385 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-add3.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-add3.mir @@ -33,12 +33,12 @@ body: | ; GFX9-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], [[COPY1]], implicit-def dead $scc ; GFX9-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_ADD_I32_]], [[COPY2]], implicit-def dead $scc ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_ADD_I32_1]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = COPY $sgpr2 - %3:sgpr(s32) = G_ADD %0, %1 - %4:sgpr(s32) = G_ADD %3, %2 - S_ENDPGM 0, implicit %4 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i32) = COPY $sgpr2 + %3:sgpr(i32) = G_ADD %0, %1 + %4:sgpr(i32) = G_ADD %3, %2 + S_ENDPGM 0, implicit %4(i32) ... --- @@ -69,12 +69,12 @@ body: | ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX9-NEXT: [[V_ADD3_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ADD3_U32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = G_ADD %0, %1 - %4:vgpr(s32) = G_ADD %3, %2 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(i32) = G_ADD %0, %1 + %4:vgpr(i32) = G_ADD %3, %2 + S_ENDPGM 0, implicit %4(i32) ... --- @@ -106,12 +106,12 @@ body: | ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec ; GFX9-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_ADD_U32_e64_]], [[COPY2]], 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = G_ADD %0, %1 - %4:vgpr(s32) = G_ADD %3, %2 - S_ENDPGM 0, implicit %4, implicit %3 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(i32) = G_ADD %0, %1 + %4:vgpr(i32) = G_ADD %3, %2 + S_ENDPGM 0, implicit %4(i32), implicit %3(i32) ... 
--- @@ -144,11 +144,11 @@ body: | ; GFX9-NEXT: [[V_ADD3_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ADD3_U32_e64_]] %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(p3) = G_PTR_ADD %0, %1 - %4:vgpr(p3) = G_PTR_ADD %3, %2 - S_ENDPGM 0, implicit %4 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(p3) = G_PTR_ADD %0, %1(i32) + %4:vgpr(p3) = G_PTR_ADD %3, %2(i32) + S_ENDPGM 0, implicit %4(p3) ... --- @@ -181,11 +181,11 @@ body: | ; GFX9-NEXT: [[V_ADD3_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ADD3_U32_e64_]] %0:vgpr(p5) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(p5) = G_PTR_ADD %0, %1 - %4:vgpr(p5) = G_PTR_ADD %3, %2 - S_ENDPGM 0, implicit %4 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(p5) = G_PTR_ADD %0, %1(i32) + %4:vgpr(p5) = G_PTR_ADD %3, %2(i32) + S_ENDPGM 0, implicit %4(p5) ... --- @@ -218,12 +218,12 @@ body: | ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec ; GFX9-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY2]], [[V_ADD_U32_e64_]], 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ADD_U32_e64_1]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 %2:vgpr(p3) = COPY $vgpr2 - %3:vgpr(s32) = G_ADD %0, %1 - %4:vgpr(p3) = G_PTR_ADD %2, %3 - S_ENDPGM 0, implicit %4 + %3:vgpr(i32) = G_ADD %0, %1 + %4:vgpr(p3) = G_PTR_ADD %2, %3(i32) + S_ENDPGM 0, implicit %4(p3) ... --- @@ -256,10 +256,10 @@ body: | ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec ; GFX9-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY2]], [[V_ADD_U32_e64_]], 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ADD_U32_e64_1]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 %2:vgpr(p5) = COPY $vgpr2 - %3:vgpr(s32) = G_ADD %0, %1 - %4:vgpr(p5) = G_PTR_ADD %2, %3 - S_ENDPGM 0, implicit %4 + %3:vgpr(i32) = G_ADD %0, %1 + %4:vgpr(p5) = G_PTR_ADD %2, %3(i32) + S_ENDPGM 0, implicit %4(p5) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-and-or.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-and-or.mir index b4ee2bbce7678..397553db55814 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-and-or.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-and-or.mir @@ -33,12 +33,12 @@ body: | ; GFX9-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[COPY1]], implicit-def dead $scc ; GFX9-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_AND_B32_]], [[COPY2]], implicit-def dead $scc ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = COPY $sgpr2 - %3:sgpr(s32) = G_AND %0, %1 - %4:sgpr(s32) = G_OR %3, %2 - S_ENDPGM 0, implicit %4 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i32) = COPY $sgpr2 + %3:sgpr(i32) = G_AND %0, %1 + %4:sgpr(i32) = G_OR %3, %2 + S_ENDPGM 0, implicit %4(i32) ... 
--- @@ -69,12 +69,12 @@ body: | ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX9-NEXT: [[V_AND_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_OR_B32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_AND_OR_B32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = G_AND %0, %1 - %4:vgpr(s32) = G_OR %3, %2 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(i32) = G_AND %0, %1 + %4:vgpr(i32) = G_OR %3, %2 + S_ENDPGM 0, implicit %4(i32) ... --- @@ -105,12 +105,12 @@ body: | ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX9-NEXT: [[V_AND_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_OR_B32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_AND_OR_B32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = G_AND %0, %1 - %4:vgpr(s32) = G_OR %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(i32) = G_AND %0, %1 + %4:vgpr(i32) = G_OR %2, %3 + S_ENDPGM 0, implicit %4(i32) ... --- @@ -144,11 +144,11 @@ body: | ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[S_AND_B32_]] ; GFX9-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[COPY3]], [[COPY2]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_OR_B32_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:vgpr(s32) = COPY $vgpr0 - %3:sgpr(s32) = G_AND %0, %1 - %4:vgpr(s32) = COPY %3 - %5:vgpr(s32) = G_OR %4, %2 - S_ENDPGM 0, implicit %5 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:vgpr(i32) = COPY $vgpr0 + %3:sgpr(i32) = G_AND %0, %1 + %4:vgpr(i32) = COPY %3(i32) + %5:vgpr(i32) = G_OR %4, %2 + S_ENDPGM 0, implicit %5(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-or3.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-or3.mir index 21bca9f9a3ea2..0a435fc0599bc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-or3.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-or3.mir @@ -33,12 +33,12 @@ body: | ; GFX9-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[COPY1]], implicit-def dead $scc ; GFX9-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_OR_B32_]], [[COPY2]], implicit-def dead $scc ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_1]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = COPY $sgpr2 - %3:sgpr(s32) = G_OR %0, %1 - %4:sgpr(s32) = G_OR %3, %2 - S_ENDPGM 0, implicit %4 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i32) = COPY $sgpr2 + %3:sgpr(i32) = G_OR %0, %1 + %4:sgpr(i32) = G_OR %3, %2 + S_ENDPGM 0, implicit %4(i32) ... --- @@ -69,12 +69,12 @@ body: | ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX9-NEXT: [[V_OR3_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR3_B32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_OR3_B32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = G_OR %0, %1 - %4:vgpr(s32) = G_OR %3, %2 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(i32) = G_OR %0, %1 + %4:vgpr(i32) = G_OR %3, %2 + S_ENDPGM 0, implicit %4(i32) ... 
--- @@ -106,11 +106,10 @@ body: | ; GFX9-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[COPY]], [[COPY1]], implicit $exec ; GFX9-NEXT: [[V_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_]], [[COPY2]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_OR_B32_e64_1]], implicit [[V_OR_B32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = G_OR %0, %1 - %4:vgpr(s32) = G_OR %3, %2 - S_ENDPGM 0, implicit %4, implicit %3 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(i32) = G_OR %0, %1 + %4:vgpr(i32) = G_OR %3, %2 + S_ENDPGM 0, implicit %4(i32), implicit %3(i32) ... - diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.mir index 39765bf58cfe5..cd00359da81af 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.mir @@ -18,14 +18,14 @@ body: | ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX6-NEXT: [[V_MED3_I32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MED3_I32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = G_SMAX %0, %1 - %4:vgpr(s32) = G_SMIN %0, %1 - %5:vgpr(s32) = G_SMAX %4, %2 - %6:vgpr(s32) = G_SMIN %3, %5 - S_ENDPGM 0, implicit %6 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(i32) = G_SMAX %0, %1 + %4:vgpr(i32) = G_SMIN %0, %1 + %5:vgpr(i32) = G_SMAX %4, %2 + %6:vgpr(i32) = G_SMIN %3, %5 + S_ENDPGM 0, implicit %6(i32) ... --- @@ -49,14 +49,14 @@ body: | ; GFX6-NEXT: [[S_MAX_I32_1:%[0-9]+]]:sreg_32 = S_MAX_I32 [[S_MIN_I32_]], [[COPY2]], implicit-def dead $scc ; GFX6-NEXT: [[S_MIN_I32_1:%[0-9]+]]:sreg_32 = S_MIN_I32 [[S_MAX_I32_]], [[S_MAX_I32_1]], implicit-def dead $scc ; GFX6-NEXT: S_ENDPGM 0, implicit [[S_MIN_I32_1]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = COPY $sgpr2 - %3:sgpr(s32) = G_SMAX %0, %1 - %4:sgpr(s32) = G_SMIN %0, %1 - %5:sgpr(s32) = G_SMAX %4, %2 - %6:sgpr(s32) = G_SMIN %3, %5 - S_ENDPGM 0, implicit %6 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i32) = COPY $sgpr2 + %3:sgpr(i32) = G_SMAX %0, %1 + %4:sgpr(i32) = G_SMIN %0, %1 + %5:sgpr(i32) = G_SMAX %4, %2 + %6:sgpr(i32) = G_SMIN %3, %5 + S_ENDPGM 0, implicit %6(i32) ... --- @@ -77,14 +77,14 @@ body: | ; GFX6-NEXT: [[V_MAX_I32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[COPY]], [[COPY1]], implicit $exec ; GFX6-NEXT: [[V_MED3_I32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MED3_I32_e64_]], implicit [[V_MAX_I32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = G_SMAX %0, %1 - %4:vgpr(s32) = G_SMIN %0, %1 - %5:vgpr(s32) = G_SMAX %4, %2 - %6:vgpr(s32) = G_SMIN %3, %5 - S_ENDPGM 0, implicit %6, implicit %3 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(i32) = G_SMAX %0, %1 + %4:vgpr(i32) = G_SMIN %0, %1 + %5:vgpr(i32) = G_SMAX %4, %2 + %6:vgpr(i32) = G_SMIN %3, %5 + S_ENDPGM 0, implicit %6(i32), implicit %3(i32) ... 
--- @@ -105,14 +105,14 @@ body: | ; GFX6-NEXT: [[V_MIN_I32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I32_e64 [[COPY]], [[COPY1]], implicit $exec ; GFX6-NEXT: [[V_MED3_I32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MED3_I32_e64_]], implicit [[V_MIN_I32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = G_SMAX %0, %1 - %4:vgpr(s32) = G_SMIN %0, %1 - %5:vgpr(s32) = G_SMAX %4, %2 - %6:vgpr(s32) = G_SMIN %3, %5 - S_ENDPGM 0, implicit %6, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(i32) = G_SMAX %0, %1 + %4:vgpr(i32) = G_SMIN %0, %1 + %5:vgpr(i32) = G_SMAX %4, %2 + %6:vgpr(i32) = G_SMIN %3, %5 + S_ENDPGM 0, implicit %6(i32), implicit %4(i32) ... --- @@ -134,14 +134,14 @@ body: | ; GFX6-NEXT: [[V_MAX_I32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[V_MIN_I32_e64_]], [[COPY2]], implicit $exec ; GFX6-NEXT: [[V_MED3_I32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MED3_I32_e64_]], implicit [[V_MAX_I32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = G_SMAX %0, %1 - %4:vgpr(s32) = G_SMIN %0, %1 - %5:vgpr(s32) = G_SMAX %4, %2 - %6:vgpr(s32) = G_SMIN %3, %5 - S_ENDPGM 0, implicit %6, implicit %5 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(i32) = G_SMAX %0, %1 + %4:vgpr(i32) = G_SMIN %0, %1 + %5:vgpr(i32) = G_SMAX %4, %2 + %6:vgpr(i32) = G_SMIN %3, %5 + S_ENDPGM 0, implicit %6(i32), implicit %5(i32) ... --- @@ -163,15 +163,15 @@ body: | ; GFX6-NEXT: [[V_MED3_I32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec ; GFX6-NEXT: [[V_MED3_I32_e64_1:%[0-9]+]]:vgpr_32 = V_MED3_I32_e64 [[COPY]], [[COPY1]], [[COPY3]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MED3_I32_e64_]], implicit [[V_MED3_I32_e64_1]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = COPY $vgpr3 - %4:vgpr(s32) = G_SMAX %0, %1 - %5:vgpr(s32) = G_SMIN %0, %1 - %6:vgpr(s32) = G_SMIN %2, %4 - %7:vgpr(s32) = G_SMAX %6, %5 - %8:vgpr(s32) = G_SMIN %3, %4 - %9:vgpr(s32) = G_SMAX %8, %5 - S_ENDPGM 0, implicit %7, implicit %9 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(i32) = COPY $vgpr3 + %4:vgpr(i32) = G_SMAX %0, %1 + %5:vgpr(i32) = G_SMIN %0, %1 + %6:vgpr(i32) = G_SMIN %2, %4 + %7:vgpr(i32) = G_SMAX %6, %5 + %8:vgpr(i32) = G_SMIN %3, %4 + %9:vgpr(i32) = G_SMAX %8, %5 + S_ENDPGM 0, implicit %7(i32), implicit %9(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.s16.mir index 22dd12eac0923..f57583f5b02a7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.s16.mir @@ -42,19 +42,19 @@ body: | ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX11-NEXT: [[V_MED3_I16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_fake16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s16) = G_TRUNC %0 - %4:vgpr(s16) = G_TRUNC %1 - %5:vgpr(s16) = G_TRUNC %2 - - %6:vgpr(s16) = G_SMAX %3, %4 - %7:vgpr(s16) = G_SMIN %3, %4 - %8:vgpr(s16) = G_SMAX %7, %5 - %9:vgpr(s16) = G_SMIN %6, %8 - - S_ENDPGM 0, implicit %9 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(i16) = G_TRUNC %0(i32) + %4:vgpr(i16) = G_TRUNC %1(i32) + %5:vgpr(i16) = G_TRUNC %2(i32) + %6:vgpr(i16) = G_SMAX %3, %4 + %7:vgpr(i16) = G_SMIN %3, %4 + %8:vgpr(i16) = G_SMAX %7, %5 + %9:vgpr(i16) = G_SMIN %6, %8 + S_ENDPGM 0, implicit %9(i16) + + ... --- @@ -97,19 +97,19 @@ body: | ; GFX11-NEXT: [[V_MAX_I16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec ; GFX11-NEXT: [[V_MED3_I16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_fake16_e64_]], implicit [[V_MAX_I16_fake16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s16) = G_TRUNC %0 - %4:vgpr(s16) = G_TRUNC %1 - %5:vgpr(s16) = G_TRUNC %2 - - %6:vgpr(s16) = G_SMAX %3, %4 - %7:vgpr(s16) = G_SMIN %3, %4 - %8:vgpr(s16) = G_SMAX %7, %5 - %9:vgpr(s16) = G_SMIN %6, %8 - - S_ENDPGM 0, implicit %9, implicit %6 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(i16) = G_TRUNC %0(i32) + %4:vgpr(i16) = G_TRUNC %1(i32) + %5:vgpr(i16) = G_TRUNC %2(i32) + %6:vgpr(i16) = G_SMAX %3, %4 + %7:vgpr(i16) = G_SMIN %3, %4 + %8:vgpr(i16) = G_SMAX %7, %5 + %9:vgpr(i16) = G_SMIN %6, %8 + S_ENDPGM 0, implicit %9(i16), implicit %6(i16) + + ... --- @@ -152,19 +152,19 @@ body: | ; GFX11-NEXT: [[V_MIN_I16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec ; GFX11-NEXT: [[V_MED3_I16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_fake16_e64_]], implicit [[V_MIN_I16_fake16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s16) = G_TRUNC %0 - %4:vgpr(s16) = G_TRUNC %1 - %5:vgpr(s16) = G_TRUNC %2 - - %6:vgpr(s16) = G_SMAX %3, %4 - %7:vgpr(s16) = G_SMIN %3, %4 - %8:vgpr(s16) = G_SMAX %7, %5 - %9:vgpr(s16) = G_SMIN %6, %8 - - S_ENDPGM 0, implicit %9, implicit %7 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(i16) = G_TRUNC %0(i32) + %4:vgpr(i16) = G_TRUNC %1(i32) + %5:vgpr(i16) = G_TRUNC %2(i32) + %6:vgpr(i16) = G_SMAX %3, %4 + %7:vgpr(i16) = G_SMIN %3, %4 + %8:vgpr(i16) = G_SMAX %7, %5 + %9:vgpr(i16) = G_SMIN %6, %8 + S_ENDPGM 0, implicit %9(i16), implicit %7(i16) + + ... 
--- @@ -209,17 +209,17 @@ body: | ; GFX11-NEXT: [[V_MAX_I16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_fake16_e64 [[V_MIN_I16_fake16_e64_]], [[COPY2]], implicit $exec ; GFX11-NEXT: [[V_MED3_I16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_fake16_e64_]], implicit [[V_MAX_I16_fake16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s16) = G_TRUNC %0 - %4:vgpr(s16) = G_TRUNC %1 - %5:vgpr(s16) = G_TRUNC %2 - - %6:vgpr(s16) = G_SMAX %3, %4 - %7:vgpr(s16) = G_SMIN %3, %4 - %8:vgpr(s16) = G_SMAX %7, %5 - %9:vgpr(s16) = G_SMIN %6, %8 - - S_ENDPGM 0, implicit %9, implicit %8 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(i16) = G_TRUNC %0(i32) + %4:vgpr(i16) = G_TRUNC %1(i32) + %5:vgpr(i16) = G_TRUNC %2(i32) + %6:vgpr(i16) = G_SMAX %3, %4 + %7:vgpr(i16) = G_SMIN %3, %4 + %8:vgpr(i16) = G_SMAX %7, %5 + %9:vgpr(i16) = G_SMIN %6, %8 + S_ENDPGM 0, implicit %9(i16), implicit %8(i16) + + ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.mir index 8b0e9709c8029..4b970617b1119 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.mir @@ -18,14 +18,14 @@ body: | ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX6-NEXT: [[V_MED3_U32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MED3_U32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = G_UMAX %0, %1 - %4:vgpr(s32) = G_UMIN %0, %1 - %5:vgpr(s32) = G_UMAX %4, %2 - %6:vgpr(s32) = G_UMIN %3, %5 - S_ENDPGM 0, implicit %6 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(i32) = G_UMAX %0, %1 + %4:vgpr(i32) = G_UMIN %0, %1 + %5:vgpr(i32) = G_UMAX %4, %2 + %6:vgpr(i32) = G_UMIN %3, %5 + S_ENDPGM 0, implicit %6(i32) ... --- @@ -49,14 +49,14 @@ body: | ; GFX6-NEXT: [[S_MAX_U32_1:%[0-9]+]]:sreg_32 = S_MAX_U32 [[S_MIN_U32_]], [[COPY2]], implicit-def dead $scc ; GFX6-NEXT: [[S_MIN_U32_1:%[0-9]+]]:sreg_32 = S_MIN_U32 [[S_MAX_U32_]], [[S_MAX_U32_1]], implicit-def dead $scc ; GFX6-NEXT: S_ENDPGM 0, implicit [[S_MIN_U32_1]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = COPY $sgpr2 - %3:sgpr(s32) = G_UMAX %0, %1 - %4:sgpr(s32) = G_UMIN %0, %1 - %5:sgpr(s32) = G_UMAX %4, %2 - %6:sgpr(s32) = G_UMIN %3, %5 - S_ENDPGM 0, implicit %6 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i32) = COPY $sgpr2 + %3:sgpr(i32) = G_UMAX %0, %1 + %4:sgpr(i32) = G_UMIN %0, %1 + %5:sgpr(i32) = G_UMAX %4, %2 + %6:sgpr(i32) = G_UMIN %3, %5 + S_ENDPGM 0, implicit %6(i32) ... 
--- @@ -77,14 +77,14 @@ body: | ; GFX6-NEXT: [[V_MAX_U32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U32_e64 [[COPY]], [[COPY1]], implicit $exec ; GFX6-NEXT: [[V_MED3_U32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MED3_U32_e64_]], implicit [[V_MAX_U32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = G_UMAX %0, %1 - %4:vgpr(s32) = G_UMIN %0, %1 - %5:vgpr(s32) = G_UMAX %4, %2 - %6:vgpr(s32) = G_UMIN %3, %5 - S_ENDPGM 0, implicit %6, implicit %3 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(i32) = G_UMAX %0, %1 + %4:vgpr(i32) = G_UMIN %0, %1 + %5:vgpr(i32) = G_UMAX %4, %2 + %6:vgpr(i32) = G_UMIN %3, %5 + S_ENDPGM 0, implicit %6(i32), implicit %3(i32) ... --- @@ -105,14 +105,14 @@ body: | ; GFX6-NEXT: [[V_MIN_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[COPY]], [[COPY1]], implicit $exec ; GFX6-NEXT: [[V_MED3_U32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MED3_U32_e64_]], implicit [[V_MIN_U32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = G_UMAX %0, %1 - %4:vgpr(s32) = G_UMIN %0, %1 - %5:vgpr(s32) = G_UMAX %4, %2 - %6:vgpr(s32) = G_UMIN %3, %5 - S_ENDPGM 0, implicit %6, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(i32) = G_UMAX %0, %1 + %4:vgpr(i32) = G_UMIN %0, %1 + %5:vgpr(i32) = G_UMAX %4, %2 + %6:vgpr(i32) = G_UMIN %3, %5 + S_ENDPGM 0, implicit %6(i32), implicit %4(i32) ... --- @@ -134,14 +134,14 @@ body: | ; GFX6-NEXT: [[V_MAX_U32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U32_e64 [[V_MIN_U32_e64_]], [[COPY2]], implicit $exec ; GFX6-NEXT: [[V_MED3_U32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MED3_U32_e64_]], implicit [[V_MAX_U32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = G_UMAX %0, %1 - %4:vgpr(s32) = G_UMIN %0, %1 - %5:vgpr(s32) = G_UMAX %4, %2 - %6:vgpr(s32) = G_UMIN %3, %5 - S_ENDPGM 0, implicit %6, implicit %5 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(i32) = G_UMAX %0, %1 + %4:vgpr(i32) = G_UMIN %0, %1 + %5:vgpr(i32) = G_UMAX %4, %2 + %6:vgpr(i32) = G_UMIN %3, %5 + S_ENDPGM 0, implicit %6(i32), implicit %5(i32) ... 
--- @@ -163,15 +163,15 @@ body: | ; GFX6-NEXT: [[V_MED3_U32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec ; GFX6-NEXT: [[V_MED3_U32_e64_1:%[0-9]+]]:vgpr_32 = V_MED3_U32_e64 [[COPY]], [[COPY1]], [[COPY3]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MED3_U32_e64_]], implicit [[V_MED3_U32_e64_1]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = COPY $vgpr3 - %4:vgpr(s32) = G_UMAX %0, %1 - %5:vgpr(s32) = G_UMIN %0, %1 - %6:vgpr(s32) = G_UMIN %2, %4 - %7:vgpr(s32) = G_UMAX %6, %5 - %8:vgpr(s32) = G_UMIN %3, %4 - %9:vgpr(s32) = G_UMAX %8, %5 - S_ENDPGM 0, implicit %7, implicit %9 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(i32) = COPY $vgpr3 + %4:vgpr(i32) = G_UMAX %0, %1 + %5:vgpr(i32) = G_UMIN %0, %1 + %6:vgpr(i32) = G_UMIN %2, %4 + %7:vgpr(i32) = G_UMAX %6, %5 + %8:vgpr(i32) = G_UMIN %3, %4 + %9:vgpr(i32) = G_UMAX %8, %5 + S_ENDPGM 0, implicit %7(i32), implicit %9(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.s16.mir index 6e1489e3227d9..6ad2bbca042f3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.s16.mir @@ -42,19 +42,19 @@ body: | ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX11-NEXT: [[V_MED3_U16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_fake16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s16) = G_TRUNC %0 - %4:vgpr(s16) = G_TRUNC %1 - %5:vgpr(s16) = G_TRUNC %2 - - %6:vgpr(s16) = G_UMAX %3, %4 - %7:vgpr(s16) = G_UMIN %3, %4 - %8:vgpr(s16) = G_UMAX %7, %5 - %9:vgpr(s16) = G_UMIN %6, %8 - - S_ENDPGM 0, implicit %9 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(i16) = G_TRUNC %0(i32) + %4:vgpr(i16) = G_TRUNC %1(i32) + %5:vgpr(i16) = G_TRUNC %2(i32) + %6:vgpr(i16) = G_UMAX %3, %4 + %7:vgpr(i16) = G_UMIN %3, %4 + %8:vgpr(i16) = G_UMAX %7, %5 + %9:vgpr(i16) = G_UMIN %6, %8 + S_ENDPGM 0, implicit %9(i16) + + ... --- @@ -97,19 +97,19 @@ body: | ; GFX11-NEXT: [[V_MAX_U16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec ; GFX11-NEXT: [[V_MED3_U16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_fake16_e64_]], implicit [[V_MAX_U16_fake16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s16) = G_TRUNC %0 - %4:vgpr(s16) = G_TRUNC %1 - %5:vgpr(s16) = G_TRUNC %2 - - %6:vgpr(s16) = G_UMAX %3, %4 - %7:vgpr(s16) = G_UMIN %3, %4 - %8:vgpr(s16) = G_UMAX %7, %5 - %9:vgpr(s16) = G_UMIN %6, %8 - - S_ENDPGM 0, implicit %9, implicit %6 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(i16) = G_TRUNC %0(i32) + %4:vgpr(i16) = G_TRUNC %1(i32) + %5:vgpr(i16) = G_TRUNC %2(i32) + %6:vgpr(i16) = G_UMAX %3, %4 + %7:vgpr(i16) = G_UMIN %3, %4 + %8:vgpr(i16) = G_UMAX %7, %5 + %9:vgpr(i16) = G_UMIN %6, %8 + S_ENDPGM 0, implicit %9(i16), implicit %6(i16) + + ... 
--- @@ -152,19 +152,19 @@ body: | ; GFX11-NEXT: [[V_MIN_U16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec ; GFX11-NEXT: [[V_MED3_U16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_fake16_e64_]], implicit [[V_MIN_U16_fake16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s16) = G_TRUNC %0 - %4:vgpr(s16) = G_TRUNC %1 - %5:vgpr(s16) = G_TRUNC %2 - - %6:vgpr(s16) = G_UMAX %3, %4 - %7:vgpr(s16) = G_UMIN %3, %4 - %8:vgpr(s16) = G_UMAX %7, %5 - %9:vgpr(s16) = G_UMIN %6, %8 - - S_ENDPGM 0, implicit %9, implicit %7 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(i16) = G_TRUNC %0(i32) + %4:vgpr(i16) = G_TRUNC %1(i32) + %5:vgpr(i16) = G_TRUNC %2(i32) + %6:vgpr(i16) = G_UMAX %3, %4 + %7:vgpr(i16) = G_UMIN %3, %4 + %8:vgpr(i16) = G_UMAX %7, %5 + %9:vgpr(i16) = G_UMIN %6, %8 + S_ENDPGM 0, implicit %9(i16), implicit %7(i16) + + ... --- @@ -209,17 +209,17 @@ body: | ; GFX11-NEXT: [[V_MAX_U16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_fake16_e64 [[V_MIN_U16_fake16_e64_]], [[COPY2]], implicit $exec ; GFX11-NEXT: [[V_MED3_U16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_fake16_e64_]], implicit [[V_MAX_U16_fake16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s16) = G_TRUNC %0 - %4:vgpr(s16) = G_TRUNC %1 - %5:vgpr(s16) = G_TRUNC %2 - - %6:vgpr(s16) = G_UMAX %3, %4 - %7:vgpr(s16) = G_UMIN %3, %4 - %8:vgpr(s16) = G_UMAX %7, %5 - %9:vgpr(s16) = G_UMIN %6, %8 - - S_ENDPGM 0, implicit %9, implicit %8 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(i16) = G_TRUNC %0(i32) + %4:vgpr(i16) = G_TRUNC %1(i32) + %5:vgpr(i16) = G_TRUNC %2(i32) + %6:vgpr(i16) = G_UMAX %3, %4 + %7:vgpr(i16) = G_UMIN %3, %4 + %8:vgpr(i16) = G_UMAX %7, %5 + %9:vgpr(i16) = G_UMIN %6, %8 + S_ENDPGM 0, implicit %9(i16), implicit %8(i16) + + ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-xor3.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-xor3.mir index 49a2eabda270f..48efae4901362 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-xor3.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-xor3.mir @@ -43,12 +43,12 @@ body: | ; GFX10-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[COPY1]], implicit-def dead $scc ; GFX10-NEXT: [[S_XOR_B32_1:%[0-9]+]]:sreg_32 = S_XOR_B32 [[S_XOR_B32_]], [[COPY2]], implicit-def dead $scc ; GFX10-NEXT: S_ENDPGM 0, implicit [[S_XOR_B32_1]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = COPY $sgpr2 - %3:sgpr(s32) = G_XOR %0, %1 - %4:sgpr(s32) = G_XOR %3, %2 - S_ENDPGM 0, implicit %4 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i32) = COPY $sgpr2 + %3:sgpr(i32) = G_XOR %0, %1 + %4:sgpr(i32) = G_XOR %3, %2 + S_ENDPGM 0, implicit %4(i32) ... 
--- @@ -89,12 +89,12 @@ body: | ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX10-NEXT: [[V_XOR3_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR3_B32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_XOR3_B32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = G_XOR %0, %1 - %4:vgpr(s32) = G_XOR %3, %2 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(i32) = G_XOR %0, %1 + %4:vgpr(i32) = G_XOR %3, %2 + S_ENDPGM 0, implicit %4(i32) ... # Mixed SGPR and VGPR, with full copy from scalar xor to VGPR, as @@ -143,13 +143,13 @@ body: | ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[S_XOR_B32_]] ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[COPY3]], [[COPY2]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:vgpr(s32) = COPY $vgpr0 - %3:sgpr(s32) = G_XOR %0, %1 - %4:vgpr(s32) = COPY %3 - %5:vgpr(s32) = G_XOR %4, %2 - S_ENDPGM 0, implicit %5 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:vgpr(i32) = COPY $vgpr0 + %3:sgpr(i32) = G_XOR %0, %1 + %4:vgpr(i32) = COPY %3(i32) + %5:vgpr(i32) = G_XOR %4, %2 + S_ENDPGM 0, implicit %5(i32) ... --- @@ -195,13 +195,13 @@ body: | ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[S_XOR_B32_]] ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[COPY2]], [[COPY3]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:vgpr(s32) = COPY $vgpr0 - %3:sgpr(s32) = G_XOR %0, %1 - %4:vgpr(s32) = COPY %3 - %5:vgpr(s32) = G_XOR %2, %4 - S_ENDPGM 0, implicit %5 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:vgpr(i32) = COPY $vgpr0 + %3:sgpr(i32) = G_XOR %0, %1 + %4:vgpr(i32) = COPY %3(i32) + %5:vgpr(i32) = G_XOR %2, %4 + S_ENDPGM 0, implicit %5(i32) ... --- @@ -243,10 +243,10 @@ body: | ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: [[V_XOR3_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR3_B32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_XOR3_B32_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:vgpr(s32) = COPY $vgpr0 - %3:sgpr(s32) = G_XOR %0, %1 - %4:vgpr(s32) = G_XOR %3, %2 - S_ENDPGM 0, implicit %4 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:vgpr(i32) = COPY $vgpr0 + %3:sgpr(i32) = G_XOR %0, %1 + %4:vgpr(i32) = G_XOR %3, %2 + S_ENDPGM 0, implicit %4(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-phi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-phi.mir index 4bb9eb807e156..f8105291f2ad6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-phi.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-phi.mir @@ -33,25 +33,31 @@ body: | ; GCN-NEXT: $sgpr0 = COPY [[PHI]] ; GCN-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $sgpr0, $sgpr1, $sgpr2 - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = COPY $sgpr2 - %3:sgpr(s32) = G_CONSTANT i32 0 - %4:sgpr(s32) = G_ICMP intpred(eq), %2(s32), %3 - G_BRCOND %4, %bb.1 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i32) = COPY $sgpr2 + %3:sgpr(i32) = G_CONSTANT i32 0 + %4:sgpr(i32) = G_ICMP intpred(eq), %2(i32), %3 + G_BRCOND %4(i32), %bb.1 G_BR %bb.2 bb.1: - %5:sgpr(s32) = COPY %1 + successors: %bb.2(0x80000000) + + %5:sgpr(i32) = COPY %1(i32) G_BR %bb.2 bb.2: - %6:sgpr(s32) = G_PHI %0(s32), %bb.0, %5(s32), %bb.1 - $sgpr0 = COPY %6 + %6:sgpr(i32) = G_PHI %0(i32), %bb.0, %5(i32), %bb.1 + $sgpr0 = COPY %6(i32) S_SETPC_B64 undef $sgpr30_sgpr31 + + + ... --- @@ -87,25 +93,31 @@ body: | ; GCN-NEXT: $vgpr0 = COPY [[PHI]] ; GCN-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0, $vgpr1, $sgpr2 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:sgpr(s32) = COPY $sgpr2 - %3:sgpr(s32) = G_CONSTANT i32 0 - %4:sgpr(s32) = G_ICMP intpred(eq), %2(s32), %3 - G_BRCOND %4, %bb.1 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:sgpr(i32) = COPY $sgpr2 + %3:sgpr(i32) = G_CONSTANT i32 0 + %4:sgpr(i32) = G_ICMP intpred(eq), %2(i32), %3 + G_BRCOND %4(i32), %bb.1 G_BR %bb.2 bb.1: - %5:sgpr(s32) = COPY %1 + successors: %bb.2(0x80000000) + + %5:sgpr(i32) = COPY %1(i32) G_BR %bb.2 bb.2: - %6:vgpr(s32) = G_PHI %0(s32), %bb.0, %5(s32), %bb.1 - $vgpr0 = COPY %6(s32) + %6:vgpr(i32) = G_PHI %0(i32), %bb.0, %5(i32), %bb.1 + $vgpr0 = COPY %6(i32) S_SETPC_B64 undef $sgpr30_sgpr31 + + + ... --- @@ -140,25 +152,31 @@ body: | ; GCN-NEXT: $vgpr0 = COPY [[PHI]] ; GCN-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $sgpr0, $vgpr0, $sgpr1, $sgpr2 - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:sgpr(s32) = COPY $sgpr2 - %3:sgpr(s32) = G_CONSTANT i32 0 - %4:sgpr(s32) = G_ICMP intpred(eq), %2(s32), %3 - G_BRCOND %4, %bb.1 + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:sgpr(i32) = COPY $sgpr2 + %3:sgpr(i32) = G_CONSTANT i32 0 + %4:sgpr(i32) = G_ICMP intpred(eq), %2(i32), %3 + G_BRCOND %4(i32), %bb.1 G_BR %bb.2 bb.1: - %5:vgpr(s32) = COPY %1 + successors: %bb.2(0x80000000) + + %5:vgpr(i32) = COPY %1(i32) G_BR %bb.2 bb.2: - %6:vgpr(s32) = G_PHI %0(s32), %bb.0, %5(s32), %bb.1 - $vgpr0 = COPY %6 + %6:vgpr(i32) = G_PHI %0(i32), %bb.0, %5(i32), %bb.1 + $vgpr0 = COPY %6(i32) S_SETPC_B64 undef $sgpr30_sgpr31 + + + ... 
--- @@ -194,25 +212,31 @@ body: | ; GCN-NEXT: $vgpr0 = COPY [[PHI]] ; GCN-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $sgpr0, $vgpr0, $sgpr1 - %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr(s32) = COPY $sgpr0 - %2:sgpr(s32) = COPY $sgpr1 - %3:sgpr(s32) = G_CONSTANT i32 0 - %4:sgpr(s32) = G_ICMP intpred(eq), %2(s32), %3 - G_BRCOND %4, %bb.1 + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = COPY $sgpr0 + %2:sgpr(i32) = COPY $sgpr1 + %3:sgpr(i32) = G_CONSTANT i32 0 + %4:sgpr(i32) = G_ICMP intpred(eq), %2(i32), %3 + G_BRCOND %4(i32), %bb.1 G_BR %bb.2 bb.1: - %5:vgpr(s32) = COPY %1 + successors: %bb.2(0x80000000) + + %5:vgpr(i32) = COPY %1(i32) G_BR %bb.2 bb.2: - %6:vgpr(s32) = G_PHI %0(s32), %bb.0, %5(s32), %bb.1 - $vgpr0 = COPY %6 + %6:vgpr(i32) = G_PHI %0(i32), %bb.0, %5(i32), %bb.1 + $vgpr0 = COPY %6(i32) S_SETPC_B64 undef $sgpr30_sgpr31 + + + ... --- @@ -247,25 +271,31 @@ body: | ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[PHI]] ; GCN-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4 - %0:sgpr(s64) = COPY $sgpr0_sgpr1 - %1:sgpr(s64) = COPY $sgpr2_sgpr3 - %2:sgpr(s32) = COPY $sgpr4 - %3:sgpr(s32) = G_CONSTANT i32 0 - %4:sgpr(s32) = G_ICMP intpred(eq), %2(s32), %3 - G_BRCOND %4, %bb.1 + %0:sgpr(i64) = COPY $sgpr0_sgpr1 + %1:sgpr(i64) = COPY $sgpr2_sgpr3 + %2:sgpr(i32) = COPY $sgpr4 + %3:sgpr(i32) = G_CONSTANT i32 0 + %4:sgpr(i32) = G_ICMP intpred(eq), %2(i32), %3 + G_BRCOND %4(i32), %bb.1 G_BR %bb.2 bb.1: - %5:sgpr(s64) = COPY %1 + successors: %bb.2(0x80000000) + + %5:sgpr(i64) = COPY %1(i64) G_BR %bb.2 bb.2: - %6:sgpr(s64) = G_PHI %0(s64), %bb.0, %5(s64), %bb.1 - $sgpr0_sgpr1 = COPY %6 + %6:sgpr(i64) = G_PHI %0(i64), %bb.0, %5(i64), %bb.1 + $sgpr0_sgpr1 = COPY %6(i64) S_SETPC_B64 undef $sgpr30_sgpr31 + + + ... --- name: g_phi_v2s16_vv_sbranch @@ -300,25 +330,31 @@ body: | ; GCN-NEXT: $vgpr0 = COPY [[PHI]] ; GCN-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0, $vgpr1, $sgpr2 - %0:vgpr(<2 x s16>) = COPY $vgpr0 - %1:vgpr(<2 x s16>) = COPY $vgpr1 - %2:sgpr(s32) = COPY $sgpr2 - %3:sgpr(s32) = G_CONSTANT i32 0 - %4:sgpr(s32) = G_ICMP intpred(eq), %2(s32), %3 - G_BRCOND %4, %bb.1 + %0:vgpr(<2 x i16>) = COPY $vgpr0 + %1:vgpr(<2 x i16>) = COPY $vgpr1 + %2:sgpr(i32) = COPY $sgpr2 + %3:sgpr(i32) = G_CONSTANT i32 0 + %4:sgpr(i32) = G_ICMP intpred(eq), %2(i32), %3 + G_BRCOND %4(i32), %bb.1 G_BR %bb.2 bb.1: - %5:sgpr(<2 x s16>) = COPY %1 + successors: %bb.2(0x80000000) + + %5:sgpr(<2 x i16>) = COPY %1(<2 x i16>) G_BR %bb.2 bb.2: - %6:vgpr(<2 x s16>) = G_PHI %0(<2 x s16>), %bb.0, %5(<2 x s16>), %bb.1 - $vgpr0 = COPY %6 + %6:vgpr(<2 x i16>) = G_PHI %0(<2 x i16>), %bb.0, %5(<2 x i16>), %bb.1 + $vgpr0 = COPY %6(<2 x i16>) S_SETPC_B64 undef $sgpr30_sgpr31 + + + ... 
--- @@ -353,25 +389,31 @@ body: | ; GCN-NEXT: $sgpr0 = COPY [[PHI]] ; GCN-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $sgpr0, $sgpr1, $sgpr2 - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = COPY $sgpr2 - %3:sgpr(s32) = G_CONSTANT i32 0 - %4:sgpr(s32) = G_ICMP intpred(eq), %2(s32), %3 - G_BRCOND %4, %bb.1 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i32) = COPY $sgpr2 + %3:sgpr(i32) = G_CONSTANT i32 0 + %4:sgpr(i32) = G_ICMP intpred(eq), %2(i32), %3 + G_BRCOND %4(i32), %bb.1 G_BR %bb.2 bb.1: - %5:sgpr(s32) = COPY %1 + successors: %bb.2(0x80000000) + + %5:sgpr(i32) = COPY %1(i32) G_BR %bb.2 bb.2: - %6:sgpr(s32) = G_PHI %0(s32), %bb.0, %5(s32), %bb.1 - $sgpr0 = COPY %6(s32) + %6:sgpr(i32) = G_PHI %0(i32), %bb.0, %5(i32), %bb.1 + $sgpr0 = COPY %6(i32) S_SETPC_B64 undef $sgpr30_sgpr31 + + + ... --- @@ -407,23 +449,29 @@ body: | ; GCN-NEXT: $vgpr0 = COPY [[PHI]] ; GCN-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0, $vgpr1, $sgpr2 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:sgpr(s32) = COPY $sgpr2 - %3:sgpr(s32) = G_CONSTANT i32 0 - %4:sgpr(s32) = G_ICMP intpred(eq), %2(s32), %3 - G_BRCOND %4, %bb.1 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:sgpr(i32) = COPY $sgpr2 + %3:sgpr(i32) = G_CONSTANT i32 0 + %4:sgpr(i32) = G_ICMP intpred(eq), %2(i32), %3 + G_BRCOND %4(i32), %bb.1 G_BR %bb.2 bb.1: - %5:sgpr(s32) = COPY %1 + successors: %bb.2(0x80000000) + + %5:sgpr(i32) = COPY %1(i32) G_BR %bb.2 bb.2: - %6:vgpr(s32) = G_PHI %0(s32), %bb.0, %5(s32), %bb.1 - $vgpr0 = COPY %6 + %6:vgpr(i32) = G_PHI %0(i32), %bb.0, %5(i32), %bb.1 + $vgpr0 = COPY %6(i32) S_SETPC_B64 undef $sgpr30_sgpr31 + + + ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pseudo-scalar-transcendental.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pseudo-scalar-transcendental.mir index 916ead0ec4b6a..d530d00d8eb41 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pseudo-scalar-transcendental.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pseudo-scalar-transcendental.mir @@ -15,10 +15,13 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; CHECK-NEXT: [[V_S_EXP_F32_e64_:%[0-9]+]]:sreg_32_xexec = nofpexcept V_S_EXP_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: $vgpr0 = COPY [[V_S_EXP_F32_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), %0 - $vgpr0 = COPY %1(s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[V_S_EXP_F32_e64_]] + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]] + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(f32) = G_BITCAST %0(i32) + %2:sgpr(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), %1(f32) + %3:sgpr(i32) = G_BITCAST %2(f32) + $vgpr0 = COPY %3(i32) ... 
--- @@ -37,11 +40,13 @@ body: | ; CHECK-NEXT: [[V_S_EXP_F16_e64_:%[0-9]+]]:sreg_32_xexec = nofpexcept V_S_EXP_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[V_S_EXP_F16_e64_]] ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0(s32) - %2:sgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), %1 - %3:sgpr(s32) = G_ANYEXT %2(s16) - $vgpr0 = COPY %3(s32) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(f16) = G_BITCAST %1(i16) + %3:sgpr(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), %2(f16) + %4:sgpr(i16) = G_BITCAST %3(f16) + %5:sgpr(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... --- @@ -58,10 +63,13 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; CHECK-NEXT: [[V_S_LOG_F32_e64_:%[0-9]+]]:sreg_32_xexec = nofpexcept V_S_LOG_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: $vgpr0 = COPY [[V_S_LOG_F32_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), %0 - $vgpr0 = COPY %1(s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[V_S_LOG_F32_e64_]] + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]] + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(f32) = G_BITCAST %0(i32) + %2:sgpr(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), %1(f32) + %3:sgpr(i32) = G_BITCAST %2(f32) + $vgpr0 = COPY %3(i32) ... --- @@ -80,11 +88,13 @@ body: | ; CHECK-NEXT: [[V_S_LOG_F16_e64_:%[0-9]+]]:sreg_32_xexec = nofpexcept V_S_LOG_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[V_S_LOG_F16_e64_]] ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0(s32) - %2:sgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), %1 - %3:sgpr(s32) = G_ANYEXT %2(s16) - $vgpr0 = COPY %3(s32) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(f16) = G_BITCAST %1(i16) + %3:sgpr(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), %2(f16) + %4:sgpr(i16) = G_BITCAST %3(f16) + %5:sgpr(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... --- @@ -101,10 +111,13 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; CHECK-NEXT: [[V_S_RCP_F32_e64_:%[0-9]+]]:sreg_32_xexec = nnan ninf nsz arcp contract afn reassoc nofpexcept V_S_RCP_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: $vgpr0 = COPY [[V_S_RCP_F32_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = nnan ninf nsz arcp contract afn reassoc G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %0(s32) - $vgpr0 = COPY %1(s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[V_S_RCP_F32_e64_]] + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]] + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(f32) = G_BITCAST %0(i32) + %2:sgpr(f32) = nnan ninf nsz arcp contract afn reassoc G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %1(f32) + %3:sgpr(i32) = G_BITCAST %2(f32) + $vgpr0 = COPY %3(i32) ... 
--- @@ -123,11 +136,13 @@ body: | ; CHECK-NEXT: [[V_S_RCP_F16_e64_:%[0-9]+]]:sreg_32_xexec = nnan ninf nsz arcp contract afn reassoc nofpexcept V_S_RCP_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[V_S_RCP_F16_e64_]] ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0(s32) - %2:sgpr(s16) = nnan ninf nsz arcp contract afn reassoc G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %1(s16) - %3:sgpr(s32) = G_ANYEXT %2(s16) - $vgpr0 = COPY %3(s32) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(f16) = G_BITCAST %1(i16) + %3:sgpr(f16) = nnan ninf nsz arcp contract afn reassoc G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %2(f16) + %4:sgpr(i16) = G_BITCAST %3(f16) + %5:sgpr(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... --- @@ -144,10 +159,13 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; CHECK-NEXT: [[V_S_RSQ_F32_e64_:%[0-9]+]]:sreg_32_xexec = nofpexcept V_S_RSQ_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: $vgpr0 = COPY [[V_S_RSQ_F32_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %0(s32) - $vgpr0 = COPY %1(s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[V_S_RSQ_F32_e64_]] + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]] + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(f32) = G_BITCAST %0(i32) + %2:sgpr(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %1(f32) + %3:sgpr(i32) = G_BITCAST %2(f32) + $vgpr0 = COPY %3(i32) ... --- @@ -166,11 +184,13 @@ body: | ; CHECK-NEXT: [[V_S_RSQ_F16_e64_:%[0-9]+]]:sreg_32_xexec = nofpexcept V_S_RSQ_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[V_S_RSQ_F16_e64_]] ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0(s32) - %2:sgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %1(s16) - %3:sgpr(s32) = G_ANYEXT %2(s16) - $vgpr0 = COPY %3(s32) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(f16) = G_BITCAST %1(i16) + %3:sgpr(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %2(f16) + %4:sgpr(i16) = G_BITCAST %3(f16) + %5:sgpr(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... --- @@ -187,10 +207,13 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; CHECK-NEXT: [[V_S_SQRT_F32_e64_:%[0-9]+]]:sreg_32_xexec = nofpexcept V_S_SQRT_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: $vgpr0 = COPY [[V_S_SQRT_F32_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = G_FSQRT %0 - $vgpr0 = COPY %1(s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[V_S_SQRT_F32_e64_]] + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]] + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(f32) = G_BITCAST %0(i32) + %2:sgpr(f32) = G_FSQRT %1 + %3:sgpr(i32) = G_BITCAST %2(f32) + $vgpr0 = COPY %3(i32) ... 
--- @@ -209,11 +232,13 @@ body: | ; CHECK-NEXT: [[V_S_SQRT_F16_e64_:%[0-9]+]]:sreg_32_xexec = nofpexcept V_S_SQRT_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[V_S_SQRT_F16_e64_]] ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0(s32) - %2:sgpr(s16) = G_FSQRT %1 - %3:sgpr(s32) = G_ANYEXT %2(s16) - $vgpr0 = COPY %3(s32) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(f16) = G_BITCAST %1(i16) + %3:sgpr(f16) = G_FSQRT %2 + %4:sgpr(i16) = G_BITCAST %3(f16) + %5:sgpr(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... --- @@ -230,10 +255,13 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; CHECK-NEXT: [[V_S_SQRT_F32_e64_:%[0-9]+]]:sreg_32_xexec = nofpexcept V_S_SQRT_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: $vgpr0 = COPY [[V_S_SQRT_F32_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sqrt), %0(s32) - $vgpr0 = COPY %1(s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[V_S_SQRT_F32_e64_]] + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]] + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(f32) = G_BITCAST %0(i32) + %2:sgpr(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sqrt), %1(f32) + %3:sgpr(i32) = G_BITCAST %2(f32) + $vgpr0 = COPY %3(i32) ... --- @@ -252,10 +280,12 @@ body: | ; CHECK-NEXT: [[V_S_SQRT_F16_e64_:%[0-9]+]]:sreg_32_xexec = nofpexcept V_S_SQRT_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[V_S_SQRT_F16_e64_]] ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0(s32) - %2:sgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sqrt), %1(s16) - %3:sgpr(s32) = G_ANYEXT %2(s16) - $vgpr0 = COPY %3(s32) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(f16) = G_BITCAST %1(i16) + %3:sgpr(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sqrt), %2(f16) + %4:sgpr(i16) = G_BITCAST %3(f16) + %5:sgpr(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptr-add.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptr-add.mir index 41e416e3f5d72..b090d656c478e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptr-add.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptr-add.mir @@ -85,9 +85,9 @@ body: | ; GFX10-WAVE32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX10-WAVE32-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 - %1:sgpr(s64) = COPY $sgpr2_sgpr3 - %2:sgpr(p0) = G_PTR_ADD %0, %1 - S_ENDPGM 0, implicit %2 + %1:sgpr(i64) = COPY $sgpr2_sgpr3 + %2:sgpr(p0) = G_PTR_ADD %0, %1(i64) + S_ENDPGM 0, implicit %2(p0) ... @@ -169,9 +169,9 @@ body: | ; GFX10-WAVE32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-WAVE32-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = COPY $vgpr2_vgpr3 - %2:vgpr(p0) = G_PTR_ADD %0, %1 - S_ENDPGM 0, implicit %2 + %1:vgpr(i64) = COPY $vgpr2_vgpr3 + %2:vgpr(p0) = G_PTR_ADD %0, %1(i64) + S_ENDPGM 0, implicit %2(p0) ... 
@@ -253,9 +253,9 @@ body: | ; GFX10-WAVE32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-WAVE32-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 - %1:vgpr(s64) = COPY $vgpr0_vgpr1 - %2:vgpr(p0) = G_PTR_ADD %0, %1 - S_ENDPGM 0, implicit %2 + %1:vgpr(i64) = COPY $vgpr0_vgpr1 + %2:vgpr(p0) = G_PTR_ADD %0, %1(i64) + S_ENDPGM 0, implicit %2(p0) ... @@ -307,9 +307,9 @@ body: | ; GFX10-WAVE32-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def dead $scc ; GFX10-WAVE32-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]] %0:sgpr(p3) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(p3) = G_PTR_ADD %0, %1 - S_ENDPGM 0, implicit %2 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(p3) = G_PTR_ADD %0, %1(i32) + S_ENDPGM 0, implicit %2(p3) ... @@ -361,9 +361,9 @@ body: | ; GFX10-WAVE32-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec ; GFX10-WAVE32-NEXT: S_ENDPGM 0, implicit [[V_ADD_U32_e64_]] %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(p3) = G_PTR_ADD %0, %1 - S_ENDPGM 0, implicit %2 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(p3) = G_PTR_ADD %0, %1(i32) + S_ENDPGM 0, implicit %2(p3) ... @@ -415,9 +415,9 @@ body: | ; GFX10-WAVE32-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec ; GFX10-WAVE32-NEXT: S_ENDPGM 0, implicit [[V_ADD_U32_e64_]] %0:sgpr(p3) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(p3) = G_PTR_ADD %0, %1 - S_ENDPGM 0, implicit %2 + %1:vgpr(i32) = COPY $vgpr0 + %2:vgpr(p3) = G_PTR_ADD %0, %1(i32) + S_ENDPGM 0, implicit %2(p3) ... @@ -469,9 +469,9 @@ body: | ; GFX10-WAVE32-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def dead $scc ; GFX10-WAVE32-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]] %0:sgpr(p6) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(p6) = G_PTR_ADD %0, %1 - S_ENDPGM 0, implicit %2 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(p6) = G_PTR_ADD %0, %1(i32) + S_ENDPGM 0, implicit %2(p6) ... @@ -523,9 +523,9 @@ body: | ; GFX10-WAVE32-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def dead $scc ; GFX10-WAVE32-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]] %0:sgpr(p2) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(p2) = G_PTR_ADD %0, %1 - S_ENDPGM 0, implicit %2 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(p2) = G_PTR_ADD %0, %1(i32) + S_ENDPGM 0, implicit %2(p2) ... @@ -607,9 +607,9 @@ body: | ; GFX10-WAVE32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX10-WAVE32-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(p999) = COPY $sgpr0_sgpr1 - %1:sgpr(s64) = COPY $sgpr2_sgpr3 - %2:sgpr(p999) = G_PTR_ADD %0, %1 - S_ENDPGM 0, implicit %2 + %1:sgpr(i64) = COPY $sgpr2_sgpr3 + %2:sgpr(p999) = G_PTR_ADD %0, %1(i64) + S_ENDPGM 0, implicit %2(p999) ... @@ -691,8 +691,8 @@ body: | ; GFX10-WAVE32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-WAVE32-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(p999) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = COPY $vgpr2_vgpr3 - %2:vgpr(p999) = G_PTR_ADD %0, %1 - S_ENDPGM 0, implicit %2 + %1:vgpr(i64) = COPY $vgpr2_vgpr3 + %2:vgpr(p999) = G_PTR_ADD %0, %1(i64) + S_ENDPGM 0, implicit %2(p999) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrmask.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrmask.mir index 2a3d97d603b13..e2e60ea971263 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrmask.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrmask.mir @@ -18,9 +18,9 @@ body: | ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[COPY1]], implicit-def dead $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(p3) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(p3) = G_PTRMASK %0, %1 - S_ENDPGM 0, implicit %2 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(p3) = G_PTRMASK %0, %1(i32) + S_ENDPGM 0, implicit %2(p3) ... @@ -41,9 +41,9 @@ body: | ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def dead $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(p3) = COPY $sgpr0 - %const:sgpr(s32) = G_CONSTANT i32 -252645136 - %1:sgpr(p3) = G_PTRMASK %0, %const - S_ENDPGM 0, implicit %1 + %const:sgpr(i32) = G_CONSTANT i32 -252645136 + %2:sgpr(p3) = G_PTRMASK %0, %const(i32) + S_ENDPGM 0, implicit %2(p3) ... @@ -64,9 +64,9 @@ body: | ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def dead $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(p3) = COPY $sgpr0 - %const:sgpr(s32) = G_CONSTANT i32 -1 - %1:sgpr(p3) = G_PTRMASK %0, %const - S_ENDPGM 0, implicit %1 + %const:sgpr(i32) = G_CONSTANT i32 -1 + %2:sgpr(p3) = G_PTRMASK %0, %const(i32) + S_ENDPGM 0, implicit %2(p3) ... @@ -87,9 +87,9 @@ body: | ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def dead $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(p3) = COPY $sgpr0 - %const:sgpr(s32) = G_CONSTANT i32 0 - %1:sgpr(p3) = G_PTRMASK %0, %const - S_ENDPGM 0, implicit %1 + %const:sgpr(i32) = G_CONSTANT i32 0 + %2:sgpr(p3) = G_PTRMASK %0, %const(i32) + S_ENDPGM 0, implicit %2(p3) ... @@ -110,9 +110,9 @@ body: | ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def dead $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(p3) = COPY $sgpr0 - %const:sgpr(s32) = G_CONSTANT i32 -2147483648 - %1:sgpr(p3) = G_PTRMASK %0, %const - S_ENDPGM 0, implicit %1 + %const:sgpr(i32) = G_CONSTANT i32 -2147483648 + %2:sgpr(p3) = G_PTRMASK %0, %const(i32) + S_ENDPGM 0, implicit %2(p3) ... @@ -133,9 +133,9 @@ body: | ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def dead $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(p3) = COPY $sgpr0 - %const:sgpr(s32) = G_CONSTANT i32 -1073741824 - %1:sgpr(p3) = G_PTRMASK %0, %const - S_ENDPGM 0, implicit %1 + %const:sgpr(i32) = G_CONSTANT i32 -1073741824 + %2:sgpr(p3) = G_PTRMASK %0, %const(i32) + S_ENDPGM 0, implicit %2(p3) ... @@ -156,9 +156,9 @@ body: | ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def dead $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(p3) = COPY $sgpr0 - %const:sgpr(s32) = G_CONSTANT i32 -2 - %1:sgpr(p3) = G_PTRMASK %0, %const - S_ENDPGM 0, implicit %1 + %const:sgpr(i32) = G_CONSTANT i32 -2 + %2:sgpr(p3) = G_PTRMASK %0, %const(i32) + S_ENDPGM 0, implicit %2(p3) ... 
@@ -179,9 +179,9 @@ body: | ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def dead $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(p3) = COPY $sgpr0 - %const:sgpr(s32) = G_CONSTANT i32 -4 - %1:sgpr(p3) = G_PTRMASK %0, %const - S_ENDPGM 0, implicit %1 + %const:sgpr(i32) = G_CONSTANT i32 -4 + %2:sgpr(p3) = G_PTRMASK %0, %const(i32) + S_ENDPGM 0, implicit %2(p3) ... @@ -202,9 +202,9 @@ body: | ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def dead $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(p3) = COPY $sgpr0 - %const:sgpr(s32) = G_CONSTANT i32 -8 - %1:sgpr(p3) = G_PTRMASK %0, %const - S_ENDPGM 0, implicit %1 + %const:sgpr(i32) = G_CONSTANT i32 -8 + %2:sgpr(p3) = G_PTRMASK %0, %const(i32) + S_ENDPGM 0, implicit %2(p3) ... @@ -225,9 +225,9 @@ body: | ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def dead $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(p3) = COPY $sgpr0 - %const:sgpr(s32) = G_CONSTANT i32 -16 - %1:sgpr(p3) = G_PTRMASK %0, %const - S_ENDPGM 0, implicit %1 + %const:sgpr(i32) = G_CONSTANT i32 -16 + %2:sgpr(p3) = G_PTRMASK %0, %const(i32) + S_ENDPGM 0, implicit %2(p3) ... @@ -248,9 +248,9 @@ body: | ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def dead $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(p3) = COPY $sgpr0 - %const:sgpr(s32) = G_CONSTANT i32 -536870912 - %1:sgpr(p3) = G_PTRMASK %0, %const - S_ENDPGM 0, implicit %1 + %const:sgpr(i32) = G_CONSTANT i32 -536870912 + %2:sgpr(p3) = G_PTRMASK %0, %const(i32) + S_ENDPGM 0, implicit %2(p3) ... @@ -271,9 +271,9 @@ body: | ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[COPY1]], implicit-def dead $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 - %1:sgpr(s64) = COPY $sgpr2_sgpr3 - %2:sgpr(p0) = G_PTRMASK %0, %1 - S_ENDPGM 0, implicit %2 + %1:sgpr(i64) = COPY $sgpr2_sgpr3 + %2:sgpr(p0) = G_PTRMASK %0, %1(i64) + S_ENDPGM 0, implicit %2(p0) ... @@ -295,9 +295,9 @@ body: | ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 - %1:sgpr(s64) = G_CONSTANT i64 -1 - %2:sgpr(p0) = G_PTRMASK %0, %1 - S_ENDPGM 0, implicit %2 + %1:sgpr(i64) = G_CONSTANT i64 -1 + %2:sgpr(p0) = G_PTRMASK %0, %1(i64) + S_ENDPGM 0, implicit %2(p0) ... @@ -318,9 +318,9 @@ body: | ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[S_MOV_B64_]], implicit-def dead $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 - %1:sgpr(s64) = G_CONSTANT i64 0 - %2:sgpr(p0) = G_PTRMASK %0, %1 - S_ENDPGM 0, implicit %2 + %1:sgpr(i64) = G_CONSTANT i64 0 + %2:sgpr(p0) = G_PTRMASK %0, %1(i64) + S_ENDPGM 0, implicit %2(p0) ... @@ -341,9 +341,9 @@ body: | ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[S_MOV_B]], implicit-def dead $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 - %1:sgpr(s64) = G_CONSTANT i64 -1085102592571150096 - %2:sgpr(p0) = G_PTRMASK %0, %1 - S_ENDPGM 0, implicit %2 + %1:sgpr(i64) = G_CONSTANT i64 -1085102592571150096 + %2:sgpr(p0) = G_PTRMASK %0, %1(i64) + S_ENDPGM 0, implicit %2(p0) ... 
@@ -364,9 +364,9 @@ body: | ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], %const, implicit-def dead $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 - %const:sgpr(s64) = G_CONSTANT i64 -9223372036854775808 - %1:sgpr(p0) = G_PTRMASK %0, %const - S_ENDPGM 0, implicit %1 + %const:sgpr(i64) = G_CONSTANT i64 -9223372036854775808 + %2:sgpr(p0) = G_PTRMASK %0, %const(i64) + S_ENDPGM 0, implicit %2(p0) ... @@ -391,9 +391,9 @@ body: | ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[COPY2]], %subreg.sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 - %const:sgpr(s64) = G_CONSTANT i64 -4294967296 - %1:sgpr(p0) = G_PTRMASK %0, %const - S_ENDPGM 0, implicit %1 + %const:sgpr(i64) = G_CONSTANT i64 -4294967296 + %2:sgpr(p0) = G_PTRMASK %0, %const(i64) + S_ENDPGM 0, implicit %2(p0) ... @@ -414,9 +414,9 @@ body: | ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], %const, implicit-def dead $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 - %const:sgpr(s64) = G_CONSTANT i64 4294967296 - %1:sgpr(p0) = G_PTRMASK %0, %const - S_ENDPGM 0, implicit %1 + %const:sgpr(i64) = G_CONSTANT i64 4294967296 + %2:sgpr(p0) = G_PTRMASK %0, %const(i64) + S_ENDPGM 0, implicit %2(p0) ... @@ -441,9 +441,9 @@ body: | ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[COPY2]], %subreg.sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 - %const:sgpr(s64) = G_CONSTANT i64 -2 - %1:sgpr(p0) = G_PTRMASK %0, %const - S_ENDPGM 0, implicit %1 + %const:sgpr(i64) = G_CONSTANT i64 -2 + %2:sgpr(p0) = G_PTRMASK %0, %const(i64) + S_ENDPGM 0, implicit %2(p0) ... @@ -468,9 +468,9 @@ body: | ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[COPY2]], %subreg.sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 - %const:sgpr(s64) = G_CONSTANT i64 -4 - %1:sgpr(p0) = G_PTRMASK %0, %const - S_ENDPGM 0, implicit %1 + %const:sgpr(i64) = G_CONSTANT i64 -4 + %2:sgpr(p0) = G_PTRMASK %0, %const(i64) + S_ENDPGM 0, implicit %2(p0) ... @@ -495,9 +495,9 @@ body: | ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[COPY2]], %subreg.sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 - %const:sgpr(s64) = G_CONSTANT i64 -8 - %1:sgpr(p0) = G_PTRMASK %0, %const - S_ENDPGM 0, implicit %1 + %const:sgpr(i64) = G_CONSTANT i64 -8 + %2:sgpr(p0) = G_PTRMASK %0, %const(i64) + S_ENDPGM 0, implicit %2(p0) ... @@ -522,9 +522,9 @@ body: | ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[COPY2]], %subreg.sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 - %const:sgpr(s64) = G_CONSTANT i64 -16 - %1:sgpr(p0) = G_PTRMASK %0, %const - S_ENDPGM 0, implicit %1 + %const:sgpr(i64) = G_CONSTANT i64 -16 + %2:sgpr(p0) = G_PTRMASK %0, %const(i64) + S_ENDPGM 0, implicit %2(p0) ... 
@@ -549,9 +549,9 @@ body: | ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[COPY2]], %subreg.sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 - %const:sgpr(s64) = G_CONSTANT i64 -536870912 - %1:sgpr(p0) = G_PTRMASK %0, %const - S_ENDPGM 0, implicit %1 + %const:sgpr(i64) = G_CONSTANT i64 -536870912 + %2:sgpr(p0) = G_PTRMASK %0, %const(i64) + S_ENDPGM 0, implicit %2(p0) ... @@ -572,9 +572,9 @@ body: | ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], %const, implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] %0:vgpr(p3) = COPY $vgpr0 - %const:vgpr(s32) = G_CONSTANT i32 -252645136 - %1:vgpr(p3) = G_PTRMASK %0, %const - S_ENDPGM 0, implicit %1 + %const:vgpr(i32) = G_CONSTANT i32 -252645136 + %2:vgpr(p3) = G_PTRMASK %0, %const(i32) + S_ENDPGM 0, implicit %2(p3) ... @@ -595,9 +595,9 @@ body: | ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], %const, implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] %0:vgpr(p3) = COPY $vgpr0 - %const:vgpr(s32) = G_CONSTANT i32 -2 - %1:vgpr(p3) = G_PTRMASK %0, %const - S_ENDPGM 0, implicit %1 + %const:vgpr(i32) = G_CONSTANT i32 -2 + %2:vgpr(p3) = G_PTRMASK %0, %const(i32) + S_ENDPGM 0, implicit %2(p3) ... @@ -618,9 +618,9 @@ body: | ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], %const, implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] %0:vgpr(p3) = COPY $vgpr0 - %const:vgpr(s32) = G_CONSTANT i32 -4 - %1:vgpr(p3) = G_PTRMASK %0, %const - S_ENDPGM 0, implicit %1 + %const:vgpr(i32) = G_CONSTANT i32 -4 + %2:vgpr(p3) = G_PTRMASK %0, %const(i32) + S_ENDPGM 0, implicit %2(p3) ... @@ -641,9 +641,9 @@ body: | ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], %const, implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] %0:vgpr(p3) = COPY $vgpr0 - %const:vgpr(s32) = G_CONSTANT i32 -8 - %1:vgpr(p3) = G_PTRMASK %0, %const - S_ENDPGM 0, implicit %1 + %const:vgpr(i32) = G_CONSTANT i32 -8 + %2:vgpr(p3) = G_PTRMASK %0, %const(i32) + S_ENDPGM 0, implicit %2(p3) ... @@ -664,9 +664,9 @@ body: | ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], %const, implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] %0:vgpr(p3) = COPY $vgpr0 - %const:vgpr(s32) = G_CONSTANT i32 -16 - %1:vgpr(p3) = G_PTRMASK %0, %const - S_ENDPGM 0, implicit %1 + %const:vgpr(i32) = G_CONSTANT i32 -16 + %2:vgpr(p3) = G_PTRMASK %0, %const(i32) + S_ENDPGM 0, implicit %2(p3) ... @@ -687,9 +687,9 @@ body: | ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], %const, implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] %0:vgpr(p3) = COPY $vgpr0 - %const:vgpr(s32) = G_CONSTANT i32 -536870912 - %1:vgpr(p3) = G_PTRMASK %0, %const - S_ENDPGM 0, implicit %1 + %const:vgpr(i32) = G_CONSTANT i32 -536870912 + %2:vgpr(p3) = G_PTRMASK %0, %const(i32) + S_ENDPGM 0, implicit %2(p3) ... @@ -716,9 +716,9 @@ body: | ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = COPY $vgpr2_vgpr3 - %2:vgpr(p0) = G_PTRMASK %0, %1 - S_ENDPGM 0, implicit %2 + %1:vgpr(i64) = COPY $vgpr2_vgpr3 + %2:vgpr(p0) = G_PTRMASK %0, %1(i64) + S_ENDPGM 0, implicit %2(p0) ... 
@@ -745,9 +745,9 @@ body: | ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_CONSTANT i64 -1085102592571150096 - %2:vgpr(p0) = G_PTRMASK %0, %1 - S_ENDPGM 0, implicit %2 + %1:vgpr(i64) = G_CONSTANT i64 -1085102592571150096 + %2:vgpr(p0) = G_PTRMASK %0, %1(i64) + S_ENDPGM 0, implicit %2(p0) ... @@ -772,9 +772,9 @@ body: | ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 - %const:vgpr(s64) = G_CONSTANT i64 -2 - %1:vgpr(p0) = G_PTRMASK %0, %const - S_ENDPGM 0, implicit %1 + %const:vgpr(i64) = G_CONSTANT i64 -2 + %2:vgpr(p0) = G_PTRMASK %0, %const(i64) + S_ENDPGM 0, implicit %2(p0) ... @@ -799,9 +799,9 @@ body: | ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 - %const:vgpr(s64) = G_CONSTANT i64 -4 - %1:vgpr(p0) = G_PTRMASK %0, %const - S_ENDPGM 0, implicit %1 + %const:vgpr(i64) = G_CONSTANT i64 -4 + %2:vgpr(p0) = G_PTRMASK %0, %const(i64) + S_ENDPGM 0, implicit %2(p0) ... @@ -826,9 +826,9 @@ body: | ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 - %const:vgpr(s64) = G_CONSTANT i64 -4 - %1:vgpr(p0) = G_PTRMASK %0, %const - S_ENDPGM 0, implicit %1 + %const:vgpr(i64) = G_CONSTANT i64 -4 + %2:vgpr(p0) = G_PTRMASK %0, %const(i64) + S_ENDPGM 0, implicit %2(p0) ... @@ -853,9 +853,9 @@ body: | ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 - %const:vgpr(s64) = G_CONSTANT i64 -16 - %1:vgpr(p0) = G_PTRMASK %0, %const - S_ENDPGM 0, implicit %1 + %const:vgpr(i64) = G_CONSTANT i64 -16 + %2:vgpr(p0) = G_PTRMASK %0, %const(i64) + S_ENDPGM 0, implicit %2(p0) ... @@ -880,9 +880,9 @@ body: | ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 - %const:vgpr(s64) = G_CONSTANT i64 -536870912 - %1:vgpr(p0) = G_PTRMASK %0, %const - S_ENDPGM 0, implicit %1 + %const:vgpr(i64) = G_CONSTANT i64 -536870912 + %2:vgpr(p0) = G_PTRMASK %0, %const(i64) + S_ENDPGM 0, implicit %2(p0) ... @@ -899,13 +899,13 @@ body: | ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 - ; CHECK-NEXT: %const:sgpr(s32) = G_CONSTANT i32 -4 - ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:vgpr(p3) = G_PTRMASK [[COPY]], %const(s32) + ; CHECK-NEXT: %const:sgpr(i32) = G_CONSTANT i32 -4 + ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:vgpr(p3) = G_PTRMASK [[COPY]], %const(i32) ; CHECK-NEXT: S_ENDPGM 0, implicit [[PTRMASK]](p3) %0:sgpr(p3) = COPY $sgpr0 - %const:sgpr(s32) = G_CONSTANT i32 -4 - %1:vgpr(p3) = G_PTRMASK %0, %const - S_ENDPGM 0, implicit %1 + %const:sgpr(i32) = G_CONSTANT i32 -4 + %2:vgpr(p3) = G_PTRMASK %0, %const(i32) + S_ENDPGM 0, implicit %2(p3) ... 
@@ -922,12 +922,12 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: %const:sgpr(s32) = G_CONSTANT i32 -4 - ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:vgpr(p0) = G_PTRMASK [[COPY]], %const(s32) + ; CHECK-NEXT: %const:sgpr(i32) = G_CONSTANT i32 -4 + ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:vgpr(p0) = G_PTRMASK [[COPY]], %const(i32) ; CHECK-NEXT: S_ENDPGM 0, implicit [[PTRMASK]](p0) %0:sgpr(p0) = COPY $sgpr0_sgpr1 - %const:sgpr(s32) = G_CONSTANT i32 -4 - %1:vgpr(p0) = G_PTRMASK %0, %const - S_ENDPGM 0, implicit %1 + %const:sgpr(i32) = G_CONSTANT i32 -4 + %2:vgpr(p0) = G_PTRMASK %0, %const(i32) + S_ENDPGM 0, implicit %2(p0) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrtoint.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrtoint.mir index 60417c290477b..3dff9a6a06b6c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrtoint.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrtoint.mir @@ -17,8 +17,8 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:sgpr(p3) = COPY $sgpr0 - %1:sgpr(s32) = G_PTRTOINT %0 - S_ENDPGM 0, implicit %1 + %1:sgpr(i32) = G_PTRTOINT %0(p3) + S_ENDPGM 0, implicit %1(i32) ... --- @@ -38,8 +38,8 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:sgpr(p5) = COPY $sgpr0 - %1:sgpr(s32) = G_PTRTOINT %0 - S_ENDPGM 0, implicit %1 + %1:sgpr(i32) = G_PTRTOINT %0(p5) + S_ENDPGM 0, implicit %1(i32) ... --- @@ -59,8 +59,8 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 - %1:sgpr(s64) = G_PTRTOINT %0 - S_ENDPGM 0, implicit %1 + %1:sgpr(i64) = G_PTRTOINT %0(p0) + S_ENDPGM 0, implicit %1(i64) ... --- @@ -80,8 +80,8 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 - %1:sgpr(s64) = G_PTRTOINT %0 - S_ENDPGM 0, implicit %1 + %1:sgpr(i64) = G_PTRTOINT %0(p1) + S_ENDPGM 0, implicit %1(i64) ... --- @@ -101,6 +101,6 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:sgpr(p999) = COPY $sgpr0_sgpr1 - %1:sgpr(s64) = G_PTRTOINT %0 - S_ENDPGM 0, implicit %1 + %1:sgpr(i64) = G_PTRTOINT %0(p999) + S_ENDPGM 0, implicit %1(i64) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-returnaddress.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-returnaddress.mir index 9e50f4d9fccaf..b9470ef3ff6f6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-returnaddress.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-returnaddress.mir @@ -19,7 +19,7 @@ body: | ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY]], implicit [[COPY]] %0:sgpr(p0) = COPY $sgpr30_sgpr31 %1:sgpr(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0 - S_ENDPGM 0, implicit %0, implicit %1 + S_ENDPGM 0, implicit %0(p0), implicit %1(p0) ... --- @@ -39,7 +39,7 @@ body: | ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY1]], implicit [[COPY]] %0:sgpr(p0) = COPY $sgpr30_sgpr31 %1:sgpr(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0 - S_ENDPGM 0, implicit %0, implicit %1 + S_ENDPGM 0, implicit %0(p0), implicit %1(p0) ... 
--- @@ -57,7 +57,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:sgpr(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0 - S_ENDPGM 0, implicit %0 + S_ENDPGM 0, implicit %0(p0) ... --- @@ -78,11 +78,14 @@ body: | ; CHECK-NEXT: bb.1: ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY]] bb.0: + successors: %bb.1(0x80000000) + G_BR %bb.1 bb.1: %0:sgpr(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0 - S_ENDPGM 0, implicit %0 + S_ENDPGM 0, implicit %0(p0) + ... --- @@ -103,12 +106,15 @@ body: | ; CHECK-NEXT: bb.1: ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY]], implicit [[COPY]] bb.0: + successors: %bb.1(0x80000000) + %0:sgpr(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0 G_BR %bb.1 bb.1: %1:sgpr(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0 - S_ENDPGM 0, implicit %0, implicit %1 + S_ENDPGM 0, implicit %0(p0), implicit %1(p0) + ... --- @@ -125,5 +131,5 @@ body: | ; CHECK: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_MOV_B64_]] %0:sgpr(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0 - S_ENDPGM 0, implicit %0 + S_ENDPGM 0, implicit %0(p0) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sbfx.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sbfx.mir index 98399b5e3e133..40441c3234dc3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sbfx.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sbfx.mir @@ -21,11 +21,11 @@ body: | ; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 10, implicit $exec ; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BFE_I32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = G_CONSTANT i32 2 - %2:vgpr(s32) = G_CONSTANT i32 10 - %3:vgpr(s32) = G_SBFX %0, %1(s32), %2 - S_ENDPGM 0, implicit %3 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = G_CONSTANT i32 2 + %2:vgpr(i32) = G_CONSTANT i32 10 + %3:vgpr(i32) = G_SBFX %0, %1(i32), %2 + S_ENDPGM 0, implicit %3(i32) ... --- @@ -44,9 +44,9 @@ body: | ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BFE_I32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = G_SBFX %0, %1(s32), %2 - S_ENDPGM 0, implicit %3 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(i32) = G_SBFX %0, %1(i32), %2 + S_ENDPGM 0, implicit %3(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-scalar-float-sop1.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-scalar-float-sop1.mir index 130f87e44eac7..202c8cebef312 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-scalar-float-sop1.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-scalar-float-sop1.mir @@ -16,9 +16,10 @@ body: | ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX1150-NEXT: [[S_CVT_F32_I32_:%[0-9]+]]:sreg_32 = S_CVT_F32_I32 [[COPY]], implicit $mode ; GFX1150-NEXT: $sgpr0 = COPY [[S_CVT_F32_I32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = G_SITOFP %0(s32) - $sgpr0 = COPY %1(s32) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(f32) = G_SITOFP %0(i32) + %2:sgpr(i32) = G_BITCAST %1(f32) + $sgpr0 = COPY %2(i32) ... 
--- @@ -35,9 +36,10 @@ body: | ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX1150-NEXT: [[S_CVT_F32_U32_:%[0-9]+]]:sreg_32 = S_CVT_F32_U32 [[COPY]], implicit $mode ; GFX1150-NEXT: $sgpr0 = COPY [[S_CVT_F32_U32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = G_UITOFP %0(s32) - $sgpr0 = COPY %1(s32) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(f32) = G_UITOFP %0(i32) + %2:sgpr(i32) = G_BITCAST %1(f32) + $sgpr0 = COPY %2(i32) ... --- @@ -52,11 +54,12 @@ body: | ; GFX1150: liveins: $sgpr0 ; GFX1150-NEXT: {{ $}} ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX1150-NEXT: %1:sreg_32 = nofpexcept S_CVT_I32_F32 [[COPY]], implicit $mode - ; GFX1150-NEXT: $sgpr0 = COPY %1 - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = G_FPTOSI %0(s32) - $sgpr0 = COPY %1(s32) + ; GFX1150-NEXT: [[S_CVT_I32_F32_:%[0-9]+]]:sreg_32 = nofpexcept S_CVT_I32_F32 [[COPY]], implicit $mode + ; GFX1150-NEXT: $sgpr0 = COPY [[S_CVT_I32_F32_]] + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(f32) = G_BITCAST %0(i32) + %2:sgpr(i32) = G_FPTOSI %1(f32) + $sgpr0 = COPY %2(i32) ... --- @@ -71,11 +74,12 @@ body: | ; GFX1150: liveins: $sgpr0 ; GFX1150-NEXT: {{ $}} ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX1150-NEXT: %1:sreg_32 = nofpexcept S_CVT_U32_F32 [[COPY]], implicit $mode - ; GFX1150-NEXT: $sgpr0 = COPY %1 - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = G_FPTOUI %0(s32) - $sgpr0 = COPY %1(s32) + ; GFX1150-NEXT: [[S_CVT_U32_F32_:%[0-9]+]]:sreg_32 = nofpexcept S_CVT_U32_F32 [[COPY]], implicit $mode + ; GFX1150-NEXT: $sgpr0 = COPY [[S_CVT_U32_F32_]] + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(f32) = G_BITCAST %0(i32) + %2:sgpr(i32) = G_FPTOUI %1(f32) + $sgpr0 = COPY %2(i32) ... --- @@ -90,12 +94,14 @@ body: | ; GFX1150: liveins: $sgpr0 ; GFX1150-NEXT: {{ $}} ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX1150-NEXT: %2:sreg_32 = nofpexcept S_CVT_F32_F16 [[COPY]], implicit $mode - ; GFX1150-NEXT: $sgpr0 = COPY %2 - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0(s32) - %2:sgpr(s32) = G_FPEXT %1(s16) - $sgpr0 = COPY %2(s32) + ; GFX1150-NEXT: [[S_CVT_F32_F16_:%[0-9]+]]:sreg_32 = nofpexcept S_CVT_F32_F16 [[COPY]], implicit $mode + ; GFX1150-NEXT: $sgpr0 = COPY [[S_CVT_F32_F16_]] + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(f16) = G_BITCAST %1(i16) + %3:sgpr(f32) = G_FPEXT %2(f16) + %4:sgpr(i32) = G_BITCAST %3(f32) + $sgpr0 = COPY %4(i32) ... 
--- @@ -110,15 +116,19 @@ body: | ; GFX1150: liveins: $sgpr0 ; GFX1150-NEXT: {{ $}} ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX1150-NEXT: [[S_CVT_HI_F32_F16_:%[0-9]+]]:sreg_32 = S_CVT_HI_F32_F16 [[COPY]], implicit $mode - ; GFX1150-NEXT: $sgpr0 = COPY [[S_CVT_HI_F32_F16_]] - %0:sgpr(<2 x s16>) = COPY $sgpr0 - %2:sgpr(s32) = G_BITCAST %0(<2 x s16>) - %3:sgpr(s32) = G_CONSTANT i32 16 - %4:sgpr(s32) = G_LSHR %2, %3(s32) - %5:sgpr(s16) = G_TRUNC %4(s32) - %6:sgpr(s32) = G_FPEXT %5(s16) - $sgpr0 = COPY %6(s32) + ; GFX1150-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 + ; GFX1150-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def dead $scc + ; GFX1150-NEXT: [[S_CVT_F32_F16_:%[0-9]+]]:sreg_32 = nofpexcept S_CVT_F32_F16 [[S_LSHR_B32_]], implicit $mode + ; GFX1150-NEXT: $sgpr0 = COPY [[S_CVT_F32_F16_]] + %0:sgpr(<2 x i16>) = COPY $sgpr0 + %1:sgpr(i32) = G_BITCAST %0(<2 x i16>) + %2:sgpr(i32) = G_CONSTANT i32 16 + %3:sgpr(i32) = G_LSHR %1, %2(i32) + %4:sgpr(i16) = G_TRUNC %3(i32) + %5:sgpr(f16) = G_BITCAST %4(i16) + %6:sgpr(f32) = G_FPEXT %5(f16) + %7:sgpr(i32) = G_BITCAST %6(f32) + $sgpr0 = COPY %7(i32) ... --- @@ -133,12 +143,14 @@ body: | ; GFX1150: liveins: $sgpr0 ; GFX1150-NEXT: {{ $}} ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX1150-NEXT: %1:sreg_32 = nofpexcept S_CVT_F16_F32 [[COPY]], implicit $mode - ; GFX1150-NEXT: $sgpr0 = COPY %1 - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_FPTRUNC %0(s32) - %2:sgpr(s32) = G_ANYEXT %1(s16) - $sgpr0 = COPY %2(s32) + ; GFX1150-NEXT: [[S_CVT_F16_F32_:%[0-9]+]]:sreg_32 = nofpexcept S_CVT_F16_F32 [[COPY]], implicit $mode + ; GFX1150-NEXT: $sgpr0 = COPY [[S_CVT_F16_F32_]] + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(f32) = G_BITCAST %0(i32) + %2:sgpr(f16) = G_FPTRUNC %1(f32) + %3:sgpr(i16) = G_BITCAST %2(f16) + %4:sgpr(i32) = G_ANYEXT %3(i16) + $sgpr0 = COPY %4(i32) ... --- @@ -153,11 +165,13 @@ body: | ; GFX1150: liveins: $sgpr0 ; GFX1150-NEXT: {{ $}} ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX1150-NEXT: %1:sreg_32 = nofpexcept S_CEIL_F32 [[COPY]], implicit $mode - ; GFX1150-NEXT: $sgpr0 = COPY %1 - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = G_FCEIL %0 - $sgpr0 = COPY %1(s32) + ; GFX1150-NEXT: [[S_CEIL_F32_:%[0-9]+]]:sreg_32 = nofpexcept S_CEIL_F32 [[COPY]], implicit $mode + ; GFX1150-NEXT: $sgpr0 = COPY [[S_CEIL_F32_]] + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(f32) = G_BITCAST %0(i32) + %2:sgpr(f32) = G_FCEIL %1 + %3:sgpr(i32) = G_BITCAST %2(f32) + $sgpr0 = COPY %3(i32) ... --- @@ -172,11 +186,13 @@ body: | ; GFX1150: liveins: $sgpr0 ; GFX1150-NEXT: {{ $}} ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX1150-NEXT: %1:sreg_32 = nofpexcept S_FLOOR_F32 [[COPY]], implicit $mode - ; GFX1150-NEXT: $sgpr0 = COPY %1 - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = G_FFLOOR %0 - $sgpr0 = COPY %1(s32) + ; GFX1150-NEXT: [[S_FLOOR_F32_:%[0-9]+]]:sreg_32 = nofpexcept S_FLOOR_F32 [[COPY]], implicit $mode + ; GFX1150-NEXT: $sgpr0 = COPY [[S_FLOOR_F32_]] + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(f32) = G_BITCAST %0(i32) + %2:sgpr(f32) = G_FFLOOR %1 + %3:sgpr(i32) = G_BITCAST %2(f32) + $sgpr0 = COPY %3(i32) ... 
--- @@ -191,11 +207,13 @@ body: | ; GFX1150: liveins: $sgpr0 ; GFX1150-NEXT: {{ $}} ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX1150-NEXT: %1:sreg_32 = nofpexcept S_TRUNC_F32 [[COPY]], implicit $mode - ; GFX1150-NEXT: $sgpr0 = COPY %1 - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = G_INTRINSIC_TRUNC %0 - $sgpr0 = COPY %1(s32) + ; GFX1150-NEXT: [[S_TRUNC_F32_:%[0-9]+]]:sreg_32 = nofpexcept S_TRUNC_F32 [[COPY]], implicit $mode + ; GFX1150-NEXT: $sgpr0 = COPY [[S_TRUNC_F32_]] + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(f32) = G_BITCAST %0(i32) + %2:sgpr(f32) = G_INTRINSIC_TRUNC %1 + %3:sgpr(i32) = G_BITCAST %2(f32) + $sgpr0 = COPY %3(i32) ... --- @@ -210,11 +228,13 @@ body: | ; GFX1150: liveins: $sgpr0 ; GFX1150-NEXT: {{ $}} ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX1150-NEXT: %1:sreg_32 = nofpexcept S_RNDNE_F32 [[COPY]], implicit $mode - ; GFX1150-NEXT: $sgpr0 = COPY %1 - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = G_INTRINSIC_ROUNDEVEN %0 - $sgpr0 = COPY %1(s32) + ; GFX1150-NEXT: [[S_RNDNE_F32_:%[0-9]+]]:sreg_32 = nofpexcept S_RNDNE_F32 [[COPY]], implicit $mode + ; GFX1150-NEXT: $sgpr0 = COPY [[S_RNDNE_F32_]] + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(f32) = G_BITCAST %0(i32) + %2:sgpr(f32) = G_INTRINSIC_ROUNDEVEN %1 + %3:sgpr(i32) = G_BITCAST %2(f32) + $sgpr0 = COPY %3(i32) ... --- @@ -229,13 +249,15 @@ body: | ; GFX1150: liveins: $sgpr0 ; GFX1150-NEXT: {{ $}} ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX1150-NEXT: %2:sreg_32 = nofpexcept S_CEIL_F16 [[COPY]], implicit $mode - ; GFX1150-NEXT: $sgpr0 = COPY %2 - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0(s32) - %2:sgpr(s16) = G_FCEIL %1 - %3:sgpr(s32) = G_ANYEXT %2(s16) - $sgpr0 = COPY %3(s32) + ; GFX1150-NEXT: [[S_CEIL_F16_:%[0-9]+]]:sreg_32 = nofpexcept S_CEIL_F16 [[COPY]], implicit $mode + ; GFX1150-NEXT: $sgpr0 = COPY [[S_CEIL_F16_]] + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(f16) = G_BITCAST %1(i16) + %3:sgpr(f16) = G_FCEIL %2 + %4:sgpr(i16) = G_BITCAST %3(f16) + %5:sgpr(i32) = G_ANYEXT %4(i16) + $sgpr0 = COPY %5(i32) ... --- @@ -250,13 +272,15 @@ body: | ; GFX1150: liveins: $sgpr0 ; GFX1150-NEXT: {{ $}} ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX1150-NEXT: %2:sreg_32 = nofpexcept S_FLOOR_F16 [[COPY]], implicit $mode - ; GFX1150-NEXT: $sgpr0 = COPY %2 - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0(s32) - %2:sgpr(s16) = G_FFLOOR %1 - %3:sgpr(s32) = G_ANYEXT %2(s16) - $sgpr0 = COPY %3(s32) + ; GFX1150-NEXT: [[S_FLOOR_F16_:%[0-9]+]]:sreg_32 = nofpexcept S_FLOOR_F16 [[COPY]], implicit $mode + ; GFX1150-NEXT: $sgpr0 = COPY [[S_FLOOR_F16_]] + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(f16) = G_BITCAST %1(i16) + %3:sgpr(f16) = G_FFLOOR %2 + %4:sgpr(i16) = G_BITCAST %3(f16) + %5:sgpr(i32) = G_ANYEXT %4(i16) + $sgpr0 = COPY %5(i32) ... 
--- @@ -271,13 +295,15 @@ body: | ; GFX1150: liveins: $sgpr0 ; GFX1150-NEXT: {{ $}} ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX1150-NEXT: %2:sreg_32 = nofpexcept S_TRUNC_F16 [[COPY]], implicit $mode - ; GFX1150-NEXT: $sgpr0 = COPY %2 - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0(s32) - %2:sgpr(s16) = G_INTRINSIC_TRUNC %1 - %3:sgpr(s32) = G_ANYEXT %2(s16) - $sgpr0 = COPY %3(s32) + ; GFX1150-NEXT: [[S_TRUNC_F16_:%[0-9]+]]:sreg_32 = nofpexcept S_TRUNC_F16 [[COPY]], implicit $mode + ; GFX1150-NEXT: $sgpr0 = COPY [[S_TRUNC_F16_]] + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(f16) = G_BITCAST %1(i16) + %3:sgpr(f16) = G_INTRINSIC_TRUNC %2 + %4:sgpr(i16) = G_BITCAST %3(f16) + %5:sgpr(i32) = G_ANYEXT %4(i16) + $sgpr0 = COPY %5(i32) ... --- @@ -292,12 +318,14 @@ body: | ; GFX1150: liveins: $sgpr0 ; GFX1150-NEXT: {{ $}} ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX1150-NEXT: %2:sreg_32 = nofpexcept S_RNDNE_F16 [[COPY]], implicit $mode - ; GFX1150-NEXT: $sgpr0 = COPY %2 - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0(s32) - %2:sgpr(s16) = G_INTRINSIC_ROUNDEVEN %1 - %3:sgpr(s32) = G_ANYEXT %2(s16) - $sgpr0 = COPY %3(s32) + ; GFX1150-NEXT: [[S_RNDNE_F16_:%[0-9]+]]:sreg_32 = nofpexcept S_RNDNE_F16 [[COPY]], implicit $mode + ; GFX1150-NEXT: $sgpr0 = COPY [[S_RNDNE_F16_]] + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(f16) = G_BITCAST %1(i16) + %3:sgpr(f16) = G_INTRINSIC_ROUNDEVEN %2 + %4:sgpr(i16) = G_BITCAST %3(f16) + %5:sgpr(i32) = G_ANYEXT %4(i16) + $sgpr0 = COPY %5(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-scalar-float-sop2.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-scalar-float-sop2.mir index d80a13c4d7c79..6ca5cf68c7016 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-scalar-float-sop2.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-scalar-float-sop2.mir @@ -15,12 +15,15 @@ body: | ; GFX1150-NEXT: {{ $}} ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX1150-NEXT: %2:sreg_32 = nofpexcept S_ADD_F32 [[COPY]], [[COPY1]], implicit $mode - ; GFX1150-NEXT: $sgpr0 = COPY %2 - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = G_FADD %0, %1 - $sgpr0 = COPY %2(s32) + ; GFX1150-NEXT: [[S_ADD_F32_:%[0-9]+]]:sreg_32 = nofpexcept S_ADD_F32 [[COPY]], [[COPY1]], implicit $mode + ; GFX1150-NEXT: $sgpr0 = COPY [[S_ADD_F32_]] + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(f32) = G_BITCAST %0(i32) + %3:sgpr(f32) = G_BITCAST %1(i32) + %4:sgpr(f32) = G_FADD %2, %3 + %5:sgpr(i32) = G_BITCAST %4(f32) + $sgpr0 = COPY %5(i32) ... --- @@ -36,12 +39,15 @@ body: | ; GFX1150-NEXT: {{ $}} ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX1150-NEXT: %2:sreg_32 = nofpexcept S_SUB_F32 [[COPY]], [[COPY1]], implicit $mode - ; GFX1150-NEXT: $sgpr0 = COPY %2 - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = G_FSUB %0, %1 - $sgpr0 = COPY %2(s32) + ; GFX1150-NEXT: [[S_SUB_F32_:%[0-9]+]]:sreg_32 = nofpexcept S_SUB_F32 [[COPY]], [[COPY1]], implicit $mode + ; GFX1150-NEXT: $sgpr0 = COPY [[S_SUB_F32_]] + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(f32) = G_BITCAST %0(i32) + %3:sgpr(f32) = G_BITCAST %1(i32) + %4:sgpr(f32) = G_FSUB %2, %3 + %5:sgpr(i32) = G_BITCAST %4(f32) + $sgpr0 = COPY %5(i32) ... 
--- @@ -57,12 +63,15 @@ body: | ; GFX1150-NEXT: {{ $}} ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX1150-NEXT: %2:sreg_32 = nofpexcept S_MUL_F32 [[COPY]], [[COPY1]], implicit $mode - ; GFX1150-NEXT: $sgpr0 = COPY %2 - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = G_FMUL %0, %1 - $sgpr0 = COPY %2(s32) + ; GFX1150-NEXT: [[S_MUL_F32_:%[0-9]+]]:sreg_32 = nofpexcept S_MUL_F32 [[COPY]], [[COPY1]], implicit $mode + ; GFX1150-NEXT: $sgpr0 = COPY [[S_MUL_F32_]] + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(f32) = G_BITCAST %0(i32) + %3:sgpr(f32) = G_BITCAST %1(i32) + %4:sgpr(f32) = G_FMUL %2, %3 + %5:sgpr(i32) = G_BITCAST %4(f32) + $sgpr0 = COPY %5(i32) ... --- @@ -78,12 +87,15 @@ body: | ; GFX1150-NEXT: {{ $}} ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX1150-NEXT: %2:sreg_32 = nofpexcept S_MIN_F32 [[COPY]], [[COPY1]], implicit $mode - ; GFX1150-NEXT: $sgpr0 = COPY %2 - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = G_FMINNUM %0, %1 - $sgpr0 = COPY %2(s32) + ; GFX1150-NEXT: [[S_MIN_F32_:%[0-9]+]]:sreg_32 = nofpexcept S_MIN_F32 [[COPY]], [[COPY1]], implicit $mode + ; GFX1150-NEXT: $sgpr0 = COPY [[S_MIN_F32_]] + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(f32) = G_BITCAST %0(i32) + %3:sgpr(f32) = G_BITCAST %1(i32) + %4:sgpr(f32) = G_FMINNUM %2, %3 + %5:sgpr(i32) = G_BITCAST %4(f32) + $sgpr0 = COPY %5(i32) ... --- @@ -99,12 +111,15 @@ body: | ; GFX1150-NEXT: {{ $}} ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX1150-NEXT: %2:sreg_32 = nofpexcept S_MAX_F32 [[COPY]], [[COPY1]], implicit $mode - ; GFX1150-NEXT: $sgpr0 = COPY %2 - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = G_FMAXNUM %0, %1 - $sgpr0 = COPY %2(s32) + ; GFX1150-NEXT: [[S_MAX_F32_:%[0-9]+]]:sreg_32 = nofpexcept S_MAX_F32 [[COPY]], [[COPY1]], implicit $mode + ; GFX1150-NEXT: $sgpr0 = COPY [[S_MAX_F32_]] + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(f32) = G_BITCAST %0(i32) + %3:sgpr(f32) = G_BITCAST %1(i32) + %4:sgpr(f32) = G_FMAXNUM %2, %3 + %5:sgpr(i32) = G_BITCAST %4(f32) + $sgpr0 = COPY %5(i32) ... --- @@ -120,15 +135,18 @@ body: | ; GFX1150-NEXT: {{ $}} ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX1150-NEXT: %4:sreg_32 = nofpexcept S_ADD_F16 [[COPY]], [[COPY1]], implicit $mode - ; GFX1150-NEXT: $sgpr0 = COPY %4 - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0(s32) - %2:sgpr(s32) = COPY $sgpr1 - %3:sgpr(s16) = G_TRUNC %2(s32) - %4:sgpr(s16) = G_FADD %1, %3 - %5:sgpr(s32) = G_ANYEXT %4(s16) - $sgpr0 = COPY %5(s32) + ; GFX1150-NEXT: [[S_ADD_F16_:%[0-9]+]]:sreg_32 = nofpexcept S_ADD_F16 [[COPY]], [[COPY1]], implicit $mode + ; GFX1150-NEXT: $sgpr0 = COPY [[S_ADD_F16_]] + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(i32) = COPY $sgpr1 + %3:sgpr(i16) = G_TRUNC %2(i32) + %4:sgpr(f16) = G_BITCAST %1(i16) + %5:sgpr(f16) = G_BITCAST %3(i16) + %6:sgpr(f16) = G_FADD %4, %5 + %7:sgpr(i16) = G_BITCAST %6(f16) + %8:sgpr(i32) = G_ANYEXT %7(i16) + $sgpr0 = COPY %8(i32) ... 
--- @@ -144,15 +162,18 @@ body: | ; GFX1150-NEXT: {{ $}} ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX1150-NEXT: %4:sreg_32 = nofpexcept S_SUB_F16 [[COPY]], [[COPY1]], implicit $mode - ; GFX1150-NEXT: $sgpr0 = COPY %4 - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0(s32) - %2:sgpr(s32) = COPY $sgpr1 - %3:sgpr(s16) = G_TRUNC %2(s32) - %4:sgpr(s16) = G_FSUB %1, %3 - %5:sgpr(s32) = G_ANYEXT %4(s16) - $sgpr0 = COPY %5(s32) + ; GFX1150-NEXT: [[S_SUB_F16_:%[0-9]+]]:sreg_32 = nofpexcept S_SUB_F16 [[COPY]], [[COPY1]], implicit $mode + ; GFX1150-NEXT: $sgpr0 = COPY [[S_SUB_F16_]] + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(i32) = COPY $sgpr1 + %3:sgpr(i16) = G_TRUNC %2(i32) + %4:sgpr(f16) = G_BITCAST %1(i16) + %5:sgpr(f16) = G_BITCAST %3(i16) + %6:sgpr(f16) = G_FSUB %4, %5 + %7:sgpr(i16) = G_BITCAST %6(f16) + %8:sgpr(i32) = G_ANYEXT %7(i16) + $sgpr0 = COPY %8(i32) ... --- @@ -168,15 +189,18 @@ body: | ; GFX1150-NEXT: {{ $}} ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX1150-NEXT: %4:sreg_32 = nofpexcept S_MUL_F16 [[COPY]], [[COPY1]], implicit $mode - ; GFX1150-NEXT: $sgpr0 = COPY %4 - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0(s32) - %2:sgpr(s32) = COPY $sgpr1 - %3:sgpr(s16) = G_TRUNC %2(s32) - %4:sgpr(s16) = G_FMUL %1, %3 - %5:sgpr(s32) = G_ANYEXT %4(s16) - $sgpr0 = COPY %5(s32) + ; GFX1150-NEXT: [[S_MUL_F16_:%[0-9]+]]:sreg_32 = nofpexcept S_MUL_F16 [[COPY]], [[COPY1]], implicit $mode + ; GFX1150-NEXT: $sgpr0 = COPY [[S_MUL_F16_]] + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(i32) = COPY $sgpr1 + %3:sgpr(i16) = G_TRUNC %2(i32) + %4:sgpr(f16) = G_BITCAST %1(i16) + %5:sgpr(f16) = G_BITCAST %3(i16) + %6:sgpr(f16) = G_FMUL %4, %5 + %7:sgpr(i16) = G_BITCAST %6(f16) + %8:sgpr(i32) = G_ANYEXT %7(i16) + $sgpr0 = COPY %8(i32) ... --- @@ -192,15 +216,18 @@ body: | ; GFX1150-NEXT: {{ $}} ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX1150-NEXT: %4:sreg_32 = nofpexcept S_MIN_F16 [[COPY]], [[COPY1]], implicit $mode - ; GFX1150-NEXT: $sgpr0 = COPY %4 - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0(s32) - %2:sgpr(s32) = COPY $sgpr1 - %3:sgpr(s16) = G_TRUNC %2(s32) - %4:sgpr(s16) = G_FMINNUM %1, %3 - %5:sgpr(s32) = G_ANYEXT %4(s16) - $sgpr0 = COPY %5(s32) + ; GFX1150-NEXT: [[S_MIN_F16_:%[0-9]+]]:sreg_32 = nofpexcept S_MIN_F16 [[COPY]], [[COPY1]], implicit $mode + ; GFX1150-NEXT: $sgpr0 = COPY [[S_MIN_F16_]] + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(i32) = COPY $sgpr1 + %3:sgpr(i16) = G_TRUNC %2(i32) + %4:sgpr(f16) = G_BITCAST %1(i16) + %5:sgpr(f16) = G_BITCAST %3(i16) + %6:sgpr(f16) = G_FMINNUM %4, %5 + %7:sgpr(i16) = G_BITCAST %6(f16) + %8:sgpr(i32) = G_ANYEXT %7(i16) + $sgpr0 = COPY %8(i32) ... --- @@ -213,12 +240,15 @@ body: | ; GFX1150-LABEL: name: fmax_f16 ; GFX1150: liveins: $sgpr0, $sgpr1 - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0(s32) - %2:sgpr(s32) = COPY $sgpr1 - %3:sgpr(s16) = G_TRUNC %2(s32) - %4:sgpr(s16) = G_FMAXNUM %1, %3 - %5:sgpr(s32) = G_ANYEXT %4(s16) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(i32) = COPY $sgpr1 + %3:sgpr(i16) = G_TRUNC %2(i32) + %4:sgpr(f16) = G_BITCAST %1(i16) + %5:sgpr(f16) = G_BITCAST %3(i16) + %6:sgpr(f16) = G_FMAXNUM %4, %5 + %7:sgpr(i16) = G_BITCAST %6(f16) + %8:sgpr(i32) = G_ANYEXT %7(i16) ... 
--- @@ -234,12 +264,15 @@ body: | ; GFX1150-NEXT: {{ $}} ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX1150-NEXT: %2:sreg_32 = nofpexcept S_CVT_PK_RTZ_F16_F32 [[COPY]], [[COPY1]], implicit $mode - ; GFX1150-NEXT: $sgpr0 = COPY %2 - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(<2 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), %0(s32), %1(s32) - $sgpr0 = COPY %2(<2 x s16>) + ; GFX1150-NEXT: [[S_CVT_PK_RTZ_F16_F32_:%[0-9]+]]:sreg_32 = nofpexcept S_CVT_PK_RTZ_F16_F32 [[COPY]], [[COPY1]], implicit $mode + ; GFX1150-NEXT: $sgpr0 = COPY [[S_CVT_PK_RTZ_F16_F32_]] + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(f32) = G_BITCAST %0(i32) + %3:sgpr(f32) = G_BITCAST %1(i32) + %4:sgpr(<2 x f16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), %2(f32), %3(f32) + %5:sgpr(<2 x i16>) = G_BITCAST %4(<2 x f16>) + $sgpr0 = COPY %5(<2 x i16>) ... --- @@ -256,13 +289,17 @@ body: | ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX1150-NEXT: %3:sreg_32 = nofpexcept S_FMAC_F32 [[COPY1]], [[COPY2]], [[COPY]], implicit $mode - ; GFX1150-NEXT: $sgpr0 = COPY %3 - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = COPY $sgpr2 - %3:sgpr(s32) = G_FMA %1, %2, %0 - $sgpr0 = COPY %3(s32) + ; GFX1150-NEXT: [[S_FMAC_F32_:%[0-9]+]]:sreg_32 = nofpexcept S_FMAC_F32 [[COPY1]], [[COPY2]], [[COPY]], implicit $mode + ; GFX1150-NEXT: $sgpr0 = COPY [[S_FMAC_F32_]] + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i32) = COPY $sgpr2 + %3:sgpr(f32) = G_BITCAST %1(i32) + %4:sgpr(f32) = G_BITCAST %2(i32) + %5:sgpr(f32) = G_BITCAST %0(i32) + %6:sgpr(f32) = G_FMA %3, %4, %5 + %7:sgpr(i32) = G_BITCAST %6(f32) + $sgpr0 = COPY %7(i32) ... --- @@ -279,16 +316,20 @@ body: | ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX1150-NEXT: %6:sreg_32 = nofpexcept S_FMAC_F16 [[COPY1]], [[COPY2]], [[COPY]], implicit $mode - ; GFX1150-NEXT: $sgpr0 = COPY %6 - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0(s32) - %2:sgpr(s32) = COPY $sgpr1 - %3:sgpr(s16) = G_TRUNC %2(s32) - %4:sgpr(s32) = COPY $sgpr2 - %5:sgpr(s16) = G_TRUNC %4(s32) - %6:sgpr(s16) = G_FMA %3, %5, %1 - %7:sgpr(s32) = G_ANYEXT %6(s16) - $sgpr0 = COPY %7(s32) + ; GFX1150-NEXT: [[S_FMAC_F16_:%[0-9]+]]:sreg_32 = nofpexcept S_FMAC_F16 [[COPY1]], [[COPY2]], [[COPY]], implicit $mode + ; GFX1150-NEXT: $sgpr0 = COPY [[S_FMAC_F16_]] + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(i32) = COPY $sgpr1 + %3:sgpr(i16) = G_TRUNC %2(i32) + %4:sgpr(i32) = COPY $sgpr2 + %5:sgpr(i16) = G_TRUNC %4(i32) + %6:sgpr(f16) = G_BITCAST %3(i16) + %7:sgpr(f16) = G_BITCAST %5(i16) + %8:sgpr(f16) = G_BITCAST %1(i16) + %9:sgpr(f16) = G_FMA %6, %7, %8 + %10:sgpr(i16) = G_BITCAST %9(f16) + %11:sgpr(i32) = G_ANYEXT %10(i16) + $sgpr0 = COPY %11(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-scalar-float-sopc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-scalar-float-sopc.mir index c75a2926e7cf9..115a52e30f644 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-scalar-float-sopc.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-scalar-float-sopc.mir @@ -18,10 +18,12 @@ body: | ; GFX1150-NEXT: S_CMP_LT_F32 [[COPY]], [[COPY1]], implicit-def $scc, implicit $mode ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc ; GFX1150-NEXT: $sgpr0 = COPY [[COPY2]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = G_FCMP floatpred(olt), %0(s32), %1 - $sgpr0 = COPY %2(s32) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(f32) = G_BITCAST %0(i32) + %3:sgpr(f32) = G_BITCAST %1(i32) + %4:sgpr(i32) = G_FCMP floatpred(olt), %2(f32), %3 + $sgpr0 = COPY %4(i32) ... --- @@ -40,10 +42,12 @@ body: | ; GFX1150-NEXT: S_CMP_EQ_F32 [[COPY]], [[COPY1]], implicit-def $scc, implicit $mode ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc ; GFX1150-NEXT: $sgpr0 = COPY [[COPY2]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = G_FCMP floatpred(oeq), %0(s32), %1 - $sgpr0 = COPY %2(s32) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(f32) = G_BITCAST %0(i32) + %3:sgpr(f32) = G_BITCAST %1(i32) + %4:sgpr(i32) = G_FCMP floatpred(oeq), %2(f32), %3 + $sgpr0 = COPY %4(i32) ... --- @@ -62,10 +66,12 @@ body: | ; GFX1150-NEXT: S_CMP_LE_F32 [[COPY]], [[COPY1]], implicit-def $scc, implicit $mode ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc ; GFX1150-NEXT: $sgpr0 = COPY [[COPY2]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = G_FCMP floatpred(ole), %0(s32), %1 - $sgpr0 = COPY %2(s32) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(f32) = G_BITCAST %0(i32) + %3:sgpr(f32) = G_BITCAST %1(i32) + %4:sgpr(i32) = G_FCMP floatpred(ole), %2(f32), %3 + $sgpr0 = COPY %4(i32) ... --- @@ -84,10 +90,12 @@ body: | ; GFX1150-NEXT: S_CMP_GT_F32 [[COPY]], [[COPY1]], implicit-def $scc, implicit $mode ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc ; GFX1150-NEXT: $sgpr0 = COPY [[COPY2]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = G_FCMP floatpred(ogt), %0(s32), %1 - $sgpr0 = COPY %2(s32) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(f32) = G_BITCAST %0(i32) + %3:sgpr(f32) = G_BITCAST %1(i32) + %4:sgpr(i32) = G_FCMP floatpred(ogt), %2(f32), %3 + $sgpr0 = COPY %4(i32) ... --- @@ -106,10 +114,12 @@ body: | ; GFX1150-NEXT: S_CMP_LG_F32 [[COPY]], [[COPY1]], implicit-def $scc, implicit $mode ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc ; GFX1150-NEXT: $sgpr0 = COPY [[COPY2]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = G_FCMP floatpred(one), %0(s32), %1 - $sgpr0 = COPY %2(s32) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(f32) = G_BITCAST %0(i32) + %3:sgpr(f32) = G_BITCAST %1(i32) + %4:sgpr(i32) = G_FCMP floatpred(one), %2(f32), %3 + $sgpr0 = COPY %4(i32) ... 
--- @@ -128,10 +138,12 @@ body: | ; GFX1150-NEXT: S_CMP_GE_F32 [[COPY]], [[COPY1]], implicit-def $scc, implicit $mode ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc ; GFX1150-NEXT: $sgpr0 = COPY [[COPY2]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = G_FCMP floatpred(oge), %0(s32), %1 - $sgpr0 = COPY %2(s32) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(f32) = G_BITCAST %0(i32) + %3:sgpr(f32) = G_BITCAST %1(i32) + %4:sgpr(i32) = G_FCMP floatpred(oge), %2(f32), %3 + $sgpr0 = COPY %4(i32) ... --- @@ -150,10 +162,12 @@ body: | ; GFX1150-NEXT: S_CMP_O_F32 [[COPY]], [[COPY1]], implicit-def $scc, implicit $mode ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc ; GFX1150-NEXT: $sgpr0 = COPY [[COPY2]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = G_FCMP floatpred(ord), %0(s32), %1 - $sgpr0 = COPY %2(s32) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(f32) = G_BITCAST %0(i32) + %3:sgpr(f32) = G_BITCAST %1(i32) + %4:sgpr(i32) = G_FCMP floatpred(ord), %2(f32), %3 + $sgpr0 = COPY %4(i32) ... --- @@ -172,10 +186,12 @@ body: | ; GFX1150-NEXT: S_CMP_U_F32 [[COPY]], [[COPY1]], implicit-def $scc, implicit $mode ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc ; GFX1150-NEXT: $sgpr0 = COPY [[COPY2]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = G_FCMP floatpred(uno), %0(s32), %1 - $sgpr0 = COPY %2(s32) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(f32) = G_BITCAST %0(i32) + %3:sgpr(f32) = G_BITCAST %1(i32) + %4:sgpr(i32) = G_FCMP floatpred(uno), %2(f32), %3 + $sgpr0 = COPY %4(i32) ... --- @@ -194,10 +210,12 @@ body: | ; GFX1150-NEXT: S_CMP_NGE_F32 [[COPY]], [[COPY1]], implicit-def $scc, implicit $mode ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc ; GFX1150-NEXT: $sgpr0 = COPY [[COPY2]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = G_FCMP floatpred(ult), %0(s32), %1 - $sgpr0 = COPY %2(s32) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(f32) = G_BITCAST %0(i32) + %3:sgpr(f32) = G_BITCAST %1(i32) + %4:sgpr(i32) = G_FCMP floatpred(ult), %2(f32), %3 + $sgpr0 = COPY %4(i32) ... --- @@ -216,10 +234,12 @@ body: | ; GFX1150-NEXT: S_CMP_NLG_F32 [[COPY]], [[COPY1]], implicit-def $scc, implicit $mode ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc ; GFX1150-NEXT: $sgpr0 = COPY [[COPY2]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = G_FCMP floatpred(ueq), %0(s32), %1 - $sgpr0 = COPY %2(s32) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(f32) = G_BITCAST %0(i32) + %3:sgpr(f32) = G_BITCAST %1(i32) + %4:sgpr(i32) = G_FCMP floatpred(ueq), %2(f32), %3 + $sgpr0 = COPY %4(i32) ... --- @@ -238,10 +258,12 @@ body: | ; GFX1150-NEXT: S_CMP_NGT_F32 [[COPY]], [[COPY1]], implicit-def $scc, implicit $mode ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc ; GFX1150-NEXT: $sgpr0 = COPY [[COPY2]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = G_FCMP floatpred(ule), %0(s32), %1 - $sgpr0 = COPY %2(s32) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(f32) = G_BITCAST %0(i32) + %3:sgpr(f32) = G_BITCAST %1(i32) + %4:sgpr(i32) = G_FCMP floatpred(ule), %2(f32), %3 + $sgpr0 = COPY %4(i32) ... 
--- @@ -260,10 +282,12 @@ body: | ; GFX1150-NEXT: S_CMP_NLE_F32 [[COPY]], [[COPY1]], implicit-def $scc, implicit $mode ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc ; GFX1150-NEXT: $sgpr0 = COPY [[COPY2]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = G_FCMP floatpred(ugt), %0(s32), %1 - $sgpr0 = COPY %2(s32) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(f32) = G_BITCAST %0(i32) + %3:sgpr(f32) = G_BITCAST %1(i32) + %4:sgpr(i32) = G_FCMP floatpred(ugt), %2(f32), %3 + $sgpr0 = COPY %4(i32) ... --- @@ -282,10 +306,12 @@ body: | ; GFX1150-NEXT: S_CMP_NEQ_F32 [[COPY]], [[COPY1]], implicit-def $scc, implicit $mode ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc ; GFX1150-NEXT: $sgpr0 = COPY [[COPY2]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = G_FCMP floatpred(une), %0(s32), %1 - $sgpr0 = COPY %2(s32) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(f32) = G_BITCAST %0(i32) + %3:sgpr(f32) = G_BITCAST %1(i32) + %4:sgpr(i32) = G_FCMP floatpred(une), %2(f32), %3 + $sgpr0 = COPY %4(i32) ... --- @@ -304,10 +330,12 @@ body: | ; GFX1150-NEXT: S_CMP_NLT_F32 [[COPY]], [[COPY1]], implicit-def $scc, implicit $mode ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc ; GFX1150-NEXT: $sgpr0 = COPY [[COPY2]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = G_FCMP floatpred(uge), %0(s32), %1 - $sgpr0 = COPY %2(s32) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(f32) = G_BITCAST %0(i32) + %3:sgpr(f32) = G_BITCAST %1(i32) + %4:sgpr(i32) = G_FCMP floatpred(uge), %2(f32), %3 + $sgpr0 = COPY %4(i32) ... --- @@ -326,12 +354,14 @@ body: | ; GFX1150-NEXT: S_CMP_LT_F16 [[COPY]], [[COPY1]], implicit-def $scc, implicit $mode ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc ; GFX1150-NEXT: $sgpr0 = COPY [[COPY2]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0(s32) - %2:sgpr(s32) = COPY $sgpr1 - %3:sgpr(s16) = G_TRUNC %2(s32) - %4:sgpr(s32) = G_FCMP floatpred(olt), %1(s16), %3 - $sgpr0 = COPY %4(s32) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(i32) = COPY $sgpr1 + %3:sgpr(i16) = G_TRUNC %2(i32) + %4:sgpr(f16) = G_BITCAST %1(i16) + %5:sgpr(f16) = G_BITCAST %3(i16) + %6:sgpr(i32) = G_FCMP floatpred(olt), %4(f16), %5 + $sgpr0 = COPY %6(i32) ... --- @@ -350,12 +380,14 @@ body: | ; GFX1150-NEXT: S_CMP_EQ_F16 [[COPY]], [[COPY1]], implicit-def $scc, implicit $mode ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc ; GFX1150-NEXT: $sgpr0 = COPY [[COPY2]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0(s32) - %2:sgpr(s32) = COPY $sgpr1 - %3:sgpr(s16) = G_TRUNC %2(s32) - %4:sgpr(s32) = G_FCMP floatpred(oeq), %1(s16), %3 - $sgpr0 = COPY %4(s32) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(i32) = COPY $sgpr1 + %3:sgpr(i16) = G_TRUNC %2(i32) + %4:sgpr(f16) = G_BITCAST %1(i16) + %5:sgpr(f16) = G_BITCAST %3(i16) + %6:sgpr(i32) = G_FCMP floatpred(oeq), %4(f16), %5 + $sgpr0 = COPY %6(i32) ... 
--- @@ -374,12 +406,14 @@ body: | ; GFX1150-NEXT: S_CMP_LE_F16 [[COPY]], [[COPY1]], implicit-def $scc, implicit $mode ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc ; GFX1150-NEXT: $sgpr0 = COPY [[COPY2]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0(s32) - %2:sgpr(s32) = COPY $sgpr1 - %3:sgpr(s16) = G_TRUNC %2(s32) - %4:sgpr(s32) = G_FCMP floatpred(ole), %1(s16), %3 - $sgpr0 = COPY %4(s32) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(i32) = COPY $sgpr1 + %3:sgpr(i16) = G_TRUNC %2(i32) + %4:sgpr(f16) = G_BITCAST %1(i16) + %5:sgpr(f16) = G_BITCAST %3(i16) + %6:sgpr(i32) = G_FCMP floatpred(ole), %4(f16), %5 + $sgpr0 = COPY %6(i32) ... --- @@ -398,12 +432,14 @@ body: | ; GFX1150-NEXT: S_CMP_GT_F16 [[COPY]], [[COPY1]], implicit-def $scc, implicit $mode ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc ; GFX1150-NEXT: $sgpr0 = COPY [[COPY2]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0(s32) - %2:sgpr(s32) = COPY $sgpr1 - %3:sgpr(s16) = G_TRUNC %2(s32) - %4:sgpr(s32) = G_FCMP floatpred(ogt), %1(s16), %3 - $sgpr0 = COPY %4(s32) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(i32) = COPY $sgpr1 + %3:sgpr(i16) = G_TRUNC %2(i32) + %4:sgpr(f16) = G_BITCAST %1(i16) + %5:sgpr(f16) = G_BITCAST %3(i16) + %6:sgpr(i32) = G_FCMP floatpred(ogt), %4(f16), %5 + $sgpr0 = COPY %6(i32) ... --- @@ -422,12 +458,14 @@ body: | ; GFX1150-NEXT: S_CMP_LG_F16 [[COPY]], [[COPY1]], implicit-def $scc, implicit $mode ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc ; GFX1150-NEXT: $sgpr0 = COPY [[COPY2]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0(s32) - %2:sgpr(s32) = COPY $sgpr1 - %3:sgpr(s16) = G_TRUNC %2(s32) - %4:sgpr(s32) = G_FCMP floatpred(one), %1(s16), %3 - $sgpr0 = COPY %4(s32) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(i32) = COPY $sgpr1 + %3:sgpr(i16) = G_TRUNC %2(i32) + %4:sgpr(f16) = G_BITCAST %1(i16) + %5:sgpr(f16) = G_BITCAST %3(i16) + %6:sgpr(i32) = G_FCMP floatpred(one), %4(f16), %5 + $sgpr0 = COPY %6(i32) ... --- @@ -446,12 +484,14 @@ body: | ; GFX1150-NEXT: S_CMP_GE_F16 [[COPY]], [[COPY1]], implicit-def $scc, implicit $mode ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc ; GFX1150-NEXT: $sgpr0 = COPY [[COPY2]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0(s32) - %2:sgpr(s32) = COPY $sgpr1 - %3:sgpr(s16) = G_TRUNC %2(s32) - %4:sgpr(s32) = G_FCMP floatpred(oge), %1(s16), %3 - $sgpr0 = COPY %4(s32) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(i32) = COPY $sgpr1 + %3:sgpr(i16) = G_TRUNC %2(i32) + %4:sgpr(f16) = G_BITCAST %1(i16) + %5:sgpr(f16) = G_BITCAST %3(i16) + %6:sgpr(i32) = G_FCMP floatpred(oge), %4(f16), %5 + $sgpr0 = COPY %6(i32) ... --- @@ -470,12 +510,14 @@ body: | ; GFX1150-NEXT: S_CMP_O_F16 [[COPY]], [[COPY1]], implicit-def $scc, implicit $mode ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc ; GFX1150-NEXT: $sgpr0 = COPY [[COPY2]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0(s32) - %2:sgpr(s32) = COPY $sgpr1 - %3:sgpr(s16) = G_TRUNC %2(s32) - %4:sgpr(s32) = G_FCMP floatpred(ord), %1(s16), %3 - $sgpr0 = COPY %4(s32) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(i32) = COPY $sgpr1 + %3:sgpr(i16) = G_TRUNC %2(i32) + %4:sgpr(f16) = G_BITCAST %1(i16) + %5:sgpr(f16) = G_BITCAST %3(i16) + %6:sgpr(i32) = G_FCMP floatpred(ord), %4(f16), %5 + $sgpr0 = COPY %6(i32) ... 
--- @@ -494,12 +536,14 @@ body: | ; GFX1150-NEXT: S_CMP_U_F16 [[COPY]], [[COPY1]], implicit-def $scc, implicit $mode ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc ; GFX1150-NEXT: $sgpr0 = COPY [[COPY2]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0(s32) - %2:sgpr(s32) = COPY $sgpr1 - %3:sgpr(s16) = G_TRUNC %2(s32) - %4:sgpr(s32) = G_FCMP floatpred(uno), %1(s16), %3 - $sgpr0 = COPY %4(s32) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(i32) = COPY $sgpr1 + %3:sgpr(i16) = G_TRUNC %2(i32) + %4:sgpr(f16) = G_BITCAST %1(i16) + %5:sgpr(f16) = G_BITCAST %3(i16) + %6:sgpr(i32) = G_FCMP floatpred(uno), %4(f16), %5 + $sgpr0 = COPY %6(i32) ... --- @@ -518,12 +562,14 @@ body: | ; GFX1150-NEXT: S_CMP_NGE_F16 [[COPY]], [[COPY1]], implicit-def $scc, implicit $mode ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc ; GFX1150-NEXT: $sgpr0 = COPY [[COPY2]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0(s32) - %2:sgpr(s32) = COPY $sgpr1 - %3:sgpr(s16) = G_TRUNC %2(s32) - %4:sgpr(s32) = G_FCMP floatpred(ult), %1(s16), %3 - $sgpr0 = COPY %4(s32) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(i32) = COPY $sgpr1 + %3:sgpr(i16) = G_TRUNC %2(i32) + %4:sgpr(f16) = G_BITCAST %1(i16) + %5:sgpr(f16) = G_BITCAST %3(i16) + %6:sgpr(i32) = G_FCMP floatpred(ult), %4(f16), %5 + $sgpr0 = COPY %6(i32) ... --- @@ -542,12 +588,14 @@ body: | ; GFX1150-NEXT: S_CMP_NLG_F16 [[COPY]], [[COPY1]], implicit-def $scc, implicit $mode ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc ; GFX1150-NEXT: $sgpr0 = COPY [[COPY2]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0(s32) - %2:sgpr(s32) = COPY $sgpr1 - %3:sgpr(s16) = G_TRUNC %2(s32) - %4:sgpr(s32) = G_FCMP floatpred(ueq), %1(s16), %3 - $sgpr0 = COPY %4(s32) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(i32) = COPY $sgpr1 + %3:sgpr(i16) = G_TRUNC %2(i32) + %4:sgpr(f16) = G_BITCAST %1(i16) + %5:sgpr(f16) = G_BITCAST %3(i16) + %6:sgpr(i32) = G_FCMP floatpred(ueq), %4(f16), %5 + $sgpr0 = COPY %6(i32) ... --- @@ -566,12 +614,14 @@ body: | ; GFX1150-NEXT: S_CMP_NGT_F16 [[COPY]], [[COPY1]], implicit-def $scc, implicit $mode ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc ; GFX1150-NEXT: $sgpr0 = COPY [[COPY2]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0(s32) - %2:sgpr(s32) = COPY $sgpr1 - %3:sgpr(s16) = G_TRUNC %2(s32) - %4:sgpr(s32) = G_FCMP floatpred(ule), %1(s16), %3 - $sgpr0 = COPY %4(s32) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(i32) = COPY $sgpr1 + %3:sgpr(i16) = G_TRUNC %2(i32) + %4:sgpr(f16) = G_BITCAST %1(i16) + %5:sgpr(f16) = G_BITCAST %3(i16) + %6:sgpr(i32) = G_FCMP floatpred(ule), %4(f16), %5 + $sgpr0 = COPY %6(i32) ... --- @@ -590,12 +640,14 @@ body: | ; GFX1150-NEXT: S_CMP_NLE_F16 [[COPY]], [[COPY1]], implicit-def $scc, implicit $mode ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc ; GFX1150-NEXT: $sgpr0 = COPY [[COPY2]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0(s32) - %2:sgpr(s32) = COPY $sgpr1 - %3:sgpr(s16) = G_TRUNC %2(s32) - %4:sgpr(s32) = G_FCMP floatpred(ugt), %1(s16), %3 - $sgpr0 = COPY %4(s32) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(i32) = COPY $sgpr1 + %3:sgpr(i16) = G_TRUNC %2(i32) + %4:sgpr(f16) = G_BITCAST %1(i16) + %5:sgpr(f16) = G_BITCAST %3(i16) + %6:sgpr(i32) = G_FCMP floatpred(ugt), %4(f16), %5 + $sgpr0 = COPY %6(i32) ... 
--- @@ -614,12 +666,14 @@ body: | ; GFX1150-NEXT: S_CMP_NEQ_F16 [[COPY]], [[COPY1]], implicit-def $scc, implicit $mode ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc ; GFX1150-NEXT: $sgpr0 = COPY [[COPY2]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0(s32) - %2:sgpr(s32) = COPY $sgpr1 - %3:sgpr(s16) = G_TRUNC %2(s32) - %4:sgpr(s32) = G_FCMP floatpred(une), %1(s16), %3 - $sgpr0 = COPY %4(s32) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(i32) = COPY $sgpr1 + %3:sgpr(i16) = G_TRUNC %2(i32) + %4:sgpr(f16) = G_BITCAST %1(i16) + %5:sgpr(f16) = G_BITCAST %3(i16) + %6:sgpr(i32) = G_FCMP floatpred(une), %4(f16), %5 + $sgpr0 = COPY %6(i32) ... --- @@ -638,11 +692,13 @@ body: | ; GFX1150-NEXT: S_CMP_NLT_F16 [[COPY]], [[COPY1]], implicit-def $scc, implicit $mode ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc ; GFX1150-NEXT: $sgpr0 = COPY [[COPY2]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0(s32) - %2:sgpr(s32) = COPY $sgpr1 - %3:sgpr(s16) = G_TRUNC %2(s32) - %4:sgpr(s32) = G_FCMP floatpred(uge), %1(s16), %3 - $sgpr0 = COPY %4(s32) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(i32) = COPY $sgpr1 + %3:sgpr(i16) = G_TRUNC %2(i32) + %4:sgpr(f16) = G_BITCAST %1(i16) + %5:sgpr(f16) = G_BITCAST %3(i16) + %6:sgpr(i32) = G_FCMP floatpred(uge), %4(f16), %5 + $sgpr0 = COPY %6(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-scalar-packed.xfail.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-scalar-packed.xfail.mir index 132596d186a61..c3427b52123eb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-scalar-packed.xfail.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-scalar-packed.xfail.mir @@ -2,16 +2,16 @@ # Make sure v2s16 SALU operations fail to select -# ERR: remark: :0:0: cannot select: %2:sgpr(<2 x s16>) = G_ADD %0:sgpr, %1:sgpr (in function: s_add_v2s16) -# ERR: remark: :0:0: cannot select: %2:sgpr(<2 x s16>) = G_SUB %0:sgpr, %1:sgpr (in function: s_sub_v2s16) -# ERR: remark: :0:0: cannot select: %2:sgpr(<2 x s16>) = G_MUL %0:sgpr, %1:sgpr (in function: s_mul_v2s16) -# ERR: remark: :0:0: cannot select: %2:sgpr(<2 x s16>) = G_SHL %0:sgpr, %1:sgpr(<2 x s16>) (in function: s_shl_v2s16) -# ERR: remark: :0:0: cannot select: %2:sgpr(<2 x s16>) = G_LSHR %0:sgpr, %1:sgpr(<2 x s16>) (in function: s_lshr_v2s16) -# ERR: remark: :0:0: cannot select: %2:sgpr(<2 x s16>) = G_ASHR %0:sgpr, %1:sgpr(<2 x s16>) (in function: s_ashr_v2s16) -# ERR: remark: :0:0: cannot select: %2:sgpr(<2 x s16>) = G_SMIN %0:sgpr, %1:sgpr (in function: s_smin_v2s16) -# ERR: remark: :0:0: cannot select: %2:sgpr(<2 x s16>) = G_SMAX %0:sgpr, %1:sgpr (in function: s_smax_v2s16) -# ERR: remark: :0:0: cannot select: %2:sgpr(<2 x s16>) = G_UMIN %0:sgpr, %1:sgpr (in function: s_umin_v2s16) -# ERR: remark: :0:0: cannot select: %2:sgpr(<2 x s16>) = G_UMAX %0:sgpr, %1:sgpr (in function: s_umax_v2s16) +# ERR: remark: :0:0: cannot select: %2:sgpr(<2 x i16>) = G_ADD %0:sgpr, %1:sgpr (in function: s_add_v2s16) +# ERR: remark: :0:0: cannot select: %2:sgpr(<2 x i16>) = G_SUB %0:sgpr, %1:sgpr (in function: s_sub_v2s16) +# ERR: remark: :0:0: cannot select: %2:sgpr(<2 x i16>) = G_MUL %0:sgpr, %1:sgpr (in function: s_mul_v2s16) +# ERR: remark: :0:0: cannot select: %2:sgpr(<2 x i16>) = G_SHL %0:sgpr, %1:sgpr(<2 x i16>) (in function: s_shl_v2s16) +# ERR: remark: :0:0: cannot select: %2:sgpr(<2 x i16>) = G_LSHR %0:sgpr, %1:sgpr(<2 x i16>) (in function: s_lshr_v2s16) +# ERR: remark: :0:0: cannot select: %2:sgpr(<2 x i16>) = G_ASHR 
%0:sgpr, %1:sgpr(<2 x i16>) (in function: s_ashr_v2s16) +# ERR: remark: :0:0: cannot select: %2:sgpr(<2 x i16>) = G_SMIN %0:sgpr, %1:sgpr (in function: s_smin_v2s16) +# ERR: remark: :0:0: cannot select: %2:sgpr(<2 x i16>) = G_SMAX %0:sgpr, %1:sgpr (in function: s_smax_v2s16) +# ERR: remark: :0:0: cannot select: %2:sgpr(<2 x i16>) = G_UMIN %0:sgpr, %1:sgpr (in function: s_umin_v2s16) +# ERR: remark: :0:0: cannot select: %2:sgpr(<2 x i16>) = G_UMAX %0:sgpr, %1:sgpr (in function: s_umax_v2s16) --- name: s_add_v2s16 @@ -23,9 +23,9 @@ body: | bb.0: liveins: $sgpr0, $sgpr1 - %0:sgpr(<2 x s16>) = COPY $sgpr0 - %1:sgpr(<2 x s16>) = COPY $sgpr1 - %2:sgpr(<2 x s16>) = G_ADD %0, %1 + %0:sgpr(<2 x i16>) = COPY $sgpr0 + %1:sgpr(<2 x i16>) = COPY $sgpr1 + %2:sgpr(<2 x i16>) = G_ADD %0, %1 S_ENDPGM 0, implicit %2 ... @@ -40,9 +40,9 @@ body: | bb.0: liveins: $sgpr0, $sgpr1 - %0:sgpr(<2 x s16>) = COPY $sgpr0 - %1:sgpr(<2 x s16>) = COPY $sgpr1 - %2:sgpr(<2 x s16>) = G_SUB %0, %1 + %0:sgpr(<2 x i16>) = COPY $sgpr0 + %1:sgpr(<2 x i16>) = COPY $sgpr1 + %2:sgpr(<2 x i16>) = G_SUB %0, %1 S_ENDPGM 0, implicit %2 ... @@ -57,9 +57,9 @@ body: | bb.0: liveins: $sgpr0, $sgpr1 - %0:sgpr(<2 x s16>) = COPY $sgpr0 - %1:sgpr(<2 x s16>) = COPY $sgpr1 - %2:sgpr(<2 x s16>) = G_MUL %0, %1 + %0:sgpr(<2 x i16>) = COPY $sgpr0 + %1:sgpr(<2 x i16>) = COPY $sgpr1 + %2:sgpr(<2 x i16>) = G_MUL %0, %1 S_ENDPGM 0, implicit %2 ... @@ -74,9 +74,9 @@ body: | bb.0: liveins: $sgpr0, $sgpr1 - %0:sgpr(<2 x s16>) = COPY $sgpr0 - %1:sgpr(<2 x s16>) = COPY $sgpr1 - %2:sgpr(<2 x s16>) = G_SHL %0, %1 + %0:sgpr(<2 x i16>) = COPY $sgpr0 + %1:sgpr(<2 x i16>) = COPY $sgpr1 + %2:sgpr(<2 x i16>) = G_SHL %0, %1 S_ENDPGM 0, implicit %2 ... @@ -91,9 +91,9 @@ body: | bb.0: liveins: $sgpr0, $sgpr1 - %0:sgpr(<2 x s16>) = COPY $sgpr0 - %1:sgpr(<2 x s16>) = COPY $sgpr1 - %2:sgpr(<2 x s16>) = G_LSHR %0, %1 + %0:sgpr(<2 x i16>) = COPY $sgpr0 + %1:sgpr(<2 x i16>) = COPY $sgpr1 + %2:sgpr(<2 x i16>) = G_LSHR %0, %1 S_ENDPGM 0, implicit %2 ... @@ -108,9 +108,9 @@ body: | bb.0: liveins: $sgpr0, $sgpr1 - %0:sgpr(<2 x s16>) = COPY $sgpr0 - %1:sgpr(<2 x s16>) = COPY $sgpr1 - %2:sgpr(<2 x s16>) = G_ASHR %0, %1 + %0:sgpr(<2 x i16>) = COPY $sgpr0 + %1:sgpr(<2 x i16>) = COPY $sgpr1 + %2:sgpr(<2 x i16>) = G_ASHR %0, %1 S_ENDPGM 0, implicit %2 ... @@ -125,9 +125,9 @@ body: | bb.0: liveins: $sgpr0, $sgpr1 - %0:sgpr(<2 x s16>) = COPY $sgpr0 - %1:sgpr(<2 x s16>) = COPY $sgpr1 - %2:sgpr(<2 x s16>) = G_SMIN %0, %1 + %0:sgpr(<2 x i16>) = COPY $sgpr0 + %1:sgpr(<2 x i16>) = COPY $sgpr1 + %2:sgpr(<2 x i16>) = G_SMIN %0, %1 S_ENDPGM 0, implicit %2 ... @@ -142,9 +142,9 @@ body: | bb.0: liveins: $sgpr0, $sgpr1 - %0:sgpr(<2 x s16>) = COPY $sgpr0 - %1:sgpr(<2 x s16>) = COPY $sgpr1 - %2:sgpr(<2 x s16>) = G_SMAX %0, %1 + %0:sgpr(<2 x i16>) = COPY $sgpr0 + %1:sgpr(<2 x i16>) = COPY $sgpr1 + %2:sgpr(<2 x i16>) = G_SMAX %0, %1 S_ENDPGM 0, implicit %2 ... @@ -159,9 +159,9 @@ body: | bb.0: liveins: $sgpr0, $sgpr1 - %0:sgpr(<2 x s16>) = COPY $sgpr0 - %1:sgpr(<2 x s16>) = COPY $sgpr1 - %2:sgpr(<2 x s16>) = G_UMIN %0, %1 + %0:sgpr(<2 x i16>) = COPY $sgpr0 + %1:sgpr(<2 x i16>) = COPY $sgpr1 + %2:sgpr(<2 x i16>) = G_UMIN %0, %1 S_ENDPGM 0, implicit %2 ... @@ -176,9 +176,9 @@ body: | bb.0: liveins: $sgpr0, $sgpr1 - %0:sgpr(<2 x s16>) = COPY $sgpr0 - %1:sgpr(<2 x s16>) = COPY $sgpr1 - %2:sgpr(<2 x s16>) = G_UMAX %0, %1 + %0:sgpr(<2 x i16>) = COPY $sgpr0 + %1:sgpr(<2 x i16>) = COPY $sgpr1 + %2:sgpr(<2 x i16>) = G_UMAX %0, %1 S_ENDPGM 0, implicit %2 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-select.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-select.mir index 02a313cc006d6..eb19b76dccd18 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-select.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-select.mir @@ -22,13 +22,13 @@ body: | ; GCN-NEXT: $scc = COPY [[COPY4]] ; GCN-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY2]], [[COPY3]], implicit $scc ; GCN-NEXT: S_ENDPGM 0, implicit [[S_CSELECT_B32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = COPY $sgpr2 - %3:sgpr(s32) = COPY $sgpr3 - %4:sgpr(s32) = G_ICMP intpred(eq), %0, %1 - %5:sgpr(s32) = G_SELECT %4, %2, %3 - S_ENDPGM 0, implicit %5 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i32) = COPY $sgpr2 + %3:sgpr(i32) = COPY $sgpr3 + %4:sgpr(i32) = G_ICMP intpred(eq), %0(i32), %1 + %5:sgpr(i32) = G_SELECT %4(i32), %2, %3 + S_ENDPGM 0, implicit %5(i32) ... @@ -53,13 +53,13 @@ body: | ; GCN-NEXT: $scc = COPY [[COPY4]] ; GCN-NEXT: [[S_CSELECT_B64_:%[0-9]+]]:sreg_64 = S_CSELECT_B64 [[COPY2]], [[COPY3]], implicit $scc ; GCN-NEXT: S_ENDPGM 0, implicit [[S_CSELECT_B64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s64) = COPY $sgpr2_sgpr3 - %3:sgpr(s64) = COPY $sgpr4_sgpr5 - %4:sgpr(s32) = G_ICMP intpred(eq), %0, %1 - %5:sgpr(s64) = G_SELECT %4, %2, %3 - S_ENDPGM 0, implicit %5 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i64) = COPY $sgpr2_sgpr3 + %3:sgpr(i64) = COPY $sgpr4_sgpr5 + %4:sgpr(i32) = G_ICMP intpred(eq), %0(i32), %1 + %5:sgpr(i64) = G_SELECT %4(i32), %2, %3 + S_ENDPGM 0, implicit %5(i64) ... @@ -84,13 +84,13 @@ body: | ; GCN-NEXT: $scc = COPY [[COPY4]] ; GCN-NEXT: [[S_CSELECT_B64_:%[0-9]+]]:sreg_64 = S_CSELECT_B64 [[COPY2]], [[COPY3]], implicit $scc ; GCN-NEXT: S_ENDPGM 0, implicit [[S_CSELECT_B64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 %2:sgpr(p0) = COPY $sgpr2_sgpr3 %3:sgpr(p0) = COPY $sgpr4_sgpr5 - %4:sgpr(s32) = G_ICMP intpred(eq), %0, %1 - %5:sgpr(p0) = G_SELECT %4, %2, %3 - S_ENDPGM 0, implicit %5 + %4:sgpr(i32) = G_ICMP intpred(eq), %0(i32), %1 + %5:sgpr(p0) = G_SELECT %4(i32), %2, %3 + S_ENDPGM 0, implicit %5(p0) ... @@ -115,13 +115,13 @@ body: | ; GCN-NEXT: $scc = COPY [[COPY4]] ; GCN-NEXT: [[S_CSELECT_B64_:%[0-9]+]]:sreg_64 = S_CSELECT_B64 [[COPY2]], [[COPY3]], implicit $scc ; GCN-NEXT: S_ENDPGM 0, implicit [[S_CSELECT_B64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 %2:sgpr(p1) = COPY $sgpr2_sgpr3 %3:sgpr(p1) = COPY $sgpr4_sgpr5 - %4:sgpr(s32) = G_ICMP intpred(eq), %0, %1 - %5:sgpr(p1) = G_SELECT %4, %2, %3 - S_ENDPGM 0, implicit %5 + %4:sgpr(i32) = G_ICMP intpred(eq), %0(i32), %1 + %5:sgpr(p1) = G_SELECT %4(i32), %2, %3 + S_ENDPGM 0, implicit %5(p1) ... 
@@ -146,13 +146,13 @@ body: | ; GCN-NEXT: $scc = COPY [[COPY4]] ; GCN-NEXT: [[S_CSELECT_B64_:%[0-9]+]]:sreg_64 = S_CSELECT_B64 [[COPY2]], [[COPY3]], implicit $scc ; GCN-NEXT: S_ENDPGM 0, implicit [[S_CSELECT_B64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 %2:sgpr(p999) = COPY $sgpr2_sgpr3 %3:sgpr(p999) = COPY $sgpr4_sgpr5 - %4:sgpr(s32) = G_ICMP intpred(eq), %0, %1 - %5:sgpr(p999) = G_SELECT %4, %2, %3 - S_ENDPGM 0, implicit %5 + %4:sgpr(i32) = G_ICMP intpred(eq), %0(i32), %1 + %5:sgpr(p999) = G_SELECT %4(i32), %2, %3 + S_ENDPGM 0, implicit %5(p999) ... @@ -177,13 +177,13 @@ body: | ; GCN-NEXT: $scc = COPY [[COPY4]] ; GCN-NEXT: [[S_CSELECT_B64_:%[0-9]+]]:sreg_64 = S_CSELECT_B64 [[COPY2]], [[COPY3]], implicit $scc ; GCN-NEXT: S_ENDPGM 0, implicit [[S_CSELECT_B64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(<4 x s16>) = COPY $sgpr2_sgpr3 - %3:sgpr(<4 x s16>) = COPY $sgpr4_sgpr5 - %4:sgpr(s32) = G_ICMP intpred(eq), %0, %1 - %5:sgpr(<4 x s16>) = G_SELECT %4, %2, %3 - S_ENDPGM 0, implicit %5 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(<4 x i16>) = COPY $sgpr2_sgpr3 + %3:sgpr(<4 x i16>) = COPY $sgpr4_sgpr5 + %4:sgpr(i32) = G_ICMP intpred(eq), %0(i32), %1 + %5:sgpr(<4 x i16>) = G_SELECT %4(i32), %2, %3 + S_ENDPGM 0, implicit %5(<4 x i16>) ... @@ -208,15 +208,15 @@ body: | ; GCN-NEXT: $scc = COPY [[COPY4]] ; GCN-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY]], [[COPY1]], implicit $scc ; GCN-NEXT: S_ENDPGM 0, implicit [[S_CSELECT_B32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = COPY $sgpr2 - %3:sgpr(s32) = COPY $sgpr3 - %4:sgpr(s16) = G_TRUNC %0 - %5:sgpr(s16) = G_TRUNC %1 - %6:sgpr(s32) = G_ICMP intpred(eq), %2, %3 - %7:sgpr(s16) = G_SELECT %6, %4, %5 - S_ENDPGM 0, implicit %7 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i32) = COPY $sgpr2 + %3:sgpr(i32) = COPY $sgpr3 + %4:sgpr(i16) = G_TRUNC %0(i32) + %5:sgpr(i16) = G_TRUNC %1(i32) + %6:sgpr(i32) = G_ICMP intpred(eq), %2(i32), %3 + %7:sgpr(i16) = G_SELECT %6(i32), %4, %5 + S_ENDPGM 0, implicit %7(i16) ... @@ -241,13 +241,13 @@ body: | ; GCN-NEXT: $scc = COPY [[COPY4]] ; GCN-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY2]], [[COPY3]], implicit $scc ; GCN-NEXT: S_ENDPGM 0, implicit [[S_CSELECT_B32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(<2 x s16>) = COPY $sgpr2 - %3:sgpr(<2 x s16>) = COPY $sgpr3 - %4:sgpr(s32) = G_ICMP intpred(eq), %0, %1 - %5:sgpr(<2 x s16>) = G_SELECT %4, %2, %3 - S_ENDPGM 0, implicit %5 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(<2 x i16>) = COPY $sgpr2 + %3:sgpr(<2 x i16>) = COPY $sgpr3 + %4:sgpr(i32) = G_ICMP intpred(eq), %0(i32), %1 + %5:sgpr(<2 x i16>) = G_SELECT %4(i32), %2, %3 + S_ENDPGM 0, implicit %5(<2 x i16>) ... 
@@ -270,13 +270,13 @@ body: | ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY3]], 0, [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = COPY $vgpr3 - %4:vcc(s1) = G_ICMP intpred(eq), %0, %1 - %5:vgpr(s32) = G_SELECT %4, %2, %3 - S_ENDPGM 0, implicit %5 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(i32) = COPY $vgpr3 + %4:vcc(i1) = G_ICMP intpred(eq), %0(i32), %1 + %5:vgpr(i32) = G_SELECT %4(i1), %2, %3 + S_ENDPGM 0, implicit %5(i32) ... @@ -299,15 +299,15 @@ body: | ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[COPY3]], implicit $exec ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_CMP_EQ_U32_e64_]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = COPY $vgpr3 - %4:vgpr(s16) = G_TRUNC %0 - %5:vgpr(s16) = G_TRUNC %1 - %6:vcc(s1) = G_ICMP intpred(eq), %2, %3 - %7:vgpr(s16) = G_SELECT %6, %4, %5 - S_ENDPGM 0, implicit %7 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(i32) = COPY $vgpr3 + %4:vgpr(i16) = G_TRUNC %0(i32) + %5:vgpr(i16) = G_TRUNC %1(i32) + %6:vcc(i1) = G_ICMP intpred(eq), %2(i32), %3 + %7:vgpr(i16) = G_SELECT %6(i1), %4, %5 + S_ENDPGM 0, implicit %7(i16) ... @@ -330,13 +330,13 @@ body: | ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY3]], 0, [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(<2 x s16>) = COPY $vgpr2 - %3:vgpr(<2 x s16>) = COPY $vgpr3 - %4:vcc(s1) = G_ICMP intpred(eq), %0, %1 - %5:vgpr(<2 x s16>) = G_SELECT %4, %2, %3 - S_ENDPGM 0, implicit %5 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(<2 x i16>) = COPY $vgpr2 + %3:vgpr(<2 x i16>) = COPY $vgpr3 + %4:vcc(i1) = G_ICMP intpred(eq), %0(i32), %1 + %5:vgpr(<2 x i16>) = G_SELECT %4(i1), %2, %3 + S_ENDPGM 0, implicit %5(<2 x i16>) ... @@ -359,13 +359,13 @@ body: | ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY3]], 0, [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 %2:vgpr(p3) = COPY $vgpr2 %3:vgpr(p3) = COPY $vgpr3 - %4:vcc(s1) = G_ICMP intpred(eq), %0, %1 - %5:vgpr(p3) = G_SELECT %4, %2, %3 - S_ENDPGM 0, implicit %5 + %4:vcc(i1) = G_ICMP intpred(eq), %0(i32), %1 + %5:vgpr(p3) = G_SELECT %4(i1), %2, %3 + S_ENDPGM 0, implicit %5(p3) ... 
@@ -386,17 +386,21 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; GCN-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY2]], implicit $exec ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY3]], 1, [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit $exec + ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY3]], 0, [[V_XOR_B32_e64_]], [[V_CMP_EQ_U32_e64_]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = COPY $vgpr3 - %4:vgpr(s32) = G_FNEG %2 - %5:vcc(s1) = G_ICMP intpred(eq), %0, %1 - %6:vgpr(s32) = G_SELECT %5, %4, %3 - S_ENDPGM 0, implicit %6 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(i32) = COPY $vgpr3 + %4:vgpr(f32) = G_BITCAST %2(i32) + %5:vgpr(f32) = G_FNEG %4 + %6:vcc(i1) = G_ICMP intpred(eq), %0(i32), %1 + %7:vgpr(i32) = G_BITCAST %5(f32) + %8:vgpr(i32) = G_SELECT %6(i1), %7, %3 + S_ENDPGM 0, implicit %8(i32) ... @@ -416,17 +420,21 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; GCN-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY3]], implicit $exec ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 1, [[COPY3]], 0, [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit $exec + ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_XOR_B32_e64_]], 0, [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = COPY $vgpr3 - %4:vgpr(s32) = G_FNEG %3 - %5:vcc(s1) = G_ICMP intpred(eq), %0, %1 - %6:vgpr(s32) = G_SELECT %5, %2, %4 - S_ENDPGM 0, implicit %6 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(i32) = COPY $vgpr3 + %4:vgpr(f32) = G_BITCAST %3(i32) + %5:vgpr(f32) = G_FNEG %4 + %6:vcc(i1) = G_ICMP intpred(eq), %0(i32), %1 + %7:vgpr(i32) = G_BITCAST %5(f32) + %8:vgpr(i32) = G_SELECT %6(i1), %2, %7 + S_ENDPGM 0, implicit %8(i32) ... 
@@ -446,18 +454,22 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; GCN-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_MOV_B32_]], [[COPY3]], implicit $exec ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 3, [[COPY3]], [[V_CMP_EQ_U32_e64_]], implicit $exec + ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[V_OR_B32_e64_]], [[V_CMP_EQ_U32_e64_]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = COPY $vgpr3 - %4:vgpr(s32) = G_FABS %3 - %5:vgpr(s32) = G_FNEG %4 - %6:vcc(s1) = G_ICMP intpred(eq), %0, %1 - %7:vgpr(s32) = G_SELECT %6, %5, %2 - S_ENDPGM 0, implicit %7 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(i32) = COPY $vgpr3 + %4:vgpr(f32) = G_BITCAST %3(i32) + %5:vgpr(f32) = G_FABS %4 + %6:vgpr(f32) = G_FNEG %5 + %7:vcc(i1) = G_ICMP intpred(eq), %0(i32), %1 + %8:vgpr(i32) = G_BITCAST %6(f32) + %9:vgpr(i32) = G_SELECT %7(i1), %8, %2 + S_ENDPGM 0, implicit %9(i32) ... @@ -483,16 +495,18 @@ body: | ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[COPY3]], implicit $exec ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[V_XOR_B32_e64_]], [[V_CMP_EQ_U32_e64_]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = COPY $vgpr3 - %4:vgpr(s16) = G_TRUNC %0 - %5:vgpr(s16) = G_TRUNC %1 - %6:vgpr(s16) = G_FNEG %4 - %7:vcc(s1) = G_ICMP intpred(eq), %2, %3 - %8:vgpr(s16) = G_SELECT %7, %6, %5 - S_ENDPGM 0, implicit %8 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(i32) = COPY $vgpr3 + %4:vgpr(i16) = G_TRUNC %0(i32) + %5:vgpr(i16) = G_TRUNC %1(i32) + %6:vgpr(f16) = G_BITCAST %4(i16) + %7:vgpr(f16) = G_FNEG %6 + %8:vcc(i1) = G_ICMP intpred(eq), %2(i32), %3 + %9:vgpr(i16) = G_BITCAST %7(f16) + %10:vgpr(i16) = G_SELECT %8(i1), %9, %5 + S_ENDPGM 0, implicit %10(i16) ... @@ -518,14 +532,16 @@ body: | ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[V_XOR_B32_e64_]], [[V_CMP_EQ_U32_e64_]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(<2 x s16>) = COPY $vgpr2 - %3:vgpr(<2 x s16>) = COPY $vgpr3 - %4:vgpr(<2 x s16>) = G_FNEG %3 - %5:vcc(s1) = G_ICMP intpred(eq), %0, %1 - %6:vgpr(<2 x s16>) = G_SELECT %5, %4, %3 - S_ENDPGM 0, implicit %6 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(<2 x i16>) = COPY $vgpr2 + %3:vgpr(<2 x i16>) = COPY $vgpr3 + %4:vgpr(<2 x f16>) = G_BITCAST %3(<2 x i16>) + %5:vgpr(<2 x f16>) = G_FNEG %4 + %6:vcc(i1) = G_ICMP intpred(eq), %0(i32), %1 + %7:vgpr(<2 x i16>) = G_BITCAST %5(<2 x f16>) + %8:vgpr(<2 x i16>) = G_SELECT %6(i1), %7, %3 + S_ENDPGM 0, implicit %8(<2 x i16>) ... 
@@ -554,14 +570,16 @@ body: | ; GCN-NEXT: $scc = COPY [[COPY4]] ; GCN-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[S_XOR_B32_]], [[COPY3]], implicit $scc ; GCN-NEXT: S_ENDPGM 0, implicit [[S_CSELECT_B32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = COPY $sgpr2 - %3:sgpr(s32) = COPY $sgpr3 - %4:sgpr(s32) = G_FNEG %2 - %5:sgpr(s32) = G_ICMP intpred(eq), %0, %1 - %6:sgpr(s32) = G_SELECT %5, %4, %3 - S_ENDPGM 0, implicit %6 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i32) = COPY $sgpr2 + %3:sgpr(i32) = COPY $sgpr3 + %4:sgpr(f32) = G_BITCAST %2(i32) + %5:sgpr(f32) = G_FNEG %4 + %6:sgpr(i32) = G_ICMP intpred(eq), %0(i32), %1 + %7:sgpr(i32) = G_BITCAST %5(f32) + %8:sgpr(i32) = G_SELECT %6(i32), %7, %3 + S_ENDPGM 0, implicit %8(i32) ... @@ -588,13 +606,15 @@ body: | ; GCN-NEXT: $scc = COPY [[COPY4]] ; GCN-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY2]], [[S_XOR_B32_]], implicit $scc ; GCN-NEXT: S_ENDPGM 0, implicit [[S_CSELECT_B32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = COPY $sgpr2 - %3:sgpr(s32) = COPY $sgpr3 - %4:sgpr(s32) = G_FNEG %3 - %5:sgpr(s32) = G_ICMP intpred(eq), %0, %1 - %6:sgpr(s32) = G_SELECT %5, %2, %4 - S_ENDPGM 0, implicit %6 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i32) = COPY $sgpr2 + %3:sgpr(i32) = COPY $sgpr3 + %4:sgpr(f32) = G_BITCAST %3(i32) + %5:sgpr(f32) = G_FNEG %4 + %6:sgpr(i32) = G_ICMP intpred(eq), %0(i32), %1 + %7:sgpr(i32) = G_BITCAST %5(f32) + %8:sgpr(i32) = G_SELECT %6(i32), %2, %7 + S_ENDPGM 0, implicit %8(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sext-inreg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sext-inreg.mir index 38218ec23c44a..dcb2a31fb8252 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sext-inreg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sext-inreg.mir @@ -16,9 +16,9 @@ body: | ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GCN-NEXT: [[S_BFE_I32_:%[0-9]+]]:sreg_32 = S_BFE_I32 [[COPY]], 65536, implicit-def $scc ; GCN-NEXT: $sgpr0 = COPY [[S_BFE_I32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = G_SEXT_INREG %0, 1 - $sgpr0 = COPY %1 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = G_SEXT_INREG %0, 1 + $sgpr0 = COPY %1(i32) ... --- @@ -36,9 +36,9 @@ body: | ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GCN-NEXT: [[S_BFE_I32_:%[0-9]+]]:sreg_32 = S_BFE_I32 [[COPY]], 131072, implicit-def $scc ; GCN-NEXT: $sgpr0 = COPY [[S_BFE_I32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = G_SEXT_INREG %0, 2 - $sgpr0 = COPY %1 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = G_SEXT_INREG %0, 2 + $sgpr0 = COPY %1(i32) ... --- @@ -56,9 +56,9 @@ body: | ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GCN-NEXT: [[S_SEXT_I32_I8_:%[0-9]+]]:sreg_32 = S_SEXT_I32_I8 [[COPY]] ; GCN-NEXT: $sgpr0 = COPY [[S_SEXT_I32_I8_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = G_SEXT_INREG %0, 8 - $sgpr0 = COPY %1 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = G_SEXT_INREG %0, 8 + $sgpr0 = COPY %1(i32) ... --- @@ -76,9 +76,9 @@ body: | ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GCN-NEXT: [[S_SEXT_I32_I16_:%[0-9]+]]:sreg_32 = S_SEXT_I32_I16 [[COPY]] ; GCN-NEXT: $sgpr0 = COPY [[S_SEXT_I32_I16_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = G_SEXT_INREG %0, 16 - $sgpr0 = COPY %1 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = G_SEXT_INREG %0, 16 + $sgpr0 = COPY %1(i32) ... 
--- @@ -96,9 +96,9 @@ body: | ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GCN-NEXT: [[S_BFE_I32_:%[0-9]+]]:sreg_32 = S_BFE_I32 [[COPY]], 2031616, implicit-def $scc ; GCN-NEXT: $sgpr0 = COPY [[S_BFE_I32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = G_SEXT_INREG %0, 31 - $sgpr0 = COPY %1 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = G_SEXT_INREG %0, 31 + $sgpr0 = COPY %1(i32) ... --- @@ -118,9 +118,9 @@ body: | ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]].sub0, %subreg.sub0, [[DEF]], %subreg.sub1 ; GCN-NEXT: [[S_BFE_I64_:%[0-9]+]]:sreg_64 = S_BFE_I64 [[REG_SEQUENCE]], 65536, implicit-def $scc ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[S_BFE_I64_]] - %0:sgpr(s64) = COPY $sgpr0_sgpr1 - %1:sgpr(s64) = G_SEXT_INREG %0, 1 - $sgpr0_sgpr1 = COPY %1 + %0:sgpr(i64) = COPY $sgpr0_sgpr1 + %1:sgpr(i64) = G_SEXT_INREG %0, 1 + $sgpr0_sgpr1 = COPY %1(i64) ... --- @@ -140,9 +140,9 @@ body: | ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]].sub0, %subreg.sub0, [[DEF]], %subreg.sub1 ; GCN-NEXT: [[S_BFE_I64_:%[0-9]+]]:sreg_64 = S_BFE_I64 [[REG_SEQUENCE]], 131072, implicit-def $scc ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[S_BFE_I64_]] - %0:sgpr(s64) = COPY $sgpr0_sgpr1 - %1:sgpr(s64) = G_SEXT_INREG %0, 2 - $sgpr0_sgpr1 = COPY %1 + %0:sgpr(i64) = COPY $sgpr0_sgpr1 + %1:sgpr(i64) = G_SEXT_INREG %0, 2 + $sgpr0_sgpr1 = COPY %1(i64) ... --- @@ -162,9 +162,9 @@ body: | ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]].sub0, %subreg.sub0, [[DEF]], %subreg.sub1 ; GCN-NEXT: [[S_BFE_I64_:%[0-9]+]]:sreg_64 = S_BFE_I64 [[REG_SEQUENCE]], 524288, implicit-def $scc ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[S_BFE_I64_]] - %0:sgpr(s64) = COPY $sgpr0_sgpr1 - %1:sgpr(s64) = G_SEXT_INREG %0, 8 - $sgpr0_sgpr1 = COPY %1 + %0:sgpr(i64) = COPY $sgpr0_sgpr1 + %1:sgpr(i64) = G_SEXT_INREG %0, 8 + $sgpr0_sgpr1 = COPY %1(i64) ... --- @@ -184,9 +184,9 @@ body: | ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]].sub0, %subreg.sub0, [[DEF]], %subreg.sub1 ; GCN-NEXT: [[S_BFE_I64_:%[0-9]+]]:sreg_64 = S_BFE_I64 [[REG_SEQUENCE]], 1048576, implicit-def $scc ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[S_BFE_I64_]] - %0:sgpr(s64) = COPY $sgpr0_sgpr1 - %1:sgpr(s64) = G_SEXT_INREG %0, 16 - $sgpr0_sgpr1 = COPY %1 + %0:sgpr(i64) = COPY $sgpr0_sgpr1 + %1:sgpr(i64) = G_SEXT_INREG %0, 16 + $sgpr0_sgpr1 = COPY %1(i64) ... --- @@ -206,9 +206,9 @@ body: | ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]].sub0, %subreg.sub0, [[DEF]], %subreg.sub1 ; GCN-NEXT: [[S_BFE_I64_:%[0-9]+]]:sreg_64 = S_BFE_I64 [[REG_SEQUENCE]], 2031616, implicit-def $scc ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[S_BFE_I64_]] - %0:sgpr(s64) = COPY $sgpr0_sgpr1 - %1:sgpr(s64) = G_SEXT_INREG %0, 31 - $sgpr0_sgpr1 = COPY %1 + %0:sgpr(i64) = COPY $sgpr0_sgpr1 + %1:sgpr(i64) = G_SEXT_INREG %0, 31 + $sgpr0_sgpr1 = COPY %1(i64) ... # Ideally this degenerate case would have been replaceed with a 32-bit shift by combines. @@ -228,9 +228,9 @@ body: | ; GCN-NEXT: [[S_ASHR_I32_:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[COPY]].sub0, 31, implicit-def dead $scc ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]].sub0, %subreg.sub0, [[S_ASHR_I32_]], %subreg.sub1 ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[REG_SEQUENCE]] - %0:sgpr(s64) = COPY $sgpr0_sgpr1 - %1:sgpr(s64) = G_SEXT_INREG %0, 32 - $sgpr0_sgpr1 = COPY %1 + %0:sgpr(i64) = COPY $sgpr0_sgpr1 + %1:sgpr(i64) = G_SEXT_INREG %0, 32 + $sgpr0_sgpr1 = COPY %1(i64) ... 
--- @@ -250,9 +250,9 @@ body: | ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]].sub0, %subreg.sub0, [[DEF]], %subreg.sub1 ; GCN-NEXT: [[S_BFE_I64_:%[0-9]+]]:sreg_64 = S_BFE_I64 [[REG_SEQUENCE]], 4128768, implicit-def $scc ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[S_BFE_I64_]] - %0:sgpr(s64) = COPY $sgpr0_sgpr1 - %1:sgpr(s64) = G_SEXT_INREG %0, 63 - $sgpr0_sgpr1 = COPY %1 + %0:sgpr(i64) = COPY $sgpr0_sgpr1 + %1:sgpr(i64) = G_SEXT_INREG %0, 63 + $sgpr0_sgpr1 = COPY %1(i64) ... --- @@ -270,9 +270,9 @@ body: | ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[COPY]], 0, 1, implicit $exec ; GCN-NEXT: $vgpr0 = COPY [[V_BFE_I32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = G_SEXT_INREG %0, 1 - $vgpr0 = COPY %1 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = G_SEXT_INREG %0, 1 + $vgpr0 = COPY %1(i32) ... --- @@ -290,9 +290,9 @@ body: | ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[COPY]], 0, 2, implicit $exec ; GCN-NEXT: $vgpr0 = COPY [[V_BFE_I32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = G_SEXT_INREG %0, 2 - $vgpr0 = COPY %1 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = G_SEXT_INREG %0, 2 + $vgpr0 = COPY %1(i32) ... --- @@ -310,9 +310,9 @@ body: | ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[COPY]], 0, 8, implicit $exec ; GCN-NEXT: $vgpr0 = COPY [[V_BFE_I32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = G_SEXT_INREG %0, 8 - $vgpr0 = COPY %1 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = G_SEXT_INREG %0, 8 + $vgpr0 = COPY %1(i32) ... --- @@ -330,9 +330,9 @@ body: | ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[COPY]], 0, 16, implicit $exec ; GCN-NEXT: $vgpr0 = COPY [[V_BFE_I32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = G_SEXT_INREG %0, 16 - $vgpr0 = COPY %1 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = G_SEXT_INREG %0, 16 + $vgpr0 = COPY %1(i32) ... --- @@ -350,7 +350,7 @@ body: | ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[COPY]], 0, 31, implicit $exec ; GCN-NEXT: $vgpr0 = COPY [[V_BFE_I32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = G_SEXT_INREG %0, 31 - $vgpr0 = COPY %1 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = G_SEXT_INREG %0, 31 + $vgpr0 = COPY %1(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sext.mir index 1de18cf17eb99..c1b8f2671d0ae 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sext.mir @@ -18,11 +18,11 @@ body: | ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_MOV_B32_]], [[S_BFE_I32_]], implicit-def dead $scc ; GCN-NEXT: $sgpr0 = COPY [[S_AND_B32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s1) = G_TRUNC %0 - %2:sgpr(s16) = G_SEXT %1 - %3:sgpr(s32) = G_ZEXT %2 - $sgpr0 = COPY %3 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i1) = G_TRUNC %0(i32) + %2:sgpr(i16) = G_SEXT %1(i1) + %3:sgpr(i32) = G_ZEXT %2(i16) + $sgpr0 = COPY %3(i32) ... 
--- @@ -40,10 +40,10 @@ body: | ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GCN-NEXT: [[S_BFE_I32_:%[0-9]+]]:sreg_32 = S_BFE_I32 [[COPY]], 65536, implicit-def $scc ; GCN-NEXT: $sgpr0 = COPY [[S_BFE_I32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s1) = G_TRUNC %0 - %2:sgpr(s32) = G_SEXT %1 - $sgpr0 = COPY %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i1) = G_TRUNC %0(i32) + %2:sgpr(i32) = G_SEXT %1(i1) + $sgpr0 = COPY %2(i32) ... --- @@ -63,10 +63,10 @@ body: | ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 ; GCN-NEXT: [[S_BFE_I64_:%[0-9]+]]:sreg_64 = S_BFE_I64 [[REG_SEQUENCE]], 65536, implicit-def $scc ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[S_BFE_I64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s1) = G_TRUNC %0 - %2:sgpr(s64) = G_SEXT %1 - $sgpr0_sgpr1 = COPY %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i1) = G_TRUNC %0(i32) + %2:sgpr(i64) = G_SEXT %1(i1) + $sgpr0_sgpr1 = COPY %2(i64) ... --- @@ -84,10 +84,10 @@ body: | ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GCN-NEXT: [[S_SEXT_I32_I16_:%[0-9]+]]:sreg_32 = S_SEXT_I32_I16 [[COPY]] ; GCN-NEXT: $sgpr0 = COPY [[S_SEXT_I32_I16_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0 - %2:sgpr(s32) = G_SEXT %1 - $sgpr0 = COPY %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(i32) = G_SEXT %1(i16) + $sgpr0 = COPY %2(i32) ... @@ -111,10 +111,10 @@ body: | ; GCN-NEXT: [[S_SEXT_I32_I16_1:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = S_SEXT_I32_I16 [[COPY]] ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_SEXT_I32_I16_1]], %subreg.sub0, [[COPY1]], %subreg.sub1 ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[REG_SEQUENCE]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0 - %2:sgpr(s64) = G_SEXT %1 - $sgpr0_sgpr1 = COPY %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(i64) = G_SEXT %1(i16) + $sgpr0_sgpr1 = COPY %2(i64) ... @@ -135,9 +135,9 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_ASHR_I32_]] ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[REG_SEQUENCE]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s64) = G_SEXT %0 - $sgpr0_sgpr1 = COPY %1 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i64) = G_SEXT %0(i32) + $sgpr0_sgpr1 = COPY %1(i64) ... @@ -150,9 +150,9 @@ body: | # bb.0: # liveins: $vgpr0 -# %0:vgpr(s32) = COPY $vgpr0 -# %1:vcc(s1) = G_ICMP intpred(eq), %0, %0 -# %2:vgpr(s32) = G_SEXT %1 +# %0:vgpr(i32) = COPY $vgpr0 +# %1:vcc(i1) = G_ICMP intpred(eq), %0, %0 +# %2:vgpr(i32) = G_SEXT %1 # $vgpr0 = COPY %2 # ... @@ -173,11 +173,11 @@ body: | ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[V_BFE_I32_e64_]], implicit $exec ; GCN-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s1) = G_TRUNC %0 - %2:vgpr(s16) = G_SEXT %1 - %3:vgpr(s32) = G_ZEXT %2 - $vgpr0 = COPY %3 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i1) = G_TRUNC %0(i32) + %2:vgpr(i16) = G_SEXT %1(i1) + %3:vgpr(i32) = G_ZEXT %2(i16) + $vgpr0 = COPY %3(i32) ... 
--- @@ -195,10 +195,10 @@ body: | ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[COPY]], 0, 1, implicit $exec ; GCN-NEXT: $vgpr0 = COPY [[V_BFE_I32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s1) = G_TRUNC %0 - %2:vgpr(s32) = G_SEXT %1 - $vgpr0 = COPY %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i1) = G_TRUNC %0(i32) + %2:vgpr(i32) = G_SEXT %1(i1) + $vgpr0 = COPY %2(i32) ... --- @@ -216,10 +216,10 @@ body: | ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[COPY]], 0, 16, implicit $exec ; GCN-NEXT: $vgpr0 = COPY [[V_BFE_I32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s16) = G_TRUNC %0 - %2:vgpr(s32) = G_SEXT %1 - $vgpr0 = COPY %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i16) = G_TRUNC %0(i32) + %2:vgpr(i32) = G_SEXT %1(i16) + $vgpr0 = COPY %2(i32) ... @@ -238,8 +238,8 @@ body: | ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GCN-NEXT: [[S_BFE_I32_:%[0-9]+]]:sreg_32 = S_BFE_I32 [[COPY]], 65536, implicit-def $scc ; GCN-NEXT: $sgpr0 = COPY [[S_BFE_I32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sreg_32(s1) = G_TRUNC %0 - %2:sgpr(s32) = G_SEXT %1 - $sgpr0 = COPY %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:sreg_32(i1) = G_TRUNC %0(i32) + %2:sgpr(i32) = G_SEXT %1(i1) + $sgpr0 = COPY %2(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sextload-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sextload-local.mir index 37958480d28a5..000dcf5fd6ee8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sextload-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sextload-local.mir @@ -20,7 +20,7 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_READ_I8_:%[0-9]+]]:vgpr_32 = DS_READ_I8 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX6-NEXT: [[DS_READ_I8_:%[0-9]+]]:vgpr_32 = DS_READ_I8 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (i8), addrspace 3) ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_I8_]] ; ; GFX7-LABEL: name: sextload_local_s32_from_s8_align1 @@ -28,18 +28,18 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ_I8_:%[0-9]+]]:vgpr_32 = DS_READ_I8 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX7-NEXT: [[DS_READ_I8_:%[0-9]+]]:vgpr_32 = DS_READ_I8 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (i8), addrspace 3) ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_I8_]] ; ; GFX9-LABEL: name: sextload_local_s32_from_s8_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ_I8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_I8_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 3) + ; GFX9-NEXT: [[DS_READ_I8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_I8_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (i8), addrspace 3) ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_I8_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(s32) = G_SEXTLOAD %0 :: (load (s8), align 1, addrspace 3) - $vgpr0 = COPY %1 + %1:vgpr(i32) = G_SEXTLOAD %0(p3) :: (load (i8), addrspace 3) + $vgpr0 = COPY %1(i32) ... 
@@ -58,7 +58,7 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_READ_I16_:%[0-9]+]]:vgpr_32 = DS_READ_I16 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s16), addrspace 3) + ; GFX6-NEXT: [[DS_READ_I16_:%[0-9]+]]:vgpr_32 = DS_READ_I16 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (i16), addrspace 3) ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_I16_]] ; ; GFX7-LABEL: name: sextload_local_s32_from_s16_align2 @@ -66,18 +66,18 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ_I16_:%[0-9]+]]:vgpr_32 = DS_READ_I16 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s16), addrspace 3) + ; GFX7-NEXT: [[DS_READ_I16_:%[0-9]+]]:vgpr_32 = DS_READ_I16 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (i16), addrspace 3) ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_I16_]] ; ; GFX9-LABEL: name: sextload_local_s32_from_s16_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ_I16_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_I16_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 3) + ; GFX9-NEXT: [[DS_READ_I16_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_I16_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (i16), addrspace 3) ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_I16_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(s32) = G_SEXTLOAD %0 :: (load (s16), align 2, addrspace 3) - $vgpr0 = COPY %1 + %1:vgpr(i32) = G_SEXTLOAD %0(p3) :: (load (i16), addrspace 3) + $vgpr0 = COPY %1(i32) ... @@ -92,8 +92,8 @@ body: | # liveins: $vgpr0 # %0:vgpr(p3) = COPY $vgpr0 -# %1:vgpr(s16) = G_SEXTLOAD %0 :: (load (s8), align 1, addrspace 3) -# %2:vgpr(s32) = G_ANYEXT %1 +# %1:vgpr(i16) = G_SEXTLOAD %0 :: (load (i8), align 1, addrspace 3) +# %2:vgpr(i32) = G_ANYEXT %1 # $vgpr0 = COPY %2 # ... 
@@ -115,7 +115,7 @@ body: | ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_READ_I8_:%[0-9]+]]:vgpr_32 = DS_READ_I8 [[V_ADD_CO_U32_e64_]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX6-NEXT: [[DS_READ_I8_:%[0-9]+]]:vgpr_32 = DS_READ_I8 [[V_ADD_CO_U32_e64_]], 0, 0, implicit $m0, implicit $exec :: (load (i8), addrspace 3) ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_I8_]] ; ; GFX7-LABEL: name: sextload_local_s32_from_s8_align1_offset4095 @@ -123,19 +123,19 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ_I8_:%[0-9]+]]:vgpr_32 = DS_READ_I8 [[COPY]], 4095, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX7-NEXT: [[DS_READ_I8_:%[0-9]+]]:vgpr_32 = DS_READ_I8 [[COPY]], 4095, 0, implicit $m0, implicit $exec :: (load (i8), addrspace 3) ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_I8_]] ; ; GFX9-LABEL: name: sextload_local_s32_from_s8_align1_offset4095 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ_I8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_I8_gfx9 [[COPY]], 4095, 0, implicit $exec :: (load (s8), addrspace 3) + ; GFX9-NEXT: [[DS_READ_I8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_I8_gfx9 [[COPY]], 4095, 0, implicit $exec :: (load (i8), addrspace 3) ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_I8_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(s32) = G_CONSTANT i32 4095 - %2:vgpr(p3) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_SEXTLOAD %2 :: (load (s8), align 1, addrspace 3) - $vgpr0 = COPY %3 + %1:vgpr(i32) = G_CONSTANT i32 4095 + %2:vgpr(p3) = G_PTR_ADD %0, %1(i32) + %3:vgpr(i32) = G_SEXTLOAD %2(p3) :: (load (i8), addrspace 3) + $vgpr0 = COPY %3(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.mir index fcbf0f097dce5..e3d45d175b115 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.mir @@ -53,10 +53,10 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GFX10-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], [[COPY1]], implicit-def dead $scc ; GFX10-NEXT: S_ENDPGM 0, implicit [[S_LSHL_B32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = G_SHL %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i32) = G_SHL %0, %1(i32) + S_ENDPGM 0, implicit %2(i32) ... --- @@ -106,10 +106,10 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(s32) = G_SHL %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:vgpr(i32) = G_SHL %0, %1(i32) + S_ENDPGM 0, implicit %2(i32) ... 
--- @@ -159,10 +159,10 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX10-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr(s32) = COPY $sgpr0 - %2:vgpr(s32) = G_SHL %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = COPY $sgpr0 + %2:vgpr(i32) = G_SHL %0, %1(i32) + S_ENDPGM 0, implicit %2(i32) ... --- @@ -212,10 +212,10 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX10-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = G_SHL %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = G_SHL %0, %1(i32) + S_ENDPGM 0, implicit %2(i32) ... --- @@ -265,10 +265,10 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; GFX10-NEXT: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 [[COPY]], [[COPY1]], implicit-def dead $scc ; GFX10-NEXT: S_ENDPGM 0, implicit [[S_LSHL_B64_]] - %0:sgpr(s64) = COPY $sgpr0_sgpr1 - %1:sgpr(s32) = COPY $sgpr2 - %2:sgpr(s64) = G_SHL %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i64) = COPY $sgpr0_sgpr1 + %1:sgpr(i32) = COPY $sgpr2 + %2:sgpr(i64) = G_SHL %0, %1(i32) + S_ENDPGM 0, implicit %2(i64) ... --- @@ -318,10 +318,10 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B64_e64_]] - %0:sgpr(s64) = COPY $sgpr0_sgpr1 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(s64) = G_SHL %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i64) = COPY $sgpr0_sgpr1 + %1:vgpr(i32) = COPY $vgpr0 + %2:vgpr(i64) = G_SHL %0, %1(i32) + S_ENDPGM 0, implicit %2(i64) ... --- @@ -371,10 +371,10 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX10-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B64_e64_]] - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:sgpr(s32) = COPY $sgpr0 - %2:vgpr(s64) = G_SHL %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:sgpr(i32) = COPY $sgpr0 + %2:vgpr(i64) = G_SHL %0, %1(i32) + S_ENDPGM 0, implicit %2(i64) ... --- @@ -424,9 +424,8 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX10-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B64_e64_]] - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s64) = G_SHL %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(i32) = COPY $vgpr2 + %2:vgpr(i64) = G_SHL %0, %1(i32) + S_ENDPGM 0, implicit %2(i64) ... 
- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir index b0703a642e033..1bcabe701e973 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir @@ -11,11 +11,11 @@ # RUN: FileCheck --check-prefix=ERR %s < %t # ERR-NOT: remark -# ERR: remark: :0:0: cannot select: %4:sgpr(s16) = G_SHL %2:sgpr, %3:sgpr(s16) (in function: shl_s16_s16_ss) -# ERR-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_SHL %2:vgpr, %1:vgpr(s32) (in function: shl_s16_s32_vv) -# ERR-NEXT: remark: :0:0: cannot select: %3:sgpr(s16) = G_SHL %2:sgpr, %1:sgpr(s32) (in function: shl_s16_s32_ss) -# ERR-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_SHL %2:sgpr, %1:vgpr(s32) (in function: shl_s16_s32_sv) -# ERR-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_SHL %2:vgpr, %1:sgpr(s32) (in function: shl_s16_s32_vs) +# ERR: remark: :0:0: cannot select: %4:sgpr(i16) = G_SHL %2:sgpr, %3:sgpr(i16) (in function: shl_s16_s16_ss) +# ERR-NEXT: remark: :0:0: cannot select: %3:vgpr(i16) = G_SHL %2:vgpr, %1:vgpr(i32) (in function: shl_s16_s32_vv) +# ERR-NEXT: remark: :0:0: cannot select: %3:sgpr(i16) = G_SHL %2:sgpr, %1:sgpr(i32) (in function: shl_s16_s32_ss) +# ERR-NEXT: remark: :0:0: cannot select: %3:vgpr(i16) = G_SHL %2:sgpr, %1:vgpr(i32) (in function: shl_s16_s32_sv) +# ERR-NEXT: remark: :0:0: cannot select: %3:vgpr(i16) = G_SHL %2:vgpr, %1:sgpr(i32) (in function: shl_s16_s32_vs) # ERR-NOT: remark --- @@ -30,45 +30,48 @@ body: | ; GFX8-LABEL: name: shl_s16_s16_ss ; GFX8: liveins: $sgpr0, $sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8-NEXT: [[SHL:%[0-9]+]]:sgpr(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) - ; GFX8-NEXT: S_ENDPGM 0, implicit [[SHL]](s16) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY1]](i32) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:sgpr(i16) = G_SHL [[TRUNC]], [[TRUNC1]](i16) + ; GFX8-NEXT: S_ENDPGM 0, implicit [[SHL]](i16) + ; ; GFX9-LABEL: name: shl_s16_s16_ss ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:sgpr(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) - ; GFX9-NEXT: S_ENDPGM 0, implicit [[SHL]](s16) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:sgpr(i16) = G_SHL [[TRUNC]], [[TRUNC1]](i16) + ; GFX9-NEXT: S_ENDPGM 0, implicit [[SHL]](i16) + ; ; GFX10-LABEL: name: shl_s16_s16_ss ; GFX10: liveins: $sgpr0, $sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX10-NEXT: 
[[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) - ; GFX10-NEXT: [[SHL:%[0-9]+]]:sgpr(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) - ; GFX10-NEXT: S_ENDPGM 0, implicit [[SHL]](s16) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY1]](i32) + ; GFX10-NEXT: [[SHL:%[0-9]+]]:sgpr(i16) = G_SHL [[TRUNC]], [[TRUNC1]](i16) + ; GFX10-NEXT: S_ENDPGM 0, implicit [[SHL]](i16) + ; ; GFX11-LABEL: name: shl_s16_s16_ss ; GFX11: liveins: $sgpr0, $sgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) - ; GFX11-NEXT: [[SHL:%[0-9]+]]:sgpr(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) - ; GFX11-NEXT: S_ENDPGM 0, implicit [[SHL]](s16) - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s16) = G_TRUNC %0 - %3:sgpr(s16) = G_TRUNC %1 - %4:sgpr(s16) = G_SHL %2, %3 - S_ENDPGM 0, implicit %4 + ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY1]](i32) + ; GFX11-NEXT: [[SHL:%[0-9]+]]:sgpr(i16) = G_SHL [[TRUNC]], [[TRUNC1]](i16) + ; GFX11-NEXT: S_ENDPGM 0, implicit [[SHL]](i16) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i16) = G_TRUNC %0(i32) + %3:sgpr(i16) = G_TRUNC %1(i32) + %4:sgpr(i16) = G_SHL %2, %3(i16) + S_ENDPGM 0, implicit %4(i16) ... --- @@ -86,6 +89,7 @@ body: | ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX8-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] + ; ; GFX9-LABEL: name: shl_s16_s16_vs ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} @@ -93,6 +97,7 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX9-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] + ; ; GFX10-LABEL: name: shl_s16_s16_vs ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} @@ -100,6 +105,7 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX10-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] + ; ; GFX11-LABEL: name: shl_s16_s16_vs ; GFX11: liveins: $sgpr0, $vgpr0 ; GFX11-NEXT: {{ $}} @@ -107,12 +113,12 @@ body: | ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX11-NEXT: [[V_LSHLREV_B16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_fake16_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_fake16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr(s32) = COPY $sgpr0 - %2:vgpr(s16) = G_TRUNC %0 - %3:sgpr(s16) = G_TRUNC %1 - %4:vgpr(s16) = G_SHL %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = COPY $sgpr0 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:sgpr(i16) = G_TRUNC %1(i32) + %4:vgpr(i16) = G_SHL %2, %3(i16) + S_ENDPGM 0, implicit %4(i16) ... 
--- @@ -127,40 +133,43 @@ body: | ; GFX8-LABEL: name: shl_s16_s32_vv ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) - ; GFX8-NEXT: S_ENDPGM 0, implicit [[SHL]](s16) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:vgpr(i16) = G_SHL [[TRUNC]], [[COPY1]](i32) + ; GFX8-NEXT: S_ENDPGM 0, implicit [[SHL]](i16) + ; ; GFX9-LABEL: name: shl_s16_s32_vv ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) - ; GFX9-NEXT: S_ENDPGM 0, implicit [[SHL]](s16) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:vgpr(i16) = G_SHL [[TRUNC]], [[COPY1]](i32) + ; GFX9-NEXT: S_ENDPGM 0, implicit [[SHL]](i16) + ; ; GFX10-LABEL: name: shl_s16_s32_vv ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX10-NEXT: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) - ; GFX10-NEXT: S_ENDPGM 0, implicit [[SHL]](s16) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX10-NEXT: [[SHL:%[0-9]+]]:vgpr(i16) = G_SHL [[TRUNC]], [[COPY1]](i32) + ; GFX10-NEXT: S_ENDPGM 0, implicit [[SHL]](i16) + ; ; GFX11-LABEL: name: shl_s16_s32_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX11-NEXT: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) - ; GFX11-NEXT: S_ENDPGM 0, implicit [[SHL]](s16) - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_SHL %2, %1 - S_ENDPGM 0, implicit %3 + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX11-NEXT: [[SHL:%[0-9]+]]:vgpr(i16) = G_SHL [[TRUNC]], [[COPY1]](i32) + ; GFX11-NEXT: S_ENDPGM 0, implicit [[SHL]](i16) + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_SHL %2, %1(i32) + S_ENDPGM 0, implicit %3(i16) ... 
--- @@ -179,6 +188,7 @@ body: | ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] + ; ; GFX9-LABEL: name: shl_s16_s16_vv ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -186,6 +196,7 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] + ; ; GFX10-LABEL: name: shl_s16_s16_vv ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} @@ -193,6 +204,7 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX10-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] + ; ; GFX11-LABEL: name: shl_s16_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} @@ -200,12 +212,12 @@ body: | ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[V_LSHLREV_B16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_fake16_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_fake16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vgpr(s16) = G_SHL %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %4:vgpr(i16) = G_SHL %2, %3(i16) + S_ENDPGM 0, implicit %4(i16) ... --- @@ -224,6 +236,7 @@ body: | ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] + ; ; GFX9-LABEL: name: shl_s16_s16_vv_zext_to_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -231,6 +244,7 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] + ; ; GFX10-LABEL: name: shl_s16_s16_vv_zext_to_s32 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} @@ -240,6 +254,7 @@ body: | ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 ; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[V_LSHLREV_B16_e64_]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] + ; ; GFX11-LABEL: name: shl_s16_s16_vv_zext_to_s32 ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} @@ -249,13 +264,13 @@ body: | ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 ; GFX11-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[V_LSHLREV_B16_fake16_e64_]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vgpr(s16) = G_SHL %2, %3 - %5:vgpr(s32) = G_ZEXT %4 - S_ENDPGM 0, implicit %5 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %4:vgpr(i16) = G_SHL %2, %3(i16) + %5:vgpr(i32) = G_ZEXT %4(i16) + S_ENDPGM 0, implicit %5(i32) ... 
--- @@ -276,6 +291,7 @@ body: | ; GFX8-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_LSHLREV_B16_e64_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1 ; GFX8-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; ; GFX9-LABEL: name: shl_s16_vv_zext_to_s64 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -285,6 +301,7 @@ body: | ; GFX9-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_LSHLREV_B16_e64_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1 ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; ; GFX10-LABEL: name: shl_s16_vv_zext_to_s64 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} @@ -297,6 +314,7 @@ body: | ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1 ; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; ; GFX11-LABEL: name: shl_s16_vv_zext_to_s64 ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} @@ -309,13 +327,13 @@ body: | ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1 ; GFX11-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vgpr(s16) = G_SHL %2, %3 - %5:vgpr(s64) = G_ZEXT %4 - S_ENDPGM 0, implicit %5 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %4:vgpr(i16) = G_SHL %2, %3(i16) + %5:vgpr(i64) = G_ZEXT %4(i16) + S_ENDPGM 0, implicit %5(i64) ... 
--- @@ -330,40 +348,43 @@ body: | ; GFX8-LABEL: name: shl_s16_s32_ss ; GFX8: liveins: $sgpr0, $sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[SHL:%[0-9]+]]:sgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) - ; GFX8-NEXT: S_ENDPGM 0, implicit [[SHL]](s16) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:sgpr(i16) = G_SHL [[TRUNC]], [[COPY1]](i32) + ; GFX8-NEXT: S_ENDPGM 0, implicit [[SHL]](i16) + ; ; GFX9-LABEL: name: shl_s16_s32_ss ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:sgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) - ; GFX9-NEXT: S_ENDPGM 0, implicit [[SHL]](s16) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:sgpr(i16) = G_SHL [[TRUNC]], [[COPY1]](i32) + ; GFX9-NEXT: S_ENDPGM 0, implicit [[SHL]](i16) + ; ; GFX10-LABEL: name: shl_s16_s32_ss ; GFX10: liveins: $sgpr0, $sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX10-NEXT: [[SHL:%[0-9]+]]:sgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) - ; GFX10-NEXT: S_ENDPGM 0, implicit [[SHL]](s16) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX10-NEXT: [[SHL:%[0-9]+]]:sgpr(i16) = G_SHL [[TRUNC]], [[COPY1]](i32) + ; GFX10-NEXT: S_ENDPGM 0, implicit [[SHL]](i16) + ; ; GFX11-LABEL: name: shl_s16_s32_ss ; GFX11: liveins: $sgpr0, $sgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX11-NEXT: [[SHL:%[0-9]+]]:sgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) - ; GFX11-NEXT: S_ENDPGM 0, implicit [[SHL]](s16) - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s16) = G_TRUNC %0 - %3:sgpr(s16) = G_SHL %2, %1 - S_ENDPGM 0, implicit %3 + ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX11-NEXT: [[SHL:%[0-9]+]]:sgpr(i16) = G_SHL [[TRUNC]], [[COPY1]](i32) + ; GFX11-NEXT: S_ENDPGM 0, implicit [[SHL]](i16) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i16) = G_TRUNC %0(i32) + %3:sgpr(i16) = G_SHL %2, %1(i32) + S_ENDPGM 0, implicit %3(i16) ... 
--- @@ -377,40 +398,43 @@ body: | ; GFX8-LABEL: name: shl_s16_s32_sv ; GFX8: liveins: $sgpr0, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) - ; GFX8-NEXT: S_ENDPGM 0, implicit [[SHL]](s16) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:vgpr(i16) = G_SHL [[TRUNC]], [[COPY1]](i32) + ; GFX8-NEXT: S_ENDPGM 0, implicit [[SHL]](i16) + ; ; GFX9-LABEL: name: shl_s16_s32_sv ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) - ; GFX9-NEXT: S_ENDPGM 0, implicit [[SHL]](s16) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:vgpr(i16) = G_SHL [[TRUNC]], [[COPY1]](i32) + ; GFX9-NEXT: S_ENDPGM 0, implicit [[SHL]](i16) + ; ; GFX10-LABEL: name: shl_s16_s32_sv ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX10-NEXT: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) - ; GFX10-NEXT: S_ENDPGM 0, implicit [[SHL]](s16) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX10-NEXT: [[SHL:%[0-9]+]]:vgpr(i16) = G_SHL [[TRUNC]], [[COPY1]](i32) + ; GFX10-NEXT: S_ENDPGM 0, implicit [[SHL]](i16) + ; ; GFX11-LABEL: name: shl_s16_s32_sv ; GFX11: liveins: $sgpr0, $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX11-NEXT: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) - ; GFX11-NEXT: S_ENDPGM 0, implicit [[SHL]](s16) - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:sgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_SHL %2, %1 - S_ENDPGM 0, implicit %3 + ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX11-NEXT: [[SHL:%[0-9]+]]:vgpr(i16) = G_SHL [[TRUNC]], [[COPY1]](i32) + ; GFX11-NEXT: S_ENDPGM 0, implicit [[SHL]](i16) + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:sgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_SHL %2, %1(i32) + S_ENDPGM 0, implicit %3(i16) ... 
--- @@ -428,6 +452,7 @@ body: | ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] + ; ; GFX9-LABEL: name: shl_s16_s16_sv ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} @@ -435,6 +460,7 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] + ; ; GFX10-LABEL: name: shl_s16_s16_sv ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} @@ -442,6 +468,7 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] + ; ; GFX11-LABEL: name: shl_s16_s16_sv ; GFX11: liveins: $sgpr0, $vgpr0 ; GFX11-NEXT: {{ $}} @@ -449,12 +476,12 @@ body: | ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[V_LSHLREV_B16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_fake16_e64 [[COPY1]], [[COPY]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_fake16_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:sgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vgpr(s16) = G_SHL %2, %3 - S_ENDPGM 0, implicit %4 + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:sgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %4:vgpr(i16) = G_SHL %2, %3(i16) + S_ENDPGM 0, implicit %4(i16) ... --- @@ -468,38 +495,41 @@ body: | ; GFX8-LABEL: name: shl_s16_s32_vs ; GFX8: liveins: $sgpr0, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) - ; GFX8-NEXT: S_ENDPGM 0, implicit [[SHL]](s16) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:vgpr(i16) = G_SHL [[TRUNC]], [[COPY1]](i32) + ; GFX8-NEXT: S_ENDPGM 0, implicit [[SHL]](i16) + ; ; GFX9-LABEL: name: shl_s16_s32_vs ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) - ; GFX9-NEXT: S_ENDPGM 0, implicit [[SHL]](s16) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:vgpr(i16) = G_SHL [[TRUNC]], [[COPY1]](i32) + ; GFX9-NEXT: S_ENDPGM 0, implicit [[SHL]](i16) + ; ; GFX10-LABEL: name: shl_s16_s32_vs ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX10-NEXT: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) - ; GFX10-NEXT: S_ENDPGM 0, implicit [[SHL]](s16) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; 
GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX10-NEXT: [[SHL:%[0-9]+]]:vgpr(i16) = G_SHL [[TRUNC]], [[COPY1]](i32) + ; GFX10-NEXT: S_ENDPGM 0, implicit [[SHL]](i16) + ; ; GFX11-LABEL: name: shl_s16_s32_vs ; GFX11: liveins: $sgpr0, $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX11-NEXT: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) - ; GFX11-NEXT: S_ENDPGM 0, implicit [[SHL]](s16) - %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr(s32) = COPY $sgpr0 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_SHL %2, %1 - S_ENDPGM 0, implicit %3 + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX11-NEXT: [[SHL:%[0-9]+]]:vgpr(i16) = G_SHL [[TRUNC]], [[COPY1]](i32) + ; GFX11-NEXT: S_ENDPGM 0, implicit [[SHL]](i16) + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = COPY $sgpr0 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_SHL %2, %1(i32) + S_ENDPGM 0, implicit %3(i16) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.v2s16.mir index 100b5840b34df..33ed2e1c10aea 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.v2s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.v2s16.mir @@ -7,7 +7,7 @@ # RUN: FileCheck -check-prefixes=ERR-GFX910,ERR %s < %t # ERR-NOT: remark -# ERR-GFX910: remark: :0:0: cannot select: %2:sgpr(<2 x s16>) = G_SHL %0:sgpr, %1:sgpr(<2 x s16>) (in function: shl_v2s16_ss) +# ERR-GFX910: remark: :0:0: cannot select: %2:sgpr(<2 x i16>) = G_SHL %0:sgpr, %1:sgpr(<2 x i16>) (in function: shl_v2s16_ss) # ERR-NOT: remark --- @@ -21,21 +21,22 @@ body: | ; GFX9-LABEL: name: shl_v2s16_ss ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:sgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>) - ; GFX9-NEXT: S_ENDPGM 0, implicit [[SHL]](<2 x s16>) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr1 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:sgpr(<2 x i16>) = G_SHL [[COPY]], [[COPY1]](<2 x i16>) + ; GFX9-NEXT: S_ENDPGM 0, implicit [[SHL]](<2 x i16>) + ; ; GFX10-LABEL: name: shl_v2s16_ss ; GFX10: liveins: $sgpr0, $sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1 - ; GFX10-NEXT: [[SHL:%[0-9]+]]:sgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>) - ; GFX10-NEXT: S_ENDPGM 0, implicit [[SHL]](<2 x s16>) - %0:sgpr(<2 x s16>) = COPY $sgpr0 - %1:sgpr(<2 x s16>) = COPY $sgpr1 - %2:sgpr(<2 x s16>) = G_SHL %0, %1 - S_ENDPGM 0, implicit %2 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr1 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:sgpr(<2 x i16>) = G_SHL [[COPY]], [[COPY1]](<2 x i16>) + ; GFX10-NEXT: S_ENDPGM 0, implicit [[SHL]](<2 x i16>) + %0:sgpr(<2 x i16>) = COPY $sgpr0 + %1:sgpr(<2 x i16>) = COPY $sgpr1 + %2:sgpr(<2 x i16>) = G_SHL %0, %1(<2 x i16>) + S_ENDPGM 0, implicit %2(<2 x i16>) ... 
--- @@ -53,6 +54,7 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[V_PK_LSHLREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHLREV_B16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_PK_LSHLREV_B16_]] + ; ; GFX10-LABEL: name: shl_v2s16_sv ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} @@ -60,10 +62,10 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: [[V_PK_LSHLREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHLREV_B16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_PK_LSHLREV_B16_]] - %0:sgpr(<2 x s16>) = COPY $sgpr0 - %1:vgpr(<2 x s16>) = COPY $vgpr0 - %2:vgpr(<2 x s16>) = G_SHL %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(<2 x i16>) = COPY $sgpr0 + %1:vgpr(<2 x i16>) = COPY $vgpr0 + %2:vgpr(<2 x i16>) = G_SHL %0, %1(<2 x i16>) + S_ENDPGM 0, implicit %2(<2 x i16>) ... --- @@ -81,6 +83,7 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX9-NEXT: [[V_PK_LSHLREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHLREV_B16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_PK_LSHLREV_B16_]] + ; ; GFX10-LABEL: name: shl_v2s16_vs ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} @@ -88,10 +91,10 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX10-NEXT: [[V_PK_LSHLREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHLREV_B16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_PK_LSHLREV_B16_]] - %0:vgpr(<2 x s16>) = COPY $vgpr0 - %1:sgpr(<2 x s16>) = COPY $sgpr0 - %2:vgpr(<2 x s16>) = G_SHL %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(<2 x i16>) = COPY $vgpr0 + %1:sgpr(<2 x i16>) = COPY $sgpr0 + %2:vgpr(<2 x i16>) = G_SHL %0, %1(<2 x i16>) + S_ENDPGM 0, implicit %2(<2 x i16>) ... --- @@ -109,6 +112,7 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9-NEXT: [[V_PK_LSHLREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHLREV_B16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_PK_LSHLREV_B16_]] + ; ; GFX10-LABEL: name: shl_v2s16_vv ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} @@ -116,8 +120,8 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX10-NEXT: [[V_PK_LSHLREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHLREV_B16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_PK_LSHLREV_B16_]] - %0:vgpr(<2 x s16>) = COPY $vgpr0 - %1:vgpr(<2 x s16>) = COPY $vgpr1 - %2:vgpr(<2 x s16>) = G_SHL %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(<2 x i16>) = COPY $vgpr0 + %1:vgpr(<2 x i16>) = COPY $vgpr1 + %2:vgpr(<2 x i16>) = G_SHL %0, %1(<2 x i16>) + S_ENDPGM 0, implicit %2(<2 x i16>) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir index 3888ce87b46fd..18eda423acd50 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir @@ -22,8 +22,8 @@ body: | ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 ; WAVE64-NEXT: [[V_CVT_F32_I32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 [[COPY]], 0, 0, implicit $mode, implicit $exec ; WAVE64-NEXT: [[V_CVT_F32_I32_e64_1:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: FLAT_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; WAVE64-NEXT: FLAT_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; WAVE64-NEXT: FLAT_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (f32), addrspace 1) + ; WAVE64-NEXT: FLAT_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (f32), addrspace 1) ; ; WAVE32-LABEL: name: sitofp ; WAVE32: liveins: $sgpr0, $vgpr0, $vgpr3_vgpr4 @@ -33,8 +33,8 @@ body: | ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 ; WAVE32-NEXT: [[V_CVT_F32_I32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 [[COPY]], 0, 0, implicit $mode, implicit $exec ; WAVE32-NEXT: [[V_CVT_F32_I32_e64_1:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: GLOBAL_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_]], 0, 0, implicit $exec :: (store (s32), addrspace 1) - ; WAVE32-NEXT: GLOBAL_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_1]], 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; WAVE32-NEXT: GLOBAL_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_]], 0, 0, implicit $exec :: (store (f32), addrspace 1) + ; WAVE32-NEXT: GLOBAL_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_1]], 0, 0, implicit $exec :: (store (f32), addrspace 1) ; ; GFX11-LABEL: name: sitofp ; GFX11: liveins: $sgpr0, $vgpr0, $vgpr3_vgpr4 @@ -44,20 +44,20 @@ body: | ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 ; GFX11-NEXT: [[V_CVT_F32_I32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: [[V_CVT_F32_I32_e64_1:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: GLOBAL_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_]], 0, 0, implicit $exec :: (store (s32), addrspace 1) - ; GFX11-NEXT: GLOBAL_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_1]], 0, 0, implicit $exec :: (store (s32), addrspace 1) - %0:sgpr(s32) = COPY $sgpr0 + ; GFX11-NEXT: GLOBAL_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_]], 0, 0, implicit $exec :: (store (f32), addrspace 1) + ; GFX11-NEXT: GLOBAL_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_1]], 0, 0, implicit $exec :: (store (f32), addrspace 1) + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:vgpr(p1) = COPY $vgpr3_vgpr4 + %3:vgpr(f32) = G_SITOFP %0(i32) + %4:vgpr(f32) = G_SITOFP %1(i32) + G_STORE %3(f32), %2(p1) :: (store (f32), addrspace 1) + G_STORE %4(f32), %2(p1) :: (store (f32), addrspace 1) - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(p1) = COPY $vgpr3_vgpr4 - %3:vgpr(s32) = G_SITOFP %0 - %4:vgpr(s32) = G_SITOFP %1 - G_STORE %3, %2 :: (store (s32), addrspace 1) - G_STORE %4, %2 :: (store (s32), addrspace 1) ... 
--- @@ -103,10 +103,11 @@ body: | ; GFX11-FAKE16-NEXT: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[COPY]], implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CVT_F16_F32_fake16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s16) = G_SITOFP %0 - %2:vgpr(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(f16) = G_SITOFP %0(i32) + %2:vgpr(i16) = G_BITCAST %1(f16) + %3:vgpr(i32) = G_ANYEXT %2(i16) + $vgpr0 = COPY %3(i32) ... --- @@ -152,8 +153,9 @@ body: | ; GFX11-FAKE16-NEXT: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[COPY]], implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CVT_F16_F32_fake16_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s16) = G_SITOFP %0 - %2:vgpr(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(f16) = G_SITOFP %0(i32) + %2:vgpr(i16) = G_BITCAST %1(f16) + %3:vgpr(i32) = G_ANYEXT %2(i16) + $vgpr0 = COPY %3(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-smax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-smax.mir index fb026d3efa0a6..1d2c48b71f956 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-smax.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-smax.mir @@ -18,10 +18,10 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GCN-NEXT: [[S_MAX_I32_:%[0-9]+]]:sreg_32 = S_MAX_I32 [[COPY]], [[COPY1]], implicit-def dead $scc ; GCN-NEXT: S_ENDPGM 0, implicit [[S_MAX_I32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = G_SMAX %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i32) = G_SMAX %0, %1 + S_ENDPGM 0, implicit %2(i32) ... --- @@ -39,10 +39,10 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN-NEXT: [[V_MAX_I32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[COPY]], [[COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MAX_I32_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(s32) = G_SMAX %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:vgpr(i32) = G_SMAX %0, %1 + S_ENDPGM 0, implicit %2(i32) ... --- @@ -60,10 +60,10 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GCN-NEXT: [[V_MAX_I32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[COPY]], [[COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MAX_I32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr(s32) = COPY $sgpr0 - %2:vgpr(s32) = G_SMAX %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = COPY $sgpr0 + %2:vgpr(i32) = G_SMAX %0, %1 + S_ENDPGM 0, implicit %2(i32) ... --- @@ -81,8 +81,8 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GCN-NEXT: [[V_MAX_I32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[COPY]], [[COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MAX_I32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = G_SMAX %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = G_SMAX %0, %1 + S_ENDPGM 0, implicit %2(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-smin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-smin.mir index 33f14c179f2a9..c8d53cf1fd26d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-smin.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-smin.mir @@ -18,10 +18,10 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GCN-NEXT: [[S_MIN_I32_:%[0-9]+]]:sreg_32 = S_MIN_I32 [[COPY]], [[COPY1]], implicit-def dead $scc ; GCN-NEXT: S_ENDPGM 0, implicit [[S_MIN_I32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = G_SMIN %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i32) = G_SMIN %0, %1 + S_ENDPGM 0, implicit %2(i32) ... --- @@ -39,10 +39,10 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN-NEXT: [[V_MIN_I32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I32_e64 [[COPY]], [[COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MIN_I32_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(s32) = G_SMIN %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:vgpr(i32) = G_SMIN %0, %1 + S_ENDPGM 0, implicit %2(i32) ... --- @@ -60,10 +60,10 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GCN-NEXT: [[V_MIN_I32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I32_e64 [[COPY]], [[COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MIN_I32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr(s32) = COPY $sgpr0 - %2:vgpr(s32) = G_SMIN %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = COPY $sgpr0 + %2:vgpr(i32) = G_SMIN %0, %1 + S_ENDPGM 0, implicit %2(i32) ... --- @@ -81,8 +81,8 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GCN-NEXT: [[V_MIN_I32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I32_e64 [[COPY]], [[COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MIN_I32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = G_SMIN %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = G_SMIN %0, %1 + S_ENDPGM 0, implicit %2(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-smulh.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-smulh.mir index 813006f378b81..9b2a34c543e3a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-smulh.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-smulh.mir @@ -6,7 +6,7 @@ # RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s # ERR-NOT: remark: -# ERR: remark: :0:0: cannot select: %2:sgpr(s32) = G_SMULH %0:sgpr, %1:sgpr (in function: smulh_s32_ss) +# ERR: remark: :0:0: cannot select: %2:sgpr(i32) = G_SMULH %0:sgpr, %1:sgpr (in function: smulh_s32_ss) # ERR-NOT: remark: --- @@ -21,10 +21,11 @@ body: | ; SI-LABEL: name: smulh_s32_ss ; SI: liveins: $sgpr0, $sgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; SI-NEXT: [[SMULH:%[0-9]+]]:sgpr(s32) = G_SMULH [[COPY]], [[COPY1]] - ; SI-NEXT: S_ENDPGM 0, implicit [[SMULH]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; SI-NEXT: [[SMULH:%[0-9]+]]:sgpr(i32) = G_SMULH [[COPY]], [[COPY1]] + ; SI-NEXT: S_ENDPGM 0, implicit [[SMULH]](i32) + ; ; GFX9-LABEL: name: smulh_s32_ss ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9-NEXT: {{ $}} @@ -32,10 +33,10 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GFX9-NEXT: [[S_MUL_HI_I32_:%[0-9]+]]:sreg_32 = S_MUL_HI_I32 [[COPY]], [[COPY1]] ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_MUL_HI_I32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = G_SMULH %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i32) = G_SMULH %0, %1 + S_ENDPGM 0, implicit %2(i32) ... --- @@ -54,6 +55,7 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; SI-NEXT: [[V_MUL_HI_I32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_e64 [[COPY]], [[COPY1]], implicit $exec ; SI-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_I32_e64_]] + ; ; GFX9-LABEL: name: smulh_s32_sv ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} @@ -61,10 +63,10 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[V_MUL_HI_I32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_e64 [[COPY]], [[COPY1]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_I32_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(s32) = G_SMULH %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:vgpr(i32) = G_SMULH %0, %1 + S_ENDPGM 0, implicit %2(i32) ... --- @@ -83,6 +85,7 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; SI-NEXT: [[V_MUL_HI_I32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_e64 [[COPY]], [[COPY1]], implicit $exec ; SI-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_I32_e64_]] + ; ; GFX9-LABEL: name: smulh_s32_vs ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} @@ -90,10 +93,10 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX9-NEXT: [[V_MUL_HI_I32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_e64 [[COPY]], [[COPY1]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_I32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr(s32) = COPY $sgpr0 - %2:vgpr(s32) = G_SMULH %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = COPY $sgpr0 + %2:vgpr(i32) = G_SMULH %0, %1 + S_ENDPGM 0, implicit %2(i32) ... 
--- @@ -112,6 +115,7 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; SI-NEXT: [[V_MUL_HI_I32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_e64 [[COPY]], [[COPY1]], implicit $exec ; SI-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_I32_e64_]] + ; ; GFX9-LABEL: name: smulh_s32_vv ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -119,8 +123,8 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9-NEXT: [[V_MUL_HI_I32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_e64 [[COPY]], [[COPY1]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_I32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = G_SMULH %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = G_SMULH %0, %1 + S_ENDPGM 0, implicit %2(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-stacksave-stackrestore.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-stacksave-stackrestore.mir index 93eaf40524125..dd3b69a9c5121 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-stacksave-stackrestore.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-stacksave-stackrestore.mir @@ -14,7 +14,7 @@ body: | ; GFX10-WAVE64-LABEL: name: stackrestore_waveaddress_sgpr ; GFX10-WAVE64: $sgpr32 = COPY $sgpr32 %0:sgpr(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 - G_STACKRESTORE %0 + G_STACKRESTORE %0(p5) ... @@ -40,7 +40,7 @@ body: | ; GFX10-WAVE64-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], 6, implicit-def dead $scc ; GFX10-WAVE64-NEXT: $sgpr32 = COPY [[S_LSHR_B32_]] %0:sgpr(p5) = COPY $sgpr32 - G_STACKRESTORE %0 + G_STACKRESTORE %0(p5) ... @@ -65,7 +65,6 @@ body: | ; GFX10-WAVE64-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], 6, implicit-def dead $scc ; GFX10-WAVE64-NEXT: $sgpr32 = COPY [[S_LSHR_B32_]] %0:sgpr(p5) = COPY $sgpr10 - G_STACKRESTORE %0 + G_STACKRESTORE %0(p5) ... - diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-flat.mir index 2675295ea98ed..9da9f96eda880 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-flat.mir @@ -21,16 +21,17 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY1]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst (s32)) + ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY1]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst (i32)) + ; ; GFX9-LABEL: name: atomic_store_flat_s32_seq_cst ; GFX9: liveins: $vgpr0, $vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 - ; GFX9-NEXT: FLAT_STORE_DWORD [[COPY1]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst (s32)) - %0:vgpr(s32) = COPY $vgpr0 + ; GFX9-NEXT: FLAT_STORE_DWORD [[COPY1]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst (i32)) + %0:vgpr(i32) = COPY $vgpr0 %1:vgpr(p0) = COPY $vgpr1_vgpr2 - G_STORE %0, %1 :: (store seq_cst (s32), align 4, addrspace 0) + G_STORE %0(i32), %1(p0) :: (store seq_cst (i32)) ... 
@@ -48,18 +49,19 @@ body: | ; GFX7-LABEL: name: atomic_store_flat_v2s16_seq_cst ; GFX7: liveins: $vgpr0, $vgpr1_vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr0 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr1_vgpr2 - ; GFX7-NEXT: G_STORE [[COPY]](<2 x s16>), [[COPY1]](p0) :: (store seq_cst (<2 x s16>)) + ; GFX7-NEXT: G_STORE [[COPY]](<2 x i16>), [[COPY1]](p0) :: (store seq_cst (<2 x i16>)) + ; ; GFX9-LABEL: name: atomic_store_flat_v2s16_seq_cst ; GFX9: liveins: $vgpr0, $vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr1_vgpr2 - ; GFX9-NEXT: G_STORE [[COPY]](<2 x s16>), [[COPY1]](p0) :: (store seq_cst (<2 x s16>)) - %0:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX9-NEXT: G_STORE [[COPY]](<2 x i16>), [[COPY1]](p0) :: (store seq_cst (<2 x i16>)) + %0:vgpr(<2 x i16>) = COPY $vgpr0 %1:vgpr(p0) = COPY $vgpr1_vgpr2 - G_STORE %0, %1 :: (store seq_cst (<2 x s16>), align 4, addrspace 0) + G_STORE %0(<2 x i16>), %1(p0) :: (store seq_cst (<2 x i16>)) ... @@ -80,6 +82,7 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr1_vgpr2 ; GFX7-NEXT: G_STORE [[COPY]](p3), [[COPY1]](p0) :: (store seq_cst (p3)) + ; ; GFX9-LABEL: name: atomic_store_flat_p3_seq_cst ; GFX9: liveins: $vgpr0, $vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} @@ -88,7 +91,7 @@ body: | ; GFX9-NEXT: G_STORE [[COPY]](p3), [[COPY1]](p0) :: (store seq_cst (p3)) %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(p0) = COPY $vgpr1_vgpr2 - G_STORE %0, %1 :: (store seq_cst (p3), align 4, addrspace 0) + G_STORE %0(p3), %1(p0) :: (store seq_cst (p3)) ... @@ -109,6 +112,7 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr1_vgpr2 ; GFX7-NEXT: G_STORE [[COPY]](p5), [[COPY1]](p0) :: (store seq_cst (p5)) + ; ; GFX9-LABEL: name: atomic_store_flat_p5_seq_cst ; GFX9: liveins: $vgpr0, $vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} @@ -117,7 +121,7 @@ body: | ; GFX9-NEXT: G_STORE [[COPY]](p5), [[COPY1]](p0) :: (store seq_cst (p5)) %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(p0) = COPY $vgpr1_vgpr2 - G_STORE %0, %1 :: (store seq_cst (p5), align 4, addrspace 0) + G_STORE %0(p5), %1(p0) :: (store seq_cst (p5)) ... @@ -138,6 +142,7 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p6) = COPY $vgpr0 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr1_vgpr2 ; GFX7-NEXT: G_STORE [[COPY]](p6), [[COPY1]](p0) :: (store seq_cst (p6)) + ; ; GFX9-LABEL: name: atomic_store_flat_p6_seq_cst ; GFX9: liveins: $vgpr0, $vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} @@ -146,7 +151,7 @@ body: | ; GFX9-NEXT: G_STORE [[COPY]](p6), [[COPY1]](p0) :: (store seq_cst (p6)) %0:vgpr(p6) = COPY $vgpr0 %1:vgpr(p0) = COPY $vgpr1_vgpr2 - G_STORE %0, %1 :: (store seq_cst (p6), align 4, addrspace 0) + G_STORE %0(p6), %1(p0) :: (store seq_cst (p6)) ... 
@@ -166,16 +171,17 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: FLAT_STORE_DWORDX2 [[COPY1]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst (s64)) + ; GFX7-NEXT: FLAT_STORE_DWORDX2 [[COPY1]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst (i64)) + ; ; GFX9-LABEL: name: atomic_store_flat_s64_seq_cst ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: FLAT_STORE_DWORDX2 [[COPY1]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst (s64)) - %0:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: FLAT_STORE_DWORDX2 [[COPY1]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst (i64)) + %0:vgpr(i64) = COPY $vgpr0_vgpr1 %1:vgpr(p0) = COPY $vgpr2_vgpr3 - G_STORE %0, %1 :: (store seq_cst (s64), align 8, addrspace 0) + G_STORE %0(i64), %1(p0) :: (store seq_cst (i64)) ... @@ -193,18 +199,19 @@ body: | ; GFX7-LABEL: name: atomic_store_flat_v2s32_seq_cst ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x i32>) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: G_STORE [[COPY]](<2 x s32>), [[COPY1]](p0) :: (store seq_cst (<2 x s32>)) + ; GFX7-NEXT: G_STORE [[COPY]](<2 x i32>), [[COPY1]](p0) :: (store seq_cst (<2 x i32>)) + ; ; GFX9-LABEL: name: atomic_store_flat_v2s32_seq_cst ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x i32>) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: G_STORE [[COPY]](<2 x s32>), [[COPY1]](p0) :: (store seq_cst (<2 x s32>)) - %0:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: G_STORE [[COPY]](<2 x i32>), [[COPY1]](p0) :: (store seq_cst (<2 x i32>)) + %0:vgpr(<2 x i32>) = COPY $vgpr0_vgpr1 %1:vgpr(p0) = COPY $vgpr2_vgpr3 - G_STORE %0, %1 :: (store seq_cst (<2 x s32>), align 8, addrspace 0) + G_STORE %0(<2 x i32>), %1(p0) :: (store seq_cst (<2 x i32>)) ... 
@@ -222,18 +229,19 @@ body: | ; GFX7-LABEL: name: atomic_store_flat_v4s16_seq_cst ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x i16>) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: G_STORE [[COPY]](<4 x s16>), [[COPY1]](p0) :: (store seq_cst (<4 x s16>)) + ; GFX7-NEXT: G_STORE [[COPY]](<4 x i16>), [[COPY1]](p0) :: (store seq_cst (<4 x i16>)) + ; ; GFX9-LABEL: name: atomic_store_flat_v4s16_seq_cst ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x i16>) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: G_STORE [[COPY]](<4 x s16>), [[COPY1]](p0) :: (store seq_cst (<4 x s16>)) - %0:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: G_STORE [[COPY]](<4 x i16>), [[COPY1]](p0) :: (store seq_cst (<4 x i16>)) + %0:vgpr(<4 x i16>) = COPY $vgpr0_vgpr1 %1:vgpr(p0) = COPY $vgpr2_vgpr3 - G_STORE %0, %1 :: (store seq_cst (<4 x s16>), align 8, addrspace 0) + G_STORE %0(<4 x i16>), %1(p0) :: (store seq_cst (<4 x i16>)) ... @@ -254,6 +262,7 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr2_vgpr3 ; GFX7-NEXT: G_STORE [[COPY]](p0), [[COPY1]](p0) :: (store seq_cst (p0)) + ; ; GFX9-LABEL: name: atomic_store_flat_p0_seq_cst ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -262,7 +271,7 @@ body: | ; GFX9-NEXT: G_STORE [[COPY]](p0), [[COPY1]](p0) :: (store seq_cst (p0)) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(p0) = COPY $vgpr2_vgpr3 - G_STORE %0, %1 :: (store seq_cst (p0), align 8, addrspace 0) + G_STORE %0(p0), %1(p0) :: (store seq_cst (p0)) ... --- @@ -282,6 +291,7 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr2_vgpr3 ; GFX7-NEXT: G_STORE [[COPY]](p1), [[COPY1]](p0) :: (store seq_cst (p1)) + ; ; GFX9-LABEL: name: atomic_store_flat_p1_seq_cst ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -290,6 +300,6 @@ body: | ; GFX9-NEXT: G_STORE [[COPY]](p1), [[COPY1]](p0) :: (store seq_cst (p1)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(p0) = COPY $vgpr2_vgpr3 - G_STORE %0, %1 :: (store seq_cst (p1), align 8, addrspace 0) + G_STORE %0(p1), %1(p0) :: (store seq_cst (p1)) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-local.mir index d290f1b2403e4..40ae34f1893b0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-local.mir @@ -23,23 +23,25 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: DS_WRITE_B32 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store seq_cst (s32), addrspace 3) + ; GFX6-NEXT: DS_WRITE_B32 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store seq_cst (i32), addrspace 3) + ; ; GFX7-LABEL: name: atomic_store_local_s32_seq_cst ; GFX7: liveins: $vgpr0, $vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: DS_WRITE_B32 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store seq_cst (s32), addrspace 3) + ; GFX7-NEXT: DS_WRITE_B32 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store seq_cst (i32), addrspace 3) + ; ; GFX9-LABEL: name: atomic_store_local_s32_seq_cst ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: DS_WRITE_B32_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store seq_cst (s32), addrspace 3) - %0:vgpr(s32) = COPY $vgpr0 + ; GFX9-NEXT: DS_WRITE_B32_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store seq_cst (i32), addrspace 3) + %0:vgpr(i32) = COPY $vgpr0 %1:vgpr(p3) = COPY $vgpr1 - G_STORE %0, %1 :: (store seq_cst (s32), align 4, addrspace 3) + G_STORE %0(i32), %1(p3) :: (store seq_cst (i32), addrspace 3) ... 
@@ -57,26 +59,28 @@ body: | ; GFX6-LABEL: name: atomic_store_local_v2s16_seq_cst ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr0 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr1 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: G_STORE [[COPY]](<2 x s16>), [[COPY1]](p3) :: (store seq_cst (<2 x s16>), addrspace 3) + ; GFX6-NEXT: G_STORE [[COPY]](<2 x i16>), [[COPY1]](p3) :: (store seq_cst (<2 x i16>), addrspace 3) + ; ; GFX7-LABEL: name: atomic_store_local_v2s16_seq_cst ; GFX7: liveins: $vgpr0, $vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr0 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr1 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: G_STORE [[COPY]](<2 x s16>), [[COPY1]](p3) :: (store seq_cst (<2 x s16>), addrspace 3) + ; GFX7-NEXT: G_STORE [[COPY]](<2 x i16>), [[COPY1]](p3) :: (store seq_cst (<2 x i16>), addrspace 3) + ; ; GFX9-LABEL: name: atomic_store_local_v2s16_seq_cst ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr1 - ; GFX9-NEXT: G_STORE [[COPY]](<2 x s16>), [[COPY1]](p3) :: (store seq_cst (<2 x s16>), addrspace 3) - %0:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX9-NEXT: G_STORE [[COPY]](<2 x i16>), [[COPY1]](p3) :: (store seq_cst (<2 x i16>), addrspace 3) + %0:vgpr(<2 x i16>) = COPY $vgpr0 %1:vgpr(p3) = COPY $vgpr1 - G_STORE %0, %1 :: (store seq_cst (<2 x s16>), align 4, addrspace 3) + G_STORE %0(<2 x i16>), %1(p3) :: (store seq_cst (<2 x i16>), addrspace 3) ... @@ -98,6 +102,7 @@ body: | ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr1 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 ; GFX6-NEXT: G_STORE [[COPY]](p3), [[COPY1]](p3) :: (store seq_cst (p3), addrspace 3) + ; ; GFX7-LABEL: name: atomic_store_local_p3_seq_cst ; GFX7: liveins: $vgpr0, $vgpr1 ; GFX7-NEXT: {{ $}} @@ -105,6 +110,7 @@ body: | ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr1 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 ; GFX7-NEXT: G_STORE [[COPY]](p3), [[COPY1]](p3) :: (store seq_cst (p3), addrspace 3) + ; ; GFX9-LABEL: name: atomic_store_local_p3_seq_cst ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -113,7 +119,7 @@ body: | ; GFX9-NEXT: G_STORE [[COPY]](p3), [[COPY1]](p3) :: (store seq_cst (p3), addrspace 3) %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(p3) = COPY $vgpr1 - G_STORE %0, %1 :: (store seq_cst (p3), align 4, addrspace 3) + G_STORE %0(p3), %1(p3) :: (store seq_cst (p3), addrspace 3) ... 
@@ -135,6 +141,7 @@ body: | ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr1 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 ; GFX6-NEXT: G_STORE [[COPY]](p5), [[COPY1]](p3) :: (store seq_cst (p5), addrspace 3) + ; ; GFX7-LABEL: name: atomic_store_local_p5_seq_cst ; GFX7: liveins: $vgpr0, $vgpr1 ; GFX7-NEXT: {{ $}} @@ -142,6 +149,7 @@ body: | ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr1 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 ; GFX7-NEXT: G_STORE [[COPY]](p5), [[COPY1]](p3) :: (store seq_cst (p5), addrspace 3) + ; ; GFX9-LABEL: name: atomic_store_local_p5_seq_cst ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -150,7 +158,7 @@ body: | ; GFX9-NEXT: G_STORE [[COPY]](p5), [[COPY1]](p3) :: (store seq_cst (p5), addrspace 3) %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(p3) = COPY $vgpr1 - G_STORE %0, %1 :: (store seq_cst (p5), align 4, addrspace 3) + G_STORE %0(p5), %1(p3) :: (store seq_cst (p5), addrspace 3) ... @@ -172,6 +180,7 @@ body: | ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr1 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 ; GFX6-NEXT: G_STORE [[COPY]](p6), [[COPY1]](p3) :: (store seq_cst (p6), addrspace 3) + ; ; GFX7-LABEL: name: atomic_store_local_p6_seq_cst ; GFX7: liveins: $vgpr0, $vgpr1 ; GFX7-NEXT: {{ $}} @@ -179,6 +188,7 @@ body: | ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr1 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 ; GFX7-NEXT: G_STORE [[COPY]](p6), [[COPY1]](p3) :: (store seq_cst (p6), addrspace 3) + ; ; GFX9-LABEL: name: atomic_store_local_p6_seq_cst ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -187,7 +197,7 @@ body: | ; GFX9-NEXT: G_STORE [[COPY]](p6), [[COPY1]](p3) :: (store seq_cst (p6), addrspace 3) %0:vgpr(p6) = COPY $vgpr0 %1:vgpr(p3) = COPY $vgpr1 - G_STORE %0, %1 :: (store seq_cst (p6), align 4, addrspace 3) + G_STORE %0(p6), %1(p3) :: (store seq_cst (p6), addrspace 3) ... @@ -208,23 +218,25 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store seq_cst (s64), addrspace 3) + ; GFX6-NEXT: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store seq_cst (i64), addrspace 3) + ; ; GFX7-LABEL: name: atomic_store_local_s64_seq_cst ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store seq_cst (s64), addrspace 3) + ; GFX7-NEXT: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store seq_cst (i64), addrspace 3) + ; ; GFX9-LABEL: name: atomic_store_local_s64_seq_cst ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store seq_cst (s64), addrspace 3) - %0:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store seq_cst (i64), addrspace 3) + %0:vgpr(i64) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 - G_STORE %0, %1 :: (store seq_cst (s64), align 8, addrspace 3) + G_STORE %0(i64), %1(p3) :: (store seq_cst (i64), addrspace 3) ... 
@@ -242,26 +254,28 @@ body: | ; GFX6-LABEL: name: atomic_store_local_v2s32_seq_cst ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x i32>) = COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: G_STORE [[COPY]](<2 x s32>), [[COPY1]](p3) :: (store seq_cst (<2 x s32>), addrspace 3) + ; GFX6-NEXT: G_STORE [[COPY]](<2 x i32>), [[COPY1]](p3) :: (store seq_cst (<2 x i32>), addrspace 3) + ; ; GFX7-LABEL: name: atomic_store_local_v2s32_seq_cst ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x i32>) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: G_STORE [[COPY]](<2 x s32>), [[COPY1]](p3) :: (store seq_cst (<2 x s32>), addrspace 3) + ; GFX7-NEXT: G_STORE [[COPY]](<2 x i32>), [[COPY1]](p3) :: (store seq_cst (<2 x i32>), addrspace 3) + ; ; GFX9-LABEL: name: atomic_store_local_v2s32_seq_cst ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x i32>) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 - ; GFX9-NEXT: G_STORE [[COPY]](<2 x s32>), [[COPY1]](p3) :: (store seq_cst (<2 x s32>), addrspace 3) - %0:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: G_STORE [[COPY]](<2 x i32>), [[COPY1]](p3) :: (store seq_cst (<2 x i32>), addrspace 3) + %0:vgpr(<2 x i32>) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 - G_STORE %0, %1 :: (store seq_cst (<2 x s32>), align 8, addrspace 3) + G_STORE %0(<2 x i32>), %1(p3) :: (store seq_cst (<2 x i32>), addrspace 3) ... 
@@ -279,26 +293,28 @@ body: | ; GFX6-LABEL: name: atomic_store_local_v4s16_seq_cst ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x i16>) = COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: G_STORE [[COPY]](<4 x s16>), [[COPY1]](p3) :: (store seq_cst (<4 x s16>), addrspace 3) + ; GFX6-NEXT: G_STORE [[COPY]](<4 x i16>), [[COPY1]](p3) :: (store seq_cst (<4 x i16>), addrspace 3) + ; ; GFX7-LABEL: name: atomic_store_local_v4s16_seq_cst ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x i16>) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: G_STORE [[COPY]](<4 x s16>), [[COPY1]](p3) :: (store seq_cst (<4 x s16>), addrspace 3) + ; GFX7-NEXT: G_STORE [[COPY]](<4 x i16>), [[COPY1]](p3) :: (store seq_cst (<4 x i16>), addrspace 3) + ; ; GFX9-LABEL: name: atomic_store_local_v4s16_seq_cst ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x i16>) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 - ; GFX9-NEXT: G_STORE [[COPY]](<4 x s16>), [[COPY1]](p3) :: (store seq_cst (<4 x s16>), addrspace 3) - %0:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: G_STORE [[COPY]](<4 x i16>), [[COPY1]](p3) :: (store seq_cst (<4 x i16>), addrspace 3) + %0:vgpr(<4 x i16>) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 - G_STORE %0, %1 :: (store seq_cst (<4 x s16>), align 8, addrspace 3) + G_STORE %0(<4 x i16>), %1(p3) :: (store seq_cst (<4 x i16>), addrspace 3) ... @@ -320,6 +336,7 @@ body: | ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 ; GFX6-NEXT: G_STORE [[COPY]](p0), [[COPY1]](p3) :: (store seq_cst (p0), addrspace 3) + ; ; GFX7-LABEL: name: atomic_store_local_p0_seq_cst ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} @@ -327,6 +344,7 @@ body: | ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 ; GFX7-NEXT: G_STORE [[COPY]](p0), [[COPY1]](p3) :: (store seq_cst (p0), addrspace 3) + ; ; GFX9-LABEL: name: atomic_store_local_p0_seq_cst ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -335,7 +353,7 @@ body: | ; GFX9-NEXT: G_STORE [[COPY]](p0), [[COPY1]](p3) :: (store seq_cst (p0), addrspace 3) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 - G_STORE %0, %1 :: (store seq_cst (p0), align 8, addrspace 3) + G_STORE %0(p0), %1(p3) :: (store seq_cst (p0), addrspace 3) ... 
--- @@ -356,6 +374,7 @@ body: | ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 ; GFX6-NEXT: G_STORE [[COPY]](p1), [[COPY1]](p3) :: (store seq_cst (p1), addrspace 3) + ; ; GFX7-LABEL: name: atomic_store_local_p1_seq_cst ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} @@ -363,6 +382,7 @@ body: | ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 ; GFX7-NEXT: G_STORE [[COPY]](p1), [[COPY1]](p3) :: (store seq_cst (p1), addrspace 3) + ; ; GFX9-LABEL: name: atomic_store_local_p1_seq_cst ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -371,6 +391,6 @@ body: | ; GFX9-NEXT: G_STORE [[COPY]](p1), [[COPY1]](p3) :: (store seq_cst (p1), addrspace 3) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 - G_STORE %0, %1 :: (store seq_cst (p1), align 8, addrspace 3) + G_STORE %0(p1), %1(p3) :: (store seq_cst (p1), addrspace 3) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir index 6e92d851dee2e..c607a70b17dc6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir @@ -22,45 +22,45 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32)) ; ; GFX8-LABEL: name: store_flat_s32_to_4 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX8-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32)) ; ; GFX9-LABEL: name: store_flat_s32_to_4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX9-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32)) ; ; GFX10-LABEL: name: store_flat_s32_to_4 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX10-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32)) ; ; GFX11-LABEL: name: store_flat_s32_to_4 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX11-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32)) ; ; GFX12-LABEL: name: store_flat_s32_to_4 ; GFX12: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX12-NEXT: 
[[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX12-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX12-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - G_STORE %1, %0 :: (store (s32), align 4, addrspace 0) + %1:vgpr(i32) = COPY $vgpr2 + G_STORE %1(i32), %0(p1) :: (store (i32)) ... @@ -79,45 +79,45 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16)) + ; GFX7-NEXT: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i16)) ; ; GFX8-LABEL: name: store_flat_s32_to_2 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16)) + ; GFX8-NEXT: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i16)) ; ; GFX9-LABEL: name: store_flat_s32_to_2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16)) + ; GFX9-NEXT: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i16)) ; ; GFX10-LABEL: name: store_flat_s32_to_2 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16)) + ; GFX10-NEXT: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i16)) ; ; GFX11-LABEL: name: store_flat_s32_to_2 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16)) + ; GFX11-NEXT: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i16)) ; ; GFX12-LABEL: name: store_flat_s32_to_2 ; GFX12: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX12-NEXT: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16)) + ; GFX12-NEXT: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i16)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - G_STORE %1, %0 :: (store (s16), align 2, addrspace 0) + %1:vgpr(i32) = COPY $vgpr2 + G_STORE %1(i32), %0(p1) :: (store (i16)) ... 
@@ -136,45 +136,45 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8)) + ; GFX7-NEXT: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i8)) ; ; GFX8-LABEL: name: store_flat_s32_to_1 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8)) + ; GFX8-NEXT: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i8)) ; ; GFX9-LABEL: name: store_flat_s32_to_1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8)) + ; GFX9-NEXT: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i8)) ; ; GFX10-LABEL: name: store_flat_s32_to_1 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8)) + ; GFX10-NEXT: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i8)) ; ; GFX11-LABEL: name: store_flat_s32_to_1 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8)) + ; GFX11-NEXT: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i8)) ; ; GFX12-LABEL: name: store_flat_s32_to_1 ; GFX12: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX12-NEXT: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8)) + ; GFX12-NEXT: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i8)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - G_STORE %1, %0 :: (store (s8), align 1, addrspace 0) + %1:vgpr(i32) = COPY $vgpr2 + G_STORE %1(i32), %0(p1) :: (store (i8)) ... 
@@ -194,45 +194,45 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) + ; GFX7-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i64)) ; ; GFX8-LABEL: name: store_flat_s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) + ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i64)) ; ; GFX9-LABEL: name: store_flat_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) + ; GFX9-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i64)) ; ; GFX10-LABEL: name: store_flat_s64 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) + ; GFX10-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i64)) ; ; GFX11-LABEL: name: store_flat_s64 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX11-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) + ; GFX11-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i64)) ; ; GFX12-LABEL: name: store_flat_s64 ; GFX12: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX12-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) + ; GFX12-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i64)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store (s64), align 8, addrspace 0) + %1:vgpr(i64) = COPY $vgpr2_vgpr3 + G_STORE %1(i64), %0(p1) :: (store (i64)) ... 
--- @@ -250,46 +250,46 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(s96) = COPY $vgpr2_vgpr3_vgpr4 - ; GFX7-NEXT: G_STORE [[COPY1]](s96), [[COPY]](p1) :: (store (s96), align 16) + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(i96) = COPY $vgpr2_vgpr3_vgpr4 + ; GFX7-NEXT: G_STORE [[COPY1]](i96), [[COPY]](p1) :: (store (i96), align 16) ; ; GFX8-LABEL: name: store_flat_s96 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(s96) = COPY $vgpr2_vgpr3_vgpr4 - ; GFX8-NEXT: G_STORE [[COPY1]](s96), [[COPY]](p1) :: (store (s96), align 16) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(i96) = COPY $vgpr2_vgpr3_vgpr4 + ; GFX8-NEXT: G_STORE [[COPY1]](i96), [[COPY]](p1) :: (store (i96), align 16) ; ; GFX9-LABEL: name: store_flat_s96 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(s96) = COPY $vgpr2_vgpr3_vgpr4 - ; GFX9-NEXT: G_STORE [[COPY1]](s96), [[COPY]](p1) :: (store (s96), align 16) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(i96) = COPY $vgpr2_vgpr3_vgpr4 + ; GFX9-NEXT: G_STORE [[COPY1]](i96), [[COPY]](p1) :: (store (i96), align 16) ; ; GFX10-LABEL: name: store_flat_s96 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(s96) = COPY $vgpr2_vgpr3_vgpr4 - ; GFX10-NEXT: G_STORE [[COPY1]](s96), [[COPY]](p1) :: (store (s96), align 16) + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(i96) = COPY $vgpr2_vgpr3_vgpr4 + ; GFX10-NEXT: G_STORE [[COPY1]](i96), [[COPY]](p1) :: (store (i96), align 16) ; ; GFX11-LABEL: name: store_flat_s96 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr(s96) = COPY $vgpr2_vgpr3_vgpr4 - ; GFX11-NEXT: G_STORE [[COPY1]](s96), [[COPY]](p1) :: (store (s96), align 16) + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr(i96) = COPY $vgpr2_vgpr3_vgpr4 + ; GFX11-NEXT: G_STORE [[COPY1]](i96), [[COPY]](p1) :: (store (i96), align 16) ; ; GFX12-LABEL: name: store_flat_s96 ; GFX12: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(s96) = COPY $vgpr2_vgpr3_vgpr4 - ; GFX12-NEXT: G_STORE [[COPY1]](s96), [[COPY]](p1) :: (store (s96), align 16) + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(i96) = COPY $vgpr2_vgpr3_vgpr4 + ; GFX12-NEXT: G_STORE [[COPY1]](i96), [[COPY]](p1) :: (store (i96), align 16) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s96) = COPY $vgpr2_vgpr3_vgpr4 - G_STORE %1, %0 :: (store (s96), align 16, addrspace 0) + %1:vgpr(i96) = COPY $vgpr2_vgpr3_vgpr4 + G_STORE %1(i96), %0(p1) :: (store (i96), align 16) ... 
--- @@ -307,46 +307,46 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX7-NEXT: G_STORE [[COPY1]](s128), [[COPY]](p1) :: (store (s128)) + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX7-NEXT: G_STORE [[COPY1]](i128), [[COPY]](p1) :: (store (i128)) ; ; GFX8-LABEL: name: store_flat_s128 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX8-NEXT: G_STORE [[COPY1]](s128), [[COPY]](p1) :: (store (s128)) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX8-NEXT: G_STORE [[COPY1]](i128), [[COPY]](p1) :: (store (i128)) ; ; GFX9-LABEL: name: store_flat_s128 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: G_STORE [[COPY1]](s128), [[COPY]](p1) :: (store (s128)) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: G_STORE [[COPY1]](i128), [[COPY]](p1) :: (store (i128)) ; ; GFX10-LABEL: name: store_flat_s128 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX10-NEXT: G_STORE [[COPY1]](s128), [[COPY]](p1) :: (store (s128)) + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX10-NEXT: G_STORE [[COPY1]](i128), [[COPY]](p1) :: (store (i128)) ; ; GFX11-LABEL: name: store_flat_s128 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX11-NEXT: G_STORE [[COPY1]](s128), [[COPY]](p1) :: (store (s128)) + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX11-NEXT: G_STORE [[COPY1]](i128), [[COPY]](p1) :: (store (i128)) ; ; GFX12-LABEL: name: store_flat_s128 ; GFX12: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX12-NEXT: G_STORE [[COPY1]](s128), [[COPY]](p1) :: (store (s128)) + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX12-NEXT: G_STORE [[COPY1]](i128), [[COPY]](p1) :: (store (i128)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store (s128), align 16, addrspace 0) + %1:vgpr(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + G_STORE %1(i128), %0(p1) :: (store (i128)) ... 
@@ -366,45 +366,45 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s32>)) + ; GFX7-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x i32>)) ; ; GFX8-LABEL: name: store_flat_v2s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s32>)) + ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x i32>)) ; ; GFX9-LABEL: name: store_flat_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s32>)) + ; GFX9-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x i32>)) ; ; GFX10-LABEL: name: store_flat_v2s32 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s32>)) + ; GFX10-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x i32>)) ; ; GFX11-LABEL: name: store_flat_v2s32 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX11-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s32>)) + ; GFX11-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x i32>)) ; ; GFX12-LABEL: name: store_flat_v2s32 ; GFX12: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX12-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s32>)) + ; GFX12-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x i32>)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<2 x s32>) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store (<2 x s32>), align 8, addrspace 0) + %1:vgpr(<2 x i32>) = COPY $vgpr2_vgpr3 + G_STORE %1(<2 x i32>), %0(p1) :: (store (<2 x i32>)) ... 
--- @@ -423,45 +423,45 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 - ; GFX7-NEXT: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>), align 16) + ; GFX7-NEXT: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x i32>), align 16) ; ; GFX8-LABEL: name: store_flat_v3s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 - ; GFX8-NEXT: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>), align 16) + ; GFX8-NEXT: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x i32>), align 16) ; ; GFX9-LABEL: name: store_flat_v3s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 - ; GFX9-NEXT: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>), align 16) + ; GFX9-NEXT: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x i32>), align 16) ; ; GFX10-LABEL: name: store_flat_v3s32 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 - ; GFX10-NEXT: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>), align 16) + ; GFX10-NEXT: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x i32>), align 16) ; ; GFX11-LABEL: name: store_flat_v3s32 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 - ; GFX11-NEXT: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>), align 16) + ; GFX11-NEXT: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x i32>), align 16) ; ; GFX12-LABEL: name: store_flat_v3s32 ; GFX12: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 - ; GFX12-NEXT: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>), align 16) + ; GFX12-NEXT: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x i32>), align 16) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - G_STORE %1, %0 :: (store (<3 x s32>), align 16, addrspace 0) + %1:vgpr(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + G_STORE %1(<3 x i32>), %0(p1) :: (store (<3 x i32>), align 16) ... 
--- @@ -480,45 +480,45 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX7-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>)) + ; GFX7-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x i32>)) ; ; GFX8-LABEL: name: store_flat_v4s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX8-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>)) + ; GFX8-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x i32>)) ; ; GFX9-LABEL: name: store_flat_v4s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>)) + ; GFX9-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x i32>)) ; ; GFX10-LABEL: name: store_flat_v4s32 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX10-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>)) + ; GFX10-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x i32>)) ; ; GFX11-LABEL: name: store_flat_v4s32 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX11-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>)) + ; GFX11-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x i32>)) ; ; GFX12-LABEL: name: store_flat_v4s32 ; GFX12: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX12-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>)) + ; GFX12-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x i32>)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store (<4 x s32>), align 16, addrspace 0) + %1:vgpr(<4 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + G_STORE %1(<4 x i32>), %0(p1) :: (store (<4 x i32>)) ... 
@@ -538,45 +538,45 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>)) + ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x i16>)) ; ; GFX8-LABEL: name: store_flat_v2s16 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>)) + ; GFX8-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x i16>)) ; ; GFX9-LABEL: name: store_flat_v2s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>)) + ; GFX9-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x i16>)) ; ; GFX10-LABEL: name: store_flat_v2s16 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>)) + ; GFX10-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x i16>)) ; ; GFX11-LABEL: name: store_flat_v2s16 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>)) + ; GFX11-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x i16>)) ; ; GFX12-LABEL: name: store_flat_v2s16 ; GFX12: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX12-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>)) + ; GFX12-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x i16>)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<2 x s16>) = COPY $vgpr2 - G_STORE %1, %0 :: (store (<2 x s16>), align 4, addrspace 0) + %1:vgpr(<2 x i16>) = COPY $vgpr2 + G_STORE %1(<2 x i16>), %0(p1) :: (store (<2 x i16>)) ... 
@@ -596,45 +596,45 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s16>)) + ; GFX7-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x i16>)) ; ; GFX8-LABEL: name: store_flat_v4s16 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s16>)) + ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x i16>)) ; ; GFX9-LABEL: name: store_flat_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s16>)) + ; GFX9-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x i16>)) ; ; GFX10-LABEL: name: store_flat_v4s16 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s16>)) + ; GFX10-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x i16>)) ; ; GFX11-LABEL: name: store_flat_v4s16 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX11-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s16>)) + ; GFX11-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x i16>)) ; ; GFX12-LABEL: name: store_flat_v4s16 ; GFX12: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX12-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s16>)) + ; GFX12-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x i16>)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store (<4 x s16>), align 8, addrspace 0) + %1:vgpr(<4 x i16>) = COPY $vgpr2_vgpr3 + G_STORE %1(<4 x i16>), %0(p1) :: (store (<4 x i16>)) ... 
@@ -653,46 +653,46 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(<6 x s16>) = COPY $vgpr2_vgpr3_vgpr4 - ; GFX7-NEXT: G_STORE [[COPY1]](<6 x s16>), [[COPY]](p1) :: (store (<6 x s16>), align 16) + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(<6 x i16>) = COPY $vgpr2_vgpr3_vgpr4 + ; GFX7-NEXT: G_STORE [[COPY1]](<6 x i16>), [[COPY]](p1) :: (store (<6 x i16>), align 16) ; ; GFX8-LABEL: name: store_flat_v6s16 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(<6 x s16>) = COPY $vgpr2_vgpr3_vgpr4 - ; GFX8-NEXT: G_STORE [[COPY1]](<6 x s16>), [[COPY]](p1) :: (store (<6 x s16>), align 16) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(<6 x i16>) = COPY $vgpr2_vgpr3_vgpr4 + ; GFX8-NEXT: G_STORE [[COPY1]](<6 x i16>), [[COPY]](p1) :: (store (<6 x i16>), align 16) ; ; GFX9-LABEL: name: store_flat_v6s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(<6 x s16>) = COPY $vgpr2_vgpr3_vgpr4 - ; GFX9-NEXT: G_STORE [[COPY1]](<6 x s16>), [[COPY]](p1) :: (store (<6 x s16>), align 16) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(<6 x i16>) = COPY $vgpr2_vgpr3_vgpr4 + ; GFX9-NEXT: G_STORE [[COPY1]](<6 x i16>), [[COPY]](p1) :: (store (<6 x i16>), align 16) ; ; GFX10-LABEL: name: store_flat_v6s16 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(<6 x s16>) = COPY $vgpr2_vgpr3_vgpr4 - ; GFX10-NEXT: G_STORE [[COPY1]](<6 x s16>), [[COPY]](p1) :: (store (<6 x s16>), align 16) + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(<6 x i16>) = COPY $vgpr2_vgpr3_vgpr4 + ; GFX10-NEXT: G_STORE [[COPY1]](<6 x i16>), [[COPY]](p1) :: (store (<6 x i16>), align 16) ; ; GFX11-LABEL: name: store_flat_v6s16 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr(<6 x s16>) = COPY $vgpr2_vgpr3_vgpr4 - ; GFX11-NEXT: G_STORE [[COPY1]](<6 x s16>), [[COPY]](p1) :: (store (<6 x s16>), align 16) + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr(<6 x i16>) = COPY $vgpr2_vgpr3_vgpr4 + ; GFX11-NEXT: G_STORE [[COPY1]](<6 x i16>), [[COPY]](p1) :: (store (<6 x i16>), align 16) ; ; GFX12-LABEL: name: store_flat_v6s16 ; GFX12: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(<6 x s16>) = COPY $vgpr2_vgpr3_vgpr4 - ; GFX12-NEXT: G_STORE [[COPY1]](<6 x s16>), [[COPY]](p1) :: (store (<6 x s16>), align 16) + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(<6 x i16>) = COPY $vgpr2_vgpr3_vgpr4 + ; GFX12-NEXT: G_STORE [[COPY1]](<6 x i16>), [[COPY]](p1) :: (store (<6 x i16>), align 16) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<6 x s16>) = COPY $vgpr2_vgpr3_vgpr4 - G_STORE %1, %0 :: (store (<6 x s16>), align 16, addrspace 0) + %1:vgpr(<6 x i16>) = COPY $vgpr2_vgpr3_vgpr4 + G_STORE %1(<6 x i16>), %0(p1) :: (store (<6 x i16>), align 16) ... 
--- @@ -711,45 +711,45 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX7-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x s16>)) + ; GFX7-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x i16>)) ; ; GFX8-LABEL: name: store_flat_v8s16 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX8-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x s16>)) + ; GFX8-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x i16>)) ; ; GFX9-LABEL: name: store_flat_v8s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x s16>)) + ; GFX9-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x i16>)) ; ; GFX10-LABEL: name: store_flat_v8s16 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX10-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x s16>)) + ; GFX10-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x i16>)) ; ; GFX11-LABEL: name: store_flat_v8s16 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX11-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x s16>)) + ; GFX11-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x i16>)) ; ; GFX12-LABEL: name: store_flat_v8s16 ; GFX12: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX12-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x s16>)) + ; GFX12-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x i16>)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store (<8 x s16>), align 16, addrspace 0) + %1:vgpr(<8 x i16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + G_STORE %1(<8 x i16>), %0(p1) :: (store (<8 x i16>)) ... 
@@ -769,45 +769,45 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX7-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s64>)) + ; GFX7-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x i64>)) ; ; GFX8-LABEL: name: store_flat_v2s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX8-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s64>)) + ; GFX8-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x i64>)) ; ; GFX9-LABEL: name: store_flat_v2s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s64>)) + ; GFX9-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x i64>)) ; ; GFX10-LABEL: name: store_flat_v2s64 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX10-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s64>)) + ; GFX10-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x i64>)) ; ; GFX11-LABEL: name: store_flat_v2s64 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX11-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s64>)) + ; GFX11-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x i64>)) ; ; GFX12-LABEL: name: store_flat_v2s64 ; GFX12: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX12-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s64>)) + ; GFX12-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x i64>)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store (<2 x s64>), align 16, addrspace 0) + %1:vgpr(<2 x i64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + G_STORE %1(<2 x i64>), %0(p1) :: (store (<2 x i64>)) ... @@ -865,7 +865,7 @@ body: | ; GFX12-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p1)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(p1) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store (p1), align 8, addrspace 0) + G_STORE %1(p1), %0(p1) :: (store (p1)) ... 
@@ -923,7 +923,7 @@ body: | ; GFX12-NEXT: G_STORE [[COPY1]](<2 x p1>), [[COPY]](p1) :: (store (<2 x p1>)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x p1>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store (<2 x p1>), align 16, addrspace 0) + G_STORE %1(<2 x p1>), %0(p1) :: (store (<2 x p1>)) ... @@ -981,7 +981,7 @@ body: | ; GFX12-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 - G_STORE %1, %0 :: (store (p3), align 4, addrspace 0) + G_STORE %1(p3), %0(p1) :: (store (p3)) ... @@ -1039,7 +1039,7 @@ body: | ; GFX12-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x p3>) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store (<2 x p3>), align 8, addrspace 0) + G_STORE %1(<2 x p3>), %0(p1) :: (store (<2 x p3>)) ... --- @@ -1058,45 +1058,45 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s32)) + ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (i32)) ; ; GFX8-LABEL: name: store_atomic_flat_s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s32)) + ; GFX8-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (i32)) ; ; GFX9-LABEL: name: store_atomic_flat_s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s32)) + ; GFX9-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (i32)) ; ; GFX10-LABEL: name: store_atomic_flat_s32 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s32)) + ; GFX10-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (i32)) ; ; GFX11-LABEL: name: store_atomic_flat_s32 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s32)) + ; GFX11-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (i32)) ; ; GFX12-LABEL: name: store_atomic_flat_s32 ; GFX12: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX12-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s32)) + ; GFX12-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit 
$exec, implicit $flat_scr :: (store monotonic (i32)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - G_STORE %1, %0 :: (store monotonic (s32), align 4, addrspace 0) + %1:vgpr(i32) = COPY $vgpr2 + G_STORE %1(i32), %0(p1) :: (store monotonic (i32)) ... @@ -1116,45 +1116,45 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s64)) + ; GFX7-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (i64)) ; ; GFX8-LABEL: name: store_atomic_flat_s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s64)) + ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (i64)) ; ; GFX9-LABEL: name: store_atomic_flat_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s64)) + ; GFX9-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (i64)) ; ; GFX10-LABEL: name: store_atomic_flat_s64 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s64)) + ; GFX10-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (i64)) ; ; GFX11-LABEL: name: store_atomic_flat_s64 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX11-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s64)) + ; GFX11-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (i64)) ; ; GFX12-LABEL: name: store_atomic_flat_s64 ; GFX12: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX12-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s64)) + ; GFX12-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (i64)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store monotonic (s64), align 8, addrspace 0) + %1:vgpr(i64) = COPY $vgpr2_vgpr3 + G_STORE %1(i64), %0(p1) :: (store monotonic (i64)) ... 
@@ -1182,7 +1182,7 @@ body: | ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX7-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32)) ; ; GFX8-LABEL: name: store_flat_s32_gep_2047 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 @@ -1197,14 +1197,14 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32)) ; ; GFX9-LABEL: name: store_flat_s32_gep_2047 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 2047, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX9-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 2047, 0, implicit $exec, implicit $flat_scr :: (store (i32)) ; ; GFX10-LABEL: name: store_flat_s32_gep_2047 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 @@ -1219,26 +1219,26 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX10-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32)) ; ; GFX11-LABEL: name: store_flat_s32_gep_2047 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 2047, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX11-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 2047, 0, implicit $exec, implicit $flat_scr :: (store (i32)) ; ; GFX12-LABEL: name: store_flat_s32_gep_2047 ; GFX12: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX12-NEXT: 
FLAT_STORE_DWORD [[COPY]], [[COPY1]], 2047, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX12-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 2047, 0, implicit $exec, implicit $flat_scr :: (store (i32)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s64) = G_CONSTANT i64 2047 - %3:vgpr(p1) = G_PTR_ADD %0, %2 - G_STORE %1, %3 :: (store (s32), align 4, addrspace 0) + %1:vgpr(i32) = COPY $vgpr2 + %2:vgpr(i64) = G_CONSTANT i64 2047 + %3:vgpr(p1) = G_PTR_ADD %0, %2(i64) + G_STORE %1(i32), %3(p1) :: (store (i32)) ... @@ -1266,7 +1266,7 @@ body: | ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX7-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32)) ; ; GFX8-LABEL: name: store_flat_s32_to_1_gep_24bit_max ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 @@ -1281,7 +1281,7 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32)) ; ; GFX9-LABEL: name: store_flat_s32_to_1_gep_24bit_max ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 @@ -1296,7 +1296,7 @@ body: | ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX9-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32)) ; ; GFX10-LABEL: name: store_flat_s32_to_1_gep_24bit_max ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 @@ -1311,7 +1311,7 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, 
[[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX10-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32)) ; ; GFX11-LABEL: name: store_flat_s32_to_1_gep_24bit_max ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 @@ -1326,19 +1326,19 @@ body: | ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX11-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX11-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32)) ; ; GFX12-LABEL: name: store_flat_s32_to_1_gep_24bit_max ; GFX12: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX12-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 8388607, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX12-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 8388607, 0, implicit $exec, implicit $flat_scr :: (store (i32)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s64) = G_CONSTANT i64 8388607 - %3:vgpr(p1) = G_PTR_ADD %0, %2 - G_STORE %1, %3 :: (store (s32), align 4, addrspace 0) + %1:vgpr(i32) = COPY $vgpr2 + %2:vgpr(i64) = G_CONSTANT i64 8388607 + %3:vgpr(p1) = G_PTR_ADD %0, %2(i64) + G_STORE %1(i32), %3(p1) :: (store (i32)) ... 
@@ -1366,7 +1366,7 @@ body: | ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX7-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32)) ; ; GFX8-LABEL: name: store_flat_s32_to_1_gep_2x_24bit_max ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 @@ -1381,7 +1381,7 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32)) ; ; GFX9-LABEL: name: store_flat_s32_to_1_gep_2x_24bit_max ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 @@ -1396,7 +1396,7 @@ body: | ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX9-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32)) ; ; GFX10-LABEL: name: store_flat_s32_to_1_gep_2x_24bit_max ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 @@ -1411,7 +1411,7 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX10-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32)) ; ; GFX11-LABEL: name: store_flat_s32_to_1_gep_2x_24bit_max ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 @@ -1426,7 +1426,7 @@ body: | ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, 
implicit $exec ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX11-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX11-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32)) ; ; GFX12-LABEL: name: store_flat_s32_to_1_gep_2x_24bit_max ; GFX12: liveins: $vgpr0_vgpr1, $vgpr2 @@ -1441,12 +1441,12 @@ body: | ; GFX12-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX12-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX12-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX12-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s64) = G_CONSTANT i64 16777214 - %3:vgpr(p1) = G_PTR_ADD %0, %2 - G_STORE %1, %3 :: (store (s32), align 4, addrspace 0) + %1:vgpr(i32) = COPY $vgpr2 + %2:vgpr(i64) = G_CONSTANT i64 16777214 + %3:vgpr(p1) = G_PTR_ADD %0, %2(i64) + G_STORE %1(i32), %3(p1) :: (store (i32)) ... 
@@ -1474,7 +1474,7 @@ body: | ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX7-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32)) ; ; GFX8-LABEL: name: store_flat_s32_to_1_gep_24bit_min ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 @@ -1489,7 +1489,7 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32)) ; ; GFX9-LABEL: name: store_flat_s32_to_1_gep_24bit_min ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 @@ -1504,7 +1504,7 @@ body: | ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX9-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32)) ; ; GFX10-LABEL: name: store_flat_s32_to_1_gep_24bit_min ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 @@ -1519,7 +1519,7 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX10-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32)) ; ; GFX11-LABEL: name: store_flat_s32_to_1_gep_24bit_min ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 @@ -1534,19 +1534,19 @@ body: | ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec 
; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX11-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX11-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32)) ; ; GFX12-LABEL: name: store_flat_s32_to_1_gep_24bit_min ; GFX12: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX12-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], -8388608, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX12-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], -8388608, 0, implicit $exec, implicit $flat_scr :: (store (i32)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s64) = G_CONSTANT i64 -8388608 - %3:vgpr(p1) = G_PTR_ADD %0, %2 - G_STORE %1, %3 :: (store (s32), align 4, addrspace 0) + %1:vgpr(i32) = COPY $vgpr2 + %2:vgpr(i64) = G_CONSTANT i64 -8388608 + %3:vgpr(p1) = G_PTR_ADD %0, %2(i64) + G_STORE %1(i32), %3(p1) :: (store (i32)) ... @@ -1574,7 +1574,7 @@ body: | ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX7-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32)) ; ; GFX8-LABEL: name: store_flat_s32_to_1_gep_2x_24bit_min ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 @@ -1589,7 +1589,7 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32)) ; ; GFX9-LABEL: name: store_flat_s32_to_1_gep_2x_24bit_min ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 @@ -1604,7 +1604,7 @@ body: | ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], 
%subreg.sub1 - ; GFX9-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX9-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32)) ; ; GFX10-LABEL: name: store_flat_s32_to_1_gep_2x_24bit_min ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 @@ -1619,7 +1619,7 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX10-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32)) ; ; GFX11-LABEL: name: store_flat_s32_to_1_gep_2x_24bit_min ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 @@ -1634,7 +1634,7 @@ body: | ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX11-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX11-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32)) ; ; GFX12-LABEL: name: store_flat_s32_to_1_gep_2x_24bit_min ; GFX12: liveins: $vgpr0_vgpr1, $vgpr2 @@ -1649,10 +1649,10 @@ body: | ; GFX12-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX12-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX12-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX12-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s64) = G_CONSTANT i64 -16777215 - %3:vgpr(p1) = G_PTR_ADD %0, %2 - G_STORE %1, %3 :: (store (s32), align 4, addrspace 0) + %1:vgpr(i32) = COPY $vgpr2 + %2:vgpr(i64) = G_CONSTANT i64 -16777215 + %3:vgpr(p1) = G_PTR_ADD %0, %2(i64) + G_STORE %1(i32), %3(p1) :: (store (i32)) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir index 9136f44dfc227..230e55ee9726a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir @@ -29,7 +29,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; GFX6-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (i32), addrspace 1) ; ; GFX7-LABEL: name: store_global_s32_to_4 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 @@ -41,45 +41,45 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; GFX7-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (i32), addrspace 1) ; ; GFX7-FLAT-LABEL: name: store_global_s32_to_4 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-FLAT-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7-FLAT-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32), addrspace 1) ; ; GFX8-LABEL: name: store_global_s32_to_4 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX8-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32), addrspace 1) ; ; GFX9-LABEL: name: store_global_s32_to_4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; GFX9-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (i32), addrspace 1) ; ; GFX10-LABEL: name: store_global_s32_to_4 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; GFX10-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (i32), addrspace 1) ; ; GFX12-LABEL: name: store_global_s32_to_4 ; GFX12: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX12-NEXT: 
{{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX12-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; GFX12-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (i32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - G_STORE %1, %0 :: (store (s32), align 4, addrspace 1) + %1:vgpr(i32) = COPY $vgpr2 + G_STORE %1(i32), %0(p1) :: (store (i32), addrspace 1) ... @@ -103,7 +103,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_STORE_SHORT_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (s16), addrspace 1) + ; GFX6-NEXT: BUFFER_STORE_SHORT_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (i16), addrspace 1) ; ; GFX7-LABEL: name: store_global_s32_to_2 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 @@ -115,45 +115,45 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: BUFFER_STORE_SHORT_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (s16), addrspace 1) + ; GFX7-NEXT: BUFFER_STORE_SHORT_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (i16), addrspace 1) ; ; GFX7-FLAT-LABEL: name: store_global_s32_to_2 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-FLAT-NEXT: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16), addrspace 1) + ; GFX7-FLAT-NEXT: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i16), addrspace 1) ; ; GFX8-LABEL: name: store_global_s32_to_2 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16), addrspace 1) + ; GFX8-NEXT: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i16), addrspace 1) ; ; GFX9-LABEL: name: store_global_s32_to_2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: GLOBAL_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (s16), addrspace 1) + ; GFX9-NEXT: GLOBAL_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (i16), addrspace 1) ; ; GFX10-LABEL: name: store_global_s32_to_2 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: GLOBAL_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (s16), 
addrspace 1) + ; GFX10-NEXT: GLOBAL_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (i16), addrspace 1) ; ; GFX12-LABEL: name: store_global_s32_to_2 ; GFX12: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX12-NEXT: GLOBAL_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (s16), addrspace 1) + ; GFX12-NEXT: GLOBAL_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (i16), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - G_STORE %1, %0 :: (store (s16), align 2, addrspace 1) + %1:vgpr(i32) = COPY $vgpr2 + G_STORE %1(i32), %0(p1) :: (store (i16), addrspace 1) ... @@ -177,7 +177,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_STORE_BYTE_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (s8), addrspace 1) + ; GFX6-NEXT: BUFFER_STORE_BYTE_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (i8), addrspace 1) ; ; GFX7-LABEL: name: store_global_s32_to_1 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 @@ -189,45 +189,45 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: BUFFER_STORE_BYTE_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (s8), addrspace 1) + ; GFX7-NEXT: BUFFER_STORE_BYTE_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (i8), addrspace 1) ; ; GFX7-FLAT-LABEL: name: store_global_s32_to_1 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-FLAT-NEXT: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8), addrspace 1) + ; GFX7-FLAT-NEXT: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i8), addrspace 1) ; ; GFX8-LABEL: name: store_global_s32_to_1 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8), addrspace 1) + ; GFX8-NEXT: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i8), addrspace 1) ; ; GFX9-LABEL: name: store_global_s32_to_1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: GLOBAL_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (s8), addrspace 1) + ; GFX9-NEXT: GLOBAL_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (i8), addrspace 1) ; ; GFX10-LABEL: name: store_global_s32_to_1 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} ; 
GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: GLOBAL_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (s8), addrspace 1) + ; GFX10-NEXT: GLOBAL_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (i8), addrspace 1) ; ; GFX12-LABEL: name: store_global_s32_to_1 ; GFX12: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX12-NEXT: GLOBAL_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (s8), addrspace 1) + ; GFX12-NEXT: GLOBAL_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (i8), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - G_STORE %1, %0 :: (store (s8), align 1, addrspace 1) + %1:vgpr(i32) = COPY $vgpr2 + G_STORE %1(i32), %0(p1) :: (store (i8), addrspace 1) ... @@ -252,7 +252,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_STORE_DWORDX2_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (s64), addrspace 1) + ; GFX6-NEXT: BUFFER_STORE_DWORDX2_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (i64), addrspace 1) ; ; GFX7-LABEL: name: store_global_s64 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -264,45 +264,45 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: BUFFER_STORE_DWORDX2_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (s64), addrspace 1) + ; GFX7-NEXT: BUFFER_STORE_DWORDX2_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (i64), addrspace 1) ; ; GFX7-FLAT-LABEL: name: store_global_s64 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-FLAT-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 1) + ; GFX7-FLAT-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i64), addrspace 1) ; ; GFX8-LABEL: name: store_global_s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 1) + ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i64), addrspace 1) ; ; GFX9-LABEL: name: store_global_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit 
$exec :: (store (s64), addrspace 1) + ; GFX9-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (i64), addrspace 1) ; ; GFX10-LABEL: name: store_global_s64 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (s64), addrspace 1) + ; GFX10-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (i64), addrspace 1) ; ; GFX12-LABEL: name: store_global_s64 ; GFX12: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX12-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (s64), addrspace 1) + ; GFX12-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (i64), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store (s64), align 8, addrspace 1) + %1:vgpr(i64) = COPY $vgpr2_vgpr3 + G_STORE %1(i64), %0(p1) :: (store (i64), addrspace 1) ... --- @@ -320,53 +320,53 @@ body: | ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX6-NEXT: G_STORE [[COPY1]](s128), [[COPY]](p1) :: (store (s128), addrspace 1) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX6-NEXT: G_STORE [[COPY1]](i128), [[COPY]](p1) :: (store (i128), addrspace 1) ; ; GFX7-LABEL: name: store_global_s128 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX7-NEXT: G_STORE [[COPY1]](s128), [[COPY]](p1) :: (store (s128), addrspace 1) + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX7-NEXT: G_STORE [[COPY1]](i128), [[COPY]](p1) :: (store (i128), addrspace 1) ; ; GFX7-FLAT-LABEL: name: store_global_s128 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX7-FLAT-NEXT: G_STORE [[COPY1]](s128), [[COPY]](p1) :: (store (s128), addrspace 1) + ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX7-FLAT-NEXT: G_STORE [[COPY1]](i128), [[COPY]](p1) :: (store (i128), addrspace 1) ; ; GFX8-LABEL: name: store_global_s128 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX8-NEXT: G_STORE [[COPY1]](s128), [[COPY]](p1) :: (store (s128), addrspace 1) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX8-NEXT: G_STORE [[COPY1]](i128), [[COPY]](p1) :: (store (i128), addrspace 1) ; ; GFX9-LABEL: name: store_global_s128 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: G_STORE [[COPY1]](s128), 
[[COPY]](p1) :: (store (s128), addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: G_STORE [[COPY1]](i128), [[COPY]](p1) :: (store (i128), addrspace 1) ; ; GFX10-LABEL: name: store_global_s128 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX10-NEXT: G_STORE [[COPY1]](s128), [[COPY]](p1) :: (store (s128), addrspace 1) + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX10-NEXT: G_STORE [[COPY1]](i128), [[COPY]](p1) :: (store (i128), addrspace 1) ; ; GFX12-LABEL: name: store_global_s128 ; GFX12: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX12-NEXT: G_STORE [[COPY1]](s128), [[COPY]](p1) :: (store (s128), addrspace 1) + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX12-NEXT: G_STORE [[COPY1]](i128), [[COPY]](p1) :: (store (i128), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store (s128), align 16, addrspace 1) + %1:vgpr(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + G_STORE %1(i128), %0(p1) :: (store (i128), addrspace 1) ... @@ -391,7 +391,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_STORE_DWORDX2_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (<2 x s32>), addrspace 1) + ; GFX6-NEXT: BUFFER_STORE_DWORDX2_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (<2 x i32>), addrspace 1) ; ; GFX7-LABEL: name: store_global_v2s32 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -403,45 +403,45 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: BUFFER_STORE_DWORDX2_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (<2 x s32>), addrspace 1) + ; GFX7-NEXT: BUFFER_STORE_DWORDX2_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (<2 x i32>), addrspace 1) ; ; GFX7-FLAT-LABEL: name: store_global_v2s32 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-FLAT-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s32>), addrspace 1) + ; GFX7-FLAT-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x i32>), addrspace 1) ; ; GFX8-LABEL: name: store_global_v2s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 
- ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s32>), addrspace 1) + ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x i32>), addrspace 1) ; ; GFX9-LABEL: name: store_global_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<2 x s32>), addrspace 1) + ; GFX9-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<2 x i32>), addrspace 1) ; ; GFX10-LABEL: name: store_global_v2s32 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<2 x s32>), addrspace 1) + ; GFX10-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<2 x i32>), addrspace 1) ; ; GFX12-LABEL: name: store_global_v2s32 ; GFX12: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX12-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<2 x s32>), addrspace 1) + ; GFX12-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<2 x i32>), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<2 x s32>) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store (<2 x s32>), align 8, addrspace 1) + %1:vgpr(<2 x i32>) = COPY $vgpr2_vgpr3 + G_STORE %1(<2 x i32>), %0(p1) :: (store (<2 x i32>), addrspace 1) ... 
--- @@ -465,7 +465,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_STORE_DWORDX4_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (<4 x s32>), addrspace 1) + ; GFX6-NEXT: BUFFER_STORE_DWORDX4_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (<4 x i32>), addrspace 1) ; ; GFX7-LABEL: name: store_global_v4s32 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 @@ -477,45 +477,45 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: BUFFER_STORE_DWORDX4_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (<4 x s32>), addrspace 1) + ; GFX7-NEXT: BUFFER_STORE_DWORDX4_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (<4 x i32>), addrspace 1) ; ; GFX7-FLAT-LABEL: name: store_global_v4s32 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX7-FLAT-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>), addrspace 1) + ; GFX7-FLAT-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x i32>), addrspace 1) ; ; GFX8-LABEL: name: store_global_v4s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX8-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>), addrspace 1) + ; GFX8-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x i32>), addrspace 1) ; ; GFX9-LABEL: name: store_global_v4s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<4 x s32>), addrspace 1) + ; GFX9-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<4 x i32>), addrspace 1) ; ; GFX10-LABEL: name: store_global_v4s32 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX10-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<4 x s32>), addrspace 1) + ; GFX10-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<4 x i32>), addrspace 1) ; ; GFX12-LABEL: name: store_global_v4s32 ; GFX12: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY 
$vgpr0_vgpr1 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX12-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<4 x s32>), addrspace 1) + ; GFX12-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<4 x i32>), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store (<4 x s32>), align 16, addrspace 1) + %1:vgpr(<4 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + G_STORE %1(<4 x i32>), %0(p1) :: (store (<4 x i32>), addrspace 1) ... @@ -540,7 +540,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (<2 x s16>), addrspace 1) + ; GFX6-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (<2 x i16>), addrspace 1) ; ; GFX7-LABEL: name: store_global_v2s16 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 @@ -552,45 +552,45 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (<2 x s16>), addrspace 1) + ; GFX7-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (<2 x i16>), addrspace 1) ; ; GFX7-FLAT-LABEL: name: store_global_v2s16 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-FLAT-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>), addrspace 1) + ; GFX7-FLAT-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x i16>), addrspace 1) ; ; GFX8-LABEL: name: store_global_v2s16 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>), addrspace 1) + ; GFX8-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x i16>), addrspace 1) ; ; GFX9-LABEL: name: store_global_v2s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<2 x s16>), addrspace 1) + ; GFX9-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<2 x i16>), addrspace 1) ; ; GFX10-LABEL: name: store_global_v2s16 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - 
; GFX10-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<2 x s16>), addrspace 1) + ; GFX10-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<2 x i16>), addrspace 1) ; ; GFX12-LABEL: name: store_global_v2s16 ; GFX12: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX12-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<2 x s16>), addrspace 1) + ; GFX12-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<2 x i16>), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<2 x s16>) = COPY $vgpr2 - G_STORE %1, %0 :: (store (<2 x s16>), align 4, addrspace 1) + %1:vgpr(<2 x i16>) = COPY $vgpr2 + G_STORE %1(<2 x i16>), %0(p1) :: (store (<2 x i16>), addrspace 1) ... @@ -615,7 +615,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_STORE_DWORDX2_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (<4 x s16>), addrspace 1) + ; GFX6-NEXT: BUFFER_STORE_DWORDX2_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (<4 x i16>), addrspace 1) ; ; GFX7-LABEL: name: store_global_v4s16 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -627,45 +627,45 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: BUFFER_STORE_DWORDX2_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (<4 x s16>), addrspace 1) + ; GFX7-NEXT: BUFFER_STORE_DWORDX2_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (<4 x i16>), addrspace 1) ; ; GFX7-FLAT-LABEL: name: store_global_v4s16 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-FLAT-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s16>), addrspace 1) + ; GFX7-FLAT-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x i16>), addrspace 1) ; ; GFX8-LABEL: name: store_global_v4s16 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s16>), addrspace 1) + ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x i16>), addrspace 1) ; ; GFX9-LABEL: name: store_global_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit 
$exec :: (store (<4 x s16>), addrspace 1) + ; GFX9-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<4 x i16>), addrspace 1) ; ; GFX10-LABEL: name: store_global_v4s16 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<4 x s16>), addrspace 1) + ; GFX10-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<4 x i16>), addrspace 1) ; ; GFX12-LABEL: name: store_global_v4s16 ; GFX12: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX12-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<4 x s16>), addrspace 1) + ; GFX12-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<4 x i16>), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store (<4 x s16>), align 8, addrspace 1) + %1:vgpr(<4 x i16>) = COPY $vgpr2_vgpr3 + G_STORE %1(<4 x i16>), %0(p1) :: (store (<4 x i16>), addrspace 1) ... @@ -690,7 +690,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_STORE_DWORDX4_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (<8 x s16>), addrspace 1) + ; GFX6-NEXT: BUFFER_STORE_DWORDX4_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (<8 x i16>), addrspace 1) ; ; GFX7-LABEL: name: store_global_v8s16 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 @@ -702,45 +702,45 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: BUFFER_STORE_DWORDX4_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (<8 x s16>), addrspace 1) + ; GFX7-NEXT: BUFFER_STORE_DWORDX4_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (<8 x i16>), addrspace 1) ; ; GFX7-FLAT-LABEL: name: store_global_v8s16 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX7-FLAT-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x s16>), addrspace 1) + ; GFX7-FLAT-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x i16>), addrspace 1) ; ; GFX8-LABEL: name: store_global_v8s16 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX8-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, 
implicit $flat_scr :: (store (<8 x s16>), addrspace 1) + ; GFX8-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x i16>), addrspace 1) ; ; GFX9-LABEL: name: store_global_v8s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<8 x s16>), addrspace 1) + ; GFX9-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<8 x i16>), addrspace 1) ; ; GFX10-LABEL: name: store_global_v8s16 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX10-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<8 x s16>), addrspace 1) + ; GFX10-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<8 x i16>), addrspace 1) ; ; GFX12-LABEL: name: store_global_v8s16 ; GFX12: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX12-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<8 x s16>), addrspace 1) + ; GFX12-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<8 x i16>), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store (<8 x s16>), align 16, addrspace 1) + %1:vgpr(<8 x i16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + G_STORE %1(<8 x i16>), %0(p1) :: (store (<8 x i16>), addrspace 1) ... 
@@ -765,7 +765,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_STORE_DWORDX4_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (<2 x s64>), addrspace 1) + ; GFX6-NEXT: BUFFER_STORE_DWORDX4_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (<2 x i64>), addrspace 1) ; ; GFX7-LABEL: name: store_global_v2s64 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 @@ -777,45 +777,45 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: BUFFER_STORE_DWORDX4_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (<2 x s64>), addrspace 1) + ; GFX7-NEXT: BUFFER_STORE_DWORDX4_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (<2 x i64>), addrspace 1) ; ; GFX7-FLAT-LABEL: name: store_global_v2s64 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX7-FLAT-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s64>), addrspace 1) + ; GFX7-FLAT-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x i64>), addrspace 1) ; ; GFX8-LABEL: name: store_global_v2s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX8-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s64>), addrspace 1) + ; GFX8-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x i64>), addrspace 1) ; ; GFX9-LABEL: name: store_global_v2s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<2 x s64>), addrspace 1) + ; GFX9-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<2 x i64>), addrspace 1) ; ; GFX10-LABEL: name: store_global_v2s64 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX10-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<2 x s64>), addrspace 1) + ; GFX10-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<2 x i64>), addrspace 1) ; ; GFX12-LABEL: name: store_global_v2s64 ; GFX12: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY 
$vgpr0_vgpr1 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX12-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<2 x s64>), addrspace 1) + ; GFX12-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<2 x i64>), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store (<2 x s64>), align 16, addrspace 1) + %1:vgpr(<2 x i64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + G_STORE %1(<2 x i64>), %0(p1) :: (store (<2 x i64>), addrspace 1) ... @@ -890,7 +890,7 @@ body: | ; GFX12-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (p1), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(p1) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store (p1), align 8, addrspace 1) + G_STORE %1(p1), %0(p1) :: (store (p1), addrspace 1) ... @@ -955,7 +955,7 @@ body: | ; GFX12-NEXT: G_STORE [[COPY1]](<2 x p1>), [[COPY]](p1) :: (store (<2 x p1>), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x p1>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store (<2 x p1>), align 16, addrspace 1) + G_STORE %1(<2 x p1>), %0(p1) :: (store (<2 x p1>), addrspace 1) ... @@ -1030,7 +1030,7 @@ body: | ; GFX12-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (p3), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 - G_STORE %1, %0 :: (store (p3), align 4, addrspace 1) + G_STORE %1(p3), %0(p1) :: (store (p3), addrspace 1) ... @@ -1095,7 +1095,7 @@ body: | ; GFX12-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x p3>) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store (<2 x p3>), align 8, addrspace 1) + G_STORE %1(<2 x p3>), %0(p1) :: (store (<2 x p3>), addrspace 1) ... 
--- @@ -1119,7 +1119,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store monotonic (s32), addrspace 1) + ; GFX6-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store monotonic (i32), addrspace 1) ; ; GFX7-LABEL: name: store_atomic_global_s32 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 @@ -1131,45 +1131,45 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store monotonic (s32), addrspace 1) + ; GFX7-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store monotonic (i32), addrspace 1) ; ; GFX7-FLAT-LABEL: name: store_atomic_global_s32 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-FLAT-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s32), addrspace 1) + ; GFX7-FLAT-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (i32), addrspace 1) ; ; GFX8-LABEL: name: store_atomic_global_s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s32), addrspace 1) + ; GFX8-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (i32), addrspace 1) ; ; GFX9-LABEL: name: store_atomic_global_s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store monotonic (s32), addrspace 1) + ; GFX9-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store monotonic (i32), addrspace 1) ; ; GFX10-LABEL: name: store_atomic_global_s32 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store monotonic (s32), addrspace 1) + ; GFX10-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store monotonic (i32), addrspace 1) ; ; GFX12-LABEL: name: store_atomic_global_s32 ; GFX12: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX12-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 
implicit $exec :: (store monotonic (s32), addrspace 1) + ; GFX12-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store monotonic (i32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - G_STORE %1, %0 :: (store monotonic (s32), align 4, addrspace 1) + %1:vgpr(i32) = COPY $vgpr2 + G_STORE %1(i32), %0(p1) :: (store monotonic (i32), addrspace 1) ... @@ -1194,7 +1194,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_STORE_DWORDX2_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store monotonic (s64), addrspace 1) + ; GFX6-NEXT: BUFFER_STORE_DWORDX2_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store monotonic (i64), addrspace 1) ; ; GFX7-LABEL: name: store_atomic_global_s64 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -1206,45 +1206,45 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: BUFFER_STORE_DWORDX2_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store monotonic (s64), addrspace 1) + ; GFX7-NEXT: BUFFER_STORE_DWORDX2_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store monotonic (i64), addrspace 1) ; ; GFX7-FLAT-LABEL: name: store_atomic_global_s64 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-FLAT-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s64), addrspace 1) + ; GFX7-FLAT-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (i64), addrspace 1) ; ; GFX8-LABEL: name: store_atomic_global_s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s64), addrspace 1) + ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (i64), addrspace 1) ; ; GFX9-LABEL: name: store_atomic_global_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store monotonic (s64), addrspace 1) + ; GFX9-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store monotonic (i64), addrspace 1) ; ; GFX10-LABEL: name: store_atomic_global_s64 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], 
[[COPY1]], 0, 0, implicit $exec :: (store monotonic (s64), addrspace 1) + ; GFX10-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store monotonic (i64), addrspace 1) ; ; GFX12-LABEL: name: store_atomic_global_s64 ; GFX12: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX12-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store monotonic (s64), addrspace 1) + ; GFX12-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store monotonic (i64), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store monotonic (s64), align 8, addrspace 1) + %1:vgpr(i64) = COPY $vgpr2_vgpr3 + G_STORE %1(i64), %0(p1) :: (store monotonic (i64), addrspace 1) ... @@ -1269,7 +1269,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; GFX6-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, implicit $exec :: (store (i32), addrspace 1) ; ; GFX7-LABEL: name: store_global_s32_gep_2047 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 @@ -1281,7 +1281,7 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; GFX7-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, implicit $exec :: (store (i32), addrspace 1) ; ; GFX7-FLAT-LABEL: name: store_global_s32_gep_2047 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2 @@ -1296,7 +1296,7 @@ body: | ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-FLAT-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7-FLAT-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32), addrspace 1) ; ; GFX8-LABEL: name: store_global_s32_gep_2047 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 @@ -1311,33 +1311,33 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed 
[[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32), addrspace 1) ; ; GFX9-LABEL: name: store_global_s32_gep_2047 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 2047, 0, implicit $exec :: (store (s32), addrspace 1) + ; GFX9-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 2047, 0, implicit $exec :: (store (i32), addrspace 1) ; ; GFX10-LABEL: name: store_global_s32_gep_2047 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 2047, 0, implicit $exec :: (store (s32), addrspace 1) + ; GFX10-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 2047, 0, implicit $exec :: (store (i32), addrspace 1) ; ; GFX12-LABEL: name: store_global_s32_gep_2047 ; GFX12: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX12-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 2047, 0, implicit $exec :: (store (s32), addrspace 1) + ; GFX12-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 2047, 0, implicit $exec :: (store (i32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s64) = G_CONSTANT i64 2047 - %3:vgpr(p1) = G_PTR_ADD %0, %2 - G_STORE %1, %3 :: (store (s32), align 4, addrspace 1) + %1:vgpr(i32) = COPY $vgpr2 + %2:vgpr(i64) = G_CONSTANT i64 2047 + %3:vgpr(p1) = G_PTR_ADD %0, %2(i64) + G_STORE %1(i32), %3(p1) :: (store (i32), addrspace 1) ... 
@@ -1363,7 +1363,7 @@ body: | ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX6-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 8388607 - ; GFX6-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; GFX6-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (store (i32), addrspace 1) ; ; GFX7-LABEL: name: store_global_s32_to_1_gep_24bit_max ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 @@ -1376,7 +1376,7 @@ body: | ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX7-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 8388607 - ; GFX7-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; GFX7-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (store (i32), addrspace 1) ; ; GFX7-FLAT-LABEL: name: store_global_s32_to_1_gep_24bit_max ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2 @@ -1391,7 +1391,7 @@ body: | ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-FLAT-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7-FLAT-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32), addrspace 1) ; ; GFX8-LABEL: name: store_global_s32_to_1_gep_24bit_max ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 @@ -1406,7 +1406,7 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32), addrspace 1) ; ; GFX9-LABEL: name: store_global_s32_to_1_gep_24bit_max ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 @@ -1421,7 +1421,7 @@ body: | ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 
= REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; GFX9-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec :: (store (i32), addrspace 1) ; ; GFX10-LABEL: name: store_global_s32_to_1_gep_24bit_max ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 @@ -1436,19 +1436,19 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; GFX10-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec :: (store (i32), addrspace 1) ; ; GFX12-LABEL: name: store_global_s32_to_1_gep_24bit_max ; GFX12: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX12-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 8388607, 0, implicit $exec :: (store (s32), addrspace 1) + ; GFX12-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 8388607, 0, implicit $exec :: (store (i32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s64) = G_CONSTANT i64 8388607 - %3:vgpr(p1) = G_PTR_ADD %0, %2 - G_STORE %1, %3 :: (store (s32), align 4, addrspace 1) + %1:vgpr(i32) = COPY $vgpr2 + %2:vgpr(i64) = G_CONSTANT i64 8388607 + %3:vgpr(p1) = G_PTR_ADD %0, %2(i64) + G_STORE %1(i32), %3(p1) :: (store (i32), addrspace 1) ... 
--- @@ -1480,7 +1480,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; GFX6-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (store (i32), addrspace 1) ; ; GFX7-LABEL: name: store_global_s32_to_1_gep_24bit_min ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 @@ -1500,7 +1500,7 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX7-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; GFX7-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (store (i32), addrspace 1) ; ; GFX7-FLAT-LABEL: name: store_global_s32_to_1_gep_24bit_min ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2 @@ -1515,7 +1515,7 @@ body: | ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-FLAT-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7-FLAT-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32), addrspace 1) ; ; GFX8-LABEL: name: store_global_s32_to_1_gep_24bit_min ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 @@ -1530,7 +1530,7 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32), addrspace 1) ; ; GFX9-LABEL: name: store_global_s32_to_1_gep_24bit_min ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 @@ -1545,7 +1545,7 @@ body: | ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], 
[[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; GFX9-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec :: (store (i32), addrspace 1) ; ; GFX10-LABEL: name: store_global_s32_to_1_gep_24bit_min ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 @@ -1560,19 +1560,19 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; GFX10-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec :: (store (i32), addrspace 1) ; ; GFX12-LABEL: name: store_global_s32_to_1_gep_24bit_min ; GFX12: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX12-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], -8388608, 0, implicit $exec :: (store (s32), addrspace 1) + ; GFX12-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], -8388608, 0, implicit $exec :: (store (i32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s64) = G_CONSTANT i64 -8388608 - %3:vgpr(p1) = G_PTR_ADD %0, %2 - G_STORE %1, %3 :: (store (s32), align 4, addrspace 1) + %1:vgpr(i32) = COPY $vgpr2 + %2:vgpr(i64) = G_CONSTANT i64 -8388608 + %3:vgpr(p1) = G_PTR_ADD %0, %2(i64) + G_STORE %1(i32), %3(p1) :: (store (i32), addrspace 1) ... 
@@ -1598,7 +1598,7 @@ body: | ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX6-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 16777214 - ; GFX6-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; GFX6-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (store (i32), addrspace 1) ; ; GFX7-LABEL: name: store_global_s32_to_1_gep_2x_24bit_max ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 @@ -1611,7 +1611,7 @@ body: | ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX7-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 16777214 - ; GFX7-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; GFX7-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (store (i32), addrspace 1) ; ; GFX7-FLAT-LABEL: name: store_global_s32_to_1_gep_2x_24bit_max ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2 @@ -1626,7 +1626,7 @@ body: | ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-FLAT-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7-FLAT-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32), addrspace 1) ; ; GFX8-LABEL: name: store_global_s32_to_1_gep_2x_24bit_max ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 @@ -1641,7 +1641,7 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32), addrspace 1) ; ; GFX9-LABEL: name: store_global_s32_to_1_gep_2x_24bit_max ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 @@ -1656,7 +1656,7 @@ body: | ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9-NEXT: 
[[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; GFX9-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec :: (store (i32), addrspace 1) ; ; GFX10-LABEL: name: store_global_s32_to_1_gep_2x_24bit_max ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 @@ -1671,7 +1671,7 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; GFX10-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec :: (store (i32), addrspace 1) ; ; GFX12-LABEL: name: store_global_s32_to_1_gep_2x_24bit_max ; GFX12: liveins: $vgpr0_vgpr1, $vgpr2 @@ -1686,12 +1686,12 @@ body: | ; GFX12-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX12-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX12-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; GFX12-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec :: (store (i32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s64) = G_CONSTANT i64 16777214 - %3:vgpr(p1) = G_PTR_ADD %0, %2 - G_STORE %1, %3 :: (store (s32), align 4, addrspace 1) + %1:vgpr(i32) = COPY $vgpr2 + %2:vgpr(i64) = G_CONSTANT i64 16777214 + %3:vgpr(p1) = G_PTR_ADD %0, %2(i64) + G_STORE %1(i32), %3(p1) :: (store (i32), addrspace 1) ... 
@@ -1724,7 +1724,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; GFX6-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (store (i32), addrspace 1) ; ; GFX7-LABEL: name: store_global_s32_to_1_gep_2x_24bit_min ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 @@ -1744,7 +1744,7 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX7-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; GFX7-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (store (i32), addrspace 1) ; ; GFX7-FLAT-LABEL: name: store_global_s32_to_1_gep_2x_24bit_min ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2 @@ -1759,7 +1759,7 @@ body: | ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-FLAT-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7-FLAT-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32), addrspace 1) ; ; GFX8-LABEL: name: store_global_s32_to_1_gep_2x_24bit_min ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 @@ -1774,7 +1774,7 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32), addrspace 1) ; ; GFX9-LABEL: name: store_global_s32_to_1_gep_2x_24bit_min ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 @@ -1789,7 +1789,7 @@ body: | ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], 
[[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; GFX9-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec :: (store (i32), addrspace 1) ; ; GFX10-LABEL: name: store_global_s32_to_1_gep_2x_24bit_min ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 @@ -1804,7 +1804,7 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; GFX10-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec :: (store (i32), addrspace 1) ; ; GFX12-LABEL: name: store_global_s32_to_1_gep_2x_24bit_min ; GFX12: liveins: $vgpr0_vgpr1, $vgpr2 @@ -1819,11 +1819,11 @@ body: | ; GFX12-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX12-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX12-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; GFX12-NEXT: GLOBAL_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec :: (store (i32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s64) = G_CONSTANT i64 -16777215 - %3:vgpr(p1) = G_PTR_ADD %0, %2 - G_STORE %1, %3 :: (store (s32), align 4, addrspace 1) + %1:vgpr(i32) = COPY $vgpr2 + %2:vgpr(i64) = G_CONSTANT i64 -16777215 + %3:vgpr(p1) = G_PTR_ADD %0, %2(i64) + G_STORE %1(i32), %3(p1) :: (store (i32), addrspace 1) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.s96.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.s96.mir index fc8d50c79910b..5c6ee7fc74e91 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.s96.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.s96.mir @@ -30,34 +30,38 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: BUFFER_STORE_DWORDX3_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (<3 x s32>), align 16, addrspace 1) + ; GFX7-NEXT: BUFFER_STORE_DWORDX3_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (<3 x i32>), align 16, addrspace 1) + ; ; GFX7-FLAT-LABEL: name: store_global_v3s32 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 - ; GFX7-FLAT-NEXT: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>), align 16, addrspace 1) + ; GFX7-FLAT-NEXT: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x i32>), align 16, addrspace 1) + ; ; GFX8-LABEL: name: store_global_v3s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 - ; GFX8-NEXT: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>), align 16, addrspace 1) + ; GFX8-NEXT: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x i32>), align 16, addrspace 1) + ; ; GFX9-LABEL: name: store_global_v3s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 - ; GFX9-NEXT: GLOBAL_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<3 x s32>), align 16, addrspace 1) + ; GFX9-NEXT: GLOBAL_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<3 x i32>), align 16, addrspace 1) + ; ; GFX10-LABEL: name: store_global_v3s32 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 - ; GFX10-NEXT: GLOBAL_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<3 x s32>), align 16, addrspace 1) + ; GFX10-NEXT: GLOBAL_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<3 x i32>), align 16, addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - G_STORE %1, %0 :: (store (<3 x s32>), align 16, addrspace 1) + %1:vgpr(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + G_STORE %1(<3 x i32>), %0(p1) :: (store (<3 x i32>), align 16, addrspace 1) ... 
@@ -76,35 +80,39 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(s96) = COPY $vgpr2_vgpr3_vgpr4 - ; GFX7-NEXT: G_STORE [[COPY1]](s96), [[COPY]](p1) :: (store (s96), align 16, addrspace 1) + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(i96) = COPY $vgpr2_vgpr3_vgpr4 + ; GFX7-NEXT: G_STORE [[COPY1]](i96), [[COPY]](p1) :: (store (i96), align 16, addrspace 1) + ; ; GFX7-FLAT-LABEL: name: store_global_s96 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr(s96) = COPY $vgpr2_vgpr3_vgpr4 - ; GFX7-FLAT-NEXT: G_STORE [[COPY1]](s96), [[COPY]](p1) :: (store (s96), align 16, addrspace 1) + ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr(i96) = COPY $vgpr2_vgpr3_vgpr4 + ; GFX7-FLAT-NEXT: G_STORE [[COPY1]](i96), [[COPY]](p1) :: (store (i96), align 16, addrspace 1) + ; ; GFX8-LABEL: name: store_global_s96 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(s96) = COPY $vgpr2_vgpr3_vgpr4 - ; GFX8-NEXT: G_STORE [[COPY1]](s96), [[COPY]](p1) :: (store (s96), align 16, addrspace 1) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(i96) = COPY $vgpr2_vgpr3_vgpr4 + ; GFX8-NEXT: G_STORE [[COPY1]](i96), [[COPY]](p1) :: (store (i96), align 16, addrspace 1) + ; ; GFX9-LABEL: name: store_global_s96 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(s96) = COPY $vgpr2_vgpr3_vgpr4 - ; GFX9-NEXT: G_STORE [[COPY1]](s96), [[COPY]](p1) :: (store (s96), align 16, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(i96) = COPY $vgpr2_vgpr3_vgpr4 + ; GFX9-NEXT: G_STORE [[COPY1]](i96), [[COPY]](p1) :: (store (i96), align 16, addrspace 1) + ; ; GFX10-LABEL: name: store_global_s96 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(s96) = COPY $vgpr2_vgpr3_vgpr4 - ; GFX10-NEXT: G_STORE [[COPY1]](s96), [[COPY]](p1) :: (store (s96), align 16, addrspace 1) + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(i96) = COPY $vgpr2_vgpr3_vgpr4 + ; GFX10-NEXT: G_STORE [[COPY1]](i96), [[COPY]](p1) :: (store (i96), align 16, addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s96) = COPY $vgpr2_vgpr3_vgpr4 - G_STORE %1, %0 :: (store (s96), align 16, addrspace 1) + %1:vgpr(i96) = COPY $vgpr2_vgpr3_vgpr4 + G_STORE %1(i96), %0(p1) :: (store (i96), align 16, addrspace 1) ... 
@@ -123,34 +131,38 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(<6 x s16>) = COPY $vgpr2_vgpr3_vgpr4 - ; GFX7-NEXT: G_STORE [[COPY1]](<6 x s16>), [[COPY]](p1) :: (store (<6 x s16>), align 16, addrspace 1) + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(<6 x i16>) = COPY $vgpr2_vgpr3_vgpr4 + ; GFX7-NEXT: G_STORE [[COPY1]](<6 x i16>), [[COPY]](p1) :: (store (<6 x i16>), align 16, addrspace 1) + ; ; GFX7-FLAT-LABEL: name: store_global_v6s16 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr(<6 x s16>) = COPY $vgpr2_vgpr3_vgpr4 - ; GFX7-FLAT-NEXT: G_STORE [[COPY1]](<6 x s16>), [[COPY]](p1) :: (store (<6 x s16>), align 16, addrspace 1) + ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr(<6 x i16>) = COPY $vgpr2_vgpr3_vgpr4 + ; GFX7-FLAT-NEXT: G_STORE [[COPY1]](<6 x i16>), [[COPY]](p1) :: (store (<6 x i16>), align 16, addrspace 1) + ; ; GFX8-LABEL: name: store_global_v6s16 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(<6 x s16>) = COPY $vgpr2_vgpr3_vgpr4 - ; GFX8-NEXT: G_STORE [[COPY1]](<6 x s16>), [[COPY]](p1) :: (store (<6 x s16>), align 16, addrspace 1) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(<6 x i16>) = COPY $vgpr2_vgpr3_vgpr4 + ; GFX8-NEXT: G_STORE [[COPY1]](<6 x i16>), [[COPY]](p1) :: (store (<6 x i16>), align 16, addrspace 1) + ; ; GFX9-LABEL: name: store_global_v6s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(<6 x s16>) = COPY $vgpr2_vgpr3_vgpr4 - ; GFX9-NEXT: G_STORE [[COPY1]](<6 x s16>), [[COPY]](p1) :: (store (<6 x s16>), align 16, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(<6 x i16>) = COPY $vgpr2_vgpr3_vgpr4 + ; GFX9-NEXT: G_STORE [[COPY1]](<6 x i16>), [[COPY]](p1) :: (store (<6 x i16>), align 16, addrspace 1) + ; ; GFX10-LABEL: name: store_global_v6s16 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(<6 x s16>) = COPY $vgpr2_vgpr3_vgpr4 - ; GFX10-NEXT: G_STORE [[COPY1]](<6 x s16>), [[COPY]](p1) :: (store (<6 x s16>), align 16, addrspace 1) + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(<6 x i16>) = COPY $vgpr2_vgpr3_vgpr4 + ; GFX10-NEXT: G_STORE [[COPY1]](<6 x i16>), [[COPY]](p1) :: (store (<6 x i16>), align 16, addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<6 x s16>) = COPY $vgpr2_vgpr3_vgpr4 - G_STORE %1, %0 :: (store (<6 x s16>), align 16, addrspace 1) + %1:vgpr(<6 x i16>) = COPY $vgpr2_vgpr3_vgpr4 + G_STORE %1(<6 x i16>), %0(p1) :: (store (<6 x i16>), align 16, addrspace 1) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-local.mir index 1b7c0fcb76714..37fe26f3ba5a3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-local.mir @@ -26,29 +26,32 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: DS_WRITE_B32 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (s32), addrspace 3) + ; GFX6-NEXT: DS_WRITE_B32 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (i32), addrspace 3) + ; ; GFX7-LABEL: name: store_local_s32_to_4 ; GFX7: liveins: $vgpr0, $vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: DS_WRITE_B32 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (s32), addrspace 3) + ; GFX7-NEXT: DS_WRITE_B32 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (i32), addrspace 3) + ; ; GFX9-LABEL: name: store_local_s32_to_4 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: DS_WRITE_B32_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (s32), addrspace 3) + ; GFX9-NEXT: DS_WRITE_B32_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (i32), addrspace 3) + ; ; GFX10-LABEL: name: store_local_s32_to_4 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: DS_WRITE_B32_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (s32), addrspace 3) - %0:vgpr(s32) = COPY $vgpr0 + ; GFX10-NEXT: DS_WRITE_B32_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (i32), addrspace 3) + %0:vgpr(i32) = COPY $vgpr0 %1:vgpr(p3) = COPY $vgpr1 - G_STORE %0, %1 :: (store (s32), align 4, addrspace 3) + G_STORE %0(i32), %1(p3) :: (store (i32), addrspace 3) ... 
@@ -72,29 +75,32 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: DS_WRITE_B16 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (s16), addrspace 3) + ; GFX6-NEXT: DS_WRITE_B16 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (i16), addrspace 3) + ; ; GFX7-LABEL: name: store_local_s32_to_2 ; GFX7: liveins: $vgpr0, $vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: DS_WRITE_B16 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (s16), addrspace 3) + ; GFX7-NEXT: DS_WRITE_B16 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (i16), addrspace 3) + ; ; GFX9-LABEL: name: store_local_s32_to_2 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: DS_WRITE_B16_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (s16), addrspace 3) + ; GFX9-NEXT: DS_WRITE_B16_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (i16), addrspace 3) + ; ; GFX10-LABEL: name: store_local_s32_to_2 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: DS_WRITE_B16_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (s16), addrspace 3) - %0:vgpr(s32) = COPY $vgpr0 + ; GFX10-NEXT: DS_WRITE_B16_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (i16), addrspace 3) + %0:vgpr(i32) = COPY $vgpr0 %1:vgpr(p3) = COPY $vgpr1 - G_STORE %0, %1 :: (store (s16), align 2, addrspace 3) + G_STORE %0(i32), %1(p3) :: (store (i16), addrspace 3) ... 
@@ -118,29 +124,32 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: DS_WRITE_B8 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (s8), addrspace 3) + ; GFX6-NEXT: DS_WRITE_B8 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (i8), addrspace 3) + ; ; GFX7-LABEL: name: store_local_s32_to_1 ; GFX7: liveins: $vgpr0, $vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: DS_WRITE_B8 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (s8), addrspace 3) + ; GFX7-NEXT: DS_WRITE_B8 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (i8), addrspace 3) + ; ; GFX9-LABEL: name: store_local_s32_to_1 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: DS_WRITE_B8_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (s8), addrspace 3) + ; GFX9-NEXT: DS_WRITE_B8_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (i8), addrspace 3) + ; ; GFX10-LABEL: name: store_local_s32_to_1 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: DS_WRITE_B8_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (s8), addrspace 3) - %0:vgpr(s32) = COPY $vgpr0 + ; GFX10-NEXT: DS_WRITE_B8_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (i8), addrspace 3) + %0:vgpr(i32) = COPY $vgpr0 %1:vgpr(p3) = COPY $vgpr1 - G_STORE %0, %1 :: (store (s8), align 1, addrspace 3) + G_STORE %0(i32), %1(p3) :: (store (i8), addrspace 3) ... 
@@ -164,29 +173,32 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: DS_WRITE_B32 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (<2 x s16>), addrspace 3) + ; GFX6-NEXT: DS_WRITE_B32 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (<2 x i16>), addrspace 3) + ; ; GFX7-LABEL: name: store_local_v2s16 ; GFX7: liveins: $vgpr0, $vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: DS_WRITE_B32 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (<2 x s16>), addrspace 3) + ; GFX7-NEXT: DS_WRITE_B32 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (<2 x i16>), addrspace 3) + ; ; GFX9-LABEL: name: store_local_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: DS_WRITE_B32_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (<2 x s16>), addrspace 3) + ; GFX9-NEXT: DS_WRITE_B32_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (<2 x i16>), addrspace 3) + ; ; GFX10-LABEL: name: store_local_v2s16 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: DS_WRITE_B32_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (<2 x s16>), addrspace 3) - %0:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX10-NEXT: DS_WRITE_B32_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (<2 x i16>), addrspace 3) + %0:vgpr(<2 x i16>) = COPY $vgpr0 %1:vgpr(p3) = COPY $vgpr1 - G_STORE %0, %1 :: (store (<2 x s16>), align 4, addrspace 3) + G_STORE %0(<2 x i16>), %1(p3) :: (store (<2 x i16>), addrspace 3) ... @@ -211,6 +223,7 @@ body: | ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 ; GFX6-NEXT: DS_WRITE_B32 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (p3), addrspace 3) + ; ; GFX7-LABEL: name: store_local_p3 ; GFX7: liveins: $vgpr0, $vgpr1 ; GFX7-NEXT: {{ $}} @@ -218,12 +231,14 @@ body: | ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 ; GFX7-NEXT: DS_WRITE_B32 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (p3), addrspace 3) + ; ; GFX9-LABEL: name: store_local_p3 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9-NEXT: DS_WRITE_B32_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (p3), addrspace 3) + ; ; GFX10-LABEL: name: store_local_p3 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} @@ -232,7 +247,7 @@ body: | ; GFX10-NEXT: DS_WRITE_B32_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (p3), addrspace 3) %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(p3) = COPY $vgpr1 - G_STORE %0, %1 :: (store (p3), align 4, addrspace 3) + G_STORE %0(p3), %1(p3) :: (store (p3), addrspace 3) ... 
@@ -250,23 +265,26 @@ body: | ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX6-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: DS_WRITE_B8 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, 0, implicit $m0, implicit $exec :: (store (s8), addrspace 3) + ; GFX6-NEXT: DS_WRITE_B8 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, 0, implicit $m0, implicit $exec :: (store (i8), addrspace 3) + ; ; GFX7-LABEL: name: store_local_s32_to_1_constant_4095 ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: DS_WRITE_B8 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, 0, implicit $m0, implicit $exec :: (store (s8), addrspace 3) + ; GFX7-NEXT: DS_WRITE_B8 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, 0, implicit $m0, implicit $exec :: (store (i8), addrspace 3) + ; ; GFX9-LABEL: name: store_local_s32_to_1_constant_4095 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX9-NEXT: DS_WRITE_B8_gfx9 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, 0, implicit $exec :: (store (s8), addrspace 3) + ; GFX9-NEXT: DS_WRITE_B8_gfx9 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, 0, implicit $exec :: (store (i8), addrspace 3) + ; ; GFX10-LABEL: name: store_local_s32_to_1_constant_4095 ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX10-NEXT: DS_WRITE_B8_gfx9 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, 0, implicit $exec :: (store (s8), addrspace 3) + ; GFX10-NEXT: DS_WRITE_B8_gfx9 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, 0, implicit $exec :: (store (i8), addrspace 3) %0:vgpr(p3) = G_CONSTANT i32 4095 - %1:vgpr(s32) = G_CONSTANT i32 0 - G_STORE %1, %0 :: (store (s8), align 1, addrspace 3) + %1:vgpr(i32) = G_CONSTANT i32 0 + G_STORE %1(i32), %0(p3) :: (store (i8), addrspace 3) ... 
@@ -289,23 +307,26 @@ body: | ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX6-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: DS_WRITE_B8 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, 0, implicit $m0, implicit $exec :: (store (s8), addrspace 3) + ; GFX6-NEXT: DS_WRITE_B8 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, 0, implicit $m0, implicit $exec :: (store (i8), addrspace 3) + ; ; GFX7-LABEL: name: store_local_s32_to_1_constant_4096 ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: DS_WRITE_B8 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, 0, implicit $m0, implicit $exec :: (store (s8), addrspace 3) + ; GFX7-NEXT: DS_WRITE_B8 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, 0, implicit $m0, implicit $exec :: (store (i8), addrspace 3) + ; ; GFX9-LABEL: name: store_local_s32_to_1_constant_4096 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX9-NEXT: DS_WRITE_B8_gfx9 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, 0, implicit $exec :: (store (s8), addrspace 3) + ; GFX9-NEXT: DS_WRITE_B8_gfx9 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, 0, implicit $exec :: (store (i8), addrspace 3) + ; ; GFX10-LABEL: name: store_local_s32_to_1_constant_4096 ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX10-NEXT: DS_WRITE_B8_gfx9 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, 0, implicit $exec :: (store (s8), addrspace 3) + ; GFX10-NEXT: DS_WRITE_B8_gfx9 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, 0, implicit $exec :: (store (i8), addrspace 3) %0:vgpr(p3) = G_CONSTANT i32 4096 - %1:vgpr(s32) = G_CONSTANT i32 0 - G_STORE %1, %0 :: (store (s8), align 1, addrspace 3) + %1:vgpr(i32) = G_CONSTANT i32 0 + G_STORE %1(i32), %0(p3) :: (store (i8), addrspace 3) ... 
@@ -326,10 +347,11 @@ body: | ; GFX6-LABEL: name: store_local_s64_align4 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: G_STORE [[COPY]](s64), [[COPY1]](p3) :: (store (s64), align 4, addrspace 3) + ; GFX6-NEXT: G_STORE [[COPY]](i64), [[COPY1]](p3) :: (store (i64), align 4, addrspace 3) + ; ; GFX7-LABEL: name: store_local_s64_align4 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} @@ -338,7 +360,8 @@ body: | ; GFX7-NEXT: $m0 = S_MOV_B32 -1 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: DS_WRITE2_B32 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $m0, implicit $exec :: (store (s64), align 4, addrspace 3) + ; GFX7-NEXT: DS_WRITE2_B32 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $m0, implicit $exec :: (store (i64), align 4, addrspace 3) + ; ; GFX9-LABEL: name: store_local_s64_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -346,7 +369,8 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store (s64), align 4, addrspace 3) + ; GFX9-NEXT: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store (i64), align 4, addrspace 3) + ; ; GFX10-LABEL: name: store_local_s64_align4 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} @@ -354,10 +378,10 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store (s64), align 4, addrspace 3) - %0:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store (i64), align 4, addrspace 3) + %0:vgpr(i64) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 - G_STORE %0, %1 :: (store (s64), align 4, addrspace 3) + G_STORE %0(i64), %1(p3) :: (store (i64), align 4, addrspace 3) ... 
@@ -382,6 +406,7 @@ body: | ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 ; GFX6-NEXT: G_STORE [[COPY]](p1), [[COPY1]](p3) :: (store (p1), align 4, addrspace 3) + ; ; GFX7-LABEL: name: store_local_p1_align4 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} @@ -391,6 +416,7 @@ body: | ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 ; GFX7-NEXT: DS_WRITE2_B32 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $m0, implicit $exec :: (store (p1), align 4, addrspace 3) + ; ; GFX9-LABEL: name: store_local_p1_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -399,6 +425,7 @@ body: | ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 ; GFX9-NEXT: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store (p1), align 4, addrspace 3) + ; ; GFX10-LABEL: name: store_local_p1_align4 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} @@ -409,7 +436,7 @@ body: | ; GFX10-NEXT: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store (p1), align 4, addrspace 3) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 - G_STORE %0, %1 :: (store (p1), align 4, addrspace 3) + G_STORE %0(p1), %1(p3) :: (store (p1), align 4, addrspace 3) ... @@ -430,10 +457,11 @@ body: | ; GFX6-LABEL: name: store_local_v2s32_align4 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x i32>) = COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: G_STORE [[COPY]](<2 x s32>), [[COPY1]](p3) :: (store (<2 x s32>), align 4, addrspace 3) + ; GFX6-NEXT: G_STORE [[COPY]](<2 x i32>), [[COPY1]](p3) :: (store (<2 x i32>), align 4, addrspace 3) + ; ; GFX7-LABEL: name: store_local_v2s32_align4 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} @@ -442,7 +470,8 @@ body: | ; GFX7-NEXT: $m0 = S_MOV_B32 -1 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: DS_WRITE2_B32 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $m0, implicit $exec :: (store (<2 x s32>), align 4, addrspace 3) + ; GFX7-NEXT: DS_WRITE2_B32 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $m0, implicit $exec :: (store (<2 x i32>), align 4, addrspace 3) + ; ; GFX9-LABEL: name: store_local_v2s32_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -450,7 +479,8 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store (<2 x s32>), align 4, addrspace 3) + ; GFX9-NEXT: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store (<2 x i32>), align 4, addrspace 3) + ; ; GFX10-LABEL: name: store_local_v2s32_align4 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} @@ -458,10 +488,10 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store (<2 x 
s32>), align 4, addrspace 3) - %0:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store (<2 x i32>), align 4, addrspace 3) + %0:vgpr(<2 x i32>) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 - G_STORE %0, %1 :: (store (<2 x s32>), align 4, addrspace 3) + G_STORE %0(<2 x i32>), %1(p3) :: (store (<2 x i32>), align 4, addrspace 3) ... @@ -482,10 +512,11 @@ body: | ; GFX6-LABEL: name: store_local_v4s16_align4 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x i16>) = COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: G_STORE [[COPY]](<4 x s16>), [[COPY1]](p3) :: (store (<4 x s16>), align 4, addrspace 3) + ; GFX6-NEXT: G_STORE [[COPY]](<4 x i16>), [[COPY1]](p3) :: (store (<4 x i16>), align 4, addrspace 3) + ; ; GFX7-LABEL: name: store_local_v4s16_align4 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} @@ -494,7 +525,8 @@ body: | ; GFX7-NEXT: $m0 = S_MOV_B32 -1 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: DS_WRITE2_B32 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $m0, implicit $exec :: (store (<4 x s16>), align 4, addrspace 3) + ; GFX7-NEXT: DS_WRITE2_B32 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $m0, implicit $exec :: (store (<4 x i16>), align 4, addrspace 3) + ; ; GFX9-LABEL: name: store_local_v4s16_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -502,7 +534,8 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store (<4 x s16>), align 4, addrspace 3) + ; GFX9-NEXT: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store (<4 x i16>), align 4, addrspace 3) + ; ; GFX10-LABEL: name: store_local_v4s16_align4 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} @@ -510,10 +543,10 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store (<4 x s16>), align 4, addrspace 3) - %0:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store (<4 x i16>), align 4, addrspace 3) + %0:vgpr(<4 x i16>) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 - G_STORE %0, %1 :: (store (<4 x s16>), align 4, addrspace 3) + G_STORE %0(<4 x i16>), %1(p3) :: (store (<4 x i16>), align 4, addrspace 3) ... 
@@ -537,29 +570,32 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (s64), addrspace 3) + ; GFX6-NEXT: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (i64), addrspace 3) + ; ; GFX7-LABEL: name: store_local_s64_align8 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (s64), addrspace 3) + ; GFX7-NEXT: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (i64), addrspace 3) + ; ; GFX9-LABEL: name: store_local_s64_align8 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (s64), addrspace 3) + ; GFX9-NEXT: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (i64), addrspace 3) + ; ; GFX10-LABEL: name: store_local_s64_align8 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (s64), addrspace 3) - %0:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (i64), addrspace 3) + %0:vgpr(i64) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 - G_STORE %0, %1 :: (store (s64), align 8, addrspace 3) + G_STORE %0(i64), %1(p3) :: (store (i64), addrspace 3) ... @@ -584,6 +620,7 @@ body: | ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 ; GFX6-NEXT: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (p1), addrspace 3) + ; ; GFX7-LABEL: name: store_local_p1_align8 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} @@ -591,12 +628,14 @@ body: | ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 ; GFX7-NEXT: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (p1), addrspace 3) + ; ; GFX9-LABEL: name: store_local_p1_align8 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX9-NEXT: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (p1), addrspace 3) + ; ; GFX10-LABEL: name: store_local_p1_align8 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} @@ -605,7 +644,7 @@ body: | ; GFX10-NEXT: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (p1), addrspace 3) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 - G_STORE %0, %1 :: (store (p1), align 8, addrspace 3) + G_STORE %0(p1), %1(p3) :: (store (p1), addrspace 3) ... 
@@ -629,29 +668,32 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (<2 x s32>), addrspace 3) + ; GFX6-NEXT: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (<2 x i32>), addrspace 3) + ; ; GFX7-LABEL: name: store_local_v2s32_align8 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (<2 x s32>), addrspace 3) + ; GFX7-NEXT: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (<2 x i32>), addrspace 3) + ; ; GFX9-LABEL: name: store_local_v2s32_align8 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (<2 x s32>), addrspace 3) + ; GFX9-NEXT: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (<2 x i32>), addrspace 3) + ; ; GFX10-LABEL: name: store_local_v2s32_align8 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (<2 x s32>), addrspace 3) - %0:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (<2 x i32>), addrspace 3) + %0:vgpr(<2 x i32>) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 - G_STORE %0, %1 :: (store (<2 x s32>), align 8, addrspace 3) + G_STORE %0(<2 x i32>), %1(p3) :: (store (<2 x i32>), addrspace 3) ... 
@@ -675,29 +717,32 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (<4 x s16>), addrspace 3) + ; GFX6-NEXT: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (<4 x i16>), addrspace 3) + ; ; GFX7-LABEL: name: store_local_v4s16_align8 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (<4 x s16>), addrspace 3) + ; GFX7-NEXT: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (<4 x i16>), addrspace 3) + ; ; GFX9-LABEL: name: store_local_v4s16_align8 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (<4 x s16>), addrspace 3) + ; GFX9-NEXT: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (<4 x i16>), addrspace 3) + ; ; GFX10-LABEL: name: store_local_v4s16_align8 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (<4 x s16>), addrspace 3) - %0:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (<4 x i16>), addrspace 3) + %0:vgpr(<4 x i16>) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 - G_STORE %0, %1 :: (store (<4 x s16>), align 8, addrspace 3) + G_STORE %0(<4 x i16>), %1(p3) :: (store (<4 x i16>), addrspace 3) ... 
@@ -718,12 +763,13 @@ body: | ; GFX6-LABEL: name: store_local_s64_align4_from_1_gep_1016 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 - ; GFX6-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1016 - ; GFX6-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p3) = G_PTR_ADD [[COPY1]], [[C]](s32) + ; GFX6-NEXT: [[C:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 1016 + ; GFX6-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p3) = G_PTR_ADD [[COPY1]], [[C]](i32) ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: G_STORE [[COPY]](s64), [[PTR_ADD]](p3) :: (store (s64), align 4, addrspace 3) + ; GFX6-NEXT: G_STORE [[COPY]](i64), [[PTR_ADD]](p3) :: (store (i64), align 4, addrspace 3) + ; ; GFX7-LABEL: name: store_local_s64_align4_from_1_gep_1016 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} @@ -732,7 +778,8 @@ body: | ; GFX7-NEXT: $m0 = S_MOV_B32 -1 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: DS_WRITE2_B32 [[COPY1]], [[COPY3]], [[COPY2]], 254, 255, 0, implicit $m0, implicit $exec :: (store (s64), align 4, addrspace 3) + ; GFX7-NEXT: DS_WRITE2_B32 [[COPY1]], [[COPY3]], [[COPY2]], 254, 255, 0, implicit $m0, implicit $exec :: (store (i64), align 4, addrspace 3) + ; ; GFX9-LABEL: name: store_local_s64_align4_from_1_gep_1016 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -740,7 +787,8 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 254, 255, 0, implicit $exec :: (store (s64), align 4, addrspace 3) + ; GFX9-NEXT: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 254, 255, 0, implicit $exec :: (store (i64), align 4, addrspace 3) + ; ; GFX10-LABEL: name: store_local_s64_align4_from_1_gep_1016 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} @@ -748,12 +796,12 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 254, 255, 0, implicit $exec :: (store (s64), align 4, addrspace 3) - %0:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 254, 255, 0, implicit $exec :: (store (i64), align 4, addrspace 3) + %0:vgpr(i64) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 - %2:vgpr(s32) = G_CONSTANT i32 1016 - %3:vgpr(p3) = G_PTR_ADD %1, %2 - G_STORE %0, %3 :: (store (s64), align 4, addrspace 3) + %2:vgpr(i32) = G_CONSTANT i32 1016 + %3:vgpr(p3) = G_PTR_ADD %1, %2(i32) + G_STORE %0(i64), %3(p3) :: (store (i64), align 4, addrspace 3) ... 
@@ -774,12 +822,13 @@ body: | ; GFX6-LABEL: name: store_local_s64_align4_from_1_gep_1020 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 - ; GFX6-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1020 - ; GFX6-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p3) = G_PTR_ADD [[COPY1]], [[C]](s32) + ; GFX6-NEXT: [[C:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 1020 + ; GFX6-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p3) = G_PTR_ADD [[COPY1]], [[C]](i32) ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: G_STORE [[COPY]](s64), [[PTR_ADD]](p3) :: (store (s64), align 4, addrspace 3) + ; GFX6-NEXT: G_STORE [[COPY]](i64), [[PTR_ADD]](p3) :: (store (i64), align 4, addrspace 3) + ; ; GFX7-LABEL: name: store_local_s64_align4_from_1_gep_1020 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} @@ -790,7 +839,8 @@ body: | ; GFX7-NEXT: $m0 = S_MOV_B32 -1 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: DS_WRITE2_B32 [[V_ADD_CO_U32_e64_]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $m0, implicit $exec :: (store (s64), align 4, addrspace 3) + ; GFX7-NEXT: DS_WRITE2_B32 [[V_ADD_CO_U32_e64_]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $m0, implicit $exec :: (store (i64), align 4, addrspace 3) + ; ; GFX9-LABEL: name: store_local_s64_align4_from_1_gep_1020 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -800,7 +850,8 @@ body: | ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY1]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: DS_WRITE2_B32_gfx9 [[V_ADD_U32_e64_]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store (s64), align 4, addrspace 3) + ; GFX9-NEXT: DS_WRITE2_B32_gfx9 [[V_ADD_U32_e64_]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store (i64), align 4, addrspace 3) + ; ; GFX10-LABEL: name: store_local_s64_align4_from_1_gep_1020 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} @@ -810,11 +861,11 @@ body: | ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY1]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: DS_WRITE2_B32_gfx9 [[V_ADD_U32_e64_]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store (s64), align 4, addrspace 3) - %0:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: DS_WRITE2_B32_gfx9 [[V_ADD_U32_e64_]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store (i64), align 4, addrspace 3) + %0:vgpr(i64) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 - %2:vgpr(s32) = G_CONSTANT i32 1020 - %3:vgpr(p3) = G_PTR_ADD %1, %2 - G_STORE %0, %3 :: (store (s64), align 4, addrspace 3) + %2:vgpr(i32) = G_CONSTANT i32 1020 + %3:vgpr(p3) = G_PTR_ADD %1, %2(i32) + G_STORE %0(i64), %3(p3) :: (store (i64), align 4, addrspace 3) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-private.mir index 60357abbc7721..c91ae83c6024c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-private.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-private.mir @@ -26,31 +26,31 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) + ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (i32), addrspace 5) ; ; GFX9-LABEL: name: function_store_private_s32_to_4 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (i32), addrspace 5) ; ; GFX11-LABEL: name: function_store_private_s32_to_4 ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 5) + ; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32), addrspace 5) ; ; GFX12-LABEL: name: function_store_private_s32_to_4 ; GFX12: liveins: $vgpr0, $vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX12-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 5) - %0:vgpr(s32) = COPY $vgpr0 + ; GFX12-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32), addrspace 5) + %0:vgpr(i32) = COPY $vgpr0 %1:vgpr(p5) = COPY $vgpr1 - G_STORE %0, %1 :: (store (s32), align 4, addrspace 5) + G_STORE %0(i32), %1(p5) :: (store (i32), addrspace 5) ... 
@@ -74,31 +74,31 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s16), addrspace 5) + ; GFX6-NEXT: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (i16), addrspace 5) ; ; GFX9-LABEL: name: function_store_private_s32_to_2 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s16), addrspace 5) + ; GFX9-NEXT: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (i16), addrspace 5) ; ; GFX11-LABEL: name: function_store_private_s32_to_2 ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: SCRATCH_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16), addrspace 5) + ; GFX11-NEXT: SCRATCH_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i16), addrspace 5) ; ; GFX12-LABEL: name: function_store_private_s32_to_2 ; GFX12: liveins: $vgpr0, $vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX12-NEXT: SCRATCH_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16), addrspace 5) - %0:vgpr(s32) = COPY $vgpr0 + ; GFX12-NEXT: SCRATCH_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i16), addrspace 5) + %0:vgpr(i32) = COPY $vgpr0 %1:vgpr(p5) = COPY $vgpr1 - G_STORE %0, %1 :: (store (s16), align 2, addrspace 5) + G_STORE %0(i32), %1(p5) :: (store (i16), addrspace 5) ... 
@@ -122,31 +122,31 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s8), addrspace 5) + ; GFX6-NEXT: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (i8), addrspace 5) ; ; GFX9-LABEL: name: function_store_private_s32_to_1 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s8), addrspace 5) + ; GFX9-NEXT: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (i8), addrspace 5) ; ; GFX11-LABEL: name: function_store_private_s32_to_1 ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: SCRATCH_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8), addrspace 5) + ; GFX11-NEXT: SCRATCH_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i8), addrspace 5) ; ; GFX12-LABEL: name: function_store_private_s32_to_1 ; GFX12: liveins: $vgpr0, $vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX12-NEXT: SCRATCH_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8), addrspace 5) - %0:vgpr(s32) = COPY $vgpr0 + ; GFX12-NEXT: SCRATCH_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i8), addrspace 5) + %0:vgpr(i32) = COPY $vgpr0 %1:vgpr(p5) = COPY $vgpr1 - G_STORE %0, %1 :: (store (s8), align 1, addrspace 5) + G_STORE %0(i32), %1(p5) :: (store (i8), addrspace 5) ... 
@@ -170,31 +170,31 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (<2 x s16>), addrspace 5) + ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (<2 x i16>), addrspace 5) ; ; GFX9-LABEL: name: function_store_private_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (<2 x s16>), addrspace 5) + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (<2 x i16>), addrspace 5) ; ; GFX11-LABEL: name: function_store_private_v2s16 ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>), addrspace 5) + ; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x i16>), addrspace 5) ; ; GFX12-LABEL: name: function_store_private_v2s16 ; GFX12: liveins: $vgpr0, $vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX12-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>), addrspace 5) - %0:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX12-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x i16>), addrspace 5) + %0:vgpr(<2 x i16>) = COPY $vgpr0 %1:vgpr(p5) = COPY $vgpr1 - G_STORE %0, %1 :: (store (<2 x s16>), align 4, addrspace 5) + G_STORE %0(<2 x i16>), %1(p5) :: (store (<2 x i16>), addrspace 5) ... @@ -242,7 +242,7 @@ body: | ; GFX12-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3), addrspace 5) %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(p5) = COPY $vgpr1 - G_STORE %0, %1 :: (store (p3), align 4, addrspace 5) + G_STORE %0(p3), %1(p5) :: (store (p3), addrspace 5) ... @@ -290,7 +290,7 @@ body: | ; GFX12-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p5), addrspace 5) %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(p5) = COPY $vgpr1 - G_STORE %0, %1 :: (store (p5), align 4, addrspace 5) + G_STORE %0(p5), %1(p5) :: (store (p5), addrspace 5) ... 
@@ -312,24 +312,24 @@ body: | ; GFX6-LABEL: name: function_store_private_s32_to_1_fi_offset_4095 ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX6-NEXT: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (store (s8), addrspace 5) + ; GFX6-NEXT: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (store (i8), addrspace 5) ; ; GFX9-LABEL: name: function_store_private_s32_to_1_fi_offset_4095 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX9-NEXT: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (store (s8), addrspace 5) + ; GFX9-NEXT: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (store (i8), addrspace 5) ; ; GFX11-LABEL: name: function_store_private_s32_to_1_fi_offset_4095 ; GFX11: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX11-NEXT: SCRATCH_STORE_BYTE_SADDR [[V_MOV_B32_e32_]], %stack.0, 4095, 0, implicit $exec, implicit $flat_scr :: (store (s8), addrspace 5) + ; GFX11-NEXT: SCRATCH_STORE_BYTE_SADDR [[V_MOV_B32_e32_]], %stack.0, 4095, 0, implicit $exec, implicit $flat_scr :: (store (i8), addrspace 5) ; ; GFX12-LABEL: name: function_store_private_s32_to_1_fi_offset_4095 ; GFX12: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX12-NEXT: SCRATCH_STORE_BYTE_SADDR [[V_MOV_B32_e32_]], %stack.0, 4095, 0, implicit $exec, implicit $flat_scr :: (store (s8), addrspace 5) + ; GFX12-NEXT: SCRATCH_STORE_BYTE_SADDR [[V_MOV_B32_e32_]], %stack.0, 4095, 0, implicit $exec, implicit $flat_scr :: (store (i8), addrspace 5) %0:vgpr(p5) = G_FRAME_INDEX %stack.0 - %1:vgpr(s32) = G_CONSTANT i32 4095 - %2:vgpr(p5) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_CONSTANT i32 0 - G_STORE %3, %2 :: (store (s8), align 1, addrspace 5) + %1:vgpr(i32) = G_CONSTANT i32 4095 + %2:vgpr(p5) = G_PTR_ADD %0, %1(i32) + %3:vgpr(i32) = G_CONSTANT i32 0 + G_STORE %3(i32), %2(p5) :: (store (i8), addrspace 5) ... 
@@ -351,24 +351,24 @@ body: | ; GFX6-LABEL: name: function_store_private_s32_to_1_constant_4095 ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX6-NEXT: BUFFER_STORE_BYTE_OFFSET [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (store (s8), addrspace 5) + ; GFX6-NEXT: BUFFER_STORE_BYTE_OFFSET [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (store (i8), addrspace 5) ; ; GFX9-LABEL: name: function_store_private_s32_to_1_constant_4095 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX9-NEXT: BUFFER_STORE_BYTE_OFFSET [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (store (s8), addrspace 5) + ; GFX9-NEXT: BUFFER_STORE_BYTE_OFFSET [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (store (i8), addrspace 5) ; ; GFX11-LABEL: name: function_store_private_s32_to_1_constant_4095 ; GFX11: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX11-NEXT: SCRATCH_STORE_BYTE [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8), addrspace 5) + ; GFX11-NEXT: SCRATCH_STORE_BYTE [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i8), addrspace 5) ; ; GFX12-LABEL: name: function_store_private_s32_to_1_constant_4095 ; GFX12: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX12-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX12-NEXT: SCRATCH_STORE_BYTE [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8), addrspace 5) + ; GFX12-NEXT: SCRATCH_STORE_BYTE [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i8), addrspace 5) %0:vgpr(p5) = G_CONSTANT i32 4095 - %1:vgpr(s32) = G_CONSTANT i32 0 - G_STORE %1, %0 :: (store (s8), align 1, addrspace 5) + %1:vgpr(i32) = G_CONSTANT i32 0 + G_STORE %1(i32), %0(p5) :: (store (i8), addrspace 5) ... 
@@ -391,25 +391,25 @@ body: | ; GFX6-LABEL: name: function_store_private_s32_to_1_constant_4096 ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX6-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec - ; GFX6-NEXT: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s8), addrspace 5) + ; GFX6-NEXT: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (i8), addrspace 5) ; ; GFX9-LABEL: name: function_store_private_s32_to_1_constant_4096 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec - ; GFX9-NEXT: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s8), addrspace 5) + ; GFX9-NEXT: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (i8), addrspace 5) ; ; GFX11-LABEL: name: function_store_private_s32_to_1_constant_4096 ; GFX11: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX11-NEXT: SCRATCH_STORE_BYTE [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8), addrspace 5) + ; GFX11-NEXT: SCRATCH_STORE_BYTE [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i8), addrspace 5) ; ; GFX12-LABEL: name: function_store_private_s32_to_1_constant_4096 ; GFX12: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX12-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX12-NEXT: SCRATCH_STORE_BYTE [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8), addrspace 5) + ; GFX12-NEXT: SCRATCH_STORE_BYTE [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i8), addrspace 5) %0:vgpr(p5) = G_CONSTANT i32 4096 - %1:vgpr(s32) = G_CONSTANT i32 0 - G_STORE %1, %0 :: (store (s8), align 1, addrspace 5) + %1:vgpr(i32) = G_CONSTANT i32 0 + G_STORE %1(i32), %0(p5) :: (store (i8), addrspace 5) ... 
@@ -432,31 +432,31 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) + ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (i32), addrspace 5) ; ; GFX9-LABEL: name: kernel_store_private_s32_to_4 ; GFX9: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (i32), addrspace 5) ; ; GFX11-LABEL: name: kernel_store_private_s32_to_4 ; GFX11: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 5) + ; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32), addrspace 5) ; ; GFX12-LABEL: name: kernel_store_private_s32_to_4 ; GFX12: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX12-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 5) - %0:vgpr(s32) = COPY $vgpr0 + ; GFX12-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32), addrspace 5) + %0:vgpr(i32) = COPY $vgpr0 %1:vgpr(p5) = COPY $vgpr1 - G_STORE %0, %1 :: (store (s32), align 4, addrspace 5) + G_STORE %0(i32), %1(p5) :: (store (i32), addrspace 5) ... 
@@ -479,31 +479,31 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s16), addrspace 5) + ; GFX6-NEXT: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (i16), addrspace 5) ; ; GFX9-LABEL: name: kernel_store_private_s32_to_2 ; GFX9: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s16), addrspace 5) + ; GFX9-NEXT: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (i16), addrspace 5) ; ; GFX11-LABEL: name: kernel_store_private_s32_to_2 ; GFX11: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: SCRATCH_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16), addrspace 5) + ; GFX11-NEXT: SCRATCH_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i16), addrspace 5) ; ; GFX12-LABEL: name: kernel_store_private_s32_to_2 ; GFX12: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX12-NEXT: SCRATCH_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16), addrspace 5) - %0:vgpr(s32) = COPY $vgpr0 + ; GFX12-NEXT: SCRATCH_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i16), addrspace 5) + %0:vgpr(i32) = COPY $vgpr0 %1:vgpr(p5) = COPY $vgpr1 - G_STORE %0, %1 :: (store (s16), align 2, addrspace 5) + G_STORE %0(i32), %1(p5) :: (store (i16), addrspace 5) ... 
@@ -526,31 +526,31 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s8), addrspace 5) + ; GFX6-NEXT: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (i8), addrspace 5) ; ; GFX9-LABEL: name: kernel_store_private_s32_to_1 ; GFX9: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s8), addrspace 5) + ; GFX9-NEXT: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (i8), addrspace 5) ; ; GFX11-LABEL: name: kernel_store_private_s32_to_1 ; GFX11: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: SCRATCH_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8), addrspace 5) + ; GFX11-NEXT: SCRATCH_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i8), addrspace 5) ; ; GFX12-LABEL: name: kernel_store_private_s32_to_1 ; GFX12: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX12-NEXT: SCRATCH_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8), addrspace 5) - %0:vgpr(s32) = COPY $vgpr0 + ; GFX12-NEXT: SCRATCH_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i8), addrspace 5) + %0:vgpr(i32) = COPY $vgpr0 %1:vgpr(p5) = COPY $vgpr1 - G_STORE %0, %1 :: (store (s8), align 1, addrspace 5) + G_STORE %0(i32), %1(p5) :: (store (i8), addrspace 5) ... 
@@ -574,31 +574,31 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (<2 x s16>), addrspace 5) + ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (<2 x i16>), addrspace 5) ; ; GFX9-LABEL: name: kernel_store_private_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (<2 x s16>), addrspace 5) + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (<2 x i16>), addrspace 5) ; ; GFX11-LABEL: name: kernel_store_private_v2s16 ; GFX11: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>), addrspace 5) + ; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x i16>), addrspace 5) ; ; GFX12-LABEL: name: kernel_store_private_v2s16 ; GFX12: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX12-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>), addrspace 5) - %0:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX12-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x i16>), addrspace 5) + %0:vgpr(<2 x i16>) = COPY $vgpr0 %1:vgpr(p5) = COPY $vgpr1 - G_STORE %0, %1 :: (store (<2 x s16>), align 4, addrspace 5) + G_STORE %0(<2 x i16>), %1(p5) :: (store (<2 x i16>), addrspace 5) ... @@ -645,7 +645,7 @@ body: | ; GFX12-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3), addrspace 5) %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(p5) = COPY $vgpr1 - G_STORE %0, %1 :: (store (p3), align 4, addrspace 5) + G_STORE %0(p3), %1(p5) :: (store (p3), addrspace 5) ... @@ -692,7 +692,7 @@ body: | ; GFX12-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p5), addrspace 5) %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(p5) = COPY $vgpr1 - G_STORE %0, %1 :: (store (p5), align 4, addrspace 5) + G_STORE %0(p5), %1(p5) :: (store (p5), addrspace 5) ... 
@@ -716,30 +716,30 @@ body: | ; GFX6: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX6-NEXT: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (store (s8), addrspace 5) + ; GFX6-NEXT: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (store (i8), addrspace 5) ; ; GFX9-LABEL: name: kernel_store_private_s32_to_1_fi_offset_4095 ; GFX9: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX9-NEXT: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (store (s8), addrspace 5) + ; GFX9-NEXT: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (store (i8), addrspace 5) ; ; GFX11-LABEL: name: kernel_store_private_s32_to_1_fi_offset_4095 ; GFX11: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX11-NEXT: SCRATCH_STORE_BYTE_SADDR [[V_MOV_B32_e32_]], %stack.0, 4095, 0, implicit $exec, implicit $flat_scr :: (store (s8), addrspace 5) + ; GFX11-NEXT: SCRATCH_STORE_BYTE_SADDR [[V_MOV_B32_e32_]], %stack.0, 4095, 0, implicit $exec, implicit $flat_scr :: (store (i8), addrspace 5) ; ; GFX12-LABEL: name: kernel_store_private_s32_to_1_fi_offset_4095 ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX12-NEXT: SCRATCH_STORE_BYTE_SADDR [[V_MOV_B32_e32_]], %stack.0, 4095, 0, implicit $exec, implicit $flat_scr :: (store (s8), addrspace 5) + ; GFX12-NEXT: SCRATCH_STORE_BYTE_SADDR [[V_MOV_B32_e32_]], %stack.0, 4095, 0, implicit $exec, implicit $flat_scr :: (store (i8), addrspace 5) %0:vgpr(p5) = G_FRAME_INDEX %stack.0 - %1:vgpr(s32) = G_CONSTANT i32 4095 - %2:vgpr(p5) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_CONSTANT i32 0 - G_STORE %3, %2 :: (store (s8), align 1, addrspace 5) + %1:vgpr(i32) = G_CONSTANT i32 4095 + %2:vgpr(p5) = G_PTR_ADD %0, %1(i32) + %3:vgpr(i32) = G_CONSTANT i32 0 + G_STORE %3(i32), %2(p5) :: (store (i8), addrspace 5) ... 
@@ -763,30 +763,30 @@ body: | ; GFX6: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX6-NEXT: BUFFER_STORE_BYTE_OFFSET [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (store (s8), addrspace 5) + ; GFX6-NEXT: BUFFER_STORE_BYTE_OFFSET [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (store (i8), addrspace 5) ; ; GFX9-LABEL: name: kernel_store_private_s32_to_1_constant_4095 ; GFX9: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX9-NEXT: BUFFER_STORE_BYTE_OFFSET [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (store (s8), addrspace 5) + ; GFX9-NEXT: BUFFER_STORE_BYTE_OFFSET [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (store (i8), addrspace 5) ; ; GFX11-LABEL: name: kernel_store_private_s32_to_1_constant_4095 ; GFX11: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX11-NEXT: SCRATCH_STORE_BYTE [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8), addrspace 5) + ; GFX11-NEXT: SCRATCH_STORE_BYTE [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i8), addrspace 5) ; ; GFX12-LABEL: name: kernel_store_private_s32_to_1_constant_4095 ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX12-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX12-NEXT: SCRATCH_STORE_BYTE [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8), addrspace 5) + ; GFX12-NEXT: SCRATCH_STORE_BYTE [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i8), addrspace 5) %0:vgpr(p5) = G_CONSTANT i32 4095 - %1:vgpr(s32) = G_CONSTANT i32 0 - G_STORE %1, %0 :: (store (s8), align 1, addrspace 5) + %1:vgpr(i32) = G_CONSTANT i32 0 + G_STORE %1(i32), %0(p5) :: (store (i8), addrspace 5) ... 
@@ -811,31 +811,31 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX6-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec - ; GFX6-NEXT: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s8), addrspace 5) + ; GFX6-NEXT: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (i8), addrspace 5) ; ; GFX9-LABEL: name: kernel_store_private_s32_to_1_constant_4096 ; GFX9: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec - ; GFX9-NEXT: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s8), addrspace 5) + ; GFX9-NEXT: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (i8), addrspace 5) ; ; GFX11-LABEL: name: kernel_store_private_s32_to_1_constant_4096 ; GFX11: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX11-NEXT: SCRATCH_STORE_BYTE [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8), addrspace 5) + ; GFX11-NEXT: SCRATCH_STORE_BYTE [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i8), addrspace 5) ; ; GFX12-LABEL: name: kernel_store_private_s32_to_1_constant_4096 ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX12-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX12-NEXT: SCRATCH_STORE_BYTE [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8), addrspace 5) + ; GFX12-NEXT: SCRATCH_STORE_BYTE [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i8), addrspace 5) %0:vgpr(p5) = G_CONSTANT i32 4096 - %1:vgpr(s32) = G_CONSTANT i32 0 - G_STORE %1, %0 :: (store (s8), align 1, addrspace 5) + %1:vgpr(i32) = G_CONSTANT i32 0 + G_STORE %1(i32), %0(p5) :: (store (i8), addrspace 5) ... 
@@ -857,30 +857,30 @@ body: | ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFSET [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) + ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFSET [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (i32), addrspace 5) ; ; GFX9-LABEL: name: function_store_private_s32_to_4_wave_address ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (i32), addrspace 5) ; ; GFX11-LABEL: name: function_store_private_s32_to_4_wave_address ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec - ; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[V_LSHRREV_B32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 5) + ; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[V_LSHRREV_B32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32), addrspace 5) ; ; GFX12-LABEL: name: function_store_private_s32_to_4_wave_address ; GFX12: liveins: $vgpr0, $vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec - ; GFX12-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[V_LSHRREV_B32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 5) - %0:vgpr(s32) = COPY $vgpr0 + ; GFX12-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[V_LSHRREV_B32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32), addrspace 5) + %0:vgpr(i32) = COPY $vgpr0 %1:vgpr(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 - G_STORE %0, %1 :: (store (s32), align 4, addrspace 5) + G_STORE %0(i32), %1(p5) :: (store (i32), addrspace 5) ... 
@@ -903,13 +903,13 @@ body: | ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFSET [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4095, 0, 0, implicit $exec :: (store (s32), addrspace 5) + ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFSET [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4095, 0, 0, implicit $exec :: (store (i32), addrspace 5) ; ; GFX9-LABEL: name: function_store_private_s32_to_4_wave_address_offset_copy_constant_4095 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4095, 0, 0, implicit $exec :: (store (s32), addrspace 5) + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4095, 0, 0, implicit $exec :: (store (i32), addrspace 5) ; ; GFX11-LABEL: name: function_store_private_s32_to_4_wave_address_offset_copy_constant_4095 ; GFX11: liveins: $vgpr0, $vgpr1 @@ -919,20 +919,20 @@ body: | ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GFX11-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_LSHRREV_B32_e64_]], [[COPY1]], 0, implicit $exec - ; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 5) + ; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32), addrspace 5) ; ; GFX12-LABEL: name: function_store_private_s32_to_4_wave_address_offset_copy_constant_4095 ; GFX12: liveins: $vgpr0, $vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec - ; GFX12-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[V_LSHRREV_B32_e64_]], 4095, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 5) - %0:vgpr(s32) = COPY $vgpr0 + ; GFX12-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[V_LSHRREV_B32_e64_]], 4095, 0, implicit $exec, implicit $flat_scr :: (store (i32), addrspace 5) + %0:vgpr(i32) = COPY $vgpr0 %1:vgpr(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 - %2:sgpr(s32) = G_CONSTANT i32 4095 - %3:vgpr(s32) = COPY %2 - %4:vgpr(p5) = G_PTR_ADD %1, %3 - G_STORE %0, %4 :: (store (s32), align 4, addrspace 5) + %2:sgpr(i32) = G_CONSTANT i32 4095 + %3:vgpr(i32) = COPY %2(i32) + %4:vgpr(p5) = G_PTR_ADD %1, %3(i32) + G_STORE %0(i32), %4(p5) :: (store (i32), addrspace 5) ... 
@@ -954,13 +954,13 @@ body: | ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFSET [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4095, 0, 0, implicit $exec :: (store (s32), addrspace 5) + ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFSET [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4095, 0, 0, implicit $exec :: (store (i32), addrspace 5) ; ; GFX9-LABEL: name: function_store_private_s32_to_4_wave_address_offset_4095 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4095, 0, 0, implicit $exec :: (store (s32), addrspace 5) + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4095, 0, 0, implicit $exec :: (store (i32), addrspace 5) ; ; GFX11-LABEL: name: function_store_private_s32_to_4_wave_address_offset_4095 ; GFX11: liveins: $vgpr0, $vgpr1 @@ -969,19 +969,19 @@ body: | ; GFX11-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX11-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_LSHRREV_B32_e64_]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 5) + ; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32), addrspace 5) ; ; GFX12-LABEL: name: function_store_private_s32_to_4_wave_address_offset_4095 ; GFX12: liveins: $vgpr0, $vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec - ; GFX12-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[V_LSHRREV_B32_e64_]], 4095, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 5) - %0:vgpr(s32) = COPY $vgpr0 + ; GFX12-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[V_LSHRREV_B32_e64_]], 4095, 0, implicit $exec, implicit $flat_scr :: (store (i32), addrspace 5) + %0:vgpr(i32) = COPY $vgpr0 %1:vgpr(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 - %2:vgpr(s32) = G_CONSTANT i32 4095 - %3:vgpr(p5) = G_PTR_ADD %1, %2 - G_STORE %0, %3 :: (store (s32), align 4, addrspace 5) + %2:vgpr(i32) = G_CONSTANT i32 4095 + %3:vgpr(p5) = G_PTR_ADD %1, %2(i32) + G_STORE %0(i32), %3(p5) :: (store (i32), addrspace 5) ... 
@@ -1007,7 +1007,7 @@ body: | ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[V_LSHRREV_B32_e64_]], [[COPY1]], 0, implicit $exec - ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) + ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (i32), addrspace 5) ; ; GFX9-LABEL: name: function_store_private_s32_to_4_wave_address_offset_copy_constant_4096 ; GFX9: liveins: $vgpr0, $vgpr1 @@ -1017,7 +1017,7 @@ body: | ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_LSHRREV_B32_e64_]], [[COPY1]], 0, implicit $exec - ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (i32), addrspace 5) ; ; GFX11-LABEL: name: function_store_private_s32_to_4_wave_address_offset_copy_constant_4096 ; GFX11: liveins: $vgpr0, $vgpr1 @@ -1027,19 +1027,19 @@ body: | ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GFX11-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_LSHRREV_B32_e64_]], [[COPY1]], 0, implicit $exec - ; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 5) + ; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (i32), addrspace 5) ; ; GFX12-LABEL: name: function_store_private_s32_to_4_wave_address_offset_copy_constant_4096 ; GFX12: liveins: $vgpr0, $vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec - ; GFX12-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[V_LSHRREV_B32_e64_]], 4096, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 5) - %0:vgpr(s32) = COPY $vgpr0 + ; GFX12-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[V_LSHRREV_B32_e64_]], 4096, 0, implicit $exec, implicit $flat_scr :: (store (i32), addrspace 5) + %0:vgpr(i32) = COPY $vgpr0 %1:vgpr(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 - %2:sgpr(s32) = G_CONSTANT i32 4096 - %3:vgpr(s32) = COPY %2 - %4:vgpr(p5) = G_PTR_ADD %1, %3 - G_STORE %0, %4 :: (store (s32), align 4, addrspace 5) + %2:sgpr(i32) = G_CONSTANT i32 4096 + %3:vgpr(i32) = COPY %2(i32) + %4:vgpr(p5) = G_PTR_ADD %1, %3(i32) + G_STORE %0(i32), %4(p5) :: (store (i32), addrspace 5) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sub.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sub.mir index 56ac78c87cc15..575ab4187257b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sub.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sub.mir @@ -39,21 +39,21 @@ body: | ; GFX9-NEXT: [[V_SUB_U32_e64_1:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[S_SUB_I32_]], [[V_SUB_U32_e64_]], 0, implicit $exec ; GFX9-NEXT: [[V_SUB_U32_e64_2:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[V_SUB_U32_e64_1]], [[COPY2]], 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_SUB_U32_e64_2]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:vgpr(s32) = COPY $vgpr0 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:vgpr(i32) = COPY $vgpr0 %3:vgpr(p1) = COPY $vgpr3_vgpr4 - %4:sgpr(s32) = G_CONSTANT i32 1 - %5:sgpr(s32) = G_CONSTANT i32 4096 + %4:sgpr(i32) = G_CONSTANT i32 1 + %5:sgpr(i32) = G_CONSTANT i32 4096 + %6:sgpr(i32) = G_SUB %0, %1 + %7:vgpr(i32) = G_SUB %2, %6 + %8:vgpr(i32) = G_SUB %6, %7 + %9:vgpr(i32) = G_SUB %8, %2 + S_ENDPGM 0, implicit %9(i32) - %6:sgpr(s32) = G_SUB %0, %1 - %7:vgpr(s32) = G_SUB %2, %6 - %8:vgpr(s32) = G_SUB %6, %7 - %9:vgpr(s32) = G_SUB %8, %2 - S_ENDPGM 0, implicit %9 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-trunc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-trunc.mir index 065e5dd5b81bc..9e59c688f7003 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-trunc.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-trunc.mir @@ -15,9 +15,9 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s1) = G_TRUNC %0 - S_ENDPGM 0, implicit %1 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i1) = G_TRUNC %0(i32) + S_ENDPGM 0, implicit %1(i1) ... --- @@ -33,9 +33,9 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0 - S_ENDPGM 0, implicit %1 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i16) = G_TRUNC %0(i32) + S_ENDPGM 0, implicit %1(i16) ... --- @@ -52,9 +52,9 @@ body: | ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]] - %0:sgpr(s64) = COPY $sgpr0_sgpr1 - %1:sgpr(s32) = G_TRUNC %0 - S_ENDPGM 0, implicit %1 + %0:sgpr(i64) = COPY $sgpr0_sgpr1 + %1:sgpr(i32) = G_TRUNC %0(i64) + S_ENDPGM 0, implicit %1(i32) ... --- @@ -71,9 +71,9 @@ body: | ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]] - %0:sgpr(s64) = COPY $sgpr0_sgpr1 - %1:sgpr(s16) = G_TRUNC %0 - S_ENDPGM 0, implicit %1 + %0:sgpr(i64) = COPY $sgpr0_sgpr1 + %1:sgpr(i16) = G_TRUNC %0(i64) + S_ENDPGM 0, implicit %1(i16) ... --- @@ -90,9 +90,9 @@ body: | ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]] - %0:sgpr(s64) = COPY $sgpr0_sgpr1 - %1:sgpr(s1) = G_TRUNC %0 - S_ENDPGM 0, implicit %1 + %0:sgpr(i64) = COPY $sgpr0_sgpr1 + %1:sgpr(i1) = G_TRUNC %0(i64) + S_ENDPGM 0, implicit %1(i1) ... 
--- @@ -109,9 +109,9 @@ body: | ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_96 = COPY $sgpr0_sgpr1_sgpr2 ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]] - %0:sgpr(s96) = COPY $sgpr0_sgpr1_sgpr2 - %1:sgpr(s16) = G_TRUNC %0 - S_ENDPGM 0, implicit %1 + %0:sgpr(i96) = COPY $sgpr0_sgpr1_sgpr2 + %1:sgpr(i16) = G_TRUNC %0(i96) + S_ENDPGM 0, implicit %1(i16) ... --- @@ -128,9 +128,9 @@ body: | ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_96 = COPY $sgpr0_sgpr1_sgpr2 ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[COPY]].sub0_sub1 ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]] - %0:sgpr(s96) = COPY $sgpr0_sgpr1_sgpr2 - %1:sgpr(s64) = G_TRUNC %0 - S_ENDPGM 0, implicit %1 + %0:sgpr(i96) = COPY $sgpr0_sgpr1_sgpr2 + %1:sgpr(i64) = G_TRUNC %0(i96) + S_ENDPGM 0, implicit %1(i64) ... --- @@ -147,9 +147,9 @@ body: | ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]] - %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %1:sgpr(s16) = G_TRUNC %0 - S_ENDPGM 0, implicit %1 + %0:sgpr(i128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %1:sgpr(i16) = G_TRUNC %0(i128) + S_ENDPGM 0, implicit %1(i16) ... --- @@ -166,9 +166,9 @@ body: | ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_96 = COPY [[COPY]].sub0_sub1_sub2 ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]] - %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %1:sgpr(s96) = G_TRUNC %0 - S_ENDPGM 0, implicit %1 + %0:sgpr(i128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %1:sgpr(i96) = G_TRUNC %0(i128) + S_ENDPGM 0, implicit %1(i96) ... --- @@ -185,9 +185,9 @@ body: | ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[COPY]].sub0_sub1_sub2_sub3 ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]] - %0:sgpr(s256) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - %1:sgpr(s128) = G_TRUNC %0 - S_ENDPGM 0, implicit %1 + %0:sgpr(i256) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + %1:sgpr(i128) = G_TRUNC %0(i256) + S_ENDPGM 0, implicit %1(i128) ... --- @@ -204,9 +204,9 @@ body: | ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_256 = COPY [[COPY]].sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7 ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]] - %0:sgpr(s512) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - %1:sgpr(s256) = G_TRUNC %0 - S_ENDPGM 0, implicit %1 + %0:sgpr(i512) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + %1:sgpr(i256) = G_TRUNC %0(i512) + S_ENDPGM 0, implicit %1(i256) ... --- @@ -222,9 +222,9 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s1) = G_TRUNC %0 - S_ENDPGM 0, implicit %1 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i1) = G_TRUNC %0(i32) + S_ENDPGM 0, implicit %1(i1) ... --- @@ -240,9 +240,9 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s16) = G_TRUNC %0 - S_ENDPGM 0, implicit %1 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i16) = G_TRUNC %0(i32) + S_ENDPGM 0, implicit %1(i16) ... 
--- @@ -259,9 +259,9 @@ body: | ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]] - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = G_TRUNC %0 - S_ENDPGM 0, implicit %1 + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(i32) = G_TRUNC %0(i64) + S_ENDPGM 0, implicit %1(i32) ... --- @@ -278,9 +278,9 @@ body: | ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]] - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s16) = G_TRUNC %0 - S_ENDPGM 0, implicit %1 + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(i16) = G_TRUNC %0(i64) + S_ENDPGM 0, implicit %1(i16) ... --- @@ -297,9 +297,9 @@ body: | ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]] - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s1) = G_TRUNC %0 - S_ENDPGM 0, implicit %1 + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(i1) = G_TRUNC %0(i64) + S_ENDPGM 0, implicit %1(i1) ... --- @@ -316,9 +316,9 @@ body: | ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_96 = COPY $vgpr0_vgpr1_vgpr2 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]] - %0:vgpr(s96) = COPY $vgpr0_vgpr1_vgpr2 - %1:vgpr(s16) = G_TRUNC %0 - S_ENDPGM 0, implicit %1 + %0:vgpr(i96) = COPY $vgpr0_vgpr1_vgpr2 + %1:vgpr(i16) = G_TRUNC %0(i96) + S_ENDPGM 0, implicit %1(i16) ... --- @@ -335,9 +335,9 @@ body: | ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_96 = COPY $vgpr0_vgpr1_vgpr2 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY [[COPY]].sub0_sub1 ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]] - %0:vgpr(s96) = COPY $vgpr0_vgpr1_vgpr2 - %1:vgpr(s64) = G_TRUNC %0 - S_ENDPGM 0, implicit %1 + %0:vgpr(i96) = COPY $vgpr0_vgpr1_vgpr2 + %1:vgpr(i64) = G_TRUNC %0(i96) + S_ENDPGM 0, implicit %1(i64) ... --- @@ -354,9 +354,9 @@ body: | ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]] - %0:vgpr(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:vgpr(s16) = G_TRUNC %0 - S_ENDPGM 0, implicit %1 + %0:vgpr(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:vgpr(i16) = G_TRUNC %0(i128) + S_ENDPGM 0, implicit %1(i16) ... --- @@ -373,9 +373,9 @@ body: | ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_96 = COPY [[COPY]].sub0_sub1_sub2 ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]] - %0:vgpr(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:vgpr(s96) = G_TRUNC %0 - S_ENDPGM 0, implicit %1 + %0:vgpr(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:vgpr(i96) = G_TRUNC %0(i128) + S_ENDPGM 0, implicit %1(i96) ... --- @@ -392,9 +392,9 @@ body: | ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY [[COPY]].sub0_sub1_sub2_sub3 ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]] - %0:vgpr(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - %1:vgpr(s128) = G_TRUNC %0 - S_ENDPGM 0, implicit %1 + %0:vgpr(i256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:vgpr(i128) = G_TRUNC %0(i256) + S_ENDPGM 0, implicit %1(i128) ... 
--- @@ -411,9 +411,9 @@ body: | ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_512 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_256 = COPY [[COPY]].sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7 ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]] - %0:vgpr(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - %1:vgpr(s256) = G_TRUNC %0 - S_ENDPGM 0, implicit %1 + %0:vgpr(i512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:vgpr(i256) = G_TRUNC %0(i512) + S_ENDPGM 0, implicit %1(i256) ... --- @@ -433,9 +433,9 @@ body: | ; GCN-NEXT: $scc = COPY [[COPY]] ; GCN-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY]], [[COPY1]], implicit $scc ; GCN-NEXT: S_ENDPGM 0, implicit [[S_CSELECT_B32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s1) = G_TRUNC %0 - %3:sgpr(s32) = G_SELECT %2, %0, %1 - S_ENDPGM 0, implicit %3 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i1) = G_TRUNC %0(i32) + %3:sgpr(i32) = G_SELECT %2(i1), %0, %1 + S_ENDPGM 0, implicit %3(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-trunc.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-trunc.v2s16.mir index 4db71203231f2..d73b0a7c675ad 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-trunc.v2s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-trunc.v2s16.mir @@ -49,9 +49,9 @@ body: | ; GFX11-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def dead $scc ; GFX11-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_LSHL_B32_]], [[S_AND_B32_]], implicit-def dead $scc ; GFX11-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]] - %0:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1 - %1:sgpr(<2 x s16>) = G_TRUNC %0 - S_ENDPGM 0, implicit %1 + %0:sgpr(<2 x i32>) = COPY $sgpr0_sgpr1 + %1:sgpr(<2 x i16>) = G_TRUNC %0(<2 x i32>) + S_ENDPGM 0, implicit %1(<2 x i16>) ... --- @@ -95,7 +95,7 @@ body: | ; GFX11-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec ; GFX11-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_LSHLREV_B32_e64_]], [[V_AND_B32_e64_]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_OR_B32_e64_]] - %0:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:vgpr(<2 x s16>) = G_TRUNC %0 - S_ENDPGM 0, implicit %1 + %0:vgpr(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:vgpr(<2 x i16>) = G_TRUNC %0(<2 x i32>) + S_ENDPGM 0, implicit %1(<2 x i16>) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uadde.gfx10.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uadde.gfx10.mir index ac6e610092e02..5520491625f90 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uadde.gfx10.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uadde.gfx10.mir @@ -26,16 +26,16 @@ body: | ; GFX10-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GFX10-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_2]], 0, [[V_MOV_B32_e32_1]], [[V_ADDC_U32_e64_1]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_ADDC_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = G_CONSTANT i32 0 - %4:vcc(s1) = G_ICMP intpred(eq), %2, %3 - %5:vgpr(s32), %6:vcc(s1) = G_UADDE %0, %1, %4 - %7:vgpr(s32) = G_CONSTANT i32 0 - %8:vgpr(s32) = G_CONSTANT i32 1 - %9:vgpr(s32) = G_SELECT %6, %7, %8 - S_ENDPGM 0, implicit %5, implicit %9 + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(i32) = G_CONSTANT i32 0 + %4:vcc(i1) = G_ICMP intpred(eq), %2(i32), %3 + %5:vgpr(i32), %6:vcc(i1) = G_UADDE %0, %1, %4 + %7:vgpr(i32) = G_CONSTANT i32 0 + %8:vgpr(i32) = G_CONSTANT i32 1 + %9:vgpr(i32) = G_SELECT %6(i1), %7, %8 + S_ENDPGM 0, implicit %5(i32), implicit %9(i32) ... --- @@ -60,14 +60,14 @@ body: | ; GFX10-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GFX10-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_2]], 0, [[V_MOV_B32_e32_1]], [[V_ADDC_U32_e64_1]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_ADDC_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr(s32) = COPY $sgpr0 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = G_CONSTANT i32 0 - %4:vcc(s1) = G_ICMP intpred(eq), %2, %3 - %5:vgpr(s32), %6:vcc(s1) = G_UADDE %0, %1, %4 - %7:vgpr(s32) = G_CONSTANT i32 0 - %8:vgpr(s32) = G_CONSTANT i32 1 - %9:vgpr(s32) = G_SELECT %6, %7, %8 - S_ENDPGM 0, implicit %5, implicit %9 + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = COPY $sgpr0 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(i32) = G_CONSTANT i32 0 + %4:vcc(i1) = G_ICMP intpred(eq), %2(i32), %3 + %5:vgpr(i32), %6:vcc(i1) = G_UADDE %0, %1, %4 + %7:vgpr(i32) = G_CONSTANT i32 0 + %8:vgpr(i32) = G_CONSTANT i32 1 + %9:vgpr(i32) = G_SELECT %6(i1), %7, %8 + S_ENDPGM 0, implicit %5(i32), implicit %9(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uadde.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uadde.mir index e6510acb41d89..356dd6c2c76ad 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uadde.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uadde.mir @@ -45,14 +45,14 @@ body: | ; WAVE32-NEXT: $scc = COPY [[COPY4]] ; WAVE32-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY]], [[COPY1]], implicit $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_ADDC_U32_]], implicit [[S_CSELECT_B32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = COPY $sgpr2 - %3:sgpr(s32) = G_CONSTANT i32 0 - %4:sgpr(s32) = G_ICMP intpred(eq), %2, %3 - %5:sgpr(s32), %6:sgpr(s32) = G_UADDE %0, %1, %4 - %7:sgpr(s32) = G_SELECT %6, %0, %1 - S_ENDPGM 0, implicit %5, implicit %7 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i32) = COPY $sgpr2 + %3:sgpr(i32) = G_CONSTANT i32 0 + %4:sgpr(i32) = G_ICMP intpred(eq), %2(i32), %3 + %5:sgpr(i32), %6:sgpr(i32) = G_UADDE %0, %1, %4 + %7:sgpr(i32) = G_SELECT %6(i32), %0, %1 + S_ENDPGM 0, implicit %5(i32), implicit %7(i32) ... --- @@ -87,14 +87,14 @@ body: | ; WAVE32-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY]], [[COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec ; WAVE32-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_ADDC_U32_e64_1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_ADDC_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = G_CONSTANT i32 0 - %4:vcc(s1) = G_ICMP intpred(eq), %2, %3 - %5:vgpr(s32), %6:vcc(s1) = G_UADDE %0, %1, %4 - %7:vgpr(s32) = G_SELECT %6, %0, %1 - S_ENDPGM 0, implicit %5, implicit %7 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(i32) = G_CONSTANT i32 0 + %4:vcc(i1) = G_ICMP intpred(eq), %2(i32), %3 + %5:vgpr(i32), %6:vcc(i1) = G_UADDE %0, %1, %4 + %7:vgpr(i32) = G_SELECT %6(i1), %0, %1 + S_ENDPGM 0, implicit %5(i32), implicit %7(i32) ... --- @@ -131,13 +131,13 @@ body: | ; WAVE32-NEXT: $scc = COPY [[COPY3]] ; WAVE32-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY]], [[COPY1]], implicit-def dead $scc, implicit $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_ADDC_U32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = COPY $sgpr2 - %3:sgpr(s32) = G_CONSTANT i32 0 - %4:sgpr(s32) = G_ICMP intpred(eq), %2, %3 - %5:sgpr(s32), %6:sgpr(s32) = G_UADDE %0, %1, %4 - S_ENDPGM 0, implicit %5 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i32) = COPY $sgpr2 + %3:sgpr(i32) = G_CONSTANT i32 0 + %4:sgpr(i32) = G_ICMP intpred(eq), %2(i32), %3 + %5:sgpr(i32), %6:sgpr(i32) = G_UADDE %0, %1, %4 + S_ENDPGM 0, implicit %5(i32) ... 
--- @@ -170,11 +170,11 @@ body: | ; WAVE32-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[V_MOV_B32_e32_]], implicit $exec ; WAVE32-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY]], [[COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_ADDC_U32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = G_CONSTANT i32 0 - %4:vcc(s1) = G_ICMP intpred(eq), %2, %3 - %5:vgpr(s32), %6:vcc(s1) = G_UADDE %0, %1, %4 - S_ENDPGM 0, implicit %5 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(i32) = G_CONSTANT i32 0 + %4:vcc(i1) = G_ICMP intpred(eq), %2(i32), %3 + %5:vgpr(i32), %6:vcc(i1) = G_UADDE %0, %1, %4 + S_ENDPGM 0, implicit %5(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uaddo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uaddo.mir index 49c9263959eaa..965483d0b99de 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uaddo.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uaddo.mir @@ -24,6 +24,7 @@ body: | ; GFX6-NEXT: $scc = COPY [[COPY2]] ; GFX6-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY]], [[COPY1]], implicit $scc ; GFX6-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]], implicit [[S_CSELECT_B32_]] + ; ; GFX8-LABEL: name: uaddo_s32_s1_sss ; GFX8: liveins: $sgpr0, $sgpr1 ; GFX8-NEXT: {{ $}} @@ -34,6 +35,7 @@ body: | ; GFX8-NEXT: $scc = COPY [[COPY2]] ; GFX8-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY]], [[COPY1]], implicit $scc ; GFX8-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]], implicit [[S_CSELECT_B32_]] + ; ; GFX9-LABEL: name: uaddo_s32_s1_sss ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9-NEXT: {{ $}} @@ -44,6 +46,7 @@ body: | ; GFX9-NEXT: $scc = COPY [[COPY2]] ; GFX9-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY]], [[COPY1]], implicit $scc ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]], implicit [[S_CSELECT_B32_]] + ; ; GFX10-LABEL: name: uaddo_s32_s1_sss ; GFX10: liveins: $sgpr0, $sgpr1 ; GFX10-NEXT: {{ $}} @@ -54,11 +57,11 @@ body: | ; GFX10-NEXT: $scc = COPY [[COPY2]] ; GFX10-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY]], [[COPY1]], implicit $scc ; GFX10-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]], implicit [[S_CSELECT_B32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32), %3:sgpr(s32) = G_UADDO %0, %1 - %4:sgpr(s32) = G_SELECT %3, %0, %1 - S_ENDPGM 0, implicit %2, implicit %4 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i32), %3:sgpr(i32) = G_UADDO %0, %1 + %4:sgpr(i32) = G_SELECT %3(i32), %0, %1 + S_ENDPGM 0, implicit %2(i32), implicit %4(i32) ... 
--- @@ -78,6 +81,7 @@ body: | ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec ; GFX6-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_ADD_CO_U32_e64_1]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] + ; ; GFX8-LABEL: name: uaddo_s32_s1_vvv ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -86,6 +90,7 @@ body: | ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec ; GFX8-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_ADD_CO_U32_e64_1]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] + ; ; GFX9-LABEL: name: uaddo_s32_s1_vvv ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -94,6 +99,7 @@ body: | ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec ; GFX9-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_ADD_CO_U32_e64_1]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] + ; ; GFX10-LABEL: name: uaddo_s32_s1_vvv ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} @@ -102,11 +108,11 @@ body: | ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec ; GFX10-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_ADD_CO_U32_e64_1]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32), %3:vcc(s1) = G_UADDO %0, %1 - %4:vgpr(s32) = G_SELECT %3, %0, %1 - S_ENDPGM 0, implicit %2, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32), %3:vcc(i1) = G_UADDO %0, %1 + %4:vgpr(i32) = G_SELECT %3(i1), %0, %1 + S_ENDPGM 0, implicit %2(i32), implicit %4(i32) ... 
--- @@ -128,6 +134,7 @@ body: | ; GFX6-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GFX6-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_ADD_CO_U32_e64_1]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] + ; ; GFX8-LABEL: name: uaddo_s32_s1_vsv ; GFX8: liveins: $sgpr0, $vgpr0 ; GFX8-NEXT: {{ $}} @@ -138,6 +145,7 @@ body: | ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GFX8-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_ADD_CO_U32_e64_1]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] + ; ; GFX9-LABEL: name: uaddo_s32_s1_vsv ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} @@ -148,6 +156,7 @@ body: | ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GFX9-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_ADD_CO_U32_e64_1]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] + ; ; GFX10-LABEL: name: uaddo_s32_s1_vsv ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} @@ -158,13 +167,13 @@ body: | ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GFX10-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_ADD_CO_U32_e64_1]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(s32), %3:vcc(s1) = G_UADDO %0, %1 - %4:vgpr(s32) = G_CONSTANT i32 0 - %5:vgpr(s32) = G_CONSTANT i32 1 - %6:vgpr(s32) = G_SELECT %3, %4, %5 - S_ENDPGM 0, implicit %2, implicit %6 + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:vgpr(i32), %3:vcc(i1) = G_UADDO %0, %1 + %4:vgpr(i32) = G_CONSTANT i32 0 + %5:vgpr(i32) = G_CONSTANT i32 1 + %6:vgpr(i32) = G_SELECT %3(i1), %4, %5 + S_ENDPGM 0, implicit %2(i32), implicit %6(i32) ... 
--- @@ -186,6 +195,7 @@ body: | ; GFX6-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GFX6-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_ADD_CO_U32_e64_1]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] + ; ; GFX8-LABEL: name: uaddo_s32_s1_vvs ; GFX8: liveins: $sgpr0, $vgpr0 ; GFX8-NEXT: {{ $}} @@ -196,6 +206,7 @@ body: | ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GFX8-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_ADD_CO_U32_e64_1]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] + ; ; GFX9-LABEL: name: uaddo_s32_s1_vvs ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} @@ -206,6 +217,7 @@ body: | ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GFX9-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_ADD_CO_U32_e64_1]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] + ; ; GFX10-LABEL: name: uaddo_s32_s1_vvs ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} @@ -216,11 +228,11 @@ body: | ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GFX10-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_ADD_CO_U32_e64_1]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr(s32) = COPY $sgpr0 - %2:vgpr(s32), %3:vcc(s1) = G_UADDO %0, %1 - %4:vgpr(s32) = G_CONSTANT i32 0 - %5:vgpr(s32) = G_CONSTANT i32 1 - %6:vgpr(s32) = G_SELECT %3, %4, %5 - S_ENDPGM 0, implicit %2, implicit %6 + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = COPY $sgpr0 + %2:vgpr(i32), %3:vcc(i1) = G_UADDO %0, %1 + %4:vgpr(i32) = G_CONSTANT i32 0 + %5:vgpr(i32) = G_CONSTANT i32 1 + %6:vgpr(i32) = G_SELECT %3(i1), %4, %5 + S_ENDPGM 0, implicit %2(i32), implicit %6(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ubfx.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ubfx.mir index be2da5a79a48a..3a08ae820c2a0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ubfx.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ubfx.mir @@ -21,11 +21,11 @@ body: | ; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 10, implicit $exec ; CHECK-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BFE_U32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = G_CONSTANT i32 2 - %2:vgpr(s32) = G_CONSTANT i32 10 - %3:vgpr(s32) = G_UBFX %0, %1(s32), %2 - S_ENDPGM 0, implicit %3 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = G_CONSTANT i32 2 + %2:vgpr(i32) = G_CONSTANT i32 10 + %3:vgpr(i32) = G_UBFX %0, %1(i32), %2 + S_ENDPGM 0, implicit %3(i32) ... 
--- @@ -44,9 +44,9 @@ body: | ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BFE_U32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = G_UBFX %0, %1(s32), %2 - S_ENDPGM 0, implicit %3 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(i32) = G_UBFX %0, %1(i32), %2 + S_ENDPGM 0, implicit %3(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uitofp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uitofp.mir index 35d622dc57d18..4824fccae47e6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uitofp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uitofp.mir @@ -34,9 +34,10 @@ body: | ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: $vgpr0 = COPY [[V_CVT_F32_U32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = G_UITOFP %0 - $vgpr0 = COPY %1 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(f32) = G_UITOFP %0(i32) + %2:vgpr(i32) = G_BITCAST %1(f32) + $vgpr0 = COPY %2(i32) ... --- @@ -69,9 +70,10 @@ body: | ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX11-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: $vgpr0 = COPY [[V_CVT_F32_U32_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = G_UITOFP %0 - $vgpr0 = COPY %1 + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(f32) = G_UITOFP %0(i32) + %2:vgpr(i32) = G_BITCAST %1(f32) + $vgpr0 = COPY %2(i32) ... --- @@ -117,10 +119,11 @@ body: | ; GFX11-FAKE16-NEXT: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[COPY]], implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CVT_F16_F32_fake16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s16) = G_UITOFP %0 - %2:vgpr(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(f16) = G_UITOFP %0(i32) + %2:vgpr(i16) = G_BITCAST %1(f16) + %3:vgpr(i32) = G_ANYEXT %2(i16) + $vgpr0 = COPY %3(i32) ... --- @@ -166,8 +169,9 @@ body: | ; GFX11-FAKE16-NEXT: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[COPY]], implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CVT_F16_F32_fake16_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s16) = G_UITOFP %0 - %2:vgpr(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(f16) = G_UITOFP %0(i32) + %2:vgpr(i16) = G_BITCAST %1(f16) + %3:vgpr(i32) = G_ANYEXT %2(i16) + $vgpr0 = COPY %3(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-umax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-umax.mir index ac2c595069700..52a4e24af83ae 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-umax.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-umax.mir @@ -18,10 +18,10 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GCN-NEXT: [[S_MAX_U32_:%[0-9]+]]:sreg_32 = S_MAX_U32 [[COPY]], [[COPY1]], implicit-def dead $scc ; GCN-NEXT: S_ENDPGM 0, implicit [[S_MAX_U32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = G_UMAX %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i32) = G_UMAX %0, %1 + S_ENDPGM 0, implicit %2(i32) ... --- @@ -39,10 +39,10 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN-NEXT: [[V_MAX_U32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U32_e64 [[COPY]], [[COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MAX_U32_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(s32) = G_UMAX %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:vgpr(i32) = G_UMAX %0, %1 + S_ENDPGM 0, implicit %2(i32) ... --- @@ -60,10 +60,10 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GCN-NEXT: [[V_MAX_U32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U32_e64 [[COPY]], [[COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MAX_U32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr(s32) = COPY $sgpr0 - %2:vgpr(s32) = G_UMAX %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = COPY $sgpr0 + %2:vgpr(i32) = G_UMAX %0, %1 + S_ENDPGM 0, implicit %2(i32) ... --- @@ -81,8 +81,8 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GCN-NEXT: [[V_MAX_U32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U32_e64 [[COPY]], [[COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MAX_U32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = G_UMAX %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = G_UMAX %0, %1 + S_ENDPGM 0, implicit %2(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-umin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-umin.mir index b7c89b61f5cc4..40e648b72f7c5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-umin.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-umin.mir @@ -18,10 +18,10 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GCN-NEXT: [[S_MIN_U32_:%[0-9]+]]:sreg_32 = S_MIN_U32 [[COPY]], [[COPY1]], implicit-def dead $scc ; GCN-NEXT: S_ENDPGM 0, implicit [[S_MIN_U32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = G_UMIN %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i32) = G_UMIN %0, %1 + S_ENDPGM 0, implicit %2(i32) ... --- @@ -39,10 +39,10 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN-NEXT: [[V_MIN_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[COPY]], [[COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MIN_U32_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(s32) = G_UMIN %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:vgpr(i32) = G_UMIN %0, %1 + S_ENDPGM 0, implicit %2(i32) ... 
--- @@ -60,10 +60,10 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GCN-NEXT: [[V_MIN_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[COPY]], [[COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MIN_U32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr(s32) = COPY $sgpr0 - %2:vgpr(s32) = G_UMIN %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = COPY $sgpr0 + %2:vgpr(i32) = G_UMIN %0, %1 + S_ENDPGM 0, implicit %2(i32) ... --- @@ -81,8 +81,8 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GCN-NEXT: [[V_MIN_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[COPY]], [[COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MIN_U32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = G_UMIN %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = G_UMIN %0, %1 + S_ENDPGM 0, implicit %2(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-umulh.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-umulh.mir index 7a43e600e3a66..b2195629e8fe2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-umulh.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-umulh.mir @@ -6,7 +6,7 @@ # RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s # ERR-NOT: remark: -# ERR: remark: :0:0: cannot select: %2:sgpr(s32) = G_UMULH %0:sgpr, %1:sgpr (in function: umulh_s32_ss) +# ERR: remark: :0:0: cannot select: %2:sgpr(i32) = G_UMULH %0:sgpr, %1:sgpr (in function: umulh_s32_ss) # ERR-NOT: remark: --- @@ -21,10 +21,11 @@ body: | ; SI-LABEL: name: umulh_s32_ss ; SI: liveins: $sgpr0, $sgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; SI-NEXT: [[UMULH:%[0-9]+]]:sgpr(s32) = G_UMULH [[COPY]], [[COPY1]] - ; SI-NEXT: S_ENDPGM 0, implicit [[UMULH]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; SI-NEXT: [[UMULH:%[0-9]+]]:sgpr(i32) = G_UMULH [[COPY]], [[COPY1]] + ; SI-NEXT: S_ENDPGM 0, implicit [[UMULH]](i32) + ; ; GFX9-LABEL: name: umulh_s32_ss ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9-NEXT: {{ $}} @@ -32,10 +33,10 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GFX9-NEXT: [[S_MUL_HI_U32_:%[0-9]+]]:sreg_32 = S_MUL_HI_U32 [[COPY]], [[COPY1]] ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_MUL_HI_U32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = G_UMULH %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i32) = G_UMULH %0, %1 + S_ENDPGM 0, implicit %2(i32) ... 
--- @@ -54,6 +55,7 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; SI-NEXT: [[V_MUL_HI_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[COPY]], [[COPY1]], implicit $exec ; SI-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_U32_e64_]] + ; ; GFX9-LABEL: name: umulh_s32_sv ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} @@ -61,10 +63,10 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[V_MUL_HI_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[COPY]], [[COPY1]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_U32_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(s32) = G_UMULH %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:vgpr(i32) = G_UMULH %0, %1 + S_ENDPGM 0, implicit %2(i32) ... --- @@ -83,6 +85,7 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; SI-NEXT: [[V_MUL_HI_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[COPY]], [[COPY1]], implicit $exec ; SI-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_U32_e64_]] + ; ; GFX9-LABEL: name: umulh_s32_vs ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} @@ -90,10 +93,10 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX9-NEXT: [[V_MUL_HI_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[COPY]], [[COPY1]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_U32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr(s32) = COPY $sgpr0 - %2:vgpr(s32) = G_UMULH %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = COPY $sgpr0 + %2:vgpr(i32) = G_UMULH %0, %1 + S_ENDPGM 0, implicit %2(i32) ... --- @@ -112,6 +115,7 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; SI-NEXT: [[V_MUL_HI_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[COPY]], [[COPY1]], implicit $exec ; SI-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_U32_e64_]] + ; ; GFX9-LABEL: name: umulh_s32_vv ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -119,8 +123,8 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9-NEXT: [[V_MUL_HI_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[COPY]], [[COPY1]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_U32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = G_UMULH %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = G_UMULH %0, %1 + S_ENDPGM 0, implicit %2(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-unmerge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-unmerge-values.mir index bec5f646b7839..f26bf2cc4a69f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-unmerge-values.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-unmerge-values.mir @@ -18,9 +18,9 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]], implicit [[COPY2]] - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s32), %2:vgpr(s32) = G_UNMERGE_VALUES %0 - S_ENDPGM 0, implicit %1, implicit %2 + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(i32), %2:vgpr(i32) = G_UNMERGE_VALUES %0(i64) + S_ENDPGM 0, implicit %1(i32), implicit %2(i32) ... 
--- @@ -40,9 +40,9 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]], implicit [[COPY2]] - %0:sgpr(s64) = COPY $sgpr0_sgpr1 - %1:sgpr(s32), %2:sgpr(s32) = G_UNMERGE_VALUES %0 - S_ENDPGM 0, implicit %1, implicit %2 + %0:sgpr(i64) = COPY $sgpr0_sgpr1 + %1:sgpr(i32), %2:sgpr(i32) = G_UNMERGE_VALUES %0(i64) + S_ENDPGM 0, implicit %1(i32), implicit %2(i32) ... --- @@ -62,9 +62,9 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]], implicit [[COPY2]] - %0:sgpr(s64) = COPY $sgpr0_sgpr1 - %1:vgpr(s32), %2:sgpr(s32) = G_UNMERGE_VALUES %0 - S_ENDPGM 0, implicit %1, implicit %2 + %0:sgpr(i64) = COPY $sgpr0_sgpr1 + %1:vgpr(i32), %2:sgpr(i32) = G_UNMERGE_VALUES %0(i64) + S_ENDPGM 0, implicit %1(i32), implicit %2(i32) ... --- @@ -84,9 +84,9 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]], implicit [[COPY2]] - %0:sgpr(s64) = COPY $sgpr0_sgpr1 - %1:sgpr(s32), %2:vgpr(s32) = G_UNMERGE_VALUES %0 - S_ENDPGM 0, implicit %1, implicit %2 + %0:sgpr(i64) = COPY $sgpr0_sgpr1 + %1:sgpr(i32), %2:vgpr(i32) = G_UNMERGE_VALUES %0(i64) + S_ENDPGM 0, implicit %1(i32), implicit %2(i32) ... --- @@ -107,9 +107,9 @@ body: | ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub2 ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]], implicit [[COPY2]], implicit [[COPY3]] - %0:sgpr(s96) = COPY $sgpr0_sgpr1_sgpr2 - %1:sgpr(s32), %2:sgpr(s32), %3:sgpr(s32) = G_UNMERGE_VALUES %0 - S_ENDPGM 0, implicit %1, implicit %2, implicit %3 + %0:sgpr(i96) = COPY $sgpr0_sgpr1_sgpr2 + %1:sgpr(i32), %2:sgpr(i32), %3:sgpr(i32) = G_UNMERGE_VALUES %0(i96) + S_ENDPGM 0, implicit %1(i32), implicit %2(i32), implicit %3(i32) ... --- @@ -131,9 +131,9 @@ body: | ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub2 ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub3 ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]], implicit [[COPY2]], implicit [[COPY3]], implicit [[COPY4]] - %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %1:sgpr(s32), %2:sgpr(s32), %3:sgpr(s32), %4:sgpr(s32) = G_UNMERGE_VALUES %0 - S_ENDPGM 0, implicit %1, implicit %2, implicit %3, implicit %4 + %0:sgpr(i128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %1:sgpr(i32), %2:sgpr(i32), %3:sgpr(i32), %4:sgpr(i32) = G_UNMERGE_VALUES %0(i128) + S_ENDPGM 0, implicit %1(i32), implicit %2(i32), implicit %3(i32), implicit %4(i32) ... --- @@ -153,9 +153,9 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[COPY]].sub0_sub1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY [[COPY]].sub2_sub3 ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]], implicit [[COPY2]] - %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %1:sgpr(s64), %2:sgpr(s64) = G_UNMERGE_VALUES %0 - S_ENDPGM 0, implicit %1, implicit %2 + %0:sgpr(i128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %1:sgpr(i64), %2:sgpr(i64) = G_UNMERGE_VALUES %0(i128) + S_ENDPGM 0, implicit %1(i64), implicit %2(i64) ... 
--- @@ -176,9 +176,9 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[DEF]].sub2_sub3 ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY [[DEF]].sub4_sub5 ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]], implicit [[COPY1]], implicit [[COPY2]] - %0:sgpr(s192) = G_IMPLICIT_DEF - %1:sgpr(s64), %2:sgpr(s64), %3:sgpr(s64) = G_UNMERGE_VALUES %0 - S_ENDPGM 0, implicit %1, implicit %2, implicit %3 + %0:sgpr(i192) = G_IMPLICIT_DEF + %1:sgpr(i64), %2:sgpr(i64), %3:sgpr(i64) = G_UNMERGE_VALUES %0(i192) + S_ENDPGM 0, implicit %1(i64), implicit %2(i64), implicit %3(i64) ... --- @@ -198,9 +198,9 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]], implicit [[COPY2]] - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr_32(s32), %2:vgpr_32(s32) = G_UNMERGE_VALUES %0 - S_ENDPGM 0, implicit %1, implicit %2 + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr_32(i32), %2:vgpr_32(i32) = G_UNMERGE_VALUES %0(i64) + S_ENDPGM 0, implicit %1(i32), implicit %2(i32) ... --- @@ -220,9 +220,9 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]], implicit [[COPY2]] - %0:vreg_64(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s32), %2:vgpr(s32) = G_UNMERGE_VALUES %0 - S_ENDPGM 0, implicit %1, implicit %2 + %0:vreg_64(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(i32), %2:vgpr(i32) = G_UNMERGE_VALUES %0(i64) + S_ENDPGM 0, implicit %1(i32), implicit %2(i32) ... --- @@ -244,9 +244,9 @@ body: | ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_256 = COPY [[DEF]].sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23 ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_256 = COPY [[DEF]].sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31 ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]], implicit [[COPY1]], implicit [[COPY2]], implicit [[COPY3]] - %0:sgpr(s1024) = G_IMPLICIT_DEF - %1:sgpr(s256), %2:sgpr(s256), %3:sgpr(s256), %4:sgpr(s256) = G_UNMERGE_VALUES %0 - S_ENDPGM 0, implicit %1, implicit %2, implicit %3, implicit %4 + %0:sgpr(i1024) = G_IMPLICIT_DEF + %1:sgpr(i256), %2:sgpr(i256), %3:sgpr(i256), %4:sgpr(i256) = G_UNMERGE_VALUES %0(i1024) + S_ENDPGM 0, implicit %1(i256), implicit %2(i256), implicit %3(i256), implicit %4(i256) ... 
--- @@ -268,10 +268,10 @@ body: | ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_512 = COPY [[COPY]].sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31 ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[COPY1]] ; GCN-NEXT: $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = COPY [[COPY2]] - %0:sgpr(s1024) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 - %1:sgpr(s512), %2:sgpr(s512) = G_UNMERGE_VALUES %0 - $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %1 - $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = COPY %2 + %0:sgpr(i1024) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + %1:sgpr(i512), %2:sgpr(i512) = G_UNMERGE_VALUES %0(i1024) + $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %1(i512) + $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = COPY %2(i512) ... --- @@ -287,28 +287,28 @@ body: | ; GCN-LABEL: name: test_unmerge_s_v3s32_s_v12s32 ; GCN: liveins: $sgpr0_sgpr1_sgpr2, $sgpr4_sgpr5_sgpr6, $sgpr8_sgpr9_sgpr10, $sgpr12_sgpr13_sgpr14 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(<3 x s32>) = COPY $sgpr0_sgpr1_sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr(<3 x s32>) = COPY $sgpr4_sgpr5_sgpr6 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr(<3 x s32>) = COPY $sgpr8_sgpr9_sgpr10 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr(<3 x s32>) = COPY $sgpr12_sgpr13_sgpr14 - ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:sgpr_384(<12 x s32>) = G_CONCAT_VECTORS [[COPY]](<3 x s32>), [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>) - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_96(<3 x s32>) = COPY [[CONCAT_VECTORS]].sub0_sub1_sub2(<12 x s32>) - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_96(<3 x s32>) = COPY [[CONCAT_VECTORS]].sub3_sub4_sub5(<12 x s32>) - ; GCN-NEXT: [[UV:%[0-9]+]]:sgpr_96(<3 x s32>), [[UV1:%[0-9]+]]:sgpr_96(<3 x s32>), [[UV2:%[0-9]+]]:sgpr_96(<3 x s32>), [[UV3:%[0-9]+]]:sgpr_96(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2 = COPY [[UV]](<3 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5_sgpr6 = COPY [[UV1]](<3 x s32>) - ; GCN-NEXT: $sgpr8_sgpr9_sgpr10 = COPY [[UV2]](<3 x s32>) - ; GCN-NEXT: $sgpr12_sgpr13_sgpr14 = COPY [[UV3]](<3 x s32>) - %0:sgpr(<3 x s32>) = COPY $sgpr0_sgpr1_sgpr2 - %1:sgpr(<3 x s32>) = COPY $sgpr4_sgpr5_sgpr6 - %2:sgpr(<3 x s32>) = COPY $sgpr8_sgpr9_sgpr10 - %3:sgpr(<3 x s32>) = COPY $sgpr12_sgpr13_sgpr14 - %4:sgpr(<12 x s32>) = G_CONCAT_VECTORS %0, %1, %2, %3 - %5:sgpr(<3 x s32>), %6:sgpr(<3 x s32>), %7:sgpr(<3 x s32>), %8:sgpr(<3 x s32>) = G_UNMERGE_VALUES %4 - $sgpr0_sgpr1_sgpr2 = COPY %5 - $sgpr4_sgpr5_sgpr6 = COPY %6 - $sgpr8_sgpr9_sgpr10 = COPY %7 - $sgpr12_sgpr13_sgpr14 = COPY %8 + ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(<3 x i32>) = COPY $sgpr0_sgpr1_sgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr(<3 x i32>) = COPY $sgpr4_sgpr5_sgpr6 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr(<3 x i32>) = COPY $sgpr8_sgpr9_sgpr10
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr(<3 x i32>) = COPY $sgpr12_sgpr13_sgpr14 + ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:sgpr_384(<12 x i32>) = G_CONCAT_VECTORS [[COPY]](<3 x i32>), [[COPY1]](<3 x i32>), [[COPY2]](<3 x i32>), [[COPY3]](<3 x i32>) + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_96(<3 x i32>) = COPY [[CONCAT_VECTORS]].sub0_sub1_sub2(<12 x i32>) + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_96(<3 x i32>) = COPY [[CONCAT_VECTORS]].sub3_sub4_sub5(<12 x i32>) + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_96(<3 x i32>), [[COPY5:%[0-9]+]]:sgpr_96(<3 x i32>), [[UV:%[0-9]+]]:sgpr_96(<3 x i32>), [[UV1:%[0-9]+]]:sgpr_96(<3 x i32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x i32>) + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2 = COPY [[COPY4]](<3 x i32>) + ; GCN-NEXT: $sgpr4_sgpr5_sgpr6 = COPY [[COPY5]](<3 x i32>) + ; GCN-NEXT: $sgpr8_sgpr9_sgpr10 = COPY [[UV]](<3 x i32>) + ; GCN-NEXT: $sgpr12_sgpr13_sgpr14 = COPY [[UV1]](<3 x i32>) + %0:sgpr(<3 x i32>) = COPY $sgpr0_sgpr1_sgpr2 + %1:sgpr(<3 x i32>) = COPY $sgpr4_sgpr5_sgpr6 + %2:sgpr(<3 x i32>) = COPY $sgpr8_sgpr9_sgpr10 + %3:sgpr(<3 x i32>) = COPY $sgpr12_sgpr13_sgpr14 + %4:sgpr(<12 x i32>) = G_CONCAT_VECTORS %0(<3 x i32>), %1(<3 x i32>), %2(<3 x i32>), %3(<3 x i32>) + %5:sgpr(<3 x i32>), %6:sgpr(<3 x i32>), %7:sgpr(<3 x i32>), %8:sgpr(<3 x i32>) = G_UNMERGE_VALUES %4(<12 x i32>) + $sgpr0_sgpr1_sgpr2 = COPY %5(<3 x i32>) + $sgpr4_sgpr5_sgpr6 = COPY %6(<3 x i32>) + $sgpr8_sgpr9_sgpr10 = COPY %7(<3 x i32>) + $sgpr12_sgpr13_sgpr14 = COPY %8(<3 x i32>) ... @@ -335,13 +335,13 @@ body: | ; GCN-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY3]] ; GCN-NEXT: $vgpr6_vgpr7_vgpr8 = COPY [[COPY4]] ; GCN-NEXT: $vgpr9_vgpr10_vgpr11 = COPY [[COPY5]] - %0:vgpr(<6 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - %1:vgpr(<6 x s32>) = COPY $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 - %2:vgpr(<12 x s32>) = G_CONCAT_VECTORS %0, %1 - %3:vgpr(<3 x s32>), %4:vgpr(<3 x s32>), %5:vgpr(<3 x s32>), %6:vgpr(<3 x s32>) = G_UNMERGE_VALUES %2 - $vgpr0_vgpr1_vgpr2 = COPY %3 - $vgpr3_vgpr4_vgpr5 = COPY %4 - $vgpr6_vgpr7_vgpr8 = COPY %5 - $vgpr9_vgpr10_vgpr11 = COPY %6 + %0:vgpr(<6 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + %1:vgpr(<6 x i32>) = COPY $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 + %2:vgpr(<12 x i32>) = G_CONCAT_VECTORS %0(<6 x i32>), %1(<6 x i32>) + %3:vgpr(<3 x i32>), %4:vgpr(<3 x i32>), %5:vgpr(<3 x i32>), %6:vgpr(<3 x i32>) = G_UNMERGE_VALUES %2(<12 x i32>) + $vgpr0_vgpr1_vgpr2 = COPY %3(<3 x i32>) + $vgpr3_vgpr4_vgpr5 = COPY %4(<3 x i32>) + $vgpr6_vgpr7_vgpr8 = COPY %5(<3 x i32>) + $vgpr9_vgpr10_vgpr11 = COPY %6(<3 x i32>) ...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usube.gfx10.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usube.gfx10.mir index d7ba80fdc4784..feda9fea55205 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usube.gfx10.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usube.gfx10.mir @@ -26,16 +26,16 @@ body: | ; GFX10-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GFX10-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_2]], 0, [[V_MOV_B32_e32_1]], [[V_SUBB_U32_e64_1]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_SUBB_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = G_CONSTANT i32 0 - %4:vcc(s1) = G_ICMP intpred(eq), %2, %3 - %5:vgpr(s32), %6:vcc(s1) = G_USUBE %0, %1, %4 - %7:vgpr(s32) = G_CONSTANT i32 0 - %8:vgpr(s32) = G_CONSTANT i32 1 - %9:vgpr(s32) = G_SELECT %6, %7, %8 - S_ENDPGM 0, implicit %5, implicit %9 + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(i32) = G_CONSTANT i32 0 + %4:vcc(i1) = G_ICMP intpred(eq), %2(i32), %3 + %5:vgpr(i32), %6:vcc(i1) = G_USUBE %0, %1, %4 + %7:vgpr(i32) = G_CONSTANT i32 0 + %8:vgpr(i32) = G_CONSTANT i32 1 + %9:vgpr(i32) = G_SELECT %6(i1), %7, %8 + S_ENDPGM 0, implicit %5(i32), implicit %9(i32) ... --- @@ -60,14 +60,14 @@ body: | ; GFX10-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GFX10-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_2]], 0, [[V_MOV_B32_e32_1]], [[V_SUBB_U32_e64_1]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_SUBB_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr(s32) = COPY $sgpr0 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = G_CONSTANT i32 0 - %4:vcc(s1) = G_ICMP intpred(eq), %2, %3 - %5:vgpr(s32), %6:vcc(s1) = G_USUBE %0, %1, %4 - %7:vgpr(s32) = G_CONSTANT i32 0 - %8:vgpr(s32) = G_CONSTANT i32 1 - %9:vgpr(s32) = G_SELECT %6, %7, %8 - S_ENDPGM 0, implicit %5, implicit %9 + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = COPY $sgpr0 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(i32) = G_CONSTANT i32 0 + %4:vcc(i1) = G_ICMP intpred(eq), %2(i32), %3 + %5:vgpr(i32), %6:vcc(i1) = G_USUBE %0, %1, %4 + %7:vgpr(i32) = G_CONSTANT i32 0 + %8:vgpr(i32) = G_CONSTANT i32 1 + %9:vgpr(i32) = G_SELECT %6(i1), %7, %8 + S_ENDPGM 0, implicit %5(i32), implicit %9(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usube.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usube.mir index d68557289b603..32e730d93f53d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usube.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usube.mir @@ -45,14 +45,14 @@ body: | ; WAVE32-NEXT: $scc = COPY [[COPY4]] ; WAVE32-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY]], [[COPY1]], implicit $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_SUBB_U32_]], implicit [[S_CSELECT_B32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = COPY $sgpr2 - %3:sgpr(s32) = G_CONSTANT i32 0 - %4:sgpr(s32) = G_ICMP intpred(eq), %2, %3 - %5:sgpr(s32), %6:sgpr(s32) = G_USUBE %0, %1, %4 - %7:sgpr(s32) = G_SELECT %6, %0, %1 - S_ENDPGM 0, implicit %5, implicit %7 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i32) = COPY $sgpr2 + %3:sgpr(i32) = G_CONSTANT i32 0 + %4:sgpr(i32) = G_ICMP intpred(eq), %2(i32), %3 + %5:sgpr(i32), %6:sgpr(i32) = G_USUBE %0, %1, %4 + %7:sgpr(i32) = G_SELECT %6(i32), %0, %1 + S_ENDPGM 0, implicit %5(i32), implicit %7(i32) ... --- @@ -87,14 +87,14 @@ body: | ; WAVE32-NEXT: [[V_SUBB_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUBB_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_SUBB_U32_e64 [[COPY]], [[COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec ; WAVE32-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_SUBB_U32_e64_1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_SUBB_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = G_CONSTANT i32 0 - %4:vcc(s1) = G_ICMP intpred(eq), %2, %3 - %5:vgpr(s32), %6:vcc(s1) = G_USUBE %0, %1, %4 - %7:vgpr(s32) = G_SELECT %6, %0, %1 - S_ENDPGM 0, implicit %5, implicit %7 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(i32) = G_CONSTANT i32 0 + %4:vcc(i1) = G_ICMP intpred(eq), %2(i32), %3 + %5:vgpr(i32), %6:vcc(i1) = G_USUBE %0, %1, %4 + %7:vgpr(i32) = G_SELECT %6(i1), %0, %1 + S_ENDPGM 0, implicit %5(i32), implicit %7(i32) ... --- @@ -131,13 +131,13 @@ body: | ; WAVE32-NEXT: $scc = COPY [[COPY3]] ; WAVE32-NEXT: [[S_SUBB_U32_:%[0-9]+]]:sreg_32 = S_SUBB_U32 [[COPY]], [[COPY1]], implicit-def dead $scc, implicit $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_SUBB_U32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = COPY $sgpr2 - %3:sgpr(s32) = G_CONSTANT i32 0 - %4:sgpr(s32) = G_ICMP intpred(eq), %2, %3 - %5:sgpr(s32), %6:sgpr(s32) = G_USUBE %0, %1, %4 - S_ENDPGM 0, implicit %5 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i32) = COPY $sgpr2 + %3:sgpr(i32) = G_CONSTANT i32 0 + %4:sgpr(i32) = G_ICMP intpred(eq), %2(i32), %3 + %5:sgpr(i32), %6:sgpr(i32) = G_USUBE %0, %1, %4 + S_ENDPGM 0, implicit %5(i32) ... 
--- @@ -170,11 +170,11 @@ body: | ; WAVE32-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[V_MOV_B32_e32_]], implicit $exec ; WAVE32-NEXT: [[V_SUBB_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUBB_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_SUBB_U32_e64 [[COPY]], [[COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_SUBB_U32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = G_CONSTANT i32 0 - %4:vcc(s1) = G_ICMP intpred(eq), %2, %3 - %5:vgpr(s32), %6:vcc(s1) = G_USUBE %0, %1, %4 - S_ENDPGM 0, implicit %5 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(i32) = G_CONSTANT i32 0 + %4:vcc(i1) = G_ICMP intpred(eq), %2(i32), %3 + %5:vgpr(i32), %6:vcc(i1) = G_USUBE %0, %1, %4 + S_ENDPGM 0, implicit %5(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usubo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usubo.mir index 3cd5d82dbfa54..3d109e983dbf2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usubo.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usubo.mir @@ -24,6 +24,7 @@ body: | ; GFX6-NEXT: $scc = COPY [[COPY2]] ; GFX6-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY]], [[COPY1]], implicit $scc ; GFX6-NEXT: S_ENDPGM 0, implicit [[S_SUB_U32_]], implicit [[S_CSELECT_B32_]] + ; ; GFX8-LABEL: name: usubo_s32_s1_sss ; GFX8: liveins: $sgpr0, $sgpr1 ; GFX8-NEXT: {{ $}} @@ -34,6 +35,7 @@ body: | ; GFX8-NEXT: $scc = COPY [[COPY2]] ; GFX8-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY]], [[COPY1]], implicit $scc ; GFX8-NEXT: S_ENDPGM 0, implicit [[S_SUB_U32_]], implicit [[S_CSELECT_B32_]] + ; ; GFX9-LABEL: name: usubo_s32_s1_sss ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9-NEXT: {{ $}} @@ -44,6 +46,7 @@ body: | ; GFX9-NEXT: $scc = COPY [[COPY2]] ; GFX9-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY]], [[COPY1]], implicit $scc ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_SUB_U32_]], implicit [[S_CSELECT_B32_]] + ; ; GFX10-LABEL: name: usubo_s32_s1_sss ; GFX10: liveins: $sgpr0, $sgpr1 ; GFX10-NEXT: {{ $}} @@ -54,11 +57,11 @@ body: | ; GFX10-NEXT: $scc = COPY [[COPY2]] ; GFX10-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY]], [[COPY1]], implicit $scc ; GFX10-NEXT: S_ENDPGM 0, implicit [[S_SUB_U32_]], implicit [[S_CSELECT_B32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32), %3:sgpr(s32) = G_USUBO %0, %1 - %4:sgpr(s32) = G_SELECT %3, %0, %1 - S_ENDPGM 0, implicit %2, implicit %4 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i32), %3:sgpr(i32) = G_USUBO %0, %1 + %4:sgpr(i32) = G_SELECT %3(i32), %0, %1 + S_ENDPGM 0, implicit %2(i32), implicit %4(i32) ... 
--- @@ -78,6 +81,7 @@ body: | ; GFX6-NEXT: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUB_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec ; GFX6-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_SUB_CO_U32_e64_1]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_SUB_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] + ; ; GFX8-LABEL: name: usubo_s32_s1_vvv ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -86,6 +90,7 @@ body: | ; GFX8-NEXT: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUB_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec ; GFX8-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_SUB_CO_U32_e64_1]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_SUB_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] + ; ; GFX9-LABEL: name: usubo_s32_s1_vvv ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -94,6 +99,7 @@ body: | ; GFX9-NEXT: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUB_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec ; GFX9-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_SUB_CO_U32_e64_1]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_SUB_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] + ; ; GFX10-LABEL: name: usubo_s32_s1_vvv ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} @@ -102,11 +108,11 @@ body: | ; GFX10-NEXT: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_SUB_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec ; GFX10-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_SUB_CO_U32_e64_1]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_SUB_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32), %3:vcc(s1) = G_USUBO %0, %1 - %4:vgpr(s32) = G_SELECT %3, %0, %1 - S_ENDPGM 0, implicit %2, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32), %3:vcc(i1) = G_USUBO %0, %1 + %4:vgpr(i32) = G_SELECT %3(i1), %0, %1 + S_ENDPGM 0, implicit %2(i32), implicit %4(i32) ... 
--- @@ -128,6 +134,7 @@ body: | ; GFX6-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GFX6-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_SUB_CO_U32_e64_1]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_SUB_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] + ; ; GFX8-LABEL: name: usubo_s32_s1_vsv ; GFX8: liveins: $sgpr0, $vgpr0 ; GFX8-NEXT: {{ $}} @@ -138,6 +145,7 @@ body: | ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GFX8-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_SUB_CO_U32_e64_1]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_SUB_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] + ; ; GFX9-LABEL: name: usubo_s32_s1_vsv ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} @@ -148,6 +156,7 @@ body: | ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GFX9-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_SUB_CO_U32_e64_1]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_SUB_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] + ; ; GFX10-LABEL: name: usubo_s32_s1_vsv ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} @@ -158,13 +167,13 @@ body: | ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GFX10-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_SUB_CO_U32_e64_1]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_SUB_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(s32), %3:vcc(s1) = G_USUBO %0, %1 - %4:vgpr(s32) = G_CONSTANT i32 0 - %5:vgpr(s32) = G_CONSTANT i32 1 - %6:vgpr(s32) = G_SELECT %3, %4, %5 - S_ENDPGM 0, implicit %2, implicit %6 + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:vgpr(i32), %3:vcc(i1) = G_USUBO %0, %1 + %4:vgpr(i32) = G_CONSTANT i32 0 + %5:vgpr(i32) = G_CONSTANT i32 1 + %6:vgpr(i32) = G_SELECT %3(i1), %4, %5 + S_ENDPGM 0, implicit %2(i32), implicit %6(i32) ... 
--- @@ -186,6 +195,7 @@ body: | ; GFX6-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GFX6-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_SUB_CO_U32_e64_1]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_SUB_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] + ; ; GFX8-LABEL: name: usubo_s32_s1_vvs ; GFX8: liveins: $sgpr0, $vgpr0 ; GFX8-NEXT: {{ $}} @@ -196,6 +206,7 @@ body: | ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GFX8-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_SUB_CO_U32_e64_1]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_SUB_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] + ; ; GFX9-LABEL: name: usubo_s32_s1_vvs ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} @@ -206,6 +217,7 @@ body: | ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GFX9-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_SUB_CO_U32_e64_1]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_SUB_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] + ; ; GFX10-LABEL: name: usubo_s32_s1_vvs ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} @@ -216,11 +228,11 @@ body: | ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GFX10-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_SUB_CO_U32_e64_1]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_SUB_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr(s32) = COPY $sgpr0 - %2:vgpr(s32), %3:vcc(s1) = G_USUBO %0, %1 - %4:vgpr(s32) = G_CONSTANT i32 0 - %5:vgpr(s32) = G_CONSTANT i32 1 - %6:vgpr(s32) = G_SELECT %3, %4, %5 - S_ENDPGM 0, implicit %2, implicit %6 + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = COPY $sgpr0 + %2:vgpr(i32), %3:vcc(i1) = G_USUBO %0, %1 + %4:vgpr(i32) = G_CONSTANT i32 0 + %5:vgpr(i32) = G_CONSTANT i32 1 + %6:vgpr(i32) = G_SELECT %3(i1), %4, %5 + S_ENDPGM 0, implicit %2(i32), implicit %6(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-xor.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-xor.mir index f2daa23db47a2..cbefb69e9d68f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-xor.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-xor.mir @@ -35,13 +35,13 @@ body: | ; WAVE32-NEXT: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec ; WAVE32-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_XOR_B32 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_XOR_B32_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = G_CONSTANT i32 0 - %3:vcc(s1) = G_ICMP intpred(eq), %0, %2 - %4:vcc(s1) = G_ICMP intpred(eq), %1, %2 - %5:vcc(s1) = G_XOR %3, %4 - S_ENDPGM 0, implicit %5 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = G_CONSTANT i32 0 + %3:vcc(i1) = G_ICMP intpred(eq), %0(i32), %2 + %4:vcc(i1) = G_ICMP intpred(eq), %1(i32), %2 + %5:vcc(i1) = G_XOR %3, %4 + S_ENDPGM 0, implicit %5(i1) ... 
# Should fail to select @@ -71,12 +71,12 @@ body: | ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; WAVE32-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[COPY1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_XOR_B32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s1) = G_TRUNC %0 - %3:sgpr(s1) = G_TRUNC %1 - %4:sgpr(s1) = G_XOR %2, %3 - S_ENDPGM 0, implicit %4 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i1) = G_TRUNC %0(i32) + %3:sgpr(i1) = G_TRUNC %1(i32) + %4:sgpr(i1) = G_XOR %2, %3 + S_ENDPGM 0, implicit %4(i1) ... --- @@ -104,12 +104,12 @@ body: | ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; WAVE32-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[COPY1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_XOR_B32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s16) = G_TRUNC %0 - %3:sgpr(s16) = G_TRUNC %1 - %4:sgpr(s16) = G_XOR %2, %3 - S_ENDPGM 0, implicit %4 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i16) = G_TRUNC %0(i32) + %3:sgpr(i16) = G_TRUNC %1(i32) + %4:sgpr(i16) = G_XOR %2, %3 + S_ENDPGM 0, implicit %4(i16) ... --- @@ -137,12 +137,12 @@ body: | ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; WAVE32-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[COPY]], [[COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_TRUNC %1 - %4:vgpr(s16) = G_XOR %2, %3 - S_ENDPGM 0, implicit %4 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i16) = G_TRUNC %0(i32) + %3:vgpr(i16) = G_TRUNC %1(i32) + %4:vgpr(i16) = G_XOR %2, %3 + S_ENDPGM 0, implicit %4(i16) ... --- @@ -170,10 +170,10 @@ body: | ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; WAVE32-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[COPY1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_XOR_B32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = G_XOR %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i32) = G_XOR %0, %1 + S_ENDPGM 0, implicit %2(i32) ... --- @@ -201,10 +201,10 @@ body: | ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 ; WAVE32-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[COPY]], [[COPY1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_XOR_B64_]] - %0:sgpr(s64) = COPY $sgpr0_sgpr1 - %1:sgpr(s64) = COPY $sgpr2_sgpr3 - %2:sgpr(s64) = G_XOR %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(i64) = COPY $sgpr0_sgpr1 + %1:sgpr(i64) = COPY $sgpr2_sgpr3 + %2:sgpr(i64) = G_XOR %0, %1 + S_ENDPGM 0, implicit %2(i64) ... --- @@ -232,10 +232,10 @@ body: | ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; WAVE32-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[COPY1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_XOR_B32_]] - %0:sgpr(<2 x s16>) = COPY $sgpr0 - %1:sgpr(<2 x s16>) = COPY $sgpr1 - %2:sgpr(<2 x s16>) = G_XOR %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(<2 x i16>) = COPY $sgpr0 + %1:sgpr(<2 x i16>) = COPY $sgpr1 + %2:sgpr(<2 x i16>) = G_XOR %0, %1 + S_ENDPGM 0, implicit %2(<2 x i16>) ... 
--- @@ -263,10 +263,10 @@ body: | ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 ; WAVE32-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[COPY]], [[COPY1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_XOR_B64_]] - %0:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1 - %1:sgpr(<2 x s32>) = COPY $sgpr2_sgpr3 - %2:sgpr(<2 x s32>) = G_XOR %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(<2 x i32>) = COPY $sgpr0_sgpr1 + %1:sgpr(<2 x i32>) = COPY $sgpr2_sgpr3 + %2:sgpr(<2 x i32>) = G_XOR %0, %1 + S_ENDPGM 0, implicit %2(<2 x i32>) ... --- @@ -294,10 +294,10 @@ body: | ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 ; WAVE32-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[COPY]], [[COPY1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_XOR_B64_]] - %0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 - %1:sgpr(<4 x s16>) = COPY $sgpr2_sgpr3 - %2:sgpr(<4 x s16>) = G_XOR %0, %1 - S_ENDPGM 0, implicit %2 + %0:sgpr(<4 x i16>) = COPY $sgpr0_sgpr1 + %1:sgpr(<4 x i16>) = COPY $sgpr2_sgpr3 + %2:sgpr(<4 x i16>) = G_XOR %0, %1 + S_ENDPGM 0, implicit %2(<4 x i16>) ... --- @@ -325,10 +325,10 @@ body: | ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; WAVE32-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[COPY]], [[COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = G_XOR %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = G_XOR %0, %1 + S_ENDPGM 0, implicit %2(i32) ... --- @@ -356,10 +356,10 @@ body: | ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; WAVE32-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[COPY]], [[COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]] - %0:vgpr(<2 x s16>) = COPY $vgpr0 - %1:vgpr(<2 x s16>) = COPY $vgpr1 - %2:vgpr(<2 x s16>) = G_XOR %0, %1 - S_ENDPGM 0, implicit %2 + %0:vgpr(<2 x i16>) = COPY $vgpr0 + %1:vgpr(<2 x i16>) = COPY $vgpr1 + %2:vgpr(<2 x i16>) = G_XOR %0, %1 + S_ENDPGM 0, implicit %2(<2 x i16>) ... 
@@ -377,22 +377,22 @@ body: | ; WAVE64-LABEL: name: xor_s64_vgpr_vgpr_vgpr ; WAVE64: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; WAVE64-NEXT: [[XOR:%[0-9]+]]:vgpr(s64) = G_XOR [[COPY]], [[COPY1]] - ; WAVE64-NEXT: S_ENDPGM 0, implicit [[XOR]](s64) + ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr2_vgpr3 + ; WAVE64-NEXT: [[XOR:%[0-9]+]]:vgpr(i64) = G_XOR [[COPY]], [[COPY1]] + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[XOR]](i64) ; ; WAVE32-LABEL: name: xor_s64_vgpr_vgpr_vgpr ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; WAVE32-NEXT: [[XOR:%[0-9]+]]:vgpr(s64) = G_XOR [[COPY]], [[COPY1]] - ; WAVE32-NEXT: S_ENDPGM 0, implicit [[XOR]](s64) - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = COPY $vgpr2_vgpr3 - %2:vgpr(s64) = G_XOR %0, %1 - S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr2_vgpr3 + ; WAVE32-NEXT: [[XOR:%[0-9]+]]:vgpr(i64) = G_XOR [[COPY]], [[COPY1]] + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[XOR]](i64) + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:vgpr(i64) = COPY $vgpr2_vgpr3 + %2:vgpr(i64) = G_XOR %0, %1 + S_ENDPGM 0, implicit %2(i64) ... --- @@ -428,14 +428,14 @@ body: | ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_1]], implicit $exec ; WAVE32-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_XOR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_XOR_B32_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s1) = G_TRUNC %0 - %3:vgpr(s1) = G_TRUNC %1 - %4:vcc(s1) = COPY %2 - %5:vcc(s1) = COPY %3 - %6:vcc(s1) = G_XOR %4, %5 - S_ENDPGM 0, implicit %6 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i1) = G_TRUNC %0(i32) + %3:vgpr(i1) = G_TRUNC %1(i32) + %4:vcc(i1) = COPY %2(i1) + %5:vcc(i1) = COPY %3(i1) + %6:vcc(i1) = G_XOR %4, %5 + S_ENDPGM 0, implicit %6(i1) ... # The selector for the copy of the xor result may constrain the result @@ -476,17 +476,17 @@ body: | ; WAVE32-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_XOR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_XOR_B32_]] ; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY1]] - %1:vgpr(s32) = COPY $vgpr0 - %0:vgpr(s1) = G_TRUNC %1(s32) - %sgpr0:sgpr(s32) = COPY $sgpr0 - %2:sgpr(s1) = G_TRUNC %sgpr0 - %6:sgpr(s32) = G_CONSTANT i32 0 - %7:sgpr(p1) = G_IMPLICIT_DEF - %9:vcc(s1) = COPY %0(s1) - %10:vcc(s1) = COPY %2(s1) - %8:vcc(s1) = G_XOR %9, %10 - %3:sreg_32_xm0(s1) = COPY %8(s1) - S_ENDPGM 0, implicit %3 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i1) = G_TRUNC %0(i32) + %sgpr0:sgpr(i32) = COPY $sgpr0 + %3:sgpr(i1) = G_TRUNC %sgpr0(i32) + %4:sgpr(i32) = G_CONSTANT i32 0 + %5:sgpr(f64) = G_IMPLICIT_DEF + %6:vcc(i1) = COPY %1(i1) + %7:vcc(i1) = COPY %3(i1) + %8:vcc(i1) = G_XOR %6, %7 + %9:sreg_32_xm0(i1) = COPY %8(i1) + S_ENDPGM 0, implicit %9(i1) ... 
@@ -524,16 +524,16 @@ body: | ; WAVE32-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_XOR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY [[S_XOR_B32_]] ; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY1]] - %1:vgpr(s32) = COPY $vgpr0 - %0:vgpr(s1) = G_TRUNC %1(s32) - %sgpr0:sgpr(s32) = COPY $sgpr0 - %2:sgpr(s1) = G_TRUNC %sgpr0 - %6:sgpr(s32) = G_CONSTANT i32 0 - %7:sgpr(p1) = G_IMPLICIT_DEF - %9:vcc(s1) = COPY %0(s1) - %10:vcc(s1) = COPY %2(s1) - %8:vcc(s1) = G_XOR %9, %10 - %3:sreg_64_xexec(s1) = COPY %8(s1) - S_ENDPGM 0, implicit %3 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i1) = G_TRUNC %0(i32) + %sgpr0:sgpr(i32) = COPY $sgpr0 + %3:sgpr(i1) = G_TRUNC %sgpr0(i32) + %4:sgpr(i32) = G_CONSTANT i32 0 + %5:sgpr(f64) = G_IMPLICIT_DEF + %6:vcc(i1) = COPY %1(i1) + %7:vcc(i1) = COPY %3(i1) + %8:vcc(i1) = G_XOR %6, %7 + %9:sreg_64_xexec(i1) = COPY %8(i1) + S_ENDPGM 0, implicit %9(i1) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-zext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-zext.mir index b709ddf2e3587..46a1d52fe3463 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-zext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-zext.mir @@ -17,11 +17,11 @@ body: | ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], 1, implicit-def dead $scc ; GCN-NEXT: [[S_SEXT_I32_I16_:%[0-9]+]]:sreg_32 = S_SEXT_I32_I16 [[S_AND_B32_]] ; GCN-NEXT: $sgpr0 = COPY [[S_SEXT_I32_I16_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s1) = G_TRUNC %0 - %2:sgpr(s16) = G_ZEXT %1 - %3:sgpr(s32) = G_SEXT %2 - $sgpr0 = COPY %3 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i1) = G_TRUNC %0(i32) + %2:sgpr(i16) = G_ZEXT %1(i1) + %3:sgpr(i32) = G_SEXT %2(i16) + $sgpr0 = COPY %3(i32) ... --- @@ -39,10 +39,10 @@ body: | ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], 1, implicit-def dead $scc ; GCN-NEXT: $sgpr0 = COPY [[S_AND_B32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s1) = G_TRUNC %0 - %2:sgpr(s32) = G_ZEXT %1 - $sgpr0 = COPY %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i1) = G_TRUNC %0(i32) + %2:sgpr(i32) = G_ZEXT %1(i1) + $sgpr0 = COPY %2(i32) ... --- @@ -62,10 +62,10 @@ body: | ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 ; GCN-NEXT: [[S_BFE_U64_:%[0-9]+]]:sreg_64 = S_BFE_U64 [[REG_SEQUENCE]], 65536, implicit-def $scc ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[S_BFE_U64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s1) = G_TRUNC %0 - %2:sgpr(s64) = G_ZEXT %1 - $sgpr0_sgpr1 = COPY %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i1) = G_TRUNC %0(i32) + %2:sgpr(i64) = G_ZEXT %1(i1) + $sgpr0_sgpr1 = COPY %2(i64) ... --- @@ -84,10 +84,10 @@ body: | ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_MOV_B32_]], [[COPY]], implicit-def dead $scc ; GCN-NEXT: $sgpr0 = COPY [[S_AND_B32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0 - %2:sgpr(s32) = G_ZEXT %1 - $sgpr0 = COPY %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(i32) = G_ZEXT %1(i16) + $sgpr0 = COPY %2(i32) ... 
@@ -110,10 +110,10 @@ body: | ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY [[S_AND_B32_]] ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1 ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[REG_SEQUENCE]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0 - %2:sgpr(s64) = G_ZEXT %1 - $sgpr0_sgpr1 = COPY %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(i64) = G_ZEXT %1(i16) + $sgpr0_sgpr1 = COPY %2(i64) ... @@ -133,9 +133,9 @@ body: | ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 0 ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1 ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[REG_SEQUENCE]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s64) = G_ZEXT %0 - $sgpr0_sgpr1 = COPY %1 + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i64) = G_ZEXT %0(i32) + $sgpr0_sgpr1 = COPY %1(i64) ... @@ -148,9 +148,9 @@ body: | # bb.0: # liveins: $vgpr0 -# %0:vgpr(s32) = COPY $vgpr0 -# %1:vcc(s1) = G_ICMP intpred(eq), %0, %0 -# %2:vgpr(s32) = G_ZEXT %1 +# %0:vgpr(i32) = COPY $vgpr0 +# %1:vcc(i1) = G_ICMP intpred(eq), %0, %0 +# %2:vgpr(i32) = G_ZEXT %1 # $vgpr0 = COPY %2 # ... @@ -170,11 +170,11 @@ body: | ; GCN-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec ; GCN-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[V_AND_B32_e32_]], 0, 16, implicit $exec ; GCN-NEXT: $vgpr0 = COPY [[V_BFE_I32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s1) = G_TRUNC %0 - %2:vgpr(s16) = G_ZEXT %1 - %3:vgpr(s32) = G_SEXT %2 - $vgpr0 = COPY %3 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i1) = G_TRUNC %0(i32) + %2:vgpr(i16) = G_ZEXT %1(i1) + %3:vgpr(i32) = G_SEXT %2(i16) + $vgpr0 = COPY %3(i32) ... --- @@ -192,10 +192,10 @@ body: | ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec ; GCN-NEXT: $vgpr0 = COPY [[V_AND_B32_e32_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s1) = G_TRUNC %0 - %2:vgpr(s32) = G_ZEXT %1 - $vgpr0 = COPY %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i1) = G_TRUNC %0(i32) + %2:vgpr(i32) = G_ZEXT %1(i1) + $vgpr0 = COPY %2(i32) ... --- @@ -214,10 +214,10 @@ body: | ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec ; GCN-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s16) = G_TRUNC %0 - %2:vgpr(s32) = G_ZEXT %1 - $vgpr0 = COPY %2 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i16) = G_TRUNC %0(i32) + %2:vgpr(i32) = G_ZEXT %1(i16) + $vgpr0 = COPY %2(i32) ... @@ -236,8 +236,8 @@ body: | ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], 1, implicit-def dead $scc ; GCN-NEXT: $sgpr0 = COPY [[S_AND_B32_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:sreg_32(s1) = G_TRUNC %0 - %2:sgpr(s32) = G_ZEXT %1 - $sgpr0 = COPY %2 + %0:sgpr(i32) = COPY $sgpr0 + %1:sreg_32(i1) = G_TRUNC %0(i32) + %2:sgpr(i32) = G_ZEXT %1(i1) + $sgpr0 = COPY %2(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-zextload-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-zextload-local.mir index 29671c13e173f..76c1004e3d68d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-zextload-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-zextload-local.mir @@ -20,7 +20,7 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX6-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (i8), addrspace 3) ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_U8_]] ; ; GFX7-LABEL: name: zextload_local_s32_from_s8_align1 @@ -28,18 +28,18 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX7-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (i8), addrspace 3) ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_U8_]] ; ; GFX9-LABEL: name: zextload_local_s32_from_s8_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 3) + ; GFX9-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (i8), addrspace 3) ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(s32) = G_ZEXTLOAD %0 :: (load (s8), align 1, addrspace 3) - $vgpr0 = COPY %1 + %1:vgpr(i32) = G_ZEXTLOAD %0(p3) :: (load (i8), addrspace 3) + $vgpr0 = COPY %1(i32) ... 
@@ -58,7 +58,7 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_READ_U16_:%[0-9]+]]:vgpr_32 = DS_READ_U16 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s16), addrspace 3) + ; GFX6-NEXT: [[DS_READ_U16_:%[0-9]+]]:vgpr_32 = DS_READ_U16 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (i16), addrspace 3) ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_U16_]] ; ; GFX7-LABEL: name: zextload_local_s32_from_s16_align2 @@ -66,18 +66,18 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ_U16_:%[0-9]+]]:vgpr_32 = DS_READ_U16 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s16), addrspace 3) + ; GFX7-NEXT: [[DS_READ_U16_:%[0-9]+]]:vgpr_32 = DS_READ_U16 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (i16), addrspace 3) ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_U16_]] ; ; GFX9-LABEL: name: zextload_local_s32_from_s16_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ_U16_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 3) + ; GFX9-NEXT: [[DS_READ_U16_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (i16), addrspace 3) ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_U16_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(s32) = G_ZEXTLOAD %0 :: (load (s16), align 2, addrspace 3) - $vgpr0 = COPY %1 + %1:vgpr(i32) = G_ZEXTLOAD %0(p3) :: (load (i16), addrspace 3) + $vgpr0 = COPY %1(i32) ... @@ -92,8 +92,8 @@ body: | # liveins: $vgpr0 # %0:vgpr(p3) = COPY $vgpr0 -# %1:vgpr(s16) = G_ZEXTLOAD %0 :: (load (s8), align 1, addrspace 3) -# %2:vgpr(s32) = G_ANYEXT %1 +# %1:vgpr(i16) = G_ZEXTLOAD %0 :: (load (i8), align 1, addrspace 3) +# %2:vgpr(i32) = G_ANYEXT %1 # $vgpr0 = COPY %2 # ... 
@@ -115,7 +115,7 @@ body: | ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[V_ADD_CO_U32_e64_]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX6-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[V_ADD_CO_U32_e64_]], 0, 0, implicit $m0, implicit $exec :: (load (i8), addrspace 3) ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_U8_]] ; ; GFX7-LABEL: name: zextload_local_s32_from_s8_align1_offset4095 @@ -123,19 +123,19 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 4095, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX7-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 4095, 0, implicit $m0, implicit $exec :: (load (i8), addrspace 3) ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_U8_]] ; ; GFX9-LABEL: name: zextload_local_s32_from_s8_align1_offset4095 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[COPY]], 4095, 0, implicit $exec :: (load (s8), addrspace 3) + ; GFX9-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[COPY]], 4095, 0, implicit $exec :: (load (i8), addrspace 3) ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(s32) = G_CONSTANT i32 4095 - %2:vgpr(p3) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_ZEXTLOAD %2 :: (load (s8), align 1, addrspace 3) - $vgpr0 = COPY %3 + %1:vgpr(i32) = G_CONSTANT i32 4095 + %2:vgpr(p3) = G_PTR_ADD %0, %1(i32) + %3:vgpr(i32) = G_ZEXTLOAD %2(p3) :: (load (i8), addrspace 3) + $vgpr0 = COPY %3(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-abs.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-abs.mir index a9fe80eb47e76..25f3d6bf964eb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-abs.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-abs.mir @@ -14,26 +14,26 @@ body: | ; SI-LABEL: name: test_abs_s32 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[ABS:%[0-9]+]]:_(s32) = G_ABS [[COPY]] - ; SI-NEXT: $vgpr0 = COPY [[ABS]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[ABS:%[0-9]+]]:_(i32) = G_ABS [[COPY]] + ; SI-NEXT: $vgpr0 = COPY [[ABS]](i32) ; ; VI-LABEL: name: test_abs_s32 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[ABS:%[0-9]+]]:_(s32) = G_ABS [[COPY]] - ; VI-NEXT: $vgpr0 = COPY [[ABS]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[ABS:%[0-9]+]]:_(i32) = G_ABS [[COPY]] + ; VI-NEXT: $vgpr0 = COPY [[ABS]](i32) ; ; GFX9-LABEL: name: test_abs_s32 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[ABS:%[0-9]+]]:_(s32) = G_ABS [[COPY]] - ; GFX9-NEXT: $vgpr0 = COPY [[ABS]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_ABS %0 - $vgpr0 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[ABS:%[0-9]+]]:_(i32) = G_ABS [[COPY]] + ; GFX9-NEXT: $vgpr0 = COPY [[ABS]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_ABS %0 + $vgpr0 = COPY %1(i32) ... 
--- @@ -45,47 +45,47 @@ body: | ; SI-LABEL: name: test_abs_s64 ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; SI-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; SI-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] - ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; SI-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[XOR]](s64) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; SI-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[COPY]], [[C]](i32) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR]](i64) + ; SI-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV]], [[UV2]] + ; SI-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; SI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; SI-NEXT: [[XOR:%[0-9]+]]:_(i64) = G_XOR [[MV]], [[ASHR]] + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[XOR]](i64) ; ; VI-LABEL: name: test_abs_s64 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32) - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; VI-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; VI-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] - ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; VI-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[XOR]](s64) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; VI-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[COPY]], [[C]](i32) + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR]](i64) + ; VI-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV]], [[UV2]] + ; VI-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; VI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; VI-NEXT: [[XOR:%[0-9]+]]:_(i64) = G_XOR [[MV]], [[ASHR]] + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[XOR]](i64) ; ; GFX9-LABEL: name: test_abs_s64 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX9-NEXT: 
[[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[XOR]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_ABS %0 - $vgpr0_vgpr1 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR]](i64) + ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV]], [[UV2]] + ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(i64) = G_XOR [[MV]], [[ASHR]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[XOR]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = G_ABS %0 + $vgpr0_vgpr1 = COPY %1(i64) ... --- @@ -97,33 +97,33 @@ body: | ; SI-LABEL: name: test_abs_s16 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 - ; SI-NEXT: [[ABS:%[0-9]+]]:_(s32) = G_ABS [[SEXT_INREG]] - ; SI-NEXT: $vgpr0 = COPY [[ABS]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 16 + ; SI-NEXT: [[ABS:%[0-9]+]]:_(i32) = G_ABS [[SEXT_INREG]] + ; SI-NEXT: $vgpr0 = COPY [[ABS]](i32) ; ; VI-LABEL: name: test_abs_s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[ABS:%[0-9]+]]:_(s16) = G_ABS [[TRUNC]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ABS]](s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[ABS:%[0-9]+]]:_(i16) = G_ABS [[TRUNC]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ABS]](i16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: test_abs_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[ABS:%[0-9]+]]:_(s16) = G_ABS [[TRUNC]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ABS]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s16) = G_ABS %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[ABS:%[0-9]+]]:_(i16) = G_ABS [[TRUNC]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ABS]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(i16) = G_ABS %1 + %3:_(i32) = G_ANYEXT %2(i16) + $vgpr0 = COPY %3(i32) ... 
--- @@ -135,37 +135,37 @@ body: | ; SI-LABEL: name: test_abs_s8 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 - ; SI-NEXT: [[ABS:%[0-9]+]]:_(s32) = G_ABS [[SEXT_INREG]] - ; SI-NEXT: $vgpr0 = COPY [[ABS]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 8 + ; SI-NEXT: [[ABS:%[0-9]+]]:_(i32) = G_ABS [[SEXT_INREG]] + ; SI-NEXT: $vgpr0 = COPY [[ABS]](i32) ; ; VI-LABEL: name: test_abs_s8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[C]](s16) - ; VI-NEXT: [[ABS:%[0-9]+]]:_(s16) = G_ABS [[ASHR]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ABS]](s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[C]](i16) + ; VI-NEXT: [[ASHR:%[0-9]+]]:_(i16) = G_ASHR [[SHL]], [[C]](i16) + ; VI-NEXT: [[ABS:%[0-9]+]]:_(i16) = G_ABS [[ASHR]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ABS]](i16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: test_abs_s8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG]](s32) - ; GFX9-NEXT: [[ABS:%[0-9]+]]:_(s16) = G_ABS [[TRUNC]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ABS]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s8) = G_TRUNC %0 - %2:_(s8) = G_ABS %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 8 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[SEXT_INREG]](i32) + ; GFX9-NEXT: [[ABS:%[0-9]+]]:_(i16) = G_ABS [[TRUNC]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ABS]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i8) = G_TRUNC %0(i32) + %2:_(i8) = G_ABS %1 + %3:_(i32) = G_ANYEXT %2(i8) + $vgpr0 = COPY %3(i32) ... 
--- @@ -177,31 +177,31 @@ body: | ; SI-LABEL: name: test_abs_s17 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 17 - ; SI-NEXT: [[ABS:%[0-9]+]]:_(s32) = G_ABS [[SEXT_INREG]] - ; SI-NEXT: $vgpr0 = COPY [[ABS]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 17 + ; SI-NEXT: [[ABS:%[0-9]+]]:_(i32) = G_ABS [[SEXT_INREG]] + ; SI-NEXT: $vgpr0 = COPY [[ABS]](i32) ; ; VI-LABEL: name: test_abs_s17 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 17 - ; VI-NEXT: [[ABS:%[0-9]+]]:_(s32) = G_ABS [[SEXT_INREG]] - ; VI-NEXT: $vgpr0 = COPY [[ABS]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 17 + ; VI-NEXT: [[ABS:%[0-9]+]]:_(i32) = G_ABS [[SEXT_INREG]] + ; VI-NEXT: $vgpr0 = COPY [[ABS]](i32) ; ; GFX9-LABEL: name: test_abs_s17 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 17 - ; GFX9-NEXT: [[ABS:%[0-9]+]]:_(s32) = G_ABS [[SEXT_INREG]] - ; GFX9-NEXT: $vgpr0 = COPY [[ABS]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s17) = G_TRUNC %0 - %2:_(s17) = G_ABS %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 17 + ; GFX9-NEXT: [[ABS:%[0-9]+]]:_(i32) = G_ABS [[SEXT_INREG]] + ; GFX9-NEXT: $vgpr0 = COPY [[ABS]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i17) = G_TRUNC %0(i32) + %2:_(i17) = G_ABS %1 + %3:_(i32) = G_ANYEXT %2(i17) + $vgpr0 = COPY %3(i32) ... 
--- @@ -213,35 +213,35 @@ body: | ; SI-LABEL: name: test_abs_v2s32 ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; SI-NEXT: [[ABS:%[0-9]+]]:_(s32) = G_ABS [[UV]] - ; SI-NEXT: [[ABS1:%[0-9]+]]:_(s32) = G_ABS [[UV1]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ABS]](s32), [[ABS1]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; SI-NEXT: [[ABS:%[0-9]+]]:_(i32) = G_ABS [[UV]] + ; SI-NEXT: [[ABS1:%[0-9]+]]:_(i32) = G_ABS [[UV1]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[ABS]](i32), [[ABS1]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; VI-LABEL: name: test_abs_v2s32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; VI-NEXT: [[ABS:%[0-9]+]]:_(s32) = G_ABS [[UV]] - ; VI-NEXT: [[ABS1:%[0-9]+]]:_(s32) = G_ABS [[UV1]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ABS]](s32), [[ABS1]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; VI-NEXT: [[ABS:%[0-9]+]]:_(i32) = G_ABS [[UV]] + ; VI-NEXT: [[ABS1:%[0-9]+]]:_(i32) = G_ABS [[UV1]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[ABS]](i32), [[ABS1]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX9-LABEL: name: test_abs_v2s32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-NEXT: [[ABS:%[0-9]+]]:_(s32) = G_ABS [[UV]] - ; GFX9-NEXT: [[ABS1:%[0-9]+]]:_(s32) = G_ABS [[UV1]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ABS]](s32), [[ABS1]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_ABS %0 - $vgpr0_vgpr1 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX9-NEXT: [[ABS:%[0-9]+]]:_(i32) = G_ABS [[UV]] + ; GFX9-NEXT: [[ABS1:%[0-9]+]]:_(i32) = G_ABS [[UV1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[ABS]](i32), [[ABS1]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = G_ABS %0 + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... 
--- @@ -253,38 +253,38 @@ body: | ; SI-LABEL: name: test_abs_v3s32 ; SI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; SI-NEXT: [[ABS:%[0-9]+]]:_(s32) = G_ABS [[UV]] - ; SI-NEXT: [[ABS1:%[0-9]+]]:_(s32) = G_ABS [[UV1]] - ; SI-NEXT: [[ABS2:%[0-9]+]]:_(s32) = G_ABS [[UV2]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ABS]](s32), [[ABS1]](s32), [[ABS2]](s32) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; SI-NEXT: [[ABS:%[0-9]+]]:_(i32) = G_ABS [[UV]] + ; SI-NEXT: [[ABS1:%[0-9]+]]:_(i32) = G_ABS [[UV1]] + ; SI-NEXT: [[ABS2:%[0-9]+]]:_(i32) = G_ABS [[UV2]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[ABS]](i32), [[ABS1]](i32), [[ABS2]](i32) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) ; ; VI-LABEL: name: test_abs_v3s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; VI-NEXT: [[ABS:%[0-9]+]]:_(s32) = G_ABS [[UV]] - ; VI-NEXT: [[ABS1:%[0-9]+]]:_(s32) = G_ABS [[UV1]] - ; VI-NEXT: [[ABS2:%[0-9]+]]:_(s32) = G_ABS [[UV2]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ABS]](s32), [[ABS1]](s32), [[ABS2]](s32) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; VI-NEXT: [[ABS:%[0-9]+]]:_(i32) = G_ABS [[UV]] + ; VI-NEXT: [[ABS1:%[0-9]+]]:_(i32) = G_ABS [[UV1]] + ; VI-NEXT: [[ABS2:%[0-9]+]]:_(i32) = G_ABS [[UV2]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[ABS]](i32), [[ABS1]](i32), [[ABS2]](i32) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) ; ; GFX9-LABEL: name: test_abs_v3s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX9-NEXT: [[ABS:%[0-9]+]]:_(s32) = G_ABS [[UV]] - ; GFX9-NEXT: [[ABS1:%[0-9]+]]:_(s32) = G_ABS [[UV1]] - ; GFX9-NEXT: [[ABS2:%[0-9]+]]:_(s32) = G_ABS [[UV2]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ABS]](s32), [[ABS1]](s32), [[ABS2]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s32>) = G_ABS %0 - $vgpr0_vgpr1_vgpr2 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; GFX9-NEXT: [[ABS:%[0-9]+]]:_(i32) = G_ABS [[UV]] + ; GFX9-NEXT: [[ABS1:%[0-9]+]]:_(i32) = G_ABS [[UV1]] + ; GFX9-NEXT: [[ABS2:%[0-9]+]]:_(i32) = G_ABS [[UV2]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[ABS]](i32), 
[[ABS1]](i32), [[ABS2]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x i32>) = G_ABS %0 + $vgpr0_vgpr1_vgpr2 = COPY %1(<3 x i32>) ... --- @@ -296,49 +296,49 @@ body: | ; SI-LABEL: name: test_abs_v2s16 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16 - ; SI-NEXT: [[ABS:%[0-9]+]]:_(s32) = G_ABS [[SEXT_INREG]] - ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 - ; SI-NEXT: [[ABS1:%[0-9]+]]:_(s32) = G_ABS [[SEXT_INREG1]] - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ABS]], [[C1]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ABS1]], [[C1]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST]], 16 + ; SI-NEXT: [[ABS:%[0-9]+]]:_(i32) = G_ABS [[SEXT_INREG]] + ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR]], 16 + ; SI-NEXT: [[ABS1:%[0-9]+]]:_(i32) = G_ABS [[SEXT_INREG1]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[ABS]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[ABS1]], [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) ; ; VI-LABEL: name: test_abs_v2s16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[ABS:%[0-9]+]]:_(s16) = G_ABS [[TRUNC]] - ; VI-NEXT: [[ABS1:%[0-9]+]]:_(s16) = G_ABS [[TRUNC1]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ABS]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ABS1]](s16) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: 
[[ABS:%[0-9]+]]:_(i16) = G_ABS [[TRUNC]] + ; VI-NEXT: [[ABS1:%[0-9]+]]:_(i16) = G_ABS [[TRUNC1]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[ABS]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[ABS1]](i16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) ; ; GFX9-LABEL: name: test_abs_v2s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[ABS:%[0-9]+]]:_(<2 x s16>) = G_ABS [[COPY]] - ; GFX9-NEXT: $vgpr0 = COPY [[ABS]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = G_ABS %0 - $vgpr0 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[ABS:%[0-9]+]]:_(<2 x i16>) = G_ABS [[COPY]] + ; GFX9-NEXT: $vgpr0 = COPY [[ABS]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = G_ABS %0 + $vgpr0 = COPY %1(<2 x i16>) ... --- @@ -350,61 +350,61 @@ body: | ; SI-LABEL: name: test_abs_v3s16 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16 - ; SI-NEXT: [[ABS:%[0-9]+]]:_(s32) = G_ABS [[SEXT_INREG]] - ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 - ; SI-NEXT: [[ABS1:%[0-9]+]]:_(s32) = G_ABS [[SEXT_INREG1]] - ; SI-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16 - ; SI-NEXT: [[ABS2:%[0-9]+]]:_(s32) = G_ABS [[SEXT_INREG2]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ABS]](s32), [[ABS1]](s32), [[ABS2]](s32) - ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST]], 16 + ; SI-NEXT: [[ABS:%[0-9]+]]:_(i32) = G_ABS [[SEXT_INREG]] + ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR]], 16 + ; SI-NEXT: [[ABS1:%[0-9]+]]:_(i32) = G_ABS [[SEXT_INREG1]] + ; SI-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST1]], 16 + ; SI-NEXT: [[ABS2:%[0-9]+]]:_(i32) = G_ABS [[SEXT_INREG2]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[ABS]](i32), [[ABS1]](i32), [[ABS2]](i32) + ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x i32>) ; ; VI-LABEL: name: test_abs_v3s16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC 
[[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[ABS:%[0-9]+]]:_(s16) = G_ABS [[TRUNC]] - ; VI-NEXT: [[ABS1:%[0-9]+]]:_(s16) = G_ABS [[TRUNC1]] - ; VI-NEXT: [[ABS2:%[0-9]+]]:_(s16) = G_ABS [[TRUNC2]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ABS]](s16) - ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ABS1]](s16) - ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ABS2]](s16) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; VI-NEXT: [[ABS:%[0-9]+]]:_(i16) = G_ABS [[TRUNC]] + ; VI-NEXT: [[ABS1:%[0-9]+]]:_(i16) = G_ABS [[TRUNC1]] + ; VI-NEXT: [[ABS2:%[0-9]+]]:_(i16) = G_ABS [[TRUNC2]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ABS]](i16) + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[ABS1]](i16) + ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[ABS2]](i16) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32), [[ANYEXT2]](i32) + ; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x i32>) ; ; GFX9-LABEL: name: test_abs_v3s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[ABS:%[0-9]+]]:_(<2 x s16>) = G_ABS [[UV]] - ; GFX9-NEXT: [[ABS1:%[0-9]+]]:_(s16) = G_ABS [[TRUNC]] - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[ABS]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ABS1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST1]](s32), [[LSHR]](s32), [[ANYEXT]](s32) - ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) - %0:_(<3 x s16>) = G_IMPLICIT_DEF - %1:_(<3 x s16>) = G_ABS %0 - %2:_(<3 x s32>) = G_ANYEXT %1 - S_NOP 0, implicit %2 + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[ABS:%[0-9]+]]:_(<2 x i16>) = G_ABS [[UV]] + 
; GFX9-NEXT: [[ABS1:%[0-9]+]]:_(i16) = G_ABS [[TRUNC]] + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[ABS]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ABS1]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[BITCAST1]](i32), [[LSHR]](i32), [[ANYEXT]](i32) + ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x i32>) + %0:_(<3 x i16>) = G_IMPLICIT_DEF + %1:_(<3 x i16>) = G_ABS %0 + %2:_(<3 x i32>) = G_ANYEXT %1(<3 x i16>) + S_NOP 0, implicit %2(<3 x i32>) ... --- @@ -416,76 +416,76 @@ body: | ; SI-LABEL: name: test_abs_v4s16 ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16 - ; SI-NEXT: [[ABS:%[0-9]+]]:_(s32) = G_ABS [[SEXT_INREG]] - ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 - ; SI-NEXT: [[ABS1:%[0-9]+]]:_(s32) = G_ABS [[SEXT_INREG1]] - ; SI-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16 - ; SI-NEXT: [[ABS2:%[0-9]+]]:_(s32) = G_ABS [[SEXT_INREG2]] - ; SI-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 16 - ; SI-NEXT: [[ABS3:%[0-9]+]]:_(s32) = G_ABS [[SEXT_INREG3]] - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ABS]], [[C1]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ABS1]], [[C1]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ABS2]], [[C1]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ABS3]], [[C1]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST]], 16 + ; SI-NEXT: [[ABS:%[0-9]+]]:_(i32) = G_ABS [[SEXT_INREG]] + ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR]], 16 + ; SI-NEXT: [[ABS1:%[0-9]+]]:_(i32) = G_ABS [[SEXT_INREG1]] + ; SI-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST1]], 16 + ; SI-NEXT: [[ABS2:%[0-9]+]]:_(i32) = G_ABS [[SEXT_INREG2]] + 
; SI-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR1]], 16 + ; SI-NEXT: [[ABS3:%[0-9]+]]:_(i32) = G_ABS [[SEXT_INREG3]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[ABS]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[ABS1]], [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[ABS2]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[ABS3]], [[C1]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x i16>), [[BITCAST3]](<2 x i16>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; VI-LABEL: name: test_abs_v4s16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[ABS:%[0-9]+]]:_(s16) = G_ABS [[TRUNC]] - ; VI-NEXT: [[ABS1:%[0-9]+]]:_(s16) = G_ABS [[TRUNC1]] - ; VI-NEXT: [[ABS2:%[0-9]+]]:_(s16) = G_ABS [[TRUNC2]] - ; VI-NEXT: [[ABS3:%[0-9]+]]:_(s16) = G_ABS [[TRUNC3]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ABS]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ABS1]](s16) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ABS2]](s16) - ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[ABS3]](s16) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; 
VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[ABS:%[0-9]+]]:_(i16) = G_ABS [[TRUNC]] + ; VI-NEXT: [[ABS1:%[0-9]+]]:_(i16) = G_ABS [[TRUNC1]] + ; VI-NEXT: [[ABS2:%[0-9]+]]:_(i16) = G_ABS [[TRUNC2]] + ; VI-NEXT: [[ABS3:%[0-9]+]]:_(i16) = G_ABS [[TRUNC3]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[ABS]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[ABS1]](i16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[ABS2]](i16) + ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[ABS3]](i16) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x i16>), [[BITCAST3]](<2 x i16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX9-LABEL: name: test_abs_v4s16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9-NEXT: [[ABS:%[0-9]+]]:_(<2 x s16>) = G_ABS [[UV]] - ; GFX9-NEXT: [[ABS1:%[0-9]+]]:_(<2 x s16>) = G_ABS [[UV1]] - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[ABS]](<2 x s16>), [[ABS1]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = G_ABS %0 - $vgpr0_vgpr1 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; GFX9-NEXT: [[ABS:%[0-9]+]]:_(<2 x i16>) = G_ABS [[UV]] + ; GFX9-NEXT: [[ABS1:%[0-9]+]]:_(<2 x i16>) = G_ABS [[UV1]] + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[ABS]](<2 x i16>), [[ABS1]](<2 x i16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = G_ABS %0 + $vgpr0_vgpr1 = COPY %1(<4 x i16>) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-add.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-add.mir index 7faf05e0aaf30..aaa77d3f1492d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-add.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-add.mir @@ -14,30 +14,30 @@ body: | ; GFX6-LABEL: name: test_add_s32 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] - ; GFX6-NEXT: $vgpr0 = COPY [[ADD]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[COPY]], [[COPY1]] + ; GFX6-NEXT: $vgpr0 = COPY [[ADD]](i32) ; ; GFX8-LABEL: name: test_add_s32 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] - ; GFX8-NEXT: $vgpr0 = COPY [[ADD]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[COPY]], [[COPY1]] + ; GFX8-NEXT: $vgpr0 = COPY [[ADD]](i32) ; ; GFX9-LABEL: name: test_add_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] - ; GFX9-NEXT: $vgpr0 = COPY [[ADD]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_ADD %0, %1 - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[ADD]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_ADD %0, %1 + $vgpr0 = COPY %2(i32) ... 
--- @@ -49,42 +49,42 @@ body: | ; GFX6-LABEL: name: test_add_v2s32 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV2]] - ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV3]] - ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[UV]], [[UV2]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[UV1]], [[UV3]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[ADD]](i32), [[ADD1]](i32) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX8-LABEL: name: test_add_v2s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV2]] - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV3]] - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[UV]], [[UV2]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[UV1]], [[UV3]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[ADD]](i32), [[ADD1]](i32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX9-LABEL: name: test_add_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV2]] - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV3]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = G_ADD %0, %1 - 
$vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[UV]], [[UV2]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[UV1]], [[UV3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[ADD]](i32), [[ADD1]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i32>) = G_ADD %0, %1 + $vgpr0_vgpr1 = COPY %2(<2 x i32>) ... --- @@ -96,41 +96,41 @@ body: | ; GFX6-LABEL: name: test_add_s16 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C]] - ; GFX6-NEXT: $vgpr0 = COPY [[AND]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[COPY]], [[COPY1]] + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[ADD]], [[C]] + ; GFX6-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; GFX8-LABEL: name: test_add_s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC1]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) - ; GFX8-NEXT: $vgpr0 = COPY [[ZEXT]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i16) = G_ADD [[TRUNC]], [[TRUNC1]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[ADD]](i16) + ; GFX8-NEXT: $vgpr0 = COPY [[ZEXT]](i32) ; ; GFX9-LABEL: name: test_add_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC1]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ZEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s16) = G_ADD %2, %3 - %5:_(s32) = G_ZEXT %4 - $vgpr0 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(i16) = G_ADD [[TRUNC]], [[TRUNC1]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[ADD]](i16) + ; GFX9-NEXT: $vgpr0 = COPY 
[[ZEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i16) = G_ADD %2, %3 + %5:_(i32) = G_ZEXT %4(i16) + $vgpr0 = COPY %5(i32) ... --- @@ -142,57 +142,57 @@ body: | ; GFX6-LABEL: name: test_add_v2s16 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[BITCAST]], [[BITCAST1]] - ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[LSHR1]] - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C1]] - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C1]] - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[BITCAST]], [[BITCAST1]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[LSHR]], [[LSHR1]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[ADD]], [[C1]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[ADD1]], [[C1]] + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) ; ; GFX8-LABEL: name: test_add_v2s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC2]] - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC3]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) - ; GFX8-NEXT: 
[[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i16) = G_ADD [[TRUNC]], [[TRUNC2]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(i16) = G_ADD [[TRUNC1]], [[TRUNC3]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[ADD]](i16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[ADD1]](i16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) ; ; GFX9-LABEL: name: test_add_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(<2 x s16>) = G_ADD [[COPY]], [[COPY1]] - ; GFX9-NEXT: $vgpr0 = COPY [[ADD]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_ADD %0, %1 - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(<2 x i16>) = G_ADD [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[ADD]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = G_ADD %0, %1 + $vgpr0 = COPY %2(<2 x i16>) ... 
--- @@ -203,87 +203,87 @@ body: | ; GFX6-LABEL: name: test_add_v3s16 ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY3]] - ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ADD]](s32) - ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[COPY4]] - ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[ADD1]](s32) - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[COPY2]], [[COPY5]] - ; GFX6-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[ADD2]](s32) - ; GFX6-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s16), implicit [[TRUNC1]](s16), implicit [[TRUNC2]](s16) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[COPY]], [[COPY3]] + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[ADD]](i32) + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[COPY1]], [[COPY4]] + ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[ADD1]](i32) + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[COPY2]], [[COPY5]] + ; GFX6-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[ADD2]](i32) + ; GFX6-NEXT: S_ENDPGM 0, implicit [[TRUNC]](i16), implicit [[TRUNC1]](i16), implicit [[TRUNC2]](i16) ; ; GFX8-LABEL: name: test_add_v3s16 ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) - ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC3]] - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC4]] - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC5]] - ; GFX8-NEXT: S_ENDPGM 0, implicit [[ADD]](s16), implicit [[ADD1]](s16), implicit [[ADD2]](s16) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY3]](i32) + ; GFX8-NEXT: 
[[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY4]](i32) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY5]](i32) + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i16) = G_ADD [[TRUNC]], [[TRUNC3]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(i16) = G_ADD [[TRUNC1]], [[TRUNC4]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(i16) = G_ADD [[TRUNC2]], [[TRUNC5]] + ; GFX8-NEXT: S_ENDPGM 0, implicit [[ADD]](i16), implicit [[ADD1]](i16), implicit [[ADD2]](i16) ; ; GFX9-LABEL: name: test_add_v3s16 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(<2 x s16>) = G_ADD [[BUILD_VECTOR]], [[BUILD_VECTOR2]] - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(<2 x s16>) = G_ADD [[BUILD_VECTOR1]], [[BUILD_VECTOR3]] - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[ADD]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[ADD1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: S_ENDPGM 0, implicit [[TRUNC6]](s16), implicit [[TRUNC7]](s16), implicit [[TRUNC8]](s16) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = COPY $vgpr3 - %4:_(s32) = COPY $vgpr4 - %5:_(s32) = COPY $vgpr5 - %6:_(s16) = G_TRUNC %0 - %7:_(s16) = G_TRUNC %1 - %8:_(s16) = G_TRUNC %2 - %9:_(s16) = G_TRUNC %3 - %10:_(s16) = G_TRUNC %4 - %11:_(s16) = G_TRUNC %5 - %12:_(<3 x s16>) = G_BUILD_VECTOR %6, %7, %8 - %13:_(<3 x s16>) = G_BUILD_VECTOR %9, %10, %11 - %14:_(<3 x s16>) = G_ADD %12, %13 - %15:_(s16), %16:_(s16), %17:_(s16) = G_UNMERGE_VALUES %14 - S_ENDPGM 0, implicit %15, implicit %16, implicit %17 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC 
[[COPY2]](i32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY3]](i32) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY4]](i32) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY5]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC3]](i16), [[TRUNC4]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC5]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(<2 x i16>) = G_ADD [[BUILD_VECTOR]], [[BUILD_VECTOR2]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(<2 x i16>) = G_ADD [[BUILD_VECTOR1]], [[BUILD_VECTOR3]] + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[ADD]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[ADD1]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9-NEXT: S_ENDPGM 0, implicit [[TRUNC6]](i16), implicit [[TRUNC7]](i16), implicit [[TRUNC8]](i16) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 + %4:_(i32) = COPY $vgpr4 + %5:_(i32) = COPY $vgpr5 + %6:_(i16) = G_TRUNC %0(i32) + %7:_(i16) = G_TRUNC %1(i32) + %8:_(i16) = G_TRUNC %2(i32) + %9:_(i16) = G_TRUNC %3(i32) + %10:_(i16) = G_TRUNC %4(i32) + %11:_(i16) = G_TRUNC %5(i32) + %12:_(<3 x i16>) = G_BUILD_VECTOR %6(i16), %7(i16), %8(i16) + %13:_(<3 x i16>) = G_BUILD_VECTOR %9(i16), %10(i16), %11(i16) + %14:_(<3 x i16>) = G_ADD %12, %13 + %15:_(i16), %16:_(i16), %17:_(i16) = G_UNMERGE_VALUES %14(<3 x i16>) + S_ENDPGM 0, implicit %15(i16), implicit %16(i16), implicit %17(i16) ... 
--- @@ -295,93 +295,93 @@ body: | ; GFX6-LABEL: name: test_add_v4s16 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[BITCAST]], [[BITCAST2]] - ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[LSHR2]] - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[BITCAST1]], [[BITCAST3]] - ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[LSHR1]], [[LSHR3]] - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C1]] - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C1]] - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C1]] - ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD3]], [[C1]] - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[BITCAST]], [[BITCAST2]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[LSHR]], [[LSHR2]] + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[BITCAST1]], [[BITCAST3]] + ; GFX6-NEXT: 
[[ADD3:%[0-9]+]]:_(i32) = G_ADD [[LSHR1]], [[LSHR3]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[ADD]], [[C1]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[ADD1]], [[C1]] + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[ADD2]], [[C1]] + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[ADD3]], [[C1]] + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C]](i32) + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x i16>), [[BITCAST5]](<2 x i16>) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX8-LABEL: name: test_add_v4s16 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC4]] - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC5]] - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC6]] - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[TRUNC7]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ADD2]](s16) - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[ADD3]](s16) - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8-NEXT: 
[[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST3]](i32) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i16) = G_ADD [[TRUNC]], [[TRUNC4]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(i16) = G_ADD [[TRUNC1]], [[TRUNC5]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(i16) = G_ADD [[TRUNC2]], [[TRUNC6]] + ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(i16) = G_ADD [[TRUNC3]], [[TRUNC7]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[ADD]](i16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[ADD1]](i16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[ADD2]](i16) + ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[ADD3]](i16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x i16>), [[BITCAST5]](<2 x i16>) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX9-LABEL: name: test_add_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(<2 x s16>) = G_ADD [[UV]], [[UV2]] - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(<2 x s16>) = G_ADD [[UV1]], [[UV3]] - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[ADD]](<2 x s16>), [[ADD1]](<2 x s16>) - ; 
GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 - %2:_(<4 x s16>) = G_ADD %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(<2 x i16>) = G_ADD [[UV]], [[UV2]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(<2 x i16>) = G_ADD [[UV1]], [[UV3]] + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[ADD]](<2 x i16>), [[ADD1]](<2 x i16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = COPY $vgpr2_vgpr3 + %2:_(<4 x i16>) = G_ADD %0, %1 + $vgpr0_vgpr1 = COPY %2(<4 x i16>) ... --- @@ -393,42 +393,42 @@ body: | ; GFX6-LABEL: name: test_add_s64 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] - ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV]], [[UV2]] + ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) ; ; GFX8-LABEL: name: test_add_s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] - ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV]], 
[[UV2]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) ; ; GFX9-LABEL: name: test_add_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = G_ADD %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV]], [[UV2]] + ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64) = G_ADD %0, %1 + $vgpr0_vgpr1 = COPY %2(i64) ... 
--- @@ -440,45 +440,45 @@ body: | ; GFX6-LABEL: name: test_add_s7 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C]] - ; GFX6-NEXT: $vgpr0 = COPY [[AND]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[COPY]], [[COPY1]] + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 127 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[ADD]], [[C]] + ; GFX6-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; GFX8-LABEL: name: test_add_s7 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC1]] - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] - ; GFX8-NEXT: $vgpr0 = COPY [[AND]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i16) = G_ADD [[TRUNC]], [[TRUNC1]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD]](i16) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 127 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[ANYEXT]], [[C]] + ; GFX8-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; GFX9-LABEL: name: test_add_s7 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC1]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] - ; GFX9-NEXT: $vgpr0 = COPY [[AND]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s7) = G_TRUNC %0 - %3:_(s7) = G_TRUNC %1 - %4:_(s7) = G_ADD %2, %3 - %5:_(s32) = G_ZEXT %4 - $vgpr0 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(i16) = G_ADD [[TRUNC]], [[TRUNC1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD]](i16) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 127 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[ANYEXT]], [[C]] + ; GFX9-NEXT: $vgpr0 = COPY [[AND]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i7) = G_TRUNC %0(i32) + %3:_(i7) = G_TRUNC %1(i32) + %4:_(i7) = G_ADD %2, %3 + %5:_(i32) = G_ZEXT %4(i7) + $vgpr0 = COPY %5(i32) ... 
--- @@ -490,33 +490,33 @@ body: | ; GFX6-LABEL: name: test_add_s24 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] - ; GFX6-NEXT: $vgpr0 = COPY [[ADD]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[COPY]], [[COPY1]] + ; GFX6-NEXT: $vgpr0 = COPY [[ADD]](i32) ; ; GFX8-LABEL: name: test_add_s24 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] - ; GFX8-NEXT: $vgpr0 = COPY [[ADD]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[COPY]], [[COPY1]] + ; GFX8-NEXT: $vgpr0 = COPY [[ADD]](i32) ; ; GFX9-LABEL: name: test_add_s24 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] - ; GFX9-NEXT: $vgpr0 = COPY [[ADD]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s24) = G_TRUNC %0 - %3:_(s24) = G_TRUNC %1 - %4:_(s24) = G_ADD %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[ADD]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i24) = G_TRUNC %0(i32) + %3:_(i24) = G_TRUNC %1(i32) + %4:_(i24) = G_ADD %2, %3 + %5:_(i32) = G_ANYEXT %4(i24) + $vgpr0 = COPY %5(i32) ... 
--- @@ -527,45 +527,45 @@ body: | ; GFX6-LABEL: name: test_add_s33 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] - ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV]], [[UV2]] + ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) ; ; GFX8-LABEL: name: test_add_s33 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] - ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV]], [[UV2]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) ; ; GFX9-LABEL: name: test_add_s33 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - 
%1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s33) = G_TRUNC %0 - %3:_(s33) = G_TRUNC %1 - %4:_(s33) = G_ADD %2, %3 - %5:_(s64) = G_ANYEXT %4 - $vgpr0_vgpr1 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV]], [[UV2]] + ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i33) = G_TRUNC %0(i64) + %3:_(i33) = G_TRUNC %1(i64) + %4:_(i33) = G_ADD %2, %3 + %5:_(i64) = G_ANYEXT %4(i33) + $vgpr0_vgpr1 = COPY %5(i64) ... --- @@ -577,43 +577,43 @@ body: | ; GFX6-LABEL: name: test_add_s96 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) - ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s96) - ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV3]] - ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV4]], [[UADDO1]] - ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV2]], [[UV5]], [[UADDE1]] - ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32), [[UADDE2]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i96) + ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i96) + ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV]], [[UV3]] + ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV1]], [[UV4]], [[UADDO1]] + ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UV2]], [[UV5]], [[UADDE1]] + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(i96) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32), [[UADDE2]](i32) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](i96) ; ; GFX8-LABEL: name: test_add_s96 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) - ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s96) - ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV3]] - ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV4]], [[UADDO1]] - ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = 
G_UADDE [[UV2]], [[UV5]], [[UADDE1]] - ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32), [[UADDE2]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i96) + ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i96) + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV]], [[UV3]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV1]], [[UV4]], [[UADDO1]] + ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UV2]], [[UV5]], [[UADDE1]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(i96) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32), [[UADDE2]](i32) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](i96) ; ; GFX9-LABEL: name: test_add_s96 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s96) - ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV3]] - ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV4]], [[UADDO1]] - ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV2]], [[UV5]], [[UADDE1]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32), [[UADDE2]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) - %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(s96) = COPY $vgpr3_vgpr4_vgpr5 - %2:_(s96) = G_ADD %0, %1 - $vgpr0_vgpr1_vgpr2 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i96) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i96) + ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV]], [[UV3]] + ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV1]], [[UV4]], [[UADDO1]] + ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UV2]], [[UV5]], [[UADDE1]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i96) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32), [[UADDE2]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](i96) + %0:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(i96) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(i96) = G_ADD %0, %1 + $vgpr0_vgpr1_vgpr2 = COPY %2(i96) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir index 0b3b4288556bf..5b13e289c3863 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir @@ -22,8 +22,8 @@ body: | ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(p1) = G_BITCAST [[COPY]](p0) ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](p1) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(p1) = G_ADDRSPACE_CAST %0 - $vgpr0_vgpr1 = COPY %1 + %1:_(p1) = G_ADDRSPACE_CAST %0(p0) + $vgpr0_vgpr1 = COPY %1(p1) ... --- @@ -43,8 +43,8 @@ body: | ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(p0) = G_BITCAST [[COPY]](p1) ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](p0) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(p0) = G_ADDRSPACE_CAST %0 - $vgpr0_vgpr1 = COPY %1 + %1:_(p0) = G_ADDRSPACE_CAST %0(p1) + $vgpr0_vgpr1 = COPY %1(p0) ... --- @@ -63,8 +63,8 @@ body: | ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(p4) = G_BITCAST [[COPY]](p0) ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](p4) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(p4) = G_ADDRSPACE_CAST %0 - $vgpr0_vgpr1 = COPY %1 + %1:_(p4) = G_ADDRSPACE_CAST %0(p0) + $vgpr0_vgpr1 = COPY %1(p4) ... --- @@ -83,8 +83,8 @@ body: | ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(p0) = G_BITCAST [[COPY]](p4) ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](p0) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(p0) = G_ADDRSPACE_CAST %0 - $vgpr0_vgpr1 = COPY %1 + %1:_(p0) = G_ADDRSPACE_CAST %0(p4) + $vgpr0_vgpr1 = COPY %1(p0) ... --- @@ -103,8 +103,8 @@ body: | ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(p999) = G_BITCAST [[COPY]](p0) ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](p999) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(p999) = G_ADDRSPACE_CAST %0 - $vgpr0_vgpr1 = COPY %1 + %1:_(p999) = G_ADDRSPACE_CAST %0(p0) + $vgpr0_vgpr1 = COPY %1(p999) ... --- @@ -123,8 +123,8 @@ body: | ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(p0) = G_BITCAST [[COPY]](p999) ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](p0) %0:_(p999) = COPY $vgpr0_vgpr1 - %1:_(p0) = G_ADDRSPACE_CAST %0 - $vgpr0_vgpr1 = COPY %1 + %1:_(p0) = G_ADDRSPACE_CAST %0(p999) + $vgpr0_vgpr1 = COPY %1(p0) ... 
--- @@ -142,32 +142,33 @@ body: | ; SIVI-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5 ; SIVI-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY $vgpr0 ; SIVI-NEXT: [[COPY2:%[0-9]+]]:_(p4) = COPY [[COPY]](p4) - ; SIVI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 68 - ; SIVI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY2]], [[C]](s64) - ; SIVI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4) - ; SIVI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY1]](p5) - ; SIVI-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[LOAD]](s32) + ; SIVI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 68 + ; SIVI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY2]], [[C]](i64) + ; SIVI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (i32), addrspace 4) + ; SIVI-NEXT: [[PTRTOINT:%[0-9]+]]:_(i32) = G_PTRTOINT [[COPY1]](p5) + ; SIVI-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](i32), [[LOAD]](i32) ; SIVI-NEXT: [[C1:%[0-9]+]]:_(p5) = G_CONSTANT i32 -1 ; SIVI-NEXT: [[C2:%[0-9]+]]:_(p0) = G_CONSTANT i64 0 - ; SIVI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY1]](p5), [[C1]] - ; SIVI-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C2]] + ; SIVI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[COPY1]](p5), [[C1]] + ; SIVI-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](i1), [[MV]], [[C2]] ; SIVI-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](p0) + ; ; GFX9-LABEL: name: test_addrspacecast_p5_to_p0 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64(s64) = S_MOV_B64 $src_private_base - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[S_MOV_B64_]](s64) - ; GFX9-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p5) - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[UV1]](s32) + ; GFX9-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64(i64) = S_MOV_B64 $src_private_base + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[S_MOV_B64_]](i64) + ; GFX9-NEXT: [[PTRTOINT:%[0-9]+]]:_(i32) = G_PTRTOINT [[COPY]](p5) + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](i32), [[UV1]](i32) ; GFX9-NEXT: [[C:%[0-9]+]]:_(p5) = G_CONSTANT i32 -1 ; GFX9-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p5), [[C]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C1]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[COPY]](p5), [[C]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](i1), [[MV]], [[C1]] ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](p0) %0:_(p5) = COPY $vgpr0 - %1:_(p0) = G_ADDRSPACE_CAST %0 - $vgpr0_vgpr1 = COPY %1 + %1:_(p0) = G_ADDRSPACE_CAST %0(p5) + $vgpr0_vgpr1 = COPY %1(p0) ... 
--- @@ -186,12 +187,12 @@ body: | ; GCN-NEXT: [[C:%[0-9]+]]:_(p5) = G_CONSTANT i32 -1 ; GCN-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0 ; GCN-NEXT: [[EXTRACT:%[0-9]+]]:_(p5) = G_EXTRACT [[COPY]](p0), 0 - ; GCN-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p0), [[C1]] - ; GCN-NEXT: [[SELECT:%[0-9]+]]:_(p5) = G_SELECT [[ICMP]](s1), [[EXTRACT]], [[C]] + ; GCN-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[COPY]](p0), [[C1]] + ; GCN-NEXT: [[SELECT:%[0-9]+]]:_(p5) = G_SELECT [[ICMP]](i1), [[EXTRACT]], [[C]] ; GCN-NEXT: $vgpr0 = COPY [[SELECT]](p5) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(p5) = G_ADDRSPACE_CAST %0 - $vgpr0 = COPY %1 + %1:_(p5) = G_ADDRSPACE_CAST %0(p0) + $vgpr0 = COPY %1(p5) ... --- @@ -210,32 +211,33 @@ body: | ; SIVI-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5 ; SIVI-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SIVI-NEXT: [[COPY2:%[0-9]+]]:_(p4) = COPY [[COPY]](p4) - ; SIVI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 - ; SIVI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY2]], [[C]](s64) - ; SIVI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 64, addrspace 4) - ; SIVI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY1]](p3) - ; SIVI-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[LOAD]](s32) + ; SIVI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 64 + ; SIVI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY2]], [[C]](i64) + ; SIVI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (i32), align 64, addrspace 4) + ; SIVI-NEXT: [[PTRTOINT:%[0-9]+]]:_(i32) = G_PTRTOINT [[COPY1]](p3) + ; SIVI-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](i32), [[LOAD]](i32) ; SIVI-NEXT: [[C1:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1 ; SIVI-NEXT: [[C2:%[0-9]+]]:_(p0) = G_CONSTANT i64 0 - ; SIVI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY1]](p3), [[C1]] - ; SIVI-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C2]] + ; SIVI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[COPY1]](p3), [[C1]] + ; SIVI-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](i1), [[MV]], [[C2]] ; SIVI-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](p0) + ; ; GFX9-LABEL: name: test_addrspacecast_p3_to_p0 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64(s64) = S_MOV_B64 $src_shared_base - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[S_MOV_B64_]](s64) - ; GFX9-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p3) - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[UV1]](s32) + ; GFX9-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64(i64) = S_MOV_B64 $src_shared_base + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[S_MOV_B64_]](i64) + ; GFX9-NEXT: [[PTRTOINT:%[0-9]+]]:_(i32) = G_PTRTOINT [[COPY]](p3) + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](i32), [[UV1]](i32) ; GFX9-NEXT: [[C:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1 ; GFX9-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p3), [[C]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C1]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[COPY]](p3), [[C]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](i1), [[MV]], [[C1]] ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](p0) %0:_(p3) = COPY $vgpr0 - %1:_(p0) = 
G_ADDRSPACE_CAST %0 - $vgpr0_vgpr1 = COPY %1 + %1:_(p0) = G_ADDRSPACE_CAST %0(p3) + $vgpr0_vgpr1 = COPY %1(p0) ... --- @@ -254,12 +256,12 @@ body: | ; GCN-NEXT: [[C:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1 ; GCN-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0 ; GCN-NEXT: [[EXTRACT:%[0-9]+]]:_(p3) = G_EXTRACT [[COPY]](p0), 0 - ; GCN-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p0), [[C1]] - ; GCN-NEXT: [[SELECT:%[0-9]+]]:_(p3) = G_SELECT [[ICMP]](s1), [[EXTRACT]], [[C]] + ; GCN-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[COPY]](p0), [[C1]] + ; GCN-NEXT: [[SELECT:%[0-9]+]]:_(p3) = G_SELECT [[ICMP]](i1), [[EXTRACT]], [[C]] ; GCN-NEXT: $vgpr0 = COPY [[SELECT]](p3) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(p3) = G_ADDRSPACE_CAST %0 - $vgpr0 = COPY %1 + %1:_(p3) = G_ADDRSPACE_CAST %0(p0) + $vgpr0 = COPY %1(p3) ... --- @@ -281,8 +283,8 @@ body: | ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[BITCAST]](p1), [[BITCAST1]](p1) ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p1>) %0:_(<2 x p0>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x p1>) = G_ADDRSPACE_CAST %0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<2 x p1>) = G_ADDRSPACE_CAST %0(<2 x p0>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x p1>) ... --- @@ -304,8 +306,8 @@ body: | ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[BITCAST]](p0), [[BITCAST1]](p0) ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p0>) %0:_(<2 x p1>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x p0>) = G_ADDRSPACE_CAST %0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<2 x p0>) = G_ADDRSPACE_CAST %0(<2 x p1>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x p0>) ... --- @@ -325,16 +327,16 @@ body: | ; GCN-NEXT: [[C:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1 ; GCN-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0 ; GCN-NEXT: [[EXTRACT:%[0-9]+]]:_(p3) = G_EXTRACT [[UV]](p0), 0 - ; GCN-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](p0), [[C1]] - ; GCN-NEXT: [[SELECT:%[0-9]+]]:_(p3) = G_SELECT [[ICMP]](s1), [[EXTRACT]], [[C]] + ; GCN-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV]](p0), [[C1]] + ; GCN-NEXT: [[SELECT:%[0-9]+]]:_(p3) = G_SELECT [[ICMP]](i1), [[EXTRACT]], [[C]] ; GCN-NEXT: [[EXTRACT1:%[0-9]+]]:_(p3) = G_EXTRACT [[UV1]](p0), 0 - ; GCN-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](p0), [[C1]] - ; GCN-NEXT: [[SELECT1:%[0-9]+]]:_(p3) = G_SELECT [[ICMP1]](s1), [[EXTRACT1]], [[C]] + ; GCN-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV1]](p0), [[C1]] + ; GCN-NEXT: [[SELECT1:%[0-9]+]]:_(p3) = G_SELECT [[ICMP1]](i1), [[EXTRACT1]], [[C]] ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[SELECT]](p3), [[SELECT1]](p3) ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) %0:_(<2 x p0>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x p3>) = G_ADDRSPACE_CAST %0 - $vgpr0_vgpr1 = COPY %1 + %1:_(<2 x p3>) = G_ADDRSPACE_CAST %0(<2 x p0>) + $vgpr0_vgpr1 = COPY %1(<2 x p3>) ... 
--- @@ -353,48 +355,49 @@ body: | ; SIVI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr0_vgpr1 ; SIVI-NEXT: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY1]](<2 x p3>) ; SIVI-NEXT: [[COPY2:%[0-9]+]]:_(p4) = COPY [[COPY]](p4) - ; SIVI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 - ; SIVI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY2]], [[C]](s64) - ; SIVI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 64, addrspace 4) - ; SIVI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV]](p3) - ; SIVI-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[LOAD]](s32) + ; SIVI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 64 + ; SIVI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY2]], [[C]](i64) + ; SIVI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (i32), align 64, addrspace 4) + ; SIVI-NEXT: [[PTRTOINT:%[0-9]+]]:_(i32) = G_PTRTOINT [[UV]](p3) + ; SIVI-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](i32), [[LOAD]](i32) ; SIVI-NEXT: [[C1:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1 ; SIVI-NEXT: [[C2:%[0-9]+]]:_(p0) = G_CONSTANT i64 0 - ; SIVI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](p3), [[C1]] - ; SIVI-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C2]] + ; SIVI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV]](p3), [[C1]] + ; SIVI-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](i1), [[MV]], [[C2]] ; SIVI-NEXT: [[COPY3:%[0-9]+]]:_(p4) = COPY [[COPY]](p4) - ; SIVI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY3]], [[C]](s64) - ; SIVI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32), align 64, addrspace 4) - ; SIVI-NEXT: [[PTRTOINT1:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV1]](p3) - ; SIVI-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT1]](s32), [[LOAD1]](s32) - ; SIVI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](p3), [[C1]] - ; SIVI-NEXT: [[SELECT1:%[0-9]+]]:_(p0) = G_SELECT [[ICMP1]](s1), [[MV1]], [[C2]] + ; SIVI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY3]], [[C]](i64) + ; SIVI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (i32), align 64, addrspace 4) + ; SIVI-NEXT: [[PTRTOINT1:%[0-9]+]]:_(i32) = G_PTRTOINT [[UV1]](p3) + ; SIVI-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT1]](i32), [[LOAD1]](i32) + ; SIVI-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV1]](p3), [[C1]] + ; SIVI-NEXT: [[SELECT1:%[0-9]+]]:_(p0) = G_SELECT [[ICMP1]](i1), [[MV1]], [[C2]] ; SIVI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[SELECT]](p0), [[SELECT1]](p0) ; SIVI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p0>) + ; ; GFX9-LABEL: name: test_addrspacecast_v2p3_to_v2p0 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY]](<2 x p3>) - ; GFX9-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64(s64) = S_MOV_B64 $src_shared_base - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[S_MOV_B64_]](s64) - ; GFX9-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV]](p3) - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[UV3]](s32) + ; GFX9-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64(i64) = S_MOV_B64 $src_shared_base + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES 
[[S_MOV_B64_]](i64) + ; GFX9-NEXT: [[PTRTOINT:%[0-9]+]]:_(i32) = G_PTRTOINT [[UV]](p3) + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](i32), [[UV3]](i32) ; GFX9-NEXT: [[C:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1 ; GFX9-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](p3), [[C]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C1]] - ; GFX9-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64(s64) = S_MOV_B64 $src_shared_base - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[S_MOV_B64_1]](s64) - ; GFX9-NEXT: [[PTRTOINT1:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV1]](p3) - ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT1]](s32), [[UV5]](s32) - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](p3), [[C]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(p0) = G_SELECT [[ICMP1]](s1), [[MV1]], [[C1]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV]](p3), [[C]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](i1), [[MV]], [[C1]] + ; GFX9-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64(i64) = S_MOV_B64 $src_shared_base + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[S_MOV_B64_1]](i64) + ; GFX9-NEXT: [[PTRTOINT1:%[0-9]+]]:_(i32) = G_PTRTOINT [[UV1]](p3) + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT1]](i32), [[UV5]](i32) + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV1]](p3), [[C]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(p0) = G_SELECT [[ICMP1]](i1), [[MV1]], [[C1]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[SELECT]](p0), [[SELECT1]](p0) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p0>) %0:_(<2 x p3>) = COPY $vgpr0_vgpr1 - %1:_(<2 x p0>) = G_ADDRSPACE_CAST %0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<2 x p0>) = G_ADDRSPACE_CAST %0(<2 x p3>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x p0>) ... --- @@ -410,8 +413,8 @@ body: | ; GCN-NEXT: [[EXTRACT:%[0-9]+]]:_(p6) = G_EXTRACT [[COPY]](p4), 0 ; GCN-NEXT: $vgpr0 = COPY [[EXTRACT]](p6) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(p6) = G_ADDRSPACE_CAST %0 - $vgpr0 = COPY %1 + %1:_(p6) = G_ADDRSPACE_CAST %0(p4) + $vgpr0 = COPY %1(p6) ... --- @@ -424,13 +427,13 @@ body: | ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $vgpr0 - ; GCN-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32) + ; GCN-NEXT: [[PTRTOINT:%[0-9]+]]:_(i32) = G_PTRTOINT [[COPY]](p6) + ; GCN-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](i32), [[C]](i32) ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p4) %0:_(p6) = COPY $vgpr0 - %1:_(p4) = G_ADDRSPACE_CAST %0 - $vgpr0_vgpr1 = COPY %1 + %1:_(p4) = G_ADDRSPACE_CAST %0(p6) + $vgpr0_vgpr1 = COPY %1(p4) ... 
--- @@ -445,13 +448,13 @@ body: | ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $vgpr0 - ; GCN-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -559038737 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32) + ; GCN-NEXT: [[PTRTOINT:%[0-9]+]]:_(i32) = G_PTRTOINT [[COPY]](p6) + ; GCN-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 -559038737 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](i32), [[C]](i32) ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p4) %0:_(p6) = COPY $vgpr0 - %1:_(p4) = G_ADDRSPACE_CAST %0 - $vgpr0_vgpr1 = COPY %1 + %1:_(p4) = G_ADDRSPACE_CAST %0(p6) + $vgpr0_vgpr1 = COPY %1(p4) ... --- @@ -467,8 +470,8 @@ body: | ; GCN-NEXT: [[EXTRACT:%[0-9]+]]:_(p6) = G_EXTRACT [[COPY]](p0), 0 ; GCN-NEXT: $vgpr0 = COPY [[EXTRACT]](p6) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(p6) = G_ADDRSPACE_CAST %0 - $vgpr0 = COPY %1 + %1:_(p6) = G_ADDRSPACE_CAST %0(p0) + $vgpr0 = COPY %1(p6) ... --- @@ -481,13 +484,13 @@ body: | ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $vgpr0 - ; GCN-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32) + ; GCN-NEXT: [[PTRTOINT:%[0-9]+]]:_(i32) = G_PTRTOINT [[COPY]](p6) + ; GCN-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](i32), [[C]](i32) ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p0) %0:_(p6) = COPY $vgpr0 - %1:_(p0) = G_ADDRSPACE_CAST %0 - $vgpr0_vgpr1 = COPY %1 + %1:_(p0) = G_ADDRSPACE_CAST %0(p6) + $vgpr0_vgpr1 = COPY %1(p0) ... --- name: test_addrspacecast_p5_fi_to_p0 @@ -504,20 +507,21 @@ body: | ; SIVI-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5 ; SIVI-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0 ; SIVI-NEXT: [[COPY1:%[0-9]+]]:_(p4) = COPY [[COPY]](p4) - ; SIVI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 68 - ; SIVI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY1]], [[C]](s64) - ; SIVI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4) - ; SIVI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[FRAME_INDEX]](p5) - ; SIVI-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[LOAD]](s32) + ; SIVI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 68 + ; SIVI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY1]], [[C]](i64) + ; SIVI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (i32), addrspace 4) + ; SIVI-NEXT: [[PTRTOINT:%[0-9]+]]:_(i32) = G_PTRTOINT [[FRAME_INDEX]](p5) + ; SIVI-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](i32), [[LOAD]](i32) ; SIVI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p0) + ; ; GFX9-LABEL: name: test_addrspacecast_p5_fi_to_p0 ; GFX9: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0 - ; GFX9-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64(s64) = S_MOV_B64 $src_private_base - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[S_MOV_B64_]](s64) - ; GFX9-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[FRAME_INDEX]](p5) - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[UV1]](s32) + ; GFX9-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64(i64) = S_MOV_B64 $src_private_base + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[S_MOV_B64_]](i64) + ; GFX9-NEXT: 
[[PTRTOINT:%[0-9]+]]:_(i32) = G_PTRTOINT [[FRAME_INDEX]](p5) + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](i32), [[UV1]](i32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p0) %0:_(p5) = G_FRAME_INDEX %stack.0 - %1:_(p0) = G_ADDRSPACE_CAST %0 - $vgpr0_vgpr1 = COPY %1 + %1:_(p0) = G_ADDRSPACE_CAST %0(p5) + $vgpr0_vgpr1 = COPY %1(p0) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.if-invalid.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.if-invalid.mir index b7e52cadd8cd1..0754dc91a2643 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.if-invalid.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.if-invalid.mir @@ -2,12 +2,12 @@ # Make sure incorrect usage of control flow intrinsics fails to select in case some transform separated the intrinsic from its branch. -# ERR: remark: :0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: brcond_si_if_different_block) -# ERR-NEXT: remark: :0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: si_if_not_brcond_user) -# ERR-NEXT: remark: :0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: si_if_multi_user) -# ERR-NEXT: remark: :0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: brcond_si_if_xor_0) -# ERR-NEXT: remark: :0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: brcond_si_if_or_neg1) -# ERR-NEXT: remark: :0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: brcond_si_if_negated_multi_use) +# ERR: remark: :0:0: unable to legalize instruction: %3:_(i1), %4:_(i64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(i1) (in function: brcond_si_if_different_block) +# ERR-NEXT: remark: :0:0: unable to legalize instruction: %3:_(i1), %4:_(i64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(i1) (in function: si_if_not_brcond_user) +# ERR-NEXT: remark: :0:0: unable to legalize instruction: %3:_(i1), %4:_(i64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(i1) (in function: si_if_multi_user) +# ERR-NEXT: remark: :0:0: unable to legalize instruction: %3:_(i1), %4:_(i64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(i1) (in function: brcond_si_if_xor_0) +# ERR-NEXT: remark: :0:0: unable to legalize instruction: %3:_(i1), %4:_(i64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(i1) (in function: brcond_si_if_or_neg1) +# ERR-NEXT: remark: :0:0: unable to legalize instruction: %3:_(i1), %4:_(i64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(i1) (in function: brcond_si_if_negated_multi_use) --- @@ -16,10 +16,10 @@ body: | bb.0: successors: %bb.1 liveins: $vgpr0, $vgpr1 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s1) = G_ICMP intpred(ne), %0, %1 - %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i1) = G_ICMP intpred(ne), %0, %1 + %3:_(i1), %4:_(i64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2 bb.1: G_BRCOND %3, %bb.1 @@ -31,11 +31,11 @@ name: si_if_not_brcond_user body: | bb.0: liveins: 
$vgpr0, $vgpr1 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s1) = G_ICMP intpred(ne), %0, %1 - %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2 - %5:_(s32) = G_SELECT %3, %0, %1 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i1) = G_ICMP intpred(ne), %0, %1 + %3:_(i1), %4:_(i64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2 + %5:_(i32) = G_SELECT %3, %0, %1 S_ENDPGM 0, implicit %5 ... @@ -45,11 +45,11 @@ name: si_if_multi_user body: | bb.0: liveins: $vgpr0, $vgpr1 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s1) = G_ICMP intpred(ne), %0, %1 - %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2 - %5:_(s32) = G_SELECT %3, %0, %1 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i1) = G_ICMP intpred(ne), %0, %1 + %3:_(i1), %4:_(i64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2 + %5:_(i32) = G_SELECT %3, %0, %1 G_BRCOND %3, %bb.1 bb.1: @@ -64,12 +64,12 @@ body: | bb.0: successors: %bb.1 liveins: $vgpr0, $vgpr1 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s1) = G_ICMP intpred(ne), %0, %1 - %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2 - %5:_(s1) = G_CONSTANT i1 false - %6:_(s1) = G_XOR %3, %5 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i1) = G_ICMP intpred(ne), %0, %1 + %3:_(i1), %4:_(i64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2 + %5:_(i1) = G_CONSTANT i1 false + %6:_(i1) = G_XOR %3, %5 G_BRCOND %6, %bb.2 G_BR %bb.3 @@ -90,12 +90,12 @@ body: | bb.0: successors: %bb.1 liveins: $vgpr0, $vgpr1 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s1) = G_ICMP intpred(ne), %0, %1 - %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2 - %5:_(s1) = G_CONSTANT i1 true - %6:_(s1) = G_OR %3, %5 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i1) = G_ICMP intpred(ne), %0, %1 + %3:_(i1), %4:_(i64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2 + %5:_(i1) = G_CONSTANT i1 true + %6:_(i1) = G_OR %3, %5 G_BRCOND %6, %bb.2 G_BR %bb.3 @@ -115,12 +115,12 @@ body: | bb.0: successors: %bb.1 liveins: $vgpr0, $vgpr1 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s1) = G_ICMP intpred(ne), %0, %1 - %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2 - %5:_(s1) = G_CONSTANT i1 true - %6:_(s1) = G_XOR %3, %5 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i1) = G_ICMP intpred(ne), %0, %1 + %3:_(i1), %4:_(i64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2 + %5:_(i1) = G_CONSTANT i1 true + %6:_(i1) = G_XOR %3, %5 S_NOP 0, implicit %6 G_BRCOND %6, %bb.2 G_BR %bb.3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.if.xfail.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.if.xfail.mir index 9716bb31db3fd..90567829f435e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.if.xfail.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.if.xfail.mir @@ -2,7 +2,7 @@ # Make sure there's no crash if there is somehow no successor block. 
-# ERR: remark: :0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: brcond_si_if_no_succ_block) +# ERR: remark: :0:0: unable to legalize instruction: %3:_(i1), %4:_(i64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(i1) (in function: brcond_si_if_no_succ_block) --- name: brcond_si_if_no_succ_block @@ -13,9 +13,9 @@ body: | bb.1: successors: %bb.1 liveins: $vgpr0, $vgpr1 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s1) = G_ICMP intpred(ne), %0, %1 - %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i1) = G_ICMP intpred(ne), %0, %1 + %3:_(i1), %4:_(i64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2 G_BRCOND %3, %bb.1 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.rsq.clamp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.rsq.clamp.mir index 93b605948165e..c67b95bf7ab98 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.rsq.clamp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.rsq.clamp.mir @@ -16,22 +16,29 @@ body: | ; SI-LABEL: name: test_rsq_clamp_flags_ieee_on_f32 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[INT:%[0-9]+]]:_(s32) = nnan ninf nsz G_INTRINSIC intrinsic(@llvm.amdgcn.rsq.clamp), [[COPY]](s32) - ; SI-NEXT: $vgpr0 = COPY [[INT]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[INT:%[0-9]+]]:_(f32) = nnan ninf nsz G_INTRINSIC intrinsic(@llvm.amdgcn.rsq.clamp), [[BITCAST]](f32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[INT]](f32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + ; ; VI-LABEL: name: test_rsq_clamp_flags_ieee_on_f32 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[INT:%[0-9]+]]:_(s32) = nnan ninf nsz G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), [[COPY]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x47EFFFFFE0000000 - ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = nnan ninf nsz G_FMINNUM_IEEE [[INT]], [[C]] - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC7EFFFFFE0000000 - ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = nnan ninf nsz G_FMAXNUM_IEEE [[FMINNUM_IEEE]], [[C1]] - ; VI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = nnan ninf nsz G_INTRINSIC intrinsic(@llvm.amdgcn.rsq.clamp), %0 - $vgpr0 = COPY %1 + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[INT:%[0-9]+]]:_(f32) = nnan ninf nsz G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), [[BITCAST]](f32) + ; VI-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x47EFFFFFE0000000 + ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f32) = nnan ninf nsz G_FMINNUM_IEEE [[INT]], [[C]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC7EFFFFFE0000000 + ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f32) = nnan ninf nsz G_FMAXNUM_IEEE [[FMINNUM_IEEE]], [[C1]] + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[FMAXNUM_IEEE]](f32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = nnan ninf nsz G_INTRINSIC intrinsic(@llvm.amdgcn.rsq.clamp), %1(f32) + %3:_(i32) = G_BITCAST %2(f32) + $vgpr0 = COPY %3(i32) ... 
--- @@ -48,20 +55,27 @@ body: | ; SI-LABEL: name: test_rsq_clamp_flags_ieee_off_f32 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[INT:%[0-9]+]]:_(s32) = nnan ninf nsz G_INTRINSIC intrinsic(@llvm.amdgcn.rsq.clamp), [[COPY]](s32) - ; SI-NEXT: $vgpr0 = COPY [[INT]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[INT:%[0-9]+]]:_(f32) = nnan ninf nsz G_INTRINSIC intrinsic(@llvm.amdgcn.rsq.clamp), [[BITCAST]](f32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[INT]](f32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + ; ; VI-LABEL: name: test_rsq_clamp_flags_ieee_off_f32 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[INT:%[0-9]+]]:_(s32) = nnan ninf nsz G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), [[COPY]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x47EFFFFFE0000000 - ; VI-NEXT: [[FMINNUM:%[0-9]+]]:_(s32) = nnan ninf nsz G_FMINNUM [[INT]], [[C]] - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC7EFFFFFE0000000 - ; VI-NEXT: [[FMAXNUM:%[0-9]+]]:_(s32) = nnan ninf nsz G_FMAXNUM [[FMINNUM]], [[C1]] - ; VI-NEXT: $vgpr0 = COPY [[FMAXNUM]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = nnan ninf nsz G_INTRINSIC intrinsic(@llvm.amdgcn.rsq.clamp), %0 - $vgpr0 = COPY %1 + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[INT:%[0-9]+]]:_(f32) = nnan ninf nsz G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), [[BITCAST]](f32) + ; VI-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x47EFFFFFE0000000 + ; VI-NEXT: [[FMINNUM:%[0-9]+]]:_(f32) = nnan ninf nsz G_FMINNUM [[INT]], [[C]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC7EFFFFFE0000000 + ; VI-NEXT: [[FMAXNUM:%[0-9]+]]:_(f32) = nnan ninf nsz G_FMAXNUM [[FMINNUM]], [[C1]] + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[FMAXNUM]](f32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = nnan ninf nsz G_INTRINSIC intrinsic(@llvm.amdgcn.rsq.clamp), %1(f32) + %3:_(i32) = G_BITCAST %2(f32) + $vgpr0 = COPY %3(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.wavefrontsize.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.wavefrontsize.mir index e20459cd48e87..0108bff8b5f68 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.wavefrontsize.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.wavefrontsize.mir @@ -9,11 +9,12 @@ body: | bb.0: ; WAVE64-LABEL: name: test_wavefrontsize - ; WAVE64: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; WAVE64-NEXT: $vgpr0 = COPY [[C]](s32) + ; WAVE64: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; WAVE64-NEXT: $vgpr0 = COPY [[C]](i32) + ; ; WAVE32-LABEL: name: test_wavefrontsize - ; WAVE32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; WAVE32-NEXT: $vgpr0 = COPY [[C]](s32) - %0:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.wavefrontsize) - $vgpr0 = COPY %0 + ; WAVE32: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; WAVE32-NEXT: $vgpr0 = COPY [[C]](i32) + %0:_(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.wavefrontsize) + $vgpr0 = COPY %0(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.workitem.id.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.workitem.id.mir index 915139b590fd4..28aa61bc4daa9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.workitem.id.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.workitem.id.mir @@ -45,12 +45,12 @@ body: | ; GCN-LABEL: name: test_workitem_id_x_unpacked ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY1]], 8 - ; GCN-NEXT: S_ENDPGM 0, implicit [[ASSERT_ZEXT]](s32) - %0:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.x) - S_ENDPGM 0, implicit %0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GCN-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(i32) = G_ASSERT_ZEXT [[COPY1]], 8 + ; GCN-NEXT: S_ENDPGM 0, implicit [[ASSERT_ZEXT]](i32) + %0:_(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.x) + S_ENDPGM 0, implicit %0(i32) ... --- @@ -66,12 +66,12 @@ body: | ; GCN-LABEL: name: test_workitem_id_y_unpacked ; GCN: liveins: $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY1]], 3 - ; GCN-NEXT: S_ENDPGM 0, implicit [[ASSERT_ZEXT]](s32) - %0:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.y) - S_ENDPGM 0, implicit %0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GCN-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(i32) = G_ASSERT_ZEXT [[COPY1]], 3 + ; GCN-NEXT: S_ENDPGM 0, implicit [[ASSERT_ZEXT]](i32) + %0:_(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.y) + S_ENDPGM 0, implicit %0(i32) ... --- @@ -87,12 +87,12 @@ body: | ; GCN-LABEL: name: test_workitem_id_z_unpacked ; GCN: liveins: $vgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY1]], 2 - ; GCN-NEXT: S_ENDPGM 0, implicit [[ASSERT_ZEXT]](s32) - %0:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.z) - S_ENDPGM 0, implicit %0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; GCN-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(i32) = G_ASSERT_ZEXT [[COPY1]], 2 + ; GCN-NEXT: S_ENDPGM 0, implicit [[ASSERT_ZEXT]](i32) + %0:_(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.z) + S_ENDPGM 0, implicit %0(i32) ... --- @@ -107,12 +107,12 @@ body: | ; GCN-LABEL: name: test_workitem_id_x_packed ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1023 - ; GCN-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GCN-NEXT: S_ENDPGM 0, implicit [[AND]](s32) - %0:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.x) - S_ENDPGM 0, implicit %0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 + ; GCN-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1023 + ; GCN-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[AND]](i32) + %0:_(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.x) + S_ENDPGM 0, implicit %0(i32) ... 
--- @@ -127,14 +127,14 @@ body: | ; GCN-LABEL: name: test_workitem_id_y_packed ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1023 - ; GCN-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; GCN-NEXT: S_ENDPGM 0, implicit [[AND]](s32) - %0:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.y) - S_ENDPGM 0, implicit %0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 + ; GCN-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GCN-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 1023 + ; GCN-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LSHR]], [[C1]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[AND]](i32) + %0:_(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.y) + S_ENDPGM 0, implicit %0(i32) ... --- @@ -149,14 +149,14 @@ body: | ; GCN-LABEL: name: test_workitem_id_z_packed ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1023 - ; GCN-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; GCN-NEXT: S_ENDPGM 0, implicit [[AND]](s32) - %0:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.z) - S_ENDPGM 0, implicit %0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 + ; GCN-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GCN-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 1023 + ; GCN-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LSHR]], [[C1]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[AND]](i32) + %0:_(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.z) + S_ENDPGM 0, implicit %0(i32) ... --- @@ -164,8 +164,8 @@ name: missing_arg_info body: | bb.0: ; GCN-LABEL: name: missing_arg_info - ; GCN: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: S_ENDPGM 0, implicit [[DEF]](s32) - %0:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.z) - S_ENDPGM 0, implicit %0 + ; GCN: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GCN-NEXT: S_ENDPGM 0, implicit [[DEF]](i32) + %0:_(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.z) + S_ENDPGM 0, implicit %0(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir index c73471139e877..730557f432f0d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir @@ -10,14 +10,14 @@ body: | ; CHECK-LABEL: name: test_and_s32 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_AND %0, %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[AND]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_AND %0, %1 + $vgpr0 = COPY %2(i32) ... 
--- @@ -29,17 +29,17 @@ body: | ; CHECK-LABEL: name: test_and_s1 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[COPY1]] - ; CHECK-NEXT: S_NOP 0, implicit [[AND]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_CONSTANT i32 0 - %3:_(s1) = G_ICMP intpred(ne), %0, %2 - %4:_(s1) = G_ICMP intpred(ne), %1, %2 - %5:_(s32) = G_AND %0, %1 - S_NOP 0, implicit %5 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[COPY1]] + ; CHECK-NEXT: S_NOP 0, implicit [[AND]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_CONSTANT i32 0 + %3:_(i1) = G_ICMP intpred(ne), %0(i32), %2 + %4:_(i1) = G_ICMP intpred(ne), %1(i32), %2 + %5:_(i32) = G_AND %0, %1 + S_NOP 0, implicit %5(i32) ... --- @@ -51,31 +51,31 @@ body: | ; CHECK-LABEL: name: test_and_v2s1 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV2]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV3]] - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV4]](s32), [[UV6]] - ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV5]](s32), [[UV7]] - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP]], [[ICMP2]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP3]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AND]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[AND1]](s1) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = COPY $vgpr4_vgpr5 - %3:_(<2 x s1>) = G_ICMP intpred(ne), %0, %1 - %4:_(<2 x s1>) = G_ICMP intpred(ne), %0, %2 - %5:_(<2 x s1>) = G_AND %3, %4 - %6:_(<2 x s32>) = G_ANYEXT %5 - $vgpr0_vgpr1 = COPY %6 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr4_vgpr5 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV]](i32), [[UV2]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV1]](i32), [[UV3]] + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(i32), 
[[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY2]](<2 x i32>) + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV4]](i32), [[UV6]] + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV5]](i32), [[UV7]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i1) = G_AND [[ICMP]], [[ICMP2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i1) = G_AND [[ICMP1]], [[ICMP3]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[AND]](i1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[AND1]](i1) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i32>) = COPY $vgpr4_vgpr5 + %3:_(<2 x i1>) = G_ICMP intpred(ne), %0(<2 x i32>), %1 + %4:_(<2 x i1>) = G_ICMP intpred(ne), %0(<2 x i32>), %2 + %5:_(<2 x i1>) = G_AND %3, %4 + %6:_(<2 x i32>) = G_ANYEXT %5(<2 x i1>) + $vgpr0_vgpr1 = COPY %6(<2 x i32>) ... --- @@ -87,35 +87,35 @@ body: | ; CHECK-LABEL: name: test_and_v3s1 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5, $vgpr6_vgpr7_vgpr8 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV3]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV4]] - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[UV5]] - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) - ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV6]](s32), [[UV9]] - ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV7]](s32), [[UV10]] - ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV8]](s32), [[UV11]] - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP]], [[ICMP3]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP4]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP2]], [[ICMP5]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AND]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[AND1]](s1) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[AND2]](s1) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - %2:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 - %3:_(<3 x s1>) = G_ICMP intpred(ne), %0, %1 - %4:_(<3 x s1>) = G_ICMP intpred(ne), %0, %2 - %5:_(<3 x s1>) = G_AND %3, %4 - %6:_(<3 x s32>) = G_ANYEXT %5 - $vgpr0_vgpr1_vgpr2 = COPY %6 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr6_vgpr7_vgpr8 + ; CHECK-NEXT: 
[[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV]](i32), [[UV3]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV1]](i32), [[UV4]] + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV2]](i32), [[UV5]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; CHECK-NEXT: [[UV9:%[0-9]+]]:_(i32), [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY2]](<3 x i32>) + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV6]](i32), [[UV9]] + ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV7]](i32), [[UV10]] + ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV8]](i32), [[UV11]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i1) = G_AND [[ICMP]], [[ICMP3]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i1) = G_AND [[ICMP1]], [[ICMP4]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i1) = G_AND [[ICMP2]], [[ICMP5]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[AND]](i1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[AND1]](i1) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[AND2]](i1) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32), [[ANYEXT2]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(<3 x i32>) = COPY $vgpr6_vgpr7_vgpr8 + %3:_(<3 x i1>) = G_ICMP intpred(ne), %0(<3 x i32>), %1 + %4:_(<3 x i1>) = G_ICMP intpred(ne), %0(<3 x i32>), %2 + %5:_(<3 x i1>) = G_AND %3, %4 + %6:_(<3 x i32>) = G_ANYEXT %5(<3 x i1>) + $vgpr0_vgpr1_vgpr2 = COPY %6(<3 x i32>) ... --- @@ -127,14 +127,14 @@ body: | ; CHECK-LABEL: name: test_and_s64 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AND]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = G_AND %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AND]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64) = G_AND %0, %1 + $vgpr0_vgpr1 = COPY %2(i64) ... 
--- @@ -146,21 +146,21 @@ body: | ; CHECK-LABEL: name: test_and_s96 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY]](s96), 0 - ; CHECK-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s96), 64 - ; CHECK-NEXT: [[EXTRACT2:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY1]](s96), 0 - ; CHECK-NEXT: [[EXTRACT3:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](s96), 64 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[EXTRACT]], [[EXTRACT2]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[EXTRACT1]], [[EXTRACT3]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32), [[AND1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) - %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(s96) = COPY $vgpr3_vgpr4_vgpr5 - %2:_(s96) = G_AND %0, %1 - $vgpr0_vgpr1_vgpr2 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY $vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(i64) = G_EXTRACT [[COPY]](i96), 0 + ; CHECK-NEXT: [[EXTRACT1:%[0-9]+]]:_(i32) = G_EXTRACT [[COPY]](i96), 64 + ; CHECK-NEXT: [[EXTRACT2:%[0-9]+]]:_(i64) = G_EXTRACT [[COPY1]](i96), 0 + ; CHECK-NEXT: [[EXTRACT3:%[0-9]+]]:_(i32) = G_EXTRACT [[COPY1]](i96), 64 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[EXTRACT]], [[EXTRACT2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[EXTRACT1]], [[EXTRACT3]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND]](i64) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i96) = G_MERGE_VALUES [[UV]](i32), [[UV1]](i32), [[AND1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](i96) + %0:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(i96) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(i96) = G_AND %0, %1 + $vgpr0_vgpr1_vgpr2 = COPY %2(i96) ... 
--- @@ -172,18 +172,18 @@ body: | ; CHECK-LABEL: name: test_and_128 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](s128) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[UV]], [[UV2]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[UV1]], [[UV3]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[AND]](s64), [[AND1]](s64) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - %2:_(s128) = G_AND %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY1]](i128) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[AND]](i64), [[AND1]](i64) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(i128) = G_AND %0, %1 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(i128) ... --- @@ -195,17 +195,17 @@ body: | ; CHECK-LABEL: name: test_and_s7 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s7) = G_TRUNC %0 - %3:_(s7) = G_TRUNC %1 - %4:_(s7) = G_AND %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[AND]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i7) = G_TRUNC %0(i32) + %3:_(i7) = G_TRUNC %1(i32) + %4:_(i7) = G_AND %2, %3 + %5:_(i32) = G_ANYEXT %4(i7) + $vgpr0 = COPY %5(i32) ... --- @@ -217,17 +217,17 @@ body: | ; CHECK-LABEL: name: test_and_s8 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s8) = G_TRUNC %0 - %3:_(s8) = G_TRUNC %1 - %4:_(s8) = G_AND %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[AND]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i8) = G_TRUNC %0(i32) + %3:_(i8) = G_TRUNC %1(i32) + %4:_(i8) = G_AND %2, %3 + %5:_(i32) = G_ANYEXT %4(i8) + $vgpr0 = COPY %5(i32) ... 
--- @@ -239,20 +239,20 @@ body: | ; CHECK-LABEL: name: test_and_s16 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[TRUNC1]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AND]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s16) = G_AND %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[AND]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i16) = G_AND %2, %3 + %5:_(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... --- @@ -264,17 +264,17 @@ body: | ; CHECK-LABEL: name: test_and_s24 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s24) = G_TRUNC %0 - %3:_(s24) = G_TRUNC %1 - %4:_(s24) = G_AND %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[AND]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i24) = G_TRUNC %0(i32) + %3:_(i24) = G_TRUNC %1(i32) + %4:_(i24) = G_AND %2, %3 + %5:_(i32) = G_ANYEXT %4(i24) + $vgpr0 = COPY %5(i32) ... --- @@ -286,17 +286,17 @@ body: | ; CHECK-LABEL: name: test_and_s48 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AND]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s48) = G_TRUNC %0 - %3:_(s48) = G_TRUNC %1 - %4:_(s48) = G_AND %2, %3 - %5:_(s64) = G_ANYEXT %4 - $vgpr0_vgpr1 = COPY %5 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AND]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i48) = G_TRUNC %0(i64) + %3:_(i48) = G_TRUNC %1(i64) + %4:_(i48) = G_AND %2, %3 + %5:_(i64) = G_ANYEXT %4(i48) + $vgpr0_vgpr1 = COPY %5(i64) ... 
--- @@ -308,14 +308,14 @@ body: | ; CHECK-LABEL: name: test_and_v2s32 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AND]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = G_AND %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x i32>) = G_AND [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AND]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i32>) = G_AND %0, %1 + $vgpr0_vgpr1 = COPY %2(<2 x i32>) ... --- @@ -327,21 +327,21 @@ body: | ; CHECK-LABEL: name: test_and_v3s32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) - ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[UV5]] - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](<2 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[UV7]](s32), [[AND1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR2]](<3 x s32>) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - %2:_(<3 x s32>) = G_AND %0, %1 - $vgpr0_vgpr1_vgpr2 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[UV3]](i32), [[UV4]](i32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x i32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[UV2]], [[UV5]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND]](<2 x i32>) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[UV6]](i32), [[UV7]](i32), [[AND1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR2]](<3 x i32>) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(<3 x i32>) = G_AND %0, %1 + $vgpr0_vgpr1_vgpr2 = COPY %2(<3 x i32>) ... 
--- @@ -353,18 +353,18 @@ body: | ; CHECK-LABEL: name: test_and_v4s32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[UV]], [[UV2]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[UV1]], [[UV3]] - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[AND]](<2 x s32>), [[AND1]](<2 x s32>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - %2:_(<4 x s32>) = G_AND %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i32>), [[UV1:%[0-9]+]]:_(<2 x i32>) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x i32>), [[UV3:%[0-9]+]]:_(<2 x i32>) = G_UNMERGE_VALUES [[COPY1]](<4 x i32>) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x i32>) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<2 x i32>) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i32>) = G_CONCAT_VECTORS [[AND]](<2 x i32>), [[AND1]](<2 x i32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x i32>) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<4 x i32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(<4 x i32>) = G_AND %0, %1 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<4 x i32>) ... 
--- @@ -373,29 +373,29 @@ body: | bb.0: ; CHECK-LABEL: name: test_and_v5s32 - ; CHECK: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<5 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32) - ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<5 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[UV6]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV7]](s32), [[UV8]](s32) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR2]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR3]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[UV9]] - ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](<2 x s32>) - ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](<2 x s32>) - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF2]](<8 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[UV10]](s32), [[UV11]](s32), [[UV12]](s32), [[UV13]](s32), [[AND2]](s32), [[UV19]](s32), [[UV20]](s32), [[UV21]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR4]](<8 x s32>) - %0:_(<5 x s32>) = G_IMPLICIT_DEF - %1:_(<5 x s32>) = G_IMPLICIT_DEF - %2:_(<5 x s32>) = G_AND %0, %1 - %3:_(<8 x s32>) = G_IMPLICIT_DEF - %4:_(<8 x s32>) = G_INSERT %3, %2, 0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %4 + ; CHECK: [[DEF:%[0-9]+]]:_(<5 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<5 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF]](<5 x i32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[UV2]](i32), [[UV3]](i32) + ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF1]](<5 x i32>) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[UV5]](i32), [[UV6]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[UV7]](i32), [[UV8]](i32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x i32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<2 x i32>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR3]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[UV4]], [[UV9]] + ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND]](<2 x i32>) + ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND1]](<2 x i32>) + ; 
CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<8 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32), [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32), [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32), [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF2]](<8 x i32>) + ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[UV10]](i32), [[UV11]](i32), [[UV12]](i32), [[UV13]](i32), [[AND2]](i32), [[UV19]](i32), [[UV20]](i32), [[UV21]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR4]](<8 x i32>) + %0:_(<5 x i32>) = G_IMPLICIT_DEF + %1:_(<5 x i32>) = G_IMPLICIT_DEF + %2:_(<5 x i32>) = G_AND %0, %1 + %3:_(<8 x i32>) = G_IMPLICIT_DEF + %4:_(<8 x i32>) = G_INSERT %3, %2(<5 x i32>), 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %4(<8 x i32>) ... --- @@ -407,18 +407,18 @@ body: | ; CHECK-LABEL: name: test_and_v2s64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[UV]], [[UV2]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[UV1]], [[UV3]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[AND]](s64), [[AND1]](s64) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - %2:_(<2 x s64>) = G_AND %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY1]](<2 x i64>) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[AND]](i64), [[AND1]](i64) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(<2 x i64>) = G_AND %0, %1 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<2 x i64>) ... 
--- @@ -430,14 +430,14 @@ body: | ; CHECK-LABEL: name: test_and_v2s16 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[AND]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_AND %0, %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x i16>) = G_AND [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[AND]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = G_AND %0, %1 + $vgpr0 = COPY %2(<2 x i16>) ... --- @@ -448,41 +448,41 @@ body: | ; CHECK-LABEL: name: test_and_v3s16 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV3]](<2 x s16>), [[UV4]](<2 x s16>) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]] - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV6]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>) - %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - %2:_(<3 x s16>), %3:_(<3 x s16>) = G_UNMERGE_VALUES %0 - %4:_(<3 x s16>), %5:_(<3 x s16>) = G_UNMERGE_VALUES %1 - %6:_(<3 x s16>) = G_AND %2, %4 - %7:_(<3 x s16>) 
= G_IMPLICIT_DEF - %8:_(<6 x s16>) = G_CONCAT_VECTORS %6, %7 - $vgpr0_vgpr1_vgpr2 = COPY %8 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[UV]](<2 x i16>), [[UV1]](<2 x i16>) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<6 x i16>) + ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[UV3]](<2 x i16>), [[UV4]](<2 x i16>) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x i16>) = G_AND [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[AND]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV7]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x i16>), [[UV9:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV8]](<2 x i16>) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV9]](<2 x i16>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND2]], [[C]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND1]], [[SHL]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[UV6]](<2 x i16>), [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x i16>) + %0:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<6 x i16>) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(<3 x i16>), %3:_(<3 x i16>) = G_UNMERGE_VALUES %0(<6 x i16>) + %4:_(<3 x i16>), %5:_(<3 x i16>) = G_UNMERGE_VALUES %1(<6 x i16>) + %6:_(<3 x i16>) = G_AND %2, %4 + %7:_(<3 x i16>) = G_IMPLICIT_DEF + %8:_(<6 x i16>) = G_CONCAT_VECTORS %6(<3 x i16>), %7(<3 x i16>) + $vgpr0_vgpr1_vgpr2 = COPY %8(<6 x i16>) ... 
@@ -495,14 +495,14 @@ body: | ; CHECK-LABEL: name: test_and_v4s16 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AND]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 - %2:_(<4 x s16>) = G_AND %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x i16>) = G_AND [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AND]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = COPY $vgpr2_vgpr3 + %2:_(<4 x i16>) = G_AND %0, %1 + $vgpr0_vgpr1 = COPY %2(<4 x i16>) ... --- @@ -511,70 +511,70 @@ body: | bb.0: ; CHECK-LABEL: name: test_and_v5s16 - ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<6 x s16>) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV3]](<2 x s16>), [[UV4]](<2 x s16>) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]] - ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]] - ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x 
s16>) = G_BITCAST [[OR2]](s32) - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL3]] - ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(<4 x s16>) = G_AND [[CONCAT_VECTORS2]], [[CONCAT_VECTORS3]] - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND3]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C]](s32) - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>) - ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) - ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST11]], [[C]](s32) - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) - ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL4]] - ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C1]] - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) - ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL5]] - ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR4]], [[C]](s32) - ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL6]] - ; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS4:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>), [[BITCAST14]](<2 x s16>), [[UV13]](<2 x s16>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<8 x s16>) - %0:_(<5 x s16>) = G_IMPLICIT_DEF - %1:_(<5 x s16>) = G_IMPLICIT_DEF - %2:_(<5 x s16>) = G_AND %0, %1 - %3:_(<8 x s16>) = G_IMPLICIT_DEF - %4:_(<8 x s16>) = G_INSERT %3, %2, 0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %4 + ; CHECK: [[DEF:%[0-9]+]]:_(<6 x i16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<6 x i16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<6 x i16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF1]](<6 x i16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV5]](<2 x i16>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[UV]](<2 x i16>), [[UV1]](<2 x i16>) + ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[UV3]](<2 x i16>), [[UV4]](<2 
x i16>) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x i16>) = G_AND [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[AND]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[UV6]](<2 x i16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[UV7]](<2 x i16>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL]] + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[C2]], [[C]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[C2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x i16>), [[BITCAST7]](<2 x i16>) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[BITCAST3]], [[C1]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND2]], [[C]](i32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR1]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[C2]], [[C]](i32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[C2]], [[SHL3]] + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR3]](i32) + ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x i16>), [[BITCAST9]](<2 x i16>) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(<4 x i16>) = G_AND [[CONCAT_VECTORS2]], [[CONCAT_VECTORS3]] + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x i16>), [[UV9:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[AND3]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[UV8]](<2 x i16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST10]], [[C]](i32) + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<8 x i16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<2 x i16>), [[UV11:%[0-9]+]]:_(<2 x i16>), [[UV12:%[0-9]+]]:_(<2 x i16>), [[UV13:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF2]](<8 x i16>) + ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(i32) = G_BITCAST [[UV12]](<2 x i16>) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST11]], [[C]](i32) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[BITCAST4]], [[C1]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LSHR2]], [[C]](i32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[AND4]], [[SHL4]] + ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR4]](i32) + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[BITCAST5]], [[C1]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(i32) = G_AND [[BITCAST10]], [[C1]] + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[AND6]], [[C]](i32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[AND5]], [[SHL5]] + ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR5]](i32) + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[LSHR4]], [[C]](i32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[LSHR3]], [[SHL6]] + ; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR6]](i32) + ; CHECK-NEXT: [[CONCAT_VECTORS4:%[0-9]+]]:_(<8 x i16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x i16>), [[BITCAST13]](<2 x i16>), [[BITCAST14]](<2 x i16>), [[UV13]](<2 x i16>) + ; 
CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<8 x i16>) + %0:_(<5 x i16>) = G_IMPLICIT_DEF + %1:_(<5 x i16>) = G_IMPLICIT_DEF + %2:_(<5 x i16>) = G_AND %0, %1 + %3:_(<8 x i16>) = G_IMPLICIT_DEF + %4:_(<8 x i16>) = G_INSERT %3, %2(<5 x i16>), 0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %4(<8 x i16>) ... --- @@ -583,20 +583,20 @@ body: | bb.0: ; CHECK-LABEL: name: test_and_v3s8 - ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<4 x s32>) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[UV4]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[UV5]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[UV6]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - %0:_(<3 x s8>) = G_IMPLICIT_DEF - %1:_(<3 x s8>) = G_IMPLICIT_DEF - %2:_(<3 x s8>) = G_AND %0, %1 - %3:_(<3 x s32>) = G_ANYEXT %2 - $vgpr0_vgpr1_vgpr2 = COPY %3 + ; CHECK: [[DEF:%[0-9]+]]:_(<4 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF]](<4 x i32>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF1]](<4 x i32>) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[UV]], [[UV4]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[UV1]], [[UV5]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[UV2]], [[UV6]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[AND]](i32), [[AND1]](i32), [[AND2]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) + %0:_(<3 x i8>) = G_IMPLICIT_DEF + %1:_(<3 x i8>) = G_IMPLICIT_DEF + %2:_(<3 x i8>) = G_AND %0, %1 + %3:_(<3 x i32>) = G_ANYEXT %2(<3 x i8>) + $vgpr0_vgpr1_vgpr2 = COPY %3(<3 x i32>) ... 
--- @@ -605,21 +605,21 @@ body: | bb.0: ; CHECK-LABEL: name: test_and_v4s8 - ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<4 x s32>) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[UV4]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[UV5]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[UV6]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[UV7]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32), [[AND3]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) - %0:_(<4 x s8>) = G_IMPLICIT_DEF - %1:_(<4 x s8>) = G_IMPLICIT_DEF - %2:_(<4 x s8>) = G_AND %0, %1 - %3:_(<4 x s32>) = G_ANYEXT %2 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; CHECK: [[DEF:%[0-9]+]]:_(<4 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF]](<4 x i32>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF1]](<4 x i32>) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[UV]], [[UV4]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[UV1]], [[UV5]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[UV2]], [[UV6]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[UV3]], [[UV7]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[AND]](i32), [[AND1]](i32), [[AND2]](i32), [[AND3]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) + %0:_(<4 x i8>) = G_IMPLICIT_DEF + %1:_(<4 x i8>) = G_IMPLICIT_DEF + %2:_(<4 x i8>) = G_AND %0, %1 + %3:_(<4 x i32>) = G_ANYEXT %2(<4 x i8>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3(<4 x i32>) ... 
--- @@ -628,25 +628,25 @@ body: | bb.0: ; CHECK-LABEL: name: test_and_v8s8 - ; CHECK: [[DEF:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<8 x s32>) - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<8 x s32>) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[UV8]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[UV9]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[UV10]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[UV11]] - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[UV12]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[UV13]] - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[UV14]] - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[UV15]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32), [[AND3]](s32), [[AND4]](s32), [[AND5]](s32), [[AND6]](s32), [[AND7]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>) - %0:_(<8 x s8>) = G_IMPLICIT_DEF - %1:_(<8 x s8>) = G_IMPLICIT_DEF - %2:_(<8 x s8>) = G_AND %0, %1 - %3:_(<8 x s32>) = G_ANYEXT %2 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3 + ; CHECK: [[DEF:%[0-9]+]]:_(<8 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<8 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF]](<8 x i32>) + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32), [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32), [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32), [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF1]](<8 x i32>) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[UV]], [[UV8]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[UV1]], [[UV9]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[UV2]], [[UV10]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[UV3]], [[UV11]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[UV4]], [[UV12]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[UV5]], [[UV13]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(i32) = G_AND [[UV6]], [[UV14]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(i32) = G_AND [[UV7]], [[UV15]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[AND]](i32), [[AND1]](i32), [[AND2]](i32), [[AND3]](i32), [[AND4]](i32), [[AND5]](i32), [[AND6]](i32), [[AND7]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x i32>) + %0:_(<8 x i8>) = G_IMPLICIT_DEF + %1:_(<8 x i8>) = G_IMPLICIT_DEF + %2:_(<8 x i8>) = G_AND %0, %1 + %3:_(<8 x i32>) = G_ANYEXT %2(<8 x i8>) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3(<8 x i32>) ... 
--- @@ -655,33 +655,33 @@ body: | bb.0: ; CHECK-LABEL: name: test_and_v16s8 - ; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<16 x s32>) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[UV16]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[UV17]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[UV18]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[UV19]] - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[UV20]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[UV21]] - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[UV22]] - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[UV23]] - ; CHECK-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<16 x s32>) - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV40]], [[UV56]] - ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[UV41]], [[UV57]] - ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[UV42]], [[UV58]] - ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[UV43]], [[UV59]] - ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV44]], [[UV60]] - ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[UV45]], [[UV61]] - ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[UV46]], [[UV62]] - ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[UV47]], [[UV63]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32), [[AND3]](s32), [[AND4]](s32), [[AND5]](s32), [[AND6]](s32), [[AND7]](s32), [[AND8]](s32), [[AND9]](s32), [[AND10]](s32), [[AND11]](s32), [[AND12]](s32), [[AND13]](s32), [[AND14]](s32), [[AND15]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BUILD_VECTOR]](<16 x s32>) - %0:_(<16 
x s8>) = G_IMPLICIT_DEF - %1:_(<16 x s8>) = G_IMPLICIT_DEF - %2:_(<16 x s8>) = G_AND %0, %1 - %3:_(<16 x s32>) = G_ANYEXT %2 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3 + ; CHECK: [[DEF:%[0-9]+]]:_(<16 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<16 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32), [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32), [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32), [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF]](<16 x i32>) + ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32), [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32), [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32), [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32), [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32), [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32), [[UV28:%[0-9]+]]:_(i32), [[UV29:%[0-9]+]]:_(i32), [[UV30:%[0-9]+]]:_(i32), [[UV31:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF1]](<16 x i32>) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[UV]], [[UV16]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[UV1]], [[UV17]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[UV2]], [[UV18]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[UV3]], [[UV19]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[UV4]], [[UV20]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[UV5]], [[UV21]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(i32) = G_AND [[UV6]], [[UV22]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(i32) = G_AND [[UV7]], [[UV23]] + ; CHECK-NEXT: [[UV32:%[0-9]+]]:_(i32), [[UV33:%[0-9]+]]:_(i32), [[UV34:%[0-9]+]]:_(i32), [[UV35:%[0-9]+]]:_(i32), [[UV36:%[0-9]+]]:_(i32), [[UV37:%[0-9]+]]:_(i32), [[UV38:%[0-9]+]]:_(i32), [[UV39:%[0-9]+]]:_(i32), [[UV40:%[0-9]+]]:_(i32), [[UV41:%[0-9]+]]:_(i32), [[UV42:%[0-9]+]]:_(i32), [[UV43:%[0-9]+]]:_(i32), [[UV44:%[0-9]+]]:_(i32), [[UV45:%[0-9]+]]:_(i32), [[UV46:%[0-9]+]]:_(i32), [[UV47:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF]](<16 x i32>) + ; CHECK-NEXT: [[UV48:%[0-9]+]]:_(i32), [[UV49:%[0-9]+]]:_(i32), [[UV50:%[0-9]+]]:_(i32), [[UV51:%[0-9]+]]:_(i32), [[UV52:%[0-9]+]]:_(i32), [[UV53:%[0-9]+]]:_(i32), [[UV54:%[0-9]+]]:_(i32), [[UV55:%[0-9]+]]:_(i32), [[UV56:%[0-9]+]]:_(i32), [[UV57:%[0-9]+]]:_(i32), [[UV58:%[0-9]+]]:_(i32), [[UV59:%[0-9]+]]:_(i32), [[UV60:%[0-9]+]]:_(i32), [[UV61:%[0-9]+]]:_(i32), [[UV62:%[0-9]+]]:_(i32), [[UV63:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF1]](<16 x i32>) + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(i32) = G_AND [[UV40]], [[UV56]] + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(i32) = G_AND [[UV41]], [[UV57]] + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(i32) = G_AND [[UV42]], [[UV58]] + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(i32) = G_AND [[UV43]], [[UV59]] + ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(i32) = G_AND [[UV44]], [[UV60]] + ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(i32) = G_AND [[UV45]], [[UV61]] + ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(i32) = G_AND [[UV46]], [[UV62]] + ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(i32) = G_AND [[UV47]], [[UV63]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x i32>) = G_BUILD_VECTOR [[AND]](i32), [[AND1]](i32), [[AND2]](i32), [[AND3]](i32), [[AND4]](i32), [[AND5]](i32), [[AND6]](i32), [[AND7]](i32), [[AND8]](i32), [[AND9]](i32), [[AND10]](i32), [[AND11]](i32), [[AND12]](i32), [[AND13]](i32), [[AND14]](i32), [[AND15]](i32) + 
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BUILD_VECTOR]](<16 x i32>) + %0:_(<16 x i8>) = G_IMPLICIT_DEF + %1:_(<16 x i8>) = G_IMPLICIT_DEF + %2:_(<16 x i8>) = G_AND %0, %1 + %3:_(<16 x i32>) = G_ANYEXT %2(<16 x i8>) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3(<16 x i32>) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-anyext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-anyext.mir index 59adc212386a2..31990c4f479fa 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-anyext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-anyext.mir @@ -10,12 +10,12 @@ body: | ; CHECK-LABEL: name: test_anyext_s32_to_s64 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) - %0:_(s32) = COPY $vgpr0 - %1:_(s64) = G_ANYEXT %0 - $vgpr0_vgpr1 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[COPY]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) + %0:_(i32) = COPY $vgpr0 + %1:_(i64) = G_ANYEXT %0(i32) + $vgpr0_vgpr1 = COPY %1(i64) ... --- @@ -27,13 +27,13 @@ body: | ; CHECK-LABEL: name: test_anyext_s16_to_s64 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s64) = G_ANYEXT %1 - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[COPY]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(i64) = G_ANYEXT %1(i16) + $vgpr0_vgpr1 = COPY %2(i64) ... --- @@ -45,12 +45,12 @@ body: | ; CHECK-LABEL: name: test_anyext_s16_to_s32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(i32) = G_ANYEXT %1(i16) + $vgpr0 = COPY %2(i32) ... --- @@ -62,12 +62,12 @@ body: | ; CHECK-LABEL: name: test_anyext_s24_to_s32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s24) = G_TRUNC %0 - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i24) = G_TRUNC %0(i32) + %2:_(i32) = G_ANYEXT %1(i24) + $vgpr0 = COPY %2(i32) ... --- @@ -76,11 +76,11 @@ body: | bb.0: ; CHECK-LABEL: name: test_anyext_s1_to_s32 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32) - %0:_(s1) = G_CONSTANT i1 0 - %1:_(s32) = G_ANYEXT %0 - $vgpr0 = COPY %1 + ; CHECK: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: $vgpr0 = COPY [[C]](i32) + %0:_(i1) = G_CONSTANT i1 false + %1:_(i32) = G_ANYEXT %0(i1) + $vgpr0 = COPY %1(i32) ... 
--- @@ -89,11 +89,11 @@ body: | bb.0: ; CHECK-LABEL: name: test_anyext_s1_to_s64 - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[C]](s64) - %0:_(s1) = G_CONSTANT i1 0 - %1:_(s64) = G_ANYEXT %0 - $vgpr0_vgpr1 = COPY %1 + ; CHECK: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[C]](i64) + %0:_(i1) = G_CONSTANT i1 false + %1:_(i64) = G_ANYEXT %0(i1) + $vgpr0_vgpr1 = COPY %1(i64) ... --- @@ -105,15 +105,15 @@ body: | ; CHECK-LABEL: name: test_anyext_v2s16_to_v2s32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s32>) = G_ANYEXT %0 - $vgpr0_vgpr1 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[BITCAST]](i32), [[LSHR]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i32>) = G_ANYEXT %0(<2 x i16>) + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... --- @@ -125,18 +125,18 @@ body: | ; CHECK-LABEL: name: test_anyext_v3s16_to_v3s32 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<3 x s16>) = G_EXTRACT %0, 0 - %2:_(<3 x s32>) = G_ANYEXT %1 - $vgpr0_vgpr1_vgpr2 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[BITCAST]](i32), [[LSHR]](i32), [[BITCAST1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<3 x i16>) = G_EXTRACT %0(<4 x i16>), 0 + %2:_(<3 x i32>) = G_ANYEXT %1(<3 x i16>) + $vgpr0_vgpr1_vgpr2 = COPY %2(<3 x i32>) ... 
--- @@ -148,11 +148,11 @@ body: | ; CHECK-LABEL: name: test_anyext_v4s16_to_v4s32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DEF]](<4 x s32>) - %0:_(<4 x s16>) = G_IMPLICIT_DEF - %1:_(<4 x s32>) = G_ANYEXT %0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DEF]](<4 x i32>) + %0:_(<4 x i16>) = G_IMPLICIT_DEF + %1:_(<4 x i32>) = G_ANYEXT %0(<4 x i16>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<4 x i32>) ... --- @@ -164,15 +164,15 @@ body: | ; CHECK-LABEL: name: test_anyext_v2s32_to_v2s64 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV]](s32) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV1]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[ANYEXT]](s64), [[ANYEXT1]](s64) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s64>) = G_ANYEXT %0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[UV]](i32) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[UV1]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[ANYEXT]](i64), [[ANYEXT1]](i64) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i64>) = G_ANYEXT %0(<2 x i32>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x i64>) ... --- @@ -184,16 +184,16 @@ body: | ; CHECK-LABEL: name: test_anyext_v3s32_to_v3s64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV]](s32) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV1]](s32) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV2]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[ANYEXT]](s64), [[ANYEXT1]](s64), [[ANYEXT2]](s64) - ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s64>) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s64>) = G_ANYEXT %0 - S_NOP 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[UV]](i32) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[UV1]](i32) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(i64) = G_ANYEXT [[UV2]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i64>) = G_BUILD_VECTOR [[ANYEXT]](i64), [[ANYEXT1]](i64), [[ANYEXT2]](i64) + ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x i64>) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x i64>) = G_ANYEXT %0(<3 x i32>) + S_NOP 0, implicit %1(<3 x i64>) ... 
@@ -206,17 +206,17 @@ body: | ; CHECK-LABEL: name: test_anyext_v4s32_to_v4s64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV]](s32) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV1]](s32) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV2]](s32) - ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[UV3]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[ANYEXT]](s64), [[ANYEXT1]](s64), [[ANYEXT2]](s64), [[ANYEXT3]](s64) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<4 x s64>) = G_ANYEXT %0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[UV]](i32) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[UV1]](i32) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(i64) = G_ANYEXT [[UV2]](i32) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(i64) = G_ANYEXT [[UV3]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[ANYEXT]](i64), [[ANYEXT1]](i64), [[ANYEXT2]](i64), [[ANYEXT3]](i64) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<4 x i64>) = G_ANYEXT %0(<4 x i32>) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1(<4 x i64>) ... --- @@ -228,13 +228,13 @@ body: | ; CHECK-LABEL: name: test_anyext_s8_to_s16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s16) - %0:_(s32) = COPY $vgpr0 - %1:_(s8) = G_TRUNC %0 - %2:_(s16) = G_ANYEXT %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](i16) + %0:_(i32) = COPY $vgpr0 + %1:_(i8) = G_TRUNC %0(i32) + %2:_(i16) = G_ANYEXT %1(i8) + S_ENDPGM 0, implicit %2(i16) ... --- @@ -246,13 +246,13 @@ body: | ; CHECK-LABEL: name: test_anyext_s8_to_s24 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s24) - %0:_(s32) = COPY $vgpr0 - %1:_(s8) = G_TRUNC %0 - %2:_(s24) = G_ANYEXT %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i24) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](i24) + %0:_(i32) = COPY $vgpr0 + %1:_(i8) = G_TRUNC %0(i32) + %2:_(i24) = G_ANYEXT %1(i8) + S_ENDPGM 0, implicit %2(i24) ... 
--- @@ -264,12 +264,12 @@ body: | ; CHECK-LABEL: name: test_anyext_s7_to_s32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s7) = G_TRUNC %0 - %2:_(s32) = G_ANYEXT %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i7) = G_TRUNC %0(i32) + %2:_(i32) = G_ANYEXT %1(i7) + S_ENDPGM 0, implicit %2(i32) ... --- @@ -281,12 +281,12 @@ body: | ; CHECK-LABEL: name: test_anyext_s8_to_s32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s8) = G_TRUNC %0 - %2:_(s32) = G_ANYEXT %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i8) = G_TRUNC %0(i32) + %2:_(i32) = G_ANYEXT %1(i8) + S_ENDPGM 0, implicit %2(i32) ... --- @@ -298,16 +298,16 @@ body: | ; CHECK-LABEL: name: test_anyext_s32_to_s96 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[DEF]](s32) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64), [[DEF1]](s64) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s96) = G_TRUNC [[MV1]](s192) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s96) - %0:_(s32) = COPY $vgpr0 - %1:_(s96) = G_ANYEXT %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[DEF]](i32) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i192) = G_MERGE_VALUES [[MV]](i64), [[DEF1]](i64), [[DEF1]](i64) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i96) = G_TRUNC [[MV1]](i192) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](i96) + %0:_(i32) = COPY $vgpr0 + %1:_(i96) = G_ANYEXT %0(i32) + S_ENDPGM 0, implicit %1(i96) ... --- @@ -319,15 +319,15 @@ body: | ; CHECK-LABEL: name: test_anyext_s32_to_s128 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[DEF]](s32) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV1]](s128) - %0:_(s32) = COPY $vgpr0 - %1:_(s128) = G_ANYEXT %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[DEF]](i32) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[MV]](i64), [[DEF1]](i64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV1]](i128) + %0:_(i32) = COPY $vgpr0 + %1:_(i128) = G_ANYEXT %0(i32) + S_ENDPGM 0, implicit %1(i128) ... 
--- @@ -339,16 +339,16 @@ body: | ; CHECK-LABEL: name: test_anyext_s32_to_s160 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[DEF]](s32) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s320) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s160) = G_TRUNC [[MV1]](s320) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s160) - %0:_(s32) = COPY $vgpr0 - %1:_(s160) = G_ANYEXT %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[DEF]](i32) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i320) = G_MERGE_VALUES [[MV]](i64), [[DEF1]](i64), [[DEF1]](i64), [[DEF1]](i64), [[DEF1]](i64) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i160) = G_TRUNC [[MV1]](i320) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](i160) + %0:_(i32) = COPY $vgpr0 + %1:_(i160) = G_ANYEXT %0(i32) + S_ENDPGM 0, implicit %1(i160) ... --- @@ -360,15 +360,15 @@ body: | ; CHECK-LABEL: name: test_anyext_s32_to_s192 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[DEF]](s32) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64), [[DEF1]](s64) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV1]](s192) - %0:_(s32) = COPY $vgpr0 - %1:_(s192) = G_ANYEXT %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[DEF]](i32) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i192) = G_MERGE_VALUES [[MV]](i64), [[DEF1]](i64), [[DEF1]](i64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV1]](i192) + %0:_(i32) = COPY $vgpr0 + %1:_(i192) = G_ANYEXT %0(i32) + S_ENDPGM 0, implicit %1(i192) ... 
--- @@ -380,16 +380,16 @@ body: | ; CHECK-LABEL: name: test_anyext_s32_to_s224 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[DEF]](s32) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s448) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s224) = G_TRUNC [[MV1]](s448) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s224) - %0:_(s32) = COPY $vgpr0 - %1:_(s224) = G_ANYEXT %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[DEF]](i32) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i448) = G_MERGE_VALUES [[MV]](i64), [[DEF1]](i64), [[DEF1]](i64), [[DEF1]](i64), [[DEF1]](i64), [[DEF1]](i64), [[DEF1]](i64) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i224) = G_TRUNC [[MV1]](i448) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](i224) + %0:_(i32) = COPY $vgpr0 + %1:_(i224) = G_ANYEXT %0(i32) + S_ENDPGM 0, implicit %1(i224) ... --- @@ -401,15 +401,15 @@ body: | ; CHECK-LABEL: name: test_anyext_s32_to_s256 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[DEF]](s32) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV1]](s256) - %0:_(s32) = COPY $vgpr0 - %1:_(s256) = G_ANYEXT %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[DEF]](i32) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i256) = G_MERGE_VALUES [[MV]](i64), [[DEF1]](i64), [[DEF1]](i64), [[DEF1]](i64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV1]](i256) + %0:_(i32) = COPY $vgpr0 + %1:_(i256) = G_ANYEXT %0(i32) + S_ENDPGM 0, implicit %1(i256) ... 
--- @@ -421,15 +421,15 @@ body: | ; CHECK-LABEL: name: test_anyext_s32_to_s512 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[DEF]](s32) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s512) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV1]](s512) - %0:_(s32) = COPY $vgpr0 - %1:_(s512) = G_ANYEXT %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[DEF]](i32) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i512) = G_MERGE_VALUES [[MV]](i64), [[DEF1]](i64), [[DEF1]](i64), [[DEF1]](i64), [[DEF1]](i64), [[DEF1]](i64), [[DEF1]](i64), [[DEF1]](i64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV1]](i512) + %0:_(i32) = COPY $vgpr0 + %1:_(i512) = G_ANYEXT %0(i32) + S_ENDPGM 0, implicit %1(i512) ... --- @@ -441,16 +441,16 @@ body: | ; CHECK-LABEL: name: test_anyext_s32_to_s992 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[DEF]](s32) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s448) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s224) = G_TRUNC [[MV1]](s448) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s224) - %0:_(s32) = COPY $vgpr0 - %1:_(s224) = G_ANYEXT %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[DEF]](i32) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i448) = G_MERGE_VALUES [[MV]](i64), [[DEF1]](i64), [[DEF1]](i64), [[DEF1]](i64), [[DEF1]](i64), [[DEF1]](i64), [[DEF1]](i64) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i224) = G_TRUNC [[MV1]](i448) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](i224) + %0:_(i32) = COPY $vgpr0 + %1:_(i224) = G_ANYEXT %0(i32) + S_ENDPGM 0, implicit %1(i224) ... 
--- @@ -462,15 +462,15 @@ body: | ; CHECK-LABEL: name: test_anyext_s32_to_s1024 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[DEF]](s32) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV1]](s1024) - %0:_(s32) = COPY $vgpr0 - %1:_(s1024) = G_ANYEXT %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[DEF]](i32) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i1024) = G_MERGE_VALUES [[MV]](i64), [[DEF1]](i64), [[DEF1]](i64), [[DEF1]](i64), [[DEF1]](i64), [[DEF1]](i64), [[DEF1]](i64), [[DEF1]](i64), [[DEF1]](i64), [[DEF1]](i64), [[DEF1]](i64), [[DEF1]](i64), [[DEF1]](i64), [[DEF1]](i64), [[DEF1]](i64), [[DEF1]](i64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV1]](i1024) + %0:_(i32) = COPY $vgpr0 + %1:_(i1024) = G_ANYEXT %0(i32) + S_ENDPGM 0, implicit %1(i1024) ... --- @@ -482,13 +482,13 @@ body: | ; CHECK-LABEL: name: test_anyext_s64_to_s128 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY]](s64), [[DEF]](s64) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s128) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s128) = G_ANYEXT %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[COPY]](i64), [[DEF]](i64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](i128) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i128) = G_ANYEXT %0(i64) + S_ENDPGM 0, implicit %1(i128) ... --- @@ -500,13 +500,13 @@ body: | ; CHECK-LABEL: name: test_anyext_s64_to_s192 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[COPY]](s64), [[DEF]](s64), [[DEF]](s64) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s192) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s192) = G_ANYEXT %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i192) = G_MERGE_VALUES [[COPY]](i64), [[DEF]](i64), [[DEF]](i64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](i192) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i192) = G_ANYEXT %0(i64) + S_ENDPGM 0, implicit %1(i192) ... 
--- @@ -518,13 +518,13 @@ body: | ; CHECK-LABEL: name: test_anyext_s64_to_s256 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[COPY]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s256) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s256) = G_ANYEXT %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i256) = G_MERGE_VALUES [[COPY]](i64), [[DEF]](i64), [[DEF]](i64), [[DEF]](i64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](i256) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i256) = G_ANYEXT %0(i64) + S_ENDPGM 0, implicit %1(i256) ... --- @@ -536,13 +536,13 @@ body: | ; CHECK-LABEL: name: test_anyext_s64_to_s512 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s512) = G_MERGE_VALUES [[COPY]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s512) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s512) = G_ANYEXT %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i512) = G_MERGE_VALUES [[COPY]](i64), [[DEF]](i64), [[DEF]](i64), [[DEF]](i64), [[DEF]](i64), [[DEF]](i64), [[DEF]](i64), [[DEF]](i64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](i512) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i512) = G_ANYEXT %0(i64) + S_ENDPGM 0, implicit %1(i512) ... --- @@ -554,13 +554,13 @@ body: | ; CHECK-LABEL: name: test_anyext_s64_to_s1024 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[COPY]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s1024) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s1024) = G_ANYEXT %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i1024) = G_MERGE_VALUES [[COPY]](i64), [[DEF]](i64), [[DEF]](i64), [[DEF]](i64), [[DEF]](i64), [[DEF]](i64), [[DEF]](i64), [[DEF]](i64), [[DEF]](i64), [[DEF]](i64), [[DEF]](i64), [[DEF]](i64), [[DEF]](i64), [[DEF]](i64), [[DEF]](i64), [[DEF]](i64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](i1024) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i1024) = G_ANYEXT %0(i64) + S_ENDPGM 0, implicit %1(i1024) ... 
--- @@ -572,16 +572,16 @@ body: | ; CHECK-LABEL: name: test_anyext_s96_to_s128 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32) - ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV2]](s128) - %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(s128) = G_ANYEXT %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i96) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV]](i32), [[UV1]](i32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV2]](i32), [[DEF]](i32) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[MV]](i64), [[MV1]](i64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV2]](i128) + %0:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(i128) = G_ANYEXT %0(i96) + S_ENDPGM 0, implicit %1(i128) ... --- @@ -593,14 +593,14 @@ body: | ; CHECK-LABEL: name: test_anyext_s128_to_s256 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[UV]](s64), [[UV1]](s64), [[DEF]](s64), [[DEF]](s64) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s256) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s256) = G_ANYEXT %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i256) = G_MERGE_VALUES [[UV]](i64), [[UV1]](i64), [[DEF]](i64), [[DEF]](i64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](i256) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i256) = G_ANYEXT %0(i128) + S_ENDPGM 0, implicit %1(i256) ... 
--- @@ -612,48 +612,48 @@ body: | ; CHECK-LABEL: name: test_anyext_s32_to_s88 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C4]](s16) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[C4]](s16) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND3]], [[SHL2]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[OR2]](s16) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] - ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[COPY1]](s16) - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C1]](s32) - ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR3]](s32), [[OR4]](s32) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s704) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64) - ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s88) = G_TRUNC [[MV1]](s704) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC5]](s88) - %0:_(s32) = COPY $vgpr0 - %1:_(s88) = G_ANYEXT %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C1]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C2]](i32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], 
[[C3]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C3]] + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C4]](i16) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C3]] + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[TRUNC3]], [[C4]](i16) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC4]], [[C3]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C4]](i16) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(i16) = G_OR [[AND3]], [[SHL2]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i16) = COPY [[OR2]](i16) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL3]] + ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[OR2]](i16) + ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[COPY1]](i16) + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C1]](i32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL4]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR3]](i32), [[OR4]](i32) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i704) = G_MERGE_VALUES [[MV]](i64), [[DEF1]](i64), [[DEF1]](i64), [[DEF1]](i64), [[DEF1]](i64), [[DEF1]](i64), [[DEF1]](i64), [[DEF1]](i64), [[DEF1]](i64), [[DEF1]](i64), [[DEF1]](i64) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(i88) = G_TRUNC [[MV1]](i704) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC5]](i88) + %0:_(i32) = COPY $vgpr0 + %1:_(i88) = G_ANYEXT %0(i32) + S_ENDPGM 0, implicit %1(i88) ... # The instruction count blows up for this and takes too long to @@ -665,8 +665,8 @@ body: | # bb.0: # liveins: $vgpr0 -# %0:_(s32) = COPY $vgpr0 -# %1:_(s65) = G_ANYEXT %0 +# %0:_(i32) = COPY $vgpr0 +# %1:_(i65) = G_ANYEXT %0 # S_ENDPGM 0, implicit %1 # ... 
@@ -679,25 +679,25 @@ body: | ; CHECK-LABEL: name: test_anyext_s2_to_s112 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s448) = G_MERGE_VALUES [[MV]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s112) = G_TRUNC [[MV1]](s448) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s112) - %0:_(s32) = COPY $vgpr0 - %1:_(s2) = G_TRUNC %0 - %2:_(s112) = G_ANYEXT %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[C2]], [[C]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[C2]], [[SHL1]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR]](i32), [[OR1]](i32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i448) = G_MERGE_VALUES [[MV]](i64), [[DEF]](i64), [[DEF]](i64), [[DEF]](i64), [[DEF]](i64), [[DEF]](i64), [[DEF]](i64) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i112) = G_TRUNC [[MV1]](i448) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](i112) + %0:_(i32) = COPY $vgpr0 + %1:_(i2) = G_TRUNC %0(i32) + %2:_(i112) = G_ANYEXT %1(i2) + S_ENDPGM 0, implicit %2(i112) ... --- @@ -708,10 +708,10 @@ body: | ; CHECK-LABEL: name: test_anyext_s112_to_s128 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY]](s128) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s112) = G_TRUNC %0 - %2:_(s128) = G_ANYEXT %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY]](i128) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i112) = G_TRUNC %0(i128) + %2:_(i128) = G_ANYEXT %1(i112) + S_ENDPGM 0, implicit %2(i128) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir index f4aaab745e03b..6a79b823ea8ed 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir @@ -14,30 +14,30 @@ body: | ; SI-LABEL: name: test_ashr_s32_s32 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[COPY1]](s32) - ; SI-NEXT: $vgpr0 = COPY [[ASHR]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[COPY]], [[COPY1]](i32) + ; SI-NEXT: $vgpr0 = COPY [[ASHR]](i32) ; ; VI-LABEL: name: test_ashr_s32_s32 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[COPY1]](s32) - ; VI-NEXT: $vgpr0 = COPY [[ASHR]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[COPY]], [[COPY1]](i32) + ; VI-NEXT: $vgpr0 = COPY [[ASHR]](i32) ; ; GFX9PLUS-LABEL: name: test_ashr_s32_s32 ; GFX9PLUS: liveins: $vgpr0, $vgpr1 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[COPY1]](s32) - ; GFX9PLUS-NEXT: $vgpr0 = COPY [[ASHR]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_ASHR %0, %1 - $vgpr0 = COPY %2 + ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[COPY]], [[COPY1]](i32) + ; GFX9PLUS-NEXT: $vgpr0 = COPY [[ASHR]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_ASHR %0, %1(i32) + $vgpr0 = COPY %2(i32) ... 
--- name: test_ashr_s64_s64 @@ -48,33 +48,33 @@ body: | ; SI-LABEL: name: test_ashr_s64_s64 ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[TRUNC]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ASHR]](s64) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY1]](i64) + ; SI-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[COPY]], [[TRUNC]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ASHR]](i64) ; ; VI-LABEL: name: test_ashr_s64_s64 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[TRUNC]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ASHR]](s64) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY1]](i64) + ; VI-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[COPY]], [[TRUNC]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ASHR]](i64) ; ; GFX9PLUS-LABEL: name: test_ashr_s64_s64 ; GFX9PLUS: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[TRUNC]](s32) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[ASHR]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = G_ASHR %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY1]](i64) + ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[COPY]], [[TRUNC]](i32) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[ASHR]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64) = G_ASHR %0, %1(i64) + $vgpr0_vgpr1 = COPY %2(i64) ... 
--- name: test_ashr_s64_s32 @@ -85,30 +85,30 @@ body: | ; SI-LABEL: name: test_ashr_s64_s32 ; SI: liveins: $vgpr0_vgpr1, $vgpr2 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[COPY1]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ASHR]](s64) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; SI-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[COPY]], [[COPY1]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ASHR]](i64) ; ; VI-LABEL: name: test_ashr_s64_s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[COPY1]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ASHR]](s64) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; VI-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[COPY]], [[COPY1]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ASHR]](i64) ; ; GFX9PLUS-LABEL: name: test_ashr_s64_s32 ; GFX9PLUS: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[COPY1]](s32) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[ASHR]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s64) = G_ASHR %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[COPY]], [[COPY1]](i32) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[ASHR]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = COPY $vgpr2 + %2:_(i64) = G_ASHR %0, %1(i32) + $vgpr0_vgpr1 = COPY %2(i64) ... 
--- name: test_ashr_s64_s16 @@ -119,37 +119,37 @@ body: | ; SI-LABEL: name: test_ashr_s64_s16 ; SI: liveins: $vgpr0_vgpr1, $vgpr2 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[AND]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ASHR]](s64) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; SI-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[COPY]], [[AND]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ASHR]](i64) ; ; VI-LABEL: name: test_ashr_s64_s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[AND]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ASHR]](s64) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; VI-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[COPY]], [[AND]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ASHR]](i64) ; ; GFX9PLUS-LABEL: name: test_ashr_s64_s16 ; GFX9PLUS: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX9PLUS-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[AND]](s32) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[ASHR]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s16) = G_TRUNC %1 - %3:_(s64) = G_ASHR %0, %2 - $vgpr0_vgpr1 = COPY %3 + ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX9PLUS-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[COPY]], [[AND]](i32) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[ASHR]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = COPY $vgpr2 + %2:_(i16) = G_TRUNC %1(i32) + %3:_(i64) = G_ASHR %0, %2(i16) + $vgpr0_vgpr1 = COPY %3(i64) ... 
--- @@ -161,39 +161,39 @@ body: | ; SI-LABEL: name: test_ashr_s16_s32 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 - ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[COPY1]](s32) - ; SI-NEXT: $vgpr0 = COPY [[ASHR]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 16 + ; SI-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG]], [[COPY1]](i32) + ; SI-NEXT: $vgpr0 = COPY [[ASHR]](i32) ; ; VI-LABEL: name: test_ashr_s16_s32 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC]], [[TRUNC1]](s16) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; VI-NEXT: [[ASHR:%[0-9]+]]:_(i16) = G_ASHR [[TRUNC]], [[TRUNC1]](i16) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ASHR]](i16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9PLUS-LABEL: name: test_ashr_s16_s32 ; GFX9PLUS: liveins: $vgpr0, $vgpr1 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC]], [[TRUNC1]](s16) - ; GFX9PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) - ; GFX9PLUS-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_ASHR %2, %1 - %4:_(s32) = G_ANYEXT %3 - $vgpr0 = COPY %4 + ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(i16) = G_ASHR [[TRUNC]], [[TRUNC1]](i16) + ; GFX9PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ASHR]](i16) + ; GFX9PLUS-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_ASHR %2, %1(i32) + %4:_(i32) = G_ANYEXT %3(i16) + $vgpr0 = COPY %4(i32) ... 
--- @@ -205,42 +205,42 @@ body: | ; SI-LABEL: name: test_ashr_s16_s16 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 - ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[AND]](s32) - ; SI-NEXT: $vgpr0 = COPY [[ASHR]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 16 + ; SI-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG]], [[AND]](i32) + ; SI-NEXT: $vgpr0 = COPY [[ASHR]](i32) ; ; VI-LABEL: name: test_ashr_s16_s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC]], [[TRUNC1]](s16) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; VI-NEXT: [[ASHR:%[0-9]+]]:_(i16) = G_ASHR [[TRUNC]], [[TRUNC1]](i16) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ASHR]](i16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9PLUS-LABEL: name: test_ashr_s16_s16 ; GFX9PLUS: liveins: $vgpr0, $vgpr1 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC]], [[TRUNC1]](s16) - ; GFX9PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) - ; GFX9PLUS-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s16) = G_ASHR %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(i16) = G_ASHR [[TRUNC]], [[TRUNC1]](i16) + ; GFX9PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ASHR]](i16) + ; GFX9PLUS-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i16) = G_ASHR %2, %3(i16) + %5:_(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... 
--- @@ -252,46 +252,46 @@ body: | ; SI-LABEL: name: test_ashr_s16_i8 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 - ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[AND]](s32) - ; SI-NEXT: $vgpr0 = COPY [[ASHR]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 16 + ; SI-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG]], [[AND]](i32) + ; SI-NEXT: $vgpr0 = COPY [[ASHR]](i32) ; ; VI-LABEL: name: test_ashr_s16_i8 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC]], [[AND]](s16) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C]] + ; VI-NEXT: [[ASHR:%[0-9]+]]:_(i16) = G_ASHR [[TRUNC]], [[AND]](i16) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ASHR]](i16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9PLUS-LABEL: name: test_ashr_s16_i8 ; GFX9PLUS: liveins: $vgpr0, $vgpr1 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9PLUS-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC]], [[AND]](s16) - ; GFX9PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) - ; GFX9PLUS-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s8) = G_TRUNC %1 - %4:_(s16) = G_ASHR %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX9PLUS-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C]] + ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(i16) = G_ASHR [[TRUNC]], [[AND]](i16) + ; GFX9PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ASHR]](i16) + ; GFX9PLUS-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) 
= G_TRUNC %0(i32) + %3:_(i8) = G_TRUNC %1(i32) + %4:_(i16) = G_ASHR %2, %3(i8) + %5:_(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... --- @@ -303,50 +303,50 @@ body: | ; SI-LABEL: name: test_ashr_i8_i8 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 - ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[AND]](s32) - ; SI-NEXT: $vgpr0 = COPY [[ASHR]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 8 + ; SI-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG]], [[AND]](i32) + ; SI-NEXT: $vgpr0 = COPY [[ASHR]](i32) ; ; VI-LABEL: name: test_ashr_i8_i8 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C1]](s16) - ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[C1]](s16) - ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[ASHR]], [[AND]](s16) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR1]](s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C]] + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[C1]](i16) + ; VI-NEXT: [[ASHR:%[0-9]+]]:_(i16) = G_ASHR [[SHL]], [[C1]](i16) + ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(i16) = G_ASHR [[ASHR]], [[AND]](i16) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ASHR1]](i16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9PLUS-LABEL: name: test_ashr_i8_i8 ; GFX9PLUS: liveins: $vgpr0, $vgpr1 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9PLUS-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; GFX9PLUS-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 - ; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG]](s32) - ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC1]], [[AND]](s16) - ; GFX9PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) - ; GFX9PLUS-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s8) = G_TRUNC %0 - %3:_(s8) = G_TRUNC %1 - %4:_(s8) = G_ASHR %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY 
$vgpr0 + ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX9PLUS-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C]] + ; GFX9PLUS-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 8 + ; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[SEXT_INREG]](i32) + ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(i16) = G_ASHR [[TRUNC1]], [[AND]](i16) + ; GFX9PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ASHR]](i16) + ; GFX9PLUS-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i8) = G_TRUNC %0(i32) + %3:_(i8) = G_TRUNC %1(i32) + %4:_(i8) = G_ASHR %2, %3(i8) + %5:_(i32) = G_ANYEXT %4(i8) + $vgpr0 = COPY %5(i32) ... --- @@ -358,50 +358,50 @@ body: | ; SI-LABEL: name: test_ashr_s7_s7 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 7 - ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[AND]](s32) - ; SI-NEXT: $vgpr0 = COPY [[ASHR]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 127 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 7 + ; SI-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG]], [[AND]](i32) + ; SI-NEXT: $vgpr0 = COPY [[ASHR]](i32) ; ; VI-LABEL: name: test_ashr_s7_s7 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C1]](s16) - ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[C1]](s16) - ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[ASHR]], [[AND]](s16) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR1]](s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 127 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C]] + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 9 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[C1]](i16) + ; VI-NEXT: [[ASHR:%[0-9]+]]:_(i16) = G_ASHR [[SHL]], [[C1]](i16) + ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(i16) = G_ASHR [[ASHR]], [[AND]](i16) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ASHR1]](i16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9PLUS-LABEL: name: test_ashr_s7_s7 ; GFX9PLUS: liveins: $vgpr0, $vgpr1 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9PLUS-NEXT: 
[[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127 - ; GFX9PLUS-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; GFX9PLUS-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 7 - ; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG]](s32) - ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC1]], [[AND]](s16) - ; GFX9PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) - ; GFX9PLUS-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s7) = G_TRUNC %0 - %3:_(s7) = G_TRUNC %1 - %4:_(s7) = G_ASHR %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 127 + ; GFX9PLUS-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C]] + ; GFX9PLUS-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 7 + ; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[SEXT_INREG]](i32) + ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(i16) = G_ASHR [[TRUNC1]], [[AND]](i16) + ; GFX9PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ASHR]](i16) + ; GFX9PLUS-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i7) = G_TRUNC %0(i32) + %3:_(i7) = G_TRUNC %1(i32) + %4:_(i7) = G_ASHR %2, %3(i7) + %5:_(i32) = G_ANYEXT %4(i7) + $vgpr0 = COPY %5(i32) ... --- @@ -413,42 +413,42 @@ body: | ; SI-LABEL: name: test_ashr_s24_s24 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 24 - ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[AND]](s32) - ; SI-NEXT: $vgpr0 = COPY [[ASHR]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16777215 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 24 + ; SI-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG]], [[AND]](i32) + ; SI-NEXT: $vgpr0 = COPY [[ASHR]](i32) ; ; VI-LABEL: name: test_ashr_s24_s24 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; VI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 24 - ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[AND]](s32) - ; VI-NEXT: $vgpr0 = COPY [[ASHR]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16777215 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; VI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 24 + ; VI-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG]], [[AND]](i32) + ; VI-NEXT: $vgpr0 = COPY [[ASHR]](i32) ; ; GFX9PLUS-LABEL: name: test_ashr_s24_s24 ; GFX9PLUS: liveins: $vgpr0, $vgpr1 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - 
; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 - ; GFX9PLUS-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX9PLUS-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 24 - ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[AND]](s32) - ; GFX9PLUS-NEXT: $vgpr0 = COPY [[ASHR]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s24) = G_TRUNC %0 - %3:_(s24) = G_TRUNC %1 - %4:_(s24) = G_ASHR %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16777215 + ; GFX9PLUS-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GFX9PLUS-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 24 + ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG]], [[AND]](i32) + ; GFX9PLUS-NEXT: $vgpr0 = COPY [[ASHR]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i24) = G_TRUNC %0(i32) + %3:_(i24) = G_TRUNC %1(i32) + %4:_(i24) = G_ASHR %2, %3(i24) + %5:_(i32) = G_ANYEXT %4(i24) + $vgpr0 = COPY %5(i32) ... --- @@ -460,37 +460,37 @@ body: | ; SI-LABEL: name: test_ashr_s32_s24 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[AND]](s32) - ; SI-NEXT: $vgpr0 = COPY [[ASHR]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16777215 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; SI-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[COPY]], [[AND]](i32) + ; SI-NEXT: $vgpr0 = COPY [[ASHR]](i32) ; ; VI-LABEL: name: test_ashr_s32_s24 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[AND]](s32) - ; VI-NEXT: $vgpr0 = COPY [[ASHR]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16777215 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; VI-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[COPY]], [[AND]](i32) + ; VI-NEXT: $vgpr0 = COPY [[ASHR]](i32) ; ; GFX9PLUS-LABEL: name: test_ashr_s32_s24 ; GFX9PLUS: liveins: $vgpr0, $vgpr1 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 - ; GFX9PLUS-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[AND]](s32) - ; GFX9PLUS-NEXT: $vgpr0 = COPY [[ASHR]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s24) = G_TRUNC %1 - %3:_(s32) = G_ASHR %0, %2 - $vgpr0 = COPY %3 + ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16777215 + ; GFX9PLUS-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] 
+ ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[COPY]], [[AND]](i32) + ; GFX9PLUS-NEXT: $vgpr0 = COPY [[ASHR]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i24) = G_TRUNC %1(i32) + %3:_(i32) = G_ASHR %0, %2(i24) + $vgpr0 = COPY %3(i32) ... --- @@ -502,42 +502,42 @@ body: | ; SI-LABEL: name: test_ashr_v2s32_v2s32 ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV]], [[UV2]](s32) - ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[UV3]](s32) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ASHR]](s32), [[ASHR1]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; SI-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[UV]], [[UV2]](i32) + ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[UV1]], [[UV3]](i32) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[ASHR]](i32), [[ASHR1]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; VI-LABEL: name: test_ashr_v2s32_v2s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV]], [[UV2]](s32) - ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[UV3]](s32) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ASHR]](s32), [[ASHR1]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; VI-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[UV]], [[UV2]](i32) + ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[UV1]], [[UV3]](i32) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[ASHR]](i32), [[ASHR1]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX9PLUS-LABEL: name: test_ashr_v2s32_v2s32 ; GFX9PLUS: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9PLUS-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV]], [[UV2]](s32) - ; GFX9PLUS-NEXT: 
[[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[UV3]](s32) - ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ASHR]](s32), [[ASHR1]](s32) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = G_ASHR %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX9PLUS-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[UV]], [[UV2]](i32) + ; GFX9PLUS-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[UV1]], [[UV3]](i32) + ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[ASHR]](i32), [[ASHR1]](i32) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i32>) = G_ASHR %0, %1(<2 x i32>) + $vgpr0_vgpr1 = COPY %2(<2 x i32>) ... --- @@ -549,45 +549,45 @@ body: | ; SI-LABEL: name: test_ashr_v3s32_v3s32 ; SI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV]], [[UV3]](s32) - ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[UV4]](s32) - ; SI-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[UV2]], [[UV5]](s32) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ASHR]](s32), [[ASHR1]](s32), [[ASHR2]](s32) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; SI-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; SI-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[UV]], [[UV3]](i32) + ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[UV1]], [[UV4]](i32) + ; SI-NEXT: [[ASHR2:%[0-9]+]]:_(i32) = G_ASHR [[UV2]], [[UV5]](i32) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[ASHR]](i32), [[ASHR1]](i32), [[ASHR2]](i32) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) ; ; VI-LABEL: name: test_ashr_v3s32_v3s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV]], [[UV3]](s32) - ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[UV4]](s32) - ; VI-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = 
G_ASHR [[UV2]], [[UV5]](s32) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ASHR]](s32), [[ASHR1]](s32), [[ASHR2]](s32) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; VI-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; VI-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[UV]], [[UV3]](i32) + ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[UV1]], [[UV4]](i32) + ; VI-NEXT: [[ASHR2:%[0-9]+]]:_(i32) = G_ASHR [[UV2]], [[UV5]](i32) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[ASHR]](i32), [[ASHR1]](i32), [[ASHR2]](i32) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) ; ; GFX9PLUS-LABEL: name: test_ashr_v3s32_v3s32 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX9PLUS-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV]], [[UV3]](s32) - ; GFX9PLUS-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[UV4]](s32) - ; GFX9PLUS-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[UV2]], [[UV5]](s32) - ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ASHR]](s32), [[ASHR1]](s32), [[ASHR2]](s32) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - %2:_(<3 x s32>) = G_ASHR %0, %1 - $vgpr0_vgpr1_vgpr2 = COPY %2 + ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; GFX9PLUS-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[UV]], [[UV3]](i32) + ; GFX9PLUS-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[UV1]], [[UV4]](i32) + ; GFX9PLUS-NEXT: [[ASHR2:%[0-9]+]]:_(i32) = G_ASHR [[UV2]], [[UV5]](i32) + ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[ASHR]](i32), [[ASHR1]](i32), [[ASHR2]](i32) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(<3 x i32>) = G_ASHR %0, %1(<3 x i32>) + $vgpr0_vgpr1_vgpr2 = COPY %2(<3 x i32>) ... 
--- @@ -599,42 +599,42 @@ body: | ; SI-LABEL: name: test_ashr_v2s64_v2s32 ; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[UV2]](s32) - ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[UV3]](s32) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr4_vgpr5 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; SI-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[UV]], [[UV2]](i32) + ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[UV1]], [[UV3]](i32) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[ASHR]](i64), [[ASHR1]](i64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; VI-LABEL: name: test_ashr_v2s64_v2s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[UV2]](s32) - ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[UV3]](s32) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr4_vgpr5 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; VI-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[UV]], [[UV2]](i32) + ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[UV1]], [[UV3]](i32) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[ASHR]](i64), [[ASHR1]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; GFX9PLUS-LABEL: name: test_ashr_v2s64_v2s32 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX9PLUS-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[UV2]](s32) - ; GFX9PLUS-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[UV3]](s32) - ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR 
[[ASHR]](s64), [[ASHR1]](s64) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s32>) = COPY $vgpr4_vgpr5 - %2:_(<2 x s64>) = G_ASHR %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr4_vgpr5 + ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; GFX9PLUS-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[UV]], [[UV2]](i32) + ; GFX9PLUS-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[UV1]], [[UV3]](i32) + ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[ASHR]](i64), [[ASHR1]](i64) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x i32>) = COPY $vgpr4_vgpr5 + %2:_(<2 x i64>) = G_ASHR %0, %1(<2 x i32>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<2 x i64>) ... --- @@ -646,54 +646,54 @@ body: | ; SI-LABEL: name: test_ashr_v3s64_v3s32 ; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10 - ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[UV4]](s32) - ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[UV5]](s32) - ; SI-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[UV6]](s32) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64), [[ASHR2]](s64), [[UV10]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<4 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64), [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<4 x i64>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr8_vgpr9_vgpr10 + ; SI-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; SI-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[UV]], [[UV4]](i32) + ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[UV1]], [[UV5]](i32) + ; SI-NEXT: [[ASHR2:%[0-9]+]]:_(i64) = G_ASHR [[UV2]], [[UV6]](i32) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV7:%[0-9]+]]:_(i64), [[UV8:%[0-9]+]]:_(i64), [[UV9:%[0-9]+]]:_(i64), [[UV10:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[ASHR]](i64), [[ASHR1]](i64), [[ASHR2]](i64), [[UV10]](i64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; VI-LABEL: name: test_ashr_v3s64_v3s32 ; VI: liveins: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10 - ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[UV4]](s32) - ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[UV5]](s32) - ; VI-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[UV6]](s32) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64), [[ASHR2]](s64), [[UV10]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<4 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64), [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<4 x i64>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr8_vgpr9_vgpr10 + ; VI-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; VI-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[UV]], [[UV4]](i32) + ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[UV1]], [[UV5]](i32) + ; VI-NEXT: [[ASHR2:%[0-9]+]]:_(i64) = G_ASHR [[UV2]], [[UV6]](i32) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV7:%[0-9]+]]:_(i64), [[UV8:%[0-9]+]]:_(i64), [[UV9:%[0-9]+]]:_(i64), [[UV10:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[ASHR]](i64), [[ASHR1]](i64), [[ASHR2]](i64), [[UV10]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; GFX9PLUS-LABEL: name: test_ashr_v3s64_v3s32 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) - ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10 - ; GFX9PLUS-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[UV4]](s32) - ; GFX9PLUS-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[UV5]](s32) - ; GFX9PLUS-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[UV6]](s32) - ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9PLUS-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64), [[ASHR2]](s64), [[UV10]](s64) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) - 
%0:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - %1:_(<3 x s64>) = G_EXTRACT %0, 0 - %2:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10 - %3:_(<3 x s64>) = G_ASHR %1, %2 - %4:_(<4 x s64>) = G_IMPLICIT_DEF - %5:_(<4 x s64>) = G_INSERT %4, %3, 0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %5 + ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(<4 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64), [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<4 x i64>) + ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr8_vgpr9_vgpr10 + ; GFX9PLUS-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[UV]], [[UV4]](i32) + ; GFX9PLUS-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[UV1]], [[UV5]](i32) + ; GFX9PLUS-NEXT: [[ASHR2:%[0-9]+]]:_(i64) = G_ASHR [[UV2]], [[UV6]](i32) + ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; GFX9PLUS-NEXT: [[UV7:%[0-9]+]]:_(i64), [[UV8:%[0-9]+]]:_(i64), [[UV9:%[0-9]+]]:_(i64), [[UV10:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[ASHR]](i64), [[ASHR1]](i64), [[ASHR2]](i64), [[UV10]](i64) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) + %0:_(<4 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(<3 x i64>) = G_EXTRACT %0(<4 x i64>), 0 + %2:_(<3 x i32>) = COPY $vgpr8_vgpr9_vgpr10 + %3:_(<3 x i64>) = G_ASHR %1, %2(<3 x i32>) + %4:_(<4 x i64>) = G_IMPLICIT_DEF + %5:_(<4 x i64>) = G_INSERT %4, %3(<3 x i64>), 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %5(<4 x i64>) ... 
--- @@ -705,60 +705,60 @@ body: | ; SI-LABEL: name: test_ashr_v2s16_v2s16 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16 - ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[AND]](s32) - ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 - ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[LSHR1]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C1]] - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C1]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]] - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C1]] + ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST]], 16 + ; SI-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG]], [[AND]](i32) + ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR]], 16 + ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG1]], [[LSHR1]](i32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[ASHR]], [[C1]] + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[ASHR1]], [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND2]], [[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND1]], [[SHL]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) ; ; VI-LABEL: name: test_ashr_v2s16_v2s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC]], [[TRUNC2]](s16) - ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR 
[[TRUNC1]], [[TRUNC3]](s16) - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR1]](s16) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[ASHR:%[0-9]+]]:_(i16) = G_ASHR [[TRUNC]], [[TRUNC2]](i16) + ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(i16) = G_ASHR [[TRUNC1]], [[TRUNC3]](i16) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[ASHR]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[ASHR1]](i16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) ; ; GFX9PLUS-LABEL: name: test_ashr_v2s16_v2s16 ; GFX9PLUS: liveins: $vgpr0, $vgpr1 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>) - ; GFX9PLUS-NEXT: $vgpr0 = COPY [[ASHR]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_ASHR %0, %1 - $vgpr0 = COPY %2 + ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(<2 x i16>) = G_ASHR [[COPY]], [[COPY1]](<2 x i16>) + ; GFX9PLUS-NEXT: $vgpr0 = COPY [[ASHR]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = G_ASHR %0, %1(<2 x i16>) + $vgpr0 = COPY %2(<2 x i16>) ... 
--- @@ -770,67 +770,67 @@ body: | ; SI-LABEL: name: test_ashr_v2s16_v2s32 ; SI: liveins: $vgpr0, $vgpr0_vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16 - ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[UV]](s32) - ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 - ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[UV1]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C1]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C1]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST]], 16 + ; SI-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG]], [[UV]](i32) + ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR]], 16 + ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG1]], [[UV1]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[ASHR]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[ASHR1]], [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) ; ; VI-LABEL: name: test_ashr_v2s16_v2s32 ; VI: liveins: $vgpr0, $vgpr0_vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC]], [[TRUNC2]](s16) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC1]], [[TRUNC3]](s16) - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR1]](s16) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR 
[[ZEXT]], [[SHL]] - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[UV]](i32) + ; VI-NEXT: [[ASHR:%[0-9]+]]:_(i16) = G_ASHR [[TRUNC]], [[TRUNC2]](i16) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[UV1]](i32) + ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(i16) = G_ASHR [[TRUNC1]], [[TRUNC3]](i16) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[ASHR]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[ASHR1]](i16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) ; ; GFX9PLUS-LABEL: name: test_ashr_v2s16_v2s32 ; GFX9PLUS: liveins: $vgpr0, $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC]], [[TRUNC2]](s16) - ; GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; GFX9PLUS-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC1]], [[TRUNC3]](s16) - ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[ASHR]](s16), [[ASHR1]](s16) - ; GFX9PLUS-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %2:_(<2 x s16>) = G_ASHR %0, %1 - $vgpr0 = COPY %2 + ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[UV]](i32) + ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(i16) = G_ASHR [[TRUNC]], [[TRUNC2]](i16) + ; GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[UV1]](i32) + ; GFX9PLUS-NEXT: [[ASHR1:%[0-9]+]]:_(i16) = G_ASHR [[TRUNC1]], [[TRUNC3]](i16) + ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x 
i16>) = G_BUILD_VECTOR [[ASHR]](i16), [[ASHR1]](i16) + ; GFX9PLUS-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %2:_(<2 x i16>) = G_ASHR %0, %1(<2 x i32>) + $vgpr0 = COPY %2(<2 x i16>) ... --- @@ -841,135 +841,135 @@ body: | ; SI-LABEL: name: test_ashr_v3s16_v3s16 ; SI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16 - ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[AND]](s32) - ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 - ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[LSHR1]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; SI-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16 - ; SI-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[AND1]](s32) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C1]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C1]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]] - ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ASHR2]], [[C1]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL1]] - ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]] - ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; 
SI-NEXT: [[COPY1:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr3_vgpr4_vgpr5 + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; SI-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<6 x i16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C1]] + ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST]], 16 + ; SI-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG]], [[AND]](i32) + ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR]], 16 + ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG1]], [[LSHR1]](i32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST3]], [[C1]] + ; SI-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST1]], 16 + ; SI-NEXT: [[ASHR2:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG2]], [[AND1]](i32) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[UV6]](<2 x i16>) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[UV7]](<2 x i16>) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[ASHR]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[ASHR1]], [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL]] + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[ASHR2]], [[C1]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[BITCAST4]], [[C1]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND5]], [[C]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND4]], [[SHL1]] + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(i32) = G_AND [[BITCAST5]], [[C1]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND6]], [[C]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR2]], [[SHL2]] + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x i16>), [[BITCAST7]](<2 x i16>), [[BITCAST8]](<2 x i16>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; VI-LABEL: name: test_ashr_v3s16_v3s16 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC]], [[TRUNC3]](s16) - ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC1]], [[TRUNC4]](s16) - ; VI-NEXT: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC2]], [[TRUNC5]](s16) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR1]](s16) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR2]](s16) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]] - ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr3_vgpr4_vgpr5 + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; VI-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<6 x i16>) + ; VI-NEXT: 
[[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST3]](i32) + ; VI-NEXT: [[ASHR:%[0-9]+]]:_(i16) = G_ASHR [[TRUNC]], [[TRUNC3]](i16) + ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(i16) = G_ASHR [[TRUNC1]], [[TRUNC4]](i16) + ; VI-NEXT: [[ASHR2:%[0-9]+]]:_(i16) = G_ASHR [[TRUNC2]], [[TRUNC5]](i16) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[UV6]](<2 x i16>) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[UV7]](<2 x i16>) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[ASHR]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[ASHR1]](i16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[ASHR2]](i16) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST4]], [[C1]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND]], [[C]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST5]], [[C1]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR2]], [[SHL2]] + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x i16>), [[BITCAST7]](<2 x i16>), [[BITCAST8]](<2 x i16>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX9PLUS-LABEL: name: test_ashr_v3s16_v3s16 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9PLUS-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9PLUS-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9PLUS-NEXT: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[UV3]], [[UV9]](<2 x s16>) - ; GFX9PLUS-NEXT: 
[[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC]], [[TRUNC1]](s16) - ; GFX9PLUS-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[ASHR]](<2 x s16>) - ; GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9PLUS-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9PLUS-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) - ; GFX9PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9PLUS-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX9PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9PLUS-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) - ; GFX9PLUS-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[ASHR1]](s16), [[TRUNC4]](s16) - ; GFX9PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[TRUNC6]](s16) - ; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) - %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - %2:_(<3 x s16>), %3:_(<3 x s16>) = G_UNMERGE_VALUES %0 - %4:_(<3 x s16>), %5:_(<3 x s16>) = G_UNMERGE_VALUES %1 - %6:_(<3 x s16>) = G_ASHR %2, %4 - %7:_(<3 x s16>) = G_IMPLICIT_DEF - %8:_(<6 x s16>) = G_CONCAT_VECTORS %6, %7 - $vgpr0_vgpr1_vgpr2 = COPY %8 + ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9PLUS-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; GFX9PLUS-NEXT: [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>), [[UV8:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<6 x i16>) + ; GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV7]](<2 x i16>) + ; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9PLUS-NEXT: [[UV9:%[0-9]+]]:_(<2 x i16>), [[UV10:%[0-9]+]]:_(<2 x i16>), [[UV11:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<6 x i16>) + ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(<2 x i16>) = G_ASHR [[UV3]], [[UV9]](<2 x i16>) + ; GFX9PLUS-NEXT: [[ASHR1:%[0-9]+]]:_(i16) = G_ASHR [[TRUNC]], [[TRUNC1]](i16) + ; GFX9PLUS-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[ASHR]](<2 x i16>) + ; GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX9PLUS-NEXT: 
[[UV12:%[0-9]+]]:_(<2 x i16>), [[UV13:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX9PLUS-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV12]](<2 x i16>) + ; GFX9PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST3]](i32) + ; GFX9PLUS-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; GFX9PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9PLUS-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[UV13]](<2 x i16>) + ; GFX9PLUS-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[ASHR1]](i16), [[TRUNC4]](i16) + ; GFX9PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC5]](i16), [[TRUNC6]](i16) + ; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) + %0:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<6 x i16>) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(<3 x i16>), %3:_(<3 x i16>) = G_UNMERGE_VALUES %0(<6 x i16>) + %4:_(<3 x i16>), %5:_(<3 x i16>) = G_UNMERGE_VALUES %1(<6 x i16>) + %6:_(<3 x i16>) = G_ASHR %2, %4(<3 x i16>) + %7:_(<3 x i16>) = G_IMPLICIT_DEF + %8:_(<6 x i16>) = G_CONCAT_VECTORS %6(<3 x i16>), %7(<3 x i16>) + $vgpr0_vgpr1_vgpr2 = COPY %8(<6 x i16>) ... @@ -982,99 +982,99 @@ body: | ; SI-LABEL: name: test_ashr_v4s16_v4s16 ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16 - ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[AND]](s32) - ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 - ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[LSHR2]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; SI-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16 - ; SI-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[AND1]](s32) - ; SI-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 16 - ; SI-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[LSHR3]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C1]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C1]] 
- ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]] - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ASHR2]], [[C1]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ASHR3]], [[C1]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL1]] - ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C1]] + ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST]], 16 + ; SI-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG]], [[AND]](i32) + ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR]], 16 + ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG1]], [[LSHR2]](i32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST3]], [[C1]] + ; SI-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST1]], 16 + ; SI-NEXT: [[ASHR2:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG2]], [[AND1]](i32) + ; SI-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR1]], 16 + ; SI-NEXT: [[ASHR3:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG3]], [[LSHR3]](i32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[ASHR]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[ASHR1]], [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[ASHR2]], [[C1]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[ASHR3]], [[C1]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND5]], [[C]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND4]], [[SHL1]] + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x i16>), [[BITCAST5]](<2 x i16>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; VI-LABEL: name: test_ashr_v4s16_v4s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; VI-NEXT: 
[[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC]], [[TRUNC4]](s16) - ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC1]], [[TRUNC5]](s16) - ; VI-NEXT: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC2]], [[TRUNC6]](s16) - ; VI-NEXT: [[ASHR3:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC3]], [[TRUNC7]](s16) - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR1]](s16) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR2]](s16) - ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR3]](s16) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC 
[[BITCAST2]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST3]](i32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; VI-NEXT: [[ASHR:%[0-9]+]]:_(i16) = G_ASHR [[TRUNC]], [[TRUNC4]](i16) + ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(i16) = G_ASHR [[TRUNC1]], [[TRUNC5]](i16) + ; VI-NEXT: [[ASHR2:%[0-9]+]]:_(i16) = G_ASHR [[TRUNC2]], [[TRUNC6]](i16) + ; VI-NEXT: [[ASHR3:%[0-9]+]]:_(i16) = G_ASHR [[TRUNC3]], [[TRUNC7]](i16) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[ASHR]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[ASHR1]](i16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[ASHR2]](i16) + ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[ASHR3]](i16) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x i16>), [[BITCAST5]](<2 x i16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX9PLUS-LABEL: name: test_ashr_v4s16_v4s16 ; GFX9PLUS: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9PLUS-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[UV]], [[UV2]](<2 x s16>) - ; GFX9PLUS-NEXT: [[ASHR1:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[UV1]], [[UV3]](<2 x s16>) - ; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[ASHR]](<2 x s16>), [[ASHR1]](<2 x s16>) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 - %2:_(<4 x s16>) = G_ASHR %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; GFX9PLUS-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(<2 x i16>) = G_ASHR [[UV]], [[UV2]](<2 x i16>) + ; GFX9PLUS-NEXT: [[ASHR1:%[0-9]+]]:_(<2 x i16>) = G_ASHR [[UV1]], [[UV3]](<2 x i16>) + ; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[ASHR]](<2 x i16>), [[ASHR1]](<2 x i16>) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = COPY $vgpr2_vgpr3 + %2:_(<4 x i16>) = G_ASHR %0, %1(<4 x i16>) + $vgpr0_vgpr1 = COPY %2(<4 x i16>) ... 
--- @@ -1086,82 +1086,82 @@ body: | ; SI-LABEL: name: test_ashr_s128_s128 ; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] - ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]] - ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[COPY1]](s32) - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[COPY1]](s32) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[SUB1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C2]](s32) - ; SI-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[SUB]](s32) - ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[ASHR2]] - ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV]], [[SELECT]] - ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[ASHR]], [[ASHR1]] - ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; SI-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[COPY1]], [[C]] + ; SI-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C]], [[COPY1]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[COPY1]](i32), [[C]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C1]] + ; SI-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[UV1]], [[COPY1]](i32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[COPY1]](i32) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[SUB1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[UV1]], [[C2]](i32) + ; SI-NEXT: [[ASHR2:%[0-9]+]]:_(i64) = G_ASHR [[UV1]], [[SUB]](i32) + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR]], [[ASHR2]] + ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[UV]], [[SELECT]] + ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[ASHR]], [[ASHR1]] + ; SI-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT1]](i64), [[SELECT2]](i64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) ; ; VI-LABEL: name: test_ashr_s128_s128 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] - ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB 
[[C]], [[COPY1]] - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]] - ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[COPY1]](s32) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[COPY1]](s32) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[SUB1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C2]](s32) - ; VI-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[SUB]](s32) - ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[ASHR2]] - ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV]], [[SELECT]] - ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[ASHR]], [[ASHR1]] - ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; VI-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[COPY1]], [[C]] + ; VI-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C]], [[COPY1]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[COPY1]](i32), [[C]] + ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C1]] + ; VI-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[UV1]], [[COPY1]](i32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[COPY1]](i32) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[SUB1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[UV1]], [[C2]](i32) + ; VI-NEXT: [[ASHR2:%[0-9]+]]:_(i64) = G_ASHR [[UV1]], [[SUB]](i32) + ; VI-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR]], [[ASHR2]] + ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[UV]], [[SELECT]] + ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[ASHR]], [[ASHR1]] + ; VI-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT1]](i64), [[SELECT2]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) ; ; GFX9PLUS-LABEL: name: test_ashr_s128_s128 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; GFX9PLUS-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] - ; GFX9PLUS-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] - ; GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9PLUS-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]] - ; GFX9PLUS-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[COPY1]](s32) - ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[COPY1]](s32) - ; GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], 
[[SUB1]](s32) - ; GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] - ; GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX9PLUS-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C2]](s32) - ; GFX9PLUS-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[SUB]](s32) - ; GFX9PLUS-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[ASHR2]] - ; GFX9PLUS-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV]], [[SELECT]] - ; GFX9PLUS-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[ASHR]], [[ASHR1]] - ; GFX9PLUS-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = COPY $vgpr4 - %2:_(s128) = G_ZEXT %1 - %3:_(s128) = G_ASHR %0, %2 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; GFX9PLUS-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[COPY1]], [[C]] + ; GFX9PLUS-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C]], [[COPY1]] + ; GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9PLUS-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[COPY1]](i32), [[C]] + ; GFX9PLUS-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C1]] + ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[UV1]], [[COPY1]](i32) + ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[COPY1]](i32) + ; GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[SUB1]](i32) + ; GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL]] + ; GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX9PLUS-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[UV1]], [[C2]](i32) + ; GFX9PLUS-NEXT: [[ASHR2:%[0-9]+]]:_(i64) = G_ASHR [[UV1]], [[SUB]](i32) + ; GFX9PLUS-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR]], [[ASHR2]] + ; GFX9PLUS-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[UV]], [[SELECT]] + ; GFX9PLUS-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[ASHR]], [[ASHR1]] + ; GFX9PLUS-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT1]](i64), [[SELECT2]](i64) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i32) = COPY $vgpr4 + %2:_(i128) = G_ZEXT %1(i32) + %3:_(i128) = G_ASHR %0, %2(i128) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3(i128) ... 
--- @@ -1173,81 +1173,81 @@ body: | ; SI-LABEL: name: test_ashr_s128_s132 ; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] - ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]] - ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[COPY1]](s32) - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[COPY1]](s32) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[SUB1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C2]](s32) - ; SI-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[SUB]](s32) - ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[ASHR2]] - ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV]], [[SELECT]] - ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[ASHR]], [[ASHR1]] - ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; SI-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[COPY1]], [[C]] + ; SI-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C]], [[COPY1]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[COPY1]](i32), [[C]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C1]] + ; SI-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[UV1]], [[COPY1]](i32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[COPY1]](i32) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[SUB1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[UV1]], [[C2]](i32) + ; SI-NEXT: [[ASHR2:%[0-9]+]]:_(i64) = G_ASHR [[UV1]], [[SUB]](i32) + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR]], [[ASHR2]] + ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[UV]], [[SELECT]] + ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[ASHR]], [[ASHR1]] + ; SI-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT1]](i64), [[SELECT2]](i64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) ; ; VI-LABEL: name: test_ashr_s128_s132 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] - ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB 
[[C]], [[COPY1]] - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]] - ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[COPY1]](s32) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[COPY1]](s32) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[SUB1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C2]](s32) - ; VI-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[SUB]](s32) - ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[ASHR2]] - ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV]], [[SELECT]] - ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[ASHR]], [[ASHR1]] - ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; VI-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[COPY1]], [[C]] + ; VI-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C]], [[COPY1]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[COPY1]](i32), [[C]] + ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C1]] + ; VI-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[UV1]], [[COPY1]](i32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[COPY1]](i32) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[SUB1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[UV1]], [[C2]](i32) + ; VI-NEXT: [[ASHR2:%[0-9]+]]:_(i64) = G_ASHR [[UV1]], [[SUB]](i32) + ; VI-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR]], [[ASHR2]] + ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[UV]], [[SELECT]] + ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[ASHR]], [[ASHR1]] + ; VI-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT1]](i64), [[SELECT2]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) ; ; GFX9PLUS-LABEL: name: test_ashr_s128_s132 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; GFX9PLUS-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] - ; GFX9PLUS-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] - ; GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9PLUS-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]] - ; GFX9PLUS-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[COPY1]](s32) - ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[COPY1]](s32) - ; GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], 
[[SUB1]](s32) - ; GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] - ; GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX9PLUS-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C2]](s32) - ; GFX9PLUS-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[SUB]](s32) - ; GFX9PLUS-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[ASHR2]] - ; GFX9PLUS-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV]], [[SELECT]] - ; GFX9PLUS-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[ASHR]], [[ASHR1]] - ; GFX9PLUS-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = COPY $vgpr4 - %2:_(s128) = G_ASHR %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; GFX9PLUS-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[COPY1]], [[C]] + ; GFX9PLUS-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C]], [[COPY1]] + ; GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9PLUS-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[COPY1]](i32), [[C]] + ; GFX9PLUS-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C1]] + ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[UV1]], [[COPY1]](i32) + ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[COPY1]](i32) + ; GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[SUB1]](i32) + ; GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL]] + ; GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX9PLUS-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[UV1]], [[C2]](i32) + ; GFX9PLUS-NEXT: [[ASHR2:%[0-9]+]]:_(i64) = G_ASHR [[UV1]], [[SUB]](i32) + ; GFX9PLUS-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR]], [[ASHR2]] + ; GFX9PLUS-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[UV]], [[SELECT]] + ; GFX9PLUS-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[ASHR]], [[ASHR1]] + ; GFX9PLUS-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT1]](i64), [[SELECT2]](i64) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i32) = COPY $vgpr4 + %2:_(i128) = G_ASHR %0, %1(i32) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(i128) ... 
--- @@ -1259,24 +1259,24 @@ body: | ; SI-LABEL: name: test_ashr_s128_s32_0 ; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]](s128) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]](i128) ; ; VI-LABEL: name: test_ashr_s128_s32_0 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]](s128) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]](i128) ; ; GFX9PLUS-LABEL: name: test_ashr_s128_s32_0 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]](s128) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = G_CONSTANT i32 0 - %3:_(s128) = G_ASHR %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]](i128) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i32) = G_CONSTANT i32 0 + %2:_(i128) = G_ASHR %0, %1(i32) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(i128) ... @@ -1289,48 +1289,48 @@ body: | ; SI-LABEL: name: test_ashr_s128_s32_23 ; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 41 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] - ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) - ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[ASHR]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 23 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 41 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL]] + ; SI-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[UV1]], [[C]](i32) + ; SI-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[OR]](i64), [[ASHR]](i64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) ; ; VI-LABEL: name: test_ashr_s128_s32_23 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 41 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] - ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = 
G_ASHR [[UV1]], [[C]](s32) - ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[ASHR]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 23 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 41 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL]] + ; VI-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[UV1]], [[C]](i32) + ; VI-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[OR]](i64), [[ASHR]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) ; ; GFX9PLUS-LABEL: name: test_ashr_s128_s32_23 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 - ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[C]](s32) - ; GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 41 - ; GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[C1]](s32) - ; GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] - ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) - ; GFX9PLUS-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[ASHR]](s64) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = G_CONSTANT i32 23 - %3:_(s128) = G_ASHR %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 23 + ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[C]](i32) + ; GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 41 + ; GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[C1]](i32) + ; GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL]] + ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[UV1]], [[C]](i32) + ; GFX9PLUS-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[OR]](i64), [[ASHR]](i64) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i32) = G_CONSTANT i32 23 + %2:_(i128) = G_ASHR %0, %1(i32) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(i128) ... 
--- @@ -1342,48 +1342,48 @@ body: | ; SI-LABEL: name: test_ashr_s128_s32_31 ; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 33 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] - ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) - ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[ASHR]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 33 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL]] + ; SI-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[UV1]], [[C]](i32) + ; SI-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[OR]](i64), [[ASHR]](i64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) ; ; VI-LABEL: name: test_ashr_s128_s32_31 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 33 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] - ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) - ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[ASHR]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 33 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL]] + ; VI-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[UV1]], [[C]](i32) + ; VI-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[OR]](i64), [[ASHR]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) ; ; GFX9PLUS-LABEL: name: test_ashr_s128_s32_31 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[C]](s32) - ; GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 33 - ; GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[C1]](s32) - ; GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] - ; 
GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) - ; GFX9PLUS-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[ASHR]](s64) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = G_CONSTANT i32 31 - %3:_(s128) = G_ASHR %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[C]](i32) + ; GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 33 + ; GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[C1]](i32) + ; GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL]] + ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[UV1]], [[C]](i32) + ; GFX9PLUS-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[OR]](i64), [[ASHR]](i64) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i32) = G_CONSTANT i32 31 + %2:_(i128) = G_ASHR %0, %1(i32) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(i128) ... --- @@ -1395,45 +1395,45 @@ body: | ; SI-LABEL: name: test_ashr_s128_s32_32 ; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[C]](s32) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] - ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) - ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[ASHR]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[C]](i32) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL]] + ; SI-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[UV1]], [[C]](i32) + ; SI-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[OR]](i64), [[ASHR]](i64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) ; ; VI-LABEL: name: test_ashr_s128_s32_32 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[C]](s32) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] - ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) - ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[ASHR]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; VI-NEXT: 
[[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[C]](i32) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL]] + ; VI-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[UV1]], [[C]](i32) + ; VI-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[OR]](i64), [[ASHR]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) ; ; GFX9PLUS-LABEL: name: test_ashr_s128_s32_32 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[C]](s32) - ; GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[C]](s32) - ; GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] - ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) - ; GFX9PLUS-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[ASHR]](s64) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = G_CONSTANT i32 32 - %3:_(s128) = G_ASHR %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[C]](i32) + ; GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[C]](i32) + ; GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL]] + ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[UV1]], [[C]](i32) + ; GFX9PLUS-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[OR]](i64), [[ASHR]](i64) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i32) = G_CONSTANT i32 32 + %2:_(i128) = G_ASHR %0, %1(i32) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(i128) ... 
--- @@ -1445,48 +1445,48 @@ body: | ; SI-LABEL: name: test_ashr_s128_s32_33 ; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 33 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] - ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) - ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[ASHR]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 33 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL]] + ; SI-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[UV1]], [[C]](i32) + ; SI-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[OR]](i64), [[ASHR]](i64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) ; ; VI-LABEL: name: test_ashr_s128_s32_33 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 33 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] - ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) - ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[ASHR]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 33 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL]] + ; VI-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[UV1]], [[C]](i32) + ; VI-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[OR]](i64), [[ASHR]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) ; ; GFX9PLUS-LABEL: name: test_ashr_s128_s32_33 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 33 - ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[C]](s32) - ; GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[C1]](s32) - ; GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] - ; 
GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) - ; GFX9PLUS-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[ASHR]](s64) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = G_CONSTANT i32 33 - %3:_(s128) = G_ASHR %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 33 + ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[C]](i32) + ; GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[C1]](i32) + ; GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL]] + ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[UV1]], [[C]](i32) + ; GFX9PLUS-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[OR]](i64), [[ASHR]](i64) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i32) = G_CONSTANT i32 33 + %2:_(i128) = G_ASHR %0, %1(i32) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(i128) ... --- @@ -1498,39 +1498,39 @@ body: | ; SI-LABEL: name: test_ashr_s128_s32_127 ; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) - ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) - ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[ASHR]](s64), [[ASHR1]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; SI-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[UV1]], [[C]](i32) + ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[UV1]], [[C]](i32) + ; SI-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[ASHR]](i64), [[ASHR1]](i64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) ; ; VI-LABEL: name: test_ashr_s128_s32_127 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) - ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) - ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[ASHR]](s64), [[ASHR1]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; VI-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[UV1]], [[C]](i32) + ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[UV1]], [[C]](i32) + ; VI-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[ASHR]](i64), [[ASHR1]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) ; ; GFX9PLUS-LABEL: name: test_ashr_s128_s32_127 ; GFX9PLUS: liveins: 
$vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) - ; GFX9PLUS-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) - ; GFX9PLUS-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[ASHR]](s64), [[ASHR1]](s64) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = G_CONSTANT i32 127 - %3:_(s128) = G_ASHR %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[UV1]], [[C]](i32) + ; GFX9PLUS-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[UV1]], [[C]](i32) + ; GFX9PLUS-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[ASHR]](i64), [[ASHR1]](i64) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i32) = G_CONSTANT i32 127 + %2:_(i128) = G_ASHR %0, %1(i32) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(i128) ... --- @@ -1542,265 +1542,265 @@ body: | ; SI-LABEL: name: test_ashr_s256_s256 ; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](s256) - ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] - ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]] - ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; SI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] - ; SI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] - ; SI-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] - ; SI-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[COPY1]](s32) - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[COPY1]](s32) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV3]], [[SUB3]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C3]](s32) - ; SI-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[SUB2]](s32) - ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[OR]], [[ASHR2]] - ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV2]], [[SELECT]] - ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[ASHR]], [[ASHR1]] - ; SI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; SI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] - ; SI-NEXT: 
[[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] - ; SI-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] - ; SI-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[COPY1]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV4]], [[COPY1]](s32) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV5]], [[SUB5]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[LSHR2]], [[SHL1]] - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[SUB4]](s32) - ; SI-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[OR1]], [[LSHR3]] - ; SI-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP5]](s1), [[UV4]], [[SELECT3]] - ; SI-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR1]], [[C4]] - ; SI-NEXT: [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; SI-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C2]] - ; SI-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB1]] - ; SI-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C2]] - ; SI-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB1]](s32), [[C1]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB1]](s32) - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s64) = G_LSHR [[UV6]], [[SUB7]](s32) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s64) = G_SHL [[UV7]], [[SUB1]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[LSHR4]], [[SHL3]] - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB6]](s32) - ; SI-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL2]], [[C4]] - ; SI-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[OR2]], [[SHL4]] - ; SI-NEXT: [[SELECT8:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[UV7]], [[SELECT7]] - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s64) = G_OR [[SELECT4]], [[SELECT6]] - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s64) = G_OR [[SELECT5]], [[SELECT8]] - ; SI-NEXT: [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; SI-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV9]], [[C3]](s32) - ; SI-NEXT: [[ASHR4:%[0-9]+]]:_(s64) = G_ASHR [[UV9]], [[C3]](s32) - ; SI-NEXT: [[UV10:%[0-9]+]]:_(s64), [[UV11:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; SI-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C2]] - ; SI-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB]] - ; SI-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C2]] - ; SI-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB]](s32), [[C1]] - ; SI-NEXT: [[ASHR5:%[0-9]+]]:_(s64) = G_ASHR [[UV11]], [[SUB]](s32) - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s64) = G_LSHR [[UV10]], [[SUB]](s32) - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[UV11]], [[SUB9]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[LSHR5]], [[SHL5]] - ; SI-NEXT: [[ASHR6:%[0-9]+]]:_(s64) = G_ASHR [[UV11]], [[C3]](s32) - ; SI-NEXT: [[ASHR7:%[0-9]+]]:_(s64) = G_ASHR [[UV11]], [[SUB8]](s32) - ; SI-NEXT: [[SELECT9:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[OR5]], [[ASHR7]] - ; SI-NEXT: [[SELECT10:%[0-9]+]]:_(s64) = G_SELECT [[ICMP9]](s1), [[UV10]], [[SELECT9]] - ; SI-NEXT: [[SELECT11:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[ASHR5]], [[ASHR6]] - ; SI-NEXT: [[SELECT12:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR3]], [[SELECT10]] - ; SI-NEXT: [[SELECT13:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR4]], [[SELECT11]] - ; SI-NEXT: [[UV12:%[0-9]+]]:_(s64), [[UV13:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; SI-NEXT: 
[[SELECT14:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV12]], [[SELECT12]] - ; SI-NEXT: [[SELECT15:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV13]], [[SELECT13]] - ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT14]](s64), [[SELECT15]](s64) - ; SI-NEXT: [[SELECT16:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT1]], [[ASHR3]] - ; SI-NEXT: [[SELECT17:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT2]], [[ASHR4]] - ; SI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT16]](s64), [[SELECT17]](s64) - ; SI-NEXT: [[MV2:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV]](s128), [[MV1]](s128) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV2]](s256) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 128 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i128), [[UV1:%[0-9]+]]:_(i128) = G_UNMERGE_VALUES [[COPY]](i256) + ; SI-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[COPY1]], [[C]] + ; SI-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C]], [[COPY1]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[COPY1]](i32), [[C]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C1]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; SI-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV1]](i128) + ; SI-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[COPY1]], [[C2]] + ; SI-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[COPY1]] + ; SI-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[COPY1]](i32), [[C2]] + ; SI-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C1]] + ; SI-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[UV3]], [[COPY1]](i32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV2]], [[COPY1]](i32) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV3]], [[SUB3]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[UV3]], [[C3]](i32) + ; SI-NEXT: [[ASHR2:%[0-9]+]]:_(i64) = G_ASHR [[UV3]], [[SUB2]](i32) + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP2]](i1), [[OR]], [[ASHR2]] + ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP3]](i1), [[UV2]], [[SELECT]] + ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP2]](i1), [[ASHR]], [[ASHR1]] + ; SI-NEXT: [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV]](i128) + ; SI-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[COPY1]], [[C2]] + ; SI-NEXT: [[SUB5:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[COPY1]] + ; SI-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[COPY1]](i32), [[C2]] + ; SI-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C1]] + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i64) = G_LSHR [[UV5]], [[COPY1]](i32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i64) = G_LSHR [[UV4]], [[COPY1]](i32) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[UV5]], [[SUB5]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i64) = G_OR [[LSHR2]], [[SHL1]] + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(i64) = G_LSHR [[UV5]], [[SUB4]](i32) + ; SI-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP4]](i1), [[OR1]], [[LSHR3]] + ; SI-NEXT: [[SELECT4:%[0-9]+]]:_(i64) = G_SELECT [[ICMP5]](i1), [[UV4]], [[SELECT3]] + ; SI-NEXT: [[SELECT5:%[0-9]+]]:_(i64) = G_SELECT [[ICMP4]](i1), [[LSHR1]], [[C4]] + ; SI-NEXT: [[UV6:%[0-9]+]]:_(i64), 
[[UV7:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV1]](i128) + ; SI-NEXT: [[SUB6:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[C2]] + ; SI-NEXT: [[SUB7:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[SUB1]] + ; SI-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[SUB1]](i32), [[C2]] + ; SI-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[SUB1]](i32), [[C1]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[UV6]], [[SUB1]](i32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(i64) = G_LSHR [[UV6]], [[SUB7]](i32) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i64) = G_SHL [[UV7]], [[SUB1]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i64) = G_OR [[LSHR4]], [[SHL3]] + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i64) = G_SHL [[UV6]], [[SUB6]](i32) + ; SI-NEXT: [[SELECT6:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[SHL2]], [[C4]] + ; SI-NEXT: [[SELECT7:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[OR2]], [[SHL4]] + ; SI-NEXT: [[SELECT8:%[0-9]+]]:_(i64) = G_SELECT [[ICMP7]](i1), [[UV7]], [[SELECT7]] + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i64) = G_OR [[SELECT4]], [[SELECT6]] + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i64) = G_OR [[SELECT5]], [[SELECT8]] + ; SI-NEXT: [[UV8:%[0-9]+]]:_(i64), [[UV9:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV1]](i128) + ; SI-NEXT: [[ASHR3:%[0-9]+]]:_(i64) = G_ASHR [[UV9]], [[C3]](i32) + ; SI-NEXT: [[ASHR4:%[0-9]+]]:_(i64) = G_ASHR [[UV9]], [[C3]](i32) + ; SI-NEXT: [[UV10:%[0-9]+]]:_(i64), [[UV11:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV1]](i128) + ; SI-NEXT: [[SUB8:%[0-9]+]]:_(i32) = G_SUB [[SUB]], [[C2]] + ; SI-NEXT: [[SUB9:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[SUB]] + ; SI-NEXT: [[ICMP8:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[SUB]](i32), [[C2]] + ; SI-NEXT: [[ICMP9:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[SUB]](i32), [[C1]] + ; SI-NEXT: [[ASHR5:%[0-9]+]]:_(i64) = G_ASHR [[UV11]], [[SUB]](i32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(i64) = G_LSHR [[UV10]], [[SUB]](i32) + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i64) = G_SHL [[UV11]], [[SUB9]](i32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(i64) = G_OR [[LSHR5]], [[SHL5]] + ; SI-NEXT: [[ASHR6:%[0-9]+]]:_(i64) = G_ASHR [[UV11]], [[C3]](i32) + ; SI-NEXT: [[ASHR7:%[0-9]+]]:_(i64) = G_ASHR [[UV11]], [[SUB8]](i32) + ; SI-NEXT: [[SELECT9:%[0-9]+]]:_(i64) = G_SELECT [[ICMP8]](i1), [[OR5]], [[ASHR7]] + ; SI-NEXT: [[SELECT10:%[0-9]+]]:_(i64) = G_SELECT [[ICMP9]](i1), [[UV10]], [[SELECT9]] + ; SI-NEXT: [[SELECT11:%[0-9]+]]:_(i64) = G_SELECT [[ICMP8]](i1), [[ASHR5]], [[ASHR6]] + ; SI-NEXT: [[SELECT12:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR3]], [[SELECT10]] + ; SI-NEXT: [[SELECT13:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR4]], [[SELECT11]] + ; SI-NEXT: [[UV12:%[0-9]+]]:_(i64), [[UV13:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV]](i128) + ; SI-NEXT: [[SELECT14:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[UV12]], [[SELECT12]] + ; SI-NEXT: [[SELECT15:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[UV13]], [[SELECT13]] + ; SI-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT14]](i64), [[SELECT15]](i64) + ; SI-NEXT: [[SELECT16:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[SELECT1]], [[ASHR3]] + ; SI-NEXT: [[SELECT17:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[SELECT2]], [[ASHR4]] + ; SI-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT16]](i64), [[SELECT17]](i64) + ; SI-NEXT: [[MV2:%[0-9]+]]:_(i256) = G_MERGE_VALUES [[MV]](i128), [[MV1]](i128) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV2]](i256) ; ; VI-LABEL: name: test_ashr_s256_s256 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; 
VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](s256) - ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] - ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]] - ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; VI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; VI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] - ; VI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] - ; VI-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] - ; VI-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[COPY1]](s32) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[COPY1]](s32) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV3]], [[SUB3]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C3]](s32) - ; VI-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[SUB2]](s32) - ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[OR]], [[ASHR2]] - ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV2]], [[SELECT]] - ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[ASHR]], [[ASHR1]] - ; VI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; VI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] - ; VI-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] - ; VI-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] - ; VI-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[COPY1]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV4]], [[COPY1]](s32) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV5]], [[SUB5]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[LSHR2]], [[SHL1]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[SUB4]](s32) - ; VI-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[OR1]], [[LSHR3]] - ; VI-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP5]](s1), [[UV4]], [[SELECT3]] - ; VI-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR1]], [[C4]] - ; VI-NEXT: [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; VI-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C2]] - ; VI-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB1]] - ; VI-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C2]] - ; VI-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB1]](s32), [[C1]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB1]](s32) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s64) = G_LSHR [[UV6]], [[SUB7]](s32) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s64) = G_SHL [[UV7]], [[SUB1]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[LSHR4]], [[SHL3]] - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB6]](s32) - ; VI-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL2]], [[C4]] - ; VI-NEXT: 
[[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[OR2]], [[SHL4]] - ; VI-NEXT: [[SELECT8:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[UV7]], [[SELECT7]] - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s64) = G_OR [[SELECT4]], [[SELECT6]] - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s64) = G_OR [[SELECT5]], [[SELECT8]] - ; VI-NEXT: [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; VI-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV9]], [[C3]](s32) - ; VI-NEXT: [[ASHR4:%[0-9]+]]:_(s64) = G_ASHR [[UV9]], [[C3]](s32) - ; VI-NEXT: [[UV10:%[0-9]+]]:_(s64), [[UV11:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; VI-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C2]] - ; VI-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB]] - ; VI-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C2]] - ; VI-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB]](s32), [[C1]] - ; VI-NEXT: [[ASHR5:%[0-9]+]]:_(s64) = G_ASHR [[UV11]], [[SUB]](s32) - ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s64) = G_LSHR [[UV10]], [[SUB]](s32) - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[UV11]], [[SUB9]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[LSHR5]], [[SHL5]] - ; VI-NEXT: [[ASHR6:%[0-9]+]]:_(s64) = G_ASHR [[UV11]], [[C3]](s32) - ; VI-NEXT: [[ASHR7:%[0-9]+]]:_(s64) = G_ASHR [[UV11]], [[SUB8]](s32) - ; VI-NEXT: [[SELECT9:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[OR5]], [[ASHR7]] - ; VI-NEXT: [[SELECT10:%[0-9]+]]:_(s64) = G_SELECT [[ICMP9]](s1), [[UV10]], [[SELECT9]] - ; VI-NEXT: [[SELECT11:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[ASHR5]], [[ASHR6]] - ; VI-NEXT: [[SELECT12:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR3]], [[SELECT10]] - ; VI-NEXT: [[SELECT13:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR4]], [[SELECT11]] - ; VI-NEXT: [[UV12:%[0-9]+]]:_(s64), [[UV13:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; VI-NEXT: [[SELECT14:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV12]], [[SELECT12]] - ; VI-NEXT: [[SELECT15:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV13]], [[SELECT13]] - ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT14]](s64), [[SELECT15]](s64) - ; VI-NEXT: [[SELECT16:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT1]], [[ASHR3]] - ; VI-NEXT: [[SELECT17:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT2]], [[ASHR4]] - ; VI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT16]](s64), [[SELECT17]](s64) - ; VI-NEXT: [[MV2:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV]](s128), [[MV1]](s128) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV2]](s256) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 128 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i128), [[UV1:%[0-9]+]]:_(i128) = G_UNMERGE_VALUES [[COPY]](i256) + ; VI-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[COPY1]], [[C]] + ; VI-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C]], [[COPY1]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[COPY1]](i32), [[C]] + ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C1]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; VI-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV1]](i128) + ; VI-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[COPY1]], [[C2]] + ; VI-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[COPY1]] + ; VI-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[COPY1]](i32), [[C2]] + ; VI-NEXT: 
[[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C1]] + ; VI-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[UV3]], [[COPY1]](i32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV2]], [[COPY1]](i32) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV3]], [[SUB3]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[UV3]], [[C3]](i32) + ; VI-NEXT: [[ASHR2:%[0-9]+]]:_(i64) = G_ASHR [[UV3]], [[SUB2]](i32) + ; VI-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP2]](i1), [[OR]], [[ASHR2]] + ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP3]](i1), [[UV2]], [[SELECT]] + ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP2]](i1), [[ASHR]], [[ASHR1]] + ; VI-NEXT: [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV]](i128) + ; VI-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[COPY1]], [[C2]] + ; VI-NEXT: [[SUB5:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[COPY1]] + ; VI-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[COPY1]](i32), [[C2]] + ; VI-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C1]] + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i64) = G_LSHR [[UV5]], [[COPY1]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i64) = G_LSHR [[UV4]], [[COPY1]](i32) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[UV5]], [[SUB5]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i64) = G_OR [[LSHR2]], [[SHL1]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i64) = G_LSHR [[UV5]], [[SUB4]](i32) + ; VI-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP4]](i1), [[OR1]], [[LSHR3]] + ; VI-NEXT: [[SELECT4:%[0-9]+]]:_(i64) = G_SELECT [[ICMP5]](i1), [[UV4]], [[SELECT3]] + ; VI-NEXT: [[SELECT5:%[0-9]+]]:_(i64) = G_SELECT [[ICMP4]](i1), [[LSHR1]], [[C4]] + ; VI-NEXT: [[UV6:%[0-9]+]]:_(i64), [[UV7:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV1]](i128) + ; VI-NEXT: [[SUB6:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[C2]] + ; VI-NEXT: [[SUB7:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[SUB1]] + ; VI-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[SUB1]](i32), [[C2]] + ; VI-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[SUB1]](i32), [[C1]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[UV6]], [[SUB1]](i32) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(i64) = G_LSHR [[UV6]], [[SUB7]](i32) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i64) = G_SHL [[UV7]], [[SUB1]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i64) = G_OR [[LSHR4]], [[SHL3]] + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i64) = G_SHL [[UV6]], [[SUB6]](i32) + ; VI-NEXT: [[SELECT6:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[SHL2]], [[C4]] + ; VI-NEXT: [[SELECT7:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[OR2]], [[SHL4]] + ; VI-NEXT: [[SELECT8:%[0-9]+]]:_(i64) = G_SELECT [[ICMP7]](i1), [[UV7]], [[SELECT7]] + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i64) = G_OR [[SELECT4]], [[SELECT6]] + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i64) = G_OR [[SELECT5]], [[SELECT8]] + ; VI-NEXT: [[UV8:%[0-9]+]]:_(i64), [[UV9:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV1]](i128) + ; VI-NEXT: [[ASHR3:%[0-9]+]]:_(i64) = G_ASHR [[UV9]], [[C3]](i32) + ; VI-NEXT: [[ASHR4:%[0-9]+]]:_(i64) = G_ASHR [[UV9]], [[C3]](i32) + ; VI-NEXT: [[UV10:%[0-9]+]]:_(i64), [[UV11:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV1]](i128) + ; VI-NEXT: [[SUB8:%[0-9]+]]:_(i32) = G_SUB [[SUB]], [[C2]] + ; VI-NEXT: [[SUB9:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[SUB]] + ; VI-NEXT: [[ICMP8:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[SUB]](i32), [[C2]] + ; VI-NEXT: [[ICMP9:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[SUB]](i32), [[C1]] + ; VI-NEXT: 
[[ASHR5:%[0-9]+]]:_(i64) = G_ASHR [[UV11]], [[SUB]](i32) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(i64) = G_LSHR [[UV10]], [[SUB]](i32) + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i64) = G_SHL [[UV11]], [[SUB9]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i64) = G_OR [[LSHR5]], [[SHL5]] + ; VI-NEXT: [[ASHR6:%[0-9]+]]:_(i64) = G_ASHR [[UV11]], [[C3]](i32) + ; VI-NEXT: [[ASHR7:%[0-9]+]]:_(i64) = G_ASHR [[UV11]], [[SUB8]](i32) + ; VI-NEXT: [[SELECT9:%[0-9]+]]:_(i64) = G_SELECT [[ICMP8]](i1), [[OR5]], [[ASHR7]] + ; VI-NEXT: [[SELECT10:%[0-9]+]]:_(i64) = G_SELECT [[ICMP9]](i1), [[UV10]], [[SELECT9]] + ; VI-NEXT: [[SELECT11:%[0-9]+]]:_(i64) = G_SELECT [[ICMP8]](i1), [[ASHR5]], [[ASHR6]] + ; VI-NEXT: [[SELECT12:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR3]], [[SELECT10]] + ; VI-NEXT: [[SELECT13:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR4]], [[SELECT11]] + ; VI-NEXT: [[UV12:%[0-9]+]]:_(i64), [[UV13:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV]](i128) + ; VI-NEXT: [[SELECT14:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[UV12]], [[SELECT12]] + ; VI-NEXT: [[SELECT15:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[UV13]], [[SELECT13]] + ; VI-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT14]](i64), [[SELECT15]](i64) + ; VI-NEXT: [[SELECT16:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[SELECT1]], [[ASHR3]] + ; VI-NEXT: [[SELECT17:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[SELECT2]], [[ASHR4]] + ; VI-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT16]](i64), [[SELECT17]](i64) + ; VI-NEXT: [[MV2:%[0-9]+]]:_(i256) = G_MERGE_VALUES [[MV]](i128), [[MV1]](i128) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV2]](i256) ; ; GFX9PLUS-LABEL: name: test_ashr_s256_s256 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 - ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](s256) - ; GFX9PLUS-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] - ; GFX9PLUS-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] - ; GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9PLUS-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]] - ; GFX9PLUS-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; GFX9PLUS-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; GFX9PLUS-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] - ; GFX9PLUS-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] - ; GFX9PLUS-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] - ; GFX9PLUS-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[COPY1]](s32) - ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[COPY1]](s32) - ; GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV3]], [[SUB3]](s32) - ; GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] - ; GFX9PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX9PLUS-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C3]](s32) - ; GFX9PLUS-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[SUB2]](s32) - ; GFX9PLUS-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[OR]], [[ASHR2]] - ; 
GFX9PLUS-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV2]], [[SELECT]] - ; GFX9PLUS-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[ASHR]], [[ASHR1]] - ; GFX9PLUS-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; GFX9PLUS-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] - ; GFX9PLUS-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] - ; GFX9PLUS-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] - ; GFX9PLUS-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; GFX9PLUS-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[COPY1]](s32) - ; GFX9PLUS-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV4]], [[COPY1]](s32) - ; GFX9PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV5]], [[SUB5]](s32) - ; GFX9PLUS-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[LSHR2]], [[SHL1]] - ; GFX9PLUS-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9PLUS-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[SUB4]](s32) - ; GFX9PLUS-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[OR1]], [[LSHR3]] - ; GFX9PLUS-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP5]](s1), [[UV4]], [[SELECT3]] - ; GFX9PLUS-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR1]], [[C4]] - ; GFX9PLUS-NEXT: [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; GFX9PLUS-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C2]] - ; GFX9PLUS-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB1]] - ; GFX9PLUS-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C2]] - ; GFX9PLUS-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB1]](s32), [[C1]] - ; GFX9PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB1]](s32) - ; GFX9PLUS-NEXT: [[LSHR4:%[0-9]+]]:_(s64) = G_LSHR [[UV6]], [[SUB7]](s32) - ; GFX9PLUS-NEXT: [[SHL3:%[0-9]+]]:_(s64) = G_SHL [[UV7]], [[SUB1]](s32) - ; GFX9PLUS-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[LSHR4]], [[SHL3]] - ; GFX9PLUS-NEXT: [[SHL4:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB6]](s32) - ; GFX9PLUS-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL2]], [[C4]] - ; GFX9PLUS-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[OR2]], [[SHL4]] - ; GFX9PLUS-NEXT: [[SELECT8:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[UV7]], [[SELECT7]] - ; GFX9PLUS-NEXT: [[OR3:%[0-9]+]]:_(s64) = G_OR [[SELECT4]], [[SELECT6]] - ; GFX9PLUS-NEXT: [[OR4:%[0-9]+]]:_(s64) = G_OR [[SELECT5]], [[SELECT8]] - ; GFX9PLUS-NEXT: [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; GFX9PLUS-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV9]], [[C3]](s32) - ; GFX9PLUS-NEXT: [[ASHR4:%[0-9]+]]:_(s64) = G_ASHR [[UV9]], [[C3]](s32) - ; GFX9PLUS-NEXT: [[UV10:%[0-9]+]]:_(s64), [[UV11:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; GFX9PLUS-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C2]] - ; GFX9PLUS-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB]] - ; GFX9PLUS-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C2]] - ; GFX9PLUS-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB]](s32), [[C1]] - ; GFX9PLUS-NEXT: [[ASHR5:%[0-9]+]]:_(s64) = G_ASHR [[UV11]], [[SUB]](s32) - ; GFX9PLUS-NEXT: [[LSHR5:%[0-9]+]]:_(s64) = G_LSHR [[UV10]], [[SUB]](s32) - ; GFX9PLUS-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[UV11]], [[SUB9]](s32) - ; GFX9PLUS-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[LSHR5]], [[SHL5]] - ; GFX9PLUS-NEXT: [[ASHR6:%[0-9]+]]:_(s64) = G_ASHR [[UV11]], [[C3]](s32) - ; GFX9PLUS-NEXT: 
[[ASHR7:%[0-9]+]]:_(s64) = G_ASHR [[UV11]], [[SUB8]](s32) - ; GFX9PLUS-NEXT: [[SELECT9:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[OR5]], [[ASHR7]] - ; GFX9PLUS-NEXT: [[SELECT10:%[0-9]+]]:_(s64) = G_SELECT [[ICMP9]](s1), [[UV10]], [[SELECT9]] - ; GFX9PLUS-NEXT: [[SELECT11:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[ASHR5]], [[ASHR6]] - ; GFX9PLUS-NEXT: [[SELECT12:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR3]], [[SELECT10]] - ; GFX9PLUS-NEXT: [[SELECT13:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR4]], [[SELECT11]] - ; GFX9PLUS-NEXT: [[UV12:%[0-9]+]]:_(s64), [[UV13:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; GFX9PLUS-NEXT: [[SELECT14:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV12]], [[SELECT12]] - ; GFX9PLUS-NEXT: [[SELECT15:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV13]], [[SELECT13]] - ; GFX9PLUS-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT14]](s64), [[SELECT15]](s64) - ; GFX9PLUS-NEXT: [[SELECT16:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT1]], [[ASHR3]] - ; GFX9PLUS-NEXT: [[SELECT17:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT2]], [[ASHR4]] - ; GFX9PLUS-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT16]](s64), [[SELECT17]](s64) - ; GFX9PLUS-NEXT: [[MV2:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV]](s128), [[MV1]](s128) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV2]](s256) - %0:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - %1:_(s32) = COPY $vgpr8 - %2:_(s256) = G_ZEXT %1 - %3:_(s256) = G_ASHR %0, %2 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3 + ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(i256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 128 + ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(i128), [[UV1:%[0-9]+]]:_(i128) = G_UNMERGE_VALUES [[COPY]](i256) + ; GFX9PLUS-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[COPY1]], [[C]] + ; GFX9PLUS-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C]], [[COPY1]] + ; GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9PLUS-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[COPY1]](i32), [[C]] + ; GFX9PLUS-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C1]] + ; GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; GFX9PLUS-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV1]](i128) + ; GFX9PLUS-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[COPY1]], [[C2]] + ; GFX9PLUS-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[COPY1]] + ; GFX9PLUS-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[COPY1]](i32), [[C2]] + ; GFX9PLUS-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C1]] + ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[UV3]], [[COPY1]](i32) + ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV2]], [[COPY1]](i32) + ; GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV3]], [[SUB3]](i32) + ; GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL]] + ; GFX9PLUS-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX9PLUS-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[UV3]], [[C3]](i32) + ; GFX9PLUS-NEXT: [[ASHR2:%[0-9]+]]:_(i64) = G_ASHR [[UV3]], [[SUB2]](i32) + ; GFX9PLUS-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP2]](i1), [[OR]], [[ASHR2]] + ; GFX9PLUS-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP3]](i1), [[UV2]], [[SELECT]] + ; GFX9PLUS-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP2]](i1), [[ASHR]], [[ASHR1]] + ; GFX9PLUS-NEXT: 
[[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV]](i128) + ; GFX9PLUS-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[COPY1]], [[C2]] + ; GFX9PLUS-NEXT: [[SUB5:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[COPY1]] + ; GFX9PLUS-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[COPY1]](i32), [[C2]] + ; GFX9PLUS-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C1]] + ; GFX9PLUS-NEXT: [[LSHR1:%[0-9]+]]:_(i64) = G_LSHR [[UV5]], [[COPY1]](i32) + ; GFX9PLUS-NEXT: [[LSHR2:%[0-9]+]]:_(i64) = G_LSHR [[UV4]], [[COPY1]](i32) + ; GFX9PLUS-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[UV5]], [[SUB5]](i32) + ; GFX9PLUS-NEXT: [[OR1:%[0-9]+]]:_(i64) = G_OR [[LSHR2]], [[SHL1]] + ; GFX9PLUS-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX9PLUS-NEXT: [[LSHR3:%[0-9]+]]:_(i64) = G_LSHR [[UV5]], [[SUB4]](i32) + ; GFX9PLUS-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP4]](i1), [[OR1]], [[LSHR3]] + ; GFX9PLUS-NEXT: [[SELECT4:%[0-9]+]]:_(i64) = G_SELECT [[ICMP5]](i1), [[UV4]], [[SELECT3]] + ; GFX9PLUS-NEXT: [[SELECT5:%[0-9]+]]:_(i64) = G_SELECT [[ICMP4]](i1), [[LSHR1]], [[C4]] + ; GFX9PLUS-NEXT: [[UV6:%[0-9]+]]:_(i64), [[UV7:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV1]](i128) + ; GFX9PLUS-NEXT: [[SUB6:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[C2]] + ; GFX9PLUS-NEXT: [[SUB7:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[SUB1]] + ; GFX9PLUS-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[SUB1]](i32), [[C2]] + ; GFX9PLUS-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[SUB1]](i32), [[C1]] + ; GFX9PLUS-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[UV6]], [[SUB1]](i32) + ; GFX9PLUS-NEXT: [[LSHR4:%[0-9]+]]:_(i64) = G_LSHR [[UV6]], [[SUB7]](i32) + ; GFX9PLUS-NEXT: [[SHL3:%[0-9]+]]:_(i64) = G_SHL [[UV7]], [[SUB1]](i32) + ; GFX9PLUS-NEXT: [[OR2:%[0-9]+]]:_(i64) = G_OR [[LSHR4]], [[SHL3]] + ; GFX9PLUS-NEXT: [[SHL4:%[0-9]+]]:_(i64) = G_SHL [[UV6]], [[SUB6]](i32) + ; GFX9PLUS-NEXT: [[SELECT6:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[SHL2]], [[C4]] + ; GFX9PLUS-NEXT: [[SELECT7:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[OR2]], [[SHL4]] + ; GFX9PLUS-NEXT: [[SELECT8:%[0-9]+]]:_(i64) = G_SELECT [[ICMP7]](i1), [[UV7]], [[SELECT7]] + ; GFX9PLUS-NEXT: [[OR3:%[0-9]+]]:_(i64) = G_OR [[SELECT4]], [[SELECT6]] + ; GFX9PLUS-NEXT: [[OR4:%[0-9]+]]:_(i64) = G_OR [[SELECT5]], [[SELECT8]] + ; GFX9PLUS-NEXT: [[UV8:%[0-9]+]]:_(i64), [[UV9:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV1]](i128) + ; GFX9PLUS-NEXT: [[ASHR3:%[0-9]+]]:_(i64) = G_ASHR [[UV9]], [[C3]](i32) + ; GFX9PLUS-NEXT: [[ASHR4:%[0-9]+]]:_(i64) = G_ASHR [[UV9]], [[C3]](i32) + ; GFX9PLUS-NEXT: [[UV10:%[0-9]+]]:_(i64), [[UV11:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV1]](i128) + ; GFX9PLUS-NEXT: [[SUB8:%[0-9]+]]:_(i32) = G_SUB [[SUB]], [[C2]] + ; GFX9PLUS-NEXT: [[SUB9:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[SUB]] + ; GFX9PLUS-NEXT: [[ICMP8:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[SUB]](i32), [[C2]] + ; GFX9PLUS-NEXT: [[ICMP9:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[SUB]](i32), [[C1]] + ; GFX9PLUS-NEXT: [[ASHR5:%[0-9]+]]:_(i64) = G_ASHR [[UV11]], [[SUB]](i32) + ; GFX9PLUS-NEXT: [[LSHR5:%[0-9]+]]:_(i64) = G_LSHR [[UV10]], [[SUB]](i32) + ; GFX9PLUS-NEXT: [[SHL5:%[0-9]+]]:_(i64) = G_SHL [[UV11]], [[SUB9]](i32) + ; GFX9PLUS-NEXT: [[OR5:%[0-9]+]]:_(i64) = G_OR [[LSHR5]], [[SHL5]] + ; GFX9PLUS-NEXT: [[ASHR6:%[0-9]+]]:_(i64) = G_ASHR [[UV11]], [[C3]](i32) + ; GFX9PLUS-NEXT: [[ASHR7:%[0-9]+]]:_(i64) = G_ASHR [[UV11]], [[SUB8]](i32) + ; GFX9PLUS-NEXT: [[SELECT9:%[0-9]+]]:_(i64) = G_SELECT [[ICMP8]](i1), [[OR5]], [[ASHR7]] + ; GFX9PLUS-NEXT: [[SELECT10:%[0-9]+]]:_(i64) = G_SELECT 
[[ICMP9]](i1), [[UV10]], [[SELECT9]] + ; GFX9PLUS-NEXT: [[SELECT11:%[0-9]+]]:_(i64) = G_SELECT [[ICMP8]](i1), [[ASHR5]], [[ASHR6]] + ; GFX9PLUS-NEXT: [[SELECT12:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR3]], [[SELECT10]] + ; GFX9PLUS-NEXT: [[SELECT13:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR4]], [[SELECT11]] + ; GFX9PLUS-NEXT: [[UV12:%[0-9]+]]:_(i64), [[UV13:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV]](i128) + ; GFX9PLUS-NEXT: [[SELECT14:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[UV12]], [[SELECT12]] + ; GFX9PLUS-NEXT: [[SELECT15:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[UV13]], [[SELECT13]] + ; GFX9PLUS-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT14]](i64), [[SELECT15]](i64) + ; GFX9PLUS-NEXT: [[SELECT16:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[SELECT1]], [[ASHR3]] + ; GFX9PLUS-NEXT: [[SELECT17:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[SELECT2]], [[ASHR4]] + ; GFX9PLUS-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT16]](i64), [[SELECT17]](i64) + ; GFX9PLUS-NEXT: [[MV2:%[0-9]+]]:_(i256) = G_MERGE_VALUES [[MV]](i128), [[MV1]](i128) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV2]](i256) + %0:_(i256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(i32) = COPY $vgpr8 + %2:_(i256) = G_ZEXT %1(i32) + %3:_(i256) = G_ASHR %0, %2(i256) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3(i256) ... --- @@ -1812,135 +1812,135 @@ body: | ; SI-LABEL: name: test_ashr_v2s128_v2s32 ; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr4_vgpr5 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](<2 x s128>) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; SI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV2]], [[C]] - ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UV2]] - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[UV2]](s32), [[C]] - ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV2]](s32), [[C1]] - ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV5]], [[UV2]](s32) - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV4]], [[UV2]](s32) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV5]], [[SUB1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV5]], [[C2]](s32) - ; SI-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV5]], [[SUB]](s32) - ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[ASHR2]] - ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV4]], [[SELECT]] - ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[ASHR]], [[ASHR1]] - ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) - ; SI-NEXT: [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; SI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[UV3]], [[C]] - ; SI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UV3]] - ; SI-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[UV3]](s32), [[C]] - ; SI-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP 
intpred(eq), [[UV3]](s32), [[C1]] - ; SI-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV7]], [[UV3]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV6]], [[UV3]](s32) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV7]], [[SUB3]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[LSHR1]], [[SHL1]] - ; SI-NEXT: [[ASHR4:%[0-9]+]]:_(s64) = G_ASHR [[UV7]], [[C2]](s32) - ; SI-NEXT: [[ASHR5:%[0-9]+]]:_(s64) = G_ASHR [[UV7]], [[SUB2]](s32) - ; SI-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[OR1]], [[ASHR5]] - ; SI-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV6]], [[SELECT3]] - ; SI-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[ASHR3]], [[ASHR4]] - ; SI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT4]](s64), [[SELECT5]](s64) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s128>) = G_BUILD_VECTOR [[MV]](s128), [[MV1]](s128) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<2 x s128>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr4_vgpr5 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i128), [[UV1:%[0-9]+]]:_(i128) = G_UNMERGE_VALUES [[COPY]](<2 x i128>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; SI-NEXT: [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV]](i128) + ; SI-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV2]], [[C]] + ; SI-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C]], [[UV2]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[UV2]](i32), [[C]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[UV2]](i32), [[C1]] + ; SI-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[UV5]], [[UV2]](i32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV4]], [[UV2]](i32) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV5]], [[SUB1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[UV5]], [[C2]](i32) + ; SI-NEXT: [[ASHR2:%[0-9]+]]:_(i64) = G_ASHR [[UV5]], [[SUB]](i32) + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR]], [[ASHR2]] + ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[UV4]], [[SELECT]] + ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[ASHR]], [[ASHR1]] + ; SI-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT1]](i64), [[SELECT2]](i64) + ; SI-NEXT: [[UV6:%[0-9]+]]:_(i64), [[UV7:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV1]](i128) + ; SI-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[UV3]], [[C]] + ; SI-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[C]], [[UV3]] + ; SI-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[UV3]](i32), [[C]] + ; SI-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[UV3]](i32), [[C1]] + ; SI-NEXT: [[ASHR3:%[0-9]+]]:_(i64) = G_ASHR [[UV7]], [[UV3]](i32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i64) = G_LSHR [[UV6]], [[UV3]](i32) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[UV7]], [[SUB3]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i64) = G_OR [[LSHR1]], [[SHL1]] + ; SI-NEXT: [[ASHR4:%[0-9]+]]:_(i64) = G_ASHR [[UV7]], [[C2]](i32) + ; SI-NEXT: [[ASHR5:%[0-9]+]]:_(i64) = G_ASHR [[UV7]], [[SUB2]](i32) + ; SI-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP2]](i1), [[OR1]], [[ASHR5]] + ; SI-NEXT: [[SELECT4:%[0-9]+]]:_(i64) = G_SELECT [[ICMP3]](i1), 
[[UV6]], [[SELECT3]] + ; SI-NEXT: [[SELECT5:%[0-9]+]]:_(i64) = G_SELECT [[ICMP2]](i1), [[ASHR3]], [[ASHR4]] + ; SI-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT4]](i64), [[SELECT5]](i64) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i128>) = G_BUILD_VECTOR [[MV]](i128), [[MV1]](i128) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<2 x i128>) ; ; VI-LABEL: name: test_ashr_v2s128_v2s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr4_vgpr5 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](<2 x s128>) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; VI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV2]], [[C]] - ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UV2]] - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[UV2]](s32), [[C]] - ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV2]](s32), [[C1]] - ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV5]], [[UV2]](s32) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV4]], [[UV2]](s32) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV5]], [[SUB1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV5]], [[C2]](s32) - ; VI-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV5]], [[SUB]](s32) - ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[ASHR2]] - ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV4]], [[SELECT]] - ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[ASHR]], [[ASHR1]] - ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) - ; VI-NEXT: [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; VI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[UV3]], [[C]] - ; VI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UV3]] - ; VI-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[UV3]](s32), [[C]] - ; VI-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV3]](s32), [[C1]] - ; VI-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV7]], [[UV3]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV6]], [[UV3]](s32) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV7]], [[SUB3]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[LSHR1]], [[SHL1]] - ; VI-NEXT: [[ASHR4:%[0-9]+]]:_(s64) = G_ASHR [[UV7]], [[C2]](s32) - ; VI-NEXT: [[ASHR5:%[0-9]+]]:_(s64) = G_ASHR [[UV7]], [[SUB2]](s32) - ; VI-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[OR1]], [[ASHR5]] - ; VI-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV6]], [[SELECT3]] - ; VI-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[ASHR3]], [[ASHR4]] - ; VI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT4]](s64), [[SELECT5]](s64) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s128>) = G_BUILD_VECTOR [[MV]](s128), [[MV1]](s128) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<2 x s128>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i128>) = COPY 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr4_vgpr5 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i128), [[UV1:%[0-9]+]]:_(i128) = G_UNMERGE_VALUES [[COPY]](<2 x i128>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; VI-NEXT: [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV]](i128) + ; VI-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV2]], [[C]] + ; VI-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C]], [[UV2]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[UV2]](i32), [[C]] + ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[UV2]](i32), [[C1]] + ; VI-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[UV5]], [[UV2]](i32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV4]], [[UV2]](i32) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV5]], [[SUB1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[UV5]], [[C2]](i32) + ; VI-NEXT: [[ASHR2:%[0-9]+]]:_(i64) = G_ASHR [[UV5]], [[SUB]](i32) + ; VI-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR]], [[ASHR2]] + ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[UV4]], [[SELECT]] + ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[ASHR]], [[ASHR1]] + ; VI-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT1]](i64), [[SELECT2]](i64) + ; VI-NEXT: [[UV6:%[0-9]+]]:_(i64), [[UV7:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV1]](i128) + ; VI-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[UV3]], [[C]] + ; VI-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[C]], [[UV3]] + ; VI-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[UV3]](i32), [[C]] + ; VI-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[UV3]](i32), [[C1]] + ; VI-NEXT: [[ASHR3:%[0-9]+]]:_(i64) = G_ASHR [[UV7]], [[UV3]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i64) = G_LSHR [[UV6]], [[UV3]](i32) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[UV7]], [[SUB3]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i64) = G_OR [[LSHR1]], [[SHL1]] + ; VI-NEXT: [[ASHR4:%[0-9]+]]:_(i64) = G_ASHR [[UV7]], [[C2]](i32) + ; VI-NEXT: [[ASHR5:%[0-9]+]]:_(i64) = G_ASHR [[UV7]], [[SUB2]](i32) + ; VI-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP2]](i1), [[OR1]], [[ASHR5]] + ; VI-NEXT: [[SELECT4:%[0-9]+]]:_(i64) = G_SELECT [[ICMP3]](i1), [[UV6]], [[SELECT3]] + ; VI-NEXT: [[SELECT5:%[0-9]+]]:_(i64) = G_SELECT [[ICMP2]](i1), [[ASHR3]], [[ASHR4]] + ; VI-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT4]](i64), [[SELECT5]](i64) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i128>) = G_BUILD_VECTOR [[MV]](i128), [[MV1]](i128) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<2 x i128>) ; ; GFX9PLUS-LABEL: name: test_ashr_v2s128_v2s32 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr4_vgpr5 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(<2 x s128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](<2 x s128>) - ; GFX9PLUS-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; GFX9PLUS-NEXT: 
[[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; GFX9PLUS-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV2]], [[C]] - ; GFX9PLUS-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UV2]] - ; GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9PLUS-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[UV2]](s32), [[C]] - ; GFX9PLUS-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV2]](s32), [[C1]] - ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV5]], [[UV2]](s32) - ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV4]], [[UV2]](s32) - ; GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV5]], [[SUB1]](s32) - ; GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] - ; GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX9PLUS-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV5]], [[C2]](s32) - ; GFX9PLUS-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV5]], [[SUB]](s32) - ; GFX9PLUS-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[ASHR2]] - ; GFX9PLUS-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV4]], [[SELECT]] - ; GFX9PLUS-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[ASHR]], [[ASHR1]] - ; GFX9PLUS-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) - ; GFX9PLUS-NEXT: [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; GFX9PLUS-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[UV3]], [[C]] - ; GFX9PLUS-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UV3]] - ; GFX9PLUS-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[UV3]](s32), [[C]] - ; GFX9PLUS-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV3]](s32), [[C1]] - ; GFX9PLUS-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV7]], [[UV3]](s32) - ; GFX9PLUS-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV6]], [[UV3]](s32) - ; GFX9PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV7]], [[SUB3]](s32) - ; GFX9PLUS-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[LSHR1]], [[SHL1]] - ; GFX9PLUS-NEXT: [[ASHR4:%[0-9]+]]:_(s64) = G_ASHR [[UV7]], [[C2]](s32) - ; GFX9PLUS-NEXT: [[ASHR5:%[0-9]+]]:_(s64) = G_ASHR [[UV7]], [[SUB2]](s32) - ; GFX9PLUS-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[OR1]], [[ASHR5]] - ; GFX9PLUS-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV6]], [[SELECT3]] - ; GFX9PLUS-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[ASHR3]], [[ASHR4]] - ; GFX9PLUS-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT4]](s64), [[SELECT5]](s64) - ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s128>) = G_BUILD_VECTOR [[MV]](s128), [[MV1]](s128) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<2 x s128>) - %0:_(<2 x s128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - %1:_(<2 x s32>) = COPY $vgpr4_vgpr5 - %2:_(<2 x s128>) = G_ASHR %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %2 + ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(<2 x i128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr4_vgpr5 + ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(i128), [[UV1:%[0-9]+]]:_(i128) = G_UNMERGE_VALUES [[COPY]](<2 x i128>) + ; GFX9PLUS-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; GFX9PLUS-NEXT: [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV]](i128) + ; GFX9PLUS-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV2]], [[C]] + ; 
GFX9PLUS-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C]], [[UV2]] + ; GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9PLUS-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[UV2]](i32), [[C]] + ; GFX9PLUS-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[UV2]](i32), [[C1]] + ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[UV5]], [[UV2]](i32) + ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV4]], [[UV2]](i32) + ; GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV5]], [[SUB1]](i32) + ; GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL]] + ; GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX9PLUS-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[UV5]], [[C2]](i32) + ; GFX9PLUS-NEXT: [[ASHR2:%[0-9]+]]:_(i64) = G_ASHR [[UV5]], [[SUB]](i32) + ; GFX9PLUS-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR]], [[ASHR2]] + ; GFX9PLUS-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[UV4]], [[SELECT]] + ; GFX9PLUS-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[ASHR]], [[ASHR1]] + ; GFX9PLUS-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT1]](i64), [[SELECT2]](i64) + ; GFX9PLUS-NEXT: [[UV6:%[0-9]+]]:_(i64), [[UV7:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV1]](i128) + ; GFX9PLUS-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[UV3]], [[C]] + ; GFX9PLUS-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[C]], [[UV3]] + ; GFX9PLUS-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[UV3]](i32), [[C]] + ; GFX9PLUS-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[UV3]](i32), [[C1]] + ; GFX9PLUS-NEXT: [[ASHR3:%[0-9]+]]:_(i64) = G_ASHR [[UV7]], [[UV3]](i32) + ; GFX9PLUS-NEXT: [[LSHR1:%[0-9]+]]:_(i64) = G_LSHR [[UV6]], [[UV3]](i32) + ; GFX9PLUS-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[UV7]], [[SUB3]](i32) + ; GFX9PLUS-NEXT: [[OR1:%[0-9]+]]:_(i64) = G_OR [[LSHR1]], [[SHL1]] + ; GFX9PLUS-NEXT: [[ASHR4:%[0-9]+]]:_(i64) = G_ASHR [[UV7]], [[C2]](i32) + ; GFX9PLUS-NEXT: [[ASHR5:%[0-9]+]]:_(i64) = G_ASHR [[UV7]], [[SUB2]](i32) + ; GFX9PLUS-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP2]](i1), [[OR1]], [[ASHR5]] + ; GFX9PLUS-NEXT: [[SELECT4:%[0-9]+]]:_(i64) = G_SELECT [[ICMP3]](i1), [[UV6]], [[SELECT3]] + ; GFX9PLUS-NEXT: [[SELECT5:%[0-9]+]]:_(i64) = G_SELECT [[ICMP2]](i1), [[ASHR3]], [[ASHR4]] + ; GFX9PLUS-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT4]](i64), [[SELECT5]](i64) + ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i128>) = G_BUILD_VECTOR [[MV]](i128), [[MV1]](i128) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<2 x i128>) + %0:_(<2 x i128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(<2 x i32>) = COPY $vgpr4_vgpr5 + %2:_(<2 x i128>) = G_ASHR %0, %1(<2 x i32>) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %2(<2 x i128>) ... 
--- @@ -1952,101 +1952,101 @@ body: | ; SI-LABEL: name: test_ashr_s65_s32 ; SI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %24(s64) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) - ; SI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32) - ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[MV1]], 1 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C]] - ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[TRUNC]] - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C]] - ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s32), [[C1]] - ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[TRUNC]](s32) - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[MV]], [[TRUNC]](s32) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT_INREG]], [[SUB1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C2]](s32) - ; SI-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[SUB]](s32) - ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[ASHR2]] - ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[MV]], [[SELECT]] - ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[ASHR]], [[ASHR1]] - ; SI-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](s96) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC %24(i64) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i96) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; SI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV]](i32), [[UV1]](i32) + ; SI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV2]](i32), [[DEF]](i32) + ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[MV1]], 1 + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; SI-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[TRUNC]], [[C]] + ; SI-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C]], [[TRUNC]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[TRUNC]](i32), [[C]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[TRUNC]](i32), [[C1]] + ; SI-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG]], [[TRUNC]](i32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[MV]], [[TRUNC]](i32) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[SEXT_INREG]], [[SUB1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG]], [[C2]](i32) + ; SI-NEXT: [[ASHR2:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG]], [[SUB]](i32) + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), 
[[OR]], [[ASHR2]] + ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[MV]], [[SELECT]] + ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[ASHR]], [[ASHR1]] + ; SI-NEXT: [[MV2:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT1]](i64), [[SELECT2]](i64) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i96) = G_TRUNC [[MV2]](i128) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](i96) ; ; VI-LABEL: name: test_ashr_s65_s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %24(s64) - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) - ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32) - ; VI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[MV1]], 1 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C]] - ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[TRUNC]] - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C]] - ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s32), [[C1]] - ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[TRUNC]](s32) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[MV]], [[TRUNC]](s32) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT_INREG]], [[SUB1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C2]](s32) - ; VI-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[SUB]](s32) - ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[ASHR2]] - ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[MV]], [[SELECT]] - ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[ASHR]], [[ASHR1]] - ; VI-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](s96) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC %24(i64) + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i96) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; VI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV]](i32), [[UV1]](i32) + ; VI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV2]](i32), [[DEF]](i32) + ; VI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[MV1]], 1 + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; VI-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[TRUNC]], [[C]] + ; VI-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C]], [[TRUNC]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[TRUNC]](i32), [[C]] + ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[TRUNC]](i32), [[C1]] + ; VI-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG]], [[TRUNC]](i32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[MV]], [[TRUNC]](i32) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL 
[[SEXT_INREG]], [[SUB1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG]], [[C2]](i32) + ; VI-NEXT: [[ASHR2:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG]], [[SUB]](i32) + ; VI-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR]], [[ASHR2]] + ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[MV]], [[SELECT]] + ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[ASHR]], [[ASHR1]] + ; VI-NEXT: [[MV2:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT1]](i64), [[SELECT2]](i64) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i96) = G_TRUNC [[MV2]](i128) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](i96) ; ; GFX9PLUS-LABEL: name: test_ashr_s65_s32 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %24(s64) - ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) - ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9PLUS-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) - ; GFX9PLUS-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32) - ; GFX9PLUS-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[MV1]], 1 - ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; GFX9PLUS-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C]] - ; GFX9PLUS-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[TRUNC]] - ; GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9PLUS-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C]] - ; GFX9PLUS-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s32), [[C1]] - ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[TRUNC]](s32) - ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[MV]], [[TRUNC]](s32) - ; GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT_INREG]], [[SUB1]](s32) - ; GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] - ; GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX9PLUS-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C2]](s32) - ; GFX9PLUS-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[SUB]](s32) - ; GFX9PLUS-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[ASHR2]] - ; GFX9PLUS-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[MV]], [[SELECT]] - ; GFX9PLUS-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[ASHR]], [[ASHR1]] - ; GFX9PLUS-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) - ; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](s96) - %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(s32) = COPY $vgpr3 - %2:_(s65) = G_TRUNC %0 - %3:_(s65) = G_ASHR %2, %3 - %4:_(s96) = G_ANYEXT %3 - $vgpr0_vgpr1_vgpr2 = COPY %4 + ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC %24(i64) + ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i96) + ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX9PLUS-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES 
[[UV]](i32), [[UV1]](i32) + ; GFX9PLUS-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV2]](i32), [[DEF]](i32) + ; GFX9PLUS-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[MV1]], 1 + ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; GFX9PLUS-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[TRUNC]], [[C]] + ; GFX9PLUS-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C]], [[TRUNC]] + ; GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9PLUS-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[TRUNC]](i32), [[C]] + ; GFX9PLUS-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[TRUNC]](i32), [[C1]] + ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG]], [[TRUNC]](i32) + ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[MV]], [[TRUNC]](i32) + ; GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[SEXT_INREG]], [[SUB1]](i32) + ; GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL]] + ; GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX9PLUS-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG]], [[C2]](i32) + ; GFX9PLUS-NEXT: [[ASHR2:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG]], [[SUB]](i32) + ; GFX9PLUS-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR]], [[ASHR2]] + ; GFX9PLUS-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[MV]], [[SELECT]] + ; GFX9PLUS-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[ASHR]], [[ASHR1]] + ; GFX9PLUS-NEXT: [[MV2:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT1]](i64), [[SELECT2]](i64) + ; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(i96) = G_TRUNC [[MV2]](i128) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](i96) + %0:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(i32) = COPY $vgpr3 + %2:_(i65) = G_TRUNC %0(i96) + %3:_(i65) = G_ASHR %2, %3(i65) + %4:_(i96) = G_ANYEXT %3(i65) + $vgpr0_vgpr1_vgpr2 = COPY %4(i96) ... 
--- @@ -2058,98 +2058,98 @@ body: | ; SI-LABEL: name: test_ashr_s65_s32_constant8 ; SI: liveins: $vgpr0_vgpr1_vgpr2 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %24(s64) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) - ; SI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32) - ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[MV1]], 1 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C]] - ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[TRUNC]] - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C]] - ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s32), [[C1]] - ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[TRUNC]](s32) - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[MV]], [[TRUNC]](s32) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT_INREG]], [[SUB1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C2]](s32) - ; SI-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[SUB]](s32) - ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[ASHR2]] - ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[MV]], [[SELECT]] - ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[ASHR]], [[ASHR1]] - ; SI-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](s96) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC %24(i64) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i96) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; SI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV]](i32), [[UV1]](i32) + ; SI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV2]](i32), [[DEF]](i32) + ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[MV1]], 1 + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; SI-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[TRUNC]], [[C]] + ; SI-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C]], [[TRUNC]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[TRUNC]](i32), [[C]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[TRUNC]](i32), [[C1]] + ; SI-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG]], [[TRUNC]](i32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[MV]], [[TRUNC]](i32) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[SEXT_INREG]], [[SUB1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG]], [[C2]](i32) + ; SI-NEXT: [[ASHR2:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG]], [[SUB]](i32) + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR]], [[ASHR2]] + ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[MV]], [[SELECT]] + 
; SI-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[ASHR]], [[ASHR1]] + ; SI-NEXT: [[MV2:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT1]](i64), [[SELECT2]](i64) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i96) = G_TRUNC [[MV2]](i128) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](i96) ; ; VI-LABEL: name: test_ashr_s65_s32_constant8 ; VI: liveins: $vgpr0_vgpr1_vgpr2 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %24(s64) - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) - ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32) - ; VI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[MV1]], 1 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C]] - ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[TRUNC]] - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C]] - ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s32), [[C1]] - ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[TRUNC]](s32) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[MV]], [[TRUNC]](s32) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT_INREG]], [[SUB1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C2]](s32) - ; VI-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[SUB]](s32) - ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[ASHR2]] - ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[MV]], [[SELECT]] - ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[ASHR]], [[ASHR1]] - ; VI-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](s96) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC %24(i64) + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i96) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; VI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV]](i32), [[UV1]](i32) + ; VI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV2]](i32), [[DEF]](i32) + ; VI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[MV1]], 1 + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; VI-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[TRUNC]], [[C]] + ; VI-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C]], [[TRUNC]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[TRUNC]](i32), [[C]] + ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[TRUNC]](i32), [[C1]] + ; VI-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG]], [[TRUNC]](i32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[MV]], [[TRUNC]](i32) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[SEXT_INREG]], [[SUB1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG]], 
[[C2]](i32) + ; VI-NEXT: [[ASHR2:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG]], [[SUB]](i32) + ; VI-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR]], [[ASHR2]] + ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[MV]], [[SELECT]] + ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[ASHR]], [[ASHR1]] + ; VI-NEXT: [[MV2:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT1]](i64), [[SELECT2]](i64) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i96) = G_TRUNC [[MV2]](i128) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](i96) ; ; GFX9PLUS-LABEL: name: test_ashr_s65_s32_constant8 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %24(s64) - ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) - ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9PLUS-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) - ; GFX9PLUS-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32) - ; GFX9PLUS-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[MV1]], 1 - ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; GFX9PLUS-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C]] - ; GFX9PLUS-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[TRUNC]] - ; GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9PLUS-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C]] - ; GFX9PLUS-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s32), [[C1]] - ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[TRUNC]](s32) - ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[MV]], [[TRUNC]](s32) - ; GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT_INREG]], [[SUB1]](s32) - ; GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] - ; GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX9PLUS-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C2]](s32) - ; GFX9PLUS-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[SUB]](s32) - ; GFX9PLUS-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[ASHR2]] - ; GFX9PLUS-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[MV]], [[SELECT]] - ; GFX9PLUS-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[ASHR]], [[ASHR1]] - ; GFX9PLUS-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) - ; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](s96) - %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(s32) = G_CONSTANT i32 8 - %2:_(s65) = G_TRUNC %0 - %3:_(s65) = G_ASHR %2, %3 - %4:_(s96) = G_ANYEXT %3 - $vgpr0_vgpr1_vgpr2 = COPY %4 + ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC %24(i64) + ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i96) + ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX9PLUS-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV]](i32), [[UV1]](i32) + ; GFX9PLUS-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV2]](i32), [[DEF]](i32) + ; GFX9PLUS-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[MV1]], 1 + ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; GFX9PLUS-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[TRUNC]], [[C]] + ; 
GFX9PLUS-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C]], [[TRUNC]] + ; GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9PLUS-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[TRUNC]](i32), [[C]] + ; GFX9PLUS-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[TRUNC]](i32), [[C1]] + ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG]], [[TRUNC]](i32) + ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[MV]], [[TRUNC]](i32) + ; GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[SEXT_INREG]], [[SUB1]](i32) + ; GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL]] + ; GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX9PLUS-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG]], [[C2]](i32) + ; GFX9PLUS-NEXT: [[ASHR2:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG]], [[SUB]](i32) + ; GFX9PLUS-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR]], [[ASHR2]] + ; GFX9PLUS-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[MV]], [[SELECT]] + ; GFX9PLUS-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[ASHR]], [[ASHR1]] + ; GFX9PLUS-NEXT: [[MV2:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT1]](i64), [[SELECT2]](i64) + ; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(i96) = G_TRUNC [[MV2]](i128) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](i96) + %0:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(i32) = G_CONSTANT i32 8 + %2:_(i65) = G_TRUNC %0(i96) + %3:_(i65) = G_ASHR %2, %3(i65) + %4:_(i96) = G_ANYEXT %3(i65) + $vgpr0_vgpr1_vgpr2 = COPY %4(i96) ... --- @@ -2161,104 +2161,104 @@ body: | ; SI-LABEL: name: test_ashr_s65_s32_known_pow2 ; SI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[COPY1]](s32) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) - ; SI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32) - ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[MV1]], 1 - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[C1]] - ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SHL]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SHL]](s32), [[C1]] - ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SHL]](s32), [[C2]] - ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[SHL]](s32) - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[MV]], [[SHL]](s32) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SEXT_INREG]], [[SUB1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL1]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C3]](s32) - ; SI-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[SUB]](s32) - ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[ASHR2]] - ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[MV]], [[SELECT]] - ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[ASHR]], [[ASHR1]] - ; SI-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s96) = 
G_TRUNC [[MV2]](s128) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC]](s96) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[C]], [[COPY1]](i32) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i96) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; SI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV]](i32), [[UV1]](i32) + ; SI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV2]](i32), [[DEF]](i32) + ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[MV1]], 1 + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; SI-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[SHL]], [[C1]] + ; SI-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C1]], [[SHL]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[SHL]](i32), [[C1]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[SHL]](i32), [[C2]] + ; SI-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG]], [[SHL]](i32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[MV]], [[SHL]](i32) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[SEXT_INREG]], [[SUB1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL1]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG]], [[C3]](i32) + ; SI-NEXT: [[ASHR2:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG]], [[SUB]](i32) + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR]], [[ASHR2]] + ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[MV]], [[SELECT]] + ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[ASHR]], [[ASHR1]] + ; SI-NEXT: [[MV2:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT1]](i64), [[SELECT2]](i64) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i96) = G_TRUNC [[MV2]](i128) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC]](i96) ; ; VI-LABEL: name: test_ashr_s65_s32_known_pow2 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[COPY1]](s32) - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) - ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32) - ; VI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[MV1]], 1 - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[C1]] - ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SHL]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SHL]](s32), [[C1]] - ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SHL]](s32), [[C2]] - ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[SHL]](s32) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[MV]], [[SHL]](s32) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SEXT_INREG]], [[SUB1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL1]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR 
[[SEXT_INREG]], [[C3]](s32) - ; VI-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[SUB]](s32) - ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[ASHR2]] - ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[MV]], [[SELECT]] - ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[ASHR]], [[ASHR1]] - ; VI-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC]](s96) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[C]], [[COPY1]](i32) + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i96) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; VI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV]](i32), [[UV1]](i32) + ; VI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV2]](i32), [[DEF]](i32) + ; VI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[MV1]], 1 + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; VI-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[SHL]], [[C1]] + ; VI-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C1]], [[SHL]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[SHL]](i32), [[C1]] + ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[SHL]](i32), [[C2]] + ; VI-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG]], [[SHL]](i32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[MV]], [[SHL]](i32) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[SEXT_INREG]], [[SUB1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL1]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG]], [[C3]](i32) + ; VI-NEXT: [[ASHR2:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG]], [[SUB]](i32) + ; VI-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR]], [[ASHR2]] + ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[MV]], [[SELECT]] + ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[ASHR]], [[ASHR1]] + ; VI-NEXT: [[MV2:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT1]](i64), [[SELECT2]](i64) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i96) = G_TRUNC [[MV2]](i128) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC]](i96) ; ; GFX9PLUS-LABEL: name: test_ashr_s65_s32_known_pow2 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[COPY1]](s32) - ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) - ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9PLUS-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) - ; GFX9PLUS-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32) - ; GFX9PLUS-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[MV1]], 1 - ; GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; GFX9PLUS-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[C1]] - ; GFX9PLUS-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SHL]] - ; GFX9PLUS-NEXT: 
[[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9PLUS-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SHL]](s32), [[C1]] - ; GFX9PLUS-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SHL]](s32), [[C2]] - ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[SHL]](s32) - ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[MV]], [[SHL]](s32) - ; GFX9PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SEXT_INREG]], [[SUB1]](s32) - ; GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL1]] - ; GFX9PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX9PLUS-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C3]](s32) - ; GFX9PLUS-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[SUB]](s32) - ; GFX9PLUS-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[ASHR2]] - ; GFX9PLUS-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[MV]], [[SELECT]] - ; GFX9PLUS-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[ASHR]], [[ASHR1]] - ; GFX9PLUS-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) - ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC]](s96) - %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(s32) = COPY $vgpr3 - %2:_(s32) = G_CONSTANT i32 1 - %3:_(s32) = G_SHL %2, %1 - %4:_(s65) = G_TRUNC %0 - %5:_(s65) = G_ASHR %4, %3 - %6:_(s96) = G_ANYEXT %5 - $vgpr0_vgpr1_vgpr2 = COPY %6 + ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[C]], [[COPY1]](i32) + ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i96) + ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX9PLUS-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV]](i32), [[UV1]](i32) + ; GFX9PLUS-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV2]](i32), [[DEF]](i32) + ; GFX9PLUS-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[MV1]], 1 + ; GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; GFX9PLUS-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[SHL]], [[C1]] + ; GFX9PLUS-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C1]], [[SHL]] + ; GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9PLUS-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[SHL]](i32), [[C1]] + ; GFX9PLUS-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[SHL]](i32), [[C2]] + ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG]], [[SHL]](i32) + ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[MV]], [[SHL]](i32) + ; GFX9PLUS-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[SEXT_INREG]], [[SUB1]](i32) + ; GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL1]] + ; GFX9PLUS-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX9PLUS-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG]], [[C3]](i32) + ; GFX9PLUS-NEXT: [[ASHR2:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG]], [[SUB]](i32) + ; GFX9PLUS-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR]], [[ASHR2]] + ; GFX9PLUS-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[MV]], [[SELECT]] + ; GFX9PLUS-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[ASHR]], [[ASHR1]] + ; GFX9PLUS-NEXT: [[MV2:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT1]](i64), [[SELECT2]](i64) + ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(i96) = G_TRUNC [[MV2]](i128) + ; GFX9PLUS-NEXT: 
$vgpr0_vgpr1_vgpr2 = COPY [[TRUNC]](i96) + %0:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(i32) = COPY $vgpr3 + %2:_(i32) = G_CONSTANT i32 1 + %3:_(i32) = G_SHL %2, %1(i32) + %4:_(i65) = G_TRUNC %0(i96) + %5:_(i65) = G_ASHR %4, %3(i32) + %6:_(i96) = G_ANYEXT %5(i65) + $vgpr0_vgpr1_vgpr2 = COPY %6(i96) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomic-cmpxchg-with-success.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomic-cmpxchg-with-success.mir index eafd1e15e2cb3..2a7cbf2519a23 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomic-cmpxchg-with-success.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomic-cmpxchg-with-success.mir @@ -11,18 +11,18 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p1), [[BUILD_VECTOR]] :: (load store syncscope("agent-one-as") monotonic monotonic (s32), addrspace 1) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AMDGPU_ATOMIC_CMPXCHG]](s32), [[COPY1]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[AMDGPU_ATOMIC_CMPXCHG]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY3]](s32), implicit [[ICMP]](s1) - %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s32) = COPY $vgpr3 - %3:_(s32), %4:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS %0, %1, %2 :: (load store syncscope("agent-one-as") monotonic monotonic (s32), addrspace 1) - S_ENDPGM 0, implicit %3, implicit %4 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY2]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(i32) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p1), [[BUILD_VECTOR]] :: (load store syncscope("agent-one-as") monotonic monotonic (i32), addrspace 1) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[AMDGPU_ATOMIC_CMPXCHG]](i32), [[COPY1]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[AMDGPU_ATOMIC_CMPXCHG]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY3]](i32), implicit [[ICMP]](i1) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(i32) = COPY $vgpr2 + %2:_(i32) = COPY $vgpr3 + %3:_(i32), %4:_(i1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS %0(p1), %1, %2 :: (load store syncscope("agent-one-as") monotonic monotonic (i32), addrspace 1) + S_ENDPGM 0, implicit %3(i32), implicit %4(i1) ... 
@@ -36,18 +36,18 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p0), [[BUILD_VECTOR]] :: (load store syncscope("agent-one-as") monotonic monotonic (s32)) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AMDGPU_ATOMIC_CMPXCHG]](s32), [[COPY1]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[AMDGPU_ATOMIC_CMPXCHG]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY3]](s32), implicit [[ICMP]](s1) - %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s32) = COPY $vgpr3 - %3:_(s32), %4:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS %0, %1, %2 :: (load store syncscope("agent-one-as") monotonic monotonic (s32), addrspace 0) - S_ENDPGM 0, implicit %3, implicit %4 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY2]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(i32) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p0), [[BUILD_VECTOR]] :: (load store syncscope("agent-one-as") monotonic monotonic (i32)) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[AMDGPU_ATOMIC_CMPXCHG]](i32), [[COPY1]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[AMDGPU_ATOMIC_CMPXCHG]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY3]](i32), implicit [[ICMP]](i1) + %0:_(p0) = COPY $vgpr0_vgpr1 + %1:_(i32) = COPY $vgpr2 + %2:_(i32) = COPY $vgpr3 + %3:_(i32), %4:_(i1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS %0(p0), %1, %2 :: (load store syncscope("agent-one-as") monotonic monotonic (i32)) + S_ENDPGM 0, implicit %3(i32), implicit %4(i1) ... 
@@ -61,17 +61,17 @@ body: | ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p3), [[COPY1]], [[COPY2]] :: (load store syncscope("agent-one-as") monotonic monotonic (s32), addrspace 3) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[ATOMIC_CMPXCHG]](s32), [[COPY1]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[ATOMIC_CMPXCHG]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY3]](s32), implicit [[ICMP]](s1) - %0:_(p3) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32), %4:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS %0, %1, %2 :: (load store syncscope("agent-one-as") monotonic monotonic (s32), addrspace 3) - S_ENDPGM 0, implicit %3, implicit %4 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(i32) = G_ATOMIC_CMPXCHG [[COPY]](p3), [[COPY1]], [[COPY2]] :: (load store syncscope("agent-one-as") monotonic monotonic (i32), addrspace 3) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[ATOMIC_CMPXCHG]](i32), [[COPY1]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[ATOMIC_CMPXCHG]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY3]](i32), implicit [[ICMP]](i1) + %0:_(p3) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32), %4:_(i1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS %0(p3), %1, %2 :: (load store syncscope("agent-one-as") monotonic monotonic (i32), addrspace 3) + S_ENDPGM 0, implicit %3(i32), implicit %4(i1) ... @@ -85,18 +85,18 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY2]](s64), [[COPY1]](s64) - ; CHECK-NEXT: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s64) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p1), [[BUILD_VECTOR]] :: (load store syncscope("agent-one-as") monotonic monotonic (s64), addrspace 1) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AMDGPU_ATOMIC_CMPXCHG]](s64), [[COPY1]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY [[AMDGPU_ATOMIC_CMPXCHG]](s64) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY3]](s64), implicit [[ICMP]](s1) - %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = COPY $vgpr4_vgpr5 - %3:_(s64), %4:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS %0, %1, %2 :: (load store syncscope("agent-one-as") monotonic monotonic (s64), addrspace 1) - S_ENDPGM 0, implicit %3, implicit %4 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i64) = COPY $vgpr4_vgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[COPY2]](i64), [[COPY1]](i64) + ; CHECK-NEXT: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(i64) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p1), [[BUILD_VECTOR]] :: (load store syncscope("agent-one-as") monotonic monotonic (i64), addrspace 1) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[AMDGPU_ATOMIC_CMPXCHG]](i64), [[COPY1]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i64) = COPY [[AMDGPU_ATOMIC_CMPXCHG]](i64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY3]](i64), implicit [[ICMP]](i1) 
+ %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64) = COPY $vgpr4_vgpr5 + %3:_(i64), %4:_(i1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS %0(p1), %1, %2 :: (load store syncscope("agent-one-as") monotonic monotonic (i64), addrspace 1) + S_ENDPGM 0, implicit %3(i64), implicit %4(i1) ... @@ -110,16 +110,16 @@ body: | ; CHECK: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr1_vgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr3_vgpr4 - ; CHECK-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s64) = G_ATOMIC_CMPXCHG [[COPY]](p3), [[COPY1]], [[COPY2]] :: (load store syncscope("agent-one-as") monotonic monotonic (s64), addrspace 3) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[ATOMIC_CMPXCHG]](s64), [[COPY1]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY [[ATOMIC_CMPXCHG]](s64) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY3]](s64), implicit [[ICMP]](s1) - %0:_(p3) = COPY $vgpr0 - %1:_(s64) = COPY $vgpr1_vgpr2 - %2:_(s64) = COPY $vgpr3_vgpr4 - %3:_(s64), %4:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS %0, %1, %2 :: (load store syncscope("agent-one-as") monotonic monotonic (s64), addrspace 3) - S_ENDPGM 0, implicit %3, implicit %4 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr1_vgpr2 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i64) = COPY $vgpr3_vgpr4 + ; CHECK-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(i64) = G_ATOMIC_CMPXCHG [[COPY]](p3), [[COPY1]], [[COPY2]] :: (load store syncscope("agent-one-as") monotonic monotonic (i64), addrspace 3) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[ATOMIC_CMPXCHG]](i64), [[COPY1]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i64) = COPY [[ATOMIC_CMPXCHG]](i64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY3]](i64), implicit [[ICMP]](i1) + %0:_(p3) = COPY $vgpr0 + %1:_(i64) = COPY $vgpr1_vgpr2 + %2:_(i64) = COPY $vgpr3_vgpr4 + %3:_(i64), %4:_(i1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS %0(p3), %1, %2 :: (load store syncscope("agent-one-as") monotonic monotonic (i64), addrspace 3) + S_ENDPGM 0, implicit %3(i64), implicit %4(i1) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomic-cmpxchg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomic-cmpxchg.mir index 744e3146d5f15..b908f95701343 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomic-cmpxchg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomic-cmpxchg.mir @@ -12,13 +12,13 @@ body: | ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p3), [[COPY1]], [[COPY2]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(i32) = G_ATOMIC_CMPXCHG [[COPY]](p3), [[COPY1]], [[COPY2]] :: (load store seq_cst (i32), addrspace 3) %0:_(p3) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $sgpr2 - %3:_(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst (s32), addrspace 3) + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $sgpr2 + %3:_(i32) = G_ATOMIC_CMPXCHG %0(p3), %1, %2 :: (load store seq_cst (i32), addrspace 3) ... 
--- @@ -31,13 +31,13 @@ body: | ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p3), [[COPY1]], [[COPY2]] :: (load store seq_cst (s64), addrspace 3) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(i32) = G_ATOMIC_CMPXCHG [[COPY]](p3), [[COPY1]], [[COPY2]] :: (load store seq_cst (i64), addrspace 3) %0:_(p3) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $sgpr2 - %3:_(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst (s64), addrspace 3) + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $sgpr2 + %3:_(i32) = G_ATOMIC_CMPXCHG %0(p3), %1, %2 :: (load store seq_cst (i64), addrspace 3) ... --- @@ -50,14 +50,14 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2, $sgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p1), [[BUILD_VECTOR]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY2]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(i32) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p1), [[BUILD_VECTOR]] :: (load store seq_cst (i32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s32) = COPY $sgpr3 - %3:_(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst (s32), addrspace 1) + %1:_(i32) = COPY $sgpr2 + %2:_(i32) = COPY $sgpr3 + %3:_(i32) = G_ATOMIC_CMPXCHG %0(p1), %1, %2 :: (load store seq_cst (i32), addrspace 1) ... --- @@ -70,14 +70,14 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2, $sgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p1), [[BUILD_VECTOR]] :: (load store seq_cst (s64), addrspace 1) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY2]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(i32) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p1), [[BUILD_VECTOR]] :: (load store seq_cst (i64), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s32) = COPY $sgpr3 - %3:_(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst (s64), addrspace 1) + %1:_(i32) = COPY $sgpr2 + %2:_(i32) = COPY $sgpr3 + %3:_(i32) = G_ATOMIC_CMPXCHG %0(p1), %1, %2 :: (load store seq_cst (i64), addrspace 1) ... 
--- @@ -91,14 +91,14 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2, $sgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p0), [[BUILD_VECTOR]] :: (load store seq_cst (s32)) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY2]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(i32) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p0), [[BUILD_VECTOR]] :: (load store seq_cst (i32)) %0:_(p0) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s32) = COPY $sgpr3 - %3:_(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst (s32), addrspace 0) + %1:_(i32) = COPY $sgpr2 + %2:_(i32) = COPY $sgpr3 + %3:_(i32) = G_ATOMIC_CMPXCHG %0(p0), %1, %2 :: (load store seq_cst (i32)) ... --- @@ -112,12 +112,12 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2, $sgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p0), [[BUILD_VECTOR]] :: (load store seq_cst (s64)) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $sgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY2]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(i32) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p0), [[BUILD_VECTOR]] :: (load store seq_cst (i64)) %0:_(p0) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s32) = COPY $sgpr3 - %3:_(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst (s64), addrspace 0) + %1:_(i32) = COPY $sgpr2 + %2:_(i32) = COPY $sgpr3 + %3:_(i32) = G_ATOMIC_CMPXCHG %0(p0), %1, %2 :: (load store seq_cst (i64)) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-add.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-add.mir index 066e358cdc18e..b05af6e93f342 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-add.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-add.mir @@ -12,11 +12,11 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[ATOMICRMW_ADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_ADD [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[ATOMICRMW_ADD:%[0-9]+]]:_(i32) = G_ATOMICRMW_ADD [[COPY]](p1), [[COPY1]] :: (load store seq_cst (i32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 1) + %1:_(i32) = COPY $sgpr2 + %2:_(i32) = G_ATOMICRMW_ADD %0(p1), %1 :: (load store seq_cst (i32), addrspace 1) ... 
--- @@ -29,11 +29,11 @@ body: | ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[ATOMICRMW_ADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_ADD [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[ATOMICRMW_ADD:%[0-9]+]]:_(i32) = G_ATOMICRMW_ADD [[COPY]](p3), [[COPY1]] :: (load store seq_cst (i32), addrspace 3) %0:_(p3) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 3) + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_ATOMICRMW_ADD %0(p3), %1 :: (load store seq_cst (i32), addrspace 3) ... --- @@ -46,11 +46,11 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[ATOMICRMW_ADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_ADD [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[ATOMICRMW_ADD:%[0-9]+]]:_(i32) = G_ATOMICRMW_ADD [[COPY]](p1), [[COPY1]] :: (load store seq_cst (i32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 1) + %1:_(i32) = COPY $sgpr2 + %2:_(i32) = G_ATOMICRMW_ADD %0(p1), %1 :: (load store seq_cst (i32), addrspace 1) ... --- @@ -63,9 +63,9 @@ body: | ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[ATOMICRMW_ADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_ADD [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[ATOMICRMW_ADD:%[0-9]+]]:_(i32) = G_ATOMICRMW_ADD [[COPY]](p3), [[COPY1]] :: (load store seq_cst (i32), addrspace 3) %0:_(p3) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 3) + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_ATOMICRMW_ADD %0(p3), %1 :: (load store seq_cst (i32), addrspace 3) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-and.mir index 7ec24588abbcc..f3e62fd7b08e2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-and.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-and.mir @@ -12,11 +12,11 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[ATOMICRMW_AND:%[0-9]+]]:_(s32) = G_ATOMICRMW_AND [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[ATOMICRMW_AND:%[0-9]+]]:_(i32) = G_ATOMICRMW_AND [[COPY]](p1), [[COPY1]] :: (load store seq_cst (i32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_AND %0, %1 :: (load store seq_cst (s32), addrspace 1) + %1:_(i32) = COPY $sgpr2 + %2:_(i32) = G_ATOMICRMW_AND %0(p1), %1 :: (load store seq_cst (i32), addrspace 1) ... 
--- @@ -29,11 +29,11 @@ body: | ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[ATOMICRMW_AND:%[0-9]+]]:_(s32) = G_ATOMICRMW_AND [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[ATOMICRMW_AND:%[0-9]+]]:_(i32) = G_ATOMICRMW_AND [[COPY]](p3), [[COPY1]] :: (load store seq_cst (i32), addrspace 3) %0:_(p3) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_AND %0, %1 :: (load store seq_cst (s32), addrspace 3) + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_ATOMICRMW_AND %0(p3), %1 :: (load store seq_cst (i32), addrspace 3) ... --- @@ -46,11 +46,11 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[ATOMICRMW_AND:%[0-9]+]]:_(s32) = G_ATOMICRMW_AND [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[ATOMICRMW_AND:%[0-9]+]]:_(i32) = G_ATOMICRMW_AND [[COPY]](p1), [[COPY1]] :: (load store seq_cst (i32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_AND %0, %1 :: (load store seq_cst (s32), addrspace 1) + %1:_(i32) = COPY $sgpr2 + %2:_(i32) = G_ATOMICRMW_AND %0(p1), %1 :: (load store seq_cst (i32), addrspace 1) ... --- @@ -63,9 +63,9 @@ body: | ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[ATOMICRMW_AND:%[0-9]+]]:_(s32) = G_ATOMICRMW_AND [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[ATOMICRMW_AND:%[0-9]+]]:_(i32) = G_ATOMICRMW_AND [[COPY]](p3), [[COPY1]] :: (load store seq_cst (i32), addrspace 3) %0:_(p3) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_AND %0, %1 :: (load store seq_cst (s32), addrspace 3) + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_ATOMICRMW_AND %0(p3), %1 :: (load store seq_cst (i32), addrspace 3) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-fadd-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-fadd-global.mir index 195ab02571bfd..ecfd0089a03c9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-fadd-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-fadd-global.mir @@ -2,9 +2,9 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx908 -O0 -run-pass=legalizer %s -o - | FileCheck %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx90a -O0 -run-pass=legalizer %s -o - | FileCheck %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=2 -pass-remarks-missed='gisel.*' -o /dev/null %s 2>&1 | FileCheck -check-prefix=ERR %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=2 -pass-remarks-missed='gisel.*' %s -o /dev/null 2>&1 | FileCheck -check-prefix=ERR %s -# ERR: remark: :0:0: unable to legalize instruction: %2:_(s32) = G_ATOMICRMW_FADD %0:_(p1), %1:_ :: (load store seq_cst (s32), addrspace 1) (in function: atomicrmw_fadd_global_i32) +# ERR: remark: :0:0: unable to legalize instruction: %3:_(f32) = G_ATOMICRMW_FADD %0:_(p1), %2:_ :: (load store seq_cst (f32), addrspace 1) (in function: atomicrmw_fadd_global_i32) --- name: atomicrmw_fadd_global_i32 @@ -14,9 +14,11 @@ body: | liveins: $sgpr0_sgpr1, $sgpr2 ; CHECK-LABEL: name: atomicrmw_fadd_global_i32 ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_FADD [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:_(f32) = G_ATOMICRMW_FADD [[COPY]](p1), [[BITCAST]] :: (load store seq_cst (f32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_FADD %0, %1 :: (load store seq_cst (s32), addrspace 1) + %1:_(i32) = COPY $sgpr2 + %2:_(f32) = G_BITCAST %1(i32) + %3:_(f32) = G_ATOMICRMW_FADD %0(p1), %2 :: (load store seq_cst (f32), addrspace 1) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-fadd-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-fadd-local.mir index 0d3ee3f69ab2f..f074c4c1b7694 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-fadd-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-fadd-local.mir @@ -4,7 +4,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -global-isel-abort=2 -pass-remarks-missed='gisel.*' -o /dev/null %s 2>&1 | FileCheck -check-prefix=ERR %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -O0 -run-pass=legalizer -global-isel-abort=2 -pass-remarks-missed='gisel.*' -o /dev/null %s 2>&1 | FileCheck -check-prefix=ERR %s -# ERR: remark: :0:0: unable to legalize instruction: %2:_(s32) = G_ATOMICRMW_FADD %0:_(p3), %1:_ :: (load store seq_cst (s32), addrspace 3) (in function: atomicrmw_fadd_local_i32) +# ERR: remark: :0:0: unable to legalize instruction: %3:_(f32) = G_ATOMICRMW_FADD %0:_(p3), %2:_ :: (load store seq_cst (f32), addrspace 3) (in function: atomicrmw_fadd_local_i32) --- name: atomicrmw_fadd_local_i32 @@ -13,10 +13,14 @@ body: | bb.0: liveins: $sgpr0, $sgpr1 ; CHECK-LABEL: name: atomicrmw_fadd_local_i32 - ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_FADD [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK: liveins: $sgpr0, $sgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:_(f32) = G_ATOMICRMW_FADD [[COPY]](p3), [[BITCAST]] :: (load store seq_cst (f32), addrspace 3) %0:_(p3) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_FADD %0, %1 :: (load store seq_cst (s32), addrspace 3) + %1:_(i32) = COPY $sgpr1 + %2:_(f32) = G_BITCAST %1(i32) + %3:_(f32) = G_ATOMICRMW_FADD %0(p3), %2 :: (load store seq_cst (f32), addrspace 3) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-max.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-max.mir index 8aef14b81b4d5..c2ce45d3f3aa2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-max.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-max.mir @@ -12,11 +12,11 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[ATOMICRMW_MAX:%[0-9]+]]:_(s32) = G_ATOMICRMW_MAX [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[ATOMICRMW_MAX:%[0-9]+]]:_(i32) = G_ATOMICRMW_MAX [[COPY]](p1), [[COPY1]] :: (load store seq_cst (i32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_MAX %0, %1 :: (load store seq_cst (s32), addrspace 1) + %1:_(i32) = COPY $sgpr2 + %2:_(i32) = G_ATOMICRMW_MAX %0(p1), %1 :: (load store seq_cst (i32), addrspace 1) ... 
--- @@ -29,11 +29,11 @@ body: | ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[ATOMICRMW_MAX:%[0-9]+]]:_(s32) = G_ATOMICRMW_MAX [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[ATOMICRMW_MAX:%[0-9]+]]:_(i32) = G_ATOMICRMW_MAX [[COPY]](p3), [[COPY1]] :: (load store seq_cst (i32), addrspace 3) %0:_(p3) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_MAX %0, %1 :: (load store seq_cst (s32), addrspace 3) + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_ATOMICRMW_MAX %0(p3), %1 :: (load store seq_cst (i32), addrspace 3) ... --- @@ -46,11 +46,11 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[ATOMICRMW_MAX:%[0-9]+]]:_(s32) = G_ATOMICRMW_MAX [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[ATOMICRMW_MAX:%[0-9]+]]:_(i32) = G_ATOMICRMW_MAX [[COPY]](p1), [[COPY1]] :: (load store seq_cst (i32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_MAX %0, %1 :: (load store seq_cst (s32), addrspace 1) + %1:_(i32) = COPY $sgpr2 + %2:_(i32) = G_ATOMICRMW_MAX %0(p1), %1 :: (load store seq_cst (i32), addrspace 1) ... --- @@ -63,9 +63,9 @@ body: | ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[ATOMICRMW_MAX:%[0-9]+]]:_(s32) = G_ATOMICRMW_MAX [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[ATOMICRMW_MAX:%[0-9]+]]:_(i32) = G_ATOMICRMW_MAX [[COPY]](p3), [[COPY1]] :: (load store seq_cst (i32), addrspace 3) %0:_(p3) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_MAX %0, %1 :: (load store seq_cst (s32), addrspace 3) + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_ATOMICRMW_MAX %0(p3), %1 :: (load store seq_cst (i32), addrspace 3) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-min.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-min.mir index 24c4bc1e2564f..8075a1ad9ee08 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-min.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-min.mir @@ -12,11 +12,11 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[ATOMICRMW_MIN:%[0-9]+]]:_(s32) = G_ATOMICRMW_MIN [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[ATOMICRMW_MIN:%[0-9]+]]:_(i32) = G_ATOMICRMW_MIN [[COPY]](p1), [[COPY1]] :: (load store seq_cst (i32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_MIN %0, %1 :: (load store seq_cst (s32), addrspace 1) + %1:_(i32) = COPY $sgpr2 + %2:_(i32) = G_ATOMICRMW_MIN %0(p1), %1 :: (load store seq_cst (i32), addrspace 1) ... 
--- @@ -29,11 +29,11 @@ body: | ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[ATOMICRMW_MIN:%[0-9]+]]:_(s32) = G_ATOMICRMW_MIN [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[ATOMICRMW_MIN:%[0-9]+]]:_(i32) = G_ATOMICRMW_MIN [[COPY]](p3), [[COPY1]] :: (load store seq_cst (i32), addrspace 3) %0:_(p3) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_MIN %0, %1 :: (load store seq_cst (s32), addrspace 3) + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_ATOMICRMW_MIN %0(p3), %1 :: (load store seq_cst (i32), addrspace 3) ... --- @@ -46,11 +46,11 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[ATOMICRMW_MIN:%[0-9]+]]:_(s32) = G_ATOMICRMW_MIN [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[ATOMICRMW_MIN:%[0-9]+]]:_(i32) = G_ATOMICRMW_MIN [[COPY]](p1), [[COPY1]] :: (load store seq_cst (i32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_MIN %0, %1 :: (load store seq_cst (s32), addrspace 1) + %1:_(i32) = COPY $sgpr2 + %2:_(i32) = G_ATOMICRMW_MIN %0(p1), %1 :: (load store seq_cst (i32), addrspace 1) ... --- @@ -63,9 +63,9 @@ body: | ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[ATOMICRMW_MIN:%[0-9]+]]:_(s32) = G_ATOMICRMW_MIN [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[ATOMICRMW_MIN:%[0-9]+]]:_(i32) = G_ATOMICRMW_MIN [[COPY]](p3), [[COPY1]] :: (load store seq_cst (i32), addrspace 3) %0:_(p3) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_MIN %0, %1 :: (load store seq_cst (s32), addrspace 3) + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_ATOMICRMW_MIN %0(p3), %1 :: (load store seq_cst (i32), addrspace 3) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-nand.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-nand.mir index e90c3e14158bd..4d5a462992f9b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-nand.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-nand.mir @@ -3,7 +3,7 @@ # This needs to be expanded into a cmpxchg loop. # TODO: Will AtomicExpand still do this? 
-# ERROR: LLVM ERROR: unable to legalize instruction: %2:_(s32) = G_ATOMICRMW_NAND %0:_(p1), %1:_ :: (load store seq_cst (s32), addrspace 1) (in function: atomicrmw_nand_global_i32) +# ERROR: LLVM ERROR: unable to legalize instruction: %2:_(i32) = G_ATOMICRMW_NAND %0:_(p1), %1:_ :: (load store seq_cst (i32), addrspace 1) (in function: atomicrmw_nand_global_i32) --- name: atomicrmw_nand_global_i32 @@ -13,10 +13,9 @@ body: | liveins: $sgpr0_sgpr1, $sgpr2 ; CHECK-LABEL: name: atomicrmw_nand_global_i32 ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK: [[ATOMICRMW_NAND:%[0-9]+]]:_(s32) = G_ATOMICRMW_NAND [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; CHECK: [[ATOMICRMW_NAND:%[0-9]+]]:_(i32) = G_ATOMICRMW_NAND [[COPY]](p1), [[COPY1]] :: (load store seq_cst (i32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_NAND %0, %1 :: (load store seq_cst (s32), addrspace 1) + %1:_(i32) = COPY $sgpr2 + %2:_(i32) = G_ATOMICRMW_NAND %0(p1), %1 :: (load store seq_cst (i32), addrspace 1) ... - diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-or.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-or.mir index 4cf9bea982316..71fc71a423437 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-or.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-or.mir @@ -12,11 +12,11 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[ATOMICRMW_OR:%[0-9]+]]:_(s32) = G_ATOMICRMW_OR [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[ATOMICRMW_OR:%[0-9]+]]:_(i32) = G_ATOMICRMW_OR [[COPY]](p1), [[COPY1]] :: (load store seq_cst (i32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_OR %0, %1 :: (load store seq_cst (s32), addrspace 1) + %1:_(i32) = COPY $sgpr2 + %2:_(i32) = G_ATOMICRMW_OR %0(p1), %1 :: (load store seq_cst (i32), addrspace 1) ... --- @@ -29,11 +29,11 @@ body: | ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[ATOMICRMW_OR:%[0-9]+]]:_(s32) = G_ATOMICRMW_OR [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[ATOMICRMW_OR:%[0-9]+]]:_(i32) = G_ATOMICRMW_OR [[COPY]](p3), [[COPY1]] :: (load store seq_cst (i32), addrspace 3) %0:_(p3) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_OR %0, %1 :: (load store seq_cst (s32), addrspace 3) + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_ATOMICRMW_OR %0(p3), %1 :: (load store seq_cst (i32), addrspace 3) ... 
--- @@ -46,11 +46,11 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[ATOMICRMW_OR:%[0-9]+]]:_(s32) = G_ATOMICRMW_OR [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[ATOMICRMW_OR:%[0-9]+]]:_(i32) = G_ATOMICRMW_OR [[COPY]](p1), [[COPY1]] :: (load store seq_cst (i32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_OR %0, %1 :: (load store seq_cst (s32), addrspace 1) + %1:_(i32) = COPY $sgpr2 + %2:_(i32) = G_ATOMICRMW_OR %0(p1), %1 :: (load store seq_cst (i32), addrspace 1) ... --- @@ -63,9 +63,9 @@ body: | ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[ATOMICRMW_OR:%[0-9]+]]:_(s32) = G_ATOMICRMW_OR [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[ATOMICRMW_OR:%[0-9]+]]:_(i32) = G_ATOMICRMW_OR [[COPY]](p3), [[COPY1]] :: (load store seq_cst (i32), addrspace 3) %0:_(p3) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_OR %0, %1 :: (load store seq_cst (s32), addrspace 3) + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_ATOMICRMW_OR %0(p3), %1 :: (load store seq_cst (i32), addrspace 3) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-sub.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-sub.mir index 8ca6f00629a64..8a9e1c562c8f4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-sub.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-sub.mir @@ -12,11 +12,11 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:_(s32) = G_ATOMICRMW_SUB [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:_(i32) = G_ATOMICRMW_SUB [[COPY]](p1), [[COPY1]] :: (load store seq_cst (i32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s32), addrspace 1) + %1:_(i32) = COPY $sgpr2 + %2:_(i32) = G_ATOMICRMW_SUB %0(p1), %1 :: (load store seq_cst (i32), addrspace 1) ... --- @@ -29,11 +29,11 @@ body: | ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:_(s32) = G_ATOMICRMW_SUB [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:_(i32) = G_ATOMICRMW_SUB [[COPY]](p3), [[COPY1]] :: (load store seq_cst (i32), addrspace 3) %0:_(p3) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s32), addrspace 3) + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_ATOMICRMW_SUB %0(p3), %1 :: (load store seq_cst (i32), addrspace 3) ... 
--- @@ -46,11 +46,11 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:_(s32) = G_ATOMICRMW_SUB [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:_(i32) = G_ATOMICRMW_SUB [[COPY]](p1), [[COPY1]] :: (load store seq_cst (i32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s32), addrspace 1) + %1:_(i32) = COPY $sgpr2 + %2:_(i32) = G_ATOMICRMW_SUB %0(p1), %1 :: (load store seq_cst (i32), addrspace 1) ... --- @@ -63,9 +63,9 @@ body: | ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:_(s32) = G_ATOMICRMW_SUB [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:_(i32) = G_ATOMICRMW_SUB [[COPY]](p3), [[COPY1]] :: (load store seq_cst (i32), addrspace 3) %0:_(p3) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s32), addrspace 3) + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_ATOMICRMW_SUB %0(p3), %1 :: (load store seq_cst (i32), addrspace 3) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-umax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-umax.mir index ff2ac484403a7..38576c94edfa7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-umax.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-umax.mir @@ -12,11 +12,11 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[ATOMICRMW_UMAX:%[0-9]+]]:_(s32) = G_ATOMICRMW_UMAX [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[ATOMICRMW_UMAX:%[0-9]+]]:_(i32) = G_ATOMICRMW_UMAX [[COPY]](p1), [[COPY1]] :: (load store seq_cst (i32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_UMAX %0, %1 :: (load store seq_cst (s32), addrspace 1) + %1:_(i32) = COPY $sgpr2 + %2:_(i32) = G_ATOMICRMW_UMAX %0(p1), %1 :: (load store seq_cst (i32), addrspace 1) ... --- @@ -29,11 +29,11 @@ body: | ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[ATOMICRMW_UMAX:%[0-9]+]]:_(s32) = G_ATOMICRMW_UMAX [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[ATOMICRMW_UMAX:%[0-9]+]]:_(i32) = G_ATOMICRMW_UMAX [[COPY]](p3), [[COPY1]] :: (load store seq_cst (i32), addrspace 3) %0:_(p3) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_UMAX %0, %1 :: (load store seq_cst (s32), addrspace 3) + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_ATOMICRMW_UMAX %0(p3), %1 :: (load store seq_cst (i32), addrspace 3) ... 
--- @@ -46,11 +46,11 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[ATOMICRMW_UMAX:%[0-9]+]]:_(s32) = G_ATOMICRMW_UMAX [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[ATOMICRMW_UMAX:%[0-9]+]]:_(i32) = G_ATOMICRMW_UMAX [[COPY]](p1), [[COPY1]] :: (load store seq_cst (i32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_UMAX %0, %1 :: (load store seq_cst (s32), addrspace 1) + %1:_(i32) = COPY $sgpr2 + %2:_(i32) = G_ATOMICRMW_UMAX %0(p1), %1 :: (load store seq_cst (i32), addrspace 1) ... --- @@ -63,9 +63,9 @@ body: | ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[ATOMICRMW_UMAX:%[0-9]+]]:_(s32) = G_ATOMICRMW_UMAX [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[ATOMICRMW_UMAX:%[0-9]+]]:_(i32) = G_ATOMICRMW_UMAX [[COPY]](p3), [[COPY1]] :: (load store seq_cst (i32), addrspace 3) %0:_(p3) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_UMAX %0, %1 :: (load store seq_cst (s32), addrspace 3) + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_ATOMICRMW_UMAX %0(p3), %1 :: (load store seq_cst (i32), addrspace 3) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-umin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-umin.mir index ec6f5107d1420..440a9a13c54a9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-umin.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-umin.mir @@ -12,11 +12,11 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[ATOMICRMW_UMIN:%[0-9]+]]:_(s32) = G_ATOMICRMW_UMIN [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[ATOMICRMW_UMIN:%[0-9]+]]:_(i32) = G_ATOMICRMW_UMIN [[COPY]](p1), [[COPY1]] :: (load store seq_cst (i32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_UMIN %0, %1 :: (load store seq_cst (s32), addrspace 1) + %1:_(i32) = COPY $sgpr2 + %2:_(i32) = G_ATOMICRMW_UMIN %0(p1), %1 :: (load store seq_cst (i32), addrspace 1) ... --- @@ -29,11 +29,11 @@ body: | ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[ATOMICRMW_UMIN:%[0-9]+]]:_(s32) = G_ATOMICRMW_UMIN [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[ATOMICRMW_UMIN:%[0-9]+]]:_(i32) = G_ATOMICRMW_UMIN [[COPY]](p3), [[COPY1]] :: (load store seq_cst (i32), addrspace 3) %0:_(p3) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_UMIN %0, %1 :: (load store seq_cst (s32), addrspace 3) + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_ATOMICRMW_UMIN %0(p3), %1 :: (load store seq_cst (i32), addrspace 3) ... 
--- @@ -46,11 +46,11 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[ATOMICRMW_UMIN:%[0-9]+]]:_(s32) = G_ATOMICRMW_UMIN [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[ATOMICRMW_UMIN:%[0-9]+]]:_(i32) = G_ATOMICRMW_UMIN [[COPY]](p1), [[COPY1]] :: (load store seq_cst (i32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_UMIN %0, %1 :: (load store seq_cst (s32), addrspace 1) + %1:_(i32) = COPY $sgpr2 + %2:_(i32) = G_ATOMICRMW_UMIN %0(p1), %1 :: (load store seq_cst (i32), addrspace 1) ... --- @@ -63,9 +63,9 @@ body: | ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[ATOMICRMW_UMIN:%[0-9]+]]:_(s32) = G_ATOMICRMW_UMIN [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[ATOMICRMW_UMIN:%[0-9]+]]:_(i32) = G_ATOMICRMW_UMIN [[COPY]](p3), [[COPY1]] :: (load store seq_cst (i32), addrspace 3) %0:_(p3) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_UMIN %0, %1 :: (load store seq_cst (s32), addrspace 3) + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_ATOMICRMW_UMIN %0(p3), %1 :: (load store seq_cst (i32), addrspace 3) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-xchg-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-xchg-flat.mir index 80597c7fcfadc..f3e0eee7f7981 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-xchg-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-xchg-flat.mir @@ -2,7 +2,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire -O0 -run-pass=legalizer -o - %s | FileCheck %s # RUN: not --crash llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -o /dev/null %s 2>&1 | FileCheck -check-prefix=ERROR %s -# ERROR: LLVM ERROR: unable to legalize instruction: %2:_(s32) = G_ATOMICRMW_XCHG %0:_(p0), %1:_ :: (load store seq_cst (s32)) (in function: atomicrmw_xchg_flat_i32) +# ERROR: LLVM ERROR: unable to legalize instruction: %2:_(i32) = G_ATOMICRMW_XCHG %0:_(p0), %1:_ :: (load store seq_cst (i32)) (in function: atomicrmw_xchg_flat_i32) --- @@ -15,11 +15,11 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[ATOMICRMW_XCHG:%[0-9]+]]:_(s32) = G_ATOMICRMW_XCHG [[COPY]](p0), [[COPY1]] :: (load store seq_cst (s32)) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[ATOMICRMW_XCHG:%[0-9]+]]:_(i32) = G_ATOMICRMW_XCHG [[COPY]](p0), [[COPY1]] :: (load store seq_cst (i32)) %0:_(p0) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_XCHG %0, %1 :: (load store seq_cst (s32), addrspace 0) + %1:_(i32) = COPY $sgpr2 + %2:_(i32) = G_ATOMICRMW_XCHG %0(p0), %1 :: (load store seq_cst (i32)) ... 
--- @@ -32,9 +32,9 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[ATOMICRMW_XCHG:%[0-9]+]]:_(s32) = G_ATOMICRMW_XCHG [[COPY]](p0), [[COPY1]] :: (load store seq_cst (s32)) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[ATOMICRMW_XCHG:%[0-9]+]]:_(i32) = G_ATOMICRMW_XCHG [[COPY]](p0), [[COPY1]] :: (load store seq_cst (i32)) %0:_(p0) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_XCHG %0, %1 :: (load store seq_cst (s32), addrspace 0) + %1:_(i32) = COPY $sgpr2 + %2:_(i32) = G_ATOMICRMW_XCHG %0(p0), %1 :: (load store seq_cst (i32)) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-xchg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-xchg.mir index adb3d7e823369..9e28b4da5aca6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-xchg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-xchg.mir @@ -12,11 +12,11 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[ATOMICRMW_XCHG:%[0-9]+]]:_(s32) = G_ATOMICRMW_XCHG [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[ATOMICRMW_XCHG:%[0-9]+]]:_(i32) = G_ATOMICRMW_XCHG [[COPY]](p1), [[COPY1]] :: (load store seq_cst (i32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_XCHG %0, %1 :: (load store seq_cst (s32), addrspace 1) + %1:_(i32) = COPY $sgpr2 + %2:_(i32) = G_ATOMICRMW_XCHG %0(p1), %1 :: (load store seq_cst (i32), addrspace 1) ... --- @@ -29,11 +29,11 @@ body: | ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[ATOMICRMW_XCHG:%[0-9]+]]:_(s32) = G_ATOMICRMW_XCHG [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[ATOMICRMW_XCHG:%[0-9]+]]:_(i32) = G_ATOMICRMW_XCHG [[COPY]](p3), [[COPY1]] :: (load store seq_cst (i32), addrspace 3) %0:_(p3) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_XCHG %0, %1 :: (load store seq_cst (s32), addrspace 3) + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_ATOMICRMW_XCHG %0(p3), %1 :: (load store seq_cst (i32), addrspace 3) ... --- @@ -46,11 +46,11 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[ATOMICRMW_XCHG:%[0-9]+]]:_(s32) = G_ATOMICRMW_XCHG [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[ATOMICRMW_XCHG:%[0-9]+]]:_(i32) = G_ATOMICRMW_XCHG [[COPY]](p1), [[COPY1]] :: (load store seq_cst (i32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_XCHG %0, %1 :: (load store seq_cst (s32), addrspace 1) + %1:_(i32) = COPY $sgpr2 + %2:_(i32) = G_ATOMICRMW_XCHG %0(p1), %1 :: (load store seq_cst (i32), addrspace 1) ... 
--- @@ -63,9 +63,9 @@ body: | ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[ATOMICRMW_XCHG:%[0-9]+]]:_(s32) = G_ATOMICRMW_XCHG [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[ATOMICRMW_XCHG:%[0-9]+]]:_(i32) = G_ATOMICRMW_XCHG [[COPY]](p3), [[COPY1]] :: (load store seq_cst (i32), addrspace 3) %0:_(p3) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_XCHG %0, %1 :: (load store seq_cst (s32), addrspace 3) + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_ATOMICRMW_XCHG %0(p3), %1 :: (load store seq_cst (i32), addrspace 3) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-xor.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-xor.mir index c028a7cb5c44f..3ad21054c8830 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-xor.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-xor.mir @@ -12,11 +12,11 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[ATOMICRMW_XOR:%[0-9]+]]:_(s32) = G_ATOMICRMW_XOR [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[ATOMICRMW_XOR:%[0-9]+]]:_(i32) = G_ATOMICRMW_XOR [[COPY]](p1), [[COPY1]] :: (load store seq_cst (i32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_XOR %0, %1 :: (load store seq_cst (s32), addrspace 1) + %1:_(i32) = COPY $sgpr2 + %2:_(i32) = G_ATOMICRMW_XOR %0(p1), %1 :: (load store seq_cst (i32), addrspace 1) ... --- @@ -29,11 +29,11 @@ body: | ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[ATOMICRMW_XOR:%[0-9]+]]:_(s32) = G_ATOMICRMW_XOR [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[ATOMICRMW_XOR:%[0-9]+]]:_(i32) = G_ATOMICRMW_XOR [[COPY]](p3), [[COPY1]] :: (load store seq_cst (i32), addrspace 3) %0:_(p3) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_XOR %0, %1 :: (load store seq_cst (s32), addrspace 3) + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_ATOMICRMW_XOR %0(p3), %1 :: (load store seq_cst (i32), addrspace 3) ... --- @@ -46,11 +46,11 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[ATOMICRMW_XOR:%[0-9]+]]:_(s32) = G_ATOMICRMW_XOR [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[ATOMICRMW_XOR:%[0-9]+]]:_(i32) = G_ATOMICRMW_XOR [[COPY]](p1), [[COPY1]] :: (load store seq_cst (i32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_XOR %0, %1 :: (load store seq_cst (s32), addrspace 1) + %1:_(i32) = COPY $sgpr2 + %2:_(i32) = G_ATOMICRMW_XOR %0(p1), %1 :: (load store seq_cst (i32), addrspace 1) ... 
--- @@ -63,9 +63,9 @@ body: | ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[ATOMICRMW_XOR:%[0-9]+]]:_(s32) = G_ATOMICRMW_XOR [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[ATOMICRMW_XOR:%[0-9]+]]:_(i32) = G_ATOMICRMW_XOR [[COPY]](p3), [[COPY1]] :: (load store seq_cst (i32), addrspace 3) %0:_(p3) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_XOR %0, %1 :: (load store seq_cst (s32), addrspace 3) + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_ATOMICRMW_XOR %0(p3), %1 :: (load store seq_cst (i32), addrspace 3) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir index 8b5c27288453e..a3e52e641bfb5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir @@ -10,12 +10,12 @@ body: | ; CHECK-LABEL: name: test_bitcast_s32_to_v2s16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - %0:_(s32) = COPY $vgpr0 - %1:_(<2 x s16>) = G_BITCAST %0 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x i16>) + %0:_(i32) = COPY $vgpr0 + %1:_(<2 x i16>) = G_BITCAST %0(i32) + $vgpr0 = COPY %1(<2 x i16>) ... --- @@ -27,12 +27,12 @@ body: | ; CHECK-LABEL: name: test_bitcast_v2s16_to_s32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](s32) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(s32) = G_BITCAST %0 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](i32) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(i32) = G_BITCAST %0(<2 x i16>) + $vgpr0 = COPY %1(i32) ... --- @@ -44,12 +44,12 @@ body: | ; CHECK-LABEL: name: test_bitcast_v2s32_to_s64 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[COPY]](<2 x s32>) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](s64) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_BITCAST %0 - $vgpr0_vgpr1 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i64) = G_BITCAST [[COPY]](<2 x i32>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](i64) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(i64) = G_BITCAST %0(<2 x i32>) + $vgpr0_vgpr1 = COPY %1(i64) ... 
--- @@ -61,12 +61,12 @@ body: | ; CHECK-LABEL: name: test_bitcast_s64_to_v2s32 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](s64) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x s32>) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_BITCAST %0 - $vgpr0_vgpr1 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[COPY]](i64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x i32>) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = G_BITCAST %0(i64) + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... --- @@ -78,12 +78,12 @@ body: | ; CHECK-LABEL: name: test_bitcast_v2s64_to_v4s32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<4 x s32>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<4 x s32>) = G_BITCAST %0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY]](<2 x i64>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<4 x i32>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<4 x i32>) = G_BITCAST %0(<2 x i64>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<4 x i32>) ... --- @@ -95,12 +95,12 @@ body: | ; CHECK-LABEL: name: test_bitcast_v4s32_to_v2s64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s64>) = G_BITCAST [[COPY]](<4 x s32>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x s64>) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s64>) = G_BITCAST %0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[COPY]](<4 x i32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x i64>) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x i64>) = G_BITCAST %0(<4 x i32>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x i64>) ... --- @@ -112,12 +112,12 @@ body: | ; CHECK-LABEL: name: test_bitcast_s128_to_v4s32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](s128) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<4 x s32>) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<4 x s32>) = G_BITCAST %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY]](i128) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<4 x i32>) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<4 x i32>) = G_BITCAST %0(i128) + S_ENDPGM 0, implicit %1(<4 x i32>) ... 
--- @@ -129,12 +129,12 @@ body: | ; CHECK-LABEL: name: test_bitcast_v4s32_to_s128 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[COPY]](<4 x s32>) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST]](s128) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s128) = G_BITCAST %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[COPY]](<4 x i32>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST]](i128) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i128) = G_BITCAST %0(<4 x i32>) + S_ENDPGM 0, implicit %1(i128) ... --- @@ -146,12 +146,12 @@ body: | ; CHECK-LABEL: name: test_bitcast_v4s16_to_s64 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[COPY]](<4 x s16>) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](s64) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_BITCAST %0 - $vgpr0_vgpr1 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i64) = G_BITCAST [[COPY]](<4 x i16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](i64) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(i64) = G_BITCAST %0(<4 x i16>) + $vgpr0_vgpr1 = COPY %1(i64) ... --- @@ -163,12 +163,12 @@ body: | ; CHECK-LABEL: name: test_bitcast_s64_to_v4s16 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s16>) = G_BITCAST [[COPY]](s64) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<4 x s16>) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = G_BITCAST %0 - $vgpr0_vgpr1 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[COPY]](i64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<4 x i16>) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = G_BITCAST %0(i64) + $vgpr0_vgpr1 = COPY %1(<4 x i16>) ... --- @@ -180,12 +180,12 @@ body: | ; CHECK-LABEL: name: test_bitcast_v2s64_to_v8s16 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[COPY]](<2 x s64>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<8 x s16>) = G_BITCAST %0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i16>) = G_BITCAST [[COPY]](<2 x i64>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x i16>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<8 x i16>) = G_BITCAST %0(<2 x i64>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<8 x i16>) ... 
--- @@ -197,12 +197,12 @@ body: | ; CHECK-LABEL: name: test_bitcast_v8s16_to_v2s64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s64>) = G_BITCAST [[COPY]](<8 x s16>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x s64>) - %0:_(<8 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s64>) = G_BITCAST %0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x i16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[COPY]](<8 x i16>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x i64>) + %0:_(<8 x i16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x i64>) = G_BITCAST %0(<8 x i16>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x i64>) ... --- @@ -218,8 +218,8 @@ body: | ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(p1) = G_BITCAST [[COPY]](p0) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](p1) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(p1) = G_BITCAST %0 - $vgpr0_vgpr1 = COPY %1 + %1:_(p1) = G_BITCAST %0(p0) + $vgpr0_vgpr1 = COPY %1(p1) ... --- @@ -235,8 +235,8 @@ body: | ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(p0) = G_BITCAST [[COPY]](p1) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](p0) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(p0) = G_BITCAST %0 - $vgpr0_vgpr1 = COPY %1 + %1:_(p0) = G_BITCAST %0(p1) + $vgpr0_vgpr1 = COPY %1(p0) ... --- @@ -252,8 +252,8 @@ body: | ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(p0) = G_BITCAST [[COPY]](p999) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](p0) %0:_(p999) = COPY $vgpr0_vgpr1 - %1:_(p0) = G_BITCAST %0 - $vgpr0_vgpr1 = COPY %1 + %1:_(p0) = G_BITCAST %0(p999) + $vgpr0_vgpr1 = COPY %1(p0) ... --- @@ -269,8 +269,8 @@ body: | ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(p999) = G_BITCAST [[COPY]](p123) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](p999) %0:_(p123) = COPY $vgpr0_vgpr1 - %1:_(p999) = G_BITCAST %0 - $vgpr0_vgpr1 = COPY %1 + %1:_(p999) = G_BITCAST %0(p123) + $vgpr0_vgpr1 = COPY %1(p999) ... --- @@ -284,12 +284,12 @@ body: | ; CHECK-LABEL: name: test_bitcast_v4s64_to_v8s32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY]](<4 x s64>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<8 x s32>) - %0:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - %1:_(<8 x s32>) = G_BITCAST %0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i32>) = G_BITCAST [[COPY]](<4 x i64>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<8 x i32>) + %0:_(<4 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(<8 x i32>) = G_BITCAST %0(<4 x i64>) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1(<8 x i32>) ... 
--- @@ -303,12 +303,12 @@ body: | ; CHECK-LABEL: name: test_bitcast_v8s32_to_v4s64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s64>) = G_BITCAST [[COPY]](<8 x s32>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x s64>) - %0:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - %1:_(<4 x s64>) = G_BITCAST %0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i64>) = G_BITCAST [[COPY]](<8 x i32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x i64>) + %0:_(<8 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(<4 x i64>) = G_BITCAST %0(<8 x i32>) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1(<4 x i64>) ... --- @@ -322,12 +322,12 @@ body: | ; CHECK-LABEL: name: test_bitcast_v8s64_to_v16s32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x s32>) = G_BITCAST [[COPY]](<8 x s64>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](<16 x s32>) - %0:_(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - %1:_(<16 x s32>) = G_BITCAST %0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x i32>) = G_BITCAST [[COPY]](<8 x i64>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](<16 x i32>) + %0:_(<8 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:_(<16 x i32>) = G_BITCAST %0(<8 x i64>) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1(<16 x i32>) ... 
--- @@ -341,12 +341,12 @@ body: | ; CHECK-LABEL: name: test_bitcast_v16s32_to_v8s64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s64>) = G_BITCAST [[COPY]](<16 x s32>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](<8 x s64>) - %0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - %1:_(<8 x s64>) = G_BITCAST %0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i64>) = G_BITCAST [[COPY]](<16 x i32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](<8 x i64>) + %0:_(<16 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:_(<8 x i64>) = G_BITCAST %0(<16 x i32>) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1(<8 x i64>) ... --- @@ -358,12 +358,12 @@ body: | ; CHECK-LABEL: name: test_bitcast_v32s32_to_v16s64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x s64>) = G_BITCAST [[COPY]](<32 x s32>) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<16 x s64>) - %0:_(<32 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - %1:_(<16 x s64>) = G_BITCAST %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x i64>) = G_BITCAST [[COPY]](<32 x i32>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<16 x i64>) + %0:_(<32 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + %1:_(<16 x i64>) = G_BITCAST %0(<32 x i32>) + S_ENDPGM 0, implicit %1(<16 x i64>) ... 
--- @@ -375,12 +375,12 @@ body: | ; CHECK-LABEL: name: test_bitcast_v16s64_to_v32s32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<32 x s32>) = G_BITCAST [[COPY]](<16 x s64>) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<32 x s32>) - %0:_(<16 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - %1:_(<32 x s32>) = G_BITCAST %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<32 x i32>) = G_BITCAST [[COPY]](<16 x i64>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<32 x i32>) + %0:_(<16 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + %1:_(<32 x i32>) = G_BITCAST %0(<16 x i64>) + S_ENDPGM 0, implicit %1(<32 x i32>) ... --- @@ -392,18 +392,18 @@ body: | ; CHECK-LABEL: name: test_bitcast_s24_to_v3s8 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[LSHR]](s32), [[LSHR1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - %0:_(s32) = COPY $vgpr0 - %1:_(s24) = G_TRUNC %0 - %2:_(<3 x s8>) = G_BITCAST %1 - %3:_(<3 x s32>) = G_ANYEXT %2 - $vgpr0_vgpr1_vgpr2 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C1]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[LSHR]](i32), [[LSHR1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) + %0:_(i32) = COPY $vgpr0 + %1:_(i24) = G_TRUNC %0(i32) + %2:_(<3 x i8>) = G_BITCAST %1(i24) + %3:_(<3 x i32>) = G_ANYEXT %2(<3 x i8>) + $vgpr0_vgpr1_vgpr2 = COPY %3(<3 x i32>) ... 
--- @@ -415,17 +415,17 @@ body: | ; CHECK-LABEL: name: test_bitcast_s48_to_v3s16 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[LSHR]](s32), [[UV1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s48) = G_TRUNC %0 - %2:_(<3 x s16>) = G_BITCAST %1 - %3:_(<3 x s32>) = G_ANYEXT %2 - $vgpr0_vgpr1_vgpr2 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[C]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[LSHR]](i32), [[UV1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i48) = G_TRUNC %0(i64) + %2:_(<3 x i16>) = G_BITCAST %1(i48) + %3:_(<3 x i32>) = G_ANYEXT %2(<3 x i16>) + $vgpr0_vgpr1_vgpr2 = COPY %3(<3 x i32>) ... --- @@ -437,34 +437,34 @@ body: | ; CHECK-LABEL: name: test_bitcast_v3s8_to_s24 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C1]](s16) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; CHECK-NEXT: $vgpr0 = COPY [[OR2]](s32) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s8>) = G_TRUNC %0 - %2:_(s24) = G_BITCAST %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[UV]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i16) = 
G_CONSTANT i16 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[UV1]](i32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C1]](i16) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[UV2]](i32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C]] + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C1]](i16) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C2]](i32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL2]] + ; CHECK-NEXT: $vgpr0 = COPY [[OR2]](i32) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x i8>) = G_TRUNC %0(<3 x i32>) + %2:_(i24) = G_BITCAST %1(<3 x i8>) + %3:_(i32) = G_ANYEXT %2(i24) + $vgpr0 = COPY %3(i32) ... --- @@ -476,25 +476,25 @@ body: | ; CHECK-LABEL: name: test_bitcast_v3s16_to_s48 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s16>) = G_TRUNC %0 - %2:_(s48) = G_BITCAST %1 - %3:_(s64) = G_ANYEXT %2 - $vgpr0_vgpr1 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[UV]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[UV1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[UV2]], [[C]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[C2]], [[C1]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR]](i32), [[OR1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + %0:_(<3 x i32>) 
= COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x i16>) = G_TRUNC %0(<3 x i32>) + %2:_(i48) = G_BITCAST %1(<3 x i16>) + %3:_(i64) = G_ANYEXT %2(i48) + $vgpr0_vgpr1 = COPY %3(i64) ... --- @@ -506,16 +506,16 @@ body: | ; CHECK-LABEL: name: test_bitcast_s16_to_v2s8 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[LSHR]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(<2 x s8>) = G_BITCAST %1 - %3:_(<2 x s32>) = G_ANYEXT %2 - $vgpr0_vgpr1 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[LSHR]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(<2 x i8>) = G_BITCAST %1(i16) + %3:_(<2 x i32>) = G_ANYEXT %2(<2 x i8>) + $vgpr0_vgpr1 = COPY %3(<2 x i32>) ... --- @@ -527,23 +527,23 @@ body: | ; CHECK-LABEL: name: test_bitcast_v2s8_to_s16 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s8>) = G_TRUNC %0 - %2:_(s16) = G_BITCAST %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[UV]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[UV1]](i32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C1]](i16) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[OR]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i8>) = G_TRUNC %0(<2 x i32>) + %2:_(i16) = G_BITCAST %1(<2 x i8>) + %3:_(i32) = G_ANYEXT %2(i16) + $vgpr0 = COPY %3(i32) ... 
--- @@ -556,37 +556,37 @@ body: | ; CHECK-LABEL: name: test_bitcast_v2s16_to_v4s8 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[COPY1]] - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[COPY2]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[COPY3]] - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) - ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[COPY4]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) - ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<4 x s8>) = G_BITCAST %0 - %2:_(<4 x s8>) = G_ADD %1, %1 - %3:_(<4 x s32>) = G_ANYEXT %2 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C1]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i16) = COPY [[TRUNC]](i16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i16) = G_ADD [[TRUNC]], [[COPY1]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i16) = COPY [[TRUNC1]](i16) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(i16) = G_ADD [[TRUNC1]], [[COPY2]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i16) = COPY [[TRUNC2]](i16) + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(i16) = G_ADD [[TRUNC2]], [[COPY3]] + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i16) = COPY [[TRUNC3]](i16) + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(i16) = G_ADD [[TRUNC3]], [[COPY4]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD]](i16) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = 
G_ANYEXT [[ADD1]](i16) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD2]](i16) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD3]](i16) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32), [[ANYEXT2]](i32), [[ANYEXT3]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<4 x i8>) = G_BITCAST %0(<2 x i16>) + %2:_(<4 x i8>) = G_ADD %1, %1 + %3:_(<4 x i32>) = G_ANYEXT %2(<4 x i8>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3(<4 x i32>) ... --- @@ -598,42 +598,42 @@ body: | ; CHECK-LABEL: name: test_bitcast_v4s8_to_v2s16 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[COPY1]], [[TRUNC]] - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[COPY2]], [[TRUNC1]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[COPY3]], [[TRUNC2]] - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) - ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[COPY4]], [[TRUNC3]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[ADD]], [[C]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[ADD1]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[ADD2]], [[C]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[ADD3]], [[C]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C1]](s16) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<4 x s8>) = G_TRUNC %0 - %2:_(<4 x s8>) = G_ADD %1, %1 - %3:_(<2 x s16>) = G_BITCAST %2 - $vgpr0 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[UV]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i16) = COPY [[TRUNC]](i16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i16) = G_ADD [[COPY1]], [[TRUNC]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[UV1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i16) = COPY [[TRUNC1]](i16) + ; CHECK-NEXT: 
[[ADD1:%[0-9]+]]:_(i16) = G_ADD [[COPY2]], [[TRUNC1]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[UV2]](i32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i16) = COPY [[TRUNC2]](i16) + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(i16) = G_ADD [[COPY3]], [[TRUNC2]] + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[UV3]](i32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i16) = COPY [[TRUNC3]](i16) + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(i16) = G_ADD [[COPY4]], [[TRUNC3]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[ADD]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[ADD1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C1]](i16) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[ADD2]], [[C]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[ADD3]], [[C]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C1]](i16) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C2]](i32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x i16>) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<4 x i8>) = G_TRUNC %0(<4 x i32>) + %2:_(<4 x i8>) = G_ADD %1, %1 + %3:_(<2 x i16>) = G_BITCAST %2(<4 x i8>) + $vgpr0 = COPY %3(<2 x i16>) ... --- @@ -645,61 +645,61 @@ body: | ; CHECK-LABEL: name: test_bitcast_v2s16_to_v8s4 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C3]](s32) - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C5]](s32) - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[COPY1]] - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[COPY2]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[COPY3]] - ; CHECK-NEXT: 
[[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) - ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[COPY4]] - ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC4]](s16) - ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[COPY5]] - ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC5]](s16) - ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[COPY6]] - ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s16) = COPY [[TRUNC6]](s16) - ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[COPY7]] - ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s16) = COPY [[TRUNC7]](s16) - ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[COPY8]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) - ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) - ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD4]](s16) - ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD5]](s16) - ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD6]](s16) - ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD7]](s16) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32), [[ANYEXT4]](s32), [[ANYEXT5]](s32), [[ANYEXT6]](s32), [[ANYEXT7]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<8 x s4>) = G_BITCAST %0 - %2:_(<8 x s4>) = G_ADD %1, %1 - %3:_(<8 x s32>) = G_ANYEXT %2 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C1]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C3]](i32) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C4]](i32) + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C5]](i32) + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 28 + ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C6]](i32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i16) = COPY [[TRUNC]](i16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i16) = G_ADD [[TRUNC]], [[COPY1]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i16) = COPY [[TRUNC1]](i16) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(i16) = G_ADD [[TRUNC1]], [[COPY2]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC 
[[LSHR1]](i32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i16) = COPY [[TRUNC2]](i16) + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(i16) = G_ADD [[TRUNC2]], [[COPY3]] + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i16) = COPY [[TRUNC3]](i16) + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(i16) = G_ADD [[TRUNC3]], [[COPY4]] + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i16) = COPY [[TRUNC4]](i16) + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(i16) = G_ADD [[TRUNC4]], [[COPY5]] + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR4]](i32) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(i16) = COPY [[TRUNC5]](i16) + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(i16) = G_ADD [[TRUNC5]], [[COPY6]] + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR5]](i32) + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(i16) = COPY [[TRUNC6]](i16) + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(i16) = G_ADD [[TRUNC6]], [[COPY7]] + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR6]](i32) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(i16) = COPY [[TRUNC7]](i16) + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(i16) = G_ADD [[TRUNC7]], [[COPY8]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD]](i16) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD1]](i16) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD2]](i16) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD3]](i16) + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD4]](i16) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD5]](i16) + ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD6]](i16) + ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD7]](i16) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32), [[ANYEXT2]](i32), [[ANYEXT3]](i32), [[ANYEXT4]](i32), [[ANYEXT5]](i32), [[ANYEXT6]](i32), [[ANYEXT7]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x i32>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<8 x i4>) = G_BITCAST %0(<2 x i16>) + %2:_(<8 x i4>) = G_ADD %1, %1 + %3:_(<8 x i32>) = G_ANYEXT %2(<8 x i4>) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3(<8 x i32>) ... 
--- @@ -711,68 +711,68 @@ body: | ; CHECK-LABEL: name: test_bitcast_v8s4_to_v2s16 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[COPY1]], [[TRUNC]] - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[COPY2]], [[TRUNC1]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[COPY3]], [[TRUNC2]] - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) - ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[COPY4]], [[TRUNC3]] - ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC4]](s16) - ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[COPY5]], [[TRUNC4]] - ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC5]](s16) - ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[COPY6]], [[TRUNC5]] - ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s16) = COPY [[TRUNC6]](s16) - ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[COPY7]], [[TRUNC6]] - ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s16) = COPY [[TRUNC7]](s16) - ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[COPY8]], [[TRUNC7]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[ADD]], [[C]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[ADD1]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 4 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[ADD2]], [[C]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND2]], [[C2]](s16) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[ADD3]], [[C]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 12 - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[OR1]], [[SHL2]] - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[ADD4]], [[C]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[ADD5]], [[C]] - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C1]](s16) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[ADD6]], [[C]] - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND6]], [[C2]](s16) - ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[OR3]], [[SHL4]] - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[ADD7]], [[C]] - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND7]], 
[[C3]](s16) - ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s16) = G_OR [[OR4]], [[SHL5]] - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32) - ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL6]] - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - %0:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - %1:_(<8 x s4>) = G_TRUNC %0 - %2:_(<8 x s4>) = G_ADD %1, %1 - %3:_(<2 x s16>) = G_BITCAST %2 - $vgpr0 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<8 x i32>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[UV]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i16) = COPY [[TRUNC]](i16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i16) = G_ADD [[COPY1]], [[TRUNC]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[UV1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i16) = COPY [[TRUNC1]](i16) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(i16) = G_ADD [[COPY2]], [[TRUNC1]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[UV2]](i32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i16) = COPY [[TRUNC2]](i16) + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(i16) = G_ADD [[COPY3]], [[TRUNC2]] + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[UV3]](i32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i16) = COPY [[TRUNC3]](i16) + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(i16) = G_ADD [[COPY4]], [[TRUNC3]] + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[UV4]](i32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i16) = COPY [[TRUNC4]](i16) + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(i16) = G_ADD [[COPY5]], [[TRUNC4]] + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[UV5]](i32) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(i16) = COPY [[TRUNC5]](i16) + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(i16) = G_ADD [[COPY6]], [[TRUNC5]] + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[UV6]](i32) + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(i16) = COPY [[TRUNC6]](i16) + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(i16) = G_ADD [[COPY7]], [[TRUNC6]] + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[UV7]](i32) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(i16) = COPY [[TRUNC7]](i16) + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(i16) = G_ADD [[COPY8]], [[TRUNC7]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 15 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[ADD]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[ADD1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 4 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C1]](i16) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[ADD2]], [[C]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[AND2]], [[C2]](i16) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[ADD3]], [[C]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 12 + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C3]](i16) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(i16) = G_OR [[OR1]], [[SHL2]] + ; CHECK-NEXT: 
[[AND4:%[0-9]+]]:_(i16) = G_AND [[ADD4]], [[C]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(i16) = G_AND [[ADD5]], [[C]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(i16) = G_SHL [[AND5]], [[C1]](i16) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(i16) = G_OR [[AND4]], [[SHL3]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(i16) = G_AND [[ADD6]], [[C]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(i16) = G_SHL [[AND6]], [[C2]](i16) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(i16) = G_OR [[OR3]], [[SHL4]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(i16) = G_AND [[ADD7]], [[C]] + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(i16) = G_SHL [[AND7]], [[C3]](i16) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(i16) = G_OR [[OR4]], [[SHL5]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR2]](i16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR5]](i16) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C4]](i32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL6]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR6]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x i16>) + %0:_(<8 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(<8 x i4>) = G_TRUNC %0(<8 x i32>) + %2:_(<8 x i4>) = G_ADD %1, %1 + %3:_(<2 x i16>) = G_BITCAST %2(<8 x i4>) + $vgpr0 = COPY %3(<2 x i16>) ... --- @@ -784,12 +784,12 @@ body: | ; CHECK-LABEL: name: test_bitcast_v4s16_to_v2s32 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<2 x s32>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_BITCAST %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[COPY]](<4 x i16>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<2 x i32>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = G_BITCAST %0(<4 x i16>) + S_ENDPGM 0, implicit %1(<2 x i32>) ... 
--- @@ -801,46 +801,46 @@ body: | ; CHECK-LABEL: name: test_bitcast_v2s32_to_v4s16 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s16>) = G_BITCAST [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC4]] - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC5]] - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC6]] - ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[TRUNC7]] - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) - ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ADD2]](s16) - ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[ADD3]](s16) - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = G_BITCAST %0 - %2:_(<4 x s16>) = G_ADD %1, %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; 
CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST3]](i32) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i16) = G_ADD [[TRUNC]], [[TRUNC4]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(i16) = G_ADD [[TRUNC1]], [[TRUNC5]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(i16) = G_ADD [[TRUNC2]], [[TRUNC6]] + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(i16) = G_ADD [[TRUNC3]], [[TRUNC7]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[ADD]](i16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[ADD1]](i16) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[ADD2]](i16) + ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[ADD3]](i16) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x i16>), [[BITCAST6]](<2 x i16>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<4 x i16>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = G_BITCAST %0(<2 x i32>) + %2:_(<4 x i16>) = G_ADD %1, %1 + S_ENDPGM 0, implicit %2(<4 x i16>) ... 
@@ -853,56 +853,56 @@ body: | ; CHECK-LABEL: name: test_bitcast_v2s32_to_v8s8 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) - ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) - ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[COPY1]] - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[COPY2]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[COPY3]] - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) - ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[COPY4]] - ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC4]](s16) - ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[COPY5]] - ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC5]](s16) - ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[COPY6]] - ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s16) = COPY [[TRUNC6]](s16) - ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[COPY7]] - ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s16) = COPY [[TRUNC7]](s16) - ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[COPY8]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) - ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) - ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD4]](s16) - ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD5]](s16) - ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD6]](s16) - ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD7]](s16) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32), [[ANYEXT4]](s32), [[ANYEXT5]](s32), [[ANYEXT6]](s32), [[ANYEXT7]](s32) - ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC8]](<8 x s8>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<8 x s8>) = G_BITCAST %0 - %2:_(<8 x s8>) = G_ADD %1, %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x 
i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[C1]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[C2]](i32) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C]](i32) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C1]](i32) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C2]](i32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[UV]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i16) = COPY [[TRUNC]](i16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i16) = G_ADD [[TRUNC]], [[COPY1]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i16) = COPY [[TRUNC1]](i16) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(i16) = G_ADD [[TRUNC1]], [[COPY2]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i16) = COPY [[TRUNC2]](i16) + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(i16) = G_ADD [[TRUNC2]], [[COPY3]] + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i16) = COPY [[TRUNC3]](i16) + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(i16) = G_ADD [[TRUNC3]], [[COPY4]] + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[UV1]](i32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i16) = COPY [[TRUNC4]](i16) + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(i16) = G_ADD [[TRUNC4]], [[COPY5]] + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(i16) = COPY [[TRUNC5]](i16) + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(i16) = G_ADD [[TRUNC5]], [[COPY6]] + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR4]](i32) + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(i16) = COPY [[TRUNC6]](i16) + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(i16) = G_ADD [[TRUNC6]], [[COPY7]] + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR5]](i32) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(i16) = COPY [[TRUNC7]](i16) + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(i16) = G_ADD [[TRUNC7]], [[COPY8]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD]](i16) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD1]](i16) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD2]](i16) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD3]](i16) + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD4]](i16) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD5]](i16) + ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD6]](i16) + ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD7]](i16) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32), [[ANYEXT2]](i32), [[ANYEXT3]](i32), [[ANYEXT4]](i32), [[ANYEXT5]](i32), [[ANYEXT6]](i32), [[ANYEXT7]](i32) + ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(<8 x i8>) = G_TRUNC [[BUILD_VECTOR]](<8 x i32>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC8]](<8 x i8>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<8 x i8>) = G_BITCAST %0(<2 x i32>) + %2:_(<8 x i8>) = G_ADD %1, %1 + S_ENDPGM 0, implicit %2(<8 x i8>) ... 
@@ -915,46 +915,46 @@ body: | ; CHECK-LABEL: name: test_bitcast_v8s8_to_v2s32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[UV1]](s32) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s32) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[UV3]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s8) = COPY [[TRUNC2]](s8) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s8) = COPY [[TRUNC3]](s8) - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C]] - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[C]] - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C2]](s32) - ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[C]] - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C3]](s32) - ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<2 x s32>) - %0:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - %1:_(<8 x s8>) = G_TRUNC %0 - %2:_(<2 x s32>) = G_BITCAST %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<8 x i32>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i8) = G_TRUNC [[UV]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i8) = G_TRUNC [[UV1]](i32) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i8) = G_TRUNC [[UV2]](i32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(i8) = G_TRUNC [[UV3]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; CHECK-NEXT: 
[[AND:%[0-9]+]]:_(i32) = G_AND [[UV]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[UV1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[UV2]], [[C]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND2]], [[C2]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[UV3]], [[C]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C3]](i32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[OR1]], [[SHL2]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i8) = COPY [[TRUNC]](i8) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i8) = COPY [[TRUNC1]](i8) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i8) = COPY [[TRUNC2]](i8) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i8) = COPY [[TRUNC3]](i8) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[UV4]], [[C]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[UV5]], [[C]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[AND5]], [[C1]](i32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[AND4]], [[SHL3]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(i32) = G_AND [[UV6]], [[C]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[AND6]], [[C2]](i32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[OR3]], [[SHL4]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(i32) = G_AND [[UV7]], [[C]] + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[AND7]], [[C3]](i32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[OR4]], [[SHL5]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<2 x i32>) + %0:_(<8 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(<8 x i8>) = G_TRUNC %0(<8 x i32>) + %2:_(<2 x i32>) = G_BITCAST %1(<8 x i8>) + S_ENDPGM 0, implicit %2(<2 x i32>) ... 
--- @@ -966,49 +966,49 @@ body: | ; CHECK-LABEL: name: test_bitcast_v8s8_to_s64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C1]](s16) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C]] - ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C1]](s16) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C]] - ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32) - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C]] - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C1]](s16) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) - ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C2]](s32) - ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s64) - %0:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - %1:_(<8 x s8>) = G_TRUNC %0 - %2:_(s64) = G_BITCAST %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<8 x i32>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[UV]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; 
CHECK-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[UV1]](i32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C1]](i16) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[UV2]](i32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C]] + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[UV3]](i32) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C1]](i16) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[UV4]](i32) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(i16) = G_AND [[TRUNC4]], [[C]] + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[UV5]](i32) + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(i16) = G_AND [[TRUNC5]], [[C]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[AND5]], [[C1]](i16) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(i16) = G_OR [[AND4]], [[SHL2]] + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[UV6]](i32) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(i16) = G_AND [[TRUNC6]], [[C]] + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[UV7]](i32) + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(i16) = G_AND [[TRUNC7]], [[C]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(i16) = G_SHL [[AND7]], [[C1]](i16) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(i16) = G_OR [[AND6]], [[SHL3]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C2]](i32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL4]] + ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[OR2]](i16) + ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[OR3]](i16) + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C2]](i32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL5]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR4]](i32), [[OR5]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](i64) + %0:_(<8 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(<8 x i8>) = G_TRUNC %0(<8 x i32>) + %2:_(i64) = G_BITCAST %1(<8 x i8>) + S_ENDPGM 0, implicit %2(i64) ... 
--- @@ -1020,124 +1020,124 @@ body: | ; CHECK-LABEL: name: test_bitcast_v2s32_to_v16s4 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C3]](s32) - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C4]](s32) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C5]](s32) - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C6]](s32) - ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) - ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) - ; CHECK-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) - ; CHECK-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C3]](s32) - ; CHECK-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C4]](s32) - ; CHECK-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C5]](s32) - ; CHECK-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C6]](s32) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[COPY1]] - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[COPY2]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[COPY3]] - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) - ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[COPY4]] - ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC4]](s16) - ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[COPY5]] - ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC5]](s16) - ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[COPY6]] - ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s16) = COPY [[TRUNC6]](s16) - ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[COPY7]] - ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s16) = COPY [[TRUNC7]](s16) - ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[COPY8]] - ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s16) = COPY [[TRUNC8]](s16) - ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(s16) = G_ADD [[TRUNC8]], [[COPY9]] - ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC 
[[LSHR7]](s32) - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s16) = COPY [[TRUNC9]](s16) - ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(s16) = G_ADD [[TRUNC9]], [[COPY10]] - ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s16) = COPY [[TRUNC10]](s16) - ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(s16) = G_ADD [[TRUNC10]], [[COPY11]] - ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s16) = COPY [[TRUNC11]](s16) - ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(s16) = G_ADD [[TRUNC11]], [[COPY12]] - ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR10]](s32) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s16) = COPY [[TRUNC12]](s16) - ; CHECK-NEXT: [[ADD12:%[0-9]+]]:_(s16) = G_ADD [[TRUNC12]], [[COPY13]] - ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s16) = COPY [[TRUNC13]](s16) - ; CHECK-NEXT: [[ADD13:%[0-9]+]]:_(s16) = G_ADD [[TRUNC13]], [[COPY14]] - ; CHECK-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR12]](s32) - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s16) = COPY [[TRUNC14]](s16) - ; CHECK-NEXT: [[ADD14:%[0-9]+]]:_(s16) = G_ADD [[TRUNC14]], [[COPY15]] - ; CHECK-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR13]](s32) - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s16) = COPY [[TRUNC15]](s16) - ; CHECK-NEXT: [[ADD15:%[0-9]+]]:_(s16) = G_ADD [[TRUNC15]], [[COPY16]] - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) - ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ADD2]](s16) - ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[ADD3]](s16) - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C3]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[ADD4]](s16) - ; CHECK-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[ADD5]](s16) - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C3]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL2]] - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[ADD6]](s16) - ; CHECK-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[ADD7]](s16) - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C3]](s32) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL3]] - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CHECK-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[ADD8]](s16) - ; CHECK-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[ADD9]](s16) - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C3]](s32) - ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL4]] - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CHECK-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[ADD10]](s16) - ; CHECK-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[ADD11]](s16) - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C3]](s32) - ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL5]] - ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; CHECK-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[ADD12]](s16) - ; CHECK-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT 
[[ADD13]](s16) - ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT13]], [[C3]](s32) - ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT12]], [[SHL6]] - ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) - ; CHECK-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[ADD14]](s16) - ; CHECK-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[ADD15]](s16) - ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT15]], [[C3]](s32) - ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT14]], [[SHL7]] - ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<16 x s16>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<16 x s4>) = G_BITCAST %0 - %2:_(<16 x s4>) = G_ADD %1, %1 - %3:_(<16 x s16>) = G_ANYEXT %2 - S_ENDPGM 0, implicit %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[C1]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[C2]](i32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[C3]](i32) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[C4]](i32) + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[C5]](i32) + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 28 + ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[C6]](i32) + ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C]](i32) + ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C1]](i32) + ; CHECK-NEXT: [[LSHR9:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C2]](i32) + ; CHECK-NEXT: [[LSHR10:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C3]](i32) + ; CHECK-NEXT: [[LSHR11:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C4]](i32) + ; CHECK-NEXT: [[LSHR12:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C5]](i32) + ; CHECK-NEXT: [[LSHR13:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C6]](i32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[UV]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i16) = COPY [[TRUNC]](i16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i16) = G_ADD [[TRUNC]], [[COPY1]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i16) = COPY [[TRUNC1]](i16) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(i16) = G_ADD [[TRUNC1]], [[COPY2]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i16) = COPY [[TRUNC2]](i16) + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(i16) = G_ADD [[TRUNC2]], [[COPY3]] + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i16) = COPY [[TRUNC3]](i16) + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(i16) = G_ADD [[TRUNC3]], [[COPY4]] + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i16) = COPY 
[[TRUNC4]](i16) + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(i16) = G_ADD [[TRUNC4]], [[COPY5]] + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR4]](i32) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(i16) = COPY [[TRUNC5]](i16) + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(i16) = G_ADD [[TRUNC5]], [[COPY6]] + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR5]](i32) + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(i16) = COPY [[TRUNC6]](i16) + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(i16) = G_ADD [[TRUNC6]], [[COPY7]] + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR6]](i32) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(i16) = COPY [[TRUNC7]](i16) + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(i16) = G_ADD [[TRUNC7]], [[COPY8]] + ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[UV1]](i32) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(i16) = COPY [[TRUNC8]](i16) + ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(i16) = G_ADD [[TRUNC8]], [[COPY9]] + ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR7]](i32) + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(i16) = COPY [[TRUNC9]](i16) + ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(i16) = G_ADD [[TRUNC9]], [[COPY10]] + ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR8]](i32) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(i16) = COPY [[TRUNC10]](i16) + ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(i16) = G_ADD [[TRUNC10]], [[COPY11]] + ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR9]](i32) + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(i16) = COPY [[TRUNC11]](i16) + ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(i16) = G_ADD [[TRUNC11]], [[COPY12]] + ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR10]](i32) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i16) = COPY [[TRUNC12]](i16) + ; CHECK-NEXT: [[ADD12:%[0-9]+]]:_(i16) = G_ADD [[TRUNC12]], [[COPY13]] + ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR11]](i32) + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i16) = COPY [[TRUNC13]](i16) + ; CHECK-NEXT: [[ADD13:%[0-9]+]]:_(i16) = G_ADD [[TRUNC13]], [[COPY14]] + ; CHECK-NEXT: [[TRUNC14:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR12]](i32) + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i16) = COPY [[TRUNC14]](i16) + ; CHECK-NEXT: [[ADD14:%[0-9]+]]:_(i16) = G_ADD [[TRUNC14]], [[COPY15]] + ; CHECK-NEXT: [[TRUNC15:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR13]](i32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i16) = COPY [[TRUNC15]](i16) + ; CHECK-NEXT: [[ADD15:%[0-9]+]]:_(i16) = G_ADD [[TRUNC15]], [[COPY16]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[ADD]](i16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[ADD1]](i16) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C3]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[ADD2]](i16) + ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[ADD3]](i16) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C3]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CHECK-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[ADD4]](i16) + ; CHECK-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[ADD5]](i16) + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT5]], [[C3]](i32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT4]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; CHECK-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[ADD6]](i16) + ; CHECK-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[ADD7]](i16) + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXT7]], [[C3]](i32) + ; 
CHECK-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[ZEXT6]], [[SHL3]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR3]](i32) + ; CHECK-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[ADD8]](i16) + ; CHECK-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[ADD9]](i16) + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[ZEXT9]], [[C3]](i32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[ZEXT8]], [[SHL4]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR4]](i32) + ; CHECK-NEXT: [[ZEXT10:%[0-9]+]]:_(i32) = G_ZEXT [[ADD10]](i16) + ; CHECK-NEXT: [[ZEXT11:%[0-9]+]]:_(i32) = G_ZEXT [[ADD11]](i16) + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[ZEXT11]], [[C3]](i32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[ZEXT10]], [[SHL5]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR5]](i32) + ; CHECK-NEXT: [[ZEXT12:%[0-9]+]]:_(i32) = G_ZEXT [[ADD12]](i16) + ; CHECK-NEXT: [[ZEXT13:%[0-9]+]]:_(i32) = G_ZEXT [[ADD13]](i16) + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXT13]], [[C3]](i32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[ZEXT12]], [[SHL6]] + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR6]](i32) + ; CHECK-NEXT: [[ZEXT14:%[0-9]+]]:_(i32) = G_ZEXT [[ADD14]](i16) + ; CHECK-NEXT: [[ZEXT15:%[0-9]+]]:_(i32) = G_ZEXT [[ADD15]](i16) + ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[ZEXT15]], [[C3]](i32) + ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[ZEXT14]], [[SHL7]] + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR7]](i32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x i16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x i16>), [[BITCAST1]](<2 x i16>), [[BITCAST2]](<2 x i16>), [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>), [[BITCAST5]](<2 x i16>), [[BITCAST6]](<2 x i16>), [[BITCAST7]](<2 x i16>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<16 x i16>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<16 x i4>) = G_BITCAST %0(<2 x i32>) + %2:_(<16 x i4>) = G_ADD %1, %1 + %3:_(<16 x i16>) = G_ANYEXT %2(<16 x i4>) + S_ENDPGM 0, implicit %3(<16 x i16>) ... 
@@ -1150,98 +1150,98 @@ body: | ; CHECK-LABEL: name: test_bitcast_v16s4_to_v2s32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<16 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s4) = G_TRUNC [[BITCAST]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s4) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s4) = G_TRUNC [[BITCAST1]](s32) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s4) = G_TRUNC [[LSHR1]](s32) - ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s4) = G_TRUNC [[BITCAST2]](s32) - ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s4) = G_TRUNC [[LSHR2]](s32) - ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s4) = G_TRUNC [[BITCAST3]](s32) - ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s4) = G_TRUNC [[LSHR3]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C3]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR2]], [[SHL3]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; CHECK-NEXT: 
[[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32) - ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32) - ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) - ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[OR5]], [[SHL6]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s4) = COPY [[TRUNC]](s4) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s4) = COPY [[TRUNC1]](s4) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s4) = COPY [[TRUNC2]](s4) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s4) = COPY [[TRUNC3]](s4) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s4) = COPY [[TRUNC4]](s4) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s4) = COPY [[TRUNC5]](s4) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s4) = COPY [[TRUNC6]](s4) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s4) = COPY [[TRUNC7]](s4) - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] - ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32) - ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL7]] - ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C3]](s32) - ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] - ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] - ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C4]](s32) - ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[OR8]], [[SHL9]] - ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C1]] - ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C]](s32) - ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]] - ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C1]] - ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C5]](s32) - ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]] - ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C1]] - ; CHECK-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C6]](s32) - ; CHECK-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[OR11]], [[SHL12]] - ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C1]] - ; CHECK-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C7]](s32) - ; CHECK-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR6]](s32), [[OR13]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<2 x s32>) - %0:_(<16 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - %1:_(<16 x s4>) = G_TRUNC %0 - %2:_(<2 x s32>) = G_BITCAST %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x i16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>), [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<16 x i16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; 
CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[UV5]](<2 x i16>) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST5]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[UV6]](<2 x i16>) + ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST6]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[UV7]](<2 x i16>) + ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST7]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i4) = G_TRUNC [[BITCAST]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i4) = G_TRUNC [[LSHR]](i32) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i4) = G_TRUNC [[BITCAST1]](i32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(i4) = G_TRUNC [[LSHR1]](i32) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(i4) = G_TRUNC [[BITCAST2]](i32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(i4) = G_TRUNC [[LSHR2]](i32) + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(i4) = G_TRUNC [[BITCAST3]](i32) + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(i4) = G_TRUNC [[LSHR3]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 15 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C2]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND2]], [[C3]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[LSHR1]], [[C1]] + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C4]](i32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[OR1]], [[SHL2]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C1]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[AND4]], [[C]](i32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[OR2]], [[SHL3]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[LSHR2]], [[C1]] + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[AND5]], [[C5]](i32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[OR3]], [[SHL4]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(i32) = G_AND [[BITCAST3]], [[C1]] + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[AND6]], [[C6]](i32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[OR4]], [[SHL5]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(i32) = G_AND [[LSHR3]], [[C1]] + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 28 + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[AND7]], [[C7]](i32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[OR5]], [[SHL6]] + ; CHECK-NEXT: 
[[COPY1:%[0-9]+]]:_(i4) = COPY [[TRUNC]](i4) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i4) = COPY [[TRUNC1]](i4) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i4) = COPY [[TRUNC2]](i4) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i4) = COPY [[TRUNC3]](i4) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i4) = COPY [[TRUNC4]](i4) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(i4) = COPY [[TRUNC5]](i4) + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(i4) = COPY [[TRUNC6]](i4) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(i4) = COPY [[TRUNC7]](i4) + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(i32) = G_AND [[BITCAST4]], [[C1]] + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(i32) = G_AND [[LSHR4]], [[C1]] + ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[AND9]], [[C2]](i32) + ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[AND8]], [[SHL7]] + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(i32) = G_AND [[BITCAST5]], [[C1]] + ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[AND10]], [[C3]](i32) + ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[OR7]], [[SHL8]] + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(i32) = G_AND [[LSHR5]], [[C1]] + ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[AND11]], [[C4]](i32) + ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[OR8]], [[SHL9]] + ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(i32) = G_AND [[BITCAST6]], [[C1]] + ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[AND12]], [[C]](i32) + ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[OR9]], [[SHL10]] + ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(i32) = G_AND [[LSHR6]], [[C1]] + ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[AND13]], [[C5]](i32) + ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[OR10]], [[SHL11]] + ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(i32) = G_AND [[BITCAST7]], [[C1]] + ; CHECK-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[AND14]], [[C6]](i32) + ; CHECK-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[OR11]], [[SHL12]] + ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(i32) = G_AND [[LSHR7]], [[C1]] + ; CHECK-NEXT: [[SHL13:%[0-9]+]]:_(i32) = G_SHL [[AND15]], [[C7]](i32) + ; CHECK-NEXT: [[OR13:%[0-9]+]]:_(i32) = G_OR [[OR12]], [[SHL13]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR6]](i32), [[OR13]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<2 x i32>) + %0:_(<16 x i16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(<16 x i4>) = G_TRUNC %0(<16 x i16>) + %2:_(<2 x i32>) = G_BITCAST %1(<16 x i4>) + S_ENDPGM 0, implicit %2(<2 x i32>) ... 
--- @@ -1253,43 +1253,43 @@ body: | ; CHECK-LABEL: name: test_bitcast_s64_to_v8s8 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16) - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16) - ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C1]](s16) - ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C1]](s16) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC]] - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[LSHR2]], [[LSHR2]] - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC1]] - ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[LSHR3]], [[LSHR3]] - ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC2]] - ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[LSHR4]], [[LSHR4]] - ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[TRUNC3]] - ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[LSHR5]], [[LSHR5]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) - ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) - ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD4]](s16) - ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD5]](s16) - ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD6]](s16) - ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD7]](s16) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32), [[ANYEXT4]](s32), [[ANYEXT5]](s32), [[ANYEXT6]](s32), [[ANYEXT7]](s32) - ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC4]](<8 x s8>) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(<8 x s8>) = G_BITCAST %0 - %2:_(<8 x s8>) = G_ADD %1, %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[UV]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[UV1]](i32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC]], [[C1]](i16) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC1]], [[C1]](i16) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC2]], 
[[C1]](i16) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC3]], [[C1]](i16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i16) = G_ADD [[TRUNC]], [[TRUNC]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(i16) = G_ADD [[LSHR2]], [[LSHR2]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(i16) = G_ADD [[TRUNC1]], [[TRUNC1]] + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(i16) = G_ADD [[LSHR3]], [[LSHR3]] + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(i16) = G_ADD [[TRUNC2]], [[TRUNC2]] + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(i16) = G_ADD [[LSHR4]], [[LSHR4]] + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(i16) = G_ADD [[TRUNC3]], [[TRUNC3]] + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(i16) = G_ADD [[LSHR5]], [[LSHR5]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD]](i16) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD1]](i16) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD2]](i16) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD3]](i16) + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD4]](i16) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD5]](i16) + ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD6]](i16) + ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD7]](i16) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32), [[ANYEXT2]](i32), [[ANYEXT3]](i32), [[ANYEXT4]](i32), [[ANYEXT5]](i32), [[ANYEXT6]](i32), [[ANYEXT7]](i32) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(<8 x i8>) = G_TRUNC [[BUILD_VECTOR]](<8 x i32>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC4]](<8 x i8>) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(<8 x i8>) = G_BITCAST %0(i64) + %2:_(<8 x i8>) = G_ADD %1, %1 + S_ENDPGM 0, implicit %2(<8 x i8>) ... @@ -1302,75 +1302,75 @@ body: | ; CHECK-LABEL: name: test_bitcast_v3s32_to_v12s8 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) - ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) - ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) - ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) - ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32) - ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[COPY1]] - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[COPY2]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[COPY3]] - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) - ; 
CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[COPY4]] - ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC4]](s16) - ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[COPY5]] - ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC5]](s16) - ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[COPY6]] - ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s16) = COPY [[TRUNC6]](s16) - ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[COPY7]] - ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s16) = COPY [[TRUNC7]](s16) - ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[COPY8]] - ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s16) = COPY [[TRUNC8]](s16) - ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(s16) = G_ADD [[TRUNC8]], [[COPY9]] - ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s16) = COPY [[TRUNC9]](s16) - ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(s16) = G_ADD [[TRUNC9]], [[COPY10]] - ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s16) = COPY [[TRUNC10]](s16) - ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(s16) = G_ADD [[TRUNC10]], [[COPY11]] - ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s16) = COPY [[TRUNC11]](s16) - ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(s16) = G_ADD [[TRUNC11]], [[COPY12]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) - ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) - ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD4]](s16) - ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD5]](s16) - ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD6]](s16) - ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD7]](s16) - ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD8]](s16) - ; CHECK-NEXT: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD9]](s16) - ; CHECK-NEXT: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD10]](s16) - ; CHECK-NEXT: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD11]](s16) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<12 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32), [[ANYEXT4]](s32), [[ANYEXT5]](s32), [[ANYEXT6]](s32), [[ANYEXT7]](s32), [[ANYEXT8]](s32), [[ANYEXT9]](s32), [[ANYEXT10]](s32), [[ANYEXT11]](s32) - ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(<12 x s8>) = G_TRUNC [[BUILD_VECTOR]](<12 x s32>) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC12]](<12 x s8>) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<12 x s8>) = G_BITCAST %0 - %2:_(<12 x s8>) = G_ADD %1, %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[C1]](i32) + ; CHECK-NEXT: 
[[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[C2]](i32) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C]](i32) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C1]](i32) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C2]](i32) + ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[UV2]], [[C]](i32) + ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(i32) = G_LSHR [[UV2]], [[C1]](i32) + ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(i32) = G_LSHR [[UV2]], [[C2]](i32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[UV]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i16) = COPY [[TRUNC]](i16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i16) = G_ADD [[TRUNC]], [[COPY1]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i16) = COPY [[TRUNC1]](i16) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(i16) = G_ADD [[TRUNC1]], [[COPY2]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i16) = COPY [[TRUNC2]](i16) + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(i16) = G_ADD [[TRUNC2]], [[COPY3]] + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i16) = COPY [[TRUNC3]](i16) + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(i16) = G_ADD [[TRUNC3]], [[COPY4]] + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[UV1]](i32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i16) = COPY [[TRUNC4]](i16) + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(i16) = G_ADD [[TRUNC4]], [[COPY5]] + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(i16) = COPY [[TRUNC5]](i16) + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(i16) = G_ADD [[TRUNC5]], [[COPY6]] + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR4]](i32) + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(i16) = COPY [[TRUNC6]](i16) + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(i16) = G_ADD [[TRUNC6]], [[COPY7]] + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR5]](i32) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(i16) = COPY [[TRUNC7]](i16) + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(i16) = G_ADD [[TRUNC7]], [[COPY8]] + ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[UV2]](i32) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(i16) = COPY [[TRUNC8]](i16) + ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(i16) = G_ADD [[TRUNC8]], [[COPY9]] + ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR6]](i32) + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(i16) = COPY [[TRUNC9]](i16) + ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(i16) = G_ADD [[TRUNC9]], [[COPY10]] + ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR7]](i32) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(i16) = COPY [[TRUNC10]](i16) + ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(i16) = G_ADD [[TRUNC10]], [[COPY11]] + ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR8]](i32) + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(i16) = COPY [[TRUNC11]](i16) + ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(i16) = G_ADD [[TRUNC11]], [[COPY12]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD]](i16) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD1]](i16) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD2]](i16) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD3]](i16) + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD4]](i16) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD5]](i16) + ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD6]](i16) + ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD7]](i16) + ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(i32) = G_ANYEXT 
[[ADD8]](i16) + ; CHECK-NEXT: [[ANYEXT9:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD9]](i16) + ; CHECK-NEXT: [[ANYEXT10:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD10]](i16) + ; CHECK-NEXT: [[ANYEXT11:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD11]](i16) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<12 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32), [[ANYEXT2]](i32), [[ANYEXT3]](i32), [[ANYEXT4]](i32), [[ANYEXT5]](i32), [[ANYEXT6]](i32), [[ANYEXT7]](i32), [[ANYEXT8]](i32), [[ANYEXT9]](i32), [[ANYEXT10]](i32), [[ANYEXT11]](i32) + ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(<12 x i8>) = G_TRUNC [[BUILD_VECTOR]](<12 x i32>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC12]](<12 x i8>) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<12 x i8>) = G_BITCAST %0(<3 x i32>) + %2:_(<12 x i8>) = G_ADD %1, %1 + S_ENDPGM 0, implicit %2(<12 x i8>) ... @@ -1383,55 +1383,55 @@ body: | ; CHECK-LABEL: name: test_bitcast_v12s8_to_v3s32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C]] - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[C]] - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C2]](s32) - ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[C]] - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C3]](s32) - ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>) - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV8]], [[C]] - ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[UV9]], [[C]] - ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C1]](s32) - ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]] - ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND 
[[UV10]], [[C]] - ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C2]](s32) - ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]] - ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[UV11]], [[C]] - ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C3]](s32) - ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x s32>) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - %2:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - %3:_(<12 x s32>) = G_CONCAT_VECTORS %0, %1, %2 - %4:_(<12 x s8>) = G_TRUNC %3 - %5:_(<3 x s32>) = G_BITCAST %4 - S_ENDPGM 0, implicit %5 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[UV]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[UV1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[UV2]], [[C]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND2]], [[C2]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[UV3]], [[C]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C3]](i32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[OR1]], [[SHL2]] + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<4 x i32>) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[UV4]], [[C]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[UV5]], [[C]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[AND5]], [[C1]](i32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[AND4]], [[SHL3]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(i32) = G_AND [[UV6]], [[C]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[AND6]], [[C2]](i32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[OR3]], [[SHL4]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(i32) = G_AND [[UV7]], [[C]] + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[AND7]], [[C3]](i32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[OR4]], [[SHL5]] + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32), [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY2]](<4 x i32>) + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(i32) = G_AND [[UV8]], [[C]] + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(i32) = G_AND [[UV9]], [[C]] + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[AND9]], [[C1]](i32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[AND8]], [[SHL6]] + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(i32) = G_AND [[UV10]], [[C]] + ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[AND10]], [[C2]](i32) + ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[OR6]], [[SHL7]] + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(i32) = G_AND 
[[UV11]], [[C]] + ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[AND11]], [[C3]](i32) + ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[OR7]], [[SHL8]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x i32>) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<4 x i32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(<4 x i32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + %3:_(<12 x i32>) = G_CONCAT_VECTORS %0(<4 x i32>), %1(<4 x i32>), %2(<4 x i32>) + %4:_(<12 x i8>) = G_TRUNC %3(<12 x i32>) + %5:_(<3 x i32>) = G_BITCAST %4(<12 x i8>) + S_ENDPGM 0, implicit %5(<3 x i32>) ... --- @@ -1443,61 +1443,61 @@ body: | ; CHECK-LABEL: name: test_bitcast_v6s8_to_v3s16 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<6 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<6 x s32>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[COPY1]], [[TRUNC]] - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[COPY2]], [[TRUNC1]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[COPY3]], [[TRUNC2]] - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) - ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[COPY4]], [[TRUNC3]] - ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC4]](s16) - ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[COPY5]], [[TRUNC4]] - ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC5]](s16) - ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[COPY6]], [[TRUNC5]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[ADD]], [[C]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[ADD1]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[ADD2]], [[C]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[ADD3]], [[C]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C1]](s16) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[ADD4]], [[C]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[ADD5]], [[C]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C1]](s16) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] - ; 
CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT]], [[C2]](s32) - ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT2]], [[C2]](s32) - ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT1]], [[SHL5]] - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<6 x s16>) - %0:_(<6 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - %1:_(<6 x s8>) = G_TRUNC %0 - %2:_(<6 x s8>) = G_ADD %1, %1 - %3:_(<3 x s16>) = G_BITCAST %2 - %4:_(<6 x s16>) = G_CONCAT_VECTORS %3, %3 - S_ENDPGM 0, implicit %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<6 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<6 x i32>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[UV]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i16) = COPY [[TRUNC]](i16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i16) = G_ADD [[COPY1]], [[TRUNC]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[UV1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i16) = COPY [[TRUNC1]](i16) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(i16) = G_ADD [[COPY2]], [[TRUNC1]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[UV2]](i32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i16) = COPY [[TRUNC2]](i16) + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(i16) = G_ADD [[COPY3]], [[TRUNC2]] + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[UV3]](i32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i16) = COPY [[TRUNC3]](i16) + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(i16) = G_ADD [[COPY4]], [[TRUNC3]] + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[UV4]](i32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i16) = COPY [[TRUNC4]](i16) + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(i16) = G_ADD [[COPY5]], [[TRUNC4]] + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[UV5]](i32) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(i16) = COPY [[TRUNC5]](i16) + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(i16) = G_ADD [[COPY6]], [[TRUNC5]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[ADD]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[ADD1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C1]](i16) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[ADD2]], [[C]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[ADD3]], [[C]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C1]](i16) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(i16) = G_AND [[ADD4]], [[C]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(i16) = G_AND [[ADD5]], [[C]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[AND5]], [[C1]](i16) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(i16) = G_OR [[AND4]], [[SHL2]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 
+ ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C2]](i32)
+ ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL3]]
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR3]](i32)
+ ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[OR2]](i16)
+ ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[ZEXT]], [[C2]](i32)
+ ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL4]]
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR4]](i32)
+ ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[ZEXT2]], [[C2]](i32)
+ ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[ZEXT1]], [[SHL5]]
+ ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR5]](i32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x i16>), [[BITCAST1]](<2 x i16>), [[BITCAST2]](<2 x i16>)
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<6 x i16>)
+ %0:_(<6 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
+ %1:_(<6 x i8>) = G_TRUNC %0(<6 x i32>)
+ %2:_(<6 x i8>) = G_ADD %1, %1
+ %3:_(<3 x i16>) = G_BITCAST %2(<6 x i8>)
+ %4:_(<6 x i16>) = G_CONCAT_VECTORS %3(<3 x i16>), %3(<3 x i16>)
+ S_ENDPGM 0, implicit %4(<6 x i16>)
...

---
@@ -1509,45 +1509,45 @@ body: |
; CHECK-LABEL: name: test_bitcast_v3s16_to_v6s8
; CHECK: liveins: $vgpr0_vgpr1_vgpr2
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16)
- ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16)
- ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32)
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16)
- ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[COPY1]], [[TRUNC]]
- ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[COPY2]], [[TRUNC1]]
- ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[COPY3]], [[TRUNC2]]
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
- ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[ADD]], [[C]](s16)
- ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[ADD1]], [[C]](s16)
- ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[ADD2]], [[C]](s16)
- ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16)
- ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR]](s16)
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
- ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16)
- ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR1]](s16)
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C1]](s32)
- ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
- ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CHECK-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[ADD2]](s16)
- ; CHECK-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR2]](s16)
- ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C1]](s32)
- ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL2]]
- ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>)
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<6 x s16>)
- %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
- %1:_(<3 x s16>) = G_TRUNC %0
- %2:_(<3 x s16>) = G_ADD %1, %1
- %3:_(<6 x s8>) = G_BITCAST %2
- %4:_(<6 x s16>) = G_ANYEXT %3
- S_ENDPGM 0, implicit %4
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>)
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[UV]](i32)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i16) = COPY [[TRUNC]](i16)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[UV1]](i32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i16) = COPY [[TRUNC1]](i16)
+ ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[UV2]](i32)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i16) = COPY [[TRUNC2]](i16)
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i16) = G_ADD [[COPY1]], [[TRUNC]]
+ ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(i16) = G_ADD [[COPY2]], [[TRUNC1]]
+ ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(i16) = G_ADD [[COPY3]], [[TRUNC2]]
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 8
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i16) = G_LSHR [[ADD]], [[C]](i16)
+ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i16) = G_LSHR [[ADD1]], [[C]](i16)
+ ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i16) = G_LSHR [[ADD2]], [[C]](i16)
+ ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[ADD]](i16)
+ ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[LSHR]](i16)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32)
+ ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[ADD1]](i16)
+ ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[LSHR1]](i16)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C1]](i32)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]]
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32)
+ ; CHECK-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[ADD2]](i16)
+ ; CHECK-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[LSHR2]](i16)
+ ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT5]], [[C1]](i32)
+ ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT4]], [[SHL2]]
+ ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x i16>), [[BITCAST1]](<2 x i16>), [[BITCAST2]](<2 x i16>)
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<6 x i16>)
+ %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2
+ %1:_(<3 x i16>) = G_TRUNC %0(<3 x i32>)
+ %2:_(<3 x i16>) = G_ADD %1, %1
+ %3:_(<6 x i8>) = G_BITCAST %2(<3 x i16>)
+ %4:_(<6 x i16>) = G_ANYEXT %3(<6 x i8>)
+ S_ENDPGM 0, implicit %4(<6 x i16>)
...
--- @@ -1559,71 +1559,71 @@ body: | ; CHECK-LABEL: name: test_bitcast_v2s64_to_v16s8 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16) - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16) - ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C1]](s16) - ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C1]](s16) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) - ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) - ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C1]](s16) - ; CHECK-NEXT: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[C1]](s16) - ; CHECK-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC6]], [[C1]](s16) - ; CHECK-NEXT: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC7]], [[C1]](s16) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC]] - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[LSHR2]], [[LSHR2]] - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC1]] - ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[LSHR3]], [[LSHR3]] - ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC2]] - ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[LSHR4]], [[LSHR4]] - ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[TRUNC3]] - ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[LSHR5]], [[LSHR5]] - ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[TRUNC4]] - ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(s16) = G_ADD [[LSHR8]], [[LSHR8]] - ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[TRUNC5]] - ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(s16) = G_ADD [[LSHR9]], [[LSHR9]] - ; CHECK-NEXT: [[ADD12:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[TRUNC6]] - ; CHECK-NEXT: [[ADD13:%[0-9]+]]:_(s16) = G_ADD [[LSHR10]], [[LSHR10]] - ; CHECK-NEXT: [[ADD14:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[TRUNC7]] - ; CHECK-NEXT: [[ADD15:%[0-9]+]]:_(s16) = G_ADD [[LSHR11]], [[LSHR11]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) - ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) - ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD4]](s16) - ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT 
[[ADD5]](s16) - ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD6]](s16) - ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD7]](s16) - ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD8]](s16) - ; CHECK-NEXT: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD9]](s16) - ; CHECK-NEXT: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD10]](s16) - ; CHECK-NEXT: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD11]](s16) - ; CHECK-NEXT: [[ANYEXT12:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD12]](s16) - ; CHECK-NEXT: [[ANYEXT13:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD13]](s16) - ; CHECK-NEXT: [[ANYEXT14:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD14]](s16) - ; CHECK-NEXT: [[ANYEXT15:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD15]](s16) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32), [[ANYEXT4]](s32), [[ANYEXT5]](s32), [[ANYEXT6]](s32), [[ANYEXT7]](s32), [[ANYEXT8]](s32), [[ANYEXT9]](s32), [[ANYEXT10]](s32), [[ANYEXT11]](s32), [[ANYEXT12]](s32), [[ANYEXT13]](s32), [[ANYEXT14]](s32), [[ANYEXT15]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<16 x s32>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<16 x s8>) = G_BITCAST %0 - %2:_(<16 x s8>) = G_ADD %1, %1 - %3:_(<16 x s32>) = G_ANYEXT %2 - S_ENDPGM 0, implicit %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV]](i64) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[UV2]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[UV2]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[UV3]](i32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[UV3]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC]], [[C1]](i16) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC1]], [[C1]](i16) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC2]], [[C1]](i16) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC3]], [[C1]](i16) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV1]](i64) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[UV4]](i32) + ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[UV4]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR6]](i32) + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[UV5]](i32) + ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(i32) = G_LSHR [[UV5]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR7]](i32) + ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC4]], [[C1]](i16) + ; CHECK-NEXT: [[LSHR9:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC5]], [[C1]](i16) + ; CHECK-NEXT: [[LSHR10:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC6]], [[C1]](i16) + ; CHECK-NEXT: [[LSHR11:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC7]], [[C1]](i16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i16) = G_ADD [[TRUNC]], [[TRUNC]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(i16) = G_ADD [[LSHR2]], [[LSHR2]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(i16) = G_ADD [[TRUNC1]], [[TRUNC1]] + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(i16) = G_ADD [[LSHR3]], [[LSHR3]] + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(i16) = G_ADD [[TRUNC2]], [[TRUNC2]] + ; CHECK-NEXT: 
[[ADD5:%[0-9]+]]:_(i16) = G_ADD [[LSHR4]], [[LSHR4]] + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(i16) = G_ADD [[TRUNC3]], [[TRUNC3]] + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(i16) = G_ADD [[LSHR5]], [[LSHR5]] + ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(i16) = G_ADD [[TRUNC4]], [[TRUNC4]] + ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(i16) = G_ADD [[LSHR8]], [[LSHR8]] + ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(i16) = G_ADD [[TRUNC5]], [[TRUNC5]] + ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(i16) = G_ADD [[LSHR9]], [[LSHR9]] + ; CHECK-NEXT: [[ADD12:%[0-9]+]]:_(i16) = G_ADD [[TRUNC6]], [[TRUNC6]] + ; CHECK-NEXT: [[ADD13:%[0-9]+]]:_(i16) = G_ADD [[LSHR10]], [[LSHR10]] + ; CHECK-NEXT: [[ADD14:%[0-9]+]]:_(i16) = G_ADD [[TRUNC7]], [[TRUNC7]] + ; CHECK-NEXT: [[ADD15:%[0-9]+]]:_(i16) = G_ADD [[LSHR11]], [[LSHR11]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD]](i16) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD1]](i16) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD2]](i16) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD3]](i16) + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD4]](i16) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD5]](i16) + ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD6]](i16) + ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD7]](i16) + ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD8]](i16) + ; CHECK-NEXT: [[ANYEXT9:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD9]](i16) + ; CHECK-NEXT: [[ANYEXT10:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD10]](i16) + ; CHECK-NEXT: [[ANYEXT11:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD11]](i16) + ; CHECK-NEXT: [[ANYEXT12:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD12]](i16) + ; CHECK-NEXT: [[ANYEXT13:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD13]](i16) + ; CHECK-NEXT: [[ANYEXT14:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD14]](i16) + ; CHECK-NEXT: [[ANYEXT15:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD15]](i16) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32), [[ANYEXT2]](i32), [[ANYEXT3]](i32), [[ANYEXT4]](i32), [[ANYEXT5]](i32), [[ANYEXT6]](i32), [[ANYEXT7]](i32), [[ANYEXT8]](i32), [[ANYEXT9]](i32), [[ANYEXT10]](i32), [[ANYEXT11]](i32), [[ANYEXT12]](i32), [[ANYEXT13]](i32), [[ANYEXT14]](i32), [[ANYEXT15]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<16 x i32>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<16 x i8>) = G_BITCAST %0(<2 x i64>) + %2:_(<16 x i8>) = G_ADD %1, %1 + %3:_(<16 x i32>) = G_ANYEXT %2(<16 x i8>) + S_ENDPGM 0, implicit %3(<16 x i32>) ... 
--- @@ -1635,99 +1635,99 @@ body: | ; CHECK-LABEL: name: test_bitcast_v16s8_to_v2s64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[UV1]](s32) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s32) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[UV3]](s32) - ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[UV4]](s32) - ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[UV5]](s32) - ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[UV6]](s32) - ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[UV7]](s32) - ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C]] - ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C]] - ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C1]](s16) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C]] - ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C1]](s16) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; CHECK-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C]] - ; CHECK-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32) - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C]] - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C1]](s16) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) - ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], 
[[C2]](s32) - ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s8) = COPY [[TRUNC2]](s8) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s8) = COPY [[TRUNC3]](s8) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s8) = COPY [[TRUNC6]](s8) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s8) = COPY [[TRUNC7]](s8) - ; CHECK-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[UV8]](s32) - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C]] - ; CHECK-NEXT: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[UV9]](s32) - ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C]] - ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C1]](s16) - ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] - ; CHECK-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[UV10]](s32) - ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C]] - ; CHECK-NEXT: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[UV11]](s32) - ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C]] - ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C1]](s16) - ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] - ; CHECK-NEXT: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[UV12]](s32) - ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C]] - ; CHECK-NEXT: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[UV13]](s32) - ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C]] - ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C1]](s16) - ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]] - ; CHECK-NEXT: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[UV14]](s32) - ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C]] - ; CHECK-NEXT: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[UV15]](s32) - ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C]] - ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C1]](s16) - ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]] - ; CHECK-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) - ; CHECK-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) - ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C2]](s32) - ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] - ; CHECK-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) - ; CHECK-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) - ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C2]](s32) - ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<2 x s64>) - %0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - %1:_(<16 x s8>) = G_TRUNC %0 - %2:_(<2 x s64>) = G_BITCAST %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), 
[[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32), [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32), [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32), [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<16 x i32>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i8) = G_TRUNC [[UV]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i8) = G_TRUNC [[UV1]](i32) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i8) = G_TRUNC [[UV2]](i32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(i8) = G_TRUNC [[UV3]](i32) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(i8) = G_TRUNC [[UV4]](i32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(i8) = G_TRUNC [[UV5]](i32) + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(i8) = G_TRUNC [[UV6]](i32) + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(i8) = G_TRUNC [[UV7]](i32) + ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[UV]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC8]], [[C]] + ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(i16) = G_TRUNC [[UV1]](i32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC9]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C1]](i16) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(i16) = G_TRUNC [[UV2]](i32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC10]], [[C]] + ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(i16) = G_TRUNC [[UV3]](i32) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC11]], [[C]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C1]](i16) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(i16) = G_TRUNC [[UV4]](i32) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(i16) = G_AND [[TRUNC12]], [[C]] + ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(i16) = G_TRUNC [[UV5]](i32) + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(i16) = G_AND [[TRUNC13]], [[C]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[AND5]], [[C1]](i16) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(i16) = G_OR [[AND4]], [[SHL2]] + ; CHECK-NEXT: [[TRUNC14:%[0-9]+]]:_(i16) = G_TRUNC [[UV6]](i32) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(i16) = G_AND [[TRUNC14]], [[C]] + ; CHECK-NEXT: [[TRUNC15:%[0-9]+]]:_(i16) = G_TRUNC [[UV7]](i32) + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(i16) = G_AND [[TRUNC15]], [[C]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(i16) = G_SHL [[AND7]], [[C1]](i16) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(i16) = G_OR [[AND6]], [[SHL3]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C2]](i32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL4]] + ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[OR2]](i16) + ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[OR3]](i16) + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C2]](i32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL5]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR4]](i32), [[OR5]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i8) = COPY [[TRUNC]](i8) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i8) = COPY [[TRUNC1]](i8) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i8) = COPY [[TRUNC2]](i8) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i8) = COPY [[TRUNC3]](i8) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i8) = COPY [[TRUNC4]](i8) + ; CHECK-NEXT: 
[[COPY6:%[0-9]+]]:_(i8) = COPY [[TRUNC5]](i8) + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(i8) = COPY [[TRUNC6]](i8) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(i8) = COPY [[TRUNC7]](i8) + ; CHECK-NEXT: [[TRUNC16:%[0-9]+]]:_(i16) = G_TRUNC [[UV8]](i32) + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(i16) = G_AND [[TRUNC16]], [[C]] + ; CHECK-NEXT: [[TRUNC17:%[0-9]+]]:_(i16) = G_TRUNC [[UV9]](i32) + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(i16) = G_AND [[TRUNC17]], [[C]] + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(i16) = G_SHL [[AND9]], [[C1]](i16) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(i16) = G_OR [[AND8]], [[SHL6]] + ; CHECK-NEXT: [[TRUNC18:%[0-9]+]]:_(i16) = G_TRUNC [[UV10]](i32) + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(i16) = G_AND [[TRUNC18]], [[C]] + ; CHECK-NEXT: [[TRUNC19:%[0-9]+]]:_(i16) = G_TRUNC [[UV11]](i32) + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(i16) = G_AND [[TRUNC19]], [[C]] + ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(i16) = G_SHL [[AND11]], [[C1]](i16) + ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(i16) = G_OR [[AND10]], [[SHL7]] + ; CHECK-NEXT: [[TRUNC20:%[0-9]+]]:_(i16) = G_TRUNC [[UV12]](i32) + ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(i16) = G_AND [[TRUNC20]], [[C]] + ; CHECK-NEXT: [[TRUNC21:%[0-9]+]]:_(i16) = G_TRUNC [[UV13]](i32) + ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(i16) = G_AND [[TRUNC21]], [[C]] + ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(i16) = G_SHL [[AND13]], [[C1]](i16) + ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(i16) = G_OR [[AND12]], [[SHL8]] + ; CHECK-NEXT: [[TRUNC22:%[0-9]+]]:_(i16) = G_TRUNC [[UV14]](i32) + ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(i16) = G_AND [[TRUNC22]], [[C]] + ; CHECK-NEXT: [[TRUNC23:%[0-9]+]]:_(i16) = G_TRUNC [[UV15]](i32) + ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(i16) = G_AND [[TRUNC23]], [[C]] + ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(i16) = G_SHL [[AND15]], [[C1]](i16) + ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(i16) = G_OR [[AND14]], [[SHL9]] + ; CHECK-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[OR6]](i16) + ; CHECK-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[OR7]](i16) + ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[ZEXT5]], [[C2]](i32) + ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[ZEXT4]], [[SHL10]] + ; CHECK-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[OR8]](i16) + ; CHECK-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[OR9]](i16) + ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[ZEXT7]], [[C2]](i32) + ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[ZEXT6]], [[SHL11]] + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR10]](i32), [[OR11]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<2 x i64>) + %0:_(<16 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:_(<16 x i8>) = G_TRUNC %0(<16 x i32>) + %2:_(<2 x i64>) = G_BITCAST %1(<16 x i8>) + S_ENDPGM 0, implicit %2(<2 x i64>) ... 
--- @@ -1738,94 +1738,94 @@ body: | ; CHECK-LABEL: name: test_bitcast_v4s32_to_v16s8 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) - ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) - ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) - ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) - ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32) - ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32) - ; CHECK-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) - ; CHECK-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32) - ; CHECK-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[COPY1]] - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[COPY2]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[COPY3]] - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) - ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[COPY4]] - ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC4]](s16) - ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[COPY5]] - ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC5]](s16) - ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[COPY6]] - ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s16) = COPY [[TRUNC6]](s16) - ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[COPY7]] - ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s16) = COPY [[TRUNC7]](s16) - ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[COPY8]] - ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s16) = COPY [[TRUNC8]](s16) - ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(s16) = G_ADD [[TRUNC8]], [[COPY9]] - ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s16) = COPY [[TRUNC9]](s16) - ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(s16) = G_ADD [[TRUNC9]], [[COPY10]] - ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s16) = COPY [[TRUNC10]](s16) - ; CHECK-NEXT: 
[[ADD10:%[0-9]+]]:_(s16) = G_ADD [[TRUNC10]], [[COPY11]] - ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s16) = COPY [[TRUNC11]](s16) - ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(s16) = G_ADD [[TRUNC11]], [[COPY12]] - ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s16) = COPY [[TRUNC12]](s16) - ; CHECK-NEXT: [[ADD12:%[0-9]+]]:_(s16) = G_ADD [[TRUNC12]], [[COPY13]] - ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s16) = COPY [[TRUNC13]](s16) - ; CHECK-NEXT: [[ADD13:%[0-9]+]]:_(s16) = G_ADD [[TRUNC13]], [[COPY14]] - ; CHECK-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR10]](s32) - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s16) = COPY [[TRUNC14]](s16) - ; CHECK-NEXT: [[ADD14:%[0-9]+]]:_(s16) = G_ADD [[TRUNC14]], [[COPY15]] - ; CHECK-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s16) = COPY [[TRUNC15]](s16) - ; CHECK-NEXT: [[ADD15:%[0-9]+]]:_(s16) = G_ADD [[TRUNC15]], [[COPY16]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) - ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) - ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD4]](s16) - ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD5]](s16) - ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD6]](s16) - ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD7]](s16) - ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD8]](s16) - ; CHECK-NEXT: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD9]](s16) - ; CHECK-NEXT: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD10]](s16) - ; CHECK-NEXT: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD11]](s16) - ; CHECK-NEXT: [[ANYEXT12:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD12]](s16) - ; CHECK-NEXT: [[ANYEXT13:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD13]](s16) - ; CHECK-NEXT: [[ANYEXT14:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD14]](s16) - ; CHECK-NEXT: [[ANYEXT15:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD15]](s16) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32), [[ANYEXT4]](s32), [[ANYEXT5]](s32), [[ANYEXT6]](s32), [[ANYEXT7]](s32), [[ANYEXT8]](s32), [[ANYEXT9]](s32), [[ANYEXT10]](s32), [[ANYEXT11]](s32), [[ANYEXT12]](s32), [[ANYEXT13]](s32), [[ANYEXT14]](s32), [[ANYEXT15]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<16 x s32>) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<16 x s8>) = G_BITCAST %0 - %2:_(<16 x s8>) = G_ADD %1, %1 - %3:_(<16 x s32>) = G_ANYEXT %2 - S_ENDPGM 0, implicit %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[C1]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[C2]](i32) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C]](i32) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C1]](i32) + ; 
CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C2]](i32) + ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[UV2]], [[C]](i32) + ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(i32) = G_LSHR [[UV2]], [[C1]](i32) + ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(i32) = G_LSHR [[UV2]], [[C2]](i32) + ; CHECK-NEXT: [[LSHR9:%[0-9]+]]:_(i32) = G_LSHR [[UV3]], [[C]](i32) + ; CHECK-NEXT: [[LSHR10:%[0-9]+]]:_(i32) = G_LSHR [[UV3]], [[C1]](i32) + ; CHECK-NEXT: [[LSHR11:%[0-9]+]]:_(i32) = G_LSHR [[UV3]], [[C2]](i32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[UV]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i16) = COPY [[TRUNC]](i16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i16) = G_ADD [[TRUNC]], [[COPY1]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i16) = COPY [[TRUNC1]](i16) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(i16) = G_ADD [[TRUNC1]], [[COPY2]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i16) = COPY [[TRUNC2]](i16) + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(i16) = G_ADD [[TRUNC2]], [[COPY3]] + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i16) = COPY [[TRUNC3]](i16) + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(i16) = G_ADD [[TRUNC3]], [[COPY4]] + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[UV1]](i32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i16) = COPY [[TRUNC4]](i16) + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(i16) = G_ADD [[TRUNC4]], [[COPY5]] + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(i16) = COPY [[TRUNC5]](i16) + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(i16) = G_ADD [[TRUNC5]], [[COPY6]] + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR4]](i32) + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(i16) = COPY [[TRUNC6]](i16) + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(i16) = G_ADD [[TRUNC6]], [[COPY7]] + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR5]](i32) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(i16) = COPY [[TRUNC7]](i16) + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(i16) = G_ADD [[TRUNC7]], [[COPY8]] + ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[UV2]](i32) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(i16) = COPY [[TRUNC8]](i16) + ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(i16) = G_ADD [[TRUNC8]], [[COPY9]] + ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR6]](i32) + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(i16) = COPY [[TRUNC9]](i16) + ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(i16) = G_ADD [[TRUNC9]], [[COPY10]] + ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR7]](i32) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(i16) = COPY [[TRUNC10]](i16) + ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(i16) = G_ADD [[TRUNC10]], [[COPY11]] + ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR8]](i32) + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(i16) = COPY [[TRUNC11]](i16) + ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(i16) = G_ADD [[TRUNC11]], [[COPY12]] + ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(i16) = G_TRUNC [[UV3]](i32) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i16) = COPY [[TRUNC12]](i16) + ; CHECK-NEXT: [[ADD12:%[0-9]+]]:_(i16) = G_ADD [[TRUNC12]], [[COPY13]] + ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR9]](i32) + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i16) = COPY [[TRUNC13]](i16) + ; CHECK-NEXT: [[ADD13:%[0-9]+]]:_(i16) = G_ADD [[TRUNC13]], [[COPY14]] + ; CHECK-NEXT: [[TRUNC14:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR10]](i32) + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i16) = COPY [[TRUNC14]](i16) + ; CHECK-NEXT: [[ADD14:%[0-9]+]]:_(i16) = G_ADD [[TRUNC14]], [[COPY15]] + ; CHECK-NEXT: 
[[TRUNC15:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR11]](i32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i16) = COPY [[TRUNC15]](i16) + ; CHECK-NEXT: [[ADD15:%[0-9]+]]:_(i16) = G_ADD [[TRUNC15]], [[COPY16]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD]](i16) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD1]](i16) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD2]](i16) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD3]](i16) + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD4]](i16) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD5]](i16) + ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD6]](i16) + ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD7]](i16) + ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD8]](i16) + ; CHECK-NEXT: [[ANYEXT9:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD9]](i16) + ; CHECK-NEXT: [[ANYEXT10:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD10]](i16) + ; CHECK-NEXT: [[ANYEXT11:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD11]](i16) + ; CHECK-NEXT: [[ANYEXT12:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD12]](i16) + ; CHECK-NEXT: [[ANYEXT13:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD13]](i16) + ; CHECK-NEXT: [[ANYEXT14:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD14]](i16) + ; CHECK-NEXT: [[ANYEXT15:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD15]](i16) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32), [[ANYEXT2]](i32), [[ANYEXT3]](i32), [[ANYEXT4]](i32), [[ANYEXT5]](i32), [[ANYEXT6]](i32), [[ANYEXT7]](i32), [[ANYEXT8]](i32), [[ANYEXT9]](i32), [[ANYEXT10]](i32), [[ANYEXT11]](i32), [[ANYEXT12]](i32), [[ANYEXT13]](i32), [[ANYEXT14]](i32), [[ANYEXT15]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<16 x i32>) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<16 x i8>) = G_BITCAST %0(<4 x i32>) + %2:_(<16 x i8>) = G_ADD %1, %1 + %3:_(<16 x i32>) = G_ANYEXT %2(<16 x i8>) + S_ENDPGM 0, implicit %3(<16 x i32>) ... 
--- @@ -1837,94 +1837,94 @@ body: | ; CHECK-LABEL: name: test_bitcast_v16s8_to_v4s32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[UV1]](s32) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s32) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[UV3]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s8) = COPY [[TRUNC2]](s8) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s8) = COPY [[TRUNC3]](s8) - ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[UV4]](s32) - ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[UV5]](s32) - ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[UV6]](s32) - ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[UV7]](s32) - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C]] - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[C]] - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C2]](s32) - ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[C]] - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C3]](s32) - ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s8) = COPY [[TRUNC2]](s8) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s8) = COPY [[TRUNC3]](s8) - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8) - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8) - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s8) = COPY [[TRUNC6]](s8) 
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s8) = COPY [[TRUNC7]](s8) - ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[UV8]](s32) - ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[UV9]](s32) - ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s8) = G_TRUNC [[UV10]](s32) - ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s8) = G_TRUNC [[UV11]](s32) - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV8]], [[C]] - ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[UV9]], [[C]] - ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C1]](s32) - ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]] - ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[UV10]], [[C]] - ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C2]](s32) - ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]] - ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[UV11]], [[C]] - ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C3]](s32) - ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s8) = COPY [[TRUNC2]](s8) - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s8) = COPY [[TRUNC3]](s8) - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8) - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s8) = COPY [[TRUNC6]](s8) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s8) = COPY [[TRUNC7]](s8) - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8) - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8) - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s8) = COPY [[TRUNC10]](s8) - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s8) = COPY [[TRUNC11]](s8) - ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV12]], [[C]] - ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[UV13]], [[C]] - ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C1]](s32) - ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]] - ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[UV14]], [[C]] - ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C2]](s32) - ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]] - ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[UV15]], [[C]] - ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C3]](s32) - ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<4 x s32>) - %0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - %1:_(<16 x s8>) = G_TRUNC %0 - %2:_(<4 x s32>) = G_BITCAST %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32), [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32), [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32), [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<16 x i32>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i8) = G_TRUNC [[UV]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i8) = G_TRUNC [[UV1]](i32) + 
; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i8) = G_TRUNC [[UV2]](i32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(i8) = G_TRUNC [[UV3]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[UV]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[UV1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[UV2]], [[C]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND2]], [[C2]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[UV3]], [[C]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C3]](i32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[OR1]], [[SHL2]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i8) = COPY [[TRUNC]](i8) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i8) = COPY [[TRUNC1]](i8) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i8) = COPY [[TRUNC2]](i8) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i8) = COPY [[TRUNC3]](i8) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(i8) = G_TRUNC [[UV4]](i32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(i8) = G_TRUNC [[UV5]](i32) + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(i8) = G_TRUNC [[UV6]](i32) + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(i8) = G_TRUNC [[UV7]](i32) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[UV4]], [[C]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[UV5]], [[C]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[AND5]], [[C1]](i32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[AND4]], [[SHL3]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(i32) = G_AND [[UV6]], [[C]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[AND6]], [[C2]](i32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[OR3]], [[SHL4]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(i32) = G_AND [[UV7]], [[C]] + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[AND7]], [[C3]](i32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[OR4]], [[SHL5]] + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i8) = COPY [[TRUNC]](i8) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(i8) = COPY [[TRUNC1]](i8) + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(i8) = COPY [[TRUNC2]](i8) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(i8) = COPY [[TRUNC3]](i8) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(i8) = COPY [[TRUNC4]](i8) + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(i8) = COPY [[TRUNC5]](i8) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(i8) = COPY [[TRUNC6]](i8) + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(i8) = COPY [[TRUNC7]](i8) + ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(i8) = G_TRUNC [[UV8]](i32) + ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(i8) = G_TRUNC [[UV9]](i32) + ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(i8) = G_TRUNC [[UV10]](i32) + ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(i8) = G_TRUNC [[UV11]](i32) + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(i32) = G_AND [[UV8]], [[C]] + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(i32) = G_AND [[UV9]], [[C]] + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[AND9]], [[C1]](i32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[AND8]], [[SHL6]] + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(i32) = G_AND [[UV10]], [[C]] + ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[AND10]], [[C2]](i32) + ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[OR6]], [[SHL7]] + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(i32) = G_AND [[UV11]], [[C]] + ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[AND11]], [[C3]](i32) + ; CHECK-NEXT: 
[[OR8:%[0-9]+]]:_(i32) = G_OR [[OR7]], [[SHL8]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i8) = COPY [[TRUNC]](i8) + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i8) = COPY [[TRUNC1]](i8) + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i8) = COPY [[TRUNC2]](i8) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i8) = COPY [[TRUNC3]](i8) + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i8) = COPY [[TRUNC4]](i8) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i8) = COPY [[TRUNC5]](i8) + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i8) = COPY [[TRUNC6]](i8) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(i8) = COPY [[TRUNC7]](i8) + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(i8) = COPY [[TRUNC8]](i8) + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(i8) = COPY [[TRUNC9]](i8) + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(i8) = COPY [[TRUNC10]](i8) + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(i8) = COPY [[TRUNC11]](i8) + ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(i32) = G_AND [[UV12]], [[C]] + ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(i32) = G_AND [[UV13]], [[C]] + ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[AND13]], [[C1]](i32) + ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[AND12]], [[SHL9]] + ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(i32) = G_AND [[UV14]], [[C]] + ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[AND14]], [[C2]](i32) + ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[OR9]], [[SHL10]] + ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(i32) = G_AND [[UV15]], [[C]] + ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[AND15]], [[C3]](i32) + ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[OR10]], [[SHL11]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<4 x i32>) + %0:_(<16 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:_(<16 x i8>) = G_TRUNC %0(<16 x i32>) + %2:_(<4 x i32>) = G_BITCAST %1(<16 x i8>) + S_ENDPGM 0, implicit %2(<4 x i32>) ... 
--- @@ -1936,73 +1936,73 @@ body: | ; CHECK-LABEL: name: test_bitcast_v8s16_to_v16s8 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<8 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16) - ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16) - ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C1]](s16) - ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C1]](s16) - ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C1]](s16) - ; CHECK-NEXT: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[C1]](s16) - ; CHECK-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC6]], [[C1]](s16) - ; CHECK-NEXT: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC7]], [[C1]](s16) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC]] - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[LSHR4]], [[LSHR4]] - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC1]] - ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[LSHR5]], [[LSHR5]] - ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC2]] - ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[LSHR6]], [[LSHR6]] - ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[TRUNC3]] - ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[LSHR7]], [[LSHR7]] - ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[TRUNC4]] - ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(s16) = G_ADD [[LSHR8]], [[LSHR8]] - ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[TRUNC5]] - ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(s16) = G_ADD [[LSHR9]], [[LSHR9]] - ; CHECK-NEXT: [[ADD12:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[TRUNC6]] - ; CHECK-NEXT: [[ADD13:%[0-9]+]]:_(s16) = G_ADD [[LSHR10]], [[LSHR10]] - ; CHECK-NEXT: [[ADD14:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[TRUNC7]] - ; CHECK-NEXT: [[ADD15:%[0-9]+]]:_(s16) = G_ADD [[LSHR11]], [[LSHR11]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = 
G_ANYEXT [[ADD2]](s16) - ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) - ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD4]](s16) - ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD5]](s16) - ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD6]](s16) - ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD7]](s16) - ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD8]](s16) - ; CHECK-NEXT: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD9]](s16) - ; CHECK-NEXT: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD10]](s16) - ; CHECK-NEXT: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD11]](s16) - ; CHECK-NEXT: [[ANYEXT12:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD12]](s16) - ; CHECK-NEXT: [[ANYEXT13:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD13]](s16) - ; CHECK-NEXT: [[ANYEXT14:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD14]](s16) - ; CHECK-NEXT: [[ANYEXT15:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD15]](s16) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32), [[ANYEXT4]](s32), [[ANYEXT5]](s32), [[ANYEXT6]](s32), [[ANYEXT7]](s32), [[ANYEXT8]](s32), [[ANYEXT9]](s32), [[ANYEXT10]](s32), [[ANYEXT11]](s32), [[ANYEXT12]](s32), [[ANYEXT13]](s32), [[ANYEXT14]](s32), [[ANYEXT15]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<16 x s32>) - %0:_(<8 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<16 x s8>) = G_BITCAST %0 - %2:_(<16 x s8>) = G_ADD %1, %1 - %3:_(<16 x s32>) = G_ANYEXT %2 - S_ENDPGM 0, implicit %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x i16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<8 x i16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST3]](i32) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC]], [[C1]](i16) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC1]], [[C1]](i16) + ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC2]], [[C1]](i16) + ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC3]], [[C1]](i16) + ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC4]], [[C1]](i16) + ; CHECK-NEXT: [[LSHR9:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC5]], [[C1]](i16) + ; CHECK-NEXT: [[LSHR10:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC6]], [[C1]](i16) + ; 
CHECK-NEXT: [[LSHR11:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC7]], [[C1]](i16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i16) = G_ADD [[TRUNC]], [[TRUNC]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(i16) = G_ADD [[LSHR4]], [[LSHR4]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(i16) = G_ADD [[TRUNC1]], [[TRUNC1]] + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(i16) = G_ADD [[LSHR5]], [[LSHR5]] + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(i16) = G_ADD [[TRUNC2]], [[TRUNC2]] + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(i16) = G_ADD [[LSHR6]], [[LSHR6]] + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(i16) = G_ADD [[TRUNC3]], [[TRUNC3]] + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(i16) = G_ADD [[LSHR7]], [[LSHR7]] + ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(i16) = G_ADD [[TRUNC4]], [[TRUNC4]] + ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(i16) = G_ADD [[LSHR8]], [[LSHR8]] + ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(i16) = G_ADD [[TRUNC5]], [[TRUNC5]] + ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(i16) = G_ADD [[LSHR9]], [[LSHR9]] + ; CHECK-NEXT: [[ADD12:%[0-9]+]]:_(i16) = G_ADD [[TRUNC6]], [[TRUNC6]] + ; CHECK-NEXT: [[ADD13:%[0-9]+]]:_(i16) = G_ADD [[LSHR10]], [[LSHR10]] + ; CHECK-NEXT: [[ADD14:%[0-9]+]]:_(i16) = G_ADD [[TRUNC7]], [[TRUNC7]] + ; CHECK-NEXT: [[ADD15:%[0-9]+]]:_(i16) = G_ADD [[LSHR11]], [[LSHR11]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD]](i16) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD1]](i16) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD2]](i16) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD3]](i16) + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD4]](i16) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD5]](i16) + ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD6]](i16) + ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD7]](i16) + ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD8]](i16) + ; CHECK-NEXT: [[ANYEXT9:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD9]](i16) + ; CHECK-NEXT: [[ANYEXT10:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD10]](i16) + ; CHECK-NEXT: [[ANYEXT11:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD11]](i16) + ; CHECK-NEXT: [[ANYEXT12:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD12]](i16) + ; CHECK-NEXT: [[ANYEXT13:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD13]](i16) + ; CHECK-NEXT: [[ANYEXT14:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD14]](i16) + ; CHECK-NEXT: [[ANYEXT15:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD15]](i16) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32), [[ANYEXT2]](i32), [[ANYEXT3]](i32), [[ANYEXT4]](i32), [[ANYEXT5]](i32), [[ANYEXT6]](i32), [[ANYEXT7]](i32), [[ANYEXT8]](i32), [[ANYEXT9]](i32), [[ANYEXT10]](i32), [[ANYEXT11]](i32), [[ANYEXT12]](i32), [[ANYEXT13]](i32), [[ANYEXT14]](i32), [[ANYEXT15]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<16 x i32>) + %0:_(<8 x i16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<16 x i8>) = G_BITCAST %0(<8 x i16>) + %2:_(<16 x i8>) = G_ADD %1, %1 + %3:_(<16 x i32>) = G_ANYEXT %2(<16 x i8>) + S_ENDPGM 0, implicit %3(<16 x i32>) ... 
--- @@ -2014,155 +2014,155 @@ body: | ; CHECK-LABEL: name: test_bitcast_v16s8_to_v8s16 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[UV1]](s32) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) - ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s32) - ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[UV3]](s32) - ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C]] - ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C1]](s16) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8) - ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[UV4]](s32) - ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[UV5]](s32) - ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C]] - ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C1]](s16) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8) - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8) - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8) - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8) - ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(s8) = G_TRUNC [[UV6]](s32) - ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(s8) = G_TRUNC [[UV7]](s32) - ; CHECK-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C]] - ; CHECK-NEXT: 
[[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32) - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C]] - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C1]](s16) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8) - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8) - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8) - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s8) = COPY [[TRUNC12]](s8) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s8) = COPY [[TRUNC13]](s8) - ; CHECK-NEXT: [[TRUNC16:%[0-9]+]]:_(s8) = G_TRUNC [[UV8]](s32) - ; CHECK-NEXT: [[TRUNC17:%[0-9]+]]:_(s8) = G_TRUNC [[UV9]](s32) - ; CHECK-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[UV8]](s32) - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C]] - ; CHECK-NEXT: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[UV9]](s32) - ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C]] - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C1]](s16) - ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8) - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8) - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8) - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8) - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s8) = COPY [[TRUNC12]](s8) - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s8) = COPY [[TRUNC13]](s8) - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s8) = COPY [[TRUNC16]](s8) - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s8) = COPY [[TRUNC17]](s8) - ; CHECK-NEXT: [[TRUNC20:%[0-9]+]]:_(s8) = G_TRUNC [[UV10]](s32) - ; CHECK-NEXT: [[TRUNC21:%[0-9]+]]:_(s8) = G_TRUNC [[UV11]](s32) - ; CHECK-NEXT: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[UV10]](s32) - ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C]] - ; CHECK-NEXT: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[UV11]](s32) - ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C]] - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C1]](s16) - ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) - ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) - ; CHECK-NEXT: [[COPY33:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8) - ; CHECK-NEXT: [[COPY34:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8) - ; CHECK-NEXT: [[COPY35:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8) - ; CHECK-NEXT: [[COPY36:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8) - ; CHECK-NEXT: [[COPY37:%[0-9]+]]:_(s8) = COPY [[TRUNC12]](s8) - ; CHECK-NEXT: [[COPY38:%[0-9]+]]:_(s8) = COPY [[TRUNC13]](s8) - ; CHECK-NEXT: [[COPY39:%[0-9]+]]:_(s8) = COPY [[TRUNC16]](s8) - ; CHECK-NEXT: [[COPY40:%[0-9]+]]:_(s8) = COPY [[TRUNC17]](s8) - ; CHECK-NEXT: [[COPY41:%[0-9]+]]:_(s8) = COPY [[TRUNC20]](s8) - ; CHECK-NEXT: [[COPY42:%[0-9]+]]:_(s8) = COPY [[TRUNC21]](s8) - ; CHECK-NEXT: [[TRUNC24:%[0-9]+]]:_(s8) = G_TRUNC [[UV12]](s32) - ; CHECK-NEXT: [[TRUNC25:%[0-9]+]]:_(s8) = G_TRUNC [[UV13]](s32) - ; CHECK-NEXT: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[UV12]](s32) - ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC26]], [[C]] - ; CHECK-NEXT: [[TRUNC27:%[0-9]+]]:_(s16) = G_TRUNC [[UV13]](s32) 
- ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC27]], [[C]] - ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C1]](s16) - ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] - ; CHECK-NEXT: [[COPY43:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) - ; CHECK-NEXT: [[COPY44:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) - ; CHECK-NEXT: [[COPY45:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8) - ; CHECK-NEXT: [[COPY46:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8) - ; CHECK-NEXT: [[COPY47:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8) - ; CHECK-NEXT: [[COPY48:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8) - ; CHECK-NEXT: [[COPY49:%[0-9]+]]:_(s8) = COPY [[TRUNC12]](s8) - ; CHECK-NEXT: [[COPY50:%[0-9]+]]:_(s8) = COPY [[TRUNC13]](s8) - ; CHECK-NEXT: [[COPY51:%[0-9]+]]:_(s8) = COPY [[TRUNC16]](s8) - ; CHECK-NEXT: [[COPY52:%[0-9]+]]:_(s8) = COPY [[TRUNC17]](s8) - ; CHECK-NEXT: [[COPY53:%[0-9]+]]:_(s8) = COPY [[TRUNC20]](s8) - ; CHECK-NEXT: [[COPY54:%[0-9]+]]:_(s8) = COPY [[TRUNC21]](s8) - ; CHECK-NEXT: [[COPY55:%[0-9]+]]:_(s8) = COPY [[TRUNC24]](s8) - ; CHECK-NEXT: [[COPY56:%[0-9]+]]:_(s8) = COPY [[TRUNC25]](s8) - ; CHECK-NEXT: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[UV14]](s32) - ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC28]], [[C]] - ; CHECK-NEXT: [[TRUNC29:%[0-9]+]]:_(s16) = G_TRUNC [[UV15]](s32) - ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC29]], [[C]] - ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C1]](s16) - ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) - ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL8]] - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) - ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C2]](s32) - ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL9]] - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32) - ; CHECK-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) - ; CHECK-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) - ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C2]](s32) - ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) - ; CHECK-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) - ; CHECK-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) - ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C2]](s32) - ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<8 x s16>) - %0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - %1:_(<16 x s8>) = G_TRUNC %0 - %2:_(<8 x s16>) = G_BITCAST %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK-NEXT: 
[[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32), [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32), [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32), [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<16 x i32>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i8) = G_TRUNC [[UV]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i8) = G_TRUNC [[UV1]](i32) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[UV]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C]] + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[UV1]](i32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C1]](i16) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i8) = COPY [[TRUNC]](i8) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i8) = COPY [[TRUNC1]](i8) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(i8) = G_TRUNC [[UV2]](i32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(i8) = G_TRUNC [[UV3]](i32) + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[UV2]](i32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC6]], [[C]] + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[UV3]](i32) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC7]], [[C]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C1]](i16) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i8) = COPY [[TRUNC]](i8) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i8) = COPY [[TRUNC1]](i8) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i8) = COPY [[TRUNC4]](i8) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(i8) = COPY [[TRUNC5]](i8) + ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(i8) = G_TRUNC [[UV4]](i32) + ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(i8) = G_TRUNC [[UV5]](i32) + ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(i16) = G_TRUNC [[UV4]](i32) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(i16) = G_AND [[TRUNC10]], [[C]] + ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(i16) = G_TRUNC [[UV5]](i32) + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(i16) = G_AND [[TRUNC11]], [[C]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[AND5]], [[C1]](i16) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(i16) = G_OR [[AND4]], [[SHL2]] + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(i8) = COPY [[TRUNC]](i8) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(i8) = COPY [[TRUNC1]](i8) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(i8) = COPY [[TRUNC4]](i8) + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(i8) = COPY [[TRUNC5]](i8) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(i8) = COPY [[TRUNC8]](i8) + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(i8) = COPY [[TRUNC9]](i8) + ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(i8) = G_TRUNC [[UV6]](i32) + ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(i8) = G_TRUNC [[UV7]](i32) + ; CHECK-NEXT: [[TRUNC14:%[0-9]+]]:_(i16) = G_TRUNC [[UV6]](i32) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(i16) = G_AND [[TRUNC14]], [[C]] + ; CHECK-NEXT: [[TRUNC15:%[0-9]+]]:_(i16) = G_TRUNC [[UV7]](i32) + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(i16) = G_AND [[TRUNC15]], [[C]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(i16) = G_SHL [[AND7]], [[C1]](i16) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(i16) = G_OR [[AND6]], [[SHL3]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i8) = COPY [[TRUNC]](i8) + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i8) = COPY [[TRUNC1]](i8) + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i8) 
= COPY [[TRUNC4]](i8) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i8) = COPY [[TRUNC5]](i8) + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i8) = COPY [[TRUNC8]](i8) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i8) = COPY [[TRUNC9]](i8) + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i8) = COPY [[TRUNC12]](i8) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(i8) = COPY [[TRUNC13]](i8) + ; CHECK-NEXT: [[TRUNC16:%[0-9]+]]:_(i8) = G_TRUNC [[UV8]](i32) + ; CHECK-NEXT: [[TRUNC17:%[0-9]+]]:_(i8) = G_TRUNC [[UV9]](i32) + ; CHECK-NEXT: [[TRUNC18:%[0-9]+]]:_(i16) = G_TRUNC [[UV8]](i32) + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(i16) = G_AND [[TRUNC18]], [[C]] + ; CHECK-NEXT: [[TRUNC19:%[0-9]+]]:_(i16) = G_TRUNC [[UV9]](i32) + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(i16) = G_AND [[TRUNC19]], [[C]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(i16) = G_SHL [[AND9]], [[C1]](i16) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(i16) = G_OR [[AND8]], [[SHL4]] + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(i8) = COPY [[TRUNC]](i8) + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(i8) = COPY [[TRUNC1]](i8) + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(i8) = COPY [[TRUNC4]](i8) + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(i8) = COPY [[TRUNC5]](i8) + ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(i8) = COPY [[TRUNC8]](i8) + ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(i8) = COPY [[TRUNC9]](i8) + ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(i8) = COPY [[TRUNC12]](i8) + ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(i8) = COPY [[TRUNC13]](i8) + ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(i8) = COPY [[TRUNC16]](i8) + ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(i8) = COPY [[TRUNC17]](i8) + ; CHECK-NEXT: [[TRUNC20:%[0-9]+]]:_(i8) = G_TRUNC [[UV10]](i32) + ; CHECK-NEXT: [[TRUNC21:%[0-9]+]]:_(i8) = G_TRUNC [[UV11]](i32) + ; CHECK-NEXT: [[TRUNC22:%[0-9]+]]:_(i16) = G_TRUNC [[UV10]](i32) + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(i16) = G_AND [[TRUNC22]], [[C]] + ; CHECK-NEXT: [[TRUNC23:%[0-9]+]]:_(i16) = G_TRUNC [[UV11]](i32) + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(i16) = G_AND [[TRUNC23]], [[C]] + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(i16) = G_SHL [[AND11]], [[C1]](i16) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(i16) = G_OR [[AND10]], [[SHL5]] + ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(i8) = COPY [[TRUNC]](i8) + ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(i8) = COPY [[TRUNC1]](i8) + ; CHECK-NEXT: [[COPY33:%[0-9]+]]:_(i8) = COPY [[TRUNC4]](i8) + ; CHECK-NEXT: [[COPY34:%[0-9]+]]:_(i8) = COPY [[TRUNC5]](i8) + ; CHECK-NEXT: [[COPY35:%[0-9]+]]:_(i8) = COPY [[TRUNC8]](i8) + ; CHECK-NEXT: [[COPY36:%[0-9]+]]:_(i8) = COPY [[TRUNC9]](i8) + ; CHECK-NEXT: [[COPY37:%[0-9]+]]:_(i8) = COPY [[TRUNC12]](i8) + ; CHECK-NEXT: [[COPY38:%[0-9]+]]:_(i8) = COPY [[TRUNC13]](i8) + ; CHECK-NEXT: [[COPY39:%[0-9]+]]:_(i8) = COPY [[TRUNC16]](i8) + ; CHECK-NEXT: [[COPY40:%[0-9]+]]:_(i8) = COPY [[TRUNC17]](i8) + ; CHECK-NEXT: [[COPY41:%[0-9]+]]:_(i8) = COPY [[TRUNC20]](i8) + ; CHECK-NEXT: [[COPY42:%[0-9]+]]:_(i8) = COPY [[TRUNC21]](i8) + ; CHECK-NEXT: [[TRUNC24:%[0-9]+]]:_(i8) = G_TRUNC [[UV12]](i32) + ; CHECK-NEXT: [[TRUNC25:%[0-9]+]]:_(i8) = G_TRUNC [[UV13]](i32) + ; CHECK-NEXT: [[TRUNC26:%[0-9]+]]:_(i16) = G_TRUNC [[UV12]](i32) + ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(i16) = G_AND [[TRUNC26]], [[C]] + ; CHECK-NEXT: [[TRUNC27:%[0-9]+]]:_(i16) = G_TRUNC [[UV13]](i32) + ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(i16) = G_AND [[TRUNC27]], [[C]] + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(i16) = G_SHL [[AND13]], [[C1]](i16) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(i16) = G_OR [[AND12]], [[SHL6]] + ; CHECK-NEXT: [[COPY43:%[0-9]+]]:_(i8) = COPY [[TRUNC]](i8) + ; CHECK-NEXT: [[COPY44:%[0-9]+]]:_(i8) = COPY [[TRUNC1]](i8) + ; CHECK-NEXT: [[COPY45:%[0-9]+]]:_(i8) = COPY [[TRUNC4]](i8) + ; CHECK-NEXT: 
[[COPY46:%[0-9]+]]:_(i8) = COPY [[TRUNC5]](i8) + ; CHECK-NEXT: [[COPY47:%[0-9]+]]:_(i8) = COPY [[TRUNC8]](i8) + ; CHECK-NEXT: [[COPY48:%[0-9]+]]:_(i8) = COPY [[TRUNC9]](i8) + ; CHECK-NEXT: [[COPY49:%[0-9]+]]:_(i8) = COPY [[TRUNC12]](i8) + ; CHECK-NEXT: [[COPY50:%[0-9]+]]:_(i8) = COPY [[TRUNC13]](i8) + ; CHECK-NEXT: [[COPY51:%[0-9]+]]:_(i8) = COPY [[TRUNC16]](i8) + ; CHECK-NEXT: [[COPY52:%[0-9]+]]:_(i8) = COPY [[TRUNC17]](i8) + ; CHECK-NEXT: [[COPY53:%[0-9]+]]:_(i8) = COPY [[TRUNC20]](i8) + ; CHECK-NEXT: [[COPY54:%[0-9]+]]:_(i8) = COPY [[TRUNC21]](i8) + ; CHECK-NEXT: [[COPY55:%[0-9]+]]:_(i8) = COPY [[TRUNC24]](i8) + ; CHECK-NEXT: [[COPY56:%[0-9]+]]:_(i8) = COPY [[TRUNC25]](i8) + ; CHECK-NEXT: [[TRUNC28:%[0-9]+]]:_(i16) = G_TRUNC [[UV14]](i32) + ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(i16) = G_AND [[TRUNC28]], [[C]] + ; CHECK-NEXT: [[TRUNC29:%[0-9]+]]:_(i16) = G_TRUNC [[UV15]](i32) + ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(i16) = G_AND [[TRUNC29]], [[C]] + ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(i16) = G_SHL [[AND15]], [[C1]](i16) + ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(i16) = G_OR [[AND14]], [[SHL7]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C2]](i32) + ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL8]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR8]](i32) + ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[OR2]](i16) + ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[OR3]](i16) + ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C2]](i32) + ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL9]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR9]](i32) + ; CHECK-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[OR4]](i16) + ; CHECK-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[OR5]](i16) + ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[ZEXT5]], [[C2]](i32) + ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[ZEXT4]], [[SHL10]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR10]](i32) + ; CHECK-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[OR6]](i16) + ; CHECK-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[OR7]](i16) + ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[ZEXT7]], [[C2]](i32) + ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[ZEXT6]], [[SHL11]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR11]](i32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x i16>), [[BITCAST1]](<2 x i16>), [[BITCAST2]](<2 x i16>), [[BITCAST3]](<2 x i16>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<8 x i16>) + %0:_(<16 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:_(<16 x i8>) = G_TRUNC %0(<16 x i32>) + %2:_(<8 x i16>) = G_BITCAST %1(<16 x i8>) + S_ENDPGM 0, implicit %2(<8 x i16>) ... 
--- @@ -2174,12 +2174,12 @@ body: | ; CHECK-LABEL: name: test_bitcast_v3s64_to_v6s32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s32>) = G_BITCAST [[COPY]](<3 x s64>) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<6 x s32>) - %0:_(<3 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - %1:_(<6 x s32>) = G_BITCAST %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x i32>) = G_BITCAST [[COPY]](<3 x i64>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<6 x i32>) + %0:_(<3 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + %1:_(<6 x i32>) = G_BITCAST %0(<3 x i64>) + S_ENDPGM 0, implicit %1(<6 x i32>) ... --- @@ -2191,12 +2191,12 @@ body: | ; CHECK-LABEL: name: test_bitcast_v6s32_to_v3s64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<6 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s64>) = G_BITCAST [[COPY]](<6 x s32>) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<3 x s64>) - %0:_(<6 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - %1:_(<3 x s64>) = G_BITCAST %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<6 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x i64>) = G_BITCAST [[COPY]](<6 x i32>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<3 x i64>) + %0:_(<6 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + %1:_(<3 x i64>) = G_BITCAST %0(<6 x i32>) + S_ENDPGM 0, implicit %1(<3 x i64>) ... --- @@ -2208,12 +2208,12 @@ body: | ; CHECK-LABEL: name: test_bitcast_v3s64_to_v12s16 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<12 x s16>) = G_BITCAST [[COPY]](<3 x s64>) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<12 x s16>) - %0:_(<3 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - %1:_(<12 x s16>) = G_BITCAST %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<12 x i16>) = G_BITCAST [[COPY]](<3 x i64>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<12 x i16>) + %0:_(<3 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + %1:_(<12 x i16>) = G_BITCAST %0(<3 x i64>) + S_ENDPGM 0, implicit %1(<12 x i16>) ... 
--- @@ -2225,12 +2225,12 @@ body: | ; CHECK-LABEL: name: test_bitcast_v12s16_to_v3s64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<12 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s64>) = G_BITCAST [[COPY]](<12 x s16>) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<3 x s64>) - %0:_(<12 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - %1:_(<3 x s64>) = G_BITCAST %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<12 x i16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x i64>) = G_BITCAST [[COPY]](<12 x i16>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<3 x i64>) + %0:_(<12 x i16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + %1:_(<3 x i64>) = G_BITCAST %0(<12 x i16>) + S_ENDPGM 0, implicit %1(<3 x i64>) ... --- @@ -2242,104 +2242,104 @@ body: | ; CHECK-LABEL: name: test_bitcast_v3s64_to_v24s8 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<3 x s64>) - ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16) - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16) - ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C1]](s16) - ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C1]](s16) - ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) - ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) - ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C1]](s16) - ; CHECK-NEXT: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[C1]](s16) - ; CHECK-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC6]], [[C1]](s16) - ; CHECK-NEXT: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC7]], [[C1]](s16) - ; CHECK-NEXT: [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32) - ; CHECK-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR12]](s32) - ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[UV8]](s32) - ; CHECK-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV8]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR13]](s32) - ; CHECK-NEXT: [[LSHR14:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC8]], 
[[C1]](s16) - ; CHECK-NEXT: [[LSHR15:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC9]], [[C1]](s16) - ; CHECK-NEXT: [[LSHR16:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC10]], [[C1]](s16) - ; CHECK-NEXT: [[LSHR17:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC11]], [[C1]](s16) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C2]] - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR2]](s16) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR3]](s16) - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C2]] - ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR4]](s16) - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT2]], [[C]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR5]](s16) - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL3]] - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C2]] - ; CHECK-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR8]](s16) - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT4]], [[C]](s32) - ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CHECK-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR9]](s16) - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C]](s32) - ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR6]], [[SHL5]] - ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[C2]] - ; CHECK-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR10]](s16) - ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT6]], [[C]](s32) - ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL6]] - ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) - ; CHECK-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR11]](s16) - ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C]](s32) - ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[LSHR7]], [[SHL7]] - ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[C2]] - ; CHECK-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR14]](s16) - ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT8]], [[C]](s32) - ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL8]] - ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) - ; CHECK-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR15]](s16) - ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C]](s32) - ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[LSHR12]], [[SHL9]] - ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32) - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UV8]], [[C2]] - ; CHECK-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR16]](s16) - ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL 
[[ZEXT10]], [[C]](s32) - ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL10]] - ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) - ; CHECK-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR17]](s16) - ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C]](s32) - ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[LSHR13]], [[SHL11]] - ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<24 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<24 x s16>) - %0:_(<3 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - %1:_(<24 x s8>) = G_BITCAST %0 - %2:_(<24 x s16>) = G_ANYEXT %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64), [[UV2:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<3 x i64>) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV]](i64) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[UV3]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[UV3]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[UV4]](i32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[UV4]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC]], [[C1]](i16) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC1]], [[C1]](i16) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC2]], [[C1]](i16) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC3]], [[C1]](i16) + ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV1]](i64) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[UV5]](i32) + ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[UV5]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR6]](i32) + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[UV6]](i32) + ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(i32) = G_LSHR [[UV6]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR7]](i32) + ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC4]], [[C1]](i16) + ; CHECK-NEXT: [[LSHR9:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC5]], [[C1]](i16) + ; CHECK-NEXT: [[LSHR10:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC6]], [[C1]](i16) + ; CHECK-NEXT: [[LSHR11:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC7]], [[C1]](i16) + ; CHECK-NEXT: [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[UV7]](i32) + ; CHECK-NEXT: [[LSHR12:%[0-9]+]]:_(i32) = G_LSHR [[UV7]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR12]](i32) + ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(i16) = G_TRUNC [[UV8]](i32) + ; CHECK-NEXT: [[LSHR13:%[0-9]+]]:_(i32) = G_LSHR [[UV8]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR13]](i32) + ; CHECK-NEXT: [[LSHR14:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC8]], [[C1]](i16) + ; 
CHECK-NEXT: [[LSHR15:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC9]], [[C1]](i16) + ; CHECK-NEXT: [[LSHR16:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC10]], [[C1]](i16) + ; CHECK-NEXT: [[LSHR17:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC11]], [[C1]](i16) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[UV3]], [[C2]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[LSHR2]](i16) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT]], [[C]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[LSHR3]](i16) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[UV4]], [[C2]] + ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[LSHR4]](i16) + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT2]], [[C]](i32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[AND1]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[LSHR5]](i16) + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[LSHR1]], [[SHL3]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR3]](i32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[UV5]], [[C2]] + ; CHECK-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[LSHR8]](i16) + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[ZEXT4]], [[C]](i32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL4]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR4]](i32) + ; CHECK-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[LSHR9]](i16) + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[ZEXT5]], [[C]](i32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[LSHR6]], [[SHL5]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR5]](i32) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[UV6]], [[C2]] + ; CHECK-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[LSHR10]](i16) + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXT6]], [[C]](i32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[AND3]], [[SHL6]] + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR6]](i32) + ; CHECK-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[LSHR11]](i16) + ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[ZEXT7]], [[C]](i32) + ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[LSHR7]], [[SHL7]] + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR7]](i32) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[UV7]], [[C2]] + ; CHECK-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[LSHR14]](i16) + ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[ZEXT8]], [[C]](i32) + ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[AND4]], [[SHL8]] + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR8]](i32) + ; CHECK-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[LSHR15]](i16) + ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXT9]], [[C]](i32) + ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[LSHR12]], [[SHL9]] + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR9]](i32) + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[UV8]], [[C2]] + ; CHECK-NEXT: [[ZEXT10:%[0-9]+]]:_(i32) = G_ZEXT [[LSHR16]](i16) + ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[ZEXT10]], 
[[C]](i32) + ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[AND5]], [[SHL10]] + ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR10]](i32) + ; CHECK-NEXT: [[ZEXT11:%[0-9]+]]:_(i32) = G_ZEXT [[LSHR17]](i16) + ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[ZEXT11]], [[C]](i32) + ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[LSHR13]], [[SHL11]] + ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR11]](i32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<24 x i16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x i16>), [[BITCAST1]](<2 x i16>), [[BITCAST2]](<2 x i16>), [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>), [[BITCAST5]](<2 x i16>), [[BITCAST6]](<2 x i16>), [[BITCAST7]](<2 x i16>), [[BITCAST8]](<2 x i16>), [[BITCAST9]](<2 x i16>), [[BITCAST10]](<2 x i16>), [[BITCAST11]](<2 x i16>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<24 x i16>) + %0:_(<3 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + %1:_(<24 x i8>) = G_BITCAST %0(<3 x i64>) + %2:_(<24 x i16>) = G_ANYEXT %1(<24 x i8>) + S_ENDPGM 0, implicit %2(<24 x i16>) ... --- @@ -2351,144 +2351,144 @@ body: | ; CHECK-LABEL: name: test_bitcast_v24s8_to_v3s64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<12 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<12 x s16>) = COPY $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<12 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; 
CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]] - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]] - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<12 x s16>) - ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST6]](s32) - ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; CHECK-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST7]](s32) - ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C1]] - ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C1]] - ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C2]](s16) - ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] - ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C1]] - ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C1]] - ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C2]](s16) - ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] - ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C1]] - ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C1]] - ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C2]](s16) - ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]] - ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C1]] - ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C1]] - ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], 
[[C2]](s16) - ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]] - ; CHECK-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) - ; CHECK-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) - ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C]](s32) - ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] - ; CHECK-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) - ; CHECK-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) - ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C]](s32) - ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) - ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) - ; CHECK-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST8]](s32) - ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) - ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) - ; CHECK-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST9]](s32) - ; CHECK-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) - ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) - ; CHECK-NEXT: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST10]](s32) - ; CHECK-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR10]](s32) - ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) - ; CHECK-NEXT: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST11]](s32) - ; CHECK-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST11]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) - ; CHECK-NEXT: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C1]] - ; CHECK-NEXT: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C1]] - ; CHECK-NEXT: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C2]](s16) - ; CHECK-NEXT: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL12]] - ; CHECK-NEXT: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C1]] - ; CHECK-NEXT: [[AND19:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C1]] - ; CHECK-NEXT: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C2]](s16) - ; CHECK-NEXT: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL13]] - ; CHECK-NEXT: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C1]] - ; CHECK-NEXT: [[AND21:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C1]] - ; CHECK-NEXT: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C2]](s16) - ; CHECK-NEXT: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL14]] - ; CHECK-NEXT: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C1]] - ; CHECK-NEXT: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C1]] - ; CHECK-NEXT: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C2]](s16) - ; CHECK-NEXT: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL15]] - ; CHECK-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) - ; CHECK-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16) - ; CHECK-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C]](s32) - ; CHECK-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]] - ; CHECK-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16) - ; CHECK-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16) - ; CHECK-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C]](s32) - ; CHECK-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] - ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = 
G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x s64>) - %0:_(<12 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - %1:_(<12 x s16>) = COPY $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 - %2:_(<24 x s16>) = G_CONCAT_VECTORS %0, %1 - %3:_(<24 x s8>) = G_TRUNC %2 - %4:_(<3 x s64>) = G_BITCAST %3 - S_ENDPGM 0, implicit %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<12 x i16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<12 x i16>) = COPY $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<12 x i16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST3]](i32) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C2]](i16) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C2]](i16) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(i16) = G_AND [[TRUNC4]], [[C1]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(i16) = G_AND [[TRUNC5]], [[C1]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[AND5]], [[C2]](i16) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(i16) = G_OR [[AND4]], [[SHL2]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(i16) = G_AND [[TRUNC6]], [[C1]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(i16) = G_AND [[TRUNC7]], [[C1]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(i16) = G_SHL [[AND7]], [[C2]](i16) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(i16) = G_OR [[AND6]], [[SHL3]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL4]] + ; 
CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[OR2]](i16) + ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[OR3]](i16) + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL5]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR4]](i32), [[OR5]](i32) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR4]](i32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[UV5]](<2 x i16>) + ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST5]](i32) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST5]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR5]](i32) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>), [[UV8:%[0-9]+]]:_(<2 x i16>), [[UV9:%[0-9]+]]:_(<2 x i16>), [[UV10:%[0-9]+]]:_(<2 x i16>), [[UV11:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<12 x i16>) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[UV6]](<2 x i16>) + ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST6]](i32) + ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST6]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR6]](i32) + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[UV7]](<2 x i16>) + ; CHECK-NEXT: [[TRUNC14:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST7]](i32) + ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST7]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC15:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR7]](i32) + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(i16) = G_AND [[TRUNC8]], [[C1]] + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(i16) = G_AND [[TRUNC9]], [[C1]] + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(i16) = G_SHL [[AND9]], [[C2]](i16) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(i16) = G_OR [[AND8]], [[SHL6]] + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(i16) = G_AND [[TRUNC10]], [[C1]] + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(i16) = G_AND [[TRUNC11]], [[C1]] + ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(i16) = G_SHL [[AND11]], [[C2]](i16) + ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(i16) = G_OR [[AND10]], [[SHL7]] + ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(i16) = G_AND [[TRUNC12]], [[C1]] + ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(i16) = G_AND [[TRUNC13]], [[C1]] + ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(i16) = G_SHL [[AND13]], [[C2]](i16) + ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(i16) = G_OR [[AND12]], [[SHL8]] + ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(i16) = G_AND [[TRUNC14]], [[C1]] + ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(i16) = G_AND [[TRUNC15]], [[C1]] + ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(i16) = G_SHL [[AND15]], [[C2]](i16) + ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(i16) = G_OR [[AND14]], [[SHL9]] + ; CHECK-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[OR6]](i16) + ; CHECK-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[OR7]](i16) + ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[ZEXT5]], [[C]](i32) + ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[ZEXT4]], [[SHL10]] + ; CHECK-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[OR8]](i16) + ; CHECK-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[OR9]](i16) + ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[ZEXT7]], [[C]](i32) + ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[ZEXT6]], [[SHL11]] + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR10]](i32), [[OR11]](i32) + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[UV8]](<2 x i16>) + ; CHECK-NEXT: 
[[TRUNC16:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST8]](i32) + ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST8]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC17:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR8]](i32) + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[UV9]](<2 x i16>) + ; CHECK-NEXT: [[TRUNC18:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST9]](i32) + ; CHECK-NEXT: [[LSHR9:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST9]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC19:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR9]](i32) + ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[UV10]](<2 x i16>) + ; CHECK-NEXT: [[TRUNC20:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST10]](i32) + ; CHECK-NEXT: [[LSHR10:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST10]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC21:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR10]](i32) + ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(i32) = G_BITCAST [[UV11]](<2 x i16>) + ; CHECK-NEXT: [[TRUNC22:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST11]](i32) + ; CHECK-NEXT: [[LSHR11:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST11]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC23:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR11]](i32) + ; CHECK-NEXT: [[AND16:%[0-9]+]]:_(i16) = G_AND [[TRUNC16]], [[C1]] + ; CHECK-NEXT: [[AND17:%[0-9]+]]:_(i16) = G_AND [[TRUNC17]], [[C1]] + ; CHECK-NEXT: [[SHL12:%[0-9]+]]:_(i16) = G_SHL [[AND17]], [[C2]](i16) + ; CHECK-NEXT: [[OR12:%[0-9]+]]:_(i16) = G_OR [[AND16]], [[SHL12]] + ; CHECK-NEXT: [[AND18:%[0-9]+]]:_(i16) = G_AND [[TRUNC18]], [[C1]] + ; CHECK-NEXT: [[AND19:%[0-9]+]]:_(i16) = G_AND [[TRUNC19]], [[C1]] + ; CHECK-NEXT: [[SHL13:%[0-9]+]]:_(i16) = G_SHL [[AND19]], [[C2]](i16) + ; CHECK-NEXT: [[OR13:%[0-9]+]]:_(i16) = G_OR [[AND18]], [[SHL13]] + ; CHECK-NEXT: [[AND20:%[0-9]+]]:_(i16) = G_AND [[TRUNC20]], [[C1]] + ; CHECK-NEXT: [[AND21:%[0-9]+]]:_(i16) = G_AND [[TRUNC21]], [[C1]] + ; CHECK-NEXT: [[SHL14:%[0-9]+]]:_(i16) = G_SHL [[AND21]], [[C2]](i16) + ; CHECK-NEXT: [[OR14:%[0-9]+]]:_(i16) = G_OR [[AND20]], [[SHL14]] + ; CHECK-NEXT: [[AND22:%[0-9]+]]:_(i16) = G_AND [[TRUNC22]], [[C1]] + ; CHECK-NEXT: [[AND23:%[0-9]+]]:_(i16) = G_AND [[TRUNC23]], [[C1]] + ; CHECK-NEXT: [[SHL15:%[0-9]+]]:_(i16) = G_SHL [[AND23]], [[C2]](i16) + ; CHECK-NEXT: [[OR15:%[0-9]+]]:_(i16) = G_OR [[AND22]], [[SHL15]] + ; CHECK-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[OR12]](i16) + ; CHECK-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[OR13]](i16) + ; CHECK-NEXT: [[SHL16:%[0-9]+]]:_(i32) = G_SHL [[ZEXT9]], [[C]](i32) + ; CHECK-NEXT: [[OR16:%[0-9]+]]:_(i32) = G_OR [[ZEXT8]], [[SHL16]] + ; CHECK-NEXT: [[ZEXT10:%[0-9]+]]:_(i32) = G_ZEXT [[OR14]](i16) + ; CHECK-NEXT: [[ZEXT11:%[0-9]+]]:_(i32) = G_ZEXT [[OR15]](i16) + ; CHECK-NEXT: [[SHL17:%[0-9]+]]:_(i32) = G_SHL [[ZEXT11]], [[C]](i32) + ; CHECK-NEXT: [[OR17:%[0-9]+]]:_(i32) = G_OR [[ZEXT10]], [[SHL17]] + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR16]](i32), [[OR17]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x i64>) + %0:_(<12 x i16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + %1:_(<12 x i16>) = COPY $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 + %2:_(<24 x i16>) = G_CONCAT_VECTORS %0(<12 x i16>), %1(<12 x i16>) + %3:_(<24 x i8>) = G_TRUNC %2(<24 x i16>) + %4:_(<3 x i64>) = G_BITCAST %3(<24 x i8>) + S_ENDPGM 0, implicit %4(<3 x i64>) ... 
--- @@ -2500,57 +2500,57 @@ body: | ; CHECK-LABEL: name: test_bitcast_v4s16_to_v8s8 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16) - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16) - ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C1]](s16) - ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C1]](s16) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC]] - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[LSHR2]], [[LSHR2]] - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC1]] - ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[LSHR3]], [[LSHR3]] - ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC2]] - ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[LSHR4]], [[LSHR4]] - ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[TRUNC3]] - ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[LSHR5]], [[LSHR5]] - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) - ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ADD2]](s16) - ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[ADD3]](s16) - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[ADD4]](s16) - ; CHECK-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[ADD5]](s16) - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL2]] - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[ADD6]](s16) - ; CHECK-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[ADD7]](s16) - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C]](s32) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL3]] - ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<8 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<8 x s8>) = G_BITCAST %0 - %2:_(<8 x s8>) = G_ADD %1, %1 - %3:_(<8 x 
s16>) = G_ANYEXT %2 - S_ENDPGM 0, implicit %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC]], [[C1]](i16) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC1]], [[C1]](i16) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC2]], [[C1]](i16) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC3]], [[C1]](i16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i16) = G_ADD [[TRUNC]], [[TRUNC]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(i16) = G_ADD [[LSHR2]], [[LSHR2]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(i16) = G_ADD [[TRUNC1]], [[TRUNC1]] + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(i16) = G_ADD [[LSHR3]], [[LSHR3]] + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(i16) = G_ADD [[TRUNC2]], [[TRUNC2]] + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(i16) = G_ADD [[LSHR4]], [[LSHR4]] + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(i16) = G_ADD [[TRUNC3]], [[TRUNC3]] + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(i16) = G_ADD [[LSHR5]], [[LSHR5]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[ADD]](i16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[ADD1]](i16) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[ADD2]](i16) + ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[ADD3]](i16) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CHECK-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[ADD4]](i16) + ; CHECK-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[ADD5]](i16) + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT5]], [[C]](i32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT4]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; CHECK-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[ADD6]](i16) + ; CHECK-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[ADD7]](i16) + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXT7]], [[C]](i32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[ZEXT6]], [[SHL3]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR3]](i32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x i16>), [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>), [[BITCAST5]](<2 x i16>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<8 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<8 x i8>) = G_BITCAST %0(<4 x i16>) + %2:_(<8 x i8>) = G_ADD %1, %1 + %3:_(<8 x i16>) = G_ANYEXT %2(<8 x i8>) + S_ENDPGM 0, implicit %3(<8 x i16>) ... 
--- @@ -2562,82 +2562,82 @@ body: | ; CHECK-LABEL: name: test_bitcast_v8s8_to_v4s16 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<8 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[BITCAST]](s32) - ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8) - ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s8) = G_TRUNC [[BITCAST1]](s32) - ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR1]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s8) = COPY [[TRUNC10]](s8) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s8) = COPY [[TRUNC11]](s8) - ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(s8) = G_TRUNC [[BITCAST2]](s32) - ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR2]](s32) - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8) - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s8) = COPY [[TRUNC10]](s8) - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s8) = COPY [[TRUNC11]](s8) - ; 
CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s8) = COPY [[TRUNC12]](s8) - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s8) = COPY [[TRUNC13]](s8) - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]] - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]] - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[OR]], [[OR]] - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[OR1]], [[OR1]] - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[OR2]], [[OR2]] - ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[OR3]], [[OR3]] - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) - ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ADD2]](s16) - ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[ADD3]](s16) - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] - ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<8 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<8 x s8>) = G_TRUNC %0 - %2:_(<4 x s16>) = G_BITCAST %1 - %3:_(<4 x s16>) = G_ADD %2, %2 - S_ENDPGM 0, implicit %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x i16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<8 x i16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST3]](i32) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(i8) = G_TRUNC [[BITCAST]](i32) + ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(i8) = G_TRUNC [[LSHR]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; CHECK-NEXT: 
[[SHL:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C2]](i16) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i8) = COPY [[TRUNC8]](i8) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i8) = COPY [[TRUNC9]](i8) + ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(i8) = G_TRUNC [[BITCAST1]](i32) + ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(i8) = G_TRUNC [[LSHR1]](i32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C2]](i16) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i8) = COPY [[TRUNC8]](i8) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i8) = COPY [[TRUNC9]](i8) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i8) = COPY [[TRUNC10]](i8) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(i8) = COPY [[TRUNC11]](i8) + ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(i8) = G_TRUNC [[BITCAST2]](i32) + ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(i8) = G_TRUNC [[LSHR2]](i32) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(i16) = G_AND [[TRUNC4]], [[C1]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(i16) = G_AND [[TRUNC5]], [[C1]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[AND5]], [[C2]](i16) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(i16) = G_OR [[AND4]], [[SHL2]] + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(i8) = COPY [[TRUNC8]](i8) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(i8) = COPY [[TRUNC9]](i8) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(i8) = COPY [[TRUNC10]](i8) + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(i8) = COPY [[TRUNC11]](i8) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(i8) = COPY [[TRUNC12]](i8) + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(i8) = COPY [[TRUNC13]](i8) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(i16) = G_AND [[TRUNC6]], [[C1]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(i16) = G_AND [[TRUNC7]], [[C1]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(i16) = G_SHL [[AND7]], [[C2]](i16) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(i16) = G_OR [[AND6]], [[SHL3]] + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i16) = G_ADD [[OR]], [[OR]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(i16) = G_ADD [[OR1]], [[OR1]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(i16) = G_ADD [[OR2]], [[OR2]] + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(i16) = G_ADD [[OR3]], [[OR3]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[ADD]](i16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[ADD1]](i16) + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL4]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR4]](i32) + ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[ADD2]](i16) + ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[ADD3]](i16) + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL5]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR5]](i32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x i16>), [[BITCAST5]](<2 x i16>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<4 x i16>) + %0:_(<8 x i16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<8 x i8>) = G_TRUNC %0(<8 x i16>) + %2:_(<4 x i16>) = G_BITCAST %1(<8 x i8>) + %3:_(<4 x i16>) = G_ADD %2, %2 + S_ENDPGM 0, implicit %3(<4 x i16>) ... 
--- @@ -2649,47 +2649,47 @@ body: | ; CHECK-LABEL: name: test_bitcast_v64s32_to_v32s64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>), [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>), [[UV4:%[0-9]+]]:_(<2 x s32>), [[UV5:%[0-9]+]]:_(<2 x s32>), [[UV6:%[0-9]+]]:_(<2 x s32>), [[UV7:%[0-9]+]]:_(<2 x s32>), [[UV8:%[0-9]+]]:_(<2 x s32>), [[UV9:%[0-9]+]]:_(<2 x s32>), [[UV10:%[0-9]+]]:_(<2 x s32>), [[UV11:%[0-9]+]]:_(<2 x s32>), [[UV12:%[0-9]+]]:_(<2 x s32>), [[UV13:%[0-9]+]]:_(<2 x s32>), [[UV14:%[0-9]+]]:_(<2 x s32>), [[UV15:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<32 x s32>) - ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(<2 x s32>), [[UV17:%[0-9]+]]:_(<2 x s32>), [[UV18:%[0-9]+]]:_(<2 x s32>), [[UV19:%[0-9]+]]:_(<2 x s32>), [[UV20:%[0-9]+]]:_(<2 x s32>), [[UV21:%[0-9]+]]:_(<2 x s32>), [[UV22:%[0-9]+]]:_(<2 x s32>), [[UV23:%[0-9]+]]:_(<2 x s32>), [[UV24:%[0-9]+]]:_(<2 x s32>), [[UV25:%[0-9]+]]:_(<2 x s32>), [[UV26:%[0-9]+]]:_(<2 x s32>), [[UV27:%[0-9]+]]:_(<2 x s32>), [[UV28:%[0-9]+]]:_(<2 x s32>), [[UV29:%[0-9]+]]:_(<2 x s32>), [[UV30:%[0-9]+]]:_(<2 x s32>), [[UV31:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<32 x s32>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[UV]](<2 x s32>) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s64) = G_BITCAST [[UV1]](<2 x s32>) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s64) = G_BITCAST [[UV2]](<2 x s32>) - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s64) = G_BITCAST [[UV3]](<2 x s32>) - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s64) = G_BITCAST [[UV4]](<2 x s32>) - ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s64) = G_BITCAST [[UV5]](<2 x s32>) - ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s64) = G_BITCAST [[UV6]](<2 x s32>) - ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s64) = G_BITCAST [[UV7]](<2 x s32>) - ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s64) = G_BITCAST [[UV8]](<2 x s32>) - ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(s64) = G_BITCAST [[UV9]](<2 x s32>) - ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s64) = G_BITCAST [[UV10]](<2 x s32>) - ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(s64) = G_BITCAST [[UV11]](<2 x s32>) - ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(s64) = G_BITCAST [[UV12]](<2 x s32>) - ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(s64) = G_BITCAST [[UV13]](<2 x s32>) - ; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(s64) = G_BITCAST [[UV14]](<2 x s32>) - ; CHECK-NEXT: [[BITCAST15:%[0-9]+]]:_(s64) = G_BITCAST [[UV15]](<2 x s32>) - ; CHECK-NEXT: [[BITCAST16:%[0-9]+]]:_(s64) = G_BITCAST [[UV16]](<2 x s32>) - ; CHECK-NEXT: [[BITCAST17:%[0-9]+]]:_(s64) = G_BITCAST [[UV17]](<2 x s32>) - ; CHECK-NEXT: [[BITCAST18:%[0-9]+]]:_(s64) = G_BITCAST [[UV18]](<2 x s32>) - ; CHECK-NEXT: [[BITCAST19:%[0-9]+]]:_(s64) = G_BITCAST [[UV19]](<2 x s32>) - ; CHECK-NEXT: [[BITCAST20:%[0-9]+]]:_(s64) = G_BITCAST [[UV20]](<2 x s32>) - ; CHECK-NEXT: [[BITCAST21:%[0-9]+]]:_(s64) = G_BITCAST [[UV21]](<2 x s32>) - ; CHECK-NEXT: [[BITCAST22:%[0-9]+]]:_(s64) = G_BITCAST [[UV22]](<2 x s32>) - ; CHECK-NEXT: [[BITCAST23:%[0-9]+]]:_(s64) = G_BITCAST [[UV23]](<2 x s32>) - ; CHECK-NEXT: 
[[BITCAST24:%[0-9]+]]:_(s64) = G_BITCAST [[UV24]](<2 x s32>) - ; CHECK-NEXT: [[BITCAST25:%[0-9]+]]:_(s64) = G_BITCAST [[UV25]](<2 x s32>) - ; CHECK-NEXT: [[BITCAST26:%[0-9]+]]:_(s64) = G_BITCAST [[UV26]](<2 x s32>) - ; CHECK-NEXT: [[BITCAST27:%[0-9]+]]:_(s64) = G_BITCAST [[UV27]](<2 x s32>) - ; CHECK-NEXT: [[BITCAST28:%[0-9]+]]:_(s64) = G_BITCAST [[UV28]](<2 x s32>) - ; CHECK-NEXT: [[BITCAST29:%[0-9]+]]:_(s64) = G_BITCAST [[UV29]](<2 x s32>) - ; CHECK-NEXT: [[BITCAST30:%[0-9]+]]:_(s64) = G_BITCAST [[UV30]](<2 x s32>) - ; CHECK-NEXT: [[BITCAST31:%[0-9]+]]:_(s64) = G_BITCAST [[UV31]](<2 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[BITCAST]](s64), [[BITCAST1]](s64), [[BITCAST2]](s64), [[BITCAST3]](s64), [[BITCAST4]](s64), [[BITCAST5]](s64), [[BITCAST6]](s64), [[BITCAST7]](s64), [[BITCAST8]](s64), [[BITCAST9]](s64), [[BITCAST10]](s64), [[BITCAST11]](s64), [[BITCAST12]](s64), [[BITCAST13]](s64), [[BITCAST14]](s64), [[BITCAST15]](s64) - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[BITCAST16]](s64), [[BITCAST17]](s64), [[BITCAST18]](s64), [[BITCAST19]](s64), [[BITCAST20]](s64), [[BITCAST21]](s64), [[BITCAST22]](s64), [[BITCAST23]](s64), [[BITCAST24]](s64), [[BITCAST25]](s64), [[BITCAST26]](s64), [[BITCAST27]](s64), [[BITCAST28]](s64), [[BITCAST29]](s64), [[BITCAST30]](s64), [[BITCAST31]](s64) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<16 x s64>), implicit [[BUILD_VECTOR1]](<16 x s64>) - %0:_(<32 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - %1:_(<64 x s32>) = G_CONCAT_VECTORS %0, %0 - %2:_(<32 x s64>) = G_BITCAST %1 - %3:_(<16 x s64>), %4:_(<16 x s64>) = G_UNMERGE_VALUES %2 - S_ENDPGM 0, implicit %3, implicit %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i32>), [[UV1:%[0-9]+]]:_(<2 x i32>), [[UV2:%[0-9]+]]:_(<2 x i32>), [[UV3:%[0-9]+]]:_(<2 x i32>), [[UV4:%[0-9]+]]:_(<2 x i32>), [[UV5:%[0-9]+]]:_(<2 x i32>), [[UV6:%[0-9]+]]:_(<2 x i32>), [[UV7:%[0-9]+]]:_(<2 x i32>), [[UV8:%[0-9]+]]:_(<2 x i32>), [[UV9:%[0-9]+]]:_(<2 x i32>), [[UV10:%[0-9]+]]:_(<2 x i32>), [[UV11:%[0-9]+]]:_(<2 x i32>), [[UV12:%[0-9]+]]:_(<2 x i32>), [[UV13:%[0-9]+]]:_(<2 x i32>), [[UV14:%[0-9]+]]:_(<2 x i32>), [[UV15:%[0-9]+]]:_(<2 x i32>) = G_UNMERGE_VALUES [[COPY]](<32 x i32>) + ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(<2 x i32>), [[UV17:%[0-9]+]]:_(<2 x i32>), [[UV18:%[0-9]+]]:_(<2 x i32>), [[UV19:%[0-9]+]]:_(<2 x i32>), [[UV20:%[0-9]+]]:_(<2 x i32>), [[UV21:%[0-9]+]]:_(<2 x i32>), [[UV22:%[0-9]+]]:_(<2 x i32>), [[UV23:%[0-9]+]]:_(<2 x i32>), [[UV24:%[0-9]+]]:_(<2 x i32>), [[UV25:%[0-9]+]]:_(<2 x i32>), [[UV26:%[0-9]+]]:_(<2 x i32>), [[UV27:%[0-9]+]]:_(<2 x i32>), [[UV28:%[0-9]+]]:_(<2 x i32>), [[UV29:%[0-9]+]]:_(<2 x i32>), [[UV30:%[0-9]+]]:_(<2 x i32>), [[UV31:%[0-9]+]]:_(<2 x i32>) = G_UNMERGE_VALUES [[COPY]](<32 x i32>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i64) = G_BITCAST [[UV]](<2 x i32>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[UV1]](<2 x i32>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i64) = G_BITCAST [[UV2]](<2 x i32>) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i64) = G_BITCAST [[UV3]](<2 x i32>) 
+ ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(i64) = G_BITCAST [[UV4]](<2 x i32>) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(i64) = G_BITCAST [[UV5]](<2 x i32>) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(i64) = G_BITCAST [[UV6]](<2 x i32>) + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(i64) = G_BITCAST [[UV7]](<2 x i32>) + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(i64) = G_BITCAST [[UV8]](<2 x i32>) + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(i64) = G_BITCAST [[UV9]](<2 x i32>) + ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(i64) = G_BITCAST [[UV10]](<2 x i32>) + ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(i64) = G_BITCAST [[UV11]](<2 x i32>) + ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(i64) = G_BITCAST [[UV12]](<2 x i32>) + ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(i64) = G_BITCAST [[UV13]](<2 x i32>) + ; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(i64) = G_BITCAST [[UV14]](<2 x i32>) + ; CHECK-NEXT: [[BITCAST15:%[0-9]+]]:_(i64) = G_BITCAST [[UV15]](<2 x i32>) + ; CHECK-NEXT: [[BITCAST16:%[0-9]+]]:_(i64) = G_BITCAST [[UV16]](<2 x i32>) + ; CHECK-NEXT: [[BITCAST17:%[0-9]+]]:_(i64) = G_BITCAST [[UV17]](<2 x i32>) + ; CHECK-NEXT: [[BITCAST18:%[0-9]+]]:_(i64) = G_BITCAST [[UV18]](<2 x i32>) + ; CHECK-NEXT: [[BITCAST19:%[0-9]+]]:_(i64) = G_BITCAST [[UV19]](<2 x i32>) + ; CHECK-NEXT: [[BITCAST20:%[0-9]+]]:_(i64) = G_BITCAST [[UV20]](<2 x i32>) + ; CHECK-NEXT: [[BITCAST21:%[0-9]+]]:_(i64) = G_BITCAST [[UV21]](<2 x i32>) + ; CHECK-NEXT: [[BITCAST22:%[0-9]+]]:_(i64) = G_BITCAST [[UV22]](<2 x i32>) + ; CHECK-NEXT: [[BITCAST23:%[0-9]+]]:_(i64) = G_BITCAST [[UV23]](<2 x i32>) + ; CHECK-NEXT: [[BITCAST24:%[0-9]+]]:_(i64) = G_BITCAST [[UV24]](<2 x i32>) + ; CHECK-NEXT: [[BITCAST25:%[0-9]+]]:_(i64) = G_BITCAST [[UV25]](<2 x i32>) + ; CHECK-NEXT: [[BITCAST26:%[0-9]+]]:_(i64) = G_BITCAST [[UV26]](<2 x i32>) + ; CHECK-NEXT: [[BITCAST27:%[0-9]+]]:_(i64) = G_BITCAST [[UV27]](<2 x i32>) + ; CHECK-NEXT: [[BITCAST28:%[0-9]+]]:_(i64) = G_BITCAST [[UV28]](<2 x i32>) + ; CHECK-NEXT: [[BITCAST29:%[0-9]+]]:_(i64) = G_BITCAST [[UV29]](<2 x i32>) + ; CHECK-NEXT: [[BITCAST30:%[0-9]+]]:_(i64) = G_BITCAST [[UV30]](<2 x i32>) + ; CHECK-NEXT: [[BITCAST31:%[0-9]+]]:_(i64) = G_BITCAST [[UV31]](<2 x i32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x i64>) = G_BUILD_VECTOR [[BITCAST]](i64), [[BITCAST1]](i64), [[BITCAST2]](i64), [[BITCAST3]](i64), [[BITCAST4]](i64), [[BITCAST5]](i64), [[BITCAST6]](i64), [[BITCAST7]](i64), [[BITCAST8]](i64), [[BITCAST9]](i64), [[BITCAST10]](i64), [[BITCAST11]](i64), [[BITCAST12]](i64), [[BITCAST13]](i64), [[BITCAST14]](i64), [[BITCAST15]](i64) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x i64>) = G_BUILD_VECTOR [[BITCAST16]](i64), [[BITCAST17]](i64), [[BITCAST18]](i64), [[BITCAST19]](i64), [[BITCAST20]](i64), [[BITCAST21]](i64), [[BITCAST22]](i64), [[BITCAST23]](i64), [[BITCAST24]](i64), [[BITCAST25]](i64), [[BITCAST26]](i64), [[BITCAST27]](i64), [[BITCAST28]](i64), [[BITCAST29]](i64), [[BITCAST30]](i64), [[BITCAST31]](i64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<16 x i64>), implicit [[BUILD_VECTOR1]](<16 x i64>) + %0:_(<32 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + %1:_(<64 x i32>) = G_CONCAT_VECTORS %0(<32 x i32>), %0(<32 x i32>) + %2:_(<32 x i64>) = G_BITCAST %1(<64 x i32>) + %3:_(<16 x i64>), %4:_(<16 x i64>) = G_UNMERGE_VALUES %2(<32 x i64>) + S_ENDPGM 0, implicit %3(<16 x i64>), implicit %4(<16 x i64>) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitreverse.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitreverse.mir index 92eb440fbd8ea..e8c31ce1c983d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitreverse.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitreverse.mir @@ -10,16 +10,16 @@ body: | ; CHECK-LABEL: name: bitreverse_s8 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:_(s32) = G_BITREVERSE [[COPY]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITREVERSE]], [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s8) = G_TRUNC %0 - %2:_(s8) = G_BITREVERSE %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:_(i32) = G_BITREVERSE [[COPY]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITREVERSE]], [[C]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i8) = G_TRUNC %0(i32) + %2:_(i8) = G_BITREVERSE %1 + %3:_(i32) = G_ANYEXT %2(i8) + $vgpr0 = COPY %3(i32) ... --- @@ -31,16 +31,16 @@ body: | ; CHECK-LABEL: name: bitreverse_s16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:_(s32) = G_BITREVERSE [[COPY]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITREVERSE]], [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s16) = G_BITREVERSE %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:_(i32) = G_BITREVERSE [[COPY]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITREVERSE]], [[C]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(i16) = G_BITREVERSE %1 + %3:_(i32) = G_ANYEXT %2(i16) + $vgpr0 = COPY %3(i32) ... --- @@ -52,16 +52,16 @@ body: | ; CHECK-LABEL: name: bitreverse_s24 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:_(s32) = G_BITREVERSE [[COPY]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITREVERSE]], [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s24) = G_TRUNC %0 - %2:_(s24) = G_BITREVERSE %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:_(i32) = G_BITREVERSE [[COPY]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITREVERSE]], [[C]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i24) = G_TRUNC %0(i32) + %2:_(i24) = G_BITREVERSE %1 + %3:_(i32) = G_ANYEXT %2(i24) + $vgpr0 = COPY %3(i32) ... 
--- @@ -73,12 +73,12 @@ body: | ; CHECK-LABEL: name: bitreverse_s32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:_(s32) = G_BITREVERSE [[COPY]] - ; CHECK-NEXT: $vgpr0 = COPY [[BITREVERSE]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_BITREVERSE %0 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:_(i32) = G_BITREVERSE [[COPY]] + ; CHECK-NEXT: $vgpr0 = COPY [[BITREVERSE]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_BITREVERSE %0 + $vgpr0 = COPY %1(i32) ... --- @@ -90,21 +90,21 @@ body: | ; CHECK-LABEL: name: bitreverse_v2s16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:_(s32) = G_BITREVERSE [[BITCAST]] - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITREVERSE]], [[C]](s32) - ; CHECK-NEXT: [[BITREVERSE1:%[0-9]+]]:_(s32) = G_BITREVERSE [[LSHR]] - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITREVERSE1]], [[C]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL]] - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = G_BITREVERSE %0 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:_(i32) = G_BITREVERSE [[BITCAST]] + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITREVERSE]], [[C]](i32) + ; CHECK-NEXT: [[BITREVERSE1:%[0-9]+]]:_(i32) = G_BITREVERSE [[LSHR]] + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITREVERSE1]], [[C]](i32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LSHR2]], [[C]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[LSHR1]], [[SHL]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = G_BITREVERSE %0 + $vgpr0 = COPY %1(<2 x i16>) ... 
--- @@ -116,15 +116,15 @@ body: | ; CHECK-LABEL: name: bitreverse_v2s32 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:_(s32) = G_BITREVERSE [[UV]] - ; CHECK-NEXT: [[BITREVERSE1:%[0-9]+]]:_(s32) = G_BITREVERSE [[UV1]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[BITREVERSE]](s32), [[BITREVERSE1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_BITREVERSE %0 - $vgpr0_vgpr1 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:_(i32) = G_BITREVERSE [[UV]] + ; CHECK-NEXT: [[BITREVERSE1:%[0-9]+]]:_(i32) = G_BITREVERSE [[UV1]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[BITREVERSE]](i32), [[BITREVERSE1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = G_BITREVERSE %0 + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... --- @@ -136,12 +136,12 @@ body: | ; CHECK-LABEL: name: bitreverse_s64 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:_(s64) = G_BITREVERSE [[COPY]] - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BITREVERSE]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_BITREVERSE %0 - $vgpr0_vgpr1 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:_(i64) = G_BITREVERSE [[COPY]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BITREVERSE]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = G_BITREVERSE %0 + $vgpr0_vgpr1 = COPY %1(i64) ... --- @@ -153,13 +153,13 @@ body: | ; CHECK-LABEL: name: bitreverse_v2s64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:_(s64) = G_BITREVERSE [[UV]] - ; CHECK-NEXT: [[BITREVERSE1:%[0-9]+]]:_(s64) = G_BITREVERSE [[UV1]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[BITREVERSE]](s64), [[BITREVERSE1]](s64) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s64>) = G_BITREVERSE %0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:_(i64) = G_BITREVERSE [[UV]] + ; CHECK-NEXT: [[BITREVERSE1:%[0-9]+]]:_(i64) = G_BITREVERSE [[UV1]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[BITREVERSE]](i64), [[BITREVERSE1]](i64) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x i64>) = G_BITREVERSE %0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x i64>) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-block-addr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-block-addr.mir index 9b5a0b2de4f81..598843d2f897f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-block-addr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-block-addr.mir @@ -23,6 +23,6 @@ body: | ; CHECK: [[BLOCK_ADDR:%[0-9]+]]:_(p0) = G_BLOCK_ADDR blockaddress(@test_blockaddress, %ir-block.block) ; CHECK-NEXT: S_ENDPGM 0, implicit [[BLOCK_ADDR]](p0) %0:_(p0) = G_BLOCK_ADDR blockaddress(@test_blockaddress, %ir-block.block) - S_ENDPGM 0, implicit %0 + S_ENDPGM 0, implicit %0(p0) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-brcond.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-brcond.mir index 57bbe020dca85..76ee03b7527fe 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-brcond.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-brcond.mir @@ -11,32 +11,35 @@ body: | ; WAVE64-NEXT: successors: %bb.1(0x80000000) ; WAVE64-NEXT: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; WAVE64-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; WAVE64-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; WAVE64-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; WAVE64-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; WAVE64-NEXT: G_BRCOND [[ICMP]](i1), %bb.1 ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: bb.1: + ; ; WAVE32-LABEL: name: legal_brcond_vcc ; WAVE32: bb.0: ; WAVE32-NEXT: successors: %bb.1(0x80000000) ; WAVE32-NEXT: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; WAVE32-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; WAVE32-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; WAVE32-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; WAVE32-NEXT: G_BRCOND [[ICMP]](i1), %bb.1 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: bb.1: bb.0: - successors: %bb.1 + successors: %bb.1(0x80000000) liveins: $vgpr0, $vgpr1 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s1) = G_ICMP intpred(ne), %0, %1 - G_BRCOND %2, %bb.1 + + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + G_BRCOND %2(i1), %bb.1 bb.1: + ... 
--- @@ -49,33 +52,37 @@ body: | ; WAVE64-NEXT: successors: %bb.1(0x80000000) ; WAVE64-NEXT: liveins: $sgpr0, $sgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; WAVE64-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] - ; WAVE64-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; WAVE64-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr1 + ; WAVE64-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY1]] + ; WAVE64-NEXT: G_BRCOND [[ICMP]](i1), %bb.1 ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: bb.1: + ; ; WAVE32-LABEL: name: legal_brcond_sgpr_s1 ; WAVE32: bb.0: ; WAVE32-NEXT: successors: %bb.1(0x80000000) ; WAVE32-NEXT: liveins: $sgpr0, $sgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; WAVE32-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] - ; WAVE32-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr1 + ; WAVE32-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY1]] + ; WAVE32-NEXT: G_BRCOND [[ICMP]](i1), %bb.1 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: bb.1: bb.0: + successors: %bb.1(0x80000000) liveins: $sgpr0, $sgpr1 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s1) = G_ICMP intpred(eq), %0, %1 - G_BRCOND %2, %bb.1 + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i1) = G_ICMP intpred(eq), %0(i32), %1 + G_BRCOND %2(i1), %bb.1 bb.1: + + ... --- @@ -88,33 +95,37 @@ body: | ; WAVE64-NEXT: successors: %bb.1(0x80000000) ; WAVE64-NEXT: liveins: $sgpr0, $sgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; WAVE64-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] - ; WAVE64-NEXT: G_BRCOND [[ICMP]](s32), %bb.1 + ; WAVE64-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr1 + ; WAVE64-NEXT: [[ICMP:%[0-9]+]]:_(i32) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY1]] + ; WAVE64-NEXT: G_BRCOND [[ICMP]](i32), %bb.1 ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: bb.1: + ; ; WAVE32-LABEL: name: legal_brcond_sgpr_s32 ; WAVE32: bb.0: ; WAVE32-NEXT: successors: %bb.1(0x80000000) ; WAVE32-NEXT: liveins: $sgpr0, $sgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; WAVE32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] - ; WAVE32-NEXT: G_BRCOND [[ICMP]](s32), %bb.1 + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr1 + ; WAVE32-NEXT: [[ICMP:%[0-9]+]]:_(i32) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY1]] + ; WAVE32-NEXT: G_BRCOND [[ICMP]](i32), %bb.1 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: bb.1: bb.0: + successors: %bb.1(0x80000000) liveins: $sgpr0, $sgpr1 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ICMP intpred(eq), %0, %1 - G_BRCOND %2, %bb.1 + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_ICMP intpred(eq), %0(i32), %1 + G_BRCOND %2(i32), %bb.1 bb.1: + + ... 
--- @@ -125,35 +136,38 @@ body: | ; WAVE64-NEXT: successors: %bb.1(0x80000000) ; WAVE64-NEXT: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; WAVE64-NEXT: [[ICMP:%[0-9]+]]:sreg_64_xexec(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; WAVE64-NEXT: [[SI_IF:%[0-9]+]]:sreg_64_xexec(s64) = SI_IF [[ICMP]](s1), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + ; WAVE64-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; WAVE64-NEXT: [[ICMP:%[0-9]+]]:sreg_64_xexec(i1) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; WAVE64-NEXT: [[SI_IF:%[0-9]+]]:sreg_64_xexec(i64) = SI_IF [[ICMP]](i1), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec ; WAVE64-NEXT: G_BR %bb.1 ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: bb.1: + ; ; WAVE32-LABEL: name: brcond_si_if ; WAVE32: bb.0: ; WAVE32-NEXT: successors: %bb.1(0x80000000) ; WAVE32-NEXT: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; WAVE32-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; WAVE32-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s64) = SI_IF [[ICMP]](s1), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; WAVE32-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(i1) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; WAVE32-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(i64) = SI_IF [[ICMP]](i1), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec ; WAVE32-NEXT: G_BR %bb.1 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: bb.1: bb.0: - successors: %bb.1 + successors: %bb.1(0x80000000) liveins: $vgpr0, $vgpr1 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s1) = G_ICMP intpred(ne), %0, %1 - %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2 - G_BRCOND %3, %bb.1 + + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %3:_(i1), %4:_(i64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2(i1) + G_BRCOND %3(i1), %bb.1 bb.1: + ... 
--- @@ -164,35 +178,38 @@ body: | ; WAVE64-NEXT: successors: %bb.1(0x80000000) ; WAVE64-NEXT: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; WAVE64-NEXT: [[ICMP:%[0-9]+]]:sreg_64_xexec(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; WAVE64-NEXT: [[SI_ELSE:%[0-9]+]]:sreg_64_xexec(s64) = SI_ELSE [[ICMP]](s1), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + ; WAVE64-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; WAVE64-NEXT: [[ICMP:%[0-9]+]]:sreg_64_xexec(i1) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; WAVE64-NEXT: [[SI_ELSE:%[0-9]+]]:sreg_64_xexec(i64) = SI_ELSE [[ICMP]](i1), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec ; WAVE64-NEXT: G_BR %bb.1 ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: bb.1: + ; ; WAVE32-LABEL: name: brcond_si_else ; WAVE32: bb.0: ; WAVE32-NEXT: successors: %bb.1(0x80000000) ; WAVE32-NEXT: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; WAVE32-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; WAVE32-NEXT: [[SI_ELSE:%[0-9]+]]:sreg_32_xm0_xexec(s64) = SI_ELSE [[ICMP]](s1), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; WAVE32-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(i1) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; WAVE32-NEXT: [[SI_ELSE:%[0-9]+]]:sreg_32_xm0_xexec(i64) = SI_ELSE [[ICMP]](i1), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec ; WAVE32-NEXT: G_BR %bb.1 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: bb.1: bb.0: - successors: %bb.1 + successors: %bb.1(0x80000000) liveins: $vgpr0, $vgpr1 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s1) = G_ICMP intpred(ne), %0, %1 - %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.else), %2 - G_BRCOND %3, %bb.1 + + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %3:_(i1), %4:_(i64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.else), %2(i1) + G_BRCOND %3(i1), %bb.1 bb.1: + ... 
--- @@ -204,52 +221,58 @@ body: | ; WAVE64-NEXT: successors: %bb.1(0x80000000) ; WAVE64-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:sreg_64_xexec(s64) = COPY $sgpr0_sgpr1 + ; WAVE64-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:sreg_64_xexec(i64) = COPY $sgpr0_sgpr1 ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: bb.1: ; WAVE64-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: S_NOP 0 - ; WAVE64-NEXT: SI_LOOP [[COPY2]](s64), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + ; WAVE64-NEXT: SI_LOOP [[COPY2]](i64), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec ; WAVE64-NEXT: G_BR %bb.2 ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: bb.2: ; WAVE64-NEXT: S_NOP 0 + ; ; WAVE32-LABEL: name: brcond_si_loop_brcond ; WAVE32: bb.0: ; WAVE32-NEXT: successors: %bb.1(0x80000000) ; WAVE32-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec(s64) = COPY $sgpr0_sgpr1 + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec(i64) = COPY $sgpr0_sgpr1 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: bb.1: ; WAVE32-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: S_NOP 0 - ; WAVE32-NEXT: SI_LOOP [[COPY2]](s64), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + ; WAVE32-NEXT: SI_LOOP [[COPY2]](i64), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec ; WAVE32-NEXT: G_BR %bb.2 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: bb.2: ; WAVE32-NEXT: S_NOP 0 bb.0: + successors: %bb.1(0x80000000) liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s64) = COPY $sgpr0_sgpr1 + + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i64) = COPY $sgpr0_sgpr1 bb.1: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) + S_NOP 0 - %3:_(s1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2 - G_BRCOND %3, %bb.2 + %3:_(i1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2(i64) + G_BRCOND %3(i1), %bb.2 G_BR %bb.1 bb.2: S_NOP 0 + + ... 
# This usage is backwards from how the intrinsic is supposed to be @@ -263,52 +286,58 @@ body: | ; WAVE64-NEXT: successors: %bb.1(0x80000000) ; WAVE64-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:sreg_64_xexec(s64) = COPY $sgpr0_sgpr1 + ; WAVE64-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:sreg_64_xexec(i64) = COPY $sgpr0_sgpr1 ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: bb.1: ; WAVE64-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: S_NOP 0 - ; WAVE64-NEXT: SI_LOOP [[COPY2]](s64), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec + ; WAVE64-NEXT: SI_LOOP [[COPY2]](i64), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec ; WAVE64-NEXT: G_BR %bb.1 ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: bb.2: ; WAVE64-NEXT: S_NOP 0 + ; ; WAVE32-LABEL: name: brcond_si_loop_brcond_back ; WAVE32: bb.0: ; WAVE32-NEXT: successors: %bb.1(0x80000000) ; WAVE32-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec(s64) = COPY $sgpr0_sgpr1 + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec(i64) = COPY $sgpr0_sgpr1 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: bb.1: ; WAVE32-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: S_NOP 0 - ; WAVE32-NEXT: SI_LOOP [[COPY2]](s64), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec + ; WAVE32-NEXT: SI_LOOP [[COPY2]](i64), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec ; WAVE32-NEXT: G_BR %bb.1 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: bb.2: ; WAVE32-NEXT: S_NOP 0 bb.0: + successors: %bb.1(0x80000000) liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s64) = COPY $sgpr0_sgpr1 + + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i64) = COPY $sgpr0_sgpr1 bb.1: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) + S_NOP 0 - %3:_(s1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2 - G_BRCOND %3, %bb.1 + %3:_(i1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2(i64) + G_BRCOND %3(i1), %bb.1 G_BR %bb.2 bb.2: S_NOP 0 + + ... 
# This usage is backwards from how the intrinsic is supposed to be @@ -322,48 +351,54 @@ body: | ; WAVE64-NEXT: successors: %bb.1(0x80000000) ; WAVE64-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:sreg_64_xexec(s64) = COPY $sgpr0_sgpr1 + ; WAVE64-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:sreg_64_xexec(i64) = COPY $sgpr0_sgpr1 ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: bb.1: ; WAVE64-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: S_NOP 0 - ; WAVE64-NEXT: SI_LOOP [[COPY2]](s64), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec + ; WAVE64-NEXT: SI_LOOP [[COPY2]](i64), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec ; WAVE64-NEXT: G_BR %bb.1 ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: bb.2: + ; ; WAVE32-LABEL: name: brcond_si_loop_brcond_back_fallthrough ; WAVE32: bb.0: ; WAVE32-NEXT: successors: %bb.1(0x80000000) ; WAVE32-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec(s64) = COPY $sgpr0_sgpr1 + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec(i64) = COPY $sgpr0_sgpr1 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: bb.1: ; WAVE32-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: S_NOP 0 - ; WAVE32-NEXT: SI_LOOP [[COPY2]](s64), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec + ; WAVE32-NEXT: SI_LOOP [[COPY2]](i64), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec ; WAVE32-NEXT: G_BR %bb.1 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: bb.2: bb.0: + successors: %bb.1(0x80000000) liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s64) = COPY $sgpr0_sgpr1 + + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i64) = COPY $sgpr0_sgpr1 bb.1: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) + S_NOP 0 - %3:_(s1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2 - G_BRCOND %3, %bb.1 + %3:_(i1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2(i64) + G_BRCOND %3(i1), %bb.1 bb.2: + + ... 
# There's another instruction between the intrinsic and the @@ -376,41 +411,44 @@ body: | ; WAVE64-NEXT: successors: %bb.1(0x80000000) ; WAVE64-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; WAVE64-NEXT: [[ICMP:%[0-9]+]]:sreg_64_xexec(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; WAVE64-NEXT: [[SI_IF:%[0-9]+]]:sreg_64_xexec(s64) = SI_IF [[ICMP]](s1), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + ; WAVE64-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; WAVE64-NEXT: [[ICMP:%[0-9]+]]:sreg_64_xexec(i1) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; WAVE64-NEXT: [[SI_IF:%[0-9]+]]:sreg_64_xexec(i64) = SI_IF [[ICMP]](i1), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec ; WAVE64-NEXT: G_BR %bb.1 ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: bb.1: - ; WAVE64-NEXT: S_ENDPGM 0, implicit [[COPY2]](s32) + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[COPY2]](i32) + ; ; WAVE32-LABEL: name: brcond_si_if_need_insert_terminator_point ; WAVE32: bb.0: ; WAVE32-NEXT: successors: %bb.1(0x80000000) ; WAVE32-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; WAVE32-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; WAVE32-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s64) = SI_IF [[ICMP]](s1), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; WAVE32-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(i1) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; WAVE32-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(i64) = SI_IF [[ICMP]](i1), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec ; WAVE32-NEXT: G_BR %bb.1 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: bb.1: - ; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY2]](s32) + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY2]](i32) bb.0: - successors: %bb.1 + successors: %bb.1(0x80000000) liveins: $vgpr0, $vgpr1, $vgpr2 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s1) = G_ICMP intpred(ne), %0, %1 - %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2 - %5:_(s32) = COPY $vgpr2 - G_BRCOND %3, %bb.1 + + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %3:_(i1), %4:_(i64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2(i1) + %5:_(i32) = COPY $vgpr2 + G_BRCOND %3(i1), %bb.1 bb.1: - S_ENDPGM 0, implicit %5 + S_ENDPGM 0, implicit %5(i32) + ... 
--- @@ -422,9 +460,9 @@ body: | ; WAVE64-NEXT: successors: %bb.1(0x80000000) ; WAVE64-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:sreg_64_xexec(s64) = COPY $sgpr0_sgpr1 + ; WAVE64-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:sreg_64_xexec(i64) = COPY $sgpr0_sgpr1 ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: bb.1: ; WAVE64-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) @@ -432,19 +470,20 @@ body: | ; WAVE64-NEXT: S_NOP 0 ; WAVE64-NEXT: S_NOP 0 ; WAVE64-NEXT: S_NOP 0 - ; WAVE64-NEXT: SI_LOOP [[COPY2]](s64), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + ; WAVE64-NEXT: SI_LOOP [[COPY2]](i64), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec ; WAVE64-NEXT: G_BR %bb.2 ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: bb.2: ; WAVE64-NEXT: S_NOP 0 + ; ; WAVE32-LABEL: name: brcond_si_loop_need_terminator_insert_point ; WAVE32: bb.0: ; WAVE32-NEXT: successors: %bb.1(0x80000000) ; WAVE32-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec(s64) = COPY $sgpr0_sgpr1 + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec(i64) = COPY $sgpr0_sgpr1 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: bb.1: ; WAVE32-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) @@ -452,28 +491,33 @@ body: | ; WAVE32-NEXT: S_NOP 0 ; WAVE32-NEXT: S_NOP 0 ; WAVE32-NEXT: S_NOP 0 - ; WAVE32-NEXT: SI_LOOP [[COPY2]](s64), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + ; WAVE32-NEXT: SI_LOOP [[COPY2]](i64), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec ; WAVE32-NEXT: G_BR %bb.2 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: bb.2: ; WAVE32-NEXT: S_NOP 0 bb.0: + successors: %bb.1(0x80000000) liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s64) = COPY $sgpr0_sgpr1 + + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i64) = COPY $sgpr0_sgpr1 bb.1: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) + S_NOP 0 - %3:_(s1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2 + %3:_(i1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2(i64) S_NOP 0 S_NOP 0 - G_BRCOND %3, %bb.2 + G_BRCOND %3(i1), %bb.2 G_BR %bb.1 bb.2: S_NOP 0 + + ... 
--- @@ -484,10 +528,10 @@ body: | ; WAVE64-NEXT: successors: %bb.1(0x80000000) ; WAVE64-NEXT: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; WAVE64-NEXT: [[ICMP:%[0-9]+]]:sreg_64_xexec(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; WAVE64-NEXT: [[SI_IF:%[0-9]+]]:sreg_64_xexec(s64) = SI_IF [[ICMP]](s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec + ; WAVE64-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; WAVE64-NEXT: [[ICMP:%[0-9]+]]:sreg_64_xexec(i1) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; WAVE64-NEXT: [[SI_IF:%[0-9]+]]:sreg_64_xexec(i64) = SI_IF [[ICMP]](i1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec ; WAVE64-NEXT: G_BR %bb.1 ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: bb.1: @@ -497,15 +541,16 @@ body: | ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: bb.2: ; WAVE64-NEXT: S_NOP 1 + ; ; WAVE32-LABEL: name: brcond_si_if_negated ; WAVE32: bb.0: ; WAVE32-NEXT: successors: %bb.1(0x80000000) ; WAVE32-NEXT: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; WAVE32-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; WAVE32-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s64) = SI_IF [[ICMP]](s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; WAVE32-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(i1) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; WAVE32-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(i64) = SI_IF [[ICMP]](i1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec ; WAVE32-NEXT: G_BR %bb.1 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: bb.1: @@ -516,21 +561,26 @@ body: | ; WAVE32-NEXT: bb.2: ; WAVE32-NEXT: S_NOP 1 bb.0: - successors: %bb.1 + successors: %bb.1(0x80000000) liveins: $vgpr0, $vgpr1 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s1) = G_ICMP intpred(ne), %0, %1 - %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2 - %5:_(s1) = G_CONSTANT i1 true - %6:_(s1) = G_XOR %3, %5 - G_BRCOND %6, %bb.2 + + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %3:_(i1), %4:_(i64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2(i1) + %5:_(i1) = G_CONSTANT i1 true + %6:_(i1) = G_XOR %3, %5 + G_BRCOND %6(i1), %bb.2 bb.1: + successors: %bb.2(0x80000000) + S_NOP 0 bb.2: S_NOP 1 + + ... 
--- @@ -541,10 +591,10 @@ body: | ; WAVE64-NEXT: successors: %bb.1(0x80000000) ; WAVE64-NEXT: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; WAVE64-NEXT: [[ICMP:%[0-9]+]]:sreg_64_xexec(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; WAVE64-NEXT: [[SI_IF:%[0-9]+]]:sreg_64_xexec(s64) = SI_IF [[ICMP]](s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec + ; WAVE64-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; WAVE64-NEXT: [[ICMP:%[0-9]+]]:sreg_64_xexec(i1) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; WAVE64-NEXT: [[SI_IF:%[0-9]+]]:sreg_64_xexec(i64) = SI_IF [[ICMP]](i1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec ; WAVE64-NEXT: G_BR %bb.3 ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: bb.1: @@ -559,15 +609,16 @@ body: | ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: bb.3: ; WAVE64-NEXT: S_NOP 2 + ; ; WAVE32-LABEL: name: brcond_si_if_br_negated ; WAVE32: bb.0: ; WAVE32-NEXT: successors: %bb.1(0x80000000) ; WAVE32-NEXT: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; WAVE32-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; WAVE32-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s64) = SI_IF [[ICMP]](s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; WAVE32-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(i1) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; WAVE32-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(i64) = SI_IF [[ICMP]](i1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec ; WAVE32-NEXT: G_BR %bb.3 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: bb.1: @@ -583,25 +634,33 @@ body: | ; WAVE32-NEXT: bb.3: ; WAVE32-NEXT: S_NOP 2 bb.0: - successors: %bb.1 + successors: %bb.1(0x80000000) liveins: $vgpr0, $vgpr1 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s1) = G_ICMP intpred(ne), %0, %1 - %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2 - %5:_(s1) = G_CONSTANT i1 true - %6:_(s1) = G_XOR %3, %5 - G_BRCOND %6, %bb.2 + + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %3:_(i1), %4:_(i64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2(i1) + %5:_(i1) = G_CONSTANT i1 true + %6:_(i1) = G_XOR %3, %5 + G_BRCOND %6(i1), %bb.2 G_BR %bb.3 bb.1: + successors: %bb.2(0x80000000) + S_NOP 0 bb.2: + successors: %bb.3(0x80000000) + S_NOP 1 bb.3: S_NOP 2 + + + ... 
--- @@ -613,53 +672,59 @@ body: | ; WAVE64-NEXT: successors: %bb.1(0x80000000) ; WAVE64-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:sreg_64_xexec(s64) = COPY $sgpr0_sgpr1 + ; WAVE64-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:sreg_64_xexec(i64) = COPY $sgpr0_sgpr1 ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: bb.1: ; WAVE64-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: S_NOP 0 - ; WAVE64-NEXT: SI_LOOP [[COPY2]](s64), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + ; WAVE64-NEXT: SI_LOOP [[COPY2]](i64), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec ; WAVE64-NEXT: G_BR %bb.2 ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: bb.2: ; WAVE64-NEXT: S_NOP 0 + ; ; WAVE32-LABEL: name: brcond_si_loop_brcond_negated ; WAVE32: bb.0: ; WAVE32-NEXT: successors: %bb.1(0x80000000) ; WAVE32-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec(s64) = COPY $sgpr0_sgpr1 + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec(i64) = COPY $sgpr0_sgpr1 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: bb.1: ; WAVE32-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: S_NOP 0 - ; WAVE32-NEXT: SI_LOOP [[COPY2]](s64), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + ; WAVE32-NEXT: SI_LOOP [[COPY2]](i64), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec ; WAVE32-NEXT: G_BR %bb.2 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: bb.2: ; WAVE32-NEXT: S_NOP 0 bb.0: + successors: %bb.1(0x80000000) liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s64) = COPY $sgpr0_sgpr1 + + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i64) = COPY $sgpr0_sgpr1 bb.1: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) + S_NOP 0 - %3:_(s1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2 - %4:_(s1) = G_CONSTANT i1 true - %5:_(s1) = G_XOR %3, %4 - G_BRCOND %5, %bb.1 + %3:_(i1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2(i64) + %4:_(i1) = G_CONSTANT i1 true + %5:_(i1) = G_XOR %3, %4 + G_BRCOND %5(i1), %bb.1 bb.2: S_NOP 0 + + ... 
--- @@ -671,52 +736,58 @@ body: | ; WAVE64-NEXT: successors: %bb.1(0x80000000) ; WAVE64-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:sreg_64_xexec(s64) = COPY $sgpr0_sgpr1 + ; WAVE64-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:sreg_64_xexec(i64) = COPY $sgpr0_sgpr1 ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: bb.1: ; WAVE64-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: S_NOP 0 - ; WAVE64-NEXT: SI_LOOP [[COPY2]](s64), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec + ; WAVE64-NEXT: SI_LOOP [[COPY2]](i64), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec ; WAVE64-NEXT: G_BR %bb.1 ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: bb.2: ; WAVE64-NEXT: S_NOP 0 + ; ; WAVE32-LABEL: name: brcond_si_loop_brcond_br_negated ; WAVE32: bb.0: ; WAVE32-NEXT: successors: %bb.1(0x80000000) ; WAVE32-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec(s64) = COPY $sgpr0_sgpr1 + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec(i64) = COPY $sgpr0_sgpr1 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: bb.1: ; WAVE32-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: S_NOP 0 - ; WAVE32-NEXT: SI_LOOP [[COPY2]](s64), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec + ; WAVE32-NEXT: SI_LOOP [[COPY2]](i64), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec ; WAVE32-NEXT: G_BR %bb.1 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: bb.2: ; WAVE32-NEXT: S_NOP 0 bb.0: + successors: %bb.1(0x80000000) liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s64) = COPY $sgpr0_sgpr1 + + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i64) = COPY $sgpr0_sgpr1 bb.1: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) + S_NOP 0 - %3:_(s1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2 - %4:_(s1) = G_CONSTANT i1 true - %5:_(s1) = G_XOR %3, %4 - G_BRCOND %5, %bb.2 + %3:_(i1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2(i64) + %4:_(i1) = G_CONSTANT i1 true + %5:_(i1) = G_XOR %3, %4 + G_BRCOND %5(i1), %bb.2 G_BR %bb.1 bb.2: S_NOP 0 + + ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bswap.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bswap.mir index 2794a3fa31daa..17098d0a1d5d0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bswap.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bswap.mir @@ -11,32 +11,32 @@ body: | ; GFX7-LABEL: name: bswap_s16 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX7-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) - ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX7-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX7-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY1]](s32) - ; GFX7-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX7-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC1]], [[TRUNC]] - ; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX7-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX7-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX7-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[C]](i32) + ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[SHL]](i32) + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C]](i32) + ; GFX7-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX7-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C1]] + ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[AND]], [[COPY1]](i32) + ; GFX7-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX7-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[TRUNC1]], [[TRUNC]] + ; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[OR]](i16) + ; GFX7-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX8-LABEL: name: bswap_s16 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[BSWAP:%[0-9]+]]:_(s16) = G_BSWAP [[TRUNC]] - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BSWAP]](s16) - ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s16) = G_BSWAP %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[BSWAP:%[0-9]+]]:_(i16) = G_BSWAP [[TRUNC]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BSWAP]](i16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(i16) = G_BSWAP %1 + %3:_(i32) = G_ANYEXT %2(i16) + $vgpr0 = COPY %3(i32) ... 
--- @@ -48,19 +48,19 @@ body: | ; GFX7-LABEL: name: bswap_s32 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[COPY]] - ; GFX7-NEXT: $vgpr0 = COPY [[BSWAP]](s32) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX7-NEXT: [[BSWAP:%[0-9]+]]:_(i32) = G_BSWAP [[COPY]] + ; GFX7-NEXT: $vgpr0 = COPY [[BSWAP]](i32) ; ; GFX8-LABEL: name: bswap_s32 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[COPY]] - ; GFX8-NEXT: $vgpr0 = COPY [[BSWAP]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_BSWAP %0 - $vgpr0 = COPY %1 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[BSWAP:%[0-9]+]]:_(i32) = G_BSWAP [[COPY]] + ; GFX8-NEXT: $vgpr0 = COPY [[BSWAP]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_BSWAP %0 + $vgpr0 = COPY %1(i32) ... --- @@ -72,42 +72,42 @@ body: | ; GFX7-LABEL: name: bswap_v2s16 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX7-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX7-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX7-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX7-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C1]](s32) - ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX7-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX7-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]] - ; GFX7-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY1]](s32) - ; GFX7-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX7-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC1]], [[TRUNC]] - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX7-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[COPY2]](s32) - ; GFX7-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX7-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY3]](s32) - ; GFX7-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX7-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC3]], [[TRUNC2]] - ; GFX7-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; GFX7-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; GFX7-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX7-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; GFX7-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX7-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX7-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX7-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX7-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX7-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[BITCAST]], [[C1]](i32) + ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[SHL]](i32) + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C1]](i32) + ; GFX7-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX7-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C2]] + ; GFX7-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[AND]], [[COPY1]](i32) + ; GFX7-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX7-NEXT: 
[[OR:%[0-9]+]]:_(i16) = G_OR [[TRUNC1]], [[TRUNC]] + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C1]](i32) + ; GFX7-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[COPY2]](i32) + ; GFX7-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[SHL1]](i32) + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[C1]](i32) + ; GFX7-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[LSHR]], [[COPY3]](i32) + ; GFX7-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX7-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[TRUNC3]], [[TRUNC2]] + ; GFX7-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; GFX7-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; GFX7-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; GFX7-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX7-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; GFX7-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) ; ; GFX8-LABEL: name: bswap_v2s16 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8-NEXT: [[BSWAP:%[0-9]+]]:_(<2 x s16>) = G_BSWAP [[COPY]] - ; GFX8-NEXT: $vgpr0 = COPY [[BSWAP]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = G_BSWAP %0 - $vgpr0 = COPY %1 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX8-NEXT: [[BSWAP:%[0-9]+]]:_(<2 x i16>) = G_BSWAP [[COPY]] + ; GFX8-NEXT: $vgpr0 = COPY [[BSWAP]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = G_BSWAP %0 + $vgpr0 = COPY %1(<2 x i16>) ... --- @@ -119,83 +119,83 @@ body: | ; GFX7-LABEL: name: bswap_v3s16 ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX7-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX7-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) - ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX7-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX7-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY3]](s32) - ; GFX7-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX7-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC1]], [[TRUNC]] - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX7-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[COPY4]](s32) - ; GFX7-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX7-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; GFX7-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY5]](s32) - ; GFX7-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX7-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC3]], [[TRUNC2]] - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX7-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[COPY6]](s32) - ; GFX7-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX7-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; GFX7-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY7]](s32) - ; GFX7-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX7-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[TRUNC5]], [[TRUNC4]] - ; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX7-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) - ; 
GFX7-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16) - ; GFX7-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX7-NEXT: $vgpr1 = COPY [[ANYEXT1]](s32) - ; GFX7-NEXT: $vgpr2 = COPY [[ANYEXT2]](s32) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX7-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX7-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[C]](i32) + ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[SHL]](i32) + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[C]](i32) + ; GFX7-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX7-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C1]] + ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[AND]], [[COPY3]](i32) + ; GFX7-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX7-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[TRUNC1]], [[TRUNC]] + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[C]](i32) + ; GFX7-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY1]], [[COPY4]](i32) + ; GFX7-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[SHL1]](i32) + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[C]](i32) + ; GFX7-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C1]] + ; GFX7-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[AND1]], [[COPY5]](i32) + ; GFX7-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX7-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[TRUNC3]], [[TRUNC2]] + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY [[C]](i32) + ; GFX7-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[COPY2]], [[COPY6]](i32) + ; GFX7-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[SHL2]](i32) + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY [[C]](i32) + ; GFX7-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[COPY2]], [[C1]] + ; GFX7-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[AND2]], [[COPY7]](i32) + ; GFX7-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX7-NEXT: [[OR2:%[0-9]+]]:_(i16) = G_OR [[TRUNC5]], [[TRUNC4]] + ; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[OR]](i16) + ; GFX7-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[OR1]](i16) + ; GFX7-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[OR2]](i16) + ; GFX7-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + ; GFX7-NEXT: $vgpr1 = COPY [[ANYEXT1]](i32) + ; GFX7-NEXT: $vgpr2 = COPY [[ANYEXT2]](i32) ; ; GFX8-LABEL: name: bswap_v3s16 ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) - ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8-NEXT: [[BSWAP:%[0-9]+]]:_(<2 x s16>) = G_BSWAP [[BITCAST]] - ; GFX8-NEXT: [[BSWAP1:%[0-9]+]]:_(<2 x s16>) = G_BSWAP [[BITCAST1]] - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[BSWAP]](<2 x s16>) - 
; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) - ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[BSWAP1]](<2 x s16>) - ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](s32) - ; GFX8-NEXT: $vgpr1 = COPY [[LSHR]](s32) - ; GFX8-NEXT: $vgpr2 = COPY [[BITCAST3]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s16) = G_TRUNC %0 - %4:_(s16) = G_TRUNC %1 - %5:_(s16) = G_TRUNC %2 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C1]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[COPY2]], [[C]] + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[C2]], [[C1]](i32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; GFX8-NEXT: [[BSWAP:%[0-9]+]]:_(<2 x i16>) = G_BSWAP [[BITCAST]] + ; GFX8-NEXT: [[BSWAP1:%[0-9]+]]:_(<2 x i16>) = G_BSWAP [[BITCAST1]] + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[BSWAP]](<2 x i16>) + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C1]](i32) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[BSWAP1]](<2 x i16>) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) + ; GFX8-NEXT: $vgpr1 = COPY [[LSHR]](i32) + ; GFX8-NEXT: $vgpr2 = COPY [[BITCAST3]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i16) = G_TRUNC %0(i32) + %4:_(i16) = G_TRUNC %1(i32) + %5:_(i16) = G_TRUNC %2(i32) + %6:_(<3 x i16>) = G_BUILD_VECTOR %3(i16), %4(i16), %5(i16) + %7:_(<3 x i16>) = G_BSWAP %6 + %8:_(i16), %9:_(i16), %10:_(i16) = G_UNMERGE_VALUES %7(<3 x i16>) + %11:_(i32) = G_ANYEXT %8(i16) + %12:_(i32) = G_ANYEXT %9(i16) + %13:_(i32) = G_ANYEXT %10(i16) + $vgpr0 = COPY %11(i32) + $vgpr1 = COPY %12(i32) + $vgpr2 = COPY %13(i32) - %6:_(<3 x s16>) = G_BUILD_VECTOR %3, %4, %5 - %7:_(<3 x s16>) = G_BSWAP %6 - %8:_(s16), %9:_(s16), %10:_(s16) = G_UNMERGE_VALUES %7 - %11:_(s32) = G_ANYEXT %8 - %12:_(s32) = G_ANYEXT %9 - %13:_(s32) = G_ANYEXT %10 - $vgpr0 = COPY %11 - $vgpr1 = COPY %12 - $vgpr2 = COPY %13 ... 
--- @@ -207,69 +207,69 @@ body: | ; GFX7-LABEL: name: bswap_v4s16 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX7-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX7-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX7-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX7-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX7-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX7-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C1]](s32) - ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX7-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX7-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]] - ; GFX7-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY1]](s32) - ; GFX7-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX7-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC1]], [[TRUNC]] - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX7-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[COPY2]](s32) - ; GFX7-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX7-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY3]](s32) - ; GFX7-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX7-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC3]], [[TRUNC2]] - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX7-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[COPY4]](s32) - ; GFX7-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX7-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] - ; GFX7-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY5]](s32) - ; GFX7-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; GFX7-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[TRUNC5]], [[TRUNC4]] - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX7-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[COPY6]](s32) - ; GFX7-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX7-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[COPY7]](s32) - ; GFX7-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; GFX7-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[TRUNC7]], [[TRUNC6]] - ; GFX7-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; GFX7-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; GFX7-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX7-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; GFX7-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; GFX7-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; GFX7-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; GFX7-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; GFX7-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] - ; GFX7-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; GFX7-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX7-NEXT: 
[[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; GFX7-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX7-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX7-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX7-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX7-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX7-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[BITCAST]], [[C1]](i32) + ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[SHL]](i32) + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C1]](i32) + ; GFX7-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX7-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C2]] + ; GFX7-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[AND]], [[COPY1]](i32) + ; GFX7-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX7-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[TRUNC1]], [[TRUNC]] + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C1]](i32) + ; GFX7-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[COPY2]](i32) + ; GFX7-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[SHL1]](i32) + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[C1]](i32) + ; GFX7-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[LSHR]], [[COPY3]](i32) + ; GFX7-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX7-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[TRUNC3]], [[TRUNC2]] + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[C1]](i32) + ; GFX7-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[BITCAST1]], [[COPY4]](i32) + ; GFX7-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[SHL2]](i32) + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[C1]](i32) + ; GFX7-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C2]] + ; GFX7-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[AND1]], [[COPY5]](i32) + ; GFX7-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR4]](i32) + ; GFX7-NEXT: [[OR2:%[0-9]+]]:_(i16) = G_OR [[TRUNC5]], [[TRUNC4]] + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY [[C1]](i32) + ; GFX7-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LSHR1]], [[COPY6]](i32) + ; GFX7-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[SHL3]](i32) + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY [[C1]](i32) + ; GFX7-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[LSHR1]], [[COPY7]](i32) + ; GFX7-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR5]](i32) + ; GFX7-NEXT: [[OR3:%[0-9]+]]:_(i16) = G_OR [[TRUNC7]], [[TRUNC6]] + ; GFX7-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; GFX7-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; GFX7-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; GFX7-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX7-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR4]](i32) + ; GFX7-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[OR2]](i16) + ; GFX7-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[OR3]](i16) + ; GFX7-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; GFX7-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX7-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR5]](i32) + ; GFX7-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x i16>), [[BITCAST3]](<2 x i16>) + ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX8-LABEL: name: bswap_v4s16 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; 
GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX8-NEXT: [[BSWAP:%[0-9]+]]:_(<2 x s16>) = G_BSWAP [[UV]] - ; GFX8-NEXT: [[BSWAP1:%[0-9]+]]:_(<2 x s16>) = G_BSWAP [[UV1]] - ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BSWAP]](<2 x s16>), [[BSWAP1]](<2 x s16>) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = G_BSWAP %0 - $vgpr0_vgpr1 = COPY %1 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; GFX8-NEXT: [[BSWAP:%[0-9]+]]:_(<2 x i16>) = G_BSWAP [[UV]] + ; GFX8-NEXT: [[BSWAP1:%[0-9]+]]:_(<2 x i16>) = G_BSWAP [[UV1]] + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BSWAP]](<2 x i16>), [[BSWAP1]](<2 x i16>) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = G_BSWAP %0 + $vgpr0_vgpr1 = COPY %1(<4 x i16>) ... --- @@ -281,25 +281,25 @@ body: | ; GFX7-LABEL: name: bswap_v2s32 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX7-NEXT: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[UV]] - ; GFX7-NEXT: [[BSWAP1:%[0-9]+]]:_(s32) = G_BSWAP [[UV1]] - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[BSWAP]](s32), [[BSWAP1]](s32) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX7-NEXT: [[BSWAP:%[0-9]+]]:_(i32) = G_BSWAP [[UV]] + ; GFX7-NEXT: [[BSWAP1:%[0-9]+]]:_(i32) = G_BSWAP [[UV1]] + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[BSWAP]](i32), [[BSWAP1]](i32) + ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX8-LABEL: name: bswap_v2s32 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX8-NEXT: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[UV]] - ; GFX8-NEXT: [[BSWAP1:%[0-9]+]]:_(s32) = G_BSWAP [[UV1]] - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[BSWAP]](s32), [[BSWAP1]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_BSWAP %0 - $vgpr0_vgpr1 = COPY %1 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX8-NEXT: [[BSWAP:%[0-9]+]]:_(i32) = G_BSWAP [[UV]] + ; GFX8-NEXT: [[BSWAP1:%[0-9]+]]:_(i32) = G_BSWAP [[UV1]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[BSWAP]](i32), [[BSWAP1]](i32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = G_BSWAP %0 + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... 
 ---
@@ -311,25 +311,25 @@ body: |
     ; GFX7-LABEL: name: bswap_s64
     ; GFX7: liveins: $vgpr0_vgpr1
     ; GFX7-NEXT: {{ $}}
-    ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; GFX7-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
-    ; GFX7-NEXT: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[UV1]]
-    ; GFX7-NEXT: [[BSWAP1:%[0-9]+]]:_(s32) = G_BSWAP [[UV]]
-    ; GFX7-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[BSWAP]](s32), [[BSWAP1]](s32)
-    ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1
+    ; GFX7-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64)
+    ; GFX7-NEXT: [[BSWAP:%[0-9]+]]:_(i32) = G_BSWAP [[UV1]]
+    ; GFX7-NEXT: [[BSWAP1:%[0-9]+]]:_(i32) = G_BSWAP [[UV]]
+    ; GFX7-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[BSWAP]](i32), [[BSWAP1]](i32)
+    ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64)
     ;
     ; GFX8-LABEL: name: bswap_s64
     ; GFX8: liveins: $vgpr0_vgpr1
     ; GFX8-NEXT: {{ $}}
-    ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
-    ; GFX8-NEXT: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[UV1]]
-    ; GFX8-NEXT: [[BSWAP1:%[0-9]+]]:_(s32) = G_BSWAP [[UV]]
-    ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[BSWAP]](s32), [[BSWAP1]](s32)
-    ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
-    %0:_(s64) = COPY $vgpr0_vgpr1
-    %1:_(s64) = G_BSWAP %0
-    $vgpr0_vgpr1 = COPY %1
+    ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1
+    ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64)
+    ; GFX8-NEXT: [[BSWAP:%[0-9]+]]:_(i32) = G_BSWAP [[UV1]]
+    ; GFX8-NEXT: [[BSWAP1:%[0-9]+]]:_(i32) = G_BSWAP [[UV]]
+    ; GFX8-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[BSWAP]](i32), [[BSWAP1]](i32)
+    ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64)
+    %0:_(i64) = COPY $vgpr0_vgpr1
+    %1:_(i64) = G_BSWAP %0
+    $vgpr0_vgpr1 = COPY %1(i64)
 ...
--- @@ -341,37 +341,37 @@ body: | ; GFX7-LABEL: name: bswap_v2s64 ; GFX7: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX7-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX7-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX7-NEXT: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[UV3]] - ; GFX7-NEXT: [[BSWAP1:%[0-9]+]]:_(s32) = G_BSWAP [[UV2]] - ; GFX7-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[BSWAP]](s32), [[BSWAP1]](s32) - ; GFX7-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX7-NEXT: [[BSWAP2:%[0-9]+]]:_(s32) = G_BSWAP [[UV5]] - ; GFX7-NEXT: [[BSWAP3:%[0-9]+]]:_(s32) = G_BSWAP [[UV4]] - ; GFX7-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[BSWAP2]](s32), [[BSWAP3]](s32) - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX7-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; GFX7-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV]](i64) + ; GFX7-NEXT: [[BSWAP:%[0-9]+]]:_(i32) = G_BSWAP [[UV3]] + ; GFX7-NEXT: [[BSWAP1:%[0-9]+]]:_(i32) = G_BSWAP [[UV2]] + ; GFX7-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[BSWAP]](i32), [[BSWAP1]](i32) + ; GFX7-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV1]](i64) + ; GFX7-NEXT: [[BSWAP2:%[0-9]+]]:_(i32) = G_BSWAP [[UV5]] + ; GFX7-NEXT: [[BSWAP3:%[0-9]+]]:_(i32) = G_BSWAP [[UV4]] + ; GFX7-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[BSWAP2]](i32), [[BSWAP3]](i32) + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64) + ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; GFX8-LABEL: name: bswap_v2s64 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX8-NEXT: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[UV3]] - ; GFX8-NEXT: [[BSWAP1:%[0-9]+]]:_(s32) = G_BSWAP [[UV2]] - ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[BSWAP]](s32), [[BSWAP1]](s32) - ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX8-NEXT: [[BSWAP2:%[0-9]+]]:_(s32) = G_BSWAP [[UV5]] - ; GFX8-NEXT: [[BSWAP3:%[0-9]+]]:_(s32) = G_BSWAP [[UV4]] - ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[BSWAP2]](s32), [[BSWAP3]](s32) - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s64>) = G_BSWAP %0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV]](i64) + ; GFX8-NEXT: [[BSWAP:%[0-9]+]]:_(i32) = G_BSWAP [[UV3]] + ; GFX8-NEXT: [[BSWAP1:%[0-9]+]]:_(i32) = G_BSWAP 
[[UV2]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[BSWAP]](i32), [[BSWAP1]](i32) + ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV1]](i64) + ; GFX8-NEXT: [[BSWAP2:%[0-9]+]]:_(i32) = G_BSWAP [[UV5]] + ; GFX8-NEXT: [[BSWAP3:%[0-9]+]]:_(i32) = G_BSWAP [[UV4]] + ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[BSWAP2]](i32), [[BSWAP3]](i32) + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x i64>) = G_BSWAP %0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x i64>) ... --- @@ -383,29 +383,29 @@ body: | ; GFX7-LABEL: name: bswap_s48 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX7-NEXT: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[UV1]] - ; GFX7-NEXT: [[BSWAP1:%[0-9]+]]:_(s32) = G_BSWAP [[UV]] - ; GFX7-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[BSWAP]](s32), [[BSWAP1]](s32) - ; GFX7-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[MV]], [[C]](s32) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LSHR]](s64) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX7-NEXT: [[BSWAP:%[0-9]+]]:_(i32) = G_BSWAP [[UV1]] + ; GFX7-NEXT: [[BSWAP1:%[0-9]+]]:_(i32) = G_BSWAP [[UV]] + ; GFX7-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[BSWAP]](i32), [[BSWAP1]](i32) + ; GFX7-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[MV]], [[C]](i32) + ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LSHR]](i64) ; ; GFX8-LABEL: name: bswap_s48 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX8-NEXT: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[UV1]] - ; GFX8-NEXT: [[BSWAP1:%[0-9]+]]:_(s32) = G_BSWAP [[UV]] - ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[BSWAP]](s32), [[BSWAP1]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[MV]], [[C]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[LSHR]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s48) = G_TRUNC %0 - %2:_(s48) = G_BSWAP %1 - %3:_(s64) = G_ANYEXT %2 - $vgpr0_vgpr1 = COPY %3 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX8-NEXT: [[BSWAP:%[0-9]+]]:_(i32) = G_BSWAP [[UV1]] + ; GFX8-NEXT: [[BSWAP1:%[0-9]+]]:_(i32) = G_BSWAP [[UV]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[BSWAP]](i32), [[BSWAP1]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[MV]], [[C]](i32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[LSHR]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i48) = G_TRUNC %0(i64) + %2:_(i48) = G_BSWAP %1 + %3:_(i64) = G_ANYEXT %2(i48) + $vgpr0_vgpr1 = COPY %3(i64) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector-splat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector-splat.mir
index 7a0dec6091f70..1e9fc1bfc7d7d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector-splat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector-splat.mir
@@ -10,14 +10,14 @@ body: |
     ; CHECK-LABEL: name: build_vector_v8s16_splat
     ; CHECK: liveins: $vgpr0, $vgpr1
     ; CHECK-NEXT: {{ $}}
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 16256
-    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16)
-    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>)
-    ; CHECK-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<8 x s16>)
-    %1:_(s32) = COPY $vgpr0
-    %2:_(s32) = COPY $vgpr1
-    %0:_(p1) = G_MERGE_VALUES %1(s32), %2(s32)
-    %4:_(s16) = G_CONSTANT i16 16256
-    %3:_(<8 x s16>) = G_BUILD_VECTOR %4(s16), %4(s16), %4(s16), %4(s16), %4(s16), %4(s16), %4(s16), %4(s16)
-    S_NOP 0, implicit %3
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 16256
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[C]](i16), [[C]](i16)
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR]](<2 x i16>)
+    ; CHECK-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<8 x i16>)
+    %0:_(i32) = COPY $vgpr0
+    %1:_(i32) = COPY $vgpr1
+    %2:_(p1) = G_MERGE_VALUES %0(i32), %1(i32)
+    %3:_(i16) = G_CONSTANT i16 16256
+    %4:_(<8 x i16>) = G_BUILD_VECTOR %3(i16), %3(i16), %3(i16), %3(i16), %3(i16), %3(i16), %3(i16), %3(i16)
+    S_NOP 0, implicit %4(<8 x i16>)
 ...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector-trunc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector-trunc.mir
index fc2dadcc70489..c02c41d699ac4 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector-trunc.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector-trunc.mir
@@ -14,25 +14,26 @@ body: |
     ; GFX9PLUS-LABEL: name: legal_s32_to_v2s16
     ; GFX9PLUS: liveins: $vgpr0, $vgpr1
     ; GFX9PLUS-NEXT: {{ $}}
-    ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[COPY1]](s32)
-    ; GFX9PLUS-NEXT: S_NOP 0, implicit [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0
+    ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1
+    ; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR_TRUNC [[COPY]](i32), [[COPY1]](i32)
+    ; GFX9PLUS-NEXT: S_NOP 0, implicit [[BUILD_VECTOR_TRUNC]](<2 x i16>)
+    ;
     ; PREGFX8-LABEL: name: legal_s32_to_v2s16
     ; PREGFX8: liveins: $vgpr0, $vgpr1
     ; PREGFX8-NEXT: {{ $}}
-    ; PREGFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; PREGFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; PREGFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; PREGFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
-    ; PREGFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
-    ; PREGFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; PREGFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
-    ; PREGFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; PREGFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; PREGFX8-NEXT: S_NOP 0, implicit [[BITCAST]](<2 x s16>)
-    %0:_(s32) = COPY $vgpr0
-    %1:_(s32) = COPY $vgpr1
-    %2:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1
-    S_NOP 0, implicit %2
+    ; PREGFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0
+    ; PREGFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1
+    ; PREGFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535
+    ; PREGFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]]
+    ; PREGFX8-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]]
+    ; PREGFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16
+    ; PREGFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C1]](i32)
+    ; PREGFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]]
+    ; PREGFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32)
+    ; PREGFX8-NEXT: S_NOP 0, implicit [[BITCAST]](<2 x i16>)
+    %0:_(i32) = COPY $vgpr0
+    %1:_(i32) = COPY $vgpr1
+    %2:_(<2 x i16>) = G_BUILD_VECTOR_TRUNC %0(i32), %1(i32)
+    S_NOP 0, implicit %2(<2 x i16>)
 ...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector.mir
index 25652b69afa92..e7a323c2a08c3 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector.mir
@@ -9,14 +9,14 @@ body: |
     ; CHECK-LABEL: name: legal_v2s32
     ; CHECK: liveins: $vgpr0, $vgpr1
     ; CHECK-NEXT: {{ $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
-    ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s32>)
-    %0:_(s32) = COPY $vgpr0
-    %1:_(s32) = COPY $vgpr1
-    %2:_(<2 x s32>) = G_BUILD_VECTOR %0, %1
-    S_NOP 0, implicit %2
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32)
+    ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x i32>)
+    %0:_(i32) = COPY $vgpr0
+    %1:_(i32) = COPY $vgpr1
+    %2:_(<2 x i32>) = G_BUILD_VECTOR %0(i32), %1(i32)
+    S_NOP 0, implicit %2(<2 x i32>)
 ...
 ---
 name: legal_v3s32
@@ -26,16 +26,16 @@ body: |
     ; CHECK-LABEL: name: legal_v3s32
     ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
     ; CHECK-NEXT: {{ $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
-    ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
-    %0:_(s32) = COPY $vgpr0
-    %1:_(s32) = COPY $vgpr1
-    %2:_(s32) = COPY $vgpr2
-    %3:_(<3 x s32>) = G_BUILD_VECTOR %0, %1, %2
-    S_NOP 0, implicit %3
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32)
+    ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x i32>)
+    %0:_(i32) = COPY $vgpr0
+    %1:_(i32) = COPY $vgpr1
+    %2:_(i32) = COPY $vgpr2
+    %3:_(<3 x i32>) = G_BUILD_VECTOR %0(i32), %1(i32), %2(i32)
+    S_NOP 0, implicit %3(<3 x i32>)
 ...
--- name: legal_v4s32 @@ -45,18 +45,18 @@ body: | ; CHECK-LABEL: name: legal_v4s32 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<4 x s32>) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = COPY $vgpr3 - %4:_(<4 x s32>) = G_BUILD_VECTOR %0, %1, %2, %3 - S_NOP 0, implicit %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<4 x i32>) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 + %4:_(<4 x i32>) = G_BUILD_VECTOR %0(i32), %1(i32), %2(i32), %3(i32) + S_NOP 0, implicit %4(<4 x i32>) ... --- name: legal_v5s32 @@ -66,20 +66,20 @@ body: | ; CHECK-LABEL: name: legal_v5s32 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) - ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<5 x s32>) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = COPY $vgpr3 - %4:_(s32) = COPY $vgpr4 - %5:_(<5 x s32>) = G_BUILD_VECTOR %0, %1, %2, %3, %4 - S_NOP 0, implicit %5 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32) + ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<5 x i32>) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 + %4:_(i32) = COPY $vgpr4 + %5:_(<5 x i32>) = G_BUILD_VECTOR %0(i32), %1(i32), %2(i32), %3(i32), %4(i32) + S_NOP 0, implicit %5(<5 x i32>) ... 
--- name: legal_v6s32 @@ -89,22 +89,22 @@ body: | ; CHECK-LABEL: name: legal_v6s32 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<6 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) - ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<6 x s32>) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = COPY $vgpr3 - %4:_(s32) = COPY $vgpr4 - %5:_(s32) = COPY $vgpr5 - %6:_(<6 x s32>) = G_BUILD_VECTOR %0, %1, %2, %3, %4, %5 - S_NOP 0, implicit %6 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<6 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32) + ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<6 x i32>) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 + %4:_(i32) = COPY $vgpr4 + %5:_(i32) = COPY $vgpr5 + %6:_(<6 x i32>) = G_BUILD_VECTOR %0(i32), %1(i32), %2(i32), %3(i32), %4(i32), %5(i32) + S_NOP 0, implicit %6(<6 x i32>) ... 
--- name: legal_v7s32 @@ -114,24 +114,24 @@ body: | ; CHECK-LABEL: name: legal_v7s32 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32) - ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<7 x s32>) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = COPY $vgpr3 - %4:_(s32) = COPY $vgpr4 - %5:_(s32) = COPY $vgpr5 - %6:_(s32) = COPY $vgpr6 - %7:_(<7 x s32>) = G_BUILD_VECTOR %0, %1, %2, %3, %4, %5, %6 - S_NOP 0, implicit %7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32) + ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<7 x i32>) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 + %4:_(i32) = COPY $vgpr4 + %5:_(i32) = COPY $vgpr5 + %6:_(i32) = COPY $vgpr6 + %7:_(<7 x i32>) = G_BUILD_VECTOR %0(i32), %1(i32), %2(i32), %3(i32), %4(i32), %5(i32), %6(i32) + S_NOP 0, implicit %7(<7 x i32>) ... 
--- name: legal_v8s32 @@ -141,26 +141,26 @@ body: | ; CHECK-LABEL: name: legal_v8s32 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<8 x s32>) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = COPY $vgpr3 - %4:_(s32) = COPY $vgpr4 - %5:_(s32) = COPY $vgpr5 - %6:_(s32) = COPY $vgpr6 - %7:_(s32) = COPY $vgpr7 - %8:_(<8 x s32>) = G_BUILD_VECTOR %0, %1, %2, %3, %4, %5, %6, %7 - S_NOP 0, implicit %8 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<8 x i32>) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 + %4:_(i32) = COPY $vgpr4 + %5:_(i32) = COPY $vgpr5 + %6:_(i32) = COPY $vgpr6 + %7:_(i32) = COPY $vgpr7 + %8:_(<8 x i32>) = G_BUILD_VECTOR %0(i32), %1(i32), %2(i32), %3(i32), %4(i32), %5(i32), %6(i32), %7(i32) + S_NOP 0, implicit %8(<8 x i32>) ... 
--- name: legal_v9s32 @@ -170,28 +170,28 @@ body: | ; CHECK-LABEL: name: legal_v9s32 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<9 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<9 x s32>) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = COPY $vgpr3 - %4:_(s32) = COPY $vgpr4 - %5:_(s32) = COPY $vgpr5 - %6:_(s32) = COPY $vgpr6 - %7:_(s32) = COPY $vgpr7 - %8:_(s32) = COPY $vgpr8 - %9:_(<9 x s32>) = G_BUILD_VECTOR %0, %1, %2, %3, %4, %5, %6, %7, %8 - S_NOP 0, implicit %9 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<9 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32), [[COPY8]](i32) + ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<9 x i32>) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 + %4:_(i32) = COPY $vgpr4 + %5:_(i32) = COPY $vgpr5 + %6:_(i32) = COPY $vgpr6 + %7:_(i32) = COPY $vgpr7 + %8:_(i32) = COPY $vgpr8 + %9:_(<9 x i32>) = G_BUILD_VECTOR %0(i32), %1(i32), %2(i32), %3(i32), %4(i32), %5(i32), %6(i32), %7(i32), %8(i32) + S_NOP 0, implicit %9(<9 x i32>) ... 
--- name: legal_v10s32 @@ -201,30 +201,30 @@ body: | ; CHECK-LABEL: name: legal_v10s32 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<10 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32) - ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<10 x s32>) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = COPY $vgpr3 - %4:_(s32) = COPY $vgpr4 - %5:_(s32) = COPY $vgpr5 - %6:_(s32) = COPY $vgpr6 - %7:_(s32) = COPY $vgpr7 - %8:_(s32) = COPY $vgpr8 - %9:_(s32) = COPY $vgpr9 - %10:_(<10 x s32>) = G_BUILD_VECTOR %0, %1, %2, %3, %4, %5, %6, %7, %8, %9 - S_NOP 0, implicit %10 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<10 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32), [[COPY8]](i32), [[COPY9]](i32) + ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<10 x i32>) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 + %4:_(i32) = COPY $vgpr4 + %5:_(i32) = COPY $vgpr5 + %6:_(i32) = COPY $vgpr6 + %7:_(i32) = COPY $vgpr7 + %8:_(i32) = COPY $vgpr8 + %9:_(i32) = COPY $vgpr9 + %10:_(<10 x i32>) = G_BUILD_VECTOR %0(i32), %1(i32), %2(i32), %3(i32), %4(i32), %5(i32), %6(i32), %7(i32), %8(i32), %9(i32) + S_NOP 0, implicit %10(<10 x i32>) ... 
--- name: legal_v11s32 @@ -234,32 +234,32 @@ body: | ; CHECK-LABEL: name: legal_v11s32 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<11 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32) - ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<11 x s32>) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = COPY $vgpr3 - %4:_(s32) = COPY $vgpr4 - %5:_(s32) = COPY $vgpr5 - %6:_(s32) = COPY $vgpr6 - %7:_(s32) = COPY $vgpr7 - %8:_(s32) = COPY $vgpr8 - %9:_(s32) = COPY $vgpr9 - %10:_(s32) = COPY $vgpr10 - %11:_(<11 x s32>) = G_BUILD_VECTOR %0, %1, %2, %3, %4, %5, %6, %7, %8, %9, %10 - S_NOP 0, implicit %11 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<11 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32), [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32) + ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<11 x i32>) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 + %4:_(i32) = COPY $vgpr4 + %5:_(i32) = COPY $vgpr5 + %6:_(i32) = COPY $vgpr6 + %7:_(i32) = COPY $vgpr7 + %8:_(i32) = COPY $vgpr8 + %9:_(i32) = COPY $vgpr9 + %10:_(i32) = COPY $vgpr10 + %11:_(<11 x i32>) = G_BUILD_VECTOR %0(i32), %1(i32), %2(i32), %3(i32), %4(i32), %5(i32), %6(i32), %7(i32), %8(i32), %9(i32), %10(i32) + S_NOP 0, implicit %11(<11 x i32>) ... 
--- name: legal_v12s32 @@ -269,34 +269,34 @@ body: | ; CHECK-LABEL: name: legal_v12s32 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<12 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<12 x s32>) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = COPY $vgpr3 - %4:_(s32) = COPY $vgpr4 - %5:_(s32) = COPY $vgpr5 - %6:_(s32) = COPY $vgpr6 - %7:_(s32) = COPY $vgpr7 - %8:_(s32) = COPY $vgpr8 - %9:_(s32) = COPY $vgpr9 - %10:_(s32) = COPY $vgpr10 - %11:_(s32) = COPY $vgpr11 - %12:_(<12 x s32>) = G_BUILD_VECTOR %0, %1, %2, %3, %4, %5, %6, %7, %8, %9, %10, %11 - S_NOP 0, implicit %12 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<12 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32), [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32) + ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<12 x i32>) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 + %4:_(i32) = COPY $vgpr4 + %5:_(i32) = COPY $vgpr5 + %6:_(i32) = COPY $vgpr6 + %7:_(i32) = COPY $vgpr7 + %8:_(i32) = COPY $vgpr8 + %9:_(i32) = COPY $vgpr9 + %10:_(i32) = COPY $vgpr10 + %11:_(i32) = COPY $vgpr11 + %12:_(<12 x i32>) = G_BUILD_VECTOR %0(i32), %1(i32), %2(i32), %3(i32), %4(i32), %5(i32), %6(i32), %7(i32), %8(i32), %9(i32), %10(i32), %11(i32) + S_NOP 0, implicit %12(<12 x i32>) ... 
--- name: legal_v16s32 @@ -306,42 +306,42 @@ body: | ; CHECK-LABEL: name: legal_v16s32 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32) - ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<16 x s32>) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = COPY $vgpr3 - %4:_(s32) = COPY $vgpr4 - %5:_(s32) = COPY $vgpr5 - %6:_(s32) = COPY $vgpr6 - %7:_(s32) = COPY $vgpr7 - %8:_(s32) = COPY $vgpr8 - %9:_(s32) = COPY $vgpr9 - %10:_(s32) = COPY $vgpr10 - %11:_(s32) = COPY $vgpr11 - %12:_(s32) = COPY $vgpr12 - %13:_(s32) = COPY $vgpr13 - %14:_(s32) = COPY $vgpr14 - %15:_(s32) = COPY $vgpr15 - %16:_(<16 x s32>) = G_BUILD_VECTOR %0, %1, %2, %3, %4, %5, %6, %7, %8, %9, %10, %11, %12, %13, %14, %15 - S_NOP 0, implicit %16 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32), [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32), [[COPY12]](i32), [[COPY13]](i32), [[COPY14]](i32), [[COPY15]](i32) + ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<16 x i32>) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 + %4:_(i32) = COPY $vgpr4 + %5:_(i32) = COPY $vgpr5 + 
%6:_(i32) = COPY $vgpr6 + %7:_(i32) = COPY $vgpr7 + %8:_(i32) = COPY $vgpr8 + %9:_(i32) = COPY $vgpr9 + %10:_(i32) = COPY $vgpr10 + %11:_(i32) = COPY $vgpr11 + %12:_(i32) = COPY $vgpr12 + %13:_(i32) = COPY $vgpr13 + %14:_(i32) = COPY $vgpr14 + %15:_(i32) = COPY $vgpr15 + %16:_(<16 x i32>) = G_BUILD_VECTOR %0(i32), %1(i32), %2(i32), %3(i32), %4(i32), %5(i32), %6(i32), %7(i32), %8(i32), %9(i32), %10(i32), %11(i32), %12(i32), %13(i32), %14(i32), %15(i32) + S_NOP 0, implicit %16(<16 x i32>) ... --- name: legal_v32s32 @@ -351,74 +351,74 @@ body: | ; CHECK-LABEL: name: legal_v32s32 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) - ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<32 x s32>) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = COPY $vgpr3 - %4:_(s32) = COPY $vgpr4 - %5:_(s32) = COPY $vgpr5 - %6:_(s32) = COPY $vgpr6 - 
%7:_(s32) = COPY $vgpr7 - %8:_(s32) = COPY $vgpr8 - %9:_(s32) = COPY $vgpr9 - %10:_(s32) = COPY $vgpr10 - %11:_(s32) = COPY $vgpr11 - %12:_(s32) = COPY $vgpr12 - %13:_(s32) = COPY $vgpr13 - %14:_(s32) = COPY $vgpr14 - %15:_(s32) = COPY $vgpr15 - %16:_(s32) = COPY $vgpr16 - %17:_(s32) = COPY $vgpr17 - %18:_(s32) = COPY $vgpr18 - %19:_(s32) = COPY $vgpr19 - %20:_(s32) = COPY $vgpr20 - %21:_(s32) = COPY $vgpr21 - %22:_(s32) = COPY $vgpr22 - %23:_(s32) = COPY $vgpr23 - %24:_(s32) = COPY $vgpr24 - %25:_(s32) = COPY $vgpr25 - %26:_(s32) = COPY $vgpr26 - %27:_(s32) = COPY $vgpr27 - %28:_(s32) = COPY $vgpr28 - %29:_(s32) = COPY $vgpr29 - %30:_(s32) = COPY $vgpr30 - %31:_(s32) = COPY $vgpr31 - %32:_(<32 x s32>) = G_BUILD_VECTOR %0, %1, %2, %3, %4, %5, %6, %7, %8, %9, %10, %11, %12, %13, %14, %15, %16, %17, %18, %19, %20, %21, %22, %23, %24, %25, %26, %27, %28, %29, %30, %31 - S_NOP 0, implicit %32 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY $vgpr17 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY $vgpr18 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY $vgpr19 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY $vgpr20 + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY $vgpr21 + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY $vgpr22 + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(i32) = COPY $vgpr23 + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(i32) = COPY $vgpr24 + ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(i32) = COPY $vgpr25 + ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(i32) = COPY $vgpr26 + ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(i32) = COPY $vgpr27 + ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(i32) = COPY $vgpr28 + ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(i32) = COPY $vgpr29 + ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(i32) = COPY $vgpr30 + ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(i32) = COPY $vgpr31 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32), [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32), [[COPY12]](i32), [[COPY13]](i32), [[COPY14]](i32), [[COPY15]](i32), [[COPY16]](i32), [[COPY17]](i32), [[COPY18]](i32), [[COPY19]](i32), [[COPY20]](i32), [[COPY21]](i32), [[COPY22]](i32), [[COPY23]](i32), [[COPY24]](i32), [[COPY25]](i32), [[COPY26]](i32), [[COPY27]](i32), [[COPY28]](i32), [[COPY29]](i32), [[COPY30]](i32), [[COPY31]](i32) + ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<32 x i32>) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 + %4:_(i32) = COPY $vgpr4 + %5:_(i32) = COPY $vgpr5 + %6:_(i32) = COPY $vgpr6 
+ %7:_(i32) = COPY $vgpr7 + %8:_(i32) = COPY $vgpr8 + %9:_(i32) = COPY $vgpr9 + %10:_(i32) = COPY $vgpr10 + %11:_(i32) = COPY $vgpr11 + %12:_(i32) = COPY $vgpr12 + %13:_(i32) = COPY $vgpr13 + %14:_(i32) = COPY $vgpr14 + %15:_(i32) = COPY $vgpr15 + %16:_(i32) = COPY $vgpr16 + %17:_(i32) = COPY $vgpr17 + %18:_(i32) = COPY $vgpr18 + %19:_(i32) = COPY $vgpr19 + %20:_(i32) = COPY $vgpr20 + %21:_(i32) = COPY $vgpr21 + %22:_(i32) = COPY $vgpr22 + %23:_(i32) = COPY $vgpr23 + %24:_(i32) = COPY $vgpr24 + %25:_(i32) = COPY $vgpr25 + %26:_(i32) = COPY $vgpr26 + %27:_(i32) = COPY $vgpr27 + %28:_(i32) = COPY $vgpr28 + %29:_(i32) = COPY $vgpr29 + %30:_(i32) = COPY $vgpr30 + %31:_(i32) = COPY $vgpr31 + %32:_(<32 x i32>) = G_BUILD_VECTOR %0(i32), %1(i32), %2(i32), %3(i32), %4(i32), %5(i32), %6(i32), %7(i32), %8(i32), %9(i32), %10(i32), %11(i32), %12(i32), %13(i32), %14(i32), %15(i32), %16(i32), %17(i32), %18(i32), %19(i32), %20(i32), %21(i32), %22(i32), %23(i32), %24(i32), %25(i32), %26(i32), %27(i32), %28(i32), %29(i32), %30(i32), %31(i32) + S_NOP 0, implicit %32(<32 x i32>) ... --- name: legal_v2s64 @@ -428,14 +428,14 @@ body: | ; CHECK-LABEL: name: legal_v2s64 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64) - ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s64>) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(<2 x s64>) = G_BUILD_VECTOR %0, %1 - S_NOP 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[COPY]](i64), [[COPY1]](i64) + ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x i64>) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(<2 x i64>) = G_BUILD_VECTOR %0(i64), %1(i64) + S_NOP 0, implicit %2(<2 x i64>) ... --- name: legal_v3s64 @@ -445,16 +445,16 @@ body: | ; CHECK-LABEL: name: legal_v3s64 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64), [[COPY2]](s64) - ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s64>) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = COPY $vgpr4_vgpr5 - %3:_(<3 x s64>) = G_BUILD_VECTOR %0, %1, %2 - S_NOP 0, implicit %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i64) = COPY $vgpr4_vgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i64>) = G_BUILD_VECTOR [[COPY]](i64), [[COPY1]](i64), [[COPY2]](i64) + ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x i64>) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64) = COPY $vgpr4_vgpr5 + %3:_(<3 x i64>) = G_BUILD_VECTOR %0(i64), %1(i64), %2(i64) + S_NOP 0, implicit %3(<3 x i64>) ... 
--- name: legal_v4s64 @@ -464,18 +464,18 @@ body: | ; CHECK-LABEL: name: legal_v4s64 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $vgpr6_vgpr7 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64), [[COPY2]](s64), [[COPY3]](s64) - ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<4 x s64>) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = COPY $vgpr4_vgpr5 - %3:_(s64) = COPY $vgpr6_vgpr7 - %4:_(<4 x s64>) = G_BUILD_VECTOR %0, %1, %2, %3 - S_NOP 0, implicit %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i64) = COPY $vgpr4_vgpr5 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i64) = COPY $vgpr6_vgpr7 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[COPY]](i64), [[COPY1]](i64), [[COPY2]](i64), [[COPY3]](i64) + ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<4 x i64>) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64) = COPY $vgpr4_vgpr5 + %3:_(i64) = COPY $vgpr6_vgpr7 + %4:_(<4 x i64>) = G_BUILD_VECTOR %0(i64), %1(i64), %2(i64), %3(i64) + S_NOP 0, implicit %4(<4 x i64>) ... --- name: legal_v5s64 @@ -485,20 +485,20 @@ body: | ; CHECK-LABEL: name: legal_v5s64 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7, $vgpr8_vgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $vgpr6_vgpr7 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY $vgpr8_vgpr9 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64), [[COPY2]](s64), [[COPY3]](s64), [[COPY4]](s64) - ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<5 x s64>) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = COPY $vgpr4_vgpr5 - %3:_(s64) = COPY $vgpr6_vgpr7 - %4:_(s64) = COPY $vgpr8_vgpr9 - %5:_(<5 x s64>) = G_BUILD_VECTOR %0, %1, %2, %3, %4 - S_NOP 0, implicit %5 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i64) = COPY $vgpr4_vgpr5 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i64) = COPY $vgpr6_vgpr7 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i64) = COPY $vgpr8_vgpr9 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x i64>) = G_BUILD_VECTOR [[COPY]](i64), [[COPY1]](i64), [[COPY2]](i64), [[COPY3]](i64), [[COPY4]](i64) + ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<5 x i64>) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64) = COPY $vgpr4_vgpr5 + %3:_(i64) = COPY $vgpr6_vgpr7 + %4:_(i64) = COPY $vgpr8_vgpr9 + %5:_(<5 x i64>) = G_BUILD_VECTOR %0(i64), %1(i64), %2(i64), %3(i64), %4(i64) + S_NOP 0, implicit %5(<5 x i64>) ... 
--- name: legal_v6s64 @@ -508,22 +508,22 @@ body: | ; CHECK-LABEL: name: legal_v6s64 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7, $vgpr8_vgpr9, $vgpr10_vgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $vgpr6_vgpr7 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY $vgpr8_vgpr9 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s64) = COPY $vgpr10_vgpr11 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<6 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64), [[COPY2]](s64), [[COPY3]](s64), [[COPY4]](s64), [[COPY5]](s64) - ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<6 x s64>) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = COPY $vgpr4_vgpr5 - %3:_(s64) = COPY $vgpr6_vgpr7 - %4:_(s64) = COPY $vgpr8_vgpr9 - %5:_(s64) = COPY $vgpr10_vgpr11 - %6:_(<6 x s64>) = G_BUILD_VECTOR %0, %1, %2, %3, %4, %5 - S_NOP 0, implicit %6 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i64) = COPY $vgpr4_vgpr5 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i64) = COPY $vgpr6_vgpr7 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i64) = COPY $vgpr8_vgpr9 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i64) = COPY $vgpr10_vgpr11 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<6 x i64>) = G_BUILD_VECTOR [[COPY]](i64), [[COPY1]](i64), [[COPY2]](i64), [[COPY3]](i64), [[COPY4]](i64), [[COPY5]](i64) + ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<6 x i64>) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64) = COPY $vgpr4_vgpr5 + %3:_(i64) = COPY $vgpr6_vgpr7 + %4:_(i64) = COPY $vgpr8_vgpr9 + %5:_(i64) = COPY $vgpr10_vgpr11 + %6:_(<6 x i64>) = G_BUILD_VECTOR %0(i64), %1(i64), %2(i64), %3(i64), %4(i64), %5(i64) + S_NOP 0, implicit %6(<6 x i64>) ... 
--- name: legal_v7s64 @@ -533,24 +533,24 @@ body: | ; CHECK-LABEL: name: legal_v7s64 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7, $vgpr8_vgpr9, $vgpr10_vgpr11, $vgpr12_vgpr13 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $vgpr6_vgpr7 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY $vgpr8_vgpr9 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s64) = COPY $vgpr10_vgpr11 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s64) = COPY $vgpr12_vgpr13 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64), [[COPY2]](s64), [[COPY3]](s64), [[COPY4]](s64), [[COPY5]](s64), [[COPY6]](s64) - ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<7 x s64>) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = COPY $vgpr4_vgpr5 - %3:_(s64) = COPY $vgpr6_vgpr7 - %4:_(s64) = COPY $vgpr8_vgpr9 - %5:_(s64) = COPY $vgpr10_vgpr11 - %6:_(s64) = COPY $vgpr12_vgpr13 - %7:_(<7 x s64>) = G_BUILD_VECTOR %0, %1, %2, %3, %4, %5, %6 - S_NOP 0, implicit %7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i64) = COPY $vgpr4_vgpr5 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i64) = COPY $vgpr6_vgpr7 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i64) = COPY $vgpr8_vgpr9 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i64) = COPY $vgpr10_vgpr11 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(i64) = COPY $vgpr12_vgpr13 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x i64>) = G_BUILD_VECTOR [[COPY]](i64), [[COPY1]](i64), [[COPY2]](i64), [[COPY3]](i64), [[COPY4]](i64), [[COPY5]](i64), [[COPY6]](i64) + ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<7 x i64>) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64) = COPY $vgpr4_vgpr5 + %3:_(i64) = COPY $vgpr6_vgpr7 + %4:_(i64) = COPY $vgpr8_vgpr9 + %5:_(i64) = COPY $vgpr10_vgpr11 + %6:_(i64) = COPY $vgpr12_vgpr13 + %7:_(<7 x i64>) = G_BUILD_VECTOR %0(i64), %1(i64), %2(i64), %3(i64), %4(i64), %5(i64), %6(i64) + S_NOP 0, implicit %7(<7 x i64>) ... 
--- name: legal_v8s64 @@ -560,26 +560,26 @@ body: | ; CHECK-LABEL: name: legal_v8s64 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7, $vgpr8_vgpr9, $vgpr10_vgpr11, $vgpr12_vgpr13, $vgpr14_vgpr15 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $vgpr6_vgpr7 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY $vgpr8_vgpr9 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s64) = COPY $vgpr10_vgpr11 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s64) = COPY $vgpr12_vgpr13 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s64) = COPY $vgpr14_vgpr15 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64), [[COPY2]](s64), [[COPY3]](s64), [[COPY4]](s64), [[COPY5]](s64), [[COPY6]](s64), [[COPY7]](s64) - ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<8 x s64>) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = COPY $vgpr4_vgpr5 - %3:_(s64) = COPY $vgpr6_vgpr7 - %4:_(s64) = COPY $vgpr8_vgpr9 - %5:_(s64) = COPY $vgpr10_vgpr11 - %6:_(s64) = COPY $vgpr12_vgpr13 - %7:_(s64) = COPY $vgpr14_vgpr15 - %8:_(<8 x s64>) = G_BUILD_VECTOR %0, %1, %2, %3, %4, %5, %6, %7 - S_NOP 0, implicit %8 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i64) = COPY $vgpr4_vgpr5 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i64) = COPY $vgpr6_vgpr7 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i64) = COPY $vgpr8_vgpr9 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i64) = COPY $vgpr10_vgpr11 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(i64) = COPY $vgpr12_vgpr13 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(i64) = COPY $vgpr14_vgpr15 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i64>) = G_BUILD_VECTOR [[COPY]](i64), [[COPY1]](i64), [[COPY2]](i64), [[COPY3]](i64), [[COPY4]](i64), [[COPY5]](i64), [[COPY6]](i64), [[COPY7]](i64) + ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<8 x i64>) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64) = COPY $vgpr4_vgpr5 + %3:_(i64) = COPY $vgpr6_vgpr7 + %4:_(i64) = COPY $vgpr8_vgpr9 + %5:_(i64) = COPY $vgpr10_vgpr11 + %6:_(i64) = COPY $vgpr12_vgpr13 + %7:_(i64) = COPY $vgpr14_vgpr15 + %8:_(<8 x i64>) = G_BUILD_VECTOR %0(i64), %1(i64), %2(i64), %3(i64), %4(i64), %5(i64), %6(i64), %7(i64) + S_NOP 0, implicit %8(<8 x i64>) ... 
--- @@ -590,42 +590,42 @@ body: | ; CHECK-LABEL: name: legal_v16s64 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7, $vgpr8_vgpr9, $vgpr10_vgpr11, $vgpr12_vgpr13, $vgpr14_vgpr15, $vgpr16_vgpr17, $vgpr18_vgpr19, $vgpr20_vgpr21, $vgpr22_vgpr23, $vgpr24_vgpr25, $vgpr26_vgpr27, $vgpr28_vgpr29, $vgpr30_vgpr31 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $vgpr6_vgpr7 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY $vgpr8_vgpr9 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s64) = COPY $vgpr10_vgpr11 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s64) = COPY $vgpr12_vgpr13 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s64) = COPY $vgpr14_vgpr15 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s64) = COPY $vgpr16_vgpr17 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s64) = COPY $vgpr18_vgpr19 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s64) = COPY $vgpr20_vgpr21 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY $vgpr22_vgpr23 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY $vgpr24_vgpr25 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY $vgpr26_vgpr27 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY $vgpr28_vgpr29 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s64) = COPY $vgpr30_vgpr31 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64), [[COPY2]](s64), [[COPY3]](s64), [[COPY4]](s64), [[COPY5]](s64), [[COPY6]](s64), [[COPY7]](s64), [[COPY8]](s64), [[COPY9]](s64), [[COPY10]](s64), [[COPY11]](s64), [[COPY12]](s64), [[COPY13]](s64), [[COPY14]](s64), [[COPY15]](s64) - ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<16 x s64>) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = COPY $vgpr4_vgpr5 - %3:_(s64) = COPY $vgpr6_vgpr7 - %4:_(s64) = COPY $vgpr8_vgpr9 - %5:_(s64) = COPY $vgpr10_vgpr11 - %6:_(s64) = COPY $vgpr12_vgpr13 - %7:_(s64) = COPY $vgpr14_vgpr15 - %8:_(s64) = COPY $vgpr16_vgpr17 - %9:_(s64) = COPY $vgpr18_vgpr19 - %10:_(s64) = COPY $vgpr20_vgpr21 - %11:_(s64) = COPY $vgpr22_vgpr23 - %12:_(s64) = COPY $vgpr24_vgpr25 - %13:_(s64) = COPY $vgpr26_vgpr27 - %14:_(s64) = COPY $vgpr28_vgpr29 - %15:_(s64) = COPY $vgpr30_vgpr31 - %16:_(<16 x s64>) = G_BUILD_VECTOR %0, %1, %2, %3, %4, %5, %6, %7, %8, %9, %10, %11, %12, %13, %14, %15 - S_NOP 0, implicit %16 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i64) = COPY $vgpr4_vgpr5 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i64) = COPY $vgpr6_vgpr7 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i64) = COPY $vgpr8_vgpr9 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i64) = COPY $vgpr10_vgpr11 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(i64) = COPY $vgpr12_vgpr13 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(i64) = COPY $vgpr14_vgpr15 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(i64) = COPY $vgpr16_vgpr17 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(i64) = COPY $vgpr18_vgpr19 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(i64) = COPY $vgpr20_vgpr21 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(i64) = COPY $vgpr22_vgpr23 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(i64) = COPY $vgpr24_vgpr25 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(i64) = COPY $vgpr26_vgpr27 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(i64) = COPY $vgpr28_vgpr29 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(i64) = COPY $vgpr30_vgpr31 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x i64>) = G_BUILD_VECTOR [[COPY]](i64), [[COPY1]](i64), [[COPY2]](i64), 
[[COPY3]](i64), [[COPY4]](i64), [[COPY5]](i64), [[COPY6]](i64), [[COPY7]](i64), [[COPY8]](i64), [[COPY9]](i64), [[COPY10]](i64), [[COPY11]](i64), [[COPY12]](i64), [[COPY13]](i64), [[COPY14]](i64), [[COPY15]](i64) + ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<16 x i64>) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64) = COPY $vgpr4_vgpr5 + %3:_(i64) = COPY $vgpr6_vgpr7 + %4:_(i64) = COPY $vgpr8_vgpr9 + %5:_(i64) = COPY $vgpr10_vgpr11 + %6:_(i64) = COPY $vgpr12_vgpr13 + %7:_(i64) = COPY $vgpr14_vgpr15 + %8:_(i64) = COPY $vgpr16_vgpr17 + %9:_(i64) = COPY $vgpr18_vgpr19 + %10:_(i64) = COPY $vgpr20_vgpr21 + %11:_(i64) = COPY $vgpr22_vgpr23 + %12:_(i64) = COPY $vgpr24_vgpr25 + %13:_(i64) = COPY $vgpr26_vgpr27 + %14:_(i64) = COPY $vgpr28_vgpr29 + %15:_(i64) = COPY $vgpr30_vgpr31 + %16:_(<16 x i64>) = G_BUILD_VECTOR %0(i64), %1(i64), %2(i64), %3(i64), %4(i64), %5(i64), %6(i64), %7(i64), %8(i64), %9(i64), %10(i64), %11(i64), %12(i64), %13(i64), %14(i64), %15(i64) + S_NOP 0, implicit %16(<16 x i64>) ... --- @@ -637,14 +637,14 @@ body: | ; CHECK-LABEL: name: legal_v2s128 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s128>) = G_BUILD_VECTOR [[COPY]](s128), [[COPY1]](s128) - ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s128>) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - %2:_(<2 x s128>) = G_BUILD_VECTOR %0, %1 - S_NOP 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i128>) = G_BUILD_VECTOR [[COPY]](i128), [[COPY1]](i128) + ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x i128>) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(<2 x i128>) = G_BUILD_VECTOR %0(i128), %1(i128) + S_NOP 0, implicit %2(<2 x i128>) ... --- @@ -661,8 +661,8 @@ body: | ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x p3>) %0:_(p3) = COPY $vgpr0 %1:_(p3) = COPY $vgpr1 - %2:_(<2 x p3>) = G_BUILD_VECTOR %0, %1 - S_NOP 0, implicit %2 + %2:_(<2 x p3>) = G_BUILD_VECTOR %0(p3), %1(p3) + S_NOP 0, implicit %2(<2 x p3>) ... --- name: legal_v3p3 @@ -680,8 +680,8 @@ body: | %0:_(p3) = COPY $vgpr0 %1:_(p3) = COPY $vgpr1 %2:_(p3) = COPY $vgpr2 - %3:_(<3 x p3>) = G_BUILD_VECTOR %0, %1, %2 - S_NOP 0, implicit %3 + %3:_(<3 x p3>) = G_BUILD_VECTOR %0(p3), %1(p3), %2(p3) + S_NOP 0, implicit %3(<3 x p3>) ... --- @@ -698,8 +698,8 @@ body: | ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x p0>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(p0) = COPY $vgpr2_vgpr3 - %2:_(<2 x p0>) = G_BUILD_VECTOR %0, %1 - S_NOP 0, implicit %2 + %2:_(<2 x p0>) = G_BUILD_VECTOR %0(p0), %1(p0) + S_NOP 0, implicit %2(<2 x p0>) ... --- @@ -716,8 +716,8 @@ body: | ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x p999>) %0:_(p999) = COPY $vgpr0_vgpr1 %1:_(p999) = COPY $vgpr2_vgpr3 - %2:_(<2 x p999>) = G_BUILD_VECTOR %0, %1 - S_NOP 0, implicit %2 + %2:_(<2 x p999>) = G_BUILD_VECTOR %0(p999), %1(p999) + S_NOP 0, implicit %2(<2 x p999>) ... 
--- @@ -729,14 +729,14 @@ body: | ; CHECK-LABEL: name: legal_v2s256 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s256>) = G_BUILD_VECTOR [[COPY]](s256), [[COPY1]](s256) - ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s256>) - %0:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - %1:_(s256) = COPY $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - %2:_(<2 x s256>) = G_BUILD_VECTOR %0, %1 - S_NOP 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i256) = COPY $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i256>) = G_BUILD_VECTOR [[COPY]](i256), [[COPY1]](i256) + ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x i256>) + %0:_(i256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(i256) = COPY $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %2:_(<2 x i256>) = G_BUILD_VECTOR %0(i256), %1(i256) + S_NOP 0, implicit %2(<2 x i256>) ... --- @@ -748,16 +748,16 @@ body: | ; CHECK-LABEL: name: legal_v4s128 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10, $vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s128) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s128) = COPY $vgpr12_vgpr13_vgpr14_vgpr15 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s128>) = G_BUILD_VECTOR [[COPY]](s128), [[COPY1]](s128), [[COPY2]](s128), [[COPY3]](s128) - ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<4 x s128>) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - %2:_(s128) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - %3:_(s128)= COPY $vgpr12_vgpr13_vgpr14_vgpr15 - %4:_(<4 x s128>) = G_BUILD_VECTOR %0, %1, %2, %3 - S_NOP 0, implicit %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i128) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i128) = COPY $vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i128>) = G_BUILD_VECTOR [[COPY]](i128), [[COPY1]](i128), [[COPY2]](i128), [[COPY3]](i128) + ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<4 x i128>) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(i128) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + %3:_(i128) = COPY $vgpr12_vgpr13_vgpr14_vgpr15 + %4:_(<4 x i128>) = G_BUILD_VECTOR %0(i128), %1(i128), %2(i128), %3(i128) + S_NOP 0, implicit %4(<4 x i128>) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector.s16.mir index 8a8fde147f462..7aa6d9e07be40 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector.s16.mir @@ -14,31 +14,32 @@ body: | ; GFX78-LABEL: name: build_vector_v2s16 ; GFX78: liveins: $vgpr0, $vgpr1 ; GFX78-NEXT: {{ $}} - ; GFX78-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX78-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX78-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX78-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX78-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX78-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX78-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; GFX78-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX78-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX78-NEXT: S_NOP 0, implicit [[BITCAST]](<2 x s16>) + ; GFX78-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX78-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX78-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX78-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; GFX78-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GFX78-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX78-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C1]](i32) + ; GFX78-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; GFX78-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX78-NEXT: S_NOP 0, implicit [[BITCAST]](<2 x i16>) + ; ; GFX9-LABEL: name: build_vector_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s16>) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(<2 x s16>) = G_BUILD_VECTOR %2, %3 - S_NOP 0, implicit %4 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x i16>) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(<2 x i16>) = G_BUILD_VECTOR %2(i16), %3(i16) + S_NOP 0, implicit %4(<2 x i16>) ... 
--- @@ -50,51 +51,52 @@ body: | ; GFX78-LABEL: name: build_vector_v3s16 ; GFX78: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX78-NEXT: {{ $}} - ; GFX78-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX78-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX78-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX78-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX78-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX78-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX78-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX78-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; GFX78-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX78-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX78-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; GFX78-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX78-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) - ; GFX78-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; GFX78-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX78-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX78-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; GFX78-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) - ; GFX78-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; GFX78-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX78-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) - ; GFX78-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<6 x s16>) + ; GFX78-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX78-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX78-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX78-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX78-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; GFX78-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GFX78-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX78-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C1]](i32) + ; GFX78-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; GFX78-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX78-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[COPY2]], [[C]] + ; GFX78-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; GFX78-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C1]](i32) + ; GFX78-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; GFX78-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; GFX78-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GFX78-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[COPY2]], [[C]] + ; GFX78-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND5]], [[C1]](i32) + ; GFX78-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[AND4]], [[SHL2]] + ; GFX78-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; GFX78-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x i16>), [[BITCAST1]](<2 x i16>), [[BITCAST2]](<2 x i16>) + ; GFX78-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<6 x i16>) + ; ; GFX9-LABEL: name: build_vector_v3s16 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: 
[[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<6 x s16>) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s16) = G_TRUNC %0 - %4:_(s16) = G_TRUNC %1 - %5:_(s16) = G_TRUNC %2 - %6:_(<3 x s16>) = G_BUILD_VECTOR %3, %4, %5 - %7:_(<6 x s16>) = G_CONCAT_VECTORS %6, %6 - S_NOP 0, implicit %7 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC1]](i16), [[TRUNC2]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<6 x i16>) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i16) = G_TRUNC %0(i32) + %4:_(i16) = G_TRUNC %1(i32) + %5:_(i16) = G_TRUNC %2(i32) + %6:_(<3 x i16>) = G_BUILD_VECTOR %3(i16), %4(i16), %5(i16) + %7:_(<6 x i16>) = G_CONCAT_VECTORS %6(<3 x i16>), %6(<3 x i16>) + S_NOP 0, implicit %7(<6 x i16>) ... 
--- @@ -106,49 +108,50 @@ body: | ; GFX78-LABEL: name: build_vector_v4s16 ; GFX78: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX78-NEXT: {{ $}} - ; GFX78-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX78-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX78-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX78-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX78-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX78-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX78-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX78-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX78-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; GFX78-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX78-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX78-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; GFX78-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] - ; GFX78-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) - ; GFX78-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; GFX78-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX78-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) - ; GFX78-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<4 x s16>) + ; GFX78-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX78-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX78-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX78-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX78-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX78-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; GFX78-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GFX78-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX78-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C1]](i32) + ; GFX78-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; GFX78-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX78-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[COPY2]], [[C]] + ; GFX78-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[COPY3]], [[C]] + ; GFX78-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C1]](i32) + ; GFX78-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; GFX78-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; GFX78-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x i16>), [[BITCAST1]](<2 x i16>) + ; GFX78-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<4 x i16>) + ; ; GFX9-LABEL: name: build_vector_v4s16 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<4 x s16>) - %0:_(s32) = COPY $vgpr0 - 
%1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = COPY $vgpr3 - %4:_(s16) = G_TRUNC %0 - %5:_(s16) = G_TRUNC %1 - %6:_(s16) = G_TRUNC %2 - %7:_(s16) = G_TRUNC %3 - %8:_(<4 x s16>) = G_BUILD_VECTOR %4, %5, %6, %7 - S_NOP 0, implicit %8 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY3]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>) + ; GFX9-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<4 x i16>) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 + %4:_(i16) = G_TRUNC %0(i32) + %5:_(i16) = G_TRUNC %1(i32) + %6:_(i16) = G_TRUNC %2(i32) + %7:_(i16) = G_TRUNC %3(i32) + %8:_(<4 x i16>) = G_BUILD_VECTOR %4(i16), %5(i16), %6(i16), %7(i16) + S_NOP 0, implicit %8(<4 x i16>) ... --- @@ -160,73 +163,74 @@ body: | ; GFX78-LABEL: name: build_vector_v5s16 ; GFX78: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX78-NEXT: {{ $}} - ; GFX78-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX78-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX78-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX78-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX78-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX78-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX78-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX78-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX78-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX78-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; GFX78-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX78-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX78-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; GFX78-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] - ; GFX78-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) - ; GFX78-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; GFX78-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX78-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]] - ; GFX78-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX78-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) - ; GFX78-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; GFX78-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX78-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX78-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; GFX78-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C1]](s32) - ; GFX78-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] - ; GFX78-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; GFX78-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] - ; GFX78-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]] - ; GFX78-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C1]](s32) 
- ; GFX78-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]] - ; GFX78-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; GFX78-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; GFX78-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<10 x s16>) + ; GFX78-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX78-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX78-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX78-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX78-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX78-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX78-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; GFX78-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GFX78-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX78-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C1]](i32) + ; GFX78-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; GFX78-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX78-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[COPY2]], [[C]] + ; GFX78-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[COPY3]], [[C]] + ; GFX78-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C1]](i32) + ; GFX78-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; GFX78-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; GFX78-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[COPY4]], [[C]] + ; GFX78-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; GFX78-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND5]], [[C1]](i32) + ; GFX78-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[AND4]], [[SHL2]] + ; GFX78-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; GFX78-NEXT: [[AND6:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GFX78-NEXT: [[AND7:%[0-9]+]]:_(i32) = G_AND [[COPY2]], [[C]] + ; GFX78-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[AND7]], [[C1]](i32) + ; GFX78-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[AND6]], [[SHL3]] + ; GFX78-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR3]](i32) + ; GFX78-NEXT: [[AND8:%[0-9]+]]:_(i32) = G_AND [[COPY3]], [[C]] + ; GFX78-NEXT: [[AND9:%[0-9]+]]:_(i32) = G_AND [[COPY4]], [[C]] + ; GFX78-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[AND9]], [[C1]](i32) + ; GFX78-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[AND8]], [[SHL4]] + ; GFX78-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR4]](i32) + ; GFX78-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x i16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x i16>), [[BITCAST1]](<2 x i16>), [[BITCAST2]](<2 x i16>), [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) + ; GFX78-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<10 x i16>) + ; ; GFX9-LABEL: name: build_vector_v5s16 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), 
[[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) - ; GFX9-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<10 x s16>) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = COPY $vgpr3 - %4:_(s32) = COPY $vgpr4 - %5:_(s16) = G_TRUNC %0 - %6:_(s16) = G_TRUNC %1 - %7:_(s16) = G_TRUNC %2 - %8:_(s16) = G_TRUNC %3 - %9:_(s16) = G_TRUNC %4 - %10:_(<5 x s16>) = G_BUILD_VECTOR %5, %6, %7, %8, %9 - %11:_(<10 x s16>) = G_CONCAT_VECTORS %10, %10 - S_NOP 0, implicit %11 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY3]](i32) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY4]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC1]](i16), [[TRUNC2]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC3]](i16), [[TRUNC4]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>) + ; GFX9-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<10 x i16>) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 + %4:_(i32) = COPY $vgpr4 + %5:_(i16) = G_TRUNC %0(i32) + %6:_(i16) = G_TRUNC %1(i32) + %7:_(i16) = G_TRUNC %2(i32) + %8:_(i16) = G_TRUNC %3(i32) + %9:_(i16) = G_TRUNC %4(i32) + %10:_(<5 x i16>) = G_BUILD_VECTOR %5(i16), %6(i16), %7(i16), %8(i16), %9(i16) + %11:_(<10 x i16>) = G_CONCAT_VECTORS %10(<5 x i16>), %10(<5 x i16>) + S_NOP 0, implicit %11(<10 x i16>) ... 
--- @@ -238,95 +242,96 @@ body: | ; GFX78-LABEL: name: build_vector_v7s16 ; GFX78: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; GFX78-NEXT: {{ $}} - ; GFX78-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX78-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX78-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX78-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX78-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX78-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX78-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX78-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX78-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX78-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX78-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX78-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; GFX78-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX78-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX78-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; GFX78-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] - ; GFX78-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) - ; GFX78-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; GFX78-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX78-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]] - ; GFX78-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C]] - ; GFX78-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) - ; GFX78-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; GFX78-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX78-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C]] - ; GFX78-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX78-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C1]](s32) - ; GFX78-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] - ; GFX78-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; GFX78-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX78-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; GFX78-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C1]](s32) - ; GFX78-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]] - ; GFX78-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; GFX78-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] - ; GFX78-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]] - ; GFX78-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C1]](s32) - ; GFX78-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]] - ; GFX78-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; GFX78-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C]] - ; GFX78-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C]] - ; GFX78-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C1]](s32) - ; GFX78-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL6]] - ; GFX78-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) - ; GFX78-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) - ; GFX78-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<14 x s16>) + ; GFX78-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX78-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX78-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX78-NEXT: 
[[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX78-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX78-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX78-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX78-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX78-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; GFX78-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GFX78-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX78-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C1]](i32) + ; GFX78-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; GFX78-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX78-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[COPY2]], [[C]] + ; GFX78-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[COPY3]], [[C]] + ; GFX78-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C1]](i32) + ; GFX78-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; GFX78-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; GFX78-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[COPY4]], [[C]] + ; GFX78-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[COPY5]], [[C]] + ; GFX78-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND5]], [[C1]](i32) + ; GFX78-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[AND4]], [[SHL2]] + ; GFX78-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; GFX78-NEXT: [[AND6:%[0-9]+]]:_(i32) = G_AND [[COPY6]], [[C]] + ; GFX78-NEXT: [[AND7:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; GFX78-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[AND7]], [[C1]](i32) + ; GFX78-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[AND6]], [[SHL3]] + ; GFX78-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR3]](i32) + ; GFX78-NEXT: [[AND8:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GFX78-NEXT: [[AND9:%[0-9]+]]:_(i32) = G_AND [[COPY2]], [[C]] + ; GFX78-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[AND9]], [[C1]](i32) + ; GFX78-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[AND8]], [[SHL4]] + ; GFX78-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR4]](i32) + ; GFX78-NEXT: [[AND10:%[0-9]+]]:_(i32) = G_AND [[COPY3]], [[C]] + ; GFX78-NEXT: [[AND11:%[0-9]+]]:_(i32) = G_AND [[COPY4]], [[C]] + ; GFX78-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[AND11]], [[C1]](i32) + ; GFX78-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[AND10]], [[SHL5]] + ; GFX78-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR5]](i32) + ; GFX78-NEXT: [[AND12:%[0-9]+]]:_(i32) = G_AND [[COPY5]], [[C]] + ; GFX78-NEXT: [[AND13:%[0-9]+]]:_(i32) = G_AND [[COPY6]], [[C]] + ; GFX78-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[AND13]], [[C1]](i32) + ; GFX78-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[AND12]], [[SHL6]] + ; GFX78-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR6]](i32) + ; GFX78-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<14 x i16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x i16>), [[BITCAST1]](<2 x i16>), [[BITCAST2]](<2 x i16>), [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>), [[BITCAST5]](<2 x i16>), [[BITCAST6]](<2 x i16>) + ; GFX78-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<14 x i16>) + ; ; GFX9-LABEL: name: build_vector_v7s16 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-NEXT: 
[[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[TRUNC6]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), [[BUILD_VECTOR6]](<2 x s16>) - ; GFX9-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<14 x s16>) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = COPY $vgpr3 - %4:_(s32) = COPY $vgpr4 - %5:_(s32) = COPY $vgpr5 - %6:_(s32) = COPY $vgpr6 - %7:_(s16) = G_TRUNC %0 - %8:_(s16) = G_TRUNC %1 - %9:_(s16) = G_TRUNC %2 - %10:_(s16) = G_TRUNC %3 - %11:_(s16) = G_TRUNC %4 - %12:_(s16) = G_TRUNC %5 - %13:_(s16) = G_TRUNC %6 - %14:_(<7 x s16>) = G_BUILD_VECTOR %7, %8, %9, %10, %11, %12, %13 - %15:_(<14 x s16>) = G_CONCAT_VECTORS %14, %14 - S_NOP 0, implicit %15 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY3]](i32) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY4]](i32) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY5]](i32) + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[COPY6]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC6]](i16), [[TRUNC]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC1]](i16), [[TRUNC2]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC3]](i16), [[TRUNC4]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR 
[[TRUNC5]](i16), [[TRUNC6]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<14 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR5]](<2 x i16>), [[BUILD_VECTOR6]](<2 x i16>) + ; GFX9-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<14 x i16>) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 + %4:_(i32) = COPY $vgpr4 + %5:_(i32) = COPY $vgpr5 + %6:_(i32) = COPY $vgpr6 + %7:_(i16) = G_TRUNC %0(i32) + %8:_(i16) = G_TRUNC %1(i32) + %9:_(i16) = G_TRUNC %2(i32) + %10:_(i16) = G_TRUNC %3(i32) + %11:_(i16) = G_TRUNC %4(i32) + %12:_(i16) = G_TRUNC %5(i32) + %13:_(i16) = G_TRUNC %6(i32) + %14:_(<7 x i16>) = G_BUILD_VECTOR %7(i16), %8(i16), %9(i16), %10(i16), %11(i16), %12(i16), %13(i16) + %15:_(<14 x i16>) = G_CONCAT_VECTORS %14(<7 x i16>), %14(<7 x i16>) + S_NOP 0, implicit %15(<14 x i16>) ... --- @@ -338,81 +343,82 @@ body: | ; GFX78-LABEL: name: build_vector_v8s16 ; GFX78: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; GFX78-NEXT: {{ $}} - ; GFX78-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX78-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX78-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX78-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX78-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX78-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX78-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX78-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX78-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX78-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX78-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX78-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX78-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; GFX78-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX78-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX78-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; GFX78-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] - ; GFX78-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) - ; GFX78-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; GFX78-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX78-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]] - ; GFX78-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C]] - ; GFX78-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) - ; GFX78-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; GFX78-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX78-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C]] - ; GFX78-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C]] - ; GFX78-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C1]](s32) - ; GFX78-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] - ; GFX78-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; GFX78-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; GFX78-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<8 x s16>) + ; GFX78-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX78-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX78-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX78-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX78-NEXT: 
[[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX78-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX78-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX78-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX78-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX78-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; GFX78-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GFX78-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX78-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C1]](i32) + ; GFX78-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; GFX78-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX78-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[COPY2]], [[C]] + ; GFX78-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[COPY3]], [[C]] + ; GFX78-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C1]](i32) + ; GFX78-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; GFX78-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; GFX78-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[COPY4]], [[C]] + ; GFX78-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[COPY5]], [[C]] + ; GFX78-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND5]], [[C1]](i32) + ; GFX78-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[AND4]], [[SHL2]] + ; GFX78-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; GFX78-NEXT: [[AND6:%[0-9]+]]:_(i32) = G_AND [[COPY6]], [[C]] + ; GFX78-NEXT: [[AND7:%[0-9]+]]:_(i32) = G_AND [[COPY7]], [[C]] + ; GFX78-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[AND7]], [[C1]](i32) + ; GFX78-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[AND6]], [[SHL3]] + ; GFX78-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR3]](i32) + ; GFX78-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x i16>), [[BITCAST1]](<2 x i16>), [[BITCAST2]](<2 x i16>), [[BITCAST3]](<2 x i16>) + ; GFX78-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<8 x i16>) + ; ; GFX9-LABEL: name: build_vector_v8s16 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) - ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[COPY7]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x 
s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX9-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<8 x s16>) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = COPY $vgpr3 - %4:_(s32) = COPY $vgpr4 - %5:_(s32) = COPY $vgpr5 - %6:_(s32) = COPY $vgpr6 - %7:_(s32) = COPY $vgpr7 - %8:_(s16) = G_TRUNC %0 - %9:_(s16) = G_TRUNC %1 - %10:_(s16) = G_TRUNC %2 - %11:_(s16) = G_TRUNC %3 - %12:_(s16) = G_TRUNC %4 - %13:_(s16) = G_TRUNC %5 - %14:_(s16) = G_TRUNC %6 - %15:_(s16) = G_TRUNC %7 - %16:_(<8 x s16>) = G_BUILD_VECTOR %8, %9, %10, %11, %12, %13, %14, %15 - S_NOP 0, implicit %16 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY3]](i32) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY4]](i32) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY5]](i32) + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[COPY6]](i32) + ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[COPY7]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC6]](i16), [[TRUNC7]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>) + ; GFX9-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<8 x i16>) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 + %4:_(i32) = COPY $vgpr4 + %5:_(i32) = COPY $vgpr5 + %6:_(i32) = COPY $vgpr6 + %7:_(i32) = COPY $vgpr7 + %8:_(i16) = G_TRUNC %0(i32) + %9:_(i16) = G_TRUNC %1(i32) + %10:_(i16) = G_TRUNC %2(i32) + %11:_(i16) = G_TRUNC %3(i32) + %12:_(i16) = G_TRUNC %4(i32) + %13:_(i16) = G_TRUNC %5(i32) + %14:_(i16) = G_TRUNC %6(i32) + %15:_(i16) = G_TRUNC %7(i32) + %16:_(<8 x i16>) = G_BUILD_VECTOR %8(i16), %9(i16), %10(i16), %11(i16), %12(i16), %13(i16), %14(i16), %15(i16) + S_NOP 0, implicit %16(<8 x i16>) ... 
--- @@ -424,143 +430,144 @@ body: | ; GFX78-LABEL: name: build_vector_v16s16 ; GFX78: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 ; GFX78-NEXT: {{ $}} - ; GFX78-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX78-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX78-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX78-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX78-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX78-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX78-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX78-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX78-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX78-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX78-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX78-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX78-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX78-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX78-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX78-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX78-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX78-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX78-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX78-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX78-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; GFX78-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX78-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX78-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; GFX78-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] - ; GFX78-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) - ; GFX78-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; GFX78-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX78-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]] - ; GFX78-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C]] - ; GFX78-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) - ; GFX78-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; GFX78-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX78-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C]] - ; GFX78-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C]] - ; GFX78-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C1]](s32) - ; GFX78-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] - ; GFX78-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; GFX78-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C]] - ; GFX78-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C]] - ; GFX78-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C1]](s32) - ; GFX78-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]] - ; GFX78-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; GFX78-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]] - ; GFX78-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C]] - ; GFX78-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C1]](s32) - ; GFX78-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]] - ; GFX78-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; GFX78-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C]] - ; GFX78-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C]] - ; GFX78-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C1]](s32) - ; GFX78-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR 
[[AND12]], [[SHL6]] - ; GFX78-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) - ; GFX78-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C]] - ; GFX78-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C]] - ; GFX78-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C1]](s32) - ; GFX78-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND14]], [[SHL7]] - ; GFX78-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) - ; GFX78-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) - ; GFX78-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<16 x s16>) + ; GFX78-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX78-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX78-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX78-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX78-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX78-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX78-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX78-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX78-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX78-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GFX78-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GFX78-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GFX78-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; GFX78-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; GFX78-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; GFX78-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; GFX78-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX78-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; GFX78-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GFX78-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX78-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C1]](i32) + ; GFX78-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; GFX78-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX78-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[COPY2]], [[C]] + ; GFX78-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[COPY3]], [[C]] + ; GFX78-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C1]](i32) + ; GFX78-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; GFX78-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; GFX78-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[COPY4]], [[C]] + ; GFX78-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[COPY5]], [[C]] + ; GFX78-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND5]], [[C1]](i32) + ; GFX78-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[AND4]], [[SHL2]] + ; GFX78-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; GFX78-NEXT: [[AND6:%[0-9]+]]:_(i32) = G_AND [[COPY6]], [[C]] + ; GFX78-NEXT: [[AND7:%[0-9]+]]:_(i32) = G_AND [[COPY7]], [[C]] + ; GFX78-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[AND7]], [[C1]](i32) + ; GFX78-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[AND6]], [[SHL3]] + ; GFX78-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR3]](i32) + ; GFX78-NEXT: [[AND8:%[0-9]+]]:_(i32) = G_AND [[COPY8]], [[C]] + ; GFX78-NEXT: [[AND9:%[0-9]+]]:_(i32) = G_AND [[COPY9]], [[C]] + ; GFX78-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[AND9]], [[C1]](i32) + ; GFX78-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[AND8]], [[SHL4]] + ; GFX78-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR4]](i32) + ; GFX78-NEXT: [[AND10:%[0-9]+]]:_(i32) = G_AND [[COPY10]], [[C]] + ; 
GFX78-NEXT: [[AND11:%[0-9]+]]:_(i32) = G_AND [[COPY11]], [[C]] + ; GFX78-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[AND11]], [[C1]](i32) + ; GFX78-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[AND10]], [[SHL5]] + ; GFX78-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR5]](i32) + ; GFX78-NEXT: [[AND12:%[0-9]+]]:_(i32) = G_AND [[COPY12]], [[C]] + ; GFX78-NEXT: [[AND13:%[0-9]+]]:_(i32) = G_AND [[COPY13]], [[C]] + ; GFX78-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[AND13]], [[C1]](i32) + ; GFX78-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[AND12]], [[SHL6]] + ; GFX78-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR6]](i32) + ; GFX78-NEXT: [[AND14:%[0-9]+]]:_(i32) = G_AND [[COPY14]], [[C]] + ; GFX78-NEXT: [[AND15:%[0-9]+]]:_(i32) = G_AND [[COPY15]], [[C]] + ; GFX78-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[AND15]], [[C1]](i32) + ; GFX78-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[AND14]], [[SHL7]] + ; GFX78-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR7]](i32) + ; GFX78-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x i16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x i16>), [[BITCAST1]](<2 x i16>), [[BITCAST2]](<2 x i16>), [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>), [[BITCAST5]](<2 x i16>), [[BITCAST6]](<2 x i16>), [[BITCAST7]](<2 x i16>) + ; GFX78-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<16 x i16>) + ; ; GFX9-LABEL: name: build_vector_v16s16 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) - ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[COPY7]](s32) - ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32) - ; GFX9-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX9-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) - ; GFX9-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), 
[[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC8]](s16), [[TRUNC9]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC10]](s16), [[TRUNC11]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC12]](s16), [[TRUNC13]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC14]](s16), [[TRUNC15]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), [[BUILD_VECTOR6]](<2 x s16>), [[BUILD_VECTOR7]](<2 x s16>) - ; GFX9-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<16 x s16>) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = COPY $vgpr3 - %4:_(s32) = COPY $vgpr4 - %5:_(s32) = COPY $vgpr5 - %6:_(s32) = COPY $vgpr6 - %7:_(s32) = COPY $vgpr7 - %8:_(s32) = COPY $vgpr8 - %9:_(s32) = COPY $vgpr9 - %10:_(s32) = COPY $vgpr10 - %11:_(s32) = COPY $vgpr11 - %12:_(s32) = COPY $vgpr12 - %13:_(s32) = COPY $vgpr13 - %14:_(s32) = COPY $vgpr14 - %15:_(s32) = COPY $vgpr15 - %16:_(s16) = G_TRUNC %0 - %17:_(s16) = G_TRUNC %1 - %18:_(s16) = G_TRUNC %2 - %19:_(s16) = G_TRUNC %3 - %20:_(s16) = G_TRUNC %4 - %21:_(s16) = G_TRUNC %5 - %22:_(s16) = G_TRUNC %6 - %23:_(s16) = G_TRUNC %7 - %24:_(s16) = G_TRUNC %8 - %25:_(s16) = G_TRUNC %9 - %26:_(s16) = G_TRUNC %10 - %27:_(s16) = G_TRUNC %11 - %28:_(s16) = G_TRUNC %12 - %29:_(s16) = G_TRUNC %13 - %30:_(s16) = G_TRUNC %14 - %31:_(s16) = G_TRUNC %15 - %32:_(<16 x s16>) = G_BUILD_VECTOR %16, %17, %18, %19, %20, %21, %22, %23, %24, %25, %26, %27, %28, %29, %30, %31 - S_NOP 0, implicit %32 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY $vgpr12 + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY $vgpr13 + ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY $vgpr14 + ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY $vgpr15 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY3]](i32) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY4]](i32) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY5]](i32) + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[COPY6]](i32) + ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[COPY7]](i32) + ; GFX9-NEXT: 
[[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[COPY8]](i32) + ; GFX9-NEXT: [[TRUNC9:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX9-NEXT: [[TRUNC10:%[0-9]+]]:_(i16) = G_TRUNC [[COPY10]](i32) + ; GFX9-NEXT: [[TRUNC11:%[0-9]+]]:_(i16) = G_TRUNC [[COPY11]](i32) + ; GFX9-NEXT: [[TRUNC12:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; GFX9-NEXT: [[TRUNC13:%[0-9]+]]:_(i16) = G_TRUNC [[COPY13]](i32) + ; GFX9-NEXT: [[TRUNC14:%[0-9]+]]:_(i16) = G_TRUNC [[COPY14]](i32) + ; GFX9-NEXT: [[TRUNC15:%[0-9]+]]:_(i16) = G_TRUNC [[COPY15]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC6]](i16), [[TRUNC7]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC8]](i16), [[TRUNC9]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC10]](i16), [[TRUNC11]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC12]](i16), [[TRUNC13]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC14]](i16), [[TRUNC15]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR5]](<2 x i16>), [[BUILD_VECTOR6]](<2 x i16>), [[BUILD_VECTOR7]](<2 x i16>) + ; GFX9-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<16 x i16>) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 + %4:_(i32) = COPY $vgpr4 + %5:_(i32) = COPY $vgpr5 + %6:_(i32) = COPY $vgpr6 + %7:_(i32) = COPY $vgpr7 + %8:_(i32) = COPY $vgpr8 + %9:_(i32) = COPY $vgpr9 + %10:_(i32) = COPY $vgpr10 + %11:_(i32) = COPY $vgpr11 + %12:_(i32) = COPY $vgpr12 + %13:_(i32) = COPY $vgpr13 + %14:_(i32) = COPY $vgpr14 + %15:_(i32) = COPY $vgpr15 + %16:_(i16) = G_TRUNC %0(i32) + %17:_(i16) = G_TRUNC %1(i32) + %18:_(i16) = G_TRUNC %2(i32) + %19:_(i16) = G_TRUNC %3(i32) + %20:_(i16) = G_TRUNC %4(i32) + %21:_(i16) = G_TRUNC %5(i32) + %22:_(i16) = G_TRUNC %6(i32) + %23:_(i16) = G_TRUNC %7(i32) + %24:_(i16) = G_TRUNC %8(i32) + %25:_(i16) = G_TRUNC %9(i32) + %26:_(i16) = G_TRUNC %10(i32) + %27:_(i16) = G_TRUNC %11(i32) + %28:_(i16) = G_TRUNC %12(i32) + %29:_(i16) = G_TRUNC %13(i32) + %30:_(i16) = G_TRUNC %14(i32) + %31:_(i16) = G_TRUNC %15(i32) + %32:_(<16 x i16>) = G_BUILD_VECTOR %16(i16), %17(i16), %18(i16), %19(i16), %20(i16), %21(i16), %22(i16), %23(i16), %24(i16), %25(i16), %26(i16), %27(i16), %28(i16), %29(i16), %30(i16), %31(i16) + S_NOP 0, implicit %32(<16 x i16>) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-concat-vectors.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-concat-vectors.mir index f5266cf2d08aa..2c351b803de90 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-concat-vectors.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-concat-vectors.mir @@ -10,14 +10,14 @@ body: | ; CHECK-LABEL: name: concat_vectors_v2s32_v2s32 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[COPY]](<2 x s32>), [[COPY1]](<2 x s32>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<4 x s32>) = G_CONCAT_VECTORS %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i32>) = G_CONCAT_VECTORS [[COPY]](<2 x i32>), [[COPY1]](<2 x i32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<4 x i32>) = G_CONCAT_VECTORS %0(<2 x i32>), %1(<2 x i32>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<4 x i32>) ... --- @@ -29,14 +29,14 @@ body: | ; CHECK-LABEL: name: concat_vectors_v2s16_v2s16 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<4 x s16>) = G_CONCAT_VECTORS %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[COPY]](<2 x i16>), [[COPY1]](<2 x i16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<4 x i16>) = G_CONCAT_VECTORS %0(<2 x i16>), %1(<2 x i16>) + $vgpr0_vgpr1 = COPY %2(<4 x i16>) ... 
--- @@ -48,18 +48,18 @@ body: | ; CHECK-LABEL: name: concat_vectors_v2s16_v2s16_v2s16_v2s16 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = COPY $vgpr2 - %3:_(<2 x s16>) = COPY $vgpr3 - %4:_(<8 x s16>) = G_CONCAT_VECTORS %0, %1, %2, %3 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr3 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i16>) = G_CONCAT_VECTORS [[COPY]](<2 x i16>), [[COPY1]](<2 x i16>), [[COPY2]](<2 x i16>), [[COPY3]](<2 x i16>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = COPY $vgpr2 + %3:_(<2 x i16>) = COPY $vgpr3 + %4:_(<8 x i16>) = G_CONCAT_VECTORS %0(<2 x i16>), %1(<2 x i16>), %2(<2 x i16>), %3(<2 x i16>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %4(<8 x i16>) ... --- @@ -71,14 +71,14 @@ body: | ; CHECK-LABEL: name: concat_vectors_v4s16_v4s16 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 - %2:_(<8 x s16>) = G_CONCAT_VECTORS %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i16>) = G_CONCAT_VECTORS [[COPY]](<4 x i16>), [[COPY1]](<4 x i16>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = COPY $vgpr2_vgpr3 + %2:_(<8 x i16>) = G_CONCAT_VECTORS %0(<4 x i16>), %1(<4 x i16>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<8 x i16>) ... 
--- @@ -90,14 +90,14 @@ body: | ; CHECK-LABEL: name: concat_vectors_v4s32_v4s32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[COPY]](<4 x s32>), [[COPY1]](<4 x s32>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3, - %1:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - %2:_(<8 x s32>) = G_CONCAT_VECTORS %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i32>) = G_CONCAT_VECTORS [[COPY]](<4 x i32>), [[COPY1]](<4 x i32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x i32>) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<4 x i32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(<8 x i32>) = G_CONCAT_VECTORS %0(<4 x i32>), %1(<4 x i32>) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %2(<8 x i32>) ... --- @@ -109,19 +109,19 @@ body: | ; CHECK-LABEL: name: concat_vectors_v2s32_v2s32_v2s32_v2s32 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr6_vgpr7 - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[COPY]](<2 x s32>), [[COPY1]](<2 x s32>), [[COPY2]](<2 x s32>), [[COPY3]](<2 x s32>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = COPY $vgpr4_vgpr5 - %3:_(<2 x s32>) = COPY $vgpr6_vgpr7 - - %4:_(<8 x s32>) = G_CONCAT_VECTORS %0, %1, %2, %3 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr4_vgpr5 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr6_vgpr7 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i32>) = G_CONCAT_VECTORS [[COPY]](<2 x i32>), [[COPY1]](<2 x i32>), [[COPY2]](<2 x i32>), [[COPY3]](<2 x i32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i32>) = COPY $vgpr4_vgpr5 + %3:_(<2 x i32>) = COPY $vgpr6_vgpr7 + %4:_(<8 x i32>) = G_CONCAT_VECTORS %0(<2 x i32>), %1(<2 x i32>), %2(<2 x i32>), %3(<2 x i32>) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %4(<8 x i32>) + ... 
--- name: concat_vectors_v2s64_v2s64 @@ -132,14 +132,14 @@ body: | ; CHECK-LABEL: name: concat_vectors_v2s64_v2s64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[COPY]](<2 x s64>), [[COPY1]](<2 x s64>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - %2:_(<4 x s64>) = G_CONCAT_VECTORS %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i64>) = G_CONCAT_VECTORS [[COPY]](<2 x i64>), [[COPY1]](<2 x i64>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x i64>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(<4 x i64>) = G_CONCAT_VECTORS %0(<2 x i64>), %1(<2 x i64>) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %2(<4 x i64>) ... --- @@ -157,8 +157,8 @@ body: | ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x p1>) %0:_(<2 x p1>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x p1>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - %2:_(<4 x p1>) = G_CONCAT_VECTORS %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %2 + %2:_(<4 x p1>) = G_CONCAT_VECTORS %0(<2 x p1>), %1(<2 x p1>) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %2(<4 x p1>) ... --- @@ -176,8 +176,8 @@ body: | ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x p0>) %0:_(<2 x p0>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x p0>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - %2:_(<4 x p0>) = G_CONCAT_VECTORS %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %2 + %2:_(<4 x p0>) = G_CONCAT_VECTORS %0(<2 x p0>), %1(<2 x p0>) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %2(<4 x p0>) ... --- @@ -195,8 +195,8 @@ body: | ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x p3>) %0:_(<2 x p3>) = COPY $vgpr0_vgpr1 %1:_(<2 x p3>) = COPY $vgpr2_vgpr3 - %2:_(<4 x p3>) = G_CONCAT_VECTORS %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + %2:_(<4 x p3>) = G_CONCAT_VECTORS %0(<2 x p3>), %1(<2 x p3>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<4 x p3>) ... --- @@ -214,8 +214,8 @@ body: | ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x p5>) %0:_(<2 x p5>) = COPY $vgpr0_vgpr1 %1:_(<2 x p5>) = COPY $vgpr2_vgpr3 - %2:_(<4 x p5>) = G_CONCAT_VECTORS %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + %2:_(<4 x p5>) = G_CONCAT_VECTORS %0(<2 x p5>), %1(<2 x p5>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<4 x p5>) ... 
--- @@ -233,8 +233,8 @@ body: | ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x p999>) %0:_(<2 x p999>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x p999>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - %2:_(<4 x p999>) = G_CONCAT_VECTORS %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %2 + %2:_(<4 x p999>) = G_CONCAT_VECTORS %0(<2 x p999>), %1(<2 x p999>) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %2(<4 x p999>) ... --- @@ -246,10 +246,10 @@ body: | ; CHECK-LABEL: name: concat_vectors_v6s16_v3s16 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY]](<6 x s16>) - %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0 - %3:_(<6 x s16>) = G_CONCAT_VECTORS %1, %2 - $vgpr0_vgpr1_vgpr2 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY]](<6 x i16>) + %0:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x i16>), %2:_(<3 x i16>) = G_UNMERGE_VALUES %0(<6 x i16>) + %3:_(<6 x i16>) = G_CONCAT_VECTORS %1(<3 x i16>), %2(<3 x i16>) + $vgpr0_vgpr1_vgpr2 = COPY %3(<6 x i16>) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-constant.mir index fa34420b5ae92..8ca02fd5db2aa 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-constant.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-constant.mir @@ -7,10 +7,10 @@ body: | bb.0: ; CHECK-LABEL: name: test_constant_s32 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32) - %0:_(s32) = G_CONSTANT i32 5 - $vgpr0 = COPY %0 + ; CHECK: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 5 + ; CHECK-NEXT: $vgpr0 = COPY [[C]](i32) + %0:_(i32) = G_CONSTANT i32 5 + $vgpr0 = COPY %0(i32) ... --- name: test_constant_s64 @@ -18,10 +18,10 @@ body: | bb.0: ; CHECK-LABEL: name: test_constant_s64 - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[C]](s64) - %0:_(s64) = G_CONSTANT i64 5 - $vgpr0_vgpr1 = COPY %0 + ; CHECK: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 5 + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[C]](i64) + %0:_(i64) = G_CONSTANT i64 5 + $vgpr0_vgpr1 = COPY %0(i64) ... @@ -31,13 +31,13 @@ body: | bb.0: ; CHECK-LABEL: name: test_constant_s96 - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -4780896129847249538 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -547834910 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64) - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32), [[C1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) - %0:_(s96) = G_CONSTANT i96 -10105770365747857631829412482 - $vgpr0_vgpr1_vgpr2 = COPY %0 + ; CHECK: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 -4780896129847249538 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 -547834910 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C]](i64) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i96) = G_MERGE_VALUES [[UV]](i32), [[UV1]](i32), [[C1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](i96) + %0:_(i96) = G_CONSTANT i96 -10105770365747857631829412482 + $vgpr0_vgpr1_vgpr2 = COPY %0(i96) ... 
@@ -47,10 +47,10 @@ body: | bb.0: ; CHECK-LABEL: name: test_constant_s1 - ; CHECK: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 false - ; CHECK-NEXT: S_ENDPGM 0, implicit [[C]](s1) - %1:_(s1) = G_CONSTANT i1 0 - S_ENDPGM 0, implicit %1 + ; CHECK: [[C:%[0-9]+]]:_(i1) = G_CONSTANT i1 false + ; CHECK-NEXT: S_ENDPGM 0, implicit [[C]](i1) + %0:_(i1) = G_CONSTANT i1 false + S_ENDPGM 0, implicit %0(i1) ... --- @@ -59,11 +59,11 @@ body: | bb.0: ; CHECK-LABEL: name: test_constant_s7 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32) - %0:_(s7) = G_CONSTANT i7 5 - %1:_(s32) = G_ANYEXT %0 - $vgpr0 = COPY %1 + ; CHECK: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 5 + ; CHECK-NEXT: $vgpr0 = COPY [[C]](i32) + %0:_(i7) = G_CONSTANT i7 5 + %1:_(i32) = G_ANYEXT %0(i7) + $vgpr0 = COPY %1(i32) ... --- @@ -72,11 +72,11 @@ body: | bb.0: ; CHECK-LABEL: name: test_constant_s8 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32) - %0:_(s8) = G_CONSTANT i8 5 - %1:_(s32) = G_ANYEXT %0 - $vgpr0 = COPY %1 + ; CHECK: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 5 + ; CHECK-NEXT: $vgpr0 = COPY [[C]](i32) + %0:_(i8) = G_CONSTANT i8 5 + %1:_(i32) = G_ANYEXT %0(i8) + $vgpr0 = COPY %1(i32) ... --- @@ -85,11 +85,11 @@ body: | bb.0: ; CHECK-LABEL: name: test_constant_s16 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32) - %0:_(s16) = G_CONSTANT i16 5 - %1:_(s32) = G_ANYEXT %0 - $vgpr0 = COPY %1 + ; CHECK: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 5 + ; CHECK-NEXT: $vgpr0 = COPY [[C]](i32) + %0:_(i16) = G_CONSTANT i16 5 + %1:_(i32) = G_ANYEXT %0(i16) + $vgpr0 = COPY %1(i32) ... --- @@ -98,12 +98,12 @@ body: | bb.0: ; CHECK-LABEL: name: test_constant_s128 - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C]](s64), [[C1]](s64) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) - %0:_(s128) = G_CONSTANT i128 5 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %0 + ; CHECK: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 5 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[C]](i64), [[C1]](i64) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) + %0:_(i128) = G_CONSTANT i128 5 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %0(i128) ... --- @@ -115,7 +115,7 @@ body: | ; CHECK: [[C:%[0-9]+]]:_(p0) = G_CONSTANT i64 0 ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[C]](p0) %0:_(p0) = G_CONSTANT i64 0 - $vgpr0_vgpr1 = COPY %0 + $vgpr0_vgpr1 = COPY %0(p0) ... --- @@ -127,7 +127,7 @@ body: | ; CHECK: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[C]](p1) %0:_(p1) = G_CONSTANT i64 0 - $vgpr0_vgpr1 = COPY %0 + $vgpr0_vgpr1 = COPY %0(p1) ... --- @@ -139,7 +139,7 @@ body: | ; CHECK: [[C:%[0-9]+]]:_(p2) = G_CONSTANT i32 0 ; CHECK-NEXT: $vgpr0 = COPY [[C]](p2) %0:_(p2) = G_CONSTANT i32 0 - $vgpr0 = COPY %0 + $vgpr0 = COPY %0(p2) ... --- @@ -151,7 +151,7 @@ body: | ; CHECK: [[C:%[0-9]+]]:_(p2) = G_CONSTANT i32 0 ; CHECK-NEXT: $vgpr0 = COPY [[C]](p2) %0:_(p2) = G_CONSTANT i32 0 - $vgpr0 = COPY %0 + $vgpr0 = COPY %0(p2) ... --- @@ -163,7 +163,7 @@ body: | ; CHECK: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0 ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[C]](p4) %0:_(p4) = G_CONSTANT i64 0 - $vgpr0_vgpr1 = COPY %0 + $vgpr0_vgpr1 = COPY %0(p4) ... 
--- @@ -175,7 +175,7 @@ body: | ; CHECK: [[C:%[0-9]+]]:_(p5) = G_CONSTANT i32 0 ; CHECK-NEXT: $vgpr0 = COPY [[C]](p5) %0:_(p5) = G_CONSTANT i32 0 - $vgpr0 = COPY %0 + $vgpr0 = COPY %0(p5) ... --- @@ -187,5 +187,5 @@ body: | ; CHECK: [[C:%[0-9]+]]:_(p999) = G_CONSTANT i64 0 ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[C]](p999) %0:_(p999) = G_CONSTANT i64 0 - $vgpr0_vgpr1 = COPY %0 + $vgpr0_vgpr1 = COPY %0(p999) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir index 68587630e2195..c9c8cc6a267d0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir @@ -10,12 +10,12 @@ body: | ; CHECK-LABEL: name: ctlz_zero_undef_s32_s32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[COPY]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[CTLZ_ZERO_UNDEF]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_CTLZ_ZERO_UNDEF %0 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(i32) = G_CTLZ_ZERO_UNDEF [[COPY]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[CTLZ_ZERO_UNDEF]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_CTLZ_ZERO_UNDEF %0(i32) + $vgpr0 = COPY %1(i32) ... --- @@ -27,12 +27,12 @@ body: | ; CHECK-LABEL: name: ctlz_zero_undef_s32_s64 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[COPY]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[CTLZ_ZERO_UNDEF]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_CTLZ_ZERO_UNDEF %0 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(i32) = G_CTLZ_ZERO_UNDEF [[COPY]](i64) + ; CHECK-NEXT: $vgpr0 = COPY [[CTLZ_ZERO_UNDEF]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = G_CTLZ_ZERO_UNDEF %0(i64) + $vgpr0 = COPY %1(i32) ... --- @@ -44,13 +44,13 @@ body: | ; CHECK-LABEL: name: ctlz_zero_undef_s64_s64 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[COPY]](s64) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[CTLZ_ZERO_UNDEF]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_CTLZ_ZERO_UNDEF %0 - $vgpr0_vgpr1 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(i32) = G_CTLZ_ZERO_UNDEF [[COPY]](i64) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[CTLZ_ZERO_UNDEF]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = G_CTLZ_ZERO_UNDEF %0(i64) + $vgpr0_vgpr1 = COPY %1(i64) ... 
--- @@ -62,13 +62,13 @@ body: | ; CHECK-LABEL: name: ctlz_zero_undef_s16_s32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[COPY]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[CTLZ_ZERO_UNDEF]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_CTLZ_ZERO_UNDEF %0 - %2:_(s32) = G_ZEXT %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(i32) = G_CTLZ_ZERO_UNDEF [[COPY]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[CTLZ_ZERO_UNDEF]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_CTLZ_ZERO_UNDEF %0(i32) + %2:_(i32) = G_ZEXT %1(i16) + $vgpr0 = COPY %2(i32) ... --- @@ -80,18 +80,18 @@ body: | ; CHECK-LABEL: name: ctlz_zero_undef_s16_s16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) - ; CHECK-NEXT: [[AMDGPU_FFBH_U32:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[SHL]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[AMDGPU_FFBH_U32]], [[C1]] - ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s16) = G_CTLZ_ZERO_UNDEF %1 - %3:_(s32) = G_ZEXT %2 - $vgpr0 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[C]](i32) + ; CHECK-NEXT: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(i32) = G_AMDGPU_FFBH_U32 [[SHL]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[AMDGPU_FFBH_U32_]], [[C1]] + ; CHECK-NEXT: $vgpr0 = COPY [[AND]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(i16) = G_CTLZ_ZERO_UNDEF %1(i16) + %3:_(i32) = G_ZEXT %2(i16) + $vgpr0 = COPY %3(i32) ... --- @@ -103,15 +103,15 @@ body: | ; CHECK-LABEL: name: ctlz_zero_undef_v2s32_v2s32 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[UV]](s32) - ; CHECK-NEXT: [[CTLZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[UV1]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[CTLZ_ZERO_UNDEF]](s32), [[CTLZ_ZERO_UNDEF1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_CTLZ_ZERO_UNDEF %0 - $vgpr0_vgpr1 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(i32) = G_CTLZ_ZERO_UNDEF [[UV]](i32) + ; CHECK-NEXT: [[CTLZ_ZERO_UNDEF1:%[0-9]+]]:_(i32) = G_CTLZ_ZERO_UNDEF [[UV1]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[CTLZ_ZERO_UNDEF]](i32), [[CTLZ_ZERO_UNDEF1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = G_CTLZ_ZERO_UNDEF %0(<2 x i32>) + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... 
--- @@ -123,15 +123,15 @@ body: | ; CHECK-LABEL: name: ctlz_zero_undef_v2s32_v2s64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; CHECK-NEXT: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[UV]](s64) - ; CHECK-NEXT: [[CTLZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[UV1]](s64) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[CTLZ_ZERO_UNDEF]](s32), [[CTLZ_ZERO_UNDEF1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s32>) = G_CTLZ_ZERO_UNDEF %0 - $vgpr0_vgpr1 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; CHECK-NEXT: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(i32) = G_CTLZ_ZERO_UNDEF [[UV]](i64) + ; CHECK-NEXT: [[CTLZ_ZERO_UNDEF1:%[0-9]+]]:_(i32) = G_CTLZ_ZERO_UNDEF [[UV1]](i64) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[CTLZ_ZERO_UNDEF]](i32), [[CTLZ_ZERO_UNDEF1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x i32>) = G_CTLZ_ZERO_UNDEF %0(<2 x i64>) + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... --- @@ -143,24 +143,24 @@ body: | ; CHECK-LABEL: name: ctlz_zero_undef_v2s16_v2s16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[AMDGPU_FFBH_U32:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[SHL]](s32) - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; CHECK-NEXT: [[AMDGPU_FFBH_U321:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[SHL2]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[AMDGPU_FFBH_U32]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AMDGPU_FFBH_U321]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = G_CTLZ_ZERO_UNDEF %0 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(i32) = G_AMDGPU_FFBH_U32 [[SHL]](i32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C]](i32) + ; CHECK-NEXT: [[AMDGPU_FFBH_U32_1:%[0-9]+]]:_(i32) = G_AMDGPU_FFBH_U32 [[SHL1]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[AMDGPU_FFBH_U32_]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[AMDGPU_FFBH_U32_1]], [[C1]] + ; CHECK-NEXT: 
[[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = G_CTLZ_ZERO_UNDEF %0(<2 x i16>) + $vgpr0 = COPY %1(<2 x i16>) ... --- @@ -173,18 +173,18 @@ body: | ; CHECK-LABEL: name: ctlz_zero_undef_s7_s7 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) - ; CHECK-NEXT: [[FFBH:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[SHL]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[FFBH]], [[C1]] - ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s7) = G_TRUNC %0 - %2:_(s7) = G_CTLZ_ZERO_UNDEF %1 - %3:_(s32) = G_ZEXT %2 - $vgpr0 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 25 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[C]](i32) + ; CHECK-NEXT: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(i32) = G_AMDGPU_FFBH_U32 [[SHL]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 127 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[AMDGPU_FFBH_U32_]], [[C1]] + ; CHECK-NEXT: $vgpr0 = COPY [[AND]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i7) = G_TRUNC %0(i32) + %2:_(i7) = G_CTLZ_ZERO_UNDEF %1(i7) + %3:_(i32) = G_ZEXT %2(i7) + $vgpr0 = COPY %3(i32) ... --- @@ -197,17 +197,17 @@ body: | ; CHECK-LABEL: name: ctlz_zero_undef_s33_s33 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[AND]], [[C1]](s32) - ; CHECK-NEXT: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[SHL]](s64) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[CTLZ_ZERO_UNDEF]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s33) = G_TRUNC %0 - %2:_(s33) = G_CTLZ_ZERO_UNDEF %1 - %3:_(s64) = G_ANYEXT %2 - $vgpr0_vgpr1 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 8589934591 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[AND]], [[C1]](i32) + ; CHECK-NEXT: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(i32) = G_CTLZ_ZERO_UNDEF [[SHL]](i64) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[CTLZ_ZERO_UNDEF]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i33) = G_TRUNC %0(i64) + %2:_(i33) = G_CTLZ_ZERO_UNDEF %1(i33) + %3:_(i64) = G_ANYEXT %2(i33) + $vgpr0_vgpr1 = COPY %3(i64) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir index dd116927779b5..62bf3911a1c5f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir @@ -10,14 +10,14 @@ body: | ; CHECK-LABEL: name: ctlz_s32_s32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[COPY]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CHECK-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C]] - ; CHECK-NEXT: $vgpr0 = COPY [[UMIN]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_CTLZ %0 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(i32) = G_AMDGPU_FFBH_U32 [[COPY]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; CHECK-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C]] + ; CHECK-NEXT: $vgpr0 = COPY [[UMIN]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_CTLZ %0(i32) + $vgpr0 = COPY %1(i32) ... --- @@ -29,14 +29,14 @@ body: | ; CHECK-LABEL: name: ctlz_s32_s64 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[COPY]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; CHECK-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C]] - ; CHECK-NEXT: $vgpr0 = COPY [[UMIN]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_CTLZ %0 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(i32) = G_AMDGPU_FFBH_U32 [[COPY]](i64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; CHECK-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C]] + ; CHECK-NEXT: $vgpr0 = COPY [[UMIN]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = G_CTLZ %0(i64) + $vgpr0 = COPY %1(i32) ... --- @@ -48,15 +48,15 @@ body: | ; CHECK-LABEL: name: ctlz_s64_s64 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[COPY]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; CHECK-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C]] - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[UMIN]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_CTLZ %0 - $vgpr0_vgpr1 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(i32) = G_AMDGPU_FFBH_U32 [[COPY]](i64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; CHECK-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[UMIN]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = G_CTLZ %0(i64) + $vgpr0_vgpr1 = COPY %1(i64) ... 
--- @@ -68,15 +68,15 @@ body: | ; CHECK-LABEL: name: ctlz_s16_s32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[COPY]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CHECK-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C]] - ; CHECK-NEXT: $vgpr0 = COPY [[UMIN]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_CTLZ %0 - %2:_(s32) = G_ZEXT %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(i32) = G_AMDGPU_FFBH_U32 [[COPY]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; CHECK-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C]] + ; CHECK-NEXT: $vgpr0 = COPY [[UMIN]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_CTLZ %0(i32) + %2:_(i32) = G_ZEXT %1(i16) + $vgpr0 = COPY %2(i32) ... --- @@ -88,22 +88,22 @@ body: | ; CHECK-LABEL: name: ctlz_s16_s16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; CHECK-NEXT: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[AND]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CHECK-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UMIN]], [[C2]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CHECK-NEXT: $vgpr0 = COPY [[AND1]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s16) = G_CTLZ %1 - %3:_(s32) = G_ZEXT %2 - $vgpr0 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(i32) = G_AMDGPU_FFBH_U32 [[AND]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; CHECK-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UMIN]], [[C2]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[SUB]](i32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: $vgpr0 = COPY [[AND1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(i16) = G_CTLZ %1(i16) + %3:_(i32) = G_ZEXT %2(i16) + $vgpr0 = COPY %3(i32) ... 
--- @@ -115,18 +115,18 @@ body: | ; CHECK-LABEL: name: ctlz_v2s32_v2s32 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[UV]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CHECK-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C]] - ; CHECK-NEXT: [[AMDGPU_FFBH_U32_1:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[UV1]](s32) - ; CHECK-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_1]], [[C]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_CTLZ %0 - $vgpr0_vgpr1 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(i32) = G_AMDGPU_FFBH_U32 [[UV]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; CHECK-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C]] + ; CHECK-NEXT: [[AMDGPU_FFBH_U32_1:%[0-9]+]]:_(i32) = G_AMDGPU_FFBH_U32 [[UV1]](i32) + ; CHECK-NEXT: [[UMIN1:%[0-9]+]]:_(i32) = G_UMIN [[AMDGPU_FFBH_U32_1]], [[C]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[UMIN]](i32), [[UMIN1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = G_CTLZ %0(<2 x i32>) + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... --- @@ -138,18 +138,18 @@ body: | ; CHECK-LABEL: name: ctlz_v2s32_v2s64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; CHECK-NEXT: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[UV]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; CHECK-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C]] - ; CHECK-NEXT: [[AMDGPU_FFBH_U32_1:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[UV1]](s64) - ; CHECK-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_1]], [[C]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s32>) = G_CTLZ %0 - $vgpr0_vgpr1 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; CHECK-NEXT: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(i32) = G_AMDGPU_FFBH_U32 [[UV]](i64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; CHECK-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C]] + ; CHECK-NEXT: [[AMDGPU_FFBH_U32_1:%[0-9]+]]:_(i32) = G_AMDGPU_FFBH_U32 [[UV1]](i64) + ; CHECK-NEXT: [[UMIN1:%[0-9]+]]:_(i32) = G_UMIN [[AMDGPU_FFBH_U32_1]], [[C]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[UMIN]](i32), [[UMIN1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x i32>) = G_CTLZ %0(<2 x i64>) + 
$vgpr0_vgpr1 = COPY %1(<2 x i32>) ... --- @@ -161,30 +161,30 @@ body: | ; CHECK-LABEL: name: ctlz_v2s16_v2s16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[AND]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CHECK-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C2]] - ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UMIN]], [[C]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) - ; CHECK-NEXT: [[AMDGPU_FFBH_U32_1:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[LSHR]](s32) - ; CHECK-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_1]], [[C2]] - ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UMIN1]], [[C]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SUB1]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]] - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = G_CTLZ %0 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(i32) = G_AMDGPU_FFBH_U32 [[AND]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; CHECK-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C2]] + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UMIN]], [[C]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[SUB]](i32) + ; CHECK-NEXT: [[AMDGPU_FFBH_U32_1:%[0-9]+]]:_(i32) = G_AMDGPU_FFBH_U32 [[LSHR]](i32) + ; CHECK-NEXT: [[UMIN1:%[0-9]+]]:_(i32) = G_UMIN [[AMDGPU_FFBH_U32_1]], [[C2]] + ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[UMIN1]], [[C]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[SUB1]](i32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[COPY2]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND2]], [[C]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND1]], [[SHL]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = G_CTLZ %0(<2 x i16>) + $vgpr0 = COPY %1(<2 x i16>) ... 
--- @@ -197,22 +197,22 @@ body: | ; CHECK-LABEL: name: ctlz_s7_s7 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; CHECK-NEXT: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[AND]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CHECK-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 - ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UMIN]], [[C2]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CHECK-NEXT: $vgpr0 = COPY [[AND1]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s7) = G_TRUNC %0 - %2:_(s7) = G_CTLZ %1 - %3:_(s32) = G_ZEXT %2 - $vgpr0 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 127 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(i32) = G_AMDGPU_FFBH_U32 [[AND]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; CHECK-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 25 + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UMIN]], [[C2]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[SUB]](i32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: $vgpr0 = COPY [[AND1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i7) = G_TRUNC %0(i32) + %2:_(i7) = G_CTLZ %1(i7) + %3:_(i32) = G_ZEXT %2(i7) + $vgpr0 = COPY %3(i32) ... --- @@ -225,22 +225,22 @@ body: | ; CHECK-LABEL: name: ctlz_s33_s33 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] - ; CHECK-NEXT: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[AND]](s64) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; CHECK-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 31 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) - ; CHECK-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UMIN]], [[UV]] - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[USUBO]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s33) = G_TRUNC %0 - %2:_(s33) = G_CTLZ %1 - %3:_(s64) = G_ANYEXT %2 - $vgpr0_vgpr1 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 8589934591 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(i32) = G_AMDGPU_FFBH_U32 [[AND]](i64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; CHECK-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 31 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C2]](i64) + ; CHECK-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UMIN]], [[UV]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[USUBO]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](i64) + %0:_(i64) = COPY 
$vgpr0_vgpr1 + %1:_(i33) = G_TRUNC %0(i64) + %2:_(i33) = G_CTLZ %1(i33) + %3:_(i64) = G_ANYEXT %2(i33) + $vgpr0_vgpr1 = COPY %3(i64) ... # --- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctpop.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctpop.mir index f183271cd5f27..5f6f7d4171e2c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctpop.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctpop.mir @@ -9,17 +9,17 @@ body: | ; CHECK-LABEL: name: ctpop_s8_s8 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[AND]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTPOP]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s8) = G_TRUNC %0 - %2:_(s8) = G_CTPOP %1 - %3:_(s32) = G_ZEXT %2 - $vgpr0 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(i32) = G_CTPOP [[AND]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[CTPOP]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i8) = G_TRUNC %0(i32) + %2:_(i8) = G_CTPOP %1(i8) + %3:_(i32) = G_ZEXT %2(i8) + $vgpr0 = COPY %3(i32) ... --- @@ -30,17 +30,17 @@ body: | ; CHECK-LABEL: name: ctpop_s9_s9 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 511 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[AND]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTPOP]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s9) = G_TRUNC %0 - %2:_(s9) = G_CTPOP %1 - %3:_(s32) = G_ZEXT %2 - $vgpr0 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 511 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(i32) = G_CTPOP [[AND]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[CTPOP]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i9) = G_TRUNC %0(i32) + %2:_(i9) = G_CTPOP %1(i9) + %3:_(i32) = G_ZEXT %2(i9) + $vgpr0 = COPY %3(i32) ... --- @@ -52,12 +52,12 @@ body: | ; CHECK-LABEL: name: ctpop_s32_s32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[COPY]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[CTPOP]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_CTPOP %0 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(i32) = G_CTPOP [[COPY]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[CTPOP]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_CTPOP %0(i32) + $vgpr0 = COPY %1(i32) ... 
--- @@ -69,12 +69,12 @@ body: | ; CHECK-LABEL: name: ctpop_s32_s64 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[COPY]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[CTPOP]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_CTPOP %0 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(i32) = G_CTPOP [[COPY]](i64) + ; CHECK-NEXT: $vgpr0 = COPY [[CTPOP]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = G_CTPOP %0(i64) + $vgpr0 = COPY %1(i32) ... --- @@ -86,13 +86,13 @@ body: | ; CHECK-LABEL: name: ctpop_s64_s64 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[COPY]](s64) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[CTPOP]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_CTPOP %0 - $vgpr0_vgpr1 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(i32) = G_CTPOP [[COPY]](i64) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[CTPOP]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = G_CTPOP %0(i64) + $vgpr0_vgpr1 = COPY %1(i64) ... --- @@ -104,13 +104,13 @@ body: | ; CHECK-LABEL: name: ctpop_s16_s32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[COPY]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[CTPOP]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_CTPOP %0 - %2:_(s32) = G_ZEXT %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(i32) = G_CTPOP [[COPY]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[CTPOP]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_CTPOP %0(i32) + %2:_(i32) = G_ZEXT %1(i16) + $vgpr0 = COPY %2(i32) ... --- @@ -122,17 +122,17 @@ body: | ; CHECK-LABEL: name: ctpop_s16_s16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[AND]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTPOP]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s16) = G_CTPOP %1 - %3:_(s32) = G_ZEXT %2 - $vgpr0 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(i32) = G_CTPOP [[AND]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[CTPOP]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(i16) = G_CTPOP %1(i16) + %3:_(i32) = G_ZEXT %2(i16) + $vgpr0 = COPY %3(i32) ... 
--- @@ -144,15 +144,15 @@ body: | ; CHECK-LABEL: name: ctpop_v2s32_v2s32 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[UV]](s32) - ; CHECK-NEXT: [[CTPOP1:%[0-9]+]]:_(s32) = G_CTPOP [[UV1]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[CTPOP]](s32), [[CTPOP1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_CTPOP %0 - $vgpr0_vgpr1 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(i32) = G_CTPOP [[UV]](i32) + ; CHECK-NEXT: [[CTPOP1:%[0-9]+]]:_(i32) = G_CTPOP [[UV1]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[CTPOP]](i32), [[CTPOP1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = G_CTPOP %0(<2 x i32>) + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... --- @@ -164,15 +164,15 @@ body: | ; CHECK-LABEL: name: ctpop_v2s32_v2s64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[UV]](s64) - ; CHECK-NEXT: [[CTPOP1:%[0-9]+]]:_(s32) = G_CTPOP [[UV1]](s64) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[CTPOP]](s32), [[CTPOP1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s32>) = G_CTPOP %0 - $vgpr0_vgpr1 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(i32) = G_CTPOP [[UV]](i64) + ; CHECK-NEXT: [[CTPOP1:%[0-9]+]]:_(i32) = G_CTPOP [[UV1]](i64) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[CTPOP]](i32), [[CTPOP1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x i32>) = G_CTPOP %0(<2 x i64>) + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... 
--- @@ -184,23 +184,23 @@ body: | ; CHECK-LABEL: name: ctpop_v2s16_v2s16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[AND]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTPOP]](s32) - ; CHECK-NEXT: [[CTPOP1:%[0-9]+]]:_(s32) = G_CTPOP [[LSHR]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[CTPOP1]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL]] - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = G_CTPOP %0 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(i32) = G_CTPOP [[AND]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[CTPOP]](i32) + ; CHECK-NEXT: [[CTPOP1:%[0-9]+]]:_(i32) = G_CTPOP [[LSHR]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[CTPOP1]](i32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY2]], [[C]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY1]], [[SHL]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = G_CTPOP %0(<2 x i16>) + $vgpr0 = COPY %1(<2 x i16>) ... --- @@ -213,17 +213,17 @@ body: | ; CHECK-LABEL: name: ctpop_s7_s7 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[AND]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTPOP]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s7) = G_TRUNC %0 - %2:_(s7) = G_CTPOP %1 - %3:_(s32) = G_ZEXT %2 - $vgpr0 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 127 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(i32) = G_CTPOP [[AND]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[CTPOP]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i7) = G_TRUNC %0(i32) + %2:_(i7) = G_CTPOP %1(i7) + %3:_(i32) = G_ZEXT %2(i7) + $vgpr0 = COPY %3(i32) ... 
--- @@ -236,17 +236,17 @@ body: | ; CHECK-LABEL: name: ctpop_s33_s33 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] - ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[AND]](s64) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[CTPOP]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s33) = G_TRUNC %0 - %2:_(s33) = G_CTPOP %1 - %3:_(s64) = G_ANYEXT %2 - $vgpr0_vgpr1 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 8589934591 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(i32) = G_CTPOP [[AND]](i64) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[CTPOP]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i33) = G_TRUNC %0(i64) + %2:_(i33) = G_CTPOP %1(i33) + %3:_(i64) = G_ANYEXT %2(i33) + $vgpr0_vgpr1 = COPY %3(i64) ... --- @@ -257,24 +257,24 @@ body: | ; CHECK-LABEL: name: ctpop_s65_s65 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[MV]], [[C]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[MV1]], [[C1]] - ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[AND]](s64) - ; CHECK-NEXT: [[CTPOP1:%[0-9]+]]:_(s32) = G_CTPOP [[AND1]](s64) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[CTPOP1]], [[CTPOP]] - ; CHECK-NEXT: $vgpr0 = COPY [[ADD]](s32) - %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(s65) = G_TRUNC %0 - %2:_(s65) = G_CTPOP %1 - %3:_(s32) = G_TRUNC %2 - $vgpr0 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i96) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV]](i32), [[UV1]](i32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV2]](i32), [[DEF]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 -1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[MV]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[MV1]], [[C1]] + ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(i32) = G_CTPOP [[AND]](i64) + ; CHECK-NEXT: [[CTPOP1:%[0-9]+]]:_(i32) = G_CTPOP [[AND1]](i64) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[CTPOP1]], [[CTPOP]] + ; CHECK-NEXT: $vgpr0 = COPY [[ADD]](i32) + %0:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(i65) = G_TRUNC %0(i96) + %2:_(i65) = G_CTPOP %1(i65) + %3:_(i32) = G_TRUNC %2(i65) + $vgpr0 = COPY %3(i32) ... 
--- @@ -285,21 +285,21 @@ body: | ; CHECK-LABEL: name: ctpop_s32_s65 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[MV]], [[C]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[MV1]], [[C1]] - ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[AND]](s64) - ; CHECK-NEXT: [[CTPOP1:%[0-9]+]]:_(s32) = G_CTPOP [[AND1]](s64) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[CTPOP1]], [[CTPOP]] - ; CHECK-NEXT: $vgpr0 = COPY [[ADD]](s32) - %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(s65) = G_TRUNC %0 - %2:_(s32) = G_CTPOP %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i96) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV]](i32), [[UV1]](i32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV2]](i32), [[DEF]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 -1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[MV]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[MV1]], [[C1]] + ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(i32) = G_CTPOP [[AND]](i64) + ; CHECK-NEXT: [[CTPOP1:%[0-9]+]]:_(i32) = G_CTPOP [[AND1]](i64) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[CTPOP1]], [[CTPOP]] + ; CHECK-NEXT: $vgpr0 = COPY [[ADD]](i32) + %0:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(i65) = G_TRUNC %0(i96) + %2:_(i32) = G_CTPOP %1(i65) + $vgpr0 = COPY %2(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz-zero-undef.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz-zero-undef.mir index ab82a1bb5cc65..2cb76b2b0f4c0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz-zero-undef.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz-zero-undef.mir @@ -10,12 +10,12 @@ body: | ; CHECK-LABEL: name: cttz_zero_undef_s32_s32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[COPY]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[CTTZ_ZERO_UNDEF]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_CTTZ_ZERO_UNDEF %0 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(i32) = G_CTTZ_ZERO_UNDEF [[COPY]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[CTTZ_ZERO_UNDEF]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_CTTZ_ZERO_UNDEF %0(i32) + $vgpr0 = COPY %1(i32) ... 
--- @@ -27,12 +27,12 @@ body: | ; CHECK-LABEL: name: cttz_zero_undef_s32_s64 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[COPY]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[CTTZ_ZERO_UNDEF]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_CTTZ_ZERO_UNDEF %0 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(i32) = G_CTTZ_ZERO_UNDEF [[COPY]](i64) + ; CHECK-NEXT: $vgpr0 = COPY [[CTTZ_ZERO_UNDEF]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = G_CTTZ_ZERO_UNDEF %0(i64) + $vgpr0 = COPY %1(i32) ... --- @@ -44,13 +44,13 @@ body: | ; CHECK-LABEL: name: cttz_zero_undef_s64_s64 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[COPY]](s64) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[CTTZ_ZERO_UNDEF]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_CTTZ_ZERO_UNDEF %0 - $vgpr0_vgpr1 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(i32) = G_CTTZ_ZERO_UNDEF [[COPY]](i64) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[CTTZ_ZERO_UNDEF]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = G_CTTZ_ZERO_UNDEF %0(i64) + $vgpr0_vgpr1 = COPY %1(i64) ... --- @@ -62,15 +62,15 @@ body: | ; CHECK-LABEL: name: cttz_zero_undef_s16_s32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[COPY]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[CTTZ_ZERO_UNDEF]], [[C]] - ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_CTTZ_ZERO_UNDEF %0 - %2:_(s32) = G_ZEXT %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(i32) = G_CTTZ_ZERO_UNDEF [[COPY]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[CTTZ_ZERO_UNDEF]], [[C]] + ; CHECK-NEXT: $vgpr0 = COPY [[AND]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_CTTZ_ZERO_UNDEF %0(i32) + %2:_(i32) = G_ZEXT %1(i16) + $vgpr0 = COPY %2(i32) ... 
--- @@ -82,17 +82,17 @@ body: | ; CHECK-LABEL: name: cttz_zero_undef_s16_s16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s16) = G_CTTZ_ZERO_UNDEF %1 - %3:_(s32) = G_ZEXT %2 - $vgpr0 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(i32) = G_CTTZ_ZERO_UNDEF [[COPY]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[CTTZ_ZERO_UNDEF]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: $vgpr0 = COPY [[AND]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(i16) = G_CTTZ_ZERO_UNDEF %1(i16) + %3:_(i32) = G_ZEXT %2(i16) + $vgpr0 = COPY %3(i32) ... --- @@ -104,15 +104,15 @@ body: | ; CHECK-LABEL: name: cttz_zero_undef_v2s32_v2s32 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[UV]](s32) - ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[UV1]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[CTTZ_ZERO_UNDEF]](s32), [[CTTZ_ZERO_UNDEF1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_CTTZ_ZERO_UNDEF %0 - $vgpr0_vgpr1 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(i32) = G_CTTZ_ZERO_UNDEF [[UV]](i32) + ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF1:%[0-9]+]]:_(i32) = G_CTTZ_ZERO_UNDEF [[UV1]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[CTTZ_ZERO_UNDEF]](i32), [[CTTZ_ZERO_UNDEF1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = G_CTTZ_ZERO_UNDEF %0(<2 x i32>) + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... 
--- @@ -124,15 +124,15 @@ body: | ; CHECK-LABEL: name: cttz_zero_undef_v2s32_v2s64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[UV]](s64) - ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[UV1]](s64) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[CTTZ_ZERO_UNDEF]](s32), [[CTTZ_ZERO_UNDEF1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s32>) = G_CTTZ_ZERO_UNDEF %0 - $vgpr0_vgpr1 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(i32) = G_CTTZ_ZERO_UNDEF [[UV]](i64) + ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF1:%[0-9]+]]:_(i32) = G_CTTZ_ZERO_UNDEF [[UV1]](i64) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[CTTZ_ZERO_UNDEF]](i32), [[CTTZ_ZERO_UNDEF1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x i32>) = G_CTTZ_ZERO_UNDEF %0(<2 x i64>) + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... --- @@ -144,24 +144,24 @@ body: | ; CHECK-LABEL: name: cttz_zero_undef_v2s16_v2s16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[BITCAST]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF]](s32) - ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[LSHR]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF1]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = G_CTTZ_ZERO_UNDEF %0 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(i32) = G_CTTZ_ZERO_UNDEF [[BITCAST]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[CTTZ_ZERO_UNDEF]](i32) + ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF1:%[0-9]+]]:_(i32) = G_CTTZ_ZERO_UNDEF [[LSHR]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[CTTZ_ZERO_UNDEF1]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY2]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL 
[[AND1]], [[C]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = G_CTTZ_ZERO_UNDEF %0(<2 x i16>) + $vgpr0 = COPY %1(<2 x i16>) ... --- @@ -174,17 +174,17 @@ body: | ; CHECK-LABEL: name: cttz_zero_undef_s7_s7 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s7) = G_TRUNC %0 - %2:_(s7) = G_CTTZ_ZERO_UNDEF %1 - %3:_(s32) = G_ZEXT %2 - $vgpr0 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(i32) = G_CTTZ_ZERO_UNDEF [[COPY]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[CTTZ_ZERO_UNDEF]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 127 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: $vgpr0 = COPY [[AND]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i7) = G_TRUNC %0(i32) + %2:_(i7) = G_CTTZ_ZERO_UNDEF %1(i7) + %3:_(i32) = G_ZEXT %2(i7) + $vgpr0 = COPY %3(i32) ... --- @@ -197,13 +197,13 @@ body: | ; CHECK-LABEL: name: cttz_zero_undef_s33_s33 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[COPY]](s64) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[CTTZ_ZERO_UNDEF]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s33) = G_TRUNC %0 - %2:_(s33) = G_CTTZ_ZERO_UNDEF %1 - %3:_(s64) = G_ANYEXT %2 - $vgpr0_vgpr1 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(i32) = G_CTTZ_ZERO_UNDEF [[COPY]](i64) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[CTTZ_ZERO_UNDEF]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i33) = G_TRUNC %0(i64) + %2:_(i33) = G_CTTZ_ZERO_UNDEF %1(i33) + %3:_(i64) = G_ANYEXT %2(i33) + $vgpr0_vgpr1 = COPY %3(i64) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir index 0ef31a602961c..37cd8681ecbab 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir @@ -10,14 +10,14 @@ body: | ; CHECK-LABEL: name: cttz_s32_s32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[AMDGPU_FFBL_B32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBL_B32 [[COPY]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CHECK-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBL_B32_]], [[C]] - ; CHECK-NEXT: $vgpr0 = COPY [[UMIN]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_CTTZ %0 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[AMDGPU_FFBL_B32_:%[0-9]+]]:_(i32) = G_AMDGPU_FFBL_B32 [[COPY]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; CHECK-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[AMDGPU_FFBL_B32_]], [[C]] + ; CHECK-NEXT: $vgpr0 = COPY [[UMIN]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_CTTZ %0(i32) + $vgpr0 = COPY %1(i32) ... --- @@ -29,14 +29,14 @@ body: | ; CHECK-LABEL: name: cttz_s32_s64 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[AMDGPU_FFBL_B32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBL_B32 [[COPY]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; CHECK-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBL_B32_]], [[C]] - ; CHECK-NEXT: $vgpr0 = COPY [[UMIN]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_CTTZ %0 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[AMDGPU_FFBL_B32_:%[0-9]+]]:_(i32) = G_AMDGPU_FFBL_B32 [[COPY]](i64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; CHECK-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[AMDGPU_FFBL_B32_]], [[C]] + ; CHECK-NEXT: $vgpr0 = COPY [[UMIN]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = G_CTTZ %0(i64) + $vgpr0 = COPY %1(i32) ... --- @@ -48,15 +48,15 @@ body: | ; CHECK-LABEL: name: cttz_s64_s64 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[AMDGPU_FFBL_B32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBL_B32 [[COPY]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; CHECK-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBL_B32_]], [[C]] - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[UMIN]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_CTTZ %0 - $vgpr0_vgpr1 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[AMDGPU_FFBL_B32_:%[0-9]+]]:_(i32) = G_AMDGPU_FFBL_B32 [[COPY]](i64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; CHECK-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[AMDGPU_FFBL_B32_]], [[C]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[UMIN]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = G_CTTZ %0(i64) + $vgpr0_vgpr1 = COPY %1(i64) ... 
--- @@ -68,15 +68,15 @@ body: | ; CHECK-LABEL: name: cttz_s16_s32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[AMDGPU_FFBL_B32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBL_B32 [[COPY]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CHECK-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBL_B32_]], [[C]] - ; CHECK-NEXT: $vgpr0 = COPY [[UMIN]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_CTTZ %0 - %2:_(s32) = G_ZEXT %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[AMDGPU_FFBL_B32_:%[0-9]+]]:_(i32) = G_AMDGPU_FFBL_B32 [[COPY]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; CHECK-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[AMDGPU_FFBL_B32_]], [[C]] + ; CHECK-NEXT: $vgpr0 = COPY [[UMIN]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_CTTZ %0(i32) + %2:_(i32) = G_ZEXT %1(i16) + $vgpr0 = COPY %2(i32) ... --- @@ -88,19 +88,19 @@ body: | ; CHECK-LABEL: name: cttz_s16_s16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65536 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[C]] - ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s16) = G_CTTZ %1 - %3:_(s32) = G_ZEXT %2 - $vgpr0 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65536 + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY]], [[C]] + ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(i32) = G_CTTZ_ZERO_UNDEF [[OR]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[CTTZ_ZERO_UNDEF]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: $vgpr0 = COPY [[AND]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(i16) = G_CTTZ %1(i16) + %3:_(i32) = G_ZEXT %2(i16) + $vgpr0 = COPY %3(i32) ... 
--- @@ -112,18 +112,18 @@ body: | ; CHECK-LABEL: name: cttz_v2s32_v2s32 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[AMDGPU_FFBL_B32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBL_B32 [[UV]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CHECK-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBL_B32_]], [[C]] - ; CHECK-NEXT: [[AMDGPU_FFBL_B32_1:%[0-9]+]]:_(s32) = G_AMDGPU_FFBL_B32 [[UV1]](s32) - ; CHECK-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBL_B32_1]], [[C]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_CTTZ %0 - $vgpr0_vgpr1 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[AMDGPU_FFBL_B32_:%[0-9]+]]:_(i32) = G_AMDGPU_FFBL_B32 [[UV]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; CHECK-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[AMDGPU_FFBL_B32_]], [[C]] + ; CHECK-NEXT: [[AMDGPU_FFBL_B32_1:%[0-9]+]]:_(i32) = G_AMDGPU_FFBL_B32 [[UV1]](i32) + ; CHECK-NEXT: [[UMIN1:%[0-9]+]]:_(i32) = G_UMIN [[AMDGPU_FFBL_B32_1]], [[C]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[UMIN]](i32), [[UMIN1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = G_CTTZ %0(<2 x i32>) + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... --- @@ -135,18 +135,18 @@ body: | ; CHECK-LABEL: name: cttz_v2s32_v2s64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; CHECK-NEXT: [[AMDGPU_FFBL_B32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBL_B32 [[UV]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; CHECK-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBL_B32_]], [[C]] - ; CHECK-NEXT: [[AMDGPU_FFBL_B32_1:%[0-9]+]]:_(s32) = G_AMDGPU_FFBL_B32 [[UV1]](s64) - ; CHECK-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBL_B32_1]], [[C]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s32>) = G_CTTZ %0 - $vgpr0_vgpr1 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; CHECK-NEXT: [[AMDGPU_FFBL_B32_:%[0-9]+]]:_(i32) = G_AMDGPU_FFBL_B32 [[UV]](i64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; CHECK-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[AMDGPU_FFBL_B32_]], [[C]] + ; CHECK-NEXT: [[AMDGPU_FFBL_B32_1:%[0-9]+]]:_(i32) = G_AMDGPU_FFBL_B32 [[UV1]](i64) + ; CHECK-NEXT: [[UMIN1:%[0-9]+]]:_(i32) = G_UMIN [[AMDGPU_FFBL_B32_1]], [[C]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[UMIN]](i32), [[UMIN1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x i32>) = G_CTTZ %0(<2 x i64>) + 
$vgpr0_vgpr1 = COPY %1(<2 x i32>) ... --- @@ -158,27 +158,27 @@ body: | ; CHECK-LABEL: name: cttz_v2s16_v2s16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65536 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[C1]] - ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF1]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = G_CTTZ %0 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65536 + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(i32) = G_CTTZ_ZERO_UNDEF [[OR]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[CTTZ_ZERO_UNDEF]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[C1]] + ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF1:%[0-9]+]]:_(i32) = G_CTTZ_ZERO_UNDEF [[OR1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[CTTZ_ZERO_UNDEF1]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY2]], [[C2]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = G_CTTZ %0(<2 x i16>) + $vgpr0 = COPY %1(<2 x i16>) ... 
--- @@ -191,19 +191,19 @@ body: | ; CHECK-LABEL: name: cttz_s7_s7 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[C]] - ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s7) = G_TRUNC %0 - %2:_(s7) = G_CTTZ %1 - %3:_(s32) = G_ZEXT %2 - $vgpr0 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 128 + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY]], [[C]] + ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(i32) = G_CTTZ_ZERO_UNDEF [[OR]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[CTTZ_ZERO_UNDEF]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 127 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: $vgpr0 = COPY [[AND]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i7) = G_TRUNC %0(i32) + %2:_(i7) = G_CTTZ %1(i7) + %3:_(i32) = G_ZEXT %2(i7) + $vgpr0 = COPY %3(i32) ... --- @@ -216,15 +216,15 @@ body: | ; CHECK-LABEL: name: cttz_s33_s33 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934592 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[COPY]], [[C]] - ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR]](s64) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[CTTZ_ZERO_UNDEF]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s33) = G_TRUNC %0 - %2:_(s33) = G_CTTZ %1 - %3:_(s64) = G_ANYEXT %2 - $vgpr0_vgpr1 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 8589934592 + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[COPY]], [[C]] + ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(i32) = G_CTTZ_ZERO_UNDEF [[OR]](i64) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[CTTZ_ZERO_UNDEF]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i33) = G_TRUNC %0(i64) + %2:_(i33) = G_CTTZ %1(i33) + %3:_(i64) = G_ANYEXT %2(i33) + $vgpr0_vgpr1 = COPY %3(i64) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir index 93155335e2086..f7168e59d3391 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir @@ -10,14 +10,14 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_0_v2i32 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_CONSTANT i32 0 - %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](i32) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(i32) = G_CONSTANT i32 0 + %2:_(i32) = G_EXTRACT_VECTOR_ELT %0(<2 x i32>), %1(i32) + $vgpr0 = COPY %2(i32) ... --- name: extract_vector_elt_1_v2i32 @@ -28,14 +28,14 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_1_v2i32 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_CONSTANT i32 1 - %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](i32) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(i32) = G_CONSTANT i32 1 + %2:_(i32) = G_EXTRACT_VECTOR_ELT %0(<2 x i32>), %1(i32) + $vgpr0 = COPY %2(i32) ... --- name: extract_vector_elt_2_v2i32 @@ -46,14 +46,14 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_2_v2i32 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_CONSTANT i32 1 - %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](i32) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(i32) = G_CONSTANT i32 1 + %2:_(i32) = G_EXTRACT_VECTOR_ELT %0(<2 x i32>), %1(i32) + $vgpr0 = COPY %2(i32) ... 
--- name: extract_vector_elt_0_v3i32 @@ -64,14 +64,14 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_0_v3i32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(s32) = G_CONSTANT i32 0 - %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](i32) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(i32) = G_CONSTANT i32 0 + %2:_(i32) = G_EXTRACT_VECTOR_ELT %0(<3 x i32>), %1(i32) + $vgpr0 = COPY %2(i32) ... --- name: extract_vector_elt_0_v4i32 @@ -82,14 +82,14 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_0_v4i32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = G_CONSTANT i32 0 - %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](i32) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i32) = G_CONSTANT i32 0 + %2:_(i32) = G_EXTRACT_VECTOR_ELT %0(<4 x i32>), %1(i32) + $vgpr0 = COPY %2(i32) ... --- @@ -101,14 +101,14 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_0_v5i32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(<5 x s32>) = G_BUILD_VECTOR %0, %0, %0, %0, %0 - %2:_(s32) = G_CONSTANT i32 0 - %3:_(s32) = G_EXTRACT_VECTOR_ELT %1, %2 - $vgpr0 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(<5 x i32>) = G_BUILD_VECTOR %0(i32), %0(i32), %0(i32), %0(i32), %0(i32) + %2:_(i32) = G_CONSTANT i32 0 + %3:_(i32) = G_EXTRACT_VECTOR_ELT %1(<5 x i32>), %2(i32) + $vgpr0 = COPY %3(i32) ... 
--- @@ -120,14 +120,14 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_0_v6i32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(<6 x s32>) = G_BUILD_VECTOR %0, %0, %0, %0, %0, %0 - %2:_(s32) = G_CONSTANT i32 0 - %3:_(s32) = G_EXTRACT_VECTOR_ELT %1, %2 - $vgpr0 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(<6 x i32>) = G_BUILD_VECTOR %0(i32), %0(i32), %0(i32), %0(i32), %0(i32), %0(i32) + %2:_(i32) = G_CONSTANT i32 0 + %3:_(i32) = G_EXTRACT_VECTOR_ELT %1(<6 x i32>), %2(i32) + $vgpr0 = COPY %3(i32) ... --- @@ -139,14 +139,14 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_0_v7i32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(<7 x s32>) = G_BUILD_VECTOR %0, %0, %0, %0, %0, %0, %0 - %2:_(s32) = G_CONSTANT i32 0 - %3:_(s32) = G_EXTRACT_VECTOR_ELT %1, %2 - $vgpr0 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(<7 x i32>) = G_BUILD_VECTOR %0(i32), %0(i32), %0(i32), %0(i32), %0(i32), %0(i32), %0(i32) + %2:_(i32) = G_CONSTANT i32 0 + %3:_(i32) = G_EXTRACT_VECTOR_ELT %1(<7 x i32>), %2(i32) + $vgpr0 = COPY %3(i32) ... --- @@ -158,14 +158,14 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_0_v8i32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(<8 x s32>) = G_BUILD_VECTOR %0, %0, %0, %0, %0, %0, %0, %0 - %2:_(s32) = G_CONSTANT i32 0 - %3:_(s32) = G_EXTRACT_VECTOR_ELT %1, %2 - $vgpr0 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(<8 x i32>) = G_BUILD_VECTOR %0(i32), %0(i32), %0(i32), %0(i32), %0(i32), %0(i32), %0(i32), %0(i32) + %2:_(i32) = G_CONSTANT i32 0 + %3:_(i32) = G_EXTRACT_VECTOR_ELT %1(<8 x i32>), %2(i32) + $vgpr0 = COPY %3(i32) ... 
--- @@ -177,14 +177,14 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_0_v16i32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(<16 x s32>) = G_BUILD_VECTOR %0, %0, %0, %0, %0, %0, %0, %0, %0, %0, %0, %0, %0, %0, %0, %0 - %2:_(s32) = G_CONSTANT i32 0 - %3:_(s32) = G_EXTRACT_VECTOR_ELT %1, %2 - $vgpr0 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(<16 x i32>) = G_BUILD_VECTOR %0(i32), %0(i32), %0(i32), %0(i32), %0(i32), %0(i32), %0(i32), %0(i32), %0(i32), %0(i32), %0(i32), %0(i32), %0(i32), %0(i32), %0(i32), %0(i32) + %2:_(i32) = G_CONSTANT i32 0 + %3:_(i32) = G_EXTRACT_VECTOR_ELT %1(<16 x i32>), %2(i32) + $vgpr0 = COPY %3(i32) ... --- @@ -196,14 +196,14 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_var_v2i32 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[COPY1]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[EVEC]](s32) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(i32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x i32>), [[COPY1]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[EVEC]](i32) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(i32) = COPY $vgpr2 + %2:_(i32) = G_EXTRACT_VECTOR_ELT %0(<2 x i32>), %1(i32) + $vgpr0 = COPY %2(i32) ... --- @@ -215,14 +215,14 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_var_v8i32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<8 x s32>), [[COPY1]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[EVEC]](s32) - %0:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - %1:_(s32) = COPY $vgpr2 - %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(i32) = G_EXTRACT_VECTOR_ELT [[COPY]](<8 x i32>), [[COPY1]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[EVEC]](i32) + %0:_(<8 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(i32) = COPY $vgpr2 + %2:_(i32) = G_EXTRACT_VECTOR_ELT %0(<8 x i32>), %1(i32) + $vgpr0 = COPY %2(i32) ... 
@@ -233,15 +233,15 @@ body: | bb.0: ; CHECK-LABEL: name: extract_vector_elt_0_v2i8_i32 - ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>) - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) - %0:_(<2 x s8>) = G_IMPLICIT_DEF - %1:_(s32) = G_CONSTANT i32 0 - %2:_(s8) = G_EXTRACT_VECTOR_ELT %0, %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; CHECK: [[DEF:%[0-9]+]]:_(<2 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF]](<2 x i32>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](i32) + %0:_(<2 x i8>) = G_IMPLICIT_DEF + %1:_(i32) = G_CONSTANT i32 0 + %2:_(i8) = G_EXTRACT_VECTOR_ELT %0(<2 x i8>), %1(i32) + %3:_(i32) = G_ANYEXT %2(i8) + $vgpr0 = COPY %3(i32) ... --- @@ -251,16 +251,16 @@ body: | bb.0: ; CHECK-LABEL: name: extract_vector_elt_0_v2i16_i32 - ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[DEF]](<2 x s16>) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32) - %0:_(<2 x s16>) = G_IMPLICIT_DEF - %1:_(s32) = G_CONSTANT i32 0 - %2:_(s16) = G_EXTRACT_VECTOR_ELT %0, %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; CHECK: [[DEF:%[0-9]+]]:_(<2 x i16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[DEF]](<2 x i16>) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](i32) + %0:_(<2 x i16>) = G_IMPLICIT_DEF + %1:_(i32) = G_CONSTANT i32 0 + %2:_(i16) = G_EXTRACT_VECTOR_ELT %0(<2 x i16>), %1(i32) + %3:_(i32) = G_ANYEXT %2(i16) + $vgpr0 = COPY %3(i32) ... --- @@ -270,15 +270,15 @@ body: | bb.0: ; CHECK-LABEL: name: extract_vector_elt_0_v2i1_i32 - ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>) - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) - %0:_(<2 x s1>) = G_IMPLICIT_DEF - %1:_(s32) = G_CONSTANT i32 0 - %2:_(s1) = G_EXTRACT_VECTOR_ELT %0, %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; CHECK: [[DEF:%[0-9]+]]:_(<2 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF]](<2 x i32>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](i32) + %0:_(<2 x i1>) = G_IMPLICIT_DEF + %1:_(i32) = G_CONSTANT i32 0 + %2:_(i1) = G_EXTRACT_VECTOR_ELT %0(<2 x i1>), %1(i32) + %3:_(i32) = G_ANYEXT %2(i1) + $vgpr0 = COPY %3(i32) ... 
--- @@ -288,16 +288,16 @@ body: | bb.0: ; CHECK-LABEL: name: extract_vector_elt_0_v2i1_i1 - ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>) - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) - %0:_(<2 x s1>) = G_IMPLICIT_DEF - %1:_(s1) = G_CONSTANT i1 false - %4:_(s32) = G_ZEXT %1 - %2:_(s1) = G_EXTRACT_VECTOR_ELT %0, %4 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; CHECK: [[DEF:%[0-9]+]]:_(<2 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF]](<2 x i32>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](i32) + %0:_(<2 x i1>) = G_IMPLICIT_DEF + %1:_(i1) = G_CONSTANT i1 false + %2:_(i32) = G_ZEXT %1(i1) + %3:_(i1) = G_EXTRACT_VECTOR_ELT %0(<2 x i1>), %2(i32) + %4:_(i32) = G_ANYEXT %3(i1) + $vgpr0 = COPY %4(i32) ... --- @@ -310,20 +310,20 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_v2s8_varidx_i32 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[LSHR]](s32) - ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<2 x s32>), [[COPY1]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[EVEC]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(<2 x s8>) = G_BITCAST %2 - %4:_(s8) = G_EXTRACT_VECTOR_ELT %3, %1 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[LSHR]](i32) + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(i32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<2 x i32>), [[COPY1]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[EVEC]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(<2 x i8>) = G_BITCAST %2(i16) + %4:_(i8) = G_EXTRACT_VECTOR_ELT %3(<2 x i8>), %1(i32) + %5:_(i32) = G_ANYEXT %4(i8) + $vgpr0 = COPY %5(i32) ... --- @@ -336,17 +336,17 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_v2s8_constidx_0_i32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(<2 x s8>) = G_BITCAST %2 - %4:_(s32) = G_CONSTANT i32 0 - %5:_(s8) = G_EXTRACT_VECTOR_ELT %3, %4 - %6:_(s32) = G_ANYEXT %5 - $vgpr0 = COPY %6 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(<2 x i8>) = G_BITCAST %2(i16) + %4:_(i32) = G_CONSTANT i32 0 + %5:_(i8) = G_EXTRACT_VECTOR_ELT %3(<2 x i8>), %4(i32) + %6:_(i32) = G_ANYEXT %5(i8) + $vgpr0 = COPY %6(i32) ... 
--- @@ -359,19 +359,19 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_v2s8_constidx_1_i32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(<2 x s8>) = G_BITCAST %2 - %4:_(s32) = G_CONSTANT i32 1 - %5:_(s8) = G_EXTRACT_VECTOR_ELT %3, %4 - %6:_(s32) = G_ANYEXT %5 - $vgpr0 = COPY %6 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(<2 x i8>) = G_BITCAST %2(i16) + %4:_(i32) = G_CONSTANT i32 1 + %5:_(i8) = G_EXTRACT_VECTOR_ELT %3(<2 x i8>), %4(i32) + %6:_(i32) = G_ANYEXT %5(i8) + $vgpr0 = COPY %6(i32) ... --- @@ -384,24 +384,24 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_v4s4_varidx_i32 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32) - ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<4 x s32>), [[COPY1]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[EVEC]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(<4 x s4>) = G_BITCAST %2 - %4:_(s4) = G_EXTRACT_VECTOR_ELT %3, %1 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C1]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C2]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[LSHR]](i32), [[LSHR1]](i32), [[LSHR2]](i32) + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(i32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<4 x i32>), [[COPY1]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[EVEC]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(<4 x i4>) = G_BITCAST %2(i16) + %4:_(i4) = G_EXTRACT_VECTOR_ELT %3(<4 x i4>), %1(i32) + %5:_(i32) = G_ANYEXT %4(i4) + $vgpr0 = COPY %5(i32) ... 
--- @@ -414,16 +414,16 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_v3s8_varidx_i32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<3 x s32>), [[COPY1]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[EVEC]](s32) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(s32) = COPY $vgpr3 - %2:_(<3 x s8>) = G_TRUNC %0 - %3:_(s8) = G_EXTRACT_VECTOR_ELT %2, %1 - %4:_(s32) = G_ANYEXT %3 - $vgpr0 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(i32) = G_EXTRACT_VECTOR_ELT [[COPY]](<3 x i32>), [[COPY1]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[EVEC]](i32) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(i32) = COPY $vgpr3 + %2:_(<3 x i8>) = G_TRUNC %0(<3 x i32>) + %3:_(i8) = G_EXTRACT_VECTOR_ELT %2(<3 x i8>), %1(i32) + %4:_(i32) = G_ANYEXT %3(i8) + $vgpr0 = COPY %4(i32) ... --- @@ -436,19 +436,19 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_v4s8_varidx_i32 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[SHL]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(<4 x s8>) = G_BITCAST %0 - %3:_(s8) = G_EXTRACT_VECTOR_ELT %2, %1 - %4:_(s32) = G_ANYEXT %3 - $vgpr0 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND]], [[C]](i32) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[SHL]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(<4 x i8>) = G_BITCAST %0(i32) + %3:_(i8) = G_EXTRACT_VECTOR_ELT %2(<4 x i8>), %1(i32) + %4:_(i32) = G_ANYEXT %3(i8) + $vgpr0 = COPY %4(i32) ... --- @@ -461,16 +461,16 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_v4s8_constidx_0_i32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(<4 x s8>) = G_BITCAST %0 - %2:_(s32) = G_CONSTANT i32 0 - %3:_(s8) = G_EXTRACT_VECTOR_ELT %1, %2 - %4:_(s32) = G_ANYEXT %3 - $vgpr0 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(<4 x i8>) = G_BITCAST %0(i32) + %2:_(i32) = G_CONSTANT i32 0 + %3:_(i8) = G_EXTRACT_VECTOR_ELT %1(<4 x i8>), %2(i32) + %4:_(i32) = G_ANYEXT %3(i8) + $vgpr0 = COPY %4(i32) ... 
--- @@ -483,16 +483,16 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_v4s8_constidx_1_i32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(<4 x s8>) = G_BITCAST %0 - %2:_(s32) = G_CONSTANT i32 1 - %3:_(s8) = G_EXTRACT_VECTOR_ELT %1, %2 - %4:_(s32) = G_ANYEXT %3 - $vgpr0 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(<4 x i8>) = G_BITCAST %0(i32) + %2:_(i32) = G_CONSTANT i32 1 + %3:_(i8) = G_EXTRACT_VECTOR_ELT %1(<4 x i8>), %2(i32) + %4:_(i32) = G_ANYEXT %3(i8) + $vgpr0 = COPY %4(i32) ... --- @@ -505,16 +505,16 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_v4s8_constidx_2_i32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(<4 x s8>) = G_BITCAST %0 - %2:_(s32) = G_CONSTANT i32 2 - %3:_(s8) = G_EXTRACT_VECTOR_ELT %1, %2 - %4:_(s32) = G_ANYEXT %3 - $vgpr0 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(<4 x i8>) = G_BITCAST %0(i32) + %2:_(i32) = G_CONSTANT i32 2 + %3:_(i8) = G_EXTRACT_VECTOR_ELT %1(<4 x i8>), %2(i32) + %4:_(i32) = G_ANYEXT %3(i8) + $vgpr0 = COPY %4(i32) ... --- @@ -527,16 +527,16 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_v4s8_constidx_3_i32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(<4 x s8>) = G_BITCAST %0 - %2:_(s32) = G_CONSTANT i32 3 - %3:_(s8) = G_EXTRACT_VECTOR_ELT %1, %2 - %4:_(s32) = G_ANYEXT %3 - $vgpr0 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(<4 x i8>) = G_BITCAST %0(i32) + %2:_(i32) = G_CONSTANT i32 3 + %3:_(i8) = G_EXTRACT_VECTOR_ELT %1(<4 x i8>), %2(i32) + %4:_(i32) = G_ANYEXT %3(i8) + $vgpr0 = COPY %4(i32) ... 
@@ -551,24 +551,24 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_v8s8_varidx_i32 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<2 x s32>), [[LSHR]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[EVEC]], [[SHL]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[LSHR1]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(<8 x s8>) = G_BITCAST %0 - %3:_(s8) = G_EXTRACT_VECTOR_ELT %2, %1 - %4:_(s32) = G_ANYEXT %3 - $vgpr0 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY1]], [[C]](i32) + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(i32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<2 x i32>), [[LSHR]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND]], [[C1]](i32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[EVEC]], [[SHL]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[LSHR1]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = COPY $vgpr2 + %2:_(<8 x i8>) = G_BITCAST %0(i64) + %3:_(i8) = G_EXTRACT_VECTOR_ELT %2(<8 x i8>), %1(i32) + %4:_(i32) = G_ANYEXT %3(i8) + $vgpr0 = COPY %4(i32) ... @@ -582,18 +582,18 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_v8s8_constidx_0_i32 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_CONSTANT i32 0 - %2:_(<8 x s8>) = G_BITCAST %0 - %3:_(s8) = G_EXTRACT_VECTOR_ELT %2, %1 - %4:_(s32) = G_ANYEXT %3 - $vgpr0 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY1]], [[C]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = G_CONSTANT i32 0 + %2:_(<8 x i8>) = G_BITCAST %0(i64) + %3:_(i8) = G_EXTRACT_VECTOR_ELT %2(<8 x i8>), %1(i32) + %4:_(i32) = G_ANYEXT %3(i8) + $vgpr0 = COPY %4(i32) ... 
--- @@ -606,18 +606,18 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_v8s8_constidx_1_i32 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_CONSTANT i32 1 - %2:_(<8 x s8>) = G_BITCAST %0 - %3:_(s8) = G_EXTRACT_VECTOR_ELT %2, %1 - %4:_(s32) = G_ANYEXT %3 - $vgpr0 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY1]], [[C]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = G_CONSTANT i32 1 + %2:_(<8 x i8>) = G_BITCAST %0(i64) + %3:_(i8) = G_EXTRACT_VECTOR_ELT %2(<8 x i8>), %1(i32) + %4:_(i32) = G_ANYEXT %3(i8) + $vgpr0 = COPY %4(i32) ... --- @@ -630,18 +630,18 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_v8s8_constidx_3_i32 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_CONSTANT i32 3 - %2:_(<8 x s8>) = G_BITCAST %0 - %3:_(s8) = G_EXTRACT_VECTOR_ELT %2, %1 - %4:_(s32) = G_ANYEXT %3 - $vgpr0 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY1]], [[C]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = G_CONSTANT i32 3 + %2:_(<8 x i8>) = G_BITCAST %0(i64) + %3:_(i8) = G_EXTRACT_VECTOR_ELT %2(<8 x i8>), %1(i32) + %4:_(i32) = G_ANYEXT %3(i8) + $vgpr0 = COPY %4(i32) ... 
--- @@ -654,18 +654,18 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_v8s8_constidx_4_i32 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_CONSTANT i32 4 - %2:_(<8 x s8>) = G_BITCAST %0 - %3:_(s8) = G_EXTRACT_VECTOR_ELT %2, %1 - %4:_(s32) = G_ANYEXT %3 - $vgpr0 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY1]], [[C]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = G_CONSTANT i32 4 + %2:_(<8 x i8>) = G_BITCAST %0(i64) + %3:_(i8) = G_EXTRACT_VECTOR_ELT %2(<8 x i8>), %1(i32) + %4:_(i32) = G_ANYEXT %3(i8) + $vgpr0 = COPY %4(i32) ... --- @@ -678,18 +678,18 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_v8s8_constidx_5_i32 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_CONSTANT i32 5 - %2:_(<8 x s8>) = G_BITCAST %0 - %3:_(s8) = G_EXTRACT_VECTOR_ELT %2, %1 - %4:_(s32) = G_ANYEXT %3 - $vgpr0 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY1]], [[C]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = G_CONSTANT i32 5 + %2:_(<8 x i8>) = G_BITCAST %0(i64) + %3:_(i8) = G_EXTRACT_VECTOR_ELT %2(<8 x i8>), %1(i32) + %4:_(i32) = G_ANYEXT %3(i8) + $vgpr0 = COPY %4(i32) ... 
--- @@ -702,18 +702,18 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_v8s8_constidx_7_i32 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_CONSTANT i32 7 - %2:_(<8 x s8>) = G_BITCAST %0 - %3:_(s8) = G_EXTRACT_VECTOR_ELT %2, %1 - %4:_(s32) = G_ANYEXT %3 - $vgpr0 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY1]], [[C]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = G_CONSTANT i32 7 + %2:_(<8 x i8>) = G_BITCAST %0(i64) + %3:_(i8) = G_EXTRACT_VECTOR_ELT %2(<8 x i8>), %1(i32) + %4:_(i32) = G_ANYEXT %3(i8) + $vgpr0 = COPY %4(i32) ... --- @@ -726,20 +726,20 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_v2s16_varidx_i32 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[SHL]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_EXTRACT_VECTOR_ELT %0, %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND]], [[C1]](i32) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[SHL]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](i32) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_EXTRACT_VECTOR_ELT %0(<2 x i16>), %1(i32) + %3:_(i32) = G_ANYEXT %2(i16) + $vgpr0 = COPY %3(i32) ... 
--- @@ -752,16 +752,16 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_v2s16_idx0_i32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(s32) = G_CONSTANT i32 0 - %2:_(s16) = G_EXTRACT_VECTOR_ELT %0, %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](i32) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(i32) = G_CONSTANT i32 0 + %2:_(i16) = G_EXTRACT_VECTOR_ELT %0(<2 x i16>), %1(i32) + %3:_(i32) = G_ANYEXT %2(i16) + $vgpr0 = COPY %3(i32) ... --- @@ -774,16 +774,16 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_v2s16_idx1_i32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(s32) = G_CONSTANT i32 1 - %2:_(s16) = G_EXTRACT_VECTOR_ELT %0, %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](i32) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(i32) = G_CONSTANT i32 1 + %2:_(i16) = G_EXTRACT_VECTOR_ELT %0(<2 x i16>), %1(i32) + %3:_(i32) = G_ANYEXT %2(i16) + $vgpr0 = COPY %3(i32) ... --- @@ -796,16 +796,16 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_v3s16_varidx_i32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<3 x s32>), [[COPY1]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[EVEC]](s32) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(s32) = COPY $vgpr3 - %2:_(<3 x s16>) = G_TRUNC %0 - %3:_(s16) = G_EXTRACT_VECTOR_ELT %2, %1 - %4:_(s32) = G_ANYEXT %3 - $vgpr0 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(i32) = G_EXTRACT_VECTOR_ELT [[COPY]](<3 x i32>), [[COPY1]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[EVEC]](i32) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(i32) = COPY $vgpr3 + %2:_(<3 x i16>) = G_TRUNC %0(<3 x i32>) + %3:_(i16) = G_EXTRACT_VECTOR_ELT %2(<3 x i16>), %1(i32) + %4:_(i32) = G_ANYEXT %3(i16) + $vgpr0 = COPY %4(i32) ... 
--- @@ -818,16 +818,16 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_v3s16_idx0_i32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(s32) = G_CONSTANT i32 0 - %2:_(<3 x s16>) = G_TRUNC %0 - %3:_(s16) = G_EXTRACT_VECTOR_ELT %2, %1 - %4:_(s32) = G_ANYEXT %3 - $vgpr0 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](i32) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(i32) = G_CONSTANT i32 0 + %2:_(<3 x i16>) = G_TRUNC %0(<3 x i32>) + %3:_(i16) = G_EXTRACT_VECTOR_ELT %2(<3 x i16>), %1(i32) + %4:_(i32) = G_ANYEXT %3(i16) + $vgpr0 = COPY %4(i32) ... --- @@ -840,16 +840,16 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_v3s16_idx1_i32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(s32) = G_CONSTANT i32 1 - %2:_(<3 x s16>) = G_TRUNC %0 - %3:_(s16) = G_EXTRACT_VECTOR_ELT %2, %1 - %4:_(s32) = G_ANYEXT %3 - $vgpr0 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](i32) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(i32) = G_CONSTANT i32 1 + %2:_(<3 x i16>) = G_TRUNC %0(<3 x i32>) + %3:_(i16) = G_EXTRACT_VECTOR_ELT %2(<3 x i16>), %1(i32) + %4:_(i32) = G_ANYEXT %3(i16) + $vgpr0 = COPY %4(i32) ... --- @@ -862,16 +862,16 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_v3s16_idx2_i32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(s32) = G_CONSTANT i32 2 - %2:_(<3 x s16>) = G_TRUNC %0 - %3:_(s16) = G_EXTRACT_VECTOR_ELT %2, %1 - %4:_(s32) = G_ANYEXT %3 - $vgpr0 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[UV2]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](i32) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(i32) = G_CONSTANT i32 2 + %2:_(<3 x i16>) = G_TRUNC %0(<3 x i32>) + %3:_(i16) = G_EXTRACT_VECTOR_ELT %2(<3 x i16>), %1(i32) + %4:_(i32) = G_ANYEXT %3(i16) + $vgpr0 = COPY %4(i32) ... 
--- @@ -884,22 +884,22 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_v4s16_varidx_i32 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<2 x s32>), [[LSHR]](s32) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[EVEC]], [[SHL]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[LSHR1]](s32) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s16) = G_EXTRACT_VECTOR_ELT %0, %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[COPY]](<4 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY1]], [[C]](i32) + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(i32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<2 x i32>), [[LSHR]](i32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND]], [[C1]](i32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[EVEC]], [[SHL]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[LSHR1]](i32) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(i32) = COPY $vgpr2 + %2:_(i16) = G_EXTRACT_VECTOR_ELT %0(<4 x i16>), %1(i32) + %3:_(i32) = G_ANYEXT %2(i16) + $vgpr0 = COPY %3(i32) ... 
--- @@ -912,24 +912,24 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_v2s128_varidx_i32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s64>) = G_BITCAST [[COPY]](<2 x s128>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY1]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL]], [[C1]] - ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<4 x s64>), [[ADD]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[MUL]], [[C2]] - ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<4 x s64>), [[ADD1]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[EVEC]](s64), [[EVEC1]](s64) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<2 x s64>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST1]](s128) - %0:_(<2 x s128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - %1:_(s32) = COPY $vgpr8 - %2:_(s128) = G_EXTRACT_VECTOR_ELT %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i64>) = G_BITCAST [[COPY]](<2 x i128>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[COPY1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[MUL]], [[C1]] + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(i64) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<4 x i64>), [[ADD]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[MUL]], [[C2]] + ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(i64) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<4 x i64>), [[ADD1]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[EVEC]](i64), [[EVEC1]](i64) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<2 x i64>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST1]](i128) + %0:_(<2 x i128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(i32) = COPY $vgpr8 + %2:_(i128) = G_EXTRACT_VECTOR_ELT %0(<2 x i128>), %1(i32) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(i128) ... 
--- @@ -942,16 +942,16 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_v2i32_varidx_i64 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[TRUNC]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[EVEC]](s32) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %3:_(s32) = G_TRUNC %1 - %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %3 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY1]](i64) + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(i32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x i32>), [[TRUNC]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[EVEC]](i32) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i32) = G_TRUNC %1(i64) + %3:_(i32) = G_EXTRACT_VECTOR_ELT %0(<2 x i32>), %2(i32) + $vgpr0 = COPY %3(i32) ... --- name: extract_vector_elt_0_v2i64 @@ -963,14 +963,14 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_0_v2i64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[UV]](s64) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY1]](s64) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = G_CONSTANT i32 0 - %2:_(s64) = G_EXTRACT_VECTOR_ELT %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY [[UV]](i64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY1]](i64) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i32) = G_CONSTANT i32 0 + %2:_(i64) = G_EXTRACT_VECTOR_ELT %0(<2 x i64>), %1(i32) + $vgpr0_vgpr1 = COPY %2(i64) ... --- @@ -983,14 +983,14 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_0_v8i64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<8 x s64>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<8 x s64>) - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[UV]](s64) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](s64) - %0:_(<8 x s64>) = G_IMPLICIT_DEF - %1:_(s32) = G_CONSTANT i32 0 - %2:_(s64) = G_EXTRACT_VECTOR_ELT %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<8 x i64>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64), [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64), [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64), [[UV6:%[0-9]+]]:_(i64), [[UV7:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<8 x i64>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY [[UV]](i64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](i64) + %0:_(<8 x i64>) = G_IMPLICIT_DEF + %1:_(i32) = G_CONSTANT i32 0 + %2:_(i64) = G_EXTRACT_VECTOR_ELT %0(<8 x i64>), %1(i32) + $vgpr0_vgpr1 = COPY %2(i64) ... 
--- @@ -1003,14 +1003,14 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_0_v16i64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<16 x s64>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64), [[UV11:%[0-9]+]]:_(s64), [[UV12:%[0-9]+]]:_(s64), [[UV13:%[0-9]+]]:_(s64), [[UV14:%[0-9]+]]:_(s64), [[UV15:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<16 x s64>) - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[UV]](s64) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](s64) - %0:_(<16 x s64>) = G_IMPLICIT_DEF - %1:_(s32) = G_CONSTANT i32 0 - %2:_(s64) = G_EXTRACT_VECTOR_ELT %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<16 x i64>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64), [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64), [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64), [[UV6:%[0-9]+]]:_(i64), [[UV7:%[0-9]+]]:_(i64), [[UV8:%[0-9]+]]:_(i64), [[UV9:%[0-9]+]]:_(i64), [[UV10:%[0-9]+]]:_(i64), [[UV11:%[0-9]+]]:_(i64), [[UV12:%[0-9]+]]:_(i64), [[UV13:%[0-9]+]]:_(i64), [[UV14:%[0-9]+]]:_(i64), [[UV15:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<16 x i64>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY [[UV]](i64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](i64) + %0:_(<16 x i64>) = G_IMPLICIT_DEF + %1:_(i32) = G_CONSTANT i32 0 + %2:_(i64) = G_EXTRACT_VECTOR_ELT %0(<16 x i64>), %1(i32) + $vgpr0_vgpr1 = COPY %2(i64) ... # Make sure we look through casts looking for a constant index. @@ -1023,15 +1023,15 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_look_through_trunc_0_v4i32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s64) = G_CONSTANT i64 0 - %2:_(s32) = G_TRUNC %1 - %3:_(s32) = G_EXTRACT_VECTOR_ELT %0, %2 - $vgpr0 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](i32) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i64) = G_CONSTANT i64 0 + %2:_(i32) = G_TRUNC %1(i64) + %3:_(i32) = G_EXTRACT_VECTOR_ELT %0(<4 x i32>), %2(i32) + $vgpr0 = COPY %3(i32) ... 
--- @@ -1045,15 +1045,15 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load (<16 x s32>), align 4, addrspace 4) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<16 x s32>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY1]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<16 x i32>) = G_LOAD [[COPY]](p1) :: (load (<16 x i32>), align 4, addrspace 4) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32), [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32), [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32), [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<16 x i32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[UV7]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY1]](i32) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_CONSTANT i32 7 - %2:_(<64 x s32>) = G_LOAD %0 :: (load (<64 x s32>), align 4, addrspace 4) - %3:_(s32) = G_EXTRACT_VECTOR_ELT %2, %1 - S_ENDPGM 0, implicit %3 + %1:_(i32) = G_CONSTANT i32 7 + %2:_(<64 x i32>) = G_LOAD %0(p1) :: (load (<64 x i32>), align 4, addrspace 4) + %3:_(i32) = G_EXTRACT_VECTOR_ELT %2(<64 x i32>), %1(i32) + S_ENDPGM 0, implicit %3(i32) ... 
--- @@ -1067,17 +1067,17 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<16 x s32>) from unknown-address + 128, align 4, addrspace 4) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<16 x s32>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY1]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 128 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<16 x i32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<16 x i32>) from unknown-address + 128, align 4, addrspace 4) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32), [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32), [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32), [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<16 x i32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY1]](i32) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_CONSTANT i32 33 - %2:_(<64 x s32>) = G_LOAD %0 :: (load (<64 x s32>), align 4, addrspace 4) - %3:_(s32) = G_EXTRACT_VECTOR_ELT %2, %1 - S_ENDPGM 0, implicit %3 + %1:_(i32) = G_CONSTANT i32 33 + %2:_(<64 x i32>) = G_LOAD %0(p1) :: (load (<64 x i32>), align 4, addrspace 4) + %3:_(i32) = G_EXTRACT_VECTOR_ELT %2(<64 x i32>), %1(i32) + S_ENDPGM 0, implicit %3(i32) ... # Test handling of out of bounds indexes @@ -1091,16 +1091,16 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_64_65_v64s32 ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY]](s32), implicit [[DEF]](s32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY [[DEF]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY]](i32), implicit [[DEF]](i32) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_CONSTANT i32 64 - %2:_(<64 x s32>) = G_LOAD %0 :: (load (<64 x s32>), align 4, addrspace 4) - %3:_(s32) = G_EXTRACT_VECTOR_ELT %2, %1 - %4:_(s32) = G_CONSTANT i32 65 - %5:_(s32) = G_EXTRACT_VECTOR_ELT %2, %4 - S_ENDPGM 0, implicit %3, implicit %5 + %1:_(i32) = G_CONSTANT i32 64 + %2:_(<64 x i32>) = G_LOAD %0(p1) :: (load (<64 x i32>), align 4, addrspace 4) + %3:_(i32) = G_EXTRACT_VECTOR_ELT %2(<64 x i32>), %1(i32) + %4:_(i32) = G_CONSTANT i32 65 + %5:_(i32) = G_EXTRACT_VECTOR_ELT %2(<64 x i32>), %4(i32) + S_ENDPGM 0, implicit %3(i32), implicit %5(i32) ... 
--- @@ -1114,18 +1114,18 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<16 x s32>) from unknown-address + 128, align 4, addrspace 4) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x p3>) = G_BITCAST [[LOAD]](<16 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 128 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<16 x i32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<16 x i32>) from unknown-address + 128, align 4, addrspace 4) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x p3>) = G_BITCAST [[LOAD]](<16 x i32>) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3), [[UV2:%[0-9]+]]:_(p3), [[UV3:%[0-9]+]]:_(p3), [[UV4:%[0-9]+]]:_(p3), [[UV5:%[0-9]+]]:_(p3), [[UV6:%[0-9]+]]:_(p3), [[UV7:%[0-9]+]]:_(p3), [[UV8:%[0-9]+]]:_(p3), [[UV9:%[0-9]+]]:_(p3), [[UV10:%[0-9]+]]:_(p3), [[UV11:%[0-9]+]]:_(p3), [[UV12:%[0-9]+]]:_(p3), [[UV13:%[0-9]+]]:_(p3), [[UV14:%[0-9]+]]:_(p3), [[UV15:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[BITCAST]](<16 x p3>) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY [[UV1]](p3) ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY1]](p3) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_CONSTANT i32 33 - %2:_(<64 x p3>) = G_LOAD %0 :: (load (<64 x p3>), align 4, addrspace 4) - %3:_(p3) = G_EXTRACT_VECTOR_ELT %2, %1 - S_ENDPGM 0, implicit %3 + %1:_(i32) = G_CONSTANT i32 33 + %2:_(<64 x p3>) = G_LOAD %0(p1) :: (load (<64 x p3>), align 4, addrspace 4) + %3:_(p3) = G_EXTRACT_VECTOR_ELT %2(<64 x p3>), %1(i32) + S_ENDPGM 0, implicit %3(p3) ... 
--- @@ -1139,223 +1139,223 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load (<16 x s32>), align 4, addrspace 4) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<16 x s32>) from unknown-address + 64, align 4, addrspace 4) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p1) :: (load (<16 x s32>) from unknown-address + 128, align 4, addrspace 4) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 192 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD2]](p1) :: (load (<16 x s32>) from unknown-address + 192, align 4, addrspace 4) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<16 x i32>) = G_LOAD [[COPY]](p1) :: (load (<16 x i32>), align 4, addrspace 4) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 64 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x i32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<16 x i32>) from unknown-address + 64, align 4, addrspace 4) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 128 + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(<16 x i32>) = G_LOAD [[PTR_ADD1]](p1) :: (load (<16 x i32>) from unknown-address + 128, align 4, addrspace 4) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 192 + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(<16 x i32>) = G_LOAD [[PTR_ADD2]](p1) :: (load (<16 x i32>) from unknown-address + 192, align 4, addrspace 4) ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<16 x s32>) - ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>) - ; CHECK-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), 
[[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<16 x s32>) - ; CHECK-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD3]](<16 x s32>) - ; CHECK-NEXT: G_STORE [[UV]](s32), [[FRAME_INDEX]](p5) :: (store (s32) into %stack.0, align 256, addrspace 5) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C3]](s32) - ; CHECK-NEXT: G_STORE [[UV1]](s32), [[PTR_ADD3]](p5) :: (store (s32) into %stack.0 + 4, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C4]](s32) - ; CHECK-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD4]](p5) :: (store (s32) into %stack.0 + 8, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C5]](s32) - ; CHECK-NEXT: G_STORE [[UV3]](s32), [[PTR_ADD5]](p5) :: (store (s32) into %stack.0 + 12, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C6]](s32) - ; CHECK-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD6]](p5) :: (store (s32) into %stack.0 + 16, align 16, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C7]](s32) - ; CHECK-NEXT: G_STORE [[UV5]](s32), [[PTR_ADD7]](p5) :: (store (s32) into %stack.0 + 20, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C8]](s32) - ; CHECK-NEXT: G_STORE [[UV6]](s32), [[PTR_ADD8]](p5) :: (store (s32) into %stack.0 + 24, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; CHECK-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C9]](s32) - ; CHECK-NEXT: G_STORE [[UV7]](s32), [[PTR_ADD9]](p5) :: (store (s32) into %stack.0 + 28, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CHECK-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C10]](s32) - ; CHECK-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD10]](p5) :: (store (s32) into %stack.0 + 32, align 32, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 36 - ; CHECK-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C11]](s32) - ; CHECK-NEXT: G_STORE [[UV9]](s32), [[PTR_ADD11]](p5) :: (store (s32) into %stack.0 + 36, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 40 - ; CHECK-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C12]](s32) - ; CHECK-NEXT: G_STORE [[UV10]](s32), [[PTR_ADD12]](p5) :: (store (s32) into %stack.0 + 40, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 44 - ; CHECK-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C13]](s32) - ; CHECK-NEXT: G_STORE [[UV11]](s32), [[PTR_ADD13]](p5) :: (store (s32) into %stack.0 + 44, basealign 256, 
addrspace 5) - ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 - ; CHECK-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C14]](s32) - ; CHECK-NEXT: G_STORE [[UV12]](s32), [[PTR_ADD14]](p5) :: (store (s32) into %stack.0 + 48, align 16, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 52 - ; CHECK-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C15]](s32) - ; CHECK-NEXT: G_STORE [[UV13]](s32), [[PTR_ADD15]](p5) :: (store (s32) into %stack.0 + 52, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 56 - ; CHECK-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C16]](s32) - ; CHECK-NEXT: G_STORE [[UV14]](s32), [[PTR_ADD16]](p5) :: (store (s32) into %stack.0 + 56, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 60 - ; CHECK-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C17]](s32) - ; CHECK-NEXT: G_STORE [[UV15]](s32), [[PTR_ADD17]](p5) :: (store (s32) into %stack.0 + 60, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; CHECK-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C18]](s32) - ; CHECK-NEXT: G_STORE [[UV16]](s32), [[PTR_ADD18]](p5) :: (store (s32) into %stack.0 + 64, align 64, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 68 - ; CHECK-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C19]](s32) - ; CHECK-NEXT: G_STORE [[UV17]](s32), [[PTR_ADD19]](p5) :: (store (s32) into %stack.0 + 68, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 72 - ; CHECK-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C20]](s32) - ; CHECK-NEXT: G_STORE [[UV18]](s32), [[PTR_ADD20]](p5) :: (store (s32) into %stack.0 + 72, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 76 - ; CHECK-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C21]](s32) - ; CHECK-NEXT: G_STORE [[UV19]](s32), [[PTR_ADD21]](p5) :: (store (s32) into %stack.0 + 76, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 80 - ; CHECK-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C22]](s32) - ; CHECK-NEXT: G_STORE [[UV20]](s32), [[PTR_ADD22]](p5) :: (store (s32) into %stack.0 + 80, align 16, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 84 - ; CHECK-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C23]](s32) - ; CHECK-NEXT: G_STORE [[UV21]](s32), [[PTR_ADD23]](p5) :: (store (s32) into %stack.0 + 84, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 88 - ; CHECK-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C24]](s32) - ; CHECK-NEXT: G_STORE [[UV22]](s32), [[PTR_ADD24]](p5) :: (store (s32) into %stack.0 + 88, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 92 - ; CHECK-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C25]](s32) - ; CHECK-NEXT: G_STORE [[UV23]](s32), [[PTR_ADD25]](p5) :: (store (s32) into %stack.0 + 92, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 96 - ; CHECK-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C26]](s32) - ; CHECK-NEXT: G_STORE [[UV24]](s32), [[PTR_ADD26]](p5) :: (store (s32) into %stack.0 + 96, align 32, basealign 256, addrspace 5) - ; CHECK-NEXT: 
[[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 100 - ; CHECK-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C27]](s32) - ; CHECK-NEXT: G_STORE [[UV25]](s32), [[PTR_ADD27]](p5) :: (store (s32) into %stack.0 + 100, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 104 - ; CHECK-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C28]](s32) - ; CHECK-NEXT: G_STORE [[UV26]](s32), [[PTR_ADD28]](p5) :: (store (s32) into %stack.0 + 104, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 108 - ; CHECK-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C29]](s32) - ; CHECK-NEXT: G_STORE [[UV27]](s32), [[PTR_ADD29]](p5) :: (store (s32) into %stack.0 + 108, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 112 - ; CHECK-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C30]](s32) - ; CHECK-NEXT: G_STORE [[UV28]](s32), [[PTR_ADD30]](p5) :: (store (s32) into %stack.0 + 112, align 16, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C31:%[0-9]+]]:_(s32) = G_CONSTANT i32 116 - ; CHECK-NEXT: [[PTR_ADD31:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C31]](s32) - ; CHECK-NEXT: G_STORE [[UV29]](s32), [[PTR_ADD31]](p5) :: (store (s32) into %stack.0 + 116, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C32:%[0-9]+]]:_(s32) = G_CONSTANT i32 120 - ; CHECK-NEXT: [[PTR_ADD32:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C32]](s32) - ; CHECK-NEXT: G_STORE [[UV30]](s32), [[PTR_ADD32]](p5) :: (store (s32) into %stack.0 + 120, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C33:%[0-9]+]]:_(s32) = G_CONSTANT i32 124 - ; CHECK-NEXT: [[PTR_ADD33:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C33]](s32) - ; CHECK-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD33]](p5) :: (store (s32) into %stack.0 + 124, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C34:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 - ; CHECK-NEXT: [[PTR_ADD34:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C34]](s32) - ; CHECK-NEXT: G_STORE [[UV32]](s32), [[PTR_ADD34]](p5) :: (store (s32) into %stack.0 + 128, align 128, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C35:%[0-9]+]]:_(s32) = G_CONSTANT i32 132 - ; CHECK-NEXT: [[PTR_ADD35:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C35]](s32) - ; CHECK-NEXT: G_STORE [[UV33]](s32), [[PTR_ADD35]](p5) :: (store (s32) into %stack.0 + 132, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C36:%[0-9]+]]:_(s32) = G_CONSTANT i32 136 - ; CHECK-NEXT: [[PTR_ADD36:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C36]](s32) - ; CHECK-NEXT: G_STORE [[UV34]](s32), [[PTR_ADD36]](p5) :: (store (s32) into %stack.0 + 136, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C37:%[0-9]+]]:_(s32) = G_CONSTANT i32 140 - ; CHECK-NEXT: [[PTR_ADD37:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C37]](s32) - ; CHECK-NEXT: G_STORE [[UV35]](s32), [[PTR_ADD37]](p5) :: (store (s32) into %stack.0 + 140, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C38:%[0-9]+]]:_(s32) = G_CONSTANT i32 144 - ; CHECK-NEXT: [[PTR_ADD38:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C38]](s32) - ; CHECK-NEXT: G_STORE [[UV36]](s32), [[PTR_ADD38]](p5) :: (store (s32) into %stack.0 + 144, align 16, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C39:%[0-9]+]]:_(s32) = G_CONSTANT i32 148 - ; CHECK-NEXT: [[PTR_ADD39:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C39]](s32) - ; CHECK-NEXT: G_STORE [[UV37]](s32), [[PTR_ADD39]](p5) :: (store (s32) into %stack.0 + 148, basealign 256, addrspace 5) - ; CHECK-NEXT: 
[[C40:%[0-9]+]]:_(s32) = G_CONSTANT i32 152 - ; CHECK-NEXT: [[PTR_ADD40:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C40]](s32) - ; CHECK-NEXT: G_STORE [[UV38]](s32), [[PTR_ADD40]](p5) :: (store (s32) into %stack.0 + 152, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C41:%[0-9]+]]:_(s32) = G_CONSTANT i32 156 - ; CHECK-NEXT: [[PTR_ADD41:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C41]](s32) - ; CHECK-NEXT: G_STORE [[UV39]](s32), [[PTR_ADD41]](p5) :: (store (s32) into %stack.0 + 156, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C42:%[0-9]+]]:_(s32) = G_CONSTANT i32 160 - ; CHECK-NEXT: [[PTR_ADD42:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C42]](s32) - ; CHECK-NEXT: G_STORE [[UV40]](s32), [[PTR_ADD42]](p5) :: (store (s32) into %stack.0 + 160, align 32, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C43:%[0-9]+]]:_(s32) = G_CONSTANT i32 164 - ; CHECK-NEXT: [[PTR_ADD43:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C43]](s32) - ; CHECK-NEXT: G_STORE [[UV41]](s32), [[PTR_ADD43]](p5) :: (store (s32) into %stack.0 + 164, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C44:%[0-9]+]]:_(s32) = G_CONSTANT i32 168 - ; CHECK-NEXT: [[PTR_ADD44:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C44]](s32) - ; CHECK-NEXT: G_STORE [[UV42]](s32), [[PTR_ADD44]](p5) :: (store (s32) into %stack.0 + 168, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C45:%[0-9]+]]:_(s32) = G_CONSTANT i32 172 - ; CHECK-NEXT: [[PTR_ADD45:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C45]](s32) - ; CHECK-NEXT: G_STORE [[UV43]](s32), [[PTR_ADD45]](p5) :: (store (s32) into %stack.0 + 172, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C46:%[0-9]+]]:_(s32) = G_CONSTANT i32 176 - ; CHECK-NEXT: [[PTR_ADD46:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C46]](s32) - ; CHECK-NEXT: G_STORE [[UV44]](s32), [[PTR_ADD46]](p5) :: (store (s32) into %stack.0 + 176, align 16, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C47:%[0-9]+]]:_(s32) = G_CONSTANT i32 180 - ; CHECK-NEXT: [[PTR_ADD47:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C47]](s32) - ; CHECK-NEXT: G_STORE [[UV45]](s32), [[PTR_ADD47]](p5) :: (store (s32) into %stack.0 + 180, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C48:%[0-9]+]]:_(s32) = G_CONSTANT i32 184 - ; CHECK-NEXT: [[PTR_ADD48:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C48]](s32) - ; CHECK-NEXT: G_STORE [[UV46]](s32), [[PTR_ADD48]](p5) :: (store (s32) into %stack.0 + 184, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C49:%[0-9]+]]:_(s32) = G_CONSTANT i32 188 - ; CHECK-NEXT: [[PTR_ADD49:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C49]](s32) - ; CHECK-NEXT: G_STORE [[UV47]](s32), [[PTR_ADD49]](p5) :: (store (s32) into %stack.0 + 188, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C50:%[0-9]+]]:_(s32) = G_CONSTANT i32 192 - ; CHECK-NEXT: [[PTR_ADD50:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C50]](s32) - ; CHECK-NEXT: G_STORE [[UV48]](s32), [[PTR_ADD50]](p5) :: (store (s32) into %stack.0 + 192, align 64, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C51:%[0-9]+]]:_(s32) = G_CONSTANT i32 196 - ; CHECK-NEXT: [[PTR_ADD51:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C51]](s32) - ; CHECK-NEXT: G_STORE [[UV49]](s32), [[PTR_ADD51]](p5) :: (store (s32) into %stack.0 + 196, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C52:%[0-9]+]]:_(s32) = G_CONSTANT i32 200 - ; CHECK-NEXT: [[PTR_ADD52:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C52]](s32) - ; CHECK-NEXT: G_STORE [[UV50]](s32), [[PTR_ADD52]](p5) :: (store (s32) into %stack.0 + 200, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: 
[[C53:%[0-9]+]]:_(s32) = G_CONSTANT i32 204 - ; CHECK-NEXT: [[PTR_ADD53:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C53]](s32) - ; CHECK-NEXT: G_STORE [[UV51]](s32), [[PTR_ADD53]](p5) :: (store (s32) into %stack.0 + 204, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C54:%[0-9]+]]:_(s32) = G_CONSTANT i32 208 - ; CHECK-NEXT: [[PTR_ADD54:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C54]](s32) - ; CHECK-NEXT: G_STORE [[UV52]](s32), [[PTR_ADD54]](p5) :: (store (s32) into %stack.0 + 208, align 16, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C55:%[0-9]+]]:_(s32) = G_CONSTANT i32 212 - ; CHECK-NEXT: [[PTR_ADD55:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C55]](s32) - ; CHECK-NEXT: G_STORE [[UV53]](s32), [[PTR_ADD55]](p5) :: (store (s32) into %stack.0 + 212, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C56:%[0-9]+]]:_(s32) = G_CONSTANT i32 216 - ; CHECK-NEXT: [[PTR_ADD56:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C56]](s32) - ; CHECK-NEXT: G_STORE [[UV54]](s32), [[PTR_ADD56]](p5) :: (store (s32) into %stack.0 + 216, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C57:%[0-9]+]]:_(s32) = G_CONSTANT i32 220 - ; CHECK-NEXT: [[PTR_ADD57:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C57]](s32) - ; CHECK-NEXT: G_STORE [[UV55]](s32), [[PTR_ADD57]](p5) :: (store (s32) into %stack.0 + 220, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C58:%[0-9]+]]:_(s32) = G_CONSTANT i32 224 - ; CHECK-NEXT: [[PTR_ADD58:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C58]](s32) - ; CHECK-NEXT: G_STORE [[UV56]](s32), [[PTR_ADD58]](p5) :: (store (s32) into %stack.0 + 224, align 32, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C59:%[0-9]+]]:_(s32) = G_CONSTANT i32 228 - ; CHECK-NEXT: [[PTR_ADD59:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C59]](s32) - ; CHECK-NEXT: G_STORE [[UV57]](s32), [[PTR_ADD59]](p5) :: (store (s32) into %stack.0 + 228, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C60:%[0-9]+]]:_(s32) = G_CONSTANT i32 232 - ; CHECK-NEXT: [[PTR_ADD60:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C60]](s32) - ; CHECK-NEXT: G_STORE [[UV58]](s32), [[PTR_ADD60]](p5) :: (store (s32) into %stack.0 + 232, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C61:%[0-9]+]]:_(s32) = G_CONSTANT i32 236 - ; CHECK-NEXT: [[PTR_ADD61:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C61]](s32) - ; CHECK-NEXT: G_STORE [[UV59]](s32), [[PTR_ADD61]](p5) :: (store (s32) into %stack.0 + 236, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C62:%[0-9]+]]:_(s32) = G_CONSTANT i32 240 - ; CHECK-NEXT: [[PTR_ADD62:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C62]](s32) - ; CHECK-NEXT: G_STORE [[UV60]](s32), [[PTR_ADD62]](p5) :: (store (s32) into %stack.0 + 240, align 16, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C63:%[0-9]+]]:_(s32) = G_CONSTANT i32 244 - ; CHECK-NEXT: [[PTR_ADD63:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C63]](s32) - ; CHECK-NEXT: G_STORE [[UV61]](s32), [[PTR_ADD63]](p5) :: (store (s32) into %stack.0 + 244, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C64:%[0-9]+]]:_(s32) = G_CONSTANT i32 248 - ; CHECK-NEXT: [[PTR_ADD64:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C64]](s32) - ; CHECK-NEXT: G_STORE [[UV62]](s32), [[PTR_ADD64]](p5) :: (store (s32) into %stack.0 + 248, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C65:%[0-9]+]]:_(s32) = G_CONSTANT i32 252 - ; CHECK-NEXT: [[PTR_ADD65:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C65]](s32) - ; CHECK-NEXT: G_STORE [[UV63]](s32), [[PTR_ADD65]](p5) :: (store (s32) into %stack.0 + 252, basealign 256, addrspace 5) - ; CHECK-NEXT: 
[[C66:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C66]] - ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[C3]] - ; CHECK-NEXT: [[PTR_ADD66:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[MUL]](s32) - ; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD66]](p5) :: (load (s32), addrspace 5) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[LOAD4]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32), [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32), [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32), [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<16 x i32>) + ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32), [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32), [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32), [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32), [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32), [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32), [[UV28:%[0-9]+]]:_(i32), [[UV29:%[0-9]+]]:_(i32), [[UV30:%[0-9]+]]:_(i32), [[UV31:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD1]](<16 x i32>) + ; CHECK-NEXT: [[UV32:%[0-9]+]]:_(i32), [[UV33:%[0-9]+]]:_(i32), [[UV34:%[0-9]+]]:_(i32), [[UV35:%[0-9]+]]:_(i32), [[UV36:%[0-9]+]]:_(i32), [[UV37:%[0-9]+]]:_(i32), [[UV38:%[0-9]+]]:_(i32), [[UV39:%[0-9]+]]:_(i32), [[UV40:%[0-9]+]]:_(i32), [[UV41:%[0-9]+]]:_(i32), [[UV42:%[0-9]+]]:_(i32), [[UV43:%[0-9]+]]:_(i32), [[UV44:%[0-9]+]]:_(i32), [[UV45:%[0-9]+]]:_(i32), [[UV46:%[0-9]+]]:_(i32), [[UV47:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD2]](<16 x i32>) + ; CHECK-NEXT: [[UV48:%[0-9]+]]:_(i32), [[UV49:%[0-9]+]]:_(i32), [[UV50:%[0-9]+]]:_(i32), [[UV51:%[0-9]+]]:_(i32), [[UV52:%[0-9]+]]:_(i32), [[UV53:%[0-9]+]]:_(i32), [[UV54:%[0-9]+]]:_(i32), [[UV55:%[0-9]+]]:_(i32), [[UV56:%[0-9]+]]:_(i32), [[UV57:%[0-9]+]]:_(i32), [[UV58:%[0-9]+]]:_(i32), [[UV59:%[0-9]+]]:_(i32), [[UV60:%[0-9]+]]:_(i32), [[UV61:%[0-9]+]]:_(i32), [[UV62:%[0-9]+]]:_(i32), [[UV63:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD3]](<16 x i32>) + ; CHECK-NEXT: G_STORE [[UV]](i32), [[FRAME_INDEX]](p5) :: (store (i32) into %stack.0, align 256, addrspace 5) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C3]](i32) + ; CHECK-NEXT: G_STORE [[UV1]](i32), [[PTR_ADD3]](p5) :: (store (i32) into %stack.0 + 4, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C4]](i32) + ; CHECK-NEXT: G_STORE [[UV2]](i32), [[PTR_ADD4]](p5) :: (store (i32) into %stack.0 + 8, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C5]](i32) + ; CHECK-NEXT: G_STORE [[UV3]](i32), [[PTR_ADD5]](p5) :: (store (i32) into %stack.0 + 12, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C6]](i32) + ; CHECK-NEXT: G_STORE [[UV4]](i32), [[PTR_ADD6]](p5) :: (store (i32) into %stack.0 + 16, align 16, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C7]](i32) + ; CHECK-NEXT: 
G_STORE [[UV5]](i32), [[PTR_ADD7]](p5) :: (store (i32) into %stack.0 + 20, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C8:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C8]](i32) + ; CHECK-NEXT: G_STORE [[UV6]](i32), [[PTR_ADD8]](p5) :: (store (i32) into %stack.0 + 24, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C9:%[0-9]+]]:_(i32) = G_CONSTANT i32 28 + ; CHECK-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C9]](i32) + ; CHECK-NEXT: G_STORE [[UV7]](i32), [[PTR_ADD9]](p5) :: (store (i32) into %stack.0 + 28, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C10:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; CHECK-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C10]](i32) + ; CHECK-NEXT: G_STORE [[UV8]](i32), [[PTR_ADD10]](p5) :: (store (i32) into %stack.0 + 32, align 32, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C11:%[0-9]+]]:_(i32) = G_CONSTANT i32 36 + ; CHECK-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C11]](i32) + ; CHECK-NEXT: G_STORE [[UV9]](i32), [[PTR_ADD11]](p5) :: (store (i32) into %stack.0 + 36, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C12:%[0-9]+]]:_(i32) = G_CONSTANT i32 40 + ; CHECK-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C12]](i32) + ; CHECK-NEXT: G_STORE [[UV10]](i32), [[PTR_ADD12]](p5) :: (store (i32) into %stack.0 + 40, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C13:%[0-9]+]]:_(i32) = G_CONSTANT i32 44 + ; CHECK-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C13]](i32) + ; CHECK-NEXT: G_STORE [[UV11]](i32), [[PTR_ADD13]](p5) :: (store (i32) into %stack.0 + 44, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C14:%[0-9]+]]:_(i32) = G_CONSTANT i32 48 + ; CHECK-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C14]](i32) + ; CHECK-NEXT: G_STORE [[UV12]](i32), [[PTR_ADD14]](p5) :: (store (i32) into %stack.0 + 48, align 16, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C15:%[0-9]+]]:_(i32) = G_CONSTANT i32 52 + ; CHECK-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C15]](i32) + ; CHECK-NEXT: G_STORE [[UV13]](i32), [[PTR_ADD15]](p5) :: (store (i32) into %stack.0 + 52, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C16:%[0-9]+]]:_(i32) = G_CONSTANT i32 56 + ; CHECK-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C16]](i32) + ; CHECK-NEXT: G_STORE [[UV14]](i32), [[PTR_ADD16]](p5) :: (store (i32) into %stack.0 + 56, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C17:%[0-9]+]]:_(i32) = G_CONSTANT i32 60 + ; CHECK-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C17]](i32) + ; CHECK-NEXT: G_STORE [[UV15]](i32), [[PTR_ADD17]](p5) :: (store (i32) into %stack.0 + 60, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C18:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; CHECK-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C18]](i32) + ; CHECK-NEXT: G_STORE [[UV16]](i32), [[PTR_ADD18]](p5) :: (store (i32) into %stack.0 + 64, align 64, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C19:%[0-9]+]]:_(i32) = G_CONSTANT i32 68 + ; CHECK-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C19]](i32) + ; CHECK-NEXT: G_STORE [[UV17]](i32), [[PTR_ADD19]](p5) :: (store (i32) into %stack.0 + 68, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C20:%[0-9]+]]:_(i32) = G_CONSTANT i32 72 + ; CHECK-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C20]](i32) + ; CHECK-NEXT: G_STORE [[UV18]](i32), [[PTR_ADD20]](p5) :: (store 
(i32) into %stack.0 + 72, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C21:%[0-9]+]]:_(i32) = G_CONSTANT i32 76 + ; CHECK-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C21]](i32) + ; CHECK-NEXT: G_STORE [[UV19]](i32), [[PTR_ADD21]](p5) :: (store (i32) into %stack.0 + 76, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C22:%[0-9]+]]:_(i32) = G_CONSTANT i32 80 + ; CHECK-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C22]](i32) + ; CHECK-NEXT: G_STORE [[UV20]](i32), [[PTR_ADD22]](p5) :: (store (i32) into %stack.0 + 80, align 16, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C23:%[0-9]+]]:_(i32) = G_CONSTANT i32 84 + ; CHECK-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C23]](i32) + ; CHECK-NEXT: G_STORE [[UV21]](i32), [[PTR_ADD23]](p5) :: (store (i32) into %stack.0 + 84, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C24:%[0-9]+]]:_(i32) = G_CONSTANT i32 88 + ; CHECK-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C24]](i32) + ; CHECK-NEXT: G_STORE [[UV22]](i32), [[PTR_ADD24]](p5) :: (store (i32) into %stack.0 + 88, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C25:%[0-9]+]]:_(i32) = G_CONSTANT i32 92 + ; CHECK-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C25]](i32) + ; CHECK-NEXT: G_STORE [[UV23]](i32), [[PTR_ADD25]](p5) :: (store (i32) into %stack.0 + 92, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C26:%[0-9]+]]:_(i32) = G_CONSTANT i32 96 + ; CHECK-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C26]](i32) + ; CHECK-NEXT: G_STORE [[UV24]](i32), [[PTR_ADD26]](p5) :: (store (i32) into %stack.0 + 96, align 32, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C27:%[0-9]+]]:_(i32) = G_CONSTANT i32 100 + ; CHECK-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C27]](i32) + ; CHECK-NEXT: G_STORE [[UV25]](i32), [[PTR_ADD27]](p5) :: (store (i32) into %stack.0 + 100, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C28:%[0-9]+]]:_(i32) = G_CONSTANT i32 104 + ; CHECK-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C28]](i32) + ; CHECK-NEXT: G_STORE [[UV26]](i32), [[PTR_ADD28]](p5) :: (store (i32) into %stack.0 + 104, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C29:%[0-9]+]]:_(i32) = G_CONSTANT i32 108 + ; CHECK-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C29]](i32) + ; CHECK-NEXT: G_STORE [[UV27]](i32), [[PTR_ADD29]](p5) :: (store (i32) into %stack.0 + 108, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C30:%[0-9]+]]:_(i32) = G_CONSTANT i32 112 + ; CHECK-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C30]](i32) + ; CHECK-NEXT: G_STORE [[UV28]](i32), [[PTR_ADD30]](p5) :: (store (i32) into %stack.0 + 112, align 16, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C31:%[0-9]+]]:_(i32) = G_CONSTANT i32 116 + ; CHECK-NEXT: [[PTR_ADD31:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C31]](i32) + ; CHECK-NEXT: G_STORE [[UV29]](i32), [[PTR_ADD31]](p5) :: (store (i32) into %stack.0 + 116, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C32:%[0-9]+]]:_(i32) = G_CONSTANT i32 120 + ; CHECK-NEXT: [[PTR_ADD32:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C32]](i32) + ; CHECK-NEXT: G_STORE [[UV30]](i32), [[PTR_ADD32]](p5) :: (store (i32) into %stack.0 + 120, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C33:%[0-9]+]]:_(i32) = G_CONSTANT i32 124 + ; CHECK-NEXT: [[PTR_ADD33:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C33]](i32) + ; CHECK-NEXT: G_STORE [[UV31]](i32), [[PTR_ADD33]](p5) :: (store (i32) into 
%stack.0 + 124, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C34:%[0-9]+]]:_(i32) = G_CONSTANT i32 128 + ; CHECK-NEXT: [[PTR_ADD34:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C34]](i32) + ; CHECK-NEXT: G_STORE [[UV32]](i32), [[PTR_ADD34]](p5) :: (store (i32) into %stack.0 + 128, align 128, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C35:%[0-9]+]]:_(i32) = G_CONSTANT i32 132 + ; CHECK-NEXT: [[PTR_ADD35:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C35]](i32) + ; CHECK-NEXT: G_STORE [[UV33]](i32), [[PTR_ADD35]](p5) :: (store (i32) into %stack.0 + 132, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C36:%[0-9]+]]:_(i32) = G_CONSTANT i32 136 + ; CHECK-NEXT: [[PTR_ADD36:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C36]](i32) + ; CHECK-NEXT: G_STORE [[UV34]](i32), [[PTR_ADD36]](p5) :: (store (i32) into %stack.0 + 136, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C37:%[0-9]+]]:_(i32) = G_CONSTANT i32 140 + ; CHECK-NEXT: [[PTR_ADD37:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C37]](i32) + ; CHECK-NEXT: G_STORE [[UV35]](i32), [[PTR_ADD37]](p5) :: (store (i32) into %stack.0 + 140, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C38:%[0-9]+]]:_(i32) = G_CONSTANT i32 144 + ; CHECK-NEXT: [[PTR_ADD38:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C38]](i32) + ; CHECK-NEXT: G_STORE [[UV36]](i32), [[PTR_ADD38]](p5) :: (store (i32) into %stack.0 + 144, align 16, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C39:%[0-9]+]]:_(i32) = G_CONSTANT i32 148 + ; CHECK-NEXT: [[PTR_ADD39:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C39]](i32) + ; CHECK-NEXT: G_STORE [[UV37]](i32), [[PTR_ADD39]](p5) :: (store (i32) into %stack.0 + 148, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C40:%[0-9]+]]:_(i32) = G_CONSTANT i32 152 + ; CHECK-NEXT: [[PTR_ADD40:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C40]](i32) + ; CHECK-NEXT: G_STORE [[UV38]](i32), [[PTR_ADD40]](p5) :: (store (i32) into %stack.0 + 152, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C41:%[0-9]+]]:_(i32) = G_CONSTANT i32 156 + ; CHECK-NEXT: [[PTR_ADD41:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C41]](i32) + ; CHECK-NEXT: G_STORE [[UV39]](i32), [[PTR_ADD41]](p5) :: (store (i32) into %stack.0 + 156, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C42:%[0-9]+]]:_(i32) = G_CONSTANT i32 160 + ; CHECK-NEXT: [[PTR_ADD42:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C42]](i32) + ; CHECK-NEXT: G_STORE [[UV40]](i32), [[PTR_ADD42]](p5) :: (store (i32) into %stack.0 + 160, align 32, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C43:%[0-9]+]]:_(i32) = G_CONSTANT i32 164 + ; CHECK-NEXT: [[PTR_ADD43:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C43]](i32) + ; CHECK-NEXT: G_STORE [[UV41]](i32), [[PTR_ADD43]](p5) :: (store (i32) into %stack.0 + 164, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C44:%[0-9]+]]:_(i32) = G_CONSTANT i32 168 + ; CHECK-NEXT: [[PTR_ADD44:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C44]](i32) + ; CHECK-NEXT: G_STORE [[UV42]](i32), [[PTR_ADD44]](p5) :: (store (i32) into %stack.0 + 168, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C45:%[0-9]+]]:_(i32) = G_CONSTANT i32 172 + ; CHECK-NEXT: [[PTR_ADD45:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C45]](i32) + ; CHECK-NEXT: G_STORE [[UV43]](i32), [[PTR_ADD45]](p5) :: (store (i32) into %stack.0 + 172, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C46:%[0-9]+]]:_(i32) = G_CONSTANT i32 176 + ; CHECK-NEXT: [[PTR_ADD46:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C46]](i32) + ; CHECK-NEXT: G_STORE [[UV44]](i32), [[PTR_ADD46]](p5) :: (store (i32) into %stack.0 + 
176, align 16, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C47:%[0-9]+]]:_(i32) = G_CONSTANT i32 180 + ; CHECK-NEXT: [[PTR_ADD47:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C47]](i32) + ; CHECK-NEXT: G_STORE [[UV45]](i32), [[PTR_ADD47]](p5) :: (store (i32) into %stack.0 + 180, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C48:%[0-9]+]]:_(i32) = G_CONSTANT i32 184 + ; CHECK-NEXT: [[PTR_ADD48:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C48]](i32) + ; CHECK-NEXT: G_STORE [[UV46]](i32), [[PTR_ADD48]](p5) :: (store (i32) into %stack.0 + 184, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C49:%[0-9]+]]:_(i32) = G_CONSTANT i32 188 + ; CHECK-NEXT: [[PTR_ADD49:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C49]](i32) + ; CHECK-NEXT: G_STORE [[UV47]](i32), [[PTR_ADD49]](p5) :: (store (i32) into %stack.0 + 188, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C50:%[0-9]+]]:_(i32) = G_CONSTANT i32 192 + ; CHECK-NEXT: [[PTR_ADD50:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C50]](i32) + ; CHECK-NEXT: G_STORE [[UV48]](i32), [[PTR_ADD50]](p5) :: (store (i32) into %stack.0 + 192, align 64, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C51:%[0-9]+]]:_(i32) = G_CONSTANT i32 196 + ; CHECK-NEXT: [[PTR_ADD51:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C51]](i32) + ; CHECK-NEXT: G_STORE [[UV49]](i32), [[PTR_ADD51]](p5) :: (store (i32) into %stack.0 + 196, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C52:%[0-9]+]]:_(i32) = G_CONSTANT i32 200 + ; CHECK-NEXT: [[PTR_ADD52:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C52]](i32) + ; CHECK-NEXT: G_STORE [[UV50]](i32), [[PTR_ADD52]](p5) :: (store (i32) into %stack.0 + 200, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C53:%[0-9]+]]:_(i32) = G_CONSTANT i32 204 + ; CHECK-NEXT: [[PTR_ADD53:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C53]](i32) + ; CHECK-NEXT: G_STORE [[UV51]](i32), [[PTR_ADD53]](p5) :: (store (i32) into %stack.0 + 204, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C54:%[0-9]+]]:_(i32) = G_CONSTANT i32 208 + ; CHECK-NEXT: [[PTR_ADD54:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C54]](i32) + ; CHECK-NEXT: G_STORE [[UV52]](i32), [[PTR_ADD54]](p5) :: (store (i32) into %stack.0 + 208, align 16, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C55:%[0-9]+]]:_(i32) = G_CONSTANT i32 212 + ; CHECK-NEXT: [[PTR_ADD55:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C55]](i32) + ; CHECK-NEXT: G_STORE [[UV53]](i32), [[PTR_ADD55]](p5) :: (store (i32) into %stack.0 + 212, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C56:%[0-9]+]]:_(i32) = G_CONSTANT i32 216 + ; CHECK-NEXT: [[PTR_ADD56:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C56]](i32) + ; CHECK-NEXT: G_STORE [[UV54]](i32), [[PTR_ADD56]](p5) :: (store (i32) into %stack.0 + 216, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C57:%[0-9]+]]:_(i32) = G_CONSTANT i32 220 + ; CHECK-NEXT: [[PTR_ADD57:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C57]](i32) + ; CHECK-NEXT: G_STORE [[UV55]](i32), [[PTR_ADD57]](p5) :: (store (i32) into %stack.0 + 220, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C58:%[0-9]+]]:_(i32) = G_CONSTANT i32 224 + ; CHECK-NEXT: [[PTR_ADD58:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C58]](i32) + ; CHECK-NEXT: G_STORE [[UV56]](i32), [[PTR_ADD58]](p5) :: (store (i32) into %stack.0 + 224, align 32, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C59:%[0-9]+]]:_(i32) = G_CONSTANT i32 228 + ; CHECK-NEXT: [[PTR_ADD59:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C59]](i32) + ; CHECK-NEXT: G_STORE [[UV57]](i32), [[PTR_ADD59]](p5) :: (store (i32) into %stack.0 + 
228, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C60:%[0-9]+]]:_(i32) = G_CONSTANT i32 232 + ; CHECK-NEXT: [[PTR_ADD60:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C60]](i32) + ; CHECK-NEXT: G_STORE [[UV58]](i32), [[PTR_ADD60]](p5) :: (store (i32) into %stack.0 + 232, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C61:%[0-9]+]]:_(i32) = G_CONSTANT i32 236 + ; CHECK-NEXT: [[PTR_ADD61:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C61]](i32) + ; CHECK-NEXT: G_STORE [[UV59]](i32), [[PTR_ADD61]](p5) :: (store (i32) into %stack.0 + 236, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C62:%[0-9]+]]:_(i32) = G_CONSTANT i32 240 + ; CHECK-NEXT: [[PTR_ADD62:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C62]](i32) + ; CHECK-NEXT: G_STORE [[UV60]](i32), [[PTR_ADD62]](p5) :: (store (i32) into %stack.0 + 240, align 16, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C63:%[0-9]+]]:_(i32) = G_CONSTANT i32 244 + ; CHECK-NEXT: [[PTR_ADD63:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C63]](i32) + ; CHECK-NEXT: G_STORE [[UV61]](i32), [[PTR_ADD63]](p5) :: (store (i32) into %stack.0 + 244, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C64:%[0-9]+]]:_(i32) = G_CONSTANT i32 248 + ; CHECK-NEXT: [[PTR_ADD64:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C64]](i32) + ; CHECK-NEXT: G_STORE [[UV62]](i32), [[PTR_ADD64]](p5) :: (store (i32) into %stack.0 + 248, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C65:%[0-9]+]]:_(i32) = G_CONSTANT i32 252 + ; CHECK-NEXT: [[PTR_ADD65:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C65]](i32) + ; CHECK-NEXT: G_STORE [[UV63]](i32), [[PTR_ADD65]](p5) :: (store (i32) into %stack.0 + 252, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C66:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C66]] + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[AND]], [[C3]] + ; CHECK-NEXT: [[PTR_ADD66:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[MUL]](i32) + ; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD66]](p5) :: (load (i32), addrspace 5) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[LOAD4]](i32) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(<64 x s32>) = G_LOAD %0 :: (load (<64 x s32>), align 4, addrspace 4) - %3:_(s32) = G_EXTRACT_VECTOR_ELT %2, %1 - S_ENDPGM 0, implicit %3 + %1:_(i32) = COPY $sgpr2 + %2:_(<64 x i32>) = G_LOAD %0(p1) :: (load (<64 x i32>), align 4, addrspace 4) + %3:_(i32) = G_EXTRACT_VECTOR_ELT %2(<64 x i32>), %1(i32) + S_ENDPGM 0, implicit %3(i32) ... 
--- @@ -1368,20 +1368,20 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_v32s1_varidx_i32 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[SHL]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(<32 x s1>) = G_BITCAST %0 - %3:_(s1) = G_EXTRACT_VECTOR_ELT %2, %1 - %4:_(s32) = G_ANYEXT %3 - $vgpr0 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND]], [[C1]](i32) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[SHL]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(<32 x i1>) = G_BITCAST %0(i32) + %3:_(i1) = G_EXTRACT_VECTOR_ELT %2(<32 x i1>), %1(i32) + %4:_(i32) = G_ANYEXT %3(i1) + $vgpr0 = COPY %4(i32) ... --- @@ -1393,22 +1393,22 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_v12s8_varidx_s32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<3 x s32>), [[LSHR]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[EVEC]], [[SHL]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[LSHR1]](s32) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<12 x s8>) = G_BITCAST %0 - %2:_(s32) = COPY $vgpr3 - %3:_(s8) = G_EXTRACT_VECTOR_ELT %1, %2 - %4:_(s32) = G_ANYEXT %3 - $vgpr0 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY1]], [[C]](i32) + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(i32) = G_EXTRACT_VECTOR_ELT [[COPY]](<3 x i32>), [[LSHR]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND]], [[C1]](i32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[EVEC]], [[SHL]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[LSHR1]](i32) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<12 x i8>) = G_BITCAST %0(<3 x i32>) + %2:_(i32) = COPY $vgpr3 + %3:_(i8) = G_EXTRACT_VECTOR_ELT %1(<12 x i8>), %2(i32) + %4:_(i32) = G_ANYEXT %3(i8) + $vgpr0 = COPY %4(i32) ... 
--- @@ -1420,20 +1420,20 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_v3s8_varidx_s32 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[LSHR]](s32), [[LSHR1]](s32) - ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[COPY1]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[EVEC]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s24) = G_TRUNC %0 - %3:_(<3 x s8>) = G_BITCAST %2 - %4:_(s8) = G_EXTRACT_VECTOR_ELT %3, %1 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C1]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[LSHR]](i32), [[LSHR1]](i32) + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(i32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x i32>), [[COPY1]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[EVEC]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i24) = G_TRUNC %0(i32) + %3:_(<3 x i8>) = G_BITCAST %2(i24) + %4:_(i8) = G_EXTRACT_VECTOR_ELT %3(<3 x i8>), %1(i32) + %5:_(i32) = G_ANYEXT %4(i8) + $vgpr0 = COPY %5(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir index 6630300bcc96b..b0de203f3752c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir @@ -10,12 +10,12 @@ body: | ; CHECK-LABEL: name: test_extract_s32_s64_offset0 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s64), 0 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_EXTRACT %0, 0 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(i32) = G_EXTRACT [[COPY]](i64), 0 + ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = G_EXTRACT %0(i64), 0 + $vgpr0 = COPY %1(i32) ... --- name: test_extract_s32_s64_offset32 @@ -26,12 +26,12 @@ body: | ; CHECK-LABEL: name: test_extract_s32_s64_offset32 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s64), 32 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_EXTRACT %0, 32 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(i32) = G_EXTRACT [[COPY]](i64), 32 + ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = G_EXTRACT %0(i64), 32 + $vgpr0 = COPY %1(i32) ... 
--- @@ -43,14 +43,14 @@ body: | ; CHECK-LABEL: name: test_extract_s8_s15_offset0 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[TRUNC]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s31) = G_TRUNC %0 - %2:_(s8) = G_EXTRACT %1, 0 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY]](i64) + ; CHECK-NEXT: $vgpr0 = COPY [[TRUNC]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i31) = G_TRUNC %0(i64) + %2:_(i8) = G_EXTRACT %1(i31), 0 + %3:_(i32) = G_ANYEXT %2(i8) + $vgpr0 = COPY %3(i32) ... --- @@ -62,14 +62,14 @@ body: | ; CHECK-LABEL: name: test_extract_s16_s31_offset0 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[TRUNC]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s31) = G_TRUNC %0 - %2:_(s16) = G_EXTRACT %1, 0 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY]](i64) + ; CHECK-NEXT: $vgpr0 = COPY [[TRUNC]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i31) = G_TRUNC %0(i64) + %2:_(i16) = G_EXTRACT %1(i31), 0 + %3:_(i32) = G_ANYEXT %2(i16) + $vgpr0 = COPY %3(i32) ... --- @@ -81,13 +81,13 @@ body: | ; CHECK-LABEL: name: test_extract_s32_s48_offset0 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s64), 0 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s48) = G_TRUNC %0 - %2:_(s32) = G_EXTRACT %1, 0 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(i32) = G_EXTRACT [[COPY]](i64), 0 + ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i48) = G_TRUNC %0(i64) + %2:_(i32) = G_EXTRACT %1(i48), 0 + $vgpr0 = COPY %2(i32) ... --- @@ -99,12 +99,12 @@ body: | ; CHECK-LABEL: name: test_extract_s32_s96_offset0 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s96), 0 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) - %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(s32) = G_EXTRACT %0, 0 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(i32) = G_EXTRACT [[COPY]](i96), 0 + ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](i32) + %0:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(i32) = G_EXTRACT %0(i96), 0 + $vgpr0 = COPY %1(i32) ... 
--- name: test_extract_s32_s96_offset32 @@ -115,12 +115,12 @@ body: | ; CHECK-LABEL: name: test_extract_s32_s96_offset32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s96), 32 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) - %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(s32) = G_EXTRACT %0, 32 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(i32) = G_EXTRACT [[COPY]](i96), 32 + ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](i32) + %0:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(i32) = G_EXTRACT %0(i96), 32 + $vgpr0 = COPY %1(i32) ... --- name: test_extract_s32_s96_offset64 @@ -131,12 +131,12 @@ body: | ; CHECK-LABEL: name: test_extract_s32_s96_offset64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s96), 64 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) - %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(s32) = G_EXTRACT %0, 64 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(i32) = G_EXTRACT [[COPY]](i96), 64 + ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](i32) + %0:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(i32) = G_EXTRACT %0(i96), 64 + $vgpr0 = COPY %1(i32) ... --- name: test_extract_s32_s128_offset0 @@ -147,12 +147,12 @@ body: | ; CHECK-LABEL: name: test_extract_s32_s128_offset0 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s128), 0 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = G_EXTRACT %0, 0 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(i32) = G_EXTRACT [[COPY]](i128), 0 + ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](i32) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i32) = G_EXTRACT %0(i128), 0 + $vgpr0 = COPY %1(i32) ... --- name: test_extract_s32_s128_offset32 @@ -163,12 +163,12 @@ body: | ; CHECK-LABEL: name: test_extract_s32_s128_offset32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s128), 32 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = G_EXTRACT %0, 32 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(i32) = G_EXTRACT [[COPY]](i128), 32 + ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](i32) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i32) = G_EXTRACT %0(i128), 32 + $vgpr0 = COPY %1(i32) ... 
--- name: test_extract_s32_s128_offset64 @@ -179,12 +179,12 @@ body: | ; CHECK-LABEL: name: test_extract_s32_s128_offset64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s128), 64 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = G_EXTRACT %0, 64 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(i32) = G_EXTRACT [[COPY]](i128), 64 + ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](i32) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i32) = G_EXTRACT %0(i128), 64 + $vgpr0 = COPY %1(i32) ... --- name: test_extract_s32_s128_offset96 @@ -195,12 +195,12 @@ body: | ; CHECK-LABEL: name: test_extract_s32_s128_offset96 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s128), 96 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = G_EXTRACT %0, 96 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(i32) = G_EXTRACT [[COPY]](i128), 96 + ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](i32) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i32) = G_EXTRACT %0(i128), 96 + $vgpr0 = COPY %1(i32) ... --- @@ -212,13 +212,13 @@ body: | ; CHECK-LABEL: name: test_extract_s32_v2s32_offset0 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_EXTRACT %0, 0 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](i32) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(i32) = G_EXTRACT %0(<2 x i32>), 0 + $vgpr0 = COPY %1(i32) ... --- name: test_extract_s32_v2s32_offset32 @@ -229,13 +229,13 @@ body: | ; CHECK-LABEL: name: test_extract_s32_v2s32_offset32 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_EXTRACT %0, 32 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](i32) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(i32) = G_EXTRACT %0(<2 x i32>), 32 + $vgpr0 = COPY %1(i32) ... 
--- name: test_extract_s32_v3s32_offset0 @@ -246,13 +246,13 @@ body: | ; CHECK-LABEL: name: test_extract_s32_v3s32_offset0 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(s32) = G_EXTRACT %0, 0 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](i32) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(i32) = G_EXTRACT %0(<3 x i32>), 0 + $vgpr0 = COPY %1(i32) ... --- name: test_extract_s32_v3s32_offset32 @@ -263,13 +263,13 @@ body: | ; CHECK-LABEL: name: test_extract_s32_v3s32_offset32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(s32) = G_EXTRACT %0, 32 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](i32) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(i32) = G_EXTRACT %0(<3 x i32>), 32 + $vgpr0 = COPY %1(i32) ... --- name: test_extract_s32_v3s32_offset64 @@ -280,13 +280,13 @@ body: | ; CHECK-LABEL: name: test_extract_s32_v3s32_offset64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(s32) = G_EXTRACT %0, 64 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[UV2]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](i32) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(i32) = G_EXTRACT %0(<3 x i32>), 64 + $vgpr0 = COPY %1(i32) ... 
--- name: test_extract_s32_v4s32_offset0 @@ -297,13 +297,13 @@ body: | ; CHECK-LABEL: name: test_extract_s32_v4s32_offset0 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = G_EXTRACT %0, 0 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](i32) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i32) = G_EXTRACT %0(<4 x i32>), 0 + $vgpr0 = COPY %1(i32) ... --- name: test_extract_s32_v4s32_offset32 @@ -314,12 +314,12 @@ body: | ; CHECK-LABEL: name: test_extract_s32_v4s32_offset32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s128), 32 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = G_EXTRACT %0, 32 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(i32) = G_EXTRACT [[COPY]](i128), 32 + ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](i32) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i32) = G_EXTRACT %0(i128), 32 + $vgpr0 = COPY %1(i32) ... --- name: test_extract_s32_v4s32_offset64 @@ -330,12 +330,12 @@ body: | ; CHECK-LABEL: name: test_extract_s32_v4s32_offset64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s128), 64 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = G_EXTRACT %0, 64 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(i32) = G_EXTRACT [[COPY]](i128), 64 + ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](i32) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i32) = G_EXTRACT %0(i128), 64 + $vgpr0 = COPY %1(i32) ... --- name: test_extract_s32_v4s32_offset96 @@ -346,12 +346,12 @@ body: | ; CHECK-LABEL: name: test_extract_s32_v4s32_offset96 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s128), 96 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = G_EXTRACT %0, 96 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(i32) = G_EXTRACT [[COPY]](i128), 96 + ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](i32) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i32) = G_EXTRACT %0(i128), 96 + $vgpr0 = COPY %1(i32) ... 
--- name: test_extract_v2s32_v4s32_offset0 @@ -362,12 +362,12 @@ body: | ; CHECK-LABEL: name: test_extract_v2s32_v4s32_offset0 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[UV]](<2 x s32>) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s32>) = G_EXTRACT %0, 0 - $vgpr0_vgpr1 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i32>), [[UV1:%[0-9]+]]:_(<2 x i32>) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[UV]](<2 x i32>) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x i32>) = G_EXTRACT %0(<4 x i32>), 0 + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... --- @@ -379,13 +379,13 @@ body: | ; CHECK-LABEL: name: test_extract_v2s32_v4s32_offset32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV1]](s32), [[UV2]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s32>) = G_EXTRACT %0, 32 - $vgpr0_vgpr1 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[UV1]](i32), [[UV2]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x i32>) = G_EXTRACT %0(<4 x i32>), 32 + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... --- @@ -397,12 +397,12 @@ body: | ; CHECK-LABEL: name: test_extract_v2s32_v4s32_offset64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[UV1]](<2 x s32>) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s32>) = G_EXTRACT %0, 64 - $vgpr0_vgpr1 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i32>), [[UV1:%[0-9]+]]:_(<2 x i32>) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[UV1]](<2 x i32>) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x i32>) = G_EXTRACT %0(<4 x i32>), 64 + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... 
--- name: test_extract_s64_v4s32_offset0 @@ -413,13 +413,13 @@ body: | ; CHECK-LABEL: name: test_extract_s64_v4s32_offset0 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s64) = G_EXTRACT %0, 0 - $vgpr0_vgpr1 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV]](i32), [[UV1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i64) = G_EXTRACT %0(<4 x i32>), 0 + $vgpr0_vgpr1 = COPY %1(i64) ... --- @@ -431,13 +431,13 @@ body: | ; CHECK-LABEL: name: test_extract_s64_v4s32_offset32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV1]](s32), [[UV2]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s64) = G_EXTRACT %0, 32 - $vgpr0_vgpr1 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV1]](i32), [[UV2]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i64) = G_EXTRACT %0(<4 x i32>), 32 + $vgpr0_vgpr1 = COPY %1(i64) ... --- @@ -449,13 +449,13 @@ body: | ; CHECK-LABEL: name: test_extract_s64_v4s32_offset64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[UV3]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s64) = G_EXTRACT %0, 64 - $vgpr0_vgpr1 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV2]](i32), [[UV3]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i64) = G_EXTRACT %0(<4 x i32>), 64 + $vgpr0_vgpr1 = COPY %1(i64) ... 
--- name: test_extract_p0_v4s32_offset0 @@ -466,13 +466,13 @@ body: | ; CHECK-LABEL: name: test_extract_p0_v4s32_offset0 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[UV]](i32), [[UV1]](i32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p0) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(p0) = G_EXTRACT %0, 0 - $vgpr0_vgpr1 = COPY %1 + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(p0) = G_EXTRACT %0(<4 x i32>), 0 + $vgpr0_vgpr1 = COPY %1(p0) ... --- @@ -484,13 +484,13 @@ body: | ; CHECK-LABEL: name: test_extract_p0_v4s32_offset32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[UV1]](s32), [[UV2]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[UV1]](i32), [[UV2]](i32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p0) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(p0) = G_EXTRACT %0, 32 - $vgpr0_vgpr1 = COPY %1 + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(p0) = G_EXTRACT %0(<4 x i32>), 32 + $vgpr0_vgpr1 = COPY %1(p0) ... --- @@ -502,13 +502,13 @@ body: | ; CHECK-LABEL: name: test_extract_p0_v4s32_offset64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[UV2]](s32), [[UV3]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[UV2]](i32), [[UV3]](i32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p0) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(p0) = G_EXTRACT %0, 64 - $vgpr0_vgpr1 = COPY %1 + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(p0) = G_EXTRACT %0(<4 x i32>), 64 + $vgpr0_vgpr1 = COPY %1(p0) ... 
--- @@ -516,13 +516,13 @@ name: extract_s8_v4s8_offset0 body: | bb.0: ; CHECK-LABEL: name: extract_s8_v4s8_offset0 - ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - %0:_(<4 x s8>) = G_IMPLICIT_DEF - %1:_(s8) = G_EXTRACT %0, 0 - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + ; CHECK: [[DEF:%[0-9]+]]:_(<4 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF]](<4 x i32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + %0:_(<4 x i8>) = G_IMPLICIT_DEF + %1:_(i8) = G_EXTRACT %0(<4 x i8>), 0 + %2:_(i32) = G_ANYEXT %1(i8) + $vgpr0 = COPY %2(i32) ... --- @@ -530,13 +530,13 @@ name: extract_s8_v4s8_offset8 body: | bb.0: ; CHECK-LABEL: name: extract_s8_v4s8_offset8 - ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV1]](s32) - %0:_(<4 x s8>) = G_IMPLICIT_DEF - %1:_(s8) = G_EXTRACT %0, 8 - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + ; CHECK: [[DEF:%[0-9]+]]:_(<4 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF]](<4 x i32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV1]](i32) + %0:_(<4 x i8>) = G_IMPLICIT_DEF + %1:_(i8) = G_EXTRACT %0(<4 x i8>), 8 + %2:_(i32) = G_ANYEXT %1(i8) + $vgpr0 = COPY %2(i32) ... --- @@ -544,13 +544,13 @@ name: extract_s8_v4s8_offset16 body: | bb.0: ; CHECK-LABEL: name: extract_s8_v4s8_offset16 - ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV2]](s32) - %0:_(<4 x s8>) = G_IMPLICIT_DEF - %1:_(s8) = G_EXTRACT %0, 16 - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + ; CHECK: [[DEF:%[0-9]+]]:_(<4 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF]](<4 x i32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV2]](i32) + %0:_(<4 x i8>) = G_IMPLICIT_DEF + %1:_(i8) = G_EXTRACT %0(<4 x i8>), 16 + %2:_(i32) = G_ANYEXT %1(i8) + $vgpr0 = COPY %2(i32) ... --- @@ -558,13 +558,13 @@ name: extract_s8_v4s8_offset24 body: | bb.0: ; CHECK-LABEL: name: extract_s8_v4s8_offset24 - ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV3]](s32) - %0:_(<4 x s8>) = G_IMPLICIT_DEF - %1:_(s8) = G_EXTRACT %0, 24 - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + ; CHECK: [[DEF:%[0-9]+]]:_(<4 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF]](<4 x i32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV3]](i32) + %0:_(<4 x i8>) = G_IMPLICIT_DEF + %1:_(i8) = G_EXTRACT %0(<4 x i8>), 24 + %2:_(i32) = G_ANYEXT %1(i8) + $vgpr0 = COPY %2(i32) ... 
# FIXME: Leaves behind dead G_TRUNC @@ -573,13 +573,13 @@ name: extract_s8_v3s8_offset16 body: | bb.0: ; CHECK-LABEL: name: extract_s8_v3s8_offset16 - ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV2]](s32) - %0:_(<3 x s8>) = G_IMPLICIT_DEF - %1:_(s8) = G_EXTRACT %0, 16 - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + ; CHECK: [[DEF:%[0-9]+]]:_(<4 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF]](<4 x i32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV2]](i32) + %0:_(<3 x i8>) = G_IMPLICIT_DEF + %1:_(i8) = G_EXTRACT %0(<3 x i8>), 16 + %2:_(i32) = G_ANYEXT %1(i8) + $vgpr0 = COPY %2(i32) ... --- @@ -587,13 +587,13 @@ name: extract_s8_v5s1_offset4 body: | bb.0: ; CHECK-LABEL: name: extract_s8_v5s1_offset4 - ; CHECK: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<5 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV4]](s32) - %0:_(<5 x s1>) = G_IMPLICIT_DEF - %1:_(s1) = G_EXTRACT %0, 4 - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + ; CHECK: [[DEF:%[0-9]+]]:_(<5 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF]](<5 x i32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV4]](i32) + %0:_(<5 x i1>) = G_IMPLICIT_DEF + %1:_(i1) = G_EXTRACT %0(<5 x i1>), 4 + %2:_(i32) = G_ANYEXT %1(i1) + $vgpr0 = COPY %2(i32) ... --- @@ -601,12 +601,12 @@ name: extract_v2s16_v4s16_offset32 body: | bb.0: ; CHECK-LABEL: name: extract_v2s16_v4s16_offset32 - ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV1]](<2 x s16>) - %0:_(<4 x s16>) = G_IMPLICIT_DEF - %1:_(<2 x s16>) = G_EXTRACT %0, 32 - $vgpr0 = COPY %1 + ; CHECK: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV1]](<2 x i16>) + %0:_(<4 x i16>) = G_IMPLICIT_DEF + %1:_(<2 x i16>) = G_EXTRACT %0(<4 x i16>), 32 + $vgpr0 = COPY %1(<2 x i16>) ... --- @@ -614,12 +614,12 @@ name: extract_v2s16_v6s16_offset32 body: | bb.0: ; CHECK-LABEL: name: extract_v2s16_v6s16_offset32 - ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV1]](<2 x s16>) - %0:_(<6 x s16>) = G_IMPLICIT_DEF - %1:_(<2 x s16>) = G_EXTRACT %0, 32 - $vgpr0 = COPY %1 + ; CHECK: [[DEF:%[0-9]+]]:_(<6 x i16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<6 x i16>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV1]](<2 x i16>) + %0:_(<6 x i16>) = G_IMPLICIT_DEF + %1:_(<2 x i16>) = G_EXTRACT %0(<6 x i16>), 32 + $vgpr0 = COPY %1(<2 x i16>) ... 
--- @@ -630,13 +630,13 @@ body: | ; CHECK-LABEL: name: test_extract_s8_s16_offset0 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s8) = G_EXTRACT %1, 0 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(i8) = G_EXTRACT %1(i16), 0 + %3:_(i32) = G_ANYEXT %2(i8) + $vgpr0 = COPY %3(i32) ... --- @@ -647,17 +647,17 @@ body: | ; CHECK-LABEL: name: test_extract_s8_s16_offset1 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C]](s16) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s8) = G_EXTRACT %1, 1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 1 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC]], [[C]](i16) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(i8) = G_EXTRACT %1(i16), 1 + %3:_(i32) = G_ANYEXT %2(i8) + $vgpr0 = COPY %3(i32) ... --- @@ -668,17 +668,17 @@ body: | ; CHECK-LABEL: name: test_extract_s8_s16_offset8 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C]](s16) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s8) = G_EXTRACT %1, 8 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC]], [[C]](i16) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(i8) = G_EXTRACT %1(i16), 8 + %3:_(i32) = G_ANYEXT %2(i8) + $vgpr0 = COPY %3(i32) ... --- @@ -689,12 +689,12 @@ body: | ; CHECK-LABEL: name: test_extract_s8_s32_offset0 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s8) = G_EXTRACT %0, 0 - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i8) = G_EXTRACT %0(i32), 0 + %2:_(i32) = G_ANYEXT %1(i8) + $vgpr0 = COPY %2(i32) ... 
--- @@ -705,14 +705,14 @@ body: | ; CHECK-LABEL: name: test_extract_s8_s32_offset1 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s8) = G_EXTRACT %0, 1 - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i8) = G_EXTRACT %0(i32), 1 + %2:_(i32) = G_ANYEXT %1(i8) + $vgpr0 = COPY %2(i32) ... --- @@ -723,14 +723,14 @@ body: | ; CHECK-LABEL: name: test_extract_s8_s32_offset8 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s8) = G_EXTRACT %0, 8 - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i8) = G_EXTRACT %0(i32), 8 + %2:_(i32) = G_ANYEXT %1(i8) + $vgpr0 = COPY %2(i32) ... --- @@ -741,14 +741,14 @@ body: | ; CHECK-LABEL: name: test_extract_s8_s32_offset16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s8) = G_EXTRACT %0, 16 - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i8) = G_EXTRACT %0(i32), 16 + %2:_(i32) = G_ANYEXT %1(i8) + $vgpr0 = COPY %2(i32) ... --- @@ -759,14 +759,14 @@ body: | ; CHECK-LABEL: name: test_extract_s8_s32_offset24 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s8) = G_EXTRACT %0, 16 - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i8) = G_EXTRACT %0(i32), 16 + %2:_(i32) = G_ANYEXT %1(i8) + $vgpr0 = COPY %2(i32) ... 
--- @@ -778,12 +778,12 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p3) - ; CHECK-NEXT: $vgpr0 = COPY [[PTRTOINT]](s32) + ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(i32) = G_PTRTOINT [[COPY]](p3) + ; CHECK-NEXT: $vgpr0 = COPY [[PTRTOINT]](i32) %0:_(p3) = COPY $vgpr0 - %1:_(s8) = G_EXTRACT %0, 0 - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + %1:_(i8) = G_EXTRACT %0(p3), 0 + %2:_(i32) = G_ANYEXT %1(i8) + $vgpr0 = COPY %2(i32) ... --- @@ -795,14 +795,14 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p3) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32) + ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(i32) = G_PTRTOINT [[COPY]](p3) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[PTRTOINT]], [[C]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](i32) %0:_(p3) = COPY $vgpr0 - %1:_(s8) = G_EXTRACT %0, 8 - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + %1:_(i8) = G_EXTRACT %0(p3), 8 + %2:_(i32) = G_ANYEXT %1(i8) + $vgpr0 = COPY %2(i32) ... --- @@ -813,13 +813,13 @@ body: | ; CHECK-LABEL: name: test_extract_s1_s8_offset0 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s8) = G_TRUNC %0 - %2:_(s1) = G_EXTRACT %1, 0 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i8) = G_TRUNC %0(i32) + %2:_(i1) = G_EXTRACT %1(i8), 0 + %3:_(i32) = G_ANYEXT %2(i1) + $vgpr0 = COPY %3(i32) ... --- @@ -830,17 +830,17 @@ body: | ; CHECK-LABEL: name: test_extract_s1_s8_offset2 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 2 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C]](s16) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s8) = G_TRUNC %0 - %2:_(s1) = G_EXTRACT %1, 2 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 2 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC]], [[C]](i16) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i8) = G_TRUNC %0(i32) + %2:_(i1) = G_EXTRACT %1(i8), 2 + %3:_(i32) = G_ANYEXT %2(i1) + $vgpr0 = COPY %3(i32) ... 
--- @@ -851,15 +851,15 @@ body: | ; CHECK-LABEL: name: test_extract_s8_s64_offset2 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[TRUNC]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s8) = G_EXTRACT %0, 2 - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[COPY]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[LSHR]](i64) + ; CHECK-NEXT: $vgpr0 = COPY [[TRUNC]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i8) = G_EXTRACT %0(i64), 2 + %2:_(i32) = G_ANYEXT %1(i8) + $vgpr0 = COPY %2(i32) ... --- @@ -868,14 +868,14 @@ body: | bb.0: ; CHECK-LABEL: name: extract_s16_v3s16_offset0 - ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](s32) - %0:_(<3 x s16>) = G_IMPLICIT_DEF - %1:_(s16) = G_EXTRACT %0, 0 - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + ; CHECK: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](i32) + %0:_(<3 x i16>) = G_IMPLICIT_DEF + %1:_(i16) = G_EXTRACT %0(<3 x i16>), 0 + %2:_(i32) = G_ANYEXT %1(i16) + $vgpr0 = COPY %2(i32) ... --- @@ -886,15 +886,15 @@ body: | ; CHECK-LABEL: name: test_extract_s8_s64_offset16 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[TRUNC]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s8) = G_EXTRACT %0, 16 - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[COPY]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[LSHR]](i64) + ; CHECK-NEXT: $vgpr0 = COPY [[TRUNC]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i8) = G_EXTRACT %0(i64), 16 + %2:_(i32) = G_ANYEXT %1(i8) + $vgpr0 = COPY %2(i32) ... 
--- @@ -905,14 +905,14 @@ body: | ; CHECK-LABEL: name: test_extract_s16_s64_offset16 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY]](s64), 16 - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s16) = G_EXTRACT %0, 16 - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(i16) = G_EXTRACT [[COPY]](i64), 16 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[EXTRACT]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i16) = G_EXTRACT %0(i64), 16 + %2:_(i32) = G_ANYEXT %1(i16) + $vgpr0 = COPY %2(i32) ... --- @@ -923,14 +923,14 @@ body: | ; CHECK-LABEL: name: test_extract_s16_s64_offset32 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY]](s64), 32 - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s16) = G_EXTRACT %0, 32 - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(i16) = G_EXTRACT [[COPY]](i64), 32 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[EXTRACT]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i16) = G_EXTRACT %0(i64), 32 + %2:_(i32) = G_ANYEXT %1(i16) + $vgpr0 = COPY %2(i32) ... --- @@ -941,14 +941,14 @@ body: | ; CHECK-LABEL: name: test_extract_s16_s64_offset48 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY]](s64), 48 - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s16) = G_EXTRACT %0, 48 - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(i16) = G_EXTRACT [[COPY]](i64), 48 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[EXTRACT]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i16) = G_EXTRACT %0(i64), 48 + %2:_(i32) = G_ANYEXT %1(i16) + $vgpr0 = COPY %2(i32) ... --- @@ -957,12 +957,12 @@ body: | bb.0: ; CHECK-LABEL: name: extract_v2s16_v3s16_offset0 - ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - %0:_(<3 x s16>) = G_IMPLICIT_DEF - %1:_(<2 x s16>) = G_EXTRACT %0, 0 - $vgpr0 = COPY %1 + ; CHECK: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x i16>) + %0:_(<3 x i16>) = G_IMPLICIT_DEF + %1:_(<2 x i16>) = G_EXTRACT %0(<3 x i16>), 0 + $vgpr0 = COPY %1(<2 x i16>) ... 
--- @@ -971,12 +971,12 @@ body: | bb.0: ; CHECK-LABEL: name: extract_v2s16_v5s16_offset0 - ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - %0:_(<5 x s16>) = G_IMPLICIT_DEF - %1:_(<2 x s16>) = G_EXTRACT %0, 0 - $vgpr0 = COPY %1 + ; CHECK: [[DEF:%[0-9]+]]:_(<6 x i16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<6 x i16>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x i16>) + %0:_(<5 x i16>) = G_IMPLICIT_DEF + %1:_(<2 x i16>) = G_EXTRACT %0(<5 x i16>), 0 + $vgpr0 = COPY %1(<2 x i16>) ... --- @@ -988,13 +988,13 @@ body: | ; CHECK-LABEL: name: extract_s16_v2s16_offset0 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](s32) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(s16) = G_EXTRACT %0, 0 - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](i32) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(i16) = G_EXTRACT %0(<2 x i16>), 0 + %2:_(i32) = G_ANYEXT %1(i16) + $vgpr0 = COPY %2(i32) ... --- @@ -1006,15 +1006,15 @@ body: | ; CHECK-LABEL: name: extract_s16_v2s16_offset1 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(s16) = G_EXTRACT %0, 1 - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](i32) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(i16) = G_EXTRACT %0(<2 x i16>), 1 + %2:_(i32) = G_ANYEXT %1(i16) + $vgpr0 = COPY %2(i32) ... --- @@ -1026,15 +1026,15 @@ body: | ; CHECK-LABEL: name: extract_s16_v2s16_offset8 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(s16) = G_EXTRACT %0, 8 - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](i32) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(i16) = G_EXTRACT %0(<2 x i16>), 8 + %2:_(i32) = G_ANYEXT %1(i16) + $vgpr0 = COPY %2(i32) ... 
--- @@ -1046,15 +1046,15 @@ body: | ; CHECK-LABEL: name: extract_s16_v2s16_offset16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(s16) = G_EXTRACT %0, 16 - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](i32) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(i16) = G_EXTRACT %0(<2 x i16>), 16 + %2:_(i32) = G_ANYEXT %1(i16) + $vgpr0 = COPY %2(i32) ... --- @@ -1066,12 +1066,12 @@ body: | ; CHECK-LABEL: name: extract_s16_s32_offset0 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_EXTRACT %0, 0 - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_EXTRACT %0(i32), 0 + %2:_(i32) = G_ANYEXT %1(i16) + $vgpr0 = COPY %2(i32) ... --- @@ -1083,14 +1083,14 @@ body: | ; CHECK-LABEL: name: extract_s16_s32_offset1 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_EXTRACT %0, 1 - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_EXTRACT %0(i32), 1 + %2:_(i32) = G_ANYEXT %1(i16) + $vgpr0 = COPY %2(i32) ... --- @@ -1102,14 +1102,14 @@ body: | ; CHECK-LABEL: name: extract_s16_s32_offset8 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_EXTRACT %0, 8 - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_EXTRACT %0(i32), 8 + %2:_(i32) = G_ANYEXT %1(i16) + $vgpr0 = COPY %2(i32) ... 
--- @@ -1121,14 +1121,14 @@ body: | ; CHECK-LABEL: name: extract_s16_s32_offset16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_EXTRACT %0, 16 - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_EXTRACT %0(i32), 16 + %2:_(i32) = G_ANYEXT %1(i16) + $vgpr0 = COPY %2(i32) ... --- @@ -1141,13 +1141,13 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY]](p3), 0 - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(i16) = G_EXTRACT [[COPY]](p3), 0 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[EXTRACT]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) %0:_(p3) = COPY $vgpr0 - %1:_(s16) = G_EXTRACT %0, 0 - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + %1:_(i16) = G_EXTRACT %0(p3), 0 + %2:_(i32) = G_ANYEXT %1(i16) + $vgpr0 = COPY %2(i32) ... --- @@ -1160,11 +1160,11 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY]](p3), 1 - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(i16) = G_EXTRACT [[COPY]](p3), 1 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[EXTRACT]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) %0:_(p3) = COPY $vgpr0 - %1:_(s16) = G_EXTRACT %0, 1 - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + %1:_(i16) = G_EXTRACT %0(p3), 1 + %2:_(i32) = G_ANYEXT %1(i16) + $vgpr0 = COPY %2(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extractelement-crash.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extractelement-crash.mir
index 805890a75d402..5797b6973da61 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extractelement-crash.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extractelement-crash.mir
@@ -7,20 +7,20 @@ body: |
  bb.0:
    ; CHECK-LABEL: name: f
    ; CHECK: SI_RETURN
-    %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    %0:_(<2 x i32>) = COPY $vgpr0_vgpr1
+    %1:_(i32) = G_CONSTANT i32 -1
+    %2:_(i32) = G_EXTRACT_VECTOR_ELT %0(<2 x i32>), %1(i32)
+    %3:_(<2 x i32>) = G_INSERT_VECTOR_ELT %0, %2(i32), %1(i32)
+    %4:_(i32) = G_CONSTANT i32 2
+    %5:_(i32) = G_EXTRACT_VECTOR_ELT %0(<2 x i32>), %4(i32)
+    %6:_(<2 x i32>) = G_INSERT_VECTOR_ELT %0, %5(i32), %4(i32)
+    %7:_(i1) = G_CONSTANT i1 true
+    %8:_(i32) = G_ZEXT %7(i1)
+    %9:_(i32) = G_EXTRACT_VECTOR_ELT %0(<2 x i32>), %8(i32)
+    %10:_(<2 x i32>) = G_INSERT_VECTOR_ELT %0, %9(i32), %8(i32)
+    SI_RETURN
-    %2:_(s32) = G_CONSTANT i32 -1
-    %3:_(s32) = G_EXTRACT_VECTOR_ELT %0(<2 x s32>), %2(s32)
-    %4:_(<2 x s32>) = G_INSERT_VECTOR_ELT %0(<2 x s32>), %3(s32), %2(s32)
-    %5:_(s32) = G_CONSTANT i32 2
-    %6:_(s32) = G_EXTRACT_VECTOR_ELT %0(<2 x s32>), %5(s32)
-    %7:_(<2 x s32>) = G_INSERT_VECTOR_ELT %0(<2 x s32>), %6(s32), %5(s32)
-    %8:_(s1) = G_CONSTANT i1 1
-    %11:_(s32) = G_ZEXT %8
-    %9:_(s32) = G_EXTRACT_VECTOR_ELT %0(<2 x s32>), %11(s32)
-    %10:_(<2 x s32>) = G_INSERT_VECTOR_ELT %0(<2 x s32>), %9(s32), %11(s32)
-    SI_RETURN
...

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir
index 801d2e918f087..0f2dee236ba04 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir
@@ -15,26 +15,34 @@ body: |
    ; SI-LABEL: name: test_fabs_s32
    ; SI: liveins: $vgpr0
    ; SI-NEXT: {{ $}}
-    ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; SI-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[COPY]]
-    ; SI-NEXT: $vgpr0 = COPY [[FABS]](s32)
+    ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32)
+    ; SI-NEXT: [[FABS:%[0-9]+]]:_(f32) = G_FABS [[BITCAST]]
+    ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[FABS]](f32)
+    ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](i32)
    ;
    ; VI-LABEL: name: test_fabs_s32
    ; VI: liveins: $vgpr0
    ; VI-NEXT: {{ $}}
-    ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; VI-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[COPY]]
-    ; VI-NEXT: $vgpr0 = COPY [[FABS]](s32)
+    ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32)
+    ; VI-NEXT: [[FABS:%[0-9]+]]:_(f32) = G_FABS [[BITCAST]]
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[FABS]](f32)
+    ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](i32)
    ;
    ; GFX9-LABEL: name: test_fabs_s32
    ; GFX9: liveins: $vgpr0
    ; GFX9-NEXT: {{ $}}
-    ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[COPY]]
-    ; GFX9-NEXT: $vgpr0 = COPY [[FABS]](s32)
-    %0:_(s32) = COPY $vgpr0
-    %1:_(s32) = G_FABS %0
-    $vgpr0 = COPY %1
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32)
+    ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(f32) = G_FABS [[BITCAST]]
+    ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[FABS]](f32)
+    ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST1]](i32)
+    %0:_(i32) = COPY $vgpr0
+    %1:_(f32) = G_BITCAST %0(i32)
+    %2:_(f32) = G_FABS %1
+    %3:_(i32) = G_BITCAST %2(f32)
+    $vgpr0 = COPY %3(i32)
...

---
@@ -46,26 +54,34 @@ body: |
    ; SI-LABEL: name: test_fabs_s64
    ; SI: liveins: $vgpr0
    ; SI-NEXT: {{ $}}
-    ; SI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; SI-NEXT: [[FABS:%[0-9]+]]:_(s64) = G_FABS [[COPY]]
-    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[FABS]](s64)
+    ; SI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64)
+    ; SI-NEXT: [[FABS:%[0-9]+]]:_(f64) = G_FABS [[BITCAST]]
+    ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[FABS]](f64)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](i64)
    ;
    ; VI-LABEL: name: test_fabs_s64
    ; VI: liveins: $vgpr0
    ; VI-NEXT: {{ $}}
-    ; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; VI-NEXT: [[FABS:%[0-9]+]]:_(s64) = G_FABS [[COPY]]
-    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[FABS]](s64)
+    ; VI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64)
+    ; VI-NEXT: [[FABS:%[0-9]+]]:_(f64) = G_FABS [[BITCAST]]
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[FABS]](f64)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](i64)
    ;
    ; GFX9-LABEL: name: test_fabs_s64
    ; GFX9: liveins: $vgpr0
    ; GFX9-NEXT: {{ $}}
-    ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(s64) = G_FABS [[COPY]]
-    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FABS]](s64)
-    %0:_(s64) = COPY $vgpr0_vgpr1
-    %1:_(s64) = G_FABS %0
-    $vgpr0_vgpr1 = COPY %1
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64)
+    ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(f64) = G_FABS [[BITCAST]]
+    ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[FABS]](f64)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](i64)
+    %0:_(i64) = COPY $vgpr0_vgpr1
+    %1:_(f64) = G_BITCAST %0(i64)
+    %2:_(f64) = G_FABS %1
+    %3:_(i64) = G_BITCAST %2(f64)
+    $vgpr0_vgpr1 = COPY %3(i64)
...
--- name: test_fabs_s16 @@ -76,34 +92,42 @@ body: | ; SI-LABEL: name: test_fabs_s16 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; SI-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[TRUNC]] - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FABS]](s16) - ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; SI-NEXT: [[FABS:%[0-9]+]]:_(f16) = G_FABS [[BITCAST]] + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[FABS]](f16) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; VI-LABEL: name: test_fabs_s16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[TRUNC]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FABS]](s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; VI-NEXT: [[FABS:%[0-9]+]]:_(f16) = G_FABS [[BITCAST]] + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[FABS]](f16) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: test_fabs_s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[TRUNC]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FABS]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s16) = G_FABS %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(f16) = G_FABS [[BITCAST]] + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[FABS]](f16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(f16) = G_BITCAST %1(i16) + %3:_(f16) = G_FABS %2 + %4:_(i16) = G_BITCAST %3(f16) + %5:_(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... 
--- @@ -115,35 +139,43 @@ body: | ; SI-LABEL: name: test_fabs_v2s32 ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; SI-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[UV]] - ; SI-NEXT: [[FABS1:%[0-9]+]]:_(s32) = G_FABS [[UV1]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FABS]](s32), [[FABS1]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; SI-NEXT: [[FABS:%[0-9]+]]:_(f32) = G_FABS [[UV]] + ; SI-NEXT: [[FABS1:%[0-9]+]]:_(f32) = G_FABS [[UV1]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FABS]](f32), [[FABS1]](f32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](<2 x i32>) ; ; VI-LABEL: name: test_fabs_v2s32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; VI-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[UV]] - ; VI-NEXT: [[FABS1:%[0-9]+]]:_(s32) = G_FABS [[UV1]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FABS]](s32), [[FABS1]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; VI-NEXT: [[FABS:%[0-9]+]]:_(f32) = G_FABS [[UV]] + ; VI-NEXT: [[FABS1:%[0-9]+]]:_(f32) = G_FABS [[UV1]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FABS]](f32), [[FABS1]](f32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](<2 x i32>) ; ; GFX9-LABEL: name: test_fabs_v2s32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[UV]] - ; GFX9-NEXT: [[FABS1:%[0-9]+]]:_(s32) = G_FABS [[UV1]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FABS]](s32), [[FABS1]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_FABS %0 - $vgpr0_vgpr1 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(f32) = G_FABS [[UV]] + ; GFX9-NEXT: [[FABS1:%[0-9]+]]:_(f32) = G_FABS [[UV1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FABS]](f32), [[FABS1]](f32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x f32>) = G_BITCAST %0(<2 x i32>) + 
%2:_(<2 x f32>) = G_FABS %1 + %3:_(<2 x i32>) = G_BITCAST %2(<2 x f32>) + $vgpr0_vgpr1 = COPY %3(<2 x i32>) ... --- @@ -155,38 +187,46 @@ body: | ; SI-LABEL: name: test_fabs_v3s32 ; SI: liveins: $vgpr0_vgpr1_vgpr2 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; SI-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[UV]] - ; SI-NEXT: [[FABS1:%[0-9]+]]:_(s32) = G_FABS [[UV1]] - ; SI-NEXT: [[FABS2:%[0-9]+]]:_(s32) = G_FABS [[UV2]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FABS]](s32), [[FABS1]](s32), [[FABS2]](s32) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY]](<3 x i32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; SI-NEXT: [[FABS:%[0-9]+]]:_(f32) = G_FABS [[UV]] + ; SI-NEXT: [[FABS1:%[0-9]+]]:_(f32) = G_FABS [[UV1]] + ; SI-NEXT: [[FABS2:%[0-9]+]]:_(f32) = G_FABS [[UV2]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[FABS]](f32), [[FABS1]](f32), [[FABS2]](f32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[BUILD_VECTOR]](<3 x f32>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST1]](<3 x i32>) ; ; VI-LABEL: name: test_fabs_v3s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; VI-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[UV]] - ; VI-NEXT: [[FABS1:%[0-9]+]]:_(s32) = G_FABS [[UV1]] - ; VI-NEXT: [[FABS2:%[0-9]+]]:_(s32) = G_FABS [[UV2]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FABS]](s32), [[FABS1]](s32), [[FABS2]](s32) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY]](<3 x i32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; VI-NEXT: [[FABS:%[0-9]+]]:_(f32) = G_FABS [[UV]] + ; VI-NEXT: [[FABS1:%[0-9]+]]:_(f32) = G_FABS [[UV1]] + ; VI-NEXT: [[FABS2:%[0-9]+]]:_(f32) = G_FABS [[UV2]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[FABS]](f32), [[FABS1]](f32), [[FABS2]](f32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[BUILD_VECTOR]](<3 x f32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST1]](<3 x i32>) ; ; GFX9-LABEL: name: test_fabs_v3s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[UV]] - ; GFX9-NEXT: [[FABS1:%[0-9]+]]:_(s32) = G_FABS [[UV1]] - ; GFX9-NEXT: [[FABS2:%[0-9]+]]:_(s32) = G_FABS [[UV2]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FABS]](s32), [[FABS1]](s32), [[FABS2]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s32>) = G_FABS %0 - $vgpr0_vgpr1_vgpr2 = COPY %1 + ; GFX9-NEXT: 
[[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY]](<3 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(f32) = G_FABS [[UV]] + ; GFX9-NEXT: [[FABS1:%[0-9]+]]:_(f32) = G_FABS [[UV1]] + ; GFX9-NEXT: [[FABS2:%[0-9]+]]:_(f32) = G_FABS [[UV2]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[FABS]](f32), [[FABS1]](f32), [[FABS2]](f32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[BUILD_VECTOR]](<3 x f32>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST1]](<3 x i32>) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x f32>) = G_BITCAST %0(<3 x i32>) + %2:_(<3 x f32>) = G_FABS %1 + %3:_(<3 x i32>) = G_BITCAST %2(<3 x f32>) + $vgpr0_vgpr1_vgpr2 = COPY %3(<3 x i32>) ... --- @@ -198,35 +238,43 @@ body: | ; SI-LABEL: name: test_fabs_v2s64 ; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; SI-NEXT: [[FABS:%[0-9]+]]:_(s64) = G_FABS [[UV]] - ; SI-NEXT: [[FABS1:%[0-9]+]]:_(s64) = G_FABS [[UV1]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FABS]](s64), [[FABS1]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(f64), [[UV1:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST]](<2 x f64>) + ; SI-NEXT: [[FABS:%[0-9]+]]:_(f64) = G_FABS [[UV]] + ; SI-NEXT: [[FABS1:%[0-9]+]]:_(f64) = G_FABS [[UV1]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f64>) = G_BUILD_VECTOR [[FABS]](f64), [[FABS1]](f64) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BUILD_VECTOR]](<2 x f64>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST1]](<2 x i64>) ; ; VI-LABEL: name: test_fabs_v2s64 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; VI-NEXT: [[FABS:%[0-9]+]]:_(s64) = G_FABS [[UV]] - ; VI-NEXT: [[FABS1:%[0-9]+]]:_(s64) = G_FABS [[UV1]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FABS]](s64), [[FABS1]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(f64), [[UV1:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST]](<2 x f64>) + ; VI-NEXT: [[FABS:%[0-9]+]]:_(f64) = G_FABS [[UV]] + ; VI-NEXT: [[FABS1:%[0-9]+]]:_(f64) = G_FABS [[UV1]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f64>) = G_BUILD_VECTOR [[FABS]](f64), [[FABS1]](f64) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BUILD_VECTOR]](<2 x f64>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST1]](<2 x i64>) ; ; GFX9-LABEL: name: test_fabs_v2s64 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; 
GFX9-NEXT: [[FABS:%[0-9]+]]:_(s64) = G_FABS [[UV]] - ; GFX9-NEXT: [[FABS1:%[0-9]+]]:_(s64) = G_FABS [[UV1]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FABS]](s64), [[FABS1]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s64>) = G_FABS %0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f64), [[UV1:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST]](<2 x f64>) + ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(f64) = G_FABS [[UV]] + ; GFX9-NEXT: [[FABS1:%[0-9]+]]:_(f64) = G_FABS [[UV1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f64>) = G_BUILD_VECTOR [[FABS]](f64), [[FABS1]](f64) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BUILD_VECTOR]](<2 x f64>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST1]](<2 x i64>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x f64>) = G_BITCAST %0(<2 x i64>) + %2:_(<2 x f64>) = G_FABS %1 + %3:_(<2 x i64>) = G_BITCAST %2(<2 x f64>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3(<2 x i64>) ... --- @@ -238,26 +286,34 @@ body: | ; SI-LABEL: name: test_fabs_v2s16 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI-NEXT: [[FABS:%[0-9]+]]:_(<2 x s16>) = G_FABS [[COPY]] - ; SI-NEXT: $vgpr0 = COPY [[FABS]](<2 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; SI-NEXT: [[FABS:%[0-9]+]]:_(<2 x f16>) = G_FABS [[BITCAST]] + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[FABS]](<2 x f16>) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) ; ; VI-LABEL: name: test_fabs_v2s16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI-NEXT: [[FABS:%[0-9]+]]:_(<2 x s16>) = G_FABS [[COPY]] - ; VI-NEXT: $vgpr0 = COPY [[FABS]](<2 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; VI-NEXT: [[FABS:%[0-9]+]]:_(<2 x f16>) = G_FABS [[BITCAST]] + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[FABS]](<2 x f16>) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) ; ; GFX9-LABEL: name: test_fabs_v2s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(<2 x s16>) = G_FABS [[COPY]] - ; GFX9-NEXT: $vgpr0 = COPY [[FABS]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = G_FABS %0 - $vgpr0 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(<2 x f16>) = G_FABS [[BITCAST]] + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[FABS]](<2 x f16>) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %2:_(<2 x f16>) = G_FABS %1 + %3:_(<2 x i16>) = G_BITCAST %2(<2 x f16>) + $vgpr0 = COPY %3(<2 x i16>) ... 
--- @@ -266,90 +322,151 @@ body: | bb.0: ; SI-LABEL: name: test_fabs_v3s16 - ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[FABS:%[0-9]+]]:_(<2 x s16>) = G_FABS [[UV]] - ; SI-NEXT: [[FABS1:%[0-9]+]]:_(<2 x s16>) = G_FABS [[BITCAST1]] - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[FABS]](<2 x s16>) - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[FABS1]](<2 x s16>) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL2]] - ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL3]] - ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) - ; SI-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<6 x s16>) + ; SI: [[DEF:%[0-9]+]]:_(<4 x f16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[DEF]](<4 x f16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST %64(i16) + ; SI-NEXT: [[DEF1:%[0-9]+]]:_(f16) = G_IMPLICIT_DEF + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST1]](<2 x i16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[DEF1]](f16) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST3]](i16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST4]](i16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[FABS:%[0-9]+]]:_(<2 x f16>) = G_FABS [[UV]] + ; SI-NEXT: [[FABS1:%[0-9]+]]:_(<2 x f16>) = G_FABS [[BITCAST5]] + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST %44(i16) + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(f16) = G_BITCAST %50(i16) + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(f16) = G_BITCAST %45(i16) + ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[FABS1]](<2 x f16>) + ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST9]](<2 x i16>) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC 
[[BITCAST10]](i32) + ; SI-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[FABS]](<2 x f16>) + ; SI-NEXT: [[BITCAST12:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST11]](<2 x i16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST12]](i32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST12]], [[C]](i32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; SI-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST6]](f16) + ; SI-NEXT: [[BITCAST14:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST8]](f16) + ; SI-NEXT: [[BITCAST15:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST7]](f16) + ; SI-NEXT: [[BITCAST16:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST6]](f16) + ; SI-NEXT: [[BITCAST17:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST8]](f16) + ; SI-NEXT: [[BITCAST18:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST7]](f16) + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST13]](i16) + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST14]](i16) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI-NEXT: [[BITCAST19:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST15]](i16) + ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST16]](i16) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT5]], [[C]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT4]], [[SHL2]] + ; SI-NEXT: [[BITCAST20:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; SI-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST17]](i16) + ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST18]](i16) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXT7]], [[C]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[ZEXT6]], [[SHL3]] + ; SI-NEXT: [[BITCAST21:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR3]](i32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST19]](<2 x i16>), [[BITCAST20]](<2 x i16>), [[BITCAST21]](<2 x i16>) + ; SI-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<6 x i16>) ; ; VI-LABEL: name: test_fabs_v3s16 - ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[FABS:%[0-9]+]]:_(<2 x s16>) = G_FABS [[UV]] - ; VI-NEXT: [[FABS1:%[0-9]+]]:_(<2 x s16>) = G_FABS [[BITCAST1]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[FABS]](<2 x s16>) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[FABS1]](<2 x s16>) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL2]] - ; VI-NEXT: 
[[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL3]] - ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) - ; VI-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<6 x s16>) + ; VI: [[DEF:%[0-9]+]]:_(<4 x f16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[DEF]](<4 x f16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST %64(i16) + ; VI-NEXT: [[DEF1:%[0-9]+]]:_(f16) = G_IMPLICIT_DEF + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST1]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[DEF1]](f16) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST3]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST4]](i16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[FABS:%[0-9]+]]:_(<2 x f16>) = G_FABS [[UV]] + ; VI-NEXT: [[FABS1:%[0-9]+]]:_(<2 x f16>) = G_FABS [[BITCAST5]] + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST %44(i16) + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(f16) = G_BITCAST %50(i16) + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(f16) = G_BITCAST %45(i16) + ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[FABS1]](<2 x f16>) + ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST9]](<2 x i16>) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST10]](i32) + ; VI-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[FABS]](<2 x f16>) + ; VI-NEXT: [[BITCAST12:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST11]](<2 x i16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST12]](i32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST12]], [[C]](i32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST6]](f16) + ; VI-NEXT: [[BITCAST14:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST8]](f16) + ; VI-NEXT: [[BITCAST15:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST7]](f16) + ; VI-NEXT: [[BITCAST16:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST6]](f16) + ; VI-NEXT: [[BITCAST17:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST8]](f16) + ; VI-NEXT: [[BITCAST18:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST7]](f16) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST13]](i16) + ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST14]](i16) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI-NEXT: [[BITCAST19:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; VI-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST15]](i16) + ; VI-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST16]](i16) + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT5]], [[C]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT4]], [[SHL2]] + ; VI-NEXT: [[BITCAST20:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; VI-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST17]](i16) + ; 
VI-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST18]](i16) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXT7]], [[C]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[ZEXT6]], [[SHL3]] + ; VI-NEXT: [[BITCAST21:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR3]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST19]](<2 x i16>), [[BITCAST20]](<2 x i16>), [[BITCAST21]](<2 x i16>) + ; VI-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX9-LABEL: name: test_fabs_v3s16 - ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF1]](s16) - ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(<2 x s16>) = G_FABS [[UV]] - ; GFX9-NEXT: [[FABS1:%[0-9]+]]:_(<2 x s16>) = G_FABS [[BUILD_VECTOR]] - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[FABS]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[FABS1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX9-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<6 x s16>) - %0:_(<3 x s16>) = G_IMPLICIT_DEF - %1:_(<3 x s16>) = G_FABS %0 - %2:_(<6 x s16>) = G_CONCAT_VECTORS %1, %1 - S_NOP 0, implicit %2 + ; GFX9: [[DEF:%[0-9]+]]:_(<4 x f16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[DEF]](<4 x f16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST %57(i16) + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(f16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST1]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[BITCAST]](f16), [[DEF1]](f16) + ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(<2 x f16>) = G_FABS [[UV]] + ; GFX9-NEXT: [[FABS1:%[0-9]+]]:_(<2 x f16>) = G_FABS [[BUILD_VECTOR]] + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %46(i16) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %52(i16) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %47(i16) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[FABS1]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST6]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST7]](i32) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST 
[[FABS]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST8]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST9]](i32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST9]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX9-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST5]](f16) + ; GFX9-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX9-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX9-NEXT: [[BITCAST14:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST5]](f16) + ; GFX9-NEXT: [[BITCAST15:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST10]](i16), [[BITCAST11]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST12]](i16), [[BITCAST13]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST14]](i16), [[BITCAST15]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>) + ; GFX9-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<6 x i16>) + %0:_(<3 x f16>) = G_IMPLICIT_DEF + %1:_(<3 x f16>) = G_FABS %0 + %2:_(<3 x i16>) = G_BITCAST %1(<3 x f16>) + %3:_(<3 x i16>) = G_BITCAST %1(<3 x f16>) + %4:_(<6 x i16>) = G_CONCAT_VECTORS %2(<3 x i16>), %3(<3 x i16>) + S_NOP 0, implicit %4(<6 x i16>) ... --- @@ -361,33 +478,41 @@ body: | ; SI-LABEL: name: test_fabs_v4s16 ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI-NEXT: [[FABS:%[0-9]+]]:_(<2 x s16>) = G_FABS [[UV]] - ; SI-NEXT: [[FABS1:%[0-9]+]]:_(<2 x s16>) = G_FABS [[UV1]] - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[FABS]](<2 x s16>), [[FABS1]](<2 x s16>) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x f16>) + ; SI-NEXT: [[FABS:%[0-9]+]]:_(<2 x f16>) = G_FABS [[UV]] + ; SI-NEXT: [[FABS1:%[0-9]+]]:_(<2 x f16>) = G_FABS [[UV1]] + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[FABS]](<2 x f16>), [[FABS1]](<2 x f16>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x f16>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](<4 x i16>) ; ; VI-LABEL: name: test_fabs_v4s16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI-NEXT: [[FABS:%[0-9]+]]:_(<2 x s16>) = G_FABS [[UV]] - ; VI-NEXT: [[FABS1:%[0-9]+]]:_(<2 x s16>) = G_FABS [[UV1]] - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[FABS]](<2 x s16>), [[FABS1]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = 
G_UNMERGE_VALUES [[BITCAST]](<4 x f16>) + ; VI-NEXT: [[FABS:%[0-9]+]]:_(<2 x f16>) = G_FABS [[UV]] + ; VI-NEXT: [[FABS1:%[0-9]+]]:_(<2 x f16>) = G_FABS [[UV1]] + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[FABS]](<2 x f16>), [[FABS1]](<2 x f16>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x f16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](<4 x i16>) ; ; GFX9-LABEL: name: test_fabs_v4s16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(<2 x s16>) = G_FABS [[UV]] - ; GFX9-NEXT: [[FABS1:%[0-9]+]]:_(<2 x s16>) = G_FABS [[UV1]] - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[FABS]](<2 x s16>), [[FABS1]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = G_FABS %0 - $vgpr0_vgpr1 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x f16>) + ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(<2 x f16>) = G_FABS [[UV]] + ; GFX9-NEXT: [[FABS1:%[0-9]+]]:_(<2 x f16>) = G_FABS [[UV1]] + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[FABS]](<2 x f16>), [[FABS1]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x f16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x f16>) = G_BITCAST %0(<4 x i16>) + %2:_(<4 x f16>) = G_FABS %1 + %3:_(<4 x i16>) = G_BITCAST %2(<4 x f16>) + $vgpr0_vgpr1 = COPY %3(<4 x i16>) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir index 8b6e53cb78267..942ca5a051d58 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir @@ -14,30 +14,42 @@ body: | ; SI-LABEL: name: test_fadd_s32 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY]], [[COPY1]] - ; SI-NEXT: $vgpr0 = COPY [[FADD]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; SI-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[BITCAST]], [[BITCAST1]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FADD]](f32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) ; ; VI-LABEL: name: test_fadd_s32 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY]], [[COPY1]] - ; VI-NEXT: $vgpr0 = COPY [[FADD]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; VI-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[BITCAST]], [[BITCAST1]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FADD]](f32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) ; ; GFX9-LABEL: name: test_fadd_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY]], [[COPY1]] - ; GFX9-NEXT: $vgpr0 = COPY [[FADD]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_FADD %0, %1 - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[BITCAST]], [[BITCAST1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FADD]](f32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32) = G_FADD %2, %3 + %5:_(i32) = G_BITCAST %4(f32) + $vgpr0 = COPY %5(i32) ... 
--- @@ -49,30 +61,42 @@ body: | ; SI-LABEL: name: test_fadd_s64 ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[COPY]], [[COPY1]] - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[FADD]](s64) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[COPY1]](i64) + ; SI-NEXT: [[FADD:%[0-9]+]]:_(f64) = G_FADD [[BITCAST]], [[BITCAST1]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i64) = G_BITCAST [[FADD]](f64) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](i64) ; ; VI-LABEL: name: test_fadd_s64 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[COPY]], [[COPY1]] - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[FADD]](s64) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[COPY1]](i64) + ; VI-NEXT: [[FADD:%[0-9]+]]:_(f64) = G_FADD [[BITCAST]], [[BITCAST1]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i64) = G_BITCAST [[FADD]](f64) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](i64) ; ; GFX9-LABEL: name: test_fadd_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[COPY]], [[COPY1]] - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FADD]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = G_FADD %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[COPY1]](i64) + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(f64) = G_FADD [[BITCAST]], [[BITCAST1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i64) = G_BITCAST [[FADD]](f64) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(f64) = G_BITCAST %0(i64) + %3:_(f64) = G_BITCAST %1(i64) + %4:_(f64) = G_FADD %2, %3 + %5:_(i64) = G_BITCAST %4(f64) + $vgpr0_vgpr1 = COPY %5(i64) ... 
--- @@ -84,46 +108,58 @@ body: | ; SI-LABEL: name: test_fadd_s16 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT]], [[FPEXT1]] - ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST1]](f16) + ; SI-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FPEXT]], [[FPEXT1]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD]](f32) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST2]](i16) + ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; VI-LABEL: name: test_fadd_s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[TRUNC1]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; VI-NEXT: [[FADD:%[0-9]+]]:_(f16) = G_FADD [[BITCAST]], [[BITCAST1]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[FADD]](f16) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST2]](i16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: test_fadd_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[TRUNC1]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; 
GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(f16) = G_FADD [[BITCAST]], [[BITCAST1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[FADD]](f16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST2]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(f16) = G_BITCAST %2(i16) + %5:_(f16) = G_BITCAST %3(i16) + %6:_(f16) = G_FADD %4, %5 + %7:_(i16) = G_BITCAST %6(f16) + %8:_(i32) = G_ANYEXT %7(i16) + $vgpr0 = COPY %8(i32) - %4:_(s16) = G_FADD %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 ... --- @@ -135,42 +171,54 @@ body: | ; SI-LABEL: name: test_fadd_v2s32 ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[UV]], [[UV2]] - ; SI-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[UV1]], [[UV3]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY1]](<2 x i32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f32>) + ; SI-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[UV]], [[UV2]] + ; SI-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[UV1]], [[UV3]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FADD]](f32), [[FADD1]](f32) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](<2 x i32>) ; ; VI-LABEL: name: test_fadd_v2s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[UV]], [[UV2]] - ; VI-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[UV1]], [[UV3]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY1]](<2 x i32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f32>) + ; VI-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[UV]], [[UV2]] + ; 
VI-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[UV1]], [[UV3]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FADD]](f32), [[FADD1]](f32) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](<2 x i32>) ; ; GFX9-LABEL: name: test_fadd_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[UV]], [[UV2]] - ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[UV1]], [[UV3]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = G_FADD %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY1]](<2 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f32>) + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[UV]], [[UV2]] + ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[UV1]], [[UV3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FADD]](f32), [[FADD1]](f32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x f32>) = G_BITCAST %0(<2 x i32>) + %3:_(<2 x f32>) = G_BITCAST %1(<2 x i32>) + %4:_(<2 x f32>) = G_FADD %2, %3 + %5:_(<2 x i32>) = G_BITCAST %4(<2 x f32>) + $vgpr0_vgpr1 = COPY %5(<2 x i32>) ... 
--- @@ -182,42 +230,54 @@ body: | ; SI-LABEL: name: test_fadd_v2s32_flags ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI-NEXT: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[UV]], [[UV2]] - ; SI-NEXT: [[FADD1:%[0-9]+]]:_(s32) = nnan G_FADD [[UV1]], [[UV3]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY1]](<2 x i32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f32>) + ; SI-NEXT: [[FADD:%[0-9]+]]:_(f32) = nnan G_FADD [[UV]], [[UV2]] + ; SI-NEXT: [[FADD1:%[0-9]+]]:_(f32) = nnan G_FADD [[UV1]], [[UV3]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FADD]](f32), [[FADD1]](f32) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](<2 x i32>) ; ; VI-LABEL: name: test_fadd_v2s32_flags ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI-NEXT: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[UV]], [[UV2]] - ; VI-NEXT: [[FADD1:%[0-9]+]]:_(s32) = nnan G_FADD [[UV1]], [[UV3]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY1]](<2 x i32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f32>) + ; VI-NEXT: [[FADD:%[0-9]+]]:_(f32) = nnan G_FADD [[UV]], [[UV2]] + ; VI-NEXT: [[FADD1:%[0-9]+]]:_(f32) = nnan G_FADD [[UV1]], [[UV3]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FADD]](f32), [[FADD1]](f32) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](<2 x i32>) ; ; GFX9-LABEL: name: test_fadd_v2s32_flags ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; 
GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[UV]], [[UV2]] - ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s32) = nnan G_FADD [[UV1]], [[UV3]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = nnan G_FADD %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY1]](<2 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f32>) + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(f32) = nnan G_FADD [[UV]], [[UV2]] + ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(f32) = nnan G_FADD [[UV1]], [[UV3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FADD]](f32), [[FADD1]](f32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x f32>) = G_BITCAST %0(<2 x i32>) + %3:_(<2 x f32>) = G_BITCAST %1(<2 x i32>) + %4:_(<2 x f32>) = nnan G_FADD %2, %3 + %5:_(<2 x i32>) = G_BITCAST %4(<2 x f32>) + $vgpr0_vgpr1 = COPY %5(<2 x i32>) ... --- @@ -229,45 +289,57 @@ body: | ; SI-LABEL: name: test_fadd_v3s32 ; SI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; SI-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[UV]], [[UV3]] - ; SI-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[UV1]], [[UV4]] - ; SI-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[UV2]], [[UV5]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY]](<3 x i32>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY1]](<3 x i32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; SI-NEXT: [[UV3:%[0-9]+]]:_(f32), [[UV4:%[0-9]+]]:_(f32), [[UV5:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<3 x f32>) + ; SI-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[UV]], [[UV3]] + ; SI-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[UV1]], [[UV4]] + ; SI-NEXT: [[FADD2:%[0-9]+]]:_(f32) = G_FADD [[UV2]], [[UV5]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[FADD]](f32), [[FADD1]](f32), [[FADD2]](f32) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[BUILD_VECTOR]](<3 x f32>) + 
; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST2]](<3 x i32>) ; ; VI-LABEL: name: test_fadd_v3s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; VI-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[UV]], [[UV3]] - ; VI-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[UV1]], [[UV4]] - ; VI-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[UV2]], [[UV5]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY]](<3 x i32>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY1]](<3 x i32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; VI-NEXT: [[UV3:%[0-9]+]]:_(f32), [[UV4:%[0-9]+]]:_(f32), [[UV5:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<3 x f32>) + ; VI-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[UV]], [[UV3]] + ; VI-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[UV1]], [[UV4]] + ; VI-NEXT: [[FADD2:%[0-9]+]]:_(f32) = G_FADD [[UV2]], [[UV5]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[FADD]](f32), [[FADD1]](f32), [[FADD2]](f32) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[BUILD_VECTOR]](<3 x f32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST2]](<3 x i32>) ; ; GFX9-LABEL: name: test_fadd_v3s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[UV]], [[UV3]] - ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[UV1]], [[UV4]] - ; GFX9-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[UV2]], [[UV5]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - %2:_(<3 x s32>) = G_FADD %0, %1 - $vgpr0_vgpr1_vgpr2 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY]](<3 x i32>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY1]](<3 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(f32), [[UV4:%[0-9]+]]:_(f32), [[UV5:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<3 x f32>) + ; GFX9-NEXT: 
[[FADD:%[0-9]+]]:_(f32) = G_FADD [[UV]], [[UV3]] + ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[UV1]], [[UV4]] + ; GFX9-NEXT: [[FADD2:%[0-9]+]]:_(f32) = G_FADD [[UV2]], [[UV5]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[FADD]](f32), [[FADD1]](f32), [[FADD2]](f32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[BUILD_VECTOR]](<3 x f32>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST2]](<3 x i32>) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(<3 x f32>) = G_BITCAST %0(<3 x i32>) + %3:_(<3 x f32>) = G_BITCAST %1(<3 x i32>) + %4:_(<3 x f32>) = G_FADD %2, %3 + %5:_(<3 x i32>) = G_BITCAST %4(<3 x f32>) + $vgpr0_vgpr1_vgpr2 = COPY %5(<3 x i32>) ... --- @@ -279,42 +351,54 @@ body: | ; SI-LABEL: name: test_fadd_v2s64 ; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; SI-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[UV]], [[UV2]] - ; SI-NEXT: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[UV1]], [[UV3]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FADD]](s64), [[FADD1]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY1]](<2 x i64>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(f64), [[UV1:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST]](<2 x f64>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(f64), [[UV3:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f64>) + ; SI-NEXT: [[FADD:%[0-9]+]]:_(f64) = G_FADD [[UV]], [[UV2]] + ; SI-NEXT: [[FADD1:%[0-9]+]]:_(f64) = G_FADD [[UV1]], [[UV3]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f64>) = G_BUILD_VECTOR [[FADD]](f64), [[FADD1]](f64) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BUILD_VECTOR]](<2 x f64>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST2]](<2 x i64>) ; ; VI-LABEL: name: test_fadd_v2s64 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; VI-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[UV]], [[UV2]] - ; VI-NEXT: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[UV1]], [[UV3]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FADD]](s64), [[FADD1]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY1]](<2 x i64>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(f64), 
[[UV1:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST]](<2 x f64>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(f64), [[UV3:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f64>) + ; VI-NEXT: [[FADD:%[0-9]+]]:_(f64) = G_FADD [[UV]], [[UV2]] + ; VI-NEXT: [[FADD1:%[0-9]+]]:_(f64) = G_FADD [[UV1]], [[UV3]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f64>) = G_BUILD_VECTOR [[FADD]](f64), [[FADD1]](f64) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BUILD_VECTOR]](<2 x f64>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST2]](<2 x i64>) ; ; GFX9-LABEL: name: test_fadd_v2s64 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[UV]], [[UV2]] - ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[UV1]], [[UV3]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FADD]](s64), [[FADD1]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - %2:_(<2 x s64>) = G_FADD %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY1]](<2 x i64>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f64), [[UV1:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST]](<2 x f64>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(f64), [[UV3:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f64>) + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(f64) = G_FADD [[UV]], [[UV2]] + ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(f64) = G_FADD [[UV1]], [[UV3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f64>) = G_BUILD_VECTOR [[FADD]](f64), [[FADD1]](f64) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BUILD_VECTOR]](<2 x f64>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST2]](<2 x i64>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(<2 x f64>) = G_BITCAST %0(<2 x i64>) + %3:_(<2 x f64>) = G_BITCAST %1(<2 x i64>) + %4:_(<2 x f64>) = G_FADD %2, %3 + %5:_(<2 x i64>) = G_BITCAST %4(<2 x f64>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %5(<2 x i64>) ... 
--- @@ -326,66 +410,94 @@ body: | ; SI-LABEL: name: test_fadd_v2s16 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT]], [[FPEXT1]] - ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) - ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FPEXT2]], [[FPEXT3]] - ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD1]](s32) - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY1]](<2 x i16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %18(i16) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %24(i16) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %19(i16) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %25(i16) + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST1]](<2 x f16>) + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST6]](<2 x i16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST7]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST7]], [[C]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST8]](<2 x i16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST9]](i32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST9]], [[C]](i32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST3]](f16) + ; SI-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FPEXT]], [[FPEXT1]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD]](f32) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST4]](f16) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST5]](f16) + ; SI-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FPEXT2]], [[FPEXT3]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = 
G_FPTRUNC [[FADD1]](f32) + ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; SI-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST10]](i16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST11]](i16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST12]](<2 x f16>) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST13]](<2 x i16>) ; ; VI-LABEL: name: test_fadd_v2s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[TRUNC2]] - ; VI-NEXT: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[TRUNC3]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FADD]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FADD1]](s16) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY1]](<2 x i16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %12(i16) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %18(i16) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %13(i16) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %19(i16) + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST1]](<2 x f16>) + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST6]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST7]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST7]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST8]](<2 x i16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST9]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST9]], [[C]](i32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[FADD:%[0-9]+]]:_(f16) = G_FADD [[BITCAST2]], [[BITCAST3]] + ; VI-NEXT: [[FADD1:%[0-9]+]]:_(f16) = G_FADD [[BITCAST4]], [[BITCAST5]] + ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[FADD]](f16) + ; VI-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[FADD1]](f16) + ; VI-NEXT: 
[[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST10]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST11]](i16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST12]](<2 x f16>) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST13]](<2 x i16>) ; ; GFX9-LABEL: name: test_fadd_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<2 x s16>) = G_FADD [[COPY]], [[COPY1]] - ; GFX9-NEXT: $vgpr0 = COPY [[FADD]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_FADD %0, %1 - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<2 x f16>) = G_FADD [[BITCAST]], [[BITCAST1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[FADD]](<2 x f16>) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %3:_(<2 x f16>) = G_BITCAST %1(<2 x i16>) + %4:_(<2 x f16>) = G_FADD %2, %3 + %5:_(<2 x i16>) = G_BITCAST %4(<2 x f16>) + $vgpr0 = COPY %5(<2 x i16>) ... --- @@ -396,148 +508,193 @@ body: | ; SI-LABEL: name: test_fadd_v3s16 ; SI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT]], [[FPEXT1]] - ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) - ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; SI-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FPEXT2]], 
[[FPEXT3]] - ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD1]](s32) - ; SI-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; SI-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FPEXT4]], [[FPEXT5]] - ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD2]](s32) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]] - ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr3_vgpr4_vgpr5 + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; SI-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<6 x i16>) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST5]](i32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST5]], [[C]](i32) + ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST6]](i32) + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(f16) = G_BITCAST 
[[TRUNC4]](i16) + ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST7]](f16) + ; SI-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FPEXT]], [[FPEXT1]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD]](f32) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST3]](f16) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST8]](f16) + ; SI-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FPEXT2]], [[FPEXT3]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD1]](f32) + ; SI-NEXT: [[FPEXT4:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST4]](f16) + ; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST9]](f16) + ; SI-NEXT: [[FADD2:%[0-9]+]]:_(f32) = G_FADD [[FPEXT4]], [[FPEXT5]] + ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD2]](f32) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[UV6]](<2 x i16>) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST10]], [[C]](i32) + ; SI-NEXT: [[BITCAST11:%[0-9]+]]:_(i32) = G_BITCAST [[UV7]](<2 x i16>) + ; SI-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; SI-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; SI-NEXT: [[BITCAST14:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC2]](f16) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST12]](i16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST13]](i16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST14]](i16) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST10]], [[C1]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND]], [[C]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI-NEXT: [[BITCAST16:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST11]], [[C1]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR2]], [[SHL2]] + ; SI-NEXT: [[BITCAST17:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST15]](<2 x i16>), [[BITCAST16]](<2 x i16>), [[BITCAST17]](<2 x i16>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; VI-LABEL: name: test_fadd_v3s16 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) 
- ; VI-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[TRUNC3]] - ; VI-NEXT: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[TRUNC4]] - ; VI-NEXT: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[TRUNC5]] - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FADD]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FADD1]](s16) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FADD2]](s16) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]] - ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr3_vgpr4_vgpr5 + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; VI-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<6 x i16>) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC 
[[BITCAST5]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST5]], [[C]](i32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST6]](i32) + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; VI-NEXT: [[FADD:%[0-9]+]]:_(f16) = G_FADD [[BITCAST2]], [[BITCAST7]] + ; VI-NEXT: [[FADD1:%[0-9]+]]:_(f16) = G_FADD [[BITCAST3]], [[BITCAST8]] + ; VI-NEXT: [[FADD2:%[0-9]+]]:_(f16) = G_FADD [[BITCAST4]], [[BITCAST9]] + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[UV6]](<2 x i16>) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST10]], [[C]](i32) + ; VI-NEXT: [[BITCAST11:%[0-9]+]]:_(i32) = G_BITCAST [[UV7]](<2 x i16>) + ; VI-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[FADD]](f16) + ; VI-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[FADD1]](f16) + ; VI-NEXT: [[BITCAST14:%[0-9]+]]:_(i16) = G_BITCAST [[FADD2]](f16) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST12]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST13]](i16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST14]](i16) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST10]], [[C1]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND]], [[C]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI-NEXT: [[BITCAST16:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST11]], [[C1]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR2]], [[SHL2]] + ; VI-NEXT: [[BITCAST17:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST15]](<2 x i16>), [[BITCAST16]](<2 x i16>), [[BITCAST17]](<2 x i16>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX9-LABEL: name: test_fadd_v3s16 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC 
[[BITCAST1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<2 x s16>) = G_FADD [[UV]], [[UV3]] - ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(<2 x s16>) = G_FADD [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[FADD]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[FADD1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) - %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - %2:_(<3 x s16>), %3:_(<3 x s16>) = G_UNMERGE_VALUES %0 - %4:_(<3 x s16>), %5:_(<3 x s16>) = G_UNMERGE_VALUES %1 - %6:_(<3 x s16>) = G_FADD %2, %4 - %7:_(<3 x s16>) = G_IMPLICIT_DEF - %8:_(<6 x s16>) = G_CONCAT_VECTORS %6, %7 - $vgpr0_vgpr1_vgpr2 = COPY %8 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<6 x i16>) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST5]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST5]], [[C]](i32) + ; GFX9-NEXT: 
[[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST6]](i32) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX9-NEXT: [[BITCAST9:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(f16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[BITCAST2]](f16), [[BITCAST3]](f16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[BITCAST4]](f16), [[DEF]](f16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[BITCAST7]](f16), [[BITCAST8]](f16) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[BITCAST9]](f16), [[DEF]](f16) + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<2 x f16>) = G_FADD [[BUILD_VECTOR]], [[BUILD_VECTOR2]] + ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(<2 x f16>) = G_FADD [[BUILD_VECTOR1]], [[BUILD_VECTOR3]] + ; GFX9-NEXT: [[BITCAST10:%[0-9]+]]:_(f16) = G_BITCAST %82(i16) + ; GFX9-NEXT: [[BITCAST11:%[0-9]+]]:_(f16) = G_BITCAST %88(i16) + ; GFX9-NEXT: [[BITCAST12:%[0-9]+]]:_(f16) = G_BITCAST %83(i16) + ; GFX9-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[FADD1]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST14:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST13]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST14]](i32) + ; GFX9-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[FADD]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST16:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST15]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST16]](i32) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST16]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF1]](<4 x i16>) + ; GFX9-NEXT: [[BITCAST17:%[0-9]+]]:_(i32) = G_BITCAST [[UV6]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC9:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST17]](i32) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST17]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC10:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX9-NEXT: [[BITCAST18:%[0-9]+]]:_(i32) = G_BITCAST [[UV7]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC11:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST18]](i32) + ; GFX9-NEXT: [[BITCAST19:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST10]](f16) + ; GFX9-NEXT: [[BITCAST20:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST12]](f16) + ; GFX9-NEXT: [[BITCAST21:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST11]](f16) + ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST19]](i16), [[BITCAST20]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST21]](i16), [[TRUNC9]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC10]](i16), [[TRUNC11]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR5]](<2 x i16>), [[BUILD_VECTOR6]](<2 x i16>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) + %0:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<6 x i16>) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(<3 x i16>), %3:_(<3 x i16>) = G_UNMERGE_VALUES %0(<6 x i16>) + %4:_(<3 x i16>), %5:_(<3 x i16>) = G_UNMERGE_VALUES %1(<6 x i16>) + %6:_(<3 x f16>) = G_BITCAST %2(<3 x i16>) + %7:_(<3 x f16>) = 
G_BITCAST %4(<3 x i16>) + %8:_(<3 x f16>) = G_FADD %6, %7 + %9:_(<3 x i16>) = G_IMPLICIT_DEF + %10:_(<3 x i16>) = G_BITCAST %8(<3 x f16>) + %11:_(<6 x i16>) = G_CONCAT_VECTORS %10(<3 x i16>), %9(<3 x i16>) + $vgpr0_vgpr1_vgpr2 = COPY %11(<6 x i16>) ... @@ -550,110 +707,154 @@ body: | ; SI-LABEL: name: test_fadd_v4s16 ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; SI-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT]], [[FPEXT1]] - ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) - ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; SI-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FPEXT2]], [[FPEXT3]] - ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD1]](s32) - ; SI-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) - ; SI-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FPEXT4]], [[FPEXT5]] - ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD2]](s32) - ; SI-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) - ; SI-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FPEXT6]], [[FPEXT7]] - ; SI-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD3]](s32) - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) - ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; SI-NEXT: 
[[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY1]](<4 x i16>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x f16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %36(i16) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %42(i16) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %37(i16) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %43(i16) + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST6]](<2 x i16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST7]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST7]], [[C]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST8]](<2 x i16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST9]](i32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST9]], [[C]](i32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x f16>), [[UV3:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f16>) + ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(f16) = G_BITCAST %47(i16) + ; SI-NEXT: [[BITCAST11:%[0-9]+]]:_(f16) = G_BITCAST %52(i16) + ; SI-NEXT: [[BITCAST12:%[0-9]+]]:_(f16) = G_BITCAST %48(i16) + ; SI-NEXT: [[BITCAST13:%[0-9]+]]:_(f16) = G_BITCAST %53(i16) + ; SI-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV3]](<2 x f16>) + ; SI-NEXT: [[BITCAST15:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST14]](<2 x i16>) + ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST15]](i32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST15]], [[C]](i32) + ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; SI-NEXT: [[BITCAST16:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV2]](<2 x f16>) + ; SI-NEXT: [[BITCAST17:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST16]](<2 x i16>) + ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST17]](i32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST17]], [[C]](i32) + ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST10]](f16) + ; SI-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FPEXT]], [[FPEXT1]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD]](f32) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST4]](f16) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST12]](f16) + ; SI-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FPEXT2]], [[FPEXT3]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD1]](f32) + ; SI-NEXT: [[FPEXT4:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST3]](f16) + ; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST11]](f16) + ; SI-NEXT: [[FADD2:%[0-9]+]]:_(f32) = G_FADD [[FPEXT4]], [[FPEXT5]] + ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(f16) = G_FPTRUNC 
[[FADD2]](f32) + ; SI-NEXT: [[FPEXT6:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST5]](f16) + ; SI-NEXT: [[FPEXT7:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST13]](f16) + ; SI-NEXT: [[FADD3:%[0-9]+]]:_(f32) = G_FADD [[FPEXT6]], [[FPEXT7]] + ; SI-NEXT: [[FPTRUNC3:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD3]](f32) + ; SI-NEXT: [[BITCAST18:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; SI-NEXT: [[BITCAST19:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST18]](i16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST19]](i16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST20:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[BITCAST21:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC2]](f16) + ; SI-NEXT: [[BITCAST22:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC3]](f16) + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST21]](i16) + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST22]](i16) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI-NEXT: [[BITCAST23:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR1]](i32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[BITCAST20]](<2 x f16>), [[BITCAST23]](<2 x f16>) + ; SI-NEXT: [[BITCAST24:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x f16>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST24]](<4 x i16>) ; ; VI-LABEL: name: test_fadd_v4s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; VI-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[TRUNC4]] - ; VI-NEXT: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[TRUNC5]] - ; VI-NEXT: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[TRUNC6]] - ; VI-NEXT: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[TRUNC3]], [[TRUNC7]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FADD]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FADD1]](s16) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL 
[[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FADD2]](s16) - ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FADD3]](s16) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY1]](<4 x i16>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x f16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %24(i16) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %30(i16) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %25(i16) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %31(i16) + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST6]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST7]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST7]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST8]](<2 x i16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST9]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST9]], [[C]](i32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x f16>), [[UV3:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f16>) + ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(f16) = G_BITCAST %35(i16) + ; VI-NEXT: [[BITCAST11:%[0-9]+]]:_(f16) = G_BITCAST %40(i16) + ; VI-NEXT: [[BITCAST12:%[0-9]+]]:_(f16) = G_BITCAST %36(i16) + ; VI-NEXT: [[BITCAST13:%[0-9]+]]:_(f16) = G_BITCAST %41(i16) + ; VI-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV3]](<2 x f16>) + ; VI-NEXT: [[BITCAST15:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST14]](<2 x i16>) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST15]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST15]], [[C]](i32) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; VI-NEXT: [[BITCAST16:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV2]](<2 x f16>) + ; VI-NEXT: [[BITCAST17:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST16]](<2 x i16>) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST17]](i32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST17]], [[C]](i32) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; VI-NEXT: [[FADD:%[0-9]+]]:_(f16) = G_FADD [[BITCAST2]], [[BITCAST10]] + ; VI-NEXT: [[FADD1:%[0-9]+]]:_(f16) = G_FADD [[BITCAST4]], [[BITCAST12]] + ; VI-NEXT: [[FADD2:%[0-9]+]]:_(f16) = G_FADD [[BITCAST3]], [[BITCAST11]] + ; VI-NEXT: [[FADD3:%[0-9]+]]:_(f16) = G_FADD [[BITCAST5]], [[BITCAST13]] + ; VI-NEXT: [[BITCAST18:%[0-9]+]]:_(i16) = G_BITCAST [[FADD]](f16) + ; VI-NEXT: 
[[BITCAST19:%[0-9]+]]:_(i16) = G_BITCAST [[FADD1]](f16) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST18]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST19]](i16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST20:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[BITCAST21:%[0-9]+]]:_(i16) = G_BITCAST [[FADD2]](f16) + ; VI-NEXT: [[BITCAST22:%[0-9]+]]:_(i16) = G_BITCAST [[FADD3]](f16) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST21]](i16) + ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST22]](i16) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI-NEXT: [[BITCAST23:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR1]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[BITCAST20]](<2 x f16>), [[BITCAST23]](<2 x f16>) + ; VI-NEXT: [[BITCAST24:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x f16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST24]](<4 x i16>) ; ; GFX9-LABEL: name: test_fadd_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<2 x s16>) = G_FADD [[UV]], [[UV2]] - ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(<2 x s16>) = G_FADD [[UV1]], [[UV3]] - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[FADD]](<2 x s16>), [[FADD1]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 - %2:_(<4 x s16>) = G_FADD %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY1]](<4 x i16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x f16>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x f16>), [[UV3:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f16>) + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<2 x f16>) = G_FADD [[UV]], [[UV2]] + ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(<2 x f16>) = G_FADD [[UV1]], [[UV3]] + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[FADD]](<2 x f16>), [[FADD1]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x f16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = COPY $vgpr2_vgpr3 + %2:_(<4 x f16>) = G_BITCAST %0(<4 x i16>) + %3:_(<4 x f16>) = G_BITCAST %1(<4 x i16>) + %4:_(<4 x f16>) = G_FADD %2, %3 + %5:_(<4 x i16>) = G_BITCAST %4(<4 x f16>) + $vgpr0_vgpr1 = COPY %5(<4 x i16>) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir index 12f3f89757766..fc80ff30fda49 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir @@ -15,24 +15,34 @@ body: | ; SI-LABEL: name: test_fcanonicalize_s32 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; SI-NEXT: $vgpr0 = COPY [[FCANONICALIZE]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[FCANONICALIZE]](f32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + ; ; VI-LABEL: name: test_fcanonicalize_s32 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; VI-NEXT: $vgpr0 = COPY [[FCANONICALIZE]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[FCANONICALIZE]](f32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + ; ; GFX9-LABEL: name: test_fcanonicalize_s32 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; GFX9-NEXT: $vgpr0 = COPY [[FCANONICALIZE]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_FCANONICALIZE %0 - $vgpr0 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[FCANONICALIZE]](f32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = G_FCANONICALIZE %1 + %3:_(i32) = G_BITCAST %2(f32) + $vgpr0 = COPY %3(i32) ... --- name: test_fcanonicalize_s64 @@ -43,21 +53,24 @@ body: | ; SI-LABEL: name: test_fcanonicalize_s64 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](s64) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](i64) + ; ; VI-LABEL: name: test_fcanonicalize_s64 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](s64) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](i64) + ; ; GFX9-LABEL: name: test_fcanonicalize_s64 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_FCANONICALIZE %0 - $vgpr0_vgpr1 = COPY %0 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(f64) = G_BITCAST %0(i64) + %2:_(f64) = G_FCANONICALIZE %1 + $vgpr0_vgpr1 = COPY %0(i64) ... 
--- name: test_fcanonicalize_s16 @@ -68,34 +81,44 @@ body: | ; SI-LABEL: name: test_fcanonicalize_s16 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT]] - ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCANONICALIZE]](s32) - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[FPEXT]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FCANONICALIZE]](f32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + ; ; VI-LABEL: name: test_fcanonicalize_s16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FCANONICALIZE]](s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST]] + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[FCANONICALIZE]](f16) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + ; ; GFX9-LABEL: name: test_fcanonicalize_s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FCANONICALIZE]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s16) = G_FCANONICALIZE %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST]] + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[FCANONICALIZE]](f16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(f16) = G_BITCAST %1(i16) + %3:_(f16) = G_FCANONICALIZE %2 + %4:_(i16) = G_BITCAST %3(f16) + %5:_(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... 
--- @@ -107,33 +130,43 @@ body: | ; SI-LABEL: name: test_fcanonicalize_v2s32 ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV]] - ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV1]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FCANONICALIZE]](s32), [[FCANONICALIZE1]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[UV]] + ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[UV1]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FCANONICALIZE]](f32), [[FCANONICALIZE1]](f32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](<2 x i32>) + ; ; VI-LABEL: name: test_fcanonicalize_v2s32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV]] - ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV1]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FCANONICALIZE]](s32), [[FCANONICALIZE1]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[UV]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[UV1]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FCANONICALIZE]](f32), [[FCANONICALIZE1]](f32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](<2 x i32>) + ; ; GFX9-LABEL: name: test_fcanonicalize_v2s32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV]] - ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV1]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FCANONICALIZE]](s32), [[FCANONICALIZE1]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_FCANONICALIZE %0 - $vgpr0_vgpr1 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[UV]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE 
[[UV1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FCANONICALIZE]](f32), [[FCANONICALIZE1]](f32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x f32>) = G_BITCAST %0(<2 x i32>) + %2:_(<2 x f32>) = G_FCANONICALIZE %1 + %3:_(<2 x i32>) = G_BITCAST %2(<2 x f32>) + $vgpr0_vgpr1 = COPY %3(<2 x i32>) ... --- @@ -145,36 +178,46 @@ body: | ; SI-LABEL: name: test_fcanonicalize_v3s32 ; SI: liveins: $vgpr0_vgpr1_vgpr2 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV]] - ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV1]] - ; SI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV2]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FCANONICALIZE]](s32), [[FCANONICALIZE1]](s32), [[FCANONICALIZE2]](s32) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY]](<3 x i32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[UV]] + ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[UV1]] + ; SI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[UV2]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[FCANONICALIZE]](f32), [[FCANONICALIZE1]](f32), [[FCANONICALIZE2]](f32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[BUILD_VECTOR]](<3 x f32>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST1]](<3 x i32>) + ; ; VI-LABEL: name: test_fcanonicalize_v3s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV]] - ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV1]] - ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV2]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FCANONICALIZE]](s32), [[FCANONICALIZE1]](s32), [[FCANONICALIZE2]](s32) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY]](<3 x i32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[UV]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[UV1]] + ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[UV2]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[FCANONICALIZE]](f32), [[FCANONICALIZE1]](f32), [[FCANONICALIZE2]](f32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[BUILD_VECTOR]](<3 x f32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST1]](<3 x i32>) + ; ; GFX9-LABEL: name: test_fcanonicalize_v3s32 ; 
GFX9: liveins: $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV]] - ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV1]] - ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV2]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FCANONICALIZE]](s32), [[FCANONICALIZE1]](s32), [[FCANONICALIZE2]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s32>) = G_FCANONICALIZE %0 - $vgpr0_vgpr1_vgpr2 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY]](<3 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[UV]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[UV1]] + ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[UV2]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[FCANONICALIZE]](f32), [[FCANONICALIZE1]](f32), [[FCANONICALIZE2]](f32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[BUILD_VECTOR]](<3 x f32>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST1]](<3 x i32>) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x f32>) = G_BITCAST %0(<3 x i32>) + %2:_(<3 x f32>) = G_FCANONICALIZE %1 + %3:_(<3 x i32>) = G_BITCAST %2(<3 x f32>) + $vgpr0_vgpr1_vgpr2 = COPY %3(<3 x i32>) ... 
--- @@ -186,33 +229,43 @@ body: | ; SI-LABEL: name: test_fcanonicalize_v2s64 ; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[UV]] - ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[UV1]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FCANONICALIZE]](s64), [[FCANONICALIZE1]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(f64), [[UV1:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST]](<2 x f64>) + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f64) = G_FCANONICALIZE [[UV]] + ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f64) = G_FCANONICALIZE [[UV1]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f64>) = G_BUILD_VECTOR [[FCANONICALIZE]](f64), [[FCANONICALIZE1]](f64) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BUILD_VECTOR]](<2 x f64>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST1]](<2 x i64>) + ; ; VI-LABEL: name: test_fcanonicalize_v2s64 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[UV]] - ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[UV1]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FCANONICALIZE]](s64), [[FCANONICALIZE1]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(f64), [[UV1:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST]](<2 x f64>) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f64) = G_FCANONICALIZE [[UV]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f64) = G_FCANONICALIZE [[UV1]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f64>) = G_BUILD_VECTOR [[FCANONICALIZE]](f64), [[FCANONICALIZE1]](f64) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BUILD_VECTOR]](<2 x f64>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST1]](<2 x i64>) + ; ; GFX9-LABEL: name: test_fcanonicalize_v2s64 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[UV]] - ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[UV1]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FCANONICALIZE]](s64), [[FCANONICALIZE1]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s64>) = G_FCANONICALIZE %0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f64), 
[[UV1:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST]](<2 x f64>) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f64) = G_FCANONICALIZE [[UV]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f64) = G_FCANONICALIZE [[UV1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f64>) = G_BUILD_VECTOR [[FCANONICALIZE]](f64), [[FCANONICALIZE1]](f64) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BUILD_VECTOR]](<2 x f64>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST1]](<2 x i64>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x f64>) = G_BITCAST %0(<2 x i64>) + %2:_(<2 x f64>) = G_FCANONICALIZE %1 + %3:_(<2 x i64>) = G_BITCAST %2(<2 x f64>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3(<2 x i64>) ... --- @@ -224,50 +277,70 @@ body: | ; SI-LABEL: name: test_fcanonicalize_v2s16 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT]] - ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCANONICALIZE]](s32) - ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT1]] - ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCANONICALIZE1]](s32) - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %12(i16) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST1]](f16) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %13(i16) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[FPEXT]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FCANONICALIZE]](f32) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[FPEXT1]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[FCANONICALIZE1]](f32) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST5]](i16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST6]](i16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: 
[[BITCAST7:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST7]](<2 x f16>) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST8]](<2 x i16>) + ; ; VI-LABEL: name: test_fcanonicalize_v2s16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] - ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FCANONICALIZE]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FCANONICALIZE1]](s16) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %8(i16) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST1]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %9(i16) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST2]] + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[FCANONICALIZE]](f16) + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[FCANONICALIZE1]](f16) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST5]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST6]](i16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST7]](<2 x f16>) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST8]](<2 x i16>) + ; ; GFX9-LABEL: name: test_fcanonicalize_v2s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[COPY]] - ; GFX9-NEXT: $vgpr0 = COPY [[FCANONICALIZE]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = G_FCANONICALIZE %0 - $vgpr0 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x f16>) = G_FCANONICALIZE [[BITCAST]] + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[FCANONICALIZE]](<2 x f16>) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %2:_(<2 x f16>) = G_FCANONICALIZE %1 + %3:_(<2 x i16>) = G_BITCAST %2(<2 x f16>) + $vgpr0 = 
COPY %3(<2 x i16>) ... --- @@ -276,66 +349,101 @@ body: | bb.0: ; SI-LABEL: name: test_fcanonicalize_v3s16 - ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT]] - ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCANONICALIZE]](s32) - ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT1]] - ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCANONICALIZE1]](s32) - ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT2]] - ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCANONICALIZE2]](s32) - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) - ; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; SI: [[DEF:%[0-9]+]]:_(<4 x f16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[DEF]](<4 x f16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST %35(i16) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %41(i16) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %36(i16) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST5]](<2 x i16>) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST6]](i32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST6]], [[C]](i32) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[FPEXT]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FCANONICALIZE]](f32) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[FPEXT1]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[FCANONICALIZE1]](f32) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST1]](f16) + ; SI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[FPEXT2]] + ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(f16) = G_FPTRUNC [[FCANONICALIZE2]](f32) + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; SI-NEXT: 
[[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC2]](f16) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST7]](i16) + ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST8]](i16) + ; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST9]](i16) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32), [[ANYEXT2]](i32) + ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x i32>) + ; ; VI-LABEL: name: test_fcanonicalize_v3s16 - ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] - ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] - ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FCANONICALIZE]](s16) - ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FCANONICALIZE1]](s16) - ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FCANONICALIZE2]](s16) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; VI: [[DEF:%[0-9]+]]:_(<4 x f16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[DEF]](<4 x f16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST %29(i16) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %35(i16) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %30(i16) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST5]](<2 x i16>) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST6]](i32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST6]], [[C]](i32) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST2]] + ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST1]] + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[FCANONICALIZE]](f16) + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[FCANONICALIZE1]](f16) + ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[FCANONICALIZE2]](f16) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST7]](i16) + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST8]](i16) + ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST9]](i16) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32), [[ANYEXT2]](i32) + ; VI-NEXT: S_NOP 0, implicit 
[[BUILD_VECTOR]](<3 x i32>) + ; ; GFX9-LABEL: name: test_fcanonicalize_v3s16 - ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF1]](s16) - ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV]] - ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR]] - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[FCANONICALIZE]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[FCANONICALIZE1]](<2 x s16>) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST1]](s32), [[LSHR]](s32), [[BITCAST2]](s32) - ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR1]](<3 x s32>) - %0:_(<3 x s16>) = G_IMPLICIT_DEF - %1:_(<3 x s16>) = G_FCANONICALIZE %0 - %2:_(<3 x s32>) = G_ANYEXT %1 - S_NOP 0, implicit %2 + ; GFX9: [[DEF:%[0-9]+]]:_(<4 x f16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[DEF]](<4 x f16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST %50(i16) + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(f16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST1]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[BITCAST]](f16), [[DEF1]](f16) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x f16>) = G_FCANONICALIZE [[UV]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x f16>) = G_FCANONICALIZE [[BUILD_VECTOR]] + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %39(i16) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %45(i16) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %40(i16) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[FCANONICALIZE1]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST6]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST7]](i32) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[FCANONICALIZE]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST8]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST9]](i32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST9]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX9-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST5]](f16) + ; GFX9-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST10]](i16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST11]](i16) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST12]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32), [[ANYEXT2]](i32) + ; GFX9-NEXT: S_NOP 0, 
implicit [[BUILD_VECTOR1]](<3 x i32>) + %0:_(<3 x f16>) = G_IMPLICIT_DEF + %1:_(<3 x f16>) = G_FCANONICALIZE %0 + %2:_(<3 x i16>) = G_BITCAST %1(<3 x f16>) + %3:_(<3 x i32>) = G_ANYEXT %2(<3 x i16>) + S_NOP 0, implicit %3(<3 x i32>) ... --- @@ -347,81 +455,111 @@ body: | ; SI-LABEL: name: test_fcanonicalize_v4s16 ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT]] - ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCANONICALIZE]](s32) - ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT1]] - ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCANONICALIZE1]](s32) - ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT2]] - ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCANONICALIZE2]](s32) - ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT3]] - ; SI-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCANONICALIZE3]](s32) - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) - ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x f16>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %24(i16) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %30(i16) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %25(i16) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %31(i16) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST5]](<2 x i16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC 
[[BITCAST6]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST6]], [[C]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST7]](<2 x i16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST8]](i32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST8]], [[C]](i32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST1]](f16) + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[FPEXT]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FCANONICALIZE]](f32) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST3]](f16) + ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[FPEXT1]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[FCANONICALIZE1]](f32) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; SI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[FPEXT2]] + ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(f16) = G_FPTRUNC [[FCANONICALIZE2]](f32) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST4]](f16) + ; SI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[FPEXT3]] + ; SI-NEXT: [[FPTRUNC3:%[0-9]+]]:_(f16) = G_FPTRUNC [[FCANONICALIZE3]](f32) + ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST9]](i16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST10]](i16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC2]](f16) + ; SI-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC3]](f16) + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST12]](i16) + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST13]](i16) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR1]](i32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[BITCAST11]](<2 x f16>), [[BITCAST14]](<2 x f16>) + ; SI-NEXT: [[BITCAST15:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x f16>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST15]](<4 x i16>) + ; ; VI-LABEL: name: test_fcanonicalize_v4s16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: 
[[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] - ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] - ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]] - ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC3]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FCANONICALIZE]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FCANONICALIZE1]](s16) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FCANONICALIZE2]](s16) - ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FCANONICALIZE3]](s16) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x f16>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %16(i16) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %22(i16) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %17(i16) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %23(i16) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST5]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST6]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST6]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST7]](<2 x i16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST8]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST8]], [[C]](i32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST1]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST3]] + ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST2]] + ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST4]] + ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[FCANONICALIZE]](f16) + ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[FCANONICALIZE1]](f16) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST9]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST10]](i16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[FCANONICALIZE2]](f16) + ; VI-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[FCANONICALIZE3]](f16) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST12]](i16) + ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST13]](i16) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL 
[[ZEXT3]], [[C]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR1]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[BITCAST11]](<2 x f16>), [[BITCAST14]](<2 x f16>) + ; VI-NEXT: [[BITCAST15:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x f16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST15]](<4 x i16>) + ; ; GFX9-LABEL: name: test_fcanonicalize_v4s16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV]] - ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV1]] - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[FCANONICALIZE]](<2 x s16>), [[FCANONICALIZE1]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = G_FCANONICALIZE %0 - $vgpr0_vgpr1 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x f16>) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x f16>) = G_FCANONICALIZE [[UV]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x f16>) = G_FCANONICALIZE [[UV1]] + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[FCANONICALIZE]](<2 x f16>), [[FCANONICALIZE1]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x f16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x f16>) = G_BITCAST %0(<4 x i16>) + %2:_(<4 x f16>) = G_FCANONICALIZE %1 + %3:_(<4 x i16>) = G_BITCAST %2(<4 x f16>) + $vgpr0_vgpr1 = COPY %3(<4 x i16>) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fceil.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fceil.mir index 9f3dc801989b4..a135910bf6d67 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fceil.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fceil.mir @@ -15,44 +15,57 @@ body: | ; SI-LABEL: name: test_fceil_s16 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[FCEIL:%[0-9]+]]:_(s32) = G_FCEIL [[FPEXT]] - ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCEIL]](s32) - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; SI-NEXT: [[FCEIL:%[0-9]+]]:_(f32) = G_FCEIL [[FPEXT]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FCEIL]](f32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + ; ; CI-LABEL: name: test_fceil_s16 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} - ; CI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; CI-NEXT: [[FCEIL:%[0-9]+]]:_(s32) = G_FCEIL [[FPEXT]] - ; CI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCEIL]](s32) - ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; CI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; CI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; CI-NEXT: [[FCEIL:%[0-9]+]]:_(f32) = G_FCEIL [[FPEXT]] + ; CI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FCEIL]](f32) + ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; CI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + ; ; VI-LABEL: name: test_fceil_s16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[FCEIL:%[0-9]+]]:_(s16) = G_FCEIL [[TRUNC]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FCEIL]](s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; VI-NEXT: [[FCEIL:%[0-9]+]]:_(f16) = G_FCEIL [[BITCAST]] + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[FCEIL]](f16) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + ; ; GFX9-LABEL: name: test_fceil_s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[FCEIL:%[0-9]+]]:_(s16) = G_FCEIL [[TRUNC]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FCEIL]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = 
COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s16) = G_FCEIL %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[FCEIL:%[0-9]+]]:_(f16) = G_FCEIL [[BITCAST]] + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[FCEIL]](f16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(f16) = G_BITCAST %1(i16) + %3:_(f16) = G_FCEIL %2 + %4:_(i16) = G_BITCAST %3(f16) + %5:_(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... --- @@ -64,30 +77,43 @@ body: | ; SI-LABEL: name: test_fceil_s32 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[FCEIL:%[0-9]+]]:_(s32) = G_FCEIL [[COPY]] - ; SI-NEXT: $vgpr0 = COPY [[FCEIL]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[FCEIL:%[0-9]+]]:_(f32) = G_FCEIL [[BITCAST]] + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[FCEIL]](f32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + ; ; CI-LABEL: name: test_fceil_s32 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} - ; CI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CI-NEXT: [[FCEIL:%[0-9]+]]:_(s32) = G_FCEIL [[COPY]] - ; CI-NEXT: $vgpr0 = COPY [[FCEIL]](s32) + ; CI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CI-NEXT: [[FCEIL:%[0-9]+]]:_(f32) = G_FCEIL [[BITCAST]] + ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[FCEIL]](f32) + ; CI-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + ; ; VI-LABEL: name: test_fceil_s32 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[FCEIL:%[0-9]+]]:_(s32) = G_FCEIL [[COPY]] - ; VI-NEXT: $vgpr0 = COPY [[FCEIL]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[FCEIL:%[0-9]+]]:_(f32) = G_FCEIL [[BITCAST]] + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[FCEIL]](f32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + ; ; GFX9-LABEL: name: test_fceil_s32 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[FCEIL:%[0-9]+]]:_(s32) = G_FCEIL [[COPY]] - ; GFX9-NEXT: $vgpr0 = COPY [[FCEIL]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_FCEIL %0 - $vgpr0 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[FCEIL:%[0-9]+]]:_(f32) = G_FCEIL [[BITCAST]] + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[FCEIL]](f32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = G_FCEIL %1 + %3:_(i32) = G_BITCAST %2(f32) + $vgpr0 = COPY %3(i32) ... 
--- @@ -99,56 +125,72 @@ body: | ; SI-LABEL: name: test_fceil_s64 ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 - ; SI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ubfe), [[UV1]](s32), [[C]](s32), [[C1]](s32) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1023 - ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]] - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4503599627370495 - ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C5]](s32), [[AND]](s32) - ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[C4]], [[SUB]](s32) - ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; SI-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[C6]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[XOR]] - ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 51 - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SUB]](s32), [[C5]] - ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SUB]](s32), [[C7]] - ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[MV]], [[AND1]] - ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[COPY]], [[SELECT]] - ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00 - ; SI-NEXT: [[C9:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; SI-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[COPY]](s64), [[C8]] - ; SI-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(one), [[COPY]](s64), [[SELECT1]] - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s1) = G_AND [[FCMP]], [[FCMP1]] - ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[AND2]](s1), [[C9]], [[C8]] - ; SI-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[SELECT1]], [[SELECT2]] - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[FADD]](s64) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[BITCAST]](f64) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST1]](i64) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 11 + ; SI-NEXT: [[INT:%[0-9]+]]:_(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ubfe), [[UV1]](i32), [[C]](i32), [[C1]](i32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 1023 + ; SI-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[INT]], [[C2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[UV1]], [[C3]] + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4503599627370495 + ; SI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; SI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[C5]](i32), [[AND]](i32) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(f64) = G_BITCAST [[MV]](i64) + ; SI-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[C4]], [[SUB]](i32) + ; SI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 -1 + ; SI-NEXT: [[XOR:%[0-9]+]]:_(i64) = G_XOR [[ASHR]], [[C6]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[BITCAST1]], [[XOR]] + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(f64) = G_BITCAST [[AND1]](i64) + ; SI-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 51 + ; SI-NEXT: 
[[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SUB]](i32), [[C5]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(sgt), [[SUB]](i32), [[C7]] + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(f64) = G_SELECT [[ICMP]](i1), [[BITCAST2]], [[BITCAST3]] + ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(f64) = G_SELECT [[ICMP1]](i1), [[BITCAST]], [[SELECT]] + ; SI-NEXT: [[C8:%[0-9]+]]:_(f64) = G_FCONSTANT double 0.000000e+00 + ; SI-NEXT: [[C9:%[0-9]+]]:_(f64) = G_FCONSTANT double 1.000000e+00 + ; SI-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(ogt), [[BITCAST]](f64), [[C8]] + ; SI-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(one), [[BITCAST]](f64), [[SELECT1]] + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i1) = G_AND [[FCMP]], [[FCMP1]] + ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(f64) = G_SELECT [[AND2]](i1), [[C9]], [[C8]] + ; SI-NEXT: [[FADD:%[0-9]+]]:_(f64) = G_FADD [[SELECT1]], [[SELECT2]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(i64) = G_BITCAST [[FADD]](f64) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST4]](i64) + ; ; CI-LABEL: name: test_fceil_s64 ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} - ; CI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[FCEIL:%[0-9]+]]:_(s64) = G_FCEIL [[COPY]] - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[FCEIL]](s64) + ; CI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; CI-NEXT: [[FCEIL:%[0-9]+]]:_(f64) = G_FCEIL [[BITCAST]] + ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[FCEIL]](f64) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](i64) + ; ; VI-LABEL: name: test_fceil_s64 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[FCEIL:%[0-9]+]]:_(s64) = G_FCEIL [[COPY]] - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[FCEIL]](s64) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; VI-NEXT: [[FCEIL:%[0-9]+]]:_(f64) = G_FCEIL [[BITCAST]] + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[FCEIL]](f64) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](i64) + ; ; GFX9-LABEL: name: test_fceil_s64 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FCEIL:%[0-9]+]]:_(s64) = G_FCEIL [[COPY]] - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FCEIL]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_FCEIL %0 - $vgpr0_vgpr1 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; GFX9-NEXT: [[FCEIL:%[0-9]+]]:_(f64) = G_FCEIL [[BITCAST]] + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[FCEIL]](f64) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(f64) = G_BITCAST %0(i64) + %2:_(f64) = G_FCEIL %1 + %3:_(i64) = G_BITCAST %2(f64) + $vgpr0_vgpr1 = COPY %3(i64) ... 
--- @@ -160,78 +202,109 @@ body: | ; SI-LABEL: name: test_fceil_v2s16 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[FCEIL:%[0-9]+]]:_(s32) = G_FCEIL [[FPEXT]] - ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCEIL]](s32) - ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI-NEXT: [[FCEIL1:%[0-9]+]]:_(s32) = G_FCEIL [[FPEXT1]] - ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCEIL1]](s32) - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %12(i16) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST1]](f16) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %13(i16) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; SI-NEXT: [[FCEIL:%[0-9]+]]:_(f32) = G_FCEIL [[FPEXT]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FCEIL]](f32) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; SI-NEXT: [[FCEIL1:%[0-9]+]]:_(f32) = G_FCEIL [[FPEXT1]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[FCEIL1]](f32) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST5]](i16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST6]](i16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST7]](<2 x f16>) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST8]](<2 x i16>) + ; ; CI-LABEL: name: test_fceil_v2s16 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} - ; CI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; CI-NEXT: [[FCEIL:%[0-9]+]]:_(s32) = G_FCEIL [[FPEXT]] - ; CI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC 
[[FCEIL]](s32) - ; CI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; CI-NEXT: [[FCEIL1:%[0-9]+]]:_(s32) = G_FCEIL [[FPEXT1]] - ; CI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCEIL1]](s32) - ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; CI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %12(i16) + ; CI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST1]](f16) + ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %13(i16) + ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; CI-NEXT: [[FCEIL:%[0-9]+]]:_(f32) = G_FCEIL [[FPEXT]] + ; CI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FCEIL]](f32) + ; CI-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; CI-NEXT: [[FCEIL1:%[0-9]+]]:_(f32) = G_FCEIL [[FPEXT1]] + ; CI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[FCEIL1]](f32) + ; CI-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; CI-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST5]](i16) + ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST6]](i16) + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; CI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; CI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST7]](<2 x f16>) + ; CI-NEXT: $vgpr0 = COPY [[BITCAST8]](<2 x i16>) + ; ; VI-LABEL: name: test_fceil_v2s16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[FCEIL:%[0-9]+]]:_(s16) = G_FCEIL [[TRUNC]] - ; VI-NEXT: [[FCEIL1:%[0-9]+]]:_(s16) = G_FCEIL [[TRUNC1]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FCEIL]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FCEIL1]](s16) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %8(i16) + ; VI-NEXT: [[FCEIL:%[0-9]+]]:_(f16) = G_FCEIL [[BITCAST1]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %9(i16) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x 
i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[FCEIL1:%[0-9]+]]:_(f16) = G_FCEIL [[BITCAST2]] + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[FCEIL]](f16) + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[FCEIL1]](f16) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST5]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST6]](i16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST7]](<2 x f16>) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST8]](<2 x i16>) + ; ; GFX9-LABEL: name: test_fceil_v2s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[FCEIL:%[0-9]+]]:_(s16) = G_FCEIL [[TRUNC]] - ; GFX9-NEXT: [[FCEIL1:%[0-9]+]]:_(s16) = G_FCEIL [[TRUNC1]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FCEIL]](s16), [[FCEIL1]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = G_FCEIL %0 - $vgpr0 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %8(i16) + ; GFX9-NEXT: [[FCEIL:%[0-9]+]]:_(f16) = G_FCEIL [[BITCAST1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %9(i16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[FCEIL1:%[0-9]+]]:_(f16) = G_FCEIL [[BITCAST2]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[FCEIL]](f16), [[FCEIL1]](f16) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BUILD_VECTOR]](<2 x f16>) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST5]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %2:_(<2 x f16>) = G_FCEIL %1 + %3:_(<2 x i16>) = G_BITCAST %2(<2 x f16>) + $vgpr0 = COPY %3(<2 x i16>) ... 
--- @@ -243,42 +316,55 @@ body: | ; SI-LABEL: name: test_fceil_v2s32 ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; SI-NEXT: [[FCEIL:%[0-9]+]]:_(s32) = G_FCEIL [[UV]] - ; SI-NEXT: [[FCEIL1:%[0-9]+]]:_(s32) = G_FCEIL [[UV1]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FCEIL]](s32), [[FCEIL1]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; SI-NEXT: [[FCEIL:%[0-9]+]]:_(f32) = G_FCEIL [[UV]] + ; SI-NEXT: [[FCEIL1:%[0-9]+]]:_(f32) = G_FCEIL [[UV1]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FCEIL]](f32), [[FCEIL1]](f32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](<2 x i32>) + ; ; CI-LABEL: name: test_fceil_v2s32 ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} - ; CI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CI-NEXT: [[FCEIL:%[0-9]+]]:_(s32) = G_FCEIL [[UV]] - ; CI-NEXT: [[FCEIL1:%[0-9]+]]:_(s32) = G_FCEIL [[UV1]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FCEIL]](s32), [[FCEIL1]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; CI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; CI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; CI-NEXT: [[FCEIL:%[0-9]+]]:_(f32) = G_FCEIL [[UV]] + ; CI-NEXT: [[FCEIL1:%[0-9]+]]:_(f32) = G_FCEIL [[UV1]] + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FCEIL]](f32), [[FCEIL1]](f32) + ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](<2 x i32>) + ; ; VI-LABEL: name: test_fceil_v2s32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; VI-NEXT: [[FCEIL:%[0-9]+]]:_(s32) = G_FCEIL [[UV]] - ; VI-NEXT: [[FCEIL1:%[0-9]+]]:_(s32) = G_FCEIL [[UV1]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FCEIL]](s32), [[FCEIL1]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; VI-NEXT: [[FCEIL:%[0-9]+]]:_(f32) = G_FCEIL [[UV]] + ; VI-NEXT: [[FCEIL1:%[0-9]+]]:_(f32) = G_FCEIL [[UV1]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FCEIL]](f32), [[FCEIL1]](f32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](<2 x i32>) + ; ; GFX9-LABEL: name: test_fceil_v2s32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: 
[[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-NEXT: [[FCEIL:%[0-9]+]]:_(s32) = G_FCEIL [[UV]] - ; GFX9-NEXT: [[FCEIL1:%[0-9]+]]:_(s32) = G_FCEIL [[UV1]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FCEIL]](s32), [[FCEIL1]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_FCEIL %0 - $vgpr0_vgpr1 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; GFX9-NEXT: [[FCEIL:%[0-9]+]]:_(f32) = G_FCEIL [[UV]] + ; GFX9-NEXT: [[FCEIL1:%[0-9]+]]:_(f32) = G_FCEIL [[UV1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FCEIL]](f32), [[FCEIL1]](f32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x f32>) = G_BITCAST %0(<2 x i32>) + %2:_(<2 x f32>) = G_FCEIL %1 + %3:_(<2 x i32>) = G_BITCAST %2(<2 x f32>) + $vgpr0_vgpr1 = COPY %3(<2 x i32>) ... --- @@ -290,82 +376,101 @@ body: | ; SI-LABEL: name: test_fceil_v2s64 ; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 - ; SI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ubfe), [[UV3]](s32), [[C]](s32), [[C1]](s32) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1023 - ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C3]] - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4503599627370495 - ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C5]](s32), [[AND]](s32) - ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[C4]], [[SUB]](s32) - ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; SI-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[C6]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[UV]], [[XOR]] - ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 51 - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SUB]](s32), [[C5]] - ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SUB]](s32), [[C7]] - ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[MV]], [[AND1]] - ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV]], [[SELECT]] - ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00 - ; SI-NEXT: [[C9:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; SI-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UV]](s64), [[C8]] - ; SI-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(one), [[UV]](s64), [[SELECT1]] - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s1) = G_AND [[FCMP]], [[FCMP1]] - ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[AND2]](s1), [[C9]], [[C8]] - ; SI-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[SELECT1]], [[SELECT2]] - ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = 
G_UNMERGE_VALUES [[UV1]](s64) - ; SI-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ubfe), [[UV5]](s32), [[C]](s32), [[C1]](s32) - ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[INT1]], [[C2]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C3]] - ; SI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C5]](s32), [[AND3]](s32) - ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[C4]], [[SUB1]](s32) - ; SI-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[ASHR1]], [[C6]] - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s64) = G_AND [[UV1]], [[XOR1]] - ; SI-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SUB1]](s32), [[C5]] - ; SI-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SUB1]](s32), [[C7]] - ; SI-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[MV1]], [[AND4]] - ; SI-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV1]], [[SELECT3]] - ; SI-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UV1]](s64), [[C8]] - ; SI-NEXT: [[FCMP3:%[0-9]+]]:_(s1) = G_FCMP floatpred(one), [[UV1]](s64), [[SELECT4]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s1) = G_AND [[FCMP2]], [[FCMP3]] - ; SI-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[AND5]](s1), [[C9]], [[C8]] - ; SI-NEXT: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[SELECT4]], [[SELECT5]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FADD]](s64), [[FADD1]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(f64), [[UV1:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST]](<2 x f64>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[UV]](f64) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST1]](i64) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 11 + ; SI-NEXT: [[INT:%[0-9]+]]:_(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ubfe), [[UV3]](i32), [[C]](i32), [[C1]](i32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 1023 + ; SI-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[INT]], [[C2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[UV3]], [[C3]] + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4503599627370495 + ; SI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; SI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[C5]](i32), [[AND]](i32) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(f64) = G_BITCAST [[MV]](i64) + ; SI-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[C4]], [[SUB]](i32) + ; SI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 -1 + ; SI-NEXT: [[XOR:%[0-9]+]]:_(i64) = G_XOR [[ASHR]], [[C6]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[BITCAST1]], [[XOR]] + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(f64) = G_BITCAST [[AND1]](i64) + ; SI-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 51 + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SUB]](i32), [[C5]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(sgt), [[SUB]](i32), [[C7]] + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(f64) = G_SELECT [[ICMP]](i1), [[BITCAST2]], [[BITCAST3]] + ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(f64) = G_SELECT [[ICMP1]](i1), [[UV]], [[SELECT]] + ; SI-NEXT: [[C8:%[0-9]+]]:_(f64) = G_FCONSTANT double 0.000000e+00 + ; SI-NEXT: [[C9:%[0-9]+]]:_(f64) = G_FCONSTANT double 1.000000e+00 + ; SI-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(ogt), [[UV]](f64), [[C8]] + ; SI-NEXT: 
[[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(one), [[UV]](f64), [[SELECT1]] + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i1) = G_AND [[FCMP]], [[FCMP1]] + ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(f64) = G_SELECT [[AND2]](i1), [[C9]], [[C8]] + ; SI-NEXT: [[FADD:%[0-9]+]]:_(f64) = G_FADD [[SELECT1]], [[SELECT2]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(i64) = G_BITCAST [[UV1]](f64) + ; SI-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST4]](i64) + ; SI-NEXT: [[INT1:%[0-9]+]]:_(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ubfe), [[UV5]](i32), [[C]](i32), [[C1]](i32) + ; SI-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[INT1]], [[C2]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[UV5]], [[C3]] + ; SI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[C5]](i32), [[AND3]](i32) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(f64) = G_BITCAST [[MV1]](i64) + ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[C4]], [[SUB1]](i32) + ; SI-NEXT: [[XOR1:%[0-9]+]]:_(i64) = G_XOR [[ASHR1]], [[C6]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(i64) = G_AND [[BITCAST4]], [[XOR1]] + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(f64) = G_BITCAST [[AND4]](i64) + ; SI-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SUB1]](i32), [[C5]] + ; SI-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(sgt), [[SUB1]](i32), [[C7]] + ; SI-NEXT: [[SELECT3:%[0-9]+]]:_(f64) = G_SELECT [[ICMP2]](i1), [[BITCAST5]], [[BITCAST6]] + ; SI-NEXT: [[SELECT4:%[0-9]+]]:_(f64) = G_SELECT [[ICMP3]](i1), [[UV1]], [[SELECT3]] + ; SI-NEXT: [[FCMP2:%[0-9]+]]:_(i1) = G_FCMP floatpred(ogt), [[UV1]](f64), [[C8]] + ; SI-NEXT: [[FCMP3:%[0-9]+]]:_(i1) = G_FCMP floatpred(one), [[UV1]](f64), [[SELECT4]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(i1) = G_AND [[FCMP2]], [[FCMP3]] + ; SI-NEXT: [[SELECT5:%[0-9]+]]:_(f64) = G_SELECT [[AND5]](i1), [[C9]], [[C8]] + ; SI-NEXT: [[FADD1:%[0-9]+]]:_(f64) = G_FADD [[SELECT4]], [[SELECT5]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f64>) = G_BUILD_VECTOR [[FADD]](f64), [[FADD1]](f64) + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BUILD_VECTOR]](<2 x f64>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST7]](<2 x i64>) + ; ; CI-LABEL: name: test_fceil_v2s64 ; CI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CI-NEXT: {{ $}} - ; CI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; CI-NEXT: [[FCEIL:%[0-9]+]]:_(s64) = G_FCEIL [[UV]] - ; CI-NEXT: [[FCEIL1:%[0-9]+]]:_(s64) = G_FCEIL [[UV1]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FCEIL]](s64), [[FCEIL1]](s64) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; CI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; CI-NEXT: [[UV:%[0-9]+]]:_(f64), [[UV1:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST]](<2 x f64>) + ; CI-NEXT: [[FCEIL:%[0-9]+]]:_(f64) = G_FCEIL [[UV]] + ; CI-NEXT: [[FCEIL1:%[0-9]+]]:_(f64) = G_FCEIL [[UV1]] + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f64>) = G_BUILD_VECTOR [[FCEIL]](f64), [[FCEIL1]](f64) + ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BUILD_VECTOR]](<2 x f64>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST1]](<2 x i64>) + ; ; VI-LABEL: name: test_fceil_v2s64 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; VI-NEXT: 
[[FCEIL:%[0-9]+]]:_(s64) = G_FCEIL [[UV]] - ; VI-NEXT: [[FCEIL1:%[0-9]+]]:_(s64) = G_FCEIL [[UV1]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FCEIL]](s64), [[FCEIL1]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(f64), [[UV1:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST]](<2 x f64>) + ; VI-NEXT: [[FCEIL:%[0-9]+]]:_(f64) = G_FCEIL [[UV]] + ; VI-NEXT: [[FCEIL1:%[0-9]+]]:_(f64) = G_FCEIL [[UV1]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f64>) = G_BUILD_VECTOR [[FCEIL]](f64), [[FCEIL1]](f64) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BUILD_VECTOR]](<2 x f64>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST1]](<2 x i64>) + ; ; GFX9-LABEL: name: test_fceil_v2s64 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX9-NEXT: [[FCEIL:%[0-9]+]]:_(s64) = G_FCEIL [[UV]] - ; GFX9-NEXT: [[FCEIL1:%[0-9]+]]:_(s64) = G_FCEIL [[UV1]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FCEIL]](s64), [[FCEIL1]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s64>) = G_FCEIL %0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f64), [[UV1:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST]](<2 x f64>) + ; GFX9-NEXT: [[FCEIL:%[0-9]+]]:_(f64) = G_FCEIL [[UV]] + ; GFX9-NEXT: [[FCEIL1:%[0-9]+]]:_(f64) = G_FCEIL [[UV1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f64>) = G_BUILD_VECTOR [[FCEIL]](f64), [[FCEIL1]](f64) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BUILD_VECTOR]](<2 x f64>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST1]](<2 x i64>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x f64>) = G_BITCAST %0(<2 x i64>) + %2:_(<2 x f64>) = G_FCEIL %1 + %3:_(<2 x i64>) = G_BITCAST %2(<2 x f64>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3(<2 x i64>) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcmp-s32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcmp-s32.mir index be976230b41d5..87faa91efd3a7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcmp-s32.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcmp-s32.mir @@ -10,14 +10,18 @@ body: | ; GFX1150-LABEL: name: f32_olt ; GFX1150: liveins: $sgpr0, $sgpr1 ; GFX1150-NEXT: {{ $}} - ; GFX1150-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; GFX1150-NEXT: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(olt), [[COPY]](s32), [[COPY1]] - ; GFX1150-NEXT: $sgpr0 = COPY [[FCMP]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_FCMP floatpred(olt), %0(s32), %1 - $sgpr0 = COPY %2 + ; GFX1150-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr1 + ; GFX1150-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX1150-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX1150-NEXT: [[FCMP:%[0-9]+]]:_(i32) = G_FCMP floatpred(olt), [[BITCAST]](f32), [[BITCAST1]] + ; GFX1150-NEXT: $sgpr0 = COPY [[FCMP]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(i32) = G_FCMP floatpred(olt), %2(f32), %3 + $sgpr0 = COPY %4(i32) ... @@ -30,17 +34,21 @@ body: | ; GFX1150-LABEL: name: f16_olt ; GFX1150: liveins: $sgpr0, $sgpr1 ; GFX1150-NEXT: {{ $}} - ; GFX1150-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; GFX1150-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; GFX1150-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX1150-NEXT: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(olt), [[TRUNC]](s16), [[TRUNC1]] - ; GFX1150-NEXT: $sgpr0 = COPY [[FCMP]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s16) = G_TRUNC %0(s32) - %2:_(s32) = COPY $sgpr1 - %3:_(s16) = G_TRUNC %2(s32) - %4:_(s32) = G_FCMP floatpred(olt), %1(s16), %3 - $sgpr0 = COPY %4 + ; GFX1150-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; GFX1150-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr1 + ; GFX1150-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX1150-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX1150-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX1150-NEXT: [[FCMP:%[0-9]+]]:_(i32) = G_FCMP floatpred(olt), [[BITCAST]](f16), [[BITCAST1]] + ; GFX1150-NEXT: $sgpr0 = COPY [[FCMP]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(i32) = COPY $sgpr1 + %3:_(i16) = G_TRUNC %2(i32) + %4:_(f16) = G_BITCAST %1(i16) + %5:_(f16) = G_BITCAST %3(i16) + %6:_(i32) = G_FCMP floatpred(olt), %4(f16), %5 + $sgpr0 = COPY %6(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcmp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcmp.mir index c9e56e6851a07..dcbe27d9b4abd 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcmp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcmp.mir @@ -14,32 +14,42 @@ body: | ; GFX7-LABEL: name: test_fcmp_s32 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[C]](s32), [[COPY]] - ; GFX7-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C]], [[COPY]] - ; GFX7-NEXT: $vgpr0 = COPY [[SELECT]](s32) + ; GFX7-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX7-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[C]](i32) + ; GFX7-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX7-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(oeq), [[BITCAST]](f32), [[BITCAST1]] + ; GFX7-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[FCMP]](i1), [[C]], [[COPY]] + ; GFX7-NEXT: $vgpr0 = COPY [[SELECT]](i32) + ; ; GFX8-LABEL: name: test_fcmp_s32 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[C]](s32), [[COPY]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C]], [[COPY]] - ; GFX8-NEXT: $vgpr0 = COPY [[SELECT]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[C]](i32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(oeq), [[BITCAST]](f32), [[BITCAST1]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[FCMP]](i1), [[C]], [[COPY]] + ; GFX8-NEXT: $vgpr0 = COPY [[SELECT]](i32) + ; ; GFX9-LABEL: name: test_fcmp_s32 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[C]](s32), [[COPY]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C]], [[COPY]] - ; GFX9-NEXT: $vgpr0 = COPY [[SELECT]](s32) - %0:_(s32) = G_CONSTANT i32 0 - %1:_(s32) = COPY $vgpr0 - %2:_(s1) = G_FCMP floatpred(oeq), %0, %1 - %3:_(s32) = G_SELECT %2, %0, %1 - $vgpr0 = COPY %3 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[C]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(oeq), [[BITCAST]](f32), [[BITCAST1]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[FCMP]](i1), [[C]], [[COPY]] + ; GFX9-NEXT: $vgpr0 = COPY [[SELECT]](i32) + %0:_(i32) = G_CONSTANT i32 0 + %1:_(i32) = COPY $vgpr0 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(i1) = G_FCMP floatpred(oeq), %2(f32), %3 + %5:_(i32) = G_SELECT %4(i1), %0, %1 + $vgpr0 = COPY %5(i32) ... 
--- @@ -50,32 +60,42 @@ body: | ; GFX7-LABEL: name: test_fcmp_s64 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[C]](s64), [[COPY]] - ; GFX7-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[C]], [[COPY]] - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; GFX7-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[C]](i64) + ; GFX7-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; GFX7-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(oeq), [[BITCAST]](f64), [[BITCAST1]] + ; GFX7-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[FCMP]](i1), [[C]], [[COPY]] + ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](i64) + ; ; GFX8-LABEL: name: test_fcmp_s64 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[C]](s64), [[COPY]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[C]], [[COPY]] - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[C]](i64) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(oeq), [[BITCAST]](f64), [[BITCAST1]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[FCMP]](i1), [[C]], [[COPY]] + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](i64) + ; ; GFX9-LABEL: name: test_fcmp_s64 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[C]](s64), [[COPY]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[C]], [[COPY]] - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) - %0:_(s64) = G_CONSTANT i64 0 - %1:_(s64) = COPY $vgpr0_vgpr1 - %2:_(s1) = G_FCMP floatpred(oeq), %0, %1 - %3:_(s64) = G_SELECT %2, %0, %1 - $vgpr0_vgpr1 = COPY %3 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[C]](i64) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(oeq), [[BITCAST]](f64), [[BITCAST1]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[FCMP]](i1), [[C]], [[COPY]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](i64) + %0:_(i64) = G_CONSTANT i64 0 + %1:_(i64) = COPY $vgpr0_vgpr1 + %2:_(f64) = G_BITCAST %0(i64) + %3:_(f64) = G_BITCAST %1(i64) + %4:_(i1) = G_FCMP floatpred(oeq), %2(f64), %3 + %5:_(i64) = G_SELECT %4(i1), %0, %1 + $vgpr0_vgpr1 = COPY %5(i64) ... 
--- @@ -86,42 +106,52 @@ body: | ; GFX7-LABEL: name: test_fcmp_s16 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX7-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16) - ; GFX7-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX7-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[FPEXT]](s32), [[FPEXT1]] - ; GFX7-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[C]], [[TRUNC]] - ; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) - ; GFX7-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX7-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 0 + ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX7-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[C]](i16) + ; GFX7-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX7-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; GFX7-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST1]](f16) + ; GFX7-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(oeq), [[FPEXT]](f32), [[FPEXT1]] + ; GFX7-NEXT: [[SELECT:%[0-9]+]]:_(i16) = G_SELECT [[FCMP]](i1), [[C]], [[TRUNC]] + ; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[SELECT]](i16) + ; GFX7-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + ; ; GFX8-LABEL: name: test_fcmp_s16 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[C]](s16), [[TRUNC]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[C]], [[TRUNC]] - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) - ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 0 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[C]](i16) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(oeq), [[BITCAST]](f16), [[BITCAST1]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i16) = G_SELECT [[FCMP]](i1), [[C]], [[TRUNC]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[SELECT]](i16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + ; ; GFX9-LABEL: name: test_fcmp_s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[C]](s16), [[TRUNC]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[C]], [[TRUNC]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s16) = G_CONSTANT i16 0 - %1:_(s32) = COPY $vgpr0 - %2:_(s16) = G_TRUNC %1 - %3:_(s1) = G_FCMP floatpred(oeq), %0, %2 - %4:_(s16) = G_SELECT %3, %0, %2 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 0 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[C]](i16) + ; GFX9-NEXT: 
[[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(oeq), [[BITCAST]](f16), [[BITCAST1]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i16) = G_SELECT [[FCMP]](i1), [[C]], [[TRUNC]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[SELECT]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i16) = G_CONSTANT i16 0 + %1:_(i32) = COPY $vgpr0 + %2:_(i16) = G_TRUNC %1(i32) + %3:_(f16) = G_BITCAST %0(i16) + %4:_(f16) = G_BITCAST %2(i16) + %5:_(i1) = G_FCMP floatpred(oeq), %3(f16), %4 + %6:_(i16) = G_SELECT %5(i1), %0, %2 + %7:_(i32) = G_ANYEXT %6(i16) + $vgpr0 = COPY %7(i32) ... --- @@ -132,45 +162,58 @@ body: | ; GFX7-LABEL: name: test_fcmp_v2s32 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX7-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[C]](s32), [[UV]] - ; GFX7-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[C]](s32), [[UV1]] - ; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP]](s1) - ; GFX7-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP1]](s1) - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX7-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s32>) + ; GFX7-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; GFX7-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[C]](i32) + ; GFX7-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[C]](i32) + ; GFX7-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; GFX7-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(oeq), [[BITCAST1]](f32), [[UV]] + ; GFX7-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(oeq), [[BITCAST2]](f32), [[UV1]] + ; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[FCMP]](i1) + ; GFX7-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[FCMP1]](i1) + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32) + ; GFX7-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x i32>) + ; ; GFX8-LABEL: name: test_fcmp_v2s32 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[C]](s32), [[UV]] - ; GFX8-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[C]](s32), [[UV1]] - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP]](s1) - ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP1]](s1) - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX8-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[C]](i32) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[C]](i32) + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES 
[[BITCAST]](<2 x f32>) + ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(oeq), [[BITCAST1]](f32), [[UV]] + ; GFX8-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(oeq), [[BITCAST2]](f32), [[UV1]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[FCMP]](i1) + ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[FCMP1]](i1) + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32) + ; GFX8-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x i32>) + ; ; GFX9-LABEL: name: test_fcmp_v2s32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[C]](s32), [[UV]] - ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[C]](s32), [[UV1]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP]](s1) - ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP1]](s1) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s32>) - %0:_(s32) = G_CONSTANT i32 0 - %1:_(<2 x s32>) = G_BUILD_VECTOR %0, %0 - %2:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %3:_(<2 x s1>) = G_FCMP floatpred(oeq), %1, %2 - %4:_(<2 x s32>) = G_ANYEXT %3 - S_NOP 0, implicit %4 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[C]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[C]](i32) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(oeq), [[BITCAST1]](f32), [[UV]] + ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(oeq), [[BITCAST2]](f32), [[UV1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[FCMP]](i1) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[FCMP1]](i1) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32) + ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x i32>) + %0:_(i32) = G_CONSTANT i32 0 + %1:_(<2 x i32>) = G_BUILD_VECTOR %0(i32), %0(i32) + %2:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %3:_(<2 x f32>) = G_BITCAST %1(<2 x i32>) + %4:_(<2 x f32>) = G_BITCAST %2(<2 x i32>) + %5:_(<2 x i1>) = G_FCMP floatpred(oeq), %3(<2 x f32>), %4 + %6:_(<2 x i32>) = G_ANYEXT %5(<2 x i1>) + S_NOP 0, implicit %6(<2 x i32>) ... 
--- @@ -181,45 +224,58 @@ body: | ; GFX7-LABEL: name: test_fcmp_v2s32_flags ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX7-NEXT: [[FCMP:%[0-9]+]]:_(s1) = nnan G_FCMP floatpred(oeq), [[C]](s32), [[UV]] - ; GFX7-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = nnan G_FCMP floatpred(oeq), [[C]](s32), [[UV1]] - ; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP]](s1) - ; GFX7-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP1]](s1) - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX7-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s32>) + ; GFX7-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; GFX7-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[C]](i32) + ; GFX7-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[C]](i32) + ; GFX7-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; GFX7-NEXT: [[FCMP:%[0-9]+]]:_(i1) = nnan G_FCMP floatpred(oeq), [[BITCAST1]](f32), [[UV]] + ; GFX7-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = nnan G_FCMP floatpred(oeq), [[BITCAST2]](f32), [[UV1]] + ; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[FCMP]](i1) + ; GFX7-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[FCMP1]](i1) + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32) + ; GFX7-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x i32>) + ; ; GFX8-LABEL: name: test_fcmp_v2s32_flags ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = nnan G_FCMP floatpred(oeq), [[C]](s32), [[UV]] - ; GFX8-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = nnan G_FCMP floatpred(oeq), [[C]](s32), [[UV1]] - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP]](s1) - ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP1]](s1) - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX8-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[C]](i32) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[C]](i32) + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(i1) = nnan G_FCMP floatpred(oeq), [[BITCAST1]](f32), [[UV]] + ; GFX8-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = nnan G_FCMP floatpred(oeq), [[BITCAST2]](f32), [[UV1]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[FCMP]](i1) + ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[FCMP1]](i1) + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32) + ; GFX8-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x i32>) + ; ; GFX9-LABEL: name: test_fcmp_v2s32_flags ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; 
GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = nnan G_FCMP floatpred(oeq), [[C]](s32), [[UV]] - ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = nnan G_FCMP floatpred(oeq), [[C]](s32), [[UV1]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP]](s1) - ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP1]](s1) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s32>) - %0:_(s32) = G_CONSTANT i32 0 - %1:_(<2 x s32>) = G_BUILD_VECTOR %0, %0 - %2:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %3:_(<2 x s1>) = nnan G_FCMP floatpred(oeq), %1, %2 - %4:_(<2 x s32>) = G_ANYEXT %3 - S_NOP 0, implicit %4 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[C]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[C]](i32) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(i1) = nnan G_FCMP floatpred(oeq), [[BITCAST1]](f32), [[UV]] + ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = nnan G_FCMP floatpred(oeq), [[BITCAST2]](f32), [[UV1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[FCMP]](i1) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[FCMP1]](i1) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32) + ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x i32>) + %0:_(i32) = G_CONSTANT i32 0 + %1:_(<2 x i32>) = G_BUILD_VECTOR %0(i32), %0(i32) + %2:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %3:_(<2 x f32>) = G_BITCAST %1(<2 x i32>) + %4:_(<2 x f32>) = G_BITCAST %2(<2 x i32>) + %5:_(<2 x i1>) = nnan G_FCMP floatpred(oeq), %3(<2 x f32>), %4 + %6:_(<2 x i32>) = G_ANYEXT %5(<2 x i1>) + S_NOP 0, implicit %6(<2 x i32>) ... 
--- @@ -231,53 +287,59 @@ body: | ; GFX7-LABEL: name: test_fcmp_v3s32 ; GFX7: liveins: $vgpr0_vgpr1_vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX7-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<3 x s32>) - ; GFX7-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX7-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[UV]](s32), [[UV3]] - ; GFX7-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[UV1]](s32), [[UV4]] - ; GFX7-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[UV2]](s32), [[UV5]] - ; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP]](s1) - ; GFX7-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP1]](s1) - ; GFX7-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP2]](s1) - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; GFX7-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; GFX7-NEXT: [[DEF:%[0-9]+]]:_(<3 x f32>) = G_IMPLICIT_DEF + ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX7-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY]](<3 x i32>) + ; GFX7-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[DEF]](<3 x f32>) + ; GFX7-NEXT: [[UV3:%[0-9]+]]:_(f32), [[UV4:%[0-9]+]]:_(f32), [[UV5:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; GFX7-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(oeq), [[UV]](f32), [[UV3]] + ; GFX7-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(oeq), [[UV1]](f32), [[UV4]] + ; GFX7-NEXT: [[FCMP2:%[0-9]+]]:_(i1) = G_FCMP floatpred(oeq), [[UV2]](f32), [[UV5]] + ; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[FCMP]](i1) + ; GFX7-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[FCMP1]](i1) + ; GFX7-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[FCMP2]](i1) + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32), [[ANYEXT2]](i32) + ; GFX7-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x i32>) + ; ; GFX8-LABEL: name: test_fcmp_v3s32 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<3 x s32>) - ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[UV]](s32), [[UV3]] - ; GFX8-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[UV1]](s32), [[UV4]] - ; GFX8-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[UV2]](s32), [[UV5]] - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP]](s1) - ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP1]](s1) - ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP2]](s1) - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; GFX8-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<3 x f32>) = G_IMPLICIT_DEF + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST 
[[COPY]](<3 x i32>) + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[DEF]](<3 x f32>) + ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(f32), [[UV4:%[0-9]+]]:_(f32), [[UV5:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(oeq), [[UV]](f32), [[UV3]] + ; GFX8-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(oeq), [[UV1]](f32), [[UV4]] + ; GFX8-NEXT: [[FCMP2:%[0-9]+]]:_(i1) = G_FCMP floatpred(oeq), [[UV2]](f32), [[UV5]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[FCMP]](i1) + ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[FCMP1]](i1) + ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[FCMP2]](i1) + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32), [[ANYEXT2]](i32) + ; GFX8-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x i32>) + ; ; GFX9-LABEL: name: test_fcmp_v3s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<3 x s32>) - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[UV]](s32), [[UV3]] - ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[UV1]](s32), [[UV4]] - ; GFX9-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[UV2]](s32), [[UV5]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP]](s1) - ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP1]](s1) - ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP2]](s1) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) - %0:_(<3 x s32>) = G_IMPLICIT_DEF - %1:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %2:_(<3 x s1>) = G_FCMP floatpred(oeq), %0, %1 - %3:_(<3 x s32>) = G_ANYEXT %2 - S_NOP 0, implicit %3 + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<3 x f32>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY]](<3 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[DEF]](<3 x f32>) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(f32), [[UV4:%[0-9]+]]:_(f32), [[UV5:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(oeq), [[UV]](f32), [[UV3]] + ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(oeq), [[UV1]](f32), [[UV4]] + ; GFX9-NEXT: [[FCMP2:%[0-9]+]]:_(i1) = G_FCMP floatpred(oeq), [[UV2]](f32), [[UV5]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[FCMP]](i1) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[FCMP1]](i1) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[FCMP2]](i1) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32), [[ANYEXT2]](i32) + ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x i32>) + %0:_(<3 x f32>) = G_IMPLICIT_DEF + %1:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %2:_(<3 x f32>) = G_BITCAST %1(<3 x i32>) + %3:_(<3 x i1>) = G_FCMP floatpred(oeq), %0(<3 x f32>), %2 + %4:_(<3 x i32>) = G_ANYEXT %3(<3 x i1>) + S_NOP 0, implicit 
%4(<3 x i32>) ... @@ -291,62 +353,68 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GFX7-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load (<4 x s32>)) - ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX7-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; GFX7-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; GFX7-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[UV]](s32), [[UV4]] - ; GFX7-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[UV1]](s32), [[UV5]] - ; GFX7-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[UV2]](s32), [[UV6]] - ; GFX7-NEXT: [[FCMP3:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[UV3]](s32), [[UV7]] - ; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP]](s1) - ; GFX7-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP1]](s1) - ; GFX7-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP2]](s1) - ; GFX7-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP3]](s1) - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32) - ; GFX7-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<4 x s32>) + ; GFX7-NEXT: [[LOAD:%[0-9]+]]:_(<4 x f32>) = G_LOAD [[DEF]](p1) :: (volatile load (<4 x f32>)) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX7-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[COPY]](<4 x i32>) + ; GFX7-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[LOAD]](<4 x f32>) + ; GFX7-NEXT: [[UV4:%[0-9]+]]:_(f32), [[UV5:%[0-9]+]]:_(f32), [[UV6:%[0-9]+]]:_(f32), [[UV7:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<4 x f32>) + ; GFX7-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(oeq), [[UV]](f32), [[UV4]] + ; GFX7-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(oeq), [[UV1]](f32), [[UV5]] + ; GFX7-NEXT: [[FCMP2:%[0-9]+]]:_(i1) = G_FCMP floatpred(oeq), [[UV2]](f32), [[UV6]] + ; GFX7-NEXT: [[FCMP3:%[0-9]+]]:_(i1) = G_FCMP floatpred(oeq), [[UV3]](f32), [[UV7]] + ; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[FCMP]](i1) + ; GFX7-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[FCMP1]](i1) + ; GFX7-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[FCMP2]](i1) + ; GFX7-NEXT: [[ANYEXT3:%[0-9]+]]:_(i32) = G_ANYEXT [[FCMP3]](i1) + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32), [[ANYEXT2]](i32), [[ANYEXT3]](i32) + ; GFX7-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<4 x i32>) + ; ; GFX8-LABEL: name: test_fcmp_v4s32 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GFX8-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load (<4 x s32>)) - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[UV]](s32), [[UV4]] - ; GFX8-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = 
G_FCMP floatpred(oeq), [[UV1]](s32), [[UV5]] - ; GFX8-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[UV2]](s32), [[UV6]] - ; GFX8-NEXT: [[FCMP3:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[UV3]](s32), [[UV7]] - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP]](s1) - ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP1]](s1) - ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP2]](s1) - ; GFX8-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP3]](s1) - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32) - ; GFX8-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<4 x s32>) + ; GFX8-NEXT: [[LOAD:%[0-9]+]]:_(<4 x f32>) = G_LOAD [[DEF]](p1) :: (volatile load (<4 x f32>)) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[COPY]](<4 x i32>) + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[LOAD]](<4 x f32>) + ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(f32), [[UV5:%[0-9]+]]:_(f32), [[UV6:%[0-9]+]]:_(f32), [[UV7:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<4 x f32>) + ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(oeq), [[UV]](f32), [[UV4]] + ; GFX8-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(oeq), [[UV1]](f32), [[UV5]] + ; GFX8-NEXT: [[FCMP2:%[0-9]+]]:_(i1) = G_FCMP floatpred(oeq), [[UV2]](f32), [[UV6]] + ; GFX8-NEXT: [[FCMP3:%[0-9]+]]:_(i1) = G_FCMP floatpred(oeq), [[UV3]](f32), [[UV7]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[FCMP]](i1) + ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[FCMP1]](i1) + ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[FCMP2]](i1) + ; GFX8-NEXT: [[ANYEXT3:%[0-9]+]]:_(i32) = G_ANYEXT [[FCMP3]](i1) + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32), [[ANYEXT2]](i32), [[ANYEXT3]](i32) + ; GFX8-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<4 x i32>) + ; ; GFX9-LABEL: name: test_fcmp_v4s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load (<4 x s32>)) - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[UV]](s32), [[UV4]] - ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[UV1]](s32), [[UV5]] - ; GFX9-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[UV2]](s32), [[UV6]] - ; GFX9-NEXT: [[FCMP3:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[UV3]](s32), [[UV7]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP]](s1) - ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP1]](s1) - ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP2]](s1) - ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP3]](s1) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32) - ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<4 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x f32>) = G_LOAD [[DEF]](p1) :: 
(volatile load (<4 x f32>)) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[COPY]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[LOAD]](<4 x f32>) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(f32), [[UV5:%[0-9]+]]:_(f32), [[UV6:%[0-9]+]]:_(f32), [[UV7:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<4 x f32>) + ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(oeq), [[UV]](f32), [[UV4]] + ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(oeq), [[UV1]](f32), [[UV5]] + ; GFX9-NEXT: [[FCMP2:%[0-9]+]]:_(i1) = G_FCMP floatpred(oeq), [[UV2]](f32), [[UV6]] + ; GFX9-NEXT: [[FCMP3:%[0-9]+]]:_(i1) = G_FCMP floatpred(oeq), [[UV3]](f32), [[UV7]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[FCMP]](i1) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[FCMP1]](i1) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[FCMP2]](i1) + ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(i32) = G_ANYEXT [[FCMP3]](i1) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32), [[ANYEXT2]](i32), [[ANYEXT3]](i32) + ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<4 x i32>) %0:_(p1) = G_IMPLICIT_DEF - %1:_(<4 x s32>) = G_LOAD %0 :: (volatile load (<4 x s32>)) - %2:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %3:_(<4 x s1>) = G_FCMP floatpred(oeq) , %1, %2 - %4:_(<4 x s32>) = G_ANYEXT %3 - S_NOP 0, implicit %4 + %1:_(<4 x f32>) = G_LOAD %0(p1) :: (volatile load (<4 x f32>)) + %2:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %3:_(<4 x f32>) = G_BITCAST %2(<4 x i32>) + %4:_(<4 x i1>) = G_FCMP floatpred(oeq), %1(<4 x f32>), %3 + %5:_(<4 x i32>) = G_ANYEXT %4(<4 x i1>) + S_NOP 0, implicit %5(<4 x i32>) ... 
@@ -358,84 +426,112 @@ body: | ; GFX7-LABEL: name: test_icmp_v2s16 ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; GFX7-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX7-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX7-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX7-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX7-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX7-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX7-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX7-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX7-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; GFX7-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[FPEXT]](s32), [[FPEXT1]] - ; GFX7-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; GFX7-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; GFX7-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[FPEXT2]](s32), [[FPEXT3]] - ; GFX7-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; GFX7-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<2 x s32>) - ; GFX7-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[UV]], [[UV2]] - ; GFX7-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[UV1]], [[UV3]] - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr4_vgpr5 + ; GFX7-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX7-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX7-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %26(i16) + ; GFX7-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %32(i16) + ; GFX7-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %27(i16) + ; GFX7-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %33(i16) + ; GFX7-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST1]](<2 x f16>) + ; GFX7-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST6]](<2 x i16>) + ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST7]](i32) + ; GFX7-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST7]], [[C]](i32) + ; GFX7-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX7-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; GFX7-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST8]](<2 x i16>) + ; GFX7-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST9]](i32) + ; GFX7-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST9]], [[C]](i32) + ; GFX7-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX7-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; GFX7-NEXT: 
[[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST3]](f16) + ; GFX7-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(oeq), [[FPEXT]](f32), [[FPEXT1]] + ; GFX7-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST4]](f16) + ; GFX7-NEXT: [[FPEXT3:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST5]](f16) + ; GFX7-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(oeq), [[FPEXT2]](f32), [[FPEXT3]] + ; GFX7-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY2]](<2 x i32>) + ; GFX7-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY3]](<2 x i32>) + ; GFX7-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[FCMP]](i1), [[UV]], [[UV2]] + ; GFX7-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[FCMP1]](i1), [[UV1]], [[UV3]] + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SELECT]](i32), [[SELECT1]](i32) + ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + ; ; GFX8-LABEL: name: test_icmp_v2s16 ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[TRUNC]](s16), [[TRUNC2]] - ; GFX8-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[TRUNC1]](s16), [[TRUNC3]] - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<2 x s32>) - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[UV]], [[UV2]] - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[UV1]], [[UV3]] - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr4_vgpr5 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %22(i16) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %28(i16) + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %23(i16) + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %29(i16) + ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST1]](<2 x f16>) + ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST6]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST7]](i32) + ; GFX8-NEXT: 
[[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST7]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX8-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; GFX8-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST8]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST9]](i32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST9]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(oeq), [[BITCAST2]](f16), [[BITCAST3]] + ; GFX8-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(oeq), [[BITCAST4]](f16), [[BITCAST5]] + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY2]](<2 x i32>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY3]](<2 x i32>) + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[FCMP]](i1), [[UV]], [[UV2]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[FCMP1]](i1), [[UV1]], [[UV3]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SELECT]](i32), [[SELECT1]](i32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + ; ; GFX9-LABEL: name: test_icmp_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[TRUNC]](s16), [[TRUNC2]] - ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[TRUNC1]](s16), [[TRUNC3]] - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<2 x s32>) - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[UV]], [[UV2]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[UV1]], [[UV3]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %3:_(<2 x s32>) = COPY $vgpr4_vgpr5 - %4:_(<2 x s1>) = G_FCMP floatpred(oeq), %0, %1 - %5:_(<2 x s32>) = G_SELECT %4, %2, %3 - $vgpr0_vgpr1 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr4_vgpr5 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST 
[[COPY]](<2 x i16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %22(i16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %28(i16) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %23(i16) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %29(i16) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST1]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST6]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST7]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST7]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST8]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST9]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST9]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(oeq), [[BITCAST2]](f16), [[BITCAST3]] + ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(oeq), [[BITCAST4]](f16), [[BITCAST5]] + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY2]](<2 x i32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY3]](<2 x i32>) + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[FCMP]](i1), [[UV]], [[UV2]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[FCMP1]](i1), [[UV1]], [[UV3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SELECT]](i32), [[SELECT1]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %3:_(<2 x i32>) = COPY $vgpr4_vgpr5 + %4:_(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %5:_(<2 x f16>) = G_BITCAST %1(<2 x i16>) + %6:_(<2 x i1>) = G_FCMP floatpred(oeq), %4(<2 x f16>), %5 + %7:_(<2 x i32>) = G_SELECT %6(<2 x i1>), %2, %3 + $vgpr0_vgpr1 = COPY %7(<2 x i32>) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fconstant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fconstant.mir index 6906ff9f5b349..04b6968cf4b56 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fconstant.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fconstant.mir @@ -8,10 +8,12 @@ body: | bb.0: ; GCN-LABEL: name: test_fconstant_s32 - ; GCN: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GCN-NEXT: $vgpr0 = COPY [[C]](s32) - %0:_(s32) = G_FCONSTANT float 1.0 - $vgpr0 = COPY %0 + ; GCN: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[C]](f32) + ; GCN-NEXT: $vgpr0 = COPY [[BITCAST]](i32) + %0:_(f32) = G_FCONSTANT float 1.000000e+00 + %1:_(i32) = G_BITCAST %0(f32) + $vgpr0 = COPY %1(i32) ... 
--- name: test_fconstant_s64 @@ -19,10 +21,12 @@ body: | bb.0: ; GCN-LABEL: name: test_fconstant_s64 - ; GCN: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[C]](s64) - %0:_(s64) = G_FCONSTANT double 1.0 - $vgpr0_vgpr1 = COPY %0 + ; GCN: [[C:%[0-9]+]]:_(f64) = G_FCONSTANT double 1.000000e+00 + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(i64) = G_BITCAST [[C]](f64) + ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](i64) + %0:_(f64) = G_FCONSTANT double 1.000000e+00 + %1:_(i64) = G_BITCAST %0(f64) + $vgpr0_vgpr1 = COPY %1(i64) ... --- @@ -31,10 +35,12 @@ body: | bb.0: ; GCN-LABEL: name: test_fconstant_s16 - ; GCN: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3C00 - ; GCN-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s16) - ; GCN-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s16) = G_FCONSTANT half 1.0 - %1:_(s32) = G_ANYEXT %0 - $vgpr0 = COPY %1 + ; GCN: [[C:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH3C00 + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(i16) = G_BITCAST [[C]](f16) + ; GCN-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST]](i16) + ; GCN-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(f16) = G_FCONSTANT half 0xH3C00 + %1:_(i16) = G_BITCAST %0(f16) + %2:_(i32) = G_ANYEXT %1(i16) + $vgpr0 = COPY %2(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcopysign.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcopysign.mir index 60ccd20c095cd..2f5bc3f1033bd 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcopysign.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcopysign.mir @@ -14,54 +14,75 @@ body: | ; SI-LABEL: name: test_copysign_s16_s16 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; SI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; SI-NEXT: %4:_(s16) = disjoint G_OR [[AND]], [[AND1]] - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT %4(s16) - ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; SI-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 -32768 + ; SI-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 32767 + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; SI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[BITCAST2]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[BITCAST3]], [[C]] + ; SI-NEXT: [[OR:%[0-9]+]]:_(i16) = disjoint G_OR [[AND]], [[AND1]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[OR]](i16) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST5]](i16) + ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; VI-LABEL: name: test_copysign_s16_s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 
- ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; VI-NEXT: %4:_(s16) = disjoint G_OR [[AND]], [[AND1]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT %4(s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; VI-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 -32768 + ; VI-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 32767 + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; VI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[BITCAST2]], [[C1]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[BITCAST3]], [[C]] + ; VI-NEXT: [[OR:%[0-9]+]]:_(i16) = disjoint G_OR [[AND]], [[AND1]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[OR]](i16) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST5]](i16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: test_copysign_s16_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; GFX9-NEXT: %4:_(s16) = disjoint G_OR [[AND]], [[AND1]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT %4(s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s16) = G_FCOPYSIGN %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 -32768 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 32767 + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[BITCAST2]], [[C1]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[BITCAST3]], [[C]] + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i16) = disjoint G_OR [[AND]], [[AND1]] + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[OR]](i16) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = 
G_ANYEXT [[BITCAST5]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(f16) = G_BITCAST %2(i16) + %5:_(f16) = G_BITCAST %3(i16) + %6:_(f16) = G_FCOPYSIGN %4, %5(f16) + %7:_(i16) = G_BITCAST %6(f16) + %8:_(i32) = G_ANYEXT %7(i16) + $vgpr0 = COPY %8(i32) ... --- @@ -73,42 +94,63 @@ body: | ; SI-LABEL: name: test_copysign_s32_s32 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; SI-NEXT: %2:_(s32) = disjoint G_OR [[AND]], [[AND1]] - ; SI-NEXT: $vgpr0 = COPY %2(s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST]](f32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST1]](f32) + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST3]], [[C]] + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = disjoint G_OR [[AND]], [[AND1]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(f32) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST4]](f32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST5]](i32) ; ; VI-LABEL: name: test_copysign_s32_s32 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; VI-NEXT: %2:_(s32) = disjoint G_OR [[AND]], [[AND1]] - ; VI-NEXT: $vgpr0 = COPY %2(s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST]](f32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST1]](f32) + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C1]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST3]], [[C]] + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = disjoint G_OR [[AND]], [[AND1]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(f32) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST4]](f32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST5]](i32) ; ; GFX9-LABEL: name: test_copysign_s32_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 2147483647 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX9-NEXT: %2:_(s32) = disjoint G_OR [[AND]], [[AND1]] - ; GFX9-NEXT: $vgpr0 = COPY %2(s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_FCOPYSIGN %0, %1 - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST]](f32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST1]](f32) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C1]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST3]], [[C]] + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = disjoint G_OR [[AND]], [[AND1]] + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(f32) = G_BITCAST [[OR]](i32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST4]](f32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST5]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32) = G_FCOPYSIGN %2, %3(f32) + %5:_(i32) = G_BITCAST %4(f32) + $vgpr0 = COPY %5(i32) ... --- @@ -120,42 +162,63 @@ body: | ; SI-LABEL: name: test_copysign_s64_s64 ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C1]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] - ; SI-NEXT: %2:_(s64) = disjoint G_OR [[AND]], [[AND1]] - ; SI-NEXT: $vgpr0_vgpr1 = COPY %2(s64) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[COPY1]](i64) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 -9223372036854775808 + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 9223372036854775807 + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i64) = G_BITCAST [[BITCAST]](f64) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(i64) = G_BITCAST [[BITCAST1]](f64) + ; SI-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[BITCAST2]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[BITCAST3]], [[C]] + ; SI-NEXT: [[OR:%[0-9]+]]:_(i64) = disjoint G_OR [[AND]], [[AND1]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(f64) = G_BITCAST [[OR]](i64) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(i64) = G_BITCAST [[BITCAST4]](f64) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST5]](i64) ; ; VI-LABEL: name: test_copysign_s64_s64 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C1]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] - ; VI-NEXT: %2:_(s64) = disjoint G_OR [[AND]], [[AND1]] 
- ; VI-NEXT: $vgpr0_vgpr1 = COPY %2(s64) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[COPY1]](i64) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 -9223372036854775808 + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 9223372036854775807 + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i64) = G_BITCAST [[BITCAST]](f64) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(i64) = G_BITCAST [[BITCAST1]](f64) + ; VI-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[BITCAST2]], [[C1]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[BITCAST3]], [[C]] + ; VI-NEXT: [[OR:%[0-9]+]]:_(i64) = disjoint G_OR [[AND]], [[AND1]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(f64) = G_BITCAST [[OR]](i64) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(i64) = G_BITCAST [[BITCAST4]](f64) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST5]](i64) ; ; GFX9-LABEL: name: test_copysign_s64_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C1]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] - ; GFX9-NEXT: %2:_(s64) = disjoint G_OR [[AND]], [[AND1]] - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY %2(s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = G_FCOPYSIGN %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[COPY1]](i64) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 -9223372036854775808 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 9223372036854775807 + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i64) = G_BITCAST [[BITCAST]](f64) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i64) = G_BITCAST [[BITCAST1]](f64) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[BITCAST2]], [[C1]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[BITCAST3]], [[C]] + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i64) = disjoint G_OR [[AND]], [[AND1]] + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(f64) = G_BITCAST [[OR]](i64) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(i64) = G_BITCAST [[BITCAST4]](f64) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST5]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(f64) = G_BITCAST %0(i64) + %3:_(f64) = G_BITCAST %1(i64) + %4:_(f64) = G_FCOPYSIGN %2, %3(f64) + %5:_(i64) = G_BITCAST %4(f64) + $vgpr0_vgpr1 = COPY %5(i64) ... 
--- @@ -167,51 +230,72 @@ body: | ; SI-LABEL: name: test_copysign_s64_s32 ; SI: liveins: $vgpr0_vgpr1, $vgpr2 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C1]] - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY1]](s32) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ZEXT]], [[C2]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C]] - ; SI-NEXT: %2:_(s64) = disjoint G_OR [[AND]], [[AND1]] - ; SI-NEXT: $vgpr0_vgpr1 = COPY %2(s64) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 -9223372036854775808 + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 9223372036854775807 + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i64) = G_BITCAST [[BITCAST]](f64) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST1]](f32) + ; SI-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[BITCAST2]], [[C1]] + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[BITCAST3]](i32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[ZEXT]], [[C2]](i32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[SHL]], [[C]] + ; SI-NEXT: [[OR:%[0-9]+]]:_(i64) = disjoint G_OR [[AND]], [[AND1]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(f64) = G_BITCAST [[OR]](i64) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(i64) = G_BITCAST [[BITCAST4]](f64) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST5]](i64) ; ; VI-LABEL: name: test_copysign_s64_s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C1]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY1]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ZEXT]], [[C2]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C]] - ; VI-NEXT: %2:_(s64) = disjoint G_OR [[AND]], [[AND1]] - ; VI-NEXT: $vgpr0_vgpr1 = COPY %2(s64) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 -9223372036854775808 + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 9223372036854775807 + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i64) = G_BITCAST [[BITCAST]](f64) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST1]](f32) + ; VI-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[BITCAST2]], [[C1]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[BITCAST3]](i32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[ZEXT]], [[C2]](i32) + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[SHL]], [[C]] + ; VI-NEXT: [[OR:%[0-9]+]]:_(i64) = disjoint G_OR [[AND]], [[AND1]] + ; VI-NEXT: 
[[BITCAST4:%[0-9]+]]:_(f64) = G_BITCAST [[OR]](i64) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(i64) = G_BITCAST [[BITCAST4]](f64) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST5]](i64) ; ; GFX9-LABEL: name: test_copysign_s64_s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C1]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY1]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ZEXT]], [[C2]](s32) - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C]] - ; GFX9-NEXT: %2:_(s64) = disjoint G_OR [[AND]], [[AND1]] - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY %2(s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s64) = G_FCOPYSIGN %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 -9223372036854775808 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 9223372036854775807 + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i64) = G_BITCAST [[BITCAST]](f64) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST1]](f32) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[BITCAST2]], [[C1]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[BITCAST3]](i32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[ZEXT]], [[C2]](i32) + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[SHL]], [[C]] + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i64) = disjoint G_OR [[AND]], [[AND1]] + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(f64) = G_BITCAST [[OR]](i64) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(i64) = G_BITCAST [[BITCAST4]](f64) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST5]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = COPY $vgpr2 + %2:_(f64) = G_BITCAST %0(i64) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f64) = G_FCOPYSIGN %2, %3(f32) + %5:_(i64) = G_BITCAST %4(f64) + $vgpr0_vgpr1 = COPY %5(i64) ... 
--- @@ -223,51 +307,72 @@ body: | ; SI-LABEL: name: test_copysign_s32_s64 ; SI: liveins: $vgpr0, $vgpr1_vgpr2 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr1_vgpr2 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C2]](s32) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] - ; SI-NEXT: %2:_(s32) = disjoint G_OR [[AND]], [[AND1]] - ; SI-NEXT: $vgpr0 = COPY %2(s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr1_vgpr2 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[COPY1]](i64) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST]](f32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(i64) = G_BITCAST [[BITCAST1]](f64) + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C1]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[BITCAST3]], [[C2]](i32) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[LSHR]](i64) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[TRUNC]], [[C]] + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = disjoint G_OR [[AND]], [[AND1]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(f32) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST4]](f32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST5]](i32) ; ; VI-LABEL: name: test_copysign_s32_s64 ; VI: liveins: $vgpr0, $vgpr1_vgpr2 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr1_vgpr2 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C2]](s32) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] - ; VI-NEXT: %2:_(s32) = disjoint G_OR [[AND]], [[AND1]] - ; VI-NEXT: $vgpr0 = COPY %2(s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr1_vgpr2 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[COPY1]](i64) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST]](f32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(i64) = G_BITCAST [[BITCAST1]](f64) + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C1]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[BITCAST3]], [[C2]](i32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[LSHR]](i64) + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[TRUNC]], [[C]] + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = disjoint G_OR [[AND]], [[AND1]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(f32) = G_BITCAST [[OR]](i32) + ; VI-NEXT: 
[[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST4]](f32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST5]](i32) ; ; GFX9-LABEL: name: test_copysign_s32_s64 ; GFX9: liveins: $vgpr0, $vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr1_vgpr2 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C2]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] - ; GFX9-NEXT: %2:_(s32) = disjoint G_OR [[AND]], [[AND1]] - ; GFX9-NEXT: $vgpr0 = COPY %2(s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s64) = COPY $vgpr1_vgpr2 - %2:_(s32) = G_FCOPYSIGN %0, %1 - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr1_vgpr2 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[COPY1]](i64) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST]](f32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i64) = G_BITCAST [[BITCAST1]](f64) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C1]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[BITCAST3]], [[C2]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[LSHR]](i64) + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[TRUNC]], [[C]] + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = disjoint G_OR [[AND]], [[AND1]] + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(f32) = G_BITCAST [[OR]](i32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST4]](f32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST5]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i64) = COPY $vgpr1_vgpr2 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f64) = G_BITCAST %1(i64) + %4:_(f32) = G_FCOPYSIGN %2, %3(f64) + %5:_(i32) = G_BITCAST %4(f32) + $vgpr0 = COPY %5(i32) ... 
--- @@ -279,59 +384,80 @@ body: | ; SI-LABEL: name: test_copysign_s16_s32 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; SI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; SI-NEXT: %3:_(s16) = disjoint G_OR [[AND]], [[AND1]] - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT %3(s16) - ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 -32768 + ; SI-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 32767 + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST1]](f32) + ; SI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[BITCAST2]], [[C1]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C2]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C]] + ; SI-NEXT: [[OR:%[0-9]+]]:_(i16) = disjoint G_OR [[AND]], [[AND1]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[OR]](i16) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST5]](i16) + ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; VI-LABEL: name: test_copysign_s16_s32 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; VI-NEXT: %3:_(s16) = disjoint G_OR [[AND]], [[AND1]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT %3(s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 -32768 + ; VI-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 32767 + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST1]](f32) + ; VI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[BITCAST2]], [[C1]] + ; VI-NEXT: 
[[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C2]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C]] + ; VI-NEXT: [[OR:%[0-9]+]]:_(i16) = disjoint G_OR [[AND]], [[AND1]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[OR]](i16) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST5]](i16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: test_copysign_s16_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; GFX9-NEXT: %3:_(s16) = disjoint G_OR [[AND]], [[AND1]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT %3(s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_FCOPYSIGN %2, %1 - %4:_(s32) = G_ANYEXT %3 - $vgpr0 = COPY %4 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 -32768 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 32767 + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST1]](f32) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[BITCAST2]], [[C1]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C2]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C]] + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i16) = disjoint G_OR [[AND]], [[AND1]] + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[OR]](i16) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST5]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(f16) = G_BITCAST %2(i16) + %4:_(f32) = G_BITCAST %1(i32) + %5:_(f16) = G_FCOPYSIGN %3, %4(f32) + %6:_(i16) = G_BITCAST %5(f16) + %7:_(i32) = G_ANYEXT %6(i16) + $vgpr0 = COPY %7(i32) ... 
--- @@ -343,55 +469,76 @@ body: | ; SI-LABEL: name: test_copysign_s32_s16 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SHL]], [[C]] - ; SI-NEXT: %3:_(s32) = disjoint G_OR [[AND]], [[AND2]] - ; SI-NEXT: $vgpr0 = COPY %3(s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST]](f32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C1]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST3]](i16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT]], [[C2]](i32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[SHL]], [[C]] + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = disjoint G_OR [[AND]], [[AND1]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(f32) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST4]](f32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST5]](i32) ; ; VI-LABEL: name: test_copysign_s32_s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SHL]], [[C]] - ; VI-NEXT: %3:_(s32) = disjoint G_OR [[AND]], [[AND2]] - ; VI-NEXT: $vgpr0 = COPY %3(s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST]](f32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C1]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST3]](i16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT]], [[C2]](i32) + ; VI-NEXT: 
[[AND1:%[0-9]+]]:_(i32) = G_AND [[SHL]], [[C]] + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = disjoint G_OR [[AND]], [[AND1]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(f32) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST4]](f32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST5]](i32) ; ; GFX9-LABEL: name: test_copysign_s32_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SHL]], [[C]] - ; GFX9-NEXT: %3:_(s32) = disjoint G_OR [[AND]], [[AND2]] - ; GFX9-NEXT: $vgpr0 = COPY %3(s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %1 - %3:_(s32) = G_FCOPYSIGN %0, %2 - $vgpr0 = COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST]](f32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C1]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST3]](i16) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT]], [[C2]](i32) + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[SHL]], [[C]] + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = disjoint G_OR [[AND]], [[AND1]] + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(f32) = G_BITCAST [[OR]](i32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST4]](f32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST5]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %1(i32) + %3:_(f32) = G_BITCAST %0(i32) + %4:_(f16) = G_BITCAST %2(i16) + %5:_(f32) = G_FCOPYSIGN %3, %4(f16) + %6:_(i32) = G_BITCAST %5(f32) + $vgpr0 = COPY %6(i32) ... 
--- @@ -403,58 +550,76 @@ body: | ; SI-LABEL: name: test_copysign_s64_s16 ; SI: liveins: $vgpr0_vgpr1, $vgpr2 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C1]] - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY1]](s32) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[AND1]], [[C3]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C]] - ; SI-NEXT: %3:_(s64) = disjoint G_OR [[AND]], [[AND2]] - ; SI-NEXT: $vgpr0_vgpr1 = COPY %3(s64) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 -9223372036854775808 + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 9223372036854775807 + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i64) = G_BITCAST [[BITCAST]](f64) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; SI-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[BITCAST2]], [[C1]] + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[BITCAST3]](i16) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 48 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[ZEXT]], [[C2]](i32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[SHL]], [[C]] + ; SI-NEXT: [[OR:%[0-9]+]]:_(i64) = disjoint G_OR [[AND]], [[AND1]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(f64) = G_BITCAST [[OR]](i64) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(i64) = G_BITCAST [[BITCAST4]](f64) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST5]](i64) ; ; VI-LABEL: name: test_copysign_s64_s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C1]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY1]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[AND1]], [[C3]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C]] - ; VI-NEXT: %3:_(s64) = disjoint G_OR [[AND]], [[AND2]] - ; VI-NEXT: $vgpr0_vgpr1 = COPY %3(s64) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 -9223372036854775808 + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 9223372036854775807 + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i64) = G_BITCAST [[BITCAST]](f64) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; VI-NEXT: 
[[AND:%[0-9]+]]:_(i64) = G_AND [[BITCAST2]], [[C1]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[BITCAST3]](i16) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 48 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[ZEXT]], [[C2]](i32) + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[SHL]], [[C]] + ; VI-NEXT: [[OR:%[0-9]+]]:_(i64) = disjoint G_OR [[AND]], [[AND1]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(f64) = G_BITCAST [[OR]](i64) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(i64) = G_BITCAST [[BITCAST4]](f64) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST5]](i64) ; ; GFX9-LABEL: name: test_copysign_s64_s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C1]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY1]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C2]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[AND1]], [[C3]](s32) - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C]] - ; GFX9-NEXT: %3:_(s64) = disjoint G_OR [[AND]], [[AND2]] - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY %3(s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s16) = G_TRUNC %1 - %3:_(s64) = G_FCOPYSIGN %0, %2 - $vgpr0_vgpr1 = COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 -9223372036854775808 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 9223372036854775807 + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i64) = G_BITCAST [[BITCAST]](f64) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[BITCAST2]], [[C1]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[BITCAST3]](i16) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 48 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[ZEXT]], [[C2]](i32) + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[SHL]], [[C]] + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i64) = disjoint G_OR [[AND]], [[AND1]] + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(f64) = G_BITCAST [[OR]](i64) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(i64) = G_BITCAST [[BITCAST4]](f64) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST5]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = COPY $vgpr2 + %2:_(i16) = G_TRUNC %1(i32) + %3:_(f64) = G_BITCAST %0(i64) + %4:_(f16) = G_BITCAST %2(i16) + %5:_(f64) = G_FCOPYSIGN %3, %4(f16) + %6:_(i64) = G_BITCAST %5(f64) + $vgpr0_vgpr1 = COPY %6(i64) ... 
--- @@ -466,59 +631,80 @@ body: | ; SI-LABEL: name: test_copysign_s16_s64 ; SI: liveins: $vgpr0, $vgpr1_vgpr2 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr1_vgpr2 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; SI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C2]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s64) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; SI-NEXT: %3:_(s16) = disjoint G_OR [[AND]], [[AND1]] - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT %3(s16) - ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr1_vgpr2 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[COPY1]](i64) + ; SI-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 -32768 + ; SI-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 32767 + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(i64) = G_BITCAST [[BITCAST1]](f64) + ; SI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[BITCAST2]], [[C1]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 48 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[BITCAST3]], [[C2]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i64) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C]] + ; SI-NEXT: [[OR:%[0-9]+]]:_(i16) = disjoint G_OR [[AND]], [[AND1]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[OR]](i16) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST5]](i16) + ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; VI-LABEL: name: test_copysign_s16_s64 ; VI: liveins: $vgpr0, $vgpr1_vgpr2 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr1_vgpr2 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C2]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s64) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; VI-NEXT: %3:_(s16) = disjoint G_OR [[AND]], [[AND1]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT %3(s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr1_vgpr2 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[COPY1]](i64) + ; VI-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 -32768 + ; VI-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 32767 + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(i64) = G_BITCAST [[BITCAST1]](f64) + ; VI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND 
[[BITCAST2]], [[C1]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 48 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[BITCAST3]], [[C2]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i64) + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C]] + ; VI-NEXT: [[OR:%[0-9]+]]:_(i16) = disjoint G_OR [[AND]], [[AND1]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[OR]](i16) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST5]](i16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: test_copysign_s16_s64 ; GFX9: liveins: $vgpr0, $vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr1_vgpr2 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C2]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s64) - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; GFX9-NEXT: %3:_(s16) = disjoint G_OR [[AND]], [[AND1]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT %3(s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s64) = COPY $vgpr1_vgpr2 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_FCOPYSIGN %2, %1 - %4:_(s32) = G_ANYEXT %3 - $vgpr0 = COPY %4 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr1_vgpr2 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[COPY1]](i64) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 -32768 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 32767 + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i64) = G_BITCAST [[BITCAST1]](f64) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[BITCAST2]], [[C1]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 48 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[BITCAST3]], [[C2]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i64) + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C]] + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i16) = disjoint G_OR [[AND]], [[AND1]] + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[OR]](i16) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST5]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i64) = COPY $vgpr1_vgpr2 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(f16) = G_BITCAST %2(i16) + %4:_(f64) = G_BITCAST %1(i64) + %5:_(f16) = G_FCOPYSIGN %3, %4(f64) + %6:_(i16) = G_BITCAST %5(f16) + %7:_(i32) = G_ANYEXT %6(i16) + $vgpr0 = COPY %7(i32) ... 
--- @@ -530,58 +716,79 @@ body: | ; SI-LABEL: name: test_copysign_v2s16_v2s16 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32768 - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]] - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767 - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY]], [[BITCAST1]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY1]], [[BITCAST]] - ; SI-NEXT: %2:_(<2 x s16>) = disjoint G_OR [[AND]], [[AND1]] - ; SI-NEXT: $vgpr0 = COPY %2(<2 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY1]](<2 x i16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 32768 + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[C]], [[SHL]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 32767 + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[C2]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[C2]], [[SHL1]] + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST1]](<2 x f16>) + ; SI-NEXT: [[AND:%[0-9]+]]:_(<2 x i16>) = G_AND [[BITCAST4]], [[BITCAST3]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(<2 x i16>) = G_AND [[BITCAST5]], [[BITCAST2]] + ; SI-NEXT: [[OR2:%[0-9]+]]:_(<2 x i16>) = disjoint G_OR [[AND]], [[AND1]] + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR2]](<2 x i16>) + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST6]](<2 x f16>) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST7]](<2 x i16>) ; ; VI-LABEL: name: test_copysign_v2s16_v2s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32768 - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]] - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767 - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY]], [[BITCAST1]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY1]], [[BITCAST]] - ; VI-NEXT: %2:_(<2 x s16>) = disjoint G_OR [[AND]], [[AND1]] - ; VI-NEXT: $vgpr0 = COPY %2(<2 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY 
$vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY1]](<2 x i16>) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 32768 + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[C]], [[SHL]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 32767 + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[C2]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[C2]], [[SHL1]] + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST1]](<2 x f16>) + ; VI-NEXT: [[AND:%[0-9]+]]:_(<2 x i16>) = G_AND [[BITCAST4]], [[BITCAST3]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(<2 x i16>) = G_AND [[BITCAST5]], [[BITCAST2]] + ; VI-NEXT: [[OR2:%[0-9]+]]:_(<2 x i16>) = disjoint G_OR [[AND]], [[AND1]] + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR2]](<2 x i16>) + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST6]](<2 x f16>) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST7]](<2 x i16>) ; ; GFX9-LABEL: name: test_copysign_v2s16_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C1]](s16), [[C1]](s16) - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY]], [[BUILD_VECTOR1]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY1]], [[BUILD_VECTOR]] - ; GFX9-NEXT: %2:_(<2 x s16>) = disjoint G_OR [[AND]], [[AND1]] - ; GFX9-NEXT: $vgpr0 = COPY %2(<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_FCOPYSIGN %0, %1 - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 -32768 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[C]](i16), [[C]](i16) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 32767 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[C1]](i16), [[C1]](i16) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST1]](<2 x f16>) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(<2 x i16>) = G_AND [[BITCAST2]], [[BUILD_VECTOR1]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(<2 x i16>) = G_AND [[BITCAST3]], [[BUILD_VECTOR]] + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(<2 x i16>) = disjoint G_OR [[AND]], [[AND1]] + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](<2 x i16>) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST4]](<2 x f16>) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST5]](<2 x i16>) + %0:_(<2 x i16>) = COPY 
$vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %3:_(<2 x f16>) = G_BITCAST %1(<2 x i16>) + %4:_(<2 x f16>) = G_FCOPYSIGN %2, %3(<2 x f16>) + %5:_(<2 x i16>) = G_BITCAST %4(<2 x f16>) + $vgpr0 = COPY %5(<2 x i16>) ... --- @@ -593,48 +800,69 @@ body: | ; SI-LABEL: name: test_copysign_v2s32_v2s32 ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32) - ; SI-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[COPY]], [[BUILD_VECTOR1]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[COPY1]], [[BUILD_VECTOR]] - ; SI-NEXT: %2:_(<2 x s32>) = disjoint G_OR [[AND]], [[AND1]] - ; SI-NEXT: $vgpr0_vgpr1 = COPY %2(<2 x s32>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY1]](<2 x i32>) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[C]](i32), [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[C1]](i32), [[C1]](i32) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BITCAST]](<2 x f32>) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BITCAST1]](<2 x f32>) + ; SI-NEXT: [[AND:%[0-9]+]]:_(<2 x i32>) = G_AND [[BITCAST2]], [[BUILD_VECTOR1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(<2 x i32>) = G_AND [[BITCAST3]], [[BUILD_VECTOR]] + ; SI-NEXT: [[OR:%[0-9]+]]:_(<2 x i32>) = disjoint G_OR [[AND]], [[AND1]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[OR]](<2 x i32>) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BITCAST4]](<2 x f32>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST5]](<2 x i32>) ; ; VI-LABEL: name: test_copysign_v2s32_v2s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32) - ; VI-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[COPY]], [[BUILD_VECTOR1]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[COPY1]], [[BUILD_VECTOR]] - ; VI-NEXT: %2:_(<2 x s32>) = disjoint G_OR [[AND]], [[AND1]] - ; VI-NEXT: $vgpr0_vgpr1 = COPY %2(<2 x s32>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY1]](<2 x i32>) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[C]](i32), 
[[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[C1]](i32), [[C1]](i32) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BITCAST]](<2 x f32>) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BITCAST1]](<2 x f32>) + ; VI-NEXT: [[AND:%[0-9]+]]:_(<2 x i32>) = G_AND [[BITCAST2]], [[BUILD_VECTOR1]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(<2 x i32>) = G_AND [[BITCAST3]], [[BUILD_VECTOR]] + ; VI-NEXT: [[OR:%[0-9]+]]:_(<2 x i32>) = disjoint G_OR [[AND]], [[AND1]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[OR]](<2 x i32>) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BITCAST4]](<2 x f32>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST5]](<2 x i32>) ; ; GFX9-LABEL: name: test_copysign_v2s32_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32) - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[COPY]], [[BUILD_VECTOR1]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[COPY1]], [[BUILD_VECTOR]] - ; GFX9-NEXT: %2:_(<2 x s32>) = disjoint G_OR [[AND]], [[AND1]] - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY %2(<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = G_FCOPYSIGN %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY1]](<2 x i32>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[C]](i32), [[C]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[C1]](i32), [[C1]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BITCAST]](<2 x f32>) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BITCAST1]](<2 x f32>) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(<2 x i32>) = G_AND [[BITCAST2]], [[BUILD_VECTOR1]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(<2 x i32>) = G_AND [[BITCAST3]], [[BUILD_VECTOR]] + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(<2 x i32>) = disjoint G_OR [[AND]], [[AND1]] + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[OR]](<2 x i32>) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BITCAST4]](<2 x f32>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST5]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x f32>) = G_BITCAST %0(<2 x i32>) + %3:_(<2 x f32>) = G_BITCAST %1(<2 x i32>) + %4:_(<2 x f32>) = G_FCOPYSIGN %2, %3(<2 x f32>) + %5:_(<2 x i32>) = G_BITCAST %4(<2 x f32>) + $vgpr0_vgpr1 = COPY %5(<2 x i32>) ... 
--- @@ -646,60 +874,81 @@ body: | ; SI-LABEL: name: test_copysign_v2s64_v2s64 ; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; SI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[UV]], [[C1]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[UV1]], [[C1]] - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[UV2]], [[C]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[UV3]], [[C]] - ; SI-NEXT: %13:_(s64) = disjoint G_OR [[AND]], [[AND2]] - ; SI-NEXT: %14:_(s64) = disjoint G_OR [[AND1]], [[AND3]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR %13(s64), %14(s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY1]](<2 x i64>) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 -9223372036854775808 + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 9223372036854775807 + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BITCAST]](<2 x f64>) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BITCAST1]](<2 x f64>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[BITCAST2]](<2 x i64>) + ; SI-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[UV]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[UV1]], [[C1]] + ; SI-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[BITCAST3]](<2 x i64>) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i64) = G_AND [[UV2]], [[C]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i64) = G_AND [[UV3]], [[C]] + ; SI-NEXT: [[OR:%[0-9]+]]:_(i64) = disjoint G_OR [[AND]], [[AND2]] + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i64) = disjoint G_OR [[AND1]], [[AND3]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[OR]](i64), [[OR1]](i64) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[BUILD_VECTOR]](<2 x i64>) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BITCAST4]](<2 x f64>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST5]](<2 x i64>) ; ; VI-LABEL: name: test_copysign_v2s64_v2s64 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; VI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[UV]], [[C1]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[UV1]], [[C1]] - ; VI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[UV2]], [[C]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s64) = 
G_AND [[UV3]], [[C]] - ; VI-NEXT: %13:_(s64) = disjoint G_OR [[AND]], [[AND2]] - ; VI-NEXT: %14:_(s64) = disjoint G_OR [[AND1]], [[AND3]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR %13(s64), %14(s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY1]](<2 x i64>) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 -9223372036854775808 + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 9223372036854775807 + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BITCAST]](<2 x f64>) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BITCAST1]](<2 x f64>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[BITCAST2]](<2 x i64>) + ; VI-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[UV]], [[C1]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[UV1]], [[C1]] + ; VI-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[BITCAST3]](<2 x i64>) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i64) = G_AND [[UV2]], [[C]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i64) = G_AND [[UV3]], [[C]] + ; VI-NEXT: [[OR:%[0-9]+]]:_(i64) = disjoint G_OR [[AND]], [[AND2]] + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i64) = disjoint G_OR [[AND1]], [[AND3]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[OR]](i64), [[OR1]](i64) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[BUILD_VECTOR]](<2 x i64>) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BITCAST4]](<2 x f64>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST5]](<2 x i64>) ; ; GFX9-LABEL: name: test_copysign_v2s64_v2s64 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[UV]], [[C1]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[UV1]], [[C1]] - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[UV2]], [[C]] - ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[UV3]], [[C]] - ; GFX9-NEXT: %13:_(s64) = disjoint G_OR [[AND]], [[AND2]] - ; GFX9-NEXT: %14:_(s64) = disjoint G_OR [[AND1]], [[AND3]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR %13(s64), %14(s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - %2:_(<2 x s64>) = G_FCOPYSIGN %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY1]](<2 x i64>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 
-9223372036854775808 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 9223372036854775807 + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BITCAST]](<2 x f64>) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BITCAST1]](<2 x f64>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[BITCAST2]](<2 x i64>) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[UV]], [[C1]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[UV1]], [[C1]] + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[BITCAST3]](<2 x i64>) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(i64) = G_AND [[UV2]], [[C]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(i64) = G_AND [[UV3]], [[C]] + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i64) = disjoint G_OR [[AND]], [[AND2]] + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i64) = disjoint G_OR [[AND1]], [[AND3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[OR]](i64), [[OR1]](i64) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[BUILD_VECTOR]](<2 x i64>) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BITCAST4]](<2 x f64>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST5]](<2 x i64>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(<2 x f64>) = G_BITCAST %0(<2 x i64>) + %3:_(<2 x f64>) = G_BITCAST %1(<2 x i64>) + %4:_(<2 x f64>) = G_FCOPYSIGN %2, %3(<2 x f64>) + %5:_(<2 x i64>) = G_BITCAST %4(<2 x f64>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %5(<2 x i64>) ... --- @@ -711,78 +960,99 @@ body: | ; SI-LABEL: name: test_copysign_v2s64_v2s32 ; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; SI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[UV]], [[C1]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[UV1]], [[C1]] - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[UV2]](s32) - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[UV3]](s32) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ZEXT]], [[COPY2]](s32) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[ZEXT1]], [[C2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[SHL1]], [[C]] - ; SI-NEXT: %17:_(s64) = disjoint G_OR [[AND]], [[AND2]] - ; SI-NEXT: %18:_(s64) = disjoint G_OR [[AND1]], [[AND3]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR %17(s64), %18(s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr4_vgpr5 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY1]](<2 x i32>) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 -9223372036854775808 + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 9223372036854775807 + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i64>) = 
G_BITCAST [[BITCAST]](<2 x f64>) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BITCAST1]](<2 x f32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[BITCAST2]](<2 x i64>) + ; SI-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[UV]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[UV1]], [[C1]] + ; SI-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<2 x i32>) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[UV2]](i32) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[UV3]](i32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[ZEXT]], [[COPY2]](i32) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[ZEXT1]], [[C2]](i32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i64) = G_AND [[SHL]], [[C]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i64) = G_AND [[SHL1]], [[C]] + ; SI-NEXT: [[OR:%[0-9]+]]:_(i64) = disjoint G_OR [[AND]], [[AND2]] + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i64) = disjoint G_OR [[AND1]], [[AND3]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[OR]](i64), [[OR1]](i64) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[BUILD_VECTOR]](<2 x i64>) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BITCAST4]](<2 x f64>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST5]](<2 x i64>) ; ; VI-LABEL: name: test_copysign_v2s64_v2s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; VI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[UV]], [[C1]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[UV1]], [[C1]] - ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[UV2]](s32) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[UV3]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ZEXT]], [[COPY2]](s32) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[ZEXT1]], [[C2]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[SHL1]], [[C]] - ; VI-NEXT: %17:_(s64) = disjoint G_OR [[AND]], [[AND2]] - ; VI-NEXT: %18:_(s64) = disjoint G_OR [[AND1]], [[AND3]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR %17(s64), %18(s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr4_vgpr5 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY1]](<2 x i32>) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 -9223372036854775808 + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 9223372036854775807 + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BITCAST]](<2 x f64>) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BITCAST1]](<2 x f32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = 
G_UNMERGE_VALUES [[BITCAST2]](<2 x i64>) + ; VI-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[UV]], [[C1]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[UV1]], [[C1]] + ; VI-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<2 x i32>) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[UV2]](i32) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[UV3]](i32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[ZEXT]], [[COPY2]](i32) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[ZEXT1]], [[C2]](i32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i64) = G_AND [[SHL]], [[C]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i64) = G_AND [[SHL1]], [[C]] + ; VI-NEXT: [[OR:%[0-9]+]]:_(i64) = disjoint G_OR [[AND]], [[AND2]] + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i64) = disjoint G_OR [[AND1]], [[AND3]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[OR]](i64), [[OR1]](i64) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[BUILD_VECTOR]](<2 x i64>) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BITCAST4]](<2 x f64>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST5]](<2 x i64>) ; ; GFX9-LABEL: name: test_copysign_v2s64_v2s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[UV]], [[C1]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[UV1]], [[C1]] - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[UV2]](s32) - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[UV3]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ZEXT]], [[COPY2]](s32) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[ZEXT1]], [[C2]](s32) - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C]] - ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[SHL1]], [[C]] - ; GFX9-NEXT: %17:_(s64) = disjoint G_OR [[AND]], [[AND2]] - ; GFX9-NEXT: %18:_(s64) = disjoint G_OR [[AND1]], [[AND3]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR %17(s64), %18(s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s32>) = COPY $vgpr4_vgpr5 - %2:_(<2 x s64>) = G_FCOPYSIGN %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr4_vgpr5 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY1]](<2 x i32>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 -9223372036854775808 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 9223372036854775807 + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BITCAST]](<2 x f64>) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BITCAST1]](<2 x f32>) + ; 
GFX9-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[BITCAST2]](<2 x i64>) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[UV]], [[C1]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[UV1]], [[C1]] + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](<2 x i32>) + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[UV2]](i32) + ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[UV3]](i32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[ZEXT]], [[COPY2]](i32) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[ZEXT1]], [[C2]](i32) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(i64) = G_AND [[SHL]], [[C]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(i64) = G_AND [[SHL1]], [[C]] + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i64) = disjoint G_OR [[AND]], [[AND2]] + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i64) = disjoint G_OR [[AND1]], [[AND3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[OR]](i64), [[OR1]](i64) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[BUILD_VECTOR]](<2 x i64>) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BITCAST4]](<2 x f64>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST5]](<2 x i64>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x i32>) = COPY $vgpr4_vgpr5 + %2:_(<2 x f64>) = G_BITCAST %0(<2 x i64>) + %3:_(<2 x f32>) = G_BITCAST %1(<2 x i32>) + %4:_(<2 x f64>) = G_FCOPYSIGN %2, %3(<2 x f32>) + %5:_(<2 x i64>) = G_BITCAST %4(<2 x f64>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %5(<2 x i64>) ... @@ -795,72 +1065,93 @@ body: | ; SI-LABEL: name: test_copysign_v2s32_v2s64 ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32) - ; SI-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[COPY]], [[BUILD_VECTOR1]] - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[COPY2]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[C2]](s32) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR1]](s64) - ; SI-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[TRUNC]](s32), [[TRUNC1]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR2]], [[BUILD_VECTOR]] - ; SI-NEXT: %2:_(<2 x s32>) = disjoint G_OR [[AND]], [[AND1]] - ; SI-NEXT: $vgpr0_vgpr1 = COPY %2(<2 x s32>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY1]](<2 x i64>) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[C]](i32), [[C]](i32) + ; SI-NEXT: 
[[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[C1]](i32), [[C1]](i32) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BITCAST]](<2 x f32>) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BITCAST1]](<2 x f64>) + ; SI-NEXT: [[AND:%[0-9]+]]:_(<2 x i32>) = G_AND [[BITCAST2]], [[BUILD_VECTOR1]] + ; SI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[BITCAST3]](<2 x i64>) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[COPY2]](i32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i64) = G_LSHR [[UV1]], [[C2]](i32) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[LSHR]](i64) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i32) = G_TRUNC [[LSHR1]](i64) + ; SI-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[TRUNC]](i32), [[TRUNC1]](i32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(<2 x i32>) = G_AND [[BUILD_VECTOR2]], [[BUILD_VECTOR]] + ; SI-NEXT: [[OR:%[0-9]+]]:_(<2 x i32>) = disjoint G_OR [[AND]], [[AND1]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[OR]](<2 x i32>) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BITCAST4]](<2 x f32>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST5]](<2 x i32>) ; ; VI-LABEL: name: test_copysign_v2s32_v2s64 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32) - ; VI-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[COPY]], [[BUILD_VECTOR1]] - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[COPY2]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[C2]](s32) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR1]](s64) - ; VI-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[TRUNC]](s32), [[TRUNC1]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR2]], [[BUILD_VECTOR]] - ; VI-NEXT: %2:_(<2 x s32>) = disjoint G_OR [[AND]], [[AND1]] - ; VI-NEXT: $vgpr0_vgpr1 = COPY %2(<2 x s32>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY1]](<2 x i64>) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[C]](i32), [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[C1]](i32), [[C1]](i32) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BITCAST]](<2 x f32>) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BITCAST1]](<2 x f64>) + ; VI-NEXT: 
[[AND:%[0-9]+]]:_(<2 x i32>) = G_AND [[BITCAST2]], [[BUILD_VECTOR1]] + ; VI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[BITCAST3]](<2 x i64>) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[COPY2]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i64) = G_LSHR [[UV1]], [[C2]](i32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[LSHR]](i64) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i32) = G_TRUNC [[LSHR1]](i64) + ; VI-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[TRUNC]](i32), [[TRUNC1]](i32) + ; VI-NEXT: [[AND1:%[0-9]+]]:_(<2 x i32>) = G_AND [[BUILD_VECTOR2]], [[BUILD_VECTOR]] + ; VI-NEXT: [[OR:%[0-9]+]]:_(<2 x i32>) = disjoint G_OR [[AND]], [[AND1]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[OR]](<2 x i32>) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BITCAST4]](<2 x f32>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST5]](<2 x i32>) ; ; GFX9-LABEL: name: test_copysign_v2s32_v2s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32) - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[COPY]], [[BUILD_VECTOR1]] - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[COPY2]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[C2]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR1]](s64) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[TRUNC]](s32), [[TRUNC1]](s32) - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR2]], [[BUILD_VECTOR]] - ; GFX9-NEXT: %2:_(<2 x s32>) = disjoint G_OR [[AND]], [[AND1]] - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY %2(<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - %2:_(<2 x s32>) = G_FCOPYSIGN %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY1]](<2 x i64>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[C]](i32), [[C]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[C1]](i32), [[C1]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BITCAST]](<2 x f32>) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BITCAST1]](<2 x f64>) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(<2 x i32>) = G_AND [[BITCAST2]], [[BUILD_VECTOR1]] + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i64), 
[[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[BITCAST3]](<2 x i64>) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[COPY2]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i64) = G_LSHR [[UV1]], [[C2]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[LSHR]](i64) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i32) = G_TRUNC [[LSHR1]](i64) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[TRUNC]](i32), [[TRUNC1]](i32) + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(<2 x i32>) = G_AND [[BUILD_VECTOR2]], [[BUILD_VECTOR]] + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(<2 x i32>) = disjoint G_OR [[AND]], [[AND1]] + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[OR]](<2 x i32>) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BITCAST4]](<2 x f32>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST5]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + %2:_(<2 x f32>) = G_BITCAST %0(<2 x i32>) + %3:_(<2 x f64>) = G_BITCAST %1(<2 x i64>) + %4:_(<2 x f32>) = G_FCOPYSIGN %2, %3(<2 x f64>) + %5:_(<2 x i32>) = G_BITCAST %4(<2 x f32>) + $vgpr0_vgpr1 = COPY %5(<2 x i32>) ... --- @@ -872,42 +1163,63 @@ body: | ; SI-LABEL: name: test_copysign_s32_s32_flagss ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; SI-NEXT: %2:_(s32) = nnan disjoint G_OR [[AND]], [[AND1]] - ; SI-NEXT: $vgpr0 = COPY %2(s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST]](f32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST1]](f32) + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST3]], [[C]] + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = nnan disjoint G_OR [[AND]], [[AND1]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(f32) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST4]](f32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST5]](i32) ; ; VI-LABEL: name: test_copysign_s32_s32_flagss ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; VI-NEXT: %2:_(s32) = nnan disjoint G_OR [[AND]], [[AND1]] - ; VI-NEXT: $vgpr0 = COPY %2(s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT 
i32 -2147483648 + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST]](f32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST1]](f32) + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C1]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST3]], [[C]] + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = nnan disjoint G_OR [[AND]], [[AND1]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(f32) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST4]](f32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST5]](i32) ; ; GFX9-LABEL: name: test_copysign_s32_s32_flagss ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX9-NEXT: %2:_(s32) = nnan disjoint G_OR [[AND]], [[AND1]] - ; GFX9-NEXT: $vgpr0 = COPY %2(s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = nnan G_FCOPYSIGN %0, %1 - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST]](f32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST1]](f32) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C1]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST3]], [[C]] + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = nnan disjoint G_OR [[AND]], [[AND1]] + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(f32) = G_BITCAST [[OR]](i32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST4]](f32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST5]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32) = nnan G_FCOPYSIGN %2, %3(f32) + %5:_(i32) = G_BITCAST %4(f32) + $vgpr0 = COPY %5(i32) ... 
@@ -919,55 +1231,76 @@ body: | ; SI-LABEL: name: test_copysign_s32_s16_flags ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SHL]], [[C]] - ; SI-NEXT: %3:_(s32) = nnan disjoint G_OR [[AND]], [[AND2]] - ; SI-NEXT: $vgpr0 = COPY %3(s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST]](f32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C1]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST3]](i16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT]], [[C2]](i32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[SHL]], [[C]] + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = nnan disjoint G_OR [[AND]], [[AND1]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(f32) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST4]](f32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST5]](i32) ; ; VI-LABEL: name: test_copysign_s32_s16_flags ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SHL]], [[C]] - ; VI-NEXT: %3:_(s32) = nnan disjoint G_OR [[AND]], [[AND2]] - ; VI-NEXT: $vgpr0 = COPY %3(s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST]](f32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C1]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST3]](i16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT]], [[C2]](i32) + ; 
VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[SHL]], [[C]] + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = nnan disjoint G_OR [[AND]], [[AND1]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(f32) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST4]](f32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST5]](i32) ; ; GFX9-LABEL: name: test_copysign_s32_s16_flags ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SHL]], [[C]] - ; GFX9-NEXT: %3:_(s32) = nnan disjoint G_OR [[AND]], [[AND2]] - ; GFX9-NEXT: $vgpr0 = COPY %3(s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %1 - %3:_(s32) = nnan G_FCOPYSIGN %0, %2 - $vgpr0 = COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST]](f32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C1]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST3]](i16) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT]], [[C2]](i32) + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[SHL]], [[C]] + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = nnan disjoint G_OR [[AND]], [[AND1]] + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(f32) = G_BITCAST [[OR]](i32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST4]](f32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST5]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %1(i32) + %3:_(f32) = G_BITCAST %0(i32) + %4:_(f16) = G_BITCAST %2(i16) + %5:_(f32) = nnan G_FCOPYSIGN %3, %4(f16) + %6:_(i32) = G_BITCAST %5(f32) + $vgpr0 = COPY %6(i32) ... 
@@ -980,57 +1313,78 @@ body: | ; SI-LABEL: name: test_copysign_s16_s32_flags ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; SI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; SI-NEXT: %3:_(s16) = nnan disjoint G_OR [[AND]], [[AND1]] - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT %3(s16) - ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 -32768 + ; SI-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 32767 + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST1]](f32) + ; SI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[BITCAST2]], [[C1]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C2]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C]] + ; SI-NEXT: [[OR:%[0-9]+]]:_(i16) = nnan disjoint G_OR [[AND]], [[AND1]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[OR]](i16) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST5]](i16) + ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; VI-LABEL: name: test_copysign_s16_s32_flags ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; VI-NEXT: %3:_(s16) = nnan disjoint G_OR [[AND]], [[AND1]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT %3(s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 -32768 + ; VI-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 32767 + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST1]](f32) + ; VI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[BITCAST2]], [[C1]] + ; 
VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C2]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C]] + ; VI-NEXT: [[OR:%[0-9]+]]:_(i16) = nnan disjoint G_OR [[AND]], [[AND1]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[OR]](i16) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST5]](i16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: test_copysign_s16_s32_flags ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; GFX9-NEXT: %3:_(s16) = nnan disjoint G_OR [[AND]], [[AND1]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT %3(s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = nnan G_FCOPYSIGN %2, %1 - %4:_(s32) = G_ANYEXT %3 - $vgpr0 = COPY %4 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 -32768 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 32767 + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST1]](f32) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[BITCAST2]], [[C1]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C2]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C]] + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i16) = nnan disjoint G_OR [[AND]], [[AND1]] + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[OR]](i16) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST5]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(f16) = G_BITCAST %2(i16) + %4:_(f32) = G_BITCAST %1(i32) + %5:_(f16) = nnan G_FCOPYSIGN %3, %4(f32) + %6:_(i16) = G_BITCAST %5(f16) + %7:_(i32) = G_ANYEXT %6(i16) + $vgpr0 = COPY %7(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcos.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcos.mir index c230edac5ddf9..ac7d9aa0941a1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcos.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcos.mir @@ -14,32 +14,42 @@ body: | ; SI-LABEL: name: test_fcos_s32 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FC45F3060000000 - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[C]] - ; SI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s32) - ; SI-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT]](s32) - ; SI-NEXT: $vgpr0 = COPY [[INT1]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FC45F3060000000 + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST]], [[C]] + ; SI-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](f32) + ; SI-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT]](f32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[INT1]](f32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + ; ; VI-LABEL: name: test_fcos_s32 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FC45F3060000000 - ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[C]] - ; VI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s32) - ; VI-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT]](s32) - ; VI-NEXT: $vgpr0 = COPY [[INT1]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FC45F3060000000 + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST]], [[C]] + ; VI-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](f32) + ; VI-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT]](f32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[INT1]](f32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + ; ; GFX9-LABEL: name: test_fcos_s32 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FC45F3060000000 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[C]] - ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL]](s32) - ; GFX9-NEXT: $vgpr0 = COPY [[INT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_FCOS %0 - $vgpr0 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FC45F3060000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST]], [[C]] + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL]](f32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[INT]](f32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = G_FCOS %1 + %3:_(i32) = G_BITCAST %2(f32) + $vgpr0 = COPY %3(i32) ... 
--- @@ -51,32 +61,42 @@ body: | ; SI-LABEL: name: test_fcos_s64 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3FC45F306DC9C883 - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[COPY]], [[C]] - ; SI-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s64) - ; SI-NEXT: [[INT1:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT]](s64) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[INT1]](s64) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; SI-NEXT: [[C:%[0-9]+]]:_(f64) = G_FCONSTANT double 0x3FC45F306DC9C883 + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[BITCAST]], [[C]] + ; SI-NEXT: [[INT:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](f64) + ; SI-NEXT: [[INT1:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT]](f64) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[INT1]](f64) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](i64) + ; ; VI-LABEL: name: test_fcos_s64 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3FC45F306DC9C883 - ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[COPY]], [[C]] - ; VI-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s64) - ; VI-NEXT: [[INT1:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT]](s64) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[INT1]](s64) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; VI-NEXT: [[C:%[0-9]+]]:_(f64) = G_FCONSTANT double 0x3FC45F306DC9C883 + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[BITCAST]], [[C]] + ; VI-NEXT: [[INT:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](f64) + ; VI-NEXT: [[INT1:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT]](f64) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[INT1]](f64) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](i64) + ; ; GFX9-LABEL: name: test_fcos_s64 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3FC45F306DC9C883 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[COPY]], [[C]] - ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[INT]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_FCOS %0 - $vgpr0_vgpr1 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f64) = G_FCONSTANT double 0x3FC45F306DC9C883 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[BITCAST]], [[C]] + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL]](f64) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[INT]](f64) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(f64) = G_BITCAST %0(i64) + %2:_(f64) = G_FCOS %1 + %3:_(i64) = G_BITCAST %2(f64) + $vgpr0_vgpr1 = COPY %3(i64) ... 
--- name: test_fcos_s16 @@ -87,42 +107,52 @@ body: | ; SI-LABEL: name: test_fcos_s16 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FC45F3060000000 - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[C]] - ; SI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s32) - ; SI-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT]](s32) - ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32) - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; SI-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FC45F3060000000 + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT]], [[C]] + ; SI-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](f32) + ; SI-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT]](f32) + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[INT1]](f32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + ; ; VI-LABEL: name: test_fcos_s16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3118 - ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[C]] - ; VI-NEXT: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s16) - ; VI-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT]](s16) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; VI-NEXT: [[C:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH3118 + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST]], [[C]] + ; VI-NEXT: [[INT:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](f16) + ; VI-NEXT: [[INT1:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT]](f16) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[INT1]](f16) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + ; ; GFX9-LABEL: name: test_fcos_s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3118 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[C]] - ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL]](s16) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s16) = G_FCOS %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; 
GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH3118 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST]], [[C]] + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL]](f16) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[INT]](f16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(f16) = G_BITCAST %1(i16) + %3:_(f16) = G_FCOS %2 + %4:_(i16) = G_BITCAST %3(f16) + %5:_(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... --- @@ -134,46 +164,56 @@ body: | ; SI-LABEL: name: test_fcos_v2s32 ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FC45F3060000000 - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[C]] - ; SI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s32) - ; SI-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT]](s32) - ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]] - ; SI-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](s32) - ; SI-NEXT: [[INT3:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT2]](s32) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INT1]](s32), [[INT3]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; SI-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FC45F3060000000 + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UV]], [[C]] + ; SI-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](f32) + ; SI-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT]](f32) + ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[UV1]], [[C]] + ; SI-NEXT: [[INT2:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](f32) + ; SI-NEXT: [[INT3:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT2]](f32) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[INT1]](f32), [[INT3]](f32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](<2 x i32>) + ; ; VI-LABEL: name: test_fcos_v2s32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FC45F3060000000 - ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[C]] - ; VI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s32) - ; VI-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT]](s32) - ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]] - ; VI-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.fract), [[FMUL1]](s32) - ; VI-NEXT: [[INT3:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT2]](s32) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INT1]](s32), [[INT3]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; VI-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FC45F3060000000 + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UV]], [[C]] + ; VI-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](f32) + ; VI-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT]](f32) + ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[UV1]], [[C]] + ; VI-NEXT: [[INT2:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](f32) + ; VI-NEXT: [[INT3:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT2]](f32) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[INT1]](f32), [[INT3]](f32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](<2 x i32>) + ; ; GFX9-LABEL: name: test_fcos_v2s32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FC45F3060000000 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[C]] - ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL]](s32) - ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]] - ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INT]](s32), [[INT1]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_FCOS %0 - $vgpr0_vgpr1 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FC45F3060000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UV]], [[C]] + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL]](f32) + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[UV1]], [[C]] + ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL1]](f32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[INT]](f32), [[INT1]](f32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x f32>) = G_BITCAST %0(<2 x i32>) + %2:_(<2 x f32>) = G_FCOS %1 + %3:_(<2 x i32>) = G_BITCAST %2(<2 x f32>) + $vgpr0_vgpr1 = COPY %3(<2 x i32>) ... 
--- @@ -185,54 +225,64 @@ body: | ; SI-LABEL: name: test_fcos_v3s32 ; SI: liveins: $vgpr0_vgpr1_vgpr2 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FC45F3060000000 - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[C]] - ; SI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s32) - ; SI-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT]](s32) - ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]] - ; SI-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](s32) - ; SI-NEXT: [[INT3:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT2]](s32) - ; SI-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[C]] - ; SI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL2]](s32) - ; SI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT4]](s32) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[INT1]](s32), [[INT3]](s32), [[INT5]](s32) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY]](<3 x i32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; SI-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FC45F3060000000 + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UV]], [[C]] + ; SI-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](f32) + ; SI-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT]](f32) + ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[UV1]], [[C]] + ; SI-NEXT: [[INT2:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](f32) + ; SI-NEXT: [[INT3:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT2]](f32) + ; SI-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[UV2]], [[C]] + ; SI-NEXT: [[INT4:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL2]](f32) + ; SI-NEXT: [[INT5:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT4]](f32) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[INT1]](f32), [[INT3]](f32), [[INT5]](f32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[BUILD_VECTOR]](<3 x f32>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST1]](<3 x i32>) + ; ; VI-LABEL: name: test_fcos_v3s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FC45F3060000000 - ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[C]] - ; VI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s32) - ; VI-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT]](s32) - ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]] - ; VI-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](s32) - ; VI-NEXT: [[INT3:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT2]](s32) - ; VI-NEXT: 
[[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[C]] - ; VI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL2]](s32) - ; VI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT4]](s32) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[INT1]](s32), [[INT3]](s32), [[INT5]](s32) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY]](<3 x i32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; VI-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FC45F3060000000 + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UV]], [[C]] + ; VI-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](f32) + ; VI-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT]](f32) + ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[UV1]], [[C]] + ; VI-NEXT: [[INT2:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](f32) + ; VI-NEXT: [[INT3:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT2]](f32) + ; VI-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[UV2]], [[C]] + ; VI-NEXT: [[INT4:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL2]](f32) + ; VI-NEXT: [[INT5:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT4]](f32) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[INT1]](f32), [[INT3]](f32), [[INT5]](f32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[BUILD_VECTOR]](<3 x f32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST1]](<3 x i32>) + ; ; GFX9-LABEL: name: test_fcos_v3s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FC45F3060000000 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[C]] - ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL]](s32) - ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]] - ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL1]](s32) - ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[C]] - ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL2]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[INT]](s32), [[INT1]](s32), [[INT2]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s32>) = G_FCOS %0 - $vgpr0_vgpr1_vgpr2 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY]](<3 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FC45F3060000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UV]], [[C]] + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL]](f32) + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[UV1]], [[C]] + ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.cos), [[FMUL1]](f32) + ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[UV2]], [[C]] + ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL2]](f32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[INT]](f32), [[INT1]](f32), [[INT2]](f32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[BUILD_VECTOR]](<3 x f32>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST1]](<3 x i32>) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x f32>) = G_BITCAST %0(<3 x i32>) + %2:_(<3 x f32>) = G_FCOS %1 + %3:_(<3 x i32>) = G_BITCAST %2(<3 x f32>) + $vgpr0_vgpr1_vgpr2 = COPY %3(<3 x i32>) ... --- @@ -244,46 +294,56 @@ body: | ; SI-LABEL: name: test_fcos_v2s64 ; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3FC45F306DC9C883 - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[UV]], [[C]] - ; SI-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s64) - ; SI-NEXT: [[INT1:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT]](s64) - ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[UV1]], [[C]] - ; SI-NEXT: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](s64) - ; SI-NEXT: [[INT3:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT2]](s64) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[INT1]](s64), [[INT3]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(f64), [[UV1:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST]](<2 x f64>) + ; SI-NEXT: [[C:%[0-9]+]]:_(f64) = G_FCONSTANT double 0x3FC45F306DC9C883 + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[UV]], [[C]] + ; SI-NEXT: [[INT:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](f64) + ; SI-NEXT: [[INT1:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT]](f64) + ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(f64) = G_FMUL [[UV1]], [[C]] + ; SI-NEXT: [[INT2:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](f64) + ; SI-NEXT: [[INT3:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT2]](f64) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f64>) = G_BUILD_VECTOR [[INT1]](f64), [[INT3]](f64) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BUILD_VECTOR]](<2 x f64>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST1]](<2 x i64>) + ; ; VI-LABEL: name: test_fcos_v2s64 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3FC45F306DC9C883 - ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[UV]], [[C]] - ; VI-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s64) - ; VI-NEXT: [[INT1:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT]](s64) - ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[UV1]], [[C]] - ; VI-NEXT: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), 
[[FMUL1]](s64) - ; VI-NEXT: [[INT3:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT2]](s64) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[INT1]](s64), [[INT3]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(f64), [[UV1:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST]](<2 x f64>) + ; VI-NEXT: [[C:%[0-9]+]]:_(f64) = G_FCONSTANT double 0x3FC45F306DC9C883 + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[UV]], [[C]] + ; VI-NEXT: [[INT:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](f64) + ; VI-NEXT: [[INT1:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT]](f64) + ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(f64) = G_FMUL [[UV1]], [[C]] + ; VI-NEXT: [[INT2:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](f64) + ; VI-NEXT: [[INT3:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT2]](f64) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f64>) = G_BUILD_VECTOR [[INT1]](f64), [[INT3]](f64) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BUILD_VECTOR]](<2 x f64>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST1]](<2 x i64>) + ; ; GFX9-LABEL: name: test_fcos_v2s64 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3FC45F306DC9C883 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[UV]], [[C]] - ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL]](s64) - ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[UV1]], [[C]] - ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL1]](s64) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[INT]](s64), [[INT1]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s64>) = G_FCOS %0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f64), [[UV1:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST]](<2 x f64>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f64) = G_FCONSTANT double 0x3FC45F306DC9C883 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[UV]], [[C]] + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL]](f64) + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(f64) = G_FMUL [[UV1]], [[C]] + ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL1]](f64) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f64>) = G_BUILD_VECTOR [[INT]](f64), [[INT1]](f64) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BUILD_VECTOR]](<2 x f64>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST1]](<2 x i64>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x f64>) = G_BITCAST %0(<2 x i64>) + %2:_(<2 x f64>) = G_FCOS %1 + %3:_(<2 x i64>) = G_BITCAST %2(<2 x f64>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3(<2 x i64>) ... 
--- @@ -295,70 +355,93 @@ body: | ; SI-LABEL: name: test_fcos_v2s16 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FC45F3060000000 - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[C1]] - ; SI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s32) - ; SI-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT]](s32) - ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32) - ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT1]], [[C1]] - ; SI-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](s32) - ; SI-NEXT: [[INT3:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT2]](s32) - ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT3]](s32) - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %17(i16) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST1]](f16) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %18(i16) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FC45F3060000000 + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT]], [[C1]] + ; SI-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](f32) + ; SI-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT]](f32) + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[INT1]](f32) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT1]], [[C1]] + ; SI-NEXT: [[INT2:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](f32) + ; SI-NEXT: [[INT3:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT2]](f32) + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[INT3]](f32) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST5]](i16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST6]](i16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], 
[[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST7]](<2 x f16>) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST8]](<2 x i16>) + ; ; VI-LABEL: name: test_fcos_v2s16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3118 - ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[C1]] - ; VI-NEXT: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s16) - ; VI-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT]](s16) - ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[C1]] - ; VI-NEXT: [[INT2:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](s16) - ; VI-NEXT: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT2]](s16) - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[INT1]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[INT3]](s16) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %13(i16) + ; VI-NEXT: [[C:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH3118 + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %14(i16) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C1]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST1]], [[C]] + ; VI-NEXT: [[INT:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](f16) + ; VI-NEXT: [[INT1:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT]](f16) + ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST2]], [[C]] + ; VI-NEXT: [[INT2:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](f16) + ; VI-NEXT: [[INT3:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT2]](f16) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[INT1]](f16) + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[INT3]](f16) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST5]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST6]](i16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST7]](<2 x f16>) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST8]](<2 x i16>) + ; ; GFX9-LABEL: name: 
test_fcos_v2s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3118 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[C1]] - ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL]](s16) - ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[C1]] - ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT]](s16), [[INT1]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = G_FCOS %0 - $vgpr0 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %11(i16) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH3118 + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %12(i16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C1]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST1]], [[C]] + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL]](f16) + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST2]], [[C]] + ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL1]](f16) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[INT]](f16), [[INT1]](f16) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BUILD_VECTOR]](<2 x f16>) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST5]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %2:_(<2 x f16>) = G_FCOS %1 + %3:_(<2 x i16>) = G_BITCAST %2(<2 x f16>) + $vgpr0 = COPY %3(<2 x i16>) ... 
--- @@ -367,87 +450,114 @@ body: | bb.0: ; SI-LABEL: name: test_fcos_v3s16 - ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FC45F3060000000 - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[C1]] - ; SI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s32) - ; SI-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT]](s32) - ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32) - ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT1]], [[C1]] - ; SI-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](s32) - ; SI-NEXT: [[INT3:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT2]](s32) - ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT3]](s32) - ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[C1]] - ; SI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL2]](s32) - ; SI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT4]](s32) - ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT5]](s32) - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) - ; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; SI: [[DEF:%[0-9]+]]:_(<4 x f16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[DEF]](<4 x f16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST %42(i16) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %48(i16) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %43(i16) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST5]](<2 x i16>) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST6]](i32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST6]], [[C]](i32) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; SI-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FC45F3060000000 + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT]], [[C1]] + ; SI-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.fract), [[FMUL]](f32) + ; SI-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT]](f32) + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[INT1]](f32) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT1]], [[C1]] + ; SI-NEXT: [[INT2:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](f32) + ; SI-NEXT: [[INT3:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT2]](f32) + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[INT3]](f32) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST1]](f16) + ; SI-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT2]], [[C1]] + ; SI-NEXT: [[INT4:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL2]](f32) + ; SI-NEXT: [[INT5:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT4]](f32) + ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(f16) = G_FPTRUNC [[INT5]](f32) + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC2]](f16) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST7]](i16) + ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST8]](i16) + ; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST9]](i16) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32), [[ANYEXT2]](i32) + ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x i32>) + ; ; VI-LABEL: name: test_fcos_v3s16 - ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3118 - ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[C1]] - ; VI-NEXT: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s16) - ; VI-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT]](s16) - ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[C1]] - ; VI-NEXT: [[INT2:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](s16) - ; VI-NEXT: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT2]](s16) - ; VI-NEXT: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[C1]] - ; VI-NEXT: [[INT4:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL2]](s16) - ; VI-NEXT: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT4]](s16) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) - ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT3]](s16) - ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT5]](s16) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; VI: [[DEF:%[0-9]+]]:_(<4 x f16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), 
[[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[DEF]](<4 x f16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST %36(i16) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %42(i16) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %37(i16) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST5]](<2 x i16>) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST6]](i32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST6]], [[C]](i32) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH3118 + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST]], [[C1]] + ; VI-NEXT: [[INT:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](f16) + ; VI-NEXT: [[INT1:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT]](f16) + ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST2]], [[C1]] + ; VI-NEXT: [[INT2:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](f16) + ; VI-NEXT: [[INT3:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT2]](f16) + ; VI-NEXT: [[FMUL2:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST1]], [[C1]] + ; VI-NEXT: [[INT4:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL2]](f16) + ; VI-NEXT: [[INT5:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT4]](f16) + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[INT1]](f16) + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[INT3]](f16) + ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[INT5]](f16) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST7]](i16) + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST8]](i16) + ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST9]](i16) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32), [[ANYEXT2]](i32) + ; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x i32>) + ; ; GFX9-LABEL: name: test_fcos_v3s16 - ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3118 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[C1]] - ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL]](s16) - ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[C1]] - ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL1]](s16) - ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[C1]] - ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL2]](s16) - ; GFX9-NEXT: 
[[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT]](s16) - ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) - ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT2]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) - %0:_(<3 x s16>) = G_IMPLICIT_DEF - %1:_(<3 x s16>) = G_FCOS %0 - %2:_(<3 x s32>) = G_ANYEXT %1 - S_NOP 0, implicit %2 + ; GFX9: [[DEF:%[0-9]+]]:_(<4 x f16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[DEF]](<4 x f16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST %33(i16) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %39(i16) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %34(i16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST5]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST6]](i32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST6]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH3118 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST]], [[C1]] + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL]](f16) + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST2]], [[C1]] + ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL1]](f16) + ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST1]], [[C1]] + ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL2]](f16) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[INT]](f16) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[INT1]](f16) + ; GFX9-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[INT2]](f16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST7]](i16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST8]](i16) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST9]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32), [[ANYEXT2]](i32) + ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x i32>) + %0:_(<3 x f16>) = G_IMPLICIT_DEF + %1:_(<3 x f16>) = G_FCOS %0 + %2:_(<3 x i16>) = G_BITCAST %1(<3 x f16>) + %3:_(<3 x i32>) = G_ANYEXT %2(<3 x i16>) + S_NOP 0, implicit %3(<3 x i32>) ... 
--- @@ -459,119 +569,155 @@ body: | ; SI-LABEL: name: test_fcos_v4s16 ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FC45F3060000000 - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[C1]] - ; SI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s32) - ; SI-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT]](s32) - ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32) - ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT1]], [[C1]] - ; SI-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](s32) - ; SI-NEXT: [[INT3:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT2]](s32) - ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT3]](s32) - ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[C1]] - ; SI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL2]](s32) - ; SI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT4]](s32) - ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT5]](s32) - ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT3]], [[C1]] - ; SI-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL3]](s32) - ; SI-NEXT: [[INT7:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT6]](s32) - ; SI-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT7]](s32) - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) - ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x 
f16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x f16>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %33(i16) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %39(i16) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %34(i16) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %40(i16) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST5]](<2 x i16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST6]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST6]], [[C]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST7]](<2 x i16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST8]](i32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST8]], [[C]](i32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST1]](f16) + ; SI-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FC45F3060000000 + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT]], [[C1]] + ; SI-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](f32) + ; SI-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT]](f32) + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[INT1]](f32) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST3]](f16) + ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT1]], [[C1]] + ; SI-NEXT: [[INT2:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](f32) + ; SI-NEXT: [[INT3:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT2]](f32) + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[INT3]](f32) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; SI-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT2]], [[C1]] + ; SI-NEXT: [[INT4:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL2]](f32) + ; SI-NEXT: [[INT5:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT4]](f32) + ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(f16) = G_FPTRUNC [[INT5]](f32) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST4]](f16) + ; SI-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT3]], [[C1]] + ; SI-NEXT: [[INT6:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL3]](f32) + ; SI-NEXT: [[INT7:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT6]](f32) + ; SI-NEXT: [[FPTRUNC3:%[0-9]+]]:_(f16) = G_FPTRUNC [[INT7]](f32) + ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST9]](i16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST10]](i16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC2]](f16) + ; SI-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC3]](f16) + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST12]](i16) + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST13]](i16) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR 
[[ZEXT2]], [[SHL1]] + ; SI-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR1]](i32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[BITCAST11]](<2 x f16>), [[BITCAST14]](<2 x f16>) + ; SI-NEXT: [[BITCAST15:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x f16>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST15]](<4 x i16>) + ; ; VI-LABEL: name: test_fcos_v4s16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3118 - ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[C1]] - ; VI-NEXT: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s16) - ; VI-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT]](s16) - ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[C1]] - ; VI-NEXT: [[INT2:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](s16) - ; VI-NEXT: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT2]](s16) - ; VI-NEXT: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[C1]] - ; VI-NEXT: [[INT4:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL2]](s16) - ; VI-NEXT: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT4]](s16) - ; VI-NEXT: [[FMUL3:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC3]], [[C1]] - ; VI-NEXT: [[INT6:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL3]](s16) - ; VI-NEXT: [[INT7:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT6]](s16) - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[INT1]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[INT3]](s16) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[INT5]](s16) - ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[INT7]](s16) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x f16>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %25(i16) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %31(i16) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = 
G_BITCAST %26(i16) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %32(i16) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST5]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST6]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST6]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST7]](<2 x i16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST8]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST8]], [[C]](i32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH3118 + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST1]], [[C1]] + ; VI-NEXT: [[INT:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](f16) + ; VI-NEXT: [[INT1:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT]](f16) + ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST3]], [[C1]] + ; VI-NEXT: [[INT2:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](f16) + ; VI-NEXT: [[INT3:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT2]](f16) + ; VI-NEXT: [[FMUL2:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST2]], [[C1]] + ; VI-NEXT: [[INT4:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL2]](f16) + ; VI-NEXT: [[INT5:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT4]](f16) + ; VI-NEXT: [[FMUL3:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST4]], [[C1]] + ; VI-NEXT: [[INT6:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL3]](f16) + ; VI-NEXT: [[INT7:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT6]](f16) + ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[INT1]](f16) + ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[INT3]](f16) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST9]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST10]](i16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[INT5]](f16) + ; VI-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[INT7]](f16) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST12]](i16) + ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST13]](i16) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR1]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[BITCAST11]](<2 x f16>), [[BITCAST14]](<2 x f16>) + ; VI-NEXT: [[BITCAST15:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x f16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST15]](<4 x i16>) + ; ; GFX9-LABEL: name: test_fcos_v4s16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - 
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3118 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[C1]] - ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL]](s16) - ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[C1]] - ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL1]](s16) - ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[C1]] - ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL2]](s16) - ; GFX9-NEXT: [[FMUL3:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC3]], [[C1]] - ; GFX9-NEXT: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL3]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT]](s16), [[INT1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT2]](s16), [[INT3]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = G_FCOS %0 - $vgpr0_vgpr1 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x f16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %21(i16) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %27(i16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %22(i16) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %28(i16) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST5]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST6]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST6]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST7]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST8]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST8]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH3118 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST1]], [[C1]] + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL]](f16) + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST3]], [[C1]] + ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL1]](f16) + ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST2]], [[C1]] + ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL2]](f16) + ; 
GFX9-NEXT: [[FMUL3:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST4]], [[C1]] + ; GFX9-NEXT: [[INT3:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL3]](f16) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[INT]](f16), [[INT1]](f16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[INT2]](f16), [[INT3]](f16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x f16>), [[BUILD_VECTOR1]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST9:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x f16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST9]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x f16>) = G_BITCAST %0(<4 x i16>) + %2:_(<4 x f16>) = G_FCOS %1 + %3:_(<4 x i16>) = G_BITCAST %2(<4 x f16>) + $vgpr0_vgpr1 = COPY %3(<4 x i16>) ... --- @@ -583,31 +729,41 @@ body: | ; SI-LABEL: name: test_fcos_s32_flags ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FC45F3060000000 - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[C]] - ; SI-NEXT: [[INT:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s32) - ; SI-NEXT: [[INT1:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT]](s32) - ; SI-NEXT: $vgpr0 = COPY [[INT1]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FC45F3060000000 + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = nnan G_FMUL [[BITCAST]], [[C]] + ; SI-NEXT: [[INT:%[0-9]+]]:_(f32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](f32) + ; SI-NEXT: [[INT1:%[0-9]+]]:_(f32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT]](f32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[INT1]](f32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + ; ; VI-LABEL: name: test_fcos_s32_flags ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FC45F3060000000 - ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[C]] - ; VI-NEXT: [[INT:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s32) - ; VI-NEXT: [[INT1:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT]](s32) - ; VI-NEXT: $vgpr0 = COPY [[INT1]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FC45F3060000000 + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = nnan G_FMUL [[BITCAST]], [[C]] + ; VI-NEXT: [[INT:%[0-9]+]]:_(f32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](f32) + ; VI-NEXT: [[INT1:%[0-9]+]]:_(f32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT]](f32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[INT1]](f32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + ; ; GFX9-LABEL: name: test_fcos_s32_flags ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FC45F3060000000 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[C]] - ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL]](s32) - ; GFX9-NEXT: $vgpr0 = COPY [[INT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = nnan G_FCOS %0 - $vgpr0 = COPY %1 + ; GFX9-NEXT: 
[[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FC45F3060000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = nnan G_FMUL [[BITCAST]], [[C]] + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(f32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL]](f32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[INT]](f32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = nnan G_FCOS %1 + %3:_(i32) = G_BITCAST %2(f32) + $vgpr0 = COPY %3(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir index 1f9c059c2ac60..7aac3f023817b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir @@ -22,101 +22,120 @@ body: | ; SI-LABEL: name: test_fdiv_s16 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; SI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 0 - ; SI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 1 - ; SI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; SI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; SI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; SI-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[FPEXT1]](s32), [[FPEXT]](s32) - ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT6]](s32) - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST1]](f16) + ; SI-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; SI-NEXT: [[INT:%[0-9]+]]:_(f32), [[INT1:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](f32), [[FPEXT1]](f32), 0 + ; SI-NEXT: [[INT2:%[0-9]+]]:_(f32), [[INT3:%[0-9]+]]:_(i1) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](f32), [[FPEXT1]](f32), 1 + ; SI-NEXT: [[INT4:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](f32) + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[INT]] + ; SI-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[INT2]], [[FMA1]] + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; SI-NEXT: [[FMA3:%[0-9]+]]:_(f32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; SI-NEXT: [[FMA4:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; SI-NEXT: [[INT5:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](f32), [[FMA1]](f32), [[FMA3]](f32), [[INT3]](i1) + ; SI-NEXT: [[INT6:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](f32), [[FPEXT1]](f32), [[FPEXT]](f32) + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[INT6]](f32) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST2]](i16) + ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; VI-LABEL: name: test_fdiv_s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; VI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT1]] - ; VI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) - ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] - ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FNEG]], [[FMUL]] - ; VI-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[FPEXT]] - ; VI-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FADD]], [[INT]] - ; VI-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[FMUL]] - ; VI-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FNEG]], [[FADD1]] - ; VI-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FPEXT]] - ; VI-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[FADD2]], [[INT]] - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -8388608 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[FMUL4]], [[C]] - ; VI-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[AND]], [[FADD1]] - ; VI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD3]](s32) - ; VI-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC1]](s16), [[TRUNC]](s16) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; VI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; VI-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST1]](f16) + ; VI-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[FPEXT1]] + ; VI-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](f32) + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT]], [[INT]] + ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL 
[[FNEG]], [[FMUL]] + ; VI-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL1]], [[FPEXT]] + ; VI-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FADD]], [[INT]] + ; VI-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL2]], [[FMUL]] + ; VI-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[FNEG]], [[FADD1]] + ; VI-NEXT: [[FADD2:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FPEXT]] + ; VI-NEXT: [[FMUL4:%[0-9]+]]:_(f32) = G_FMUL [[FADD2]], [[INT]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMUL4]](f32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 -8388608 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C]] + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(f32) = G_BITCAST [[AND]](i32) + ; VI-NEXT: [[FADD3:%[0-9]+]]:_(f32) = G_FADD [[BITCAST3]], [[FADD1]] + ; VI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD3]](f32) + ; VI-NEXT: [[INT1:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](f16), [[BITCAST1]](f16), [[BITCAST]](f16) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[INT1]](f16) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST4]](i16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: test_fdiv_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX9-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT1]] - ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] - ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FNEG]], [[FMUL]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[FPEXT]] - ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FADD]], [[INT]] - ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[FMUL]] - ; GFX9-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FNEG]], [[FADD1]] - ; GFX9-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FPEXT]] - ; GFX9-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[FADD2]], [[INT]] - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -8388608 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[FMUL4]], [[C]] - ; GFX9-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[AND]], [[FADD1]] - ; GFX9-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD3]](s32) - ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC1]](s16), [[TRUNC]](s16) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; GFX9-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST1]](f16) + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[FPEXT1]] + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](f32) + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT]], [[INT]] + ; GFX9-NEXT: 
[[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[FNEG]], [[FMUL]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL1]], [[FPEXT]] + ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FADD]], [[INT]] + ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL2]], [[FMUL]] + ; GFX9-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[FNEG]], [[FADD1]] + ; GFX9-NEXT: [[FADD2:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FPEXT]] + ; GFX9-NEXT: [[FMUL4:%[0-9]+]]:_(f32) = G_FMUL [[FADD2]], [[INT]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMUL4]](f32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 -8388608 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C]] + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(f32) = G_BITCAST [[AND]](i32) + ; GFX9-NEXT: [[FADD3:%[0-9]+]]:_(f32) = G_FADD [[BITCAST3]], [[FADD1]] + ; GFX9-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD3]](f32) + ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](f16), [[BITCAST1]](f16), [[BITCAST]](f16) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[INT1]](f16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST4]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-UNSAFE-LABEL: name: test_fdiv_s16 ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1 ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-UNSAFE-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC1]](s16) - ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[INT]] - ; GFX9-UNSAFE-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL]](s16) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s16) = G_FDIV %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-UNSAFE-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[BITCAST1]](f16) + ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST]], [[INT]] + ; GFX9-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[FMUL]](f16) + ; GFX9-UNSAFE-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST2]](i16) + ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(f16) = G_BITCAST %2(i16) + %5:_(f16) = G_BITCAST %3(i16) + %6:_(f16) = G_FDIV %4, %5 + %7:_(i16) = G_BITCAST %6(f16) + %8:_(i32) = G_ANYEXT %7(i16) + $vgpr0 = COPY %8(i32) ... 
--- @@ -135,95 +154,113 @@ body: | ; SI-LABEL: name: test_fdiv_s32_denorms_on ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; SI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 0 - ; SI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 1 - ; SI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; SI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; SI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; SI-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) - ; SI-NEXT: $vgpr0 = COPY [[INT6]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; SI-NEXT: [[INT:%[0-9]+]]:_(f32), [[INT1:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[BITCAST]](f32), [[BITCAST1]](f32), 0 + ; SI-NEXT: [[INT2:%[0-9]+]]:_(f32), [[INT3:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[BITCAST]](f32), [[BITCAST1]](f32), 1 + ; SI-NEXT: [[INT4:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](f32) + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[INT]] + ; SI-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[INT2]], [[FMA1]] + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; SI-NEXT: [[FMA3:%[0-9]+]]:_(f32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; SI-NEXT: [[FMA4:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; SI-NEXT: [[INT5:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](f32), [[FMA1]](f32), [[FMA3]](f32), [[INT3]](i1) + ; SI-NEXT: [[INT6:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](f32), [[BITCAST1]](f32), [[BITCAST]](f32) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[INT6]](f32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) ; ; VI-LABEL: name: test_fdiv_s32_denorms_on ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; VI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 0 - ; VI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 1 - ; VI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; VI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; VI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; VI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; VI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; VI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; VI-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) - ; VI-NEXT: $vgpr0 = COPY [[INT6]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; VI-NEXT: [[INT:%[0-9]+]]:_(f32), [[INT1:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[BITCAST]](f32), [[BITCAST1]](f32), 0 + ; VI-NEXT: [[INT2:%[0-9]+]]:_(f32), [[INT3:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[BITCAST]](f32), [[BITCAST1]](f32), 1 + ; VI-NEXT: [[INT4:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](f32) + ; VI-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[INT]] + ; VI-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[INT2]], [[FMA1]] + ; VI-NEXT: [[FMA2:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; VI-NEXT: [[FMA3:%[0-9]+]]:_(f32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; VI-NEXT: [[FMA4:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; VI-NEXT: [[INT5:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](f32), [[FMA1]](f32), [[FMA3]](f32), [[INT3]](i1) + ; VI-NEXT: [[INT6:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](f32), [[BITCAST1]](f32), [[BITCAST]](f32) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[INT6]](f32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) ; ; GFX9-LABEL: name: test_fdiv_s32_denorms_on ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 0 - ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 1 - ; GFX9-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; GFX9-NEXT: 
[[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; GFX9-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; GFX9-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) - ; GFX9-NEXT: $vgpr0 = COPY [[INT6]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(f32), [[INT1:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[BITCAST]](f32), [[BITCAST1]](f32), 0 + ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(f32), [[INT3:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[BITCAST]](f32), [[BITCAST1]](f32), 1 + ; GFX9-NEXT: [[INT4:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](f32) + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[INT]] + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[INT2]], [[FMA1]] + ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; GFX9-NEXT: [[FMA3:%[0-9]+]]:_(f32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; GFX9-NEXT: [[FMA4:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](f32), [[FMA1]](f32), [[FMA3]](f32), [[INT3]](i1) + ; GFX9-NEXT: [[INT6:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](f32), [[BITCAST1]](f32), [[BITCAST]](f32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[INT6]](f32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) ; ; GFX9-UNSAFE-LABEL: name: test_fdiv_s32_denorms_on ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1 ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY1]](s32) - ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[INT]] - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[FMUL]](s32) + ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[BITCAST1]](f32) + ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST]], [[INT]] + ; GFX9-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMUL]](f32) + ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) ; ; GFX10-LABEL: name: test_fdiv_s32_denorms_on ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX10-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 0 - ; 
GFX10-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 1 - ; GFX10-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; GFX10-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; GFX10-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; GFX10-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; GFX10-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; GFX10-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) - ; GFX10-NEXT: $vgpr0 = COPY [[INT6]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_FDIV %0, %1 - $vgpr0 = COPY %2 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX10-NEXT: [[INT:%[0-9]+]]:_(f32), [[INT1:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[BITCAST]](f32), [[BITCAST1]](f32), 0 + ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(f32), [[INT3:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[BITCAST]](f32), [[BITCAST1]](f32), 1 + ; GFX10-NEXT: [[INT4:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](f32) + ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[INT]] + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[INT2]], [[FMA1]] + ; GFX10-NEXT: [[FMA2:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; GFX10-NEXT: [[FMA3:%[0-9]+]]:_(f32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; GFX10-NEXT: [[FMA4:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; GFX10-NEXT: [[INT5:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](f32), [[FMA1]](f32), [[FMA3]](f32), [[INT3]](i1) + ; GFX10-NEXT: [[INT6:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](f32), [[BITCAST1]](f32), [[BITCAST]](f32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[INT6]](f32) + ; GFX10-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32) = G_FDIV %2, %3 + %5:_(i32) = G_BITCAST %4(f32) + $vgpr0 = COPY %5(i32) ... 
--- @@ -242,103 +279,121 @@ body: | ; SI-LABEL: name: test_fdiv_s32_denorms_off ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; SI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 0 - ; SI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 1 - ; SI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; SI-NEXT: [[INT:%[0-9]+]]:_(f32), [[INT1:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[BITCAST]](f32), [[BITCAST1]](f32), 0 + ; SI-NEXT: [[INT2:%[0-9]+]]:_(f32), [[INT3:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[BITCAST]](f32), [[BITCAST1]](f32), 1 + ; SI-NEXT: [[INT4:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](f32) + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[INT]] ; SI-NEXT: S_SETREG_IMM32_B32 3, 2305, implicit-def $mode, implicit $mode - ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; SI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; SI-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[INT2]], [[FMA1]] + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; SI-NEXT: [[FMA3:%[0-9]+]]:_(f32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; SI-NEXT: [[FMA4:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] ; SI-NEXT: S_SETREG_IMM32_B32 0, 2305, implicit-def $mode, implicit $mode - ; SI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; SI-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) - ; SI-NEXT: $vgpr0 = COPY [[INT6]](s32) + ; SI-NEXT: [[INT5:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](f32), [[FMA1]](f32), [[FMA3]](f32), [[INT3]](i1) + ; SI-NEXT: [[INT6:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](f32), [[BITCAST1]](f32), [[BITCAST]](f32) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[INT6]](f32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) ; ; VI-LABEL: name: test_fdiv_s32_denorms_off ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; VI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 0 - ; VI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 1 - ; VI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; VI-NEXT: [[INT:%[0-9]+]]:_(f32), [[INT1:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[BITCAST]](f32), [[BITCAST1]](f32), 0 + ; VI-NEXT: [[INT2:%[0-9]+]]:_(f32), [[INT3:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[BITCAST]](f32), [[BITCAST1]](f32), 1 + ; VI-NEXT: [[INT4:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](f32) + ; VI-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[INT]] ; VI-NEXT: S_SETREG_IMM32_B32 3, 2305, implicit-def $mode, implicit $mode - ; VI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; VI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; VI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; VI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; VI-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[INT2]], [[FMA1]] + ; VI-NEXT: [[FMA2:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; VI-NEXT: [[FMA3:%[0-9]+]]:_(f32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; VI-NEXT: [[FMA4:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] ; VI-NEXT: S_SETREG_IMM32_B32 0, 2305, implicit-def $mode, implicit $mode - ; VI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; VI-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) - ; VI-NEXT: $vgpr0 = COPY [[INT6]](s32) + ; VI-NEXT: [[INT5:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](f32), [[FMA1]](f32), [[FMA3]](f32), [[INT3]](i1) + ; VI-NEXT: [[INT6:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](f32), [[BITCAST1]](f32), [[BITCAST]](f32) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[INT6]](f32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) ; ; GFX9-LABEL: name: test_fdiv_s32_denorms_off ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 0 - ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 1 - ; GFX9-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; GFX9-NEXT: 
[[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(f32), [[INT1:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[BITCAST]](f32), [[BITCAST1]](f32), 0 + ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(f32), [[INT3:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[BITCAST]](f32), [[BITCAST1]](f32), 1 + ; GFX9-NEXT: [[INT4:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](f32) + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[INT]] ; GFX9-NEXT: S_SETREG_IMM32_B32 3, 2305, implicit-def $mode, implicit $mode - ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; GFX9-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; GFX9-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[INT2]], [[FMA1]] + ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; GFX9-NEXT: [[FMA3:%[0-9]+]]:_(f32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; GFX9-NEXT: [[FMA4:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] ; GFX9-NEXT: S_SETREG_IMM32_B32 0, 2305, implicit-def $mode, implicit $mode - ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; GFX9-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) - ; GFX9-NEXT: $vgpr0 = COPY [[INT6]](s32) + ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](f32), [[FMA1]](f32), [[FMA3]](f32), [[INT3]](i1) + ; GFX9-NEXT: [[INT6:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](f32), [[BITCAST1]](f32), [[BITCAST]](f32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[INT6]](f32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) ; ; GFX9-UNSAFE-LABEL: name: test_fdiv_s32_denorms_off ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1 ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY1]](s32) - ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[INT]] - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[FMUL]](s32) + ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[BITCAST1]](f32) + ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST]], [[INT]] + ; GFX9-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMUL]](f32) + ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY 
[[BITCAST2]](i32) ; ; GFX10-LABEL: name: test_fdiv_s32_denorms_off ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX10-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 0 - ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 1 - ; GFX10-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX10-NEXT: [[INT:%[0-9]+]]:_(f32), [[INT1:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[BITCAST]](f32), [[BITCAST1]](f32), 0 + ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(f32), [[INT3:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[BITCAST]](f32), [[BITCAST1]](f32), 1 + ; GFX10-NEXT: [[INT4:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](f32) + ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[INT]] ; GFX10-NEXT: S_DENORM_MODE 15, implicit-def $mode, implicit $mode - ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; GFX10-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; GFX10-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; GFX10-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[INT2]], [[FMA1]] + ; GFX10-NEXT: [[FMA2:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; GFX10-NEXT: [[FMA3:%[0-9]+]]:_(f32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; GFX10-NEXT: [[FMA4:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] ; GFX10-NEXT: S_DENORM_MODE 12, implicit-def $mode, implicit $mode - ; GFX10-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; GFX10-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) - ; GFX10-NEXT: $vgpr0 = COPY [[INT6]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_FDIV %0, %1 - $vgpr0 = COPY %2 + ; GFX10-NEXT: [[INT5:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](f32), [[FMA1]](f32), [[FMA3]](f32), [[INT3]](i1) + ; GFX10-NEXT: [[INT6:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](f32), [[BITCAST1]](f32), [[BITCAST]](f32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[INT6]](f32) + ; GFX10-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32) = G_FDIV %2, %3 + %5:_(i32) = G_BITCAST %4(f32) + 
$vgpr0 = COPY %5(i32) ... --- @@ -357,103 +412,121 @@ body: | ; SI-LABEL: name: test_fdiv_s32_denorms_off_arcp ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; SI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 0 - ; SI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 1 - ; SI-NEXT: [[INT4:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = arcp G_FNEG [[INT]] + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; SI-NEXT: [[INT:%[0-9]+]]:_(f32), [[INT1:%[0-9]+]]:_(i1) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[BITCAST]](f32), [[BITCAST1]](f32), 0 + ; SI-NEXT: [[INT2:%[0-9]+]]:_(f32), [[INT3:%[0-9]+]]:_(i1) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[BITCAST]](f32), [[BITCAST1]](f32), 1 + ; SI-NEXT: [[INT4:%[0-9]+]]:_(f32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](f32) + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(f32) = arcp G_FNEG [[INT]] ; SI-NEXT: S_SETREG_IMM32_B32 3, 2305, implicit-def $mode, implicit $mode - ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = arcp G_FMA [[FNEG]], [[INT4]], [[C]] - ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = arcp G_FMA [[FMA]], [[INT4]], [[INT4]] - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = arcp G_FMUL [[INT2]], [[FMA1]] - ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = arcp G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = arcp G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; SI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = arcp G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; SI-NEXT: [[FMA:%[0-9]+]]:_(f32) = arcp G_FMA [[FNEG]], [[INT4]], [[C]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(f32) = arcp G_FMA [[FMA]], [[INT4]], [[INT4]] + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = arcp G_FMUL [[INT2]], [[FMA1]] + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(f32) = arcp G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; SI-NEXT: [[FMA3:%[0-9]+]]:_(f32) = arcp G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; SI-NEXT: [[FMA4:%[0-9]+]]:_(f32) = arcp G_FMA [[FNEG]], [[FMA3]], [[INT2]] ; SI-NEXT: S_SETREG_IMM32_B32 0, 2305, implicit-def $mode, implicit $mode - ; SI-NEXT: [[INT5:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; SI-NEXT: [[INT6:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) - ; SI-NEXT: $vgpr0 = COPY [[INT6]](s32) + ; SI-NEXT: [[INT5:%[0-9]+]]:_(f32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](f32), [[FMA1]](f32), [[FMA3]](f32), [[INT3]](i1) + ; SI-NEXT: [[INT6:%[0-9]+]]:_(f32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](f32), [[BITCAST1]](f32), [[BITCAST]](f32) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[INT6]](f32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) ; ; VI-LABEL: name: test_fdiv_s32_denorms_off_arcp ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; 
VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; VI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 0 - ; VI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 1 - ; VI-NEXT: [[INT4:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = arcp G_FNEG [[INT]] + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; VI-NEXT: [[INT:%[0-9]+]]:_(f32), [[INT1:%[0-9]+]]:_(i1) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[BITCAST]](f32), [[BITCAST1]](f32), 0 + ; VI-NEXT: [[INT2:%[0-9]+]]:_(f32), [[INT3:%[0-9]+]]:_(i1) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[BITCAST]](f32), [[BITCAST1]](f32), 1 + ; VI-NEXT: [[INT4:%[0-9]+]]:_(f32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](f32) + ; VI-NEXT: [[FNEG:%[0-9]+]]:_(f32) = arcp G_FNEG [[INT]] ; VI-NEXT: S_SETREG_IMM32_B32 3, 2305, implicit-def $mode, implicit $mode - ; VI-NEXT: [[FMA:%[0-9]+]]:_(s32) = arcp G_FMA [[FNEG]], [[INT4]], [[C]] - ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = arcp G_FMA [[FMA]], [[INT4]], [[INT4]] - ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = arcp G_FMUL [[INT2]], [[FMA1]] - ; VI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = arcp G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; VI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = arcp G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; VI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = arcp G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; VI-NEXT: [[FMA:%[0-9]+]]:_(f32) = arcp G_FMA [[FNEG]], [[INT4]], [[C]] + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(f32) = arcp G_FMA [[FMA]], [[INT4]], [[INT4]] + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = arcp G_FMUL [[INT2]], [[FMA1]] + ; VI-NEXT: [[FMA2:%[0-9]+]]:_(f32) = arcp G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; VI-NEXT: [[FMA3:%[0-9]+]]:_(f32) = arcp G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; VI-NEXT: [[FMA4:%[0-9]+]]:_(f32) = arcp G_FMA [[FNEG]], [[FMA3]], [[INT2]] ; VI-NEXT: S_SETREG_IMM32_B32 0, 2305, implicit-def $mode, implicit $mode - ; VI-NEXT: [[INT5:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; VI-NEXT: [[INT6:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) - ; VI-NEXT: $vgpr0 = COPY [[INT6]](s32) + ; VI-NEXT: [[INT5:%[0-9]+]]:_(f32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](f32), [[FMA1]](f32), [[FMA3]](f32), [[INT3]](i1) + ; VI-NEXT: [[INT6:%[0-9]+]]:_(f32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](f32), [[BITCAST1]](f32), [[BITCAST]](f32) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[INT6]](f32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) ; ; GFX9-LABEL: name: test_fdiv_s32_denorms_off_arcp ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 0 - ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s32), 
[[INT3:%[0-9]+]]:_(s1) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 1 - ; GFX9-NEXT: [[INT4:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s32) = arcp G_FNEG [[INT]] + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(f32), [[INT1:%[0-9]+]]:_(i1) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[BITCAST]](f32), [[BITCAST1]](f32), 0 + ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(f32), [[INT3:%[0-9]+]]:_(i1) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[BITCAST]](f32), [[BITCAST1]](f32), 1 + ; GFX9-NEXT: [[INT4:%[0-9]+]]:_(f32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](f32) + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(f32) = arcp G_FNEG [[INT]] ; GFX9-NEXT: S_SETREG_IMM32_B32 3, 2305, implicit-def $mode, implicit $mode - ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = arcp G_FMA [[FNEG]], [[INT4]], [[C]] - ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s32) = arcp G_FMA [[FMA]], [[INT4]], [[INT4]] - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = arcp G_FMUL [[INT2]], [[FMA1]] - ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(s32) = arcp G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; GFX9-NEXT: [[FMA3:%[0-9]+]]:_(s32) = arcp G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; GFX9-NEXT: [[FMA4:%[0-9]+]]:_(s32) = arcp G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(f32) = arcp G_FMA [[FNEG]], [[INT4]], [[C]] + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(f32) = arcp G_FMA [[FMA]], [[INT4]], [[INT4]] + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = arcp G_FMUL [[INT2]], [[FMA1]] + ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(f32) = arcp G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; GFX9-NEXT: [[FMA3:%[0-9]+]]:_(f32) = arcp G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; GFX9-NEXT: [[FMA4:%[0-9]+]]:_(f32) = arcp G_FMA [[FNEG]], [[FMA3]], [[INT2]] ; GFX9-NEXT: S_SETREG_IMM32_B32 0, 2305, implicit-def $mode, implicit $mode - ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; GFX9-NEXT: [[INT6:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) - ; GFX9-NEXT: $vgpr0 = COPY [[INT6]](s32) + ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(f32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](f32), [[FMA1]](f32), [[FMA3]](f32), [[INT3]](i1) + ; GFX9-NEXT: [[INT6:%[0-9]+]]:_(f32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](f32), [[BITCAST1]](f32), [[BITCAST]](f32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[INT6]](f32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) ; ; GFX9-UNSAFE-LABEL: name: test_fdiv_s32_denorms_off_arcp ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1 ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY1]](s32) - ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(s32) = arcp G_FMUL [[COPY]], [[INT]] - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[FMUL]](s32) + ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = 
G_BITCAST [[COPY]](i32) + ; GFX9-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(f32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[BITCAST1]](f32) + ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(f32) = arcp G_FMUL [[BITCAST]], [[INT]] + ; GFX9-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMUL]](f32) + ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) ; ; GFX10-LABEL: name: test_fdiv_s32_denorms_off_arcp ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX10-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 0 - ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 1 - ; GFX10-NEXT: [[INT4:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(s32) = arcp G_FNEG [[INT]] + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX10-NEXT: [[INT:%[0-9]+]]:_(f32), [[INT1:%[0-9]+]]:_(i1) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[BITCAST]](f32), [[BITCAST1]](f32), 0 + ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(f32), [[INT3:%[0-9]+]]:_(i1) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[BITCAST]](f32), [[BITCAST1]](f32), 1 + ; GFX10-NEXT: [[INT4:%[0-9]+]]:_(f32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](f32) + ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(f32) = arcp G_FNEG [[INT]] ; GFX10-NEXT: S_DENORM_MODE 15, implicit-def $mode, implicit $mode - ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = arcp G_FMA [[FNEG]], [[INT4]], [[C]] - ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = arcp G_FMA [[FMA]], [[INT4]], [[INT4]] - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = arcp G_FMUL [[INT2]], [[FMA1]] - ; GFX10-NEXT: [[FMA2:%[0-9]+]]:_(s32) = arcp G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; GFX10-NEXT: [[FMA3:%[0-9]+]]:_(s32) = arcp G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; GFX10-NEXT: [[FMA4:%[0-9]+]]:_(s32) = arcp G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(f32) = arcp G_FMA [[FNEG]], [[INT4]], [[C]] + ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(f32) = arcp G_FMA [[FMA]], [[INT4]], [[INT4]] + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f32) = arcp G_FMUL [[INT2]], [[FMA1]] + ; GFX10-NEXT: [[FMA2:%[0-9]+]]:_(f32) = arcp G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; GFX10-NEXT: [[FMA3:%[0-9]+]]:_(f32) = arcp G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; GFX10-NEXT: [[FMA4:%[0-9]+]]:_(f32) = arcp G_FMA [[FNEG]], [[FMA3]], [[INT2]] ; GFX10-NEXT: S_DENORM_MODE 12, implicit-def $mode, implicit $mode - ; GFX10-NEXT: [[INT5:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; GFX10-NEXT: [[INT6:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) - ; GFX10-NEXT: $vgpr0 = COPY [[INT6]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = arcp G_FDIV %0, %1 - $vgpr0 = COPY %2 + ; GFX10-NEXT: [[INT5:%[0-9]+]]:_(f32) = arcp 
G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](f32), [[FMA1]](f32), [[FMA3]](f32), [[INT3]](i1) + ; GFX10-NEXT: [[INT6:%[0-9]+]]:_(f32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](f32), [[BITCAST1]](f32), [[BITCAST]](f32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[INT6]](f32) + ; GFX10-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32) = arcp G_FDIV %2, %3 + %5:_(i32) = G_BITCAST %4(f32) + $vgpr0 = COPY %5(i32) ... --- @@ -472,110 +545,132 @@ body: | ; SI-LABEL: name: test_fdiv_s64 ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; SI-NEXT: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s64), [[COPY1]](s64), 0 - ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] - ; SI-NEXT: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) - ; SI-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]] - ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] - ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] - ; SI-NEXT: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s64), [[COPY1]](s64), 1 - ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] - ; SI-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT]](s64) - ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT3]](s64) - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV1]](s32), [[UV7]] - ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV3]](s32), [[UV5]] - ; SI-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP]], [[ICMP1]] - ; SI-NEXT: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[XOR]](s1) - ; SI-NEXT: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[COPY1]](s64), [[COPY]](s64) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[INT6]](s64) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[COPY1]](i64) + ; SI-NEXT: [[C:%[0-9]+]]:_(f64) = G_FCONSTANT double 1.000000e+00 + ; SI-NEXT: [[INT:%[0-9]+]]:_(f64), [[INT1:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[BITCAST]](f64), [[BITCAST1]](f64), 0 + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(f64) = G_FNEG [[INT]] + ; SI-NEXT: [[INT2:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](f64) + ; SI-NEXT: [[FMA:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[INT2]], [[C]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(f64) = G_FMA [[INT2]], [[FMA]], [[INT2]] + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[FMA1]], [[C]] + ; SI-NEXT: [[INT3:%[0-9]+]]:_(f64), [[INT4:%[0-9]+]]:_(i1) = 
G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[BITCAST]](f64), [[BITCAST1]](f64), 1 + ; SI-NEXT: [[FMA3:%[0-9]+]]:_(f64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[INT3]], [[FMA3]] + ; SI-NEXT: [[FMA4:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i64) = G_BITCAST [[BITCAST]](f64) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST2]](i64) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(i64) = G_BITCAST [[BITCAST1]](f64) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](i64) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(i64) = G_BITCAST [[INT]](f64) + ; SI-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST4]](i64) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(i64) = G_BITCAST [[INT3]](f64) + ; SI-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST5]](i64) + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[UV1]](i32), [[UV7]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[UV3]](i32), [[UV5]] + ; SI-NEXT: [[XOR:%[0-9]+]]:_(i1) = G_XOR [[ICMP]], [[ICMP1]] + ; SI-NEXT: [[INT5:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](f64), [[FMA3]](f64), [[FMUL]](f64), [[XOR]](i1) + ; SI-NEXT: [[INT6:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](f64), [[BITCAST1]](f64), [[BITCAST]](f64) + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(i64) = G_BITCAST [[INT6]](f64) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST6]](i64) ; ; VI-LABEL: name: test_fdiv_s64 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; VI-NEXT: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s64), [[COPY1]](s64), 0 - ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] - ; VI-NEXT: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) - ; VI-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]] - ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] - ; VI-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] - ; VI-NEXT: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s64), [[COPY1]](s64), 1 - ; VI-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] - ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] - ; VI-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] - ; VI-NEXT: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[INT4]](s1) - ; VI-NEXT: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[COPY1]](s64), [[COPY]](s64) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[INT6]](s64) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[COPY1]](i64) + ; VI-NEXT: [[C:%[0-9]+]]:_(f64) = G_FCONSTANT double 1.000000e+00 + ; VI-NEXT: [[INT:%[0-9]+]]:_(f64), [[INT1:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[BITCAST]](f64), [[BITCAST1]](f64), 0 + ; VI-NEXT: [[FNEG:%[0-9]+]]:_(f64) = G_FNEG 
[[INT]] + ; VI-NEXT: [[INT2:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](f64) + ; VI-NEXT: [[FMA:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[INT2]], [[C]] + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(f64) = G_FMA [[INT2]], [[FMA]], [[INT2]] + ; VI-NEXT: [[FMA2:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[FMA1]], [[C]] + ; VI-NEXT: [[INT3:%[0-9]+]]:_(f64), [[INT4:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[BITCAST]](f64), [[BITCAST1]](f64), 1 + ; VI-NEXT: [[FMA3:%[0-9]+]]:_(f64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[INT3]], [[FMA3]] + ; VI-NEXT: [[FMA4:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] + ; VI-NEXT: [[INT5:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](f64), [[FMA3]](f64), [[FMUL]](f64), [[INT4]](i1) + ; VI-NEXT: [[INT6:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](f64), [[BITCAST1]](f64), [[BITCAST]](f64) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i64) = G_BITCAST [[INT6]](f64) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](i64) ; ; GFX9-LABEL: name: test_fdiv_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s64), [[COPY1]](s64), 0 - ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] - ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) - ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]] - ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] - ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] - ; GFX9-NEXT: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s64), [[COPY1]](s64), 1 - ; GFX9-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] - ; GFX9-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] - ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[INT4]](s1) - ; GFX9-NEXT: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[COPY1]](s64), [[COPY]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[INT6]](s64) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[COPY1]](i64) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f64) = G_FCONSTANT double 1.000000e+00 + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(f64), [[INT1:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[BITCAST]](f64), [[BITCAST1]](f64), 0 + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(f64) = G_FNEG [[INT]] + ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](f64) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[INT2]], [[C]] + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(f64) = G_FMA [[INT2]], [[FMA]], [[INT2]] + ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[FMA1]], [[C]] + ; GFX9-NEXT: [[INT3:%[0-9]+]]:_(f64), [[INT4:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[BITCAST]](f64), 
[[BITCAST1]](f64), 1 + ; GFX9-NEXT: [[FMA3:%[0-9]+]]:_(f64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[INT3]], [[FMA3]] + ; GFX9-NEXT: [[FMA4:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] + ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](f64), [[FMA3]](f64), [[FMUL]](f64), [[INT4]](i1) + ; GFX9-NEXT: [[INT6:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](f64), [[BITCAST1]](f64), [[BITCAST]](f64) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i64) = G_BITCAST [[INT6]](f64) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](i64) ; ; GFX9-UNSAFE-LABEL: name: test_fdiv_s64 ; GFX9-UNSAFE: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9-UNSAFE-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY1]] - ; GFX9-UNSAFE-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY1]](s64) - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT]], [[C]] - ; GFX9-UNSAFE-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[FMA]], [[INT]], [[INT]] - ; GFX9-UNSAFE-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] - ; GFX9-UNSAFE-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA2]], [[FMA1]], [[FMA1]] - ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[COPY]], [[FMA3]] - ; GFX9-UNSAFE-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[COPY]] - ; GFX9-UNSAFE-NEXT: [[FMA5:%[0-9]+]]:_(s64) = G_FMA [[FMA4]], [[FMA3]], [[FMUL]] - ; GFX9-UNSAFE-NEXT: $vgpr0_vgpr1 = COPY [[FMA5]](s64) + ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX9-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; GFX9-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[COPY1]](i64) + ; GFX9-UNSAFE-NEXT: [[FNEG:%[0-9]+]]:_(f64) = G_FNEG [[BITCAST1]] + ; GFX9-UNSAFE-NEXT: [[C:%[0-9]+]]:_(f64) = G_FCONSTANT double 1.000000e+00 + ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[BITCAST1]](f64) + ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[INT]], [[C]] + ; GFX9-UNSAFE-NEXT: [[FMA1:%[0-9]+]]:_(f64) = G_FMA [[FMA]], [[INT]], [[INT]] + ; GFX9-UNSAFE-NEXT: [[FMA2:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[FMA1]], [[C]] + ; GFX9-UNSAFE-NEXT: [[FMA3:%[0-9]+]]:_(f64) = G_FMA [[FMA2]], [[FMA1]], [[FMA1]] + ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[BITCAST]], [[FMA3]] + ; GFX9-UNSAFE-NEXT: [[FMA4:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[FMUL]], [[BITCAST]] + ; GFX9-UNSAFE-NEXT: [[FMA5:%[0-9]+]]:_(f64) = G_FMA [[FMA4]], [[FMA3]], [[FMUL]] + ; GFX9-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(i64) = G_BITCAST [[FMA5]](f64) + ; GFX9-UNSAFE-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](i64) ; ; GFX10-LABEL: name: test_fdiv_s64 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; GFX10-NEXT: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s64), [[COPY1]](s64), 0 - ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] - ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) - ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]] - ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] - ; GFX10-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] - ; GFX10-NEXT: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s64), [[COPY1]](s64), 1 - ; GFX10-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] - ; GFX10-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] - ; GFX10-NEXT: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[INT4]](s1) - ; GFX10-NEXT: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[COPY1]](s64), [[COPY]](s64) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[INT6]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = G_FDIV %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[COPY1]](i64) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(f64) = G_FCONSTANT double 1.000000e+00 + ; GFX10-NEXT: [[INT:%[0-9]+]]:_(f64), [[INT1:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[BITCAST]](f64), [[BITCAST1]](f64), 0 + ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(f64) = G_FNEG [[INT]] + ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](f64) + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[INT2]], [[C]] + ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(f64) = G_FMA [[INT2]], [[FMA]], [[INT2]] + ; GFX10-NEXT: [[FMA2:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[FMA1]], [[C]] + ; GFX10-NEXT: [[INT3:%[0-9]+]]:_(f64), [[INT4:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[BITCAST]](f64), [[BITCAST1]](f64), 1 + ; GFX10-NEXT: [[FMA3:%[0-9]+]]:_(f64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[INT3]], [[FMA3]] + ; GFX10-NEXT: [[FMA4:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] + ; GFX10-NEXT: [[INT5:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](f64), [[FMA3]](f64), [[FMUL]](f64), [[INT4]](i1) + ; GFX10-NEXT: [[INT6:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](f64), [[BITCAST1]](f64), [[BITCAST]](f64) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(i64) = G_BITCAST [[INT6]](f64) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(f64) = G_BITCAST %0(i64) + %3:_(f64) = G_BITCAST %1(i64) + %4:_(f64) = G_FDIV %2, %3 + %5:_(i64) = G_BITCAST %4(f64) + $vgpr0_vgpr1 = COPY %5(i64) ... 
--- @@ -594,176 +689,194 @@ body: | ; SI-LABEL: name: test_fdiv_v2s32 ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; SI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 0 - ; SI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 1 - ; SI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY1]](<2 x i32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f32>) + ; SI-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; SI-NEXT: [[INT:%[0-9]+]]:_(f32), [[INT1:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](f32), [[UV2]](f32), 0 + ; SI-NEXT: [[INT2:%[0-9]+]]:_(f32), [[INT3:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](f32), [[UV2]](f32), 1 + ; SI-NEXT: [[INT4:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](f32) + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[INT]] ; SI-NEXT: S_SETREG_IMM32_B32 3, 2305, implicit-def $mode, implicit $mode - ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; SI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; SI-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[INT2]], [[FMA1]] + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; SI-NEXT: [[FMA3:%[0-9]+]]:_(f32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; SI-NEXT: [[FMA4:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] ; SI-NEXT: S_SETREG_IMM32_B32 0, 2305, implicit-def $mode, implicit $mode - ; SI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; SI-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV2]](s32), [[UV]](s32) - ; SI-NEXT: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 0 - ; SI-NEXT: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 1 - ; SI-NEXT: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) - ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] + ; SI-NEXT: [[INT5:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](f32), [[FMA1]](f32), [[FMA3]](f32), [[INT3]](i1) + ; SI-NEXT: [[INT6:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](f32), [[UV2]](f32), [[UV]](f32) + ; SI-NEXT: [[INT7:%[0-9]+]]:_(f32), [[INT8:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](f32), [[UV3]](f32), 0 + ; SI-NEXT: [[INT9:%[0-9]+]]:_(f32), [[INT10:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](f32), [[UV3]](f32), 1 + ; SI-NEXT: [[INT11:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](f32) + ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(f32) = G_FNEG [[INT7]] ; SI-NEXT: S_SETREG_IMM32_B32 3, 2305, implicit-def $mode, implicit $mode - ; SI-NEXT: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C]] - ; SI-NEXT: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] - ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] - ; SI-NEXT: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] - ; SI-NEXT: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] - ; SI-NEXT: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] + ; SI-NEXT: [[FMA5:%[0-9]+]]:_(f32) = G_FMA [[FNEG1]], [[INT11]], [[C]] + ; SI-NEXT: [[FMA6:%[0-9]+]]:_(f32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] + ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[INT9]], [[FMA6]] + ; SI-NEXT: [[FMA7:%[0-9]+]]:_(f32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] + ; SI-NEXT: [[FMA8:%[0-9]+]]:_(f32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] + ; SI-NEXT: [[FMA9:%[0-9]+]]:_(f32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] ; SI-NEXT: S_SETREG_IMM32_B32 0, 2305, implicit-def $mode, implicit $mode - ; SI-NEXT: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) - ; SI-NEXT: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV3]](s32), [[UV1]](s32) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[INT12:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](f32), [[FMA6]](f32), [[FMA8]](f32), [[INT10]](i1) + ; SI-NEXT: [[INT13:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](f32), [[UV3]](f32), [[UV1]](f32) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[INT6]](f32), [[INT13]](f32) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](<2 x i32>) ; ; VI-LABEL: name: test_fdiv_v2s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; VI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 0 - ; VI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 1 - ; 
VI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY1]](<2 x i32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f32>) + ; VI-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; VI-NEXT: [[INT:%[0-9]+]]:_(f32), [[INT1:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](f32), [[UV2]](f32), 0 + ; VI-NEXT: [[INT2:%[0-9]+]]:_(f32), [[INT3:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](f32), [[UV2]](f32), 1 + ; VI-NEXT: [[INT4:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](f32) + ; VI-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[INT]] ; VI-NEXT: S_SETREG_IMM32_B32 3, 2305, implicit-def $mode, implicit $mode - ; VI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; VI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; VI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; VI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; VI-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[INT2]], [[FMA1]] + ; VI-NEXT: [[FMA2:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; VI-NEXT: [[FMA3:%[0-9]+]]:_(f32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; VI-NEXT: [[FMA4:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] ; VI-NEXT: S_SETREG_IMM32_B32 0, 2305, implicit-def $mode, implicit $mode - ; VI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; VI-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV2]](s32), [[UV]](s32) - ; VI-NEXT: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 0 - ; VI-NEXT: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 1 - ; VI-NEXT: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) - ; VI-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] + ; VI-NEXT: [[INT5:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](f32), [[FMA1]](f32), [[FMA3]](f32), [[INT3]](i1) + ; VI-NEXT: [[INT6:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](f32), [[UV2]](f32), [[UV]](f32) + ; VI-NEXT: [[INT7:%[0-9]+]]:_(f32), [[INT8:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](f32), [[UV3]](f32), 0 + ; VI-NEXT: [[INT9:%[0-9]+]]:_(f32), [[INT10:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](f32), [[UV3]](f32), 1 + ; VI-NEXT: [[INT11:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](f32) + ; VI-NEXT: [[FNEG1:%[0-9]+]]:_(f32) = G_FNEG [[INT7]] ; VI-NEXT: 
S_SETREG_IMM32_B32 3, 2305, implicit-def $mode, implicit $mode - ; VI-NEXT: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C]] - ; VI-NEXT: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] - ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] - ; VI-NEXT: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] - ; VI-NEXT: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] - ; VI-NEXT: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] + ; VI-NEXT: [[FMA5:%[0-9]+]]:_(f32) = G_FMA [[FNEG1]], [[INT11]], [[C]] + ; VI-NEXT: [[FMA6:%[0-9]+]]:_(f32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] + ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[INT9]], [[FMA6]] + ; VI-NEXT: [[FMA7:%[0-9]+]]:_(f32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] + ; VI-NEXT: [[FMA8:%[0-9]+]]:_(f32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] + ; VI-NEXT: [[FMA9:%[0-9]+]]:_(f32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] ; VI-NEXT: S_SETREG_IMM32_B32 0, 2305, implicit-def $mode, implicit $mode - ; VI-NEXT: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) - ; VI-NEXT: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV3]](s32), [[UV1]](s32) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[INT12:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](f32), [[FMA6]](f32), [[FMA8]](f32), [[INT10]](i1) + ; VI-NEXT: [[INT13:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](f32), [[UV3]](f32), [[UV1]](f32) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[INT6]](f32), [[INT13]](f32) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](<2 x i32>) ; ; GFX9-LABEL: name: test_fdiv_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 0 - ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 1 - ; GFX9-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY1]](<2 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f32>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(f32), [[INT1:%[0-9]+]]:_(i1) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.div.scale), [[UV]](f32), [[UV2]](f32), 0 + ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(f32), [[INT3:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](f32), [[UV2]](f32), 1 + ; GFX9-NEXT: [[INT4:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](f32) + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[INT]] ; GFX9-NEXT: S_SETREG_IMM32_B32 3, 2305, implicit-def $mode, implicit $mode - ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; GFX9-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; GFX9-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[INT2]], [[FMA1]] + ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; GFX9-NEXT: [[FMA3:%[0-9]+]]:_(f32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; GFX9-NEXT: [[FMA4:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] ; GFX9-NEXT: S_SETREG_IMM32_B32 0, 2305, implicit-def $mode, implicit $mode - ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; GFX9-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV2]](s32), [[UV]](s32) - ; GFX9-NEXT: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 0 - ; GFX9-NEXT: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 1 - ; GFX9-NEXT: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) - ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] + ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](f32), [[FMA1]](f32), [[FMA3]](f32), [[INT3]](i1) + ; GFX9-NEXT: [[INT6:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](f32), [[UV2]](f32), [[UV]](f32) + ; GFX9-NEXT: [[INT7:%[0-9]+]]:_(f32), [[INT8:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](f32), [[UV3]](f32), 0 + ; GFX9-NEXT: [[INT9:%[0-9]+]]:_(f32), [[INT10:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](f32), [[UV3]](f32), 1 + ; GFX9-NEXT: [[INT11:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](f32) + ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(f32) = G_FNEG [[INT7]] ; GFX9-NEXT: S_SETREG_IMM32_B32 3, 2305, implicit-def $mode, implicit $mode - ; GFX9-NEXT: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C]] - ; GFX9-NEXT: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] - ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] - ; GFX9-NEXT: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] - ; GFX9-NEXT: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] - ; GFX9-NEXT: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] + ; GFX9-NEXT: [[FMA5:%[0-9]+]]:_(f32) = G_FMA [[FNEG1]], [[INT11]], [[C]] + ; GFX9-NEXT: [[FMA6:%[0-9]+]]:_(f32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL 
[[INT9]], [[FMA6]] + ; GFX9-NEXT: [[FMA7:%[0-9]+]]:_(f32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] + ; GFX9-NEXT: [[FMA8:%[0-9]+]]:_(f32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] + ; GFX9-NEXT: [[FMA9:%[0-9]+]]:_(f32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] ; GFX9-NEXT: S_SETREG_IMM32_B32 0, 2305, implicit-def $mode, implicit $mode - ; GFX9-NEXT: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) - ; GFX9-NEXT: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV3]](s32), [[UV1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[INT12:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](f32), [[FMA6]](f32), [[FMA8]](f32), [[INT10]](i1) + ; GFX9-NEXT: [[INT13:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](f32), [[UV3]](f32), [[UV1]](f32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[INT6]](f32), [[INT13]](f32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](<2 x i32>) ; ; GFX9-UNSAFE-LABEL: name: test_fdiv_v2s32 ; GFX9-UNSAFE: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-UNSAFE-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV2]](s32) - ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[INT]] - ; GFX9-UNSAFE-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV3]](s32) - ; GFX9-UNSAFE-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[INT1]] - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMUL]](s32), [[FMUL1]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX9-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; GFX9-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY1]](<2 x i32>) + ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; GFX9-UNSAFE-NEXT: [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f32>) + ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV2]](f32) + ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UV]], [[INT]] + ; GFX9-UNSAFE-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV3]](f32) + ; GFX9-UNSAFE-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[UV1]], [[INT1]] + ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FMUL]](f32), [[FMUL1]](f32) + ; GFX9-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; GFX9-UNSAFE-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](<2 x i32>) ; ; GFX10-LABEL: name: test_fdiv_v2s32 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; 
GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX10-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 0 - ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 1 - ; GFX10-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY1]](<2 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f32>) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX10-NEXT: [[INT:%[0-9]+]]:_(f32), [[INT1:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](f32), [[UV2]](f32), 0 + ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(f32), [[INT3:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](f32), [[UV2]](f32), 1 + ; GFX10-NEXT: [[INT4:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](f32) + ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[INT]] ; GFX10-NEXT: S_DENORM_MODE 15, implicit-def $mode, implicit $mode - ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; GFX10-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; GFX10-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; GFX10-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[INT2]], [[FMA1]] + ; GFX10-NEXT: [[FMA2:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; GFX10-NEXT: [[FMA3:%[0-9]+]]:_(f32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; GFX10-NEXT: [[FMA4:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] ; GFX10-NEXT: S_DENORM_MODE 12, implicit-def $mode, implicit $mode - ; GFX10-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; GFX10-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV2]](s32), [[UV]](s32) - ; GFX10-NEXT: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 0 - ; GFX10-NEXT: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 1 - ; GFX10-NEXT: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), 
[[INT7]](s32) - ; GFX10-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] + ; GFX10-NEXT: [[INT5:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](f32), [[FMA1]](f32), [[FMA3]](f32), [[INT3]](i1) + ; GFX10-NEXT: [[INT6:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](f32), [[UV2]](f32), [[UV]](f32) + ; GFX10-NEXT: [[INT7:%[0-9]+]]:_(f32), [[INT8:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](f32), [[UV3]](f32), 0 + ; GFX10-NEXT: [[INT9:%[0-9]+]]:_(f32), [[INT10:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](f32), [[UV3]](f32), 1 + ; GFX10-NEXT: [[INT11:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](f32) + ; GFX10-NEXT: [[FNEG1:%[0-9]+]]:_(f32) = G_FNEG [[INT7]] ; GFX10-NEXT: S_DENORM_MODE 15, implicit-def $mode, implicit $mode - ; GFX10-NEXT: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C]] - ; GFX10-NEXT: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] - ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] - ; GFX10-NEXT: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] - ; GFX10-NEXT: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] - ; GFX10-NEXT: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] + ; GFX10-NEXT: [[FMA5:%[0-9]+]]:_(f32) = G_FMA [[FNEG1]], [[INT11]], [[C]] + ; GFX10-NEXT: [[FMA6:%[0-9]+]]:_(f32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[INT9]], [[FMA6]] + ; GFX10-NEXT: [[FMA7:%[0-9]+]]:_(f32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] + ; GFX10-NEXT: [[FMA8:%[0-9]+]]:_(f32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] + ; GFX10-NEXT: [[FMA9:%[0-9]+]]:_(f32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] ; GFX10-NEXT: S_DENORM_MODE 12, implicit-def $mode, implicit $mode - ; GFX10-NEXT: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) - ; GFX10-NEXT: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV3]](s32), [[UV1]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = G_FDIV %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX10-NEXT: [[INT12:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](f32), [[FMA6]](f32), [[FMA8]](f32), [[INT10]](i1) + ; GFX10-NEXT: [[INT13:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](f32), [[UV3]](f32), [[UV1]](f32) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[INT6]](f32), [[INT13]](f32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x f32>) = G_BITCAST %0(<2 x i32>) + %3:_(<2 x f32>) = G_BITCAST %1(<2 x i32>) + %4:_(<2 x f32>) = G_FDIV %2, %3 + %5:_(<2 x i32>) = G_BITCAST %4(<2 x f32>) + $vgpr0_vgpr1 = COPY %5(<2 x i32>) ... 
--- @@ -775,160 +888,178 @@ body: | ; SI-LABEL: name: test_fdiv_v2s32_flags ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; SI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 0 - ; SI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 1 - ; SI-NEXT: [[INT4:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = nnan G_FNEG [[INT]] - ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[INT4]], [[C]] - ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA]], [[INT4]], [[INT4]] - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT2]], [[FMA1]] - ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; SI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; SI-NEXT: [[INT5:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; SI-NEXT: [[INT6:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV2]](s32), [[UV]](s32) - ; SI-NEXT: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 0 - ; SI-NEXT: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 1 - ; SI-NEXT: [[INT11:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) - ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = nnan G_FNEG [[INT7]] - ; SI-NEXT: [[FMA5:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[INT11]], [[C]] - ; SI-NEXT: [[FMA6:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA5]], [[INT11]], [[INT11]] - ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT9]], [[FMA6]] - ; SI-NEXT: [[FMA7:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] - ; SI-NEXT: [[FMA8:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] - ; SI-NEXT: [[FMA9:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[FMA8]], [[INT9]] - ; SI-NEXT: [[INT12:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) - ; SI-NEXT: [[INT13:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV3]](s32), [[UV1]](s32) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY1]](<2 x i32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES 
[[BITCAST1]](<2 x f32>) + ; SI-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; SI-NEXT: [[INT:%[0-9]+]]:_(f32), [[INT1:%[0-9]+]]:_(i1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](f32), [[UV2]](f32), 0 + ; SI-NEXT: [[INT2:%[0-9]+]]:_(f32), [[INT3:%[0-9]+]]:_(i1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](f32), [[UV2]](f32), 1 + ; SI-NEXT: [[INT4:%[0-9]+]]:_(f32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](f32) + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(f32) = nnan G_FNEG [[INT]] + ; SI-NEXT: [[FMA:%[0-9]+]]:_(f32) = nnan G_FMA [[FNEG]], [[INT4]], [[C]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(f32) = nnan G_FMA [[FMA]], [[INT4]], [[INT4]] + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = nnan G_FMUL [[INT2]], [[FMA1]] + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(f32) = nnan G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; SI-NEXT: [[FMA3:%[0-9]+]]:_(f32) = nnan G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; SI-NEXT: [[FMA4:%[0-9]+]]:_(f32) = nnan G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; SI-NEXT: [[INT5:%[0-9]+]]:_(f32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](f32), [[FMA1]](f32), [[FMA3]](f32), [[INT3]](i1) + ; SI-NEXT: [[INT6:%[0-9]+]]:_(f32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](f32), [[UV2]](f32), [[UV]](f32) + ; SI-NEXT: [[INT7:%[0-9]+]]:_(f32), [[INT8:%[0-9]+]]:_(i1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](f32), [[UV3]](f32), 0 + ; SI-NEXT: [[INT9:%[0-9]+]]:_(f32), [[INT10:%[0-9]+]]:_(i1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](f32), [[UV3]](f32), 1 + ; SI-NEXT: [[INT11:%[0-9]+]]:_(f32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](f32) + ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(f32) = nnan G_FNEG [[INT7]] + ; SI-NEXT: [[FMA5:%[0-9]+]]:_(f32) = nnan G_FMA [[FNEG1]], [[INT11]], [[C]] + ; SI-NEXT: [[FMA6:%[0-9]+]]:_(f32) = nnan G_FMA [[FMA5]], [[INT11]], [[INT11]] + ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = nnan G_FMUL [[INT9]], [[FMA6]] + ; SI-NEXT: [[FMA7:%[0-9]+]]:_(f32) = nnan G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] + ; SI-NEXT: [[FMA8:%[0-9]+]]:_(f32) = nnan G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] + ; SI-NEXT: [[FMA9:%[0-9]+]]:_(f32) = nnan G_FMA [[FNEG1]], [[FMA8]], [[INT9]] + ; SI-NEXT: [[INT12:%[0-9]+]]:_(f32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](f32), [[FMA6]](f32), [[FMA8]](f32), [[INT10]](i1) + ; SI-NEXT: [[INT13:%[0-9]+]]:_(f32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](f32), [[UV3]](f32), [[UV1]](f32) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[INT6]](f32), [[INT13]](f32) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](<2 x i32>) ; ; VI-LABEL: name: test_fdiv_v2s32_flags ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; VI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 0 - ; VI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 1 - ; VI-NEXT: [[INT4:%[0-9]+]]:_(s32) = nnan 
G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = nnan G_FNEG [[INT]] - ; VI-NEXT: [[FMA:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[INT4]], [[C]] - ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA]], [[INT4]], [[INT4]] - ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT2]], [[FMA1]] - ; VI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; VI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; VI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; VI-NEXT: [[INT5:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; VI-NEXT: [[INT6:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV2]](s32), [[UV]](s32) - ; VI-NEXT: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 0 - ; VI-NEXT: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 1 - ; VI-NEXT: [[INT11:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) - ; VI-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = nnan G_FNEG [[INT7]] - ; VI-NEXT: [[FMA5:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[INT11]], [[C]] - ; VI-NEXT: [[FMA6:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA5]], [[INT11]], [[INT11]] - ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT9]], [[FMA6]] - ; VI-NEXT: [[FMA7:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] - ; VI-NEXT: [[FMA8:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] - ; VI-NEXT: [[FMA9:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[FMA8]], [[INT9]] - ; VI-NEXT: [[INT12:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) - ; VI-NEXT: [[INT13:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV3]](s32), [[UV1]](s32) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY1]](<2 x i32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f32>) + ; VI-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; VI-NEXT: [[INT:%[0-9]+]]:_(f32), [[INT1:%[0-9]+]]:_(i1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](f32), [[UV2]](f32), 0 + ; VI-NEXT: [[INT2:%[0-9]+]]:_(f32), [[INT3:%[0-9]+]]:_(i1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](f32), [[UV2]](f32), 1 + ; VI-NEXT: [[INT4:%[0-9]+]]:_(f32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](f32) + ; VI-NEXT: [[FNEG:%[0-9]+]]:_(f32) = nnan G_FNEG [[INT]] + ; VI-NEXT: [[FMA:%[0-9]+]]:_(f32) = nnan G_FMA [[FNEG]], [[INT4]], [[C]] + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(f32) = nnan G_FMA [[FMA]], [[INT4]], [[INT4]] + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = nnan G_FMUL [[INT2]], [[FMA1]] + ; VI-NEXT: [[FMA2:%[0-9]+]]:_(f32) = nnan G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; VI-NEXT: 
[[FMA3:%[0-9]+]]:_(f32) = nnan G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; VI-NEXT: [[FMA4:%[0-9]+]]:_(f32) = nnan G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; VI-NEXT: [[INT5:%[0-9]+]]:_(f32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](f32), [[FMA1]](f32), [[FMA3]](f32), [[INT3]](i1) + ; VI-NEXT: [[INT6:%[0-9]+]]:_(f32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](f32), [[UV2]](f32), [[UV]](f32) + ; VI-NEXT: [[INT7:%[0-9]+]]:_(f32), [[INT8:%[0-9]+]]:_(i1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](f32), [[UV3]](f32), 0 + ; VI-NEXT: [[INT9:%[0-9]+]]:_(f32), [[INT10:%[0-9]+]]:_(i1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](f32), [[UV3]](f32), 1 + ; VI-NEXT: [[INT11:%[0-9]+]]:_(f32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](f32) + ; VI-NEXT: [[FNEG1:%[0-9]+]]:_(f32) = nnan G_FNEG [[INT7]] + ; VI-NEXT: [[FMA5:%[0-9]+]]:_(f32) = nnan G_FMA [[FNEG1]], [[INT11]], [[C]] + ; VI-NEXT: [[FMA6:%[0-9]+]]:_(f32) = nnan G_FMA [[FMA5]], [[INT11]], [[INT11]] + ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = nnan G_FMUL [[INT9]], [[FMA6]] + ; VI-NEXT: [[FMA7:%[0-9]+]]:_(f32) = nnan G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] + ; VI-NEXT: [[FMA8:%[0-9]+]]:_(f32) = nnan G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] + ; VI-NEXT: [[FMA9:%[0-9]+]]:_(f32) = nnan G_FMA [[FNEG1]], [[FMA8]], [[INT9]] + ; VI-NEXT: [[INT12:%[0-9]+]]:_(f32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](f32), [[FMA6]](f32), [[FMA8]](f32), [[INT10]](i1) + ; VI-NEXT: [[INT13:%[0-9]+]]:_(f32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](f32), [[UV3]](f32), [[UV1]](f32) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[INT6]](f32), [[INT13]](f32) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](<2 x i32>) ; ; GFX9-LABEL: name: test_fdiv_v2s32_flags ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 0 - ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 1 - ; GFX9-NEXT: [[INT4:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s32) = nnan G_FNEG [[INT]] - ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[INT4]], [[C]] - ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA]], [[INT4]], [[INT4]] - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT2]], [[FMA1]] - ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; GFX9-NEXT: [[FMA3:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; GFX9-NEXT: [[FMA4:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; GFX9-NEXT: [[INT6:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), 
[[INT5]](s32), [[UV2]](s32), [[UV]](s32) - ; GFX9-NEXT: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 0 - ; GFX9-NEXT: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 1 - ; GFX9-NEXT: [[INT11:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) - ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = nnan G_FNEG [[INT7]] - ; GFX9-NEXT: [[FMA5:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[INT11]], [[C]] - ; GFX9-NEXT: [[FMA6:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA5]], [[INT11]], [[INT11]] - ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT9]], [[FMA6]] - ; GFX9-NEXT: [[FMA7:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] - ; GFX9-NEXT: [[FMA8:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] - ; GFX9-NEXT: [[FMA9:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[FMA8]], [[INT9]] - ; GFX9-NEXT: [[INT12:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) - ; GFX9-NEXT: [[INT13:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV3]](s32), [[UV1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY1]](<2 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f32>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(f32), [[INT1:%[0-9]+]]:_(i1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](f32), [[UV2]](f32), 0 + ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(f32), [[INT3:%[0-9]+]]:_(i1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](f32), [[UV2]](f32), 1 + ; GFX9-NEXT: [[INT4:%[0-9]+]]:_(f32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](f32) + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(f32) = nnan G_FNEG [[INT]] + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(f32) = nnan G_FMA [[FNEG]], [[INT4]], [[C]] + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(f32) = nnan G_FMA [[FMA]], [[INT4]], [[INT4]] + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = nnan G_FMUL [[INT2]], [[FMA1]] + ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(f32) = nnan G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; GFX9-NEXT: [[FMA3:%[0-9]+]]:_(f32) = nnan G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; GFX9-NEXT: [[FMA4:%[0-9]+]]:_(f32) = nnan G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(f32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](f32), [[FMA1]](f32), [[FMA3]](f32), [[INT3]](i1) + ; GFX9-NEXT: [[INT6:%[0-9]+]]:_(f32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](f32), [[UV2]](f32), [[UV]](f32) + ; GFX9-NEXT: [[INT7:%[0-9]+]]:_(f32), [[INT8:%[0-9]+]]:_(i1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](f32), [[UV3]](f32), 0 + ; GFX9-NEXT: [[INT9:%[0-9]+]]:_(f32), [[INT10:%[0-9]+]]:_(i1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](f32), [[UV3]](f32), 1 + ; GFX9-NEXT: [[INT11:%[0-9]+]]:_(f32) = 
nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](f32) + ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(f32) = nnan G_FNEG [[INT7]] + ; GFX9-NEXT: [[FMA5:%[0-9]+]]:_(f32) = nnan G_FMA [[FNEG1]], [[INT11]], [[C]] + ; GFX9-NEXT: [[FMA6:%[0-9]+]]:_(f32) = nnan G_FMA [[FMA5]], [[INT11]], [[INT11]] + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = nnan G_FMUL [[INT9]], [[FMA6]] + ; GFX9-NEXT: [[FMA7:%[0-9]+]]:_(f32) = nnan G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] + ; GFX9-NEXT: [[FMA8:%[0-9]+]]:_(f32) = nnan G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] + ; GFX9-NEXT: [[FMA9:%[0-9]+]]:_(f32) = nnan G_FMA [[FNEG1]], [[FMA8]], [[INT9]] + ; GFX9-NEXT: [[INT12:%[0-9]+]]:_(f32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](f32), [[FMA6]](f32), [[FMA8]](f32), [[INT10]](i1) + ; GFX9-NEXT: [[INT13:%[0-9]+]]:_(f32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](f32), [[UV3]](f32), [[UV1]](f32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[INT6]](f32), [[INT13]](f32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](<2 x i32>) ; ; GFX9-UNSAFE-LABEL: name: test_fdiv_v2s32_flags ; GFX9-UNSAFE: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-UNSAFE-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV2]](s32) - ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[UV]], [[INT]] - ; GFX9-UNSAFE-NEXT: [[INT1:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV3]](s32) - ; GFX9-UNSAFE-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[UV1]], [[INT1]] - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMUL]](s32), [[FMUL1]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX9-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; GFX9-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY1]](<2 x i32>) + ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; GFX9-UNSAFE-NEXT: [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f32>) + ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(f32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV2]](f32) + ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(f32) = nnan G_FMUL [[UV]], [[INT]] + ; GFX9-UNSAFE-NEXT: [[INT1:%[0-9]+]]:_(f32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV3]](f32) + ; GFX9-UNSAFE-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = nnan G_FMUL [[UV1]], [[INT1]] + ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FMUL]](f32), [[FMUL1]](f32) + ; GFX9-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; GFX9-UNSAFE-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](<2 x i32>) ; ; GFX10-LABEL: name: test_fdiv_v2s32_flags ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: 
[[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX10-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 0 - ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 1 - ; GFX10-NEXT: [[INT4:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(s32) = nnan G_FNEG [[INT]] - ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[INT4]], [[C]] - ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA]], [[INT4]], [[INT4]] - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT2]], [[FMA1]] - ; GFX10-NEXT: [[FMA2:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; GFX10-NEXT: [[FMA3:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; GFX10-NEXT: [[FMA4:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; GFX10-NEXT: [[INT5:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; GFX10-NEXT: [[INT6:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV2]](s32), [[UV]](s32) - ; GFX10-NEXT: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 0 - ; GFX10-NEXT: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 1 - ; GFX10-NEXT: [[INT11:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) - ; GFX10-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = nnan G_FNEG [[INT7]] - ; GFX10-NEXT: [[FMA5:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[INT11]], [[C]] - ; GFX10-NEXT: [[FMA6:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA5]], [[INT11]], [[INT11]] - ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT9]], [[FMA6]] - ; GFX10-NEXT: [[FMA7:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] - ; GFX10-NEXT: [[FMA8:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] - ; GFX10-NEXT: [[FMA9:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[FMA8]], [[INT9]] - ; GFX10-NEXT: [[INT12:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) - ; GFX10-NEXT: [[INT13:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV3]](s32), [[UV1]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = nnan G_FDIV %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY1]](<2 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(f32), 
[[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f32>) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX10-NEXT: [[INT:%[0-9]+]]:_(f32), [[INT1:%[0-9]+]]:_(i1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](f32), [[UV2]](f32), 0 + ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(f32), [[INT3:%[0-9]+]]:_(i1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](f32), [[UV2]](f32), 1 + ; GFX10-NEXT: [[INT4:%[0-9]+]]:_(f32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](f32) + ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(f32) = nnan G_FNEG [[INT]] + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(f32) = nnan G_FMA [[FNEG]], [[INT4]], [[C]] + ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(f32) = nnan G_FMA [[FMA]], [[INT4]], [[INT4]] + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f32) = nnan G_FMUL [[INT2]], [[FMA1]] + ; GFX10-NEXT: [[FMA2:%[0-9]+]]:_(f32) = nnan G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; GFX10-NEXT: [[FMA3:%[0-9]+]]:_(f32) = nnan G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; GFX10-NEXT: [[FMA4:%[0-9]+]]:_(f32) = nnan G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; GFX10-NEXT: [[INT5:%[0-9]+]]:_(f32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](f32), [[FMA1]](f32), [[FMA3]](f32), [[INT3]](i1) + ; GFX10-NEXT: [[INT6:%[0-9]+]]:_(f32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](f32), [[UV2]](f32), [[UV]](f32) + ; GFX10-NEXT: [[INT7:%[0-9]+]]:_(f32), [[INT8:%[0-9]+]]:_(i1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](f32), [[UV3]](f32), 0 + ; GFX10-NEXT: [[INT9:%[0-9]+]]:_(f32), [[INT10:%[0-9]+]]:_(i1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](f32), [[UV3]](f32), 1 + ; GFX10-NEXT: [[INT11:%[0-9]+]]:_(f32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](f32) + ; GFX10-NEXT: [[FNEG1:%[0-9]+]]:_(f32) = nnan G_FNEG [[INT7]] + ; GFX10-NEXT: [[FMA5:%[0-9]+]]:_(f32) = nnan G_FMA [[FNEG1]], [[INT11]], [[C]] + ; GFX10-NEXT: [[FMA6:%[0-9]+]]:_(f32) = nnan G_FMA [[FMA5]], [[INT11]], [[INT11]] + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = nnan G_FMUL [[INT9]], [[FMA6]] + ; GFX10-NEXT: [[FMA7:%[0-9]+]]:_(f32) = nnan G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] + ; GFX10-NEXT: [[FMA8:%[0-9]+]]:_(f32) = nnan G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] + ; GFX10-NEXT: [[FMA9:%[0-9]+]]:_(f32) = nnan G_FMA [[FNEG1]], [[FMA8]], [[INT9]] + ; GFX10-NEXT: [[INT12:%[0-9]+]]:_(f32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](f32), [[FMA6]](f32), [[FMA8]](f32), [[INT10]](i1) + ; GFX10-NEXT: [[INT13:%[0-9]+]]:_(f32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](f32), [[UV3]](f32), [[UV1]](f32) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[INT6]](f32), [[INT13]](f32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x f32>) = G_BITCAST %0(<2 x i32>) + %3:_(<2 x f32>) = G_BITCAST %1(<2 x i32>) + %4:_(<2 x f32>) = nnan G_FDIV %2, %3 + %5:_(<2 x i32>) = G_BITCAST %4(<2 x f32>) + $vgpr0_vgpr1 = COPY %5(<2 x i32>) ... 
--- @@ -940,210 +1071,228 @@ body: | ; SI-LABEL: name: test_fdiv_v3s32 ; SI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; SI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV3]](s32), 0 - ; SI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV3]](s32), 1 - ; SI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; SI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; SI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; SI-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV3]](s32), [[UV]](s32) - ; SI-NEXT: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV4]](s32), 0 - ; SI-NEXT: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV4]](s32), 1 - ; SI-NEXT: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) - ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] - ; SI-NEXT: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C]] - ; SI-NEXT: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] - ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] - ; SI-NEXT: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] - ; SI-NEXT: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] - ; SI-NEXT: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] - ; SI-NEXT: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) - ; SI-NEXT: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV4]](s32), [[UV1]](s32) - ; SI-NEXT: [[INT14:%[0-9]+]]:_(s32), [[INT15:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV2]](s32), [[UV5]](s32), 0 - ; SI-NEXT: [[INT16:%[0-9]+]]:_(s32), [[INT17:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV2]](s32), [[UV5]](s32), 1 - ; SI-NEXT: [[INT18:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT14]](s32) - ; SI-NEXT: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[INT14]] - ; SI-NEXT: [[FMA10:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[INT18]], [[C]] - ; SI-NEXT: [[FMA11:%[0-9]+]]:_(s32) = G_FMA [[FMA10]], [[INT18]], [[INT18]] - ; SI-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INT16]], [[FMA11]] - ; SI-NEXT: 
[[FMA12:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMUL2]], [[INT16]] - ; SI-NEXT: [[FMA13:%[0-9]+]]:_(s32) = G_FMA [[FMA12]], [[FMA11]], [[FMUL2]] - ; SI-NEXT: [[FMA14:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMA13]], [[INT16]] - ; SI-NEXT: [[INT19:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA14]](s32), [[FMA11]](s32), [[FMA13]](s32), [[INT17]](s1) - ; SI-NEXT: [[INT20:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT19]](s32), [[UV5]](s32), [[UV2]](s32) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32), [[INT20]](s32) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY]](<3 x i32>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY1]](<3 x i32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; SI-NEXT: [[UV3:%[0-9]+]]:_(f32), [[UV4:%[0-9]+]]:_(f32), [[UV5:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<3 x f32>) + ; SI-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; SI-NEXT: [[INT:%[0-9]+]]:_(f32), [[INT1:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](f32), [[UV3]](f32), 0 + ; SI-NEXT: [[INT2:%[0-9]+]]:_(f32), [[INT3:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](f32), [[UV3]](f32), 1 + ; SI-NEXT: [[INT4:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](f32) + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[INT]] + ; SI-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[INT2]], [[FMA1]] + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; SI-NEXT: [[FMA3:%[0-9]+]]:_(f32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; SI-NEXT: [[FMA4:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; SI-NEXT: [[INT5:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](f32), [[FMA1]](f32), [[FMA3]](f32), [[INT3]](i1) + ; SI-NEXT: [[INT6:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](f32), [[UV3]](f32), [[UV]](f32) + ; SI-NEXT: [[INT7:%[0-9]+]]:_(f32), [[INT8:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](f32), [[UV4]](f32), 0 + ; SI-NEXT: [[INT9:%[0-9]+]]:_(f32), [[INT10:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](f32), [[UV4]](f32), 1 + ; SI-NEXT: [[INT11:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](f32) + ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(f32) = G_FNEG [[INT7]] + ; SI-NEXT: [[FMA5:%[0-9]+]]:_(f32) = G_FMA [[FNEG1]], [[INT11]], [[C]] + ; SI-NEXT: [[FMA6:%[0-9]+]]:_(f32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] + ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[INT9]], [[FMA6]] + ; SI-NEXT: [[FMA7:%[0-9]+]]:_(f32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] + ; SI-NEXT: [[FMA8:%[0-9]+]]:_(f32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] + ; SI-NEXT: [[FMA9:%[0-9]+]]:_(f32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] + ; SI-NEXT: [[INT12:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](f32), [[FMA6]](f32), [[FMA8]](f32), [[INT10]](i1) + ; SI-NEXT: [[INT13:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](f32), [[UV4]](f32), 
[[UV1]](f32) + ; SI-NEXT: [[INT14:%[0-9]+]]:_(f32), [[INT15:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV2]](f32), [[UV5]](f32), 0 + ; SI-NEXT: [[INT16:%[0-9]+]]:_(f32), [[INT17:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV2]](f32), [[UV5]](f32), 1 + ; SI-NEXT: [[INT18:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT14]](f32) + ; SI-NEXT: [[FNEG2:%[0-9]+]]:_(f32) = G_FNEG [[INT14]] + ; SI-NEXT: [[FMA10:%[0-9]+]]:_(f32) = G_FMA [[FNEG2]], [[INT18]], [[C]] + ; SI-NEXT: [[FMA11:%[0-9]+]]:_(f32) = G_FMA [[FMA10]], [[INT18]], [[INT18]] + ; SI-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[INT16]], [[FMA11]] + ; SI-NEXT: [[FMA12:%[0-9]+]]:_(f32) = G_FMA [[FNEG2]], [[FMUL2]], [[INT16]] + ; SI-NEXT: [[FMA13:%[0-9]+]]:_(f32) = G_FMA [[FMA12]], [[FMA11]], [[FMUL2]] + ; SI-NEXT: [[FMA14:%[0-9]+]]:_(f32) = G_FMA [[FNEG2]], [[FMA13]], [[INT16]] + ; SI-NEXT: [[INT19:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA14]](f32), [[FMA11]](f32), [[FMA13]](f32), [[INT17]](i1) + ; SI-NEXT: [[INT20:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT19]](f32), [[UV5]](f32), [[UV2]](f32) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[INT6]](f32), [[INT13]](f32), [[INT20]](f32) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[BUILD_VECTOR]](<3 x f32>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST2]](<3 x i32>) ; ; VI-LABEL: name: test_fdiv_v3s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; VI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV3]](s32), 0 - ; VI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV3]](s32), 1 - ; VI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; VI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; VI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; VI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; VI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; VI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; VI-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV3]](s32), [[UV]](s32) - ; VI-NEXT: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV4]](s32), 0 - ; VI-NEXT: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV4]](s32), 1 - ; VI-NEXT: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) - ; VI-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG 
[[INT7]] - ; VI-NEXT: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C]] - ; VI-NEXT: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] - ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] - ; VI-NEXT: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] - ; VI-NEXT: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] - ; VI-NEXT: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] - ; VI-NEXT: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) - ; VI-NEXT: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV4]](s32), [[UV1]](s32) - ; VI-NEXT: [[INT14:%[0-9]+]]:_(s32), [[INT15:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV2]](s32), [[UV5]](s32), 0 - ; VI-NEXT: [[INT16:%[0-9]+]]:_(s32), [[INT17:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV2]](s32), [[UV5]](s32), 1 - ; VI-NEXT: [[INT18:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT14]](s32) - ; VI-NEXT: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[INT14]] - ; VI-NEXT: [[FMA10:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[INT18]], [[C]] - ; VI-NEXT: [[FMA11:%[0-9]+]]:_(s32) = G_FMA [[FMA10]], [[INT18]], [[INT18]] - ; VI-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INT16]], [[FMA11]] - ; VI-NEXT: [[FMA12:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMUL2]], [[INT16]] - ; VI-NEXT: [[FMA13:%[0-9]+]]:_(s32) = G_FMA [[FMA12]], [[FMA11]], [[FMUL2]] - ; VI-NEXT: [[FMA14:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMA13]], [[INT16]] - ; VI-NEXT: [[INT19:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA14]](s32), [[FMA11]](s32), [[FMA13]](s32), [[INT17]](s1) - ; VI-NEXT: [[INT20:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT19]](s32), [[UV5]](s32), [[UV2]](s32) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32), [[INT20]](s32) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY]](<3 x i32>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY1]](<3 x i32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; VI-NEXT: [[UV3:%[0-9]+]]:_(f32), [[UV4:%[0-9]+]]:_(f32), [[UV5:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<3 x f32>) + ; VI-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; VI-NEXT: [[INT:%[0-9]+]]:_(f32), [[INT1:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](f32), [[UV3]](f32), 0 + ; VI-NEXT: [[INT2:%[0-9]+]]:_(f32), [[INT3:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](f32), [[UV3]](f32), 1 + ; VI-NEXT: [[INT4:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](f32) + ; VI-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[INT]] + ; VI-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[INT2]], [[FMA1]] + ; VI-NEXT: [[FMA2:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; VI-NEXT: [[FMA3:%[0-9]+]]:_(f32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; VI-NEXT: [[FMA4:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; 
VI-NEXT: [[INT5:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](f32), [[FMA1]](f32), [[FMA3]](f32), [[INT3]](i1) + ; VI-NEXT: [[INT6:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](f32), [[UV3]](f32), [[UV]](f32) + ; VI-NEXT: [[INT7:%[0-9]+]]:_(f32), [[INT8:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](f32), [[UV4]](f32), 0 + ; VI-NEXT: [[INT9:%[0-9]+]]:_(f32), [[INT10:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](f32), [[UV4]](f32), 1 + ; VI-NEXT: [[INT11:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](f32) + ; VI-NEXT: [[FNEG1:%[0-9]+]]:_(f32) = G_FNEG [[INT7]] + ; VI-NEXT: [[FMA5:%[0-9]+]]:_(f32) = G_FMA [[FNEG1]], [[INT11]], [[C]] + ; VI-NEXT: [[FMA6:%[0-9]+]]:_(f32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] + ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[INT9]], [[FMA6]] + ; VI-NEXT: [[FMA7:%[0-9]+]]:_(f32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] + ; VI-NEXT: [[FMA8:%[0-9]+]]:_(f32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] + ; VI-NEXT: [[FMA9:%[0-9]+]]:_(f32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] + ; VI-NEXT: [[INT12:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](f32), [[FMA6]](f32), [[FMA8]](f32), [[INT10]](i1) + ; VI-NEXT: [[INT13:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](f32), [[UV4]](f32), [[UV1]](f32) + ; VI-NEXT: [[INT14:%[0-9]+]]:_(f32), [[INT15:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV2]](f32), [[UV5]](f32), 0 + ; VI-NEXT: [[INT16:%[0-9]+]]:_(f32), [[INT17:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV2]](f32), [[UV5]](f32), 1 + ; VI-NEXT: [[INT18:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT14]](f32) + ; VI-NEXT: [[FNEG2:%[0-9]+]]:_(f32) = G_FNEG [[INT14]] + ; VI-NEXT: [[FMA10:%[0-9]+]]:_(f32) = G_FMA [[FNEG2]], [[INT18]], [[C]] + ; VI-NEXT: [[FMA11:%[0-9]+]]:_(f32) = G_FMA [[FMA10]], [[INT18]], [[INT18]] + ; VI-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[INT16]], [[FMA11]] + ; VI-NEXT: [[FMA12:%[0-9]+]]:_(f32) = G_FMA [[FNEG2]], [[FMUL2]], [[INT16]] + ; VI-NEXT: [[FMA13:%[0-9]+]]:_(f32) = G_FMA [[FMA12]], [[FMA11]], [[FMUL2]] + ; VI-NEXT: [[FMA14:%[0-9]+]]:_(f32) = G_FMA [[FNEG2]], [[FMA13]], [[INT16]] + ; VI-NEXT: [[INT19:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA14]](f32), [[FMA11]](f32), [[FMA13]](f32), [[INT17]](i1) + ; VI-NEXT: [[INT20:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT19]](f32), [[UV5]](f32), [[UV2]](f32) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[INT6]](f32), [[INT13]](f32), [[INT20]](f32) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[BUILD_VECTOR]](<3 x f32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST2]](<3 x i32>) ; ; GFX9-LABEL: name: test_fdiv_v3s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), 
[[UV3]](s32), 0 - ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV3]](s32), 1 - ; GFX9-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; GFX9-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; GFX9-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; GFX9-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV3]](s32), [[UV]](s32) - ; GFX9-NEXT: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV4]](s32), 0 - ; GFX9-NEXT: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV4]](s32), 1 - ; GFX9-NEXT: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) - ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] - ; GFX9-NEXT: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C]] - ; GFX9-NEXT: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] - ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] - ; GFX9-NEXT: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] - ; GFX9-NEXT: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] - ; GFX9-NEXT: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] - ; GFX9-NEXT: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) - ; GFX9-NEXT: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV4]](s32), [[UV1]](s32) - ; GFX9-NEXT: [[INT14:%[0-9]+]]:_(s32), [[INT15:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV2]](s32), [[UV5]](s32), 0 - ; GFX9-NEXT: [[INT16:%[0-9]+]]:_(s32), [[INT17:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV2]](s32), [[UV5]](s32), 1 - ; GFX9-NEXT: [[INT18:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT14]](s32) - ; GFX9-NEXT: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[INT14]] - ; GFX9-NEXT: [[FMA10:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[INT18]], [[C]] - ; GFX9-NEXT: [[FMA11:%[0-9]+]]:_(s32) = G_FMA [[FMA10]], [[INT18]], [[INT18]] - ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INT16]], [[FMA11]] - ; GFX9-NEXT: [[FMA12:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMUL2]], [[INT16]] - ; GFX9-NEXT: [[FMA13:%[0-9]+]]:_(s32) = G_FMA [[FMA12]], [[FMA11]], [[FMUL2]] - ; GFX9-NEXT: [[FMA14:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMA13]], [[INT16]] - ; GFX9-NEXT: [[INT19:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA14]](s32), [[FMA11]](s32), [[FMA13]](s32), [[INT17]](s1) - ; GFX9-NEXT: [[INT20:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT19]](s32), [[UV5]](s32), [[UV2]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32), [[INT20]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY 
[[BUILD_VECTOR]](<3 x s32>) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY]](<3 x i32>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY1]](<3 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(f32), [[UV4:%[0-9]+]]:_(f32), [[UV5:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<3 x f32>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(f32), [[INT1:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](f32), [[UV3]](f32), 0 + ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(f32), [[INT3:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](f32), [[UV3]](f32), 1 + ; GFX9-NEXT: [[INT4:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](f32) + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[INT]] + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[INT2]], [[FMA1]] + ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; GFX9-NEXT: [[FMA3:%[0-9]+]]:_(f32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; GFX9-NEXT: [[FMA4:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](f32), [[FMA1]](f32), [[FMA3]](f32), [[INT3]](i1) + ; GFX9-NEXT: [[INT6:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](f32), [[UV3]](f32), [[UV]](f32) + ; GFX9-NEXT: [[INT7:%[0-9]+]]:_(f32), [[INT8:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](f32), [[UV4]](f32), 0 + ; GFX9-NEXT: [[INT9:%[0-9]+]]:_(f32), [[INT10:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](f32), [[UV4]](f32), 1 + ; GFX9-NEXT: [[INT11:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](f32) + ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(f32) = G_FNEG [[INT7]] + ; GFX9-NEXT: [[FMA5:%[0-9]+]]:_(f32) = G_FMA [[FNEG1]], [[INT11]], [[C]] + ; GFX9-NEXT: [[FMA6:%[0-9]+]]:_(f32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[INT9]], [[FMA6]] + ; GFX9-NEXT: [[FMA7:%[0-9]+]]:_(f32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] + ; GFX9-NEXT: [[FMA8:%[0-9]+]]:_(f32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] + ; GFX9-NEXT: [[FMA9:%[0-9]+]]:_(f32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] + ; GFX9-NEXT: [[INT12:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](f32), [[FMA6]](f32), [[FMA8]](f32), [[INT10]](i1) + ; GFX9-NEXT: [[INT13:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](f32), [[UV4]](f32), [[UV1]](f32) + ; GFX9-NEXT: [[INT14:%[0-9]+]]:_(f32), [[INT15:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV2]](f32), [[UV5]](f32), 0 + ; GFX9-NEXT: [[INT16:%[0-9]+]]:_(f32), [[INT17:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV2]](f32), [[UV5]](f32), 1 + ; GFX9-NEXT: [[INT18:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT14]](f32) + ; GFX9-NEXT: [[FNEG2:%[0-9]+]]:_(f32) = G_FNEG [[INT14]] + ; GFX9-NEXT: [[FMA10:%[0-9]+]]:_(f32) = G_FMA [[FNEG2]], [[INT18]], [[C]] + ; GFX9-NEXT: [[FMA11:%[0-9]+]]:_(f32) = G_FMA 
[[FMA10]], [[INT18]], [[INT18]] + ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[INT16]], [[FMA11]] + ; GFX9-NEXT: [[FMA12:%[0-9]+]]:_(f32) = G_FMA [[FNEG2]], [[FMUL2]], [[INT16]] + ; GFX9-NEXT: [[FMA13:%[0-9]+]]:_(f32) = G_FMA [[FMA12]], [[FMA11]], [[FMUL2]] + ; GFX9-NEXT: [[FMA14:%[0-9]+]]:_(f32) = G_FMA [[FNEG2]], [[FMA13]], [[INT16]] + ; GFX9-NEXT: [[INT19:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA14]](f32), [[FMA11]](f32), [[FMA13]](f32), [[INT17]](i1) + ; GFX9-NEXT: [[INT20:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT19]](f32), [[UV5]](f32), [[UV2]](f32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[INT6]](f32), [[INT13]](f32), [[INT20]](f32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[BUILD_VECTOR]](<3 x f32>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST2]](<3 x i32>) ; ; GFX9-UNSAFE-LABEL: name: test_fdiv_v3s32 ; GFX9-UNSAFE: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX9-UNSAFE-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV3]](s32) - ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[INT]] - ; GFX9-UNSAFE-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV4]](s32) - ; GFX9-UNSAFE-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[INT1]] - ; GFX9-UNSAFE-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV5]](s32) - ; GFX9-UNSAFE-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[INT2]] - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMUL]](s32), [[FMUL1]](s32), [[FMUL2]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX9-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY]](<3 x i32>) + ; GFX9-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY1]](<3 x i32>) + ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; GFX9-UNSAFE-NEXT: [[UV3:%[0-9]+]]:_(f32), [[UV4:%[0-9]+]]:_(f32), [[UV5:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<3 x f32>) + ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV3]](f32) + ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UV]], [[INT]] + ; GFX9-UNSAFE-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV4]](f32) + ; GFX9-UNSAFE-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[UV1]], [[INT1]] + ; GFX9-UNSAFE-NEXT: [[INT2:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV5]](f32) + ; GFX9-UNSAFE-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[UV2]], [[INT2]] + ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[FMUL]](f32), [[FMUL1]](f32), [[FMUL2]](f32) + ; GFX9-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[BUILD_VECTOR]](<3 x f32>) + ; GFX9-UNSAFE-NEXT: 
$vgpr0_vgpr1_vgpr2 = COPY [[BITCAST2]](<3 x i32>) ; ; GFX10-LABEL: name: test_fdiv_v3s32 ; GFX10: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX10-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX10-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV3]](s32), 0 - ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV3]](s32), 1 - ; GFX10-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; GFX10-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; GFX10-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; GFX10-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; GFX10-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; GFX10-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV3]](s32), [[UV]](s32) - ; GFX10-NEXT: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV4]](s32), 0 - ; GFX10-NEXT: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV4]](s32), 1 - ; GFX10-NEXT: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) - ; GFX10-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] - ; GFX10-NEXT: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C]] - ; GFX10-NEXT: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] - ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] - ; GFX10-NEXT: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] - ; GFX10-NEXT: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] - ; GFX10-NEXT: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] - ; GFX10-NEXT: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) - ; GFX10-NEXT: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV4]](s32), [[UV1]](s32) - ; GFX10-NEXT: [[INT14:%[0-9]+]]:_(s32), [[INT15:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV2]](s32), [[UV5]](s32), 0 - ; GFX10-NEXT: [[INT16:%[0-9]+]]:_(s32), [[INT17:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV2]](s32), [[UV5]](s32), 1 - ; GFX10-NEXT: [[INT18:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT14]](s32) - ; GFX10-NEXT: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[INT14]] - ; GFX10-NEXT: [[FMA10:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[INT18]], [[C]] - ; GFX10-NEXT: [[FMA11:%[0-9]+]]:_(s32) = G_FMA 
[[FMA10]], [[INT18]], [[INT18]] - ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INT16]], [[FMA11]] - ; GFX10-NEXT: [[FMA12:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMUL2]], [[INT16]] - ; GFX10-NEXT: [[FMA13:%[0-9]+]]:_(s32) = G_FMA [[FMA12]], [[FMA11]], [[FMUL2]] - ; GFX10-NEXT: [[FMA14:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMA13]], [[INT16]] - ; GFX10-NEXT: [[INT19:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA14]](s32), [[FMA11]](s32), [[FMA13]](s32), [[INT17]](s1) - ; GFX10-NEXT: [[INT20:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT19]](s32), [[UV5]](s32), [[UV2]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32), [[INT20]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - %2:_(<3 x s32>) = G_FDIV %0, %1 - $vgpr0_vgpr1_vgpr2 = COPY %2 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY]](<3 x i32>) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY1]](<3 x i32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; GFX10-NEXT: [[UV3:%[0-9]+]]:_(f32), [[UV4:%[0-9]+]]:_(f32), [[UV5:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<3 x f32>) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX10-NEXT: [[INT:%[0-9]+]]:_(f32), [[INT1:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](f32), [[UV3]](f32), 0 + ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(f32), [[INT3:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](f32), [[UV3]](f32), 1 + ; GFX10-NEXT: [[INT4:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](f32) + ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[INT]] + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[INT2]], [[FMA1]] + ; GFX10-NEXT: [[FMA2:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; GFX10-NEXT: [[FMA3:%[0-9]+]]:_(f32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; GFX10-NEXT: [[FMA4:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; GFX10-NEXT: [[INT5:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](f32), [[FMA1]](f32), [[FMA3]](f32), [[INT3]](i1) + ; GFX10-NEXT: [[INT6:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](f32), [[UV3]](f32), [[UV]](f32) + ; GFX10-NEXT: [[INT7:%[0-9]+]]:_(f32), [[INT8:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](f32), [[UV4]](f32), 0 + ; GFX10-NEXT: [[INT9:%[0-9]+]]:_(f32), [[INT10:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](f32), [[UV4]](f32), 1 + ; GFX10-NEXT: [[INT11:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](f32) + ; GFX10-NEXT: [[FNEG1:%[0-9]+]]:_(f32) = G_FNEG [[INT7]] + ; GFX10-NEXT: [[FMA5:%[0-9]+]]:_(f32) = G_FMA [[FNEG1]], [[INT11]], [[C]] + ; GFX10-NEXT: [[FMA6:%[0-9]+]]:_(f32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[INT9]], [[FMA6]] + ; GFX10-NEXT: [[FMA7:%[0-9]+]]:_(f32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] + ; GFX10-NEXT: [[FMA8:%[0-9]+]]:_(f32) 
= G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] + ; GFX10-NEXT: [[FMA9:%[0-9]+]]:_(f32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] + ; GFX10-NEXT: [[INT12:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](f32), [[FMA6]](f32), [[FMA8]](f32), [[INT10]](i1) + ; GFX10-NEXT: [[INT13:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](f32), [[UV4]](f32), [[UV1]](f32) + ; GFX10-NEXT: [[INT14:%[0-9]+]]:_(f32), [[INT15:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV2]](f32), [[UV5]](f32), 0 + ; GFX10-NEXT: [[INT16:%[0-9]+]]:_(f32), [[INT17:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV2]](f32), [[UV5]](f32), 1 + ; GFX10-NEXT: [[INT18:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT14]](f32) + ; GFX10-NEXT: [[FNEG2:%[0-9]+]]:_(f32) = G_FNEG [[INT14]] + ; GFX10-NEXT: [[FMA10:%[0-9]+]]:_(f32) = G_FMA [[FNEG2]], [[INT18]], [[C]] + ; GFX10-NEXT: [[FMA11:%[0-9]+]]:_(f32) = G_FMA [[FMA10]], [[INT18]], [[INT18]] + ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[INT16]], [[FMA11]] + ; GFX10-NEXT: [[FMA12:%[0-9]+]]:_(f32) = G_FMA [[FNEG2]], [[FMUL2]], [[INT16]] + ; GFX10-NEXT: [[FMA13:%[0-9]+]]:_(f32) = G_FMA [[FMA12]], [[FMA11]], [[FMUL2]] + ; GFX10-NEXT: [[FMA14:%[0-9]+]]:_(f32) = G_FMA [[FNEG2]], [[FMA13]], [[INT16]] + ; GFX10-NEXT: [[INT19:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA14]](f32), [[FMA11]](f32), [[FMA13]](f32), [[INT17]](i1) + ; GFX10-NEXT: [[INT20:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT19]](f32), [[UV5]](f32), [[UV2]](f32) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[INT6]](f32), [[INT13]](f32), [[INT20]](f32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[BUILD_VECTOR]](<3 x f32>) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST2]](<3 x i32>) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(<3 x f32>) = G_BITCAST %0(<3 x i32>) + %3:_(<3 x f32>) = G_BITCAST %1(<3 x i32>) + %4:_(<3 x f32>) = G_FDIV %2, %3 + %5:_(<3 x i32>) = G_BITCAST %4(<3 x f32>) + $vgpr0_vgpr1_vgpr2 = COPY %5(<3 x i32>) ... 
--- @@ -1155,189 +1304,215 @@ body: | ; SI-LABEL: name: test_fdiv_v2s64 ; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; SI-NEXT: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s64), [[UV2]](s64), 0 - ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] - ; SI-NEXT: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) - ; SI-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]] - ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] - ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] - ; SI-NEXT: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s64), [[UV2]](s64), 1 - ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] - ; SI-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] - ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; SI-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT]](s64) - ; SI-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT3]](s64) - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV5]](s32), [[UV11]] - ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV7]](s32), [[UV9]] - ; SI-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP]], [[ICMP1]] - ; SI-NEXT: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[XOR]](s1) - ; SI-NEXT: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[UV2]](s64), [[UV]](s64) - ; SI-NEXT: [[INT7:%[0-9]+]]:_(s64), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s64), [[UV3]](s64), 0 - ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[INT7]] - ; SI-NEXT: [[INT9:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s64) - ; SI-NEXT: [[FMA5:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[INT9]], [[C]] - ; SI-NEXT: [[FMA6:%[0-9]+]]:_(s64) = G_FMA [[INT9]], [[FMA5]], [[INT9]] - ; SI-NEXT: [[FMA7:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[FMA6]], [[C]] - ; SI-NEXT: [[INT10:%[0-9]+]]:_(s64), [[INT11:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s64), [[UV3]](s64), 1 - ; SI-NEXT: [[FMA8:%[0-9]+]]:_(s64) = G_FMA [[FMA6]], [[FMA7]], [[FMA6]] - ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[INT10]], [[FMA8]] - ; SI-NEXT: [[FMA9:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[FMUL1]], [[INT10]] - ; SI-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; SI-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; SI-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT7]](s64) - ; SI-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT10]](s64) - ; SI-NEXT: 
[[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV13]](s32), [[UV19]] - ; SI-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV15]](s32), [[UV17]] - ; SI-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP2]], [[ICMP3]] - ; SI-NEXT: [[INT12:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s64), [[FMA8]](s64), [[FMUL1]](s64), [[XOR1]](s1) - ; SI-NEXT: [[INT13:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s64), [[UV3]](s64), [[UV1]](s64) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[INT6]](s64), [[INT13]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY1]](<2 x i64>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(f64), [[UV1:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST]](<2 x f64>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(f64), [[UV3:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f64>) + ; SI-NEXT: [[C:%[0-9]+]]:_(f64) = G_FCONSTANT double 1.000000e+00 + ; SI-NEXT: [[INT:%[0-9]+]]:_(f64), [[INT1:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](f64), [[UV2]](f64), 0 + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(f64) = G_FNEG [[INT]] + ; SI-NEXT: [[INT2:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](f64) + ; SI-NEXT: [[FMA:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[INT2]], [[C]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(f64) = G_FMA [[INT2]], [[FMA]], [[INT2]] + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[FMA1]], [[C]] + ; SI-NEXT: [[INT3:%[0-9]+]]:_(f64), [[INT4:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](f64), [[UV2]](f64), 1 + ; SI-NEXT: [[FMA3:%[0-9]+]]:_(f64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[INT3]], [[FMA3]] + ; SI-NEXT: [[FMA4:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i64) = G_BITCAST [[UV]](f64) + ; SI-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST2]](i64) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(i64) = G_BITCAST [[UV2]](f64) + ; SI-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](i64) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(i64) = G_BITCAST [[INT]](f64) + ; SI-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST4]](i64) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(i64) = G_BITCAST [[INT3]](f64) + ; SI-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST5]](i64) + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[UV5]](i32), [[UV11]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[UV7]](i32), [[UV9]] + ; SI-NEXT: [[XOR:%[0-9]+]]:_(i1) = G_XOR [[ICMP]], [[ICMP1]] + ; SI-NEXT: [[INT5:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](f64), [[FMA3]](f64), [[FMUL]](f64), [[XOR]](i1) + ; SI-NEXT: [[INT6:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](f64), [[UV2]](f64), [[UV]](f64) + ; SI-NEXT: [[INT7:%[0-9]+]]:_(f64), [[INT8:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](f64), [[UV3]](f64), 0 + ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(f64) = G_FNEG [[INT7]] + ; SI-NEXT: [[INT9:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](f64) + ; SI-NEXT: 
[[FMA5:%[0-9]+]]:_(f64) = G_FMA [[FNEG1]], [[INT9]], [[C]] + ; SI-NEXT: [[FMA6:%[0-9]+]]:_(f64) = G_FMA [[INT9]], [[FMA5]], [[INT9]] + ; SI-NEXT: [[FMA7:%[0-9]+]]:_(f64) = G_FMA [[FNEG1]], [[FMA6]], [[C]] + ; SI-NEXT: [[INT10:%[0-9]+]]:_(f64), [[INT11:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](f64), [[UV3]](f64), 1 + ; SI-NEXT: [[FMA8:%[0-9]+]]:_(f64) = G_FMA [[FMA6]], [[FMA7]], [[FMA6]] + ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(f64) = G_FMUL [[INT10]], [[FMA8]] + ; SI-NEXT: [[FMA9:%[0-9]+]]:_(f64) = G_FMA [[FNEG1]], [[FMUL1]], [[INT10]] + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(i64) = G_BITCAST [[UV1]](f64) + ; SI-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST6]](i64) + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(i64) = G_BITCAST [[UV3]](f64) + ; SI-NEXT: [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST7]](i64) + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(i64) = G_BITCAST [[INT7]](f64) + ; SI-NEXT: [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST8]](i64) + ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(i64) = G_BITCAST [[INT10]](f64) + ; SI-NEXT: [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST9]](i64) + ; SI-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[UV13]](i32), [[UV19]] + ; SI-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[UV15]](i32), [[UV17]] + ; SI-NEXT: [[XOR1:%[0-9]+]]:_(i1) = G_XOR [[ICMP2]], [[ICMP3]] + ; SI-NEXT: [[INT12:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](f64), [[FMA8]](f64), [[FMUL1]](f64), [[XOR1]](i1) + ; SI-NEXT: [[INT13:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](f64), [[UV3]](f64), [[UV1]](f64) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f64>) = G_BUILD_VECTOR [[INT6]](f64), [[INT13]](f64) + ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BUILD_VECTOR]](<2 x f64>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST10]](<2 x i64>) ; ; VI-LABEL: name: test_fdiv_v2s64 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; VI-NEXT: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s64), [[UV2]](s64), 0 - ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] - ; VI-NEXT: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) - ; VI-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]] - ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] - ; VI-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] - ; VI-NEXT: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s64), [[UV2]](s64), 1 - ; VI-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] - ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] - ; VI-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] - ; VI-NEXT: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[INT4]](s1) - ; VI-NEXT: 
[[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[UV2]](s64), [[UV]](s64) - ; VI-NEXT: [[INT7:%[0-9]+]]:_(s64), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s64), [[UV3]](s64), 0 - ; VI-NEXT: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[INT7]] - ; VI-NEXT: [[INT9:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s64) - ; VI-NEXT: [[FMA5:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[INT9]], [[C]] - ; VI-NEXT: [[FMA6:%[0-9]+]]:_(s64) = G_FMA [[INT9]], [[FMA5]], [[INT9]] - ; VI-NEXT: [[FMA7:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[FMA6]], [[C]] - ; VI-NEXT: [[INT10:%[0-9]+]]:_(s64), [[INT11:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s64), [[UV3]](s64), 1 - ; VI-NEXT: [[FMA8:%[0-9]+]]:_(s64) = G_FMA [[FMA6]], [[FMA7]], [[FMA6]] - ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[INT10]], [[FMA8]] - ; VI-NEXT: [[FMA9:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[FMUL1]], [[INT10]] - ; VI-NEXT: [[INT12:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s64), [[FMA8]](s64), [[FMUL1]](s64), [[INT11]](s1) - ; VI-NEXT: [[INT13:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s64), [[UV3]](s64), [[UV1]](s64) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[INT6]](s64), [[INT13]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY1]](<2 x i64>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(f64), [[UV1:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST]](<2 x f64>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(f64), [[UV3:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f64>) + ; VI-NEXT: [[C:%[0-9]+]]:_(f64) = G_FCONSTANT double 1.000000e+00 + ; VI-NEXT: [[INT:%[0-9]+]]:_(f64), [[INT1:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](f64), [[UV2]](f64), 0 + ; VI-NEXT: [[FNEG:%[0-9]+]]:_(f64) = G_FNEG [[INT]] + ; VI-NEXT: [[INT2:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](f64) + ; VI-NEXT: [[FMA:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[INT2]], [[C]] + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(f64) = G_FMA [[INT2]], [[FMA]], [[INT2]] + ; VI-NEXT: [[FMA2:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[FMA1]], [[C]] + ; VI-NEXT: [[INT3:%[0-9]+]]:_(f64), [[INT4:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](f64), [[UV2]](f64), 1 + ; VI-NEXT: [[FMA3:%[0-9]+]]:_(f64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[INT3]], [[FMA3]] + ; VI-NEXT: [[FMA4:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] + ; VI-NEXT: [[INT5:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](f64), [[FMA3]](f64), [[FMUL]](f64), [[INT4]](i1) + ; VI-NEXT: [[INT6:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](f64), [[UV2]](f64), [[UV]](f64) + ; VI-NEXT: [[INT7:%[0-9]+]]:_(f64), [[INT8:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](f64), [[UV3]](f64), 0 + ; VI-NEXT: [[FNEG1:%[0-9]+]]:_(f64) = G_FNEG [[INT7]] + ; VI-NEXT: [[INT9:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](f64) + ; VI-NEXT: [[FMA5:%[0-9]+]]:_(f64) = G_FMA [[FNEG1]], [[INT9]], [[C]] + ; VI-NEXT: [[FMA6:%[0-9]+]]:_(f64) = G_FMA [[INT9]], 
[[FMA5]], [[INT9]] + ; VI-NEXT: [[FMA7:%[0-9]+]]:_(f64) = G_FMA [[FNEG1]], [[FMA6]], [[C]] + ; VI-NEXT: [[INT10:%[0-9]+]]:_(f64), [[INT11:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](f64), [[UV3]](f64), 1 + ; VI-NEXT: [[FMA8:%[0-9]+]]:_(f64) = G_FMA [[FMA6]], [[FMA7]], [[FMA6]] + ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(f64) = G_FMUL [[INT10]], [[FMA8]] + ; VI-NEXT: [[FMA9:%[0-9]+]]:_(f64) = G_FMA [[FNEG1]], [[FMUL1]], [[INT10]] + ; VI-NEXT: [[INT12:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](f64), [[FMA8]](f64), [[FMUL1]](f64), [[INT11]](i1) + ; VI-NEXT: [[INT13:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](f64), [[UV3]](f64), [[UV1]](f64) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f64>) = G_BUILD_VECTOR [[INT6]](f64), [[INT13]](f64) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BUILD_VECTOR]](<2 x f64>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST2]](<2 x i64>) ; ; GFX9-LABEL: name: test_fdiv_v2s64 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s64), [[UV2]](s64), 0 - ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] - ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) - ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]] - ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] - ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] - ; GFX9-NEXT: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s64), [[UV2]](s64), 1 - ; GFX9-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] - ; GFX9-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] - ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[INT4]](s1) - ; GFX9-NEXT: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[UV2]](s64), [[UV]](s64) - ; GFX9-NEXT: [[INT7:%[0-9]+]]:_(s64), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s64), [[UV3]](s64), 0 - ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[INT7]] - ; GFX9-NEXT: [[INT9:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s64) - ; GFX9-NEXT: [[FMA5:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[INT9]], [[C]] - ; GFX9-NEXT: [[FMA6:%[0-9]+]]:_(s64) = G_FMA [[INT9]], [[FMA5]], [[INT9]] - ; GFX9-NEXT: [[FMA7:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[FMA6]], [[C]] - ; GFX9-NEXT: [[INT10:%[0-9]+]]:_(s64), [[INT11:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s64), [[UV3]](s64), 1 - ; GFX9-NEXT: [[FMA8:%[0-9]+]]:_(s64) = G_FMA [[FMA6]], [[FMA7]], [[FMA6]] - ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[INT10]], [[FMA8]] - ; GFX9-NEXT: [[FMA9:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], 
[[FMUL1]], [[INT10]] - ; GFX9-NEXT: [[INT12:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s64), [[FMA8]](s64), [[FMUL1]](s64), [[INT11]](s1) - ; GFX9-NEXT: [[INT13:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s64), [[UV3]](s64), [[UV1]](s64) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[INT6]](s64), [[INT13]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY1]](<2 x i64>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f64), [[UV1:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST]](<2 x f64>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(f64), [[UV3:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f64>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f64) = G_FCONSTANT double 1.000000e+00 + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(f64), [[INT1:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](f64), [[UV2]](f64), 0 + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(f64) = G_FNEG [[INT]] + ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](f64) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[INT2]], [[C]] + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(f64) = G_FMA [[INT2]], [[FMA]], [[INT2]] + ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[FMA1]], [[C]] + ; GFX9-NEXT: [[INT3:%[0-9]+]]:_(f64), [[INT4:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](f64), [[UV2]](f64), 1 + ; GFX9-NEXT: [[FMA3:%[0-9]+]]:_(f64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[INT3]], [[FMA3]] + ; GFX9-NEXT: [[FMA4:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] + ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](f64), [[FMA3]](f64), [[FMUL]](f64), [[INT4]](i1) + ; GFX9-NEXT: [[INT6:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](f64), [[UV2]](f64), [[UV]](f64) + ; GFX9-NEXT: [[INT7:%[0-9]+]]:_(f64), [[INT8:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](f64), [[UV3]](f64), 0 + ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(f64) = G_FNEG [[INT7]] + ; GFX9-NEXT: [[INT9:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](f64) + ; GFX9-NEXT: [[FMA5:%[0-9]+]]:_(f64) = G_FMA [[FNEG1]], [[INT9]], [[C]] + ; GFX9-NEXT: [[FMA6:%[0-9]+]]:_(f64) = G_FMA [[INT9]], [[FMA5]], [[INT9]] + ; GFX9-NEXT: [[FMA7:%[0-9]+]]:_(f64) = G_FMA [[FNEG1]], [[FMA6]], [[C]] + ; GFX9-NEXT: [[INT10:%[0-9]+]]:_(f64), [[INT11:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](f64), [[UV3]](f64), 1 + ; GFX9-NEXT: [[FMA8:%[0-9]+]]:_(f64) = G_FMA [[FMA6]], [[FMA7]], [[FMA6]] + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(f64) = G_FMUL [[INT10]], [[FMA8]] + ; GFX9-NEXT: [[FMA9:%[0-9]+]]:_(f64) = G_FMA [[FNEG1]], [[FMUL1]], [[INT10]] + ; GFX9-NEXT: [[INT12:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](f64), [[FMA8]](f64), [[FMUL1]](f64), [[INT11]](i1) + ; GFX9-NEXT: [[INT13:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](f64), [[UV3]](f64), [[UV1]](f64) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f64>) = G_BUILD_VECTOR [[INT6]](f64), [[INT13]](f64) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i64>) = G_BITCAST 
[[BUILD_VECTOR]](<2 x f64>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST2]](<2 x i64>) ; ; GFX9-UNSAFE-LABEL: name: test_fdiv_v2s64 ; GFX9-UNSAFE: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX9-UNSAFE-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX9-UNSAFE-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[UV2]] - ; GFX9-UNSAFE-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV2]](s64) - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT]], [[C]] - ; GFX9-UNSAFE-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[FMA]], [[INT]], [[INT]] - ; GFX9-UNSAFE-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] - ; GFX9-UNSAFE-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA2]], [[FMA1]], [[FMA1]] - ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[UV]], [[FMA3]] - ; GFX9-UNSAFE-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[UV]] - ; GFX9-UNSAFE-NEXT: [[FMA5:%[0-9]+]]:_(s64) = G_FMA [[FMA4]], [[FMA3]], [[FMUL]] - ; GFX9-UNSAFE-NEXT: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[UV3]] - ; GFX9-UNSAFE-NEXT: [[INT1:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV3]](s64) - ; GFX9-UNSAFE-NEXT: [[FMA6:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[INT1]], [[C]] - ; GFX9-UNSAFE-NEXT: [[FMA7:%[0-9]+]]:_(s64) = G_FMA [[FMA6]], [[INT1]], [[INT1]] - ; GFX9-UNSAFE-NEXT: [[FMA8:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[FMA7]], [[C]] - ; GFX9-UNSAFE-NEXT: [[FMA9:%[0-9]+]]:_(s64) = G_FMA [[FMA8]], [[FMA7]], [[FMA7]] - ; GFX9-UNSAFE-NEXT: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[UV1]], [[FMA9]] - ; GFX9-UNSAFE-NEXT: [[FMA10:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[FMUL1]], [[UV1]] - ; GFX9-UNSAFE-NEXT: [[FMA11:%[0-9]+]]:_(s64) = G_FMA [[FMA10]], [[FMA9]], [[FMUL1]] - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FMA5]](s64), [[FMA11]](s64) - ; GFX9-UNSAFE-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; GFX9-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY1]](<2 x i64>) + ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(f64), [[UV1:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST]](<2 x f64>) + ; GFX9-UNSAFE-NEXT: [[UV2:%[0-9]+]]:_(f64), [[UV3:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f64>) + ; GFX9-UNSAFE-NEXT: [[FNEG:%[0-9]+]]:_(f64) = G_FNEG [[UV2]] + ; GFX9-UNSAFE-NEXT: [[C:%[0-9]+]]:_(f64) = G_FCONSTANT double 1.000000e+00 + ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV2]](f64) + ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[INT]], [[C]] + ; GFX9-UNSAFE-NEXT: [[FMA1:%[0-9]+]]:_(f64) = G_FMA [[FMA]], [[INT]], [[INT]] + ; GFX9-UNSAFE-NEXT: [[FMA2:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[FMA1]], [[C]] + ; GFX9-UNSAFE-NEXT: [[FMA3:%[0-9]+]]:_(f64) = G_FMA [[FMA2]], [[FMA1]], [[FMA1]] + ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[UV]], 
[[FMA3]] + ; GFX9-UNSAFE-NEXT: [[FMA4:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[FMUL]], [[UV]] + ; GFX9-UNSAFE-NEXT: [[FMA5:%[0-9]+]]:_(f64) = G_FMA [[FMA4]], [[FMA3]], [[FMUL]] + ; GFX9-UNSAFE-NEXT: [[FNEG1:%[0-9]+]]:_(f64) = G_FNEG [[UV3]] + ; GFX9-UNSAFE-NEXT: [[INT1:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV3]](f64) + ; GFX9-UNSAFE-NEXT: [[FMA6:%[0-9]+]]:_(f64) = G_FMA [[FNEG1]], [[INT1]], [[C]] + ; GFX9-UNSAFE-NEXT: [[FMA7:%[0-9]+]]:_(f64) = G_FMA [[FMA6]], [[INT1]], [[INT1]] + ; GFX9-UNSAFE-NEXT: [[FMA8:%[0-9]+]]:_(f64) = G_FMA [[FNEG1]], [[FMA7]], [[C]] + ; GFX9-UNSAFE-NEXT: [[FMA9:%[0-9]+]]:_(f64) = G_FMA [[FMA8]], [[FMA7]], [[FMA7]] + ; GFX9-UNSAFE-NEXT: [[FMUL1:%[0-9]+]]:_(f64) = G_FMUL [[UV1]], [[FMA9]] + ; GFX9-UNSAFE-NEXT: [[FMA10:%[0-9]+]]:_(f64) = G_FMA [[FNEG1]], [[FMUL1]], [[UV1]] + ; GFX9-UNSAFE-NEXT: [[FMA11:%[0-9]+]]:_(f64) = G_FMA [[FMA10]], [[FMA9]], [[FMUL1]] + ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f64>) = G_BUILD_VECTOR [[FMA5]](f64), [[FMA11]](f64) + ; GFX9-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BUILD_VECTOR]](<2 x f64>) + ; GFX9-UNSAFE-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST2]](<2 x i64>) ; ; GFX10-LABEL: name: test_fdiv_v2s64 ; GFX10: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; GFX10-NEXT: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s64), [[UV2]](s64), 0 - ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] - ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) - ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]] - ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] - ; GFX10-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] - ; GFX10-NEXT: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s64), [[UV2]](s64), 1 - ; GFX10-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] - ; GFX10-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] - ; GFX10-NEXT: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[INT4]](s1) - ; GFX10-NEXT: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[UV2]](s64), [[UV]](s64) - ; GFX10-NEXT: [[INT7:%[0-9]+]]:_(s64), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s64), [[UV3]](s64), 0 - ; GFX10-NEXT: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[INT7]] - ; GFX10-NEXT: [[INT9:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s64) - ; GFX10-NEXT: [[FMA5:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[INT9]], [[C]] - ; GFX10-NEXT: [[FMA6:%[0-9]+]]:_(s64) = G_FMA [[INT9]], [[FMA5]], [[INT9]] - ; GFX10-NEXT: [[FMA7:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[FMA6]], [[C]] - ; GFX10-NEXT: [[INT10:%[0-9]+]]:_(s64), [[INT11:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), 
[[UV1]](s64), [[UV3]](s64), 1 - ; GFX10-NEXT: [[FMA8:%[0-9]+]]:_(s64) = G_FMA [[FMA6]], [[FMA7]], [[FMA6]] - ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[INT10]], [[FMA8]] - ; GFX10-NEXT: [[FMA9:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[FMUL1]], [[INT10]] - ; GFX10-NEXT: [[INT12:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s64), [[FMA8]](s64), [[FMUL1]](s64), [[INT11]](s1) - ; GFX10-NEXT: [[INT13:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s64), [[UV3]](s64), [[UV1]](s64) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[INT6]](s64), [[INT13]](s64) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - %2:_(<2 x s64>) = G_FDIV %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY1]](<2 x i64>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(f64), [[UV1:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST]](<2 x f64>) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(f64), [[UV3:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f64>) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(f64) = G_FCONSTANT double 1.000000e+00 + ; GFX10-NEXT: [[INT:%[0-9]+]]:_(f64), [[INT1:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](f64), [[UV2]](f64), 0 + ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(f64) = G_FNEG [[INT]] + ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](f64) + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[INT2]], [[C]] + ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(f64) = G_FMA [[INT2]], [[FMA]], [[INT2]] + ; GFX10-NEXT: [[FMA2:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[FMA1]], [[C]] + ; GFX10-NEXT: [[INT3:%[0-9]+]]:_(f64), [[INT4:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](f64), [[UV2]](f64), 1 + ; GFX10-NEXT: [[FMA3:%[0-9]+]]:_(f64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[INT3]], [[FMA3]] + ; GFX10-NEXT: [[FMA4:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] + ; GFX10-NEXT: [[INT5:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](f64), [[FMA3]](f64), [[FMUL]](f64), [[INT4]](i1) + ; GFX10-NEXT: [[INT6:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](f64), [[UV2]](f64), [[UV]](f64) + ; GFX10-NEXT: [[INT7:%[0-9]+]]:_(f64), [[INT8:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](f64), [[UV3]](f64), 0 + ; GFX10-NEXT: [[FNEG1:%[0-9]+]]:_(f64) = G_FNEG [[INT7]] + ; GFX10-NEXT: [[INT9:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](f64) + ; GFX10-NEXT: [[FMA5:%[0-9]+]]:_(f64) = G_FMA [[FNEG1]], [[INT9]], [[C]] + ; GFX10-NEXT: [[FMA6:%[0-9]+]]:_(f64) = G_FMA [[INT9]], [[FMA5]], [[INT9]] + ; GFX10-NEXT: [[FMA7:%[0-9]+]]:_(f64) = G_FMA [[FNEG1]], [[FMA6]], [[C]] + ; GFX10-NEXT: [[INT10:%[0-9]+]]:_(f64), [[INT11:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](f64), [[UV3]](f64), 1 + ; GFX10-NEXT: [[FMA8:%[0-9]+]]:_(f64) = G_FMA [[FMA6]], [[FMA7]], [[FMA6]] + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(f64) = G_FMUL [[INT10]], [[FMA8]] + ; GFX10-NEXT: [[FMA9:%[0-9]+]]:_(f64) = G_FMA [[FNEG1]], [[FMUL1]], [[INT10]] + ; 
GFX10-NEXT: [[INT12:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](f64), [[FMA8]](f64), [[FMUL1]](f64), [[INT11]](i1) + ; GFX10-NEXT: [[INT13:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](f64), [[UV3]](f64), [[UV1]](f64) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f64>) = G_BUILD_VECTOR [[INT6]](f64), [[INT13]](f64) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BUILD_VECTOR]](<2 x f64>) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST2]](<2 x i64>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(<2 x f64>) = G_BITCAST %0(<2 x i64>) + %3:_(<2 x f64>) = G_BITCAST %1(<2 x i64>) + %4:_(<2 x f64>) = G_FDIV %2, %3 + %5:_(<2 x i64>) = G_BITCAST %4(<2 x f64>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %5(<2 x i64>) ... --- @@ -1349,183 +1524,234 @@ body: | ; SI-LABEL: name: test_fdiv_v2s16 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; SI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 0 - ; SI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 1 - ; SI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]] - ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; SI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; SI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; SI-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[FPEXT1]](s32), [[FPEXT]](s32) - ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT6]](s32) - ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI-NEXT: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT2]](s32), [[FPEXT3]](s32), 0 - ; SI-NEXT: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT2]](s32), [[FPEXT3]](s32), 1 - ; SI-NEXT: 
[[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) - ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] - ; SI-NEXT: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C1]] - ; SI-NEXT: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] - ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] - ; SI-NEXT: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] - ; SI-NEXT: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] - ; SI-NEXT: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] - ; SI-NEXT: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) - ; SI-NEXT: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[FPEXT3]](s32), [[FPEXT2]](s32) - ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT13]](s32) - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY1]](<2 x i16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %45(i16) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %51(i16) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %46(i16) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %52(i16) + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST1]](<2 x f16>) + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST6]](<2 x i16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST7]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST7]], [[C]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST8]](<2 x i16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST9]](i32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST9]], [[C]](i32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST3]](f16) + ; SI-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; SI-NEXT: [[INT:%[0-9]+]]:_(f32), [[INT1:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](f32), [[FPEXT1]](f32), 0 + ; SI-NEXT: [[INT2:%[0-9]+]]:_(f32), [[INT3:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](f32), [[FPEXT1]](f32), 1 + ; SI-NEXT: [[INT4:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](f32) + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[INT]] + ; SI-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[INT4]], [[C1]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[INT2]], [[FMA1]] + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; SI-NEXT: 
[[FMA3:%[0-9]+]]:_(f32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; SI-NEXT: [[FMA4:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; SI-NEXT: [[INT5:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](f32), [[FMA1]](f32), [[FMA3]](f32), [[INT3]](i1) + ; SI-NEXT: [[INT6:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](f32), [[FPEXT1]](f32), [[FPEXT]](f32) + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[INT6]](f32) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST4]](f16) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST5]](f16) + ; SI-NEXT: [[INT7:%[0-9]+]]:_(f32), [[INT8:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT2]](f32), [[FPEXT3]](f32), 0 + ; SI-NEXT: [[INT9:%[0-9]+]]:_(f32), [[INT10:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT2]](f32), [[FPEXT3]](f32), 1 + ; SI-NEXT: [[INT11:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](f32) + ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(f32) = G_FNEG [[INT7]] + ; SI-NEXT: [[FMA5:%[0-9]+]]:_(f32) = G_FMA [[FNEG1]], [[INT11]], [[C1]] + ; SI-NEXT: [[FMA6:%[0-9]+]]:_(f32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] + ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[INT9]], [[FMA6]] + ; SI-NEXT: [[FMA7:%[0-9]+]]:_(f32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] + ; SI-NEXT: [[FMA8:%[0-9]+]]:_(f32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] + ; SI-NEXT: [[FMA9:%[0-9]+]]:_(f32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] + ; SI-NEXT: [[INT12:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](f32), [[FMA6]](f32), [[FMA8]](f32), [[INT10]](i1) + ; SI-NEXT: [[INT13:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](f32), [[FPEXT3]](f32), [[FPEXT2]](f32) + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[INT13]](f32) + ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; SI-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST10]](i16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST11]](i16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST12]](<2 x f16>) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST13]](<2 x i16>) ; ; VI-LABEL: name: test_fdiv_v2s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; VI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT1]] - ; VI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) - ; VI-NEXT: 
[[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] - ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FNEG]], [[FMUL]] - ; VI-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[FPEXT]] - ; VI-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FADD]], [[INT]] - ; VI-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[FMUL]] - ; VI-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FNEG]], [[FADD1]] - ; VI-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FPEXT]] - ; VI-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[FADD2]], [[INT]] - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -8388608 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[FMUL4]], [[C1]] - ; VI-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[AND]], [[FADD1]] - ; VI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD3]](s32) - ; VI-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC2]](s16), [[TRUNC]](s16) - ; VI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; VI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; VI-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT3]] - ; VI-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT3]](s32) - ; VI-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[INT2]] - ; VI-NEXT: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[FNEG1]], [[FMUL5]] - ; VI-NEXT: [[FADD4:%[0-9]+]]:_(s32) = G_FADD [[FMUL6]], [[FPEXT2]] - ; VI-NEXT: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[FADD4]], [[INT2]] - ; VI-NEXT: [[FADD5:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]] - ; VI-NEXT: [[FMUL8:%[0-9]+]]:_(s32) = G_FMUL [[FNEG1]], [[FADD5]] - ; VI-NEXT: [[FADD6:%[0-9]+]]:_(s32) = G_FADD [[FMUL8]], [[FPEXT2]] - ; VI-NEXT: [[FMUL9:%[0-9]+]]:_(s32) = G_FMUL [[FADD6]], [[INT2]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[FMUL9]], [[C1]] - ; VI-NEXT: [[FADD7:%[0-9]+]]:_(s32) = G_FADD [[AND1]], [[FADD5]] - ; VI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD7]](s32) - ; VI-NEXT: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](s16), [[TRUNC3]](s16), [[TRUNC1]](s16) - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[INT1]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[INT3]](s16) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY1]](<2 x i16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %47(i16) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %53(i16) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %48(i16) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %54(i16) + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST1]](<2 x f16>) + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST6]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST7]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST7]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST8]](<2 x i16>) + ; VI-NEXT: 
[[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST9]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST9]], [[C]](i32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; VI-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST3]](f16) + ; VI-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[FPEXT1]] + ; VI-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](f32) + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT]], [[INT]] + ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[FNEG]], [[FMUL]] + ; VI-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL1]], [[FPEXT]] + ; VI-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FADD]], [[INT]] + ; VI-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL2]], [[FMUL]] + ; VI-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[FNEG]], [[FADD1]] + ; VI-NEXT: [[FADD2:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FPEXT]] + ; VI-NEXT: [[FMUL4:%[0-9]+]]:_(f32) = G_FMUL [[FADD2]], [[INT]] + ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[FMUL4]](f32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 -8388608 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST10]], [[C1]] + ; VI-NEXT: [[BITCAST11:%[0-9]+]]:_(f32) = G_BITCAST [[AND]](i32) + ; VI-NEXT: [[FADD3:%[0-9]+]]:_(f32) = G_FADD [[BITCAST11]], [[FADD1]] + ; VI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD3]](f32) + ; VI-NEXT: [[INT1:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](f16), [[BITCAST3]](f16), [[BITCAST2]](f16) + ; VI-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST4]](f16) + ; VI-NEXT: [[FPEXT3:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST5]](f16) + ; VI-NEXT: [[FNEG1:%[0-9]+]]:_(f32) = G_FNEG [[FPEXT3]] + ; VI-NEXT: [[INT2:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT3]](f32) + ; VI-NEXT: [[FMUL5:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT2]], [[INT2]] + ; VI-NEXT: [[FMUL6:%[0-9]+]]:_(f32) = G_FMUL [[FNEG1]], [[FMUL5]] + ; VI-NEXT: [[FADD4:%[0-9]+]]:_(f32) = G_FADD [[FMUL6]], [[FPEXT2]] + ; VI-NEXT: [[FMUL7:%[0-9]+]]:_(f32) = G_FMUL [[FADD4]], [[INT2]] + ; VI-NEXT: [[FADD5:%[0-9]+]]:_(f32) = G_FADD [[FMUL7]], [[FMUL5]] + ; VI-NEXT: [[FMUL8:%[0-9]+]]:_(f32) = G_FMUL [[FNEG1]], [[FADD5]] + ; VI-NEXT: [[FADD6:%[0-9]+]]:_(f32) = G_FADD [[FMUL8]], [[FPEXT2]] + ; VI-NEXT: [[FMUL9:%[0-9]+]]:_(f32) = G_FMUL [[FADD6]], [[INT2]] + ; VI-NEXT: [[BITCAST12:%[0-9]+]]:_(i32) = G_BITCAST [[FMUL9]](f32) + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST12]], [[C1]] + ; VI-NEXT: [[BITCAST13:%[0-9]+]]:_(f32) = G_BITCAST [[AND1]](i32) + ; VI-NEXT: [[FADD7:%[0-9]+]]:_(f32) = G_FADD [[BITCAST13]], [[FADD5]] + ; VI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD7]](f32) + ; VI-NEXT: [[INT3:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](f16), [[BITCAST5]](f16), [[BITCAST4]](f16) + ; VI-NEXT: [[BITCAST14:%[0-9]+]]:_(i16) = G_BITCAST [[INT1]](f16) + ; VI-NEXT: [[BITCAST15:%[0-9]+]]:_(i16) = G_BITCAST [[INT3]](f16) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST14]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST15]](i16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST16:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[BITCAST17:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST16]](<2 x f16>) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST17]](<2 x i16>) ; ; GFX9-LABEL: name: test_fdiv_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ 
$}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX9-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT1]] - ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] - ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FNEG]], [[FMUL]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[FPEXT]] - ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FADD]], [[INT]] - ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[FMUL]] - ; GFX9-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FNEG]], [[FADD1]] - ; GFX9-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FPEXT]] - ; GFX9-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[FADD2]], [[INT]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -8388608 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[FMUL4]], [[C1]] - ; GFX9-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[AND]], [[FADD1]] - ; GFX9-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD3]](s32) - ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC2]](s16), [[TRUNC]](s16) - ; GFX9-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; GFX9-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT3]] - ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT3]](s32) - ; GFX9-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[INT2]] - ; GFX9-NEXT: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[FNEG1]], [[FMUL5]] - ; GFX9-NEXT: [[FADD4:%[0-9]+]]:_(s32) = G_FADD [[FMUL6]], [[FPEXT2]] - ; GFX9-NEXT: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[FADD4]], [[INT2]] - ; GFX9-NEXT: [[FADD5:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]] - ; GFX9-NEXT: [[FMUL8:%[0-9]+]]:_(s32) = G_FMUL [[FNEG1]], [[FADD5]] - ; GFX9-NEXT: [[FADD6:%[0-9]+]]:_(s32) = G_FADD [[FMUL8]], [[FPEXT2]] - ; GFX9-NEXT: [[FMUL9:%[0-9]+]]:_(s32) = G_FMUL [[FADD6]], [[INT2]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[FMUL9]], [[C1]] - ; GFX9-NEXT: [[FADD7:%[0-9]+]]:_(s32) = G_FADD [[AND1]], [[FADD5]] - ; GFX9-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD7]](s32) - ; GFX9-NEXT: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](s16), [[TRUNC3]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT1]](s16), [[INT3]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX9-NEXT: 
[[BITCAST1:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %47(i16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %53(i16) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %48(i16) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %54(i16) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST1]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST6]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST7]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST7]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST8]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST9]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST9]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; GFX9-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST3]](f16) + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[FPEXT1]] + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](f32) + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT]], [[INT]] + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[FNEG]], [[FMUL]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL1]], [[FPEXT]] + ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FADD]], [[INT]] + ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL2]], [[FMUL]] + ; GFX9-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[FNEG]], [[FADD1]] + ; GFX9-NEXT: [[FADD2:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FPEXT]] + ; GFX9-NEXT: [[FMUL4:%[0-9]+]]:_(f32) = G_FMUL [[FADD2]], [[INT]] + ; GFX9-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[FMUL4]](f32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 -8388608 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST10]], [[C1]] + ; GFX9-NEXT: [[BITCAST11:%[0-9]+]]:_(f32) = G_BITCAST [[AND]](i32) + ; GFX9-NEXT: [[FADD3:%[0-9]+]]:_(f32) = G_FADD [[BITCAST11]], [[FADD1]] + ; GFX9-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD3]](f32) + ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](f16), [[BITCAST3]](f16), [[BITCAST2]](f16) + ; GFX9-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST4]](f16) + ; GFX9-NEXT: [[FPEXT3:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST5]](f16) + ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(f32) = G_FNEG [[FPEXT3]] + ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT3]](f32) + ; GFX9-NEXT: [[FMUL5:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT2]], [[INT2]] + ; GFX9-NEXT: [[FMUL6:%[0-9]+]]:_(f32) = G_FMUL [[FNEG1]], [[FMUL5]] + ; GFX9-NEXT: [[FADD4:%[0-9]+]]:_(f32) = G_FADD [[FMUL6]], [[FPEXT2]] + ; GFX9-NEXT: [[FMUL7:%[0-9]+]]:_(f32) = G_FMUL [[FADD4]], [[INT2]] + ; GFX9-NEXT: [[FADD5:%[0-9]+]]:_(f32) = G_FADD [[FMUL7]], [[FMUL5]] + ; GFX9-NEXT: [[FMUL8:%[0-9]+]]:_(f32) = G_FMUL [[FNEG1]], [[FADD5]] + ; GFX9-NEXT: [[FADD6:%[0-9]+]]:_(f32) = G_FADD [[FMUL8]], [[FPEXT2]] + ; GFX9-NEXT: [[FMUL9:%[0-9]+]]:_(f32) = G_FMUL [[FADD6]], [[INT2]] + ; GFX9-NEXT: [[BITCAST12:%[0-9]+]]:_(i32) = G_BITCAST [[FMUL9]](f32) + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST12]], [[C1]] + ; GFX9-NEXT: 
[[BITCAST13:%[0-9]+]]:_(f32) = G_BITCAST [[AND1]](i32) + ; GFX9-NEXT: [[FADD7:%[0-9]+]]:_(f32) = G_FADD [[BITCAST13]], [[FADD5]] + ; GFX9-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD7]](f32) + ; GFX9-NEXT: [[INT3:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](f16), [[BITCAST5]](f16), [[BITCAST4]](f16) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[INT1]](f16), [[INT3]](f16) + ; GFX9-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BUILD_VECTOR]](<2 x f16>) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST14]](<2 x i16>) ; ; GFX9-UNSAFE-LABEL: name: test_fdiv_v2s16 ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1 ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-UNSAFE-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-UNSAFE-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-UNSAFE-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-UNSAFE-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-UNSAFE-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC2]](s16) - ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[INT]] - ; GFX9-UNSAFE-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC3]](s16) - ; GFX9-UNSAFE-NEXT: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[INT1]] - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMUL]](s16), [[FMUL1]](s16) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_FDIV %0, %1 - $vgpr0 = COPY %2 + ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX9-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX9-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %14(i16) + ; GFX9-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %20(i16) + ; GFX9-UNSAFE-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %15(i16) + ; GFX9-UNSAFE-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %21(i16) + ; GFX9-UNSAFE-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST1]](<2 x f16>) + ; GFX9-UNSAFE-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST6]](<2 x i16>) + ; GFX9-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST7]](i32) + ; GFX9-UNSAFE-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-UNSAFE-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST7]], [[C]](i32) + ; GFX9-UNSAFE-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-UNSAFE-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; GFX9-UNSAFE-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST8]](<2 x i16>) + ; GFX9-UNSAFE-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST9]](i32) + ; GFX9-UNSAFE-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST9]], [[C]](i32) 
+ ; GFX9-UNSAFE-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[BITCAST3]](f16) + ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST2]], [[INT]] + ; GFX9-UNSAFE-NEXT: [[INT1:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[BITCAST5]](f16) + ; GFX9-UNSAFE-NEXT: [[FMUL1:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST4]], [[INT1]] + ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[FMUL]](f16), [[FMUL1]](f16) + ; GFX9-UNSAFE-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BUILD_VECTOR]](<2 x f16>) + ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[BITCAST10]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %3:_(<2 x f16>) = G_BITCAST %1(<2 x i16>) + %4:_(<2 x f16>) = G_FDIV %2, %3 + %5:_(<2 x i16>) = G_BITCAST %4(<2 x f16>) + $vgpr0 = COPY %5(<2 x i16>) ... --- @@ -1537,261 +1763,326 @@ body: | ; SI-LABEL: name: test_fdiv_v3s16 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; SI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 0 - ; SI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 1 - ; SI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]] - ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; SI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; SI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; SI-NEXT: [[INT6:%[0-9]+]]:_(s32) = 
G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[FPEXT1]](s32), [[FPEXT]](s32) - ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT6]](s32) - ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; SI-NEXT: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT2]](s32), [[FPEXT3]](s32), 0 - ; SI-NEXT: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT2]](s32), [[FPEXT3]](s32), 1 - ; SI-NEXT: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) - ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] - ; SI-NEXT: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C1]] - ; SI-NEXT: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] - ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] - ; SI-NEXT: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] - ; SI-NEXT: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] - ; SI-NEXT: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] - ; SI-NEXT: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) - ; SI-NEXT: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[FPEXT3]](s32), [[FPEXT2]](s32) - ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT13]](s32) - ; SI-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; SI-NEXT: [[INT14:%[0-9]+]]:_(s32), [[INT15:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT4]](s32), [[FPEXT5]](s32), 0 - ; SI-NEXT: [[INT16:%[0-9]+]]:_(s32), [[INT17:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT4]](s32), [[FPEXT5]](s32), 1 - ; SI-NEXT: [[INT18:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT14]](s32) - ; SI-NEXT: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[INT14]] - ; SI-NEXT: [[FMA10:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[INT18]], [[C1]] - ; SI-NEXT: [[FMA11:%[0-9]+]]:_(s32) = G_FMA [[FMA10]], [[INT18]], [[INT18]] - ; SI-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INT16]], [[FMA11]] - ; SI-NEXT: [[FMA12:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMUL2]], [[INT16]] - ; SI-NEXT: [[FMA13:%[0-9]+]]:_(s32) = G_FMA [[FMA12]], [[FMA11]], [[FMUL2]] - ; SI-NEXT: [[FMA14:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMA13]], [[INT16]] - ; SI-NEXT: [[INT19:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA14]](s32), [[FMA11]](s32), [[FMA13]](s32), [[INT17]](s1) - ; SI-NEXT: [[INT20:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT19]](s32), [[FPEXT5]](s32), [[FPEXT4]](s32) - ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT20]](s32) - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) - ; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x f16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[DEF]](<4 x f16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST %100(i16) + ; SI-NEXT: 
[[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %105(i16) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %101(i16) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST5]](<2 x i16>) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST6]](i32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST6]], [[C]](i32) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x f16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x f16>), [[UV3:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[DEF1]](<4 x f16>) + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(f16) = G_BITCAST %89(i16) + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(f16) = G_BITCAST %95(i16) + ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(f16) = G_BITCAST %90(i16) + ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV3]](<2 x f16>) + ; SI-NEXT: [[BITCAST11:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST10]](<2 x i16>) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST11]](i32) + ; SI-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV2]](<2 x f16>) + ; SI-NEXT: [[BITCAST13:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST12]](<2 x i16>) + ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST13]](i32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST13]], [[C]](i32) + ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST7]](f16) + ; SI-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; SI-NEXT: [[INT:%[0-9]+]]:_(f32), [[INT1:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](f32), [[FPEXT1]](f32), 0 + ; SI-NEXT: [[INT2:%[0-9]+]]:_(f32), [[INT3:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](f32), [[FPEXT1]](f32), 1 + ; SI-NEXT: [[INT4:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](f32) + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[INT]] + ; SI-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[INT4]], [[C1]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[INT2]], [[FMA1]] + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; SI-NEXT: [[FMA3:%[0-9]+]]:_(f32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; SI-NEXT: [[FMA4:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; SI-NEXT: [[INT5:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](f32), [[FMA1]](f32), [[FMA3]](f32), [[INT3]](i1) + ; SI-NEXT: [[INT6:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](f32), [[FPEXT1]](f32), [[FPEXT]](f32) + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[INT6]](f32) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST9]](f16) + ; SI-NEXT: [[INT7:%[0-9]+]]:_(f32), [[INT8:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT2]](f32), [[FPEXT3]](f32), 0 + ; SI-NEXT: [[INT9:%[0-9]+]]:_(f32), [[INT10:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT2]](f32), [[FPEXT3]](f32), 1 + ; SI-NEXT: 
[[INT11:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](f32) + ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(f32) = G_FNEG [[INT7]] + ; SI-NEXT: [[FMA5:%[0-9]+]]:_(f32) = G_FMA [[FNEG1]], [[INT11]], [[C1]] + ; SI-NEXT: [[FMA6:%[0-9]+]]:_(f32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] + ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[INT9]], [[FMA6]] + ; SI-NEXT: [[FMA7:%[0-9]+]]:_(f32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] + ; SI-NEXT: [[FMA8:%[0-9]+]]:_(f32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] + ; SI-NEXT: [[FMA9:%[0-9]+]]:_(f32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] + ; SI-NEXT: [[INT12:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](f32), [[FMA6]](f32), [[FMA8]](f32), [[INT10]](i1) + ; SI-NEXT: [[INT13:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](f32), [[FPEXT3]](f32), [[FPEXT2]](f32) + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[INT13]](f32) + ; SI-NEXT: [[FPEXT4:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST1]](f16) + ; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST8]](f16) + ; SI-NEXT: [[INT14:%[0-9]+]]:_(f32), [[INT15:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT4]](f32), [[FPEXT5]](f32), 0 + ; SI-NEXT: [[INT16:%[0-9]+]]:_(f32), [[INT17:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT4]](f32), [[FPEXT5]](f32), 1 + ; SI-NEXT: [[INT18:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT14]](f32) + ; SI-NEXT: [[FNEG2:%[0-9]+]]:_(f32) = G_FNEG [[INT14]] + ; SI-NEXT: [[FMA10:%[0-9]+]]:_(f32) = G_FMA [[FNEG2]], [[INT18]], [[C1]] + ; SI-NEXT: [[FMA11:%[0-9]+]]:_(f32) = G_FMA [[FMA10]], [[INT18]], [[INT18]] + ; SI-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[INT16]], [[FMA11]] + ; SI-NEXT: [[FMA12:%[0-9]+]]:_(f32) = G_FMA [[FNEG2]], [[FMUL2]], [[INT16]] + ; SI-NEXT: [[FMA13:%[0-9]+]]:_(f32) = G_FMA [[FMA12]], [[FMA11]], [[FMUL2]] + ; SI-NEXT: [[FMA14:%[0-9]+]]:_(f32) = G_FMA [[FNEG2]], [[FMA13]], [[INT16]] + ; SI-NEXT: [[INT19:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA14]](f32), [[FMA11]](f32), [[FMA13]](f32), [[INT17]](i1) + ; SI-NEXT: [[INT20:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT19]](f32), [[FPEXT5]](f32), [[FPEXT4]](f32) + ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(f16) = G_FPTRUNC [[INT20]](f32) + ; SI-NEXT: [[BITCAST14:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; SI-NEXT: [[BITCAST15:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; SI-NEXT: [[BITCAST16:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC2]](f16) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST14]](i16) + ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST15]](i16) + ; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST16]](i16) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32), [[ANYEXT2]](i32) + ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x i32>) ; ; VI-LABEL: name: test_fdiv_v3s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST 
[[UV1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; VI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT1]] - ; VI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) - ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] - ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FNEG]], [[FMUL]] - ; VI-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[FPEXT]] - ; VI-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FADD]], [[INT]] - ; VI-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[FMUL]] - ; VI-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FNEG]], [[FADD1]] - ; VI-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FPEXT]] - ; VI-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[FADD2]], [[INT]] - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -8388608 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[FMUL4]], [[C1]] - ; VI-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[AND]], [[FADD1]] - ; VI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD3]](s32) - ; VI-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC3]](s16), [[TRUNC]](s16) - ; VI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; VI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; VI-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT3]] - ; VI-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT3]](s32) - ; VI-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[INT2]] - ; VI-NEXT: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[FNEG1]], [[FMUL5]] - ; VI-NEXT: [[FADD4:%[0-9]+]]:_(s32) = G_FADD [[FMUL6]], [[FPEXT2]] - ; VI-NEXT: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[FADD4]], [[INT2]] - ; VI-NEXT: [[FADD5:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]] - ; VI-NEXT: [[FMUL8:%[0-9]+]]:_(s32) = G_FMUL [[FNEG1]], [[FADD5]] - ; VI-NEXT: [[FADD6:%[0-9]+]]:_(s32) = G_FADD [[FMUL8]], [[FPEXT2]] - ; VI-NEXT: [[FMUL9:%[0-9]+]]:_(s32) = G_FMUL [[FADD6]], [[INT2]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[FMUL9]], [[C1]] - ; VI-NEXT: [[FADD7:%[0-9]+]]:_(s32) = G_FADD [[AND1]], [[FADD5]] - ; VI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD7]](s32) - ; VI-NEXT: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](s16), [[TRUNC4]](s16), [[TRUNC1]](s16) - ; VI-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; VI-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; VI-NEXT: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT5]] - ; VI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT5]](s32) - ; VI-NEXT: [[FMUL10:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[INT4]] - ; VI-NEXT: [[FMUL11:%[0-9]+]]:_(s32) = G_FMUL [[FNEG2]], [[FMUL10]] - ; VI-NEXT: [[FADD8:%[0-9]+]]:_(s32) = G_FADD [[FMUL11]], [[FPEXT4]] - ; VI-NEXT: 
[[FMUL12:%[0-9]+]]:_(s32) = G_FMUL [[FADD8]], [[INT4]] - ; VI-NEXT: [[FADD9:%[0-9]+]]:_(s32) = G_FADD [[FMUL12]], [[FMUL10]] - ; VI-NEXT: [[FMUL13:%[0-9]+]]:_(s32) = G_FMUL [[FNEG2]], [[FADD9]] - ; VI-NEXT: [[FADD10:%[0-9]+]]:_(s32) = G_FADD [[FMUL13]], [[FPEXT4]] - ; VI-NEXT: [[FMUL14:%[0-9]+]]:_(s32) = G_FMUL [[FADD10]], [[INT4]] - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[FMUL14]], [[C1]] - ; VI-NEXT: [[FADD11:%[0-9]+]]:_(s32) = G_FADD [[AND2]], [[FADD9]] - ; VI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD11]](s32) - ; VI-NEXT: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC2]](s16), [[TRUNC5]](s16), [[TRUNC2]](s16) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) - ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT3]](s16) - ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT5]](s16) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x f16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[DEF]](<4 x f16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST %103(i16) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %108(i16) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %104(i16) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST5]](<2 x i16>) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST6]](i32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST6]], [[C]](i32) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x f16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x f16>), [[UV3:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[DEF1]](<4 x f16>) + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(f16) = G_BITCAST %92(i16) + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(f16) = G_BITCAST %98(i16) + ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(f16) = G_BITCAST %93(i16) + ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV3]](<2 x f16>) + ; VI-NEXT: [[BITCAST11:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST10]](<2 x i16>) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST11]](i32) + ; VI-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV2]](<2 x f16>) + ; VI-NEXT: [[BITCAST13:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST12]](<2 x i16>) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST13]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST13]], [[C]](i32) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; VI-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST7]](f16) + ; VI-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[FPEXT1]] + ; VI-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](f32) + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT]], [[INT]] + ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[FNEG]], [[FMUL]] + ; VI-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL1]], [[FPEXT]] + ; VI-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FADD]], [[INT]] + ; VI-NEXT: 
[[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL2]], [[FMUL]] + ; VI-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[FNEG]], [[FADD1]] + ; VI-NEXT: [[FADD2:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FPEXT]] + ; VI-NEXT: [[FMUL4:%[0-9]+]]:_(f32) = G_FMUL [[FADD2]], [[INT]] + ; VI-NEXT: [[BITCAST14:%[0-9]+]]:_(i32) = G_BITCAST [[FMUL4]](f32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 -8388608 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST14]], [[C1]] + ; VI-NEXT: [[BITCAST15:%[0-9]+]]:_(f32) = G_BITCAST [[AND]](i32) + ; VI-NEXT: [[FADD3:%[0-9]+]]:_(f32) = G_FADD [[BITCAST15]], [[FADD1]] + ; VI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD3]](f32) + ; VI-NEXT: [[INT1:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](f16), [[BITCAST7]](f16), [[BITCAST]](f16) + ; VI-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; VI-NEXT: [[FPEXT3:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST9]](f16) + ; VI-NEXT: [[FNEG1:%[0-9]+]]:_(f32) = G_FNEG [[FPEXT3]] + ; VI-NEXT: [[INT2:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT3]](f32) + ; VI-NEXT: [[FMUL5:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT2]], [[INT2]] + ; VI-NEXT: [[FMUL6:%[0-9]+]]:_(f32) = G_FMUL [[FNEG1]], [[FMUL5]] + ; VI-NEXT: [[FADD4:%[0-9]+]]:_(f32) = G_FADD [[FMUL6]], [[FPEXT2]] + ; VI-NEXT: [[FMUL7:%[0-9]+]]:_(f32) = G_FMUL [[FADD4]], [[INT2]] + ; VI-NEXT: [[FADD5:%[0-9]+]]:_(f32) = G_FADD [[FMUL7]], [[FMUL5]] + ; VI-NEXT: [[FMUL8:%[0-9]+]]:_(f32) = G_FMUL [[FNEG1]], [[FADD5]] + ; VI-NEXT: [[FADD6:%[0-9]+]]:_(f32) = G_FADD [[FMUL8]], [[FPEXT2]] + ; VI-NEXT: [[FMUL9:%[0-9]+]]:_(f32) = G_FMUL [[FADD6]], [[INT2]] + ; VI-NEXT: [[BITCAST16:%[0-9]+]]:_(i32) = G_BITCAST [[FMUL9]](f32) + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST16]], [[C1]] + ; VI-NEXT: [[BITCAST17:%[0-9]+]]:_(f32) = G_BITCAST [[AND1]](i32) + ; VI-NEXT: [[FADD7:%[0-9]+]]:_(f32) = G_FADD [[BITCAST17]], [[FADD5]] + ; VI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD7]](f32) + ; VI-NEXT: [[INT3:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](f16), [[BITCAST9]](f16), [[BITCAST2]](f16) + ; VI-NEXT: [[FPEXT4:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST1]](f16) + ; VI-NEXT: [[FPEXT5:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST8]](f16) + ; VI-NEXT: [[FNEG2:%[0-9]+]]:_(f32) = G_FNEG [[FPEXT5]] + ; VI-NEXT: [[INT4:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT5]](f32) + ; VI-NEXT: [[FMUL10:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT4]], [[INT4]] + ; VI-NEXT: [[FMUL11:%[0-9]+]]:_(f32) = G_FMUL [[FNEG2]], [[FMUL10]] + ; VI-NEXT: [[FADD8:%[0-9]+]]:_(f32) = G_FADD [[FMUL11]], [[FPEXT4]] + ; VI-NEXT: [[FMUL12:%[0-9]+]]:_(f32) = G_FMUL [[FADD8]], [[INT4]] + ; VI-NEXT: [[FADD9:%[0-9]+]]:_(f32) = G_FADD [[FMUL12]], [[FMUL10]] + ; VI-NEXT: [[FMUL13:%[0-9]+]]:_(f32) = G_FMUL [[FNEG2]], [[FADD9]] + ; VI-NEXT: [[FADD10:%[0-9]+]]:_(f32) = G_FADD [[FMUL13]], [[FPEXT4]] + ; VI-NEXT: [[FMUL14:%[0-9]+]]:_(f32) = G_FMUL [[FADD10]], [[INT4]] + ; VI-NEXT: [[BITCAST18:%[0-9]+]]:_(i32) = G_BITCAST [[FMUL14]](f32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[BITCAST18]], [[C1]] + ; VI-NEXT: [[BITCAST19:%[0-9]+]]:_(f32) = G_BITCAST [[AND2]](i32) + ; VI-NEXT: [[FADD11:%[0-9]+]]:_(f32) = G_FADD [[BITCAST19]], [[FADD9]] + ; VI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD11]](f32) + ; VI-NEXT: [[INT5:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC2]](f16), [[BITCAST8]](f16), [[BITCAST1]](f16) + ; VI-NEXT: [[BITCAST20:%[0-9]+]]:_(i16) = G_BITCAST [[INT1]](f16) + ; VI-NEXT: 
[[BITCAST21:%[0-9]+]]:_(i16) = G_BITCAST [[INT3]](f16) + ; VI-NEXT: [[BITCAST22:%[0-9]+]]:_(i16) = G_BITCAST [[INT5]](f16) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST20]](i16) + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST21]](i16) + ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST22]](i16) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32), [[ANYEXT2]](i32) + ; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x i32>) ; ; GFX9-LABEL: name: test_fdiv_v3s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX9-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT1]] - ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] - ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FNEG]], [[FMUL]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[FPEXT]] - ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FADD]], [[INT]] - ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[FMUL]] - ; GFX9-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FNEG]], [[FADD1]] - ; GFX9-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FPEXT]] - ; GFX9-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[FADD2]], [[INT]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -8388608 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[FMUL4]], [[C1]] - ; GFX9-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[AND]], [[FADD1]] - ; GFX9-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD3]](s32) - ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC3]](s16), [[TRUNC]](s16) - ; GFX9-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; GFX9-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT3]] - ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT3]](s32) - ; GFX9-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[INT2]] - ; GFX9-NEXT: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[FNEG1]], [[FMUL5]] - ; GFX9-NEXT: 
[[FADD4:%[0-9]+]]:_(s32) = G_FADD [[FMUL6]], [[FPEXT2]] - ; GFX9-NEXT: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[FADD4]], [[INT2]] - ; GFX9-NEXT: [[FADD5:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]] - ; GFX9-NEXT: [[FMUL8:%[0-9]+]]:_(s32) = G_FMUL [[FNEG1]], [[FADD5]] - ; GFX9-NEXT: [[FADD6:%[0-9]+]]:_(s32) = G_FADD [[FMUL8]], [[FPEXT2]] - ; GFX9-NEXT: [[FMUL9:%[0-9]+]]:_(s32) = G_FMUL [[FADD6]], [[INT2]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[FMUL9]], [[C1]] - ; GFX9-NEXT: [[FADD7:%[0-9]+]]:_(s32) = G_FADD [[AND1]], [[FADD5]] - ; GFX9-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD7]](s32) - ; GFX9-NEXT: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](s16), [[TRUNC4]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; GFX9-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; GFX9-NEXT: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT5]] - ; GFX9-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT5]](s32) - ; GFX9-NEXT: [[FMUL10:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[INT4]] - ; GFX9-NEXT: [[FMUL11:%[0-9]+]]:_(s32) = G_FMUL [[FNEG2]], [[FMUL10]] - ; GFX9-NEXT: [[FADD8:%[0-9]+]]:_(s32) = G_FADD [[FMUL11]], [[FPEXT4]] - ; GFX9-NEXT: [[FMUL12:%[0-9]+]]:_(s32) = G_FMUL [[FADD8]], [[INT4]] - ; GFX9-NEXT: [[FADD9:%[0-9]+]]:_(s32) = G_FADD [[FMUL12]], [[FMUL10]] - ; GFX9-NEXT: [[FMUL13:%[0-9]+]]:_(s32) = G_FMUL [[FNEG2]], [[FADD9]] - ; GFX9-NEXT: [[FADD10:%[0-9]+]]:_(s32) = G_FADD [[FMUL13]], [[FPEXT4]] - ; GFX9-NEXT: [[FMUL14:%[0-9]+]]:_(s32) = G_FMUL [[FADD10]], [[INT4]] - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[FMUL14]], [[C1]] - ; GFX9-NEXT: [[FADD11:%[0-9]+]]:_(s32) = G_FADD [[AND2]], [[FADD9]] - ; GFX9-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD11]](s32) - ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC2]](s16), [[TRUNC5]](s16), [[TRUNC2]](s16) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) - ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT3]](s16) - ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT5]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x f16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[DEF]](<4 x f16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST %103(i16) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %108(i16) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %104(i16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST5]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST6]](i32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST6]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x f16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x f16>), [[UV3:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[DEF1]](<4 x f16>) + ; GFX9-NEXT: 
[[BITCAST7:%[0-9]+]]:_(f16) = G_BITCAST %92(i16) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(f16) = G_BITCAST %98(i16) + ; GFX9-NEXT: [[BITCAST9:%[0-9]+]]:_(f16) = G_BITCAST %93(i16) + ; GFX9-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV3]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST11:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST10]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST11]](i32) + ; GFX9-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV2]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST13:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST12]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST13]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST13]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; GFX9-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST7]](f16) + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[FPEXT1]] + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](f32) + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT]], [[INT]] + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[FNEG]], [[FMUL]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL1]], [[FPEXT]] + ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FADD]], [[INT]] + ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL2]], [[FMUL]] + ; GFX9-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[FNEG]], [[FADD1]] + ; GFX9-NEXT: [[FADD2:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FPEXT]] + ; GFX9-NEXT: [[FMUL4:%[0-9]+]]:_(f32) = G_FMUL [[FADD2]], [[INT]] + ; GFX9-NEXT: [[BITCAST14:%[0-9]+]]:_(i32) = G_BITCAST [[FMUL4]](f32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 -8388608 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST14]], [[C1]] + ; GFX9-NEXT: [[BITCAST15:%[0-9]+]]:_(f32) = G_BITCAST [[AND]](i32) + ; GFX9-NEXT: [[FADD3:%[0-9]+]]:_(f32) = G_FADD [[BITCAST15]], [[FADD1]] + ; GFX9-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD3]](f32) + ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](f16), [[BITCAST7]](f16), [[BITCAST]](f16) + ; GFX9-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; GFX9-NEXT: [[FPEXT3:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST9]](f16) + ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(f32) = G_FNEG [[FPEXT3]] + ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT3]](f32) + ; GFX9-NEXT: [[FMUL5:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT2]], [[INT2]] + ; GFX9-NEXT: [[FMUL6:%[0-9]+]]:_(f32) = G_FMUL [[FNEG1]], [[FMUL5]] + ; GFX9-NEXT: [[FADD4:%[0-9]+]]:_(f32) = G_FADD [[FMUL6]], [[FPEXT2]] + ; GFX9-NEXT: [[FMUL7:%[0-9]+]]:_(f32) = G_FMUL [[FADD4]], [[INT2]] + ; GFX9-NEXT: [[FADD5:%[0-9]+]]:_(f32) = G_FADD [[FMUL7]], [[FMUL5]] + ; GFX9-NEXT: [[FMUL8:%[0-9]+]]:_(f32) = G_FMUL [[FNEG1]], [[FADD5]] + ; GFX9-NEXT: [[FADD6:%[0-9]+]]:_(f32) = G_FADD [[FMUL8]], [[FPEXT2]] + ; GFX9-NEXT: [[FMUL9:%[0-9]+]]:_(f32) = G_FMUL [[FADD6]], [[INT2]] + ; GFX9-NEXT: [[BITCAST16:%[0-9]+]]:_(i32) = G_BITCAST [[FMUL9]](f32) + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST16]], [[C1]] + ; GFX9-NEXT: [[BITCAST17:%[0-9]+]]:_(f32) = G_BITCAST [[AND1]](i32) + ; GFX9-NEXT: [[FADD7:%[0-9]+]]:_(f32) = G_FADD [[BITCAST17]], [[FADD5]] + ; GFX9-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD7]](f32) + ; GFX9-NEXT: [[INT3:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](f16), [[BITCAST9]](f16), [[BITCAST2]](f16) + ; GFX9-NEXT: 
[[FPEXT4:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST1]](f16) + ; GFX9-NEXT: [[FPEXT5:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST8]](f16) + ; GFX9-NEXT: [[FNEG2:%[0-9]+]]:_(f32) = G_FNEG [[FPEXT5]] + ; GFX9-NEXT: [[INT4:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT5]](f32) + ; GFX9-NEXT: [[FMUL10:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT4]], [[INT4]] + ; GFX9-NEXT: [[FMUL11:%[0-9]+]]:_(f32) = G_FMUL [[FNEG2]], [[FMUL10]] + ; GFX9-NEXT: [[FADD8:%[0-9]+]]:_(f32) = G_FADD [[FMUL11]], [[FPEXT4]] + ; GFX9-NEXT: [[FMUL12:%[0-9]+]]:_(f32) = G_FMUL [[FADD8]], [[INT4]] + ; GFX9-NEXT: [[FADD9:%[0-9]+]]:_(f32) = G_FADD [[FMUL12]], [[FMUL10]] + ; GFX9-NEXT: [[FMUL13:%[0-9]+]]:_(f32) = G_FMUL [[FNEG2]], [[FADD9]] + ; GFX9-NEXT: [[FADD10:%[0-9]+]]:_(f32) = G_FADD [[FMUL13]], [[FPEXT4]] + ; GFX9-NEXT: [[FMUL14:%[0-9]+]]:_(f32) = G_FMUL [[FADD10]], [[INT4]] + ; GFX9-NEXT: [[BITCAST18:%[0-9]+]]:_(i32) = G_BITCAST [[FMUL14]](f32) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[BITCAST18]], [[C1]] + ; GFX9-NEXT: [[BITCAST19:%[0-9]+]]:_(f32) = G_BITCAST [[AND2]](i32) + ; GFX9-NEXT: [[FADD11:%[0-9]+]]:_(f32) = G_FADD [[BITCAST19]], [[FADD9]] + ; GFX9-NEXT: [[FPTRUNC2:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD11]](f32) + ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC2]](f16), [[BITCAST8]](f16), [[BITCAST1]](f16) + ; GFX9-NEXT: [[BITCAST20:%[0-9]+]]:_(i16) = G_BITCAST [[INT1]](f16) + ; GFX9-NEXT: [[BITCAST21:%[0-9]+]]:_(i16) = G_BITCAST [[INT3]](f16) + ; GFX9-NEXT: [[BITCAST22:%[0-9]+]]:_(i16) = G_BITCAST [[INT5]](f16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST20]](i16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST21]](i16) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST22]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32), [[ANYEXT2]](i32) + ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x i32>) ; ; GFX9-UNSAFE-LABEL: name: test_fdiv_v3s16 ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1 ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-UNSAFE-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-UNSAFE-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-UNSAFE-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-UNSAFE-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-UNSAFE-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-UNSAFE-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-UNSAFE-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC3]](s16) - ; 
GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[INT]] - ; GFX9-UNSAFE-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC4]](s16) - ; GFX9-UNSAFE-NEXT: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[INT1]] - ; GFX9-UNSAFE-NEXT: [[INT2:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC5]](s16) - ; GFX9-UNSAFE-NEXT: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[INT2]] - ; GFX9-UNSAFE-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL]](s16) - ; GFX9-UNSAFE-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL1]](s16) - ; GFX9-UNSAFE-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL2]](s16) - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; GFX9-UNSAFE-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) - %0:_(<3 x s16>) = G_IMPLICIT_DEF - %1:_(<3 x s16>) = G_IMPLICIT_DEF - %2:_(<3 x s16>) = G_FDIV %0, %1 - %3:_(<3 x s32>) = G_ANYEXT %2 - S_NOP 0, implicit %3 + ; GFX9-UNSAFE-NEXT: [[DEF:%[0-9]+]]:_(<4 x f16>) = G_IMPLICIT_DEF + ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[DEF]](<4 x f16>) + ; GFX9-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST %54(i16) + ; GFX9-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %59(i16) + ; GFX9-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %55(i16) + ; GFX9-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; GFX9-UNSAFE-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; GFX9-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; GFX9-UNSAFE-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-UNSAFE-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; GFX9-UNSAFE-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST5]](<2 x i16>) + ; GFX9-UNSAFE-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST6]](i32) + ; GFX9-UNSAFE-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST6]], [[C]](i32) + ; GFX9-UNSAFE-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-UNSAFE-NEXT: [[DEF1:%[0-9]+]]:_(<4 x f16>) = G_IMPLICIT_DEF + ; GFX9-UNSAFE-NEXT: [[UV2:%[0-9]+]]:_(<2 x f16>), [[UV3:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[DEF1]](<4 x f16>) + ; GFX9-UNSAFE-NEXT: [[BITCAST7:%[0-9]+]]:_(f16) = G_BITCAST %43(i16) + ; GFX9-UNSAFE-NEXT: [[BITCAST8:%[0-9]+]]:_(f16) = G_BITCAST %49(i16) + ; GFX9-UNSAFE-NEXT: [[BITCAST9:%[0-9]+]]:_(f16) = G_BITCAST %44(i16) + ; GFX9-UNSAFE-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV3]](<2 x f16>) + ; GFX9-UNSAFE-NEXT: [[BITCAST11:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST10]](<2 x i16>) + ; GFX9-UNSAFE-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST11]](i32) + ; GFX9-UNSAFE-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV2]](<2 x f16>) + ; GFX9-UNSAFE-NEXT: [[BITCAST13:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST12]](<2 x i16>) + ; GFX9-UNSAFE-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST13]](i32) + ; GFX9-UNSAFE-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST13]], [[C]](i32) + ; GFX9-UNSAFE-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[BITCAST7]](f16) + ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST]], [[INT]] + ; GFX9-UNSAFE-NEXT: [[INT1:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[BITCAST9]](f16) + ; GFX9-UNSAFE-NEXT: [[FMUL1:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST2]], [[INT1]] + 
; GFX9-UNSAFE-NEXT: [[INT2:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[BITCAST8]](f16) + ; GFX9-UNSAFE-NEXT: [[FMUL2:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST1]], [[INT2]] + ; GFX9-UNSAFE-NEXT: [[BITCAST14:%[0-9]+]]:_(i16) = G_BITCAST [[FMUL]](f16) + ; GFX9-UNSAFE-NEXT: [[BITCAST15:%[0-9]+]]:_(i16) = G_BITCAST [[FMUL1]](f16) + ; GFX9-UNSAFE-NEXT: [[BITCAST16:%[0-9]+]]:_(i16) = G_BITCAST [[FMUL2]](f16) + ; GFX9-UNSAFE-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST14]](i16) + ; GFX9-UNSAFE-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST15]](i16) + ; GFX9-UNSAFE-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST16]](i16) + ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32), [[ANYEXT2]](i32) + ; GFX9-UNSAFE-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x i32>) + %0:_(<3 x f16>) = G_IMPLICIT_DEF + %1:_(<3 x f16>) = G_IMPLICIT_DEF + %2:_(<3 x f16>) = G_FDIV %0, %1 + %3:_(<3 x i16>) = G_BITCAST %2(<3 x f16>) + %4:_(<3 x i32>) = G_ANYEXT %3(<3 x i16>) + S_NOP 0, implicit %4(<3 x i32>) ... --- @@ -1803,337 +2094,424 @@ body: | ; SI-LABEL: name: test_fdiv_v4s16 ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; SI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 0 - ; SI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 1 - ; SI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]] - ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; 
SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; SI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; SI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; SI-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[FPEXT1]](s32), [[FPEXT]](s32) - ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT6]](s32) - ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; SI-NEXT: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT2]](s32), [[FPEXT3]](s32), 0 - ; SI-NEXT: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT2]](s32), [[FPEXT3]](s32), 1 - ; SI-NEXT: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) - ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] - ; SI-NEXT: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C1]] - ; SI-NEXT: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] - ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] - ; SI-NEXT: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] - ; SI-NEXT: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] - ; SI-NEXT: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] - ; SI-NEXT: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) - ; SI-NEXT: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[FPEXT3]](s32), [[FPEXT2]](s32) - ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT13]](s32) - ; SI-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) - ; SI-NEXT: [[INT14:%[0-9]+]]:_(s32), [[INT15:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT4]](s32), [[FPEXT5]](s32), 0 - ; SI-NEXT: [[INT16:%[0-9]+]]:_(s32), [[INT17:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT4]](s32), [[FPEXT5]](s32), 1 - ; SI-NEXT: [[INT18:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT14]](s32) - ; SI-NEXT: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[INT14]] - ; SI-NEXT: [[FMA10:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[INT18]], [[C1]] - ; SI-NEXT: [[FMA11:%[0-9]+]]:_(s32) = G_FMA [[FMA10]], [[INT18]], [[INT18]] - ; SI-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INT16]], [[FMA11]] - ; SI-NEXT: [[FMA12:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMUL2]], [[INT16]] - ; SI-NEXT: [[FMA13:%[0-9]+]]:_(s32) = G_FMA [[FMA12]], [[FMA11]], [[FMUL2]] - ; SI-NEXT: [[FMA14:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMA13]], [[INT16]] - ; SI-NEXT: [[INT19:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA14]](s32), [[FMA11]](s32), [[FMA13]](s32), [[INT17]](s1) - ; SI-NEXT: [[INT20:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT19]](s32), [[FPEXT5]](s32), [[FPEXT4]](s32) - ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT20]](s32) - ; SI-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) - ; SI-NEXT: [[INT21:%[0-9]+]]:_(s32), [[INT22:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), 
[[FPEXT6]](s32), [[FPEXT7]](s32), 0 - ; SI-NEXT: [[INT23:%[0-9]+]]:_(s32), [[INT24:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT6]](s32), [[FPEXT7]](s32), 1 - ; SI-NEXT: [[INT25:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT21]](s32) - ; SI-NEXT: [[FNEG3:%[0-9]+]]:_(s32) = G_FNEG [[INT21]] - ; SI-NEXT: [[FMA15:%[0-9]+]]:_(s32) = G_FMA [[FNEG3]], [[INT25]], [[C1]] - ; SI-NEXT: [[FMA16:%[0-9]+]]:_(s32) = G_FMA [[FMA15]], [[INT25]], [[INT25]] - ; SI-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INT23]], [[FMA16]] - ; SI-NEXT: [[FMA17:%[0-9]+]]:_(s32) = G_FMA [[FNEG3]], [[FMUL3]], [[INT23]] - ; SI-NEXT: [[FMA18:%[0-9]+]]:_(s32) = G_FMA [[FMA17]], [[FMA16]], [[FMUL3]] - ; SI-NEXT: [[FMA19:%[0-9]+]]:_(s32) = G_FMA [[FNEG3]], [[FMA18]], [[INT23]] - ; SI-NEXT: [[INT26:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA19]](s32), [[FMA16]](s32), [[FMA18]](s32), [[INT24]](s1) - ; SI-NEXT: [[INT27:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT26]](s32), [[FPEXT7]](s32), [[FPEXT6]](s32) - ; SI-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT27]](s32) - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) - ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY1]](<4 x i16>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x f16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %89(i16) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %95(i16) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %90(i16) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %96(i16) + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST6]](<2 x i16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST7]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST7]], [[C]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST8]](<2 x i16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST9]](i32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST9]], [[C]](i32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x f16>), [[UV3:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f16>) + ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(f16) 
= G_BITCAST %100(i16) + ; SI-NEXT: [[BITCAST11:%[0-9]+]]:_(f16) = G_BITCAST %105(i16) + ; SI-NEXT: [[BITCAST12:%[0-9]+]]:_(f16) = G_BITCAST %101(i16) + ; SI-NEXT: [[BITCAST13:%[0-9]+]]:_(f16) = G_BITCAST %106(i16) + ; SI-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV3]](<2 x f16>) + ; SI-NEXT: [[BITCAST15:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST14]](<2 x i16>) + ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST15]](i32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST15]], [[C]](i32) + ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; SI-NEXT: [[BITCAST16:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV2]](<2 x f16>) + ; SI-NEXT: [[BITCAST17:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST16]](<2 x i16>) + ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST17]](i32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST17]], [[C]](i32) + ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST10]](f16) + ; SI-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; SI-NEXT: [[INT:%[0-9]+]]:_(f32), [[INT1:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](f32), [[FPEXT1]](f32), 0 + ; SI-NEXT: [[INT2:%[0-9]+]]:_(f32), [[INT3:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](f32), [[FPEXT1]](f32), 1 + ; SI-NEXT: [[INT4:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](f32) + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[INT]] + ; SI-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[INT4]], [[C1]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[INT2]], [[FMA1]] + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; SI-NEXT: [[FMA3:%[0-9]+]]:_(f32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; SI-NEXT: [[FMA4:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; SI-NEXT: [[INT5:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](f32), [[FMA1]](f32), [[FMA3]](f32), [[INT3]](i1) + ; SI-NEXT: [[INT6:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](f32), [[FPEXT1]](f32), [[FPEXT]](f32) + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[INT6]](f32) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST4]](f16) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST12]](f16) + ; SI-NEXT: [[INT7:%[0-9]+]]:_(f32), [[INT8:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT2]](f32), [[FPEXT3]](f32), 0 + ; SI-NEXT: [[INT9:%[0-9]+]]:_(f32), [[INT10:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT2]](f32), [[FPEXT3]](f32), 1 + ; SI-NEXT: [[INT11:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](f32) + ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(f32) = G_FNEG [[INT7]] + ; SI-NEXT: [[FMA5:%[0-9]+]]:_(f32) = G_FMA [[FNEG1]], [[INT11]], [[C1]] + ; SI-NEXT: [[FMA6:%[0-9]+]]:_(f32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] + ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[INT9]], [[FMA6]] + ; SI-NEXT: [[FMA7:%[0-9]+]]:_(f32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] + ; SI-NEXT: [[FMA8:%[0-9]+]]:_(f32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] + ; SI-NEXT: [[FMA9:%[0-9]+]]:_(f32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] + ; SI-NEXT: [[INT12:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](f32), [[FMA6]](f32), [[FMA8]](f32), [[INT10]](i1) + ; SI-NEXT: [[INT13:%[0-9]+]]:_(f32) 
= G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](f32), [[FPEXT3]](f32), [[FPEXT2]](f32) + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[INT13]](f32) + ; SI-NEXT: [[FPEXT4:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST3]](f16) + ; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST11]](f16) + ; SI-NEXT: [[INT14:%[0-9]+]]:_(f32), [[INT15:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT4]](f32), [[FPEXT5]](f32), 0 + ; SI-NEXT: [[INT16:%[0-9]+]]:_(f32), [[INT17:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT4]](f32), [[FPEXT5]](f32), 1 + ; SI-NEXT: [[INT18:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT14]](f32) + ; SI-NEXT: [[FNEG2:%[0-9]+]]:_(f32) = G_FNEG [[INT14]] + ; SI-NEXT: [[FMA10:%[0-9]+]]:_(f32) = G_FMA [[FNEG2]], [[INT18]], [[C1]] + ; SI-NEXT: [[FMA11:%[0-9]+]]:_(f32) = G_FMA [[FMA10]], [[INT18]], [[INT18]] + ; SI-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[INT16]], [[FMA11]] + ; SI-NEXT: [[FMA12:%[0-9]+]]:_(f32) = G_FMA [[FNEG2]], [[FMUL2]], [[INT16]] + ; SI-NEXT: [[FMA13:%[0-9]+]]:_(f32) = G_FMA [[FMA12]], [[FMA11]], [[FMUL2]] + ; SI-NEXT: [[FMA14:%[0-9]+]]:_(f32) = G_FMA [[FNEG2]], [[FMA13]], [[INT16]] + ; SI-NEXT: [[INT19:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA14]](f32), [[FMA11]](f32), [[FMA13]](f32), [[INT17]](i1) + ; SI-NEXT: [[INT20:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT19]](f32), [[FPEXT5]](f32), [[FPEXT4]](f32) + ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(f16) = G_FPTRUNC [[INT20]](f32) + ; SI-NEXT: [[FPEXT6:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST5]](f16) + ; SI-NEXT: [[FPEXT7:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST13]](f16) + ; SI-NEXT: [[INT21:%[0-9]+]]:_(f32), [[INT22:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT6]](f32), [[FPEXT7]](f32), 0 + ; SI-NEXT: [[INT23:%[0-9]+]]:_(f32), [[INT24:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT6]](f32), [[FPEXT7]](f32), 1 + ; SI-NEXT: [[INT25:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT21]](f32) + ; SI-NEXT: [[FNEG3:%[0-9]+]]:_(f32) = G_FNEG [[INT21]] + ; SI-NEXT: [[FMA15:%[0-9]+]]:_(f32) = G_FMA [[FNEG3]], [[INT25]], [[C1]] + ; SI-NEXT: [[FMA16:%[0-9]+]]:_(f32) = G_FMA [[FMA15]], [[INT25]], [[INT25]] + ; SI-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[INT23]], [[FMA16]] + ; SI-NEXT: [[FMA17:%[0-9]+]]:_(f32) = G_FMA [[FNEG3]], [[FMUL3]], [[INT23]] + ; SI-NEXT: [[FMA18:%[0-9]+]]:_(f32) = G_FMA [[FMA17]], [[FMA16]], [[FMUL3]] + ; SI-NEXT: [[FMA19:%[0-9]+]]:_(f32) = G_FMA [[FNEG3]], [[FMA18]], [[INT23]] + ; SI-NEXT: [[INT26:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA19]](f32), [[FMA16]](f32), [[FMA18]](f32), [[INT24]](i1) + ; SI-NEXT: [[INT27:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT26]](f32), [[FPEXT7]](f32), [[FPEXT6]](f32) + ; SI-NEXT: [[FPTRUNC3:%[0-9]+]]:_(f16) = G_FPTRUNC [[INT27]](f32) + ; SI-NEXT: [[BITCAST18:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; SI-NEXT: [[BITCAST19:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST18]](i16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST19]](i16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST20:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[BITCAST21:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC2]](f16) + ; SI-NEXT: [[BITCAST22:%[0-9]+]]:_(i16) = G_BITCAST 
[[FPTRUNC3]](f16) + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST21]](i16) + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST22]](i16) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI-NEXT: [[BITCAST23:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR1]](i32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[BITCAST20]](<2 x f16>), [[BITCAST23]](<2 x f16>) + ; SI-NEXT: [[BITCAST24:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x f16>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST24]](<4 x i16>) ; ; VI-LABEL: name: test_fdiv_v4s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; VI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; VI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT1]] - ; VI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) - ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] - ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FNEG]], [[FMUL]] - ; VI-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[FPEXT]] - ; VI-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FADD]], [[INT]] - ; VI-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[FMUL]] - ; VI-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FNEG]], [[FADD1]] - ; VI-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FPEXT]] - ; VI-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[FADD2]], [[INT]] - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -8388608 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[FMUL4]], [[C1]] - ; VI-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[AND]], [[FADD1]] - ; VI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD3]](s32) - ; VI-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC4]](s16), [[TRUNC]](s16) - ; VI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; VI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; 
VI-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT3]] - ; VI-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT3]](s32) - ; VI-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[INT2]] - ; VI-NEXT: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[FNEG1]], [[FMUL5]] - ; VI-NEXT: [[FADD4:%[0-9]+]]:_(s32) = G_FADD [[FMUL6]], [[FPEXT2]] - ; VI-NEXT: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[FADD4]], [[INT2]] - ; VI-NEXT: [[FADD5:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]] - ; VI-NEXT: [[FMUL8:%[0-9]+]]:_(s32) = G_FMUL [[FNEG1]], [[FADD5]] - ; VI-NEXT: [[FADD6:%[0-9]+]]:_(s32) = G_FADD [[FMUL8]], [[FPEXT2]] - ; VI-NEXT: [[FMUL9:%[0-9]+]]:_(s32) = G_FMUL [[FADD6]], [[INT2]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[FMUL9]], [[C1]] - ; VI-NEXT: [[FADD7:%[0-9]+]]:_(s32) = G_FADD [[AND1]], [[FADD5]] - ; VI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD7]](s32) - ; VI-NEXT: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](s16), [[TRUNC5]](s16), [[TRUNC1]](s16) - ; VI-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; VI-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) - ; VI-NEXT: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT5]] - ; VI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT5]](s32) - ; VI-NEXT: [[FMUL10:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[INT4]] - ; VI-NEXT: [[FMUL11:%[0-9]+]]:_(s32) = G_FMUL [[FNEG2]], [[FMUL10]] - ; VI-NEXT: [[FADD8:%[0-9]+]]:_(s32) = G_FADD [[FMUL11]], [[FPEXT4]] - ; VI-NEXT: [[FMUL12:%[0-9]+]]:_(s32) = G_FMUL [[FADD8]], [[INT4]] - ; VI-NEXT: [[FADD9:%[0-9]+]]:_(s32) = G_FADD [[FMUL12]], [[FMUL10]] - ; VI-NEXT: [[FMUL13:%[0-9]+]]:_(s32) = G_FMUL [[FNEG2]], [[FADD9]] - ; VI-NEXT: [[FADD10:%[0-9]+]]:_(s32) = G_FADD [[FMUL13]], [[FPEXT4]] - ; VI-NEXT: [[FMUL14:%[0-9]+]]:_(s32) = G_FMUL [[FADD10]], [[INT4]] - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[FMUL14]], [[C1]] - ; VI-NEXT: [[FADD11:%[0-9]+]]:_(s32) = G_FADD [[AND2]], [[FADD9]] - ; VI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD11]](s32) - ; VI-NEXT: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC2]](s16), [[TRUNC6]](s16), [[TRUNC2]](s16) - ; VI-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; VI-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) - ; VI-NEXT: [[FNEG3:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT7]] - ; VI-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT7]](s32) - ; VI-NEXT: [[FMUL15:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT6]], [[INT6]] - ; VI-NEXT: [[FMUL16:%[0-9]+]]:_(s32) = G_FMUL [[FNEG3]], [[FMUL15]] - ; VI-NEXT: [[FADD12:%[0-9]+]]:_(s32) = G_FADD [[FMUL16]], [[FPEXT6]] - ; VI-NEXT: [[FMUL17:%[0-9]+]]:_(s32) = G_FMUL [[FADD12]], [[INT6]] - ; VI-NEXT: [[FADD13:%[0-9]+]]:_(s32) = G_FADD [[FMUL17]], [[FMUL15]] - ; VI-NEXT: [[FMUL18:%[0-9]+]]:_(s32) = G_FMUL [[FNEG3]], [[FADD13]] - ; VI-NEXT: [[FADD14:%[0-9]+]]:_(s32) = G_FADD [[FMUL18]], [[FPEXT6]] - ; VI-NEXT: [[FMUL19:%[0-9]+]]:_(s32) = G_FMUL [[FADD14]], [[INT6]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[FMUL19]], [[C1]] - ; VI-NEXT: [[FADD15:%[0-9]+]]:_(s32) = G_FADD [[AND3]], [[FADD13]] - ; VI-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD15]](s32) - ; VI-NEXT: [[INT7:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC3]](s16), [[TRUNC7]](s16), [[TRUNC3]](s16) - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[INT1]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[INT3]](s16) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = 
G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[INT5]](s16) - ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[INT7]](s16) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY1]](<4 x i16>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x f16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %93(i16) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %99(i16) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %94(i16) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %100(i16) + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST6]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST7]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST7]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST8]](<2 x i16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST9]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST9]], [[C]](i32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x f16>), [[UV3:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f16>) + ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(f16) = G_BITCAST %104(i16) + ; VI-NEXT: [[BITCAST11:%[0-9]+]]:_(f16) = G_BITCAST %109(i16) + ; VI-NEXT: [[BITCAST12:%[0-9]+]]:_(f16) = G_BITCAST %105(i16) + ; VI-NEXT: [[BITCAST13:%[0-9]+]]:_(f16) = G_BITCAST %110(i16) + ; VI-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV3]](<2 x f16>) + ; VI-NEXT: [[BITCAST15:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST14]](<2 x i16>) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST15]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST15]], [[C]](i32) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; VI-NEXT: [[BITCAST16:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV2]](<2 x f16>) + ; VI-NEXT: [[BITCAST17:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST16]](<2 x i16>) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST17]](i32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST17]], [[C]](i32) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; VI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; VI-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST10]](f16) + ; VI-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[FPEXT1]] + ; VI-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](f32) + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT]], [[INT]] + ; VI-NEXT: 
[[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[FNEG]], [[FMUL]] + ; VI-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL1]], [[FPEXT]] + ; VI-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FADD]], [[INT]] + ; VI-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL2]], [[FMUL]] + ; VI-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[FNEG]], [[FADD1]] + ; VI-NEXT: [[FADD2:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FPEXT]] + ; VI-NEXT: [[FMUL4:%[0-9]+]]:_(f32) = G_FMUL [[FADD2]], [[INT]] + ; VI-NEXT: [[BITCAST18:%[0-9]+]]:_(i32) = G_BITCAST [[FMUL4]](f32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 -8388608 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST18]], [[C1]] + ; VI-NEXT: [[BITCAST19:%[0-9]+]]:_(f32) = G_BITCAST [[AND]](i32) + ; VI-NEXT: [[FADD3:%[0-9]+]]:_(f32) = G_FADD [[BITCAST19]], [[FADD1]] + ; VI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD3]](f32) + ; VI-NEXT: [[INT1:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](f16), [[BITCAST10]](f16), [[BITCAST2]](f16) + ; VI-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST4]](f16) + ; VI-NEXT: [[FPEXT3:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST12]](f16) + ; VI-NEXT: [[FNEG1:%[0-9]+]]:_(f32) = G_FNEG [[FPEXT3]] + ; VI-NEXT: [[INT2:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT3]](f32) + ; VI-NEXT: [[FMUL5:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT2]], [[INT2]] + ; VI-NEXT: [[FMUL6:%[0-9]+]]:_(f32) = G_FMUL [[FNEG1]], [[FMUL5]] + ; VI-NEXT: [[FADD4:%[0-9]+]]:_(f32) = G_FADD [[FMUL6]], [[FPEXT2]] + ; VI-NEXT: [[FMUL7:%[0-9]+]]:_(f32) = G_FMUL [[FADD4]], [[INT2]] + ; VI-NEXT: [[FADD5:%[0-9]+]]:_(f32) = G_FADD [[FMUL7]], [[FMUL5]] + ; VI-NEXT: [[FMUL8:%[0-9]+]]:_(f32) = G_FMUL [[FNEG1]], [[FADD5]] + ; VI-NEXT: [[FADD6:%[0-9]+]]:_(f32) = G_FADD [[FMUL8]], [[FPEXT2]] + ; VI-NEXT: [[FMUL9:%[0-9]+]]:_(f32) = G_FMUL [[FADD6]], [[INT2]] + ; VI-NEXT: [[BITCAST20:%[0-9]+]]:_(i32) = G_BITCAST [[FMUL9]](f32) + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST20]], [[C1]] + ; VI-NEXT: [[BITCAST21:%[0-9]+]]:_(f32) = G_BITCAST [[AND1]](i32) + ; VI-NEXT: [[FADD7:%[0-9]+]]:_(f32) = G_FADD [[BITCAST21]], [[FADD5]] + ; VI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD7]](f32) + ; VI-NEXT: [[INT3:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](f16), [[BITCAST12]](f16), [[BITCAST4]](f16) + ; VI-NEXT: [[FPEXT4:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST3]](f16) + ; VI-NEXT: [[FPEXT5:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST11]](f16) + ; VI-NEXT: [[FNEG2:%[0-9]+]]:_(f32) = G_FNEG [[FPEXT5]] + ; VI-NEXT: [[INT4:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT5]](f32) + ; VI-NEXT: [[FMUL10:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT4]], [[INT4]] + ; VI-NEXT: [[FMUL11:%[0-9]+]]:_(f32) = G_FMUL [[FNEG2]], [[FMUL10]] + ; VI-NEXT: [[FADD8:%[0-9]+]]:_(f32) = G_FADD [[FMUL11]], [[FPEXT4]] + ; VI-NEXT: [[FMUL12:%[0-9]+]]:_(f32) = G_FMUL [[FADD8]], [[INT4]] + ; VI-NEXT: [[FADD9:%[0-9]+]]:_(f32) = G_FADD [[FMUL12]], [[FMUL10]] + ; VI-NEXT: [[FMUL13:%[0-9]+]]:_(f32) = G_FMUL [[FNEG2]], [[FADD9]] + ; VI-NEXT: [[FADD10:%[0-9]+]]:_(f32) = G_FADD [[FMUL13]], [[FPEXT4]] + ; VI-NEXT: [[FMUL14:%[0-9]+]]:_(f32) = G_FMUL [[FADD10]], [[INT4]] + ; VI-NEXT: [[BITCAST22:%[0-9]+]]:_(i32) = G_BITCAST [[FMUL14]](f32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[BITCAST22]], [[C1]] + ; VI-NEXT: [[BITCAST23:%[0-9]+]]:_(f32) = G_BITCAST [[AND2]](i32) + ; VI-NEXT: [[FADD11:%[0-9]+]]:_(f32) = G_FADD [[BITCAST23]], [[FADD9]] + ; VI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD11]](f32) + ; VI-NEXT: [[INT5:%[0-9]+]]:_(f16) = 
G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC2]](f16), [[BITCAST11]](f16), [[BITCAST3]](f16) + ; VI-NEXT: [[FPEXT6:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST5]](f16) + ; VI-NEXT: [[FPEXT7:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST13]](f16) + ; VI-NEXT: [[FNEG3:%[0-9]+]]:_(f32) = G_FNEG [[FPEXT7]] + ; VI-NEXT: [[INT6:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT7]](f32) + ; VI-NEXT: [[FMUL15:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT6]], [[INT6]] + ; VI-NEXT: [[FMUL16:%[0-9]+]]:_(f32) = G_FMUL [[FNEG3]], [[FMUL15]] + ; VI-NEXT: [[FADD12:%[0-9]+]]:_(f32) = G_FADD [[FMUL16]], [[FPEXT6]] + ; VI-NEXT: [[FMUL17:%[0-9]+]]:_(f32) = G_FMUL [[FADD12]], [[INT6]] + ; VI-NEXT: [[FADD13:%[0-9]+]]:_(f32) = G_FADD [[FMUL17]], [[FMUL15]] + ; VI-NEXT: [[FMUL18:%[0-9]+]]:_(f32) = G_FMUL [[FNEG3]], [[FADD13]] + ; VI-NEXT: [[FADD14:%[0-9]+]]:_(f32) = G_FADD [[FMUL18]], [[FPEXT6]] + ; VI-NEXT: [[FMUL19:%[0-9]+]]:_(f32) = G_FMUL [[FADD14]], [[INT6]] + ; VI-NEXT: [[BITCAST24:%[0-9]+]]:_(i32) = G_BITCAST [[FMUL19]](f32) + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST24]], [[C1]] + ; VI-NEXT: [[BITCAST25:%[0-9]+]]:_(f32) = G_BITCAST [[AND3]](i32) + ; VI-NEXT: [[FADD15:%[0-9]+]]:_(f32) = G_FADD [[BITCAST25]], [[FADD13]] + ; VI-NEXT: [[FPTRUNC3:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD15]](f32) + ; VI-NEXT: [[INT7:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC3]](f16), [[BITCAST13]](f16), [[BITCAST5]](f16) + ; VI-NEXT: [[BITCAST26:%[0-9]+]]:_(i16) = G_BITCAST [[INT1]](f16) + ; VI-NEXT: [[BITCAST27:%[0-9]+]]:_(i16) = G_BITCAST [[INT3]](f16) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST26]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST27]](i16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST28:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[BITCAST29:%[0-9]+]]:_(i16) = G_BITCAST [[INT5]](f16) + ; VI-NEXT: [[BITCAST30:%[0-9]+]]:_(i16) = G_BITCAST [[INT7]](f16) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST29]](i16) + ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST30]](i16) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI-NEXT: [[BITCAST31:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR1]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[BITCAST28]](<2 x f16>), [[BITCAST31]](<2 x f16>) + ; VI-NEXT: [[BITCAST32:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x f16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST32]](<4 x i16>) ; ; GFX9-LABEL: name: test_fdiv_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; 
GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX9-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX9-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT1]] - ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] - ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FNEG]], [[FMUL]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[FPEXT]] - ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FADD]], [[INT]] - ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[FMUL]] - ; GFX9-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FNEG]], [[FADD1]] - ; GFX9-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FPEXT]] - ; GFX9-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[FADD2]], [[INT]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -8388608 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[FMUL4]], [[C1]] - ; GFX9-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[AND]], [[FADD1]] - ; GFX9-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD3]](s32) - ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC4]](s16), [[TRUNC]](s16) - ; GFX9-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; GFX9-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT3]] - ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT3]](s32) - ; GFX9-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[INT2]] - ; GFX9-NEXT: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[FNEG1]], [[FMUL5]] - ; GFX9-NEXT: [[FADD4:%[0-9]+]]:_(s32) = G_FADD [[FMUL6]], [[FPEXT2]] - ; GFX9-NEXT: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[FADD4]], [[INT2]] - ; GFX9-NEXT: [[FADD5:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]] - ; GFX9-NEXT: [[FMUL8:%[0-9]+]]:_(s32) = G_FMUL [[FNEG1]], [[FADD5]] - ; GFX9-NEXT: [[FADD6:%[0-9]+]]:_(s32) = G_FADD [[FMUL8]], [[FPEXT2]] - ; GFX9-NEXT: [[FMUL9:%[0-9]+]]:_(s32) = G_FMUL [[FADD6]], [[INT2]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[FMUL9]], [[C1]] - ; GFX9-NEXT: [[FADD7:%[0-9]+]]:_(s32) = G_FADD [[AND1]], [[FADD5]] - ; GFX9-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD7]](s32) - ; GFX9-NEXT: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](s16), [[TRUNC5]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; GFX9-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) - ; GFX9-NEXT: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT5]] - ; GFX9-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT5]](s32) - ; GFX9-NEXT: [[FMUL10:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[INT4]] - ; GFX9-NEXT: 
[[FMUL11:%[0-9]+]]:_(s32) = G_FMUL [[FNEG2]], [[FMUL10]] - ; GFX9-NEXT: [[FADD8:%[0-9]+]]:_(s32) = G_FADD [[FMUL11]], [[FPEXT4]] - ; GFX9-NEXT: [[FMUL12:%[0-9]+]]:_(s32) = G_FMUL [[FADD8]], [[INT4]] - ; GFX9-NEXT: [[FADD9:%[0-9]+]]:_(s32) = G_FADD [[FMUL12]], [[FMUL10]] - ; GFX9-NEXT: [[FMUL13:%[0-9]+]]:_(s32) = G_FMUL [[FNEG2]], [[FADD9]] - ; GFX9-NEXT: [[FADD10:%[0-9]+]]:_(s32) = G_FADD [[FMUL13]], [[FPEXT4]] - ; GFX9-NEXT: [[FMUL14:%[0-9]+]]:_(s32) = G_FMUL [[FADD10]], [[INT4]] - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[FMUL14]], [[C1]] - ; GFX9-NEXT: [[FADD11:%[0-9]+]]:_(s32) = G_FADD [[AND2]], [[FADD9]] - ; GFX9-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD11]](s32) - ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC2]](s16), [[TRUNC6]](s16), [[TRUNC2]](s16) - ; GFX9-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; GFX9-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) - ; GFX9-NEXT: [[FNEG3:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT7]] - ; GFX9-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT7]](s32) - ; GFX9-NEXT: [[FMUL15:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT6]], [[INT6]] - ; GFX9-NEXT: [[FMUL16:%[0-9]+]]:_(s32) = G_FMUL [[FNEG3]], [[FMUL15]] - ; GFX9-NEXT: [[FADD12:%[0-9]+]]:_(s32) = G_FADD [[FMUL16]], [[FPEXT6]] - ; GFX9-NEXT: [[FMUL17:%[0-9]+]]:_(s32) = G_FMUL [[FADD12]], [[INT6]] - ; GFX9-NEXT: [[FADD13:%[0-9]+]]:_(s32) = G_FADD [[FMUL17]], [[FMUL15]] - ; GFX9-NEXT: [[FMUL18:%[0-9]+]]:_(s32) = G_FMUL [[FNEG3]], [[FADD13]] - ; GFX9-NEXT: [[FADD14:%[0-9]+]]:_(s32) = G_FADD [[FMUL18]], [[FPEXT6]] - ; GFX9-NEXT: [[FMUL19:%[0-9]+]]:_(s32) = G_FMUL [[FADD14]], [[INT6]] - ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[FMUL19]], [[C1]] - ; GFX9-NEXT: [[FADD15:%[0-9]+]]:_(s32) = G_FADD [[AND3]], [[FADD13]] - ; GFX9-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD15]](s32) - ; GFX9-NEXT: [[INT7:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC3]](s16), [[TRUNC7]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT1]](s16), [[INT3]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT5]](s16), [[INT7]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY1]](<4 x i16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x f16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %93(i16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %99(i16) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %94(i16) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %100(i16) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST6]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST7]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST7]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: 
[[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST8]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST9]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST9]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x f16>), [[UV3:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f16>) + ; GFX9-NEXT: [[BITCAST10:%[0-9]+]]:_(f16) = G_BITCAST %104(i16) + ; GFX9-NEXT: [[BITCAST11:%[0-9]+]]:_(f16) = G_BITCAST %109(i16) + ; GFX9-NEXT: [[BITCAST12:%[0-9]+]]:_(f16) = G_BITCAST %105(i16) + ; GFX9-NEXT: [[BITCAST13:%[0-9]+]]:_(f16) = G_BITCAST %110(i16) + ; GFX9-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV3]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST15:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST14]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST15]](i32) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST15]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX9-NEXT: [[BITCAST16:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV2]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST17:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST16]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST17]](i32) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST17]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX9-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; GFX9-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST10]](f16) + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[FPEXT1]] + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](f32) + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT]], [[INT]] + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[FNEG]], [[FMUL]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL1]], [[FPEXT]] + ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FADD]], [[INT]] + ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL2]], [[FMUL]] + ; GFX9-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[FNEG]], [[FADD1]] + ; GFX9-NEXT: [[FADD2:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FPEXT]] + ; GFX9-NEXT: [[FMUL4:%[0-9]+]]:_(f32) = G_FMUL [[FADD2]], [[INT]] + ; GFX9-NEXT: [[BITCAST18:%[0-9]+]]:_(i32) = G_BITCAST [[FMUL4]](f32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 -8388608 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST18]], [[C1]] + ; GFX9-NEXT: [[BITCAST19:%[0-9]+]]:_(f32) = G_BITCAST [[AND]](i32) + ; GFX9-NEXT: [[FADD3:%[0-9]+]]:_(f32) = G_FADD [[BITCAST19]], [[FADD1]] + ; GFX9-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD3]](f32) + ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](f16), [[BITCAST10]](f16), [[BITCAST2]](f16) + ; GFX9-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST4]](f16) + ; GFX9-NEXT: [[FPEXT3:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST12]](f16) + ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(f32) = G_FNEG [[FPEXT3]] + ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT3]](f32) + ; GFX9-NEXT: [[FMUL5:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT2]], [[INT2]] + ; GFX9-NEXT: [[FMUL6:%[0-9]+]]:_(f32) = G_FMUL [[FNEG1]], [[FMUL5]] + ; GFX9-NEXT: [[FADD4:%[0-9]+]]:_(f32) = G_FADD [[FMUL6]], [[FPEXT2]] + ; GFX9-NEXT: [[FMUL7:%[0-9]+]]:_(f32) = G_FMUL [[FADD4]], [[INT2]] + ; GFX9-NEXT: [[FADD5:%[0-9]+]]:_(f32) = G_FADD [[FMUL7]], [[FMUL5]] + ; GFX9-NEXT: 
[[FMUL8:%[0-9]+]]:_(f32) = G_FMUL [[FNEG1]], [[FADD5]] + ; GFX9-NEXT: [[FADD6:%[0-9]+]]:_(f32) = G_FADD [[FMUL8]], [[FPEXT2]] + ; GFX9-NEXT: [[FMUL9:%[0-9]+]]:_(f32) = G_FMUL [[FADD6]], [[INT2]] + ; GFX9-NEXT: [[BITCAST20:%[0-9]+]]:_(i32) = G_BITCAST [[FMUL9]](f32) + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST20]], [[C1]] + ; GFX9-NEXT: [[BITCAST21:%[0-9]+]]:_(f32) = G_BITCAST [[AND1]](i32) + ; GFX9-NEXT: [[FADD7:%[0-9]+]]:_(f32) = G_FADD [[BITCAST21]], [[FADD5]] + ; GFX9-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD7]](f32) + ; GFX9-NEXT: [[INT3:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](f16), [[BITCAST12]](f16), [[BITCAST4]](f16) + ; GFX9-NEXT: [[FPEXT4:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST3]](f16) + ; GFX9-NEXT: [[FPEXT5:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST11]](f16) + ; GFX9-NEXT: [[FNEG2:%[0-9]+]]:_(f32) = G_FNEG [[FPEXT5]] + ; GFX9-NEXT: [[INT4:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT5]](f32) + ; GFX9-NEXT: [[FMUL10:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT4]], [[INT4]] + ; GFX9-NEXT: [[FMUL11:%[0-9]+]]:_(f32) = G_FMUL [[FNEG2]], [[FMUL10]] + ; GFX9-NEXT: [[FADD8:%[0-9]+]]:_(f32) = G_FADD [[FMUL11]], [[FPEXT4]] + ; GFX9-NEXT: [[FMUL12:%[0-9]+]]:_(f32) = G_FMUL [[FADD8]], [[INT4]] + ; GFX9-NEXT: [[FADD9:%[0-9]+]]:_(f32) = G_FADD [[FMUL12]], [[FMUL10]] + ; GFX9-NEXT: [[FMUL13:%[0-9]+]]:_(f32) = G_FMUL [[FNEG2]], [[FADD9]] + ; GFX9-NEXT: [[FADD10:%[0-9]+]]:_(f32) = G_FADD [[FMUL13]], [[FPEXT4]] + ; GFX9-NEXT: [[FMUL14:%[0-9]+]]:_(f32) = G_FMUL [[FADD10]], [[INT4]] + ; GFX9-NEXT: [[BITCAST22:%[0-9]+]]:_(i32) = G_BITCAST [[FMUL14]](f32) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[BITCAST22]], [[C1]] + ; GFX9-NEXT: [[BITCAST23:%[0-9]+]]:_(f32) = G_BITCAST [[AND2]](i32) + ; GFX9-NEXT: [[FADD11:%[0-9]+]]:_(f32) = G_FADD [[BITCAST23]], [[FADD9]] + ; GFX9-NEXT: [[FPTRUNC2:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD11]](f32) + ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC2]](f16), [[BITCAST11]](f16), [[BITCAST3]](f16) + ; GFX9-NEXT: [[FPEXT6:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST5]](f16) + ; GFX9-NEXT: [[FPEXT7:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST13]](f16) + ; GFX9-NEXT: [[FNEG3:%[0-9]+]]:_(f32) = G_FNEG [[FPEXT7]] + ; GFX9-NEXT: [[INT6:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT7]](f32) + ; GFX9-NEXT: [[FMUL15:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT6]], [[INT6]] + ; GFX9-NEXT: [[FMUL16:%[0-9]+]]:_(f32) = G_FMUL [[FNEG3]], [[FMUL15]] + ; GFX9-NEXT: [[FADD12:%[0-9]+]]:_(f32) = G_FADD [[FMUL16]], [[FPEXT6]] + ; GFX9-NEXT: [[FMUL17:%[0-9]+]]:_(f32) = G_FMUL [[FADD12]], [[INT6]] + ; GFX9-NEXT: [[FADD13:%[0-9]+]]:_(f32) = G_FADD [[FMUL17]], [[FMUL15]] + ; GFX9-NEXT: [[FMUL18:%[0-9]+]]:_(f32) = G_FMUL [[FNEG3]], [[FADD13]] + ; GFX9-NEXT: [[FADD14:%[0-9]+]]:_(f32) = G_FADD [[FMUL18]], [[FPEXT6]] + ; GFX9-NEXT: [[FMUL19:%[0-9]+]]:_(f32) = G_FMUL [[FADD14]], [[INT6]] + ; GFX9-NEXT: [[BITCAST24:%[0-9]+]]:_(i32) = G_BITCAST [[FMUL19]](f32) + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST24]], [[C1]] + ; GFX9-NEXT: [[BITCAST25:%[0-9]+]]:_(f32) = G_BITCAST [[AND3]](i32) + ; GFX9-NEXT: [[FADD15:%[0-9]+]]:_(f32) = G_FADD [[BITCAST25]], [[FADD13]] + ; GFX9-NEXT: [[FPTRUNC3:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD15]](f32) + ; GFX9-NEXT: [[INT7:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC3]](f16), [[BITCAST13]](f16), [[BITCAST5]](f16) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[INT1]](f16), [[INT3]](f16) + ; GFX9-NEXT: 
[[BUILD_VECTOR1:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[INT5]](f16), [[INT7]](f16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x f16>), [[BUILD_VECTOR1]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST26:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x f16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST26]](<4 x i16>) ; ; GFX9-UNSAFE-LABEL: name: test_fdiv_v4s16 ; GFX9-UNSAFE: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-UNSAFE-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-UNSAFE-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-UNSAFE-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-UNSAFE-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-UNSAFE-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-UNSAFE-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-UNSAFE-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-UNSAFE-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9-UNSAFE-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX9-UNSAFE-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC4]](s16) - ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[INT]] - ; GFX9-UNSAFE-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC5]](s16) - ; GFX9-UNSAFE-NEXT: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[INT1]] - ; GFX9-UNSAFE-NEXT: [[INT2:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC6]](s16) - ; GFX9-UNSAFE-NEXT: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[INT2]] - ; GFX9-UNSAFE-NEXT: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC7]](s16) - ; GFX9-UNSAFE-NEXT: [[FMUL3:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC3]], [[INT3]] - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMUL]](s16), [[FMUL1]](s16) - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMUL2]](s16), [[FMUL3]](s16) - ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 - %2:_(<4 x s16>) = G_FDIV %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; 
GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX9-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; GFX9-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY1]](<4 x i16>) + ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x f16>) + ; GFX9-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %28(i16) + ; GFX9-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %34(i16) + ; GFX9-UNSAFE-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %29(i16) + ; GFX9-UNSAFE-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %35(i16) + ; GFX9-UNSAFE-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; GFX9-UNSAFE-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST6]](<2 x i16>) + ; GFX9-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST7]](i32) + ; GFX9-UNSAFE-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-UNSAFE-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST7]], [[C]](i32) + ; GFX9-UNSAFE-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-UNSAFE-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; GFX9-UNSAFE-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST8]](<2 x i16>) + ; GFX9-UNSAFE-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST9]](i32) + ; GFX9-UNSAFE-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST9]], [[C]](i32) + ; GFX9-UNSAFE-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-UNSAFE-NEXT: [[UV2:%[0-9]+]]:_(<2 x f16>), [[UV3:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f16>) + ; GFX9-UNSAFE-NEXT: [[BITCAST10:%[0-9]+]]:_(f16) = G_BITCAST %39(i16) + ; GFX9-UNSAFE-NEXT: [[BITCAST11:%[0-9]+]]:_(f16) = G_BITCAST %44(i16) + ; GFX9-UNSAFE-NEXT: [[BITCAST12:%[0-9]+]]:_(f16) = G_BITCAST %40(i16) + ; GFX9-UNSAFE-NEXT: [[BITCAST13:%[0-9]+]]:_(f16) = G_BITCAST %45(i16) + ; GFX9-UNSAFE-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV3]](<2 x f16>) + ; GFX9-UNSAFE-NEXT: [[BITCAST15:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST14]](<2 x i16>) + ; GFX9-UNSAFE-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST15]](i32) + ; GFX9-UNSAFE-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST15]], [[C]](i32) + ; GFX9-UNSAFE-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX9-UNSAFE-NEXT: [[BITCAST16:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV2]](<2 x f16>) + ; GFX9-UNSAFE-NEXT: [[BITCAST17:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST16]](<2 x i16>) + ; GFX9-UNSAFE-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST17]](i32) + ; GFX9-UNSAFE-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST17]], [[C]](i32) + ; GFX9-UNSAFE-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[BITCAST10]](f16) + ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST2]], [[INT]] + ; GFX9-UNSAFE-NEXT: [[INT1:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[BITCAST12]](f16) + ; GFX9-UNSAFE-NEXT: [[FMUL1:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST4]], [[INT1]] + ; GFX9-UNSAFE-NEXT: [[INT2:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[BITCAST11]](f16) + ; GFX9-UNSAFE-NEXT: [[FMUL2:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST3]], [[INT2]] + ; GFX9-UNSAFE-NEXT: [[INT3:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[BITCAST13]](f16) + ; GFX9-UNSAFE-NEXT: [[FMUL3:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST5]], [[INT3]] + ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f16>) = 
G_BUILD_VECTOR [[FMUL]](f16), [[FMUL1]](f16) + ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[FMUL2]](f16), [[FMUL3]](f16) + ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x f16>), [[BUILD_VECTOR1]](<2 x f16>) + ; GFX9-UNSAFE-NEXT: [[BITCAST18:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x f16>) + ; GFX9-UNSAFE-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST18]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = COPY $vgpr2_vgpr3 + %2:_(<4 x f16>) = G_BITCAST %0(<4 x i16>) + %3:_(<4 x f16>) = G_BITCAST %1(<4 x i16>) + %4:_(<4 x f16>) = G_FDIV %2, %3 + %5:_(<4 x i16>) = G_BITCAST %4(<4 x f16>) + $vgpr0_vgpr1 = COPY %5(<4 x i16>) ... --- @@ -2145,69 +2523,81 @@ body: | ; SI-LABEL: name: test_fdiv_s16_constant_one_rcp ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3C00 - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16) - ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; SI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 0 - ; SI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 1 - ; SI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]] - ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; SI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; SI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; SI-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[FPEXT1]](s32), [[FPEXT]](s32) - ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT6]](s32) - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH3C00 + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[C]](f16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; SI-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; SI-NEXT: [[INT:%[0-9]+]]:_(f32), [[INT1:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](f32), [[FPEXT1]](f32), 0 + ; SI-NEXT: [[INT2:%[0-9]+]]:_(f32), [[INT3:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](f32), [[FPEXT1]](f32), 1 + ; SI-NEXT: [[INT4:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](f32) + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[INT]] + ; SI-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[INT4]], [[C1]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[FMA]], [[INT4]], 
[[INT4]] + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[INT2]], [[FMA1]] + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; SI-NEXT: [[FMA3:%[0-9]+]]:_(f32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; SI-NEXT: [[FMA4:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; SI-NEXT: [[INT5:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](f32), [[FMA1]](f32), [[FMA3]](f32), [[INT3]](i1) + ; SI-NEXT: [[INT6:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](f32), [[FPEXT1]](f32), [[FPEXT]](f32) + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[INT6]](f32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; VI-LABEL: name: test_fdiv_s16_constant_one_rcp ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC]](s16) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT]](s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; VI-NEXT: [[INT:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[BITCAST]](f16) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[INT]](f16) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: test_fdiv_s16_constant_one_rcp ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC]](s16) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[BITCAST]](f16) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[INT]](f16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-UNSAFE-LABEL: name: test_fdiv_s16_constant_one_rcp ; GFX9-UNSAFE: liveins: $vgpr0 ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC]](s16) - ; GFX9-UNSAFE-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT]](s16) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[BITCAST]](f16) + ; GFX9-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[INT]](f16) + ; GFX9-UNSAFE-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) 
; ; GFX10-LABEL: name: test_fdiv_s16_constant_one_rcp ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX10-NEXT: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC]](s16) - ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT]](s16) - ; GFX10-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s16) = G_FCONSTANT half 1.0 - %1:_(s32) = COPY $vgpr0 - %2:_(s16) = G_TRUNC %1 - %3:_(s16) = G_FDIV %0, %2 - %4:_(s32) = G_ANYEXT %3 - $vgpr0 = COPY %4 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[INT:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[BITCAST]](f16) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[INT]](f16) + ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; GFX10-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(f16) = G_FCONSTANT half 0xH3C00 + %1:_(i32) = COPY $vgpr0 + %2:_(i16) = G_TRUNC %1(i32) + %3:_(f16) = G_BITCAST %2(i16) + %4:_(f16) = G_FDIV %0, %3 + %5:_(i16) = G_BITCAST %4(f16) + %6:_(i32) = G_ANYEXT %5(i16) + $vgpr0 = COPY %6(i32) ... --- @@ -2219,73 +2609,85 @@ body: | ; SI-LABEL: name: test_fdiv_s16_constant_negative_one_rcp ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xHBC00 - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16) - ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; SI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 0 - ; SI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 1 - ; SI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]] - ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; SI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; SI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; SI-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[FPEXT1]](s32), [[FPEXT]](s32) - ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT6]](s32) - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xHBC00 + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[C]](f16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; SI-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 
1.000000e+00 + ; SI-NEXT: [[INT:%[0-9]+]]:_(f32), [[INT1:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](f32), [[FPEXT1]](f32), 0 + ; SI-NEXT: [[INT2:%[0-9]+]]:_(f32), [[INT3:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](f32), [[FPEXT1]](f32), 1 + ; SI-NEXT: [[INT4:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](f32) + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[INT]] + ; SI-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[INT4]], [[C1]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[INT2]], [[FMA1]] + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; SI-NEXT: [[FMA3:%[0-9]+]]:_(f32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; SI-NEXT: [[FMA4:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; SI-NEXT: [[INT5:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](f32), [[FMA1]](f32), [[FMA3]](f32), [[INT3]](i1) + ; SI-NEXT: [[INT6:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](f32), [[FPEXT1]](f32), [[FPEXT]](f32) + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[INT6]](f32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; VI-LABEL: name: test_fdiv_s16_constant_negative_one_rcp ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC]] - ; VI-NEXT: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FNEG]](s16) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT]](s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; VI-NEXT: [[FNEG:%[0-9]+]]:_(f16) = G_FNEG [[BITCAST]] + ; VI-NEXT: [[INT:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FNEG]](f16) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[INT]](f16) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: test_fdiv_s16_constant_negative_one_rcp ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC]] - ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FNEG]](s16) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(f16) = G_FNEG [[BITCAST]] + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FNEG]](f16) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[INT]](f16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-UNSAFE-LABEL: name: test_fdiv_s16_constant_negative_one_rcp ; GFX9-UNSAFE: liveins: $vgpr0 ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: 
[[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-UNSAFE-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC]] - ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FNEG]](s16) - ; GFX9-UNSAFE-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT]](s16) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-UNSAFE-NEXT: [[FNEG:%[0-9]+]]:_(f16) = G_FNEG [[BITCAST]] + ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FNEG]](f16) + ; GFX9-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[INT]](f16) + ; GFX9-UNSAFE-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX10-LABEL: name: test_fdiv_s16_constant_negative_one_rcp ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC]] - ; GFX10-NEXT: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FNEG]](s16) - ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT]](s16) - ; GFX10-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s16) = G_FCONSTANT half -1.0 - %1:_(s32) = COPY $vgpr0 - %2:_(s16) = G_TRUNC %1 - %3:_(s16) = G_FDIV %0, %2 - %4:_(s32) = G_ANYEXT %3 - $vgpr0 = COPY %4 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(f16) = G_FNEG [[BITCAST]] + ; GFX10-NEXT: [[INT:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FNEG]](f16) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[INT]](f16) + ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; GFX10-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(f16) = G_FCONSTANT half 0xHBC00 + %1:_(i32) = COPY $vgpr0 + %2:_(i16) = G_TRUNC %1(i32) + %3:_(f16) = G_BITCAST %2(i16) + %4:_(f16) = G_FDIV %0, %3 + %5:_(i16) = G_BITCAST %4(f16) + %6:_(i32) = G_ANYEXT %5(i16) + $vgpr0 = COPY %6(i32) ... 
--- @@ -2297,89 +2699,101 @@ body: | ; SI-LABEL: name: test_fdiv_s32_constant_one_rcp ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 0 - ; SI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 1 - ; SI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; SI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; SI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; SI-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY]](s32), [[C]](s32) - ; SI-NEXT: $vgpr0 = COPY [[INT6]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[INT:%[0-9]+]]:_(f32), [[INT1:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](f32), [[BITCAST]](f32), 0 + ; SI-NEXT: [[INT2:%[0-9]+]]:_(f32), [[INT3:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](f32), [[BITCAST]](f32), 1 + ; SI-NEXT: [[INT4:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](f32) + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[INT]] + ; SI-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[INT2]], [[FMA1]] + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; SI-NEXT: [[FMA3:%[0-9]+]]:_(f32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; SI-NEXT: [[FMA4:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; SI-NEXT: [[INT5:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](f32), [[FMA1]](f32), [[FMA3]](f32), [[INT3]](i1) + ; SI-NEXT: [[INT6:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](f32), [[BITCAST]](f32), [[C]](f32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[INT6]](f32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) ; ; VI-LABEL: name: test_fdiv_s32_constant_one_rcp ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 0 - ; VI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 1 - ; VI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; VI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; VI-NEXT: 
[[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; VI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; VI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; VI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; VI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; VI-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY]](s32), [[C]](s32) - ; VI-NEXT: $vgpr0 = COPY [[INT6]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[INT:%[0-9]+]]:_(f32), [[INT1:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](f32), [[BITCAST]](f32), 0 + ; VI-NEXT: [[INT2:%[0-9]+]]:_(f32), [[INT3:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](f32), [[BITCAST]](f32), 1 + ; VI-NEXT: [[INT4:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](f32) + ; VI-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[INT]] + ; VI-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[INT2]], [[FMA1]] + ; VI-NEXT: [[FMA2:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; VI-NEXT: [[FMA3:%[0-9]+]]:_(f32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; VI-NEXT: [[FMA4:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; VI-NEXT: [[INT5:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](f32), [[FMA1]](f32), [[FMA3]](f32), [[INT3]](i1) + ; VI-NEXT: [[INT6:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](f32), [[BITCAST]](f32), [[C]](f32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[INT6]](f32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) ; ; GFX9-LABEL: name: test_fdiv_s32_constant_one_rcp ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 0 - ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 1 - ; GFX9-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; GFX9-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; GFX9-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; GFX9-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY]](s32), [[C]](s32) - ; GFX9-NEXT: $vgpr0 = COPY [[INT6]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 
1.000000e+00 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(f32), [[INT1:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](f32), [[BITCAST]](f32), 0 + ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(f32), [[INT3:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](f32), [[BITCAST]](f32), 1 + ; GFX9-NEXT: [[INT4:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](f32) + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[INT]] + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[INT2]], [[FMA1]] + ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; GFX9-NEXT: [[FMA3:%[0-9]+]]:_(f32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; GFX9-NEXT: [[FMA4:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](f32), [[FMA1]](f32), [[FMA3]](f32), [[INT3]](i1) + ; GFX9-NEXT: [[INT6:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](f32), [[BITCAST]](f32), [[C]](f32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[INT6]](f32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) ; ; GFX9-UNSAFE-LABEL: name: test_fdiv_s32_constant_one_rcp ; GFX9-UNSAFE: liveins: $vgpr0 ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[INT]](s32) + ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[BITCAST]](f32) + ; GFX9-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[INT]](f32) + ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) ; ; GFX10-LABEL: name: test_fdiv_s32_constant_one_rcp ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 0 - ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 1 - ; GFX10-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; GFX10-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; GFX10-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; GFX10-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; GFX10-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; GFX10-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY]](s32), [[C]](s32) - ; GFX10-NEXT: $vgpr0 = COPY [[INT6]](s32) - %0:_(s32) = 
G_FCONSTANT float 1.0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = G_FDIV %0, %1 - $vgpr0 = COPY %2 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX10-NEXT: [[INT:%[0-9]+]]:_(f32), [[INT1:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](f32), [[BITCAST]](f32), 0 + ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(f32), [[INT3:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](f32), [[BITCAST]](f32), 1 + ; GFX10-NEXT: [[INT4:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](f32) + ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[INT]] + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[INT2]], [[FMA1]] + ; GFX10-NEXT: [[FMA2:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; GFX10-NEXT: [[FMA3:%[0-9]+]]:_(f32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; GFX10-NEXT: [[FMA4:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; GFX10-NEXT: [[INT5:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](f32), [[FMA1]](f32), [[FMA3]](f32), [[INT3]](i1) + ; GFX10-NEXT: [[INT6:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](f32), [[BITCAST]](f32), [[C]](f32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[INT6]](f32) + ; GFX10-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(f32) = G_FCONSTANT float 1.000000e+00 + %1:_(i32) = COPY $vgpr0 + %2:_(f32) = G_BITCAST %1(i32) + %3:_(f32) = G_FDIV %0, %2 + %4:_(i32) = G_BITCAST %3(f32) + $vgpr0 = COPY %4(i32) ... --- @@ -2391,94 +2805,106 @@ body: | ; SI-LABEL: name: test_fdiv_s32_constant_negative_one_rcp ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.000000e+00 - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; SI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 0 - ; SI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 1 - ; SI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]] - ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; SI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; SI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; SI-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY]](s32), [[C]](s32) - ; SI-NEXT: $vgpr0 = COPY [[INT6]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float -1.000000e+00 + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; SI-NEXT: [[INT:%[0-9]+]]:_(f32), 
[[INT1:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](f32), [[BITCAST]](f32), 0 + ; SI-NEXT: [[INT2:%[0-9]+]]:_(f32), [[INT3:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](f32), [[BITCAST]](f32), 1 + ; SI-NEXT: [[INT4:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](f32) + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[INT]] + ; SI-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[INT4]], [[C1]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[INT2]], [[FMA1]] + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; SI-NEXT: [[FMA3:%[0-9]+]]:_(f32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; SI-NEXT: [[FMA4:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; SI-NEXT: [[INT5:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](f32), [[FMA1]](f32), [[FMA3]](f32), [[INT3]](i1) + ; SI-NEXT: [[INT6:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](f32), [[BITCAST]](f32), [[C]](f32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[INT6]](f32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) ; ; VI-LABEL: name: test_fdiv_s32_constant_negative_one_rcp ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.000000e+00 - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; VI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 0 - ; VI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 1 - ; VI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; VI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]] - ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; VI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; VI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; VI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; VI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; VI-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY]](s32), [[C]](s32) - ; VI-NEXT: $vgpr0 = COPY [[INT6]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float -1.000000e+00 + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; VI-NEXT: [[INT:%[0-9]+]]:_(f32), [[INT1:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](f32), [[BITCAST]](f32), 0 + ; VI-NEXT: [[INT2:%[0-9]+]]:_(f32), [[INT3:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](f32), [[BITCAST]](f32), 1 + ; VI-NEXT: [[INT4:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](f32) + ; VI-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[INT]] + ; VI-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[INT4]], [[C1]] + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[INT2]], [[FMA1]] 
+ ; VI-NEXT: [[FMA2:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; VI-NEXT: [[FMA3:%[0-9]+]]:_(f32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; VI-NEXT: [[FMA4:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; VI-NEXT: [[INT5:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](f32), [[FMA1]](f32), [[FMA3]](f32), [[INT3]](i1) + ; VI-NEXT: [[INT6:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](f32), [[BITCAST]](f32), [[C]](f32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[INT6]](f32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) ; ; GFX9-LABEL: name: test_fdiv_s32_constant_negative_one_rcp ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.000000e+00 - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 0 - ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 1 - ; GFX9-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]] - ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; GFX9-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; GFX9-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; GFX9-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY]](s32), [[C]](s32) - ; GFX9-NEXT: $vgpr0 = COPY [[INT6]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float -1.000000e+00 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(f32), [[INT1:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](f32), [[BITCAST]](f32), 0 + ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(f32), [[INT3:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](f32), [[BITCAST]](f32), 1 + ; GFX9-NEXT: [[INT4:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](f32) + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[INT]] + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[INT4]], [[C1]] + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[INT2]], [[FMA1]] + ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; GFX9-NEXT: [[FMA3:%[0-9]+]]:_(f32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; GFX9-NEXT: [[FMA4:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](f32), [[FMA1]](f32), [[FMA3]](f32), [[INT3]](i1) + ; GFX9-NEXT: [[INT6:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](f32), [[BITCAST]](f32), [[C]](f32) + ; GFX9-NEXT: 
[[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[INT6]](f32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) ; ; GFX9-UNSAFE-LABEL: name: test_fdiv_s32_constant_negative_one_rcp ; GFX9-UNSAFE: liveins: $vgpr0 ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]] - ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FNEG]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[INT]](s32) + ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-UNSAFE-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[BITCAST]] + ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FNEG]](f32) + ; GFX9-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[INT]](f32) + ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) ; ; GFX10-LABEL: name: test_fdiv_s32_constant_negative_one_rcp ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.000000e+00 - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX10-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 0 - ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 1 - ; GFX10-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]] - ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; GFX10-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; GFX10-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; GFX10-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; GFX10-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; GFX10-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY]](s32), [[C]](s32) - ; GFX10-NEXT: $vgpr0 = COPY [[INT6]](s32) - %0:_(s32) = G_FCONSTANT float -1.0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = G_FDIV %0, %1 - $vgpr0 = COPY %2 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float -1.000000e+00 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX10-NEXT: [[INT:%[0-9]+]]:_(f32), [[INT1:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](f32), [[BITCAST]](f32), 0 + ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(f32), [[INT3:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](f32), [[BITCAST]](f32), 1 + ; GFX10-NEXT: [[INT4:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](f32) + ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[INT]] + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[INT4]], [[C1]] + ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[INT2]], [[FMA1]] + ; GFX10-NEXT: [[FMA2:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[FMUL]], 
[[INT2]] + ; GFX10-NEXT: [[FMA3:%[0-9]+]]:_(f32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; GFX10-NEXT: [[FMA4:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; GFX10-NEXT: [[INT5:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](f32), [[FMA1]](f32), [[FMA3]](f32), [[INT3]](i1) + ; GFX10-NEXT: [[INT6:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](f32), [[BITCAST]](f32), [[C]](f32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[INT6]](f32) + ; GFX10-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(f32) = G_FCONSTANT float -1.000000e+00 + %1:_(i32) = COPY $vgpr0 + %2:_(f32) = G_BITCAST %1(i32) + %3:_(f32) = G_FDIV %0, %2 + %4:_(i32) = G_BITCAST %3(f32) + $vgpr0 = COPY %4(i32) ... --- @@ -2497,105 +2923,121 @@ body: | ; SI-LABEL: name: test_fdiv_s64_constant_one_rcp ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 0 - ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] - ; SI-NEXT: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) - ; SI-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]] - ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] - ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] - ; SI-NEXT: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 1 - ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] - ; SI-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT]](s64) - ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT3]](s64) - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV1]](s32), [[UV7]] - ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV3]](s32), [[UV5]] - ; SI-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP]], [[ICMP1]] - ; SI-NEXT: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[XOR]](s1) - ; SI-NEXT: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[COPY]](s64), [[C]](s64) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[INT6]](s64) + ; SI-NEXT: [[C:%[0-9]+]]:_(f64) = G_FCONSTANT double 1.000000e+00 + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; SI-NEXT: [[INT:%[0-9]+]]:_(f64), [[INT1:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](f64), [[BITCAST]](f64), 0 + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(f64) = G_FNEG [[INT]] + ; SI-NEXT: [[INT2:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](f64) + ; SI-NEXT: [[FMA:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[INT2]], [[C]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(f64) = G_FMA [[INT2]], [[FMA]], [[INT2]] + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[FMA1]], [[C]] + ; SI-NEXT: [[INT3:%[0-9]+]]:_(f64), [[INT4:%[0-9]+]]:_(i1) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.div.scale), [[C]](f64), [[BITCAST]](f64), 1 + ; SI-NEXT: [[FMA3:%[0-9]+]]:_(f64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[INT3]], [[FMA3]] + ; SI-NEXT: [[FMA4:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[C]](f64) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST1]](i64) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i64) = G_BITCAST [[BITCAST]](f64) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST2]](i64) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(i64) = G_BITCAST [[INT]](f64) + ; SI-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](i64) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(i64) = G_BITCAST [[INT3]](f64) + ; SI-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST4]](i64) + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[UV1]](i32), [[UV7]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[UV3]](i32), [[UV5]] + ; SI-NEXT: [[XOR:%[0-9]+]]:_(i1) = G_XOR [[ICMP]], [[ICMP1]] + ; SI-NEXT: [[INT5:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](f64), [[FMA3]](f64), [[FMUL]](f64), [[XOR]](i1) + ; SI-NEXT: [[INT6:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](f64), [[BITCAST]](f64), [[C]](f64) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(i64) = G_BITCAST [[INT6]](f64) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST5]](i64) ; ; VI-LABEL: name: test_fdiv_s64_constant_one_rcp ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 0 - ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] - ; VI-NEXT: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) - ; VI-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]] - ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] - ; VI-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] - ; VI-NEXT: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 1 - ; VI-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] - ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] - ; VI-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] - ; VI-NEXT: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[INT4]](s1) - ; VI-NEXT: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[COPY]](s64), [[C]](s64) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[INT6]](s64) + ; VI-NEXT: [[C:%[0-9]+]]:_(f64) = G_FCONSTANT double 1.000000e+00 + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; VI-NEXT: [[INT:%[0-9]+]]:_(f64), [[INT1:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](f64), [[BITCAST]](f64), 0 + ; VI-NEXT: [[FNEG:%[0-9]+]]:_(f64) = G_FNEG [[INT]] + ; VI-NEXT: [[INT2:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](f64) + ; VI-NEXT: [[FMA:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[INT2]], [[C]] + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(f64) = G_FMA [[INT2]], [[FMA]], 
[[INT2]] + ; VI-NEXT: [[FMA2:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[FMA1]], [[C]] + ; VI-NEXT: [[INT3:%[0-9]+]]:_(f64), [[INT4:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](f64), [[BITCAST]](f64), 1 + ; VI-NEXT: [[FMA3:%[0-9]+]]:_(f64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[INT3]], [[FMA3]] + ; VI-NEXT: [[FMA4:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] + ; VI-NEXT: [[INT5:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](f64), [[FMA3]](f64), [[FMUL]](f64), [[INT4]](i1) + ; VI-NEXT: [[INT6:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](f64), [[BITCAST]](f64), [[C]](f64) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[INT6]](f64) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](i64) ; ; GFX9-LABEL: name: test_fdiv_s64_constant_one_rcp ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 0 - ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] - ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) - ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]] - ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] - ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] - ; GFX9-NEXT: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 1 - ; GFX9-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] - ; GFX9-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] - ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[INT4]](s1) - ; GFX9-NEXT: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[COPY]](s64), [[C]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[INT6]](s64) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f64) = G_FCONSTANT double 1.000000e+00 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(f64), [[INT1:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](f64), [[BITCAST]](f64), 0 + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(f64) = G_FNEG [[INT]] + ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](f64) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[INT2]], [[C]] + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(f64) = G_FMA [[INT2]], [[FMA]], [[INT2]] + ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[FMA1]], [[C]] + ; GFX9-NEXT: [[INT3:%[0-9]+]]:_(f64), [[INT4:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](f64), [[BITCAST]](f64), 1 + ; GFX9-NEXT: [[FMA3:%[0-9]+]]:_(f64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[INT3]], [[FMA3]] + ; GFX9-NEXT: [[FMA4:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] + ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](f64), [[FMA3]](f64), [[FMUL]](f64), [[INT4]](i1) + ; GFX9-NEXT: [[INT6:%[0-9]+]]:_(f64) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](f64), [[BITCAST]](f64), [[C]](f64) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[INT6]](f64) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](i64) ; ; GFX9-UNSAFE-LABEL: name: test_fdiv_s64_constant_one_rcp ; GFX9-UNSAFE: liveins: $vgpr0_vgpr1 ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-UNSAFE-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY]] - ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY]](s64) - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT]], [[C]] - ; GFX9-UNSAFE-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[FMA]], [[INT]], [[INT]] - ; GFX9-UNSAFE-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] - ; GFX9-UNSAFE-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA2]], [[FMA1]], [[FMA1]] - ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[C]], [[FMA3]] - ; GFX9-UNSAFE-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[C]] - ; GFX9-UNSAFE-NEXT: [[FMA5:%[0-9]+]]:_(s64) = G_FMA [[FMA4]], [[FMA3]], [[FMUL]] - ; GFX9-UNSAFE-NEXT: $vgpr0_vgpr1 = COPY [[FMA5]](s64) + ; GFX9-UNSAFE-NEXT: [[C:%[0-9]+]]:_(f64) = G_FCONSTANT double 1.000000e+00 + ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; GFX9-UNSAFE-NEXT: [[FNEG:%[0-9]+]]:_(f64) = G_FNEG [[BITCAST]] + ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[BITCAST]](f64) + ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[INT]], [[C]] + ; GFX9-UNSAFE-NEXT: [[FMA1:%[0-9]+]]:_(f64) = G_FMA [[FMA]], [[INT]], [[INT]] + ; GFX9-UNSAFE-NEXT: [[FMA2:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[FMA1]], [[C]] + ; GFX9-UNSAFE-NEXT: [[FMA3:%[0-9]+]]:_(f64) = G_FMA [[FMA2]], [[FMA1]], [[FMA1]] + ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[C]], [[FMA3]] + ; GFX9-UNSAFE-NEXT: [[FMA4:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[FMUL]], [[C]] + ; GFX9-UNSAFE-NEXT: [[FMA5:%[0-9]+]]:_(f64) = G_FMA [[FMA4]], [[FMA3]], [[FMUL]] + ; GFX9-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[FMA5]](f64) + ; GFX9-UNSAFE-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](i64) ; ; GFX10-LABEL: name: test_fdiv_s64_constant_one_rcp ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 0 - ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] - ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) - ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]] - ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] - ; GFX10-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] - ; GFX10-NEXT: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 1 - ; GFX10-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] - ; GFX10-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] - ; GFX10-NEXT: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), 
[[FMA3]](s64), [[FMUL]](s64), [[INT4]](s1) - ; GFX10-NEXT: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[COPY]](s64), [[C]](s64) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[INT6]](s64) - %0:_(s64) = G_FCONSTANT double 1.0 - %1:_(s64) = COPY $vgpr0_vgpr1 - %2:_(s64) = G_FDIV %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(f64) = G_FCONSTANT double 1.000000e+00 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; GFX10-NEXT: [[INT:%[0-9]+]]:_(f64), [[INT1:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](f64), [[BITCAST]](f64), 0 + ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(f64) = G_FNEG [[INT]] + ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](f64) + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[INT2]], [[C]] + ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(f64) = G_FMA [[INT2]], [[FMA]], [[INT2]] + ; GFX10-NEXT: [[FMA2:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[FMA1]], [[C]] + ; GFX10-NEXT: [[INT3:%[0-9]+]]:_(f64), [[INT4:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](f64), [[BITCAST]](f64), 1 + ; GFX10-NEXT: [[FMA3:%[0-9]+]]:_(f64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[INT3]], [[FMA3]] + ; GFX10-NEXT: [[FMA4:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] + ; GFX10-NEXT: [[INT5:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](f64), [[FMA3]](f64), [[FMUL]](f64), [[INT4]](i1) + ; GFX10-NEXT: [[INT6:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](f64), [[BITCAST]](f64), [[C]](f64) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[INT6]](f64) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](i64) + %0:_(f64) = G_FCONSTANT double 1.000000e+00 + %1:_(i64) = COPY $vgpr0_vgpr1 + %2:_(f64) = G_BITCAST %1(i64) + %3:_(f64) = G_FDIV %0, %2 + %4:_(i64) = G_BITCAST %3(f64) + $vgpr0_vgpr1 = COPY %4(i64) ... 
--- @@ -2614,108 +3056,124 @@ body: | ; SI-LABEL: name: test_fdiv_s64_constant_negative_one_rcp ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double -1.000000e+00 - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; SI-NEXT: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 0 - ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] - ; SI-NEXT: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) - ; SI-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C1]] - ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] - ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C1]] - ; SI-NEXT: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 1 - ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] - ; SI-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT]](s64) - ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT3]](s64) - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV1]](s32), [[UV7]] - ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV3]](s32), [[UV5]] - ; SI-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP]], [[ICMP1]] - ; SI-NEXT: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[XOR]](s1) - ; SI-NEXT: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[COPY]](s64), [[C]](s64) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[INT6]](s64) + ; SI-NEXT: [[C:%[0-9]+]]:_(f64) = G_FCONSTANT double -1.000000e+00 + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; SI-NEXT: [[C1:%[0-9]+]]:_(f64) = G_FCONSTANT double 1.000000e+00 + ; SI-NEXT: [[INT:%[0-9]+]]:_(f64), [[INT1:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](f64), [[BITCAST]](f64), 0 + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(f64) = G_FNEG [[INT]] + ; SI-NEXT: [[INT2:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](f64) + ; SI-NEXT: [[FMA:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[INT2]], [[C1]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(f64) = G_FMA [[INT2]], [[FMA]], [[INT2]] + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[FMA1]], [[C1]] + ; SI-NEXT: [[INT3:%[0-9]+]]:_(f64), [[INT4:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](f64), [[BITCAST]](f64), 1 + ; SI-NEXT: [[FMA3:%[0-9]+]]:_(f64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[INT3]], [[FMA3]] + ; SI-NEXT: [[FMA4:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[C]](f64) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST1]](i64) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i64) = G_BITCAST [[BITCAST]](f64) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST2]](i64) + ; 
SI-NEXT: [[BITCAST3:%[0-9]+]]:_(i64) = G_BITCAST [[INT]](f64) + ; SI-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](i64) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(i64) = G_BITCAST [[INT3]](f64) + ; SI-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST4]](i64) + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[UV1]](i32), [[UV7]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[UV3]](i32), [[UV5]] + ; SI-NEXT: [[XOR:%[0-9]+]]:_(i1) = G_XOR [[ICMP]], [[ICMP1]] + ; SI-NEXT: [[INT5:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](f64), [[FMA3]](f64), [[FMUL]](f64), [[XOR]](i1) + ; SI-NEXT: [[INT6:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](f64), [[BITCAST]](f64), [[C]](f64) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(i64) = G_BITCAST [[INT6]](f64) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST5]](i64) ; ; VI-LABEL: name: test_fdiv_s64_constant_negative_one_rcp ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double -1.000000e+00 - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; VI-NEXT: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 0 - ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] - ; VI-NEXT: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) - ; VI-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C1]] - ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] - ; VI-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C1]] - ; VI-NEXT: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 1 - ; VI-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] - ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] - ; VI-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] - ; VI-NEXT: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[INT4]](s1) - ; VI-NEXT: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[COPY]](s64), [[C]](s64) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[INT6]](s64) + ; VI-NEXT: [[C:%[0-9]+]]:_(f64) = G_FCONSTANT double -1.000000e+00 + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; VI-NEXT: [[C1:%[0-9]+]]:_(f64) = G_FCONSTANT double 1.000000e+00 + ; VI-NEXT: [[INT:%[0-9]+]]:_(f64), [[INT1:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](f64), [[BITCAST]](f64), 0 + ; VI-NEXT: [[FNEG:%[0-9]+]]:_(f64) = G_FNEG [[INT]] + ; VI-NEXT: [[INT2:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](f64) + ; VI-NEXT: [[FMA:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[INT2]], [[C1]] + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(f64) = G_FMA [[INT2]], [[FMA]], [[INT2]] + ; VI-NEXT: [[FMA2:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[FMA1]], [[C1]] + ; VI-NEXT: [[INT3:%[0-9]+]]:_(f64), [[INT4:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](f64), [[BITCAST]](f64), 1 + ; VI-NEXT: [[FMA3:%[0-9]+]]:_(f64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[INT3]], [[FMA3]] + ; VI-NEXT: [[FMA4:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] + ; VI-NEXT: 
[[INT5:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](f64), [[FMA3]](f64), [[FMUL]](f64), [[INT4]](i1) + ; VI-NEXT: [[INT6:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](f64), [[BITCAST]](f64), [[C]](f64) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[INT6]](f64) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](i64) ; ; GFX9-LABEL: name: test_fdiv_s64_constant_negative_one_rcp ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double -1.000000e+00 - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 0 - ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] - ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) - ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C1]] - ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] - ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C1]] - ; GFX9-NEXT: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 1 - ; GFX9-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] - ; GFX9-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] - ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[INT4]](s1) - ; GFX9-NEXT: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[COPY]](s64), [[C]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[INT6]](s64) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f64) = G_FCONSTANT double -1.000000e+00 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(f64) = G_FCONSTANT double 1.000000e+00 + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(f64), [[INT1:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](f64), [[BITCAST]](f64), 0 + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(f64) = G_FNEG [[INT]] + ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](f64) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[INT2]], [[C1]] + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(f64) = G_FMA [[INT2]], [[FMA]], [[INT2]] + ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[FMA1]], [[C1]] + ; GFX9-NEXT: [[INT3:%[0-9]+]]:_(f64), [[INT4:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](f64), [[BITCAST]](f64), 1 + ; GFX9-NEXT: [[FMA3:%[0-9]+]]:_(f64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[INT3]], [[FMA3]] + ; GFX9-NEXT: [[FMA4:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] + ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](f64), [[FMA3]](f64), [[FMUL]](f64), [[INT4]](i1) + ; GFX9-NEXT: [[INT6:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](f64), [[BITCAST]](f64), [[C]](f64) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[INT6]](f64) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](i64) ; ; GFX9-UNSAFE-LABEL: name: test_fdiv_s64_constant_negative_one_rcp ; GFX9-UNSAFE: liveins: $vgpr0_vgpr1 ; 
GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double -1.000000e+00 - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-UNSAFE-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY]] - ; GFX9-UNSAFE-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY]](s64) - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT]], [[C1]] - ; GFX9-UNSAFE-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[FMA]], [[INT]], [[INT]] - ; GFX9-UNSAFE-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C1]] - ; GFX9-UNSAFE-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA2]], [[FMA1]], [[FMA1]] - ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[C]], [[FMA3]] - ; GFX9-UNSAFE-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[C]] - ; GFX9-UNSAFE-NEXT: [[FMA5:%[0-9]+]]:_(s64) = G_FMA [[FMA4]], [[FMA3]], [[FMUL]] - ; GFX9-UNSAFE-NEXT: $vgpr0_vgpr1 = COPY [[FMA5]](s64) + ; GFX9-UNSAFE-NEXT: [[C:%[0-9]+]]:_(f64) = G_FCONSTANT double -1.000000e+00 + ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; GFX9-UNSAFE-NEXT: [[FNEG:%[0-9]+]]:_(f64) = G_FNEG [[BITCAST]] + ; GFX9-UNSAFE-NEXT: [[C1:%[0-9]+]]:_(f64) = G_FCONSTANT double 1.000000e+00 + ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[BITCAST]](f64) + ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[INT]], [[C1]] + ; GFX9-UNSAFE-NEXT: [[FMA1:%[0-9]+]]:_(f64) = G_FMA [[FMA]], [[INT]], [[INT]] + ; GFX9-UNSAFE-NEXT: [[FMA2:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[FMA1]], [[C1]] + ; GFX9-UNSAFE-NEXT: [[FMA3:%[0-9]+]]:_(f64) = G_FMA [[FMA2]], [[FMA1]], [[FMA1]] + ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[C]], [[FMA3]] + ; GFX9-UNSAFE-NEXT: [[FMA4:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[FMUL]], [[C]] + ; GFX9-UNSAFE-NEXT: [[FMA5:%[0-9]+]]:_(f64) = G_FMA [[FMA4]], [[FMA3]], [[FMUL]] + ; GFX9-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[FMA5]](f64) + ; GFX9-UNSAFE-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](i64) ; ; GFX10-LABEL: name: test_fdiv_s64_constant_negative_one_rcp ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double -1.000000e+00 - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; GFX10-NEXT: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 0 - ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] - ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) - ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C1]] - ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] - ; GFX10-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C1]] - ; GFX10-NEXT: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 1 - ; GFX10-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] - ; GFX10-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] - ; GFX10-NEXT: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[INT4]](s1) - ; GFX10-NEXT: 
[[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[COPY]](s64), [[C]](s64) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[INT6]](s64) - %0:_(s64) = G_FCONSTANT double -1.0 - %1:_(s64) = COPY $vgpr0_vgpr1 - %2:_(s64) = G_FDIV %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(f64) = G_FCONSTANT double -1.000000e+00 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(f64) = G_FCONSTANT double 1.000000e+00 + ; GFX10-NEXT: [[INT:%[0-9]+]]:_(f64), [[INT1:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](f64), [[BITCAST]](f64), 0 + ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(f64) = G_FNEG [[INT]] + ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](f64) + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[INT2]], [[C1]] + ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(f64) = G_FMA [[INT2]], [[FMA]], [[INT2]] + ; GFX10-NEXT: [[FMA2:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[FMA1]], [[C1]] + ; GFX10-NEXT: [[INT3:%[0-9]+]]:_(f64), [[INT4:%[0-9]+]]:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](f64), [[BITCAST]](f64), 1 + ; GFX10-NEXT: [[FMA3:%[0-9]+]]:_(f64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[INT3]], [[FMA3]] + ; GFX10-NEXT: [[FMA4:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] + ; GFX10-NEXT: [[INT5:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](f64), [[FMA3]](f64), [[FMUL]](f64), [[INT4]](i1) + ; GFX10-NEXT: [[INT6:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](f64), [[BITCAST]](f64), [[C]](f64) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[INT6]](f64) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](i64) + %0:_(f64) = G_FCONSTANT double -1.000000e+00 + %1:_(i64) = COPY $vgpr0_vgpr1 + %2:_(f64) = G_BITCAST %1(i64) + %3:_(f64) = G_FDIV %0, %2 + %4:_(i64) = G_BITCAST %3(f64) + $vgpr0_vgpr1 = COPY %4(i64) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fexp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fexp.mir index 1b675a83307e8..ae6829c0a8ad5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fexp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fexp.mir @@ -14,89 +14,101 @@ body: | ; GFX6-LABEL: name: test_fexp_s32 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[C]] - ; GFX6-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FMUL]] - ; GFX6-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[C]], [[FNEG]] - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3E54AE0BE0000000 - ; GFX6-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[C1]], [[FMA]] - ; GFX6-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[FMUL]] - ; GFX6-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[FMUL]], [[INTRINSIC_ROUNDEVEN]] - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FSUB]], [[FMA1]] - ; GFX6-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN]](s32) - ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32) - ; GFX6-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[INT]], [[FPTOSI]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC059D1DA00000000 - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[COPY]](s32), [[C2]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[FLDEXP]] - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x40562E4300000000 - ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[COPY]](s32), [[C4]] - ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x7FF0000000000000 - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C5]], [[SELECT]] - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SELECT1]](s32) - ; GFX6-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FF7154760000000 + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST]], [[C]] + ; GFX6-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[FMUL]] + ; GFX6-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[BITCAST]], [[C]], [[FNEG]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3E54AE0BE0000000 + ; GFX6-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[BITCAST]], [[C1]], [[FMA]] + ; GFX6-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(f32) = G_INTRINSIC_ROUNDEVEN [[FMUL]] + ; GFX6-NEXT: [[FSUB:%[0-9]+]]:_(f32) = G_FSUB [[FMUL]], [[INTRINSIC_ROUNDEVEN]] + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FSUB]], [[FMA1]] + ; GFX6-NEXT: [[FPTOSI:%[0-9]+]]:_(i32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN]](f32) + ; GFX6-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](f32) + ; GFX6-NEXT: [[FLDEXP:%[0-9]+]]:_(f32) = G_FLDEXP [[INT]], [[FPTOSI]](i32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC059D1DA00000000 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[BITCAST]](f32), [[C2]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C3]], [[FLDEXP]] + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x40562E4300000000 + ; GFX6-NEXT: 
[[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(ogt), [[BITCAST]](f32), [[C4]] + ; GFX6-NEXT: [[C5:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x7FF0000000000000 + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(f32) = G_SELECT [[FCMP1]](i1), [[C5]], [[SELECT]] + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(f32) = COPY [[SELECT1]](f32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](f32) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + ; ; GFX8-LABEL: name: test_fexp_s32 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -4096 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX8-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[COPY]], [[AND]] - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7140000000000 - ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AND]], [[C1]] - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3F347652A0000000 - ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FSUB]], [[C2]] - ; GFX8-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FSUB]], [[C1]] - ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[FMUL1]] - ; GFX8-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[AND]], [[C2]] - ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FADD]] - ; GFX8-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[FMUL]] - ; GFX8-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[FMUL]], [[INTRINSIC_ROUNDEVEN]] - ; GFX8-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FSUB1]], [[FADD1]] - ; GFX8-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN]](s32) - ; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD2]](s32) - ; GFX8-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[INT]], [[FPTOSI]](s32) - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC059D1DA00000000 - ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[COPY]](s32), [[C3]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C4]], [[FLDEXP]] - ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x40562E4300000000 - ; GFX8-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[COPY]](s32), [[C5]] - ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x7FF0000000000000 - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C6]], [[SELECT]] - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SELECT1]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 -4096 + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST]](f32) + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C]] + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[AND]](i32) + ; GFX8-NEXT: [[FSUB:%[0-9]+]]:_(f32) = G_FSUB [[BITCAST]], [[BITCAST2]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FF7140000000000 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST2]], [[C1]] + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3F347652A0000000 + ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[FSUB]], [[C2]] + ; GFX8-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FSUB]], [[C1]] + ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL2]], [[FMUL1]] + ; GFX8-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST2]], [[C2]] + ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], 
[[FADD]] + ; GFX8-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(f32) = G_INTRINSIC_ROUNDEVEN [[FMUL]] + ; GFX8-NEXT: [[FSUB1:%[0-9]+]]:_(f32) = G_FSUB [[FMUL]], [[INTRINSIC_ROUNDEVEN]] + ; GFX8-NEXT: [[FADD2:%[0-9]+]]:_(f32) = G_FADD [[FSUB1]], [[FADD1]] + ; GFX8-NEXT: [[FPTOSI:%[0-9]+]]:_(i32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN]](f32) + ; GFX8-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD2]](f32) + ; GFX8-NEXT: [[FLDEXP:%[0-9]+]]:_(f32) = G_FLDEXP [[INT]], [[FPTOSI]](i32) + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC059D1DA00000000 + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[BITCAST]](f32), [[C3]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C4]], [[FLDEXP]] + ; GFX8-NEXT: [[C5:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x40562E4300000000 + ; GFX8-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(ogt), [[BITCAST]](f32), [[C5]] + ; GFX8-NEXT: [[C6:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x7FF0000000000000 + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(f32) = G_SELECT [[FCMP1]](i1), [[C6]], [[SELECT]] + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(f32) = COPY [[SELECT1]](f32) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](f32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) + ; ; GFX9-LABEL: name: test_fexp_s32 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[C]] - ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FMUL]] - ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[C]], [[FNEG]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3E54AE0BE0000000 - ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[C1]], [[FMA]] - ; GFX9-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[FMUL]] - ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[FMUL]], [[INTRINSIC_ROUNDEVEN]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FSUB]], [[FMA1]] - ; GFX9-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN]](s32) - ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32) - ; GFX9-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[INT]], [[FPTOSI]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC059D1DA00000000 - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[COPY]](s32), [[C2]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[FLDEXP]] - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x40562E4300000000 - ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[COPY]](s32), [[C4]] - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x7FF0000000000000 - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C5]], [[SELECT]] - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SELECT1]](s32) - ; GFX9-NEXT: $vgpr0 = COPY [[COPY1]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_FEXP %0 - $vgpr0 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FF7154760000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST]], [[C]] + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[FMUL]] + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(f32) = 
G_FMA [[BITCAST]], [[C]], [[FNEG]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3E54AE0BE0000000 + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[BITCAST]], [[C1]], [[FMA]] + ; GFX9-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(f32) = G_INTRINSIC_ROUNDEVEN [[FMUL]] + ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(f32) = G_FSUB [[FMUL]], [[INTRINSIC_ROUNDEVEN]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FSUB]], [[FMA1]] + ; GFX9-NEXT: [[FPTOSI:%[0-9]+]]:_(i32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN]](f32) + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](f32) + ; GFX9-NEXT: [[FLDEXP:%[0-9]+]]:_(f32) = G_FLDEXP [[INT]], [[FPTOSI]](i32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC059D1DA00000000 + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[BITCAST]](f32), [[C2]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C3]], [[FLDEXP]] + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x40562E4300000000 + ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(ogt), [[BITCAST]](f32), [[C4]] + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x7FF0000000000000 + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(f32) = G_SELECT [[FCMP1]](i1), [[C5]], [[SELECT]] + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(f32) = COPY [[SELECT1]](f32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](f32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = G_FEXP %1 + %3:_(i32) = G_BITCAST %2(f32) + $vgpr0 = COPY %3(i32) ... --- @@ -108,89 +120,101 @@ body: | ; GFX6-LABEL: name: test_fexp_s32_nnan ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[C]] - ; GFX6-NEXT: [[FNEG:%[0-9]+]]:_(s32) = nnan G_FNEG [[FMUL]] - ; GFX6-NEXT: [[FMA:%[0-9]+]]:_(s32) = nnan G_FMA [[COPY]], [[C]], [[FNEG]] - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3E54AE0BE0000000 - ; GFX6-NEXT: [[FMA1:%[0-9]+]]:_(s32) = nnan G_FMA [[COPY]], [[C1]], [[FMA]] - ; GFX6-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(s32) = nnan G_INTRINSIC_ROUNDEVEN [[FMUL]] - ; GFX6-NEXT: [[FSUB:%[0-9]+]]:_(s32) = nnan G_FSUB [[FMUL]], [[INTRINSIC_ROUNDEVEN]] - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FSUB]], [[FMA1]] - ; GFX6-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN]](s32) - ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32) - ; GFX6-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = nnan G_FLDEXP [[INT]], [[FPTOSI]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC059D1DA00000000 - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[COPY]](s32), [[C2]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[FLDEXP]] - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x40562E4300000000 - ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[COPY]](s32), [[C4]] - ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x7FF0000000000000 - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = nnan G_SELECT [[FCMP1]](s1), [[C5]], [[SELECT]] - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SELECT1]](s32) - ; GFX6-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; GFX6-NEXT: 
[[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FF7154760000000 + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = nnan G_FMUL [[BITCAST]], [[C]] + ; GFX6-NEXT: [[FNEG:%[0-9]+]]:_(f32) = nnan G_FNEG [[FMUL]] + ; GFX6-NEXT: [[FMA:%[0-9]+]]:_(f32) = nnan G_FMA [[BITCAST]], [[C]], [[FNEG]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3E54AE0BE0000000 + ; GFX6-NEXT: [[FMA1:%[0-9]+]]:_(f32) = nnan G_FMA [[BITCAST]], [[C1]], [[FMA]] + ; GFX6-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(f32) = nnan G_INTRINSIC_ROUNDEVEN [[FMUL]] + ; GFX6-NEXT: [[FSUB:%[0-9]+]]:_(f32) = nnan G_FSUB [[FMUL]], [[INTRINSIC_ROUNDEVEN]] + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(f32) = nnan G_FADD [[FSUB]], [[FMA1]] + ; GFX6-NEXT: [[FPTOSI:%[0-9]+]]:_(i32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN]](f32) + ; GFX6-NEXT: [[INT:%[0-9]+]]:_(f32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](f32) + ; GFX6-NEXT: [[FLDEXP:%[0-9]+]]:_(f32) = nnan G_FLDEXP [[INT]], [[FPTOSI]](i32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC059D1DA00000000 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[BITCAST]](f32), [[C2]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C3]], [[FLDEXP]] + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x40562E4300000000 + ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(ogt), [[BITCAST]](f32), [[C4]] + ; GFX6-NEXT: [[C5:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x7FF0000000000000 + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(f32) = nnan G_SELECT [[FCMP1]](i1), [[C5]], [[SELECT]] + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(f32) = COPY [[SELECT1]](f32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](f32) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + ; ; GFX8-LABEL: name: test_fexp_s32_nnan ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -4096 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX8-NEXT: [[FSUB:%[0-9]+]]:_(s32) = nnan G_FSUB [[COPY]], [[AND]] - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7140000000000 - ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[AND]], [[C1]] - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3F347652A0000000 - ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[FSUB]], [[C2]] - ; GFX8-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = nnan G_FMUL [[FSUB]], [[C1]] - ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FMUL2]], [[FMUL1]] - ; GFX8-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = nnan G_FMUL [[AND]], [[C2]] - ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s32) = nnan G_FADD [[FMUL3]], [[FADD]] - ; GFX8-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(s32) = nnan G_INTRINSIC_ROUNDEVEN [[FMUL]] - ; GFX8-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = nnan G_FSUB [[FMUL]], [[INTRINSIC_ROUNDEVEN]] - ; GFX8-NEXT: [[FADD2:%[0-9]+]]:_(s32) = nnan G_FADD [[FSUB1]], [[FADD1]] - ; GFX8-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN]](s32) - ; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD2]](s32) - ; GFX8-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = nnan G_FLDEXP [[INT]], [[FPTOSI]](s32) - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC059D1DA00000000 - ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), 
[[COPY]](s32), [[C3]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C4]], [[FLDEXP]] - ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x40562E4300000000 - ; GFX8-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[COPY]](s32), [[C5]] - ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x7FF0000000000000 - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = nnan G_SELECT [[FCMP1]](s1), [[C6]], [[SELECT]] - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SELECT1]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 -4096 + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST]](f32) + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C]] + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[AND]](i32) + ; GFX8-NEXT: [[FSUB:%[0-9]+]]:_(f32) = nnan G_FSUB [[BITCAST]], [[BITCAST2]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FF7140000000000 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(f32) = nnan G_FMUL [[BITCAST2]], [[C1]] + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3F347652A0000000 + ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = nnan G_FMUL [[FSUB]], [[C2]] + ; GFX8-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = nnan G_FMUL [[FSUB]], [[C1]] + ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(f32) = nnan G_FADD [[FMUL2]], [[FMUL1]] + ; GFX8-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = nnan G_FMUL [[BITCAST2]], [[C2]] + ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(f32) = nnan G_FADD [[FMUL3]], [[FADD]] + ; GFX8-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(f32) = nnan G_INTRINSIC_ROUNDEVEN [[FMUL]] + ; GFX8-NEXT: [[FSUB1:%[0-9]+]]:_(f32) = nnan G_FSUB [[FMUL]], [[INTRINSIC_ROUNDEVEN]] + ; GFX8-NEXT: [[FADD2:%[0-9]+]]:_(f32) = nnan G_FADD [[FSUB1]], [[FADD1]] + ; GFX8-NEXT: [[FPTOSI:%[0-9]+]]:_(i32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN]](f32) + ; GFX8-NEXT: [[INT:%[0-9]+]]:_(f32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD2]](f32) + ; GFX8-NEXT: [[FLDEXP:%[0-9]+]]:_(f32) = nnan G_FLDEXP [[INT]], [[FPTOSI]](i32) + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC059D1DA00000000 + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[BITCAST]](f32), [[C3]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C4]], [[FLDEXP]] + ; GFX8-NEXT: [[C5:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x40562E4300000000 + ; GFX8-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(ogt), [[BITCAST]](f32), [[C5]] + ; GFX8-NEXT: [[C6:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x7FF0000000000000 + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(f32) = nnan G_SELECT [[FCMP1]](i1), [[C6]], [[SELECT]] + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(f32) = COPY [[SELECT1]](f32) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](f32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) + ; ; GFX9-LABEL: name: test_fexp_s32_nnan ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[C]] - ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s32) = nnan G_FNEG [[FMUL]] - ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = nnan G_FMA [[COPY]], [[C]], [[FNEG]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3E54AE0BE0000000 - ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s32) = nnan G_FMA [[COPY]], [[C1]], [[FMA]] - ; GFX9-NEXT: 
[[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(s32) = nnan G_INTRINSIC_ROUNDEVEN [[FMUL]] - ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s32) = nnan G_FSUB [[FMUL]], [[INTRINSIC_ROUNDEVEN]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FSUB]], [[FMA1]] - ; GFX9-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN]](s32) - ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32) - ; GFX9-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = nnan G_FLDEXP [[INT]], [[FPTOSI]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC059D1DA00000000 - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[COPY]](s32), [[C2]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[FLDEXP]] - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x40562E4300000000 - ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[COPY]](s32), [[C4]] - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x7FF0000000000000 - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = nnan G_SELECT [[FCMP1]](s1), [[C5]], [[SELECT]] - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SELECT1]](s32) - ; GFX9-NEXT: $vgpr0 = COPY [[COPY1]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = nnan G_FEXP %0 - $vgpr0 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FF7154760000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = nnan G_FMUL [[BITCAST]], [[C]] + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(f32) = nnan G_FNEG [[FMUL]] + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(f32) = nnan G_FMA [[BITCAST]], [[C]], [[FNEG]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3E54AE0BE0000000 + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(f32) = nnan G_FMA [[BITCAST]], [[C1]], [[FMA]] + ; GFX9-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(f32) = nnan G_INTRINSIC_ROUNDEVEN [[FMUL]] + ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(f32) = nnan G_FSUB [[FMUL]], [[INTRINSIC_ROUNDEVEN]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(f32) = nnan G_FADD [[FSUB]], [[FMA1]] + ; GFX9-NEXT: [[FPTOSI:%[0-9]+]]:_(i32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN]](f32) + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(f32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](f32) + ; GFX9-NEXT: [[FLDEXP:%[0-9]+]]:_(f32) = nnan G_FLDEXP [[INT]], [[FPTOSI]](i32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC059D1DA00000000 + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[BITCAST]](f32), [[C2]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C3]], [[FLDEXP]] + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x40562E4300000000 + ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(ogt), [[BITCAST]](f32), [[C4]] + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x7FF0000000000000 + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(f32) = nnan G_SELECT [[FCMP1]](i1), [[C5]], [[SELECT]] + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(f32) = COPY [[SELECT1]](f32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](f32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = nnan G_FEXP %1 + %3:_(i32) = G_BITCAST %2(f32) + $vgpr0 = COPY %3(i32) ... 
--- @@ -202,144 +226,158 @@ body: | ; GFX6-LABEL: name: test_fexp_v2s32 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[C]] - ; GFX6-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FMUL]] - ; GFX6-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[C]], [[FNEG]] - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3E54AE0BE0000000 - ; GFX6-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[C1]], [[FMA]] - ; GFX6-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[FMUL]] - ; GFX6-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[FMUL]], [[INTRINSIC_ROUNDEVEN]] - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FSUB]], [[FMA1]] - ; GFX6-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN]](s32) - ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32) - ; GFX6-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[INT]], [[FPTOSI]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC059D1DA00000000 - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV]](s32), [[C2]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[FLDEXP]] - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x40562E4300000000 - ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UV]](s32), [[C4]] - ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x7FF0000000000000 - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C5]], [[SELECT]] - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SELECT1]](s32) - ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]] - ; GFX6-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[FMUL1]] - ; GFX6-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[C]], [[FNEG1]] - ; GFX6-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[C1]], [[FMA2]] - ; GFX6-NEXT: [[INTRINSIC_ROUNDEVEN1:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[FMUL1]] - ; GFX6-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[FMUL1]], [[INTRINSIC_ROUNDEVEN1]] - ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FSUB1]], [[FMA3]] - ; GFX6-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN1]](s32) - ; GFX6-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](s32) - ; GFX6-NEXT: [[FLDEXP1:%[0-9]+]]:_(s32) = G_FLDEXP [[INT1]], [[FPTOSI1]](s32) - ; GFX6-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C2]] - ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C3]], [[FLDEXP1]] - ; GFX6-NEXT: [[FCMP3:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UV1]](s32), [[C4]] - ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP3]](s1), [[C5]], [[SELECT2]] - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) - ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 
0x3FF7154760000000 + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UV]], [[C]] + ; GFX6-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[FMUL]] + ; GFX6-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[UV]], [[C]], [[FNEG]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3E54AE0BE0000000 + ; GFX6-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[UV]], [[C1]], [[FMA]] + ; GFX6-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(f32) = G_INTRINSIC_ROUNDEVEN [[FMUL]] + ; GFX6-NEXT: [[FSUB:%[0-9]+]]:_(f32) = G_FSUB [[FMUL]], [[INTRINSIC_ROUNDEVEN]] + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FSUB]], [[FMA1]] + ; GFX6-NEXT: [[FPTOSI:%[0-9]+]]:_(i32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN]](f32) + ; GFX6-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](f32) + ; GFX6-NEXT: [[FLDEXP:%[0-9]+]]:_(f32) = G_FLDEXP [[INT]], [[FPTOSI]](i32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC059D1DA00000000 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[UV]](f32), [[C2]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C3]], [[FLDEXP]] + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x40562E4300000000 + ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(ogt), [[UV]](f32), [[C4]] + ; GFX6-NEXT: [[C5:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x7FF0000000000000 + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(f32) = G_SELECT [[FCMP1]](i1), [[C5]], [[SELECT]] + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(f32) = COPY [[SELECT1]](f32) + ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[UV1]], [[C]] + ; GFX6-NEXT: [[FNEG1:%[0-9]+]]:_(f32) = G_FNEG [[FMUL1]] + ; GFX6-NEXT: [[FMA2:%[0-9]+]]:_(f32) = G_FMA [[UV1]], [[C]], [[FNEG1]] + ; GFX6-NEXT: [[FMA3:%[0-9]+]]:_(f32) = G_FMA [[UV1]], [[C1]], [[FMA2]] + ; GFX6-NEXT: [[INTRINSIC_ROUNDEVEN1:%[0-9]+]]:_(f32) = G_INTRINSIC_ROUNDEVEN [[FMUL1]] + ; GFX6-NEXT: [[FSUB1:%[0-9]+]]:_(f32) = G_FSUB [[FMUL1]], [[INTRINSIC_ROUNDEVEN1]] + ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FSUB1]], [[FMA3]] + ; GFX6-NEXT: [[FPTOSI1:%[0-9]+]]:_(i32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN1]](f32) + ; GFX6-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](f32) + ; GFX6-NEXT: [[FLDEXP1:%[0-9]+]]:_(f32) = G_FLDEXP [[INT1]], [[FPTOSI1]](i32) + ; GFX6-NEXT: [[FCMP2:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[UV1]](f32), [[C2]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(f32) = G_SELECT [[FCMP2]](i1), [[C3]], [[FLDEXP1]] + ; GFX6-NEXT: [[FCMP3:%[0-9]+]]:_(i1) = G_FCMP floatpred(ogt), [[UV1]](f32), [[C4]] + ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(f32) = G_SELECT [[FCMP3]](i1), [[C5]], [[SELECT2]] + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(f32) = COPY [[SELECT3]](f32) + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[COPY1]](f32), [[COPY2]](f32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](<2 x i32>) + ; ; GFX8-LABEL: name: test_fexp_v2s32 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -4096 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; GFX8-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[UV]], [[AND]] - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7140000000000 - ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AND]], 
[[C1]] - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3F347652A0000000 - ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FSUB]], [[C2]] - ; GFX8-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FSUB]], [[C1]] - ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[FMUL1]] - ; GFX8-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[AND]], [[C2]] - ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FADD]] - ; GFX8-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[FMUL]] - ; GFX8-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[FMUL]], [[INTRINSIC_ROUNDEVEN]] - ; GFX8-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FSUB1]], [[FADD1]] - ; GFX8-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN]](s32) - ; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD2]](s32) - ; GFX8-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[INT]], [[FPTOSI]](s32) - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC059D1DA00000000 - ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV]](s32), [[C3]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C4]], [[FLDEXP]] - ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x40562E4300000000 - ; GFX8-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UV]](s32), [[C5]] - ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x7FF0000000000000 - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C6]], [[SELECT]] - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SELECT1]](s32) - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; GFX8-NEXT: [[FSUB2:%[0-9]+]]:_(s32) = G_FSUB [[UV1]], [[AND1]] - ; GFX8-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[AND1]], [[C1]] - ; GFX8-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[FSUB2]], [[C2]] - ; GFX8-NEXT: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[FSUB2]], [[C1]] - ; GFX8-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL6]], [[FMUL5]] - ; GFX8-NEXT: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[AND1]], [[C2]] - ; GFX8-NEXT: [[FADD4:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FADD3]] - ; GFX8-NEXT: [[INTRINSIC_ROUNDEVEN1:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[FMUL4]] - ; GFX8-NEXT: [[FSUB3:%[0-9]+]]:_(s32) = G_FSUB [[FMUL4]], [[INTRINSIC_ROUNDEVEN1]] - ; GFX8-NEXT: [[FADD5:%[0-9]+]]:_(s32) = G_FADD [[FSUB3]], [[FADD4]] - ; GFX8-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN1]](s32) - ; GFX8-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD5]](s32) - ; GFX8-NEXT: [[FLDEXP1:%[0-9]+]]:_(s32) = G_FLDEXP [[INT1]], [[FPTOSI1]](s32) - ; GFX8-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C3]] - ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C4]], [[FLDEXP1]] - ; GFX8-NEXT: [[FCMP3:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UV1]](s32), [[C5]] - ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP3]](s1), [[C6]], [[SELECT2]] - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 -4096 + ; GFX8-NEXT: 
[[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](f32) + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C]] + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[AND]](i32) + ; GFX8-NEXT: [[FSUB:%[0-9]+]]:_(f32) = G_FSUB [[UV]], [[BITCAST2]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FF7140000000000 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST2]], [[C1]] + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3F347652A0000000 + ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[FSUB]], [[C2]] + ; GFX8-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FSUB]], [[C1]] + ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL2]], [[FMUL1]] + ; GFX8-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST2]], [[C2]] + ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FADD]] + ; GFX8-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(f32) = G_INTRINSIC_ROUNDEVEN [[FMUL]] + ; GFX8-NEXT: [[FSUB1:%[0-9]+]]:_(f32) = G_FSUB [[FMUL]], [[INTRINSIC_ROUNDEVEN]] + ; GFX8-NEXT: [[FADD2:%[0-9]+]]:_(f32) = G_FADD [[FSUB1]], [[FADD1]] + ; GFX8-NEXT: [[FPTOSI:%[0-9]+]]:_(i32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN]](f32) + ; GFX8-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD2]](f32) + ; GFX8-NEXT: [[FLDEXP:%[0-9]+]]:_(f32) = G_FLDEXP [[INT]], [[FPTOSI]](i32) + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC059D1DA00000000 + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[UV]](f32), [[C3]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C4]], [[FLDEXP]] + ; GFX8-NEXT: [[C5:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x40562E4300000000 + ; GFX8-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(ogt), [[UV]](f32), [[C5]] + ; GFX8-NEXT: [[C6:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x7FF0000000000000 + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(f32) = G_SELECT [[FCMP1]](i1), [[C6]], [[SELECT]] + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(f32) = COPY [[SELECT1]](f32) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](f32) + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST3]], [[C]] + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(f32) = G_BITCAST [[AND1]](i32) + ; GFX8-NEXT: [[FSUB2:%[0-9]+]]:_(f32) = G_FSUB [[UV1]], [[BITCAST4]] + ; GFX8-NEXT: [[FMUL4:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST4]], [[C1]] + ; GFX8-NEXT: [[FMUL5:%[0-9]+]]:_(f32) = G_FMUL [[FSUB2]], [[C2]] + ; GFX8-NEXT: [[FMUL6:%[0-9]+]]:_(f32) = G_FMUL [[FSUB2]], [[C1]] + ; GFX8-NEXT: [[FADD3:%[0-9]+]]:_(f32) = G_FADD [[FMUL6]], [[FMUL5]] + ; GFX8-NEXT: [[FMUL7:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST4]], [[C2]] + ; GFX8-NEXT: [[FADD4:%[0-9]+]]:_(f32) = G_FADD [[FMUL7]], [[FADD3]] + ; GFX8-NEXT: [[INTRINSIC_ROUNDEVEN1:%[0-9]+]]:_(f32) = G_INTRINSIC_ROUNDEVEN [[FMUL4]] + ; GFX8-NEXT: [[FSUB3:%[0-9]+]]:_(f32) = G_FSUB [[FMUL4]], [[INTRINSIC_ROUNDEVEN1]] + ; GFX8-NEXT: [[FADD5:%[0-9]+]]:_(f32) = G_FADD [[FSUB3]], [[FADD4]] + ; GFX8-NEXT: [[FPTOSI1:%[0-9]+]]:_(i32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN1]](f32) + ; GFX8-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD5]](f32) + ; GFX8-NEXT: [[FLDEXP1:%[0-9]+]]:_(f32) = G_FLDEXP [[INT1]], [[FPTOSI1]](i32) + ; GFX8-NEXT: [[FCMP2:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[UV1]](f32), [[C3]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(f32) = G_SELECT [[FCMP2]](i1), [[C4]], [[FLDEXP1]] + ; GFX8-NEXT: [[FCMP3:%[0-9]+]]:_(i1) = G_FCMP floatpred(ogt), [[UV1]](f32), [[C5]] + ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(f32) = G_SELECT [[FCMP3]](i1), [[C6]], [[SELECT2]] + 
; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(f32) = COPY [[SELECT3]](f32) + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[COPY1]](f32), [[COPY2]](f32) + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST5]](<2 x i32>) + ; ; GFX9-LABEL: name: test_fexp_v2s32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[C]] - ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FMUL]] - ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[C]], [[FNEG]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3E54AE0BE0000000 - ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[C1]], [[FMA]] - ; GFX9-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[FMUL]] - ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[FMUL]], [[INTRINSIC_ROUNDEVEN]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FSUB]], [[FMA1]] - ; GFX9-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN]](s32) - ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32) - ; GFX9-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[INT]], [[FPTOSI]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC059D1DA00000000 - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV]](s32), [[C2]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[FLDEXP]] - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x40562E4300000000 - ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UV]](s32), [[C4]] - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x7FF0000000000000 - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C5]], [[SELECT]] - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SELECT1]](s32) - ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]] - ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[FMUL1]] - ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[C]], [[FNEG1]] - ; GFX9-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[C1]], [[FMA2]] - ; GFX9-NEXT: [[INTRINSIC_ROUNDEVEN1:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[FMUL1]] - ; GFX9-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[FMUL1]], [[INTRINSIC_ROUNDEVEN1]] - ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FSUB1]], [[FMA3]] - ; GFX9-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN1]](s32) - ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](s32) - ; GFX9-NEXT: [[FLDEXP1:%[0-9]+]]:_(s32) = G_FLDEXP [[INT1]], [[FPTOSI1]](s32) - ; GFX9-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C2]] - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C3]], [[FLDEXP1]] - ; GFX9-NEXT: [[FCMP3:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UV1]](s32), [[C4]] - ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP3]](s1), [[C5]], [[SELECT2]] - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - 
%1:_(<2 x s32>) = G_FEXP %0 - $vgpr0_vgpr1 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FF7154760000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UV]], [[C]] + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[FMUL]] + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[UV]], [[C]], [[FNEG]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3E54AE0BE0000000 + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[UV]], [[C1]], [[FMA]] + ; GFX9-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(f32) = G_INTRINSIC_ROUNDEVEN [[FMUL]] + ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(f32) = G_FSUB [[FMUL]], [[INTRINSIC_ROUNDEVEN]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FSUB]], [[FMA1]] + ; GFX9-NEXT: [[FPTOSI:%[0-9]+]]:_(i32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN]](f32) + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](f32) + ; GFX9-NEXT: [[FLDEXP:%[0-9]+]]:_(f32) = G_FLDEXP [[INT]], [[FPTOSI]](i32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC059D1DA00000000 + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[UV]](f32), [[C2]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C3]], [[FLDEXP]] + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x40562E4300000000 + ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(ogt), [[UV]](f32), [[C4]] + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x7FF0000000000000 + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(f32) = G_SELECT [[FCMP1]](i1), [[C5]], [[SELECT]] + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(f32) = COPY [[SELECT1]](f32) + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[UV1]], [[C]] + ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(f32) = G_FNEG [[FMUL1]] + ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(f32) = G_FMA [[UV1]], [[C]], [[FNEG1]] + ; GFX9-NEXT: [[FMA3:%[0-9]+]]:_(f32) = G_FMA [[UV1]], [[C1]], [[FMA2]] + ; GFX9-NEXT: [[INTRINSIC_ROUNDEVEN1:%[0-9]+]]:_(f32) = G_INTRINSIC_ROUNDEVEN [[FMUL1]] + ; GFX9-NEXT: [[FSUB1:%[0-9]+]]:_(f32) = G_FSUB [[FMUL1]], [[INTRINSIC_ROUNDEVEN1]] + ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FSUB1]], [[FMA3]] + ; GFX9-NEXT: [[FPTOSI1:%[0-9]+]]:_(i32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN1]](f32) + ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](f32) + ; GFX9-NEXT: [[FLDEXP1:%[0-9]+]]:_(f32) = G_FLDEXP [[INT1]], [[FPTOSI1]](i32) + ; GFX9-NEXT: [[FCMP2:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[UV1]](f32), [[C2]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(f32) = G_SELECT [[FCMP2]](i1), [[C3]], [[FLDEXP1]] + ; GFX9-NEXT: [[FCMP3:%[0-9]+]]:_(i1) = G_FCMP floatpred(ogt), [[UV1]](f32), [[C4]] + ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(f32) = G_SELECT [[FCMP3]](i1), [[C5]], [[SELECT2]] + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(f32) = COPY [[SELECT3]](f32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[COPY1]](f32), [[COPY2]](f32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x f32>) = G_BITCAST %0(<2 x i32>) + %2:_(<2 x f32>) = G_FEXP %1 + %3:_(<2 x i32>) = G_BITCAST %2(<2 x f32>) + $vgpr0_vgpr1 = COPY %3(<2 x i32>) ... 
--- @@ -351,193 +389,209 @@ body: | ; GFX6-LABEL: name: test_fexp_v3s32 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[C]] - ; GFX6-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FMUL]] - ; GFX6-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[C]], [[FNEG]] - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3E54AE0BE0000000 - ; GFX6-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[C1]], [[FMA]] - ; GFX6-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[FMUL]] - ; GFX6-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[FMUL]], [[INTRINSIC_ROUNDEVEN]] - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FSUB]], [[FMA1]] - ; GFX6-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN]](s32) - ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32) - ; GFX6-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[INT]], [[FPTOSI]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC059D1DA00000000 - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV]](s32), [[C2]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[FLDEXP]] - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x40562E4300000000 - ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UV]](s32), [[C4]] - ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x7FF0000000000000 - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C5]], [[SELECT]] - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SELECT1]](s32) - ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]] - ; GFX6-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[FMUL1]] - ; GFX6-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[C]], [[FNEG1]] - ; GFX6-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[C1]], [[FMA2]] - ; GFX6-NEXT: [[INTRINSIC_ROUNDEVEN1:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[FMUL1]] - ; GFX6-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[FMUL1]], [[INTRINSIC_ROUNDEVEN1]] - ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FSUB1]], [[FMA3]] - ; GFX6-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN1]](s32) - ; GFX6-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](s32) - ; GFX6-NEXT: [[FLDEXP1:%[0-9]+]]:_(s32) = G_FLDEXP [[INT1]], [[FPTOSI1]](s32) - ; GFX6-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C2]] - ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C3]], [[FLDEXP1]] - ; GFX6-NEXT: [[FCMP3:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UV1]](s32), [[C4]] - ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP3]](s1), [[C5]], [[SELECT2]] - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) - ; GFX6-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[C]] - ; GFX6-NEXT: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[FMUL2]] - ; GFX6-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[UV2]], [[C]], [[FNEG2]] - ; GFX6-NEXT: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[UV2]], [[C1]], [[FMA4]] - ; GFX6-NEXT: [[INTRINSIC_ROUNDEVEN2:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[FMUL2]] - ; GFX6-NEXT: [[FSUB2:%[0-9]+]]:_(s32) = G_FSUB [[FMUL2]], [[INTRINSIC_ROUNDEVEN2]] - ; 
GFX6-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FSUB2]], [[FMA5]] - ; GFX6-NEXT: [[FPTOSI2:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN2]](s32) - ; GFX6-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD2]](s32) - ; GFX6-NEXT: [[FLDEXP2:%[0-9]+]]:_(s32) = G_FLDEXP [[INT2]], [[FPTOSI2]](s32) - ; GFX6-NEXT: [[FCMP4:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV2]](s32), [[C2]] - ; GFX6-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[FCMP4]](s1), [[C3]], [[FLDEXP2]] - ; GFX6-NEXT: [[FCMP5:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UV2]](s32), [[C4]] - ; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[FCMP5]](s1), [[C5]], [[SELECT4]] - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[SELECT5]](s32) - ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY]](<3 x i32>) + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FF7154760000000 + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UV]], [[C]] + ; GFX6-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[FMUL]] + ; GFX6-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[UV]], [[C]], [[FNEG]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3E54AE0BE0000000 + ; GFX6-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[UV]], [[C1]], [[FMA]] + ; GFX6-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(f32) = G_INTRINSIC_ROUNDEVEN [[FMUL]] + ; GFX6-NEXT: [[FSUB:%[0-9]+]]:_(f32) = G_FSUB [[FMUL]], [[INTRINSIC_ROUNDEVEN]] + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FSUB]], [[FMA1]] + ; GFX6-NEXT: [[FPTOSI:%[0-9]+]]:_(i32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN]](f32) + ; GFX6-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](f32) + ; GFX6-NEXT: [[FLDEXP:%[0-9]+]]:_(f32) = G_FLDEXP [[INT]], [[FPTOSI]](i32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC059D1DA00000000 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[UV]](f32), [[C2]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C3]], [[FLDEXP]] + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x40562E4300000000 + ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(ogt), [[UV]](f32), [[C4]] + ; GFX6-NEXT: [[C5:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x7FF0000000000000 + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(f32) = G_SELECT [[FCMP1]](i1), [[C5]], [[SELECT]] + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(f32) = COPY [[SELECT1]](f32) + ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[UV1]], [[C]] + ; GFX6-NEXT: [[FNEG1:%[0-9]+]]:_(f32) = G_FNEG [[FMUL1]] + ; GFX6-NEXT: [[FMA2:%[0-9]+]]:_(f32) = G_FMA [[UV1]], [[C]], [[FNEG1]] + ; GFX6-NEXT: [[FMA3:%[0-9]+]]:_(f32) = G_FMA [[UV1]], [[C1]], [[FMA2]] + ; GFX6-NEXT: [[INTRINSIC_ROUNDEVEN1:%[0-9]+]]:_(f32) = G_INTRINSIC_ROUNDEVEN [[FMUL1]] + ; GFX6-NEXT: [[FSUB1:%[0-9]+]]:_(f32) = G_FSUB [[FMUL1]], [[INTRINSIC_ROUNDEVEN1]] + ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FSUB1]], [[FMA3]] + ; GFX6-NEXT: [[FPTOSI1:%[0-9]+]]:_(i32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN1]](f32) + ; GFX6-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](f32) + ; GFX6-NEXT: [[FLDEXP1:%[0-9]+]]:_(f32) = G_FLDEXP 
[[INT1]], [[FPTOSI1]](i32) + ; GFX6-NEXT: [[FCMP2:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[UV1]](f32), [[C2]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(f32) = G_SELECT [[FCMP2]](i1), [[C3]], [[FLDEXP1]] + ; GFX6-NEXT: [[FCMP3:%[0-9]+]]:_(i1) = G_FCMP floatpred(ogt), [[UV1]](f32), [[C4]] + ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(f32) = G_SELECT [[FCMP3]](i1), [[C5]], [[SELECT2]] + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(f32) = COPY [[SELECT3]](f32) + ; GFX6-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[UV2]], [[C]] + ; GFX6-NEXT: [[FNEG2:%[0-9]+]]:_(f32) = G_FNEG [[FMUL2]] + ; GFX6-NEXT: [[FMA4:%[0-9]+]]:_(f32) = G_FMA [[UV2]], [[C]], [[FNEG2]] + ; GFX6-NEXT: [[FMA5:%[0-9]+]]:_(f32) = G_FMA [[UV2]], [[C1]], [[FMA4]] + ; GFX6-NEXT: [[INTRINSIC_ROUNDEVEN2:%[0-9]+]]:_(f32) = G_INTRINSIC_ROUNDEVEN [[FMUL2]] + ; GFX6-NEXT: [[FSUB2:%[0-9]+]]:_(f32) = G_FSUB [[FMUL2]], [[INTRINSIC_ROUNDEVEN2]] + ; GFX6-NEXT: [[FADD2:%[0-9]+]]:_(f32) = G_FADD [[FSUB2]], [[FMA5]] + ; GFX6-NEXT: [[FPTOSI2:%[0-9]+]]:_(i32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN2]](f32) + ; GFX6-NEXT: [[INT2:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD2]](f32) + ; GFX6-NEXT: [[FLDEXP2:%[0-9]+]]:_(f32) = G_FLDEXP [[INT2]], [[FPTOSI2]](i32) + ; GFX6-NEXT: [[FCMP4:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[UV2]](f32), [[C2]] + ; GFX6-NEXT: [[SELECT4:%[0-9]+]]:_(f32) = G_SELECT [[FCMP4]](i1), [[C3]], [[FLDEXP2]] + ; GFX6-NEXT: [[FCMP5:%[0-9]+]]:_(i1) = G_FCMP floatpred(ogt), [[UV2]](f32), [[C4]] + ; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(f32) = G_SELECT [[FCMP5]](i1), [[C5]], [[SELECT4]] + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:_(f32) = COPY [[SELECT5]](f32) + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[COPY1]](f32), [[COPY2]](f32), [[COPY3]](f32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[BUILD_VECTOR]](<3 x f32>) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST1]](<3 x i32>) + ; ; GFX8-LABEL: name: test_fexp_v3s32 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -4096 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; GFX8-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[UV]], [[AND]] - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7140000000000 - ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AND]], [[C1]] - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3F347652A0000000 - ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FSUB]], [[C2]] - ; GFX8-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FSUB]], [[C1]] - ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[FMUL1]] - ; GFX8-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[AND]], [[C2]] - ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FADD]] - ; GFX8-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[FMUL]] - ; GFX8-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[FMUL]], [[INTRINSIC_ROUNDEVEN]] - ; GFX8-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FSUB1]], [[FADD1]] - ; GFX8-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN]](s32) - ; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD2]](s32) - ; GFX8-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[INT]], [[FPTOSI]](s32) - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC059D1DA00000000 - ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 
0.000000e+00 - ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV]](s32), [[C3]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C4]], [[FLDEXP]] - ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x40562E4300000000 - ; GFX8-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UV]](s32), [[C5]] - ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x7FF0000000000000 - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C6]], [[SELECT]] - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SELECT1]](s32) - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; GFX8-NEXT: [[FSUB2:%[0-9]+]]:_(s32) = G_FSUB [[UV1]], [[AND1]] - ; GFX8-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[AND1]], [[C1]] - ; GFX8-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[FSUB2]], [[C2]] - ; GFX8-NEXT: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[FSUB2]], [[C1]] - ; GFX8-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL6]], [[FMUL5]] - ; GFX8-NEXT: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[AND1]], [[C2]] - ; GFX8-NEXT: [[FADD4:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FADD3]] - ; GFX8-NEXT: [[INTRINSIC_ROUNDEVEN1:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[FMUL4]] - ; GFX8-NEXT: [[FSUB3:%[0-9]+]]:_(s32) = G_FSUB [[FMUL4]], [[INTRINSIC_ROUNDEVEN1]] - ; GFX8-NEXT: [[FADD5:%[0-9]+]]:_(s32) = G_FADD [[FSUB3]], [[FADD4]] - ; GFX8-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN1]](s32) - ; GFX8-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD5]](s32) - ; GFX8-NEXT: [[FLDEXP1:%[0-9]+]]:_(s32) = G_FLDEXP [[INT1]], [[FPTOSI1]](s32) - ; GFX8-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C3]] - ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C4]], [[FLDEXP1]] - ; GFX8-NEXT: [[FCMP3:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UV1]](s32), [[C5]] - ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP3]](s1), [[C6]], [[SELECT2]] - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] - ; GFX8-NEXT: [[FSUB4:%[0-9]+]]:_(s32) = G_FSUB [[UV2]], [[AND2]] - ; GFX8-NEXT: [[FMUL8:%[0-9]+]]:_(s32) = G_FMUL [[AND2]], [[C1]] - ; GFX8-NEXT: [[FMUL9:%[0-9]+]]:_(s32) = G_FMUL [[FSUB4]], [[C2]] - ; GFX8-NEXT: [[FMUL10:%[0-9]+]]:_(s32) = G_FMUL [[FSUB4]], [[C1]] - ; GFX8-NEXT: [[FADD6:%[0-9]+]]:_(s32) = G_FADD [[FMUL10]], [[FMUL9]] - ; GFX8-NEXT: [[FMUL11:%[0-9]+]]:_(s32) = G_FMUL [[AND2]], [[C2]] - ; GFX8-NEXT: [[FADD7:%[0-9]+]]:_(s32) = G_FADD [[FMUL11]], [[FADD6]] - ; GFX8-NEXT: [[INTRINSIC_ROUNDEVEN2:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[FMUL8]] - ; GFX8-NEXT: [[FSUB5:%[0-9]+]]:_(s32) = G_FSUB [[FMUL8]], [[INTRINSIC_ROUNDEVEN2]] - ; GFX8-NEXT: [[FADD8:%[0-9]+]]:_(s32) = G_FADD [[FSUB5]], [[FADD7]] - ; GFX8-NEXT: [[FPTOSI2:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN2]](s32) - ; GFX8-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD8]](s32) - ; GFX8-NEXT: [[FLDEXP2:%[0-9]+]]:_(s32) = G_FLDEXP [[INT2]], [[FPTOSI2]](s32) - ; GFX8-NEXT: [[FCMP4:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV2]](s32), [[C3]] - ; GFX8-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[FCMP4]](s1), [[C4]], [[FLDEXP2]] - ; GFX8-NEXT: [[FCMP5:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UV2]](s32), [[C5]] - ; GFX8-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[FCMP5]](s1), [[C6]], [[SELECT4]] - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[SELECT5]](s32) - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR 
[[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY]](<3 x i32>) + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 -4096 + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](f32) + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C]] + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[AND]](i32) + ; GFX8-NEXT: [[FSUB:%[0-9]+]]:_(f32) = G_FSUB [[UV]], [[BITCAST2]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FF7140000000000 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST2]], [[C1]] + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3F347652A0000000 + ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[FSUB]], [[C2]] + ; GFX8-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FSUB]], [[C1]] + ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL2]], [[FMUL1]] + ; GFX8-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST2]], [[C2]] + ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FADD]] + ; GFX8-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(f32) = G_INTRINSIC_ROUNDEVEN [[FMUL]] + ; GFX8-NEXT: [[FSUB1:%[0-9]+]]:_(f32) = G_FSUB [[FMUL]], [[INTRINSIC_ROUNDEVEN]] + ; GFX8-NEXT: [[FADD2:%[0-9]+]]:_(f32) = G_FADD [[FSUB1]], [[FADD1]] + ; GFX8-NEXT: [[FPTOSI:%[0-9]+]]:_(i32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN]](f32) + ; GFX8-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD2]](f32) + ; GFX8-NEXT: [[FLDEXP:%[0-9]+]]:_(f32) = G_FLDEXP [[INT]], [[FPTOSI]](i32) + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC059D1DA00000000 + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[UV]](f32), [[C3]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C4]], [[FLDEXP]] + ; GFX8-NEXT: [[C5:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x40562E4300000000 + ; GFX8-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(ogt), [[UV]](f32), [[C5]] + ; GFX8-NEXT: [[C6:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x7FF0000000000000 + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(f32) = G_SELECT [[FCMP1]](i1), [[C6]], [[SELECT]] + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(f32) = COPY [[SELECT1]](f32) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](f32) + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST3]], [[C]] + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(f32) = G_BITCAST [[AND1]](i32) + ; GFX8-NEXT: [[FSUB2:%[0-9]+]]:_(f32) = G_FSUB [[UV1]], [[BITCAST4]] + ; GFX8-NEXT: [[FMUL4:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST4]], [[C1]] + ; GFX8-NEXT: [[FMUL5:%[0-9]+]]:_(f32) = G_FMUL [[FSUB2]], [[C2]] + ; GFX8-NEXT: [[FMUL6:%[0-9]+]]:_(f32) = G_FMUL [[FSUB2]], [[C1]] + ; GFX8-NEXT: [[FADD3:%[0-9]+]]:_(f32) = G_FADD [[FMUL6]], [[FMUL5]] + ; GFX8-NEXT: [[FMUL7:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST4]], [[C2]] + ; GFX8-NEXT: [[FADD4:%[0-9]+]]:_(f32) = G_FADD [[FMUL7]], [[FADD3]] + ; GFX8-NEXT: [[INTRINSIC_ROUNDEVEN1:%[0-9]+]]:_(f32) = G_INTRINSIC_ROUNDEVEN [[FMUL4]] + ; GFX8-NEXT: [[FSUB3:%[0-9]+]]:_(f32) = G_FSUB [[FMUL4]], [[INTRINSIC_ROUNDEVEN1]] + ; GFX8-NEXT: [[FADD5:%[0-9]+]]:_(f32) = G_FADD [[FSUB3]], [[FADD4]] + ; GFX8-NEXT: [[FPTOSI1:%[0-9]+]]:_(i32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN1]](f32) + ; GFX8-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.exp2), [[FADD5]](f32) + ; GFX8-NEXT: [[FLDEXP1:%[0-9]+]]:_(f32) = G_FLDEXP [[INT1]], [[FPTOSI1]](i32) + ; GFX8-NEXT: [[FCMP2:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[UV1]](f32), [[C3]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(f32) = G_SELECT [[FCMP2]](i1), [[C4]], [[FLDEXP1]] + ; GFX8-NEXT: [[FCMP3:%[0-9]+]]:_(i1) = G_FCMP floatpred(ogt), [[UV1]](f32), [[C5]] + ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(f32) = G_SELECT [[FCMP3]](i1), [[C6]], [[SELECT2]] + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(f32) = COPY [[SELECT3]](f32) + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](f32) + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[BITCAST5]], [[C]] + ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(f32) = G_BITCAST [[AND2]](i32) + ; GFX8-NEXT: [[FSUB4:%[0-9]+]]:_(f32) = G_FSUB [[UV2]], [[BITCAST6]] + ; GFX8-NEXT: [[FMUL8:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST6]], [[C1]] + ; GFX8-NEXT: [[FMUL9:%[0-9]+]]:_(f32) = G_FMUL [[FSUB4]], [[C2]] + ; GFX8-NEXT: [[FMUL10:%[0-9]+]]:_(f32) = G_FMUL [[FSUB4]], [[C1]] + ; GFX8-NEXT: [[FADD6:%[0-9]+]]:_(f32) = G_FADD [[FMUL10]], [[FMUL9]] + ; GFX8-NEXT: [[FMUL11:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST6]], [[C2]] + ; GFX8-NEXT: [[FADD7:%[0-9]+]]:_(f32) = G_FADD [[FMUL11]], [[FADD6]] + ; GFX8-NEXT: [[INTRINSIC_ROUNDEVEN2:%[0-9]+]]:_(f32) = G_INTRINSIC_ROUNDEVEN [[FMUL8]] + ; GFX8-NEXT: [[FSUB5:%[0-9]+]]:_(f32) = G_FSUB [[FMUL8]], [[INTRINSIC_ROUNDEVEN2]] + ; GFX8-NEXT: [[FADD8:%[0-9]+]]:_(f32) = G_FADD [[FSUB5]], [[FADD7]] + ; GFX8-NEXT: [[FPTOSI2:%[0-9]+]]:_(i32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN2]](f32) + ; GFX8-NEXT: [[INT2:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD8]](f32) + ; GFX8-NEXT: [[FLDEXP2:%[0-9]+]]:_(f32) = G_FLDEXP [[INT2]], [[FPTOSI2]](i32) + ; GFX8-NEXT: [[FCMP4:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[UV2]](f32), [[C3]] + ; GFX8-NEXT: [[SELECT4:%[0-9]+]]:_(f32) = G_SELECT [[FCMP4]](i1), [[C4]], [[FLDEXP2]] + ; GFX8-NEXT: [[FCMP5:%[0-9]+]]:_(i1) = G_FCMP floatpred(ogt), [[UV2]](f32), [[C5]] + ; GFX8-NEXT: [[SELECT5:%[0-9]+]]:_(f32) = G_SELECT [[FCMP5]](i1), [[C6]], [[SELECT4]] + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(f32) = COPY [[SELECT5]](f32) + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[COPY1]](f32), [[COPY2]](f32), [[COPY3]](f32) + ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[BUILD_VECTOR]](<3 x f32>) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST7]](<3 x i32>) + ; ; GFX9-LABEL: name: test_fexp_v3s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[C]] - ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FMUL]] - ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[C]], [[FNEG]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3E54AE0BE0000000 - ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[C1]], [[FMA]] - ; GFX9-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[FMUL]] - ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[FMUL]], [[INTRINSIC_ROUNDEVEN]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FSUB]], [[FMA1]] - ; GFX9-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN]](s32) - ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32) - ; GFX9-NEXT: 
[[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[INT]], [[FPTOSI]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC059D1DA00000000 - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV]](s32), [[C2]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[FLDEXP]] - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x40562E4300000000 - ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UV]](s32), [[C4]] - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x7FF0000000000000 - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C5]], [[SELECT]] - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SELECT1]](s32) - ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]] - ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[FMUL1]] - ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[C]], [[FNEG1]] - ; GFX9-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[C1]], [[FMA2]] - ; GFX9-NEXT: [[INTRINSIC_ROUNDEVEN1:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[FMUL1]] - ; GFX9-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[FMUL1]], [[INTRINSIC_ROUNDEVEN1]] - ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FSUB1]], [[FMA3]] - ; GFX9-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN1]](s32) - ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](s32) - ; GFX9-NEXT: [[FLDEXP1:%[0-9]+]]:_(s32) = G_FLDEXP [[INT1]], [[FPTOSI1]](s32) - ; GFX9-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C2]] - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C3]], [[FLDEXP1]] - ; GFX9-NEXT: [[FCMP3:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UV1]](s32), [[C4]] - ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP3]](s1), [[C5]], [[SELECT2]] - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) - ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[C]] - ; GFX9-NEXT: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[FMUL2]] - ; GFX9-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[UV2]], [[C]], [[FNEG2]] - ; GFX9-NEXT: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[UV2]], [[C1]], [[FMA4]] - ; GFX9-NEXT: [[INTRINSIC_ROUNDEVEN2:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[FMUL2]] - ; GFX9-NEXT: [[FSUB2:%[0-9]+]]:_(s32) = G_FSUB [[FMUL2]], [[INTRINSIC_ROUNDEVEN2]] - ; GFX9-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FSUB2]], [[FMA5]] - ; GFX9-NEXT: [[FPTOSI2:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN2]](s32) - ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD2]](s32) - ; GFX9-NEXT: [[FLDEXP2:%[0-9]+]]:_(s32) = G_FLDEXP [[INT2]], [[FPTOSI2]](s32) - ; GFX9-NEXT: [[FCMP4:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV2]](s32), [[C2]] - ; GFX9-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[FCMP4]](s1), [[C3]], [[FLDEXP2]] - ; GFX9-NEXT: [[FCMP5:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UV2]](s32), [[C4]] - ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[FCMP5]](s1), [[C5]], [[SELECT4]] - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[SELECT5]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s32>) = G_FEXP %0 - $vgpr0_vgpr1_vgpr2 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST 
[[COPY]](<3 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FF7154760000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UV]], [[C]] + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[FMUL]] + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[UV]], [[C]], [[FNEG]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3E54AE0BE0000000 + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[UV]], [[C1]], [[FMA]] + ; GFX9-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(f32) = G_INTRINSIC_ROUNDEVEN [[FMUL]] + ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(f32) = G_FSUB [[FMUL]], [[INTRINSIC_ROUNDEVEN]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FSUB]], [[FMA1]] + ; GFX9-NEXT: [[FPTOSI:%[0-9]+]]:_(i32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN]](f32) + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](f32) + ; GFX9-NEXT: [[FLDEXP:%[0-9]+]]:_(f32) = G_FLDEXP [[INT]], [[FPTOSI]](i32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC059D1DA00000000 + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[UV]](f32), [[C2]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C3]], [[FLDEXP]] + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x40562E4300000000 + ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(ogt), [[UV]](f32), [[C4]] + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x7FF0000000000000 + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(f32) = G_SELECT [[FCMP1]](i1), [[C5]], [[SELECT]] + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(f32) = COPY [[SELECT1]](f32) + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[UV1]], [[C]] + ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(f32) = G_FNEG [[FMUL1]] + ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(f32) = G_FMA [[UV1]], [[C]], [[FNEG1]] + ; GFX9-NEXT: [[FMA3:%[0-9]+]]:_(f32) = G_FMA [[UV1]], [[C1]], [[FMA2]] + ; GFX9-NEXT: [[INTRINSIC_ROUNDEVEN1:%[0-9]+]]:_(f32) = G_INTRINSIC_ROUNDEVEN [[FMUL1]] + ; GFX9-NEXT: [[FSUB1:%[0-9]+]]:_(f32) = G_FSUB [[FMUL1]], [[INTRINSIC_ROUNDEVEN1]] + ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FSUB1]], [[FMA3]] + ; GFX9-NEXT: [[FPTOSI1:%[0-9]+]]:_(i32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN1]](f32) + ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](f32) + ; GFX9-NEXT: [[FLDEXP1:%[0-9]+]]:_(f32) = G_FLDEXP [[INT1]], [[FPTOSI1]](i32) + ; GFX9-NEXT: [[FCMP2:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[UV1]](f32), [[C2]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(f32) = G_SELECT [[FCMP2]](i1), [[C3]], [[FLDEXP1]] + ; GFX9-NEXT: [[FCMP3:%[0-9]+]]:_(i1) = G_FCMP floatpred(ogt), [[UV1]](f32), [[C4]] + ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(f32) = G_SELECT [[FCMP3]](i1), [[C5]], [[SELECT2]] + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(f32) = COPY [[SELECT3]](f32) + ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[UV2]], [[C]] + ; GFX9-NEXT: [[FNEG2:%[0-9]+]]:_(f32) = G_FNEG [[FMUL2]] + ; GFX9-NEXT: [[FMA4:%[0-9]+]]:_(f32) = G_FMA [[UV2]], [[C]], [[FNEG2]] + ; GFX9-NEXT: [[FMA5:%[0-9]+]]:_(f32) = G_FMA [[UV2]], [[C1]], [[FMA4]] + ; GFX9-NEXT: [[INTRINSIC_ROUNDEVEN2:%[0-9]+]]:_(f32) = G_INTRINSIC_ROUNDEVEN [[FMUL2]] + ; GFX9-NEXT: [[FSUB2:%[0-9]+]]:_(f32) = G_FSUB [[FMUL2]], [[INTRINSIC_ROUNDEVEN2]] + ; GFX9-NEXT: [[FADD2:%[0-9]+]]:_(f32) = G_FADD [[FSUB2]], [[FMA5]] + ; GFX9-NEXT: [[FPTOSI2:%[0-9]+]]:_(i32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN2]](f32) + ; GFX9-NEXT: 
[[INT2:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD2]](f32) + ; GFX9-NEXT: [[FLDEXP2:%[0-9]+]]:_(f32) = G_FLDEXP [[INT2]], [[FPTOSI2]](i32) + ; GFX9-NEXT: [[FCMP4:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[UV2]](f32), [[C2]] + ; GFX9-NEXT: [[SELECT4:%[0-9]+]]:_(f32) = G_SELECT [[FCMP4]](i1), [[C3]], [[FLDEXP2]] + ; GFX9-NEXT: [[FCMP5:%[0-9]+]]:_(i1) = G_FCMP floatpred(ogt), [[UV2]](f32), [[C4]] + ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(f32) = G_SELECT [[FCMP5]](i1), [[C5]], [[SELECT4]] + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(f32) = COPY [[SELECT5]](f32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[COPY1]](f32), [[COPY2]](f32), [[COPY3]](f32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[BUILD_VECTOR]](<3 x f32>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST1]](<3 x i32>) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x f32>) = G_BITCAST %0(<3 x i32>) + %2:_(<3 x f32>) = G_FEXP %1 + %3:_(<3 x i32>) = G_BITCAST %2(<3 x f32>) + $vgpr0_vgpr1_vgpr2 = COPY %3(<3 x i32>) ... --- @@ -549,44 +603,54 @@ body: | ; GFX6-LABEL: name: test_fexp_s16 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX6-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[C]] - ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FMUL]](s32) - ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT]](s32) - ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX6-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FF7154760000000 + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT]], [[C]] + ; GFX6-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FMUL]](f32) + ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[INT]](f32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + ; ; GFX8-LABEL: name: test_fexp_s16 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 - ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[C]] - ; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FMUL]](s32) - ; GFX8-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT]](s32) - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX8-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FF7154760000000 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(f32) 
= G_FMUL [[FPEXT]], [[C]] + ; GFX8-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FMUL]](f32) + ; GFX8-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[INT]](f32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + ; ; GFX9-LABEL: name: test_fexp_s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[C]] - ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FMUL]](s32) - ; GFX9-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT]](s32) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s16) = G_FEXP %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FF7154760000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT]], [[C]] + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FMUL]](f32) + ; GFX9-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[INT]](f32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(f16) = G_BITCAST %1(i16) + %3:_(f16) = G_FEXP %2 + %4:_(i16) = G_BITCAST %3(f16) + %5:_(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... 
--- @@ -598,59 +662,69 @@ body: | ; GFX6-LABEL: name: test_fexp_v2s16 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX6-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT %4(s16) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[C]] - ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FMUL]](s32) - ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT]](s32) - ; GFX6-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT %5(s16) - ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT1]], [[C]] - ; GFX6-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FMUL1]](s32) - ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32) - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX6-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT %5(f16) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FF7154760000000 + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT]], [[C]] + ; GFX6-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FMUL]](f32) + ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[INT]](f32) + ; GFX6-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT %6(f16) + ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT1]], [[C]] + ; GFX6-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FMUL1]](f32) + ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[INT1]](f32) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST]](i16) + ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST1]](i16) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST2]](<2 x f16>) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x i16>) + ; ; GFX8-LABEL: name: test_fexp_v2s16 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT %4(s16) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 - ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[C]] - ; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FMUL]](s32) - ; GFX8-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT]](s32) - ; GFX8-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT %5(s16) - ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT1]], [[C]] - ; GFX8-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FMUL1]](s32) - ; GFX8-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32) - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; GFX8-NEXT: 
[[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX8-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT %5(f16) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FF7154760000000 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT]], [[C]] + ; GFX8-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FMUL]](f32) + ; GFX8-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[INT]](f32) + ; GFX8-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT %6(f16) + ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT1]], [[C]] + ; GFX8-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FMUL1]](f32) + ; GFX8-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[INT1]](f32) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST]](i16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST1]](i16) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST2]](<2 x f16>) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x i16>) + ; ; GFX9-LABEL: name: test_fexp_v2s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT %4(s16) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[C]] - ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FMUL]](s32) - ; GFX9-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT]](s32) - ; GFX9-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT %5(s16) - ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT1]], [[C]] - ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FMUL1]](s32) - ; GFX9-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = G_FEXP %1 - $vgpr0 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT %5(f16) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FF7154760000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT]], [[C]] + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FMUL]](f32) + ; GFX9-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[INT]](f32) + ; GFX9-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT %6(f16) + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT1]], [[C]] + ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FMUL1]](f32) + ; GFX9-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[INT1]](f32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x 
f16>) = G_BUILD_VECTOR [[FPTRUNC]](f16), [[FPTRUNC1]](f16) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BUILD_VECTOR]](<2 x f16>) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x f16>) = G_FEXP %1 + %2:_(<2 x i16>) = G_BITCAST %1(<2 x f16>) + $vgpr0 = COPY %2(<2 x i16>) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fexp2.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fexp2.mir index adef120554bbf..afc21e87f28b0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fexp2.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fexp2.mir @@ -14,54 +14,64 @@ body: | ; GFX6-LABEL: name: test_fexp2_s32 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02 - ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[COPY]](s32), [[C]] - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01 - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]] - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY]], [[SELECT]] - ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32) - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000 - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[C4]] - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[SELECT1]] - ; GFX6-NEXT: $vgpr0 = COPY [[FMUL]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float -1.260000e+02 + ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[BITCAST]](f32), [[C]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 6.400000e+01 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C1]], [[C2]] + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[BITCAST]], [[SELECT]] + ; GFX6-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](f32) + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3BF0000000000000 + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C3]], [[C4]] + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[INT]], [[SELECT1]] + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[FMUL]](f32) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + ; ; GFX8-LABEL: name: test_fexp2_s32 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02 - ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[COPY]](s32), [[C]] - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01 - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]] - ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY]], [[SELECT]] - ; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32) - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000 - ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT 
float 1.000000e+00 - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[C4]] - ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[SELECT1]] - ; GFX8-NEXT: $vgpr0 = COPY [[FMUL]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float -1.260000e+02 + ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[BITCAST]](f32), [[C]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 6.400000e+01 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C1]], [[C2]] + ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[BITCAST]], [[SELECT]] + ; GFX8-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](f32) + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3BF0000000000000 + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C3]], [[C4]] + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[INT]], [[SELECT1]] + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[FMUL]](f32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + ; ; GFX9-LABEL: name: test_fexp2_s32 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02 - ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[COPY]](s32), [[C]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01 - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY]], [[SELECT]] - ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32) - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000 - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[C4]] - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[SELECT1]] - ; GFX9-NEXT: $vgpr0 = COPY [[FMUL]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_FEXP2 %0 - $vgpr0 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float -1.260000e+02 + ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[BITCAST]](f32), [[C]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 6.400000e+01 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C1]], [[C2]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[BITCAST]], [[SELECT]] + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](f32) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3BF0000000000000 + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C3]], [[C4]] + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[INT]], [[SELECT1]] + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[FMUL]](f32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = G_FEXP2 %1 + %3:_(i32) 
= G_BITCAST %2(f32) + $vgpr0 = COPY %3(i32) ... --- @@ -73,78 +83,88 @@ body: | ; GFX6-LABEL: name: test_fexp2_v2s32 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02 - ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV]](s32), [[C]] - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01 - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]] - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[UV]], [[SELECT]] - ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32) - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000 - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[C4]] - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[SELECT1]] - ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C]] - ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C1]], [[C2]] - ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[UV1]], [[SELECT2]] - ; GFX6-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](s32) - ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C3]], [[C4]] - ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT1]], [[SELECT3]] - ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMUL]](s32), [[FMUL1]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float -1.260000e+02 + ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[UV]](f32), [[C]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 6.400000e+01 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C1]], [[C2]] + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[UV]], [[SELECT]] + ; GFX6-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](f32) + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3BF0000000000000 + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C3]], [[C4]] + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[INT]], [[SELECT1]] + ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[UV1]](f32), [[C]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(f32) = G_SELECT [[FCMP1]](i1), [[C1]], [[C2]] + ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[UV1]], [[SELECT2]] + ; GFX6-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](f32) + ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(f32) = G_SELECT [[FCMP1]](i1), [[C3]], [[C4]] + ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[INT1]], [[SELECT3]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FMUL]](f32), [[FMUL1]](f32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i32>) = G_BITCAST 
[[BUILD_VECTOR]](<2 x f32>) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](<2 x i32>) + ; ; GFX8-LABEL: name: test_fexp2_v2s32 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02 - ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV]](s32), [[C]] - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01 - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]] - ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[UV]], [[SELECT]] - ; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32) - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000 - ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[C4]] - ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[SELECT1]] - ; GFX8-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C]] - ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C1]], [[C2]] - ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[UV1]], [[SELECT2]] - ; GFX8-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](s32) - ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C3]], [[C4]] - ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT1]], [[SELECT3]] - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMUL]](s32), [[FMUL1]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float -1.260000e+02 + ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[UV]](f32), [[C]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 6.400000e+01 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C1]], [[C2]] + ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[UV]], [[SELECT]] + ; GFX8-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](f32) + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3BF0000000000000 + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C3]], [[C4]] + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[INT]], [[SELECT1]] + ; GFX8-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[UV1]](f32), [[C]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(f32) = G_SELECT [[FCMP1]](i1), [[C1]], [[C2]] + ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[UV1]], [[SELECT2]] + ; GFX8-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](f32) + ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(f32) = G_SELECT [[FCMP1]](i1), [[C3]], [[C4]] + ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[INT1]], [[SELECT3]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FMUL]](f32), [[FMUL1]](f32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i32>) = 
G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](<2 x i32>) + ; ; GFX9-LABEL: name: test_fexp2_v2s32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02 - ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV]](s32), [[C]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01 - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[UV]], [[SELECT]] - ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32) - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000 - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[C4]] - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[SELECT1]] - ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C]] - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C1]], [[C2]] - ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[UV1]], [[SELECT2]] - ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](s32) - ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C3]], [[C4]] - ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT1]], [[SELECT3]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMUL]](s32), [[FMUL1]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_FEXP2 %0 - $vgpr0_vgpr1 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float -1.260000e+02 + ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[UV]](f32), [[C]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 6.400000e+01 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C1]], [[C2]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[UV]], [[SELECT]] + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](f32) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3BF0000000000000 + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C3]], [[C4]] + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[INT]], [[SELECT1]] + ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[UV1]](f32), [[C]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(f32) = G_SELECT [[FCMP1]](i1), [[C1]], [[C2]] + ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[UV1]], [[SELECT2]] + ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](f32) + ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(f32) = G_SELECT [[FCMP1]](i1), [[C3]], [[C4]] + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[INT1]], [[SELECT3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) 
= G_BUILD_VECTOR [[FMUL]](f32), [[FMUL1]](f32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x f32>) = G_BITCAST %0(<2 x i32>) + %2:_(<2 x f32>) = G_FEXP2 %1 + %3:_(<2 x i32>) = G_BITCAST %2(<2 x f32>) + $vgpr0_vgpr1 = COPY %3(<2 x i32>) ... --- @@ -156,96 +176,106 @@ body: | ; GFX6-LABEL: name: test_fexp2_v3s32 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02 - ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV]](s32), [[C]] - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01 - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]] - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[UV]], [[SELECT]] - ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32) - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000 - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[C4]] - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[SELECT1]] - ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C]] - ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C1]], [[C2]] - ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[UV1]], [[SELECT2]] - ; GFX6-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](s32) - ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C3]], [[C4]] - ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT1]], [[SELECT3]] - ; GFX6-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV2]](s32), [[C]] - ; GFX6-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C1]], [[C2]] - ; GFX6-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[UV2]], [[SELECT4]] - ; GFX6-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD2]](s32) - ; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C3]], [[C4]] - ; GFX6-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[SELECT5]] - ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMUL]](s32), [[FMUL1]](s32), [[FMUL2]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY]](<3 x i32>) + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float -1.260000e+02 + ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[UV]](f32), [[C]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 6.400000e+01 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C1]], [[C2]] + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[UV]], [[SELECT]] + ; GFX6-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](f32) + ; GFX6-NEXT: 
[[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3BF0000000000000 + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C3]], [[C4]] + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[INT]], [[SELECT1]] + ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[UV1]](f32), [[C]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(f32) = G_SELECT [[FCMP1]](i1), [[C1]], [[C2]] + ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[UV1]], [[SELECT2]] + ; GFX6-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](f32) + ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(f32) = G_SELECT [[FCMP1]](i1), [[C3]], [[C4]] + ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[INT1]], [[SELECT3]] + ; GFX6-NEXT: [[FCMP2:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[UV2]](f32), [[C]] + ; GFX6-NEXT: [[SELECT4:%[0-9]+]]:_(f32) = G_SELECT [[FCMP2]](i1), [[C1]], [[C2]] + ; GFX6-NEXT: [[FADD2:%[0-9]+]]:_(f32) = G_FADD [[UV2]], [[SELECT4]] + ; GFX6-NEXT: [[INT2:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD2]](f32) + ; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(f32) = G_SELECT [[FCMP2]](i1), [[C3]], [[C4]] + ; GFX6-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[INT2]], [[SELECT5]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[FMUL]](f32), [[FMUL1]](f32), [[FMUL2]](f32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[BUILD_VECTOR]](<3 x f32>) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST1]](<3 x i32>) + ; ; GFX8-LABEL: name: test_fexp2_v3s32 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02 - ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV]](s32), [[C]] - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01 - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]] - ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[UV]], [[SELECT]] - ; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32) - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000 - ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[C4]] - ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[SELECT1]] - ; GFX8-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C]] - ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C1]], [[C2]] - ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[UV1]], [[SELECT2]] - ; GFX8-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](s32) - ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C3]], [[C4]] - ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT1]], [[SELECT3]] - ; GFX8-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV2]](s32), [[C]] - ; GFX8-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C1]], [[C2]] - ; GFX8-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[UV2]], [[SELECT4]] - ; GFX8-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD2]](s32) - ; GFX8-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C3]], 
[[C4]] - ; GFX8-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[SELECT5]] - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMUL]](s32), [[FMUL1]](s32), [[FMUL2]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY]](<3 x i32>) + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float -1.260000e+02 + ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[UV]](f32), [[C]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 6.400000e+01 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C1]], [[C2]] + ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[UV]], [[SELECT]] + ; GFX8-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](f32) + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3BF0000000000000 + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C3]], [[C4]] + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[INT]], [[SELECT1]] + ; GFX8-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[UV1]](f32), [[C]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(f32) = G_SELECT [[FCMP1]](i1), [[C1]], [[C2]] + ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[UV1]], [[SELECT2]] + ; GFX8-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](f32) + ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(f32) = G_SELECT [[FCMP1]](i1), [[C3]], [[C4]] + ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[INT1]], [[SELECT3]] + ; GFX8-NEXT: [[FCMP2:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[UV2]](f32), [[C]] + ; GFX8-NEXT: [[SELECT4:%[0-9]+]]:_(f32) = G_SELECT [[FCMP2]](i1), [[C1]], [[C2]] + ; GFX8-NEXT: [[FADD2:%[0-9]+]]:_(f32) = G_FADD [[UV2]], [[SELECT4]] + ; GFX8-NEXT: [[INT2:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD2]](f32) + ; GFX8-NEXT: [[SELECT5:%[0-9]+]]:_(f32) = G_SELECT [[FCMP2]](i1), [[C3]], [[C4]] + ; GFX8-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[INT2]], [[SELECT5]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[FMUL]](f32), [[FMUL1]](f32), [[FMUL2]](f32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[BUILD_VECTOR]](<3 x f32>) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST1]](<3 x i32>) + ; ; GFX9-LABEL: name: test_fexp2_v3s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02 - ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV]](s32), [[C]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01 - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[UV]], [[SELECT]] - ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32) - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000 - ; GFX9-NEXT: 
[[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[C4]] - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[SELECT1]] - ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C]] - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C1]], [[C2]] - ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[UV1]], [[SELECT2]] - ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](s32) - ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C3]], [[C4]] - ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT1]], [[SELECT3]] - ; GFX9-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV2]](s32), [[C]] - ; GFX9-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C1]], [[C2]] - ; GFX9-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[UV2]], [[SELECT4]] - ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD2]](s32) - ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C3]], [[C4]] - ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[SELECT5]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMUL]](s32), [[FMUL1]](s32), [[FMUL2]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s32>) = G_FEXP2 %0 - $vgpr0_vgpr1_vgpr2 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY]](<3 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float -1.260000e+02 + ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[UV]](f32), [[C]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 6.400000e+01 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C1]], [[C2]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[UV]], [[SELECT]] + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](f32) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3BF0000000000000 + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C3]], [[C4]] + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[INT]], [[SELECT1]] + ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[UV1]](f32), [[C]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(f32) = G_SELECT [[FCMP1]](i1), [[C1]], [[C2]] + ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[UV1]], [[SELECT2]] + ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](f32) + ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(f32) = G_SELECT [[FCMP1]](i1), [[C3]], [[C4]] + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[INT1]], [[SELECT3]] + ; GFX9-NEXT: [[FCMP2:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[UV2]](f32), [[C]] + ; GFX9-NEXT: [[SELECT4:%[0-9]+]]:_(f32) = G_SELECT [[FCMP2]](i1), [[C1]], [[C2]] + ; GFX9-NEXT: [[FADD2:%[0-9]+]]:_(f32) = G_FADD [[UV2]], [[SELECT4]] + ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD2]](f32) + ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(f32) = G_SELECT [[FCMP2]](i1), [[C3]], [[C4]] + ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[INT2]], 
[[SELECT5]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[FMUL]](f32), [[FMUL1]](f32), [[FMUL2]](f32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[BUILD_VECTOR]](<3 x f32>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST1]](<3 x i32>) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x f32>) = G_BITCAST %0(<3 x i32>) + %2:_(<3 x f32>) = G_FEXP2 %1 + %3:_(<3 x i32>) = G_BITCAST %2(<3 x f32>) + $vgpr0_vgpr1_vgpr2 = COPY %3(<3 x i32>) ... --- @@ -257,34 +287,44 @@ body: | ; GFX6-LABEL: name: test_fexp2_s16 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX6-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FPEXT]](s32) - ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT]](s32) - ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX6-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; GFX6-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FPEXT]](f32) + ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[INT]](f32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + ; ; GFX8-LABEL: name: test_fexp2_s16 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[FEXP2_:%[0-9]+]]:_(s16) = G_FEXP2 [[TRUNC]] - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FEXP2_]](s16) - ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX8-NEXT: [[FEXP2_:%[0-9]+]]:_(f16) = G_FEXP2 [[BITCAST]] + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[FEXP2_]](f16) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + ; ; GFX9-LABEL: name: test_fexp2_s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[FEXP2_:%[0-9]+]]:_(s16) = G_FEXP2 [[TRUNC]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FEXP2_]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s16) = G_FEXP2 %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[FEXP2_:%[0-9]+]]:_(f16) = G_FEXP2 [[BITCAST]] + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[FEXP2_]](f16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(f16) = G_BITCAST %1(i16) + %3:_(f16) = G_FEXP2 %2 + %4:_(i16) = G_BITCAST %3(f16) + 
%5:_(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... --- @@ -296,55 +336,78 @@ body: | ; GFX6-LABEL: name: test_fexp2_v2s16 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX6-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FPEXT]](s32) - ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT]](s32) - ; GFX6-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; GFX6-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FPEXT1]](s32) - ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32) - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %12(i16) + ; GFX6-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST1]](f16) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %13(i16) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX6-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FPEXT]](f32) + ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[INT]](f32) + ; GFX6-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; GFX6-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FPEXT1]](f32) + ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[INT1]](f32) + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST5]](i16) + ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST6]](i16) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; GFX6-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST7]](<2 x f16>) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST8]](<2 x i16>) + ; ; GFX8-LABEL: name: test_fexp2_v2s16 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: 
[[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8-NEXT: [[FEXP2_:%[0-9]+]]:_(s16) = G_FEXP2 [[TRUNC]] - ; GFX8-NEXT: [[FEXP2_1:%[0-9]+]]:_(s16) = G_FEXP2 [[TRUNC1]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FEXP2_]](s16) - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FEXP2_1]](s16) - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %8(i16) + ; GFX8-NEXT: [[FEXP2_:%[0-9]+]]:_(f16) = G_FEXP2 [[BITCAST1]] + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %9(i16) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX8-NEXT: [[FEXP2_1:%[0-9]+]]:_(f16) = G_FEXP2 [[BITCAST2]] + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[FEXP2_]](f16) + ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[FEXP2_1]](f16) + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST5]](i16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST6]](i16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST7]](<2 x f16>) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST8]](<2 x i16>) + ; ; GFX9-LABEL: name: test_fexp2_v2s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[FEXP2_:%[0-9]+]]:_(s16) = G_FEXP2 [[TRUNC]] - ; GFX9-NEXT: [[FEXP2_1:%[0-9]+]]:_(s16) = G_FEXP2 [[TRUNC1]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FEXP2_]](s16), [[FEXP2_1]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = G_FEXP2 %0 - $vgpr0 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %8(i16) + ; GFX9-NEXT: [[FEXP2_:%[0-9]+]]:_(f16) = G_FEXP2 [[BITCAST1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %9(i16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: 
[[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[FEXP2_1:%[0-9]+]]:_(f16) = G_FEXP2 [[BITCAST2]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[FEXP2_]](f16), [[FEXP2_1]](f16) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BUILD_VECTOR]](<2 x f16>) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST5]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %2:_(<2 x f16>) = G_FEXP2 %1 + %3:_(<2 x i16>) = G_BITCAST %2(<2 x f16>) + $vgpr0 = COPY %3(<2 x i16>) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ffloor.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ffloor.mir index cf55edbc2456e..18716ebb99fb2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ffloor.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ffloor.mir @@ -14,24 +14,34 @@ body: | ; SI-LABEL: name: test_ffloor_s32 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[COPY]] - ; SI-NEXT: $vgpr0 = COPY [[FFLOOR]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[FFLOOR:%[0-9]+]]:_(f32) = G_FFLOOR [[BITCAST]] + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[FFLOOR]](f32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + ; ; VI-LABEL: name: test_ffloor_s32 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[COPY]] - ; VI-NEXT: $vgpr0 = COPY [[FFLOOR]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(f32) = G_FFLOOR [[BITCAST]] + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[FFLOOR]](f32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + ; ; GFX9-LABEL: name: test_ffloor_s32 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[COPY]] - ; GFX9-NEXT: $vgpr0 = COPY [[FFLOOR]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_FFLOOR %0 - $vgpr0 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[FFLOOR:%[0-9]+]]:_(f32) = G_FFLOOR [[BITCAST]] + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[FFLOOR]](f32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = G_FFLOOR %1 + %3:_(i32) = G_BITCAST %2(f32) + $vgpr0 = COPY %3(i32) ... 
--- @@ -43,30 +53,40 @@ body: | ; SI-LABEL: name: test_ffloor_s64 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[COPY]](s64) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3FEFFFFFFFFFFFFF - ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s64) = G_FMINNUM_IEEE [[INT]], [[C]] - ; SI-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ord), [[COPY]](s64), [[COPY]] - ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[COPY]], [[FMINNUM_IEEE]] - ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[SELECT]] - ; SI-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[COPY]], [[FNEG]] - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[FADD]](s64) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; SI-NEXT: [[INT:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[BITCAST]](f64) + ; SI-NEXT: [[C:%[0-9]+]]:_(f64) = G_FCONSTANT double 0x3FEFFFFFFFFFFFFF + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f64) = G_FMINNUM_IEEE [[INT]], [[C]] + ; SI-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(ord), [[BITCAST]](f64), [[BITCAST]] + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(f64) = G_SELECT [[FCMP]](i1), [[BITCAST]], [[FMINNUM_IEEE]] + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(f64) = G_FNEG [[SELECT]] + ; SI-NEXT: [[FADD:%[0-9]+]]:_(f64) = G_FADD [[BITCAST]], [[FNEG]] + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[FADD]](f64) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](i64) + ; ; VI-LABEL: name: test_ffloor_s64 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(s64) = G_FFLOOR [[COPY]] - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[FFLOOR]](s64) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(f64) = G_FFLOOR [[BITCAST]] + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[FFLOOR]](f64) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](i64) + ; ; GFX9-LABEL: name: test_ffloor_s64 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FFLOOR:%[0-9]+]]:_(s64) = G_FFLOOR [[COPY]] - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FFLOOR]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_FFLOOR %0 - $vgpr0_vgpr1 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; GFX9-NEXT: [[FFLOOR:%[0-9]+]]:_(f64) = G_FFLOOR [[BITCAST]] + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[FFLOOR]](f64) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(f64) = G_BITCAST %0(i64) + %2:_(f64) = G_FFLOOR %1 + %3:_(i64) = G_BITCAST %2(f64) + $vgpr0_vgpr1 = COPY %3(i64) ... 
@@ -79,28 +99,38 @@ body: | ; SI-LABEL: name: test_ffloor_s64_nnan ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[INT:%[0-9]+]]:_(s64) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[COPY]](s64) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3FEFFFFFFFFFFFFF - ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s64) = nnan G_FMINNUM_IEEE [[INT]], [[C]] - ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = nnan G_FNEG [[FMINNUM_IEEE]] - ; SI-NEXT: [[FADD:%[0-9]+]]:_(s64) = nnan G_FADD [[COPY]], [[FNEG]] - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[FADD]](s64) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; SI-NEXT: [[INT:%[0-9]+]]:_(f64) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[BITCAST]](f64) + ; SI-NEXT: [[C:%[0-9]+]]:_(f64) = G_FCONSTANT double 0x3FEFFFFFFFFFFFFF + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f64) = nnan G_FMINNUM_IEEE [[INT]], [[C]] + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(f64) = nnan G_FNEG [[FMINNUM_IEEE]] + ; SI-NEXT: [[FADD:%[0-9]+]]:_(f64) = nnan G_FADD [[BITCAST]], [[FNEG]] + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[FADD]](f64) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](i64) + ; ; VI-LABEL: name: test_ffloor_s64_nnan ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(s64) = nnan G_FFLOOR [[COPY]] - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[FFLOOR]](s64) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(f64) = nnan G_FFLOOR [[BITCAST]] + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[FFLOOR]](f64) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](i64) + ; ; GFX9-LABEL: name: test_ffloor_s64_nnan ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FFLOOR:%[0-9]+]]:_(s64) = nnan G_FFLOOR [[COPY]] - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FFLOOR]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = nnan G_FFLOOR %0 - $vgpr0_vgpr1 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; GFX9-NEXT: [[FFLOOR:%[0-9]+]]:_(f64) = nnan G_FFLOOR [[BITCAST]] + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[FFLOOR]](f64) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(f64) = G_BITCAST %0(i64) + %2:_(f64) = nnan G_FFLOOR %1 + %3:_(i64) = G_BITCAST %2(f64) + $vgpr0_vgpr1 = COPY %3(i64) ... 
@@ -113,30 +143,40 @@ body: | ; SI-LABEL: name: test_ffloor_s64_nssaz ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[INT:%[0-9]+]]:_(s64) = nsz G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[COPY]](s64) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3FEFFFFFFFFFFFFF - ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s64) = nsz G_FMINNUM_IEEE [[INT]], [[C]] - ; SI-NEXT: [[FCMP:%[0-9]+]]:_(s1) = nsz G_FCMP floatpred(ord), [[COPY]](s64), [[COPY]] - ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = nsz G_SELECT [[FCMP]](s1), [[COPY]], [[FMINNUM_IEEE]] - ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = nsz G_FNEG [[SELECT]] - ; SI-NEXT: [[FADD:%[0-9]+]]:_(s64) = nsz G_FADD [[COPY]], [[FNEG]] - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[FADD]](s64) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; SI-NEXT: [[INT:%[0-9]+]]:_(f64) = nsz G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[BITCAST]](f64) + ; SI-NEXT: [[C:%[0-9]+]]:_(f64) = G_FCONSTANT double 0x3FEFFFFFFFFFFFFF + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f64) = nsz G_FMINNUM_IEEE [[INT]], [[C]] + ; SI-NEXT: [[FCMP:%[0-9]+]]:_(i1) = nsz G_FCMP floatpred(ord), [[BITCAST]](f64), [[BITCAST]] + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(f64) = nsz G_SELECT [[FCMP]](i1), [[BITCAST]], [[FMINNUM_IEEE]] + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(f64) = nsz G_FNEG [[SELECT]] + ; SI-NEXT: [[FADD:%[0-9]+]]:_(f64) = nsz G_FADD [[BITCAST]], [[FNEG]] + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[FADD]](f64) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](i64) + ; ; VI-LABEL: name: test_ffloor_s64_nssaz ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(s64) = nsz G_FFLOOR [[COPY]] - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[FFLOOR]](s64) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(f64) = nsz G_FFLOOR [[BITCAST]] + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[FFLOOR]](f64) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](i64) + ; ; GFX9-LABEL: name: test_ffloor_s64_nssaz ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FFLOOR:%[0-9]+]]:_(s64) = nsz G_FFLOOR [[COPY]] - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FFLOOR]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = nsz G_FFLOOR %0 - $vgpr0_vgpr1 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; GFX9-NEXT: [[FFLOOR:%[0-9]+]]:_(f64) = nsz G_FFLOOR [[BITCAST]] + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[FFLOOR]](f64) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(f64) = G_BITCAST %0(i64) + %2:_(f64) = nsz G_FFLOOR %1 + %3:_(i64) = G_BITCAST %2(f64) + $vgpr0_vgpr1 = COPY %3(i64) ... 
@@ -149,34 +189,44 @@ body: | ; SI-LABEL: name: test_ffloor_s16 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT]] - ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR]](s32) - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; SI-NEXT: [[FFLOOR:%[0-9]+]]:_(f32) = G_FFLOOR [[FPEXT]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FFLOOR]](f32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + ; ; VI-LABEL: name: test_ffloor_s16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR]](s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(f16) = G_FFLOOR [[BITCAST]] + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[FFLOOR]](f16) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + ; ; GFX9-LABEL: name: test_ffloor_s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s16) = G_FFLOOR %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[FFLOOR:%[0-9]+]]:_(f16) = G_FFLOOR [[BITCAST]] + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[FFLOOR]](f16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(f16) = G_BITCAST %1(i16) + %3:_(f16) = G_FFLOOR %2 + %4:_(i16) = G_BITCAST %3(f16) + %5:_(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... 
--- @@ -188,33 +238,43 @@ body: | ; SI-LABEL: name: test_ffloor_v2s32 ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; SI-NEXT: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[UV]] - ; SI-NEXT: [[FFLOOR1:%[0-9]+]]:_(s32) = G_FFLOOR [[UV1]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FFLOOR]](s32), [[FFLOOR1]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; SI-NEXT: [[FFLOOR:%[0-9]+]]:_(f32) = G_FFLOOR [[UV]] + ; SI-NEXT: [[FFLOOR1:%[0-9]+]]:_(f32) = G_FFLOOR [[UV1]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FFLOOR]](f32), [[FFLOOR1]](f32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](<2 x i32>) + ; ; VI-LABEL: name: test_ffloor_v2s32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[UV]] - ; VI-NEXT: [[FFLOOR1:%[0-9]+]]:_(s32) = G_FFLOOR [[UV1]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FFLOOR]](s32), [[FFLOOR1]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(f32) = G_FFLOOR [[UV]] + ; VI-NEXT: [[FFLOOR1:%[0-9]+]]:_(f32) = G_FFLOOR [[UV1]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FFLOOR]](f32), [[FFLOOR1]](f32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](<2 x i32>) + ; ; GFX9-LABEL: name: test_ffloor_v2s32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-NEXT: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[UV]] - ; GFX9-NEXT: [[FFLOOR1:%[0-9]+]]:_(s32) = G_FFLOOR [[UV1]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FFLOOR]](s32), [[FFLOOR1]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_FFLOOR %0 - $vgpr0_vgpr1 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; GFX9-NEXT: [[FFLOOR:%[0-9]+]]:_(f32) = G_FFLOOR [[UV]] + ; GFX9-NEXT: [[FFLOOR1:%[0-9]+]]:_(f32) = G_FFLOOR [[UV1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FFLOOR]](f32), [[FFLOOR1]](f32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](<2 x i32>) + 
%0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x f32>) = G_BITCAST %0(<2 x i32>) + %2:_(<2 x f32>) = G_FFLOOR %1 + %3:_(<2 x i32>) = G_BITCAST %2(<2 x f32>) + $vgpr0_vgpr1 = COPY %3(<2 x i32>) ... --- @@ -226,36 +286,46 @@ body: | ; SI-LABEL: name: test_ffloor_v3s32 ; SI: liveins: $vgpr0_vgpr1_vgpr2 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; SI-NEXT: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[UV]] - ; SI-NEXT: [[FFLOOR1:%[0-9]+]]:_(s32) = G_FFLOOR [[UV1]] - ; SI-NEXT: [[FFLOOR2:%[0-9]+]]:_(s32) = G_FFLOOR [[UV2]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FFLOOR]](s32), [[FFLOOR1]](s32), [[FFLOOR2]](s32) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY]](<3 x i32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; SI-NEXT: [[FFLOOR:%[0-9]+]]:_(f32) = G_FFLOOR [[UV]] + ; SI-NEXT: [[FFLOOR1:%[0-9]+]]:_(f32) = G_FFLOOR [[UV1]] + ; SI-NEXT: [[FFLOOR2:%[0-9]+]]:_(f32) = G_FFLOOR [[UV2]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[FFLOOR]](f32), [[FFLOOR1]](f32), [[FFLOOR2]](f32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[BUILD_VECTOR]](<3 x f32>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST1]](<3 x i32>) + ; ; VI-LABEL: name: test_ffloor_v3s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[UV]] - ; VI-NEXT: [[FFLOOR1:%[0-9]+]]:_(s32) = G_FFLOOR [[UV1]] - ; VI-NEXT: [[FFLOOR2:%[0-9]+]]:_(s32) = G_FFLOOR [[UV2]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FFLOOR]](s32), [[FFLOOR1]](s32), [[FFLOOR2]](s32) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY]](<3 x i32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(f32) = G_FFLOOR [[UV]] + ; VI-NEXT: [[FFLOOR1:%[0-9]+]]:_(f32) = G_FFLOOR [[UV1]] + ; VI-NEXT: [[FFLOOR2:%[0-9]+]]:_(f32) = G_FFLOOR [[UV2]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[FFLOOR]](f32), [[FFLOOR1]](f32), [[FFLOOR2]](f32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[BUILD_VECTOR]](<3 x f32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST1]](<3 x i32>) + ; ; GFX9-LABEL: name: test_ffloor_v3s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX9-NEXT: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[UV]] - ; GFX9-NEXT: [[FFLOOR1:%[0-9]+]]:_(s32) = G_FFLOOR [[UV1]] - ; GFX9-NEXT: [[FFLOOR2:%[0-9]+]]:_(s32) = G_FFLOOR [[UV2]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FFLOOR]](s32), [[FFLOOR1]](s32), [[FFLOOR2]](s32) - ; 
GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s32>) = G_FFLOOR %0 - $vgpr0_vgpr1_vgpr2 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY]](<3 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; GFX9-NEXT: [[FFLOOR:%[0-9]+]]:_(f32) = G_FFLOOR [[UV]] + ; GFX9-NEXT: [[FFLOOR1:%[0-9]+]]:_(f32) = G_FFLOOR [[UV1]] + ; GFX9-NEXT: [[FFLOOR2:%[0-9]+]]:_(f32) = G_FFLOOR [[UV2]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[FFLOOR]](f32), [[FFLOOR1]](f32), [[FFLOOR2]](f32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[BUILD_VECTOR]](<3 x f32>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST1]](<3 x i32>) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x f32>) = G_BITCAST %0(<3 x i32>) + %2:_(<3 x f32>) = G_FFLOOR %1 + %3:_(<3 x i32>) = G_BITCAST %2(<3 x f32>) + $vgpr0_vgpr1_vgpr2 = COPY %3(<3 x i32>) ... --- @@ -267,44 +337,54 @@ body: | ; SI-LABEL: name: test_ffloor_v2s64 ; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; SI-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[UV]](s64) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3FEFFFFFFFFFFFFF - ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s64) = G_FMINNUM_IEEE [[INT]], [[C]] - ; SI-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ord), [[UV]](s64), [[UV]] - ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[UV]], [[FMINNUM_IEEE]] - ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[SELECT]] - ; SI-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[UV]], [[FNEG]] - ; SI-NEXT: [[INT1:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[UV1]](s64) - ; SI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s64) = G_FMINNUM_IEEE [[INT1]], [[C]] - ; SI-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ord), [[UV1]](s64), [[UV1]] - ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[FCMP1]](s1), [[UV1]], [[FMINNUM_IEEE1]] - ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[SELECT1]] - ; SI-NEXT: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[UV1]], [[FNEG1]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FADD]](s64), [[FADD1]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(f64), [[UV1:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST]](<2 x f64>) + ; SI-NEXT: [[INT:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[UV]](f64) + ; SI-NEXT: [[C:%[0-9]+]]:_(f64) = G_FCONSTANT double 0x3FEFFFFFFFFFFFFF + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f64) = G_FMINNUM_IEEE [[INT]], [[C]] + ; SI-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(ord), [[UV]](f64), [[UV]] + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(f64) = G_SELECT [[FCMP]](i1), [[UV]], [[FMINNUM_IEEE]] + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(f64) = G_FNEG [[SELECT]] + ; SI-NEXT: [[FADD:%[0-9]+]]:_(f64) = G_FADD [[UV]], [[FNEG]] + ; SI-NEXT: [[INT1:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[UV1]](f64) + ; SI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(f64) = G_FMINNUM_IEEE [[INT1]], 
[[C]] + ; SI-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(ord), [[UV1]](f64), [[UV1]] + ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(f64) = G_SELECT [[FCMP1]](i1), [[UV1]], [[FMINNUM_IEEE1]] + ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(f64) = G_FNEG [[SELECT1]] + ; SI-NEXT: [[FADD1:%[0-9]+]]:_(f64) = G_FADD [[UV1]], [[FNEG1]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f64>) = G_BUILD_VECTOR [[FADD]](f64), [[FADD1]](f64) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BUILD_VECTOR]](<2 x f64>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST1]](<2 x i64>) + ; ; VI-LABEL: name: test_ffloor_v2s64 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(s64) = G_FFLOOR [[UV]] - ; VI-NEXT: [[FFLOOR1:%[0-9]+]]:_(s64) = G_FFLOOR [[UV1]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FFLOOR]](s64), [[FFLOOR1]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(f64), [[UV1:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST]](<2 x f64>) + ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(f64) = G_FFLOOR [[UV]] + ; VI-NEXT: [[FFLOOR1:%[0-9]+]]:_(f64) = G_FFLOOR [[UV1]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f64>) = G_BUILD_VECTOR [[FFLOOR]](f64), [[FFLOOR1]](f64) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BUILD_VECTOR]](<2 x f64>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST1]](<2 x i64>) + ; ; GFX9-LABEL: name: test_ffloor_v2s64 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX9-NEXT: [[FFLOOR:%[0-9]+]]:_(s64) = G_FFLOOR [[UV]] - ; GFX9-NEXT: [[FFLOOR1:%[0-9]+]]:_(s64) = G_FFLOOR [[UV1]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FFLOOR]](s64), [[FFLOOR1]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s64>) = G_FFLOOR %0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f64), [[UV1:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST]](<2 x f64>) + ; GFX9-NEXT: [[FFLOOR:%[0-9]+]]:_(f64) = G_FFLOOR [[UV]] + ; GFX9-NEXT: [[FFLOOR1:%[0-9]+]]:_(f64) = G_FFLOOR [[UV1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f64>) = G_BUILD_VECTOR [[FFLOOR]](f64), [[FFLOOR1]](f64) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BUILD_VECTOR]](<2 x f64>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST1]](<2 x i64>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x f64>) = G_BITCAST %0(<2 x i64>) + %2:_(<2 x f64>) = G_FFLOOR %1 + %3:_(<2 x i64>) = G_BITCAST %2(<2 x f64>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3(<2 x i64>) ... 
--- @@ -316,57 +396,80 @@ body: | ; SI-LABEL: name: test_ffloor_v2s16 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT]] - ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR]](s32) - ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI-NEXT: [[FFLOOR1:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT1]] - ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR1]](s32) - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %12(i16) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST1]](f16) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %13(i16) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; SI-NEXT: [[FFLOOR:%[0-9]+]]:_(f32) = G_FFLOOR [[FPEXT]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FFLOOR]](f32) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; SI-NEXT: [[FFLOOR1:%[0-9]+]]:_(f32) = G_FFLOOR [[FPEXT1]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[FFLOOR1]](f32) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST5]](i16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST6]](i16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST7]](<2 x f16>) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST8]](<2 x i16>) + ; ; VI-LABEL: name: test_ffloor_v2s16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC]] - ; VI-NEXT: [[FFLOOR1:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC1]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT 
[[FFLOOR]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FFLOOR1]](s16) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %8(i16) + ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(f16) = G_FFLOOR [[BITCAST1]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %9(i16) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[FFLOOR1:%[0-9]+]]:_(f16) = G_FFLOOR [[BITCAST2]] + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[FFLOOR]](f16) + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[FFLOOR1]](f16) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST5]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST6]](i16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST7]](<2 x f16>) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST8]](<2 x i16>) + ; ; GFX9-LABEL: name: test_ffloor_v2s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC]] - ; GFX9-NEXT: [[FFLOOR1:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC1]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FFLOOR]](s16), [[FFLOOR1]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = G_FFLOOR %0 - $vgpr0 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %8(i16) + ; GFX9-NEXT: [[FFLOOR:%[0-9]+]]:_(f16) = G_FFLOOR [[BITCAST1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %9(i16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[FFLOOR1:%[0-9]+]]:_(f16) = G_FFLOOR [[BITCAST2]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[FFLOOR]](f16), [[FFLOOR1]](f16) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = 
G_BITCAST [[BUILD_VECTOR]](<2 x f16>) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST5]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %2:_(<2 x f16>) = G_FFLOOR %1 + %3:_(<2 x i16>) = G_BITCAST %2(<2 x f16>) + $vgpr0 = COPY %3(<2 x i16>) ... --- @@ -375,69 +478,96 @@ body: | bb.0: ; SI-LABEL: name: test_ffloor_v3s16 - ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT]] - ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR]](s32) - ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI-NEXT: [[FFLOOR1:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT1]] - ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR1]](s32) - ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI-NEXT: [[FFLOOR2:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT2]] - ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR2]](s32) - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) - ; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; SI: [[DEF:%[0-9]+]]:_(<4 x f16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[DEF]](<4 x f16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST %35(i16) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %41(i16) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %36(i16) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST5]](<2 x i16>) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST6]](i32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST6]], [[C]](i32) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; SI-NEXT: [[FFLOOR:%[0-9]+]]:_(f32) = G_FFLOOR [[FPEXT]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FFLOOR]](f32) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; SI-NEXT: [[FFLOOR1:%[0-9]+]]:_(f32) = G_FFLOOR [[FPEXT1]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[FFLOOR1]](f32) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST1]](f16) + ; SI-NEXT: [[FFLOOR2:%[0-9]+]]:_(f32) = G_FFLOOR [[FPEXT2]] + ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(f16) = G_FPTRUNC [[FFLOOR2]](f32) + ; SI-NEXT: 
[[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC2]](f16) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST7]](i16) + ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST8]](i16) + ; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST9]](i16) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32), [[ANYEXT2]](i32) + ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x i32>) + ; ; VI-LABEL: name: test_ffloor_v3s16 - ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC]] - ; VI-NEXT: [[FFLOOR1:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC1]] - ; VI-NEXT: [[FFLOOR2:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC2]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR]](s16) - ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR1]](s16) - ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR2]](s16) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; VI: [[DEF:%[0-9]+]]:_(<4 x f16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[DEF]](<4 x f16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST %29(i16) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %35(i16) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %30(i16) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST5]](<2 x i16>) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST6]](i32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST6]], [[C]](i32) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(f16) = G_FFLOOR [[BITCAST]] + ; VI-NEXT: [[FFLOOR1:%[0-9]+]]:_(f16) = G_FFLOOR [[BITCAST2]] + ; VI-NEXT: [[FFLOOR2:%[0-9]+]]:_(f16) = G_FFLOOR [[BITCAST1]] + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[FFLOOR]](f16) + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[FFLOOR1]](f16) + ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[FFLOOR2]](f16) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST7]](i16) + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST8]](i16) + ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST9]](i16) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32), [[ANYEXT2]](i32) + ; VI-NEXT: S_NOP 0, implicit 
[[BUILD_VECTOR]](<3 x i32>) + ; ; GFX9-LABEL: name: test_ffloor_v3s16 - ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC]] - ; GFX9-NEXT: [[FFLOOR1:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC1]] - ; GFX9-NEXT: [[FFLOOR2:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC2]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR]](s16) - ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR1]](s16) - ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR2]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) - %0:_(<3 x s16>) = G_IMPLICIT_DEF - %1:_(<3 x s16>) = G_FFLOOR %0 - %2:_(<3 x s32>) = G_ANYEXT %1 - S_NOP 0, implicit %2 + ; GFX9: [[DEF:%[0-9]+]]:_(<4 x f16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[DEF]](<4 x f16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST %29(i16) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %35(i16) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %30(i16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST5]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST6]](i32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST6]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[FFLOOR:%[0-9]+]]:_(f16) = G_FFLOOR [[BITCAST]] + ; GFX9-NEXT: [[FFLOOR1:%[0-9]+]]:_(f16) = G_FFLOOR [[BITCAST2]] + ; GFX9-NEXT: [[FFLOOR2:%[0-9]+]]:_(f16) = G_FFLOOR [[BITCAST1]] + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[FFLOOR]](f16) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[FFLOOR1]](f16) + ; GFX9-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[FFLOOR2]](f16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST7]](i16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST8]](i16) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST9]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32), [[ANYEXT2]](i32) + ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x i32>) + %0:_(<3 x f16>) = G_IMPLICIT_DEF + %1:_(<3 x f16>) = G_FFLOOR %0 + %2:_(<3 x i16>) = G_BITCAST %1(<3 x f16>) + %3:_(<3 x i32>) = G_ANYEXT %2(<3 x i16>) + S_NOP 0, implicit %3(<3 x i32>) ... 
--- @@ -449,94 +579,130 @@ body: | ; SI-LABEL: name: test_ffloor_v4s16 ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT]] - ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR]](s32) - ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI-NEXT: [[FFLOOR1:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT1]] - ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR1]](s32) - ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI-NEXT: [[FFLOOR2:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT2]] - ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR2]](s32) - ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI-NEXT: [[FFLOOR3:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT3]] - ; SI-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR3]](s32) - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) - ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x f16>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %24(i16) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %30(i16) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %25(i16) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %31(i16) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST5]](<2 x i16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST6]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST6]], [[C]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST 
[[BITCAST7]](<2 x i16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST8]](i32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST8]], [[C]](i32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST1]](f16) + ; SI-NEXT: [[FFLOOR:%[0-9]+]]:_(f32) = G_FFLOOR [[FPEXT]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FFLOOR]](f32) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST3]](f16) + ; SI-NEXT: [[FFLOOR1:%[0-9]+]]:_(f32) = G_FFLOOR [[FPEXT1]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[FFLOOR1]](f32) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; SI-NEXT: [[FFLOOR2:%[0-9]+]]:_(f32) = G_FFLOOR [[FPEXT2]] + ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(f16) = G_FPTRUNC [[FFLOOR2]](f32) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST4]](f16) + ; SI-NEXT: [[FFLOOR3:%[0-9]+]]:_(f32) = G_FFLOOR [[FPEXT3]] + ; SI-NEXT: [[FPTRUNC3:%[0-9]+]]:_(f16) = G_FPTRUNC [[FFLOOR3]](f32) + ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST9]](i16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST10]](i16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC2]](f16) + ; SI-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC3]](f16) + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST12]](i16) + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST13]](i16) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR1]](i32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[BITCAST11]](<2 x f16>), [[BITCAST14]](<2 x f16>) + ; SI-NEXT: [[BITCAST15:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x f16>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST15]](<4 x i16>) + ; ; VI-LABEL: name: test_ffloor_v4s16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC]] - ; VI-NEXT: [[FFLOOR1:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC1]] - ; VI-NEXT: [[FFLOOR2:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC2]] - ; VI-NEXT: [[FFLOOR3:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC3]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FFLOOR]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FFLOOR1]](s16) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: 
[[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FFLOOR2]](s16) - ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FFLOOR3]](s16) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x f16>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %16(i16) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %22(i16) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %17(i16) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %23(i16) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST5]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST6]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST6]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST7]](<2 x i16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST8]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST8]], [[C]](i32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(f16) = G_FFLOOR [[BITCAST1]] + ; VI-NEXT: [[FFLOOR1:%[0-9]+]]:_(f16) = G_FFLOOR [[BITCAST3]] + ; VI-NEXT: [[FFLOOR2:%[0-9]+]]:_(f16) = G_FFLOOR [[BITCAST2]] + ; VI-NEXT: [[FFLOOR3:%[0-9]+]]:_(f16) = G_FFLOOR [[BITCAST4]] + ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[FFLOOR]](f16) + ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[FFLOOR1]](f16) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST9]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST10]](i16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[FFLOOR2]](f16) + ; VI-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[FFLOOR3]](f16) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST12]](i16) + ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST13]](i16) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR1]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[BITCAST11]](<2 x f16>), [[BITCAST14]](<2 x f16>) + ; VI-NEXT: [[BITCAST15:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x f16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST15]](<4 x i16>) + ; ; GFX9-LABEL: name: test_ffloor_v4s16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: 
[[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC]] - ; GFX9-NEXT: [[FFLOOR1:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC1]] - ; GFX9-NEXT: [[FFLOOR2:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC2]] - ; GFX9-NEXT: [[FFLOOR3:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC3]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FFLOOR]](s16), [[FFLOOR1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FFLOOR2]](s16), [[FFLOOR3]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = G_FFLOOR %0 - $vgpr0_vgpr1 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x f16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %16(i16) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %22(i16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %17(i16) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %23(i16) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST5]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST6]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST6]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST7]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST8]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST8]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[FFLOOR:%[0-9]+]]:_(f16) = G_FFLOOR [[BITCAST1]] + ; GFX9-NEXT: [[FFLOOR1:%[0-9]+]]:_(f16) = G_FFLOOR [[BITCAST3]] + ; GFX9-NEXT: [[FFLOOR2:%[0-9]+]]:_(f16) = G_FFLOOR [[BITCAST2]] + ; GFX9-NEXT: [[FFLOOR3:%[0-9]+]]:_(f16) = G_FFLOOR [[BITCAST4]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[FFLOOR]](f16), [[FFLOOR1]](f16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[FFLOOR2]](f16), [[FFLOOR3]](f16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x f16>), [[BUILD_VECTOR1]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST9:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x f16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST9]](<4 x i16>) + %0:_(<4 x i16>) = COPY 
$vgpr0_vgpr1 + %1:_(<4 x f16>) = G_BITCAST %0(<4 x i16>) + %2:_(<4 x f16>) = G_FFLOOR %1 + %3:_(<4 x i16>) = G_BITCAST %2(<4 x f16>) + $vgpr0_vgpr1 = COPY %3(<4 x i16>) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-flog.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-flog.mir index 06fbbefd2c58e..e135b8a7bb30d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-flog.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-flog.mir @@ -10,33 +10,37 @@ body: | ; CHECK-LABEL: name: test_flog_s32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3810000000000000 - ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[COPY]](s32), [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]] - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[SELECT]] - ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FE62E42E0000000 - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3E6EFA39E0000000 - ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[C3]] - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FMUL1]] - ; CHECK-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[INT]], [[C3]], [[FNEG]] - ; CHECK-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[INT]], [[C4]], [[FMA]] - ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[FMA1]] - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x7FF0000000000000 - ; CHECK-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[INT]] - ; CHECK-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FABS]](s32), [[C5]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[FADD]], [[INT]] - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x40362E4300000000 - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C7]], [[C6]] - ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[SELECT1]], [[SELECT2]] - ; CHECK-NEXT: $vgpr0 = COPY [[FSUB]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_FLOG %0 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3810000000000000 + ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[BITCAST]](f32), [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C1]], [[C2]] + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST]], [[SELECT]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL]](f32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FE62E42E0000000 + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3E6EFA39E0000000 + ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[INT]], [[C3]] + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[FMUL1]] + ; CHECK-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[INT]], [[C3]], [[FNEG]] + ; CHECK-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[INT]], [[C4]], [[FMA]] + ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(f32) = 
G_FADD [[FMUL1]], [[FMA1]] + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x7FF0000000000000 + ; CHECK-NEXT: [[FABS:%[0-9]+]]:_(f32) = G_FABS [[INT]] + ; CHECK-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[FABS]](f32), [[C5]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(f32) = G_SELECT [[FCMP1]](i1), [[FADD]], [[INT]] + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x40362E4300000000 + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C7]], [[C6]] + ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(f32) = G_FSUB [[SELECT1]], [[SELECT2]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[FSUB]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = G_FLOG %1 + %3:_(i32) = G_BITCAST %2(f32) + $vgpr0 = COPY %3(i32) ... --- @@ -48,33 +52,37 @@ body: | ; CHECK-LABEL: name: test_flog_s32_flags ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3810000000000000 - ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[COPY]](s32), [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = nnan G_SELECT [[FCMP]](s1), [[C1]], [[C2]] - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[SELECT]] - ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FE62E42E0000000 - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3E6EFA39E0000000 - ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT]], [[C3]] - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = nnan G_FNEG [[FMUL1]] - ; CHECK-NEXT: [[FMA:%[0-9]+]]:_(s32) = nnan G_FMA [[INT]], [[C3]], [[FNEG]] - ; CHECK-NEXT: [[FMA1:%[0-9]+]]:_(s32) = nnan G_FMA [[INT]], [[C4]], [[FMA]] - ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FMUL1]], [[FMA1]] - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x7FF0000000000000 - ; CHECK-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[INT]] - ; CHECK-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = nnan G_FCMP floatpred(olt), [[FABS]](s32), [[C5]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = nnan G_SELECT [[FCMP1]](s1), [[FADD]], [[INT]] - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x40362E4300000000 - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = nnan G_SELECT [[FCMP]](s1), [[C7]], [[C6]] - ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = nnan G_FSUB [[SELECT1]], [[SELECT2]] - ; CHECK-NEXT: $vgpr0 = COPY [[FSUB]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = nnan G_FLOG %0 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3810000000000000 + ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[BITCAST]](f32), [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(f32) = nnan G_SELECT [[FCMP]](i1), [[C1]], [[C2]] + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(f32) = nnan G_FMUL [[BITCAST]], [[SELECT]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(f32) 
= nnan G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL]](f32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FE62E42E0000000 + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3E6EFA39E0000000 + ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = nnan G_FMUL [[INT]], [[C3]] + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(f32) = nnan G_FNEG [[FMUL1]] + ; CHECK-NEXT: [[FMA:%[0-9]+]]:_(f32) = nnan G_FMA [[INT]], [[C3]], [[FNEG]] + ; CHECK-NEXT: [[FMA1:%[0-9]+]]:_(f32) = nnan G_FMA [[INT]], [[C4]], [[FMA]] + ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(f32) = nnan G_FADD [[FMUL1]], [[FMA1]] + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x7FF0000000000000 + ; CHECK-NEXT: [[FABS:%[0-9]+]]:_(f32) = G_FABS [[INT]] + ; CHECK-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = nnan G_FCMP floatpred(olt), [[FABS]](f32), [[C5]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(f32) = nnan G_SELECT [[FCMP1]](i1), [[FADD]], [[INT]] + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x40362E4300000000 + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(f32) = nnan G_SELECT [[FCMP]](i1), [[C7]], [[C6]] + ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(f32) = nnan G_FSUB [[SELECT1]], [[SELECT2]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[FSUB]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = nnan G_FLOG %1 + %3:_(i32) = G_BITCAST %2(f32) + $vgpr0 = COPY %3(i32) ... --- @@ -86,49 +94,53 @@ body: | ; CHECK-LABEL: name: test_flog_v2s32 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3810000000000000 - ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV]](s32), [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]] - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[SELECT]] - ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FE62E42E0000000 - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3E6EFA39E0000000 - ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[C3]] - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FMUL1]] - ; CHECK-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[INT]], [[C3]], [[FNEG]] - ; CHECK-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[INT]], [[C4]], [[FMA]] - ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[FMA1]] - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x7FF0000000000000 - ; CHECK-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[INT]] - ; CHECK-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FABS]](s32), [[C5]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[FADD]], [[INT]] - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x40362E4300000000 - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C7]], [[C6]] - ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[SELECT1]], [[SELECT2]] - ; CHECK-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C]] - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s32) 
= G_SELECT [[FCMP2]](s1), [[C1]], [[C2]] - ; CHECK-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[SELECT3]] - ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL2]](s32) - ; CHECK-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INT1]], [[C3]] - ; CHECK-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[FMUL3]] - ; CHECK-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[INT1]], [[C3]], [[FNEG1]] - ; CHECK-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[INT1]], [[C4]], [[FMA2]] - ; CHECK-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMA3]] - ; CHECK-NEXT: [[FABS1:%[0-9]+]]:_(s32) = G_FABS [[INT1]] - ; CHECK-NEXT: [[FCMP3:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FABS1]](s32), [[C5]] - ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[FCMP3]](s1), [[FADD1]], [[INT1]] - ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C7]], [[C6]] - ; CHECK-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[SELECT4]], [[SELECT5]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSUB]](s32), [[FSUB1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_FLOG %0 - $vgpr0_vgpr1 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3810000000000000 + ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[UV]](f32), [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C1]], [[C2]] + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UV]], [[SELECT]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL]](f32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FE62E42E0000000 + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3E6EFA39E0000000 + ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[INT]], [[C3]] + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[FMUL1]] + ; CHECK-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[INT]], [[C3]], [[FNEG]] + ; CHECK-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[INT]], [[C4]], [[FMA]] + ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL1]], [[FMA1]] + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x7FF0000000000000 + ; CHECK-NEXT: [[FABS:%[0-9]+]]:_(f32) = G_FABS [[INT]] + ; CHECK-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[FABS]](f32), [[C5]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(f32) = G_SELECT [[FCMP1]](i1), [[FADD]], [[INT]] + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x40362E4300000000 + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C7]], [[C6]] + ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(f32) = G_FSUB [[SELECT1]], [[SELECT2]] + ; CHECK-NEXT: [[FCMP2:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[UV1]](f32), [[C]] + ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(f32) = G_SELECT [[FCMP2]](i1), [[C1]], [[C2]] + ; CHECK-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[UV1]], [[SELECT3]] + ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL2]](f32) + ; CHECK-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[INT1]], [[C3]] + ; CHECK-NEXT: 
[[FNEG1:%[0-9]+]]:_(f32) = G_FNEG [[FMUL3]] + ; CHECK-NEXT: [[FMA2:%[0-9]+]]:_(f32) = G_FMA [[INT1]], [[C3]], [[FNEG1]] + ; CHECK-NEXT: [[FMA3:%[0-9]+]]:_(f32) = G_FMA [[INT1]], [[C4]], [[FMA2]] + ; CHECK-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FMA3]] + ; CHECK-NEXT: [[FABS1:%[0-9]+]]:_(f32) = G_FABS [[INT1]] + ; CHECK-NEXT: [[FCMP3:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[FABS1]](f32), [[C5]] + ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(f32) = G_SELECT [[FCMP3]](i1), [[FADD1]], [[INT1]] + ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(f32) = G_SELECT [[FCMP2]](i1), [[C7]], [[C6]] + ; CHECK-NEXT: [[FSUB1:%[0-9]+]]:_(f32) = G_FSUB [[SELECT4]], [[SELECT5]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FSUB]](f32), [[FSUB1]](f32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x f32>) = G_BITCAST %0(<2 x i32>) + %2:_(<2 x f32>) = G_FLOG %1 + %3:_(<2 x i32>) = G_BITCAST %2(<2 x f32>) + $vgpr0_vgpr1 = COPY %3(<2 x i32>) ... --- @@ -140,63 +152,67 @@ body: | ; CHECK-LABEL: name: test_flog_v3s32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3810000000000000 - ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV]](s32), [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]] - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[SELECT]] - ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FE62E42E0000000 - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3E6EFA39E0000000 - ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[C3]] - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FMUL1]] - ; CHECK-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[INT]], [[C3]], [[FNEG]] - ; CHECK-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[INT]], [[C4]], [[FMA]] - ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[FMA1]] - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x7FF0000000000000 - ; CHECK-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[INT]] - ; CHECK-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FABS]](s32), [[C5]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[FADD]], [[INT]] - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x40362E4300000000 - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C7]], [[C6]] - ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[SELECT1]], [[SELECT2]] - ; CHECK-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C]] - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C1]], [[C2]] - ; CHECK-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[SELECT3]] - ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL2]](s32) - ; CHECK-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INT1]], [[C3]] - ; CHECK-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG 
[[FMUL3]] - ; CHECK-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[INT1]], [[C3]], [[FNEG1]] - ; CHECK-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[INT1]], [[C4]], [[FMA2]] - ; CHECK-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMA3]] - ; CHECK-NEXT: [[FABS1:%[0-9]+]]:_(s32) = G_FABS [[INT1]] - ; CHECK-NEXT: [[FCMP3:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FABS1]](s32), [[C5]] - ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[FCMP3]](s1), [[FADD1]], [[INT1]] - ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C7]], [[C6]] - ; CHECK-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[SELECT4]], [[SELECT5]] - ; CHECK-NEXT: [[FCMP4:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV2]](s32), [[C]] - ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[FCMP4]](s1), [[C1]], [[C2]] - ; CHECK-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[SELECT6]] - ; CHECK-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL4]](s32) - ; CHECK-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[C3]] - ; CHECK-NEXT: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[FMUL5]] - ; CHECK-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[INT2]], [[C3]], [[FNEG2]] - ; CHECK-NEXT: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[INT2]], [[C4]], [[FMA4]] - ; CHECK-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL5]], [[FMA5]] - ; CHECK-NEXT: [[FABS2:%[0-9]+]]:_(s32) = G_FABS [[INT2]] - ; CHECK-NEXT: [[FCMP5:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FABS2]](s32), [[C5]] - ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[FCMP5]](s1), [[FADD2]], [[INT2]] - ; CHECK-NEXT: [[SELECT8:%[0-9]+]]:_(s32) = G_SELECT [[FCMP4]](s1), [[C7]], [[C6]] - ; CHECK-NEXT: [[FSUB2:%[0-9]+]]:_(s32) = G_FSUB [[SELECT7]], [[SELECT8]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FSUB]](s32), [[FSUB1]](s32), [[FSUB2]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s32>) = G_FLOG %0 - $vgpr0_vgpr1_vgpr2 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY]](<3 x i32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3810000000000000 + ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[UV]](f32), [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C1]], [[C2]] + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UV]], [[SELECT]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL]](f32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FE62E42E0000000 + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3E6EFA39E0000000 + ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[INT]], [[C3]] + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[FMUL1]] + ; CHECK-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[INT]], [[C3]], [[FNEG]] + ; CHECK-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[INT]], [[C4]], [[FMA]] + ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL1]], [[FMA1]] + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x7FF0000000000000 + ; CHECK-NEXT: [[FABS:%[0-9]+]]:_(f32) = G_FABS [[INT]] + ; CHECK-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[FABS]](f32), [[C5]] + ; 
CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(f32) = G_SELECT [[FCMP1]](i1), [[FADD]], [[INT]] + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x40362E4300000000 + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C7]], [[C6]] + ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(f32) = G_FSUB [[SELECT1]], [[SELECT2]] + ; CHECK-NEXT: [[FCMP2:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[UV1]](f32), [[C]] + ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(f32) = G_SELECT [[FCMP2]](i1), [[C1]], [[C2]] + ; CHECK-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[UV1]], [[SELECT3]] + ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL2]](f32) + ; CHECK-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[INT1]], [[C3]] + ; CHECK-NEXT: [[FNEG1:%[0-9]+]]:_(f32) = G_FNEG [[FMUL3]] + ; CHECK-NEXT: [[FMA2:%[0-9]+]]:_(f32) = G_FMA [[INT1]], [[C3]], [[FNEG1]] + ; CHECK-NEXT: [[FMA3:%[0-9]+]]:_(f32) = G_FMA [[INT1]], [[C4]], [[FMA2]] + ; CHECK-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FMA3]] + ; CHECK-NEXT: [[FABS1:%[0-9]+]]:_(f32) = G_FABS [[INT1]] + ; CHECK-NEXT: [[FCMP3:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[FABS1]](f32), [[C5]] + ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(f32) = G_SELECT [[FCMP3]](i1), [[FADD1]], [[INT1]] + ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(f32) = G_SELECT [[FCMP2]](i1), [[C7]], [[C6]] + ; CHECK-NEXT: [[FSUB1:%[0-9]+]]:_(f32) = G_FSUB [[SELECT4]], [[SELECT5]] + ; CHECK-NEXT: [[FCMP4:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[UV2]](f32), [[C]] + ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:_(f32) = G_SELECT [[FCMP4]](i1), [[C1]], [[C2]] + ; CHECK-NEXT: [[FMUL4:%[0-9]+]]:_(f32) = G_FMUL [[UV2]], [[SELECT6]] + ; CHECK-NEXT: [[INT2:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL4]](f32) + ; CHECK-NEXT: [[FMUL5:%[0-9]+]]:_(f32) = G_FMUL [[INT2]], [[C3]] + ; CHECK-NEXT: [[FNEG2:%[0-9]+]]:_(f32) = G_FNEG [[FMUL5]] + ; CHECK-NEXT: [[FMA4:%[0-9]+]]:_(f32) = G_FMA [[INT2]], [[C3]], [[FNEG2]] + ; CHECK-NEXT: [[FMA5:%[0-9]+]]:_(f32) = G_FMA [[INT2]], [[C4]], [[FMA4]] + ; CHECK-NEXT: [[FADD2:%[0-9]+]]:_(f32) = G_FADD [[FMUL5]], [[FMA5]] + ; CHECK-NEXT: [[FABS2:%[0-9]+]]:_(f32) = G_FABS [[INT2]] + ; CHECK-NEXT: [[FCMP5:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[FABS2]](f32), [[C5]] + ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:_(f32) = G_SELECT [[FCMP5]](i1), [[FADD2]], [[INT2]] + ; CHECK-NEXT: [[SELECT8:%[0-9]+]]:_(f32) = G_SELECT [[FCMP4]](i1), [[C7]], [[C6]] + ; CHECK-NEXT: [[FSUB2:%[0-9]+]]:_(f32) = G_FSUB [[SELECT7]], [[SELECT8]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[FSUB]](f32), [[FSUB1]](f32), [[FSUB2]](f32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[BUILD_VECTOR]](<3 x f32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST1]](<3 x i32>) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x f32>) = G_BITCAST %0(<3 x i32>) + %2:_(<3 x f32>) = G_FLOG %1 + %3:_(<3 x i32>) = G_BITCAST %2(<3 x f32>) + $vgpr0_vgpr1_vgpr2 = COPY %3(<3 x i32>) ... 
--- @@ -208,20 +224,24 @@ body: | ; CHECK-LABEL: name: test_flog_s16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FPEXT]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FE62E4300000000 - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[C]] - ; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s16) = G_FLOG %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FPEXT]](f32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FE62E4300000000 + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[INT]], [[C]] + ; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMUL]](f32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(f16) = G_BITCAST %1(i16) + %3:_(f16) = G_FLOG %2 + %4:_(i16) = G_BITCAST %3(f16) + %5:_(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... 
@@ -234,29 +254,38 @@ body: | ; CHECK-LABEL: name: test_flog_v2s16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FPEXT]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FE62E4300000000 - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[C1]] - ; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FPEXT1]](s32) - ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT1]], [[C1]] - ; CHECK-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = G_FLOG %0 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %15(i16) + ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST1]](f16) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %16(i16) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FPEXT]](f32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FE62E4300000000 + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[INT]], [[C1]] + ; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMUL]](f32) + ; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FPEXT1]](f32) + ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[INT1]], [[C1]] + ; CHECK-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMUL1]](f32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST5]](i16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST6]](i16) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; 
CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST7]](<2 x f16>) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST8]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %2:_(<2 x f16>) = G_FLOG %1 + %3:_(<2 x i16>) = G_BITCAST %2(<2 x f16>) + $vgpr0 = COPY %3(<2 x i16>) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-flog10.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-flog10.mir index 28cd783473978..fe999f0b1b702 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-flog10.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-flog10.mir @@ -10,33 +10,37 @@ body: | ; CHECK-LABEL: name: test_flog10_s32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3810000000000000 - ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[COPY]](s32), [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]] - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[SELECT]] - ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FD3441340000000 - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3E509F79E0000000 - ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[C3]] - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FMUL1]] - ; CHECK-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[INT]], [[C3]], [[FNEG]] - ; CHECK-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[INT]], [[C4]], [[FMA]] - ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[FMA1]] - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x7FF0000000000000 - ; CHECK-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[INT]] - ; CHECK-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FABS]](s32), [[C5]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[FADD]], [[INT]] - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x4023441360000000 - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C7]], [[C6]] - ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[SELECT1]], [[SELECT2]] - ; CHECK-NEXT: $vgpr0 = COPY [[FSUB]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_FLOG10 %0 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3810000000000000 + ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[BITCAST]](f32), [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C1]], [[C2]] + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST]], [[SELECT]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL]](f32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FD3441340000000 + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3E509F79E0000000 + ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[INT]], [[C3]] + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[FMUL1]] + ; CHECK-NEXT: 
[[FMA:%[0-9]+]]:_(f32) = G_FMA [[INT]], [[C3]], [[FNEG]] + ; CHECK-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[INT]], [[C4]], [[FMA]] + ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL1]], [[FMA1]] + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x7FF0000000000000 + ; CHECK-NEXT: [[FABS:%[0-9]+]]:_(f32) = G_FABS [[INT]] + ; CHECK-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[FABS]](f32), [[C5]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(f32) = G_SELECT [[FCMP1]](i1), [[FADD]], [[INT]] + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x4023441360000000 + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C7]], [[C6]] + ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(f32) = G_FSUB [[SELECT1]], [[SELECT2]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[FSUB]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = G_FLOG10 %1 + %3:_(i32) = G_BITCAST %2(f32) + $vgpr0 = COPY %3(i32) ... --- @@ -48,33 +52,37 @@ body: | ; CHECK-LABEL: name: test_flog10_s32_flags ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3810000000000000 - ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[COPY]](s32), [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = nnan G_SELECT [[FCMP]](s1), [[C1]], [[C2]] - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[SELECT]] - ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FD3441340000000 - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3E509F79E0000000 - ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT]], [[C3]] - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = nnan G_FNEG [[FMUL1]] - ; CHECK-NEXT: [[FMA:%[0-9]+]]:_(s32) = nnan G_FMA [[INT]], [[C3]], [[FNEG]] - ; CHECK-NEXT: [[FMA1:%[0-9]+]]:_(s32) = nnan G_FMA [[INT]], [[C4]], [[FMA]] - ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FMUL1]], [[FMA1]] - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x7FF0000000000000 - ; CHECK-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[INT]] - ; CHECK-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = nnan G_FCMP floatpred(olt), [[FABS]](s32), [[C5]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = nnan G_SELECT [[FCMP1]](s1), [[FADD]], [[INT]] - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x4023441360000000 - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = nnan G_SELECT [[FCMP]](s1), [[C7]], [[C6]] - ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = nnan G_FSUB [[SELECT1]], [[SELECT2]] - ; CHECK-NEXT: $vgpr0 = COPY [[FSUB]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = nnan G_FLOG10 %0 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3810000000000000 + ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[BITCAST]](f32), [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; CHECK-NEXT: 
[[SELECT:%[0-9]+]]:_(f32) = nnan G_SELECT [[FCMP]](i1), [[C1]], [[C2]] + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(f32) = nnan G_FMUL [[BITCAST]], [[SELECT]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(f32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL]](f32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FD3441340000000 + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3E509F79E0000000 + ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = nnan G_FMUL [[INT]], [[C3]] + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(f32) = nnan G_FNEG [[FMUL1]] + ; CHECK-NEXT: [[FMA:%[0-9]+]]:_(f32) = nnan G_FMA [[INT]], [[C3]], [[FNEG]] + ; CHECK-NEXT: [[FMA1:%[0-9]+]]:_(f32) = nnan G_FMA [[INT]], [[C4]], [[FMA]] + ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(f32) = nnan G_FADD [[FMUL1]], [[FMA1]] + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x7FF0000000000000 + ; CHECK-NEXT: [[FABS:%[0-9]+]]:_(f32) = G_FABS [[INT]] + ; CHECK-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = nnan G_FCMP floatpred(olt), [[FABS]](f32), [[C5]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(f32) = nnan G_SELECT [[FCMP1]](i1), [[FADD]], [[INT]] + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x4023441360000000 + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(f32) = nnan G_SELECT [[FCMP]](i1), [[C7]], [[C6]] + ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(f32) = nnan G_FSUB [[SELECT1]], [[SELECT2]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[FSUB]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = nnan G_FLOG10 %1 + %3:_(i32) = G_BITCAST %2(f32) + $vgpr0 = COPY %3(i32) ... --- @@ -86,49 +94,53 @@ body: | ; CHECK-LABEL: name: test_flog10_v2s32 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3810000000000000 - ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV]](s32), [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]] - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[SELECT]] - ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FD3441340000000 - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3E509F79E0000000 - ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[C3]] - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FMUL1]] - ; CHECK-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[INT]], [[C3]], [[FNEG]] - ; CHECK-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[INT]], [[C4]], [[FMA]] - ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[FMA1]] - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x7FF0000000000000 - ; CHECK-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[INT]] - ; CHECK-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FABS]](s32), [[C5]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[FADD]], [[INT]] - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x4023441360000000 - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C7]], [[C6]] - ; 
CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[SELECT1]], [[SELECT2]] - ; CHECK-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C]] - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C1]], [[C2]] - ; CHECK-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[SELECT3]] - ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL2]](s32) - ; CHECK-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INT1]], [[C3]] - ; CHECK-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[FMUL3]] - ; CHECK-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[INT1]], [[C3]], [[FNEG1]] - ; CHECK-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[INT1]], [[C4]], [[FMA2]] - ; CHECK-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMA3]] - ; CHECK-NEXT: [[FABS1:%[0-9]+]]:_(s32) = G_FABS [[INT1]] - ; CHECK-NEXT: [[FCMP3:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FABS1]](s32), [[C5]] - ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[FCMP3]](s1), [[FADD1]], [[INT1]] - ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C7]], [[C6]] - ; CHECK-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[SELECT4]], [[SELECT5]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSUB]](s32), [[FSUB1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_FLOG10 %0 - $vgpr0_vgpr1 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3810000000000000 + ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[UV]](f32), [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C1]], [[C2]] + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UV]], [[SELECT]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL]](f32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FD3441340000000 + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3E509F79E0000000 + ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[INT]], [[C3]] + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[FMUL1]] + ; CHECK-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[INT]], [[C3]], [[FNEG]] + ; CHECK-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[INT]], [[C4]], [[FMA]] + ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL1]], [[FMA1]] + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x7FF0000000000000 + ; CHECK-NEXT: [[FABS:%[0-9]+]]:_(f32) = G_FABS [[INT]] + ; CHECK-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[FABS]](f32), [[C5]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(f32) = G_SELECT [[FCMP1]](i1), [[FADD]], [[INT]] + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x4023441360000000 + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C7]], [[C6]] + ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(f32) = G_FSUB [[SELECT1]], [[SELECT2]] + ; CHECK-NEXT: [[FCMP2:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[UV1]](f32), [[C]] + ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(f32) = G_SELECT [[FCMP2]](i1), [[C1]], [[C2]] + ; CHECK-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[UV1]], 
[[SELECT3]] + ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL2]](f32) + ; CHECK-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[INT1]], [[C3]] + ; CHECK-NEXT: [[FNEG1:%[0-9]+]]:_(f32) = G_FNEG [[FMUL3]] + ; CHECK-NEXT: [[FMA2:%[0-9]+]]:_(f32) = G_FMA [[INT1]], [[C3]], [[FNEG1]] + ; CHECK-NEXT: [[FMA3:%[0-9]+]]:_(f32) = G_FMA [[INT1]], [[C4]], [[FMA2]] + ; CHECK-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FMA3]] + ; CHECK-NEXT: [[FABS1:%[0-9]+]]:_(f32) = G_FABS [[INT1]] + ; CHECK-NEXT: [[FCMP3:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[FABS1]](f32), [[C5]] + ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(f32) = G_SELECT [[FCMP3]](i1), [[FADD1]], [[INT1]] + ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(f32) = G_SELECT [[FCMP2]](i1), [[C7]], [[C6]] + ; CHECK-NEXT: [[FSUB1:%[0-9]+]]:_(f32) = G_FSUB [[SELECT4]], [[SELECT5]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FSUB]](f32), [[FSUB1]](f32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x f32>) = G_BITCAST %0(<2 x i32>) + %2:_(<2 x f32>) = G_FLOG10 %1 + %3:_(<2 x i32>) = G_BITCAST %2(<2 x f32>) + $vgpr0_vgpr1 = COPY %3(<2 x i32>) ... --- @@ -140,63 +152,67 @@ body: | ; CHECK-LABEL: name: test_flog10_v3s32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3810000000000000 - ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV]](s32), [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]] - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[SELECT]] - ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FD3441340000000 - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3E509F79E0000000 - ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[C3]] - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FMUL1]] - ; CHECK-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[INT]], [[C3]], [[FNEG]] - ; CHECK-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[INT]], [[C4]], [[FMA]] - ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[FMA1]] - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x7FF0000000000000 - ; CHECK-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[INT]] - ; CHECK-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FABS]](s32), [[C5]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[FADD]], [[INT]] - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x4023441360000000 - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C7]], [[C6]] - ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[SELECT1]], [[SELECT2]] - ; CHECK-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C]] - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C1]], [[C2]] - ; CHECK-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[SELECT3]] - ; CHECK-NEXT: 
[[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL2]](s32) - ; CHECK-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INT1]], [[C3]] - ; CHECK-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[FMUL3]] - ; CHECK-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[INT1]], [[C3]], [[FNEG1]] - ; CHECK-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[INT1]], [[C4]], [[FMA2]] - ; CHECK-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMA3]] - ; CHECK-NEXT: [[FABS1:%[0-9]+]]:_(s32) = G_FABS [[INT1]] - ; CHECK-NEXT: [[FCMP3:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FABS1]](s32), [[C5]] - ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[FCMP3]](s1), [[FADD1]], [[INT1]] - ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C7]], [[C6]] - ; CHECK-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[SELECT4]], [[SELECT5]] - ; CHECK-NEXT: [[FCMP4:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV2]](s32), [[C]] - ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[FCMP4]](s1), [[C1]], [[C2]] - ; CHECK-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[SELECT6]] - ; CHECK-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL4]](s32) - ; CHECK-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[C3]] - ; CHECK-NEXT: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[FMUL5]] - ; CHECK-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[INT2]], [[C3]], [[FNEG2]] - ; CHECK-NEXT: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[INT2]], [[C4]], [[FMA4]] - ; CHECK-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL5]], [[FMA5]] - ; CHECK-NEXT: [[FABS2:%[0-9]+]]:_(s32) = G_FABS [[INT2]] - ; CHECK-NEXT: [[FCMP5:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FABS2]](s32), [[C5]] - ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[FCMP5]](s1), [[FADD2]], [[INT2]] - ; CHECK-NEXT: [[SELECT8:%[0-9]+]]:_(s32) = G_SELECT [[FCMP4]](s1), [[C7]], [[C6]] - ; CHECK-NEXT: [[FSUB2:%[0-9]+]]:_(s32) = G_FSUB [[SELECT7]], [[SELECT8]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FSUB]](s32), [[FSUB1]](s32), [[FSUB2]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s32>) = G_FLOG10 %0 - $vgpr0_vgpr1_vgpr2 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY]](<3 x i32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3810000000000000 + ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[UV]](f32), [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C1]], [[C2]] + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UV]], [[SELECT]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL]](f32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FD3441340000000 + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3E509F79E0000000 + ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[INT]], [[C3]] + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[FMUL1]] + ; CHECK-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[INT]], [[C3]], [[FNEG]] + ; CHECK-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[INT]], [[C4]], [[FMA]] + ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL1]], [[FMA1]] + ; CHECK-NEXT: 
[[C5:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x7FF0000000000000 + ; CHECK-NEXT: [[FABS:%[0-9]+]]:_(f32) = G_FABS [[INT]] + ; CHECK-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[FABS]](f32), [[C5]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(f32) = G_SELECT [[FCMP1]](i1), [[FADD]], [[INT]] + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x4023441360000000 + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C7]], [[C6]] + ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(f32) = G_FSUB [[SELECT1]], [[SELECT2]] + ; CHECK-NEXT: [[FCMP2:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[UV1]](f32), [[C]] + ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(f32) = G_SELECT [[FCMP2]](i1), [[C1]], [[C2]] + ; CHECK-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[UV1]], [[SELECT3]] + ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL2]](f32) + ; CHECK-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[INT1]], [[C3]] + ; CHECK-NEXT: [[FNEG1:%[0-9]+]]:_(f32) = G_FNEG [[FMUL3]] + ; CHECK-NEXT: [[FMA2:%[0-9]+]]:_(f32) = G_FMA [[INT1]], [[C3]], [[FNEG1]] + ; CHECK-NEXT: [[FMA3:%[0-9]+]]:_(f32) = G_FMA [[INT1]], [[C4]], [[FMA2]] + ; CHECK-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FMA3]] + ; CHECK-NEXT: [[FABS1:%[0-9]+]]:_(f32) = G_FABS [[INT1]] + ; CHECK-NEXT: [[FCMP3:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[FABS1]](f32), [[C5]] + ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(f32) = G_SELECT [[FCMP3]](i1), [[FADD1]], [[INT1]] + ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(f32) = G_SELECT [[FCMP2]](i1), [[C7]], [[C6]] + ; CHECK-NEXT: [[FSUB1:%[0-9]+]]:_(f32) = G_FSUB [[SELECT4]], [[SELECT5]] + ; CHECK-NEXT: [[FCMP4:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[UV2]](f32), [[C]] + ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:_(f32) = G_SELECT [[FCMP4]](i1), [[C1]], [[C2]] + ; CHECK-NEXT: [[FMUL4:%[0-9]+]]:_(f32) = G_FMUL [[UV2]], [[SELECT6]] + ; CHECK-NEXT: [[INT2:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL4]](f32) + ; CHECK-NEXT: [[FMUL5:%[0-9]+]]:_(f32) = G_FMUL [[INT2]], [[C3]] + ; CHECK-NEXT: [[FNEG2:%[0-9]+]]:_(f32) = G_FNEG [[FMUL5]] + ; CHECK-NEXT: [[FMA4:%[0-9]+]]:_(f32) = G_FMA [[INT2]], [[C3]], [[FNEG2]] + ; CHECK-NEXT: [[FMA5:%[0-9]+]]:_(f32) = G_FMA [[INT2]], [[C4]], [[FMA4]] + ; CHECK-NEXT: [[FADD2:%[0-9]+]]:_(f32) = G_FADD [[FMUL5]], [[FMA5]] + ; CHECK-NEXT: [[FABS2:%[0-9]+]]:_(f32) = G_FABS [[INT2]] + ; CHECK-NEXT: [[FCMP5:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[FABS2]](f32), [[C5]] + ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:_(f32) = G_SELECT [[FCMP5]](i1), [[FADD2]], [[INT2]] + ; CHECK-NEXT: [[SELECT8:%[0-9]+]]:_(f32) = G_SELECT [[FCMP4]](i1), [[C7]], [[C6]] + ; CHECK-NEXT: [[FSUB2:%[0-9]+]]:_(f32) = G_FSUB [[SELECT7]], [[SELECT8]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[FSUB]](f32), [[FSUB1]](f32), [[FSUB2]](f32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[BUILD_VECTOR]](<3 x f32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST1]](<3 x i32>) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x f32>) = G_BITCAST %0(<3 x i32>) + %2:_(<3 x f32>) = G_FLOG10 %1 + %3:_(<3 x i32>) = G_BITCAST %2(<3 x f32>) + $vgpr0_vgpr1_vgpr2 = COPY %3(<3 x i32>) ... 
--- @@ -208,20 +224,24 @@ body: | ; CHECK-LABEL: name: test_flog10_s16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FPEXT]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FD3441360000000 - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[C]] - ; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s16) = G_FLOG10 %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FPEXT]](f32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FD3441360000000 + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[INT]], [[C]] + ; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMUL]](f32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(f16) = G_BITCAST %1(i16) + %3:_(f16) = G_FLOG10 %2 + %4:_(i16) = G_BITCAST %3(f16) + %5:_(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... 
@@ -234,29 +254,38 @@ body: | ; CHECK-LABEL: name: test_flog10_v2s16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FPEXT]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FD3441360000000 - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[C1]] - ; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FPEXT1]](s32) - ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT1]], [[C1]] - ; CHECK-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = G_FLOG10 %0 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %15(i16) + ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST1]](f16) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %16(i16) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FPEXT]](f32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FD3441360000000 + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[INT]], [[C1]] + ; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMUL]](f32) + ; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FPEXT1]](f32) + ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[INT1]], [[C1]] + ; CHECK-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMUL1]](f32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST5]](i16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST6]](i16) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; 
CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST7]](<2 x f16>) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST8]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %2:_(<2 x f16>) = G_FLOG10 %1 + %3:_(<2 x i16>) = G_BITCAST %2(<2 x f16>) + $vgpr0 = COPY %3(<2 x i16>) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-flog2.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-flog2.mir index 42135d4bca4a0..59de39f16bea0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-flog2.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-flog2.mir @@ -10,22 +10,26 @@ body: | ; CHECK-LABEL: name: test_flog2_s32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3810000000000000 - ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[COPY]](s32), [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]] - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[SELECT]] - ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 3.200000e+01 - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[C4]] - ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[INT]], [[SELECT1]] - ; CHECK-NEXT: $vgpr0 = COPY [[FSUB]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_FLOG2 %0 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3810000000000000 + ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[BITCAST]](f32), [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C1]], [[C2]] + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST]], [[SELECT]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL]](f32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 3.200000e+01 + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C3]], [[C4]] + ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(f32) = G_FSUB [[INT]], [[SELECT1]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[FSUB]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = G_FLOG2 %1 + %3:_(i32) = G_BITCAST %2(f32) + $vgpr0 = COPY %3(i32) ... 
--- @@ -37,30 +41,34 @@ body: | ; CHECK-LABEL: name: test_flog2_v2s32 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3810000000000000 - ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV]](s32), [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]] - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[SELECT]] - ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 3.200000e+01 - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[C4]] - ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[INT]], [[SELECT1]] - ; CHECK-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C1]], [[C2]] - ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[SELECT2]] - ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL1]](s32) - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C3]], [[C4]] - ; CHECK-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[INT1]], [[SELECT3]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSUB]](s32), [[FSUB1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_FLOG2 %0 - $vgpr0_vgpr1 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3810000000000000 + ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[UV]](f32), [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C1]], [[C2]] + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UV]], [[SELECT]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL]](f32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 3.200000e+01 + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C3]], [[C4]] + ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(f32) = G_FSUB [[INT]], [[SELECT1]] + ; CHECK-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[UV1]](f32), [[C]] + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(f32) = G_SELECT [[FCMP1]](i1), [[C1]], [[C2]] + ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[UV1]], [[SELECT2]] + ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL1]](f32) + ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(f32) = G_SELECT [[FCMP1]](i1), [[C3]], [[C4]] + ; CHECK-NEXT: [[FSUB1:%[0-9]+]]:_(f32) = G_FSUB [[INT1]], [[SELECT3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR 
[[FSUB]](f32), [[FSUB1]](f32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x f32>) = G_BITCAST %0(<2 x i32>) + %2:_(<2 x f32>) = G_FLOG2 %1 + %3:_(<2 x i32>) = G_BITCAST %2(<2 x f32>) + $vgpr0_vgpr1 = COPY %3(<2 x i32>) ... --- @@ -72,34 +80,38 @@ body: | ; CHECK-LABEL: name: test_flog2_v3s32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3810000000000000 - ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV]](s32), [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]] - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[SELECT]] - ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 3.200000e+01 - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[C4]] - ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[INT]], [[SELECT1]] - ; CHECK-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C1]], [[C2]] - ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[SELECT2]] - ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL1]](s32) - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C3]], [[C4]] - ; CHECK-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[INT1]], [[SELECT3]] - ; CHECK-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV2]](s32), [[C]] - ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C1]], [[C2]] - ; CHECK-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[SELECT4]] - ; CHECK-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL2]](s32) - ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C3]], [[C4]] - ; CHECK-NEXT: [[FSUB2:%[0-9]+]]:_(s32) = G_FSUB [[INT2]], [[SELECT5]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FSUB]](s32), [[FSUB1]](s32), [[FSUB2]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s32>) = G_FLOG2 %0 - $vgpr0_vgpr1_vgpr2 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY]](<3 x i32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3810000000000000 + ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[UV]](f32), [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C1]], [[C2]] + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL 
[[UV]], [[SELECT]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL]](f32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 3.200000e+01 + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C3]], [[C4]] + ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(f32) = G_FSUB [[INT]], [[SELECT1]] + ; CHECK-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[UV1]](f32), [[C]] + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(f32) = G_SELECT [[FCMP1]](i1), [[C1]], [[C2]] + ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[UV1]], [[SELECT2]] + ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL1]](f32) + ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(f32) = G_SELECT [[FCMP1]](i1), [[C3]], [[C4]] + ; CHECK-NEXT: [[FSUB1:%[0-9]+]]:_(f32) = G_FSUB [[INT1]], [[SELECT3]] + ; CHECK-NEXT: [[FCMP2:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[UV2]](f32), [[C]] + ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(f32) = G_SELECT [[FCMP2]](i1), [[C1]], [[C2]] + ; CHECK-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[UV2]], [[SELECT4]] + ; CHECK-NEXT: [[INT2:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL2]](f32) + ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(f32) = G_SELECT [[FCMP2]](i1), [[C3]], [[C4]] + ; CHECK-NEXT: [[FSUB2:%[0-9]+]]:_(f32) = G_FSUB [[INT2]], [[SELECT5]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[FSUB]](f32), [[FSUB1]](f32), [[FSUB2]](f32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[BUILD_VECTOR]](<3 x f32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST1]](<3 x i32>) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x f32>) = G_BITCAST %0(<3 x i32>) + %2:_(<3 x f32>) = G_FLOG2 %1 + %3:_(<3 x i32>) = G_BITCAST %2(<3 x f32>) + $vgpr0_vgpr1_vgpr2 = COPY %3(<3 x i32>) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir index 2ccdc5d9c6b73..cdec643bf722e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir @@ -14,34 +14,50 @@ body: | ; SI-LABEL: name: test_fma_s32 ; SI: liveins: $vgpr0, $vgpr1, $vgpr2 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; SI-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; SI-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMA]](f32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) ; ; VI-LABEL: name: test_fma_s32 ; VI: liveins: $vgpr0, $vgpr1, $vgpr2 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; VI-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; VI-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMA]](f32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) ; ; GFX9-LABEL: name: test_fma_s32 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; GFX9-NEXT: $vgpr0 = COPY [[FMA]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = G_FMA %0, %1, %2 - $vgpr0 = COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMA]](f32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(f32) = G_BITCAST %0(i32) + %4:_(f32) = G_BITCAST %1(i32) + %5:_(f32) = G_BITCAST %2(i32) + %6:_(f32) = G_FMA %3, %4, %5 + %7:_(i32) = G_BITCAST %6(f32) + $vgpr0 = COPY %7(i32) ... 
--- name: test_fma_s64 @@ -52,34 +68,50 @@ body: | ; SI-LABEL: name: test_fma_s64 ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 - ; SI-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[FMA]](s64) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i64) = COPY $vgpr4_vgpr5 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[COPY1]](i64) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(f64) = G_BITCAST [[COPY2]](i64) + ; SI-NEXT: [[FMA:%[0-9]+]]:_(f64) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(i64) = G_BITCAST [[FMA]](f64) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST3]](i64) ; ; VI-LABEL: name: test_fma_s64 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 - ; VI-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[FMA]](s64) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i64) = COPY $vgpr4_vgpr5 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[COPY1]](i64) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(f64) = G_BITCAST [[COPY2]](i64) + ; VI-NEXT: [[FMA:%[0-9]+]]:_(f64) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(i64) = G_BITCAST [[FMA]](f64) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST3]](i64) ; ; GFX9-LABEL: name: test_fma_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 - ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FMA]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = COPY $vgpr4_vgpr5 - %3:_(s64) = G_FMA %0, %1, %2 - $vgpr0_vgpr1 = COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i64) = COPY $vgpr4_vgpr5 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[COPY1]](i64) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f64) = G_BITCAST [[COPY2]](i64) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(f64) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i64) = G_BITCAST [[FMA]](f64) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST3]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64) = COPY $vgpr4_vgpr5 + %3:_(f64) = G_BITCAST %0(i64) + %4:_(f64) = G_BITCAST %1(i64) + %5:_(f64) = G_BITCAST %2(i64) + %6:_(f64) = G_FMA %3, %4, %5 + %7:_(i64) = G_BITCAST %6(f64) + $vgpr0_vgpr1 = COPY %7(i64) ... 
--- @@ -91,55 +123,71 @@ body: | ; SI-LABEL: name: test_fma_s16 ; SI: liveins: $vgpr0, $vgpr1, $vgpr2 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], [[FPEXT2]] - ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA]](s32) - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST1]](f16) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; SI-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[FPEXT]], [[FPEXT1]], [[FPEXT2]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMA]](f32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST3]](i16) + ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; VI-LABEL: name: test_fma_s16 ; VI: liveins: $vgpr0, $vgpr1, $vgpr2 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; VI-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; VI-NEXT: [[FMA:%[0-9]+]]:_(f16) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[FMA]](f16) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST3]](i16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: test_fma_s16 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY 
$vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr1 - %3:_(s16) = G_TRUNC %0 - %4:_(s16) = G_TRUNC %1 - %5:_(s16) = G_TRUNC %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(f16) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[FMA]](f16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST3]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr1 + %3:_(i16) = G_TRUNC %0(i32) + %4:_(i16) = G_TRUNC %1(i32) + %5:_(i16) = G_TRUNC %2(i32) + %6:_(f16) = G_BITCAST %3(i16) + %7:_(f16) = G_BITCAST %4(i16) + %8:_(f16) = G_BITCAST %5(i16) + %9:_(f16) = G_FMA %6, %7, %8 + %10:_(i16) = G_BITCAST %9(f16) + %11:_(i32) = G_ANYEXT %10(i16) + $vgpr0 = COPY %11(i32) - %6:_(s16) = G_FMA %3, %4, %5 - %7:_(s32) = G_ANYEXT %6 - $vgpr0 = COPY %7 ... 
--- @@ -151,49 +199,65 @@ body: | ; SI-LABEL: name: test_fma_v2s32 ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[UV2]], [[UV4]] - ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[UV3]], [[UV5]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr4_vgpr5 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY1]](<2 x i32>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY2]](<2 x i32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f32>) + ; SI-NEXT: [[UV4:%[0-9]+]]:_(f32), [[UV5:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST2]](<2 x f32>) + ; SI-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[UV]], [[UV2]], [[UV4]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[UV1]], [[UV3]], [[UV5]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FMA]](f32), [[FMA1]](f32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST3]](<2 x i32>) ; ; VI-LABEL: name: test_fma_v2s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; VI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[UV2]], [[UV4]] - ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[UV3]], [[UV5]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr4_vgpr5 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY1]](<2 x i32>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY2]](<2 x i32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES 
[[BITCAST1]](<2 x f32>) + ; VI-NEXT: [[UV4:%[0-9]+]]:_(f32), [[UV5:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST2]](<2 x f32>) + ; VI-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[UV]], [[UV2]], [[UV4]] + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[UV1]], [[UV3]], [[UV5]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FMA]](f32), [[FMA1]](f32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST3]](<2 x i32>) ; ; GFX9-LABEL: name: test_fma_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[UV2]], [[UV4]] - ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[UV3]], [[UV5]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = COPY $vgpr4_vgpr5 - %3:_(<2 x s32>) = G_FMA %0, %1, %2 - $vgpr0_vgpr1 = COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr4_vgpr5 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY1]](<2 x i32>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY2]](<2 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f32>) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(f32), [[UV5:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST2]](<2 x f32>) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[UV]], [[UV2]], [[UV4]] + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[UV1]], [[UV3]], [[UV5]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FMA]](f32), [[FMA1]](f32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST3]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i32>) = COPY $vgpr4_vgpr5 + %3:_(<2 x f32>) = G_BITCAST %0(<2 x i32>) + %4:_(<2 x f32>) = G_BITCAST %1(<2 x i32>) + %5:_(<2 x f32>) = G_BITCAST %2(<2 x i32>) + %6:_(<2 x f32>) = G_FMA %3, %4, %5 + %7:_(<2 x i32>) = G_BITCAST %6(<2 x f32>) + $vgpr0_vgpr1 = COPY %7(<2 x i32>) ... 
--- @@ -205,52 +269,68 @@ body: | ; SI-LABEL: name: test_fma_v3s32 ; SI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5, $vgpr6_vgpr7_vgpr8 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) - ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[UV3]], [[UV6]] - ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[UV4]], [[UV7]] - ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[UV2]], [[UV5]], [[UV8]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32), [[FMA2]](s32) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr6_vgpr7_vgpr8 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY]](<3 x i32>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY1]](<3 x i32>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY2]](<3 x i32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; SI-NEXT: [[UV3:%[0-9]+]]:_(f32), [[UV4:%[0-9]+]]:_(f32), [[UV5:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<3 x f32>) + ; SI-NEXT: [[UV6:%[0-9]+]]:_(f32), [[UV7:%[0-9]+]]:_(f32), [[UV8:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST2]](<3 x f32>) + ; SI-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[UV]], [[UV3]], [[UV6]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[UV1]], [[UV4]], [[UV7]] + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(f32) = G_FMA [[UV2]], [[UV5]], [[UV8]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[FMA]](f32), [[FMA1]](f32), [[FMA2]](f32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[BUILD_VECTOR]](<3 x f32>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST3]](<3 x i32>) ; ; VI-LABEL: name: test_fma_v3s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5, $vgpr6_vgpr7_vgpr8 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; VI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) - ; VI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[UV3]], [[UV6]] - ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[UV4]], [[UV7]] - ; VI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[UV2]], [[UV5]], [[UV8]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32), [[FMA2]](s32) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<3 
x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr6_vgpr7_vgpr8 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY]](<3 x i32>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY1]](<3 x i32>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY2]](<3 x i32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; VI-NEXT: [[UV3:%[0-9]+]]:_(f32), [[UV4:%[0-9]+]]:_(f32), [[UV5:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<3 x f32>) + ; VI-NEXT: [[UV6:%[0-9]+]]:_(f32), [[UV7:%[0-9]+]]:_(f32), [[UV8:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST2]](<3 x f32>) + ; VI-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[UV]], [[UV3]], [[UV6]] + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[UV1]], [[UV4]], [[UV7]] + ; VI-NEXT: [[FMA2:%[0-9]+]]:_(f32) = G_FMA [[UV2]], [[UV5]], [[UV8]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[FMA]](f32), [[FMA1]](f32), [[FMA2]](f32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[BUILD_VECTOR]](<3 x f32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST3]](<3 x i32>) ; ; GFX9-LABEL: name: test_fma_v3s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5, $vgpr6_vgpr7_vgpr8 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) - ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[UV3]], [[UV6]] - ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[UV4]], [[UV7]] - ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[UV2]], [[UV5]], [[UV8]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32), [[FMA2]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - %2:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 - %3:_(<3 x s32>) = G_FMA %0, %1, %2 - $vgpr0_vgpr1_vgpr2 = COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr6_vgpr7_vgpr8 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY]](<3 x i32>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY1]](<3 x i32>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY2]](<3 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(f32), [[UV4:%[0-9]+]]:_(f32), [[UV5:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<3 x f32>) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(f32), [[UV7:%[0-9]+]]:_(f32), [[UV8:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST2]](<3 x f32>) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[UV]], [[UV3]], [[UV6]] + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA 
[[UV1]], [[UV4]], [[UV7]] + ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(f32) = G_FMA [[UV2]], [[UV5]], [[UV8]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[FMA]](f32), [[FMA1]](f32), [[FMA2]](f32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[BUILD_VECTOR]](<3 x f32>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST3]](<3 x i32>) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(<3 x i32>) = COPY $vgpr6_vgpr7_vgpr8 + %3:_(<3 x f32>) = G_BITCAST %0(<3 x i32>) + %4:_(<3 x f32>) = G_BITCAST %1(<3 x i32>) + %5:_(<3 x f32>) = G_BITCAST %2(<3 x i32>) + %6:_(<3 x f32>) = G_FMA %3, %4, %5 + %7:_(<3 x i32>) = G_BITCAST %6(<3 x f32>) + $vgpr0_vgpr1_vgpr2 = COPY %7(<3 x i32>) ... --- @@ -262,55 +342,71 @@ body: | ; SI-LABEL: name: test_fma_v4s32 ; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) - ; SI-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>) - ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[UV4]], [[UV8]] - ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[UV5]], [[UV9]] - ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[UV2]], [[UV6]], [[UV10]] - ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[UV3]], [[UV7]], [[UV11]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32), [[FMA2]](s32), [[FMA3]](s32) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[COPY]](<4 x i32>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[COPY1]](<4 x i32>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[COPY2]](<4 x i32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<4 x f32>) + ; SI-NEXT: [[UV4:%[0-9]+]]:_(f32), [[UV5:%[0-9]+]]:_(f32), [[UV6:%[0-9]+]]:_(f32), [[UV7:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f32>) + ; SI-NEXT: [[UV8:%[0-9]+]]:_(f32), [[UV9:%[0-9]+]]:_(f32), [[UV10:%[0-9]+]]:_(f32), [[UV11:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST2]](<4 x f32>) + ; SI-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[UV]], [[UV4]], [[UV8]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[UV1]], [[UV5]], [[UV9]] + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(f32) = G_FMA [[UV2]], [[UV6]], [[UV10]] + ; SI-NEXT: [[FMA3:%[0-9]+]]:_(f32) = G_FMA [[UV3]], [[UV7]], [[UV11]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[FMA]](f32), [[FMA1]](f32), [[FMA2]](f32), [[FMA3]](f32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR]](<4 x f32>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY 
[[BITCAST3]](<4 x i32>) ; ; VI-LABEL: name: test_fma_v4s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) - ; VI-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>) - ; VI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[UV4]], [[UV8]] - ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[UV5]], [[UV9]] - ; VI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[UV2]], [[UV6]], [[UV10]] - ; VI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[UV3]], [[UV7]], [[UV11]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32), [[FMA2]](s32), [[FMA3]](s32) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[COPY]](<4 x i32>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[COPY1]](<4 x i32>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[COPY2]](<4 x i32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<4 x f32>) + ; VI-NEXT: [[UV4:%[0-9]+]]:_(f32), [[UV5:%[0-9]+]]:_(f32), [[UV6:%[0-9]+]]:_(f32), [[UV7:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f32>) + ; VI-NEXT: [[UV8:%[0-9]+]]:_(f32), [[UV9:%[0-9]+]]:_(f32), [[UV10:%[0-9]+]]:_(f32), [[UV11:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST2]](<4 x f32>) + ; VI-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[UV]], [[UV4]], [[UV8]] + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[UV1]], [[UV5]], [[UV9]] + ; VI-NEXT: [[FMA2:%[0-9]+]]:_(f32) = G_FMA [[UV2]], [[UV6]], [[UV10]] + ; VI-NEXT: [[FMA3:%[0-9]+]]:_(f32) = G_FMA [[UV3]], [[UV7]], [[UV11]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[FMA]](f32), [[FMA1]](f32), [[FMA2]](f32), [[FMA3]](f32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR]](<4 x f32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST3]](<4 x i32>) ; ; GFX9-LABEL: name: test_fma_v4s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) - ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), 
[[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>) - ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[UV4]], [[UV8]] - ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[UV5]], [[UV9]] - ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[UV2]], [[UV6]], [[UV10]] - ; GFX9-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[UV3]], [[UV7]], [[UV11]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32), [[FMA2]](s32), [[FMA3]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - %2:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - %3:_(<4 x s32>) = G_FMA %0, %1, %2 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[COPY]](<4 x i32>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[COPY1]](<4 x i32>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[COPY2]](<4 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<4 x f32>) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(f32), [[UV5:%[0-9]+]]:_(f32), [[UV6:%[0-9]+]]:_(f32), [[UV7:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f32>) + ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(f32), [[UV9:%[0-9]+]]:_(f32), [[UV10:%[0-9]+]]:_(f32), [[UV11:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST2]](<4 x f32>) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[UV]], [[UV4]], [[UV8]] + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[UV1]], [[UV5]], [[UV9]] + ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(f32) = G_FMA [[UV2]], [[UV6]], [[UV10]] + ; GFX9-NEXT: [[FMA3:%[0-9]+]]:_(f32) = G_FMA [[UV3]], [[UV7]], [[UV11]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[FMA]](f32), [[FMA1]](f32), [[FMA2]](f32), [[FMA3]](f32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR]](<4 x f32>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST3]](<4 x i32>) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<4 x i32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(<4 x i32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + %3:_(<4 x f32>) = G_BITCAST %0(<4 x i32>) + %4:_(<4 x f32>) = G_BITCAST %1(<4 x i32>) + %5:_(<4 x f32>) = G_BITCAST %2(<4 x i32>) + %6:_(<4 x f32>) = G_FMA %3, %4, %5 + %7:_(<4 x i32>) = G_BITCAST %6(<4 x f32>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %7(<4 x i32>) ... 
--- @@ -322,49 +418,65 @@ body: | ; SI-LABEL: name: test_fma_v2s64 ; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; SI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY2]](<2 x s64>) - ; SI-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[UV]], [[UV2]], [[UV4]] - ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[UV1]], [[UV3]], [[UV5]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FMA]](s64), [[FMA1]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY1]](<2 x i64>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY2]](<2 x i64>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(f64), [[UV1:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST]](<2 x f64>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(f64), [[UV3:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f64>) + ; SI-NEXT: [[UV4:%[0-9]+]]:_(f64), [[UV5:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST2]](<2 x f64>) + ; SI-NEXT: [[FMA:%[0-9]+]]:_(f64) = G_FMA [[UV]], [[UV2]], [[UV4]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(f64) = G_FMA [[UV1]], [[UV3]], [[UV5]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f64>) = G_BUILD_VECTOR [[FMA]](f64), [[FMA1]](f64) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BUILD_VECTOR]](<2 x f64>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST3]](<2 x i64>) ; ; VI-LABEL: name: test_fma_v2s64 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; VI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY2]](<2 x s64>) - ; VI-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[UV]], [[UV2]], [[UV4]] - ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[UV1]], [[UV3]], [[UV5]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FMA]](s64), [[FMA1]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY1]](<2 x i64>) + ; 
VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY2]](<2 x i64>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(f64), [[UV1:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST]](<2 x f64>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(f64), [[UV3:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f64>) + ; VI-NEXT: [[UV4:%[0-9]+]]:_(f64), [[UV5:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST2]](<2 x f64>) + ; VI-NEXT: [[FMA:%[0-9]+]]:_(f64) = G_FMA [[UV]], [[UV2]], [[UV4]] + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(f64) = G_FMA [[UV1]], [[UV3]], [[UV5]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f64>) = G_BUILD_VECTOR [[FMA]](f64), [[FMA1]](f64) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BUILD_VECTOR]](<2 x f64>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST3]](<2 x i64>) ; ; GFX9-LABEL: name: test_fma_v2s64 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY2]](<2 x s64>) - ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[UV]], [[UV2]], [[UV4]] - ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[UV1]], [[UV3]], [[UV5]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FMA]](s64), [[FMA1]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - %2:_(<2 x s64>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - %3:_(<2 x s64>) = G_FMA %0, %1, %2 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY1]](<2 x i64>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY2]](<2 x i64>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f64), [[UV1:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST]](<2 x f64>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(f64), [[UV3:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f64>) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(f64), [[UV5:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST2]](<2 x f64>) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(f64) = G_FMA [[UV]], [[UV2]], [[UV4]] + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(f64) = G_FMA [[UV1]], [[UV3]], [[UV5]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f64>) = G_BUILD_VECTOR [[FMA]](f64), [[FMA1]](f64) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BUILD_VECTOR]](<2 x f64>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST3]](<2 x i64>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(<2 x i64>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + %3:_(<2 x f64>) = G_BITCAST %0(<2 x i64>) + %4:_(<2 x f64>) = G_BITCAST %1(<2 x i64>) + %5:_(<2 x f64>) = G_BITCAST %2(<2 x i64>) + %6:_(<2 x f64>) = G_FMA %3, %4, %5 + %7:_(<2 x i64>) = G_BITCAST 
%6(<2 x f64>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %7(<2 x i64>) ... --- @@ -376,80 +488,118 @@ body: | ; SI-LABEL: name: test_fma_v2s16 ; SI: liveins: $vgpr0, $vgpr1, $vgpr2 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) - ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], [[FPEXT2]] - ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA]](s32) - ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FPEXT3]], [[FPEXT4]], [[FPEXT5]] - ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA1]](s32) - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY1]](<2 x i16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY2]](<2 x i16>) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %24(i16) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %30(i16) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %25(i16) + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST %31(i16) + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST1]](<2 x f16>) + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST7]](<2 x i16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST8]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST8]], [[C]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST9]](<2 x i16>) + ; SI-NEXT: 
[[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST10]](i32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST10]], [[C]](i32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; SI-NEXT: [[BITCAST11:%[0-9]+]]:_(f16) = G_BITCAST %35(i16) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST3]](f16) + ; SI-NEXT: [[BITCAST12:%[0-9]+]]:_(f16) = G_BITCAST %36(i16) + ; SI-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST2]](<2 x f16>) + ; SI-NEXT: [[BITCAST14:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST13]](<2 x i16>) + ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST14]](i32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST14]], [[C]](i32) + ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST4]](f16) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST11]](f16) + ; SI-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[FPEXT]], [[FPEXT1]], [[FPEXT2]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMA]](f32) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST5]](f16) + ; SI-NEXT: [[FPEXT4:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST6]](f16) + ; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST12]](f16) + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[FPEXT3]], [[FPEXT4]], [[FPEXT5]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMA1]](f32) + ; SI-NEXT: [[BITCAST15:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; SI-NEXT: [[BITCAST16:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST15]](i16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST16]](i16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST17:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[BITCAST18:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST17]](<2 x f16>) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST18]](<2 x i16>) ; ; VI-LABEL: name: test_fma_v2s16 ; VI: liveins: $vgpr0, $vgpr1, $vgpr2 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; VI-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC2]], [[TRUNC4]] - ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s16) = G_FMA [[TRUNC1]], [[TRUNC3]], [[TRUNC5]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMA]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMA1]](s16) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI-NEXT: 
[[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY1]](<2 x i16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY2]](<2 x i16>) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %16(i16) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %22(i16) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %17(i16) + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST %23(i16) + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST1]](<2 x f16>) + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST7]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST8]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST8]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST9]](<2 x i16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST10]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST10]], [[C]](i32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[BITCAST11:%[0-9]+]]:_(f16) = G_BITCAST %27(i16) + ; VI-NEXT: [[FMA:%[0-9]+]]:_(f16) = G_FMA [[BITCAST3]], [[BITCAST4]], [[BITCAST11]] + ; VI-NEXT: [[BITCAST12:%[0-9]+]]:_(f16) = G_BITCAST %28(i16) + ; VI-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST2]](<2 x f16>) + ; VI-NEXT: [[BITCAST14:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST13]](<2 x i16>) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST14]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST14]], [[C]](i32) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(f16) = G_FMA [[BITCAST5]], [[BITCAST6]], [[BITCAST12]] + ; VI-NEXT: [[BITCAST15:%[0-9]+]]:_(i16) = G_BITCAST [[FMA]](f16) + ; VI-NEXT: [[BITCAST16:%[0-9]+]]:_(i16) = G_BITCAST [[FMA1]](f16) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST15]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST16]](i16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST17:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[BITCAST18:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST17]](<2 x f16>) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST18]](<2 x i16>) ; ; GFX9-LABEL: name: test_fma_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(<2 x s16>) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; GFX9-NEXT: $vgpr0 = COPY [[FMA]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = COPY $vgpr2 - %3:_(<2 x s16>) = G_FMA %0, %1, %2 - $vgpr0 = COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + 
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY2]](<2 x i16>) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(<2 x f16>) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[FMA]](<2 x f16>) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = COPY $vgpr2 + %3:_(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %4:_(<2 x f16>) = G_BITCAST %1(<2 x i16>) + %5:_(<2 x f16>) = G_BITCAST %2(<2 x i16>) + %6:_(<2 x f16>) = G_FMA %3, %4, %5 + %7:_(<2 x i16>) = G_BITCAST %6(<2 x f16>) + $vgpr0 = COPY %7(<2 x i16>) ... --- @@ -461,174 +611,234 @@ body: | ; SI-LABEL: name: test_fma_v3s16 ; SI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5, $vgpr6_vgpr7_vgpr8 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr6_vgpr7_vgpr8 - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; SI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<6 x s16>) - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) - ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], [[FPEXT2]] - ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA]](s32) - ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) - ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FPEXT3]], [[FPEXT4]], [[FPEXT5]] - ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = 
G_FPTRUNC [[FMA1]](s32) - ; SI-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; SI-NEXT: [[FPEXT8:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC8]](s16) - ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FPEXT6]], [[FPEXT7]], [[FPEXT8]] - ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA2]](s32) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C1]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C1]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL2]] - ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr3_vgpr4_vgpr5 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr6_vgpr7_vgpr8 + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; SI-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<6 x i16>) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST5]](i32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST5]], [[C]](i32) + ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST6]](i32) + ; SI-NEXT: 
[[BITCAST7:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; SI-NEXT: [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>), [[UV8:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY2]](<6 x i16>) + ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[UV6]](<2 x i16>) + ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST10]](i32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST10]], [[C]](i32) + ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; SI-NEXT: [[BITCAST11:%[0-9]+]]:_(i32) = G_BITCAST [[UV7]](<2 x i16>) + ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST11]](i32) + ; SI-NEXT: [[BITCAST12:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC6]](i16) + ; SI-NEXT: [[BITCAST13:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC7]](i16) + ; SI-NEXT: [[BITCAST14:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC8]](i16) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST7]](f16) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST12]](f16) + ; SI-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[FPEXT]], [[FPEXT1]], [[FPEXT2]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMA]](f32) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST3]](f16) + ; SI-NEXT: [[FPEXT4:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST8]](f16) + ; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST13]](f16) + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[FPEXT3]], [[FPEXT4]], [[FPEXT5]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMA1]](f32) + ; SI-NEXT: [[FPEXT6:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST4]](f16) + ; SI-NEXT: [[FPEXT7:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST9]](f16) + ; SI-NEXT: [[FPEXT8:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST14]](f16) + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(f32) = G_FMA [[FPEXT6]], [[FPEXT7]], [[FPEXT8]] + ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMA2]](f32) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV9:%[0-9]+]]:_(<2 x i16>), [[UV10:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; SI-NEXT: [[BITCAST15:%[0-9]+]]:_(i32) = G_BITCAST [[UV9]](<2 x i16>) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST15]], [[C]](i32) + ; SI-NEXT: [[BITCAST16:%[0-9]+]]:_(i32) = G_BITCAST [[UV10]](<2 x i16>) + ; SI-NEXT: [[BITCAST17:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; SI-NEXT: [[BITCAST18:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; SI-NEXT: [[BITCAST19:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC2]](f16) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST17]](i16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST18]](i16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST20:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST19]](i16) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST15]], [[C1]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND]], [[C]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI-NEXT: [[BITCAST21:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST16]], [[C1]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR3]], [[SHL2]] + ; SI-NEXT: 
[[BITCAST22:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST20]](<2 x i16>), [[BITCAST21]](<2 x i16>), [[BITCAST22]](<2 x i16>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; VI-LABEL: name: test_fma_v3s16 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5, $vgpr6_vgpr7_vgpr8 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr6_vgpr7_vgpr8 - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<6 x s16>) - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; VI-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC3]], [[TRUNC6]] - ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s16) = G_FMA [[TRUNC1]], [[TRUNC4]], [[TRUNC7]] - ; VI-NEXT: [[FMA2:%[0-9]+]]:_(s16) = G_FMA [[TRUNC2]], [[TRUNC5]], [[TRUNC8]] - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMA]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMA1]](s16) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMA2]](s16) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C1]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) 
= G_OR [[ZEXT2]], [[SHL1]] - ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C1]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL2]] - ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr3_vgpr4_vgpr5 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr6_vgpr7_vgpr8 + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; VI-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<6 x i16>) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST5]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST5]], [[C]](i32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST6]](i32) + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; VI-NEXT: [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>), [[UV8:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY2]](<6 x i16>) + ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[UV6]](<2 x i16>) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST10]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST10]], [[C]](i32) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; VI-NEXT: [[BITCAST11:%[0-9]+]]:_(i32) = G_BITCAST [[UV7]](<2 x i16>) + ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST11]](i32) + ; VI-NEXT: [[BITCAST12:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC6]](i16) + ; VI-NEXT: [[BITCAST13:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC7]](i16) + ; VI-NEXT: [[BITCAST14:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC8]](i16) + ; VI-NEXT: [[FMA:%[0-9]+]]:_(f16) = G_FMA [[BITCAST2]], [[BITCAST7]], [[BITCAST12]] + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(f16) = G_FMA [[BITCAST3]], [[BITCAST8]], [[BITCAST13]] + ; VI-NEXT: [[FMA2:%[0-9]+]]:_(f16) = G_FMA [[BITCAST4]], [[BITCAST9]], [[BITCAST14]] + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV9:%[0-9]+]]:_(<2 x i16>), [[UV10:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 
x i16>) + ; VI-NEXT: [[BITCAST15:%[0-9]+]]:_(i32) = G_BITCAST [[UV9]](<2 x i16>) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST15]], [[C]](i32) + ; VI-NEXT: [[BITCAST16:%[0-9]+]]:_(i32) = G_BITCAST [[UV10]](<2 x i16>) + ; VI-NEXT: [[BITCAST17:%[0-9]+]]:_(i16) = G_BITCAST [[FMA]](f16) + ; VI-NEXT: [[BITCAST18:%[0-9]+]]:_(i16) = G_BITCAST [[FMA1]](f16) + ; VI-NEXT: [[BITCAST19:%[0-9]+]]:_(i16) = G_BITCAST [[FMA2]](f16) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST17]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST18]](i16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST20:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST19]](i16) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST15]], [[C1]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND]], [[C]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI-NEXT: [[BITCAST21:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST16]], [[C1]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR3]], [[SHL2]] + ; VI-NEXT: [[BITCAST22:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST20]](<2 x i16>), [[BITCAST21]](<2 x i16>), [[BITCAST22]](<2 x i16>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX9-LABEL: name: test_fma_v3s16 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5, $vgpr6_vgpr7_vgpr8 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr6_vgpr7_vgpr8 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<6 x s16>) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(<2 x s16>) = G_FMA [[UV]], [[UV3]], [[UV6]] - ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(<2 x s16>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[FMA]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) 
= G_TRUNC [[BITCAST3]](s32) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[FMA1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST6]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[TRUNC6]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC7]](s16), [[TRUNC8]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) - %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - %2:_(<6 x s16>) = COPY $vgpr6_vgpr7_vgpr8 - %3:_(<3 x s16>), %4:_(<3 x s16>) = G_UNMERGE_VALUES %0 - %5:_(<3 x s16>), %6:_(<3 x s16>) = G_UNMERGE_VALUES %1 - %7:_(<3 x s16>), %8:_(<3 x s16>) = G_UNMERGE_VALUES %2 - %9:_(<3 x s16>) = G_FMA %3, %5, %7 - %10:_(<3 x s16>) = G_IMPLICIT_DEF - %11:_(<6 x s16>) = G_CONCAT_VECTORS %9, %10 - $vgpr0_vgpr1_vgpr2 = COPY %11 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr6_vgpr7_vgpr8 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<6 x i16>) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST5]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST5]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST6]](i32) + ; GFX9-NEXT: 
[[BITCAST7:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX9-NEXT: [[BITCAST9:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>), [[UV8:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY2]](<6 x i16>) + ; GFX9-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[UV6]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST10]](i32) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST10]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX9-NEXT: [[BITCAST11:%[0-9]+]]:_(i32) = G_BITCAST [[UV7]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST11]](i32) + ; GFX9-NEXT: [[BITCAST12:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC6]](i16) + ; GFX9-NEXT: [[BITCAST13:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC7]](i16) + ; GFX9-NEXT: [[BITCAST14:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC8]](i16) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(f16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[BITCAST2]](f16), [[BITCAST3]](f16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[BITCAST4]](f16), [[DEF]](f16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[BITCAST7]](f16), [[BITCAST8]](f16) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[BITCAST9]](f16), [[DEF]](f16) + ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[BITCAST12]](f16), [[BITCAST13]](f16) + ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[BITCAST14]](f16), [[DEF]](f16) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(<2 x f16>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR2]], [[BUILD_VECTOR4]] + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(<2 x f16>) = G_FMA [[BUILD_VECTOR1]], [[BUILD_VECTOR3]], [[BUILD_VECTOR5]] + ; GFX9-NEXT: [[BITCAST15:%[0-9]+]]:_(f16) = G_BITCAST %106(i16) + ; GFX9-NEXT: [[BITCAST16:%[0-9]+]]:_(f16) = G_BITCAST %112(i16) + ; GFX9-NEXT: [[BITCAST17:%[0-9]+]]:_(f16) = G_BITCAST %107(i16) + ; GFX9-NEXT: [[BITCAST18:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[FMA1]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST19:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST18]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC9:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST19]](i32) + ; GFX9-NEXT: [[BITCAST20:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[FMA]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST21:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST20]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC10:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST21]](i32) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST21]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC11:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV9:%[0-9]+]]:_(<2 x i16>), [[UV10:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF1]](<4 x i16>) + ; GFX9-NEXT: [[BITCAST22:%[0-9]+]]:_(i32) = G_BITCAST [[UV9]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC12:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST22]](i32) + ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST22]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC13:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR4]](i32) + ; GFX9-NEXT: [[BITCAST23:%[0-9]+]]:_(i32) = G_BITCAST [[UV10]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC14:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST23]](i32) + ; GFX9-NEXT: [[BITCAST24:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST15]](f16) + ; GFX9-NEXT: [[BITCAST25:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST17]](f16) + ; GFX9-NEXT: [[BITCAST26:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST16]](f16) + ; GFX9-NEXT: 
[[BUILD_VECTOR6:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST24]](i16), [[BITCAST25]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST26]](i16), [[TRUNC12]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR8:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC13]](i16), [[TRUNC14]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR6]](<2 x i16>), [[BUILD_VECTOR7]](<2 x i16>), [[BUILD_VECTOR8]](<2 x i16>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) + %0:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<6 x i16>) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(<6 x i16>) = COPY $vgpr6_vgpr7_vgpr8 + %3:_(<3 x i16>), %4:_(<3 x i16>) = G_UNMERGE_VALUES %0(<6 x i16>) + %5:_(<3 x i16>), %6:_(<3 x i16>) = G_UNMERGE_VALUES %1(<6 x i16>) + %7:_(<3 x i16>), %8:_(<3 x i16>) = G_UNMERGE_VALUES %2(<6 x i16>) + %9:_(<3 x f16>) = G_BITCAST %3(<3 x i16>) + %10:_(<3 x f16>) = G_BITCAST %5(<3 x i16>) + %11:_(<3 x f16>) = G_BITCAST %7(<3 x i16>) + %12:_(<3 x f16>) = G_FMA %9, %10, %11 + %13:_(<3 x i16>) = G_IMPLICIT_DEF + %14:_(<3 x i16>) = G_BITCAST %12(<3 x f16>) + %15:_(<6 x i16>) = G_CONCAT_VECTORS %14(<3 x i16>), %13(<3 x i16>) + $vgpr0_vgpr1_vgpr2 = COPY %15(<6 x i16>) ... @@ -641,137 +851,197 @@ body: | ; SI-LABEL: name: test_fma_v4s16 ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; SI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; SI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; SI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) - ; 
SI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC8]](s16) - ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], [[FPEXT2]] - ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA]](s32) - ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC9]](s16) - ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FPEXT3]], [[FPEXT4]], [[FPEXT5]] - ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA1]](s32) - ; SI-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) - ; SI-NEXT: [[FPEXT8:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC10]](s16) - ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FPEXT6]], [[FPEXT7]], [[FPEXT8]] - ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA2]](s32) - ; SI-NEXT: [[FPEXT9:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI-NEXT: [[FPEXT10:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) - ; SI-NEXT: [[FPEXT11:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC11]](s16) - ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FPEXT9]], [[FPEXT10]], [[FPEXT11]] - ; SI-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA3]](s32) - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) - ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr4_vgpr5 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY1]](<4 x i16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY2]](<4 x i16>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x f16>) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %48(i16) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %54(i16) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %49(i16) + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST %55(i16) + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST7]](<2 x i16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST8]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST8]], [[C]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; 
SI-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST9]](<2 x i16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST10]](i32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST10]], [[C]](i32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x f16>), [[UV3:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f16>) + ; SI-NEXT: [[BITCAST11:%[0-9]+]]:_(f16) = G_BITCAST %59(i16) + ; SI-NEXT: [[BITCAST12:%[0-9]+]]:_(f16) = G_BITCAST %64(i16) + ; SI-NEXT: [[BITCAST13:%[0-9]+]]:_(f16) = G_BITCAST %60(i16) + ; SI-NEXT: [[BITCAST14:%[0-9]+]]:_(f16) = G_BITCAST %65(i16) + ; SI-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV3]](<2 x f16>) + ; SI-NEXT: [[BITCAST16:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST15]](<2 x i16>) + ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST16]](i32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST16]], [[C]](i32) + ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; SI-NEXT: [[BITCAST17:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV2]](<2 x f16>) + ; SI-NEXT: [[BITCAST18:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST17]](<2 x i16>) + ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST18]](i32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST18]], [[C]](i32) + ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; SI-NEXT: [[UV4:%[0-9]+]]:_(<2 x f16>), [[UV5:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST2]](<4 x f16>) + ; SI-NEXT: [[BITCAST19:%[0-9]+]]:_(f16) = G_BITCAST %69(i16) + ; SI-NEXT: [[BITCAST20:%[0-9]+]]:_(f16) = G_BITCAST %74(i16) + ; SI-NEXT: [[BITCAST21:%[0-9]+]]:_(f16) = G_BITCAST %70(i16) + ; SI-NEXT: [[BITCAST22:%[0-9]+]]:_(f16) = G_BITCAST %75(i16) + ; SI-NEXT: [[BITCAST23:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV5]](<2 x f16>) + ; SI-NEXT: [[BITCAST24:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST23]](<2 x i16>) + ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST24]](i32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST24]], [[C]](i32) + ; SI-NEXT: [[TRUNC9:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR4]](i32) + ; SI-NEXT: [[BITCAST25:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV4]](<2 x f16>) + ; SI-NEXT: [[BITCAST26:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST25]](<2 x i16>) + ; SI-NEXT: [[TRUNC10:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST26]](i32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST26]], [[C]](i32) + ; SI-NEXT: [[TRUNC11:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR5]](i32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST3]](f16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST11]](f16) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST19]](f16) + ; SI-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[FPEXT]], [[FPEXT1]], [[FPEXT2]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMA]](f32) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST5]](f16) + ; SI-NEXT: [[FPEXT4:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST13]](f16) + ; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST21]](f16) + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[FPEXT3]], [[FPEXT4]], [[FPEXT5]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMA1]](f32) + ; SI-NEXT: [[FPEXT6:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST4]](f16) + ; SI-NEXT: [[FPEXT7:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST12]](f16) + ; SI-NEXT: [[FPEXT8:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST20]](f16) + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(f32) = G_FMA [[FPEXT6]], [[FPEXT7]], [[FPEXT8]] + ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMA2]](f32) + ; SI-NEXT: [[FPEXT9:%[0-9]+]]:_(f32) = 
G_FPEXT [[BITCAST6]](f16) + ; SI-NEXT: [[FPEXT10:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST14]](f16) + ; SI-NEXT: [[FPEXT11:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST22]](f16) + ; SI-NEXT: [[FMA3:%[0-9]+]]:_(f32) = G_FMA [[FPEXT9]], [[FPEXT10]], [[FPEXT11]] + ; SI-NEXT: [[FPTRUNC3:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMA3]](f32) + ; SI-NEXT: [[BITCAST27:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; SI-NEXT: [[BITCAST28:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST27]](i16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST28]](i16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST29:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[BITCAST30:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC2]](f16) + ; SI-NEXT: [[BITCAST31:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC3]](f16) + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST30]](i16) + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST31]](i16) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI-NEXT: [[BITCAST32:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR1]](i32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[BITCAST29]](<2 x f16>), [[BITCAST32]](<2 x f16>) + ; SI-NEXT: [[BITCAST33:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x f16>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST33]](<4 x i16>) ; ; VI-LABEL: name: test_fma_v4s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; VI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; VI-NEXT: 
[[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; VI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) - ; VI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; VI-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC4]], [[TRUNC8]] - ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s16) = G_FMA [[TRUNC1]], [[TRUNC5]], [[TRUNC9]] - ; VI-NEXT: [[FMA2:%[0-9]+]]:_(s16) = G_FMA [[TRUNC2]], [[TRUNC6]], [[TRUNC10]] - ; VI-NEXT: [[FMA3:%[0-9]+]]:_(s16) = G_FMA [[TRUNC3]], [[TRUNC7]], [[TRUNC11]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMA]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMA1]](s16) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMA2]](s16) - ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FMA3]](s16) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr4_vgpr5 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY1]](<4 x i16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY2]](<4 x i16>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x f16>) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %32(i16) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %38(i16) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %33(i16) + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST %39(i16) + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST7]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST8]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST8]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST9]](<2 x i16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST10]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST10]], [[C]](i32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x f16>), [[UV3:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f16>) + ; VI-NEXT: [[BITCAST11:%[0-9]+]]:_(f16) = G_BITCAST %43(i16) + ; VI-NEXT: [[BITCAST12:%[0-9]+]]:_(f16) = G_BITCAST %48(i16) + ; VI-NEXT: [[BITCAST13:%[0-9]+]]:_(f16) = G_BITCAST %44(i16) + ; VI-NEXT: [[BITCAST14:%[0-9]+]]:_(f16) = G_BITCAST %49(i16) + ; VI-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV3]](<2 x f16>) + ; VI-NEXT: [[BITCAST16:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST15]](<2 x 
i16>) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST16]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST16]], [[C]](i32) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; VI-NEXT: [[BITCAST17:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV2]](<2 x f16>) + ; VI-NEXT: [[BITCAST18:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST17]](<2 x i16>) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST18]](i32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST18]], [[C]](i32) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; VI-NEXT: [[UV4:%[0-9]+]]:_(<2 x f16>), [[UV5:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST2]](<4 x f16>) + ; VI-NEXT: [[BITCAST19:%[0-9]+]]:_(f16) = G_BITCAST %53(i16) + ; VI-NEXT: [[BITCAST20:%[0-9]+]]:_(f16) = G_BITCAST %58(i16) + ; VI-NEXT: [[BITCAST21:%[0-9]+]]:_(f16) = G_BITCAST %54(i16) + ; VI-NEXT: [[BITCAST22:%[0-9]+]]:_(f16) = G_BITCAST %59(i16) + ; VI-NEXT: [[BITCAST23:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV5]](<2 x f16>) + ; VI-NEXT: [[BITCAST24:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST23]](<2 x i16>) + ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST24]](i32) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST24]], [[C]](i32) + ; VI-NEXT: [[TRUNC9:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR4]](i32) + ; VI-NEXT: [[BITCAST25:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV4]](<2 x f16>) + ; VI-NEXT: [[BITCAST26:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST25]](<2 x i16>) + ; VI-NEXT: [[TRUNC10:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST26]](i32) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST26]], [[C]](i32) + ; VI-NEXT: [[TRUNC11:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR5]](i32) + ; VI-NEXT: [[FMA:%[0-9]+]]:_(f16) = G_FMA [[BITCAST3]], [[BITCAST11]], [[BITCAST19]] + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(f16) = G_FMA [[BITCAST5]], [[BITCAST13]], [[BITCAST21]] + ; VI-NEXT: [[FMA2:%[0-9]+]]:_(f16) = G_FMA [[BITCAST4]], [[BITCAST12]], [[BITCAST20]] + ; VI-NEXT: [[FMA3:%[0-9]+]]:_(f16) = G_FMA [[BITCAST6]], [[BITCAST14]], [[BITCAST22]] + ; VI-NEXT: [[BITCAST27:%[0-9]+]]:_(i16) = G_BITCAST [[FMA]](f16) + ; VI-NEXT: [[BITCAST28:%[0-9]+]]:_(i16) = G_BITCAST [[FMA1]](f16) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST27]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST28]](i16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST29:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[BITCAST30:%[0-9]+]]:_(i16) = G_BITCAST [[FMA2]](f16) + ; VI-NEXT: [[BITCAST31:%[0-9]+]]:_(i16) = G_BITCAST [[FMA3]](f16) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST30]](i16) + ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST31]](i16) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI-NEXT: [[BITCAST32:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR1]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[BITCAST29]](<2 x f16>), [[BITCAST32]](<2 x f16>) + ; VI-NEXT: [[BITCAST33:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x f16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST33]](<4 x i16>) ; ; GFX9-LABEL: name: test_fma_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 - ; GFX9-NEXT: 
[[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) - ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(<2 x s16>) = G_FMA [[UV]], [[UV2]], [[UV4]] - ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(<2 x s16>) = G_FMA [[UV1]], [[UV3]], [[UV5]] - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[FMA]](<2 x s16>), [[FMA1]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 - %2:_(<4 x s16>) = COPY $vgpr4_vgpr5 - %3:_(<4 x s16>) = G_FMA %0, %1, %2 - $vgpr0_vgpr1 = COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr4_vgpr5 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY1]](<4 x i16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY2]](<4 x i16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x f16>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x f16>), [[UV3:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f16>) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<2 x f16>), [[UV5:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST2]](<4 x f16>) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(<2 x f16>) = G_FMA [[UV]], [[UV2]], [[UV4]] + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(<2 x f16>) = G_FMA [[UV1]], [[UV3]], [[UV5]] + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[FMA]](<2 x f16>), [[FMA1]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x f16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST3]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = COPY $vgpr2_vgpr3 + %2:_(<4 x i16>) = COPY $vgpr4_vgpr5 + %3:_(<4 x f16>) = G_BITCAST %0(<4 x i16>) + %4:_(<4 x f16>) = G_BITCAST %1(<4 x i16>) + %5:_(<4 x f16>) = G_BITCAST %2(<4 x i16>) + %6:_(<4 x f16>) = G_FMA %3, %4, %5 + %7:_(<4 x i16>) = G_BITCAST %6(<4 x f16>) + $vgpr0_vgpr1 = COPY %7(<4 x i16>) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s16.mir index 8b7066985c5c6..e344852fb75b7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s16.mir @@ -18,56 +18,74 @@ body: | ; GFX6-LABEL: name: test_fmad_s16_flush ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX6-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX6-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX6-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[FPEXT1]] - ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; GFX6-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC]](s16) - ; GFX6-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT2]], [[FPEXT3]] - ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) - ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) - ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX6-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX6-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; GFX6-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST1]](f16) + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT]], [[FPEXT1]] + ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMUL]](f32) + ; GFX6-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[FPTRUNC]](f16) + ; GFX6-NEXT: [[FPEXT3:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FPEXT2]], [[FPEXT3]] + ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD]](f32) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST3]](i16) + ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + ; ; GFX7-LABEL: name: test_fmad_s16_flush ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX7-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX7-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX7-NEXT: [[FMAD:%[0-9]+]]:_(s16) = G_FMAD [[TRUNC]], [[TRUNC1]], [[TRUNC2]] - ; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMAD]](s16) - ; GFX7-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(i32) 
= COPY $vgpr1 + ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX7-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX7-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX7-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX7-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX7-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX7-NEXT: [[FMAD:%[0-9]+]]:_(f16) = G_FMAD [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX7-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[FMAD]](f16) + ; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST3]](i16) + ; GFX7-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + ; ; GFX10-LABEL: name: test_fmad_s16_flush ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC1]] - ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[FMUL]], [[TRUNC2]] - ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) - ; GFX10-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr1 - %3:_(s16) = G_TRUNC %0 - %4:_(s16) = G_TRUNC %1 - %5:_(s16) = G_TRUNC %2 - %6:_(s16) = G_FMAD %3, %4, %5 - %7:_(s32) = G_ANYEXT %6 - $vgpr0 = COPY %7 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(f16) = G_FADD [[FMUL]], [[BITCAST2]] + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[FADD]](f16) + ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST3]](i16) + ; GFX10-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr1 + %3:_(i16) = G_TRUNC %0(i32) + %4:_(i16) = G_TRUNC %1(i32) + %5:_(i16) = G_TRUNC %2(i32) + %6:_(f16) = G_BITCAST %3(i16) + %7:_(f16) = G_BITCAST %4(i16) + %8:_(f16) = G_BITCAST %5(i16) + %9:_(f16) = G_FMAD %6, %7, %8 + %10:_(i16) = G_BITCAST %9(f16) + %11:_(i32) = G_ANYEXT %10(i16) + $vgpr0 = COPY %11(i32) ... 
--- @@ -84,101 +102,150 @@ body: | ; GFX6-LABEL: name: test_fmad_v2s16_flush ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX6-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) - ; GFX6-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX6-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX6-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[FPEXT1]] - ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; GFX6-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC]](s16) - ; GFX6-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT2]], [[FPEXT3]] - ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) - ; GFX6-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; GFX6-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[FPEXT5]] - ; GFX6-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) - ; GFX6-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC2]](s16) - ; GFX6-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FPEXT6]], [[FPEXT7]] - ; GFX6-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD1]](s32) - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY2]](<2 x i16>) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %30(i16) + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %36(i16) + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %31(i16) + ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST %37(i16) + ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST1]](<2 x f16>) + ; GFX6-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST7]](<2 x i16>) + ; 
GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST8]](i32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST8]], [[C]](i32) + ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX6-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; GFX6-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST9]](<2 x i16>) + ; GFX6-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST10]](i32) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST10]], [[C]](i32) + ; GFX6-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX6-NEXT: [[BITCAST11:%[0-9]+]]:_(f16) = G_BITCAST %41(i16) + ; GFX6-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST3]](f16) + ; GFX6-NEXT: [[BITCAST12:%[0-9]+]]:_(f16) = G_BITCAST %42(i16) + ; GFX6-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST2]](<2 x f16>) + ; GFX6-NEXT: [[BITCAST14:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST13]](<2 x i16>) + ; GFX6-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST14]](i32) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST14]], [[C]](i32) + ; GFX6-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX6-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST4]](f16) + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT]], [[FPEXT1]] + ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMUL]](f32) + ; GFX6-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[FPTRUNC]](f16) + ; GFX6-NEXT: [[FPEXT3:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST11]](f16) + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FPEXT2]], [[FPEXT3]] + ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD]](f32) + ; GFX6-NEXT: [[FPEXT4:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST5]](f16) + ; GFX6-NEXT: [[FPEXT5:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST6]](f16) + ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT4]], [[FPEXT5]] + ; GFX6-NEXT: [[FPTRUNC2:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMUL1]](f32) + ; GFX6-NEXT: [[FPEXT6:%[0-9]+]]:_(f32) = G_FPEXT [[FPTRUNC2]](f16) + ; GFX6-NEXT: [[FPEXT7:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST12]](f16) + ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FPEXT6]], [[FPEXT7]] + ; GFX6-NEXT: [[FPTRUNC3:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD1]](f32) + ; GFX6-NEXT: [[BITCAST15:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; GFX6-NEXT: [[BITCAST16:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC3]](f16) + ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST15]](i16) + ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST16]](i16) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; GFX6-NEXT: [[BITCAST17:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; GFX6-NEXT: [[BITCAST18:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST17]](<2 x f16>) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST18]](<2 x i16>) + ; ; GFX7-LABEL: name: test_fmad_v2s16_flush ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX7-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX7-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX7-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX7-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = 
G_BITCAST [[COPY1]](<2 x s16>) - ; GFX7-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX7-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX7-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX7-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) - ; GFX7-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX7-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX7-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX7-NEXT: [[FMAD:%[0-9]+]]:_(s16) = G_FMAD [[TRUNC]], [[TRUNC2]], [[TRUNC4]] - ; GFX7-NEXT: [[FMAD1:%[0-9]+]]:_(s16) = G_FMAD [[TRUNC1]], [[TRUNC3]], [[TRUNC5]] - ; GFX7-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMAD]](s16) - ; GFX7-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMAD1]](s16) - ; GFX7-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX7-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX7-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX7-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; GFX7-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX7-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX7-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY2]](<2 x i16>) + ; GFX7-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %16(i16) + ; GFX7-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %22(i16) + ; GFX7-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %17(i16) + ; GFX7-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST %23(i16) + ; GFX7-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST1]](<2 x f16>) + ; GFX7-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST7]](<2 x i16>) + ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST8]](i32) + ; GFX7-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST8]], [[C]](i32) + ; GFX7-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX7-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; GFX7-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST9]](<2 x i16>) + ; GFX7-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST10]](i32) + ; GFX7-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST10]], [[C]](i32) + ; GFX7-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX7-NEXT: [[BITCAST11:%[0-9]+]]:_(f16) = G_BITCAST %27(i16) + ; GFX7-NEXT: [[FMAD:%[0-9]+]]:_(f16) = G_FMAD [[BITCAST3]], [[BITCAST4]], [[BITCAST11]] + ; GFX7-NEXT: [[BITCAST12:%[0-9]+]]:_(f16) = G_BITCAST %28(i16) + ; GFX7-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST2]](<2 x f16>) + ; GFX7-NEXT: [[BITCAST14:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST13]](<2 x i16>) + ; GFX7-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST14]](i32) + ; GFX7-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST14]], [[C]](i32) + ; GFX7-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX7-NEXT: [[FMAD1:%[0-9]+]]:_(f16) = G_FMAD [[BITCAST5]], [[BITCAST6]], [[BITCAST12]] + ; GFX7-NEXT: [[BITCAST15:%[0-9]+]]:_(i16) = G_BITCAST [[FMAD]](f16) + ; GFX7-NEXT: [[BITCAST16:%[0-9]+]]:_(i16) = G_BITCAST [[FMAD1]](f16) + ; GFX7-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST15]](i16) + ; GFX7-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST16]](i16) + ; GFX7-NEXT: 
[[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; GFX7-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; GFX7-NEXT: [[BITCAST17:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; GFX7-NEXT: [[BITCAST18:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST17]](<2 x f16>) + ; GFX7-NEXT: $vgpr0 = COPY [[BITCAST18]](<2 x i16>) + ; ; GFX10-LABEL: name: test_fmad_v2s16_flush ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) - ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX10-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC2]] - ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[FMUL]], [[TRUNC4]] - ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[TRUNC3]] - ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[FMUL1]], [[TRUNC5]] - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16) - ; GFX10-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = COPY $vgpr2 - %3:_(<2 x s16>) = G_FMAD %0, %1, %2 - $vgpr0 = COPY %3 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY2]](<2 x i16>) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %18(i16) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %24(i16) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %19(i16) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST %25(i16) + ; GFX10-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST1]](<2 x f16>) + ; GFX10-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST7]](<2 x i16>) + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST8]](i32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST8]], [[C]](i32) + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; GFX10-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST9]](<2 x i16>) + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST10]](i32) + ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR 
[[BITCAST10]], [[C]](i32) + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX10-NEXT: [[BITCAST11:%[0-9]+]]:_(f16) = G_BITCAST %29(i16) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST3]], [[BITCAST4]] + ; GFX10-NEXT: [[BITCAST12:%[0-9]+]]:_(f16) = G_BITCAST %30(i16) + ; GFX10-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST2]](<2 x f16>) + ; GFX10-NEXT: [[BITCAST14:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST13]](<2 x i16>) + ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST14]](i32) + ; GFX10-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST14]], [[C]](i32) + ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(f16) = G_FADD [[FMUL]], [[BITCAST11]] + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST5]], [[BITCAST6]] + ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(f16) = G_FADD [[FMUL1]], [[BITCAST12]] + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[FADD]](f16), [[FADD1]](f16) + ; GFX10-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BUILD_VECTOR]](<2 x f16>) + ; GFX10-NEXT: $vgpr0 = COPY [[BITCAST15]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = COPY $vgpr2 + %3:_(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %4:_(<2 x f16>) = G_BITCAST %1(<2 x i16>) + %5:_(<2 x f16>) = G_BITCAST %2(<2 x i16>) + %6:_(<2 x f16>) = G_FMAD %3, %4, %5 + %7:_(<2 x i16>) = G_BITCAST %6(<2 x f16>) + $vgpr0 = COPY %7(<2 x i16>) ... --- @@ -195,182 +262,262 @@ body: | ; GFX6-LABEL: name: test_fmad_v4s16_flush ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX6-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX6-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX6-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) - ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX6-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; GFX6-NEXT: 
[[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX6-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX6-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) - ; GFX6-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; GFX6-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX6-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[FPEXT1]] - ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; GFX6-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC]](s16) - ; GFX6-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC8]](s16) - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT2]], [[FPEXT3]] - ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) - ; GFX6-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; GFX6-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[FPEXT5]] - ; GFX6-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) - ; GFX6-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC2]](s16) - ; GFX6-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC9]](s16) - ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FPEXT6]], [[FPEXT7]] - ; GFX6-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD1]](s32) - ; GFX6-NEXT: [[FPEXT8:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; GFX6-NEXT: [[FPEXT9:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) - ; GFX6-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT8]], [[FPEXT9]] - ; GFX6-NEXT: [[FPTRUNC4:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL2]](s32) - ; GFX6-NEXT: [[FPEXT10:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC4]](s16) - ; GFX6-NEXT: [[FPEXT11:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC10]](s16) - ; GFX6-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FPEXT10]], [[FPEXT11]] - ; GFX6-NEXT: [[FPTRUNC5:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD2]](s32) - ; GFX6-NEXT: [[FPEXT12:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; GFX6-NEXT: [[FPEXT13:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) - ; GFX6-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT12]], [[FPEXT13]] - ; GFX6-NEXT: [[FPTRUNC6:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL3]](s32) - ; GFX6-NEXT: [[FPEXT14:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC6]](s16) - ; GFX6-NEXT: [[FPEXT15:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC11]](s16) - ; GFX6-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FPEXT14]], [[FPEXT15]] - ; GFX6-NEXT: [[FPTRUNC7:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD3]](s32) - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC5]](s16) - ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC7]](s16) - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY 
$vgpr0_vgpr1 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr4_vgpr5 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY1]](<4 x i16>) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY2]](<4 x i16>) + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x f16>) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %60(i16) + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %66(i16) + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %61(i16) + ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST %67(i16) + ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; GFX6-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST7]](<2 x i16>) + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST8]](i32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST8]], [[C]](i32) + ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX6-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; GFX6-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST9]](<2 x i16>) + ; GFX6-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST10]](i32) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST10]], [[C]](i32) + ; GFX6-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(<2 x f16>), [[UV3:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f16>) + ; GFX6-NEXT: [[BITCAST11:%[0-9]+]]:_(f16) = G_BITCAST %71(i16) + ; GFX6-NEXT: [[BITCAST12:%[0-9]+]]:_(f16) = G_BITCAST %76(i16) + ; GFX6-NEXT: [[BITCAST13:%[0-9]+]]:_(f16) = G_BITCAST %72(i16) + ; GFX6-NEXT: [[BITCAST14:%[0-9]+]]:_(f16) = G_BITCAST %77(i16) + ; GFX6-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV3]](<2 x f16>) + ; GFX6-NEXT: [[BITCAST16:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST15]](<2 x i16>) + ; GFX6-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST16]](i32) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST16]], [[C]](i32) + ; GFX6-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX6-NEXT: [[BITCAST17:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV2]](<2 x f16>) + ; GFX6-NEXT: [[BITCAST18:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST17]](<2 x i16>) + ; GFX6-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST18]](i32) + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST18]], [[C]](i32) + ; GFX6-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(<2 x f16>), [[UV5:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST2]](<4 x f16>) + ; GFX6-NEXT: [[BITCAST19:%[0-9]+]]:_(f16) = G_BITCAST %81(i16) + ; GFX6-NEXT: [[BITCAST20:%[0-9]+]]:_(f16) = G_BITCAST %86(i16) + ; GFX6-NEXT: [[BITCAST21:%[0-9]+]]:_(f16) = G_BITCAST %82(i16) + ; GFX6-NEXT: [[BITCAST22:%[0-9]+]]:_(f16) = G_BITCAST %87(i16) + ; GFX6-NEXT: [[BITCAST23:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV5]](<2 x f16>) + ; GFX6-NEXT: [[BITCAST24:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST23]](<2 x i16>) + ; GFX6-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST24]](i32) + ; GFX6-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST24]], [[C]](i32) + ; GFX6-NEXT: [[TRUNC9:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR4]](i32) + ; GFX6-NEXT: [[BITCAST25:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV4]](<2 x f16>) + ; GFX6-NEXT: [[BITCAST26:%[0-9]+]]:_(i32) = 
G_BITCAST [[BITCAST25]](<2 x i16>) + ; GFX6-NEXT: [[TRUNC10:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST26]](i32) + ; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST26]], [[C]](i32) + ; GFX6-NEXT: [[TRUNC11:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR5]](i32) + ; GFX6-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST3]](f16) + ; GFX6-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST11]](f16) + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT]], [[FPEXT1]] + ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMUL]](f32) + ; GFX6-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[FPTRUNC]](f16) + ; GFX6-NEXT: [[FPEXT3:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST19]](f16) + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FPEXT2]], [[FPEXT3]] + ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD]](f32) + ; GFX6-NEXT: [[FPEXT4:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST5]](f16) + ; GFX6-NEXT: [[FPEXT5:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST13]](f16) + ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT4]], [[FPEXT5]] + ; GFX6-NEXT: [[FPTRUNC2:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMUL1]](f32) + ; GFX6-NEXT: [[FPEXT6:%[0-9]+]]:_(f32) = G_FPEXT [[FPTRUNC2]](f16) + ; GFX6-NEXT: [[FPEXT7:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST21]](f16) + ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FPEXT6]], [[FPEXT7]] + ; GFX6-NEXT: [[FPTRUNC3:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD1]](f32) + ; GFX6-NEXT: [[FPEXT8:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST4]](f16) + ; GFX6-NEXT: [[FPEXT9:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST12]](f16) + ; GFX6-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT8]], [[FPEXT9]] + ; GFX6-NEXT: [[FPTRUNC4:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMUL2]](f32) + ; GFX6-NEXT: [[FPEXT10:%[0-9]+]]:_(f32) = G_FPEXT [[FPTRUNC4]](f16) + ; GFX6-NEXT: [[FPEXT11:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST20]](f16) + ; GFX6-NEXT: [[FADD2:%[0-9]+]]:_(f32) = G_FADD [[FPEXT10]], [[FPEXT11]] + ; GFX6-NEXT: [[FPTRUNC5:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD2]](f32) + ; GFX6-NEXT: [[FPEXT12:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST6]](f16) + ; GFX6-NEXT: [[FPEXT13:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST14]](f16) + ; GFX6-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT12]], [[FPEXT13]] + ; GFX6-NEXT: [[FPTRUNC6:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMUL3]](f32) + ; GFX6-NEXT: [[FPEXT14:%[0-9]+]]:_(f32) = G_FPEXT [[FPTRUNC6]](f16) + ; GFX6-NEXT: [[FPEXT15:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST22]](f16) + ; GFX6-NEXT: [[FADD3:%[0-9]+]]:_(f32) = G_FADD [[FPEXT14]], [[FPEXT15]] + ; GFX6-NEXT: [[FPTRUNC7:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD3]](f32) + ; GFX6-NEXT: [[BITCAST27:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; GFX6-NEXT: [[BITCAST28:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC3]](f16) + ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST27]](i16) + ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST28]](i16) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; GFX6-NEXT: [[BITCAST29:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; GFX6-NEXT: [[BITCAST30:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC5]](f16) + ; GFX6-NEXT: [[BITCAST31:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC7]](f16) + ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST30]](i16) + ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST31]](i16) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX6-NEXT: [[BITCAST32:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR1]](i32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS 
[[BITCAST29]](<2 x f16>), [[BITCAST32]](<2 x f16>) + ; GFX6-NEXT: [[BITCAST33:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x f16>) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST33]](<4 x i16>) + ; ; GFX7-LABEL: name: test_fmad_v4s16_flush ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 - ; GFX7-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX7-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX7-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX7-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX7-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX7-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX7-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX7-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX7-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX7-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX7-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX7-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX7-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX7-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX7-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX7-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX7-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX7-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) - ; GFX7-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX7-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; GFX7-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX7-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; GFX7-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX7-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; GFX7-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) - ; GFX7-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; GFX7-NEXT: [[FMAD:%[0-9]+]]:_(s16) = G_FMAD [[TRUNC]], [[TRUNC4]], [[TRUNC8]] - ; GFX7-NEXT: [[FMAD1:%[0-9]+]]:_(s16) = G_FMAD [[TRUNC1]], [[TRUNC5]], [[TRUNC9]] - ; GFX7-NEXT: [[FMAD2:%[0-9]+]]:_(s16) = G_FMAD [[TRUNC2]], [[TRUNC6]], [[TRUNC10]] - ; GFX7-NEXT: [[FMAD3:%[0-9]+]]:_(s16) = G_FMAD [[TRUNC3]], [[TRUNC7]], [[TRUNC11]] - ; GFX7-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMAD]](s16) - ; GFX7-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMAD1]](s16) - ; GFX7-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX7-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX7-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX7-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMAD2]](s16) - ; GFX7-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FMAD3]](s16) - ; GFX7-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; GFX7-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX7-NEXT: 
[[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX7-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr4_vgpr5 + ; GFX7-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; GFX7-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY1]](<4 x i16>) + ; GFX7-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY2]](<4 x i16>) + ; GFX7-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x f16>) + ; GFX7-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %32(i16) + ; GFX7-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %38(i16) + ; GFX7-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %33(i16) + ; GFX7-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST %39(i16) + ; GFX7-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; GFX7-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST7]](<2 x i16>) + ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST8]](i32) + ; GFX7-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST8]], [[C]](i32) + ; GFX7-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX7-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; GFX7-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST9]](<2 x i16>) + ; GFX7-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST10]](i32) + ; GFX7-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST10]], [[C]](i32) + ; GFX7-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX7-NEXT: [[UV2:%[0-9]+]]:_(<2 x f16>), [[UV3:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f16>) + ; GFX7-NEXT: [[BITCAST11:%[0-9]+]]:_(f16) = G_BITCAST %43(i16) + ; GFX7-NEXT: [[BITCAST12:%[0-9]+]]:_(f16) = G_BITCAST %48(i16) + ; GFX7-NEXT: [[BITCAST13:%[0-9]+]]:_(f16) = G_BITCAST %44(i16) + ; GFX7-NEXT: [[BITCAST14:%[0-9]+]]:_(f16) = G_BITCAST %49(i16) + ; GFX7-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV3]](<2 x f16>) + ; GFX7-NEXT: [[BITCAST16:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST15]](<2 x i16>) + ; GFX7-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST16]](i32) + ; GFX7-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST16]], [[C]](i32) + ; GFX7-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX7-NEXT: [[BITCAST17:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV2]](<2 x f16>) + ; GFX7-NEXT: [[BITCAST18:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST17]](<2 x i16>) + ; GFX7-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST18]](i32) + ; GFX7-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST18]], [[C]](i32) + ; GFX7-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX7-NEXT: [[UV4:%[0-9]+]]:_(<2 x f16>), [[UV5:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST2]](<4 x f16>) + ; GFX7-NEXT: [[BITCAST19:%[0-9]+]]:_(f16) = G_BITCAST %53(i16) + ; GFX7-NEXT: [[BITCAST20:%[0-9]+]]:_(f16) = G_BITCAST %58(i16) + ; GFX7-NEXT: [[BITCAST21:%[0-9]+]]:_(f16) = G_BITCAST %54(i16) + ; GFX7-NEXT: [[BITCAST22:%[0-9]+]]:_(f16) = G_BITCAST %59(i16) + ; GFX7-NEXT: [[BITCAST23:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV5]](<2 x f16>) + ; GFX7-NEXT: [[BITCAST24:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST23]](<2 x i16>) + ; GFX7-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = 
G_TRUNC [[BITCAST24]](i32) + ; GFX7-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST24]], [[C]](i32) + ; GFX7-NEXT: [[TRUNC9:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR4]](i32) + ; GFX7-NEXT: [[BITCAST25:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV4]](<2 x f16>) + ; GFX7-NEXT: [[BITCAST26:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST25]](<2 x i16>) + ; GFX7-NEXT: [[TRUNC10:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST26]](i32) + ; GFX7-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST26]], [[C]](i32) + ; GFX7-NEXT: [[TRUNC11:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR5]](i32) + ; GFX7-NEXT: [[FMAD:%[0-9]+]]:_(f16) = G_FMAD [[BITCAST3]], [[BITCAST11]], [[BITCAST19]] + ; GFX7-NEXT: [[FMAD1:%[0-9]+]]:_(f16) = G_FMAD [[BITCAST5]], [[BITCAST13]], [[BITCAST21]] + ; GFX7-NEXT: [[FMAD2:%[0-9]+]]:_(f16) = G_FMAD [[BITCAST4]], [[BITCAST12]], [[BITCAST20]] + ; GFX7-NEXT: [[FMAD3:%[0-9]+]]:_(f16) = G_FMAD [[BITCAST6]], [[BITCAST14]], [[BITCAST22]] + ; GFX7-NEXT: [[BITCAST27:%[0-9]+]]:_(i16) = G_BITCAST [[FMAD]](f16) + ; GFX7-NEXT: [[BITCAST28:%[0-9]+]]:_(i16) = G_BITCAST [[FMAD1]](f16) + ; GFX7-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST27]](i16) + ; GFX7-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST28]](i16) + ; GFX7-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; GFX7-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; GFX7-NEXT: [[BITCAST29:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; GFX7-NEXT: [[BITCAST30:%[0-9]+]]:_(i16) = G_BITCAST [[FMAD2]](f16) + ; GFX7-NEXT: [[BITCAST31:%[0-9]+]]:_(i16) = G_BITCAST [[FMAD3]](f16) + ; GFX7-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST30]](i16) + ; GFX7-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST31]](i16) + ; GFX7-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; GFX7-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX7-NEXT: [[BITCAST32:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR1]](i32) + ; GFX7-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[BITCAST29]](<2 x f16>), [[BITCAST32]](<2 x f16>) + ; GFX7-NEXT: [[BITCAST33:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x f16>) + ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST33]](<4 x i16>) + ; ; GFX10-LABEL: name: test_fmad_v4s16_flush ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX10-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - 
; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX10-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX10-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX10-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) - ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX10-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; GFX10-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX10-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX10-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; GFX10-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) - ; GFX10-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC4]] - ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[FMUL]], [[TRUNC8]] - ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[TRUNC5]] - ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[FMUL1]], [[TRUNC9]] - ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[TRUNC6]] - ; GFX10-NEXT: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[FMUL2]], [[TRUNC10]] - ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC3]], [[TRUNC7]] - ; GFX10-NEXT: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[FMUL3]], [[TRUNC11]] - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD2]](s16), [[FADD3]](s16) - ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 - %2:_(<4 x s16>) = COPY $vgpr4_vgpr5 - %3:_(<4 x s16>) = G_FMAD %0, %1, %2 - $vgpr0_vgpr1 = COPY %3 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr4_vgpr5 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY1]](<4 x i16>) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY2]](<4 x i16>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x f16>) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %36(i16) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %42(i16) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %37(i16) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST %43(i16) + ; GFX10-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; GFX10-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST7]](<2 x i16>) + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST8]](i32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST8]], [[C]](i32) + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; GFX10-NEXT: 
[[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST9]](<2 x i16>) + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST10]](i32) + ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST10]], [[C]](i32) + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(<2 x f16>), [[UV3:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f16>) + ; GFX10-NEXT: [[BITCAST11:%[0-9]+]]:_(f16) = G_BITCAST %47(i16) + ; GFX10-NEXT: [[BITCAST12:%[0-9]+]]:_(f16) = G_BITCAST %52(i16) + ; GFX10-NEXT: [[BITCAST13:%[0-9]+]]:_(f16) = G_BITCAST %48(i16) + ; GFX10-NEXT: [[BITCAST14:%[0-9]+]]:_(f16) = G_BITCAST %53(i16) + ; GFX10-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV3]](<2 x f16>) + ; GFX10-NEXT: [[BITCAST16:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST15]](<2 x i16>) + ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST16]](i32) + ; GFX10-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST16]], [[C]](i32) + ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX10-NEXT: [[BITCAST17:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV2]](<2 x f16>) + ; GFX10-NEXT: [[BITCAST18:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST17]](<2 x i16>) + ; GFX10-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST18]](i32) + ; GFX10-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST18]], [[C]](i32) + ; GFX10-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(<2 x f16>), [[UV5:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST2]](<4 x f16>) + ; GFX10-NEXT: [[BITCAST19:%[0-9]+]]:_(f16) = G_BITCAST %57(i16) + ; GFX10-NEXT: [[BITCAST20:%[0-9]+]]:_(f16) = G_BITCAST %62(i16) + ; GFX10-NEXT: [[BITCAST21:%[0-9]+]]:_(f16) = G_BITCAST %58(i16) + ; GFX10-NEXT: [[BITCAST22:%[0-9]+]]:_(f16) = G_BITCAST %63(i16) + ; GFX10-NEXT: [[BITCAST23:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV5]](<2 x f16>) + ; GFX10-NEXT: [[BITCAST24:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST23]](<2 x i16>) + ; GFX10-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST24]](i32) + ; GFX10-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST24]], [[C]](i32) + ; GFX10-NEXT: [[TRUNC9:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR4]](i32) + ; GFX10-NEXT: [[BITCAST25:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV4]](<2 x f16>) + ; GFX10-NEXT: [[BITCAST26:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST25]](<2 x i16>) + ; GFX10-NEXT: [[TRUNC10:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST26]](i32) + ; GFX10-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST26]], [[C]](i32) + ; GFX10-NEXT: [[TRUNC11:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR5]](i32) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST3]], [[BITCAST11]] + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(f16) = G_FADD [[FMUL]], [[BITCAST19]] + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST5]], [[BITCAST13]] + ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(f16) = G_FADD [[FMUL1]], [[BITCAST21]] + ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST4]], [[BITCAST12]] + ; GFX10-NEXT: [[FADD2:%[0-9]+]]:_(f16) = G_FADD [[FMUL2]], [[BITCAST20]] + ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST6]], [[BITCAST14]] + ; GFX10-NEXT: [[FADD3:%[0-9]+]]:_(f16) = G_FADD [[FMUL3]], [[BITCAST22]] + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[FADD]](f16), [[FADD1]](f16) + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[FADD2]](f16), [[FADD3]](f16) + ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x f16>), [[BUILD_VECTOR1]](<2 x f16>) + ; GFX10-NEXT: [[BITCAST27:%[0-9]+]]:_(<4 x i16>) 
= G_BITCAST [[CONCAT_VECTORS]](<4 x f16>) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST27]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = COPY $vgpr2_vgpr3 + %2:_(<4 x i16>) = COPY $vgpr4_vgpr5 + %3:_(<4 x f16>) = G_BITCAST %0(<4 x i16>) + %4:_(<4 x f16>) = G_BITCAST %1(<4 x i16>) + %5:_(<4 x f16>) = G_BITCAST %2(<4 x i16>) + %6:_(<4 x f16>) = G_FMAD %3, %4, %5 + %7:_(<4 x i16>) = G_BITCAST %6(<4 x f16>) + $vgpr0_vgpr1 = COPY %7(<4 x i16>) ... @@ -388,57 +535,75 @@ body: | ; GFX6-LABEL: name: test_fmad_s16_denorm ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX6-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX6-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX6-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[FPEXT1]] - ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; GFX6-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC]](s16) - ; GFX6-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT2]], [[FPEXT3]] - ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) - ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) - ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX6-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX6-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; GFX6-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST1]](f16) + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT]], [[FPEXT1]] + ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMUL]](f32) + ; GFX6-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[FPTRUNC]](f16) + ; GFX6-NEXT: [[FPEXT3:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FPEXT2]], [[FPEXT3]] + ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD]](f32) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST3]](i16) + ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + ; ; GFX7-LABEL: name: test_fmad_s16_denorm ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX7-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX7-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX7-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC1]] - ; GFX7-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[FMUL]], [[TRUNC2]] - ; GFX7-NEXT: 
[[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) - ; GFX7-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX7-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX7-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX7-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX7-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX7-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX7-NEXT: [[FMUL:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX7-NEXT: [[FADD:%[0-9]+]]:_(f16) = G_FADD [[FMUL]], [[BITCAST2]] + ; GFX7-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[FADD]](f16) + ; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST3]](i16) + ; GFX7-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + ; ; GFX10-LABEL: name: test_fmad_s16_denorm ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC1]] - ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[FMUL]], [[TRUNC2]] - ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) - ; GFX10-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr1 - %3:_(s16) = G_TRUNC %0 - %4:_(s16) = G_TRUNC %1 - %5:_(s16) = G_TRUNC %2 - %6:_(s16) = G_FMAD %3, %4, %5 - %7:_(s32) = G_ANYEXT %6 - $vgpr0 = COPY %7 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(f16) = G_FADD [[FMUL]], [[BITCAST2]] + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[FADD]](f16) + ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST3]](i16) + ; GFX10-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr1 + %3:_(i16) = G_TRUNC %0(i32) + %4:_(i16) = G_TRUNC %1(i32) + %5:_(i16) = G_TRUNC %2(i32) + %6:_(f16) = G_BITCAST %3(i16) + %7:_(f16) = G_BITCAST %4(i16) + %8:_(f16) = G_BITCAST %5(i16) + %9:_(f16) = G_FMAD %6, %7, %8 + %10:_(i16) = G_BITCAST %9(f16) + %11:_(i32) = G_ANYEXT %10(i16) + $vgpr0 = COPY %11(i32) ... 
--- @@ -455,57 +620,75 @@ body: | ; GFX6-LABEL: name: test_fmad_s16_denorm_flags ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX6-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX6-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX6-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[FPEXT]], [[FPEXT1]] - ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; GFX6-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC]](s16) - ; GFX6-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FPEXT2]], [[FPEXT3]] - ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) - ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) - ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX6-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX6-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; GFX6-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST1]](f16) + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = nnan G_FMUL [[FPEXT]], [[FPEXT1]] + ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMUL]](f32) + ; GFX6-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[FPTRUNC]](f16) + ; GFX6-NEXT: [[FPEXT3:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(f32) = nnan G_FADD [[FPEXT2]], [[FPEXT3]] + ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD]](f32) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST3]](i16) + ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + ; ; GFX7-LABEL: name: test_fmad_s16_denorm_flags ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX7-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX7-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX7-NEXT: [[FMUL:%[0-9]+]]:_(s16) = nnan G_FMUL [[TRUNC]], [[TRUNC1]] - ; GFX7-NEXT: [[FADD:%[0-9]+]]:_(s16) = nnan G_FADD [[FMUL]], [[TRUNC2]] - ; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) - ; GFX7-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX7-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX7-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC 
[[COPY2]](i32) + ; GFX7-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX7-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX7-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX7-NEXT: [[FMUL:%[0-9]+]]:_(f16) = nnan G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX7-NEXT: [[FADD:%[0-9]+]]:_(f16) = nnan G_FADD [[FMUL]], [[BITCAST2]] + ; GFX7-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[FADD]](f16) + ; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST3]](i16) + ; GFX7-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + ; ; GFX10-LABEL: name: test_fmad_s16_denorm_flags ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s16) = nnan G_FMUL [[TRUNC]], [[TRUNC1]] - ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s16) = nnan G_FADD [[FMUL]], [[TRUNC2]] - ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) - ; GFX10-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr1 - %3:_(s16) = G_TRUNC %0 - %4:_(s16) = G_TRUNC %1 - %5:_(s16) = G_TRUNC %2 - %6:_(s16) = nnan G_FMAD %3, %4, %5 - %7:_(s32) = G_ANYEXT %6 - $vgpr0 = COPY %7 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f16) = nnan G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(f16) = nnan G_FADD [[FMUL]], [[BITCAST2]] + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[FADD]](f16) + ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST3]](i16) + ; GFX10-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr1 + %3:_(i16) = G_TRUNC %0(i32) + %4:_(i16) = G_TRUNC %1(i32) + %5:_(i16) = G_TRUNC %2(i32) + %6:_(f16) = G_BITCAST %3(i16) + %7:_(f16) = G_BITCAST %4(i16) + %8:_(f16) = G_BITCAST %5(i16) + %9:_(f16) = nnan G_FMAD %6, %7, %8 + %10:_(i16) = G_BITCAST %9(f16) + %11:_(i32) = G_ANYEXT %10(i16) + $vgpr0 = COPY %11(i32) ... 
--- @@ -522,103 +705,152 @@ body: | ; GFX6-LABEL: name: test_fmad_v2s16_denorm ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX6-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) - ; GFX6-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX6-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX6-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[FPEXT1]] - ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; GFX6-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC]](s16) - ; GFX6-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT2]], [[FPEXT3]] - ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) - ; GFX6-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; GFX6-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[FPEXT5]] - ; GFX6-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) - ; GFX6-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC2]](s16) - ; GFX6-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FPEXT6]], [[FPEXT7]] - ; GFX6-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD1]](s32) - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY2]](<2 x i16>) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %30(i16) + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %36(i16) + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %31(i16) + ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST %37(i16) + ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST1]](<2 x f16>) + ; GFX6-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST7]](<2 x i16>) + 
; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST8]](i32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST8]], [[C]](i32) + ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX6-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; GFX6-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST9]](<2 x i16>) + ; GFX6-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST10]](i32) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST10]], [[C]](i32) + ; GFX6-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX6-NEXT: [[BITCAST11:%[0-9]+]]:_(f16) = G_BITCAST %41(i16) + ; GFX6-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST3]](f16) + ; GFX6-NEXT: [[BITCAST12:%[0-9]+]]:_(f16) = G_BITCAST %42(i16) + ; GFX6-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST2]](<2 x f16>) + ; GFX6-NEXT: [[BITCAST14:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST13]](<2 x i16>) + ; GFX6-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST14]](i32) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST14]], [[C]](i32) + ; GFX6-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX6-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST4]](f16) + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT]], [[FPEXT1]] + ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMUL]](f32) + ; GFX6-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[FPTRUNC]](f16) + ; GFX6-NEXT: [[FPEXT3:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST11]](f16) + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FPEXT2]], [[FPEXT3]] + ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD]](f32) + ; GFX6-NEXT: [[FPEXT4:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST5]](f16) + ; GFX6-NEXT: [[FPEXT5:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST6]](f16) + ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT4]], [[FPEXT5]] + ; GFX6-NEXT: [[FPTRUNC2:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMUL1]](f32) + ; GFX6-NEXT: [[FPEXT6:%[0-9]+]]:_(f32) = G_FPEXT [[FPTRUNC2]](f16) + ; GFX6-NEXT: [[FPEXT7:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST12]](f16) + ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FPEXT6]], [[FPEXT7]] + ; GFX6-NEXT: [[FPTRUNC3:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD1]](f32) + ; GFX6-NEXT: [[BITCAST15:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; GFX6-NEXT: [[BITCAST16:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC3]](f16) + ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST15]](i16) + ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST16]](i16) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; GFX6-NEXT: [[BITCAST17:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; GFX6-NEXT: [[BITCAST18:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST17]](<2 x f16>) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST18]](<2 x i16>) + ; ; GFX7-LABEL: name: test_fmad_v2s16_denorm ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX7-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX7-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX7-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX7-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = 
G_BITCAST [[COPY1]](<2 x s16>) - ; GFX7-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX7-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX7-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX7-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) - ; GFX7-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX7-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX7-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX7-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC2]] - ; GFX7-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[FMUL]], [[TRUNC4]] - ; GFX7-NEXT: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[TRUNC3]] - ; GFX7-NEXT: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[FMUL1]], [[TRUNC5]] - ; GFX7-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FADD]](s16) - ; GFX7-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FADD1]](s16) - ; GFX7-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX7-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX7-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX7-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; GFX7-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX7-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX7-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY2]](<2 x i16>) + ; GFX7-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %18(i16) + ; GFX7-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %24(i16) + ; GFX7-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %19(i16) + ; GFX7-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST %25(i16) + ; GFX7-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST1]](<2 x f16>) + ; GFX7-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST7]](<2 x i16>) + ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST8]](i32) + ; GFX7-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST8]], [[C]](i32) + ; GFX7-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX7-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; GFX7-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST9]](<2 x i16>) + ; GFX7-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST10]](i32) + ; GFX7-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST10]], [[C]](i32) + ; GFX7-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX7-NEXT: [[BITCAST11:%[0-9]+]]:_(f16) = G_BITCAST %29(i16) + ; GFX7-NEXT: [[FMUL:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST3]], [[BITCAST4]] + ; GFX7-NEXT: [[BITCAST12:%[0-9]+]]:_(f16) = G_BITCAST %30(i16) + ; GFX7-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST2]](<2 x f16>) + ; GFX7-NEXT: [[BITCAST14:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST13]](<2 x i16>) + ; GFX7-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST14]](i32) + ; GFX7-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST14]], [[C]](i32) + ; GFX7-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX7-NEXT: [[FADD:%[0-9]+]]:_(f16) = G_FADD [[FMUL]], [[BITCAST11]] + ; GFX7-NEXT: [[FMUL1:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST5]], [[BITCAST6]] + ; GFX7-NEXT: [[FADD1:%[0-9]+]]:_(f16) = G_FADD [[FMUL1]], [[BITCAST12]] + ; GFX7-NEXT: [[BITCAST15:%[0-9]+]]:_(i16) = G_BITCAST [[FADD]](f16) + ; 
GFX7-NEXT: [[BITCAST16:%[0-9]+]]:_(i16) = G_BITCAST [[FADD1]](f16) + ; GFX7-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST15]](i16) + ; GFX7-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST16]](i16) + ; GFX7-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; GFX7-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; GFX7-NEXT: [[BITCAST17:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; GFX7-NEXT: [[BITCAST18:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST17]](<2 x f16>) + ; GFX7-NEXT: $vgpr0 = COPY [[BITCAST18]](<2 x i16>) + ; ; GFX10-LABEL: name: test_fmad_v2s16_denorm ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) - ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX10-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC2]] - ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[FMUL]], [[TRUNC4]] - ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[TRUNC3]] - ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[FMUL1]], [[TRUNC5]] - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16) - ; GFX10-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = COPY $vgpr2 - %3:_(<2 x s16>) = G_FMAD %0, %1, %2 - $vgpr0 = COPY %3 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY2]](<2 x i16>) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %18(i16) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %24(i16) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %19(i16) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST %25(i16) + ; GFX10-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST1]](<2 x f16>) + ; GFX10-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST7]](<2 x i16>) + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST8]](i32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST8]], [[C]](i32) + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x 
f16>) + ; GFX10-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST9]](<2 x i16>) + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST10]](i32) + ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST10]], [[C]](i32) + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX10-NEXT: [[BITCAST11:%[0-9]+]]:_(f16) = G_BITCAST %29(i16) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST3]], [[BITCAST4]] + ; GFX10-NEXT: [[BITCAST12:%[0-9]+]]:_(f16) = G_BITCAST %30(i16) + ; GFX10-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST2]](<2 x f16>) + ; GFX10-NEXT: [[BITCAST14:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST13]](<2 x i16>) + ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST14]](i32) + ; GFX10-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST14]], [[C]](i32) + ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(f16) = G_FADD [[FMUL]], [[BITCAST11]] + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST5]], [[BITCAST6]] + ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(f16) = G_FADD [[FMUL1]], [[BITCAST12]] + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[FADD]](f16), [[FADD1]](f16) + ; GFX10-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BUILD_VECTOR]](<2 x f16>) + ; GFX10-NEXT: $vgpr0 = COPY [[BITCAST15]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = COPY $vgpr2 + %3:_(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %4:_(<2 x f16>) = G_BITCAST %1(<2 x i16>) + %5:_(<2 x f16>) = G_BITCAST %2(<2 x i16>) + %6:_(<2 x f16>) = G_FMAD %3, %4, %5 + %7:_(<2 x i16>) = G_BITCAST %6(<2 x f16>) + $vgpr0 = COPY %7(<2 x i16>) ... --- @@ -635,103 +867,152 @@ body: | ; GFX6-LABEL: name: test_fmad_v2s16_denorm_flags ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX6-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) - ; GFX6-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX6-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX6-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[FPEXT]], [[FPEXT1]] - ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; GFX6-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC]](s16) - ; GFX6-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FPEXT2]], [[FPEXT3]] - ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) - ; GFX6-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT 
[[TRUNC1]](s16) - ; GFX6-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[FPEXT4]], [[FPEXT5]] - ; GFX6-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) - ; GFX6-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC2]](s16) - ; GFX6-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = nnan G_FADD [[FPEXT6]], [[FPEXT7]] - ; GFX6-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD1]](s32) - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY2]](<2 x i16>) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %30(i16) + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %36(i16) + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %31(i16) + ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST %37(i16) + ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST1]](<2 x f16>) + ; GFX6-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST7]](<2 x i16>) + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST8]](i32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST8]], [[C]](i32) + ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX6-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; GFX6-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST9]](<2 x i16>) + ; GFX6-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST10]](i32) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST10]], [[C]](i32) + ; GFX6-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX6-NEXT: [[BITCAST11:%[0-9]+]]:_(f16) = G_BITCAST %41(i16) + ; GFX6-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST3]](f16) + ; GFX6-NEXT: [[BITCAST12:%[0-9]+]]:_(f16) = G_BITCAST %42(i16) + ; GFX6-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST2]](<2 x f16>) + ; GFX6-NEXT: [[BITCAST14:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST13]](<2 x i16>) + ; GFX6-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST14]](i32) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST14]], [[C]](i32) + ; GFX6-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX6-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST4]](f16) + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = nnan G_FMUL [[FPEXT]], [[FPEXT1]] + ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMUL]](f32) + ; GFX6-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[FPTRUNC]](f16) + ; GFX6-NEXT: [[FPEXT3:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST11]](f16) + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(f32) = nnan G_FADD [[FPEXT2]], [[FPEXT3]] + ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD]](f32) + ; GFX6-NEXT: [[FPEXT4:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST5]](f16) + ; GFX6-NEXT: 
[[FPEXT5:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST6]](f16) + ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = nnan G_FMUL [[FPEXT4]], [[FPEXT5]] + ; GFX6-NEXT: [[FPTRUNC2:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMUL1]](f32) + ; GFX6-NEXT: [[FPEXT6:%[0-9]+]]:_(f32) = G_FPEXT [[FPTRUNC2]](f16) + ; GFX6-NEXT: [[FPEXT7:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST12]](f16) + ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(f32) = nnan G_FADD [[FPEXT6]], [[FPEXT7]] + ; GFX6-NEXT: [[FPTRUNC3:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD1]](f32) + ; GFX6-NEXT: [[BITCAST15:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; GFX6-NEXT: [[BITCAST16:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC3]](f16) + ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST15]](i16) + ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST16]](i16) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; GFX6-NEXT: [[BITCAST17:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; GFX6-NEXT: [[BITCAST18:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST17]](<2 x f16>) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST18]](<2 x i16>) + ; ; GFX7-LABEL: name: test_fmad_v2s16_denorm_flags ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX7-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX7-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX7-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX7-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX7-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX7-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX7-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX7-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) - ; GFX7-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX7-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX7-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX7-NEXT: [[FMUL:%[0-9]+]]:_(s16) = nnan G_FMUL [[TRUNC]], [[TRUNC2]] - ; GFX7-NEXT: [[FADD:%[0-9]+]]:_(s16) = nnan G_FADD [[FMUL]], [[TRUNC4]] - ; GFX7-NEXT: [[FMUL1:%[0-9]+]]:_(s16) = nnan G_FMUL [[TRUNC1]], [[TRUNC3]] - ; GFX7-NEXT: [[FADD1:%[0-9]+]]:_(s16) = nnan G_FADD [[FMUL1]], [[TRUNC5]] - ; GFX7-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FADD]](s16) - ; GFX7-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FADD1]](s16) - ; GFX7-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX7-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX7-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX7-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; GFX7-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX7-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX7-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY2]](<2 x i16>) + ; GFX7-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %18(i16) + ; GFX7-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %24(i16) + ; 
GFX7-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %19(i16) + ; GFX7-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST %25(i16) + ; GFX7-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST1]](<2 x f16>) + ; GFX7-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST7]](<2 x i16>) + ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST8]](i32) + ; GFX7-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST8]], [[C]](i32) + ; GFX7-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX7-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; GFX7-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST9]](<2 x i16>) + ; GFX7-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST10]](i32) + ; GFX7-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST10]], [[C]](i32) + ; GFX7-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX7-NEXT: [[BITCAST11:%[0-9]+]]:_(f16) = G_BITCAST %29(i16) + ; GFX7-NEXT: [[FMUL:%[0-9]+]]:_(f16) = nnan G_FMUL [[BITCAST3]], [[BITCAST4]] + ; GFX7-NEXT: [[BITCAST12:%[0-9]+]]:_(f16) = G_BITCAST %30(i16) + ; GFX7-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST2]](<2 x f16>) + ; GFX7-NEXT: [[BITCAST14:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST13]](<2 x i16>) + ; GFX7-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST14]](i32) + ; GFX7-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST14]], [[C]](i32) + ; GFX7-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX7-NEXT: [[FADD:%[0-9]+]]:_(f16) = nnan G_FADD [[FMUL]], [[BITCAST11]] + ; GFX7-NEXT: [[FMUL1:%[0-9]+]]:_(f16) = nnan G_FMUL [[BITCAST5]], [[BITCAST6]] + ; GFX7-NEXT: [[FADD1:%[0-9]+]]:_(f16) = nnan G_FADD [[FMUL1]], [[BITCAST12]] + ; GFX7-NEXT: [[BITCAST15:%[0-9]+]]:_(i16) = G_BITCAST [[FADD]](f16) + ; GFX7-NEXT: [[BITCAST16:%[0-9]+]]:_(i16) = G_BITCAST [[FADD1]](f16) + ; GFX7-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST15]](i16) + ; GFX7-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST16]](i16) + ; GFX7-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; GFX7-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; GFX7-NEXT: [[BITCAST17:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; GFX7-NEXT: [[BITCAST18:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST17]](<2 x f16>) + ; GFX7-NEXT: $vgpr0 = COPY [[BITCAST18]](<2 x i16>) + ; ; GFX10-LABEL: name: test_fmad_v2s16_denorm_flags ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) - ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX10-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX10-NEXT: 
[[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s16) = nnan G_FMUL [[TRUNC]], [[TRUNC2]] - ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s16) = nnan G_FADD [[FMUL]], [[TRUNC4]] - ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s16) = nnan G_FMUL [[TRUNC1]], [[TRUNC3]] - ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(s16) = nnan G_FADD [[FMUL1]], [[TRUNC5]] - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16) - ; GFX10-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = COPY $vgpr2 - %3:_(<2 x s16>) = nnan G_FMAD %0, %1, %2 - $vgpr0 = COPY %3 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY2]](<2 x i16>) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %18(i16) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %24(i16) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %19(i16) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST %25(i16) + ; GFX10-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST1]](<2 x f16>) + ; GFX10-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST7]](<2 x i16>) + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST8]](i32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST8]], [[C]](i32) + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; GFX10-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST9]](<2 x i16>) + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST10]](i32) + ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST10]], [[C]](i32) + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX10-NEXT: [[BITCAST11:%[0-9]+]]:_(f16) = G_BITCAST %29(i16) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f16) = nnan G_FMUL [[BITCAST3]], [[BITCAST4]] + ; GFX10-NEXT: [[BITCAST12:%[0-9]+]]:_(f16) = G_BITCAST %30(i16) + ; GFX10-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST2]](<2 x f16>) + ; GFX10-NEXT: [[BITCAST14:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST13]](<2 x i16>) + ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST14]](i32) + ; GFX10-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST14]], [[C]](i32) + ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(f16) = nnan G_FADD [[FMUL]], [[BITCAST11]] + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(f16) = nnan G_FMUL [[BITCAST5]], [[BITCAST6]] + ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(f16) = nnan G_FADD [[FMUL1]], [[BITCAST12]] + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[FADD]](f16), [[FADD1]](f16) + ; GFX10-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BUILD_VECTOR]](<2 x f16>) + ; GFX10-NEXT: $vgpr0 = COPY [[BITCAST15]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = COPY $vgpr2 + %3:_(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %4:_(<2 x f16>) = G_BITCAST %1(<2 x i16>) + %5:_(<2 x f16>) = G_BITCAST %2(<2 x i16>) + %6:_(<2 x f16>) = nnan G_FMAD %3, %4, %5 + %7:_(<2 x i16>) = 
G_BITCAST %6(<2 x f16>) + $vgpr0 = COPY %7(<2 x i16>) ... --- @@ -748,186 +1029,266 @@ body: | ; GFX6-LABEL: name: test_fmad_v4s16_denorm ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX6-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX6-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX6-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) - ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX6-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; GFX6-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX6-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX6-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) - ; GFX6-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; GFX6-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX6-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[FPEXT1]] - ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; GFX6-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC]](s16) - ; GFX6-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC8]](s16) - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT2]], [[FPEXT3]] - ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) - ; GFX6-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; GFX6-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[FPEXT5]] - ; GFX6-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) - ; GFX6-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC2]](s16) - ; GFX6-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC9]](s16) - ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FPEXT6]], [[FPEXT7]] - ; GFX6-NEXT: 
[[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD1]](s32) - ; GFX6-NEXT: [[FPEXT8:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; GFX6-NEXT: [[FPEXT9:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) - ; GFX6-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT8]], [[FPEXT9]] - ; GFX6-NEXT: [[FPTRUNC4:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL2]](s32) - ; GFX6-NEXT: [[FPEXT10:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC4]](s16) - ; GFX6-NEXT: [[FPEXT11:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC10]](s16) - ; GFX6-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FPEXT10]], [[FPEXT11]] - ; GFX6-NEXT: [[FPTRUNC5:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD2]](s32) - ; GFX6-NEXT: [[FPEXT12:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; GFX6-NEXT: [[FPEXT13:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) - ; GFX6-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT12]], [[FPEXT13]] - ; GFX6-NEXT: [[FPTRUNC6:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL3]](s32) - ; GFX6-NEXT: [[FPEXT14:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC6]](s16) - ; GFX6-NEXT: [[FPEXT15:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC11]](s16) - ; GFX6-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FPEXT14]], [[FPEXT15]] - ; GFX6-NEXT: [[FPTRUNC7:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD3]](s32) - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC5]](s16) - ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC7]](s16) - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr4_vgpr5 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY1]](<4 x i16>) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY2]](<4 x i16>) + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x f16>) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %60(i16) + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %66(i16) + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %61(i16) + ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST %67(i16) + ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; GFX6-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST7]](<2 x i16>) + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST8]](i32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST8]], [[C]](i32) + ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX6-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; GFX6-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST9]](<2 x i16>) + ; GFX6-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST10]](i32) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR 
[[BITCAST10]], [[C]](i32) + ; GFX6-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(<2 x f16>), [[UV3:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f16>) + ; GFX6-NEXT: [[BITCAST11:%[0-9]+]]:_(f16) = G_BITCAST %71(i16) + ; GFX6-NEXT: [[BITCAST12:%[0-9]+]]:_(f16) = G_BITCAST %76(i16) + ; GFX6-NEXT: [[BITCAST13:%[0-9]+]]:_(f16) = G_BITCAST %72(i16) + ; GFX6-NEXT: [[BITCAST14:%[0-9]+]]:_(f16) = G_BITCAST %77(i16) + ; GFX6-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV3]](<2 x f16>) + ; GFX6-NEXT: [[BITCAST16:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST15]](<2 x i16>) + ; GFX6-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST16]](i32) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST16]], [[C]](i32) + ; GFX6-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX6-NEXT: [[BITCAST17:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV2]](<2 x f16>) + ; GFX6-NEXT: [[BITCAST18:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST17]](<2 x i16>) + ; GFX6-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST18]](i32) + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST18]], [[C]](i32) + ; GFX6-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(<2 x f16>), [[UV5:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST2]](<4 x f16>) + ; GFX6-NEXT: [[BITCAST19:%[0-9]+]]:_(f16) = G_BITCAST %81(i16) + ; GFX6-NEXT: [[BITCAST20:%[0-9]+]]:_(f16) = G_BITCAST %86(i16) + ; GFX6-NEXT: [[BITCAST21:%[0-9]+]]:_(f16) = G_BITCAST %82(i16) + ; GFX6-NEXT: [[BITCAST22:%[0-9]+]]:_(f16) = G_BITCAST %87(i16) + ; GFX6-NEXT: [[BITCAST23:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV5]](<2 x f16>) + ; GFX6-NEXT: [[BITCAST24:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST23]](<2 x i16>) + ; GFX6-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST24]](i32) + ; GFX6-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST24]], [[C]](i32) + ; GFX6-NEXT: [[TRUNC9:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR4]](i32) + ; GFX6-NEXT: [[BITCAST25:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV4]](<2 x f16>) + ; GFX6-NEXT: [[BITCAST26:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST25]](<2 x i16>) + ; GFX6-NEXT: [[TRUNC10:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST26]](i32) + ; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST26]], [[C]](i32) + ; GFX6-NEXT: [[TRUNC11:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR5]](i32) + ; GFX6-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST3]](f16) + ; GFX6-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST11]](f16) + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT]], [[FPEXT1]] + ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMUL]](f32) + ; GFX6-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[FPTRUNC]](f16) + ; GFX6-NEXT: [[FPEXT3:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST19]](f16) + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FPEXT2]], [[FPEXT3]] + ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD]](f32) + ; GFX6-NEXT: [[FPEXT4:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST5]](f16) + ; GFX6-NEXT: [[FPEXT5:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST13]](f16) + ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT4]], [[FPEXT5]] + ; GFX6-NEXT: [[FPTRUNC2:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMUL1]](f32) + ; GFX6-NEXT: [[FPEXT6:%[0-9]+]]:_(f32) = G_FPEXT [[FPTRUNC2]](f16) + ; GFX6-NEXT: [[FPEXT7:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST21]](f16) + ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FPEXT6]], [[FPEXT7]] + ; GFX6-NEXT: [[FPTRUNC3:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD1]](f32) + ; GFX6-NEXT: [[FPEXT8:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST4]](f16) + ; GFX6-NEXT: 
[[FPEXT9:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST12]](f16) + ; GFX6-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT8]], [[FPEXT9]] + ; GFX6-NEXT: [[FPTRUNC4:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMUL2]](f32) + ; GFX6-NEXT: [[FPEXT10:%[0-9]+]]:_(f32) = G_FPEXT [[FPTRUNC4]](f16) + ; GFX6-NEXT: [[FPEXT11:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST20]](f16) + ; GFX6-NEXT: [[FADD2:%[0-9]+]]:_(f32) = G_FADD [[FPEXT10]], [[FPEXT11]] + ; GFX6-NEXT: [[FPTRUNC5:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD2]](f32) + ; GFX6-NEXT: [[FPEXT12:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST6]](f16) + ; GFX6-NEXT: [[FPEXT13:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST14]](f16) + ; GFX6-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT12]], [[FPEXT13]] + ; GFX6-NEXT: [[FPTRUNC6:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMUL3]](f32) + ; GFX6-NEXT: [[FPEXT14:%[0-9]+]]:_(f32) = G_FPEXT [[FPTRUNC6]](f16) + ; GFX6-NEXT: [[FPEXT15:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST22]](f16) + ; GFX6-NEXT: [[FADD3:%[0-9]+]]:_(f32) = G_FADD [[FPEXT14]], [[FPEXT15]] + ; GFX6-NEXT: [[FPTRUNC7:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD3]](f32) + ; GFX6-NEXT: [[BITCAST27:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; GFX6-NEXT: [[BITCAST28:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC3]](f16) + ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST27]](i16) + ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST28]](i16) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; GFX6-NEXT: [[BITCAST29:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; GFX6-NEXT: [[BITCAST30:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC5]](f16) + ; GFX6-NEXT: [[BITCAST31:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC7]](f16) + ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST30]](i16) + ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST31]](i16) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX6-NEXT: [[BITCAST32:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR1]](i32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[BITCAST29]](<2 x f16>), [[BITCAST32]](<2 x f16>) + ; GFX6-NEXT: [[BITCAST33:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x f16>) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST33]](<4 x i16>) + ; ; GFX7-LABEL: name: test_fmad_v4s16_denorm ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 - ; GFX7-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX7-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX7-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX7-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX7-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX7-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX7-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX7-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX7-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX7-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; 
GFX7-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX7-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX7-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX7-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX7-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX7-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX7-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX7-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) - ; GFX7-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX7-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; GFX7-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX7-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; GFX7-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX7-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; GFX7-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) - ; GFX7-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; GFX7-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC4]] - ; GFX7-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[FMUL]], [[TRUNC8]] - ; GFX7-NEXT: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[TRUNC5]] - ; GFX7-NEXT: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[FMUL1]], [[TRUNC9]] - ; GFX7-NEXT: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[TRUNC6]] - ; GFX7-NEXT: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[FMUL2]], [[TRUNC10]] - ; GFX7-NEXT: [[FMUL3:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC3]], [[TRUNC7]] - ; GFX7-NEXT: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[FMUL3]], [[TRUNC11]] - ; GFX7-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FADD]](s16) - ; GFX7-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FADD1]](s16) - ; GFX7-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX7-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX7-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX7-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FADD2]](s16) - ; GFX7-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FADD3]](s16) - ; GFX7-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; GFX7-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX7-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX7-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr4_vgpr5 + ; GFX7-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; GFX7-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY1]](<4 x i16>) + ; GFX7-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY2]](<4 x i16>) + ; GFX7-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x f16>) + ; GFX7-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %36(i16) + ; GFX7-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %42(i16) + ; GFX7-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %37(i16) + ; GFX7-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST %43(i16) + ; GFX7-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; GFX7-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST 
[[BITCAST7]](<2 x i16>) + ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST8]](i32) + ; GFX7-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST8]], [[C]](i32) + ; GFX7-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX7-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; GFX7-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST9]](<2 x i16>) + ; GFX7-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST10]](i32) + ; GFX7-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST10]], [[C]](i32) + ; GFX7-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX7-NEXT: [[UV2:%[0-9]+]]:_(<2 x f16>), [[UV3:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f16>) + ; GFX7-NEXT: [[BITCAST11:%[0-9]+]]:_(f16) = G_BITCAST %47(i16) + ; GFX7-NEXT: [[BITCAST12:%[0-9]+]]:_(f16) = G_BITCAST %52(i16) + ; GFX7-NEXT: [[BITCAST13:%[0-9]+]]:_(f16) = G_BITCAST %48(i16) + ; GFX7-NEXT: [[BITCAST14:%[0-9]+]]:_(f16) = G_BITCAST %53(i16) + ; GFX7-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV3]](<2 x f16>) + ; GFX7-NEXT: [[BITCAST16:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST15]](<2 x i16>) + ; GFX7-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST16]](i32) + ; GFX7-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST16]], [[C]](i32) + ; GFX7-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX7-NEXT: [[BITCAST17:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV2]](<2 x f16>) + ; GFX7-NEXT: [[BITCAST18:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST17]](<2 x i16>) + ; GFX7-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST18]](i32) + ; GFX7-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST18]], [[C]](i32) + ; GFX7-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX7-NEXT: [[UV4:%[0-9]+]]:_(<2 x f16>), [[UV5:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST2]](<4 x f16>) + ; GFX7-NEXT: [[BITCAST19:%[0-9]+]]:_(f16) = G_BITCAST %57(i16) + ; GFX7-NEXT: [[BITCAST20:%[0-9]+]]:_(f16) = G_BITCAST %62(i16) + ; GFX7-NEXT: [[BITCAST21:%[0-9]+]]:_(f16) = G_BITCAST %58(i16) + ; GFX7-NEXT: [[BITCAST22:%[0-9]+]]:_(f16) = G_BITCAST %63(i16) + ; GFX7-NEXT: [[BITCAST23:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV5]](<2 x f16>) + ; GFX7-NEXT: [[BITCAST24:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST23]](<2 x i16>) + ; GFX7-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST24]](i32) + ; GFX7-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST24]], [[C]](i32) + ; GFX7-NEXT: [[TRUNC9:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR4]](i32) + ; GFX7-NEXT: [[BITCAST25:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV4]](<2 x f16>) + ; GFX7-NEXT: [[BITCAST26:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST25]](<2 x i16>) + ; GFX7-NEXT: [[TRUNC10:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST26]](i32) + ; GFX7-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST26]], [[C]](i32) + ; GFX7-NEXT: [[TRUNC11:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR5]](i32) + ; GFX7-NEXT: [[FMUL:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST3]], [[BITCAST11]] + ; GFX7-NEXT: [[FADD:%[0-9]+]]:_(f16) = G_FADD [[FMUL]], [[BITCAST19]] + ; GFX7-NEXT: [[FMUL1:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST5]], [[BITCAST13]] + ; GFX7-NEXT: [[FADD1:%[0-9]+]]:_(f16) = G_FADD [[FMUL1]], [[BITCAST21]] + ; GFX7-NEXT: [[FMUL2:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST4]], [[BITCAST12]] + ; GFX7-NEXT: [[FADD2:%[0-9]+]]:_(f16) = G_FADD [[FMUL2]], [[BITCAST20]] + ; GFX7-NEXT: [[FMUL3:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST6]], [[BITCAST14]] + ; GFX7-NEXT: [[FADD3:%[0-9]+]]:_(f16) = G_FADD [[FMUL3]], [[BITCAST22]] + ; GFX7-NEXT: [[BITCAST27:%[0-9]+]]:_(i16) = 
G_BITCAST [[FADD]](f16) + ; GFX7-NEXT: [[BITCAST28:%[0-9]+]]:_(i16) = G_BITCAST [[FADD1]](f16) + ; GFX7-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST27]](i16) + ; GFX7-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST28]](i16) + ; GFX7-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; GFX7-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; GFX7-NEXT: [[BITCAST29:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; GFX7-NEXT: [[BITCAST30:%[0-9]+]]:_(i16) = G_BITCAST [[FADD2]](f16) + ; GFX7-NEXT: [[BITCAST31:%[0-9]+]]:_(i16) = G_BITCAST [[FADD3]](f16) + ; GFX7-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST30]](i16) + ; GFX7-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST31]](i16) + ; GFX7-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; GFX7-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX7-NEXT: [[BITCAST32:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR1]](i32) + ; GFX7-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[BITCAST29]](<2 x f16>), [[BITCAST32]](<2 x f16>) + ; GFX7-NEXT: [[BITCAST33:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x f16>) + ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST33]](<4 x i16>) + ; ; GFX10-LABEL: name: test_fmad_v4s16_denorm ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX10-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX10-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX10-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX10-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) - ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX10-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; GFX10-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX10-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX10-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; GFX10-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR 
[[BITCAST5]], [[C]](s32) - ; GFX10-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC4]] - ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[FMUL]], [[TRUNC8]] - ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[TRUNC5]] - ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[FMUL1]], [[TRUNC9]] - ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[TRUNC6]] - ; GFX10-NEXT: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[FMUL2]], [[TRUNC10]] - ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC3]], [[TRUNC7]] - ; GFX10-NEXT: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[FMUL3]], [[TRUNC11]] - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD2]](s16), [[FADD3]](s16) - ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 - %2:_(<4 x s16>) = COPY $vgpr4_vgpr5 - %3:_(<4 x s16>) = G_FMAD %0, %1, %2 - $vgpr0_vgpr1 = COPY %3 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr4_vgpr5 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY1]](<4 x i16>) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY2]](<4 x i16>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x f16>) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %36(i16) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %42(i16) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %37(i16) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST %43(i16) + ; GFX10-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; GFX10-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST7]](<2 x i16>) + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST8]](i32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST8]], [[C]](i32) + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; GFX10-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST9]](<2 x i16>) + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST10]](i32) + ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST10]], [[C]](i32) + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(<2 x f16>), [[UV3:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f16>) + ; GFX10-NEXT: [[BITCAST11:%[0-9]+]]:_(f16) = G_BITCAST %47(i16) + ; GFX10-NEXT: [[BITCAST12:%[0-9]+]]:_(f16) = G_BITCAST %52(i16) + ; GFX10-NEXT: [[BITCAST13:%[0-9]+]]:_(f16) = G_BITCAST %48(i16) + ; GFX10-NEXT: [[BITCAST14:%[0-9]+]]:_(f16) = G_BITCAST %53(i16) + ; GFX10-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV3]](<2 x f16>) + ; GFX10-NEXT: [[BITCAST16:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST15]](<2 x i16>) + ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST16]](i32) + ; GFX10-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR 
[[BITCAST16]], [[C]](i32) + ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX10-NEXT: [[BITCAST17:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV2]](<2 x f16>) + ; GFX10-NEXT: [[BITCAST18:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST17]](<2 x i16>) + ; GFX10-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST18]](i32) + ; GFX10-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST18]], [[C]](i32) + ; GFX10-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(<2 x f16>), [[UV5:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST2]](<4 x f16>) + ; GFX10-NEXT: [[BITCAST19:%[0-9]+]]:_(f16) = G_BITCAST %57(i16) + ; GFX10-NEXT: [[BITCAST20:%[0-9]+]]:_(f16) = G_BITCAST %62(i16) + ; GFX10-NEXT: [[BITCAST21:%[0-9]+]]:_(f16) = G_BITCAST %58(i16) + ; GFX10-NEXT: [[BITCAST22:%[0-9]+]]:_(f16) = G_BITCAST %63(i16) + ; GFX10-NEXT: [[BITCAST23:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV5]](<2 x f16>) + ; GFX10-NEXT: [[BITCAST24:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST23]](<2 x i16>) + ; GFX10-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST24]](i32) + ; GFX10-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST24]], [[C]](i32) + ; GFX10-NEXT: [[TRUNC9:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR4]](i32) + ; GFX10-NEXT: [[BITCAST25:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV4]](<2 x f16>) + ; GFX10-NEXT: [[BITCAST26:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST25]](<2 x i16>) + ; GFX10-NEXT: [[TRUNC10:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST26]](i32) + ; GFX10-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST26]], [[C]](i32) + ; GFX10-NEXT: [[TRUNC11:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR5]](i32) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST3]], [[BITCAST11]] + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(f16) = G_FADD [[FMUL]], [[BITCAST19]] + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST5]], [[BITCAST13]] + ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(f16) = G_FADD [[FMUL1]], [[BITCAST21]] + ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST4]], [[BITCAST12]] + ; GFX10-NEXT: [[FADD2:%[0-9]+]]:_(f16) = G_FADD [[FMUL2]], [[BITCAST20]] + ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST6]], [[BITCAST14]] + ; GFX10-NEXT: [[FADD3:%[0-9]+]]:_(f16) = G_FADD [[FMUL3]], [[BITCAST22]] + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[FADD]](f16), [[FADD1]](f16) + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[FADD2]](f16), [[FADD3]](f16) + ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x f16>), [[BUILD_VECTOR1]](<2 x f16>) + ; GFX10-NEXT: [[BITCAST27:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x f16>) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST27]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = COPY $vgpr2_vgpr3 + %2:_(<4 x i16>) = COPY $vgpr4_vgpr5 + %3:_(<4 x f16>) = G_BITCAST %0(<4 x i16>) + %4:_(<4 x f16>) = G_BITCAST %1(<4 x i16>) + %5:_(<4 x f16>) = G_BITCAST %2(<4 x i16>) + %6:_(<4 x f16>) = G_FMAD %3, %4, %5 + %7:_(<4 x i16>) = G_BITCAST %6(<4 x f16>) + $vgpr0_vgpr1 = COPY %7(<4 x i16>) ... 
@@ -945,185 +1306,264 @@ body: | ; GFX6-LABEL: name: test_fmad_v4s16_denorm_flags ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX6-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX6-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX6-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) - ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX6-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; GFX6-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX6-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX6-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) - ; GFX6-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; GFX6-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX6-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[FPEXT]], [[FPEXT1]] - ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; GFX6-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC]](s16) - ; GFX6-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC8]](s16) - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FPEXT2]], [[FPEXT3]] - ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) - ; GFX6-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; GFX6-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[FPEXT4]], [[FPEXT5]] - ; GFX6-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) - ; GFX6-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC2]](s16) - ; GFX6-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC9]](s16) - ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = nnan G_FADD [[FPEXT6]], [[FPEXT7]] - ; GFX6-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC 
[[FADD1]](s32) - ; GFX6-NEXT: [[FPEXT8:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; GFX6-NEXT: [[FPEXT9:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) - ; GFX6-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = nnan G_FMUL [[FPEXT8]], [[FPEXT9]] - ; GFX6-NEXT: [[FPTRUNC4:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL2]](s32) - ; GFX6-NEXT: [[FPEXT10:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC4]](s16) - ; GFX6-NEXT: [[FPEXT11:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC10]](s16) - ; GFX6-NEXT: [[FADD2:%[0-9]+]]:_(s32) = nnan G_FADD [[FPEXT10]], [[FPEXT11]] - ; GFX6-NEXT: [[FPTRUNC5:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD2]](s32) - ; GFX6-NEXT: [[FPEXT12:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; GFX6-NEXT: [[FPEXT13:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) - ; GFX6-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = nnan G_FMUL [[FPEXT12]], [[FPEXT13]] - ; GFX6-NEXT: [[FPTRUNC6:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL3]](s32) - ; GFX6-NEXT: [[FPEXT14:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC6]](s16) - ; GFX6-NEXT: [[FPEXT15:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC11]](s16) - ; GFX6-NEXT: [[FADD3:%[0-9]+]]:_(s32) = nnan G_FADD [[FPEXT14]], [[FPEXT15]] - ; GFX6-NEXT: [[FPTRUNC7:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD3]](s32) - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC5]](s16) - ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC7]](s16) - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr4_vgpr5 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY1]](<4 x i16>) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY2]](<4 x i16>) + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x f16>) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %60(i16) + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %66(i16) + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %61(i16) + ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST %67(i16) + ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; GFX6-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST7]](<2 x i16>) + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST8]](i32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST8]], [[C]](i32) + ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX6-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; GFX6-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST9]](<2 x i16>) + ; GFX6-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST10]](i32) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST10]], 
[[C]](i32) + ; GFX6-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(<2 x f16>), [[UV3:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f16>) + ; GFX6-NEXT: [[BITCAST11:%[0-9]+]]:_(f16) = G_BITCAST %71(i16) + ; GFX6-NEXT: [[BITCAST12:%[0-9]+]]:_(f16) = G_BITCAST %76(i16) + ; GFX6-NEXT: [[BITCAST13:%[0-9]+]]:_(f16) = G_BITCAST %72(i16) + ; GFX6-NEXT: [[BITCAST14:%[0-9]+]]:_(f16) = G_BITCAST %77(i16) + ; GFX6-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV3]](<2 x f16>) + ; GFX6-NEXT: [[BITCAST16:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST15]](<2 x i16>) + ; GFX6-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST16]](i32) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST16]], [[C]](i32) + ; GFX6-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX6-NEXT: [[BITCAST17:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV2]](<2 x f16>) + ; GFX6-NEXT: [[BITCAST18:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST17]](<2 x i16>) + ; GFX6-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST18]](i32) + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST18]], [[C]](i32) + ; GFX6-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(<2 x f16>), [[UV5:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST2]](<4 x f16>) + ; GFX6-NEXT: [[BITCAST19:%[0-9]+]]:_(f16) = G_BITCAST %81(i16) + ; GFX6-NEXT: [[BITCAST20:%[0-9]+]]:_(f16) = G_BITCAST %86(i16) + ; GFX6-NEXT: [[BITCAST21:%[0-9]+]]:_(f16) = G_BITCAST %82(i16) + ; GFX6-NEXT: [[BITCAST22:%[0-9]+]]:_(f16) = G_BITCAST %87(i16) + ; GFX6-NEXT: [[BITCAST23:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV5]](<2 x f16>) + ; GFX6-NEXT: [[BITCAST24:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST23]](<2 x i16>) + ; GFX6-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST24]](i32) + ; GFX6-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST24]], [[C]](i32) + ; GFX6-NEXT: [[TRUNC9:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR4]](i32) + ; GFX6-NEXT: [[BITCAST25:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV4]](<2 x f16>) + ; GFX6-NEXT: [[BITCAST26:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST25]](<2 x i16>) + ; GFX6-NEXT: [[TRUNC10:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST26]](i32) + ; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST26]], [[C]](i32) + ; GFX6-NEXT: [[TRUNC11:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR5]](i32) + ; GFX6-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST3]](f16) + ; GFX6-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST11]](f16) + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = nnan G_FMUL [[FPEXT]], [[FPEXT1]] + ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMUL]](f32) + ; GFX6-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[FPTRUNC]](f16) + ; GFX6-NEXT: [[FPEXT3:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST19]](f16) + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(f32) = nnan G_FADD [[FPEXT2]], [[FPEXT3]] + ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD]](f32) + ; GFX6-NEXT: [[FPEXT4:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST5]](f16) + ; GFX6-NEXT: [[FPEXT5:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST13]](f16) + ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = nnan G_FMUL [[FPEXT4]], [[FPEXT5]] + ; GFX6-NEXT: [[FPTRUNC2:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMUL1]](f32) + ; GFX6-NEXT: [[FPEXT6:%[0-9]+]]:_(f32) = G_FPEXT [[FPTRUNC2]](f16) + ; GFX6-NEXT: [[FPEXT7:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST21]](f16) + ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(f32) = nnan G_FADD [[FPEXT6]], [[FPEXT7]] + ; GFX6-NEXT: [[FPTRUNC3:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD1]](f32) + ; GFX6-NEXT: [[FPEXT8:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST4]](f16) + ; GFX6-NEXT: 
[[FPEXT9:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST12]](f16) + ; GFX6-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = nnan G_FMUL [[FPEXT8]], [[FPEXT9]] + ; GFX6-NEXT: [[FPTRUNC4:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMUL2]](f32) + ; GFX6-NEXT: [[FPEXT10:%[0-9]+]]:_(f32) = G_FPEXT [[FPTRUNC4]](f16) + ; GFX6-NEXT: [[FPEXT11:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST20]](f16) + ; GFX6-NEXT: [[FADD2:%[0-9]+]]:_(f32) = nnan G_FADD [[FPEXT10]], [[FPEXT11]] + ; GFX6-NEXT: [[FPTRUNC5:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD2]](f32) + ; GFX6-NEXT: [[FPEXT12:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST6]](f16) + ; GFX6-NEXT: [[FPEXT13:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST14]](f16) + ; GFX6-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = nnan G_FMUL [[FPEXT12]], [[FPEXT13]] + ; GFX6-NEXT: [[FPTRUNC6:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMUL3]](f32) + ; GFX6-NEXT: [[FPEXT14:%[0-9]+]]:_(f32) = G_FPEXT [[FPTRUNC6]](f16) + ; GFX6-NEXT: [[FPEXT15:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST22]](f16) + ; GFX6-NEXT: [[FADD3:%[0-9]+]]:_(f32) = nnan G_FADD [[FPEXT14]], [[FPEXT15]] + ; GFX6-NEXT: [[FPTRUNC7:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD3]](f32) + ; GFX6-NEXT: [[BITCAST27:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; GFX6-NEXT: [[BITCAST28:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC3]](f16) + ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST27]](i16) + ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST28]](i16) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; GFX6-NEXT: [[BITCAST29:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; GFX6-NEXT: [[BITCAST30:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC5]](f16) + ; GFX6-NEXT: [[BITCAST31:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC7]](f16) + ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST30]](i16) + ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST31]](i16) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX6-NEXT: [[BITCAST32:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR1]](i32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[BITCAST29]](<2 x f16>), [[BITCAST32]](<2 x f16>) + ; GFX6-NEXT: [[BITCAST33:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x f16>) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST33]](<4 x i16>) + ; ; GFX7-LABEL: name: test_fmad_v4s16_denorm_flags ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 - ; GFX7-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX7-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX7-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX7-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX7-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX7-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX7-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX7-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX7-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX7-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST 
[[UV2]](<2 x s16>) - ; GFX7-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX7-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX7-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX7-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX7-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX7-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX7-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX7-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) - ; GFX7-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX7-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; GFX7-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX7-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; GFX7-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX7-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; GFX7-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) - ; GFX7-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; GFX7-NEXT: [[FMUL:%[0-9]+]]:_(s16) = nnan G_FMUL [[TRUNC]], [[TRUNC4]] - ; GFX7-NEXT: [[FADD:%[0-9]+]]:_(s16) = nnan G_FADD [[FMUL]], [[TRUNC8]] - ; GFX7-NEXT: [[FMUL1:%[0-9]+]]:_(s16) = nnan G_FMUL [[TRUNC1]], [[TRUNC5]] - ; GFX7-NEXT: [[FADD1:%[0-9]+]]:_(s16) = nnan G_FADD [[FMUL1]], [[TRUNC9]] - ; GFX7-NEXT: [[FMUL2:%[0-9]+]]:_(s16) = nnan G_FMUL [[TRUNC2]], [[TRUNC6]] - ; GFX7-NEXT: [[FADD2:%[0-9]+]]:_(s16) = nnan G_FADD [[FMUL2]], [[TRUNC10]] - ; GFX7-NEXT: [[FMUL3:%[0-9]+]]:_(s16) = nnan G_FMUL [[TRUNC3]], [[TRUNC7]] - ; GFX7-NEXT: [[FADD3:%[0-9]+]]:_(s16) = nnan G_FADD [[FMUL3]], [[TRUNC11]] - ; GFX7-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FADD]](s16) - ; GFX7-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FADD1]](s16) - ; GFX7-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX7-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX7-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX7-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FADD2]](s16) - ; GFX7-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FADD3]](s16) - ; GFX7-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; GFX7-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX7-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX7-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr4_vgpr5 + ; GFX7-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; GFX7-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY1]](<4 x i16>) + ; GFX7-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY2]](<4 x i16>) + ; GFX7-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x f16>) + ; GFX7-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %36(i16) + ; GFX7-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %42(i16) + ; GFX7-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %37(i16) + ; GFX7-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST %43(i16) + ; GFX7-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; 
GFX7-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST7]](<2 x i16>) + ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST8]](i32) + ; GFX7-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST8]], [[C]](i32) + ; GFX7-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX7-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; GFX7-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST9]](<2 x i16>) + ; GFX7-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST10]](i32) + ; GFX7-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST10]], [[C]](i32) + ; GFX7-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX7-NEXT: [[UV2:%[0-9]+]]:_(<2 x f16>), [[UV3:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f16>) + ; GFX7-NEXT: [[BITCAST11:%[0-9]+]]:_(f16) = G_BITCAST %47(i16) + ; GFX7-NEXT: [[BITCAST12:%[0-9]+]]:_(f16) = G_BITCAST %52(i16) + ; GFX7-NEXT: [[BITCAST13:%[0-9]+]]:_(f16) = G_BITCAST %48(i16) + ; GFX7-NEXT: [[BITCAST14:%[0-9]+]]:_(f16) = G_BITCAST %53(i16) + ; GFX7-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV3]](<2 x f16>) + ; GFX7-NEXT: [[BITCAST16:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST15]](<2 x i16>) + ; GFX7-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST16]](i32) + ; GFX7-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST16]], [[C]](i32) + ; GFX7-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX7-NEXT: [[BITCAST17:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV2]](<2 x f16>) + ; GFX7-NEXT: [[BITCAST18:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST17]](<2 x i16>) + ; GFX7-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST18]](i32) + ; GFX7-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST18]], [[C]](i32) + ; GFX7-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX7-NEXT: [[UV4:%[0-9]+]]:_(<2 x f16>), [[UV5:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST2]](<4 x f16>) + ; GFX7-NEXT: [[BITCAST19:%[0-9]+]]:_(f16) = G_BITCAST %57(i16) + ; GFX7-NEXT: [[BITCAST20:%[0-9]+]]:_(f16) = G_BITCAST %62(i16) + ; GFX7-NEXT: [[BITCAST21:%[0-9]+]]:_(f16) = G_BITCAST %58(i16) + ; GFX7-NEXT: [[BITCAST22:%[0-9]+]]:_(f16) = G_BITCAST %63(i16) + ; GFX7-NEXT: [[BITCAST23:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV5]](<2 x f16>) + ; GFX7-NEXT: [[BITCAST24:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST23]](<2 x i16>) + ; GFX7-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST24]](i32) + ; GFX7-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST24]], [[C]](i32) + ; GFX7-NEXT: [[TRUNC9:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR4]](i32) + ; GFX7-NEXT: [[BITCAST25:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV4]](<2 x f16>) + ; GFX7-NEXT: [[BITCAST26:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST25]](<2 x i16>) + ; GFX7-NEXT: [[TRUNC10:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST26]](i32) + ; GFX7-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST26]], [[C]](i32) + ; GFX7-NEXT: [[TRUNC11:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR5]](i32) + ; GFX7-NEXT: [[FMUL:%[0-9]+]]:_(f16) = nnan G_FMUL [[BITCAST3]], [[BITCAST11]] + ; GFX7-NEXT: [[FADD:%[0-9]+]]:_(f16) = nnan G_FADD [[FMUL]], [[BITCAST19]] + ; GFX7-NEXT: [[FMUL1:%[0-9]+]]:_(f16) = nnan G_FMUL [[BITCAST5]], [[BITCAST13]] + ; GFX7-NEXT: [[FADD1:%[0-9]+]]:_(f16) = nnan G_FADD [[FMUL1]], [[BITCAST21]] + ; GFX7-NEXT: [[FMUL2:%[0-9]+]]:_(f16) = nnan G_FMUL [[BITCAST4]], [[BITCAST12]] + ; GFX7-NEXT: [[FADD2:%[0-9]+]]:_(f16) = nnan G_FADD [[FMUL2]], [[BITCAST20]] + ; GFX7-NEXT: [[FMUL3:%[0-9]+]]:_(f16) = nnan G_FMUL [[BITCAST6]], [[BITCAST14]] + ; GFX7-NEXT: 
[[FADD3:%[0-9]+]]:_(f16) = nnan G_FADD [[FMUL3]], [[BITCAST22]] + ; GFX7-NEXT: [[BITCAST27:%[0-9]+]]:_(i16) = G_BITCAST [[FADD]](f16) + ; GFX7-NEXT: [[BITCAST28:%[0-9]+]]:_(i16) = G_BITCAST [[FADD1]](f16) + ; GFX7-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST27]](i16) + ; GFX7-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST28]](i16) + ; GFX7-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; GFX7-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; GFX7-NEXT: [[BITCAST29:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; GFX7-NEXT: [[BITCAST30:%[0-9]+]]:_(i16) = G_BITCAST [[FADD2]](f16) + ; GFX7-NEXT: [[BITCAST31:%[0-9]+]]:_(i16) = G_BITCAST [[FADD3]](f16) + ; GFX7-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST30]](i16) + ; GFX7-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST31]](i16) + ; GFX7-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; GFX7-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX7-NEXT: [[BITCAST32:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR1]](i32) + ; GFX7-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[BITCAST29]](<2 x f16>), [[BITCAST32]](<2 x f16>) + ; GFX7-NEXT: [[BITCAST33:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x f16>) + ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST33]](<4 x i16>) + ; ; GFX10-LABEL: name: test_fmad_v4s16_denorm_flags ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX10-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX10-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX10-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX10-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) - ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX10-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; GFX10-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX10-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; 
GFX10-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; GFX10-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) - ; GFX10-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s16) = nnan G_FMUL [[TRUNC]], [[TRUNC4]] - ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s16) = nnan G_FADD [[FMUL]], [[TRUNC8]] - ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s16) = nnan G_FMUL [[TRUNC1]], [[TRUNC5]] - ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(s16) = nnan G_FADD [[FMUL1]], [[TRUNC9]] - ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(s16) = nnan G_FMUL [[TRUNC2]], [[TRUNC6]] - ; GFX10-NEXT: [[FADD2:%[0-9]+]]:_(s16) = nnan G_FADD [[FMUL2]], [[TRUNC10]] - ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(s16) = nnan G_FMUL [[TRUNC3]], [[TRUNC7]] - ; GFX10-NEXT: [[FADD3:%[0-9]+]]:_(s16) = nnan G_FADD [[FMUL3]], [[TRUNC11]] - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD2]](s16), [[FADD3]](s16) - ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 - %2:_(<4 x s16>) = COPY $vgpr4_vgpr5 - %3:_(<4 x s16>) = nnan G_FMAD %0, %1, %2 - $vgpr0_vgpr1 = COPY %3 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr4_vgpr5 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY1]](<4 x i16>) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY2]](<4 x i16>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x f16>) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %36(i16) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %42(i16) + ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %37(i16) + ; GFX10-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST %43(i16) + ; GFX10-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; GFX10-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST7]](<2 x i16>) + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST8]](i32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST8]], [[C]](i32) + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; GFX10-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST9]](<2 x i16>) + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST10]](i32) + ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST10]], [[C]](i32) + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(<2 x f16>), [[UV3:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f16>) + ; GFX10-NEXT: [[BITCAST11:%[0-9]+]]:_(f16) = G_BITCAST %47(i16) + ; GFX10-NEXT: [[BITCAST12:%[0-9]+]]:_(f16) = G_BITCAST %52(i16) + ; GFX10-NEXT: [[BITCAST13:%[0-9]+]]:_(f16) = G_BITCAST %48(i16) + ; GFX10-NEXT: [[BITCAST14:%[0-9]+]]:_(f16) = G_BITCAST %53(i16) + ; GFX10-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV3]](<2 x f16>) + ; GFX10-NEXT: [[BITCAST16:%[0-9]+]]:_(i32) = 
G_BITCAST [[BITCAST15]](<2 x i16>) + ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST16]](i32) + ; GFX10-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST16]], [[C]](i32) + ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX10-NEXT: [[BITCAST17:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV2]](<2 x f16>) + ; GFX10-NEXT: [[BITCAST18:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST17]](<2 x i16>) + ; GFX10-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST18]](i32) + ; GFX10-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST18]], [[C]](i32) + ; GFX10-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(<2 x f16>), [[UV5:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST2]](<4 x f16>) + ; GFX10-NEXT: [[BITCAST19:%[0-9]+]]:_(f16) = G_BITCAST %57(i16) + ; GFX10-NEXT: [[BITCAST20:%[0-9]+]]:_(f16) = G_BITCAST %62(i16) + ; GFX10-NEXT: [[BITCAST21:%[0-9]+]]:_(f16) = G_BITCAST %58(i16) + ; GFX10-NEXT: [[BITCAST22:%[0-9]+]]:_(f16) = G_BITCAST %63(i16) + ; GFX10-NEXT: [[BITCAST23:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV5]](<2 x f16>) + ; GFX10-NEXT: [[BITCAST24:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST23]](<2 x i16>) + ; GFX10-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST24]](i32) + ; GFX10-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST24]], [[C]](i32) + ; GFX10-NEXT: [[TRUNC9:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR4]](i32) + ; GFX10-NEXT: [[BITCAST25:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV4]](<2 x f16>) + ; GFX10-NEXT: [[BITCAST26:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST25]](<2 x i16>) + ; GFX10-NEXT: [[TRUNC10:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST26]](i32) + ; GFX10-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST26]], [[C]](i32) + ; GFX10-NEXT: [[TRUNC11:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR5]](i32) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f16) = nnan G_FMUL [[BITCAST3]], [[BITCAST11]] + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(f16) = nnan G_FADD [[FMUL]], [[BITCAST19]] + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(f16) = nnan G_FMUL [[BITCAST5]], [[BITCAST13]] + ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(f16) = nnan G_FADD [[FMUL1]], [[BITCAST21]] + ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(f16) = nnan G_FMUL [[BITCAST4]], [[BITCAST12]] + ; GFX10-NEXT: [[FADD2:%[0-9]+]]:_(f16) = nnan G_FADD [[FMUL2]], [[BITCAST20]] + ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(f16) = nnan G_FMUL [[BITCAST6]], [[BITCAST14]] + ; GFX10-NEXT: [[FADD3:%[0-9]+]]:_(f16) = nnan G_FADD [[FMUL3]], [[BITCAST22]] + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[FADD]](f16), [[FADD1]](f16) + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[FADD2]](f16), [[FADD3]](f16) + ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x f16>), [[BUILD_VECTOR1]](<2 x f16>) + ; GFX10-NEXT: [[BITCAST27:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x f16>) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST27]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = COPY $vgpr2_vgpr3 + %2:_(<4 x i16>) = COPY $vgpr4_vgpr5 + %3:_(<4 x f16>) = G_BITCAST %0(<4 x i16>) + %4:_(<4 x f16>) = G_BITCAST %1(<4 x i16>) + %5:_(<4 x f16>) = G_BITCAST %2(<4 x i16>) + %6:_(<4 x f16>) = nnan G_FMAD %3, %4, %5 + %7:_(<4 x i16>) = G_BITCAST %6(<4 x f16>) + $vgpr0_vgpr1 = COPY %7(<4 x i16>) ... 
- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s32.mir index fb0f31b98d710..987e92bc566c2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s32.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s32.mir @@ -20,41 +20,64 @@ body: | ; GFX6-LABEL: name: test_fmad_s32_flush ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX6-NEXT: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[COPY]], [[COPY1]], [[COPY2]] - ; GFX6-NEXT: $vgpr0 = COPY [[FMAD]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; GFX6-NEXT: [[FMAD:%[0-9]+]]:_(f32) = G_FMAD [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMAD]](f32) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) + ; ; GFX7-LABEL: name: test_fmad_s32_flush ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX7-NEXT: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[COPY]], [[COPY1]], [[COPY2]] - ; GFX7-NEXT: $vgpr0 = COPY [[FMAD]](s32) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX7-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX7-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX7-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; GFX7-NEXT: [[FMAD:%[0-9]+]]:_(f32) = G_FMAD [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX7-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMAD]](f32) + ; GFX7-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) + ; ; GFX101-LABEL: name: test_fmad_s32_flush ; GFX101: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX101-NEXT: {{ $}} - ; GFX101-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX101-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX101-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX101-NEXT: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[COPY]], [[COPY1]], [[COPY2]] - ; GFX101-NEXT: $vgpr0 = COPY [[FMAD]](s32) + ; GFX101-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX101-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX101-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX101-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX101-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX101-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; GFX101-NEXT: [[FMAD:%[0-9]+]]:_(f32) = G_FMAD [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX101-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMAD]](f32) + ; GFX101-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) + ; ; GFX103-LABEL: name: test_fmad_s32_flush ; GFX103: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX103-NEXT: {{ $}} - ; GFX103-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX103-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX103-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX103-NEXT: 
[[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] - ; GFX103-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]] - ; GFX103-NEXT: $vgpr0 = COPY [[FADD]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = G_FMAD %0, %1, %2 - $vgpr0 = COPY %3 + ; GFX103-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX103-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX103-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX103-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX103-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX103-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; GFX103-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX103-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[BITCAST2]] + ; GFX103-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FADD]](f32) + ; GFX103-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(f32) = G_BITCAST %0(i32) + %4:_(f32) = G_BITCAST %1(i32) + %5:_(f32) = G_BITCAST %2(i32) + %6:_(f32) = G_FMAD %3, %4, %5 + %7:_(i32) = G_BITCAST %6(f32) + $vgpr0 = COPY %7(i32) ... --- @@ -71,41 +94,64 @@ body: | ; GFX6-LABEL: name: test_fmad_s32_flags_flush ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX6-NEXT: [[FMAD:%[0-9]+]]:_(s32) = nnan G_FMAD [[COPY]], [[COPY1]], [[COPY2]] - ; GFX6-NEXT: $vgpr0 = COPY [[FMAD]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; GFX6-NEXT: [[FMAD:%[0-9]+]]:_(f32) = nnan G_FMAD [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMAD]](f32) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) + ; ; GFX7-LABEL: name: test_fmad_s32_flags_flush ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX7-NEXT: [[FMAD:%[0-9]+]]:_(s32) = nnan G_FMAD [[COPY]], [[COPY1]], [[COPY2]] - ; GFX7-NEXT: $vgpr0 = COPY [[FMAD]](s32) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX7-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX7-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX7-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; GFX7-NEXT: [[FMAD:%[0-9]+]]:_(f32) = nnan G_FMAD [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX7-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMAD]](f32) + ; GFX7-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) + ; ; GFX101-LABEL: name: test_fmad_s32_flags_flush ; GFX101: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX101-NEXT: {{ $}} - ; GFX101-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX101-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX101-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX101-NEXT: [[FMAD:%[0-9]+]]:_(s32) = nnan G_FMAD [[COPY]], [[COPY1]], 
[[COPY2]] - ; GFX101-NEXT: $vgpr0 = COPY [[FMAD]](s32) + ; GFX101-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX101-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX101-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX101-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX101-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX101-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; GFX101-NEXT: [[FMAD:%[0-9]+]]:_(f32) = nnan G_FMAD [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + ; GFX101-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMAD]](f32) + ; GFX101-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) + ; ; GFX103-LABEL: name: test_fmad_s32_flags_flush ; GFX103: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX103-NEXT: {{ $}} - ; GFX103-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX103-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX103-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX103-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[COPY1]] - ; GFX103-NEXT: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FMUL]], [[COPY2]] - ; GFX103-NEXT: $vgpr0 = COPY [[FADD]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = nnan G_FMAD %0, %1, %2 - $vgpr0 = COPY %3 + ; GFX103-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX103-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX103-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX103-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX103-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX103-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; GFX103-NEXT: [[FMUL:%[0-9]+]]:_(f32) = nnan G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX103-NEXT: [[FADD:%[0-9]+]]:_(f32) = nnan G_FADD [[FMUL]], [[BITCAST2]] + ; GFX103-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FADD]](f32) + ; GFX103-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(f32) = G_BITCAST %0(i32) + %4:_(f32) = G_BITCAST %1(i32) + %5:_(f32) = G_BITCAST %2(i32) + %6:_(f32) = nnan G_FMAD %3, %4, %5 + %7:_(i32) = G_BITCAST %6(f32) + $vgpr0 = COPY %7(i32) ... 
--- @@ -122,62 +168,85 @@ body: | ; GFX6-LABEL: name: test_fmad_v2s32_flush ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; GFX6-NEXT: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[UV]], [[UV2]], [[UV4]] - ; GFX6-NEXT: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV3]], [[UV5]] - ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr4_vgpr5 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY1]](<2 x i32>) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY2]](<2 x i32>) + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f32>) + ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(f32), [[UV5:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST2]](<2 x f32>) + ; GFX6-NEXT: [[FMAD:%[0-9]+]]:_(f32) = G_FMAD [[UV]], [[UV2]], [[UV4]] + ; GFX6-NEXT: [[FMAD1:%[0-9]+]]:_(f32) = G_FMAD [[UV1]], [[UV3]], [[UV5]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FMAD]](f32), [[FMAD1]](f32) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST3]](<2 x i32>) + ; ; GFX7-LABEL: name: test_fmad_v2s32_flush ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; GFX7-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX7-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX7-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; GFX7-NEXT: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[UV]], [[UV2]], [[UV4]] - ; GFX7-NEXT: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV3]], [[UV5]] - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr4_vgpr5 + ; GFX7-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; GFX7-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY1]](<2 x i32>) + ; GFX7-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY2]](<2 x i32>) + ; GFX7-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) 
= G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; GFX7-NEXT: [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f32>) + ; GFX7-NEXT: [[UV4:%[0-9]+]]:_(f32), [[UV5:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST2]](<2 x f32>) + ; GFX7-NEXT: [[FMAD:%[0-9]+]]:_(f32) = G_FMAD [[UV]], [[UV2]], [[UV4]] + ; GFX7-NEXT: [[FMAD1:%[0-9]+]]:_(f32) = G_FMAD [[UV1]], [[UV3]], [[UV5]] + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FMAD]](f32), [[FMAD1]](f32) + ; GFX7-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST3]](<2 x i32>) + ; ; GFX101-LABEL: name: test_fmad_v2s32_flush ; GFX101: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX101-NEXT: {{ $}} - ; GFX101-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX101-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX101-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; GFX101-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX101-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX101-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; GFX101-NEXT: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[UV]], [[UV2]], [[UV4]] - ; GFX101-NEXT: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV3]], [[UV5]] - ; GFX101-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32) - ; GFX101-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX101-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX101-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX101-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr4_vgpr5 + ; GFX101-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; GFX101-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY1]](<2 x i32>) + ; GFX101-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY2]](<2 x i32>) + ; GFX101-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; GFX101-NEXT: [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f32>) + ; GFX101-NEXT: [[UV4:%[0-9]+]]:_(f32), [[UV5:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST2]](<2 x f32>) + ; GFX101-NEXT: [[FMAD:%[0-9]+]]:_(f32) = G_FMAD [[UV]], [[UV2]], [[UV4]] + ; GFX101-NEXT: [[FMAD1:%[0-9]+]]:_(f32) = G_FMAD [[UV1]], [[UV3]], [[UV5]] + ; GFX101-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FMAD]](f32), [[FMAD1]](f32) + ; GFX101-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; GFX101-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST3]](<2 x i32>) + ; ; GFX103-LABEL: name: test_fmad_v2s32_flush ; GFX103: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX103-NEXT: {{ $}} - ; GFX103-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX103-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX103-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; GFX103-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX103-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX103-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; GFX103-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV2]] - ; GFX103-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD 
[[FMUL]], [[UV4]] - ; GFX103-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV3]] - ; GFX103-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV5]] - ; GFX103-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32) - ; GFX103-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = COPY $vgpr4_vgpr5 - %3:_(<2 x s32>) = G_FMAD %0, %1, %2 - $vgpr0_vgpr1 = COPY %3 + ; GFX103-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX103-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX103-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr4_vgpr5 + ; GFX103-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; GFX103-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY1]](<2 x i32>) + ; GFX103-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY2]](<2 x i32>) + ; GFX103-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; GFX103-NEXT: [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f32>) + ; GFX103-NEXT: [[UV4:%[0-9]+]]:_(f32), [[UV5:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST2]](<2 x f32>) + ; GFX103-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UV]], [[UV2]] + ; GFX103-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UV4]] + ; GFX103-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[UV1]], [[UV3]] + ; GFX103-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL1]], [[UV5]] + ; GFX103-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FADD]](f32), [[FADD1]](f32) + ; GFX103-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; GFX103-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST3]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i32>) = COPY $vgpr4_vgpr5 + %3:_(<2 x f32>) = G_BITCAST %0(<2 x i32>) + %4:_(<2 x f32>) = G_BITCAST %1(<2 x i32>) + %5:_(<2 x f32>) = G_BITCAST %2(<2 x i32>) + %6:_(<2 x f32>) = G_FMAD %3, %4, %5 + %7:_(<2 x i32>) = G_BITCAST %6(<2 x f32>) + $vgpr0_vgpr1 = COPY %7(<2 x i32>) ... 
--- @@ -194,67 +263,90 @@ body: | ; GFX6-LABEL: name: test_fmad_v3s32_flush ; GFX6: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5, $vgpr6_vgpr7_vgpr8 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) - ; GFX6-NEXT: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[UV]], [[UV3]], [[UV6]] - ; GFX6-NEXT: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV4]], [[UV7]] - ; GFX6-NEXT: [[FMAD2:%[0-9]+]]:_(s32) = G_FMAD [[UV2]], [[UV5]], [[UV8]] - ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32), [[FMAD2]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr6_vgpr7_vgpr8 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY]](<3 x i32>) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY1]](<3 x i32>) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY2]](<3 x i32>) + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(f32), [[UV4:%[0-9]+]]:_(f32), [[UV5:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<3 x f32>) + ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(f32), [[UV7:%[0-9]+]]:_(f32), [[UV8:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST2]](<3 x f32>) + ; GFX6-NEXT: [[FMAD:%[0-9]+]]:_(f32) = G_FMAD [[UV]], [[UV3]], [[UV6]] + ; GFX6-NEXT: [[FMAD1:%[0-9]+]]:_(f32) = G_FMAD [[UV1]], [[UV4]], [[UV7]] + ; GFX6-NEXT: [[FMAD2:%[0-9]+]]:_(f32) = G_FMAD [[UV2]], [[UV5]], [[UV8]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[FMAD]](f32), [[FMAD1]](f32), [[FMAD2]](f32) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[BUILD_VECTOR]](<3 x f32>) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST3]](<3 x i32>) + ; ; GFX7-LABEL: name: test_fmad_v3s32_flush ; GFX7: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5, $vgpr6_vgpr7_vgpr8 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 - ; GFX7-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX7-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; GFX7-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) - ; GFX7-NEXT: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[UV]], [[UV3]], [[UV6]] - ; GFX7-NEXT: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV4]], [[UV7]] - ; GFX7-NEXT: [[FMAD2:%[0-9]+]]:_(s32) = G_FMAD [[UV2]], [[UV5]], [[UV8]] - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), 
[[FMAD1]](s32), [[FMAD2]](s32) - ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr6_vgpr7_vgpr8 + ; GFX7-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY]](<3 x i32>) + ; GFX7-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY1]](<3 x i32>) + ; GFX7-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY2]](<3 x i32>) + ; GFX7-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; GFX7-NEXT: [[UV3:%[0-9]+]]:_(f32), [[UV4:%[0-9]+]]:_(f32), [[UV5:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<3 x f32>) + ; GFX7-NEXT: [[UV6:%[0-9]+]]:_(f32), [[UV7:%[0-9]+]]:_(f32), [[UV8:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST2]](<3 x f32>) + ; GFX7-NEXT: [[FMAD:%[0-9]+]]:_(f32) = G_FMAD [[UV]], [[UV3]], [[UV6]] + ; GFX7-NEXT: [[FMAD1:%[0-9]+]]:_(f32) = G_FMAD [[UV1]], [[UV4]], [[UV7]] + ; GFX7-NEXT: [[FMAD2:%[0-9]+]]:_(f32) = G_FMAD [[UV2]], [[UV5]], [[UV8]] + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[FMAD]](f32), [[FMAD1]](f32), [[FMAD2]](f32) + ; GFX7-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[BUILD_VECTOR]](<3 x f32>) + ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST3]](<3 x i32>) + ; ; GFX101-LABEL: name: test_fmad_v3s32_flush ; GFX101: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5, $vgpr6_vgpr7_vgpr8 ; GFX101-NEXT: {{ $}} - ; GFX101-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX101-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX101-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 - ; GFX101-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX101-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; GFX101-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) - ; GFX101-NEXT: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[UV]], [[UV3]], [[UV6]] - ; GFX101-NEXT: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV4]], [[UV7]] - ; GFX101-NEXT: [[FMAD2:%[0-9]+]]:_(s32) = G_FMAD [[UV2]], [[UV5]], [[UV8]] - ; GFX101-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32), [[FMAD2]](s32) - ; GFX101-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX101-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX101-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX101-NEXT: [[COPY2:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr6_vgpr7_vgpr8 + ; GFX101-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY]](<3 x i32>) + ; GFX101-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY1]](<3 x i32>) + ; GFX101-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY2]](<3 x i32>) + ; GFX101-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; GFX101-NEXT: [[UV3:%[0-9]+]]:_(f32), [[UV4:%[0-9]+]]:_(f32), [[UV5:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<3 x f32>) + ; GFX101-NEXT: [[UV6:%[0-9]+]]:_(f32), [[UV7:%[0-9]+]]:_(f32), [[UV8:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST2]](<3 x f32>) + ; GFX101-NEXT: [[FMAD:%[0-9]+]]:_(f32) = G_FMAD [[UV]], [[UV3]], [[UV6]] + ; 
GFX101-NEXT: [[FMAD1:%[0-9]+]]:_(f32) = G_FMAD [[UV1]], [[UV4]], [[UV7]] + ; GFX101-NEXT: [[FMAD2:%[0-9]+]]:_(f32) = G_FMAD [[UV2]], [[UV5]], [[UV8]] + ; GFX101-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[FMAD]](f32), [[FMAD1]](f32), [[FMAD2]](f32) + ; GFX101-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[BUILD_VECTOR]](<3 x f32>) + ; GFX101-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST3]](<3 x i32>) + ; ; GFX103-LABEL: name: test_fmad_v3s32_flush ; GFX103: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5, $vgpr6_vgpr7_vgpr8 ; GFX103-NEXT: {{ $}} - ; GFX103-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX103-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX103-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 - ; GFX103-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX103-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; GFX103-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) - ; GFX103-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV3]] - ; GFX103-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV6]] - ; GFX103-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV4]] - ; GFX103-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV7]] - ; GFX103-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV5]] - ; GFX103-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV8]] - ; GFX103-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32) - ; GFX103-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - %2:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 - %3:_(<3 x s32>) = G_FMAD %0, %1, %2 - $vgpr0_vgpr1_vgpr2 = COPY %3 + ; GFX103-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX103-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX103-NEXT: [[COPY2:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr6_vgpr7_vgpr8 + ; GFX103-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY]](<3 x i32>) + ; GFX103-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY1]](<3 x i32>) + ; GFX103-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY2]](<3 x i32>) + ; GFX103-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; GFX103-NEXT: [[UV3:%[0-9]+]]:_(f32), [[UV4:%[0-9]+]]:_(f32), [[UV5:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<3 x f32>) + ; GFX103-NEXT: [[UV6:%[0-9]+]]:_(f32), [[UV7:%[0-9]+]]:_(f32), [[UV8:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST2]](<3 x f32>) + ; GFX103-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UV]], [[UV3]] + ; GFX103-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UV6]] + ; GFX103-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[UV1]], [[UV4]] + ; GFX103-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL1]], [[UV7]] + ; GFX103-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[UV2]], [[UV5]] + ; GFX103-NEXT: [[FADD2:%[0-9]+]]:_(f32) = G_FADD [[FMUL2]], [[UV8]] + ; GFX103-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[FADD]](f32), [[FADD1]](f32), [[FADD2]](f32) + ; GFX103-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[BUILD_VECTOR]](<3 x f32>) + ; GFX103-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST3]](<3 x i32>) + %0:_(<3 x i32>) = COPY 
$vgpr0_vgpr1_vgpr2 + %1:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(<3 x i32>) = COPY $vgpr6_vgpr7_vgpr8 + %3:_(<3 x f32>) = G_BITCAST %0(<3 x i32>) + %4:_(<3 x f32>) = G_BITCAST %1(<3 x i32>) + %5:_(<3 x f32>) = G_BITCAST %2(<3 x i32>) + %6:_(<3 x f32>) = G_FMAD %3, %4, %5 + %7:_(<3 x i32>) = G_BITCAST %6(<3 x f32>) + $vgpr0_vgpr1_vgpr2 = COPY %7(<3 x i32>) ... --- @@ -271,72 +363,95 @@ body: | ; GFX6-LABEL: name: test_fmad_v4s32_flush ; GFX6: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) - ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>) - ; GFX6-NEXT: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[UV]], [[UV4]], [[UV8]] - ; GFX6-NEXT: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV5]], [[UV9]] - ; GFX6-NEXT: [[FMAD2:%[0-9]+]]:_(s32) = G_FMAD [[UV2]], [[UV6]], [[UV10]] - ; GFX6-NEXT: [[FMAD3:%[0-9]+]]:_(s32) = G_FMAD [[UV3]], [[UV7]], [[UV11]] - ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32), [[FMAD2]](s32), [[FMAD3]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[COPY]](<4 x i32>) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[COPY1]](<4 x i32>) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[COPY2]](<4 x i32>) + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<4 x f32>) + ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(f32), [[UV5:%[0-9]+]]:_(f32), [[UV6:%[0-9]+]]:_(f32), [[UV7:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f32>) + ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(f32), [[UV9:%[0-9]+]]:_(f32), [[UV10:%[0-9]+]]:_(f32), [[UV11:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST2]](<4 x f32>) + ; GFX6-NEXT: [[FMAD:%[0-9]+]]:_(f32) = G_FMAD [[UV]], [[UV4]], [[UV8]] + ; GFX6-NEXT: [[FMAD1:%[0-9]+]]:_(f32) = G_FMAD [[UV1]], [[UV5]], [[UV9]] + ; GFX6-NEXT: [[FMAD2:%[0-9]+]]:_(f32) = G_FMAD [[UV2]], [[UV6]], [[UV10]] + ; GFX6-NEXT: [[FMAD3:%[0-9]+]]:_(f32) = G_FMAD [[UV3]], [[UV7]], [[UV11]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[FMAD]](f32), [[FMAD1]](f32), [[FMAD2]](f32), [[FMAD3]](f32) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR]](<4 x f32>) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST3]](<4 x i32>) + ; ; GFX7-LABEL: name: test_fmad_v4s32_flush ; GFX7: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX7-NEXT: 
[[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - ; GFX7-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; GFX7-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) - ; GFX7-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>) - ; GFX7-NEXT: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[UV]], [[UV4]], [[UV8]] - ; GFX7-NEXT: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV5]], [[UV9]] - ; GFX7-NEXT: [[FMAD2:%[0-9]+]]:_(s32) = G_FMAD [[UV2]], [[UV6]], [[UV10]] - ; GFX7-NEXT: [[FMAD3:%[0-9]+]]:_(s32) = G_FMAD [[UV3]], [[UV7]], [[UV11]] - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32), [[FMAD2]](s32), [[FMAD3]](s32) - ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; GFX7-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[COPY]](<4 x i32>) + ; GFX7-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[COPY1]](<4 x i32>) + ; GFX7-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[COPY2]](<4 x i32>) + ; GFX7-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<4 x f32>) + ; GFX7-NEXT: [[UV4:%[0-9]+]]:_(f32), [[UV5:%[0-9]+]]:_(f32), [[UV6:%[0-9]+]]:_(f32), [[UV7:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f32>) + ; GFX7-NEXT: [[UV8:%[0-9]+]]:_(f32), [[UV9:%[0-9]+]]:_(f32), [[UV10:%[0-9]+]]:_(f32), [[UV11:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST2]](<4 x f32>) + ; GFX7-NEXT: [[FMAD:%[0-9]+]]:_(f32) = G_FMAD [[UV]], [[UV4]], [[UV8]] + ; GFX7-NEXT: [[FMAD1:%[0-9]+]]:_(f32) = G_FMAD [[UV1]], [[UV5]], [[UV9]] + ; GFX7-NEXT: [[FMAD2:%[0-9]+]]:_(f32) = G_FMAD [[UV2]], [[UV6]], [[UV10]] + ; GFX7-NEXT: [[FMAD3:%[0-9]+]]:_(f32) = G_FMAD [[UV3]], [[UV7]], [[UV11]] + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[FMAD]](f32), [[FMAD1]](f32), [[FMAD2]](f32), [[FMAD3]](f32) + ; GFX7-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR]](<4 x f32>) + ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST3]](<4 x i32>) + ; ; GFX101-LABEL: name: test_fmad_v4s32_flush ; GFX101: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11 ; GFX101-NEXT: {{ $}} - ; GFX101-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX101-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX101-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - ; GFX101-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; GFX101-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) - ; GFX101-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>) - ; GFX101-NEXT: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[UV]], 
[[UV4]], [[UV8]] - ; GFX101-NEXT: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV5]], [[UV9]] - ; GFX101-NEXT: [[FMAD2:%[0-9]+]]:_(s32) = G_FMAD [[UV2]], [[UV6]], [[UV10]] - ; GFX101-NEXT: [[FMAD3:%[0-9]+]]:_(s32) = G_FMAD [[UV3]], [[UV7]], [[UV11]] - ; GFX101-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32), [[FMAD2]](s32), [[FMAD3]](s32) - ; GFX101-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX101-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX101-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX101-NEXT: [[COPY2:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; GFX101-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[COPY]](<4 x i32>) + ; GFX101-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[COPY1]](<4 x i32>) + ; GFX101-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[COPY2]](<4 x i32>) + ; GFX101-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<4 x f32>) + ; GFX101-NEXT: [[UV4:%[0-9]+]]:_(f32), [[UV5:%[0-9]+]]:_(f32), [[UV6:%[0-9]+]]:_(f32), [[UV7:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f32>) + ; GFX101-NEXT: [[UV8:%[0-9]+]]:_(f32), [[UV9:%[0-9]+]]:_(f32), [[UV10:%[0-9]+]]:_(f32), [[UV11:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST2]](<4 x f32>) + ; GFX101-NEXT: [[FMAD:%[0-9]+]]:_(f32) = G_FMAD [[UV]], [[UV4]], [[UV8]] + ; GFX101-NEXT: [[FMAD1:%[0-9]+]]:_(f32) = G_FMAD [[UV1]], [[UV5]], [[UV9]] + ; GFX101-NEXT: [[FMAD2:%[0-9]+]]:_(f32) = G_FMAD [[UV2]], [[UV6]], [[UV10]] + ; GFX101-NEXT: [[FMAD3:%[0-9]+]]:_(f32) = G_FMAD [[UV3]], [[UV7]], [[UV11]] + ; GFX101-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[FMAD]](f32), [[FMAD1]](f32), [[FMAD2]](f32), [[FMAD3]](f32) + ; GFX101-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR]](<4 x f32>) + ; GFX101-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST3]](<4 x i32>) + ; ; GFX103-LABEL: name: test_fmad_v4s32_flush ; GFX103: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11 ; GFX103-NEXT: {{ $}} - ; GFX103-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX103-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX103-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - ; GFX103-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; GFX103-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) - ; GFX103-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>) - ; GFX103-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV4]] - ; GFX103-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV8]] - ; GFX103-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV5]] - ; GFX103-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV9]] - ; GFX103-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV6]] - ; GFX103-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV10]] - ; GFX103-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UV3]], [[UV7]] - ; GFX103-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UV11]] - ; GFX103-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), 
[[FADD2]](s32), [[FADD3]](s32) - ; GFX103-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - %2:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - %3:_(<4 x s32>) = G_FMAD %0, %1, %2 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; GFX103-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX103-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX103-NEXT: [[COPY2:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; GFX103-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[COPY]](<4 x i32>) + ; GFX103-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[COPY1]](<4 x i32>) + ; GFX103-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[COPY2]](<4 x i32>) + ; GFX103-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<4 x f32>) + ; GFX103-NEXT: [[UV4:%[0-9]+]]:_(f32), [[UV5:%[0-9]+]]:_(f32), [[UV6:%[0-9]+]]:_(f32), [[UV7:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f32>) + ; GFX103-NEXT: [[UV8:%[0-9]+]]:_(f32), [[UV9:%[0-9]+]]:_(f32), [[UV10:%[0-9]+]]:_(f32), [[UV11:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST2]](<4 x f32>) + ; GFX103-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UV]], [[UV4]] + ; GFX103-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UV8]] + ; GFX103-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[UV1]], [[UV5]] + ; GFX103-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL1]], [[UV9]] + ; GFX103-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[UV2]], [[UV6]] + ; GFX103-NEXT: [[FADD2:%[0-9]+]]:_(f32) = G_FADD [[FMUL2]], [[UV10]] + ; GFX103-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[UV3]], [[UV7]] + ; GFX103-NEXT: [[FADD3:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[UV11]] + ; GFX103-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[FADD]](f32), [[FADD1]](f32), [[FADD2]](f32), [[FADD3]](f32) + ; GFX103-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR]](<4 x f32>) + ; GFX103-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST3]](<4 x i32>) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<4 x i32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(<4 x i32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + %3:_(<4 x f32>) = G_BITCAST %0(<4 x i32>) + %4:_(<4 x f32>) = G_BITCAST %1(<4 x i32>) + %5:_(<4 x f32>) = G_BITCAST %2(<4 x i32>) + %6:_(<4 x f32>) = G_FMAD %3, %4, %5 + %7:_(<4 x i32>) = G_BITCAST %6(<4 x f32>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %7(<4 x i32>) ... 
--- @@ -353,44 +468,67 @@ body: | ; GFX6-LABEL: name: test_fmad_s32_denorm ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]] - ; GFX6-NEXT: $vgpr0 = COPY [[FADD]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[BITCAST2]] + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FADD]](f32) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) + ; ; GFX7-LABEL: name: test_fmad_s32_denorm ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX7-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] - ; GFX7-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]] - ; GFX7-NEXT: $vgpr0 = COPY [[FADD]](s32) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX7-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX7-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX7-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; GFX7-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX7-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[BITCAST2]] + ; GFX7-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FADD]](f32) + ; GFX7-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) + ; ; GFX101-LABEL: name: test_fmad_s32_denorm ; GFX101: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX101-NEXT: {{ $}} - ; GFX101-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX101-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX101-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX101-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] - ; GFX101-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]] - ; GFX101-NEXT: $vgpr0 = COPY [[FADD]](s32) + ; GFX101-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX101-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX101-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX101-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX101-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX101-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; GFX101-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX101-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[BITCAST2]] + ; GFX101-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FADD]](f32) + ; GFX101-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) + ; ; GFX103-LABEL: name: test_fmad_s32_denorm ; GFX103: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX103-NEXT: {{ $}} - ; GFX103-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX103-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX103-NEXT: 
[[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX103-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] - ; GFX103-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]] - ; GFX103-NEXT: $vgpr0 = COPY [[FADD]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = G_FMAD %0, %1, %2 - $vgpr0 = COPY %3 + ; GFX103-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX103-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX103-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX103-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX103-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX103-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; GFX103-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX103-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[BITCAST2]] + ; GFX103-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FADD]](f32) + ; GFX103-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(f32) = G_BITCAST %0(i32) + %4:_(f32) = G_BITCAST %1(i32) + %5:_(f32) = G_BITCAST %2(i32) + %6:_(f32) = G_FMAD %3, %4, %5 + %7:_(i32) = G_BITCAST %6(f32) + $vgpr0 = COPY %7(i32) ... --- @@ -407,44 +545,67 @@ body: | ; GFX6-LABEL: name: test_fmad_s32_flags_denorm ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[COPY1]] - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FMUL]], [[COPY2]] - ; GFX6-NEXT: $vgpr0 = COPY [[FADD]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = nnan G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(f32) = nnan G_FADD [[FMUL]], [[BITCAST2]] + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FADD]](f32) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) + ; ; GFX7-LABEL: name: test_fmad_s32_flags_denorm ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX7-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[COPY1]] - ; GFX7-NEXT: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FMUL]], [[COPY2]] - ; GFX7-NEXT: $vgpr0 = COPY [[FADD]](s32) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX7-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX7-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX7-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; GFX7-NEXT: [[FMUL:%[0-9]+]]:_(f32) = nnan G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX7-NEXT: [[FADD:%[0-9]+]]:_(f32) = nnan G_FADD [[FMUL]], [[BITCAST2]] + ; GFX7-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FADD]](f32) + ; GFX7-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) + ; ; GFX101-LABEL: name: test_fmad_s32_flags_denorm 
; GFX101: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX101-NEXT: {{ $}} - ; GFX101-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX101-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX101-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX101-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[COPY1]] - ; GFX101-NEXT: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FMUL]], [[COPY2]] - ; GFX101-NEXT: $vgpr0 = COPY [[FADD]](s32) + ; GFX101-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX101-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX101-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX101-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX101-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX101-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; GFX101-NEXT: [[FMUL:%[0-9]+]]:_(f32) = nnan G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX101-NEXT: [[FADD:%[0-9]+]]:_(f32) = nnan G_FADD [[FMUL]], [[BITCAST2]] + ; GFX101-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FADD]](f32) + ; GFX101-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) + ; ; GFX103-LABEL: name: test_fmad_s32_flags_denorm ; GFX103: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX103-NEXT: {{ $}} - ; GFX103-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX103-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX103-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX103-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[COPY1]] - ; GFX103-NEXT: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FMUL]], [[COPY2]] - ; GFX103-NEXT: $vgpr0 = COPY [[FADD]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = nnan G_FMAD %0, %1, %2 - $vgpr0 = COPY %3 + ; GFX103-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX103-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX103-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX103-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX103-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX103-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; GFX103-NEXT: [[FMUL:%[0-9]+]]:_(f32) = nnan G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX103-NEXT: [[FADD:%[0-9]+]]:_(f32) = nnan G_FADD [[FMUL]], [[BITCAST2]] + ; GFX103-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FADD]](f32) + ; GFX103-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(f32) = G_BITCAST %0(i32) + %4:_(f32) = G_BITCAST %1(i32) + %5:_(f32) = G_BITCAST %2(i32) + %6:_(f32) = nnan G_FMAD %3, %4, %5 + %7:_(i32) = G_BITCAST %6(f32) + $vgpr0 = COPY %7(i32) ... 
--- @@ -461,68 +622,91 @@ body: | ; GFX6-LABEL: name: test_fmad_v2s32_denorm ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV2]] - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV4]] - ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV3]] - ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV5]] - ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr4_vgpr5 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY1]](<2 x i32>) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY2]](<2 x i32>) + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f32>) + ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(f32), [[UV5:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST2]](<2 x f32>) + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UV]], [[UV2]] + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UV4]] + ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[UV1]], [[UV3]] + ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL1]], [[UV5]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FADD]](f32), [[FADD1]](f32) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST3]](<2 x i32>) + ; ; GFX7-LABEL: name: test_fmad_v2s32_denorm ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; GFX7-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX7-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX7-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; GFX7-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV2]] - ; GFX7-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV4]] - ; GFX7-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV3]] - ; GFX7-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV5]] - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX7-NEXT: 
[[COPY2:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr4_vgpr5 + ; GFX7-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; GFX7-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY1]](<2 x i32>) + ; GFX7-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY2]](<2 x i32>) + ; GFX7-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; GFX7-NEXT: [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f32>) + ; GFX7-NEXT: [[UV4:%[0-9]+]]:_(f32), [[UV5:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST2]](<2 x f32>) + ; GFX7-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UV]], [[UV2]] + ; GFX7-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UV4]] + ; GFX7-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[UV1]], [[UV3]] + ; GFX7-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL1]], [[UV5]] + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FADD]](f32), [[FADD1]](f32) + ; GFX7-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST3]](<2 x i32>) + ; ; GFX101-LABEL: name: test_fmad_v2s32_denorm ; GFX101: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX101-NEXT: {{ $}} - ; GFX101-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX101-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX101-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; GFX101-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX101-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX101-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; GFX101-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV2]] - ; GFX101-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV4]] - ; GFX101-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV3]] - ; GFX101-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV5]] - ; GFX101-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32) - ; GFX101-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX101-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX101-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX101-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr4_vgpr5 + ; GFX101-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; GFX101-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY1]](<2 x i32>) + ; GFX101-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY2]](<2 x i32>) + ; GFX101-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; GFX101-NEXT: [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f32>) + ; GFX101-NEXT: [[UV4:%[0-9]+]]:_(f32), [[UV5:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST2]](<2 x f32>) + ; GFX101-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UV]], [[UV2]] + ; GFX101-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UV4]] + ; GFX101-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[UV1]], [[UV3]] + ; GFX101-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL1]], [[UV5]] + ; GFX101-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FADD]](f32), [[FADD1]](f32) + ; GFX101-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; GFX101-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST3]](<2 x i32>) + ; ; GFX103-LABEL: name: test_fmad_v2s32_denorm ; 
GFX103: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX103-NEXT: {{ $}} - ; GFX103-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX103-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX103-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; GFX103-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX103-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX103-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; GFX103-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV2]] - ; GFX103-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV4]] - ; GFX103-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV3]] - ; GFX103-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV5]] - ; GFX103-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32) - ; GFX103-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = COPY $vgpr4_vgpr5 - %3:_(<2 x s32>) = G_FMAD %0, %1, %2 - $vgpr0_vgpr1 = COPY %3 + ; GFX103-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX103-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX103-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr4_vgpr5 + ; GFX103-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; GFX103-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY1]](<2 x i32>) + ; GFX103-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY2]](<2 x i32>) + ; GFX103-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; GFX103-NEXT: [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f32>) + ; GFX103-NEXT: [[UV4:%[0-9]+]]:_(f32), [[UV5:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST2]](<2 x f32>) + ; GFX103-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UV]], [[UV2]] + ; GFX103-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UV4]] + ; GFX103-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[UV1]], [[UV3]] + ; GFX103-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL1]], [[UV5]] + ; GFX103-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FADD]](f32), [[FADD1]](f32) + ; GFX103-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; GFX103-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST3]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i32>) = COPY $vgpr4_vgpr5 + %3:_(<2 x f32>) = G_BITCAST %0(<2 x i32>) + %4:_(<2 x f32>) = G_BITCAST %1(<2 x i32>) + %5:_(<2 x f32>) = G_BITCAST %2(<2 x i32>) + %6:_(<2 x f32>) = G_FMAD %3, %4, %5 + %7:_(<2 x i32>) = G_BITCAST %6(<2 x f32>) + $vgpr0_vgpr1 = COPY %7(<2 x i32>) ... 
--- @@ -539,76 +723,99 @@ body: | ; GFX6-LABEL: name: test_fmad_v3s32_denorm ; GFX6: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5, $vgpr6_vgpr7_vgpr8 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV3]] - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV6]] - ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV4]] - ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV7]] - ; GFX6-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV5]] - ; GFX6-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV8]] - ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr6_vgpr7_vgpr8 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY]](<3 x i32>) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY1]](<3 x i32>) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY2]](<3 x i32>) + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(f32), [[UV4:%[0-9]+]]:_(f32), [[UV5:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<3 x f32>) + ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(f32), [[UV7:%[0-9]+]]:_(f32), [[UV8:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST2]](<3 x f32>) + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UV]], [[UV3]] + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UV6]] + ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[UV1]], [[UV4]] + ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL1]], [[UV7]] + ; GFX6-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[UV2]], [[UV5]] + ; GFX6-NEXT: [[FADD2:%[0-9]+]]:_(f32) = G_FADD [[FMUL2]], [[UV8]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[FADD]](f32), [[FADD1]](f32), [[FADD2]](f32) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[BUILD_VECTOR]](<3 x f32>) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST3]](<3 x i32>) + ; ; GFX7-LABEL: name: test_fmad_v3s32_denorm ; GFX7: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5, $vgpr6_vgpr7_vgpr8 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 - ; GFX7-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX7-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; GFX7-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = 
G_UNMERGE_VALUES [[COPY2]](<3 x s32>) - ; GFX7-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV3]] - ; GFX7-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV6]] - ; GFX7-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV4]] - ; GFX7-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV7]] - ; GFX7-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV5]] - ; GFX7-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV8]] - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32) - ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr6_vgpr7_vgpr8 + ; GFX7-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY]](<3 x i32>) + ; GFX7-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY1]](<3 x i32>) + ; GFX7-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY2]](<3 x i32>) + ; GFX7-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; GFX7-NEXT: [[UV3:%[0-9]+]]:_(f32), [[UV4:%[0-9]+]]:_(f32), [[UV5:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<3 x f32>) + ; GFX7-NEXT: [[UV6:%[0-9]+]]:_(f32), [[UV7:%[0-9]+]]:_(f32), [[UV8:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST2]](<3 x f32>) + ; GFX7-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UV]], [[UV3]] + ; GFX7-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UV6]] + ; GFX7-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[UV1]], [[UV4]] + ; GFX7-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL1]], [[UV7]] + ; GFX7-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[UV2]], [[UV5]] + ; GFX7-NEXT: [[FADD2:%[0-9]+]]:_(f32) = G_FADD [[FMUL2]], [[UV8]] + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[FADD]](f32), [[FADD1]](f32), [[FADD2]](f32) + ; GFX7-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[BUILD_VECTOR]](<3 x f32>) + ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST3]](<3 x i32>) + ; ; GFX101-LABEL: name: test_fmad_v3s32_denorm ; GFX101: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5, $vgpr6_vgpr7_vgpr8 ; GFX101-NEXT: {{ $}} - ; GFX101-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX101-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX101-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 - ; GFX101-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX101-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; GFX101-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) - ; GFX101-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV3]] - ; GFX101-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV6]] - ; GFX101-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV4]] - ; GFX101-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV7]] - ; GFX101-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV5]] - ; GFX101-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV8]] - ; GFX101-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32) - ; GFX101-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX101-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY 
$vgpr0_vgpr1_vgpr2 + ; GFX101-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX101-NEXT: [[COPY2:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr6_vgpr7_vgpr8 + ; GFX101-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY]](<3 x i32>) + ; GFX101-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY1]](<3 x i32>) + ; GFX101-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY2]](<3 x i32>) + ; GFX101-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; GFX101-NEXT: [[UV3:%[0-9]+]]:_(f32), [[UV4:%[0-9]+]]:_(f32), [[UV5:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<3 x f32>) + ; GFX101-NEXT: [[UV6:%[0-9]+]]:_(f32), [[UV7:%[0-9]+]]:_(f32), [[UV8:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST2]](<3 x f32>) + ; GFX101-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UV]], [[UV3]] + ; GFX101-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UV6]] + ; GFX101-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[UV1]], [[UV4]] + ; GFX101-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL1]], [[UV7]] + ; GFX101-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[UV2]], [[UV5]] + ; GFX101-NEXT: [[FADD2:%[0-9]+]]:_(f32) = G_FADD [[FMUL2]], [[UV8]] + ; GFX101-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[FADD]](f32), [[FADD1]](f32), [[FADD2]](f32) + ; GFX101-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[BUILD_VECTOR]](<3 x f32>) + ; GFX101-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST3]](<3 x i32>) + ; ; GFX103-LABEL: name: test_fmad_v3s32_denorm ; GFX103: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5, $vgpr6_vgpr7_vgpr8 ; GFX103-NEXT: {{ $}} - ; GFX103-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX103-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX103-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 - ; GFX103-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX103-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; GFX103-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) - ; GFX103-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV3]] - ; GFX103-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV6]] - ; GFX103-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV4]] - ; GFX103-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV7]] - ; GFX103-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV5]] - ; GFX103-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV8]] - ; GFX103-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32) - ; GFX103-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - %2:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 - %3:_(<3 x s32>) = G_FMAD %0, %1, %2 - $vgpr0_vgpr1_vgpr2 = COPY %3 + ; GFX103-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX103-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX103-NEXT: [[COPY2:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr6_vgpr7_vgpr8 + ; GFX103-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY]](<3 x i32>) + ; GFX103-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY1]](<3 x i32>) + ; GFX103-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY2]](<3 x i32>) + ; GFX103-NEXT: 
[[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; GFX103-NEXT: [[UV3:%[0-9]+]]:_(f32), [[UV4:%[0-9]+]]:_(f32), [[UV5:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<3 x f32>) + ; GFX103-NEXT: [[UV6:%[0-9]+]]:_(f32), [[UV7:%[0-9]+]]:_(f32), [[UV8:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST2]](<3 x f32>) + ; GFX103-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UV]], [[UV3]] + ; GFX103-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UV6]] + ; GFX103-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[UV1]], [[UV4]] + ; GFX103-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL1]], [[UV7]] + ; GFX103-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[UV2]], [[UV5]] + ; GFX103-NEXT: [[FADD2:%[0-9]+]]:_(f32) = G_FADD [[FMUL2]], [[UV8]] + ; GFX103-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[FADD]](f32), [[FADD1]](f32), [[FADD2]](f32) + ; GFX103-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[BUILD_VECTOR]](<3 x f32>) + ; GFX103-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST3]](<3 x i32>) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(<3 x i32>) = COPY $vgpr6_vgpr7_vgpr8 + %3:_(<3 x f32>) = G_BITCAST %0(<3 x i32>) + %4:_(<3 x f32>) = G_BITCAST %1(<3 x i32>) + %5:_(<3 x f32>) = G_BITCAST %2(<3 x i32>) + %6:_(<3 x f32>) = G_FMAD %3, %4, %5 + %7:_(<3 x i32>) = G_BITCAST %6(<3 x f32>) + $vgpr0_vgpr1_vgpr2 = COPY %7(<3 x i32>) ... --- @@ -625,82 +832,105 @@ body: | ; GFX6-LABEL: name: test_fmad_v4s32_denorm ; GFX6: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) - ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>) - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV4]] - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV8]] - ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV5]] - ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV9]] - ; GFX6-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV6]] - ; GFX6-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV10]] - ; GFX6-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UV3]], [[UV7]] - ; GFX6-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UV11]] - ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32), [[FADD3]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[COPY]](<4 x i32>) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[COPY1]](<4 x i32>) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[COPY2]](<4 x i32>) + ; 
GFX6-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<4 x f32>) + ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(f32), [[UV5:%[0-9]+]]:_(f32), [[UV6:%[0-9]+]]:_(f32), [[UV7:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f32>) + ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(f32), [[UV9:%[0-9]+]]:_(f32), [[UV10:%[0-9]+]]:_(f32), [[UV11:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST2]](<4 x f32>) + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UV]], [[UV4]] + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UV8]] + ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[UV1]], [[UV5]] + ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL1]], [[UV9]] + ; GFX6-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[UV2]], [[UV6]] + ; GFX6-NEXT: [[FADD2:%[0-9]+]]:_(f32) = G_FADD [[FMUL2]], [[UV10]] + ; GFX6-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[UV3]], [[UV7]] + ; GFX6-NEXT: [[FADD3:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[UV11]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[FADD]](f32), [[FADD1]](f32), [[FADD2]](f32), [[FADD3]](f32) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR]](<4 x f32>) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST3]](<4 x i32>) + ; ; GFX7-LABEL: name: test_fmad_v4s32_denorm ; GFX7: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - ; GFX7-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; GFX7-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) - ; GFX7-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>) - ; GFX7-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV4]] - ; GFX7-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV8]] - ; GFX7-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV5]] - ; GFX7-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV9]] - ; GFX7-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV6]] - ; GFX7-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV10]] - ; GFX7-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UV3]], [[UV7]] - ; GFX7-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UV11]] - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32), [[FADD3]](s32) - ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; GFX7-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[COPY]](<4 x i32>) + ; GFX7-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[COPY1]](<4 x i32>) + ; GFX7-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[COPY2]](<4 x i32>) + ; GFX7-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<4 x f32>) + ; GFX7-NEXT: [[UV4:%[0-9]+]]:_(f32), 
[[UV5:%[0-9]+]]:_(f32), [[UV6:%[0-9]+]]:_(f32), [[UV7:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f32>) + ; GFX7-NEXT: [[UV8:%[0-9]+]]:_(f32), [[UV9:%[0-9]+]]:_(f32), [[UV10:%[0-9]+]]:_(f32), [[UV11:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST2]](<4 x f32>) + ; GFX7-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UV]], [[UV4]] + ; GFX7-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UV8]] + ; GFX7-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[UV1]], [[UV5]] + ; GFX7-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL1]], [[UV9]] + ; GFX7-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[UV2]], [[UV6]] + ; GFX7-NEXT: [[FADD2:%[0-9]+]]:_(f32) = G_FADD [[FMUL2]], [[UV10]] + ; GFX7-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[UV3]], [[UV7]] + ; GFX7-NEXT: [[FADD3:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[UV11]] + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[FADD]](f32), [[FADD1]](f32), [[FADD2]](f32), [[FADD3]](f32) + ; GFX7-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR]](<4 x f32>) + ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST3]](<4 x i32>) + ; ; GFX101-LABEL: name: test_fmad_v4s32_denorm ; GFX101: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11 ; GFX101-NEXT: {{ $}} - ; GFX101-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX101-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX101-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - ; GFX101-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; GFX101-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) - ; GFX101-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>) - ; GFX101-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV4]] - ; GFX101-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV8]] - ; GFX101-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV5]] - ; GFX101-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV9]] - ; GFX101-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV6]] - ; GFX101-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV10]] - ; GFX101-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UV3]], [[UV7]] - ; GFX101-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UV11]] - ; GFX101-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32), [[FADD3]](s32) - ; GFX101-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX101-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX101-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX101-NEXT: [[COPY2:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; GFX101-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[COPY]](<4 x i32>) + ; GFX101-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[COPY1]](<4 x i32>) + ; GFX101-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[COPY2]](<4 x i32>) + ; GFX101-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<4 x f32>) + ; GFX101-NEXT: [[UV4:%[0-9]+]]:_(f32), [[UV5:%[0-9]+]]:_(f32), [[UV6:%[0-9]+]]:_(f32), [[UV7:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f32>) + ; GFX101-NEXT: 
[[UV8:%[0-9]+]]:_(f32), [[UV9:%[0-9]+]]:_(f32), [[UV10:%[0-9]+]]:_(f32), [[UV11:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST2]](<4 x f32>) + ; GFX101-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UV]], [[UV4]] + ; GFX101-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UV8]] + ; GFX101-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[UV1]], [[UV5]] + ; GFX101-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL1]], [[UV9]] + ; GFX101-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[UV2]], [[UV6]] + ; GFX101-NEXT: [[FADD2:%[0-9]+]]:_(f32) = G_FADD [[FMUL2]], [[UV10]] + ; GFX101-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[UV3]], [[UV7]] + ; GFX101-NEXT: [[FADD3:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[UV11]] + ; GFX101-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[FADD]](f32), [[FADD1]](f32), [[FADD2]](f32), [[FADD3]](f32) + ; GFX101-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR]](<4 x f32>) + ; GFX101-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST3]](<4 x i32>) + ; ; GFX103-LABEL: name: test_fmad_v4s32_denorm ; GFX103: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11 ; GFX103-NEXT: {{ $}} - ; GFX103-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX103-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX103-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - ; GFX103-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; GFX103-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) - ; GFX103-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>) - ; GFX103-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV4]] - ; GFX103-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV8]] - ; GFX103-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV5]] - ; GFX103-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV9]] - ; GFX103-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV6]] - ; GFX103-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV10]] - ; GFX103-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UV3]], [[UV7]] - ; GFX103-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UV11]] - ; GFX103-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32), [[FADD3]](s32) - ; GFX103-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - %2:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - %3:_(<4 x s32>) = G_FMAD %0, %1, %2 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; GFX103-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX103-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX103-NEXT: [[COPY2:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; GFX103-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[COPY]](<4 x i32>) + ; GFX103-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[COPY1]](<4 x i32>) + ; GFX103-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[COPY2]](<4 x i32>) + ; GFX103-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<4 x f32>) + ; GFX103-NEXT: [[UV4:%[0-9]+]]:_(f32), [[UV5:%[0-9]+]]:_(f32), 
[[UV6:%[0-9]+]]:_(f32), [[UV7:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f32>) + ; GFX103-NEXT: [[UV8:%[0-9]+]]:_(f32), [[UV9:%[0-9]+]]:_(f32), [[UV10:%[0-9]+]]:_(f32), [[UV11:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST2]](<4 x f32>) + ; GFX103-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UV]], [[UV4]] + ; GFX103-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UV8]] + ; GFX103-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[UV1]], [[UV5]] + ; GFX103-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL1]], [[UV9]] + ; GFX103-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[UV2]], [[UV6]] + ; GFX103-NEXT: [[FADD2:%[0-9]+]]:_(f32) = G_FADD [[FMUL2]], [[UV10]] + ; GFX103-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[UV3]], [[UV7]] + ; GFX103-NEXT: [[FADD3:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[UV11]] + ; GFX103-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[FADD]](f32), [[FADD1]](f32), [[FADD2]](f32), [[FADD3]](f32) + ; GFX103-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR]](<4 x f32>) + ; GFX103-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST3]](<4 x i32>) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<4 x i32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(<4 x i32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + %3:_(<4 x f32>) = G_BITCAST %0(<4 x i32>) + %4:_(<4 x f32>) = G_BITCAST %1(<4 x i32>) + %5:_(<4 x f32>) = G_BITCAST %2(<4 x i32>) + %6:_(<4 x f32>) = G_FMAD %3, %4, %5 + %7:_(<4 x i32>) = G_BITCAST %6(<4 x f32>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %7(<4 x i32>) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s64.mir index 66af355b0d6ab..6ef2f6de79d71 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s64.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s64.mir @@ -18,17 +18,25 @@ body: | ; CHECK-LABEL: name: test_fmad_s64_flush ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[FMUL]], [[COPY2]] - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[FADD]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = COPY $vgpr4_vgpr5 - %3:_(s64) = G_FMAD %0, %1, %2 - $vgpr0_vgpr1 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i64) = COPY $vgpr4_vgpr5 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[COPY1]](i64) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(f64) = G_BITCAST [[COPY2]](i64) + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(f64) = G_FADD [[FMUL]], [[BITCAST2]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i64) = G_BITCAST [[FADD]](f64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST3]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64) = COPY $vgpr4_vgpr5 + %3:_(f64) = G_BITCAST %0(i64) + %4:_(f64) = G_BITCAST %1(i64) + %5:_(f64) = G_BITCAST %2(i64) + %6:_(f64) = G_FMAD %3, %4, %5 + %7:_(i64) = G_BITCAST %6(f64) + $vgpr0_vgpr1 = COPY %7(i64) ... 
--- @@ -45,23 +53,31 @@ body: | ; CHECK-LABEL: name: test_fmad_v2s64_flush ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY2]](<2 x s64>) - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[UV]], [[UV2]] - ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[FMUL]], [[UV4]] - ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[UV1]], [[UV3]] - ; CHECK-NEXT: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[FMUL1]], [[UV5]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FADD]](s64), [[FADD1]](s64) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - %2:_(<2 x s64>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - %3:_(<2 x s64>) = G_FMAD %0, %1, %2 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY1]](<2 x i64>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY2]](<2 x i64>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(f64), [[UV1:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST]](<2 x f64>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(f64), [[UV3:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f64>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(f64), [[UV5:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST2]](<2 x f64>) + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[UV]], [[UV2]] + ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(f64) = G_FADD [[FMUL]], [[UV4]] + ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(f64) = G_FMUL [[UV1]], [[UV3]] + ; CHECK-NEXT: [[FADD1:%[0-9]+]]:_(f64) = G_FADD [[FMUL1]], [[UV5]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f64>) = G_BUILD_VECTOR [[FADD]](f64), [[FADD1]](f64) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BUILD_VECTOR]](<2 x f64>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST3]](<2 x i64>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(<2 x i64>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + %3:_(<2 x f64>) = G_BITCAST %0(<2 x i64>) + %4:_(<2 x f64>) = G_BITCAST %1(<2 x i64>) + %5:_(<2 x f64>) = G_BITCAST %2(<2 x i64>) + %6:_(<2 x f64>) = G_FMAD %3, %4, %5 + %7:_(<2 x i64>) = G_BITCAST %6(<2 x f64>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %7(<2 x i64>) ... 
--- @@ -78,17 +94,25 @@ body: | ; CHECK-LABEL: name: test_fmad_s64_denorm ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[FMUL]], [[COPY2]] - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[FADD]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = COPY $vgpr4_vgpr5 - %3:_(s64) = G_FMAD %0, %1, %2 - $vgpr0_vgpr1 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i64) = COPY $vgpr4_vgpr5 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[COPY1]](i64) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(f64) = G_BITCAST [[COPY2]](i64) + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(f64) = G_FADD [[FMUL]], [[BITCAST2]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i64) = G_BITCAST [[FADD]](f64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST3]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64) = COPY $vgpr4_vgpr5 + %3:_(f64) = G_BITCAST %0(i64) + %4:_(f64) = G_BITCAST %1(i64) + %5:_(f64) = G_BITCAST %2(i64) + %6:_(f64) = G_FMAD %3, %4, %5 + %7:_(i64) = G_BITCAST %6(f64) + $vgpr0_vgpr1 = COPY %7(i64) ... --- @@ -105,21 +129,29 @@ body: | ; CHECK-LABEL: name: test_fmad_v2s64_denorm ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY2]](<2 x s64>) - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[UV]], [[UV2]] - ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[FMUL]], [[UV4]] - ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[UV1]], [[UV3]] - ; CHECK-NEXT: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[FMUL1]], [[UV5]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FADD]](s64), [[FADD1]](s64) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - %2:_(<2 x s64>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - %3:_(<2 x s64>) = G_FMAD %0, %1, %2 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY1]](<2 x i64>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY2]](<2 x i64>) + ; CHECK-NEXT: 
[[UV:%[0-9]+]]:_(f64), [[UV1:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST]](<2 x f64>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(f64), [[UV3:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f64>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(f64), [[UV5:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST2]](<2 x f64>) + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[UV]], [[UV2]] + ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(f64) = G_FADD [[FMUL]], [[UV4]] + ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(f64) = G_FMUL [[UV1]], [[UV3]] + ; CHECK-NEXT: [[FADD1:%[0-9]+]]:_(f64) = G_FADD [[FMUL1]], [[UV5]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f64>) = G_BUILD_VECTOR [[FADD]](f64), [[FADD1]](f64) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BUILD_VECTOR]](<2 x f64>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST3]](<2 x i64>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(<2 x i64>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + %3:_(<2 x f64>) = G_BITCAST %0(<2 x i64>) + %4:_(<2 x f64>) = G_BITCAST %1(<2 x i64>) + %5:_(<2 x f64>) = G_BITCAST %2(<2 x i64>) + %6:_(<2 x f64>) = G_FMAD %3, %4, %5 + %7:_(<2 x i64>) = G_BITCAST %6(<2 x f64>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %7(<2 x i64>) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir index 78bed9e19c65e..ee5d7139fb70f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir @@ -17,36 +17,48 @@ body: | ; SI-LABEL: name: test_fmaxnum_s32_ieee_mode_on ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; SI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST1]] + ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMAXNUM_IEEE]](f32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) ; ; VI-LABEL: name: test_fmaxnum_s32_ieee_mode_on ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = 
G_FCANONICALIZE [[BITCAST1]] + ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMAXNUM_IEEE]](f32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) ; ; GFX9-LABEL: name: test_fmaxnum_s32_ieee_mode_on ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_FMAXNUM %0, %1 - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST1]] + ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMAXNUM_IEEE]](f32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32) = G_FMAXNUM %2, %3 + %5:_(i32) = G_BITCAST %4(f32) + $vgpr0 = COPY %5(i32) ... --- @@ -61,30 +73,42 @@ body: | ; SI-LABEL: name: test_fmaxnum_s32_ieee_mode_off ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[FMAXNUM:%[0-9]+]]:_(s32) = G_FMAXNUM [[COPY]], [[COPY1]] - ; SI-NEXT: $vgpr0 = COPY [[FMAXNUM]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; SI-NEXT: [[FMAXNUM:%[0-9]+]]:_(f32) = G_FMAXNUM [[BITCAST]], [[BITCAST1]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMAXNUM]](f32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) ; ; VI-LABEL: name: test_fmaxnum_s32_ieee_mode_off ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[FMAXNUM:%[0-9]+]]:_(s32) = G_FMAXNUM [[COPY]], [[COPY1]] - ; VI-NEXT: $vgpr0 = COPY [[FMAXNUM]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; VI-NEXT: [[FMAXNUM:%[0-9]+]]:_(f32) = G_FMAXNUM [[BITCAST]], [[BITCAST1]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMAXNUM]](f32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) ; ; GFX9-LABEL: name: test_fmaxnum_s32_ieee_mode_off ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[FMAXNUM:%[0-9]+]]:_(s32) = G_FMAXNUM [[COPY]], [[COPY1]] - ; GFX9-NEXT: $vgpr0 = COPY 
[[FMAXNUM]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_FMAXNUM %0, %1 - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX9-NEXT: [[FMAXNUM:%[0-9]+]]:_(f32) = G_FMAXNUM [[BITCAST]], [[BITCAST1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMAXNUM]](f32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32) = G_FMAXNUM %2, %3 + %5:_(i32) = G_BITCAST %4(f32) + $vgpr0 = COPY %5(i32) ... --- @@ -96,30 +120,42 @@ body: | ; SI-LABEL: name: test_fmaxnum_s32_nnan ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = nnan G_FMAXNUM_IEEE [[COPY]], [[COPY1]] - ; SI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f32) = nnan G_FMAXNUM_IEEE [[BITCAST]], [[BITCAST1]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMAXNUM_IEEE]](f32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) ; ; VI-LABEL: name: test_fmaxnum_s32_nnan ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = nnan G_FMAXNUM_IEEE [[COPY]], [[COPY1]] - ; VI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f32) = nnan G_FMAXNUM_IEEE [[BITCAST]], [[BITCAST1]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMAXNUM_IEEE]](f32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) ; ; GFX9-LABEL: name: test_fmaxnum_s32_nnan ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = nnan G_FMAXNUM_IEEE [[COPY]], [[COPY1]] - ; GFX9-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = nnan G_FMAXNUM %0, %1 - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f32) = nnan G_FMAXNUM_IEEE [[BITCAST]], [[BITCAST1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMAXNUM_IEEE]](f32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32) = nnan G_FMAXNUM %2, %3 + %5:_(i32) = G_BITCAST %4(f32) + $vgpr0 = COPY %5(i32) ... 
@@ -132,33 +168,48 @@ body: | ; SI-LABEL: name: test_fmaxnum_s32_nnan_lhs ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = nnan COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[COPY]], [[FCANONICALIZE]] - ; SI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = nnan COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST1]] + ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMAXNUM_IEEE]](f32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) ; ; VI-LABEL: name: test_fmaxnum_s32_nnan_lhs ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = nnan COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[COPY]], [[FCANONICALIZE]] - ; VI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = nnan COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST1]] + ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMAXNUM_IEEE]](f32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) ; ; GFX9-LABEL: name: test_fmaxnum_s32_nnan_lhs ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = nnan COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[COPY]], [[FCANONICALIZE]] - ; GFX9-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) - %0:_(s32) = nnan COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_FMAXNUM %0, %1 - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = nnan COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST1]] + ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMAXNUM_IEEE]](f32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) + %0:_(i32) = nnan COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32) = G_FMAXNUM %2, %3 + %5:_(i32) = G_BITCAST %4(f32) + $vgpr0 = COPY %5(i32) ... 
@@ -171,33 +222,48 @@ body: | ; SI-LABEL: name: test_fmaxnum_s32_nnan_rhs ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 - ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[COPY1]] - ; SI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = nnan COPY $vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST1]] + ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMAXNUM_IEEE]](f32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) ; ; VI-LABEL: name: test_fmaxnum_s32_nnan_rhs ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 - ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[COPY1]] - ; VI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = nnan COPY $vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST1]] + ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMAXNUM_IEEE]](f32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) ; ; GFX9-LABEL: name: test_fmaxnum_s32_nnan_rhs ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 - ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[COPY1]] - ; GFX9-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = nnan COPY $vgpr1 - %2:_(s32) = G_FMAXNUM %0, %1 - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = nnan COPY $vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST1]] + ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMAXNUM_IEEE]](f32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = nnan COPY $vgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32) = G_FMAXNUM %2, %3 + %5:_(i32) = G_BITCAST %4(f32) + $vgpr0 = COPY %5(i32) ... 
--- @@ -209,30 +275,48 @@ body: | ; SI-LABEL: name: test_fmaxnum_s32_nnan_lhs_rhs ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = nnan COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 - ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[COPY]], [[COPY1]] - ; SI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = nnan COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = nnan COPY $vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST1]] + ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMAXNUM_IEEE]](f32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) ; ; VI-LABEL: name: test_fmaxnum_s32_nnan_lhs_rhs ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = nnan COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 - ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[COPY]], [[COPY1]] - ; VI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = nnan COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = nnan COPY $vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST1]] + ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMAXNUM_IEEE]](f32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) ; ; GFX9-LABEL: name: test_fmaxnum_s32_nnan_lhs_rhs ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = nnan COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 - ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[COPY]], [[COPY1]] - ; GFX9-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) - %0:_(s32) = nnan COPY $vgpr0 - %1:_(s32) = nnan COPY $vgpr1 - %2:_(s32) = G_FMAXNUM %0, %1 - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = nnan COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = nnan COPY $vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST1]] + ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMAXNUM_IEEE]](f32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) + %0:_(i32) = nnan COPY $vgpr0 + %1:_(i32) = nnan COPY $vgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32) = G_FMAXNUM %2, %3 + %5:_(i32) = G_BITCAST %4(f32) + $vgpr0 = COPY %5(i32) ... 
--- @@ -244,36 +328,48 @@ body: | ; SI-LABEL: name: test_fmaxnum_s64 ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY]] - ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY1]] - ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s64) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[FMAXNUM_IEEE]](s64) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[COPY1]](i64) + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f64) = G_FCANONICALIZE [[BITCAST]] + ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f64) = G_FCANONICALIZE [[BITCAST1]] + ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f64) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i64) = G_BITCAST [[FMAXNUM_IEEE]](f64) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](i64) ; ; VI-LABEL: name: test_fmaxnum_s64 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY]] - ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY1]] - ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s64) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[FMAXNUM_IEEE]](s64) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[COPY1]](i64) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f64) = G_FCANONICALIZE [[BITCAST]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f64) = G_FCANONICALIZE [[BITCAST1]] + ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f64) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i64) = G_BITCAST [[FMAXNUM_IEEE]](f64) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](i64) ; ; GFX9-LABEL: name: test_fmaxnum_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY]] - ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY1]] - ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s64) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FMAXNUM_IEEE]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = G_FMAXNUM %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[COPY1]](i64) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f64) = G_FCANONICALIZE [[BITCAST]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f64) = G_FCANONICALIZE [[BITCAST1]] + ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f64) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i64) = G_BITCAST 
[[FMAXNUM_IEEE]](f64) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(f64) = G_BITCAST %0(i64) + %3:_(f64) = G_BITCAST %1(i64) + %4:_(f64) = G_FMAXNUM %2, %3 + %5:_(i64) = G_BITCAST %4(f64) + $vgpr0_vgpr1 = COPY %5(i64) ... --- @@ -285,49 +381,61 @@ body: | ; SI-LABEL: name: test_fmaxnum_s16 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT]], [[FPEXT1]] - ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE]](s32) - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST1]](f16) + ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FPEXT]], [[FPEXT1]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMAXNUM_IEEE]](f32) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST2]](i16) + ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; VI-LABEL: name: test_fmaxnum_s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] - ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] - ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMAXNUM_IEEE]](s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST1]] + ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f16) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[FMAXNUM_IEEE]](f16) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST2]](i16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: test_fmaxnum_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY 
$vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] - ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] - ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMAXNUM_IEEE]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s16) = G_FMAXNUM %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST1]] + ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f16) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[FMAXNUM_IEEE]](f16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST2]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(f16) = G_BITCAST %2(i16) + %5:_(f16) = G_BITCAST %3(i16) + %6:_(f16) = G_FMAXNUM %4, %5 + %7:_(i16) = G_BITCAST %6(f16) + %8:_(i32) = G_ANYEXT %7(i16) + $vgpr0 = COPY %8(i32) ... 
--- @@ -339,54 +447,66 @@ body: | ; SI-LABEL: name: test_fmaxnum_v2s32 ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV]] - ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV2]] - ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; SI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV1]] - ; SI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV3]] - ; SI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMAXNUM_IEEE]](s32), [[FMAXNUM_IEEE1]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY1]](<2 x i32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f32>) + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[UV]] + ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[UV2]] + ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; SI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[UV1]] + ; SI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[UV3]] + ; SI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FMAXNUM_IEEE]](f32), [[FMAXNUM_IEEE1]](f32) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](<2 x i32>) ; ; VI-LABEL: name: test_fmaxnum_v2s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV]] - ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV2]] - ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV1]] - ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV3]] - ; VI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMAXNUM_IEEE]](s32), [[FMAXNUM_IEEE1]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x 
i32>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY1]](<2 x i32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f32>) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[UV]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[UV2]] + ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[UV1]] + ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[UV3]] + ; VI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FMAXNUM_IEEE]](f32), [[FMAXNUM_IEEE1]](f32) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](<2 x i32>) ; ; GFX9-LABEL: name: test_fmaxnum_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV]] - ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV2]] - ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV1]] - ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV3]] - ; GFX9-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMAXNUM_IEEE]](s32), [[FMAXNUM_IEEE1]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = G_FMAXNUM %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY1]](<2 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f32>) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[UV]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[UV2]] + ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[UV1]] + ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[UV3]] + ; GFX9-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FMAXNUM_IEEE]](f32), [[FMAXNUM_IEEE1]](f32) + ; GFX9-NEXT: 
[[BITCAST2:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x f32>) = G_BITCAST %0(<2 x i32>) + %3:_(<2 x f32>) = G_BITCAST %1(<2 x i32>) + %4:_(<2 x f32>) = G_FMAXNUM %2, %3 + %5:_(<2 x i32>) = G_BITCAST %4(<2 x f32>) + $vgpr0_vgpr1 = COPY %5(<2 x i32>) ... --- @@ -398,72 +518,100 @@ body: | ; SI-LABEL: name: test_fmaxnum_v2s16 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT]], [[FPEXT1]] - ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE]](s32) - ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT2]], [[FPEXT3]] - ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE1]](s32) - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY1]](<2 x i16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %18(i16) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %24(i16) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %19(i16) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %25(i16) + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST1]](<2 x f16>) + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST6]](<2 x i16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST7]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST7]], [[C]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST8]](<2 x i16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST9]](i32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST9]], [[C]](i32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT 
[[BITCAST2]](f16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST3]](f16) + ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FPEXT]], [[FPEXT1]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMAXNUM_IEEE]](f32) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST4]](f16) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST5]](f16) + ; SI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FPEXT2]], [[FPEXT3]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMAXNUM_IEEE1]](f32) + ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; SI-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST10]](i16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST11]](i16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST12]](<2 x f16>) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST13]](<2 x i16>) ; ; VI-LABEL: name: test_fmaxnum_v2s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] - ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]] - ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] - ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC3]] - ; VI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE1]](s16) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY1]](<2 x i16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %16(i16) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %22(i16) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %17(i16) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %23(i16) + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST1]](<2 x f16>) + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST6]](<2 x i16>) + ; VI-NEXT: 
[[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST7]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST7]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST8]](<2 x i16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST9]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST9]], [[C]](i32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST2]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST3]] + ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f16) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST4]] + ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST5]] + ; VI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(f16) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[FMAXNUM_IEEE]](f16) + ; VI-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[FMAXNUM_IEEE1]](f16) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST10]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST11]](i16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST12]](<2 x f16>) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST13]](<2 x i16>) ; ; GFX9-LABEL: name: test_fmaxnum_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[COPY]] - ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[COPY1]] - ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_FMAXNUM %0, %1 - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x f16>) = G_FCANONICALIZE [[BITCAST]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x f16>) = G_FCANONICALIZE [[BITCAST1]] + ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x f16>) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[FMAXNUM_IEEE]](<2 x f16>) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %3:_(<2 x f16>) = G_BITCAST %1(<2 x i16>) + %4:_(<2 x f16>) = G_FMAXNUM %2, %3 + %5:_(<2 x i16>) = G_BITCAST %4(<2 x f16>) + $vgpr0 = COPY %5(<2 x i16>) ... 
--- @@ -474,158 +622,203 @@ body: | ; SI-LABEL: name: test_fmaxnum_v3s16 ; SI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT]], [[FPEXT1]] - ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE]](s32) - ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; SI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT2]], [[FPEXT3]] - ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE1]](s32) - ; SI-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; SI-NEXT: [[FMAXNUM_IEEE2:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT4]], [[FPEXT5]] - ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE2]](s32) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; SI-NEXT: 
[[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]] - ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr3_vgpr4_vgpr5 + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; SI-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<6 x i16>) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST5]](i32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST5]], [[C]](i32) + ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST6]](i32) + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST7]](f16) + ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FPEXT]], [[FPEXT1]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMAXNUM_IEEE]](f32) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST3]](f16) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST8]](f16) + ; SI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FPEXT2]], [[FPEXT3]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMAXNUM_IEEE1]](f32) + ; SI-NEXT: [[FPEXT4:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST4]](f16) + ; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST9]](f16) + ; SI-NEXT: [[FMAXNUM_IEEE2:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FPEXT4]], [[FPEXT5]] + ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMAXNUM_IEEE2]](f32) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[UV6]](<2 x i16>) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST10]], [[C]](i32) + ; SI-NEXT: [[BITCAST11:%[0-9]+]]:_(i32) = G_BITCAST [[UV7]](<2 x i16>) + ; SI-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; SI-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; SI-NEXT: [[BITCAST14:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC2]](f16) + ; SI-NEXT: 
[[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST12]](i16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST13]](i16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST14]](i16) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST10]], [[C1]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND]], [[C]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI-NEXT: [[BITCAST16:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST11]], [[C1]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR2]], [[SHL2]] + ; SI-NEXT: [[BITCAST17:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST15]](<2 x i16>), [[BITCAST16]](<2 x i16>), [[BITCAST17]](<2 x i16>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; VI-LABEL: name: test_fmaxnum_v3s16 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] - ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC3]] - ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] - ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC4]] - ; VI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; VI-NEXT: [[FCANONICALIZE4:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]] - ; VI-NEXT: [[FCANONICALIZE5:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC5]] - ; VI-NEXT: [[FMAXNUM_IEEE2:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]] - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: 
[[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE1]](s16) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE2]](s16) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]] - ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr3_vgpr4_vgpr5 + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; VI-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<6 x i16>) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST5]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST5]], [[C]](i32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST6]](i32) + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST2]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST7]] + ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f16) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST3]] + ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST8]] + ; VI-NEXT: 
[[FMAXNUM_IEEE1:%[0-9]+]]:_(f16) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; VI-NEXT: [[FCANONICALIZE4:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST4]] + ; VI-NEXT: [[FCANONICALIZE5:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST9]] + ; VI-NEXT: [[FMAXNUM_IEEE2:%[0-9]+]]:_(f16) = G_FMAXNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]] + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[UV6]](<2 x i16>) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST10]], [[C]](i32) + ; VI-NEXT: [[BITCAST11:%[0-9]+]]:_(i32) = G_BITCAST [[UV7]](<2 x i16>) + ; VI-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[FMAXNUM_IEEE]](f16) + ; VI-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[FMAXNUM_IEEE1]](f16) + ; VI-NEXT: [[BITCAST14:%[0-9]+]]:_(i16) = G_BITCAST [[FMAXNUM_IEEE2]](f16) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST12]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST13]](i16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST14]](i16) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST10]], [[C1]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND]], [[C]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI-NEXT: [[BITCAST16:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST11]], [[C1]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR2]], [[SHL2]] + ; VI-NEXT: [[BITCAST17:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST15]](<2 x i16>), [[BITCAST16]](<2 x i16>), [[BITCAST17]](<2 x i16>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX9-LABEL: name: test_fmaxnum_v3s16 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV]] - ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV3]] - ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE 
[[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR]] - ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR1]] - ; GFX9-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[FMAXNUM_IEEE]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[FMAXNUM_IEEE1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) - %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - %2:_(<3 x s16>), %3:_(<3 x s16>) = G_UNMERGE_VALUES %0 - %4:_(<3 x s16>), %5:_(<3 x s16>) = G_UNMERGE_VALUES %1 - %6:_(<3 x s16>) = G_FMAXNUM %2, %4 - %7:_(<3 x s16>) = G_IMPLICIT_DEF - %8:_(<6 x s16>) = G_CONCAT_VECTORS %6, %7 - $vgpr0_vgpr1_vgpr2 = COPY %8 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<6 x i16>) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST5]](i32) + ; GFX9-NEXT: 
[[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST5]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST6]](i32) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX9-NEXT: [[BITCAST9:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(f16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[BITCAST2]](f16), [[BITCAST3]](f16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[BITCAST4]](f16), [[DEF]](f16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[BITCAST7]](f16), [[BITCAST8]](f16) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[BITCAST9]](f16), [[DEF]](f16) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x f16>) = G_FCANONICALIZE [[BUILD_VECTOR]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x f16>) = G_FCANONICALIZE [[BUILD_VECTOR2]] + ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x f16>) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x f16>) = G_FCANONICALIZE [[BUILD_VECTOR1]] + ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x f16>) = G_FCANONICALIZE [[BUILD_VECTOR3]] + ; GFX9-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(<2 x f16>) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; GFX9-NEXT: [[BITCAST10:%[0-9]+]]:_(f16) = G_BITCAST %86(i16) + ; GFX9-NEXT: [[BITCAST11:%[0-9]+]]:_(f16) = G_BITCAST %92(i16) + ; GFX9-NEXT: [[BITCAST12:%[0-9]+]]:_(f16) = G_BITCAST %87(i16) + ; GFX9-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[FMAXNUM_IEEE1]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST14:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST13]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST14]](i32) + ; GFX9-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[FMAXNUM_IEEE]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST16:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST15]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST16]](i32) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST16]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF1]](<4 x i16>) + ; GFX9-NEXT: [[BITCAST17:%[0-9]+]]:_(i32) = G_BITCAST [[UV6]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC9:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST17]](i32) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST17]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC10:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX9-NEXT: [[BITCAST18:%[0-9]+]]:_(i32) = G_BITCAST [[UV7]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC11:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST18]](i32) + ; GFX9-NEXT: [[BITCAST19:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST10]](f16) + ; GFX9-NEXT: [[BITCAST20:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST12]](f16) + ; GFX9-NEXT: [[BITCAST21:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST11]](f16) + ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST19]](i16), [[BITCAST20]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST21]](i16), [[TRUNC9]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC10]](i16), [[TRUNC11]](i16) + ; GFX9-NEXT: 
[[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR5]](<2 x i16>), [[BUILD_VECTOR6]](<2 x i16>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) + %0:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<6 x i16>) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(<3 x i16>), %3:_(<3 x i16>) = G_UNMERGE_VALUES %0(<6 x i16>) + %4:_(<3 x i16>), %5:_(<3 x i16>) = G_UNMERGE_VALUES %1(<6 x i16>) + %6:_(<3 x f16>) = G_BITCAST %2(<3 x i16>) + %7:_(<3 x f16>) = G_BITCAST %4(<3 x i16>) + %8:_(<3 x f16>) = G_FMAXNUM %6, %7 + %9:_(<3 x i16>) = G_IMPLICIT_DEF + %10:_(<3 x i16>) = G_BITCAST %8(<3 x f16>) + %11:_(<6 x i16>) = G_CONCAT_VECTORS %10(<3 x i16>), %9(<3 x i16>) + $vgpr0_vgpr1_vgpr2 = COPY %11(<6 x i16>) ... @@ -638,124 +831,168 @@ body: | ; SI-LABEL: name: test_fmaxnum_v4s16 ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT]], [[FPEXT1]] - ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE]](s32) - ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; SI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT2]], [[FPEXT3]] - ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE1]](s32) - ; SI-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) - ; SI-NEXT: [[FMAXNUM_IEEE2:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT4]], [[FPEXT5]] - ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE2]](s32) - ; SI-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) - ; SI-NEXT: [[FMAXNUM_IEEE3:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT6]], [[FPEXT7]] - ; SI-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE3]](s32) - ; 
SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) - ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY1]](<4 x i16>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x f16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %36(i16) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %42(i16) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %37(i16) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %43(i16) + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST6]](<2 x i16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST7]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST7]], [[C]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST8]](<2 x i16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST9]](i32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST9]], [[C]](i32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x f16>), [[UV3:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f16>) + ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(f16) = G_BITCAST %47(i16) + ; SI-NEXT: [[BITCAST11:%[0-9]+]]:_(f16) = G_BITCAST %52(i16) + ; SI-NEXT: [[BITCAST12:%[0-9]+]]:_(f16) = G_BITCAST %48(i16) + ; SI-NEXT: [[BITCAST13:%[0-9]+]]:_(f16) = G_BITCAST %53(i16) + ; SI-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV3]](<2 x f16>) + ; SI-NEXT: [[BITCAST15:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST14]](<2 x i16>) + ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST15]](i32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST15]], [[C]](i32) + ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; SI-NEXT: [[BITCAST16:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV2]](<2 x f16>) + ; SI-NEXT: [[BITCAST17:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST16]](<2 x i16>) + ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST17]](i32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST17]], [[C]](i32) + ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST10]](f16) + ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FPEXT]], 
[[FPEXT1]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMAXNUM_IEEE]](f32) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST4]](f16) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST12]](f16) + ; SI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FPEXT2]], [[FPEXT3]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMAXNUM_IEEE1]](f32) + ; SI-NEXT: [[FPEXT4:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST3]](f16) + ; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST11]](f16) + ; SI-NEXT: [[FMAXNUM_IEEE2:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FPEXT4]], [[FPEXT5]] + ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMAXNUM_IEEE2]](f32) + ; SI-NEXT: [[FPEXT6:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST5]](f16) + ; SI-NEXT: [[FPEXT7:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST13]](f16) + ; SI-NEXT: [[FMAXNUM_IEEE3:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FPEXT6]], [[FPEXT7]] + ; SI-NEXT: [[FPTRUNC3:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMAXNUM_IEEE3]](f32) + ; SI-NEXT: [[BITCAST18:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; SI-NEXT: [[BITCAST19:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST18]](i16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST19]](i16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST20:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[BITCAST21:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC2]](f16) + ; SI-NEXT: [[BITCAST22:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC3]](f16) + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST21]](i16) + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST22]](i16) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI-NEXT: [[BITCAST23:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR1]](i32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[BITCAST20]](<2 x f16>), [[BITCAST23]](<2 x f16>) + ; SI-NEXT: [[BITCAST24:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x f16>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST24]](<4 x i16>) ; ; VI-LABEL: name: test_fmaxnum_v4s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; VI-NEXT: 
[[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] - ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC4]] - ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] - ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC5]] - ; VI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; VI-NEXT: [[FCANONICALIZE4:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]] - ; VI-NEXT: [[FCANONICALIZE5:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC6]] - ; VI-NEXT: [[FMAXNUM_IEEE2:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]] - ; VI-NEXT: [[FCANONICALIZE6:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC3]] - ; VI-NEXT: [[FCANONICALIZE7:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC7]] - ; VI-NEXT: [[FMAXNUM_IEEE3:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE6]], [[FCANONICALIZE7]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE1]](s16) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE2]](s16) - ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE3]](s16) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY1]](<4 x i16>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x f16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %32(i16) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %38(i16) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %33(i16) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %39(i16) + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST6]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST7]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST7]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST8]](<2 x i16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST9]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST9]], [[C]](i32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; 
VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x f16>), [[UV3:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f16>) + ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(f16) = G_BITCAST %43(i16) + ; VI-NEXT: [[BITCAST11:%[0-9]+]]:_(f16) = G_BITCAST %48(i16) + ; VI-NEXT: [[BITCAST12:%[0-9]+]]:_(f16) = G_BITCAST %44(i16) + ; VI-NEXT: [[BITCAST13:%[0-9]+]]:_(f16) = G_BITCAST %49(i16) + ; VI-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV3]](<2 x f16>) + ; VI-NEXT: [[BITCAST15:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST14]](<2 x i16>) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST15]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST15]], [[C]](i32) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; VI-NEXT: [[BITCAST16:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV2]](<2 x f16>) + ; VI-NEXT: [[BITCAST17:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST16]](<2 x i16>) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST17]](i32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST17]], [[C]](i32) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST2]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST10]] + ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f16) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST4]] + ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST12]] + ; VI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(f16) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; VI-NEXT: [[FCANONICALIZE4:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST3]] + ; VI-NEXT: [[FCANONICALIZE5:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST11]] + ; VI-NEXT: [[FMAXNUM_IEEE2:%[0-9]+]]:_(f16) = G_FMAXNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]] + ; VI-NEXT: [[FCANONICALIZE6:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST5]] + ; VI-NEXT: [[FCANONICALIZE7:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST13]] + ; VI-NEXT: [[FMAXNUM_IEEE3:%[0-9]+]]:_(f16) = G_FMAXNUM_IEEE [[FCANONICALIZE6]], [[FCANONICALIZE7]] + ; VI-NEXT: [[BITCAST18:%[0-9]+]]:_(i16) = G_BITCAST [[FMAXNUM_IEEE]](f16) + ; VI-NEXT: [[BITCAST19:%[0-9]+]]:_(i16) = G_BITCAST [[FMAXNUM_IEEE1]](f16) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST18]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST19]](i16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST20:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[BITCAST21:%[0-9]+]]:_(i16) = G_BITCAST [[FMAXNUM_IEEE2]](f16) + ; VI-NEXT: [[BITCAST22:%[0-9]+]]:_(i16) = G_BITCAST [[FMAXNUM_IEEE3]](f16) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST21]](i16) + ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST22]](i16) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI-NEXT: [[BITCAST23:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR1]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[BITCAST20]](<2 x f16>), [[BITCAST23]](<2 x f16>) + ; VI-NEXT: [[BITCAST24:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x f16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST24]](<4 x i16>) ; ; GFX9-LABEL: name: test_fmaxnum_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: 
[[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV]] - ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV2]] - ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV1]] - ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV3]] - ; GFX9-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[FMAXNUM_IEEE]](<2 x s16>), [[FMAXNUM_IEEE1]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 - %2:_(<4 x s16>) = G_FMAXNUM %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY1]](<4 x i16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x f16>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x f16>), [[UV3:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f16>) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x f16>) = G_FCANONICALIZE [[UV]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x f16>) = G_FCANONICALIZE [[UV2]] + ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x f16>) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x f16>) = G_FCANONICALIZE [[UV1]] + ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x f16>) = G_FCANONICALIZE [[UV3]] + ; GFX9-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(<2 x f16>) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[FMAXNUM_IEEE]](<2 x f16>), [[FMAXNUM_IEEE1]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x f16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = COPY $vgpr2_vgpr3 + %2:_(<4 x f16>) = G_BITCAST %0(<4 x i16>) + %3:_(<4 x f16>) = G_BITCAST %1(<4 x i16>) + %4:_(<4 x f16>) = G_FMAXNUM %2, %3 + %5:_(<4 x i16>) = G_BITCAST %4(<4 x f16>) + $vgpr0_vgpr1 = COPY %5(<4 x i16>) ... 
--- @@ -770,50 +1007,66 @@ body: | ; SI-LABEL: name: test_fmaxnum_with_fmaxnum_argument_s32_ieee_mode_on ; SI: liveins: $vgpr0, $vgpr1, $vgpr2 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMAXNUM_IEEE]] - ; SI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] - ; SI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; SI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE1]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST1]] + ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; SI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[FMAXNUM_IEEE]] + ; SI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST2]] + ; SI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMAXNUM_IEEE1]](f32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) ; ; VI-LABEL: name: test_fmaxnum_with_fmaxnum_argument_s32_ieee_mode_on ; VI: liveins: $vgpr0, $vgpr1, $vgpr2 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMAXNUM_IEEE]] - ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] - ; VI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; VI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE1]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST1]] + ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[FMAXNUM_IEEE]] + ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST2]] + ; VI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE 
[[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMAXNUM_IEEE1]](f32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) ; ; GFX9-LABEL: name: test_fmaxnum_with_fmaxnum_argument_s32_ieee_mode_on ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMAXNUM_IEEE]] - ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] - ; GFX9-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; GFX9-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE1]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_FMAXNUM %0, %1 - %3:_(s32) = COPY $vgpr2 - %4:_(s32) = G_FMAXNUM %2, %3 - $vgpr0 = COPY %4 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST1]] + ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[FMAXNUM_IEEE]] + ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST2]] + ; GFX9-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMAXNUM_IEEE1]](f32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32) = G_FMAXNUM %2, %3 + %5:_(i32) = COPY $vgpr2 + %6:_(f32) = G_BITCAST %5(i32) + %7:_(f32) = G_FMAXNUM %4, %6 + %8:_(i32) = G_BITCAST %7(f32) + $vgpr0 = COPY %8(i32) ... 
--- @@ -828,44 +1081,56 @@ body: | ; SI-LABEL: name: test_fmaxnum_with_nonNaN_fmaxnum_argument_s32_ieee_mode_on ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; SI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FMAXNUM_IEEE]], [[FCANONICALIZE1]] - ; SI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE1]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST1]] + ; SI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FMAXNUM_IEEE]], [[FCANONICALIZE1]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMAXNUM_IEEE1]](f32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) ; ; VI-LABEL: name: test_fmaxnum_with_nonNaN_fmaxnum_argument_s32_ieee_mode_on ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; VI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FMAXNUM_IEEE]], [[FCANONICALIZE1]] - ; VI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE1]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST1]] + ; VI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FMAXNUM_IEEE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMAXNUM_IEEE1]](f32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) ; ; GFX9-LABEL: name: test_fmaxnum_with_nonNaN_fmaxnum_argument_s32_ieee_mode_on ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; GFX9-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE 
[[FMAXNUM_IEEE]], [[FCANONICALIZE1]] - ; GFX9-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE1]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_FCONSTANT float 0.000000e+00 - %2:_(s32) = G_FMAXNUM %0, %1 - %3:_(s32) = COPY $vgpr1 - %4:_(s32) = G_FMAXNUM %2, %3 - $vgpr0 = COPY %4 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST1]] + ; GFX9-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FMAXNUM_IEEE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMAXNUM_IEEE1]](f32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_FCONSTANT float 0.000000e+00 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_FMAXNUM %2, %1 + %4:_(i32) = COPY $vgpr1 + %5:_(f32) = G_BITCAST %4(i32) + %6:_(f32) = G_FMAXNUM %3, %5 + %7:_(i32) = G_BITCAST %6(f32) + $vgpr0 = COPY %7(i32) ... --- @@ -880,50 +1145,66 @@ body: | ; SI-LABEL: name: test_fmaxnum_with_fminnum_argument_s32_ieee_mode_on ; SI: liveins: $vgpr0, $vgpr1, $vgpr2 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMINNUM_IEEE]] - ; SI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] - ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; SI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST1]] + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; SI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[FMINNUM_IEEE]] + ; SI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST2]] + ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMAXNUM_IEEE]](f32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) ; ; VI-LABEL: name: test_fmaxnum_with_fminnum_argument_s32_ieee_mode_on ; VI: liveins: $vgpr0, $vgpr1, $vgpr2 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = 
G_FCANONICALIZE [[COPY1]] - ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMINNUM_IEEE]] - ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] - ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; VI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST1]] + ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[FMINNUM_IEEE]] + ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST2]] + ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMAXNUM_IEEE]](f32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) ; ; GFX9-LABEL: name: test_fmaxnum_with_fminnum_argument_s32_ieee_mode_on ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMINNUM_IEEE]] - ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] - ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; GFX9-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_FMINNUM %0, %1 - %3:_(s32) = COPY $vgpr2 - %4:_(s32) = G_FMAXNUM %2, %3 - $vgpr0 = COPY %4 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST1]] + ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[FMINNUM_IEEE]] + ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST2]] + ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMAXNUM_IEEE]](f32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) + %0:_(i32) = COPY $vgpr0 + 
%1:_(i32) = COPY $vgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32) = G_FMINNUM %2, %3 + %5:_(i32) = COPY $vgpr2 + %6:_(f32) = G_BITCAST %5(i32) + %7:_(f32) = G_FMAXNUM %4, %6 + %8:_(i32) = G_BITCAST %7(f32) + $vgpr0 = COPY %8(i32) ... --- @@ -938,44 +1219,56 @@ body: | ; SI-LABEL: name: test_fmaxnum_with_nonNaN_fminnum_argument_s32_ieee_mode_on ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FMINNUM_IEEE]], [[FCANONICALIZE1]] - ; SI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST1]] + ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FMINNUM_IEEE]], [[FCANONICALIZE1]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMAXNUM_IEEE]](f32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) ; ; VI-LABEL: name: test_fmaxnum_with_nonNaN_fminnum_argument_s32_ieee_mode_on ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FMINNUM_IEEE]], [[FCANONICALIZE1]] - ; VI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST1]] + ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FMINNUM_IEEE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMAXNUM_IEEE]](f32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) ; ; GFX9-LABEL: name: test_fmaxnum_with_nonNaN_fminnum_argument_s32_ieee_mode_on ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; GFX9-NEXT: 
[[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FMINNUM_IEEE]], [[FCANONICALIZE1]] - ; GFX9-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_FCONSTANT float 0.000000e+00 - %2:_(s32) = G_FMINNUM %0, %1 - %3:_(s32) = COPY $vgpr1 - %4:_(s32) = G_FMAXNUM %2, %3 - $vgpr0 = COPY %4 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST1]] + ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FMINNUM_IEEE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMAXNUM_IEEE]](f32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_FCONSTANT float 0.000000e+00 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_FMINNUM %2, %1 + %4:_(i32) = COPY $vgpr1 + %5:_(f32) = G_BITCAST %4(i32) + %6:_(f32) = G_FMAXNUM %3, %5 + %7:_(i32) = G_BITCAST %6(f32) + $vgpr0 = COPY %7(i32) ... --- @@ -990,33 +1283,41 @@ body: | ; SI-LABEL: name: test_fmaxnum_with_constant_argument_s32_ieee_mode_on ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] - ; SI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[FMAXNUM_IEEE]](f32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) ; ; VI-LABEL: name: test_fmaxnum_with_constant_argument_s32_ieee_mode_on ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] - ; VI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[FMAXNUM_IEEE]](f32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) ; ; GFX9-LABEL: name: test_fmaxnum_with_constant_argument_s32_ieee_mode_on ; GFX9: liveins: $vgpr0 ; 
GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] - ; GFX9-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_FCONSTANT float 0.000000e+00 - %2:_(s32) = G_FMAXNUM %0, %1 - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[FMAXNUM_IEEE]](f32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_FCONSTANT float 0.000000e+00 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_FMAXNUM %2, %1 + %4:_(i32) = G_BITCAST %3(f32) + $vgpr0 = COPY %4(i32) ... --- @@ -1031,63 +1332,81 @@ body: | ; SI-LABEL: name: test_fmaxnum_with_constant_vector_argument_v2s16_ieee_mode_on ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16) - ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT]], [[FPEXT1]] - ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE]](s32) - ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16) - ; SI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT2]], [[FPEXT3]] - ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE1]](s32) - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; SI-NEXT: [[C:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH0000 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %18(i16) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST1]](f16) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %19(i16) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C1]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT 
[[C]](f16) + ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FPEXT]], [[FPEXT1]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMAXNUM_IEEE]](f32) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(f32) = G_FPEXT [[C]](f16) + ; SI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FPEXT2]], [[FPEXT3]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMAXNUM_IEEE1]](f32) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST5]](i16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST6]](i16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST7]](<2 x f16>) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST8]](<2 x i16>) ; ; VI-LABEL: name: test_fmaxnum_with_constant_vector_argument_v2s16_ieee_mode_on ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] - ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[C]] - ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] - ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[C]] - ; VI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE1]](s16) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; VI-NEXT: [[C:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH0000 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %16(i16) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST1]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %17(i16) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C1]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[C]] + ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f16) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(f16) = 
G_FCANONICALIZE [[BITCAST2]] + ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[C]] + ; VI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(f16) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[FMAXNUM_IEEE]](f16) + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[FMAXNUM_IEEE1]](f16) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST5]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST6]](i16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST7]](<2 x f16>) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST8]](<2 x i16>) ; ; GFX9-LABEL: name: test_fmaxnum_with_constant_vector_argument_v2s16_ieee_mode_on ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) - ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[COPY]] - ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[BUILD_VECTOR]] - ; GFX9-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(s16) = G_FCONSTANT half 0xH0000 - %2:_(<2 x s16>) = G_BUILD_VECTOR %1(s16), %1(s16) - %3:_(<2 x s16>) = G_FMAXNUM %0, %2 - $vgpr0 = COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH0000 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[C]](f16), [[C]](f16) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x f16>) = G_FCANONICALIZE [[BITCAST]] + ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x f16>) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[BUILD_VECTOR]] + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[FMAXNUM_IEEE]](<2 x f16>) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(f16) = G_FCONSTANT half 0xH0000 + %2:_(<2 x f16>) = G_BUILD_VECTOR %1(f16), %1(f16) + %3:_(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %4:_(<2 x f16>) = G_FMAXNUM %3, %2 + %5:_(<2 x i16>) = G_BITCAST %4(<2 x f16>) + $vgpr0 = COPY %5(<2 x i16>) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir index a20c2fa21eb1e..be2b95b53caaa 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir @@ -17,36 +17,48 @@ body: | ; SI-LABEL: name: test_fminnum_s32_ieee_mode_on ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; SI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST1]] + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMINNUM_IEEE]](f32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) ; ; VI-LABEL: name: test_fminnum_s32_ieee_mode_on ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST1]] + ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMINNUM_IEEE]](f32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) ; ; GFX9-LABEL: name: test_fminnum_s32_ieee_mode_on ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_FMINNUM %0, %1 - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE 
[[BITCAST1]] + ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMINNUM_IEEE]](f32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32) = G_FMINNUM %2, %3 + %5:_(i32) = G_BITCAST %4(f32) + $vgpr0 = COPY %5(i32) ... --- @@ -61,30 +73,42 @@ body: | ; SI-LABEL: name: test_fminnum_s32_ieee_mode_off ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[FMINNUM:%[0-9]+]]:_(s32) = G_FMINNUM [[COPY]], [[COPY1]] - ; SI-NEXT: $vgpr0 = COPY [[FMINNUM]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; SI-NEXT: [[FMINNUM:%[0-9]+]]:_(f32) = G_FMINNUM [[BITCAST]], [[BITCAST1]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMINNUM]](f32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) ; ; VI-LABEL: name: test_fminnum_s32_ieee_mode_off ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[FMINNUM:%[0-9]+]]:_(s32) = G_FMINNUM [[COPY]], [[COPY1]] - ; VI-NEXT: $vgpr0 = COPY [[FMINNUM]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; VI-NEXT: [[FMINNUM:%[0-9]+]]:_(f32) = G_FMINNUM [[BITCAST]], [[BITCAST1]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMINNUM]](f32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) ; ; GFX9-LABEL: name: test_fminnum_s32_ieee_mode_off ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[FMINNUM:%[0-9]+]]:_(s32) = G_FMINNUM [[COPY]], [[COPY1]] - ; GFX9-NEXT: $vgpr0 = COPY [[FMINNUM]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_FMINNUM %0, %1 - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX9-NEXT: [[FMINNUM:%[0-9]+]]:_(f32) = G_FMINNUM [[BITCAST]], [[BITCAST1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMINNUM]](f32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32) = G_FMINNUM %2, %3 + %5:_(i32) = G_BITCAST %4(f32) + $vgpr0 = COPY %5(i32) ... 
--- @@ -96,30 +120,42 @@ body: | ; SI-LABEL: name: test_fminnum_s32_nnan ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = nnan G_FMINNUM_IEEE [[COPY]], [[COPY1]] - ; SI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f32) = nnan G_FMINNUM_IEEE [[BITCAST]], [[BITCAST1]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMINNUM_IEEE]](f32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) ; ; VI-LABEL: name: test_fminnum_s32_nnan ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = nnan G_FMINNUM_IEEE [[COPY]], [[COPY1]] - ; VI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f32) = nnan G_FMINNUM_IEEE [[BITCAST]], [[BITCAST1]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMINNUM_IEEE]](f32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) ; ; GFX9-LABEL: name: test_fminnum_s32_nnan ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = nnan G_FMINNUM_IEEE [[COPY]], [[COPY1]] - ; GFX9-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = nnan G_FMINNUM %0, %1 - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f32) = nnan G_FMINNUM_IEEE [[BITCAST]], [[BITCAST1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMINNUM_IEEE]](f32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32) = nnan G_FMINNUM %2, %3 + %5:_(i32) = G_BITCAST %4(f32) + $vgpr0 = COPY %5(i32) ... 
@@ -132,33 +168,48 @@ body: | ; SI-LABEL: name: test_fminnum_s32_nnan_lhs ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = nnan COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[COPY]], [[FCANONICALIZE]] - ; SI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = nnan COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST1]] + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMINNUM_IEEE]](f32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) ; ; VI-LABEL: name: test_fminnum_s32_nnan_lhs ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = nnan COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[COPY]], [[FCANONICALIZE]] - ; VI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = nnan COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST1]] + ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMINNUM_IEEE]](f32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) ; ; GFX9-LABEL: name: test_fminnum_s32_nnan_lhs ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = nnan COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[COPY]], [[FCANONICALIZE]] - ; GFX9-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) - %0:_(s32) = nnan COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_FMINNUM %0, %1 - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = nnan COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST1]] + ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMINNUM_IEEE]](f32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) + %0:_(i32) = nnan COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32) = G_FMINNUM %2, %3 + %5:_(i32) = G_BITCAST %4(f32) + $vgpr0 = COPY %5(i32) ... 
@@ -171,33 +222,48 @@ body: | ; SI-LABEL: name: test_fminnum_s32_nnan_rhs ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 - ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[COPY1]] - ; SI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = nnan COPY $vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST1]] + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMINNUM_IEEE]](f32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) ; ; VI-LABEL: name: test_fminnum_s32_nnan_rhs ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 - ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[COPY1]] - ; VI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = nnan COPY $vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST1]] + ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMINNUM_IEEE]](f32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) ; ; GFX9-LABEL: name: test_fminnum_s32_nnan_rhs ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 - ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[COPY1]] - ; GFX9-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = nnan COPY $vgpr1 - %2:_(s32) = G_FMINNUM %0, %1 - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = nnan COPY $vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST1]] + ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMINNUM_IEEE]](f32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = nnan COPY $vgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32) = G_FMINNUM %2, %3 + %5:_(i32) = G_BITCAST %4(f32) + $vgpr0 = COPY %5(i32) ... 
--- @@ -209,30 +275,48 @@ body: | ; SI-LABEL: name: test_fminnum_s32_nnan_lhs_rhs ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = nnan COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 - ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[COPY]], [[COPY1]] - ; SI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = nnan COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = nnan COPY $vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST1]] + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMINNUM_IEEE]](f32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) ; ; VI-LABEL: name: test_fminnum_s32_nnan_lhs_rhs ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = nnan COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 - ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[COPY]], [[COPY1]] - ; VI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = nnan COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = nnan COPY $vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST1]] + ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMINNUM_IEEE]](f32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) ; ; GFX9-LABEL: name: test_fminnum_s32_nnan_lhs_rhs ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = nnan COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 - ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[COPY]], [[COPY1]] - ; GFX9-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) - %0:_(s32) = nnan COPY $vgpr0 - %1:_(s32) = nnan COPY $vgpr1 - %2:_(s32) = G_FMINNUM %0, %1 - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = nnan COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = nnan COPY $vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST1]] + ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMINNUM_IEEE]](f32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) + %0:_(i32) = nnan COPY $vgpr0 + %1:_(i32) = nnan COPY $vgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32) = G_FMINNUM %2, %3 + %5:_(i32) = G_BITCAST %4(f32) + $vgpr0 = COPY %5(i32) ... 
--- @@ -244,36 +328,48 @@ body: | ; SI-LABEL: name: test_fminnum_s64 ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY]] - ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY1]] - ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s64) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[FMINNUM_IEEE]](s64) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[COPY1]](i64) + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f64) = G_FCANONICALIZE [[BITCAST]] + ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f64) = G_FCANONICALIZE [[BITCAST1]] + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f64) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i64) = G_BITCAST [[FMINNUM_IEEE]](f64) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](i64) ; ; VI-LABEL: name: test_fminnum_s64 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY]] - ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY1]] - ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s64) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[FMINNUM_IEEE]](s64) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[COPY1]](i64) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f64) = G_FCANONICALIZE [[BITCAST]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f64) = G_FCANONICALIZE [[BITCAST1]] + ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f64) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i64) = G_BITCAST [[FMINNUM_IEEE]](f64) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](i64) ; ; GFX9-LABEL: name: test_fminnum_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY]] - ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY1]] - ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s64) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FMINNUM_IEEE]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = G_FMINNUM %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[COPY1]](i64) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f64) = G_FCANONICALIZE [[BITCAST]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f64) = G_FCANONICALIZE [[BITCAST1]] + ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f64) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i64) = G_BITCAST 
[[FMINNUM_IEEE]](f64) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(f64) = G_BITCAST %0(i64) + %3:_(f64) = G_BITCAST %1(i64) + %4:_(f64) = G_FMINNUM %2, %3 + %5:_(i64) = G_BITCAST %4(f64) + $vgpr0_vgpr1 = COPY %5(i64) ... --- @@ -285,49 +381,61 @@ body: | ; SI-LABEL: name: test_fminnum_s16 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT]], [[FPEXT1]] - ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE]](s32) - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST1]](f16) + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FPEXT]], [[FPEXT1]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMINNUM_IEEE]](f32) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST2]](i16) + ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; VI-LABEL: name: test_fminnum_s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] - ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] - ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMINNUM_IEEE]](s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST1]] + ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f16) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[FMINNUM_IEEE]](f16) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST2]](i16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: test_fminnum_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY 
$vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] - ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] - ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMINNUM_IEEE]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s16) = G_FMINNUM %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST1]] + ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f16) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[FMINNUM_IEEE]](f16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST2]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(f16) = G_BITCAST %2(i16) + %5:_(f16) = G_BITCAST %3(i16) + %6:_(f16) = G_FMINNUM %4, %5 + %7:_(i16) = G_BITCAST %6(f16) + %8:_(i32) = G_ANYEXT %7(i16) + $vgpr0 = COPY %8(i32) ... 
--- @@ -339,54 +447,66 @@ body: | ; SI-LABEL: name: test_fminnum_v2s32 ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV]] - ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV2]] - ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; SI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV1]] - ; SI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV3]] - ; SI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMINNUM_IEEE]](s32), [[FMINNUM_IEEE1]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY1]](<2 x i32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f32>) + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[UV]] + ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[UV2]] + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; SI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[UV1]] + ; SI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[UV3]] + ; SI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FMINNUM_IEEE]](f32), [[FMINNUM_IEEE1]](f32) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](<2 x i32>) ; ; VI-LABEL: name: test_fminnum_v2s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV]] - ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV2]] - ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV1]] - ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV3]] - ; VI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMINNUM_IEEE]](s32), [[FMINNUM_IEEE1]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x 
i32>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY1]](<2 x i32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f32>) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[UV]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[UV2]] + ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[UV1]] + ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[UV3]] + ; VI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FMINNUM_IEEE]](f32), [[FMINNUM_IEEE1]](f32) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](<2 x i32>) ; ; GFX9-LABEL: name: test_fminnum_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV]] - ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV2]] - ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV1]] - ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV3]] - ; GFX9-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMINNUM_IEEE]](s32), [[FMINNUM_IEEE1]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = G_FMINNUM %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY1]](<2 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f32>) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[UV]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[UV2]] + ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[UV1]] + ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[UV3]] + ; GFX9-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FMINNUM_IEEE]](f32), [[FMINNUM_IEEE1]](f32) + ; GFX9-NEXT: 
[[BITCAST2:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x f32>) = G_BITCAST %0(<2 x i32>) + %3:_(<2 x f32>) = G_BITCAST %1(<2 x i32>) + %4:_(<2 x f32>) = G_FMINNUM %2, %3 + %5:_(<2 x i32>) = G_BITCAST %4(<2 x f32>) + $vgpr0_vgpr1 = COPY %5(<2 x i32>) ... --- @@ -398,72 +518,100 @@ body: | ; SI-LABEL: name: test_fminnum_v2s16 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT]], [[FPEXT1]] - ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE]](s32) - ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT2]], [[FPEXT3]] - ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE1]](s32) - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY1]](<2 x i16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %18(i16) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %24(i16) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %19(i16) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %25(i16) + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST1]](<2 x f16>) + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST6]](<2 x i16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST7]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST7]], [[C]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST8]](<2 x i16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST9]](i32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST9]], [[C]](i32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT 
[[BITCAST2]](f16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST3]](f16) + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FPEXT]], [[FPEXT1]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMINNUM_IEEE]](f32) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST4]](f16) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST5]](f16) + ; SI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FPEXT2]], [[FPEXT3]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMINNUM_IEEE1]](f32) + ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; SI-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST10]](i16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST11]](i16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST12]](<2 x f16>) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST13]](<2 x i16>) ; ; VI-LABEL: name: test_fminnum_v2s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] - ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]] - ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] - ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC3]] - ; VI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE1]](s16) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY1]](<2 x i16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %16(i16) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %22(i16) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %17(i16) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %23(i16) + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST1]](<2 x f16>) + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST6]](<2 x i16>) + ; VI-NEXT: 
[[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST7]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST7]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST8]](<2 x i16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST9]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST9]], [[C]](i32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST2]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST3]] + ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f16) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST4]] + ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST5]] + ; VI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(f16) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[FMINNUM_IEEE]](f16) + ; VI-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[FMINNUM_IEEE1]](f16) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST10]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST11]](i16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST12]](<2 x f16>) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST13]](<2 x i16>) ; ; GFX9-LABEL: name: test_fminnum_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[COPY]] - ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[COPY1]] - ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_FMINNUM %0, %1 - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x f16>) = G_FCANONICALIZE [[BITCAST]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x f16>) = G_FCANONICALIZE [[BITCAST1]] + ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x f16>) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[FMINNUM_IEEE]](<2 x f16>) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %3:_(<2 x f16>) = G_BITCAST %1(<2 x i16>) + %4:_(<2 x f16>) = G_FMINNUM %2, %3 + %5:_(<2 x i16>) = G_BITCAST %4(<2 x f16>) + $vgpr0 = COPY %5(<2 x i16>) ... 
--- @@ -474,158 +622,203 @@ body: | ; SI-LABEL: name: test_fminnum_v3s16 ; SI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT]], [[FPEXT1]] - ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE]](s32) - ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; SI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT2]], [[FPEXT3]] - ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE1]](s32) - ; SI-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; SI-NEXT: [[FMINNUM_IEEE2:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT4]], [[FPEXT5]] - ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE2]](s32) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; SI-NEXT: 
[[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]] - ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr3_vgpr4_vgpr5 + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; SI-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<6 x i16>) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST5]](i32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST5]], [[C]](i32) + ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST6]](i32) + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST7]](f16) + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FPEXT]], [[FPEXT1]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMINNUM_IEEE]](f32) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST3]](f16) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST8]](f16) + ; SI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FPEXT2]], [[FPEXT3]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMINNUM_IEEE1]](f32) + ; SI-NEXT: [[FPEXT4:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST4]](f16) + ; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST9]](f16) + ; SI-NEXT: [[FMINNUM_IEEE2:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FPEXT4]], [[FPEXT5]] + ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMINNUM_IEEE2]](f32) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[UV6]](<2 x i16>) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST10]], [[C]](i32) + ; SI-NEXT: [[BITCAST11:%[0-9]+]]:_(i32) = G_BITCAST [[UV7]](<2 x i16>) + ; SI-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; SI-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; SI-NEXT: [[BITCAST14:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC2]](f16) + ; SI-NEXT: 
[[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST12]](i16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST13]](i16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST14]](i16) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST10]], [[C1]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND]], [[C]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI-NEXT: [[BITCAST16:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST11]], [[C1]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR2]], [[SHL2]] + ; SI-NEXT: [[BITCAST17:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST15]](<2 x i16>), [[BITCAST16]](<2 x i16>), [[BITCAST17]](<2 x i16>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; VI-LABEL: name: test_fminnum_v3s16 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] - ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC3]] - ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] - ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC4]] - ; VI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; VI-NEXT: [[FCANONICALIZE4:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]] - ; VI-NEXT: [[FCANONICALIZE5:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC5]] - ; VI-NEXT: [[FMINNUM_IEEE2:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]] - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: 
[[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE1]](s16) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE2]](s16) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]] - ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr3_vgpr4_vgpr5 + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; VI-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<6 x i16>) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST5]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST5]], [[C]](i32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST6]](i32) + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST2]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST7]] + ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f16) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST3]] + ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST8]] + ; VI-NEXT: 
[[FMINNUM_IEEE1:%[0-9]+]]:_(f16) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; VI-NEXT: [[FCANONICALIZE4:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST4]] + ; VI-NEXT: [[FCANONICALIZE5:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST9]] + ; VI-NEXT: [[FMINNUM_IEEE2:%[0-9]+]]:_(f16) = G_FMINNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]] + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[UV6]](<2 x i16>) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST10]], [[C]](i32) + ; VI-NEXT: [[BITCAST11:%[0-9]+]]:_(i32) = G_BITCAST [[UV7]](<2 x i16>) + ; VI-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[FMINNUM_IEEE]](f16) + ; VI-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[FMINNUM_IEEE1]](f16) + ; VI-NEXT: [[BITCAST14:%[0-9]+]]:_(i16) = G_BITCAST [[FMINNUM_IEEE2]](f16) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST12]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST13]](i16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST14]](i16) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST10]], [[C1]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND]], [[C]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI-NEXT: [[BITCAST16:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST11]], [[C1]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR2]], [[SHL2]] + ; VI-NEXT: [[BITCAST17:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST15]](<2 x i16>), [[BITCAST16]](<2 x i16>), [[BITCAST17]](<2 x i16>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX9-LABEL: name: test_fminnum_v3s16 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV]] - ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV3]] - ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE 
[[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR]] - ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR1]] - ; GFX9-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[FMINNUM_IEEE]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[FMINNUM_IEEE1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) - %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - %2:_(<3 x s16>), %3:_(<3 x s16>) = G_UNMERGE_VALUES %0 - %4:_(<3 x s16>), %5:_(<3 x s16>) = G_UNMERGE_VALUES %1 - %6:_(<3 x s16>) = G_FMINNUM %2, %4 - %7:_(<3 x s16>) = G_IMPLICIT_DEF - %8:_(<6 x s16>) = G_CONCAT_VECTORS %6, %7 - $vgpr0_vgpr1_vgpr2 = COPY %8 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<6 x i16>) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST5]](i32) + ; GFX9-NEXT: 
[[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST5]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST6]](i32) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX9-NEXT: [[BITCAST9:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(f16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[BITCAST2]](f16), [[BITCAST3]](f16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[BITCAST4]](f16), [[DEF]](f16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[BITCAST7]](f16), [[BITCAST8]](f16) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[BITCAST9]](f16), [[DEF]](f16) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x f16>) = G_FCANONICALIZE [[BUILD_VECTOR]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x f16>) = G_FCANONICALIZE [[BUILD_VECTOR2]] + ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x f16>) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x f16>) = G_FCANONICALIZE [[BUILD_VECTOR1]] + ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x f16>) = G_FCANONICALIZE [[BUILD_VECTOR3]] + ; GFX9-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(<2 x f16>) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; GFX9-NEXT: [[BITCAST10:%[0-9]+]]:_(f16) = G_BITCAST %86(i16) + ; GFX9-NEXT: [[BITCAST11:%[0-9]+]]:_(f16) = G_BITCAST %92(i16) + ; GFX9-NEXT: [[BITCAST12:%[0-9]+]]:_(f16) = G_BITCAST %87(i16) + ; GFX9-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[FMINNUM_IEEE1]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST14:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST13]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST14]](i32) + ; GFX9-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[FMINNUM_IEEE]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST16:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST15]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST16]](i32) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST16]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF1]](<4 x i16>) + ; GFX9-NEXT: [[BITCAST17:%[0-9]+]]:_(i32) = G_BITCAST [[UV6]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC9:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST17]](i32) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST17]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC10:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX9-NEXT: [[BITCAST18:%[0-9]+]]:_(i32) = G_BITCAST [[UV7]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC11:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST18]](i32) + ; GFX9-NEXT: [[BITCAST19:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST10]](f16) + ; GFX9-NEXT: [[BITCAST20:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST12]](f16) + ; GFX9-NEXT: [[BITCAST21:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST11]](f16) + ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST19]](i16), [[BITCAST20]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST21]](i16), [[TRUNC9]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC10]](i16), [[TRUNC11]](i16) + ; GFX9-NEXT: 
[[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR5]](<2 x i16>), [[BUILD_VECTOR6]](<2 x i16>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) + %0:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<6 x i16>) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(<3 x i16>), %3:_(<3 x i16>) = G_UNMERGE_VALUES %0(<6 x i16>) + %4:_(<3 x i16>), %5:_(<3 x i16>) = G_UNMERGE_VALUES %1(<6 x i16>) + %6:_(<3 x f16>) = G_BITCAST %2(<3 x i16>) + %7:_(<3 x f16>) = G_BITCAST %4(<3 x i16>) + %8:_(<3 x f16>) = G_FMINNUM %6, %7 + %9:_(<3 x i16>) = G_IMPLICIT_DEF + %10:_(<3 x i16>) = G_BITCAST %8(<3 x f16>) + %11:_(<6 x i16>) = G_CONCAT_VECTORS %10(<3 x i16>), %9(<3 x i16>) + $vgpr0_vgpr1_vgpr2 = COPY %11(<6 x i16>) ... @@ -638,124 +831,168 @@ body: | ; SI-LABEL: name: test_fminnum_v4s16 ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT]], [[FPEXT1]] - ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE]](s32) - ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; SI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT2]], [[FPEXT3]] - ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE1]](s32) - ; SI-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) - ; SI-NEXT: [[FMINNUM_IEEE2:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT4]], [[FPEXT5]] - ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE2]](s32) - ; SI-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) - ; SI-NEXT: [[FMINNUM_IEEE3:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT6]], [[FPEXT7]] - ; SI-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE3]](s32) - ; 
SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) - ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY1]](<4 x i16>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x f16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %36(i16) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %42(i16) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %37(i16) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %43(i16) + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST6]](<2 x i16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST7]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST7]], [[C]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST8]](<2 x i16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST9]](i32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST9]], [[C]](i32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x f16>), [[UV3:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f16>) + ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(f16) = G_BITCAST %47(i16) + ; SI-NEXT: [[BITCAST11:%[0-9]+]]:_(f16) = G_BITCAST %52(i16) + ; SI-NEXT: [[BITCAST12:%[0-9]+]]:_(f16) = G_BITCAST %48(i16) + ; SI-NEXT: [[BITCAST13:%[0-9]+]]:_(f16) = G_BITCAST %53(i16) + ; SI-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV3]](<2 x f16>) + ; SI-NEXT: [[BITCAST15:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST14]](<2 x i16>) + ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST15]](i32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST15]], [[C]](i32) + ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; SI-NEXT: [[BITCAST16:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV2]](<2 x f16>) + ; SI-NEXT: [[BITCAST17:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST16]](<2 x i16>) + ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST17]](i32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST17]], [[C]](i32) + ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST10]](f16) + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FPEXT]], 
[[FPEXT1]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMINNUM_IEEE]](f32) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST4]](f16) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST12]](f16) + ; SI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FPEXT2]], [[FPEXT3]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMINNUM_IEEE1]](f32) + ; SI-NEXT: [[FPEXT4:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST3]](f16) + ; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST11]](f16) + ; SI-NEXT: [[FMINNUM_IEEE2:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FPEXT4]], [[FPEXT5]] + ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMINNUM_IEEE2]](f32) + ; SI-NEXT: [[FPEXT6:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST5]](f16) + ; SI-NEXT: [[FPEXT7:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST13]](f16) + ; SI-NEXT: [[FMINNUM_IEEE3:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FPEXT6]], [[FPEXT7]] + ; SI-NEXT: [[FPTRUNC3:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMINNUM_IEEE3]](f32) + ; SI-NEXT: [[BITCAST18:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; SI-NEXT: [[BITCAST19:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST18]](i16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST19]](i16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST20:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[BITCAST21:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC2]](f16) + ; SI-NEXT: [[BITCAST22:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC3]](f16) + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST21]](i16) + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST22]](i16) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI-NEXT: [[BITCAST23:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR1]](i32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[BITCAST20]](<2 x f16>), [[BITCAST23]](<2 x f16>) + ; SI-NEXT: [[BITCAST24:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x f16>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST24]](<4 x i16>) ; ; VI-LABEL: name: test_fminnum_v4s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; VI-NEXT: 
[[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] - ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC4]] - ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] - ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC5]] - ; VI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; VI-NEXT: [[FCANONICALIZE4:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]] - ; VI-NEXT: [[FCANONICALIZE5:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC6]] - ; VI-NEXT: [[FMINNUM_IEEE2:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]] - ; VI-NEXT: [[FCANONICALIZE6:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC3]] - ; VI-NEXT: [[FCANONICALIZE7:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC7]] - ; VI-NEXT: [[FMINNUM_IEEE3:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE6]], [[FCANONICALIZE7]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE1]](s16) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE2]](s16) - ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE3]](s16) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY1]](<4 x i16>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x f16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %32(i16) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %38(i16) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %33(i16) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %39(i16) + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST6]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST7]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST7]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST8]](<2 x i16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST9]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST9]], [[C]](i32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; 
VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x f16>), [[UV3:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f16>) + ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(f16) = G_BITCAST %43(i16) + ; VI-NEXT: [[BITCAST11:%[0-9]+]]:_(f16) = G_BITCAST %48(i16) + ; VI-NEXT: [[BITCAST12:%[0-9]+]]:_(f16) = G_BITCAST %44(i16) + ; VI-NEXT: [[BITCAST13:%[0-9]+]]:_(f16) = G_BITCAST %49(i16) + ; VI-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV3]](<2 x f16>) + ; VI-NEXT: [[BITCAST15:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST14]](<2 x i16>) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST15]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST15]], [[C]](i32) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; VI-NEXT: [[BITCAST16:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV2]](<2 x f16>) + ; VI-NEXT: [[BITCAST17:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST16]](<2 x i16>) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST17]](i32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST17]], [[C]](i32) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST2]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST10]] + ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f16) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST4]] + ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST12]] + ; VI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(f16) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; VI-NEXT: [[FCANONICALIZE4:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST3]] + ; VI-NEXT: [[FCANONICALIZE5:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST11]] + ; VI-NEXT: [[FMINNUM_IEEE2:%[0-9]+]]:_(f16) = G_FMINNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]] + ; VI-NEXT: [[FCANONICALIZE6:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST5]] + ; VI-NEXT: [[FCANONICALIZE7:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST13]] + ; VI-NEXT: [[FMINNUM_IEEE3:%[0-9]+]]:_(f16) = G_FMINNUM_IEEE [[FCANONICALIZE6]], [[FCANONICALIZE7]] + ; VI-NEXT: [[BITCAST18:%[0-9]+]]:_(i16) = G_BITCAST [[FMINNUM_IEEE]](f16) + ; VI-NEXT: [[BITCAST19:%[0-9]+]]:_(i16) = G_BITCAST [[FMINNUM_IEEE1]](f16) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST18]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST19]](i16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST20:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[BITCAST21:%[0-9]+]]:_(i16) = G_BITCAST [[FMINNUM_IEEE2]](f16) + ; VI-NEXT: [[BITCAST22:%[0-9]+]]:_(i16) = G_BITCAST [[FMINNUM_IEEE3]](f16) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST21]](i16) + ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST22]](i16) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI-NEXT: [[BITCAST23:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR1]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[BITCAST20]](<2 x f16>), [[BITCAST23]](<2 x f16>) + ; VI-NEXT: [[BITCAST24:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x f16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST24]](<4 x i16>) ; ; GFX9-LABEL: name: test_fminnum_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: 
[[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV]] - ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV2]] - ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV1]] - ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV3]] - ; GFX9-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[FMINNUM_IEEE]](<2 x s16>), [[FMINNUM_IEEE1]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 - %2:_(<4 x s16>) = G_FMINNUM %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY1]](<4 x i16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x f16>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x f16>), [[UV3:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f16>) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x f16>) = G_FCANONICALIZE [[UV]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x f16>) = G_FCANONICALIZE [[UV2]] + ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x f16>) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x f16>) = G_FCANONICALIZE [[UV1]] + ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x f16>) = G_FCANONICALIZE [[UV3]] + ; GFX9-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(<2 x f16>) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[FMINNUM_IEEE]](<2 x f16>), [[FMINNUM_IEEE1]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x f16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = COPY $vgpr2_vgpr3 + %2:_(<4 x f16>) = G_BITCAST %0(<4 x i16>) + %3:_(<4 x f16>) = G_BITCAST %1(<4 x i16>) + %4:_(<4 x f16>) = G_FMINNUM %2, %3 + %5:_(<4 x i16>) = G_BITCAST %4(<4 x f16>) + $vgpr0_vgpr1 = COPY %5(<4 x i16>) ... 
--- @@ -770,50 +1007,66 @@ body: | ; SI-LABEL: name: test_fminnum_with_fminnum_argument_s32_ieee_mode_on ; SI: liveins: $vgpr0, $vgpr1, $vgpr2 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMINNUM_IEEE]] - ; SI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] - ; SI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; SI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE1]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST1]] + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; SI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[FMINNUM_IEEE]] + ; SI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST2]] + ; SI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMINNUM_IEEE1]](f32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) ; ; VI-LABEL: name: test_fminnum_with_fminnum_argument_s32_ieee_mode_on ; VI: liveins: $vgpr0, $vgpr1, $vgpr2 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMINNUM_IEEE]] - ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] - ; VI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; VI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE1]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST1]] + ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[FMINNUM_IEEE]] + ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST2]] + ; VI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE 
[[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMINNUM_IEEE1]](f32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) ; ; GFX9-LABEL: name: test_fminnum_with_fminnum_argument_s32_ieee_mode_on ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMINNUM_IEEE]] - ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] - ; GFX9-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; GFX9-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE1]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_FMINNUM %0, %1 - %3:_(s32) = COPY $vgpr2 - %4:_(s32) = G_FMINNUM %2, %3 - $vgpr0 = COPY %4 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST1]] + ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[FMINNUM_IEEE]] + ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST2]] + ; GFX9-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMINNUM_IEEE1]](f32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32) = G_FMINNUM %2, %3 + %5:_(i32) = COPY $vgpr2 + %6:_(f32) = G_BITCAST %5(i32) + %7:_(f32) = G_FMINNUM %4, %6 + %8:_(i32) = G_BITCAST %7(f32) + $vgpr0 = COPY %8(i32) ... 
--- @@ -828,44 +1081,56 @@ body: | ; SI-LABEL: name: test_fminnum_with_nonNaN_fminnum_argument_s32_ieee_mode_on ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; SI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FMINNUM_IEEE]], [[FCANONICALIZE1]] - ; SI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE1]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST1]] + ; SI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FMINNUM_IEEE]], [[FCANONICALIZE1]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMINNUM_IEEE1]](f32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) ; ; VI-LABEL: name: test_fminnum_with_nonNaN_fminnum_argument_s32_ieee_mode_on ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; VI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FMINNUM_IEEE]], [[FCANONICALIZE1]] - ; VI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE1]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST1]] + ; VI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FMINNUM_IEEE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMINNUM_IEEE1]](f32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) ; ; GFX9-LABEL: name: test_fminnum_with_nonNaN_fminnum_argument_s32_ieee_mode_on ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; GFX9-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE 
[[FMINNUM_IEEE]], [[FCANONICALIZE1]] - ; GFX9-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE1]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_FCONSTANT float 0.000000e+00 - %2:_(s32) = G_FMINNUM %0, %1 - %3:_(s32) = COPY $vgpr1 - %4:_(s32) = G_FMINNUM %2, %3 - $vgpr0 = COPY %4 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST1]] + ; GFX9-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FMINNUM_IEEE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMINNUM_IEEE1]](f32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_FCONSTANT float 0.000000e+00 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_FMINNUM %2, %1 + %4:_(i32) = COPY $vgpr1 + %5:_(f32) = G_BITCAST %4(i32) + %6:_(f32) = G_FMINNUM %3, %5 + %7:_(i32) = G_BITCAST %6(f32) + $vgpr0 = COPY %7(i32) ... --- @@ -880,50 +1145,66 @@ body: | ; SI-LABEL: name: test_fminnum_with_fmaxnum_argument_s32_ieee_mode_on ; SI: liveins: $vgpr0, $vgpr1, $vgpr2 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMAXNUM_IEEE]] - ; SI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] - ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; SI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST1]] + ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; SI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[FMAXNUM_IEEE]] + ; SI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST2]] + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMINNUM_IEEE]](f32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) ; ; VI-LABEL: name: test_fminnum_with_fmaxnum_argument_s32_ieee_mode_on ; VI: liveins: $vgpr0, $vgpr1, $vgpr2 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = 
G_FCANONICALIZE [[COPY1]] - ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMAXNUM_IEEE]] - ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] - ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; VI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST1]] + ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[FMAXNUM_IEEE]] + ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST2]] + ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMINNUM_IEEE]](f32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) ; ; GFX9-LABEL: name: test_fminnum_with_fmaxnum_argument_s32_ieee_mode_on ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMAXNUM_IEEE]] - ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] - ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; GFX9-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_FMAXNUM %0, %1 - %3:_(s32) = COPY $vgpr2 - %4:_(s32) = G_FMINNUM %2, %3 - $vgpr0 = COPY %4 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST1]] + ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[COPY2]](i32) + ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[FMAXNUM_IEEE]] + ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST2]] + ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[FMINNUM_IEEE]](f32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST3]](i32) + %0:_(i32) = COPY $vgpr0 + 
%1:_(i32) = COPY $vgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32) = G_FMAXNUM %2, %3 + %5:_(i32) = COPY $vgpr2 + %6:_(f32) = G_BITCAST %5(i32) + %7:_(f32) = G_FMINNUM %4, %6 + %8:_(i32) = G_BITCAST %7(f32) + $vgpr0 = COPY %8(i32) ... --- @@ -938,44 +1219,56 @@ body: | ; SI-LABEL: name: test_fminnum_with_nonNaN_fmaxnum_argument_s32_ieee_mode_on ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FMAXNUM_IEEE]], [[FCANONICALIZE1]] - ; SI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST1]] + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FMAXNUM_IEEE]], [[FCANONICALIZE1]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMINNUM_IEEE]](f32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) ; ; VI-LABEL: name: test_fminnum_with_nonNaN_fmaxnum_argument_s32_ieee_mode_on ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FMAXNUM_IEEE]], [[FCANONICALIZE1]] - ; VI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST1]] + ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FMAXNUM_IEEE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMINNUM_IEEE]](f32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) ; ; GFX9-LABEL: name: test_fminnum_with_nonNaN_fmaxnum_argument_s32_ieee_mode_on ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; GFX9-NEXT: 
[[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FMAXNUM_IEEE]], [[FCANONICALIZE1]] - ; GFX9-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_FCONSTANT float 0.000000e+00 - %2:_(s32) = G_FMAXNUM %0, %1 - %3:_(s32) = COPY $vgpr1 - %4:_(s32) = G_FMINNUM %2, %3 - $vgpr0 = COPY %4 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST1]] + ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FMAXNUM_IEEE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMINNUM_IEEE]](f32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_FCONSTANT float 0.000000e+00 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_FMAXNUM %2, %1 + %4:_(i32) = COPY $vgpr1 + %5:_(f32) = G_BITCAST %4(i32) + %6:_(f32) = G_FMINNUM %3, %5 + %7:_(i32) = G_BITCAST %6(f32) + $vgpr0 = COPY %7(i32) ... --- @@ -990,33 +1283,41 @@ body: | ; SI-LABEL: name: test_fminnum_with_constant_argument_s32_ieee_mode_on ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] - ; SI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[FMINNUM_IEEE]](f32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) ; ; VI-LABEL: name: test_fminnum_with_constant_argument_s32_ieee_mode_on ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] - ; VI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[FMINNUM_IEEE]](f32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) ; ; GFX9-LABEL: name: test_fminnum_with_constant_argument_s32_ieee_mode_on ; GFX9: liveins: $vgpr0 ; 
GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] - ; GFX9-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_FCONSTANT float 0.000000e+00 - %2:_(s32) = G_FMINNUM %0, %1 - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[FMINNUM_IEEE]](f32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_FCONSTANT float 0.000000e+00 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_FMINNUM %2, %1 + %4:_(i32) = G_BITCAST %3(f32) + $vgpr0 = COPY %4(i32) ... --- @@ -1031,63 +1332,81 @@ body: | ; SI-LABEL: name: test_fminnum_with_constant_vector_argument_v2s16_ieee_mode_on ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16) - ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT]], [[FPEXT1]] - ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE]](s32) - ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16) - ; SI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT2]], [[FPEXT3]] - ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE1]](s32) - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; SI-NEXT: [[C:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH0000 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %18(i16) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST1]](f16) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %19(i16) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C1]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT 
[[C]](f16) + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FPEXT]], [[FPEXT1]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMINNUM_IEEE]](f32) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(f32) = G_FPEXT [[C]](f16) + ; SI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(f32) = G_FMINNUM_IEEE [[FPEXT2]], [[FPEXT3]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMINNUM_IEEE1]](f32) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST5]](i16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST6]](i16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST7]](<2 x f16>) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST8]](<2 x i16>) ; ; VI-LABEL: name: test_fminnum_with_constant_vector_argument_v2s16_ieee_mode_on ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] - ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[C]] - ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] - ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[C]] - ; VI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE1]](s16) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; VI-NEXT: [[C:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH0000 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %16(i16) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST1]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %17(i16) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C1]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[C]] + ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f16) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(f16) = 
G_FCANONICALIZE [[BITCAST2]] + ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[C]] + ; VI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(f16) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[FMINNUM_IEEE]](f16) + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[FMINNUM_IEEE1]](f16) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST5]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST6]](i16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST7]](<2 x f16>) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST8]](<2 x i16>) ; ; GFX9-LABEL: name: test_fminnum_with_constant_vector_argument_v2s16_ieee_mode_on ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) - ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[COPY]] - ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[BUILD_VECTOR]] - ; GFX9-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(s16) = G_FCONSTANT half 0xH0000 - %2:_(<2 x s16>) = G_BUILD_VECTOR %1(s16), %1(s16) - %3:_(<2 x s16>) = G_FMINNUM %0, %2 - $vgpr0 = COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH0000 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[C]](f16), [[C]](f16) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x f16>) = G_FCANONICALIZE [[BITCAST]] + ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x f16>) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[BUILD_VECTOR]] + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[FMINNUM_IEEE]](<2 x f16>) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(f16) = G_FCONSTANT half 0xH0000 + %2:_(<2 x f16>) = G_BUILD_VECTOR %1(f16), %1(f16) + %3:_(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %4:_(<2 x f16>) = G_FMINNUM %3, %2 + %5:_(<2 x i16>) = G_BITCAST %4(<2 x f16>) + $vgpr0 = COPY %5(<2 x i16>) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir index acfc6f69d8f79..374366285288c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir @@ -14,30 +14,42 @@ body: | ; SI-LABEL: name: test_fmul_s32 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] - ; SI-NEXT: $vgpr0 = COPY [[FMUL]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMUL]](f32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) ; ; VI-LABEL: name: test_fmul_s32 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] - ; VI-NEXT: $vgpr0 = COPY [[FMUL]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMUL]](f32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) ; ; GFX9PLUS-LABEL: name: test_fmul_s32 ; GFX9PLUS: liveins: $vgpr0, $vgpr1 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9PLUS-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] - ; GFX9PLUS-NEXT: $vgpr0 = COPY [[FMUL]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_FMUL %0, %1 - $vgpr0 = COPY %2 + ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX9PLUS-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9PLUS-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMUL]](f32) + ; GFX9PLUS-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32) = G_FMUL %2, %3 + %5:_(i32) = G_BITCAST %4(f32) + $vgpr0 = COPY %5(i32) ... 
--- name: test_fmul_s64 @@ -48,30 +60,42 @@ body: | ; SI-LABEL: name: test_fmul_s64 ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[COPY]], [[COPY1]] - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[FMUL]](s64) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[COPY1]](i64) + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i64) = G_BITCAST [[FMUL]](f64) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](i64) ; ; VI-LABEL: name: test_fmul_s64 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[COPY]], [[COPY1]] - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[FMUL]](s64) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[COPY1]](i64) + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i64) = G_BITCAST [[FMUL]](f64) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](i64) ; ; GFX9PLUS-LABEL: name: test_fmul_s64 ; GFX9PLUS: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9PLUS-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[COPY]], [[COPY1]] - ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[FMUL]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = G_FMUL %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[COPY1]](i64) + ; GFX9PLUS-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9PLUS-NEXT: [[BITCAST2:%[0-9]+]]:_(i64) = G_BITCAST [[FMUL]](f64) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(f64) = G_BITCAST %0(i64) + %3:_(f64) = G_BITCAST %1(i64) + %4:_(f64) = G_FMUL %2, %3 + %5:_(i64) = G_BITCAST %4(f64) + $vgpr0_vgpr1 = COPY %5(i64) ... 
--- @@ -83,46 +107,58 @@ body: | ; SI-LABEL: name: test_fmul_s16 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[FPEXT1]] - ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST1]](f16) + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT]], [[FPEXT1]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMUL]](f32) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST2]](i16) + ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; VI-LABEL: name: test_fmul_s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC1]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL]](s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[FMUL]](f16) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST2]](i16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9PLUS-LABEL: name: test_fmul_s16 ; GFX9PLUS: liveins: $vgpr0, $vgpr1 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9PLUS-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC1]] - ; GFX9PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL]](s16) - ; GFX9PLUS-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 + ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9PLUS-NEXT: 
[[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9PLUS-NEXT: [[FMUL:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9PLUS-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[FMUL]](f16) + ; GFX9PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST2]](i16) + ; GFX9PLUS-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(f16) = G_BITCAST %2(i16) + %5:_(f16) = G_BITCAST %3(i16) + %6:_(f16) = G_FMUL %4, %5 + %7:_(i16) = G_BITCAST %6(f16) + %8:_(i32) = G_ANYEXT %7(i16) + $vgpr0 = COPY %8(i32) - %4:_(s16) = G_FMUL %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 ... --- @@ -134,42 +170,54 @@ body: | ; SI-LABEL: name: test_fmul_v2s32 ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV2]] - ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV3]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMUL]](s32), [[FMUL1]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY1]](<2 x i32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f32>) + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UV]], [[UV2]] + ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[UV1]], [[UV3]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FMUL]](f32), [[FMUL1]](f32) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](<2 x i32>) ; ; VI-LABEL: name: test_fmul_v2s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV2]] - ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV3]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMUL]](s32), [[FMUL1]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY1]](<2 x i32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x 
f32>) + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UV]], [[UV2]] + ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[UV1]], [[UV3]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FMUL]](f32), [[FMUL1]](f32) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](<2 x i32>) ; ; GFX9PLUS-LABEL: name: test_fmul_v2s32 ; GFX9PLUS: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9PLUS-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9PLUS-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV2]] - ; GFX9PLUS-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV3]] - ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMUL]](s32), [[FMUL1]](s32) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = G_FMUL %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY1]](<2 x i32>) + ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; GFX9PLUS-NEXT: [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f32>) + ; GFX9PLUS-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UV]], [[UV2]] + ; GFX9PLUS-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[UV1]], [[UV3]] + ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FMUL]](f32), [[FMUL1]](f32) + ; GFX9PLUS-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x f32>) = G_BITCAST %0(<2 x i32>) + %3:_(<2 x f32>) = G_BITCAST %1(<2 x i32>) + %4:_(<2 x f32>) = G_FMUL %2, %3 + %5:_(<2 x i32>) = G_BITCAST %4(<2 x f32>) + $vgpr0_vgpr1 = COPY %5(<2 x i32>) ... 
--- @@ -181,42 +229,54 @@ body: | ; SI-LABEL: name: test_fmul_v2s32_flags ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[UV]], [[UV2]] - ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[UV1]], [[UV3]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMUL]](s32), [[FMUL1]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY1]](<2 x i32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f32>) + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = nnan G_FMUL [[UV]], [[UV2]] + ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = nnan G_FMUL [[UV1]], [[UV3]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FMUL]](f32), [[FMUL1]](f32) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](<2 x i32>) ; ; VI-LABEL: name: test_fmul_v2s32_flags ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[UV]], [[UV2]] - ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[UV1]], [[UV3]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMUL]](s32), [[FMUL1]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY1]](<2 x i32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f32>) + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = nnan G_FMUL [[UV]], [[UV2]] + ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = nnan G_FMUL [[UV1]], [[UV3]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FMUL]](f32), [[FMUL1]](f32) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](<2 x i32>) ; ; GFX9PLUS-LABEL: name: test_fmul_v2s32_flags ; GFX9PLUS: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES 
[[COPY]](<2 x s32>) - ; GFX9PLUS-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9PLUS-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[UV]], [[UV2]] - ; GFX9PLUS-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[UV1]], [[UV3]] - ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMUL]](s32), [[FMUL1]](s32) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = nnan G_FMUL %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY1]](<2 x i32>) + ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; GFX9PLUS-NEXT: [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f32>) + ; GFX9PLUS-NEXT: [[FMUL:%[0-9]+]]:_(f32) = nnan G_FMUL [[UV]], [[UV2]] + ; GFX9PLUS-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = nnan G_FMUL [[UV1]], [[UV3]] + ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FMUL]](f32), [[FMUL1]](f32) + ; GFX9PLUS-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x f32>) = G_BITCAST %0(<2 x i32>) + %3:_(<2 x f32>) = G_BITCAST %1(<2 x i32>) + %4:_(<2 x f32>) = nnan G_FMUL %2, %3 + %5:_(<2 x i32>) = G_BITCAST %4(<2 x f32>) + $vgpr0_vgpr1 = COPY %5(<2 x i32>) ... 
--- @@ -228,45 +288,57 @@ body: | ; SI-LABEL: name: test_fmul_v3s32 ; SI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV3]] - ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV4]] - ; SI-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV5]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMUL]](s32), [[FMUL1]](s32), [[FMUL2]](s32) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY]](<3 x i32>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY1]](<3 x i32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; SI-NEXT: [[UV3:%[0-9]+]]:_(f32), [[UV4:%[0-9]+]]:_(f32), [[UV5:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<3 x f32>) + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UV]], [[UV3]] + ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[UV1]], [[UV4]] + ; SI-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[UV2]], [[UV5]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[FMUL]](f32), [[FMUL1]](f32), [[FMUL2]](f32) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[BUILD_VECTOR]](<3 x f32>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST2]](<3 x i32>) ; ; VI-LABEL: name: test_fmul_v3s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV3]] - ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV4]] - ; VI-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV5]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMUL]](s32), [[FMUL1]](s32), [[FMUL2]](s32) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY]](<3 x i32>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY1]](<3 x i32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; VI-NEXT: [[UV3:%[0-9]+]]:_(f32), [[UV4:%[0-9]+]]:_(f32), [[UV5:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<3 x f32>) + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UV]], [[UV3]] + ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[UV1]], [[UV4]] + ; VI-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[UV2]], [[UV5]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = 
G_BUILD_VECTOR [[FMUL]](f32), [[FMUL1]](f32), [[FMUL2]](f32) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[BUILD_VECTOR]](<3 x f32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST2]](<3 x i32>) ; ; GFX9PLUS-LABEL: name: test_fmul_v3s32 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX9PLUS-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; GFX9PLUS-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV3]] - ; GFX9PLUS-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV4]] - ; GFX9PLUS-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV5]] - ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMUL]](s32), [[FMUL1]](s32), [[FMUL2]](s32) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - %2:_(<3 x s32>) = G_FMUL %0, %1 - $vgpr0_vgpr1_vgpr2 = COPY %2 + ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY]](<3 x i32>) + ; GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY1]](<3 x i32>) + ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; GFX9PLUS-NEXT: [[UV3:%[0-9]+]]:_(f32), [[UV4:%[0-9]+]]:_(f32), [[UV5:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<3 x f32>) + ; GFX9PLUS-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UV]], [[UV3]] + ; GFX9PLUS-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[UV1]], [[UV4]] + ; GFX9PLUS-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[UV2]], [[UV5]] + ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[FMUL]](f32), [[FMUL1]](f32), [[FMUL2]](f32) + ; GFX9PLUS-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[BUILD_VECTOR]](<3 x f32>) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST2]](<3 x i32>) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(<3 x f32>) = G_BITCAST %0(<3 x i32>) + %3:_(<3 x f32>) = G_BITCAST %1(<3 x i32>) + %4:_(<3 x f32>) = G_FMUL %2, %3 + %5:_(<3 x i32>) = G_BITCAST %4(<3 x f32>) + $vgpr0_vgpr1_vgpr2 = COPY %5(<3 x i32>) ... 
--- @@ -278,42 +350,54 @@ body: | ; SI-LABEL: name: test_fmul_v2s64 ; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[UV]], [[UV2]] - ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[UV1]], [[UV3]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FMUL]](s64), [[FMUL1]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY1]](<2 x i64>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(f64), [[UV1:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST]](<2 x f64>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(f64), [[UV3:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f64>) + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[UV]], [[UV2]] + ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(f64) = G_FMUL [[UV1]], [[UV3]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f64>) = G_BUILD_VECTOR [[FMUL]](f64), [[FMUL1]](f64) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BUILD_VECTOR]](<2 x f64>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST2]](<2 x i64>) ; ; VI-LABEL: name: test_fmul_v2s64 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[UV]], [[UV2]] - ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[UV1]], [[UV3]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FMUL]](s64), [[FMUL1]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY1]](<2 x i64>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(f64), [[UV1:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST]](<2 x f64>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(f64), [[UV3:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f64>) + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[UV]], [[UV2]] + ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(f64) = G_FMUL [[UV1]], [[UV3]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f64>) = G_BUILD_VECTOR [[FMUL]](f64), [[FMUL1]](f64) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BUILD_VECTOR]](<2 x f64>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST2]](<2 x i64>) ; ; GFX9PLUS-LABEL: name: test_fmul_v2s64 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY 
$vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX9PLUS-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX9PLUS-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[UV]], [[UV2]] - ; GFX9PLUS-NEXT: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[UV1]], [[UV3]] - ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FMUL]](s64), [[FMUL1]](s64) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - %2:_(<2 x s64>) = G_FMUL %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY1]](<2 x i64>) + ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(f64), [[UV1:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST]](<2 x f64>) + ; GFX9PLUS-NEXT: [[UV2:%[0-9]+]]:_(f64), [[UV3:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f64>) + ; GFX9PLUS-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[UV]], [[UV2]] + ; GFX9PLUS-NEXT: [[FMUL1:%[0-9]+]]:_(f64) = G_FMUL [[UV1]], [[UV3]] + ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f64>) = G_BUILD_VECTOR [[FMUL]](f64), [[FMUL1]](f64) + ; GFX9PLUS-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BUILD_VECTOR]](<2 x f64>) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST2]](<2 x i64>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(<2 x f64>) = G_BITCAST %0(<2 x i64>) + %3:_(<2 x f64>) = G_BITCAST %1(<2 x i64>) + %4:_(<2 x f64>) = G_FMUL %2, %3 + %5:_(<2 x i64>) = G_BITCAST %4(<2 x f64>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %5(<2 x i64>) ... 
--- @@ -325,66 +409,94 @@ body: | ; SI-LABEL: name: test_fmul_v2s16 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[FPEXT1]] - ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[FPEXT3]] - ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY1]](<2 x i16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %18(i16) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %24(i16) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %19(i16) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %25(i16) + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST1]](<2 x f16>) + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST6]](<2 x i16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST7]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST7]], [[C]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST8]](<2 x i16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST9]](i32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST9]], [[C]](i32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST3]](f16) + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT]], [[FPEXT1]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMUL]](f32) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST4]](f16) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST5]](f16) + ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT2]], [[FPEXT3]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = 
G_FPTRUNC [[FMUL1]](f32) + ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; SI-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST10]](i16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST11]](i16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST12]](<2 x f16>) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST13]](<2 x i16>) ; ; VI-LABEL: name: test_fmul_v2s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC2]] - ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[TRUNC3]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMUL]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMUL1]](s16) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY1]](<2 x i16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %12(i16) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %18(i16) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %13(i16) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %19(i16) + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST1]](<2 x f16>) + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST6]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST7]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST7]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST8]](<2 x i16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST9]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST9]], [[C]](i32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST2]], [[BITCAST3]] + ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST4]], [[BITCAST5]] + ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[FMUL]](f16) + ; VI-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[FMUL1]](f16) + ; VI-NEXT: 
[[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST10]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST11]](i16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST12]](<2 x f16>) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST13]](<2 x i16>) ; ; GFX9PLUS-LABEL: name: test_fmul_v2s16 ; GFX9PLUS: liveins: $vgpr0, $vgpr1 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9PLUS-NEXT: [[FMUL:%[0-9]+]]:_(<2 x s16>) = G_FMUL [[COPY]], [[COPY1]] - ; GFX9PLUS-NEXT: $vgpr0 = COPY [[FMUL]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_FMUL %0, %1 - $vgpr0 = COPY %2 + ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX9PLUS-NEXT: [[FMUL:%[0-9]+]]:_(<2 x f16>) = G_FMUL [[BITCAST]], [[BITCAST1]] + ; GFX9PLUS-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[FMUL]](<2 x f16>) + ; GFX9PLUS-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %3:_(<2 x f16>) = G_BITCAST %1(<2 x i16>) + %4:_(<2 x f16>) = G_FMUL %2, %3 + %5:_(<2 x i16>) = G_BITCAST %4(<2 x f16>) + $vgpr0 = COPY %5(<2 x i16>) ... --- @@ -396,148 +508,193 @@ body: | ; SI-LABEL: name: test_fmul_v3s16 ; SI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[FPEXT1]] - ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; 
SI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[FPEXT3]] - ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) - ; SI-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; SI-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[FPEXT5]] - ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL2]](s32) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]] - ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr3_vgpr4_vgpr5 + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; SI-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<6 x i16>) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST5]](i32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST5]], [[C]](i32) + ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST6]](i32) + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + 
; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST7]](f16) + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT]], [[FPEXT1]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMUL]](f32) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST3]](f16) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST8]](f16) + ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT2]], [[FPEXT3]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMUL1]](f32) + ; SI-NEXT: [[FPEXT4:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST4]](f16) + ; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST9]](f16) + ; SI-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT4]], [[FPEXT5]] + ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMUL2]](f32) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[UV6]](<2 x i16>) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST10]], [[C]](i32) + ; SI-NEXT: [[BITCAST11:%[0-9]+]]:_(i32) = G_BITCAST [[UV7]](<2 x i16>) + ; SI-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; SI-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; SI-NEXT: [[BITCAST14:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC2]](f16) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST12]](i16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST13]](i16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST14]](i16) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST10]], [[C1]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND]], [[C]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI-NEXT: [[BITCAST16:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST11]], [[C1]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR2]], [[SHL2]] + ; SI-NEXT: [[BITCAST17:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST15]](<2 x i16>), [[BITCAST16]](<2 x i16>), [[BITCAST17]](<2 x i16>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; VI-LABEL: name: test_fmul_v3s16 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: 
[[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC3]] - ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[TRUNC4]] - ; VI-NEXT: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[TRUNC5]] - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMUL]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMUL1]](s16) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMUL2]](s16) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]] - ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr3_vgpr4_vgpr5 + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; VI-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<6 x i16>) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; 
VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST5]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST5]], [[C]](i32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST6]](i32) + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST2]], [[BITCAST7]] + ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST3]], [[BITCAST8]] + ; VI-NEXT: [[FMUL2:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST4]], [[BITCAST9]] + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[UV6]](<2 x i16>) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST10]], [[C]](i32) + ; VI-NEXT: [[BITCAST11:%[0-9]+]]:_(i32) = G_BITCAST [[UV7]](<2 x i16>) + ; VI-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[FMUL]](f16) + ; VI-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[FMUL1]](f16) + ; VI-NEXT: [[BITCAST14:%[0-9]+]]:_(i16) = G_BITCAST [[FMUL2]](f16) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST12]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST13]](i16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST14]](i16) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST10]], [[C1]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND]], [[C]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI-NEXT: [[BITCAST16:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST11]], [[C1]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR2]], [[SHL2]] + ; VI-NEXT: [[BITCAST17:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST15]](<2 x i16>), [[BITCAST16]](<2 x i16>), [[BITCAST17]](<2 x i16>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX9PLUS-LABEL: name: test_fmul_v3s16 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9PLUS-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) 
= G_BITCAST [[UV4]](<2 x s16>) - ; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX9PLUS-NEXT: [[FMUL:%[0-9]+]]:_(<2 x s16>) = G_FMUL [[UV]], [[UV3]] - ; GFX9PLUS-NEXT: [[FMUL1:%[0-9]+]]:_(<2 x s16>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX9PLUS-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[FMUL]](<2 x s16>) - ; GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9PLUS-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[FMUL1]](<2 x s16>) - ; GFX9PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9PLUS-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9PLUS-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9PLUS-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX9PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; GFX9PLUS-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9PLUS-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9PLUS-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX9PLUS-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; GFX9PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9PLUS-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9PLUS-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) - %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - %2:_(<3 x s16>), %3:_(<3 x s16>) = G_UNMERGE_VALUES %0 - %4:_(<3 x s16>), %5:_(<3 x s16>) = G_UNMERGE_VALUES %1 - %6:_(<3 x s16>) = G_FMUL %2, %4 - %7:_(<3 x s16>) = G_IMPLICIT_DEF - %8:_(<6 x s16>) = G_CONCAT_VECTORS %6, %7 - $vgpr0_vgpr1_vgpr2 = COPY %8 + ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9PLUS-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9PLUS-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9PLUS-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX9PLUS-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<6 x i16>) + ; 
GFX9PLUS-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST5]](i32) + ; GFX9PLUS-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST5]], [[C]](i32) + ; GFX9PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9PLUS-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; GFX9PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST6]](i32) + ; GFX9PLUS-NEXT: [[BITCAST7:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX9PLUS-NEXT: [[BITCAST8:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX9PLUS-NEXT: [[BITCAST9:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(f16) = G_IMPLICIT_DEF + ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[BITCAST2]](f16), [[BITCAST3]](f16) + ; GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[BITCAST4]](f16), [[DEF]](f16) + ; GFX9PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[BITCAST7]](f16), [[BITCAST8]](f16) + ; GFX9PLUS-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[BITCAST9]](f16), [[DEF]](f16) + ; GFX9PLUS-NEXT: [[FMUL:%[0-9]+]]:_(<2 x f16>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR2]] + ; GFX9PLUS-NEXT: [[FMUL1:%[0-9]+]]:_(<2 x f16>) = G_FMUL [[BUILD_VECTOR1]], [[BUILD_VECTOR3]] + ; GFX9PLUS-NEXT: [[BITCAST10:%[0-9]+]]:_(f16) = G_BITCAST %82(i16) + ; GFX9PLUS-NEXT: [[BITCAST11:%[0-9]+]]:_(f16) = G_BITCAST %88(i16) + ; GFX9PLUS-NEXT: [[BITCAST12:%[0-9]+]]:_(f16) = G_BITCAST %83(i16) + ; GFX9PLUS-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[FMUL1]](<2 x f16>) + ; GFX9PLUS-NEXT: [[BITCAST14:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST13]](<2 x i16>) + ; GFX9PLUS-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST14]](i32) + ; GFX9PLUS-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[FMUL]](<2 x f16>) + ; GFX9PLUS-NEXT: [[BITCAST16:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST15]](<2 x i16>) + ; GFX9PLUS-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST16]](i32) + ; GFX9PLUS-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST16]], [[C]](i32) + ; GFX9PLUS-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX9PLUS-NEXT: [[DEF1:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX9PLUS-NEXT: [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF1]](<4 x i16>) + ; GFX9PLUS-NEXT: [[BITCAST17:%[0-9]+]]:_(i32) = G_BITCAST [[UV6]](<2 x i16>) + ; GFX9PLUS-NEXT: [[TRUNC9:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST17]](i32) + ; GFX9PLUS-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST17]], [[C]](i32) + ; GFX9PLUS-NEXT: [[TRUNC10:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX9PLUS-NEXT: [[BITCAST18:%[0-9]+]]:_(i32) = G_BITCAST [[UV7]](<2 x i16>) + ; GFX9PLUS-NEXT: [[TRUNC11:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST18]](i32) + ; GFX9PLUS-NEXT: [[BITCAST19:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST10]](f16) + ; GFX9PLUS-NEXT: [[BITCAST20:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST12]](f16) + ; GFX9PLUS-NEXT: [[BITCAST21:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST11]](f16) + ; GFX9PLUS-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST19]](i16), [[BITCAST20]](i16) + ; GFX9PLUS-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST21]](i16), [[TRUNC9]](i16) + ; GFX9PLUS-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC10]](i16), [[TRUNC11]](i16) + ; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR5]](<2 x i16>), 
[[BUILD_VECTOR6]](<2 x i16>) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) + %0:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<6 x i16>) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(<3 x i16>), %3:_(<3 x i16>) = G_UNMERGE_VALUES %0(<6 x i16>) + %4:_(<3 x i16>), %5:_(<3 x i16>) = G_UNMERGE_VALUES %1(<6 x i16>) + %6:_(<3 x f16>) = G_BITCAST %2(<3 x i16>) + %7:_(<3 x f16>) = G_BITCAST %4(<3 x i16>) + %8:_(<3 x f16>) = G_FMUL %6, %7 + %9:_(<3 x i16>) = G_IMPLICIT_DEF + %10:_(<3 x i16>) = G_BITCAST %8(<3 x f16>) + %11:_(<6 x i16>) = G_CONCAT_VECTORS %10(<3 x i16>), %9(<3 x i16>) + $vgpr0_vgpr1_vgpr2 = COPY %11(<6 x i16>) ... @@ -550,110 +707,154 @@ body: | ; SI-LABEL: name: test_fmul_v4s16 ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[FPEXT1]] - ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[FPEXT3]] - ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) - ; SI-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) - ; SI-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[FPEXT5]] - ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL2]](s32) - ; SI-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) - ; SI-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT6]], [[FPEXT7]] - ; SI-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL3]](s32) - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI-NEXT: 
[[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) - ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY1]](<4 x i16>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x f16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %36(i16) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %42(i16) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %37(i16) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %43(i16) + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST6]](<2 x i16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST7]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST7]], [[C]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST8]](<2 x i16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST9]](i32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST9]], [[C]](i32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x f16>), [[UV3:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f16>) + ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(f16) = G_BITCAST %47(i16) + ; SI-NEXT: [[BITCAST11:%[0-9]+]]:_(f16) = G_BITCAST %52(i16) + ; SI-NEXT: [[BITCAST12:%[0-9]+]]:_(f16) = G_BITCAST %48(i16) + ; SI-NEXT: [[BITCAST13:%[0-9]+]]:_(f16) = G_BITCAST %53(i16) + ; SI-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV3]](<2 x f16>) + ; SI-NEXT: [[BITCAST15:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST14]](<2 x i16>) + ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST15]](i32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST15]], [[C]](i32) + ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; SI-NEXT: [[BITCAST16:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV2]](<2 x f16>) + ; SI-NEXT: [[BITCAST17:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST16]](<2 x i16>) + ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST17]](i32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST17]], [[C]](i32) + ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST10]](f16) + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT]], [[FPEXT1]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMUL]](f32) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST4]](f16) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST12]](f16) + ; SI-NEXT: 
[[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT2]], [[FPEXT3]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMUL1]](f32) + ; SI-NEXT: [[FPEXT4:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST3]](f16) + ; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST11]](f16) + ; SI-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT4]], [[FPEXT5]] + ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMUL2]](f32) + ; SI-NEXT: [[FPEXT6:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST5]](f16) + ; SI-NEXT: [[FPEXT7:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST13]](f16) + ; SI-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT6]], [[FPEXT7]] + ; SI-NEXT: [[FPTRUNC3:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMUL3]](f32) + ; SI-NEXT: [[BITCAST18:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; SI-NEXT: [[BITCAST19:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST18]](i16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST19]](i16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST20:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[BITCAST21:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC2]](f16) + ; SI-NEXT: [[BITCAST22:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC3]](f16) + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST21]](i16) + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST22]](i16) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI-NEXT: [[BITCAST23:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR1]](i32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[BITCAST20]](<2 x f16>), [[BITCAST23]](<2 x f16>) + ; SI-NEXT: [[BITCAST24:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x f16>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST24]](<4 x i16>) ; ; VI-LABEL: name: test_fmul_v4s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC4]] - 
; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[TRUNC5]] - ; VI-NEXT: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[TRUNC6]] - ; VI-NEXT: [[FMUL3:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC3]], [[TRUNC7]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMUL]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMUL1]](s16) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMUL2]](s16) - ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FMUL3]](s16) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY1]](<4 x i16>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x f16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %24(i16) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %30(i16) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %25(i16) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %31(i16) + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST6]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST7]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST7]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST8]](<2 x i16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST9]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST9]], [[C]](i32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x f16>), [[UV3:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f16>) + ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(f16) = G_BITCAST %35(i16) + ; VI-NEXT: [[BITCAST11:%[0-9]+]]:_(f16) = G_BITCAST %40(i16) + ; VI-NEXT: [[BITCAST12:%[0-9]+]]:_(f16) = G_BITCAST %36(i16) + ; VI-NEXT: [[BITCAST13:%[0-9]+]]:_(f16) = G_BITCAST %41(i16) + ; VI-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV3]](<2 x f16>) + ; VI-NEXT: [[BITCAST15:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST14]](<2 x i16>) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST15]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST15]], [[C]](i32) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; VI-NEXT: [[BITCAST16:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV2]](<2 x f16>) + ; VI-NEXT: [[BITCAST17:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST16]](<2 x i16>) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST17]](i32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST17]], [[C]](i32) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; VI-NEXT: 
[[FMUL:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST2]], [[BITCAST10]] + ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST4]], [[BITCAST12]] + ; VI-NEXT: [[FMUL2:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST3]], [[BITCAST11]] + ; VI-NEXT: [[FMUL3:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST5]], [[BITCAST13]] + ; VI-NEXT: [[BITCAST18:%[0-9]+]]:_(i16) = G_BITCAST [[FMUL]](f16) + ; VI-NEXT: [[BITCAST19:%[0-9]+]]:_(i16) = G_BITCAST [[FMUL1]](f16) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST18]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST19]](i16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST20:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[BITCAST21:%[0-9]+]]:_(i16) = G_BITCAST [[FMUL2]](f16) + ; VI-NEXT: [[BITCAST22:%[0-9]+]]:_(i16) = G_BITCAST [[FMUL3]](f16) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST21]](i16) + ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST22]](i16) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI-NEXT: [[BITCAST23:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR1]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[BITCAST20]](<2 x f16>), [[BITCAST23]](<2 x f16>) + ; VI-NEXT: [[BITCAST24:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x f16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST24]](<4 x i16>) ; ; GFX9PLUS-LABEL: name: test_fmul_v4s16 ; GFX9PLUS: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9PLUS-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9PLUS-NEXT: [[FMUL:%[0-9]+]]:_(<2 x s16>) = G_FMUL [[UV]], [[UV2]] - ; GFX9PLUS-NEXT: [[FMUL1:%[0-9]+]]:_(<2 x s16>) = G_FMUL [[UV1]], [[UV3]] - ; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[FMUL]](<2 x s16>), [[FMUL1]](<2 x s16>) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 - %2:_(<4 x s16>) = G_FMUL %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY1]](<4 x i16>) + ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x f16>) + ; GFX9PLUS-NEXT: [[UV2:%[0-9]+]]:_(<2 x f16>), [[UV3:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f16>) + ; GFX9PLUS-NEXT: [[FMUL:%[0-9]+]]:_(<2 x f16>) = G_FMUL [[UV]], [[UV2]] + ; GFX9PLUS-NEXT: [[FMUL1:%[0-9]+]]:_(<2 x f16>) = G_FMUL [[UV1]], [[UV3]] + ; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[FMUL]](<2 x f16>), [[FMUL1]](<2 x f16>) + ; GFX9PLUS-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x f16>) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = COPY $vgpr2_vgpr3 + %2:_(<4 x f16>) = G_BITCAST %0(<4 x i16>) 
+ %3:_(<4 x f16>) = G_BITCAST %1(<4 x i16>) + %4:_(<4 x f16>) = G_FMUL %2, %3 + %5:_(<4 x i16>) = G_BITCAST %4(<4 x f16>) + $vgpr0_vgpr1 = COPY %5(<4 x i16>) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir index 49cd1f7986129..a5e93f904ec8a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir @@ -14,26 +14,34 @@ body: | ; SI-LABEL: name: test_fneg_s32 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]] - ; SI-NEXT: $vgpr0 = COPY [[FNEG]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[BITCAST]] + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[FNEG]](f32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) ; ; VI-LABEL: name: test_fneg_s32 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]] - ; VI-NEXT: $vgpr0 = COPY [[FNEG]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[BITCAST]] + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[FNEG]](f32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) ; ; GFX9-LABEL: name: test_fneg_s32 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]] - ; GFX9-NEXT: $vgpr0 = COPY [[FNEG]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_FNEG %0 - $vgpr0 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[BITCAST]] + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[FNEG]](f32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = G_FNEG %1 + %3:_(i32) = G_BITCAST %2(f32) + $vgpr0 = COPY %3(i32) ... 
--- name: test_fneg_s64 @@ -44,26 +52,34 @@ body: | ; SI-LABEL: name: test_fneg_s64 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY]] - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[FNEG]](s64) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(f64) = G_FNEG [[BITCAST]] + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[FNEG]](f64) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](i64) ; ; VI-LABEL: name: test_fneg_s64 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY]] - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[FNEG]](s64) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; VI-NEXT: [[FNEG:%[0-9]+]]:_(f64) = G_FNEG [[BITCAST]] + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[FNEG]](f64) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](i64) ; ; GFX9-LABEL: name: test_fneg_s64 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY]] - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FNEG]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_FNEG %0 - $vgpr0_vgpr1 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(f64) = G_FNEG [[BITCAST]] + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[FNEG]](f64) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(f64) = G_BITCAST %0(i64) + %2:_(f64) = G_FNEG %1 + %3:_(i64) = G_BITCAST %2(f64) + $vgpr0_vgpr1 = COPY %3(i64) ... 
--- name: test_fneg_s16 @@ -74,34 +90,42 @@ body: | ; SI-LABEL: name: test_fneg_s16 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC]] - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FNEG]](s16) - ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(f16) = G_FNEG [[BITCAST]] + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[FNEG]](f16) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; VI-LABEL: name: test_fneg_s16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FNEG]](s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; VI-NEXT: [[FNEG:%[0-9]+]]:_(f16) = G_FNEG [[BITCAST]] + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[FNEG]](f16) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: test_fneg_s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FNEG]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s16) = G_FNEG %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(f16) = G_FNEG [[BITCAST]] + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[FNEG]](f16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(f16) = G_BITCAST %1(i16) + %3:_(f16) = G_FNEG %2 + %4:_(i16) = G_BITCAST %3(f16) + %5:_(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... 
--- @@ -113,35 +137,43 @@ body: | ; SI-LABEL: name: test_fneg_v2s32 ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[UV]] - ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[UV1]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FNEG]](s32), [[FNEG1]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[UV]] + ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(f32) = G_FNEG [[UV1]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FNEG]](f32), [[FNEG1]](f32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](<2 x i32>) ; ; VI-LABEL: name: test_fneg_v2s32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[UV]] - ; VI-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[UV1]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FNEG]](s32), [[FNEG1]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; VI-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[UV]] + ; VI-NEXT: [[FNEG1:%[0-9]+]]:_(f32) = G_FNEG [[UV1]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FNEG]](f32), [[FNEG1]](f32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](<2 x i32>) ; ; GFX9-LABEL: name: test_fneg_v2s32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[UV]] - ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[UV1]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FNEG]](s32), [[FNEG1]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_FNEG %0 - $vgpr0_vgpr1 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[UV]] + ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(f32) = G_FNEG [[UV1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FNEG]](f32), [[FNEG1]](f32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x f32>) = G_BITCAST %0(<2 x i32>) + 
%2:_(<2 x f32>) = G_FNEG %1 + %3:_(<2 x i32>) = G_BITCAST %2(<2 x f32>) + $vgpr0_vgpr1 = COPY %3(<2 x i32>) ... --- @@ -153,38 +185,46 @@ body: | ; SI-LABEL: name: test_fneg_v3s32 ; SI: liveins: $vgpr0_vgpr1_vgpr2 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[UV]] - ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[UV1]] - ; SI-NEXT: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[UV2]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FNEG]](s32), [[FNEG1]](s32), [[FNEG2]](s32) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY]](<3 x i32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[UV]] + ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(f32) = G_FNEG [[UV1]] + ; SI-NEXT: [[FNEG2:%[0-9]+]]:_(f32) = G_FNEG [[UV2]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[FNEG]](f32), [[FNEG1]](f32), [[FNEG2]](f32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[BUILD_VECTOR]](<3 x f32>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST1]](<3 x i32>) ; ; VI-LABEL: name: test_fneg_v3s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[UV]] - ; VI-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[UV1]] - ; VI-NEXT: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[UV2]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FNEG]](s32), [[FNEG1]](s32), [[FNEG2]](s32) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY]](<3 x i32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; VI-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[UV]] + ; VI-NEXT: [[FNEG1:%[0-9]+]]:_(f32) = G_FNEG [[UV1]] + ; VI-NEXT: [[FNEG2:%[0-9]+]]:_(f32) = G_FNEG [[UV2]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[FNEG]](f32), [[FNEG1]](f32), [[FNEG2]](f32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[BUILD_VECTOR]](<3 x f32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST1]](<3 x i32>) ; ; GFX9-LABEL: name: test_fneg_v3s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[UV]] - ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[UV1]] - ; GFX9-NEXT: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[UV2]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FNEG]](s32), [[FNEG1]](s32), [[FNEG2]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s32>) = G_FNEG %0 - $vgpr0_vgpr1_vgpr2 = COPY %1 + ; GFX9-NEXT: 
[[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY]](<3 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[UV]] + ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(f32) = G_FNEG [[UV1]] + ; GFX9-NEXT: [[FNEG2:%[0-9]+]]:_(f32) = G_FNEG [[UV2]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[FNEG]](f32), [[FNEG1]](f32), [[FNEG2]](f32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[BUILD_VECTOR]](<3 x f32>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST1]](<3 x i32>) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x f32>) = G_BITCAST %0(<3 x i32>) + %2:_(<3 x f32>) = G_FNEG %1 + %3:_(<3 x i32>) = G_BITCAST %2(<3 x f32>) + $vgpr0_vgpr1_vgpr2 = COPY %3(<3 x i32>) ... --- @@ -196,35 +236,43 @@ body: | ; SI-LABEL: name: test_fneg_v2s64 ; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[UV]] - ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[UV1]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FNEG]](s64), [[FNEG1]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(f64), [[UV1:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST]](<2 x f64>) + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(f64) = G_FNEG [[UV]] + ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(f64) = G_FNEG [[UV1]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f64>) = G_BUILD_VECTOR [[FNEG]](f64), [[FNEG1]](f64) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BUILD_VECTOR]](<2 x f64>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST1]](<2 x i64>) ; ; VI-LABEL: name: test_fneg_v2s64 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[UV]] - ; VI-NEXT: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[UV1]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FNEG]](s64), [[FNEG1]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(f64), [[UV1:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST]](<2 x f64>) + ; VI-NEXT: [[FNEG:%[0-9]+]]:_(f64) = G_FNEG [[UV]] + ; VI-NEXT: [[FNEG1:%[0-9]+]]:_(f64) = G_FNEG [[UV1]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f64>) = G_BUILD_VECTOR [[FNEG]](f64), [[FNEG1]](f64) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BUILD_VECTOR]](<2 x f64>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST1]](<2 x i64>) ; ; GFX9-LABEL: name: test_fneg_v2s64 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; 
GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[UV]] - ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[UV1]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FNEG]](s64), [[FNEG1]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s64>) = G_FNEG %0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f64), [[UV1:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST]](<2 x f64>) + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(f64) = G_FNEG [[UV]] + ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(f64) = G_FNEG [[UV1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f64>) = G_BUILD_VECTOR [[FNEG]](f64), [[FNEG1]](f64) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BUILD_VECTOR]](<2 x f64>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST1]](<2 x i64>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x f64>) = G_BITCAST %0(<2 x i64>) + %2:_(<2 x f64>) = G_FNEG %1 + %3:_(<2 x i64>) = G_BITCAST %2(<2 x f64>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3(<2 x i64>) ... --- @@ -236,26 +284,34 @@ body: | ; SI-LABEL: name: test_fneg_v2s16 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI-NEXT: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[COPY]] - ; SI-NEXT: $vgpr0 = COPY [[FNEG]](<2 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(<2 x f16>) = G_FNEG [[BITCAST]] + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[FNEG]](<2 x f16>) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) ; ; VI-LABEL: name: test_fneg_v2s16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI-NEXT: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[COPY]] - ; VI-NEXT: $vgpr0 = COPY [[FNEG]](<2 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; VI-NEXT: [[FNEG:%[0-9]+]]:_(<2 x f16>) = G_FNEG [[BITCAST]] + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[FNEG]](<2 x f16>) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) ; ; GFX9-LABEL: name: test_fneg_v2s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[COPY]] - ; GFX9-NEXT: $vgpr0 = COPY [[FNEG]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = G_FNEG %0 - $vgpr0 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(<2 x f16>) = G_FNEG [[BITCAST]] + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[FNEG]](<2 x f16>) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %2:_(<2 x f16>) = G_FNEG %1 + %3:_(<2 x i16>) = G_BITCAST %2(<2 x f16>) + $vgpr0 = COPY %3(<2 x i16>) ... 
--- @@ -264,69 +320,117 @@ body: | bb.0: ; SI-LABEL: name: test_fneg_v3s16 - ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[UV]] - ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[BITCAST1]] - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[FNEG]](<2 x s16>) - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[FNEG1]](<2 x s16>) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND1]](s32), [[LSHR]](s32), [[AND2]](s32) - ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; SI: [[DEF:%[0-9]+]]:_(<4 x f16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[DEF]](<4 x f16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST %54(i16) + ; SI-NEXT: [[DEF1:%[0-9]+]]:_(f16) = G_IMPLICIT_DEF + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST1]](<2 x i16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[DEF1]](f16) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST3]](i16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST4]](i16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(<2 x f16>) = G_FNEG [[UV]] + ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(<2 x f16>) = G_FNEG [[BITCAST5]] + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST %37(i16) + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(f16) = G_BITCAST %43(i16) + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(f16) = G_BITCAST %38(i16) + ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[FNEG1]](<2 x f16>) + ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST9]](<2 x i16>) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST10]](i32) + ; SI-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[FNEG]](<2 x f16>) + ; SI-NEXT: [[BITCAST12:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST11]](<2 x i16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST12]](i32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST12]], [[C]](i32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; SI-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST6]](f16) + ; SI-NEXT: [[BITCAST14:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST8]](f16) + ; SI-NEXT: [[BITCAST15:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST7]](f16) + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = 
G_ZEXT [[BITCAST13]](i16) + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST14]](i16) + ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST15]](i16) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[ZEXT2]](i32), [[ZEXT3]](i32), [[ZEXT4]](i32) + ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x i32>) ; ; VI-LABEL: name: test_fneg_v3s16 - ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[UV]] - ; VI-NEXT: [[FNEG1:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[BITCAST1]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[FNEG]](<2 x s16>) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[FNEG1]](<2 x s16>) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND1]](s32), [[LSHR]](s32), [[AND2]](s32) - ; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; VI: [[DEF:%[0-9]+]]:_(<4 x f16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[DEF]](<4 x f16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST %54(i16) + ; VI-NEXT: [[DEF1:%[0-9]+]]:_(f16) = G_IMPLICIT_DEF + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST1]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[DEF1]](f16) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST3]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST4]](i16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[FNEG:%[0-9]+]]:_(<2 x f16>) = G_FNEG [[UV]] + ; VI-NEXT: [[FNEG1:%[0-9]+]]:_(<2 x f16>) = G_FNEG [[BITCAST5]] + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST %37(i16) + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(f16) = G_BITCAST %43(i16) + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(f16) = G_BITCAST %38(i16) + ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[FNEG1]](<2 x f16>) + ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST9]](<2 x i16>) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST10]](i32) + ; VI-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[FNEG]](<2 x f16>) + ; VI-NEXT: [[BITCAST12:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST11]](<2 x i16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST12]](i32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST12]], [[C]](i32) + ; VI-NEXT: 
[[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST6]](f16) + ; VI-NEXT: [[BITCAST14:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST8]](f16) + ; VI-NEXT: [[BITCAST15:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST7]](f16) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST13]](i16) + ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST14]](i16) + ; VI-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST15]](i16) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[ZEXT2]](i32), [[ZEXT3]](i32), [[ZEXT4]](i32) + ; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x i32>) ; ; GFX9-LABEL: name: test_fneg_v3s16 - ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF1]](s16) - ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[UV]] - ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[BUILD_VECTOR]] - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[FNEG]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[FNEG1]](<2 x s16>) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[LSHR]](s32), [[AND1]](s32) - ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR1]](<3 x s32>) - %0:_(<3 x s16>) = G_IMPLICIT_DEF - %1:_(<3 x s16>) = G_FNEG %0 - %2:_(<3 x s32>) = G_ZEXT %1 - S_NOP 0, implicit %2 + ; GFX9: [[DEF:%[0-9]+]]:_(<4 x f16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[DEF]](<4 x f16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST %50(i16) + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(f16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST1]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[BITCAST]](f16), [[DEF1]](f16) + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(<2 x f16>) = G_FNEG [[UV]] + ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(<2 x f16>) = G_FNEG [[BUILD_VECTOR]] + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %39(i16) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %45(i16) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %40(i16) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[FNEG1]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST6]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST7]](i32) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[FNEG]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST8]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST9]](i32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR 
[[BITCAST9]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX9-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST5]](f16) + ; GFX9-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST10]](i16) + ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST11]](i16) + ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST12]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[ZEXT]](i32), [[ZEXT1]](i32), [[ZEXT2]](i32) + ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR1]](<3 x i32>) + %0:_(<3 x f16>) = G_IMPLICIT_DEF + %1:_(<3 x f16>) = G_FNEG %0 + %2:_(<3 x i16>) = G_BITCAST %1(<3 x f16>) + %3:_(<3 x i32>) = G_ZEXT %2(<3 x i16>) + S_NOP 0, implicit %3(<3 x i32>) ... --- @@ -338,33 +442,41 @@ body: | ; SI-LABEL: name: test_fneg_v4s16 ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI-NEXT: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[UV]] - ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[UV1]] - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[FNEG]](<2 x s16>), [[FNEG1]](<2 x s16>) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x f16>) + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(<2 x f16>) = G_FNEG [[UV]] + ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(<2 x f16>) = G_FNEG [[UV1]] + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[FNEG]](<2 x f16>), [[FNEG1]](<2 x f16>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x f16>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](<4 x i16>) ; ; VI-LABEL: name: test_fneg_v4s16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI-NEXT: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[UV]] - ; VI-NEXT: [[FNEG1:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[UV1]] - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[FNEG]](<2 x s16>), [[FNEG1]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x f16>) + ; VI-NEXT: [[FNEG:%[0-9]+]]:_(<2 x f16>) = G_FNEG [[UV]] + ; VI-NEXT: [[FNEG1:%[0-9]+]]:_(<2 x f16>) = G_FNEG [[UV1]] + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[FNEG]](<2 x f16>), [[FNEG1]](<2 x f16>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x f16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](<4 x i16>) ; ; GFX9-LABEL: name: test_fneg_v4s16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; 
GFX9-NEXT: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[UV]] - ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[UV1]] - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[FNEG]](<2 x s16>), [[FNEG1]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = G_FNEG %0 - $vgpr0_vgpr1 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x f16>) + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(<2 x f16>) = G_FNEG [[UV]] + ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(<2 x f16>) = G_FNEG [[UV1]] + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[FNEG]](<2 x f16>), [[FNEG1]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x f16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x f16>) = G_BITCAST %0(<4 x i16>) + %2:_(<4 x f16>) = G_FNEG %1 + %3:_(<4 x i16>) = G_BITCAST %2(<4 x f16>) + $vgpr0_vgpr1 = COPY %3(<4 x i16>) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fpext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fpext.mir index bd9eef0800054..09be8f3a5ccd5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fpext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fpext.mir @@ -10,14 +10,18 @@ body: | ; CHECK-LABEL: name: test_fpext_f16_to_f32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[FPEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s32) = G_FPEXT %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[FPEXT]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(f16) = G_BITCAST %1(i16) + %3:_(f32) = G_FPEXT %2(f16) + %4:_(i32) = G_BITCAST %3(f32) + $vgpr0 = COPY %4(i32) ... 
--- @@ -29,19 +33,26 @@ body: | ; CHECK-LABEL: name: test_fpext_v2f16_to_v2f32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = nnan G_FPEXT [[TRUNC]](s16) - ; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = nnan G_FPEXT [[TRUNC1]](s16) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FPEXT]](s32), [[FPEXT1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s32>) = nnan G_FPEXT %0 - $vgpr0_vgpr1 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %8(i16) + ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = nnan G_FPEXT [[BITCAST1]](f16) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %9(i16) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = nnan G_FPEXT [[BITCAST2]](f16) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FPEXT]](f32), [[FPEXT1]](f32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST5]](<2 x i32>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %2:_(<2 x f32>) = nnan G_FPEXT %1(<2 x f16>) + %3:_(<2 x i32>) = G_BITCAST %2(<2 x f32>) + $vgpr0_vgpr1 = COPY %3(<2 x i32>) ... 
--- @@ -53,19 +64,26 @@ body: | ; CHECK-LABEL: name: test_fpext_v2f16_to_v2f32_w_flags ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = nnan G_FPEXT [[TRUNC]](s16) - ; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = nnan G_FPEXT [[TRUNC1]](s16) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FPEXT]](s32), [[FPEXT1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s32>) = nnan G_FPEXT %0 - $vgpr0_vgpr1 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %8(i16) + ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = nnan G_FPEXT [[BITCAST1]](f16) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %9(i16) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = nnan G_FPEXT [[BITCAST2]](f16) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FPEXT]](f32), [[FPEXT1]](f32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST5]](<2 x i32>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %2:_(<2 x f32>) = nnan G_FPEXT %1(<2 x f16>) + %3:_(<2 x i32>) = G_BITCAST %2(<2 x f32>) + $vgpr0_vgpr1 = COPY %3(<2 x i32>) ... 
--- @@ -76,24 +94,30 @@ body: | ; CHECK-LABEL: name: test_fpext_v3f16_to_v3f32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; CHECK-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FPEXT]](s32), [[FPEXT1]](s32), [[FPEXT2]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0 - %3:_(<3 x s32>) = G_FPEXT %1 - $vgpr0_vgpr1_vgpr2 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST3]](f16) + ; CHECK-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST4]](f16) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[FPEXT]](f32), [[FPEXT1]](f32), [[FPEXT2]](f32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[BUILD_VECTOR]](<3 x f32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST5]](<3 x i32>) + %0:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x i16>), %2:_(<3 x i16>) = G_UNMERGE_VALUES %0(<6 x i16>) + %3:_(<3 x f16>) = G_BITCAST %1(<3 x i16>) + %4:_(<3 x f32>) = G_FPEXT %3(<3 x f16>) + %5:_(<3 x i32>) = G_BITCAST %4(<3 x f32>) + $vgpr0_vgpr1_vgpr2 = COPY %5(<3 x i32>) ... 
--- @@ -105,26 +129,34 @@ body: | ; CHECK-LABEL: name: test_fpext_v4f16_to_v4f32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; CHECK-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; CHECK-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FPEXT]](s32), [[FPEXT1]](s32), [[FPEXT2]](s32), [[FPEXT3]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) - %0:_(<4 x s16>) = G_IMPLICIT_DEF - %1:_(<4 x s32>) = G_FPEXT %0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x f16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[DEF]](<4 x f16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST %13(i16) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %19(i16) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %14(i16) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %20(i16) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST4]](<2 x i16>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST5]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST5]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST6]](<2 x i16>) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST7]](i32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST7]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; CHECK-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST1]](f16) + ; CHECK-NEXT: [[FPEXT3:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST3]](f16) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x f32>) = G_BUILD_VECTOR [[FPEXT]](f32), [[FPEXT1]](f32), [[FPEXT2]](f32), [[FPEXT3]](f32) + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[BUILD_VECTOR]](<4 x f32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST8]](<4 x i32>) + %0:_(<4 x f16>) = G_IMPLICIT_DEF + %1:_(<4 x f32>) = G_FPEXT %0(<4 x f16>) + %2:_(<4 x i32>) = G_BITCAST %1(<4 x f32>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<4 x i32>) ... 
--- @@ -136,12 +168,16 @@ body: | ; CHECK-LABEL: name: test_fpext_f32_to_f64 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(s64) = G_FPEXT [[COPY]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[FPEXT]](s64) - %0:_(s32) = COPY $vgpr0 - %1:_(s64) = G_FPEXT %0 - $vgpr0_vgpr1 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(f64) = G_FPEXT [[BITCAST]](f32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[FPEXT]](f64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](i64) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f64) = G_FPEXT %1(f32) + %3:_(i64) = G_BITCAST %2(f64) + $vgpr0_vgpr1 = COPY %3(i64) ... --- @@ -153,15 +189,19 @@ body: | ; CHECK-LABEL: name: test_fpext_v2f32_to_v2f64 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(s64) = G_FPEXT [[UV]](s32) - ; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(s64) = G_FPEXT [[UV1]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FPEXT]](s64), [[FPEXT1]](s64) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s64>) = G_FPEXT %0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(f64) = G_FPEXT [[UV]](f32) + ; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(f64) = G_FPEXT [[UV1]](f32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f64>) = G_BUILD_VECTOR [[FPEXT]](f64), [[FPEXT1]](f64) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BUILD_VECTOR]](<2 x f64>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST1]](<2 x i64>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x f32>) = G_BITCAST %0(<2 x i32>) + %2:_(<2 x f64>) = G_FPEXT %1(<2 x f32>) + %3:_(<2 x i64>) = G_BITCAST %2(<2 x f64>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3(<2 x i64>) ... 
--- @@ -173,16 +213,20 @@ body: | ; CHECK-LABEL: name: test_fpext_v3f32_to_v3f64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(s64) = G_FPEXT [[UV]](s32) - ; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(s64) = G_FPEXT [[UV1]](s32) - ; CHECK-NEXT: [[FPEXT2:%[0-9]+]]:_(s64) = G_FPEXT [[UV2]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[FPEXT]](s64), [[FPEXT1]](s64), [[FPEXT2]](s64) - ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s64>) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s64>) = G_FPEXT %0 - S_NOP 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY]](<3 x i32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(f64) = G_FPEXT [[UV]](f32) + ; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(f64) = G_FPEXT [[UV1]](f32) + ; CHECK-NEXT: [[FPEXT2:%[0-9]+]]:_(f64) = G_FPEXT [[UV2]](f32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f64>) = G_BUILD_VECTOR [[FPEXT]](f64), [[FPEXT1]](f64), [[FPEXT2]](f64) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x i64>) = G_BITCAST [[BUILD_VECTOR]](<3 x f64>) + ; CHECK-NEXT: S_NOP 0, implicit [[BITCAST1]](<3 x i64>) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x f32>) = G_BITCAST %0(<3 x i32>) + %2:_(<3 x f64>) = G_FPEXT %1(<3 x f32>) + %3:_(<3 x i64>) = G_BITCAST %2(<3 x f64>) + S_NOP 0, implicit %3(<3 x i64>) ... @@ -195,17 +239,21 @@ body: | ; CHECK-LABEL: name: test_fpext_v4f32_to_v4f64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(s64) = G_FPEXT [[UV]](s32) - ; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(s64) = G_FPEXT [[UV1]](s32) - ; CHECK-NEXT: [[FPEXT2:%[0-9]+]]:_(s64) = G_FPEXT [[UV2]](s32) - ; CHECK-NEXT: [[FPEXT3:%[0-9]+]]:_(s64) = G_FPEXT [[UV3]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[FPEXT]](s64), [[FPEXT1]](s64), [[FPEXT2]](s64), [[FPEXT3]](s64) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<4 x s64>) = G_FPEXT %0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f32>) = G_BITCAST [[COPY]](<4 x i32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<4 x f32>) + ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(f64) = G_FPEXT [[UV]](f32) + ; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(f64) = G_FPEXT [[UV1]](f32) + ; CHECK-NEXT: [[FPEXT2:%[0-9]+]]:_(f64) = G_FPEXT [[UV2]](f32) + ; CHECK-NEXT: [[FPEXT3:%[0-9]+]]:_(f64) = G_FPEXT [[UV3]](f32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x f64>) = G_BUILD_VECTOR [[FPEXT]](f64), [[FPEXT1]](f64), [[FPEXT2]](f64), [[FPEXT3]](f64) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x i64>) = G_BITCAST 
[[BUILD_VECTOR]](<4 x f64>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST1]](<4 x i64>) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<4 x f32>) = G_BITCAST %0(<4 x i32>) + %2:_(<4 x f64>) = G_FPEXT %1(<4 x f32>) + %3:_(<4 x i64>) = G_BITCAST %2(<4 x f64>) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3(<4 x i64>) ... --- @@ -217,15 +265,19 @@ body: | ; CHECK-LABEL: name: test_fpext_f16_to_f64 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(s64) = G_FPEXT [[FPEXT]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[FPEXT1]](s64) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s64) = G_FPEXT %1 - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(f64) = G_FPEXT [[FPEXT]](f32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[FPEXT1]](f64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](i64) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(f16) = G_BITCAST %1(i16) + %3:_(f64) = G_FPEXT %2(f16) + %4:_(i64) = G_BITCAST %3(f64) + $vgpr0_vgpr1 = COPY %4(i64) ... --- @@ -237,19 +289,26 @@ body: | ; CHECK-LABEL: name: test_fpext_v2f16_to_v2f64 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = nnan G_FPEXT [[TRUNC]](s16) - ; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(s64) = G_FPEXT [[FPEXT]](s32) - ; CHECK-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = nnan G_FPEXT [[TRUNC1]](s16) - ; CHECK-NEXT: [[FPEXT3:%[0-9]+]]:_(s64) = G_FPEXT [[FPEXT2]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FPEXT1]](s64), [[FPEXT3]](s64) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s64>) = nnan G_FPEXT %0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %10(i16) + ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = nnan G_FPEXT [[BITCAST1]](f16) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %11(i16) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(f64) = G_FPEXT [[FPEXT]](f32) + ; CHECK-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = nnan G_FPEXT [[BITCAST2]](f16) + ; 
CHECK-NEXT: [[FPEXT3:%[0-9]+]]:_(f64) = G_FPEXT [[FPEXT2]](f32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f64>) = G_BUILD_VECTOR [[FPEXT1]](f64), [[FPEXT3]](f64) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BUILD_VECTOR]](<2 x f64>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST5]](<2 x i64>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %2:_(<2 x f64>) = nnan G_FPEXT %1(<2 x f16>) + %3:_(<2 x i64>) = G_BITCAST %2(<2 x f64>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3(<2 x i64>) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fpow.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fpow.mir index fea0df308b3ed..2c948c6964eb9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fpow.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fpow.mir @@ -13,61 +13,71 @@ body: | ; GFX6-LABEL: name: test_fpow_s32 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3810000000000000 - ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[COPY]](s32), [[C]] - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]] - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[SELECT]] - ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL]](s32) - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 3.200000e+01 - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[C4]] - ; GFX6-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[INT]], [[SELECT1]] - ; GFX6-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FSUB]](s32), [[COPY1]](s32) - ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02 - ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[INT1]](s32), [[C5]] - ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01 - ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C6]], [[C4]] - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[INT1]], [[SELECT2]] - ; GFX6-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32) - ; GFX6-NEXT: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000 - ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C7]], [[C2]] - ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[SELECT3]] - ; GFX6-NEXT: $vgpr0 = COPY [[FMUL1]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3810000000000000 + ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[BITCAST]](f32), [[C]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C1]], [[C2]] + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST]], [[SELECT]] + ; GFX6-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), 
[[FMUL]](f32) + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 3.200000e+01 + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C3]], [[C4]] + ; GFX6-NEXT: [[FSUB:%[0-9]+]]:_(f32) = G_FSUB [[INT]], [[SELECT1]] + ; GFX6-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FSUB]](f32), [[BITCAST1]](f32) + ; GFX6-NEXT: [[C5:%[0-9]+]]:_(f32) = G_FCONSTANT float -1.260000e+02 + ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[INT1]](f32), [[C5]] + ; GFX6-NEXT: [[C6:%[0-9]+]]:_(f32) = G_FCONSTANT float 6.400000e+01 + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(f32) = G_SELECT [[FCMP1]](i1), [[C6]], [[C4]] + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[INT1]], [[SELECT2]] + ; GFX6-NEXT: [[INT2:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](f32) + ; GFX6-NEXT: [[C7:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3BF0000000000000 + ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(f32) = G_SELECT [[FCMP1]](i1), [[C7]], [[C2]] + ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[INT2]], [[SELECT3]] + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMUL1]](f32) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) + ; ; GFX9-LABEL: name: test_fpow_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3810000000000000 - ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[COPY]](s32), [[C]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]] - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[SELECT]] - ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL]](s32) - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 3.200000e+01 - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[C4]] - ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[INT]], [[SELECT1]] - ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FSUB]](s32), [[COPY1]](s32) - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02 - ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[INT1]](s32), [[C5]] - ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01 - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C6]], [[C4]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[INT1]], [[SELECT2]] - ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32) - ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000 - ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C7]], [[C2]] - ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[SELECT3]] - ; GFX9-NEXT: $vgpr0 = COPY [[FMUL1]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_FPOW %0, %1 - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT 
float 0x3810000000000000 + ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[BITCAST]](f32), [[C]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C1]], [[C2]] + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST]], [[SELECT]] + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL]](f32) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 3.200000e+01 + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C3]], [[C4]] + ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(f32) = G_FSUB [[INT]], [[SELECT1]] + ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FSUB]](f32), [[BITCAST1]](f32) + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(f32) = G_FCONSTANT float -1.260000e+02 + ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[INT1]](f32), [[C5]] + ; GFX9-NEXT: [[C6:%[0-9]+]]:_(f32) = G_FCONSTANT float 6.400000e+01 + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(f32) = G_SELECT [[FCMP1]](i1), [[C6]], [[C4]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[INT1]], [[SELECT2]] + ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](f32) + ; GFX9-NEXT: [[C7:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3BF0000000000000 + ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(f32) = G_SELECT [[FCMP1]](i1), [[C7]], [[C2]] + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[INT2]], [[SELECT3]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMUL1]](f32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32) = G_FPOW %2, %3 + %5:_(i32) = G_BITCAST %4(f32) + $vgpr0 = COPY %5(i32) ... 
--- @@ -79,93 +89,103 @@ body: | ; GFX6-LABEL: name: test_fpow_v2s32 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3810000000000000 - ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV]](s32), [[C]] - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]] - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[SELECT]] - ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL]](s32) - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 3.200000e+01 - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[C4]] - ; GFX6-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[INT]], [[SELECT1]] - ; GFX6-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FSUB]](s32), [[UV2]](s32) - ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02 - ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[INT1]](s32), [[C5]] - ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01 - ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C6]], [[C4]] - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[INT1]], [[SELECT2]] - ; GFX6-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32) - ; GFX6-NEXT: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000 - ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C7]], [[C2]] - ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[SELECT3]] - ; GFX6-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C]] - ; GFX6-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C1]], [[C2]] - ; GFX6-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[SELECT4]] - ; GFX6-NEXT: [[INT3:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL2]](s32) - ; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C3]], [[C4]] - ; GFX6-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[INT3]], [[SELECT5]] - ; GFX6-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FSUB1]](s32), [[UV3]](s32) - ; GFX6-NEXT: [[FCMP3:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[INT4]](s32), [[C5]] - ; GFX6-NEXT: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[FCMP3]](s1), [[C6]], [[C4]] - ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[INT4]], [[SELECT6]] - ; GFX6-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](s32) - ; GFX6-NEXT: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[FCMP3]](s1), [[C7]], [[C2]] - ; GFX6-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INT5]], [[SELECT7]] - ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMUL1]](s32), [[FMUL3]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: 
[[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY1]](<2 x i32>) + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f32>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3810000000000000 + ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[UV]](f32), [[C]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C1]], [[C2]] + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UV]], [[SELECT]] + ; GFX6-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL]](f32) + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 3.200000e+01 + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C3]], [[C4]] + ; GFX6-NEXT: [[FSUB:%[0-9]+]]:_(f32) = G_FSUB [[INT]], [[SELECT1]] + ; GFX6-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FSUB]](f32), [[UV2]](f32) + ; GFX6-NEXT: [[C5:%[0-9]+]]:_(f32) = G_FCONSTANT float -1.260000e+02 + ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[INT1]](f32), [[C5]] + ; GFX6-NEXT: [[C6:%[0-9]+]]:_(f32) = G_FCONSTANT float 6.400000e+01 + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(f32) = G_SELECT [[FCMP1]](i1), [[C6]], [[C4]] + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[INT1]], [[SELECT2]] + ; GFX6-NEXT: [[INT2:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](f32) + ; GFX6-NEXT: [[C7:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3BF0000000000000 + ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(f32) = G_SELECT [[FCMP1]](i1), [[C7]], [[C2]] + ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[INT2]], [[SELECT3]] + ; GFX6-NEXT: [[FCMP2:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[UV1]](f32), [[C]] + ; GFX6-NEXT: [[SELECT4:%[0-9]+]]:_(f32) = G_SELECT [[FCMP2]](i1), [[C1]], [[C2]] + ; GFX6-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[UV1]], [[SELECT4]] + ; GFX6-NEXT: [[INT3:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL2]](f32) + ; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(f32) = G_SELECT [[FCMP2]](i1), [[C3]], [[C4]] + ; GFX6-NEXT: [[FSUB1:%[0-9]+]]:_(f32) = G_FSUB [[INT3]], [[SELECT5]] + ; GFX6-NEXT: [[INT4:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FSUB1]](f32), [[UV3]](f32) + ; GFX6-NEXT: [[FCMP3:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[INT4]](f32), [[C5]] + ; GFX6-NEXT: [[SELECT6:%[0-9]+]]:_(f32) = G_SELECT [[FCMP3]](i1), [[C6]], [[C4]] + ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[INT4]], [[SELECT6]] + ; GFX6-NEXT: [[INT5:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](f32) + ; GFX6-NEXT: [[SELECT7:%[0-9]+]]:_(f32) = G_SELECT [[FCMP3]](i1), [[C7]], [[C2]] + ; GFX6-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[INT5]], [[SELECT7]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FMUL1]](f32), [[FMUL3]](f32) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](<2 x i32>) + ; ; GFX9-LABEL: name: test_fpow_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: 
[[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3810000000000000 - ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV]](s32), [[C]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]] - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[SELECT]] - ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL]](s32) - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 3.200000e+01 - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[C4]] - ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[INT]], [[SELECT1]] - ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FSUB]](s32), [[UV2]](s32) - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02 - ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[INT1]](s32), [[C5]] - ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01 - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C6]], [[C4]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[INT1]], [[SELECT2]] - ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32) - ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000 - ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C7]], [[C2]] - ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[SELECT3]] - ; GFX9-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C]] - ; GFX9-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C1]], [[C2]] - ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[SELECT4]] - ; GFX9-NEXT: [[INT3:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL2]](s32) - ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C3]], [[C4]] - ; GFX9-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[INT3]], [[SELECT5]] - ; GFX9-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FSUB1]](s32), [[UV3]](s32) - ; GFX9-NEXT: [[FCMP3:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[INT4]](s32), [[C5]] - ; GFX9-NEXT: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[FCMP3]](s1), [[C6]], [[C4]] - ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[INT4]], [[SELECT6]] - ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](s32) - ; GFX9-NEXT: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[FCMP3]](s1), [[C7]], [[C2]] - ; GFX9-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INT5]], [[SELECT7]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMUL1]](s32), [[FMUL3]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = G_FPOW %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; GFX9-NEXT: 
[[BITCAST1:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY1]](<2 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f32>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3810000000000000 + ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[UV]](f32), [[C]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C1]], [[C2]] + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UV]], [[SELECT]] + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL]](f32) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 3.200000e+01 + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C3]], [[C4]] + ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(f32) = G_FSUB [[INT]], [[SELECT1]] + ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FSUB]](f32), [[UV2]](f32) + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(f32) = G_FCONSTANT float -1.260000e+02 + ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[INT1]](f32), [[C5]] + ; GFX9-NEXT: [[C6:%[0-9]+]]:_(f32) = G_FCONSTANT float 6.400000e+01 + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(f32) = G_SELECT [[FCMP1]](i1), [[C6]], [[C4]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[INT1]], [[SELECT2]] + ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](f32) + ; GFX9-NEXT: [[C7:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3BF0000000000000 + ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(f32) = G_SELECT [[FCMP1]](i1), [[C7]], [[C2]] + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[INT2]], [[SELECT3]] + ; GFX9-NEXT: [[FCMP2:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[UV1]](f32), [[C]] + ; GFX9-NEXT: [[SELECT4:%[0-9]+]]:_(f32) = G_SELECT [[FCMP2]](i1), [[C1]], [[C2]] + ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[UV1]], [[SELECT4]] + ; GFX9-NEXT: [[INT3:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL2]](f32) + ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(f32) = G_SELECT [[FCMP2]](i1), [[C3]], [[C4]] + ; GFX9-NEXT: [[FSUB1:%[0-9]+]]:_(f32) = G_FSUB [[INT3]], [[SELECT5]] + ; GFX9-NEXT: [[INT4:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FSUB1]](f32), [[UV3]](f32) + ; GFX9-NEXT: [[FCMP3:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[INT4]](f32), [[C5]] + ; GFX9-NEXT: [[SELECT6:%[0-9]+]]:_(f32) = G_SELECT [[FCMP3]](i1), [[C6]], [[C4]] + ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[INT4]], [[SELECT6]] + ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](f32) + ; GFX9-NEXT: [[SELECT7:%[0-9]+]]:_(f32) = G_SELECT [[FCMP3]](i1), [[C7]], [[C2]] + ; GFX9-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[INT5]], [[SELECT7]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FMUL1]](f32), [[FMUL3]](f32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x f32>) = G_BITCAST %0(<2 x i32>) + %3:_(<2 x f32>) = G_BITCAST %1(<2 x i32>) + %4:_(<2 x f32>) = G_FPOW %2, %3 + %5:_(<2 x i32>) = G_BITCAST %4(<2 x f32>) + $vgpr0_vgpr1 = COPY %5(<2 
x i32>) ... --- @@ -177,119 +197,129 @@ body: | ; GFX6-LABEL: name: test_fpow_v3s32 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3810000000000000 - ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV]](s32), [[C]] - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]] - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[SELECT]] - ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL]](s32) - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 3.200000e+01 - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[C4]] - ; GFX6-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[INT]], [[SELECT1]] - ; GFX6-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FSUB]](s32), [[UV3]](s32) - ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02 - ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[INT1]](s32), [[C5]] - ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01 - ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C6]], [[C4]] - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[INT1]], [[SELECT2]] - ; GFX6-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32) - ; GFX6-NEXT: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000 - ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C7]], [[C2]] - ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[SELECT3]] - ; GFX6-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C]] - ; GFX6-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C1]], [[C2]] - ; GFX6-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[SELECT4]] - ; GFX6-NEXT: [[INT3:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL2]](s32) - ; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C3]], [[C4]] - ; GFX6-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[INT3]], [[SELECT5]] - ; GFX6-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FSUB1]](s32), [[UV4]](s32) - ; GFX6-NEXT: [[FCMP3:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[INT4]](s32), [[C5]] - ; GFX6-NEXT: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[FCMP3]](s1), [[C6]], [[C4]] - ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[INT4]], [[SELECT6]] - ; GFX6-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](s32) - ; GFX6-NEXT: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[FCMP3]](s1), [[C7]], [[C2]] - ; GFX6-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INT5]], [[SELECT7]] - ; GFX6-NEXT: [[FCMP4:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV2]](s32), [[C]] - ; GFX6-NEXT: [[SELECT8:%[0-9]+]]:_(s32) = G_SELECT [[FCMP4]](s1), [[C1]], [[C2]] - ; GFX6-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[SELECT8]] - ; 
GFX6-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL4]](s32) - ; GFX6-NEXT: [[SELECT9:%[0-9]+]]:_(s32) = G_SELECT [[FCMP4]](s1), [[C3]], [[C4]] - ; GFX6-NEXT: [[FSUB2:%[0-9]+]]:_(s32) = G_FSUB [[INT6]], [[SELECT9]] - ; GFX6-NEXT: [[INT7:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FSUB2]](s32), [[UV5]](s32) - ; GFX6-NEXT: [[FCMP5:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[INT7]](s32), [[C5]] - ; GFX6-NEXT: [[SELECT10:%[0-9]+]]:_(s32) = G_SELECT [[FCMP5]](s1), [[C6]], [[C4]] - ; GFX6-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[INT7]], [[SELECT10]] - ; GFX6-NEXT: [[INT8:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD2]](s32) - ; GFX6-NEXT: [[SELECT11:%[0-9]+]]:_(s32) = G_SELECT [[FCMP5]](s1), [[C7]], [[C2]] - ; GFX6-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[INT8]], [[SELECT11]] - ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMUL1]](s32), [[FMUL3]](s32), [[FMUL5]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY]](<3 x i32>) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY1]](<3 x i32>) + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(f32), [[UV4:%[0-9]+]]:_(f32), [[UV5:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<3 x f32>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3810000000000000 + ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[UV]](f32), [[C]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C1]], [[C2]] + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UV]], [[SELECT]] + ; GFX6-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL]](f32) + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 3.200000e+01 + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C3]], [[C4]] + ; GFX6-NEXT: [[FSUB:%[0-9]+]]:_(f32) = G_FSUB [[INT]], [[SELECT1]] + ; GFX6-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FSUB]](f32), [[UV3]](f32) + ; GFX6-NEXT: [[C5:%[0-9]+]]:_(f32) = G_FCONSTANT float -1.260000e+02 + ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[INT1]](f32), [[C5]] + ; GFX6-NEXT: [[C6:%[0-9]+]]:_(f32) = G_FCONSTANT float 6.400000e+01 + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(f32) = G_SELECT [[FCMP1]](i1), [[C6]], [[C4]] + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[INT1]], [[SELECT2]] + ; GFX6-NEXT: [[INT2:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](f32) + ; GFX6-NEXT: [[C7:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3BF0000000000000 + ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(f32) = G_SELECT [[FCMP1]](i1), [[C7]], [[C2]] + ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[INT2]], [[SELECT3]] + ; GFX6-NEXT: [[FCMP2:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[UV1]](f32), [[C]] + ; GFX6-NEXT: [[SELECT4:%[0-9]+]]:_(f32) = G_SELECT [[FCMP2]](i1), [[C1]], [[C2]] + ; GFX6-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[UV1]], [[SELECT4]] + ; GFX6-NEXT: [[INT3:%[0-9]+]]:_(f32) = 
G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL2]](f32) + ; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(f32) = G_SELECT [[FCMP2]](i1), [[C3]], [[C4]] + ; GFX6-NEXT: [[FSUB1:%[0-9]+]]:_(f32) = G_FSUB [[INT3]], [[SELECT5]] + ; GFX6-NEXT: [[INT4:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FSUB1]](f32), [[UV4]](f32) + ; GFX6-NEXT: [[FCMP3:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[INT4]](f32), [[C5]] + ; GFX6-NEXT: [[SELECT6:%[0-9]+]]:_(f32) = G_SELECT [[FCMP3]](i1), [[C6]], [[C4]] + ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[INT4]], [[SELECT6]] + ; GFX6-NEXT: [[INT5:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](f32) + ; GFX6-NEXT: [[SELECT7:%[0-9]+]]:_(f32) = G_SELECT [[FCMP3]](i1), [[C7]], [[C2]] + ; GFX6-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[INT5]], [[SELECT7]] + ; GFX6-NEXT: [[FCMP4:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[UV2]](f32), [[C]] + ; GFX6-NEXT: [[SELECT8:%[0-9]+]]:_(f32) = G_SELECT [[FCMP4]](i1), [[C1]], [[C2]] + ; GFX6-NEXT: [[FMUL4:%[0-9]+]]:_(f32) = G_FMUL [[UV2]], [[SELECT8]] + ; GFX6-NEXT: [[INT6:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL4]](f32) + ; GFX6-NEXT: [[SELECT9:%[0-9]+]]:_(f32) = G_SELECT [[FCMP4]](i1), [[C3]], [[C4]] + ; GFX6-NEXT: [[FSUB2:%[0-9]+]]:_(f32) = G_FSUB [[INT6]], [[SELECT9]] + ; GFX6-NEXT: [[INT7:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FSUB2]](f32), [[UV5]](f32) + ; GFX6-NEXT: [[FCMP5:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[INT7]](f32), [[C5]] + ; GFX6-NEXT: [[SELECT10:%[0-9]+]]:_(f32) = G_SELECT [[FCMP5]](i1), [[C6]], [[C4]] + ; GFX6-NEXT: [[FADD2:%[0-9]+]]:_(f32) = G_FADD [[INT7]], [[SELECT10]] + ; GFX6-NEXT: [[INT8:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD2]](f32) + ; GFX6-NEXT: [[SELECT11:%[0-9]+]]:_(f32) = G_SELECT [[FCMP5]](i1), [[C7]], [[C2]] + ; GFX6-NEXT: [[FMUL5:%[0-9]+]]:_(f32) = G_FMUL [[INT8]], [[SELECT11]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[FMUL1]](f32), [[FMUL3]](f32), [[FMUL5]](f32) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[BUILD_VECTOR]](<3 x f32>) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST2]](<3 x i32>) + ; ; GFX9-LABEL: name: test_fpow_v3s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3810000000000000 - ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV]](s32), [[C]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]] - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[SELECT]] - ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL]](s32) - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 3.200000e+01 - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[C4]] - ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[INT]], [[SELECT1]] - ; 
GFX9-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FSUB]](s32), [[UV3]](s32) - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02 - ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[INT1]](s32), [[C5]] - ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01 - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C6]], [[C4]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[INT1]], [[SELECT2]] - ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32) - ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000 - ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C7]], [[C2]] - ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[SELECT3]] - ; GFX9-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C]] - ; GFX9-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C1]], [[C2]] - ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[SELECT4]] - ; GFX9-NEXT: [[INT3:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL2]](s32) - ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C3]], [[C4]] - ; GFX9-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[INT3]], [[SELECT5]] - ; GFX9-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FSUB1]](s32), [[UV4]](s32) - ; GFX9-NEXT: [[FCMP3:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[INT4]](s32), [[C5]] - ; GFX9-NEXT: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[FCMP3]](s1), [[C6]], [[C4]] - ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[INT4]], [[SELECT6]] - ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](s32) - ; GFX9-NEXT: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[FCMP3]](s1), [[C7]], [[C2]] - ; GFX9-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INT5]], [[SELECT7]] - ; GFX9-NEXT: [[FCMP4:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV2]](s32), [[C]] - ; GFX9-NEXT: [[SELECT8:%[0-9]+]]:_(s32) = G_SELECT [[FCMP4]](s1), [[C1]], [[C2]] - ; GFX9-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[SELECT8]] - ; GFX9-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL4]](s32) - ; GFX9-NEXT: [[SELECT9:%[0-9]+]]:_(s32) = G_SELECT [[FCMP4]](s1), [[C3]], [[C4]] - ; GFX9-NEXT: [[FSUB2:%[0-9]+]]:_(s32) = G_FSUB [[INT6]], [[SELECT9]] - ; GFX9-NEXT: [[INT7:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FSUB2]](s32), [[UV5]](s32) - ; GFX9-NEXT: [[FCMP5:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[INT7]](s32), [[C5]] - ; GFX9-NEXT: [[SELECT10:%[0-9]+]]:_(s32) = G_SELECT [[FCMP5]](s1), [[C6]], [[C4]] - ; GFX9-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[INT7]], [[SELECT10]] - ; GFX9-NEXT: [[INT8:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD2]](s32) - ; GFX9-NEXT: [[SELECT11:%[0-9]+]]:_(s32) = G_SELECT [[FCMP5]](s1), [[C7]], [[C2]] - ; GFX9-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[INT8]], [[SELECT11]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMUL1]](s32), [[FMUL3]](s32), [[FMUL5]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - %2:_(<3 x s32>) = G_FPOW %0, %1 - $vgpr0_vgpr1_vgpr2 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = 
G_BITCAST [[COPY]](<3 x i32>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY1]](<3 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(f32), [[UV4:%[0-9]+]]:_(f32), [[UV5:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<3 x f32>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3810000000000000 + ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[UV]](f32), [[C]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C1]], [[C2]] + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UV]], [[SELECT]] + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL]](f32) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 3.200000e+01 + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C3]], [[C4]] + ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(f32) = G_FSUB [[INT]], [[SELECT1]] + ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FSUB]](f32), [[UV3]](f32) + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(f32) = G_FCONSTANT float -1.260000e+02 + ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[INT1]](f32), [[C5]] + ; GFX9-NEXT: [[C6:%[0-9]+]]:_(f32) = G_FCONSTANT float 6.400000e+01 + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(f32) = G_SELECT [[FCMP1]](i1), [[C6]], [[C4]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[INT1]], [[SELECT2]] + ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](f32) + ; GFX9-NEXT: [[C7:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3BF0000000000000 + ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(f32) = G_SELECT [[FCMP1]](i1), [[C7]], [[C2]] + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[INT2]], [[SELECT3]] + ; GFX9-NEXT: [[FCMP2:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[UV1]](f32), [[C]] + ; GFX9-NEXT: [[SELECT4:%[0-9]+]]:_(f32) = G_SELECT [[FCMP2]](i1), [[C1]], [[C2]] + ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[UV1]], [[SELECT4]] + ; GFX9-NEXT: [[INT3:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL2]](f32) + ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(f32) = G_SELECT [[FCMP2]](i1), [[C3]], [[C4]] + ; GFX9-NEXT: [[FSUB1:%[0-9]+]]:_(f32) = G_FSUB [[INT3]], [[SELECT5]] + ; GFX9-NEXT: [[INT4:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FSUB1]](f32), [[UV4]](f32) + ; GFX9-NEXT: [[FCMP3:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[INT4]](f32), [[C5]] + ; GFX9-NEXT: [[SELECT6:%[0-9]+]]:_(f32) = G_SELECT [[FCMP3]](i1), [[C6]], [[C4]] + ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[INT4]], [[SELECT6]] + ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](f32) + ; GFX9-NEXT: [[SELECT7:%[0-9]+]]:_(f32) = G_SELECT [[FCMP3]](i1), [[C7]], [[C2]] + ; GFX9-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[INT5]], [[SELECT7]] + ; GFX9-NEXT: [[FCMP4:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[UV2]](f32), [[C]] + ; GFX9-NEXT: [[SELECT8:%[0-9]+]]:_(f32) = G_SELECT [[FCMP4]](i1), [[C1]], [[C2]] + ; GFX9-NEXT: [[FMUL4:%[0-9]+]]:_(f32) = G_FMUL [[UV2]], [[SELECT8]] + ; GFX9-NEXT: [[INT6:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL4]](f32) + ; GFX9-NEXT: [[SELECT9:%[0-9]+]]:_(f32) = G_SELECT [[FCMP4]](i1), [[C3]], [[C4]] + ; 
GFX9-NEXT: [[FSUB2:%[0-9]+]]:_(f32) = G_FSUB [[INT6]], [[SELECT9]] + ; GFX9-NEXT: [[INT7:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FSUB2]](f32), [[UV5]](f32) + ; GFX9-NEXT: [[FCMP5:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[INT7]](f32), [[C5]] + ; GFX9-NEXT: [[SELECT10:%[0-9]+]]:_(f32) = G_SELECT [[FCMP5]](i1), [[C6]], [[C4]] + ; GFX9-NEXT: [[FADD2:%[0-9]+]]:_(f32) = G_FADD [[INT7]], [[SELECT10]] + ; GFX9-NEXT: [[INT8:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD2]](f32) + ; GFX9-NEXT: [[SELECT11:%[0-9]+]]:_(f32) = G_SELECT [[FCMP5]](i1), [[C7]], [[C2]] + ; GFX9-NEXT: [[FMUL5:%[0-9]+]]:_(f32) = G_FMUL [[INT8]], [[SELECT11]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[FMUL1]](f32), [[FMUL3]](f32), [[FMUL5]](f32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[BUILD_VECTOR]](<3 x f32>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST2]](<3 x i32>) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(<3 x f32>) = G_BITCAST %0(<3 x i32>) + %3:_(<3 x f32>) = G_BITCAST %1(<3 x i32>) + %4:_(<3 x f32>) = G_FPOW %2, %3 + %5:_(<3 x i32>) = G_BITCAST %4(<3 x f32>) + $vgpr0_vgpr1_vgpr2 = COPY %5(<3 x i32>) ... --- @@ -301,61 +331,71 @@ body: | ; GFX6-LABEL: name: test_fpow_s32_flags ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3810000000000000 - ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[COPY]](s32), [[C]] - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = nnan nsz G_SELECT [[FCMP]](s1), [[C1]], [[C2]] - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan nsz G_FMUL [[COPY]], [[SELECT]] - ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = nnan nsz G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL]](s32) - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 3.200000e+01 - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = nnan nsz G_SELECT [[FCMP]](s1), [[C3]], [[C4]] - ; GFX6-NEXT: [[FSUB:%[0-9]+]]:_(s32) = nnan nsz G_FSUB [[INT]], [[SELECT1]] - ; GFX6-NEXT: [[INT1:%[0-9]+]]:_(s32) = nnan nsz G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FSUB]](s32), [[COPY1]](s32) - ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02 - ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = nnan nsz G_FCMP floatpred(olt), [[INT1]](s32), [[C5]] - ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01 - ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = nnan nsz G_SELECT [[FCMP1]](s1), [[C6]], [[C4]] - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = nnan nsz G_FADD [[INT1]], [[SELECT2]] - ; GFX6-NEXT: [[INT2:%[0-9]+]]:_(s32) = nnan nsz G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32) - ; GFX6-NEXT: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000 - ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = nnan nsz G_SELECT [[FCMP1]](s1), [[C7]], [[C2]] - ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = nnan nsz G_FMUL [[INT2]], [[SELECT3]] - ; GFX6-NEXT: $vgpr0 = COPY [[FMUL1]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; 
GFX6-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3810000000000000 + ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[BITCAST]](f32), [[C]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(f32) = nnan nsz G_SELECT [[FCMP]](i1), [[C1]], [[C2]] + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = nnan nsz G_FMUL [[BITCAST]], [[SELECT]] + ; GFX6-NEXT: [[INT:%[0-9]+]]:_(f32) = nnan nsz G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL]](f32) + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 3.200000e+01 + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(f32) = nnan nsz G_SELECT [[FCMP]](i1), [[C3]], [[C4]] + ; GFX6-NEXT: [[FSUB:%[0-9]+]]:_(f32) = nnan nsz G_FSUB [[INT]], [[SELECT1]] + ; GFX6-NEXT: [[INT1:%[0-9]+]]:_(f32) = nnan nsz G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FSUB]](f32), [[BITCAST1]](f32) + ; GFX6-NEXT: [[C5:%[0-9]+]]:_(f32) = G_FCONSTANT float -1.260000e+02 + ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = nnan nsz G_FCMP floatpred(olt), [[INT1]](f32), [[C5]] + ; GFX6-NEXT: [[C6:%[0-9]+]]:_(f32) = G_FCONSTANT float 6.400000e+01 + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(f32) = nnan nsz G_SELECT [[FCMP1]](i1), [[C6]], [[C4]] + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(f32) = nnan nsz G_FADD [[INT1]], [[SELECT2]] + ; GFX6-NEXT: [[INT2:%[0-9]+]]:_(f32) = nnan nsz G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](f32) + ; GFX6-NEXT: [[C7:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3BF0000000000000 + ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(f32) = nnan nsz G_SELECT [[FCMP1]](i1), [[C7]], [[C2]] + ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = nnan nsz G_FMUL [[INT2]], [[SELECT3]] + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMUL1]](f32) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) + ; ; GFX9-LABEL: name: test_fpow_s32_flags ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3810000000000000 - ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[COPY]](s32), [[C]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = nnan nsz G_SELECT [[FCMP]](s1), [[C1]], [[C2]] - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan nsz G_FMUL [[COPY]], [[SELECT]] - ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = nnan nsz G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL]](s32) - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 3.200000e+01 - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = nnan nsz G_SELECT [[FCMP]](s1), [[C3]], [[C4]] - ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s32) = nnan nsz G_FSUB [[INT]], [[SELECT1]] - ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s32) = nnan nsz G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FSUB]](s32), [[COPY1]](s32) - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02 - ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = nnan nsz G_FCMP floatpred(olt), [[INT1]](s32), [[C5]] - ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01 - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = nnan nsz G_SELECT [[FCMP1]](s1), [[C6]], [[C4]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = nnan nsz G_FADD [[INT1]], [[SELECT2]] - ; GFX9-NEXT: 
[[INT2:%[0-9]+]]:_(s32) = nnan nsz G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32) - ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000 - ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = nnan nsz G_SELECT [[FCMP1]](s1), [[C7]], [[C2]] - ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = nnan nsz G_FMUL [[INT2]], [[SELECT3]] - ; GFX9-NEXT: $vgpr0 = COPY [[FMUL1]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = nnan nsz G_FPOW %0, %1 - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3810000000000000 + ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[BITCAST]](f32), [[C]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(f32) = nnan nsz G_SELECT [[FCMP]](i1), [[C1]], [[C2]] + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = nnan nsz G_FMUL [[BITCAST]], [[SELECT]] + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(f32) = nnan nsz G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL]](f32) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 3.200000e+01 + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(f32) = nnan nsz G_SELECT [[FCMP]](i1), [[C3]], [[C4]] + ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(f32) = nnan nsz G_FSUB [[INT]], [[SELECT1]] + ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(f32) = nnan nsz G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FSUB]](f32), [[BITCAST1]](f32) + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(f32) = G_FCONSTANT float -1.260000e+02 + ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = nnan nsz G_FCMP floatpred(olt), [[INT1]](f32), [[C5]] + ; GFX9-NEXT: [[C6:%[0-9]+]]:_(f32) = G_FCONSTANT float 6.400000e+01 + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(f32) = nnan nsz G_SELECT [[FCMP1]](i1), [[C6]], [[C4]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(f32) = nnan nsz G_FADD [[INT1]], [[SELECT2]] + ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(f32) = nnan nsz G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](f32) + ; GFX9-NEXT: [[C7:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3BF0000000000000 + ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(f32) = nnan nsz G_SELECT [[FCMP1]](i1), [[C7]], [[C2]] + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = nnan nsz G_FMUL [[INT2]], [[SELECT3]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMUL1]](f32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32) = nnan nsz G_FPOW %2, %3 + %5:_(i32) = G_BITCAST %4(f32) + $vgpr0 = COPY %5(i32) ... 
--- @@ -367,50 +407,60 @@ body: | ; GFX6-LABEL: name: test_fpow_s16 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX6-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX6-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FPEXT]](s32) - ; GFX6-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[INT]](s32), [[FPEXT1]](s32) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02 - ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[INT1]](s32), [[C]] - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01 - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]] - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[INT1]], [[SELECT]] - ; GFX6-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32) - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000 - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[C4]] - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[SELECT1]] - ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX6-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; GFX6-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST1]](f16) + ; GFX6-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FPEXT]](f32) + ; GFX6-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[INT]](f32), [[FPEXT1]](f32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float -1.260000e+02 + ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[INT1]](f32), [[C]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 6.400000e+01 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C1]], [[C2]] + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[INT1]], [[SELECT]] + ; GFX6-NEXT: [[INT2:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](f32) + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3BF0000000000000 + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C3]], [[C4]] + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[INT2]], [[SELECT1]] + ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMUL]](f32) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST2]](i16) + ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + ; ; 
GFX9-LABEL: name: test_fpow_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[FLOG2_:%[0-9]+]]:_(s16) = G_FLOG2 [[TRUNC]] - ; GFX9-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[FLOG2_]](s16) - ; GFX9-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FPEXT]](s32), [[FPEXT1]](s32) - ; GFX9-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT]](s32) - ; GFX9-NEXT: [[FEXP2_:%[0-9]+]]:_(s16) = G_FEXP2 [[FPTRUNC]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FEXP2_]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s16) = G_FPOW %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[FLOG2_:%[0-9]+]]:_(f16) = G_FLOG2 [[BITCAST]] + ; GFX9-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[FLOG2_]](f16) + ; GFX9-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST1]](f16) + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FPEXT]](f32), [[FPEXT1]](f32) + ; GFX9-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[INT]](f32) + ; GFX9-NEXT: [[FEXP2_:%[0-9]+]]:_(f16) = G_FEXP2 [[FPTRUNC]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[FEXP2_]](f16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST2]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(f16) = G_BITCAST %2(i16) + %5:_(f16) = G_BITCAST %3(i16) + %6:_(f16) = G_FPOW %4, %5 + %7:_(i16) = G_BITCAST %6(f16) + %8:_(i32) = G_ANYEXT %7(i16) + $vgpr0 = COPY %8(i32) ... 
--- @@ -422,82 +472,106 @@ body: | ; GFX6-LABEL: name: test_fpow_v2s16 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX6-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX6-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX6-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FPEXT]](s32) - ; GFX6-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[INT]](s32), [[FPEXT1]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02 - ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[INT1]](s32), [[C1]] - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01 - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C2]], [[C3]] - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[INT1]], [[SELECT]] - ; GFX6-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32) - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000 - ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C4]], [[C5]] - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[SELECT1]] - ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; GFX6-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; GFX6-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; GFX6-NEXT: [[INT3:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FPEXT2]](s32) - ; GFX6-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[INT3]](s32), [[FPEXT3]](s32) - ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[INT4]](s32), [[C1]] - ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C2]], [[C3]] - ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[INT4]], [[SELECT2]] - ; GFX6-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](s32) - ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C4]], [[C5]] - ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT5]], [[SELECT3]] - ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX6-NEXT: 
[[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %37(i16) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %43(i16) + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %38(i16) + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %44(i16) + ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST1]](<2 x f16>) + ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST6]](<2 x i16>) + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST7]](i32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST7]], [[C]](i32) + ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX6-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; GFX6-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST8]](<2 x i16>) + ; GFX6-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST9]](i32) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST9]], [[C]](i32) + ; GFX6-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX6-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; GFX6-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST3]](f16) + ; GFX6-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FPEXT]](f32) + ; GFX6-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[INT]](f32), [[FPEXT1]](f32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float -1.260000e+02 + ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[INT1]](f32), [[C1]] + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 6.400000e+01 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C2]], [[C3]] + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[INT1]], [[SELECT]] + ; GFX6-NEXT: [[INT2:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](f32) + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3BF0000000000000 + ; GFX6-NEXT: [[C5:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C4]], [[C5]] + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[INT2]], [[SELECT1]] + ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMUL]](f32) + ; GFX6-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST4]](f16) + ; GFX6-NEXT: [[FPEXT3:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST5]](f16) + ; GFX6-NEXT: [[INT3:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FPEXT2]](f32) + ; GFX6-NEXT: [[INT4:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[INT3]](f32), [[FPEXT3]](f32) + ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[INT4]](f32), [[C1]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(f32) = G_SELECT [[FCMP1]](i1), [[C2]], [[C3]] + ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[INT4]], [[SELECT2]] + ; GFX6-NEXT: [[INT5:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](f32) + ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(f32) = G_SELECT [[FCMP1]](i1), [[C4]], [[C5]] + ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[INT5]], [[SELECT3]] + ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMUL1]](f32) + ; GFX6-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; GFX6-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; GFX6-NEXT: 
[[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST10]](i16) + ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST11]](i16) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; GFX6-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; GFX6-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST12]](<2 x f16>) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST13]](<2 x i16>) + ; ; GFX9-LABEL: name: test_fpow_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[FLOG2_:%[0-9]+]]:_(s16) = G_FLOG2 [[TRUNC]] - ; GFX9-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[FLOG2_]](s16) - ; GFX9-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FPEXT]](s32), [[FPEXT1]](s32) - ; GFX9-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT]](s32) - ; GFX9-NEXT: [[FEXP2_:%[0-9]+]]:_(s16) = G_FEXP2 [[FPTRUNC]] - ; GFX9-NEXT: [[FLOG2_1:%[0-9]+]]:_(s16) = G_FLOG2 [[TRUNC1]] - ; GFX9-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[FLOG2_1]](s16) - ; GFX9-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FPEXT2]](s32), [[FPEXT3]](s32) - ; GFX9-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32) - ; GFX9-NEXT: [[FEXP2_1:%[0-9]+]]:_(s16) = G_FEXP2 [[FPTRUNC1]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FEXP2_]](s16), [[FEXP2_1]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_FPOW %0, %1 - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %22(i16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %28(i16) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %23(i16) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %29(i16) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST1]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST6]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST7]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST7]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; GFX9-NEXT: 
[[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST8]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST9]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST9]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[FLOG2_:%[0-9]+]]:_(f16) = G_FLOG2 [[BITCAST2]] + ; GFX9-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[FLOG2_]](f16) + ; GFX9-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST3]](f16) + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FPEXT]](f32), [[FPEXT1]](f32) + ; GFX9-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[INT]](f32) + ; GFX9-NEXT: [[FEXP2_:%[0-9]+]]:_(f16) = G_FEXP2 [[FPTRUNC]] + ; GFX9-NEXT: [[FLOG2_1:%[0-9]+]]:_(f16) = G_FLOG2 [[BITCAST4]] + ; GFX9-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[FLOG2_1]](f16) + ; GFX9-NEXT: [[FPEXT3:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST5]](f16) + ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FPEXT2]](f32), [[FPEXT3]](f32) + ; GFX9-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[INT1]](f32) + ; GFX9-NEXT: [[FEXP2_1:%[0-9]+]]:_(f16) = G_FEXP2 [[FPTRUNC1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[FEXP2_]](f16), [[FEXP2_1]](f16) + ; GFX9-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BUILD_VECTOR]](<2 x f16>) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST10]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %3:_(<2 x f16>) = G_BITCAST %1(<2 x i16>) + %4:_(<2 x f16>) = G_FPOW %2, %3 + %5:_(<2 x i16>) = G_BITCAST %4(<2 x f16>) + $vgpr0 = COPY %5(<2 x i16>) ... --- @@ -509,80 +583,104 @@ body: | ; GFX6-LABEL: name: test_fpow_v2s16_flags ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX6-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX6-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX6-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = nnan nsz G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FPEXT]](s32) - ; GFX6-NEXT: [[INT1:%[0-9]+]]:_(s32) = nnan nsz G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[INT]](s32), [[FPEXT1]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02 - ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = nnan nsz G_FCMP floatpred(olt), [[INT1]](s32), [[C1]] - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01 - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = nnan nsz G_SELECT [[FCMP]](s1), [[C2]], [[C3]] - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = nnan nsz G_FADD [[INT1]], [[SELECT]] - ; GFX6-NEXT: [[INT2:%[0-9]+]]:_(s32) = nnan nsz G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32) - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = 
G_FCONSTANT float 0x3BF0000000000000 - ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = nnan nsz G_SELECT [[FCMP]](s1), [[C4]], [[C5]] - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan nsz G_FMUL [[INT2]], [[SELECT1]] - ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; GFX6-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; GFX6-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; GFX6-NEXT: [[INT3:%[0-9]+]]:_(s32) = nnan nsz G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FPEXT2]](s32) - ; GFX6-NEXT: [[INT4:%[0-9]+]]:_(s32) = nnan nsz G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[INT3]](s32), [[FPEXT3]](s32) - ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = nnan nsz G_FCMP floatpred(olt), [[INT4]](s32), [[C1]] - ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = nnan nsz G_SELECT [[FCMP1]](s1), [[C2]], [[C3]] - ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = nnan nsz G_FADD [[INT4]], [[SELECT2]] - ; GFX6-NEXT: [[INT5:%[0-9]+]]:_(s32) = nnan nsz G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](s32) - ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = nnan nsz G_SELECT [[FCMP1]](s1), [[C4]], [[C5]] - ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = nnan nsz G_FMUL [[INT5]], [[SELECT3]] - ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %37(i16) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %43(i16) + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %38(i16) + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %44(i16) + ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST1]](<2 x f16>) + ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST6]](<2 x i16>) + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST7]](i32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST7]], [[C]](i32) + ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX6-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; GFX6-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST8]](<2 x i16>) + ; GFX6-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST9]](i32) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST9]], [[C]](i32) + ; GFX6-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX6-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; GFX6-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST3]](f16) + ; GFX6-NEXT: [[INT:%[0-9]+]]:_(f32) = nnan nsz G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FPEXT]](f32) + ; GFX6-NEXT: [[INT1:%[0-9]+]]:_(f32) = nnan nsz G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[INT]](f32), [[FPEXT1]](f32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float -1.260000e+02 + ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(i1) = nnan nsz G_FCMP 
floatpred(olt), [[INT1]](f32), [[C1]] + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 6.400000e+01 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(f32) = nnan nsz G_SELECT [[FCMP]](i1), [[C2]], [[C3]] + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(f32) = nnan nsz G_FADD [[INT1]], [[SELECT]] + ; GFX6-NEXT: [[INT2:%[0-9]+]]:_(f32) = nnan nsz G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](f32) + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3BF0000000000000 + ; GFX6-NEXT: [[C5:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(f32) = nnan nsz G_SELECT [[FCMP]](i1), [[C4]], [[C5]] + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = nnan nsz G_FMUL [[INT2]], [[SELECT1]] + ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMUL]](f32) + ; GFX6-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST4]](f16) + ; GFX6-NEXT: [[FPEXT3:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST5]](f16) + ; GFX6-NEXT: [[INT3:%[0-9]+]]:_(f32) = nnan nsz G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FPEXT2]](f32) + ; GFX6-NEXT: [[INT4:%[0-9]+]]:_(f32) = nnan nsz G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[INT3]](f32), [[FPEXT3]](f32) + ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = nnan nsz G_FCMP floatpred(olt), [[INT4]](f32), [[C1]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(f32) = nnan nsz G_SELECT [[FCMP1]](i1), [[C2]], [[C3]] + ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(f32) = nnan nsz G_FADD [[INT4]], [[SELECT2]] + ; GFX6-NEXT: [[INT5:%[0-9]+]]:_(f32) = nnan nsz G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](f32) + ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(f32) = nnan nsz G_SELECT [[FCMP1]](i1), [[C4]], [[C5]] + ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = nnan nsz G_FMUL [[INT5]], [[SELECT3]] + ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMUL1]](f32) + ; GFX6-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; GFX6-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST10]](i16) + ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST11]](i16) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; GFX6-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; GFX6-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST12]](<2 x f16>) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST13]](<2 x i16>) + ; ; GFX9-LABEL: name: test_fpow_v2s16_flags ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[FLOG2_:%[0-9]+]]:_(s16) = nnan nsz G_FLOG2 [[TRUNC]] - ; GFX9-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = nnan nsz G_FPEXT [[FLOG2_]](s16) - ; GFX9-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = nnan nsz G_FPEXT [[TRUNC2]](s16) - ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = 
nnan nsz G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FPEXT]](s32), [[FPEXT1]](s32) - ; GFX9-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT]](s32) - ; GFX9-NEXT: [[FEXP2_:%[0-9]+]]:_(s16) = nnan nsz G_FEXP2 [[FPTRUNC]] - ; GFX9-NEXT: [[FLOG2_1:%[0-9]+]]:_(s16) = nnan nsz G_FLOG2 [[TRUNC1]] - ; GFX9-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = nnan nsz G_FPEXT [[FLOG2_1]](s16) - ; GFX9-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = nnan nsz G_FPEXT [[TRUNC3]](s16) - ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s32) = nnan nsz G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FPEXT2]](s32), [[FPEXT3]](s32) - ; GFX9-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32) - ; GFX9-NEXT: [[FEXP2_1:%[0-9]+]]:_(s16) = nnan nsz G_FEXP2 [[FPTRUNC1]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FEXP2_]](s16), [[FEXP2_1]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = nnan nsz G_FPOW %0, %1 - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %22(i16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %28(i16) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %23(i16) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %29(i16) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST1]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST6]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST7]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST7]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST8]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST9]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST9]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[FLOG2_:%[0-9]+]]:_(f16) = nnan nsz G_FLOG2 [[BITCAST2]] + ; GFX9-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = nnan nsz G_FPEXT [[FLOG2_]](f16) + ; GFX9-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = nnan nsz G_FPEXT [[BITCAST3]](f16) + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(f32) = nnan nsz G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FPEXT]](f32), [[FPEXT1]](f32) + ; GFX9-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[INT]](f32) + ; GFX9-NEXT: [[FEXP2_:%[0-9]+]]:_(f16) = nnan nsz G_FEXP2 [[FPTRUNC]] + ; GFX9-NEXT: [[FLOG2_1:%[0-9]+]]:_(f16) = nnan nsz G_FLOG2 [[BITCAST4]] + ; GFX9-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = nnan nsz G_FPEXT [[FLOG2_1]](f16) + ; GFX9-NEXT: [[FPEXT3:%[0-9]+]]:_(f32) = nnan nsz G_FPEXT [[BITCAST5]](f16) + ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(f32) = nnan nsz G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FPEXT2]](f32), [[FPEXT3]](f32) + ; GFX9-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[INT1]](f32) + ; GFX9-NEXT: [[FEXP2_1:%[0-9]+]]:_(f16) = nnan nsz G_FEXP2 [[FPTRUNC1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[FEXP2_]](f16), [[FEXP2_1]](f16) + ; GFX9-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BUILD_VECTOR]](<2 x f16>) + ; 
GFX9-NEXT: $vgpr0 = COPY [[BITCAST10]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %3:_(<2 x f16>) = G_BITCAST %1(<2 x i16>) + %4:_(<2 x f16>) = nnan nsz G_FPOW %2, %3 + %5:_(<2 x i16>) = G_BITCAST %4(<2 x f16>) + $vgpr0 = COPY %5(<2 x i16>) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fpowi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fpowi.mir index 3e7f4e8843f4f..5842bf3b43c2a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fpowi.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fpowi.mir @@ -13,48 +13,55 @@ body: | ; GFX6-LABEL: name: test_fpowi_s16_s32_flags ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX6-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX6-NEXT: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[COPY1]](s32) - ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FPEXT]](s32) - ; GFX6-NEXT: [[INT1:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[INT]](s32), [[SITOFP]](s32) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02 - ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = nnan G_FCMP floatpred(olt), [[INT1]](s32), [[C]] - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01 - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = nnan G_SELECT [[FCMP]](s1), [[C1]], [[C2]] - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[INT1]], [[SELECT]] - ; GFX6-NEXT: [[INT2:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32) - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000 - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = nnan G_SELECT [[FCMP]](s1), [[C3]], [[C4]] - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT2]], [[SELECT1]] - ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX6-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; GFX6-NEXT: [[SITOFP:%[0-9]+]]:_(f32) = G_SITOFP [[COPY1]](i32) + ; GFX6-NEXT: [[INT:%[0-9]+]]:_(f32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FPEXT]](f32) + ; GFX6-NEXT: [[INT1:%[0-9]+]]:_(f32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[INT]](f32), [[SITOFP]](f32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float -1.260000e+02 + ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(i1) = nnan G_FCMP floatpred(olt), [[INT1]](f32), [[C]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 6.400000e+01 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(f32) = nnan G_SELECT [[FCMP]](i1), [[C1]], [[C2]] + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(f32) = nnan G_FADD [[INT1]], [[SELECT]] + ; GFX6-NEXT: [[INT2:%[0-9]+]]:_(f32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](f32) + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3BF0000000000000 
+ ; GFX6-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(f32) = nnan G_SELECT [[FCMP]](i1), [[C3]], [[C4]] + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = nnan G_FMUL [[INT2]], [[SELECT1]] + ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMUL]](f32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + ; ; GFX9-LABEL: name: test_fpowi_s16_s32_flags ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[SITOFP:%[0-9]+]]:_(s16) = G_SITOFP [[COPY1]](s32) - ; GFX9-NEXT: [[FLOG2_:%[0-9]+]]:_(s16) = nnan G_FLOG2 [[TRUNC]] - ; GFX9-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = nnan G_FPEXT [[FLOG2_]](s16) - ; GFX9-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = nnan G_FPEXT [[SITOFP]](s16) - ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FPEXT]](s32), [[FPEXT1]](s32) - ; GFX9-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT]](s32) - ; GFX9-NEXT: [[FEXP2_:%[0-9]+]]:_(s16) = nnan G_FEXP2 [[FPTRUNC]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FEXP2_]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = nnan G_FPOWI %2, %1 - %4:_(s32) = G_ANYEXT %3 - $vgpr0 = COPY %4 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[SITOFP:%[0-9]+]]:_(f16) = G_SITOFP [[COPY1]](i32) + ; GFX9-NEXT: [[FLOG2_:%[0-9]+]]:_(f16) = nnan G_FLOG2 [[BITCAST]] + ; GFX9-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = nnan G_FPEXT [[FLOG2_]](f16) + ; GFX9-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = nnan G_FPEXT [[SITOFP]](f16) + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(f32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FPEXT]](f32), [[FPEXT1]](f32) + ; GFX9-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[INT]](f32) + ; GFX9-NEXT: [[FEXP2_:%[0-9]+]]:_(f16) = nnan G_FEXP2 [[FPTRUNC]] + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[FEXP2_]](f16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(f16) = G_BITCAST %2(i16) + %4:_(f16) = nnan G_FPOWI %3, %1(i32) + %5:_(i16) = G_BITCAST %4(f16) + %6:_(i32) = G_ANYEXT %5(i16) + $vgpr0 = COPY %6(i32) ... 
--- @@ -66,61 +73,68 @@ body: | ; GFX6-LABEL: name: test_fpowi_s32_s32_flags ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[COPY1]](s32) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3810000000000000 - ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[COPY]](s32), [[C]] - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = nnan G_SELECT [[FCMP]](s1), [[C1]], [[C2]] - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[SELECT]] - ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL]](s32) - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 3.200000e+01 - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = nnan G_SELECT [[FCMP]](s1), [[C3]], [[C4]] - ; GFX6-NEXT: [[FSUB:%[0-9]+]]:_(s32) = nnan G_FSUB [[INT]], [[SELECT1]] - ; GFX6-NEXT: [[INT1:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FSUB]](s32), [[SITOFP]](s32) - ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02 - ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = nnan G_FCMP floatpred(olt), [[INT1]](s32), [[C5]] - ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01 - ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = nnan G_SELECT [[FCMP1]](s1), [[C6]], [[C4]] - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[INT1]], [[SELECT2]] - ; GFX6-NEXT: [[INT2:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32) - ; GFX6-NEXT: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000 - ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = nnan G_SELECT [[FCMP1]](s1), [[C7]], [[C2]] - ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT2]], [[SELECT3]] - ; GFX6-NEXT: $vgpr0 = COPY [[FMUL1]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX6-NEXT: [[SITOFP:%[0-9]+]]:_(f32) = G_SITOFP [[COPY1]](i32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3810000000000000 + ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[BITCAST]](f32), [[C]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(f32) = nnan G_SELECT [[FCMP]](i1), [[C1]], [[C2]] + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = nnan G_FMUL [[BITCAST]], [[SELECT]] + ; GFX6-NEXT: [[INT:%[0-9]+]]:_(f32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL]](f32) + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 3.200000e+01 + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(f32) = nnan G_SELECT [[FCMP]](i1), [[C3]], [[C4]] + ; GFX6-NEXT: [[FSUB:%[0-9]+]]:_(f32) = nnan G_FSUB [[INT]], [[SELECT1]] + ; GFX6-NEXT: [[INT1:%[0-9]+]]:_(f32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FSUB]](f32), [[SITOFP]](f32) + ; GFX6-NEXT: [[C5:%[0-9]+]]:_(f32) = G_FCONSTANT float -1.260000e+02 + ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = nnan G_FCMP floatpred(olt), [[INT1]](f32), [[C5]] + ; GFX6-NEXT: [[C6:%[0-9]+]]:_(f32) = G_FCONSTANT float 6.400000e+01 + ; 
GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(f32) = nnan G_SELECT [[FCMP1]](i1), [[C6]], [[C4]] + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(f32) = nnan G_FADD [[INT1]], [[SELECT2]] + ; GFX6-NEXT: [[INT2:%[0-9]+]]:_(f32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](f32) + ; GFX6-NEXT: [[C7:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3BF0000000000000 + ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(f32) = nnan G_SELECT [[FCMP1]](i1), [[C7]], [[C2]] + ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = nnan G_FMUL [[INT2]], [[SELECT3]] + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[FMUL1]](f32) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + ; ; GFX9-LABEL: name: test_fpowi_s32_s32_flags ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[COPY1]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3810000000000000 - ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[COPY]](s32), [[C]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = nnan G_SELECT [[FCMP]](s1), [[C1]], [[C2]] - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[SELECT]] - ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL]](s32) - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 3.200000e+01 - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = nnan G_SELECT [[FCMP]](s1), [[C3]], [[C4]] - ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s32) = nnan G_FSUB [[INT]], [[SELECT1]] - ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FSUB]](s32), [[SITOFP]](s32) - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02 - ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = nnan G_FCMP floatpred(olt), [[INT1]](s32), [[C5]] - ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01 - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = nnan G_SELECT [[FCMP1]](s1), [[C6]], [[C4]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[INT1]], [[SELECT2]] - ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32) - ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000 - ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = nnan G_SELECT [[FCMP1]](s1), [[C7]], [[C2]] - ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT2]], [[SELECT3]] - ; GFX9-NEXT: $vgpr0 = COPY [[FMUL1]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = nnan G_FPOWI %0, %1 - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[SITOFP:%[0-9]+]]:_(f32) = G_SITOFP [[COPY1]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3810000000000000 + ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[BITCAST]](f32), [[C]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(f32) = nnan G_SELECT [[FCMP]](i1), [[C1]], [[C2]] + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = nnan G_FMUL [[BITCAST]], [[SELECT]] + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(f32) = nnan G_INTRINSIC 
intrinsic(@llvm.amdgcn.log), [[FMUL]](f32) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 3.200000e+01 + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(f32) = nnan G_SELECT [[FCMP]](i1), [[C3]], [[C4]] + ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(f32) = nnan G_FSUB [[INT]], [[SELECT1]] + ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(f32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FSUB]](f32), [[SITOFP]](f32) + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(f32) = G_FCONSTANT float -1.260000e+02 + ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = nnan G_FCMP floatpred(olt), [[INT1]](f32), [[C5]] + ; GFX9-NEXT: [[C6:%[0-9]+]]:_(f32) = G_FCONSTANT float 6.400000e+01 + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(f32) = nnan G_SELECT [[FCMP1]](i1), [[C6]], [[C4]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(f32) = nnan G_FADD [[INT1]], [[SELECT2]] + ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(f32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](f32) + ; GFX9-NEXT: [[C7:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3BF0000000000000 + ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(f32) = nnan G_SELECT [[FCMP1]](i1), [[C7]], [[C2]] + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = nnan G_FMUL [[INT2]], [[SELECT3]] + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[FMUL1]](f32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = nnan G_FPOWI %2, %1(i32) + %4:_(i32) = G_BITCAST %3(f32) + $vgpr0 = COPY %4(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptosi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptosi.mir index 3094d19471611..cc2d864699c80 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptosi.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptosi.mir @@ -11,18 +11,22 @@ body: | ; SI-LABEL: name: test_fptosi_s32_to_s32 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32) - ; SI-NEXT: $vgpr0 = COPY [[FPTOSI]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[FPTOSI:%[0-9]+]]:_(i32) = G_FPTOSI [[BITCAST]](f32) + ; SI-NEXT: $vgpr0 = COPY [[FPTOSI]](i32) + ; ; VI-LABEL: name: test_fptosi_s32_to_s32 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32) - ; VI-NEXT: $vgpr0 = COPY [[FPTOSI]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_FPTOSI %0 - $vgpr0 = COPY %1 + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[FPTOSI:%[0-9]+]]:_(i32) = G_FPTOSI [[BITCAST]](f32) + ; VI-NEXT: $vgpr0 = COPY [[FPTOSI]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(i32) = G_FPTOSI %1(f32) + $vgpr0 = COPY %2(i32) ... 
--- @@ -34,18 +38,22 @@ body: | ; SI-LABEL: name: test_fptosi_s64_to_s32 ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s64) - ; SI-NEXT: $vgpr0 = COPY [[FPTOSI]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; SI-NEXT: [[FPTOSI:%[0-9]+]]:_(i32) = G_FPTOSI [[BITCAST]](f64) + ; SI-NEXT: $vgpr0 = COPY [[FPTOSI]](i32) + ; ; VI-LABEL: name: test_fptosi_s64_to_s32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s64) - ; VI-NEXT: $vgpr0 = COPY [[FPTOSI]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_FPTOSI %0 - $vgpr0 = COPY %1 + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; VI-NEXT: [[FPTOSI:%[0-9]+]]:_(i32) = G_FPTOSI [[BITCAST]](f64) + ; VI-NEXT: $vgpr0 = COPY [[FPTOSI]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(f64) = G_BITCAST %0(i64) + %2:_(i32) = G_FPTOSI %1(f64) + $vgpr0 = COPY %2(i32) ... --- @@ -57,24 +65,28 @@ body: | ; SI-LABEL: name: test_fptosi_v2s32_to_v2s32 ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; SI-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[UV]](s32) - ; SI-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[UV1]](s32) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FPTOSI]](s32), [[FPTOSI1]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; SI-NEXT: [[FPTOSI:%[0-9]+]]:_(i32) = G_FPTOSI [[UV]](f32) + ; SI-NEXT: [[FPTOSI1:%[0-9]+]]:_(i32) = G_FPTOSI [[UV1]](f32) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[FPTOSI]](i32), [[FPTOSI1]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + ; ; VI-LABEL: name: test_fptosi_v2s32_to_v2s32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; VI-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[UV]](s32) - ; VI-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[UV1]](s32) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FPTOSI]](s32), [[FPTOSI1]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_FPTOSI %0 - $vgpr0_vgpr1 = COPY %1 + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; VI-NEXT: [[FPTOSI:%[0-9]+]]:_(i32) = G_FPTOSI [[UV]](f32) + ; VI-NEXT: [[FPTOSI1:%[0-9]+]]:_(i32) = G_FPTOSI [[UV1]](f32) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[FPTOSI]](i32), [[FPTOSI1]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x f32>) = G_BITCAST %0(<2 x 
i32>) + %2:_(<2 x i32>) = G_FPTOSI %1(<2 x f32>) + $vgpr0_vgpr1 = COPY %2(<2 x i32>) ... --- @@ -86,24 +98,28 @@ body: | ; SI-LABEL: name: test_fptosi_v2s64_to_v2s32 ; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; SI-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[UV]](s64) - ; SI-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[UV1]](s64) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FPTOSI]](s32), [[FPTOSI1]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(f64), [[UV1:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST]](<2 x f64>) + ; SI-NEXT: [[FPTOSI:%[0-9]+]]:_(i32) = G_FPTOSI [[UV]](f64) + ; SI-NEXT: [[FPTOSI1:%[0-9]+]]:_(i32) = G_FPTOSI [[UV1]](f64) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[FPTOSI]](i32), [[FPTOSI1]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + ; ; VI-LABEL: name: test_fptosi_v2s64_to_v2s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; VI-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[UV]](s64) - ; VI-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[UV1]](s64) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FPTOSI]](s32), [[FPTOSI1]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s32>) = G_FPTOSI %0 - $vgpr0_vgpr1 = COPY %1 + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(f64), [[UV1:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST]](<2 x f64>) + ; VI-NEXT: [[FPTOSI:%[0-9]+]]:_(i32) = G_FPTOSI [[UV]](f64) + ; VI-NEXT: [[FPTOSI1:%[0-9]+]]:_(i32) = G_FPTOSI [[UV1]](f64) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[FPTOSI]](i32), [[FPTOSI1]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x f64>) = G_BITCAST %0(<2 x i64>) + %2:_(<2 x i32>) = G_FPTOSI %1(<2 x f64>) + $vgpr0_vgpr1 = COPY %2(<2 x i32>) ... 
--- @@ -115,24 +131,28 @@ body: | ; SI-LABEL: name: test_fptosi_s16_to_s16 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FPEXT]](s32) - ; SI-NEXT: $vgpr0 = COPY [[FPTOSI]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; SI-NEXT: [[FPTOSI:%[0-9]+]]:_(i32) = G_FPTOSI [[FPEXT]](f32) + ; SI-NEXT: $vgpr0 = COPY [[FPTOSI]](i32) + ; ; VI-LABEL: name: test_fptosi_s16_to_s16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[FPTOSI:%[0-9]+]]:_(s16) = G_FPTOSI [[TRUNC]](s16) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTOSI]](s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s16) = G_FPTOSI %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; VI-NEXT: [[FPTOSI:%[0-9]+]]:_(i16) = G_FPTOSI [[BITCAST]](f16) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[FPTOSI]](i16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(f16) = G_BITCAST %1(i16) + %3:_(i16) = G_FPTOSI %2(f16) + %4:_(i32) = G_ANYEXT %3(i16) + $vgpr0 = COPY %4(i32) ... --- @@ -144,19 +164,23 @@ body: | ; SI-LABEL: name: test_fptosi_s32_to_s16 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32) - ; SI-NEXT: $vgpr0 = COPY [[FPTOSI]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[FPTOSI:%[0-9]+]]:_(i32) = G_FPTOSI [[BITCAST]](f32) + ; SI-NEXT: $vgpr0 = COPY [[FPTOSI]](i32) + ; ; VI-LABEL: name: test_fptosi_s32_to_s16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32) - ; VI-NEXT: $vgpr0 = COPY [[FPTOSI]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_FPTOSI %0 - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[FPTOSI:%[0-9]+]]:_(i32) = G_FPTOSI [[BITCAST]](f32) + ; VI-NEXT: $vgpr0 = COPY [[FPTOSI]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(i16) = G_FPTOSI %1(f32) + %3:_(i32) = G_ANYEXT %2(i16) + $vgpr0 = COPY %3(i32) ... 
--- @@ -168,19 +192,23 @@ body: | ; SI-LABEL: name: test_fptosi_s64_to_s16 ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s64) - ; SI-NEXT: $vgpr0 = COPY [[FPTOSI]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; SI-NEXT: [[FPTOSI:%[0-9]+]]:_(i32) = G_FPTOSI [[BITCAST]](f64) + ; SI-NEXT: $vgpr0 = COPY [[FPTOSI]](i32) + ; ; VI-LABEL: name: test_fptosi_s64_to_s16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s64) - ; VI-NEXT: $vgpr0 = COPY [[FPTOSI]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s16) = G_FPTOSI %0 - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; VI-NEXT: [[FPTOSI:%[0-9]+]]:_(i32) = G_FPTOSI [[BITCAST]](f64) + ; VI-NEXT: $vgpr0 = COPY [[FPTOSI]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(f64) = G_BITCAST %0(i64) + %2:_(i16) = G_FPTOSI %1(f64) + %3:_(i32) = G_ANYEXT %2(i16) + $vgpr0 = COPY %3(i32) ... --- @@ -192,59 +220,66 @@ body: | ; SI-LABEL: name: test_fptosi_s64_s64 ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 - ; SI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ubfe), [[UV1]](s32), [[C]](s32), [[C1]](s32) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1023 - ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]] - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4503599627370495 - ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C5]](s32), [[AND]](s32) - ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[C4]], [[SUB]](s32) - ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; SI-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[C6]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[XOR]] - ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 51 - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SUB]](s32), [[C5]] - ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SUB]](s32), [[C7]] - ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[MV]], [[AND1]] - ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[COPY]], [[SELECT]] - ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3DF0000000000000 - ; SI-NEXT: [[C9:%[0-9]+]]:_(s64) = G_FCONSTANT double 0xC1F0000000000000 - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[SELECT1]], [[C8]] - ; SI-NEXT: [[INT1:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s64) - ; SI-NEXT: [[C10:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3FEFFFFFFFFFFFFF - ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s64) = G_FMINNUM_IEEE [[INT1]], [[C10]] - ; SI-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ord), [[FMUL]](s64), [[FMUL]] - ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[FMUL]], [[FMINNUM_IEEE]] - ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[SELECT2]] - ; SI-NEXT: 
[[FADD:%[0-9]+]]:_(s64) = G_FADD [[FMUL]], [[FNEG]] - ; SI-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FADD]], [[C9]], [[SELECT1]] - ; SI-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FADD]](s64) - ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMA]](s64) - ; SI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[FPTOUI]](s32), [[FPTOSI]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV1]](s64) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[BITCAST]](f64) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST1]](i64) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 11 + ; SI-NEXT: [[INT:%[0-9]+]]:_(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ubfe), [[UV1]](i32), [[C]](i32), [[C1]](i32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 1023 + ; SI-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[INT]], [[C2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[UV1]], [[C3]] + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4503599627370495 + ; SI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; SI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[C5]](i32), [[AND]](i32) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(f64) = G_BITCAST [[MV]](i64) + ; SI-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[C4]], [[SUB]](i32) + ; SI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 -1 + ; SI-NEXT: [[XOR:%[0-9]+]]:_(i64) = G_XOR [[ASHR]], [[C6]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[BITCAST1]], [[XOR]] + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(f64) = G_BITCAST [[AND1]](i64) + ; SI-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 51 + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SUB]](i32), [[C5]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(sgt), [[SUB]](i32), [[C7]] + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(f64) = G_SELECT [[ICMP]](i1), [[BITCAST2]], [[BITCAST3]] + ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(f64) = G_SELECT [[ICMP1]](i1), [[BITCAST]], [[SELECT]] + ; SI-NEXT: [[C8:%[0-9]+]]:_(f64) = G_FCONSTANT double 0x3DF0000000000000 + ; SI-NEXT: [[C9:%[0-9]+]]:_(f64) = G_FCONSTANT double 0xC1F0000000000000 + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[SELECT1]], [[C8]] + ; SI-NEXT: [[INT1:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](f64) + ; SI-NEXT: [[C10:%[0-9]+]]:_(f64) = G_FCONSTANT double 0x3FEFFFFFFFFFFFFF + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f64) = G_FMINNUM_IEEE [[INT1]], [[C10]] + ; SI-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(ord), [[FMUL]](f64), [[FMUL]] + ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(f64) = G_SELECT [[FCMP]](i1), [[FMUL]], [[FMINNUM_IEEE]] + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(f64) = G_FNEG [[SELECT2]] + ; SI-NEXT: [[FADD:%[0-9]+]]:_(f64) = G_FADD [[FMUL]], [[FNEG]] + ; SI-NEXT: [[FMA:%[0-9]+]]:_(f64) = G_FMA [[FADD]], [[C9]], [[SELECT1]] + ; SI-NEXT: [[FPTOSI:%[0-9]+]]:_(i32) = G_FPTOSI [[FADD]](f64) + ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMA]](f64) + ; SI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[FPTOUI]](i32), [[FPTOSI]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV1]](i64) + ; ; VI-LABEL: name: test_fptosi_s64_s64 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s64) = G_INTRINSIC_TRUNC [[COPY]] - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3DF0000000000000 - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) 
= G_FCONSTANT double 0xC1F0000000000000 - ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INTRINSIC_TRUNC]], [[C]] - ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(s64) = G_FFLOOR [[FMUL]] - ; VI-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FFLOOR]], [[C1]], [[INTRINSIC_TRUNC]] - ; VI-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FFLOOR]](s64) - ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMA]](s64) - ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[FPTOUI]](s32), [[FPTOSI]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_FPTOSI %0 - $vgpr0_vgpr1 = COPY %1 + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; VI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f64) = G_INTRINSIC_TRUNC [[BITCAST]] + ; VI-NEXT: [[C:%[0-9]+]]:_(f64) = G_FCONSTANT double 0x3DF0000000000000 + ; VI-NEXT: [[C1:%[0-9]+]]:_(f64) = G_FCONSTANT double 0xC1F0000000000000 + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[INTRINSIC_TRUNC]], [[C]] + ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(f64) = G_FFLOOR [[FMUL]] + ; VI-NEXT: [[FMA:%[0-9]+]]:_(f64) = G_FMA [[FFLOOR]], [[C1]], [[INTRINSIC_TRUNC]] + ; VI-NEXT: [[FPTOSI:%[0-9]+]]:_(i32) = G_FPTOSI [[FFLOOR]](f64) + ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMA]](f64) + ; VI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[FPTOUI]](i32), [[FPTOSI]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(f64) = G_BITCAST %0(i64) + %2:_(i64) = G_FPTOSI %1(f64) + $vgpr0_vgpr1 = COPY %2(i64) ... --- @@ -256,57 +291,64 @@ body: | ; SI-LABEL: name: test_fptosi_s64_s64_flags ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 - ; SI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ubfe), [[UV1]](s32), [[C]](s32), [[C1]](s32) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1023 - ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]] - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4503599627370495 - ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C5]](s32), [[AND]](s32) - ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[C4]], [[SUB]](s32) - ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; SI-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[C6]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[XOR]] - ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 51 - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SUB]](s32), [[C5]] - ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SUB]](s32), [[C7]] - ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[MV]], [[AND1]] - ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[COPY]], [[SELECT]] - ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3DF0000000000000 - ; SI-NEXT: [[C9:%[0-9]+]]:_(s64) = G_FCONSTANT double 0xC1F0000000000000 - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s64) = nnan G_FMUL [[SELECT1]], [[C8]] - ; SI-NEXT: [[INT1:%[0-9]+]]:_(s64) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s64) - ; SI-NEXT: [[C10:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3FEFFFFFFFFFFFFF - ; SI-NEXT: 
[[FMINNUM_IEEE:%[0-9]+]]:_(s64) = nnan G_FMINNUM_IEEE [[INT1]], [[C10]] - ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = nnan G_FNEG [[FMINNUM_IEEE]] - ; SI-NEXT: [[FADD:%[0-9]+]]:_(s64) = nnan G_FADD [[FMUL]], [[FNEG]] - ; SI-NEXT: [[FMA:%[0-9]+]]:_(s64) = nnan G_FMA [[FADD]], [[C9]], [[SELECT1]] - ; SI-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FADD]](s64) - ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMA]](s64) - ; SI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[FPTOUI]](s32), [[FPTOSI]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV1]](s64) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[BITCAST]](f64) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST1]](i64) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 11 + ; SI-NEXT: [[INT:%[0-9]+]]:_(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ubfe), [[UV1]](i32), [[C]](i32), [[C1]](i32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 1023 + ; SI-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[INT]], [[C2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[UV1]], [[C3]] + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4503599627370495 + ; SI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; SI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[C5]](i32), [[AND]](i32) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(f64) = G_BITCAST [[MV]](i64) + ; SI-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[C4]], [[SUB]](i32) + ; SI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 -1 + ; SI-NEXT: [[XOR:%[0-9]+]]:_(i64) = G_XOR [[ASHR]], [[C6]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[BITCAST1]], [[XOR]] + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(f64) = G_BITCAST [[AND1]](i64) + ; SI-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 51 + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SUB]](i32), [[C5]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(sgt), [[SUB]](i32), [[C7]] + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(f64) = G_SELECT [[ICMP]](i1), [[BITCAST2]], [[BITCAST3]] + ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(f64) = G_SELECT [[ICMP1]](i1), [[BITCAST]], [[SELECT]] + ; SI-NEXT: [[C8:%[0-9]+]]:_(f64) = G_FCONSTANT double 0x3DF0000000000000 + ; SI-NEXT: [[C9:%[0-9]+]]:_(f64) = G_FCONSTANT double 0xC1F0000000000000 + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f64) = nnan G_FMUL [[SELECT1]], [[C8]] + ; SI-NEXT: [[INT1:%[0-9]+]]:_(f64) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](f64) + ; SI-NEXT: [[C10:%[0-9]+]]:_(f64) = G_FCONSTANT double 0x3FEFFFFFFFFFFFFF + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f64) = nnan G_FMINNUM_IEEE [[INT1]], [[C10]] + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(f64) = nnan G_FNEG [[FMINNUM_IEEE]] + ; SI-NEXT: [[FADD:%[0-9]+]]:_(f64) = nnan G_FADD [[FMUL]], [[FNEG]] + ; SI-NEXT: [[FMA:%[0-9]+]]:_(f64) = nnan G_FMA [[FADD]], [[C9]], [[SELECT1]] + ; SI-NEXT: [[FPTOSI:%[0-9]+]]:_(i32) = G_FPTOSI [[FADD]](f64) + ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMA]](f64) + ; SI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[FPTOUI]](i32), [[FPTOSI]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV1]](i64) + ; ; VI-LABEL: name: test_fptosi_s64_s64_flags ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s64) = nnan G_INTRINSIC_TRUNC [[COPY]] - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3DF0000000000000 - ; 
VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 0xC1F0000000000000 - ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s64) = nnan G_FMUL [[INTRINSIC_TRUNC]], [[C]] - ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(s64) = nnan G_FFLOOR [[FMUL]] - ; VI-NEXT: [[FMA:%[0-9]+]]:_(s64) = nnan G_FMA [[FFLOOR]], [[C1]], [[INTRINSIC_TRUNC]] - ; VI-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FFLOOR]](s64) - ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMA]](s64) - ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[FPTOUI]](s32), [[FPTOSI]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = nnan G_FPTOSI %0 - $vgpr0_vgpr1 = COPY %1 + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; VI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f64) = nnan G_INTRINSIC_TRUNC [[BITCAST]] + ; VI-NEXT: [[C:%[0-9]+]]:_(f64) = G_FCONSTANT double 0x3DF0000000000000 + ; VI-NEXT: [[C1:%[0-9]+]]:_(f64) = G_FCONSTANT double 0xC1F0000000000000 + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(f64) = nnan G_FMUL [[INTRINSIC_TRUNC]], [[C]] + ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(f64) = nnan G_FFLOOR [[FMUL]] + ; VI-NEXT: [[FMA:%[0-9]+]]:_(f64) = nnan G_FMA [[FFLOOR]], [[C1]], [[INTRINSIC_TRUNC]] + ; VI-NEXT: [[FPTOSI:%[0-9]+]]:_(i32) = G_FPTOSI [[FFLOOR]](f64) + ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMA]](f64) + ; VI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[FPTOUI]](i32), [[FPTOSI]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(f64) = G_BITCAST %0(i64) + %2:_(i64) = nnan G_FPTOSI %1(f64) + $vgpr0_vgpr1 = COPY %2(i64) ... --- @@ -318,93 +360,103 @@ body: | ; SI-LABEL: name: test_fptosi_v2s64_to_v2s64 ; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 - ; SI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ubfe), [[UV3]](s32), [[C]](s32), [[C1]](s32) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1023 - ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C3]] - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4503599627370495 - ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C5]](s32), [[AND]](s32) - ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[C4]], [[SUB]](s32) - ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; SI-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[C6]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[UV]], [[XOR]] - ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 51 - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SUB]](s32), [[C5]] - ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SUB]](s32), [[C7]] - ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[MV]], [[AND1]] - ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV]], [[SELECT]] - ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3DF0000000000000 - ; SI-NEXT: [[C9:%[0-9]+]]:_(s64) = G_FCONSTANT double 0xC1F0000000000000 - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[SELECT1]], [[C8]] - ; 
SI-NEXT: [[INT1:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s64) - ; SI-NEXT: [[C10:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3FEFFFFFFFFFFFFF - ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s64) = G_FMINNUM_IEEE [[INT1]], [[C10]] - ; SI-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ord), [[FMUL]](s64), [[FMUL]] - ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[FMUL]], [[FMINNUM_IEEE]] - ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[SELECT2]] - ; SI-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[FMUL]], [[FNEG]] - ; SI-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FADD]], [[C9]], [[SELECT1]] - ; SI-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FADD]](s64) - ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMA]](s64) - ; SI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[FPTOUI]](s32), [[FPTOSI]](s32) - ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; SI-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ubfe), [[UV5]](s32), [[C]](s32), [[C1]](s32) - ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[INT2]], [[C2]] - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C3]] - ; SI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C5]](s32), [[AND2]](s32) - ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[C4]], [[SUB1]](s32) - ; SI-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[ASHR1]], [[C6]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[UV1]], [[XOR1]] - ; SI-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SUB1]](s32), [[C5]] - ; SI-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SUB1]](s32), [[C7]] - ; SI-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[MV2]], [[AND3]] - ; SI-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV1]], [[SELECT3]] - ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[SELECT4]], [[C8]] - ; SI-NEXT: [[INT3:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](s64) - ; SI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s64) = G_FMINNUM_IEEE [[INT3]], [[C10]] - ; SI-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ord), [[FMUL1]](s64), [[FMUL1]] - ; SI-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[FCMP1]](s1), [[FMUL1]], [[FMINNUM_IEEE1]] - ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[SELECT5]] - ; SI-NEXT: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[FMUL1]], [[FNEG1]] - ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[FADD1]], [[C9]], [[SELECT4]] - ; SI-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FADD1]](s64) - ; SI-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMA1]](s64) - ; SI-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[FPTOUI1]](s32), [[FPTOSI1]](s32) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV1]](s64), [[MV3]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(f64), [[UV1:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST]](<2 x f64>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[UV]](f64) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST1]](i64) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 11 + ; SI-NEXT: [[INT:%[0-9]+]]:_(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ubfe), [[UV3]](i32), [[C]](i32), [[C1]](i32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 1023 + ; SI-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[INT]], 
[[C2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[UV3]], [[C3]] + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4503599627370495 + ; SI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; SI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[C5]](i32), [[AND]](i32) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(f64) = G_BITCAST [[MV]](i64) + ; SI-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[C4]], [[SUB]](i32) + ; SI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 -1 + ; SI-NEXT: [[XOR:%[0-9]+]]:_(i64) = G_XOR [[ASHR]], [[C6]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[BITCAST1]], [[XOR]] + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(f64) = G_BITCAST [[AND1]](i64) + ; SI-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 51 + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SUB]](i32), [[C5]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(sgt), [[SUB]](i32), [[C7]] + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(f64) = G_SELECT [[ICMP]](i1), [[BITCAST2]], [[BITCAST3]] + ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(f64) = G_SELECT [[ICMP1]](i1), [[UV]], [[SELECT]] + ; SI-NEXT: [[C8:%[0-9]+]]:_(f64) = G_FCONSTANT double 0x3DF0000000000000 + ; SI-NEXT: [[C9:%[0-9]+]]:_(f64) = G_FCONSTANT double 0xC1F0000000000000 + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[SELECT1]], [[C8]] + ; SI-NEXT: [[INT1:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](f64) + ; SI-NEXT: [[C10:%[0-9]+]]:_(f64) = G_FCONSTANT double 0x3FEFFFFFFFFFFFFF + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f64) = G_FMINNUM_IEEE [[INT1]], [[C10]] + ; SI-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(ord), [[FMUL]](f64), [[FMUL]] + ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(f64) = G_SELECT [[FCMP]](i1), [[FMUL]], [[FMINNUM_IEEE]] + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(f64) = G_FNEG [[SELECT2]] + ; SI-NEXT: [[FADD:%[0-9]+]]:_(f64) = G_FADD [[FMUL]], [[FNEG]] + ; SI-NEXT: [[FMA:%[0-9]+]]:_(f64) = G_FMA [[FADD]], [[C9]], [[SELECT1]] + ; SI-NEXT: [[FPTOSI:%[0-9]+]]:_(i32) = G_FPTOSI [[FADD]](f64) + ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMA]](f64) + ; SI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[FPTOUI]](i32), [[FPTOSI]](i32) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(i64) = G_BITCAST [[UV1]](f64) + ; SI-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST4]](i64) + ; SI-NEXT: [[INT2:%[0-9]+]]:_(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ubfe), [[UV5]](i32), [[C]](i32), [[C1]](i32) + ; SI-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[INT2]], [[C2]] + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[UV5]], [[C3]] + ; SI-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[C5]](i32), [[AND2]](i32) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(f64) = G_BITCAST [[MV2]](i64) + ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[C4]], [[SUB1]](i32) + ; SI-NEXT: [[XOR1:%[0-9]+]]:_(i64) = G_XOR [[ASHR1]], [[C6]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i64) = G_AND [[BITCAST4]], [[XOR1]] + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(f64) = G_BITCAST [[AND3]](i64) + ; SI-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SUB1]](i32), [[C5]] + ; SI-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(sgt), [[SUB1]](i32), [[C7]] + ; SI-NEXT: [[SELECT3:%[0-9]+]]:_(f64) = G_SELECT [[ICMP2]](i1), [[BITCAST5]], [[BITCAST6]] + ; SI-NEXT: [[SELECT4:%[0-9]+]]:_(f64) = G_SELECT [[ICMP3]](i1), [[UV1]], [[SELECT3]] + ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(f64) = G_FMUL [[SELECT4]], [[C8]] + ; SI-NEXT: [[INT3:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](f64) + ; SI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(f64) = G_FMINNUM_IEEE 
[[INT3]], [[C10]] + ; SI-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(ord), [[FMUL1]](f64), [[FMUL1]] + ; SI-NEXT: [[SELECT5:%[0-9]+]]:_(f64) = G_SELECT [[FCMP1]](i1), [[FMUL1]], [[FMINNUM_IEEE1]] + ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(f64) = G_FNEG [[SELECT5]] + ; SI-NEXT: [[FADD1:%[0-9]+]]:_(f64) = G_FADD [[FMUL1]], [[FNEG1]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(f64) = G_FMA [[FADD1]], [[C9]], [[SELECT4]] + ; SI-NEXT: [[FPTOSI1:%[0-9]+]]:_(i32) = G_FPTOSI [[FADD1]](f64) + ; SI-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[FMA1]](f64) + ; SI-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[FPTOUI1]](i32), [[FPTOSI1]](i32) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[MV1]](i64), [[MV3]](i64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) + ; ; VI-LABEL: name: test_fptosi_v2s64_to_v2s64 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; VI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s64) = G_INTRINSIC_TRUNC [[UV]] - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3DF0000000000000 - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 0xC1F0000000000000 - ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INTRINSIC_TRUNC]], [[C]] - ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(s64) = G_FFLOOR [[FMUL]] - ; VI-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FFLOOR]], [[C1]], [[INTRINSIC_TRUNC]] - ; VI-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FFLOOR]](s64) - ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMA]](s64) - ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[FPTOUI]](s32), [[FPTOSI]](s32) - ; VI-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s64) = G_INTRINSIC_TRUNC [[UV1]] - ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[INTRINSIC_TRUNC1]], [[C]] - ; VI-NEXT: [[FFLOOR1:%[0-9]+]]:_(s64) = G_FFLOOR [[FMUL1]] - ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[FFLOOR1]], [[C1]], [[INTRINSIC_TRUNC1]] - ; VI-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FFLOOR1]](s64) - ; VI-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMA1]](s64) - ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[FPTOUI1]](s32), [[FPTOSI1]](s32) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s64>) = G_FPTOSI %0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(f64), [[UV1:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST]](<2 x f64>) + ; VI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f64) = G_INTRINSIC_TRUNC [[UV]] + ; VI-NEXT: [[C:%[0-9]+]]:_(f64) = G_FCONSTANT double 0x3DF0000000000000 + ; VI-NEXT: [[C1:%[0-9]+]]:_(f64) = G_FCONSTANT double 0xC1F0000000000000 + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[INTRINSIC_TRUNC]], [[C]] + ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(f64) = G_FFLOOR [[FMUL]] + ; VI-NEXT: [[FMA:%[0-9]+]]:_(f64) = G_FMA [[FFLOOR]], [[C1]], [[INTRINSIC_TRUNC]] + ; VI-NEXT: [[FPTOSI:%[0-9]+]]:_(i32) = G_FPTOSI [[FFLOOR]](f64) + ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMA]](f64) + ; VI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[FPTOUI]](i32), [[FPTOSI]](i32) + ; VI-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(f64) = G_INTRINSIC_TRUNC [[UV1]] + ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(f64) 
= G_FMUL [[INTRINSIC_TRUNC1]], [[C]] + ; VI-NEXT: [[FFLOOR1:%[0-9]+]]:_(f64) = G_FFLOOR [[FMUL1]] + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(f64) = G_FMA [[FFLOOR1]], [[C1]], [[INTRINSIC_TRUNC1]] + ; VI-NEXT: [[FPTOSI1:%[0-9]+]]:_(i32) = G_FPTOSI [[FFLOOR1]](f64) + ; VI-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[FMA1]](f64) + ; VI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[FPTOUI1]](i32), [[FPTOSI1]](i32) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x f64>) = G_BITCAST %0(<2 x i64>) + %2:_(<2 x i64>) = G_FPTOSI %1(<2 x f64>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<2 x i64>) ... --- @@ -416,52 +468,58 @@ body: | ; SI-LABEL: name: test_fptosi_s32_to_s64 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[COPY]] - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) - ; SI-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[INTRINSIC_TRUNC]] - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FABS]], [[C1]] - ; SI-NEXT: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[FMUL]] - ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FFLOOR]], [[C2]], [[FABS]] - ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FFLOOR]](s32) - ; SI-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMA]](s32) - ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR]](s32), [[ASHR]](s32) - ; SI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[FPTOUI1]](s32), [[FPTOUI]](s32) - ; SI-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[MV]] - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; SI-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[ASHR]] - ; SI-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[ASHR]], [[USUBO1]] - ; SI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV2]](s64) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[BITCAST]] + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST]](f32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; SI-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[BITCAST1]], [[C]](i32) + ; SI-NEXT: [[FABS:%[0-9]+]]:_(f32) = G_FABS [[INTRINSIC_TRUNC]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3DF0000000000000 + ; SI-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC1F0000000000000 + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[FABS]], [[C1]] + ; SI-NEXT: [[FFLOOR:%[0-9]+]]:_(f32) = G_FFLOOR [[FMUL]] + ; SI-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[FFLOOR]], [[C2]], [[FABS]] + ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FFLOOR]](f32) + ; SI-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[FMA]](f32) + ; SI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[ASHR]](i32), [[ASHR]](i32) + ; SI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[FPTOUI1]](i32), [[FPTOUI]](i32) + ; SI-NEXT: [[XOR:%[0-9]+]]:_(i64) = G_XOR [[MV1]], [[MV]] + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES 
[[XOR]](i64) + ; SI-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV]], [[ASHR]] + ; SI-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV1]], [[ASHR]], [[USUBO1]] + ; SI-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO]](i32), [[USUBE]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV2]](i64) + ; ; VI-LABEL: name: test_fptosi_s32_to_s64 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[COPY]] - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) - ; VI-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[INTRINSIC_TRUNC]] - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FABS]], [[C1]] - ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[FMUL]] - ; VI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FFLOOR]], [[C2]], [[FABS]] - ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FFLOOR]](s32) - ; VI-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMA]](s32) - ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR]](s32), [[ASHR]](s32) - ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[FPTOUI1]](s32), [[FPTOUI]](s32) - ; VI-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[MV]] - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; VI-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[ASHR]] - ; VI-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[ASHR]], [[USUBO1]] - ; VI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV2]](s64) - %0:_(s32) = COPY $vgpr0 - %1:_(s64) = G_FPTOSI %0 - $vgpr0_vgpr1 = COPY %1 + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[BITCAST]] + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST]](f32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; VI-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[BITCAST1]], [[C]](i32) + ; VI-NEXT: [[FABS:%[0-9]+]]:_(f32) = G_FABS [[INTRINSIC_TRUNC]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3DF0000000000000 + ; VI-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC1F0000000000000 + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[FABS]], [[C1]] + ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(f32) = G_FFLOOR [[FMUL]] + ; VI-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[FFLOOR]], [[C2]], [[FABS]] + ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FFLOOR]](f32) + ; VI-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[FMA]](f32) + ; VI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[ASHR]](i32), [[ASHR]](i32) + ; VI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[FPTOUI1]](i32), [[FPTOUI]](i32) + ; VI-NEXT: [[XOR:%[0-9]+]]:_(i64) = G_XOR [[MV1]], [[MV]] + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR]](i64) + ; VI-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV]], [[ASHR]] + ; VI-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV1]], [[ASHR]], [[USUBO1]] + ; VI-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO]](i32), [[USUBE]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV2]](i64) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = 
G_BITCAST %0(i32) + %2:_(i64) = G_FPTOSI %1(f32) + $vgpr0_vgpr1 = COPY %2(i64) ... --- @@ -473,86 +531,94 @@ body: | ; SI-LABEL: name: test_fptosi_v2s32_to_v2s64 ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; SI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[UV]] - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV]], [[C]](s32) - ; SI-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[INTRINSIC_TRUNC]] - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FABS]], [[C1]] - ; SI-NEXT: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[FMUL]] - ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FFLOOR]], [[C2]], [[FABS]] - ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FFLOOR]](s32) - ; SI-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMA]](s32) - ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR]](s32), [[ASHR]](s32) - ; SI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[FPTOUI1]](s32), [[FPTOUI]](s32) - ; SI-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[MV]] - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; SI-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[ASHR]] - ; SI-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[ASHR]], [[USUBO1]] - ; SI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) - ; SI-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[UV1]] - ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32) - ; SI-NEXT: [[FABS1:%[0-9]+]]:_(s32) = G_FABS [[INTRINSIC_TRUNC1]] - ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FABS1]], [[C1]] - ; SI-NEXT: [[FFLOOR1:%[0-9]+]]:_(s32) = G_FFLOOR [[FMUL1]] - ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FFLOOR1]], [[C2]], [[FABS1]] - ; SI-NEXT: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FFLOOR1]](s32) - ; SI-NEXT: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[FMA1]](s32) - ; SI-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR1]](s32), [[ASHR1]](s32) - ; SI-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[FPTOUI3]](s32), [[FPTOUI2]](s32) - ; SI-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV4]], [[MV3]] - ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; SI-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV4]], [[ASHR1]] - ; SI-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV5]], [[ASHR1]], [[USUBO3]] - ; SI-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV2]](s64), [[MV5]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; SI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[UV]] + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](f32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; SI-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[BITCAST1]], [[C]](i32) + ; SI-NEXT: 
[[FABS:%[0-9]+]]:_(f32) = G_FABS [[INTRINSIC_TRUNC]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3DF0000000000000 + ; SI-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC1F0000000000000 + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[FABS]], [[C1]] + ; SI-NEXT: [[FFLOOR:%[0-9]+]]:_(f32) = G_FFLOOR [[FMUL]] + ; SI-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[FFLOOR]], [[C2]], [[FABS]] + ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FFLOOR]](f32) + ; SI-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[FMA]](f32) + ; SI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[ASHR]](i32), [[ASHR]](i32) + ; SI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[FPTOUI1]](i32), [[FPTOUI]](i32) + ; SI-NEXT: [[XOR:%[0-9]+]]:_(i64) = G_XOR [[MV1]], [[MV]] + ; SI-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR]](i64) + ; SI-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV2]], [[ASHR]] + ; SI-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV3]], [[ASHR]], [[USUBO1]] + ; SI-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO]](i32), [[USUBE]](i32) + ; SI-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[UV1]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](f32) + ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[BITCAST2]], [[C]](i32) + ; SI-NEXT: [[FABS1:%[0-9]+]]:_(f32) = G_FABS [[INTRINSIC_TRUNC1]] + ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[FABS1]], [[C1]] + ; SI-NEXT: [[FFLOOR1:%[0-9]+]]:_(f32) = G_FFLOOR [[FMUL1]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[FFLOOR1]], [[C2]], [[FABS1]] + ; SI-NEXT: [[FPTOUI2:%[0-9]+]]:_(i32) = G_FPTOUI [[FFLOOR1]](f32) + ; SI-NEXT: [[FPTOUI3:%[0-9]+]]:_(i32) = G_FPTOUI [[FMA1]](f32) + ; SI-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[ASHR1]](i32), [[ASHR1]](i32) + ; SI-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[FPTOUI3]](i32), [[FPTOUI2]](i32) + ; SI-NEXT: [[XOR1:%[0-9]+]]:_(i64) = G_XOR [[MV4]], [[MV3]] + ; SI-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; SI-NEXT: [[USUBO2:%[0-9]+]]:_(i32), [[USUBO3:%[0-9]+]]:_(i1) = G_USUBO [[UV4]], [[ASHR1]] + ; SI-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV5]], [[ASHR1]], [[USUBO3]] + ; SI-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO2]](i32), [[USUBE2]](i32) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[MV2]](i64), [[MV5]](i64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) + ; ; VI-LABEL: name: test_fptosi_v2s32_to_v2s64 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; VI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[UV]] - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV]], [[C]](s32) - ; VI-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[INTRINSIC_TRUNC]] - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FABS]], [[C1]] - ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[FMUL]] - ; VI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FFLOOR]], [[C2]], [[FABS]] - ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FFLOOR]](s32) - ; VI-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMA]](s32) - ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = 
G_MERGE_VALUES [[ASHR]](s32), [[ASHR]](s32) - ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[FPTOUI1]](s32), [[FPTOUI]](s32) - ; VI-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[MV]] - ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; VI-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[ASHR]] - ; VI-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[ASHR]], [[USUBO1]] - ; VI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) - ; VI-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[UV1]] - ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32) - ; VI-NEXT: [[FABS1:%[0-9]+]]:_(s32) = G_FABS [[INTRINSIC_TRUNC1]] - ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FABS1]], [[C1]] - ; VI-NEXT: [[FFLOOR1:%[0-9]+]]:_(s32) = G_FFLOOR [[FMUL1]] - ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FFLOOR1]], [[C2]], [[FABS1]] - ; VI-NEXT: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FFLOOR1]](s32) - ; VI-NEXT: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[FMA1]](s32) - ; VI-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR1]](s32), [[ASHR1]](s32) - ; VI-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[FPTOUI3]](s32), [[FPTOUI2]](s32) - ; VI-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV4]], [[MV3]] - ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; VI-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV4]], [[ASHR1]] - ; VI-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV5]], [[ASHR1]], [[USUBO3]] - ; VI-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV2]](s64), [[MV5]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s64>) = G_FPTOSI %0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; VI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[UV]] + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](f32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; VI-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[BITCAST1]], [[C]](i32) + ; VI-NEXT: [[FABS:%[0-9]+]]:_(f32) = G_FABS [[INTRINSIC_TRUNC]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3DF0000000000000 + ; VI-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC1F0000000000000 + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[FABS]], [[C1]] + ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(f32) = G_FFLOOR [[FMUL]] + ; VI-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[FFLOOR]], [[C2]], [[FABS]] + ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FFLOOR]](f32) + ; VI-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[FMA]](f32) + ; VI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[ASHR]](i32), [[ASHR]](i32) + ; VI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[FPTOUI1]](i32), [[FPTOUI]](i32) + ; VI-NEXT: [[XOR:%[0-9]+]]:_(i64) = G_XOR [[MV1]], [[MV]] + ; VI-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR]](i64) + ; VI-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV2]], [[ASHR]] + ; VI-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV3]], [[ASHR]], 
[[USUBO1]] + ; VI-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO]](i32), [[USUBE]](i32) + ; VI-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[UV1]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](f32) + ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[BITCAST2]], [[C]](i32) + ; VI-NEXT: [[FABS1:%[0-9]+]]:_(f32) = G_FABS [[INTRINSIC_TRUNC1]] + ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[FABS1]], [[C1]] + ; VI-NEXT: [[FFLOOR1:%[0-9]+]]:_(f32) = G_FFLOOR [[FMUL1]] + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[FFLOOR1]], [[C2]], [[FABS1]] + ; VI-NEXT: [[FPTOUI2:%[0-9]+]]:_(i32) = G_FPTOUI [[FFLOOR1]](f32) + ; VI-NEXT: [[FPTOUI3:%[0-9]+]]:_(i32) = G_FPTOUI [[FMA1]](f32) + ; VI-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[ASHR1]](i32), [[ASHR1]](i32) + ; VI-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[FPTOUI3]](i32), [[FPTOUI2]](i32) + ; VI-NEXT: [[XOR1:%[0-9]+]]:_(i64) = G_XOR [[MV4]], [[MV3]] + ; VI-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; VI-NEXT: [[USUBO2:%[0-9]+]]:_(i32), [[USUBO3:%[0-9]+]]:_(i1) = G_USUBO [[UV4]], [[ASHR1]] + ; VI-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV5]], [[ASHR1]], [[USUBO3]] + ; VI-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO2]](i32), [[USUBE2]](i32) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[MV2]](i64), [[MV5]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x f32>) = G_BITCAST %0(<2 x i32>) + %2:_(<2 x i64>) = G_FPTOSI %1(<2 x f32>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<2 x i64>) ... --- @@ -564,23 +630,27 @@ body: | ; SI-LABEL: name: test_fptosi_s16_to_s64 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; SI-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[TRUNC]](s16) - ; SI-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[FPTOSI]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; SI-NEXT: [[FPTOSI:%[0-9]+]]:_(i32) = G_FPTOSI [[BITCAST]](f16) + ; SI-NEXT: [[SEXT:%[0-9]+]]:_(i64) = G_SEXT [[FPTOSI]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](i64) + ; ; VI-LABEL: name: test_fptosi_s16_to_s64 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[TRUNC]](s16) - ; VI-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[FPTOSI]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s64) = G_FPTOSI %1 - $vgpr0_vgpr1 = COPY %2 + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; VI-NEXT: [[FPTOSI:%[0-9]+]]:_(i32) = G_FPTOSI [[BITCAST]](f16) + ; VI-NEXT: [[SEXT:%[0-9]+]]:_(i64) = G_SEXT [[FPTOSI]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](i64) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(f16) = G_BITCAST %1(i16) + %3:_(i64) = G_FPTOSI %2(f16) + $vgpr0_vgpr1 = COPY %3(i64) ... 
--- @@ -592,36 +662,46 @@ body: | ; SI-LABEL: name: test_fptosi_v2s16_to_v2s64 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[TRUNC]](s16) - ; SI-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[FPTOSI]](s32) - ; SI-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[TRUNC1]](s16) - ; SI-NEXT: [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT [[FPTOSI1]](s32) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SEXT]](s64), [[SEXT1]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %9(i16) + ; SI-NEXT: [[FPTOSI:%[0-9]+]]:_(i32) = G_FPTOSI [[BITCAST1]](f16) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %10(i16) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; SI-NEXT: [[SEXT:%[0-9]+]]:_(i64) = G_SEXT [[FPTOSI]](i32) + ; SI-NEXT: [[FPTOSI1:%[0-9]+]]:_(i32) = G_FPTOSI [[BITCAST2]](f16) + ; SI-NEXT: [[SEXT1:%[0-9]+]]:_(i64) = G_SEXT [[FPTOSI1]](i32) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[SEXT]](i64), [[SEXT1]](i64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) + ; ; VI-LABEL: name: test_fptosi_v2s16_to_v2s64 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[TRUNC]](s16) - ; VI-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[FPTOSI]](s32) - ; VI-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[TRUNC1]](s16) - ; VI-NEXT: [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT [[FPTOSI1]](s32) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SEXT]](s64), [[SEXT1]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s64>) = G_FPTOSI %0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %9(i16) + ; VI-NEXT: [[FPTOSI:%[0-9]+]]:_(i32) = G_FPTOSI [[BITCAST1]](f16) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %10(i16) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; VI-NEXT: 
[[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[SEXT:%[0-9]+]]:_(i64) = G_SEXT [[FPTOSI]](i32) + ; VI-NEXT: [[FPTOSI1:%[0-9]+]]:_(i32) = G_FPTOSI [[BITCAST2]](f16) + ; VI-NEXT: [[SEXT1:%[0-9]+]]:_(i64) = G_SEXT [[FPTOSI1]](i32) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[SEXT]](i64), [[SEXT1]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %2:_(<2 x i64>) = G_FPTOSI %1(<2 x f16>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<2 x i64>) ... --- @@ -632,24 +712,28 @@ body: | ; SI-LABEL: name: test_fptosi_s16_to_s1 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FPEXT]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[FPTOSI]](s32) - ; SI-NEXT: S_ENDPGM 0, implicit [[TRUNC1]](s1) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; SI-NEXT: [[FPTOSI:%[0-9]+]]:_(i32) = G_FPTOSI [[FPEXT]](f32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i1) = G_TRUNC [[FPTOSI]](i32) + ; SI-NEXT: S_ENDPGM 0, implicit [[TRUNC1]](i1) + ; ; VI-LABEL: name: test_fptosi_s16_to_s1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[TRUNC]](s16) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[FPTOSI]](s32) - ; VI-NEXT: S_ENDPGM 0, implicit [[TRUNC1]](s1) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s1) = G_FPTOSI %1 - S_ENDPGM 0, implicit %2 + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; VI-NEXT: [[FPTOSI:%[0-9]+]]:_(i32) = G_FPTOSI [[BITCAST]](f16) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i1) = G_TRUNC [[FPTOSI]](i32) + ; VI-NEXT: S_ENDPGM 0, implicit [[TRUNC1]](i1) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(f16) = G_BITCAST %1(i16) + %3:_(i1) = G_FPTOSI %2(f16) + S_ENDPGM 0, implicit %3(i1) ... 
--- @@ -661,23 +745,27 @@ body: | ; SI-LABEL: name: test_fptosi_s16_to_s15 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FPEXT]](s32) - ; SI-NEXT: $vgpr0 = COPY [[FPTOSI]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; SI-NEXT: [[FPTOSI:%[0-9]+]]:_(i32) = G_FPTOSI [[FPEXT]](f32) + ; SI-NEXT: $vgpr0 = COPY [[FPTOSI]](i32) + ; ; VI-LABEL: name: test_fptosi_s16_to_s15 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[TRUNC]](s16) - ; VI-NEXT: $vgpr0 = COPY [[FPTOSI]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s15) = G_FPTOSI %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; VI-NEXT: [[FPTOSI:%[0-9]+]]:_(i32) = G_FPTOSI [[BITCAST]](f16) + ; VI-NEXT: $vgpr0 = COPY [[FPTOSI]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(f16) = G_BITCAST %1(i16) + %3:_(i15) = G_FPTOSI %2(f16) + %4:_(i32) = G_ANYEXT %3(i15) + $vgpr0 = COPY %4(i32) ... --- @@ -689,23 +777,27 @@ body: | ; SI-LABEL: name: test_fptosi_s16_to_s17 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FPEXT]](s32) - ; SI-NEXT: $vgpr0 = COPY [[FPTOSI]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; SI-NEXT: [[FPTOSI:%[0-9]+]]:_(i32) = G_FPTOSI [[FPEXT]](f32) + ; SI-NEXT: $vgpr0 = COPY [[FPTOSI]](i32) + ; ; VI-LABEL: name: test_fptosi_s16_to_s17 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[TRUNC]](s16) - ; VI-NEXT: $vgpr0 = COPY [[FPTOSI]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s17) = G_FPTOSI %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; VI-NEXT: [[FPTOSI:%[0-9]+]]:_(i32) = G_FPTOSI [[BITCAST]](f16) + ; VI-NEXT: $vgpr0 = COPY [[FPTOSI]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(f16) = G_BITCAST %1(i16) + %3:_(i17) = G_FPTOSI %2(f16) + %4:_(i32) = G_ANYEXT %3(i17) + $vgpr0 = COPY %4(i32) ... 
--- @@ -717,53 +809,59 @@ body: | ; SI-LABEL: name: test_fptosi_s32_to_s33 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[COPY]] - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) - ; SI-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[INTRINSIC_TRUNC]] - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FABS]], [[C1]] - ; SI-NEXT: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[FMUL]] - ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FFLOOR]], [[C2]], [[FABS]] - ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FFLOOR]](s32) - ; SI-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMA]](s32) - ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR]](s32), [[ASHR]](s32) - ; SI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[FPTOUI1]](s32), [[FPTOUI]](s32) - ; SI-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[MV]] - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; SI-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[ASHR]] - ; SI-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[ASHR]], [[USUBO1]] - ; SI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV2]](s64) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[BITCAST]] + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST]](f32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; SI-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[BITCAST1]], [[C]](i32) + ; SI-NEXT: [[FABS:%[0-9]+]]:_(f32) = G_FABS [[INTRINSIC_TRUNC]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3DF0000000000000 + ; SI-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC1F0000000000000 + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[FABS]], [[C1]] + ; SI-NEXT: [[FFLOOR:%[0-9]+]]:_(f32) = G_FFLOOR [[FMUL]] + ; SI-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[FFLOOR]], [[C2]], [[FABS]] + ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FFLOOR]](f32) + ; SI-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[FMA]](f32) + ; SI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[ASHR]](i32), [[ASHR]](i32) + ; SI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[FPTOUI1]](i32), [[FPTOUI]](i32) + ; SI-NEXT: [[XOR:%[0-9]+]]:_(i64) = G_XOR [[MV1]], [[MV]] + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR]](i64) + ; SI-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV]], [[ASHR]] + ; SI-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV1]], [[ASHR]], [[USUBO1]] + ; SI-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO]](i32), [[USUBE]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV2]](i64) + ; ; VI-LABEL: name: test_fptosi_s32_to_s33 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[COPY]] - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) - ; VI-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[INTRINSIC_TRUNC]] - ; VI-NEXT: 
[[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FABS]], [[C1]] - ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[FMUL]] - ; VI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FFLOOR]], [[C2]], [[FABS]] - ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FFLOOR]](s32) - ; VI-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMA]](s32) - ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR]](s32), [[ASHR]](s32) - ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[FPTOUI1]](s32), [[FPTOUI]](s32) - ; VI-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[MV]] - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; VI-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[ASHR]] - ; VI-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[ASHR]], [[USUBO1]] - ; VI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV2]](s64) - %0:_(s32) = COPY $vgpr0 - %1:_(s33) = G_FPTOSI %0 - %2:_(s64) = G_ANYEXT %1 - $vgpr0_vgpr1 = COPY %2 + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[BITCAST]] + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST]](f32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; VI-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[BITCAST1]], [[C]](i32) + ; VI-NEXT: [[FABS:%[0-9]+]]:_(f32) = G_FABS [[INTRINSIC_TRUNC]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3DF0000000000000 + ; VI-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC1F0000000000000 + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[FABS]], [[C1]] + ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(f32) = G_FFLOOR [[FMUL]] + ; VI-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[FFLOOR]], [[C2]], [[FABS]] + ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FFLOOR]](f32) + ; VI-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[FMA]](f32) + ; VI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[ASHR]](i32), [[ASHR]](i32) + ; VI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[FPTOUI1]](i32), [[FPTOUI]](i32) + ; VI-NEXT: [[XOR:%[0-9]+]]:_(i64) = G_XOR [[MV1]], [[MV]] + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR]](i64) + ; VI-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV]], [[ASHR]] + ; VI-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV1]], [[ASHR]], [[USUBO1]] + ; VI-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO]](i32), [[USUBE]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV2]](i64) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(i33) = G_FPTOSI %1(f32) + %3:_(i64) = G_ANYEXT %2(i33) + $vgpr0_vgpr1 = COPY %3(i64) ... 
--- @@ -775,23 +873,27 @@ body: | ; SI-LABEL: name: test_fptosi_s16_to_s7 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FPEXT]](s32) - ; SI-NEXT: $vgpr0 = COPY [[FPTOSI]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; SI-NEXT: [[FPTOSI:%[0-9]+]]:_(i32) = G_FPTOSI [[FPEXT]](f32) + ; SI-NEXT: $vgpr0 = COPY [[FPTOSI]](i32) + ; ; VI-LABEL: name: test_fptosi_s16_to_s7 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[TRUNC]](s16) - ; VI-NEXT: $vgpr0 = COPY [[FPTOSI]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s7) = G_FPTOSI %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; VI-NEXT: [[FPTOSI:%[0-9]+]]:_(i32) = G_FPTOSI [[BITCAST]](f16) + ; VI-NEXT: $vgpr0 = COPY [[FPTOSI]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(f16) = G_BITCAST %1(i16) + %3:_(i7) = G_FPTOSI %2(f16) + %4:_(i32) = G_ANYEXT %3(i7) + $vgpr0 = COPY %4(i32) ... --- @@ -803,23 +905,27 @@ body: | ; SI-LABEL: name: test_fptosi_s16_to_s8 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FPEXT]](s32) - ; SI-NEXT: $vgpr0 = COPY [[FPTOSI]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; SI-NEXT: [[FPTOSI:%[0-9]+]]:_(i32) = G_FPTOSI [[FPEXT]](f32) + ; SI-NEXT: $vgpr0 = COPY [[FPTOSI]](i32) + ; ; VI-LABEL: name: test_fptosi_s16_to_s8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[TRUNC]](s16) - ; VI-NEXT: $vgpr0 = COPY [[FPTOSI]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s8) = G_FPTOSI %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; VI-NEXT: [[FPTOSI:%[0-9]+]]:_(i32) = G_FPTOSI [[BITCAST]](f16) + ; VI-NEXT: $vgpr0 = COPY [[FPTOSI]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(f16) = G_BITCAST %1(i16) + %3:_(i8) = G_FPTOSI %2(f16) + %4:_(i32) = G_ANYEXT %3(i8) + $vgpr0 = COPY %4(i32) ... 
--- @@ -831,23 +937,27 @@ body: | ; SI-LABEL: name: test_fptosi_s16_to_s9 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FPEXT]](s32) - ; SI-NEXT: $vgpr0 = COPY [[FPTOSI]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; SI-NEXT: [[FPTOSI:%[0-9]+]]:_(i32) = G_FPTOSI [[FPEXT]](f32) + ; SI-NEXT: $vgpr0 = COPY [[FPTOSI]](i32) + ; ; VI-LABEL: name: test_fptosi_s16_to_s9 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[TRUNC]](s16) - ; VI-NEXT: $vgpr0 = COPY [[FPTOSI]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s9) = G_FPTOSI %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; VI-NEXT: [[FPTOSI:%[0-9]+]]:_(i32) = G_FPTOSI [[BITCAST]](f16) + ; VI-NEXT: $vgpr0 = COPY [[FPTOSI]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(f16) = G_BITCAST %1(i16) + %3:_(i9) = G_FPTOSI %2(f16) + %4:_(i32) = G_ANYEXT %3(i9) + $vgpr0 = COPY %4(i32) ... --- @@ -859,19 +969,23 @@ body: | ; SI-LABEL: name: test_fptosi_s32_to_s15 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32) - ; SI-NEXT: $vgpr0 = COPY [[FPTOSI]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[FPTOSI:%[0-9]+]]:_(i32) = G_FPTOSI [[BITCAST]](f32) + ; SI-NEXT: $vgpr0 = COPY [[FPTOSI]](i32) + ; ; VI-LABEL: name: test_fptosi_s32_to_s15 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32) - ; VI-NEXT: $vgpr0 = COPY [[FPTOSI]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s15) = G_FPTOSI %0 - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[FPTOSI:%[0-9]+]]:_(i32) = G_FPTOSI [[BITCAST]](f32) + ; VI-NEXT: $vgpr0 = COPY [[FPTOSI]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(i15) = G_FPTOSI %1(f32) + %3:_(i32) = G_ANYEXT %2(i15) + $vgpr0 = COPY %3(i32) ... 
--- @@ -883,17 +997,21 @@ body: | ; SI-LABEL: name: test_fptosi_s32_to_s17 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32) - ; SI-NEXT: $vgpr0 = COPY [[FPTOSI]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[FPTOSI:%[0-9]+]]:_(i32) = G_FPTOSI [[BITCAST]](f32) + ; SI-NEXT: $vgpr0 = COPY [[FPTOSI]](i32) + ; ; VI-LABEL: name: test_fptosi_s32_to_s17 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32) - ; VI-NEXT: $vgpr0 = COPY [[FPTOSI]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s17) = G_FPTOSI %0 - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[FPTOSI:%[0-9]+]]:_(i32) = G_FPTOSI [[BITCAST]](f32) + ; VI-NEXT: $vgpr0 = COPY [[FPTOSI]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(i17) = G_FPTOSI %1(f32) + %3:_(i32) = G_ANYEXT %2(i17) + $vgpr0 = COPY %3(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptoui.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptoui.mir index 2eab791f2ba12..bfeb5d20046d9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptoui.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptoui.mir @@ -11,18 +11,22 @@ body: | ; SI-LABEL: name: test_fptoui_s32_s32 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[COPY]](s32) - ; SI-NEXT: $vgpr0 = COPY [[FPTOUI]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[BITCAST]](f32) + ; SI-NEXT: $vgpr0 = COPY [[FPTOUI]](i32) + ; ; VI-LABEL: name: test_fptoui_s32_s32 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[COPY]](s32) - ; VI-NEXT: $vgpr0 = COPY [[FPTOUI]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_FPTOUI %0 - $vgpr0 = COPY %1 + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[BITCAST]](f32) + ; VI-NEXT: $vgpr0 = COPY [[FPTOUI]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(i32) = G_FPTOUI %1(f32) + $vgpr0 = COPY %2(i32) ... 
--- @@ -34,18 +38,22 @@ body: | ; SI-LABEL: name: test_fptoui_s32_s64 ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[COPY]](s64) - ; SI-NEXT: $vgpr0 = COPY [[FPTOUI]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[BITCAST]](f64) + ; SI-NEXT: $vgpr0 = COPY [[FPTOUI]](i32) + ; ; VI-LABEL: name: test_fptoui_s32_s64 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[COPY]](s64) - ; VI-NEXT: $vgpr0 = COPY [[FPTOUI]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_FPTOUI %0 - $vgpr0 = COPY %1 + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[BITCAST]](f64) + ; VI-NEXT: $vgpr0 = COPY [[FPTOUI]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(f64) = G_BITCAST %0(i64) + %2:_(i32) = G_FPTOUI %1(f64) + $vgpr0 = COPY %2(i32) ... --- @@ -57,24 +65,28 @@ body: | ; SI-LABEL: name: test_fptoui_v2s32_to_v2s32 ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[UV]](s32) - ; SI-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[UV1]](s32) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FPTOUI]](s32), [[FPTOUI1]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[UV]](f32) + ; SI-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[UV1]](f32) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[FPTOUI]](i32), [[FPTOUI1]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + ; ; VI-LABEL: name: test_fptoui_v2s32_to_v2s32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[UV]](s32) - ; VI-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[UV1]](s32) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FPTOUI]](s32), [[FPTOUI1]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_FPTOUI %0 - $vgpr0_vgpr1 = COPY %1 + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[UV]](f32) + ; VI-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[UV1]](f32) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[FPTOUI]](i32), [[FPTOUI1]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x f32>) = G_BITCAST %0(<2 x i32>) 
+ %2:_(<2 x i32>) = G_FPTOUI %1(<2 x f32>) + $vgpr0_vgpr1 = COPY %2(<2 x i32>) ... --- @@ -86,24 +98,28 @@ body: | ; SI-LABEL: name: test_fptoui_v2s64_to_v2s32 ; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[UV]](s64) - ; SI-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[UV1]](s64) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FPTOUI]](s32), [[FPTOUI1]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(f64), [[UV1:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST]](<2 x f64>) + ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[UV]](f64) + ; SI-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[UV1]](f64) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[FPTOUI]](i32), [[FPTOUI1]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + ; ; VI-LABEL: name: test_fptoui_v2s64_to_v2s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[UV]](s64) - ; VI-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[UV1]](s64) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FPTOUI]](s32), [[FPTOUI1]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s32>) = G_FPTOUI %0 - $vgpr0_vgpr1 = COPY %1 + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(f64), [[UV1:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST]](<2 x f64>) + ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[UV]](f64) + ; VI-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[UV1]](f64) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[FPTOUI]](i32), [[FPTOUI1]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x f64>) = G_BITCAST %0(<2 x i64>) + %2:_(<2 x i32>) = G_FPTOUI %1(<2 x f64>) + $vgpr0_vgpr1 = COPY %2(<2 x i32>) ... 
--- @@ -115,24 +131,28 @@ body: | ; SI-LABEL: name: test_fptoui_s16_to_s16 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FPEXT]](s32) - ; SI-NEXT: $vgpr0 = COPY [[FPTOUI]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FPEXT]](f32) + ; SI-NEXT: $vgpr0 = COPY [[FPTOUI]](i32) + ; ; VI-LABEL: name: test_fptoui_s16_to_s16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(s16) = G_FPTOUI [[TRUNC]](s16) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTOUI]](s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s16) = G_FPTOUI %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(i16) = G_FPTOUI [[BITCAST]](f16) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[FPTOUI]](i16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(f16) = G_BITCAST %1(i16) + %3:_(i16) = G_FPTOUI %2(f16) + %4:_(i32) = G_ANYEXT %3(i16) + $vgpr0 = COPY %4(i32) ... --- @@ -144,19 +164,23 @@ body: | ; SI-LABEL: name: test_fptoui_s32_to_s16 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[COPY]](s32) - ; SI-NEXT: $vgpr0 = COPY [[FPTOUI]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[BITCAST]](f32) + ; SI-NEXT: $vgpr0 = COPY [[FPTOUI]](i32) + ; ; VI-LABEL: name: test_fptoui_s32_to_s16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[COPY]](s32) - ; VI-NEXT: $vgpr0 = COPY [[FPTOUI]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_FPTOUI %0 - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[BITCAST]](f32) + ; VI-NEXT: $vgpr0 = COPY [[FPTOUI]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(i16) = G_FPTOUI %1(f32) + %3:_(i32) = G_ANYEXT %2(i16) + $vgpr0 = COPY %3(i32) ... 
--- @@ -168,19 +192,23 @@ body: | ; SI-LABEL: name: test_fptoui_s64_to_s16 ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[COPY]](s64) - ; SI-NEXT: $vgpr0 = COPY [[FPTOUI]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[BITCAST]](f64) + ; SI-NEXT: $vgpr0 = COPY [[FPTOUI]](i32) + ; ; VI-LABEL: name: test_fptoui_s64_to_s16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[COPY]](s64) - ; VI-NEXT: $vgpr0 = COPY [[FPTOUI]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s16) = G_FPTOUI %0 - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[BITCAST]](f64) + ; VI-NEXT: $vgpr0 = COPY [[FPTOUI]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(f64) = G_BITCAST %0(i64) + %2:_(i16) = G_FPTOUI %1(f64) + %3:_(i32) = G_ANYEXT %2(i16) + $vgpr0 = COPY %3(i32) ... --- @@ -192,59 +220,66 @@ body: | ; SI-LABEL: name: test_fptoui_s64_s64 ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 - ; SI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ubfe), [[UV1]](s32), [[C]](s32), [[C1]](s32) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1023 - ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]] - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4503599627370495 - ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C5]](s32), [[AND]](s32) - ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[C4]], [[SUB]](s32) - ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; SI-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[C6]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[XOR]] - ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 51 - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SUB]](s32), [[C5]] - ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SUB]](s32), [[C7]] - ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[MV]], [[AND1]] - ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[COPY]], [[SELECT]] - ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3DF0000000000000 - ; SI-NEXT: [[C9:%[0-9]+]]:_(s64) = G_FCONSTANT double 0xC1F0000000000000 - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[SELECT1]], [[C8]] - ; SI-NEXT: [[INT1:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s64) - ; SI-NEXT: [[C10:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3FEFFFFFFFFFFFFF - ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s64) = G_FMINNUM_IEEE [[INT1]], [[C10]] - ; SI-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ord), [[FMUL]](s64), [[FMUL]] - ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[FMUL]], [[FMINNUM_IEEE]] - ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[SELECT2]] - ; SI-NEXT: 
[[FADD:%[0-9]+]]:_(s64) = G_FADD [[FMUL]], [[FNEG]] - ; SI-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FADD]], [[C9]], [[SELECT1]] - ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD]](s64) - ; SI-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMA]](s64) - ; SI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[FPTOUI1]](s32), [[FPTOUI]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV1]](s64) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[BITCAST]](f64) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST1]](i64) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 11 + ; SI-NEXT: [[INT:%[0-9]+]]:_(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ubfe), [[UV1]](i32), [[C]](i32), [[C1]](i32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 1023 + ; SI-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[INT]], [[C2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[UV1]], [[C3]] + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4503599627370495 + ; SI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; SI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[C5]](i32), [[AND]](i32) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(f64) = G_BITCAST [[MV]](i64) + ; SI-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[C4]], [[SUB]](i32) + ; SI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 -1 + ; SI-NEXT: [[XOR:%[0-9]+]]:_(i64) = G_XOR [[ASHR]], [[C6]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[BITCAST1]], [[XOR]] + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(f64) = G_BITCAST [[AND1]](i64) + ; SI-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 51 + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SUB]](i32), [[C5]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(sgt), [[SUB]](i32), [[C7]] + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(f64) = G_SELECT [[ICMP]](i1), [[BITCAST2]], [[BITCAST3]] + ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(f64) = G_SELECT [[ICMP1]](i1), [[BITCAST]], [[SELECT]] + ; SI-NEXT: [[C8:%[0-9]+]]:_(f64) = G_FCONSTANT double 0x3DF0000000000000 + ; SI-NEXT: [[C9:%[0-9]+]]:_(f64) = G_FCONSTANT double 0xC1F0000000000000 + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[SELECT1]], [[C8]] + ; SI-NEXT: [[INT1:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](f64) + ; SI-NEXT: [[C10:%[0-9]+]]:_(f64) = G_FCONSTANT double 0x3FEFFFFFFFFFFFFF + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f64) = G_FMINNUM_IEEE [[INT1]], [[C10]] + ; SI-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(ord), [[FMUL]](f64), [[FMUL]] + ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(f64) = G_SELECT [[FCMP]](i1), [[FMUL]], [[FMINNUM_IEEE]] + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(f64) = G_FNEG [[SELECT2]] + ; SI-NEXT: [[FADD:%[0-9]+]]:_(f64) = G_FADD [[FMUL]], [[FNEG]] + ; SI-NEXT: [[FMA:%[0-9]+]]:_(f64) = G_FMA [[FADD]], [[C9]], [[SELECT1]] + ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD]](f64) + ; SI-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[FMA]](f64) + ; SI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[FPTOUI1]](i32), [[FPTOUI]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV1]](i64) + ; ; VI-LABEL: name: test_fptoui_s64_s64 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s64) = G_INTRINSIC_TRUNC [[COPY]] - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3DF0000000000000 - ; VI-NEXT: 
[[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 0xC1F0000000000000 - ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INTRINSIC_TRUNC]], [[C]] - ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(s64) = G_FFLOOR [[FMUL]] - ; VI-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FFLOOR]], [[C1]], [[INTRINSIC_TRUNC]] - ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FFLOOR]](s64) - ; VI-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMA]](s64) - ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[FPTOUI1]](s32), [[FPTOUI]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_FPTOUI %0 - $vgpr0_vgpr1 = COPY %1 + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; VI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f64) = G_INTRINSIC_TRUNC [[BITCAST]] + ; VI-NEXT: [[C:%[0-9]+]]:_(f64) = G_FCONSTANT double 0x3DF0000000000000 + ; VI-NEXT: [[C1:%[0-9]+]]:_(f64) = G_FCONSTANT double 0xC1F0000000000000 + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[INTRINSIC_TRUNC]], [[C]] + ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(f64) = G_FFLOOR [[FMUL]] + ; VI-NEXT: [[FMA:%[0-9]+]]:_(f64) = G_FMA [[FFLOOR]], [[C1]], [[INTRINSIC_TRUNC]] + ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FFLOOR]](f64) + ; VI-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[FMA]](f64) + ; VI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[FPTOUI1]](i32), [[FPTOUI]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(f64) = G_BITCAST %0(i64) + %2:_(i64) = G_FPTOUI %1(f64) + $vgpr0_vgpr1 = COPY %2(i64) ... --- @@ -256,57 +291,64 @@ body: | ; SI-LABEL: name: test_fptoui_s64_s64_flags ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 - ; SI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ubfe), [[UV1]](s32), [[C]](s32), [[C1]](s32) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1023 - ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]] - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4503599627370495 - ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C5]](s32), [[AND]](s32) - ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[C4]], [[SUB]](s32) - ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; SI-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[C6]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[XOR]] - ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 51 - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SUB]](s32), [[C5]] - ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SUB]](s32), [[C7]] - ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[MV]], [[AND1]] - ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[COPY]], [[SELECT]] - ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3DF0000000000000 - ; SI-NEXT: [[C9:%[0-9]+]]:_(s64) = G_FCONSTANT double 0xC1F0000000000000 - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s64) = nnan G_FMUL [[SELECT1]], [[C8]] - ; SI-NEXT: [[INT1:%[0-9]+]]:_(s64) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s64) - ; SI-NEXT: [[C10:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3FEFFFFFFFFFFFFF - ; 
SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s64) = nnan G_FMINNUM_IEEE [[INT1]], [[C10]] - ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = nnan G_FNEG [[FMINNUM_IEEE]] - ; SI-NEXT: [[FADD:%[0-9]+]]:_(s64) = nnan G_FADD [[FMUL]], [[FNEG]] - ; SI-NEXT: [[FMA:%[0-9]+]]:_(s64) = nnan G_FMA [[FADD]], [[C9]], [[SELECT1]] - ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD]](s64) - ; SI-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMA]](s64) - ; SI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[FPTOUI1]](s32), [[FPTOUI]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV1]](s64) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[BITCAST]](f64) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST1]](i64) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 11 + ; SI-NEXT: [[INT:%[0-9]+]]:_(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ubfe), [[UV1]](i32), [[C]](i32), [[C1]](i32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 1023 + ; SI-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[INT]], [[C2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[UV1]], [[C3]] + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4503599627370495 + ; SI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; SI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[C5]](i32), [[AND]](i32) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(f64) = G_BITCAST [[MV]](i64) + ; SI-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[C4]], [[SUB]](i32) + ; SI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 -1 + ; SI-NEXT: [[XOR:%[0-9]+]]:_(i64) = G_XOR [[ASHR]], [[C6]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[BITCAST1]], [[XOR]] + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(f64) = G_BITCAST [[AND1]](i64) + ; SI-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 51 + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SUB]](i32), [[C5]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(sgt), [[SUB]](i32), [[C7]] + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(f64) = G_SELECT [[ICMP]](i1), [[BITCAST2]], [[BITCAST3]] + ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(f64) = G_SELECT [[ICMP1]](i1), [[BITCAST]], [[SELECT]] + ; SI-NEXT: [[C8:%[0-9]+]]:_(f64) = G_FCONSTANT double 0x3DF0000000000000 + ; SI-NEXT: [[C9:%[0-9]+]]:_(f64) = G_FCONSTANT double 0xC1F0000000000000 + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f64) = nnan G_FMUL [[SELECT1]], [[C8]] + ; SI-NEXT: [[INT1:%[0-9]+]]:_(f64) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](f64) + ; SI-NEXT: [[C10:%[0-9]+]]:_(f64) = G_FCONSTANT double 0x3FEFFFFFFFFFFFFF + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f64) = nnan G_FMINNUM_IEEE [[INT1]], [[C10]] + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(f64) = nnan G_FNEG [[FMINNUM_IEEE]] + ; SI-NEXT: [[FADD:%[0-9]+]]:_(f64) = nnan G_FADD [[FMUL]], [[FNEG]] + ; SI-NEXT: [[FMA:%[0-9]+]]:_(f64) = nnan G_FMA [[FADD]], [[C9]], [[SELECT1]] + ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD]](f64) + ; SI-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[FMA]](f64) + ; SI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[FPTOUI1]](i32), [[FPTOUI]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV1]](i64) + ; ; VI-LABEL: name: test_fptoui_s64_s64_flags ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s64) = nnan G_INTRINSIC_TRUNC [[COPY]] - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 
0x3DF0000000000000 - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 0xC1F0000000000000 - ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s64) = nnan G_FMUL [[INTRINSIC_TRUNC]], [[C]] - ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(s64) = nnan G_FFLOOR [[FMUL]] - ; VI-NEXT: [[FMA:%[0-9]+]]:_(s64) = nnan G_FMA [[FFLOOR]], [[C1]], [[INTRINSIC_TRUNC]] - ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FFLOOR]](s64) - ; VI-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMA]](s64) - ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[FPTOUI1]](s32), [[FPTOUI]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = nnan G_FPTOUI %0 - $vgpr0_vgpr1 = COPY %1 + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; VI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f64) = nnan G_INTRINSIC_TRUNC [[BITCAST]] + ; VI-NEXT: [[C:%[0-9]+]]:_(f64) = G_FCONSTANT double 0x3DF0000000000000 + ; VI-NEXT: [[C1:%[0-9]+]]:_(f64) = G_FCONSTANT double 0xC1F0000000000000 + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(f64) = nnan G_FMUL [[INTRINSIC_TRUNC]], [[C]] + ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(f64) = nnan G_FFLOOR [[FMUL]] + ; VI-NEXT: [[FMA:%[0-9]+]]:_(f64) = nnan G_FMA [[FFLOOR]], [[C1]], [[INTRINSIC_TRUNC]] + ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FFLOOR]](f64) + ; VI-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[FMA]](f64) + ; VI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[FPTOUI1]](i32), [[FPTOUI]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(f64) = G_BITCAST %0(i64) + %2:_(i64) = nnan G_FPTOUI %1(f64) + $vgpr0_vgpr1 = COPY %2(i64) ... --- @@ -318,93 +360,103 @@ body: | ; SI-LABEL: name: test_fptoui_v2s64_to_v2s64 ; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 - ; SI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ubfe), [[UV3]](s32), [[C]](s32), [[C1]](s32) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1023 - ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C3]] - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4503599627370495 - ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C5]](s32), [[AND]](s32) - ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[C4]], [[SUB]](s32) - ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; SI-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[C6]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[UV]], [[XOR]] - ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 51 - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SUB]](s32), [[C5]] - ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SUB]](s32), [[C7]] - ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[MV]], [[AND1]] - ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV]], [[SELECT]] - ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3DF0000000000000 - ; SI-NEXT: [[C9:%[0-9]+]]:_(s64) = G_FCONSTANT double 0xC1F0000000000000 - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL 
[[SELECT1]], [[C8]] - ; SI-NEXT: [[INT1:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s64) - ; SI-NEXT: [[C10:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3FEFFFFFFFFFFFFF - ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s64) = G_FMINNUM_IEEE [[INT1]], [[C10]] - ; SI-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ord), [[FMUL]](s64), [[FMUL]] - ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[FMUL]], [[FMINNUM_IEEE]] - ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[SELECT2]] - ; SI-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[FMUL]], [[FNEG]] - ; SI-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FADD]], [[C9]], [[SELECT1]] - ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD]](s64) - ; SI-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMA]](s64) - ; SI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[FPTOUI1]](s32), [[FPTOUI]](s32) - ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; SI-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ubfe), [[UV5]](s32), [[C]](s32), [[C1]](s32) - ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[INT2]], [[C2]] - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C3]] - ; SI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C5]](s32), [[AND2]](s32) - ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[C4]], [[SUB1]](s32) - ; SI-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[ASHR1]], [[C6]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[UV1]], [[XOR1]] - ; SI-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SUB1]](s32), [[C5]] - ; SI-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SUB1]](s32), [[C7]] - ; SI-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[MV2]], [[AND3]] - ; SI-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV1]], [[SELECT3]] - ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[SELECT4]], [[C8]] - ; SI-NEXT: [[INT3:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](s64) - ; SI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s64) = G_FMINNUM_IEEE [[INT3]], [[C10]] - ; SI-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ord), [[FMUL1]](s64), [[FMUL1]] - ; SI-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[FCMP1]](s1), [[FMUL1]], [[FMINNUM_IEEE1]] - ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[SELECT5]] - ; SI-NEXT: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[FMUL1]], [[FNEG1]] - ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[FADD1]], [[C9]], [[SELECT4]] - ; SI-NEXT: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s64) - ; SI-NEXT: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[FMA1]](s64) - ; SI-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[FPTOUI3]](s32), [[FPTOUI2]](s32) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV1]](s64), [[MV3]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(f64), [[UV1:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST]](<2 x f64>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[UV]](f64) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST1]](i64) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 11 + ; SI-NEXT: [[INT:%[0-9]+]]:_(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ubfe), [[UV3]](i32), [[C]](i32), [[C1]](i32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 1023 + ; SI-NEXT: 
[[SUB:%[0-9]+]]:_(i32) = G_SUB [[INT]], [[C2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[UV3]], [[C3]] + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4503599627370495 + ; SI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; SI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[C5]](i32), [[AND]](i32) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(f64) = G_BITCAST [[MV]](i64) + ; SI-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[C4]], [[SUB]](i32) + ; SI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 -1 + ; SI-NEXT: [[XOR:%[0-9]+]]:_(i64) = G_XOR [[ASHR]], [[C6]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[BITCAST1]], [[XOR]] + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(f64) = G_BITCAST [[AND1]](i64) + ; SI-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 51 + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SUB]](i32), [[C5]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(sgt), [[SUB]](i32), [[C7]] + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(f64) = G_SELECT [[ICMP]](i1), [[BITCAST2]], [[BITCAST3]] + ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(f64) = G_SELECT [[ICMP1]](i1), [[UV]], [[SELECT]] + ; SI-NEXT: [[C8:%[0-9]+]]:_(f64) = G_FCONSTANT double 0x3DF0000000000000 + ; SI-NEXT: [[C9:%[0-9]+]]:_(f64) = G_FCONSTANT double 0xC1F0000000000000 + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[SELECT1]], [[C8]] + ; SI-NEXT: [[INT1:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](f64) + ; SI-NEXT: [[C10:%[0-9]+]]:_(f64) = G_FCONSTANT double 0x3FEFFFFFFFFFFFFF + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(f64) = G_FMINNUM_IEEE [[INT1]], [[C10]] + ; SI-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(ord), [[FMUL]](f64), [[FMUL]] + ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(f64) = G_SELECT [[FCMP]](i1), [[FMUL]], [[FMINNUM_IEEE]] + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(f64) = G_FNEG [[SELECT2]] + ; SI-NEXT: [[FADD:%[0-9]+]]:_(f64) = G_FADD [[FMUL]], [[FNEG]] + ; SI-NEXT: [[FMA:%[0-9]+]]:_(f64) = G_FMA [[FADD]], [[C9]], [[SELECT1]] + ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD]](f64) + ; SI-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[FMA]](f64) + ; SI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[FPTOUI1]](i32), [[FPTOUI]](i32) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(i64) = G_BITCAST [[UV1]](f64) + ; SI-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST4]](i64) + ; SI-NEXT: [[INT2:%[0-9]+]]:_(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ubfe), [[UV5]](i32), [[C]](i32), [[C1]](i32) + ; SI-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[INT2]], [[C2]] + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[UV5]], [[C3]] + ; SI-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[C5]](i32), [[AND2]](i32) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(f64) = G_BITCAST [[MV2]](i64) + ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[C4]], [[SUB1]](i32) + ; SI-NEXT: [[XOR1:%[0-9]+]]:_(i64) = G_XOR [[ASHR1]], [[C6]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i64) = G_AND [[BITCAST4]], [[XOR1]] + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(f64) = G_BITCAST [[AND3]](i64) + ; SI-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SUB1]](i32), [[C5]] + ; SI-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(sgt), [[SUB1]](i32), [[C7]] + ; SI-NEXT: [[SELECT3:%[0-9]+]]:_(f64) = G_SELECT [[ICMP2]](i1), [[BITCAST5]], [[BITCAST6]] + ; SI-NEXT: [[SELECT4:%[0-9]+]]:_(f64) = G_SELECT [[ICMP3]](i1), [[UV1]], [[SELECT3]] + ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(f64) = G_FMUL [[SELECT4]], [[C8]] + ; SI-NEXT: [[INT3:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](f64) + ; SI-NEXT: 
[[FMINNUM_IEEE1:%[0-9]+]]:_(f64) = G_FMINNUM_IEEE [[INT3]], [[C10]] + ; SI-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(ord), [[FMUL1]](f64), [[FMUL1]] + ; SI-NEXT: [[SELECT5:%[0-9]+]]:_(f64) = G_SELECT [[FCMP1]](i1), [[FMUL1]], [[FMINNUM_IEEE1]] + ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(f64) = G_FNEG [[SELECT5]] + ; SI-NEXT: [[FADD1:%[0-9]+]]:_(f64) = G_FADD [[FMUL1]], [[FNEG1]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(f64) = G_FMA [[FADD1]], [[C9]], [[SELECT4]] + ; SI-NEXT: [[FPTOUI2:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD1]](f64) + ; SI-NEXT: [[FPTOUI3:%[0-9]+]]:_(i32) = G_FPTOUI [[FMA1]](f64) + ; SI-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[FPTOUI3]](i32), [[FPTOUI2]](i32) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[MV1]](i64), [[MV3]](i64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) + ; ; VI-LABEL: name: test_fptoui_v2s64_to_v2s64 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; VI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s64) = G_INTRINSIC_TRUNC [[UV]] - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3DF0000000000000 - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 0xC1F0000000000000 - ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INTRINSIC_TRUNC]], [[C]] - ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(s64) = G_FFLOOR [[FMUL]] - ; VI-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FFLOOR]], [[C1]], [[INTRINSIC_TRUNC]] - ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FFLOOR]](s64) - ; VI-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMA]](s64) - ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[FPTOUI1]](s32), [[FPTOUI]](s32) - ; VI-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s64) = G_INTRINSIC_TRUNC [[UV1]] - ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[INTRINSIC_TRUNC1]], [[C]] - ; VI-NEXT: [[FFLOOR1:%[0-9]+]]:_(s64) = G_FFLOOR [[FMUL1]] - ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[FFLOOR1]], [[C1]], [[INTRINSIC_TRUNC1]] - ; VI-NEXT: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FFLOOR1]](s64) - ; VI-NEXT: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[FMA1]](s64) - ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[FPTOUI3]](s32), [[FPTOUI2]](s32) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s64>) = G_FPTOUI %0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(f64), [[UV1:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST]](<2 x f64>) + ; VI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f64) = G_INTRINSIC_TRUNC [[UV]] + ; VI-NEXT: [[C:%[0-9]+]]:_(f64) = G_FCONSTANT double 0x3DF0000000000000 + ; VI-NEXT: [[C1:%[0-9]+]]:_(f64) = G_FCONSTANT double 0xC1F0000000000000 + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[INTRINSIC_TRUNC]], [[C]] + ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(f64) = G_FFLOOR [[FMUL]] + ; VI-NEXT: [[FMA:%[0-9]+]]:_(f64) = G_FMA [[FFLOOR]], [[C1]], [[INTRINSIC_TRUNC]] + ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FFLOOR]](f64) + ; VI-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[FMA]](f64) + ; VI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[FPTOUI1]](i32), [[FPTOUI]](i32) + ; VI-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(f64) = 
G_INTRINSIC_TRUNC [[UV1]] + ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(f64) = G_FMUL [[INTRINSIC_TRUNC1]], [[C]] + ; VI-NEXT: [[FFLOOR1:%[0-9]+]]:_(f64) = G_FFLOOR [[FMUL1]] + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(f64) = G_FMA [[FFLOOR1]], [[C1]], [[INTRINSIC_TRUNC1]] + ; VI-NEXT: [[FPTOUI2:%[0-9]+]]:_(i32) = G_FPTOUI [[FFLOOR1]](f64) + ; VI-NEXT: [[FPTOUI3:%[0-9]+]]:_(i32) = G_FPTOUI [[FMA1]](f64) + ; VI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[FPTOUI3]](i32), [[FPTOUI2]](i32) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x f64>) = G_BITCAST %0(<2 x i64>) + %2:_(<2 x i64>) = G_FPTOUI %1(<2 x f64>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<2 x i64>) ... --- @@ -416,34 +468,38 @@ body: | ; SI-LABEL: name: test_fptoui_s32_to_s64 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[COPY]] - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C]] - ; SI-NEXT: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[FMUL]] - ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FFLOOR]], [[C1]], [[INTRINSIC_TRUNC]] - ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FFLOOR]](s32) - ; SI-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMA]](s32) - ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[FPTOUI1]](s32), [[FPTOUI]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[BITCAST]] + ; SI-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3DF0000000000000 + ; SI-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC1F0000000000000 + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC]], [[C]] + ; SI-NEXT: [[FFLOOR:%[0-9]+]]:_(f32) = G_FFLOOR [[FMUL]] + ; SI-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[FFLOOR]], [[C1]], [[INTRINSIC_TRUNC]] + ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FFLOOR]](f32) + ; SI-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[FMA]](f32) + ; SI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[FPTOUI1]](i32), [[FPTOUI]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + ; ; VI-LABEL: name: test_fptoui_s32_to_s64 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[COPY]] - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C]] - ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[FMUL]] - ; VI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FFLOOR]], [[C1]], [[INTRINSIC_TRUNC]] - ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FFLOOR]](s32) - ; VI-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMA]](s32) - ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[FPTOUI1]](s32), [[FPTOUI]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) - %0:_(s32) = COPY $vgpr0 - %1:_(s64) = G_FPTOUI %0 - $vgpr0_vgpr1 = COPY %1 + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; 
VI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[BITCAST]] + ; VI-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3DF0000000000000 + ; VI-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC1F0000000000000 + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC]], [[C]] + ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(f32) = G_FFLOOR [[FMUL]] + ; VI-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[FFLOOR]], [[C1]], [[INTRINSIC_TRUNC]] + ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FFLOOR]](f32) + ; VI-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[FMA]](f32) + ; VI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[FPTOUI1]](i32), [[FPTOUI]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(i64) = G_FPTOUI %1(f32) + $vgpr0_vgpr1 = COPY %2(i64) ... --- @@ -455,52 +511,56 @@ body: | ; SI-LABEL: name: test_fptoui_v2s32_to_v2s64 ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; SI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[UV]] - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C]] - ; SI-NEXT: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[FMUL]] - ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FFLOOR]], [[C1]], [[INTRINSIC_TRUNC]] - ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FFLOOR]](s32) - ; SI-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMA]](s32) - ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[FPTOUI1]](s32), [[FPTOUI]](s32) - ; SI-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[UV1]] - ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C]] - ; SI-NEXT: [[FFLOOR1:%[0-9]+]]:_(s32) = G_FFLOOR [[FMUL1]] - ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FFLOOR1]], [[C1]], [[INTRINSIC_TRUNC1]] - ; SI-NEXT: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FFLOOR1]](s32) - ; SI-NEXT: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[FMA1]](s32) - ; SI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[FPTOUI3]](s32), [[FPTOUI2]](s32) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; SI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[UV]] + ; SI-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3DF0000000000000 + ; SI-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC1F0000000000000 + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC]], [[C]] + ; SI-NEXT: [[FFLOOR:%[0-9]+]]:_(f32) = G_FFLOOR [[FMUL]] + ; SI-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[FFLOOR]], [[C1]], [[INTRINSIC_TRUNC]] + ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FFLOOR]](f32) + ; SI-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[FMA]](f32) + ; SI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[FPTOUI1]](i32), [[FPTOUI]](i32) + ; SI-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[UV1]] + ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C]] + ; SI-NEXT: [[FFLOOR1:%[0-9]+]]:_(f32) = G_FFLOOR 
[[FMUL1]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[FFLOOR1]], [[C1]], [[INTRINSIC_TRUNC1]] + ; SI-NEXT: [[FPTOUI2:%[0-9]+]]:_(i32) = G_FPTOUI [[FFLOOR1]](f32) + ; SI-NEXT: [[FPTOUI3:%[0-9]+]]:_(i32) = G_FPTOUI [[FMA1]](f32) + ; SI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[FPTOUI3]](i32), [[FPTOUI2]](i32) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) + ; ; VI-LABEL: name: test_fptoui_v2s32_to_v2s64 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; VI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[UV]] - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C]] - ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[FMUL]] - ; VI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FFLOOR]], [[C1]], [[INTRINSIC_TRUNC]] - ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FFLOOR]](s32) - ; VI-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMA]](s32) - ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[FPTOUI1]](s32), [[FPTOUI]](s32) - ; VI-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[UV1]] - ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C]] - ; VI-NEXT: [[FFLOOR1:%[0-9]+]]:_(s32) = G_FFLOOR [[FMUL1]] - ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FFLOOR1]], [[C1]], [[INTRINSIC_TRUNC1]] - ; VI-NEXT: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FFLOOR1]](s32) - ; VI-NEXT: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[FMA1]](s32) - ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[FPTOUI3]](s32), [[FPTOUI2]](s32) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s64>) = G_FPTOUI %0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; VI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[UV]] + ; VI-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3DF0000000000000 + ; VI-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC1F0000000000000 + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC]], [[C]] + ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(f32) = G_FFLOOR [[FMUL]] + ; VI-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[FFLOOR]], [[C1]], [[INTRINSIC_TRUNC]] + ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FFLOOR]](f32) + ; VI-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[FMA]](f32) + ; VI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[FPTOUI1]](i32), [[FPTOUI]](i32) + ; VI-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[UV1]] + ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C]] + ; VI-NEXT: [[FFLOOR1:%[0-9]+]]:_(f32) = G_FFLOOR [[FMUL1]] + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[FFLOOR1]], [[C1]], [[INTRINSIC_TRUNC1]] + ; VI-NEXT: [[FPTOUI2:%[0-9]+]]:_(i32) = G_FPTOUI [[FFLOOR1]](f32) + ; VI-NEXT: [[FPTOUI3:%[0-9]+]]:_(i32) = G_FPTOUI [[FMA1]](f32) + ; VI-NEXT: [[MV1:%[0-9]+]]:_(i64) 
= G_MERGE_VALUES [[FPTOUI3]](i32), [[FPTOUI2]](i32) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x f32>) = G_BITCAST %0(<2 x i32>) + %2:_(<2 x i64>) = G_FPTOUI %1(<2 x f32>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<2 x i64>) ... --- @@ -512,23 +572,27 @@ body: | ; SI-LABEL: name: test_fptoui_s16_to_s64 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[TRUNC]](s16) - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[FPTOUI]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[BITCAST]](f16) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[FPTOUI]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](i64) + ; ; VI-LABEL: name: test_fptoui_s16_to_s64 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[TRUNC]](s16) - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[FPTOUI]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s64) = G_FPTOUI %1 - $vgpr0_vgpr1 = COPY %2 + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[BITCAST]](f16) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[FPTOUI]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](i64) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(f16) = G_BITCAST %1(i16) + %3:_(i64) = G_FPTOUI %2(f16) + $vgpr0_vgpr1 = COPY %3(i64) ... 
--- @@ -540,36 +604,46 @@ body: | ; SI-LABEL: name: test_fptoui_v2s16_to_v2s64 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[TRUNC]](s16) - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[FPTOUI]](s32) - ; SI-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[TRUNC1]](s16) - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[FPTOUI1]](s32) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[ZEXT]](s64), [[ZEXT1]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %9(i16) + ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[BITCAST1]](f16) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %10(i16) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[FPTOUI]](i32) + ; SI-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[BITCAST2]](f16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[FPTOUI1]](i32) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[ZEXT]](i64), [[ZEXT1]](i64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) + ; ; VI-LABEL: name: test_fptoui_v2s16_to_v2s64 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[TRUNC]](s16) - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[FPTOUI]](s32) - ; VI-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[TRUNC1]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[FPTOUI1]](s32) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[ZEXT]](s64), [[ZEXT1]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s64>) = G_FPTOUI %0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %9(i16) + ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[BITCAST1]](f16) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %10(i16) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; VI-NEXT: 
[[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[FPTOUI]](i32) + ; VI-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[BITCAST2]](f16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[FPTOUI1]](i32) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[ZEXT]](i64), [[ZEXT1]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %2:_(<2 x i64>) = G_FPTOUI %1(<2 x f16>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<2 x i64>) ... --- @@ -580,24 +654,28 @@ body: | ; SI-LABEL: name: test_fptoui_s16_to_s1 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FPEXT]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[FPTOSI]](s32) - ; SI-NEXT: S_ENDPGM 0, implicit [[TRUNC1]](s1) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; SI-NEXT: [[FPTOSI:%[0-9]+]]:_(i32) = G_FPTOSI [[FPEXT]](f32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i1) = G_TRUNC [[FPTOSI]](i32) + ; SI-NEXT: S_ENDPGM 0, implicit [[TRUNC1]](i1) + ; ; VI-LABEL: name: test_fptoui_s16_to_s1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[TRUNC]](s16) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[FPTOSI]](s32) - ; VI-NEXT: S_ENDPGM 0, implicit [[TRUNC1]](s1) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s1) = G_FPTOSI %1 - S_ENDPGM 0, implicit %2 + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; VI-NEXT: [[FPTOSI:%[0-9]+]]:_(i32) = G_FPTOSI [[BITCAST]](f16) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i1) = G_TRUNC [[FPTOSI]](i32) + ; VI-NEXT: S_ENDPGM 0, implicit [[TRUNC1]](i1) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(f16) = G_BITCAST %1(i16) + %3:_(i1) = G_FPTOSI %2(f16) + S_ENDPGM 0, implicit %3(i1) ... 
--- @@ -609,23 +687,27 @@ body: | ; SI-LABEL: name: test_fptoui_s16_to_s15 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FPEXT]](s32) - ; SI-NEXT: $vgpr0 = COPY [[FPTOUI]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FPEXT]](f32) + ; SI-NEXT: $vgpr0 = COPY [[FPTOUI]](i32) + ; ; VI-LABEL: name: test_fptoui_s16_to_s15 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[TRUNC]](s16) - ; VI-NEXT: $vgpr0 = COPY [[FPTOUI]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s15) = G_FPTOUI %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[BITCAST]](f16) + ; VI-NEXT: $vgpr0 = COPY [[FPTOUI]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(f16) = G_BITCAST %1(i16) + %3:_(i15) = G_FPTOUI %2(f16) + %4:_(i32) = G_ANYEXT %3(i15) + $vgpr0 = COPY %4(i32) ... --- @@ -637,23 +719,27 @@ body: | ; SI-LABEL: name: test_fptoui_s16_to_s17 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FPEXT]](s32) - ; SI-NEXT: $vgpr0 = COPY [[FPTOUI]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FPEXT]](f32) + ; SI-NEXT: $vgpr0 = COPY [[FPTOUI]](i32) + ; ; VI-LABEL: name: test_fptoui_s16_to_s17 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[TRUNC]](s16) - ; VI-NEXT: $vgpr0 = COPY [[FPTOUI]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s17) = G_FPTOUI %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[BITCAST]](f16) + ; VI-NEXT: $vgpr0 = COPY [[FPTOUI]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(f16) = G_BITCAST %1(i16) + %3:_(i17) = G_FPTOUI %2(f16) + %4:_(i32) = G_ANYEXT %3(i17) + $vgpr0 = COPY %4(i32) ... 
--- @@ -665,35 +751,39 @@ body: | ; SI-LABEL: name: test_fptoui_s32_to_s33 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[COPY]] - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C]] - ; SI-NEXT: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[FMUL]] - ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FFLOOR]], [[C1]], [[INTRINSIC_TRUNC]] - ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FFLOOR]](s32) - ; SI-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMA]](s32) - ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[FPTOUI1]](s32), [[FPTOUI]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[BITCAST]] + ; SI-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3DF0000000000000 + ; SI-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC1F0000000000000 + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC]], [[C]] + ; SI-NEXT: [[FFLOOR:%[0-9]+]]:_(f32) = G_FFLOOR [[FMUL]] + ; SI-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[FFLOOR]], [[C1]], [[INTRINSIC_TRUNC]] + ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FFLOOR]](f32) + ; SI-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[FMA]](f32) + ; SI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[FPTOUI1]](i32), [[FPTOUI]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + ; ; VI-LABEL: name: test_fptoui_s32_to_s33 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[COPY]] - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C]] - ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[FMUL]] - ; VI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FFLOOR]], [[C1]], [[INTRINSIC_TRUNC]] - ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FFLOOR]](s32) - ; VI-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMA]](s32) - ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[FPTOUI1]](s32), [[FPTOUI]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) - %0:_(s32) = COPY $vgpr0 - %1:_(s33) = G_FPTOUI %0 - %2:_(s64) = G_ANYEXT %1 - $vgpr0_vgpr1 = COPY %2 + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[BITCAST]] + ; VI-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3DF0000000000000 + ; VI-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC1F0000000000000 + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC]], [[C]] + ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(f32) = G_FFLOOR [[FMUL]] + ; VI-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[FFLOOR]], [[C1]], [[INTRINSIC_TRUNC]] + ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FFLOOR]](f32) + ; VI-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[FMA]](f32) + ; VI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[FPTOUI1]](i32), [[FPTOUI]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(i33) = G_FPTOUI %1(f32) + %3:_(i64) = 
G_ANYEXT %2(i33) + $vgpr0_vgpr1 = COPY %3(i64) ... --- @@ -705,23 +795,27 @@ body: | ; SI-LABEL: name: test_fptoui_s16_to_s7 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FPEXT]](s32) - ; SI-NEXT: $vgpr0 = COPY [[FPTOUI]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FPEXT]](f32) + ; SI-NEXT: $vgpr0 = COPY [[FPTOUI]](i32) + ; ; VI-LABEL: name: test_fptoui_s16_to_s7 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[TRUNC]](s16) - ; VI-NEXT: $vgpr0 = COPY [[FPTOUI]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s7) = G_FPTOUI %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[BITCAST]](f16) + ; VI-NEXT: $vgpr0 = COPY [[FPTOUI]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(f16) = G_BITCAST %1(i16) + %3:_(i7) = G_FPTOUI %2(f16) + %4:_(i32) = G_ANYEXT %3(i7) + $vgpr0 = COPY %4(i32) ... --- @@ -733,23 +827,27 @@ body: | ; SI-LABEL: name: test_fptoui_s16_to_s8 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FPEXT]](s32) - ; SI-NEXT: $vgpr0 = COPY [[FPTOUI]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FPEXT]](f32) + ; SI-NEXT: $vgpr0 = COPY [[FPTOUI]](i32) + ; ; VI-LABEL: name: test_fptoui_s16_to_s8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[TRUNC]](s16) - ; VI-NEXT: $vgpr0 = COPY [[FPTOUI]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s8) = G_FPTOUI %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[BITCAST]](f16) + ; VI-NEXT: $vgpr0 = COPY [[FPTOUI]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(f16) = G_BITCAST %1(i16) + %3:_(i8) = G_FPTOUI %2(f16) + %4:_(i32) = G_ANYEXT %3(i8) + $vgpr0 = COPY %4(i32) ... 
--- @@ -761,23 +859,27 @@ body: | ; SI-LABEL: name: test_fptoui_s16_to_s9 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FPEXT]](s32) - ; SI-NEXT: $vgpr0 = COPY [[FPTOUI]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FPEXT]](f32) + ; SI-NEXT: $vgpr0 = COPY [[FPTOUI]](i32) + ; ; VI-LABEL: name: test_fptoui_s16_to_s9 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[TRUNC]](s16) - ; VI-NEXT: $vgpr0 = COPY [[FPTOUI]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s9) = G_FPTOUI %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[BITCAST]](f16) + ; VI-NEXT: $vgpr0 = COPY [[FPTOUI]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(f16) = G_BITCAST %1(i16) + %3:_(i9) = G_FPTOUI %2(f16) + %4:_(i32) = G_ANYEXT %3(i9) + $vgpr0 = COPY %4(i32) ... --- @@ -789,19 +891,23 @@ body: | ; SI-LABEL: name: test_fptoui_s32_to_s15 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[COPY]](s32) - ; SI-NEXT: $vgpr0 = COPY [[FPTOUI]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[BITCAST]](f32) + ; SI-NEXT: $vgpr0 = COPY [[FPTOUI]](i32) + ; ; VI-LABEL: name: test_fptoui_s32_to_s15 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[COPY]](s32) - ; VI-NEXT: $vgpr0 = COPY [[FPTOUI]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s15) = G_FPTOUI %0 - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[BITCAST]](f32) + ; VI-NEXT: $vgpr0 = COPY [[FPTOUI]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(i15) = G_FPTOUI %1(f32) + %3:_(i32) = G_ANYEXT %2(i15) + $vgpr0 = COPY %3(i32) ... 
--- @@ -813,17 +919,21 @@ body: | ; SI-LABEL: name: test_fptoui_s32_to_s17 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[COPY]](s32) - ; SI-NEXT: $vgpr0 = COPY [[FPTOUI]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[BITCAST]](f32) + ; SI-NEXT: $vgpr0 = COPY [[FPTOUI]](i32) + ; ; VI-LABEL: name: test_fptoui_s32_to_s17 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[COPY]](s32) - ; VI-NEXT: $vgpr0 = COPY [[FPTOUI]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s17) = G_FPTOUI %0 - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[BITCAST]](f32) + ; VI-NEXT: $vgpr0 = COPY [[FPTOUI]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(i17) = G_FPTOUI %1(f32) + %3:_(i32) = G_ANYEXT %2(i17) + $vgpr0 = COPY %3(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptrunc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptrunc.mir index f513de8b9c770..865485b7f8bbc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptrunc.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptrunc.mir @@ -10,12 +10,16 @@ body: | ; CHECK-LABEL: name: test_fptrunc_s64_to_s32 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:_(s32) = G_FPTRUNC [[COPY]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[FPTRUNC]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_FPTRUNC %0 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:_(f32) = G_FPTRUNC [[BITCAST]](f64) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[FPTRUNC]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(f64) = G_BITCAST %0(i64) + %2:_(f32) = G_FPTRUNC %1(f64) + %3:_(i32) = G_BITCAST %2(f32) + $vgpr0 = COPY %3(i32) ... --- @@ -27,14 +31,18 @@ body: | ; CHECK-LABEL: name: test_fptrunc_s32_to_s16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[COPY]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_FPTRUNC %0 - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[BITCAST]](f32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f16) = G_FPTRUNC %1(f32) + %3:_(i16) = G_BITCAST %2(f16) + %4:_(i32) = G_ANYEXT %3(i16) + $vgpr0 = COPY %4(i32) ... 
--- @@ -46,15 +54,19 @@ body: | ; CHECK-LABEL: name: test_fptrunc_v2s64_to_v2s32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:_(s32) = G_FPTRUNC [[UV]](s64) - ; CHECK-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s32) = G_FPTRUNC [[UV1]](s64) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FPTRUNC]](s32), [[FPTRUNC1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s32>) = G_FPTRUNC %0 - $vgpr0_vgpr1 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(f64), [[UV1:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST]](<2 x f64>) + ; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:_(f32) = G_FPTRUNC [[UV]](f64) + ; CHECK-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f32) = G_FPTRUNC [[UV1]](f64) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FPTRUNC]](f32), [[FPTRUNC1]](f32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](<2 x i32>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x f64>) = G_BITCAST %0(<2 x i64>) + %2:_(<2 x f32>) = G_FPTRUNC %1(<2 x f64>) + %3:_(<2 x i32>) = G_BITCAST %2(<2 x f32>) + $vgpr0_vgpr1 = COPY %3(<2 x i32>) ... --- @@ -66,24 +78,30 @@ body: | ; CHECK-LABEL: name: test_fptrunc_v2s32_to_v2s16 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[UV]](s32) - ; CHECK-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[UV1]](s32) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[BITCAST1]](s32), [[LSHR]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s16>) = G_FPTRUNC %0 - %2:_(<2 x s32>) = G_ANYEXT %1 - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[UV]](f32) + ; CHECK-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[UV1]](f32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST1]](i16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT 
[[BITCAST2]](i16) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST3]](<2 x f16>) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST4]](<2 x i16>) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST5]], [[C]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[BITCAST5]](i32), [[LSHR]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x f32>) = G_BITCAST %0(<2 x i32>) + %2:_(<2 x f16>) = G_FPTRUNC %1(<2 x f32>) + %3:_(<2 x i16>) = G_BITCAST %2(<2 x f16>) + %4:_(<2 x i32>) = G_ANYEXT %3(<2 x i16>) + $vgpr0_vgpr1 = COPY %4(<2 x i32>) ... --- @@ -95,75 +113,83 @@ body: | ; CHECK-LABEL: name: test_fptrunc_s64_to_s16 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2047 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1008 - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[AND]], [[C2]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C3]](s32) - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4094 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C4]] - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 511 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C5]] - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[UV]] - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[OR]](s32), [[C6]] - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[ZEXT]] - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 512 - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[OR1]](s32), [[C6]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C7]], [[C6]] - ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 31744 - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SELECT]], [[C8]] - ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ADD]], [[C9]](s32) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL]] - ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C10]], [[ADD]] - ; CHECK-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[C6]] - ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 - ; CHECK-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SMAX]], [[C11]] - ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 4096 - ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[C12]] - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[OR4]], [[SMIN]](s32) - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[SMIN]](s32) - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL1]](s32), [[OR4]] - ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) 
= G_ZEXT [[ICMP2]](s1) - ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[ZEXT1]] - ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[ADD]](s32), [[C10]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[OR5]], [[OR3]] - ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C13]] - ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[SELECT1]], [[C14]](s32) - ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND3]](s32), [[C15]] - ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP4]](s1) - ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[AND3]](s32), [[C16]] - ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP5]](s1) - ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[ZEXT3]] - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR3]], [[OR6]] - ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 30 - ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[ADD]](s32), [[C17]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP6]](s1), [[C8]], [[ADD1]] - ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 1039 - ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C18]] - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP7]](s1), [[OR2]], [[SELECT2]] - ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C19]](s32) - ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 32768 - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C20]] - ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SELECT3]] - ; CHECK-NEXT: $vgpr0 = COPY [[OR7]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s16) = G_FPTRUNC %0 - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[BITCAST]](f64) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST1]](i64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 2047 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1008 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[AND]], [[C2]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C3]](i32) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4094 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LSHR1]], [[C4]] + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 511 + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[UV1]], [[C5]] + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[UV]] + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[OR]](i32), [[C6]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[ICMP]](i1) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND1]], [[ZEXT]] + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 512 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[OR1]](i32), [[C6]] + ; CHECK-NEXT: 
[[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[C7]], [[C6]] + ; CHECK-NEXT: [[C8:%[0-9]+]]:_(i32) = G_CONSTANT i32 31744 + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SELECT]], [[C8]] + ; CHECK-NEXT: [[C9:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ADD]], [[C9]](i32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[OR1]], [[SHL]] + ; CHECK-NEXT: [[C10:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C10]], [[ADD]] + ; CHECK-NEXT: [[SMAX:%[0-9]+]]:_(i32) = G_SMAX [[SUB]], [[C6]] + ; CHECK-NEXT: [[C11:%[0-9]+]]:_(i32) = G_CONSTANT i32 13 + ; CHECK-NEXT: [[SMIN:%[0-9]+]]:_(i32) = G_SMIN [[SMAX]], [[C11]] + ; CHECK-NEXT: [[C12:%[0-9]+]]:_(i32) = G_CONSTANT i32 4096 + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[OR1]], [[C12]] + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[OR4]], [[SMIN]](i32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LSHR2]], [[SMIN]](i32) + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SHL1]](i32), [[OR4]] + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[ICMP2]](i1) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[LSHR2]], [[ZEXT1]] + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[ADD]](i32), [[C10]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP3]](i1), [[OR5]], [[OR3]] + ; CHECK-NEXT: [[C13:%[0-9]+]]:_(i32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[SELECT1]], [[C13]] + ; CHECK-NEXT: [[C14:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[SELECT1]], [[C14]](i32) + ; CHECK-NEXT: [[C15:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[AND3]](i32), [[C15]] + ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[ICMP4]](i1) + ; CHECK-NEXT: [[C16:%[0-9]+]]:_(i32) = G_CONSTANT i32 5 + ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(sgt), [[AND3]](i32), [[C16]] + ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[ICMP5]](i1) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[ZEXT3]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[LSHR3]], [[OR6]] + ; CHECK-NEXT: [[C17:%[0-9]+]]:_(i32) = G_CONSTANT i32 30 + ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(sgt), [[ADD]](i32), [[C17]] + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP6]](i1), [[C8]], [[ADD1]] + ; CHECK-NEXT: [[C18:%[0-9]+]]:_(i32) = G_CONSTANT i32 1039 + ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C18]] + ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(i32) = G_SELECT [[ICMP7]](i1), [[OR2]], [[SELECT2]] + ; CHECK-NEXT: [[C19:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C19]](i32) + ; CHECK-NEXT: [[C20:%[0-9]+]]:_(i32) = G_CONSTANT i32 32768 + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[LSHR4]], [[C20]] + ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[AND4]], [[SELECT3]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR7]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST3]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(f64) = G_BITCAST %0(i64) + %2:_(f16) = G_FPTRUNC %1(f64) + %3:_(i16) = G_BITCAST %2(f16) + %4:_(i32) = G_ANYEXT %3(i16) + $vgpr0 = COPY %4(i32) ... 
--- @@ -175,123 +201,134 @@ body: | ; CHECK-LABEL: name: test_fptrunc_v2s64_to_v2s16 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2047 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1008 - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[AND]], [[C2]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C3]](s32) - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4094 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C4]] - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 511 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C5]] - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[UV2]] - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[OR]](s32), [[C6]] - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[ZEXT]] - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 512 - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[OR1]](s32), [[C6]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C7]], [[C6]] - ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 31744 - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SELECT]], [[C8]] - ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ADD]], [[C9]](s32) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL]] - ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C10]], [[ADD]] - ; CHECK-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[C6]] - ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 - ; CHECK-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SMAX]], [[C11]] - ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 4096 - ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[C12]] - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[OR4]], [[SMIN]](s32) - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[SMIN]](s32) - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL1]](s32), [[OR4]] - ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP2]](s1) - ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[ZEXT1]] - ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[ADD]](s32), [[C10]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[OR5]], [[OR3]] - ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C13]] - ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[SELECT1]], [[C14]](s32) - ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND3]](s32), [[C15]] - ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP4]](s1) - ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; CHECK-NEXT: 
[[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[AND3]](s32), [[C16]] - ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP5]](s1) - ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[ZEXT3]] - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR3]], [[OR6]] - ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 30 - ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[ADD]](s32), [[C17]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP6]](s1), [[C8]], [[ADD1]] - ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 1039 - ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C18]] - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP7]](s1), [[OR2]], [[SELECT2]] - ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C19]](s32) - ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 32768 - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C20]] - ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SELECT3]] - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32) - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[AND5]], [[C2]] - ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C3]](s32) - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C4]] - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C5]] - ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[UV4]] - ; CHECK-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[OR8]](s32), [[C6]] - ; CHECK-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP8]](s1) - ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[ZEXT4]] - ; CHECK-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[OR9]](s32), [[C6]] - ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP9]](s1), [[C7]], [[C6]] - ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SELECT4]], [[C8]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ADD2]], [[C9]](s32) - ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL2]] - ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C10]], [[ADD2]] - ; CHECK-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[C6]] - ; CHECK-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[C11]] - ; CHECK-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[C12]] - ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[OR12]], [[SMIN1]](s32) - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR7]], [[SMIN1]](s32) - ; CHECK-NEXT: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL3]](s32), [[OR12]] - ; CHECK-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP10]](s1) - ; CHECK-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[LSHR7]], [[ZEXT5]] - ; CHECK-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[ADD2]](s32), [[C10]] - ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP11]](s1), [[OR13]], [[OR11]] - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[SELECT5]], [[C13]] - ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[SELECT5]], [[C14]](s32) - ; CHECK-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND8]](s32), [[C15]] - ; CHECK-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP12]](s1) - ; CHECK-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[AND8]](s32), [[C16]] - ; CHECK-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP13]](s1) - ; CHECK-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[ZEXT7]] - ; CHECK-NEXT: 
[[ADD3:%[0-9]+]]:_(s32) = G_ADD [[LSHR8]], [[OR14]] - ; CHECK-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[ADD2]](s32), [[C17]] - ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[ICMP14]](s1), [[C8]], [[ADD3]] - ; CHECK-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[ADD2]](s32), [[C18]] - ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP15]](s1), [[OR10]], [[SELECT6]] - ; CHECK-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C19]](s32) - ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C20]] - ; CHECK-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SELECT7]] - ; CHECK-NEXT: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[OR7]], [[C21]] - ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[OR15]], [[C21]] - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C19]](s32) - ; CHECK-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL4]] - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR16]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s16>) = G_FPTRUNC %0 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(f64), [[UV1:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST]](<2 x f64>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[UV]](f64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST1]](i64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[UV3]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 2047 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1008 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[AND]], [[C2]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[UV3]], [[C3]](i32) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4094 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LSHR1]], [[C4]] + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 511 + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[UV3]], [[C5]] + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[UV2]] + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[OR]](i32), [[C6]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[ICMP]](i1) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND1]], [[ZEXT]] + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 512 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[OR1]](i32), [[C6]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[C7]], [[C6]] + ; CHECK-NEXT: [[C8:%[0-9]+]]:_(i32) = G_CONSTANT i32 31744 + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SELECT]], [[C8]] + ; CHECK-NEXT: [[C9:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ADD]], [[C9]](i32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[OR1]], [[SHL]] + ; CHECK-NEXT: [[C10:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C10]], [[ADD]] + ; CHECK-NEXT: [[SMAX:%[0-9]+]]:_(i32) = G_SMAX [[SUB]], [[C6]] + ; CHECK-NEXT: [[C11:%[0-9]+]]:_(i32) = G_CONSTANT i32 13 + ; CHECK-NEXT: [[SMIN:%[0-9]+]]:_(i32) = G_SMIN [[SMAX]], [[C11]] + ; CHECK-NEXT: 
[[C12:%[0-9]+]]:_(i32) = G_CONSTANT i32 4096 + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[OR1]], [[C12]] + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[OR4]], [[SMIN]](i32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LSHR2]], [[SMIN]](i32) + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SHL1]](i32), [[OR4]] + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[ICMP2]](i1) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[LSHR2]], [[ZEXT1]] + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[ADD]](i32), [[C10]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP3]](i1), [[OR5]], [[OR3]] + ; CHECK-NEXT: [[C13:%[0-9]+]]:_(i32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[SELECT1]], [[C13]] + ; CHECK-NEXT: [[C14:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[SELECT1]], [[C14]](i32) + ; CHECK-NEXT: [[C15:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[AND3]](i32), [[C15]] + ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[ICMP4]](i1) + ; CHECK-NEXT: [[C16:%[0-9]+]]:_(i32) = G_CONSTANT i32 5 + ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(sgt), [[AND3]](i32), [[C16]] + ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[ICMP5]](i1) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[ZEXT3]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[LSHR3]], [[OR6]] + ; CHECK-NEXT: [[C17:%[0-9]+]]:_(i32) = G_CONSTANT i32 30 + ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(sgt), [[ADD]](i32), [[C17]] + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP6]](i1), [[C8]], [[ADD1]] + ; CHECK-NEXT: [[C18:%[0-9]+]]:_(i32) = G_CONSTANT i32 1039 + ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C18]] + ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(i32) = G_SELECT [[ICMP7]](i1), [[OR2]], [[SELECT2]] + ; CHECK-NEXT: [[C19:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[UV3]], [[C19]](i32) + ; CHECK-NEXT: [[C20:%[0-9]+]]:_(i32) = G_CONSTANT i32 32768 + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[LSHR4]], [[C20]] + ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[AND4]], [[SELECT3]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR7]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i64) = G_BITCAST [[UV1]](f64) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](i64) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[UV5]], [[C]](i32) + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[LSHR5]], [[C1]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[AND5]], [[C2]] + ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[UV5]], [[C3]](i32) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(i32) = G_AND [[LSHR6]], [[C4]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(i32) = G_AND [[UV5]], [[C5]] + ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[AND7]], [[UV4]] + ; CHECK-NEXT: [[ICMP8:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[OR8]](i32), [[C6]] + ; CHECK-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[ICMP8]](i1) + ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[AND6]], [[ZEXT4]] + ; CHECK-NEXT: [[ICMP9:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[OR9]](i32), [[C6]] + ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(i32) = G_SELECT [[ICMP9]](i1), [[C7]], [[C6]] + ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SELECT4]], [[C8]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ADD2]], [[C9]](i32) + ; CHECK-NEXT: 
[[OR11:%[0-9]+]]:_(i32) = G_OR [[OR9]], [[SHL2]] + ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C10]], [[ADD2]] + ; CHECK-NEXT: [[SMAX1:%[0-9]+]]:_(i32) = G_SMAX [[SUB1]], [[C6]] + ; CHECK-NEXT: [[SMIN1:%[0-9]+]]:_(i32) = G_SMIN [[SMAX1]], [[C11]] + ; CHECK-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[OR9]], [[C12]] + ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(i32) = G_LSHR [[OR12]], [[SMIN1]](i32) + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LSHR7]], [[SMIN1]](i32) + ; CHECK-NEXT: [[ICMP10:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SHL3]](i32), [[OR12]] + ; CHECK-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[ICMP10]](i1) + ; CHECK-NEXT: [[OR13:%[0-9]+]]:_(i32) = G_OR [[LSHR7]], [[ZEXT5]] + ; CHECK-NEXT: [[ICMP11:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[ADD2]](i32), [[C10]] + ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(i32) = G_SELECT [[ICMP11]](i1), [[OR13]], [[OR11]] + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(i32) = G_AND [[SELECT5]], [[C13]] + ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(i32) = G_LSHR [[SELECT5]], [[C14]](i32) + ; CHECK-NEXT: [[ICMP12:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[AND8]](i32), [[C15]] + ; CHECK-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[ICMP12]](i1) + ; CHECK-NEXT: [[ICMP13:%[0-9]+]]:_(i1) = G_ICMP intpred(sgt), [[AND8]](i32), [[C16]] + ; CHECK-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[ICMP13]](i1) + ; CHECK-NEXT: [[OR14:%[0-9]+]]:_(i32) = G_OR [[ZEXT6]], [[ZEXT7]] + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[LSHR8]], [[OR14]] + ; CHECK-NEXT: [[ICMP14:%[0-9]+]]:_(i1) = G_ICMP intpred(sgt), [[ADD2]](i32), [[C17]] + ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:_(i32) = G_SELECT [[ICMP14]](i1), [[C8]], [[ADD3]] + ; CHECK-NEXT: [[ICMP15:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[ADD2]](i32), [[C18]] + ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:_(i32) = G_SELECT [[ICMP15]](i1), [[OR10]], [[SELECT6]] + ; CHECK-NEXT: [[LSHR9:%[0-9]+]]:_(i32) = G_LSHR [[UV5]], [[C19]](i32) + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(i32) = G_AND [[LSHR9]], [[C20]] + ; CHECK-NEXT: [[OR15:%[0-9]+]]:_(i32) = G_OR [[AND9]], [[SELECT7]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[OR15]](i32) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; CHECK-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST5]](i16) + ; CHECK-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST6]](i16) + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[ZEXT9]], [[C19]](i32) + ; CHECK-NEXT: [[OR16:%[0-9]+]]:_(i32) = G_OR [[ZEXT8]], [[SHL4]] + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR16]](i32) + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST7]](<2 x f16>) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST8]](<2 x i16>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x f64>) = G_BITCAST %0(<2 x i64>) + %2:_(<2 x f16>) = G_FPTRUNC %1(<2 x f64>) + %3:_(<2 x i16>) = G_BITCAST %2(<2 x f16>) + $vgpr0 = COPY %3(<2 x i16>) ... 
--- @@ -303,75 +340,83 @@ body: | ; CHECK-LABEL: name: test_fptrunc_s64_to_s16_afn ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2047 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1008 - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[AND]], [[C2]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C3]](s32) - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4094 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C4]] - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 511 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C5]] - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[UV]] - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[OR]](s32), [[C6]] - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[ZEXT]] - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 512 - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[OR1]](s32), [[C6]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C7]], [[C6]] - ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 31744 - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SELECT]], [[C8]] - ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ADD]], [[C9]](s32) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL]] - ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C10]], [[ADD]] - ; CHECK-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[C6]] - ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 - ; CHECK-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SMAX]], [[C11]] - ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 4096 - ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[C12]] - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[OR4]], [[SMIN]](s32) - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[SMIN]](s32) - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL1]](s32), [[OR4]] - ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP2]](s1) - ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[ZEXT1]] - ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[ADD]](s32), [[C10]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[OR5]], [[OR3]] - ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C13]] - ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[SELECT1]], [[C14]](s32) - ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND3]](s32), [[C15]] - ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP4]](s1) - ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[AND3]](s32), [[C16]] - ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP5]](s1) 
- ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[ZEXT3]] - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR3]], [[OR6]] - ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 30 - ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[ADD]](s32), [[C17]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP6]](s1), [[C8]], [[ADD1]] - ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 1039 - ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C18]] - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP7]](s1), [[OR2]], [[SELECT2]] - ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C19]](s32) - ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 32768 - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C20]] - ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SELECT3]] - ; CHECK-NEXT: $vgpr0 = COPY [[OR7]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s16) = G_FPTRUNC %0 - %2:_(s32) = afn G_ANYEXT %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[BITCAST]](f64) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST1]](i64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 2047 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1008 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[AND]], [[C2]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C3]](i32) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4094 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LSHR1]], [[C4]] + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 511 + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[UV1]], [[C5]] + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[UV]] + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[OR]](i32), [[C6]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[ICMP]](i1) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND1]], [[ZEXT]] + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 512 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[OR1]](i32), [[C6]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[C7]], [[C6]] + ; CHECK-NEXT: [[C8:%[0-9]+]]:_(i32) = G_CONSTANT i32 31744 + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SELECT]], [[C8]] + ; CHECK-NEXT: [[C9:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ADD]], [[C9]](i32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[OR1]], [[SHL]] + ; CHECK-NEXT: [[C10:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C10]], [[ADD]] + ; CHECK-NEXT: [[SMAX:%[0-9]+]]:_(i32) = G_SMAX [[SUB]], [[C6]] + ; CHECK-NEXT: [[C11:%[0-9]+]]:_(i32) = G_CONSTANT i32 13 + ; CHECK-NEXT: [[SMIN:%[0-9]+]]:_(i32) = G_SMIN [[SMAX]], [[C11]] + ; CHECK-NEXT: [[C12:%[0-9]+]]:_(i32) = G_CONSTANT i32 4096 + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[OR1]], [[C12]] + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[OR4]], [[SMIN]](i32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL 
[[LSHR2]], [[SMIN]](i32) + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SHL1]](i32), [[OR4]] + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[ICMP2]](i1) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[LSHR2]], [[ZEXT1]] + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[ADD]](i32), [[C10]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP3]](i1), [[OR5]], [[OR3]] + ; CHECK-NEXT: [[C13:%[0-9]+]]:_(i32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[SELECT1]], [[C13]] + ; CHECK-NEXT: [[C14:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[SELECT1]], [[C14]](i32) + ; CHECK-NEXT: [[C15:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[AND3]](i32), [[C15]] + ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[ICMP4]](i1) + ; CHECK-NEXT: [[C16:%[0-9]+]]:_(i32) = G_CONSTANT i32 5 + ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(sgt), [[AND3]](i32), [[C16]] + ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[ICMP5]](i1) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[ZEXT3]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[LSHR3]], [[OR6]] + ; CHECK-NEXT: [[C17:%[0-9]+]]:_(i32) = G_CONSTANT i32 30 + ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(sgt), [[ADD]](i32), [[C17]] + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP6]](i1), [[C8]], [[ADD1]] + ; CHECK-NEXT: [[C18:%[0-9]+]]:_(i32) = G_CONSTANT i32 1039 + ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C18]] + ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(i32) = G_SELECT [[ICMP7]](i1), [[OR2]], [[SELECT2]] + ; CHECK-NEXT: [[C19:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C19]](i32) + ; CHECK-NEXT: [[C20:%[0-9]+]]:_(i32) = G_CONSTANT i32 32768 + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[LSHR4]], [[C20]] + ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[AND4]], [[SELECT3]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR7]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = afn G_ANYEXT [[BITCAST3]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(f64) = G_BITCAST %0(i64) + %2:_(f16) = G_FPTRUNC %1(f64) + %3:_(i16) = G_BITCAST %2(f16) + %4:_(i32) = afn G_ANYEXT %3(i16) + $vgpr0 = COPY %4(i32) ... 
--- @@ -383,121 +428,132 @@ body: | ; CHECK-LABEL: name: test_fptrunc_v2s64_to_v2s16_afn ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2047 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1008 - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[AND]], [[C2]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C3]](s32) - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4094 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C4]] - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 511 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C5]] - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[UV2]] - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[OR]](s32), [[C6]] - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[ZEXT]] - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 512 - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[OR1]](s32), [[C6]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C7]], [[C6]] - ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 31744 - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SELECT]], [[C8]] - ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ADD]], [[C9]](s32) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL]] - ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C10]], [[ADD]] - ; CHECK-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[C6]] - ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 - ; CHECK-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SMAX]], [[C11]] - ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 4096 - ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[C12]] - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[OR4]], [[SMIN]](s32) - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[SMIN]](s32) - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL1]](s32), [[OR4]] - ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP2]](s1) - ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[ZEXT1]] - ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[ADD]](s32), [[C10]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[OR5]], [[OR3]] - ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C13]] - ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[SELECT1]], [[C14]](s32) - ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND3]](s32), [[C15]] - ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP4]](s1) - ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; 
CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[AND3]](s32), [[C16]] - ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP5]](s1) - ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[ZEXT3]] - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR3]], [[OR6]] - ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 30 - ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[ADD]](s32), [[C17]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP6]](s1), [[C8]], [[ADD1]] - ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 1039 - ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C18]] - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP7]](s1), [[OR2]], [[SELECT2]] - ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C19]](s32) - ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 32768 - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C20]] - ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SELECT3]] - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32) - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[AND5]], [[C2]] - ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C3]](s32) - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C4]] - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C5]] - ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[UV4]] - ; CHECK-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[OR8]](s32), [[C6]] - ; CHECK-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP8]](s1) - ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[ZEXT4]] - ; CHECK-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[OR9]](s32), [[C6]] - ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP9]](s1), [[C7]], [[C6]] - ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SELECT4]], [[C8]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ADD2]], [[C9]](s32) - ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL2]] - ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C10]], [[ADD2]] - ; CHECK-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[C6]] - ; CHECK-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[C11]] - ; CHECK-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[C12]] - ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[OR12]], [[SMIN1]](s32) - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR7]], [[SMIN1]](s32) - ; CHECK-NEXT: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL3]](s32), [[OR12]] - ; CHECK-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP10]](s1) - ; CHECK-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[LSHR7]], [[ZEXT5]] - ; CHECK-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[ADD2]](s32), [[C10]] - ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP11]](s1), [[OR13]], [[OR11]] - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[SELECT5]], [[C13]] - ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[SELECT5]], [[C14]](s32) - ; CHECK-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND8]](s32), [[C15]] - ; CHECK-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP12]](s1) - ; CHECK-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[AND8]](s32), [[C16]] - ; CHECK-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP13]](s1) - ; CHECK-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[ZEXT7]] - ; 
CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[LSHR8]], [[OR14]] - ; CHECK-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[ADD2]](s32), [[C17]] - ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[ICMP14]](s1), [[C8]], [[ADD3]] - ; CHECK-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[ADD2]](s32), [[C18]] - ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP15]](s1), [[OR10]], [[SELECT6]] - ; CHECK-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C19]](s32) - ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C20]] - ; CHECK-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SELECT7]] - ; CHECK-NEXT: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[OR7]], [[C21]] - ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[OR15]], [[C21]] - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C19]](s32) - ; CHECK-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL4]] - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR16]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s16>) = afn G_FPTRUNC %0 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(f64), [[UV1:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST]](<2 x f64>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[UV]](f64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST1]](i64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[UV3]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 2047 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1008 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[AND]], [[C2]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[UV3]], [[C3]](i32) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4094 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LSHR1]], [[C4]] + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 511 + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[UV3]], [[C5]] + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[UV2]] + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[OR]](i32), [[C6]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[ICMP]](i1) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND1]], [[ZEXT]] + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 512 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[OR1]](i32), [[C6]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[C7]], [[C6]] + ; CHECK-NEXT: [[C8:%[0-9]+]]:_(i32) = G_CONSTANT i32 31744 + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SELECT]], [[C8]] + ; CHECK-NEXT: [[C9:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ADD]], [[C9]](i32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[OR1]], [[SHL]] + ; CHECK-NEXT: [[C10:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C10]], [[ADD]] + ; CHECK-NEXT: [[SMAX:%[0-9]+]]:_(i32) = G_SMAX [[SUB]], [[C6]] + ; CHECK-NEXT: [[C11:%[0-9]+]]:_(i32) = G_CONSTANT i32 13 + ; CHECK-NEXT: [[SMIN:%[0-9]+]]:_(i32) = G_SMIN [[SMAX]], [[C11]] 
+ ; CHECK-NEXT: [[C12:%[0-9]+]]:_(i32) = G_CONSTANT i32 4096 + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[OR1]], [[C12]] + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[OR4]], [[SMIN]](i32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LSHR2]], [[SMIN]](i32) + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SHL1]](i32), [[OR4]] + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[ICMP2]](i1) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[LSHR2]], [[ZEXT1]] + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[ADD]](i32), [[C10]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP3]](i1), [[OR5]], [[OR3]] + ; CHECK-NEXT: [[C13:%[0-9]+]]:_(i32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[SELECT1]], [[C13]] + ; CHECK-NEXT: [[C14:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[SELECT1]], [[C14]](i32) + ; CHECK-NEXT: [[C15:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[AND3]](i32), [[C15]] + ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[ICMP4]](i1) + ; CHECK-NEXT: [[C16:%[0-9]+]]:_(i32) = G_CONSTANT i32 5 + ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(sgt), [[AND3]](i32), [[C16]] + ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[ICMP5]](i1) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[ZEXT3]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[LSHR3]], [[OR6]] + ; CHECK-NEXT: [[C17:%[0-9]+]]:_(i32) = G_CONSTANT i32 30 + ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(sgt), [[ADD]](i32), [[C17]] + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP6]](i1), [[C8]], [[ADD1]] + ; CHECK-NEXT: [[C18:%[0-9]+]]:_(i32) = G_CONSTANT i32 1039 + ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C18]] + ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(i32) = G_SELECT [[ICMP7]](i1), [[OR2]], [[SELECT2]] + ; CHECK-NEXT: [[C19:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[UV3]], [[C19]](i32) + ; CHECK-NEXT: [[C20:%[0-9]+]]:_(i32) = G_CONSTANT i32 32768 + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[LSHR4]], [[C20]] + ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[AND4]], [[SELECT3]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR7]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i64) = G_BITCAST [[UV1]](f64) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST3]](i64) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[UV5]], [[C]](i32) + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[LSHR5]], [[C1]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[AND5]], [[C2]] + ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[UV5]], [[C3]](i32) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(i32) = G_AND [[LSHR6]], [[C4]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(i32) = G_AND [[UV5]], [[C5]] + ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[AND7]], [[UV4]] + ; CHECK-NEXT: [[ICMP8:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[OR8]](i32), [[C6]] + ; CHECK-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[ICMP8]](i1) + ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[AND6]], [[ZEXT4]] + ; CHECK-NEXT: [[ICMP9:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[OR9]](i32), [[C6]] + ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(i32) = G_SELECT [[ICMP9]](i1), [[C7]], [[C6]] + ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SELECT4]], [[C8]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ADD2]], [[C9]](i32) + ; 
CHECK-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[OR9]], [[SHL2]] + ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C10]], [[ADD2]] + ; CHECK-NEXT: [[SMAX1:%[0-9]+]]:_(i32) = G_SMAX [[SUB1]], [[C6]] + ; CHECK-NEXT: [[SMIN1:%[0-9]+]]:_(i32) = G_SMIN [[SMAX1]], [[C11]] + ; CHECK-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[OR9]], [[C12]] + ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(i32) = G_LSHR [[OR12]], [[SMIN1]](i32) + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LSHR7]], [[SMIN1]](i32) + ; CHECK-NEXT: [[ICMP10:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SHL3]](i32), [[OR12]] + ; CHECK-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[ICMP10]](i1) + ; CHECK-NEXT: [[OR13:%[0-9]+]]:_(i32) = G_OR [[LSHR7]], [[ZEXT5]] + ; CHECK-NEXT: [[ICMP11:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[ADD2]](i32), [[C10]] + ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(i32) = G_SELECT [[ICMP11]](i1), [[OR13]], [[OR11]] + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(i32) = G_AND [[SELECT5]], [[C13]] + ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(i32) = G_LSHR [[SELECT5]], [[C14]](i32) + ; CHECK-NEXT: [[ICMP12:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[AND8]](i32), [[C15]] + ; CHECK-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[ICMP12]](i1) + ; CHECK-NEXT: [[ICMP13:%[0-9]+]]:_(i1) = G_ICMP intpred(sgt), [[AND8]](i32), [[C16]] + ; CHECK-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[ICMP13]](i1) + ; CHECK-NEXT: [[OR14:%[0-9]+]]:_(i32) = G_OR [[ZEXT6]], [[ZEXT7]] + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[LSHR8]], [[OR14]] + ; CHECK-NEXT: [[ICMP14:%[0-9]+]]:_(i1) = G_ICMP intpred(sgt), [[ADD2]](i32), [[C17]] + ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:_(i32) = G_SELECT [[ICMP14]](i1), [[C8]], [[ADD3]] + ; CHECK-NEXT: [[ICMP15:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[ADD2]](i32), [[C18]] + ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:_(i32) = G_SELECT [[ICMP15]](i1), [[OR10]], [[SELECT6]] + ; CHECK-NEXT: [[LSHR9:%[0-9]+]]:_(i32) = G_LSHR [[UV5]], [[C19]](i32) + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(i32) = G_AND [[LSHR9]], [[C20]] + ; CHECK-NEXT: [[OR15:%[0-9]+]]:_(i32) = G_OR [[AND9]], [[SELECT7]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[OR15]](i32) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; CHECK-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST5]](i16) + ; CHECK-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST6]](i16) + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[ZEXT9]], [[C19]](i32) + ; CHECK-NEXT: [[OR16:%[0-9]+]]:_(i32) = G_OR [[ZEXT8]], [[SHL4]] + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR16]](i32) + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST7]](<2 x f16>) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST8]](<2 x i16>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x f64>) = G_BITCAST %0(<2 x i64>) + %2:_(<2 x f16>) = afn G_FPTRUNC %1(<2 x f64>) + %3:_(<2 x i16>) = G_BITCAST %2(<2 x f16>) + $vgpr0 = COPY %3(<2 x i16>) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir index b08f850b5b2b1..af92235e1be49 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir @@ -8,16 +8,16 @@ body: | bb.0: ; CHECK-LABEL: name: test_freeze_s1 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s1) = G_FREEZE [[TRUNC]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FREEZE]](s1) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s1) = G_TRUNC %0 - %2:_(s1) = G_FREEZE %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; CHECK: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i1) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(i1) = G_FREEZE [[TRUNC]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[FREEZE]](i1) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i1) = G_TRUNC %0(i32) + %2:_(i1) = G_FREEZE %1 + %3:_(i32) = G_ANYEXT %2(i1) + $vgpr0 = COPY %3(i32) ... --- @@ -26,14 +26,14 @@ body: | bb.0: ; CHECK-LABEL: name: test_freeze_s7 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s32) = G_FREEZE [[COPY]] - ; CHECK-NEXT: $vgpr0 = COPY [[FREEZE]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s7) = G_TRUNC %0 - %2:_(s7) = G_FREEZE %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; CHECK: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(i32) = G_FREEZE [[COPY]] + ; CHECK-NEXT: $vgpr0 = COPY [[FREEZE]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i7) = G_TRUNC %0(i32) + %2:_(i7) = G_FREEZE %1 + %3:_(i32) = G_ANYEXT %2(i7) + $vgpr0 = COPY %3(i32) ... --- @@ -42,14 +42,14 @@ body: | bb.0: ; CHECK-LABEL: name: test_freeze_s8 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s32) = G_FREEZE [[COPY]] - ; CHECK-NEXT: $vgpr0 = COPY [[FREEZE]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s8) = G_TRUNC %0 - %2:_(s8) = G_FREEZE %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; CHECK: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(i32) = G_FREEZE [[COPY]] + ; CHECK-NEXT: $vgpr0 = COPY [[FREEZE]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i8) = G_TRUNC %0(i32) + %2:_(i8) = G_FREEZE %1 + %3:_(i32) = G_ANYEXT %2(i8) + $vgpr0 = COPY %3(i32) ... --- @@ -58,16 +58,16 @@ body: | bb.0: ; CHECK-LABEL: name: test_freeze_s16 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s16) = G_FREEZE [[TRUNC]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FREEZE]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s16) = G_FREEZE %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; CHECK: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(i16) = G_FREEZE [[TRUNC]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[FREEZE]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(i16) = G_FREEZE %1 + %3:_(i32) = G_ANYEXT %2(i16) + $vgpr0 = COPY %3(i32) ... 
--- @@ -76,12 +76,12 @@ body: | bb.0: ; CHECK-LABEL: name: test_freeze_s32 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s32) = G_FREEZE [[COPY]] - ; CHECK-NEXT: $vgpr0 = COPY [[FREEZE]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_FREEZE %0 - $vgpr0 = COPY %1 + ; CHECK: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(i32) = G_FREEZE [[COPY]] + ; CHECK-NEXT: $vgpr0 = COPY [[FREEZE]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_FREEZE %0 + $vgpr0 = COPY %1(i32) ... --- @@ -90,14 +90,14 @@ body: | bb.0: ; CHECK-LABEL: name: test_freeze_s48 - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[COPY]] - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[FREEZE]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s48) = G_TRUNC %0 - %2:_(s48) = G_FREEZE %1 - %3:_(s64) = G_ANYEXT %2 - $vgpr0_vgpr1 = COPY %3 + ; CHECK: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(i64) = G_FREEZE [[COPY]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[FREEZE]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i48) = G_TRUNC %0(i64) + %2:_(i48) = G_FREEZE %1 + %3:_(i64) = G_ANYEXT %2(i48) + $vgpr0_vgpr1 = COPY %3(i64) ... --- @@ -106,12 +106,12 @@ body: | bb.0: ; CHECK-LABEL: name: test_freeze_s64 - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[COPY]] - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[FREEZE]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_FREEZE %0 - $vgpr0_vgpr1 = COPY %1 + ; CHECK: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(i64) = G_FREEZE [[COPY]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[FREEZE]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = G_FREEZE %0 + $vgpr0_vgpr1 = COPY %1(i64) ... 
--- @@ -120,20 +120,20 @@ body: | bb.0: ; CHECK-LABEL: name: test_freeze_s65 - ; CHECK: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32) - ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64) - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s128) = G_FREEZE [[MV2]] - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s96) = G_TRUNC [[FREEZE]](s128) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC]](s96) - %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(s65) = G_TRUNC %0 - %2:_(s65) = G_FREEZE %1 - %3:_(s96) = G_ANYEXT %2 - $vgpr0_vgpr1_vgpr2 = COPY %3 + ; CHECK: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i96) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV]](i32), [[UV1]](i32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV2]](i32), [[DEF]](i32) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[MV]](i64), [[MV1]](i64) + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(i128) = G_FREEZE [[MV2]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i96) = G_TRUNC [[FREEZE]](i128) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC]](i96) + %0:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(i65) = G_TRUNC %0(i96) + %2:_(i65) = G_FREEZE %1 + %3:_(i96) = G_ANYEXT %2(i65) + $vgpr0_vgpr1_vgpr2 = COPY %3(i96) ... --- @@ -142,12 +142,12 @@ body: | bb.0: ; CHECK-LABEL: name: test_freeze_s128 - ; CHECK: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s128) = G_FREEZE [[COPY]] - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FREEZE]](s128) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s128) = G_FREEZE %0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + ; CHECK: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(i128) = G_FREEZE [[COPY]] + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FREEZE]](i128) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i128) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(i128) ... --- @@ -156,12 +156,12 @@ body: | bb.0: ; CHECK-LABEL: name: test_freeze_256 - ; CHECK: [[COPY:%[0-9]+]]:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s256) = G_FREEZE [[COPY]] - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[FREEZE]](s256) - %0:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - %1:_(s256) = G_FREEZE %0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 + ; CHECK: [[COPY:%[0-9]+]]:_(i256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(i256) = G_FREEZE [[COPY]] + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[FREEZE]](i256) + %0:_(i256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(i256) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1(i256) ... 
--- @@ -170,14 +170,14 @@ body: | bb.0: ; CHECK-LABEL: name: test_freeze_s448 - ; CHECK: [[COPY:%[0-9]+]]:_(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s512) = G_FREEZE [[COPY]] - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[FREEZE]](s512) - %0:_(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - %1:_(s448) = G_TRUNC %0 - %2:_(s448) = G_FREEZE %1 - %3:_(s512) = G_ANYEXT %2 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3 + ; CHECK: [[COPY:%[0-9]+]]:_(i512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(i512) = G_FREEZE [[COPY]] + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[FREEZE]](i512) + %0:_(i512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:_(i448) = G_TRUNC %0(i512) + %2:_(i448) = G_FREEZE %1 + %3:_(i512) = G_ANYEXT %2(i448) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3(i512) ... --- @@ -186,12 +186,12 @@ body: | bb.0: ; CHECK-LABEL: name: test_freeze_s512 - ; CHECK: [[COPY:%[0-9]+]]:_(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s512) = G_FREEZE [[COPY]] - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[FREEZE]](s512) - %0:_(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - %1:_(s512) = G_FREEZE %0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1 + ; CHECK: [[COPY:%[0-9]+]]:_(i512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(i512) = G_FREEZE [[COPY]] + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[FREEZE]](i512) + %0:_(i512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:_(i512) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1(i512) ... 
--- @@ -200,16 +200,16 @@ body: | bb.0: ; CHECK-LABEL: name: test_freeze_s1024 - ; CHECK: [[COPY:%[0-9]+]]:_(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s512) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV3]](s64), [[UV4]](s64), [[UV5]](s64), [[UV6]](s64), [[UV7]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64) - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s1024) = G_FREEZE [[MV]] - ; CHECK-NEXT: S_NOP 0, implicit [[FREEZE]](s1024) - %0:_(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - %1:_(s1024) = G_ANYEXT %0 - %2:_(s1024) = G_FREEZE %1 - S_NOP 0, implicit %2 + ; CHECK: [[COPY:%[0-9]+]]:_(i512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64), [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64), [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64), [[UV6:%[0-9]+]]:_(i64), [[UV7:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i512) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i1024) = G_MERGE_VALUES [[UV]](i64), [[UV1]](i64), [[UV2]](i64), [[UV3]](i64), [[UV4]](i64), [[UV5]](i64), [[UV6]](i64), [[UV7]](i64), [[DEF]](i64), [[DEF]](i64), [[DEF]](i64), [[DEF]](i64), [[DEF]](i64), [[DEF]](i64), [[DEF]](i64), [[DEF]](i64) + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(i1024) = G_FREEZE [[MV]] + ; CHECK-NEXT: S_NOP 0, implicit [[FREEZE]](i1024) + %0:_(i512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:_(i1024) = G_ANYEXT %0(i512) + %2:_(i1024) = G_FREEZE %1 + S_NOP 0, implicit %2(i1024) ... --- @@ -218,14 +218,14 @@ body: | bb.0: ; CHECK-LABEL: name: test_freeze_s1056 - ; CHECK: [[COPY:%[0-9]+]]:_(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s1056) = G_ANYEXT [[COPY]](s512) - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s1056) = G_FREEZE [[ANYEXT]] - ; CHECK-NEXT: S_NOP 0, implicit [[FREEZE]](s1056) - %0:_(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - %1:_(s1056) = G_ANYEXT %0 - %2:_(s1056) = G_FREEZE %1 - S_NOP 0, implicit %2 + ; CHECK: [[COPY:%[0-9]+]]:_(i512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i1056) = G_ANYEXT [[COPY]](i512) + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(i1056) = G_FREEZE [[ANYEXT]] + ; CHECK-NEXT: S_NOP 0, implicit [[FREEZE]](i1056) + %0:_(i512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:_(i1056) = G_ANYEXT %0(i512) + %2:_(i1056) = G_FREEZE %1 + S_NOP 0, implicit %2(i1056) ... 
--- @@ -234,19 +234,19 @@ body: | bb.0: ; CHECK-LABEL: name: test_freeze_s2048 - ; CHECK: [[COPY:%[0-9]+]]:_(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s512) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV3]](s64), [[UV4]](s64), [[UV5]](s64), [[UV6]](s64), [[UV7]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64) - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s1024) = G_FREEZE [[MV]] - ; CHECK-NEXT: [[FREEZE1:%[0-9]+]]:_(s1024) = G_FREEZE [[MV1]] - ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s2048) = G_MERGE_VALUES [[FREEZE]](s1024), [[FREEZE1]](s1024) - ; CHECK-NEXT: S_NOP 0, implicit [[MV2]](s2048) - %0:_(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - %1:_(s2048) = G_ANYEXT %0 - %2:_(s2048) = G_FREEZE %1 - S_NOP 0, implicit %2 + ; CHECK: [[COPY:%[0-9]+]]:_(i512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64), [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64), [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64), [[UV6:%[0-9]+]]:_(i64), [[UV7:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i512) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i1024) = G_MERGE_VALUES [[UV]](i64), [[UV1]](i64), [[UV2]](i64), [[UV3]](i64), [[UV4]](i64), [[UV5]](i64), [[UV6]](i64), [[UV7]](i64), [[DEF]](i64), [[DEF]](i64), [[DEF]](i64), [[DEF]](i64), [[DEF]](i64), [[DEF]](i64), [[DEF]](i64), [[DEF]](i64) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i1024) = G_MERGE_VALUES [[DEF]](i64), [[DEF]](i64), [[DEF]](i64), [[DEF]](i64), [[DEF]](i64), [[DEF]](i64), [[DEF]](i64), [[DEF]](i64), [[DEF]](i64), [[DEF]](i64), [[DEF]](i64), [[DEF]](i64), [[DEF]](i64), [[DEF]](i64), [[DEF]](i64), [[DEF]](i64) + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(i1024) = G_FREEZE [[MV]] + ; CHECK-NEXT: [[FREEZE1:%[0-9]+]]:_(i1024) = G_FREEZE [[MV1]] + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(i2048) = G_MERGE_VALUES [[FREEZE]](i1024), [[FREEZE1]](i1024) + ; CHECK-NEXT: S_NOP 0, implicit [[MV2]](i2048) + %0:_(i512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:_(i2048) = G_ANYEXT %0(i512) + %2:_(i2048) = G_FREEZE %1 + S_NOP 0, implicit %2(i2048) ... 
--- @@ -255,12 +255,12 @@ body: | bb.0: ; CHECK-LABEL: name: test_freeze_v2s32 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<2 x s32>) = G_FREEZE [[COPY]] - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[FREEZE]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_FREEZE %0 - $vgpr0_vgpr1 = COPY %1 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<2 x i32>) = G_FREEZE [[COPY]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[FREEZE]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = G_FREEZE %0 + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... --- @@ -269,12 +269,12 @@ body: | bb.0: ; CHECK-LABEL: name: test_freeze_v3s32 - ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<3 x s32>) = G_FREEZE [[COPY]] - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[FREEZE]](<3 x s32>) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s32>) = G_FREEZE %0 - $vgpr0_vgpr1_vgpr2 = COPY %1 + ; CHECK: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<3 x i32>) = G_FREEZE [[COPY]] + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[FREEZE]](<3 x i32>) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x i32>) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2 = COPY %1(<3 x i32>) ... --- @@ -283,12 +283,12 @@ body: | bb.0: ; CHECK-LABEL: name: test_freeze_v4s32 - ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<4 x s32>) = G_FREEZE [[COPY]] - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FREEZE]](<4 x s32>) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<4 x s32>) = G_FREEZE %0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + ; CHECK: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<4 x i32>) = G_FREEZE [[COPY]] + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FREEZE]](<4 x i32>) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<4 x i32>) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<4 x i32>) ... --- @@ -297,12 +297,12 @@ body: | bb.0: ; CHECK-LABEL: name: test_freeze_v5s32 - ; CHECK: [[COPY:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<5 x s32>) = G_FREEZE [[COPY]] - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY [[FREEZE]](<5 x s32>) - %0:_(<5 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - %1:_(<5 x s32>) = G_FREEZE %0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY %1 + ; CHECK: [[COPY:%[0-9]+]]:_(<5 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<5 x i32>) = G_FREEZE [[COPY]] + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY [[FREEZE]](<5 x i32>) + %0:_(<5 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + %1:_(<5 x i32>) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY %1(<5 x i32>) ... --- @@ -311,12 +311,12 @@ body: | bb.0: ; CHECK-LABEL: name: test_freeze_v6s32 - ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<6 x s32>) = G_FREEZE [[DEF]] - ; CHECK-NEXT: S_NOP 0, implicit [[FREEZE]](<6 x s32>) - %0:_(<6 x s32>) = G_IMPLICIT_DEF - %1:_(<6 x s32>) = G_FREEZE %0 - S_NOP 0, implicit %1 + ; CHECK: [[DEF:%[0-9]+]]:_(<6 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<6 x i32>) = G_FREEZE [[DEF]] + ; CHECK-NEXT: S_NOP 0, implicit [[FREEZE]](<6 x i32>) + %0:_(<6 x i32>) = G_IMPLICIT_DEF + %1:_(<6 x i32>) = G_FREEZE %0 + S_NOP 0, implicit %1(<6 x i32>) ... 
--- @@ -325,12 +325,12 @@ body: | bb.0: ; CHECK-LABEL: name: test_freeze_v7s32 - ; CHECK: [[DEF:%[0-9]+]]:_(<7 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<7 x s32>) = G_FREEZE [[DEF]] - ; CHECK-NEXT: S_NOP 0, implicit [[FREEZE]](<7 x s32>) - %0:_(<7 x s32>) = G_IMPLICIT_DEF - %1:_(<7 x s32>) = G_FREEZE %0 - S_NOP 0, implicit %1 + ; CHECK: [[DEF:%[0-9]+]]:_(<7 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<7 x i32>) = G_FREEZE [[DEF]] + ; CHECK-NEXT: S_NOP 0, implicit [[FREEZE]](<7 x i32>) + %0:_(<7 x i32>) = G_IMPLICIT_DEF + %1:_(<7 x i32>) = G_FREEZE %0 + S_NOP 0, implicit %1(<7 x i32>) ... --- @@ -339,12 +339,12 @@ body: | bb.0: ; CHECK-LABEL: name: test_freeze_v8s32 - ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<8 x s32>) = G_FREEZE [[COPY]] - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[FREEZE]](<8 x s32>) - %0:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - %1:_(<8 x s32>) = G_FREEZE %0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 + ; CHECK: [[COPY:%[0-9]+]]:_(<8 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<8 x i32>) = G_FREEZE [[COPY]] + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[FREEZE]](<8 x i32>) + %0:_(<8 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(<8 x i32>) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1(<8 x i32>) ... --- @@ -353,12 +353,12 @@ body: | bb.0: ; CHECK-LABEL: name: test_freeze_v16s32 - ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[COPY]] - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[FREEZE]](<16 x s32>) - %0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - %1:_(<16 x s32>) = G_FREEZE %0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1 + ; CHECK: [[COPY:%[0-9]+]]:_(<16 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<16 x i32>) = G_FREEZE [[COPY]] + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[FREEZE]](<16 x i32>) + %0:_(<16 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:_(<16 x i32>) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1(<16 x i32>) ... 
--- @@ -367,12 +367,12 @@ body: | bb.0: ; CHECK-LABEL: name: test_freeze_v17s32 - ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[COPY]] - ; CHECK-NEXT: S_NOP 0, implicit [[FREEZE]](<16 x s32>) - %0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - %1:_(<16 x s32>) = G_FREEZE %0 - S_NOP 0, implicit %1 + ; CHECK: [[COPY:%[0-9]+]]:_(<16 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<16 x i32>) = G_FREEZE [[COPY]] + ; CHECK-NEXT: S_NOP 0, implicit [[FREEZE]](<16 x i32>) + %0:_(<16 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:_(<16 x i32>) = G_FREEZE %0 + S_NOP 0, implicit %1(<16 x i32>) ... --- @@ -381,12 +381,12 @@ body: | bb.0: ; CHECK-LABEL: name: test_freeze_v32s32 - ; CHECK: [[DEF:%[0-9]+]]:_(<32 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<32 x s32>) = G_FREEZE [[DEF]] - ; CHECK-NEXT: S_NOP 0, implicit [[FREEZE]](<32 x s32>) - %0:_(<32 x s32>) = G_IMPLICIT_DEF - %1:_(<32 x s32>) = G_FREEZE %0 - S_NOP 0, implicit %1 + ; CHECK: [[DEF:%[0-9]+]]:_(<32 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<32 x i32>) = G_FREEZE [[DEF]] + ; CHECK-NEXT: S_NOP 0, implicit [[FREEZE]](<32 x i32>) + %0:_(<32 x i32>) = G_IMPLICIT_DEF + %1:_(<32 x i32>) = G_FREEZE %0 + S_NOP 0, implicit %1(<32 x i32>) ... --- @@ -395,16 +395,16 @@ body: | bb.0: ; CHECK-LABEL: name: test_freeze_v33s32 - ; CHECK: [[DEF:%[0-9]+]]:_(<32 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<32 x s32>) = G_FREEZE [[DEF]] - ; CHECK-NEXT: [[FREEZE1:%[0-9]+]]:_(s32) = G_FREEZE [[DEF1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FREEZE]](<32 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<33 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32), [[UV8]](s32), [[UV9]](s32), [[UV10]](s32), [[UV11]](s32), [[UV12]](s32), [[UV13]](s32), [[UV14]](s32), [[UV15]](s32), [[UV16]](s32), [[UV17]](s32), [[UV18]](s32), [[UV19]](s32), [[UV20]](s32), [[UV21]](s32), [[UV22]](s32), [[UV23]](s32), [[UV24]](s32), [[UV25]](s32), [[UV26]](s32), [[UV27]](s32), [[UV28]](s32), [[UV29]](s32), [[UV30]](s32), [[UV31]](s32), [[FREEZE1]](s32) - ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<33 x s32>) - %0:_(<33 x s32>) = G_IMPLICIT_DEF - %1:_(<33 x s32>) = G_FREEZE %0 - S_NOP 0, implicit %1 + 
; CHECK: [[DEF:%[0-9]+]]:_(<32 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<32 x i32>) = G_FREEZE [[DEF]] + ; CHECK-NEXT: [[FREEZE1:%[0-9]+]]:_(i32) = G_FREEZE [[DEF1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32), [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32), [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32), [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32), [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32), [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32), [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32), [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32), [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32), [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32), [[UV28:%[0-9]+]]:_(i32), [[UV29:%[0-9]+]]:_(i32), [[UV30:%[0-9]+]]:_(i32), [[UV31:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[FREEZE]](<32 x i32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<33 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32), [[UV4]](i32), [[UV5]](i32), [[UV6]](i32), [[UV7]](i32), [[UV8]](i32), [[UV9]](i32), [[UV10]](i32), [[UV11]](i32), [[UV12]](i32), [[UV13]](i32), [[UV14]](i32), [[UV15]](i32), [[UV16]](i32), [[UV17]](i32), [[UV18]](i32), [[UV19]](i32), [[UV20]](i32), [[UV21]](i32), [[UV22]](i32), [[UV23]](i32), [[UV24]](i32), [[UV25]](i32), [[UV26]](i32), [[UV27]](i32), [[UV28]](i32), [[UV29]](i32), [[UV30]](i32), [[UV31]](i32), [[FREEZE1]](i32) + ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<33 x i32>) + %0:_(<33 x i32>) = G_IMPLICIT_DEF + %1:_(<33 x i32>) = G_FREEZE %0 + S_NOP 0, implicit %1(<33 x i32>) ... --- @@ -413,14 +413,14 @@ body: | bb.0: ; CHECK-LABEL: name: test_freeze_v64s32 - ; CHECK: [[DEF:%[0-9]+]]:_(<32 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<32 x s32>) = G_FREEZE [[DEF]] - ; CHECK-NEXT: [[FREEZE1:%[0-9]+]]:_(<32 x s32>) = G_FREEZE [[DEF]] - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<64 x s32>) = G_CONCAT_VECTORS [[FREEZE]](<32 x s32>), [[FREEZE1]](<32 x s32>) - ; CHECK-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<64 x s32>) - %0:_(<64 x s32>) = G_IMPLICIT_DEF - %1:_(<64 x s32>) = G_FREEZE %0 - S_NOP 0, implicit %1 + ; CHECK: [[DEF:%[0-9]+]]:_(<32 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<32 x i32>) = G_FREEZE [[DEF]] + ; CHECK-NEXT: [[FREEZE1:%[0-9]+]]:_(<32 x i32>) = G_FREEZE [[DEF]] + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<64 x i32>) = G_CONCAT_VECTORS [[FREEZE]](<32 x i32>), [[FREEZE1]](<32 x i32>) + ; CHECK-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<64 x i32>) + %0:_(<64 x i32>) = G_IMPLICIT_DEF + %1:_(<64 x i32>) = G_FREEZE %0 + S_NOP 0, implicit %1(<64 x i32>) ... 
--- @@ -432,23 +432,23 @@ body: | ; CHECK-LABEL: name: test_freeze_v2s1 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV2]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV3]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<2 x s32>) = G_FREEZE [[BUILD_VECTOR]] - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[FREEZE]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s1>) = G_ICMP intpred(ne), %0, %1 - %3:_(<2 x s1>) = G_FREEZE %2 - %4:_(<2 x s32>) = G_ANYEXT %3 - $vgpr0_vgpr1 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV]](i32), [[UV2]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV1]](i32), [[UV3]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP]](i1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP1]](i1) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32) + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<2 x i32>) = G_FREEZE [[BUILD_VECTOR]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[FREEZE]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i1>) = G_ICMP intpred(ne), %0(<2 x i32>), %1 + %3:_(<2 x i1>) = G_FREEZE %2 + %4:_(<2 x i32>) = G_ANYEXT %3(<2 x i1>) + $vgpr0_vgpr1 = COPY %4(<2 x i32>) ... 
--- @@ -460,25 +460,25 @@ body: | ; CHECK-LABEL: name: test_freeze_v3s1 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV3]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV4]] - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[UV5]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP2]](s1) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<3 x s32>) = G_FREEZE [[BUILD_VECTOR]] - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[FREEZE]](<3 x s32>) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - %2:_(<3 x s1>) = G_ICMP intpred(ne), %0, %1 - %3:_(<3 x s1>) = G_FREEZE %2 - %4:_(<3 x s32>) = G_ANYEXT %3 - $vgpr0_vgpr1_vgpr2 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV]](i32), [[UV3]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV1]](i32), [[UV4]] + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV2]](i32), [[UV5]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP]](i1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP1]](i1) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP2]](i1) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32), [[ANYEXT2]](i32) + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<3 x i32>) = G_FREEZE [[BUILD_VECTOR]] + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[FREEZE]](<3 x i32>) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(<3 x i1>) = G_ICMP intpred(ne), %0(<3 x i32>), %1 + %3:_(<3 x i1>) = G_FREEZE %2 + %4:_(<3 x i32>) = G_ANYEXT %3(<3 x i1>) + $vgpr0_vgpr1_vgpr2 = COPY %4(<3 x i32>) ... 
--- @@ -487,14 +487,14 @@ body: | bb.0: ; CHECK-LABEL: name: test_freeze_v2s8 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<2 x s32>) = G_FREEZE [[COPY]] - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[FREEZE]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s8>) = G_TRUNC %0 - %2:_(<2 x s8>) = G_FREEZE %1 - %3:_(<2 x s32>) = G_ANYEXT %2 - $vgpr0_vgpr1 = COPY %3 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<2 x i32>) = G_FREEZE [[COPY]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[FREEZE]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i8>) = G_TRUNC %0(<2 x i32>) + %2:_(<2 x i8>) = G_FREEZE %1 + %3:_(<2 x i32>) = G_ANYEXT %2(<2 x i8>) + $vgpr0_vgpr1 = COPY %3(<2 x i32>) ... --- @@ -503,19 +503,19 @@ body: | bb.0: ; CHECK-LABEL: name: test_freeze_v3s8 - ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[DEF]](s32) - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<4 x s32>) = G_FREEZE [[BUILD_VECTOR]] - ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FREEZE]](<4 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR1]](<3 x s32>) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s8>) = G_TRUNC %0 - %2:_(<3 x s8>) = G_FREEZE %1 - %3:_(<3 x s32>) = G_ANYEXT %2 - $vgpr0_vgpr1_vgpr2 = COPY %3 + ; CHECK: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[DEF]](i32) + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<4 x i32>) = G_FREEZE [[BUILD_VECTOR]] + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[FREEZE]](<4 x i32>) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[UV3]](i32), [[UV4]](i32), [[UV5]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR1]](<3 x i32>) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x i8>) = G_TRUNC %0(<3 x i32>) + %2:_(<3 x i8>) = G_FREEZE %1 + %3:_(<3 x i32>) = G_ANYEXT %2(<3 x i8>) + $vgpr0_vgpr1_vgpr2 = COPY %3(<3 x i32>) ... --- @@ -524,12 +524,12 @@ body: | bb.0: ; CHECK-LABEL: name: test_freeze_v2s16 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<2 x s16>) = G_FREEZE [[COPY]] - ; CHECK-NEXT: $vgpr0 = COPY [[FREEZE]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = G_FREEZE %0 - $vgpr0 = COPY %1 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<2 x i16>) = G_FREEZE [[COPY]] + ; CHECK-NEXT: $vgpr0 = COPY [[FREEZE]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = G_FREEZE %0 + $vgpr0 = COPY %1(<2 x i16>) ... 
--- @@ -538,33 +538,33 @@ body: | bb.0: ; CHECK-LABEL: name: test_freeze_v3s16 - ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<4 x s16>) = G_FREEZE [[CONCAT_VECTORS]] - ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FREEZE]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST2]](s32), [[LSHR]](s32), [[BITCAST3]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s16>) = G_TRUNC %0 - %2:_(<3 x s16>) = G_FREEZE %1 - %3:_(<3 x s32>) = G_ANYEXT %2 - $vgpr0_vgpr1_vgpr2 = COPY %3 + ; CHECK: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[UV]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[UV1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[UV2]], [[C]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[C2]], [[C1]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x i16>), [[BITCAST1]](<2 x i16>) + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<4 x i16>) = G_FREEZE [[CONCAT_VECTORS]] + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[FREEZE]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C1]](i32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[BITCAST2]](i32), 
[[LSHR]](i32), [[BITCAST3]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x i16>) = G_TRUNC %0(<3 x i32>) + %2:_(<3 x i16>) = G_FREEZE %1 + %3:_(<3 x i32>) = G_ANYEXT %2(<3 x i16>) + $vgpr0_vgpr1_vgpr2 = COPY %3(<3 x i32>) ... --- @@ -573,12 +573,12 @@ body: | bb.0: ; CHECK-LABEL: name: test_freeze_v4s16 - ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<4 x s16>) = G_FREEZE [[COPY]] - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[FREEZE]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = G_FREEZE %0 - $vgpr0_vgpr1 = COPY %1 + ; CHECK: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<4 x i16>) = G_FREEZE [[COPY]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[FREEZE]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = G_FREEZE %0 + $vgpr0_vgpr1 = COPY %1(<4 x i16>) ... --- @@ -587,40 +587,40 @@ body: | bb.0: ; CHECK-LABEL: name: test_freeze_v5s16 - ; CHECK: [[COPY:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<5 x s32>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<6 x s16>) = G_FREEZE [[CONCAT_VECTORS]] - ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FREEZE]](<6 x s16>) - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32) - ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[BITCAST3]](s32), [[LSHR]](s32), [[BITCAST4]](s32), [[LSHR1]](s32), [[BITCAST5]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY [[BUILD_VECTOR]](<5 x s32>) - %0:_(<5 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - %1:_(<5 x s16>) = G_TRUNC %0 - %2:_(<5 x s16>) = G_FREEZE %1 - %3:_(<5 x s32>) = G_ANYEXT %2 - 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY %3 + ; CHECK: [[COPY:%[0-9]+]]:_(<5 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<5 x i32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[UV]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[UV1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[UV2]], [[C]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[UV3]], [[C]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C1]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[UV4]], [[C]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[C2]], [[C1]](i32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[AND4]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x i16>), [[BITCAST1]](<2 x i16>), [[BITCAST2]](<2 x i16>) + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<6 x i16>) = G_FREEZE [[CONCAT_VECTORS]] + ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(<2 x i16>), [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[FREEZE]](<6 x i16>) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV5]](<2 x i16>) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C1]](i32) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[UV6]](<2 x i16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C1]](i32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[UV7]](<2 x i16>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x i32>) = G_BUILD_VECTOR [[BITCAST3]](i32), [[LSHR]](i32), [[BITCAST4]](i32), [[LSHR1]](i32), [[BITCAST5]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY [[BUILD_VECTOR]](<5 x i32>) + %0:_(<5 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + %1:_(<5 x i16>) = G_TRUNC %0(<5 x i32>) + %2:_(<5 x i16>) = G_FREEZE %1 + %3:_(<5 x i32>) = G_ANYEXT %2(<5 x i16>) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY %3(<5 x i32>) ... --- @@ -629,12 +629,12 @@ body: | bb.0: ; CHECK-LABEL: name: test_freeze_v6s16 - ; CHECK: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<6 x s16>) = G_FREEZE [[COPY]] - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[FREEZE]](<6 x s16>) - %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<6 x s16>) = G_FREEZE %0 - $vgpr0_vgpr1_vgpr2 = COPY %1 + ; CHECK: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<6 x i16>) = G_FREEZE [[COPY]] + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[FREEZE]](<6 x i16>) + %0:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<6 x i16>) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2 = COPY %1(<6 x i16>) ... 
--- @@ -643,12 +643,12 @@ body: | bb.0: ; CHECK-LABEL: name: test_freeze_v8s16 - ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<8 x s16>) = G_FREEZE [[COPY]] - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FREEZE]](<8 x s16>) - %0:_(<8 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<8 x s16>) = G_FREEZE %0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + ; CHECK: [[COPY:%[0-9]+]]:_(<8 x i16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<8 x i16>) = G_FREEZE [[COPY]] + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FREEZE]](<8 x i16>) + %0:_(<8 x i16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<8 x i16>) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<8 x i16>) ... --- @@ -657,12 +657,12 @@ body: | bb.0: ; CHECK-LABEL: name: test_freeze_v2s64 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<2 x s64>) = G_FREEZE [[COPY]] - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FREEZE]](<2 x s64>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s64>) = G_FREEZE %0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<2 x i64>) = G_FREEZE [[COPY]] + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FREEZE]](<2 x i64>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x i64>) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x i64>) ... --- @@ -674,30 +674,30 @@ body: | ; CHECK-LABEL: name: test_freeze_v4s8 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<4 x s32>) = G_FREEZE [[BUILD_VECTOR]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FREEZE]](<4 x s32>) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[UV]](s32), implicit [[UV1]](s32), implicit [[UV2]](s32), implicit [[UV3]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = COPY $vgpr3 - %4:_(s8) = G_TRUNC %0 - %5:_(s8) = G_TRUNC %1 - %6:_(s8) = G_TRUNC %2 - %7:_(s8) = G_TRUNC %3 - %8:_(<4 x s8>) = G_BUILD_VECTOR %4, %5, %6, %7 - %9:_(<4 x s8>) = G_FREEZE %8 - %10:_(s8), %11:_(s8), %12:_(s8), %13:_(s8) = G_UNMERGE_VALUES %9 - %14:_(s32) = G_ANYEXT %10 - %15:_(s32) = G_ANYEXT %11 - %16:_(s32) = G_ANYEXT %12 - %17:_(s32) = G_ANYEXT %13 - S_ENDPGM 0, implicit %14, implicit %15, implicit %16, implicit %17 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<4 x i32>) = G_FREEZE [[BUILD_VECTOR]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[FREEZE]](<4 x i32>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[UV]](i32), implicit [[UV1]](i32), implicit [[UV2]](i32), 
implicit [[UV3]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 + %4:_(i8) = G_TRUNC %0(i32) + %5:_(i8) = G_TRUNC %1(i32) + %6:_(i8) = G_TRUNC %2(i32) + %7:_(i8) = G_TRUNC %3(i32) + %8:_(<4 x i8>) = G_BUILD_VECTOR %4(i8), %5(i8), %6(i8), %7(i8) + %9:_(<4 x i8>) = G_FREEZE %8 + %10:_(i8), %11:_(i8), %12:_(i8), %13:_(i8) = G_UNMERGE_VALUES %9(<4 x i8>) + %14:_(i32) = G_ANYEXT %10(i8) + %15:_(i32) = G_ANYEXT %11(i8) + %16:_(i32) = G_ANYEXT %12(i8) + %17:_(i32) = G_ANYEXT %13(i8) + S_ENDPGM 0, implicit %14(i32), implicit %15(i32), implicit %16(i32), implicit %17(i32) ... --- @@ -711,7 +711,7 @@ body: | ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[FREEZE]](p0) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(p0) = G_FREEZE %0 - $vgpr0_vgpr1 = COPY %1 + $vgpr0_vgpr1 = COPY %1(p0) ... --- @@ -725,7 +725,7 @@ body: | ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[FREEZE]](p1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(p1) = G_FREEZE %0 - $vgpr0_vgpr1 = COPY %1 + $vgpr0_vgpr1 = COPY %1(p1) ... --- @@ -739,7 +739,7 @@ body: | ; CHECK-NEXT: $vgpr0 = COPY [[FREEZE]](p2) %0:_(p2) = COPY $vgpr0 %1:_(p2) = G_FREEZE %0 - $vgpr0 = COPY %1 + $vgpr0 = COPY %1(p2) ... --- @@ -753,7 +753,7 @@ body: | ; CHECK-NEXT: $vgpr0 = COPY [[FREEZE]](p3) %0:_(p3) = COPY $vgpr0 %1:_(p3) = G_FREEZE %0 - $vgpr0 = COPY %1 + $vgpr0 = COPY %1(p3) ... --- @@ -767,7 +767,7 @@ body: | ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[FREEZE]](p4) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(p4) = G_FREEZE %0 - $vgpr0_vgpr1 = COPY %1 + $vgpr0_vgpr1 = COPY %1(p4) ... --- @@ -781,7 +781,7 @@ body: | ; CHECK-NEXT: $vgpr0 = COPY [[FREEZE]](p5) %0:_(p5) = COPY $vgpr0 %1:_(p5) = G_FREEZE %0 - $vgpr0 = COPY %1 + $vgpr0 = COPY %1(p5) ... --- @@ -795,7 +795,7 @@ body: | ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[FREEZE]](p999) %0:_(p999) = COPY $vgpr0_vgpr1 %1:_(p999) = G_FREEZE %0 - $vgpr0_vgpr1 = COPY %1 + $vgpr0_vgpr1 = COPY %1(p999) ... @@ -805,12 +805,12 @@ body: | bb.0: ; CHECK-LABEL: name: test_freeze_v2s1024 - ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s1024>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<2 x s1024>) = G_FREEZE [[DEF]] - ; CHECK-NEXT: S_NOP 0, implicit [[FREEZE]](<2 x s1024>) - %0:_(<2 x s1024>) = G_IMPLICIT_DEF - %1:_(<2 x s1024>) = G_FREEZE %0 - S_NOP 0, implicit %1 + ; CHECK: [[DEF:%[0-9]+]]:_(<2 x i1024>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<2 x i1024>) = G_FREEZE [[DEF]] + ; CHECK-NEXT: S_NOP 0, implicit [[FREEZE]](<2 x i1024>) + %0:_(<2 x i1024>) = G_IMPLICIT_DEF + %1:_(<2 x i1024>) = G_FREEZE %0 + S_NOP 0, implicit %1(<2 x i1024>) ... --- @@ -820,10 +820,10 @@ body: | bb.0: ; CHECK-LABEL: name: test_freeze_v3s1024 - ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s1024>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<3 x s1024>) = G_FREEZE [[DEF]] - ; CHECK-NEXT: S_NOP 0, implicit [[FREEZE]](<3 x s1024>) - %0:_(<3 x s1024>) = G_IMPLICIT_DEF - %1:_(<3 x s1024>) = G_FREEZE %0 - S_NOP 0, implicit %1 + ; CHECK: [[DEF:%[0-9]+]]:_(<3 x i1024>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<3 x i1024>) = G_FREEZE [[DEF]] + ; CHECK-NEXT: S_NOP 0, implicit [[FREEZE]](<3 x i1024>) + %0:_(<3 x i1024>) = G_IMPLICIT_DEF + %1:_(<3 x i1024>) = G_FREEZE %0 + S_NOP 0, implicit %1(<3 x i1024>) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir index 240036207bd0d..62038db199fb2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir @@ -12,49 +12,49 @@ body: | ; SI-LABEL: name: test_fshl_s32_s32 ; SI: liveins: $vgpr0, $vgpr1, $vgpr2 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[COPY]], [[COPY1]], [[C]](s32) - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; SI-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] - ; SI-NEXT: [[FSHR1:%[0-9]+]]:_(s32) = G_FSHR [[LSHR]], [[FSHR]], [[XOR]](s32) - ; SI-NEXT: $vgpr0 = COPY [[FSHR1]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[FSHR:%[0-9]+]]:_(i32) = G_FSHR [[COPY]], [[COPY1]], [[C]](i32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; SI-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[COPY2]], [[C1]] + ; SI-NEXT: [[FSHR1:%[0-9]+]]:_(i32) = G_FSHR [[LSHR]], [[FSHR]], [[XOR]](i32) + ; SI-NEXT: $vgpr0 = COPY [[FSHR1]](i32) ; ; VI-LABEL: name: test_fshl_s32_s32 ; VI: liveins: $vgpr0, $vgpr1, $vgpr2 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[COPY]], [[COPY1]], [[C]](s32) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; VI-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] - ; VI-NEXT: [[FSHR1:%[0-9]+]]:_(s32) = G_FSHR [[LSHR]], [[FSHR]], [[XOR]](s32) - ; VI-NEXT: $vgpr0 = COPY [[FSHR1]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[FSHR:%[0-9]+]]:_(i32) = G_FSHR [[COPY]], [[COPY1]], [[C]](i32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; VI-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[COPY2]], [[C1]] + ; VI-NEXT: [[FSHR1:%[0-9]+]]:_(i32) = G_FSHR [[LSHR]], [[FSHR]], [[XOR]](i32) + ; VI-NEXT: $vgpr0 = COPY [[FSHR1]](i32) ; ; GFX9-LABEL: name: test_fshl_s32_s32 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[COPY]], [[COPY1]], [[C]](s32) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] - ; GFX9-NEXT: [[FSHR1:%[0-9]+]]:_(s32) = G_FSHR [[LSHR]], [[FSHR]], [[XOR]](s32) - ; GFX9-NEXT: 
$vgpr0 = COPY [[FSHR1]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = G_FSHL %0, %1, %2 - $vgpr0 = COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[FSHR:%[0-9]+]]:_(i32) = G_FSHR [[COPY]], [[COPY1]], [[C]](i32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[COPY2]], [[C1]] + ; GFX9-NEXT: [[FSHR1:%[0-9]+]]:_(i32) = G_FSHR [[LSHR]], [[FSHR]], [[XOR]](i32) + ; GFX9-NEXT: $vgpr0 = COPY [[FSHR1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = G_FSHL %0, %1, %2(i32) + $vgpr0 = COPY %3(i32) ... --- @@ -66,73 +66,73 @@ body: | ; SI-LABEL: name: test_fshl_v2s32_v2s32 ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[UV]], [[UV2]], [[C]](s32) - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; SI-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV4]], [[C1]] - ; SI-NEXT: [[FSHR1:%[0-9]+]]:_(s32) = G_FSHR [[LSHR]], [[FSHR]], [[XOR]](s32) - ; SI-NEXT: [[FSHR2:%[0-9]+]]:_(s32) = G_FSHR [[UV1]], [[UV3]], [[C]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) - ; SI-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[UV5]], [[C1]] - ; SI-NEXT: [[FSHR3:%[0-9]+]]:_(s32) = G_FSHR [[LSHR1]], [[FSHR2]], [[XOR1]](s32) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSHR1]](s32), [[FSHR3]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr4_vgpr5 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; SI-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY2]](<2 x i32>) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[FSHR:%[0-9]+]]:_(i32) = G_FSHR [[UV]], [[UV2]], [[C]](i32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; SI-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[UV4]], [[C1]] + ; SI-NEXT: [[FSHR1:%[0-9]+]]:_(i32) = G_FSHR [[LSHR]], [[FSHR]], [[XOR]](i32) + ; SI-NEXT: [[FSHR2:%[0-9]+]]:_(i32) = G_FSHR [[UV1]], [[UV3]], [[C]](i32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C]](i32) + ; SI-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[UV5]], [[C1]] + ; SI-NEXT: [[FSHR3:%[0-9]+]]:_(i32) = G_FSHR [[LSHR1]], [[FSHR2]], [[XOR1]](i32) + ; 
SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[FSHR1]](i32), [[FSHR3]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; VI-LABEL: name: test_fshl_v2s32_v2s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[UV]], [[UV2]], [[C]](s32) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; VI-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV4]], [[C1]] - ; VI-NEXT: [[FSHR1:%[0-9]+]]:_(s32) = G_FSHR [[LSHR]], [[FSHR]], [[XOR]](s32) - ; VI-NEXT: [[FSHR2:%[0-9]+]]:_(s32) = G_FSHR [[UV1]], [[UV3]], [[C]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) - ; VI-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[UV5]], [[C1]] - ; VI-NEXT: [[FSHR3:%[0-9]+]]:_(s32) = G_FSHR [[LSHR1]], [[FSHR2]], [[XOR1]](s32) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSHR1]](s32), [[FSHR3]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr4_vgpr5 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; VI-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY2]](<2 x i32>) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[FSHR:%[0-9]+]]:_(i32) = G_FSHR [[UV]], [[UV2]], [[C]](i32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; VI-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[UV4]], [[C1]] + ; VI-NEXT: [[FSHR1:%[0-9]+]]:_(i32) = G_FSHR [[LSHR]], [[FSHR]], [[XOR]](i32) + ; VI-NEXT: [[FSHR2:%[0-9]+]]:_(i32) = G_FSHR [[UV1]], [[UV3]], [[C]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C]](i32) + ; VI-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[UV5]], [[C1]] + ; VI-NEXT: [[FSHR3:%[0-9]+]]:_(i32) = G_FSHR [[LSHR1]], [[FSHR2]], [[XOR1]](i32) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[FSHR1]](i32), [[FSHR3]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX9-LABEL: name: test_fshl_v2s32_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; 
GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[UV]], [[UV2]], [[C]](s32) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV4]], [[C1]] - ; GFX9-NEXT: [[FSHR1:%[0-9]+]]:_(s32) = G_FSHR [[LSHR]], [[FSHR]], [[XOR]](s32) - ; GFX9-NEXT: [[FSHR2:%[0-9]+]]:_(s32) = G_FSHR [[UV1]], [[UV3]], [[C]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) - ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[UV5]], [[C1]] - ; GFX9-NEXT: [[FSHR3:%[0-9]+]]:_(s32) = G_FSHR [[LSHR1]], [[FSHR2]], [[XOR1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSHR1]](s32), [[FSHR3]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = COPY $vgpr4_vgpr5 - %3:_(<2 x s32>) = G_FSHL %0, %1, %2 - $vgpr0_vgpr1 = COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr4_vgpr5 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY2]](<2 x i32>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[FSHR:%[0-9]+]]:_(i32) = G_FSHR [[UV]], [[UV2]], [[C]](i32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[C]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[UV4]], [[C1]] + ; GFX9-NEXT: [[FSHR1:%[0-9]+]]:_(i32) = G_FSHR [[LSHR]], [[FSHR]], [[XOR]](i32) + ; GFX9-NEXT: [[FSHR2:%[0-9]+]]:_(i32) = G_FSHR [[UV1]], [[UV3]], [[C]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C]](i32) + ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[UV5]], [[C1]] + ; GFX9-NEXT: [[FSHR3:%[0-9]+]]:_(i32) = G_FSHR [[LSHR1]], [[FSHR2]], [[XOR1]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[FSHR1]](i32), [[FSHR3]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i32>) = COPY $vgpr4_vgpr5 + %3:_(<2 x i32>) = G_FSHL %0, %1, %2(<2 x i32>) + $vgpr0_vgpr1 = COPY %3(<2 x i32>) ... 
--- @@ -144,82 +144,82 @@ body: | ; SI-LABEL: name: test_fshl_s16_s16 ; SI: liveins: $vgpr0, $vgpr1, $vgpr2 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; SI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; SI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC]], [[C1]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C]] - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[ZEXT]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C2]](s32) - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[ZEXT1]](s32) - ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC1]], [[TRUNC2]] - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 15 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 -1 + ; SI-NEXT: [[XOR:%[0-9]+]]:_(i16) = G_XOR [[TRUNC]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[XOR]], [[C]] + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[AND]](i16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[ZEXT]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[SHL]](i32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C3]] + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[AND2]], [[C2]](i32) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[AND1]](i16) + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[LSHR]], [[C3]] + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[AND3]], [[ZEXT1]](i32) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i16) = disjoint G_OR [[TRUNC1]], [[TRUNC2]] + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[OR]](i16) + ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; VI-LABEL: name: test_fshl_s16_s16 ; VI: liveins: $vgpr0, $vgpr1, $vgpr2 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] - ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; VI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC2]], 
[[C1]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s16) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C2]](s16) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[LSHR]], [[AND1]](s16) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL]], [[LSHR1]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 15 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 -1 + ; VI-NEXT: [[XOR:%[0-9]+]]:_(i16) = G_XOR [[TRUNC2]], [[C1]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[XOR]], [[C]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 1 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[AND]](i16) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC1]], [[C2]](i16) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i16) = G_LSHR [[LSHR]], [[AND1]](i16) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i16) = disjoint G_OR [[SHL]], [[LSHR1]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[OR]](i16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: test_fshl_s16_s16 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC2]], [[C1]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s16) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C2]](s16) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[LSHR]], [[AND1]](s16) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL]], [[LSHR1]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s16) = G_TRUNC %0 - %4:_(s16) = G_TRUNC %1 - %5:_(s16) = G_TRUNC %2 - %6:_(s16) = G_FSHL %3, %4, %5 - %7:_(s32) = G_ANYEXT %6 - $vgpr0 = COPY %7 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 15 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT 
i16 -1 + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(i16) = G_XOR [[TRUNC2]], [[C1]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[XOR]], [[C]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 1 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[AND]](i16) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC1]], [[C2]](i16) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i16) = G_LSHR [[LSHR]], [[AND1]](i16) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i16) = disjoint G_OR [[SHL]], [[LSHR1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[OR]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i16) = G_TRUNC %0(i32) + %4:_(i16) = G_TRUNC %1(i32) + %5:_(i16) = G_TRUNC %2(i32) + %6:_(i16) = G_FSHL %3, %4, %5(i16) + %7:_(i32) = G_ANYEXT %6(i16) + $vgpr0 = COPY %7(i32) ... --- @@ -231,121 +231,121 @@ body: | ; SI-LABEL: name: test_fshl_v2s16_v2s16 ; SI: liveins: $vgpr0, $vgpr1, $vgpr2 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; SI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC]], [[C2]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C1]] - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[ZEXT]](s32) - ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C4]] - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C3]](s32) - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16) - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LSHR3]], [[ZEXT1]](s32) - ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC2]], [[TRUNC3]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; SI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[TRUNC1]], [[C2]] - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C1]] - ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND3]](s16) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) - ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[COPY3]](s32) - ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LSHR5]], [[ZEXT3]](s32) - ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC4]], 
[[TRUNC5]] - ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL2]] - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY2]](<2 x i16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 15 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C1]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 -1 + ; SI-NEXT: [[XOR:%[0-9]+]]:_(i16) = G_XOR [[TRUNC]], [[C2]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[XOR]], [[C1]] + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[AND]](i16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[BITCAST]], [[ZEXT]](i32) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[SHL]](i32) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C4]] + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[AND2]], [[C3]](i32) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[AND1]](i16) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[LSHR3]], [[ZEXT1]](i32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR4]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i16) = disjoint G_OR [[TRUNC2]], [[TRUNC3]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C1]] + ; SI-NEXT: [[XOR1:%[0-9]+]]:_(i16) = G_XOR [[TRUNC1]], [[C2]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(i16) = G_AND [[XOR1]], [[C1]] + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[AND3]](i16) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[ZEXT2]](i32) + ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[SHL1]](i32) + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[C3]](i32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[LSHR1]], [[COPY3]](i32) + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[AND4]](i16) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[LSHR5]], [[ZEXT3]](i32) + ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR6]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i16) = disjoint G_OR [[TRUNC4]], [[TRUNC5]] + ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT5]], [[C]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT4]], [[SHL2]] + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x i16>) ; ; VI-LABEL: name: test_fshl_v2s16_v2s16 ; VI: liveins: $vgpr0, $vgpr1, $vgpr2 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - 
; VI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; VI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC4]], [[C2]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C1]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s16) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C3]](s16) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[LSHR3]], [[AND1]](s16) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL]], [[LSHR4]] - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]] - ; VI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[TRUNC5]], [[C2]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C1]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[AND2]](s16) - ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C3]](s16) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s16) = G_LSHR [[LSHR5]], [[AND3]](s16) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL1]], [[LSHR6]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY2]](<2 x i16>) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 15 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i16) 
= G_AND [[TRUNC4]], [[C1]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 -1 + ; VI-NEXT: [[XOR:%[0-9]+]]:_(i16) = G_XOR [[TRUNC4]], [[C2]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[XOR]], [[C1]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 1 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[AND]](i16) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC2]], [[C3]](i16) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(i16) = G_LSHR [[LSHR3]], [[AND1]](i16) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i16) = disjoint G_OR [[SHL]], [[LSHR4]] + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC5]], [[C1]] + ; VI-NEXT: [[XOR1:%[0-9]+]]:_(i16) = G_XOR [[TRUNC5]], [[C2]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[XOR1]], [[C1]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[AND2]](i16) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC3]], [[C3]](i16) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(i16) = G_LSHR [[LSHR5]], [[AND3]](i16) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i16) = disjoint G_OR [[SHL1]], [[LSHR6]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL2]] + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x i16>) ; ; GFX9-LABEL: name: test_fshl_v2s16_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY2]], [[BUILD_VECTOR]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C1]](s16), [[C1]](s16) - ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY2]], [[BUILD_VECTOR1]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR]], [[BUILD_VECTOR]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C2]](s16), [[C2]](s16) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[COPY]], [[AND]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[COPY1]], [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[LSHR]], [[AND1]](<2 x s16>) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(<2 x s16>) = disjoint G_OR [[SHL]], [[LSHR1]] - ; GFX9-NEXT: $vgpr0 = COPY [[OR]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = COPY $vgpr2 - %3:_(<2 x s16>) = G_FSHL %0, %1, %2 - $vgpr0 = COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 15 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[C]](i16), [[C]](i16) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(<2 x i16>) = G_AND [[COPY2]], [[BUILD_VECTOR]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 -1 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[C1]](i16), [[C1]](i16) + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(<2 x i16>) = G_XOR [[COPY2]], [[BUILD_VECTOR1]] + ; 
GFX9-NEXT: [[AND1:%[0-9]+]]:_(<2 x i16>) = G_AND [[XOR]], [[BUILD_VECTOR]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 1 + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[C2]](i16), [[C2]](i16) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x i16>) = G_SHL [[COPY]], [[AND]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(<2 x i16>) = G_LSHR [[COPY1]], [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(<2 x i16>) = G_LSHR [[LSHR]], [[AND1]](<2 x i16>) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(<2 x i16>) = disjoint G_OR [[SHL]], [[LSHR1]] + ; GFX9-NEXT: $vgpr0 = COPY [[OR]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = COPY $vgpr2 + %3:_(<2 x i16>) = G_FSHL %0, %1, %2(<2 x i16>) + $vgpr0 = COPY %3(<2 x i16>) ... --- @@ -357,67 +357,67 @@ body: | ; SI-LABEL: name: test_fshl_s64_s64 ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C]] - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; SI-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY2]], [[C1]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[XOR]], [[C]] - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC]](s32) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C2]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[LSHR]], [[TRUNC1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = disjoint G_OR [[SHL]], [[LSHR1]] - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i64) = COPY $vgpr4_vgpr5 + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 63 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[COPY2]], [[C]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 -1 + ; SI-NEXT: [[XOR:%[0-9]+]]:_(i64) = G_XOR [[COPY2]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[XOR]], [[C]] + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[AND]](i64) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[COPY]], [[TRUNC]](i32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[COPY1]], [[C2]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i32) = G_TRUNC [[AND1]](i64) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i64) = G_LSHR [[LSHR]], [[TRUNC1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i64) = disjoint G_OR [[SHL]], [[LSHR1]] + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[OR]](i64) ; ; VI-LABEL: name: test_fshl_s64_s64 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C]] - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; VI-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY2]], [[C1]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[XOR]], [[C]] - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL 
[[COPY]], [[TRUNC]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C2]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[LSHR]], [[TRUNC1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = disjoint G_OR [[SHL]], [[LSHR1]] - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i64) = COPY $vgpr4_vgpr5 + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 63 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[COPY2]], [[C]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 -1 + ; VI-NEXT: [[XOR:%[0-9]+]]:_(i64) = G_XOR [[COPY2]], [[C1]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[XOR]], [[C]] + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[AND]](i64) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[COPY]], [[TRUNC]](i32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[COPY1]], [[C2]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i32) = G_TRUNC [[AND1]](i64) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i64) = G_LSHR [[LSHR]], [[TRUNC1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i64) = disjoint G_OR [[SHL]], [[LSHR1]] + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[OR]](i64) ; ; GFX9-LABEL: name: test_fshl_s64_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY2]], [[C1]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[XOR]], [[C]] - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C2]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[LSHR]], [[TRUNC1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s64) = disjoint G_OR [[SHL]], [[LSHR1]] - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = COPY $vgpr4_vgpr5 - %3:_(s64) = G_FSHL %0, %1, %2 - $vgpr0_vgpr1 = COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i64) = COPY $vgpr4_vgpr5 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 63 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[COPY2]], [[C]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 -1 + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(i64) = G_XOR [[COPY2]], [[C1]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[XOR]], [[C]] + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[AND]](i64) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[COPY]], [[TRUNC]](i32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[COPY1]], [[C2]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i32) = G_TRUNC [[AND1]](i64) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i64) = G_LSHR [[LSHR]], [[TRUNC1]](i32) + ; GFX9-NEXT: 
[[OR:%[0-9]+]]:_(i64) = disjoint G_OR [[SHL]], [[LSHR1]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[OR]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64) = COPY $vgpr4_vgpr5 + %3:_(i64) = G_FSHL %0, %1, %2(i64) + $vgpr0_vgpr1 = COPY %3(i64) ... --- @@ -429,90 +429,90 @@ body: | ; SI-LABEL: name: test_fshl_s8_s8 ; SI: liveins: $vgpr0, $vgpr1, $vgpr2 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; SI-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[C]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND]](s32) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C2]](s32) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[AND1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = disjoint G_OR [[SHL]], [[LSHR1]] - ; SI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 7 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY2]], [[C]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; SI-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[COPY2]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[XOR]], [[C]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[AND]](i32) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C3]] + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[AND2]], [[C2]](i32) + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[LSHR]], [[C3]] + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[AND3]], [[AND1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = disjoint G_OR [[SHL]], [[LSHR1]] + ; SI-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; VI-LABEL: name: test_fshl_s8_s8 ; VI: liveins: $vgpr0, $vgpr1, $vgpr2 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; VI-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[C]] - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[AND2]](s16) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND3]], [[C3]](s16) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[AND1]](s32) - ; 
VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]] - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[LSHR]], [[C2]] - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[AND5]], [[AND4]](s16) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) - ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = disjoint G_OR [[ANYEXT]], [[ANYEXT1]] - ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 7 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY2]], [[C]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; VI-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[COPY2]], [[C1]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[XOR]], [[C]] + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[AND]](i32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C2]] + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[AND2]](i16) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 1 + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C2]] + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i16) = G_LSHR [[AND3]], [[C3]](i16) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[AND1]](i32) + ; VI-NEXT: [[AND4:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C2]] + ; VI-NEXT: [[AND5:%[0-9]+]]:_(i16) = G_AND [[LSHR]], [[C2]] + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i16) = G_LSHR [[AND5]], [[AND4]](i16) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[SHL]](i16) + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR1]](i16) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = disjoint G_OR [[ANYEXT]], [[ANYEXT1]] + ; VI-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; GFX9-LABEL: name: test_fshl_s8_s8 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[C]] - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[AND2]](s16) - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND3]], [[C3]](s16) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[AND1]](s32) - ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]] - ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[LSHR]], [[C2]] - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[AND5]], [[AND4]](s16) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) - ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = disjoint G_OR [[ANYEXT]], 
[[ANYEXT1]] - ; GFX9-NEXT: $vgpr0 = COPY [[OR]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s8) = G_TRUNC %0 - %4:_(s8) = G_TRUNC %1 - %5:_(s8) = G_TRUNC %2 - %6:_(s8) = G_FSHL %3, %4, %5 - %7:_(s32) = G_ANYEXT %6 - $vgpr0 = COPY %7 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 7 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY2]], [[C]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[COPY2]], [[C1]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[XOR]], [[C]] + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[AND]](i32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C2]] + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[AND2]](i16) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 1 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C2]] + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i16) = G_LSHR [[AND3]], [[C3]](i16) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[AND1]](i32) + ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C2]] + ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(i16) = G_AND [[LSHR]], [[C2]] + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i16) = G_LSHR [[AND5]], [[AND4]](i16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[SHL]](i16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR1]](i16) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = disjoint G_OR [[ANYEXT]], [[ANYEXT1]] + ; GFX9-NEXT: $vgpr0 = COPY [[OR]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i8) = G_TRUNC %0(i32) + %4:_(i8) = G_TRUNC %1(i32) + %5:_(i8) = G_TRUNC %2(i32) + %6:_(i8) = G_FSHL %3, %4, %5(i8) + %7:_(i32) = G_ANYEXT %6(i8) + $vgpr0 = COPY %7(i32) ... 
--- @@ -524,134 +524,134 @@ body: | ; SI-LABEL: name: test_fshl_s24_s24 ; SI: liveins: $vgpr0, $vgpr1, $vgpr2 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] - ; SI-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[C1]](s32) - ; SI-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] - ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[C1]] - ; SI-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; SI-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; SI-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; SI-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; SI-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[C1]] - ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[C1]] - ; SI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C1]] - ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[C1]] - ; SI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[C1]] - ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; SI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND1]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C5]](s32) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[AND3]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = disjoint G_OR [[SHL]], [[LSHR1]] - ; SI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 23 + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16777215 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY2]], [[C2]] + ; SI-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[C1]](i32) + ; SI-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; SI-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] + ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; SI-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C4]], [[C1]] + ; SI-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; SI-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; SI-NEXT: 
[[ADD:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; SI-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[AND]], [[ADD]] + ; SI-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[C1]] + ; SI-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[AND]], [[MUL1]] + ; SI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[C1]] + ; SI-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[C1]] + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT]](i32), [[C1]] + ; SI-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SELECT]], [[C1]] + ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SUB3]], [[SELECT]] + ; SI-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[C]], [[SELECT1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[SELECT1]], [[C2]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[AND1]](i32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C2]] + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[AND2]], [[C5]](i32) + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[SUB4]], [[C2]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[LSHR]], [[C2]] + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[AND4]], [[AND3]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = disjoint G_OR [[SHL]], [[LSHR1]] + ; SI-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; VI-LABEL: name: test_fshl_s24_s24 ; VI: liveins: $vgpr0, $vgpr1, $vgpr2 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] - ; VI-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[C1]](s32) - ; VI-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] - ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[C1]] - ; VI-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; VI-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; VI-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; VI-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; VI-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[C1]] - ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[C1]] - ; VI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C1]] - ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[C1]] - ; VI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[C1]] - ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; VI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND1]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = 
G_LSHR [[AND2]], [[C5]](s32) - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[AND3]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = disjoint G_OR [[SHL]], [[LSHR1]] - ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 23 + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16777215 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY2]], [[C2]] + ; VI-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[C1]](i32) + ; VI-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; VI-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] + ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; VI-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C4]], [[C1]] + ; VI-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; VI-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; VI-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; VI-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[AND]], [[ADD]] + ; VI-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[C1]] + ; VI-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[AND]], [[MUL1]] + ; VI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[C1]] + ; VI-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[C1]] + ; VI-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT]](i32), [[C1]] + ; VI-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SELECT]], [[C1]] + ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SUB3]], [[SELECT]] + ; VI-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[C]], [[SELECT1]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[SELECT1]], [[C2]] + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[AND1]](i32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C2]] + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[AND2]], [[C5]](i32) + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[SUB4]], [[C2]] + ; VI-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[LSHR]], [[C2]] + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[AND4]], [[AND3]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = disjoint G_OR [[SHL]], [[LSHR1]] + ; VI-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; GFX9-LABEL: name: test_fshl_s24_s24 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] - ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[C1]](s32) - ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] - ; GFX9-NEXT: 
[[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[C1]] - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[C1]] - ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[C1]] - ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C1]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[C1]] - ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[C1]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND1]](s32) - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C5]](s32) - ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] - ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[AND3]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = disjoint G_OR [[SHL]], [[LSHR1]] - ; GFX9-NEXT: $vgpr0 = COPY [[OR]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s24) = G_TRUNC %0 - %4:_(s24) = G_TRUNC %1 - %5:_(s24) = G_TRUNC %2 - %6:_(s24) = G_FSHL %3, %4, %5 - %7:_(s32) = G_ANYEXT %6 - $vgpr0 = COPY %7 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 23 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16777215 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY2]], [[C2]] + ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[C1]](i32) + ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] + ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C4]], [[C1]] + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[AND]], [[ADD]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[C1]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[AND]], [[MUL1]] + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[C1]] + ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[C1]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], 
[[SUB1]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT]](i32), [[C1]] + ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SELECT]], [[C1]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SUB3]], [[SELECT]] + ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[C]], [[SELECT1]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[SELECT1]], [[C2]] + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[AND1]](i32) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C2]] + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[AND2]], [[C5]](i32) + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[SUB4]], [[C2]] + ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[LSHR]], [[C2]] + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[AND4]], [[AND3]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = disjoint G_OR [[SHL]], [[LSHR1]] + ; GFX9-NEXT: $vgpr0 = COPY [[OR]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i24) = G_TRUNC %0(i32) + %4:_(i24) = G_TRUNC %1(i32) + %5:_(i24) = G_TRUNC %2(i32) + %6:_(i24) = G_FSHL %3, %4, %5(i24) + %7:_(i32) = G_ANYEXT %6(i24) + $vgpr0 = COPY %7(i32) ... --- @@ -663,233 +663,233 @@ body: | ; SI-LABEL: name: test_fshl_v3s16_v3s16 ; SI: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY4]](<2 x s16>) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>) - ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; SI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC]], [[C2]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C1]] - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[ZEXT]](s32) - ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C4]] - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C3]](s32) - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16) - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LSHR3]], [[ZEXT1]](s32) - ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = disjoint 
G_OR [[TRUNC3]], [[TRUNC4]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; SI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[TRUNC1]], [[C2]] - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C1]] - ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND3]](s16) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) - ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[COPY6]](s32) - ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LSHR5]], [[ZEXT3]](s32) - ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC5]], [[TRUNC6]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] - ; SI-NEXT: [[XOR2:%[0-9]+]]:_(s16) = G_XOR [[TRUNC2]], [[C2]] - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[XOR2]], [[C1]] - ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[ZEXT4]](s32) - ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY7]](s32) - ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND6]](s16) - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LSHR7]], [[ZEXT5]](s32) - ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC7]], [[TRUNC8]] - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL3]] - ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; SI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C4]] - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL4]] - ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C4]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR9]], [[SHL5]] - ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; SI-NEXT: $vgpr0 = COPY [[BITCAST8]](<2 x s16>) - ; SI-NEXT: $vgpr1 = COPY [[BITCAST9]](<2 x s16>) - ; SI-NEXT: $vgpr2 = COPY [[BITCAST10]](<2 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr3 + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; SI-NEXT: [[COPY5:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; 
SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY2]](<2 x i16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY3]](<2 x i16>) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY4]](<2 x i16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[COPY5]](<2 x i16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST5]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 15 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C1]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 -1 + ; SI-NEXT: [[XOR:%[0-9]+]]:_(i16) = G_XOR [[TRUNC]], [[C2]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[XOR]], [[C1]] + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[AND]](i16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[BITCAST]], [[ZEXT]](i32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[SHL]](i32) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C4]] + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[AND2]], [[C3]](i32) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[AND1]](i16) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[LSHR3]], [[ZEXT1]](i32) + ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR4]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i16) = disjoint G_OR [[TRUNC3]], [[TRUNC4]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C1]] + ; SI-NEXT: [[XOR1:%[0-9]+]]:_(i16) = G_XOR [[TRUNC1]], [[C2]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(i16) = G_AND [[XOR1]], [[C1]] + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[AND3]](i16) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[ZEXT2]](i32) + ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[SHL1]](i32) + ; SI-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY [[C3]](i32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[LSHR1]], [[COPY6]](i32) + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[AND4]](i16) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[LSHR5]], [[ZEXT3]](i32) + ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR6]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i16) = disjoint G_OR [[TRUNC5]], [[TRUNC6]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C1]] + ; SI-NEXT: [[XOR2:%[0-9]+]]:_(i16) = G_XOR [[TRUNC2]], [[C2]] + ; SI-NEXT: [[AND6:%[0-9]+]]:_(i16) = G_AND [[XOR2]], [[C1]] + ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[AND5]](i16) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[BITCAST1]], [[ZEXT4]](i32) + ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[SHL2]](i32) + ; SI-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY [[C3]](i32) + ; SI-NEXT: [[AND7:%[0-9]+]]:_(i32) = G_AND [[BITCAST3]], [[C4]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(i32) = G_LSHR [[AND7]], [[COPY7]](i32) + ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[AND6]](i16) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(i32) = G_LSHR [[LSHR7]], [[ZEXT5]](i32) + ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR8]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i16) = disjoint G_OR [[TRUNC7]], [[TRUNC8]] + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; SI-NEXT: 
[[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST6]], [[C]](i32) + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; SI-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXT7]], [[C]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[ZEXT6]], [[SHL3]] + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR3]](i32) + ; SI-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[OR2]](i16) + ; SI-NEXT: [[AND8:%[0-9]+]]:_(i32) = G_AND [[BITCAST6]], [[C4]] + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[AND8]], [[C]](i32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[ZEXT8]], [[SHL4]] + ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR4]](i32) + ; SI-NEXT: [[AND9:%[0-9]+]]:_(i32) = G_AND [[BITCAST7]], [[C4]] + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[AND9]], [[C]](i32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[LSHR9]], [[SHL5]] + ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR5]](i32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST8]](<2 x i16>) + ; SI-NEXT: $vgpr1 = COPY [[BITCAST9]](<2 x i16>) + ; SI-NEXT: $vgpr2 = COPY [[BITCAST10]](<2 x i16>) ; ; VI-LABEL: name: test_fshl_v3s16_v3s16 ; VI: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) - ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY4]](<2 x s16>) - ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>) - ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; VI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC6]], [[C2]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C1]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL 
[[TRUNC]], [[AND]](s16) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C3]](s16) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[LSHR3]], [[AND1]](s16) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL]], [[LSHR4]] - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]] - ; VI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[TRUNC7]], [[C2]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C1]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[AND2]](s16) - ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C3]](s16) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s16) = G_LSHR [[LSHR5]], [[AND3]](s16) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL1]], [[LSHR6]] - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C1]] - ; VI-NEXT: [[XOR2:%[0-9]+]]:_(s16) = G_XOR [[TRUNC8]], [[C2]] - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR2]], [[C1]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[AND4]](s16) - ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[C3]](s16) - ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[LSHR7]], [[AND5]](s16) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL2]], [[LSHR8]] - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] - ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C4]] - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] - ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C4]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR9]], [[SHL5]] - ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; VI-NEXT: $vgpr0 = COPY [[BITCAST8]](<2 x s16>) - ; VI-NEXT: $vgpr1 = COPY [[BITCAST9]](<2 x s16>) - ; VI-NEXT: $vgpr2 = COPY [[BITCAST10]](<2 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr3 + ; VI-NEXT: [[COPY4:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; VI-NEXT: [[COPY5:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = 
G_BITCAST [[COPY2]](<2 x i16>) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY3]](<2 x i16>) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST3]](i32) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY4]](<2 x i16>) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[COPY5]](<2 x i16>) + ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST5]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 15 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC6]], [[C1]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 -1 + ; VI-NEXT: [[XOR:%[0-9]+]]:_(i16) = G_XOR [[TRUNC6]], [[C2]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[XOR]], [[C1]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 1 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[AND]](i16) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC3]], [[C3]](i16) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(i16) = G_LSHR [[LSHR3]], [[AND1]](i16) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i16) = disjoint G_OR [[SHL]], [[LSHR4]] + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC7]], [[C1]] + ; VI-NEXT: [[XOR1:%[0-9]+]]:_(i16) = G_XOR [[TRUNC7]], [[C2]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[XOR1]], [[C1]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[AND2]](i16) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC4]], [[C3]](i16) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(i16) = G_LSHR [[LSHR5]], [[AND3]](i16) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i16) = disjoint G_OR [[SHL1]], [[LSHR6]] + ; VI-NEXT: [[AND4:%[0-9]+]]:_(i16) = G_AND [[TRUNC8]], [[C1]] + ; VI-NEXT: [[XOR2:%[0-9]+]]:_(i16) = G_XOR [[TRUNC8]], [[C2]] + ; VI-NEXT: [[AND5:%[0-9]+]]:_(i16) = G_AND [[XOR2]], [[C1]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[TRUNC2]], [[AND4]](i16) + ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC5]], [[C3]](i16) + ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(i16) = G_LSHR [[LSHR7]], [[AND5]](i16) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i16) = disjoint G_OR [[SHL2]], [[LSHR8]] + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST6]], [[C]](i32) + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL3]] + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR3]](i32) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[OR2]](i16) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND6:%[0-9]+]]:_(i32) = G_AND [[BITCAST6]], [[C4]] + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[AND6]], [[C]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL4]] + ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR4]](i32) + ; VI-NEXT: [[AND7:%[0-9]+]]:_(i32) = G_AND [[BITCAST7]], [[C4]] + ; VI-NEXT: 
[[SHL5:%[0-9]+]]:_(i32) = G_SHL [[AND7]], [[C]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[LSHR9]], [[SHL5]] + ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR5]](i32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST8]](<2 x i16>) + ; VI-NEXT: $vgpr1 = COPY [[BITCAST9]](<2 x i16>) + ; VI-NEXT: $vgpr2 = COPY [[BITCAST10]](<2 x i16>) ; ; GFX9-LABEL: name: test_fshl_v3s16_v3s16 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY4]], [[BUILD_VECTOR]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C1]](s16), [[C1]](s16) - ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY4]], [[BUILD_VECTOR1]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR]], [[BUILD_VECTOR]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C2]](s16), [[C2]](s16) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[COPY]], [[AND]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[COPY2]], [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[LSHR]], [[AND1]](<2 x s16>) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(<2 x s16>) = disjoint G_OR [[SHL]], [[LSHR1]] - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY5]], [[BUILD_VECTOR3]] - ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C1]](s16), [[C1]](s16) - ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY5]], [[BUILD_VECTOR4]] - ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR1]], [[BUILD_VECTOR3]] - ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C2]](s16), [[C2]](s16) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[COPY1]], [[AND2]](<2 x s16>) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[COPY3]], [[BUILD_VECTOR5]](<2 x s16>) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[LSHR2]], [[AND3]](<2 x s16>) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(<2 x s16>) = disjoint G_OR [[SHL1]], [[LSHR3]] - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[OR1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C3]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-NEXT: 
[[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[OR]](<2 x s16>) - ; GFX9-NEXT: $vgpr1 = COPY [[BUILD_VECTOR7]](<2 x s16>) - ; GFX9-NEXT: $vgpr2 = COPY [[BUILD_VECTOR6]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = COPY $vgpr2 - %3:_(<2 x s16>) = COPY $vgpr3 - %4:_(<2 x s16>) = COPY $vgpr4 - %5:_(<2 x s16>) = COPY $vgpr5 - %6:_(<2 x s16>) = G_IMPLICIT_DEF - %7:_(<6 x s16>) = G_CONCAT_VECTORS %0(<2 x s16>), %1(<2 x s16>), %6(<2 x s16>) - %8:_(<3 x s16>), %9:_(<3 x s16>) = G_UNMERGE_VALUES %7(<6 x s16>) - %10:_(<6 x s16>) = G_CONCAT_VECTORS %2(<2 x s16>), %3(<2 x s16>), %6(<2 x s16>) - %11:_(<3 x s16>), %12:_(<3 x s16>) = G_UNMERGE_VALUES %10(<6 x s16>) - %13:_(<6 x s16>) = G_CONCAT_VECTORS %4(<2 x s16>), %5(<2 x s16>), %6(<2 x s16>) - %14:_(<3 x s16>), %15:_(<3 x s16>) = G_UNMERGE_VALUES %13(<6 x s16>) - %16:_(<3 x s16>) = G_FSHL %8, %11, %14(<3 x s16>) - %17:_(<3 x s16>) = G_IMPLICIT_DEF - %18:_(<6 x s16>) = G_CONCAT_VECTORS %16(<3 x s16>), %17(<3 x s16>) - %19:_(<2 x s16>), %20:_(<2 x s16>), %21:_(<2 x s16>) = G_UNMERGE_VALUES %18(<6 x s16>) - $vgpr0 = COPY %19(<2 x s16>) - $vgpr1 = COPY %20(<2 x s16>) - $vgpr2 = COPY %21(<2 x s16>) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr3 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 15 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[C]](i16), [[C]](i16) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(<2 x i16>) = G_AND [[COPY4]], [[BUILD_VECTOR]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 -1 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[C1]](i16), [[C1]](i16) + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(<2 x i16>) = G_XOR [[COPY4]], [[BUILD_VECTOR1]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(<2 x i16>) = G_AND [[XOR]], [[BUILD_VECTOR]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 1 + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[C2]](i16), [[C2]](i16) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x i16>) = G_SHL [[COPY]], [[AND]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(<2 x i16>) = G_LSHR [[COPY2]], [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(<2 x i16>) = G_LSHR [[LSHR]], [[AND1]](<2 x i16>) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(<2 x i16>) = disjoint G_OR [[SHL]], [[LSHR1]] + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[C]](i16), [[C]](i16) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(<2 x i16>) = G_AND [[COPY5]], [[BUILD_VECTOR3]] + ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[C1]](i16), [[C1]](i16) + ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(<2 x i16>) = G_XOR [[COPY5]], [[BUILD_VECTOR4]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(<2 x i16>) = G_AND [[XOR1]], [[BUILD_VECTOR3]] + ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[C2]](i16), [[C2]](i16) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x i16>) = G_SHL [[COPY1]], [[AND2]](<2 x i16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(<2 x i16>) = G_LSHR [[COPY3]], [[BUILD_VECTOR5]](<2 x i16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(<2 x i16>) = G_LSHR [[LSHR2]], [[AND3]](<2 
x i16>) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(<2 x i16>) = disjoint G_OR [[SHL1]], [[LSHR3]] + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[OR1]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C3]](i32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR4]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[OR]](<2 x i16>) + ; GFX9-NEXT: $vgpr1 = COPY [[BUILD_VECTOR7]](<2 x i16>) + ; GFX9-NEXT: $vgpr2 = COPY [[BUILD_VECTOR6]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = COPY $vgpr2 + %3:_(<2 x i16>) = COPY $vgpr3 + %4:_(<2 x i16>) = COPY $vgpr4 + %5:_(<2 x i16>) = COPY $vgpr5 + %6:_(<2 x i16>) = G_IMPLICIT_DEF + %7:_(<6 x i16>) = G_CONCAT_VECTORS %0(<2 x i16>), %1(<2 x i16>), %6(<2 x i16>) + %8:_(<3 x i16>), %9:_(<3 x i16>) = G_UNMERGE_VALUES %7(<6 x i16>) + %10:_(<6 x i16>) = G_CONCAT_VECTORS %2(<2 x i16>), %3(<2 x i16>), %6(<2 x i16>) + %11:_(<3 x i16>), %12:_(<3 x i16>) = G_UNMERGE_VALUES %10(<6 x i16>) + %13:_(<6 x i16>) = G_CONCAT_VECTORS %4(<2 x i16>), %5(<2 x i16>), %6(<2 x i16>) + %14:_(<3 x i16>), %15:_(<3 x i16>) = G_UNMERGE_VALUES %13(<6 x i16>) + %16:_(<3 x i16>) = G_FSHL %8, %11, %14(<3 x i16>) + %17:_(<3 x i16>) = G_IMPLICIT_DEF + %18:_(<6 x i16>) = G_CONCAT_VECTORS %16(<3 x i16>), %17(<3 x i16>) + %19:_(<2 x i16>), %20:_(<2 x i16>), %21:_(<2 x i16>) = G_UNMERGE_VALUES %18(<6 x i16>) + $vgpr0 = COPY %19(<2 x i16>) + $vgpr1 = COPY %20(<2 x i16>) + $vgpr2 = COPY %21(<2 x i16>) ... 
--- @@ -901,210 +901,210 @@ body: | ; SI-LABEL: name: test_fshl_v4s16_v4s16 ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; SI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) - ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; SI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC]], [[C2]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C1]] - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[ZEXT]](s32) - ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C4]] - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C3]](s32) - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16) - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[LSHR6]], [[ZEXT1]](s32) - ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC4]], [[TRUNC5]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; SI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[TRUNC1]], [[C2]] - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C1]] - ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND3]](s16) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) - ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LSHR2]], [[COPY3]](s32) - ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) - ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[LSHR8]], [[ZEXT3]](s32) - ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC 
[[LSHR9]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC6]], [[TRUNC7]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] - ; SI-NEXT: [[XOR2:%[0-9]+]]:_(s16) = G_XOR [[TRUNC2]], [[C2]] - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[XOR2]], [[C1]] - ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[ZEXT4]](s32) - ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY4]](s32) - ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND6]](s16) - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LSHR10]], [[ZEXT5]](s32) - ; SI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC8]], [[TRUNC9]] - ; SI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] - ; SI-NEXT: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC3]], [[C2]] - ; SI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C1]] - ; SI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[AND8]](s16) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[ZEXT6]](s32) - ; SI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[LSHR3]], [[COPY5]](s32) - ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[AND9]](s16) - ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[LSHR12]], [[ZEXT7]](s32) - ; SI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR13]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC10]], [[TRUNC11]] - ; SI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; SI-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL4]] - ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; SI-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; SI-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL5]] - ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr4_vgpr5 + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; 
SI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; SI-NEXT: [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY2]](<4 x i16>) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR4]](i32) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[UV5]](<2 x i16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST5]](i32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST5]], [[C]](i32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR5]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 15 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C1]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 -1 + ; SI-NEXT: [[XOR:%[0-9]+]]:_(i16) = G_XOR [[TRUNC]], [[C2]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[XOR]], [[C1]] + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[AND]](i16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[BITCAST]], [[ZEXT]](i32) + ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[SHL]](i32) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C4]] + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[AND2]], [[C3]](i32) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[AND1]](i16) + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(i32) = G_LSHR [[LSHR6]], [[ZEXT1]](i32) + ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR7]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i16) = disjoint G_OR [[TRUNC4]], [[TRUNC5]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C1]] + ; SI-NEXT: [[XOR1:%[0-9]+]]:_(i16) = G_XOR [[TRUNC1]], [[C2]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(i16) = G_AND [[XOR1]], [[C1]] + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[AND3]](i16) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[ZEXT2]](i32) + ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[SHL1]](i32) + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[C3]](i32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(i32) = G_LSHR [[LSHR2]], [[COPY3]](i32) + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[AND4]](i16) + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(i32) = G_LSHR [[LSHR8]], [[ZEXT3]](i32) + ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR9]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i16) = disjoint G_OR [[TRUNC6]], [[TRUNC7]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C1]] + ; SI-NEXT: [[XOR2:%[0-9]+]]:_(i16) = G_XOR [[TRUNC2]], [[C2]] + ; SI-NEXT: [[AND6:%[0-9]+]]:_(i16) = G_AND [[XOR2]], [[C1]] + ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[AND5]](i16) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[BITCAST1]], [[ZEXT4]](i32) + ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[SHL2]](i32) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[C3]](i32) + ; SI-NEXT: [[AND7:%[0-9]+]]:_(i32) = G_AND [[BITCAST3]], [[C4]] + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(i32) = G_LSHR [[AND7]], [[COPY4]](i32) + ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[AND6]](i16) + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(i32) = G_LSHR [[LSHR10]], [[ZEXT5]](i32) + ; SI-NEXT: [[TRUNC9:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR11]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i16) = disjoint G_OR [[TRUNC8]], [[TRUNC9]] + ; SI-NEXT: [[AND8:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C1]] + ; SI-NEXT: [[XOR3:%[0-9]+]]:_(i16) = G_XOR [[TRUNC3]], [[C2]] + ; SI-NEXT: [[AND9:%[0-9]+]]:_(i16) = G_AND 
[[XOR3]], [[C1]] + ; SI-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[AND8]](i16) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LSHR1]], [[ZEXT6]](i32) + ; SI-NEXT: [[TRUNC10:%[0-9]+]]:_(i16) = G_TRUNC [[SHL3]](i32) + ; SI-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[C3]](i32) + ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(i32) = G_LSHR [[LSHR3]], [[COPY5]](i32) + ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[AND9]](i16) + ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(i32) = G_LSHR [[LSHR12]], [[ZEXT7]](i32) + ; SI-NEXT: [[TRUNC11:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR13]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i16) = disjoint G_OR [[TRUNC10]], [[TRUNC11]] + ; SI-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; SI-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[ZEXT9]], [[C]](i32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[ZEXT8]], [[SHL4]] + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR4]](i32) + ; SI-NEXT: [[ZEXT10:%[0-9]+]]:_(i32) = G_ZEXT [[OR2]](i16) + ; SI-NEXT: [[ZEXT11:%[0-9]+]]:_(i32) = G_ZEXT [[OR3]](i16) + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[ZEXT11]], [[C]](i32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[ZEXT10]], [[SHL5]] + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR5]](i32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x i16>), [[BITCAST7]](<2 x i16>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; VI-LABEL: name: test_fshl_v4s16_v4s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; VI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; VI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; VI-NEXT: 
[[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; VI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) - ; VI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C1]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; VI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC8]], [[C2]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C1]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s16) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C3]](s16) - ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[LSHR6]], [[AND1]](s16) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL]], [[LSHR7]] - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C1]] - ; VI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[TRUNC9]], [[C2]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C1]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[AND2]](s16) - ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[C3]](s16) - ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[LSHR8]], [[AND3]](s16) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL1]], [[LSHR9]] - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C1]] - ; VI-NEXT: [[XOR2:%[0-9]+]]:_(s16) = G_XOR [[TRUNC10]], [[C2]] - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR2]], [[C1]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[AND4]](s16) - ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC6]], [[C3]](s16) - ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[LSHR10]], [[AND5]](s16) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL2]], [[LSHR11]] - ; VI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C1]] - ; VI-NEXT: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC11]], [[C2]] - ; VI-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C1]] - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[AND6]](s16) - ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC7]], [[C3]](s16) - ; VI-NEXT: [[LSHR13:%[0-9]+]]:_(s16) = G_LSHR [[LSHR12]], [[AND7]](s16) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL3]], [[LSHR13]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] - ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr4_vgpr5 + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; VI-NEXT: 
[[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST3]](i32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; VI-NEXT: [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY2]](<4 x i16>) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; VI-NEXT: [[TRUNC9:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR4]](i32) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[UV5]](<2 x i16>) + ; VI-NEXT: [[TRUNC10:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST5]](i32) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST5]], [[C]](i32) + ; VI-NEXT: [[TRUNC11:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR5]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 15 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC8]], [[C1]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 -1 + ; VI-NEXT: [[XOR:%[0-9]+]]:_(i16) = G_XOR [[TRUNC8]], [[C2]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[XOR]], [[C1]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 1 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[AND]](i16) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC4]], [[C3]](i16) + ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(i16) = G_LSHR [[LSHR6]], [[AND1]](i16) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i16) = disjoint G_OR [[SHL]], [[LSHR7]] + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC9]], [[C1]] + ; VI-NEXT: [[XOR1:%[0-9]+]]:_(i16) = G_XOR [[TRUNC9]], [[C2]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[XOR1]], [[C1]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[AND2]](i16) + ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC5]], [[C3]](i16) + ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(i16) = G_LSHR [[LSHR8]], [[AND3]](i16) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i16) = disjoint G_OR [[SHL1]], [[LSHR9]] + ; VI-NEXT: [[AND4:%[0-9]+]]:_(i16) = G_AND [[TRUNC10]], [[C1]] + ; VI-NEXT: [[XOR2:%[0-9]+]]:_(i16) = G_XOR [[TRUNC10]], [[C2]] + ; VI-NEXT: [[AND5:%[0-9]+]]:_(i16) = G_AND [[XOR2]], [[C1]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[TRUNC2]], [[AND4]](i16) + ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC6]], [[C3]](i16) + ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(i16) = G_LSHR [[LSHR10]], [[AND5]](i16) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i16) = disjoint G_OR [[SHL2]], [[LSHR11]] + ; VI-NEXT: [[AND6:%[0-9]+]]:_(i16) = G_AND [[TRUNC11]], [[C1]] + ; VI-NEXT: [[XOR3:%[0-9]+]]:_(i16) = G_XOR [[TRUNC11]], 
[[C2]] + ; VI-NEXT: [[AND7:%[0-9]+]]:_(i16) = G_AND [[XOR3]], [[C1]] + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i16) = G_SHL [[TRUNC3]], [[AND6]](i16) + ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC7]], [[C3]](i16) + ; VI-NEXT: [[LSHR13:%[0-9]+]]:_(i16) = G_LSHR [[LSHR12]], [[AND7]](i16) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i16) = disjoint G_OR [[SHL3]], [[LSHR13]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL4]] + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR4]](i32) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[OR2]](i16) + ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[OR3]](i16) + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR5]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x i16>), [[BITCAST7]](<2 x i16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX9-LABEL: name: test_fshl_v4s16_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[UV4]], [[BUILD_VECTOR]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C1]](s16), [[C1]](s16) - ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(<2 x s16>) = G_XOR [[UV4]], [[BUILD_VECTOR1]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR]], [[BUILD_VECTOR]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C2]](s16), [[C2]](s16) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV]], [[AND]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[UV2]], [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[LSHR]], [[AND1]](<2 x s16>) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(<2 x s16>) = disjoint G_OR [[SHL]], [[LSHR1]] - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(<2 x s16>) = G_AND [[UV5]], [[BUILD_VECTOR3]] - ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C1]](s16), [[C1]](s16) - ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(<2 x s16>) = G_XOR [[UV5]], [[BUILD_VECTOR4]] - ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR1]], [[BUILD_VECTOR3]] - ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C2]](s16), [[C2]](s16) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV1]], [[AND2]](<2 x s16>) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[UV3]], 
[[BUILD_VECTOR5]](<2 x s16>) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[LSHR2]], [[AND3]](<2 x s16>) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(<2 x s16>) = disjoint G_OR [[SHL1]], [[LSHR3]] - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[OR]](<2 x s16>), [[OR1]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 - %2:_(<4 x s16>) = COPY $vgpr4_vgpr5 - %3:_(<4 x s16>) = G_FSHL %0, %1, %2 - $vgpr0_vgpr1 = COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr4_vgpr5 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY2]](<4 x i16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 15 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[C]](i16), [[C]](i16) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(<2 x i16>) = G_AND [[UV4]], [[BUILD_VECTOR]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 -1 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[C1]](i16), [[C1]](i16) + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(<2 x i16>) = G_XOR [[UV4]], [[BUILD_VECTOR1]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(<2 x i16>) = G_AND [[XOR]], [[BUILD_VECTOR]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 1 + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[C2]](i16), [[C2]](i16) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x i16>) = G_SHL [[UV]], [[AND]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(<2 x i16>) = G_LSHR [[UV2]], [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(<2 x i16>) = G_LSHR [[LSHR]], [[AND1]](<2 x i16>) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(<2 x i16>) = disjoint G_OR [[SHL]], [[LSHR1]] + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[C]](i16), [[C]](i16) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(<2 x i16>) = G_AND [[UV5]], [[BUILD_VECTOR3]] + ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[C1]](i16), [[C1]](i16) + ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(<2 x i16>) = G_XOR [[UV5]], [[BUILD_VECTOR4]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(<2 x i16>) = G_AND [[XOR1]], [[BUILD_VECTOR3]] + ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[C2]](i16), [[C2]](i16) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x i16>) = G_SHL [[UV1]], [[AND2]](<2 x i16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(<2 x i16>) = G_LSHR [[UV3]], [[BUILD_VECTOR5]](<2 x i16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(<2 x i16>) = G_LSHR [[LSHR2]], [[AND3]](<2 x i16>) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(<2 x i16>) = disjoint G_OR [[SHL1]], [[LSHR3]] + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[OR]](<2 x i16>), [[OR1]](<2 x i16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = COPY $vgpr2_vgpr3 + %2:_(<4 x i16>) = COPY $vgpr4_vgpr5 + %3:_(<4 x i16>) = G_FSHL %0, %1, %2(<4 x i16>) + $vgpr0_vgpr1 = COPY %3(<4 x i16>) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir index fa18012eb43fa..09e428abeb938 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir @@ -14,34 +14,34 @@ body: | ; SI-LABEL: name: test_fshr_s32_s32 ; SI: liveins: $vgpr0, $vgpr1, $vgpr2 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI-NEXT: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](s32) - ; SI-NEXT: $vgpr0 = COPY [[FSHR]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; SI-NEXT: [[FSHR:%[0-9]+]]:_(i32) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](i32) + ; SI-NEXT: $vgpr0 = COPY [[FSHR]](i32) ; ; VI-LABEL: name: test_fshr_s32_s32 ; VI: liveins: $vgpr0, $vgpr1, $vgpr2 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI-NEXT: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](s32) - ; VI-NEXT: $vgpr0 = COPY [[FSHR]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; VI-NEXT: [[FSHR:%[0-9]+]]:_(i32) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](i32) + ; VI-NEXT: $vgpr0 = COPY [[FSHR]](i32) ; ; GFX9-LABEL: name: test_fshr_s32_s32 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](s32) - ; GFX9-NEXT: $vgpr0 = COPY [[FSHR]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = G_FSHR %0, %1, %2 - $vgpr0 = COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[FSHR:%[0-9]+]]:_(i32) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](i32) + ; GFX9-NEXT: $vgpr0 = COPY [[FSHR]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = G_FSHR %0, %1, %2(i32) + $vgpr0 = COPY %3(i32) ... 
--- @@ -53,49 +53,49 @@ body: | ; SI-LABEL: name: test_fshr_v2s32_v2s32 ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; SI-NEXT: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[UV]], [[UV2]], [[UV4]](s32) - ; SI-NEXT: [[FSHR1:%[0-9]+]]:_(s32) = G_FSHR [[UV1]], [[UV3]], [[UV5]](s32) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSHR]](s32), [[FSHR1]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr4_vgpr5 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; SI-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY2]](<2 x i32>) + ; SI-NEXT: [[FSHR:%[0-9]+]]:_(i32) = G_FSHR [[UV]], [[UV2]], [[UV4]](i32) + ; SI-NEXT: [[FSHR1:%[0-9]+]]:_(i32) = G_FSHR [[UV1]], [[UV3]], [[UV5]](i32) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[FSHR]](i32), [[FSHR1]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; VI-LABEL: name: test_fshr_v2s32_v2s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; VI-NEXT: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[UV]], [[UV2]], [[UV4]](s32) - ; VI-NEXT: [[FSHR1:%[0-9]+]]:_(s32) = G_FSHR [[UV1]], [[UV3]], [[UV5]](s32) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSHR]](s32), [[FSHR1]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr4_vgpr5 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; VI-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY2]](<2 x i32>) + ; VI-NEXT: [[FSHR:%[0-9]+]]:_(i32) = G_FSHR [[UV]], [[UV2]], [[UV4]](i32) + ; VI-NEXT: [[FSHR1:%[0-9]+]]:_(i32) = G_FSHR [[UV1]], [[UV3]], [[UV5]](i32) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[FSHR]](i32), [[FSHR1]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX9-LABEL: name: test_fshr_v2s32_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, 
$vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; GFX9-NEXT: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[UV]], [[UV2]], [[UV4]](s32) - ; GFX9-NEXT: [[FSHR1:%[0-9]+]]:_(s32) = G_FSHR [[UV1]], [[UV3]], [[UV5]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSHR]](s32), [[FSHR1]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = COPY $vgpr4_vgpr5 - %3:_(<2 x s32>) = G_FSHR %0, %1, %2 - $vgpr0_vgpr1 = COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr4_vgpr5 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY2]](<2 x i32>) + ; GFX9-NEXT: [[FSHR:%[0-9]+]]:_(i32) = G_FSHR [[UV]], [[UV2]], [[UV4]](i32) + ; GFX9-NEXT: [[FSHR1:%[0-9]+]]:_(i32) = G_FSHR [[UV1]], [[UV3]], [[UV5]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[FSHR]](i32), [[FSHR1]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i32>) = COPY $vgpr4_vgpr5 + %3:_(<2 x i32>) = G_FSHR %0, %1, %2(<2 x i32>) + $vgpr0_vgpr1 = COPY %3(<2 x i32>) ... 
--- @@ -107,81 +107,81 @@ body: | ; SI-LABEL: name: test_fshr_s16_s16 ; SI: liveins: $vgpr0, $vgpr1, $vgpr2 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; SI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; SI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC]], [[C1]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C2]](s32) - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[ZEXT]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[ZEXT1]](s32) - ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC1]], [[TRUNC2]] - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 15 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 -1 + ; SI-NEXT: [[XOR:%[0-9]+]]:_(i16) = G_XOR [[TRUNC]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[XOR]], [[C]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[C2]](i32) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[AND1]](i16) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[SHL]], [[ZEXT]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[SHL1]](i32) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[AND]](i16) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C3]] + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[AND2]], [[ZEXT1]](i32) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i16) = disjoint G_OR [[TRUNC1]], [[TRUNC2]] + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[OR]](i16) + ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; VI-LABEL: name: test_fshr_s16_s16 ; VI: liveins: $vgpr0, $vgpr1, $vgpr2 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] - ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; VI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC2]], [[C1]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; 
VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C2]](s16) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND1]](s16) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[AND]](s16) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL1]], [[LSHR]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 15 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 -1 + ; VI-NEXT: [[XOR:%[0-9]+]]:_(i16) = G_XOR [[TRUNC2]], [[C1]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[XOR]], [[C]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 1 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[C2]](i16) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[SHL]], [[AND1]](i16) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC1]], [[AND]](i16) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i16) = disjoint G_OR [[SHL1]], [[LSHR]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[OR]](i16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: test_fshr_s16_s16 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC2]], [[C1]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C2]](s16) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND1]](s16) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[AND]](s16) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL1]], [[LSHR]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s16) = G_TRUNC %0 - %4:_(s16) = G_TRUNC %1 - %5:_(s16) = G_TRUNC %2 - %6:_(s16) = G_FSHR %3, %4, %5 - %7:_(s32) = G_ANYEXT %6 - $vgpr0 = COPY %7 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 15 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 -1 + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(i16) = G_XOR [[TRUNC2]], [[C1]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[XOR]], [[C]] + 
; GFX9-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 1 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[C2]](i16) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[SHL]], [[AND1]](i16) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC1]], [[AND]](i16) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i16) = disjoint G_OR [[SHL1]], [[LSHR]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[OR]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i16) = G_TRUNC %0(i32) + %4:_(i16) = G_TRUNC %1(i32) + %5:_(i16) = G_TRUNC %2(i32) + %6:_(i16) = G_FSHR %3, %4, %5(i16) + %7:_(i32) = G_ANYEXT %6(i16) + $vgpr0 = COPY %7(i32) ... --- @@ -193,188 +193,188 @@ body: | ; SI-LABEL: name: test_fshr_v2s16_v2s16 ; SI: liveins: $vgpr0, $vgpr1, $vgpr2 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; SI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C2]] - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[ZEXT]](s32) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C4]](s32) - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LSHR2]], [[ZEXT1]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC]], [[TRUNC1]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] - ; SI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C2]] - ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND3]](s16) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) - ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[COPY3]](s32) - ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR4]], [[ZEXT3]](s32) - ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC2]], [[TRUNC3]] - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[COPY4]](s32) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: 
[[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR6]], [[COPY5]](s32) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[C1]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[COPY6]], [[SHL4]] - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI-NEXT: [[XOR2:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY2]], [[BITCAST3]] - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>) - ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32) - ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C2]] - ; SI-NEXT: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC4]], [[C3]] - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C2]] - ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT]], [[ZEXT4]](s32) - ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[SHL2]], [[C5]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY8]](s32) - ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND6]](s16) - ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[LSHR8]], [[ZEXT5]](s32) - ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC6]], [[TRUNC7]] - ; SI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C2]] - ; SI-NEXT: [[XOR4:%[0-9]+]]:_(s16) = G_XOR [[TRUNC5]], [[C3]] - ; SI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[XOR4]], [[C2]] - ; SI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[AND8]](s16) - ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) - ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT1]], [[ZEXT6]](s32) - ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[SHL3]], [[C5]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY9]](s32) - ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[AND9]](s16) - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LSHR10]], [[ZEXT7]](s32) - ; SI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC8]], [[TRUNC9]] - ; SI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; SI-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) - ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C1]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL7]] - ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; SI-NEXT: $vgpr0 = COPY [[BITCAST5]](<2 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; SI-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C1]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT 
i16 15 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[C]], [[C2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 -1 + ; SI-NEXT: [[XOR:%[0-9]+]]:_(i16) = G_XOR [[C]], [[C3]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[XOR]], [[C2]] + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[AND]](i16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[BITCAST]], [[ZEXT]](i32) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[SHL]](i32) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C5]] + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[AND2]], [[C4]](i32) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[AND1]](i16) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[LSHR2]], [[ZEXT1]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i16) = disjoint G_OR [[TRUNC]], [[TRUNC1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[C]], [[C2]] + ; SI-NEXT: [[XOR1:%[0-9]+]]:_(i16) = G_XOR [[C]], [[C3]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(i16) = G_AND [[XOR1]], [[C2]] + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[AND3]](i16) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[ZEXT2]](i32) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[SHL1]](i32) + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[LSHR1]], [[COPY3]](i32) + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[AND4]](i16) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[LSHR4]], [[ZEXT3]](i32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR5]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i16) = disjoint G_OR [[TRUNC2]], [[TRUNC3]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C1]](i32) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[BITCAST2]], [[COPY4]](i32) + ; SI-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LSHR6]], [[COPY5]](i32) + ; SI-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; SI-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[COPY7]], [[C1]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[COPY6]], [[SHL4]] + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; SI-NEXT: [[XOR2:%[0-9]+]]:_(<2 x i16>) = G_XOR [[COPY2]], [[BITCAST3]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[XOR2]](<2 x i16>) + ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C1]](i32) + ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR7]](i32) + ; SI-NEXT: [[AND5:%[0-9]+]]:_(i16) = G_AND [[TRUNC4]], [[C2]] + ; SI-NEXT: [[XOR3:%[0-9]+]]:_(i16) = G_XOR [[TRUNC4]], [[C3]] + ; SI-NEXT: [[AND6:%[0-9]+]]:_(i16) = G_AND [[XOR3]], [[C2]] + ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[AND5]](i16) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[OR]](i16) + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[ANYEXT]], [[ZEXT4]](i32) + ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[SHL5]](i32) + ; SI-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; SI-NEXT: [[AND7:%[0-9]+]]:_(i32) = G_AND [[SHL2]], [[C5]] + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(i32) = G_LSHR [[AND7]], [[COPY8]](i32) + ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[AND6]](i16) + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(i32) = G_LSHR 
[[LSHR8]], [[ZEXT5]](i32) + ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR9]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i16) = disjoint G_OR [[TRUNC6]], [[TRUNC7]] + ; SI-NEXT: [[AND8:%[0-9]+]]:_(i16) = G_AND [[TRUNC5]], [[C2]] + ; SI-NEXT: [[XOR4:%[0-9]+]]:_(i16) = G_XOR [[TRUNC5]], [[C3]] + ; SI-NEXT: [[AND9:%[0-9]+]]:_(i16) = G_AND [[XOR4]], [[C2]] + ; SI-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[AND8]](i16) + ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[OR1]](i16) + ; SI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ANYEXT1]], [[ZEXT6]](i32) + ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[SHL6]](i32) + ; SI-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; SI-NEXT: [[AND10:%[0-9]+]]:_(i32) = G_AND [[SHL3]], [[C5]] + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(i32) = G_LSHR [[AND10]], [[COPY9]](i32) + ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[AND9]](i16) + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(i32) = G_LSHR [[LSHR10]], [[ZEXT7]](i32) + ; SI-NEXT: [[TRUNC9:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR11]](i32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i16) = disjoint G_OR [[TRUNC8]], [[TRUNC9]] + ; SI-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[OR3]](i16) + ; SI-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[OR4]](i16) + ; SI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[ZEXT9]], [[C1]](i32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[ZEXT8]], [[SHL7]] + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR5]](i32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST5]](<2 x i16>) ; ; VI-LABEL: name: test_fshr_v2s16_v2s16 ; VI: liveins: $vgpr0, $vgpr1, $vgpr2 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; VI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C2]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s16) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C]](s16) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[LSHR2]], [[AND1]](s16) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL]], [[LSHR3]] - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] - ; VI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C2]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[AND2]](s16) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C]](s16) - ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[LSHR4]], [[AND3]](s16) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL1]], [[LSHR5]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC 
[[BITCAST2]](s32) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) - ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC4]], [[C]](s16) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC5]], [[C]](s16) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C1]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[SHL4]] - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: [[XOR2:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY2]], [[BITCAST3]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>) - ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32) - ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C2]] - ; VI-NEXT: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC6]], [[C3]] - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C2]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[OR]], [[AND4]](s16) - ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[SHL2]], [[C]](s16) - ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[LSHR8]], [[AND5]](s16) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL5]], [[LSHR9]] - ; VI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C2]] - ; VI-NEXT: [[XOR4:%[0-9]+]]:_(s16) = G_XOR [[TRUNC7]], [[C3]] - ; VI-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[XOR4]], [[C2]] - ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[OR1]], [[AND6]](s16) - ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[SHL3]], [[C]](s16) - ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[LSHR10]], [[AND7]](s16) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL6]], [[LSHR11]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) - ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL7]] - ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; VI-NEXT: $vgpr0 = COPY [[BITCAST5]](<2 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; VI-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C1]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 15 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[C]], [[C2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 -1 + ; VI-NEXT: [[XOR:%[0-9]+]]:_(i16) = G_XOR [[C]], [[C3]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[XOR]], [[C2]] + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[AND]](i16) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC2]], [[C]](i16) + ; VI-NEXT: 
[[LSHR3:%[0-9]+]]:_(i16) = G_LSHR [[LSHR2]], [[AND1]](i16) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i16) = disjoint G_OR [[SHL]], [[LSHR3]] + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[C]], [[C2]] + ; VI-NEXT: [[XOR1:%[0-9]+]]:_(i16) = G_XOR [[C]], [[C3]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[XOR1]], [[C2]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[AND2]](i16) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC3]], [[C]](i16) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(i16) = G_LSHR [[LSHR4]], [[AND3]](i16) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i16) = disjoint G_OR [[SHL1]], [[LSHR5]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C1]](i32) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR6]](i32) + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[TRUNC4]], [[C]](i16) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i16) = G_SHL [[TRUNC5]], [[C]](i16) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[C4]], [[C1]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[COPY3]], [[SHL4]] + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; VI-NEXT: [[XOR2:%[0-9]+]]:_(<2 x i16>) = G_XOR [[COPY2]], [[BITCAST3]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[XOR2]](<2 x i16>) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C1]](i32) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR7]](i32) + ; VI-NEXT: [[AND4:%[0-9]+]]:_(i16) = G_AND [[TRUNC6]], [[C2]] + ; VI-NEXT: [[XOR3:%[0-9]+]]:_(i16) = G_XOR [[TRUNC6]], [[C3]] + ; VI-NEXT: [[AND5:%[0-9]+]]:_(i16) = G_AND [[XOR3]], [[C2]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i16) = G_SHL [[OR]], [[AND4]](i16) + ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(i16) = G_LSHR [[SHL2]], [[C]](i16) + ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(i16) = G_LSHR [[LSHR8]], [[AND5]](i16) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i16) = disjoint G_OR [[SHL5]], [[LSHR9]] + ; VI-NEXT: [[AND6:%[0-9]+]]:_(i16) = G_AND [[TRUNC7]], [[C2]] + ; VI-NEXT: [[XOR4:%[0-9]+]]:_(i16) = G_XOR [[TRUNC7]], [[C3]] + ; VI-NEXT: [[AND7:%[0-9]+]]:_(i16) = G_AND [[XOR4]], [[C2]] + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(i16) = G_SHL [[OR1]], [[AND6]](i16) + ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(i16) = G_LSHR [[SHL3]], [[C]](i16) + ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(i16) = G_LSHR [[LSHR10]], [[AND7]](i16) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i16) = disjoint G_OR [[SHL6]], [[LSHR11]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR3]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR4]](i16) + ; VI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL7]] + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR5]](i32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST5]](<2 x i16>) ; ; GFX9-LABEL: name: test_fshr_v2s16_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY2]], [[BUILD_VECTOR]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT 
i16 -1 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C1]](s16), [[C1]](s16) - ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY2]], [[BUILD_VECTOR1]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR]], [[BUILD_VECTOR]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C2]](s16), [[C2]](s16) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[COPY]], [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[SHL]], [[AND1]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[COPY1]], [[AND]](<2 x s16>) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(<2 x s16>) = disjoint G_OR [[SHL1]], [[LSHR]] - ; GFX9-NEXT: $vgpr0 = COPY [[OR]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = COPY $vgpr2 - %3:_(<2 x s16>) = G_FSHR %0, %1, %2 - $vgpr0 = COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 15 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[C]](i16), [[C]](i16) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(<2 x i16>) = G_AND [[COPY2]], [[BUILD_VECTOR]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 -1 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[C1]](i16), [[C1]](i16) + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(<2 x i16>) = G_XOR [[COPY2]], [[BUILD_VECTOR1]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(<2 x i16>) = G_AND [[XOR]], [[BUILD_VECTOR]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 1 + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[C2]](i16), [[C2]](i16) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x i16>) = G_SHL [[COPY]], [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x i16>) = G_SHL [[SHL]], [[AND1]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(<2 x i16>) = G_LSHR [[COPY1]], [[AND]](<2 x i16>) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(<2 x i16>) = disjoint G_OR [[SHL1]], [[LSHR]] + ; GFX9-NEXT: $vgpr0 = COPY [[OR]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = COPY $vgpr2 + %3:_(<2 x i16>) = G_FSHR %0, %1, %2(<2 x i16>) + $vgpr0 = COPY %3(<2 x i16>) ... 
--- @@ -386,67 +386,67 @@ body: | ; SI-LABEL: name: test_fshr_s64_s64 ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C]] - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; SI-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY2]], [[C1]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[XOR]], [[C]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C2]](s32) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SHL]], [[TRUNC]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[TRUNC1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = disjoint G_OR [[SHL1]], [[LSHR]] - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i64) = COPY $vgpr4_vgpr5 + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 63 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[COPY2]], [[C]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 -1 + ; SI-NEXT: [[XOR:%[0-9]+]]:_(i64) = G_XOR [[COPY2]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[XOR]], [[C]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[COPY]], [[C2]](i32) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[AND1]](i64) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[SHL]], [[TRUNC]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i32) = G_TRUNC [[AND]](i64) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[COPY1]], [[TRUNC1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i64) = disjoint G_OR [[SHL1]], [[LSHR]] + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[OR]](i64) ; ; VI-LABEL: name: test_fshr_s64_s64 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C]] - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; VI-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY2]], [[C1]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[XOR]], [[C]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C2]](s32) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SHL]], [[TRUNC]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[TRUNC1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = disjoint G_OR [[SHL1]], [[LSHR]] - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i64) = COPY $vgpr4_vgpr5 + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 63 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[COPY2]], [[C]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 -1 + ; VI-NEXT: [[XOR:%[0-9]+]]:_(i64) = G_XOR [[COPY2]], 
[[C1]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[XOR]], [[C]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[COPY]], [[C2]](i32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[AND1]](i64) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[SHL]], [[TRUNC]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i32) = G_TRUNC [[AND]](i64) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[COPY1]], [[TRUNC1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i64) = disjoint G_OR [[SHL1]], [[LSHR]] + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[OR]](i64) ; ; GFX9-LABEL: name: test_fshr_s64_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY2]], [[C1]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[XOR]], [[C]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C2]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SHL]], [[TRUNC]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[TRUNC1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s64) = disjoint G_OR [[SHL1]], [[LSHR]] - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = COPY $vgpr4_vgpr5 - %3:_(s64) = G_FSHR %0, %1, %2 - $vgpr0_vgpr1 = COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i64) = COPY $vgpr4_vgpr5 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 63 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[COPY2]], [[C]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 -1 + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(i64) = G_XOR [[COPY2]], [[C1]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[XOR]], [[C]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[COPY]], [[C2]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[AND1]](i64) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[SHL]], [[TRUNC]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i32) = G_TRUNC [[AND]](i64) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[COPY1]], [[TRUNC1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i64) = disjoint G_OR [[SHL1]], [[LSHR]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[OR]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64) = COPY $vgpr4_vgpr5 + %3:_(i64) = G_FSHR %0, %1, %2(i64) + $vgpr0_vgpr1 = COPY %3(i64) ... 
--- @@ -458,87 +458,87 @@ body: | ; SI-LABEL: name: test_fshr_s8_s8 ; SI: liveins: $vgpr0, $vgpr1, $vgpr2 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; SI-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[C]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C2]](s32) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND1]](s32) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[AND]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = disjoint G_OR [[SHL1]], [[LSHR]] - ; SI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 7 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY2]], [[C]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; SI-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[COPY2]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[XOR]], [[C]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[C2]](i32) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[SHL]], [[AND1]](i32) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C3]] + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[AND2]], [[AND]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = disjoint G_OR [[SHL1]], [[LSHR]] + ; SI-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; VI-LABEL: name: test_fshr_s8_s8 ; VI: liveins: $vgpr0, $vgpr1, $vgpr2 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; VI-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[C]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C2]](s16) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[AND1]](s32) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND2]](s16) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32) - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND4]], [[AND3]](s16) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL1]](s16) - ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = disjoint G_OR [[ANYEXT]], [[ANYEXT1]] - ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; VI-NEXT: 
[[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 7 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY2]], [[C]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; VI-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[COPY2]], [[C1]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[XOR]], [[C]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 1 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[C2]](i16) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[AND1]](i32) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C3]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[SHL]], [[AND2]](i16) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[AND]](i32) + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C3]] + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; VI-NEXT: [[AND4:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C3]] + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i16) = G_LSHR [[AND4]], [[AND3]](i16) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[SHL1]](i16) + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR]](i16) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = disjoint G_OR [[ANYEXT]], [[ANYEXT1]] + ; VI-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; GFX9-LABEL: name: test_fshr_s8_s8 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[C]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C2]](s16) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[AND1]](s32) - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND2]](s16) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32) - ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND4]], [[AND3]](s16) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL1]](s16) - ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = disjoint G_OR [[ANYEXT]], [[ANYEXT1]] - ; GFX9-NEXT: $vgpr0 = COPY [[OR]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s8) = G_TRUNC %0 - %4:_(s8) = G_TRUNC %1 - %5:_(s8) = G_TRUNC %2 - %6:_(s8) = G_FSHR %3, %4, %5 - %7:_(s32) = G_ANYEXT %6 - $vgpr0 = COPY %7 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 7 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY2]], [[C]] + ; GFX9-NEXT: 
[[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[COPY2]], [[C1]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[XOR]], [[C]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[C2]](i16) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[AND1]](i32) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C3]] + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[SHL]], [[AND2]](i16) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[AND]](i32) + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C3]] + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C3]] + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i16) = G_LSHR [[AND4]], [[AND3]](i16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[SHL1]](i16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR]](i16) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = disjoint G_OR [[ANYEXT]], [[ANYEXT1]] + ; GFX9-NEXT: $vgpr0 = COPY [[OR]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i8) = G_TRUNC %0(i32) + %4:_(i8) = G_TRUNC %1(i32) + %5:_(i8) = G_TRUNC %2(i32) + %6:_(i8) = G_FSHR %3, %4, %5(i8) + %7:_(i32) = G_ANYEXT %6(i8) + $vgpr0 = COPY %7(i32) ... --- @@ -550,131 +550,131 @@ body: | ; SI-LABEL: name: test_fshr_s24_s24 ; SI: liveins: $vgpr0, $vgpr1, $vgpr2 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] - ; SI-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[C1]](s32) - ; SI-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] - ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[C1]] - ; SI-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; SI-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; SI-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; SI-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; SI-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[C1]] - ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[C1]] - ; SI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C1]] - ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[C1]] - ; SI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[C1]] - ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; SI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C5]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] - ; 
SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND1]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[AND2]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = disjoint G_OR [[SHL1]], [[LSHR]] - ; SI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 23 + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16777215 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY2]], [[C2]] + ; SI-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[C1]](i32) + ; SI-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; SI-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] + ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; SI-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C4]], [[C1]] + ; SI-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; SI-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; SI-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; SI-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[AND]], [[ADD]] + ; SI-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[C1]] + ; SI-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[AND]], [[MUL1]] + ; SI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[C1]] + ; SI-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[C1]] + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT]](i32), [[C1]] + ; SI-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SELECT]], [[C1]] + ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SUB3]], [[SELECT]] + ; SI-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[C]], [[SELECT1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[C5]](i32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[SUB4]], [[C2]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[SHL]], [[AND1]](i32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[SELECT1]], [[C2]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C2]] + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[AND3]], [[AND2]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = disjoint G_OR [[SHL1]], [[LSHR]] + ; SI-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; VI-LABEL: name: test_fshr_s24_s24 ; VI: liveins: $vgpr0, $vgpr1, $vgpr2 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] - ; VI-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[C1]](s32) - ; VI-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] - ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; 
VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[C1]] - ; VI-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; VI-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; VI-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; VI-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; VI-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[C1]] - ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[C1]] - ; VI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C1]] - ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[C1]] - ; VI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[C1]] - ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; VI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C5]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND1]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[AND2]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = disjoint G_OR [[SHL1]], [[LSHR]] - ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 23 + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16777215 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY2]], [[C2]] + ; VI-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[C1]](i32) + ; VI-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; VI-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] + ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; VI-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C4]], [[C1]] + ; VI-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; VI-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; VI-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; VI-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[AND]], [[ADD]] + ; VI-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[C1]] + ; VI-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[AND]], [[MUL1]] + ; VI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[C1]] + ; VI-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[C1]] + ; VI-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT]](i32), [[C1]] + ; VI-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SELECT]], [[C1]] + ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SUB3]], [[SELECT]] + ; VI-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[C]], [[SELECT1]] + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[C5]](i32) + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[SUB4]], [[C2]] + ; VI-NEXT: 
[[SHL1:%[0-9]+]]:_(i32) = G_SHL [[SHL]], [[AND1]](i32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[SELECT1]], [[C2]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C2]] + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[AND3]], [[AND2]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = disjoint G_OR [[SHL1]], [[LSHR]] + ; VI-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; GFX9-LABEL: name: test_fshr_s24_s24 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] - ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[C1]](s32) - ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] - ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[C1]] - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[C1]] - ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[C1]] - ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C1]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[C1]] - ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[C1]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C5]](s32) - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND1]](s32) - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] - ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[AND2]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = disjoint G_OR [[SHL1]], [[LSHR]] - ; GFX9-NEXT: $vgpr0 = COPY [[OR]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s24) = G_TRUNC %0 - %4:_(s24) = G_TRUNC %1 - %5:_(s24) = G_TRUNC %2 - %6:_(s24) = G_FSHR %3, %4, %5 - %7:_(s32) = G_ANYEXT %6 - $vgpr0 = COPY %7 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 23 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16777215 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY2]], [[C2]] + ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP 
[[C1]](i32) + ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] + ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C4]], [[C1]] + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[AND]], [[ADD]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[C1]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[AND]], [[MUL1]] + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[C1]] + ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[C1]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT]](i32), [[C1]] + ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SELECT]], [[C1]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SUB3]], [[SELECT]] + ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[C]], [[SELECT1]] + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[C5]](i32) + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[SUB4]], [[C2]] + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[SHL]], [[AND1]](i32) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[SELECT1]], [[C2]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C2]] + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[AND3]], [[AND2]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = disjoint G_OR [[SHL1]], [[LSHR]] + ; GFX9-NEXT: $vgpr0 = COPY [[OR]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i24) = G_TRUNC %0(i32) + %4:_(i24) = G_TRUNC %1(i32) + %5:_(i24) = G_TRUNC %2(i32) + %6:_(i24) = G_FSHR %3, %4, %5(i24) + %7:_(i32) = G_ANYEXT %6(i24) + $vgpr0 = COPY %7(i32) ... 
--- @@ -686,329 +686,329 @@ body: | ; SI-LABEL: name: test_fshr_v3s16_v3s16 ; SI: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C1]], [[C2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; SI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[C1]], [[C3]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C2]] - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[ZEXT]](s32) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C5]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C4]](s32) - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LSHR2]], [[ZEXT1]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC]], [[TRUNC1]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[C1]], [[C2]] - ; SI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[C1]], [[C3]] - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C2]] - ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND3]](s16) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) - ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[COPY6]](s32) - ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR4]], [[ZEXT3]](s32) - ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC2]], [[TRUNC3]] - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[COPY7]](s32) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[COPY8]](s32) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY10]], [[C]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[COPY9]], [[SHL4]] - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI-NEXT: [[XOR2:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY4]], [[BITCAST4]] - ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>) - ; 
SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) - ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C2]] - ; SI-NEXT: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC4]], [[C3]] - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C2]] - ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT]], [[ZEXT4]](s32) - ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[SHL2]], [[C5]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY11]](s32) - ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND6]](s16) - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LSHR7]], [[ZEXT5]](s32) - ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC6]], [[TRUNC7]] - ; SI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C2]] - ; SI-NEXT: [[XOR4:%[0-9]+]]:_(s16) = G_XOR [[TRUNC5]], [[C3]] - ; SI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[XOR4]], [[C2]] - ; SI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[AND8]](s16) - ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) - ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT1]], [[ZEXT6]](s32) - ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[SHL3]], [[C5]] - ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY12]](s32) - ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[AND9]](s16) - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[LSHR9]], [[ZEXT7]](s32) - ; SI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR10]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC8]], [[TRUNC9]] - ; SI-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[C1]], [[C2]] - ; SI-NEXT: [[XOR5:%[0-9]+]]:_(s16) = G_XOR [[C1]], [[C3]] - ; SI-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[XOR5]], [[C2]] - ; SI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[AND11]](s16) - ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[ZEXT8]](s32) - ; SI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C5]] - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND13]], [[COPY13]](s32) - ; SI-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[AND12]](s16) - ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[LSHR11]], [[ZEXT9]](s32) - ; SI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR12]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC10]], [[TRUNC11]] - ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[COPY14]](s32) - ; SI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C]](s32) - ; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL9]] - ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) - ; SI-NEXT: [[XOR6:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY5]], [[BITCAST6]] - ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[XOR6]](<2 x s16>) - ; SI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC 
[[BITCAST7]](s32) - ; SI-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C2]] - ; SI-NEXT: [[XOR7:%[0-9]+]]:_(s16) = G_XOR [[TRUNC12]], [[C3]] - ; SI-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[XOR7]], [[C2]] - ; SI-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[AND14]](s16) - ; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR5]](s16) - ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT2]], [[ZEXT10]](s32) - ; SI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL10]](s32) - ; SI-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[SHL8]], [[C5]] - ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND16]], [[COPY17]](s32) - ; SI-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[AND15]](s16) - ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[LSHR13]], [[ZEXT11]](s32) - ; SI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR14]](s32) - ; SI-NEXT: [[OR7:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC13]], [[TRUNC14]] - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) - ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; SI-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) - ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT13]], [[C]](s32) - ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT12]], [[SHL11]] - ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) - ; SI-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) - ; SI-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[BITCAST8]], [[C5]] - ; SI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C]](s32) - ; SI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[ZEXT14]], [[SHL12]] - ; SI-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32) - ; SI-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C5]] - ; SI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C]](s32) - ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[LSHR15]], [[SHL13]] - ; SI-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) - ; SI-NEXT: $vgpr0 = COPY [[BITCAST10]](<2 x s16>) - ; SI-NEXT: $vgpr1 = COPY [[BITCAST11]](<2 x s16>) - ; SI-NEXT: $vgpr2 = COPY [[BITCAST12]](<2 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr3 + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; SI-NEXT: [[COPY5:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY2]](<2 x i16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY3]](<2 x i16>) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 1 + ; SI-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 15 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[C1]], [[C2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 -1 + ; SI-NEXT: [[XOR:%[0-9]+]]:_(i16) = G_XOR [[C1]], [[C3]] 
+ ; SI-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[XOR]], [[C2]] + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[AND]](i16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[BITCAST]], [[ZEXT]](i32) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[SHL]](i32) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C5]] + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[AND2]], [[C4]](i32) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[AND1]](i16) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[LSHR2]], [[ZEXT1]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i16) = disjoint G_OR [[TRUNC]], [[TRUNC1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[C1]], [[C2]] + ; SI-NEXT: [[XOR1:%[0-9]+]]:_(i16) = G_XOR [[C1]], [[C3]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(i16) = G_AND [[XOR1]], [[C2]] + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[AND3]](i16) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[ZEXT2]](i32) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[SHL1]](i32) + ; SI-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[LSHR1]], [[COPY6]](i32) + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[AND4]](i16) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[LSHR4]], [[ZEXT3]](i32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR5]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i16) = disjoint G_OR [[TRUNC2]], [[TRUNC3]] + ; SI-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[BITCAST2]], [[COPY7]](i32) + ; SI-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LSHR1]], [[COPY8]](i32) + ; SI-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; SI-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[COPY10]], [[C]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[COPY9]], [[SHL4]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; SI-NEXT: [[XOR2:%[0-9]+]]:_(<2 x i16>) = G_XOR [[COPY4]], [[BITCAST4]] + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[XOR2]](<2 x i16>) + ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST5]](i32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST5]], [[C]](i32) + ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR6]](i32) + ; SI-NEXT: [[AND5:%[0-9]+]]:_(i16) = G_AND [[TRUNC4]], [[C2]] + ; SI-NEXT: [[XOR3:%[0-9]+]]:_(i16) = G_XOR [[TRUNC4]], [[C3]] + ; SI-NEXT: [[AND6:%[0-9]+]]:_(i16) = G_AND [[XOR3]], [[C2]] + ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[AND5]](i16) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[OR]](i16) + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[ANYEXT]], [[ZEXT4]](i32) + ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[SHL5]](i32) + ; SI-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; SI-NEXT: [[AND7:%[0-9]+]]:_(i32) = G_AND [[SHL2]], [[C5]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(i32) = G_LSHR [[AND7]], [[COPY11]](i32) + ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[AND6]](i16) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(i32) = G_LSHR [[LSHR7]], [[ZEXT5]](i32) + ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR8]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i16) = disjoint G_OR [[TRUNC6]], [[TRUNC7]] + ; SI-NEXT: [[AND8:%[0-9]+]]:_(i16) = G_AND [[TRUNC5]], [[C2]] + ; SI-NEXT: [[XOR4:%[0-9]+]]:_(i16) = G_XOR [[TRUNC5]], [[C3]] + ; SI-NEXT: 
[[AND9:%[0-9]+]]:_(i16) = G_AND [[XOR4]], [[C2]] + ; SI-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[AND8]](i16) + ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[OR1]](i16) + ; SI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ANYEXT1]], [[ZEXT6]](i32) + ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[SHL6]](i32) + ; SI-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; SI-NEXT: [[AND10:%[0-9]+]]:_(i32) = G_AND [[SHL3]], [[C5]] + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(i32) = G_LSHR [[AND10]], [[COPY12]](i32) + ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[AND9]](i16) + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(i32) = G_LSHR [[LSHR9]], [[ZEXT7]](i32) + ; SI-NEXT: [[TRUNC9:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR10]](i32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i16) = disjoint G_OR [[TRUNC8]], [[TRUNC9]] + ; SI-NEXT: [[AND11:%[0-9]+]]:_(i16) = G_AND [[C1]], [[C2]] + ; SI-NEXT: [[XOR5:%[0-9]+]]:_(i16) = G_XOR [[C1]], [[C3]] + ; SI-NEXT: [[AND12:%[0-9]+]]:_(i16) = G_AND [[XOR5]], [[C2]] + ; SI-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[AND11]](i16) + ; SI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[BITCAST1]], [[ZEXT8]](i32) + ; SI-NEXT: [[TRUNC10:%[0-9]+]]:_(i16) = G_TRUNC [[SHL7]](i32) + ; SI-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; SI-NEXT: [[AND13:%[0-9]+]]:_(i32) = G_AND [[BITCAST3]], [[C5]] + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(i32) = G_LSHR [[AND13]], [[COPY13]](i32) + ; SI-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[AND12]](i16) + ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(i32) = G_LSHR [[LSHR11]], [[ZEXT9]](i32) + ; SI-NEXT: [[TRUNC11:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR12]](i32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(i16) = disjoint G_OR [[TRUNC10]], [[TRUNC11]] + ; SI-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; SI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[BITCAST3]], [[COPY14]](i32) + ; SI-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; SI-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; SI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[COPY16]], [[C]](i32) + ; SI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[COPY15]], [[SHL9]] + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR6]](i32) + ; SI-NEXT: [[XOR6:%[0-9]+]]:_(<2 x i16>) = G_XOR [[COPY5]], [[BITCAST6]] + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[XOR6]](<2 x i16>) + ; SI-NEXT: [[TRUNC12:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST7]](i32) + ; SI-NEXT: [[AND14:%[0-9]+]]:_(i16) = G_AND [[TRUNC12]], [[C2]] + ; SI-NEXT: [[XOR7:%[0-9]+]]:_(i16) = G_XOR [[TRUNC12]], [[C3]] + ; SI-NEXT: [[AND15:%[0-9]+]]:_(i16) = G_AND [[XOR7]], [[C2]] + ; SI-NEXT: [[ZEXT10:%[0-9]+]]:_(i32) = G_ZEXT [[AND14]](i16) + ; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[OR5]](i16) + ; SI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[ANYEXT2]], [[ZEXT10]](i32) + ; SI-NEXT: [[TRUNC13:%[0-9]+]]:_(i16) = G_TRUNC [[SHL10]](i32) + ; SI-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; SI-NEXT: [[AND16:%[0-9]+]]:_(i32) = G_AND [[SHL8]], [[C5]] + ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(i32) = G_LSHR [[AND16]], [[COPY17]](i32) + ; SI-NEXT: [[ZEXT11:%[0-9]+]]:_(i32) = G_ZEXT [[AND15]](i16) + ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(i32) = G_LSHR [[LSHR13]], [[ZEXT11]](i32) + ; SI-NEXT: [[TRUNC14:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR14]](i32) + ; SI-NEXT: [[OR7:%[0-9]+]]:_(i16) = disjoint G_OR [[TRUNC13]], [[TRUNC14]] + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST8]], 
[[C]](i32) + ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; SI-NEXT: [[ZEXT12:%[0-9]+]]:_(i32) = G_ZEXT [[OR3]](i16) + ; SI-NEXT: [[ZEXT13:%[0-9]+]]:_(i32) = G_ZEXT [[OR4]](i16) + ; SI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[ZEXT13]], [[C]](i32) + ; SI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[ZEXT12]], [[SHL11]] + ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR8]](i32) + ; SI-NEXT: [[ZEXT14:%[0-9]+]]:_(i32) = G_ZEXT [[OR7]](i16) + ; SI-NEXT: [[AND17:%[0-9]+]]:_(i32) = G_AND [[BITCAST8]], [[C5]] + ; SI-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[AND17]], [[C]](i32) + ; SI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[ZEXT14]], [[SHL12]] + ; SI-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR9]](i32) + ; SI-NEXT: [[AND18:%[0-9]+]]:_(i32) = G_AND [[BITCAST9]], [[C5]] + ; SI-NEXT: [[SHL13:%[0-9]+]]:_(i32) = G_SHL [[AND18]], [[C]](i32) + ; SI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[LSHR15]], [[SHL13]] + ; SI-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR10]](i32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST10]](<2 x i16>) + ; SI-NEXT: $vgpr1 = COPY [[BITCAST11]](<2 x i16>) + ; SI-NEXT: $vgpr2 = COPY [[BITCAST12]](<2 x i16>) ; ; VI-LABEL: name: test_fshr_v3s16_v3s16 ; VI: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) - ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C1]], [[C2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; VI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[C1]], [[C3]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C2]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s16) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C1]](s16) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[LSHR2]], [[AND1]](s16) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL]], [[LSHR3]] - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[C1]], [[C2]] - ; VI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[C1]], [[C3]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C2]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[AND2]](s16) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C1]](s16) - ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[LSHR4]], [[AND3]](s16) - ; 
VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL1]], [[LSHR5]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[C1]](s16) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC4]], [[C1]](s16) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[COPY6]], [[SHL4]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: [[XOR2:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY4]], [[BITCAST4]] - ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>) - ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) - ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C2]] - ; VI-NEXT: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC6]], [[C3]] - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C2]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[OR]], [[AND4]](s16) - ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[SHL2]], [[C1]](s16) - ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[LSHR7]], [[AND5]](s16) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL5]], [[LSHR8]] - ; VI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C2]] - ; VI-NEXT: [[XOR4:%[0-9]+]]:_(s16) = G_XOR [[TRUNC7]], [[C3]] - ; VI-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[XOR4]], [[C2]] - ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[OR1]], [[AND6]](s16) - ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[SHL3]], [[C1]](s16) - ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[LSHR9]], [[AND7]](s16) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL6]], [[LSHR10]] - ; VI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[C1]], [[C2]] - ; VI-NEXT: [[XOR5:%[0-9]+]]:_(s16) = G_XOR [[C1]], [[C3]] - ; VI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[XOR5]], [[C2]] - ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[AND8]](s16) - ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[C1]](s16) - ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s16) = G_LSHR [[LSHR11]], [[AND9]](s16) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL7]], [[LSHR12]] - ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[TRUNC5]], [[C1]](s16) - ; VI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[C]](s32) - ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[COPY7]], [[SHL9]] - ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) - ; VI-NEXT: [[XOR6:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY5]], [[BITCAST6]] - ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[XOR6]](<2 x s16>) - ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST7]](s32) - ; VI-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C2]] - ; VI-NEXT: [[XOR7:%[0-9]+]]:_(s16) = G_XOR [[TRUNC8]], [[C3]] - ; VI-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[XOR7]], [[C2]] - ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[OR5]], [[AND10]](s16) - ; VI-NEXT: [[LSHR13:%[0-9]+]]:_(s16) = G_LSHR [[SHL8]], [[C1]](s16) - ; VI-NEXT: [[LSHR14:%[0-9]+]]:_(s16) = G_LSHR [[LSHR13]], [[AND11]](s16) - ; VI-NEXT: [[OR7:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL10]], [[LSHR14]] - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: 
[[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) - ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) - ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL11]] - ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) - ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) - ; VI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[BITCAST8]], [[C4]] - ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C]](s32) - ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL12]] - ; VI-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32) - ; VI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C4]] - ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32) - ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[LSHR15]], [[SHL13]] - ; VI-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) - ; VI-NEXT: $vgpr0 = COPY [[BITCAST10]](<2 x s16>) - ; VI-NEXT: $vgpr1 = COPY [[BITCAST11]](<2 x s16>) - ; VI-NEXT: $vgpr2 = COPY [[BITCAST12]](<2 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr3 + ; VI-NEXT: [[COPY4:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; VI-NEXT: [[COPY5:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY2]](<2 x i16>) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY3]](<2 x i16>) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST3]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 1 + ; VI-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 15 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[C1]], [[C2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 -1 + ; VI-NEXT: [[XOR:%[0-9]+]]:_(i16) = G_XOR [[C1]], [[C3]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[XOR]], [[C2]] + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[AND]](i16) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC3]], [[C1]](i16) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i16) = G_LSHR [[LSHR2]], [[AND1]](i16) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i16) = disjoint G_OR [[SHL]], [[LSHR3]] + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[C1]], [[C2]] + ; VI-NEXT: [[XOR1:%[0-9]+]]:_(i16) = G_XOR [[C1]], [[C3]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[XOR1]], [[C2]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[AND2]](i16) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC4]], [[C1]](i16) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(i16) = G_LSHR [[LSHR4]], [[AND3]](i16) + ; VI-NEXT: 
[[OR1:%[0-9]+]]:_(i16) = disjoint G_OR [[SHL1]], [[LSHR5]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[TRUNC3]], [[C1]](i16) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i16) = G_SHL [[TRUNC4]], [[C1]](i16) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[C4]], [[C]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[COPY6]], [[SHL4]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; VI-NEXT: [[XOR2:%[0-9]+]]:_(<2 x i16>) = G_XOR [[COPY4]], [[BITCAST4]] + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[XOR2]](<2 x i16>) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST5]](i32) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST5]], [[C]](i32) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR6]](i32) + ; VI-NEXT: [[AND4:%[0-9]+]]:_(i16) = G_AND [[TRUNC6]], [[C2]] + ; VI-NEXT: [[XOR3:%[0-9]+]]:_(i16) = G_XOR [[TRUNC6]], [[C3]] + ; VI-NEXT: [[AND5:%[0-9]+]]:_(i16) = G_AND [[XOR3]], [[C2]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i16) = G_SHL [[OR]], [[AND4]](i16) + ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(i16) = G_LSHR [[SHL2]], [[C1]](i16) + ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(i16) = G_LSHR [[LSHR7]], [[AND5]](i16) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i16) = disjoint G_OR [[SHL5]], [[LSHR8]] + ; VI-NEXT: [[AND6:%[0-9]+]]:_(i16) = G_AND [[TRUNC7]], [[C2]] + ; VI-NEXT: [[XOR4:%[0-9]+]]:_(i16) = G_XOR [[TRUNC7]], [[C3]] + ; VI-NEXT: [[AND7:%[0-9]+]]:_(i16) = G_AND [[XOR4]], [[C2]] + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(i16) = G_SHL [[OR1]], [[AND6]](i16) + ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(i16) = G_LSHR [[SHL3]], [[C1]](i16) + ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(i16) = G_LSHR [[LSHR9]], [[AND7]](i16) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i16) = disjoint G_OR [[SHL6]], [[LSHR10]] + ; VI-NEXT: [[AND8:%[0-9]+]]:_(i16) = G_AND [[C1]], [[C2]] + ; VI-NEXT: [[XOR5:%[0-9]+]]:_(i16) = G_XOR [[C1]], [[C3]] + ; VI-NEXT: [[AND9:%[0-9]+]]:_(i16) = G_AND [[XOR5]], [[C2]] + ; VI-NEXT: [[SHL7:%[0-9]+]]:_(i16) = G_SHL [[TRUNC2]], [[AND8]](i16) + ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC5]], [[C1]](i16) + ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(i16) = G_LSHR [[LSHR11]], [[AND9]](i16) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i16) = disjoint G_OR [[SHL7]], [[LSHR12]] + ; VI-NEXT: [[SHL8:%[0-9]+]]:_(i16) = G_SHL [[TRUNC5]], [[C1]](i16) + ; VI-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; VI-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; VI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[COPY8]], [[C]](i32) + ; VI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[COPY7]], [[SHL9]] + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR6]](i32) + ; VI-NEXT: [[XOR6:%[0-9]+]]:_(<2 x i16>) = G_XOR [[COPY5]], [[BITCAST6]] + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[XOR6]](<2 x i16>) + ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST7]](i32) + ; VI-NEXT: [[AND10:%[0-9]+]]:_(i16) = G_AND [[TRUNC8]], [[C2]] + ; VI-NEXT: [[XOR7:%[0-9]+]]:_(i16) = G_XOR [[TRUNC8]], [[C3]] + ; VI-NEXT: [[AND11:%[0-9]+]]:_(i16) = G_AND [[XOR7]], [[C2]] + ; VI-NEXT: [[SHL10:%[0-9]+]]:_(i16) = G_SHL [[OR5]], [[AND10]](i16) + ; VI-NEXT: [[LSHR13:%[0-9]+]]:_(i16) = G_LSHR [[SHL8]], [[C1]](i16) + ; VI-NEXT: [[LSHR14:%[0-9]+]]:_(i16) = G_LSHR [[LSHR13]], [[AND11]](i16) + ; VI-NEXT: [[OR7:%[0-9]+]]:_(i16) = disjoint G_OR [[SHL10]], [[LSHR14]] + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; VI-NEXT: 
[[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; VI-NEXT: [[LSHR15:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST8]], [[C]](i32) + ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR3]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR4]](i16) + ; VI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; VI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL11]] + ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR8]](i32) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[OR7]](i16) + ; VI-NEXT: [[AND12:%[0-9]+]]:_(i32) = G_AND [[BITCAST8]], [[C4]] + ; VI-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[AND12]], [[C]](i32) + ; VI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL12]] + ; VI-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR9]](i32) + ; VI-NEXT: [[AND13:%[0-9]+]]:_(i32) = G_AND [[BITCAST9]], [[C4]] + ; VI-NEXT: [[SHL13:%[0-9]+]]:_(i32) = G_SHL [[AND13]], [[C]](i32) + ; VI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[LSHR15]], [[SHL13]] + ; VI-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR10]](i32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST10]](<2 x i16>) + ; VI-NEXT: $vgpr1 = COPY [[BITCAST11]](<2 x i16>) + ; VI-NEXT: $vgpr2 = COPY [[BITCAST12]](<2 x i16>) ; ; GFX9-LABEL: name: test_fshr_v3s16_v3s16 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY4]], [[BUILD_VECTOR]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C1]](s16), [[C1]](s16) - ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY4]], [[BUILD_VECTOR1]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR]], [[BUILD_VECTOR]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C2]](s16), [[C2]](s16) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[COPY]], [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[SHL]], [[AND1]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[COPY2]], [[AND]](<2 x s16>) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(<2 x s16>) = disjoint G_OR [[SHL1]], [[LSHR]] - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY5]], [[BUILD_VECTOR3]] - ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C1]](s16), [[C1]](s16) - ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY5]], [[BUILD_VECTOR4]] - ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR1]], [[BUILD_VECTOR3]] - ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C2]](s16), [[C2]](s16) - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(<2 x s16>) = G_SHL [[COPY1]], [[BUILD_VECTOR5]](<2 x s16>) - ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(<2 x s16>) = G_SHL [[SHL2]], [[AND3]](<2 x s16>) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(<2 x 
s16>) = G_LSHR [[COPY3]], [[AND2]](<2 x s16>) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(<2 x s16>) = disjoint G_OR [[SHL3]], [[LSHR1]] - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[OR1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C3]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[OR]](<2 x s16>) - ; GFX9-NEXT: $vgpr1 = COPY [[BUILD_VECTOR7]](<2 x s16>) - ; GFX9-NEXT: $vgpr2 = COPY [[BUILD_VECTOR6]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = COPY $vgpr2 - %3:_(<2 x s16>) = COPY $vgpr3 - %4:_(<2 x s16>) = COPY $vgpr4 - %5:_(<2 x s16>) = COPY $vgpr5 - %6:_(<2 x s16>) = G_IMPLICIT_DEF - %7:_(<6 x s16>) = G_CONCAT_VECTORS %0(<2 x s16>), %1(<2 x s16>), %6(<2 x s16>) - %8:_(<3 x s16>), %9:_(<3 x s16>) = G_UNMERGE_VALUES %7(<6 x s16>) - %10:_(<6 x s16>) = G_CONCAT_VECTORS %2(<2 x s16>), %3(<2 x s16>), %6(<2 x s16>) - %11:_(<3 x s16>), %12:_(<3 x s16>) = G_UNMERGE_VALUES %10(<6 x s16>) - %13:_(<6 x s16>) = G_CONCAT_VECTORS %4(<2 x s16>), %5(<2 x s16>), %6(<2 x s16>) - %14:_(<3 x s16>), %15:_(<3 x s16>) = G_UNMERGE_VALUES %13(<6 x s16>) - %16:_(<3 x s16>) = G_FSHR %8, %11, %14(<3 x s16>) - %17:_(<3 x s16>) = G_IMPLICIT_DEF - %18:_(<6 x s16>) = G_CONCAT_VECTORS %16(<3 x s16>), %17(<3 x s16>) - %19:_(<2 x s16>), %20:_(<2 x s16>), %21:_(<2 x s16>) = G_UNMERGE_VALUES %18(<6 x s16>) - $vgpr0 = COPY %19(<2 x s16>) - $vgpr1 = COPY %20(<2 x s16>) - $vgpr2 = COPY %21(<2 x s16>) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr3 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 15 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[C]](i16), [[C]](i16) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(<2 x i16>) = G_AND [[COPY4]], [[BUILD_VECTOR]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 -1 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[C1]](i16), [[C1]](i16) + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(<2 x i16>) = G_XOR [[COPY4]], [[BUILD_VECTOR1]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(<2 x i16>) = G_AND [[XOR]], [[BUILD_VECTOR]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 1 + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[C2]](i16), [[C2]](i16) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x i16>) = G_SHL [[COPY]], [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x i16>) = G_SHL [[SHL]], [[AND1]](<2 x i16>) + ; GFX9-NEXT: 
[[LSHR:%[0-9]+]]:_(<2 x i16>) = G_LSHR [[COPY2]], [[AND]](<2 x i16>) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(<2 x i16>) = disjoint G_OR [[SHL1]], [[LSHR]] + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[C]](i16), [[C]](i16) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(<2 x i16>) = G_AND [[COPY5]], [[BUILD_VECTOR3]] + ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[C1]](i16), [[C1]](i16) + ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(<2 x i16>) = G_XOR [[COPY5]], [[BUILD_VECTOR4]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(<2 x i16>) = G_AND [[XOR1]], [[BUILD_VECTOR3]] + ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[C2]](i16), [[C2]](i16) + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(<2 x i16>) = G_SHL [[COPY1]], [[BUILD_VECTOR5]](<2 x i16>) + ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(<2 x i16>) = G_SHL [[SHL2]], [[AND3]](<2 x i16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(<2 x i16>) = G_LSHR [[COPY3]], [[AND2]](<2 x i16>) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(<2 x i16>) = disjoint G_OR [[SHL3]], [[LSHR1]] + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[OR1]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C3]](i32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[OR]](<2 x i16>) + ; GFX9-NEXT: $vgpr1 = COPY [[BUILD_VECTOR7]](<2 x i16>) + ; GFX9-NEXT: $vgpr2 = COPY [[BUILD_VECTOR6]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = COPY $vgpr2 + %3:_(<2 x i16>) = COPY $vgpr3 + %4:_(<2 x i16>) = COPY $vgpr4 + %5:_(<2 x i16>) = COPY $vgpr5 + %6:_(<2 x i16>) = G_IMPLICIT_DEF + %7:_(<6 x i16>) = G_CONCAT_VECTORS %0(<2 x i16>), %1(<2 x i16>), %6(<2 x i16>) + %8:_(<3 x i16>), %9:_(<3 x i16>) = G_UNMERGE_VALUES %7(<6 x i16>) + %10:_(<6 x i16>) = G_CONCAT_VECTORS %2(<2 x i16>), %3(<2 x i16>), %6(<2 x i16>) + %11:_(<3 x i16>), %12:_(<3 x i16>) = G_UNMERGE_VALUES %10(<6 x i16>) + %13:_(<6 x i16>) = G_CONCAT_VECTORS %4(<2 x i16>), %5(<2 x i16>), %6(<2 x i16>) + %14:_(<3 x i16>), %15:_(<3 x i16>) = G_UNMERGE_VALUES %13(<6 x i16>) + %16:_(<3 x i16>) = G_FSHR %8, %11, %14(<3 x i16>) + %17:_(<3 x i16>) = G_IMPLICIT_DEF + %18:_(<6 x i16>) = G_CONCAT_VECTORS %16(<3 x i16>), %17(<3 x i16>) + %19:_(<2 x i16>), %20:_(<2 x i16>), %21:_(<2 x i16>) = G_UNMERGE_VALUES %18(<6 x i16>) + $vgpr0 = COPY %19(<2 x i16>) + $vgpr1 = COPY %20(<2 x i16>) + $vgpr2 = COPY %21(<2 x i16>) ... 
--- @@ -1020,343 +1020,343 @@ body: | ; SI-LABEL: name: test_fshr_v4s16_v4s16 ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; SI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; SI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C2]] - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[ZEXT]](s32) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C4]](s32) - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LSHR2]], [[ZEXT1]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC]], [[TRUNC1]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] - ; SI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C2]] - ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND3]](s16) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) - ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[COPY3]](s32) - ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR4]], [[ZEXT3]](s32) - ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC2]], [[TRUNC3]] - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[COPY4]](s32) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR6]], [[COPY5]](s32) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[C1]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[COPY6]], [[SHL4]] - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST 
[[OR2]](s32) - ; SI-NEXT: [[XOR2:%[0-9]+]]:_(<2 x s16>) = G_XOR [[UV4]], [[BITCAST3]] - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>) - ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32) - ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C2]] - ; SI-NEXT: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC4]], [[C3]] - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C2]] - ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT]], [[ZEXT4]](s32) - ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[SHL2]], [[C5]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY8]](s32) - ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND6]](s16) - ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[LSHR8]], [[ZEXT5]](s32) - ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC6]], [[TRUNC7]] - ; SI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C2]] - ; SI-NEXT: [[XOR4:%[0-9]+]]:_(s16) = G_XOR [[TRUNC5]], [[C3]] - ; SI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[XOR4]], [[C2]] - ; SI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[AND8]](s16) - ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) - ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT1]], [[ZEXT6]](s32) - ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[SHL3]], [[C5]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY9]](s32) - ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[AND9]](s16) - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LSHR10]], [[ZEXT7]](s32) - ; SI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC8]], [[TRUNC9]] - ; SI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; SI-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) - ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C1]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL7]] - ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C1]](s32) - ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C1]](s32) - ; SI-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] - ; SI-NEXT: [[XOR5:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] - ; SI-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[XOR5]], [[C2]] - ; SI-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[AND11]](s16) - ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[BITCAST6]], [[ZEXT10]](s32) - ; SI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) - ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C5]] - ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND13]], [[COPY10]](s32) - ; SI-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[AND12]](s16) - ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[LSHR14]], [[ZEXT11]](s32) - ; SI-NEXT: 
[[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR15]](s32) - ; SI-NEXT: [[OR6:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC10]], [[TRUNC11]] - ; SI-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] - ; SI-NEXT: [[XOR6:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] - ; SI-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[XOR6]], [[C2]] - ; SI-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[AND14]](s16) - ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[LSHR12]], [[ZEXT12]](s32) - ; SI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) - ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[LSHR13]], [[COPY11]](s32) - ; SI-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[AND15]](s16) - ; SI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[LSHR16]], [[ZEXT13]](s32) - ; SI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR17]](s32) - ; SI-NEXT: [[OR7:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC12]], [[TRUNC13]] - ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C1]](s32) - ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[BITCAST8]], [[COPY12]](s32) - ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LSHR18]], [[COPY13]](s32) - ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[C1]](s32) - ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[COPY14]], [[SHL12]] - ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) - ; SI-NEXT: [[XOR7:%[0-9]+]]:_(<2 x s16>) = G_XOR [[UV5]], [[BITCAST9]] - ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[XOR7]](<2 x s16>) - ; SI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST10]](s32) - ; SI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C1]](s32) - ; SI-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR19]](s32) - ; SI-NEXT: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C2]] - ; SI-NEXT: [[XOR8:%[0-9]+]]:_(s16) = G_XOR [[TRUNC14]], [[C3]] - ; SI-NEXT: [[AND17:%[0-9]+]]:_(s16) = G_AND [[XOR8]], [[C2]] - ; SI-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[AND16]](s16) - ; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR6]](s16) - ; SI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT2]], [[ZEXT14]](s32) - ; SI-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[SHL13]](s32) - ; SI-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[SHL10]], [[C5]] - ; SI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[AND18]], [[COPY16]](s32) - ; SI-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[AND17]](s16) - ; SI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[LSHR20]], [[ZEXT15]](s32) - ; SI-NEXT: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR21]](s32) - ; SI-NEXT: [[OR9:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC16]], [[TRUNC17]] - ; SI-NEXT: [[AND19:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C2]] - ; SI-NEXT: [[XOR9:%[0-9]+]]:_(s16) = G_XOR [[TRUNC15]], [[C3]] - ; SI-NEXT: [[AND20:%[0-9]+]]:_(s16) = G_AND [[XOR9]], [[C2]] - ; SI-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[AND19]](s16) - ; SI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[OR7]](s16) - ; SI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT3]], [[ZEXT16]](s32) - ; SI-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[SHL14]](s32) - ; SI-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND21:%[0-9]+]]:_(s32) = G_AND [[SHL11]], [[C5]] - ; 
SI-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[AND21]], [[COPY17]](s32) - ; SI-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[AND20]](s16) - ; SI-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[LSHR22]], [[ZEXT17]](s32) - ; SI-NEXT: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR23]](s32) - ; SI-NEXT: [[OR10:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC18]], [[TRUNC19]] - ; SI-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) - ; SI-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16) - ; SI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[ZEXT19]], [[C1]](s32) - ; SI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT18]], [[SHL15]] - ; SI-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST11]](<2 x s16>) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr4_vgpr5 + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; SI-NEXT: [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY2]](<4 x i16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C1]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 15 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[C]], [[C2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 -1 + ; SI-NEXT: [[XOR:%[0-9]+]]:_(i16) = G_XOR [[C]], [[C3]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[XOR]], [[C2]] + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[AND]](i16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[BITCAST]], [[ZEXT]](i32) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[SHL]](i32) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C5]] + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[AND2]], [[C4]](i32) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[AND1]](i16) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[LSHR2]], [[ZEXT1]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i16) = disjoint G_OR [[TRUNC]], [[TRUNC1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[C]], [[C2]] + ; SI-NEXT: [[XOR1:%[0-9]+]]:_(i16) = G_XOR [[C]], [[C3]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(i16) = G_AND [[XOR1]], [[C2]] + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[AND3]](i16) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[ZEXT2]](i32) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[SHL1]](i32) + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[LSHR1]], [[COPY3]](i32) + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[AND4]](i16) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[LSHR4]], [[ZEXT3]](i32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR5]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i16) = disjoint G_OR [[TRUNC2]], 
[[TRUNC3]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C1]](i32) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[BITCAST2]], [[COPY4]](i32) + ; SI-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LSHR6]], [[COPY5]](i32) + ; SI-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; SI-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[COPY7]], [[C1]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[COPY6]], [[SHL4]] + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; SI-NEXT: [[XOR2:%[0-9]+]]:_(<2 x i16>) = G_XOR [[UV4]], [[BITCAST3]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[XOR2]](<2 x i16>) + ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C1]](i32) + ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR7]](i32) + ; SI-NEXT: [[AND5:%[0-9]+]]:_(i16) = G_AND [[TRUNC4]], [[C2]] + ; SI-NEXT: [[XOR3:%[0-9]+]]:_(i16) = G_XOR [[TRUNC4]], [[C3]] + ; SI-NEXT: [[AND6:%[0-9]+]]:_(i16) = G_AND [[XOR3]], [[C2]] + ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[AND5]](i16) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[OR]](i16) + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[ANYEXT]], [[ZEXT4]](i32) + ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[SHL5]](i32) + ; SI-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; SI-NEXT: [[AND7:%[0-9]+]]:_(i32) = G_AND [[SHL2]], [[C5]] + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(i32) = G_LSHR [[AND7]], [[COPY8]](i32) + ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[AND6]](i16) + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(i32) = G_LSHR [[LSHR8]], [[ZEXT5]](i32) + ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR9]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i16) = disjoint G_OR [[TRUNC6]], [[TRUNC7]] + ; SI-NEXT: [[AND8:%[0-9]+]]:_(i16) = G_AND [[TRUNC5]], [[C2]] + ; SI-NEXT: [[XOR4:%[0-9]+]]:_(i16) = G_XOR [[TRUNC5]], [[C3]] + ; SI-NEXT: [[AND9:%[0-9]+]]:_(i16) = G_AND [[XOR4]], [[C2]] + ; SI-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[AND8]](i16) + ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[OR1]](i16) + ; SI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ANYEXT1]], [[ZEXT6]](i32) + ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[SHL6]](i32) + ; SI-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; SI-NEXT: [[AND10:%[0-9]+]]:_(i32) = G_AND [[SHL3]], [[C5]] + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(i32) = G_LSHR [[AND10]], [[COPY9]](i32) + ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[AND9]](i16) + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(i32) = G_LSHR [[LSHR10]], [[ZEXT7]](i32) + ; SI-NEXT: [[TRUNC9:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR11]](i32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i16) = disjoint G_OR [[TRUNC8]], [[TRUNC9]] + ; SI-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[OR3]](i16) + ; SI-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[OR4]](i16) + ; SI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[ZEXT9]], [[C1]](i32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[ZEXT8]], [[SHL7]] + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR5]](i32) + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST6]], [[C1]](i32) + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST7]], [[C1]](i32) + ; SI-NEXT: 
[[AND11:%[0-9]+]]:_(i16) = G_AND [[C]], [[C2]] + ; SI-NEXT: [[XOR5:%[0-9]+]]:_(i16) = G_XOR [[C]], [[C3]] + ; SI-NEXT: [[AND12:%[0-9]+]]:_(i16) = G_AND [[XOR5]], [[C2]] + ; SI-NEXT: [[ZEXT10:%[0-9]+]]:_(i32) = G_ZEXT [[AND11]](i16) + ; SI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[BITCAST6]], [[ZEXT10]](i32) + ; SI-NEXT: [[TRUNC10:%[0-9]+]]:_(i16) = G_TRUNC [[SHL8]](i32) + ; SI-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; SI-NEXT: [[AND13:%[0-9]+]]:_(i32) = G_AND [[BITCAST7]], [[C5]] + ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(i32) = G_LSHR [[AND13]], [[COPY10]](i32) + ; SI-NEXT: [[ZEXT11:%[0-9]+]]:_(i32) = G_ZEXT [[AND12]](i16) + ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(i32) = G_LSHR [[LSHR14]], [[ZEXT11]](i32) + ; SI-NEXT: [[TRUNC11:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR15]](i32) + ; SI-NEXT: [[OR6:%[0-9]+]]:_(i16) = disjoint G_OR [[TRUNC10]], [[TRUNC11]] + ; SI-NEXT: [[AND14:%[0-9]+]]:_(i16) = G_AND [[C]], [[C2]] + ; SI-NEXT: [[XOR6:%[0-9]+]]:_(i16) = G_XOR [[C]], [[C3]] + ; SI-NEXT: [[AND15:%[0-9]+]]:_(i16) = G_AND [[XOR6]], [[C2]] + ; SI-NEXT: [[ZEXT12:%[0-9]+]]:_(i32) = G_ZEXT [[AND14]](i16) + ; SI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[LSHR12]], [[ZEXT12]](i32) + ; SI-NEXT: [[TRUNC12:%[0-9]+]]:_(i16) = G_TRUNC [[SHL9]](i32) + ; SI-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; SI-NEXT: [[LSHR16:%[0-9]+]]:_(i32) = G_LSHR [[LSHR13]], [[COPY11]](i32) + ; SI-NEXT: [[ZEXT13:%[0-9]+]]:_(i32) = G_ZEXT [[AND15]](i16) + ; SI-NEXT: [[LSHR17:%[0-9]+]]:_(i32) = G_LSHR [[LSHR16]], [[ZEXT13]](i32) + ; SI-NEXT: [[TRUNC13:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR17]](i32) + ; SI-NEXT: [[OR7:%[0-9]+]]:_(i16) = disjoint G_OR [[TRUNC12]], [[TRUNC13]] + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; SI-NEXT: [[LSHR18:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST8]], [[C1]](i32) + ; SI-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; SI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[BITCAST8]], [[COPY12]](i32) + ; SI-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; SI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[LSHR18]], [[COPY13]](i32) + ; SI-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; SI-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; SI-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[COPY15]], [[C1]](i32) + ; SI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[COPY14]], [[SHL12]] + ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR8]](i32) + ; SI-NEXT: [[XOR7:%[0-9]+]]:_(<2 x i16>) = G_XOR [[UV5]], [[BITCAST9]] + ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[XOR7]](<2 x i16>) + ; SI-NEXT: [[TRUNC14:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST10]](i32) + ; SI-NEXT: [[LSHR19:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST10]], [[C1]](i32) + ; SI-NEXT: [[TRUNC15:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR19]](i32) + ; SI-NEXT: [[AND16:%[0-9]+]]:_(i16) = G_AND [[TRUNC14]], [[C2]] + ; SI-NEXT: [[XOR8:%[0-9]+]]:_(i16) = G_XOR [[TRUNC14]], [[C3]] + ; SI-NEXT: [[AND17:%[0-9]+]]:_(i16) = G_AND [[XOR8]], [[C2]] + ; SI-NEXT: [[ZEXT14:%[0-9]+]]:_(i32) = G_ZEXT [[AND16]](i16) + ; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[OR6]](i16) + ; SI-NEXT: [[SHL13:%[0-9]+]]:_(i32) = G_SHL [[ANYEXT2]], [[ZEXT14]](i32) + ; SI-NEXT: [[TRUNC16:%[0-9]+]]:_(i16) = G_TRUNC [[SHL13]](i32) + ; SI-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; SI-NEXT: [[AND18:%[0-9]+]]:_(i32) = G_AND [[SHL10]], [[C5]] + ; SI-NEXT: [[LSHR20:%[0-9]+]]:_(i32) = G_LSHR [[AND18]], [[COPY16]](i32) + ; SI-NEXT: [[ZEXT15:%[0-9]+]]:_(i32) = G_ZEXT [[AND17]](i16) + ; SI-NEXT: [[LSHR21:%[0-9]+]]:_(i32) = G_LSHR [[LSHR20]], [[ZEXT15]](i32) + ; SI-NEXT: 
[[TRUNC17:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR21]](i32) + ; SI-NEXT: [[OR9:%[0-9]+]]:_(i16) = disjoint G_OR [[TRUNC16]], [[TRUNC17]] + ; SI-NEXT: [[AND19:%[0-9]+]]:_(i16) = G_AND [[TRUNC15]], [[C2]] + ; SI-NEXT: [[XOR9:%[0-9]+]]:_(i16) = G_XOR [[TRUNC15]], [[C3]] + ; SI-NEXT: [[AND20:%[0-9]+]]:_(i16) = G_AND [[XOR9]], [[C2]] + ; SI-NEXT: [[ZEXT16:%[0-9]+]]:_(i32) = G_ZEXT [[AND19]](i16) + ; SI-NEXT: [[ANYEXT3:%[0-9]+]]:_(i32) = G_ANYEXT [[OR7]](i16) + ; SI-NEXT: [[SHL14:%[0-9]+]]:_(i32) = G_SHL [[ANYEXT3]], [[ZEXT16]](i32) + ; SI-NEXT: [[TRUNC18:%[0-9]+]]:_(i16) = G_TRUNC [[SHL14]](i32) + ; SI-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; SI-NEXT: [[AND21:%[0-9]+]]:_(i32) = G_AND [[SHL11]], [[C5]] + ; SI-NEXT: [[LSHR22:%[0-9]+]]:_(i32) = G_LSHR [[AND21]], [[COPY17]](i32) + ; SI-NEXT: [[ZEXT17:%[0-9]+]]:_(i32) = G_ZEXT [[AND20]](i16) + ; SI-NEXT: [[LSHR23:%[0-9]+]]:_(i32) = G_LSHR [[LSHR22]], [[ZEXT17]](i32) + ; SI-NEXT: [[TRUNC19:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR23]](i32) + ; SI-NEXT: [[OR10:%[0-9]+]]:_(i16) = disjoint G_OR [[TRUNC18]], [[TRUNC19]] + ; SI-NEXT: [[ZEXT18:%[0-9]+]]:_(i32) = G_ZEXT [[OR9]](i16) + ; SI-NEXT: [[ZEXT19:%[0-9]+]]:_(i32) = G_ZEXT [[OR10]](i16) + ; SI-NEXT: [[SHL15:%[0-9]+]]:_(i32) = G_SHL [[ZEXT19]], [[C1]](i32) + ; SI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[ZEXT18]], [[SHL15]] + ; SI-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR11]](i32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x i16>), [[BITCAST11]](<2 x i16>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; VI-LABEL: name: test_fshr_v4s16_v4s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; VI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; VI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C2]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s16) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C]](s16) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[LSHR2]], [[AND1]](s16) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL]], [[LSHR3]] - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] - ; VI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR 
[[C]], [[C3]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C2]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[AND2]](s16) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C]](s16) - ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[LSHR4]], [[AND3]](s16) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL1]], [[LSHR5]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) - ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC4]], [[C]](s16) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC5]], [[C]](s16) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C1]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[SHL4]] - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: [[XOR2:%[0-9]+]]:_(<2 x s16>) = G_XOR [[UV4]], [[BITCAST3]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>) - ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32) - ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C2]] - ; VI-NEXT: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC6]], [[C3]] - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C2]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[OR]], [[AND4]](s16) - ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[SHL2]], [[C]](s16) - ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[LSHR8]], [[AND5]](s16) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL5]], [[LSHR9]] - ; VI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C2]] - ; VI-NEXT: [[XOR4:%[0-9]+]]:_(s16) = G_XOR [[TRUNC7]], [[C3]] - ; VI-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[XOR4]], [[C2]] - ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[OR1]], [[AND6]](s16) - ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[SHL3]], [[C]](s16) - ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[LSHR10]], [[AND7]](s16) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL6]], [[LSHR11]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) - ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL7]] - ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST6]](s32) - ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C1]](s32) - ; VI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR12]](s32) - ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST7]](s32) - ; VI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C1]](s32) - ; VI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR13]](s32) - ; VI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] - ; VI-NEXT: [[XOR5:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] - ; VI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[XOR5]], [[C2]] - ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[TRUNC8]], [[AND8]](s16) - ; VI-NEXT: [[LSHR14:%[0-9]+]]:_(s16) = 
G_LSHR [[TRUNC10]], [[C]](s16) - ; VI-NEXT: [[LSHR15:%[0-9]+]]:_(s16) = G_LSHR [[LSHR14]], [[AND9]](s16) - ; VI-NEXT: [[OR6:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL8]], [[LSHR15]] - ; VI-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] - ; VI-NEXT: [[XOR6:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] - ; VI-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[XOR6]], [[C2]] - ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[TRUNC9]], [[AND10]](s16) - ; VI-NEXT: [[LSHR16:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC11]], [[C]](s16) - ; VI-NEXT: [[LSHR17:%[0-9]+]]:_(s16) = G_LSHR [[LSHR16]], [[AND11]](s16) - ; VI-NEXT: [[OR7:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL9]], [[LSHR17]] - ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST8]](s32) - ; VI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C1]](s32) - ; VI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR18]](s32) - ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[TRUNC12]], [[C]](s16) - ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s16) = G_SHL [[TRUNC13]], [[C]](s16) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C1]](s32) - ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[COPY4]], [[SHL12]] - ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) - ; VI-NEXT: [[XOR7:%[0-9]+]]:_(<2 x s16>) = G_XOR [[UV5]], [[BITCAST9]] - ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[XOR7]](<2 x s16>) - ; VI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST10]](s32) - ; VI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C1]](s32) - ; VI-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR19]](s32) - ; VI-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C2]] - ; VI-NEXT: [[XOR8:%[0-9]+]]:_(s16) = G_XOR [[TRUNC14]], [[C3]] - ; VI-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[XOR8]], [[C2]] - ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[OR6]], [[AND12]](s16) - ; VI-NEXT: [[LSHR20:%[0-9]+]]:_(s16) = G_LSHR [[SHL10]], [[C]](s16) - ; VI-NEXT: [[LSHR21:%[0-9]+]]:_(s16) = G_LSHR [[LSHR20]], [[AND13]](s16) - ; VI-NEXT: [[OR9:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL13]], [[LSHR21]] - ; VI-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C2]] - ; VI-NEXT: [[XOR9:%[0-9]+]]:_(s16) = G_XOR [[TRUNC15]], [[C3]] - ; VI-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[XOR9]], [[C2]] - ; VI-NEXT: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[OR7]], [[AND14]](s16) - ; VI-NEXT: [[LSHR22:%[0-9]+]]:_(s16) = G_LSHR [[SHL11]], [[C]](s16) - ; VI-NEXT: [[LSHR23:%[0-9]+]]:_(s16) = G_LSHR [[LSHR22]], [[AND15]](s16) - ; VI-NEXT: [[OR10:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL14]], [[LSHR23]] - ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) - ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16) - ; VI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C1]](s32) - ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL15]] - ; VI-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST11]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr4_vgpr5 + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; VI-NEXT: 
[[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; VI-NEXT: [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY2]](<4 x i16>) + ; VI-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C1]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 15 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[C]], [[C2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 -1 + ; VI-NEXT: [[XOR:%[0-9]+]]:_(i16) = G_XOR [[C]], [[C3]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[XOR]], [[C2]] + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[AND]](i16) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC2]], [[C]](i16) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i16) = G_LSHR [[LSHR2]], [[AND1]](i16) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i16) = disjoint G_OR [[SHL]], [[LSHR3]] + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[C]], [[C2]] + ; VI-NEXT: [[XOR1:%[0-9]+]]:_(i16) = G_XOR [[C]], [[C3]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[XOR1]], [[C2]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[AND2]](i16) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC3]], [[C]](i16) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(i16) = G_LSHR [[LSHR4]], [[AND3]](i16) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i16) = disjoint G_OR [[SHL1]], [[LSHR5]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C1]](i32) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR6]](i32) + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[TRUNC4]], [[C]](i16) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i16) = G_SHL [[TRUNC5]], [[C]](i16) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[C4]], [[C1]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[COPY3]], [[SHL4]] + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; VI-NEXT: [[XOR2:%[0-9]+]]:_(<2 x i16>) = G_XOR [[UV4]], [[BITCAST3]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[XOR2]](<2 x i16>) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C1]](i32) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR7]](i32) + ; VI-NEXT: [[AND4:%[0-9]+]]:_(i16) = G_AND [[TRUNC6]], [[C2]] + ; VI-NEXT: [[XOR3:%[0-9]+]]:_(i16) = G_XOR [[TRUNC6]], [[C3]] + ; VI-NEXT: [[AND5:%[0-9]+]]:_(i16) = G_AND [[XOR3]], [[C2]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i16) = G_SHL [[OR]], [[AND4]](i16) + ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(i16) = G_LSHR [[SHL2]], [[C]](i16) + ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(i16) = G_LSHR [[LSHR8]], [[AND5]](i16) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i16) = disjoint G_OR [[SHL5]], [[LSHR9]] + ; VI-NEXT: [[AND6:%[0-9]+]]:_(i16) = G_AND [[TRUNC7]], [[C2]] + ; VI-NEXT: [[XOR4:%[0-9]+]]:_(i16) = G_XOR 
[[TRUNC7]], [[C3]] + ; VI-NEXT: [[AND7:%[0-9]+]]:_(i16) = G_AND [[XOR4]], [[C2]] + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(i16) = G_SHL [[OR1]], [[AND6]](i16) + ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(i16) = G_LSHR [[SHL3]], [[C]](i16) + ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(i16) = G_LSHR [[LSHR10]], [[AND7]](i16) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i16) = disjoint G_OR [[SHL6]], [[LSHR11]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR3]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR4]](i16) + ; VI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL7]] + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR5]](i32) + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST6]](i32) + ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST6]], [[C1]](i32) + ; VI-NEXT: [[TRUNC9:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR12]](i32) + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; VI-NEXT: [[TRUNC10:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST7]](i32) + ; VI-NEXT: [[LSHR13:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST7]], [[C1]](i32) + ; VI-NEXT: [[TRUNC11:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR13]](i32) + ; VI-NEXT: [[AND8:%[0-9]+]]:_(i16) = G_AND [[C]], [[C2]] + ; VI-NEXT: [[XOR5:%[0-9]+]]:_(i16) = G_XOR [[C]], [[C3]] + ; VI-NEXT: [[AND9:%[0-9]+]]:_(i16) = G_AND [[XOR5]], [[C2]] + ; VI-NEXT: [[SHL8:%[0-9]+]]:_(i16) = G_SHL [[TRUNC8]], [[AND8]](i16) + ; VI-NEXT: [[LSHR14:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC10]], [[C]](i16) + ; VI-NEXT: [[LSHR15:%[0-9]+]]:_(i16) = G_LSHR [[LSHR14]], [[AND9]](i16) + ; VI-NEXT: [[OR6:%[0-9]+]]:_(i16) = disjoint G_OR [[SHL8]], [[LSHR15]] + ; VI-NEXT: [[AND10:%[0-9]+]]:_(i16) = G_AND [[C]], [[C2]] + ; VI-NEXT: [[XOR6:%[0-9]+]]:_(i16) = G_XOR [[C]], [[C3]] + ; VI-NEXT: [[AND11:%[0-9]+]]:_(i16) = G_AND [[XOR6]], [[C2]] + ; VI-NEXT: [[SHL9:%[0-9]+]]:_(i16) = G_SHL [[TRUNC9]], [[AND10]](i16) + ; VI-NEXT: [[LSHR16:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC11]], [[C]](i16) + ; VI-NEXT: [[LSHR17:%[0-9]+]]:_(i16) = G_LSHR [[LSHR16]], [[AND11]](i16) + ; VI-NEXT: [[OR7:%[0-9]+]]:_(i16) = disjoint G_OR [[SHL9]], [[LSHR17]] + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; VI-NEXT: [[TRUNC12:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST8]](i32) + ; VI-NEXT: [[LSHR18:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST8]], [[C1]](i32) + ; VI-NEXT: [[TRUNC13:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR18]](i32) + ; VI-NEXT: [[SHL10:%[0-9]+]]:_(i16) = G_SHL [[TRUNC12]], [[C]](i16) + ; VI-NEXT: [[SHL11:%[0-9]+]]:_(i16) = G_SHL [[TRUNC13]], [[C]](i16) + ; VI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; VI-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; VI-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[COPY5]], [[C1]](i32) + ; VI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[COPY4]], [[SHL12]] + ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR8]](i32) + ; VI-NEXT: [[XOR7:%[0-9]+]]:_(<2 x i16>) = G_XOR [[UV5]], [[BITCAST9]] + ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[XOR7]](<2 x i16>) + ; VI-NEXT: [[TRUNC14:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST10]](i32) + ; VI-NEXT: [[LSHR19:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST10]], [[C1]](i32) + ; VI-NEXT: [[TRUNC15:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR19]](i32) + ; VI-NEXT: [[AND12:%[0-9]+]]:_(i16) = G_AND [[TRUNC14]], [[C2]] + ; VI-NEXT: [[XOR8:%[0-9]+]]:_(i16) = G_XOR [[TRUNC14]], [[C3]] + ; VI-NEXT: [[AND13:%[0-9]+]]:_(i16) = G_AND [[XOR8]], [[C2]] + ; VI-NEXT: [[SHL13:%[0-9]+]]:_(i16) = G_SHL [[OR6]], [[AND12]](i16) + ; 
VI-NEXT: [[LSHR20:%[0-9]+]]:_(i16) = G_LSHR [[SHL10]], [[C]](i16) + ; VI-NEXT: [[LSHR21:%[0-9]+]]:_(i16) = G_LSHR [[LSHR20]], [[AND13]](i16) + ; VI-NEXT: [[OR9:%[0-9]+]]:_(i16) = disjoint G_OR [[SHL13]], [[LSHR21]] + ; VI-NEXT: [[AND14:%[0-9]+]]:_(i16) = G_AND [[TRUNC15]], [[C2]] + ; VI-NEXT: [[XOR9:%[0-9]+]]:_(i16) = G_XOR [[TRUNC15]], [[C3]] + ; VI-NEXT: [[AND15:%[0-9]+]]:_(i16) = G_AND [[XOR9]], [[C2]] + ; VI-NEXT: [[SHL14:%[0-9]+]]:_(i16) = G_SHL [[OR7]], [[AND14]](i16) + ; VI-NEXT: [[LSHR22:%[0-9]+]]:_(i16) = G_LSHR [[SHL11]], [[C]](i16) + ; VI-NEXT: [[LSHR23:%[0-9]+]]:_(i16) = G_LSHR [[LSHR22]], [[AND15]](i16) + ; VI-NEXT: [[OR10:%[0-9]+]]:_(i16) = disjoint G_OR [[SHL14]], [[LSHR23]] + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[OR9]](i16) + ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[OR10]](i16) + ; VI-NEXT: [[SHL15:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C1]](i32) + ; VI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL15]] + ; VI-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR11]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x i16>), [[BITCAST11]](<2 x i16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX9-LABEL: name: test_fshr_v4s16_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[UV4]], [[BUILD_VECTOR]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C1]](s16), [[C1]](s16) - ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(<2 x s16>) = G_XOR [[UV4]], [[BUILD_VECTOR1]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR]], [[BUILD_VECTOR]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C2]](s16), [[C2]](s16) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV]], [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[SHL]], [[AND1]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[UV2]], [[AND]](<2 x s16>) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(<2 x s16>) = disjoint G_OR [[SHL1]], [[LSHR]] - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(<2 x s16>) = G_AND [[UV5]], [[BUILD_VECTOR3]] - ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C1]](s16), [[C1]](s16) - ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(<2 x s16>) = G_XOR [[UV5]], [[BUILD_VECTOR4]] - ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR1]], [[BUILD_VECTOR3]] - ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C2]](s16), [[C2]](s16) - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV1]], [[BUILD_VECTOR5]](<2 x s16>) - ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(<2 x 
s16>) = G_SHL [[SHL2]], [[AND3]](<2 x s16>) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[UV3]], [[AND2]](<2 x s16>) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(<2 x s16>) = disjoint G_OR [[SHL3]], [[LSHR1]] - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[OR]](<2 x s16>), [[OR1]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 - %2:_(<4 x s16>) = COPY $vgpr4_vgpr5 - %3:_(<4 x s16>) = G_FSHR %0, %1, %2 - $vgpr0_vgpr1 = COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr4_vgpr5 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY2]](<4 x i16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 15 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[C]](i16), [[C]](i16) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(<2 x i16>) = G_AND [[UV4]], [[BUILD_VECTOR]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 -1 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[C1]](i16), [[C1]](i16) + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(<2 x i16>) = G_XOR [[UV4]], [[BUILD_VECTOR1]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(<2 x i16>) = G_AND [[XOR]], [[BUILD_VECTOR]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 1 + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[C2]](i16), [[C2]](i16) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x i16>) = G_SHL [[UV]], [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x i16>) = G_SHL [[SHL]], [[AND1]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(<2 x i16>) = G_LSHR [[UV2]], [[AND]](<2 x i16>) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(<2 x i16>) = disjoint G_OR [[SHL1]], [[LSHR]] + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[C]](i16), [[C]](i16) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(<2 x i16>) = G_AND [[UV5]], [[BUILD_VECTOR3]] + ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[C1]](i16), [[C1]](i16) + ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(<2 x i16>) = G_XOR [[UV5]], [[BUILD_VECTOR4]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(<2 x i16>) = G_AND [[XOR1]], [[BUILD_VECTOR3]] + ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[C2]](i16), [[C2]](i16) + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(<2 x i16>) = G_SHL [[UV1]], [[BUILD_VECTOR5]](<2 x i16>) + ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(<2 x i16>) = G_SHL [[SHL2]], [[AND3]](<2 x i16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(<2 x i16>) = G_LSHR [[UV3]], [[AND2]](<2 x i16>) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(<2 x i16>) = disjoint G_OR [[SHL3]], [[LSHR1]] + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[OR]](<2 x i16>), [[OR1]](<2 x i16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = COPY $vgpr2_vgpr3 + %2:_(<4 x i16>) = COPY $vgpr4_vgpr5 + %3:_(<4 x i16>) = G_FSHR %0, %1, %2(<4 x i16>) + $vgpr0_vgpr1 = COPY %3(<4 x i16>) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsin.mir index e7a808bdd6de4..78b40f77b8405 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsin.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsin.mir @@ -14,32 +14,42 @@ body: | ; SI-LABEL: name: test_fsin_s32 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FC45F3060000000 - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[C]] - ; SI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s32) - ; SI-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT]](s32) - ; SI-NEXT: $vgpr0 = COPY [[INT1]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FC45F3060000000 + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST]], [[C]] + ; SI-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](f32) + ; SI-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT]](f32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[INT1]](f32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + ; ; VI-LABEL: name: test_fsin_s32 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FC45F3060000000 - ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[C]] - ; VI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s32) - ; VI-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT]](s32) - ; VI-NEXT: $vgpr0 = COPY [[INT1]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FC45F3060000000 + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST]], [[C]] + ; VI-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](f32) + ; VI-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT]](f32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[INT1]](f32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + ; ; GFX9-LABEL: name: test_fsin_s32 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FC45F3060000000 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[C]] - ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL]](s32) - ; GFX9-NEXT: $vgpr0 = COPY [[INT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_FSIN %0 - $vgpr0 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FC45F3060000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST]], [[C]] + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL]](f32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[INT]](f32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = G_FSIN %1 + %3:_(i32) = G_BITCAST %2(f32) + $vgpr0 = COPY %3(i32) ... 
--- @@ -51,32 +61,42 @@ body: | ; SI-LABEL: name: test_fsin_s64 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3FC45F306DC9C883 - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[COPY]], [[C]] - ; SI-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s64) - ; SI-NEXT: [[INT1:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT]](s64) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[INT1]](s64) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; SI-NEXT: [[C:%[0-9]+]]:_(f64) = G_FCONSTANT double 0x3FC45F306DC9C883 + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[BITCAST]], [[C]] + ; SI-NEXT: [[INT:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](f64) + ; SI-NEXT: [[INT1:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT]](f64) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[INT1]](f64) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](i64) + ; ; VI-LABEL: name: test_fsin_s64 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3FC45F306DC9C883 - ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[COPY]], [[C]] - ; VI-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s64) - ; VI-NEXT: [[INT1:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT]](s64) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[INT1]](s64) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; VI-NEXT: [[C:%[0-9]+]]:_(f64) = G_FCONSTANT double 0x3FC45F306DC9C883 + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[BITCAST]], [[C]] + ; VI-NEXT: [[INT:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](f64) + ; VI-NEXT: [[INT1:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT]](f64) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[INT1]](f64) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](i64) + ; ; GFX9-LABEL: name: test_fsin_s64 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3FC45F306DC9C883 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[COPY]], [[C]] - ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[INT]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_FSIN %0 - $vgpr0_vgpr1 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f64) = G_FCONSTANT double 0x3FC45F306DC9C883 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[BITCAST]], [[C]] + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL]](f64) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[INT]](f64) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(f64) = G_BITCAST %0(i64) + %2:_(f64) = G_FSIN %1 + %3:_(i64) = G_BITCAST %2(f64) + $vgpr0_vgpr1 = COPY %3(i64) ... 
--- name: test_fsin_s16 @@ -87,42 +107,52 @@ body: | ; SI-LABEL: name: test_fsin_s16 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FC45F3060000000 - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[C]] - ; SI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s32) - ; SI-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT]](s32) - ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32) - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; SI-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FC45F3060000000 + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT]], [[C]] + ; SI-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](f32) + ; SI-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT]](f32) + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[INT1]](f32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + ; ; VI-LABEL: name: test_fsin_s16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3118 - ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[C]] - ; VI-NEXT: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s16) - ; VI-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT]](s16) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; VI-NEXT: [[C:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH3118 + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST]], [[C]] + ; VI-NEXT: [[INT:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](f16) + ; VI-NEXT: [[INT1:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT]](f16) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[INT1]](f16) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + ; ; GFX9-LABEL: name: test_fsin_s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3118 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[C]] - ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL]](s16) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s16) = G_FSIN %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; 
GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH3118 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST]], [[C]] + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL]](f16) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[INT]](f16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(f16) = G_BITCAST %1(i16) + %3:_(f16) = G_FSIN %2 + %4:_(i16) = G_BITCAST %3(f16) + %5:_(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... --- @@ -134,46 +164,56 @@ body: | ; SI-LABEL: name: test_fsin_v2s32 ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FC45F3060000000 - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[C]] - ; SI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s32) - ; SI-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT]](s32) - ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]] - ; SI-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](s32) - ; SI-NEXT: [[INT3:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT2]](s32) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INT1]](s32), [[INT3]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; SI-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FC45F3060000000 + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UV]], [[C]] + ; SI-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](f32) + ; SI-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT]](f32) + ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[UV1]], [[C]] + ; SI-NEXT: [[INT2:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](f32) + ; SI-NEXT: [[INT3:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT2]](f32) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[INT1]](f32), [[INT3]](f32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](<2 x i32>) + ; ; VI-LABEL: name: test_fsin_v2s32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FC45F3060000000 - ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[C]] - ; VI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s32) - ; VI-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT]](s32) - ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]] - ; VI-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.fract), [[FMUL1]](s32) - ; VI-NEXT: [[INT3:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT2]](s32) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INT1]](s32), [[INT3]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; VI-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FC45F3060000000 + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UV]], [[C]] + ; VI-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](f32) + ; VI-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT]](f32) + ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[UV1]], [[C]] + ; VI-NEXT: [[INT2:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](f32) + ; VI-NEXT: [[INT3:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT2]](f32) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[INT1]](f32), [[INT3]](f32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](<2 x i32>) + ; ; GFX9-LABEL: name: test_fsin_v2s32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FC45F3060000000 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[C]] - ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL]](s32) - ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]] - ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INT]](s32), [[INT1]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_FSIN %0 - $vgpr0_vgpr1 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FC45F3060000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UV]], [[C]] + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL]](f32) + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[UV1]], [[C]] + ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL1]](f32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[INT]](f32), [[INT1]](f32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x f32>) = G_BITCAST %0(<2 x i32>) + %2:_(<2 x f32>) = G_FSIN %1 + %3:_(<2 x i32>) = G_BITCAST %2(<2 x f32>) + $vgpr0_vgpr1 = COPY %3(<2 x i32>) ... 
--- @@ -185,54 +225,64 @@ body: | ; SI-LABEL: name: test_fsin_v3s32 ; SI: liveins: $vgpr0_vgpr1_vgpr2 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FC45F3060000000 - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[C]] - ; SI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s32) - ; SI-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT]](s32) - ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]] - ; SI-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](s32) - ; SI-NEXT: [[INT3:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT2]](s32) - ; SI-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[C]] - ; SI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL2]](s32) - ; SI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT4]](s32) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[INT1]](s32), [[INT3]](s32), [[INT5]](s32) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY]](<3 x i32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; SI-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FC45F3060000000 + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UV]], [[C]] + ; SI-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](f32) + ; SI-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT]](f32) + ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[UV1]], [[C]] + ; SI-NEXT: [[INT2:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](f32) + ; SI-NEXT: [[INT3:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT2]](f32) + ; SI-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[UV2]], [[C]] + ; SI-NEXT: [[INT4:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL2]](f32) + ; SI-NEXT: [[INT5:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT4]](f32) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[INT1]](f32), [[INT3]](f32), [[INT5]](f32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[BUILD_VECTOR]](<3 x f32>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST1]](<3 x i32>) + ; ; VI-LABEL: name: test_fsin_v3s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FC45F3060000000 - ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[C]] - ; VI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s32) - ; VI-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT]](s32) - ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]] - ; VI-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](s32) - ; VI-NEXT: [[INT3:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT2]](s32) - ; VI-NEXT: 
[[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[C]] - ; VI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL2]](s32) - ; VI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT4]](s32) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[INT1]](s32), [[INT3]](s32), [[INT5]](s32) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY]](<3 x i32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; VI-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FC45F3060000000 + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UV]], [[C]] + ; VI-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](f32) + ; VI-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT]](f32) + ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[UV1]], [[C]] + ; VI-NEXT: [[INT2:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](f32) + ; VI-NEXT: [[INT3:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT2]](f32) + ; VI-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[UV2]], [[C]] + ; VI-NEXT: [[INT4:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL2]](f32) + ; VI-NEXT: [[INT5:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT4]](f32) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[INT1]](f32), [[INT3]](f32), [[INT5]](f32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[BUILD_VECTOR]](<3 x f32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST1]](<3 x i32>) + ; ; GFX9-LABEL: name: test_fsin_v3s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FC45F3060000000 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[C]] - ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL]](s32) - ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]] - ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL1]](s32) - ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[C]] - ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL2]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[INT]](s32), [[INT1]](s32), [[INT2]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s32>) = G_FSIN %0 - $vgpr0_vgpr1_vgpr2 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY]](<3 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FC45F3060000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UV]], [[C]] + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL]](f32) + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[UV1]], [[C]] + ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.sin), [[FMUL1]](f32) + ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[UV2]], [[C]] + ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL2]](f32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[INT]](f32), [[INT1]](f32), [[INT2]](f32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[BUILD_VECTOR]](<3 x f32>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST1]](<3 x i32>) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x f32>) = G_BITCAST %0(<3 x i32>) + %2:_(<3 x f32>) = G_FSIN %1 + %3:_(<3 x i32>) = G_BITCAST %2(<3 x f32>) + $vgpr0_vgpr1_vgpr2 = COPY %3(<3 x i32>) ... --- @@ -244,46 +294,56 @@ body: | ; SI-LABEL: name: test_fsin_v2s64 ; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3FC45F306DC9C883 - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[UV]], [[C]] - ; SI-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s64) - ; SI-NEXT: [[INT1:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT]](s64) - ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[UV1]], [[C]] - ; SI-NEXT: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](s64) - ; SI-NEXT: [[INT3:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT2]](s64) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[INT1]](s64), [[INT3]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(f64), [[UV1:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST]](<2 x f64>) + ; SI-NEXT: [[C:%[0-9]+]]:_(f64) = G_FCONSTANT double 0x3FC45F306DC9C883 + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[UV]], [[C]] + ; SI-NEXT: [[INT:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](f64) + ; SI-NEXT: [[INT1:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT]](f64) + ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(f64) = G_FMUL [[UV1]], [[C]] + ; SI-NEXT: [[INT2:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](f64) + ; SI-NEXT: [[INT3:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT2]](f64) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f64>) = G_BUILD_VECTOR [[INT1]](f64), [[INT3]](f64) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BUILD_VECTOR]](<2 x f64>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST1]](<2 x i64>) + ; ; VI-LABEL: name: test_fsin_v2s64 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3FC45F306DC9C883 - ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[UV]], [[C]] - ; VI-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s64) - ; VI-NEXT: [[INT1:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT]](s64) - ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[UV1]], [[C]] - ; VI-NEXT: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), 
[[FMUL1]](s64) - ; VI-NEXT: [[INT3:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT2]](s64) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[INT1]](s64), [[INT3]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(f64), [[UV1:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST]](<2 x f64>) + ; VI-NEXT: [[C:%[0-9]+]]:_(f64) = G_FCONSTANT double 0x3FC45F306DC9C883 + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[UV]], [[C]] + ; VI-NEXT: [[INT:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](f64) + ; VI-NEXT: [[INT1:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT]](f64) + ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(f64) = G_FMUL [[UV1]], [[C]] + ; VI-NEXT: [[INT2:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](f64) + ; VI-NEXT: [[INT3:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT2]](f64) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f64>) = G_BUILD_VECTOR [[INT1]](f64), [[INT3]](f64) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BUILD_VECTOR]](<2 x f64>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST1]](<2 x i64>) + ; ; GFX9-LABEL: name: test_fsin_v2s64 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3FC45F306DC9C883 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[UV]], [[C]] - ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL]](s64) - ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[UV1]], [[C]] - ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL1]](s64) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[INT]](s64), [[INT1]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s64>) = G_FSIN %0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f64), [[UV1:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST]](<2 x f64>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f64) = G_FCONSTANT double 0x3FC45F306DC9C883 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[UV]], [[C]] + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL]](f64) + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(f64) = G_FMUL [[UV1]], [[C]] + ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL1]](f64) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f64>) = G_BUILD_VECTOR [[INT]](f64), [[INT1]](f64) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BUILD_VECTOR]](<2 x f64>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST1]](<2 x i64>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x f64>) = G_BITCAST %0(<2 x i64>) + %2:_(<2 x f64>) = G_FSIN %1 + %3:_(<2 x i64>) = G_BITCAST %2(<2 x f64>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3(<2 x i64>) ... 
--- @@ -295,70 +355,93 @@ body: | ; SI-LABEL: name: test_fsin_v2s16 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FC45F3060000000 - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[C1]] - ; SI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s32) - ; SI-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT]](s32) - ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32) - ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT1]], [[C1]] - ; SI-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](s32) - ; SI-NEXT: [[INT3:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT2]](s32) - ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT3]](s32) - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %17(i16) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST1]](f16) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %18(i16) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FC45F3060000000 + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT]], [[C1]] + ; SI-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](f32) + ; SI-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT]](f32) + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[INT1]](f32) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT1]], [[C1]] + ; SI-NEXT: [[INT2:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](f32) + ; SI-NEXT: [[INT3:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT2]](f32) + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[INT3]](f32) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST5]](i16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST6]](i16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], 
[[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST7]](<2 x f16>) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST8]](<2 x i16>) + ; ; VI-LABEL: name: test_fsin_v2s16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3118 - ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[C1]] - ; VI-NEXT: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s16) - ; VI-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT]](s16) - ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[C1]] - ; VI-NEXT: [[INT2:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](s16) - ; VI-NEXT: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT2]](s16) - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[INT1]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[INT3]](s16) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %13(i16) + ; VI-NEXT: [[C:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH3118 + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %14(i16) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C1]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST1]], [[C]] + ; VI-NEXT: [[INT:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](f16) + ; VI-NEXT: [[INT1:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT]](f16) + ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST2]], [[C]] + ; VI-NEXT: [[INT2:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](f16) + ; VI-NEXT: [[INT3:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT2]](f16) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[INT1]](f16) + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[INT3]](f16) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST5]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST6]](i16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST7]](<2 x f16>) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST8]](<2 x i16>) + ; ; GFX9-LABEL: name: 
test_fsin_v2s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3118 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[C1]] - ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL]](s16) - ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[C1]] - ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT]](s16), [[INT1]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = G_FSIN %0 - $vgpr0 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %11(i16) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH3118 + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %12(i16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C1]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST1]], [[C]] + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL]](f16) + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST2]], [[C]] + ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL1]](f16) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[INT]](f16), [[INT1]](f16) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BUILD_VECTOR]](<2 x f16>) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST5]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %2:_(<2 x f16>) = G_FSIN %1 + %3:_(<2 x i16>) = G_BITCAST %2(<2 x f16>) + $vgpr0 = COPY %3(<2 x i16>) ... 
--- @@ -367,87 +450,114 @@ body: | bb.0: ; SI-LABEL: name: test_fsin_v3s16 - ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FC45F3060000000 - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[C1]] - ; SI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s32) - ; SI-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT]](s32) - ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32) - ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT1]], [[C1]] - ; SI-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](s32) - ; SI-NEXT: [[INT3:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT2]](s32) - ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT3]](s32) - ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[C1]] - ; SI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL2]](s32) - ; SI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT4]](s32) - ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT5]](s32) - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) - ; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; SI: [[DEF:%[0-9]+]]:_(<4 x f16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[DEF]](<4 x f16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST %42(i16) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %48(i16) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %43(i16) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST5]](<2 x i16>) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST6]](i32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST6]], [[C]](i32) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; SI-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FC45F3060000000 + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT]], [[C1]] + ; SI-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.fract), [[FMUL]](f32) + ; SI-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT]](f32) + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[INT1]](f32) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT1]], [[C1]] + ; SI-NEXT: [[INT2:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](f32) + ; SI-NEXT: [[INT3:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT2]](f32) + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[INT3]](f32) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST1]](f16) + ; SI-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT2]], [[C1]] + ; SI-NEXT: [[INT4:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL2]](f32) + ; SI-NEXT: [[INT5:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT4]](f32) + ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(f16) = G_FPTRUNC [[INT5]](f32) + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC2]](f16) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST7]](i16) + ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST8]](i16) + ; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST9]](i16) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32), [[ANYEXT2]](i32) + ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x i32>) + ; ; VI-LABEL: name: test_fsin_v3s16 - ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3118 - ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[C1]] - ; VI-NEXT: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s16) - ; VI-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT]](s16) - ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[C1]] - ; VI-NEXT: [[INT2:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](s16) - ; VI-NEXT: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT2]](s16) - ; VI-NEXT: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[C1]] - ; VI-NEXT: [[INT4:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL2]](s16) - ; VI-NEXT: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT4]](s16) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) - ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT3]](s16) - ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT5]](s16) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; VI: [[DEF:%[0-9]+]]:_(<4 x f16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), 
[[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[DEF]](<4 x f16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST %36(i16) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %42(i16) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %37(i16) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST5]](<2 x i16>) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST6]](i32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST6]], [[C]](i32) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH3118 + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST]], [[C1]] + ; VI-NEXT: [[INT:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](f16) + ; VI-NEXT: [[INT1:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT]](f16) + ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST2]], [[C1]] + ; VI-NEXT: [[INT2:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](f16) + ; VI-NEXT: [[INT3:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT2]](f16) + ; VI-NEXT: [[FMUL2:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST1]], [[C1]] + ; VI-NEXT: [[INT4:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL2]](f16) + ; VI-NEXT: [[INT5:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT4]](f16) + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[INT1]](f16) + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[INT3]](f16) + ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[INT5]](f16) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST7]](i16) + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST8]](i16) + ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST9]](i16) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32), [[ANYEXT2]](i32) + ; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x i32>) + ; ; GFX9-LABEL: name: test_fsin_v3s16 - ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3118 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[C1]] - ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL]](s16) - ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[C1]] - ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL1]](s16) - ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[C1]] - ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL2]](s16) - ; GFX9-NEXT: 
[[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT]](s16) - ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) - ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT2]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) - %0:_(<3 x s16>) = G_IMPLICIT_DEF - %1:_(<3 x s16>) = G_FSIN %0 - %2:_(<3 x s32>) = G_ANYEXT %1 - S_NOP 0, implicit %2 + ; GFX9: [[DEF:%[0-9]+]]:_(<4 x f16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[DEF]](<4 x f16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST %33(i16) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %39(i16) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %34(i16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST5]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST6]](i32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST6]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH3118 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST]], [[C1]] + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL]](f16) + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST2]], [[C1]] + ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL1]](f16) + ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST1]], [[C1]] + ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL2]](f16) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[INT]](f16) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[INT1]](f16) + ; GFX9-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[INT2]](f16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST7]](i16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST8]](i16) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST9]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32), [[ANYEXT2]](i32) + ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x i32>) + %0:_(<3 x f16>) = G_IMPLICIT_DEF + %1:_(<3 x f16>) = G_FSIN %0 + %2:_(<3 x i16>) = G_BITCAST %1(<3 x f16>) + %3:_(<3 x i32>) = G_ANYEXT %2(<3 x i16>) + S_NOP 0, implicit %3(<3 x i32>) ... 
--- @@ -459,119 +569,155 @@ body: | ; SI-LABEL: name: test_fsin_v4s16 ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FC45F3060000000 - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[C1]] - ; SI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s32) - ; SI-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT]](s32) - ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32) - ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT1]], [[C1]] - ; SI-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](s32) - ; SI-NEXT: [[INT3:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT2]](s32) - ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT3]](s32) - ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[C1]] - ; SI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL2]](s32) - ; SI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT4]](s32) - ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT5]](s32) - ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT3]], [[C1]] - ; SI-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL3]](s32) - ; SI-NEXT: [[INT7:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT6]](s32) - ; SI-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT7]](s32) - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) - ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x 
f16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x f16>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %33(i16) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %39(i16) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %34(i16) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %40(i16) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST5]](<2 x i16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST6]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST6]], [[C]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST7]](<2 x i16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST8]](i32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST8]], [[C]](i32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST1]](f16) + ; SI-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FC45F3060000000 + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT]], [[C1]] + ; SI-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](f32) + ; SI-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT]](f32) + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[INT1]](f32) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST3]](f16) + ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT1]], [[C1]] + ; SI-NEXT: [[INT2:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](f32) + ; SI-NEXT: [[INT3:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT2]](f32) + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[INT3]](f32) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; SI-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT2]], [[C1]] + ; SI-NEXT: [[INT4:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL2]](f32) + ; SI-NEXT: [[INT5:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT4]](f32) + ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(f16) = G_FPTRUNC [[INT5]](f32) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST4]](f16) + ; SI-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[FPEXT3]], [[C1]] + ; SI-NEXT: [[INT6:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL3]](f32) + ; SI-NEXT: [[INT7:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT6]](f32) + ; SI-NEXT: [[FPTRUNC3:%[0-9]+]]:_(f16) = G_FPTRUNC [[INT7]](f32) + ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST9]](i16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST10]](i16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC2]](f16) + ; SI-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC3]](f16) + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST12]](i16) + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST13]](i16) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR 
[[ZEXT2]], [[SHL1]] + ; SI-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR1]](i32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[BITCAST11]](<2 x f16>), [[BITCAST14]](<2 x f16>) + ; SI-NEXT: [[BITCAST15:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x f16>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST15]](<4 x i16>) + ; ; VI-LABEL: name: test_fsin_v4s16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3118 - ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[C1]] - ; VI-NEXT: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s16) - ; VI-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT]](s16) - ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[C1]] - ; VI-NEXT: [[INT2:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](s16) - ; VI-NEXT: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT2]](s16) - ; VI-NEXT: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[C1]] - ; VI-NEXT: [[INT4:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL2]](s16) - ; VI-NEXT: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT4]](s16) - ; VI-NEXT: [[FMUL3:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC3]], [[C1]] - ; VI-NEXT: [[INT6:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL3]](s16) - ; VI-NEXT: [[INT7:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT6]](s16) - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[INT1]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[INT3]](s16) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[INT5]](s16) - ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[INT7]](s16) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x f16>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %25(i16) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %31(i16) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = 
G_BITCAST %26(i16) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %32(i16) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST5]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST6]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST6]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST7]](<2 x i16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST8]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST8]], [[C]](i32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH3118 + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST1]], [[C1]] + ; VI-NEXT: [[INT:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](f16) + ; VI-NEXT: [[INT1:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT]](f16) + ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST3]], [[C1]] + ; VI-NEXT: [[INT2:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](f16) + ; VI-NEXT: [[INT3:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT2]](f16) + ; VI-NEXT: [[FMUL2:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST2]], [[C1]] + ; VI-NEXT: [[INT4:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL2]](f16) + ; VI-NEXT: [[INT5:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT4]](f16) + ; VI-NEXT: [[FMUL3:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST4]], [[C1]] + ; VI-NEXT: [[INT6:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL3]](f16) + ; VI-NEXT: [[INT7:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT6]](f16) + ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[INT1]](f16) + ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[INT3]](f16) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST9]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST10]](i16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[INT5]](f16) + ; VI-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[INT7]](f16) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST12]](i16) + ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST13]](i16) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR1]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[BITCAST11]](<2 x f16>), [[BITCAST14]](<2 x f16>) + ; VI-NEXT: [[BITCAST15:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x f16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST15]](<4 x i16>) + ; ; GFX9-LABEL: name: test_fsin_v4s16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - 
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3118 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[C1]] - ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL]](s16) - ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[C1]] - ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL1]](s16) - ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[C1]] - ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL2]](s16) - ; GFX9-NEXT: [[FMUL3:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC3]], [[C1]] - ; GFX9-NEXT: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL3]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT]](s16), [[INT1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT2]](s16), [[INT3]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = G_FSIN %0 - $vgpr0_vgpr1 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x f16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %21(i16) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %27(i16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %22(i16) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %28(i16) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST5]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST6]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST6]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST7]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST8]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST8]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH3118 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST1]], [[C1]] + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL]](f16) + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST3]], [[C1]] + ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL1]](f16) + ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST2]], [[C1]] + ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL2]](f16) + ; 
GFX9-NEXT: [[FMUL3:%[0-9]+]]:_(f16) = G_FMUL [[BITCAST4]], [[C1]] + ; GFX9-NEXT: [[INT3:%[0-9]+]]:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL3]](f16) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[INT]](f16), [[INT1]](f16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[INT2]](f16), [[INT3]](f16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x f16>), [[BUILD_VECTOR1]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST9:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x f16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST9]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x f16>) = G_BITCAST %0(<4 x i16>) + %2:_(<4 x f16>) = G_FSIN %1 + %3:_(<4 x i16>) = G_BITCAST %2(<4 x f16>) + $vgpr0_vgpr1 = COPY %3(<4 x i16>) ... --- @@ -583,31 +729,41 @@ body: | ; SI-LABEL: name: test_fsin_s32_flags ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FC45F3060000000 - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[C]] - ; SI-NEXT: [[INT:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s32) - ; SI-NEXT: [[INT1:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT]](s32) - ; SI-NEXT: $vgpr0 = COPY [[INT1]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FC45F3060000000 + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = nnan G_FMUL [[BITCAST]], [[C]] + ; SI-NEXT: [[INT:%[0-9]+]]:_(f32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](f32) + ; SI-NEXT: [[INT1:%[0-9]+]]:_(f32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT]](f32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[INT1]](f32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + ; ; VI-LABEL: name: test_fsin_s32_flags ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FC45F3060000000 - ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[C]] - ; VI-NEXT: [[INT:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s32) - ; VI-NEXT: [[INT1:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT]](s32) - ; VI-NEXT: $vgpr0 = COPY [[INT1]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FC45F3060000000 + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(f32) = nnan G_FMUL [[BITCAST]], [[C]] + ; VI-NEXT: [[INT:%[0-9]+]]:_(f32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](f32) + ; VI-NEXT: [[INT1:%[0-9]+]]:_(f32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT]](f32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[INT1]](f32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + ; ; GFX9-LABEL: name: test_fsin_s32_flags ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FC45F3060000000 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[C]] - ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL]](s32) - ; GFX9-NEXT: $vgpr0 = COPY [[INT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = nnan G_FSIN %0 - $vgpr0 = COPY %1 + ; GFX9-NEXT: 
[[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3FC45F3060000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = nnan G_FMUL [[BITCAST]], [[C]] + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(f32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL]](f32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[INT]](f32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = nnan G_FSIN %1 + %3:_(i32) = G_BITCAST %2(f32) + $vgpr0 = COPY %3(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir index e7c88b30b27a3..bbeff9c36639f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir @@ -14,35 +14,42 @@ body: | ; GCN-LABEL: name: test_fsqrt_s32 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x39F0000000000000 - ; GCN-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[C]](s32), [[COPY]] - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GCN-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[C1]] - ; GCN-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[FMUL]], [[COPY]] - ; GCN-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sqrt), [[SELECT]](s32) - ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GCN-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[INT]], [[C2]] - ; GCN-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[ADD]] - ; GCN-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT]], [[SELECT]] - ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GCN-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[INT]], [[C3]] - ; GCN-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[ADD1]] - ; GCN-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT]], [[SELECT]] - ; GCN-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GCN-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ole), [[FMA]](s32), [[C4]] - ; GCN-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[ADD]], [[INT]] - ; GCN-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[FMA1]](s32), [[C4]] - ; GCN-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[ADD1]], [[SELECT1]] - ; GCN-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3EF0000000000000 - ; GCN-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[SELECT2]], [[C5]] - ; GCN-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[FMUL1]], [[SELECT2]] - ; GCN-NEXT: [[IS_FPCLASS:%[0-9]+]]:_(s1) = G_IS_FPCLASS [[SELECT]](s32), 608 - ; GCN-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[IS_FPCLASS]](s1), [[SELECT]], [[SELECT3]] - ; GCN-NEXT: $vgpr0 = COPY [[SELECT4]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_FSQRT %0 - $vgpr0 = COPY %1 + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GCN-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x39F0000000000000 + ; GCN-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(ogt), [[C]](f32), [[BITCAST]] + ; GCN-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GCN-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST]], [[C1]] + ; GCN-NEXT: [[SELECT:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[FMUL]], [[BITCAST]] + ; GCN-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.sqrt), [[SELECT]](f32) + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[INT]](f32) + ; GCN-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; GCN-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[BITCAST1]], [[C2]] + ; GCN-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[ADD]](i32) + ; GCN-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[BITCAST2]] + ; GCN-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[INT]], [[SELECT]] + ; GCN-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GCN-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[BITCAST1]], [[C3]] + ; GCN-NEXT: [[BITCAST3:%[0-9]+]]:_(f32) = G_BITCAST [[ADD1]](i32) + ; GCN-NEXT: [[FNEG1:%[0-9]+]]:_(f32) = G_FNEG [[BITCAST3]] + ; GCN-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[FNEG1]], [[INT]], [[SELECT]] + ; GCN-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GCN-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(ole), [[FMA]](f32), [[C4]] + ; GCN-NEXT: [[SELECT1:%[0-9]+]]:_(f32) = G_SELECT [[FCMP1]](i1), [[BITCAST2]], [[INT]] + ; GCN-NEXT: [[FCMP2:%[0-9]+]]:_(i1) = G_FCMP floatpred(ogt), [[FMA1]](f32), [[C4]] + ; GCN-NEXT: [[SELECT2:%[0-9]+]]:_(f32) = G_SELECT [[FCMP2]](i1), [[BITCAST3]], [[SELECT1]] + ; GCN-NEXT: [[C5:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3EF0000000000000 + ; GCN-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[SELECT2]], [[C5]] + ; GCN-NEXT: [[SELECT3:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[FMUL1]], [[SELECT2]] + ; GCN-NEXT: [[IS_FPCLASS:%[0-9]+]]:_(i1) = G_IS_FPCLASS [[SELECT]](f32), 608 + ; GCN-NEXT: [[SELECT4:%[0-9]+]]:_(f32) = G_SELECT [[IS_FPCLASS]](i1), [[SELECT]], [[SELECT3]] + ; GCN-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[SELECT4]](f32) + ; GCN-NEXT: $vgpr0 = COPY [[BITCAST4]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = G_FSQRT %1 + %3:_(i32) = G_BITCAST %2(f32) + $vgpr0 = COPY %3(i32) ... 
--- @@ -54,36 +61,40 @@ body: | ; GCN-LABEL: name: test_fsqrt_s64 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x1000000000000000 - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GCN-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[COPY]](s64), [[C]] - ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 - ; GCN-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C2]], [[C1]] - ; GCN-NEXT: [[FLDEXP:%[0-9]+]]:_(s64) = G_FLDEXP [[COPY]], [[SELECT]](s32) - ; GCN-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), [[FLDEXP]](s64) - ; GCN-NEXT: [[C3:%[0-9]+]]:_(s64) = G_FCONSTANT double 5.000000e-01 - ; GCN-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT]], [[C3]] - ; GCN-NEXT: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[FLDEXP]], [[INT]] - ; GCN-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[FMUL]] - ; GCN-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL1]], [[C3]] - ; GCN-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[FMUL1]], [[FMA]], [[FMUL1]] - ; GCN-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FMUL]], [[FMA]], [[FMUL]] - ; GCN-NEXT: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[FMA1]] - ; GCN-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[FMA1]], [[FLDEXP]] - ; GCN-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FMA3]], [[FMA2]], [[FMA1]] - ; GCN-NEXT: [[FNEG2:%[0-9]+]]:_(s64) = G_FNEG [[FMA4]] - ; GCN-NEXT: [[FMA5:%[0-9]+]]:_(s64) = G_FMA [[FNEG2]], [[FMA4]], [[FLDEXP]] - ; GCN-NEXT: [[FMA6:%[0-9]+]]:_(s64) = G_FMA [[FMA5]], [[FMA2]], [[FMA4]] - ; GCN-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 -128 - ; GCN-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C4]], [[C1]] - ; GCN-NEXT: [[FLDEXP1:%[0-9]+]]:_(s64) = G_FLDEXP [[FMA6]], [[SELECT1]](s32) - ; GCN-NEXT: [[IS_FPCLASS:%[0-9]+]]:_(s1) = G_IS_FPCLASS [[FLDEXP]](s64), 608 - ; GCN-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[IS_FPCLASS]](s1), [[FLDEXP]], [[FLDEXP1]] - ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[SELECT2]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_FSQRT %0 - $vgpr0_vgpr1 = COPY %1 + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; GCN-NEXT: [[C:%[0-9]+]]:_(f64) = G_FCONSTANT double 0x1000000000000000 + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GCN-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[BITCAST]](f64), [[C]] + ; GCN-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 256 + ; GCN-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[FCMP]](i1), [[C2]], [[C1]] + ; GCN-NEXT: [[FLDEXP:%[0-9]+]]:_(f64) = G_FLDEXP [[BITCAST]], [[SELECT]](i32) + ; GCN-NEXT: [[INT:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), [[FLDEXP]](f64) + ; GCN-NEXT: [[C3:%[0-9]+]]:_(f64) = G_FCONSTANT double 5.000000e-01 + ; GCN-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[INT]], [[C3]] + ; GCN-NEXT: [[FMUL1:%[0-9]+]]:_(f64) = G_FMUL [[FLDEXP]], [[INT]] + ; GCN-NEXT: [[FNEG:%[0-9]+]]:_(f64) = G_FNEG [[FMUL]] + ; GCN-NEXT: [[FMA:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[FMUL1]], [[C3]] + ; GCN-NEXT: [[FMA1:%[0-9]+]]:_(f64) = G_FMA [[FMUL1]], [[FMA]], [[FMUL1]] + ; GCN-NEXT: [[FMA2:%[0-9]+]]:_(f64) = G_FMA [[FMUL]], [[FMA]], [[FMUL]] + ; GCN-NEXT: [[FNEG1:%[0-9]+]]:_(f64) = G_FNEG [[FMA1]] + ; GCN-NEXT: [[FMA3:%[0-9]+]]:_(f64) = G_FMA [[FNEG1]], [[FMA1]], [[FLDEXP]] + ; GCN-NEXT: [[FMA4:%[0-9]+]]:_(f64) = G_FMA [[FMA3]], [[FMA2]], [[FMA1]] + ; GCN-NEXT: [[FNEG2:%[0-9]+]]:_(f64) = G_FNEG [[FMA4]] + ; GCN-NEXT: [[FMA5:%[0-9]+]]:_(f64) 
= G_FMA [[FNEG2]], [[FMA4]], [[FLDEXP]] + ; GCN-NEXT: [[FMA6:%[0-9]+]]:_(f64) = G_FMA [[FMA5]], [[FMA2]], [[FMA4]] + ; GCN-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 -128 + ; GCN-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[FCMP]](i1), [[C4]], [[C1]] + ; GCN-NEXT: [[FLDEXP1:%[0-9]+]]:_(f64) = G_FLDEXP [[FMA6]], [[SELECT1]](i32) + ; GCN-NEXT: [[IS_FPCLASS:%[0-9]+]]:_(i1) = G_IS_FPCLASS [[FLDEXP]](f64), 608 + ; GCN-NEXT: [[SELECT2:%[0-9]+]]:_(f64) = G_SELECT [[IS_FPCLASS]](i1), [[FLDEXP]], [[FLDEXP1]] + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[SELECT2]](f64) + ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(f64) = G_BITCAST %0(i64) + %2:_(f64) = G_FSQRT %1 + %3:_(i64) = G_BITCAST %2(f64) + $vgpr0_vgpr1 = COPY %3(i64) ... @@ -96,36 +107,40 @@ body: | ; GCN-LABEL: name: test_fsqrt_s64_ninf ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x1000000000000000 - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GCN-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[COPY]](s64), [[C]] - ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 - ; GCN-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C2]], [[C1]] - ; GCN-NEXT: [[FLDEXP:%[0-9]+]]:_(s64) = ninf G_FLDEXP [[COPY]], [[SELECT]](s32) - ; GCN-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), [[FLDEXP]](s64) - ; GCN-NEXT: [[C3:%[0-9]+]]:_(s64) = G_FCONSTANT double 5.000000e-01 - ; GCN-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT]], [[C3]] - ; GCN-NEXT: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[FLDEXP]], [[INT]] - ; GCN-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[FMUL]] - ; GCN-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL1]], [[C3]] - ; GCN-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[FMUL1]], [[FMA]], [[FMUL1]] - ; GCN-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FMUL]], [[FMA]], [[FMUL]] - ; GCN-NEXT: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[FMA1]] - ; GCN-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[FMA1]], [[FLDEXP]] - ; GCN-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FMA3]], [[FMA2]], [[FMA1]] - ; GCN-NEXT: [[FNEG2:%[0-9]+]]:_(s64) = G_FNEG [[FMA4]] - ; GCN-NEXT: [[FMA5:%[0-9]+]]:_(s64) = G_FMA [[FNEG2]], [[FMA4]], [[FLDEXP]] - ; GCN-NEXT: [[FMA6:%[0-9]+]]:_(s64) = G_FMA [[FMA5]], [[FMA2]], [[FMA4]] - ; GCN-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 -128 - ; GCN-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C4]], [[C1]] - ; GCN-NEXT: [[FLDEXP1:%[0-9]+]]:_(s64) = ninf G_FLDEXP [[FMA6]], [[SELECT1]](s32) - ; GCN-NEXT: [[IS_FPCLASS:%[0-9]+]]:_(s1) = G_IS_FPCLASS [[FLDEXP]](s64), 608 - ; GCN-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = ninf G_SELECT [[IS_FPCLASS]](s1), [[FLDEXP]], [[FLDEXP1]] - ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[SELECT2]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = ninf G_FSQRT %0 - $vgpr0_vgpr1 = COPY %1 + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; GCN-NEXT: [[C:%[0-9]+]]:_(f64) = G_FCONSTANT double 0x1000000000000000 + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GCN-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[BITCAST]](f64), [[C]] + ; GCN-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 256 + ; GCN-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[FCMP]](i1), [[C2]], [[C1]] + ; GCN-NEXT: [[FLDEXP:%[0-9]+]]:_(f64) = ninf G_FLDEXP [[BITCAST]], [[SELECT]](i32) + ; GCN-NEXT: [[INT:%[0-9]+]]:_(f64) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.rsq), [[FLDEXP]](f64) + ; GCN-NEXT: [[C3:%[0-9]+]]:_(f64) = G_FCONSTANT double 5.000000e-01 + ; GCN-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[INT]], [[C3]] + ; GCN-NEXT: [[FMUL1:%[0-9]+]]:_(f64) = G_FMUL [[FLDEXP]], [[INT]] + ; GCN-NEXT: [[FNEG:%[0-9]+]]:_(f64) = G_FNEG [[FMUL]] + ; GCN-NEXT: [[FMA:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[FMUL1]], [[C3]] + ; GCN-NEXT: [[FMA1:%[0-9]+]]:_(f64) = G_FMA [[FMUL1]], [[FMA]], [[FMUL1]] + ; GCN-NEXT: [[FMA2:%[0-9]+]]:_(f64) = G_FMA [[FMUL]], [[FMA]], [[FMUL]] + ; GCN-NEXT: [[FNEG1:%[0-9]+]]:_(f64) = G_FNEG [[FMA1]] + ; GCN-NEXT: [[FMA3:%[0-9]+]]:_(f64) = G_FMA [[FNEG1]], [[FMA1]], [[FLDEXP]] + ; GCN-NEXT: [[FMA4:%[0-9]+]]:_(f64) = G_FMA [[FMA3]], [[FMA2]], [[FMA1]] + ; GCN-NEXT: [[FNEG2:%[0-9]+]]:_(f64) = G_FNEG [[FMA4]] + ; GCN-NEXT: [[FMA5:%[0-9]+]]:_(f64) = G_FMA [[FNEG2]], [[FMA4]], [[FLDEXP]] + ; GCN-NEXT: [[FMA6:%[0-9]+]]:_(f64) = G_FMA [[FMA5]], [[FMA2]], [[FMA4]] + ; GCN-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 -128 + ; GCN-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[FCMP]](i1), [[C4]], [[C1]] + ; GCN-NEXT: [[FLDEXP1:%[0-9]+]]:_(f64) = ninf G_FLDEXP [[FMA6]], [[SELECT1]](i32) + ; GCN-NEXT: [[IS_FPCLASS:%[0-9]+]]:_(i1) = G_IS_FPCLASS [[FLDEXP]](f64), 608 + ; GCN-NEXT: [[SELECT2:%[0-9]+]]:_(f64) = ninf G_SELECT [[IS_FPCLASS]](i1), [[FLDEXP]], [[FLDEXP1]] + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[SELECT2]](f64) + ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(f64) = G_BITCAST %0(i64) + %2:_(f64) = ninf G_FSQRT %1 + %3:_(i64) = G_BITCAST %2(f64) + $vgpr0_vgpr1 = COPY %3(i64) ... --- @@ -137,36 +152,44 @@ body: | ; SI-LABEL: name: test_fsqrt_s16 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sqrt), [[FPEXT]](s32) - ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT]](s32) - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; SI-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sqrt), [[FPEXT]](f32) + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[INT]](f32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; VI-LABEL: name: test_fsqrt_s16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT]](s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; VI-NEXT: [[FSQRT:%[0-9]+]]:_(f16) = G_FSQRT [[BITCAST]] + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[FSQRT]](f16) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; 
GFX9-LABEL: name: test_fsqrt_s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s16) = G_FSQRT %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[FSQRT:%[0-9]+]]:_(f16) = G_FSQRT [[BITCAST]] + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[FSQRT]](f16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(f16) = G_BITCAST %1(i16) + %3:_(f16) = G_FSQRT %2 + %4:_(i16) = G_BITCAST %3(f16) + %5:_(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... --- @@ -178,55 +201,65 @@ body: | ; GCN-LABEL: name: test_fsqrt_v2s32 ; GCN: liveins: $vgpr0_vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x39F0000000000000 - ; GCN-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[C]](s32), [[UV]] - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GCN-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[C1]] - ; GCN-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[FMUL]], [[UV]] - ; GCN-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sqrt), [[SELECT]](s32) - ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GCN-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[INT]], [[C2]] - ; GCN-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[ADD]] - ; GCN-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT]], [[SELECT]] - ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GCN-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[INT]], [[C3]] - ; GCN-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[ADD1]] - ; GCN-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT]], [[SELECT]] - ; GCN-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GCN-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ole), [[FMA]](s32), [[C4]] - ; GCN-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[ADD]], [[INT]] - ; GCN-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[FMA1]](s32), [[C4]] - ; GCN-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[ADD1]], [[SELECT1]] - ; GCN-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3EF0000000000000 - ; GCN-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[SELECT2]], [[C5]] - ; GCN-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[FMUL1]], [[SELECT2]] - ; GCN-NEXT: [[IS_FPCLASS:%[0-9]+]]:_(s1) = G_IS_FPCLASS [[SELECT]](s32), 608 - ; GCN-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[IS_FPCLASS]](s1), [[SELECT]], [[SELECT3]] - ; GCN-NEXT: [[FCMP3:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[C]](s32), [[UV1]] - ; GCN-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C1]] - ; GCN-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[FCMP3]](s1), [[FMUL2]], [[UV1]] - ; GCN-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sqrt), [[SELECT5]](s32) - ; GCN-NEXT: 
[[ADD2:%[0-9]+]]:_(s32) = G_ADD [[INT1]], [[C2]] - ; GCN-NEXT: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[ADD2]] - ; GCN-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[INT1]], [[SELECT5]] - ; GCN-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[INT1]], [[C3]] - ; GCN-NEXT: [[FNEG3:%[0-9]+]]:_(s32) = G_FNEG [[ADD3]] - ; GCN-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FNEG3]], [[INT1]], [[SELECT5]] - ; GCN-NEXT: [[FCMP4:%[0-9]+]]:_(s1) = G_FCMP floatpred(ole), [[FMA2]](s32), [[C4]] - ; GCN-NEXT: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[FCMP4]](s1), [[ADD2]], [[INT1]] - ; GCN-NEXT: [[FCMP5:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[FMA3]](s32), [[C4]] - ; GCN-NEXT: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[FCMP5]](s1), [[ADD3]], [[SELECT6]] - ; GCN-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[SELECT7]], [[C5]] - ; GCN-NEXT: [[SELECT8:%[0-9]+]]:_(s32) = G_SELECT [[FCMP3]](s1), [[FMUL3]], [[SELECT7]] - ; GCN-NEXT: [[IS_FPCLASS1:%[0-9]+]]:_(s1) = G_IS_FPCLASS [[SELECT5]](s32), 608 - ; GCN-NEXT: [[SELECT9:%[0-9]+]]:_(s32) = G_SELECT [[IS_FPCLASS1]](s1), [[SELECT5]], [[SELECT8]] - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT4]](s32), [[SELECT9]](s32) - ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_FSQRT %0 - $vgpr0_vgpr1 = COPY %1 + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; GCN-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x39F0000000000000 + ; GCN-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(ogt), [[C]](f32), [[UV]] + ; GCN-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GCN-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UV]], [[C1]] + ; GCN-NEXT: [[SELECT:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[FMUL]], [[UV]] + ; GCN-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sqrt), [[SELECT]](f32) + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[INT]](f32) + ; GCN-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; GCN-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[BITCAST1]], [[C2]] + ; GCN-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[ADD]](i32) + ; GCN-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[BITCAST2]] + ; GCN-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[INT]], [[SELECT]] + ; GCN-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GCN-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[BITCAST1]], [[C3]] + ; GCN-NEXT: [[BITCAST3:%[0-9]+]]:_(f32) = G_BITCAST [[ADD1]](i32) + ; GCN-NEXT: [[FNEG1:%[0-9]+]]:_(f32) = G_FNEG [[BITCAST3]] + ; GCN-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[FNEG1]], [[INT]], [[SELECT]] + ; GCN-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GCN-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(ole), [[FMA]](f32), [[C4]] + ; GCN-NEXT: [[SELECT1:%[0-9]+]]:_(f32) = G_SELECT [[FCMP1]](i1), [[BITCAST2]], [[INT]] + ; GCN-NEXT: [[FCMP2:%[0-9]+]]:_(i1) = G_FCMP floatpred(ogt), [[FMA1]](f32), [[C4]] + ; GCN-NEXT: [[SELECT2:%[0-9]+]]:_(f32) = G_SELECT [[FCMP2]](i1), [[BITCAST3]], [[SELECT1]] + ; GCN-NEXT: [[C5:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3EF0000000000000 + ; GCN-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[SELECT2]], [[C5]] + ; GCN-NEXT: [[SELECT3:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[FMUL1]], [[SELECT2]] + ; GCN-NEXT: [[IS_FPCLASS:%[0-9]+]]:_(i1) = G_IS_FPCLASS [[SELECT]](f32), 608 + ; GCN-NEXT: [[SELECT4:%[0-9]+]]:_(f32) = G_SELECT 
[[IS_FPCLASS]](i1), [[SELECT]], [[SELECT3]] + ; GCN-NEXT: [[FCMP3:%[0-9]+]]:_(i1) = G_FCMP floatpred(ogt), [[C]](f32), [[UV1]] + ; GCN-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[UV1]], [[C1]] + ; GCN-NEXT: [[SELECT5:%[0-9]+]]:_(f32) = G_SELECT [[FCMP3]](i1), [[FMUL2]], [[UV1]] + ; GCN-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sqrt), [[SELECT5]](f32) + ; GCN-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[INT1]](f32) + ; GCN-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[BITCAST4]], [[C2]] + ; GCN-NEXT: [[BITCAST5:%[0-9]+]]:_(f32) = G_BITCAST [[ADD2]](i32) + ; GCN-NEXT: [[FNEG2:%[0-9]+]]:_(f32) = G_FNEG [[BITCAST5]] + ; GCN-NEXT: [[FMA2:%[0-9]+]]:_(f32) = G_FMA [[FNEG2]], [[INT1]], [[SELECT5]] + ; GCN-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[BITCAST4]], [[C3]] + ; GCN-NEXT: [[BITCAST6:%[0-9]+]]:_(f32) = G_BITCAST [[ADD3]](i32) + ; GCN-NEXT: [[FNEG3:%[0-9]+]]:_(f32) = G_FNEG [[BITCAST6]] + ; GCN-NEXT: [[FMA3:%[0-9]+]]:_(f32) = G_FMA [[FNEG3]], [[INT1]], [[SELECT5]] + ; GCN-NEXT: [[FCMP4:%[0-9]+]]:_(i1) = G_FCMP floatpred(ole), [[FMA2]](f32), [[C4]] + ; GCN-NEXT: [[SELECT6:%[0-9]+]]:_(f32) = G_SELECT [[FCMP4]](i1), [[BITCAST5]], [[INT1]] + ; GCN-NEXT: [[FCMP5:%[0-9]+]]:_(i1) = G_FCMP floatpred(ogt), [[FMA3]](f32), [[C4]] + ; GCN-NEXT: [[SELECT7:%[0-9]+]]:_(f32) = G_SELECT [[FCMP5]](i1), [[BITCAST6]], [[SELECT6]] + ; GCN-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[SELECT7]], [[C5]] + ; GCN-NEXT: [[SELECT8:%[0-9]+]]:_(f32) = G_SELECT [[FCMP3]](i1), [[FMUL3]], [[SELECT7]] + ; GCN-NEXT: [[IS_FPCLASS1:%[0-9]+]]:_(i1) = G_IS_FPCLASS [[SELECT5]](f32), 608 + ; GCN-NEXT: [[SELECT9:%[0-9]+]]:_(f32) = G_SELECT [[IS_FPCLASS1]](i1), [[SELECT5]], [[SELECT8]] + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[SELECT4]](f32), [[SELECT9]](f32) + ; GCN-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST7]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x f32>) = G_BITCAST %0(<2 x i32>) + %2:_(<2 x f32>) = G_FSQRT %1 + %3:_(<2 x i32>) = G_BITCAST %2(<2 x f32>) + $vgpr0_vgpr1 = COPY %3(<2 x i32>) ... 
--- @@ -238,73 +271,86 @@ body: | ; GCN-LABEL: name: test_fsqrt_v3s32 ; GCN: liveins: $vgpr0_vgpr1_vgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x39F0000000000000 - ; GCN-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[C]](s32), [[UV]] - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GCN-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[C1]] - ; GCN-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[FMUL]], [[UV]] - ; GCN-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sqrt), [[SELECT]](s32) - ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GCN-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[INT]], [[C2]] - ; GCN-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[ADD]] - ; GCN-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT]], [[SELECT]] - ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GCN-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[INT]], [[C3]] - ; GCN-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[ADD1]] - ; GCN-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT]], [[SELECT]] - ; GCN-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GCN-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ole), [[FMA]](s32), [[C4]] - ; GCN-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[ADD]], [[INT]] - ; GCN-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[FMA1]](s32), [[C4]] - ; GCN-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[ADD1]], [[SELECT1]] - ; GCN-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3EF0000000000000 - ; GCN-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[SELECT2]], [[C5]] - ; GCN-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[FMUL1]], [[SELECT2]] - ; GCN-NEXT: [[IS_FPCLASS:%[0-9]+]]:_(s1) = G_IS_FPCLASS [[SELECT]](s32), 608 - ; GCN-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[IS_FPCLASS]](s1), [[SELECT]], [[SELECT3]] - ; GCN-NEXT: [[FCMP3:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[C]](s32), [[UV1]] - ; GCN-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C1]] - ; GCN-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[FCMP3]](s1), [[FMUL2]], [[UV1]] - ; GCN-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sqrt), [[SELECT5]](s32) - ; GCN-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[INT1]], [[C2]] - ; GCN-NEXT: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[ADD2]] - ; GCN-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[INT1]], [[SELECT5]] - ; GCN-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[INT1]], [[C3]] - ; GCN-NEXT: [[FNEG3:%[0-9]+]]:_(s32) = G_FNEG [[ADD3]] - ; GCN-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FNEG3]], [[INT1]], [[SELECT5]] - ; GCN-NEXT: [[FCMP4:%[0-9]+]]:_(s1) = G_FCMP floatpred(ole), [[FMA2]](s32), [[C4]] - ; GCN-NEXT: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[FCMP4]](s1), [[ADD2]], [[INT1]] - ; GCN-NEXT: [[FCMP5:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[FMA3]](s32), [[C4]] - ; GCN-NEXT: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[FCMP5]](s1), [[ADD3]], [[SELECT6]] - ; GCN-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[SELECT7]], [[C5]] - ; GCN-NEXT: [[SELECT8:%[0-9]+]]:_(s32) = G_SELECT [[FCMP3]](s1), [[FMUL3]], [[SELECT7]] - ; GCN-NEXT: [[IS_FPCLASS1:%[0-9]+]]:_(s1) = G_IS_FPCLASS [[SELECT5]](s32), 608 - ; GCN-NEXT: [[SELECT9:%[0-9]+]]:_(s32) = G_SELECT [[IS_FPCLASS1]](s1), [[SELECT5]], [[SELECT8]] - ; GCN-NEXT: [[FCMP6:%[0-9]+]]:_(s1) 
= G_FCMP floatpred(ogt), [[C]](s32), [[UV2]] - ; GCN-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[C1]] - ; GCN-NEXT: [[SELECT10:%[0-9]+]]:_(s32) = G_SELECT [[FCMP6]](s1), [[FMUL4]], [[UV2]] - ; GCN-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sqrt), [[SELECT10]](s32) - ; GCN-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[INT2]], [[C2]] - ; GCN-NEXT: [[FNEG4:%[0-9]+]]:_(s32) = G_FNEG [[ADD4]] - ; GCN-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG4]], [[INT2]], [[SELECT10]] - ; GCN-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[INT2]], [[C3]] - ; GCN-NEXT: [[FNEG5:%[0-9]+]]:_(s32) = G_FNEG [[ADD5]] - ; GCN-NEXT: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG5]], [[INT2]], [[SELECT10]] - ; GCN-NEXT: [[FCMP7:%[0-9]+]]:_(s1) = G_FCMP floatpred(ole), [[FMA4]](s32), [[C4]] - ; GCN-NEXT: [[SELECT11:%[0-9]+]]:_(s32) = G_SELECT [[FCMP7]](s1), [[ADD4]], [[INT2]] - ; GCN-NEXT: [[FCMP8:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[FMA5]](s32), [[C4]] - ; GCN-NEXT: [[SELECT12:%[0-9]+]]:_(s32) = G_SELECT [[FCMP8]](s1), [[ADD5]], [[SELECT11]] - ; GCN-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[SELECT12]], [[C5]] - ; GCN-NEXT: [[SELECT13:%[0-9]+]]:_(s32) = G_SELECT [[FCMP6]](s1), [[FMUL5]], [[SELECT12]] - ; GCN-NEXT: [[IS_FPCLASS2:%[0-9]+]]:_(s1) = G_IS_FPCLASS [[SELECT10]](s32), 608 - ; GCN-NEXT: [[SELECT14:%[0-9]+]]:_(s32) = G_SELECT [[IS_FPCLASS2]](s1), [[SELECT10]], [[SELECT13]] - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SELECT4]](s32), [[SELECT9]](s32), [[SELECT14]](s32) - ; GCN-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s32>) = G_FSQRT %0 - $vgpr0_vgpr1_vgpr2 = COPY %1 + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY]](<3 x i32>) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; GCN-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x39F0000000000000 + ; GCN-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(ogt), [[C]](f32), [[UV]] + ; GCN-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GCN-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UV]], [[C1]] + ; GCN-NEXT: [[SELECT:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[FMUL]], [[UV]] + ; GCN-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sqrt), [[SELECT]](f32) + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[INT]](f32) + ; GCN-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; GCN-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[BITCAST1]], [[C2]] + ; GCN-NEXT: [[BITCAST2:%[0-9]+]]:_(f32) = G_BITCAST [[ADD]](i32) + ; GCN-NEXT: [[FNEG:%[0-9]+]]:_(f32) = G_FNEG [[BITCAST2]] + ; GCN-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[FNEG]], [[INT]], [[SELECT]] + ; GCN-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GCN-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[BITCAST1]], [[C3]] + ; GCN-NEXT: [[BITCAST3:%[0-9]+]]:_(f32) = G_BITCAST [[ADD1]](i32) + ; GCN-NEXT: [[FNEG1:%[0-9]+]]:_(f32) = G_FNEG [[BITCAST3]] + ; GCN-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[FNEG1]], [[INT]], [[SELECT]] + ; GCN-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GCN-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(ole), [[FMA]](f32), [[C4]] + ; GCN-NEXT: [[SELECT1:%[0-9]+]]:_(f32) = G_SELECT [[FCMP1]](i1), [[BITCAST2]], [[INT]] + ; GCN-NEXT: [[FCMP2:%[0-9]+]]:_(i1) = G_FCMP floatpred(ogt), [[FMA1]](f32), [[C4]] + ; GCN-NEXT: [[SELECT2:%[0-9]+]]:_(f32) = G_SELECT [[FCMP2]](i1), 
[[BITCAST3]], [[SELECT1]] + ; GCN-NEXT: [[C5:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3EF0000000000000 + ; GCN-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[SELECT2]], [[C5]] + ; GCN-NEXT: [[SELECT3:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[FMUL1]], [[SELECT2]] + ; GCN-NEXT: [[IS_FPCLASS:%[0-9]+]]:_(i1) = G_IS_FPCLASS [[SELECT]](f32), 608 + ; GCN-NEXT: [[SELECT4:%[0-9]+]]:_(f32) = G_SELECT [[IS_FPCLASS]](i1), [[SELECT]], [[SELECT3]] + ; GCN-NEXT: [[FCMP3:%[0-9]+]]:_(i1) = G_FCMP floatpred(ogt), [[C]](f32), [[UV1]] + ; GCN-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[UV1]], [[C1]] + ; GCN-NEXT: [[SELECT5:%[0-9]+]]:_(f32) = G_SELECT [[FCMP3]](i1), [[FMUL2]], [[UV1]] + ; GCN-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sqrt), [[SELECT5]](f32) + ; GCN-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[INT1]](f32) + ; GCN-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[BITCAST4]], [[C2]] + ; GCN-NEXT: [[BITCAST5:%[0-9]+]]:_(f32) = G_BITCAST [[ADD2]](i32) + ; GCN-NEXT: [[FNEG2:%[0-9]+]]:_(f32) = G_FNEG [[BITCAST5]] + ; GCN-NEXT: [[FMA2:%[0-9]+]]:_(f32) = G_FMA [[FNEG2]], [[INT1]], [[SELECT5]] + ; GCN-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[BITCAST4]], [[C3]] + ; GCN-NEXT: [[BITCAST6:%[0-9]+]]:_(f32) = G_BITCAST [[ADD3]](i32) + ; GCN-NEXT: [[FNEG3:%[0-9]+]]:_(f32) = G_FNEG [[BITCAST6]] + ; GCN-NEXT: [[FMA3:%[0-9]+]]:_(f32) = G_FMA [[FNEG3]], [[INT1]], [[SELECT5]] + ; GCN-NEXT: [[FCMP4:%[0-9]+]]:_(i1) = G_FCMP floatpred(ole), [[FMA2]](f32), [[C4]] + ; GCN-NEXT: [[SELECT6:%[0-9]+]]:_(f32) = G_SELECT [[FCMP4]](i1), [[BITCAST5]], [[INT1]] + ; GCN-NEXT: [[FCMP5:%[0-9]+]]:_(i1) = G_FCMP floatpred(ogt), [[FMA3]](f32), [[C4]] + ; GCN-NEXT: [[SELECT7:%[0-9]+]]:_(f32) = G_SELECT [[FCMP5]](i1), [[BITCAST6]], [[SELECT6]] + ; GCN-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[SELECT7]], [[C5]] + ; GCN-NEXT: [[SELECT8:%[0-9]+]]:_(f32) = G_SELECT [[FCMP3]](i1), [[FMUL3]], [[SELECT7]] + ; GCN-NEXT: [[IS_FPCLASS1:%[0-9]+]]:_(i1) = G_IS_FPCLASS [[SELECT5]](f32), 608 + ; GCN-NEXT: [[SELECT9:%[0-9]+]]:_(f32) = G_SELECT [[IS_FPCLASS1]](i1), [[SELECT5]], [[SELECT8]] + ; GCN-NEXT: [[FCMP6:%[0-9]+]]:_(i1) = G_FCMP floatpred(ogt), [[C]](f32), [[UV2]] + ; GCN-NEXT: [[FMUL4:%[0-9]+]]:_(f32) = G_FMUL [[UV2]], [[C1]] + ; GCN-NEXT: [[SELECT10:%[0-9]+]]:_(f32) = G_SELECT [[FCMP6]](i1), [[FMUL4]], [[UV2]] + ; GCN-NEXT: [[INT2:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sqrt), [[SELECT10]](f32) + ; GCN-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[INT2]](f32) + ; GCN-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[BITCAST7]], [[C2]] + ; GCN-NEXT: [[BITCAST8:%[0-9]+]]:_(f32) = G_BITCAST [[ADD4]](i32) + ; GCN-NEXT: [[FNEG4:%[0-9]+]]:_(f32) = G_FNEG [[BITCAST8]] + ; GCN-NEXT: [[FMA4:%[0-9]+]]:_(f32) = G_FMA [[FNEG4]], [[INT2]], [[SELECT10]] + ; GCN-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[BITCAST7]], [[C3]] + ; GCN-NEXT: [[BITCAST9:%[0-9]+]]:_(f32) = G_BITCAST [[ADD5]](i32) + ; GCN-NEXT: [[FNEG5:%[0-9]+]]:_(f32) = G_FNEG [[BITCAST9]] + ; GCN-NEXT: [[FMA5:%[0-9]+]]:_(f32) = G_FMA [[FNEG5]], [[INT2]], [[SELECT10]] + ; GCN-NEXT: [[FCMP7:%[0-9]+]]:_(i1) = G_FCMP floatpred(ole), [[FMA4]](f32), [[C4]] + ; GCN-NEXT: [[SELECT11:%[0-9]+]]:_(f32) = G_SELECT [[FCMP7]](i1), [[BITCAST8]], [[INT2]] + ; GCN-NEXT: [[FCMP8:%[0-9]+]]:_(i1) = G_FCMP floatpred(ogt), [[FMA5]](f32), [[C4]] + ; GCN-NEXT: [[SELECT12:%[0-9]+]]:_(f32) = G_SELECT [[FCMP8]](i1), [[BITCAST9]], [[SELECT11]] + ; GCN-NEXT: [[FMUL5:%[0-9]+]]:_(f32) = G_FMUL [[SELECT12]], [[C5]] + ; GCN-NEXT: [[SELECT13:%[0-9]+]]:_(f32) = G_SELECT [[FCMP6]](i1), [[FMUL5]], [[SELECT12]] + ; 
GCN-NEXT: [[IS_FPCLASS2:%[0-9]+]]:_(i1) = G_IS_FPCLASS [[SELECT10]](f32), 608 + ; GCN-NEXT: [[SELECT14:%[0-9]+]]:_(f32) = G_SELECT [[IS_FPCLASS2]](i1), [[SELECT10]], [[SELECT13]] + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[SELECT4]](f32), [[SELECT9]](f32), [[SELECT14]](f32) + ; GCN-NEXT: [[BITCAST10:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[BUILD_VECTOR]](<3 x f32>) + ; GCN-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST10]](<3 x i32>) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x f32>) = G_BITCAST %0(<3 x i32>) + %2:_(<3 x f32>) = G_FSQRT %1 + %3:_(<3 x i32>) = G_BITCAST %2(<3 x f32>) + $vgpr0_vgpr1_vgpr2 = COPY %3(<3 x i32>) ... --- @@ -316,58 +362,62 @@ body: | ; GCN-LABEL: name: test_fsqrt_v2s64 ; GCN: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GCN-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x1000000000000000 - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GCN-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV]](s64), [[C]] - ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 - ; GCN-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C2]], [[C1]] - ; GCN-NEXT: [[FLDEXP:%[0-9]+]]:_(s64) = G_FLDEXP [[UV]], [[SELECT]](s32) - ; GCN-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), [[FLDEXP]](s64) - ; GCN-NEXT: [[C3:%[0-9]+]]:_(s64) = G_FCONSTANT double 5.000000e-01 - ; GCN-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT]], [[C3]] - ; GCN-NEXT: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[FLDEXP]], [[INT]] - ; GCN-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[FMUL]] - ; GCN-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL1]], [[C3]] - ; GCN-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[FMUL1]], [[FMA]], [[FMUL1]] - ; GCN-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FMUL]], [[FMA]], [[FMUL]] - ; GCN-NEXT: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[FMA1]] - ; GCN-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[FMA1]], [[FLDEXP]] - ; GCN-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FMA3]], [[FMA2]], [[FMA1]] - ; GCN-NEXT: [[FNEG2:%[0-9]+]]:_(s64) = G_FNEG [[FMA4]] - ; GCN-NEXT: [[FMA5:%[0-9]+]]:_(s64) = G_FMA [[FNEG2]], [[FMA4]], [[FLDEXP]] - ; GCN-NEXT: [[FMA6:%[0-9]+]]:_(s64) = G_FMA [[FMA5]], [[FMA2]], [[FMA4]] - ; GCN-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 -128 - ; GCN-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C4]], [[C1]] - ; GCN-NEXT: [[FLDEXP1:%[0-9]+]]:_(s64) = G_FLDEXP [[FMA6]], [[SELECT1]](s32) - ; GCN-NEXT: [[IS_FPCLASS:%[0-9]+]]:_(s1) = G_IS_FPCLASS [[FLDEXP]](s64), 608 - ; GCN-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[IS_FPCLASS]](s1), [[FLDEXP]], [[FLDEXP1]] - ; GCN-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s64), [[C]] - ; GCN-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C2]], [[C1]] - ; GCN-NEXT: [[FLDEXP2:%[0-9]+]]:_(s64) = G_FLDEXP [[UV1]], [[SELECT3]](s32) - ; GCN-NEXT: [[INT1:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), [[FLDEXP2]](s64) - ; GCN-NEXT: [[FMUL2:%[0-9]+]]:_(s64) = G_FMUL [[INT1]], [[C3]] - ; GCN-NEXT: [[FMUL3:%[0-9]+]]:_(s64) = G_FMUL [[FLDEXP2]], [[INT1]] - ; GCN-NEXT: [[FNEG3:%[0-9]+]]:_(s64) = G_FNEG [[FMUL2]] - ; GCN-NEXT: [[FMA7:%[0-9]+]]:_(s64) = G_FMA [[FNEG3]], [[FMUL3]], [[C3]] - ; GCN-NEXT: [[FMA8:%[0-9]+]]:_(s64) = G_FMA [[FMUL3]], [[FMA7]], [[FMUL3]] - ; GCN-NEXT: [[FMA9:%[0-9]+]]:_(s64) = G_FMA [[FMUL2]], [[FMA7]], [[FMUL2]] - ; 
GCN-NEXT: [[FNEG4:%[0-9]+]]:_(s64) = G_FNEG [[FMA8]] - ; GCN-NEXT: [[FMA10:%[0-9]+]]:_(s64) = G_FMA [[FNEG4]], [[FMA8]], [[FLDEXP2]] - ; GCN-NEXT: [[FMA11:%[0-9]+]]:_(s64) = G_FMA [[FMA10]], [[FMA9]], [[FMA8]] - ; GCN-NEXT: [[FNEG5:%[0-9]+]]:_(s64) = G_FNEG [[FMA11]] - ; GCN-NEXT: [[FMA12:%[0-9]+]]:_(s64) = G_FMA [[FNEG5]], [[FMA11]], [[FLDEXP2]] - ; GCN-NEXT: [[FMA13:%[0-9]+]]:_(s64) = G_FMA [[FMA12]], [[FMA9]], [[FMA11]] - ; GCN-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C4]], [[C1]] - ; GCN-NEXT: [[FLDEXP3:%[0-9]+]]:_(s64) = G_FLDEXP [[FMA13]], [[SELECT4]](s32) - ; GCN-NEXT: [[IS_FPCLASS1:%[0-9]+]]:_(s1) = G_IS_FPCLASS [[FLDEXP2]](s64), 608 - ; GCN-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[IS_FPCLASS1]](s1), [[FLDEXP2]], [[FLDEXP3]] - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT2]](s64), [[SELECT5]](s64) - ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s64>) = G_FSQRT %0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(f64), [[UV1:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST]](<2 x f64>) + ; GCN-NEXT: [[C:%[0-9]+]]:_(f64) = G_FCONSTANT double 0x1000000000000000 + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GCN-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[UV]](f64), [[C]] + ; GCN-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 256 + ; GCN-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[FCMP]](i1), [[C2]], [[C1]] + ; GCN-NEXT: [[FLDEXP:%[0-9]+]]:_(f64) = G_FLDEXP [[UV]], [[SELECT]](i32) + ; GCN-NEXT: [[INT:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), [[FLDEXP]](f64) + ; GCN-NEXT: [[C3:%[0-9]+]]:_(f64) = G_FCONSTANT double 5.000000e-01 + ; GCN-NEXT: [[FMUL:%[0-9]+]]:_(f64) = G_FMUL [[INT]], [[C3]] + ; GCN-NEXT: [[FMUL1:%[0-9]+]]:_(f64) = G_FMUL [[FLDEXP]], [[INT]] + ; GCN-NEXT: [[FNEG:%[0-9]+]]:_(f64) = G_FNEG [[FMUL]] + ; GCN-NEXT: [[FMA:%[0-9]+]]:_(f64) = G_FMA [[FNEG]], [[FMUL1]], [[C3]] + ; GCN-NEXT: [[FMA1:%[0-9]+]]:_(f64) = G_FMA [[FMUL1]], [[FMA]], [[FMUL1]] + ; GCN-NEXT: [[FMA2:%[0-9]+]]:_(f64) = G_FMA [[FMUL]], [[FMA]], [[FMUL]] + ; GCN-NEXT: [[FNEG1:%[0-9]+]]:_(f64) = G_FNEG [[FMA1]] + ; GCN-NEXT: [[FMA3:%[0-9]+]]:_(f64) = G_FMA [[FNEG1]], [[FMA1]], [[FLDEXP]] + ; GCN-NEXT: [[FMA4:%[0-9]+]]:_(f64) = G_FMA [[FMA3]], [[FMA2]], [[FMA1]] + ; GCN-NEXT: [[FNEG2:%[0-9]+]]:_(f64) = G_FNEG [[FMA4]] + ; GCN-NEXT: [[FMA5:%[0-9]+]]:_(f64) = G_FMA [[FNEG2]], [[FMA4]], [[FLDEXP]] + ; GCN-NEXT: [[FMA6:%[0-9]+]]:_(f64) = G_FMA [[FMA5]], [[FMA2]], [[FMA4]] + ; GCN-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 -128 + ; GCN-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[FCMP]](i1), [[C4]], [[C1]] + ; GCN-NEXT: [[FLDEXP1:%[0-9]+]]:_(f64) = G_FLDEXP [[FMA6]], [[SELECT1]](i32) + ; GCN-NEXT: [[IS_FPCLASS:%[0-9]+]]:_(i1) = G_IS_FPCLASS [[FLDEXP]](f64), 608 + ; GCN-NEXT: [[SELECT2:%[0-9]+]]:_(f64) = G_SELECT [[IS_FPCLASS]](i1), [[FLDEXP]], [[FLDEXP1]] + ; GCN-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(olt), [[UV1]](f64), [[C]] + ; GCN-NEXT: [[SELECT3:%[0-9]+]]:_(i32) = G_SELECT [[FCMP1]](i1), [[C2]], [[C1]] + ; GCN-NEXT: [[FLDEXP2:%[0-9]+]]:_(f64) = G_FLDEXP [[UV1]], [[SELECT3]](i32) + ; GCN-NEXT: [[INT1:%[0-9]+]]:_(f64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), [[FLDEXP2]](f64) + ; GCN-NEXT: [[FMUL2:%[0-9]+]]:_(f64) = G_FMUL [[INT1]], [[C3]] + ; GCN-NEXT: 
[[FMUL3:%[0-9]+]]:_(f64) = G_FMUL [[FLDEXP2]], [[INT1]] + ; GCN-NEXT: [[FNEG3:%[0-9]+]]:_(f64) = G_FNEG [[FMUL2]] + ; GCN-NEXT: [[FMA7:%[0-9]+]]:_(f64) = G_FMA [[FNEG3]], [[FMUL3]], [[C3]] + ; GCN-NEXT: [[FMA8:%[0-9]+]]:_(f64) = G_FMA [[FMUL3]], [[FMA7]], [[FMUL3]] + ; GCN-NEXT: [[FMA9:%[0-9]+]]:_(f64) = G_FMA [[FMUL2]], [[FMA7]], [[FMUL2]] + ; GCN-NEXT: [[FNEG4:%[0-9]+]]:_(f64) = G_FNEG [[FMA8]] + ; GCN-NEXT: [[FMA10:%[0-9]+]]:_(f64) = G_FMA [[FNEG4]], [[FMA8]], [[FLDEXP2]] + ; GCN-NEXT: [[FMA11:%[0-9]+]]:_(f64) = G_FMA [[FMA10]], [[FMA9]], [[FMA8]] + ; GCN-NEXT: [[FNEG5:%[0-9]+]]:_(f64) = G_FNEG [[FMA11]] + ; GCN-NEXT: [[FMA12:%[0-9]+]]:_(f64) = G_FMA [[FNEG5]], [[FMA11]], [[FLDEXP2]] + ; GCN-NEXT: [[FMA13:%[0-9]+]]:_(f64) = G_FMA [[FMA12]], [[FMA9]], [[FMA11]] + ; GCN-NEXT: [[SELECT4:%[0-9]+]]:_(i32) = G_SELECT [[FCMP1]](i1), [[C4]], [[C1]] + ; GCN-NEXT: [[FLDEXP3:%[0-9]+]]:_(f64) = G_FLDEXP [[FMA13]], [[SELECT4]](i32) + ; GCN-NEXT: [[IS_FPCLASS1:%[0-9]+]]:_(i1) = G_IS_FPCLASS [[FLDEXP2]](f64), 608 + ; GCN-NEXT: [[SELECT5:%[0-9]+]]:_(f64) = G_SELECT [[IS_FPCLASS1]](i1), [[FLDEXP2]], [[FLDEXP3]] + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f64>) = G_BUILD_VECTOR [[SELECT2]](f64), [[SELECT5]](f64) + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BUILD_VECTOR]](<2 x f64>) + ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST1]](<2 x i64>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x f64>) = G_BITCAST %0(<2 x i64>) + %2:_(<2 x f64>) = G_FSQRT %1 + %3:_(<2 x i64>) = G_BITCAST %2(<2 x f64>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3(<2 x i64>) ... --- @@ -379,59 +429,80 @@ body: | ; SI-LABEL: name: test_fsqrt_v2s16 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sqrt), [[FPEXT]](s32) - ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT]](s32) - ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sqrt), [[FPEXT1]](s32) - ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32) - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %12(i16) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST1]](f16) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %13(i16) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: 
[[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; SI-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sqrt), [[FPEXT]](f32) + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[INT]](f32) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; SI-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sqrt), [[FPEXT1]](f32) + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[INT1]](f32) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST5]](i16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST6]](i16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST7]](<2 x f16>) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST8]](<2 x i16>) ; ; VI-LABEL: name: test_fsqrt_v2s16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]] - ; VI-NEXT: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC1]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FSQRT]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FSQRT1]](s16) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %8(i16) + ; VI-NEXT: [[FSQRT:%[0-9]+]]:_(f16) = G_FSQRT [[BITCAST1]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %9(i16) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[FSQRT1:%[0-9]+]]:_(f16) = G_FSQRT [[BITCAST2]] + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[FSQRT]](f16) + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[FSQRT1]](f16) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST5]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST6]](i16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST7]](<2 x f16>) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST8]](<2 x i16>) ; ; GFX9-LABEL: name: test_fsqrt_v2s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: 
[[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]] - ; GFX9-NEXT: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC1]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FSQRT]](s16), [[FSQRT1]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = G_FSQRT %0 - $vgpr0 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %8(i16) + ; GFX9-NEXT: [[FSQRT:%[0-9]+]]:_(f16) = G_FSQRT [[BITCAST1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %9(i16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[FSQRT1:%[0-9]+]]:_(f16) = G_FSQRT [[BITCAST2]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[FSQRT]](f16), [[FSQRT1]](f16) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BUILD_VECTOR]](<2 x f16>) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST5]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %2:_(<2 x f16>) = G_FSQRT %1 + %3:_(<2 x i16>) = G_BITCAST %2(<2 x f16>) + $vgpr0 = COPY %3(<2 x i16>) ... 
--- @@ -440,71 +511,96 @@ body: | bb.0: ; SI-LABEL: name: test_fsqrt_v3s16 - ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sqrt), [[FPEXT]](s32) - ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT]](s32) - ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sqrt), [[FPEXT1]](s32) - ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32) - ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sqrt), [[FPEXT2]](s32) - ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT2]](s32) - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) - ; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; SI: [[DEF:%[0-9]+]]:_(<4 x f16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[DEF]](<4 x f16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST %35(i16) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %41(i16) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %36(i16) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST5]](<2 x i16>) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST6]](i32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST6]], [[C]](i32) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; SI-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sqrt), [[FPEXT]](f32) + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[INT]](f32) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; SI-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sqrt), [[FPEXT1]](f32) + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[INT1]](f32) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST1]](f16) + ; SI-NEXT: [[INT2:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sqrt), [[FPEXT2]](f32) + ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(f16) = G_FPTRUNC [[INT2]](f32) + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST 
[[FPTRUNC1]](f16) + ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC2]](f16) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST7]](i16) + ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST8]](i16) + ; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST9]](i16) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32), [[ANYEXT2]](i32) + ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x i32>) ; ; VI-LABEL: name: test_fsqrt_v3s16 - ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]] - ; VI-NEXT: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC1]] - ; VI-NEXT: [[FSQRT2:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC2]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT]](s16) - ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT1]](s16) - ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT2]](s16) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; VI: [[DEF:%[0-9]+]]:_(<4 x f16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[DEF]](<4 x f16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST %29(i16) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %35(i16) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %30(i16) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST5]](<2 x i16>) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST6]](i32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST6]], [[C]](i32) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[FSQRT:%[0-9]+]]:_(f16) = G_FSQRT [[BITCAST]] + ; VI-NEXT: [[FSQRT1:%[0-9]+]]:_(f16) = G_FSQRT [[BITCAST2]] + ; VI-NEXT: [[FSQRT2:%[0-9]+]]:_(f16) = G_FSQRT [[BITCAST1]] + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[FSQRT]](f16) + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[FSQRT1]](f16) + ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[FSQRT2]](f16) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST7]](i16) + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST8]](i16) + ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST9]](i16) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32), [[ANYEXT2]](i32) + ; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x i32>) ; ; GFX9-LABEL: name: test_fsqrt_v3s16 - ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: 
[[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]] - ; GFX9-NEXT: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC1]] - ; GFX9-NEXT: [[FSQRT2:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC2]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT]](s16) - ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT1]](s16) - ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT2]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) - %0:_(<3 x s16>) = G_IMPLICIT_DEF - %1:_(<3 x s16>) = G_FSQRT %0 - %2:_(<3 x s32>) = G_ANYEXT %1 - S_NOP 0, implicit %2 + ; GFX9: [[DEF:%[0-9]+]]:_(<4 x f16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[DEF]](<4 x f16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST %29(i16) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %35(i16) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %30(i16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST5]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST6]](i32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST6]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[FSQRT:%[0-9]+]]:_(f16) = G_FSQRT [[BITCAST]] + ; GFX9-NEXT: [[FSQRT1:%[0-9]+]]:_(f16) = G_FSQRT [[BITCAST2]] + ; GFX9-NEXT: [[FSQRT2:%[0-9]+]]:_(f16) = G_FSQRT [[BITCAST1]] + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(i16) = G_BITCAST [[FSQRT]](f16) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[FSQRT1]](f16) + ; GFX9-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[FSQRT2]](f16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST7]](i16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST8]](i16) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST9]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32), [[ANYEXT2]](i32) + ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x i32>) + %0:_(<3 x f16>) = G_IMPLICIT_DEF + %1:_(<3 x f16>) = G_FSQRT %0 + %2:_(<3 x i16>) = G_BITCAST %1(<3 x f16>) + %3:_(<3 x i32>) = G_ANYEXT %2(<3 x i16>) + S_NOP 0, implicit %3(<3 x i32>) ... 
--- @@ -516,96 +612,130 @@ body: | ; SI-LABEL: name: test_fsqrt_v4s16 ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sqrt), [[FPEXT]](s32) - ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT]](s32) - ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sqrt), [[FPEXT1]](s32) - ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32) - ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sqrt), [[FPEXT2]](s32) - ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT2]](s32) - ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI-NEXT: [[INT3:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sqrt), [[FPEXT3]](s32) - ; SI-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT3]](s32) - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) - ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x f16>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %24(i16) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %30(i16) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %25(i16) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %31(i16) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST5]](<2 x i16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST6]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST6]], [[C]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; 
SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST7]](<2 x i16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST8]](i32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST8]], [[C]](i32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST1]](f16) + ; SI-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sqrt), [[FPEXT]](f32) + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[INT]](f32) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST3]](f16) + ; SI-NEXT: [[INT1:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sqrt), [[FPEXT1]](f32) + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[INT1]](f32) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; SI-NEXT: [[INT2:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sqrt), [[FPEXT2]](f32) + ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(f16) = G_FPTRUNC [[INT2]](f32) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST4]](f16) + ; SI-NEXT: [[INT3:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sqrt), [[FPEXT3]](f32) + ; SI-NEXT: [[FPTRUNC3:%[0-9]+]]:_(f16) = G_FPTRUNC [[INT3]](f32) + ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST9]](i16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST10]](i16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC2]](f16) + ; SI-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC3]](f16) + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST12]](i16) + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST13]](i16) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR1]](i32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[BITCAST11]](<2 x f16>), [[BITCAST14]](<2 x f16>) + ; SI-NEXT: [[BITCAST15:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x f16>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST15]](<4 x i16>) ; ; VI-LABEL: name: test_fsqrt_v4s16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]] - ; VI-NEXT: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC1]] - ; VI-NEXT: [[FSQRT2:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC2]] - ; VI-NEXT: 
[[FSQRT3:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC3]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FSQRT]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FSQRT1]](s16) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FSQRT2]](s16) - ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FSQRT3]](s16) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x f16>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %16(i16) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %22(i16) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %17(i16) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %23(i16) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST5]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST6]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST6]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST7]](<2 x i16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST8]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST8]], [[C]](i32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[FSQRT:%[0-9]+]]:_(f16) = G_FSQRT [[BITCAST1]] + ; VI-NEXT: [[FSQRT1:%[0-9]+]]:_(f16) = G_FSQRT [[BITCAST3]] + ; VI-NEXT: [[FSQRT2:%[0-9]+]]:_(f16) = G_FSQRT [[BITCAST2]] + ; VI-NEXT: [[FSQRT3:%[0-9]+]]:_(f16) = G_FSQRT [[BITCAST4]] + ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[FSQRT]](f16) + ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[FSQRT1]](f16) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST9]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST10]](i16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[FSQRT2]](f16) + ; VI-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[FSQRT3]](f16) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST12]](i16) + ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST13]](i16) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR1]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[BITCAST11]](<2 x f16>), [[BITCAST14]](<2 x f16>) + ; VI-NEXT: [[BITCAST15:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x f16>) + ; 
VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST15]](<4 x i16>) ; ; GFX9-LABEL: name: test_fsqrt_v4s16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]] - ; GFX9-NEXT: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC1]] - ; GFX9-NEXT: [[FSQRT2:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC2]] - ; GFX9-NEXT: [[FSQRT3:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC3]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FSQRT]](s16), [[FSQRT1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FSQRT2]](s16), [[FSQRT3]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = G_FSQRT %0 - $vgpr0_vgpr1 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x f16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %16(i16) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %22(i16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %17(i16) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %23(i16) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST5]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST6]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST6]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST7]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST8]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST8]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[FSQRT:%[0-9]+]]:_(f16) = G_FSQRT [[BITCAST1]] + ; GFX9-NEXT: [[FSQRT1:%[0-9]+]]:_(f16) = G_FSQRT [[BITCAST3]] + ; GFX9-NEXT: [[FSQRT2:%[0-9]+]]:_(f16) = G_FSQRT [[BITCAST2]] + ; GFX9-NEXT: [[FSQRT3:%[0-9]+]]:_(f16) = G_FSQRT [[BITCAST4]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[FSQRT]](f16), [[FSQRT1]](f16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[FSQRT2]](f16), [[FSQRT3]](f16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x f16>), 
[[BUILD_VECTOR1]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST9:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x f16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST9]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x f16>) = G_BITCAST %0(<4 x i16>) + %2:_(<4 x f16>) = G_FSQRT %1 + %3:_(<4 x i16>) = G_BITCAST %2(<4 x f16>) + $vgpr0_vgpr1 = COPY %3(<4 x i16>) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir index 206ad3e24ee02..b654f25f6686f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir @@ -14,30 +14,42 @@ body: | ; SI-LABEL: name: test_fsub_s32 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[COPY]], [[COPY1]] - ; SI-NEXT: $vgpr0 = COPY [[FSUB]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; SI-NEXT: [[FSUB:%[0-9]+]]:_(f32) = G_FSUB [[BITCAST]], [[BITCAST1]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FSUB]](f32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) ; ; VI-LABEL: name: test_fsub_s32 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[COPY]], [[COPY1]] - ; VI-NEXT: $vgpr0 = COPY [[FSUB]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; VI-NEXT: [[FSUB:%[0-9]+]]:_(f32) = G_FSUB [[BITCAST]], [[BITCAST1]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FSUB]](f32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) ; ; GFX9-LABEL: name: test_fsub_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[COPY]], [[COPY1]] - ; GFX9-NEXT: $vgpr0 = COPY [[FSUB]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_FSUB %0, %1 - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(f32) = G_FSUB [[BITCAST]], [[BITCAST1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FSUB]](f32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32) = G_FSUB %2, %3 + %5:_(i32) = G_BITCAST %4(f32) + $vgpr0 = COPY %5(i32) ... 
--- name: test_fsub_s64 @@ -48,33 +60,45 @@ body: | ; SI-LABEL: name: test_fsub_s64 ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY1]] - ; SI-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[COPY]], [[FNEG]] - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[FADD]](s64) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[COPY1]](i64) + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(f64) = G_FNEG [[BITCAST1]] + ; SI-NEXT: [[FADD:%[0-9]+]]:_(f64) = G_FADD [[BITCAST]], [[FNEG]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i64) = G_BITCAST [[FADD]](f64) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](i64) ; ; VI-LABEL: name: test_fsub_s64 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY1]] - ; VI-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[COPY]], [[FNEG]] - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[FADD]](s64) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[COPY1]](i64) + ; VI-NEXT: [[FNEG:%[0-9]+]]:_(f64) = G_FNEG [[BITCAST1]] + ; VI-NEXT: [[FADD:%[0-9]+]]:_(f64) = G_FADD [[BITCAST]], [[FNEG]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i64) = G_BITCAST [[FADD]](f64) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](i64) ; ; GFX9-LABEL: name: test_fsub_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY1]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[COPY]], [[FNEG]] - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FADD]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = G_FSUB %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[COPY1]](i64) + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(f64) = G_FNEG [[BITCAST1]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(f64) = G_FADD [[BITCAST]], [[FNEG]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i64) = G_BITCAST [[FADD]](f64) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(f64) = G_BITCAST %0(i64) + %3:_(f64) = G_BITCAST %1(i64) + %4:_(f64) = G_FSUB %2, %3 + %5:_(i64) = G_BITCAST %4(f64) + $vgpr0_vgpr1 = COPY %5(i64) ... 
--- @@ -86,33 +110,45 @@ body: | ; SI-LABEL: name: test_fsub_s64_fmf ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY1]] - ; SI-NEXT: [[FADD:%[0-9]+]]:_(s64) = nnan nsz G_FADD [[COPY]], [[FNEG]] - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[FADD]](s64) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[COPY1]](i64) + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(f64) = G_FNEG [[BITCAST1]] + ; SI-NEXT: [[FADD:%[0-9]+]]:_(f64) = nnan nsz G_FADD [[BITCAST]], [[FNEG]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i64) = G_BITCAST [[FADD]](f64) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](i64) ; ; VI-LABEL: name: test_fsub_s64_fmf ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY1]] - ; VI-NEXT: [[FADD:%[0-9]+]]:_(s64) = nnan nsz G_FADD [[COPY]], [[FNEG]] - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[FADD]](s64) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[COPY1]](i64) + ; VI-NEXT: [[FNEG:%[0-9]+]]:_(f64) = G_FNEG [[BITCAST1]] + ; VI-NEXT: [[FADD:%[0-9]+]]:_(f64) = nnan nsz G_FADD [[BITCAST]], [[FNEG]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i64) = G_BITCAST [[FADD]](f64) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](i64) ; ; GFX9-LABEL: name: test_fsub_s64_fmf ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY1]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s64) = nnan nsz G_FADD [[COPY]], [[FNEG]] - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FADD]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = nnan nsz G_FSUB %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[COPY1]](i64) + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(f64) = G_FNEG [[BITCAST1]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(f64) = nnan nsz G_FADD [[BITCAST]], [[FNEG]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i64) = G_BITCAST [[FADD]](f64) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(f64) = G_BITCAST %0(i64) + %3:_(f64) = G_BITCAST %1(i64) + %4:_(f64) = nnan nsz G_FSUB %2, %3 + %5:_(i64) = G_BITCAST %4(f64) + $vgpr0_vgpr1 = COPY %5(i64) ... 
--- @@ -124,47 +160,59 @@ body: | ; SI-LABEL: name: test_fsub_s16 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC1]] - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG]](s16) - ; SI-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT]], [[FPEXT1]] - ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(f16) = G_FNEG [[BITCAST1]] + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[FNEG]](f16) + ; SI-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FPEXT]], [[FPEXT1]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD]](f32) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST2]](i16) + ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; VI-LABEL: name: test_fsub_s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI-NEXT: [[FSUB:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC]], [[TRUNC1]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSUB]](s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; VI-NEXT: [[FSUB:%[0-9]+]]:_(f16) = G_FSUB [[BITCAST]], [[BITCAST1]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[FSUB]](f16) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST2]](i16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: test_fsub_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC]], [[TRUNC1]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSUB]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: 
[[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(f16) = G_FSUB [[BITCAST]], [[BITCAST1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[FSUB]](f16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST2]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(f16) = G_BITCAST %2(i16) + %5:_(f16) = G_BITCAST %3(i16) + %6:_(f16) = G_FSUB %4, %5 + %7:_(i16) = G_BITCAST %6(f16) + %8:_(i32) = G_ANYEXT %7(i16) + $vgpr0 = COPY %8(i32) - %4:_(s16) = G_FSUB %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 ... --- @@ -176,42 +224,54 @@ body: | ; SI-LABEL: name: test_fsub_v2s32 ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[UV]], [[UV2]] - ; SI-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[UV1]], [[UV3]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSUB]](s32), [[FSUB1]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY1]](<2 x i32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f32>) + ; SI-NEXT: [[FSUB:%[0-9]+]]:_(f32) = G_FSUB [[UV]], [[UV2]] + ; SI-NEXT: [[FSUB1:%[0-9]+]]:_(f32) = G_FSUB [[UV1]], [[UV3]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FSUB]](f32), [[FSUB1]](f32) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](<2 x i32>) ; ; VI-LABEL: name: test_fsub_v2s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[UV]], [[UV2]] - ; VI-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[UV1]], [[UV3]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSUB]](s32), [[FSUB1]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY1]](<2 x i32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(f32), 
[[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f32>) + ; VI-NEXT: [[FSUB:%[0-9]+]]:_(f32) = G_FSUB [[UV]], [[UV2]] + ; VI-NEXT: [[FSUB1:%[0-9]+]]:_(f32) = G_FSUB [[UV1]], [[UV3]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FSUB]](f32), [[FSUB1]](f32) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](<2 x i32>) ; ; GFX9-LABEL: name: test_fsub_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[UV]], [[UV2]] - ; GFX9-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[UV1]], [[UV3]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSUB]](s32), [[FSUB1]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = G_FSUB %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY1]](<2 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f32>) + ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(f32) = G_FSUB [[UV]], [[UV2]] + ; GFX9-NEXT: [[FSUB1:%[0-9]+]]:_(f32) = G_FSUB [[UV1]], [[UV3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FSUB]](f32), [[FSUB1]](f32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x f32>) = G_BITCAST %0(<2 x i32>) + %3:_(<2 x f32>) = G_BITCAST %1(<2 x i32>) + %4:_(<2 x f32>) = G_FSUB %2, %3 + %5:_(<2 x i32>) = G_BITCAST %4(<2 x f32>) + $vgpr0_vgpr1 = COPY %5(<2 x i32>) ... 
--- @@ -223,42 +283,54 @@ body: | ; SI-LABEL: name: test_fsub_v2s32_flags ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI-NEXT: [[FSUB:%[0-9]+]]:_(s32) = nnan G_FSUB [[UV]], [[UV2]] - ; SI-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = nnan G_FSUB [[UV1]], [[UV3]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSUB]](s32), [[FSUB1]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY1]](<2 x i32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f32>) + ; SI-NEXT: [[FSUB:%[0-9]+]]:_(f32) = nnan G_FSUB [[UV]], [[UV2]] + ; SI-NEXT: [[FSUB1:%[0-9]+]]:_(f32) = nnan G_FSUB [[UV1]], [[UV3]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FSUB]](f32), [[FSUB1]](f32) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](<2 x i32>) ; ; VI-LABEL: name: test_fsub_v2s32_flags ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI-NEXT: [[FSUB:%[0-9]+]]:_(s32) = nnan G_FSUB [[UV]], [[UV2]] - ; VI-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = nnan G_FSUB [[UV1]], [[UV3]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSUB]](s32), [[FSUB1]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY1]](<2 x i32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f32>) + ; VI-NEXT: [[FSUB:%[0-9]+]]:_(f32) = nnan G_FSUB [[UV]], [[UV2]] + ; VI-NEXT: [[FSUB1:%[0-9]+]]:_(f32) = nnan G_FSUB [[UV1]], [[UV3]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FSUB]](f32), [[FSUB1]](f32) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](<2 x i32>) ; ; GFX9-LABEL: name: test_fsub_v2s32_flags ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; 
GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s32) = nnan G_FSUB [[UV]], [[UV2]] - ; GFX9-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = nnan G_FSUB [[UV1]], [[UV3]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSUB]](s32), [[FSUB1]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = nnan G_FSUB %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY1]](<2 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(f32), [[UV3:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f32>) + ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(f32) = nnan G_FSUB [[UV]], [[UV2]] + ; GFX9-NEXT: [[FSUB1:%[0-9]+]]:_(f32) = nnan G_FSUB [[UV1]], [[UV3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FSUB]](f32), [[FSUB1]](f32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x f32>) = G_BITCAST %0(<2 x i32>) + %3:_(<2 x f32>) = G_BITCAST %1(<2 x i32>) + %4:_(<2 x f32>) = nnan G_FSUB %2, %3 + %5:_(<2 x i32>) = G_BITCAST %4(<2 x f32>) + $vgpr0_vgpr1 = COPY %5(<2 x i32>) ... --- @@ -270,45 +342,57 @@ body: | ; SI-LABEL: name: test_fsub_v3s32 ; SI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; SI-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[UV]], [[UV3]] - ; SI-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[UV1]], [[UV4]] - ; SI-NEXT: [[FSUB2:%[0-9]+]]:_(s32) = G_FSUB [[UV2]], [[UV5]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FSUB]](s32), [[FSUB1]](s32), [[FSUB2]](s32) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY]](<3 x i32>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY1]](<3 x i32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; SI-NEXT: [[UV3:%[0-9]+]]:_(f32), [[UV4:%[0-9]+]]:_(f32), [[UV5:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<3 x f32>) + ; SI-NEXT: [[FSUB:%[0-9]+]]:_(f32) = G_FSUB [[UV]], [[UV3]] + ; SI-NEXT: [[FSUB1:%[0-9]+]]:_(f32) = G_FSUB [[UV1]], [[UV4]] + ; SI-NEXT: [[FSUB2:%[0-9]+]]:_(f32) = G_FSUB [[UV2]], [[UV5]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[FSUB]](f32), [[FSUB1]](f32), [[FSUB2]](f32) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[BUILD_VECTOR]](<3 x f32>) + 
; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST2]](<3 x i32>) ; ; VI-LABEL: name: test_fsub_v3s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; VI-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[UV]], [[UV3]] - ; VI-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[UV1]], [[UV4]] - ; VI-NEXT: [[FSUB2:%[0-9]+]]:_(s32) = G_FSUB [[UV2]], [[UV5]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FSUB]](s32), [[FSUB1]](s32), [[FSUB2]](s32) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY]](<3 x i32>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY1]](<3 x i32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; VI-NEXT: [[UV3:%[0-9]+]]:_(f32), [[UV4:%[0-9]+]]:_(f32), [[UV5:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<3 x f32>) + ; VI-NEXT: [[FSUB:%[0-9]+]]:_(f32) = G_FSUB [[UV]], [[UV3]] + ; VI-NEXT: [[FSUB1:%[0-9]+]]:_(f32) = G_FSUB [[UV1]], [[UV4]] + ; VI-NEXT: [[FSUB2:%[0-9]+]]:_(f32) = G_FSUB [[UV2]], [[UV5]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[FSUB]](f32), [[FSUB1]](f32), [[FSUB2]](f32) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[BUILD_VECTOR]](<3 x f32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST2]](<3 x i32>) ; ; GFX9-LABEL: name: test_fsub_v3s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[UV]], [[UV3]] - ; GFX9-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[UV1]], [[UV4]] - ; GFX9-NEXT: [[FSUB2:%[0-9]+]]:_(s32) = G_FSUB [[UV2]], [[UV5]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FSUB]](s32), [[FSUB1]](s32), [[FSUB2]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - %2:_(<3 x s32>) = G_FSUB %0, %1 - $vgpr0_vgpr1_vgpr2 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY]](<3 x i32>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x f32>) = G_BITCAST [[COPY1]](<3 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32), [[UV2:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<3 x f32>) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(f32), [[UV4:%[0-9]+]]:_(f32), [[UV5:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST1]](<3 x f32>) + ; GFX9-NEXT: 
[[FSUB:%[0-9]+]]:_(f32) = G_FSUB [[UV]], [[UV3]] + ; GFX9-NEXT: [[FSUB1:%[0-9]+]]:_(f32) = G_FSUB [[UV1]], [[UV4]] + ; GFX9-NEXT: [[FSUB2:%[0-9]+]]:_(f32) = G_FSUB [[UV2]], [[UV5]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x f32>) = G_BUILD_VECTOR [[FSUB]](f32), [[FSUB1]](f32), [[FSUB2]](f32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[BUILD_VECTOR]](<3 x f32>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST2]](<3 x i32>) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(<3 x f32>) = G_BITCAST %0(<3 x i32>) + %3:_(<3 x f32>) = G_BITCAST %1(<3 x i32>) + %4:_(<3 x f32>) = G_FSUB %2, %3 + %5:_(<3 x i32>) = G_BITCAST %4(<3 x f32>) + $vgpr0_vgpr1_vgpr2 = COPY %5(<3 x i32>) ... --- @@ -320,48 +404,60 @@ body: | ; SI-LABEL: name: test_fsub_v2s64 ; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[UV2]] - ; SI-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[UV]], [[FNEG]] - ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[UV3]] - ; SI-NEXT: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[UV1]], [[FNEG1]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FADD]](s64), [[FADD1]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY1]](<2 x i64>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(f64), [[UV1:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST]](<2 x f64>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(f64), [[UV3:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f64>) + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(f64) = G_FNEG [[UV2]] + ; SI-NEXT: [[FADD:%[0-9]+]]:_(f64) = G_FADD [[UV]], [[FNEG]] + ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(f64) = G_FNEG [[UV3]] + ; SI-NEXT: [[FADD1:%[0-9]+]]:_(f64) = G_FADD [[UV1]], [[FNEG1]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f64>) = G_BUILD_VECTOR [[FADD]](f64), [[FADD1]](f64) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BUILD_VECTOR]](<2 x f64>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST2]](<2 x i64>) ; ; VI-LABEL: name: test_fsub_v2s64 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[UV2]] - ; VI-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[UV]], [[FNEG]] - ; VI-NEXT: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[UV3]] - ; VI-NEXT: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[UV1]], [[FNEG1]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FADD]](s64), [[FADD1]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; 
VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY1]](<2 x i64>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(f64), [[UV1:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST]](<2 x f64>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(f64), [[UV3:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f64>) + ; VI-NEXT: [[FNEG:%[0-9]+]]:_(f64) = G_FNEG [[UV2]] + ; VI-NEXT: [[FADD:%[0-9]+]]:_(f64) = G_FADD [[UV]], [[FNEG]] + ; VI-NEXT: [[FNEG1:%[0-9]+]]:_(f64) = G_FNEG [[UV3]] + ; VI-NEXT: [[FADD1:%[0-9]+]]:_(f64) = G_FADD [[UV1]], [[FNEG1]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f64>) = G_BUILD_VECTOR [[FADD]](f64), [[FADD1]](f64) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BUILD_VECTOR]](<2 x f64>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST2]](<2 x i64>) ; ; GFX9-LABEL: name: test_fsub_v2s64 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[UV2]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[UV]], [[FNEG]] - ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[UV3]] - ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[UV1]], [[FNEG1]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FADD]](s64), [[FADD1]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - %2:_(<2 x s64>) = G_FSUB %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY1]](<2 x i64>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f64), [[UV1:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST]](<2 x f64>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(f64), [[UV3:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST1]](<2 x f64>) + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(f64) = G_FNEG [[UV2]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(f64) = G_FADD [[UV]], [[FNEG]] + ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(f64) = G_FNEG [[UV3]] + ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(f64) = G_FADD [[UV1]], [[FNEG1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f64>) = G_BUILD_VECTOR [[FADD]](f64), [[FADD1]](f64) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BUILD_VECTOR]](<2 x f64>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST2]](<2 x i64>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(<2 x f64>) = G_BITCAST %0(<2 x i64>) + %3:_(<2 x f64>) = G_BITCAST %1(<2 x i64>) + %4:_(<2 x f64>) = G_FSUB %2, %3 + %5:_(<2 x i64>) = G_BITCAST %4(<2 x f64>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %5(<2 x i64>) ... 
--- @@ -373,69 +469,97 @@ body: | ; SI-LABEL: name: test_fsub_v2s16 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; SI-NEXT: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[COPY1]] - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[FNEG]](<2 x s16>) - ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT]], [[FPEXT1]] - ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) - ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FPEXT2]], [[FPEXT3]] - ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD1]](s32) - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY1]](<2 x i16>) + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(<2 x f16>) = G_FNEG [[BITCAST1]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %19(i16) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %25(i16) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %20(i16) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %26(i16) + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[FNEG]](<2 x f16>) + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST6]](<2 x i16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST7]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST7]], [[C]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST8]](<2 x i16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST9]](i32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST9]], [[C]](i32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST3]](f16) + ; SI-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FPEXT]], [[FPEXT1]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD]](f32) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST4]](f16) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(f32) = G_FPEXT 
[[BITCAST5]](f16) + ; SI-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FPEXT2]], [[FPEXT3]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD1]](f32) + ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; SI-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST10]](i16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST11]](i16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST12]](<2 x f16>) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST13]](<2 x i16>) ; ; VI-LABEL: name: test_fsub_v2s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; VI-NEXT: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[COPY1]] - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[FNEG]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[TRUNC2]] - ; VI-NEXT: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[TRUNC3]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FADD]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FADD1]](s16) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY1]](<2 x i16>) + ; VI-NEXT: [[FNEG:%[0-9]+]]:_(<2 x f16>) = G_FNEG [[BITCAST1]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %13(i16) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %19(i16) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %14(i16) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %20(i16) + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[FNEG]](<2 x f16>) + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST6]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST7]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST7]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST8]](<2 x i16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST9]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST9]], [[C]](i32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[FADD:%[0-9]+]]:_(f16) = G_FADD [[BITCAST2]], 
[[BITCAST3]] + ; VI-NEXT: [[FADD1:%[0-9]+]]:_(f16) = G_FADD [[BITCAST4]], [[BITCAST5]] + ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[FADD]](f16) + ; VI-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[FADD1]](f16) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST10]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST11]](i16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST12]](<2 x f16>) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST13]](<2 x i16>) ; ; GFX9-LABEL: name: test_fsub_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[COPY1]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<2 x s16>) = G_FADD [[COPY]], [[FNEG]] - ; GFX9-NEXT: $vgpr0 = COPY [[FADD]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_FSUB %0, %1 - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(<2 x f16>) = G_FNEG [[BITCAST1]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<2 x f16>) = G_FADD [[BITCAST]], [[FNEG]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[FADD]](<2 x f16>) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %3:_(<2 x f16>) = G_BITCAST %1(<2 x i16>) + %4:_(<2 x f16>) = G_FSUB %2, %3 + %5:_(<2 x i16>) = G_BITCAST %4(<2 x f16>) + $vgpr0 = COPY %5(<2 x i16>) ... 
--- @@ -446,151 +570,181 @@ body: | ; SI-LABEL: name: test_fsub_v3s16 ; SI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC3]] - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG]](s16) - ; SI-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT]], [[FPEXT1]] - ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) - ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC4]] - ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG1]](s16) - ; SI-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FPEXT2]], [[FPEXT3]] - ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD1]](s32) - ; SI-NEXT: [[FNEG2:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC5]] - ; SI-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG2]](s16) - ; SI-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FPEXT4]], [[FPEXT5]] - ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD2]](s32) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND 
[[BITCAST5]], [[C1]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]] - ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr3_vgpr4_vgpr5 + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; SI-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<6 x i16>) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST5]](i32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST5]], [[C]](i32) + ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST6]](i32) + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(f16) = G_FNEG [[BITCAST7]] + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[FNEG]](f16) + ; SI-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FPEXT]], [[FPEXT1]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD]](f32) + ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(f16) = G_FNEG [[BITCAST8]] + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST3]](f16) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(f32) = G_FPEXT [[FNEG1]](f16) + ; SI-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FPEXT2]], [[FPEXT3]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD1]](f32) + ; SI-NEXT: [[FNEG2:%[0-9]+]]:_(f16) = G_FNEG [[BITCAST9]] + ; SI-NEXT: [[FPEXT4:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST4]](f16) + ; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(f32) = G_FPEXT [[FNEG2]](f16) + ; SI-NEXT: [[FADD2:%[0-9]+]]:_(f32) = G_FADD [[FPEXT4]], [[FPEXT5]] + ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD2]](f32) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[UV6]](<2 x i16>) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST10]], [[C]](i32) + ; SI-NEXT: [[BITCAST11:%[0-9]+]]:_(i32) = G_BITCAST [[UV7]](<2 x i16>) + ; SI-NEXT: 
[[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; SI-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; SI-NEXT: [[BITCAST14:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC2]](f16) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST12]](i16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST13]](i16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST14]](i16) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST10]], [[C1]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND]], [[C]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI-NEXT: [[BITCAST16:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST11]], [[C1]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR2]], [[SHL2]] + ; SI-NEXT: [[BITCAST17:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST15]](<2 x i16>), [[BITCAST16]](<2 x i16>), [[BITCAST17]](<2 x i16>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; VI-LABEL: name: test_fsub_v3s16 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI-NEXT: [[FSUB:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC]], [[TRUNC3]] - ; VI-NEXT: [[FSUB1:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC1]], [[TRUNC4]] - ; VI-NEXT: [[FSUB2:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC2]], [[TRUNC5]] - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FSUB]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FSUB1]](s16) - ; VI-NEXT: 
[[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FSUB2]](s16) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]] - ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr3_vgpr4_vgpr5 + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; VI-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<6 x i16>) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST5]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST5]], [[C]](i32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST6]](i32) + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; VI-NEXT: [[FSUB:%[0-9]+]]:_(f16) = G_FSUB [[BITCAST2]], [[BITCAST7]] + ; VI-NEXT: [[FSUB1:%[0-9]+]]:_(f16) = G_FSUB [[BITCAST3]], [[BITCAST8]] + ; VI-NEXT: [[FSUB2:%[0-9]+]]:_(f16) = G_FSUB [[BITCAST4]], [[BITCAST9]] + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[UV6]](<2 x i16>) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST10]], [[C]](i32) + ; VI-NEXT: [[BITCAST11:%[0-9]+]]:_(i32) = G_BITCAST [[UV7]](<2 x i16>) + ; VI-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[FSUB]](f16) + ; VI-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[FSUB1]](f16) + ; VI-NEXT: [[BITCAST14:%[0-9]+]]:_(i16) = 
G_BITCAST [[FSUB2]](f16) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST12]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST13]](i16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST14]](i16) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST10]], [[C1]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND]], [[C]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI-NEXT: [[BITCAST16:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST11]], [[C1]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR2]], [[SHL2]] + ; VI-NEXT: [[BITCAST17:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST15]](<2 x i16>), [[BITCAST16]](<2 x i16>), [[BITCAST17]](<2 x i16>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX9-LABEL: name: test_fsub_v3s16 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC]], [[TRUNC3]] - ; GFX9-NEXT: [[FSUB1:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC1]], [[TRUNC4]] - ; GFX9-NEXT: [[FSUB2:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC2]], [[TRUNC5]] - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = 
G_BUILD_VECTOR [[FSUB]](s16), [[FSUB1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FSUB2]](s16), [[TRUNC6]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC7]](s16), [[TRUNC8]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) - %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - %2:_(<3 x s16>), %3:_(<3 x s16>) = G_UNMERGE_VALUES %0 - %4:_(<3 x s16>), %5:_(<3 x s16>) = G_UNMERGE_VALUES %1 - %6:_(<3 x s16>) = G_FSUB %2, %4 - %7:_(<3 x s16>) = G_IMPLICIT_DEF - %8:_(<6 x s16>) = G_CONCAT_VECTORS %6, %7 - $vgpr0_vgpr1_vgpr2 = COPY %8 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<6 x i16>) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST5]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST5]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST6]](i32) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX9-NEXT: [[BITCAST9:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(f16) = G_FSUB [[BITCAST2]], [[BITCAST7]] + ; GFX9-NEXT: [[FSUB1:%[0-9]+]]:_(f16) = G_FSUB [[BITCAST3]], [[BITCAST8]] + ; GFX9-NEXT: [[FSUB2:%[0-9]+]]:_(f16) = G_FSUB [[BITCAST4]], [[BITCAST9]] + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX9-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[UV6]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST10]](i32) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST10]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX9-NEXT: [[BITCAST11:%[0-9]+]]:_(i32) = G_BITCAST [[UV7]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST11]](i32) + ; GFX9-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[FSUB]](f16) + ; GFX9-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST 
[[FSUB1]](f16) + ; GFX9-NEXT: [[BITCAST14:%[0-9]+]]:_(i16) = G_BITCAST [[FSUB2]](f16) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST12]](i16), [[BITCAST13]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST14]](i16), [[TRUNC6]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC7]](i16), [[TRUNC8]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) + %0:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<6 x i16>) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(<3 x i16>), %3:_(<3 x i16>) = G_UNMERGE_VALUES %0(<6 x i16>) + %4:_(<3 x i16>), %5:_(<3 x i16>) = G_UNMERGE_VALUES %1(<6 x i16>) + %6:_(<3 x f16>) = G_BITCAST %2(<3 x i16>) + %7:_(<3 x f16>) = G_BITCAST %4(<3 x i16>) + %8:_(<3 x f16>) = G_FSUB %6, %7 + %9:_(<3 x i16>) = G_IMPLICIT_DEF + %10:_(<3 x i16>) = G_BITCAST %8(<3 x f16>) + %11:_(<6 x i16>) = G_CONCAT_VECTORS %10(<3 x i16>), %9(<3 x i16>) + $vgpr0_vgpr1_vgpr2 = COPY %11(<6 x i16>) ... @@ -603,135 +757,191 @@ body: | ; SI-LABEL: name: test_fsub_v4s16 ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC4]] - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG]](s16) - ; SI-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT]], [[FPEXT1]] - ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) - ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC5]] - ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG1]](s16) - ; SI-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FPEXT2]], [[FPEXT3]] - ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD1]](s32) - ; SI-NEXT: [[FNEG2:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC6]] - ; SI-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT 
[[TRUNC2]](s16) - ; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG2]](s16) - ; SI-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FPEXT4]], [[FPEXT5]] - ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD2]](s32) - ; SI-NEXT: [[FNEG3:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC7]] - ; SI-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG3]](s16) - ; SI-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FPEXT6]], [[FPEXT7]] - ; SI-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD3]](s32) - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) - ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY1]](<4 x i16>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x f16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %40(i16) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %46(i16) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %41(i16) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %47(i16) + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST6]](<2 x i16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST7]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST7]], [[C]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST8]](<2 x i16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST9]](i32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST9]], [[C]](i32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x f16>), [[UV3:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f16>) + ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(f16) = G_BITCAST %51(i16) + ; SI-NEXT: [[BITCAST11:%[0-9]+]]:_(f16) = G_BITCAST %56(i16) + ; SI-NEXT: [[BITCAST12:%[0-9]+]]:_(f16) = G_BITCAST %52(i16) + ; SI-NEXT: [[BITCAST13:%[0-9]+]]:_(f16) = G_BITCAST %57(i16) + ; SI-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV3]](<2 x f16>) + ; SI-NEXT: [[BITCAST15:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST14]](<2 x i16>) + ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST15]](i32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST15]], [[C]](i32) + ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; SI-NEXT: 
[[BITCAST16:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV2]](<2 x f16>) + ; SI-NEXT: [[BITCAST17:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST16]](<2 x i16>) + ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST17]](i32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST17]], [[C]](i32) + ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(f16) = G_FNEG [[BITCAST10]] + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[FNEG]](f16) + ; SI-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FPEXT]], [[FPEXT1]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD]](f32) + ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(f16) = G_FNEG [[BITCAST12]] + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST4]](f16) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(f32) = G_FPEXT [[FNEG1]](f16) + ; SI-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FPEXT2]], [[FPEXT3]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD1]](f32) + ; SI-NEXT: [[FNEG2:%[0-9]+]]:_(f16) = G_FNEG [[BITCAST11]] + ; SI-NEXT: [[FPEXT4:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST3]](f16) + ; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(f32) = G_FPEXT [[FNEG2]](f16) + ; SI-NEXT: [[FADD2:%[0-9]+]]:_(f32) = G_FADD [[FPEXT4]], [[FPEXT5]] + ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD2]](f32) + ; SI-NEXT: [[FNEG3:%[0-9]+]]:_(f16) = G_FNEG [[BITCAST13]] + ; SI-NEXT: [[FPEXT6:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST5]](f16) + ; SI-NEXT: [[FPEXT7:%[0-9]+]]:_(f32) = G_FPEXT [[FNEG3]](f16) + ; SI-NEXT: [[FADD3:%[0-9]+]]:_(f32) = G_FADD [[FPEXT6]], [[FPEXT7]] + ; SI-NEXT: [[FPTRUNC3:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD3]](f32) + ; SI-NEXT: [[BITCAST18:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; SI-NEXT: [[BITCAST19:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST18]](i16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST19]](i16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST20:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[BITCAST21:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC2]](f16) + ; SI-NEXT: [[BITCAST22:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC3]](f16) + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST21]](i16) + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST22]](i16) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI-NEXT: [[BITCAST23:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR1]](i32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[BITCAST20]](<2 x f16>), [[BITCAST23]](<2 x f16>) + ; SI-NEXT: [[BITCAST24:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x f16>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST24]](<4 x i16>) ; ; VI-LABEL: name: test_fsub_v4s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: 
[[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; VI-NEXT: [[FSUB:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC]], [[TRUNC4]] - ; VI-NEXT: [[FSUB1:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC1]], [[TRUNC5]] - ; VI-NEXT: [[FSUB2:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC2]], [[TRUNC6]] - ; VI-NEXT: [[FSUB3:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC3]], [[TRUNC7]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FSUB]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FSUB1]](s16) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FSUB2]](s16) - ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FSUB3]](s16) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY1]](<4 x i16>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x f16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %24(i16) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %30(i16) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %25(i16) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %31(i16) + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST6]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST7]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST7]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST8]](<2 x i16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST9]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST9]], [[C]](i32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x f16>), [[UV3:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST1]](<4 x 
f16>) + ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(f16) = G_BITCAST %35(i16) + ; VI-NEXT: [[BITCAST11:%[0-9]+]]:_(f16) = G_BITCAST %40(i16) + ; VI-NEXT: [[BITCAST12:%[0-9]+]]:_(f16) = G_BITCAST %36(i16) + ; VI-NEXT: [[BITCAST13:%[0-9]+]]:_(f16) = G_BITCAST %41(i16) + ; VI-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV3]](<2 x f16>) + ; VI-NEXT: [[BITCAST15:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST14]](<2 x i16>) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST15]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST15]], [[C]](i32) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; VI-NEXT: [[BITCAST16:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV2]](<2 x f16>) + ; VI-NEXT: [[BITCAST17:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST16]](<2 x i16>) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST17]](i32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST17]], [[C]](i32) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; VI-NEXT: [[FSUB:%[0-9]+]]:_(f16) = G_FSUB [[BITCAST2]], [[BITCAST10]] + ; VI-NEXT: [[FSUB1:%[0-9]+]]:_(f16) = G_FSUB [[BITCAST4]], [[BITCAST12]] + ; VI-NEXT: [[FSUB2:%[0-9]+]]:_(f16) = G_FSUB [[BITCAST3]], [[BITCAST11]] + ; VI-NEXT: [[FSUB3:%[0-9]+]]:_(f16) = G_FSUB [[BITCAST5]], [[BITCAST13]] + ; VI-NEXT: [[BITCAST18:%[0-9]+]]:_(i16) = G_BITCAST [[FSUB]](f16) + ; VI-NEXT: [[BITCAST19:%[0-9]+]]:_(i16) = G_BITCAST [[FSUB1]](f16) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST18]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST19]](i16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST20:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[BITCAST21:%[0-9]+]]:_(i16) = G_BITCAST [[FSUB2]](f16) + ; VI-NEXT: [[BITCAST22:%[0-9]+]]:_(i16) = G_BITCAST [[FSUB3]](f16) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST21]](i16) + ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST22]](i16) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI-NEXT: [[BITCAST23:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR1]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[BITCAST20]](<2 x f16>), [[BITCAST23]](<2 x f16>) + ; VI-NEXT: [[BITCAST24:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x f16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST24]](<4 x i16>) ; ; GFX9-LABEL: name: test_fsub_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES 
[[COPY1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC]], [[TRUNC4]] - ; GFX9-NEXT: [[FSUB1:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC1]], [[TRUNC5]] - ; GFX9-NEXT: [[FSUB2:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC2]], [[TRUNC6]] - ; GFX9-NEXT: [[FSUB3:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC3]], [[TRUNC7]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FSUB]](s16), [[FSUB1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FSUB2]](s16), [[FSUB3]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 - %2:_(<4 x s16>) = G_FSUB %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY1]](<4 x i16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x f16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %24(i16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %30(i16) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %25(i16) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %31(i16) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST6]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST7]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST7]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST8]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST9]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST9]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x f16>), [[UV3:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST1]](<4 x f16>) + ; GFX9-NEXT: [[BITCAST10:%[0-9]+]]:_(f16) = G_BITCAST %35(i16) + ; GFX9-NEXT: [[BITCAST11:%[0-9]+]]:_(f16) = G_BITCAST %40(i16) + ; GFX9-NEXT: [[BITCAST12:%[0-9]+]]:_(f16) = G_BITCAST %36(i16) + ; GFX9-NEXT: [[BITCAST13:%[0-9]+]]:_(f16) = G_BITCAST %41(i16) + ; GFX9-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV3]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST15:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST14]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST15]](i32) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST15]], [[C]](i32) + ; 
GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX9-NEXT: [[BITCAST16:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV2]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST17:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST16]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST17]](i32) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST17]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(f16) = G_FSUB [[BITCAST2]], [[BITCAST10]] + ; GFX9-NEXT: [[FSUB1:%[0-9]+]]:_(f16) = G_FSUB [[BITCAST4]], [[BITCAST12]] + ; GFX9-NEXT: [[FSUB2:%[0-9]+]]:_(f16) = G_FSUB [[BITCAST3]], [[BITCAST11]] + ; GFX9-NEXT: [[FSUB3:%[0-9]+]]:_(f16) = G_FSUB [[BITCAST5]], [[BITCAST13]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[FSUB]](f16), [[FSUB1]](f16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[FSUB2]](f16), [[FSUB3]](f16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x f16>), [[BUILD_VECTOR1]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST18:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x f16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST18]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = COPY $vgpr2_vgpr3 + %2:_(<4 x f16>) = G_BITCAST %0(<4 x i16>) + %3:_(<4 x f16>) = G_BITCAST %1(<4 x i16>) + %4:_(<4 x f16>) = G_FSUB %2, %3 + %5:_(<4 x i16>) = G_BITCAST %4(<4 x f16>) + $vgpr0_vgpr1 = COPY %5(<4 x i16>) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir index 5ac4f1d8f264b..3aae8ec071214 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir @@ -13,34 +13,34 @@ body: | ; GFX7-LABEL: name: test_icmp_s32 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[COPY]] - ; GFX7-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[COPY]] - ; GFX7-NEXT: $vgpr0 = COPY [[SELECT]](s32) + ; GFX7-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[C]](i32), [[COPY]] + ; GFX7-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[C]], [[COPY]] + ; GFX7-NEXT: $vgpr0 = COPY [[SELECT]](i32) ; ; GFX8-LABEL: name: test_icmp_s32 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[COPY]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[COPY]] - ; GFX8-NEXT: $vgpr0 = COPY [[SELECT]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[C]](i32), [[COPY]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[C]], [[COPY]] + ; GFX8-NEXT: $vgpr0 = COPY [[SELECT]](i32) ; ; GFX9-LABEL: name: test_icmp_s32 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[COPY]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) 
= G_SELECT [[ICMP]](s1), [[C]], [[COPY]] - ; GFX9-NEXT: $vgpr0 = COPY [[SELECT]](s32) - %0:_(s32) = G_CONSTANT i32 0 - %1:_(s32) = COPY $vgpr0 - %2:_(s1) = G_ICMP intpred(ne), %0, %1 - %3:_(s32) = G_SELECT %2, %0, %1 - $vgpr0 = COPY %3 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[C]](i32), [[COPY]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[C]], [[COPY]] + ; GFX9-NEXT: $vgpr0 = COPY [[SELECT]](i32) + %0:_(i32) = G_CONSTANT i32 0 + %1:_(i32) = COPY $vgpr0 + %2:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %3:_(i32) = G_SELECT %2(i1), %0, %1 + $vgpr0 = COPY %3(i32) ... --- @@ -51,34 +51,34 @@ body: | ; GFX7-LABEL: name: test_icmp_s64 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s64), [[COPY]] - ; GFX7-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[COPY]] - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; GFX7-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[C]](i64), [[COPY]] + ; GFX7-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[C]], [[COPY]] + ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](i64) ; ; GFX8-LABEL: name: test_icmp_s64 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s64), [[COPY]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[COPY]] - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[C]](i64), [[COPY]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[C]], [[COPY]] + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](i64) ; ; GFX9-LABEL: name: test_icmp_s64 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s64), [[COPY]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[COPY]] - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) - %0:_(s64) = G_CONSTANT i64 0 - %1:_(s64) = COPY $vgpr0_vgpr1 - %2:_(s1) = G_ICMP intpred(ne), %0, %1 - %3:_(s64) = G_SELECT %2, %0, %1 - $vgpr0_vgpr1 = COPY %3 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[C]](i64), [[COPY]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[C]], [[COPY]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](i64) + %0:_(i64) = G_CONSTANT i64 0 + %1:_(i64) = COPY $vgpr0_vgpr1 + %2:_(i1) = G_ICMP intpred(ne), %0(i64), %1 + %3:_(i64) = G_SELECT %2(i1), %0, %1 + $vgpr0_vgpr1 = COPY %3(i64) ... 
--- @@ -89,45 +89,45 @@ body: | ; GFX7-LABEL: name: test_icmp_s16 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX7-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX7-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C2]] - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C1]](s32), [[AND]] - ; GFX7-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C]], [[TRUNC]] - ; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) - ; GFX7-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX7-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 0 + ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX7-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX7-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C2]] + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[C1]](i32), [[AND]] + ; GFX7-NEXT: [[SELECT:%[0-9]+]]:_(i16) = G_SELECT [[ICMP]](i1), [[C]], [[TRUNC]] + ; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[SELECT]](i16) + ; GFX7-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX8-LABEL: name: test_icmp_s16 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s16), [[TRUNC]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C]], [[TRUNC]] - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) - ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 0 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[C]](i16), [[TRUNC]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i16) = G_SELECT [[ICMP]](i1), [[C]], [[TRUNC]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[SELECT]](i16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: test_icmp_s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s16), [[TRUNC]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C]], [[TRUNC]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s16) = G_CONSTANT i16 0 - %1:_(s32) = COPY $vgpr0 - %2:_(s16) = G_TRUNC %1 - %3:_(s1) = G_ICMP intpred(ne), %0, %2 - %4:_(s16) = G_SELECT %3, %0, %2 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 0 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[C]](i16), [[TRUNC]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i16) = G_SELECT [[ICMP]](i1), [[C]], [[TRUNC]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[SELECT]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i16) = G_CONSTANT 
i16 0 + %1:_(i32) = COPY $vgpr0 + %2:_(i16) = G_TRUNC %1(i32) + %3:_(i1) = G_ICMP intpred(ne), %0(i16), %2 + %4:_(i16) = G_SELECT %3(i1), %0, %2 + %5:_(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... --- @@ -138,51 +138,51 @@ body: | ; GFX7-LABEL: name: test_icmp_s8 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX7-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[AND]] - ; GFX7-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX7-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C2]], [[TRUNC]] - ; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) - ; GFX7-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX7-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX7-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; GFX7-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C1]] + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[C]](i32), [[AND]] + ; GFX7-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 0 + ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX7-NEXT: [[SELECT:%[0-9]+]]:_(i16) = G_SELECT [[ICMP]](i1), [[C2]], [[TRUNC]] + ; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[SELECT]](i16) + ; GFX7-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX8-LABEL: name: test_icmp_s8 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[AND]] - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C2]], [[TRUNC]] - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) - ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C1]] + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[C]](i32), [[AND]] + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 0 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i16) = G_SELECT [[ICMP]](i1), [[C2]], [[TRUNC]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[SELECT]](i16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: test_icmp_s8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[AND]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C2]], [[TRUNC]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT 
[[SELECT]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s8) = G_CONSTANT i8 0 - %1:_(s32) = COPY $vgpr0 - %2:_(s8) = G_TRUNC %1 - %3:_(s1) = G_ICMP intpred(ne), %0, %2 - %4:_(s8) = G_SELECT %3, %0, %2 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C1]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[C]](i32), [[AND]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i16) = G_SELECT [[ICMP]](i1), [[C2]], [[TRUNC]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[SELECT]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i8) = G_CONSTANT i8 0 + %1:_(i32) = COPY $vgpr0 + %2:_(i8) = G_TRUNC %1(i32) + %3:_(i1) = G_ICMP intpred(ne), %0(i8), %2 + %4:_(i8) = G_SELECT %3(i1), %0, %2 + %5:_(i32) = G_ANYEXT %4(i8) + $vgpr0 = COPY %5(i32) ... --- @@ -193,42 +193,42 @@ body: | ; GFX7-LABEL: name: test_icmp_s24 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 - ; GFX7-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[AND]] - ; GFX7-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[COPY]] - ; GFX7-NEXT: $vgpr0 = COPY [[SELECT]](s32) + ; GFX7-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX7-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16777215 + ; GFX7-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C1]] + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[C]](i32), [[AND]] + ; GFX7-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[C]], [[COPY]] + ; GFX7-NEXT: $vgpr0 = COPY [[SELECT]](i32) ; ; GFX8-LABEL: name: test_icmp_s24 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[AND]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[COPY]] - ; GFX8-NEXT: $vgpr0 = COPY [[SELECT]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16777215 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C1]] + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[C]](i32), [[AND]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[C]], [[COPY]] + ; GFX8-NEXT: $vgpr0 = COPY [[SELECT]](i32) ; ; GFX9-LABEL: name: test_icmp_s24 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[AND]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[COPY]] - ; GFX9-NEXT: 
$vgpr0 = COPY [[SELECT]](s32) - %0:_(s24) = G_CONSTANT i24 0 - %1:_(s32) = COPY $vgpr0 - %2:_(s24) = G_TRUNC %1 - %3:_(s1) = G_ICMP intpred(ne), %0, %2 - %4:_(s24) = G_SELECT %3, %0, %2 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16777215 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C1]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[C]](i32), [[AND]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[C]], [[COPY]] + ; GFX9-NEXT: $vgpr0 = COPY [[SELECT]](i32) + %0:_(i24) = G_CONSTANT i24 0 + %1:_(i32) = COPY $vgpr0 + %2:_(i24) = G_TRUNC %1(i32) + %3:_(i1) = G_ICMP intpred(ne), %0(i24), %2 + %4:_(i24) = G_SELECT %3(i1), %0, %2 + %5:_(i32) = G_ANYEXT %4(i24) + $vgpr0 = COPY %5(i32) ... --- @@ -239,56 +239,56 @@ body: | ; GFX7-LABEL: name: test_icmp_v2s32 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[UV]] - ; GFX7-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[UV1]] - ; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX7-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX7-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX7-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] - ; GFX7-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) - ; GFX7-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s32>) + ; GFX7-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[C]](i32), [[UV]] + ; GFX7-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[C]](i32), [[UV1]] + ; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP]](i1) + ; GFX7-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP1]](i1) + ; GFX7-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX7-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[ANYEXT]], [[C1]] + ; GFX7-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[ANYEXT1]], [[C1]] + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[AND]](i32), [[AND1]](i32) + ; GFX7-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x i32>) ; ; GFX8-LABEL: name: test_icmp_v2s32 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[UV]] - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[UV1]] - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] - ; GFX8-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) - ; GFX8-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[C]](i32), [[UV]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[C]](i32), [[UV1]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP]](i1) + ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP1]](i1) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[ANYEXT]], [[C1]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[ANYEXT1]], [[C1]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[AND]](i32), [[AND1]](i32) + ; GFX8-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x i32>) ; ; GFX9-LABEL: name: test_icmp_v2s32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[UV]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[UV1]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] - ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) - ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s32>) - %0:_(s32) = G_CONSTANT i32 0 - %1:_(<2 x s32>) = G_BUILD_VECTOR %0, %0 - %2:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %3:_(<2 x s1>) = G_ICMP intpred(ne), %1, %2 - %4:_(<2 x s32>) = G_ZEXT %3 - S_NOP 0, implicit %4 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[C]](i32), [[UV]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[C]](i32), [[UV1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP]](i1) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[ANYEXT]], [[C1]] + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP1]](i1) + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[ANYEXT1]], [[C1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[AND]](i32), [[AND1]](i32) + ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x i32>) + %0:_(i32) = G_CONSTANT i32 0 + %1:_(<2 x i32>) = G_BUILD_VECTOR %0(i32), %0(i32) + %2:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %3:_(<2 x i1>) = G_ICMP intpred(ne), %1(<2 x i32>), %2 + %4:_(<2 x i32>) = G_ZEXT %3(<2 x i1>) + S_NOP 0, implicit %4(<2 x i32>) ... 
--- @@ -300,67 +300,67 @@ body: | ; GFX7-LABEL: name: test_icmp_v3s32 ; GFX7: liveins: $vgpr0_vgpr1_vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX7-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<3 x s32>) - ; GFX7-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV3]] - ; GFX7-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV4]] - ; GFX7-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[UV5]] - ; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX7-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX7-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP2]](s1) - ; GFX7-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX7-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] - ; GFX7-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C]] - ; GFX7-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C]] - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32) - ; GFX7-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; GFX7-NEXT: [[DEF:%[0-9]+]]:_(<3 x i32>) = G_IMPLICIT_DEF + ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX7-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF]](<3 x i32>) + ; GFX7-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV]](i32), [[UV3]] + ; GFX7-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV1]](i32), [[UV4]] + ; GFX7-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV2]](i32), [[UV5]] + ; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP]](i1) + ; GFX7-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP1]](i1) + ; GFX7-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP2]](i1) + ; GFX7-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX7-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[ANYEXT]], [[C]] + ; GFX7-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[ANYEXT1]], [[C]] + ; GFX7-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[ANYEXT2]], [[C]] + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[AND]](i32), [[AND1]](i32), [[AND2]](i32) + ; GFX7-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x i32>) ; ; GFX8-LABEL: name: test_icmp_v3s32 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<3 x s32>) - ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV3]] - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV4]] - ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[UV5]] - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP2]](s1) - ; 
GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C]] - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C]] - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32) - ; GFX8-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<3 x i32>) = G_IMPLICIT_DEF + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF]](<3 x i32>) + ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV]](i32), [[UV3]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV1]](i32), [[UV4]] + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV2]](i32), [[UV5]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP]](i1) + ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP1]](i1) + ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP2]](i1) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[ANYEXT]], [[C]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[ANYEXT1]], [[C]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[ANYEXT2]], [[C]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[AND]](i32), [[AND1]](i32), [[AND2]](i32) + ; GFX8-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x i32>) ; ; GFX9-LABEL: name: test_icmp_v3s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<3 x s32>) - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV3]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV4]] - ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[UV5]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] - ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C]] - ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP2]](s1) - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32) - ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) - %0:_(<3 x s32>) = G_IMPLICIT_DEF - %1:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %2:_(<3 x s1>) = G_ICMP intpred(ne), %0, %1 - %3:_(<3 x s32>) = G_ZEXT %2 - S_NOP 0, implicit %3 + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<3 x i32>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF]](<3 x i32>) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES 
[[COPY]](<3 x i32>) + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV]](i32), [[UV3]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV1]](i32), [[UV4]] + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV2]](i32), [[UV5]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP]](i1) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[ANYEXT]], [[C]] + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP1]](i1) + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[ANYEXT1]], [[C]] + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP2]](i1) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[ANYEXT2]], [[C]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[AND]](i32), [[AND1]](i32), [[AND2]](i32) + ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x i32>) + %0:_(<3 x i32>) = G_IMPLICIT_DEF + %1:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %2:_(<3 x i1>) = G_ICMP intpred(ne), %0(<3 x i32>), %1 + %3:_(<3 x i32>) = G_ZEXT %2(<3 x i1>) + S_NOP 0, implicit %3(<3 x i32>) ... --- @@ -373,79 +373,79 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GFX7-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load (<4 x s32>)) - ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX7-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; GFX7-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV4]] - ; GFX7-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV5]] - ; GFX7-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[UV6]] - ; GFX7-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV3]](s32), [[UV7]] - ; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX7-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX7-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP2]](s1) - ; GFX7-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP3]](s1) - ; GFX7-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX7-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] - ; GFX7-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C]] - ; GFX7-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C]] - ; GFX7-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C]] - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32), [[AND3]](s32) - ; GFX7-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<4 x s32>) + ; GFX7-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[DEF]](p1) :: (volatile load (<4 x i32>)) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX7-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<4 x i32>) + ; GFX7-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV]](i32), [[UV4]] + ; GFX7-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV1]](i32), [[UV5]] + ; GFX7-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV2]](i32), [[UV6]] + ; 
GFX7-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV3]](i32), [[UV7]] + ; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP]](i1) + ; GFX7-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP1]](i1) + ; GFX7-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP2]](i1) + ; GFX7-NEXT: [[ANYEXT3:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP3]](i1) + ; GFX7-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX7-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[ANYEXT]], [[C]] + ; GFX7-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[ANYEXT1]], [[C]] + ; GFX7-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[ANYEXT2]], [[C]] + ; GFX7-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[ANYEXT3]], [[C]] + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[AND]](i32), [[AND1]](i32), [[AND2]](i32), [[AND3]](i32) + ; GFX7-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<4 x i32>) ; ; GFX8-LABEL: name: test_icmp_v4s32 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GFX8-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load (<4 x s32>)) - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV4]] - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV5]] - ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[UV6]] - ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV3]](s32), [[UV7]] - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP2]](s1) - ; GFX8-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP3]](s1) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C]] - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C]] - ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C]] - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32), [[AND3]](s32) - ; GFX8-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<4 x s32>) + ; GFX8-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[DEF]](p1) :: (volatile load (<4 x i32>)) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<4 x i32>) + ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV]](i32), [[UV4]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV1]](i32), [[UV5]] + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV2]](i32), [[UV6]] + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV3]](i32), [[UV7]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP]](i1) + ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP1]](i1) + ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = 
G_ANYEXT [[ICMP2]](i1) + ; GFX8-NEXT: [[ANYEXT3:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP3]](i1) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[ANYEXT]], [[C]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[ANYEXT1]], [[C]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[ANYEXT2]], [[C]] + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[ANYEXT3]], [[C]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[AND]](i32), [[AND1]](i32), [[AND2]](i32), [[AND3]](i32) + ; GFX8-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<4 x i32>) ; ; GFX9-LABEL: name: test_icmp_v4s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load (<4 x s32>)) - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV4]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV5]] - ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[UV6]] - ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV3]](s32), [[UV7]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] - ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C]] - ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP2]](s1) - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C]] - ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP3]](s1) - ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32), [[AND3]](s32) - ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<4 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[DEF]](p1) :: (volatile load (<4 x i32>)) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV]](i32), [[UV4]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV1]](i32), [[UV5]] + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV2]](i32), [[UV6]] + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV3]](i32), [[UV7]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP]](i1) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[ANYEXT]], [[C]] + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP1]](i1) + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[ANYEXT1]], [[C]] + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP2]](i1) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND 
[[ANYEXT2]], [[C]] + ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP3]](i1) + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[ANYEXT3]], [[C]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[AND]](i32), [[AND1]](i32), [[AND2]](i32), [[AND3]](i32) + ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<4 x i32>) %0:_(p1) = G_IMPLICIT_DEF - %1:_(<4 x s32>) = G_LOAD %0 :: (volatile load (<4 x s32>)) - %2:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %3:_(<4 x s1>) = G_ICMP intpred(ne), %1, %2 - %4:_(<4 x s32>) = G_ZEXT %3 - S_NOP 0, implicit %4 + %1:_(<4 x i32>) = G_LOAD %0(p1) :: (volatile load (<4 x i32>)) + %2:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %3:_(<4 x i1>) = G_ICMP intpred(ne), %1(<4 x i32>), %2 + %4:_(<4 x i32>) = G_ZEXT %3(<4 x i1>) + S_NOP 0, implicit %4(<4 x i32>) ... @@ -459,32 +459,32 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p0), [[COPY1]] - ; GFX7-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX7-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[COPY]](p0), [[COPY1]] + ; GFX7-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX7-NEXT: $vgpr0 = COPY [[SEXT]](i32) ; ; GFX8-LABEL: name: test_icmp_p0 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p0), [[COPY1]] - ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX8-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[COPY]](p0), [[COPY1]] + ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX8-NEXT: $vgpr0 = COPY [[SEXT]](i32) ; ; GFX9-LABEL: name: test_icmp_p0 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p0), [[COPY1]] - ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX9-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[COPY]](p0), [[COPY1]] + ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX9-NEXT: $vgpr0 = COPY [[SEXT]](i32) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(p0) = COPY $vgpr0_vgpr1 - %2:_(s1) = G_ICMP intpred(ne), %0, %1 - %3:_(s32) = G_SEXT %2 - $vgpr0 = COPY %3 + %2:_(i1) = G_ICMP intpred(ne), %0(p0), %1 + %3:_(i32) = G_SEXT %2(i1) + $vgpr0 = COPY %3(i32) ... 
--- @@ -497,32 +497,32 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p1), [[COPY1]] - ; GFX7-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX7-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[COPY]](p1), [[COPY1]] + ; GFX7-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX7-NEXT: $vgpr0 = COPY [[SEXT]](i32) ; ; GFX8-LABEL: name: test_icmp_p1 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p1), [[COPY1]] - ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX8-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[COPY]](p1), [[COPY1]] + ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX8-NEXT: $vgpr0 = COPY [[SEXT]](i32) ; ; GFX9-LABEL: name: test_icmp_p1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p1), [[COPY1]] - ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX9-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[COPY]](p1), [[COPY1]] + ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX9-NEXT: $vgpr0 = COPY [[SEXT]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(p1) = COPY $vgpr0_vgpr1 - %2:_(s1) = G_ICMP intpred(ne), %0, %1 - %3:_(s32) = G_SEXT %2 - $vgpr0 = COPY %3 + %2:_(i1) = G_ICMP intpred(ne), %0(p1), %1 + %3:_(i32) = G_SEXT %2(i1) + $vgpr0 = COPY %3(i32) ... 
--- @@ -536,32 +536,32 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(p2) = COPY $vgpr0 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(p2) = COPY $vgpr1 - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p2), [[COPY1]] - ; GFX7-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX7-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[COPY]](p2), [[COPY1]] + ; GFX7-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX7-NEXT: $vgpr0 = COPY [[SEXT]](i32) ; ; GFX8-LABEL: name: test_icmp_p2 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(p2) = COPY $vgpr0 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(p2) = COPY $vgpr1 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p2), [[COPY1]] - ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX8-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[COPY]](p2), [[COPY1]] + ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX8-NEXT: $vgpr0 = COPY [[SEXT]](i32) ; ; GFX9-LABEL: name: test_icmp_p2 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p2) = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(p2) = COPY $vgpr1 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p2), [[COPY1]] - ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX9-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[COPY]](p2), [[COPY1]] + ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX9-NEXT: $vgpr0 = COPY [[SEXT]](i32) %0:_(p2) = COPY $vgpr0 %1:_(p2) = COPY $vgpr1 - %2:_(s1) = G_ICMP intpred(ne), %0, %1 - %3:_(s32) = G_SEXT %2 - $vgpr0 = COPY %3 + %2:_(i1) = G_ICMP intpred(ne), %0(p2), %1 + %3:_(i32) = G_SEXT %2(i1) + $vgpr0 = COPY %3(i32) ... 
--- @@ -575,32 +575,32 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr1 - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p3), [[COPY1]] - ; GFX7-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX7-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[COPY]](p3), [[COPY1]] + ; GFX7-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX7-NEXT: $vgpr0 = COPY [[SEXT]](i32) ; ; GFX8-LABEL: name: test_icmp_p3 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr1 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p3), [[COPY1]] - ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX8-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[COPY]](p3), [[COPY1]] + ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX8-NEXT: $vgpr0 = COPY [[SEXT]](i32) ; ; GFX9-LABEL: name: test_icmp_p3 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr1 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p3), [[COPY1]] - ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX9-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[COPY]](p3), [[COPY1]] + ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX9-NEXT: $vgpr0 = COPY [[SEXT]](i32) %0:_(p3) = COPY $vgpr0 %1:_(p3) = COPY $vgpr1 - %2:_(s1) = G_ICMP intpred(ne), %0, %1 - %3:_(s32) = G_SEXT %2 - $vgpr0 = COPY %3 + %2:_(i1) = G_ICMP intpred(ne), %0(p3), %1 + %3:_(i32) = G_SEXT %2(i1) + $vgpr0 = COPY %3(i32) ... 
--- @@ -613,32 +613,32 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p4), [[COPY1]] - ; GFX7-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX7-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[COPY]](p4), [[COPY1]] + ; GFX7-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX7-NEXT: $vgpr0 = COPY [[SEXT]](i32) ; ; GFX8-LABEL: name: test_icmp_p4 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p4), [[COPY1]] - ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX8-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[COPY]](p4), [[COPY1]] + ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX8-NEXT: $vgpr0 = COPY [[SEXT]](i32) ; ; GFX9-LABEL: name: test_icmp_p4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p4), [[COPY1]] - ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX9-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[COPY]](p4), [[COPY1]] + ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX9-NEXT: $vgpr0 = COPY [[SEXT]](i32) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(p4) = COPY $vgpr0_vgpr1 - %2:_(s1) = G_ICMP intpred(ne), %0, %1 - %3:_(s32) = G_SEXT %2 - $vgpr0 = COPY %3 + %2:_(i1) = G_ICMP intpred(ne), %0(p4), %1 + %3:_(i32) = G_SEXT %2(i1) + $vgpr0 = COPY %3(i32) ... 
--- @@ -652,32 +652,32 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY $vgpr1 - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p5), [[COPY1]] - ; GFX7-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX7-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[COPY]](p5), [[COPY1]] + ; GFX7-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX7-NEXT: $vgpr0 = COPY [[SEXT]](i32) ; ; GFX8-LABEL: name: test_icmp_p5 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY $vgpr1 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p5), [[COPY1]] - ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX8-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[COPY]](p5), [[COPY1]] + ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX8-NEXT: $vgpr0 = COPY [[SEXT]](i32) ; ; GFX9-LABEL: name: test_icmp_p5 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY $vgpr1 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p5), [[COPY1]] - ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX9-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[COPY]](p5), [[COPY1]] + ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX9-NEXT: $vgpr0 = COPY [[SEXT]](i32) %0:_(p5) = COPY $vgpr0 %1:_(p5) = COPY $vgpr1 - %2:_(s1) = G_ICMP intpred(ne), %0, %1 - %3:_(s32) = G_SEXT %2 - $vgpr0 = COPY %3 + %2:_(i1) = G_ICMP intpred(ne), %0(p5), %1 + %3:_(i32) = G_SEXT %2(i1) + $vgpr0 = COPY %3(i32) ... 
--- @@ -690,32 +690,32 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(p999) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p999), [[COPY1]] - ; GFX7-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX7-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[COPY]](p999), [[COPY1]] + ; GFX7-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX7-NEXT: $vgpr0 = COPY [[SEXT]](i32) ; ; GFX8-LABEL: name: test_icmp_p999 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(p999) = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p999), [[COPY1]] - ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX8-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[COPY]](p999), [[COPY1]] + ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX8-NEXT: $vgpr0 = COPY [[SEXT]](i32) ; ; GFX9-LABEL: name: test_icmp_p999 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p999) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p999), [[COPY1]] - ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX9-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[COPY]](p999), [[COPY1]] + ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX9-NEXT: $vgpr0 = COPY [[SEXT]](i32) %0:_(p999) = COPY $vgpr0_vgpr1 %1:_(p999) = COPY $vgpr0_vgpr1 - %2:_(s1) = G_ICMP intpred(ne), %0, %1 - %3:_(s32) = G_SEXT %2 - $vgpr0 = COPY %3 + %2:_(i1) = G_ICMP intpred(ne), %0(p999), %1 + %3:_(i32) = G_SEXT %2(i1) + $vgpr0 = COPY %3(i32) ... 
--- @@ -730,14 +730,14 @@ body: | ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY]](<2 x p3>) ; GFX7-NEXT: [[UV2:%[0-9]+]]:_(p3), [[UV3:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY1]](<2 x p3>) - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](p3), [[UV2]] - ; GFX7-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](p3), [[UV3]] - ; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX7-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX7-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT]], 1 - ; GFX7-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT1]], 1 - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV]](p3), [[UV2]] + ; GFX7-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV1]](p3), [[UV3]] + ; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP]](i1) + ; GFX7-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP1]](i1) + ; GFX7-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[ANYEXT]], 1 + ; GFX7-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[ANYEXT1]], 1 + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SEXT_INREG]](i32), [[SEXT_INREG1]](i32) + ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX8-LABEL: name: test_icmp_v2p3 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -746,14 +746,14 @@ body: | ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY]](<2 x p3>) ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(p3), [[UV3:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY1]](<2 x p3>) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](p3), [[UV2]] - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](p3), [[UV3]] - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT]], 1 - ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT1]], 1 - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV]](p3), [[UV2]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV1]](p3), [[UV3]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP]](i1) + ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP1]](i1) + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[ANYEXT]], 1 + ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[ANYEXT1]], 1 + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SEXT_INREG]](i32), [[SEXT_INREG1]](i32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX9-LABEL: name: test_icmp_v2p3 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -762,19 +762,19 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY]](<2 x p3>) ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(p3), [[UV3:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY1]](<2 x p3>) - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP 
intpred(ne), [[UV]](p3), [[UV2]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](p3), [[UV3]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT]], 1 - ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT1]], 1 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV]](p3), [[UV2]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV1]](p3), [[UV3]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP]](i1) + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[ANYEXT]], 1 + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP1]](i1) + ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[ANYEXT1]], 1 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SEXT_INREG]](i32), [[SEXT_INREG1]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) %0:_(<2 x p3>) = COPY $vgpr0_vgpr1 %1:_(<2 x p3>) = COPY $vgpr0_vgpr1 - %2:_(<2 x s1>) = G_ICMP intpred(ne), %0, %1 - %3:_(<2 x s32>) = G_SEXT %2 - $vgpr0_vgpr1 = COPY %3 + %2:_(<2 x i1>) = G_ICMP intpred(ne), %0(<2 x p3>), %1 + %3:_(<2 x i32>) = G_SEXT %2(<2 x i1>) + $vgpr0_vgpr1 = COPY %3(<2 x i32>) ... --- @@ -789,14 +789,14 @@ body: | ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p999>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX7-NEXT: [[UV:%[0-9]+]]:_(p999), [[UV1:%[0-9]+]]:_(p999) = G_UNMERGE_VALUES [[COPY]](<2 x p999>) ; GFX7-NEXT: [[UV2:%[0-9]+]]:_(p999), [[UV3:%[0-9]+]]:_(p999) = G_UNMERGE_VALUES [[COPY1]](<2 x p999>) - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](p999), [[UV2]] - ; GFX7-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](p999), [[UV3]] - ; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX7-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX7-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT]], 1 - ; GFX7-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT1]], 1 - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV]](p999), [[UV2]] + ; GFX7-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV1]](p999), [[UV3]] + ; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP]](i1) + ; GFX7-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP1]](i1) + ; GFX7-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[ANYEXT]], 1 + ; GFX7-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[ANYEXT1]], 1 + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SEXT_INREG]](i32), [[SEXT_INREG1]](i32) + ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX8-LABEL: name: test_icmp_v2p999 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 @@ -805,14 +805,14 @@ body: | ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p999>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX8-NEXT: [[UV:%[0-9]+]]:_(p999), [[UV1:%[0-9]+]]:_(p999) = G_UNMERGE_VALUES [[COPY]](<2 x p999>) ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(p999), [[UV3:%[0-9]+]]:_(p999) = G_UNMERGE_VALUES [[COPY1]](<2 x p999>) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](p999), 
[[UV2]] - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](p999), [[UV3]] - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT]], 1 - ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT1]], 1 - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV]](p999), [[UV2]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV1]](p999), [[UV3]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP]](i1) + ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP1]](i1) + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[ANYEXT]], 1 + ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[ANYEXT1]], 1 + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SEXT_INREG]](i32), [[SEXT_INREG1]](i32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX9-LABEL: name: test_icmp_v2p999 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 @@ -821,19 +821,19 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p999>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(p999), [[UV1:%[0-9]+]]:_(p999) = G_UNMERGE_VALUES [[COPY]](<2 x p999>) ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(p999), [[UV3:%[0-9]+]]:_(p999) = G_UNMERGE_VALUES [[COPY1]](<2 x p999>) - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](p999), [[UV2]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](p999), [[UV3]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT]], 1 - ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT1]], 1 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV]](p999), [[UV2]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV1]](p999), [[UV3]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP]](i1) + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[ANYEXT]], 1 + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP1]](i1) + ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[ANYEXT1]], 1 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SEXT_INREG]](i32), [[SEXT_INREG1]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) %0:_(<2 x p999>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x p999>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - %2:_(<2 x s1>) = G_ICMP intpred(ne), %0, %1 - %3:_(<2 x s32>) = G_SEXT %2 - $vgpr0_vgpr1 = COPY %3 + %2:_(<2 x i1>) = G_ICMP intpred(ne), %0(<2 x p999>), %1 + %3:_(<2 x i32>) = G_SEXT %2(<2 x i1>) + $vgpr0_vgpr1 = COPY %3(<2 x i32>) ... 
--- @@ -844,83 +844,83 @@ body: | ; GFX7-LABEL: name: test_icmp_v2s16 ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; GFX7-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX7-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX7-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX7-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX7-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX7-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; GFX7-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[AND]](s32), [[AND1]] - ; GFX7-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LSHR]](s32), [[LSHR1]] - ; GFX7-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; GFX7-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<2 x s32>) - ; GFX7-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]] - ; GFX7-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[UV1]], [[UV3]] - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr4_vgpr5 + ; GFX7-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX7-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX7-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX7-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX7-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX7-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; GFX7-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C1]] + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[AND]](i32), [[AND1]] + ; GFX7-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[LSHR]](i32), [[LSHR1]] + ; GFX7-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY2]](<2 x i32>) + ; GFX7-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY3]](<2 x i32>) + ; GFX7-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[UV]], [[UV2]] + ; GFX7-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[UV1]], [[UV3]] + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SELECT]](i32), [[SELECT1]](i32) + ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX8-LABEL: name: test_icmp_v2s16 ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; GFX8-NEXT: 
[[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[TRUNC2]] - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[TRUNC3]] - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<2 x s32>) - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]] - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[UV1]], [[UV3]] - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr4_vgpr5 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[TRUNC]](i16), [[TRUNC2]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[TRUNC1]](i16), [[TRUNC3]] + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY2]](<2 x i32>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY3]](<2 x i32>) + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[UV]], [[UV2]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[UV1]], [[UV3]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SELECT]](i32), [[SELECT1]](i32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX9-LABEL: name: test_icmp_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: 
[[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[TRUNC2]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[TRUNC3]] - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<2 x s32>) - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[UV1]], [[UV3]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %3:_(<2 x s32>) = COPY $vgpr4_vgpr5 - %4:_(<2 x s1>) = G_ICMP intpred(ne), %0, %1 - %5:_(<2 x s32>) = G_SELECT %4, %2, %3 - $vgpr0_vgpr1 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr4_vgpr5 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[TRUNC]](i16), [[TRUNC2]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[TRUNC1]](i16), [[TRUNC3]] + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY2]](<2 x i32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY3]](<2 x i32>) + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[UV]], [[UV2]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[UV1]], [[UV3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SELECT]](i32), [[SELECT1]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %3:_(<2 x i32>) = COPY $vgpr4_vgpr5 + %4:_(<2 x i1>) = G_ICMP intpred(ne), %0(<2 x i16>), %1 + %5:_(<2 x i32>) = G_SELECT %4(<2 x i1>), %2, %3 + $vgpr0_vgpr1 = COPY %5(<2 x i32>) ... 
--- @@ -931,26 +931,26 @@ body: | ; GFX7-LABEL: name: test_icmp_s33 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s64), [[C]] - ; GFX7-NEXT: S_ENDPGM 0, implicit [[ICMP]](s1) + ; GFX7-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[C]](i64), [[C]] + ; GFX7-NEXT: S_ENDPGM 0, implicit [[ICMP]](i1) ; ; GFX8-LABEL: name: test_icmp_s33 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s64), [[C]] - ; GFX8-NEXT: S_ENDPGM 0, implicit [[ICMP]](s1) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[C]](i64), [[C]] + ; GFX8-NEXT: S_ENDPGM 0, implicit [[ICMP]](i1) ; ; GFX9-LABEL: name: test_icmp_s33 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s64), [[C]] - ; GFX9-NEXT: S_ENDPGM 0, implicit [[ICMP]](s1) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s33) = G_TRUNC %0 - %2:_(s33) = G_CONSTANT i33 0 - %3:_(s1) = G_ICMP intpred(ne), %2, %2 - S_ENDPGM 0, implicit %3 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[C]](i64), [[C]] + ; GFX9-NEXT: S_ENDPGM 0, implicit [[ICMP]](i1) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i33) = G_TRUNC %0(i64) + %2:_(i33) = G_CONSTANT i33 0 + %3:_(i1) = G_ICMP intpred(ne), %2(i33), %2 + S_ENDPGM 0, implicit %3(i1) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def-s1025.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def-s1025.mir index 1e6f1e7e5dc1c..40e8de3bf36b0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def-s1025.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def-s1025.mir @@ -7,14 +7,15 @@ name: test_implicit_def_s1025 body: | bb.0: ; TAHITI-LABEL: name: test_implicit_def_s1025 - ; TAHITI: [[DEF:%[0-9]+]]:_(s1024) = G_IMPLICIT_DEF - ; TAHITI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s1024) - ; TAHITI-NEXT: $vgpr0 = COPY [[UV]](s32) + ; TAHITI: [[DEF:%[0-9]+]]:_(i1024) = G_IMPLICIT_DEF + ; TAHITI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32), [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32), [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32), [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32), 
[[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32), [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32), [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32), [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32), [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32), [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32), [[UV28:%[0-9]+]]:_(i32), [[UV29:%[0-9]+]]:_(i32), [[UV30:%[0-9]+]]:_(i32), [[UV31:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF]](i1024) + ; TAHITI-NEXT: $vgpr0 = COPY [[UV]](i32) + ; ; FIJI-LABEL: name: test_implicit_def_s1025 - ; FIJI: [[DEF:%[0-9]+]]:_(s1024) = G_IMPLICIT_DEF - ; FIJI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s1024) - ; FIJI-NEXT: $vgpr0 = COPY [[UV]](s32) - %0:_(s1025) = G_IMPLICIT_DEF - %1:_(s32) = G_TRUNC %0 - $vgpr0 = COPY %1 + ; FIJI: [[DEF:%[0-9]+]]:_(i1024) = G_IMPLICIT_DEF + ; FIJI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32), [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32), [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32), [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32), [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32), [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32), [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32), [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32), [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32), [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32), [[UV28:%[0-9]+]]:_(i32), [[UV29:%[0-9]+]]:_(i32), [[UV30:%[0-9]+]]:_(i32), [[UV31:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF]](i1024) + ; FIJI-NEXT: $vgpr0 = COPY [[UV]](i32) + %0:_(i1025) = G_IMPLICIT_DEF + %1:_(i32) = G_TRUNC %0(i1025) + $vgpr0 = COPY %1(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir index 8113ebfa5362e..0e76d7f601ae4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir @@ -8,11 +8,11 @@ body: | bb.0: ; CHECK-LABEL: name: test_implicit_def_s1 - ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](s32) - %0:_(s1) = G_IMPLICIT_DEF - %1:_(s32) = G_ANYEXT %0 - $vgpr0 = COPY %1 + ; CHECK: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](i32) + %0:_(i1) = G_IMPLICIT_DEF + %1:_(i32) = G_ANYEXT %0(i1) + $vgpr0 = COPY %1(i32) ... 
--- @@ -21,11 +21,11 @@ body: | bb.0: ; CHECK-LABEL: name: test_implicit_def_s7 - ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](s32) - %0:_(s7) = G_IMPLICIT_DEF - %1:_(s32) = G_ANYEXT %0 - $vgpr0 = COPY %1 + ; CHECK: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](i32) + %0:_(i7) = G_IMPLICIT_DEF + %1:_(i32) = G_ANYEXT %0(i7) + $vgpr0 = COPY %1(i32) ... --- @@ -34,11 +34,11 @@ body: | bb.0: ; CHECK-LABEL: name: test_implicit_def_s8 - ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](s32) - %0:_(s8) = G_IMPLICIT_DEF - %1:_(s32) = G_ANYEXT %0 - $vgpr0 = COPY %1 + ; CHECK: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](i32) + %0:_(i8) = G_IMPLICIT_DEF + %1:_(i32) = G_ANYEXT %0(i8) + $vgpr0 = COPY %1(i32) ... --- @@ -47,11 +47,11 @@ body: | bb.0: ; CHECK-LABEL: name: test_implicit_def_s16 - ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](s32) - %0:_(s16) = G_IMPLICIT_DEF - %1:_(s32) = G_ANYEXT %0 - $vgpr0 = COPY %1 + ; CHECK: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](i32) + %0:_(i16) = G_IMPLICIT_DEF + %1:_(i32) = G_ANYEXT %0(i16) + $vgpr0 = COPY %1(i32) ... --- @@ -60,10 +60,10 @@ body: | bb.0: ; CHECK-LABEL: name: test_implicit_def_s32 - ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](s32) - %0:_(s32) = G_IMPLICIT_DEF - $vgpr0 = COPY %0 + ; CHECK: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](i32) + %0:_(i32) = G_IMPLICIT_DEF + $vgpr0 = COPY %0(i32) ... --- @@ -72,11 +72,11 @@ body: | bb.0: ; CHECK-LABEL: name: test_implicit_def_48 - ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](s64) - %0:_(s48) = G_IMPLICIT_DEF - %1:_(s64) = G_ANYEXT %0 - $vgpr0_vgpr1 = COPY %1 + ; CHECK: [[DEF:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](i64) + %0:_(i48) = G_IMPLICIT_DEF + %1:_(i64) = G_ANYEXT %0(i48) + $vgpr0_vgpr1 = COPY %1(i64) ... --- @@ -85,10 +85,10 @@ body: | bb.0: ; CHECK-LABEL: name: test_implicit_def_s64 - ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](s64) - %0:_(s64) = G_IMPLICIT_DEF - $vgpr0_vgpr1 = COPY %0 + ; CHECK: [[DEF:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](i64) + %0:_(i64) = G_IMPLICIT_DEF + $vgpr0_vgpr1 = COPY %0(i64) ... --- @@ -97,12 +97,12 @@ body: | bb.0: ; CHECK-LABEL: name: test_implicit_def_s65 - ; CHECK: [[DEF:%[0-9]+]]:_(s128) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s96) = G_TRUNC [[DEF]](s128) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC]](s96) - %0:_(s65) = G_IMPLICIT_DEF - %1:_(s96) = G_ANYEXT %0 - $vgpr0_vgpr1_vgpr2 = COPY %1 + ; CHECK: [[DEF:%[0-9]+]]:_(i128) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i96) = G_TRUNC [[DEF]](i128) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC]](i96) + %0:_(i65) = G_IMPLICIT_DEF + %1:_(i96) = G_ANYEXT %0(i65) + $vgpr0_vgpr1_vgpr2 = COPY %1(i96) ... 
--- @@ -111,10 +111,10 @@ body: | bb.0: ; CHECK-LABEL: name: test_implicit_def_s128 - ; CHECK: [[DEF:%[0-9]+]]:_(s128) = G_IMPLICIT_DEF - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DEF]](s128) - %0:_(s128) = G_IMPLICIT_DEF - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %0 + ; CHECK: [[DEF:%[0-9]+]]:_(i128) = G_IMPLICIT_DEF + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DEF]](i128) + %0:_(i128) = G_IMPLICIT_DEF + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %0(i128) ... --- @@ -123,10 +123,10 @@ body: | bb.0: ; CHECK-LABEL: name: test_implicit_def_256 - ; CHECK: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[DEF]](s256) - %0:_(s256) = G_IMPLICIT_DEF - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %0 + ; CHECK: [[DEF:%[0-9]+]]:_(i256) = G_IMPLICIT_DEF + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[DEF]](i256) + %0:_(i256) = G_IMPLICIT_DEF + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %0(i256) ... --- @@ -135,13 +135,13 @@ body: | bb.0: ; CHECK-LABEL: name: test_implicit_def_s448 - ; CHECK: [[DEF:%[0-9]+]]:_(s512) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s448) = G_TRUNC [[DEF]](s512) - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[TRUNC]](s448), 0 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) - %0:_(s448) = G_IMPLICIT_DEF - %1:_(s32) = G_EXTRACT %0, 0 - $vgpr0 = COPY %1 + ; CHECK: [[DEF:%[0-9]+]]:_(i512) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i448) = G_TRUNC [[DEF]](i512) + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(i32) = G_EXTRACT [[TRUNC]](i448), 0 + ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](i32) + %0:_(i448) = G_IMPLICIT_DEF + %1:_(i32) = G_EXTRACT %0(i448), 0 + $vgpr0 = COPY %1(i32) ... --- @@ -150,12 +150,12 @@ body: | bb.0: ; CHECK-LABEL: name: test_implicit_def_s512 - ; CHECK: [[DEF:%[0-9]+]]:_(s512) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[DEF]](s512), 0 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) - %0:_(s512) = G_IMPLICIT_DEF - %1:_(s32) = G_EXTRACT %0, 0 - $vgpr0 = COPY %1 + ; CHECK: [[DEF:%[0-9]+]]:_(i512) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(i32) = G_EXTRACT [[DEF]](i512), 0 + ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](i32) + %0:_(i512) = G_IMPLICIT_DEF + %1:_(i32) = G_EXTRACT %0(i512), 0 + $vgpr0 = COPY %1(i32) ... --- @@ -164,12 +164,12 @@ body: | bb.0: ; CHECK-LABEL: name: test_implicit_def_s1024 - ; CHECK: [[DEF:%[0-9]+]]:_(s1024) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[DEF]](s1024), 0 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) - %0:_(s1024) = G_IMPLICIT_DEF - %1:_(s32) = G_EXTRACT %0, 0 - $vgpr0 = COPY %1 + ; CHECK: [[DEF:%[0-9]+]]:_(i1024) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(i32) = G_EXTRACT [[DEF]](i1024), 0 + ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](i32) + %0:_(i1024) = G_IMPLICIT_DEF + %1:_(i32) = G_EXTRACT %0(i1024), 0 + $vgpr0 = COPY %1(i32) ... 
--- @@ -178,12 +178,12 @@ body: | bb.0: ; CHECK-LABEL: name: test_implicit_def_s1056 - ; CHECK: [[DEF:%[0-9]+]]:_(s1024) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s1024) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - %0:_(s1056) = G_IMPLICIT_DEF - %1:_(s32) = G_TRUNC %0 - $vgpr0 = COPY %1 + ; CHECK: [[DEF:%[0-9]+]]:_(i1024) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32), [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32), [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32), [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32), [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32), [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32), [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32), [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32), [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32), [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32), [[UV28:%[0-9]+]]:_(i32), [[UV29:%[0-9]+]]:_(i32), [[UV30:%[0-9]+]]:_(i32), [[UV31:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF]](i1024) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + %0:_(i1056) = G_IMPLICIT_DEF + %1:_(i32) = G_TRUNC %0(i1056) + $vgpr0 = COPY %1(i32) ... --- @@ -192,12 +192,12 @@ body: | bb.0: ; CHECK-LABEL: name: test_implicit_def_s2048 - ; CHECK: [[DEF:%[0-9]+]]:_(s1024) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[DEF]](s1024) - ; CHECK-NEXT: $vgpr0 = COPY [[TRUNC]](s32) - %0:_(s2048) = G_IMPLICIT_DEF - %1:_(s32) = G_TRUNC %0 - $vgpr0 = COPY %1 + ; CHECK: [[DEF:%[0-9]+]]:_(i1024) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[DEF]](i1024) + ; CHECK-NEXT: $vgpr0 = COPY [[TRUNC]](i32) + %0:_(i2048) = G_IMPLICIT_DEF + %1:_(i32) = G_TRUNC %0(i2048) + $vgpr0 = COPY %1(i32) ... --- @@ -206,10 +206,10 @@ body: | bb.0: ; CHECK-LABEL: name: test_implicit_def_v2s32 - ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](<2 x s32>) - %0:_(<2 x s32>) = G_IMPLICIT_DEF - $vgpr0_vgpr1 = COPY %0 + ; CHECK: [[DEF:%[0-9]+]]:_(<2 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](<2 x i32>) + %0:_(<2 x i32>) = G_IMPLICIT_DEF + $vgpr0_vgpr1 = COPY %0(<2 x i32>) ... 
--- @@ -218,10 +218,10 @@ body: | bb.0: ; CHECK-LABEL: name: test_implicit_def_v3s32 - ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[DEF]](<3 x s32>) - %0:_(<3 x s32>) = G_IMPLICIT_DEF - $vgpr0_vgpr1_vgpr2 = COPY %0 + ; CHECK: [[DEF:%[0-9]+]]:_(<3 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[DEF]](<3 x i32>) + %0:_(<3 x i32>) = G_IMPLICIT_DEF + $vgpr0_vgpr1_vgpr2 = COPY %0(<3 x i32>) ... --- @@ -230,10 +230,10 @@ body: | bb.0: ; CHECK-LABEL: name: test_implicit_def_v4s32 - ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DEF]](<4 x s32>) - %0:_(<4 x s32>) = G_IMPLICIT_DEF - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %0 + ; CHECK: [[DEF:%[0-9]+]]:_(<4 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DEF]](<4 x i32>) + %0:_(<4 x i32>) = G_IMPLICIT_DEF + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %0(<4 x i32>) ... --- @@ -242,10 +242,10 @@ body: | bb.0: ; CHECK-LABEL: name: test_implicit_def_v5s32 - ; CHECK: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: S_NOP 0, implicit [[DEF]](<5 x s32>) - %0:_(<5 x s32>) = G_IMPLICIT_DEF - S_NOP 0, implicit %0 + ; CHECK: [[DEF:%[0-9]+]]:_(<5 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit [[DEF]](<5 x i32>) + %0:_(<5 x i32>) = G_IMPLICIT_DEF + S_NOP 0, implicit %0(<5 x i32>) ... --- @@ -254,10 +254,10 @@ body: | bb.0: ; CHECK-LABEL: name: test_implicit_def_v6s32 - ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: S_NOP 0, implicit [[DEF]](<6 x s32>) - %0:_(<6 x s32>) = G_IMPLICIT_DEF - S_NOP 0, implicit %0 + ; CHECK: [[DEF:%[0-9]+]]:_(<6 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit [[DEF]](<6 x i32>) + %0:_(<6 x i32>) = G_IMPLICIT_DEF + S_NOP 0, implicit %0(<6 x i32>) ... --- @@ -266,10 +266,10 @@ body: | bb.0: ; CHECK-LABEL: name: test_implicit_def_v7s32 - ; CHECK: [[DEF:%[0-9]+]]:_(<7 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: S_NOP 0, implicit [[DEF]](<7 x s32>) - %0:_(<7 x s32>) = G_IMPLICIT_DEF - S_NOP 0, implicit %0 + ; CHECK: [[DEF:%[0-9]+]]:_(<7 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit [[DEF]](<7 x i32>) + %0:_(<7 x i32>) = G_IMPLICIT_DEF + S_NOP 0, implicit %0(<7 x i32>) ... --- @@ -278,10 +278,10 @@ body: | bb.0: ; CHECK-LABEL: name: test_implicit_def_v8s32 - ; CHECK: [[DEF:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[DEF]](<8 x s32>) - %0:_(<8 x s32>) = G_IMPLICIT_DEF - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %0 + ; CHECK: [[DEF:%[0-9]+]]:_(<8 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[DEF]](<8 x i32>) + %0:_(<8 x i32>) = G_IMPLICIT_DEF + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %0(<8 x i32>) ... 
--- @@ -290,10 +290,10 @@ body: | bb.0: ; CHECK-LABEL: name: test_implicit_def_v16s32 - ; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[DEF]](<16 x s32>) - %0:_(<16 x s32>) = G_IMPLICIT_DEF - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %0 + ; CHECK: [[DEF:%[0-9]+]]:_(<16 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[DEF]](<16 x i32>) + %0:_(<16 x i32>) = G_IMPLICIT_DEF + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %0(<16 x i32>) ... --- @@ -302,10 +302,10 @@ body: | bb.0: ; CHECK-LABEL: name: test_implicit_def_v32s32 - ; CHECK: [[DEF:%[0-9]+]]:_(<32 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: S_NOP 0, implicit [[DEF]](<32 x s32>) - %0:_(<32 x s32>) = G_IMPLICIT_DEF - S_NOP 0, implicit %0 + ; CHECK: [[DEF:%[0-9]+]]:_(<32 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit [[DEF]](<32 x i32>) + %0:_(<32 x i32>) = G_IMPLICIT_DEF + S_NOP 0, implicit %0(<32 x i32>) ... --- @@ -317,17 +317,17 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v33s32 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<32 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<32 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32), [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32), [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32), [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32), [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32), [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32), [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32), [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32), [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32), [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32), [[UV28:%[0-9]+]]:_(i32), [[UV29:%[0-9]+]]:_(i32), [[UV30:%[0-9]+]]:_(i32), [[UV31:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF]](<32 x i32>) ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (volatile store (s32), addrspace 1) - ; CHECK-NEXT: G_STORE [[DEF1]](s32), [[COPY]](p1) :: (volatile store (s32), addrspace 1) - 
%0:_(<33 x s32>) = G_IMPLICIT_DEF - %1:_(s32), %2:_(s32), %3:_(s32), %4:_(s32), %5:_(s32), %6:_(s32), %7:_(s32), %8:_(s32), %9:_(s32), %10:_(s32), %11:_(s32), %12:_(s32), %13:_(s32), %14:_(s32), %15:_(s32), %16:_(s32), %17:_(s32), %18:_(s32), %19:_(s32), %20:_(s32), %21:_(s32), %22:_(s32), %23:_(s32), %24:_(s32), %25:_(s32), %26:_(s32), %27:_(s32), %28:_(s32), %29:_(s32), %30:_(s32), %31:_(s32), %32:_(s32), %33:_(s32) = G_UNMERGE_VALUES %0 + ; CHECK-NEXT: G_STORE [[UV]](i32), [[COPY]](p1) :: (volatile store (i32), addrspace 1) + ; CHECK-NEXT: G_STORE [[DEF1]](i32), [[COPY]](p1) :: (volatile store (i32), addrspace 1) + %0:_(<33 x i32>) = G_IMPLICIT_DEF + %1:_(i32), %2:_(i32), %3:_(i32), %4:_(i32), %5:_(i32), %6:_(i32), %7:_(i32), %8:_(i32), %9:_(i32), %10:_(i32), %11:_(i32), %12:_(i32), %13:_(i32), %14:_(i32), %15:_(i32), %16:_(i32), %17:_(i32), %18:_(i32), %19:_(i32), %20:_(i32), %21:_(i32), %22:_(i32), %23:_(i32), %24:_(i32), %25:_(i32), %26:_(i32), %27:_(i32), %28:_(i32), %29:_(i32), %30:_(i32), %31:_(i32), %32:_(i32), %33:_(i32) = G_UNMERGE_VALUES %0(<33 x i32>) %34:_(p1) = COPY $vgpr0_vgpr1 - G_STORE %1, %34 :: (volatile store (s32), align 4, addrspace 1) - G_STORE %33, %34 :: (volatile store (s32), align 4, addrspace 1) + G_STORE %1(i32), %34(p1) :: (volatile store (i32), addrspace 1) + G_STORE %33(i32), %34(p1) :: (volatile store (i32), addrspace 1) ... @@ -337,12 +337,12 @@ body: | bb.0: ; CHECK-LABEL: name: test_implicit_def_v64s32 - ; CHECK: [[DEF:%[0-9]+]]:_(<32 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<64 x s32>) = G_CONCAT_VECTORS [[DEF]](<32 x s32>), [[DEF]](<32 x s32>) - ; CHECK-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<64 x s32>), implicit [[DEF]](<32 x s32>) - %0:_(<64 x s32>) = G_IMPLICIT_DEF - %1:_(<32 x s32>), %2:_(<32 x s32>) = G_UNMERGE_VALUES %0 - S_NOP 0, implicit %0, implicit %1 + ; CHECK: [[DEF:%[0-9]+]]:_(<32 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<64 x i32>) = G_CONCAT_VECTORS [[DEF]](<32 x i32>), [[DEF]](<32 x i32>) + ; CHECK-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<64 x i32>), implicit [[DEF]](<32 x i32>) + %0:_(<64 x i32>) = G_IMPLICIT_DEF + %1:_(<32 x i32>), %2:_(<32 x i32>) = G_UNMERGE_VALUES %0(<64 x i32>) + S_NOP 0, implicit %0(<64 x i32>), implicit %1(<32 x i32>) ... --- @@ -351,11 +351,11 @@ body: | bb.0: ; CHECK-LABEL: name: test_implicit_def_v2s1 - ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](<2 x s32>) - %0:_(<2 x s1>) = G_IMPLICIT_DEF - %1:_(<2 x s32>) = G_ANYEXT %0 - $vgpr0_vgpr1 = COPY %1 + ; CHECK: [[DEF:%[0-9]+]]:_(<2 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](<2 x i32>) + %0:_(<2 x i1>) = G_IMPLICIT_DEF + %1:_(<2 x i32>) = G_ANYEXT %0(<2 x i1>) + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... --- @@ -364,11 +364,11 @@ body: | bb.0: ; CHECK-LABEL: name: test_implicit_def_v3s1 - ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[DEF]](<3 x s32>) - %0:_(<3 x s1>) = G_IMPLICIT_DEF - %1:_(<3 x s32>) = G_ANYEXT %0 - $vgpr0_vgpr1_vgpr2 = COPY %1 + ; CHECK: [[DEF:%[0-9]+]]:_(<3 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[DEF]](<3 x i32>) + %0:_(<3 x i1>) = G_IMPLICIT_DEF + %1:_(<3 x i32>) = G_ANYEXT %0(<3 x i1>) + $vgpr0_vgpr1_vgpr2 = COPY %1(<3 x i32>) ... 
--- @@ -377,11 +377,11 @@ body: | bb.0: ; CHECK-LABEL: name: test_implicit_def_v2s8 - ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](<2 x s32>) - %0:_(<2 x s8>) = G_IMPLICIT_DEF - %1:_(<2 x s32>) = G_ANYEXT %0 - $vgpr0_vgpr1 = COPY %1 + ; CHECK: [[DEF:%[0-9]+]]:_(<2 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](<2 x i32>) + %0:_(<2 x i8>) = G_IMPLICIT_DEF + %1:_(<2 x i32>) = G_ANYEXT %0(<2 x i8>) + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... --- @@ -390,13 +390,13 @@ body: | bb.0: ; CHECK-LABEL: name: test_implicit_def_v3s8 - ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - %0:_(<3 x s8>) = G_IMPLICIT_DEF - %1:_(<3 x s32>) = G_ANYEXT %0 - $vgpr0_vgpr1_vgpr2 = COPY %1 + ; CHECK: [[DEF:%[0-9]+]]:_(<4 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF]](<4 x i32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) + %0:_(<3 x i8>) = G_IMPLICIT_DEF + %1:_(<3 x i32>) = G_ANYEXT %0(<3 x i8>) + $vgpr0_vgpr1_vgpr2 = COPY %1(<3 x i32>) ... --- @@ -405,10 +405,10 @@ body: | bb.0: ; CHECK-LABEL: name: test_implicit_def_v2s16 - ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](<2 x s16>) - %0:_(<2 x s16>) = G_IMPLICIT_DEF - $vgpr0 = COPY %0 + ; CHECK: [[DEF:%[0-9]+]]:_(<2 x i16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](<2 x i16>) + %0:_(<2 x i16>) = G_IMPLICIT_DEF + $vgpr0 = COPY %0(<2 x i16>) ... 
--- @@ -417,31 +417,31 @@ body: | bb.0: ; CHECK-LABEL: name: test_implicit_def_v3s16 - ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<3 x s16>) = G_IMPLICIT_DEF - %1:_(<4 x s16>) = G_IMPLICIT_DEF - %2:_(<4 x s16>) = G_INSERT %1, %0, 0 - $vgpr0_vgpr1 = COPY %2 + ; CHECK: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF1]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LSHR1]], [[C]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND1]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) + %0:_(<3 x i16>) = G_IMPLICIT_DEF + %1:_(<4 x i16>) = G_IMPLICIT_DEF + %2:_(<4 x i16>) = G_INSERT %1, %0(<3 x i16>), 0 + $vgpr0_vgpr1 = COPY %2(<4 x i16>) ... 
--- @@ -450,10 +450,10 @@ body: | bb.0: ; CHECK-LABEL: name: test_implicit_def_v4s16 - ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](<4 x s16>) - %0:_(<4 x s16>) = G_IMPLICIT_DEF - $vgpr0_vgpr1 = COPY %0 + ; CHECK: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](<4 x i16>) + %0:_(<4 x i16>) = G_IMPLICIT_DEF + $vgpr0_vgpr1 = COPY %0(<4 x i16>) ... --- @@ -462,37 +462,37 @@ body: | bb.0: ; CHECK-LABEL: name: test_implicit_def_v5s16 - ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<8 x s16>) - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL2]] - ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[UV6]](<2 x s16>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>) - %0:_(<5 x s16>) = G_IMPLICIT_DEF - %1:_(<8 x s16>) = G_IMPLICIT_DEF - %2:_(<8 x s16>) = G_INSERT %1, %0, 0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; CHECK: [[DEF:%[0-9]+]]:_(<6 x i16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<6 x i16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<8 x i16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x 
i16>), [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>), [[UV6:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF1]](<8 x i16>) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV5]](<2 x i16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LSHR1]], [[C]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND1]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C1]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LSHR2]], [[C]](i32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x i16>), [[BITCAST5]](<2 x i16>), [[BITCAST6]](<2 x i16>), [[UV6]](<2 x i16>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x i16>) + %0:_(<5 x i16>) = G_IMPLICIT_DEF + %1:_(<8 x i16>) = G_IMPLICIT_DEF + %2:_(<8 x i16>) = G_INSERT %1, %0(<5 x i16>), 0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<8 x i16>) ... --- @@ -501,16 +501,16 @@ body: | bb.0: ; CHECK-LABEL: name: test_implicit_def_v6s16 - ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<8 x s16>) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[UV4]](<2 x s16>), [[UV5]](<2 x s16>), [[UV6]](<2 x s16>), [[UV3]](<2 x s16>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>) - %0:_(<6 x s16>) = G_IMPLICIT_DEF - %1:_(<8 x s16>) = G_IMPLICIT_DEF - %2:_(<8 x s16>) = G_INSERT %1, %0, 0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; CHECK: [[DEF:%[0-9]+]]:_(<6 x i16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<8 x i16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF1]](<8 x i16>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>), [[UV6:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<6 x i16>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i16>) = G_CONCAT_VECTORS [[UV4]](<2 x i16>), [[UV5]](<2 x i16>), [[UV6]](<2 x i16>), [[UV3]](<2 x i16>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x i16>) + %0:_(<6 x i16>) = G_IMPLICIT_DEF + %1:_(<8 x i16>) = G_IMPLICIT_DEF + %2:_(<8 x i16>) = G_INSERT %1, %0(<6 x i16>), 0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<8 x i16>) ... 
--- @@ -519,10 +519,10 @@ body: | bb.0: ; CHECK-LABEL: name: test_implicit_def_v8s16 - ; CHECK: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DEF]](<8 x s16>) - %0:_(<8 x s16>) = G_IMPLICIT_DEF - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %0 + ; CHECK: [[DEF:%[0-9]+]]:_(<8 x i16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DEF]](<8 x i16>) + %0:_(<8 x i16>) = G_IMPLICIT_DEF + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %0(<8 x i16>) ... --- @@ -531,10 +531,10 @@ body: | bb.0: ; CHECK-LABEL: name: test_implicit_def_v2s64 - ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DEF]](<2 x s64>) - %0:_(<2 x s64>) = G_IMPLICIT_DEF - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %0 + ; CHECK: [[DEF:%[0-9]+]]:_(<2 x i64>) = G_IMPLICIT_DEF + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DEF]](<2 x i64>) + %0:_(<2 x i64>) = G_IMPLICIT_DEF + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %0(<2 x i64>) ... --- @@ -543,11 +543,11 @@ body: | bb.0: ; CHECK-LABEL: name: test_implicit_def_v4s8 - ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x s8>) - %0:_(<4 x s8>) = G_IMPLICIT_DEF - $vgpr0 = COPY %0 + ; CHECK: [[DEF:%[0-9]+]]:_(<4 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x i8>) = G_TRUNC [[DEF]](<4 x i32>) + ; CHECK-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x i8>) + %0:_(<4 x i8>) = G_IMPLICIT_DEF + $vgpr0 = COPY %0(<4 x i8>) ... --- @@ -559,7 +559,7 @@ body: | ; CHECK: [[DEF:%[0-9]+]]:_(p0) = G_IMPLICIT_DEF ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](p0) %0:_(p0) = G_IMPLICIT_DEF - $vgpr0_vgpr1 = COPY %0 + $vgpr0_vgpr1 = COPY %0(p0) ... --- @@ -571,7 +571,7 @@ body: | ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](p1) %0:_(p1) = G_IMPLICIT_DEF - $vgpr0_vgpr1 = COPY %0 + $vgpr0_vgpr1 = COPY %0(p1) ... --- @@ -583,7 +583,7 @@ body: | ; CHECK: [[DEF:%[0-9]+]]:_(p2) = G_IMPLICIT_DEF ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](p2) %0:_(p2) = G_IMPLICIT_DEF - $vgpr0 = COPY %0 + $vgpr0 = COPY %0(p2) ... --- @@ -595,7 +595,7 @@ body: | ; CHECK: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](p3) %0:_(p3) = G_IMPLICIT_DEF - $vgpr0 = COPY %0 + $vgpr0 = COPY %0(p3) ... --- @@ -607,7 +607,7 @@ body: | ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](p4) %0:_(p4) = G_IMPLICIT_DEF - $vgpr0_vgpr1 = COPY %0 + $vgpr0_vgpr1 = COPY %0(p4) ... --- @@ -619,7 +619,7 @@ body: | ; CHECK: [[DEF:%[0-9]+]]:_(p5) = G_IMPLICIT_DEF ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](p5) %0:_(p5) = G_IMPLICIT_DEF - $vgpr0 = COPY %0 + $vgpr0 = COPY %0(p5) ... --- @@ -631,7 +631,7 @@ body: | ; CHECK: [[DEF:%[0-9]+]]:_(p999) = G_IMPLICIT_DEF ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](p999) %0:_(p999) = G_IMPLICIT_DEF - $vgpr0_vgpr1 = COPY %0 + $vgpr0_vgpr1 = COPY %0(p999) ... 
@@ -641,12 +641,12 @@ body: | bb.0: ; CHECK-LABEL: name: test_implicit_def_v2s1024 - ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s1024>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s1024), [[UV1:%[0-9]+]]:_(s1024) = G_UNMERGE_VALUES [[DEF]](<2 x s1024>) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[UV]](s1024), implicit [[UV1]](s1024) - %0:_(<2 x s1024>) = G_IMPLICIT_DEF - %1:_(s1024), %2:_(s1024) = G_UNMERGE_VALUES %0 - S_ENDPGM 0, implicit %1, implicit %2 + ; CHECK: [[DEF:%[0-9]+]]:_(<2 x i1024>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i1024), [[UV1:%[0-9]+]]:_(i1024) = G_UNMERGE_VALUES [[DEF]](<2 x i1024>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[UV]](i1024), implicit [[UV1]](i1024) + %0:_(<2 x i1024>) = G_IMPLICIT_DEF + %1:_(i1024), %2:_(i1024) = G_UNMERGE_VALUES %0(<2 x i1024>) + S_ENDPGM 0, implicit %1(i1024), implicit %2(i1024) ... --- @@ -656,10 +656,10 @@ body: | bb.0: ; CHECK-LABEL: name: test_implicit_def_v3s1024 - ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s1024>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s1024), [[UV1:%[0-9]+]]:_(s1024), [[UV2:%[0-9]+]]:_(s1024) = G_UNMERGE_VALUES [[DEF]](<3 x s1024>) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[UV]](s1024), implicit [[UV1]](s1024), implicit [[UV2]](s1024) - %0:_(<3 x s1024>) = G_IMPLICIT_DEF - %1:_(s1024), %2:_(s1024), %3:_(s1024) = G_UNMERGE_VALUES %0 - S_ENDPGM 0, implicit %1, implicit %2, implicit %3 + ; CHECK: [[DEF:%[0-9]+]]:_(<3 x i1024>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i1024), [[UV1:%[0-9]+]]:_(i1024), [[UV2:%[0-9]+]]:_(i1024) = G_UNMERGE_VALUES [[DEF]](<3 x i1024>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[UV]](i1024), implicit [[UV1]](i1024), implicit [[UV2]](i1024) + %0:_(<3 x i1024>) = G_IMPLICIT_DEF + %1:_(i1024), %2:_(i1024), %3:_(i1024) = G_UNMERGE_VALUES %0(<3 x i1024>) + S_ENDPGM 0, implicit %1(i1024), implicit %2(i1024), implicit %3(i1024) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir index bebbf2a262256..2556433eaa143 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir @@ -10,16 +10,16 @@ body: | ; CHECK-LABEL: name: insert_vector_elt_0_v2s32 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[UV1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s32) = G_CONSTANT i32 0 - %3:_(<2 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 - $vgpr0_vgpr1 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY1]](i32), [[UV1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(i32) = COPY $vgpr2 + %2:_(i32) = G_CONSTANT i32 0 + %3:_(<2 x i32>) = G_INSERT_VECTOR_ELT %0, %1(i32), %2(i32) + $vgpr0_vgpr1 = COPY %3(<2 x i32>) ... 
--- @@ -31,16 +31,16 @@ body: | ; CHECK-LABEL: name: insert_vector_elt_1_v2s32 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[COPY1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s32) = G_CONSTANT i32 1 - %3:_(<2 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 - $vgpr0_vgpr1 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[COPY1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(i32) = COPY $vgpr2 + %2:_(i32) = G_CONSTANT i32 1 + %3:_(<2 x i32>) = G_INSERT_VECTOR_ELT %0, %1(i32), %2(i32) + $vgpr0_vgpr1 = COPY %3(<2 x i32>) ... --- @@ -52,15 +52,15 @@ body: | ; CHECK-LABEL: name: insert_vector_elt_2_v2s32 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s32) = G_CONSTANT i32 2 - %3:_(<2 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 - $vgpr0_vgpr1 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(i32) = COPY $vgpr2 + %2:_(i32) = G_CONSTANT i32 2 + %3:_(<2 x i32>) = G_INSERT_VECTOR_ELT %0, %1(i32), %2(i32) + $vgpr0_vgpr1 = COPY %3(<2 x i32>) ... 
--- @@ -73,18 +73,18 @@ body: | ; CHECK-LABEL: name: insert_vector_elt_v2s32_varidx_i64 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3_vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr3_vgpr4 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) - ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<2 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[TRUNC]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[IVEC]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s64) = COPY $vgpr3_vgpr4 - %4:_(s32) = G_TRUNC %2 - %3:_(<2 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %4 - $vgpr0_vgpr1 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i64) = COPY $vgpr3_vgpr4 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY2]](i64) + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<2 x i32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](i32), [[TRUNC]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[IVEC]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(i32) = COPY $vgpr2 + %2:_(i64) = COPY $vgpr3_vgpr4 + %3:_(i32) = G_TRUNC %2(i64) + %4:_(<2 x i32>) = G_INSERT_VECTOR_ELT %0, %1(i32), %3(i32) + $vgpr0_vgpr1 = COPY %4(<2 x i32>) ... --- @@ -97,18 +97,18 @@ body: | ; CHECK-LABEL: name: insert_vector_elt_v16s32_varidx_i64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16, $vgpr17_vgpr18 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr17_vgpr18 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) - ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<16 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[TRUNC]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[IVEC]](<16 x s32>) - %0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - %1:_(s32) = COPY $vgpr16 - %2:_(s64) = COPY $vgpr17_vgpr18 - %4:_(s32) = G_TRUNC %2 - %3:_(<16 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %4 - S_ENDPGM 0, implicit %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i64) = COPY $vgpr17_vgpr18 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY2]](i64) + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<16 x i32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](i32), [[TRUNC]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[IVEC]](<16 x i32>) + %0:_(<16 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:_(i32) = COPY $vgpr16 + %2:_(i64) = COPY $vgpr17_vgpr18 + %3:_(i32) = G_TRUNC %2(i64) + %4:_(<16 x i32>) = G_INSERT_VECTOR_ELT %0, %1(i32), %3(i32) + S_ENDPGM 0, implicit %4(<16 x i32>) ... 
--- @@ -121,16 +121,16 @@ body: | ; CHECK-LABEL: name: insert_vector_elt_0_v16s64 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<16 x s64>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64), [[UV11:%[0-9]+]]:_(s64), [[UV12:%[0-9]+]]:_(s64), [[UV13:%[0-9]+]]:_(s64), [[UV14:%[0-9]+]]:_(s64), [[UV15:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<16 x s64>) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[UV1]](s64), [[UV2]](s64), [[UV3]](s64), [[UV4]](s64), [[UV5]](s64), [[UV6]](s64), [[UV7]](s64), [[UV8]](s64), [[UV9]](s64), [[UV10]](s64), [[UV11]](s64), [[UV12]](s64), [[UV13]](s64), [[UV14]](s64), [[UV15]](s64) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<16 x s64>) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(<16 x s64>) = G_IMPLICIT_DEF - %2:_(s32) = G_CONSTANT i32 0 - %3:_(<16 x s64>) = G_INSERT_VECTOR_ELT %1, %0, %2 - S_ENDPGM 0, implicit %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<16 x i64>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64), [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64), [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64), [[UV6:%[0-9]+]]:_(i64), [[UV7:%[0-9]+]]:_(i64), [[UV8:%[0-9]+]]:_(i64), [[UV9:%[0-9]+]]:_(i64), [[UV10:%[0-9]+]]:_(i64), [[UV11:%[0-9]+]]:_(i64), [[UV12:%[0-9]+]]:_(i64), [[UV13:%[0-9]+]]:_(i64), [[UV14:%[0-9]+]]:_(i64), [[UV15:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<16 x i64>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x i64>) = G_BUILD_VECTOR [[COPY]](i64), [[UV1]](i64), [[UV2]](i64), [[UV3]](i64), [[UV4]](i64), [[UV5]](i64), [[UV6]](i64), [[UV7]](i64), [[UV8]](i64), [[UV9]](i64), [[UV10]](i64), [[UV11]](i64), [[UV12]](i64), [[UV13]](i64), [[UV14]](i64), [[UV15]](i64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<16 x i64>) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(<16 x i64>) = G_IMPLICIT_DEF + %2:_(i32) = G_CONSTANT i32 0 + %3:_(<16 x i64>) = G_INSERT_VECTOR_ELT %1, %0(i64), %2(i32) + S_ENDPGM 0, implicit %3(<16 x i64>) ... 
--- @@ -143,18 +143,18 @@ body: | ; CHECK-LABEL: name: insert_vector_elt_0_v2i8_i32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[UV1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(s32) = COPY $vgpr0 - %1:_(s8) = G_TRUNC %0 - %2:_(<2 x s8>) = G_IMPLICIT_DEF - %3:_(s32) = G_CONSTANT i32 0 - %4:_(<2 x s8>) = G_INSERT_VECTOR_ELT %2, %1, %3 - %5:_(<2 x s32>) = G_ANYEXT %4 - $vgpr0_vgpr1 = COPY %5 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF]](<2 x i32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[UV1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(i32) = COPY $vgpr0 + %1:_(i8) = G_TRUNC %0(i32) + %2:_(<2 x i8>) = G_IMPLICIT_DEF + %3:_(i32) = G_CONSTANT i32 0 + %4:_(<2 x i8>) = G_INSERT_VECTOR_ELT %2, %1(i8), %3(i32) + %5:_(<2 x i32>) = G_ANYEXT %4(<2 x i8>) + $vgpr0_vgpr1 = COPY %5(<2 x i32>) ... --- @@ -167,17 +167,17 @@ body: | ; CHECK-LABEL: name: insert_vector_elt_v4s32_s32_look_through_trunc_0 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = COPY $vgpr4 - %2:_(s64) = G_CONSTANT i64 0 - %3:_(s32) = G_TRUNC %2 - %4:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %3 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY1]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i32) = COPY $vgpr4 + %2:_(i64) = G_CONSTANT i64 0 + %3:_(i32) = G_TRUNC %2(i64) + %4:_(<4 x i32>) = G_INSERT_VECTOR_ELT %0, %1(i32), %3(i32) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %4(<4 x i32>) ... 
--- @@ -190,101 +190,101 @@ body: | ; CHECK-LABEL: name: insert_vector_elt_64_65_v64s32 ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<32 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<32 x i32>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>), [[UV2:%[0-9]+]]:_(<4 x s32>), [[UV3:%[0-9]+]]:_(<4 x s32>), [[UV4:%[0-9]+]]:_(<4 x s32>), [[UV5:%[0-9]+]]:_(<4 x s32>), [[UV6:%[0-9]+]]:_(<4 x s32>), [[UV7:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<4 x s32>), [[UV9:%[0-9]+]]:_(<4 x s32>), [[UV10:%[0-9]+]]:_(<4 x s32>), [[UV11:%[0-9]+]]:_(<4 x s32>), [[UV12:%[0-9]+]]:_(<4 x s32>), [[UV13:%[0-9]+]]:_(<4 x s32>), [[UV14:%[0-9]+]]:_(<4 x s32>), [[UV15:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) - ; CHECK-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CHECK-NEXT: G_STORE [[UV2]](<4 x s32>), [[PTR_ADD1]](p1) :: (store (<4 x s32>) into unknown-address + 32, align 4, addrspace 1) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CHECK-NEXT: G_STORE [[UV3]](<4 x s32>), [[PTR_ADD2]](p1) :: (store (<4 x s32>) into unknown-address + 48, align 4, addrspace 1) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CHECK-NEXT: G_STORE [[UV4]](<4 x s32>), [[PTR_ADD3]](p1) :: (store (<4 x s32>) into unknown-address + 64, align 4, addrspace 1) - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 80 - ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CHECK-NEXT: G_STORE [[UV5]](<4 x s32>), [[PTR_ADD4]](p1) :: (store (<4 x s32>) into unknown-address + 80, align 4, addrspace 1) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 96 - ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CHECK-NEXT: G_STORE [[UV6]](<4 x s32>), [[PTR_ADD5]](p1) :: (store (<4 x s32>) into unknown-address + 96, align 4, addrspace 1) - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 112 - ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CHECK-NEXT: G_STORE [[UV7]](<4 x s32>), [[PTR_ADD6]](p1) :: (store (<4 x s32>) into unknown-address + 112, align 4, addrspace 1) - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 - ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; CHECK-NEXT: G_STORE [[UV8]](<4 x s32>), [[PTR_ADD7]](p1) :: (store (<4 x s32>) into unknown-address + 128, align 4, addrspace 1) - ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 144 - ; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; CHECK-NEXT: G_STORE [[UV9]](<4 x s32>), [[PTR_ADD8]](p1) :: (store (<4 x s32>) into unknown-address + 144, align 4, addrspace 1) - ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s64) = 
G_CONSTANT i64 160 - ; CHECK-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64) - ; CHECK-NEXT: G_STORE [[UV10]](<4 x s32>), [[PTR_ADD9]](p1) :: (store (<4 x s32>) into unknown-address + 160, align 4, addrspace 1) - ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 176 - ; CHECK-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C10]](s64) - ; CHECK-NEXT: G_STORE [[UV11]](<4 x s32>), [[PTR_ADD10]](p1) :: (store (<4 x s32>) into unknown-address + 176, align 4, addrspace 1) - ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 192 - ; CHECK-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C11]](s64) - ; CHECK-NEXT: G_STORE [[UV12]](<4 x s32>), [[PTR_ADD11]](p1) :: (store (<4 x s32>) into unknown-address + 192, align 4, addrspace 1) - ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 208 - ; CHECK-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C12]](s64) - ; CHECK-NEXT: G_STORE [[UV13]](<4 x s32>), [[PTR_ADD12]](p1) :: (store (<4 x s32>) into unknown-address + 208, align 4, addrspace 1) - ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 224 - ; CHECK-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C13]](s64) - ; CHECK-NEXT: G_STORE [[UV14]](<4 x s32>), [[PTR_ADD13]](p1) :: (store (<4 x s32>) into unknown-address + 224, align 4, addrspace 1) - ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 240 - ; CHECK-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C14]](s64) - ; CHECK-NEXT: G_STORE [[UV15]](<4 x s32>), [[PTR_ADD14]](p1) :: (store (<4 x s32>) into unknown-address + 240, align 4, addrspace 1) - ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(<4 x s32>), [[UV17:%[0-9]+]]:_(<4 x s32>), [[UV18:%[0-9]+]]:_(<4 x s32>), [[UV19:%[0-9]+]]:_(<4 x s32>), [[UV20:%[0-9]+]]:_(<4 x s32>), [[UV21:%[0-9]+]]:_(<4 x s32>), [[UV22:%[0-9]+]]:_(<4 x s32>), [[UV23:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) - ; CHECK-NEXT: [[UV24:%[0-9]+]]:_(<4 x s32>), [[UV25:%[0-9]+]]:_(<4 x s32>), [[UV26:%[0-9]+]]:_(<4 x s32>), [[UV27:%[0-9]+]]:_(<4 x s32>), [[UV28:%[0-9]+]]:_(<4 x s32>), [[UV29:%[0-9]+]]:_(<4 x s32>), [[UV30:%[0-9]+]]:_(<4 x s32>), [[UV31:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) - ; CHECK-NEXT: G_STORE [[UV16]](<4 x s32>), [[COPY1]](p1) :: (store (<4 x s32>), align 4, addrspace 1) - ; CHECK-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C]](s64) - ; CHECK-NEXT: G_STORE [[UV17]](<4 x s32>), [[PTR_ADD15]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) - ; CHECK-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C1]](s64) - ; CHECK-NEXT: G_STORE [[UV18]](<4 x s32>), [[PTR_ADD16]](p1) :: (store (<4 x s32>) into unknown-address + 32, align 4, addrspace 1) - ; CHECK-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C2]](s64) - ; CHECK-NEXT: G_STORE [[UV19]](<4 x s32>), [[PTR_ADD17]](p1) :: (store (<4 x s32>) into unknown-address + 48, align 4, addrspace 1) - ; CHECK-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C3]](s64) - ; CHECK-NEXT: G_STORE [[UV20]](<4 x s32>), [[PTR_ADD18]](p1) :: (store (<4 x s32>) into unknown-address + 64, align 4, addrspace 1) - ; CHECK-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C4]](s64) - ; CHECK-NEXT: G_STORE [[UV21]](<4 x s32>), [[PTR_ADD19]](p1) :: (store (<4 x s32>) into unknown-address + 80, align 4, addrspace 1) - ; CHECK-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C5]](s64) - ; CHECK-NEXT: G_STORE [[UV22]](<4 x s32>), [[PTR_ADD20]](p1) :: (store (<4 x s32>) into 
unknown-address + 96, align 4, addrspace 1) - ; CHECK-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C6]](s64) - ; CHECK-NEXT: G_STORE [[UV23]](<4 x s32>), [[PTR_ADD21]](p1) :: (store (<4 x s32>) into unknown-address + 112, align 4, addrspace 1) - ; CHECK-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C7]](s64) - ; CHECK-NEXT: G_STORE [[UV24]](<4 x s32>), [[PTR_ADD22]](p1) :: (store (<4 x s32>) into unknown-address + 128, align 4, addrspace 1) - ; CHECK-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C8]](s64) - ; CHECK-NEXT: G_STORE [[UV25]](<4 x s32>), [[PTR_ADD23]](p1) :: (store (<4 x s32>) into unknown-address + 144, align 4, addrspace 1) - ; CHECK-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C9]](s64) - ; CHECK-NEXT: G_STORE [[UV26]](<4 x s32>), [[PTR_ADD24]](p1) :: (store (<4 x s32>) into unknown-address + 160, align 4, addrspace 1) - ; CHECK-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C10]](s64) - ; CHECK-NEXT: G_STORE [[UV27]](<4 x s32>), [[PTR_ADD25]](p1) :: (store (<4 x s32>) into unknown-address + 176, align 4, addrspace 1) - ; CHECK-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C11]](s64) - ; CHECK-NEXT: G_STORE [[UV28]](<4 x s32>), [[PTR_ADD26]](p1) :: (store (<4 x s32>) into unknown-address + 192, align 4, addrspace 1) - ; CHECK-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C12]](s64) - ; CHECK-NEXT: G_STORE [[UV29]](<4 x s32>), [[PTR_ADD27]](p1) :: (store (<4 x s32>) into unknown-address + 208, align 4, addrspace 1) - ; CHECK-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C13]](s64) - ; CHECK-NEXT: G_STORE [[UV30]](<4 x s32>), [[PTR_ADD28]](p1) :: (store (<4 x s32>) into unknown-address + 224, align 4, addrspace 1) - ; CHECK-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C14]](s64) - ; CHECK-NEXT: G_STORE [[UV31]](<4 x s32>), [[PTR_ADD29]](p1) :: (store (<4 x s32>) into unknown-address + 240, align 4, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x i32>), [[UV1:%[0-9]+]]:_(<4 x i32>), [[UV2:%[0-9]+]]:_(<4 x i32>), [[UV3:%[0-9]+]]:_(<4 x i32>), [[UV4:%[0-9]+]]:_(<4 x i32>), [[UV5:%[0-9]+]]:_(<4 x i32>), [[UV6:%[0-9]+]]:_(<4 x i32>), [[UV7:%[0-9]+]]:_(<4 x i32>) = G_UNMERGE_VALUES [[DEF]](<32 x i32>) + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<4 x i32>), [[UV9:%[0-9]+]]:_(<4 x i32>), [[UV10:%[0-9]+]]:_(<4 x i32>), [[UV11:%[0-9]+]]:_(<4 x i32>), [[UV12:%[0-9]+]]:_(<4 x i32>), [[UV13:%[0-9]+]]:_(<4 x i32>), [[UV14:%[0-9]+]]:_(<4 x i32>), [[UV15:%[0-9]+]]:_(<4 x i32>) = G_UNMERGE_VALUES [[DEF]](<32 x i32>) + ; CHECK-NEXT: G_STORE [[UV]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 4, addrspace 1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CHECK-NEXT: G_STORE [[UV1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, align 4, addrspace 1) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 32 + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CHECK-NEXT: G_STORE [[UV2]](<4 x i32>), [[PTR_ADD1]](p1) :: (store (<4 x i32>) into unknown-address + 32, align 4, addrspace 1) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 48 + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CHECK-NEXT: G_STORE [[UV3]](<4 x i32>), [[PTR_ADD2]](p1) :: (store (<4 x i32>) into unknown-address + 48, align 4, addrspace 1) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 64 + ; CHECK-NEXT: 
[[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; CHECK-NEXT: G_STORE [[UV4]](<4 x i32>), [[PTR_ADD3]](p1) :: (store (<4 x i32>) into unknown-address + 64, align 4, addrspace 1) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 80 + ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; CHECK-NEXT: G_STORE [[UV5]](<4 x i32>), [[PTR_ADD4]](p1) :: (store (<4 x i32>) into unknown-address + 80, align 4, addrspace 1) + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 96 + ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; CHECK-NEXT: G_STORE [[UV6]](<4 x i32>), [[PTR_ADD5]](p1) :: (store (<4 x i32>) into unknown-address + 96, align 4, addrspace 1) + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 112 + ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; CHECK-NEXT: G_STORE [[UV7]](<4 x i32>), [[PTR_ADD6]](p1) :: (store (<4 x i32>) into unknown-address + 112, align 4, addrspace 1) + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 128 + ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](i64) + ; CHECK-NEXT: G_STORE [[UV8]](<4 x i32>), [[PTR_ADD7]](p1) :: (store (<4 x i32>) into unknown-address + 128, align 4, addrspace 1) + ; CHECK-NEXT: [[C8:%[0-9]+]]:_(i64) = G_CONSTANT i64 144 + ; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](i64) + ; CHECK-NEXT: G_STORE [[UV9]](<4 x i32>), [[PTR_ADD8]](p1) :: (store (<4 x i32>) into unknown-address + 144, align 4, addrspace 1) + ; CHECK-NEXT: [[C9:%[0-9]+]]:_(i64) = G_CONSTANT i64 160 + ; CHECK-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](i64) + ; CHECK-NEXT: G_STORE [[UV10]](<4 x i32>), [[PTR_ADD9]](p1) :: (store (<4 x i32>) into unknown-address + 160, align 4, addrspace 1) + ; CHECK-NEXT: [[C10:%[0-9]+]]:_(i64) = G_CONSTANT i64 176 + ; CHECK-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C10]](i64) + ; CHECK-NEXT: G_STORE [[UV11]](<4 x i32>), [[PTR_ADD10]](p1) :: (store (<4 x i32>) into unknown-address + 176, align 4, addrspace 1) + ; CHECK-NEXT: [[C11:%[0-9]+]]:_(i64) = G_CONSTANT i64 192 + ; CHECK-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C11]](i64) + ; CHECK-NEXT: G_STORE [[UV12]](<4 x i32>), [[PTR_ADD11]](p1) :: (store (<4 x i32>) into unknown-address + 192, align 4, addrspace 1) + ; CHECK-NEXT: [[C12:%[0-9]+]]:_(i64) = G_CONSTANT i64 208 + ; CHECK-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C12]](i64) + ; CHECK-NEXT: G_STORE [[UV13]](<4 x i32>), [[PTR_ADD12]](p1) :: (store (<4 x i32>) into unknown-address + 208, align 4, addrspace 1) + ; CHECK-NEXT: [[C13:%[0-9]+]]:_(i64) = G_CONSTANT i64 224 + ; CHECK-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C13]](i64) + ; CHECK-NEXT: G_STORE [[UV14]](<4 x i32>), [[PTR_ADD13]](p1) :: (store (<4 x i32>) into unknown-address + 224, align 4, addrspace 1) + ; CHECK-NEXT: [[C14:%[0-9]+]]:_(i64) = G_CONSTANT i64 240 + ; CHECK-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C14]](i64) + ; CHECK-NEXT: G_STORE [[UV15]](<4 x i32>), [[PTR_ADD14]](p1) :: (store (<4 x i32>) into unknown-address + 240, align 4, addrspace 1) + ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(<4 x i32>), [[UV17:%[0-9]+]]:_(<4 x i32>), [[UV18:%[0-9]+]]:_(<4 x i32>), [[UV19:%[0-9]+]]:_(<4 x i32>), [[UV20:%[0-9]+]]:_(<4 x i32>), [[UV21:%[0-9]+]]:_(<4 x i32>), [[UV22:%[0-9]+]]:_(<4 x i32>), [[UV23:%[0-9]+]]:_(<4 x i32>) = G_UNMERGE_VALUES [[DEF]](<32 x i32>) + ; CHECK-NEXT: [[UV24:%[0-9]+]]:_(<4 x i32>), [[UV25:%[0-9]+]]:_(<4 x i32>), 
[[UV26:%[0-9]+]]:_(<4 x i32>), [[UV27:%[0-9]+]]:_(<4 x i32>), [[UV28:%[0-9]+]]:_(<4 x i32>), [[UV29:%[0-9]+]]:_(<4 x i32>), [[UV30:%[0-9]+]]:_(<4 x i32>), [[UV31:%[0-9]+]]:_(<4 x i32>) = G_UNMERGE_VALUES [[DEF]](<32 x i32>) + ; CHECK-NEXT: G_STORE [[UV16]](<4 x i32>), [[COPY1]](p1) :: (store (<4 x i32>), align 4, addrspace 1) + ; CHECK-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C]](i64) + ; CHECK-NEXT: G_STORE [[UV17]](<4 x i32>), [[PTR_ADD15]](p1) :: (store (<4 x i32>) into unknown-address + 16, align 4, addrspace 1) + ; CHECK-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C1]](i64) + ; CHECK-NEXT: G_STORE [[UV18]](<4 x i32>), [[PTR_ADD16]](p1) :: (store (<4 x i32>) into unknown-address + 32, align 4, addrspace 1) + ; CHECK-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C2]](i64) + ; CHECK-NEXT: G_STORE [[UV19]](<4 x i32>), [[PTR_ADD17]](p1) :: (store (<4 x i32>) into unknown-address + 48, align 4, addrspace 1) + ; CHECK-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C3]](i64) + ; CHECK-NEXT: G_STORE [[UV20]](<4 x i32>), [[PTR_ADD18]](p1) :: (store (<4 x i32>) into unknown-address + 64, align 4, addrspace 1) + ; CHECK-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C4]](i64) + ; CHECK-NEXT: G_STORE [[UV21]](<4 x i32>), [[PTR_ADD19]](p1) :: (store (<4 x i32>) into unknown-address + 80, align 4, addrspace 1) + ; CHECK-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C5]](i64) + ; CHECK-NEXT: G_STORE [[UV22]](<4 x i32>), [[PTR_ADD20]](p1) :: (store (<4 x i32>) into unknown-address + 96, align 4, addrspace 1) + ; CHECK-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C6]](i64) + ; CHECK-NEXT: G_STORE [[UV23]](<4 x i32>), [[PTR_ADD21]](p1) :: (store (<4 x i32>) into unknown-address + 112, align 4, addrspace 1) + ; CHECK-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C7]](i64) + ; CHECK-NEXT: G_STORE [[UV24]](<4 x i32>), [[PTR_ADD22]](p1) :: (store (<4 x i32>) into unknown-address + 128, align 4, addrspace 1) + ; CHECK-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C8]](i64) + ; CHECK-NEXT: G_STORE [[UV25]](<4 x i32>), [[PTR_ADD23]](p1) :: (store (<4 x i32>) into unknown-address + 144, align 4, addrspace 1) + ; CHECK-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C9]](i64) + ; CHECK-NEXT: G_STORE [[UV26]](<4 x i32>), [[PTR_ADD24]](p1) :: (store (<4 x i32>) into unknown-address + 160, align 4, addrspace 1) + ; CHECK-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C10]](i64) + ; CHECK-NEXT: G_STORE [[UV27]](<4 x i32>), [[PTR_ADD25]](p1) :: (store (<4 x i32>) into unknown-address + 176, align 4, addrspace 1) + ; CHECK-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C11]](i64) + ; CHECK-NEXT: G_STORE [[UV28]](<4 x i32>), [[PTR_ADD26]](p1) :: (store (<4 x i32>) into unknown-address + 192, align 4, addrspace 1) + ; CHECK-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C12]](i64) + ; CHECK-NEXT: G_STORE [[UV29]](<4 x i32>), [[PTR_ADD27]](p1) :: (store (<4 x i32>) into unknown-address + 208, align 4, addrspace 1) + ; CHECK-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C13]](i64) + ; CHECK-NEXT: G_STORE [[UV30]](<4 x i32>), [[PTR_ADD28]](p1) :: (store (<4 x i32>) into unknown-address + 224, align 4, addrspace 1) + ; CHECK-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C14]](i64) + ; CHECK-NEXT: G_STORE [[UV31]](<4 x i32>), [[PTR_ADD29]](p1) :: (store (<4 x i32>) into unknown-address + 240, align 4, addrspace 1) %0:_(p1) = COPY 
$sgpr0_sgpr1 - %1:_(s32) = G_CONSTANT i32 64 - %2:_(<64 x s32>) = G_LOAD %0 :: (load (<64 x s32>), align 4, addrspace 4) - %3:_(s32) = G_CONSTANT i32 12345 - %4:_(<64 x s32>) = G_INSERT_VECTOR_ELT %2, %3, %1 - %5:_(s32) = G_CONSTANT i32 65 - %6:_(<64 x s32>) = G_INSERT_VECTOR_ELT %2, %3, %5 + %1:_(i32) = G_CONSTANT i32 64 + %2:_(<64 x i32>) = G_LOAD %0(p1) :: (load (<64 x i32>), align 4, addrspace 4) + %3:_(i32) = G_CONSTANT i32 12345 + %4:_(<64 x i32>) = G_INSERT_VECTOR_ELT %2, %3(i32), %1(i32) + %5:_(i32) = G_CONSTANT i32 65 + %6:_(<64 x i32>) = G_INSERT_VECTOR_ELT %2, %3(i32), %5(i32) %7:_(p1) = COPY $vgpr0_vgpr1 %8:_(p1) = COPY $vgpr2_vgpr3 - G_STORE %4, %7 :: (store (<64 x s32>), align 4, addrspace 1) - G_STORE %6, %8 :: (store (<64 x s32>), align 4, addrspace 1) + G_STORE %4(<64 x i32>), %7(p1) :: (store (<64 x i32>), align 4, addrspace 1) + G_STORE %6(<64 x i32>), %8(p1) :: (store (<64 x i32>), align 4, addrspace 1) ... --- @@ -298,74 +298,74 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load (<16 x s32>), align 4, addrspace 4) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<16 x s32>) from unknown-address + 64, align 4, addrspace 4) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p1) :: (load (<16 x s32>) from unknown-address + 128, align 4, addrspace 4) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 192 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD2]](p1) :: (load (<16 x s32>) from unknown-address + 192, align 4, addrspace 4) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 12345 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<16 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[C3]](s32), [[UV2]](s32), [[UV3]](s32) - ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(<4 x s32>), [[UV17:%[0-9]+]]:_(<4 x s32>), [[UV18:%[0-9]+]]:_(<4 x s32>), [[UV19:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[LOAD2]](<16 x s32>) - ; CHECK-NEXT: [[UV20:%[0-9]+]]:_(<4 x s32>), [[UV21:%[0-9]+]]:_(<4 x s32>), [[UV22:%[0-9]+]]:_(<4 x s32>), [[UV23:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[LOAD3]](<16 x s32>) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<16 x i32>) = G_LOAD [[COPY]](p1) :: (load (<16 x i32>), align 4, addrspace 4) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 64 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x i32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<16 x i32>) from unknown-address + 64, align 4, addrspace 4) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 128 + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD 
[[COPY]], [[C1]](i64) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(<16 x i32>) = G_LOAD [[PTR_ADD1]](p1) :: (load (<16 x i32>) from unknown-address + 128, align 4, addrspace 4) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 192 + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(<16 x i32>) = G_LOAD [[PTR_ADD2]](p1) :: (load (<16 x i32>) from unknown-address + 192, align 4, addrspace 4) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 12345 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32), [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32), [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32), [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD2]](<16 x i32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[C3]](i32), [[UV2]](i32), [[UV3]](i32) + ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(<4 x i32>), [[UV17:%[0-9]+]]:_(<4 x i32>), [[UV18:%[0-9]+]]:_(<4 x i32>), [[UV19:%[0-9]+]]:_(<4 x i32>) = G_UNMERGE_VALUES [[LOAD2]](<16 x i32>) + ; CHECK-NEXT: [[UV20:%[0-9]+]]:_(<4 x i32>), [[UV21:%[0-9]+]]:_(<4 x i32>), [[UV22:%[0-9]+]]:_(<4 x i32>), [[UV23:%[0-9]+]]:_(<4 x i32>) = G_UNMERGE_VALUES [[LOAD3]](<16 x i32>) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV24:%[0-9]+]]:_(<4 x s32>), [[UV25:%[0-9]+]]:_(<4 x s32>), [[UV26:%[0-9]+]]:_(<4 x s32>), [[UV27:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[LOAD]](<16 x s32>) - ; CHECK-NEXT: [[UV28:%[0-9]+]]:_(<4 x s32>), [[UV29:%[0-9]+]]:_(<4 x s32>), [[UV30:%[0-9]+]]:_(<4 x s32>), [[UV31:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>) - ; CHECK-NEXT: G_STORE [[UV24]](<4 x s32>), [[COPY1]](p1) :: (store (<4 x s32>), align 4, addrspace 1) - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C4]](s64) - ; CHECK-NEXT: G_STORE [[UV25]](<4 x s32>), [[PTR_ADD3]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C5]](s64) - ; CHECK-NEXT: G_STORE [[UV26]](<4 x s32>), [[PTR_ADD4]](p1) :: (store (<4 x s32>) into unknown-address + 32, align 4, addrspace 1) - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 - ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C6]](s64) - ; CHECK-NEXT: G_STORE [[UV27]](<4 x s32>), [[PTR_ADD5]](p1) :: (store (<4 x s32>) into unknown-address + 48, align 4, addrspace 1) - ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C]](s64) - ; CHECK-NEXT: G_STORE [[UV28]](<4 x s32>), [[PTR_ADD6]](p1) :: (store (<4 x s32>) into unknown-address + 64, align 4, addrspace 1) - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 80 - ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C7]](s64) - ; CHECK-NEXT: G_STORE [[UV29]](<4 x s32>), [[PTR_ADD7]](p1) :: (store (<4 x s32>) into unknown-address + 80, align 4, addrspace 1) - ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 96 - ; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C8]](s64) - ; CHECK-NEXT: G_STORE [[UV30]](<4 x s32>), [[PTR_ADD8]](p1) :: (store (<4 x s32>) into unknown-address + 96, align 4, addrspace 1) - ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s64) 
= G_CONSTANT i64 112 - ; CHECK-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C9]](s64) - ; CHECK-NEXT: G_STORE [[UV31]](<4 x s32>), [[PTR_ADD9]](p1) :: (store (<4 x s32>) into unknown-address + 112, align 4, addrspace 1) - ; CHECK-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C1]](s64) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[PTR_ADD10]](p1) :: (store (<4 x s32>) into unknown-address + 128, align 4, addrspace 1) - ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 144 - ; CHECK-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C10]](s64) - ; CHECK-NEXT: G_STORE [[UV17]](<4 x s32>), [[PTR_ADD11]](p1) :: (store (<4 x s32>) into unknown-address + 144, align 4, addrspace 1) - ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 160 - ; CHECK-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C11]](s64) - ; CHECK-NEXT: G_STORE [[UV18]](<4 x s32>), [[PTR_ADD12]](p1) :: (store (<4 x s32>) into unknown-address + 160, align 4, addrspace 1) - ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 176 - ; CHECK-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C12]](s64) - ; CHECK-NEXT: G_STORE [[UV19]](<4 x s32>), [[PTR_ADD13]](p1) :: (store (<4 x s32>) into unknown-address + 176, align 4, addrspace 1) - ; CHECK-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C2]](s64) - ; CHECK-NEXT: G_STORE [[UV20]](<4 x s32>), [[PTR_ADD14]](p1) :: (store (<4 x s32>) into unknown-address + 192, align 4, addrspace 1) - ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 208 - ; CHECK-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C13]](s64) - ; CHECK-NEXT: G_STORE [[UV21]](<4 x s32>), [[PTR_ADD15]](p1) :: (store (<4 x s32>) into unknown-address + 208, align 4, addrspace 1) - ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 224 - ; CHECK-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C14]](s64) - ; CHECK-NEXT: G_STORE [[UV22]](<4 x s32>), [[PTR_ADD16]](p1) :: (store (<4 x s32>) into unknown-address + 224, align 4, addrspace 1) - ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s64) = G_CONSTANT i64 240 - ; CHECK-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C15]](s64) - ; CHECK-NEXT: G_STORE [[UV23]](<4 x s32>), [[PTR_ADD17]](p1) :: (store (<4 x s32>) into unknown-address + 240, align 4, addrspace 1) + ; CHECK-NEXT: [[UV24:%[0-9]+]]:_(<4 x i32>), [[UV25:%[0-9]+]]:_(<4 x i32>), [[UV26:%[0-9]+]]:_(<4 x i32>), [[UV27:%[0-9]+]]:_(<4 x i32>) = G_UNMERGE_VALUES [[LOAD]](<16 x i32>) + ; CHECK-NEXT: [[UV28:%[0-9]+]]:_(<4 x i32>), [[UV29:%[0-9]+]]:_(<4 x i32>), [[UV30:%[0-9]+]]:_(<4 x i32>), [[UV31:%[0-9]+]]:_(<4 x i32>) = G_UNMERGE_VALUES [[LOAD1]](<16 x i32>) + ; CHECK-NEXT: G_STORE [[UV24]](<4 x i32>), [[COPY1]](p1) :: (store (<4 x i32>), align 4, addrspace 1) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C4]](i64) + ; CHECK-NEXT: G_STORE [[UV25]](<4 x i32>), [[PTR_ADD3]](p1) :: (store (<4 x i32>) into unknown-address + 16, align 4, addrspace 1) + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 32 + ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C5]](i64) + ; CHECK-NEXT: G_STORE [[UV26]](<4 x i32>), [[PTR_ADD4]](p1) :: (store (<4 x i32>) into unknown-address + 32, align 4, addrspace 1) + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 48 + ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C6]](i64) + ; CHECK-NEXT: G_STORE [[UV27]](<4 x i32>), [[PTR_ADD5]](p1) :: (store (<4 x i32>) into 
unknown-address + 48, align 4, addrspace 1) + ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C]](i64) + ; CHECK-NEXT: G_STORE [[UV28]](<4 x i32>), [[PTR_ADD6]](p1) :: (store (<4 x i32>) into unknown-address + 64, align 4, addrspace 1) + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 80 + ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C7]](i64) + ; CHECK-NEXT: G_STORE [[UV29]](<4 x i32>), [[PTR_ADD7]](p1) :: (store (<4 x i32>) into unknown-address + 80, align 4, addrspace 1) + ; CHECK-NEXT: [[C8:%[0-9]+]]:_(i64) = G_CONSTANT i64 96 + ; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C8]](i64) + ; CHECK-NEXT: G_STORE [[UV30]](<4 x i32>), [[PTR_ADD8]](p1) :: (store (<4 x i32>) into unknown-address + 96, align 4, addrspace 1) + ; CHECK-NEXT: [[C9:%[0-9]+]]:_(i64) = G_CONSTANT i64 112 + ; CHECK-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C9]](i64) + ; CHECK-NEXT: G_STORE [[UV31]](<4 x i32>), [[PTR_ADD9]](p1) :: (store (<4 x i32>) into unknown-address + 112, align 4, addrspace 1) + ; CHECK-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C1]](i64) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[PTR_ADD10]](p1) :: (store (<4 x i32>) into unknown-address + 128, align 4, addrspace 1) + ; CHECK-NEXT: [[C10:%[0-9]+]]:_(i64) = G_CONSTANT i64 144 + ; CHECK-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C10]](i64) + ; CHECK-NEXT: G_STORE [[UV17]](<4 x i32>), [[PTR_ADD11]](p1) :: (store (<4 x i32>) into unknown-address + 144, align 4, addrspace 1) + ; CHECK-NEXT: [[C11:%[0-9]+]]:_(i64) = G_CONSTANT i64 160 + ; CHECK-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C11]](i64) + ; CHECK-NEXT: G_STORE [[UV18]](<4 x i32>), [[PTR_ADD12]](p1) :: (store (<4 x i32>) into unknown-address + 160, align 4, addrspace 1) + ; CHECK-NEXT: [[C12:%[0-9]+]]:_(i64) = G_CONSTANT i64 176 + ; CHECK-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C12]](i64) + ; CHECK-NEXT: G_STORE [[UV19]](<4 x i32>), [[PTR_ADD13]](p1) :: (store (<4 x i32>) into unknown-address + 176, align 4, addrspace 1) + ; CHECK-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C2]](i64) + ; CHECK-NEXT: G_STORE [[UV20]](<4 x i32>), [[PTR_ADD14]](p1) :: (store (<4 x i32>) into unknown-address + 192, align 4, addrspace 1) + ; CHECK-NEXT: [[C13:%[0-9]+]]:_(i64) = G_CONSTANT i64 208 + ; CHECK-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C13]](i64) + ; CHECK-NEXT: G_STORE [[UV21]](<4 x i32>), [[PTR_ADD15]](p1) :: (store (<4 x i32>) into unknown-address + 208, align 4, addrspace 1) + ; CHECK-NEXT: [[C14:%[0-9]+]]:_(i64) = G_CONSTANT i64 224 + ; CHECK-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C14]](i64) + ; CHECK-NEXT: G_STORE [[UV22]](<4 x i32>), [[PTR_ADD16]](p1) :: (store (<4 x i32>) into unknown-address + 224, align 4, addrspace 1) + ; CHECK-NEXT: [[C15:%[0-9]+]]:_(i64) = G_CONSTANT i64 240 + ; CHECK-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C15]](i64) + ; CHECK-NEXT: G_STORE [[UV23]](<4 x i32>), [[PTR_ADD17]](p1) :: (store (<4 x i32>) into unknown-address + 240, align 4, addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_CONSTANT i32 33 - %2:_(<64 x s32>) = G_LOAD %0 :: (load (<64 x s32>), align 4, addrspace 4) - %3:_(s32) = G_CONSTANT i32 12345 - %4:_(<64 x s32>) = G_INSERT_VECTOR_ELT %2, %3, %1 + %1:_(i32) = G_CONSTANT i32 33 + %2:_(<64 x i32>) = G_LOAD %0(p1) :: (load (<64 x i32>), align 4, addrspace 4) + %3:_(i32) = G_CONSTANT i32 12345 + %4:_(<64 x i32>) = 
G_INSERT_VECTOR_ELT %2, %3(i32), %1(i32) %5:_(p1) = COPY $vgpr0_vgpr1 - G_STORE %4, %5 :: (store (<64 x s32>), align 4, addrspace 1) + G_STORE %4(<64 x i32>), %5(p1) :: (store (<64 x i32>), align 4, addrspace 1) ... --- @@ -379,412 +379,412 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2, $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load (<16 x s32>), align 4, addrspace 4) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<16 x s32>) from unknown-address + 64, align 4, addrspace 4) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p1) :: (load (<16 x s32>) from unknown-address + 128, align 4, addrspace 4) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 192 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD2]](p1) :: (load (<16 x s32>) from unknown-address + 192, align 4, addrspace 4) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 12345 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<16 x i32>) = G_LOAD [[COPY]](p1) :: (load (<16 x i32>), align 4, addrspace 4) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 64 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x i32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<16 x i32>) from unknown-address + 64, align 4, addrspace 4) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 128 + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(<16 x i32>) = G_LOAD [[PTR_ADD1]](p1) :: (load (<16 x i32>) from unknown-address + 128, align 4, addrspace 4) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 192 + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(<16 x i32>) = G_LOAD [[PTR_ADD2]](p1) :: (load (<16 x i32>) from unknown-address + 192, align 4, addrspace 4) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 12345 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<16 x s32>) - ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>) - ; CHECK-NEXT: [[UV32:%[0-9]+]]:_(s32), 
[[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<16 x s32>) - ; CHECK-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD3]](<16 x s32>) - ; CHECK-NEXT: G_STORE [[UV]](s32), [[FRAME_INDEX]](p5) :: (store (s32) into %stack.0, align 256, addrspace 5) - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C4]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32), [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32), [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32), [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<16 x i32>) + ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32), [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32), [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32), [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32), [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32), [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32), [[UV28:%[0-9]+]]:_(i32), [[UV29:%[0-9]+]]:_(i32), [[UV30:%[0-9]+]]:_(i32), [[UV31:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD1]](<16 x i32>) + ; CHECK-NEXT: [[UV32:%[0-9]+]]:_(i32), [[UV33:%[0-9]+]]:_(i32), [[UV34:%[0-9]+]]:_(i32), [[UV35:%[0-9]+]]:_(i32), [[UV36:%[0-9]+]]:_(i32), [[UV37:%[0-9]+]]:_(i32), [[UV38:%[0-9]+]]:_(i32), [[UV39:%[0-9]+]]:_(i32), [[UV40:%[0-9]+]]:_(i32), [[UV41:%[0-9]+]]:_(i32), [[UV42:%[0-9]+]]:_(i32), [[UV43:%[0-9]+]]:_(i32), [[UV44:%[0-9]+]]:_(i32), [[UV45:%[0-9]+]]:_(i32), [[UV46:%[0-9]+]]:_(i32), [[UV47:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD2]](<16 x i32>) + ; CHECK-NEXT: [[UV48:%[0-9]+]]:_(i32), [[UV49:%[0-9]+]]:_(i32), [[UV50:%[0-9]+]]:_(i32), [[UV51:%[0-9]+]]:_(i32), [[UV52:%[0-9]+]]:_(i32), [[UV53:%[0-9]+]]:_(i32), [[UV54:%[0-9]+]]:_(i32), [[UV55:%[0-9]+]]:_(i32), [[UV56:%[0-9]+]]:_(i32), [[UV57:%[0-9]+]]:_(i32), [[UV58:%[0-9]+]]:_(i32), [[UV59:%[0-9]+]]:_(i32), [[UV60:%[0-9]+]]:_(i32), [[UV61:%[0-9]+]]:_(i32), [[UV62:%[0-9]+]]:_(i32), [[UV63:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD3]](<16 x i32>) + ; CHECK-NEXT: G_STORE [[UV]](i32), [[FRAME_INDEX]](p5) :: (store (i32) into %stack.0, align 256, addrspace 5) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C4]](i32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p5) = COPY [[PTR_ADD3]](p5) - ; CHECK-NEXT: G_STORE [[UV1]](s32), [[COPY2]](p5) :: (store (s32) into %stack.0 + 4, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C5]](s32) + ; CHECK-NEXT: G_STORE [[UV1]](i32), 
[[COPY2]](p5) :: (store (i32) into %stack.0 + 4, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C5]](i32) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(p5) = COPY [[PTR_ADD4]](p5) - ; CHECK-NEXT: G_STORE [[UV2]](s32), [[COPY3]](p5) :: (store (s32) into %stack.0 + 8, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C6]](s32) + ; CHECK-NEXT: G_STORE [[UV2]](i32), [[COPY3]](p5) :: (store (i32) into %stack.0 + 8, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C6]](i32) ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p5) = COPY [[PTR_ADD5]](p5) - ; CHECK-NEXT: G_STORE [[UV3]](s32), [[COPY4]](p5) :: (store (s32) into %stack.0 + 12, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C7]](s32) + ; CHECK-NEXT: G_STORE [[UV3]](i32), [[COPY4]](p5) :: (store (i32) into %stack.0 + 12, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C7]](i32) ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(p5) = COPY [[PTR_ADD6]](p5) - ; CHECK-NEXT: G_STORE [[UV4]](s32), [[COPY5]](p5) :: (store (s32) into %stack.0 + 16, align 16, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C8]](s32) + ; CHECK-NEXT: G_STORE [[UV4]](i32), [[COPY5]](p5) :: (store (i32) into %stack.0 + 16, align 16, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C8:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C8]](i32) ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(p5) = COPY [[PTR_ADD7]](p5) - ; CHECK-NEXT: G_STORE [[UV5]](s32), [[COPY6]](p5) :: (store (s32) into %stack.0 + 20, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C9]](s32) + ; CHECK-NEXT: G_STORE [[UV5]](i32), [[COPY6]](p5) :: (store (i32) into %stack.0 + 20, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C9:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C9]](i32) ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p5) = COPY [[PTR_ADD8]](p5) - ; CHECK-NEXT: G_STORE [[UV6]](s32), [[COPY7]](p5) :: (store (s32) into %stack.0 + 24, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; CHECK-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C10]](s32) + ; CHECK-NEXT: G_STORE [[UV6]](i32), [[COPY7]](p5) :: (store (i32) into %stack.0 + 24, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C10:%[0-9]+]]:_(i32) = G_CONSTANT i32 28 + ; CHECK-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C10]](i32) ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p5) = COPY [[PTR_ADD9]](p5) - ; CHECK-NEXT: G_STORE [[UV7]](s32), [[COPY8]](p5) :: (store (s32) into %stack.0 + 28, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CHECK-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C11]](s32) + ; CHECK-NEXT: G_STORE [[UV7]](i32), [[COPY8]](p5) :: (store 
(i32) into %stack.0 + 28, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C11:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; CHECK-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C11]](i32) ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p5) = COPY [[PTR_ADD10]](p5) - ; CHECK-NEXT: G_STORE [[UV8]](s32), [[COPY9]](p5) :: (store (s32) into %stack.0 + 32, align 32, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 36 - ; CHECK-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C12]](s32) + ; CHECK-NEXT: G_STORE [[UV8]](i32), [[COPY9]](p5) :: (store (i32) into %stack.0 + 32, align 32, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C12:%[0-9]+]]:_(i32) = G_CONSTANT i32 36 + ; CHECK-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C12]](i32) ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p5) = COPY [[PTR_ADD11]](p5) - ; CHECK-NEXT: G_STORE [[UV9]](s32), [[COPY10]](p5) :: (store (s32) into %stack.0 + 36, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 40 - ; CHECK-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C13]](s32) + ; CHECK-NEXT: G_STORE [[UV9]](i32), [[COPY10]](p5) :: (store (i32) into %stack.0 + 36, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C13:%[0-9]+]]:_(i32) = G_CONSTANT i32 40 + ; CHECK-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C13]](i32) ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p5) = COPY [[PTR_ADD12]](p5) - ; CHECK-NEXT: G_STORE [[UV10]](s32), [[COPY11]](p5) :: (store (s32) into %stack.0 + 40, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 44 - ; CHECK-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C14]](s32) + ; CHECK-NEXT: G_STORE [[UV10]](i32), [[COPY11]](p5) :: (store (i32) into %stack.0 + 40, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C14:%[0-9]+]]:_(i32) = G_CONSTANT i32 44 + ; CHECK-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C14]](i32) ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p5) = COPY [[PTR_ADD13]](p5) - ; CHECK-NEXT: G_STORE [[UV11]](s32), [[COPY12]](p5) :: (store (s32) into %stack.0 + 44, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 - ; CHECK-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C15]](s32) + ; CHECK-NEXT: G_STORE [[UV11]](i32), [[COPY12]](p5) :: (store (i32) into %stack.0 + 44, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C15:%[0-9]+]]:_(i32) = G_CONSTANT i32 48 + ; CHECK-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C15]](i32) ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(p5) = COPY [[PTR_ADD14]](p5) - ; CHECK-NEXT: G_STORE [[UV12]](s32), [[COPY13]](p5) :: (store (s32) into %stack.0 + 48, align 16, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 52 - ; CHECK-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C16]](s32) + ; CHECK-NEXT: G_STORE [[UV12]](i32), [[COPY13]](p5) :: (store (i32) into %stack.0 + 48, align 16, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C16:%[0-9]+]]:_(i32) = G_CONSTANT i32 52 + ; CHECK-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C16]](i32) ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(p5) = COPY [[PTR_ADD15]](p5) - ; CHECK-NEXT: G_STORE [[UV13]](s32), [[COPY14]](p5) :: (store (s32) into %stack.0 + 52, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 56 - ; CHECK-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C17]](s32) + ; CHECK-NEXT: G_STORE 
[[UV13]](i32), [[COPY14]](p5) :: (store (i32) into %stack.0 + 52, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C17:%[0-9]+]]:_(i32) = G_CONSTANT i32 56 + ; CHECK-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C17]](i32) ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(p5) = COPY [[PTR_ADD16]](p5) - ; CHECK-NEXT: G_STORE [[UV14]](s32), [[COPY15]](p5) :: (store (s32) into %stack.0 + 56, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 60 - ; CHECK-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C18]](s32) + ; CHECK-NEXT: G_STORE [[UV14]](i32), [[COPY15]](p5) :: (store (i32) into %stack.0 + 56, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C18:%[0-9]+]]:_(i32) = G_CONSTANT i32 60 + ; CHECK-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C18]](i32) ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(p5) = COPY [[PTR_ADD17]](p5) - ; CHECK-NEXT: G_STORE [[UV15]](s32), [[COPY16]](p5) :: (store (s32) into %stack.0 + 60, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; CHECK-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C19]](s32) + ; CHECK-NEXT: G_STORE [[UV15]](i32), [[COPY16]](p5) :: (store (i32) into %stack.0 + 60, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C19:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; CHECK-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C19]](i32) ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(p5) = COPY [[PTR_ADD18]](p5) - ; CHECK-NEXT: G_STORE [[UV16]](s32), [[COPY17]](p5) :: (store (s32) into %stack.0 + 64, align 64, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 68 - ; CHECK-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C20]](s32) + ; CHECK-NEXT: G_STORE [[UV16]](i32), [[COPY17]](p5) :: (store (i32) into %stack.0 + 64, align 64, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C20:%[0-9]+]]:_(i32) = G_CONSTANT i32 68 + ; CHECK-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C20]](i32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(p5) = COPY [[PTR_ADD19]](p5) - ; CHECK-NEXT: G_STORE [[UV17]](s32), [[COPY18]](p5) :: (store (s32) into %stack.0 + 68, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 72 - ; CHECK-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C21]](s32) + ; CHECK-NEXT: G_STORE [[UV17]](i32), [[COPY18]](p5) :: (store (i32) into %stack.0 + 68, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C21:%[0-9]+]]:_(i32) = G_CONSTANT i32 72 + ; CHECK-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C21]](i32) ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(p5) = COPY [[PTR_ADD20]](p5) - ; CHECK-NEXT: G_STORE [[UV18]](s32), [[COPY19]](p5) :: (store (s32) into %stack.0 + 72, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 76 - ; CHECK-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C22]](s32) + ; CHECK-NEXT: G_STORE [[UV18]](i32), [[COPY19]](p5) :: (store (i32) into %stack.0 + 72, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C22:%[0-9]+]]:_(i32) = G_CONSTANT i32 76 + ; CHECK-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C22]](i32) ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(p5) = COPY [[PTR_ADD21]](p5) - ; CHECK-NEXT: G_STORE [[UV19]](s32), [[COPY20]](p5) :: (store (s32) into %stack.0 + 76, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 80 - ; CHECK-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p5) = G_PTR_ADD 
[[FRAME_INDEX]], [[C23]](s32) + ; CHECK-NEXT: G_STORE [[UV19]](i32), [[COPY20]](p5) :: (store (i32) into %stack.0 + 76, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C23:%[0-9]+]]:_(i32) = G_CONSTANT i32 80 + ; CHECK-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C23]](i32) ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(p5) = COPY [[PTR_ADD22]](p5) - ; CHECK-NEXT: G_STORE [[UV20]](s32), [[COPY21]](p5) :: (store (s32) into %stack.0 + 80, align 16, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 84 - ; CHECK-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C24]](s32) + ; CHECK-NEXT: G_STORE [[UV20]](i32), [[COPY21]](p5) :: (store (i32) into %stack.0 + 80, align 16, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C24:%[0-9]+]]:_(i32) = G_CONSTANT i32 84 + ; CHECK-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C24]](i32) ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(p5) = COPY [[PTR_ADD23]](p5) - ; CHECK-NEXT: G_STORE [[UV21]](s32), [[COPY22]](p5) :: (store (s32) into %stack.0 + 84, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 88 - ; CHECK-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C25]](s32) + ; CHECK-NEXT: G_STORE [[UV21]](i32), [[COPY22]](p5) :: (store (i32) into %stack.0 + 84, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C25:%[0-9]+]]:_(i32) = G_CONSTANT i32 88 + ; CHECK-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C25]](i32) ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(p5) = COPY [[PTR_ADD24]](p5) - ; CHECK-NEXT: G_STORE [[UV22]](s32), [[COPY23]](p5) :: (store (s32) into %stack.0 + 88, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 92 - ; CHECK-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C26]](s32) + ; CHECK-NEXT: G_STORE [[UV22]](i32), [[COPY23]](p5) :: (store (i32) into %stack.0 + 88, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C26:%[0-9]+]]:_(i32) = G_CONSTANT i32 92 + ; CHECK-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C26]](i32) ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(p5) = COPY [[PTR_ADD25]](p5) - ; CHECK-NEXT: G_STORE [[UV23]](s32), [[COPY24]](p5) :: (store (s32) into %stack.0 + 92, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 96 - ; CHECK-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C27]](s32) + ; CHECK-NEXT: G_STORE [[UV23]](i32), [[COPY24]](p5) :: (store (i32) into %stack.0 + 92, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C27:%[0-9]+]]:_(i32) = G_CONSTANT i32 96 + ; CHECK-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C27]](i32) ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(p5) = COPY [[PTR_ADD26]](p5) - ; CHECK-NEXT: G_STORE [[UV24]](s32), [[COPY25]](p5) :: (store (s32) into %stack.0 + 96, align 32, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 100 - ; CHECK-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C28]](s32) + ; CHECK-NEXT: G_STORE [[UV24]](i32), [[COPY25]](p5) :: (store (i32) into %stack.0 + 96, align 32, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C28:%[0-9]+]]:_(i32) = G_CONSTANT i32 100 + ; CHECK-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C28]](i32) ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(p5) = COPY [[PTR_ADD27]](p5) - ; CHECK-NEXT: G_STORE [[UV25]](s32), [[COPY26]](p5) :: (store (s32) into %stack.0 + 100, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 104 - ; 
CHECK-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C29]](s32) + ; CHECK-NEXT: G_STORE [[UV25]](i32), [[COPY26]](p5) :: (store (i32) into %stack.0 + 100, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C29:%[0-9]+]]:_(i32) = G_CONSTANT i32 104 + ; CHECK-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C29]](i32) ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(p5) = COPY [[PTR_ADD28]](p5) - ; CHECK-NEXT: G_STORE [[UV26]](s32), [[COPY27]](p5) :: (store (s32) into %stack.0 + 104, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 108 - ; CHECK-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C30]](s32) + ; CHECK-NEXT: G_STORE [[UV26]](i32), [[COPY27]](p5) :: (store (i32) into %stack.0 + 104, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C30:%[0-9]+]]:_(i32) = G_CONSTANT i32 108 + ; CHECK-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C30]](i32) ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(p5) = COPY [[PTR_ADD29]](p5) - ; CHECK-NEXT: G_STORE [[UV27]](s32), [[COPY28]](p5) :: (store (s32) into %stack.0 + 108, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C31:%[0-9]+]]:_(s32) = G_CONSTANT i32 112 - ; CHECK-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C31]](s32) + ; CHECK-NEXT: G_STORE [[UV27]](i32), [[COPY28]](p5) :: (store (i32) into %stack.0 + 108, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C31:%[0-9]+]]:_(i32) = G_CONSTANT i32 112 + ; CHECK-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C31]](i32) ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(p5) = COPY [[PTR_ADD30]](p5) - ; CHECK-NEXT: G_STORE [[UV28]](s32), [[COPY29]](p5) :: (store (s32) into %stack.0 + 112, align 16, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C32:%[0-9]+]]:_(s32) = G_CONSTANT i32 116 - ; CHECK-NEXT: [[PTR_ADD31:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C32]](s32) + ; CHECK-NEXT: G_STORE [[UV28]](i32), [[COPY29]](p5) :: (store (i32) into %stack.0 + 112, align 16, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C32:%[0-9]+]]:_(i32) = G_CONSTANT i32 116 + ; CHECK-NEXT: [[PTR_ADD31:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C32]](i32) ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(p5) = COPY [[PTR_ADD31]](p5) - ; CHECK-NEXT: G_STORE [[UV29]](s32), [[COPY30]](p5) :: (store (s32) into %stack.0 + 116, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C33:%[0-9]+]]:_(s32) = G_CONSTANT i32 120 - ; CHECK-NEXT: [[PTR_ADD32:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C33]](s32) + ; CHECK-NEXT: G_STORE [[UV29]](i32), [[COPY30]](p5) :: (store (i32) into %stack.0 + 116, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C33:%[0-9]+]]:_(i32) = G_CONSTANT i32 120 + ; CHECK-NEXT: [[PTR_ADD32:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C33]](i32) ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p5) = COPY [[PTR_ADD32]](p5) - ; CHECK-NEXT: G_STORE [[UV30]](s32), [[COPY31]](p5) :: (store (s32) into %stack.0 + 120, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C34:%[0-9]+]]:_(s32) = G_CONSTANT i32 124 - ; CHECK-NEXT: [[PTR_ADD33:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C34]](s32) + ; CHECK-NEXT: G_STORE [[UV30]](i32), [[COPY31]](p5) :: (store (i32) into %stack.0 + 120, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C34:%[0-9]+]]:_(i32) = G_CONSTANT i32 124 + ; CHECK-NEXT: [[PTR_ADD33:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C34]](i32) ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p5) = COPY [[PTR_ADD33]](p5) - ; CHECK-NEXT: G_STORE [[UV31]](s32), [[COPY32]](p5) :: (store (s32) into %stack.0 + 124, basealign 256, addrspace 
5) - ; CHECK-NEXT: [[C35:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 - ; CHECK-NEXT: [[PTR_ADD34:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C35]](s32) + ; CHECK-NEXT: G_STORE [[UV31]](i32), [[COPY32]](p5) :: (store (i32) into %stack.0 + 124, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C35:%[0-9]+]]:_(i32) = G_CONSTANT i32 128 + ; CHECK-NEXT: [[PTR_ADD34:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C35]](i32) ; CHECK-NEXT: [[COPY33:%[0-9]+]]:_(p5) = COPY [[PTR_ADD34]](p5) - ; CHECK-NEXT: G_STORE [[UV32]](s32), [[COPY33]](p5) :: (store (s32) into %stack.0 + 128, align 128, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C36:%[0-9]+]]:_(s32) = G_CONSTANT i32 132 - ; CHECK-NEXT: [[PTR_ADD35:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C36]](s32) + ; CHECK-NEXT: G_STORE [[UV32]](i32), [[COPY33]](p5) :: (store (i32) into %stack.0 + 128, align 128, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C36:%[0-9]+]]:_(i32) = G_CONSTANT i32 132 + ; CHECK-NEXT: [[PTR_ADD35:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C36]](i32) ; CHECK-NEXT: [[COPY34:%[0-9]+]]:_(p5) = COPY [[PTR_ADD35]](p5) - ; CHECK-NEXT: G_STORE [[UV33]](s32), [[COPY34]](p5) :: (store (s32) into %stack.0 + 132, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C37:%[0-9]+]]:_(s32) = G_CONSTANT i32 136 - ; CHECK-NEXT: [[PTR_ADD36:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C37]](s32) + ; CHECK-NEXT: G_STORE [[UV33]](i32), [[COPY34]](p5) :: (store (i32) into %stack.0 + 132, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C37:%[0-9]+]]:_(i32) = G_CONSTANT i32 136 + ; CHECK-NEXT: [[PTR_ADD36:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C37]](i32) ; CHECK-NEXT: [[COPY35:%[0-9]+]]:_(p5) = COPY [[PTR_ADD36]](p5) - ; CHECK-NEXT: G_STORE [[UV34]](s32), [[COPY35]](p5) :: (store (s32) into %stack.0 + 136, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C38:%[0-9]+]]:_(s32) = G_CONSTANT i32 140 - ; CHECK-NEXT: [[PTR_ADD37:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C38]](s32) + ; CHECK-NEXT: G_STORE [[UV34]](i32), [[COPY35]](p5) :: (store (i32) into %stack.0 + 136, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C38:%[0-9]+]]:_(i32) = G_CONSTANT i32 140 + ; CHECK-NEXT: [[PTR_ADD37:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C38]](i32) ; CHECK-NEXT: [[COPY36:%[0-9]+]]:_(p5) = COPY [[PTR_ADD37]](p5) - ; CHECK-NEXT: G_STORE [[UV35]](s32), [[COPY36]](p5) :: (store (s32) into %stack.0 + 140, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C39:%[0-9]+]]:_(s32) = G_CONSTANT i32 144 - ; CHECK-NEXT: [[PTR_ADD38:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C39]](s32) + ; CHECK-NEXT: G_STORE [[UV35]](i32), [[COPY36]](p5) :: (store (i32) into %stack.0 + 140, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C39:%[0-9]+]]:_(i32) = G_CONSTANT i32 144 + ; CHECK-NEXT: [[PTR_ADD38:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C39]](i32) ; CHECK-NEXT: [[COPY37:%[0-9]+]]:_(p5) = COPY [[PTR_ADD38]](p5) - ; CHECK-NEXT: G_STORE [[UV36]](s32), [[COPY37]](p5) :: (store (s32) into %stack.0 + 144, align 16, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C40:%[0-9]+]]:_(s32) = G_CONSTANT i32 148 - ; CHECK-NEXT: [[PTR_ADD39:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C40]](s32) + ; CHECK-NEXT: G_STORE [[UV36]](i32), [[COPY37]](p5) :: (store (i32) into %stack.0 + 144, align 16, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C40:%[0-9]+]]:_(i32) = G_CONSTANT i32 148 + ; CHECK-NEXT: [[PTR_ADD39:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C40]](i32) ; CHECK-NEXT: [[COPY38:%[0-9]+]]:_(p5) = COPY [[PTR_ADD39]](p5) - ; CHECK-NEXT: G_STORE [[UV37]](s32), 
[[COPY38]](p5) :: (store (s32) into %stack.0 + 148, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C41:%[0-9]+]]:_(s32) = G_CONSTANT i32 152 - ; CHECK-NEXT: [[PTR_ADD40:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C41]](s32) + ; CHECK-NEXT: G_STORE [[UV37]](i32), [[COPY38]](p5) :: (store (i32) into %stack.0 + 148, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C41:%[0-9]+]]:_(i32) = G_CONSTANT i32 152 + ; CHECK-NEXT: [[PTR_ADD40:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C41]](i32) ; CHECK-NEXT: [[COPY39:%[0-9]+]]:_(p5) = COPY [[PTR_ADD40]](p5) - ; CHECK-NEXT: G_STORE [[UV38]](s32), [[COPY39]](p5) :: (store (s32) into %stack.0 + 152, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C42:%[0-9]+]]:_(s32) = G_CONSTANT i32 156 - ; CHECK-NEXT: [[PTR_ADD41:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C42]](s32) + ; CHECK-NEXT: G_STORE [[UV38]](i32), [[COPY39]](p5) :: (store (i32) into %stack.0 + 152, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C42:%[0-9]+]]:_(i32) = G_CONSTANT i32 156 + ; CHECK-NEXT: [[PTR_ADD41:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C42]](i32) ; CHECK-NEXT: [[COPY40:%[0-9]+]]:_(p5) = COPY [[PTR_ADD41]](p5) - ; CHECK-NEXT: G_STORE [[UV39]](s32), [[COPY40]](p5) :: (store (s32) into %stack.0 + 156, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C43:%[0-9]+]]:_(s32) = G_CONSTANT i32 160 - ; CHECK-NEXT: [[PTR_ADD42:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C43]](s32) + ; CHECK-NEXT: G_STORE [[UV39]](i32), [[COPY40]](p5) :: (store (i32) into %stack.0 + 156, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C43:%[0-9]+]]:_(i32) = G_CONSTANT i32 160 + ; CHECK-NEXT: [[PTR_ADD42:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C43]](i32) ; CHECK-NEXT: [[COPY41:%[0-9]+]]:_(p5) = COPY [[PTR_ADD42]](p5) - ; CHECK-NEXT: G_STORE [[UV40]](s32), [[COPY41]](p5) :: (store (s32) into %stack.0 + 160, align 32, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C44:%[0-9]+]]:_(s32) = G_CONSTANT i32 164 - ; CHECK-NEXT: [[PTR_ADD43:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C44]](s32) + ; CHECK-NEXT: G_STORE [[UV40]](i32), [[COPY41]](p5) :: (store (i32) into %stack.0 + 160, align 32, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C44:%[0-9]+]]:_(i32) = G_CONSTANT i32 164 + ; CHECK-NEXT: [[PTR_ADD43:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C44]](i32) ; CHECK-NEXT: [[COPY42:%[0-9]+]]:_(p5) = COPY [[PTR_ADD43]](p5) - ; CHECK-NEXT: G_STORE [[UV41]](s32), [[COPY42]](p5) :: (store (s32) into %stack.0 + 164, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C45:%[0-9]+]]:_(s32) = G_CONSTANT i32 168 - ; CHECK-NEXT: [[PTR_ADD44:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C45]](s32) + ; CHECK-NEXT: G_STORE [[UV41]](i32), [[COPY42]](p5) :: (store (i32) into %stack.0 + 164, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C45:%[0-9]+]]:_(i32) = G_CONSTANT i32 168 + ; CHECK-NEXT: [[PTR_ADD44:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C45]](i32) ; CHECK-NEXT: [[COPY43:%[0-9]+]]:_(p5) = COPY [[PTR_ADD44]](p5) - ; CHECK-NEXT: G_STORE [[UV42]](s32), [[COPY43]](p5) :: (store (s32) into %stack.0 + 168, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C46:%[0-9]+]]:_(s32) = G_CONSTANT i32 172 - ; CHECK-NEXT: [[PTR_ADD45:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C46]](s32) + ; CHECK-NEXT: G_STORE [[UV42]](i32), [[COPY43]](p5) :: (store (i32) into %stack.0 + 168, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C46:%[0-9]+]]:_(i32) = G_CONSTANT i32 172 + ; CHECK-NEXT: [[PTR_ADD45:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C46]](i32) ; CHECK-NEXT: 
[[COPY44:%[0-9]+]]:_(p5) = COPY [[PTR_ADD45]](p5) - ; CHECK-NEXT: G_STORE [[UV43]](s32), [[COPY44]](p5) :: (store (s32) into %stack.0 + 172, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C47:%[0-9]+]]:_(s32) = G_CONSTANT i32 176 - ; CHECK-NEXT: [[PTR_ADD46:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C47]](s32) + ; CHECK-NEXT: G_STORE [[UV43]](i32), [[COPY44]](p5) :: (store (i32) into %stack.0 + 172, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C47:%[0-9]+]]:_(i32) = G_CONSTANT i32 176 + ; CHECK-NEXT: [[PTR_ADD46:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C47]](i32) ; CHECK-NEXT: [[COPY45:%[0-9]+]]:_(p5) = COPY [[PTR_ADD46]](p5) - ; CHECK-NEXT: G_STORE [[UV44]](s32), [[COPY45]](p5) :: (store (s32) into %stack.0 + 176, align 16, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C48:%[0-9]+]]:_(s32) = G_CONSTANT i32 180 - ; CHECK-NEXT: [[PTR_ADD47:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C48]](s32) + ; CHECK-NEXT: G_STORE [[UV44]](i32), [[COPY45]](p5) :: (store (i32) into %stack.0 + 176, align 16, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C48:%[0-9]+]]:_(i32) = G_CONSTANT i32 180 + ; CHECK-NEXT: [[PTR_ADD47:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C48]](i32) ; CHECK-NEXT: [[COPY46:%[0-9]+]]:_(p5) = COPY [[PTR_ADD47]](p5) - ; CHECK-NEXT: G_STORE [[UV45]](s32), [[COPY46]](p5) :: (store (s32) into %stack.0 + 180, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C49:%[0-9]+]]:_(s32) = G_CONSTANT i32 184 - ; CHECK-NEXT: [[PTR_ADD48:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C49]](s32) + ; CHECK-NEXT: G_STORE [[UV45]](i32), [[COPY46]](p5) :: (store (i32) into %stack.0 + 180, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C49:%[0-9]+]]:_(i32) = G_CONSTANT i32 184 + ; CHECK-NEXT: [[PTR_ADD48:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C49]](i32) ; CHECK-NEXT: [[COPY47:%[0-9]+]]:_(p5) = COPY [[PTR_ADD48]](p5) - ; CHECK-NEXT: G_STORE [[UV46]](s32), [[COPY47]](p5) :: (store (s32) into %stack.0 + 184, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C50:%[0-9]+]]:_(s32) = G_CONSTANT i32 188 - ; CHECK-NEXT: [[PTR_ADD49:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C50]](s32) + ; CHECK-NEXT: G_STORE [[UV46]](i32), [[COPY47]](p5) :: (store (i32) into %stack.0 + 184, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C50:%[0-9]+]]:_(i32) = G_CONSTANT i32 188 + ; CHECK-NEXT: [[PTR_ADD49:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C50]](i32) ; CHECK-NEXT: [[COPY48:%[0-9]+]]:_(p5) = COPY [[PTR_ADD49]](p5) - ; CHECK-NEXT: G_STORE [[UV47]](s32), [[COPY48]](p5) :: (store (s32) into %stack.0 + 188, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C51:%[0-9]+]]:_(s32) = G_CONSTANT i32 192 - ; CHECK-NEXT: [[PTR_ADD50:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C51]](s32) + ; CHECK-NEXT: G_STORE [[UV47]](i32), [[COPY48]](p5) :: (store (i32) into %stack.0 + 188, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C51:%[0-9]+]]:_(i32) = G_CONSTANT i32 192 + ; CHECK-NEXT: [[PTR_ADD50:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C51]](i32) ; CHECK-NEXT: [[COPY49:%[0-9]+]]:_(p5) = COPY [[PTR_ADD50]](p5) - ; CHECK-NEXT: G_STORE [[UV48]](s32), [[COPY49]](p5) :: (store (s32) into %stack.0 + 192, align 64, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C52:%[0-9]+]]:_(s32) = G_CONSTANT i32 196 - ; CHECK-NEXT: [[PTR_ADD51:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C52]](s32) + ; CHECK-NEXT: G_STORE [[UV48]](i32), [[COPY49]](p5) :: (store (i32) into %stack.0 + 192, align 64, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C52:%[0-9]+]]:_(i32) = G_CONSTANT i32 196 + ; CHECK-NEXT: 
[[PTR_ADD51:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C52]](i32) ; CHECK-NEXT: [[COPY50:%[0-9]+]]:_(p5) = COPY [[PTR_ADD51]](p5) - ; CHECK-NEXT: G_STORE [[UV49]](s32), [[COPY50]](p5) :: (store (s32) into %stack.0 + 196, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C53:%[0-9]+]]:_(s32) = G_CONSTANT i32 200 - ; CHECK-NEXT: [[PTR_ADD52:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C53]](s32) + ; CHECK-NEXT: G_STORE [[UV49]](i32), [[COPY50]](p5) :: (store (i32) into %stack.0 + 196, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C53:%[0-9]+]]:_(i32) = G_CONSTANT i32 200 + ; CHECK-NEXT: [[PTR_ADD52:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C53]](i32) ; CHECK-NEXT: [[COPY51:%[0-9]+]]:_(p5) = COPY [[PTR_ADD52]](p5) - ; CHECK-NEXT: G_STORE [[UV50]](s32), [[COPY51]](p5) :: (store (s32) into %stack.0 + 200, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C54:%[0-9]+]]:_(s32) = G_CONSTANT i32 204 - ; CHECK-NEXT: [[PTR_ADD53:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C54]](s32) + ; CHECK-NEXT: G_STORE [[UV50]](i32), [[COPY51]](p5) :: (store (i32) into %stack.0 + 200, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C54:%[0-9]+]]:_(i32) = G_CONSTANT i32 204 + ; CHECK-NEXT: [[PTR_ADD53:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C54]](i32) ; CHECK-NEXT: [[COPY52:%[0-9]+]]:_(p5) = COPY [[PTR_ADD53]](p5) - ; CHECK-NEXT: G_STORE [[UV51]](s32), [[COPY52]](p5) :: (store (s32) into %stack.0 + 204, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C55:%[0-9]+]]:_(s32) = G_CONSTANT i32 208 - ; CHECK-NEXT: [[PTR_ADD54:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C55]](s32) + ; CHECK-NEXT: G_STORE [[UV51]](i32), [[COPY52]](p5) :: (store (i32) into %stack.0 + 204, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C55:%[0-9]+]]:_(i32) = G_CONSTANT i32 208 + ; CHECK-NEXT: [[PTR_ADD54:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C55]](i32) ; CHECK-NEXT: [[COPY53:%[0-9]+]]:_(p5) = COPY [[PTR_ADD54]](p5) - ; CHECK-NEXT: G_STORE [[UV52]](s32), [[COPY53]](p5) :: (store (s32) into %stack.0 + 208, align 16, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C56:%[0-9]+]]:_(s32) = G_CONSTANT i32 212 - ; CHECK-NEXT: [[PTR_ADD55:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C56]](s32) + ; CHECK-NEXT: G_STORE [[UV52]](i32), [[COPY53]](p5) :: (store (i32) into %stack.0 + 208, align 16, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C56:%[0-9]+]]:_(i32) = G_CONSTANT i32 212 + ; CHECK-NEXT: [[PTR_ADD55:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C56]](i32) ; CHECK-NEXT: [[COPY54:%[0-9]+]]:_(p5) = COPY [[PTR_ADD55]](p5) - ; CHECK-NEXT: G_STORE [[UV53]](s32), [[COPY54]](p5) :: (store (s32) into %stack.0 + 212, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C57:%[0-9]+]]:_(s32) = G_CONSTANT i32 216 - ; CHECK-NEXT: [[PTR_ADD56:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C57]](s32) + ; CHECK-NEXT: G_STORE [[UV53]](i32), [[COPY54]](p5) :: (store (i32) into %stack.0 + 212, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C57:%[0-9]+]]:_(i32) = G_CONSTANT i32 216 + ; CHECK-NEXT: [[PTR_ADD56:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C57]](i32) ; CHECK-NEXT: [[COPY55:%[0-9]+]]:_(p5) = COPY [[PTR_ADD56]](p5) - ; CHECK-NEXT: G_STORE [[UV54]](s32), [[COPY55]](p5) :: (store (s32) into %stack.0 + 216, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C58:%[0-9]+]]:_(s32) = G_CONSTANT i32 220 - ; CHECK-NEXT: [[PTR_ADD57:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C58]](s32) + ; CHECK-NEXT: G_STORE [[UV54]](i32), [[COPY55]](p5) :: (store (i32) into %stack.0 + 216, align 8, basealign 256, addrspace 5) + ; 
CHECK-NEXT: [[C58:%[0-9]+]]:_(i32) = G_CONSTANT i32 220 + ; CHECK-NEXT: [[PTR_ADD57:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C58]](i32) ; CHECK-NEXT: [[COPY56:%[0-9]+]]:_(p5) = COPY [[PTR_ADD57]](p5) - ; CHECK-NEXT: G_STORE [[UV55]](s32), [[COPY56]](p5) :: (store (s32) into %stack.0 + 220, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C59:%[0-9]+]]:_(s32) = G_CONSTANT i32 224 - ; CHECK-NEXT: [[PTR_ADD58:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C59]](s32) + ; CHECK-NEXT: G_STORE [[UV55]](i32), [[COPY56]](p5) :: (store (i32) into %stack.0 + 220, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C59:%[0-9]+]]:_(i32) = G_CONSTANT i32 224 + ; CHECK-NEXT: [[PTR_ADD58:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C59]](i32) ; CHECK-NEXT: [[COPY57:%[0-9]+]]:_(p5) = COPY [[PTR_ADD58]](p5) - ; CHECK-NEXT: G_STORE [[UV56]](s32), [[COPY57]](p5) :: (store (s32) into %stack.0 + 224, align 32, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C60:%[0-9]+]]:_(s32) = G_CONSTANT i32 228 - ; CHECK-NEXT: [[PTR_ADD59:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C60]](s32) + ; CHECK-NEXT: G_STORE [[UV56]](i32), [[COPY57]](p5) :: (store (i32) into %stack.0 + 224, align 32, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C60:%[0-9]+]]:_(i32) = G_CONSTANT i32 228 + ; CHECK-NEXT: [[PTR_ADD59:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C60]](i32) ; CHECK-NEXT: [[COPY58:%[0-9]+]]:_(p5) = COPY [[PTR_ADD59]](p5) - ; CHECK-NEXT: G_STORE [[UV57]](s32), [[COPY58]](p5) :: (store (s32) into %stack.0 + 228, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C61:%[0-9]+]]:_(s32) = G_CONSTANT i32 232 - ; CHECK-NEXT: [[PTR_ADD60:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C61]](s32) + ; CHECK-NEXT: G_STORE [[UV57]](i32), [[COPY58]](p5) :: (store (i32) into %stack.0 + 228, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C61:%[0-9]+]]:_(i32) = G_CONSTANT i32 232 + ; CHECK-NEXT: [[PTR_ADD60:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C61]](i32) ; CHECK-NEXT: [[COPY59:%[0-9]+]]:_(p5) = COPY [[PTR_ADD60]](p5) - ; CHECK-NEXT: G_STORE [[UV58]](s32), [[COPY59]](p5) :: (store (s32) into %stack.0 + 232, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C62:%[0-9]+]]:_(s32) = G_CONSTANT i32 236 - ; CHECK-NEXT: [[PTR_ADD61:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C62]](s32) + ; CHECK-NEXT: G_STORE [[UV58]](i32), [[COPY59]](p5) :: (store (i32) into %stack.0 + 232, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C62:%[0-9]+]]:_(i32) = G_CONSTANT i32 236 + ; CHECK-NEXT: [[PTR_ADD61:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C62]](i32) ; CHECK-NEXT: [[COPY60:%[0-9]+]]:_(p5) = COPY [[PTR_ADD61]](p5) - ; CHECK-NEXT: G_STORE [[UV59]](s32), [[COPY60]](p5) :: (store (s32) into %stack.0 + 236, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C63:%[0-9]+]]:_(s32) = G_CONSTANT i32 240 - ; CHECK-NEXT: [[PTR_ADD62:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C63]](s32) + ; CHECK-NEXT: G_STORE [[UV59]](i32), [[COPY60]](p5) :: (store (i32) into %stack.0 + 236, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C63:%[0-9]+]]:_(i32) = G_CONSTANT i32 240 + ; CHECK-NEXT: [[PTR_ADD62:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C63]](i32) ; CHECK-NEXT: [[COPY61:%[0-9]+]]:_(p5) = COPY [[PTR_ADD62]](p5) - ; CHECK-NEXT: G_STORE [[UV60]](s32), [[COPY61]](p5) :: (store (s32) into %stack.0 + 240, align 16, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C64:%[0-9]+]]:_(s32) = G_CONSTANT i32 244 - ; CHECK-NEXT: [[PTR_ADD63:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C64]](s32) + ; CHECK-NEXT: G_STORE [[UV60]](i32), [[COPY61]](p5) :: (store 
(i32) into %stack.0 + 240, align 16, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C64:%[0-9]+]]:_(i32) = G_CONSTANT i32 244 + ; CHECK-NEXT: [[PTR_ADD63:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C64]](i32) ; CHECK-NEXT: [[COPY62:%[0-9]+]]:_(p5) = COPY [[PTR_ADD63]](p5) - ; CHECK-NEXT: G_STORE [[UV61]](s32), [[COPY62]](p5) :: (store (s32) into %stack.0 + 244, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C65:%[0-9]+]]:_(s32) = G_CONSTANT i32 248 - ; CHECK-NEXT: [[PTR_ADD64:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C65]](s32) + ; CHECK-NEXT: G_STORE [[UV61]](i32), [[COPY62]](p5) :: (store (i32) into %stack.0 + 244, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C65:%[0-9]+]]:_(i32) = G_CONSTANT i32 248 + ; CHECK-NEXT: [[PTR_ADD64:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C65]](i32) ; CHECK-NEXT: [[COPY63:%[0-9]+]]:_(p5) = COPY [[PTR_ADD64]](p5) - ; CHECK-NEXT: G_STORE [[UV62]](s32), [[COPY63]](p5) :: (store (s32) into %stack.0 + 248, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C66:%[0-9]+]]:_(s32) = G_CONSTANT i32 252 - ; CHECK-NEXT: [[PTR_ADD65:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C66]](s32) + ; CHECK-NEXT: G_STORE [[UV62]](i32), [[COPY63]](p5) :: (store (i32) into %stack.0 + 248, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C66:%[0-9]+]]:_(i32) = G_CONSTANT i32 252 + ; CHECK-NEXT: [[PTR_ADD65:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C66]](i32) ; CHECK-NEXT: [[COPY64:%[0-9]+]]:_(p5) = COPY [[PTR_ADD65]](p5) - ; CHECK-NEXT: G_STORE [[UV63]](s32), [[COPY64]](p5) :: (store (s32) into %stack.0 + 252, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C67:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C67]] - ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[C4]] - ; CHECK-NEXT: [[PTR_ADD66:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[MUL]](s32) - ; CHECK-NEXT: G_STORE [[C3]](s32), [[PTR_ADD66]](p5) :: (store (s32), addrspace 5) - ; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (load (s32), align 256, addrspace 5) - ; CHECK-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; CHECK-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; CHECK-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; CHECK-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5) - ; CHECK-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) - ; CHECK-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5) - ; CHECK-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s32) from unknown-address + 28, addrspace 5) - ; CHECK-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s32) from unknown-address + 32, align 32, addrspace 5) - ; CHECK-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s32) from unknown-address + 36, addrspace 5) - ; CHECK-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s32) from unknown-address + 40, align 8, addrspace 5) - ; CHECK-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s32) from unknown-address + 44, addrspace 5) - ; CHECK-NEXT: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s32) from 
unknown-address + 48, align 16, addrspace 5) - ; CHECK-NEXT: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p5) :: (load (s32) from unknown-address + 52, addrspace 5) - ; CHECK-NEXT: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p5) :: (load (s32) from unknown-address + 56, align 8, addrspace 5) - ; CHECK-NEXT: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p5) :: (load (s32) from unknown-address + 60, addrspace 5) - ; CHECK-NEXT: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p5) :: (load (s32) from unknown-address + 64, align 64, addrspace 5) - ; CHECK-NEXT: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p5) :: (load (s32) from unknown-address + 68, addrspace 5) - ; CHECK-NEXT: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p5) :: (load (s32) from unknown-address + 72, align 8, addrspace 5) - ; CHECK-NEXT: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p5) :: (load (s32) from unknown-address + 76, addrspace 5) - ; CHECK-NEXT: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p5) :: (load (s32) from unknown-address + 80, align 16, addrspace 5) - ; CHECK-NEXT: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD23]](p5) :: (load (s32) from unknown-address + 84, addrspace 5) - ; CHECK-NEXT: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD24]](p5) :: (load (s32) from unknown-address + 88, align 8, addrspace 5) - ; CHECK-NEXT: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD25]](p5) :: (load (s32) from unknown-address + 92, addrspace 5) - ; CHECK-NEXT: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p5) :: (load (s32) from unknown-address + 96, align 32, addrspace 5) - ; CHECK-NEXT: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD27]](p5) :: (load (s32) from unknown-address + 100, addrspace 5) - ; CHECK-NEXT: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD28]](p5) :: (load (s32) from unknown-address + 104, align 8, addrspace 5) - ; CHECK-NEXT: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD29]](p5) :: (load (s32) from unknown-address + 108, addrspace 5) - ; CHECK-NEXT: [[LOAD32:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p5) :: (load (s32) from unknown-address + 112, align 16, addrspace 5) - ; CHECK-NEXT: [[LOAD33:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD31]](p5) :: (load (s32) from unknown-address + 116, addrspace 5) - ; CHECK-NEXT: [[LOAD34:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD32]](p5) :: (load (s32) from unknown-address + 120, align 8, addrspace 5) - ; CHECK-NEXT: [[LOAD35:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD33]](p5) :: (load (s32) from unknown-address + 124, addrspace 5) - ; CHECK-NEXT: [[LOAD36:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD34]](p5) :: (load (s32) from unknown-address + 128, align 128, addrspace 5) - ; CHECK-NEXT: [[LOAD37:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD35]](p5) :: (load (s32) from unknown-address + 132, addrspace 5) - ; CHECK-NEXT: [[LOAD38:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD36]](p5) :: (load (s32) from unknown-address + 136, align 8, addrspace 5) - ; CHECK-NEXT: [[LOAD39:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD37]](p5) :: (load (s32) from unknown-address + 140, addrspace 5) - ; CHECK-NEXT: [[LOAD40:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD38]](p5) :: (load (s32) from unknown-address + 144, align 16, addrspace 5) - ; CHECK-NEXT: [[LOAD41:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD39]](p5) :: (load (s32) from unknown-address + 148, addrspace 5) - ; CHECK-NEXT: [[LOAD42:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD40]](p5) :: (load (s32) from unknown-address + 152, align 8, addrspace 5) - ; CHECK-NEXT: [[LOAD43:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD41]](p5) :: (load (s32) from unknown-address + 156, addrspace 5) - ; CHECK-NEXT: 
[[LOAD44:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD42]](p5) :: (load (s32) from unknown-address + 160, align 32, addrspace 5) - ; CHECK-NEXT: [[LOAD45:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD43]](p5) :: (load (s32) from unknown-address + 164, addrspace 5) - ; CHECK-NEXT: [[LOAD46:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD44]](p5) :: (load (s32) from unknown-address + 168, align 8, addrspace 5) - ; CHECK-NEXT: [[LOAD47:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD45]](p5) :: (load (s32) from unknown-address + 172, addrspace 5) - ; CHECK-NEXT: [[LOAD48:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD46]](p5) :: (load (s32) from unknown-address + 176, align 16, addrspace 5) - ; CHECK-NEXT: [[LOAD49:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD47]](p5) :: (load (s32) from unknown-address + 180, addrspace 5) - ; CHECK-NEXT: [[LOAD50:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD48]](p5) :: (load (s32) from unknown-address + 184, align 8, addrspace 5) - ; CHECK-NEXT: [[LOAD51:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD49]](p5) :: (load (s32) from unknown-address + 188, addrspace 5) - ; CHECK-NEXT: [[LOAD52:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD50]](p5) :: (load (s32) from unknown-address + 192, align 64, addrspace 5) - ; CHECK-NEXT: [[LOAD53:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD51]](p5) :: (load (s32) from unknown-address + 196, addrspace 5) - ; CHECK-NEXT: [[LOAD54:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD52]](p5) :: (load (s32) from unknown-address + 200, align 8, addrspace 5) - ; CHECK-NEXT: [[LOAD55:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD53]](p5) :: (load (s32) from unknown-address + 204, addrspace 5) - ; CHECK-NEXT: [[LOAD56:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD54]](p5) :: (load (s32) from unknown-address + 208, align 16, addrspace 5) - ; CHECK-NEXT: [[LOAD57:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD55]](p5) :: (load (s32) from unknown-address + 212, addrspace 5) - ; CHECK-NEXT: [[LOAD58:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD56]](p5) :: (load (s32) from unknown-address + 216, align 8, addrspace 5) - ; CHECK-NEXT: [[LOAD59:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD57]](p5) :: (load (s32) from unknown-address + 220, addrspace 5) - ; CHECK-NEXT: [[LOAD60:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD58]](p5) :: (load (s32) from unknown-address + 224, align 32, addrspace 5) - ; CHECK-NEXT: [[LOAD61:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD59]](p5) :: (load (s32) from unknown-address + 228, addrspace 5) - ; CHECK-NEXT: [[LOAD62:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD60]](p5) :: (load (s32) from unknown-address + 232, align 8, addrspace 5) - ; CHECK-NEXT: [[LOAD63:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD61]](p5) :: (load (s32) from unknown-address + 236, addrspace 5) - ; CHECK-NEXT: [[LOAD64:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD62]](p5) :: (load (s32) from unknown-address + 240, align 16, addrspace 5) - ; CHECK-NEXT: [[LOAD65:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD63]](p5) :: (load (s32) from unknown-address + 244, addrspace 5) - ; CHECK-NEXT: [[LOAD66:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD64]](p5) :: (load (s32) from unknown-address + 248, align 8, addrspace 5) - ; CHECK-NEXT: [[LOAD67:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD65]](p5) :: (load (s32) from unknown-address + 252, addrspace 5) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 
x s32>) = G_BUILD_VECTOR [[LOAD16]](s32), [[LOAD17]](s32), [[LOAD18]](s32), [[LOAD19]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD20]](s32), [[LOAD21]](s32), [[LOAD22]](s32), [[LOAD23]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD24]](s32), [[LOAD25]](s32), [[LOAD26]](s32), [[LOAD27]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD28]](s32), [[LOAD29]](s32), [[LOAD30]](s32), [[LOAD31]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD32]](s32), [[LOAD33]](s32), [[LOAD34]](s32), [[LOAD35]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR8:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD36]](s32), [[LOAD37]](s32), [[LOAD38]](s32), [[LOAD39]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR9:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD40]](s32), [[LOAD41]](s32), [[LOAD42]](s32), [[LOAD43]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR10:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD44]](s32), [[LOAD45]](s32), [[LOAD46]](s32), [[LOAD47]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR11:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD48]](s32), [[LOAD49]](s32), [[LOAD50]](s32), [[LOAD51]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR12:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD52]](s32), [[LOAD53]](s32), [[LOAD54]](s32), [[LOAD55]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR13:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD56]](s32), [[LOAD57]](s32), [[LOAD58]](s32), [[LOAD59]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR14:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD60]](s32), [[LOAD61]](s32), [[LOAD62]](s32), [[LOAD63]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR15:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD64]](s32), [[LOAD65]](s32), [[LOAD66]](s32), [[LOAD67]](s32) + ; CHECK-NEXT: G_STORE [[UV63]](i32), [[COPY64]](p5) :: (store (i32) into %stack.0 + 252, basealign 256, addrspace 5) + ; CHECK-NEXT: [[C67:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C67]] + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[AND]], [[C4]] + ; CHECK-NEXT: [[PTR_ADD66:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[MUL]](i32) + ; CHECK-NEXT: G_STORE [[C3]](i32), [[PTR_ADD66]](p5) :: (store (i32), addrspace 5) + ; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[FRAME_INDEX]](p5) :: (load (i32), align 256, addrspace 5) + ; CHECK-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; CHECK-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; CHECK-NEXT: [[LOAD7:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; CHECK-NEXT: [[LOAD8:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i32) from unknown-address + 16, align 16, addrspace 5) + ; CHECK-NEXT: [[LOAD9:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD7]](p5) :: (load (i32) from unknown-address + 20, addrspace 5) + ; CHECK-NEXT: [[LOAD10:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD8]](p5) :: (load (i32) from unknown-address + 24, align 8, addrspace 5) + ; CHECK-NEXT: [[LOAD11:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD9]](p5) :: (load (i32) from unknown-address + 28, addrspace 5) + ; CHECK-NEXT: [[LOAD12:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i32) from unknown-address + 32, align 32, addrspace 5) + ; CHECK-NEXT: [[LOAD13:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD11]](p5) :: (load (i32) from unknown-address + 36, addrspace 5) + ; CHECK-NEXT: [[LOAD14:%[0-9]+]]:_(i32) = G_LOAD 
[[PTR_ADD12]](p5) :: (load (i32) from unknown-address + 40, align 8, addrspace 5) + ; CHECK-NEXT: [[LOAD15:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD13]](p5) :: (load (i32) from unknown-address + 44, addrspace 5) + ; CHECK-NEXT: [[LOAD16:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p5) :: (load (i32) from unknown-address + 48, align 16, addrspace 5) + ; CHECK-NEXT: [[LOAD17:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD15]](p5) :: (load (i32) from unknown-address + 52, addrspace 5) + ; CHECK-NEXT: [[LOAD18:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD16]](p5) :: (load (i32) from unknown-address + 56, align 8, addrspace 5) + ; CHECK-NEXT: [[LOAD19:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD17]](p5) :: (load (i32) from unknown-address + 60, addrspace 5) + ; CHECK-NEXT: [[LOAD20:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD18]](p5) :: (load (i32) from unknown-address + 64, align 64, addrspace 5) + ; CHECK-NEXT: [[LOAD21:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD19]](p5) :: (load (i32) from unknown-address + 68, addrspace 5) + ; CHECK-NEXT: [[LOAD22:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD20]](p5) :: (load (i32) from unknown-address + 72, align 8, addrspace 5) + ; CHECK-NEXT: [[LOAD23:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD21]](p5) :: (load (i32) from unknown-address + 76, addrspace 5) + ; CHECK-NEXT: [[LOAD24:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD22]](p5) :: (load (i32) from unknown-address + 80, align 16, addrspace 5) + ; CHECK-NEXT: [[LOAD25:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD23]](p5) :: (load (i32) from unknown-address + 84, addrspace 5) + ; CHECK-NEXT: [[LOAD26:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD24]](p5) :: (load (i32) from unknown-address + 88, align 8, addrspace 5) + ; CHECK-NEXT: [[LOAD27:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD25]](p5) :: (load (i32) from unknown-address + 92, addrspace 5) + ; CHECK-NEXT: [[LOAD28:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD26]](p5) :: (load (i32) from unknown-address + 96, align 32, addrspace 5) + ; CHECK-NEXT: [[LOAD29:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD27]](p5) :: (load (i32) from unknown-address + 100, addrspace 5) + ; CHECK-NEXT: [[LOAD30:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD28]](p5) :: (load (i32) from unknown-address + 104, align 8, addrspace 5) + ; CHECK-NEXT: [[LOAD31:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD29]](p5) :: (load (i32) from unknown-address + 108, addrspace 5) + ; CHECK-NEXT: [[LOAD32:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD30]](p5) :: (load (i32) from unknown-address + 112, align 16, addrspace 5) + ; CHECK-NEXT: [[LOAD33:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD31]](p5) :: (load (i32) from unknown-address + 116, addrspace 5) + ; CHECK-NEXT: [[LOAD34:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD32]](p5) :: (load (i32) from unknown-address + 120, align 8, addrspace 5) + ; CHECK-NEXT: [[LOAD35:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD33]](p5) :: (load (i32) from unknown-address + 124, addrspace 5) + ; CHECK-NEXT: [[LOAD36:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD34]](p5) :: (load (i32) from unknown-address + 128, align 128, addrspace 5) + ; CHECK-NEXT: [[LOAD37:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD35]](p5) :: (load (i32) from unknown-address + 132, addrspace 5) + ; CHECK-NEXT: [[LOAD38:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD36]](p5) :: (load (i32) from unknown-address + 136, align 8, addrspace 5) + ; CHECK-NEXT: [[LOAD39:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD37]](p5) :: (load (i32) from unknown-address + 140, addrspace 5) + ; CHECK-NEXT: [[LOAD40:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD38]](p5) :: (load (i32) from unknown-address + 144, align 16, addrspace 5) + ; CHECK-NEXT: [[LOAD41:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD39]](p5) :: (load (i32) from unknown-address + 148, 
addrspace 5) + ; CHECK-NEXT: [[LOAD42:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD40]](p5) :: (load (i32) from unknown-address + 152, align 8, addrspace 5) + ; CHECK-NEXT: [[LOAD43:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD41]](p5) :: (load (i32) from unknown-address + 156, addrspace 5) + ; CHECK-NEXT: [[LOAD44:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD42]](p5) :: (load (i32) from unknown-address + 160, align 32, addrspace 5) + ; CHECK-NEXT: [[LOAD45:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD43]](p5) :: (load (i32) from unknown-address + 164, addrspace 5) + ; CHECK-NEXT: [[LOAD46:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD44]](p5) :: (load (i32) from unknown-address + 168, align 8, addrspace 5) + ; CHECK-NEXT: [[LOAD47:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD45]](p5) :: (load (i32) from unknown-address + 172, addrspace 5) + ; CHECK-NEXT: [[LOAD48:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD46]](p5) :: (load (i32) from unknown-address + 176, align 16, addrspace 5) + ; CHECK-NEXT: [[LOAD49:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD47]](p5) :: (load (i32) from unknown-address + 180, addrspace 5) + ; CHECK-NEXT: [[LOAD50:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD48]](p5) :: (load (i32) from unknown-address + 184, align 8, addrspace 5) + ; CHECK-NEXT: [[LOAD51:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD49]](p5) :: (load (i32) from unknown-address + 188, addrspace 5) + ; CHECK-NEXT: [[LOAD52:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD50]](p5) :: (load (i32) from unknown-address + 192, align 64, addrspace 5) + ; CHECK-NEXT: [[LOAD53:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD51]](p5) :: (load (i32) from unknown-address + 196, addrspace 5) + ; CHECK-NEXT: [[LOAD54:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD52]](p5) :: (load (i32) from unknown-address + 200, align 8, addrspace 5) + ; CHECK-NEXT: [[LOAD55:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD53]](p5) :: (load (i32) from unknown-address + 204, addrspace 5) + ; CHECK-NEXT: [[LOAD56:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD54]](p5) :: (load (i32) from unknown-address + 208, align 16, addrspace 5) + ; CHECK-NEXT: [[LOAD57:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD55]](p5) :: (load (i32) from unknown-address + 212, addrspace 5) + ; CHECK-NEXT: [[LOAD58:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD56]](p5) :: (load (i32) from unknown-address + 216, align 8, addrspace 5) + ; CHECK-NEXT: [[LOAD59:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD57]](p5) :: (load (i32) from unknown-address + 220, addrspace 5) + ; CHECK-NEXT: [[LOAD60:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD58]](p5) :: (load (i32) from unknown-address + 224, align 32, addrspace 5) + ; CHECK-NEXT: [[LOAD61:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD59]](p5) :: (load (i32) from unknown-address + 228, addrspace 5) + ; CHECK-NEXT: [[LOAD62:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD60]](p5) :: (load (i32) from unknown-address + 232, align 8, addrspace 5) + ; CHECK-NEXT: [[LOAD63:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD61]](p5) :: (load (i32) from unknown-address + 236, addrspace 5) + ; CHECK-NEXT: [[LOAD64:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD62]](p5) :: (load (i32) from unknown-address + 240, align 16, addrspace 5) + ; CHECK-NEXT: [[LOAD65:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD63]](p5) :: (load (i32) from unknown-address + 244, addrspace 5) + ; CHECK-NEXT: [[LOAD66:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD64]](p5) :: (load (i32) from unknown-address + 248, align 8, addrspace 5) + ; CHECK-NEXT: [[LOAD67:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD65]](p5) :: (load (i32) from unknown-address + 252, addrspace 5) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD4]](i32), [[LOAD5]](i32), [[LOAD6]](i32), [[LOAD7]](i32) + ; CHECK-NEXT: 
[[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD8]](i32), [[LOAD9]](i32), [[LOAD10]](i32), [[LOAD11]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD12]](i32), [[LOAD13]](i32), [[LOAD14]](i32), [[LOAD15]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD16]](i32), [[LOAD17]](i32), [[LOAD18]](i32), [[LOAD19]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD20]](i32), [[LOAD21]](i32), [[LOAD22]](i32), [[LOAD23]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD24]](i32), [[LOAD25]](i32), [[LOAD26]](i32), [[LOAD27]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD28]](i32), [[LOAD29]](i32), [[LOAD30]](i32), [[LOAD31]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD32]](i32), [[LOAD33]](i32), [[LOAD34]](i32), [[LOAD35]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR8:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD36]](i32), [[LOAD37]](i32), [[LOAD38]](i32), [[LOAD39]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR9:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD40]](i32), [[LOAD41]](i32), [[LOAD42]](i32), [[LOAD43]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR10:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD44]](i32), [[LOAD45]](i32), [[LOAD46]](i32), [[LOAD47]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR11:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD48]](i32), [[LOAD49]](i32), [[LOAD50]](i32), [[LOAD51]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR12:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD52]](i32), [[LOAD53]](i32), [[LOAD54]](i32), [[LOAD55]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR13:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD56]](i32), [[LOAD57]](i32), [[LOAD58]](i32), [[LOAD59]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR14:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD60]](i32), [[LOAD61]](i32), [[LOAD62]](i32), [[LOAD63]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR15:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD64]](i32), [[LOAD65]](i32), [[LOAD66]](i32), [[LOAD67]](i32) ; CHECK-NEXT: [[COPY65:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY65]](p1) :: (store (<4 x s32>), align 4, addrspace 1) - ; CHECK-NEXT: [[C68:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD67:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C68]](s64) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD67]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) - ; CHECK-NEXT: [[C69:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; CHECK-NEXT: [[PTR_ADD68:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C69]](s64) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<4 x s32>), [[PTR_ADD68]](p1) :: (store (<4 x s32>) into unknown-address + 32, align 4, addrspace 1) - ; CHECK-NEXT: [[C70:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 - ; CHECK-NEXT: [[PTR_ADD69:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C70]](s64) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR3]](<4 x s32>), [[PTR_ADD69]](p1) :: (store (<4 x s32>) into unknown-address + 48, align 4, addrspace 1) - ; CHECK-NEXT: [[PTR_ADD70:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C]](s64) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR4]](<4 x s32>), [[PTR_ADD70]](p1) :: (store (<4 x s32>) into unknown-address + 64, align 4, addrspace 1) - ; CHECK-NEXT: [[C71:%[0-9]+]]:_(s64) = G_CONSTANT i64 80 - ; CHECK-NEXT: [[PTR_ADD71:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C71]](s64) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR5]](<4 x s32>), [[PTR_ADD71]](p1) :: (store (<4 x s32>) into 
unknown-address + 80, align 4, addrspace 1) - ; CHECK-NEXT: [[C72:%[0-9]+]]:_(s64) = G_CONSTANT i64 96 - ; CHECK-NEXT: [[PTR_ADD72:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C72]](s64) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR6]](<4 x s32>), [[PTR_ADD72]](p1) :: (store (<4 x s32>) into unknown-address + 96, align 4, addrspace 1) - ; CHECK-NEXT: [[C73:%[0-9]+]]:_(s64) = G_CONSTANT i64 112 - ; CHECK-NEXT: [[PTR_ADD73:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C73]](s64) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR7]](<4 x s32>), [[PTR_ADD73]](p1) :: (store (<4 x s32>) into unknown-address + 112, align 4, addrspace 1) - ; CHECK-NEXT: [[PTR_ADD74:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C1]](s64) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR8]](<4 x s32>), [[PTR_ADD74]](p1) :: (store (<4 x s32>) into unknown-address + 128, align 4, addrspace 1) - ; CHECK-NEXT: [[C74:%[0-9]+]]:_(s64) = G_CONSTANT i64 144 - ; CHECK-NEXT: [[PTR_ADD75:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C74]](s64) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR9]](<4 x s32>), [[PTR_ADD75]](p1) :: (store (<4 x s32>) into unknown-address + 144, align 4, addrspace 1) - ; CHECK-NEXT: [[C75:%[0-9]+]]:_(s64) = G_CONSTANT i64 160 - ; CHECK-NEXT: [[PTR_ADD76:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C75]](s64) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR10]](<4 x s32>), [[PTR_ADD76]](p1) :: (store (<4 x s32>) into unknown-address + 160, align 4, addrspace 1) - ; CHECK-NEXT: [[C76:%[0-9]+]]:_(s64) = G_CONSTANT i64 176 - ; CHECK-NEXT: [[PTR_ADD77:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C76]](s64) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR11]](<4 x s32>), [[PTR_ADD77]](p1) :: (store (<4 x s32>) into unknown-address + 176, align 4, addrspace 1) - ; CHECK-NEXT: [[PTR_ADD78:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C2]](s64) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR12]](<4 x s32>), [[PTR_ADD78]](p1) :: (store (<4 x s32>) into unknown-address + 192, align 4, addrspace 1) - ; CHECK-NEXT: [[C77:%[0-9]+]]:_(s64) = G_CONSTANT i64 208 - ; CHECK-NEXT: [[PTR_ADD79:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C77]](s64) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR13]](<4 x s32>), [[PTR_ADD79]](p1) :: (store (<4 x s32>) into unknown-address + 208, align 4, addrspace 1) - ; CHECK-NEXT: [[C78:%[0-9]+]]:_(s64) = G_CONSTANT i64 224 - ; CHECK-NEXT: [[PTR_ADD80:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C78]](s64) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR14]](<4 x s32>), [[PTR_ADD80]](p1) :: (store (<4 x s32>) into unknown-address + 224, align 4, addrspace 1) - ; CHECK-NEXT: [[C79:%[0-9]+]]:_(s64) = G_CONSTANT i64 240 - ; CHECK-NEXT: [[PTR_ADD81:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C79]](s64) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR15]](<4 x s32>), [[PTR_ADD81]](p1) :: (store (<4 x s32>) into unknown-address + 240, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY65]](p1) :: (store (<4 x i32>), align 4, addrspace 1) + ; CHECK-NEXT: [[C68:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CHECK-NEXT: [[PTR_ADD67:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C68]](i64) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x i32>), [[PTR_ADD67]](p1) :: (store (<4 x i32>) into unknown-address + 16, align 4, addrspace 1) + ; CHECK-NEXT: [[C69:%[0-9]+]]:_(i64) = G_CONSTANT i64 32 + ; CHECK-NEXT: [[PTR_ADD68:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C69]](i64) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<4 x i32>), [[PTR_ADD68]](p1) :: (store (<4 x i32>) into unknown-address + 32, align 4, addrspace 1) + ; CHECK-NEXT: [[C70:%[0-9]+]]:_(i64) = G_CONSTANT i64 48 + ; CHECK-NEXT: 
[[PTR_ADD69:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C70]](i64) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR3]](<4 x i32>), [[PTR_ADD69]](p1) :: (store (<4 x i32>) into unknown-address + 48, align 4, addrspace 1) + ; CHECK-NEXT: [[PTR_ADD70:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C]](i64) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR4]](<4 x i32>), [[PTR_ADD70]](p1) :: (store (<4 x i32>) into unknown-address + 64, align 4, addrspace 1) + ; CHECK-NEXT: [[C71:%[0-9]+]]:_(i64) = G_CONSTANT i64 80 + ; CHECK-NEXT: [[PTR_ADD71:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C71]](i64) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR5]](<4 x i32>), [[PTR_ADD71]](p1) :: (store (<4 x i32>) into unknown-address + 80, align 4, addrspace 1) + ; CHECK-NEXT: [[C72:%[0-9]+]]:_(i64) = G_CONSTANT i64 96 + ; CHECK-NEXT: [[PTR_ADD72:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C72]](i64) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR6]](<4 x i32>), [[PTR_ADD72]](p1) :: (store (<4 x i32>) into unknown-address + 96, align 4, addrspace 1) + ; CHECK-NEXT: [[C73:%[0-9]+]]:_(i64) = G_CONSTANT i64 112 + ; CHECK-NEXT: [[PTR_ADD73:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C73]](i64) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR7]](<4 x i32>), [[PTR_ADD73]](p1) :: (store (<4 x i32>) into unknown-address + 112, align 4, addrspace 1) + ; CHECK-NEXT: [[PTR_ADD74:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C1]](i64) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR8]](<4 x i32>), [[PTR_ADD74]](p1) :: (store (<4 x i32>) into unknown-address + 128, align 4, addrspace 1) + ; CHECK-NEXT: [[C74:%[0-9]+]]:_(i64) = G_CONSTANT i64 144 + ; CHECK-NEXT: [[PTR_ADD75:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C74]](i64) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR9]](<4 x i32>), [[PTR_ADD75]](p1) :: (store (<4 x i32>) into unknown-address + 144, align 4, addrspace 1) + ; CHECK-NEXT: [[C75:%[0-9]+]]:_(i64) = G_CONSTANT i64 160 + ; CHECK-NEXT: [[PTR_ADD76:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C75]](i64) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR10]](<4 x i32>), [[PTR_ADD76]](p1) :: (store (<4 x i32>) into unknown-address + 160, align 4, addrspace 1) + ; CHECK-NEXT: [[C76:%[0-9]+]]:_(i64) = G_CONSTANT i64 176 + ; CHECK-NEXT: [[PTR_ADD77:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C76]](i64) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR11]](<4 x i32>), [[PTR_ADD77]](p1) :: (store (<4 x i32>) into unknown-address + 176, align 4, addrspace 1) + ; CHECK-NEXT: [[PTR_ADD78:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C2]](i64) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR12]](<4 x i32>), [[PTR_ADD78]](p1) :: (store (<4 x i32>) into unknown-address + 192, align 4, addrspace 1) + ; CHECK-NEXT: [[C77:%[0-9]+]]:_(i64) = G_CONSTANT i64 208 + ; CHECK-NEXT: [[PTR_ADD79:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C77]](i64) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR13]](<4 x i32>), [[PTR_ADD79]](p1) :: (store (<4 x i32>) into unknown-address + 208, align 4, addrspace 1) + ; CHECK-NEXT: [[C78:%[0-9]+]]:_(i64) = G_CONSTANT i64 224 + ; CHECK-NEXT: [[PTR_ADD80:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C78]](i64) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR14]](<4 x i32>), [[PTR_ADD80]](p1) :: (store (<4 x i32>) into unknown-address + 224, align 4, addrspace 1) + ; CHECK-NEXT: [[C79:%[0-9]+]]:_(i64) = G_CONSTANT i64 240 + ; CHECK-NEXT: [[PTR_ADD81:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C79]](i64) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR15]](<4 x i32>), [[PTR_ADD81]](p1) :: (store (<4 x i32>) into unknown-address + 240, align 4, addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(<64 x s32>) = G_LOAD %0 :: (load (<64 x s32>), 
align 4, addrspace 4) - %3:_(s32) = G_CONSTANT i32 12345 - %4:_(<64 x s32>) = G_INSERT_VECTOR_ELT %2, %3, %1 + %1:_(i32) = COPY $sgpr2 + %2:_(<64 x i32>) = G_LOAD %0(p1) :: (load (<64 x i32>), align 4, addrspace 4) + %3:_(i32) = G_CONSTANT i32 12345 + %4:_(<64 x i32>) = G_INSERT_VECTOR_ELT %2, %3(i32), %1(i32) %5:_(p1) = COPY $vgpr0_vgpr1 - G_STORE %4, %5 :: (store (<64 x s32>), align 4, addrspace 1) + G_STORE %4(<64 x i32>), %5(p1) :: (store (<64 x i32>), align 4, addrspace 1) ... --- @@ -796,29 +796,29 @@ body: | ; CHECK-LABEL: name: insert_vector_elt_varidx_v4s8 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[SHL]](s32) - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[SHL]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[SHL2]], [[C2]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[XOR]] - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: $vgpr0 = COPY [[OR]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(<4 x s8>) = G_BITCAST %0 - %4:_(s8) = G_TRUNC %1 - %5:_(<4 x s8>) = G_INSERT_VECTOR_ELT %3, %4, %2 - %6:_(s32) = G_BITCAST %5 - $vgpr0 = COPY %6 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY2]], [[C]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[SHL]](i32) + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[C1]], [[SHL]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[SHL2]], [[C2]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[XOR]] + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: $vgpr0 = COPY [[OR]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(<4 x i8>) = G_BITCAST %0(i32) + %4:_(i8) = G_TRUNC %1(i32) + %5:_(<4 x i8>) = G_INSERT_VECTOR_ELT %3, %4(i8), %2(i32) + %6:_(i32) = G_BITCAST %5(<4 x i8>) + $vgpr0 = COPY %6(i32) ... 
--- @@ -830,35 +830,35 @@ body: | ; CHECK-LABEL: name: insert_vector_elt_varidx_v8s8 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<2 x s32>), [[LSHR]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[SHL]](s32) - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[SHL]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[SHL2]], [[C3]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[EVEC]], [[XOR]] - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<2 x s32>) = G_INSERT_VECTOR_ELT [[BUILD_VECTOR]], [[OR]](s32), [[LSHR]](s32) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[IVEC]](<2 x s32>) - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[UV3]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s32) = COPY $vgpr3 - %3:_(<8 x s8>) = G_BITCAST %0 - %4:_(s8) = G_TRUNC %1 - %5:_(<8 x s8>) = G_INSERT_VECTOR_ELT %3, %4, %2 - %6:_(s64) = G_BITCAST %5 - $vgpr0_vgpr1 = COPY %6 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(i32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<2 x i32>), [[LSHR]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY2]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND]], [[C1]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C2]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[SHL]](i32) + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[C2]], [[SHL]](i32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[SHL2]], [[C3]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[EVEC]], [[XOR]] + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<2 x i32>) = G_INSERT_VECTOR_ELT [[BUILD_VECTOR]], [[OR]](i32), [[LSHR]](i32) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[IVEC]](<2 x 
i32>) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV2]](i32), [[UV3]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = COPY $vgpr2 + %2:_(i32) = COPY $vgpr3 + %3:_(<8 x i8>) = G_BITCAST %0(i64) + %4:_(i8) = G_TRUNC %1(i32) + %5:_(<8 x i8>) = G_INSERT_VECTOR_ELT %3, %4(i8), %2(i32) + %6:_(i64) = G_BITCAST %5(<8 x i8>) + $vgpr0_vgpr1 = COPY %6(i64) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir index d8d0f9b9cd898..aa33213616a62 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir @@ -10,14 +10,14 @@ body: | ; CHECK-LABEL: name: test_insert_s64_s32_offset0 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(s64) = G_INSERT [[COPY]], [[COPY1]](s32), 0 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s64) = G_INSERT %0, %1, 0 - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(i64) = G_INSERT [[COPY]], [[COPY1]](i32), 0 + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = COPY $vgpr2 + %2:_(i64) = G_INSERT %0, %1(i32), 0 + $vgpr0_vgpr1 = COPY %2(i64) ... --- name: test_insert_s64_s32_offset32 @@ -28,14 +28,14 @@ body: | ; CHECK-LABEL: name: test_insert_s64_s32_offset32 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(s64) = G_INSERT [[COPY]], [[COPY1]](s32), 32 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s64) = G_INSERT %0, %1, 32 - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(i64) = G_INSERT [[COPY]], [[COPY1]](i32), 32 + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = COPY $vgpr2 + %2:_(i64) = G_INSERT %0, %1(i32), 32 + $vgpr0_vgpr1 = COPY %2(i64) ... --- @@ -47,14 +47,14 @@ body: | ; CHECK-LABEL: name: test_insert_s64_s32_offset16 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(s64) = G_INSERT [[COPY]], [[COPY1]](s32), 16 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s64) = G_INSERT %0, %1, 16 - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(i64) = G_INSERT [[COPY]], [[COPY1]](i32), 16 + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = COPY $vgpr2 + %2:_(i64) = G_INSERT %0, %1(i32), 16 + $vgpr0_vgpr1 = COPY %2(i64) ... 
--- @@ -66,14 +66,14 @@ body: | ; CHECK-LABEL: name: test_insert_s96_s32_offset0 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[COPY]], [[COPY1]](s32), 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT]](s96) - %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(s32) = COPY $vgpr3 - %2:_(s96) = G_INSERT %0, %1, 0 - $vgpr0_vgpr1_vgpr2 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(i96) = G_INSERT [[COPY]], [[COPY1]](i32), 0 + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT]](i96) + %0:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(i32) = COPY $vgpr3 + %2:_(i96) = G_INSERT %0, %1(i32), 0 + $vgpr0_vgpr1_vgpr2 = COPY %2(i96) ... --- name: test_insert_s96_s32_offset32 @@ -84,14 +84,14 @@ body: | ; CHECK-LABEL: name: test_insert_s96_s32_offset32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[COPY]], [[COPY1]](s32), 32 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT]](s96) - %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(s32) = COPY $vgpr3 - %2:_(s96) = G_INSERT %0, %1, 32 - $vgpr0_vgpr1_vgpr2 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(i96) = G_INSERT [[COPY]], [[COPY1]](i32), 32 + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT]](i96) + %0:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(i32) = COPY $vgpr3 + %2:_(i96) = G_INSERT %0, %1(i32), 32 + $vgpr0_vgpr1_vgpr2 = COPY %2(i96) ... --- name: test_insert_s96_s32_offset64 @@ -102,14 +102,14 @@ body: | ; CHECK-LABEL: name: test_insert_s96_s32_offset64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[COPY]], [[COPY1]](s32), 64 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT]](s96) - %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(s32) = COPY $vgpr3 - %2:_(s96) = G_INSERT %0, %1, 64 - $vgpr0_vgpr1_vgpr2 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(i96) = G_INSERT [[COPY]], [[COPY1]](i32), 64 + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT]](i96) + %0:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(i32) = COPY $vgpr3 + %2:_(i96) = G_INSERT %0, %1(i32), 64 + $vgpr0_vgpr1_vgpr2 = COPY %2(i96) ... 
--- name: test_insert_s128_s32_offset0 @@ -120,14 +120,14 @@ body: | ; CHECK-LABEL: name: test_insert_s128_s32_offset0 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(s128) = G_INSERT [[COPY]], [[COPY1]](s32), 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](s128) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = COPY $vgpr4 - %2:_(s128) = G_INSERT %0, %1, 0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(i128) = G_INSERT [[COPY]], [[COPY1]](i32), 0 + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](i128) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i32) = COPY $vgpr4 + %2:_(i128) = G_INSERT %0, %1(i32), 0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(i128) ... --- name: test_insert_s128_s32_offset32 @@ -138,14 +138,14 @@ body: | ; CHECK-LABEL: name: test_insert_s128_s32_offset32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(s128) = G_INSERT [[COPY]], [[COPY1]](s32), 32 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](s128) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = COPY $vgpr4 - %2:_(s128) = G_INSERT %0, %1, 32 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(i128) = G_INSERT [[COPY]], [[COPY1]](i32), 32 + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](i128) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i32) = COPY $vgpr4 + %2:_(i128) = G_INSERT %0, %1(i32), 32 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(i128) ... --- name: test_insert_s128_s32_offset64 @@ -156,14 +156,14 @@ body: | ; CHECK-LABEL: name: test_insert_s128_s32_offset64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(s128) = G_INSERT [[COPY]], [[COPY1]](s32), 64 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](s128) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = COPY $vgpr4 - %2:_(s128) = G_INSERT %0, %1, 64 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(i128) = G_INSERT [[COPY]], [[COPY1]](i32), 64 + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](i128) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i32) = COPY $vgpr4 + %2:_(i128) = G_INSERT %0, %1(i32), 64 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(i128) ... 
--- name: test_insert_s128_s32_offset96 @@ -174,14 +174,14 @@ body: | ; CHECK-LABEL: name: test_insert_s128_s32_offset96 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(s128) = G_INSERT [[COPY]], [[COPY1]](s32), 96 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](s128) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = COPY $vgpr4 - %2:_(s128) = G_INSERT %0, %1, 96 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(i128) = G_INSERT [[COPY]], [[COPY1]](i32), 96 + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](i128) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i32) = COPY $vgpr4 + %2:_(i128) = G_INSERT %0, %1(i32), 96 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(i128) ... --- name: test_insert_s128_s64_offset0 @@ -192,14 +192,14 @@ body: | ; CHECK-LABEL: name: test_insert_s128_s64_offset0 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(s128) = G_INSERT [[COPY]], [[COPY1]](s64), 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](s128) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s64) = COPY $vgpr4_vgpr5 - %2:_(s128) = G_INSERT %0, %1, 0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr4_vgpr5 + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(i128) = G_INSERT [[COPY]], [[COPY1]](i64), 0 + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](i128) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i64) = COPY $vgpr4_vgpr5 + %2:_(i128) = G_INSERT %0, %1(i64), 0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(i128) ... --- name: test_insert_s128_s64_offset32 @@ -210,14 +210,14 @@ body: | ; CHECK-LABEL: name: test_insert_s128_s64_offset32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(s128) = G_INSERT [[COPY]], [[COPY1]](s64), 32 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](s128) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s64) = COPY $vgpr4_vgpr5 - %2:_(s128) = G_INSERT %0, %1, 32 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr4_vgpr5 + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(i128) = G_INSERT [[COPY]], [[COPY1]](i64), 32 + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](i128) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i64) = COPY $vgpr4_vgpr5 + %2:_(i128) = G_INSERT %0, %1(i64), 32 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(i128) ... 
--- name: test_insert_s128_s64_offset64 @@ -228,14 +228,14 @@ body: | ; CHECK-LABEL: name: test_insert_s128_s64_offset64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(s128) = G_INSERT [[COPY]], [[COPY1]](s64), 64 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](s128) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s64) = COPY $vgpr4_vgpr5 - %2:_(s128) = G_INSERT %0, %1, 64 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr4_vgpr5 + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(i128) = G_INSERT [[COPY]], [[COPY1]](i64), 64 + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](i128) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i64) = COPY $vgpr4_vgpr5 + %2:_(i128) = G_INSERT %0, %1(i64), 64 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(i128) ... --- name: test_insert_s128_s96_offset0 @@ -246,14 +246,14 @@ body: | ; CHECK-LABEL: name: test_insert_s128_s96_offset0 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr4_vgpr5_vgpr6 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(s128) = G_INSERT [[COPY]], [[COPY1]](s96), 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](s128) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s96) = COPY $vgpr4_vgpr5_vgpr6 - %2:_(s128) = G_INSERT %0, %1, 0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY $vgpr4_vgpr5_vgpr6 + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(i128) = G_INSERT [[COPY]], [[COPY1]](i96), 0 + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](i128) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i96) = COPY $vgpr4_vgpr5_vgpr6 + %2:_(i128) = G_INSERT %0, %1(i96), 0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(i128) ... --- name: test_insert_s128_s96_offset32 @@ -264,14 +264,14 @@ body: | ; CHECK-LABEL: name: test_insert_s128_s96_offset32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr4_vgpr5_vgpr6 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(s128) = G_INSERT [[COPY]], [[COPY1]](s96), 32 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](s128) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s96) = COPY $vgpr4_vgpr5_vgpr6 - %2:_(s128) = G_INSERT %0, %1, 32 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY $vgpr4_vgpr5_vgpr6 + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(i128) = G_INSERT [[COPY]], [[COPY1]](i96), 32 + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](i128) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i96) = COPY $vgpr4_vgpr5_vgpr6 + %2:_(i128) = G_INSERT %0, %1(i96), 32 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(i128) ... 
--- name: test_insert_p0_s32_offset0 @@ -283,13 +283,13 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(p0) = G_INSERT [[COPY]], [[COPY1]](s32), 0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(p0) = G_INSERT [[COPY]], [[COPY1]](i32), 0 ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](p0) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(p0) = G_INSERT %0, %1, 0 - $vgpr0_vgpr1 = COPY %2 + %1:_(i32) = COPY $vgpr2 + %2:_(p0) = G_INSERT %0, %1(i32), 0 + $vgpr0_vgpr1 = COPY %2(p0) ... --- name: test_insert_p0_s32_offset32 @@ -301,13 +301,13 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(p0) = G_INSERT [[COPY]], [[COPY1]](s32), 32 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(p0) = G_INSERT [[COPY]], [[COPY1]](i32), 32 ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](p0) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(p0) = G_INSERT %0, %1, 32 - $vgpr0_vgpr1 = COPY %2 + %1:_(i32) = COPY $vgpr2 + %2:_(p0) = G_INSERT %0, %1(i32), 32 + $vgpr0_vgpr1 = COPY %2(p0) ... --- name: test_insert_s128_p0_offset0 @@ -318,14 +318,14 @@ body: | ; CHECK-LABEL: name: test_insert_s128_p0_offset0 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(s128) = G_INSERT [[COPY]], [[COPY1]](p0), 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](s128) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(i128) = G_INSERT [[COPY]], [[COPY1]](p0), 0 + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](i128) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(p0) = COPY $vgpr4_vgpr5 - %2:_(s128) = G_INSERT %0, %1, 0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + %2:_(i128) = G_INSERT %0, %1(p0), 0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(i128) ... --- name: test_insert_s128_p0_offset32 @@ -336,14 +336,14 @@ body: | ; CHECK-LABEL: name: test_insert_s128_p0_offset32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(s128) = G_INSERT [[COPY]], [[COPY1]](p0), 32 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](s128) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(i128) = G_INSERT [[COPY]], [[COPY1]](p0), 32 + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](i128) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(p0) = COPY $vgpr4_vgpr5 - %2:_(s128) = G_INSERT %0, %1, 32 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + %2:_(i128) = G_INSERT %0, %1(p0), 32 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(i128) ... 
--- name: test_insert_s128_p0_offset64 @@ -354,14 +354,14 @@ body: | ; CHECK-LABEL: name: test_insert_s128_p0_offset64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(s128) = G_INSERT [[COPY]], [[COPY1]](p0), 64 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](s128) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(i128) = G_INSERT [[COPY]], [[COPY1]](p0), 64 + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](i128) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(p0) = COPY $vgpr4_vgpr5 - %2:_(s128) = G_INSERT %0, %1, 64 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + %2:_(i128) = G_INSERT %0, %1(p0), 64 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(i128) ... --- @@ -373,16 +373,16 @@ body: | ; CHECK-LABEL: name: test_insert_s128_s16_offset0 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(s128) = G_INSERT [[COPY]], [[TRUNC]](s16), 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](s128) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = COPY $vgpr4 - %2:_(s16) = G_TRUNC %1 - %3:_(s128) = G_INSERT %0, %2, 0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(i128) = G_INSERT [[COPY]], [[TRUNC]](i16), 0 + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](i128) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i32) = COPY $vgpr4 + %2:_(i16) = G_TRUNC %1(i32) + %3:_(i128) = G_INSERT %0, %2(i16), 0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3(i128) ... --- @@ -394,16 +394,16 @@ body: | ; CHECK-LABEL: name: test_insert_s128_s16_offset16 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(s128) = G_INSERT [[COPY]], [[TRUNC]](s16), 16 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](s128) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = COPY $vgpr4 - %2:_(s16) = G_TRUNC %1 - %3:_(s128) = G_INSERT %0, %2, 16 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(i128) = G_INSERT [[COPY]], [[TRUNC]](i16), 16 + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](i128) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i32) = COPY $vgpr4 + %2:_(i16) = G_TRUNC %1(i32) + %3:_(i128) = G_INSERT %0, %2(i16), 16 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3(i128) ... 
--- @@ -415,16 +415,16 @@ body: | ; CHECK-LABEL: name: test_insert_s128_s16_offset32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(s128) = G_INSERT [[COPY]], [[TRUNC]](s16), 32 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](s128) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = COPY $vgpr4 - %2:_(s16) = G_TRUNC %1 - %3:_(s128) = G_INSERT %0, %2, 32 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(i128) = G_INSERT [[COPY]], [[TRUNC]](i16), 32 + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](i128) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i32) = COPY $vgpr4 + %2:_(i16) = G_TRUNC %1(i32) + %3:_(i128) = G_INSERT %0, %2(i16), 32 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3(i128) ... --- @@ -436,16 +436,16 @@ body: | ; CHECK-LABEL: name: test_insert_s128_s16_offset112 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(s128) = G_INSERT [[COPY]], [[TRUNC]](s16), 112 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](s128) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = COPY $vgpr4 - %2:_(s16) = G_TRUNC %1 - %3:_(s128) = G_INSERT %0, %2, 112 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(i128) = G_INSERT [[COPY]], [[TRUNC]](i16), 112 + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](i128) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i32) = COPY $vgpr4 + %2:_(i16) = G_TRUNC %1(i32) + %3:_(i128) = G_INSERT %0, %2(i16), 112 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3(i128) ... 
--- @@ -457,15 +457,15 @@ body: | ; CHECK-LABEL: name: test_insert_v2s32_s32_offset0 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[UV1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(<2 x s32>) = G_INSERT %0, %1, 0 - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY1]](i32), [[UV1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(i32) = COPY $vgpr2 + %2:_(<2 x i32>) = G_INSERT %0, %1(i32), 0 + $vgpr0_vgpr1 = COPY %2(<2 x i32>) ... --- name: test_insert_v2s32_s32_offset32 @@ -476,15 +476,15 @@ body: | ; CHECK-LABEL: name: test_insert_v2s32_s32_offset32 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[COPY1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(<2 x s32>) = G_INSERT %0, %1, 32 - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[COPY1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(i32) = COPY $vgpr2 + %2:_(<2 x i32>) = G_INSERT %0, %1(i32), 32 + $vgpr0_vgpr1 = COPY %2(<2 x i32>) ... 
--- name: test_insert_v3s32_s32_offset0 @@ -495,15 +495,15 @@ body: | ; CHECK-LABEL: name: test_insert_v3s32_s32_offset0 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[UV1]](s32), [[UV2]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(s32) = COPY $vgpr3 - %2:_(<3 x s32>) = G_INSERT %0, %1, 0 - $vgpr0_vgpr1_vgpr2 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY1]](i32), [[UV1]](i32), [[UV2]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(i32) = COPY $vgpr3 + %2:_(<3 x i32>) = G_INSERT %0, %1(i32), 0 + $vgpr0_vgpr1_vgpr2 = COPY %2(<3 x i32>) ... --- name: test_insert_v3s32_s32_offset32 @@ -514,15 +514,15 @@ body: | ; CHECK-LABEL: name: test_insert_v3s32_s32_offset32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[COPY1]](s32), [[UV2]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(s32) = COPY $vgpr3 - %2:_(<3 x s32>) = G_INSERT %0, %1, 32 - $vgpr0_vgpr1_vgpr2 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[COPY1]](i32), [[UV2]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(i32) = COPY $vgpr3 + %2:_(<3 x i32>) = G_INSERT %0, %1(i32), 32 + $vgpr0_vgpr1_vgpr2 = COPY %2(<3 x i32>) ... 
--- name: test_insert_v3s32_s32_offset64 @@ -533,15 +533,15 @@ body: | ; CHECK-LABEL: name: test_insert_v3s32_s32_offset64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[COPY1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(s32) = COPY $vgpr3 - %2:_(<3 x s32>) = G_INSERT %0, %1, 64 - $vgpr0_vgpr1_vgpr2 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[COPY1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(i32) = COPY $vgpr3 + %2:_(<3 x i32>) = G_INSERT %0, %1(i32), 64 + $vgpr0_vgpr1_vgpr2 = COPY %2(<3 x i32>) ... --- name: test_insert_v4s32_s32_offset0 @@ -552,15 +552,15 @@ body: | ; CHECK-LABEL: name: test_insert_v4s32_s32_offset0 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = COPY $vgpr4 - %2:_(<4 x s32>) = G_INSERT %0, %1, 0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[COPY1]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i32) = COPY $vgpr4 + %2:_(<4 x i32>) = G_INSERT %0, %1(i32), 0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<4 x i32>) ... 
--- name: test_insert_v4s32_s32_offset32 @@ -571,15 +571,15 @@ body: | ; CHECK-LABEL: name: test_insert_v4s32_s32_offset32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[COPY1]](s32), [[UV2]](s32), [[UV3]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = COPY $vgpr4 - %2:_(<4 x s32>) = G_INSERT %0, %1, 32 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[COPY1]](i32), [[UV2]](i32), [[UV3]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i32) = COPY $vgpr4 + %2:_(<4 x i32>) = G_INSERT %0, %1(i32), 32 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<4 x i32>) ... --- name: test_insert_v4s32_s32_offset64 @@ -590,15 +590,15 @@ body: | ; CHECK-LABEL: name: test_insert_v4s32_s32_offset64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[COPY1]](s32), [[UV3]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = COPY $vgpr4 - %2:_(<4 x s32>) = G_INSERT %0, %1, 64 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[COPY1]](i32), [[UV3]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i32) = COPY $vgpr4 + %2:_(<4 x i32>) = G_INSERT %0, %1(i32), 64 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<4 x i32>) ... 
--- name: test_insert_v4s32_s32_offset96 @@ -609,15 +609,15 @@ body: | ; CHECK-LABEL: name: test_insert_v4s32_s32_offset96 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[COPY1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = COPY $vgpr4 - %2:_(<4 x s32>) = G_INSERT %0, %1, 96 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[COPY1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i32) = COPY $vgpr4 + %2:_(<4 x i32>) = G_INSERT %0, %1(i32), 96 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<4 x i32>) ... --- name: test_insert_v4s32_s64_offset0 @@ -628,16 +628,16 @@ body: | ; CHECK-LABEL: name: test_insert_v4s32_s64_offset0 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV2]](s32), [[UV3]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s64) = COPY $vgpr4_vgpr5 - %2:_(<4 x s32>) = G_INSERT %0, %1, 0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr4_vgpr5 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV4]](i32), [[UV5]](i32), [[UV2]](i32), [[UV3]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i64) = COPY $vgpr4_vgpr5 + %2:_(<4 x i32>) = G_INSERT %0, %1(i64), 0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<4 x i32>) ... 
--- name: test_insert_v4s32_s64_offset32 @@ -648,16 +648,16 @@ body: | ; CHECK-LABEL: name: test_insert_v4s32_s64_offset32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV4]](s32), [[UV5]](s32), [[UV3]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s64) = COPY $vgpr4_vgpr5 - %2:_(<4 x s32>) = G_INSERT %0, %1, 32 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr4_vgpr5 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV4]](i32), [[UV5]](i32), [[UV3]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i64) = COPY $vgpr4_vgpr5 + %2:_(<4 x i32>) = G_INSERT %0, %1(i64), 32 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<4 x i32>) ... --- name: test_insert_v4s32_s64_offset64 @@ -668,16 +668,16 @@ body: | ; CHECK-LABEL: name: test_insert_v4s32_s64_offset64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV4]](s32), [[UV5]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s64) = COPY $vgpr4_vgpr5 - %2:_(<4 x s32>) = G_INSERT %0, %1, 64 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr4_vgpr5 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV4]](i32), [[UV5]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i64) = COPY $vgpr4_vgpr5 + %2:_(<4 x i32>) = G_INSERT %0, %1(i64), 64 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<4 x i32>) ... 
--- name: test_insert_v4s32_s96_offset0 @@ -688,16 +688,16 @@ body: | ; CHECK-LABEL: name: test_insert_v4s32_s96_offset0 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr4_vgpr5_vgpr6 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s96) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV3]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s96) = COPY $vgpr4_vgpr5_vgpr6 - %2:_(<4 x s32>) = G_INSERT %0, %1, 0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY $vgpr4_vgpr5_vgpr6 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i96) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV4]](i32), [[UV5]](i32), [[UV6]](i32), [[UV3]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i96) = COPY $vgpr4_vgpr5_vgpr6 + %2:_(<4 x i32>) = G_INSERT %0, %1(i96), 0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<4 x i32>) ... 
--- name: test_insert_v4s32_s96_offset32 @@ -708,16 +708,16 @@ body: | ; CHECK-LABEL: name: test_insert_v4s32_s96_offset32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr4_vgpr5_vgpr6 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s96) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s96) = COPY $vgpr4_vgpr5_vgpr6 - %2:_(<4 x s32>) = G_INSERT %0, %1, 32 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY $vgpr4_vgpr5_vgpr6 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i96) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV4]](i32), [[UV5]](i32), [[UV6]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i96) = COPY $vgpr4_vgpr5_vgpr6 + %2:_(<4 x i32>) = G_INSERT %0, %1(i96), 32 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<4 x i32>) ... --- name: test_insert_v4s32_v2s32_offset0 @@ -728,16 +728,16 @@ body: | ; CHECK-LABEL: name: test_insert_v4s32_v2s32_offset0 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV2]](s32), [[UV3]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s32>) = COPY $vgpr4_vgpr5 - %2:_(<4 x s32>) = G_INSERT %0, %1, 0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr4_vgpr5 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV4]](i32), [[UV5]](i32), [[UV2]](i32), [[UV3]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x i32>) = COPY $vgpr4_vgpr5 + %2:_(<4 x i32>) = G_INSERT %0, %1(<2 x i32>), 0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<4 x i32>) ... 
--- name: test_insert_v4s32_v2s32_offset32 @@ -748,16 +748,16 @@ body: | ; CHECK-LABEL: name: test_insert_v4s32_v2s32_offset32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV4]](s32), [[UV5]](s32), [[UV3]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s32>) = COPY $vgpr4_vgpr5 - %2:_(<4 x s32>) = G_INSERT %0, %1, 32 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr4_vgpr5 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV4]](i32), [[UV5]](i32), [[UV3]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x i32>) = COPY $vgpr4_vgpr5 + %2:_(<4 x i32>) = G_INSERT %0, %1(<2 x i32>), 32 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<4 x i32>) ... --- name: test_insert_v4s32_v2s32_offset64 @@ -768,16 +768,16 @@ body: | ; CHECK-LABEL: name: test_insert_v4s32_v2s32_offset64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV4]](s32), [[UV5]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s32>) = COPY $vgpr4_vgpr5 - %2:_(<4 x s32>) = G_INSERT %0, %1, 64 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr4_vgpr5 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV4]](i32), [[UV5]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x i32>) = COPY $vgpr4_vgpr5 + %2:_(<4 x i32>) = G_INSERT %0, %1(<2 x i32>), 64 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<4 x i32>) ... 
--- name: test_insert_v4s32_v3s32_offset0 @@ -788,16 +788,16 @@ body: | ; CHECK-LABEL: name: test_insert_v4s32_v3s32_offset0 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr4_vgpr5_vgpr6 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV3]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<3 x s32>) = COPY $vgpr4_vgpr5_vgpr6 - %2:_(<4 x s32>) = G_INSERT %0, %1, 0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr4_vgpr5_vgpr6 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV4]](i32), [[UV5]](i32), [[UV6]](i32), [[UV3]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<3 x i32>) = COPY $vgpr4_vgpr5_vgpr6 + %2:_(<4 x i32>) = G_INSERT %0, %1(<3 x i32>), 0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<4 x i32>) ... 
--- name: test_insert_v4s32_v3s32_offset32 @@ -808,16 +808,16 @@ body: | ; CHECK-LABEL: name: test_insert_v4s32_v3s32_offset32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr4_vgpr5_vgpr6 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<3 x s32>) = COPY $vgpr4_vgpr5_vgpr6 - %2:_(<4 x s32>) = G_INSERT %0, %1, 32 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr4_vgpr5_vgpr6 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV4]](i32), [[UV5]](i32), [[UV6]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<3 x i32>) = COPY $vgpr4_vgpr5_vgpr6 + %2:_(<4 x i32>) = G_INSERT %0, %1(<3 x i32>), 32 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<4 x i32>) ... --- name: test_insert_v4s32_p0_offset0 @@ -828,14 +828,14 @@ body: | ; CHECK-LABEL: name: test_insert_v4s32_p0_offset0 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](p0), 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x i32>) = G_INSERT [[COPY]], [[COPY1]](p0), 0 + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x i32>) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(p0) = COPY $vgpr4_vgpr5 - %2:_(<4 x s32>) = G_INSERT %0, %1, 0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + %2:_(<4 x i32>) = G_INSERT %0, %1(p0), 0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<4 x i32>) ... 
--- name: test_insert_v4s32_p0_offset32 @@ -846,14 +846,14 @@ body: | ; CHECK-LABEL: name: test_insert_v4s32_p0_offset32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](p0), 32 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x i32>) = G_INSERT [[COPY]], [[COPY1]](p0), 32 + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x i32>) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(p0) = COPY $vgpr4_vgpr5 - %2:_(<4 x s32>) = G_INSERT %0, %1, 32 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + %2:_(<4 x i32>) = G_INSERT %0, %1(p0), 32 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<4 x i32>) ... --- name: test_insert_v4s32_p0_offset64 @@ -864,14 +864,14 @@ body: | ; CHECK-LABEL: name: test_insert_v4s32_p0_offset64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](p0), 64 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x i32>) = G_INSERT [[COPY]], [[COPY1]](p0), 64 + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x i32>) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(p0) = COPY $vgpr4_vgpr5 - %2:_(<4 x s32>) = G_INSERT %0, %1, 64 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + %2:_(<4 x i32>) = G_INSERT %0, %1(p0), 64 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<4 x i32>) ... 
--- @@ -883,22 +883,22 @@ body: | ; CHECK-LABEL: name: test_insert_v2s16_s16_offset0 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %1 - %3:_(<2 x s16>) = G_INSERT %0, %2, 0 - $vgpr0 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %1(i32) + %3:_(<2 x i16>) = G_INSERT %0, %2(i16), 0 + $vgpr0 = COPY %3(<2 x i16>) ... 
--- @@ -910,23 +910,23 @@ body: | ; CHECK-LABEL: name: test_insert_v2s16_s16_offset1 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -131071 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]] - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]] - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %1 - %3:_(<2 x s16>) = G_INSERT %0, %2, 1 - $vgpr0 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND]], [[C1]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 -131071 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C2]] + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND1]], [[SHL]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %1(i32) + %3:_(<2 x i16>) = G_INSERT %0, %2(i16), 1 + $vgpr0 = COPY %3(<2 x i16>) ... 
--- name: test_insert_v2s16_s16_offset16 @@ -937,22 +937,22 @@ body: | ; CHECK-LABEL: name: test_insert_v2s16_s16_offset16 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %1 - %3:_(<2 x s16>) = G_INSERT %0, %2, 16 - $vgpr0 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %1(i32) + %3:_(<2 x i16>) = G_INSERT %0, %2(i16), 16 + $vgpr0 = COPY %3(<2 x i16>) ... 
--- name: test_insert_v3s16_s16_offset0 @@ -963,36 +963,36 @@ body: | ; CHECK-LABEL: name: test_insert_v3s16_s16_offset0 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<3 x s16>) = G_EXTRACT %0, 0 - %2:_(s32) = COPY $vgpr2 - %3:_(s16) = G_TRUNC %2 - %4:_(<3 x s16>) = G_INSERT %1, %3, 0 - %5:_(<4 x s16>) = G_IMPLICIT_DEF - %6:_(<4 x s16>) = G_INSERT %5, %4, 0 - $vgpr0_vgpr1 = COPY %6 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LSHR1]], [[C]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND1]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CHECK-NEXT: 
[[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<3 x i16>) = G_EXTRACT %0(<4 x i16>), 0 + %2:_(i32) = COPY $vgpr2 + %3:_(i16) = G_TRUNC %2(i32) + %4:_(<3 x i16>) = G_INSERT %1, %3(i16), 0 + %5:_(<4 x i16>) = G_IMPLICIT_DEF + %6:_(<4 x i16>) = G_INSERT %5, %4(<3 x i16>), 0 + $vgpr0_vgpr1 = COPY %6(<4 x i16>) ... --- name: test_insert_v3s16_s16_offset16 @@ -1003,36 +1003,36 @@ body: | ; CHECK-LABEL: name: test_insert_v3s16_s16_offset16 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<3 x s16>) = G_EXTRACT %0, 0 - %2:_(s32) = COPY $vgpr2 - %3:_(s16) = G_TRUNC %2 - %4:_(<3 x s16>) = G_INSERT %1, %3, 16 - %5:_(<4 x s16>) = G_IMPLICIT_DEF - %6:_(<4 x s16>) = G_INSERT %5, %4, 0 - $vgpr0_vgpr1 = COPY %6 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: 
[[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<3 x i16>) = G_EXTRACT %0(<4 x i16>), 0 + %2:_(i32) = COPY $vgpr2 + %3:_(i16) = G_TRUNC %2(i32) + %4:_(<3 x i16>) = G_INSERT %1, %3(i16), 16 + %5:_(<4 x i16>) = G_IMPLICIT_DEF + %6:_(<4 x i16>) = G_INSERT %5, %4(<3 x i16>), 0 + $vgpr0_vgpr1 = COPY %6(<4 x i16>) ... --- name: test_insert_v3s16_s16_offset32 @@ -1043,29 +1043,29 @@ body: | ; CHECK-LABEL: name: test_insert_v3s16_s16_offset32 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST1]](<2 x s16>) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<3 x s16>) = G_EXTRACT %0, 0 - %2:_(s32) = COPY $vgpr2 - %3:_(s16) = G_TRUNC %2 - %4:_(<3 x s16>) = G_INSERT %1, %3, 32 - %5:_(<4 x s16>) = G_IMPLICIT_DEF - %6:_(<4 x s16>) = G_INSERT %5, %4, 0 - $vgpr0_vgpr1 = COPY %6 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = 
G_CONCAT_VECTORS [[UV]](<2 x i16>), [[BITCAST1]](<2 x i16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<3 x i16>) = G_EXTRACT %0(<4 x i16>), 0 + %2:_(i32) = COPY $vgpr2 + %3:_(i16) = G_TRUNC %2(i32) + %4:_(<3 x i16>) = G_INSERT %1, %3(i16), 32 + %5:_(<4 x i16>) = G_IMPLICIT_DEF + %6:_(<4 x i16>) = G_INSERT %5, %4(<3 x i16>), 0 + $vgpr0_vgpr1 = COPY %6(<4 x i16>) ... --- name: test_insert_v3s16_v2s16_offset0 @@ -1076,35 +1076,35 @@ body: | ; CHECK-LABEL: name: test_insert_v3s16_v2s16_offset0 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<3 x s16>) = G_EXTRACT %0, 0 - %2:_(<2 x s16>) = COPY $vgpr2 - %4:_(<3 x s16>) = G_INSERT %1, %2, 0 - %5:_(<4 x s16>) = G_IMPLICIT_DEF - %6:_(<4 x s16>) = G_INSERT %5, %4, 0 - $vgpr0_vgpr1 = COPY %6 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], 
[[C]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LSHR1]], [[C]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND1]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<3 x i16>) = G_EXTRACT %0(<4 x i16>), 0 + %2:_(<2 x i16>) = COPY $vgpr2 + %3:_(<3 x i16>) = G_INSERT %1, %2(<2 x i16>), 0 + %4:_(<4 x i16>) = G_IMPLICIT_DEF + %5:_(<4 x i16>) = G_INSERT %4, %3(<3 x i16>), 0 + $vgpr0_vgpr1 = COPY %5(<4 x i16>) ... --- name: test_insert_v3s16_v2s16_offset16 @@ -1115,35 +1115,35 @@ body: | ; CHECK-LABEL: name: test_insert_v3s16_v2s16_offset16 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<3 x s16>) = G_EXTRACT %0, 0 - %2:_(<2 x s16>) = COPY $vgpr2 - %4:_(<3 x s16>) = G_INSERT %1, %2, 16 - %5:_(<4 x s16>) = G_IMPLICIT_DEF - %6:_(<4 x s16>) = G_INSERT %5, %4, 0 - $vgpr0_vgpr1 = COPY %6 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) 
= G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LSHR1]], [[C]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<3 x i16>) = G_EXTRACT %0(<4 x i16>), 0 + %2:_(<2 x i16>) = COPY $vgpr2 + %3:_(<3 x i16>) = G_INSERT %1, %2(<2 x i16>), 16 + %4:_(<4 x i16>) = G_IMPLICIT_DEF + %5:_(<4 x i16>) = G_INSERT %4, %3(<3 x i16>), 0 + $vgpr0_vgpr1 = COPY %5(<4 x i16>) ... --- name: test_insert_v3s16_s32_offset0 @@ -1154,34 +1154,34 @@ body: | ; CHECK-LABEL: name: test_insert_v3s16_s32_offset0 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<3 x s16>) = G_EXTRACT %0, 0 - %2:_(s32) = COPY $vgpr2 - %4:_(<3 x s16>) = G_INSERT %1, %2, 0 - %5:_(<4 x s16>) = G_IMPLICIT_DEF - %6:_(<4 x s16>) = G_INSERT %5, %4, 0 - $vgpr0_vgpr1 = COPY %6 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; CHECK-NEXT: 
[[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY1]], [[C]](i32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LSHR1]], [[C]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND1]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x i16>), [[BITCAST3]](<2 x i16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<3 x i16>) = G_EXTRACT %0(<4 x i16>), 0 + %2:_(i32) = COPY $vgpr2 + %3:_(<3 x i16>) = G_INSERT %1, %2(i32), 0 + %4:_(<4 x i16>) = G_IMPLICIT_DEF + %5:_(<4 x i16>) = G_INSERT %4, %3(<3 x i16>), 0 + $vgpr0_vgpr1 = COPY %5(<4 x i16>) ... --- name: test_insert_v3s16_s32_offset16 @@ -1192,34 +1192,34 @@ body: | ; CHECK-LABEL: name: test_insert_v3s16_s32_offset16 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<3 x s16>) = G_EXTRACT %0, 0 - %2:_(s32) = COPY $vgpr2 - %4:_(<3 x s16>) = G_INSERT %1, %2, 16 - %5:_(<4 x s16>) = 
G_IMPLICIT_DEF - %6:_(<4 x s16>) = G_INSERT %5, %4, 0 - $vgpr0_vgpr1 = COPY %6 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY1]], [[C]](i32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LSHR1]], [[C]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x i16>), [[BITCAST3]](<2 x i16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<3 x i16>) = G_EXTRACT %0(<4 x i16>), 0 + %2:_(i32) = COPY $vgpr2 + %3:_(<3 x i16>) = G_INSERT %1, %2(i32), 16 + %4:_(<4 x i16>) = G_IMPLICIT_DEF + %5:_(<4 x i16>) = G_INSERT %4, %3(<3 x i16>), 0 + $vgpr0_vgpr1 = COPY %5(<4 x i16>) ... 
--- name: test_insert_v4s16_s16_offset0 @@ -1230,24 +1230,24 @@ body: | ; CHECK-LABEL: name: test_insert_v4s16_s16_offset0 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[UV1]](<2 x s16>) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s16) = G_TRUNC %1 - %3:_(<4 x s16>) = G_INSERT %0, %2, 0 - $vgpr0_vgpr1 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x i16>), [[UV1]](<2 x i16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(i32) = COPY $vgpr2 + %2:_(i16) = G_TRUNC %1(i32) + %3:_(<4 x i16>) = G_INSERT %0, %2(i16), 0 + $vgpr0_vgpr1 = COPY %3(<4 x i16>) ... 
--- name: test_insert_v4s16_s16_offset16 @@ -1258,24 +1258,24 @@ body: | ; CHECK-LABEL: name: test_insert_v4s16_s16_offset16 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[UV1]](<2 x s16>) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s16) = G_TRUNC %1 - %3:_(<4 x s16>) = G_INSERT %0, %2, 16 - $vgpr0_vgpr1 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x i16>), [[UV1]](<2 x i16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(i32) = COPY $vgpr2 + %2:_(i16) = G_TRUNC %1(i32) + %3:_(<4 x i16>) = G_INSERT %0, %2(i16), 16 + $vgpr0_vgpr1 = COPY %3(<4 x i16>) ... 
--- name: test_insert_v4s16_s16_offset32 @@ -1286,24 +1286,24 @@ body: | ; CHECK-LABEL: name: test_insert_v4s16_s16_offset32 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST1]](<2 x s16>) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s16) = G_TRUNC %1 - %3:_(<4 x s16>) = G_INSERT %0, %2, 32 - $vgpr0_vgpr1 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[UV]](<2 x i16>), [[BITCAST1]](<2 x i16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(i32) = COPY $vgpr2 + %2:_(i16) = G_TRUNC %1(i32) + %3:_(<4 x i16>) = G_INSERT %0, %2(i16), 32 + $vgpr0_vgpr1 = COPY %3(<4 x i16>) ... 
--- name: test_insert_v4s16_s16_offset48 @@ -1314,24 +1314,24 @@ body: | ; CHECK-LABEL: name: test_insert_v4s16_s16_offset48 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST1]](<2 x s16>) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s16) = G_TRUNC %1 - %3:_(<4 x s16>) = G_INSERT %0, %2, 48 - $vgpr0_vgpr1 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[UV]](<2 x i16>), [[BITCAST1]](<2 x i16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(i32) = COPY $vgpr2 + %2:_(i16) = G_TRUNC %1(i32) + %3:_(<4 x i16>) = G_INSERT %0, %2(i16), 48 + $vgpr0_vgpr1 = COPY %3(<4 x i16>) ... 
--- name: test_insert_v4s16_v2s16_offset0 @@ -1342,23 +1342,23 @@ body: | ; CHECK-LABEL: name: test_insert_v4s16_v2s16_offset0 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[UV1]](<2 x s16>) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s16>) = COPY $vgpr2 - %2:_(<4 x s16>) = G_INSERT %0, %1, 0 - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x i16>), [[UV1]](<2 x i16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i16>) = COPY $vgpr2 + %2:_(<4 x i16>) = G_INSERT %0, %1(<2 x i16>), 0 + $vgpr0_vgpr1 = COPY %2(<4 x i16>) ... 
--- name: test_insert_v4s16_v2s16_offset16 @@ -1369,30 +1369,30 @@ body: | ; CHECK-LABEL: name: test_insert_v4s16_v2s16_offset16 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s16>) = COPY $vgpr2 - %2:_(<4 x s16>) = G_INSERT %0, %1, 16 - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[LSHR1]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i16>) = COPY $vgpr2 + %2:_(<4 x i16>) = G_INSERT %0, %1(<2 x i16>), 16 + $vgpr0_vgpr1 = COPY %2(<4 x i16>) ... 
--- name: test_insert_v4s16_v2s16_offset32 @@ -1403,23 +1403,23 @@ body: | ; CHECK-LABEL: name: test_insert_v4s16_v2s16_offset32 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST1]](<2 x s16>) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s16>) = COPY $vgpr2 - %2:_(<4 x s16>) = G_INSERT %0, %1, 32 - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[UV]](<2 x i16>), [[BITCAST1]](<2 x i16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i16>) = COPY $vgpr2 + %2:_(<4 x i16>) = G_INSERT %0, %1(<2 x i16>), 32 + $vgpr0_vgpr1 = COPY %2(<4 x i16>) ... 
--- name: test_insert_v4s16_v3s16_offset0 @@ -1430,26 +1430,26 @@ body: | ; CHECK-LABEL: name: test_insert_v4s16_v3s16_offset0 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST2]](<2 x s16>) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 - %2:_(<3 x s16>) = G_EXTRACT %1, 0 - %3:_(<4 x s16>) = G_INSERT %0, %2, 0 - $vgpr0_vgpr1 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[UV]](<2 x i16>), [[BITCAST2]](<2 x i16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = COPY $vgpr2_vgpr3 + %2:_(<3 x i16>) = G_EXTRACT %1(<4 x i16>), 0 + %3:_(<4 x i16>) = G_INSERT %0, %2(<3 x i16>), 0 + $vgpr0_vgpr1 = COPY %3(<4 x i16>) ... 
--- name: test_insert_v4s16_v3s16_offset16 @@ -1460,32 +1460,32 @@ body: | ; CHECK-LABEL: name: test_insert_v4s16_v3s16_offset16 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 - %2:_(<3 x s16>) = G_EXTRACT %1, 0 - %3:_(<4 x s16>) = G_INSERT %0, %2, 16 - $vgpr0_vgpr1 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND2]], [[C]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) + %0:_(<4 x 
i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = COPY $vgpr2_vgpr3 + %2:_(<3 x i16>) = G_EXTRACT %1(<4 x i16>), 0 + %3:_(<4 x i16>) = G_INSERT %0, %2(<3 x i16>), 16 + $vgpr0_vgpr1 = COPY %3(<4 x i16>) ... --- name: test_insert_v4s16_s32_offset0 @@ -1496,22 +1496,22 @@ body: | ; CHECK-LABEL: name: test_insert_v4s16_s32_offset0 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[UV1]](<2 x s16>) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(<4 x s16>) = G_INSERT %0, %1, 0 - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY1]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x i16>), [[UV1]](<2 x i16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(i32) = COPY $vgpr2 + %2:_(<4 x i16>) = G_INSERT %0, %1(i32), 0 + $vgpr0_vgpr1 = COPY %2(<4 x i16>) ... 
--- name: test_insert_v4s16_s32_offset16 @@ -1522,29 +1522,29 @@ body: | ; CHECK-LABEL: name: test_insert_v4s16_s32_offset16 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(<4 x s16>) = G_INSERT %0, %1, 16 - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY1]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[LSHR1]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x i16>), [[BITCAST3]](<2 x i16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(i32) = COPY $vgpr2 + %2:_(<4 x i16>) = G_INSERT %0, %1(i32), 16 + $vgpr0_vgpr1 = COPY %2(<4 x i16>) ... 
--- name: test_insert_v4s16_s32_offset32 @@ -1555,22 +1555,22 @@ body: | ; CHECK-LABEL: name: test_insert_v4s16_s32_offset32 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST]](<2 x s16>) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(<4 x s16>) = G_INSERT %0, %1, 32 - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY1]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[UV]](<2 x i16>), [[BITCAST]](<2 x i16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(i32) = COPY $vgpr2 + %2:_(<4 x i16>) = G_INSERT %0, %1(i32), 32 + $vgpr0_vgpr1 = COPY %2(<4 x i16>) ... --- @@ -1582,16 +1582,16 @@ body: | ; CHECK-LABEL: name: test_insert_s64_s16_offset0 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(s64) = G_INSERT [[COPY]], [[TRUNC]](s16), 0 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s16) = G_TRUNC %1 - %3:_(s64) = G_INSERT %0, %2, 0 - $vgpr0_vgpr1 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(i64) = G_INSERT [[COPY]], [[TRUNC]](i16), 0 + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = COPY $vgpr2 + %2:_(i16) = G_TRUNC %1(i32) + %3:_(i64) = G_INSERT %0, %2(i16), 0 + $vgpr0_vgpr1 = COPY %3(i64) ... 
--- name: test_insert_s64_s16_offset16 @@ -1602,16 +1602,16 @@ body: | ; CHECK-LABEL: name: test_insert_s64_s16_offset16 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(s64) = G_INSERT [[COPY]], [[TRUNC]](s16), 16 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s16) = G_TRUNC %1 - %3:_(s64) = G_INSERT %0, %2, 16 - $vgpr0_vgpr1 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(i64) = G_INSERT [[COPY]], [[TRUNC]](i16), 16 + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = COPY $vgpr2 + %2:_(i16) = G_TRUNC %1(i32) + %3:_(i64) = G_INSERT %0, %2(i16), 16 + $vgpr0_vgpr1 = COPY %3(i64) ... --- name: test_insert_s64_s16_offset32 @@ -1622,16 +1622,16 @@ body: | ; CHECK-LABEL: name: test_insert_s64_s16_offset32 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(s64) = G_INSERT [[COPY]], [[TRUNC]](s16), 32 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s16) = G_TRUNC %1 - %3:_(s64) = G_INSERT %0, %2, 32 - $vgpr0_vgpr1 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(i64) = G_INSERT [[COPY]], [[TRUNC]](i16), 32 + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = COPY $vgpr2 + %2:_(i16) = G_TRUNC %1(i32) + %3:_(i64) = G_INSERT %0, %2(i16), 32 + $vgpr0_vgpr1 = COPY %3(i64) ... --- name: test_insert_s64_s16_offset48 @@ -1642,16 +1642,16 @@ body: | ; CHECK-LABEL: name: test_insert_s64_s16_offset48 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(s64) = G_INSERT [[COPY]], [[TRUNC]](s16), 48 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s16) = G_TRUNC %1 - %3:_(s64) = G_INSERT %0, %2, 48 - $vgpr0_vgpr1 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(i64) = G_INSERT [[COPY]], [[TRUNC]](i16), 48 + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = COPY $vgpr2 + %2:_(i16) = G_TRUNC %1(i32) + %3:_(i64) = G_INSERT %0, %2(i16), 48 + $vgpr0_vgpr1 = COPY %3(i64) ... 
--- name: test_insert_s32_s16_offset0 @@ -1662,20 +1662,20 @@ body: | ; CHECK-LABEL: name: test_insert_s32_s16_offset0 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -65536 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[AND]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %1 - %3:_(s32) = G_INSERT %1, %2, 0 - $vgpr0 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 -65536 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND1]], [[AND]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[OR]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %1(i32) + %3:_(i32) = G_INSERT %1, %2(i16), 0 + $vgpr0 = COPY %3(i32) ... --- @@ -1687,22 +1687,22 @@ body: | ; CHECK-LABEL: name: test_insert_s32_s16_offset1 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -131071 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %1 - %3:_(s32) = G_INSERT %1, %2, 1 - $vgpr0 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND]], [[C1]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 -131071 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C2]] + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND1]], [[SHL]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[OR]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %1(i32) + %3:_(i32) = G_INSERT %1, %2(i16), 1 + $vgpr0 = COPY %3(i32) ... 
--- @@ -1714,22 +1714,22 @@ body: | ; CHECK-LABEL: name: test_insert_s32_s16_offset8 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -16776961 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %1 - %3:_(s32) = G_INSERT %1, %2, 8 - $vgpr0 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND]], [[C1]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 -16776961 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C2]] + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND1]], [[SHL]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[OR]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %1(i32) + %3:_(i32) = G_INSERT %1, %2(i16), 8 + $vgpr0 = COPY %3(i32) ... --- @@ -1741,19 +1741,19 @@ body: | ; CHECK-LABEL: name: test_insert_s32_s16_offset16 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[AND]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[OR]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY3]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %1 - %3:_(s32) = G_INSERT %1, %2, 16 - $vgpr0 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[AND]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY2]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[OR]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY3]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %1(i32) + %3:_(i32) = G_INSERT %1, %2(i16), 16 + $vgpr0 = COPY %3(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-amdgcn-fdiv-fast.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-amdgcn-fdiv-fast.mir index 98336560ad5e6..fa898fe9ec6ac 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-amdgcn-fdiv-fast.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-amdgcn-fdiv-fast.mir @@ -10,23 +10,29 @@ body: | ; CHECK-LABEL: name: test_amdgcn_fdiv_fast ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[COPY1]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x45F0000000000000 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[FABS]](s32), [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]] - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY1]], [[SELECT]] - ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FMUL]](s32) - ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[INT]] - ; CHECK-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[SELECT]], [[FMUL1]] - ; CHECK-NEXT: $vgpr0 = COPY [[FMUL2]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fdiv.fast), %0, %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[FABS:%[0-9]+]]:_(f32) = G_FABS [[BITCAST1]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x45F0000000000000 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3DF0000000000000 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(ogt), [[FABS]](f32), [[C]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C1]], [[C2]] + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST1]], [[SELECT]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FMUL]](f32) + ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[BITCAST]], [[INT]] + ; CHECK-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[SELECT]], [[FMUL1]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMUL2]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fdiv.fast), %2(f32), %3(f32) + %5:_(i32) = G_BITCAST %4(f32) + $vgpr0 = COPY %5(i32) ... 
--- @@ -38,21 +44,27 @@ body: | ; CHECK-LABEL: name: test_amdgcn_fdiv_fast_propagate_flags ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[FABS:%[0-9]+]]:_(s32) = nsz G_FABS [[COPY1]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x45F0000000000000 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(s1) = nsz G_FCMP floatpred(ogt), [[FABS]](s32), [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = nsz G_SELECT [[FCMP]](s1), [[C1]], [[C2]] - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nsz G_FMUL [[COPY1]], [[SELECT]] - ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = nsz G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FMUL]](s32) - ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = nsz G_FMUL [[COPY]], [[INT]] - ; CHECK-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = nsz G_FMUL [[SELECT]], [[FMUL1]] - ; CHECK-NEXT: $vgpr0 = COPY [[FMUL2]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = nsz G_INTRINSIC intrinsic(@llvm.amdgcn.fdiv.fast), %0, %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[FABS:%[0-9]+]]:_(f32) = nsz G_FABS [[BITCAST1]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x45F0000000000000 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3DF0000000000000 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(i1) = nsz G_FCMP floatpred(ogt), [[FABS]](f32), [[C]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(f32) = nsz G_SELECT [[FCMP]](i1), [[C1]], [[C2]] + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(f32) = nsz G_FMUL [[BITCAST1]], [[SELECT]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(f32) = nsz G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FMUL]](f32) + ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = nsz G_FMUL [[BITCAST]], [[INT]] + ; CHECK-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = nsz G_FMUL [[SELECT]], [[FMUL1]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[FMUL2]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32) = nsz G_INTRINSIC intrinsic(@llvm.amdgcn.fdiv.fast), %2(f32), %3(f32) + %5:_(i32) = G_BITCAST %4(f32) + $vgpr0 = COPY %5(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-round.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-round.mir index 2a3fa6fbfdb77..956b56038058b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-round.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-round.mir @@ -14,65 +14,82 @@ body: | ; GFX6-LABEL: name: test_intrinsic_round_s32 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[COPY]] - ; GFX6-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[COPY]], [[INTRINSIC_TRUNC]] - ; GFX6-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[FSUB]] - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01 - ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s32), [[C]] - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]] - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C4]] - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C3]] - ; GFX6-NEXT: %10:_(s32) = disjoint G_OR [[AND]], [[AND1]] - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[INTRINSIC_TRUNC]], %10 - ; GFX6-NEXT: $vgpr0 = COPY [[FADD]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX6-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[BITCAST]] + ; GFX6-NEXT: [[FSUB:%[0-9]+]]:_(f32) = G_FSUB [[BITCAST]], [[INTRINSIC_TRUNC]] + ; GFX6-NEXT: [[FABS:%[0-9]+]]:_(f32) = G_FABS [[FSUB]] + ; GFX6-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 5.000000e-01 + ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(oge), [[FABS]](f32), [[C]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C1]], [[C2]] + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[SELECT]](f32) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST]](f32) + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C4]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C3]] + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i32) = disjoint G_OR [[AND]], [[AND1]] + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(f32) = G_BITCAST [[OR]](i32) + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[INTRINSIC_TRUNC]], [[BITCAST3]] + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[FADD]](f32) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST4]](i32) ; ; GFX8-LABEL: name: test_intrinsic_round_s32 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[COPY]] - ; GFX8-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[COPY]], [[INTRINSIC_TRUNC]] - ; GFX8-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[FSUB]] - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01 - ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s32), [[C]] - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 
1.000000e+00 - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]] - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C4]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C3]] - ; GFX8-NEXT: %10:_(s32) = disjoint G_OR [[AND]], [[AND1]] - ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[INTRINSIC_TRUNC]], %10 - ; GFX8-NEXT: $vgpr0 = COPY [[FADD]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[BITCAST]] + ; GFX8-NEXT: [[FSUB:%[0-9]+]]:_(f32) = G_FSUB [[BITCAST]], [[INTRINSIC_TRUNC]] + ; GFX8-NEXT: [[FABS:%[0-9]+]]:_(f32) = G_FABS [[FSUB]] + ; GFX8-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 5.000000e-01 + ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(oge), [[FABS]](f32), [[C]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C1]], [[C2]] + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[SELECT]](f32) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST]](f32) + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C4]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C3]] + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = disjoint G_OR [[AND]], [[AND1]] + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(f32) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[INTRINSIC_TRUNC]], [[BITCAST3]] + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[FADD]](f32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST4]](i32) ; ; GFX9-LABEL: name: test_intrinsic_round_s32 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[COPY]] - ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[COPY]], [[INTRINSIC_TRUNC]] - ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[FSUB]] - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01 - ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s32), [[C]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C4]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C3]] - ; GFX9-NEXT: %10:_(s32) = disjoint G_OR [[AND]], [[AND1]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[INTRINSIC_TRUNC]], %10 - ; GFX9-NEXT: $vgpr0 = COPY [[FADD]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_INTRINSIC_ROUND %0 - $vgpr0 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[BITCAST]] + ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(f32) = 
G_FSUB [[BITCAST]], [[INTRINSIC_TRUNC]] + ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(f32) = G_FABS [[FSUB]] + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 5.000000e-01 + ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(oge), [[FABS]](f32), [[C]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C1]], [[C2]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[SELECT]](f32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST]](f32) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C4]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C3]] + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = disjoint G_OR [[AND]], [[AND1]] + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(f32) = G_BITCAST [[OR]](i32) + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[INTRINSIC_TRUNC]], [[BITCAST3]] + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[FADD]](f32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST4]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = G_INTRINSIC_ROUND %1 + %3:_(i32) = G_BITCAST %2(f32) + $vgpr0 = COPY %3(i32) ... --- @@ -84,65 +101,82 @@ body: | ; GFX6-LABEL: name: test_intrinsic_round_s32_flags ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = nsz G_INTRINSIC_TRUNC [[COPY]] - ; GFX6-NEXT: [[FSUB:%[0-9]+]]:_(s32) = nsz G_FSUB [[COPY]], [[INTRINSIC_TRUNC]] - ; GFX6-NEXT: [[FABS:%[0-9]+]]:_(s32) = nsz G_FABS [[FSUB]] - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01 - ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = nsz G_FCMP floatpred(oge), [[FABS]](s32), [[C]] - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]] - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C4]] - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C3]] - ; GFX6-NEXT: %10:_(s32) = disjoint G_OR [[AND]], [[AND1]] - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = nsz G_FADD [[INTRINSIC_TRUNC]], %10 - ; GFX6-NEXT: $vgpr0 = COPY [[FADD]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX6-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = nsz G_INTRINSIC_TRUNC [[BITCAST]] + ; GFX6-NEXT: [[FSUB:%[0-9]+]]:_(f32) = nsz G_FSUB [[BITCAST]], [[INTRINSIC_TRUNC]] + ; GFX6-NEXT: [[FABS:%[0-9]+]]:_(f32) = nsz G_FABS [[FSUB]] + ; GFX6-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 5.000000e-01 + ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(i1) = nsz G_FCMP floatpred(oge), [[FABS]](f32), [[C]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C1]], [[C2]] + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[SELECT]](f32) + ; GFX6-NEXT: 
[[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST]](f32) + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C4]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C3]] + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i32) = disjoint G_OR [[AND]], [[AND1]] + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(f32) = G_BITCAST [[OR]](i32) + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(f32) = nsz G_FADD [[INTRINSIC_TRUNC]], [[BITCAST3]] + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[FADD]](f32) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST4]](i32) ; ; GFX8-LABEL: name: test_intrinsic_round_s32_flags ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = nsz G_INTRINSIC_TRUNC [[COPY]] - ; GFX8-NEXT: [[FSUB:%[0-9]+]]:_(s32) = nsz G_FSUB [[COPY]], [[INTRINSIC_TRUNC]] - ; GFX8-NEXT: [[FABS:%[0-9]+]]:_(s32) = nsz G_FABS [[FSUB]] - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01 - ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = nsz G_FCMP floatpred(oge), [[FABS]](s32), [[C]] - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]] - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C4]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C3]] - ; GFX8-NEXT: %10:_(s32) = disjoint G_OR [[AND]], [[AND1]] - ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s32) = nsz G_FADD [[INTRINSIC_TRUNC]], %10 - ; GFX8-NEXT: $vgpr0 = COPY [[FADD]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = nsz G_INTRINSIC_TRUNC [[BITCAST]] + ; GFX8-NEXT: [[FSUB:%[0-9]+]]:_(f32) = nsz G_FSUB [[BITCAST]], [[INTRINSIC_TRUNC]] + ; GFX8-NEXT: [[FABS:%[0-9]+]]:_(f32) = nsz G_FABS [[FSUB]] + ; GFX8-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 5.000000e-01 + ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(i1) = nsz G_FCMP floatpred(oge), [[FABS]](f32), [[C]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C1]], [[C2]] + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[SELECT]](f32) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST]](f32) + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C4]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C3]] + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = disjoint G_OR [[AND]], [[AND1]] + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(f32) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(f32) = nsz G_FADD [[INTRINSIC_TRUNC]], [[BITCAST3]] + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[FADD]](f32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST4]](i32) ; ; GFX9-LABEL: name: test_intrinsic_round_s32_flags ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = nsz G_INTRINSIC_TRUNC [[COPY]] - ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s32) = nsz G_FSUB [[COPY]], [[INTRINSIC_TRUNC]] - ; GFX9-NEXT: 
[[FABS:%[0-9]+]]:_(s32) = nsz G_FABS [[FSUB]] - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01 - ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = nsz G_FCMP floatpred(oge), [[FABS]](s32), [[C]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C4]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C3]] - ; GFX9-NEXT: %10:_(s32) = disjoint G_OR [[AND]], [[AND1]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = nsz G_FADD [[INTRINSIC_TRUNC]], %10 - ; GFX9-NEXT: $vgpr0 = COPY [[FADD]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = nsz G_INTRINSIC_ROUND %0 - $vgpr0 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = nsz G_INTRINSIC_TRUNC [[BITCAST]] + ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(f32) = nsz G_FSUB [[BITCAST]], [[INTRINSIC_TRUNC]] + ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(f32) = nsz G_FABS [[FSUB]] + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 5.000000e-01 + ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(i1) = nsz G_FCMP floatpred(oge), [[FABS]](f32), [[C]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C1]], [[C2]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[SELECT]](f32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST]](f32) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C4]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C3]] + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = disjoint G_OR [[AND]], [[AND1]] + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(f32) = G_BITCAST [[OR]](i32) + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(f32) = nsz G_FADD [[INTRINSIC_TRUNC]], [[BITCAST3]] + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[FADD]](f32) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST4]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = nsz G_INTRINSIC_ROUND %1 + %3:_(i32) = G_BITCAST %2(f32) + $vgpr0 = COPY %3(i32) ... 
--- @@ -154,87 +188,107 @@ body: | ; GFX6-LABEL: name: test_intrinsic_round_s64 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 - ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ubfe), [[UV1]](s32), [[C]](s32), [[C1]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1023 - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C2]] - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]] - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4503599627370495 - ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C5]](s32), [[AND]](s32) - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[C4]], [[SUB]](s32) - ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[C6]] - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[XOR]] - ; GFX6-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 51 - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SUB]](s32), [[C5]] - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SUB]](s32), [[C7]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[MV]], [[AND1]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[COPY]], [[SELECT]] - ; GFX6-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[SELECT1]] - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[COPY]], [[FNEG]] - ; GFX6-NEXT: [[FABS:%[0-9]+]]:_(s64) = G_FABS [[FADD]] - ; GFX6-NEXT: [[C8:%[0-9]+]]:_(s64) = G_FCONSTANT double 5.000000e-01 - ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s64), [[C8]] - ; GFX6-NEXT: [[C9:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; GFX6-NEXT: [[C10:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00 - ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[C9]], [[C10]] - ; GFX6-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX6-NEXT: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 - ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[SELECT2]], [[C12]] - ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C11]] - ; GFX6-NEXT: %10:_(s64) = disjoint G_OR [[AND2]], [[AND3]] - ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[SELECT1]], %10 - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[FADD1]](s64) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[BITCAST]](f64) + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST1]](i64) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 11 + ; GFX6-NEXT: [[INT:%[0-9]+]]:_(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ubfe), [[UV1]](i32), [[C]](i32), [[C1]](i32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 1023 + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[INT]], [[C2]] + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[UV1]], [[C3]] + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4503599627370495 + ; GFX6-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + 
; GFX6-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[C5]](i32), [[AND]](i32) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(f64) = G_BITCAST [[MV]](i64) + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[C4]], [[SUB]](i32) + ; GFX6-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 -1 + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(i64) = G_XOR [[ASHR]], [[C6]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[BITCAST1]], [[XOR]] + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(f64) = G_BITCAST [[AND1]](i64) + ; GFX6-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 51 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SUB]](i32), [[C5]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(sgt), [[SUB]](i32), [[C7]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(f64) = G_SELECT [[ICMP]](i1), [[BITCAST2]], [[BITCAST3]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(f64) = G_SELECT [[ICMP1]](i1), [[BITCAST]], [[SELECT]] + ; GFX6-NEXT: [[FNEG:%[0-9]+]]:_(f64) = G_FNEG [[SELECT1]] + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(f64) = G_FADD [[BITCAST]], [[FNEG]] + ; GFX6-NEXT: [[FABS:%[0-9]+]]:_(f64) = G_FABS [[FADD]] + ; GFX6-NEXT: [[C8:%[0-9]+]]:_(f64) = G_FCONSTANT double 5.000000e-01 + ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(oge), [[FABS]](f64), [[C8]] + ; GFX6-NEXT: [[C9:%[0-9]+]]:_(f64) = G_FCONSTANT double 1.000000e+00 + ; GFX6-NEXT: [[C10:%[0-9]+]]:_(f64) = G_FCONSTANT double 0.000000e+00 + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(f64) = G_SELECT [[FCMP]](i1), [[C9]], [[C10]] + ; GFX6-NEXT: [[C11:%[0-9]+]]:_(i64) = G_CONSTANT i64 -9223372036854775808 + ; GFX6-NEXT: [[C12:%[0-9]+]]:_(i64) = G_CONSTANT i64 9223372036854775807 + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(i64) = G_BITCAST [[SELECT2]](f64) + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(i64) = G_BITCAST [[BITCAST]](f64) + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(i64) = G_AND [[BITCAST4]], [[C12]] + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(i64) = G_AND [[BITCAST5]], [[C11]] + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i64) = disjoint G_OR [[AND2]], [[AND3]] + ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(f64) = G_BITCAST [[OR]](i64) + ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(f64) = G_FADD [[SELECT1]], [[BITCAST6]] + ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(i64) = G_BITCAST [[FADD1]](f64) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST7]](i64) ; ; GFX8-LABEL: name: test_intrinsic_round_s64 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s64) = G_INTRINSIC_TRUNC [[COPY]] - ; GFX8-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INTRINSIC_TRUNC]] - ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[COPY]], [[FNEG]] - ; GFX8-NEXT: [[FABS:%[0-9]+]]:_(s64) = G_FABS [[FADD]] - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 5.000000e-01 - ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s64), [[C]] - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00 - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]] - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[SELECT]], [[C4]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C3]] - ; GFX8-NEXT: %10:_(s64) = disjoint G_OR [[AND]], [[AND1]] - ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[INTRINSIC_TRUNC]], %10 - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[FADD1]](s64) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; 
GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f64) = G_INTRINSIC_TRUNC [[BITCAST]] + ; GFX8-NEXT: [[FNEG:%[0-9]+]]:_(f64) = G_FNEG [[INTRINSIC_TRUNC]] + ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(f64) = G_FADD [[BITCAST]], [[FNEG]] + ; GFX8-NEXT: [[FABS:%[0-9]+]]:_(f64) = G_FABS [[FADD]] + ; GFX8-NEXT: [[C:%[0-9]+]]:_(f64) = G_FCONSTANT double 5.000000e-01 + ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(oge), [[FABS]](f64), [[C]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(f64) = G_FCONSTANT double 1.000000e+00 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(f64) = G_FCONSTANT double 0.000000e+00 + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(f64) = G_SELECT [[FCMP]](i1), [[C1]], [[C2]] + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 -9223372036854775808 + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 9223372036854775807 + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[SELECT]](f64) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(i64) = G_BITCAST [[BITCAST]](f64) + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[BITCAST1]], [[C4]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[BITCAST2]], [[C3]] + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i64) = disjoint G_OR [[AND]], [[AND1]] + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(f64) = G_BITCAST [[OR]](i64) + ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(f64) = G_FADD [[INTRINSIC_TRUNC]], [[BITCAST3]] + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(i64) = G_BITCAST [[FADD1]](f64) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST4]](i64) ; ; GFX9-LABEL: name: test_intrinsic_round_s64 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s64) = G_INTRINSIC_TRUNC [[COPY]] - ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INTRINSIC_TRUNC]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[COPY]], [[FNEG]] - ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(s64) = G_FABS [[FADD]] - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 5.000000e-01 - ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s64), [[C]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00 - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[SELECT]], [[C4]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C3]] - ; GFX9-NEXT: %10:_(s64) = disjoint G_OR [[AND]], [[AND1]] - ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[INTRINSIC_TRUNC]], %10 - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FADD1]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_INTRINSIC_ROUND %0 - $vgpr0_vgpr1 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f64) = G_INTRINSIC_TRUNC [[BITCAST]] + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(f64) = G_FNEG [[INTRINSIC_TRUNC]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(f64) = G_FADD [[BITCAST]], [[FNEG]] + ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(f64) = G_FABS [[FADD]] + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f64) = G_FCONSTANT double 5.000000e-01 + ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(oge), [[FABS]](f64), [[C]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(f64) = G_FCONSTANT double 1.000000e+00 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(f64) = G_FCONSTANT double 0.000000e+00 + ; 
GFX9-NEXT: [[SELECT:%[0-9]+]]:_(f64) = G_SELECT [[FCMP]](i1), [[C1]], [[C2]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 -9223372036854775808 + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 9223372036854775807 + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[SELECT]](f64) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i64) = G_BITCAST [[BITCAST]](f64) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[BITCAST1]], [[C4]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[BITCAST2]], [[C3]] + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i64) = disjoint G_OR [[AND]], [[AND1]] + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(f64) = G_BITCAST [[OR]](i64) + ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(f64) = G_FADD [[INTRINSIC_TRUNC]], [[BITCAST3]] + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i64) = G_BITCAST [[FADD1]](f64) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST4]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(f64) = G_BITCAST %0(i64) + %2:_(f64) = G_INTRINSIC_ROUND %1 + %3:_(i64) = G_BITCAST %2(f64) + $vgpr0_vgpr1 = COPY %3(i64) ... --- @@ -246,98 +300,124 @@ body: | ; GFX6-LABEL: name: test_intrinsic_round_v2s32 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX6-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[UV]] - ; GFX6-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[UV]], [[INTRINSIC_TRUNC]] - ; GFX6-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[FSUB]] - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01 - ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s32), [[C]] - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]] - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C4]] - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]] - ; GFX6-NEXT: %24:_(s32) = disjoint G_OR [[AND]], [[AND1]] - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[INTRINSIC_TRUNC]], %24 - ; GFX6-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[UV1]] - ; GFX6-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[UV1]], [[INTRINSIC_TRUNC1]] - ; GFX6-NEXT: [[FABS1:%[0-9]+]]:_(s32) = G_FABS [[FSUB1]] - ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s32), [[C]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C1]], [[C2]] - ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C4]] - ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]] - ; GFX6-NEXT: %14:_(s32) = disjoint G_OR [[AND2]], [[AND3]] - ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[INTRINSIC_TRUNC1]], %14 - ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; GFX6-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[UV]] + ; GFX6-NEXT: [[FSUB:%[0-9]+]]:_(f32) = G_FSUB [[UV]], [[INTRINSIC_TRUNC]] + ; GFX6-NEXT: [[FABS:%[0-9]+]]:_(f32) = G_FABS 
[[FSUB]] + ; GFX6-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 5.000000e-01 + ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(oge), [[FABS]](f32), [[C]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C1]], [[C2]] + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[SELECT]](f32) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](f32) + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C4]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C3]] + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i32) = disjoint G_OR [[AND]], [[AND1]] + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(f32) = G_BITCAST [[OR]](i32) + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[INTRINSIC_TRUNC]], [[BITCAST3]] + ; GFX6-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[UV1]] + ; GFX6-NEXT: [[FSUB1:%[0-9]+]]:_(f32) = G_FSUB [[UV1]], [[INTRINSIC_TRUNC1]] + ; GFX6-NEXT: [[FABS1:%[0-9]+]]:_(f32) = G_FABS [[FSUB1]] + ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(oge), [[FABS1]](f32), [[C]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(f32) = G_SELECT [[FCMP1]](i1), [[C1]], [[C2]] + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[SELECT1]](f32) + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](f32) + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[BITCAST4]], [[C4]] + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST5]], [[C3]] + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(i32) = disjoint G_OR [[AND2]], [[AND3]] + ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(f32) = G_BITCAST [[OR1]](i32) + ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[INTRINSIC_TRUNC1]], [[BITCAST6]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FADD]](f32), [[FADD1]](f32) + ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST7]](<2 x i32>) ; ; GFX8-LABEL: name: test_intrinsic_round_v2s32 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[UV]] - ; GFX8-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[UV]], [[INTRINSIC_TRUNC]] - ; GFX8-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[FSUB]] - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01 - ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s32), [[C]] - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]] - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C4]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]] - ; GFX8-NEXT: %24:_(s32) = disjoint G_OR [[AND]], [[AND1]] - ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[INTRINSIC_TRUNC]], %24 - ; GFX8-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[UV1]] - ; GFX8-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[UV1]], [[INTRINSIC_TRUNC1]] - ; GFX8-NEXT: 
[[FABS1:%[0-9]+]]:_(s32) = G_FABS [[FSUB1]] - ; GFX8-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s32), [[C]] - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C1]], [[C2]] - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C4]] - ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]] - ; GFX8-NEXT: %14:_(s32) = disjoint G_OR [[AND2]], [[AND3]] - ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[INTRINSIC_TRUNC1]], %14 - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[UV]] + ; GFX8-NEXT: [[FSUB:%[0-9]+]]:_(f32) = G_FSUB [[UV]], [[INTRINSIC_TRUNC]] + ; GFX8-NEXT: [[FABS:%[0-9]+]]:_(f32) = G_FABS [[FSUB]] + ; GFX8-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 5.000000e-01 + ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(oge), [[FABS]](f32), [[C]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C1]], [[C2]] + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[SELECT]](f32) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](f32) + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C4]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C3]] + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = disjoint G_OR [[AND]], [[AND1]] + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(f32) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[INTRINSIC_TRUNC]], [[BITCAST3]] + ; GFX8-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[UV1]] + ; GFX8-NEXT: [[FSUB1:%[0-9]+]]:_(f32) = G_FSUB [[UV1]], [[INTRINSIC_TRUNC1]] + ; GFX8-NEXT: [[FABS1:%[0-9]+]]:_(f32) = G_FABS [[FSUB1]] + ; GFX8-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(oge), [[FABS1]](f32), [[C]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(f32) = G_SELECT [[FCMP1]](i1), [[C1]], [[C2]] + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[SELECT1]](f32) + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](f32) + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[BITCAST4]], [[C4]] + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST5]], [[C3]] + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(i32) = disjoint G_OR [[AND2]], [[AND3]] + ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(f32) = G_BITCAST [[OR1]](i32) + ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[INTRINSIC_TRUNC1]], [[BITCAST6]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FADD]](f32), [[FADD1]](f32) + ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST7]](<2 x i32>) ; ; GFX9-LABEL: name: test_intrinsic_round_v2s32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = 
G_INTRINSIC_TRUNC [[UV]] - ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[UV]], [[INTRINSIC_TRUNC]] - ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[FSUB]] - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01 - ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s32), [[C]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C4]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]] - ; GFX9-NEXT: %24:_(s32) = disjoint G_OR [[AND]], [[AND1]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[INTRINSIC_TRUNC]], %24 - ; GFX9-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[UV1]] - ; GFX9-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[UV1]], [[INTRINSIC_TRUNC1]] - ; GFX9-NEXT: [[FABS1:%[0-9]+]]:_(s32) = G_FABS [[FSUB1]] - ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s32), [[C]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C1]], [[C2]] - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C4]] - ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]] - ; GFX9-NEXT: %14:_(s32) = disjoint G_OR [[AND2]], [[AND3]] - ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[INTRINSIC_TRUNC1]], %14 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_INTRINSIC_ROUND %0 - $vgpr0_vgpr1 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[UV]] + ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(f32) = G_FSUB [[UV]], [[INTRINSIC_TRUNC]] + ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(f32) = G_FABS [[FSUB]] + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 5.000000e-01 + ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(oge), [[FABS]](f32), [[C]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(f32) = G_SELECT [[FCMP]](i1), [[C1]], [[C2]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[SELECT]](f32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](f32) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C4]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C3]] + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = disjoint G_OR [[AND]], [[AND1]] + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(f32) = G_BITCAST [[OR]](i32) + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[INTRINSIC_TRUNC]], [[BITCAST3]] + ; GFX9-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[UV1]] + ; GFX9-NEXT: [[FSUB1:%[0-9]+]]:_(f32) = G_FSUB [[UV1]], [[INTRINSIC_TRUNC1]] + ; GFX9-NEXT: [[FABS1:%[0-9]+]]:_(f32) = G_FABS [[FSUB1]] + ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(i1) 
= G_FCMP floatpred(oge), [[FABS1]](f32), [[C]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(f32) = G_SELECT [[FCMP1]](i1), [[C1]], [[C2]] + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[SELECT1]](f32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](f32) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[BITCAST4]], [[C4]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST5]], [[C3]] + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = disjoint G_OR [[AND2]], [[AND3]] + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(f32) = G_BITCAST [[OR1]](i32) + ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[INTRINSIC_TRUNC1]], [[BITCAST6]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[FADD]](f32), [[FADD1]](f32) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST7]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x f32>) = G_BITCAST %0(<2 x i32>) + %2:_(<2 x f32>) = G_INTRINSIC_ROUND %1 + %3:_(<2 x i32>) = G_BITCAST %2(<2 x f32>) + $vgpr0_vgpr1 = COPY %3(<2 x i32>) ... --- @@ -349,134 +429,166 @@ body: | ; GFX6-LABEL: name: test_intrinsic_round_v2s64 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 - ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ubfe), [[UV3]](s32), [[C]](s32), [[C1]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1023 - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C2]] - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C3]] - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4503599627370495 - ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C5]](s32), [[AND]](s32) - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[C4]], [[SUB]](s32) - ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[C6]] - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[UV]], [[XOR]] - ; GFX6-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 51 - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SUB]](s32), [[C5]] - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SUB]](s32), [[C7]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[MV]], [[AND1]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV]], [[SELECT]] - ; GFX6-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[SELECT1]] - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[UV]], [[FNEG]] - ; GFX6-NEXT: [[FABS:%[0-9]+]]:_(s64) = G_FABS [[FADD]] - ; GFX6-NEXT: [[C8:%[0-9]+]]:_(s64) = G_FCONSTANT double 5.000000e-01 - ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s64), [[C8]] - ; GFX6-NEXT: [[C9:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; GFX6-NEXT: [[C10:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00 - ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[C9]], [[C10]] - ; GFX6-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX6-NEXT: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 - ; 
GFX6-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[SELECT2]], [[C12]] - ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[UV]], [[C11]] - ; GFX6-NEXT: %45:_(s64) = disjoint G_OR [[AND2]], [[AND3]] - ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[SELECT1]], %45 - ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX6-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ubfe), [[UV5]](s32), [[C]](s32), [[C1]](s32) - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[INT1]], [[C2]] - ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C3]] - ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C5]](s32), [[AND4]](s32) - ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[C4]], [[SUB1]](s32) - ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[ASHR1]], [[C6]] - ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s64) = G_AND [[UV1]], [[XOR1]] - ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SUB1]](s32), [[C5]] - ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SUB1]](s32), [[C7]] - ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[MV1]], [[AND5]] - ; GFX6-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV1]], [[SELECT3]] - ; GFX6-NEXT: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[SELECT4]] - ; GFX6-NEXT: [[FADD2:%[0-9]+]]:_(s64) = G_FADD [[UV1]], [[FNEG1]] - ; GFX6-NEXT: [[FABS1:%[0-9]+]]:_(s64) = G_FABS [[FADD2]] - ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s64), [[C8]] - ; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[FCMP1]](s1), [[C9]], [[C10]] - ; GFX6-NEXT: [[AND6:%[0-9]+]]:_(s64) = G_AND [[SELECT5]], [[C12]] - ; GFX6-NEXT: [[AND7:%[0-9]+]]:_(s64) = G_AND [[UV1]], [[C11]] - ; GFX6-NEXT: %14:_(s64) = disjoint G_OR [[AND6]], [[AND7]] - ; GFX6-NEXT: [[FADD3:%[0-9]+]]:_(s64) = G_FADD [[SELECT4]], %14 - ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FADD1]](s64), [[FADD3]](s64) - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(f64), [[UV1:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST]](<2 x f64>) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[UV]](f64) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST1]](i64) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 11 + ; GFX6-NEXT: [[INT:%[0-9]+]]:_(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ubfe), [[UV3]](i32), [[C]](i32), [[C1]](i32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 1023 + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[INT]], [[C2]] + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[UV3]], [[C3]] + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4503599627370495 + ; GFX6-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[C5]](i32), [[AND]](i32) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(f64) = G_BITCAST [[MV]](i64) + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[C4]], [[SUB]](i32) + ; GFX6-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 -1 + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(i64) = G_XOR [[ASHR]], [[C6]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[BITCAST1]], [[XOR]] + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(f64) = G_BITCAST [[AND1]](i64) + ; GFX6-NEXT: 
[[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 51 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SUB]](i32), [[C5]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(sgt), [[SUB]](i32), [[C7]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(f64) = G_SELECT [[ICMP]](i1), [[BITCAST2]], [[BITCAST3]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(f64) = G_SELECT [[ICMP1]](i1), [[UV]], [[SELECT]] + ; GFX6-NEXT: [[FNEG:%[0-9]+]]:_(f64) = G_FNEG [[SELECT1]] + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(f64) = G_FADD [[UV]], [[FNEG]] + ; GFX6-NEXT: [[FABS:%[0-9]+]]:_(f64) = G_FABS [[FADD]] + ; GFX6-NEXT: [[C8:%[0-9]+]]:_(f64) = G_FCONSTANT double 5.000000e-01 + ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(oge), [[FABS]](f64), [[C8]] + ; GFX6-NEXT: [[C9:%[0-9]+]]:_(f64) = G_FCONSTANT double 1.000000e+00 + ; GFX6-NEXT: [[C10:%[0-9]+]]:_(f64) = G_FCONSTANT double 0.000000e+00 + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(f64) = G_SELECT [[FCMP]](i1), [[C9]], [[C10]] + ; GFX6-NEXT: [[C11:%[0-9]+]]:_(i64) = G_CONSTANT i64 -9223372036854775808 + ; GFX6-NEXT: [[C12:%[0-9]+]]:_(i64) = G_CONSTANT i64 9223372036854775807 + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(i64) = G_BITCAST [[SELECT2]](f64) + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(i64) = G_BITCAST [[UV]](f64) + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(i64) = G_AND [[BITCAST4]], [[C12]] + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(i64) = G_AND [[BITCAST5]], [[C11]] + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i64) = disjoint G_OR [[AND2]], [[AND3]] + ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(f64) = G_BITCAST [[OR]](i64) + ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(f64) = G_FADD [[SELECT1]], [[BITCAST6]] + ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(i64) = G_BITCAST [[UV1]](f64) + ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST7]](i64) + ; GFX6-NEXT: [[INT1:%[0-9]+]]:_(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ubfe), [[UV5]](i32), [[C]](i32), [[C1]](i32) + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[INT1]], [[C2]] + ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[UV5]], [[C3]] + ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[C5]](i32), [[AND4]](i32) + ; GFX6-NEXT: [[BITCAST8:%[0-9]+]]:_(f64) = G_BITCAST [[MV1]](i64) + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[C4]], [[SUB1]](i32) + ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(i64) = G_XOR [[ASHR1]], [[C6]] + ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(i64) = G_AND [[BITCAST7]], [[XOR1]] + ; GFX6-NEXT: [[BITCAST9:%[0-9]+]]:_(f64) = G_BITCAST [[AND5]](i64) + ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SUB1]](i32), [[C5]] + ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(sgt), [[SUB1]](i32), [[C7]] + ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(f64) = G_SELECT [[ICMP2]](i1), [[BITCAST8]], [[BITCAST9]] + ; GFX6-NEXT: [[SELECT4:%[0-9]+]]:_(f64) = G_SELECT [[ICMP3]](i1), [[UV1]], [[SELECT3]] + ; GFX6-NEXT: [[FNEG1:%[0-9]+]]:_(f64) = G_FNEG [[SELECT4]] + ; GFX6-NEXT: [[FADD2:%[0-9]+]]:_(f64) = G_FADD [[UV1]], [[FNEG1]] + ; GFX6-NEXT: [[FABS1:%[0-9]+]]:_(f64) = G_FABS [[FADD2]] + ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(oge), [[FABS1]](f64), [[C8]] + ; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(f64) = G_SELECT [[FCMP1]](i1), [[C9]], [[C10]] + ; GFX6-NEXT: [[BITCAST10:%[0-9]+]]:_(i64) = G_BITCAST [[SELECT5]](f64) + ; GFX6-NEXT: [[BITCAST11:%[0-9]+]]:_(i64) = G_BITCAST [[UV1]](f64) + ; GFX6-NEXT: [[AND6:%[0-9]+]]:_(i64) = G_AND [[BITCAST10]], [[C12]] + ; GFX6-NEXT: [[AND7:%[0-9]+]]:_(i64) = G_AND [[BITCAST11]], [[C11]] + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(i64) = disjoint G_OR [[AND6]], [[AND7]] + ; GFX6-NEXT: [[BITCAST12:%[0-9]+]]:_(f64) = G_BITCAST 
[[OR1]](i64) + ; GFX6-NEXT: [[FADD3:%[0-9]+]]:_(f64) = G_FADD [[SELECT4]], [[BITCAST12]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f64>) = G_BUILD_VECTOR [[FADD1]](f64), [[FADD3]](f64) + ; GFX6-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BUILD_VECTOR]](<2 x f64>) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST13]](<2 x i64>) ; ; GFX8-LABEL: name: test_intrinsic_round_v2s64 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s64) = G_INTRINSIC_TRUNC [[UV]] - ; GFX8-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INTRINSIC_TRUNC]] - ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[UV]], [[FNEG]] - ; GFX8-NEXT: [[FABS:%[0-9]+]]:_(s64) = G_FABS [[FADD]] - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 5.000000e-01 - ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s64), [[C]] - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00 - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]] - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[SELECT]], [[C4]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[UV]], [[C3]] - ; GFX8-NEXT: %25:_(s64) = disjoint G_OR [[AND]], [[AND1]] - ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[INTRINSIC_TRUNC]], %25 - ; GFX8-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s64) = G_INTRINSIC_TRUNC [[UV1]] - ; GFX8-NEXT: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[INTRINSIC_TRUNC1]] - ; GFX8-NEXT: [[FADD2:%[0-9]+]]:_(s64) = G_FADD [[UV1]], [[FNEG1]] - ; GFX8-NEXT: [[FABS1:%[0-9]+]]:_(s64) = G_FABS [[FADD2]] - ; GFX8-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s64), [[C]] - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[FCMP1]](s1), [[C1]], [[C2]] - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[SELECT1]], [[C4]] - ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[UV1]], [[C3]] - ; GFX8-NEXT: %14:_(s64) = disjoint G_OR [[AND2]], [[AND3]] - ; GFX8-NEXT: [[FADD3:%[0-9]+]]:_(s64) = G_FADD [[INTRINSIC_TRUNC1]], %14 - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FADD1]](s64), [[FADD3]](s64) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(f64), [[UV1:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST]](<2 x f64>) + ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f64) = G_INTRINSIC_TRUNC [[UV]] + ; GFX8-NEXT: [[FNEG:%[0-9]+]]:_(f64) = G_FNEG [[INTRINSIC_TRUNC]] + ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(f64) = G_FADD [[UV]], [[FNEG]] + ; GFX8-NEXT: [[FABS:%[0-9]+]]:_(f64) = G_FABS [[FADD]] + ; GFX8-NEXT: [[C:%[0-9]+]]:_(f64) = G_FCONSTANT double 5.000000e-01 + ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(oge), [[FABS]](f64), [[C]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(f64) = G_FCONSTANT double 1.000000e+00 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(f64) = G_FCONSTANT double 0.000000e+00 + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(f64) = G_SELECT [[FCMP]](i1), [[C1]], [[C2]] + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 
-9223372036854775808 + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 9223372036854775807 + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[SELECT]](f64) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(i64) = G_BITCAST [[UV]](f64) + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[BITCAST1]], [[C4]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[BITCAST2]], [[C3]] + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i64) = disjoint G_OR [[AND]], [[AND1]] + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(f64) = G_BITCAST [[OR]](i64) + ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(f64) = G_FADD [[INTRINSIC_TRUNC]], [[BITCAST3]] + ; GFX8-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(f64) = G_INTRINSIC_TRUNC [[UV1]] + ; GFX8-NEXT: [[FNEG1:%[0-9]+]]:_(f64) = G_FNEG [[INTRINSIC_TRUNC1]] + ; GFX8-NEXT: [[FADD2:%[0-9]+]]:_(f64) = G_FADD [[UV1]], [[FNEG1]] + ; GFX8-NEXT: [[FABS1:%[0-9]+]]:_(f64) = G_FABS [[FADD2]] + ; GFX8-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(oge), [[FABS1]](f64), [[C]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(f64) = G_SELECT [[FCMP1]](i1), [[C1]], [[C2]] + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(i64) = G_BITCAST [[SELECT1]](f64) + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(i64) = G_BITCAST [[UV1]](f64) + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(i64) = G_AND [[BITCAST4]], [[C4]] + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(i64) = G_AND [[BITCAST5]], [[C3]] + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(i64) = disjoint G_OR [[AND2]], [[AND3]] + ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(f64) = G_BITCAST [[OR1]](i64) + ; GFX8-NEXT: [[FADD3:%[0-9]+]]:_(f64) = G_FADD [[INTRINSIC_TRUNC1]], [[BITCAST6]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f64>) = G_BUILD_VECTOR [[FADD1]](f64), [[FADD3]](f64) + ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BUILD_VECTOR]](<2 x f64>) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST7]](<2 x i64>) ; ; GFX9-LABEL: name: test_intrinsic_round_v2s64 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s64) = G_INTRINSIC_TRUNC [[UV]] - ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INTRINSIC_TRUNC]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[UV]], [[FNEG]] - ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(s64) = G_FABS [[FADD]] - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 5.000000e-01 - ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s64), [[C]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00 - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[SELECT]], [[C4]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[UV]], [[C3]] - ; GFX9-NEXT: %25:_(s64) = disjoint G_OR [[AND]], [[AND1]] - ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[INTRINSIC_TRUNC]], %25 - ; GFX9-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s64) = G_INTRINSIC_TRUNC [[UV1]] - ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[INTRINSIC_TRUNC1]] - ; GFX9-NEXT: [[FADD2:%[0-9]+]]:_(s64) = G_FADD [[UV1]], [[FNEG1]] - ; GFX9-NEXT: [[FABS1:%[0-9]+]]:_(s64) = G_FABS [[FADD2]] - ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s64), [[C]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT 
[[FCMP1]](s1), [[C1]], [[C2]] - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[SELECT1]], [[C4]] - ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[UV1]], [[C3]] - ; GFX9-NEXT: %14:_(s64) = disjoint G_OR [[AND2]], [[AND3]] - ; GFX9-NEXT: [[FADD3:%[0-9]+]]:_(s64) = G_FADD [[INTRINSIC_TRUNC1]], %14 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FADD1]](s64), [[FADD3]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s64>) = G_INTRINSIC_ROUND %0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f64), [[UV1:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST]](<2 x f64>) + ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f64) = G_INTRINSIC_TRUNC [[UV]] + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(f64) = G_FNEG [[INTRINSIC_TRUNC]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(f64) = G_FADD [[UV]], [[FNEG]] + ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(f64) = G_FABS [[FADD]] + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f64) = G_FCONSTANT double 5.000000e-01 + ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(oge), [[FABS]](f64), [[C]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(f64) = G_FCONSTANT double 1.000000e+00 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(f64) = G_FCONSTANT double 0.000000e+00 + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(f64) = G_SELECT [[FCMP]](i1), [[C1]], [[C2]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 -9223372036854775808 + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 9223372036854775807 + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[SELECT]](f64) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i64) = G_BITCAST [[UV]](f64) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[BITCAST1]], [[C4]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[BITCAST2]], [[C3]] + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i64) = disjoint G_OR [[AND]], [[AND1]] + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(f64) = G_BITCAST [[OR]](i64) + ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(f64) = G_FADD [[INTRINSIC_TRUNC]], [[BITCAST3]] + ; GFX9-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(f64) = G_INTRINSIC_TRUNC [[UV1]] + ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(f64) = G_FNEG [[INTRINSIC_TRUNC1]] + ; GFX9-NEXT: [[FADD2:%[0-9]+]]:_(f64) = G_FADD [[UV1]], [[FNEG1]] + ; GFX9-NEXT: [[FABS1:%[0-9]+]]:_(f64) = G_FABS [[FADD2]] + ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(oge), [[FABS1]](f64), [[C]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(f64) = G_SELECT [[FCMP1]](i1), [[C1]], [[C2]] + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i64) = G_BITCAST [[SELECT1]](f64) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(i64) = G_BITCAST [[UV1]](f64) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(i64) = G_AND [[BITCAST4]], [[C4]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(i64) = G_AND [[BITCAST5]], [[C3]] + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i64) = disjoint G_OR [[AND2]], [[AND3]] + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(f64) = G_BITCAST [[OR1]](i64) + ; GFX9-NEXT: [[FADD3:%[0-9]+]]:_(f64) = G_FADD [[INTRINSIC_TRUNC1]], [[BITCAST6]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f64>) = G_BUILD_VECTOR [[FADD1]](f64), [[FADD3]](f64) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BUILD_VECTOR]](<2 x f64>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST7]](<2 x i64>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x f64>) = G_BITCAST %0(<2 x i64>) + %2:_(<2 x f64>) = G_INTRINSIC_ROUND %1 + %3:_(<2 x i64>) = G_BITCAST %2(<2 x f64>) + 
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3(<2 x i64>) ... --- @@ -488,84 +600,101 @@ body: | ; GFX6-LABEL: name: test_intrinsic_round_s16 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX6-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX6-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT]] - ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC]](s32) - ; GFX6-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[FPTRUNC]] - ; GFX6-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX6-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG]](s16) - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT1]], [[FPEXT2]] - ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) - ; GFX6-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FPTRUNC1]] - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800 - ; GFX6-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[FABS]](s16) - ; GFX6-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16) - ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FPEXT3]](s32), [[FPEXT4]] - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3C00 - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]] - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[SELECT]], [[C4]] - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; GFX6-NEXT: %12:_(s16) = disjoint G_OR [[AND]], [[AND1]] - ; GFX6-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC]](s16) - ; GFX6-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT %12(s16) - ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FPEXT5]], [[FPEXT6]] - ; GFX6-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD1]](s32) - ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16) - ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX6-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; GFX6-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FPEXT]] + ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[INTRINSIC_TRUNC]](f32) + ; GFX6-NEXT: [[FNEG:%[0-9]+]]:_(f16) = G_FNEG [[FPTRUNC]] + ; GFX6-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; GFX6-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[FNEG]](f16) + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FPEXT1]], [[FPEXT2]] + ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD]](f32) + ; GFX6-NEXT: [[FABS:%[0-9]+]]:_(f16) = G_FABS [[FPTRUNC1]] + ; GFX6-NEXT: [[C:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH3800 + ; GFX6-NEXT: [[FPEXT3:%[0-9]+]]:_(f32) = G_FPEXT [[FABS]](f16) + ; GFX6-NEXT: [[FPEXT4:%[0-9]+]]:_(f32) = G_FPEXT [[C]](f16) + ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(oge), [[FPEXT3]](f32), [[FPEXT4]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH3C00 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH0000 + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(f16) = G_SELECT [[FCMP]](i1), [[C1]], [[C2]] + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 -32768 + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 32767 + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = 
G_BITCAST [[SELECT]](f16) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[BITCAST1]], [[C4]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[BITCAST2]], [[C3]] + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i16) = disjoint G_OR [[AND]], [[AND1]] + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[OR]](i16) + ; GFX6-NEXT: [[FPEXT5:%[0-9]+]]:_(f32) = G_FPEXT [[FPTRUNC]](f16) + ; GFX6-NEXT: [[FPEXT6:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST3]](f16) + ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FPEXT5]], [[FPEXT6]] + ; GFX6-NEXT: [[FPTRUNC2:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD1]](f32) + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC2]](f16) + ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST4]](i16) + ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX8-LABEL: name: test_intrinsic_round_s16 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC]] - ; GFX8-NEXT: [[FSUB:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC]], [[INTRINSIC_TRUNC]] - ; GFX8-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FSUB]] - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800 - ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s16), [[C]] - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3C00 - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]] - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[SELECT]], [[C4]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; GFX8-NEXT: %12:_(s16) = disjoint G_OR [[AND]], [[AND1]] - ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC]], %12 - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) - ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f16) = G_INTRINSIC_TRUNC [[BITCAST]] + ; GFX8-NEXT: [[FSUB:%[0-9]+]]:_(f16) = G_FSUB [[BITCAST]], [[INTRINSIC_TRUNC]] + ; GFX8-NEXT: [[FABS:%[0-9]+]]:_(f16) = G_FABS [[FSUB]] + ; GFX8-NEXT: [[C:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH3800 + ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(oge), [[FABS]](f16), [[C]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH3C00 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH0000 + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(f16) = G_SELECT [[FCMP]](i1), [[C1]], [[C2]] + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[SELECT]](f16) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[BITCAST1]], [[C4]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[BITCAST2]], [[C3]] + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i16) = disjoint G_OR [[AND]], [[AND1]] + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[OR]](i16) + ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(f16) = G_FADD [[INTRINSIC_TRUNC]], [[BITCAST3]] + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST 
[[FADD]](f16) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST4]](i16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: test_intrinsic_round_s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC]] - ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC]], [[INTRINSIC_TRUNC]] - ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FSUB]] - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800 - ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s16), [[C]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3C00 - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[SELECT]], [[C4]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; GFX9-NEXT: %12:_(s16) = disjoint G_OR [[AND]], [[AND1]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC]], %12 - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s16) = G_INTRINSIC_ROUND %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f16) = G_INTRINSIC_TRUNC [[BITCAST]] + ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(f16) = G_FSUB [[BITCAST]], [[INTRINSIC_TRUNC]] + ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(f16) = G_FABS [[FSUB]] + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH3800 + ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(oge), [[FABS]](f16), [[C]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH3C00 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH0000 + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(f16) = G_SELECT [[FCMP]](i1), [[C1]], [[C2]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 -32768 + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 32767 + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[SELECT]](f16) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST]](f16) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[BITCAST1]], [[C4]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[BITCAST2]], [[C3]] + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i16) = disjoint G_OR [[AND]], [[AND1]] + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[OR]](i16) + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(f16) = G_FADD [[INTRINSIC_TRUNC]], [[BITCAST3]] + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i16) = G_BITCAST [[FADD]](f16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST4]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(f16) = G_BITCAST %1(i16) + %3:_(f16) = G_INTRINSIC_ROUND %2 + %4:_(i16) = G_BITCAST %3(f16) + %5:_(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... 
--- @@ -577,140 +706,179 @@ body: | ; GFX6-LABEL: name: test_intrinsic_round_v2s16 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX6-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX6-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT]] - ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC]](s32) - ; GFX6-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[FPTRUNC]] - ; GFX6-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX6-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG]](s16) - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT1]], [[FPEXT2]] - ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) - ; GFX6-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FPTRUNC1]] - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800 - ; GFX6-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[FABS]](s16) - ; GFX6-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[C1]](s16) - ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FPEXT3]](s32), [[FPEXT4]] - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3C00 - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[C2]], [[C3]] - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[SELECT]], [[C5]] - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] - ; GFX6-NEXT: %35:_(s16) = disjoint G_OR [[AND]], [[AND1]] - ; GFX6-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC]](s16) - ; GFX6-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT %35(s16) - ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FPEXT5]], [[FPEXT6]] - ; GFX6-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD1]](s32) - ; GFX6-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; GFX6-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT7]] - ; GFX6-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC1]](s32) - ; GFX6-NEXT: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[FPTRUNC3]] - ; GFX6-NEXT: [[FPEXT8:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; GFX6-NEXT: [[FPEXT9:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG1]](s16) - ; GFX6-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FPEXT8]], [[FPEXT9]] - ; GFX6-NEXT: [[FPTRUNC4:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD2]](s32) - ; GFX6-NEXT: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[FPTRUNC4]] - ; GFX6-NEXT: [[FPEXT10:%[0-9]+]]:_(s32) = G_FPEXT [[FABS1]](s16) - ; GFX6-NEXT: [[FPEXT11:%[0-9]+]]:_(s32) = G_FPEXT [[C1]](s16) - ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FPEXT10]](s32), [[FPEXT11]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[FCMP1]](s1), [[C2]], [[C3]] - ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[SELECT1]], [[C5]] - ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]] - ; GFX6-NEXT: %14:_(s16) = disjoint G_OR [[AND2]], [[AND3]] - ; GFX6-NEXT: [[FPEXT12:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC3]](s16) - ; GFX6-NEXT: [[FPEXT13:%[0-9]+]]:_(s32) = G_FPEXT %14(s16) - ; GFX6-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FPEXT12]], [[FPEXT13]] - ; 
GFX6-NEXT: [[FPTRUNC5:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD3]](s32) - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) - ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC5]](s16) - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %57(i16) + ; GFX6-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST1]](f16) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %58(i16) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX6-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FPEXT]] + ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[INTRINSIC_TRUNC]](f32) + ; GFX6-NEXT: [[FNEG:%[0-9]+]]:_(f16) = G_FNEG [[FPTRUNC]] + ; GFX6-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST1]](f16) + ; GFX6-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[FNEG]](f16) + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FPEXT1]], [[FPEXT2]] + ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD]](f32) + ; GFX6-NEXT: [[FABS:%[0-9]+]]:_(f16) = G_FABS [[FPTRUNC1]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH3800 + ; GFX6-NEXT: [[FPEXT3:%[0-9]+]]:_(f32) = G_FPEXT [[FABS]](f16) + ; GFX6-NEXT: [[FPEXT4:%[0-9]+]]:_(f32) = G_FPEXT [[C1]](f16) + ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(oge), [[FPEXT3]](f32), [[FPEXT4]] + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH3C00 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH0000 + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(f16) = G_SELECT [[FCMP]](i1), [[C2]], [[C3]] + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 -32768 + ; GFX6-NEXT: [[C5:%[0-9]+]]:_(i16) = G_CONSTANT i16 32767 + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[SELECT]](f16) + ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[BITCAST5]], [[C5]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[BITCAST6]], [[C4]] + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i16) = disjoint G_OR [[AND]], [[AND1]] + ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(f16) = G_BITCAST [[OR]](i16) + ; GFX6-NEXT: [[FPEXT5:%[0-9]+]]:_(f32) = G_FPEXT [[FPTRUNC]](f16) + ; GFX6-NEXT: [[FPEXT6:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST7]](f16) + ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FPEXT5]], [[FPEXT6]] + ; GFX6-NEXT: [[FPTRUNC2:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD1]](f32) + ; GFX6-NEXT: [[FPEXT7:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; GFX6-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FPEXT7]] + ; GFX6-NEXT: [[FPTRUNC3:%[0-9]+]]:_(f16) = G_FPTRUNC [[INTRINSIC_TRUNC1]](f32) + ; GFX6-NEXT: [[FNEG1:%[0-9]+]]:_(f16) = G_FNEG [[FPTRUNC3]] + ; GFX6-NEXT: [[FPEXT8:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; GFX6-NEXT: [[FPEXT9:%[0-9]+]]:_(f32) = G_FPEXT [[FNEG1]](f16) + ; GFX6-NEXT: [[FADD2:%[0-9]+]]:_(f32) = 
G_FADD [[FPEXT8]], [[FPEXT9]] + ; GFX6-NEXT: [[FPTRUNC4:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD2]](f32) + ; GFX6-NEXT: [[FABS1:%[0-9]+]]:_(f16) = G_FABS [[FPTRUNC4]] + ; GFX6-NEXT: [[FPEXT10:%[0-9]+]]:_(f32) = G_FPEXT [[FABS1]](f16) + ; GFX6-NEXT: [[FPEXT11:%[0-9]+]]:_(f32) = G_FPEXT [[C1]](f16) + ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(oge), [[FPEXT10]](f32), [[FPEXT11]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(f16) = G_SELECT [[FCMP1]](i1), [[C2]], [[C3]] + ; GFX6-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[SELECT1]](f16) + ; GFX6-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[BITCAST8]], [[C5]] + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[BITCAST9]], [[C4]] + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(i16) = disjoint G_OR [[AND2]], [[AND3]] + ; GFX6-NEXT: [[BITCAST10:%[0-9]+]]:_(f16) = G_BITCAST [[OR1]](i16) + ; GFX6-NEXT: [[FPEXT12:%[0-9]+]]:_(f32) = G_FPEXT [[FPTRUNC3]](f16) + ; GFX6-NEXT: [[FPEXT13:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST10]](f16) + ; GFX6-NEXT: [[FADD3:%[0-9]+]]:_(f32) = G_FADD [[FPEXT12]], [[FPEXT13]] + ; GFX6-NEXT: [[FPTRUNC5:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD3]](f32) + ; GFX6-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC2]](f16) + ; GFX6-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC5]](f16) + ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST11]](i16) + ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST12]](i16) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; GFX6-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR2]](i32) + ; GFX6-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST13]](<2 x f16>) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST14]](<2 x i16>) ; ; GFX8-LABEL: name: test_intrinsic_round_v2s16 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC]] - ; GFX8-NEXT: [[FSUB:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC]], [[INTRINSIC_TRUNC]] - ; GFX8-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FSUB]] - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800 - ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s16), [[C1]] - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3C00 - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[C2]], [[C3]] - ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[SELECT]], [[C5]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] - ; GFX8-NEXT: %24:_(s16) = disjoint G_OR [[AND]], [[AND1]] - ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC]], %24 - ; GFX8-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC1]] - ; GFX8-NEXT: [[FSUB1:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC1]], [[INTRINSIC_TRUNC1]] - ; GFX8-NEXT: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[FSUB1]] - ; GFX8-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s16), 
[[C1]] - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[FCMP1]](s1), [[C2]], [[C3]] - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[SELECT1]], [[C5]] - ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]] - ; GFX8-NEXT: %14:_(s16) = disjoint G_OR [[AND2]], [[AND3]] - ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC1]], %14 - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FADD]](s16) - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FADD1]](s16) - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %35(i16) + ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f16) = G_INTRINSIC_TRUNC [[BITCAST1]] + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %36(i16) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX8-NEXT: [[FSUB:%[0-9]+]]:_(f16) = G_FSUB [[BITCAST1]], [[INTRINSIC_TRUNC]] + ; GFX8-NEXT: [[FABS:%[0-9]+]]:_(f16) = G_FABS [[FSUB]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH3800 + ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(oge), [[FABS]](f16), [[C1]] + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH3C00 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH0000 + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(f16) = G_SELECT [[FCMP]](i1), [[C2]], [[C3]] + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C5:%[0-9]+]]:_(i16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[SELECT]](f16) + ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[BITCAST5]], [[C5]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[BITCAST6]], [[C4]] + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i16) = disjoint G_OR [[AND]], [[AND1]] + ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(f16) = G_BITCAST [[OR]](i16) + ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(f16) = G_FADD [[INTRINSIC_TRUNC]], [[BITCAST7]] + ; GFX8-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(f16) = G_INTRINSIC_TRUNC [[BITCAST2]] + ; GFX8-NEXT: [[FSUB1:%[0-9]+]]:_(f16) = G_FSUB [[BITCAST2]], [[INTRINSIC_TRUNC1]] + ; GFX8-NEXT: [[FABS1:%[0-9]+]]:_(f16) = G_FABS [[FSUB1]] + ; GFX8-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(oge), [[FABS1]](f16), [[C1]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(f16) = G_SELECT [[FCMP1]](i1), [[C2]], [[C3]] + ; GFX8-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[SELECT1]](f16) + ; GFX8-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[BITCAST8]], [[C5]] + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[BITCAST9]], [[C4]] + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(i16) = disjoint G_OR [[AND2]], [[AND3]] + ; GFX8-NEXT: [[BITCAST10:%[0-9]+]]:_(f16) = G_BITCAST [[OR1]](i16) + ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(f16) = G_FADD [[INTRINSIC_TRUNC1]], [[BITCAST10]] + ; GFX8-NEXT: 
[[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[FADD]](f16) + ; GFX8-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[FADD1]](f16) + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST11]](i16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST12]](i16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR2]](i32) + ; GFX8-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST13]](<2 x f16>) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST14]](<2 x i16>) ; ; GFX9-LABEL: name: test_intrinsic_round_v2s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC]] - ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC]], [[INTRINSIC_TRUNC]] - ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FSUB]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800 - ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s16), [[C1]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3C00 - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[C2]], [[C3]] - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[SELECT]], [[C5]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] - ; GFX9-NEXT: %24:_(s16) = disjoint G_OR [[AND]], [[AND1]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC]], %24 - ; GFX9-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC1]] - ; GFX9-NEXT: [[FSUB1:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC1]], [[INTRINSIC_TRUNC1]] - ; GFX9-NEXT: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[FSUB1]] - ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s16), [[C1]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[FCMP1]](s1), [[C2]], [[C3]] - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[SELECT1]], [[C5]] - ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]] - ; GFX9-NEXT: %14:_(s16) = disjoint G_OR [[AND2]], [[AND3]] - ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC1]], %14 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = G_INTRINSIC_ROUND %0 - $vgpr0 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %35(i16) + ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f16) = G_INTRINSIC_TRUNC [[BITCAST1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %36(i16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; 
GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(f16) = G_FSUB [[BITCAST1]], [[INTRINSIC_TRUNC]] + ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(f16) = G_FABS [[FSUB]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH3800 + ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(oge), [[FABS]](f16), [[C1]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH3C00 + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH0000 + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(f16) = G_SELECT [[FCMP]](i1), [[C2]], [[C3]] + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 -32768 + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(i16) = G_CONSTANT i16 32767 + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[SELECT]](f16) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[BITCAST5]], [[C5]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[BITCAST6]], [[C4]] + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i16) = disjoint G_OR [[AND]], [[AND1]] + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(f16) = G_BITCAST [[OR]](i16) + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(f16) = G_FADD [[INTRINSIC_TRUNC]], [[BITCAST7]] + ; GFX9-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(f16) = G_INTRINSIC_TRUNC [[BITCAST2]] + ; GFX9-NEXT: [[FSUB1:%[0-9]+]]:_(f16) = G_FSUB [[BITCAST2]], [[INTRINSIC_TRUNC1]] + ; GFX9-NEXT: [[FABS1:%[0-9]+]]:_(f16) = G_FABS [[FSUB1]] + ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(oge), [[FABS1]](f16), [[C1]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(f16) = G_SELECT [[FCMP1]](i1), [[C2]], [[C3]] + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[SELECT1]](f16) + ; GFX9-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[BITCAST8]], [[C5]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[BITCAST9]], [[C4]] + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i16) = disjoint G_OR [[AND2]], [[AND3]] + ; GFX9-NEXT: [[BITCAST10:%[0-9]+]]:_(f16) = G_BITCAST [[OR1]](i16) + ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(f16) = G_FADD [[INTRINSIC_TRUNC1]], [[BITCAST10]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[FADD]](f16), [[FADD1]](f16) + ; GFX9-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BUILD_VECTOR]](<2 x f16>) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST11]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %2:_(<2 x f16>) = G_INTRINSIC_ROUND %1 + %3:_(<2 x i16>) = G_BITCAST %2(<2 x f16>) + $vgpr0 = COPY %3(<2 x i16>) ... 
--- @@ -721,233 +889,280 @@ body: | ; GFX6-LABEL: name: test_intrinsic_round_v3s16 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX6-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX6-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT]] - ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC]](s32) - ; GFX6-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[FPTRUNC]] - ; GFX6-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX6-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG]](s16) - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT1]], [[FPEXT2]] - ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) - ; GFX6-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FPTRUNC1]] - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800 - ; GFX6-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[FABS]](s16) - ; GFX6-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[C1]](s16) - ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FPEXT3]](s32), [[FPEXT4]] - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3C00 - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[C2]], [[C3]] - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[SELECT]], [[C5]] - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] - ; GFX6-NEXT: %65:_(s16) = disjoint G_OR [[AND]], [[AND1]] - ; GFX6-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC]](s16) - ; GFX6-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT %65(s16) - ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FPEXT5]], [[FPEXT6]] - ; GFX6-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD1]](s32) - ; GFX6-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; GFX6-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT7]] - ; GFX6-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC1]](s32) - ; GFX6-NEXT: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[FPTRUNC3]] - ; GFX6-NEXT: [[FPEXT8:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; GFX6-NEXT: [[FPEXT9:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG1]](s16) - ; GFX6-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FPEXT8]], [[FPEXT9]] - ; GFX6-NEXT: [[FPTRUNC4:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD2]](s32) - ; GFX6-NEXT: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[FPTRUNC4]] - ; GFX6-NEXT: [[FPEXT10:%[0-9]+]]:_(s32) = G_FPEXT [[FABS1]](s16) - ; GFX6-NEXT: [[FPEXT11:%[0-9]+]]:_(s32) = G_FPEXT [[C1]](s16) - ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FPEXT10]](s32), [[FPEXT11]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[FCMP1]](s1), [[C2]], [[C3]] - ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[SELECT1]], [[C5]] - ; GFX6-NEXT: 
[[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]] - ; GFX6-NEXT: %46:_(s16) = disjoint G_OR [[AND2]], [[AND3]] - ; GFX6-NEXT: [[FPEXT12:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC3]](s16) - ; GFX6-NEXT: [[FPEXT13:%[0-9]+]]:_(s32) = G_FPEXT %46(s16) - ; GFX6-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FPEXT12]], [[FPEXT13]] - ; GFX6-NEXT: [[FPTRUNC5:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD3]](s32) - ; GFX6-NEXT: [[FPEXT14:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; GFX6-NEXT: [[INTRINSIC_TRUNC2:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT14]] - ; GFX6-NEXT: [[FPTRUNC6:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC2]](s32) - ; GFX6-NEXT: [[FNEG2:%[0-9]+]]:_(s16) = G_FNEG [[FPTRUNC6]] - ; GFX6-NEXT: [[FPEXT15:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; GFX6-NEXT: [[FPEXT16:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG2]](s16) - ; GFX6-NEXT: [[FADD4:%[0-9]+]]:_(s32) = G_FADD [[FPEXT15]], [[FPEXT16]] - ; GFX6-NEXT: [[FPTRUNC7:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD4]](s32) - ; GFX6-NEXT: [[FABS2:%[0-9]+]]:_(s16) = G_FABS [[FPTRUNC7]] - ; GFX6-NEXT: [[FPEXT17:%[0-9]+]]:_(s32) = G_FPEXT [[FABS2]](s16) - ; GFX6-NEXT: [[FPEXT18:%[0-9]+]]:_(s32) = G_FPEXT [[C1]](s16) - ; GFX6-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FPEXT17]](s32), [[FPEXT18]] - ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[FCMP2]](s1), [[C2]], [[C3]] - ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[SELECT2]], [[C5]] - ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] - ; GFX6-NEXT: %25:_(s16) = disjoint G_OR [[AND4]], [[AND5]] - ; GFX6-NEXT: [[FPEXT19:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC6]](s16) - ; GFX6-NEXT: [[FPEXT20:%[0-9]+]]:_(s32) = G_FPEXT %25(s16) - ; GFX6-NEXT: [[FADD5:%[0-9]+]]:_(s32) = G_FADD [[FPEXT19]], [[FPEXT20]] - ; GFX6-NEXT: [[FPTRUNC8:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD5]](s32) - ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) - ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC5]](s16) - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC8]](s16) - ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C6]] - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) - ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C6]] - ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) - ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]] - ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), 
[[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX6-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX6-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; GFX6-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FPEXT]] + ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[INTRINSIC_TRUNC]](f32) + ; GFX6-NEXT: [[FNEG:%[0-9]+]]:_(f16) = G_FNEG [[FPTRUNC]] + ; GFX6-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; GFX6-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[FNEG]](f16) + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FPEXT1]], [[FPEXT2]] + ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD]](f32) + ; GFX6-NEXT: [[FABS:%[0-9]+]]:_(f16) = G_FABS [[FPTRUNC1]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH3800 + ; GFX6-NEXT: [[FPEXT3:%[0-9]+]]:_(f32) = G_FPEXT [[FABS]](f16) + ; GFX6-NEXT: [[FPEXT4:%[0-9]+]]:_(f32) = G_FPEXT [[C1]](f16) + ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(oge), [[FPEXT3]](f32), [[FPEXT4]] + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH3C00 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH0000 + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(f16) = G_SELECT [[FCMP]](i1), [[C2]], [[C3]] + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 -32768 + ; GFX6-NEXT: [[C5:%[0-9]+]]:_(i16) = G_CONSTANT i16 32767 + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[SELECT]](f16) + ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[BITCAST5]], [[C5]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[BITCAST6]], [[C4]] + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i16) = disjoint G_OR [[AND]], [[AND1]] + ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(f16) = G_BITCAST [[OR]](i16) + ; GFX6-NEXT: [[FPEXT5:%[0-9]+]]:_(f32) = G_FPEXT [[FPTRUNC]](f16) + ; GFX6-NEXT: [[FPEXT6:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST7]](f16) + ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FPEXT5]], [[FPEXT6]] + ; GFX6-NEXT: [[FPTRUNC2:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD1]](f32) + ; GFX6-NEXT: [[FPEXT7:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST3]](f16) + ; GFX6-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FPEXT7]] + ; GFX6-NEXT: [[FPTRUNC3:%[0-9]+]]:_(f16) = G_FPTRUNC [[INTRINSIC_TRUNC1]](f32) + ; GFX6-NEXT: [[FNEG1:%[0-9]+]]:_(f16) = G_FNEG [[FPTRUNC3]] + ; GFX6-NEXT: [[FPEXT8:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST3]](f16) + ; GFX6-NEXT: [[FPEXT9:%[0-9]+]]:_(f32) = G_FPEXT [[FNEG1]](f16) + ; GFX6-NEXT: [[FADD2:%[0-9]+]]:_(f32) = G_FADD [[FPEXT8]], [[FPEXT9]] + ; GFX6-NEXT: [[FPTRUNC4:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD2]](f32) + ; GFX6-NEXT: [[FABS1:%[0-9]+]]:_(f16) = G_FABS [[FPTRUNC4]] + ; GFX6-NEXT: [[FPEXT10:%[0-9]+]]:_(f32) = G_FPEXT [[FABS1]](f16) + ; GFX6-NEXT: [[FPEXT11:%[0-9]+]]:_(f32) = G_FPEXT [[C1]](f16) + ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(oge), 
[[FPEXT10]](f32), [[FPEXT11]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(f16) = G_SELECT [[FCMP1]](i1), [[C2]], [[C3]] + ; GFX6-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[SELECT1]](f16) + ; GFX6-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[BITCAST8]], [[C5]] + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[BITCAST9]], [[C4]] + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(i16) = disjoint G_OR [[AND2]], [[AND3]] + ; GFX6-NEXT: [[BITCAST10:%[0-9]+]]:_(f16) = G_BITCAST [[OR1]](i16) + ; GFX6-NEXT: [[FPEXT12:%[0-9]+]]:_(f32) = G_FPEXT [[FPTRUNC3]](f16) + ; GFX6-NEXT: [[FPEXT13:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST10]](f16) + ; GFX6-NEXT: [[FADD3:%[0-9]+]]:_(f32) = G_FADD [[FPEXT12]], [[FPEXT13]] + ; GFX6-NEXT: [[FPTRUNC5:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD3]](f32) + ; GFX6-NEXT: [[FPEXT14:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST4]](f16) + ; GFX6-NEXT: [[INTRINSIC_TRUNC2:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FPEXT14]] + ; GFX6-NEXT: [[FPTRUNC6:%[0-9]+]]:_(f16) = G_FPTRUNC [[INTRINSIC_TRUNC2]](f32) + ; GFX6-NEXT: [[FNEG2:%[0-9]+]]:_(f16) = G_FNEG [[FPTRUNC6]] + ; GFX6-NEXT: [[FPEXT15:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST4]](f16) + ; GFX6-NEXT: [[FPEXT16:%[0-9]+]]:_(f32) = G_FPEXT [[FNEG2]](f16) + ; GFX6-NEXT: [[FADD4:%[0-9]+]]:_(f32) = G_FADD [[FPEXT15]], [[FPEXT16]] + ; GFX6-NEXT: [[FPTRUNC7:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD4]](f32) + ; GFX6-NEXT: [[FABS2:%[0-9]+]]:_(f16) = G_FABS [[FPTRUNC7]] + ; GFX6-NEXT: [[FPEXT17:%[0-9]+]]:_(f32) = G_FPEXT [[FABS2]](f16) + ; GFX6-NEXT: [[FPEXT18:%[0-9]+]]:_(f32) = G_FPEXT [[C1]](f16) + ; GFX6-NEXT: [[FCMP2:%[0-9]+]]:_(i1) = G_FCMP floatpred(oge), [[FPEXT17]](f32), [[FPEXT18]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(f16) = G_SELECT [[FCMP2]](i1), [[C2]], [[C3]] + ; GFX6-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[SELECT2]](f16) + ; GFX6-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(i16) = G_AND [[BITCAST11]], [[C5]] + ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(i16) = G_AND [[BITCAST12]], [[C4]] + ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(i16) = disjoint G_OR [[AND4]], [[AND5]] + ; GFX6-NEXT: [[BITCAST13:%[0-9]+]]:_(f16) = G_BITCAST [[OR2]](i16) + ; GFX6-NEXT: [[FPEXT19:%[0-9]+]]:_(f32) = G_FPEXT [[FPTRUNC6]](f16) + ; GFX6-NEXT: [[FPEXT20:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST13]](f16) + ; GFX6-NEXT: [[FADD5:%[0-9]+]]:_(f32) = G_FADD [[FPEXT19]], [[FPEXT20]] + ; GFX6-NEXT: [[FPTRUNC8:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD5]](f32) + ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX6-NEXT: [[BITCAST14:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST14]], [[C]](i32) + ; GFX6-NEXT: [[BITCAST15:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; GFX6-NEXT: [[BITCAST16:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC2]](f16) + ; GFX6-NEXT: [[BITCAST17:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC5]](f16) + ; GFX6-NEXT: [[BITCAST18:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC8]](f16) + ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST16]](i16) + ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST17]](i16) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; GFX6-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; GFX6-NEXT: [[BITCAST19:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR3]](i32) + ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST18]](i16) + ; GFX6-NEXT: [[C6:%[0-9]+]]:_(i32) = 
G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND6:%[0-9]+]]:_(i32) = G_AND [[BITCAST14]], [[C6]] + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND6]], [[C]](i32) + ; GFX6-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX6-NEXT: [[BITCAST20:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR4]](i32) + ; GFX6-NEXT: [[AND7:%[0-9]+]]:_(i32) = G_AND [[BITCAST15]], [[C6]] + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND7]], [[C]](i32) + ; GFX6-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[LSHR1]], [[SHL2]] + ; GFX6-NEXT: [[BITCAST21:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR5]](i32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST19]](<2 x i16>), [[BITCAST20]](<2 x i16>), [[BITCAST21]](<2 x i16>) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX8-LABEL: name: test_intrinsic_round_v3s16 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC]] - ; GFX8-NEXT: [[FSUB:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC]], [[INTRINSIC_TRUNC]] - ; GFX8-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FSUB]] - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800 - ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s16), [[C1]] - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3C00 - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[C2]], [[C3]] - ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[SELECT]], [[C5]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] - ; GFX8-NEXT: %43:_(s16) = disjoint G_OR [[AND]], [[AND1]] - ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC]], %43 - ; GFX8-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC1]] - ; GFX8-NEXT: [[FSUB1:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC1]], [[INTRINSIC_TRUNC1]] - ; GFX8-NEXT: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[FSUB1]] - ; GFX8-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s16), [[C1]] - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[FCMP1]](s1), [[C2]], [[C3]] - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[SELECT1]], [[C5]] - ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]] - ; GFX8-NEXT: %35:_(s16) = disjoint G_OR [[AND2]], [[AND3]] - ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC1]], %35 - ; GFX8-NEXT: [[INTRINSIC_TRUNC2:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC2]] - ; GFX8-NEXT: [[FSUB2:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC2]], [[INTRINSIC_TRUNC2]] - ; GFX8-NEXT: [[FABS2:%[0-9]+]]:_(s16) = G_FABS [[FSUB2]] - ; GFX8-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS2]](s16), [[C1]] - ; GFX8-NEXT: 
[[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[FCMP2]](s1), [[C2]], [[C3]] - ; GFX8-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[SELECT2]], [[C5]] - ; GFX8-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] - ; GFX8-NEXT: %25:_(s16) = disjoint G_OR [[AND4]], [[AND5]] - ; GFX8-NEXT: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC2]], %25 - ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FADD]](s16) - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FADD1]](s16) - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FADD2]](s16) - ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C6]] - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) - ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C6]] - ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) - ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]] - ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f16) = G_INTRINSIC_TRUNC [[BITCAST2]] + ; GFX8-NEXT: [[FSUB:%[0-9]+]]:_(f16) = G_FSUB [[BITCAST2]], [[INTRINSIC_TRUNC]] + ; GFX8-NEXT: [[FABS:%[0-9]+]]:_(f16) = G_FABS [[FSUB]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH3800 + ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(oge), [[FABS]](f16), [[C1]] + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH3C00 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH0000 + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(f16) = G_SELECT [[FCMP]](i1), [[C2]], [[C3]] + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C5:%[0-9]+]]:_(i16) = G_CONSTANT i16 32767 
+ ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[SELECT]](f16) + ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[BITCAST5]], [[C5]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[BITCAST6]], [[C4]] + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i16) = disjoint G_OR [[AND]], [[AND1]] + ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(f16) = G_BITCAST [[OR]](i16) + ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(f16) = G_FADD [[INTRINSIC_TRUNC]], [[BITCAST7]] + ; GFX8-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(f16) = G_INTRINSIC_TRUNC [[BITCAST3]] + ; GFX8-NEXT: [[FSUB1:%[0-9]+]]:_(f16) = G_FSUB [[BITCAST3]], [[INTRINSIC_TRUNC1]] + ; GFX8-NEXT: [[FABS1:%[0-9]+]]:_(f16) = G_FABS [[FSUB1]] + ; GFX8-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(oge), [[FABS1]](f16), [[C1]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(f16) = G_SELECT [[FCMP1]](i1), [[C2]], [[C3]] + ; GFX8-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[SELECT1]](f16) + ; GFX8-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[BITCAST8]], [[C5]] + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[BITCAST9]], [[C4]] + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(i16) = disjoint G_OR [[AND2]], [[AND3]] + ; GFX8-NEXT: [[BITCAST10:%[0-9]+]]:_(f16) = G_BITCAST [[OR1]](i16) + ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(f16) = G_FADD [[INTRINSIC_TRUNC1]], [[BITCAST10]] + ; GFX8-NEXT: [[INTRINSIC_TRUNC2:%[0-9]+]]:_(f16) = G_INTRINSIC_TRUNC [[BITCAST4]] + ; GFX8-NEXT: [[FSUB2:%[0-9]+]]:_(f16) = G_FSUB [[BITCAST4]], [[INTRINSIC_TRUNC2]] + ; GFX8-NEXT: [[FABS2:%[0-9]+]]:_(f16) = G_FABS [[FSUB2]] + ; GFX8-NEXT: [[FCMP2:%[0-9]+]]:_(i1) = G_FCMP floatpred(oge), [[FABS2]](f16), [[C1]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(f16) = G_SELECT [[FCMP2]](i1), [[C2]], [[C3]] + ; GFX8-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[SELECT2]](f16) + ; GFX8-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX8-NEXT: [[AND4:%[0-9]+]]:_(i16) = G_AND [[BITCAST11]], [[C5]] + ; GFX8-NEXT: [[AND5:%[0-9]+]]:_(i16) = G_AND [[BITCAST12]], [[C4]] + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(i16) = disjoint G_OR [[AND4]], [[AND5]] + ; GFX8-NEXT: [[BITCAST13:%[0-9]+]]:_(f16) = G_BITCAST [[OR2]](i16) + ; GFX8-NEXT: [[FADD2:%[0-9]+]]:_(f16) = G_FADD [[INTRINSIC_TRUNC2]], [[BITCAST13]] + ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX8-NEXT: [[BITCAST14:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST14]], [[C]](i32) + ; GFX8-NEXT: [[BITCAST15:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; GFX8-NEXT: [[BITCAST16:%[0-9]+]]:_(i16) = G_BITCAST [[FADD]](f16) + ; GFX8-NEXT: [[BITCAST17:%[0-9]+]]:_(i16) = G_BITCAST [[FADD1]](f16) + ; GFX8-NEXT: [[BITCAST18:%[0-9]+]]:_(i16) = G_BITCAST [[FADD2]](f16) + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST16]](i16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST17]](i16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; GFX8-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST19:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR3]](i32) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST18]](i16) + ; GFX8-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND6:%[0-9]+]]:_(i32) = G_AND [[BITCAST14]], [[C6]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND6]], [[C]](i32) + ; GFX8-NEXT: 
[[OR4:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX8-NEXT: [[BITCAST20:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR4]](i32) + ; GFX8-NEXT: [[AND7:%[0-9]+]]:_(i32) = G_AND [[BITCAST15]], [[C6]] + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND7]], [[C]](i32) + ; GFX8-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[LSHR1]], [[SHL2]] + ; GFX8-NEXT: [[BITCAST21:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR5]](i32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST19]](<2 x i16>), [[BITCAST20]](<2 x i16>), [[BITCAST21]](<2 x i16>) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX9-LABEL: name: test_intrinsic_round_v3s16 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC]] - ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC]], [[INTRINSIC_TRUNC]] - ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FSUB]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800 - ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s16), [[C1]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3C00 - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[C2]], [[C3]] - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[SELECT]], [[C5]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] - ; GFX9-NEXT: %43:_(s16) = disjoint G_OR [[AND]], [[AND1]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC]], %43 - ; GFX9-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC1]] - ; GFX9-NEXT: [[FSUB1:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC1]], [[INTRINSIC_TRUNC1]] - ; GFX9-NEXT: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[FSUB1]] - ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s16), [[C1]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[FCMP1]](s1), [[C2]], [[C3]] - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[SELECT1]], [[C5]] - ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]] - ; GFX9-NEXT: %35:_(s16) = disjoint G_OR [[AND2]], [[AND3]] - ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC1]], %35 - ; GFX9-NEXT: [[INTRINSIC_TRUNC2:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC2]] - ; GFX9-NEXT: [[FSUB2:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC2]], [[INTRINSIC_TRUNC2]] - ; GFX9-NEXT: [[FABS2:%[0-9]+]]:_(s16) = G_FABS [[FSUB2]] - ; GFX9-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS2]](s16), [[C1]] - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[FCMP2]](s1), [[C2]], [[C3]] - ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[SELECT2]], [[C5]] - ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND 
[[TRUNC2]], [[C4]] - ; GFX9-NEXT: %25:_(s16) = disjoint G_OR [[AND4]], [[AND5]] - ; GFX9-NEXT: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC2]], %25 - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) - %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0 - %3:_(<3 x s16>) = G_INTRINSIC_ROUND %1 - %4:_(<3 x s16>) = G_IMPLICIT_DEF - %5:_(<6 x s16>) = G_CONCAT_VECTORS %3, %4 - $vgpr0_vgpr1_vgpr2 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f16) = G_INTRINSIC_TRUNC [[BITCAST2]] + ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(f16) = G_FSUB [[BITCAST2]], [[INTRINSIC_TRUNC]] + ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(f16) = G_FABS [[FSUB]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH3800 + ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(oge), [[FABS]](f16), [[C1]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH3C00 + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH0000 + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(f16) = G_SELECT [[FCMP]](i1), [[C2]], [[C3]] + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 -32768 + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(i16) = G_CONSTANT i16 32767 + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[SELECT]](f16) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[BITCAST5]], [[C5]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[BITCAST6]], [[C4]] + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i16) = disjoint G_OR [[AND]], [[AND1]] + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(f16) = G_BITCAST 
[[OR]](i16) + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(f16) = G_FADD [[INTRINSIC_TRUNC]], [[BITCAST7]] + ; GFX9-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(f16) = G_INTRINSIC_TRUNC [[BITCAST3]] + ; GFX9-NEXT: [[FSUB1:%[0-9]+]]:_(f16) = G_FSUB [[BITCAST3]], [[INTRINSIC_TRUNC1]] + ; GFX9-NEXT: [[FABS1:%[0-9]+]]:_(f16) = G_FABS [[FSUB1]] + ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(oge), [[FABS1]](f16), [[C1]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(f16) = G_SELECT [[FCMP1]](i1), [[C2]], [[C3]] + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(i16) = G_BITCAST [[SELECT1]](f16) + ; GFX9-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[BITCAST8]], [[C5]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[BITCAST9]], [[C4]] + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i16) = disjoint G_OR [[AND2]], [[AND3]] + ; GFX9-NEXT: [[BITCAST10:%[0-9]+]]:_(f16) = G_BITCAST [[OR1]](i16) + ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(f16) = G_FADD [[INTRINSIC_TRUNC1]], [[BITCAST10]] + ; GFX9-NEXT: [[INTRINSIC_TRUNC2:%[0-9]+]]:_(f16) = G_INTRINSIC_TRUNC [[BITCAST4]] + ; GFX9-NEXT: [[FSUB2:%[0-9]+]]:_(f16) = G_FSUB [[BITCAST4]], [[INTRINSIC_TRUNC2]] + ; GFX9-NEXT: [[FABS2:%[0-9]+]]:_(f16) = G_FABS [[FSUB2]] + ; GFX9-NEXT: [[FCMP2:%[0-9]+]]:_(i1) = G_FCMP floatpred(oge), [[FABS2]](f16), [[C1]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(f16) = G_SELECT [[FCMP2]](i1), [[C2]], [[C3]] + ; GFX9-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[SELECT2]](f16) + ; GFX9-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(i16) = G_AND [[BITCAST11]], [[C5]] + ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(i16) = G_AND [[BITCAST12]], [[C4]] + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i16) = disjoint G_OR [[AND4]], [[AND5]] + ; GFX9-NEXT: [[BITCAST13:%[0-9]+]]:_(f16) = G_BITCAST [[OR2]](i16) + ; GFX9-NEXT: [[FADD2:%[0-9]+]]:_(f16) = G_FADD [[INTRINSIC_TRUNC2]], [[BITCAST13]] + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX9-NEXT: [[BITCAST14:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST14]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST14]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BITCAST15:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST15]](i32) + ; GFX9-NEXT: [[BITCAST16:%[0-9]+]]:_(i16) = G_BITCAST [[FADD]](f16) + ; GFX9-NEXT: [[BITCAST17:%[0-9]+]]:_(i16) = G_BITCAST [[FADD1]](f16) + ; GFX9-NEXT: [[BITCAST18:%[0-9]+]]:_(i16) = G_BITCAST [[FADD2]](f16) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST16]](i16), [[BITCAST17]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST18]](i16), [[TRUNC3]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) + %0:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x i16>), %2:_(<3 x i16>) = G_UNMERGE_VALUES %0(<6 x i16>) + %3:_(<3 x f16>) = G_BITCAST %1(<3 x i16>) + %4:_(<3 x f16>) = G_INTRINSIC_ROUND %3 + %5:_(<3 x i16>) = G_IMPLICIT_DEF + %6:_(<3 x i16>) = G_BITCAST %4(<3 x f16>) + 
%7:_(<6 x i16>) = G_CONCAT_VECTORS %6(<3 x i16>), %5(<3 x i16>) + $vgpr0_vgpr1_vgpr2 = COPY %7(<6 x i16>) ... --- @@ -959,243 +1174,313 @@ body: | ; GFX6-LABEL: name: test_intrinsic_round_v4s16 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX6-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX6-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT]] - ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC]](s32) - ; GFX6-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[FPTRUNC]] - ; GFX6-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX6-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG]](s16) - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT1]], [[FPEXT2]] - ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) - ; GFX6-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FPTRUNC1]] - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800 - ; GFX6-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[FABS]](s16) - ; GFX6-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[C1]](s16) - ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FPEXT3]](s32), [[FPEXT4]] - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3C00 - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[C2]], [[C3]] - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[SELECT]], [[C5]] - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] - ; GFX6-NEXT: %77:_(s16) = disjoint G_OR [[AND]], [[AND1]] - ; GFX6-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC]](s16) - ; GFX6-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT %77(s16) - ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FPEXT5]], [[FPEXT6]] - ; GFX6-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD1]](s32) - ; GFX6-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; GFX6-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT7]] - ; GFX6-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC1]](s32) - ; GFX6-NEXT: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[FPTRUNC3]] - ; GFX6-NEXT: [[FPEXT8:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; GFX6-NEXT: [[FPEXT9:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG1]](s16) - ; GFX6-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FPEXT8]], [[FPEXT9]] - ; GFX6-NEXT: [[FPTRUNC4:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD2]](s32) - ; GFX6-NEXT: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[FPTRUNC4]] - ; GFX6-NEXT: [[FPEXT10:%[0-9]+]]:_(s32) = G_FPEXT [[FABS1]](s16) - ; GFX6-NEXT: [[FPEXT11:%[0-9]+]]:_(s32) = G_FPEXT [[C1]](s16) - ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), 
[[FPEXT10]](s32), [[FPEXT11]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[FCMP1]](s1), [[C2]], [[C3]] - ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[SELECT1]], [[C5]] - ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]] - ; GFX6-NEXT: %58:_(s16) = disjoint G_OR [[AND2]], [[AND3]] - ; GFX6-NEXT: [[FPEXT12:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC3]](s16) - ; GFX6-NEXT: [[FPEXT13:%[0-9]+]]:_(s32) = G_FPEXT %58(s16) - ; GFX6-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FPEXT12]], [[FPEXT13]] - ; GFX6-NEXT: [[FPTRUNC5:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD3]](s32) - ; GFX6-NEXT: [[FPEXT14:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; GFX6-NEXT: [[INTRINSIC_TRUNC2:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT14]] - ; GFX6-NEXT: [[FPTRUNC6:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC2]](s32) - ; GFX6-NEXT: [[FNEG2:%[0-9]+]]:_(s16) = G_FNEG [[FPTRUNC6]] - ; GFX6-NEXT: [[FPEXT15:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; GFX6-NEXT: [[FPEXT16:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG2]](s16) - ; GFX6-NEXT: [[FADD4:%[0-9]+]]:_(s32) = G_FADD [[FPEXT15]], [[FPEXT16]] - ; GFX6-NEXT: [[FPTRUNC7:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD4]](s32) - ; GFX6-NEXT: [[FABS2:%[0-9]+]]:_(s16) = G_FABS [[FPTRUNC7]] - ; GFX6-NEXT: [[FPEXT17:%[0-9]+]]:_(s32) = G_FPEXT [[FABS2]](s16) - ; GFX6-NEXT: [[FPEXT18:%[0-9]+]]:_(s32) = G_FPEXT [[C1]](s16) - ; GFX6-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FPEXT17]](s32), [[FPEXT18]] - ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[FCMP2]](s1), [[C2]], [[C3]] - ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[SELECT2]], [[C5]] - ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] - ; GFX6-NEXT: %39:_(s16) = disjoint G_OR [[AND4]], [[AND5]] - ; GFX6-NEXT: [[FPEXT19:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC6]](s16) - ; GFX6-NEXT: [[FPEXT20:%[0-9]+]]:_(s32) = G_FPEXT %39(s16) - ; GFX6-NEXT: [[FADD5:%[0-9]+]]:_(s32) = G_FADD [[FPEXT19]], [[FPEXT20]] - ; GFX6-NEXT: [[FPTRUNC8:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD5]](s32) - ; GFX6-NEXT: [[FPEXT21:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; GFX6-NEXT: [[INTRINSIC_TRUNC3:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT21]] - ; GFX6-NEXT: [[FPTRUNC9:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC3]](s32) - ; GFX6-NEXT: [[FNEG3:%[0-9]+]]:_(s16) = G_FNEG [[FPTRUNC9]] - ; GFX6-NEXT: [[FPEXT22:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; GFX6-NEXT: [[FPEXT23:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG3]](s16) - ; GFX6-NEXT: [[FADD6:%[0-9]+]]:_(s32) = G_FADD [[FPEXT22]], [[FPEXT23]] - ; GFX6-NEXT: [[FPTRUNC10:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD6]](s32) - ; GFX6-NEXT: [[FABS3:%[0-9]+]]:_(s16) = G_FABS [[FPTRUNC10]] - ; GFX6-NEXT: [[FPEXT24:%[0-9]+]]:_(s32) = G_FPEXT [[FABS3]](s16) - ; GFX6-NEXT: [[FPEXT25:%[0-9]+]]:_(s32) = G_FPEXT [[C1]](s16) - ; GFX6-NEXT: [[FCMP3:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FPEXT24]](s32), [[FPEXT25]] - ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[FCMP3]](s1), [[C2]], [[C3]] - ; GFX6-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[SELECT3]], [[C5]] - ; GFX6-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C4]] - ; GFX6-NEXT: %18:_(s16) = disjoint G_OR [[AND6]], [[AND7]] - ; GFX6-NEXT: [[FPEXT26:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC9]](s16) - ; GFX6-NEXT: [[FPEXT27:%[0-9]+]]:_(s32) = G_FPEXT %18(s16) - ; GFX6-NEXT: [[FADD7:%[0-9]+]]:_(s32) = G_FADD [[FPEXT26]], [[FPEXT27]] - ; GFX6-NEXT: [[FPTRUNC11:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD7]](s32) - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) - ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC5]](s16) - ; 
GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC8]](s16) - ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC11]](s16) - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x f16>) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %109(i16) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %115(i16) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %110(i16) + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %116(i16) + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST5]](<2 x i16>) + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST6]](i32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST6]], [[C]](i32) + ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; GFX6-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST7]](<2 x i16>) + ; GFX6-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST8]](i32) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST8]], [[C]](i32) + ; GFX6-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX6-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST1]](f16) + ; GFX6-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FPEXT]] + ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[INTRINSIC_TRUNC]](f32) + ; GFX6-NEXT: [[FNEG:%[0-9]+]]:_(f16) = G_FNEG [[FPTRUNC]] + ; GFX6-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST1]](f16) + ; GFX6-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[FNEG]](f16) + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FPEXT1]], [[FPEXT2]] + ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD]](f32) + ; GFX6-NEXT: [[FABS:%[0-9]+]]:_(f16) = G_FABS [[FPTRUNC1]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH3800 + ; GFX6-NEXT: [[FPEXT3:%[0-9]+]]:_(f32) = G_FPEXT [[FABS]](f16) + ; GFX6-NEXT: [[FPEXT4:%[0-9]+]]:_(f32) = G_FPEXT [[C1]](f16) + ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(oge), [[FPEXT3]](f32), [[FPEXT4]] + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH3C00 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH0000 + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(f16) = G_SELECT [[FCMP]](i1), [[C2]], [[C3]] + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 -32768 + ; GFX6-NEXT: [[C5:%[0-9]+]]:_(i16) = G_CONSTANT i16 32767 + ; GFX6-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[SELECT]](f16) + ; GFX6-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[BITCAST9]], [[C5]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[BITCAST10]], [[C4]] 
+ ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i16) = disjoint G_OR [[AND]], [[AND1]] + ; GFX6-NEXT: [[BITCAST11:%[0-9]+]]:_(f16) = G_BITCAST [[OR]](i16) + ; GFX6-NEXT: [[FPEXT5:%[0-9]+]]:_(f32) = G_FPEXT [[FPTRUNC]](f16) + ; GFX6-NEXT: [[FPEXT6:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST11]](f16) + ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FPEXT5]], [[FPEXT6]] + ; GFX6-NEXT: [[FPTRUNC2:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD1]](f32) + ; GFX6-NEXT: [[FPEXT7:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST3]](f16) + ; GFX6-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FPEXT7]] + ; GFX6-NEXT: [[FPTRUNC3:%[0-9]+]]:_(f16) = G_FPTRUNC [[INTRINSIC_TRUNC1]](f32) + ; GFX6-NEXT: [[FNEG1:%[0-9]+]]:_(f16) = G_FNEG [[FPTRUNC3]] + ; GFX6-NEXT: [[FPEXT8:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST3]](f16) + ; GFX6-NEXT: [[FPEXT9:%[0-9]+]]:_(f32) = G_FPEXT [[FNEG1]](f16) + ; GFX6-NEXT: [[FADD2:%[0-9]+]]:_(f32) = G_FADD [[FPEXT8]], [[FPEXT9]] + ; GFX6-NEXT: [[FPTRUNC4:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD2]](f32) + ; GFX6-NEXT: [[FABS1:%[0-9]+]]:_(f16) = G_FABS [[FPTRUNC4]] + ; GFX6-NEXT: [[FPEXT10:%[0-9]+]]:_(f32) = G_FPEXT [[FABS1]](f16) + ; GFX6-NEXT: [[FPEXT11:%[0-9]+]]:_(f32) = G_FPEXT [[C1]](f16) + ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(oge), [[FPEXT10]](f32), [[FPEXT11]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(f16) = G_SELECT [[FCMP1]](i1), [[C2]], [[C3]] + ; GFX6-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[SELECT1]](f16) + ; GFX6-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[BITCAST12]], [[C5]] + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[BITCAST13]], [[C4]] + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(i16) = disjoint G_OR [[AND2]], [[AND3]] + ; GFX6-NEXT: [[BITCAST14:%[0-9]+]]:_(f16) = G_BITCAST [[OR1]](i16) + ; GFX6-NEXT: [[FPEXT12:%[0-9]+]]:_(f32) = G_FPEXT [[FPTRUNC3]](f16) + ; GFX6-NEXT: [[FPEXT13:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST14]](f16) + ; GFX6-NEXT: [[FADD3:%[0-9]+]]:_(f32) = G_FADD [[FPEXT12]], [[FPEXT13]] + ; GFX6-NEXT: [[FPTRUNC5:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD3]](f32) + ; GFX6-NEXT: [[FPEXT14:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; GFX6-NEXT: [[INTRINSIC_TRUNC2:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FPEXT14]] + ; GFX6-NEXT: [[FPTRUNC6:%[0-9]+]]:_(f16) = G_FPTRUNC [[INTRINSIC_TRUNC2]](f32) + ; GFX6-NEXT: [[FNEG2:%[0-9]+]]:_(f16) = G_FNEG [[FPTRUNC6]] + ; GFX6-NEXT: [[FPEXT15:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; GFX6-NEXT: [[FPEXT16:%[0-9]+]]:_(f32) = G_FPEXT [[FNEG2]](f16) + ; GFX6-NEXT: [[FADD4:%[0-9]+]]:_(f32) = G_FADD [[FPEXT15]], [[FPEXT16]] + ; GFX6-NEXT: [[FPTRUNC7:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD4]](f32) + ; GFX6-NEXT: [[FABS2:%[0-9]+]]:_(f16) = G_FABS [[FPTRUNC7]] + ; GFX6-NEXT: [[FPEXT17:%[0-9]+]]:_(f32) = G_FPEXT [[FABS2]](f16) + ; GFX6-NEXT: [[FPEXT18:%[0-9]+]]:_(f32) = G_FPEXT [[C1]](f16) + ; GFX6-NEXT: [[FCMP2:%[0-9]+]]:_(i1) = G_FCMP floatpred(oge), [[FPEXT17]](f32), [[FPEXT18]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(f16) = G_SELECT [[FCMP2]](i1), [[C2]], [[C3]] + ; GFX6-NEXT: [[BITCAST15:%[0-9]+]]:_(i16) = G_BITCAST [[SELECT2]](f16) + ; GFX6-NEXT: [[BITCAST16:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(i16) = G_AND [[BITCAST15]], [[C5]] + ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(i16) = G_AND [[BITCAST16]], [[C4]] + ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(i16) = disjoint G_OR [[AND4]], [[AND5]] + ; GFX6-NEXT: [[BITCAST17:%[0-9]+]]:_(f16) = G_BITCAST [[OR2]](i16) + ; GFX6-NEXT: [[FPEXT19:%[0-9]+]]:_(f32) = G_FPEXT [[FPTRUNC6]](f16) + ; GFX6-NEXT: 
[[FPEXT20:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST17]](f16) + ; GFX6-NEXT: [[FADD5:%[0-9]+]]:_(f32) = G_FADD [[FPEXT19]], [[FPEXT20]] + ; GFX6-NEXT: [[FPTRUNC8:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD5]](f32) + ; GFX6-NEXT: [[FPEXT21:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST4]](f16) + ; GFX6-NEXT: [[INTRINSIC_TRUNC3:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FPEXT21]] + ; GFX6-NEXT: [[FPTRUNC9:%[0-9]+]]:_(f16) = G_FPTRUNC [[INTRINSIC_TRUNC3]](f32) + ; GFX6-NEXT: [[FNEG3:%[0-9]+]]:_(f16) = G_FNEG [[FPTRUNC9]] + ; GFX6-NEXT: [[FPEXT22:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST4]](f16) + ; GFX6-NEXT: [[FPEXT23:%[0-9]+]]:_(f32) = G_FPEXT [[FNEG3]](f16) + ; GFX6-NEXT: [[FADD6:%[0-9]+]]:_(f32) = G_FADD [[FPEXT22]], [[FPEXT23]] + ; GFX6-NEXT: [[FPTRUNC10:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD6]](f32) + ; GFX6-NEXT: [[FABS3:%[0-9]+]]:_(f16) = G_FABS [[FPTRUNC10]] + ; GFX6-NEXT: [[FPEXT24:%[0-9]+]]:_(f32) = G_FPEXT [[FABS3]](f16) + ; GFX6-NEXT: [[FPEXT25:%[0-9]+]]:_(f32) = G_FPEXT [[C1]](f16) + ; GFX6-NEXT: [[FCMP3:%[0-9]+]]:_(i1) = G_FCMP floatpred(oge), [[FPEXT24]](f32), [[FPEXT25]] + ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(f16) = G_SELECT [[FCMP3]](i1), [[C2]], [[C3]] + ; GFX6-NEXT: [[BITCAST18:%[0-9]+]]:_(i16) = G_BITCAST [[SELECT3]](f16) + ; GFX6-NEXT: [[BITCAST19:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX6-NEXT: [[AND6:%[0-9]+]]:_(i16) = G_AND [[BITCAST18]], [[C5]] + ; GFX6-NEXT: [[AND7:%[0-9]+]]:_(i16) = G_AND [[BITCAST19]], [[C4]] + ; GFX6-NEXT: [[OR3:%[0-9]+]]:_(i16) = disjoint G_OR [[AND6]], [[AND7]] + ; GFX6-NEXT: [[BITCAST20:%[0-9]+]]:_(f16) = G_BITCAST [[OR3]](i16) + ; GFX6-NEXT: [[FPEXT26:%[0-9]+]]:_(f32) = G_FPEXT [[FPTRUNC9]](f16) + ; GFX6-NEXT: [[FPEXT27:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST20]](f16) + ; GFX6-NEXT: [[FADD7:%[0-9]+]]:_(f32) = G_FADD [[FPEXT26]], [[FPEXT27]] + ; GFX6-NEXT: [[FPTRUNC11:%[0-9]+]]:_(f16) = G_FPTRUNC [[FADD7]](f32) + ; GFX6-NEXT: [[BITCAST21:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC2]](f16) + ; GFX6-NEXT: [[BITCAST22:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC5]](f16) + ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST21]](i16) + ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST22]](i16) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; GFX6-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; GFX6-NEXT: [[BITCAST23:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR4]](i32) + ; GFX6-NEXT: [[BITCAST24:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC8]](f16) + ; GFX6-NEXT: [[BITCAST25:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC11]](f16) + ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST24]](i16) + ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST25]](i16) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; GFX6-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX6-NEXT: [[BITCAST26:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR5]](i32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[BITCAST23]](<2 x f16>), [[BITCAST26]](<2 x f16>) + ; GFX6-NEXT: [[BITCAST27:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x f16>) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST27]](<4 x i16>) ; ; GFX8-LABEL: name: test_intrinsic_round_v4s16 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8-NEXT: 
[[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC]] - ; GFX8-NEXT: [[FSUB:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC]], [[INTRINSIC_TRUNC]] - ; GFX8-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FSUB]] - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800 - ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s16), [[C1]] - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3C00 - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[C2]], [[C3]] - ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[SELECT]], [[C5]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] - ; GFX8-NEXT: %44:_(s16) = disjoint G_OR [[AND]], [[AND1]] - ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC]], %44 - ; GFX8-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC1]] - ; GFX8-NEXT: [[FSUB1:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC1]], [[INTRINSIC_TRUNC1]] - ; GFX8-NEXT: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[FSUB1]] - ; GFX8-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s16), [[C1]] - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[FCMP1]](s1), [[C2]], [[C3]] - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[SELECT1]], [[C5]] - ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]] - ; GFX8-NEXT: %36:_(s16) = disjoint G_OR [[AND2]], [[AND3]] - ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC1]], %36 - ; GFX8-NEXT: [[INTRINSIC_TRUNC2:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC2]] - ; GFX8-NEXT: [[FSUB2:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC2]], [[INTRINSIC_TRUNC2]] - ; GFX8-NEXT: [[FABS2:%[0-9]+]]:_(s16) = G_FABS [[FSUB2]] - ; GFX8-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS2]](s16), [[C1]] - ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[FCMP2]](s1), [[C2]], [[C3]] - ; GFX8-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[SELECT2]], [[C5]] - ; GFX8-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] - ; GFX8-NEXT: %28:_(s16) = disjoint G_OR [[AND4]], [[AND5]] - ; GFX8-NEXT: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC2]], %28 - ; GFX8-NEXT: [[INTRINSIC_TRUNC3:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC3]] - ; GFX8-NEXT: [[FSUB3:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC3]], [[INTRINSIC_TRUNC3]] - ; GFX8-NEXT: [[FABS3:%[0-9]+]]:_(s16) = G_FABS [[FSUB3]] - ; GFX8-NEXT: [[FCMP3:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS3]](s16), [[C1]] - ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[FCMP3]](s1), [[C2]], [[C3]] - ; GFX8-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[SELECT3]], [[C5]] - ; GFX8-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C4]] - ; GFX8-NEXT: %18:_(s16) = disjoint G_OR [[AND6]], [[AND7]] - ; GFX8-NEXT: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC3]], %18 - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FADD]](s16) - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FADD1]](s16) - ; GFX8-NEXT: 
[[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FADD2]](s16) - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FADD3]](s16) - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x f16>) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %65(i16) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %71(i16) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %66(i16) + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %72(i16) + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST5]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST6]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST6]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; GFX8-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST7]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST8]](i32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST8]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f16) = G_INTRINSIC_TRUNC [[BITCAST1]] + ; GFX8-NEXT: [[FSUB:%[0-9]+]]:_(f16) = G_FSUB [[BITCAST1]], [[INTRINSIC_TRUNC]] + ; GFX8-NEXT: [[FABS:%[0-9]+]]:_(f16) = G_FABS [[FSUB]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH3800 + ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(oge), [[FABS]](f16), [[C1]] + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH3C00 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH0000 + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(f16) = G_SELECT [[FCMP]](i1), [[C2]], [[C3]] + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C5:%[0-9]+]]:_(i16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[SELECT]](f16) + ; GFX8-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[BITCAST9]], [[C5]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[BITCAST10]], [[C4]] + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i16) = disjoint G_OR [[AND]], [[AND1]] + ; GFX8-NEXT: [[BITCAST11:%[0-9]+]]:_(f16) = G_BITCAST [[OR]](i16) + ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(f16) = G_FADD [[INTRINSIC_TRUNC]], [[BITCAST11]] + ; GFX8-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(f16) = G_INTRINSIC_TRUNC [[BITCAST3]] + ; GFX8-NEXT: [[FSUB1:%[0-9]+]]:_(f16) = G_FSUB [[BITCAST3]], [[INTRINSIC_TRUNC1]] + ; GFX8-NEXT: [[FABS1:%[0-9]+]]:_(f16) = G_FABS [[FSUB1]] + ; GFX8-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(oge), [[FABS1]](f16), [[C1]] + ; GFX8-NEXT: 
[[SELECT1:%[0-9]+]]:_(f16) = G_SELECT [[FCMP1]](i1), [[C2]], [[C3]] + ; GFX8-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[SELECT1]](f16) + ; GFX8-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[BITCAST12]], [[C5]] + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[BITCAST13]], [[C4]] + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(i16) = disjoint G_OR [[AND2]], [[AND3]] + ; GFX8-NEXT: [[BITCAST14:%[0-9]+]]:_(f16) = G_BITCAST [[OR1]](i16) + ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(f16) = G_FADD [[INTRINSIC_TRUNC1]], [[BITCAST14]] + ; GFX8-NEXT: [[INTRINSIC_TRUNC2:%[0-9]+]]:_(f16) = G_INTRINSIC_TRUNC [[BITCAST2]] + ; GFX8-NEXT: [[FSUB2:%[0-9]+]]:_(f16) = G_FSUB [[BITCAST2]], [[INTRINSIC_TRUNC2]] + ; GFX8-NEXT: [[FABS2:%[0-9]+]]:_(f16) = G_FABS [[FSUB2]] + ; GFX8-NEXT: [[FCMP2:%[0-9]+]]:_(i1) = G_FCMP floatpred(oge), [[FABS2]](f16), [[C1]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(f16) = G_SELECT [[FCMP2]](i1), [[C2]], [[C3]] + ; GFX8-NEXT: [[BITCAST15:%[0-9]+]]:_(i16) = G_BITCAST [[SELECT2]](f16) + ; GFX8-NEXT: [[BITCAST16:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX8-NEXT: [[AND4:%[0-9]+]]:_(i16) = G_AND [[BITCAST15]], [[C5]] + ; GFX8-NEXT: [[AND5:%[0-9]+]]:_(i16) = G_AND [[BITCAST16]], [[C4]] + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(i16) = disjoint G_OR [[AND4]], [[AND5]] + ; GFX8-NEXT: [[BITCAST17:%[0-9]+]]:_(f16) = G_BITCAST [[OR2]](i16) + ; GFX8-NEXT: [[FADD2:%[0-9]+]]:_(f16) = G_FADD [[INTRINSIC_TRUNC2]], [[BITCAST17]] + ; GFX8-NEXT: [[INTRINSIC_TRUNC3:%[0-9]+]]:_(f16) = G_INTRINSIC_TRUNC [[BITCAST4]] + ; GFX8-NEXT: [[FSUB3:%[0-9]+]]:_(f16) = G_FSUB [[BITCAST4]], [[INTRINSIC_TRUNC3]] + ; GFX8-NEXT: [[FABS3:%[0-9]+]]:_(f16) = G_FABS [[FSUB3]] + ; GFX8-NEXT: [[FCMP3:%[0-9]+]]:_(i1) = G_FCMP floatpred(oge), [[FABS3]](f16), [[C1]] + ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(f16) = G_SELECT [[FCMP3]](i1), [[C2]], [[C3]] + ; GFX8-NEXT: [[BITCAST18:%[0-9]+]]:_(i16) = G_BITCAST [[SELECT3]](f16) + ; GFX8-NEXT: [[BITCAST19:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX8-NEXT: [[AND6:%[0-9]+]]:_(i16) = G_AND [[BITCAST18]], [[C5]] + ; GFX8-NEXT: [[AND7:%[0-9]+]]:_(i16) = G_AND [[BITCAST19]], [[C4]] + ; GFX8-NEXT: [[OR3:%[0-9]+]]:_(i16) = disjoint G_OR [[AND6]], [[AND7]] + ; GFX8-NEXT: [[BITCAST20:%[0-9]+]]:_(f16) = G_BITCAST [[OR3]](i16) + ; GFX8-NEXT: [[FADD3:%[0-9]+]]:_(f16) = G_FADD [[INTRINSIC_TRUNC3]], [[BITCAST20]] + ; GFX8-NEXT: [[BITCAST21:%[0-9]+]]:_(i16) = G_BITCAST [[FADD]](f16) + ; GFX8-NEXT: [[BITCAST22:%[0-9]+]]:_(i16) = G_BITCAST [[FADD1]](f16) + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST21]](i16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST22]](i16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; GFX8-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST23:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR4]](i32) + ; GFX8-NEXT: [[BITCAST24:%[0-9]+]]:_(i16) = G_BITCAST [[FADD2]](f16) + ; GFX8-NEXT: [[BITCAST25:%[0-9]+]]:_(i16) = G_BITCAST [[FADD3]](f16) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST24]](i16) + ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST25]](i16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; GFX8-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX8-NEXT: [[BITCAST26:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR5]](i32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[BITCAST23]](<2 x f16>), [[BITCAST26]](<2 x f16>) + ; GFX8-NEXT: [[BITCAST27:%[0-9]+]]:_(<4 x i16>) = G_BITCAST 
[[CONCAT_VECTORS]](<4 x f16>) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST27]](<4 x i16>) ; ; GFX9-LABEL: name: test_intrinsic_round_v4s16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC]] - ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC]], [[INTRINSIC_TRUNC]] - ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FSUB]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800 - ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s16), [[C1]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3C00 - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[C2]], [[C3]] - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[SELECT]], [[C5]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] - ; GFX9-NEXT: %44:_(s16) = disjoint G_OR [[AND]], [[AND1]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC]], %44 - ; GFX9-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC1]] - ; GFX9-NEXT: [[FSUB1:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC1]], [[INTRINSIC_TRUNC1]] - ; GFX9-NEXT: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[FSUB1]] - ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s16), [[C1]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[FCMP1]](s1), [[C2]], [[C3]] - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[SELECT1]], [[C5]] - ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]] - ; GFX9-NEXT: %36:_(s16) = disjoint G_OR [[AND2]], [[AND3]] - ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC1]], %36 - ; GFX9-NEXT: [[INTRINSIC_TRUNC2:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC2]] - ; GFX9-NEXT: [[FSUB2:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC2]], [[INTRINSIC_TRUNC2]] - ; GFX9-NEXT: [[FABS2:%[0-9]+]]:_(s16) = G_FABS [[FSUB2]] - ; GFX9-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS2]](s16), [[C1]] - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[FCMP2]](s1), [[C2]], [[C3]] - ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[SELECT2]], [[C5]] - ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] - ; GFX9-NEXT: %28:_(s16) = disjoint G_OR [[AND4]], [[AND5]] - ; GFX9-NEXT: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC2]], %28 - ; GFX9-NEXT: [[INTRINSIC_TRUNC3:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC3]] - ; GFX9-NEXT: [[FSUB3:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC3]], [[INTRINSIC_TRUNC3]] - ; GFX9-NEXT: [[FABS3:%[0-9]+]]:_(s16) = G_FABS [[FSUB3]] - ; GFX9-NEXT: [[FCMP3:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), 
[[FABS3]](s16), [[C1]] - ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[FCMP3]](s1), [[C2]], [[C3]] - ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[SELECT3]], [[C5]] - ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C4]] - ; GFX9-NEXT: %18:_(s16) = disjoint G_OR [[AND6]], [[AND7]] - ; GFX9-NEXT: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC3]], %18 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD2]](s16), [[FADD3]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = G_INTRINSIC_ROUND %0 - $vgpr0_vgpr1 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x f16>), [[UV1:%[0-9]+]]:_(<2 x f16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x f16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %65(i16) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %71(i16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %66(i16) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %72(i16) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV1]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST5]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST6]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST6]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[UV]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST7]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST8]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST8]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f16) = G_INTRINSIC_TRUNC [[BITCAST1]] + ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(f16) = G_FSUB [[BITCAST1]], [[INTRINSIC_TRUNC]] + ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(f16) = G_FABS [[FSUB]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH3800 + ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(i1) = G_FCMP floatpred(oge), [[FABS]](f16), [[C1]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH3C00 + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH0000 + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(f16) = G_SELECT [[FCMP]](i1), [[C2]], [[C3]] + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 -32768 + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(i16) = G_CONSTANT i16 32767 + ; GFX9-NEXT: [[BITCAST9:%[0-9]+]]:_(i16) = G_BITCAST [[SELECT]](f16) + ; GFX9-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST1]](f16) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[BITCAST9]], [[C5]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[BITCAST10]], [[C4]] + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i16) = disjoint G_OR [[AND]], [[AND1]] + ; GFX9-NEXT: [[BITCAST11:%[0-9]+]]:_(f16) = G_BITCAST [[OR]](i16) + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(f16) = G_FADD [[INTRINSIC_TRUNC]], [[BITCAST11]] + ; GFX9-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(f16) = G_INTRINSIC_TRUNC [[BITCAST3]] + ; GFX9-NEXT: [[FSUB1:%[0-9]+]]:_(f16) = G_FSUB [[BITCAST3]], [[INTRINSIC_TRUNC1]] 
+ ; GFX9-NEXT: [[FABS1:%[0-9]+]]:_(f16) = G_FABS [[FSUB1]] + ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(i1) = G_FCMP floatpred(oge), [[FABS1]](f16), [[C1]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(f16) = G_SELECT [[FCMP1]](i1), [[C2]], [[C3]] + ; GFX9-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[SELECT1]](f16) + ; GFX9-NEXT: [[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST3]](f16) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[BITCAST12]], [[C5]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[BITCAST13]], [[C4]] + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i16) = disjoint G_OR [[AND2]], [[AND3]] + ; GFX9-NEXT: [[BITCAST14:%[0-9]+]]:_(f16) = G_BITCAST [[OR1]](i16) + ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(f16) = G_FADD [[INTRINSIC_TRUNC1]], [[BITCAST14]] + ; GFX9-NEXT: [[INTRINSIC_TRUNC2:%[0-9]+]]:_(f16) = G_INTRINSIC_TRUNC [[BITCAST2]] + ; GFX9-NEXT: [[FSUB2:%[0-9]+]]:_(f16) = G_FSUB [[BITCAST2]], [[INTRINSIC_TRUNC2]] + ; GFX9-NEXT: [[FABS2:%[0-9]+]]:_(f16) = G_FABS [[FSUB2]] + ; GFX9-NEXT: [[FCMP2:%[0-9]+]]:_(i1) = G_FCMP floatpred(oge), [[FABS2]](f16), [[C1]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(f16) = G_SELECT [[FCMP2]](i1), [[C2]], [[C3]] + ; GFX9-NEXT: [[BITCAST15:%[0-9]+]]:_(i16) = G_BITCAST [[SELECT2]](f16) + ; GFX9-NEXT: [[BITCAST16:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST2]](f16) + ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(i16) = G_AND [[BITCAST15]], [[C5]] + ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(i16) = G_AND [[BITCAST16]], [[C4]] + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i16) = disjoint G_OR [[AND4]], [[AND5]] + ; GFX9-NEXT: [[BITCAST17:%[0-9]+]]:_(f16) = G_BITCAST [[OR2]](i16) + ; GFX9-NEXT: [[FADD2:%[0-9]+]]:_(f16) = G_FADD [[INTRINSIC_TRUNC2]], [[BITCAST17]] + ; GFX9-NEXT: [[INTRINSIC_TRUNC3:%[0-9]+]]:_(f16) = G_INTRINSIC_TRUNC [[BITCAST4]] + ; GFX9-NEXT: [[FSUB3:%[0-9]+]]:_(f16) = G_FSUB [[BITCAST4]], [[INTRINSIC_TRUNC3]] + ; GFX9-NEXT: [[FABS3:%[0-9]+]]:_(f16) = G_FABS [[FSUB3]] + ; GFX9-NEXT: [[FCMP3:%[0-9]+]]:_(i1) = G_FCMP floatpred(oge), [[FABS3]](f16), [[C1]] + ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(f16) = G_SELECT [[FCMP3]](i1), [[C2]], [[C3]] + ; GFX9-NEXT: [[BITCAST18:%[0-9]+]]:_(i16) = G_BITCAST [[SELECT3]](f16) + ; GFX9-NEXT: [[BITCAST19:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST4]](f16) + ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(i16) = G_AND [[BITCAST18]], [[C5]] + ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(i16) = G_AND [[BITCAST19]], [[C4]] + ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(i16) = disjoint G_OR [[AND6]], [[AND7]] + ; GFX9-NEXT: [[BITCAST20:%[0-9]+]]:_(f16) = G_BITCAST [[OR3]](i16) + ; GFX9-NEXT: [[FADD3:%[0-9]+]]:_(f16) = G_FADD [[INTRINSIC_TRUNC3]], [[BITCAST20]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[FADD]](f16), [[FADD1]](f16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[FADD2]](f16), [[FADD3]](f16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x f16>), [[BUILD_VECTOR1]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST21:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x f16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST21]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x f16>) = G_BITCAST %0(<4 x i16>) + %2:_(<4 x f16>) = G_INTRINSIC_ROUND %1 + %3:_(<4 x i16>) = G_BITCAST %2(<4 x f16>) + $vgpr0_vgpr1 = COPY %3(<4 x i16>) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-trunc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-trunc.mir index cc0369bf509c3..bc3165c29794b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-trunc.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-trunc.mir @@ -15,44 +15,57 @@ body: | ; SI-LABEL: name: test_intrinsic_trunc_s16 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT]] - ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC]](s32) - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; SI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FPEXT]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[INTRINSIC_TRUNC]](f32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + ; ; CI-LABEL: name: test_intrinsic_trunc_s16 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} - ; CI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; CI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT]] - ; CI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC]](s32) - ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; CI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; CI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; CI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FPEXT]] + ; CI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[INTRINSIC_TRUNC]](f32) + ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; CI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + ; ; VI-LABEL: name: test_intrinsic_trunc_s16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INTRINSIC_TRUNC]](s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; VI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f16) = G_INTRINSIC_TRUNC [[BITCAST]] + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[INTRINSIC_TRUNC]](f16) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + ; ; GFX9-LABEL: name: test_intrinsic_trunc_s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY 
$vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INTRINSIC_TRUNC]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s16) = G_INTRINSIC_TRUNC %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f16) = G_INTRINSIC_TRUNC [[BITCAST]] + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[INTRINSIC_TRUNC]](f16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST1]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(f16) = G_BITCAST %1(i16) + %3:_(f16) = G_INTRINSIC_TRUNC %2 + %4:_(i16) = G_BITCAST %3(f16) + %5:_(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... --- @@ -64,30 +77,43 @@ body: | ; SI-LABEL: name: test_intrinsic_trunc_s32 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[COPY]] - ; SI-NEXT: $vgpr0 = COPY [[INTRINSIC_TRUNC]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; SI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[BITCAST]] + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[INTRINSIC_TRUNC]](f32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + ; ; CI-LABEL: name: test_intrinsic_trunc_s32 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} - ; CI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[COPY]] - ; CI-NEXT: $vgpr0 = COPY [[INTRINSIC_TRUNC]](s32) + ; CI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; CI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[BITCAST]] + ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[INTRINSIC_TRUNC]](f32) + ; CI-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + ; ; VI-LABEL: name: test_intrinsic_trunc_s32 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[COPY]] - ; VI-NEXT: $vgpr0 = COPY [[INTRINSIC_TRUNC]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; VI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[BITCAST]] + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[INTRINSIC_TRUNC]](f32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + ; ; GFX9-LABEL: name: test_intrinsic_trunc_s32 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[COPY]] - ; GFX9-NEXT: $vgpr0 = COPY [[INTRINSIC_TRUNC]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_INTRINSIC_TRUNC %0 - $vgpr0 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST [[COPY]](i32) + ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[BITCAST]] + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[INTRINSIC_TRUNC]](f32) + ; GFX9-NEXT: $vgpr0 = COPY 
[[BITCAST1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = G_INTRINSIC_TRUNC %1 + %3:_(i32) = G_BITCAST %2(f32) + $vgpr0 = COPY %3(i32) ... --- @@ -99,49 +125,65 @@ body: | ; SI-LABEL: name: test_intrinsic_trunc_s64 ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 - ; SI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ubfe), [[UV1]](s32), [[C]](s32), [[C1]](s32) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1023 - ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]] - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4503599627370495 - ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C5]](s32), [[AND]](s32) - ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[C4]], [[SUB]](s32) - ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; SI-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[C6]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[XOR]] - ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 51 - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SUB]](s32), [[C5]] - ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SUB]](s32), [[C7]] - ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[MV]], [[AND1]] - ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[COPY]], [[SELECT]] - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[SELECT1]](s64) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[BITCAST]](f64) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST1]](i64) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 11 + ; SI-NEXT: [[INT:%[0-9]+]]:_(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ubfe), [[UV1]](i32), [[C]](i32), [[C1]](i32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 1023 + ; SI-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[INT]], [[C2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[UV1]], [[C3]] + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4503599627370495 + ; SI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; SI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[C5]](i32), [[AND]](i32) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(f64) = G_BITCAST [[MV]](i64) + ; SI-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[C4]], [[SUB]](i32) + ; SI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 -1 + ; SI-NEXT: [[XOR:%[0-9]+]]:_(i64) = G_XOR [[ASHR]], [[C6]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[BITCAST1]], [[XOR]] + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(f64) = G_BITCAST [[AND1]](i64) + ; SI-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 51 + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SUB]](i32), [[C5]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(sgt), [[SUB]](i32), [[C7]] + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(f64) = G_SELECT [[ICMP]](i1), [[BITCAST2]], [[BITCAST3]] + ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(f64) = G_SELECT [[ICMP1]](i1), [[BITCAST]], [[SELECT]] + ; SI-NEXT: 
[[BITCAST4:%[0-9]+]]:_(i64) = G_BITCAST [[SELECT1]](f64) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST4]](i64) + ; ; CI-LABEL: name: test_intrinsic_trunc_s64 ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} - ; CI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s64) = G_INTRINSIC_TRUNC [[COPY]] - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[INTRINSIC_TRUNC]](s64) + ; CI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; CI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f64) = G_INTRINSIC_TRUNC [[BITCAST]] + ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[INTRINSIC_TRUNC]](f64) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](i64) + ; ; VI-LABEL: name: test_intrinsic_trunc_s64 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s64) = G_INTRINSIC_TRUNC [[COPY]] - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[INTRINSIC_TRUNC]](s64) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; VI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f64) = G_INTRINSIC_TRUNC [[BITCAST]] + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[INTRINSIC_TRUNC]](f64) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](i64) + ; ; GFX9-LABEL: name: test_intrinsic_trunc_s64 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s64) = G_INTRINSIC_TRUNC [[COPY]] - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[INTRINSIC_TRUNC]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_INTRINSIC_TRUNC %0 - $vgpr0_vgpr1 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f64) = G_INTRINSIC_TRUNC [[BITCAST]] + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[INTRINSIC_TRUNC]](f64) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(f64) = G_BITCAST %0(i64) + %2:_(f64) = G_INTRINSIC_TRUNC %1 + %3:_(i64) = G_BITCAST %2(f64) + $vgpr0_vgpr1 = COPY %3(i64) ... 
--- @@ -153,78 +195,109 @@ body: | ; SI-LABEL: name: test_intrinsic_trunc_v2s16 ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT]] - ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC]](s32) - ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT1]] - ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC1]](s32) - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %12(i16) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST1]](f16) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %13(i16) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; SI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FPEXT]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[INTRINSIC_TRUNC]](f32) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; SI-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FPEXT1]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[INTRINSIC_TRUNC1]](f32) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST5]](i16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST6]](i16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST7]](<2 x f16>) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST8]](<2 x i16>) + ; ; CI-LABEL: name: test_intrinsic_trunc_v2s16 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} - ; CI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = 
G_FPEXT [[TRUNC]](s16) - ; CI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT]] - ; CI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC]](s32) - ; CI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; CI-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT1]] - ; CI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC1]](s32) - ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; CI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %12(i16) + ; CI-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST1]](f16) + ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %13(i16) + ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; CI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FPEXT]] + ; CI-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[INTRINSIC_TRUNC]](f32) + ; CI-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; CI-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FPEXT1]] + ; CI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[INTRINSIC_TRUNC1]](f32) + ; CI-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; CI-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST5]](i16) + ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST6]](i16) + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; CI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; CI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST7]](<2 x f16>) + ; CI-NEXT: $vgpr0 = COPY [[BITCAST8]](<2 x i16>) + ; ; VI-LABEL: name: test_intrinsic_trunc_v2s16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC]] - ; VI-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC1]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[INTRINSIC_TRUNC]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[INTRINSIC_TRUNC1]](s16) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x 
i16>) = COPY $vgpr0 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %8(i16) + ; VI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f16) = G_INTRINSIC_TRUNC [[BITCAST1]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %9(i16) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(f16) = G_INTRINSIC_TRUNC [[BITCAST2]] + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(i16) = G_BITCAST [[INTRINSIC_TRUNC]](f16) + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(i16) = G_BITCAST [[INTRINSIC_TRUNC1]](f16) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST5]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST6]](i16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST7]](<2 x f16>) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST8]](<2 x i16>) + ; ; GFX9-LABEL: name: test_intrinsic_trunc_v2s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC]] - ; GFX9-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC1]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INTRINSIC_TRUNC]](s16), [[INTRINSIC_TRUNC1]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = G_INTRINSIC_TRUNC %0 - $vgpr0 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST %8(i16) + ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f16) = G_INTRINSIC_TRUNC [[BITCAST1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %9(i16) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST3]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(f16) = G_INTRINSIC_TRUNC [[BITCAST2]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[INTRINSIC_TRUNC]](f16), [[INTRINSIC_TRUNC1]](f16) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BUILD_VECTOR]](<2 x f16>) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST5]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %2:_(<2 x f16>) = 
G_INTRINSIC_TRUNC %1 + %3:_(<2 x i16>) = G_BITCAST %2(<2 x f16>) + $vgpr0 = COPY %3(<2 x i16>) ... --- @@ -236,42 +309,55 @@ body: | ; SI-LABEL: name: test_intrinsic_trunc_v2s32 ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; SI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[UV]] - ; SI-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[UV1]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INTRINSIC_TRUNC]](s32), [[INTRINSIC_TRUNC1]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; SI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[UV]] + ; SI-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[UV1]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[INTRINSIC_TRUNC]](f32), [[INTRINSIC_TRUNC1]](f32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](<2 x i32>) + ; ; CI-LABEL: name: test_intrinsic_trunc_v2s32 ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} - ; CI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[UV]] - ; CI-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[UV1]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INTRINSIC_TRUNC]](s32), [[INTRINSIC_TRUNC1]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; CI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; CI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; CI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[UV]] + ; CI-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[UV1]] + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[INTRINSIC_TRUNC]](f32), [[INTRINSIC_TRUNC1]](f32) + ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](<2 x i32>) + ; ; VI-LABEL: name: test_intrinsic_trunc_v2s32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; VI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[UV]] - ; VI-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[UV1]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INTRINSIC_TRUNC]](s32), [[INTRINSIC_TRUNC1]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; VI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[UV]] + ; VI-NEXT: 
[[INTRINSIC_TRUNC1:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[UV1]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[INTRINSIC_TRUNC]](f32), [[INTRINSIC_TRUNC1]](f32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](<2 x i32>) + ; ; GFX9-LABEL: name: test_intrinsic_trunc_v2s32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[UV]] - ; GFX9-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[UV1]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INTRINSIC_TRUNC]](s32), [[INTRINSIC_TRUNC1]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_INTRINSIC_TRUNC %0 - $vgpr0_vgpr1 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f32>) = G_BITCAST [[COPY]](<2 x i32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f32), [[UV1:%[0-9]+]]:_(f32) = G_UNMERGE_VALUES [[BITCAST]](<2 x f32>) + ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[UV]] + ; GFX9-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[UV1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[INTRINSIC_TRUNC]](f32), [[INTRINSIC_TRUNC1]](f32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x f32>) = G_BITCAST %0(<2 x i32>) + %2:_(<2 x f32>) = G_INTRINSIC_TRUNC %1 + %3:_(<2 x i32>) = G_BITCAST %2(<2 x f32>) + $vgpr0_vgpr1 = COPY %3(<2 x i32>) ... 
--- @@ -283,70 +369,89 @@ body: | ; SI-LABEL: name: test_intrinsic_trunc_v2s64 ; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 - ; SI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ubfe), [[UV3]](s32), [[C]](s32), [[C1]](s32) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1023 - ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C3]] - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4503599627370495 - ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C5]](s32), [[AND]](s32) - ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[C4]], [[SUB]](s32) - ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; SI-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[C6]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[UV]], [[XOR]] - ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 51 - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SUB]](s32), [[C5]] - ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SUB]](s32), [[C7]] - ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[MV]], [[AND1]] - ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV]], [[SELECT]] - ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; SI-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ubfe), [[UV5]](s32), [[C]](s32), [[C1]](s32) - ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[INT1]], [[C2]] - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C3]] - ; SI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C5]](s32), [[AND2]](s32) - ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[C4]], [[SUB1]](s32) - ; SI-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[ASHR1]], [[C6]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[UV1]], [[XOR1]] - ; SI-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SUB1]](s32), [[C5]] - ; SI-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SUB1]](s32), [[C7]] - ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[MV1]], [[AND3]] - ; SI-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV1]], [[SELECT2]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT1]](s64), [[SELECT3]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(f64), [[UV1:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST]](<2 x f64>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i64) = G_BITCAST [[UV]](f64) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST1]](i64) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 11 + ; SI-NEXT: [[INT:%[0-9]+]]:_(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ubfe), [[UV3]](i32), [[C]](i32), [[C1]](i32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 1023 + ; SI-NEXT: [[SUB:%[0-9]+]]:_(i32) = 
G_SUB [[INT]], [[C2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[UV3]], [[C3]] + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4503599627370495 + ; SI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; SI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[C5]](i32), [[AND]](i32) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(f64) = G_BITCAST [[MV]](i64) + ; SI-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[C4]], [[SUB]](i32) + ; SI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 -1 + ; SI-NEXT: [[XOR:%[0-9]+]]:_(i64) = G_XOR [[ASHR]], [[C6]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[BITCAST1]], [[XOR]] + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(f64) = G_BITCAST [[AND1]](i64) + ; SI-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 51 + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SUB]](i32), [[C5]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(sgt), [[SUB]](i32), [[C7]] + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(f64) = G_SELECT [[ICMP]](i1), [[BITCAST2]], [[BITCAST3]] + ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(f64) = G_SELECT [[ICMP1]](i1), [[UV]], [[SELECT]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(i64) = G_BITCAST [[UV1]](f64) + ; SI-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST4]](i64) + ; SI-NEXT: [[INT1:%[0-9]+]]:_(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ubfe), [[UV5]](i32), [[C]](i32), [[C1]](i32) + ; SI-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[INT1]], [[C2]] + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[UV5]], [[C3]] + ; SI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[C5]](i32), [[AND2]](i32) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(f64) = G_BITCAST [[MV1]](i64) + ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[C4]], [[SUB1]](i32) + ; SI-NEXT: [[XOR1:%[0-9]+]]:_(i64) = G_XOR [[ASHR1]], [[C6]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i64) = G_AND [[BITCAST4]], [[XOR1]] + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(f64) = G_BITCAST [[AND3]](i64) + ; SI-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SUB1]](i32), [[C5]] + ; SI-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(sgt), [[SUB1]](i32), [[C7]] + ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(f64) = G_SELECT [[ICMP2]](i1), [[BITCAST5]], [[BITCAST6]] + ; SI-NEXT: [[SELECT3:%[0-9]+]]:_(f64) = G_SELECT [[ICMP3]](i1), [[UV1]], [[SELECT2]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f64>) = G_BUILD_VECTOR [[SELECT1]](f64), [[SELECT3]](f64) + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BUILD_VECTOR]](<2 x f64>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST7]](<2 x i64>) + ; ; CI-LABEL: name: test_intrinsic_trunc_v2s64 ; CI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CI-NEXT: {{ $}} - ; CI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; CI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s64) = G_INTRINSIC_TRUNC [[UV]] - ; CI-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s64) = G_INTRINSIC_TRUNC [[UV1]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[INTRINSIC_TRUNC]](s64), [[INTRINSIC_TRUNC1]](s64) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; CI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; CI-NEXT: [[UV:%[0-9]+]]:_(f64), [[UV1:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST]](<2 x f64>) + ; CI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f64) = G_INTRINSIC_TRUNC [[UV]] + ; CI-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(f64) = 
G_INTRINSIC_TRUNC [[UV1]] + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f64>) = G_BUILD_VECTOR [[INTRINSIC_TRUNC]](f64), [[INTRINSIC_TRUNC1]](f64) + ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BUILD_VECTOR]](<2 x f64>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST1]](<2 x i64>) + ; ; VI-LABEL: name: test_intrinsic_trunc_v2s64 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; VI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s64) = G_INTRINSIC_TRUNC [[UV]] - ; VI-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s64) = G_INTRINSIC_TRUNC [[UV1]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[INTRINSIC_TRUNC]](s64), [[INTRINSIC_TRUNC1]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(f64), [[UV1:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST]](<2 x f64>) + ; VI-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f64) = G_INTRINSIC_TRUNC [[UV]] + ; VI-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(f64) = G_INTRINSIC_TRUNC [[UV1]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f64>) = G_BUILD_VECTOR [[INTRINSIC_TRUNC]](f64), [[INTRINSIC_TRUNC1]](f64) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BUILD_VECTOR]](<2 x f64>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST1]](<2 x i64>) + ; ; GFX9-LABEL: name: test_intrinsic_trunc_v2s64 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s64) = G_INTRINSIC_TRUNC [[UV]] - ; GFX9-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s64) = G_INTRINSIC_TRUNC [[UV1]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[INTRINSIC_TRUNC]](s64), [[INTRINSIC_TRUNC1]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s64>) = G_INTRINSIC_TRUNC %0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f64>) = G_BITCAST [[COPY]](<2 x i64>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(f64), [[UV1:%[0-9]+]]:_(f64) = G_UNMERGE_VALUES [[BITCAST]](<2 x f64>) + ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f64) = G_INTRINSIC_TRUNC [[UV]] + ; GFX9-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(f64) = G_INTRINSIC_TRUNC [[UV1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f64>) = G_BUILD_VECTOR [[INTRINSIC_TRUNC]](f64), [[INTRINSIC_TRUNC1]](f64) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BUILD_VECTOR]](<2 x f64>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST1]](<2 x i64>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x f64>) = G_BITCAST %0(<2 x i64>) + %2:_(<2 x f64>) = G_INTRINSIC_TRUNC %1 + %3:_(<2 x i64>) = G_BITCAST %2(<2 x f64>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3(<2 x i64>) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-inttoptr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-inttoptr.mir index df13d7c50fc96..cdfc9a81df65f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-inttoptr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-inttoptr.mir @@ -10,12 +10,12 @@ body: | ; CHECK-LABEL: name: test_inttoptr_s64_to_p0 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[COPY]](s64) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[COPY]](i64) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p0) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(p0) = G_INTTOPTR %0 - $vgpr0_vgpr1 = COPY %1 + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(p0) = G_INTTOPTR %0(i64) + $vgpr0_vgpr1 = COPY %1(p0) ... --- @@ -27,12 +27,12 @@ body: | ; CHECK-LABEL: name: test_inttoptr_s64_to_p1 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[COPY]](s64) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[COPY]](i64) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(p1) = G_INTTOPTR %0 - $vgpr0_vgpr1 = COPY %1 + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(p1) = G_INTTOPTR %0(i64) + $vgpr0_vgpr1 = COPY %1(p1) ... --- @@ -44,12 +44,12 @@ body: | ; CHECK-LABEL: name: test_inttoptr_s64_to_p4 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[COPY]](s64) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[COPY]](i64) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p4) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(p4) = G_INTTOPTR %0 - $vgpr0_vgpr1 = COPY %1 + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(p4) = G_INTTOPTR %0(i64) + $vgpr0_vgpr1 = COPY %1(p4) ... --- @@ -61,12 +61,12 @@ body: | ; CHECK-LABEL: name: test_inttoptr_s32_to_p3 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[COPY]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[COPY]](i32) ; CHECK-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) - %0:_(s32) = COPY $vgpr0 - %1:_(p3) = G_INTTOPTR %0 - $vgpr0 = COPY %1 + %0:_(i32) = COPY $vgpr0 + %1:_(p3) = G_INTTOPTR %0(i32) + $vgpr0 = COPY %1(p3) ... --- @@ -78,12 +78,12 @@ body: | ; CHECK-LABEL: name: test_inttoptr_s32_to_p5 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[COPY]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[COPY]](i32) ; CHECK-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) - %0:_(s32) = COPY $vgpr0 - %1:_(p5) = G_INTTOPTR %0 - $vgpr0 = COPY %1 + %0:_(i32) = COPY $vgpr0 + %1:_(p5) = G_INTTOPTR %0(i32) + $vgpr0 = COPY %1(p5) ... 
--- @@ -95,12 +95,12 @@ body: | ; CHECK-LABEL: name: test_inttoptr_s64_to_p999 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p999) = G_INTTOPTR [[COPY]](s64) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p999) = G_INTTOPTR [[COPY]](i64) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p999) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(p999) = G_INTTOPTR %0 - $vgpr0_vgpr1 = COPY %1 + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(p999) = G_INTTOPTR %0(i64) + $vgpr0_vgpr1 = COPY %1(p999) ... --- @@ -112,13 +112,13 @@ body: | ; CHECK-LABEL: name: test_inttoptr_s32_to_p0 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32) - ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[ZEXT]](s64) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[COPY]](i32) + ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[ZEXT]](i64) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p0) - %0:_(s32) = COPY $vgpr0 - %1:_(p0) = G_INTTOPTR %0 - $vgpr0_vgpr1 = COPY %1 + %0:_(i32) = COPY $vgpr0 + %1:_(p0) = G_INTTOPTR %0(i32) + $vgpr0_vgpr1 = COPY %1(p0) ... --- @@ -130,13 +130,13 @@ body: | ; CHECK-LABEL: name: test_inttoptr_s128_to_p0 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[COPY]](s128) - ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[TRUNC]](s64) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i64) = G_TRUNC [[COPY]](i128) + ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[TRUNC]](i64) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p0) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(p0) = G_INTTOPTR %0 - $vgpr0_vgpr1 = COPY %1 + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(p0) = G_INTTOPTR %0(i128) + $vgpr0_vgpr1 = COPY %1(p0) ... --- @@ -148,15 +148,15 @@ body: | ; CHECK-LABEL: name: test_inttoptr_v2s64_to_v2p0 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[UV]](s64) - ; CHECK-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p0) = G_INTTOPTR [[UV1]](s64) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[UV]](i64) + ; CHECK-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p0) = G_INTTOPTR [[UV1]](i64) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[INTTOPTR]](p0), [[INTTOPTR1]](p0) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p0>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x p0>) = G_INTTOPTR %0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x p0>) = G_INTTOPTR %0(<2 x i64>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x p0>) ... 
--- @@ -168,17 +168,17 @@ body: | ; CHECK-LABEL: name: test_inttoptr_v2s32_to_v2p0 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[UV]](s32) - ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[ZEXT]](s64) - ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[UV1]](s32) - ; CHECK-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p0) = G_INTTOPTR [[ZEXT1]](s64) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[UV]](i32) + ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[ZEXT]](i64) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[UV1]](i32) + ; CHECK-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p0) = G_INTTOPTR [[ZEXT1]](i64) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[INTTOPTR]](p0), [[INTTOPTR1]](p0) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p0>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x p0>) = G_INTTOPTR %0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x p0>) = G_INTTOPTR %0(<2 x i32>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x p0>) ... --- @@ -190,15 +190,15 @@ body: | ; CHECK-LABEL: name: test_inttoptr_s29_to_p3 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 536870911 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[AND]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 536870911 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[AND]](i32) ; CHECK-NEXT: S_ENDPGM 0, implicit [[INTTOPTR]](p3) - %0:_(s32) = COPY $vgpr0 - %1:_(s29) = G_TRUNC %0 - %2:_(p3) = G_INTTOPTR %1 - S_ENDPGM 0, implicit %2 + %0:_(i32) = COPY $vgpr0 + %1:_(i29) = G_TRUNC %0(i32) + %2:_(p3) = G_INTTOPTR %1(i29) + S_ENDPGM 0, implicit %2(p3) ... --- @@ -210,12 +210,12 @@ body: | ; CHECK-LABEL: name: test_inttoptr_s33_to_p3 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[TRUNC]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY]](i64) + ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[TRUNC]](i32) ; CHECK-NEXT: S_ENDPGM 0, implicit [[INTTOPTR]](p3) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s33) = G_TRUNC %0 - %2:_(p3) = G_INTTOPTR %1 - S_ENDPGM 0, implicit %2 + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i33) = G_TRUNC %0(i64) + %2:_(p3) = G_INTTOPTR %1(i33) + S_ENDPGM 0, implicit %2(p3) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-jump-table.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-jump-table.mir index b54c368fbeeef..7bf1670d4becb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-jump-table.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-jump-table.mir @@ -56,9 +56,9 @@ body: | bb.0.entry: liveins: $sgpr0 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = G_CONSTANT i32 0 - %2:_(s1) = G_ICMP intpred(ugt), %0, %1 + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = G_CONSTANT i32 0 + %2:_(i1) = G_ICMP intpred(ugt), %0, %1 %3:_(p0) = G_JUMP_TABLE %jump-table.0 S_NOP 0, implicit %3 G_BRCOND %2, %bb.4 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.s.buffer.load.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.s.buffer.load.mir index aebda3f28d5fd..d0200a128de19 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.s.buffer.load.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.s.buffer.load.mir @@ -12,14 +12,14 @@ body: | ; GCN-LABEL: name: s_buffer_load_s32 ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GCN-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s32)) - ; GCN-NEXT: S_ENDPGM 0, implicit [[AMDGPU_S_BUFFER_LOAD]](s32) - %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %1:_(s32) = G_CONSTANT i32 0 - %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0 - S_ENDPGM 0, implicit %2 + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GCN-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(i32) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x i32>), [[C]](i32), 0 :: (dereferenceable invariant load (i32)) + ; GCN-NEXT: S_ENDPGM 0, implicit [[AMDGPU_S_BUFFER_LOAD]](i32) + %0:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %1:_(i32) = G_CONSTANT i32 0 + %2:_(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0(<4 x i32>), %1(i32), 0 + S_ENDPGM 0, implicit %2(i32) ... 
@@ -32,24 +32,24 @@ body: | ; GFX67-LABEL: name: s_buffer_load_v3s32 ; GFX67: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX67-NEXT: {{ $}} - ; GFX67-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX67-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX67-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 16) - ; GFX67-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>) - ; GFX67-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) - ; GFX67-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; GFX67-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX67-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX67-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x i32>), [[C]](i32), 0 :: (dereferenceable invariant load (i96), align 16) + ; GFX67-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x i32>) + ; GFX67-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32) + ; GFX67-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x i32>) ; ; GFX12-LABEL: name: s_buffer_load_v3s32 ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 16) - ; GFX12-NEXT: S_ENDPGM 0, implicit [[AMDGPU_S_BUFFER_LOAD]](<3 x s32>) - %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %1:_(s32) = G_CONSTANT i32 0 - %2:_(<3 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0 - S_ENDPGM 0, implicit %2 + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<3 x i32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x i32>), [[C]](i32), 0 :: (dereferenceable invariant load (i96), align 16) + ; GFX12-NEXT: S_ENDPGM 0, implicit [[AMDGPU_S_BUFFER_LOAD]](<3 x i32>) + %0:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %1:_(i32) = G_CONSTANT i32 0 + %2:_(<3 x i32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0(<4 x i32>), %1(i32), 0 + S_ENDPGM 0, implicit %2(<3 x i32>) ... 
@@ -62,26 +62,26 @@ body: | ; GFX67-LABEL: name: s_buffer_load_v3p3 ; GFX67: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX67-NEXT: {{ $}} - ; GFX67-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX67-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX67-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 16) - ; GFX67-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>) - ; GFX67-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) - ; GFX67-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x p3>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) + ; GFX67-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX67-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX67-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x i32>), [[C]](i32), 0 :: (dereferenceable invariant load (i96), align 16) + ; GFX67-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x i32>) + ; GFX67-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32) + ; GFX67-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x p3>) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) ; GFX67-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<3 x p3>) ; ; GFX12-LABEL: name: s_buffer_load_v3p3 ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 16) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x p3>) = G_BITCAST [[AMDGPU_S_BUFFER_LOAD]](<3 x s32>) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<3 x i32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x i32>), [[C]](i32), 0 :: (dereferenceable invariant load (i96), align 16) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x p3>) = G_BITCAST [[AMDGPU_S_BUFFER_LOAD]](<3 x i32>) ; GFX12-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<3 x p3>) - %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %1:_(s32) = G_CONSTANT i32 0 - %2:_(<3 x p3>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0 - S_ENDPGM 0, implicit %2 + %0:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %1:_(i32) = G_CONSTANT i32 0 + %2:_(<3 x p3>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0(<4 x i32>), %1(i32), 0 + S_ENDPGM 0, implicit %2(<3 x p3>) ... 
@@ -94,26 +94,26 @@ body: | ; GFX67-LABEL: name: s_buffer_load_v6s16 ; GFX67: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX67-NEXT: {{ $}} - ; GFX67-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX67-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX67-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 16) - ; GFX67-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>) - ; GFX67-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) - ; GFX67-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX67-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<6 x s16>) + ; GFX67-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX67-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX67-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x i32>), [[C]](i32), 0 :: (dereferenceable invariant load (i96), align 16) + ; GFX67-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x i32>) + ; GFX67-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32) + ; GFX67-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x i16>) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX67-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<6 x i16>) ; ; GFX12-LABEL: name: s_buffer_load_v6s16 ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 16) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[AMDGPU_S_BUFFER_LOAD]](<3 x s32>) - ; GFX12-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<6 x s16>) - %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %1:_(s32) = G_CONSTANT i32 0 - %2:_(<6 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0 - S_ENDPGM 0, implicit %2 + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<3 x i32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x i32>), [[C]](i32), 0 :: (dereferenceable invariant load (i96), align 16) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x i16>) = G_BITCAST [[AMDGPU_S_BUFFER_LOAD]](<3 x i32>) + ; GFX12-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<6 x i16>) + %0:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %1:_(i32) = G_CONSTANT i32 0 + %2:_(<6 x i16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0(<4 x i32>), %1(i32), 0 + S_ENDPGM 0, implicit %2(<6 x i16>) ... 
@@ -126,16 +126,16 @@ body: | ; GCN-LABEL: name: s_buffer_load_v6s32 ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GCN-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<8 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s192), align 32) - ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<8 x s32>) - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<6 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32) - ; GCN-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<6 x s32>) - %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %1:_(s32) = G_CONSTANT i32 0 - %2:_(<6 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0 - S_ENDPGM 0, implicit %2 + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GCN-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<8 x i32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x i32>), [[C]](i32), 0 :: (dereferenceable invariant load (i192), align 32) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<8 x i32>) + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<6 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32), [[UV4]](i32), [[UV5]](i32) + ; GCN-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<6 x i32>) + %0:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %1:_(i32) = G_CONSTANT i32 0 + %2:_(<6 x i32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0(<4 x i32>), %1(i32), 0 + S_ENDPGM 0, implicit %2(<6 x i32>) ... 
@@ -148,16 +148,16 @@ body: | ; GCN-LABEL: name: s_buffer_load_v3s64 ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GCN-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s64>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s192), align 32) - ; GCN-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s64>) - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64) - ; GCN-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x s64>) - %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %1:_(s32) = G_CONSTANT i32 0 - %2:_(<3 x s64>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0 - S_ENDPGM 0, implicit %2 + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GCN-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x i64>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x i32>), [[C]](i32), 0 :: (dereferenceable invariant load (i192), align 32) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64), [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x i64>) + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i64>) = G_BUILD_VECTOR [[UV]](i64), [[UV1]](i64), [[UV2]](i64) + ; GCN-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x i64>) + %0:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %1:_(i32) = G_CONSTANT i32 0 + %2:_(<3 x i64>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0(<4 x i32>), %1(i32), 0 + S_ENDPGM 0, implicit %2(<3 x i64>) ... 
@@ -170,94 +170,94 @@ body: | ; GFX67-LABEL: name: s_buffer_load_v12s8 ; GFX67: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX67-NEXT: {{ $}} - ; GFX67-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX67-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX67-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 16) - ; GFX67-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>) - ; GFX67-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX67-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32) - ; GFX67-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX67-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) - ; GFX67-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX67-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C3]](s32) - ; GFX67-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) - ; GFX67-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) - ; GFX67-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C3]](s32) - ; GFX67-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32) - ; GFX67-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32) - ; GFX67-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C3]](s32) - ; GFX67-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX67-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C4]] - ; GFX67-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C4]] - ; GFX67-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; GFX67-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX67-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX67-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C2]](s32) - ; GFX67-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL1]] - ; GFX67-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX67-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C4]] - ; GFX67-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] - ; GFX67-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) - ; GFX67-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL2]] - ; GFX67-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX67-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR5]], [[C2]](s32) - ; GFX67-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[LSHR4]], [[SHL3]] - ; GFX67-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; GFX67-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C4]] - ; GFX67-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C4]] - ; GFX67-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) - ; GFX67-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL4]] - ; GFX67-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; GFX67-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LSHR8]], [[C2]](s32) - ; GFX67-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR7]], [[SHL5]] - ; GFX67-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; GFX67-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; GFX67-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<12 x s16>) + ; GFX67-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX67-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; 
GFX67-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x i32>), [[C]](i32), 0 :: (dereferenceable invariant load (i96), align 16) + ; GFX67-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x i32>) + ; GFX67-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX67-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[C1]](i32) + ; GFX67-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX67-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[C2]](i32) + ; GFX67-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; GFX67-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[C3]](i32) + ; GFX67-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C1]](i32) + ; GFX67-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C2]](i32) + ; GFX67-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C3]](i32) + ; GFX67-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[UV2]], [[C1]](i32) + ; GFX67-NEXT: [[LSHR7:%[0-9]+]]:_(i32) = G_LSHR [[UV2]], [[C2]](i32) + ; GFX67-NEXT: [[LSHR8:%[0-9]+]]:_(i32) = G_LSHR [[UV2]], [[C3]](i32) + ; GFX67-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX67-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[UV]], [[C4]] + ; GFX67-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LSHR]], [[C4]] + ; GFX67-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C2]](i32) + ; GFX67-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; GFX67-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX67-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LSHR2]], [[C2]](i32) + ; GFX67-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[LSHR1]], [[SHL1]] + ; GFX67-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; GFX67-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[UV1]], [[C4]] + ; GFX67-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[LSHR3]], [[C4]] + ; GFX67-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C2]](i32) + ; GFX67-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL2]] + ; GFX67-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; GFX67-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LSHR5]], [[C2]](i32) + ; GFX67-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[LSHR4]], [[SHL3]] + ; GFX67-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR3]](i32) + ; GFX67-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[UV2]], [[C4]] + ; GFX67-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[LSHR6]], [[C4]] + ; GFX67-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[AND5]], [[C2]](i32) + ; GFX67-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[AND4]], [[SHL4]] + ; GFX67-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR4]](i32) + ; GFX67-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[LSHR8]], [[C2]](i32) + ; GFX67-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[LSHR7]], [[SHL5]] + ; GFX67-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR5]](i32) + ; GFX67-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x i16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x i16>), [[BITCAST1]](<2 x i16>), [[BITCAST2]](<2 x i16>), [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>), [[BITCAST5]](<2 x i16>) + ; GFX67-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<12 x i16>) ; ; GFX12-LABEL: name: s_buffer_load_v12s8 ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 
16) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<3 x s32>) - ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32) - ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) - ; GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX12-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C3]](s32) - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX12-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) - ; GFX12-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) - ; GFX12-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C3]](s32) - ; GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX12-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; GFX12-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; GFX12-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32) - ; GFX12-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32) - ; GFX12-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C3]](s32) - ; GFX12-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; GFX12-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; GFX12-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; GFX12-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC8]](s16), [[TRUNC9]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC10]](s16), [[TRUNC11]](s16) - ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>) - ; GFX12-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<12 x s16>) - %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %1:_(s32) = G_CONSTANT i32 0 - %2:_(<12 x s8>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0 - %3:_(<12 x s16>) = G_ANYEXT %2 - S_ENDPGM 0, implicit %3 + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<3 x i32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x i32>), [[C]](i32), 0 :: (dereferenceable invariant load (i96), align 16) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<3 x i32>) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[C1]](i32) + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 
16 + ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[C2]](i32) + ; GFX12-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; GFX12-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[C3]](i32) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[UV]](i32) + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX12-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C1]](i32) + ; GFX12-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C2]](i32) + ; GFX12-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C3]](i32) + ; GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[UV1]](i32) + ; GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX12-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR4]](i32) + ; GFX12-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR5]](i32) + ; GFX12-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[UV2]], [[C1]](i32) + ; GFX12-NEXT: [[LSHR7:%[0-9]+]]:_(i32) = G_LSHR [[UV2]], [[C2]](i32) + ; GFX12-NEXT: [[LSHR8:%[0-9]+]]:_(i32) = G_LSHR [[UV2]], [[C3]](i32) + ; GFX12-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[UV2]](i32) + ; GFX12-NEXT: [[TRUNC9:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR6]](i32) + ; GFX12-NEXT: [[TRUNC10:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR7]](i32) + ; GFX12-NEXT: [[TRUNC11:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR8]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC6]](i16), [[TRUNC7]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC8]](i16), [[TRUNC9]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC10]](i16), [[TRUNC11]](i16) + ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR5]](<2 x i16>) + ; GFX12-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<12 x i16>) + %0:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %1:_(i32) = G_CONSTANT i32 0 + %2:_(<12 x i8>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0(<4 x i32>), %1(i32), 0 + %3:_(<12 x i16>) = G_ANYEXT %2(<12 x i8>) + S_ENDPGM 0, implicit %3(<12 x i16>) ... 
@@ -270,23 +270,23 @@ body: | ; GFX67-LABEL: name: s_buffer_load_s96 ; GFX67: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX67-NEXT: {{ $}} - ; GFX67-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX67-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX67-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 16) - ; GFX67-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>) - ; GFX67-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) - ; GFX67-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; GFX67-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX67-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX67-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x i32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x i32>), [[C]](i32), 0 :: (dereferenceable invariant load (i96), align 16) + ; GFX67-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x i32>) + ; GFX67-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32) + ; GFX67-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x i32>) ; ; GFX12-LABEL: name: s_buffer_load_s96 ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX12-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 16) - ; GFX12-NEXT: S_ENDPGM 0, implicit [[AMDGPU_S_BUFFER_LOAD]](<3 x s32>) - %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %1:_(s32) = G_CONSTANT i32 0 - %2:_(<3 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0 - S_ENDPGM 0, implicit %2 + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX12-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<3 x i32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x i32>), [[C]](i32), 0 :: (dereferenceable invariant load (i96), align 16) + ; GFX12-NEXT: S_ENDPGM 0, implicit [[AMDGPU_S_BUFFER_LOAD]](<3 x i32>) + %0:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %1:_(i32) = G_CONSTANT i32 0 + %2:_(<3 x i32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0(<4 x i32>), %1(i32), 0 + S_ENDPGM 0, implicit %2(<3 x i32>) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant-32bit.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant-32bit.mir index cd23abedaa4c2..a2b556466df9c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant-32bit.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant-32bit.mir @@ -11,30 +11,30 @@ body: | ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $vgpr0 - ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[MV]](p4) :: (load (s8), addrspace 6) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[MV]], [[C1]](s64) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 6) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C2]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[MV]], [[C3]](s64) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 6) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 6) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C2]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C4]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(i32) = G_PTRTOINT [[COPY]](p6) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](i32), [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[MV]](p4) :: (load (i8), addrspace 6) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[MV]], [[C1]](i64) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 6) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C2]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[MV]], [[C3]](i64) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 6) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C1]](i64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i8) from unknown-address + 3, addrspace 6) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C2]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C4]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-NEXT: $vgpr0 = COPY [[OR2]](i32) %0:_(p6) = COPY $vgpr0 - 
%1:_(s32) = G_LOAD %0 :: (load (s32), align 1, addrspace 6) - $vgpr0 = COPY %1 + %1:_(i32) = G_LOAD %0(p6) :: (load (i32), align 1, addrspace 6) + $vgpr0 = COPY %1(i32) ... --- @@ -47,12 +47,12 @@ body: | ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $vgpr0 - ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load (s32), addrspace 6) - ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(i32) = G_PTRTOINT [[COPY]](p6) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](i32), [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[MV]](p4) :: (load (i32), addrspace 6) + ; CI-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p6) = COPY $vgpr0 - %1:_(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 6) - $vgpr0 = COPY %1 + %1:_(i32) = G_LOAD %0(p6) :: (load (i32), addrspace 6) + $vgpr0 = COPY %1(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir index ff1d3fe379673..5dda3f1f1020b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir @@ -15,32 +15,32 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 4) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; CI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i8), addrspace 4) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; CI-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; VI-LABEL: name: test_load_constant_s1_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 4) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; VI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i8), addrspace 4) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; VI-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; GFX9-LABEL: name: test_load_constant_s1_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 4) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; GFX9-NEXT: $vgpr0 = COPY [[AND]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i8), addrspace 4) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; GFX9-NEXT: $vgpr0 = COPY [[AND]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s1) = G_LOAD %0 :: (load (s1), align 1, addrspace 4) - %2:_(s32) = G_ZEXT %1 - $vgpr0 = COPY %2 + %1:_(i1) = G_LOAD %0(p1) :: (load (i1), addrspace 4) + %2:_(i32) = 
G_ZEXT %1(i1) + $vgpr0 = COPY %2(i32) ... --- @@ -53,32 +53,32 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; CI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; CI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; CI-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; VI-LABEL: name: test_load_constant_s2_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; VI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; VI-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; GFX9-LABEL: name: test_load_constant_s2_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; GFX9-NEXT: $vgpr0 = COPY [[AND]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; GFX9-NEXT: $vgpr0 = COPY [[AND]](i32) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s2) = G_LOAD %0 :: (load (s2), align 1, addrspace 4) - %2:_(s32) = G_ZEXT %1 - $vgpr0 = COPY %2 + %1:_(i2) = G_LOAD %0(p4) :: (load (i2), addrspace 4) + %2:_(i32) = G_ZEXT %1(i2) + $vgpr0 = COPY %2(i32) ... 
--- @@ -91,26 +91,26 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4) - ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i8), align 4, addrspace 4) + ; CI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; VI-LABEL: name: test_load_constant_s8_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i8), align 4, addrspace 4) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-LABEL: name: test_load_constant_s8_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4) - ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i8), align 4, addrspace 4) + ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s8) = G_LOAD %0 :: (load (s8), align 4, addrspace 4) - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + %1:_(i8) = G_LOAD %0(p4) :: (load (i8), align 4, addrspace 4) + %2:_(i32) = G_ANYEXT %1(i8) + $vgpr0 = COPY %2(i32) ... --- @@ -123,26 +123,26 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; CI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; VI-LABEL: name: test_load_constant_s8_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-LABEL: name: test_load_constant_s8_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s8) = G_LOAD %0 :: (load (s8), align 1, addrspace 4) - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + %1:_(i8) = G_LOAD %0(p4) :: (load (i8), addrspace 4) + %2:_(i32) = G_ANYEXT %1(i8) + $vgpr0 = COPY %2(i32) ... 
--- @@ -155,26 +155,26 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4) - ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i16), align 4, addrspace 4) + ; CI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; VI-LABEL: name: test_load_constant_s16_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i16), align 4, addrspace 4) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-LABEL: name: test_load_constant_s16_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4) - ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i16), align 4, addrspace 4) + ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s16) = G_LOAD %0 :: (load (s16), align 4, addrspace 4) - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + %1:_(i16) = G_LOAD %0(p4) :: (load (i16), align 4, addrspace 4) + %2:_(i32) = G_ANYEXT %1(i16) + $vgpr0 = COPY %2(i32) ... --- @@ -187,26 +187,26 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4) - ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i16), addrspace 4) + ; CI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; VI-LABEL: name: test_load_constant_s16_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i16), addrspace 4) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-LABEL: name: test_load_constant_s16_align2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4) - ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i16), addrspace 4) + ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s16) = G_LOAD %0 :: (load (s16), align 2, addrspace 4) - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + %1:_(i16) = G_LOAD %0(p4) :: (load (i16), addrspace 4) + %2:_(i32) = G_ANYEXT %1(i16) + $vgpr0 = COPY %2(i32) ... 
--- @@ -219,44 +219,44 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; VI-LABEL: name: test_load_constant_s16_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; GFX9-LABEL: name: test_load_constant_s16_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: $vgpr0 = COPY [[OR]](s32) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i8) 
from unknown-address + 1, addrspace 4) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: $vgpr0 = COPY [[OR]](i32) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s16) = G_LOAD %0 :: (load (s16), align 1, addrspace 4) - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + %1:_(i16) = G_LOAD %0(p4) :: (load (i16), align 1, addrspace 4) + %2:_(i32) = G_ANYEXT %1(i16) + $vgpr0 = COPY %2(i32) ... --- @@ -269,25 +269,25 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4) - ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i32), addrspace 4) + ; CI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; VI-LABEL: name: test_load_constant_s32_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i32), addrspace 4) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-LABEL: name: test_load_constant_s32_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4) - ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i32), addrspace 4) + ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 4) - $vgpr0 = COPY %1 + %1:_(i32) = G_LOAD %0(p4) :: (load (i32), addrspace 4) + $vgpr0 = COPY %1(i32) ... 
--- @@ -300,43 +300,43 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s16), addrspace 4) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i16), addrspace 4) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i16) from unknown-address + 2, addrspace 4) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; VI-LABEL: name: test_load_constant_s32_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s16), addrspace 4) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i16), addrspace 4) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i16) from unknown-address + 2, addrspace 4) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; GFX9-LABEL: name: test_load_constant_s32_align2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s16), addrspace 4) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: $vgpr0 = COPY [[OR]](s32) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i16), addrspace 4) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) 
:: (load (i16) from unknown-address + 2, addrspace 4) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: $vgpr0 = COPY [[OR]](i32) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_LOAD %0 :: (load (s32), align 2, addrspace 4) - $vgpr0 = COPY %1 + %1:_(i32) = G_LOAD %0(p4) :: (load (i32), align 2, addrspace 4) + $vgpr0 = COPY %1(i32) ... --- @@ -349,73 +349,73 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 4) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i8) from unknown-address + 3, addrspace 4) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; VI-LABEL: name: test_load_constant_s32_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; VI-NEXT: 
[[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 4) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i8) from unknown-address + 3, addrspace 4) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; GFX9-LABEL: name: test_load_constant_s32_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) - ; GFX9-NEXT: 
[[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i8) from unknown-address + 3, addrspace 4) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](i32) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_LOAD %0 :: (load (s32), align 1, addrspace 4) - $vgpr0 = COPY %1 + %1:_(i32) = G_LOAD %0(p4) :: (load (i32), align 1, addrspace 4) + $vgpr0 = COPY %1(i32) ... 
--- @@ -428,26 +428,26 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), align 8, addrspace 4) - ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i32), align 8, addrspace 4) + ; CI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; VI-LABEL: name: test_load_constant_s24_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), align 8, addrspace 4) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i32), align 8, addrspace 4) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-LABEL: name: test_load_constant_s24_align8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), align 8, addrspace 4) - ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i32), align 8, addrspace 4) + ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s24) = G_LOAD %0 :: (load (s24), align 8, addrspace 4) - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + %1:_(i24) = G_LOAD %0(p4) :: (load (i24), align 8, addrspace 4) + %2:_(i32) = G_ANYEXT %1(i24) + $vgpr0 = COPY %2(i32) ... --- @@ -460,26 +460,26 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4) - ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i32), addrspace 4) + ; CI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; VI-LABEL: name: test_load_constant_s24_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i32), addrspace 4) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-LABEL: name: test_load_constant_s24_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4) - ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i32), addrspace 4) + ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s24) = G_LOAD %0 :: (load (s24), align 4, addrspace 4) - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + %1:_(i24) = G_LOAD %0(p4) :: (load (i24), align 4, addrspace 4) + %2:_(i32) = G_ANYEXT %1(i24) + $vgpr0 = COPY %2(i32) ... 
--- @@ -492,44 +492,44 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s16), addrspace 4) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 2, align 2, addrspace 4) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i16), addrspace 4) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 2, align 2, addrspace 4) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; VI-LABEL: name: test_load_constant_s24_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s16), addrspace 4) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 2, align 2, addrspace 4) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i16), addrspace 4) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 2, align 2, addrspace 4) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; GFX9-LABEL: name: test_load_constant_s24_align2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s16), addrspace 4) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 2, align 2, addrspace 4) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: $vgpr0 = COPY [[OR]](s32) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i16), addrspace 4) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: 
[[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 2, align 2, addrspace 4) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: $vgpr0 = COPY [[OR]](i32) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s24) = G_LOAD %0 :: (load (s24), align 2, addrspace 4) - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + %1:_(i24) = G_LOAD %0(p4) :: (load (i24), align 2, addrspace 4) + %2:_(i32) = G_ANYEXT %1(i24) + $vgpr0 = COPY %2(i32) ... --- @@ -542,62 +542,62 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] - ; CI-NEXT: $vgpr0 = COPY [[OR1]](s32) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 4) + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C3]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[OR]] + ; CI-NEXT: $vgpr0 = COPY [[OR1]](i32) ; ; VI-LABEL: name: test_load_constant_s24_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; 
VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] - ; VI-NEXT: $vgpr0 = COPY [[OR1]](s32) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 4) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C3]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[OR]] + ; VI-NEXT: $vgpr0 = COPY [[OR1]](i32) ; ; GFX9-LABEL: name: test_load_constant_s24_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] - ; GFX9-NEXT: $vgpr0 = COPY [[OR1]](s32) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 4) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C3]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[OR]] + ; GFX9-NEXT: $vgpr0 = COPY [[OR1]](i32) %0:_(p4) 
= COPY $vgpr0_vgpr1 - %1:_(s24) = G_LOAD %0 :: (load (s24), align 1, addrspace 4) - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + %1:_(i24) = G_LOAD %0(p4) :: (load (i24), align 1, addrspace 4) + %2:_(i32) = G_ANYEXT %1(i24) + $vgpr0 = COPY %2(i32) ... --- @@ -610,32 +610,32 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load (s64), addrspace 4) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 - ; CI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[LOAD]], [[C]] - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[AND]](s64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p4) :: (load (i64), addrspace 4) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 281474976710655 + ; CI-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[LOAD]], [[C]] + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[AND]](i64) ; ; VI-LABEL: name: test_load_constant_s48_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load (s64), addrspace 4) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[LOAD]], [[C]] - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[AND]](s64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p4) :: (load (i64), addrspace 4) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 281474976710655 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[LOAD]], [[C]] + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[AND]](i64) ; ; GFX9-LABEL: name: test_load_constant_s48_align8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load (s64), addrspace 4) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[LOAD]], [[C]] - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[AND]](s64) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p4) :: (load (i64), addrspace 4) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 281474976710655 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[LOAD]], [[C]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[AND]](i64) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s48) = G_LOAD %0 :: (load (s48), align 8, addrspace 4) - %2:_(s64) = G_ZEXT %1 - $vgpr0_vgpr1 = COPY %2 + %1:_(i48) = G_LOAD %0(p4) :: (load (i48), align 8, addrspace 4) + %2:_(i64) = G_ZEXT %1(i48) + $vgpr0_vgpr1 = COPY %2(i64) ... 
--- @@ -648,25 +648,25 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load (s64), addrspace 4) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p4) :: (load (i64), addrspace 4) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; VI-LABEL: name: test_load_constant_s64_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load (s64), addrspace 4) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p4) :: (load (i64), addrspace 4) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; GFX9-LABEL: name: test_load_constant_s64_align8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load (s64), addrspace 4) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p4) :: (load (i64), addrspace 4) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_LOAD %0 :: (load (s64), align 8, addrspace 4) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_LOAD %0(p4) :: (load (i64), addrspace 4) + $vgpr0_vgpr1 = COPY %1(i64) ... --- @@ -679,25 +679,25 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load (s64), align 4, addrspace 4) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p4) :: (load (i64), align 4, addrspace 4) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; VI-LABEL: name: test_load_constant_s64_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load (s64), align 4, addrspace 4) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p4) :: (load (i64), align 4, addrspace 4) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; GFX9-LABEL: name: test_load_constant_s64_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load (s64), align 4, addrspace 4) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p4) :: (load (i64), align 4, addrspace 4) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_LOAD %0 :: (load (s64), align 4, addrspace 4) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_LOAD %0(p4) :: (load (i64), align 4, addrspace 4) + $vgpr0_vgpr1 = COPY %1(i64) ... 
--- @@ -710,79 +710,79 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s16), addrspace 4) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](s64) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i16), addrspace 4) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i16) from unknown-address + 2, addrspace 4) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR]](i32) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i16) from unknown-address + 4, addrspace 4) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i16) from unknown-address + 6, addrspace 4) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR1]](i32) + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i64) = G_OR [[SHL2]], [[ZEXT]] + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](i64) ; ; VI-LABEL: name: test_load_constant_s64_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s16), addrspace 4) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) - ; VI-NEXT: 
[[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](s64) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i16), addrspace 4) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i16) from unknown-address + 2, addrspace 4) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR]](i32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i16) from unknown-address + 4, addrspace 4) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i16) from unknown-address + 6, addrspace 4) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR1]](i32) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i64) = G_OR [[SHL2]], [[ZEXT]] + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](i64) ; ; GFX9-LABEL: name: test_load_constant_s64_align2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s16), addrspace 4) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s16) from 
unknown-address + 4, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) - ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](s64) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i16), addrspace 4) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i16) from unknown-address + 2, addrspace 4) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR]](i32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i16) from unknown-address + 4, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i16) from unknown-address + 6, addrspace 4) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR1]](i32) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C3]](i32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i64) = G_OR [[SHL2]], [[ZEXT]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](i64) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_LOAD %0 :: (load (s64), align 2, addrspace 4) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_LOAD %0(p4) :: (load (i64), align 2, addrspace 4) + $vgpr0_vgpr1 = COPY %1(i64) ... 
--- @@ -795,133 +795,133 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; CI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](s64) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR 
[[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 4) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i8) from unknown-address + 3, addrspace 4) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; CI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (i8) from unknown-address + 4, addrspace 4) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (i8) from unknown-address + 5, addrspace 4) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (i8) from unknown-address + 6, addrspace 4) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p4) :: (load (i8) from unknown-address + 7, addrspace 4) + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; CI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; CI-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; CI-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](i64) ; ; VI-LABEL: name: test_load_constant_s64_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) - ; VI-NEXT: 
[[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; VI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](s64) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 4) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i8) from unknown-address + 3, addrspace 4) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (i8) from unknown-address + 4, addrspace 4) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) 
= G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (i8) from unknown-address + 5, addrspace 4) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (i8) from unknown-address + 6, addrspace 4) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p4) :: (load (i8) from unknown-address + 7, addrspace 4) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; VI-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](i64) ; ; GFX9-LABEL: name: test_load_constant_s64_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) - ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD 
[[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) - ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](s64) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i8) from unknown-address + 3, addrspace 4) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (i8) from unknown-address + 4, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (i8) from unknown-address + 5, addrspace 4) + ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (i8) from unknown-address + 6, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p4) :: (load (i8) from unknown-address + 7, addrspace 4) + ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX9-NEXT: 
[[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](i64) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_LOAD %0 :: (load (s64), align 1, addrspace 4) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_LOAD %0(p4) :: (load (i64), align 1, addrspace 4) + $vgpr0_vgpr1 = COPY %1(i64) ... --- @@ -934,28 +934,28 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 16, addrspace 4) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p4) :: (load (<3 x i32>), align 16, addrspace 4) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; VI-LABEL: name: test_load_constant_s96_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 16, addrspace 4) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p4) :: (load (<3 x i32>), align 16, addrspace 4) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX9-LABEL: name: test_load_constant_s96_align16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 16, addrspace 4) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p4) :: (load (<3 x i32>), align 16, addrspace 4) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s96) = G_LOAD %0 :: (load (s96), align 16, addrspace 4) - $vgpr0_vgpr1_vgpr2 = COPY %1 + %1:_(i96) = G_LOAD %0(p4) :: (load (i96), align 16, addrspace 4) + $vgpr0_vgpr1_vgpr2 = COPY %1(i96) ... 
--- @@ -968,28 +968,28 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 8, addrspace 4) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p4) :: (load (<3 x i32>), align 8, addrspace 4) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; VI-LABEL: name: test_load_constant_s96_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 8, addrspace 4) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p4) :: (load (<3 x i32>), align 8, addrspace 4) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX9-LABEL: name: test_load_constant_s96_align8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 8, addrspace 4) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p4) :: (load (<3 x i32>), align 8, addrspace 4) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s96) = G_LOAD %0 :: (load (s96), align 8, addrspace 4) - $vgpr0_vgpr1_vgpr2 = COPY %1 + %1:_(i96) = G_LOAD %0(p4) :: (load (i96), align 8, addrspace 4) + $vgpr0_vgpr1_vgpr2 = COPY %1(i96) ... 
--- @@ -1002,28 +1002,28 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 4, addrspace 4) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p4) :: (load (<3 x i32>), align 4, addrspace 4) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; VI-LABEL: name: test_load_constant_s96_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 4, addrspace 4) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p4) :: (load (<3 x i32>), align 4, addrspace 4) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX9-LABEL: name: test_load_constant_s96_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 4, addrspace 4) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p4) :: (load (<3 x i32>), align 4, addrspace 4) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s96) = G_LOAD %0 :: (load (s96), align 4, addrspace 4) - $vgpr0_vgpr1_vgpr2 = COPY %1 + %1:_(i96) = G_LOAD %0(p4) :: (load (i96), align 4, addrspace 4) + $vgpr0_vgpr1_vgpr2 = COPY %1(i96) ... 
--- @@ -1036,91 +1036,91 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s16), addrspace 4) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s16) from unknown-address + 8, addrspace 4) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s16) from unknown-address + 10, addrspace 4) - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i16), addrspace 4) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i16) from unknown-address + 2, addrspace 4) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i16) from unknown-address + 4, addrspace 4) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i16) from unknown-address + 6, addrspace 4) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (i16) from unknown-address + 8, addrspace 4) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD 
[[PTR_ADD4]](p4) :: (load (i16) from unknown-address + 10, addrspace 4) + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; VI-LABEL: name: test_load_constant_s96_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s16), addrspace 4) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s16) from unknown-address + 8, addrspace 4) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s16) from unknown-address + 10, addrspace 4) - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i16), addrspace 4) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i16) from unknown-address + 2, addrspace 4) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i16) from unknown-address + 4, addrspace 4) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i16) from unknown-address + 6, addrspace 4) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL 
[[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (i16) from unknown-address + 8, addrspace 4) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p4) :: (load (i16) from unknown-address + 10, addrspace 4) + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX9-LABEL: name: test_load_constant_s96_align2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s16), addrspace 4) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s16) from unknown-address + 8, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s16) from unknown-address + 10, addrspace 4) - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i16), addrspace 4) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i16) from unknown-address + 2, addrspace 4) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: 
[[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i16) from unknown-address + 4, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i16) from unknown-address + 6, addrspace 4) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (i16) from unknown-address + 8, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p4) :: (load (i16) from unknown-address + 10, addrspace 4) + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s96) = G_LOAD %0 :: (load (s96), align 2, addrspace 4) - $vgpr0_vgpr1_vgpr2 = COPY %1 + %1:_(i96) = G_LOAD %0(p4) :: (load (i96), align 2, addrspace 4) + $vgpr0_vgpr1_vgpr2 = COPY %1(i96) ... --- @@ -1133,169 +1133,169 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD 
[[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4) - ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4) - ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; CI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4) - ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4) - ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 4) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i8) from unknown-address + 3, addrspace 4) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: 
[[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (i8) from unknown-address + 4, addrspace 4) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (i8) from unknown-address + 5, addrspace 4) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (i8) from unknown-address + 6, addrspace 4) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p4) :: (load (i8) from unknown-address + 7, addrspace 4) + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p4) :: (load (i8) from unknown-address + 8, addrspace 4) + ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p4) :: (load (i8) from unknown-address + 9, addrspace 4) + ; CI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; CI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p4) :: (load (i8) from unknown-address + 10, addrspace 4) + ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p4) :: (load (i8) from unknown-address + 11, addrspace 4) + ; CI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; CI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; CI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; VI-LABEL: name: test_load_constant_s96_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; 
VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4) - ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4) - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4) - ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = 
G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 4) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i8) from unknown-address + 3, addrspace 4) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (i8) from unknown-address + 4, addrspace 4) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (i8) from unknown-address + 5, addrspace 4) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (i8) from unknown-address + 6, addrspace 4) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p4) :: (load (i8) from unknown-address + 7, addrspace 4) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; VI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p4) :: (load (i8) from unknown-address + 8, addrspace 4) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p4) :: (load (i8) from unknown-address + 9, addrspace 4) + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; VI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p4) :: (load (i8) from unknown-address + 10, addrspace 4) + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p4) :: (load (i8) from unknown-address + 11, addrspace 4) + ; VI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], 
[[C1]](i32) + ; VI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; VI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; VI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX9-LABEL: name: test_load_constant_s96_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) - ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) - ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; GFX9-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], 
[[C]](s64) - ; GFX9-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4) - ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4) - ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i8) from unknown-address + 3, addrspace 4) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (i8) from unknown-address + 4, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (i8) from unknown-address + 5, addrspace 4) + ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (i8) from unknown-address + 6, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p4) :: (load (i8) from unknown-address + 7, addrspace 4) + 
; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; GFX9-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p4) :: (load (i8) from unknown-address + 8, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; GFX9-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p4) :: (load (i8) from unknown-address + 9, addrspace 4) + ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; GFX9-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p4) :: (load (i8) from unknown-address + 10, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p4) :: (load (i8) from unknown-address + 11, addrspace 4) + ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s96) = G_LOAD %0 :: (load (s96), align 1, addrspace 4) - $vgpr0_vgpr1_vgpr2 = COPY %1 + %1:_(i96) = G_LOAD %0(p4) :: (load (i96), align 1, addrspace 4) + $vgpr0_vgpr1_vgpr2 = COPY %1(i96) ... 
--- @@ -1308,43 +1308,43 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 4) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s32) from unknown-address + 16, addrspace 4) - ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>) - ; CI-NEXT: S_NOP 0, implicit [[BITCAST]](s160) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p4) :: (load (<4 x i32>), align 4, addrspace 4) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i32) from unknown-address + 16, addrspace 4) + ; CI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<4 x i32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32), [[LOAD1]](i32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i160) = G_BITCAST [[BUILD_VECTOR]](<5 x i32>) + ; CI-NEXT: S_NOP 0, implicit [[BITCAST]](i160) ; ; VI-LABEL: name: test_load_constant_s160_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 4) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s32) from unknown-address + 16, addrspace 4) - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>) - ; VI-NEXT: S_NOP 0, implicit [[BITCAST]](s160) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p4) :: (load (<4 x i32>), align 4, addrspace 4) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i32) from unknown-address + 16, addrspace 4) + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<4 x i32>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32), [[LOAD1]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i160) = G_BITCAST [[BUILD_VECTOR]](<5 x i32>) + ; VI-NEXT: S_NOP 0, implicit [[BITCAST]](i160) ; ; GFX9-LABEL: name: test_load_constant_s160_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x 
s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 4) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s32) from unknown-address + 16, addrspace 4) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>) - ; GFX9-NEXT: S_NOP 0, implicit [[BITCAST]](s160) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p4) :: (load (<4 x i32>), align 4, addrspace 4) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i32) from unknown-address + 16, addrspace 4) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<4 x i32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32), [[LOAD1]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i160) = G_BITCAST [[BUILD_VECTOR]](<5 x i32>) + ; GFX9-NEXT: S_NOP 0, implicit [[BITCAST]](i160) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s160) = G_LOAD %0 :: (load (s160), align 4, addrspace 4) - S_NOP 0, implicit %1 + %1:_(i160) = G_LOAD %0(p4) :: (load (i160), align 4, addrspace 4) + S_NOP 0, implicit %1(i160) ... --- @@ -1357,54 +1357,54 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 4) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<3 x s32>) from unknown-address + 16, align 4, addrspace 4) - ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; CI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>) - ; CI-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF - ; CI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0 - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p4) :: (load (<4 x i32>), align 4, addrspace 4) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<3 x i32>) from unknown-address + 16, align 4, addrspace 4) + ; CI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<4 x i32>) + ; CI-NEXT: [[UV4:%[0-9]+]]:_(i32), 
[[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD1]](<3 x i32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32), [[UV4]](i32), [[UV5]](i32), [[UV6]](i32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i224) = G_BITCAST [[BUILD_VECTOR]](<7 x i32>) + ; CI-NEXT: [[DEF:%[0-9]+]]:_(i256) = G_IMPLICIT_DEF + ; CI-NEXT: [[INSERT:%[0-9]+]]:_(i256) = G_INSERT [[DEF]], [[BITCAST]](i224), 0 + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](i256) ; ; VI-LABEL: name: test_load_constant_s224_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 4) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<3 x s32>) from unknown-address + 16, align 4, addrspace 4) - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF - ; VI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0 - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p4) :: (load (<4 x i32>), align 4, addrspace 4) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<3 x i32>) from unknown-address + 16, align 4, addrspace 4) + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<4 x i32>) + ; VI-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD1]](<3 x i32>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32), [[UV4]](i32), [[UV5]](i32), [[UV6]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i224) = G_BITCAST [[BUILD_VECTOR]](<7 x i32>) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(i256) = G_IMPLICIT_DEF + ; VI-NEXT: [[INSERT:%[0-9]+]]:_(i256) = G_INSERT [[DEF]], [[BITCAST]](i224), 0 + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](i256) ; ; GFX9-LABEL: name: test_load_constant_s224_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 4) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<3 x s32>) from unknown-address + 16, align 4, addrspace 4) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), 
[[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0 - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) - %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s224) = G_LOAD %0 :: (load (s224), align 4, addrspace 4) - %2:_(s256) = G_IMPLICIT_DEF - %3:_(s256) = G_INSERT %2, %1, 0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3 + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p4) :: (load (<4 x i32>), align 4, addrspace 4) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<3 x i32>) from unknown-address + 16, align 4, addrspace 4) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<4 x i32>) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD1]](<3 x i32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32), [[UV4]](i32), [[UV5]](i32), [[UV6]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i224) = G_BITCAST [[BUILD_VECTOR]](<7 x i32>) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i256) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(i256) = G_INSERT [[DEF]], [[BITCAST]](i224), 0 + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](i256) + %0:_(p4) = COPY $vgpr0_vgpr1 + %1:_(i224) = G_LOAD %0(p4) :: (load (i224), align 4, addrspace 4) + %2:_(i256) = G_IMPLICIT_DEF + %3:_(i256) = G_INSERT %2, %1(i224), 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3(i256) ... 
@@ -1418,28 +1418,28 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), addrspace 4) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p4) :: (load (<4 x i32>), addrspace 4) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; VI-LABEL: name: test_load_constant_s128_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), addrspace 4) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p4) :: (load (<4 x i32>), addrspace 4) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX9-LABEL: name: test_load_constant_s128_align16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), addrspace 4) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p4) :: (load (<4 x i32>), addrspace 4) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s128) = G_LOAD %0 :: (load (s128), align 16, addrspace 4) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(i128) = G_LOAD %0(p4) :: (load (i128), addrspace 4) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(i128) ... 
--- @@ -1452,28 +1452,28 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 4) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p4) :: (load (<4 x i32>), align 4, addrspace 4) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; VI-LABEL: name: test_load_constant_s128_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 4) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p4) :: (load (<4 x i32>), align 4, addrspace 4) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX9-LABEL: name: test_load_constant_s128_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 4) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p4) :: (load (<4 x i32>), align 4, addrspace 4) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s128) = G_LOAD %0 :: (load (s128), align 4, addrspace 4) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(i128) = G_LOAD %0(p4) :: (load (i128), align 4, addrspace 4) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(i128) ... 
--- @@ -1486,214 +1486,214 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4) - ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4) - ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; CI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4) - ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], 
[[C]](s64) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4) - ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; CI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 4) - ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 4) - ; CI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; CI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 4) - ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 4) - ; CI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; CI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; CI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; CI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 4) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i8) from unknown-address + 3, addrspace 4) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (i8) from unknown-address + 4, addrspace 4) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], 
[[C]](i64) + ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (i8) from unknown-address + 5, addrspace 4) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (i8) from unknown-address + 6, addrspace 4) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p4) :: (load (i8) from unknown-address + 7, addrspace 4) + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p4) :: (load (i8) from unknown-address + 8, addrspace 4) + ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p4) :: (load (i8) from unknown-address + 9, addrspace 4) + ; CI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; CI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p4) :: (load (i8) from unknown-address + 10, addrspace 4) + ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p4) :: (load (i8) from unknown-address + 11, addrspace 4) + ; CI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; CI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; CI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; CI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; CI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p4) :: (load (i8) from unknown-address + 12, addrspace 4) + ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p4) :: (load (i8) from unknown-address + 13, addrspace 4) + ; CI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; CI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p4) :: (load (i8) from unknown-address + 14, addrspace 4) + ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p4) :: (load (i8) from unknown-address + 15, addrspace 4) + ; CI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; CI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; CI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; CI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), 
[[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; VI-LABEL: name: test_load_constant_s128_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4) - ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], 
[[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4) - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4) - ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 4) - ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 4) - ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 4) - ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 4) - ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 4) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i8) from unknown-address + 3, addrspace 4) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], 
[[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (i8) from unknown-address + 4, addrspace 4) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (i8) from unknown-address + 5, addrspace 4) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (i8) from unknown-address + 6, addrspace 4) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p4) :: (load (i8) from unknown-address + 7, addrspace 4) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; VI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p4) :: (load (i8) from unknown-address + 8, addrspace 4) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p4) :: (load (i8) from unknown-address + 9, addrspace 4) + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; VI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p4) :: (load (i8) from unknown-address + 10, addrspace 4) + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p4) :: (load (i8) from unknown-address + 11, addrspace 4) + ; VI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; VI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; VI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; VI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; VI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p4) :: (load (i8) from unknown-address + 12, addrspace 4) + ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p4) :: (load (i8) from unknown-address + 13, addrspace 4) + ; VI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; VI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p4) :: (load (i8) from unknown-address + 14, addrspace 4) + ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p4) :: (load (i8) from unknown-address + 15, addrspace 4) + ; VI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; VI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; 
VI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; VI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX9-LABEL: name: test_load_constant_s128_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) - ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) - ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; GFX9-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; GFX9-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = 
G_ZEXTLOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4) - ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4) - ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; GFX9-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; GFX9-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 4) - ; GFX9-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; GFX9-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 4) - ; GFX9-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; GFX9-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; GFX9-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i8) from unknown-address + 3, addrspace 4) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], 
[[C1]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (i8) from unknown-address + 4, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (i8) from unknown-address + 5, addrspace 4) + ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (i8) from unknown-address + 6, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p4) :: (load (i8) from unknown-address + 7, addrspace 4) + ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; GFX9-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p4) :: (load (i8) from unknown-address + 8, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; GFX9-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p4) :: (load (i8) from unknown-address + 9, addrspace 4) + ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; GFX9-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p4) :: (load (i8) from unknown-address + 10, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p4) :: (load (i8) from unknown-address + 11, addrspace 4) + ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; GFX9-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; GFX9-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p4) :: (load (i8) from unknown-address + 12, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; GFX9-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p4) :: (load (i8) from unknown-address + 13, addrspace 4) + ; GFX9-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; GFX9-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; GFX9-NEXT: 
[[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p4) :: (load (i8) from unknown-address + 14, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p4) :: (load (i8) from unknown-address + 15, addrspace 4) + ; GFX9-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; GFX9-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; GFX9-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s128) = G_LOAD %0 :: (load (s128), align 1, addrspace 4) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(i128) = G_LOAD %0(p4) :: (load (i128), align 1, addrspace 4) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(i128) ... --- @@ -1706,28 +1706,28 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s32>), align 16, addrspace 4) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[LOAD]](<8 x s32>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x i32>) = G_LOAD [[COPY]](p4) :: (load (<8 x i32>), align 16, addrspace 4) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i256) = G_BITCAST [[LOAD]](<8 x i32>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](i256) ; ; VI-LABEL: name: test_load_constant_s256_align32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s32>), align 16, addrspace 4) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[LOAD]](<8 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x i32>) = G_LOAD [[COPY]](p4) :: (load (<8 x i32>), align 16, addrspace 4) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i256) = G_BITCAST [[LOAD]](<8 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](i256) ; ; GFX9-LABEL: name: test_load_constant_s256_align32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s32>), align 16, addrspace 4) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[LOAD]](<8 x s32>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<8 x i32>) = G_LOAD [[COPY]](p4) :: (load (<8 x i32>), align 16, addrspace 4) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i256) = G_BITCAST [[LOAD]](<8 x i32>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](i256) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s256) = G_LOAD %0 :: (load (s256), align 16, addrspace 4) - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 + %1:_(i256) = G_LOAD %0(p4) :: (load (i256), align 16, addrspace 4) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1(i256) ... 
--- @@ -1757,8 +1757,8 @@ body: | ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p4) :: (load (p1), addrspace 4) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(p1) = G_LOAD %0 :: (load (p1), align 8, addrspace 4) - $vgpr0_vgpr1 = COPY %1 + %1:_(p1) = G_LOAD %0(p4) :: (load (p1), addrspace 4) + $vgpr0_vgpr1 = COPY %1(p1) ... --- @@ -1788,8 +1788,8 @@ body: | ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p4) :: (load (p1), align 4, addrspace 4) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(p1) = G_LOAD %0 :: (load (p1), align 4, addrspace 4) - $vgpr0_vgpr1 = COPY %1 + %1:_(p1) = G_LOAD %0(p4) :: (load (p1), align 4, addrspace 4) + $vgpr0_vgpr1 = COPY %1(p1) ... --- @@ -1802,136 +1802,136 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT 
[[OR5]](s32) - ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; CI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](s64) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 4) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i8) from unknown-address + 3, addrspace 4) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; CI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (i8) from unknown-address + 4, addrspace 4) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (i8) from unknown-address + 5, addrspace 4) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (i8) from unknown-address + 6, addrspace 4) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p4) :: (load (i8) from unknown-address + 7, addrspace 4) + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; CI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; CI-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; CI-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](i64) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) ; ; VI-LABEL: name: test_load_constant_p1_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: 
[[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; VI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](s64) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 4) + ; 
VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i8) from unknown-address + 3, addrspace 4) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (i8) from unknown-address + 4, addrspace 4) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (i8) from unknown-address + 5, addrspace 4) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (i8) from unknown-address + 6, addrspace 4) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p4) :: (load (i8) from unknown-address + 7, addrspace 4) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; VI-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](i64) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) ; ; GFX9-LABEL: name: test_load_constant_p1_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: 
[[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) - ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) - ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](s64) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i8) from unknown-address + 3, addrspace 4) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (i8) from unknown-address + 4, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) 
= G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (i8) from unknown-address + 5, addrspace 4) + ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (i8) from unknown-address + 6, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p4) :: (load (i8) from unknown-address + 7, addrspace 4) + ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](i64) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(p1) = G_LOAD %0 :: (load (p1), align 1, addrspace 4) - $vgpr0_vgpr1 = COPY %1 + %1:_(p1) = G_LOAD %0(p4) :: (load (p1), align 1, addrspace 4) + $vgpr0_vgpr1 = COPY %1(p1) ... --- @@ -1960,9 +1960,9 @@ body: | ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p4) :: (load (p3), addrspace 4) ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](p3) - %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(p3) = G_LOAD %0 :: (load (p3), align 4, addrspace 4) - $vgpr0 = COPY %1 + %0:_(p4) = COPY $vgpr0_vgpr1 + %1:_(p3) = G_LOAD %0(p4) :: (load (p3), addrspace 4) + $vgpr0 = COPY %1(p3) ... --- @@ -1992,8 +1992,8 @@ body: | ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p4) :: (load (p4), addrspace 4) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(p4) = G_LOAD %0 :: (load (p4), align 8, addrspace 4) - $vgpr0_vgpr1 = COPY %1 + %1:_(p4) = G_LOAD %0(p4) :: (load (p4), addrspace 4) + $vgpr0_vgpr1 = COPY %1(p4) ... --- @@ -2023,8 +2023,8 @@ body: | ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p4) :: (load (p4), align 4, addrspace 4) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(p4) = G_LOAD %0 :: (load (p4), align 4, addrspace 4) - $vgpr0_vgpr1 = COPY %1 + %1:_(p4) = G_LOAD %0(p4) :: (load (p4), align 4, addrspace 4) + $vgpr0_vgpr1 = COPY %1(p4) ... 
--- @@ -2037,82 +2037,82 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s16), addrspace 4) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] - ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR2]](s64) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i16), addrspace 4) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i16) from unknown-address + 2, addrspace 4) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR]](i32) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i16) from unknown-address + 4, addrspace 4) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i16) from unknown-address + 6, addrspace 4) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR1]](i32) + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i64) = G_OR [[SHL2]], [[ZEXT]] + ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR2]](i64) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p4) ; ; VI-LABEL: name: test_load_constant_p4_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s16), addrspace 4) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = 
G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] - ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR2]](s64) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i16), addrspace 4) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i16) from unknown-address + 2, addrspace 4) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR]](i32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i16) from unknown-address + 4, addrspace 4) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i16) from unknown-address + 6, addrspace 4) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR1]](i32) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i64) = G_OR [[SHL2]], [[ZEXT]] + ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR2]](i64) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p4) ; ; GFX9-LABEL: name: test_load_constant_p4_align2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s16), addrspace 4) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 
- ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) - ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] - ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR2]](s64) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i16), addrspace 4) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i16) from unknown-address + 2, addrspace 4) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR]](i32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i16) from unknown-address + 4, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i16) from unknown-address + 6, addrspace 4) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR1]](i32) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C3]](i32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i64) = G_OR [[SHL2]], [[ZEXT]] + ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR2]](i64) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p4) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(p4) = G_LOAD %0 :: (load (p4), align 2, addrspace 4) - $vgpr0_vgpr1 = COPY %1 + %1:_(p4) = G_LOAD %0(p4) :: (load (p4), align 2, addrspace 4) + $vgpr0_vgpr1 = COPY %1(p4) ... 
--- @@ -2125,136 +2125,136 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; CI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR6]](s64) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: 
[[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 4) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i8) from unknown-address + 3, addrspace 4) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; CI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (i8) from unknown-address + 4, addrspace 4) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (i8) from unknown-address + 5, addrspace 4) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (i8) from unknown-address + 6, addrspace 4) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p4) :: (load (i8) from unknown-address + 7, addrspace 4) + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; CI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; CI-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; CI-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR6]](i64) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p4) ; ; VI-LABEL: name: test_load_constant_p4_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD 
[[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; VI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR6]](s64) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 4) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i8) from unknown-address + 3, addrspace 4) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD 
[[PTR_ADD3]](p4) :: (load (i8) from unknown-address + 4, addrspace 4) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (i8) from unknown-address + 5, addrspace 4) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (i8) from unknown-address + 6, addrspace 4) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p4) :: (load (i8) from unknown-address + 7, addrspace 4) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; VI-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR6]](i64) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p4) ; ; GFX9-LABEL: name: test_load_constant_p4_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) - ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX9-NEXT: 
[[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) - ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR6]](s64) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i8) from unknown-address + 3, addrspace 4) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (i8) from unknown-address + 4, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (i8) from unknown-address + 5, addrspace 4) + ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (i8) from unknown-address + 6, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p4) :: (load (i8) from unknown-address + 7, addrspace 4) + ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-NEXT: 
[[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR6]](i64) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p4) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(p4) = G_LOAD %0 :: (load (p4), align 1, addrspace 4) - $vgpr0_vgpr1 = COPY %1 + %1:_(p4) = G_LOAD %0(p4) :: (load (p4), align 1, addrspace 4) + $vgpr0_vgpr1 = COPY %1(p4) ... --- @@ -2284,8 +2284,8 @@ body: | ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p4) :: (load (p5), addrspace 4) ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](p5) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(p5) = G_LOAD %0 :: (load (p5), align 4, addrspace 4) - $vgpr0 = COPY %1 + %1:_(p5) = G_LOAD %0(p4) :: (load (p5), addrspace 4) + $vgpr0 = COPY %1(p5) ... --- @@ -2298,46 +2298,46 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s16), addrspace 4) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i16), addrspace 4) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i16) from unknown-address + 2, addrspace 4) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](i32) ; CI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) ; ; VI-LABEL: name: test_load_constant_p5_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s16), addrspace 4) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i16), addrspace 4) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i16) from unknown-address + 2, addrspace 4) + ; 
VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](i32) ; VI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) ; ; GFX9-LABEL: name: test_load_constant_p5_align2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s16), addrspace 4) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i16), addrspace 4) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i16) from unknown-address + 2, addrspace 4) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](i32) ; GFX9-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(p5) = G_LOAD %0 :: (load (p5), align 2, addrspace 4) - $vgpr0 = COPY %1 + %1:_(p5) = G_LOAD %0(p4) :: (load (p5), align 2, addrspace 4) + $vgpr0 = COPY %1(p5) ... 
--- @@ -2350,76 +2350,76 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 4) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i8) from unknown-address + 3, addrspace 4) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](i32) ; CI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) ; ; VI-LABEL: name: test_load_constant_p5_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = 
G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 4) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i8) from unknown-address + 3, addrspace 4) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](i32) ; VI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) ; ; GFX9-LABEL: name: test_load_constant_p5_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] 
- ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i8) from unknown-address + 3, addrspace 4) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](i32) ; GFX9-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(p5) = G_LOAD %0 :: (load (p5), align 1, addrspace 4) - $vgpr0 = COPY %1 + %1:_(p5) = G_LOAD %0(p4) :: (load (p5), align 1, addrspace 4) + $vgpr0 = COPY %1(p5) ... --- @@ -2432,27 +2432,27 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4) - ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i16), align 4, addrspace 4) + ; CI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; VI-LABEL: name: test_load_constant_v2s8_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i16), align 4, addrspace 4) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-LABEL: name: test_load_constant_v2s8_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4) - ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i16), align 4, addrspace 4) + ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), align 4, addrspace 4) - %2:_(s16) = G_BITCAST %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + %1:_(<2 x i8>) = G_LOAD %0(p4) :: (load (<2 x i8>), align 4, addrspace 4) + %2:_(i16) = G_BITCAST %1(<2 x i8>) + %3:_(i32) = G_ANYEXT %2(i16) + $vgpr0 = COPY %3(i32) ... 
--- @@ -2465,27 +2465,27 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4) - ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i16), addrspace 4) + ; CI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; VI-LABEL: name: test_load_constant_v2s8_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i16), addrspace 4) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-LABEL: name: test_load_constant_v2s8_align2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4) - ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i16), addrspace 4) + ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), align 2, addrspace 4) - %2:_(s16) = G_BITCAST %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + %1:_(<2 x i8>) = G_LOAD %0(p4) :: (load (<2 x i8>), addrspace 4) + %2:_(i16) = G_BITCAST %1(<2 x i8>) + %3:_(i32) = G_ANYEXT %2(i16) + $vgpr0 = COPY %3(i32) ... --- @@ -2498,45 +2498,45 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; VI-LABEL: name: test_load_constant_v2s8_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; VI-NEXT: 
[[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; GFX9-LABEL: name: test_load_constant_v2s8_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: $vgpr0 = COPY [[OR]](s32) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: $vgpr0 = COPY [[OR]](i32) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), align 1, addrspace 4) - %2:_(s16) = G_BITCAST %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + %1:_(<2 x i8>) = G_LOAD %0(p4) :: (load (<2 x i8>), align 1, addrspace 4) + %2:_(i16) = G_BITCAST %1(<2 x i8>) + %3:_(i32) = G_ANYEXT %2(i16) + $vgpr0 = COPY %3(i32) ... 
--- @@ -2549,98 +2549,98 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) - ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]] - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) - ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; CI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i32), addrspace 4) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; CI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C2]] + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C]](i32) + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; CI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LSHR]], [[C3]] + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[COPY1]](i32) + ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[SHL]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[TRUNC1]] + ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; CI-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C2]] + ; CI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C]](i32) + ; CI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[DEF]], [[C3]] + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[COPY2]](i32) + ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[SHL1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[TRUNC3]] + ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL2]] + ; CI-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; VI-LABEL: name: test_load_constant_v3s8_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ 
$}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i32), addrspace 4) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C2]] + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C3]](i16) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL]] + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C2]] + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C2]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C3]](i16) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL1]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL2]] + ; VI-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; GFX9-LABEL: name: test_load_constant_v3s8_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]] - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i32), addrspace 4) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C2]] + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C2]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C3]](i16) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL]] + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C2]] + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C2]] + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C3]](i16) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL1]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](i32) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<3 x s8>) = G_LOAD %0 :: (load (<3 x s8>), align 4, addrspace 4) - %2:_(s24) = G_BITCAST %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + %1:_(<3 x i8>) = G_LOAD %0(p4) :: (load (<3 x i8>), align 4, addrspace 4) + %2:_(i24) = G_BITCAST %1(<3 x i8>) + %3:_(i32) = G_ANYEXT %2(i24) + $vgpr0 = COPY %3(i32) ... 
--- @@ -2653,128 +2653,128 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32) - ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32) - ; CI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; CI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C5]] - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) - ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C5]] - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) - ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; CI-NEXT: $vgpr0 = COPY [[OR4]](s32) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 4) + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C3]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[OR]] + ; CI-NEXT: 
[[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[OR1]], [[C1]](i32) + ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[OR1]], [[C3]](i32) + ; CI-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; CI-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; CI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C4]] + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C1]](i32) + ; CI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; CI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LSHR]], [[C5]] + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[COPY1]](i32) + ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[SHL2]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i16) = G_OR [[AND]], [[TRUNC1]] + ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; CI-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C4]] + ; CI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C1]](i32) + ; CI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[DEF]], [[C5]] + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[COPY2]](i32) + ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[SHL3]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[TRUNC3]] + ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR2]](i16) + ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR3]](i16) + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C3]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL4]] + ; CI-NEXT: $vgpr0 = COPY [[OR4]](i32) ; ; VI-LABEL: name: test_load_constant_v3s8_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]] - ; VI-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C5]](s16) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL2]] - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C4]] - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND3]], 
[[C5]](s16) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL3]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; VI-NEXT: $vgpr0 = COPY [[OR4]](s32) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 4) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C3]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[OR]] + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[OR1]], [[C1]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[OR1]], [[C3]](i32) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C4]] + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C4]] + ; VI-NEXT: [[C5:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C5]](i16) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL2]] + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C4]] + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C4]] + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C5]](i16) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL3]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR2]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR3]](i16) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C3]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL4]] + ; VI-NEXT: $vgpr0 = COPY [[OR4]](i32) ; ; GFX9-LABEL: name: test_load_constant_v3s8_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = 
G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]] - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C5]](s16) - ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL2]] - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C4]] - ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C5]](s16) - ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL3]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) - ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; GFX9-NEXT: $vgpr0 = COPY [[OR4]](s32) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 4) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C3]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[OR]] + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[OR1]], [[C1]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[OR1]], [[C3]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C4]] + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C4]] + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C5]](i16) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL2]] + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C4]] + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; GFX9-NEXT: 
[[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C4]] + ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C5]](i16) + ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL3]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR2]](i16) + ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR3]](i16) + ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C3]](i32) + ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX9-NEXT: $vgpr0 = COPY [[OR4]](i32) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<3 x s8>) = G_LOAD %0 :: (load (<3 x s8>), align 1, addrspace 4) - %2:_(s24) = G_BITCAST %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + %1:_(<3 x i8>) = G_LOAD %0(p4) :: (load (<3 x i8>), align 1, addrspace 4) + %2:_(i24) = G_BITCAST %1(<3 x i8>) + %3:_(i32) = G_ANYEXT %2(i24) + $vgpr0 = COPY %3(i32) ... --- @@ -2787,26 +2787,26 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4) - ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i32), addrspace 4) + ; CI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; VI-LABEL: name: test_load_constant_v4s8_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i32), addrspace 4) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-LABEL: name: test_load_constant_v4s8_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4) - ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i32), addrspace 4) + ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<4 x s8>) = G_LOAD %0 :: (load (<4 x s8>), align 4, addrspace 4) - %2:_(s32) = G_BITCAST %1 - $vgpr0 = COPY %2 + %1:_(<4 x i8>) = G_LOAD %0(p4) :: (load (<4 x i8>), addrspace 4) + %2:_(i32) = G_BITCAST %1(<4 x i8>) + $vgpr0 = COPY %2(i32) ... 
--- @@ -2819,44 +2819,44 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s16), addrspace 4) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i16), addrspace 4) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i16) from unknown-address + 2, addrspace 4) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; VI-LABEL: name: test_load_constant_v4s8_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s16), addrspace 4) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i16), addrspace 4) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i16) from unknown-address + 2, addrspace 4) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; GFX9-LABEL: name: test_load_constant_v4s8_align2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s16), addrspace 4) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: $vgpr0 = COPY [[OR]](s32) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i16), addrspace 4) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD 
[[PTR_ADD]](p4) :: (load (i16) from unknown-address + 2, addrspace 4) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: $vgpr0 = COPY [[OR]](i32) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<4 x s8>) = G_LOAD %0 :: (load (<4 x s8>), align 2, addrspace 4) - %2:_(s32) = G_BITCAST %1 - $vgpr0 = COPY %2 + %1:_(<4 x i8>) = G_LOAD %0(p4) :: (load (<4 x i8>), align 2, addrspace 4) + %2:_(i32) = G_BITCAST %1(<4 x i8>) + $vgpr0 = COPY %2(i32) ... --- @@ -2869,74 +2869,74 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 4) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i8) from unknown-address + 3, addrspace 4) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; VI-LABEL: name: test_load_constant_v4s8_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; 
VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 4) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i8) from unknown-address + 3, addrspace 4) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; GFX9-LABEL: name: test_load_constant_v4s8_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = 
G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i8) from unknown-address + 3, addrspace 4) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](i32) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<4 x s8>) = G_LOAD %0 :: (load (<4 x s8>), align 1, addrspace 4) - %2:_(s32) = G_BITCAST %1 - $vgpr0 = COPY %2 + %1:_(<4 x i8>) = G_LOAD %0(p4) :: (load (<4 x i8>), align 1, addrspace 4) + %2:_(i32) = G_BITCAST %1(<4 x i8>) + $vgpr0 = COPY %2(i32) ... 
--- @@ -2949,26 +2949,26 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), addrspace 4) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p4) :: (load (<2 x i32>), addrspace 4) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; VI-LABEL: name: test_load_constant_v8s8_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), addrspace 4) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p4) :: (load (<2 x i32>), addrspace 4) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX9-LABEL: name: test_load_constant_v8s8_align8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), addrspace 4) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p4) :: (load (<2 x i32>), addrspace 4) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<8 x s8>) = G_LOAD %0 :: (load (<8 x s8>), align 8, addrspace 4) - %2:_(<2 x s32>) = G_BITCAST %1 - $vgpr0_vgpr1 = COPY %2 + %1:_(<8 x i8>) = G_LOAD %0(p4) :: (load (<8 x i8>), addrspace 4) + %2:_(<2 x i32>) = G_BITCAST %1(<8 x i8>) + $vgpr0_vgpr1 = COPY %2(<2 x i32>) ... --- @@ -2981,124 +2981,124 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), addrspace 4) - ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32) - ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) - ; CI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) - ; CI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) - ; CI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) - ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV1]](s32), [[LSHR3]](s32), [[LSHR4]](s32), [[LSHR5]](s32) - ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>) - ; CI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) - ; CI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32) - ; CI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32) - ; CI-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[LSHR6]](s32), [[LSHR7]](s32), [[LSHR8]](s32) - ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR2]](<4 x s32>) - ; CI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) - ; CI-NEXT: 
[[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32) - ; CI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32) - ; CI-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[LSHR9]](s32), [[LSHR10]](s32), [[LSHR11]](s32) - ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR3]](<4 x s32>) - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<16 x s8>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p4) :: (load (<4 x i32>), addrspace 4) + ; CI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<4 x i32>) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[C]](i32) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[C1]](i32) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; CI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[C2]](i32) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[LSHR]](i32), [[LSHR1]](i32), [[LSHR2]](i32) + ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x i8>) = G_TRUNC [[BUILD_VECTOR]](<4 x i32>) + ; CI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C]](i32) + ; CI-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C1]](i32) + ; CI-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C2]](i32) + ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV1]](i32), [[LSHR3]](i32), [[LSHR4]](i32), [[LSHR5]](i32) + ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x i8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x i32>) + ; CI-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[UV2]], [[C]](i32) + ; CI-NEXT: [[LSHR7:%[0-9]+]]:_(i32) = G_LSHR [[UV2]], [[C1]](i32) + ; CI-NEXT: [[LSHR8:%[0-9]+]]:_(i32) = G_LSHR [[UV2]], [[C2]](i32) + ; CI-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV2]](i32), [[LSHR6]](i32), [[LSHR7]](i32), [[LSHR8]](i32) + ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(<4 x i8>) = G_TRUNC [[BUILD_VECTOR2]](<4 x i32>) + ; CI-NEXT: [[LSHR9:%[0-9]+]]:_(i32) = G_LSHR [[UV3]], [[C]](i32) + ; CI-NEXT: [[LSHR10:%[0-9]+]]:_(i32) = G_LSHR [[UV3]], [[C1]](i32) + ; CI-NEXT: [[LSHR11:%[0-9]+]]:_(i32) = G_LSHR [[UV3]], [[C2]](i32) + ; CI-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV3]](i32), [[LSHR9]](i32), [[LSHR10]](i32), [[LSHR11]](i32) + ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(<4 x i8>) = G_TRUNC [[BUILD_VECTOR3]](<4 x i32>) + ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x i8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x i8>), [[TRUNC1]](<4 x i8>), [[TRUNC2]](<4 x i8>), [[TRUNC3]](<4 x i8>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<16 x i8>) ; ; VI-LABEL: name: test_load_constant_v16s8_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), addrspace 4) - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 24 - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) - ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) - ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV1]](s32), [[LSHR3]](s32), [[LSHR4]](s32), [[LSHR5]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) - ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32) - ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32) - ; VI-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[LSHR6]](s32), [[LSHR7]](s32), [[LSHR8]](s32) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR2]](<4 x s32>) - ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) - ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32) - ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32) - ; VI-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[LSHR9]](s32), [[LSHR10]](s32), [[LSHR11]](s32) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR3]](<4 x s32>) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<16 x s8>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p4) :: (load (<4 x i32>), addrspace 4) + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<4 x i32>) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[C1]](i32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[C2]](i32) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[LSHR]](i32), [[LSHR1]](i32), [[LSHR2]](i32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x i8>) = G_TRUNC [[BUILD_VECTOR]](<4 x i32>) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C]](i32) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C1]](i32) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C2]](i32) + ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV1]](i32), [[LSHR3]](i32), [[LSHR4]](i32), [[LSHR5]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x i8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x i32>) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[UV2]], [[C]](i32) + ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(i32) = G_LSHR [[UV2]], [[C1]](i32) + ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(i32) = G_LSHR [[UV2]], [[C2]](i32) + ; VI-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV2]](i32), [[LSHR6]](i32), [[LSHR7]](i32), [[LSHR8]](i32) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(<4 x i8>) = G_TRUNC [[BUILD_VECTOR2]](<4 x i32>) + ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(i32) = G_LSHR [[UV3]], [[C]](i32) + ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(i32) = G_LSHR [[UV3]], 
[[C1]](i32) + ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(i32) = G_LSHR [[UV3]], [[C2]](i32) + ; VI-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV3]](i32), [[LSHR9]](i32), [[LSHR10]](i32), [[LSHR11]](i32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(<4 x i8>) = G_TRUNC [[BUILD_VECTOR3]](<4 x i32>) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x i8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x i8>), [[TRUNC1]](<4 x i8>), [[TRUNC2]](<4 x i8>), [[TRUNC3]](<4 x i8>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<16 x i8>) ; ; GFX9-LABEL: name: test_load_constant_v16s8_align16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), addrspace 4) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) - ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) - ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[TRUNC6]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC7]](s16), [[TRUNC8]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC9:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS1]](<4 x s16>) - ; GFX9-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) - ; GFX9-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32) - ; GFX9-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32) - ; GFX9-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; GFX9-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; GFX9-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; GFX9-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC10]](s16), [[TRUNC11]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR 
[[TRUNC12]](s16), [[TRUNC13]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC14:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS2]](<4 x s16>) - ; GFX9-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) - ; GFX9-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32) - ; GFX9-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32) - ; GFX9-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; GFX9-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) - ; GFX9-NEXT: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR10]](s32) - ; GFX9-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC15]](s16), [[TRUNC16]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC17]](s16), [[TRUNC18]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR6]](<2 x s16>), [[BUILD_VECTOR7]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC19:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS3]](<4 x s16>) - ; GFX9-NEXT: [[CONCAT_VECTORS4:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC4]](<4 x s8>), [[TRUNC9]](<4 x s8>), [[TRUNC14]](<4 x s8>), [[TRUNC19]](<4 x s8>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<16 x s8>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p4) :: (load (<4 x i32>), addrspace 4) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<4 x i32>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[C]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[C1]](i32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[C2]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[UV]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(<4 x i8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x i16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C]](i32) + ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C1]](i32) + ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C2]](i32) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[UV1]](i32) + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR4]](i32) + ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR5]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC5]](i16), [[TRUNC6]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC7]](i16), [[TRUNC8]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>) + ; GFX9-NEXT: 
[[TRUNC9:%[0-9]+]]:_(<4 x i8>) = G_TRUNC [[CONCAT_VECTORS1]](<4 x i16>) + ; GFX9-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[UV2]], [[C]](i32) + ; GFX9-NEXT: [[LSHR7:%[0-9]+]]:_(i32) = G_LSHR [[UV2]], [[C1]](i32) + ; GFX9-NEXT: [[LSHR8:%[0-9]+]]:_(i32) = G_LSHR [[UV2]], [[C2]](i32) + ; GFX9-NEXT: [[TRUNC10:%[0-9]+]]:_(i16) = G_TRUNC [[UV2]](i32) + ; GFX9-NEXT: [[TRUNC11:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR6]](i32) + ; GFX9-NEXT: [[TRUNC12:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR7]](i32) + ; GFX9-NEXT: [[TRUNC13:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR8]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC10]](i16), [[TRUNC11]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC12]](i16), [[TRUNC13]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR5]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC14:%[0-9]+]]:_(<4 x i8>) = G_TRUNC [[CONCAT_VECTORS2]](<4 x i16>) + ; GFX9-NEXT: [[LSHR9:%[0-9]+]]:_(i32) = G_LSHR [[UV3]], [[C]](i32) + ; GFX9-NEXT: [[LSHR10:%[0-9]+]]:_(i32) = G_LSHR [[UV3]], [[C1]](i32) + ; GFX9-NEXT: [[LSHR11:%[0-9]+]]:_(i32) = G_LSHR [[UV3]], [[C2]](i32) + ; GFX9-NEXT: [[TRUNC15:%[0-9]+]]:_(i16) = G_TRUNC [[UV3]](i32) + ; GFX9-NEXT: [[TRUNC16:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR9]](i32) + ; GFX9-NEXT: [[TRUNC17:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR10]](i32) + ; GFX9-NEXT: [[TRUNC18:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR11]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC15]](i16), [[TRUNC16]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC17]](i16), [[TRUNC18]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR6]](<2 x i16>), [[BUILD_VECTOR7]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC19:%[0-9]+]]:_(<4 x i8>) = G_TRUNC [[CONCAT_VECTORS3]](<4 x i16>) + ; GFX9-NEXT: [[CONCAT_VECTORS4:%[0-9]+]]:_(<16 x i8>) = G_CONCAT_VECTORS [[TRUNC4]](<4 x i8>), [[TRUNC9]](<4 x i8>), [[TRUNC14]](<4 x i8>), [[TRUNC19]](<4 x i8>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<16 x i8>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<16 x s8>) = G_LOAD %0 :: (load (<16 x s8>), align 16, addrspace 4) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<16 x i8>) = G_LOAD %0(p4) :: (load (<16 x i8>), addrspace 4) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<16 x i8>) ... 
--- @@ -3111,208 +3111,208 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s32>), addrspace 4) - ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<8 x s32>) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32) - ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) - ; CI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) - ; CI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) - ; CI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) - ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV1]](s32), [[LSHR3]](s32), [[LSHR4]](s32), [[LSHR5]](s32) - ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>) - ; CI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) - ; CI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32) - ; CI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32) - ; CI-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[LSHR6]](s32), [[LSHR7]](s32), [[LSHR8]](s32) - ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR2]](<4 x s32>) - ; CI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) - ; CI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32) - ; CI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32) - ; CI-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[LSHR9]](s32), [[LSHR10]](s32), [[LSHR11]](s32) - ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR3]](<4 x s32>) - ; CI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32) - ; CI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32) - ; CI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32) - ; CI-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[LSHR12]](s32), [[LSHR13]](s32), [[LSHR14]](s32) - ; CI-NEXT: [[TRUNC4:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR4]](<4 x s32>) - ; CI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32) - ; CI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32) - ; CI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32) - ; CI-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[LSHR15]](s32), [[LSHR16]](s32), [[LSHR17]](s32) - ; CI-NEXT: [[TRUNC5:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR5]](<4 x s32>) - ; CI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32) - ; CI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32) - ; CI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32) - ; CI-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[LSHR18]](s32), [[LSHR19]](s32), [[LSHR20]](s32) - ; CI-NEXT: [[TRUNC6:%[0-9]+]]:_(<4 x s8>) = G_TRUNC 
[[BUILD_VECTOR6]](<4 x s32>) - ; CI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32) - ; CI-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32) - ; CI-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32) - ; CI-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV7]](s32), [[LSHR21]](s32), [[LSHR22]](s32), [[LSHR23]](s32) - ; CI-NEXT: [[TRUNC7:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR7]](<4 x s32>) - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>), [[TRUNC4]](<4 x s8>), [[TRUNC5]](<4 x s8>), [[TRUNC6]](<4 x s8>), [[TRUNC7]](<4 x s8>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<32 x s8>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x i32>) = G_LOAD [[COPY]](p4) :: (load (<8 x i32>), addrspace 4) + ; CI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<8 x i32>) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[C]](i32) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[C1]](i32) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; CI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[C2]](i32) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[LSHR]](i32), [[LSHR1]](i32), [[LSHR2]](i32) + ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x i8>) = G_TRUNC [[BUILD_VECTOR]](<4 x i32>) + ; CI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C]](i32) + ; CI-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C1]](i32) + ; CI-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C2]](i32) + ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV1]](i32), [[LSHR3]](i32), [[LSHR4]](i32), [[LSHR5]](i32) + ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x i8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x i32>) + ; CI-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[UV2]], [[C]](i32) + ; CI-NEXT: [[LSHR7:%[0-9]+]]:_(i32) = G_LSHR [[UV2]], [[C1]](i32) + ; CI-NEXT: [[LSHR8:%[0-9]+]]:_(i32) = G_LSHR [[UV2]], [[C2]](i32) + ; CI-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV2]](i32), [[LSHR6]](i32), [[LSHR7]](i32), [[LSHR8]](i32) + ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(<4 x i8>) = G_TRUNC [[BUILD_VECTOR2]](<4 x i32>) + ; CI-NEXT: [[LSHR9:%[0-9]+]]:_(i32) = G_LSHR [[UV3]], [[C]](i32) + ; CI-NEXT: [[LSHR10:%[0-9]+]]:_(i32) = G_LSHR [[UV3]], [[C1]](i32) + ; CI-NEXT: [[LSHR11:%[0-9]+]]:_(i32) = G_LSHR [[UV3]], [[C2]](i32) + ; CI-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV3]](i32), [[LSHR9]](i32), [[LSHR10]](i32), [[LSHR11]](i32) + ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(<4 x i8>) = G_TRUNC [[BUILD_VECTOR3]](<4 x i32>) + ; CI-NEXT: [[LSHR12:%[0-9]+]]:_(i32) = G_LSHR [[UV4]], [[C]](i32) + ; CI-NEXT: [[LSHR13:%[0-9]+]]:_(i32) = G_LSHR [[UV4]], [[C1]](i32) + ; CI-NEXT: [[LSHR14:%[0-9]+]]:_(i32) = G_LSHR [[UV4]], [[C2]](i32) + ; CI-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV4]](i32), [[LSHR12]](i32), [[LSHR13]](i32), [[LSHR14]](i32) + ; CI-NEXT: [[TRUNC4:%[0-9]+]]:_(<4 x i8>) = G_TRUNC [[BUILD_VECTOR4]](<4 x i32>) + ; CI-NEXT: [[LSHR15:%[0-9]+]]:_(i32) = G_LSHR [[UV5]], [[C]](i32) + ; CI-NEXT: [[LSHR16:%[0-9]+]]:_(i32) = G_LSHR [[UV5]], [[C1]](i32) + ; CI-NEXT: 
[[LSHR17:%[0-9]+]]:_(i32) = G_LSHR [[UV5]], [[C2]](i32) + ; CI-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV5]](i32), [[LSHR15]](i32), [[LSHR16]](i32), [[LSHR17]](i32) + ; CI-NEXT: [[TRUNC5:%[0-9]+]]:_(<4 x i8>) = G_TRUNC [[BUILD_VECTOR5]](<4 x i32>) + ; CI-NEXT: [[LSHR18:%[0-9]+]]:_(i32) = G_LSHR [[UV6]], [[C]](i32) + ; CI-NEXT: [[LSHR19:%[0-9]+]]:_(i32) = G_LSHR [[UV6]], [[C1]](i32) + ; CI-NEXT: [[LSHR20:%[0-9]+]]:_(i32) = G_LSHR [[UV6]], [[C2]](i32) + ; CI-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV6]](i32), [[LSHR18]](i32), [[LSHR19]](i32), [[LSHR20]](i32) + ; CI-NEXT: [[TRUNC6:%[0-9]+]]:_(<4 x i8>) = G_TRUNC [[BUILD_VECTOR6]](<4 x i32>) + ; CI-NEXT: [[LSHR21:%[0-9]+]]:_(i32) = G_LSHR [[UV7]], [[C]](i32) + ; CI-NEXT: [[LSHR22:%[0-9]+]]:_(i32) = G_LSHR [[UV7]], [[C1]](i32) + ; CI-NEXT: [[LSHR23:%[0-9]+]]:_(i32) = G_LSHR [[UV7]], [[C2]](i32) + ; CI-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV7]](i32), [[LSHR21]](i32), [[LSHR22]](i32), [[LSHR23]](i32) + ; CI-NEXT: [[TRUNC7:%[0-9]+]]:_(<4 x i8>) = G_TRUNC [[BUILD_VECTOR7]](<4 x i32>) + ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x i8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x i8>), [[TRUNC1]](<4 x i8>), [[TRUNC2]](<4 x i8>), [[TRUNC3]](<4 x i8>), [[TRUNC4]](<4 x i8>), [[TRUNC5]](<4 x i8>), [[TRUNC6]](<4 x i8>), [[TRUNC7]](<4 x i8>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<32 x i8>) ; ; VI-LABEL: name: test_load_constant_v32s8_align32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s32>), addrspace 4) - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<8 x s32>) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) - ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) - ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV1]](s32), [[LSHR3]](s32), [[LSHR4]](s32), [[LSHR5]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) - ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32) - ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32) - ; VI-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[LSHR6]](s32), [[LSHR7]](s32), [[LSHR8]](s32) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR2]](<4 x s32>) - ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) - ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32) - ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], 
[[C2]](s32) - ; VI-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[LSHR9]](s32), [[LSHR10]](s32), [[LSHR11]](s32) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR3]](<4 x s32>) - ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32) - ; VI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32) - ; VI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32) - ; VI-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[LSHR12]](s32), [[LSHR13]](s32), [[LSHR14]](s32) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR4]](<4 x s32>) - ; VI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32) - ; VI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32) - ; VI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32) - ; VI-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[LSHR15]](s32), [[LSHR16]](s32), [[LSHR17]](s32) - ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR5]](<4 x s32>) - ; VI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32) - ; VI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32) - ; VI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32) - ; VI-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[LSHR18]](s32), [[LSHR19]](s32), [[LSHR20]](s32) - ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR6]](<4 x s32>) - ; VI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32) - ; VI-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32) - ; VI-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32) - ; VI-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV7]](s32), [[LSHR21]](s32), [[LSHR22]](s32), [[LSHR23]](s32) - ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR7]](<4 x s32>) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>), [[TRUNC4]](<4 x s8>), [[TRUNC5]](<4 x s8>), [[TRUNC6]](<4 x s8>), [[TRUNC7]](<4 x s8>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<32 x s8>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x i32>) = G_LOAD [[COPY]](p4) :: (load (<8 x i32>), addrspace 4) + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<8 x i32>) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[C1]](i32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[C2]](i32) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[LSHR]](i32), [[LSHR1]](i32), [[LSHR2]](i32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x i8>) = G_TRUNC [[BUILD_VECTOR]](<4 x i32>) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C]](i32) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C1]](i32) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C2]](i32) + ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV1]](i32), [[LSHR3]](i32), [[LSHR4]](i32), [[LSHR5]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x i8>) = 
G_TRUNC [[BUILD_VECTOR1]](<4 x i32>) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[UV2]], [[C]](i32) + ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(i32) = G_LSHR [[UV2]], [[C1]](i32) + ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(i32) = G_LSHR [[UV2]], [[C2]](i32) + ; VI-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV2]](i32), [[LSHR6]](i32), [[LSHR7]](i32), [[LSHR8]](i32) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(<4 x i8>) = G_TRUNC [[BUILD_VECTOR2]](<4 x i32>) + ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(i32) = G_LSHR [[UV3]], [[C]](i32) + ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(i32) = G_LSHR [[UV3]], [[C1]](i32) + ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(i32) = G_LSHR [[UV3]], [[C2]](i32) + ; VI-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV3]](i32), [[LSHR9]](i32), [[LSHR10]](i32), [[LSHR11]](i32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(<4 x i8>) = G_TRUNC [[BUILD_VECTOR3]](<4 x i32>) + ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(i32) = G_LSHR [[UV4]], [[C]](i32) + ; VI-NEXT: [[LSHR13:%[0-9]+]]:_(i32) = G_LSHR [[UV4]], [[C1]](i32) + ; VI-NEXT: [[LSHR14:%[0-9]+]]:_(i32) = G_LSHR [[UV4]], [[C2]](i32) + ; VI-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV4]](i32), [[LSHR12]](i32), [[LSHR13]](i32), [[LSHR14]](i32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(<4 x i8>) = G_TRUNC [[BUILD_VECTOR4]](<4 x i32>) + ; VI-NEXT: [[LSHR15:%[0-9]+]]:_(i32) = G_LSHR [[UV5]], [[C]](i32) + ; VI-NEXT: [[LSHR16:%[0-9]+]]:_(i32) = G_LSHR [[UV5]], [[C1]](i32) + ; VI-NEXT: [[LSHR17:%[0-9]+]]:_(i32) = G_LSHR [[UV5]], [[C2]](i32) + ; VI-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV5]](i32), [[LSHR15]](i32), [[LSHR16]](i32), [[LSHR17]](i32) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(<4 x i8>) = G_TRUNC [[BUILD_VECTOR5]](<4 x i32>) + ; VI-NEXT: [[LSHR18:%[0-9]+]]:_(i32) = G_LSHR [[UV6]], [[C]](i32) + ; VI-NEXT: [[LSHR19:%[0-9]+]]:_(i32) = G_LSHR [[UV6]], [[C1]](i32) + ; VI-NEXT: [[LSHR20:%[0-9]+]]:_(i32) = G_LSHR [[UV6]], [[C2]](i32) + ; VI-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV6]](i32), [[LSHR18]](i32), [[LSHR19]](i32), [[LSHR20]](i32) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(<4 x i8>) = G_TRUNC [[BUILD_VECTOR6]](<4 x i32>) + ; VI-NEXT: [[LSHR21:%[0-9]+]]:_(i32) = G_LSHR [[UV7]], [[C]](i32) + ; VI-NEXT: [[LSHR22:%[0-9]+]]:_(i32) = G_LSHR [[UV7]], [[C1]](i32) + ; VI-NEXT: [[LSHR23:%[0-9]+]]:_(i32) = G_LSHR [[UV7]], [[C2]](i32) + ; VI-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV7]](i32), [[LSHR21]](i32), [[LSHR22]](i32), [[LSHR23]](i32) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(<4 x i8>) = G_TRUNC [[BUILD_VECTOR7]](<4 x i32>) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x i8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x i8>), [[TRUNC1]](<4 x i8>), [[TRUNC2]](<4 x i8>), [[TRUNC3]](<4 x i8>), [[TRUNC4]](<4 x i8>), [[TRUNC5]](<4 x i8>), [[TRUNC6]](<4 x i8>), [[TRUNC7]](<4 x i8>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<32 x i8>) ; ; GFX9-LABEL: name: test_load_constant_v32s8_align32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s32>), addrspace 4) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<8 x s32>) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; GFX9-NEXT: 
[[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) - ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) - ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[TRUNC6]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC7]](s16), [[TRUNC8]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC9:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS1]](<4 x s16>) - ; GFX9-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) - ; GFX9-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32) - ; GFX9-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32) - ; GFX9-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; GFX9-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; GFX9-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; GFX9-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC10]](s16), [[TRUNC11]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC12]](s16), [[TRUNC13]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC14:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS2]](<4 x s16>) - ; GFX9-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) - ; GFX9-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32) - ; GFX9-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32) - ; GFX9-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; GFX9-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) - ; GFX9-NEXT: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR10]](s32) - ; GFX9-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC15]](s16), [[TRUNC16]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC17]](s16), [[TRUNC18]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR6]](<2 x s16>), [[BUILD_VECTOR7]](<2 x s16>) 
- ; GFX9-NEXT: [[TRUNC19:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS3]](<4 x s16>) - ; GFX9-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32) - ; GFX9-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32) - ; GFX9-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32) - ; GFX9-NEXT: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) - ; GFX9-NEXT: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR12]](s32) - ; GFX9-NEXT: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR13]](s32) - ; GFX9-NEXT: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR14]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR8:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC20]](s16), [[TRUNC21]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR9:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC22]](s16), [[TRUNC23]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS4:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR8]](<2 x s16>), [[BUILD_VECTOR9]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC24:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS4]](<4 x s16>) - ; GFX9-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32) - ; GFX9-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32) - ; GFX9-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32) - ; GFX9-NEXT: [[TRUNC25:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) - ; GFX9-NEXT: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR15]](s32) - ; GFX9-NEXT: [[TRUNC27:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR16]](s32) - ; GFX9-NEXT: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR17]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR10:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC25]](s16), [[TRUNC26]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR11:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC27]](s16), [[TRUNC28]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS5:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR10]](<2 x s16>), [[BUILD_VECTOR11]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC29:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS5]](<4 x s16>) - ; GFX9-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32) - ; GFX9-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32) - ; GFX9-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32) - ; GFX9-NEXT: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) - ; GFX9-NEXT: [[TRUNC31:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR18]](s32) - ; GFX9-NEXT: [[TRUNC32:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR19]](s32) - ; GFX9-NEXT: [[TRUNC33:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR20]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR12:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC30]](s16), [[TRUNC31]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR13:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC32]](s16), [[TRUNC33]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS6:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR12]](<2 x s16>), [[BUILD_VECTOR13]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC34:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS6]](<4 x s16>) - ; GFX9-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32) - ; GFX9-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32) - ; GFX9-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32) - ; GFX9-NEXT: [[TRUNC35:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32) - ; GFX9-NEXT: [[TRUNC36:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR21]](s32) - ; GFX9-NEXT: [[TRUNC37:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR22]](s32) - ; GFX9-NEXT: [[TRUNC38:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR23]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR14:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC35]](s16), [[TRUNC36]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR15:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC37]](s16), [[TRUNC38]](s16) - ; GFX9-NEXT: 
[[CONCAT_VECTORS7:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR14]](<2 x s16>), [[BUILD_VECTOR15]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC39:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS7]](<4 x s16>) - ; GFX9-NEXT: [[CONCAT_VECTORS8:%[0-9]+]]:_(<32 x s8>) = G_CONCAT_VECTORS [[TRUNC4]](<4 x s8>), [[TRUNC9]](<4 x s8>), [[TRUNC14]](<4 x s8>), [[TRUNC19]](<4 x s8>), [[TRUNC24]](<4 x s8>), [[TRUNC29]](<4 x s8>), [[TRUNC34]](<4 x s8>), [[TRUNC39]](<4 x s8>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS8]](<32 x s8>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<8 x i32>) = G_LOAD [[COPY]](p4) :: (load (<8 x i32>), addrspace 4) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<8 x i32>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[C]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[C1]](i32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[C2]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[UV]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(<4 x i8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x i16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C]](i32) + ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C1]](i32) + ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C2]](i32) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[UV1]](i32) + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR4]](i32) + ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR5]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC5]](i16), [[TRUNC6]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC7]](i16), [[TRUNC8]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC9:%[0-9]+]]:_(<4 x i8>) = G_TRUNC [[CONCAT_VECTORS1]](<4 x i16>) + ; GFX9-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[UV2]], [[C]](i32) + ; GFX9-NEXT: [[LSHR7:%[0-9]+]]:_(i32) = G_LSHR [[UV2]], [[C1]](i32) + ; GFX9-NEXT: [[LSHR8:%[0-9]+]]:_(i32) = G_LSHR [[UV2]], [[C2]](i32) + ; GFX9-NEXT: [[TRUNC10:%[0-9]+]]:_(i16) = G_TRUNC [[UV2]](i32) + ; GFX9-NEXT: [[TRUNC11:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR6]](i32) + ; GFX9-NEXT: [[TRUNC12:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR7]](i32) + ; GFX9-NEXT: [[TRUNC13:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR8]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC10]](i16), [[TRUNC11]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC12]](i16), 
[[TRUNC13]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR5]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC14:%[0-9]+]]:_(<4 x i8>) = G_TRUNC [[CONCAT_VECTORS2]](<4 x i16>) + ; GFX9-NEXT: [[LSHR9:%[0-9]+]]:_(i32) = G_LSHR [[UV3]], [[C]](i32) + ; GFX9-NEXT: [[LSHR10:%[0-9]+]]:_(i32) = G_LSHR [[UV3]], [[C1]](i32) + ; GFX9-NEXT: [[LSHR11:%[0-9]+]]:_(i32) = G_LSHR [[UV3]], [[C2]](i32) + ; GFX9-NEXT: [[TRUNC15:%[0-9]+]]:_(i16) = G_TRUNC [[UV3]](i32) + ; GFX9-NEXT: [[TRUNC16:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR9]](i32) + ; GFX9-NEXT: [[TRUNC17:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR10]](i32) + ; GFX9-NEXT: [[TRUNC18:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR11]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC15]](i16), [[TRUNC16]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC17]](i16), [[TRUNC18]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR6]](<2 x i16>), [[BUILD_VECTOR7]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC19:%[0-9]+]]:_(<4 x i8>) = G_TRUNC [[CONCAT_VECTORS3]](<4 x i16>) + ; GFX9-NEXT: [[LSHR12:%[0-9]+]]:_(i32) = G_LSHR [[UV4]], [[C]](i32) + ; GFX9-NEXT: [[LSHR13:%[0-9]+]]:_(i32) = G_LSHR [[UV4]], [[C1]](i32) + ; GFX9-NEXT: [[LSHR14:%[0-9]+]]:_(i32) = G_LSHR [[UV4]], [[C2]](i32) + ; GFX9-NEXT: [[TRUNC20:%[0-9]+]]:_(i16) = G_TRUNC [[UV4]](i32) + ; GFX9-NEXT: [[TRUNC21:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR12]](i32) + ; GFX9-NEXT: [[TRUNC22:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR13]](i32) + ; GFX9-NEXT: [[TRUNC23:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR14]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR8:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC20]](i16), [[TRUNC21]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR9:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC22]](i16), [[TRUNC23]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS4:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR8]](<2 x i16>), [[BUILD_VECTOR9]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC24:%[0-9]+]]:_(<4 x i8>) = G_TRUNC [[CONCAT_VECTORS4]](<4 x i16>) + ; GFX9-NEXT: [[LSHR15:%[0-9]+]]:_(i32) = G_LSHR [[UV5]], [[C]](i32) + ; GFX9-NEXT: [[LSHR16:%[0-9]+]]:_(i32) = G_LSHR [[UV5]], [[C1]](i32) + ; GFX9-NEXT: [[LSHR17:%[0-9]+]]:_(i32) = G_LSHR [[UV5]], [[C2]](i32) + ; GFX9-NEXT: [[TRUNC25:%[0-9]+]]:_(i16) = G_TRUNC [[UV5]](i32) + ; GFX9-NEXT: [[TRUNC26:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR15]](i32) + ; GFX9-NEXT: [[TRUNC27:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR16]](i32) + ; GFX9-NEXT: [[TRUNC28:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR17]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR10:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC25]](i16), [[TRUNC26]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR11:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC27]](i16), [[TRUNC28]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS5:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR10]](<2 x i16>), [[BUILD_VECTOR11]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC29:%[0-9]+]]:_(<4 x i8>) = G_TRUNC [[CONCAT_VECTORS5]](<4 x i16>) + ; GFX9-NEXT: [[LSHR18:%[0-9]+]]:_(i32) = G_LSHR [[UV6]], [[C]](i32) + ; GFX9-NEXT: [[LSHR19:%[0-9]+]]:_(i32) = G_LSHR [[UV6]], [[C1]](i32) + ; GFX9-NEXT: [[LSHR20:%[0-9]+]]:_(i32) = G_LSHR [[UV6]], [[C2]](i32) + ; GFX9-NEXT: [[TRUNC30:%[0-9]+]]:_(i16) = G_TRUNC [[UV6]](i32) + ; GFX9-NEXT: [[TRUNC31:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR18]](i32) + ; GFX9-NEXT: [[TRUNC32:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR19]](i32) + ; GFX9-NEXT: [[TRUNC33:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR20]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR12:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR 
[[TRUNC30]](i16), [[TRUNC31]](i16)
+ ; GFX9-NEXT: [[BUILD_VECTOR13:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC32]](i16), [[TRUNC33]](i16)
+ ; GFX9-NEXT: [[CONCAT_VECTORS6:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR12]](<2 x i16>), [[BUILD_VECTOR13]](<2 x i16>)
+ ; GFX9-NEXT: [[TRUNC34:%[0-9]+]]:_(<4 x i8>) = G_TRUNC [[CONCAT_VECTORS6]](<4 x i16>)
+ ; GFX9-NEXT: [[LSHR21:%[0-9]+]]:_(i32) = G_LSHR [[UV7]], [[C]](i32)
+ ; GFX9-NEXT: [[LSHR22:%[0-9]+]]:_(i32) = G_LSHR [[UV7]], [[C1]](i32)
+ ; GFX9-NEXT: [[LSHR23:%[0-9]+]]:_(i32) = G_LSHR [[UV7]], [[C2]](i32)
+ ; GFX9-NEXT: [[TRUNC35:%[0-9]+]]:_(i16) = G_TRUNC [[UV7]](i32)
+ ; GFX9-NEXT: [[TRUNC36:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR21]](i32)
+ ; GFX9-NEXT: [[TRUNC37:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR22]](i32)
+ ; GFX9-NEXT: [[TRUNC38:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR23]](i32)
+ ; GFX9-NEXT: [[BUILD_VECTOR14:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC35]](i16), [[TRUNC36]](i16)
+ ; GFX9-NEXT: [[BUILD_VECTOR15:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC37]](i16), [[TRUNC38]](i16)
+ ; GFX9-NEXT: [[CONCAT_VECTORS7:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR14]](<2 x i16>), [[BUILD_VECTOR15]](<2 x i16>)
+ ; GFX9-NEXT: [[TRUNC39:%[0-9]+]]:_(<4 x i8>) = G_TRUNC [[CONCAT_VECTORS7]](<4 x i16>)
+ ; GFX9-NEXT: [[CONCAT_VECTORS8:%[0-9]+]]:_(<32 x i8>) = G_CONCAT_VECTORS [[TRUNC4]](<4 x i8>), [[TRUNC9]](<4 x i8>), [[TRUNC14]](<4 x i8>), [[TRUNC19]](<4 x i8>), [[TRUNC24]](<4 x i8>), [[TRUNC29]](<4 x i8>), [[TRUNC34]](<4 x i8>), [[TRUNC39]](<4 x i8>)
+ ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS8]](<32 x i8>)
%0:_(p4) = COPY $vgpr0_vgpr1
- %1:_(<32 x s8>) = G_LOAD %0 :: (load (<32 x s8>), align 32, addrspace 4)
- $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1
+ %1:_(<32 x i8>) = G_LOAD %0(p4) :: (load (<32 x i8>), addrspace 4)
+ $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1(<32 x i8>)
...

---
@@ -3326,25 +3326,25 @@ body: |
; CI: liveins: $vgpr0_vgpr1
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
- ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p4) :: (load (<2 x s16>), addrspace 4)
- ; CI-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+ ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p4) :: (load (<2 x i16>), addrspace 4)
+ ; CI-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>)
;
; VI-LABEL: name: test_load_constant_v2s16_align4
; VI: liveins: $vgpr0_vgpr1
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
- ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p4) :: (load (<2 x s16>), addrspace 4)
- ; VI-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p4) :: (load (<2 x i16>), addrspace 4)
+ ; VI-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>)
;
; GFX9-LABEL: name: test_load_constant_v2s16_align4
; GFX9: liveins: $vgpr0_vgpr1
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
- ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p4) :: (load (<2 x s16>), addrspace 4)
- ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+ ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p4) :: (load (<2 x i16>), addrspace 4)
+ ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>)
%0:_(p4) = COPY $vgpr0_vgpr1
- %1:_(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 4)
- $vgpr0 = COPY %1
+ %1:_(<2 x i16>) = G_LOAD %0(p4) :: (load (<2 x i16>), addrspace 4)
+ $vgpr0 = COPY %1(<2 x i16>)
...
--- @@ -3357,51 +3357,51 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]] - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i16), addrspace 4) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i16) from unknown-address + 2, addrspace 4) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C1]] + ; CI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C1]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C2]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x i16>) ; ; VI-LABEL: name: test_load_constant_v2s16_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i16), addrspace 4) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i16) from unknown-address + 2, addrspace 4) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C1]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C1]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C2]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x i16>) ; ; GFX9-LABEL: 
name: test_load_constant_v2s16_align2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i16), addrspace 4) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i16) from unknown-address + 2, addrspace 4) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 2, addrspace 4) - $vgpr0 = COPY %1 + %1:_(<2 x i16>) = G_LOAD %0(p4) :: (load (<2 x i16>), align 2, addrspace 4) + $vgpr0 = COPY %1(<2 x i16>) ... --- @@ -3414,81 +3414,81 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C3]] - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C3]] - ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL2]] - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD 
[[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 4) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i8) from unknown-address + 3, addrspace 4) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[OR]], [[C3]] + ; CI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[OR1]], [[C3]] + ; CI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C4]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL2]] + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; CI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x i16>) ; ; VI-LABEL: name: test_load_constant_v2s16_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C3]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C3]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL2]] - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL 
[[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 4) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i8) from unknown-address + 3, addrspace 4) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[OR]], [[C3]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[OR1]], [[C3]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C4]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL2]] + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x i16>) ; ; GFX9-LABEL: name: test_load_constant_v2s16_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR]](i32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i8) from 
unknown-address + 2, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i8) from unknown-address + 3, addrspace 4) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 1, addrspace 4) - $vgpr0 = COPY %1 + %1:_(<2 x i16>) = G_LOAD %0(p4) :: (load (<2 x i16>), align 1, addrspace 4) + $vgpr0 = COPY %1(<2 x i16>) ... --- @@ -3501,80 +3501,80 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), addrspace 4) - ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] - ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p4) :: (load (<4 x i16>), addrspace 4) + ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[LOAD]](<4 x i16>) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; CI-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; CI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; CI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C1]] + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR 
[[AND]], [[SHL]] + ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C1]] + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND2]], [[C]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL1]] + ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[UV]](<2 x i16>), [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; VI-LABEL: name: test_load_constant_v3s16_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), addrspace 4) - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p4) :: (load (<4 x i16>), addrspace 4) + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[LOAD]](<4 x i16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C1]] + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C1]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND2]], [[C]](i32) + ; VI-NEXT: 
[[OR1:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL1]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[UV]](<2 x i16>), [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX9-LABEL: name: test_load_constant_v3s16_align8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), addrspace 4) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p4) :: (load (<4 x i16>), addrspace 4) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[LOAD]](<4 x i16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[UV]](<2 x i16>), [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 8, addrspace 4) - %2:_(<3 x s16>) = G_IMPLICIT_DEF - %3:_(<6 x s16>) = G_CONCAT_VECTORS %1, %2 - 
$vgpr0_vgpr1_vgpr2 = COPY %3 + %1:_(<3 x i16>) = G_LOAD %0(p4) :: (load (<3 x i16>), align 8, addrspace 4) + %2:_(<3 x i16>) = G_IMPLICIT_DEF + %3:_(<6 x i16>) = G_CONCAT_VECTORS %1(<3 x i16>), %2(<3 x i16>) + $vgpr0_vgpr1_vgpr2 = COPY %3(<6 x i16>) ... --- @@ -3587,105 +3587,105 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, align 4, addrspace 4) - ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]] - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]] - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]] - ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]] - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] - ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i16), align 4, addrspace 4) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i16) from unknown-address + 2, addrspace 4) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p4) :: (load (i16) from unknown-address + 4, align 4, addrspace 4) + ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR 
[[BITCAST]], [[C2]](i32) + ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C3]] + ; CI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C3]] + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C2]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C3]] + ; CI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C3]] + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C2]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CI-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C3]] + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND4]], [[C2]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL2]] + ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x i16>), [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; VI-LABEL: name: test_load_constant_v3s16_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, align 4, addrspace 4) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 
x s16>), [[BITCAST4]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i16), align 4, addrspace 4) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i16) from unknown-address + 2, addrspace 4) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p4) :: (load (i16) from unknown-address + 4, align 4, addrspace 4) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C3]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C3]] + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C2]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C3]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C3]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C2]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; VI-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C3]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND4]], [[C2]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL2]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x i16>), [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX9-LABEL: name: test_load_constant_v3s16_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, align 4, addrspace 4) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-NEXT: 
[[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
- ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
- ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+ ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i16), align 4, addrspace 4)
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32)
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2
+ ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64)
+ ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i16) from unknown-address + 2, addrspace 4)
+ ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32)
+ ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4
+ ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64)
+ ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p4) :: (load (i16) from unknown-address + 4, align 4, addrspace 4)
+ ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>)
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>)
+ ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32)
+ ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32)
+ ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32)
+ ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>)
+ ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16)
+ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16)
+ ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16)
+ ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>)
+ ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>)
%0:_(p4) = COPY $vgpr0_vgpr1
- %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 4, addrspace 4)
- %2:_(<3 x s16>) = G_IMPLICIT_DEF
- %3:_(<6 x s16>) = G_CONCAT_VECTORS %1, %2
- $vgpr0_vgpr1_vgpr2 = COPY %3
+ %1:_(<3 x i16>) = G_LOAD %0(p4) :: (load (<3 x i16>), align 4, addrspace 4)
+ %2:_(<3 x i16>) = G_IMPLICIT_DEF
+ %3:_(<6 x i16>) = G_CONCAT_VECTORS %1(<3 x i16>), %2(<3 x i16>)
+ $vgpr0_vgpr1_vgpr2 = COPY %3(<6 x i16>)
...
--- @@ -3698,105 +3698,105 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4) - ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]] - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]] - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]] - ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]] - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] - ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i16), addrspace 4) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i16) from unknown-address + 2, addrspace 4) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p4) :: (load (i16) from unknown-address + 4, addrspace 4) + ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C3]] + ; CI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C3]] + ; CI-NEXT: 
[[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C2]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C3]] + ; CI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C3]] + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C2]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CI-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C3]] + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND4]], [[C2]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL2]] + ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x i16>), [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; VI-LABEL: name: test_load_constant_v3s16_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i16), addrspace 4) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], 
[[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i16) from unknown-address + 2, addrspace 4) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p4) :: (load (i16) from unknown-address + 4, addrspace 4) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C3]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C3]] + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C2]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C3]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C3]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C2]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; VI-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C3]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND4]], [[C2]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL2]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x i16>), [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX9-LABEL: name: test_load_constant_v3s16_align2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: 
[[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i16), addrspace 4) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i16) from unknown-address + 2, addrspace 4) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p4) :: (load (i16) from unknown-address + 4, addrspace 4) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 2, addrspace 4) - %2:_(<3 x s16>) = G_IMPLICIT_DEF - %3:_(<6 x s16>) = G_CONCAT_VECTORS %1, %2 - $vgpr0_vgpr1_vgpr2 = COPY %3 + %1:_(<3 x i16>) = G_LOAD %0(p4) :: (load (<3 x i16>), align 2, addrspace 4) + %2:_(<3 x i16>) = G_IMPLICIT_DEF + %3:_(<6 x i16>) = G_CONCAT_VECTORS %1(<3 x i16>), %2(<3 x i16>) + $vgpr0_vgpr1_vgpr2 = COPY %3(<6 x i16>) ... 
--- @@ -3809,147 +3809,147 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) - ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C5]] - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C5]] - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL3]] - ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C5]] - ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]] - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] - ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]] - ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; CI-NEXT: 
[[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 4) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i8) from unknown-address + 3, addrspace 4) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (i8) from unknown-address + 4, addrspace 4) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p4) :: (load (i8) from unknown-address + 5, addrspace 4) + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C4]](i32) + ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[OR]], [[C5]] + ; CI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[OR1]], [[C5]] + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C4]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL3]] + ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR3]](i32) + ; CI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[OR2]], [[C5]] + ; CI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C5]] + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C4]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL4]] + ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR4]](i32) + ; CI-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C5]] + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[AND4]], [[C4]](i32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL5]] + ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR5]](i32) + ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x i16>), [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; VI-LABEL: name: test_load_constant_v3s16_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD 
[[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C5]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C5]] - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL3]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C5]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]] - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], 
[[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 4) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i8) from unknown-address + 3, addrspace 4) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (i8) from unknown-address + 4, addrspace 4) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p4) :: (load (i8) from unknown-address + 5, addrspace 4) + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C4]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[OR]], [[C5]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[OR1]], [[C5]] + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C4]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL3]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR3]](i32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[OR2]], [[C5]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C5]] + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C4]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL4]] + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR4]](i32) + ; VI-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[AND4]], [[C4]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL5]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR5]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x i16>), [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX9-LABEL: name: test_load_constant_v3s16_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], 
[[ZEXTLOAD]] - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR]](i32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; GFX9-NEXT: 
[[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i8) from unknown-address + 3, addrspace 4) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (i8) from unknown-address + 4, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p4) :: (load (i8) from unknown-address + 5, addrspace 4) + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[OR2]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C4]](i32) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 1, addrspace 4) - %2:_(<3 x s16>) = G_IMPLICIT_DEF - %3:_(<6 x s16>) = G_CONCAT_VECTORS %1, %2 - $vgpr0_vgpr1_vgpr2 = COPY %3 + %1:_(<3 x i16>) = G_LOAD %0(p4) :: (load (<3 x i16>), align 1, addrspace 4) + %2:_(<3 x i16>) = G_IMPLICIT_DEF + %3:_(<6 x i16>) = G_CONCAT_VECTORS %1(<3 x i16>), %2(<3 x i16>) + $vgpr0_vgpr1_vgpr2 = COPY %3(<6 x i16>) ... 
--- @@ -3962,25 +3962,25 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), addrspace 4) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p4) :: (load (<4 x i16>), addrspace 4) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; VI-LABEL: name: test_load_constant_v4s16_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), addrspace 4) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p4) :: (load (<4 x i16>), addrspace 4) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; GFX9-LABEL: name: test_load_constant_v4s16_align8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), addrspace 4) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p4) :: (load (<4 x i16>), addrspace 4) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 8, addrspace 4) - $vgpr0_vgpr1 = COPY %1 + %1:_(<4 x i16>) = G_LOAD %0(p4) :: (load (<4 x i16>), addrspace 4) + $vgpr0_vgpr1 = COPY %1(<4 x i16>) ... --- @@ -3993,25 +3993,25 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), align 4, addrspace 4) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p4) :: (load (<4 x i16>), align 4, addrspace 4) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; VI-LABEL: name: test_load_constant_v4s16_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), align 4, addrspace 4) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p4) :: (load (<4 x i16>), align 4, addrspace 4) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; GFX9-LABEL: name: test_load_constant_v4s16_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), align 4, addrspace 4) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p4) :: (load (<4 x i16>), align 4, addrspace 4) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 4, addrspace 4) - $vgpr0_vgpr1 = COPY %1 + %1:_(<4 x i16>) = G_LOAD %0(p4) :: (load (<4 x i16>), align 4, addrspace 4) + $vgpr0_vgpr1 = COPY %1(<4 x i16>) ... 
--- @@ -4024,85 +4024,85 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4) - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]] - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]] - ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]] - ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]] - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i16), addrspace 4) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i16) from unknown-address + 2, addrspace 4) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p4) :: (load (i16) from unknown-address + 4, addrspace 4) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i16) from unknown-address + 6, addrspace 4) + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C3]] + ; CI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C3]] + ; CI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C4]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C3]] + ; CI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[LOAD3]], [[C3]] + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C4]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS 
[[BITCAST]](<2 x i16>), [[BITCAST1]](<2 x i16>) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; VI-LABEL: name: test_load_constant_v4s16_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i16), addrspace 4) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i16) from unknown-address + 2, addrspace 4) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p4) :: (load (i16) from unknown-address + 4, addrspace 4) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i16) from unknown-address + 6, addrspace 4) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C3]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C3]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C4]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C3]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[LOAD3]], [[C3]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C4]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; VI-NEXT: 
[[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x i16>), [[BITCAST1]](<2 x i16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX9-LABEL: name: test_load_constant_v4s16_align2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i16), addrspace 4) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i16) from unknown-address + 2, addrspace 4) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p4) :: (load (i16) from unknown-address + 4, addrspace 4) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i16) from unknown-address + 6, addrspace 4) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD3]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 2, addrspace 4) - $vgpr0_vgpr1 = COPY %1 + %1:_(<4 
x i16>) = G_LOAD %0(p4) :: (load (<4 x i16>), align 2, addrspace 4) + $vgpr0_vgpr1 = COPY %1(<4 x i16>) ... --- @@ -4115,139 +4115,139 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C5]] - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C5]] - ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL4]] - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C5]] - ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL5]] - ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; 
CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 4) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i8) from unknown-address + 3, addrspace 4) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (i8) from unknown-address + 4, addrspace 4) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p4) :: (load (i8) from unknown-address + 5, addrspace 4) + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (i8) from unknown-address + 6, addrspace 4) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p4) :: (load (i8) from unknown-address + 7, addrspace 4) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[OR]], [[C5]] + ; CI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[OR1]], [[C5]] + ; CI-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C6]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL4]] + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR4]](i32) + ; CI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[OR2]], [[C5]] + ; CI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[OR3]], [[C5]] + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C6]](i32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL5]] + ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR5]](i32) + ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x i16>), [[BITCAST1]](<2 x i16>) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; VI-LABEL: name: test_load_constant_v4s16_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) 
from unknown-address + 1, addrspace 4) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C5]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C5]] - ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL4]] - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C5]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL5]] - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) 
+ ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 4) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i8) from unknown-address + 3, addrspace 4) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (i8) from unknown-address + 4, addrspace 4) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p4) :: (load (i8) from unknown-address + 5, addrspace 4) + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (i8) from unknown-address + 6, addrspace 4) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p4) :: (load (i8) from unknown-address + 7, addrspace 4) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[OR]], [[C5]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[OR1]], [[C5]] + ; VI-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C6]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL4]] + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR4]](i32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[OR2]], [[C5]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[OR3]], [[C5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C6]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL5]] + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR5]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x i16>), [[BITCAST1]](<2 x i16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX9-LABEL: name: test_load_constant_v4s16_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 
2, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32) - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) - ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[OR3]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR]](i32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i8) from unknown-address + 3, addrspace 4) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; GFX9-NEXT: 
[[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (i8) from unknown-address + 4, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p4) :: (load (i8) from unknown-address + 5, addrspace 4) + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[OR2]](i32) + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (i8) from unknown-address + 6, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p4) :: (load (i8) from unknown-address + 7, addrspace 4) + ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[OR3]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 1, addrspace 4) - $vgpr0_vgpr1 = COPY %1 + %1:_(<4 x i16>) = G_LOAD %0(p4) :: (load (<4 x i16>), align 1, addrspace 4) + $vgpr0_vgpr1 = COPY %1(<4 x i16>) ... 
--- @@ -4260,28 +4260,28 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 8, addrspace 4) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p4) :: (load (<4 x i32>), align 8, addrspace 4) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i16>) = G_BITCAST [[LOAD]](<4 x i32>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x i16>) ; ; VI-LABEL: name: test_load_constant_v8s16_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 8, addrspace 4) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p4) :: (load (<4 x i32>), align 8, addrspace 4) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i16>) = G_BITCAST [[LOAD]](<4 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x i16>) ; ; GFX9-LABEL: name: test_load_constant_v8s16_align8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 8, addrspace 4) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p4) :: (load (<4 x i32>), align 8, addrspace 4) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i16>) = G_BITCAST [[LOAD]](<4 x i32>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x i16>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<8 x s16>) = G_LOAD %0 :: (load (<8 x s16>), align 8, addrspace 4) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<8 x i16>) = G_LOAD %0(p4) :: (load (<8 x i16>), align 8, addrspace 4) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<8 x i16>) ... 
--- @@ -4294,25 +4294,25 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), addrspace 4) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p4) :: (load (<2 x i32>), addrspace 4) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; VI-LABEL: name: test_load_constant_v2s32_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), addrspace 4) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p4) :: (load (<2 x i32>), addrspace 4) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX9-LABEL: name: test_load_constant_v2s32_align8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), addrspace 4) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p4) :: (load (<2 x i32>), addrspace 4) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 8, addrspace 4) - $vgpr0_vgpr1 = COPY %1 + %1:_(<2 x i32>) = G_LOAD %0(p4) :: (load (<2 x i32>), addrspace 4) + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... --- @@ -4325,25 +4325,25 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), align 4, addrspace 4) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p4) :: (load (<2 x i32>), align 4, addrspace 4) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; VI-LABEL: name: test_load_constant_v2s32_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), align 4, addrspace 4) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p4) :: (load (<2 x i32>), align 4, addrspace 4) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX9-LABEL: name: test_load_constant_v2s32_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), align 4, addrspace 4) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p4) :: (load (<2 x i32>), align 4, addrspace 4) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 4, addrspace 4) - $vgpr0_vgpr1 = COPY %1 + %1:_(<2 x i32>) = G_LOAD %0(p4) :: (load (<2 x i32>), align 4, addrspace 4) + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... 
--- @@ -4356,67 +4356,67 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s16), addrspace 4) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i16), addrspace 4) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i16) from unknown-address + 2, addrspace 4) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i16) from unknown-address + 4, addrspace 4) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i16) from unknown-address + 6, addrspace 4) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; VI-LABEL: name: test_load_constant_v2s32_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s16), addrspace 4) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s16) from 
unknown-address + 4, addrspace 4) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i16), addrspace 4) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i16) from unknown-address + 2, addrspace 4) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i16) from unknown-address + 4, addrspace 4) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i16) from unknown-address + 6, addrspace 4) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX9-LABEL: name: test_load_constant_v2s32_align2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s16), addrspace 4) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i16), addrspace 4) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i16) from 
unknown-address + 2, addrspace 4) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i16) from unknown-address + 4, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i16) from unknown-address + 6, addrspace 4) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 2, addrspace 4) - $vgpr0_vgpr1 = COPY %1 + %1:_(<2 x i32>) = G_LOAD %0(p4) :: (load (<2 x i32>), align 2, addrspace 4) + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... --- @@ -4429,121 +4429,121 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; CI-NEXT: 
[[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 4) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i8) from unknown-address + 3, addrspace 4) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (i8) from unknown-address + 4, addrspace 4) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (i8) from unknown-address + 5, addrspace 4) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (i8) from unknown-address + 6, addrspace 4) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p4) :: (load (i8) from unknown-address + 7, addrspace 4) + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; VI-LABEL: name: test_load_constant_v2s32_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: 
[[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 4) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i8) from unknown-address + 3, addrspace 4) + ; VI-NEXT: 
[[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (i8) from unknown-address + 4, addrspace 4) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (i8) from unknown-address + 5, addrspace 4) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (i8) from unknown-address + 6, addrspace 4) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p4) :: (load (i8) from unknown-address + 7, addrspace 4) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX9-LABEL: name: test_load_constant_v2s32_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-NEXT: 
[[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) - ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) - ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i8) from unknown-address + 3, addrspace 4) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (i8) from unknown-address + 4, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (i8) from unknown-address + 5, addrspace 4) + ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (i8) from unknown-address + 6, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p4) :: (load (i8) from unknown-address + 7, addrspace 4) + ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR 
[[SHL4]], [[ZEXTLOAD5]] + ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 1, addrspace 4) - $vgpr0_vgpr1 = COPY %1 + %1:_(<2 x i32>) = G_LOAD %0(p4) :: (load (<2 x i32>), align 1, addrspace 4) + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... --- @@ -4556,25 +4556,25 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 16, addrspace 4) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p4) :: (load (<3 x i32>), align 16, addrspace 4) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) ; ; VI-LABEL: name: test_load_constant_v3s32_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 16, addrspace 4) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p4) :: (load (<3 x i32>), align 16, addrspace 4) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) ; ; GFX9-LABEL: name: test_load_constant_v3s32_align16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 16, addrspace 4) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p4) :: (load (<3 x i32>), align 16, addrspace 4) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 16, addrspace 4) - $vgpr0_vgpr1_vgpr2 = COPY %1 + %1:_(<3 x i32>) = G_LOAD %0(p4) :: (load (<3 x i32>), align 16, addrspace 4) + $vgpr0_vgpr1_vgpr2 = COPY %1(<3 x i32>) ... 
@@ -4589,25 +4589,25 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 4, addrspace 4) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p4) :: (load (<3 x i32>), align 4, addrspace 4) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) ; ; VI-LABEL: name: test_load_constant_v3s32_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 4, addrspace 4) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p4) :: (load (<3 x i32>), align 4, addrspace 4) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) ; ; GFX9-LABEL: name: test_load_constant_v3s32_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 4, addrspace 4) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p4) :: (load (<3 x i32>), align 4, addrspace 4) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 4, addrspace 4) - $vgpr0_vgpr1_vgpr2 = COPY %1 + %1:_(<3 x i32>) = G_LOAD %0(p4) :: (load (<3 x i32>), align 4, addrspace 4) + $vgpr0_vgpr1_vgpr2 = COPY %1(<3 x i32>) ... --- @@ -4620,25 +4620,25 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), addrspace 4) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p4) :: (load (<4 x i32>), addrspace 4) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; VI-LABEL: name: test_load_constant_v4s32_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), addrspace 4) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p4) :: (load (<4 x i32>), addrspace 4) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; GFX9-LABEL: name: test_load_constant_v4s32_align16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), addrspace 4) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p4) :: (load (<4 x i32>), addrspace 4) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 16, addrspace 4) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<4 x i32>) = G_LOAD %0(p4) :: (load (<4 x i32>), addrspace 4) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<4 x i32>) ... 
--- @@ -4651,25 +4651,25 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 8, addrspace 4) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p4) :: (load (<4 x i32>), align 8, addrspace 4) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; VI-LABEL: name: test_load_constant_v4s32_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 8, addrspace 4) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p4) :: (load (<4 x i32>), align 8, addrspace 4) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; GFX9-LABEL: name: test_load_constant_v4s32_align8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 8, addrspace 4) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p4) :: (load (<4 x i32>), align 8, addrspace 4) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 8, addrspace 4) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<4 x i32>) = G_LOAD %0(p4) :: (load (<4 x i32>), align 8, addrspace 4) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<4 x i32>) ... --- @@ -4682,25 +4682,25 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 4) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p4) :: (load (<4 x i32>), align 4, addrspace 4) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; VI-LABEL: name: test_load_constant_v4s32_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 4) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p4) :: (load (<4 x i32>), align 4, addrspace 4) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; GFX9-LABEL: name: test_load_constant_v4s32_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 4) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p4) :: (load (<4 x i32>), align 4, addrspace 4) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 4, addrspace 4) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<4 x i32>) = G_LOAD %0(p4) :: (load (<4 x i32>), align 4, addrspace 4) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<4 x i32>) ... 
--- @@ -4713,25 +4713,25 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s32>), addrspace 4) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x i32>) = G_LOAD [[COPY]](p4) :: (load (<8 x i32>), addrspace 4) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x i32>) ; ; VI-LABEL: name: test_load_constant_v8s32_align32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s32>), addrspace 4) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x i32>) = G_LOAD [[COPY]](p4) :: (load (<8 x i32>), addrspace 4) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x i32>) ; ; GFX9-LABEL: name: test_load_constant_v8s32_align32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s32>), addrspace 4) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<8 x i32>) = G_LOAD [[COPY]](p4) :: (load (<8 x i32>), addrspace 4) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x i32>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<8 x s32>) = G_LOAD %0 :: (load (<8 x s32>), align 32, addrspace 4) - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 + %1:_(<8 x i32>) = G_LOAD %0(p4) :: (load (<8 x i32>), addrspace 4) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1(<8 x i32>) ... 
--- @@ -4744,25 +4744,25 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s32>), align 32, addrspace 4) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<16 x i32>) = G_LOAD [[COPY]](p4) :: (load (<16 x i32>), align 32, addrspace 4) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x i32>) ; ; VI-LABEL: name: test_load_constant_v16s32_align32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s32>), align 32, addrspace 4) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<16 x i32>) = G_LOAD [[COPY]](p4) :: (load (<16 x i32>), align 32, addrspace 4) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x i32>) ; ; GFX9-LABEL: name: test_load_constant_v16s32_align32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s32>), align 32, addrspace 4) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<16 x i32>) = G_LOAD [[COPY]](p4) :: (load (<16 x i32>), align 32, addrspace 4) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x i32>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<16 x s32>) = G_LOAD %0 :: (load (<16 x s32>), align 32, addrspace 4) - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1 + %1:_(<16 x i32>) = G_LOAD %0(p4) :: (load (<16 x i32>), align 32, addrspace 4) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1(<16 x i32>) ... 
--- @@ -4775,25 +4775,25 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s16>), addrspace 4) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<16 x i32>) = G_LOAD [[COPY]](p4) :: (load (<16 x i16>), addrspace 4) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x i32>) ; ; VI-LABEL: name: test_load_constant_v16s32_align32_extload_from_v16s16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s16>), addrspace 4) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<16 x i32>) = G_LOAD [[COPY]](p4) :: (load (<16 x i16>), addrspace 4) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x i32>) ; ; GFX9-LABEL: name: test_load_constant_v16s32_align32_extload_from_v16s16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s16>), addrspace 4) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<16 x i32>) = G_LOAD [[COPY]](p4) :: (load (<16 x i16>), addrspace 4) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x i32>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<16 x s32>) = G_LOAD %0 :: (load (<16 x s16>), align 32, addrspace 4) - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1 + %1:_(<16 x i32>) = G_LOAD %0(p4) :: (load (<16 x i16>), addrspace 4) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1(<16 x i32>) ... 
--- @@ -4806,25 +4806,25 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), addrspace 4) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p4) :: (load (<2 x i64>), addrspace 4) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) ; ; VI-LABEL: name: test_load_constant_v2s64_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), addrspace 4) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p4) :: (load (<2 x i64>), addrspace 4) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) ; ; GFX9-LABEL: name: test_load_constant_v2s64_align16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), addrspace 4) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p4) :: (load (<2 x i64>), addrspace 4) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 16, addrspace 4) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<2 x i64>) = G_LOAD %0(p4) :: (load (<2 x i64>), addrspace 4) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x i64>) ... --- @@ -4837,25 +4837,25 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), align 8, addrspace 4) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p4) :: (load (<2 x i64>), align 8, addrspace 4) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) ; ; VI-LABEL: name: test_load_constant_v2s64_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), align 8, addrspace 4) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p4) :: (load (<2 x i64>), align 8, addrspace 4) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) ; ; GFX9-LABEL: name: test_load_constant_v2s64_align8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), align 8, addrspace 4) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p4) :: (load (<2 x i64>), align 8, addrspace 4) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 8, addrspace 4) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<2 x i64>) = G_LOAD %0(p4) :: (load (<2 x i64>), align 8, addrspace 4) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x i64>) ... 
--- @@ -4868,25 +4868,25 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), align 4, addrspace 4) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p4) :: (load (<2 x i64>), align 4, addrspace 4) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) ; ; VI-LABEL: name: test_load_constant_v2s64_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), align 4, addrspace 4) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p4) :: (load (<2 x i64>), align 4, addrspace 4) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) ; ; GFX9-LABEL: name: test_load_constant_v2s64_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), align 4, addrspace 4) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p4) :: (load (<2 x i64>), align 4, addrspace 4) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 4, addrspace 4) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<2 x i64>) = G_LOAD %0(p4) :: (load (<2 x i64>), align 4, addrspace 4) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x i64>) ... --- @@ -4899,136 +4899,136 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s16), addrspace 4) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] - ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s16) from 
unknown-address + 8, addrspace 4) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s16) from unknown-address + 10, addrspace 4) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD2]] - ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR3]](s32) - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (s16) from unknown-address + 12, addrspace 4) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s16) from unknown-address + 14, addrspace 4) - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD3]] - ; CI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR4]](s32) - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[SHL5]], [[ZEXT1]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR2]](s64), [[OR5]](s64) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i16), addrspace 4) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i16) from unknown-address + 2, addrspace 4) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR]](i32) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i16) from unknown-address + 4, addrspace 4) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i16) from unknown-address + 6, addrspace 4) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR1]](i32) + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i64) = G_OR [[SHL2]], [[ZEXT]] + ; CI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (i16) from unknown-address + 8, addrspace 4) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p4) :: (load (i16) from unknown-address + 10, addrspace 4) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD2]] + ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[OR3]](i32) + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; CI-NEXT: 
[[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (i16) from unknown-address + 12, addrspace 4) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p4) :: (load (i16) from unknown-address + 14, addrspace 4) + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD3]] + ; CI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[OR4]](i32) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C3]](i32) + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT1]], [[COPY1]](i32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(i64) = G_OR [[SHL5]], [[ZEXT1]] + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[OR2]](i64), [[OR5]](i64) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; VI-LABEL: name: test_load_constant_v2s64_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s16), addrspace 4) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s16) from unknown-address + 8, addrspace 4) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s16) from unknown-address + 10, addrspace 4) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD2]] - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR3]](s32) - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (s16) from unknown-address + 12, addrspace 4) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s16) from unknown-address + 14, addrspace 4) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; 
VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD3]] - ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR4]](s32) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[SHL5]], [[ZEXT1]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR2]](s64), [[OR5]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i16), addrspace 4) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i16) from unknown-address + 2, addrspace 4) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR]](i32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i16) from unknown-address + 4, addrspace 4) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i16) from unknown-address + 6, addrspace 4) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR1]](i32) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i64) = G_OR [[SHL2]], [[ZEXT]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (i16) from unknown-address + 8, addrspace 4) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p4) :: (load (i16) from unknown-address + 10, addrspace 4) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD2]] + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[OR3]](i32) + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (i16) from unknown-address + 12, addrspace 4) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p4) :: (load (i16) from unknown-address + 14, addrspace 4) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD3]] + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[OR4]](i32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C3]](i32) + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT1]], [[COPY1]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i64) = G_OR [[SHL5]], [[ZEXT1]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[OR2]](i64), [[OR5]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; GFX9-LABEL: name: 
test_load_constant_v2s64_align2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s16), addrspace 4) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) - ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s16) from unknown-address + 8, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s16) from unknown-address + 10, addrspace 4) - ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD2]] - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR3]](s32) - ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (s16) from unknown-address + 12, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s16) from unknown-address + 14, addrspace 4) - ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD3]] - ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR4]](s32) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) - ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[SHL5]], [[ZEXT1]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR2]](s64), [[OR5]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i16), addrspace 4) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i16) from 
unknown-address + 2, addrspace 4) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR]](i32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i16) from unknown-address + 4, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i16) from unknown-address + 6, addrspace 4) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR1]](i32) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C3]](i32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i64) = G_OR [[SHL2]], [[ZEXT]] + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (i16) from unknown-address + 8, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p4) :: (load (i16) from unknown-address + 10, addrspace 4) + ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD2]] + ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[OR3]](i32) + ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (i16) from unknown-address + 12, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p4) :: (load (i16) from unknown-address + 14, addrspace 4) + ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD3]] + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[OR4]](i32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C3]](i32) + ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT1]], [[COPY1]](i32) + ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(i64) = G_OR [[SHL5]], [[ZEXT1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[OR2]](i64), [[OR5]](i64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 2, addrspace 4) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<2 x i64>) = G_LOAD %0(p4) :: (load (<2 x i64>), align 2, addrspace 4) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x i64>) ... 
--- @@ -5041,238 +5041,238 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; CI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; CI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4) - ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4) - ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; CI-NEXT: 
[[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD6]] - ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4) - ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4) - ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[ZEXTLOAD8]] - ; CI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[OR8]], [[C3]](s32) - ; CI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[OR7]] - ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR9]](s32) - ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; CI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 4) - ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 4) - ; CI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; CI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD9]] - ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 4) - ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 4) - ; CI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; CI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[ZEXTLOAD11]] - ; CI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) - ; CI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] - ; CI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) - ; CI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 4) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i8) from unknown-address + 3, addrspace 4) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + 
; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; CI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (i8) from unknown-address + 4, addrspace 4) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (i8) from unknown-address + 5, addrspace 4) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (i8) from unknown-address + 6, addrspace 4) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p4) :: (load (i8) from unknown-address + 7, addrspace 4) + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; CI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; CI-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; CI-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; CI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p4) :: (load (i8) from unknown-address + 8, addrspace 4) + ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p4) :: (load (i8) from unknown-address + 9, addrspace 4) + ; CI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; CI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD6]] + ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p4) :: (load (i8) from unknown-address + 10, addrspace 4) + ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p4) :: (load (i8) from unknown-address + 11, addrspace 4) + ; CI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[ZEXTLOAD8]] + ; CI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[OR8]], [[C3]](i32) + ; CI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[OR7]] + ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[OR9]](i32) + ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i64) + ; CI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p4) :: (load (i8) from unknown-address + 12, addrspace 4) + ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p4) :: (load (i8) from unknown-address + 13, addrspace 4) + ; CI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; 
CI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD9]] + ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p4) :: (load (i8) from unknown-address + 14, addrspace 4) + ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p4) :: (load (i8) from unknown-address + 15, addrspace 4) + ; CI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; CI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[ZEXTLOAD11]] + ; CI-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[OR11]], [[C3]](i32) + ; CI-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[OR10]] + ; CI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[OR12]](i32) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; CI-NEXT: [[SHL13:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT1]], [[COPY1]](i32) + ; CI-NEXT: [[OR13:%[0-9]+]]:_(i64) = G_OR [[SHL13]], [[ZEXT1]] + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[OR6]](i64), [[OR13]](i64) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; VI-LABEL: name: test_load_constant_v2s64_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; 
VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; VI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4) - ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD6]] - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4) - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4) - ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[ZEXTLOAD8]] - ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[OR8]], [[C3]](s32) - ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[OR7]] - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR9]](s32) - ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 4) - ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 4) - ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD9]] - ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 4) - ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 4) - ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[ZEXTLOAD11]] - ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) - ; VI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] - ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) - ; VI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) - ; VI-NEXT: 
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 4) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i8) from unknown-address + 3, addrspace 4) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (i8) from unknown-address + 4, addrspace 4) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (i8) from unknown-address + 5, addrspace 4) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (i8) from unknown-address + 6, addrspace 4) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p4) :: (load (i8) from unknown-address + 7, addrspace 4) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; VI-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; VI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p4) :: (load (i8) from unknown-address + 8, addrspace 4) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p4) :: (load (i8) from unknown-address + 9, addrspace 4) + ; VI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; VI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD6]] + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = 
G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p4) :: (load (i8) from unknown-address + 10, addrspace 4) + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p4) :: (load (i8) from unknown-address + 11, addrspace 4) + ; VI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[ZEXTLOAD8]] + ; VI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[OR8]], [[C3]](i32) + ; VI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[OR7]] + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[OR9]](i32) + ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p4) :: (load (i8) from unknown-address + 12, addrspace 4) + ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p4) :: (load (i8) from unknown-address + 13, addrspace 4) + ; VI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; VI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD9]] + ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p4) :: (load (i8) from unknown-address + 14, addrspace 4) + ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p4) :: (load (i8) from unknown-address + 15, addrspace 4) + ; VI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; VI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[ZEXTLOAD11]] + ; VI-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[OR11]], [[C3]](i32) + ; VI-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[OR10]] + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[OR12]](i32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; VI-NEXT: [[SHL13:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT1]], [[COPY1]](i32) + ; VI-NEXT: [[OR13:%[0-9]+]]:_(i64) = G_OR [[SHL13]], [[ZEXT1]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[OR6]](i64), [[OR13]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; GFX9-LABEL: name: test_load_constant_v2s64_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL 
[[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) - ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) - ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; GFX9-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; GFX9-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4) - ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD6]] - ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4) - ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[ZEXTLOAD8]] - ; GFX9-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[OR8]], [[C3]](s32) - ; GFX9-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[OR7]] - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR9]](s32) - ; GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; GFX9-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; GFX9-NEXT: 
[[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 4) - ; GFX9-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; GFX9-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD9]] - ; GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 4) - ; GFX9-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[ZEXTLOAD11]] - ; GFX9-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) - ; GFX9-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] - ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX9-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) - ; GFX9-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i8) from unknown-address + 3, addrspace 4) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (i8) from unknown-address + 4, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (i8) from unknown-address + 5, addrspace 4) + ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: 
(load (i8) from unknown-address + 6, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p4) :: (load (i8) from unknown-address + 7, addrspace 4) + ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; GFX9-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; GFX9-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p4) :: (load (i8) from unknown-address + 8, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; GFX9-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p4) :: (load (i8) from unknown-address + 9, addrspace 4) + ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD6]] + ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; GFX9-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p4) :: (load (i8) from unknown-address + 10, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p4) :: (load (i8) from unknown-address + 11, addrspace 4) + ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[ZEXTLOAD8]] + ; GFX9-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[OR8]], [[C3]](i32) + ; GFX9-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[OR7]] + ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[OR9]](i32) + ; GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i64) + ; GFX9-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p4) :: (load (i8) from unknown-address + 12, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; GFX9-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p4) :: (load (i8) from unknown-address + 13, addrspace 4) + ; GFX9-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; GFX9-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD9]] + ; GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; GFX9-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p4) :: (load (i8) from unknown-address + 14, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p4) :: (load (i8) from unknown-address + 15, addrspace 4) + ; GFX9-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[ZEXTLOAD11]] + ; GFX9-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[OR11]], [[C3]](i32) + ; GFX9-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[OR10]] + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[OR12]](i32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; GFX9-NEXT: [[SHL13:%[0-9]+]]:_(i64) = 
G_SHL [[ANYEXT1]], [[COPY1]](i32) + ; GFX9-NEXT: [[OR13:%[0-9]+]]:_(i64) = G_OR [[SHL13]], [[ZEXT1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[OR6]](i64), [[OR13]](i64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 1, addrspace 4) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<2 x i64>) = G_LOAD %0(p4) :: (load (<2 x i64>), align 1, addrspace 4) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x i64>) ... --- @@ -5285,39 +5285,39 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), addrspace 4) - ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>) - ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i64>) = G_LOAD [[COPY]](p4) :: (load (<4 x i64>), addrspace 4) + ; CI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64), [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[LOAD]](<4 x i64>) + ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; CI-NEXT: [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64), [[UV6:%[0-9]+]]:_(i64), [[UV7:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[UV]](i64), [[UV1]](i64), [[UV2]](i64), [[UV7]](i64) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; VI-LABEL: name: test_load_constant_v3s64_align32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), addrspace 4) - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i64>) = G_LOAD [[COPY]](p4) :: (load (<4 x i64>), addrspace 4) + ; VI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64), [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[LOAD]](<4 x i64>) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64), [[UV6:%[0-9]+]]:_(i64), [[UV7:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[UV]](i64), [[UV1]](i64), [[UV2]](i64), [[UV7]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; GFX9-LABEL: name: 
test_load_constant_v3s64_align32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), addrspace 4) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i64>) = G_LOAD [[COPY]](p4) :: (load (<4 x i64>), addrspace 4) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64), [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[LOAD]](<4 x i64>) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64), [[UV6:%[0-9]+]]:_(i64), [[UV7:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[UV]](i64), [[UV1]](i64), [[UV2]](i64), [[UV7]](i64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 32, addrspace 4) - %2:_(<4 x s64>) = G_IMPLICIT_DEF - %3:_(<4 x s64>) = G_INSERT %2, %1, 0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3 + %1:_(<3 x i64>) = G_LOAD %0(p4) :: (load (<3 x i64>), align 32, addrspace 4) + %2:_(<4 x i64>) = G_IMPLICIT_DEF + %3:_(<4 x i64>) = G_INSERT %2, %1(<3 x i64>), 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3(<4 x i64>) ... 
--- @@ -5330,48 +5330,48 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), align 8, addrspace 4) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p4) :: (load (s64) from unknown-address + 16, addrspace 4) - ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p4) :: (load (<2 x i64>), align 8, addrspace 4) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD]](p4) :: (load (i64) from unknown-address + 16, addrspace 4) + ; CI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[LOAD]](<2 x i64>) + ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; CI-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64), [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[UV]](i64), [[UV1]](i64), [[LOAD1]](i64), [[UV5]](i64) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; VI-LABEL: name: test_load_constant_v3s64_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), align 8, addrspace 4) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p4) :: (load (s64) from unknown-address + 16, addrspace 4) - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p4) :: (load (<2 x i64>), align 8, addrspace 4) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD]](p4) :: (load (i64) from unknown-address + 16, addrspace 4) + ; VI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[LOAD]](<2 x i64>) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64), [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64) = 
G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[UV]](i64), [[UV1]](i64), [[LOAD1]](i64), [[UV5]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; GFX9-LABEL: name: test_load_constant_v3s64_align8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), align 8, addrspace 4) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p4) :: (load (s64) from unknown-address + 16, addrspace 4) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p4) :: (load (<2 x i64>), align 8, addrspace 4) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD]](p4) :: (load (i64) from unknown-address + 16, addrspace 4) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[LOAD]](<2 x i64>) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64), [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[UV]](i64), [[UV1]](i64), [[LOAD1]](i64), [[UV5]](i64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 8, addrspace 4) - %2:_(<4 x s64>) = G_IMPLICIT_DEF - %3:_(<4 x s64>) = G_INSERT %2, %1, 0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3 + %1:_(<3 x i64>) = G_LOAD %0(p4) :: (load (<3 x i64>), align 8, addrspace 4) + %2:_(<4 x i64>) = G_IMPLICIT_DEF + %3:_(<4 x i64>) = G_INSERT %2, %1(<3 x i64>), 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3(<4 x i64>) ... 
--- @@ -5384,348 +5384,348 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; CI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; CI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4) - ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4) - ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; CI-NEXT: 
[[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD6]] - ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4) - ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4) - ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[ZEXTLOAD8]] - ; CI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[OR8]], [[C3]](s32) - ; CI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[OR7]] - ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR9]](s32) - ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; CI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 4) - ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 4) - ; CI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; CI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD9]] - ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 4) - ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 4) - ; CI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; CI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[ZEXTLOAD11]] - ; CI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) - ; CI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] - ; CI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) - ; CI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] - ; CI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; CI-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD15]](p4) :: (load (s8) from unknown-address + 16, addrspace 4) - ; CI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD16]](p4) :: (load (s8) from unknown-address + 17, addrspace 4) - ; CI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD13]], [[C1]](s32) - ; CI-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[SHL14]], [[ZEXTLOAD12]] - ; CI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD17]](p4) :: (load (s8) from unknown-address + 18, addrspace 4) - ; CI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD17]], [[C]](s64) - ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p4) :: (load (s8) from unknown-address + 19, addrspace 4) - ; CI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; CI-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[SHL15]], [[ZEXTLOAD14]] - ; CI-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[OR15]], [[C3]](s32) - ; CI-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[SHL16]], 
[[OR14]] - ; CI-NEXT: [[ZEXT2:%[0-9]+]]:_(s64) = G_ZEXT [[OR16]](s32) - ; CI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; CI-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD19]](p4) :: (load (s8) from unknown-address + 20, addrspace 4) - ; CI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD20]](p4) :: (load (s8) from unknown-address + 21, addrspace 4) - ; CI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD16]], [[C1]](s32) - ; CI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[ZEXTLOAD15]] - ; CI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD21]](p4) :: (load (s8) from unknown-address + 22, addrspace 4) - ; CI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD21]], [[C]](s64) - ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p4) :: (load (s8) from unknown-address + 23, addrspace 4) - ; CI-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; CI-NEXT: [[OR18:%[0-9]+]]:_(s32) = G_OR [[SHL18]], [[ZEXTLOAD17]] - ; CI-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) - ; CI-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] - ; CI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) - ; CI-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] - ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 4) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i8) from unknown-address + 3, addrspace 4) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; CI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load 
(i8) from unknown-address + 4, addrspace 4) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (i8) from unknown-address + 5, addrspace 4) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (i8) from unknown-address + 6, addrspace 4) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p4) :: (load (i8) from unknown-address + 7, addrspace 4) + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; CI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; CI-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; CI-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; CI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p4) :: (load (i8) from unknown-address + 8, addrspace 4) + ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p4) :: (load (i8) from unknown-address + 9, addrspace 4) + ; CI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; CI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD6]] + ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p4) :: (load (i8) from unknown-address + 10, addrspace 4) + ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p4) :: (load (i8) from unknown-address + 11, addrspace 4) + ; CI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[ZEXTLOAD8]] + ; CI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[OR8]], [[C3]](i32) + ; CI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[OR7]] + ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[OR9]](i32) + ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i64) + ; CI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p4) :: (load (i8) from unknown-address + 12, addrspace 4) + ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p4) :: (load (i8) from unknown-address + 13, addrspace 4) + ; CI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; CI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD9]] + ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p4) :: (load (i8) from unknown-address + 14, addrspace 4) + ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p4) :: (load (i8) from unknown-address + 
15, addrspace 4) + ; CI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; CI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[ZEXTLOAD11]] + ; CI-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[OR11]], [[C3]](i32) + ; CI-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[OR10]] + ; CI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[OR12]](i32) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; CI-NEXT: [[SHL13:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT1]], [[COPY1]](i32) + ; CI-NEXT: [[OR13:%[0-9]+]]:_(i64) = G_OR [[SHL13]], [[ZEXT1]] + ; CI-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](i64) + ; CI-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD15]](p4) :: (load (i8) from unknown-address + 16, addrspace 4) + ; CI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD16]](p4) :: (load (i8) from unknown-address + 17, addrspace 4) + ; CI-NEXT: [[SHL14:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD13]], [[C1]](i32) + ; CI-NEXT: [[OR14:%[0-9]+]]:_(i32) = G_OR [[SHL14]], [[ZEXTLOAD12]] + ; CI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD17]](p4) :: (load (i8) from unknown-address + 18, addrspace 4) + ; CI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD17]], [[C]](i64) + ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD18]](p4) :: (load (i8) from unknown-address + 19, addrspace 4) + ; CI-NEXT: [[SHL15:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; CI-NEXT: [[OR15:%[0-9]+]]:_(i32) = G_OR [[SHL15]], [[ZEXTLOAD14]] + ; CI-NEXT: [[SHL16:%[0-9]+]]:_(i32) = G_SHL [[OR15]], [[C3]](i32) + ; CI-NEXT: [[OR16:%[0-9]+]]:_(i32) = G_OR [[SHL16]], [[OR14]] + ; CI-NEXT: [[ZEXT2:%[0-9]+]]:_(i64) = G_ZEXT [[OR16]](i32) + ; CI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C4]](i64) + ; CI-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD19]](p4) :: (load (i8) from unknown-address + 20, addrspace 4) + ; CI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD20]](p4) :: (load (i8) from unknown-address + 21, addrspace 4) + ; CI-NEXT: [[SHL17:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD16]], [[C1]](i32) + ; CI-NEXT: [[OR17:%[0-9]+]]:_(i32) = G_OR [[SHL17]], [[ZEXTLOAD15]] + ; CI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD21]](p4) :: (load (i8) from unknown-address + 22, addrspace 4) + ; CI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD21]], [[C]](i64) + ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD22]](p4) :: (load (i8) from unknown-address + 23, addrspace 4) + ; CI-NEXT: [[SHL18:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; CI-NEXT: [[OR18:%[0-9]+]]:_(i32) = G_OR [[SHL18]], [[ZEXTLOAD17]] + ; CI-NEXT: [[SHL19:%[0-9]+]]:_(i32) = G_SHL [[OR18]], [[C3]](i32) + ; CI-NEXT: [[OR19:%[0-9]+]]:_(i32) = G_OR [[SHL19]], [[OR17]] + ; CI-NEXT: [[ANYEXT2:%[0-9]+]]:_(i64) = G_ANYEXT [[OR19]](i32) + ; CI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; CI-NEXT: [[SHL20:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT2]], [[COPY2]](i32) + ; CI-NEXT: [[OR20:%[0-9]+]]:_(i64) = G_OR [[SHL20]], [[ZEXT2]] + ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; CI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64), [[UV2:%[0-9]+]]:_(i64), 
[[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[OR6]](i64), [[OR13]](i64), [[OR20]](i64), [[UV3]](i64) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; VI-LABEL: name: test_load_constant_v3s64_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; VI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4) - ; 
VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4) - ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD6]] - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4) - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4) - ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[ZEXTLOAD8]] - ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[OR8]], [[C3]](s32) - ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[OR7]] - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR9]](s32) - ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 4) - ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 4) - ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD9]] - ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 4) - ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 4) - ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[ZEXTLOAD11]] - ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) - ; VI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] - ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) - ; VI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] - ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; VI-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD15]](p4) :: (load (s8) from unknown-address + 16, addrspace 4) - ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD16]](p4) :: (load (s8) from unknown-address + 17, addrspace 4) - ; VI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD13]], [[C1]](s32) - ; VI-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[SHL14]], [[ZEXTLOAD12]] - ; VI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD17]](p4) :: (load (s8) from unknown-address + 18, addrspace 4) - ; VI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD17]], [[C]](s64) - ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p4) :: (load (s8) from unknown-address + 
19, addrspace 4) - ; VI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[SHL15]], [[ZEXTLOAD14]] - ; VI-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[OR15]], [[C3]](s32) - ; VI-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[SHL16]], [[OR14]] - ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s64) = G_ZEXT [[OR16]](s32) - ; VI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD19]](p4) :: (load (s8) from unknown-address + 20, addrspace 4) - ; VI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD20]](p4) :: (load (s8) from unknown-address + 21, addrspace 4) - ; VI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD16]], [[C1]](s32) - ; VI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[ZEXTLOAD15]] - ; VI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD21]](p4) :: (load (s8) from unknown-address + 22, addrspace 4) - ; VI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD21]], [[C]](s64) - ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p4) :: (load (s8) from unknown-address + 23, addrspace 4) - ; VI-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; VI-NEXT: [[OR18:%[0-9]+]]:_(s32) = G_OR [[SHL18]], [[ZEXTLOAD17]] - ; VI-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) - ; VI-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] - ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; VI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) - ; VI-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 4) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i8) from unknown-address + 3, addrspace 4) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR 
[[SHL2]], [[OR]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (i8) from unknown-address + 4, addrspace 4) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (i8) from unknown-address + 5, addrspace 4) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (i8) from unknown-address + 6, addrspace 4) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p4) :: (load (i8) from unknown-address + 7, addrspace 4) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; VI-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; VI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p4) :: (load (i8) from unknown-address + 8, addrspace 4) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p4) :: (load (i8) from unknown-address + 9, addrspace 4) + ; VI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; VI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD6]] + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p4) :: (load (i8) from unknown-address + 10, addrspace 4) + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p4) :: (load (i8) from unknown-address + 11, addrspace 4) + ; VI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[ZEXTLOAD8]] + ; VI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[OR8]], [[C3]](i32) + ; VI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[OR7]] + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[OR9]](i32) + ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p4) :: (load (i8) from unknown-address + 12, addrspace 4) + ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p4) :: (load (i8) from unknown-address + 13, addrspace 4) + ; VI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; VI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD9]] + ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; VI-NEXT: 
[[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p4) :: (load (i8) from unknown-address + 14, addrspace 4) + ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p4) :: (load (i8) from unknown-address + 15, addrspace 4) + ; VI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; VI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[ZEXTLOAD11]] + ; VI-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[OR11]], [[C3]](i32) + ; VI-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[OR10]] + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[OR12]](i32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; VI-NEXT: [[SHL13:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT1]], [[COPY1]](i32) + ; VI-NEXT: [[OR13:%[0-9]+]]:_(i64) = G_OR [[SHL13]], [[ZEXT1]] + ; VI-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](i64) + ; VI-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD15]](p4) :: (load (i8) from unknown-address + 16, addrspace 4) + ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD16]](p4) :: (load (i8) from unknown-address + 17, addrspace 4) + ; VI-NEXT: [[SHL14:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD13]], [[C1]](i32) + ; VI-NEXT: [[OR14:%[0-9]+]]:_(i32) = G_OR [[SHL14]], [[ZEXTLOAD12]] + ; VI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD17]](p4) :: (load (i8) from unknown-address + 18, addrspace 4) + ; VI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD17]], [[C]](i64) + ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD18]](p4) :: (load (i8) from unknown-address + 19, addrspace 4) + ; VI-NEXT: [[SHL15:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR15:%[0-9]+]]:_(i32) = G_OR [[SHL15]], [[ZEXTLOAD14]] + ; VI-NEXT: [[SHL16:%[0-9]+]]:_(i32) = G_SHL [[OR15]], [[C3]](i32) + ; VI-NEXT: [[OR16:%[0-9]+]]:_(i32) = G_OR [[SHL16]], [[OR14]] + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(i64) = G_ZEXT [[OR16]](i32) + ; VI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD19]](p4) :: (load (i8) from unknown-address + 20, addrspace 4) + ; VI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD20]](p4) :: (load (i8) from unknown-address + 21, addrspace 4) + ; VI-NEXT: [[SHL17:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD16]], [[C1]](i32) + ; VI-NEXT: [[OR17:%[0-9]+]]:_(i32) = G_OR [[SHL17]], [[ZEXTLOAD15]] + ; VI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD21]](p4) :: (load (i8) from unknown-address + 22, addrspace 4) + ; VI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD21]], [[C]](i64) + ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD22]](p4) :: (load (i8) from unknown-address + 23, addrspace 4) + ; VI-NEXT: [[SHL18:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; VI-NEXT: [[OR18:%[0-9]+]]:_(i32) = G_OR [[SHL18]], [[ZEXTLOAD17]] + ; VI-NEXT: [[SHL19:%[0-9]+]]:_(i32) = G_SHL [[OR18]], [[C3]](i32) + ; VI-NEXT: [[OR19:%[0-9]+]]:_(i32) = G_OR [[SHL19]], [[OR17]] + ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(i64) = G_ANYEXT [[OR19]](i32) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; 
VI-NEXT: [[SHL20:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT2]], [[COPY2]](i32) + ; VI-NEXT: [[OR20:%[0-9]+]]:_(i64) = G_OR [[SHL20]], [[ZEXT2]] + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64), [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[OR6]](i64), [[OR13]](i64), [[OR20]](i64), [[UV3]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; GFX9-LABEL: name: test_load_constant_v3s64_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) - ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) - ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s64) 
= G_SHL [[ANYEXT]], [[C5]](s32) - ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; GFX9-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; GFX9-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4) - ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD6]] - ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4) - ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[ZEXTLOAD8]] - ; GFX9-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[OR8]], [[C3]](s32) - ; GFX9-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[OR7]] - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR9]](s32) - ; GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; GFX9-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; GFX9-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 4) - ; GFX9-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; GFX9-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD9]] - ; GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 4) - ; GFX9-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[ZEXTLOAD11]] - ; GFX9-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) - ; GFX9-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] - ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX9-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) - ; GFX9-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] - ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; GFX9-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD15]](p4) :: (load (s8) from unknown-address + 16, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; GFX9-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD16]](p4) :: (load (s8) from unknown-address + 17, addrspace 4) - ; GFX9-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD13]], [[C1]](s32) - ; GFX9-NEXT: 
[[OR14:%[0-9]+]]:_(s32) = G_OR [[SHL14]], [[ZEXTLOAD12]] - ; GFX9-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD17]](p4) :: (load (s8) from unknown-address + 18, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD17]], [[C]](s64) - ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p4) :: (load (s8) from unknown-address + 19, addrspace 4) - ; GFX9-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; GFX9-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[SHL15]], [[ZEXTLOAD14]] - ; GFX9-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[OR15]], [[C3]](s32) - ; GFX9-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[SHL16]], [[OR14]] - ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s64) = G_ZEXT [[OR16]](s32) - ; GFX9-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; GFX9-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD19]](p4) :: (load (s8) from unknown-address + 20, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64) - ; GFX9-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD20]](p4) :: (load (s8) from unknown-address + 21, addrspace 4) - ; GFX9-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD16]], [[C1]](s32) - ; GFX9-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[ZEXTLOAD15]] - ; GFX9-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD21]](p4) :: (load (s8) from unknown-address + 22, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD21]], [[C]](s64) - ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p4) :: (load (s8) from unknown-address + 23, addrspace 4) - ; GFX9-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; GFX9-NEXT: [[OR18:%[0-9]+]]:_(s32) = G_OR [[SHL18]], [[ZEXTLOAD17]] - ; GFX9-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) - ; GFX9-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] - ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX9-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) - ; GFX9-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i8) from 
unknown-address + 2, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i8) from unknown-address + 3, addrspace 4) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (i8) from unknown-address + 4, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (i8) from unknown-address + 5, addrspace 4) + ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (i8) from unknown-address + 6, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p4) :: (load (i8) from unknown-address + 7, addrspace 4) + ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; GFX9-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; GFX9-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p4) :: (load (i8) from unknown-address + 8, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; GFX9-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p4) :: (load (i8) from unknown-address + 9, addrspace 4) + ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD6]] + ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; GFX9-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p4) :: (load (i8) from unknown-address + 10, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p4) :: (load (i8) from unknown-address + 11, addrspace 4) + ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[ZEXTLOAD8]] + ; GFX9-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[OR8]], [[C3]](i32) + ; GFX9-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[OR7]] + ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[OR9]](i32) + ; GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p4) = 
G_PTR_ADD [[PTR_ADD7]], [[C4]](i64) + ; GFX9-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p4) :: (load (i8) from unknown-address + 12, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; GFX9-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p4) :: (load (i8) from unknown-address + 13, addrspace 4) + ; GFX9-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; GFX9-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD9]] + ; GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; GFX9-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p4) :: (load (i8) from unknown-address + 14, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p4) :: (load (i8) from unknown-address + 15, addrspace 4) + ; GFX9-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[ZEXTLOAD11]] + ; GFX9-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[OR11]], [[C3]](i32) + ; GFX9-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[OR10]] + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[OR12]](i32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; GFX9-NEXT: [[SHL13:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT1]], [[COPY1]](i32) + ; GFX9-NEXT: [[OR13:%[0-9]+]]:_(i64) = G_OR [[SHL13]], [[ZEXT1]] + ; GFX9-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](i64) + ; GFX9-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD15]](p4) :: (load (i8) from unknown-address + 16, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C]](i64) + ; GFX9-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD16]](p4) :: (load (i8) from unknown-address + 17, addrspace 4) + ; GFX9-NEXT: [[SHL14:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD13]], [[C1]](i32) + ; GFX9-NEXT: [[OR14:%[0-9]+]]:_(i32) = G_OR [[SHL14]], [[ZEXTLOAD12]] + ; GFX9-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C2]](i64) + ; GFX9-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD17]](p4) :: (load (i8) from unknown-address + 18, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD17]], [[C]](i64) + ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD18]](p4) :: (load (i8) from unknown-address + 19, addrspace 4) + ; GFX9-NEXT: [[SHL15:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; GFX9-NEXT: [[OR15:%[0-9]+]]:_(i32) = G_OR [[SHL15]], [[ZEXTLOAD14]] + ; GFX9-NEXT: [[SHL16:%[0-9]+]]:_(i32) = G_SHL [[OR15]], [[C3]](i32) + ; GFX9-NEXT: [[OR16:%[0-9]+]]:_(i32) = G_OR [[SHL16]], [[OR14]] + ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(i64) = G_ZEXT [[OR16]](i32) + ; GFX9-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C4]](i64) + ; GFX9-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD19]](p4) :: (load (i8) from unknown-address + 20, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C]](i64) + ; GFX9-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD20]](p4) :: (load (i8) from unknown-address + 21, addrspace 4) + ; GFX9-NEXT: [[SHL17:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD16]], [[C1]](i32) + ; GFX9-NEXT: [[OR17:%[0-9]+]]:_(i32) = G_OR [[SHL17]], [[ZEXTLOAD15]] + ; GFX9-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C2]](i64) + ; GFX9-NEXT: 
[[ZEXTLOAD17:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD21]](p4) :: (load (i8) from unknown-address + 22, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD21]], [[C]](i64) + ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD22]](p4) :: (load (i8) from unknown-address + 23, addrspace 4) + ; GFX9-NEXT: [[SHL18:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; GFX9-NEXT: [[OR18:%[0-9]+]]:_(i32) = G_OR [[SHL18]], [[ZEXTLOAD17]] + ; GFX9-NEXT: [[SHL19:%[0-9]+]]:_(i32) = G_SHL [[OR18]], [[C3]](i32) + ; GFX9-NEXT: [[OR19:%[0-9]+]]:_(i32) = G_OR [[SHL19]], [[OR17]] + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(i64) = G_ANYEXT [[OR19]](i32) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; GFX9-NEXT: [[SHL20:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT2]], [[COPY2]](i32) + ; GFX9-NEXT: [[OR20:%[0-9]+]]:_(i64) = G_OR [[SHL20]], [[ZEXT2]] + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64), [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[OR6]](i64), [[OR13]](i64), [[OR20]](i64), [[UV3]](i64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 1, addrspace 4) - %2:_(<4 x s64>) = G_IMPLICIT_DEF - %3:_(<4 x s64>) = G_INSERT %2, %1, 0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3 + %1:_(<3 x i64>) = G_LOAD %0(p4) :: (load (<3 x i64>), align 1, addrspace 4) + %2:_(<4 x i64>) = G_IMPLICIT_DEF + %3:_(<4 x i64>) = G_INSERT %2, %1(<3 x i64>), 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3(<4 x i64>) ... --- @@ -5738,25 +5738,25 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), addrspace 4) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i64>) = G_LOAD [[COPY]](p4) :: (load (<4 x i64>), addrspace 4) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x i64>) ; ; VI-LABEL: name: test_load_constant_v4s64_align32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), addrspace 4) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i64>) = G_LOAD [[COPY]](p4) :: (load (<4 x i64>), addrspace 4) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x i64>) ; ; GFX9-LABEL: name: test_load_constant_v4s64_align32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), addrspace 4) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i64>) = G_LOAD [[COPY]](p4) :: (load (<4 x i64>), addrspace 4) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x i64>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<4 x s64>) = G_LOAD %0 :: (load (<4 x s64>), align 32, addrspace 4) - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 + %1:_(<4 x i64>) = 
G_LOAD %0(p4) :: (load (<4 x i64>), addrspace 4) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1(<4 x i64>) ... --- @@ -5769,25 +5769,25 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), align 8, addrspace 4) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i64>) = G_LOAD [[COPY]](p4) :: (load (<4 x i64>), align 8, addrspace 4) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x i64>) ; ; VI-LABEL: name: test_load_constant_v4s64_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), align 8, addrspace 4) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i64>) = G_LOAD [[COPY]](p4) :: (load (<4 x i64>), align 8, addrspace 4) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x i64>) ; ; GFX9-LABEL: name: test_load_constant_v4s64_align8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), align 8, addrspace 4) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i64>) = G_LOAD [[COPY]](p4) :: (load (<4 x i64>), align 8, addrspace 4) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x i64>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<4 x s64>) = G_LOAD %0 :: (load (<4 x s64>), align 8, addrspace 4) - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 + %1:_(<4 x i64>) = G_LOAD %0(p4) :: (load (<4 x i64>), align 8, addrspace 4) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1(<4 x i64>) ... 
--- @@ -5800,442 +5800,442 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; CI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; CI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4) - ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4) - ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; CI-NEXT: 
[[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD6]] - ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4) - ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4) - ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[ZEXTLOAD8]] - ; CI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[OR8]], [[C3]](s32) - ; CI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[OR7]] - ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR9]](s32) - ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; CI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 4) - ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 4) - ; CI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; CI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD9]] - ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 4) - ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 4) - ; CI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; CI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[ZEXTLOAD11]] - ; CI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) - ; CI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] - ; CI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) - ; CI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] - ; CI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; CI-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD15]](p4) :: (load (s8) from unknown-address + 16, addrspace 4) - ; CI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD16]](p4) :: (load (s8) from unknown-address + 17, addrspace 4) - ; CI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD13]], [[C1]](s32) - ; CI-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[SHL14]], [[ZEXTLOAD12]] - ; CI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD17]](p4) :: (load (s8) from unknown-address + 18, addrspace 4) - ; CI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD17]], [[C]](s64) - ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p4) :: (load (s8) from unknown-address + 19, addrspace 4) - ; CI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; CI-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[SHL15]], [[ZEXTLOAD14]] - ; CI-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[OR15]], [[C3]](s32) - ; CI-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[SHL16]], 
[[OR14]] - ; CI-NEXT: [[ZEXT2:%[0-9]+]]:_(s64) = G_ZEXT [[OR16]](s32) - ; CI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; CI-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD19]](p4) :: (load (s8) from unknown-address + 20, addrspace 4) - ; CI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD20]](p4) :: (load (s8) from unknown-address + 21, addrspace 4) - ; CI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD16]], [[C1]](s32) - ; CI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[ZEXTLOAD15]] - ; CI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD21]](p4) :: (load (s8) from unknown-address + 22, addrspace 4) - ; CI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD21]], [[C]](s64) - ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p4) :: (load (s8) from unknown-address + 23, addrspace 4) - ; CI-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; CI-NEXT: [[OR18:%[0-9]+]]:_(s32) = G_OR [[SHL18]], [[ZEXTLOAD17]] - ; CI-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) - ; CI-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] - ; CI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) - ; CI-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] - ; CI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; CI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; CI-NEXT: [[ZEXTLOAD18:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD23]](p4) :: (load (s8) from unknown-address + 24, addrspace 4) - ; CI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD19:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD24]](p4) :: (load (s8) from unknown-address + 25, addrspace 4) - ; CI-NEXT: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD19]], [[C1]](s32) - ; CI-NEXT: [[OR21:%[0-9]+]]:_(s32) = G_OR [[SHL21]], [[ZEXTLOAD18]] - ; CI-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD20:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD25]](p4) :: (load (s8) from unknown-address + 26, addrspace 4) - ; CI-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD25]], [[C]](s64) - ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p4) :: (load (s8) from unknown-address + 27, addrspace 4) - ; CI-NEXT: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[LOAD6]], [[C1]](s32) - ; CI-NEXT: [[OR22:%[0-9]+]]:_(s32) = G_OR [[SHL22]], [[ZEXTLOAD20]] - ; CI-NEXT: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[OR22]], [[C3]](s32) - ; CI-NEXT: [[OR23:%[0-9]+]]:_(s32) = G_OR [[SHL23]], [[OR21]] - ; CI-NEXT: [[ZEXT3:%[0-9]+]]:_(s64) = G_ZEXT [[OR23]](s32) - ; CI-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64) - ; CI-NEXT: [[ZEXTLOAD21:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD27]](p4) :: (load (s8) from unknown-address + 28, addrspace 4) - ; CI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD27]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD22:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD28]](p4) :: (load (s8) from unknown-address + 29, addrspace 4) - ; CI-NEXT: [[SHL24:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD22]], [[C1]](s32) - ; CI-NEXT: [[OR24:%[0-9]+]]:_(s32) = G_OR [[SHL24]], [[ZEXTLOAD21]] - ; CI-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD27]], [[C2]](s64) - 
; CI-NEXT: [[ZEXTLOAD23:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD29]](p4) :: (load (s8) from unknown-address + 30, addrspace 4) - ; CI-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD29]], [[C]](s64) - ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p4) :: (load (s8) from unknown-address + 31, addrspace 4) - ; CI-NEXT: [[SHL25:%[0-9]+]]:_(s32) = G_SHL [[LOAD7]], [[C1]](s32) - ; CI-NEXT: [[OR25:%[0-9]+]]:_(s32) = G_OR [[SHL25]], [[ZEXTLOAD23]] - ; CI-NEXT: [[SHL26:%[0-9]+]]:_(s32) = G_SHL [[OR25]], [[C3]](s32) - ; CI-NEXT: [[OR26:%[0-9]+]]:_(s32) = G_OR [[SHL26]], [[OR24]] - ; CI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[OR26]](s32) - ; CI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CI-NEXT: [[SHL27:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT3]], [[COPY3]](s32) - ; CI-NEXT: [[OR27:%[0-9]+]]:_(s64) = G_OR [[SHL27]], [[ZEXT3]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[OR27]](s64) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 4) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i8) from unknown-address + 3, addrspace 4) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; CI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (i8) from unknown-address + 4, addrspace 4) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (i8) from unknown-address + 5, addrspace 4) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (i8) from unknown-address + 6, addrspace 4) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p4) :: (load (i8) from unknown-address + 7, addrspace 4) + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], 
[[ZEXTLOAD5]] + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; CI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; CI-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; CI-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; CI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p4) :: (load (i8) from unknown-address + 8, addrspace 4) + ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p4) :: (load (i8) from unknown-address + 9, addrspace 4) + ; CI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; CI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD6]] + ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p4) :: (load (i8) from unknown-address + 10, addrspace 4) + ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p4) :: (load (i8) from unknown-address + 11, addrspace 4) + ; CI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[ZEXTLOAD8]] + ; CI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[OR8]], [[C3]](i32) + ; CI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[OR7]] + ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[OR9]](i32) + ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i64) + ; CI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p4) :: (load (i8) from unknown-address + 12, addrspace 4) + ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p4) :: (load (i8) from unknown-address + 13, addrspace 4) + ; CI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; CI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD9]] + ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p4) :: (load (i8) from unknown-address + 14, addrspace 4) + ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p4) :: (load (i8) from unknown-address + 15, addrspace 4) + ; CI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; CI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[ZEXTLOAD11]] + ; CI-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[OR11]], [[C3]](i32) + ; CI-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[OR10]] + ; CI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[OR12]](i32) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; CI-NEXT: [[SHL13:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT1]], [[COPY1]](i32) + ; CI-NEXT: [[OR13:%[0-9]+]]:_(i64) = G_OR [[SHL13]], [[ZEXT1]] + ; CI-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](i64) + ; CI-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD15]](p4) :: (load (i8) from unknown-address + 16, addrspace 4) + ; CI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C]](i64) + ; CI-NEXT: 
[[ZEXTLOAD13:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD16]](p4) :: (load (i8) from unknown-address + 17, addrspace 4) + ; CI-NEXT: [[SHL14:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD13]], [[C1]](i32) + ; CI-NEXT: [[OR14:%[0-9]+]]:_(i32) = G_OR [[SHL14]], [[ZEXTLOAD12]] + ; CI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD17]](p4) :: (load (i8) from unknown-address + 18, addrspace 4) + ; CI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD17]], [[C]](i64) + ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD18]](p4) :: (load (i8) from unknown-address + 19, addrspace 4) + ; CI-NEXT: [[SHL15:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; CI-NEXT: [[OR15:%[0-9]+]]:_(i32) = G_OR [[SHL15]], [[ZEXTLOAD14]] + ; CI-NEXT: [[SHL16:%[0-9]+]]:_(i32) = G_SHL [[OR15]], [[C3]](i32) + ; CI-NEXT: [[OR16:%[0-9]+]]:_(i32) = G_OR [[SHL16]], [[OR14]] + ; CI-NEXT: [[ZEXT2:%[0-9]+]]:_(i64) = G_ZEXT [[OR16]](i32) + ; CI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C4]](i64) + ; CI-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD19]](p4) :: (load (i8) from unknown-address + 20, addrspace 4) + ; CI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD20]](p4) :: (load (i8) from unknown-address + 21, addrspace 4) + ; CI-NEXT: [[SHL17:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD16]], [[C1]](i32) + ; CI-NEXT: [[OR17:%[0-9]+]]:_(i32) = G_OR [[SHL17]], [[ZEXTLOAD15]] + ; CI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD21]](p4) :: (load (i8) from unknown-address + 22, addrspace 4) + ; CI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD21]], [[C]](i64) + ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD22]](p4) :: (load (i8) from unknown-address + 23, addrspace 4) + ; CI-NEXT: [[SHL18:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; CI-NEXT: [[OR18:%[0-9]+]]:_(i32) = G_OR [[SHL18]], [[ZEXTLOAD17]] + ; CI-NEXT: [[SHL19:%[0-9]+]]:_(i32) = G_SHL [[OR18]], [[C3]](i32) + ; CI-NEXT: [[OR19:%[0-9]+]]:_(i32) = G_OR [[SHL19]], [[OR17]] + ; CI-NEXT: [[ANYEXT2:%[0-9]+]]:_(i64) = G_ANYEXT [[OR19]](i32) + ; CI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; CI-NEXT: [[SHL20:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT2]], [[COPY2]](i32) + ; CI-NEXT: [[OR20:%[0-9]+]]:_(i64) = G_OR [[SHL20]], [[ZEXT2]] + ; CI-NEXT: [[C8:%[0-9]+]]:_(i64) = G_CONSTANT i64 24 + ; CI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](i64) + ; CI-NEXT: [[ZEXTLOAD18:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD23]](p4) :: (load (i8) from unknown-address + 24, addrspace 4) + ; CI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD19:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD24]](p4) :: (load (i8) from unknown-address + 25, addrspace 4) + ; CI-NEXT: [[SHL21:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD19]], [[C1]](i32) + ; CI-NEXT: [[OR21:%[0-9]+]]:_(i32) = G_OR [[SHL21]], [[ZEXTLOAD18]] + ; CI-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD20:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD25]](p4) :: (load (i8) from unknown-address + 26, addrspace 4) + ; CI-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD25]], [[C]](i64) + ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD26]](p4) :: (load (i8) from unknown-address + 27, addrspace 4) + ; CI-NEXT: [[SHL22:%[0-9]+]]:_(i32) = G_SHL 
[[LOAD6]], [[C1]](i32) + ; CI-NEXT: [[OR22:%[0-9]+]]:_(i32) = G_OR [[SHL22]], [[ZEXTLOAD20]] + ; CI-NEXT: [[SHL23:%[0-9]+]]:_(i32) = G_SHL [[OR22]], [[C3]](i32) + ; CI-NEXT: [[OR23:%[0-9]+]]:_(i32) = G_OR [[SHL23]], [[OR21]] + ; CI-NEXT: [[ZEXT3:%[0-9]+]]:_(i64) = G_ZEXT [[OR23]](i32) + ; CI-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C4]](i64) + ; CI-NEXT: [[ZEXTLOAD21:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD27]](p4) :: (load (i8) from unknown-address + 28, addrspace 4) + ; CI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD27]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD22:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD28]](p4) :: (load (i8) from unknown-address + 29, addrspace 4) + ; CI-NEXT: [[SHL24:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD22]], [[C1]](i32) + ; CI-NEXT: [[OR24:%[0-9]+]]:_(i32) = G_OR [[SHL24]], [[ZEXTLOAD21]] + ; CI-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD27]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD23:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD29]](p4) :: (load (i8) from unknown-address + 30, addrspace 4) + ; CI-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD29]], [[C]](i64) + ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD30]](p4) :: (load (i8) from unknown-address + 31, addrspace 4) + ; CI-NEXT: [[SHL25:%[0-9]+]]:_(i32) = G_SHL [[LOAD7]], [[C1]](i32) + ; CI-NEXT: [[OR25:%[0-9]+]]:_(i32) = G_OR [[SHL25]], [[ZEXTLOAD23]] + ; CI-NEXT: [[SHL26:%[0-9]+]]:_(i32) = G_SHL [[OR25]], [[C3]](i32) + ; CI-NEXT: [[OR26:%[0-9]+]]:_(i32) = G_OR [[SHL26]], [[OR24]] + ; CI-NEXT: [[ANYEXT3:%[0-9]+]]:_(i64) = G_ANYEXT [[OR26]](i32) + ; CI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; CI-NEXT: [[SHL27:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT3]], [[COPY3]](i32) + ; CI-NEXT: [[OR27:%[0-9]+]]:_(i64) = G_OR [[SHL27]], [[ZEXT3]] + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[OR6]](i64), [[OR13]](i64), [[OR20]](i64), [[OR27]](i64) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; VI-LABEL: name: test_load_constant_v4s64_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = 
G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; VI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4) - ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD6]] - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4) - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4) - ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[ZEXTLOAD8]] - ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[OR8]], [[C3]](s32) - ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[OR7]] - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR9]](s32) - ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 4) - ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 4) - ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD9]] - ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 4) - ; 
VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 4) - ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[ZEXTLOAD11]] - ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) - ; VI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] - ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) - ; VI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] - ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; VI-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD15]](p4) :: (load (s8) from unknown-address + 16, addrspace 4) - ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD16]](p4) :: (load (s8) from unknown-address + 17, addrspace 4) - ; VI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD13]], [[C1]](s32) - ; VI-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[SHL14]], [[ZEXTLOAD12]] - ; VI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD17]](p4) :: (load (s8) from unknown-address + 18, addrspace 4) - ; VI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD17]], [[C]](s64) - ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p4) :: (load (s8) from unknown-address + 19, addrspace 4) - ; VI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[SHL15]], [[ZEXTLOAD14]] - ; VI-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[OR15]], [[C3]](s32) - ; VI-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[SHL16]], [[OR14]] - ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s64) = G_ZEXT [[OR16]](s32) - ; VI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD19]](p4) :: (load (s8) from unknown-address + 20, addrspace 4) - ; VI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD20]](p4) :: (load (s8) from unknown-address + 21, addrspace 4) - ; VI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD16]], [[C1]](s32) - ; VI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[ZEXTLOAD15]] - ; VI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD21]](p4) :: (load (s8) from unknown-address + 22, addrspace 4) - ; VI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD21]], [[C]](s64) - ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p4) :: (load (s8) from unknown-address + 23, addrspace 4) - ; VI-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; VI-NEXT: [[OR18:%[0-9]+]]:_(s32) = G_OR [[SHL18]], [[ZEXTLOAD17]] - ; VI-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) - ; VI-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] - ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; VI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) - ; VI-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR 
[[SHL20]], [[ZEXT2]] - ; VI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; VI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; VI-NEXT: [[ZEXTLOAD18:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD23]](p4) :: (load (s8) from unknown-address + 24, addrspace 4) - ; VI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD19:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD24]](p4) :: (load (s8) from unknown-address + 25, addrspace 4) - ; VI-NEXT: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD19]], [[C1]](s32) - ; VI-NEXT: [[OR21:%[0-9]+]]:_(s32) = G_OR [[SHL21]], [[ZEXTLOAD18]] - ; VI-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD20:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD25]](p4) :: (load (s8) from unknown-address + 26, addrspace 4) - ; VI-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD25]], [[C]](s64) - ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p4) :: (load (s8) from unknown-address + 27, addrspace 4) - ; VI-NEXT: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[LOAD6]], [[C1]](s32) - ; VI-NEXT: [[OR22:%[0-9]+]]:_(s32) = G_OR [[SHL22]], [[ZEXTLOAD20]] - ; VI-NEXT: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[OR22]], [[C3]](s32) - ; VI-NEXT: [[OR23:%[0-9]+]]:_(s32) = G_OR [[SHL23]], [[OR21]] - ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s64) = G_ZEXT [[OR23]](s32) - ; VI-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD21:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD27]](p4) :: (load (s8) from unknown-address + 28, addrspace 4) - ; VI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD27]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD22:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD28]](p4) :: (load (s8) from unknown-address + 29, addrspace 4) - ; VI-NEXT: [[SHL24:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD22]], [[C1]](s32) - ; VI-NEXT: [[OR24:%[0-9]+]]:_(s32) = G_OR [[SHL24]], [[ZEXTLOAD21]] - ; VI-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD27]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD23:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD29]](p4) :: (load (s8) from unknown-address + 30, addrspace 4) - ; VI-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD29]], [[C]](s64) - ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p4) :: (load (s8) from unknown-address + 31, addrspace 4) - ; VI-NEXT: [[SHL25:%[0-9]+]]:_(s32) = G_SHL [[LOAD7]], [[C1]](s32) - ; VI-NEXT: [[OR25:%[0-9]+]]:_(s32) = G_OR [[SHL25]], [[ZEXTLOAD23]] - ; VI-NEXT: [[SHL26:%[0-9]+]]:_(s32) = G_SHL [[OR25]], [[C3]](s32) - ; VI-NEXT: [[OR26:%[0-9]+]]:_(s32) = G_OR [[SHL26]], [[OR24]] - ; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[OR26]](s32) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; VI-NEXT: [[SHL27:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT3]], [[COPY3]](s32) - ; VI-NEXT: [[OR27:%[0-9]+]]:_(s64) = G_OR [[SHL27]], [[ZEXT3]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[OR27]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], 
[[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 4) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i8) from unknown-address + 3, addrspace 4) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (i8) from unknown-address + 4, addrspace 4) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (i8) from unknown-address + 5, addrspace 4) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (i8) from unknown-address + 6, addrspace 4) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p4) :: (load (i8) from unknown-address + 7, addrspace 4) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; VI-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; VI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p4) :: (load (i8) from unknown-address + 8, addrspace 4) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p4) :: (load (i8) from unknown-address + 9, addrspace 4) + ; VI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; VI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD6]] + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p4) :: (load (i8) from unknown-address + 10, addrspace 4) + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p4) :: (load (i8) from unknown-address + 11, addrspace 4) + ; VI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[ZEXTLOAD8]] + ; VI-NEXT: 
[[SHL9:%[0-9]+]]:_(i32) = G_SHL [[OR8]], [[C3]](i32) + ; VI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[OR7]] + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[OR9]](i32) + ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p4) :: (load (i8) from unknown-address + 12, addrspace 4) + ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p4) :: (load (i8) from unknown-address + 13, addrspace 4) + ; VI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; VI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD9]] + ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p4) :: (load (i8) from unknown-address + 14, addrspace 4) + ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p4) :: (load (i8) from unknown-address + 15, addrspace 4) + ; VI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; VI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[ZEXTLOAD11]] + ; VI-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[OR11]], [[C3]](i32) + ; VI-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[OR10]] + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[OR12]](i32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; VI-NEXT: [[SHL13:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT1]], [[COPY1]](i32) + ; VI-NEXT: [[OR13:%[0-9]+]]:_(i64) = G_OR [[SHL13]], [[ZEXT1]] + ; VI-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](i64) + ; VI-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD15]](p4) :: (load (i8) from unknown-address + 16, addrspace 4) + ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD16]](p4) :: (load (i8) from unknown-address + 17, addrspace 4) + ; VI-NEXT: [[SHL14:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD13]], [[C1]](i32) + ; VI-NEXT: [[OR14:%[0-9]+]]:_(i32) = G_OR [[SHL14]], [[ZEXTLOAD12]] + ; VI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD17]](p4) :: (load (i8) from unknown-address + 18, addrspace 4) + ; VI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD17]], [[C]](i64) + ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD18]](p4) :: (load (i8) from unknown-address + 19, addrspace 4) + ; VI-NEXT: [[SHL15:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR15:%[0-9]+]]:_(i32) = G_OR [[SHL15]], [[ZEXTLOAD14]] + ; VI-NEXT: [[SHL16:%[0-9]+]]:_(i32) = G_SHL [[OR15]], [[C3]](i32) + ; VI-NEXT: [[OR16:%[0-9]+]]:_(i32) = G_OR [[SHL16]], [[OR14]] + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(i64) = G_ZEXT [[OR16]](i32) + ; VI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD19]](p4) :: (load (i8) from unknown-address + 20, addrspace 4) + ; VI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD20]](p4) :: (load (i8) from unknown-address + 21, addrspace 4) + ; VI-NEXT: [[SHL17:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD16]], [[C1]](i32) + ; VI-NEXT: [[OR17:%[0-9]+]]:_(i32) = G_OR 
[[SHL17]], [[ZEXTLOAD15]] + ; VI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD21]](p4) :: (load (i8) from unknown-address + 22, addrspace 4) + ; VI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD21]], [[C]](i64) + ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD22]](p4) :: (load (i8) from unknown-address + 23, addrspace 4) + ; VI-NEXT: [[SHL18:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; VI-NEXT: [[OR18:%[0-9]+]]:_(i32) = G_OR [[SHL18]], [[ZEXTLOAD17]] + ; VI-NEXT: [[SHL19:%[0-9]+]]:_(i32) = G_SHL [[OR18]], [[C3]](i32) + ; VI-NEXT: [[OR19:%[0-9]+]]:_(i32) = G_OR [[SHL19]], [[OR17]] + ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(i64) = G_ANYEXT [[OR19]](i32) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; VI-NEXT: [[SHL20:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT2]], [[COPY2]](i32) + ; VI-NEXT: [[OR20:%[0-9]+]]:_(i64) = G_OR [[SHL20]], [[ZEXT2]] + ; VI-NEXT: [[C8:%[0-9]+]]:_(i64) = G_CONSTANT i64 24 + ; VI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](i64) + ; VI-NEXT: [[ZEXTLOAD18:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD23]](p4) :: (load (i8) from unknown-address + 24, addrspace 4) + ; VI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD19:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD24]](p4) :: (load (i8) from unknown-address + 25, addrspace 4) + ; VI-NEXT: [[SHL21:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD19]], [[C1]](i32) + ; VI-NEXT: [[OR21:%[0-9]+]]:_(i32) = G_OR [[SHL21]], [[ZEXTLOAD18]] + ; VI-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD20:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD25]](p4) :: (load (i8) from unknown-address + 26, addrspace 4) + ; VI-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD25]], [[C]](i64) + ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD26]](p4) :: (load (i8) from unknown-address + 27, addrspace 4) + ; VI-NEXT: [[SHL22:%[0-9]+]]:_(i32) = G_SHL [[LOAD6]], [[C1]](i32) + ; VI-NEXT: [[OR22:%[0-9]+]]:_(i32) = G_OR [[SHL22]], [[ZEXTLOAD20]] + ; VI-NEXT: [[SHL23:%[0-9]+]]:_(i32) = G_SHL [[OR22]], [[C3]](i32) + ; VI-NEXT: [[OR23:%[0-9]+]]:_(i32) = G_OR [[SHL23]], [[OR21]] + ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(i64) = G_ZEXT [[OR23]](i32) + ; VI-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD21:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD27]](p4) :: (load (i8) from unknown-address + 28, addrspace 4) + ; VI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD27]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD22:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD28]](p4) :: (load (i8) from unknown-address + 29, addrspace 4) + ; VI-NEXT: [[SHL24:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD22]], [[C1]](i32) + ; VI-NEXT: [[OR24:%[0-9]+]]:_(i32) = G_OR [[SHL24]], [[ZEXTLOAD21]] + ; VI-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD27]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD23:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD29]](p4) :: (load (i8) from unknown-address + 30, addrspace 4) + ; VI-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD29]], [[C]](i64) + ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD30]](p4) :: (load (i8) from unknown-address + 31, addrspace 4) + ; VI-NEXT: [[SHL25:%[0-9]+]]:_(i32) = G_SHL [[LOAD7]], [[C1]](i32) + ; VI-NEXT: [[OR25:%[0-9]+]]:_(i32) = G_OR [[SHL25]], [[ZEXTLOAD23]] + ; VI-NEXT: [[SHL26:%[0-9]+]]:_(i32) = G_SHL [[OR25]], [[C3]](i32) + ; VI-NEXT: [[OR26:%[0-9]+]]:_(i32) = G_OR [[SHL26]], [[OR24]] + ; 
VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(i64) = G_ANYEXT [[OR26]](i32) + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; VI-NEXT: [[SHL27:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT3]], [[COPY3]](i32) + ; VI-NEXT: [[OR27:%[0-9]+]]:_(i64) = G_OR [[SHL27]], [[ZEXT3]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[OR6]](i64), [[OR13]](i64), [[OR20]](i64), [[OR27]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; GFX9-LABEL: name: test_load_constant_v4s64_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) - ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) - ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR 
[[SHL6]], [[ZEXT]] - ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; GFX9-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; GFX9-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4) - ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD6]] - ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4) - ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[ZEXTLOAD8]] - ; GFX9-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[OR8]], [[C3]](s32) - ; GFX9-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[OR7]] - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR9]](s32) - ; GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; GFX9-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; GFX9-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 4) - ; GFX9-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; GFX9-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD9]] - ; GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 4) - ; GFX9-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[ZEXTLOAD11]] - ; GFX9-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) - ; GFX9-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] - ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX9-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) - ; GFX9-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] - ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; GFX9-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD15]](p4) :: (load (s8) from unknown-address + 16, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; GFX9-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD16]](p4) :: (load (s8) from unknown-address + 17, addrspace 4) - ; GFX9-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD13]], [[C1]](s32) - ; GFX9-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[SHL14]], [[ZEXTLOAD12]] - ; GFX9-NEXT: 
[[PTR_ADD17:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD17]](p4) :: (load (s8) from unknown-address + 18, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD17]], [[C]](s64) - ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p4) :: (load (s8) from unknown-address + 19, addrspace 4) - ; GFX9-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; GFX9-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[SHL15]], [[ZEXTLOAD14]] - ; GFX9-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[OR15]], [[C3]](s32) - ; GFX9-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[SHL16]], [[OR14]] - ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s64) = G_ZEXT [[OR16]](s32) - ; GFX9-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; GFX9-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD19]](p4) :: (load (s8) from unknown-address + 20, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64) - ; GFX9-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD20]](p4) :: (load (s8) from unknown-address + 21, addrspace 4) - ; GFX9-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD16]], [[C1]](s32) - ; GFX9-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[ZEXTLOAD15]] - ; GFX9-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD21]](p4) :: (load (s8) from unknown-address + 22, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD21]], [[C]](s64) - ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p4) :: (load (s8) from unknown-address + 23, addrspace 4) - ; GFX9-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; GFX9-NEXT: [[OR18:%[0-9]+]]:_(s32) = G_OR [[SHL18]], [[ZEXTLOAD17]] - ; GFX9-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) - ; GFX9-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] - ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX9-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) - ; GFX9-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] - ; GFX9-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; GFX9-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; GFX9-NEXT: [[ZEXTLOAD18:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD23]](p4) :: (load (s8) from unknown-address + 24, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C]](s64) - ; GFX9-NEXT: [[ZEXTLOAD19:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD24]](p4) :: (load (s8) from unknown-address + 25, addrspace 4) - ; GFX9-NEXT: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD19]], [[C1]](s32) - ; GFX9-NEXT: [[OR21:%[0-9]+]]:_(s32) = G_OR [[SHL21]], [[ZEXTLOAD18]] - ; GFX9-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD20:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD25]](p4) :: (load (s8) from unknown-address + 26, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD25]], [[C]](s64) - ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p4) :: (load (s8) from unknown-address + 27, addrspace 4) - ; GFX9-NEXT: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[LOAD6]], [[C1]](s32) - ; GFX9-NEXT: [[OR22:%[0-9]+]]:_(s32) = G_OR [[SHL22]], [[ZEXTLOAD20]] - ; GFX9-NEXT: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[OR22]], [[C3]](s32) - ; GFX9-NEXT: 
[[OR23:%[0-9]+]]:_(s32) = G_OR [[SHL23]], [[OR21]] - ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s64) = G_ZEXT [[OR23]](s32) - ; GFX9-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64) - ; GFX9-NEXT: [[ZEXTLOAD21:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD27]](p4) :: (load (s8) from unknown-address + 28, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD27]], [[C]](s64) - ; GFX9-NEXT: [[ZEXTLOAD22:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD28]](p4) :: (load (s8) from unknown-address + 29, addrspace 4) - ; GFX9-NEXT: [[SHL24:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD22]], [[C1]](s32) - ; GFX9-NEXT: [[OR24:%[0-9]+]]:_(s32) = G_OR [[SHL24]], [[ZEXTLOAD21]] - ; GFX9-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD27]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD23:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD29]](p4) :: (load (s8) from unknown-address + 30, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD29]], [[C]](s64) - ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p4) :: (load (s8) from unknown-address + 31, addrspace 4) - ; GFX9-NEXT: [[SHL25:%[0-9]+]]:_(s32) = G_SHL [[LOAD7]], [[C1]](s32) - ; GFX9-NEXT: [[OR25:%[0-9]+]]:_(s32) = G_OR [[SHL25]], [[ZEXTLOAD23]] - ; GFX9-NEXT: [[SHL26:%[0-9]+]]:_(s32) = G_SHL [[OR25]], [[C3]](s32) - ; GFX9-NEXT: [[OR26:%[0-9]+]]:_(s32) = G_OR [[SHL26]], [[OR24]] - ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[OR26]](s32) - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX9-NEXT: [[SHL27:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT3]], [[COPY3]](s32) - ; GFX9-NEXT: [[OR27:%[0-9]+]]:_(s64) = G_OR [[SHL27]], [[ZEXT3]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[OR27]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i8) from unknown-address + 3, addrspace 4) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (i8) from unknown-address + 4, addrspace 4) + ; GFX9-NEXT: 
[[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (i8) from unknown-address + 5, addrspace 4) + ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (i8) from unknown-address + 6, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p4) :: (load (i8) from unknown-address + 7, addrspace 4) + ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; GFX9-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; GFX9-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p4) :: (load (i8) from unknown-address + 8, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; GFX9-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p4) :: (load (i8) from unknown-address + 9, addrspace 4) + ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD6]] + ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; GFX9-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p4) :: (load (i8) from unknown-address + 10, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p4) :: (load (i8) from unknown-address + 11, addrspace 4) + ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[ZEXTLOAD8]] + ; GFX9-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[OR8]], [[C3]](i32) + ; GFX9-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[OR7]] + ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[OR9]](i32) + ; GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i64) + ; GFX9-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p4) :: (load (i8) from unknown-address + 12, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; GFX9-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p4) :: (load (i8) from unknown-address + 13, addrspace 4) + ; GFX9-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; GFX9-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD9]] + ; GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; GFX9-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p4) :: (load (i8) from unknown-address + 14, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p4) :: (load 
(i8) from unknown-address + 15, addrspace 4) + ; GFX9-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[ZEXTLOAD11]] + ; GFX9-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[OR11]], [[C3]](i32) + ; GFX9-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[OR10]] + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[OR12]](i32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; GFX9-NEXT: [[SHL13:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT1]], [[COPY1]](i32) + ; GFX9-NEXT: [[OR13:%[0-9]+]]:_(i64) = G_OR [[SHL13]], [[ZEXT1]] + ; GFX9-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](i64) + ; GFX9-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD15]](p4) :: (load (i8) from unknown-address + 16, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C]](i64) + ; GFX9-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD16]](p4) :: (load (i8) from unknown-address + 17, addrspace 4) + ; GFX9-NEXT: [[SHL14:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD13]], [[C1]](i32) + ; GFX9-NEXT: [[OR14:%[0-9]+]]:_(i32) = G_OR [[SHL14]], [[ZEXTLOAD12]] + ; GFX9-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C2]](i64) + ; GFX9-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD17]](p4) :: (load (i8) from unknown-address + 18, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD17]], [[C]](i64) + ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD18]](p4) :: (load (i8) from unknown-address + 19, addrspace 4) + ; GFX9-NEXT: [[SHL15:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; GFX9-NEXT: [[OR15:%[0-9]+]]:_(i32) = G_OR [[SHL15]], [[ZEXTLOAD14]] + ; GFX9-NEXT: [[SHL16:%[0-9]+]]:_(i32) = G_SHL [[OR15]], [[C3]](i32) + ; GFX9-NEXT: [[OR16:%[0-9]+]]:_(i32) = G_OR [[SHL16]], [[OR14]] + ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(i64) = G_ZEXT [[OR16]](i32) + ; GFX9-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C4]](i64) + ; GFX9-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD19]](p4) :: (load (i8) from unknown-address + 20, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C]](i64) + ; GFX9-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD20]](p4) :: (load (i8) from unknown-address + 21, addrspace 4) + ; GFX9-NEXT: [[SHL17:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD16]], [[C1]](i32) + ; GFX9-NEXT: [[OR17:%[0-9]+]]:_(i32) = G_OR [[SHL17]], [[ZEXTLOAD15]] + ; GFX9-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C2]](i64) + ; GFX9-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD21]](p4) :: (load (i8) from unknown-address + 22, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD21]], [[C]](i64) + ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD22]](p4) :: (load (i8) from unknown-address + 23, addrspace 4) + ; GFX9-NEXT: [[SHL18:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; GFX9-NEXT: [[OR18:%[0-9]+]]:_(i32) = G_OR [[SHL18]], [[ZEXTLOAD17]] + ; GFX9-NEXT: [[SHL19:%[0-9]+]]:_(i32) = G_SHL [[OR18]], [[C3]](i32) + ; GFX9-NEXT: [[OR19:%[0-9]+]]:_(i32) = G_OR [[SHL19]], [[OR17]] + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(i64) = G_ANYEXT [[OR19]](i32) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; GFX9-NEXT: [[SHL20:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT2]], [[COPY2]](i32) + ; GFX9-NEXT: [[OR20:%[0-9]+]]:_(i64) = G_OR [[SHL20]], [[ZEXT2]] + ; GFX9-NEXT: [[C8:%[0-9]+]]:_(i64) = G_CONSTANT 
i64 24 + ; GFX9-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](i64) + ; GFX9-NEXT: [[ZEXTLOAD18:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD23]](p4) :: (load (i8) from unknown-address + 24, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C]](i64) + ; GFX9-NEXT: [[ZEXTLOAD19:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD24]](p4) :: (load (i8) from unknown-address + 25, addrspace 4) + ; GFX9-NEXT: [[SHL21:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD19]], [[C1]](i32) + ; GFX9-NEXT: [[OR21:%[0-9]+]]:_(i32) = G_OR [[SHL21]], [[ZEXTLOAD18]] + ; GFX9-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C2]](i64) + ; GFX9-NEXT: [[ZEXTLOAD20:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD25]](p4) :: (load (i8) from unknown-address + 26, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD25]], [[C]](i64) + ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD26]](p4) :: (load (i8) from unknown-address + 27, addrspace 4) + ; GFX9-NEXT: [[SHL22:%[0-9]+]]:_(i32) = G_SHL [[LOAD6]], [[C1]](i32) + ; GFX9-NEXT: [[OR22:%[0-9]+]]:_(i32) = G_OR [[SHL22]], [[ZEXTLOAD20]] + ; GFX9-NEXT: [[SHL23:%[0-9]+]]:_(i32) = G_SHL [[OR22]], [[C3]](i32) + ; GFX9-NEXT: [[OR23:%[0-9]+]]:_(i32) = G_OR [[SHL23]], [[OR21]] + ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(i64) = G_ZEXT [[OR23]](i32) + ; GFX9-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C4]](i64) + ; GFX9-NEXT: [[ZEXTLOAD21:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD27]](p4) :: (load (i8) from unknown-address + 28, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD27]], [[C]](i64) + ; GFX9-NEXT: [[ZEXTLOAD22:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD28]](p4) :: (load (i8) from unknown-address + 29, addrspace 4) + ; GFX9-NEXT: [[SHL24:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD22]], [[C1]](i32) + ; GFX9-NEXT: [[OR24:%[0-9]+]]:_(i32) = G_OR [[SHL24]], [[ZEXTLOAD21]] + ; GFX9-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD27]], [[C2]](i64) + ; GFX9-NEXT: [[ZEXTLOAD23:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD29]](p4) :: (load (i8) from unknown-address + 30, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD29]], [[C]](i64) + ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD30]](p4) :: (load (i8) from unknown-address + 31, addrspace 4) + ; GFX9-NEXT: [[SHL25:%[0-9]+]]:_(i32) = G_SHL [[LOAD7]], [[C1]](i32) + ; GFX9-NEXT: [[OR25:%[0-9]+]]:_(i32) = G_OR [[SHL25]], [[ZEXTLOAD23]] + ; GFX9-NEXT: [[SHL26:%[0-9]+]]:_(i32) = G_SHL [[OR25]], [[C3]](i32) + ; GFX9-NEXT: [[OR26:%[0-9]+]]:_(i32) = G_OR [[SHL26]], [[OR24]] + ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(i64) = G_ANYEXT [[OR26]](i32) + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; GFX9-NEXT: [[SHL27:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT3]], [[COPY3]](i32) + ; GFX9-NEXT: [[OR27:%[0-9]+]]:_(i64) = G_OR [[SHL27]], [[ZEXT3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[OR6]](i64), [[OR13]](i64), [[OR20]](i64), [[OR27]](i64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<4 x s64>) = G_LOAD %0 :: (load (<4 x s64>), align 1, addrspace 4) - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 + %1:_(<4 x i64>) = G_LOAD %0(p4) :: (load (<4 x i64>), align 1, addrspace 4) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1(<4 x i64>) ... 
--- @@ -6248,28 +6248,28 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s32>), addrspace 4) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x i32>) = G_LOAD [[COPY]](p4) :: (load (<8 x i32>), addrspace 4) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i128>) = G_BITCAST [[LOAD]](<8 x i32>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x i128>) ; ; VI-LABEL: name: test_load_constant_v2s128_align32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s32>), addrspace 4) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x i32>) = G_LOAD [[COPY]](p4) :: (load (<8 x i32>), addrspace 4) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i128>) = G_BITCAST [[LOAD]](<8 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x i128>) ; ; GFX9-LABEL: name: test_load_constant_v2s128_align32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s32>), addrspace 4) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<8 x i32>) = G_LOAD [[COPY]](p4) :: (load (<8 x i32>), addrspace 4) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i128>) = G_BITCAST [[LOAD]](<8 x i32>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x i128>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<2 x s128>) = G_LOAD %0 :: (load (<2 x s128>), align 32, addrspace 4) - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 + %1:_(<2 x i128>) = G_LOAD %0(p4) :: (load (<2 x i128>), addrspace 4) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1(<2 x i128>) ... 
--- @@ -6282,28 +6282,28 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), addrspace 4) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p4) :: (load (<4 x i32>), addrspace 4) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; VI-LABEL: name: test_load_constant_v2p1_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), addrspace 4) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p4) :: (load (<4 x i32>), addrspace 4) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; GFX9-LABEL: name: test_load_constant_v2p1_align16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), addrspace 4) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p4) :: (load (<4 x i32>), addrspace 4) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 16, addrspace 4) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<2 x p1>) = G_LOAD %0(p4) :: (load (<2 x p1>), addrspace 4) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x p1>) ... 
--- @@ -6316,28 +6316,28 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 8, addrspace 4) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p4) :: (load (<4 x i32>), align 8, addrspace 4) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; VI-LABEL: name: test_load_constant_v2p1_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 8, addrspace 4) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p4) :: (load (<4 x i32>), align 8, addrspace 4) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; GFX9-LABEL: name: test_load_constant_v2p1_align8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 8, addrspace 4) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p4) :: (load (<4 x i32>), align 8, addrspace 4) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 8, addrspace 4) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<2 x p1>) = G_LOAD %0(p4) :: (load (<2 x p1>), align 8, addrspace 4) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x p1>) ... 
--- @@ -6350,28 +6350,28 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 4) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p4) :: (load (<4 x i32>), align 4, addrspace 4) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; VI-LABEL: name: test_load_constant_v2p1_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 4) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p4) :: (load (<4 x i32>), align 4, addrspace 4) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; GFX9-LABEL: name: test_load_constant_v2p1_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 4) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p4) :: (load (<4 x i32>), align 4, addrspace 4) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 4, addrspace 4) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<2 x p1>) = G_LOAD %0(p4) :: (load (<2 x p1>), align 4, addrspace 4) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x p1>) ... 
--- @@ -6384,214 +6384,214 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4) - ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4) - ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; CI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4) - ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], 
[[C]](s64) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4) - ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; CI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 4) - ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 4) - ; CI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; CI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 4) - ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 4) - ; CI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; CI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; CI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; CI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 4) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i8) from unknown-address + 3, addrspace 4) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (i8) from unknown-address + 4, addrspace 4) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = 
G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (i8) from unknown-address + 5, addrspace 4) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (i8) from unknown-address + 6, addrspace 4) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p4) :: (load (i8) from unknown-address + 7, addrspace 4) + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p4) :: (load (i8) from unknown-address + 8, addrspace 4) + ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p4) :: (load (i8) from unknown-address + 9, addrspace 4) + ; CI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; CI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p4) :: (load (i8) from unknown-address + 10, addrspace 4) + ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p4) :: (load (i8) from unknown-address + 11, addrspace 4) + ; CI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; CI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; CI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; CI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; CI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p4) :: (load (i8) from unknown-address + 12, addrspace 4) + ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p4) :: (load (i8) from unknown-address + 13, addrspace 4) + ; CI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; CI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p4) :: (load (i8) from unknown-address + 14, addrspace 4) + ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p4) :: (load (i8) from unknown-address + 15, addrspace 4) + ; CI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; CI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; CI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; CI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; CI-NEXT: 
[[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; VI-LABEL: name: test_load_constant_v2p1_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4) - ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = 
G_ZEXTLOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4) - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4) - ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 4) - ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 4) - ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 4) - ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 4) - ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 4) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i8) from unknown-address + 3, addrspace 4) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (i8) from 
unknown-address + 4, addrspace 4) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (i8) from unknown-address + 5, addrspace 4) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (i8) from unknown-address + 6, addrspace 4) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p4) :: (load (i8) from unknown-address + 7, addrspace 4) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; VI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p4) :: (load (i8) from unknown-address + 8, addrspace 4) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p4) :: (load (i8) from unknown-address + 9, addrspace 4) + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; VI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p4) :: (load (i8) from unknown-address + 10, addrspace 4) + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p4) :: (load (i8) from unknown-address + 11, addrspace 4) + ; VI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; VI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; VI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; VI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; VI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p4) :: (load (i8) from unknown-address + 12, addrspace 4) + ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p4) :: (load (i8) from unknown-address + 13, addrspace 4) + ; VI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; VI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p4) :: (load (i8) from unknown-address + 14, addrspace 4) + ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p4) :: (load (i8) from unknown-address + 15, addrspace 4) + ; VI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; VI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; VI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; VI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR 
[[SHL11]], [[OR9]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; GFX9-LABEL: name: test_load_constant_v2p1_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) - ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) - ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; GFX9-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; GFX9-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4) - ; GFX9-NEXT: 
[[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4) - ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; GFX9-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; GFX9-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 4) - ; GFX9-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; GFX9-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 4) - ; GFX9-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; GFX9-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; GFX9-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i8) from unknown-address + 3, addrspace 4) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: 
[[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (i8) from unknown-address + 4, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (i8) from unknown-address + 5, addrspace 4) + ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (i8) from unknown-address + 6, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p4) :: (load (i8) from unknown-address + 7, addrspace 4) + ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; GFX9-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p4) :: (load (i8) from unknown-address + 8, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; GFX9-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p4) :: (load (i8) from unknown-address + 9, addrspace 4) + ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; GFX9-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p4) :: (load (i8) from unknown-address + 10, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p4) :: (load (i8) from unknown-address + 11, addrspace 4) + ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; GFX9-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; GFX9-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p4) :: (load (i8) from unknown-address + 12, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; GFX9-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p4) :: (load (i8) from unknown-address + 13, addrspace 4) + ; GFX9-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; GFX9-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; GFX9-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p4) :: (load (i8) from unknown-address + 14, addrspace 4) + ; GFX9-NEXT: 
[[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p4) :: (load (i8) from unknown-address + 15, addrspace 4) + ; GFX9-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; GFX9-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; GFX9-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 1, addrspace 4) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<2 x p1>) = G_LOAD %0(p4) :: (load (<2 x p1>), align 1, addrspace 4) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x p1>) ... --- @@ -6604,28 +6604,28 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), addrspace 4) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p4) :: (load (<2 x i32>), addrspace 4) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x i32>) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) ; ; VI-LABEL: name: test_load_constant_v2p3_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), addrspace 4) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p4) :: (load (<2 x i32>), addrspace 4) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x i32>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) ; ; GFX9-LABEL: name: test_load_constant_v2p3_align8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), addrspace 4) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p4) :: (load (<2 x i32>), addrspace 4) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x i32>) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 8, addrspace 4) - $vgpr0_vgpr1 = COPY %1 + %1:_(<2 x p3>) = G_LOAD %0(p4) :: (load (<2 x p3>), addrspace 4) + $vgpr0_vgpr1 = COPY %1(<2 x p3>) ... 
--- @@ -6638,28 +6638,28 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), align 4, addrspace 4) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p4) :: (load (<2 x i32>), align 4, addrspace 4) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x i32>) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) ; ; VI-LABEL: name: test_load_constant_v2p3_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), align 4, addrspace 4) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p4) :: (load (<2 x i32>), align 4, addrspace 4) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x i32>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) ; ; GFX9-LABEL: name: test_load_constant_v2p3_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), align 4, addrspace 4) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p4) :: (load (<2 x i32>), align 4, addrspace 4) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x i32>) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 4, addrspace 4) - $vgpr0_vgpr1 = COPY %1 + %1:_(<2 x p3>) = G_LOAD %0(p4) :: (load (<2 x p3>), align 4, addrspace 4) + $vgpr0_vgpr1 = COPY %1(<2 x p3>) ... 
--- @@ -6672,124 +6672,124 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: 
[[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 4) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i8) from unknown-address + 3, addrspace 4) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (i8) from unknown-address + 4, addrspace 4) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (i8) from unknown-address + 5, addrspace 4) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (i8) from unknown-address + 6, addrspace 4) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p4) :: (load (i8) from unknown-address + 7, addrspace 4) + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x i32>) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) ; ; VI-LABEL: name: test_load_constant_v2p3_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; 
VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 4) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i8) from unknown-address + 3, addrspace 4) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (i8) from unknown-address + 4, addrspace 4) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (i8) from unknown-address + 5, addrspace 4) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (i8) from unknown-address + 6, addrspace 4) + ; VI-NEXT: 
[[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p4) :: (load (i8) from unknown-address + 7, addrspace 4) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x i32>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) ; ; GFX9-LABEL: name: test_load_constant_v2p3_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) - ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) - ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) - ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) - ; 
GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 4) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 4) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i8) from unknown-address + 3, addrspace 4) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (i8) from unknown-address + 4, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (i8) from unknown-address + 5, addrspace 4) + ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (i8) from unknown-address + 6, addrspace 4) + ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p4) :: (load (i8) from unknown-address + 7, addrspace 4) + ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x i32>) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 1, addrspace 4) - $vgpr0_vgpr1 = COPY %1 + %1:_(<2 x p3>) = G_LOAD %0(p4) :: (load (<2 x p3>), align 1, addrspace 4) + $vgpr0_vgpr1 = COPY %1(<2 x p3>) ... 
--- @@ -6802,25 +6802,25 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4) - ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i8), align 4, addrspace 4) + ; CI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; VI-LABEL: name: test_ext_load_constant_s32_from_1_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i8), align 4, addrspace 4) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-LABEL: name: test_ext_load_constant_s32_from_1_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4) - ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i8), align 4, addrspace 4) + ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_LOAD %0 :: (load (s8), align 4, addrspace 4) - $vgpr0 = COPY %1 + %1:_(i32) = G_LOAD %0(p4) :: (load (i8), align 4, addrspace 4) + $vgpr0 = COPY %1(i32) ... --- @@ -6833,25 +6833,25 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4) - ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i16), align 4, addrspace 4) + ; CI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; VI-LABEL: name: test_ext_load_constant_s32_from_2_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i16), align 4, addrspace 4) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-LABEL: name: test_ext_load_constant_s32_from_2_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4) - ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i16), align 4, addrspace 4) + ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_LOAD %0 :: (load (s16), align 4, addrspace 4) - $vgpr0 = COPY %1 + %1:_(i32) = G_LOAD %0(p4) :: (load (i16), align 4, addrspace 4) + $vgpr0 = COPY %1(i32) ... 
--- @@ -6865,28 +6865,28 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4) - ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i8), align 4, addrspace 4) + ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; VI-LABEL: name: test_ext_load_constant_s64_from_1_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i8), align 4, addrspace 4) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX9-LABEL: name: test_ext_load_constant_s64_from_1_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i8), align 4, addrspace 4) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_LOAD %0 :: (load (s8), align 4, addrspace 4) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_LOAD %0(p4) :: (load (i8), align 4, addrspace 4) + $vgpr0_vgpr1 = COPY %1(i64) ... 
--- @@ -6899,28 +6899,28 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4) - ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i16), align 4, addrspace 4) + ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; VI-LABEL: name: test_ext_load_constant_s64_from_2_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i16), align 4, addrspace 4) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX9-LABEL: name: test_ext_load_constant_s64_from_2_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i16), align 4, addrspace 4) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_LOAD %0 :: (load (s16), align 4, addrspace 4) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_LOAD %0(p4) :: (load (i16), align 4, addrspace 4) + $vgpr0_vgpr1 = COPY %1(i64) ... 
--- @@ -6933,28 +6933,28 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4) - ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i32), addrspace 4) + ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; VI-LABEL: name: test_ext_load_constant_s64_from_4_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i32), addrspace 4) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX9-LABEL: name: test_ext_load_constant_s64_from_4_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i32), addrspace 4) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_LOAD %0 :: (load (s32), align 4, addrspace 4) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_LOAD %0(p4) :: (load (i32), addrspace 4) + $vgpr0_vgpr1 = COPY %1(i64) ... 
--- @@ -6967,37 +6967,37 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4) - ; CI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) - ; CI-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i32), addrspace 4) + ; CI-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[DEF]](i32) + ; CI-NEXT: [[DEF1:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; CI-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[MV]](i64), [[DEF1]](i64) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](i128) ; ; VI-LABEL: name: test_ext_load_constant_s128_from_4_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) - ; VI-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; VI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i32), addrspace 4) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; VI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[DEF]](i32) + ; VI-NEXT: [[DEF1:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; VI-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[MV]](i64), [[DEF1]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](i128) ; ; GFX9-LABEL: name: test_ext_load_constant_s128_from_4_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i32), addrspace 4) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[DEF]](i32) + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[MV]](i64), [[DEF1]](i64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](i128) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s128) = G_LOAD %0 :: (load (s32), align 4, addrspace 4) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(i128) = G_LOAD %0(p4) :: (load (i32), addrspace 4) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(i128) ... 
--- @@ -7010,28 +7010,28 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4) - ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i16), align 4, addrspace 4) + ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; VI-LABEL: name: test_ext_load_constant_s64_from_2_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i16), align 4, addrspace 4) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX9-LABEL: name: test_ext_load_constant_s64_from_2_align2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i16), align 4, addrspace 4) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_LOAD %0 :: (load (s16), align 4, addrspace 4) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_LOAD %0(p4) :: (load (i16), align 4, addrspace 4) + $vgpr0_vgpr1 = COPY %1(i64) ... 
--- @@ -7044,28 +7044,28 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4) - ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i8), align 4, addrspace 4) + ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; VI-LABEL: name: test_ext_load_constant_s64_from_1_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i8), align 4, addrspace 4) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX9-LABEL: name: test_ext_load_constant_s64_from_1_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p4) :: (load (i8), align 4, addrspace 4) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_LOAD %0 :: (load (s8), align 4, addrspace 4) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_LOAD %0(p4) :: (load (i8), align 4, addrspace 4) + $vgpr0_vgpr1 = COPY %1(i64) ... 
--- @@ -7078,121 +7078,121 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 1) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 1) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 1) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 1) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 1) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 1) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 1) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 1) - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 1) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 1) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = 
G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 1) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i8) from unknown-address + 3, addrspace 1) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (i8) from unknown-address + 4, addrspace 1) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (i8) from unknown-address + 5, addrspace 1) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (i8) from unknown-address + 6, addrspace 1) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p4) :: (load (i8) from unknown-address + 7, addrspace 1) + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; VI-LABEL: name: test_extload_constant_v2s32_from_4_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 1) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 1) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = 
G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 1) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 1) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 1) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 1) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 1) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i8) from unknown-address + 3, addrspace 1) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (i8) from unknown-address + 4, addrspace 1) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (i8) from unknown-address + 5, addrspace 1) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (i8) from unknown-address + 6, addrspace 1) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p4) :: (load 
(i8) from unknown-address + 7, addrspace 1) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX9-LABEL: name: test_extload_constant_v2s32_from_4_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 1) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 1) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 1) - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 1) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 1) - ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 1) - ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 1) - ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 1) - ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 1) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 
+ ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 1) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 1) + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i8) from unknown-address + 3, addrspace 1) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (i8) from unknown-address + 4, addrspace 1) + ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (i8) from unknown-address + 5, addrspace 1) + ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (i8) from unknown-address + 6, addrspace 1) + ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p4) :: (load (i8) from unknown-address + 7, addrspace 1) + ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 1, addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:_(<2 x i32>) = G_LOAD %0(p4) :: (load (<2 x i32>), align 1, addrspace 1) + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... 
--- @@ -7205,67 +7205,67 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s16), addrspace 1) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 1) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 1) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 1) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i16), addrspace 1) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i16) from unknown-address + 2, addrspace 1) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i16) from unknown-address + 4, addrspace 1) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i16) from unknown-address + 6, addrspace 1) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; VI-LABEL: name: test_extload_constant_v2s32_from_4_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s16), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load 
(s16) from unknown-address + 4, addrspace 1) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 1) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i16), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i16) from unknown-address + 2, addrspace 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i16) from unknown-address + 4, addrspace 1) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i16) from unknown-address + 6, addrspace 1) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX9-LABEL: name: test_extload_constant_v2s32_from_4_align2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s16), addrspace 1) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 1) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 1) - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 1) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i16), addrspace 1) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load 
(i16) from unknown-address + 2, addrspace 1) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i16) from unknown-address + 4, addrspace 1) + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i16) from unknown-address + 6, addrspace 1) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 2, addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:_(<2 x i32>) = G_LOAD %0(p4) :: (load (<2 x i32>), align 2, addrspace 1) + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... --- @@ -7278,25 +7278,25 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), align 4, addrspace 1) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p4) :: (load (<2 x i32>), align 4, addrspace 1) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; VI-LABEL: name: test_extload_constant_v2s32_from_4_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), align 4, addrspace 1) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p4) :: (load (<2 x i32>), align 4, addrspace 1) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX9-LABEL: name: test_extload_constant_v2s32_from_4_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), align 4, addrspace 1) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p4) :: (load (<2 x i32>), align 4, addrspace 1) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 4, addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:_(<2 x i32>) = G_LOAD %0(p4) :: (load (<2 x i32>), align 4, addrspace 1) + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... 
--- @@ -7309,25 +7309,25 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 4, addrspace 1) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p4) :: (load (<3 x i32>), align 4, addrspace 1) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) ; ; VI-LABEL: name: test_extload_constant_v3s32_from_6_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 4, addrspace 1) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p4) :: (load (<3 x i32>), align 4, addrspace 1) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) ; ; GFX9-LABEL: name: test_extload_constant_v3s32_from_6_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 4, addrspace 1) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p4) :: (load (<3 x i32>), align 4, addrspace 1) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 4, addrspace 1) - $vgpr0_vgpr1_vgpr2 = COPY %1 + %1:_(<3 x i32>) = G_LOAD %0(p4) :: (load (<3 x i32>), align 4, addrspace 1) + $vgpr0_vgpr1_vgpr2 = COPY %1(<3 x i32>) ... --- @@ -7340,25 +7340,25 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 1) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p4) :: (load (<4 x i32>), align 4, addrspace 1) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; VI-LABEL: name: test_extload_constant_v4s32_from_8_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 1) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p4) :: (load (<4 x i32>), align 4, addrspace 1) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; GFX9-LABEL: name: test_extload_constant_v4s32_from_8_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 1) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p4) :: (load (<4 x i32>), align 4, addrspace 1) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 4, addrspace 1) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<4 x i32>) = G_LOAD %0(p4) :: (load (<4 x i32>), align 4, addrspace 1) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<4 x i32>) ... 
--- @@ -7371,316 +7371,316 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 1) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 1) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 1) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 1) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 1) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 1) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 1) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 1) - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 1) - ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 1) - ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; CI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 1) - ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], 
[[C]](s64) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 1) - ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; CI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 1) - ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 1) - ; CI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; CI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 1) - ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 1) - ; CI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; CI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; CI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; CI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; CI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) - ; CI-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD15]](p4) :: (load (s8) from unknown-address + 16, addrspace 1) - ; CI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD16]](p4) :: (load (s8) from unknown-address + 17, addrspace 1) - ; CI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD13]], [[C1]](s32) - ; CI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[ZEXTLOAD12]] - ; CI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD17]](p4) :: (load (s8) from unknown-address + 18, addrspace 1) - ; CI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD17]], [[C]](s64) - ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p4) :: (load (s8) from unknown-address + 19, addrspace 1) - ; CI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; CI-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[SHL13]], [[ZEXTLOAD14]] - ; CI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[OR13]], [[C3]](s32) - ; CI-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[SHL14]], [[OR12]] - ; CI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64) - ; CI-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD19]](p4) :: (load (s8) from unknown-address + 20, addrspace 1) - ; CI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD20]](p4) :: (load (s8) from unknown-address + 21, addrspace 1) - ; CI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = 
G_SHL [[ZEXTLOAD16]], [[C1]](s32) - ; CI-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[SHL15]], [[ZEXTLOAD15]] - ; CI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD21]](p4) :: (load (s8) from unknown-address + 22, addrspace 1) - ; CI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD21]], [[C]](s64) - ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p4) :: (load (s8) from unknown-address + 23, addrspace 1) - ; CI-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; CI-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[SHL16]], [[ZEXTLOAD17]] - ; CI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[OR16]], [[C3]](s32) - ; CI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] - ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) - ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 1) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 1) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 1) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i8) from unknown-address + 3, addrspace 1) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (i8) from unknown-address + 4, addrspace 1) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (i8) from unknown-address + 5, addrspace 1) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (i8) from unknown-address + 6, addrspace 1) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p4) :: (load (i8) from unknown-address + 7, addrspace 1) + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], 
[[C1]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p4) :: (load (i8) from unknown-address + 8, addrspace 1) + ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p4) :: (load (i8) from unknown-address + 9, addrspace 1) + ; CI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; CI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p4) :: (load (i8) from unknown-address + 10, addrspace 1) + ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p4) :: (load (i8) from unknown-address + 11, addrspace 1) + ; CI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; CI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; CI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; CI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; CI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p4) :: (load (i8) from unknown-address + 12, addrspace 1) + ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p4) :: (load (i8) from unknown-address + 13, addrspace 1) + ; CI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; CI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p4) :: (load (i8) from unknown-address + 14, addrspace 1) + ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p4) :: (load (i8) from unknown-address + 15, addrspace 1) + ; CI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; CI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; CI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; CI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; CI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C4]](i64) + ; CI-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD15]](p4) :: (load (i8) from unknown-address + 16, addrspace 1) + ; CI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD16]](p4) :: (load (i8) from unknown-address + 17, addrspace 1) + ; CI-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD13]], [[C1]](i32) + ; CI-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[ZEXTLOAD12]] + ; CI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p4) = G_PTR_ADD 
[[PTR_ADD15]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD17]](p4) :: (load (i8) from unknown-address + 18, addrspace 1) + ; CI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD17]], [[C]](i64) + ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD18]](p4) :: (load (i8) from unknown-address + 19, addrspace 1) + ; CI-NEXT: [[SHL13:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; CI-NEXT: [[OR13:%[0-9]+]]:_(i32) = G_OR [[SHL13]], [[ZEXTLOAD14]] + ; CI-NEXT: [[SHL14:%[0-9]+]]:_(i32) = G_SHL [[OR13]], [[C3]](i32) + ; CI-NEXT: [[OR14:%[0-9]+]]:_(i32) = G_OR [[SHL14]], [[OR12]] + ; CI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C5]](i64) + ; CI-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD19]](p4) :: (load (i8) from unknown-address + 20, addrspace 1) + ; CI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD20]](p4) :: (load (i8) from unknown-address + 21, addrspace 1) + ; CI-NEXT: [[SHL15:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD16]], [[C1]](i32) + ; CI-NEXT: [[OR15:%[0-9]+]]:_(i32) = G_OR [[SHL15]], [[ZEXTLOAD15]] + ; CI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD21]](p4) :: (load (i8) from unknown-address + 22, addrspace 1) + ; CI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD21]], [[C]](i64) + ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD22]](p4) :: (load (i8) from unknown-address + 23, addrspace 1) + ; CI-NEXT: [[SHL16:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; CI-NEXT: [[OR16:%[0-9]+]]:_(i32) = G_OR [[SHL16]], [[ZEXTLOAD17]] + ; CI-NEXT: [[SHL17:%[0-9]+]]:_(i32) = G_SHL [[OR16]], [[C3]](i32) + ; CI-NEXT: [[OR17:%[0-9]+]]:_(i32) = G_OR [[SHL17]], [[OR15]] + ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR11]](i32), [[OR14]](i32), [[OR17]](i32) + ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; CI-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; VI-LABEL: name: test_extload_constant_v2s96_from_24_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 1) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 1) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], 
[[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 1) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 1) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 1) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 1) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 1) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 1) - ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 1) - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 1) - ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 1) - ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 1) - ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD 
[[PTR_ADD11]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 1) - ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 1) - ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD15]](p4) :: (load (s8) from unknown-address + 16, addrspace 1) - ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD16]](p4) :: (load (s8) from unknown-address + 17, addrspace 1) - ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD13]], [[C1]](s32) - ; VI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[ZEXTLOAD12]] - ; VI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD17]](p4) :: (load (s8) from unknown-address + 18, addrspace 1) - ; VI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD17]], [[C]](s64) - ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p4) :: (load (s8) from unknown-address + 19, addrspace 1) - ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[SHL13]], [[ZEXTLOAD14]] - ; VI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[OR13]], [[C3]](s32) - ; VI-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[SHL14]], [[OR12]] - ; VI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64) - ; VI-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD19]](p4) :: (load (s8) from unknown-address + 20, addrspace 1) - ; VI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD20]](p4) :: (load (s8) from unknown-address + 21, addrspace 1) - ; VI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD16]], [[C1]](s32) - ; VI-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[SHL15]], [[ZEXTLOAD15]] - ; VI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD21]](p4) :: (load (s8) from unknown-address + 22, addrspace 1) - ; VI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD21]], [[C]](s64) - ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p4) :: (load (s8) from unknown-address + 23, addrspace 1) - ; VI-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; VI-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[SHL16]], [[ZEXTLOAD17]] - ; VI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[OR16]], [[C3]](s32) - ; VI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] - ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; VI-NEXT: 
[[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 1) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i8) from unknown-address + 3, addrspace 1) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (i8) from unknown-address + 4, addrspace 1) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (i8) from unknown-address + 5, addrspace 1) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (i8) from unknown-address + 6, addrspace 1) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p4) :: (load (i8) from unknown-address + 7, addrspace 1) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; VI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p4) :: (load (i8) from unknown-address + 8, addrspace 1) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p4) :: (load (i8) from unknown-address + 9, addrspace 1) + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; VI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p4) :: (load (i8) from unknown-address + 10, addrspace 1) + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p4) :: (load (i8) from unknown-address + 11, addrspace 1) + ; VI-NEXT: 
[[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; VI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; VI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; VI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; VI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p4) :: (load (i8) from unknown-address + 12, addrspace 1) + ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p4) :: (load (i8) from unknown-address + 13, addrspace 1) + ; VI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; VI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p4) :: (load (i8) from unknown-address + 14, addrspace 1) + ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p4) :: (load (i8) from unknown-address + 15, addrspace 1) + ; VI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; VI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; VI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; VI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD15]](p4) :: (load (i8) from unknown-address + 16, addrspace 1) + ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD16]](p4) :: (load (i8) from unknown-address + 17, addrspace 1) + ; VI-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD13]], [[C1]](i32) + ; VI-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[ZEXTLOAD12]] + ; VI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD17]](p4) :: (load (i8) from unknown-address + 18, addrspace 1) + ; VI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD17]], [[C]](i64) + ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD18]](p4) :: (load (i8) from unknown-address + 19, addrspace 1) + ; VI-NEXT: [[SHL13:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR13:%[0-9]+]]:_(i32) = G_OR [[SHL13]], [[ZEXTLOAD14]] + ; VI-NEXT: [[SHL14:%[0-9]+]]:_(i32) = G_SHL [[OR13]], [[C3]](i32) + ; VI-NEXT: [[OR14:%[0-9]+]]:_(i32) = G_OR [[SHL14]], [[OR12]] + ; VI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C5]](i64) + ; VI-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD19]](p4) :: (load (i8) from unknown-address + 20, addrspace 1) + ; VI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD20]](p4) :: (load (i8) from unknown-address + 21, addrspace 1) + ; VI-NEXT: [[SHL15:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD16]], [[C1]](i32) + ; VI-NEXT: [[OR15:%[0-9]+]]:_(i32) = G_OR [[SHL15]], [[ZEXTLOAD15]] + ; VI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p4) 
= G_PTR_ADD [[PTR_ADD19]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD21]](p4) :: (load (i8) from unknown-address + 22, addrspace 1) + ; VI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD21]], [[C]](i64) + ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD22]](p4) :: (load (i8) from unknown-address + 23, addrspace 1) + ; VI-NEXT: [[SHL16:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; VI-NEXT: [[OR16:%[0-9]+]]:_(i32) = G_OR [[SHL16]], [[ZEXTLOAD17]] + ; VI-NEXT: [[SHL17:%[0-9]+]]:_(i32) = G_SHL [[OR16]], [[C3]](i32) + ; VI-NEXT: [[OR17:%[0-9]+]]:_(i32) = G_OR [[SHL17]], [[OR15]] + ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR11]](i32), [[OR14]](i32), [[OR17]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; GFX9-LABEL: name: test_extload_constant_v2s96_from_24_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 1) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 1) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 1) - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 1) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 1) - ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 1) - ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 1) - ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from 
unknown-address + 7, addrspace 1) - ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; GFX9-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 1) - ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; GFX9-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 1) - ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 1) - ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 1) - ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; GFX9-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 1) - ; GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; GFX9-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 1) - ; GFX9-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; GFX9-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 1) - ; GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 1) - ; GFX9-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; GFX9-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; GFX9-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; GFX9-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) - ; GFX9-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD15]](p4) :: (load (s8) from unknown-address + 16, addrspace 1) - ; GFX9-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; GFX9-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD16]](p4) :: (load (s8) from unknown-address + 17, addrspace 1) - ; GFX9-NEXT: 
[[SHL12:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD13]], [[C1]](s32) - ; GFX9-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[ZEXTLOAD12]] - ; GFX9-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD17]](p4) :: (load (s8) from unknown-address + 18, addrspace 1) - ; GFX9-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD17]], [[C]](s64) - ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p4) :: (load (s8) from unknown-address + 19, addrspace 1) - ; GFX9-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; GFX9-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[SHL13]], [[ZEXTLOAD14]] - ; GFX9-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[OR13]], [[C3]](s32) - ; GFX9-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[SHL14]], [[OR12]] - ; GFX9-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64) - ; GFX9-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD19]](p4) :: (load (s8) from unknown-address + 20, addrspace 1) - ; GFX9-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64) - ; GFX9-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD20]](p4) :: (load (s8) from unknown-address + 21, addrspace 1) - ; GFX9-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD16]], [[C1]](s32) - ; GFX9-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[SHL15]], [[ZEXTLOAD15]] - ; GFX9-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD21]](p4) :: (load (s8) from unknown-address + 22, addrspace 1) - ; GFX9-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD21]], [[C]](s64) - ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p4) :: (load (s8) from unknown-address + 23, addrspace 1) - ; GFX9-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; GFX9-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[SHL16]], [[ZEXTLOAD17]] - ; GFX9-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[OR16]], [[C3]](s32) - ; GFX9-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i8), addrspace 1) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (i8) from unknown-address + 1, addrspace 1) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i8) from unknown-address + 2, addrspace 1) + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i8) from unknown-address + 3, addrspace 1) + ; GFX9-NEXT: 
[[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (i8) from unknown-address + 4, addrspace 1) + ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p4) :: (load (i8) from unknown-address + 5, addrspace 1) + ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (i8) from unknown-address + 6, addrspace 1) + ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p4) :: (load (i8) from unknown-address + 7, addrspace 1) + ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; GFX9-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p4) :: (load (i8) from unknown-address + 8, addrspace 1) + ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; GFX9-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p4) :: (load (i8) from unknown-address + 9, addrspace 1) + ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; GFX9-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p4) :: (load (i8) from unknown-address + 10, addrspace 1) + ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p4) :: (load (i8) from unknown-address + 11, addrspace 1) + ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX9-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; GFX9-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p4) :: (load (i8) from unknown-address + 12, addrspace 1) + ; GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; GFX9-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p4) :: (load (i8) from unknown-address + 13, addrspace 1) + ; 
GFX9-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; GFX9-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; GFX9-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p4) :: (load (i8) from unknown-address + 14, addrspace 1) + ; GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p4) :: (load (i8) from unknown-address + 15, addrspace 1) + ; GFX9-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; GFX9-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; GFX9-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; GFX9-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C4]](i64) + ; GFX9-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD15]](p4) :: (load (i8) from unknown-address + 16, addrspace 1) + ; GFX9-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C]](i64) + ; GFX9-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD16]](p4) :: (load (i8) from unknown-address + 17, addrspace 1) + ; GFX9-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD13]], [[C1]](i32) + ; GFX9-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[ZEXTLOAD12]] + ; GFX9-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C2]](i64) + ; GFX9-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD17]](p4) :: (load (i8) from unknown-address + 18, addrspace 1) + ; GFX9-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD17]], [[C]](i64) + ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD18]](p4) :: (load (i8) from unknown-address + 19, addrspace 1) + ; GFX9-NEXT: [[SHL13:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; GFX9-NEXT: [[OR13:%[0-9]+]]:_(i32) = G_OR [[SHL13]], [[ZEXTLOAD14]] + ; GFX9-NEXT: [[SHL14:%[0-9]+]]:_(i32) = G_SHL [[OR13]], [[C3]](i32) + ; GFX9-NEXT: [[OR14:%[0-9]+]]:_(i32) = G_OR [[SHL14]], [[OR12]] + ; GFX9-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C5]](i64) + ; GFX9-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD19]](p4) :: (load (i8) from unknown-address + 20, addrspace 1) + ; GFX9-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C]](i64) + ; GFX9-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD20]](p4) :: (load (i8) from unknown-address + 21, addrspace 1) + ; GFX9-NEXT: [[SHL15:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD16]], [[C1]](i32) + ; GFX9-NEXT: [[OR15:%[0-9]+]]:_(i32) = G_OR [[SHL15]], [[ZEXTLOAD15]] + ; GFX9-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C2]](i64) + ; GFX9-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD21]](p4) :: (load (i8) from unknown-address + 22, addrspace 1) + ; GFX9-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD21]], [[C]](i64) + ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD22]](p4) :: (load (i8) from unknown-address + 23, addrspace 1) + ; GFX9-NEXT: [[SHL16:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; GFX9-NEXT: [[OR16:%[0-9]+]]:_(i32) = G_OR [[SHL16]], [[ZEXTLOAD17]] + ; GFX9-NEXT: [[SHL17:%[0-9]+]]:_(i32) = G_SHL [[OR16]], [[C3]](i32) + ; GFX9-NEXT: [[OR17:%[0-9]+]]:_(i32) = G_OR [[SHL17]], [[OR15]] + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR11]](i32), [[OR14]](i32), [[OR17]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x 
i32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 1, addrspace 1) - %2:_(s96) = G_EXTRACT %1, 0 - %3:_(s96) = G_EXTRACT %1, 96 - $vgpr0_vgpr1_vgpr2 = COPY %2 - $vgpr3_vgpr4_vgpr5 = COPY %3 + %1:_(<2 x i96>) = G_LOAD %0(p4) :: (load (<2 x i96>), align 1, addrspace 1) + %2:_(i96) = G_EXTRACT %1(<2 x i96>), 0 + %3:_(i96) = G_EXTRACT %1(<2 x i96>), 96 + $vgpr0_vgpr1_vgpr2 = COPY %2(i96) + $vgpr3_vgpr4_vgpr5 = COPY %3(i96) ... --- @@ -7693,166 +7693,166 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s16), addrspace 1) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 1) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 1) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 1) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s16) from unknown-address + 8, addrspace 1) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s16) from unknown-address + 10, addrspace 1) - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (s16) from unknown-address + 12, addrspace 1) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s16) from unknown-address + 14, addrspace 1) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p4) :: (load (s16) from unknown-address + 16, addrspace 1) - ; CI-NEXT: 
[[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s16) from unknown-address + 18, addrspace 1) - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD4]] - ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s64) - ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p4) :: (load (s16) from unknown-address + 20, addrspace 1) - ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s16) from unknown-address + 22, addrspace 1) - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] - ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) - ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i16), addrspace 1) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i16) from unknown-address + 2, addrspace 1) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i16) from unknown-address + 4, addrspace 1) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i16) from unknown-address + 6, addrspace 1) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (i16) from unknown-address + 8, addrspace 1) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p4) :: (load (i16) from unknown-address + 10, addrspace 1) + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; CI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (i16) from unknown-address + 12, addrspace 1) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD 
[[PTR_ADD6]](p4) :: (load (i16) from unknown-address + 14, addrspace 1) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p4) :: (load (i16) from unknown-address + 16, addrspace 1) + ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD8]](p4) :: (load (i16) from unknown-address + 18, addrspace 1) + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD4]] + ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C3]](i64) + ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p4) :: (load (i16) from unknown-address + 20, addrspace 1) + ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p4) :: (load (i16) from unknown-address + 22, addrspace 1) + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[ZEXTLOAD5]] + ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR3]](i32), [[OR4]](i32), [[OR5]](i32) + ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; CI-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; VI-LABEL: name: test_extload_constant_v2s96_from_24_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s16), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 1) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 1) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s16) from unknown-address + 8, addrspace 1) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s16) from unknown-address + 10, addrspace 1) - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], 
[[ZEXTLOAD2]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (s16) from unknown-address + 12, addrspace 1) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s16) from unknown-address + 14, addrspace 1) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p4) :: (load (s16) from unknown-address + 16, addrspace 1) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s16) from unknown-address + 18, addrspace 1) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD4]] - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s64) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p4) :: (load (s16) from unknown-address + 20, addrspace 1) - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s16) from unknown-address + 22, addrspace 1) - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] - ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i16), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i16) from unknown-address + 2, addrspace 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i16) from unknown-address + 4, addrspace 1) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i16) from unknown-address + 6, addrspace 1) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: 
[[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (i16) from unknown-address + 8, addrspace 1) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p4) :: (load (i16) from unknown-address + 10, addrspace 1) + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (i16) from unknown-address + 12, addrspace 1) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p4) :: (load (i16) from unknown-address + 14, addrspace 1) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p4) :: (load (i16) from unknown-address + 16, addrspace 1) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD8]](p4) :: (load (i16) from unknown-address + 18, addrspace 1) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD4]] + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C3]](i64) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p4) :: (load (i16) from unknown-address + 20, addrspace 1) + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p4) :: (load (i16) from unknown-address + 22, addrspace 1) + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[ZEXTLOAD5]] + ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR3]](i32), [[OR4]](i32), [[OR5]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; GFX9-LABEL: name: test_extload_constant_v2s96_from_24_align2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s16), addrspace 1) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 1) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], 
[[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 1) - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 1) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s16) from unknown-address + 8, addrspace 1) - ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s16) from unknown-address + 10, addrspace 1) - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (s16) from unknown-address + 12, addrspace 1) - ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s16) from unknown-address + 14, addrspace 1) - ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C2]](s64) - ; GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p4) :: (load (s16) from unknown-address + 16, addrspace 1) - ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s16) from unknown-address + 18, addrspace 1) - ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD4]] - ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s64) - ; GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p4) :: (load (s16) from unknown-address + 20, addrspace 1) - ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s16) from unknown-address + 22, addrspace 1) - ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p4) :: (load (i16), addrspace 1) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT 
i64 2 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i16) from unknown-address + 2, addrspace 1) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (i16) from unknown-address + 4, addrspace 1) + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p4) :: (load (i16) from unknown-address + 6, addrspace 1) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (i16) from unknown-address + 8, addrspace 1) + ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p4) :: (load (i16) from unknown-address + 10, addrspace 1) + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p4) :: (load (i16) from unknown-address + 12, addrspace 1) + ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p4) :: (load (i16) from unknown-address + 14, addrspace 1) + ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C2]](i64) + ; GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p4) :: (load (i16) from unknown-address + 16, addrspace 1) + ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD8]](p4) :: (load (i16) from unknown-address + 18, addrspace 1) + ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD4]] + ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C3]](i64) + ; GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p4) :: (load (i16) from unknown-address + 20, addrspace 1) + ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p4) :: (load (i16) from unknown-address + 22, addrspace 1) + ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[ZEXTLOAD5]] + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR 
[[OR3]](i32), [[OR4]](i32), [[OR5]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 2, addrspace 1) - %2:_(s96) = G_EXTRACT %1, 0 - %3:_(s96) = G_EXTRACT %1, 96 - $vgpr0_vgpr1_vgpr2 = COPY %2 - $vgpr3_vgpr4_vgpr5 = COPY %3 + %1:_(<2 x i96>) = G_LOAD %0(p4) :: (load (<2 x i96>), align 2, addrspace 1) + %2:_(i96) = G_EXTRACT %1(<2 x i96>), 0 + %3:_(i96) = G_EXTRACT %1(<2 x i96>), 96 + $vgpr0_vgpr1_vgpr2 = COPY %2(i96) + $vgpr3_vgpr4_vgpr5 = COPY %3(i96) ... --- @@ -7865,52 +7865,52 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 4, addrspace 1) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1) - ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p4) :: (load (<3 x i32>), align 4, addrspace 1) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<3 x i32>) from unknown-address + 12, align 4, addrspace 1) + ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD1]](<3 x i32>) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; CI-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; VI-LABEL: name: test_extload_constant_v2s96_from_24_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 4, addrspace 1) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p4) :: (load (<3 x i32>), align 4, addrspace 1) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST 
[[LOAD]](<3 x i32>) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<3 x i32>) from unknown-address + 12, align 4, addrspace 1) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD1]](<3 x i32>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; GFX9-LABEL: name: test_extload_constant_v2s96_from_24_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 4, addrspace 1) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p4) :: (load (<3 x i32>), align 4, addrspace 1) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<3 x i32>) from unknown-address + 12, align 4, addrspace 1) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD1]](<3 x i32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 4, addrspace 1) - %2:_(s96) = G_EXTRACT %1, 0 - %3:_(s96) = G_EXTRACT %1, 96 - $vgpr0_vgpr1_vgpr2 = COPY %2 - $vgpr3_vgpr4_vgpr5 = COPY %3 + %1:_(<2 x i96>) = G_LOAD %0(p4) :: (load (<2 x i96>), align 4, addrspace 1) + %2:_(i96) = G_EXTRACT %1(<2 x i96>), 0 + %3:_(i96) = G_EXTRACT %1(<2 x i96>), 96 + $vgpr0_vgpr1_vgpr2 = COPY %2(i96) + $vgpr3_vgpr4_vgpr5 = COPY %3(i96) ... 
--- @@ -7923,52 +7923,52 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 16, addrspace 1) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1) - ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p4) :: (load (<3 x i32>), align 16, addrspace 1) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<3 x i32>) from unknown-address + 12, align 4, addrspace 1) + ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD1]](<3 x i32>) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; CI-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; VI-LABEL: name: test_extload_constant_v2s96_from_24_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 16, addrspace 1) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p4) :: (load (<3 x i32>), align 16, addrspace 1) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<3 x i32>) from unknown-address + 12, align 4, addrspace 1) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD1]](<3 x i32>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; GFX9-LABEL: name: test_extload_constant_v2s96_from_24_align16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) 
:: (load (<3 x s32>), align 16, addrspace 1) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p4) :: (load (<3 x i32>), align 16, addrspace 1) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<3 x i32>) from unknown-address + 12, align 4, addrspace 1) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD1]](<3 x i32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 16, addrspace 1) - %2:_(s96) = G_EXTRACT %1, 0 - %3:_(s96) = G_EXTRACT %1, 96 - $vgpr0_vgpr1_vgpr2 = COPY %2 - $vgpr3_vgpr4_vgpr5 = COPY %3 + %1:_(<2 x i96>) = G_LOAD %0(p4) :: (load (<2 x i96>), align 16, addrspace 1) + %2:_(i96) = G_EXTRACT %1(<2 x i96>), 0 + %3:_(i96) = G_EXTRACT %1(<2 x i96>), 96 + $vgpr0_vgpr1_vgpr2 = COPY %2(i96) + $vgpr3_vgpr4_vgpr5 = COPY %3(i96) ... 
--- @@ -7981,28 +7981,28 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s32>), align 32, addrspace 4) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s512) = G_BITCAST [[LOAD]](<16 x s32>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](s512) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<16 x i32>) = G_LOAD [[COPY]](p4) :: (load (<16 x i32>), align 32, addrspace 4) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i512) = G_BITCAST [[LOAD]](<16 x i32>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](i512) ; ; VI-LABEL: name: test_load_constant_s512_align32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s32>), align 32, addrspace 4) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s512) = G_BITCAST [[LOAD]](<16 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](s512) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<16 x i32>) = G_LOAD [[COPY]](p4) :: (load (<16 x i32>), align 32, addrspace 4) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i512) = G_BITCAST [[LOAD]](<16 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](i512) ; ; GFX9-LABEL: name: test_load_constant_s512_align32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s32>), align 32, addrspace 4) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s512) = G_BITCAST [[LOAD]](<16 x s32>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](s512) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<16 x i32>) = G_LOAD [[COPY]](p4) :: (load (<16 x i32>), align 32, addrspace 4) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i512) = G_BITCAST [[LOAD]](<16 x i32>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](i512) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s512) = G_LOAD %0 :: (load (s512), align 32, addrspace 4) - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1 + %1:_(i512) = G_LOAD %0(p4) :: (load (i512), align 32, addrspace 4) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1(i512) ... 
--- @@ -8015,26 +8015,26 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s32>), align 32, addrspace 4) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s128>) = G_BITCAST [[LOAD]](<16 x s32>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](<4 x s128>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<16 x i32>) = G_LOAD [[COPY]](p4) :: (load (<16 x i32>), align 32, addrspace 4) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i128>) = G_BITCAST [[LOAD]](<16 x i32>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](<4 x i128>) ; ; VI-LABEL: name: test_load_constant_v4s128_align32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s32>), align 32, addrspace 4) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s128>) = G_BITCAST [[LOAD]](<16 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](<4 x s128>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<16 x i32>) = G_LOAD [[COPY]](p4) :: (load (<16 x i32>), align 32, addrspace 4) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i128>) = G_BITCAST [[LOAD]](<16 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](<4 x i128>) ; ; GFX9-LABEL: name: test_load_constant_v4s128_align32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s32>), align 32, addrspace 4) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s128>) = G_BITCAST [[LOAD]](<16 x s32>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](<4 x s128>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<16 x i32>) = G_LOAD [[COPY]](p4) :: (load (<16 x i32>), align 32, addrspace 4) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i128>) = G_BITCAST [[LOAD]](<16 x i32>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](<4 x i128>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<4 x s128>) = G_LOAD %0 :: (load (<4 x s128>), align 32, addrspace 4) - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1 + %1:_(<4 x i128>) = G_LOAD %0(p4) :: (load (<4 x i128>), align 32, addrspace 4) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1(<4 x i128>) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir index 8e299a19dfdbe..37765ec96cd0f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir @@ -23,77 +23,77 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; CI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i8)) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; CI-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; VI-LABEL: name: test_load_flat_s1_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; VI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i8)) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; VI-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; GFX9PLUS-LABEL: name: test_load_flat_s1_align1 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) - ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9PLUS-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; GFX9PLUS-NEXT: $vgpr0 = COPY [[AND]](s32) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i8)) + ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX9PLUS-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; GFX9PLUS-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; GFX11PLUS-LABEL: name: test_load_flat_s1_align1 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) - ; GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX11PLUS-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; GFX11PLUS-NEXT: $vgpr0 = COPY [[AND]](s32) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i8)) + ; GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX11PLUS-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; GFX11PLUS-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; GFX12-LABEL: name: test_load_flat_s1_align1 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX12-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; GFX12-NEXT: $vgpr0 = COPY [[AND]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i8)) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX12-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; GFX12-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_s1_align1 ; 
UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX9PLUS-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0 = COPY [[AND]](s32) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX9PLUS-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_s1_align1 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX11PLUS-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[AND]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX11PLUS-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_s1_align1 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX12-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[AND]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX12-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[AND]](i32) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s1) = G_LOAD %0 :: (load (s1), align 1, addrspace 0) - %2:_(s32) = G_ZEXT %1 - $vgpr0 = COPY %2 + %1:_(i1) = G_LOAD %0(p0) :: (load (i1)) + %2:_(i32) = G_ZEXT %1(i1) + $vgpr0 = COPY %2(i32) ... 
--- @@ -106,77 +106,77 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; CI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i8)) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; CI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; CI-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; VI-LABEL: name: test_load_flat_s2_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; VI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i8)) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; VI-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; GFX9PLUS-LABEL: name: test_load_flat_s2_align1 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) - ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; GFX9PLUS-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; GFX9PLUS-NEXT: $vgpr0 = COPY [[AND]](s32) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i8)) + ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; GFX9PLUS-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; GFX9PLUS-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; GFX11PLUS-LABEL: name: test_load_flat_s2_align1 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) - ; GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; GFX11PLUS-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; GFX11PLUS-NEXT: $vgpr0 = COPY [[AND]](s32) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i8)) + ; GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; GFX11PLUS-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; GFX11PLUS-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; GFX12-LABEL: name: test_load_flat_s2_align1 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; GFX12-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; GFX12-NEXT: $vgpr0 = COPY [[AND]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i8)) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; GFX12-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; GFX12-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_s2_align1 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; 
UNALIGNED_GFX9PLUS-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0 = COPY [[AND]](s32) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; UNALIGNED_GFX9PLUS-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_s2_align1 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; UNALIGNED_GFX11PLUS-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[AND]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; UNALIGNED_GFX11PLUS-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_s2_align1 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; UNALIGNED_GFX12-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[AND]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; UNALIGNED_GFX12-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[AND]](i32) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s2) = G_LOAD %0 :: (load (s2), align 1, addrspace 0) - %2:_(s32) = G_ZEXT %1 - $vgpr0 = COPY %2 + %1:_(i2) = G_LOAD %0(p0) :: (load (i2)) + %2:_(i32) = G_ZEXT %1(i2) + $vgpr0 = COPY %2(i32) ... 
--- @@ -189,61 +189,61 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4) - ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i8), align 4) + ; CI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; VI-LABEL: name: test_load_flat_s8_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i8), align 4) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9PLUS-LABEL: name: test_load_flat_s8_align4 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4) - ; GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i8), align 4) + ; GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX11PLUS-LABEL: name: test_load_flat_s8_align4 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4) - ; GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i8), align 4) + ; GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX12-LABEL: name: test_load_flat_s8_align4 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4) - ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i8), align 4) + ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_s8_align4 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i8), align 4) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_s8_align4 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i8), align 4) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_s8_align4 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4) - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: 
(load (i8), align 4) + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s8) = G_LOAD %0 :: (load (s8), align 4, addrspace 0) - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + %1:_(i8) = G_LOAD %0(p0) :: (load (i8), align 4) + %2:_(i32) = G_ANYEXT %1(i8) + $vgpr0 = COPY %2(i32) ... --- @@ -256,61 +256,61 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) - ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i8)) + ; CI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; VI-LABEL: name: test_load_flat_s8_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i8)) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9PLUS-LABEL: name: test_load_flat_s8_align1 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) - ; GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i8)) + ; GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX11PLUS-LABEL: name: test_load_flat_s8_align1 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) - ; GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i8)) + ; GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX12-LABEL: name: test_load_flat_s8_align1 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) - ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i8)) + ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_s8_align1 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_s8_align1 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_s8_align1 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: 
[[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s8) = G_LOAD %0 :: (load (s8), align 1, addrspace 0) - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + %1:_(i8) = G_LOAD %0(p0) :: (load (i8)) + %2:_(i32) = G_ANYEXT %1(i8) + $vgpr0 = COPY %2(i32) ... --- @@ -323,61 +323,61 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) - ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 4) + ; CI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; VI-LABEL: name: test_load_flat_s16_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 4) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9PLUS-LABEL: name: test_load_flat_s16_align4 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) - ; GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 4) + ; GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX11PLUS-LABEL: name: test_load_flat_s16_align4 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) - ; GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 4) + ; GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX12-LABEL: name: test_load_flat_s16_align4 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) - ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 4) + ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_s16_align4 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 4) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_s16_align4 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD 
[[COPY]](p0) :: (load (i16), align 4) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_s16_align4 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 4) + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s16) = G_LOAD %0 :: (load (s16), align 4, addrspace 0) - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + %1:_(i16) = G_LOAD %0(p0) :: (load (i16), align 4) + %2:_(i32) = G_ANYEXT %1(i16) + $vgpr0 = COPY %2(i32) ... --- @@ -390,61 +390,61 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) - ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16)) + ; CI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; VI-LABEL: name: test_load_flat_s16_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16)) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9PLUS-LABEL: name: test_load_flat_s16_align2 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) - ; GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16)) + ; GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX11PLUS-LABEL: name: test_load_flat_s16_align2 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) - ; GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16)) + ; GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX12-LABEL: name: test_load_flat_s16_align2 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) - ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16)) + ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_s16_align2 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16)) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_s16_align2 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = 
COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16)) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_s16_align2 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16)) + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s16) = G_LOAD %0 :: (load (s16), align 2, addrspace 0) - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + %1:_(i16) = G_LOAD %0(p0) :: (load (i16)) + %2:_(i32) = G_ANYEXT %1(i16) + $vgpr0 = COPY %2(i32) ... --- @@ -457,91 +457,91 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; VI-LABEL: name: test_load_flat_s16_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; GFX9PLUS-LABEL: name: test_load_flat_s16_align1 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ 
$}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 1) - ; GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 1) + ; GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX11PLUS-LABEL: name: test_load_flat_s16_align1 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 1) - ; GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 1) + ; GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX12-LABEL: name: test_load_flat_s16_align1 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 1) - ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 1) + ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_s16_align1 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0 = COPY [[OR]](s32) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_s16_align1 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], 
[[ZEXTLOAD]] - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[OR]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_s16_align1 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[OR]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[OR]](i32) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s16) = G_LOAD %0 :: (load (s16), align 1, addrspace 0) - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + %1:_(i16) = G_LOAD %0(p0) :: (load (i16), align 1) + %2:_(i32) = G_ANYEXT %1(i16) + $vgpr0 = COPY %2(i32) ... 
--- @@ -554,60 +554,60 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; CI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; VI-LABEL: name: test_load_flat_s32_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9PLUS-LABEL: name: test_load_flat_s32_align4 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX11PLUS-LABEL: name: test_load_flat_s32_align4 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX12-LABEL: name: test_load_flat_s32_align4 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_s32_align4 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_s32_align4 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_s32_align4 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s32) = 
G_LOAD %0 :: (load (s32), align 4, addrspace 0) - $vgpr0 = COPY %1 + %1:_(i32) = G_LOAD %0(p0) :: (load (i32)) + $vgpr0 = COPY %1(i32) ... --- @@ -620,90 +620,90 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i16)) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; VI-LABEL: name: test_load_flat_s32_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i16)) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; GFX9PLUS-LABEL: name: test_load_flat_s32_align2 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 2) - ; GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 2) + ; GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX11PLUS-LABEL: name: test_load_flat_s32_align2 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 2) - ; GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 2) + ; GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX12-LABEL: name: test_load_flat_s32_align2 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ 
$}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 2) - ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 2) + ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_s32_align2 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) - ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) - ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0 = COPY [[OR]](s32) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i16)) + ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2) + ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_s32_align2 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) - ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) - ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[OR]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i16)) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2) + ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_s32_align2 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; 
UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[OR]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i16)) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[OR]](i32) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_LOAD %0 :: (load (s32), align 2, addrspace 0) - $vgpr0 = COPY %1 + %1:_(i32) = G_LOAD %0(p0) :: (load (i32), align 2) + $vgpr0 = COPY %1(i32) ... --- @@ -716,140 +716,140 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = 
G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; VI-LABEL: name: test_load_flat_s32_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; GFX9PLUS-LABEL: name: test_load_flat_s32_align1 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY 
$vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 1) - ; GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 1) + ; GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX11PLUS-LABEL: name: test_load_flat_s32_align1 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 1) - ; GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 1) + ; GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX12-LABEL: name: test_load_flat_s32_align1 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 1) - ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 1) + ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_s32_align1 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX9PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], 
[[ZEXTLOAD]] + ; UNALIGNED_GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX9PLUS-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_s32_align1 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) 
+ ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_s32_align1 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD 
[[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[OR2]](i32) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_LOAD %0 :: (load (s32), align 1, addrspace 0) - $vgpr0 = COPY %1 + %1:_(i32) = G_LOAD %0(p0) :: (load (i32), align 1) + $vgpr0 = COPY %1(i32) ... --- @@ -862,105 +862,105 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 8) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 4, align 4) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]] - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] - ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) - ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 - ; CI-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[MV]], [[C4]] - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[AND2]](s64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 8) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 4, align 4) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C2]] + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[C3]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND1]], [[SHL1]] + ; CI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR]](i32), [[OR1]](i32) + ; CI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 281474976710655 + ; CI-NEXT: [[AND2:%[0-9]+]]:_(i64) = G_AND [[MV]], [[C4]] + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[AND2]](i64) ; ; VI-LABEL: name: test_load_flat_s48_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 8) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; 
VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 4, align 4) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] - ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[MV]], [[C4]] - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[AND2]](s64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 8) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 4, align 4) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C2]] + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[C3]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND1]], [[SHL1]] + ; VI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR]](i32), [[OR1]](i32) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 281474976710655 + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i64) = G_AND [[MV]], [[C4]] + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[AND2]](i64) ; ; GFX9PLUS-LABEL: name: test_load_flat_s48_align8 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64)) - ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 - ; GFX9PLUS-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[LOAD]], [[C]] - ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[AND]](s64) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p0) :: (load (i64)) + ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 281474976710655 + ; GFX9PLUS-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[LOAD]], [[C]] + ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[AND]](i64) ; ; GFX11PLUS-LABEL: name: test_load_flat_s48_align8 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64)) - ; GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 - ; GFX11PLUS-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[LOAD]], [[C]] - ; GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[AND]](s64) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p0) :: (load (i64)) + ; GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 281474976710655 + ; GFX11PLUS-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[LOAD]], [[C]] + ; 
GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[AND]](i64) ; ; GFX12-LABEL: name: test_load_flat_s48_align8 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64)) - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 - ; GFX12-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[LOAD]], [[C]] - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[AND]](s64) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p0) :: (load (i64)) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 281474976710655 + ; GFX12-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[LOAD]], [[C]] + ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[AND]](i64) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_s48_align8 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64)) - ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 - ; UNALIGNED_GFX9PLUS-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[LOAD]], [[C]] - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[AND]](s64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p0) :: (load (i64)) + ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 281474976710655 + ; UNALIGNED_GFX9PLUS-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[LOAD]], [[C]] + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[AND]](i64) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_s48_align8 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64)) - ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 - ; UNALIGNED_GFX11PLUS-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[LOAD]], [[C]] - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[AND]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p0) :: (load (i64)) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 281474976710655 + ; UNALIGNED_GFX11PLUS-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[LOAD]], [[C]] + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[AND]](i64) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_s48_align8 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64)) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 - ; UNALIGNED_GFX12-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[LOAD]], [[C]] - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[AND]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p0) :: (load (i64)) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 281474976710655 + ; UNALIGNED_GFX12-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[LOAD]], [[C]] + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[AND]](i64) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s48) = G_LOAD %0 :: (load (s48), align 8, addrspace 0) - %2:_(s64) = G_ZEXT %1 - $vgpr0_vgpr1 = COPY %2 + %1:_(i48) = G_LOAD %0(p0) :: (load (i48), align 8) + %2:_(i64) = G_ZEXT %1(i48) + $vgpr0_vgpr1 = COPY %2(i64) ... 
--- @@ -973,68 +973,68 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 8) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 8) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; CI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) ; ; VI-LABEL: name: test_load_flat_s64_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 8) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 8) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; VI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) ; ; GFX9PLUS-LABEL: name: test_load_flat_s64_align8 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64)) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p0) :: (load (i64)) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; GFX11PLUS-LABEL: name: test_load_flat_s64_align8 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64)) - ; GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p0) :: (load (i64)) + ; GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; GFX12-LABEL: name: test_load_flat_s64_align8 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64)) - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p0) :: (load (i64)) + ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_s64_align8 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; 
UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64)) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p0) :: (load (i64)) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_s64_align8 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64)) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p0) :: (load (i64)) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_s64_align8 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64)) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p0) :: (load (i64)) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_LOAD %0 :: (load (s64), align 8, addrspace 0) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_LOAD %0(p0) :: (load (i64)) + $vgpr0_vgpr1 = COPY %1(i64) ... --- @@ -1047,68 +1047,68 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; CI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) ; ; VI-LABEL: name: test_load_flat_s64_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; VI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) ; ; GFX9PLUS-LABEL: name: 
test_load_flat_s64_align4 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64), align 4) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p0) :: (load (i64), align 4) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; GFX11PLUS-LABEL: name: test_load_flat_s64_align4 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64), align 4) - ; GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p0) :: (load (i64), align 4) + ; GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; GFX12-LABEL: name: test_load_flat_s64_align4 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64), align 4) - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p0) :: (load (i64), align 4) + ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_s64_align4 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64), align 4) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p0) :: (load (i64), align 4) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_s64_align4 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64), align 4) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p0) :: (load (i64), align 4) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_s64_align4 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64), align 4) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p0) :: (load (i64), align 4) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_LOAD %0 :: (load (s64), align 4, addrspace 0) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_LOAD %0(p0) :: (load (i64), align 4) + $vgpr0_vgpr1 = COPY %1(i64) ... 
--- @@ -1121,142 +1121,142 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i16)) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i16) from unknown-address + 4) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i16) from unknown-address + 6) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; CI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR]](i32), [[OR1]](i32) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) ; ; VI-LABEL: name: test_load_flat_s64_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 
6) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i16)) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i16) from unknown-address + 4) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i16) from unknown-address + 6) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; VI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR]](i32), [[OR1]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) ; ; GFX9PLUS-LABEL: name: test_load_flat_s64_align2 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64), align 2) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p0) :: (load (i64), align 2) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; GFX11PLUS-LABEL: name: test_load_flat_s64_align2 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64), align 2) - ; GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p0) :: (load (i64), align 2) + ; GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; GFX12-LABEL: name: test_load_flat_s64_align2 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64), align 2) - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p0) :: (load (i64), align 2) + ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_s64_align2 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) - ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) - ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; 
UNALIGNED_GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; UNALIGNED_GFX9PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](s64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i16)) + ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2) + ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i16) from unknown-address + 4) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i16) from unknown-address + 6) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX9PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C3]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR2:%[0-9]+]]:_(i64) = G_OR [[SHL2]], [[ZEXT]] + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](i64) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_s64_align2 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) - ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) - ; UNALIGNED_GFX11PLUS-NEXT: 
[[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; UNALIGNED_GFX11PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i16)) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2) + ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i16) from unknown-address + 4) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i16) from unknown-address + 6) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C3]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(i64) = G_OR [[SHL2]], [[ZEXT]] + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](i64) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_s64_align2 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; 
UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6) - ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) - ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i16)) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR]](i32) + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i16) from unknown-address + 4) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i16) from unknown-address + 6) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR1]](i32) + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(i64) = G_OR [[SHL2]], [[ZEXT]] + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](i64) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_LOAD %0 :: (load (s64), align 2, addrspace 0) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_LOAD %0(p0) :: (load (i64), align 2) + $vgpr0_vgpr1 = COPY %1(i64) ... 
--- @@ -1269,232 +1269,232 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-NEXT: 
[[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i8) from unknown-address + 4) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (i8) from unknown-address + 5) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (i8) from unknown-address + 6) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i8) from unknown-address + 7) + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR2]](i32), [[OR5]](i32) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) ; ; VI-LABEL: name: test_load_flat_s64_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) - ; 
VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i8) from unknown-address + 4) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (i8) from unknown-address + 5) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (i8) from unknown-address + 6) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i8) from unknown-address + 7) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; VI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR2]](i32), [[OR5]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) ; ; GFX9PLUS-LABEL: name: test_load_flat_s64_align1 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; 
GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64), align 1) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p0) :: (load (i64), align 1) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; GFX11PLUS-LABEL: name: test_load_flat_s64_align1 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64), align 1) - ; GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p0) :: (load (i64), align 1) + ; GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; GFX12-LABEL: name: test_load_flat_s64_align1 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64), align 1) - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p0) :: (load (i64), align 1) + ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_s64_align1 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX9PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from 
unknown-address + 5) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX9PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](s64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX9PLUS-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i8) from unknown-address + 4) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (i8) from unknown-address + 5) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: 
[[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (i8) from unknown-address + 6) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i8) from unknown-address + 7) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX9PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](i64) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_s64_align1 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) - ; UNALIGNED_GFX11PLUS-NEXT: 
[[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX11PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i8) from unknown-address + 4) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (i8) from unknown-address + 5) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR3:%[0-9]+]]:_(i32) 
= G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (i8) from unknown-address + 6) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i8) from unknown-address + 7) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](i64) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_s64_align1 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) - ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR 
[[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) - ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; UNALIGNED_GFX12-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i8) from unknown-address + 4) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (i8) from unknown-address + 5) + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (i8) from unknown-address + 6) + ; 
UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i8) from unknown-address + 7) + ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; UNALIGNED_GFX12-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](i64) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_LOAD %0 :: (load (s64), align 1, addrspace 0) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_LOAD %0(p0) :: (load (i64), align 1) + $vgpr0_vgpr1 = COPY %1(i64) ... --- @@ -1507,82 +1507,82 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 16) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8, align 8) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 16) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i32) from unknown-address + 8, align 8) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; VI-LABEL: name: test_load_flat_s96_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 16) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8, align 8) - ; VI-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 16) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i32) from unknown-address + 8, align 8) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX9PLUS-LABEL: name: test_load_flat_s96_align16 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 16) - ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p0) :: (load (<3 x i32>), align 16) + ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX11PLUS-LABEL: name: test_load_flat_s96_align16 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 16) - ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p0) :: (load (<3 x i32>), align 16) + ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX12-LABEL: name: test_load_flat_s96_align16 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 16) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p0) :: (load (<3 x i32>), align 16) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_s96_align16 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 16) - ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD 
[[COPY]](p0) :: (load (<3 x i32>), align 16) + ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_s96_align16 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 16) - ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p0) :: (load (<3 x i32>), align 16) + ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_s96_align16 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 16) - ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p0) :: (load (<3 x i32>), align 16) + ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s96) = G_LOAD %0 :: (load (s96), align 16, addrspace 0) - $vgpr0_vgpr1_vgpr2 = COPY %1 + %1:_(i96) = G_LOAD %0(p0) :: (load (i96), align 16) + $vgpr0_vgpr1_vgpr2 = COPY %1(i96) ... 
--- @@ -1595,82 +1595,82 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 8) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8, align 8) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 8) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i32) from unknown-address + 8, align 8) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; VI-LABEL: name: test_load_flat_s96_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 8) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8, align 8) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 8) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i32) from unknown-address + 8, align 8) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX9PLUS-LABEL: name: test_load_flat_s96_align8 
; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 8) - ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p0) :: (load (<3 x i32>), align 8) + ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX11PLUS-LABEL: name: test_load_flat_s96_align8 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 8) - ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p0) :: (load (<3 x i32>), align 8) + ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX12-LABEL: name: test_load_flat_s96_align8 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 8) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p0) :: (load (<3 x i32>), align 8) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_s96_align8 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 8) - ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p0) :: (load (<3 x i32>), align 8) + ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_s96_align8 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 8) - ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p0) :: (load (<3 x i32>), align 8) + ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_s96_align8 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; 
UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 8) - ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p0) :: (load (<3 x i32>), align 8) + ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s96) = G_LOAD %0 :: (load (s96), align 8, addrspace 0) - $vgpr0_vgpr1_vgpr2 = COPY %1 + %1:_(i96) = G_LOAD %0(p0) :: (load (i96), align 8) + $vgpr0_vgpr1_vgpr2 = COPY %1(i96) ... --- @@ -1683,82 +1683,82 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i32) from unknown-address + 8) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; VI-LABEL: name: test_load_flat_s96_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; VI-NEXT: 
[[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i32) from unknown-address + 8) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX9PLUS-LABEL: name: test_load_flat_s96_align4 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 4) - ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p0) :: (load (<3 x i32>), align 4) + ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX11PLUS-LABEL: name: test_load_flat_s96_align4 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 4) - ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p0) :: (load (<3 x i32>), align 4) + ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX12-LABEL: name: test_load_flat_s96_align4 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 4) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p0) :: (load (<3 x i32>), align 4) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_s96_align4 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 4) - ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p0) :: (load (<3 x i32>), align 4) + ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_s96_align4 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; 
UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 4) - ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p0) :: (load (<3 x i32>), align 4) + ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_s96_align4 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 4) - ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p0) :: (load (<3 x i32>), align 4) + ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s96) = G_LOAD %0 :: (load (s96), align 4, addrspace 0) - $vgpr0_vgpr1_vgpr2 = COPY %1 + %1:_(i96) = G_LOAD %0(p0) :: (load (i96), align 4) + $vgpr0_vgpr1_vgpr2 = COPY %1(i96) ... --- @@ -1771,173 +1771,173 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s16) from unknown-address + 8) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s16) from unknown-address + 10) - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + 
; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i16)) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i16) from unknown-address + 4) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i16) from unknown-address + 6) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i16) from unknown-address + 8) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p0) :: (load (i16) from unknown-address + 10) + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; VI-LABEL: name: test_load_flat_s96_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s16) from unknown-address + 8) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s16) from unknown-address + 10) - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; 
VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i16)) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i16) from unknown-address + 4) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i16) from unknown-address + 6) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i16) from unknown-address + 8) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p0) :: (load (i16) from unknown-address + 10) + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX9PLUS-LABEL: name: test_load_flat_s96_align2 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 2) - ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p0) :: (load (<3 x i32>), align 2) + ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX11PLUS-LABEL: name: test_load_flat_s96_align2 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 2) - ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p0) :: (load (<3 x i32>), align 2) + ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX12-LABEL: name: test_load_flat_s96_align2 ; GFX12: liveins: 
$vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 2) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p0) :: (load (<3 x i32>), align 2) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_s96_align2 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) - ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) - ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; UNALIGNED_GFX9PLUS-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s16) from unknown-address + 8) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s16) from unknown-address + 10) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i16)) + ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2) + ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; 
UNALIGNED_GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i16) from unknown-address + 4) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i16) from unknown-address + 6) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX9PLUS-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i16) from unknown-address + 8) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p0) :: (load (i16) from unknown-address + 10) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_s96_align2 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) - ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) - ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s16) from unknown-address + 8) - ; UNALIGNED_GFX11PLUS-NEXT: 
[[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s16) from unknown-address + 10) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i16)) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2) + ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i16) from unknown-address + 4) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i16) from unknown-address + 6) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i16) from unknown-address + 8) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p0) :: (load (i16) from unknown-address + 10) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_s96_align2 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; 
UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6) - ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s16) from unknown-address + 8) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s16) from unknown-address + 10) - ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) - ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i16)) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i16) from unknown-address + 4) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i16) from unknown-address + 6) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i16) from unknown-address + 8) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p0) :: (load (i16) from unknown-address + 10) + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; 
UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32) + ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s96) = G_LOAD %0 :: (load (s96), align 2, addrspace 0) - $vgpr0_vgpr1_vgpr2 = COPY %1 + %1:_(i96) = G_LOAD %0(p0) :: (load (i96), align 2) + $vgpr0_vgpr1_vgpr2 = COPY %1(i96) ... --- @@ -1950,303 +1950,303 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) - ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load 
(s8) from unknown-address + 9) - ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; CI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) - ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) - ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i8) from unknown-address + 4) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (i8) from unknown-address + 5) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (i8) from unknown-address + 6) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i8) from unknown-address + 7) + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; 
CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (i8) from unknown-address + 8) + ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (i8) from unknown-address + 9) + ; CI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; CI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (i8) from unknown-address + 10) + ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p0) :: (load (i8) from unknown-address + 11) + ; CI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; CI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; CI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; VI-LABEL: name: test_load_flat_s96_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) - ; VI-NEXT: 
[[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) - ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) - ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i8) from unknown-address + 4) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (i8) from unknown-address + 5) + ; 
VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (i8) from unknown-address + 6) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i8) from unknown-address + 7) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; VI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (i8) from unknown-address + 8) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (i8) from unknown-address + 9) + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; VI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (i8) from unknown-address + 10) + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p0) :: (load (i8) from unknown-address + 11) + ; VI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; VI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; VI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX9PLUS-LABEL: name: test_load_flat_s96_align1 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 1) - ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p0) :: (load (<3 x i32>), align 1) + ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX11PLUS-LABEL: name: test_load_flat_s96_align1 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 1) - ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p0) :: (load (<3 x i32>), align 1) + ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX12-LABEL: 
name: test_load_flat_s96_align1 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 1) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p0) :: (load (<3 x i32>), align 1) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_s96_align1 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX9PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX9PLUS-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; 
UNALIGNED_GFX9PLUS-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX9PLUS-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX9PLUS-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX9PLUS-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT 
i64 4 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i8) from unknown-address + 4) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (i8) from unknown-address + 5) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (i8) from unknown-address + 6) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i8) from unknown-address + 7) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX9PLUS-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (i8) from unknown-address + 8) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (i8) from unknown-address + 9) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (i8) from unknown-address + 10) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p0) :: (load (i8) from unknown-address + 11) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_s96_align1 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) 
= G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX11PLUS-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX11PLUS-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; 
UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i8) from unknown-address + 4) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (i8) from unknown-address + 5) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (i8) from unknown-address + 6) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: 
[[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i8) from unknown-address + 7) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (i8) from unknown-address + 8) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (i8) from unknown-address + 9) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (i8) from unknown-address + 10) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p0) :: (load (i8) from unknown-address + 11) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_s96_align1 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; 
UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) - ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) - ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX12-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) - ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) - ; UNALIGNED_GFX12-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; UNALIGNED_GFX12-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; 
UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i8) from unknown-address + 4) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (i8) from unknown-address + 5) + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (i8) from unknown-address + 6) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i8) from unknown-address + 7) + ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX12-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (i8) from unknown-address + 8) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (i8) from unknown-address + 9) + ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (i8) from 
unknown-address + 10) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p0) :: (load (i8) from unknown-address + 11) + ; UNALIGNED_GFX12-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX12-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s96) = G_LOAD %0 :: (load (s96), align 1, addrspace 0) - $vgpr0_vgpr1_vgpr2 = COPY %1 + %1:_(i96) = G_LOAD %0(p0) :: (load (i96), align 1) + $vgpr0_vgpr1_vgpr2 = COPY %1(i96) ... --- @@ -2259,124 +2259,124 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) - ; CI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s32) from unknown-address + 16) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>) - ; CI-NEXT: S_NOP 0, implicit [[BITCAST]](s160) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i32) from unknown-address + 8) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i32) from unknown-address + 12) + ; CI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p0) :: (load (i32) from unknown-address + 16) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x i32>) = G_BUILD_VECTOR 
[[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32), [[LOAD4]](i32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i160) = G_BITCAST [[BUILD_VECTOR]](<5 x i32>) + ; CI-NEXT: S_NOP 0, implicit [[BITCAST]](i160) ; ; VI-LABEL: name: test_load_flat_s160_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s32) from unknown-address + 16) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>) - ; VI-NEXT: S_NOP 0, implicit [[BITCAST]](s160) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i32) from unknown-address + 8) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i32) from unknown-address + 12) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p0) :: (load (i32) from unknown-address + 16) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32), [[LOAD4]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i160) = G_BITCAST [[BUILD_VECTOR]](<5 x i32>) + ; VI-NEXT: S_NOP 0, implicit [[BITCAST]](i160) ; ; GFX9PLUS-LABEL: name: test_load_flat_s160_align4 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4) - ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 16) - ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), 
[[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32) - ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>) - ; GFX9PLUS-NEXT: S_NOP 0, implicit [[BITCAST]](s160) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 4) + ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 16) + ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<4 x i32>) + ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32), [[LOAD1]](i32) + ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i160) = G_BITCAST [[BUILD_VECTOR]](<5 x i32>) + ; GFX9PLUS-NEXT: S_NOP 0, implicit [[BITCAST]](i160) ; ; GFX11PLUS-LABEL: name: test_load_flat_s160_align4 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4) - ; GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 16) - ; GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32) - ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>) - ; GFX11PLUS-NEXT: S_NOP 0, implicit [[BITCAST]](s160) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 4) + ; GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 16) + ; GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<4 x i32>) + ; GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32), [[LOAD1]](i32) + ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i160) = G_BITCAST [[BUILD_VECTOR]](<5 x i32>) + ; GFX11PLUS-NEXT: S_NOP 0, implicit [[BITCAST]](i160) ; ; GFX12-LABEL: name: test_load_flat_s160_align4 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4) - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 16) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - 
; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>) - ; GFX12-NEXT: S_NOP 0, implicit [[BITCAST]](s160) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 4) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 16) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<4 x i32>) + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32), [[LOAD1]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i160) = G_BITCAST [[BUILD_VECTOR]](<5 x i32>) + ; GFX12-NEXT: S_NOP 0, implicit [[BITCAST]](i160) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_s160_align4 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4) - ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 16) - ; UNALIGNED_GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>) - ; UNALIGNED_GFX9PLUS-NEXT: S_NOP 0, implicit [[BITCAST]](s160) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 4) + ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 16) + ; UNALIGNED_GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<4 x i32>) + ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32), [[LOAD1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i160) = G_BITCAST [[BUILD_VECTOR]](<5 x i32>) + ; UNALIGNED_GFX9PLUS-NEXT: S_NOP 0, implicit [[BITCAST]](i160) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_s160_align4 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4) - ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD 
[[PTR_ADD]](p0) :: (load (s32) from unknown-address + 16) - ; UNALIGNED_GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>) - ; UNALIGNED_GFX11PLUS-NEXT: S_NOP 0, implicit [[BITCAST]](s160) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 4) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 16) + ; UNALIGNED_GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<4 x i32>) + ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32), [[LOAD1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i160) = G_BITCAST [[BUILD_VECTOR]](<5 x i32>) + ; UNALIGNED_GFX11PLUS-NEXT: S_NOP 0, implicit [[BITCAST]](i160) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_s160_align4 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 16) - ; UNALIGNED_GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32) - ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>) - ; UNALIGNED_GFX12-NEXT: S_NOP 0, implicit [[BITCAST]](s160) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 4) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 16) + ; UNALIGNED_GFX12-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<4 x i32>) + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32), [[LOAD1]](i32) + ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i160) = G_BITCAST [[BUILD_VECTOR]](<5 x i32>) + ; UNALIGNED_GFX12-NEXT: S_NOP 0, implicit [[BITCAST]](i160) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s160) = G_LOAD %0 :: (load (s160), align 4, addrspace 0) - S_NOP 0, implicit %1 + %1:_(i160) = G_LOAD %0(p0) :: (load (i160), align 4) + S_NOP 0, implicit %1(i160) ... 
--- @@ -2389,160 +2389,160 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) - ; CI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s32) from unknown-address + 16) - ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 20 - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s32) from unknown-address + 20) - ; CI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s32) from unknown-address + 24) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>) - ; CI-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF - ; CI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0 - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i32) from unknown-address + 8) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i32) from unknown-address + 12) + ; CI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p0) :: (load (i32) from unknown-address + 16) + ; CI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 20 + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p0) :: (load (i32) from unknown-address + 20) + ; CI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 24 + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p0) :: (load (i32) from 
unknown-address + 24) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32), [[LOAD6]](i32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i224) = G_BITCAST [[BUILD_VECTOR]](<7 x i32>) + ; CI-NEXT: [[DEF:%[0-9]+]]:_(i256) = G_IMPLICIT_DEF + ; CI-NEXT: [[INSERT:%[0-9]+]]:_(i256) = G_INSERT [[DEF]], [[BITCAST]](i224), 0 + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](i256) ; ; VI-LABEL: name: test_load_flat_s224_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s32) from unknown-address + 16) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 20 - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s32) from unknown-address + 20) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s32) from unknown-address + 24) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF - ; VI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0 - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i32) from unknown-address + 8) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i32) from unknown-address + 12) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD 
[[PTR_ADD3]](p0) :: (load (i32) from unknown-address + 16) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 20 + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p0) :: (load (i32) from unknown-address + 20) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 24 + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p0) :: (load (i32) from unknown-address + 24) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32), [[LOAD6]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i224) = G_BITCAST [[BUILD_VECTOR]](<7 x i32>) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(i256) = G_IMPLICIT_DEF + ; VI-NEXT: [[INSERT:%[0-9]+]]:_(i256) = G_INSERT [[DEF]], [[BITCAST]](i224), 0 + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](i256) ; ; GFX9PLUS-LABEL: name: test_load_flat_s224_align4 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4) - ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<3 x s32>) from unknown-address + 16, align 4) - ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; GFX9PLUS-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>) - ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32) - ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>) - ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF - ; GFX9PLUS-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0 - ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 4) + ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<3 x i32>) from unknown-address + 16, align 4) + ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<4 x i32>) + ; GFX9PLUS-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD1]](<3 x i32>) + ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32), [[UV4]](i32), [[UV5]](i32), [[UV6]](i32) + ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i224) = G_BITCAST [[BUILD_VECTOR]](<7 x i32>) + ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(i256) = G_IMPLICIT_DEF + ; GFX9PLUS-NEXT: [[INSERT:%[0-9]+]]:_(i256) = G_INSERT [[DEF]], [[BITCAST]](i224), 0 + ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](i256) ; ; GFX11PLUS-LABEL: name: test_load_flat_s224_align4 ; GFX11PLUS: 
liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4) - ; GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<3 x s32>) from unknown-address + 16, align 4) - ; GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; GFX11PLUS-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>) - ; GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32) - ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>) - ; GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF - ; GFX11PLUS-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0 - ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 4) + ; GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<3 x i32>) from unknown-address + 16, align 4) + ; GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<4 x i32>) + ; GFX11PLUS-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD1]](<3 x i32>) + ; GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32), [[UV4]](i32), [[UV5]](i32), [[UV6]](i32) + ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i224) = G_BITCAST [[BUILD_VECTOR]](<7 x i32>) + ; GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(i256) = G_IMPLICIT_DEF + ; GFX11PLUS-NEXT: [[INSERT:%[0-9]+]]:_(i256) = G_INSERT [[DEF]], [[BITCAST]](i224), 0 + ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](i256) ; ; GFX12-LABEL: name: test_load_flat_s224_align4 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4) - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<3 x s32>) from unknown-address + 16, align 4) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; GFX12-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>) - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF - ; GFX12-NEXT: 
[[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0 - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 4) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<3 x i32>) from unknown-address + 16, align 4) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<4 x i32>) + ; GFX12-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD1]](<3 x i32>) + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32), [[UV4]](i32), [[UV5]](i32), [[UV6]](i32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i224) = G_BITCAST [[BUILD_VECTOR]](<7 x i32>) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i256) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[INSERT:%[0-9]+]]:_(i256) = G_INSERT [[DEF]], [[BITCAST]](i224), 0 + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](i256) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_s224_align4 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4) - ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<3 x s32>) from unknown-address + 16, align 4) - ; UNALIGNED_GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; UNALIGNED_GFX9PLUS-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>) - ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>) - ; UNALIGNED_GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF - ; UNALIGNED_GFX9PLUS-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0 - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 4) + ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<3 x i32>) from unknown-address + 16, align 4) + ; UNALIGNED_GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<4 x i32>) + ; UNALIGNED_GFX9PLUS-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD1]](<3 x i32>) + ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x i32>) = G_BUILD_VECTOR 
[[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32), [[UV4]](i32), [[UV5]](i32), [[UV6]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i224) = G_BITCAST [[BUILD_VECTOR]](<7 x i32>) + ; UNALIGNED_GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(i256) = G_IMPLICIT_DEF + ; UNALIGNED_GFX9PLUS-NEXT: [[INSERT:%[0-9]+]]:_(i256) = G_INSERT [[DEF]], [[BITCAST]](i224), 0 + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](i256) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_s224_align4 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4) - ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<3 x s32>) from unknown-address + 16, align 4) - ; UNALIGNED_GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; UNALIGNED_GFX11PLUS-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>) - ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>) - ; UNALIGNED_GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF - ; UNALIGNED_GFX11PLUS-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0 - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 4) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<3 x i32>) from unknown-address + 16, align 4) + ; UNALIGNED_GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<4 x i32>) + ; UNALIGNED_GFX11PLUS-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD1]](<3 x i32>) + ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32), [[UV4]](i32), [[UV5]](i32), [[UV6]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i224) = G_BITCAST [[BUILD_VECTOR]](<7 x i32>) + ; UNALIGNED_GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(i256) = G_IMPLICIT_DEF + ; UNALIGNED_GFX11PLUS-NEXT: [[INSERT:%[0-9]+]]:_(i256) = G_INSERT [[DEF]], [[BITCAST]](i224), 0 + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](i256) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_s224_align4 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4) - ; UNALIGNED_GFX12-NEXT: 
[[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<3 x s32>) from unknown-address + 16, align 4) - ; UNALIGNED_GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; UNALIGNED_GFX12-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>) - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32) - ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>) - ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF - ; UNALIGNED_GFX12-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0 - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 4) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<3 x i32>) from unknown-address + 16, align 4) + ; UNALIGNED_GFX12-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<4 x i32>) + ; UNALIGNED_GFX12-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD1]](<3 x i32>) + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32), [[UV4]](i32), [[UV5]](i32), [[UV6]](i32) + ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i224) = G_BITCAST [[BUILD_VECTOR]](<7 x i32>) + ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(i256) = G_IMPLICIT_DEF + ; UNALIGNED_GFX12-NEXT: [[INSERT:%[0-9]+]]:_(i256) = G_INSERT [[DEF]], [[BITCAST]](i224), 0 + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](i256) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s224) = G_LOAD %0 :: (load (s224), align 4, addrspace 0) - %2:_(s256) = G_IMPLICIT_DEF - %3:_(s256) = G_INSERT %2, %1, 0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3 + %1:_(i224) = G_LOAD %0(p0) :: (load (i224), align 4) + %2:_(i256) = G_IMPLICIT_DEF + %3:_(i256) = G_INSERT %2, %1(i224), 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3(i256) ... 
@@ -2556,88 +2556,88 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 16) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8, align 8) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 16) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i32) from unknown-address + 8, align 8) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i32) from unknown-address + 12) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; VI-LABEL: name: test_load_flat_s128_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 16) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8, align 8) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), 
align 16) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i32) from unknown-address + 8, align 8) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i32) from unknown-address + 12) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX9PLUS-LABEL: name: test_load_flat_s128_align16 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>)) - ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>)) + ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX11PLUS-LABEL: name: test_load_flat_s128_align16 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>)) - ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>)) + ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX12-LABEL: name: test_load_flat_s128_align16 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>)) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>)) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_s128_align16 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>)) - ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>)) + ; UNALIGNED_GFX9PLUS-NEXT: 
[[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_s128_align16 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>)) - ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>)) + ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_s128_align16 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>)) - ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>)) + ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s128) = G_LOAD %0 :: (load (s128), align 16, addrspace 0) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(i128) = G_LOAD %0(p0) :: (load (i128)) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(i128) ... 
--- @@ -2650,88 +2650,88 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i32) from unknown-address + 8) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i32) from unknown-address + 12) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; VI-LABEL: name: test_load_flat_s128_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; 
VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i32) from unknown-address + 8) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i32) from unknown-address + 12) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX9PLUS-LABEL: name: test_load_flat_s128_align4 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4) - ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 4) + ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX11PLUS-LABEL: name: test_load_flat_s128_align4 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4) - ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 4) + ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX12-LABEL: name: test_load_flat_s128_align4 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 4) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_s128_align4 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4) - ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 4) + ; UNALIGNED_GFX9PLUS-NEXT: 
[[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_s128_align4 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4) - ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 4) + ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_s128_align4 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4) - ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 4) + ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s128) = G_LOAD %0 :: (load (s128), align 4, addrspace 0) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(i128) = G_LOAD %0(p0) :: (load (i128), align 4) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(i128) ... 
--- @@ -2744,378 +2744,378 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) - ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) - ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; CI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) - ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) - ; CI-NEXT: 
[[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; CI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12) - ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13) - ; CI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; CI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14) - ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15) - ; CI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; CI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; CI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; CI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i8) from unknown-address + 4) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (i8) from unknown-address + 5) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] 
+ ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (i8) from unknown-address + 6) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i8) from unknown-address + 7) + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (i8) from unknown-address + 8) + ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (i8) from unknown-address + 9) + ; CI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; CI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (i8) from unknown-address + 10) + ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p0) :: (load (i8) from unknown-address + 11) + ; CI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; CI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; CI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; CI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; CI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (i8) from unknown-address + 12) + ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (i8) from unknown-address + 13) + ; CI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; CI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (i8) from unknown-address + 14) + ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p0) :: (load (i8) from unknown-address + 15) + ; CI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; CI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; CI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; CI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; VI-LABEL: name: test_load_flat_s128_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: 
(load (s8)) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) - ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) - ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = 
G_OR [[SHL8]], [[OR6]] - ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12) - ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13) - ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14) - ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15) - ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i8) from unknown-address + 4) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (i8) from unknown-address + 5) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (i8) from unknown-address + 6) + ; VI-NEXT: 
[[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i8) from unknown-address + 7) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; VI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (i8) from unknown-address + 8) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (i8) from unknown-address + 9) + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; VI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (i8) from unknown-address + 10) + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p0) :: (load (i8) from unknown-address + 11) + ; VI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; VI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; VI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; VI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; VI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (i8) from unknown-address + 12) + ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (i8) from unknown-address + 13) + ; VI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; VI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (i8) from unknown-address + 14) + ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p0) :: (load (i8) from unknown-address + 15) + ; VI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; VI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; VI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; VI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX9PLUS-LABEL: name: test_load_flat_s128_align1 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 1) - ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + 
; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 1) + ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX11PLUS-LABEL: name: test_load_flat_s128_align1 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 1) - ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 1) + ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX12-LABEL: name: test_load_flat_s128_align1 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 1) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 1) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_s128_align1 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX9PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX9PLUS-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load 
(s8) from unknown-address + 4) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX9PLUS-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; UNALIGNED_GFX9PLUS-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14) - ; 
UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX9PLUS-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX9PLUS-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i8) from unknown-address + 4) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (i8) from unknown-address + 5) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (i8) from unknown-address + 6) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i8) from unknown-address + 7) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL 
[[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX9PLUS-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (i8) from unknown-address + 8) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (i8) from unknown-address + 9) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (i8) from unknown-address + 10) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p0) :: (load (i8) from unknown-address + 11) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX9PLUS-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (i8) from unknown-address + 12) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (i8) from unknown-address + 13) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (i8) from unknown-address + 14) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p0) :: (load (i8) from unknown-address + 15) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; UNALIGNED_GFX11PLUS-LABEL: name: 
test_load_flat_s128_align1 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX11PLUS-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX11PLUS-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; 
UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; UNALIGNED_GFX11PLUS-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(i64) = 
G_CONSTANT i64 2 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i8) from unknown-address + 4) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (i8) from unknown-address + 5) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (i8) from unknown-address + 6) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i8) from unknown-address + 7) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (i8) from unknown-address + 8) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (i8) from unknown-address + 9) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (i8) from unknown-address + 10) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p0) :: (load (i8) from unknown-address + 11) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: 
[[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (i8) from unknown-address + 12) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (i8) from unknown-address + 13) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (i8) from unknown-address + 14) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p0) :: (load (i8) from unknown-address + 15) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_s128_align1 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: 
[[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) - ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) - ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX12-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) - ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) - ; UNALIGNED_GFX12-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; UNALIGNED_GFX12-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; UNALIGNED_GFX12-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13) - ; UNALIGNED_GFX12-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: 
[[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15) - ; UNALIGNED_GFX12-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; UNALIGNED_GFX12-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i8) from unknown-address + 4) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (i8) from unknown-address + 5) + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (i8) from unknown-address + 6) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; 
UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i8) from unknown-address + 7) + ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX12-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (i8) from unknown-address + 8) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (i8) from unknown-address + 9) + ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (i8) from unknown-address + 10) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p0) :: (load (i8) from unknown-address + 11) + ; UNALIGNED_GFX12-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX12-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX12-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (i8) from unknown-address + 12) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (i8) from unknown-address + 13) + ; UNALIGNED_GFX12-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (i8) from unknown-address + 14) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p0) :: (load (i8) from unknown-address + 15) + ; UNALIGNED_GFX12-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX12-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY 
[[BITCAST]](i128) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s128) = G_LOAD %0 :: (load (s128), align 1, addrspace 0) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(i128) = G_LOAD %0(p0) :: (load (i128), align 1) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(i128) ... --- @@ -3128,136 +3128,136 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 16) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8, align 8) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) - ; CI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s32) from unknown-address + 16, align 16) - ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 20 - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s32) from unknown-address + 20) - ; CI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s32) from unknown-address + 24, align 8) - ; CI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 28 - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s32) from unknown-address + 28) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[BUILD_VECTOR]](<8 x s32>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 16) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i32) from unknown-address + 8, align 8) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i32) from unknown-address + 12) + ; CI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p0) :: (load (i32) from unknown-address + 16, align 16) + ; CI-NEXT: 
[[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 20 + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p0) :: (load (i32) from unknown-address + 20) + ; CI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 24 + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p0) :: (load (i32) from unknown-address + 24, align 8) + ; CI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 28 + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i32) from unknown-address + 28) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32), [[LOAD6]](i32), [[LOAD7]](i32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i256) = G_BITCAST [[BUILD_VECTOR]](<8 x i32>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](i256) ; ; VI-LABEL: name: test_load_flat_s256_align32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 16) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8, align 8) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s32) from unknown-address + 16, align 16) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 20 - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s32) from unknown-address + 20) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s32) from unknown-address + 24, align 8) - ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 28 - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s32) from unknown-address + 28) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[BUILD_VECTOR]](<8 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 16) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: 
[[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i32) from unknown-address + 8, align 8) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i32) from unknown-address + 12) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p0) :: (load (i32) from unknown-address + 16, align 16) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 20 + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p0) :: (load (i32) from unknown-address + 20) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 24 + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p0) :: (load (i32) from unknown-address + 24, align 8) + ; VI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 28 + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i32) from unknown-address + 28) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32), [[LOAD6]](i32), [[LOAD7]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i256) = G_BITCAST [[BUILD_VECTOR]](<8 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](i256) ; ; GFX9PLUS-LABEL: name: test_load_flat_s256_align32 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>)) - ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16) - ; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) - ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[CONCAT_VECTORS]](<8 x s32>) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>)) + ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x i32>) from unknown-address + 16) + ; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>) + ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i256) = G_BITCAST [[CONCAT_VECTORS]](<8 x i32>) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](i256) ; ; GFX11PLUS-LABEL: name: test_load_flat_s256_align32 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; 
GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>)) - ; GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16) - ; GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) - ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[CONCAT_VECTORS]](<8 x s32>) - ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>)) + ; GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x i32>) from unknown-address + 16) + ; GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>) + ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i256) = G_BITCAST [[CONCAT_VECTORS]](<8 x i32>) + ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](i256) ; ; GFX12-LABEL: name: test_load_flat_s256_align32 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>)) - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16) - ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[CONCAT_VECTORS]](<8 x s32>) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>)) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x i32>) from unknown-address + 16) + ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i256) = G_BITCAST [[CONCAT_VECTORS]](<8 x i32>) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](i256) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_s256_align32 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>)) - ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16) - ; UNALIGNED_GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) - ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s256) = 
G_BITCAST [[CONCAT_VECTORS]](<8 x s32>) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>)) + ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x i32>) from unknown-address + 16) + ; UNALIGNED_GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>) + ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i256) = G_BITCAST [[CONCAT_VECTORS]](<8 x i32>) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](i256) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_s256_align32 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>)) - ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16) - ; UNALIGNED_GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) - ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[CONCAT_VECTORS]](<8 x s32>) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>)) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x i32>) from unknown-address + 16) + ; UNALIGNED_GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>) + ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i256) = G_BITCAST [[CONCAT_VECTORS]](<8 x i32>) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](i256) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_s256_align32 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>)) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16) - ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) - ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[CONCAT_VECTORS]](<8 x s32>) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x 
i32>)) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x i32>) from unknown-address + 16) + ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>) + ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i256) = G_BITCAST [[CONCAT_VECTORS]](<8 x i32>) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](i256) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s256) = G_LOAD %0 :: (load (s256), align 16, addrspace 0) - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 + %1:_(i256) = G_LOAD %0(p0) :: (load (i256), align 16) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1(i256) ... --- @@ -3270,22 +3270,22 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 8) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; CI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 8) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; CI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) ; ; VI-LABEL: name: test_load_flat_p1_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 8) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; VI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 8) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; VI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) ; ; GFX9PLUS-LABEL: name: test_load_flat_p1_align8 @@ -3330,8 +3330,8 @@ body: | ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p0) :: (load (p1)) ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(p1) = G_LOAD %0 :: (load (p1), align 8, addrspace 0) - $vgpr0_vgpr1 = COPY %1 + %1:_(p1) = G_LOAD %0(p0) :: (load (p1)) + $vgpr0_vgpr1 = COPY %1(p1) ... 
--- @@ -3344,22 +3344,22 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; CI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; CI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) ; ; VI-LABEL: name: test_load_flat_p1_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; VI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; VI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) ; ; GFX9PLUS-LABEL: name: test_load_flat_p1_align4 @@ -3404,8 +3404,8 @@ body: | ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p0) :: (load (p1), align 4) ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(p1) = G_LOAD %0 :: (load (p1), align 4, addrspace 0) - $vgpr0_vgpr1 = COPY %1 + %1:_(p1) = G_LOAD %0(p0) :: (load (p1), align 4) + $vgpr0_vgpr1 = COPY %1(p1) ... 
--- @@ -3418,78 +3418,78 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load 
(i8) from unknown-address + 3) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i8) from unknown-address + 4) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (i8) from unknown-address + 5) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (i8) from unknown-address + 6) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i8) from unknown-address + 7) + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR2]](i32), [[OR5]](i32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) ; ; VI-LABEL: name: test_load_flat_p1_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) 
- ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i8) from unknown-address + 4) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (i8) from unknown-address + 5) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (i8) from unknown-address + 6) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i8) from unknown-address + 7) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; VI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR2]](i32), [[OR5]](i32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) ; ; GFX9PLUS-LABEL: name: test_load_flat_p1_align1 @@ -3517,136 +3517,136 @@ body: | ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) 
= COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX9PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX9PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; UNALIGNED_GFX9PLUS-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](s64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; 
UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX9PLUS-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i8) from unknown-address + 4) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (i8) from unknown-address + 5) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (i8) from unknown-address + 6) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i8) from unknown-address + 7) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX9PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; UNALIGNED_GFX9PLUS-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](i64) ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_p1_align1 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; 
UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX11PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; UNALIGNED_GFX11PLUS-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](s64) + ; 
UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i8) from unknown-address + 4) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (i8) from unknown-address + 5) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (i8) from unknown-address + 6) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i8) from unknown-address + 7) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; UNALIGNED_GFX11PLUS-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](i64) ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) ; ; UNALIGNED_GFX12-LABEL: 
name: test_load_flat_p1_align1 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) - ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) - ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; UNALIGNED_GFX12-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; UNALIGNED_GFX12-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD 
[[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i8) from unknown-address + 4) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (i8) from unknown-address + 5) + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (i8) from unknown-address + 6) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i8) from unknown-address + 7) + ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; UNALIGNED_GFX12-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; UNALIGNED_GFX12-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](i64) ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(p1) = G_LOAD %0 :: (load (p1), align 1, addrspace 0) - $vgpr0_vgpr1 = COPY %1 + %1:_(p1) = G_LOAD %0(p0) :: (load (p1), align 1) + $vgpr0_vgpr1 = COPY %1(p1) ... 
--- @@ -3710,9 +3710,9 @@ body: | ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p0) :: (load (p3)) ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](p3) - %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(p3) = G_LOAD %0 :: (load (p3), align 4, addrspace 0) - $vgpr0 = COPY %1 + %0:_(p0) = COPY $vgpr0_vgpr1 + %1:_(p3) = G_LOAD %0(p0) :: (load (p3)) + $vgpr0 = COPY %1(p3) ... --- @@ -3725,22 +3725,22 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 8) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 8) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p4) ; ; VI-LABEL: name: test_load_flat_p4_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 8) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; VI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 8) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; VI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p4) ; ; GFX9PLUS-LABEL: name: test_load_flat_p4_align8 @@ -3785,8 +3785,8 @@ body: | ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p0) :: (load (p4)) ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(p4) = G_LOAD %0 :: (load (p4), align 8, addrspace 0) - $vgpr0_vgpr1 = COPY %1 + %1:_(p4) = G_LOAD %0(p0) :: (load (p4)) + $vgpr0_vgpr1 = COPY %1(p4) ... 
--- @@ -3799,22 +3799,22 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p4) ; ; VI-LABEL: name: test_load_flat_p4_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; VI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; VI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p4) ; ; GFX9PLUS-LABEL: name: test_load_flat_p4_align4 @@ -3859,8 +3859,8 @@ body: | ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p0) :: (load (p4), align 4) ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(p4) = G_LOAD %0 :: (load (p4), align 4, addrspace 0) - $vgpr0_vgpr1 = COPY %1 + %1:_(p4) = G_LOAD %0(p0) :: (load (p4), align 4) + $vgpr0_vgpr1 = COPY %1(p4) ... 
--- @@ -3873,42 +3873,42 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i16)) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i16) from unknown-address + 4) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i16) from unknown-address + 6) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](i32), [[OR1]](i32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p4) ; ; VI-LABEL: name: test_load_flat_p4_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL 
[[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; VI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i16)) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i16) from unknown-address + 4) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i16) from unknown-address + 6) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; VI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](i32), [[OR1]](i32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p4) ; ; GFX9PLUS-LABEL: name: test_load_flat_p4_align2 @@ -3936,82 +3936,82 @@ body: | ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) - ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) - ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; UNALIGNED_GFX9PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] - ; UNALIGNED_GFX9PLUS-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR2]](s64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i16)) + ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD 
[[COPY]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2) + ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i16) from unknown-address + 4) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i16) from unknown-address + 6) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX9PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C3]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR2:%[0-9]+]]:_(i64) = G_OR [[SHL2]], [[ZEXT]] + ; UNALIGNED_GFX9PLUS-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR2]](i64) ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p4) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_p4_align2 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) - ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) - ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; UNALIGNED_GFX11PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] - ; UNALIGNED_GFX11PLUS-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR 
[[OR2]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i16)) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2) + ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i16) from unknown-address + 4) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i16) from unknown-address + 6) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C3]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(i64) = G_OR [[SHL2]], [[ZEXT]] + ; UNALIGNED_GFX11PLUS-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR2]](i64) ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p4) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_p4_align2 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6) - ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) - ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL 
[[ANYEXT]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] - ; UNALIGNED_GFX12-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR2]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i16)) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR]](i32) + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i16) from unknown-address + 4) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i16) from unknown-address + 6) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR1]](i32) + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(i64) = G_OR [[SHL2]], [[ZEXT]] + ; UNALIGNED_GFX12-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR2]](i64) ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p4) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(p4) = G_LOAD %0 :: (load (p4), align 2, addrspace 0) - $vgpr0_vgpr1 = COPY %1 + %1:_(p4) = G_LOAD %0(p0) :: (load (p4), align 2) + $vgpr0_vgpr1 = COPY %1(p4) ... 
--- @@ -4024,78 +4024,78 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load 
(i8) from unknown-address + 3) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i8) from unknown-address + 4) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (i8) from unknown-address + 5) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (i8) from unknown-address + 6) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i8) from unknown-address + 7) + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR2]](i32), [[OR5]](i32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p4) ; ; VI-LABEL: name: test_load_flat_p4_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) 
- ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i8) from unknown-address + 4) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (i8) from unknown-address + 5) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (i8) from unknown-address + 6) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i8) from unknown-address + 7) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; VI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR2]](i32), [[OR5]](i32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p4) ; ; GFX9PLUS-LABEL: name: test_load_flat_p4_align1 @@ -4123,136 +4123,136 @@ body: | ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) 
= COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX9PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX9PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; UNALIGNED_GFX9PLUS-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR6]](s64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; 
UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX9PLUS-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i8) from unknown-address + 4) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (i8) from unknown-address + 5) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (i8) from unknown-address + 6) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i8) from unknown-address + 7) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX9PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; UNALIGNED_GFX9PLUS-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR6]](i64) ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p4) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_p4_align1 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; 
UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX11PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; UNALIGNED_GFX11PLUS-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR6]](s64) + ; 
UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i8) from unknown-address + 4) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (i8) from unknown-address + 5) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (i8) from unknown-address + 6) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i8) from unknown-address + 7) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; UNALIGNED_GFX11PLUS-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR6]](i64) ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p4) ; ; UNALIGNED_GFX12-LABEL: 
name: test_load_flat_p4_align1 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) - ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) - ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; UNALIGNED_GFX12-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; UNALIGNED_GFX12-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR6]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD 
[[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i8) from unknown-address + 4) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (i8) from unknown-address + 5) + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (i8) from unknown-address + 6) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i8) from unknown-address + 7) + ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; UNALIGNED_GFX12-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; UNALIGNED_GFX12-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR6]](i64) ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p4) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(p4) = G_LOAD %0 :: (load (p4), align 1, addrspace 0) - $vgpr0_vgpr1 = COPY %1 + %1:_(p4) = G_LOAD %0(p0) :: (load (p4), align 1) + $vgpr0_vgpr1 = COPY %1(p4) ... 
--- @@ -4317,8 +4317,8 @@ body: | ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p0) :: (load (p5)) ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](p5) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(p5) = G_LOAD %0 :: (load (p5), align 4, addrspace 0) - $vgpr0 = COPY %1 + %1:_(p5) = G_LOAD %0(p0) :: (load (p5)) + $vgpr0 = COPY %1(p5) ... --- @@ -4331,28 +4331,28 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i16)) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](i32) ; CI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) ; ; VI-LABEL: name: test_load_flat_p5_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i16)) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](i32) ; VI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) ; ; GFX9PLUS-LABEL: name: test_load_flat_p5_align2 @@ -4380,46 +4380,46 @@ body: | ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) - ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from 
unknown-address + 2) - ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX9PLUS-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i16)) + ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2) + ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX9PLUS-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](i32) ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_p5_align2 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) - ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) - ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11PLUS-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i16)) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2) + ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11PLUS-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](i32) ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_p5_align2 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; 
UNALIGNED_GFX12-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i16)) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](i32) ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(p5) = G_LOAD %0 :: (load (p5), align 2, addrspace 0) - $vgpr0 = COPY %1 + %1:_(p5) = G_LOAD %0(p0) :: (load (p5), align 2) + $vgpr0 = COPY %1(p5) ... --- @@ -4432,48 +4432,48 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: 
[[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](i32) ; CI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) ; ; VI-LABEL: name: test_load_flat_p5_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](i32) ; VI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) ; ; GFX9PLUS-LABEL: name: test_load_flat_p5_align1 @@ -4501,76 +4501,76 @@ body: | ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: 
[[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX9PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX9PLUS-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX9PLUS-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX9PLUS-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](i32) ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_p5_align1 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from 
unknown-address + 1) - ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX11PLUS-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX11PLUS-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](i32) ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_p5_align1 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 
8 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX12-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX12-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](i32) ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(p5) = G_LOAD %0 :: (load (p5), align 1, addrspace 0) - $vgpr0 = COPY %1 + %1:_(p5) = G_LOAD %0(p0) :: (load (p5), align 1) + $vgpr0 = COPY %1(p5) ... 
--- @@ -4583,62 +4583,62 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) - ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 4) + ; CI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; VI-LABEL: name: test_load_flat_v2s8_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 4) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9PLUS-LABEL: name: test_load_flat_v2s8_align4 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) - ; GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 4) + ; GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX11PLUS-LABEL: name: test_load_flat_v2s8_align4 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) - ; GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 4) + ; GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX12-LABEL: name: test_load_flat_v2s8_align4 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) - ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 4) + ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v2s8_align4 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 4) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v2s8_align4 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 4) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v2s8_align4 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX12-NEXT: 
[[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 4) + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), align 4, addrspace 0) - %2:_(s16) = G_BITCAST %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + %1:_(<2 x i8>) = G_LOAD %0(p0) :: (load (<2 x i8>), align 4) + %2:_(i16) = G_BITCAST %1(<2 x i8>) + %3:_(i32) = G_ANYEXT %2(i16) + $vgpr0 = COPY %3(i32) ... --- @@ -4651,62 +4651,62 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) - ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16)) + ; CI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; VI-LABEL: name: test_load_flat_v2s8_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16)) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9PLUS-LABEL: name: test_load_flat_v2s8_align2 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) - ; GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16)) + ; GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX11PLUS-LABEL: name: test_load_flat_v2s8_align2 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) - ; GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16)) + ; GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX12-LABEL: name: test_load_flat_v2s8_align2 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) - ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16)) + ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v2s8_align2 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16)) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v2s8_align2 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16)) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v2s8_align2 ; 
UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16)) + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), align 2, addrspace 0) - %2:_(s16) = G_BITCAST %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + %1:_(<2 x i8>) = G_LOAD %0(p0) :: (load (<2 x i8>)) + %2:_(i16) = G_BITCAST %1(<2 x i8>) + %3:_(i32) = G_ANYEXT %2(i16) + $vgpr0 = COPY %3(i32) ... --- @@ -4719,92 +4719,92 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; VI-LABEL: name: test_load_flat_v2s8_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; GFX9PLUS-LABEL: name: test_load_flat_v2s8_align1 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 1) - ; GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 1) + 
; GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX11PLUS-LABEL: name: test_load_flat_v2s8_align1 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 1) - ; GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 1) + ; GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX12-LABEL: name: test_load_flat_v2s8_align1 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 1) - ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 1) + ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v2s8_align1 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0 = COPY [[OR]](s32) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v2s8_align1 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[OR]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX11PLUS-NEXT: 
[[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v2s8_align1 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[OR]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[OR]](i32) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), align 1, addrspace 0) - %2:_(s16) = G_BITCAST %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + %1:_(<2 x i8>) = G_LOAD %0(p0) :: (load (<2 x i8>), align 1) + %2:_(i16) = G_BITCAST %1(<2 x i8>) + %3:_(i32) = G_ANYEXT %2(i16) + $vgpr0 = COPY %3(i32) ... 
--- @@ -4817,248 +4817,248 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) - ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]] - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) - ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; CI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; CI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C2]] + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C]](i32) + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; CI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LSHR]], [[C3]] + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[COPY1]](i32) + ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[SHL]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[TRUNC1]] + ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; CI-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C2]] + ; CI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C]](i32) + ; CI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[DEF]], [[C3]] + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[COPY2]](i32) + ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[SHL1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[TRUNC3]] + ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL2]] + ; CI-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; VI-LABEL: name: test_load_flat_v3s8_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: 
[[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C2]] + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C3]](i16) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL]] + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C2]] + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C2]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C3]](i16) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL1]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL2]] + ; VI-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; GFX9PLUS-LABEL: name: test_load_flat_v3s8_align4 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; 
GFX9PLUS-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9PLUS-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9PLUS-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] - ; GFX9PLUS-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16) - ; GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9PLUS-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; GFX9PLUS-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]] - ; GFX9PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16) - ; GFX9PLUS-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; GFX9PLUS-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; GFX9PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; GFX9PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; GFX9PLUS-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9PLUS-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX9PLUS-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C2]] + ; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9PLUS-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C2]] + ; GFX9PLUS-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C3]](i16) + ; GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL]] + ; GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9PLUS-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C2]] + ; GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; GFX9PLUS-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C2]] + ; GFX9PLUS-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C3]](i16) + ; GFX9PLUS-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL1]] + ; GFX9PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; GFX9PLUS-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; GFX9PLUS-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; GFX9PLUS-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX9PLUS-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; GFX11PLUS-LABEL: name: test_load_flat_v3s8_align4 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX11PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11PLUS-NEXT: 
[[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX11PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX11PLUS-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; GFX11PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX11PLUS-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] - ; GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16) - ; GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX11PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX11PLUS-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; GFX11PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; GFX11PLUS-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]] - ; GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16) - ; GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX11PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; GFX11PLUS-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; GFX11PLUS-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX11PLUS-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11PLUS-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX11PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX11PLUS-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C2]] + ; GFX11PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX11PLUS-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C2]] + ; GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C3]](i16) + ; GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL]] + ; GFX11PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX11PLUS-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C2]] + ; GFX11PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; GFX11PLUS-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C2]] + ; GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C3]](i16) + ; GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL1]] + ; GFX11PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; GFX11PLUS-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX11PLUS-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; GFX12-LABEL: name: test_load_flat_v3s8_align4 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX12-NEXT: 
[[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX12-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] - ; GFX12-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16) - ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX12-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; GFX12-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]] - ; GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16) - ; GFX12-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; GFX12-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; GFX12-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX12-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C2]] + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C2]] + ; GFX12-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C3]](i16) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL]] + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX12-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C2]] + ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; GFX12-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C2]] + ; GFX12-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C3]](i16) + ; GFX12-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL1]] + ; GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; GFX12-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; GFX12-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; GFX12-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX12-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v3s8_align4 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9PLUS-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; 
UNALIGNED_GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; UNALIGNED_GFX9PLUS-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] - ; UNALIGNED_GFX9PLUS-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]] - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9PLUS-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; UNALIGNED_GFX9PLUS-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C2]] + ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C2]] + ; UNALIGNED_GFX9PLUS-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C3]](i16) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL]] + ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C2]] + ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C2]] + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C3]](i16) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL1]] + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL2]] + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v3s8_align4 ; UNALIGNED_GFX11PLUS: liveins: 
$vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX11PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11PLUS-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; UNALIGNED_GFX11PLUS-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] - ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]] - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11PLUS-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; UNALIGNED_GFX11PLUS-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C2]] + ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C2]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C3]](i16) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL]] + ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C2]] + ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C2]] + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL 
[[AND3]], [[C3]](i16) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL1]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL2]] + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v3s8_align4 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; UNALIGNED_GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; UNALIGNED_GFX12-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; UNALIGNED_GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; UNALIGNED_GFX12-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] - ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; UNALIGNED_GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; UNALIGNED_GFX12-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; UNALIGNED_GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; UNALIGNED_GFX12-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]] - ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16) - ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; UNALIGNED_GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; UNALIGNED_GFX12-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; UNALIGNED_GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; UNALIGNED_GFX12-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C2]] + ; UNALIGNED_GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; UNALIGNED_GFX12-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C2]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C3]](i16) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR 
[[AND]], [[SHL]] + ; UNALIGNED_GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; UNALIGNED_GFX12-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C2]] + ; UNALIGNED_GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; UNALIGNED_GFX12-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C2]] + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C3]](i16) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL1]] + ; UNALIGNED_GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; UNALIGNED_GFX12-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL2]] + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[OR2]](i32) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<3 x s8>) = G_LOAD %0 :: (load (<3 x s8>), align 4, addrspace 0) - %2:_(s24) = G_BITCAST %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + %1:_(<3 x i8>) = G_LOAD %0(p0) :: (load (<3 x i8>), align 4) + %2:_(i24) = G_BITCAST %1(<3 x i8>) + %3:_(i32) = G_ANYEXT %2(i24) + $vgpr0 = COPY %3(i32) ... --- @@ -5071,313 +5071,313 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32) - ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32) - ; CI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; CI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C5]] - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) - ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C5]] - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) - ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; CI-NEXT: 
[[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; CI-NEXT: $vgpr0 = COPY [[OR4]](s32) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C3]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[OR]] + ; CI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[OR1]], [[C1]](i32) + ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[OR1]], [[C3]](i32) + ; CI-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; CI-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; CI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C4]] + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C1]](i32) + ; CI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; CI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LSHR]], [[C5]] + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[COPY1]](i32) + ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[SHL2]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i16) = G_OR [[AND]], [[TRUNC1]] + ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; CI-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C4]] + ; CI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C1]](i32) + ; CI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[DEF]], [[C5]] + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[COPY2]](i32) + ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[SHL3]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[TRUNC3]] + ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR2]](i16) + ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR3]](i16) + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C3]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL4]] + ; CI-NEXT: $vgpr0 = COPY [[OR4]](i32) ; ; VI-LABEL: name: test_load_flat_v3s8_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) 
- ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]] - ; VI-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C5]](s16) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL2]] - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C4]] - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C5]](s16) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL3]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; VI-NEXT: $vgpr0 = COPY [[OR4]](s32) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C3]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[OR]] + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[OR1]], [[C1]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[OR1]], [[C3]](i32) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C4]] + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C4]] + ; VI-NEXT: [[C5:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C5]](i16) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL2]] + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C4]] + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C4]] + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C5]](i16) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL3]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR2]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR3]](i16) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL 
[[ZEXT1]], [[C3]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL4]] + ; VI-NEXT: $vgpr0 = COPY [[OR4]](i32) ; ; GFX9PLUS-LABEL: name: test_load_flat_v3s8_align1 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16), align 1) - ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 2) - ; GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C2]](s32) - ; GFX9PLUS-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) - ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; GFX9PLUS-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9PLUS-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9PLUS-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] - ; GFX9PLUS-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C4]](s16) - ; GFX9PLUS-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL1]] - ; GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9PLUS-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] - ; GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; GFX9PLUS-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] - ; GFX9PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) - ; GFX9PLUS-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL2]] - ; GFX9PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; GFX9PLUS-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; GFX9PLUS-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; GFX9PLUS-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] - ; GFX9PLUS-NEXT: $vgpr0 = COPY [[OR3]](s32) + ; GFX9PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i16), align 1) + ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 2) + ; GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[OR]], [[C2]](i32) + ; GFX9PLUS-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[OR]], [[C1]](i32) + ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR]](i32) + ; GFX9PLUS-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX9PLUS-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C3]] + ; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9PLUS-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C3]] + ; GFX9PLUS-NEXT: 
[[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX9PLUS-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C4]](i16) + ; GFX9PLUS-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL1]] + ; GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9PLUS-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C3]] + ; GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; GFX9PLUS-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C3]] + ; GFX9PLUS-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C4]](i16) + ; GFX9PLUS-NEXT: [[OR2:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL2]] + ; GFX9PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; GFX9PLUS-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR2]](i16) + ; GFX9PLUS-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; GFX9PLUS-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL3]] + ; GFX9PLUS-NEXT: $vgpr0 = COPY [[OR3]](i32) ; ; GFX11PLUS-LABEL: name: test_load_flat_v3s8_align1 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16), align 1) - ; GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 2) - ; GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX11PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C2]](s32) - ; GFX11PLUS-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) - ; GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX11PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX11PLUS-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; GFX11PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX11PLUS-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] - ; GFX11PLUS-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C4]](s16) - ; GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL1]] - ; GFX11PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX11PLUS-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] - ; GFX11PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; GFX11PLUS-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] - ; GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) - ; GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL2]] - ; GFX11PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; GFX11PLUS-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; GFX11PLUS-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; GFX11PLUS-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] - ; GFX11PLUS-NEXT: $vgpr0 = COPY [[OR3]](s32) + ; GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i16), align 1) + ; GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 2) 
+ ; GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX11PLUS-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[OR]], [[C2]](i32) + ; GFX11PLUS-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[OR]], [[C1]](i32) + ; GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX11PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR]](i32) + ; GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX11PLUS-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C3]] + ; GFX11PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX11PLUS-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C3]] + ; GFX11PLUS-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C4]](i16) + ; GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL1]] + ; GFX11PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX11PLUS-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C3]] + ; GFX11PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; GFX11PLUS-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C3]] + ; GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C4]](i16) + ; GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL2]] + ; GFX11PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; GFX11PLUS-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR2]](i16) + ; GFX11PLUS-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; GFX11PLUS-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL3]] + ; GFX11PLUS-NEXT: $vgpr0 = COPY [[OR3]](i32) ; ; GFX12-LABEL: name: test_load_flat_v3s8_align1 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16), align 1) - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 2) - ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C2]](s32) - ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; GFX12-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX12-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] - ; GFX12-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C4]](s16) - ; GFX12-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL1]] - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX12-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] - ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; GFX12-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] - ; GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) - ; 
GFX12-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL2]] - ; GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; GFX12-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] - ; GFX12-NEXT: $vgpr0 = COPY [[OR3]](s32) + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i16), align 1) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 2) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[OR]], [[C2]](i32) + ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[OR]], [[C1]](i32) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR]](i32) + ; GFX12-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX12-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C3]] + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C3]] + ; GFX12-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX12-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C4]](i16) + ; GFX12-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL1]] + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX12-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C3]] + ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; GFX12-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C3]] + ; GFX12-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C4]](i16) + ; GFX12-NEXT: [[OR2:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL2]] + ; GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; GFX12-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR2]](i16) + ; GFX12-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; GFX12-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL3]] + ; GFX12-NEXT: $vgpr0 = COPY [[OR3]](i32) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v3s8_align1 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; UNALIGNED_GFX9PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; 
UNALIGNED_GFX9PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] - ; UNALIGNED_GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; UNALIGNED_GFX9PLUS-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] - ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]] - ; UNALIGNED_GFX9PLUS-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C5]](s16) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL2]] - ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] - ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C4]] - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C5]](s16) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL3]] - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0 = COPY [[OR4]](s32) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; UNALIGNED_GFX9PLUS-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C3]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[OR]] + ; UNALIGNED_GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[OR1]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; UNALIGNED_GFX9PLUS-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C4]] + ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: 
[[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C4]] + ; UNALIGNED_GFX9PLUS-NEXT: [[C5:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C5]](i16) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR2:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL2]] + ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C4]] + ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C4]] + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL3:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C5]](i16) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR3:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL3]] + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR2]](i16) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR3]](i16) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C3]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL4]] + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0 = COPY [[OR4]](i32) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v3s8_align1 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] - ; UNALIGNED_GFX11PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; UNALIGNED_GFX11PLUS-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] - ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]] - ; UNALIGNED_GFX11PLUS-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C5]](s16) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL2]] - ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] - ; UNALIGNED_GFX11PLUS-NEXT: 
[[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C4]] - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C5]](s16) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL3]] - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[OR4]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C3]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[OR]] + ; UNALIGNED_GFX11PLUS-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[OR1]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; UNALIGNED_GFX11PLUS-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C4]] + ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C4]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C5:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C5]](i16) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL2]] + ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C4]] + ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C4]] + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL3:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C5]](i16) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR3:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL3]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR2]](i16) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR3]](i16) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C3]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL4]] + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[OR4]](i32) ; ; 
UNALIGNED_GFX12-LABEL: name: test_load_flat_v3s8_align1 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] - ; UNALIGNED_GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; UNALIGNED_GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; UNALIGNED_GFX12-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] - ; UNALIGNED_GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; UNALIGNED_GFX12-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]] - ; UNALIGNED_GFX12-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C5]](s16) - ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL2]] - ; UNALIGNED_GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; UNALIGNED_GFX12-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] - ; UNALIGNED_GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; UNALIGNED_GFX12-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C4]] - ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C5]](s16) - ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL3]] - ; UNALIGNED_GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; UNALIGNED_GFX12-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[OR4]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: 
[[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[OR]] + ; UNALIGNED_GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[OR1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; UNALIGNED_GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; UNALIGNED_GFX12-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C4]] + ; UNALIGNED_GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; UNALIGNED_GFX12-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C4]] + ; UNALIGNED_GFX12-NEXT: [[C5:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C5]](i16) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL2]] + ; UNALIGNED_GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; UNALIGNED_GFX12-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C4]] + ; UNALIGNED_GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; UNALIGNED_GFX12-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C4]] + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C5]](i16) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL3]] + ; UNALIGNED_GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR2]](i16) + ; UNALIGNED_GFX12-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR3]](i16) + ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL4]] + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[OR4]](i32) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<3 x s8>) = G_LOAD %0 :: (load (<3 x s8>), align 1, addrspace 0) - %2:_(s24) = G_BITCAST %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + %1:_(<3 x i8>) = G_LOAD %0(p0) :: (load (<3 x i8>), align 1) + %2:_(i24) = G_BITCAST %1(<3 x i8>) + %3:_(i32) = G_ANYEXT %2(i24) + $vgpr0 = COPY %3(i32) ... 
--- @@ -5390,61 +5390,61 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; CI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; VI-LABEL: name: test_load_flat_v4s8_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9PLUS-LABEL: name: test_load_flat_v4s8_align4 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX11PLUS-LABEL: name: test_load_flat_v4s8_align4 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX12-LABEL: name: test_load_flat_v4s8_align4 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v4s8_align4 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v4s8_align4 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v4s8_align4 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p0) = COPY $vgpr0_vgpr1 - 
%1:_(<4 x s8>) = G_LOAD %0 :: (load (<4 x s8>), align 4, addrspace 0) - %2:_(s32) = G_BITCAST %1 - $vgpr0 = COPY %2 + %1:_(<4 x i8>) = G_LOAD %0(p0) :: (load (<4 x i8>)) + %2:_(i32) = G_BITCAST %1(<4 x i8>) + $vgpr0 = COPY %2(i32) ... --- @@ -5457,91 +5457,91 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i16)) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; VI-LABEL: name: test_load_flat_v4s8_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i16)) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; GFX9PLUS-LABEL: name: test_load_flat_v4s8_align2 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 2) - ; GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 2) + ; GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX11PLUS-LABEL: name: test_load_flat_v4s8_align2 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 2) - ; GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 2) + ; GFX11PLUS-NEXT: $vgpr0 = COPY 
[[LOAD]](i32) ; ; GFX12-LABEL: name: test_load_flat_v4s8_align2 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 2) - ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 2) + ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v4s8_align2 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) - ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) - ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0 = COPY [[OR]](s32) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i16)) + ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2) + ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v4s8_align2 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) - ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) - ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[OR]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i16)) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2) + ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; 
UNALIGNED_GFX12-LABEL: name: test_load_flat_v4s8_align2 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[OR]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i16)) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[OR]](i32) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<4 x s8>) = G_LOAD %0 :: (load (<4 x s8>), align 2, addrspace 0) - %2:_(s32) = G_BITCAST %1 - $vgpr0 = COPY %2 + %1:_(<4 x i8>) = G_LOAD %0(p0) :: (load (<4 x i8>), align 2) + %2:_(i32) = G_BITCAST %1(<4 x i8>) + $vgpr0 = COPY %2(i32) ... --- @@ -5554,141 +5554,141 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: 
[[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; VI-LABEL: name: test_load_flat_v4s8_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR 
[[SHL2]], [[OR]] + ; VI-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; GFX9PLUS-LABEL: name: test_load_flat_v4s8_align1 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 1) - ; GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 1) + ; GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX11PLUS-LABEL: name: test_load_flat_v4s8_align1 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 1) - ; GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 1) + ; GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX12-LABEL: name: test_load_flat_v4s8_align1 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 1) - ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 1) + ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v4s8_align1 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX9PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; 
UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX9PLUS-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v4s8_align1 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; 
UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v4s8_align1 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX12-NEXT: 
[[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[OR2]](i32) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<4 x s8>) = G_LOAD %0 :: (load (<4 x s8>), align 1, addrspace 0) - %2:_(s32) = G_BITCAST %1 - $vgpr0 = COPY %2 + %1:_(<4 x i8>) = G_LOAD %0(p0) :: (load (<4 x i8>), align 1) + %2:_(i32) = G_BITCAST %1(<4 x i8>) + $vgpr0 = COPY %2(i32) ... --- @@ -5701,69 +5701,69 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 8) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 8) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; VI-LABEL: name: test_load_flat_v8s8_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 8) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 8) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX9PLUS-LABEL: name: test_load_flat_v8s8_align8 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>)) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x 
i32>) = G_LOAD [[COPY]](p0) :: (load (<2 x i32>)) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX11PLUS-LABEL: name: test_load_flat_v8s8_align8 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>)) - ; GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p0) :: (load (<2 x i32>)) + ; GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX12-LABEL: name: test_load_flat_v8s8_align8 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>)) - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p0) :: (load (<2 x i32>)) + ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v8s8_align8 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>)) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p0) :: (load (<2 x i32>)) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v8s8_align8 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>)) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p0) :: (load (<2 x i32>)) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v8s8_align8 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>)) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p0) :: (load (<2 x i32>)) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<8 x s8>) = G_LOAD %0 :: (load (<8 x s8>), align 8, addrspace 0) - %2:_(<2 x s32>) = G_BITCAST %1 - $vgpr0_vgpr1 = COPY %2 + %1:_(<8 x i8>) = G_LOAD %0(p0) :: (load (<8 x i8>)) + %2:_(<2 x i32>) = G_BITCAST %1(<8 x i8>) + $vgpr0_vgpr1 = COPY %2(<2 x i32>) ... 
--- @@ -5776,81 +5776,81 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 16) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8, align 8) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 16) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i32) from unknown-address + 8, align 8) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i32) from unknown-address + 12) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; VI-LABEL: name: test_load_flat_v16s8_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 16) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8, align 8) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 16) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) 
from unknown-address + 4) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i32) from unknown-address + 8, align 8) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i32) from unknown-address + 12) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; GFX9PLUS-LABEL: name: test_load_flat_v16s8_align16 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>)) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>)) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; GFX11PLUS-LABEL: name: test_load_flat_v16s8_align16 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>)) - ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>)) + ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; GFX12-LABEL: name: test_load_flat_v16s8_align16 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>)) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>)) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v16s8_align16 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>)) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>)) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v16s8_align16 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>)) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>)) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v16s8_align16 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY 
$vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>)) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>)) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<16 x s8>) = G_LOAD %0 :: (load (<16 x s8>), align 16, addrspace 0) - %2:_(<4 x s32>) = G_BITCAST %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + %1:_(<16 x i8>) = G_LOAD %0(p0) :: (load (<16 x i8>)) + %2:_(<4 x i32>) = G_BITCAST %1(<16 x i8>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<4 x i32>) ... --- @@ -5863,129 +5863,129 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 32) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8, align 8) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) - ; CI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s32) from unknown-address + 16, align 16) - ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 20 - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s32) from unknown-address + 20) - ; CI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s32) from unknown-address + 24, align 8) - ; CI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 28 - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s32) from unknown-address + 28) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 32) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i32) from unknown-address + 8, align 8) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], 
[[C2]](i64) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i32) from unknown-address + 12) + ; CI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p0) :: (load (i32) from unknown-address + 16, align 16) + ; CI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 20 + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p0) :: (load (i32) from unknown-address + 20) + ; CI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 24 + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p0) :: (load (i32) from unknown-address + 24, align 8) + ; CI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 28 + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i32) from unknown-address + 28) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32), [[LOAD6]](i32), [[LOAD7]](i32) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x i32>) ; ; VI-LABEL: name: test_load_flat_v32s8_align32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8, align 8) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s32) from unknown-address + 16, align 16) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 20 - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s32) from unknown-address + 20) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s32) from unknown-address + 24, align 8) - ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 28 - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s32) from unknown-address + 28) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x 
s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i32) from unknown-address + 8, align 8) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i32) from unknown-address + 12) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p0) :: (load (i32) from unknown-address + 16, align 16) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 20 + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p0) :: (load (i32) from unknown-address + 20) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 24 + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p0) :: (load (i32) from unknown-address + 24, align 8) + ; VI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 28 + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i32) from unknown-address + 28) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32), [[LOAD6]](i32), [[LOAD7]](i32) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x i32>) ; ; GFX9PLUS-LABEL: name: test_load_flat_v32s8_align32 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 32) - ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16) - ; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 32) + ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x i32>) from unknown-address + 16) + ; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x i32>) ; ; GFX11PLUS-LABEL: name: test_load_flat_v32s8_align32 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) 
= COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 32) - ; GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16) - ; GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) - ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 32) + ; GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x i32>) from unknown-address + 16) + ; GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>) + ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x i32>) ; ; GFX12-LABEL: name: test_load_flat_v32s8_align32 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 32) - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16) - ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 32) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x i32>) from unknown-address + 16) + ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x i32>) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v32s8_align32 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 32) - ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16) - ; UNALIGNED_GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 32) + ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 
16 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x i32>) from unknown-address + 16) + ; UNALIGNED_GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x i32>) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v32s8_align32 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 32) - ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16) - ; UNALIGNED_GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x i32>) from unknown-address + 16) + ; UNALIGNED_GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x i32>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v32s8_align32 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 32) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16) - ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 32) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x i32>) from unknown-address + 16) + ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x i32>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<32 x s8>) = G_LOAD %0 :: (load (<32 x s8>), align 32, 
addrspace 0) - %2:_(<8 x s32>) = G_BITCAST %1 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %2 + %1:_(<32 x i8>) = G_LOAD %0(p0) :: (load (<32 x i8>)) + %2:_(<8 x i32>) = G_BITCAST %1(<32 x i8>) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %2(<8 x i32>) ... --- @@ -5999,60 +5999,60 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load (<2 x s16>)) - ; CI-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p0) :: (load (<2 x i16>)) + ; CI-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; ; VI-LABEL: name: test_load_flat_v2s16_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load (<2 x s16>)) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p0) :: (load (<2 x i16>)) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; ; GFX9PLUS-LABEL: name: test_load_flat_v2s16_align4 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load (<2 x s16>)) - ; GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p0) :: (load (<2 x i16>)) + ; GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; ; GFX11PLUS-LABEL: name: test_load_flat_v2s16_align4 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load (<2 x s16>)) - ; GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p0) :: (load (<2 x i16>)) + ; GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; ; GFX12-LABEL: name: test_load_flat_v2s16_align4 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load (<2 x s16>)) - ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p0) :: (load (<2 x i16>)) + ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v2s16_align4 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load (<2 x s16>)) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p0) :: (load (<2 x i16>)) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v2s16_align4 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load (<2 x s16>)) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p0) :: (load (<2 x i16>)) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY 
[[LOAD]](<2 x i16>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v2s16_align4 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load (<2 x s16>)) - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p0) :: (load (<2 x i16>)) + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 0) - $vgpr0 = COPY %1 + %1:_(<2 x i16>) = G_LOAD %0(p0) :: (load (<2 x i16>)) + $vgpr0 = COPY %1(<2 x i16>) ... --- @@ -6065,98 +6065,98 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]] - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16)) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C1]] + ; CI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C1]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C2]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x i16>) ; ; VI-LABEL: name: test_load_flat_v2s16_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16)) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: 
[[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C1]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C1]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C2]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x i16>) ; ; GFX9PLUS-LABEL: name: test_load_flat_v2s16_align2 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load (<2 x s16>), align 2) - ; GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p0) :: (load (<2 x i16>), align 2) + ; GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; ; GFX11PLUS-LABEL: name: test_load_flat_v2s16_align2 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load (<2 x s16>), align 2) - ; GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p0) :: (load (<2 x i16>), align 2) + ; GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; ; GFX12-LABEL: name: test_load_flat_v2s16_align2 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load (<2 x s16>), align 2) - ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p0) :: (load (<2 x i16>), align 2) + ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v2s16_align2 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) - ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) - ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16)) + ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2) + ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v2s16_align2 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) - ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) - ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16)) + ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2) + ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v2s16_align2 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) - ; UNALIGNED_GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) - ; UNALIGNED_GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16)) + ; UNALIGNED_GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2) + ; UNALIGNED_GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 2, addrspace 0) - $vgpr0 = COPY %1 + %1:_(<2 x i16>) = G_LOAD %0(p0) :: 
(load (<2 x i16>), align 2) + $vgpr0 = COPY %1(<2 x i16>) ... --- @@ -6169,148 +6169,148 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C3]] - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C3]] - ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL2]] - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[OR]], [[C3]] + ; CI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[OR1]], [[C3]] + ; CI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C4]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL2]] + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; CI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x i16>) ; ; VI-LABEL: name: test_load_flat_v2s16_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; 
VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C3]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C3]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL2]] - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[OR]], [[C3]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[OR1]], [[C3]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C4]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL2]] + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x i16>) ; ; GFX9PLUS-LABEL: name: test_load_flat_v2s16_align1 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load (<2 x s16>), align 1) - ; GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p0) :: (load (<2 x i16>), align 1) + ; GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; ; GFX11PLUS-LABEL: name: test_load_flat_v2s16_align1 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: 
[[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load (<2 x s16>), align 1) - ; GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p0) :: (load (<2 x i16>), align 1) + ; GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; ; GFX12-LABEL: name: test_load_flat_v2s16_align1 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load (<2 x s16>), align 1) - ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p0) :: (load (<2 x i16>), align 1) + ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v2s16_align1 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD 
[[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v2s16_align1 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; 
UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v2s16_align1 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; UNALIGNED_GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR]](i32) + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; 
UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 1, addrspace 0) - $vgpr0 = COPY %1 + %1:_(<2 x i16>) = G_LOAD %0(p0) :: (load (<2 x i16>), align 1) + $vgpr0 = COPY %1(<2 x i16>) ... --- @@ -6323,204 +6323,204 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load (<2 x s16>), align 8) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 4, align 4) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LOAD]](<2 x s16>) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]] - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] - ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] - ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]] - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]] - ; CI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p0) :: (load (<2 x i16>), align 8) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 4, align 4) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[LOAD]](<2 x i16>) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C1]](i32) + ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; CI-NEXT: 
[[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C2]] + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C2]] + ; CI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C2]] + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND2]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND1]], [[SHL1]] + ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C2]] + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C1]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR1]], [[SHL2]] + ; CI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>), [[BITCAST5]](<2 x i16>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; VI-LABEL: name: test_load_flat_v3s16_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load (<2 x s16>), align 8) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 4, align 4) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LOAD]](<2 x s16>) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]] - ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; VI-NEXT: 
[[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p0) :: (load (<2 x i16>), align 8) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 4, align 4) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[LOAD]](<2 x i16>) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C1]](i32) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C2]] + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C2]] + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C2]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND2]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND1]], [[SHL1]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C2]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C1]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR1]], [[SHL2]] + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>), [[BITCAST5]](<2 x i16>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX9PLUS-LABEL: name: test_load_flat_v3s16_align8 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>)) - ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9PLUS-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9PLUS-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 
x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p0) :: (load (<4 x i16>)) + ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[LOAD]](<4 x i16>) + ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX9PLUS-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9PLUS-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[UV]](<2 x i16>), [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX11PLUS-LABEL: name: test_load_flat_v3s16_align8 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>)) - ; GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX11PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX11PLUS-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX11PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX11PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX11PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX11PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX11PLUS-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX11PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p0) :: (load (<4 x i16>)) + ; GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[LOAD]](<4 x i16>) + ; GFX11PLUS-NEXT: 
[[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX11PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX11PLUS-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX11PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; GFX11PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX11PLUS-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX11PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX11PLUS-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX11PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX11PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[UV]](<2 x i16>), [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>) + ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX12-LABEL: name: test_load_flat_v3s16_align8 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>)) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p0) :: (load (<4 x i16>)) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[LOAD]](<4 x i16>) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; GFX12-NEXT: 
[[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[UV]](<2 x i16>), [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v3s16_align8 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>)) - ; UNALIGNED_GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNALIGNED_GFX9PLUS-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; UNALIGNED_GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p0) :: (load (<4 x i16>)) + ; UNALIGNED_GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[LOAD]](<4 x i16>) + ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; UNALIGNED_GFX9PLUS-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR 
[[BITCAST1]], [[C]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; UNALIGNED_GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[UV]](<2 x i16>), [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v3s16_align8 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>)) - ; UNALIGNED_GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNALIGNED_GFX11PLUS-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; UNALIGNED_GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p0) :: (load (<4 x i16>)) + ; UNALIGNED_GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[LOAD]](<4 x i16>) + ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; UNALIGNED_GFX11PLUS-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[LSHR:%[0-9]+]]:_(i32) 
= G_LSHR [[BITCAST1]], [[C]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; UNALIGNED_GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[UV]](<2 x i16>), [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v3s16_align8 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>)) - ; UNALIGNED_GFX12-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; UNALIGNED_GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNALIGNED_GFX12-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; UNALIGNED_GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; UNALIGNED_GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; UNALIGNED_GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; UNALIGNED_GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; UNALIGNED_GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p0) :: (load (<4 x i16>)) + ; UNALIGNED_GFX12-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[LOAD]](<4 x i16>) + ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; UNALIGNED_GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; UNALIGNED_GFX12-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; UNALIGNED_GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; UNALIGNED_GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; UNALIGNED_GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC 
[[LSHR]](i32) + ; UNALIGNED_GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; UNALIGNED_GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[UV]](<2 x i16>), [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 8, addrspace 0) - %2:_(<3 x s16>) = G_IMPLICIT_DEF - %3:_(<6 x s16>) = G_CONCAT_VECTORS %1, %2 - $vgpr0_vgpr1_vgpr2 = COPY %3 + %1:_(<3 x i16>) = G_LOAD %0(p0) :: (load (<3 x i16>), align 8) + %2:_(<3 x i16>) = G_IMPLICIT_DEF + %3:_(<6 x i16>) = G_CONCAT_VECTORS %1(<3 x i16>), %2(<3 x i16>) + $vgpr0_vgpr1_vgpr2 = COPY %3(<6 x i16>) ... --- @@ -6533,246 +6533,246 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load (<2 x s16>)) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 4, align 4) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LOAD]](<2 x s16>) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]] - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] - ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] - ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]] - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]] - ; CI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p0) :: (load (<2 x i16>)) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from 
unknown-address + 4, align 4) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[LOAD]](<2 x i16>) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C1]](i32) + ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C2]] + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C2]] + ; CI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C2]] + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND2]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND1]], [[SHL1]] + ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C2]] + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C1]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR1]], [[SHL2]] + ; CI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>), [[BITCAST5]](<2 x i16>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; VI-LABEL: name: test_load_flat_v3s16_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load (<2 x s16>)) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 4, align 4) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LOAD]](<2 x s16>) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = 
G_SHL [[AND3]], [[C1]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]] - ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p0) :: (load (<2 x i16>)) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 4, align 4) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[LOAD]](<2 x i16>) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C1]](i32) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C2]] + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C2]] + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C2]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND2]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND1]], [[SHL1]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C2]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C1]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR1]], [[SHL2]] + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>), [[BITCAST5]](<2 x i16>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX9PLUS-LABEL: name: test_load_flat_v3s16_align4 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) - ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) - ; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4, align 4) - ; GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES 
[[DEF]](<4 x s16>) - ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; GFX9PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 4) + ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2) + ; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; GFX9PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i16) from unknown-address + 4, align 4) + ; GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; GFX9PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX9PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX11PLUS-LABEL: name: test_load_flat_v3s16_align4 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) - ; GFX11PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; 
GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) - ; GFX11PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX11PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4, align 4) - ; GFX11PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX11PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; GFX11PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX11PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX11PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 4) + ; GFX11PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2) + ; GFX11PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; GFX11PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i16) from unknown-address + 4, align 4) + ; GFX11PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX11PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11PLUS-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; GFX11PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX11PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX11PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX11PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR 
[[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX11PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX12-LABEL: name: test_load_flat_v3s16_align4 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4, align 4) - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 4) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2) + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i16) from unknown-address + 4, align 4) + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; 
GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v3s16_align4 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) - ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) - ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4, align 4) - ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNALIGNED_GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; UNALIGNED_GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), 
align 4) + ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2) + ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i16) from unknown-address + 4, align 4) + ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; UNALIGNED_GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; UNALIGNED_GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v3s16_align4 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) - ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) - ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4, align 4) - ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNALIGNED_GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; 
UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; UNALIGNED_GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 4) + ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2) + ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i16) from unknown-address + 4, align 4) + ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; UNALIGNED_GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; UNALIGNED_GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; UNALIGNED_GFX12-LABEL: name: 
test_load_flat_v3s16_align4 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) - ; UNALIGNED_GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) - ; UNALIGNED_GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4, align 4) - ; UNALIGNED_GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNALIGNED_GFX12-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; UNALIGNED_GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; UNALIGNED_GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; UNALIGNED_GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 4) + ; UNALIGNED_GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2) + ; UNALIGNED_GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i16) from unknown-address + 4, align 4) + ; UNALIGNED_GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; UNALIGNED_GFX12-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; 
UNALIGNED_GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; UNALIGNED_GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; UNALIGNED_GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 4, addrspace 0) - %2:_(<3 x s16>) = G_IMPLICIT_DEF - %3:_(<6 x s16>) = G_CONCAT_VECTORS %1, %2 - $vgpr0_vgpr1_vgpr2 = COPY %3 + %1:_(<3 x i16>) = G_LOAD %0(p0) :: (load (<3 x i16>), align 4) + %2:_(<3 x i16>) = G_IMPLICIT_DEF + %3:_(<6 x i16>) = G_CONCAT_VECTORS %1(<3 x i16>), %2(<3 x i16>) + $vgpr0_vgpr1_vgpr2 = COPY %3(<6 x i16>) ... --- @@ -6785,250 +6785,250 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) - ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]] - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]] - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]] - ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]] - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] - ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = 
G_BITCAST [[OR2]](s32) - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16)) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i16) from unknown-address + 4) + ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C3]] + ; CI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C3]] + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C2]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C3]] + ; CI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C3]] + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C2]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CI-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C3]] + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND4]], [[C2]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL2]] + ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x i16>), [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; VI-LABEL: name: test_load_flat_v3s16_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: 
[[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16)) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i16) from unknown-address + 4) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C3]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C3]] + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C2]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C3]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C3]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C2]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; VI-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C3]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND4]], [[C2]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL2]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x i16>), [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX9PLUS-LABEL: name: test_load_flat_v3s16_align2 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = 
G_LOAD [[COPY]](p0) :: (load (s16)) - ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) - ; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) - ; GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; GFX9PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16)) + ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2) + ; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; GFX9PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i16) from unknown-address + 4) + ; GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; GFX9PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX9PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), 
[[TRUNC1]](i16) + ; GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX11PLUS-LABEL: name: test_load_flat_v3s16_align2 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) - ; GFX11PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) - ; GFX11PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX11PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) - ; GFX11PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX11PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; GFX11PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX11PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX11PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16)) + ; GFX11PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2) + ; GFX11PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; GFX11PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i16) from unknown-address + 4) + ; GFX11PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) 
= G_IMPLICIT_DEF + ; GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX11PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11PLUS-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; GFX11PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX11PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX11PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX11PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX11PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX12-LABEL: name: test_load_flat_v3s16_align2 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16)) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX12-NEXT: 
[[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2) + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i16) from unknown-address + 4) + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v3s16_align2 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) - ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) - ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) - ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNALIGNED_GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; 
UNALIGNED_GFX9PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; UNALIGNED_GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16)) + ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2) + ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i16) from unknown-address + 4) + ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; UNALIGNED_GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; UNALIGNED_GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v3s16_align2 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) - ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: 
[[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) - ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) - ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNALIGNED_GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; UNALIGNED_GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16)) + ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2) + ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i16) from unknown-address + 4) + ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; UNALIGNED_GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; 
UNALIGNED_GFX11PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; UNALIGNED_GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v3s16_align2 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) - ; UNALIGNED_GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) - ; UNALIGNED_GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) - ; UNALIGNED_GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNALIGNED_GFX12-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; UNALIGNED_GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; UNALIGNED_GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; UNALIGNED_GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16)) + ; UNALIGNED_GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2) + ; 
UNALIGNED_GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i16) from unknown-address + 4) + ; UNALIGNED_GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; UNALIGNED_GFX12-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; UNALIGNED_GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; UNALIGNED_GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; UNALIGNED_GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 2, addrspace 0) - %2:_(<3 x s16>) = G_IMPLICIT_DEF - %3:_(<6 x s16>) = G_CONCAT_VECTORS %1, %2 - $vgpr0_vgpr1_vgpr2 = COPY %3 + %1:_(<3 x i16>) = G_LOAD %0(p0) :: (load (<3 x i16>), align 2) + %2:_(<3 x i16>) = G_IMPLICIT_DEF + %3:_(<6 x i16>) = G_CONCAT_VECTORS %1(<3 x i16>), %2(<3 x i16>) + $vgpr0_vgpr1_vgpr2 = COPY %3(<6 x i16>) ... 
--- @@ -7041,320 +7041,320 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) - ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C5]] - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C5]] - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL3]] - ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C5]] - ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]] - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] - ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]] - ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD 
[[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i8) from unknown-address + 4) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p0) :: (load (i8) from unknown-address + 5) + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C4]](i32) + ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[OR]], [[C5]] + ; CI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[OR1]], [[C5]] + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C4]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL3]] + ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR3]](i32) + ; CI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[OR2]], [[C5]] + ; CI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C5]] + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C4]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL4]] + ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR4]](i32) + ; CI-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C5]] + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[AND4]], [[C4]](i32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL5]] + ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR5]](i32) + ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x i16>), [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; VI-LABEL: name: test_load_flat_v3s16_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - 
; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C5]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C5]] - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL3]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C5]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]] - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD 
[[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i8) from unknown-address + 4) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p0) :: (load (i8) from unknown-address + 5) + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C4]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[OR]], [[C5]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[OR1]], [[C5]] + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C4]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL3]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR3]](i32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[OR2]], [[C5]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C5]] + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C4]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL4]] + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR4]](i32) + ; VI-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[AND4]], [[C4]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL5]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR5]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x i16>), [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX9PLUS-LABEL: name: test_load_flat_v3s16_align1 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 1) - ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2, align 1) - ; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4, align 1) - ; GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; 
GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; GFX9PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 1) + ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2, align 1) + ; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; GFX9PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i16) from unknown-address + 4, align 1) + ; GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; GFX9PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX9PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX11PLUS-LABEL: name: test_load_flat_v3s16_align1 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: 
(load (s16), align 1) - ; GFX11PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2, align 1) - ; GFX11PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX11PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4, align 1) - ; GFX11PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX11PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; GFX11PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX11PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX11PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 1) + ; GFX11PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2, align 1) + ; GFX11PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; GFX11PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i16) from unknown-address + 4, align 1) + ; GFX11PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX11PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11PLUS-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; GFX11PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX11PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX11PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; 
GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX11PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX11PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX12-LABEL: name: test_load_flat_v3s16_align1 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 1) - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2, align 1) - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4, align 1) - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 1) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2, align 1) + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i16) from unknown-address + 4, align 1) + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = 
G_IMPLICIT_DEF + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v3s16_align1 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32) - ; 
UNALIGNED_GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNALIGNED_GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; UNALIGNED_GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i8) from unknown-address + 4) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p0) :: (load (i8) from unknown-address + 5) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC 
[[OR2]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; UNALIGNED_GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C4]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; UNALIGNED_GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v3s16_align1 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from 
unknown-address + 5) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNALIGNED_GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; UNALIGNED_GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i8) from unknown-address + 4) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(i32) 
= G_LOAD [[PTR_ADD4]](p0) :: (load (i8) from unknown-address + 5) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[OR2]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; UNALIGNED_GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C4]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; UNALIGNED_GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v3s16_align1 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; UNALIGNED_GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from 
unknown-address + 4) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) - ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32) - ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNALIGNED_GFX12-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; UNALIGNED_GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) - ; UNALIGNED_GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; UNALIGNED_GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; UNALIGNED_GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR]](i32) + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i8) from unknown-address + 4) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], 
[[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p0) :: (load (i8) from unknown-address + 5) + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[OR2]](i32) + ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; UNALIGNED_GFX12-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; UNALIGNED_GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C4]](i32) + ; UNALIGNED_GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; UNALIGNED_GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; UNALIGNED_GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 1, addrspace 0) - %2:_(<3 x s16>) = G_IMPLICIT_DEF - %3:_(<6 x s16>) = G_CONCAT_VECTORS %1, %2 - $vgpr0_vgpr1_vgpr2 = COPY %3 + %1:_(<3 x i16>) = G_LOAD %0(p0) :: (load (<3 x i16>), align 1) + %2:_(<3 x i16>) = G_IMPLICIT_DEF + %3:_(<6 x i16>) = G_CONCAT_VECTORS %1(<3 x i16>), %2(<3 x i16>) + $vgpr0_vgpr1_vgpr2 = COPY %3(<6 x i16>) ... 
--- @@ -7367,68 +7367,68 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load (<2 x s16>), align 8) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x s16>) from unknown-address + 4) - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p0) :: (load (<2 x i16>), align 8) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x i16>) from unknown-address + 4) + ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[LOAD]](<2 x i16>), [[LOAD1]](<2 x i16>) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; VI-LABEL: name: test_load_flat_v4s16_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load (<2 x s16>), align 8) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x s16>) from unknown-address + 4) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p0) :: (load (<2 x i16>), align 8) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x i16>) from unknown-address + 4) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[LOAD]](<2 x i16>), [[LOAD1]](<2 x i16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX9PLUS-LABEL: name: test_load_flat_v4s16_align8 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>)) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p0) :: (load (<4 x i16>)) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; GFX11PLUS-LABEL: name: test_load_flat_v4s16_align8 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>)) - ; GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p0) :: (load (<4 x i16>)) + ; GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; GFX12-LABEL: name: test_load_flat_v4s16_align8 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>)) - ; 
GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p0) :: (load (<4 x i16>)) + ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v4s16_align8 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>)) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p0) :: (load (<4 x i16>)) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v4s16_align8 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>)) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p0) :: (load (<4 x i16>)) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v4s16_align8 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>)) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p0) :: (load (<4 x i16>)) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 8, addrspace 0) - $vgpr0_vgpr1 = COPY %1 + %1:_(<4 x i16>) = G_LOAD %0(p0) :: (load (<4 x i16>)) + $vgpr0_vgpr1 = COPY %1(<4 x i16>) ... 
--- @@ -7441,68 +7441,68 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load (<2 x s16>)) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x s16>) from unknown-address + 4) - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p0) :: (load (<2 x i16>)) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x i16>) from unknown-address + 4) + ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[LOAD]](<2 x i16>), [[LOAD1]](<2 x i16>) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; VI-LABEL: name: test_load_flat_v4s16_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load (<2 x s16>)) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x s16>) from unknown-address + 4) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p0) :: (load (<2 x i16>)) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x i16>) from unknown-address + 4) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[LOAD]](<2 x i16>), [[LOAD1]](<2 x i16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX9PLUS-LABEL: name: test_load_flat_v4s16_align4 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>), align 4) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p0) :: (load (<4 x i16>), align 4) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; GFX11PLUS-LABEL: name: test_load_flat_v4s16_align4 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>), align 4) - ; GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p0) :: (load (<4 x i16>), align 4) + ; GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; GFX12-LABEL: name: test_load_flat_v4s16_align4 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>), 
align 4) - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p0) :: (load (<4 x i16>), align 4) + ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v4s16_align4 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>), align 4) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p0) :: (load (<4 x i16>), align 4) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v4s16_align4 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>), align 4) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p0) :: (load (<4 x i16>), align 4) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v4s16_align4 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>), align 4) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p0) :: (load (<4 x i16>), align 4) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 4, addrspace 0) - $vgpr0_vgpr1 = COPY %1 + %1:_(<4 x i16>) = G_LOAD %0(p0) :: (load (<4 x i16>), align 4) + $vgpr0_vgpr1 = COPY %1(<4 x i16>) ... 
--- @@ -7515,150 +7515,150 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]] - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6) - ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]] - ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]] - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16)) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C1]] + ; CI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C1]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C2]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i16) from unknown-address + 4) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i16) from unknown-address + 6) + ; CI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C1]] + ; CI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[LOAD3]], [[C1]] + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C2]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x i16>), [[BITCAST1]](<2 x i16>) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; VI-LABEL: name: test_load_flat_v4s16_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} 
; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16)) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C1]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C1]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C2]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i16) from unknown-address + 4) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i16) from unknown-address + 6) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C1]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[LOAD3]], [[C1]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C2]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x i16>), [[BITCAST1]](<2 x i16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX9PLUS-LABEL: name: test_load_flat_v4s16_align2 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; 
GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>), align 2) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p0) :: (load (<4 x i16>), align 2) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; GFX11PLUS-LABEL: name: test_load_flat_v4s16_align2 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>), align 2) - ; GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p0) :: (load (<4 x i16>), align 2) + ; GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; GFX12-LABEL: name: test_load_flat_v4s16_align2 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>), align 2) - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p0) :: (load (<4 x i16>), align 2) + ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v4s16_align2 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) - ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) - ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) - ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6) - ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; UNALIGNED_GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16)) + ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; 
UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2) + ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i16) from unknown-address + 4) + ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i16) from unknown-address + 6) + ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD3]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; UNALIGNED_GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v4s16_align2 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) - ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) - ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) - ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6) - ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; UNALIGNED_GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16)) + ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT 
i64 2 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2) + ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i16) from unknown-address + 4) + ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i16) from unknown-address + 6) + ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD3]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; UNALIGNED_GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v4s16_align2 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) - ; UNALIGNED_GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) - ; UNALIGNED_GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) - ; UNALIGNED_GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6) - ; UNALIGNED_GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16)) + ; UNALIGNED_GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = 
G_CONSTANT i64 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2) + ; UNALIGNED_GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i16) from unknown-address + 4) + ; UNALIGNED_GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i16) from unknown-address + 6) + ; UNALIGNED_GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD3]](i32) + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 2, addrspace 0) - $vgpr0_vgpr1 = COPY %1 + %1:_(<4 x i16>) = G_LOAD %0(p0) :: (load (<4 x i16>), align 2) + $vgpr0_vgpr1 = COPY %1(<4 x i16>) ... --- @@ -7671,240 +7671,240 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C3]] - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C3]] - ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL2]] - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) - ; CI-NEXT: 
[[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD2]] - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD3]] - ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C3]] - ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C3]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL5]] - ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[OR]], [[C3]] + ; CI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[OR1]], [[C3]] + ; CI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C4]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL2]] + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; CI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i8) from unknown-address + 4) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p0) :: (load (i8) from unknown-address + 5) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD2]] + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (i8) from unknown-address + 6) + ; CI-NEXT: 
[[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i8) from unknown-address + 7) + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD3]] + ; CI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[OR3]], [[C3]] + ; CI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[OR4]], [[C3]] + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C4]](i32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL5]] + ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR5]](i32) + ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x i16>), [[BITCAST1]](<2 x i16>) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; VI-LABEL: name: test_load_flat_v4s16_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C3]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C3]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL2]] - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD2]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD3]] - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR3]], 
[[C3]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C3]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL5]] - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[OR]], [[C3]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[OR1]], [[C3]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C4]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL2]] + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i8) from unknown-address + 4) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p0) :: (load (i8) from unknown-address + 5) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD2]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (i8) from unknown-address + 6) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i8) from unknown-address + 7) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD3]] + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[OR3]], [[C3]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[OR4]], [[C3]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C4]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL5]] + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR5]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x i16>), [[BITCAST1]](<2 x i16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX9PLUS-LABEL: name: test_load_flat_v4s16_align1 
; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>), align 1) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p0) :: (load (<4 x i16>), align 1) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; GFX11PLUS-LABEL: name: test_load_flat_v4s16_align1 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>), align 1) - ; GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p0) :: (load (<4 x i16>), align 1) + ; GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; GFX12-LABEL: name: test_load_flat_v4s16_align1 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>), align 1) - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p0) :: (load (<4 x i16>), align 1) + ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v4s16_align1 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) - 
; UNALIGNED_GFX9PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[OR3]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; UNALIGNED_GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i8) from unknown-address + 4) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p0) :: (load (i8) from unknown-address + 5) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; 
UNALIGNED_GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[OR2]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (i8) from unknown-address + 6) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i8) from unknown-address + 7) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[OR3]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; UNALIGNED_GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v4s16_align1 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; 
UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[OR3]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; UNALIGNED_GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i8) from unknown-address + 4) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p0) :: (load (i8) from unknown-address + 5) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = 
G_TRUNC [[OR2]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (i8) from unknown-address + 6) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i8) from unknown-address + 7) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[OR3]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; UNALIGNED_GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v4s16_align1 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; UNALIGNED_GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) - ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32) - ; 
UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) - ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[OR3]](s32) - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR]](i32) + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i8) from unknown-address + 4) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p0) :: (load (i8) from unknown-address + 5) + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[OR2]](i32) + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (i8) from unknown-address + 6) + ; 
UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i8) from unknown-address + 7) + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[OR3]](i32) + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 1, addrspace 0) - $vgpr0_vgpr1 = COPY %1 + %1:_(<4 x i16>) = G_LOAD %0(p0) :: (load (<4 x i16>), align 1) + $vgpr0_vgpr1 = COPY %1(<4 x i16>) ... --- @@ -7917,88 +7917,88 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 8) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8, align 8) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 8) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i32) from unknown-address + 8, align 8) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i32) from unknown-address + 12) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i16>) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x i16>) ; ; VI-LABEL: name: test_load_flat_v8s16_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; 
VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 8) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8, align 8) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 8) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i32) from unknown-address + 8, align 8) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i32) from unknown-address + 12) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i16>) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x i16>) ; ; GFX9PLUS-LABEL: name: test_load_flat_v8s16_align8 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 8) - ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 8) + ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i16>) = G_BITCAST [[LOAD]](<4 x i32>) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x i16>) ; ; GFX11PLUS-LABEL: name: test_load_flat_v8s16_align8 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 8) - ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 8) + ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i16>) = G_BITCAST [[LOAD]](<4 x i32>) + ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x i16>) ; ; GFX12-LABEL: 
name: test_load_flat_v8s16_align8 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 8) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 8) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i16>) = G_BITCAST [[LOAD]](<4 x i32>) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x i16>) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v8s16_align8 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 8) - ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 8) + ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i16>) = G_BITCAST [[LOAD]](<4 x i32>) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x i16>) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v8s16_align8 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 8) - ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 8) + ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i16>) = G_BITCAST [[LOAD]](<4 x i32>) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x i16>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v8s16_align8 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 8) - ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 8) + ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i16>) = G_BITCAST [[LOAD]](<4 x i32>) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x i16>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<8 x s16>) = G_LOAD %0 :: (load (<8 x s16>), align 8, addrspace 0) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<8 x i16>) = G_LOAD %0(p0) :: (load (<8 x i16>), align 8) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<8 x i16>) ... 
--- @@ -8011,68 +8011,68 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 8) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 8) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; VI-LABEL: name: test_load_flat_v2s32_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 8) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 8) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX9PLUS-LABEL: name: test_load_flat_v2s32_align8 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>)) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p0) :: (load (<2 x i32>)) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX11PLUS-LABEL: name: test_load_flat_v2s32_align8 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>)) - ; GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p0) :: (load (<2 x i32>)) + ; GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX12-LABEL: name: test_load_flat_v2s32_align8 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>)) - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p0) :: (load (<2 x i32>)) + ; GFX12-NEXT: 
$vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v2s32_align8 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>)) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p0) :: (load (<2 x i32>)) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v2s32_align8 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>)) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p0) :: (load (<2 x i32>)) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v2s32_align8 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>)) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p0) :: (load (<2 x i32>)) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 8, addrspace 0) - $vgpr0_vgpr1 = COPY %1 + %1:_(<2 x i32>) = G_LOAD %0(p0) :: (load (<2 x i32>)) + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... 
--- @@ -8085,68 +8085,68 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; VI-LABEL: name: test_load_flat_v2s32_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX9PLUS-LABEL: name: test_load_flat_v2s32_align4 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>), align 4) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p0) :: (load (<2 x i32>), align 4) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX11PLUS-LABEL: name: test_load_flat_v2s32_align4 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>), align 4) - ; GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p0) :: (load (<2 x i32>), align 4) + ; GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX12-LABEL: name: test_load_flat_v2s32_align4 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>), align 4) - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p0) :: (load (<2 x i32>), align 4) + ; 
GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v2s32_align4 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>), align 4) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p0) :: (load (<2 x i32>), align 4) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v2s32_align4 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>), align 4) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p0) :: (load (<2 x i32>), align 4) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v2s32_align4 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>), align 4) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p0) :: (load (<2 x i32>), align 4) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 4, addrspace 0) - $vgpr0_vgpr1 = COPY %1 + %1:_(<2 x i32>) = G_LOAD %0(p0) :: (load (<2 x i32>), align 4) + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... 
@@ -8160,68 +8160,68 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; VI-LABEL: name: test_load_flat_v2s32_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX9PLUS-LABEL: name: test_load_flat_v2s32_align1 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>), align 4) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p0) :: (load (<2 x i32>), align 4) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX11PLUS-LABEL: name: test_load_flat_v2s32_align1 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>), align 4) - ; GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p0) :: (load (<2 x i32>), align 4) + ; GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX12-LABEL: name: test_load_flat_v2s32_align1 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>), align 4) - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p0) :: (load (<2 x i32>), align 4) + ; 
GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v2s32_align1 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>), align 4) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p0) :: (load (<2 x i32>), align 4) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v2s32_align1 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>), align 4) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p0) :: (load (<2 x i32>), align 4) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v2s32_align1 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>), align 4) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p0) :: (load (<2 x i32>), align 4) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 4, addrspace 0) - $vgpr0_vgpr1 = COPY %1 + %1:_(<2 x i32>) = G_LOAD %0(p0) :: (load (<2 x i32>), align 4) + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... 
--- @@ -8234,74 +8234,74 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 16) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8, align 8) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 16) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i32) from unknown-address + 8, align 8) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) ; ; VI-LABEL: name: test_load_flat_v3s32_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 16) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8, align 8) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 16) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i32) from unknown-address + 8, align 8) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) ; ; GFX9PLUS-LABEL: name: test_load_flat_v3s32_align16 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 16) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY 
[[LOAD]](<3 x s32>) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p0) :: (load (<3 x i32>), align 16) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) ; ; GFX11PLUS-LABEL: name: test_load_flat_v3s32_align16 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 16) - ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p0) :: (load (<3 x i32>), align 16) + ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) ; ; GFX12-LABEL: name: test_load_flat_v3s32_align16 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 16) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p0) :: (load (<3 x i32>), align 16) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v3s32_align16 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 16) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p0) :: (load (<3 x i32>), align 16) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v3s32_align16 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 16) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p0) :: (load (<3 x i32>), align 16) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v3s32_align16 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 16) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p0) :: (load (<3 x i32>), align 16) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 16, addrspace 0) - $vgpr0_vgpr1_vgpr2 = COPY %1 + %1:_(<3 x i32>) = G_LOAD %0(p0) :: (load (<3 x i32>), align 16) + $vgpr0_vgpr1_vgpr2 = COPY %1(<3 x i32>) ... 
@@ -8316,74 +8316,74 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i32) from unknown-address + 8) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) ; ; VI-LABEL: name: test_load_flat_v3s32_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i32) from unknown-address + 8) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) ; ; GFX9PLUS-LABEL: name: test_load_flat_v3s32_align4 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 4) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD 
[[COPY]](p0) :: (load (<3 x i32>), align 4) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) ; ; GFX11PLUS-LABEL: name: test_load_flat_v3s32_align4 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 4) - ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p0) :: (load (<3 x i32>), align 4) + ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) ; ; GFX12-LABEL: name: test_load_flat_v3s32_align4 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 4) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p0) :: (load (<3 x i32>), align 4) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v3s32_align4 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 4) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p0) :: (load (<3 x i32>), align 4) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v3s32_align4 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 4) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p0) :: (load (<3 x i32>), align 4) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v3s32_align4 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 4) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p0) :: (load (<3 x i32>), align 4) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 4, addrspace 0) - $vgpr0_vgpr1_vgpr2 = COPY %1 + %1:_(<3 x i32>) = G_LOAD %0(p0) :: (load (<3 x i32>), align 4) + $vgpr0_vgpr1_vgpr2 = COPY %1(<3 x i32>) ... 
--- @@ -8396,80 +8396,80 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 16) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8, align 8) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 16) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i32) from unknown-address + 8, align 8) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i32) from unknown-address + 12) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; VI-LABEL: name: test_load_flat_v4s32_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 16) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8, align 8) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 16) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) 
from unknown-address + 4) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i32) from unknown-address + 8, align 8) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i32) from unknown-address + 12) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; GFX9PLUS-LABEL: name: test_load_flat_v4s32_align16 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>)) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>)) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; GFX11PLUS-LABEL: name: test_load_flat_v4s32_align16 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>)) - ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>)) + ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; GFX12-LABEL: name: test_load_flat_v4s32_align16 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>)) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>)) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v4s32_align16 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>)) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>)) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v4s32_align16 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>)) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>)) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v4s32_align16 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY 
$vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>)) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>)) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 16, addrspace 0) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<4 x i32>) = G_LOAD %0(p0) :: (load (<4 x i32>)) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<4 x i32>) ... --- @@ -8482,80 +8482,80 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 8) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8, align 8) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 8) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i32) from unknown-address + 8, align 8) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i32) from unknown-address + 12) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; VI-LABEL: name: test_load_flat_v4s32_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 8) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8, align 8) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD 
[[COPY]], [[C2]](s64) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 8) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i32) from unknown-address + 8, align 8) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i32) from unknown-address + 12) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; GFX9PLUS-LABEL: name: test_load_flat_v4s32_align8 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 8) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 8) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; GFX11PLUS-LABEL: name: test_load_flat_v4s32_align8 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 8) - ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 8) + ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; GFX12-LABEL: name: test_load_flat_v4s32_align8 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 8) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 8) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v4s32_align8 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 8) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 8) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v4s32_align8 ; UNALIGNED_GFX11PLUS: 
liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 8) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 8) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v4s32_align8 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 8) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 8) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 8, addrspace 0) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<4 x i32>) = G_LOAD %0(p0) :: (load (<4 x i32>), align 8) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<4 x i32>) ... --- @@ -8568,80 +8568,80 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i32) from unknown-address + 8) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i32) from unknown-address + 12) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; VI-LABEL: name: test_load_flat_v4s32_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: 
[[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i32) from unknown-address + 8) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i32) from unknown-address + 12) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; GFX9PLUS-LABEL: name: test_load_flat_v4s32_align4 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 4) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; GFX11PLUS-LABEL: name: test_load_flat_v4s32_align4 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4) - ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 4) + ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; GFX12-LABEL: name: test_load_flat_v4s32_align4 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 4) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; 
UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v4s32_align4 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 4) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v4s32_align4 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 4) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v4s32_align4 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 4) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 4, addrspace 0) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<4 x i32>) = G_LOAD %0(p0) :: (load (<4 x i32>), align 4) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<4 x i32>) ... 
--- @@ -8654,128 +8654,128 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 32) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8, align 8) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) - ; CI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s32) from unknown-address + 16, align 16) - ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 20 - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s32) from unknown-address + 20) - ; CI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s32) from unknown-address + 24, align 8) - ; CI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 28 - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s32) from unknown-address + 28) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 32) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i32) from unknown-address + 8, align 8) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i32) from unknown-address + 12) + ; CI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p0) :: (load (i32) from unknown-address + 16, align 16) + ; CI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 20 + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p0) :: (load (i32) from unknown-address + 20) + ; CI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 24 + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = 
G_PTR_ADD [[COPY]], [[C5]](i64) + ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p0) :: (load (i32) from unknown-address + 24, align 8) + ; CI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 28 + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i32) from unknown-address + 28) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32), [[LOAD6]](i32), [[LOAD7]](i32) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x i32>) ; ; VI-LABEL: name: test_load_flat_v8s32_align32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8, align 8) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s32) from unknown-address + 16, align 16) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 20 - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s32) from unknown-address + 20) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s32) from unknown-address + 24, align 8) - ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 28 - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s32) from unknown-address + 28) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i32) from unknown-address + 8, align 8) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; 
VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i32) from unknown-address + 12) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p0) :: (load (i32) from unknown-address + 16, align 16) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 20 + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p0) :: (load (i32) from unknown-address + 20) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 24 + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p0) :: (load (i32) from unknown-address + 24, align 8) + ; VI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 28 + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i32) from unknown-address + 28) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32), [[LOAD6]](i32), [[LOAD7]](i32) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x i32>) ; ; GFX9PLUS-LABEL: name: test_load_flat_v8s32_align32 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 32) - ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16) - ; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 32) + ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x i32>) from unknown-address + 16) + ; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x i32>) ; ; GFX11PLUS-LABEL: name: test_load_flat_v8s32_align32 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 32) - ; GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16) - ; GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) - ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: 
(load (<4 x i32>), align 32) + ; GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x i32>) from unknown-address + 16) + ; GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>) + ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x i32>) ; ; GFX12-LABEL: name: test_load_flat_v8s32_align32 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 32) - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16) - ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 32) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x i32>) from unknown-address + 16) + ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x i32>) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v8s32_align32 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 32) - ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16) - ; UNALIGNED_GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 32) + ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x i32>) from unknown-address + 16) + ; UNALIGNED_GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x i32>) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v8s32_align32 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; 
UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 32) - ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16) - ; UNALIGNED_GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x i32>) from unknown-address + 16) + ; UNALIGNED_GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x i32>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v8s32_align32 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 32) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16) - ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 32) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x i32>) from unknown-address + 16) + ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x i32>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<8 x s32>) = G_LOAD %0 :: (load (<8 x s32>), align 32, addrspace 0) - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 + %1:_(<8 x i32>) = G_LOAD %0(p0) :: (load (<8 x i32>)) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1(<8 x i32>) ... 
--- @@ -8788,212 +8788,212 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 32) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8, align 8) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) - ; CI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s32) from unknown-address + 16, align 16) - ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 20 - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s32) from unknown-address + 20) - ; CI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s32) from unknown-address + 24, align 8) - ; CI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 28 - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s32) from unknown-address + 28) - ; CI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; CI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load (s32) from unknown-address + 32, align 32) - ; CI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; CI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load (s32) from unknown-address + 36) - ; CI-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 40 - ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s64) - ; CI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load (s32) from unknown-address + 40, align 8) - ; CI-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C10]](s64) - ; CI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s32) from unknown-address + 44) - ; CI-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 - ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C11]](s64) - ; CI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load (s32) from unknown-address + 48, align 16) - ; CI-NEXT: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 - ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C12]](s64) - ; CI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load (s32) from unknown-address + 52) - ; CI-NEXT: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 56 - ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C13]](s64) - ; CI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load 
(s32) from unknown-address + 56, align 8) - ; CI-NEXT: [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 60 - ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C14]](s64) - ; CI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s32) from unknown-address + 60) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BUILD_VECTOR]](<16 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 32) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i32) from unknown-address + 8, align 8) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i32) from unknown-address + 12) + ; CI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p0) :: (load (i32) from unknown-address + 16, align 16) + ; CI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 20 + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p0) :: (load (i32) from unknown-address + 20) + ; CI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 24 + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p0) :: (load (i32) from unknown-address + 24, align 8) + ; CI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 28 + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i32) from unknown-address + 28) + ; CI-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 32 + ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](i64) + ; CI-NEXT: [[LOAD8:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD7]](p0) :: (load (i32) from unknown-address + 32, align 32) + ; CI-NEXT: [[C8:%[0-9]+]]:_(i64) = G_CONSTANT i64 36 + ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](i64) + ; CI-NEXT: [[LOAD9:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD8]](p0) :: (load (i32) from unknown-address + 36) + ; CI-NEXT: [[C9:%[0-9]+]]:_(i64) = G_CONSTANT i64 40 + ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](i64) + ; CI-NEXT: [[LOAD10:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD9]](p0) :: (load (i32) from unknown-address + 40, align 8) + ; CI-NEXT: [[C10:%[0-9]+]]:_(i64) = G_CONSTANT i64 44 + ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C10]](i64) + ; CI-NEXT: [[LOAD11:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p0) :: (load (i32) from unknown-address + 44) + ; CI-NEXT: [[C11:%[0-9]+]]:_(i64) = G_CONSTANT i64 48 + ; CI-NEXT: 
[[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C11]](i64) + ; CI-NEXT: [[LOAD12:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD11]](p0) :: (load (i32) from unknown-address + 48, align 16) + ; CI-NEXT: [[C12:%[0-9]+]]:_(i64) = G_CONSTANT i64 52 + ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C12]](i64) + ; CI-NEXT: [[LOAD13:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD12]](p0) :: (load (i32) from unknown-address + 52) + ; CI-NEXT: [[C13:%[0-9]+]]:_(i64) = G_CONSTANT i64 56 + ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C13]](i64) + ; CI-NEXT: [[LOAD14:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD13]](p0) :: (load (i32) from unknown-address + 56, align 8) + ; CI-NEXT: [[C14:%[0-9]+]]:_(i64) = G_CONSTANT i64 60 + ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C14]](i64) + ; CI-NEXT: [[LOAD15:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p0) :: (load (i32) from unknown-address + 60) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32), [[LOAD6]](i32), [[LOAD7]](i32), [[LOAD8]](i32), [[LOAD9]](i32), [[LOAD10]](i32), [[LOAD11]](i32), [[LOAD12]](i32), [[LOAD13]](i32), [[LOAD14]](i32), [[LOAD15]](i32) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BUILD_VECTOR]](<16 x i32>) ; ; VI-LABEL: name: test_load_flat_v16s32_align32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8, align 8) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s32) from unknown-address + 16, align 16) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 20 - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s32) from unknown-address + 20) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s32) from unknown-address + 24, align 8) - ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 28 - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s32) from unknown-address + 28) - ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; VI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load (s32) from unknown-address + 32, align 32) - ; VI-NEXT: 
[[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; VI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load (s32) from unknown-address + 36) - ; VI-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 40 - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s64) - ; VI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load (s32) from unknown-address + 40, align 8) - ; VI-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C10]](s64) - ; VI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s32) from unknown-address + 44) - ; VI-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 - ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C11]](s64) - ; VI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load (s32) from unknown-address + 48, align 16) - ; VI-NEXT: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 - ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C12]](s64) - ; VI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load (s32) from unknown-address + 52) - ; VI-NEXT: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 56 - ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C13]](s64) - ; VI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load (s32) from unknown-address + 56, align 8) - ; VI-NEXT: [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 60 - ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C14]](s64) - ; VI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s32) from unknown-address + 60) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BUILD_VECTOR]](<16 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i32) from unknown-address + 8, align 8) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i32) from unknown-address + 12) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p0) :: (load (i32) from unknown-address + 16, align 16) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 20 + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p0) :: (load (i32) from unknown-address + 20) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 24 + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], 
[[C5]](i64) + ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p0) :: (load (i32) from unknown-address + 24, align 8) + ; VI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 28 + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i32) from unknown-address + 28) + ; VI-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 32 + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](i64) + ; VI-NEXT: [[LOAD8:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD7]](p0) :: (load (i32) from unknown-address + 32, align 32) + ; VI-NEXT: [[C8:%[0-9]+]]:_(i64) = G_CONSTANT i64 36 + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](i64) + ; VI-NEXT: [[LOAD9:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD8]](p0) :: (load (i32) from unknown-address + 36) + ; VI-NEXT: [[C9:%[0-9]+]]:_(i64) = G_CONSTANT i64 40 + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](i64) + ; VI-NEXT: [[LOAD10:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD9]](p0) :: (load (i32) from unknown-address + 40, align 8) + ; VI-NEXT: [[C10:%[0-9]+]]:_(i64) = G_CONSTANT i64 44 + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C10]](i64) + ; VI-NEXT: [[LOAD11:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p0) :: (load (i32) from unknown-address + 44) + ; VI-NEXT: [[C11:%[0-9]+]]:_(i64) = G_CONSTANT i64 48 + ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C11]](i64) + ; VI-NEXT: [[LOAD12:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD11]](p0) :: (load (i32) from unknown-address + 48, align 16) + ; VI-NEXT: [[C12:%[0-9]+]]:_(i64) = G_CONSTANT i64 52 + ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C12]](i64) + ; VI-NEXT: [[LOAD13:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD12]](p0) :: (load (i32) from unknown-address + 52) + ; VI-NEXT: [[C13:%[0-9]+]]:_(i64) = G_CONSTANT i64 56 + ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C13]](i64) + ; VI-NEXT: [[LOAD14:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD13]](p0) :: (load (i32) from unknown-address + 56, align 8) + ; VI-NEXT: [[C14:%[0-9]+]]:_(i64) = G_CONSTANT i64 60 + ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C14]](i64) + ; VI-NEXT: [[LOAD15:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p0) :: (load (i32) from unknown-address + 60) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32), [[LOAD6]](i32), [[LOAD7]](i32), [[LOAD8]](i32), [[LOAD9]](i32), [[LOAD10]](i32), [[LOAD11]](i32), [[LOAD12]](i32), [[LOAD13]](i32), [[LOAD14]](i32), [[LOAD15]](i32) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BUILD_VECTOR]](<16 x i32>) ; ; GFX9PLUS-LABEL: name: test_load_flat_v16s32_align32 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 32) - ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16) - ; GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p0) :: (load (<4 
x s32>) from unknown-address + 32, align 32) - ; GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 - ; GFX9PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9PLUS-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p0) :: (load (<4 x s32>) from unknown-address + 48) - ; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 32) + ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x i32>) from unknown-address + 16) + ; GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 32 + ; GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; GFX9PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD1]](p0) :: (load (<4 x i32>) from unknown-address + 32, align 32) + ; GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 48 + ; GFX9PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9PLUS-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD2]](p0) :: (load (<4 x i32>) from unknown-address + 48) + ; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>), [[LOAD2]](<4 x i32>), [[LOAD3]](<4 x i32>) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x i32>) ; ; GFX11PLUS-LABEL: name: test_load_flat_v16s32_align32 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 32) - ; GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16) - ; GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX11PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p0) :: (load (<4 x s32>) from unknown-address + 32, align 32) - ; GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 - ; GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX11PLUS-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p0) :: (load (<4 x s32>) from unknown-address + 48) - ; GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) - ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 32) + ; GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX11PLUS-NEXT: 
[[LOAD1:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x i32>) from unknown-address + 16) + ; GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 32 + ; GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; GFX11PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD1]](p0) :: (load (<4 x i32>) from unknown-address + 32, align 32) + ; GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 48 + ; GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX11PLUS-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD2]](p0) :: (load (<4 x i32>) from unknown-address + 48) + ; GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>), [[LOAD2]](<4 x i32>), [[LOAD3]](<4 x i32>) + ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x i32>) ; ; GFX12-LABEL: name: test_load_flat_v16s32_align32 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 32) - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16) - ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p0) :: (load (<4 x s32>) from unknown-address + 32, align 32) - ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 - ; GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p0) :: (load (<4 x s32>) from unknown-address + 48) - ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 32) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x i32>) from unknown-address + 16) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 32 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD1]](p0) :: (load (<4 x i32>) from unknown-address + 32, align 32) + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 48 + ; GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD2]](p0) :: (load (<4 x i32>) from unknown-address + 48) + ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>), [[LOAD2]](<4 x i32>), [[LOAD3]](<4 x i32>) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x i32>) ; ; UNALIGNED_GFX9PLUS-LABEL: 
name: test_load_flat_v16s32_align32 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 32) - ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16) - ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p0) :: (load (<4 x s32>) from unknown-address + 32, align 32) - ; UNALIGNED_GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p0) :: (load (<4 x s32>) from unknown-address + 48) - ; UNALIGNED_GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 32) + ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x i32>) from unknown-address + 16) + ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 32 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD1]](p0) :: (load (<4 x i32>) from unknown-address + 32, align 32) + ; UNALIGNED_GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 48 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD2]](p0) :: (load (<4 x i32>) from unknown-address + 48) + ; UNALIGNED_GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>), [[LOAD2]](<4 x i32>), [[LOAD3]](<4 x i32>) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x i32>) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v16s32_align32 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 32) - ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16) - ; UNALIGNED_GFX11PLUS-NEXT: 
[[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p0) :: (load (<4 x s32>) from unknown-address + 32, align 32) - ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p0) :: (load (<4 x s32>) from unknown-address + 48) - ; UNALIGNED_GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x i32>) from unknown-address + 16) + ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 32 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD1]](p0) :: (load (<4 x i32>) from unknown-address + 32, align 32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 48 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD2]](p0) :: (load (<4 x i32>) from unknown-address + 48) + ; UNALIGNED_GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>), [[LOAD2]](<4 x i32>), [[LOAD3]](<4 x i32>) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x i32>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v16s32_align32 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 32) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p0) :: (load (<4 x s32>) from unknown-address + 32, align 32) - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p0) :: (load (<4 x s32>) from unknown-address + 48) - ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS 
[[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 32) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x i32>) from unknown-address + 16) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 32 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD1]](p0) :: (load (<4 x i32>) from unknown-address + 32, align 32) + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 48 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD2]](p0) :: (load (<4 x i32>) from unknown-address + 48) + ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>), [[LOAD2]](<4 x i32>), [[LOAD3]](<4 x i32>) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x i32>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<16 x s32>) = G_LOAD %0 :: (load (<16 x s32>), align 32, addrspace 0) - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1 + %1:_(<16 x i32>) = G_LOAD %0(p0) :: (load (<16 x i32>), align 32) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1(<16 x i32>) ... 
--- @@ -9006,82 +9006,82 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 16) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8, align 8) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) - ; CI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 16) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; CI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i32) from unknown-address + 8, align 8) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i32) from unknown-address + 12) + ; CI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD2]](i32), [[LOAD3]](i32) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; VI-LABEL: name: test_load_flat_v2s64_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 16) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8, align 8) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) - ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD 
[[COPY]](p0) :: (load (i32), align 16) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; VI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i32) from unknown-address + 8, align 8) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i32) from unknown-address + 12) + ; VI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD2]](i32), [[LOAD3]](i32) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; GFX9PLUS-LABEL: name: test_load_flat_v2s64_align16 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>)) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p0) :: (load (<2 x i64>)) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) ; ; GFX11PLUS-LABEL: name: test_load_flat_v2s64_align16 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>)) - ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p0) :: (load (<2 x i64>)) + ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) ; ; GFX12-LABEL: name: test_load_flat_v2s64_align16 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>)) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p0) :: (load (<2 x i64>)) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v2s64_align16 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>)) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p0) :: (load (<2 x i64>)) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v2s64_align16 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>)) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x 
i64>) = G_LOAD [[COPY]](p0) :: (load (<2 x i64>)) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v2s64_align16 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>)) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p0) :: (load (<2 x i64>)) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 16, addrspace 0) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<2 x i64>) = G_LOAD %0(p0) :: (load (<2 x i64>)) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x i64>) ... --- @@ -9094,82 +9094,82 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 8) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8, align 8) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) - ; CI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 8) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; CI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i32) from unknown-address + 8, align 8) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i32) from unknown-address + 12) + ; CI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD2]](i32), [[LOAD3]](i32) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; VI-LABEL: name: test_load_flat_v2s64_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 8) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD 
[[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8, align 8) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) - ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 8) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; VI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i32) from unknown-address + 8, align 8) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i32) from unknown-address + 12) + ; VI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD2]](i32), [[LOAD3]](i32) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; GFX9PLUS-LABEL: name: test_load_flat_v2s64_align8 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 8) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p0) :: (load (<2 x i64>), align 8) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) ; ; GFX11PLUS-LABEL: name: test_load_flat_v2s64_align8 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 8) - ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p0) :: (load (<2 x i64>), align 8) + ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) ; ; GFX12-LABEL: name: test_load_flat_v2s64_align8 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 8) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p0) :: (load (<2 x i64>), align 8) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) ; ; UNALIGNED_GFX9PLUS-LABEL: name: 
test_load_flat_v2s64_align8 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 8) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p0) :: (load (<2 x i64>), align 8) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v2s64_align8 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 8) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p0) :: (load (<2 x i64>), align 8) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v2s64_align8 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 8) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p0) :: (load (<2 x i64>), align 8) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 8, addrspace 0) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<2 x i64>) = G_LOAD %0(p0) :: (load (<2 x i64>), align 8) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x i64>) ... 
--- @@ -9182,82 +9182,82 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) - ; CI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; CI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i32) from unknown-address + 8) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i32) from unknown-address + 12) + ; CI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD2]](i32), [[LOAD3]](i32) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; VI-LABEL: name: test_load_flat_v2s64_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) - ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = 
G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; VI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i32) from unknown-address + 8) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i32) from unknown-address + 12) + ; VI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD2]](i32), [[LOAD3]](i32) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; GFX9PLUS-LABEL: name: test_load_flat_v2s64_align4 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 4) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p0) :: (load (<2 x i64>), align 4) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) ; ; GFX11PLUS-LABEL: name: test_load_flat_v2s64_align4 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 4) - ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p0) :: (load (<2 x i64>), align 4) + ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) ; ; GFX12-LABEL: name: test_load_flat_v2s64_align4 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 4) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p0) :: (load (<2 x i64>), align 4) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v2s64_align4 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 4) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p0) :: (load (<2 x i64>), align 4) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v2s64_align4 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 4) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = 
G_LOAD [[COPY]](p0) :: (load (<2 x i64>), align 4) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v2s64_align4 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 4) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p0) :: (load (<2 x i64>), align 4) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 4, addrspace 0) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<2 x i64>) = G_LOAD %0(p0) :: (load (<2 x i64>), align 4) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x i64>) ... --- @@ -9270,229 +9270,229 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) - ; CI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s16) from unknown-address + 8) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s16) from unknown-address + 10) - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s16) from unknown-address + 12) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s16) from unknown-address + 14) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; 
CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i16)) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i16) from unknown-address + 4) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i16) from unknown-address + 6) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; CI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR]](i32), [[OR1]](i32) + ; CI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i16) from unknown-address + 8) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p0) :: (load (i16) from unknown-address + 10) + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (i16) from unknown-address + 12) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i16) from unknown-address + 14) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR2]](i32), [[OR3]](i32) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; VI-LABEL: name: test_load_flat_v2s64_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = 
G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s16) from unknown-address + 8) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s16) from unknown-address + 10) - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s16) from unknown-address + 12) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s16) from unknown-address + 14) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i16)) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i16) from unknown-address + 4) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i16) from unknown-address + 6) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; VI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR]](i32), [[OR1]](i32) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i16) from unknown-address + 8) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p0) :: (load (i16) from unknown-address + 10) + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (i16) from unknown-address + 12) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i16) from unknown-address + 
14) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR2]](i32), [[OR3]](i32) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; GFX9PLUS-LABEL: name: test_load_flat_v2s64_align2 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 2) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p0) :: (load (<2 x i64>), align 2) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) ; ; GFX11PLUS-LABEL: name: test_load_flat_v2s64_align2 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 2) - ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p0) :: (load (<2 x i64>), align 2) + ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) ; ; GFX12-LABEL: name: test_load_flat_v2s64_align2 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 2) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p0) :: (load (<2 x i64>), align 2) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v2s64_align2 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) - ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) - ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; UNALIGNED_GFX9PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT 
[[OR1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] - ; UNALIGNED_GFX9PLUS-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s16) from unknown-address + 8) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s16) from unknown-address + 10) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR3]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s16) from unknown-address + 12) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s16) from unknown-address + 14) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX9PLUS-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR4]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[SHL5]], [[ZEXT1]] - ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR2]](s64), [[OR5]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i16)) + ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2) + ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i16) from unknown-address + 4) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i16) from unknown-address + 6) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX9PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR1]](i32) + 
; UNALIGNED_GFX9PLUS-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C3]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR2:%[0-9]+]]:_(i64) = G_OR [[SHL2]], [[ZEXT]] + ; UNALIGNED_GFX9PLUS-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i16) from unknown-address + 8) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p0) :: (load (i16) from unknown-address + 10) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[OR3]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (i16) from unknown-address + 12) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i16) from unknown-address + 14) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX9PLUS-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[OR4]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C3]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL5:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT1]], [[COPY1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR5:%[0-9]+]]:_(i64) = G_OR [[SHL5]], [[ZEXT1]] + ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[OR2]](i64), [[OR5]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v2s64_align2 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) - ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) - ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6) - ; UNALIGNED_GFX11PLUS-NEXT: 
[[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; UNALIGNED_GFX11PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] - ; UNALIGNED_GFX11PLUS-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s16) from unknown-address + 8) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s16) from unknown-address + 10) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR3]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s16) from unknown-address + 12) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s16) from unknown-address + 14) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX11PLUS-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR4]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[SHL5]], [[ZEXT1]] - ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR2]](s64), [[OR5]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i16)) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2) + ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i16) from unknown-address + 4) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i16) from unknown-address + 6) + ; 
UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C3]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(i64) = G_OR [[SHL2]], [[ZEXT]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i16) from unknown-address + 8) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p0) :: (load (i16) from unknown-address + 10) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[OR3]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (i16) from unknown-address + 12) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i16) from unknown-address + 14) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[OR4]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C3]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL5:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT1]], [[COPY1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR5:%[0-9]+]]:_(i64) = G_OR [[SHL5]], [[ZEXT1]] + ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[OR2]](i64), [[OR5]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v2s64_align2 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) - ; UNALIGNED_GFX12-NEXT: 
[[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6) - ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) - ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] - ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s16) from unknown-address + 8) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s16) from unknown-address + 10) - ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX12-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR3]](s32) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s16) from unknown-address + 12) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s16) from unknown-address + 14) - ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX12-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR4]](s32) - ; UNALIGNED_GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[SHL5]], [[ZEXT1]] - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR2]](s64), [[OR5]](s64) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i16)) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i16) from unknown-address + 2) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR]](i32) + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i16) from unknown-address + 4) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD 
[[PTR_ADD2]](p0) :: (load (i16) from unknown-address + 6) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR1]](i32) + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(i64) = G_OR [[SHL2]], [[ZEXT]] + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i16) from unknown-address + 8) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p0) :: (load (i16) from unknown-address + 10) + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[OR3]](i32) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (i16) from unknown-address + 12) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i16) from unknown-address + 14) + ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX12-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[OR4]](i32) + ; UNALIGNED_GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT1]], [[COPY1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(i64) = G_OR [[SHL5]], [[ZEXT1]] + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[OR2]](i64), [[OR5]](i64) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 2, addrspace 0) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<2 x i64>) = G_LOAD %0(p0) :: (load (<2 x i64>), align 2) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x i64>) ... 
--- @@ -9505,399 +9505,399 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) - ; CI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) - ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) - ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; CI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) - ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD 
[[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) - ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; CI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12) - ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13) - ; CI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; CI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14) - ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15) - ; CI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; CI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; CI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; CI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; CI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s32), [[OR11]](s32) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i8) from unknown-address + 4) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (i8) from unknown-address + 5) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], 
[[ZEXTLOAD3]] + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (i8) from unknown-address + 6) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i8) from unknown-address + 7) + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR2]](i32), [[OR5]](i32) + ; CI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (i8) from unknown-address + 8) + ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (i8) from unknown-address + 9) + ; CI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; CI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (i8) from unknown-address + 10) + ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p0) :: (load (i8) from unknown-address + 11) + ; CI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; CI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; CI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i64) + ; CI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (i8) from unknown-address + 12) + ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (i8) from unknown-address + 13) + ; CI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; CI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (i8) from unknown-address + 14) + ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p0) :: (load (i8) from unknown-address + 15) + ; CI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; CI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; CI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; CI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; CI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR8]](i32), [[OR11]](i32) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; VI-LABEL: name: test_load_flat_v2s64_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = 
G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) - ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) - ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], 
[[ZEXTLOAD8]] - ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12) - ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13) - ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14) - ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15) - ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s32), [[OR11]](s32) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i8) from unknown-address + 4) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (i8) from unknown-address + 5) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (i8) from 
unknown-address + 6) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i8) from unknown-address + 7) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; VI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR2]](i32), [[OR5]](i32) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (i8) from unknown-address + 8) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (i8) from unknown-address + 9) + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; VI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (i8) from unknown-address + 10) + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p0) :: (load (i8) from unknown-address + 11) + ; VI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; VI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; VI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (i8) from unknown-address + 12) + ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (i8) from unknown-address + 13) + ; VI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; VI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (i8) from unknown-address + 14) + ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p0) :: (load (i8) from unknown-address + 15) + ; VI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; VI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; VI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; VI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; VI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR8]](i32), [[OR11]](i32) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; GFX9PLUS-LABEL: name: test_load_flat_v2s64_align1 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 1) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; GFX9PLUS-NEXT: 
[[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p0) :: (load (<2 x i64>), align 1) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) ; ; GFX11PLUS-LABEL: name: test_load_flat_v2s64_align1 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 1) - ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p0) :: (load (<2 x i64>), align 1) + ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) ; ; GFX12-LABEL: name: test_load_flat_v2s64_align1 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 1) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p0) :: (load (<2 x i64>), align 1) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v2s64_align1 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX9PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL 
[[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX9PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; UNALIGNED_GFX9PLUS-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD6]] - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[ZEXTLOAD8]] - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[OR8]], [[C3]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[OR7]] - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR9]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD9]] - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 
14) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[ZEXTLOAD11]] - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] - ; UNALIGNED_GFX9PLUS-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] - ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX9PLUS-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i8) from unknown-address + 4) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (i8) from unknown-address + 5) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (i8) from 
unknown-address + 6) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i8) from unknown-address + 7) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX9PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; UNALIGNED_GFX9PLUS-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (i8) from unknown-address + 8) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (i8) from unknown-address + 9) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (i8) from unknown-address + 10) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p0) :: (load (i8) from unknown-address + 11) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[OR8]], [[C3]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[OR7]] + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[OR9]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (i8) from unknown-address + 12) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (i8) from unknown-address + 13) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (i8) from unknown-address + 14) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p0) :: (load (i8) from unknown-address + 15) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; 
UNALIGNED_GFX9PLUS-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[OR11]], [[C3]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[OR10]] + ; UNALIGNED_GFX9PLUS-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[OR12]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL13:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT1]], [[COPY1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR13:%[0-9]+]]:_(i64) = G_OR [[SHL13]], [[ZEXT1]] + ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[OR6]](i64), [[OR13]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v2s64_align1 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) - ; UNALIGNED_GFX11PLUS-NEXT: 
[[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX11PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; UNALIGNED_GFX11PLUS-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD6]] - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[ZEXTLOAD8]] - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[OR8]], [[C3]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[OR7]] - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR9]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD9]] - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; 
UNALIGNED_GFX11PLUS-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[ZEXTLOAD11]] - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] - ; UNALIGNED_GFX11PLUS-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] - ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i8) from unknown-address + 4) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (i8) from unknown-address + 5) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (i8) from unknown-address + 6) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i8) from unknown-address + 7) + ; UNALIGNED_GFX11PLUS-NEXT: 
[[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (i8) from unknown-address + 8) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (i8) from unknown-address + 9) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (i8) from unknown-address + 10) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p0) :: (load (i8) from unknown-address + 11) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[OR8]], [[C3]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[OR7]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[OR9]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (i8) from unknown-address + 12) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (i8) from unknown-address + 13) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (i8) from unknown-address + 14) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p0) :: (load (i8) from unknown-address + 15) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[OR11]], [[C3]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR 
[[SHL12]], [[OR10]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[OR12]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL13:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT1]], [[COPY1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR13:%[0-9]+]]:_(i64) = G_OR [[SHL13]], [[ZEXT1]] + ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[OR6]](i64), [[OR13]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v2s64_align1 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) - ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) - ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; 
UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; UNALIGNED_GFX12-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; UNALIGNED_GFX12-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) - ; UNALIGNED_GFX12-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD6]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) - ; UNALIGNED_GFX12-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[ZEXTLOAD8]] - ; UNALIGNED_GFX12-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[OR8]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[OR7]] - ; UNALIGNED_GFX12-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR9]](s32) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13) - ; UNALIGNED_GFX12-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD9]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15) - ; UNALIGNED_GFX12-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[ZEXTLOAD11]] - ; UNALIGNED_GFX12-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] - ; UNALIGNED_GFX12-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; UNALIGNED_GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; UNALIGNED_GFX12-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) - ; UNALIGNED_GFX12-NEXT: 
[[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i8) from unknown-address + 4) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (i8) from unknown-address + 5) + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (i8) from unknown-address + 6) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i8) from unknown-address + 7) + ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; UNALIGNED_GFX12-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; UNALIGNED_GFX12-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; 
UNALIGNED_GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (i8) from unknown-address + 8) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (i8) from unknown-address + 9) + ; UNALIGNED_GFX12-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (i8) from unknown-address + 10) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p0) :: (load (i8) from unknown-address + 11) + ; UNALIGNED_GFX12-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX12-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[OR8]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[OR7]] + ; UNALIGNED_GFX12-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[OR9]](i32) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (i8) from unknown-address + 12) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (i8) from unknown-address + 13) + ; UNALIGNED_GFX12-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (i8) from unknown-address + 14) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p0) :: (load (i8) from unknown-address + 15) + ; UNALIGNED_GFX12-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX12-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[OR11]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[OR10]] + ; UNALIGNED_GFX12-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[OR12]](i32) + ; UNALIGNED_GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; UNALIGNED_GFX12-NEXT: [[SHL13:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT1]], [[COPY1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR13:%[0-9]+]]:_(i64) = G_OR [[SHL13]], [[ZEXT1]] + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[OR6]](i64), [[OR13]](i64) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 1, addrspace 0) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<2 x i64>) = G_LOAD %0(p0) :: (load (<2 x i64>), align 1) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x i64>) ... 
--- @@ -9910,142 +9910,142 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 32) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8, align 8) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) - ; CI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s32) from unknown-address + 16, align 16) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s32) from unknown-address + 20) - ; CI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32) - ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 32) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; CI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i32) from unknown-address + 8, align 8) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i32) from unknown-address + 12) + ; CI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD2]](i32), [[LOAD3]](i32) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p0) :: (load (i32) from unknown-address + 16, align 16) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p0) :: (load (i32) from unknown-address + 20) + ; CI-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD4]](i32), [[LOAD5]](i32) + ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; CI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64), [[UV2:%[0-9]+]]:_(i64), 
[[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64), [[UV3]](i64) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; VI-LABEL: name: test_load_flat_v3s64_align32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8, align 8) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) - ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s32) from unknown-address + 16, align 16) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s32) from unknown-address + 20) - ; VI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; VI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i32) from unknown-address + 8, align 8) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i32) from unknown-address + 12) + ; VI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD2]](i32), [[LOAD3]](i32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p0) :: (load (i32) from unknown-address + 16, align 16) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD 
[[PTR_ADD4]](p0) :: (load (i32) from unknown-address + 20) + ; VI-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD4]](i32), [[LOAD5]](i32) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64), [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64), [[UV3]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; GFX9PLUS-LABEL: name: test_load_flat_v3s64_align32 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 32) - ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16, align 16) - ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9PLUS-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p0) :: (load (<2 x i64>), align 32) + ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD]](p0) :: (load (i64) from unknown-address + 16, align 16) + ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[LOAD]](<2 x i64>) + ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; GFX9PLUS-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64), [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[UV]](i64), [[UV1]](i64), [[LOAD1]](i64), [[UV5]](i64) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; GFX11PLUS-LABEL: name: test_load_flat_v3s64_align32 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 32) - ; GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16, align 16) - ; GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX11PLUS-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), 
[[LOAD1]](s64), [[UV5]](s64) - ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p0) :: (load (<2 x i64>), align 32) + ; GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD]](p0) :: (load (i64) from unknown-address + 16, align 16) + ; GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[LOAD]](<2 x i64>) + ; GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; GFX11PLUS-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64), [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[UV]](i64), [[UV1]](i64), [[LOAD1]](i64), [[UV5]](i64) + ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; GFX12-LABEL: name: test_load_flat_v3s64_align32 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 32) - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16, align 16) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p0) :: (load (<2 x i64>), align 32) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD]](p0) :: (load (i64) from unknown-address + 16, align 16) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[LOAD]](<2 x i64>) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64), [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[UV]](i64), [[UV1]](i64), [[LOAD1]](i64), [[UV5]](i64) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v3s64_align32 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 32) - ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD 
[[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16, align 16) - ; UNALIGNED_GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; UNALIGNED_GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; UNALIGNED_GFX9PLUS-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p0) :: (load (<2 x i64>), align 32) + ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD]](p0) :: (load (i64) from unknown-address + 16, align 16) + ; UNALIGNED_GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[LOAD]](<2 x i64>) + ; UNALIGNED_GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; UNALIGNED_GFX9PLUS-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64), [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[UV]](i64), [[UV1]](i64), [[LOAD1]](i64), [[UV5]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v3s64_align32 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 32) - ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16, align 16) - ; UNALIGNED_GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; UNALIGNED_GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; UNALIGNED_GFX11PLUS-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p0) :: (load (<2 x i64>), align 32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD]](p0) :: (load (i64) from unknown-address + 16, align 16) + ; UNALIGNED_GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[LOAD]](<2 x i64>) + ; UNALIGNED_GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; UNALIGNED_GFX11PLUS-NEXT: 
[[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64), [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[UV]](i64), [[UV1]](i64), [[LOAD1]](i64), [[UV5]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v3s64_align32 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 32) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16, align 16) - ; UNALIGNED_GFX12-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; UNALIGNED_GFX12-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p0) :: (load (<2 x i64>), align 32) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD]](p0) :: (load (i64) from unknown-address + 16, align 16) + ; UNALIGNED_GFX12-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[LOAD]](<2 x i64>) + ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; UNALIGNED_GFX12-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64), [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[UV]](i64), [[UV1]](i64), [[LOAD1]](i64), [[UV5]](i64) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 32, addrspace 0) - %2:_(<4 x s64>) = G_IMPLICIT_DEF - %3:_(<4 x s64>) = G_INSERT %2, %1, 0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3 + %1:_(<3 x i64>) = G_LOAD %0(p0) :: (load (<3 x i64>), align 32) + %2:_(<4 x i64>) = G_IMPLICIT_DEF + %3:_(<4 x i64>) = G_INSERT %2, %1(<3 x i64>), 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3(<4 x i64>) ... 
--- @@ -10058,142 +10058,142 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 8) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8, align 8) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) - ; CI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s32) from unknown-address + 16, align 8) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s32) from unknown-address + 20) - ; CI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32) - ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 8) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; CI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i32) from unknown-address + 8, align 8) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i32) from unknown-address + 12) + ; CI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD2]](i32), [[LOAD3]](i32) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p0) :: (load (i32) from unknown-address + 16, align 8) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p0) :: (load (i32) from unknown-address + 20) + ; CI-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD4]](i32), [[LOAD5]](i32) + ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; CI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64), [[UV2:%[0-9]+]]:_(i64), 
[[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64), [[UV3]](i64) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; VI-LABEL: name: test_load_flat_v3s64_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 8) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8, align 8) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) - ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s32) from unknown-address + 16, align 8) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s32) from unknown-address + 20) - ; VI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 8) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; VI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i32) from unknown-address + 8, align 8) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i32) from unknown-address + 12) + ; VI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD2]](i32), [[LOAD3]](i32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p0) :: (load (i32) from unknown-address + 16, align 8) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p0) 
:: (load (i32) from unknown-address + 20) + ; VI-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD4]](i32), [[LOAD5]](i32) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64), [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64), [[UV3]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; GFX9PLUS-LABEL: name: test_load_flat_v3s64_align8 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 8) - ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16) - ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9PLUS-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p0) :: (load (<2 x i64>), align 8) + ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD]](p0) :: (load (i64) from unknown-address + 16) + ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[LOAD]](<2 x i64>) + ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; GFX9PLUS-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64), [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[UV]](i64), [[UV1]](i64), [[LOAD1]](i64), [[UV5]](i64) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; GFX11PLUS-LABEL: name: test_load_flat_v3s64_align8 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 8) - ; GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16) - ; GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX11PLUS-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) - ; GFX11PLUS-NEXT: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p0) :: (load (<2 x i64>), align 8) + ; GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD]](p0) :: (load (i64) from unknown-address + 16) + ; GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[LOAD]](<2 x i64>) + ; GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; GFX11PLUS-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64), [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[UV]](i64), [[UV1]](i64), [[LOAD1]](i64), [[UV5]](i64) + ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; GFX12-LABEL: name: test_load_flat_v3s64_align8 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 8) - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p0) :: (load (<2 x i64>), align 8) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD]](p0) :: (load (i64) from unknown-address + 16) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[LOAD]](<2 x i64>) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64), [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[UV]](i64), [[UV1]](i64), [[LOAD1]](i64), [[UV5]](i64) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v3s64_align8 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 8) - ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16) - ; UNALIGNED_GFX9PLUS-NEXT: 
[[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; UNALIGNED_GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; UNALIGNED_GFX9PLUS-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p0) :: (load (<2 x i64>), align 8) + ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD]](p0) :: (load (i64) from unknown-address + 16) + ; UNALIGNED_GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[LOAD]](<2 x i64>) + ; UNALIGNED_GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; UNALIGNED_GFX9PLUS-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64), [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[UV]](i64), [[UV1]](i64), [[LOAD1]](i64), [[UV5]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v3s64_align8 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 8) - ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16) - ; UNALIGNED_GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; UNALIGNED_GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; UNALIGNED_GFX11PLUS-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p0) :: (load (<2 x i64>), align 8) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD]](p0) :: (load (i64) from unknown-address + 16) + ; UNALIGNED_GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[LOAD]](<2 x i64>) + ; UNALIGNED_GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; UNALIGNED_GFX11PLUS-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64), [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; 
UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[UV]](i64), [[UV1]](i64), [[LOAD1]](i64), [[UV5]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v3s64_align8 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 8) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16) - ; UNALIGNED_GFX12-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; UNALIGNED_GFX12-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p0) :: (load (<2 x i64>), align 8) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD]](p0) :: (load (i64) from unknown-address + 16) + ; UNALIGNED_GFX12-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[LOAD]](<2 x i64>) + ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; UNALIGNED_GFX12-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64), [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[UV]](i64), [[UV1]](i64), [[LOAD1]](i64), [[UV5]](i64) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 8, addrspace 0) - %2:_(<4 x s64>) = G_IMPLICIT_DEF - %3:_(<4 x s64>) = G_INSERT %2, %1, 0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3 + %1:_(<3 x i64>) = G_LOAD %0(p0) :: (load (<3 x i64>), align 8) + %2:_(<4 x i64>) = G_IMPLICIT_DEF + %3:_(<4 x i64>) = G_INSERT %2, %1(<3 x i64>), 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3(<4 x i64>) ... 
--- @@ -10206,594 +10206,594 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) - ; CI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) - ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) - ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; CI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) - ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD 
[[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) - ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; CI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12) - ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13) - ; CI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; CI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14) - ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15) - ; CI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; CI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; CI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; CI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; CI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s32), [[OR11]](s32) - ; CI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD15]](p0) :: (load (s8) from unknown-address + 16) - ; CI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD16]](p0) :: (load (s8) from unknown-address + 17) - ; CI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD13]], [[C1]](s32) - ; CI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[ZEXTLOAD12]] - ; CI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD17]](p0) :: (load (s8) from unknown-address + 18) - ; CI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD17]], [[C]](s64) - ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p0) :: (load (s8) from unknown-address + 19) - ; CI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; CI-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[SHL13]], [[ZEXTLOAD14]] - ; CI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[OR13]], [[C3]](s32) - ; CI-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[SHL14]], [[OR12]] - ; CI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; CI-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD19]](p0) :: (load (s8) from unknown-address + 20) - ; CI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD20]](p0) :: (load (s8) from unknown-address + 21) - ; CI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD16]], [[C1]](s32) - ; CI-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[SHL15]], [[ZEXTLOAD15]] - ; CI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD21]](p0) :: (load (s8) from unknown-address + 22) - ; CI-NEXT: 
[[PTR_ADD22:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD21]], [[C]](s64) - ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p0) :: (load (s8) from unknown-address + 23) - ; CI-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; CI-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[SHL16]], [[ZEXTLOAD17]] - ; CI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[OR16]], [[C3]](s32) - ; CI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] - ; CI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR14]](s32), [[OR17]](s32) - ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i8) from unknown-address + 4) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (i8) from unknown-address + 5) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (i8) from unknown-address + 6) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i8) from unknown-address + 7) + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR2]](i32), [[OR5]](i32) + ; CI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-NEXT: 
[[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (i8) from unknown-address + 8) + ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (i8) from unknown-address + 9) + ; CI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; CI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (i8) from unknown-address + 10) + ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p0) :: (load (i8) from unknown-address + 11) + ; CI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; CI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; CI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i64) + ; CI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (i8) from unknown-address + 12) + ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (i8) from unknown-address + 13) + ; CI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; CI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (i8) from unknown-address + 14) + ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p0) :: (load (i8) from unknown-address + 15) + ; CI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; CI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; CI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; CI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; CI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR8]](i32), [[OR11]](i32) + ; CI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; CI-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD15]](p0) :: (load (i8) from unknown-address + 16) + ; CI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD16]](p0) :: (load (i8) from unknown-address + 17) + ; CI-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD13]], [[C1]](i32) + ; CI-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[ZEXTLOAD12]] + ; CI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD17]](p0) :: (load (i8) from unknown-address + 18) + ; CI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD17]], [[C]](i64) + ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD18]](p0) :: (load (i8) from unknown-address + 19) + ; CI-NEXT: [[SHL13:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; CI-NEXT: [[OR13:%[0-9]+]]:_(i32) = G_OR [[SHL13]], [[ZEXTLOAD14]] + ; CI-NEXT: [[SHL14:%[0-9]+]]:_(i32) = G_SHL [[OR13]], 
[[C3]](i32) + ; CI-NEXT: [[OR14:%[0-9]+]]:_(i32) = G_OR [[SHL14]], [[OR12]] + ; CI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C4]](i64) + ; CI-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD19]](p0) :: (load (i8) from unknown-address + 20) + ; CI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD19]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD20]](p0) :: (load (i8) from unknown-address + 21) + ; CI-NEXT: [[SHL15:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD16]], [[C1]](i32) + ; CI-NEXT: [[OR15:%[0-9]+]]:_(i32) = G_OR [[SHL15]], [[ZEXTLOAD15]] + ; CI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD19]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD21]](p0) :: (load (i8) from unknown-address + 22) + ; CI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD21]], [[C]](i64) + ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD22]](p0) :: (load (i8) from unknown-address + 23) + ; CI-NEXT: [[SHL16:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; CI-NEXT: [[OR16:%[0-9]+]]:_(i32) = G_OR [[SHL16]], [[ZEXTLOAD17]] + ; CI-NEXT: [[SHL17:%[0-9]+]]:_(i32) = G_SHL [[OR16]], [[C3]](i32) + ; CI-NEXT: [[OR17:%[0-9]+]]:_(i32) = G_OR [[SHL17]], [[OR15]] + ; CI-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR14]](i32), [[OR17]](i32) + ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; CI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64), [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64), [[UV3]](i64) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; VI-LABEL: name: test_load_flat_v3s64_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load 
(s8) from unknown-address + 5) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) - ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) - ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12) - ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13) - ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14) - ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15) - ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s32), [[OR11]](s32) - ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD15]](p0) :: 
(load (s8) from unknown-address + 16) - ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD16]](p0) :: (load (s8) from unknown-address + 17) - ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD13]], [[C1]](s32) - ; VI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[ZEXTLOAD12]] - ; VI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD17]](p0) :: (load (s8) from unknown-address + 18) - ; VI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD17]], [[C]](s64) - ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p0) :: (load (s8) from unknown-address + 19) - ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[SHL13]], [[ZEXTLOAD14]] - ; VI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[OR13]], [[C3]](s32) - ; VI-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[SHL14]], [[OR12]] - ; VI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD19]](p0) :: (load (s8) from unknown-address + 20) - ; VI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD20]](p0) :: (load (s8) from unknown-address + 21) - ; VI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD16]], [[C1]](s32) - ; VI-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[SHL15]], [[ZEXTLOAD15]] - ; VI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD21]](p0) :: (load (s8) from unknown-address + 22) - ; VI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD21]], [[C]](s64) - ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p0) :: (load (s8) from unknown-address + 23) - ; VI-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; VI-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[SHL16]], [[ZEXTLOAD17]] - ; VI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[OR16]], [[C3]](s32) - ; VI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] - ; VI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR14]](s32), [[OR17]](s32) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: 
[[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i8) from unknown-address + 4) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (i8) from unknown-address + 5) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (i8) from unknown-address + 6) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i8) from unknown-address + 7) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; VI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR2]](i32), [[OR5]](i32) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (i8) from unknown-address + 8) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (i8) from unknown-address + 9) + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; VI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (i8) from unknown-address + 10) + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p0) :: (load (i8) from unknown-address + 11) + ; VI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; VI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; VI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (i8) from unknown-address + 12) + ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (i8) from unknown-address + 13) + ; VI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; VI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD 
[[PTR_ADD13]](p0) :: (load (i8) from unknown-address + 14) + ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p0) :: (load (i8) from unknown-address + 15) + ; VI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; VI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; VI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; VI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; VI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR8]](i32), [[OR11]](i32) + ; VI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; VI-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD15]](p0) :: (load (i8) from unknown-address + 16) + ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD16]](p0) :: (load (i8) from unknown-address + 17) + ; VI-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD13]], [[C1]](i32) + ; VI-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[ZEXTLOAD12]] + ; VI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD17]](p0) :: (load (i8) from unknown-address + 18) + ; VI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD17]], [[C]](i64) + ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD18]](p0) :: (load (i8) from unknown-address + 19) + ; VI-NEXT: [[SHL13:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR13:%[0-9]+]]:_(i32) = G_OR [[SHL13]], [[ZEXTLOAD14]] + ; VI-NEXT: [[SHL14:%[0-9]+]]:_(i32) = G_SHL [[OR13]], [[C3]](i32) + ; VI-NEXT: [[OR14:%[0-9]+]]:_(i32) = G_OR [[SHL14]], [[OR12]] + ; VI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD19]](p0) :: (load (i8) from unknown-address + 20) + ; VI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD19]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD20]](p0) :: (load (i8) from unknown-address + 21) + ; VI-NEXT: [[SHL15:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD16]], [[C1]](i32) + ; VI-NEXT: [[OR15:%[0-9]+]]:_(i32) = G_OR [[SHL15]], [[ZEXTLOAD15]] + ; VI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD19]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD21]](p0) :: (load (i8) from unknown-address + 22) + ; VI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD21]], [[C]](i64) + ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD22]](p0) :: (load (i8) from unknown-address + 23) + ; VI-NEXT: [[SHL16:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; VI-NEXT: [[OR16:%[0-9]+]]:_(i32) = G_OR [[SHL16]], [[ZEXTLOAD17]] + ; VI-NEXT: [[SHL17:%[0-9]+]]:_(i32) = G_SHL [[OR16]], [[C3]](i32) + ; VI-NEXT: [[OR17:%[0-9]+]]:_(i32) = G_OR [[SHL17]], [[OR15]] + ; VI-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR14]](i32), [[OR17]](i32) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64), [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64), [[UV3]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; GFX9PLUS-LABEL: name: 
test_load_flat_v3s64_align1 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 1) - ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16, align 1) - ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9PLUS-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p0) :: (load (<2 x i64>), align 1) + ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD]](p0) :: (load (i64) from unknown-address + 16, align 1) + ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[LOAD]](<2 x i64>) + ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; GFX9PLUS-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64), [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[UV]](i64), [[UV1]](i64), [[LOAD1]](i64), [[UV5]](i64) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; GFX11PLUS-LABEL: name: test_load_flat_v3s64_align1 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 1) - ; GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16, align 1) - ; GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX11PLUS-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) - ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p0) :: (load (<2 x i64>), align 1) + ; GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD]](p0) :: (load (i64) from unknown-address + 16, align 1) + ; GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[LOAD]](<2 x i64>) + ; 
GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; GFX11PLUS-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64), [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[UV]](i64), [[UV1]](i64), [[LOAD1]](i64), [[UV5]](i64) + ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; GFX12-LABEL: name: test_load_flat_v3s64_align1 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 1) - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16, align 1) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p0) :: (load (<2 x i64>), align 1) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD]](p0) :: (load (i64) from unknown-address + 16, align 1) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[LOAD]](<2 x i64>) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64), [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[UV]](i64), [[UV1]](i64), [[LOAD1]](i64), [[UV5]](i64) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v3s64_align1 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; UNALIGNED_GFX9PLUS-NEXT: 
[[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX9PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX9PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; UNALIGNED_GFX9PLUS-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD6]] - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) - ; UNALIGNED_GFX9PLUS-NEXT: 
[[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[ZEXTLOAD8]] - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[OR8]], [[C3]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[OR7]] - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR9]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD9]] - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[ZEXTLOAD11]] - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] - ; UNALIGNED_GFX9PLUS-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] - ; UNALIGNED_GFX9PLUS-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD15]](p0) :: (load (s8) from unknown-address + 16) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD16]](p0) :: (load (s8) from unknown-address + 17) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD13]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[SHL14]], [[ZEXTLOAD12]] - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD17]](p0) :: (load (s8) from unknown-address + 18) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD17]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p0) :: (load (s8) from unknown-address + 19) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[SHL15]], [[ZEXTLOAD14]] - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[OR15]], [[C3]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR16:%[0-9]+]]:_(s32) = 
G_OR [[SHL16]], [[OR14]] - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXT2:%[0-9]+]]:_(s64) = G_ZEXT [[OR16]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD19]](p0) :: (load (s8) from unknown-address + 20) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD20]](p0) :: (load (s8) from unknown-address + 21) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD16]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[ZEXTLOAD15]] - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD21]](p0) :: (load (s8) from unknown-address + 22) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD21]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p0) :: (load (s8) from unknown-address + 23) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR18:%[0-9]+]]:_(s32) = G_OR [[SHL18]], [[ZEXTLOAD17]] - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] - ; UNALIGNED_GFX9PLUS-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] - ; UNALIGNED_GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; UNALIGNED_GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; 
UNALIGNED_GFX9PLUS-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX9PLUS-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i8) from unknown-address + 4) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (i8) from unknown-address + 5) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (i8) from unknown-address + 6) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i8) from unknown-address + 7) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX9PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; UNALIGNED_GFX9PLUS-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (i8) from unknown-address + 8) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (i8) from unknown-address + 9) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (i8) from unknown-address + 10) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p0) :: (load (i8) from unknown-address + 11) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[OR8]], [[C3]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: 
[[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[OR7]] + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[OR9]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (i8) from unknown-address + 12) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (i8) from unknown-address + 13) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (i8) from unknown-address + 14) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p0) :: (load (i8) from unknown-address + 15) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[OR11]], [[C3]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[OR10]] + ; UNALIGNED_GFX9PLUS-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[OR12]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL13:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT1]], [[COPY1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR13:%[0-9]+]]:_(i64) = G_OR [[SHL13]], [[ZEXT1]] + ; UNALIGNED_GFX9PLUS-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD15]](p0) :: (load (i8) from unknown-address + 16) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD16]](p0) :: (load (i8) from unknown-address + 17) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL14:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD13]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR14:%[0-9]+]]:_(i32) = G_OR [[SHL14]], [[ZEXTLOAD12]] + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C2]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD17]](p0) :: (load (i8) from unknown-address + 18) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD17]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD18]](p0) :: (load (i8) from unknown-address + 19) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL15:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR15:%[0-9]+]]:_(i32) = G_OR [[SHL15]], [[ZEXTLOAD14]] + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL16:%[0-9]+]]:_(i32) = G_SHL [[OR15]], [[C3]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR16:%[0-9]+]]:_(i32) = G_OR [[SHL16]], [[OR14]] + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXT2:%[0-9]+]]:_(i64) = G_ZEXT [[OR16]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C4]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: 
[[ZEXTLOAD15:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD19]](p0) :: (load (i8) from unknown-address + 20) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD19]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD20]](p0) :: (load (i8) from unknown-address + 21) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL17:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD16]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR17:%[0-9]+]]:_(i32) = G_OR [[SHL17]], [[ZEXTLOAD15]] + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD19]], [[C2]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD21]](p0) :: (load (i8) from unknown-address + 22) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD21]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD22]](p0) :: (load (i8) from unknown-address + 23) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL18:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR18:%[0-9]+]]:_(i32) = G_OR [[SHL18]], [[ZEXTLOAD17]] + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL19:%[0-9]+]]:_(i32) = G_SHL [[OR18]], [[C3]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR19:%[0-9]+]]:_(i32) = G_OR [[SHL19]], [[OR17]] + ; UNALIGNED_GFX9PLUS-NEXT: [[ANYEXT2:%[0-9]+]]:_(i64) = G_ANYEXT [[OR19]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL20:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT2]], [[COPY2]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR20:%[0-9]+]]:_(i64) = G_OR [[SHL20]], [[ZEXT2]] + ; UNALIGNED_GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; UNALIGNED_GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64), [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[OR6]](i64), [[OR13]](i64), [[OR20]](i64), [[UV3]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v3s64_align1 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; 
UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX11PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; UNALIGNED_GFX11PLUS-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD6]] - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[ZEXTLOAD8]] - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[OR8]], [[C3]](s32) - ; 
UNALIGNED_GFX11PLUS-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[OR7]] - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR9]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD9]] - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[ZEXTLOAD11]] - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] - ; UNALIGNED_GFX11PLUS-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] - ; UNALIGNED_GFX11PLUS-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD15]](p0) :: (load (s8) from unknown-address + 16) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD16]](p0) :: (load (s8) from unknown-address + 17) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD13]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[SHL14]], [[ZEXTLOAD12]] - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD17]](p0) :: (load (s8) from unknown-address + 18) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD17]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p0) :: (load (s8) from unknown-address + 19) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[SHL15]], [[ZEXTLOAD14]] - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[OR15]], [[C3]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[SHL16]], [[OR14]] - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXT2:%[0-9]+]]:_(s64) = G_ZEXT [[OR16]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], 
[[C4]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD19]](p0) :: (load (s8) from unknown-address + 20) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD20]](p0) :: (load (s8) from unknown-address + 21) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD16]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[ZEXTLOAD15]] - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD21]](p0) :: (load (s8) from unknown-address + 22) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD21]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p0) :: (load (s8) from unknown-address + 23) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR18:%[0-9]+]]:_(s32) = G_OR [[SHL18]], [[ZEXTLOAD17]] - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] - ; UNALIGNED_GFX11PLUS-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] - ; UNALIGNED_GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; UNALIGNED_GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; 
UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i8) from unknown-address + 4) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (i8) from unknown-address + 5) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (i8) from unknown-address + 6) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i8) from unknown-address + 7) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (i8) from unknown-address + 8) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (i8) from unknown-address + 9) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (i8) from unknown-address + 10) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p0) :: (load (i8) from unknown-address + 11) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[OR8]], [[C3]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[OR7]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[OR9]](i32) + ; 
UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (i8) from unknown-address + 12) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (i8) from unknown-address + 13) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (i8) from unknown-address + 14) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p0) :: (load (i8) from unknown-address + 15) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[OR11]], [[C3]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[OR10]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[OR12]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL13:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT1]], [[COPY1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR13:%[0-9]+]]:_(i64) = G_OR [[SHL13]], [[ZEXT1]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD15]](p0) :: (load (i8) from unknown-address + 16) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD16]](p0) :: (load (i8) from unknown-address + 17) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL14:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD13]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR14:%[0-9]+]]:_(i32) = G_OR [[SHL14]], [[ZEXTLOAD12]] + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C2]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD17]](p0) :: (load (i8) from unknown-address + 18) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD17]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD18]](p0) :: (load (i8) from unknown-address + 19) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL15:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR15:%[0-9]+]]:_(i32) = G_OR [[SHL15]], [[ZEXTLOAD14]] + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL16:%[0-9]+]]:_(i32) = G_SHL [[OR15]], [[C3]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR16:%[0-9]+]]:_(i32) = G_OR [[SHL16]], [[OR14]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXT2:%[0-9]+]]:_(i64) = G_ZEXT [[OR16]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C4]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD19]](p0) :: (load (i8) from unknown-address + 20) + ; 
UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD19]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD20]](p0) :: (load (i8) from unknown-address + 21) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL17:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD16]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR17:%[0-9]+]]:_(i32) = G_OR [[SHL17]], [[ZEXTLOAD15]] + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD19]], [[C2]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD21]](p0) :: (load (i8) from unknown-address + 22) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD21]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD22]](p0) :: (load (i8) from unknown-address + 23) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL18:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR18:%[0-9]+]]:_(i32) = G_OR [[SHL18]], [[ZEXTLOAD17]] + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL19:%[0-9]+]]:_(i32) = G_SHL [[OR18]], [[C3]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR19:%[0-9]+]]:_(i32) = G_OR [[SHL19]], [[OR17]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ANYEXT2:%[0-9]+]]:_(i64) = G_ANYEXT [[OR19]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL20:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT2]], [[COPY2]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR20:%[0-9]+]]:_(i64) = G_OR [[SHL20]], [[ZEXT2]] + ; UNALIGNED_GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; UNALIGNED_GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64), [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[OR6]](i64), [[OR13]](i64), [[OR20]](i64), [[UV3]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v3s64_align1 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: 
[[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) - ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) - ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; UNALIGNED_GFX12-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; UNALIGNED_GFX12-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) - ; UNALIGNED_GFX12-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD6]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) - ; UNALIGNED_GFX12-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[ZEXTLOAD8]] - ; UNALIGNED_GFX12-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[OR8]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[OR7]] - ; UNALIGNED_GFX12-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR9]](s32) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load 
(s8) from unknown-address + 12) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13) - ; UNALIGNED_GFX12-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD9]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15) - ; UNALIGNED_GFX12-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[ZEXTLOAD11]] - ; UNALIGNED_GFX12-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] - ; UNALIGNED_GFX12-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; UNALIGNED_GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; UNALIGNED_GFX12-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] - ; UNALIGNED_GFX12-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD15]](p0) :: (load (s8) from unknown-address + 16) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD16]](p0) :: (load (s8) from unknown-address + 17) - ; UNALIGNED_GFX12-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD13]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[SHL14]], [[ZEXTLOAD12]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD17]](p0) :: (load (s8) from unknown-address + 18) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD17]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p0) :: (load (s8) from unknown-address + 19) - ; UNALIGNED_GFX12-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[SHL15]], [[ZEXTLOAD14]] - ; UNALIGNED_GFX12-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[OR15]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[SHL16]], [[OR14]] - ; UNALIGNED_GFX12-NEXT: [[ZEXT2:%[0-9]+]]:_(s64) = G_ZEXT [[OR16]](s32) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD19]](p0) :: (load (s8) from unknown-address + 20) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD20]](p0) :: (load (s8) from unknown-address + 21) - ; UNALIGNED_GFX12-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD16]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: 
[[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[ZEXTLOAD15]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD21]](p0) :: (load (s8) from unknown-address + 22) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD21]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p0) :: (load (s8) from unknown-address + 23) - ; UNALIGNED_GFX12-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR18:%[0-9]+]]:_(s32) = G_OR [[SHL18]], [[ZEXTLOAD17]] - ; UNALIGNED_GFX12-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] - ; UNALIGNED_GFX12-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) - ; UNALIGNED_GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; UNALIGNED_GFX12-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] - ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; UNALIGNED_GFX12-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i8) from unknown-address + 4) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: 
[[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (i8) from unknown-address + 5) + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (i8) from unknown-address + 6) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i8) from unknown-address + 7) + ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; UNALIGNED_GFX12-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; UNALIGNED_GFX12-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (i8) from unknown-address + 8) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (i8) from unknown-address + 9) + ; UNALIGNED_GFX12-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (i8) from unknown-address + 10) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p0) :: (load (i8) from unknown-address + 11) + ; UNALIGNED_GFX12-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX12-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[OR8]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[OR7]] + ; UNALIGNED_GFX12-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[OR9]](i32) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (i8) from unknown-address + 12) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (i8) from unknown-address + 13) + ; UNALIGNED_GFX12-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD 
[[PTR_ADD13]](p0) :: (load (i8) from unknown-address + 14) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p0) :: (load (i8) from unknown-address + 15) + ; UNALIGNED_GFX12-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX12-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[OR11]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[OR10]] + ; UNALIGNED_GFX12-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[OR12]](i32) + ; UNALIGNED_GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; UNALIGNED_GFX12-NEXT: [[SHL13:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT1]], [[COPY1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR13:%[0-9]+]]:_(i64) = G_OR [[SHL13]], [[ZEXT1]] + ; UNALIGNED_GFX12-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD15]](p0) :: (load (i8) from unknown-address + 16) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD16]](p0) :: (load (i8) from unknown-address + 17) + ; UNALIGNED_GFX12-NEXT: [[SHL14:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD13]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR14:%[0-9]+]]:_(i32) = G_OR [[SHL14]], [[ZEXTLOAD12]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C2]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD17]](p0) :: (load (i8) from unknown-address + 18) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD17]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD18]](p0) :: (load (i8) from unknown-address + 19) + ; UNALIGNED_GFX12-NEXT: [[SHL15:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR15:%[0-9]+]]:_(i32) = G_OR [[SHL15]], [[ZEXTLOAD14]] + ; UNALIGNED_GFX12-NEXT: [[SHL16:%[0-9]+]]:_(i32) = G_SHL [[OR15]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR16:%[0-9]+]]:_(i32) = G_OR [[SHL16]], [[OR14]] + ; UNALIGNED_GFX12-NEXT: [[ZEXT2:%[0-9]+]]:_(i64) = G_ZEXT [[OR16]](i32) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C4]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD19]](p0) :: (load (i8) from unknown-address + 20) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD19]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD20]](p0) :: (load (i8) from unknown-address + 21) + ; UNALIGNED_GFX12-NEXT: [[SHL17:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD16]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR17:%[0-9]+]]:_(i32) = G_OR [[SHL17]], [[ZEXTLOAD15]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD19]], [[C2]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD21]](p0) :: (load (i8) from unknown-address + 22) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD21]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD22]](p0) :: (load (i8) from unknown-address + 23) + ; UNALIGNED_GFX12-NEXT: [[SHL18:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; 
UNALIGNED_GFX12-NEXT: [[OR18:%[0-9]+]]:_(i32) = G_OR [[SHL18]], [[ZEXTLOAD17]] + ; UNALIGNED_GFX12-NEXT: [[SHL19:%[0-9]+]]:_(i32) = G_SHL [[OR18]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR19:%[0-9]+]]:_(i32) = G_OR [[SHL19]], [[OR17]] + ; UNALIGNED_GFX12-NEXT: [[ANYEXT2:%[0-9]+]]:_(i64) = G_ANYEXT [[OR19]](i32) + ; UNALIGNED_GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; UNALIGNED_GFX12-NEXT: [[SHL20:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT2]], [[COPY2]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR20:%[0-9]+]]:_(i64) = G_OR [[SHL20]], [[ZEXT2]] + ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; UNALIGNED_GFX12-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64), [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[OR6]](i64), [[OR13]](i64), [[OR20]](i64), [[UV3]](i64) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 1, addrspace 0) - %2:_(<4 x s64>) = G_IMPLICIT_DEF - %3:_(<4 x s64>) = G_INSERT %2, %1, 0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3 + %1:_(<3 x i64>) = G_LOAD %0(p0) :: (load (<3 x i64>), align 1) + %2:_(<4 x i64>) = G_IMPLICIT_DEF + %3:_(<4 x i64>) = G_INSERT %2, %1(<3 x i64>), 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3(<4 x i64>) ... --- @@ -10806,130 +10806,130 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 32) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8, align 8) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) - ; CI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s32) from unknown-address + 16, align 16) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s32) from unknown-address + 20) - ; CI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32) - ; CI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s32) from unknown-address + 24, align 8) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s32) from unknown-address + 28) - ; CI-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD6]](s32), 
[[LOAD7]](s32) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 32) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; CI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i32) from unknown-address + 8, align 8) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i32) from unknown-address + 12) + ; CI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD2]](i32), [[LOAD3]](i32) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p0) :: (load (i32) from unknown-address + 16, align 16) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p0) :: (load (i32) from unknown-address + 20) + ; CI-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD4]](i32), [[LOAD5]](i32) + ; CI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 24 + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p0) :: (load (i32) from unknown-address + 24, align 8) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i32) from unknown-address + 28) + ; CI-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD6]](i32), [[LOAD7]](i32) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64), [[MV3]](i64) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; VI-LABEL: name: test_load_flat_v4s64_align32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8, align 8) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) - ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], 
[[C2]](s64) - ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s32) from unknown-address + 16, align 16) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s32) from unknown-address + 20) - ; VI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s32) from unknown-address + 24, align 8) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s32) from unknown-address + 28) - ; VI-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD6]](s32), [[LOAD7]](s32) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; VI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i32) from unknown-address + 8, align 8) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i32) from unknown-address + 12) + ; VI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD2]](i32), [[LOAD3]](i32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p0) :: (load (i32) from unknown-address + 16, align 16) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p0) :: (load (i32) from unknown-address + 20) + ; VI-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD4]](i32), [[LOAD5]](i32) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 24 + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p0) :: (load (i32) from unknown-address + 24, align 8) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i32) from unknown-address + 28) + ; VI-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD6]](i32), [[LOAD7]](i32) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64), [[MV3]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; GFX9PLUS-LABEL: name: test_load_flat_v4s64_align32 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 32) - ; 
GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x s64>) from unknown-address + 16) - ; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p0) :: (load (<2 x i64>), align 32) + ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x i64>) from unknown-address + 16) + ; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i64>) = G_CONCAT_VECTORS [[LOAD]](<2 x i64>), [[LOAD1]](<2 x i64>) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x i64>) ; ; GFX11PLUS-LABEL: name: test_load_flat_v4s64_align32 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 32) - ; GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x s64>) from unknown-address + 16) - ; GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) - ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p0) :: (load (<2 x i64>), align 32) + ; GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x i64>) from unknown-address + 16) + ; GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i64>) = G_CONCAT_VECTORS [[LOAD]](<2 x i64>), [[LOAD1]](<2 x i64>) + ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x i64>) ; ; GFX12-LABEL: name: test_load_flat_v4s64_align32 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 32) - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x s64>) from unknown-address + 16) - ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p0) :: (load (<2 x i64>), align 32) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x i64>) from unknown-address + 16) + ; GFX12-NEXT: 
[[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i64>) = G_CONCAT_VECTORS [[LOAD]](<2 x i64>), [[LOAD1]](<2 x i64>) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x i64>) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v4s64_align32 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 32) - ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x s64>) from unknown-address + 16) - ; UNALIGNED_GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p0) :: (load (<2 x i64>), align 32) + ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x i64>) from unknown-address + 16) + ; UNALIGNED_GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i64>) = G_CONCAT_VECTORS [[LOAD]](<2 x i64>), [[LOAD1]](<2 x i64>) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x i64>) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v4s64_align32 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 32) - ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x s64>) from unknown-address + 16) - ; UNALIGNED_GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p0) :: (load (<2 x i64>), align 32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x i64>) from unknown-address + 16) + ; UNALIGNED_GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i64>) = G_CONCAT_VECTORS [[LOAD]](<2 x i64>), [[LOAD1]](<2 x i64>) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x i64>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v4s64_align32 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 32) - 
; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x s64>) from unknown-address + 16) - ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p0) :: (load (<2 x i64>), align 32) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x i64>) from unknown-address + 16) + ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i64>) = G_CONCAT_VECTORS [[LOAD]](<2 x i64>), [[LOAD1]](<2 x i64>) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x i64>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<4 x s64>) = G_LOAD %0 :: (load (<4 x s64>), align 32, addrspace 0) - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 + %1:_(<4 x i64>) = G_LOAD %0(p0) :: (load (<4 x i64>)) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1(<4 x i64>) ... --- @@ -10942,130 +10942,130 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 8) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8, align 8) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) - ; CI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s32) from unknown-address + 16, align 8) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s32) from unknown-address + 20) - ; CI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32) - ; CI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s32) from unknown-address + 24, align 8) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s32) from unknown-address + 28) - ; CI-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD6]](s32), [[LOAD7]](s32) - ; CI-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 8) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; CI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i32) from unknown-address + 8, align 8) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i32) from unknown-address + 12) + ; CI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD2]](i32), [[LOAD3]](i32) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p0) :: (load (i32) from unknown-address + 16, align 8) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p0) :: (load (i32) from unknown-address + 20) + ; CI-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD4]](i32), [[LOAD5]](i32) + ; CI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 24 + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p0) :: (load (i32) from unknown-address + 24, align 8) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i32) from unknown-address + 28) + ; CI-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD6]](i32), [[LOAD7]](i32) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64), [[MV3]](i64) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; VI-LABEL: name: test_load_flat_v4s64_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 8) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8, align 8) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) - ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: 
[[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s32) from unknown-address + 16, align 8) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s32) from unknown-address + 20) - ; VI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s32) from unknown-address + 24, align 8) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s32) from unknown-address + 28) - ; VI-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD6]](s32), [[LOAD7]](s32) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 8) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; VI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i32) from unknown-address + 8, align 8) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i32) from unknown-address + 12) + ; VI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD2]](i32), [[LOAD3]](i32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p0) :: (load (i32) from unknown-address + 16, align 8) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p0) :: (load (i32) from unknown-address + 20) + ; VI-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD4]](i32), [[LOAD5]](i32) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 24 + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p0) :: (load (i32) from unknown-address + 24, align 8) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i32) from unknown-address + 28) + ; VI-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD6]](i32), [[LOAD7]](i32) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64), [[MV3]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; GFX9PLUS-LABEL: name: test_load_flat_v4s64_align8 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 8) - ; GFX9PLUS-NEXT: 
[[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x s64>) from unknown-address + 16, align 8) - ; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p0) :: (load (<2 x i64>), align 8) + ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x i64>) from unknown-address + 16, align 8) + ; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i64>) = G_CONCAT_VECTORS [[LOAD]](<2 x i64>), [[LOAD1]](<2 x i64>) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x i64>) ; ; GFX11PLUS-LABEL: name: test_load_flat_v4s64_align8 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 8) - ; GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x s64>) from unknown-address + 16, align 8) - ; GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) - ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p0) :: (load (<2 x i64>), align 8) + ; GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x i64>) from unknown-address + 16, align 8) + ; GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i64>) = G_CONCAT_VECTORS [[LOAD]](<2 x i64>), [[LOAD1]](<2 x i64>) + ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x i64>) ; ; GFX12-LABEL: name: test_load_flat_v4s64_align8 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 8) - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x s64>) from unknown-address + 16, align 8) - ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p0) :: (load (<2 x i64>), align 8) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x i64>) from unknown-address + 16, align 8) 
+ ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i64>) = G_CONCAT_VECTORS [[LOAD]](<2 x i64>), [[LOAD1]](<2 x i64>) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x i64>) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v4s64_align8 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 8) - ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x s64>) from unknown-address + 16, align 8) - ; UNALIGNED_GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p0) :: (load (<2 x i64>), align 8) + ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x i64>) from unknown-address + 16, align 8) + ; UNALIGNED_GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i64>) = G_CONCAT_VECTORS [[LOAD]](<2 x i64>), [[LOAD1]](<2 x i64>) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x i64>) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v4s64_align8 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 8) - ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x s64>) from unknown-address + 16, align 8) - ; UNALIGNED_GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p0) :: (load (<2 x i64>), align 8) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x i64>) from unknown-address + 16, align 8) + ; UNALIGNED_GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i64>) = G_CONCAT_VECTORS [[LOAD]](<2 x i64>), [[LOAD1]](<2 x i64>) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x i64>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v4s64_align8 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD 
[[COPY]](p0) :: (load (<2 x s64>), align 8) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x s64>) from unknown-address + 16, align 8) - ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p0) :: (load (<2 x i64>), align 8) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x i64>) from unknown-address + 16, align 8) + ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i64>) = G_CONCAT_VECTORS [[LOAD]](<2 x i64>), [[LOAD1]](<2 x i64>) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x i64>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<4 x s64>) = G_LOAD %0 :: (load (<4 x s64>), align 8, addrspace 0) - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 + %1:_(<4 x i64>) = G_LOAD %0(p0) :: (load (<4 x i64>), align 8) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1(<4 x i64>) ... --- @@ -11078,738 +11078,738 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; 
CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) - ; CI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) - ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) - ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; CI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) - ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) - ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; CI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12) - ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13) - ; CI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; CI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14) - ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15) - ; CI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; CI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; CI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; CI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; CI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s32), [[OR11]](s32) - ; CI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD15]](p0) :: (load (s8) from unknown-address + 16) - ; CI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD16]](p0) :: (load (s8) from unknown-address + 17) - ; CI-NEXT: 
[[SHL12:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD13]], [[C1]](s32) - ; CI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[ZEXTLOAD12]] - ; CI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD17]](p0) :: (load (s8) from unknown-address + 18) - ; CI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD17]], [[C]](s64) - ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p0) :: (load (s8) from unknown-address + 19) - ; CI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; CI-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[SHL13]], [[ZEXTLOAD14]] - ; CI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[OR13]], [[C3]](s32) - ; CI-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[SHL14]], [[OR12]] - ; CI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; CI-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD19]](p0) :: (load (s8) from unknown-address + 20) - ; CI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD20]](p0) :: (load (s8) from unknown-address + 21) - ; CI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD16]], [[C1]](s32) - ; CI-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[SHL15]], [[ZEXTLOAD15]] - ; CI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD21]](p0) :: (load (s8) from unknown-address + 22) - ; CI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD21]], [[C]](s64) - ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p0) :: (load (s8) from unknown-address + 23) - ; CI-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; CI-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[SHL16]], [[ZEXTLOAD17]] - ; CI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[OR16]], [[C3]](s32) - ; CI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] - ; CI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR14]](s32), [[OR17]](s32) - ; CI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; CI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; CI-NEXT: [[ZEXTLOAD18:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD23]](p0) :: (load (s8) from unknown-address + 24) - ; CI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD19:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD24]](p0) :: (load (s8) from unknown-address + 25) - ; CI-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD19]], [[C1]](s32) - ; CI-NEXT: [[OR18:%[0-9]+]]:_(s32) = G_OR [[SHL18]], [[ZEXTLOAD18]] - ; CI-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD20:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD25]](p0) :: (load (s8) from unknown-address + 26) - ; CI-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD25]], [[C]](s64) - ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p0) :: (load (s8) from unknown-address + 27) - ; CI-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[LOAD6]], [[C1]](s32) - ; CI-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[ZEXTLOAD20]] - ; CI-NEXT: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[OR19]], [[C3]](s32) - ; CI-NEXT: [[OR20:%[0-9]+]]:_(s32) = G_OR [[SHL20]], [[OR18]] - ; CI-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64) - ; CI-NEXT: [[ZEXTLOAD21:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD27]](p0) :: (load (s8) from unknown-address + 28) - ; CI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD27]], 
[[C]](s64) - ; CI-NEXT: [[ZEXTLOAD22:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD28]](p0) :: (load (s8) from unknown-address + 29) - ; CI-NEXT: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD22]], [[C1]](s32) - ; CI-NEXT: [[OR21:%[0-9]+]]:_(s32) = G_OR [[SHL21]], [[ZEXTLOAD21]] - ; CI-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD27]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD23:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD29]](p0) :: (load (s8) from unknown-address + 30) - ; CI-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD29]], [[C]](s64) - ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p0) :: (load (s8) from unknown-address + 31) - ; CI-NEXT: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[LOAD7]], [[C1]](s32) - ; CI-NEXT: [[OR22:%[0-9]+]]:_(s32) = G_OR [[SHL22]], [[ZEXTLOAD23]] - ; CI-NEXT: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[OR22]], [[C3]](s32) - ; CI-NEXT: [[OR23:%[0-9]+]]:_(s32) = G_OR [[SHL23]], [[OR21]] - ; CI-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR20]](s32), [[OR23]](s32) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i8) from unknown-address + 4) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (i8) from unknown-address + 5) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (i8) from unknown-address + 6) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i8) from unknown-address + 7) + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-NEXT: 
[[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR2]](i32), [[OR5]](i32) + ; CI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (i8) from unknown-address + 8) + ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (i8) from unknown-address + 9) + ; CI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; CI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (i8) from unknown-address + 10) + ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p0) :: (load (i8) from unknown-address + 11) + ; CI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; CI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; CI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i64) + ; CI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (i8) from unknown-address + 12) + ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (i8) from unknown-address + 13) + ; CI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; CI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (i8) from unknown-address + 14) + ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p0) :: (load (i8) from unknown-address + 15) + ; CI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; CI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; CI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; CI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; CI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR8]](i32), [[OR11]](i32) + ; CI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; CI-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD15]](p0) :: (load (i8) from unknown-address + 16) + ; CI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD16]](p0) :: (load (i8) from unknown-address + 17) + ; CI-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD13]], [[C1]](i32) + ; CI-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[ZEXTLOAD12]] + ; CI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD17]](p0) :: (load (i8) from unknown-address + 18) + ; CI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD17]], [[C]](i64) + ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = 
G_LOAD [[PTR_ADD18]](p0) :: (load (i8) from unknown-address + 19) + ; CI-NEXT: [[SHL13:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; CI-NEXT: [[OR13:%[0-9]+]]:_(i32) = G_OR [[SHL13]], [[ZEXTLOAD14]] + ; CI-NEXT: [[SHL14:%[0-9]+]]:_(i32) = G_SHL [[OR13]], [[C3]](i32) + ; CI-NEXT: [[OR14:%[0-9]+]]:_(i32) = G_OR [[SHL14]], [[OR12]] + ; CI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C4]](i64) + ; CI-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD19]](p0) :: (load (i8) from unknown-address + 20) + ; CI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD19]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD20]](p0) :: (load (i8) from unknown-address + 21) + ; CI-NEXT: [[SHL15:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD16]], [[C1]](i32) + ; CI-NEXT: [[OR15:%[0-9]+]]:_(i32) = G_OR [[SHL15]], [[ZEXTLOAD15]] + ; CI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD19]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD21]](p0) :: (load (i8) from unknown-address + 22) + ; CI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD21]], [[C]](i64) + ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD22]](p0) :: (load (i8) from unknown-address + 23) + ; CI-NEXT: [[SHL16:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; CI-NEXT: [[OR16:%[0-9]+]]:_(i32) = G_OR [[SHL16]], [[ZEXTLOAD17]] + ; CI-NEXT: [[SHL17:%[0-9]+]]:_(i32) = G_SHL [[OR16]], [[C3]](i32) + ; CI-NEXT: [[OR17:%[0-9]+]]:_(i32) = G_OR [[SHL17]], [[OR15]] + ; CI-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR14]](i32), [[OR17]](i32) + ; CI-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 24 + ; CI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](i64) + ; CI-NEXT: [[ZEXTLOAD18:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD23]](p0) :: (load (i8) from unknown-address + 24) + ; CI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD19:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD24]](p0) :: (load (i8) from unknown-address + 25) + ; CI-NEXT: [[SHL18:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD19]], [[C1]](i32) + ; CI-NEXT: [[OR18:%[0-9]+]]:_(i32) = G_OR [[SHL18]], [[ZEXTLOAD18]] + ; CI-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD20:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD25]](p0) :: (load (i8) from unknown-address + 26) + ; CI-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD25]], [[C]](i64) + ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD26]](p0) :: (load (i8) from unknown-address + 27) + ; CI-NEXT: [[SHL19:%[0-9]+]]:_(i32) = G_SHL [[LOAD6]], [[C1]](i32) + ; CI-NEXT: [[OR19:%[0-9]+]]:_(i32) = G_OR [[SHL19]], [[ZEXTLOAD20]] + ; CI-NEXT: [[SHL20:%[0-9]+]]:_(i32) = G_SHL [[OR19]], [[C3]](i32) + ; CI-NEXT: [[OR20:%[0-9]+]]:_(i32) = G_OR [[SHL20]], [[OR18]] + ; CI-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C4]](i64) + ; CI-NEXT: [[ZEXTLOAD21:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD27]](p0) :: (load (i8) from unknown-address + 28) + ; CI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD27]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD22:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD28]](p0) :: (load (i8) from unknown-address + 29) + ; CI-NEXT: [[SHL21:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD22]], [[C1]](i32) + ; CI-NEXT: [[OR21:%[0-9]+]]:_(i32) = G_OR [[SHL21]], [[ZEXTLOAD21]] + ; CI-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD27]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD23:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD29]](p0) :: (load (i8) from 
unknown-address + 30) + ; CI-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD29]], [[C]](i64) + ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD30]](p0) :: (load (i8) from unknown-address + 31) + ; CI-NEXT: [[SHL22:%[0-9]+]]:_(i32) = G_SHL [[LOAD7]], [[C1]](i32) + ; CI-NEXT: [[OR22:%[0-9]+]]:_(i32) = G_OR [[SHL22]], [[ZEXTLOAD23]] + ; CI-NEXT: [[SHL23:%[0-9]+]]:_(i32) = G_SHL [[OR22]], [[C3]](i32) + ; CI-NEXT: [[OR23:%[0-9]+]]:_(i32) = G_OR [[SHL23]], [[OR21]] + ; CI-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR20]](i32), [[OR23]](i32) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64), [[MV3]](i64) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; VI-LABEL: name: test_load_flat_v4s64_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD 
[[COPY]], [[C5]](s64) - ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) - ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) - ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12) - ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13) - ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14) - ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15) - ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s32), [[OR11]](s32) - ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD15]](p0) :: (load (s8) from unknown-address + 16) - ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD16]](p0) :: (load (s8) from unknown-address + 17) - ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD13]], [[C1]](s32) - ; VI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[ZEXTLOAD12]] - ; VI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD17]](p0) :: (load (s8) from unknown-address + 18) - ; VI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD17]], [[C]](s64) - ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p0) :: (load (s8) from unknown-address + 19) - ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[SHL13]], [[ZEXTLOAD14]] - ; VI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[OR13]], [[C3]](s32) - ; VI-NEXT: 
[[OR14:%[0-9]+]]:_(s32) = G_OR [[SHL14]], [[OR12]] - ; VI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD19]](p0) :: (load (s8) from unknown-address + 20) - ; VI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD20]](p0) :: (load (s8) from unknown-address + 21) - ; VI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD16]], [[C1]](s32) - ; VI-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[SHL15]], [[ZEXTLOAD15]] - ; VI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD21]](p0) :: (load (s8) from unknown-address + 22) - ; VI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD21]], [[C]](s64) - ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p0) :: (load (s8) from unknown-address + 23) - ; VI-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; VI-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[SHL16]], [[ZEXTLOAD17]] - ; VI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[OR16]], [[C3]](s32) - ; VI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] - ; VI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR14]](s32), [[OR17]](s32) - ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; VI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; VI-NEXT: [[ZEXTLOAD18:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD23]](p0) :: (load (s8) from unknown-address + 24) - ; VI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD19:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD24]](p0) :: (load (s8) from unknown-address + 25) - ; VI-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD19]], [[C1]](s32) - ; VI-NEXT: [[OR18:%[0-9]+]]:_(s32) = G_OR [[SHL18]], [[ZEXTLOAD18]] - ; VI-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD20:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD25]](p0) :: (load (s8) from unknown-address + 26) - ; VI-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD25]], [[C]](s64) - ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p0) :: (load (s8) from unknown-address + 27) - ; VI-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[LOAD6]], [[C1]](s32) - ; VI-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[ZEXTLOAD20]] - ; VI-NEXT: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[OR19]], [[C3]](s32) - ; VI-NEXT: [[OR20:%[0-9]+]]:_(s32) = G_OR [[SHL20]], [[OR18]] - ; VI-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD21:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD27]](p0) :: (load (s8) from unknown-address + 28) - ; VI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD27]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD22:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD28]](p0) :: (load (s8) from unknown-address + 29) - ; VI-NEXT: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD22]], [[C1]](s32) - ; VI-NEXT: [[OR21:%[0-9]+]]:_(s32) = G_OR [[SHL21]], [[ZEXTLOAD21]] - ; VI-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD27]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD23:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD29]](p0) :: (load (s8) from unknown-address + 30) - ; VI-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD29]], [[C]](s64) - ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p0) :: (load (s8) from unknown-address + 31) - ; VI-NEXT: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[LOAD7]], [[C1]](s32) - ; VI-NEXT: 
[[OR22:%[0-9]+]]:_(s32) = G_OR [[SHL22]], [[ZEXTLOAD23]] - ; VI-NEXT: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[OR22]], [[C3]](s32) - ; VI-NEXT: [[OR23:%[0-9]+]]:_(s32) = G_OR [[SHL23]], [[OR21]] - ; VI-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR20]](s32), [[OR23]](s32) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i8) from unknown-address + 4) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (i8) from unknown-address + 5) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (i8) from unknown-address + 6) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i8) from unknown-address + 7) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; VI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR2]](i32), [[OR5]](i32) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (i8) from unknown-address + 8) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (i8) from unknown-address + 9) + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; VI-NEXT: [[OR6:%[0-9]+]]:_(i32) = 
G_OR [[SHL6]], [[ZEXTLOAD6]] + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (i8) from unknown-address + 10) + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p0) :: (load (i8) from unknown-address + 11) + ; VI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; VI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; VI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (i8) from unknown-address + 12) + ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (i8) from unknown-address + 13) + ; VI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; VI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (i8) from unknown-address + 14) + ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p0) :: (load (i8) from unknown-address + 15) + ; VI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; VI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; VI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; VI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; VI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR8]](i32), [[OR11]](i32) + ; VI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; VI-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD15]](p0) :: (load (i8) from unknown-address + 16) + ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD16]](p0) :: (load (i8) from unknown-address + 17) + ; VI-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD13]], [[C1]](i32) + ; VI-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[ZEXTLOAD12]] + ; VI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD17]](p0) :: (load (i8) from unknown-address + 18) + ; VI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD17]], [[C]](i64) + ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD18]](p0) :: (load (i8) from unknown-address + 19) + ; VI-NEXT: [[SHL13:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR13:%[0-9]+]]:_(i32) = G_OR [[SHL13]], [[ZEXTLOAD14]] + ; VI-NEXT: [[SHL14:%[0-9]+]]:_(i32) = G_SHL [[OR13]], [[C3]](i32) + ; VI-NEXT: [[OR14:%[0-9]+]]:_(i32) = G_OR [[SHL14]], [[OR12]] + ; VI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD19]](p0) :: (load (i8) from unknown-address + 20) + ; VI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD19]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD20]](p0) :: (load (i8) from unknown-address + 21) + ; 
VI-NEXT: [[SHL15:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD16]], [[C1]](i32) + ; VI-NEXT: [[OR15:%[0-9]+]]:_(i32) = G_OR [[SHL15]], [[ZEXTLOAD15]] + ; VI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD19]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD21]](p0) :: (load (i8) from unknown-address + 22) + ; VI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD21]], [[C]](i64) + ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD22]](p0) :: (load (i8) from unknown-address + 23) + ; VI-NEXT: [[SHL16:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; VI-NEXT: [[OR16:%[0-9]+]]:_(i32) = G_OR [[SHL16]], [[ZEXTLOAD17]] + ; VI-NEXT: [[SHL17:%[0-9]+]]:_(i32) = G_SHL [[OR16]], [[C3]](i32) + ; VI-NEXT: [[OR17:%[0-9]+]]:_(i32) = G_OR [[SHL17]], [[OR15]] + ; VI-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR14]](i32), [[OR17]](i32) + ; VI-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 24 + ; VI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](i64) + ; VI-NEXT: [[ZEXTLOAD18:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD23]](p0) :: (load (i8) from unknown-address + 24) + ; VI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD19:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD24]](p0) :: (load (i8) from unknown-address + 25) + ; VI-NEXT: [[SHL18:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD19]], [[C1]](i32) + ; VI-NEXT: [[OR18:%[0-9]+]]:_(i32) = G_OR [[SHL18]], [[ZEXTLOAD18]] + ; VI-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD20:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD25]](p0) :: (load (i8) from unknown-address + 26) + ; VI-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD25]], [[C]](i64) + ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD26]](p0) :: (load (i8) from unknown-address + 27) + ; VI-NEXT: [[SHL19:%[0-9]+]]:_(i32) = G_SHL [[LOAD6]], [[C1]](i32) + ; VI-NEXT: [[OR19:%[0-9]+]]:_(i32) = G_OR [[SHL19]], [[ZEXTLOAD20]] + ; VI-NEXT: [[SHL20:%[0-9]+]]:_(i32) = G_SHL [[OR19]], [[C3]](i32) + ; VI-NEXT: [[OR20:%[0-9]+]]:_(i32) = G_OR [[SHL20]], [[OR18]] + ; VI-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD21:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD27]](p0) :: (load (i8) from unknown-address + 28) + ; VI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD27]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD22:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD28]](p0) :: (load (i8) from unknown-address + 29) + ; VI-NEXT: [[SHL21:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD22]], [[C1]](i32) + ; VI-NEXT: [[OR21:%[0-9]+]]:_(i32) = G_OR [[SHL21]], [[ZEXTLOAD21]] + ; VI-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD27]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD23:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD29]](p0) :: (load (i8) from unknown-address + 30) + ; VI-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD29]], [[C]](i64) + ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD30]](p0) :: (load (i8) from unknown-address + 31) + ; VI-NEXT: [[SHL22:%[0-9]+]]:_(i32) = G_SHL [[LOAD7]], [[C1]](i32) + ; VI-NEXT: [[OR22:%[0-9]+]]:_(i32) = G_OR [[SHL22]], [[ZEXTLOAD23]] + ; VI-NEXT: [[SHL23:%[0-9]+]]:_(i32) = G_SHL [[OR22]], [[C3]](i32) + ; VI-NEXT: [[OR23:%[0-9]+]]:_(i32) = G_OR [[SHL23]], [[OR21]] + ; VI-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR20]](i32), [[OR23]](i32) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64), [[MV3]](i64) + ; VI-NEXT: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; GFX9PLUS-LABEL: name: test_load_flat_v4s64_align1 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 1) - ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x s64>) from unknown-address + 16, align 1) - ; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p0) :: (load (<2 x i64>), align 1) + ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x i64>) from unknown-address + 16, align 1) + ; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i64>) = G_CONCAT_VECTORS [[LOAD]](<2 x i64>), [[LOAD1]](<2 x i64>) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x i64>) ; ; GFX11PLUS-LABEL: name: test_load_flat_v4s64_align1 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 1) - ; GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x s64>) from unknown-address + 16, align 1) - ; GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) - ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p0) :: (load (<2 x i64>), align 1) + ; GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x i64>) from unknown-address + 16, align 1) + ; GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i64>) = G_CONCAT_VECTORS [[LOAD]](<2 x i64>), [[LOAD1]](<2 x i64>) + ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x i64>) ; ; GFX12-LABEL: name: test_load_flat_v4s64_align1 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 1) - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x s64>) from unknown-address + 16, align 1) - ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY 
[[CONCAT_VECTORS]](<4 x s64>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p0) :: (load (<2 x i64>), align 1) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x i64>) from unknown-address + 16, align 1) + ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i64>) = G_CONCAT_VECTORS [[LOAD]](<2 x i64>), [[LOAD1]](<2 x i64>) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x i64>) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v4s64_align1 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX9PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) - ; UNALIGNED_GFX9PLUS-NEXT: 
[[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX9PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; UNALIGNED_GFX9PLUS-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD6]] - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[ZEXTLOAD8]] - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[OR8]], [[C3]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[OR7]] - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR9]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD9]] - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[ZEXTLOAD11]] - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] - ; 
UNALIGNED_GFX9PLUS-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] - ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD15]](p0) :: (load (s8) from unknown-address + 16) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD16]](p0) :: (load (s8) from unknown-address + 17) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD13]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[SHL14]], [[ZEXTLOAD12]] - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD17]](p0) :: (load (s8) from unknown-address + 18) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD17]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p0) :: (load (s8) from unknown-address + 19) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[SHL15]], [[ZEXTLOAD14]] - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[OR15]], [[C3]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[SHL16]], [[OR14]] - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXT2:%[0-9]+]]:_(s64) = G_ZEXT [[OR16]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD19]](p0) :: (load (s8) from unknown-address + 20) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD20]](p0) :: (load (s8) from unknown-address + 21) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD16]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[ZEXTLOAD15]] - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD21]](p0) :: (load (s8) from unknown-address + 22) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD21]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p0) :: (load (s8) from unknown-address + 23) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR18:%[0-9]+]]:_(s32) = G_OR [[SHL18]], [[ZEXTLOAD17]] - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] - ; UNALIGNED_GFX9PLUS-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; 
UNALIGNED_GFX9PLUS-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD18:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD23]](p0) :: (load (s8) from unknown-address + 24) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD19:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD24]](p0) :: (load (s8) from unknown-address + 25) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD19]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR21:%[0-9]+]]:_(s32) = G_OR [[SHL21]], [[ZEXTLOAD18]] - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C2]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD20:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD25]](p0) :: (load (s8) from unknown-address + 26) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD25]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p0) :: (load (s8) from unknown-address + 27) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[LOAD6]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR22:%[0-9]+]]:_(s32) = G_OR [[SHL22]], [[ZEXTLOAD20]] - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[OR22]], [[C3]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR23:%[0-9]+]]:_(s32) = G_OR [[SHL23]], [[OR21]] - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXT3:%[0-9]+]]:_(s64) = G_ZEXT [[OR23]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD21:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD27]](p0) :: (load (s8) from unknown-address + 28) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD27]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD22:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD28]](p0) :: (load (s8) from unknown-address + 29) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL24:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD22]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR24:%[0-9]+]]:_(s32) = G_OR [[SHL24]], [[ZEXTLOAD21]] - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD27]], [[C2]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD23:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD29]](p0) :: (load (s8) from unknown-address + 30) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD29]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p0) :: (load (s8) from unknown-address + 31) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL25:%[0-9]+]]:_(s32) = G_SHL [[LOAD7]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR25:%[0-9]+]]:_(s32) = G_OR [[SHL25]], [[ZEXTLOAD23]] - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL26:%[0-9]+]]:_(s32) = G_SHL [[OR25]], [[C3]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR26:%[0-9]+]]:_(s32) = G_OR [[SHL26]], [[OR24]] - ; UNALIGNED_GFX9PLUS-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[OR26]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL27:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT3]], [[COPY3]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR27:%[0-9]+]]:_(s64) = G_OR [[SHL27]], [[ZEXT3]] - ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR20]](s64), [[OR27]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: 
[[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s64>), [[BUILD_VECTOR1]](<2 x s64>) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX9PLUS-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i8) from unknown-address + 4) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (i8) from unknown-address + 5) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (i8) from unknown-address + 6) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i8) from unknown-address + 7) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX9PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR6:%[0-9]+]]:_(i64) = 
G_OR [[SHL6]], [[ZEXT]] + ; UNALIGNED_GFX9PLUS-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (i8) from unknown-address + 8) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (i8) from unknown-address + 9) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (i8) from unknown-address + 10) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p0) :: (load (i8) from unknown-address + 11) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[OR8]], [[C3]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[OR7]] + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[OR9]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (i8) from unknown-address + 12) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (i8) from unknown-address + 13) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (i8) from unknown-address + 14) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p0) :: (load (i8) from unknown-address + 15) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[OR11]], [[C3]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[OR10]] + ; UNALIGNED_GFX9PLUS-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[OR12]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL13:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT1]], [[COPY1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR13:%[0-9]+]]:_(i64) = G_OR [[SHL13]], [[ZEXT1]] + ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[OR6]](i64), [[OR13]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](i64) + ; 
UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD15]](p0) :: (load (i8) from unknown-address + 16) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD16]](p0) :: (load (i8) from unknown-address + 17) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL14:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD13]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR14:%[0-9]+]]:_(i32) = G_OR [[SHL14]], [[ZEXTLOAD12]] + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C2]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD17]](p0) :: (load (i8) from unknown-address + 18) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD17]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD18]](p0) :: (load (i8) from unknown-address + 19) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL15:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR15:%[0-9]+]]:_(i32) = G_OR [[SHL15]], [[ZEXTLOAD14]] + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL16:%[0-9]+]]:_(i32) = G_SHL [[OR15]], [[C3]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR16:%[0-9]+]]:_(i32) = G_OR [[SHL16]], [[OR14]] + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXT2:%[0-9]+]]:_(i64) = G_ZEXT [[OR16]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C4]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD19]](p0) :: (load (i8) from unknown-address + 20) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD19]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD20]](p0) :: (load (i8) from unknown-address + 21) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL17:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD16]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR17:%[0-9]+]]:_(i32) = G_OR [[SHL17]], [[ZEXTLOAD15]] + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD19]], [[C2]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD21]](p0) :: (load (i8) from unknown-address + 22) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD21]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD22]](p0) :: (load (i8) from unknown-address + 23) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL18:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR18:%[0-9]+]]:_(i32) = G_OR [[SHL18]], [[ZEXTLOAD17]] + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL19:%[0-9]+]]:_(i32) = G_SHL [[OR18]], [[C3]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR19:%[0-9]+]]:_(i32) = G_OR [[SHL19]], [[OR17]] + ; UNALIGNED_GFX9PLUS-NEXT: [[ANYEXT2:%[0-9]+]]:_(i64) = G_ANYEXT [[OR19]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL20:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT2]], [[COPY2]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR20:%[0-9]+]]:_(i64) = G_OR [[SHL20]], [[ZEXT2]] + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C6]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD18:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD23]](p0) :: (load (i8) from unknown-address + 24) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD19:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD24]](p0) :: (load (i8) from 
unknown-address + 25) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL21:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD19]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR21:%[0-9]+]]:_(i32) = G_OR [[SHL21]], [[ZEXTLOAD18]] + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C2]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD20:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD25]](p0) :: (load (i8) from unknown-address + 26) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD25]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD26]](p0) :: (load (i8) from unknown-address + 27) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL22:%[0-9]+]]:_(i32) = G_SHL [[LOAD6]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR22:%[0-9]+]]:_(i32) = G_OR [[SHL22]], [[ZEXTLOAD20]] + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL23:%[0-9]+]]:_(i32) = G_SHL [[OR22]], [[C3]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR23:%[0-9]+]]:_(i32) = G_OR [[SHL23]], [[OR21]] + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXT3:%[0-9]+]]:_(i64) = G_ZEXT [[OR23]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C4]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD21:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD27]](p0) :: (load (i8) from unknown-address + 28) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD27]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD22:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD28]](p0) :: (load (i8) from unknown-address + 29) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL24:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD22]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR24:%[0-9]+]]:_(i32) = G_OR [[SHL24]], [[ZEXTLOAD21]] + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD27]], [[C2]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD23:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD29]](p0) :: (load (i8) from unknown-address + 30) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD29]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD7:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD30]](p0) :: (load (i8) from unknown-address + 31) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL25:%[0-9]+]]:_(i32) = G_SHL [[LOAD7]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR25:%[0-9]+]]:_(i32) = G_OR [[SHL25]], [[ZEXTLOAD23]] + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL26:%[0-9]+]]:_(i32) = G_SHL [[OR25]], [[C3]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR26:%[0-9]+]]:_(i32) = G_OR [[SHL26]], [[OR24]] + ; UNALIGNED_GFX9PLUS-NEXT: [[ANYEXT3:%[0-9]+]]:_(i64) = G_ANYEXT [[OR26]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL27:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT3]], [[COPY3]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR27:%[0-9]+]]:_(i64) = G_OR [[SHL27]], [[ZEXT3]] + ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[OR20]](i64), [[OR27]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i64>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i64>), [[BUILD_VECTOR1]](<2 x i64>) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x i64>) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v4s64_align1 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; 
UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX11PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; UNALIGNED_GFX11PLUS-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) 
= G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD6]] - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[ZEXTLOAD8]] - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[OR8]], [[C3]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[OR7]] - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR9]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD9]] - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[ZEXTLOAD11]] - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] - ; UNALIGNED_GFX11PLUS-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] - ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD15]](p0) :: (load (s8) from unknown-address + 16) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD16]](p0) :: (load (s8) from 
unknown-address + 17) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD13]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[SHL14]], [[ZEXTLOAD12]] - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD17]](p0) :: (load (s8) from unknown-address + 18) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD17]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p0) :: (load (s8) from unknown-address + 19) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[SHL15]], [[ZEXTLOAD14]] - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[OR15]], [[C3]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[SHL16]], [[OR14]] - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXT2:%[0-9]+]]:_(s64) = G_ZEXT [[OR16]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD19]](p0) :: (load (s8) from unknown-address + 20) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD20]](p0) :: (load (s8) from unknown-address + 21) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD16]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[ZEXTLOAD15]] - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD21]](p0) :: (load (s8) from unknown-address + 22) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD21]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p0) :: (load (s8) from unknown-address + 23) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR18:%[0-9]+]]:_(s32) = G_OR [[SHL18]], [[ZEXTLOAD17]] - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] - ; UNALIGNED_GFX11PLUS-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD18:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD23]](p0) :: (load (s8) from unknown-address + 24) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD19:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD24]](p0) :: (load (s8) from unknown-address + 25) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD19]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR21:%[0-9]+]]:_(s32) = G_OR [[SHL21]], [[ZEXTLOAD18]] - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C2]](s64) - ; 
UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD20:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD25]](p0) :: (load (s8) from unknown-address + 26) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD25]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p0) :: (load (s8) from unknown-address + 27) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[LOAD6]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR22:%[0-9]+]]:_(s32) = G_OR [[SHL22]], [[ZEXTLOAD20]] - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[OR22]], [[C3]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR23:%[0-9]+]]:_(s32) = G_OR [[SHL23]], [[OR21]] - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXT3:%[0-9]+]]:_(s64) = G_ZEXT [[OR23]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD21:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD27]](p0) :: (load (s8) from unknown-address + 28) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD27]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD22:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD28]](p0) :: (load (s8) from unknown-address + 29) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL24:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD22]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR24:%[0-9]+]]:_(s32) = G_OR [[SHL24]], [[ZEXTLOAD21]] - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD27]], [[C2]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD23:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD29]](p0) :: (load (s8) from unknown-address + 30) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD29]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p0) :: (load (s8) from unknown-address + 31) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL25:%[0-9]+]]:_(s32) = G_SHL [[LOAD7]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR25:%[0-9]+]]:_(s32) = G_OR [[SHL25]], [[ZEXTLOAD23]] - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL26:%[0-9]+]]:_(s32) = G_SHL [[OR25]], [[C3]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR26:%[0-9]+]]:_(s32) = G_OR [[SHL26]], [[OR24]] - ; UNALIGNED_GFX11PLUS-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[OR26]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL27:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT3]], [[COPY3]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR27:%[0-9]+]]:_(s64) = G_OR [[SHL27]], [[ZEXT3]] - ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR20]](s64), [[OR27]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s64>), [[BUILD_VECTOR1]](<2 x s64>) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11PLUS-NEXT: 
[[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i8) from unknown-address + 4) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (i8) from unknown-address + 5) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (i8) from unknown-address + 6) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i8) from unknown-address + 7) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (i8) from unknown-address + 8) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (i8) from unknown-address + 9) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: 
[[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (i8) from unknown-address + 10) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p0) :: (load (i8) from unknown-address + 11) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[OR8]], [[C3]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[OR7]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[OR9]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (i8) from unknown-address + 12) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (i8) from unknown-address + 13) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (i8) from unknown-address + 14) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p0) :: (load (i8) from unknown-address + 15) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[OR11]], [[C3]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[OR10]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[OR12]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL13:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT1]], [[COPY1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR13:%[0-9]+]]:_(i64) = G_OR [[SHL13]], [[ZEXT1]] + ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[OR6]](i64), [[OR13]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD15]](p0) :: (load (i8) from unknown-address + 16) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD16]](p0) :: (load (i8) from unknown-address + 17) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL14:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD13]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR14:%[0-9]+]]:_(i32) = G_OR [[SHL14]], [[ZEXTLOAD12]] + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C2]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD17]](p0) :: (load (i8) from unknown-address + 18) + ; UNALIGNED_GFX11PLUS-NEXT: 
[[PTR_ADD18:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD17]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD18]](p0) :: (load (i8) from unknown-address + 19) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL15:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR15:%[0-9]+]]:_(i32) = G_OR [[SHL15]], [[ZEXTLOAD14]] + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL16:%[0-9]+]]:_(i32) = G_SHL [[OR15]], [[C3]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR16:%[0-9]+]]:_(i32) = G_OR [[SHL16]], [[OR14]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXT2:%[0-9]+]]:_(i64) = G_ZEXT [[OR16]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C4]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD19]](p0) :: (load (i8) from unknown-address + 20) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD19]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD20]](p0) :: (load (i8) from unknown-address + 21) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL17:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD16]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR17:%[0-9]+]]:_(i32) = G_OR [[SHL17]], [[ZEXTLOAD15]] + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD19]], [[C2]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD21]](p0) :: (load (i8) from unknown-address + 22) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD21]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD22]](p0) :: (load (i8) from unknown-address + 23) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL18:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR18:%[0-9]+]]:_(i32) = G_OR [[SHL18]], [[ZEXTLOAD17]] + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL19:%[0-9]+]]:_(i32) = G_SHL [[OR18]], [[C3]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR19:%[0-9]+]]:_(i32) = G_OR [[SHL19]], [[OR17]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ANYEXT2:%[0-9]+]]:_(i64) = G_ANYEXT [[OR19]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL20:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT2]], [[COPY2]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR20:%[0-9]+]]:_(i64) = G_OR [[SHL20]], [[ZEXT2]] + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C6]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD18:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD23]](p0) :: (load (i8) from unknown-address + 24) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD19:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD24]](p0) :: (load (i8) from unknown-address + 25) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL21:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD19]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR21:%[0-9]+]]:_(i32) = G_OR [[SHL21]], [[ZEXTLOAD18]] + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C2]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD20:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD25]](p0) :: (load (i8) from unknown-address + 26) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD25]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD26]](p0) :: (load (i8) from unknown-address + 27) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL22:%[0-9]+]]:_(i32) = G_SHL [[LOAD6]], [[C1]](i32) + ; 
UNALIGNED_GFX11PLUS-NEXT: [[OR22:%[0-9]+]]:_(i32) = G_OR [[SHL22]], [[ZEXTLOAD20]] + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL23:%[0-9]+]]:_(i32) = G_SHL [[OR22]], [[C3]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR23:%[0-9]+]]:_(i32) = G_OR [[SHL23]], [[OR21]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXT3:%[0-9]+]]:_(i64) = G_ZEXT [[OR23]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C4]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD21:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD27]](p0) :: (load (i8) from unknown-address + 28) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD27]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD22:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD28]](p0) :: (load (i8) from unknown-address + 29) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL24:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD22]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR24:%[0-9]+]]:_(i32) = G_OR [[SHL24]], [[ZEXTLOAD21]] + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD27]], [[C2]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD23:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD29]](p0) :: (load (i8) from unknown-address + 30) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD29]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD7:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD30]](p0) :: (load (i8) from unknown-address + 31) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL25:%[0-9]+]]:_(i32) = G_SHL [[LOAD7]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR25:%[0-9]+]]:_(i32) = G_OR [[SHL25]], [[ZEXTLOAD23]] + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL26:%[0-9]+]]:_(i32) = G_SHL [[OR25]], [[C3]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR26:%[0-9]+]]:_(i32) = G_OR [[SHL26]], [[OR24]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ANYEXT3:%[0-9]+]]:_(i64) = G_ANYEXT [[OR26]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL27:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT3]], [[COPY3]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR27:%[0-9]+]]:_(i64) = G_OR [[SHL27]], [[ZEXT3]] + ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[OR20]](i64), [[OR27]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i64>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i64>), [[BUILD_VECTOR1]](<2 x i64>) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x i64>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v4s64_align1 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; 
UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) - ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) - ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; UNALIGNED_GFX12-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; UNALIGNED_GFX12-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) - ; UNALIGNED_GFX12-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD6]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) - ; UNALIGNED_GFX12-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: 
[[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[ZEXTLOAD8]] - ; UNALIGNED_GFX12-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[OR8]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[OR7]] - ; UNALIGNED_GFX12-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR9]](s32) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13) - ; UNALIGNED_GFX12-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD9]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15) - ; UNALIGNED_GFX12-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[ZEXTLOAD11]] - ; UNALIGNED_GFX12-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] - ; UNALIGNED_GFX12-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; UNALIGNED_GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; UNALIGNED_GFX12-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) - ; UNALIGNED_GFX12-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD15]](p0) :: (load (s8) from unknown-address + 16) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD16]](p0) :: (load (s8) from unknown-address + 17) - ; UNALIGNED_GFX12-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD13]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[SHL14]], [[ZEXTLOAD12]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD17]](p0) :: (load (s8) from unknown-address + 18) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD17]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p0) :: (load (s8) from unknown-address + 19) - ; UNALIGNED_GFX12-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[SHL15]], [[ZEXTLOAD14]] - ; UNALIGNED_GFX12-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[OR15]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[SHL16]], [[OR14]] - ; UNALIGNED_GFX12-NEXT: [[ZEXT2:%[0-9]+]]:_(s64) = G_ZEXT 
[[OR16]](s32) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD19]](p0) :: (load (s8) from unknown-address + 20) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD20]](p0) :: (load (s8) from unknown-address + 21) - ; UNALIGNED_GFX12-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD16]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[ZEXTLOAD15]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD21]](p0) :: (load (s8) from unknown-address + 22) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD21]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p0) :: (load (s8) from unknown-address + 23) - ; UNALIGNED_GFX12-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR18:%[0-9]+]]:_(s32) = G_OR [[SHL18]], [[ZEXTLOAD17]] - ; UNALIGNED_GFX12-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] - ; UNALIGNED_GFX12-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) - ; UNALIGNED_GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; UNALIGNED_GFX12-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD18:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD23]](p0) :: (load (s8) from unknown-address + 24) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD19:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD24]](p0) :: (load (s8) from unknown-address + 25) - ; UNALIGNED_GFX12-NEXT: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD19]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR21:%[0-9]+]]:_(s32) = G_OR [[SHL21]], [[ZEXTLOAD18]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C2]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD20:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD25]](p0) :: (load (s8) from unknown-address + 26) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD25]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p0) :: (load (s8) from unknown-address + 27) - ; UNALIGNED_GFX12-NEXT: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[LOAD6]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR22:%[0-9]+]]:_(s32) = G_OR [[SHL22]], [[ZEXTLOAD20]] - ; UNALIGNED_GFX12-NEXT: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[OR22]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR23:%[0-9]+]]:_(s32) = G_OR [[SHL23]], [[OR21]] - ; UNALIGNED_GFX12-NEXT: [[ZEXT3:%[0-9]+]]:_(s64) = G_ZEXT [[OR23]](s32) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD21:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD27]](p0) :: (load (s8) from unknown-address + 28) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD27]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD22:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD28]](p0) :: (load (s8) from 
unknown-address + 29) - ; UNALIGNED_GFX12-NEXT: [[SHL24:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD22]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR24:%[0-9]+]]:_(s32) = G_OR [[SHL24]], [[ZEXTLOAD21]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD27]], [[C2]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD23:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD29]](p0) :: (load (s8) from unknown-address + 30) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD29]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p0) :: (load (s8) from unknown-address + 31) - ; UNALIGNED_GFX12-NEXT: [[SHL25:%[0-9]+]]:_(s32) = G_SHL [[LOAD7]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR25:%[0-9]+]]:_(s32) = G_OR [[SHL25]], [[ZEXTLOAD23]] - ; UNALIGNED_GFX12-NEXT: [[SHL26:%[0-9]+]]:_(s32) = G_SHL [[OR25]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR26:%[0-9]+]]:_(s32) = G_OR [[SHL26]], [[OR24]] - ; UNALIGNED_GFX12-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[OR26]](s32) - ; UNALIGNED_GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; UNALIGNED_GFX12-NEXT: [[SHL27:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT3]], [[COPY3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR27:%[0-9]+]]:_(s64) = G_OR [[SHL27]], [[ZEXT3]] - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR20]](s64), [[OR27]](s64) - ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s64>), [[BUILD_VECTOR1]](<2 x s64>) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i8) from unknown-address + 4) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; 
UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (i8) from unknown-address + 5) + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (i8) from unknown-address + 6) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i8) from unknown-address + 7) + ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; UNALIGNED_GFX12-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; UNALIGNED_GFX12-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (i8) from unknown-address + 8) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (i8) from unknown-address + 9) + ; UNALIGNED_GFX12-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (i8) from unknown-address + 10) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p0) :: (load (i8) from unknown-address + 11) + ; UNALIGNED_GFX12-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX12-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[OR8]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[OR7]] + ; UNALIGNED_GFX12-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[OR9]](i32) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (i8) from unknown-address + 12) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (i8) from unknown-address + 13) + ; UNALIGNED_GFX12-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) 
= G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (i8) from unknown-address + 14) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p0) :: (load (i8) from unknown-address + 15) + ; UNALIGNED_GFX12-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX12-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[OR11]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[OR10]] + ; UNALIGNED_GFX12-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[OR12]](i32) + ; UNALIGNED_GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; UNALIGNED_GFX12-NEXT: [[SHL13:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT1]], [[COPY1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR13:%[0-9]+]]:_(i64) = G_OR [[SHL13]], [[ZEXT1]] + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[OR6]](i64), [[OR13]](i64) + ; UNALIGNED_GFX12-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD15]](p0) :: (load (i8) from unknown-address + 16) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD16]](p0) :: (load (i8) from unknown-address + 17) + ; UNALIGNED_GFX12-NEXT: [[SHL14:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD13]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR14:%[0-9]+]]:_(i32) = G_OR [[SHL14]], [[ZEXTLOAD12]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C2]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD17]](p0) :: (load (i8) from unknown-address + 18) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD17]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD18]](p0) :: (load (i8) from unknown-address + 19) + ; UNALIGNED_GFX12-NEXT: [[SHL15:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR15:%[0-9]+]]:_(i32) = G_OR [[SHL15]], [[ZEXTLOAD14]] + ; UNALIGNED_GFX12-NEXT: [[SHL16:%[0-9]+]]:_(i32) = G_SHL [[OR15]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR16:%[0-9]+]]:_(i32) = G_OR [[SHL16]], [[OR14]] + ; UNALIGNED_GFX12-NEXT: [[ZEXT2:%[0-9]+]]:_(i64) = G_ZEXT [[OR16]](i32) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C4]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD19]](p0) :: (load (i8) from unknown-address + 20) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD19]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD20]](p0) :: (load (i8) from unknown-address + 21) + ; UNALIGNED_GFX12-NEXT: [[SHL17:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD16]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR17:%[0-9]+]]:_(i32) = G_OR [[SHL17]], [[ZEXTLOAD15]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD19]], [[C2]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD21]](p0) :: (load (i8) from unknown-address + 22) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD21]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD22]](p0) :: (load (i8) from 
unknown-address + 23) + ; UNALIGNED_GFX12-NEXT: [[SHL18:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR18:%[0-9]+]]:_(i32) = G_OR [[SHL18]], [[ZEXTLOAD17]] + ; UNALIGNED_GFX12-NEXT: [[SHL19:%[0-9]+]]:_(i32) = G_SHL [[OR18]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR19:%[0-9]+]]:_(i32) = G_OR [[SHL19]], [[OR17]] + ; UNALIGNED_GFX12-NEXT: [[ANYEXT2:%[0-9]+]]:_(i64) = G_ANYEXT [[OR19]](i32) + ; UNALIGNED_GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; UNALIGNED_GFX12-NEXT: [[SHL20:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT2]], [[COPY2]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR20:%[0-9]+]]:_(i64) = G_OR [[SHL20]], [[ZEXT2]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C6]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD18:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD23]](p0) :: (load (i8) from unknown-address + 24) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD19:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD24]](p0) :: (load (i8) from unknown-address + 25) + ; UNALIGNED_GFX12-NEXT: [[SHL21:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD19]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR21:%[0-9]+]]:_(i32) = G_OR [[SHL21]], [[ZEXTLOAD18]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C2]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD20:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD25]](p0) :: (load (i8) from unknown-address + 26) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD25]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD26]](p0) :: (load (i8) from unknown-address + 27) + ; UNALIGNED_GFX12-NEXT: [[SHL22:%[0-9]+]]:_(i32) = G_SHL [[LOAD6]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR22:%[0-9]+]]:_(i32) = G_OR [[SHL22]], [[ZEXTLOAD20]] + ; UNALIGNED_GFX12-NEXT: [[SHL23:%[0-9]+]]:_(i32) = G_SHL [[OR22]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR23:%[0-9]+]]:_(i32) = G_OR [[SHL23]], [[OR21]] + ; UNALIGNED_GFX12-NEXT: [[ZEXT3:%[0-9]+]]:_(i64) = G_ZEXT [[OR23]](i32) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C4]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD21:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD27]](p0) :: (load (i8) from unknown-address + 28) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD27]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD22:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD28]](p0) :: (load (i8) from unknown-address + 29) + ; UNALIGNED_GFX12-NEXT: [[SHL24:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD22]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR24:%[0-9]+]]:_(i32) = G_OR [[SHL24]], [[ZEXTLOAD21]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD27]], [[C2]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD23:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD29]](p0) :: (load (i8) from unknown-address + 30) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD29]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD7:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD30]](p0) :: (load (i8) from unknown-address + 31) + ; UNALIGNED_GFX12-NEXT: [[SHL25:%[0-9]+]]:_(i32) = G_SHL [[LOAD7]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR25:%[0-9]+]]:_(i32) = G_OR [[SHL25]], [[ZEXTLOAD23]] + ; UNALIGNED_GFX12-NEXT: [[SHL26:%[0-9]+]]:_(i32) = G_SHL [[OR25]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR26:%[0-9]+]]:_(i32) = G_OR [[SHL26]], [[OR24]] + ; UNALIGNED_GFX12-NEXT: [[ANYEXT3:%[0-9]+]]:_(i64) = G_ANYEXT [[OR26]](i32) + ; 
UNALIGNED_GFX12-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; UNALIGNED_GFX12-NEXT: [[SHL27:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT3]], [[COPY3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR27:%[0-9]+]]:_(i64) = G_OR [[SHL27]], [[ZEXT3]] + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[OR20]](i64), [[OR27]](i64) + ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i64>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i64>), [[BUILD_VECTOR1]](<2 x i64>) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x i64>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<4 x s64>) = G_LOAD %0 :: (load (<4 x s64>), align 1, addrspace 0) - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 + %1:_(<4 x i64>) = G_LOAD %0(p0) :: (load (<4 x i64>), align 1) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1(<4 x i64>) ... --- @@ -11822,136 +11822,136 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 32) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8, align 8) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) - ; CI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s32) from unknown-address + 16, align 16) - ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 20 - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s32) from unknown-address + 20) - ; CI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s32) from unknown-address + 24, align 8) - ; CI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 28 - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s32) from unknown-address + 28) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[BUILD_VECTOR]](<8 x s32>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 32) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-NEXT: 
[[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i32) from unknown-address + 8, align 8) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i32) from unknown-address + 12) + ; CI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p0) :: (load (i32) from unknown-address + 16, align 16) + ; CI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 20 + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p0) :: (load (i32) from unknown-address + 20) + ; CI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 24 + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p0) :: (load (i32) from unknown-address + 24, align 8) + ; CI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 28 + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i32) from unknown-address + 28) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32), [[LOAD6]](i32), [[LOAD7]](i32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i128>) = G_BITCAST [[BUILD_VECTOR]](<8 x i32>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x i128>) ; ; VI-LABEL: name: test_load_flat_v2s128_align32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8, align 8) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s32) from unknown-address + 16, align 16) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 20 - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s32) from unknown-address + 20) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s32) from unknown-address + 24, align 8) - ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 28 - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: 
[[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s32) from unknown-address + 28) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[BUILD_VECTOR]](<8 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i32) from unknown-address + 8, align 8) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i32) from unknown-address + 12) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p0) :: (load (i32) from unknown-address + 16, align 16) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 20 + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p0) :: (load (i32) from unknown-address + 20) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 24 + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p0) :: (load (i32) from unknown-address + 24, align 8) + ; VI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 28 + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i32) from unknown-address + 28) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32), [[LOAD6]](i32), [[LOAD7]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i128>) = G_BITCAST [[BUILD_VECTOR]](<8 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x i128>) ; ; GFX9PLUS-LABEL: name: test_load_flat_v2s128_align32 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 32) - ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16) - ; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) - ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s32>) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 32) + ; 
GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x i32>) from unknown-address + 16) + ; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>) + ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i128>) = G_BITCAST [[CONCAT_VECTORS]](<8 x i32>) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x i128>) ; ; GFX11PLUS-LABEL: name: test_load_flat_v2s128_align32 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 32) - ; GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16) - ; GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) - ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s32>) - ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 32) + ; GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x i32>) from unknown-address + 16) + ; GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>) + ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i128>) = G_BITCAST [[CONCAT_VECTORS]](<8 x i32>) + ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x i128>) ; ; GFX12-LABEL: name: test_load_flat_v2s128_align32 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 32) - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16) - ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s32>) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 32) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x i32>) from unknown-address + 16) + ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i128>) = G_BITCAST [[CONCAT_VECTORS]](<8 x i32>) + ; GFX12-NEXT: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x i128>) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v2s128_align32 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 32) - ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16) - ; UNALIGNED_GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) - ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s32>) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 32) + ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x i32>) from unknown-address + 16) + ; UNALIGNED_GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>) + ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i128>) = G_BITCAST [[CONCAT_VECTORS]](<8 x i32>) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x i128>) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v2s128_align32 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 32) - ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16) - ; UNALIGNED_GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) - ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s32>) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x i32>) from unknown-address + 16) + ; UNALIGNED_GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>) + ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i128>) = G_BITCAST [[CONCAT_VECTORS]](<8 x i32>) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x i128>) ; ; UNALIGNED_GFX12-LABEL: name: 
test_load_flat_v2s128_align32 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 32) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16) - ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) - ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s32>) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 32) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x i32>) from unknown-address + 16) + ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>) + ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i128>) = G_BITCAST [[CONCAT_VECTORS]](<8 x i32>) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x i128>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<2 x s128>) = G_LOAD %0 :: (load (<2 x s128>), align 32, addrspace 0) - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 + %1:_(<2 x i128>) = G_LOAD %0(p0) :: (load (<2 x i128>)) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1(<2 x i128>) ... 
--- @@ -11964,88 +11964,88 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 16) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8, align 8) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 16) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i32) from unknown-address + 8, align 8) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i32) from unknown-address + 12) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; VI-LABEL: name: test_load_flat_v2p1_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 16) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8, align 8) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 16) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = 
G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i32) from unknown-address + 8, align 8) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i32) from unknown-address + 12) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; GFX9PLUS-LABEL: name: test_load_flat_v2p1_align16 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>)) - ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>)) + ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; GFX11PLUS-LABEL: name: test_load_flat_v2p1_align16 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>)) - ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>)) + ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; GFX12-LABEL: name: test_load_flat_v2p1_align16 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>)) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>)) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v2p1_align16 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>)) - ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>)) + ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v2p1_align16 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY 
$vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>)) - ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>)) + ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v2p1_align16 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>)) - ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>)) + ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 16, addrspace 0) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<2 x p1>) = G_LOAD %0(p0) :: (load (<2 x p1>)) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x p1>) ... --- @@ -12058,88 +12058,88 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 8) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8, align 8) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 8) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i32) from unknown-address + 8, align 8) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i32) from unknown-address + 12) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) ; CI-NEXT: 
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; VI-LABEL: name: test_load_flat_v2p1_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 8) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8, align 8) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 8) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i32) from unknown-address + 8, align 8) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i32) from unknown-address + 12) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; GFX9PLUS-LABEL: name: test_load_flat_v2p1_align8 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 8) - ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 8) + ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; GFX11PLUS-LABEL: name: test_load_flat_v2p1_align8 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 8) - ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 8) + ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; GFX12-LABEL: name: 
test_load_flat_v2p1_align8 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 8) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 8) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v2p1_align8 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 8) - ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 8) + ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v2p1_align8 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 8) - ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 8) + ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v2p1_align8 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 8) - ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 8) + ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 8, addrspace 0) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<2 x p1>) = G_LOAD %0(p0) :: (load (<2 x p1>), align 8) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x p1>) ... 
--- @@ -12152,88 +12152,88 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i32) from unknown-address + 8) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i32) from unknown-address + 12) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; VI-LABEL: name: test_load_flat_v2p1_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = 
G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p0) :: (load (i32) from unknown-address + 8) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i32) from unknown-address + 12) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; GFX9PLUS-LABEL: name: test_load_flat_v2p1_align4 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4) - ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 4) + ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; GFX11PLUS-LABEL: name: test_load_flat_v2p1_align4 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4) - ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 4) + ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; GFX12-LABEL: name: test_load_flat_v2p1_align4 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 4) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v2p1_align4 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4) - ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 4) + ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v2p1_align4 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; 
UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4) - ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 4) + ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v2p1_align4 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4) - ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 4) + ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 4, addrspace 0) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<2 x p1>) = G_LOAD %0(p0) :: (load (<2 x p1>), align 4) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x p1>) ... --- @@ -12246,378 +12246,378 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address 
+ 6) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) - ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) - ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; CI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) - ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) - ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; CI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12) - ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13) - ; CI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; CI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14) - ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15) - ; CI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; CI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; CI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; CI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: 
[[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i8) from unknown-address + 4) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (i8) from unknown-address + 5) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (i8) from unknown-address + 6) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i8) from unknown-address + 7) + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (i8) from unknown-address + 8) + ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (i8) from unknown-address + 9) + ; CI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; CI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (i8) from unknown-address + 10) + ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p0) :: (load (i8) from unknown-address + 11) + ; CI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; CI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; CI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; CI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; CI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (i8) from unknown-address + 12) + ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; CI-NEXT: 
[[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (i8) from unknown-address + 13) + ; CI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; CI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (i8) from unknown-address + 14) + ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p0) :: (load (i8) from unknown-address + 15) + ; CI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; CI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; CI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; CI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; VI-LABEL: name: test_load_flat_v2p1_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], 
[[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) - ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) - ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12) - ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13) - ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14) - ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15) - ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) 
:: (load (i8) from unknown-address + 2) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i8) from unknown-address + 4) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (i8) from unknown-address + 5) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (i8) from unknown-address + 6) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i8) from unknown-address + 7) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; VI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (i8) from unknown-address + 8) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (i8) from unknown-address + 9) + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; VI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (i8) from unknown-address + 10) + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p0) :: (load (i8) from unknown-address + 11) + ; VI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; VI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; VI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; VI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; VI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (i8) from unknown-address + 12) + ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (i8) from unknown-address + 13) + ; VI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; VI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; VI-NEXT: 
[[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (i8) from unknown-address + 14) + ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p0) :: (load (i8) from unknown-address + 15) + ; VI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; VI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; VI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; VI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; GFX9PLUS-LABEL: name: test_load_flat_v2p1_align1 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 1) - ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 1) + ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; GFX11PLUS-LABEL: name: test_load_flat_v2p1_align1 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 1) - ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 1) + ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; GFX12-LABEL: name: test_load_flat_v2p1_align1 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 1) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p0) :: (load (<4 x i32>), align 1) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v2p1_align1 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], 
[[ZEXTLOAD]] - ; UNALIGNED_GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX9PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX9PLUS-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX9PLUS-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - 
; UNALIGNED_GFX9PLUS-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; UNALIGNED_GFX9PLUS-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX9PLUS-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX9PLUS-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; 
UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i8) from unknown-address + 4) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (i8) from unknown-address + 5) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (i8) from unknown-address + 6) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i8) from unknown-address + 7) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX9PLUS-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (i8) from unknown-address + 8) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (i8) from unknown-address + 9) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (i8) from unknown-address + 10) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p0) :: (load (i8) from unknown-address + 11) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX9PLUS-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (i8) from unknown-address + 12) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (i8) from unknown-address + 13) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX9PLUS-NEXT: 
[[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (i8) from unknown-address + 14) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p0) :: (load (i8) from unknown-address + 15) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v2p1_align1 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX11PLUS-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX11PLUS-NEXT: 
[[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX11PLUS-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; UNALIGNED_GFX11PLUS-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; UNALIGNED_GFX11PLUS-NEXT: 
[[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i8) from unknown-address + 4) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (i8) from unknown-address + 5) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (i8) from unknown-address + 6) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i8) from unknown-address + 7) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; 
UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (i8) from unknown-address + 8) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (i8) from unknown-address + 9) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (i8) from unknown-address + 10) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p0) :: (load (i8) from unknown-address + 11) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (i8) from unknown-address + 12) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (i8) from unknown-address + 13) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (i8) from unknown-address + 14) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p0) :: (load (i8) from unknown-address + 15) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v2p1_align1 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; 
UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) - ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) - ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX12-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) - ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD 
[[PTR_ADD9]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) - ; UNALIGNED_GFX12-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; UNALIGNED_GFX12-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; UNALIGNED_GFX12-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13) - ; UNALIGNED_GFX12-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15) - ; UNALIGNED_GFX12-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; UNALIGNED_GFX12-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(i32) 
= G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i8) from unknown-address + 4) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (i8) from unknown-address + 5) + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (i8) from unknown-address + 6) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i8) from unknown-address + 7) + ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX12-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (i8) from unknown-address + 8) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (i8) from unknown-address + 9) + ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (i8) from unknown-address + 10) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p0) :: (load (i8) from unknown-address + 11) + ; UNALIGNED_GFX12-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX12-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX12-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (i8) from unknown-address + 12) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (i8) from unknown-address + 13) + ; UNALIGNED_GFX12-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD 
[[PTR_ADD11]], [[C2]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (i8) from unknown-address + 14) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p0) :: (load (i8) from unknown-address + 15) + ; UNALIGNED_GFX12-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX12-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 1, addrspace 0) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<2 x p1>) = G_LOAD %0(p0) :: (load (<2 x p1>), align 1) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x p1>) ... --- @@ -12630,76 +12630,76 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 8) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 8) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x i32>) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) ; ; VI-LABEL: name: test_load_flat_v2p3_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 8) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32), align 8) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST 
[[BUILD_VECTOR]](<2 x i32>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) ; ; GFX9PLUS-LABEL: name: test_load_flat_v2p3_align8 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>)) - ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p0) :: (load (<2 x i32>)) + ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x i32>) ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) ; ; GFX11PLUS-LABEL: name: test_load_flat_v2p3_align8 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>)) - ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p0) :: (load (<2 x i32>)) + ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x i32>) ; GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) ; ; GFX12-LABEL: name: test_load_flat_v2p3_align8 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>)) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p0) :: (load (<2 x i32>)) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x i32>) ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v2p3_align8 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>)) - ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p0) :: (load (<2 x i32>)) + ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x i32>) ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v2p3_align8 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>)) - ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p0) :: (load (<2 x i32>)) + ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x i32>) ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v2p3_align8 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>)) - ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>) + ; 
UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p0) :: (load (<2 x i32>)) + ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x i32>) ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 8, addrspace 0) - $vgpr0_vgpr1 = COPY %1 + %1:_(<2 x p3>) = G_LOAD %0(p0) :: (load (<2 x p3>)) + $vgpr0_vgpr1 = COPY %1(<2 x p3>) ... --- @@ -12712,76 +12712,76 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x i32>) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) ; ; VI-LABEL: name: test_load_flat_v2p3_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p0) :: (load (i32) from unknown-address + 4) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x i32>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) ; ; GFX9PLUS-LABEL: name: test_load_flat_v2p3_align4 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>), align 4) - ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p0) :: (load (<2 x i32>), align 4) + ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x i32>) ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) ; ; GFX11PLUS-LABEL: name: test_load_flat_v2p3_align4 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: 
[[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>), align 4) - ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p0) :: (load (<2 x i32>), align 4) + ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x i32>) ; GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) ; ; GFX12-LABEL: name: test_load_flat_v2p3_align4 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>), align 4) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p0) :: (load (<2 x i32>), align 4) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x i32>) ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v2p3_align4 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>), align 4) - ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p0) :: (load (<2 x i32>), align 4) + ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x i32>) ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v2p3_align4 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>), align 4) - ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p0) :: (load (<2 x i32>), align 4) + ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x i32>) ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v2p3_align4 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>), align 4) - ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p0) :: (load (<2 x i32>), align 4) + ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x i32>) ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 4, addrspace 0) - $vgpr0_vgpr1 = COPY %1 + %1:_(<2 x p3>) = G_LOAD %0(p0) :: (load (<2 x p3>), align 4) + $vgpr0_vgpr1 = COPY %1(<2 x p3>) ... 
--- @@ -12794,228 +12794,228 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = 
G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i8) from unknown-address + 4) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (i8) from unknown-address + 5) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (i8) from unknown-address + 6) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i8) from unknown-address + 7) + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x i32>) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) ; ; VI-LABEL: name: test_load_flat_v2p3_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) - ; VI-NEXT: 
[[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i8) from unknown-address + 4) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (i8) from unknown-address + 5) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (i8) from unknown-address + 6) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i8) from unknown-address + 7) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR 
[[SHL5]], [[OR3]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x i32>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) ; ; GFX9PLUS-LABEL: name: test_load_flat_v2p3_align1 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>), align 1) - ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p0) :: (load (<2 x i32>), align 1) + ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x i32>) ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) ; ; GFX11PLUS-LABEL: name: test_load_flat_v2p3_align1 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>), align 1) - ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p0) :: (load (<2 x i32>), align 1) + ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x i32>) ; GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) ; ; GFX12-LABEL: name: test_load_flat_v2p3_align1 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>), align 1) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p0) :: (load (<2 x i32>), align 1) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x i32>) ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v2p3_align1 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; 
UNALIGNED_GFX9PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX9PLUS-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) - ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX9PLUS-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; UNALIGNED_GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX9PLUS-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX9PLUS-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + 
; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i8) from unknown-address + 4) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (i8) from unknown-address + 5) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (i8) from unknown-address + 6) + ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i8) from unknown-address + 7) + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX9PLUS-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x i32>) ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v2p3_align1 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX11PLUS-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD 
[[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i8) from unknown-address + 4) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (i8) from unknown-address + 5) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL 
[[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (i8) from unknown-address + 6) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i8) from unknown-address + 7) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x i32>) ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v2p3_align1 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) - ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) - ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD 
[[PTR_ADD3]], [[C2]](s64) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) - ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) - ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (i8) from unknown-address + 1) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (i8) from unknown-address + 2) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p0) :: (load (i8) from unknown-address + 3) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (i8) from unknown-address + 4) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (i8) from unknown-address + 5) + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (i8) from unknown-address + 6) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p0) :: (load (i8) from unknown-address + 7) + ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(i32) = 
G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32) + ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x i32>) ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 1, addrspace 0) - $vgpr0_vgpr1 = COPY %1 + %1:_(<2 x p3>) = G_LOAD %0(p0) :: (load (<2 x p3>), align 1) + $vgpr0_vgpr1 = COPY %1(<2 x p3>) ... --- @@ -13028,60 +13028,60 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4) - ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i8), align 4) + ; CI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; VI-LABEL: name: test_ext_load_flat_s32_from_1_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i8), align 4) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9PLUS-LABEL: name: test_ext_load_flat_s32_from_1_align4 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4) - ; GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i8), align 4) + ; GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX11PLUS-LABEL: name: test_ext_load_flat_s32_from_1_align4 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4) - ; GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i8), align 4) + ; GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX12-LABEL: name: test_ext_load_flat_s32_from_1_align4 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4) - ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i8), align 4) + ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_ext_load_flat_s32_from_1_align4 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i8), align 4) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_ext_load_flat_s32_from_1_align4 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) 
= COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i8), align 4) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX12-LABEL: name: test_ext_load_flat_s32_from_1_align4 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4) - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i8), align 4) + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_LOAD %0 :: (load (s8), align 4, addrspace 0) - $vgpr0 = COPY %1 + %1:_(i32) = G_LOAD %0(p0) :: (load (i8), align 4) + $vgpr0 = COPY %1(i32) ... --- @@ -13094,60 +13094,60 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) - ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 4) + ; CI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; VI-LABEL: name: test_ext_load_flat_s32_from_2_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 4) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9PLUS-LABEL: name: test_ext_load_flat_s32_from_2_align4 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) - ; GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 4) + ; GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX11PLUS-LABEL: name: test_ext_load_flat_s32_from_2_align4 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) - ; GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 4) + ; GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX12-LABEL: name: test_ext_load_flat_s32_from_2_align4 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) - ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 4) + ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_ext_load_flat_s32_from_2_align4 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0 = COPY 
[[LOAD]](s32) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 4) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_ext_load_flat_s32_from_2_align4 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 4) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX12-LABEL: name: test_ext_load_flat_s32_from_2_align4 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 4) + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_LOAD %0 :: (load (s16), align 4, addrspace 0) - $vgpr0 = COPY %1 + %1:_(i32) = G_LOAD %0(p0) :: (load (i16), align 4) + $vgpr0 = COPY %1(i32) ... --- @@ -13161,68 +13161,68 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4) - ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i8), align 4) + ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; VI-LABEL: name: test_ext_load_flat_s64_from_1_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i8), align 4) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX9PLUS-LABEL: name: test_ext_load_flat_s64_from_1_align4 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4) - ; GFX9PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i8), align 4) + ; GFX9PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX11PLUS-LABEL: name: test_ext_load_flat_s64_from_1_align4 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4) - ; GFX11PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = 
G_LOAD [[COPY]](p0) :: (load (i8), align 4) + ; GFX11PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX12-LABEL: name: test_ext_load_flat_s64_from_1_align4 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4) - ; GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i8), align 4) + ; GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_ext_load_flat_s64_from_1_align4 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4) - ; UNALIGNED_GFX9PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i8), align 4) + ; UNALIGNED_GFX9PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_ext_load_flat_s64_from_1_align4 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4) - ; UNALIGNED_GFX11PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i8), align 4) + ; UNALIGNED_GFX11PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; UNALIGNED_GFX12-LABEL: name: test_ext_load_flat_s64_from_1_align4 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4) - ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i8), align 4) + ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_LOAD %0 :: (load (s8), align 4, addrspace 0) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_LOAD %0(p0) :: (load (i8), align 4) + $vgpr0_vgpr1 = COPY %1(i64) ... 
--- @@ -13235,68 +13235,68 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) - ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 4) + ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; VI-LABEL: name: test_ext_load_flat_s64_from_2_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 4) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX9PLUS-LABEL: name: test_ext_load_flat_s64_from_2_align4 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) - ; GFX9PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 4) + ; GFX9PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX11PLUS-LABEL: name: test_ext_load_flat_s64_from_2_align4 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) - ; GFX11PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 4) + ; GFX11PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX12-LABEL: name: test_ext_load_flat_s64_from_2_align4 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) - ; GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 4) + ; GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_ext_load_flat_s64_from_2_align4 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) - ; UNALIGNED_GFX9PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 4) + ; UNALIGNED_GFX9PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; 
UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_ext_load_flat_s64_from_2_align4 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) - ; UNALIGNED_GFX11PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 4) + ; UNALIGNED_GFX11PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; UNALIGNED_GFX12-LABEL: name: test_ext_load_flat_s64_from_2_align4 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) - ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 4) + ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_LOAD %0 :: (load (s16), align 4, addrspace 0) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_LOAD %0(p0) :: (load (i16), align 4) + $vgpr0_vgpr1 = COPY %1(i64) ... --- @@ -13309,68 +13309,68 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; VI-LABEL: name: test_ext_load_flat_s64_from_4_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX9PLUS-LABEL: name: test_ext_load_flat_s64_from_4_align4 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; GFX9PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; GFX9PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX11PLUS-LABEL: name: test_ext_load_flat_s64_from_4_align4 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD 
[[COPY]](p0) :: (load (s32)) - ; GFX11PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; GFX11PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX12-LABEL: name: test_ext_load_flat_s64_from_4_align4 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_ext_load_flat_s64_from_4_align4 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; UNALIGNED_GFX9PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; UNALIGNED_GFX9PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_ext_load_flat_s64_from_4_align4 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; UNALIGNED_GFX11PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; UNALIGNED_GFX11PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; UNALIGNED_GFX12-LABEL: name: test_ext_load_flat_s64_from_4_align4 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_LOAD %0 :: (load (s32), align 4, addrspace 0) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_LOAD %0(p0) :: (load (i32)) + $vgpr0_vgpr1 = COPY %1(i64) ... 
--- @@ -13383,92 +13383,92 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; CI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) - ; CI-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; CI-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[DEF]](i32) + ; CI-NEXT: [[DEF1:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; CI-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[MV]](i64), [[DEF1]](i64) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](i128) ; ; VI-LABEL: name: test_ext_load_flat_s128_from_4_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) - ; VI-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; VI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; VI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[DEF]](i32) + ; VI-NEXT: [[DEF1:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; VI-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[MV]](i64), [[DEF1]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](i128) ; ; GFX9PLUS-LABEL: name: test_ext_load_flat_s128_from_4_align4 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9PLUS-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) - ; GFX9PLUS-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; GFX9PLUS-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX9PLUS-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[DEF]](i32) + ; GFX9PLUS-NEXT: [[DEF1:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; GFX9PLUS-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[MV]](i64), [[DEF1]](i64) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](i128) ; ; GFX11PLUS-LABEL: name: test_ext_load_flat_s128_from_4_align4 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX11PLUS-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) - ; GFX11PLUS-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; GFX11PLUS-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) - ; GFX11PLUS-NEXT: 
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX11PLUS-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[DEF]](i32) + ; GFX11PLUS-NEXT: [[DEF1:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; GFX11PLUS-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[MV]](i64), [[DEF1]](i64) + ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](i128) ; ; GFX12-LABEL: name: test_ext_load_flat_s128_from_4_align4 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) - ; GFX12-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[DEF]](i32) + ; GFX12-NEXT: [[DEF1:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[MV]](i64), [[DEF1]](i64) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](i128) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_ext_load_flat_s128_from_4_align4 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; UNALIGNED_GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; UNALIGNED_GFX9PLUS-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; UNALIGNED_GFX9PLUS-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; UNALIGNED_GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; UNALIGNED_GFX9PLUS-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[DEF]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: [[DEF1:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; UNALIGNED_GFX9PLUS-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[MV]](i64), [[DEF1]](i64) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](i128) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_ext_load_flat_s128_from_4_align4 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; UNALIGNED_GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; UNALIGNED_GFX11PLUS-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; UNALIGNED_GFX11PLUS-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; 
UNALIGNED_GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; UNALIGNED_GFX11PLUS-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[DEF]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: [[DEF1:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; UNALIGNED_GFX11PLUS-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[MV]](i64), [[DEF1]](i64) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](i128) ; ; UNALIGNED_GFX12-LABEL: name: test_ext_load_flat_s128_from_4_align4 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; UNALIGNED_GFX12-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) - ; UNALIGNED_GFX12-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; UNALIGNED_GFX12-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; UNALIGNED_GFX12-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[DEF]](i32) + ; UNALIGNED_GFX12-NEXT: [[DEF1:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; UNALIGNED_GFX12-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[MV]](i64), [[DEF1]](i64) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](i128) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s128) = G_LOAD %0 :: (load (s32), align 4, addrspace 0) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(i128) = G_LOAD %0(p0) :: (load (i32)) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(i128) ... 
--- @@ -13481,68 +13481,68 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) - ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 4) + ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; VI-LABEL: name: test_ext_load_flat_s64_from_2_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 4) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX9PLUS-LABEL: name: test_ext_load_flat_s64_from_2_align2 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) - ; GFX9PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 4) + ; GFX9PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX11PLUS-LABEL: name: test_ext_load_flat_s64_from_2_align2 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) - ; GFX11PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 4) + ; GFX11PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX12-LABEL: name: test_ext_load_flat_s64_from_2_align2 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) - ; GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 4) + ; GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_ext_load_flat_s64_from_2_align2 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) - ; UNALIGNED_GFX9PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 4) + ; UNALIGNED_GFX9PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; 
UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_ext_load_flat_s64_from_2_align2 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) - ; UNALIGNED_GFX11PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 4) + ; UNALIGNED_GFX11PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; UNALIGNED_GFX12-LABEL: name: test_ext_load_flat_s64_from_2_align2 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) - ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 4) + ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_LOAD %0 :: (load (s16), align 4, addrspace 0) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_LOAD %0(p0) :: (load (i16), align 4) + $vgpr0_vgpr1 = COPY %1(i64) ... --- @@ -13555,68 +13555,68 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4) - ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i8), align 4) + ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; VI-LABEL: name: test_ext_load_flat_s64_from_1_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i8), align 4) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX9PLUS-LABEL: name: test_ext_load_flat_s64_from_1_align1 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4) - ; GFX9PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i8), align 4) + ; GFX9PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX11PLUS-LABEL: name: test_ext_load_flat_s64_from_1_align1 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; 
GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4) - ; GFX11PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i8), align 4) + ; GFX11PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX12-LABEL: name: test_ext_load_flat_s64_from_1_align1 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4) - ; GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i8), align 4) + ; GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_ext_load_flat_s64_from_1_align1 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4) - ; UNALIGNED_GFX9PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i8), align 4) + ; UNALIGNED_GFX9PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_ext_load_flat_s64_from_1_align1 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4) - ; UNALIGNED_GFX11PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i8), align 4) + ; UNALIGNED_GFX11PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; UNALIGNED_GFX12-LABEL: name: test_ext_load_flat_s64_from_1_align1 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4) - ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i8), align 4) + ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_LOAD %0 :: (load (s8), align 4, addrspace 0) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_LOAD %0(p0) :: (load (i8), align 4) + $vgpr0_vgpr1 = COPY %1(i64) ... 
# Make sure there's no crash on very high alignments @@ -13630,59 +13630,59 @@ body: | ; CI: liveins: $vgpr0_vgpr1 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 536870912) - ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 536870912) + ; CI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; VI-LABEL: name: test_load_flat_s32_align536870912 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 536870912) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 536870912) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9PLUS-LABEL: name: test_load_flat_s32_align536870912 ; GFX9PLUS: liveins: $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 536870912) - ; GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 536870912) + ; GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX11PLUS-LABEL: name: test_load_flat_s32_align536870912 ; GFX11PLUS: liveins: $vgpr0_vgpr1 ; GFX11PLUS-NEXT: {{ $}} ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 536870912) - ; GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 536870912) + ; GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX12-LABEL: name: test_load_flat_s32_align536870912 ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 536870912) - ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 536870912) + ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_s32_align536870912 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX9PLUS-NEXT: {{ $}} ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 536870912) - ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 536870912) + ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_s32_align536870912 ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 536870912) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 536870912) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_s32_align536870912 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = 
COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 536870912) - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i16), align 536870912) + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_LOAD %0 :: (load (s16), align 536870912) - $vgpr0 = COPY %1 + %1:_(i32) = G_LOAD %0(p0) :: (load (i16), align 536870912) + $vgpr0 = COPY %1(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir index f384114ee4cde..68516b22da742 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir @@ -20,11 +20,11 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefixes=GFX9-MESA %s # ERR-NOT: remark -# ERR: remark: :0:0: unable to legalize instruction: %{{[0-9]+}}:_(<2 x s32>) = G_LOAD %{{[0-9]+}}:_(p1) :: (load (<2 x s16>), align 1, addrspace 1) (in function: test_extload_global_v2s32_from_v2s16_align1) -# ERR-NEXT: remark: :0:0: unable to legalize instruction: %{{[0-9]+}}:_(<2 x s32>) = G_LOAD %{{[0-9]+}}:_(p1) :: (load (<2 x s16>), align 2, addrspace 1) (in function: test_extload_global_v2s32_from_v2s16_align2) -# ERR-NEXT: remark: :0:0: unable to legalize instruction: %{{[0-9]+}}:_(<2 x s32>) = G_LOAD %{{[0-9]+}}:_(p1) :: (load (<2 x s16>), addrspace 1) (in function: test_extload_global_v2s32_from_v2s16_align4) -# ERR-NEXT: remark: :0:0: unable to legalize instruction: %{{[0-9]+}}:_(<3 x s32>) = G_LOAD %{{[0-9]+}}:_(p1) :: (load (<3 x s16>), align 4, addrspace 1) (in function: test_extload_global_v3s32_from_v3s16_align4) -# ERR-NEXT: remark: :0:0: unable to legalize instruction: %{{[0-9]+}}:_(<4 x s32>) = G_LOAD %{{[0-9]+}}:_(p1) :: (load (<4 x s16>), align 4, addrspace 1) (in function: test_extload_global_v4s32_from_v4s16_align4) +# ERR: remark: :0:0: unable to legalize instruction: %{{[0-9]+}}:_(<2 x i32>) = G_LOAD %{{[0-9]+}}:_(p1) :: (load (<2 x i16>), align 1, addrspace 1) (in function: test_extload_global_v2s32_from_v2s16_align1) +# ERR-NEXT: remark: :0:0: unable to legalize instruction: %{{[0-9]+}}:_(<2 x i32>) = G_LOAD %{{[0-9]+}}:_(p1) :: (load (<2 x i16>), align 2, addrspace 1) (in function: test_extload_global_v2s32_from_v2s16_align2) +# ERR-NEXT: remark: :0:0: unable to legalize instruction: %{{[0-9]+}}:_(<2 x i32>) = G_LOAD %{{[0-9]+}}:_(p1) :: (load (<2 x i16>), addrspace 1) (in function: test_extload_global_v2s32_from_v2s16_align4) +# ERR-NEXT: remark: :0:0: unable to legalize instruction: %{{[0-9]+}}:_(<3 x i32>) = G_LOAD %{{[0-9]+}}:_(p1) :: (load (<3 x i16>), align 4, addrspace 1) (in function: test_extload_global_v3s32_from_v3s16_align4) +# ERR-NEXT: remark: :0:0: unable to legalize instruction: %{{[0-9]+}}:_(<4 x i32>) = G_LOAD %{{[0-9]+}}:_(p1) :: (load (<4 x i16>), align 4, addrspace 1) (in function: test_extload_global_v4s32_from_v4s16_align4) # ERR-NOT: remark --- @@ -37,59 +37,59 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; SI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; SI-NEXT: 
[[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; SI-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; CI-HSA-LABEL: name: test_load_global_s1_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; CI-HSA-NEXT: $vgpr0 = COPY [[AND]](s32) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; CI-HSA-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; CI-MESA-LABEL: name: test_load_global_s1_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; CI-MESA-NEXT: $vgpr0 = COPY [[AND]](s32) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; CI-MESA-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; VI-LABEL: name: test_load_global_s1_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; VI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; VI-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; GFX9-HSA-LABEL: name: test_load_global_s1_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; GFX9-HSA-NEXT: $vgpr0 = COPY [[AND]](s32) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX9-HSA-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; GFX9-HSA-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; GFX9-MESA-LABEL: name: test_load_global_s1_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; GFX9-MESA-NEXT: $vgpr0 = COPY [[AND]](s32) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX9-MESA-NEXT: [[AND:%[0-9]+]]:_(i32) 
= G_AND [[LOAD]], [[C]] + ; GFX9-MESA-NEXT: $vgpr0 = COPY [[AND]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s1) = G_LOAD %0 :: (load (s1), align 1, addrspace 1) - %2:_(s32) = G_ZEXT %1 - $vgpr0 = COPY %2 + %1:_(i1) = G_LOAD %0(p1) :: (load (i1), addrspace 1) + %2:_(i32) = G_ZEXT %1(i1) + $vgpr0 = COPY %2(i32) ... --- @@ -102,59 +102,59 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; SI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; SI-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; CI-HSA-LABEL: name: test_load_global_s2_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; CI-HSA-NEXT: $vgpr0 = COPY [[AND]](s32) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; CI-HSA-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; CI-MESA-LABEL: name: test_load_global_s2_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; CI-MESA-NEXT: $vgpr0 = COPY [[AND]](s32) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; CI-MESA-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; VI-LABEL: name: test_load_global_s2_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; VI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; VI-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; GFX9-HSA-LABEL: name: test_load_global_s2_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; GFX9-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; GFX9-HSA-NEXT: $vgpr0 = COPY [[AND]](s32) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; GFX9-HSA-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND 
[[LOAD]], [[C]] + ; GFX9-HSA-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; GFX9-MESA-LABEL: name: test_load_global_s2_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; GFX9-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; GFX9-MESA-NEXT: $vgpr0 = COPY [[AND]](s32) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; GFX9-MESA-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; GFX9-MESA-NEXT: $vgpr0 = COPY [[AND]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s2) = G_LOAD %0 :: (load (s2), align 1, addrspace 1) - %2:_(s32) = G_ZEXT %1 - $vgpr0 = COPY %2 + %1:_(i2) = G_LOAD %0(p1) :: (load (i2), addrspace 1) + %2:_(i32) = G_ZEXT %1(i2) + $vgpr0 = COPY %2(i32) ... --- @@ -167,47 +167,47 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) - ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i8), align 4, addrspace 1) + ; SI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-HSA-LABEL: name: test_load_global_s8_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) - ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i8), align 4, addrspace 1) + ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-MESA-LABEL: name: test_load_global_s8_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) - ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i8), align 4, addrspace 1) + ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; VI-LABEL: name: test_load_global_s8_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i8), align 4, addrspace 1) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-HSA-LABEL: name: test_load_global_s8_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) - ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i8), align 4, addrspace 1) + ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-MESA-LABEL: name: test_load_global_s8_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) - ; GFX9-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32) 
+ ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i8), align 4, addrspace 1) + ; GFX9-MESA-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s8) = G_LOAD %0 :: (load (s8), align 4, addrspace 1) - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + %1:_(i8) = G_LOAD %0(p1) :: (load (i8), align 4, addrspace 1) + %2:_(i32) = G_ANYEXT %1(i8) + $vgpr0 = COPY %2(i32) ... --- @@ -220,47 +220,47 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; SI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-HSA-LABEL: name: test_load_global_s8_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-MESA-LABEL: name: test_load_global_s8_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; VI-LABEL: name: test_load_global_s8_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-HSA-LABEL: name: test_load_global_s8_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-MESA-LABEL: name: test_load_global_s8_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GFX9-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GFX9-MESA-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s8) = G_LOAD %0 :: (load (s8), align 1, addrspace 1) - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + %1:_(i8) = G_LOAD %0(p1) :: (load (i8), addrspace 1) + %2:_(i32) = G_ANYEXT %1(i8) + $vgpr0 = COPY %2(i32) ... 
--- @@ -273,47 +273,47 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) - ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 4, addrspace 1) + ; SI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-HSA-LABEL: name: test_load_global_s16_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) - ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 4, addrspace 1) + ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-MESA-LABEL: name: test_load_global_s16_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) - ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 4, addrspace 1) + ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; VI-LABEL: name: test_load_global_s16_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 4, addrspace 1) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-HSA-LABEL: name: test_load_global_s16_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) - ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 4, addrspace 1) + ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-MESA-LABEL: name: test_load_global_s16_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) - ; GFX9-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 4, addrspace 1) + ; GFX9-MESA-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s16) = G_LOAD %0 :: (load (s16), align 4, addrspace 1) - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + %1:_(i16) = G_LOAD %0(p1) :: (load (i16), align 4, addrspace 1) + %2:_(i32) = G_ANYEXT %1(i16) + $vgpr0 = COPY %2(i32) ... 
--- @@ -326,47 +326,47 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; SI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-HSA-LABEL: name: test_load_global_s16_align2 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-MESA-LABEL: name: test_load_global_s16_align2 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; VI-LABEL: name: test_load_global_s16_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-HSA-LABEL: name: test_load_global_s16_align2 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-MESA-LABEL: name: test_load_global_s16_align2 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; GFX9-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; GFX9-MESA-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s16) = G_LOAD %0 :: (load (s16), align 2, addrspace 1) - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + %1:_(i16) = G_LOAD %0(p1) :: (load (i16), addrspace 1) + %2:_(i32) = G_ANYEXT %1(i16) + $vgpr0 = COPY %2(i32) ... 
--- @@ -379,71 +379,71 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; CI-HSA-LABEL: name: test_load_global_s16_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1) - ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 1, addrspace 1) + ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-MESA-LABEL: name: test_load_global_s16_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-MESA-NEXT: $vgpr0 = COPY [[OR]](s32) + ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-MESA-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; VI-LABEL: name: test_load_global_s16_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - 
; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; GFX9-HSA-LABEL: name: test_load_global_s16_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1) - ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 1, addrspace 1) + ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-MESA-LABEL: name: test_load_global_s16_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-MESA-NEXT: $vgpr0 = COPY [[OR]](s32) + ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-MESA-NEXT: $vgpr0 = COPY [[OR]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s16) = G_LOAD %0 :: (load (s16), align 1, addrspace 1) - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + %1:_(i16) = G_LOAD %0(p1) :: (load (i16), align 1, addrspace 1) + %2:_(i32) = G_ANYEXT %1(i16) + $vgpr0 = COPY %2(i32) ... 
--- @@ -456,46 +456,46 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; SI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-HSA-LABEL: name: test_load_global_s32_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-MESA-LABEL: name: test_load_global_s32_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; VI-LABEL: name: test_load_global_s32_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-HSA-LABEL: name: test_load_global_s32_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-MESA-LABEL: name: test_load_global_s32_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; GFX9-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; GFX9-MESA-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 1) - $vgpr0 = COPY %1 + %1:_(i32) = G_LOAD %0(p1) :: (load (i32), addrspace 1) + $vgpr0 = COPY %1(i32) ... 
--- @@ -508,70 +508,70 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; CI-HSA-LABEL: name: test_load_global_s32_align2 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 2, addrspace 1) - ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), align 2, addrspace 1) + ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-MESA-LABEL: name: test_load_global_s32_align2 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-MESA-NEXT: $vgpr0 = COPY [[OR]](s32) + ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-MESA-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; VI-LABEL: name: test_load_global_s32_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, 
addrspace 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; GFX9-HSA-LABEL: name: test_load_global_s32_align2 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 2, addrspace 1) - ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), align 2, addrspace 1) + ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-MESA-LABEL: name: test_load_global_s32_align2 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-MESA-NEXT: $vgpr0 = COPY [[OR]](s32) + ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-MESA-NEXT: $vgpr0 = COPY [[OR]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_LOAD %0 :: (load (s32), align 2, addrspace 1) - $vgpr0 = COPY %1 + %1:_(i32) = G_LOAD %0(p1) :: (load (i32), align 2, addrspace 1) + $vgpr0 = COPY %1(i32) ... 
--- @@ -584,110 +584,110 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; SI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; SI-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; CI-HSA-LABEL: name: test_load_global_s32_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 1, addrspace 1) - ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), align 1, addrspace 1) + ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-MESA-LABEL: name: test_load_global_s32_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; CI-MESA-NEXT: 
[[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-MESA-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-MESA-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; VI-LABEL: name: test_load_global_s32_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: 
[[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; GFX9-HSA-LABEL: name: test_load_global_s32_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 1, addrspace 1) - ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), align 1, addrspace 1) + ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-MESA-LABEL: name: test_load_global_s32_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = 
G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX9-MESA-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX9-MESA-NEXT: $vgpr0 = COPY [[OR2]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_LOAD %0 :: (load (s32), align 1, addrspace 1) - $vgpr0 = COPY %1 + %1:_(i32) = G_LOAD %0(p1) :: (load (i32), align 1, addrspace 1) + $vgpr0 = COPY %1(i32) ... 
--- @@ -700,47 +700,47 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 8, addrspace 1) - ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), align 8, addrspace 1) + ; SI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-HSA-LABEL: name: test_load_global_s24_align8 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 8, addrspace 1) - ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), align 8, addrspace 1) + ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-MESA-LABEL: name: test_load_global_s24_align8 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 8, addrspace 1) - ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), align 8, addrspace 1) + ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; VI-LABEL: name: test_load_global_s24_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 8, addrspace 1) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), align 8, addrspace 1) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-HSA-LABEL: name: test_load_global_s24_align8 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 8, addrspace 1) - ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), align 8, addrspace 1) + ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-MESA-LABEL: name: test_load_global_s24_align8 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 8, addrspace 1) - ; GFX9-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), align 8, addrspace 1) + ; GFX9-MESA-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s24) = G_LOAD %0 :: (load (s24), align 8, addrspace 1) - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + %1:_(i24) = G_LOAD %0(p1) :: (load (i24), align 8, addrspace 1) + %2:_(i32) = G_ANYEXT %1(i24) + $vgpr0 = COPY %2(i32) ... 
--- @@ -753,47 +753,47 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; SI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-HSA-LABEL: name: test_load_global_s24_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-MESA-LABEL: name: test_load_global_s24_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; VI-LABEL: name: test_load_global_s24_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-HSA-LABEL: name: test_load_global_s24_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-MESA-LABEL: name: test_load_global_s24_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; GFX9-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; GFX9-MESA-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s24) = G_LOAD %0 :: (load (s24), align 4, addrspace 1) - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + %1:_(i24) = G_LOAD %0(p1) :: (load (i24), align 4, addrspace 1) + %2:_(i32) = G_ANYEXT %1(i24) + $vgpr0 = COPY %2(i32) ... 
--- @@ -806,83 +806,83 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 2, align 2, addrspace 1) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; CI-HSA-LABEL: name: test_load_global_s24_align2 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1) - ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-HSA-NEXT: $vgpr0 = COPY [[OR]](s32) + ; CI-HSA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 2, align 2, addrspace 1) + ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-HSA-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; CI-MESA-LABEL: name: test_load_global_s24_align2 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1) - ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-MESA-NEXT: $vgpr0 = COPY [[OR]](s32) + ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i64) 
= G_CONSTANT i64 2 + ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 2, align 2, addrspace 1) + ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-MESA-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; VI-LABEL: name: test_load_global_s24_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 2, align 2, addrspace 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; GFX9-HSA-LABEL: name: test_load_global_s24_align2 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1) - ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-HSA-NEXT: $vgpr0 = COPY [[OR]](s32) + ; GFX9-HSA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 2, align 2, addrspace 1) + ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-HSA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-HSA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-HSA-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; GFX9-MESA-LABEL: name: test_load_global_s24_align2 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = 
G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1) - ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-MESA-NEXT: $vgpr0 = COPY [[OR]](s32) + ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 2, align 2, addrspace 1) + ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-MESA-NEXT: $vgpr0 = COPY [[OR]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s24) = G_LOAD %0 :: (load (s24), align 2, addrspace 1) - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + %1:_(i24) = G_LOAD %0(p1) :: (load (i24), align 2, addrspace 1) + %2:_(i32) = G_ANYEXT %1(i24) + $vgpr0 = COPY %2(i32) ... --- @@ -895,107 +895,107 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] - ; SI-NEXT: $vgpr0 = COPY [[OR1]](s32) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C3]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[OR]] + ; SI-NEXT: $vgpr0 = COPY [[OR1]](i32) ; ; CI-HSA-LABEL: name: test_load_global_s24_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: 
[[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1) - ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-HSA-NEXT: $vgpr0 = COPY [[OR]](s32) + ; CI-HSA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), align 1, addrspace 1) + ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-HSA-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; CI-MESA-LABEL: name: test_load_global_s24_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) - ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] - ; CI-MESA-NEXT: $vgpr0 = COPY [[OR1]](s32) + ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C3]](i32) + ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[OR]] + ; CI-MESA-NEXT: $vgpr0 = COPY [[OR1]](i32) ; ; 
VI-LABEL: name: test_load_global_s24_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] - ; VI-NEXT: $vgpr0 = COPY [[OR1]](s32) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C3]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[OR]] + ; VI-NEXT: $vgpr0 = COPY [[OR1]](i32) ; ; GFX9-HSA-LABEL: name: test_load_global_s24_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1) - ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-HSA-NEXT: $vgpr0 = COPY [[OR]](s32) + ; GFX9-HSA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), align 1, addrspace 1) + ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-HSA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-HSA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-HSA-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; 
GFX9-MESA-LABEL: name: test_load_global_s24_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] - ; GFX9-MESA-NEXT: $vgpr0 = COPY [[OR1]](s32) + ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[OR]] + ; GFX9-MESA-NEXT: $vgpr0 = COPY [[OR1]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s24) = G_LOAD %0 :: (load (s24), align 1, addrspace 1) - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + %1:_(i24) = G_LOAD %0(p1) :: (load (i24), align 1, addrspace 1) + %2:_(i32) = G_ANYEXT %1(i24) + $vgpr0 = COPY %2(i32) ... 
--- @@ -1008,59 +1008,59 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[LOAD]], [[C]] - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[AND]](s64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p1) :: (load (i64), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 281474976710655 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[LOAD]], [[C]] + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[AND]](i64) ; ; CI-HSA-LABEL: name: test_load_global_s48_align8 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1) - ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 - ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[LOAD]], [[C]] - ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[AND]](s64) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p1) :: (load (i64), addrspace 1) + ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 281474976710655 + ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[LOAD]], [[C]] + ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[AND]](i64) ; ; CI-MESA-LABEL: name: test_load_global_s48_align8 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 - ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[LOAD]], [[C]] - ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[AND]](s64) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p1) :: (load (i64), addrspace 1) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 281474976710655 + ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[LOAD]], [[C]] + ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[AND]](i64) ; ; VI-LABEL: name: test_load_global_s48_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[LOAD]], [[C]] - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[AND]](s64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p1) :: (load (i64), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 281474976710655 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[LOAD]], [[C]] + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[AND]](i64) ; ; GFX9-HSA-LABEL: name: test_load_global_s48_align8 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1) - ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 - ; GFX9-HSA-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[LOAD]], [[C]] - ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[AND]](s64) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p1) :: (load (i64), addrspace 1) + ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 281474976710655 + ; GFX9-HSA-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[LOAD]], [[C]] + ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[AND]](i64) ; ; GFX9-MESA-LABEL: name: 
test_load_global_s48_align8 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 - ; GFX9-MESA-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[LOAD]], [[C]] - ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[AND]](s64) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p1) :: (load (i64), addrspace 1) + ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 281474976710655 + ; GFX9-MESA-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[LOAD]], [[C]] + ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[AND]](i64) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s48) = G_LOAD %0 :: (load (s48), align 8, addrspace 1) - %2:_(s64) = G_ZEXT %1 - $vgpr0_vgpr1 = COPY %2 + %1:_(i48) = G_LOAD %0(p1) :: (load (i48), align 8, addrspace 1) + %2:_(i64) = G_ZEXT %1(i48) + $vgpr0_vgpr1 = COPY %2(i64) ... --- @@ -1073,46 +1073,46 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p1) :: (load (i64), addrspace 1) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; CI-HSA-LABEL: name: test_load_global_s64_align8 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1) - ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p1) :: (load (i64), addrspace 1) + ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; CI-MESA-LABEL: name: test_load_global_s64_align8 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1) - ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p1) :: (load (i64), addrspace 1) + ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; VI-LABEL: name: test_load_global_s64_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p1) :: (load (i64), addrspace 1) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; GFX9-HSA-LABEL: name: test_load_global_s64_align8 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p1) :: (load (i64), addrspace 1) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; GFX9-MESA-LABEL: name: test_load_global_s64_align8 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9-MESA-NEXT: 
[[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p1) :: (load (i64), addrspace 1) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_LOAD %0 :: (load (s64), align 8, addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_LOAD %0(p1) :: (load (i64), addrspace 1) + $vgpr0_vgpr1 = COPY %1(i64) ... --- @@ -1125,46 +1125,46 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), align 4, addrspace 1) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p1) :: (load (i64), align 4, addrspace 1) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; CI-HSA-LABEL: name: test_load_global_s64_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), align 4, addrspace 1) - ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p1) :: (load (i64), align 4, addrspace 1) + ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; CI-MESA-LABEL: name: test_load_global_s64_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), align 4, addrspace 1) - ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p1) :: (load (i64), align 4, addrspace 1) + ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; VI-LABEL: name: test_load_global_s64_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), align 4, addrspace 1) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p1) :: (load (i64), align 4, addrspace 1) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; GFX9-HSA-LABEL: name: test_load_global_s64_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), align 4, addrspace 1) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p1) :: (load (i64), align 4, addrspace 1) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; GFX9-MESA-LABEL: name: test_load_global_s64_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), align 4, addrspace 1) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p1) :: (load (i64), align 4, addrspace 1) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_LOAD %0 :: (load (s64), align 4, addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_LOAD %0(p1) :: (load (i64), align 4, addrspace 1) + $vgpr0_vgpr1 = COPY %1(i64) ... 
--- @@ -1177,118 +1177,118 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](s64) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR]](i32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, addrspace 1) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR1]](i32) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C3]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i64) = G_OR [[SHL2]], [[ZEXT]] + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](i64) ; ; CI-HSA-LABEL: name: test_load_global_s64_align2 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), align 2, addrspace 1) - ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p1) :: (load (i64), align 2, addrspace 1) + ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; CI-MESA-LABEL: name: 
test_load_global_s64_align2 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) - ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; CI-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) - ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) - ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] - ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](s64) + ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR]](i32) + ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; CI-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR1]](i32) + ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C3]](i32) + ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(i64) = G_OR [[SHL2]], [[ZEXT]] + ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](i64) ; ; VI-LABEL: name: test_load_global_s64_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT 
i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](s64) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR]](i32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, addrspace 1) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR1]](i32) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i64) = G_OR [[SHL2]], [[ZEXT]] + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](i64) ; ; GFX9-HSA-LABEL: name: test_load_global_s64_align2 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), align 2, addrspace 1) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p1) :: (load (i64), align 2, addrspace 1) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; GFX9-MESA-LABEL: name: test_load_global_s64_align2 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; 
GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) - ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; GFX9-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) - ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] - ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](s64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR]](i32) + ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX9-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR1]](i32) + ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(i64) = G_OR [[SHL2]], [[ZEXT]] + ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](i64) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_LOAD %0 :: (load (s64), align 2, addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_LOAD %0(p1) :: (load (i64), align 2, addrspace 1) + $vgpr0_vgpr1 = COPY %1(i64) ... 
--- @@ -1301,190 +1301,190 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; SI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](s64) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR 
[[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, addrspace 1) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; SI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; SI-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; SI-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](i64) ; ; CI-HSA-LABEL: name: test_load_global_s64_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), align 1, addrspace 1) - ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p1) :: (load (i64), align 1, addrspace 1) + ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; CI-MESA-LABEL: name: test_load_global_s64_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-MESA-NEXT: 
[[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CI-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; CI-MESA-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](s64) + ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD 
[[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; CI-MESA-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; CI-MESA-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](i64) ; ; VI-LABEL: name: test_load_global_s64_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR 
[[SHL2]], [[OR]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; VI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](s64) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR 
[[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, addrspace 1) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; VI-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](i64) ; ; GFX9-HSA-LABEL: name: test_load_global_s64_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), align 1, addrspace 1) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p1) :: (load (i64), align 1, addrspace 1) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; GFX9-MESA-LABEL: name: test_load_global_s64_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX9-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p1) 
:: (load (s8) from unknown-address + 5, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX9-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX9-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX9-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; GFX9-MESA-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](s64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX9-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) 
from unknown-address + 6, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX9-MESA-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX9-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX9-MESA-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; GFX9-MESA-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](i64) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_LOAD %0 :: (load (s64), align 1, addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_LOAD %0(p1) :: (load (i64), align 1, addrspace 1) + $vgpr0_vgpr1 = COPY %1(i64) ... --- @@ -1497,54 +1497,54 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<4 x i32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; CI-HSA-LABEL: name: test_load_global_s96_align16 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1) - ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 16, addrspace 1) + ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; CI-MESA-LABEL: name: test_load_global_s96_align16 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1) - ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 16, addrspace 1) + ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; VI-LABEL: 
name: test_load_global_s96_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 16, addrspace 1) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX9-HSA-LABEL: name: test_load_global_s96_align16 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1) - ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 16, addrspace 1) + ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX9-MESA-LABEL: name: test_load_global_s96_align16 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1) - ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 16, addrspace 1) + ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s96) = G_LOAD %0 :: (load (s96), align 16, addrspace 1) - $vgpr0_vgpr1_vgpr2 = COPY %1 + %1:_(i96) = G_LOAD %0(p1) :: (load (i96), align 16, addrspace 1) + $vgpr0_vgpr1_vgpr2 = COPY %1(i96) ... 
--- @@ -1557,57 +1557,57 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from unknown-address + 8, align 8, addrspace 1) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load (<2 x i32>), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i32) from unknown-address + 8, align 8, addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<2 x i32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[LOAD1]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; CI-HSA-LABEL: name: test_load_global_s96_align8 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 8, addrspace 1) - ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 8, addrspace 1) + ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; CI-MESA-LABEL: name: test_load_global_s96_align8 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 8, addrspace 1) - ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 8, addrspace 1) + ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; VI-LABEL: name: test_load_global_s96_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 8, addrspace 1) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 8, addrspace 1) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX9-HSA-LABEL: name: test_load_global_s96_align8 ; GFX9-HSA: 
liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 8, addrspace 1) - ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 8, addrspace 1) + ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX9-MESA-LABEL: name: test_load_global_s96_align8 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 8, addrspace 1) - ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 8, addrspace 1) + ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s96) = G_LOAD %0 :: (load (s96), align 8, addrspace 1) - $vgpr0_vgpr1_vgpr2 = COPY %1 + %1:_(i96) = G_LOAD %0(p1) :: (load (i96), align 8, addrspace 1) + $vgpr0_vgpr1_vgpr2 = COPY %1(i96) ... --- @@ -1620,57 +1620,57 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from unknown-address + 8, addrspace 1) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load (<2 x i32>), align 4, addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i32) from unknown-address + 8, addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<2 x i32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[LOAD1]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; CI-HSA-LABEL: name: test_load_global_s96_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1) - ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load 
(<3 x i32>), align 4, addrspace 1) + ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; CI-MESA-LABEL: name: test_load_global_s96_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1) - ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 4, addrspace 1) + ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; VI-LABEL: name: test_load_global_s96_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 4, addrspace 1) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX9-HSA-LABEL: name: test_load_global_s96_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1) - ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 4, addrspace 1) + ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX9-MESA-LABEL: name: test_load_global_s96_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1) - ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 4, addrspace 1) + ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s96) = G_LOAD %0 :: (load (s96), align 4, addrspace 1) - $vgpr0_vgpr1_vgpr2 = COPY %1 + %1:_(i96) = G_LOAD %0(p1) :: (load (i96), align 4, addrspace 1) + $vgpr0_vgpr1_vgpr2 = COPY %1(i96) ... 
--- @@ -1683,136 +1683,136 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, addrspace 1) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i16) from unknown-address + 8, addrspace 1) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = 
G_LOAD [[PTR_ADD4]](p1) :: (load (i16) from unknown-address + 10, addrspace 1) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; CI-HSA-LABEL: name: test_load_global_s96_align2 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 2, addrspace 1) - ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 2, addrspace 1) + ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; CI-MESA-LABEL: name: test_load_global_s96_align2 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) - ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) - ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = 
G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i16) from unknown-address + 8, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p1) :: (load (i16) from unknown-address + 10, addrspace 1) + ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32) + ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; VI-LABEL: name: test_load_global_s96_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) 
- ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, addrspace 1) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i16) from unknown-address + 8, addrspace 1) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p1) :: (load (i16) from unknown-address + 10, addrspace 1) + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX9-HSA-LABEL: name: test_load_global_s96_align2 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 2, addrspace 1) - ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 2, addrspace 1) + ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX9-MESA-LABEL: name: test_load_global_s96_align2 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; 
GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) - ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i16) from unknown-address + 8, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p1) :: (load (i16) from unknown-address + 10, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR 
[[SHL2]], [[ZEXTLOAD2]] + ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32) + ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s96) = G_LOAD %0 :: (load (s96), align 2, addrspace 1) - $vgpr0_vgpr1_vgpr2 = COPY %1 + %1:_(i96) = G_LOAD %0(p1) :: (load (i96), align 2, addrspace 1) + $vgpr0_vgpr1_vgpr2 = COPY %1(i96) ... --- @@ -1825,240 +1825,240 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) - ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; SI-NEXT: 
[[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) - ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) - ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) - ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, addrspace 1) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR 
[[SHL4]], [[ZEXTLOAD5]] + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; SI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; SI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (i8) from unknown-address + 8, addrspace 1) + ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; SI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (i8) from unknown-address + 9, addrspace 1) + ; SI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; SI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (i8) from unknown-address + 10, addrspace 1) + ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p1) :: (load (i8) from unknown-address + 11, addrspace 1) + ; SI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; SI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; SI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; SI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; CI-HSA-LABEL: name: test_load_global_s96_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 1, addrspace 1) - ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 1, addrspace 1) + ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; CI-MESA-LABEL: name: test_load_global_s96_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; CI-MESA-NEXT: 
[[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) - ; CI-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; CI-MESA-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) - ; CI-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; CI-MESA-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; CI-MESA-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) 
:: (load (i8) from unknown-address + 1, addrspace 1) + ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (i8) from unknown-address + 8, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (i8) from unknown-address + 9, addrspace 1) + ; CI-MESA-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; CI-MESA-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (i8) from unknown-address + 10, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p1) :: (load (i8) from unknown-address + 11, addrspace 1) + ; CI-MESA-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-MESA-NEXT: 
[[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; CI-MESA-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; CI-MESA-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; VI-LABEL: name: test_load_global_s96_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: 
(load (s8) from unknown-address + 9, addrspace 1) - ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) - ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, addrspace 1) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) 
= G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; VI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (i8) from unknown-address + 8, addrspace 1) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (i8) from unknown-address + 9, addrspace 1) + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; VI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (i8) from unknown-address + 10, addrspace 1) + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p1) :: (load (i8) from unknown-address + 11, addrspace 1) + ; VI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; VI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; VI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX9-HSA-LABEL: name: test_load_global_s96_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 1, addrspace 1) - ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 1, addrspace 1) + ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX9-MESA-LABEL: name: test_load_global_s96_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; GFX9-MESA-NEXT: 
[[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX9-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; GFX9-MESA-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; GFX9-MESA-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], 
[[C]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX9-MESA-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; GFX9-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (i8) from unknown-address + 8, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (i8) from unknown-address + 9, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX9-MESA-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (i8) from unknown-address + 10, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD 
[[PTR_ADD10]](p1) :: (load (i8) from unknown-address + 11, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX9-MESA-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s96) = G_LOAD %0 :: (load (s96), align 1, addrspace 1) - $vgpr0_vgpr1_vgpr2 = COPY %1 + %1:_(i96) = G_LOAD %0(p1) :: (load (i96), align 1, addrspace 1) + $vgpr0_vgpr1_vgpr2 = COPY %1(i96) ... --- @@ -2071,82 +2071,82 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from unknown-address + 16, addrspace 1) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>) - ; SI-NEXT: S_NOP 0, implicit [[BITCAST]](s160) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), align 4, addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i32) from unknown-address + 16, addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<4 x i32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32), [[LOAD1]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i160) = G_BITCAST [[BUILD_VECTOR]](<5 x i32>) + ; SI-NEXT: S_NOP 0, implicit [[BITCAST]](i160) ; ; CI-HSA-LABEL: name: test_load_global_s160_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) - ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from unknown-address + 16, addrspace 1) - ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32) - ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>) - ; CI-HSA-NEXT: S_NOP 0, implicit [[BITCAST]](s160) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD 
[[COPY]](p1) :: (load (<4 x i32>), align 4, addrspace 1) + ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i32) from unknown-address + 16, addrspace 1) + ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<4 x i32>) + ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32), [[LOAD1]](i32) + ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(i160) = G_BITCAST [[BUILD_VECTOR]](<5 x i32>) + ; CI-HSA-NEXT: S_NOP 0, implicit [[BITCAST]](i160) ; ; CI-MESA-LABEL: name: test_load_global_s160_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from unknown-address + 16, addrspace 1) - ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32) - ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>) - ; CI-MESA-NEXT: S_NOP 0, implicit [[BITCAST]](s160) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), align 4, addrspace 1) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i32) from unknown-address + 16, addrspace 1) + ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<4 x i32>) + ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32), [[LOAD1]](i32) + ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(i160) = G_BITCAST [[BUILD_VECTOR]](<5 x i32>) + ; CI-MESA-NEXT: S_NOP 0, implicit [[BITCAST]](i160) ; ; VI-LABEL: name: test_load_global_s160_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from unknown-address + 16, addrspace 1) - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>) - ; VI-NEXT: S_NOP 0, implicit [[BITCAST]](s160) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), align 4, addrspace 1) + ; 
VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i32) from unknown-address + 16, addrspace 1) + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<4 x i32>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32), [[LOAD1]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i160) = G_BITCAST [[BUILD_VECTOR]](<5 x i32>) + ; VI-NEXT: S_NOP 0, implicit [[BITCAST]](i160) ; ; GFX9-HSA-LABEL: name: test_load_global_s160_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) - ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from unknown-address + 16, addrspace 1) - ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32) - ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>) - ; GFX9-HSA-NEXT: S_NOP 0, implicit [[BITCAST]](s160) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), align 4, addrspace 1) + ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i32) from unknown-address + 16, addrspace 1) + ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<4 x i32>) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32), [[LOAD1]](i32) + ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(i160) = G_BITCAST [[BUILD_VECTOR]](<5 x i32>) + ; GFX9-HSA-NEXT: S_NOP 0, implicit [[BITCAST]](i160) ; ; GFX9-MESA-LABEL: name: test_load_global_s160_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from unknown-address + 16, addrspace 1) - ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32) - ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>) - ; GFX9-MESA-NEXT: S_NOP 0, implicit [[BITCAST]](s160) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), 
align 4, addrspace 1) + ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i32) from unknown-address + 16, addrspace 1) + ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<4 x i32>) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32), [[LOAD1]](i32) + ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(i160) = G_BITCAST [[BUILD_VECTOR]](<5 x i32>) + ; GFX9-MESA-NEXT: S_NOP 0, implicit [[BITCAST]](i160) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s160) = G_LOAD %0 :: (load (s160), align 4, addrspace 1) - S_NOP 0, implicit %1 + %1:_(i160) = G_LOAD %0(p1) :: (load (i160), align 4, addrspace 1) + S_NOP 0, implicit %1(i160) ... --- @@ -2159,105 +2159,105 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<2 x s32>) from unknown-address + 16, align 4, addrspace 1) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C1]](s64) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s32) from unknown-address + 24, addrspace 1) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<2 x s32>) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV]](s32), [[UV1]](s32), [[LOAD2]](s32) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF - ; SI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0 - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), align 4, addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<2 x i32>) from unknown-address + 16, align 4, addrspace 1) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C1]](i64) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i32) from unknown-address + 24, addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD1]](<2 x i32>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<4 x i32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x i32>) = G_BUILD_VECTOR [[UV2]](i32), [[UV3]](i32), [[UV4]](i32), [[UV5]](i32), [[UV]](i32), [[UV1]](i32), [[LOAD2]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i224) = G_BITCAST [[BUILD_VECTOR]](<7 x i32>) + ; SI-NEXT: 
[[DEF:%[0-9]+]]:_(i256) = G_IMPLICIT_DEF + ; SI-NEXT: [[INSERT:%[0-9]+]]:_(i256) = G_INSERT [[DEF]], [[BITCAST]](i224), 0 + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](i256) ; ; CI-HSA-LABEL: name: test_load_global_s224_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) - ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 16, align 4, addrspace 1) - ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; CI-HSA-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>) - ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32) - ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>) - ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF - ; CI-HSA-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0 - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), align 4, addrspace 1) + ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x i32>) from unknown-address + 16, align 4, addrspace 1) + ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<4 x i32>) + ; CI-HSA-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD1]](<3 x i32>) + ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32), [[UV4]](i32), [[UV5]](i32), [[UV6]](i32) + ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(i224) = G_BITCAST [[BUILD_VECTOR]](<7 x i32>) + ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(i256) = G_IMPLICIT_DEF + ; CI-HSA-NEXT: [[INSERT:%[0-9]+]]:_(i256) = G_INSERT [[DEF]], [[BITCAST]](i224), 0 + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](i256) ; ; CI-MESA-LABEL: name: test_load_global_s224_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 16, align 4, addrspace 1) - ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; CI-MESA-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>) - ; CI-MESA-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32) - ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>) - ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF - ; CI-MESA-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0 - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), align 4, addrspace 1) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x i32>) from unknown-address + 16, align 4, addrspace 1) + ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<4 x i32>) + ; CI-MESA-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD1]](<3 x i32>) + ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32), [[UV4]](i32), [[UV5]](i32), [[UV6]](i32) + ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(i224) = G_BITCAST [[BUILD_VECTOR]](<7 x i32>) + ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(i256) = G_IMPLICIT_DEF + ; CI-MESA-NEXT: [[INSERT:%[0-9]+]]:_(i256) = G_INSERT [[DEF]], [[BITCAST]](i224), 0 + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](i256) ; ; VI-LABEL: name: test_load_global_s224_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 16, align 4, addrspace 1) - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF - ; VI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0 - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), align 4, addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x i32>) from unknown-address + 16, align 4, addrspace 1) + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<4 x i32>) + ; VI-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD1]](<3 x i32>) + ; VI-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<7 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32), [[UV4]](i32), [[UV5]](i32), [[UV6]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i224) = G_BITCAST [[BUILD_VECTOR]](<7 x i32>) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(i256) = G_IMPLICIT_DEF + ; VI-NEXT: [[INSERT:%[0-9]+]]:_(i256) = G_INSERT [[DEF]], [[BITCAST]](i224), 0 + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](i256) ; ; GFX9-HSA-LABEL: name: test_load_global_s224_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) - ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 16, align 4, addrspace 1) - ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; GFX9-HSA-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32) - ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>) - ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF - ; GFX9-HSA-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0 - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), align 4, addrspace 1) + ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x i32>) from unknown-address + 16, align 4, addrspace 1) + ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<4 x i32>) + ; GFX9-HSA-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD1]](<3 x i32>) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32), [[UV4]](i32), [[UV5]](i32), [[UV6]](i32) + ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(i224) = G_BITCAST [[BUILD_VECTOR]](<7 x i32>) + ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(i256) = G_IMPLICIT_DEF + ; GFX9-HSA-NEXT: [[INSERT:%[0-9]+]]:_(i256) = G_INSERT [[DEF]], [[BITCAST]](i224), 0 + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](i256) ; ; GFX9-MESA-LABEL: name: test_load_global_s224_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 16, align 4, addrspace 
1) - ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; GFX9-MESA-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32) - ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>) - ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF - ; GFX9-MESA-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0 - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), align 4, addrspace 1) + ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x i32>) from unknown-address + 16, align 4, addrspace 1) + ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<4 x i32>) + ; GFX9-MESA-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD1]](<3 x i32>) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32), [[UV4]](i32), [[UV5]](i32), [[UV6]](i32) + ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(i224) = G_BITCAST [[BUILD_VECTOR]](<7 x i32>) + ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(i256) = G_IMPLICIT_DEF + ; GFX9-MESA-NEXT: [[INSERT:%[0-9]+]]:_(i256) = G_INSERT [[DEF]], [[BITCAST]](i224), 0 + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](i256) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s224) = G_LOAD %0 :: (load (s224), align 4, addrspace 1) - %2:_(s256) = G_IMPLICIT_DEF - %3:_(s256) = G_INSERT %2, %1, 0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3 + %1:_(i224) = G_LOAD %0(p1) :: (load (i224), align 4, addrspace 1) + %2:_(i256) = G_IMPLICIT_DEF + %3:_(i256) = G_INSERT %2, %1(i224), 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3(i256) ... 
@@ -2271,52 +2271,52 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), addrspace 1) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; CI-HSA-LABEL: name: test_load_global_s128_align16 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), addrspace 1) + ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; CI-MESA-LABEL: name: test_load_global_s128_align16 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), addrspace 1) + ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; VI-LABEL: name: test_load_global_s128_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), addrspace 1) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX9-HSA-LABEL: name: test_load_global_s128_align16 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), addrspace 1) + ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX9-MESA-LABEL: name: test_load_global_s128_align16 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; GFX9-MESA-NEXT: 
[[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), addrspace 1) + ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s128) = G_LOAD %0 :: (load (s128), align 16, addrspace 1) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(i128) = G_LOAD %0(p1) :: (load (i128), addrspace 1) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(i128) ... --- @@ -2329,52 +2329,52 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), align 4, addrspace 1) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; CI-HSA-LABEL: name: test_load_global_s128_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) - ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), align 4, addrspace 1) + ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; CI-MESA-LABEL: name: test_load_global_s128_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) - ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), align 4, addrspace 1) + ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; VI-LABEL: name: test_load_global_s128_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), align 4, addrspace 1) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX9-HSA-LABEL: name: test_load_global_s128_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) - ; 
GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), align 4, addrspace 1) + ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX9-MESA-LABEL: name: test_load_global_s128_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) - ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), align 4, addrspace 1) + ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s128) = G_LOAD %0 :: (load (s128), align 4, addrspace 1) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(i128) = G_LOAD %0(p1) :: (load (i128), align 4, addrspace 1) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(i128) ... --- @@ -2387,300 +2387,300 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) 
from unknown-address + 6, addrspace 1) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) - ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; SI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) - ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) - ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) - ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1) - ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; SI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1) - ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; SI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1) - ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1) - ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; SI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: 
[[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, addrspace 1) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; SI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; SI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (i8) from unknown-address + 8, addrspace 1) + ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; SI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (i8) from unknown-address + 9, addrspace 1) + ; SI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; SI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (i8) from unknown-address + 10, addrspace 1) + ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p1) :: (load (i8) from unknown-address + 11, addrspace 1) + ; SI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; SI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; SI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; SI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR 
[[SHL8]], [[OR6]] + ; SI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; SI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p1) :: (load (i8) from unknown-address + 12, addrspace 1) + ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; SI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p1) :: (load (i8) from unknown-address + 13, addrspace 1) + ; SI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; SI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p1) :: (load (i8) from unknown-address + 14, addrspace 1) + ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p1) :: (load (i8) from unknown-address + 15, addrspace 1) + ; SI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; SI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; SI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; SI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; CI-HSA-LABEL: name: test_load_global_s128_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 1, addrspace 1) - ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), align 1, addrspace 1) + ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; CI-MESA-LABEL: name: test_load_global_s128_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-MESA-NEXT: 
[[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) - ; CI-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; CI-MESA-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) - ; CI-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; CI-MESA-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; CI-MESA-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CI-MESA-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1) - ; CI-MESA-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; 
CI-MESA-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1) - ; CI-MESA-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; CI-MESA-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; CI-MESA-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], 
[[ZEXTLOAD5]] + ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (i8) from unknown-address + 8, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (i8) from unknown-address + 9, addrspace 1) + ; CI-MESA-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; CI-MESA-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (i8) from unknown-address + 10, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p1) :: (load (i8) from unknown-address + 11, addrspace 1) + ; CI-MESA-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; CI-MESA-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; CI-MESA-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; CI-MESA-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p1) :: (load (i8) from unknown-address + 12, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p1) :: (load (i8) from unknown-address + 13, addrspace 1) + ; CI-MESA-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; CI-MESA-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p1) :: (load (i8) from unknown-address + 14, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p1) :: (load (i8) from unknown-address + 15, addrspace 1) + ; CI-MESA-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; CI-MESA-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; CI-MESA-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; VI-LABEL: name: test_load_global_s128_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD 
[[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) - ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) - ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; VI-NEXT: 
[[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1) - ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1) - ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1) - ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1) - ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = 
G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, addrspace 1) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; VI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (i8) from unknown-address + 8, addrspace 1) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (i8) from unknown-address + 9, addrspace 1) + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; VI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (i8) from unknown-address + 10, addrspace 1) + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p1) :: (load (i8) from unknown-address + 11, addrspace 1) + ; VI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; VI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; VI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; VI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; VI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p1) :: (load (i8) from unknown-address + 12, addrspace 1) + ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p1) :: (load (i8) from unknown-address + 13, addrspace 1) + ; VI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; VI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p1) :: (load (i8) from unknown-address + 14, addrspace 1) + ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p1) :: (load (i8) from unknown-address + 15, addrspace 1) + ; VI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; VI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; VI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; VI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX9-HSA-LABEL: name: test_load_global_s128_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: 
[[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 1, addrspace 1) - ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), align 1, addrspace 1) + ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX9-MESA-LABEL: name: test_load_global_s128_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX9-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-MESA-NEXT: 
[[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; GFX9-MESA-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; GFX9-MESA-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; GFX9-MESA-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; GFX9-MESA-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; GFX9-MESA-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; GFX9-MESA-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; 
GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX9-MESA-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; GFX9-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (i8) from unknown-address + 8, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (i8) from unknown-address + 9, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX9-MESA-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (i8) from unknown-address + 10, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p1) :: (load (i8) from unknown-address + 11, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX9-MESA-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; 
GFX9-MESA-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; GFX9-MESA-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p1) :: (load (i8) from unknown-address + 12, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p1) :: (load (i8) from unknown-address + 13, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; GFX9-MESA-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p1) :: (load (i8) from unknown-address + 14, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p1) :: (load (i8) from unknown-address + 15, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; GFX9-MESA-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s128) = G_LOAD %0 :: (load (s128), align 1, addrspace 1) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(i128) = G_LOAD %0(p1) :: (load (i128), align 1, addrspace 1) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(i128) ... 
--- @@ -2693,52 +2693,52 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), align 16, addrspace 1) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[LOAD]](<8 x s32>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x i32>) = G_LOAD [[COPY]](p1) :: (load (<8 x i32>), align 16, addrspace 1) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i256) = G_BITCAST [[LOAD]](<8 x i32>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](i256) ; ; CI-HSA-LABEL: name: test_load_global_s256_align32 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), align 16, addrspace 1) - ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[LOAD]](<8 x s32>) - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x i32>) = G_LOAD [[COPY]](p1) :: (load (<8 x i32>), align 16, addrspace 1) + ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(i256) = G_BITCAST [[LOAD]](<8 x i32>) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](i256) ; ; CI-MESA-LABEL: name: test_load_global_s256_align32 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), align 16, addrspace 1) - ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[LOAD]](<8 x s32>) - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x i32>) = G_LOAD [[COPY]](p1) :: (load (<8 x i32>), align 16, addrspace 1) + ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(i256) = G_BITCAST [[LOAD]](<8 x i32>) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](i256) ; ; VI-LABEL: name: test_load_global_s256_align32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), align 16, addrspace 1) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[LOAD]](<8 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x i32>) = G_LOAD [[COPY]](p1) :: (load (<8 x i32>), align 16, addrspace 1) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i256) = G_BITCAST [[LOAD]](<8 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](i256) ; ; GFX9-HSA-LABEL: name: test_load_global_s256_align32 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), align 16, addrspace 1) - ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[LOAD]](<8 x s32>) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x i32>) = G_LOAD [[COPY]](p1) :: (load (<8 x i32>), align 16, addrspace 1) + ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(i256) = G_BITCAST [[LOAD]](<8 x i32>) + ; GFX9-HSA-NEXT: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](i256) ; ; GFX9-MESA-LABEL: name: test_load_global_s256_align32 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), align 16, addrspace 1) - ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[LOAD]](<8 x s32>) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x i32>) = G_LOAD [[COPY]](p1) :: (load (<8 x i32>), align 16, addrspace 1) + ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(i256) = G_BITCAST [[LOAD]](<8 x i32>) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](i256) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s256) = G_LOAD %0 :: (load (s256), align 16, addrspace 1) - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 + %1:_(i256) = G_LOAD %0(p1) :: (load (i256), align 16, addrspace 1) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1(i256) ... --- @@ -2789,8 +2789,8 @@ body: | ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load (p1), addrspace 1) ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(p1) = G_LOAD %0 :: (load (p1), align 8, addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:_(p1) = G_LOAD %0(p1) :: (load (p1), addrspace 1) + $vgpr0_vgpr1 = COPY %1(p1) ... --- @@ -2841,8 +2841,8 @@ body: | ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load (p1), align 4, addrspace 1) ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(p1) = G_LOAD %0 :: (load (p1), align 4, addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:_(p1) = G_LOAD %0(p1) :: (load (p1), align 4, addrspace 1) + $vgpr0_vgpr1 = COPY %1(p1) ... 
--- @@ -2855,44 +2855,44 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; SI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](s64) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; SI-NEXT: 
[[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, addrspace 1) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; SI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; SI-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; SI-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](i64) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) ; ; CI-HSA-LABEL: name: test_load_global_p1_align1 @@ -2906,88 +2906,88 @@ body: | ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; CI-MESA-NEXT: 
[[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CI-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; CI-MESA-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; CI-MESA-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](s64) + ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL 
[[OR1]], [[C3]](i32) + ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; CI-MESA-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; CI-MESA-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; CI-MESA-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](i64) ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) ; ; VI-LABEL: name: test_load_global_p1_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD 
[[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; VI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](s64) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, addrspace 1) + ; VI-NEXT: 
[[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; VI-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](i64) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) ; ; GFX9-HSA-LABEL: name: test_load_global_p1_align1 @@ -3001,48 +3001,48 @@ body: | ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX9-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, 
addrspace 1) - ; GFX9-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX9-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX9-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX9-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; GFX9-MESA-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; GFX9-MESA-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](s64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX9-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX9-MESA-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX9-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT 
[[OR5]](i32) + ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX9-MESA-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; GFX9-MESA-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; GFX9-MESA-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](i64) ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(p1) = G_LOAD %0 :: (load (p1), align 1, addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:_(p1) = G_LOAD %0(p1) :: (load (p1), align 1, addrspace 1) + $vgpr0_vgpr1 = COPY %1(p1) ... --- @@ -3092,9 +3092,9 @@ body: | ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p1) :: (load (p3), addrspace 1) ; GFX9-MESA-NEXT: $vgpr0 = COPY [[LOAD]](p3) - %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(p3) = G_LOAD %0 :: (load (p3), align 4, addrspace 1) - $vgpr0 = COPY %1 + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(p3) = G_LOAD %0(p1) :: (load (p3), addrspace 1) + $vgpr0 = COPY %1(p3) ... --- @@ -3145,8 +3145,8 @@ body: | ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), addrspace 1) ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(p4) = G_LOAD %0 :: (load (p4), align 8, addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:_(p4) = G_LOAD %0(p1) :: (load (p4), addrspace 1) + $vgpr0_vgpr1 = COPY %1(p4) ... --- @@ -3197,8 +3197,8 @@ body: | ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), align 4, addrspace 1) ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(p4) = G_LOAD %0 :: (load (p4), align 4, addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:_(p4) = G_LOAD %0(p1) :: (load (p4), align 4, addrspace 1) + $vgpr0_vgpr1 = COPY %1(p4) ... 
--- @@ -3211,26 +3211,26 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] - ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR2]](s64) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR]](i32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, addrspace 1) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR1]](i32) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C3]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i64) = G_OR [[SHL2]], [[ZEXT]] + ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR2]](i64) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p4) ; ; CI-HSA-LABEL: name: test_load_global_p4_align2 @@ -3244,52 +3244,52 @@ body: | ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD 
[[COPY]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) - ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; CI-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) - ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) - ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] - ; CI-MESA-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR2]](s64) + ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR]](i32) + ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; CI-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR1]](i32) + ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C3]](i32) + ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(i64) = G_OR [[SHL2]], [[ZEXT]] + ; CI-MESA-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR2]](i64) ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p4) ; ; VI-LABEL: name: test_load_global_p4_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: 
[[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] - ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR2]](s64) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR]](i32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, addrspace 1) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR1]](i32) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i64) = G_OR [[SHL2]], [[ZEXT]] + ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR2]](i64) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p4) ; ; GFX9-HSA-LABEL: name: test_load_global_p4_align2 @@ -3303,30 +3303,30 @@ body: | ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) - ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = 
G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; GFX9-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) - ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] - ; GFX9-MESA-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR2]](s64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR]](i32) + ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX9-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR1]](i32) + ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(i64) = G_OR [[SHL2]], [[ZEXT]] + ; GFX9-MESA-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR2]](i64) ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p4) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(p4) = G_LOAD %0 :: (load (p4), align 2, addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:_(p4) = G_LOAD %0(p1) :: (load (p4), align 2, addrspace 1) + $vgpr0_vgpr1 = COPY %1(p4) ... 
--- @@ -3339,44 +3339,44 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; SI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR6]](s64) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; SI-NEXT: 
[[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, addrspace 1) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; SI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; SI-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; SI-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR6]](i64) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p4) ; ; CI-HSA-LABEL: name: test_load_global_p4_align1 @@ -3390,88 +3390,88 @@ body: | ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; CI-MESA-NEXT: 
[[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CI-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; CI-MESA-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; CI-MESA-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR6]](s64) + ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL 
[[OR1]], [[C3]](i32) + ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; CI-MESA-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; CI-MESA-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; CI-MESA-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR6]](i64) ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p4) ; ; VI-LABEL: name: test_load_global_p4_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD 
[[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; VI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR6]](s64) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, addrspace 1) + ; VI-NEXT: 
[[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; VI-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR6]](i64) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p4) ; ; GFX9-HSA-LABEL: name: test_load_global_p4_align1 @@ -3485,48 +3485,48 @@ body: | ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX9-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, 
addrspace 1) - ; GFX9-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX9-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX9-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX9-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; GFX9-MESA-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; GFX9-MESA-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR6]](s64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX9-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX9-MESA-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX9-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT 
[[OR5]](i32) + ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX9-MESA-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; GFX9-MESA-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; GFX9-MESA-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR6]](i64) ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p4) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(p4) = G_LOAD %0 :: (load (p4), align 1, addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:_(p4) = G_LOAD %0(p1) :: (load (p4), align 1, addrspace 1) + $vgpr0_vgpr1 = COPY %1(p4) ... --- @@ -3577,8 +3577,8 @@ body: | ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p1) :: (load (p5), addrspace 1) ; GFX9-MESA-NEXT: $vgpr0 = COPY [[LOAD]](p5) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(p5) = G_LOAD %0 :: (load (p5), align 4, addrspace 1) - $vgpr0 = COPY %1 + %1:_(p5) = G_LOAD %0(p1) :: (load (p5), addrspace 1) + $vgpr0 = COPY %1(p5) ... --- @@ -3591,14 +3591,14 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](i32) ; SI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) ; ; CI-HSA-LABEL: name: test_load_global_p5_align2 @@ -3612,28 +3612,28 @@ body: | ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-MESA-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) + ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) 
= G_SHL [[LOAD]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-MESA-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](i32) ; CI-MESA-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) ; ; VI-LABEL: name: test_load_global_p5_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](i32) ; VI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) ; ; GFX9-HSA-LABEL: name: test_load_global_p5_align2 @@ -3647,18 +3647,18 @@ body: | ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-MESA-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) + ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-MESA-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](i32) ; GFX9-MESA-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(p5) = G_LOAD %0 :: (load (p5), align 2, addrspace 1) - $vgpr0 = COPY %1 + %1:_(p5) = G_LOAD %0(p1) :: (load (p5), align 2, addrspace 1) + $vgpr0 = COPY %1(p5) ... 
--- @@ -3671,24 +3671,24 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](i32) ; SI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) ; ; CI-HSA-LABEL: name: test_load_global_p5_align1 @@ -3702,48 +3702,48 @@ body: | ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA-NEXT: 
[[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-MESA-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) + ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-MESA-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](i32) ; CI-MESA-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) ; ; VI-LABEL: name: test_load_global_p5_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = 
G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](i32) ; VI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) ; ; GFX9-HSA-LABEL: name: test_load_global_p5_align1 @@ -3757,28 +3757,28 @@ body: | ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX9-MESA-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) + 
; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX9-MESA-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](i32) ; GFX9-MESA-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(p5) = G_LOAD %0 :: (load (p5), align 1, addrspace 1) - $vgpr0 = COPY %1 + %1:_(p5) = G_LOAD %0(p1) :: (load (p5), align 1, addrspace 1) + $vgpr0 = COPY %1(p5) ... --- @@ -3791,48 +3791,48 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) - ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 4, addrspace 1) + ; SI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-HSA-LABEL: name: test_load_global_v2s8_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) - ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 4, addrspace 1) + ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-MESA-LABEL: name: test_load_global_v2s8_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) - ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 4, addrspace 1) + ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; VI-LABEL: name: test_load_global_v2s8_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 4, addrspace 1) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-HSA-LABEL: name: test_load_global_v2s8_align4 ; 
GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) - ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 4, addrspace 1) + ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-MESA-LABEL: name: test_load_global_v2s8_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) - ; GFX9-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 4, addrspace 1) + ; GFX9-MESA-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), align 4, addrspace 1) - %2:_(s16) = G_BITCAST %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + %1:_(<2 x i8>) = G_LOAD %0(p1) :: (load (<2 x i8>), align 4, addrspace 1) + %2:_(i16) = G_BITCAST %1(<2 x i8>) + %3:_(i32) = G_ANYEXT %2(i16) + $vgpr0 = COPY %3(i32) ... --- @@ -3845,48 +3845,48 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; SI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-HSA-LABEL: name: test_load_global_v2s8_align2 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-MESA-LABEL: name: test_load_global_v2s8_align2 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; VI-LABEL: name: test_load_global_v2s8_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-HSA-LABEL: name: test_load_global_v2s8_align2 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-MESA-LABEL: name: test_load_global_v2s8_align2 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY 
$vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; GFX9-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; GFX9-MESA-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), align 2, addrspace 1) - %2:_(s16) = G_BITCAST %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + %1:_(<2 x i8>) = G_LOAD %0(p1) :: (load (<2 x i8>), addrspace 1) + %2:_(i16) = G_BITCAST %1(<2 x i8>) + %3:_(i32) = G_ANYEXT %2(i16) + $vgpr0 = COPY %3(i32) ... --- @@ -3899,72 +3899,72 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; CI-HSA-LABEL: name: test_load_global_v2s8_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1) - ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 1, addrspace 1) + ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-MESA-LABEL: name: test_load_global_v2s8_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-MESA-NEXT: $vgpr0 = COPY [[OR]](s32) + ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-MESA-NEXT: 
[[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-MESA-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; VI-LABEL: name: test_load_global_v2s8_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; GFX9-HSA-LABEL: name: test_load_global_v2s8_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1) - ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 1, addrspace 1) + ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-MESA-LABEL: name: test_load_global_v2s8_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-MESA-NEXT: $vgpr0 = COPY [[OR]](s32) + ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-MESA-NEXT: $vgpr0 = COPY [[OR]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), align 1, addrspace 1) - %2:_(s16) = G_BITCAST %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + %1:_(<2 x i8>) = G_LOAD %0(p1) :: (load (<2 x i8>), align 1, addrspace 
1) + %2:_(i16) = G_BITCAST %1(<2 x i8>) + %3:_(i32) = G_ANYEXT %2(i16) + $vgpr0 = COPY %3(i32) ... --- @@ -3977,192 +3977,192 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) - ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; SI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C2]] + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C]](i32) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LSHR]], [[C3]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[COPY1]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[SHL]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[TRUNC1]] + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C2]] + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C]](i32) + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[DEF]], [[C3]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[COPY2]](i32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[SHL1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[TRUNC3]] + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL2]] + ; SI-NEXT: $vgpr0 = COPY 
[[OR2]](i32) ; ; CI-HSA-LABEL: name: test_load_global_v3s8_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CI-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; CI-HSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) - ; CI-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; CI-HSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; CI-HSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]] - ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) - ; CI-HSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-HSA-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; CI-HSA-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; CI-HSA-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-HSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CI-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C2]] + ; CI-HSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C]](i32) + ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LSHR]], [[C3]] + ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[COPY1]](i32) + ; CI-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[SHL]](i32) + ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[TRUNC1]] + ; CI-HSA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C2]] + ; CI-HSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C]](i32) + ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[DEF]], [[C3]] + ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[COPY2]](i32) + ; CI-HSA-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[SHL1]](i32) + ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[TRUNC3]] + ; CI-HSA-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; CI-HSA-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; CI-HSA-NEXT: 
[[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL2]] + ; CI-HSA-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; CI-MESA-LABEL: name: test_load_global_v3s8_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CI-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) - ; CI-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; CI-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]] - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) - ; CI-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; CI-MESA-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; CI-MESA-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-MESA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CI-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C2]] + ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C]](i32) + ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LSHR]], [[C3]] + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[COPY1]](i32) + ; CI-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[SHL]](i32) + ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[TRUNC1]] + ; CI-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C2]] + ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C]](i32) + ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[DEF]], [[C3]] + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[COPY2]](i32) + ; CI-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[SHL1]](i32) + ; 
CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[TRUNC3]] + ; CI-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; CI-MESA-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL2]] + ; CI-MESA-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; VI-LABEL: name: test_load_global_v3s8_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C2]] + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C3]](i16) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL]] + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C2]] + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C2]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C3]](i16) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL1]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR 
[[ZEXT]], [[SHL2]] + ; VI-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; GFX9-HSA-LABEL: name: test_load_global_v3s8_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-HSA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-HSA-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] - ; GFX9-HSA-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-HSA-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16) - ; GFX9-HSA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX9-HSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-HSA-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; GFX9-HSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; GFX9-HSA-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]] - ; GFX9-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16) - ; GFX9-HSA-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9-HSA-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; GFX9-HSA-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; GFX9-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; GFX9-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; GFX9-HSA-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-HSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX9-HSA-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C2]] + ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-HSA-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C2]] + ; GFX9-HSA-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX9-HSA-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C3]](i16) + ; GFX9-HSA-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL]] + ; GFX9-HSA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-HSA-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C2]] + ; GFX9-HSA-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; GFX9-HSA-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C2]] + ; GFX9-HSA-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C3]](i16) + ; GFX9-HSA-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL1]] + ; GFX9-HSA-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; GFX9-HSA-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; GFX9-HSA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; GFX9-HSA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR 
[[ZEXT]], [[SHL2]] + ; GFX9-HSA-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; GFX9-MESA-LABEL: name: test_load_global_v3s8_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-MESA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-MESA-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] - ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16) - ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-MESA-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; GFX9-MESA-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]] - ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16) - ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; GFX9-MESA-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; GFX9-MESA-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-MESA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX9-MESA-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C2]] + ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-MESA-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C2]] + ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C3]](i16) + ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL]] + ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-MESA-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C2]] + ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; GFX9-MESA-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C2]] + ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C3]](i16) + ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL1]] + ; GFX9-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; GFX9-MESA-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], 
[[C1]](i32) + ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX9-MESA-NEXT: $vgpr0 = COPY [[OR2]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<3 x s8>) = G_LOAD %0 :: (load (<3 x s8>), align 4, addrspace 1) - %2:_(s24) = G_BITCAST %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + %1:_(<3 x i8>) = G_LOAD %0(p1) :: (load (<3 x i8>), align 4, addrspace 1) + %2:_(i24) = G_BITCAST %1(<3 x i8>) + %3:_(i32) = G_ANYEXT %2(i24) + $vgpr0 = COPY %3(i32) ... @@ -4176,242 +4176,242 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C5]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C5]] - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) - ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; SI-NEXT: $vgpr0 = COPY [[OR4]](s32) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR 
[[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C3]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[OR]] + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[OR1]], [[C1]](i32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[OR1]], [[C3]](i32) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C4]] + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C1]](i32) + ; SI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LSHR]], [[C5]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[COPY1]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[SHL2]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i16) = G_OR [[AND]], [[TRUNC1]] + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C4]] + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C1]](i32) + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[DEF]], [[C5]] + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[COPY2]](i32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[SHL3]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[TRUNC3]] + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR2]](i16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR3]](i16) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C3]](i32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL4]] + ; SI-NEXT: $vgpr0 = COPY [[OR4]](i32) ; ; CI-HSA-LABEL: name: test_load_global_v3s8_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1) - ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C2]](s32) - ; CI-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) - ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CI-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; CI-HSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C4]] - ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) - ; CI-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; CI-HSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; 
CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] - ; CI-HSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C4]] - ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) - ; CI-HSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-HSA-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; CI-HSA-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; CI-HSA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] - ; CI-HSA-NEXT: $vgpr0 = COPY [[OR3]](s32) + ; CI-HSA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), align 1, addrspace 1) + ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-HSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[OR]], [[C2]](i32) + ; CI-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[OR]], [[C1]](i32) + ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CI-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR]](i32) + ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C3]] + ; CI-HSA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LSHR]], [[C4]] + ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[COPY1]](i32) + ; CI-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[SHL1]](i32) + ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND]], [[TRUNC1]] + ; CI-HSA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C3]] + ; CI-HSA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[DEF]], [[C4]] + ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[COPY2]](i32) + ; CI-HSA-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[SHL2]](i32) + ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[TRUNC3]] + ; CI-HSA-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; CI-HSA-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR2]](i16) + ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; CI-HSA-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL3]] + ; CI-HSA-NEXT: $vgpr0 = COPY [[OR3]](i32) ; ; CI-MESA-LABEL: name: test_load_global_v3s8_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) 
- ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) - ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] - ; CI-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32) - ; CI-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32) - ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CI-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] - ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C5]] - ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) - ; CI-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; CI-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] - ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C5]] - ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) - ; CI-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; CI-MESA-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) - ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; CI-MESA-NEXT: $vgpr0 = COPY [[OR4]](s32) + ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C3]](i32) + ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[OR]] + ; CI-MESA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[OR1]], [[C1]](i32) + ; CI-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[OR1]], [[C3]](i32) + ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CI-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C4]] + ; CI-MESA-NEXT: 
[[COPY1:%[0-9]+]]:_(i32) = COPY [[C1]](i32) + ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LSHR]], [[C5]] + ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[COPY1]](i32) + ; CI-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[SHL2]](i32) + ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(i16) = G_OR [[AND]], [[TRUNC1]] + ; CI-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C4]] + ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C1]](i32) + ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[DEF]], [[C5]] + ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[COPY2]](i32) + ; CI-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[SHL3]](i32) + ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[TRUNC3]] + ; CI-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR2]](i16) + ; CI-MESA-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR3]](i16) + ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C3]](i32) + ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL4]] + ; CI-MESA-NEXT: $vgpr0 = COPY [[OR4]](i32) ; ; VI-LABEL: name: test_load_global_v3s8_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]] - ; VI-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C5]](s16) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL2]] - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C4]] - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C5]](s16) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL3]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) - ; VI-NEXT: 
[[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; VI-NEXT: $vgpr0 = COPY [[OR4]](s32) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C3]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[OR]] + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[OR1]], [[C1]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[OR1]], [[C3]](i32) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C4]] + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C4]] + ; VI-NEXT: [[C5:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C5]](i16) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL2]] + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C4]] + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C4]] + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C5]](i16) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL3]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR2]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR3]](i16) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C3]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL4]] + ; VI-NEXT: $vgpr0 = COPY [[OR4]](i32) ; ; GFX9-HSA-LABEL: name: test_load_global_v3s8_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1) - ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C2]](s32) - ; GFX9-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) - ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; GFX9-HSA-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-HSA-NEXT: 
[[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-HSA-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] - ; GFX9-HSA-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C4]](s16) - ; GFX9-HSA-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL1]] - ; GFX9-HSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-HSA-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] - ; GFX9-HSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; GFX9-HSA-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] - ; GFX9-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) - ; GFX9-HSA-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL2]] - ; GFX9-HSA-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; GFX9-HSA-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; GFX9-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; GFX9-HSA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] - ; GFX9-HSA-NEXT: $vgpr0 = COPY [[OR3]](s32) + ; GFX9-HSA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), align 1, addrspace 1) + ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-HSA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-HSA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-HSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[OR]], [[C2]](i32) + ; GFX9-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[OR]], [[C1]](i32) + ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR]](i32) + ; GFX9-HSA-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX9-HSA-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C3]] + ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-HSA-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C3]] + ; GFX9-HSA-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX9-HSA-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C4]](i16) + ; GFX9-HSA-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL1]] + ; GFX9-HSA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-HSA-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C3]] + ; GFX9-HSA-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; GFX9-HSA-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C3]] + ; GFX9-HSA-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C4]](i16) + ; GFX9-HSA-NEXT: [[OR2:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL2]] + ; GFX9-HSA-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; GFX9-HSA-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR2]](i16) + ; GFX9-HSA-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; GFX9-HSA-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL3]] + ; GFX9-HSA-NEXT: $vgpr0 = COPY [[OR3]](i32) ; ; GFX9-MESA-LABEL: name: test_load_global_v3s8_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) 
= G_CONSTANT i64 1 - ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] - ; GFX9-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-MESA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] - ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-MESA-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]] - ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C5]](s16) - ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL2]] - ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-MESA-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] - ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; GFX9-MESA-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C4]] - ; GFX9-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C5]](s16) - ; GFX9-MESA-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL3]] - ; GFX9-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; GFX9-MESA-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; GFX9-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; GFX9-MESA-NEXT: $vgpr0 = COPY [[OR4]](s32) + ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[OR]] + ; GFX9-MESA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[OR1]], [[C1]](i32) + ; GFX9-MESA-NEXT: 
[[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[OR1]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX9-MESA-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C4]] + ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-MESA-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C4]] + ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C5]](i16) + ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL2]] + ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-MESA-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C4]] + ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; GFX9-MESA-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C4]] + ; GFX9-MESA-NEXT: [[SHL3:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C5]](i16) + ; GFX9-MESA-NEXT: [[OR3:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL3]] + ; GFX9-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR2]](i16) + ; GFX9-MESA-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR3]](i16) + ; GFX9-MESA-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX9-MESA-NEXT: $vgpr0 = COPY [[OR4]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<3 x s8>) = G_LOAD %0 :: (load (<3 x s8>), align 1, addrspace 1) - %2:_(s24) = G_BITCAST %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + %1:_(<3 x i8>) = G_LOAD %0(p1) :: (load (<3 x i8>), align 1, addrspace 1) + %2:_(i24) = G_BITCAST %1(<3 x i8>) + %3:_(i32) = G_ANYEXT %2(i24) + $vgpr0 = COPY %3(i32) ... --- @@ -4424,106 +4424,106 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) - ; SI-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x s8>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C2]](i32) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LSHR]](i32), [[LSHR1]](i32), [[LSHR2]](i32) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x i8>) = G_TRUNC [[BUILD_VECTOR]](<4 x i32>) + ; SI-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x i8>) ; ; CI-HSA-LABEL: name: test_load_global_v4s8_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 
(s32), addrspace 1) - ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CI-HSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) - ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32) - ; CI-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) - ; CI-HSA-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x s8>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-HSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; CI-HSA-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C2]](i32) + ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LSHR]](i32), [[LSHR1]](i32), [[LSHR2]](i32) + ; CI-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x i8>) = G_TRUNC [[BUILD_VECTOR]](<4 x i32>) + ; CI-HSA-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x i8>) ; ; CI-MESA-LABEL: name: test_load_global_v4s8_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CI-MESA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) - ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32) - ; CI-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) - ; CI-MESA-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x s8>) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-MESA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; CI-MESA-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C2]](i32) + ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LSHR]](i32), [[LSHR1]](i32), [[LSHR2]](i32) + ; CI-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x i8>) = G_TRUNC [[BUILD_VECTOR]](<4 x i32>) + ; CI-MESA-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x i8>) ; ; VI-LABEL: name: test_load_global_v4s8_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: 
[[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) - ; VI-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x s8>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C2]](i32) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LSHR]](i32), [[LSHR1]](i32), [[LSHR2]](i32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x i8>) = G_TRUNC [[BUILD_VECTOR]](<4 x i32>) + ; VI-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x i8>) ; ; GFX9-HSA-LABEL: name: test_load_global_v4s8_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9-HSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) - ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-HSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-HSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-HSA-NEXT: [[TRUNC4:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) - ; GFX9-HSA-NEXT: $vgpr0 = COPY [[TRUNC4]](<4 x s8>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-HSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; GFX9-HSA-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C2]](i32) + ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-HSA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-HSA-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR 
[[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>) + ; GFX9-HSA-NEXT: [[TRUNC4:%[0-9]+]]:_(<4 x i8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x i16>) + ; GFX9-HSA-NEXT: $vgpr0 = COPY [[TRUNC4]](<4 x i8>) ; ; GFX9-MESA-LABEL: name: test_load_global_v4s8_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9-MESA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) - ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) - ; GFX9-MESA-NEXT: $vgpr0 = COPY [[TRUNC4]](<4 x s8>) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-MESA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; GFX9-MESA-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C2]](i32) + ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>) + ; GFX9-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(<4 x i8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x i16>) + ; GFX9-MESA-NEXT: $vgpr0 = COPY [[TRUNC4]](<4 x i8>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<4 x s8>) = G_LOAD %0 :: (load (<4 x s8>), align 4, addrspace 1) - $vgpr0 = COPY %1 + %1:_(<4 x i8>) = G_LOAD %0(p1) :: (load (<4 x i8>), addrspace 1) + $vgpr0 = COPY %1(<4 x i8>) ... 
--- @@ -4536,126 +4536,126 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C2]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C3]](s32) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) - ; SI-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x s8>) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[OR]], [[C2]](i32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[OR]], [[C1]](i32) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[OR]], [[C3]](i32) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[LSHR]](i32), [[LSHR1]](i32), [[LSHR2]](i32) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x i8>) = G_TRUNC [[BUILD_VECTOR]](<4 x i32>) + ; SI-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x i8>) ; ; CI-HSA-LABEL: name: test_load_global_v4s8_align2 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 2, addrspace 1) - ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CI-HSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) - ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32) - ; CI-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) - ; CI-HSA-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x s8>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), align 2, addrspace 1) + ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-HSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-HSA-NEXT: 
[[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; CI-HSA-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C2]](i32) + ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LSHR]](i32), [[LSHR1]](i32), [[LSHR2]](i32) + ; CI-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x i8>) = G_TRUNC [[BUILD_VECTOR]](<4 x i32>) + ; CI-HSA-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x i8>) ; ; CI-MESA-LABEL: name: test_load_global_v4s8_align2 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C2]](s32) - ; CI-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) - ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CI-MESA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C3]](s32) - ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32) - ; CI-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) - ; CI-MESA-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x s8>) + ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-MESA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[OR]], [[C2]](i32) + ; CI-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[OR]], [[C1]](i32) + ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; CI-MESA-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[OR]], [[C3]](i32) + ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[LSHR]](i32), [[LSHR1]](i32), [[LSHR2]](i32) + ; CI-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x i8>) = G_TRUNC [[BUILD_VECTOR]](<4 x i32>) + ; CI-MESA-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x i8>) ; ; VI-LABEL: name: test_load_global_v4s8_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) 
- ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C2]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C3]](s32) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) - ; VI-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x s8>) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[OR]], [[C2]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[OR]], [[C1]](i32) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[OR]], [[C3]](i32) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[LSHR]](i32), [[LSHR1]](i32), [[LSHR2]](i32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x i8>) = G_TRUNC [[BUILD_VECTOR]](<4 x i32>) + ; VI-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x i8>) ; ; GFX9-HSA-LABEL: name: test_load_global_v4s8_align2 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 2, addrspace 1) - ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9-HSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) - ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-HSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-HSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-HSA-NEXT: [[TRUNC4:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) - ; GFX9-HSA-NEXT: $vgpr0 = COPY [[TRUNC4]](<4 x s8>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), align 2, addrspace 1) + ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-HSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; GFX9-HSA-NEXT: 
[[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; GFX9-HSA-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C2]](i32) + ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-HSA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-HSA-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>) + ; GFX9-HSA-NEXT: [[TRUNC4:%[0-9]+]]:_(<4 x i8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x i16>) + ; GFX9-HSA-NEXT: $vgpr0 = COPY [[TRUNC4]](<4 x i8>) ; ; GFX9-MESA-LABEL: name: test_load_global_v4s8_align2 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C2]](s32) - ; GFX9-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9-MESA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) - ; GFX9-MESA-NEXT: $vgpr0 = COPY [[TRUNC4]](<4 x s8>) + ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-MESA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[OR]], [[C2]](i32) + ; GFX9-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[OR]], [[C1]](i32) + ; 
GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; GFX9-MESA-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[OR]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR]](i32) + ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>) + ; GFX9-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(<4 x i8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x i16>) + ; GFX9-MESA-NEXT: $vgpr0 = COPY [[TRUNC4]](<4 x i8>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<4 x s8>) = G_LOAD %0 :: (load (<4 x s8>), align 2, addrspace 1) - $vgpr0 = COPY %1 + %1:_(<4 x i8>) = G_LOAD %0(p1) :: (load (<4 x i8>), align 2, addrspace 1) + $vgpr0 = COPY %1(<4 x i8>) ... --- @@ -4668,162 +4668,162 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR2]], [[C1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR2]], [[C3]](s32) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[OR2]], [[C4]](s32) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) - ; SI-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x s8>) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) 
+ ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[OR2]], [[C1]](i32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[OR2]], [[C3]](i32) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[OR2]], [[C4]](i32) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[LSHR]](i32), [[LSHR1]](i32), [[LSHR2]](i32) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x i8>) = G_TRUNC [[BUILD_VECTOR]](<4 x i32>) + ; SI-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x i8>) ; ; CI-HSA-LABEL: name: test_load_global_v4s8_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 1, addrspace 1) - ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CI-HSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) - ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32) - ; CI-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) - ; CI-HSA-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x s8>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), align 1, addrspace 1) + ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-HSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; CI-HSA-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C2]](i32) + ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LSHR]](i32), [[LSHR1]](i32), [[LSHR2]](i32) + ; CI-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x i8>) = G_TRUNC [[BUILD_VECTOR]](<4 x i32>) + ; CI-HSA-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x i8>) ; ; CI-MESA-LABEL: name: test_load_global_v4s8_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, 
addrspace 1) - ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR2]], [[C1]](s32) - ; CI-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR2]], [[C3]](s32) - ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CI-MESA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[OR2]], [[C4]](s32) - ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32) - ; CI-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) - ; CI-MESA-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x s8>) + ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-MESA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[OR2]], [[C1]](i32) + ; CI-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[OR2]], [[C3]](i32) + ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; CI-MESA-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[OR2]], [[C4]](i32) + ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[LSHR]](i32), [[LSHR1]](i32), [[LSHR2]](i32) + ; CI-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x i8>) = G_TRUNC [[BUILD_VECTOR]](<4 x i32>) + ; CI-MESA-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x i8>) ; ; VI-LABEL: name: test_load_global_v4s8_align1 
; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR2]], [[C1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR2]], [[C3]](s32) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[OR2]], [[C4]](s32) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) - ; VI-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x s8>) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[OR2]], [[C1]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[OR2]], [[C3]](i32) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[OR2]], [[C4]](i32) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), 
[[LSHR]](i32), [[LSHR1]](i32), [[LSHR2]](i32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x i8>) = G_TRUNC [[BUILD_VECTOR]](<4 x i32>) + ; VI-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x i8>) ; ; GFX9-HSA-LABEL: name: test_load_global_v4s8_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 1, addrspace 1) - ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9-HSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) - ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-HSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-HSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-HSA-NEXT: [[TRUNC4:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) - ; GFX9-HSA-NEXT: $vgpr0 = COPY [[TRUNC4]](<4 x s8>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), align 1, addrspace 1) + ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-HSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; GFX9-HSA-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C2]](i32) + ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-HSA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-HSA-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>) + ; GFX9-HSA-NEXT: [[TRUNC4:%[0-9]+]]:_(<4 x i8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x i16>) + ; GFX9-HSA-NEXT: $vgpr0 = COPY [[TRUNC4]](<4 x i8>) ; ; GFX9-MESA-LABEL: name: test_load_global_v4s8_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA-NEXT: 
[[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX9-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR2]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR2]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9-MESA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[OR2]], [[C4]](s32) - ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32) - ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) - ; GFX9-MESA-NEXT: $vgpr0 = COPY [[TRUNC4]](<4 x s8>) + ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX9-MESA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[OR2]], [[C1]](i32) + ; GFX9-MESA-NEXT: 
[[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[OR2]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; GFX9-MESA-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[OR2]], [[C4]](i32) + ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR2]](i32) + ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>) + ; GFX9-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(<4 x i8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x i16>) + ; GFX9-MESA-NEXT: $vgpr0 = COPY [[TRUNC4]](<4 x i8>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<4 x s8>) = G_LOAD %0 :: (load (<4 x s8>), align 1, addrspace 1) - $vgpr0 = COPY %1 + %1:_(<4 x i8>) = G_LOAD %0(p1) :: (load (<4 x i8>), align 1, addrspace 1) + $vgpr0 = COPY %1(<4 x i8>) ... --- @@ -4836,47 +4836,47 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load (<2 x i32>), addrspace 1) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; CI-HSA-LABEL: name: test_load_global_v8s8_align8 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1) - ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load (<2 x i32>), addrspace 1) + ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; CI-MESA-LABEL: name: test_load_global_v8s8_align8 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1) - ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load (<2 x i32>), addrspace 1) + ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; VI-LABEL: name: test_load_global_v8s8_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load (<2 x i32>), addrspace 1) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX9-HSA-LABEL: name: test_load_global_v8s8_align8 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load (<2 x i32>), addrspace 1) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1 
= COPY [[LOAD]](<2 x i32>) ; ; GFX9-MESA-LABEL: name: test_load_global_v8s8_align8 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load (<2 x i32>), addrspace 1) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<8 x s8>) = G_LOAD %0 :: (load (<8 x s8>), align 8, addrspace 1) - %2:_(<2 x s32>) = G_BITCAST %1 - $vgpr0_vgpr1 = COPY %2 + %1:_(<8 x i8>) = G_LOAD %0(p1) :: (load (<8 x i8>), addrspace 1) + %2:_(<2 x i32>) = G_BITCAST %1(<8 x i8>) + $vgpr0_vgpr1 = COPY %2(<2 x i32>) ... --- @@ -4889,47 +4889,47 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), addrspace 1) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; CI-HSA-LABEL: name: test_load_global_v16s8_align16 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), addrspace 1) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; CI-MESA-LABEL: name: test_load_global_v16s8_align16 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), addrspace 1) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; VI-LABEL: name: test_load_global_v16s8_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), addrspace 1) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; GFX9-HSA-LABEL: name: test_load_global_v16s8_align16 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), addrspace 1) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; GFX9-MESA-LABEL: name: test_load_global_v16s8_align16 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; 
GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), addrspace 1) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<16 x s8>) = G_LOAD %0 :: (load (<16 x s8>), align 16, addrspace 1) - %2:_(<4 x s32>) = G_BITCAST %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + %1:_(<16 x i8>) = G_LOAD %0(p1) :: (load (<16 x i8>), addrspace 1) + %2:_(<4 x i32>) = G_BITCAST %1(<16 x i8>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<4 x i32>) ... --- @@ -4942,47 +4942,47 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x i32>) = G_LOAD [[COPY]](p1) :: (load (<8 x i32>), addrspace 1) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x i32>) ; ; CI-HSA-LABEL: name: test_load_global_v32s8_align32 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1) - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x i32>) = G_LOAD [[COPY]](p1) :: (load (<8 x i32>), addrspace 1) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x i32>) ; ; CI-MESA-LABEL: name: test_load_global_v32s8_align32 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1) - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x i32>) = G_LOAD [[COPY]](p1) :: (load (<8 x i32>), addrspace 1) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x i32>) ; ; VI-LABEL: name: test_load_global_v32s8_align32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x i32>) = G_LOAD [[COPY]](p1) :: (load (<8 x i32>), addrspace 1) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x i32>) ; ; GFX9-HSA-LABEL: name: test_load_global_v32s8_align32 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x i32>) = G_LOAD [[COPY]](p1) :: (load (<8 x i32>), addrspace 1) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x i32>) ; ; GFX9-MESA-LABEL: name: test_load_global_v32s8_align32 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: 
{{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x i32>) = G_LOAD [[COPY]](p1) :: (load (<8 x i32>), addrspace 1) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x i32>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<32 x s8>) = G_LOAD %0 :: (load (<32 x s8>), align 32, addrspace 1) - %2:_(<8 x s32>) = G_BITCAST %1 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %2 + %1:_(<32 x i8>) = G_LOAD %0(p1) :: (load (<32 x i8>), addrspace 1) + %2:_(<8 x i32>) = G_BITCAST %1(<32 x i8>) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %2(<8 x i32>) ... --- @@ -4996,46 +4996,46 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1) - ; SI-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p1) :: (load (<2 x i16>), addrspace 1) + ; SI-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; ; CI-HSA-LABEL: name: test_load_global_v2s16_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1) - ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p1) :: (load (<2 x i16>), addrspace 1) + ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; ; CI-MESA-LABEL: name: test_load_global_v2s16_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1) - ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p1) :: (load (<2 x i16>), addrspace 1) + ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; ; VI-LABEL: name: test_load_global_v2s16_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p1) :: (load (<2 x i16>), addrspace 1) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; ; GFX9-HSA-LABEL: name: test_load_global_v2s16_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1) - ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p1) :: (load (<2 x i16>), addrspace 1) + ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; ; GFX9-MESA-LABEL: name: test_load_global_v2s16_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1) - ; GFX9-MESA-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX9-MESA-NEXT: 
[[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p1) :: (load (<2 x i16>), addrspace 1) + ; GFX9-MESA-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 1) - $vgpr0 = COPY %1 + %1:_(<2 x i16>) = G_LOAD %0(p1) :: (load (<2 x i16>), addrspace 1) + $vgpr0 = COPY %1(<2 x i16>) ... --- @@ -5048,82 +5048,82 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C1]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C2]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x i16>) ; ; CI-HSA-LABEL: name: test_load_global_v2s16_align2 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 2, addrspace 1) - ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p1) :: (load (<2 x i16>), align 2, addrspace 1) + ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; ; CI-MESA-LABEL: name: test_load_global_v2s16_align2 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]] - ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]] - ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-MESA-NEXT: 
[[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C1]] + ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C1]] + ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C2]](i32) + ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x i16>) ; ; VI-LABEL: name: test_load_global_v2s16_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C1]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C1]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C2]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x i16>) ; ; GFX9-HSA-LABEL: name: test_load_global_v2s16_align2 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 2, addrspace 1) - ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p1) :: (load (<2 x i16>), align 2, addrspace 1) + ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; ; GFX9-MESA-LABEL: name: test_load_global_v2s16_align2 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; 
GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-MESA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-MESA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 2, addrspace 1) - $vgpr0 = COPY %1 + %1:_(<2 x i16>) = G_LOAD %0(p1) :: (load (<2 x i16>), align 2, addrspace 1) + $vgpr0 = COPY %1(<2 x i16>) ... --- @@ -5136,122 +5136,122 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C3]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C3]] - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL2]] - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; SI-NEXT: 
[[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[OR]], [[C3]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[OR1]], [[C3]] + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C4]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL2]] + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x i16>) ; ; CI-HSA-LABEL: name: test_load_global_v2s16_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 1, addrspace 1) - ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p1) :: (load (<2 x i16>), align 1, addrspace 1) + ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; ; CI-MESA-LABEL: name: test_load_global_v2s16_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C3]] - ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C3]] - ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL2]] - ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-MESA-NEXT: 
$vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[OR]], [[C3]] + ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[OR1]], [[C3]] + ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C4]](i32) + ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL2]] + ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x i16>) ; ; VI-LABEL: name: test_load_global_v2s16_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C3]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C3]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL2]] - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 
1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[OR]], [[C3]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[OR1]], [[C3]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C4]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL2]] + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x i16>) ; ; GFX9-HSA-LABEL: name: test_load_global_v2s16_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 1, addrspace 1) - ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p1) :: (load (<2 x i16>), align 1, addrspace 1) + ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; ; GFX9-MESA-LABEL: name: test_load_global_v2s16_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; GFX9-MESA-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-MESA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR]](i32) + ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-MESA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 1, addrspace 1) - $vgpr0 = COPY %1 + %1:_(<2 x i16>) = G_LOAD %0(p1) :: (load (<2 x i16>), align 1, addrspace 1) + $vgpr0 = COPY %1(<2 x i16>) ... 
--- @@ -5264,154 +5264,154 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1) - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p1) :: (load (<4 x i16>), addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[LOAD]](<4 x i16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C1]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND2]], [[C]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL1]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[UV]](<2 x i16>), [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; CI-HSA-LABEL: name: test_load_global_v3s16_align8 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), 
addrspace 1) - ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-HSA-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; CI-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] - ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p1) :: (load (<4 x i16>), addrspace 1) + ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[LOAD]](<4 x i16>) + ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; CI-HSA-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; CI-HSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C1]] + ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C1]] + ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND2]], [[C]](i32) + ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL1]] + ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CI-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[UV]](<2 x i16>), [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; CI-MESA-LABEL: name: test_load_global_v3s16_align8 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1) - ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), 
[[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-MESA-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; CI-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] - ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p1) :: (load (<4 x i16>), addrspace 1) + ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[LOAD]](<4 x i16>) + ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; CI-MESA-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; CI-MESA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C1]] + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C1]] + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND2]], [[C]](i32) + ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL1]] + ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CI-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[UV]](<2 x i16>), [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; VI-LABEL: name: test_load_global_v3s16_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1) - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES 
[[LOAD]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p1) :: (load (<4 x i16>), addrspace 1) + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[LOAD]](<4 x i16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C1]] + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C1]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND2]], [[C]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL1]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[UV]](<2 x i16>), [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX9-HSA-LABEL: name: test_load_global_v3s16_align8 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1) - ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 16 - ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-HSA-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-HSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-HSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p1) :: (load (<4 x i16>), addrspace 1) + ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[LOAD]](<4 x i16>) + ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX9-HSA-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9-HSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX9-HSA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX9-HSA-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[UV]](<2 x i16>), [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX9-MESA-LABEL: name: test_load_global_v3s16_align8 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1) - ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; 
GFX9-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p1) :: (load (<4 x i16>), addrspace 1) + ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[LOAD]](<4 x i16>) + ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX9-MESA-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9-MESA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[UV]](<2 x i16>), [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 8, addrspace 1) - %2:_(<3 x s16>) = G_IMPLICIT_DEF - %3:_(<6 x s16>) = G_CONCAT_VECTORS %1, %2 - $vgpr0_vgpr1_vgpr2 = COPY %3 + %1:_(<3 x i16>) = G_LOAD %0(p1) :: (load (<3 x i16>), align 8, addrspace 1) + %2:_(<3 x i16>) = G_IMPLICIT_DEF + %3:_(<6 x i16>) = G_CONCAT_VECTORS %1(<3 x i16>), %2(<3 x i16>) + $vgpr0_vgpr1_vgpr2 = COPY %3(<6 x i16>) ... 
--- @@ -5424,204 +5424,204 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 4, addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, align 4, addrspace 1) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C3]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND 
[[LOAD1]], [[C3]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C2]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C3]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C3]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C2]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C3]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND4]], [[C2]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL2]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x i16>), [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; CI-HSA-LABEL: name: test_load_global_v3s16_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) - ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) - ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]] - ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]] - ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]] - ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]] - ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) - ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) - ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] - ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; CI-HSA-NEXT: 
[[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 4, addrspace 1) + ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, align 4, addrspace 1) + ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-HSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C3]] + ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C3]] + ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C2]](i32) + ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C3]] + ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C3]] + ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C2]](i32) + ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C3]] + ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND4]], [[C2]](i32) + ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL2]] + ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; CI-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x i16>), [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; CI-MESA-LABEL: name: test_load_global_v3s16_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) - ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; CI-MESA-NEXT: 
[[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]] - ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]] - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]] - ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]] - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) - ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) - ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] - ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 4, addrspace 1) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, align 4, addrspace 1) + ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C3]] + ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C3]] + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C2]](i32) + ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C3]] + ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C3]] + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C2]](i32) + ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C3]] + ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND4]], [[C2]](i32) + ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL2]] + ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; CI-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS 
[[BITCAST2]](<2 x i16>), [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; VI-LABEL: name: test_load_global_v3s16_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 4, addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, align 4, addrspace 1) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; 
VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C3]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C3]] + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C2]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C3]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C3]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C2]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; VI-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C3]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND4]], [[C2]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL2]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x i16>), [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX9-HSA-LABEL: name: test_load_global_v3s16_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) - ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) - ; GFX9-HSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-HSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; GFX9-HSA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-HSA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 4, addrspace 1) 
+ ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; GFX9-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, align 4, addrspace 1) + ; GFX9-HSA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX9-HSA-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-HSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; GFX9-HSA-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX9-HSA-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX9-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX9-MESA-LABEL: name: test_load_global_v3s16_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) - ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) - ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; GFX9-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC 
[[LSHR]](s32) - ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 4, addrspace 1) + ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, align 4, addrspace 1) + ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-MESA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; GFX9-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX9-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX9-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 4, addrspace 1) - %2:_(<3 x s16>) = G_IMPLICIT_DEF - %3:_(<6 x s16>) = G_CONCAT_VECTORS %1, %2 - $vgpr0_vgpr1_vgpr2 = COPY %3 + %1:_(<3 x i16>) = G_LOAD %0(p1) :: (load (<3 x i16>), align 4, addrspace 1) + %2:_(<3 x i16>) = G_IMPLICIT_DEF + %3:_(<6 x i16>) = G_CONCAT_VECTORS %1(<3 x i16>), %2(<3 x i16>) + $vgpr0_vgpr1_vgpr2 = COPY %3(<6 x i16>) ... 
--- @@ -5634,204 +5634,204 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, addrspace 1) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C3]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C3]] + ; SI-NEXT: 
[[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C2]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C3]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C3]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C2]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C3]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND4]], [[C2]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL2]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x i16>), [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; CI-HSA-LABEL: name: test_load_global_v3s16_align2 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) - ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]] - ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]] - ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]] - ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]] - ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) - ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) - ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] - ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load 
(i16), addrspace 1) + ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, addrspace 1) + ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-HSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C3]] + ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C3]] + ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C2]](i32) + ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C3]] + ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C3]] + ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C2]](i32) + ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C3]] + ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND4]], [[C2]](i32) + ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL2]] + ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; CI-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x i16>), [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; CI-MESA-LABEL: name: test_load_global_v3s16_align2 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) - ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 
65535 - ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]] - ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]] - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]] - ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]] - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) - ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) - ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] - ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, addrspace 1) + ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C3]] + ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C3]] + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C2]](i32) + ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C3]] + ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C3]] + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C2]](i32) + ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C3]] + ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND4]], [[C2]](i32) + ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL2]] + ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; CI-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x i16>), [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY 
[[CONCAT_VECTORS]](<6 x i16>) ; ; VI-LABEL: name: test_load_global_v3s16_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, addrspace 1) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C3]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = 
G_AND [[LOAD1]], [[C3]] + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C2]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C3]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C3]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C2]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; VI-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C3]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND4]], [[C2]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL2]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x i16>), [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX9-HSA-LABEL: name: test_load_global_v3s16_align2 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) - ; GFX9-HSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-HSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; GFX9-HSA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-HSA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = 
G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; GFX9-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, addrspace 1) + ; GFX9-HSA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX9-HSA-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-HSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; GFX9-HSA-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX9-HSA-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX9-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX9-MESA-LABEL: name: test_load_global_v3s16_align2 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) - ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; GFX9-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-MESA-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, addrspace 1) + ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-MESA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; GFX9-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX9-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX9-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 2, addrspace 1) - %2:_(<3 x s16>) = G_IMPLICIT_DEF - %3:_(<6 x s16>) = G_CONCAT_VECTORS %1, %2 - $vgpr0_vgpr1_vgpr2 = COPY %3 + %1:_(<3 x i16>) = G_LOAD %0(p1) :: (load (<3 x i16>), align 2, addrspace 1) + %2:_(<3 x i16>) = G_IMPLICIT_DEF + %3:_(<6 x i16>) = G_CONCAT_VECTORS %1(<3 x i16>), %2(<3 x i16>) + $vgpr0_vgpr1_vgpr2 = COPY %3(<6 x i16>) ... 
--- @@ -5844,260 +5844,260 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C5]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C5]] - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL3]] - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C5]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]] - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]] - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; SI-NEXT: 
[[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C4]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; SI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[OR]], [[C5]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[OR1]], [[C5]] + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C4]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL3]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR3]](i32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[OR2]], [[C5]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C5]] + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C4]](i32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL4]] + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR4]](i32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C5]] + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[AND4]], [[C4]](i32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL5]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR5]](i32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x i16>), [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; CI-HSA-LABEL: name: test_load_global_v3s16_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1) - ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-HSA-NEXT: 
[[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, align 1, addrspace 1) - ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 1, addrspace 1) - ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]] - ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]] - ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]] - ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]] - ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) - ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) - ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] - ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 1, addrspace 1) + ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, align 1, addrspace 1) + ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, align 1, addrspace 1) + ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-HSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C3]] + ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C3]] + ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C2]](i32) + ; CI-HSA-NEXT: 
[[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C3]] + ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C3]] + ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C2]](i32) + ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C3]] + ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND4]], [[C2]](i32) + ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL2]] + ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; CI-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x i16>), [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; CI-MESA-LABEL: name: test_load_global_v3s16_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) - ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C5]] - ; CI-MESA-NEXT: 
[[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C5]] - ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL3]] - ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C5]] - ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]] - ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) - ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] - ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] - ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32) - ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]] - ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; CI-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C4]](i32) + ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[OR]], [[C5]] + ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(i32) = 
G_AND [[OR1]], [[C5]] + ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C4]](i32) + ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL3]] + ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR3]](i32) + ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[OR2]], [[C5]] + ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C5]] + ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C4]](i32) + ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL4]] + ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR4]](i32) + ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C5]] + ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[AND4]], [[C4]](i32) + ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL5]] + ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR5]](i32) + ; CI-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x i16>), [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; VI-LABEL: name: test_load_global_v3s16_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C5]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C5]] - ; 
VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL3]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C5]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]] - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C4]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[OR]], [[C5]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[OR1]], [[C5]] + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C4]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL3]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR3]](i32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND 
[[OR2]], [[C5]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C5]] + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C4]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL4]] + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR4]](i32) + ; VI-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[AND4]], [[C4]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL5]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR5]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x i16>), [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX9-HSA-LABEL: name: test_load_global_v3s16_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1) - ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, align 1, addrspace 1) - ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 1, addrspace 1) - ; GFX9-HSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-HSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; GFX9-HSA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-HSA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 1, addrspace 1) + ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, align 1, addrspace 1) + ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; 
GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; GFX9-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, align 1, addrspace 1) + ; GFX9-HSA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX9-HSA-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-HSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; GFX9-HSA-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX9-HSA-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX9-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX9-MESA-LABEL: name: test_load_global_v3s16_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from 
unknown-address + 5, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32) - ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) - ; GFX9-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR]](i32) + ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[OR2]](i32) + ; 
GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-MESA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C4]](i32) + ; GFX9-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX9-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX9-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 1, addrspace 1) - %2:_(<3 x s16>) = G_IMPLICIT_DEF - %3:_(<6 x s16>) = G_CONCAT_VECTORS %1, %2 - $vgpr0_vgpr1_vgpr2 = COPY %3 + %1:_(<3 x i16>) = G_LOAD %0(p1) :: (load (<3 x i16>), align 1, addrspace 1) + %2:_(<3 x i16>) = G_IMPLICIT_DEF + %3:_(<6 x i16>) = G_CONCAT_VECTORS %1(<3 x i16>), %2(<3 x i16>) + $vgpr0_vgpr1_vgpr2 = COPY %3(<6 x i16>) ... --- @@ -6110,46 +6110,46 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p1) :: (load (<4 x i16>), addrspace 1) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; CI-HSA-LABEL: name: test_load_global_v4s16_align8 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1) - ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p1) :: (load (<4 x i16>), addrspace 1) + ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; CI-MESA-LABEL: name: test_load_global_v4s16_align8 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1) - ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p1) :: (load (<4 x i16>), addrspace 1) + ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; VI-LABEL: name: test_load_global_v4s16_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p1) :: (load (<4 x i16>), addrspace 1) + ; VI-NEXT: $vgpr0_vgpr1 = 
COPY [[LOAD]](<4 x i16>) ; ; GFX9-HSA-LABEL: name: test_load_global_v4s16_align8 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p1) :: (load (<4 x i16>), addrspace 1) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; GFX9-MESA-LABEL: name: test_load_global_v4s16_align8 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p1) :: (load (<4 x i16>), addrspace 1) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 8, addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:_(<4 x i16>) = G_LOAD %0(p1) :: (load (<4 x i16>), addrspace 1) + $vgpr0_vgpr1 = COPY %1(<4 x i16>) ... --- @@ -6162,46 +6162,46 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p1) :: (load (<4 x i16>), align 4, addrspace 1) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; CI-HSA-LABEL: name: test_load_global_v4s16_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1) - ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p1) :: (load (<4 x i16>), align 4, addrspace 1) + ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; CI-MESA-LABEL: name: test_load_global_v4s16_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1) - ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p1) :: (load (<4 x i16>), align 4, addrspace 1) + ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; VI-LABEL: name: test_load_global_v4s16_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p1) :: (load (<4 x i16>), align 4, addrspace 1) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; GFX9-HSA-LABEL: name: test_load_global_v4s16_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = 
COPY [[LOAD]](<4 x s16>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p1) :: (load (<4 x i16>), align 4, addrspace 1) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; GFX9-MESA-LABEL: name: test_load_global_v4s16_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p1) :: (load (<4 x i16>), align 4, addrspace 1) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 4, addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:_(<4 x i16>) = G_LOAD %0(p1) :: (load (<4 x i16>), align 4, addrspace 1) + $vgpr0_vgpr1 = COPY %1(<4 x i16>) ... --- @@ -6214,128 +6214,128 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]] - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, addrspace 1) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = 
G_PTR_ADD [[COPY]], [[C2]](i64) + ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C3]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C3]] + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C4]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C3]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[LOAD3]], [[C3]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C4]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x i16>), [[BITCAST1]](<2 x i16>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; CI-HSA-LABEL: name: test_load_global_v4s16_align2 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 2, addrspace 1) - ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p1) :: (load (<4 x i16>), align 2, addrspace 1) + ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; CI-MESA-LABEL: name: test_load_global_v4s16_align2 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) - ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]] - ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]] - ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]] - ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]] - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) - ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x 
s16>), [[BITCAST1]](<2 x s16>) - ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, addrspace 1) + ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C3]] + ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C3]] + ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C4]](i32) + ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C3]] + ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[LOAD3]], [[C3]] + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C4]](i32) + ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CI-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x i16>), [[BITCAST1]](<2 x i16>) + ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; VI-LABEL: name: test_load_global_v4s16_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = 
G_SHL [[AND3]], [[C4]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, addrspace 1) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C3]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C3]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C4]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C3]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[LOAD3]], [[C3]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C4]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x i16>), [[BITCAST1]](<2 x i16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX9-HSA-LABEL: name: test_load_global_v4s16_align2 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 2, addrspace 1) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p1) :: (load (<4 x i16>), align 2, addrspace 1) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; GFX9-MESA-LABEL: name: test_load_global_v4s16_align2 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from 
unknown-address + 4, addrspace 1) - ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, addrspace 1) + ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD3]](i32) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 2, addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:_(<4 x i16>) = G_LOAD %0(p1) :: (load (<4 x i16>), align 2, addrspace 1) + $vgpr0_vgpr1 = COPY %1(<4 x i16>) ... 
--- @@ -6348,200 +6348,200 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C5]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C5]] - ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL4]] - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C5]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C5]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL5]] - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], 
[[C]](i64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, addrspace 1) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; SI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[OR]], [[C5]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[OR1]], [[C5]] + ; SI-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C6]](i32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL4]] + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR4]](i32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[OR2]], [[C5]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[OR3]], [[C5]] + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C6]](i32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL5]] + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR5]](i32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x i16>), [[BITCAST1]](<2 x i16>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; CI-HSA-LABEL: name: test_load_global_v4s16_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 1, addrspace 1) - ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p1) :: (load (<4 x i16>), align 1, addrspace 1) + ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; CI-MESA-LABEL: name: test_load_global_v4s16_align1 
; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C5]] - ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C5]] - ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) - ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL4]] - ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C5]] - ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C5]] - ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) - ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL5]] - ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; CI-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) - ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; CI-MESA-NEXT: 
[[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[OR]], [[C5]] + ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[OR1]], [[C5]] + ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C6]](i32) + ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL4]] + ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR4]](i32) + ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[OR2]], [[C5]] + ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[OR3]], [[C5]] + ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C6]](i32) + ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL5]] + ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR5]](i32) + ; CI-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x i16>), [[BITCAST1]](<2 x i16>) + ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; VI-LABEL: name: test_load_global_v4s16_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; 
VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C5]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C5]] - ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL4]] - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C5]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL5]] - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; VI-NEXT: 
[[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, addrspace 1) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[OR]], [[C5]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[OR1]], [[C5]] + ; VI-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C6]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL4]] + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR4]](i32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[OR2]], [[C5]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[OR3]], [[C5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C6]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL5]] + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR5]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x i16>), [[BITCAST1]](<2 x i16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX9-HSA-LABEL: name: test_load_global_v4s16_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 1, addrspace 1) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p1) :: (load (<4 x i16>), align 1, addrspace 1) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; GFX9-MESA-LABEL: name: test_load_global_v4s16_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 
- ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32) - ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[OR3]](s32) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; GFX9-MESA-NEXT: 
[[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR]](i32) + ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[OR2]](i32) + ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[OR3]](i32) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 1, addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:_(<4 x i16>) = G_LOAD %0(p1) :: (load (<4 x i16>), align 1, addrspace 1) + $vgpr0_vgpr1 = COPY %1(<4 x i16>) ... 
--- @@ -6554,137 +6554,137 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C]] - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; SI-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; SI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), addrspace 1) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i16>) = G_BITCAST [[LOAD]](<4 x i32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x i16>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<6 x i16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>), [[UV6:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<6 x i16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: $vgpr0 = COPY [[UV]](<2 x i16>) + ; SI-NEXT: $vgpr1 = COPY [[UV1]](<2 x i16>) + ; SI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x i16>) ; ; CI-HSA-LABEL: name: test_load_global_v5s16_align16 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) - ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) - ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CI-HSA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) - ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST 
[[UV4]](<2 x s16>) - ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C]] - ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C]] - ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-HSA-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CI-HSA-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), addrspace 1) + ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i16>) = G_BITCAST [[LOAD]](<4 x i32>) + ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x i16>) + ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x i16>) = G_IMPLICIT_DEF + ; CI-HSA-NEXT: [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>), [[UV6:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<6 x i16>) + ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C]] + ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C]] + ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C1]](i32) + ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CI-HSA-NEXT: $vgpr0 = COPY [[UV]](<2 x i16>) + ; CI-HSA-NEXT: $vgpr1 = COPY [[UV1]](<2 x i16>) + ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x i16>) ; ; CI-MESA-LABEL: name: test_load_global_v5s16_align16 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) - ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) - ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CI-MESA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) - ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C]] - ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C]] - ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-MESA-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CI-MESA-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x 
i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), addrspace 1) + ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i16>) = G_BITCAST [[LOAD]](<4 x i32>) + ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x i16>) + ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x i16>) = G_IMPLICIT_DEF + ; CI-MESA-NEXT: [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>), [[UV6:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<6 x i16>) + ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C]] + ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C]] + ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CI-MESA-NEXT: $vgpr0 = COPY [[UV]](<2 x i16>) + ; CI-MESA-NEXT: $vgpr1 = COPY [[UV1]](<2 x i16>) + ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x i16>) ; ; VI-LABEL: name: test_load_global_v5s16_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C]] - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; VI-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), addrspace 1) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i16>) = G_BITCAST [[LOAD]](<4 x i32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x i16>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<6 x i16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>), [[UV6:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<6 x i16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C]] 
+ ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: $vgpr0 = COPY [[UV]](<2 x i16>) + ; VI-NEXT: $vgpr1 = COPY [[UV1]](<2 x i16>) + ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x i16>) ; ; GFX9-HSA-LABEL: name: test_load_global_v5s16_align16 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) - ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-HSA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) - ; GFX9-HSA-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) - ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; GFX9-HSA-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) - ; GFX9-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) - ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-HSA-NEXT: $vgpr0 = COPY [[UV4]](<2 x s16>) - ; GFX9-HSA-NEXT: $vgpr1 = COPY [[UV9]](<2 x s16>) - ; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), addrspace 1) + ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i16>) = G_BITCAST [[LOAD]](<4 x i32>) + ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x i16>) + ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9-HSA-NEXT: [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>), [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x i16>) + ; GFX9-HSA-NEXT: [[UV8:%[0-9]+]]:_(<2 x i16>), [[UV9:%[0-9]+]]:_(<2 x i16>), [[UV10:%[0-9]+]]:_(<2 x i16>), [[UV11:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x i16>) + ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x i16>) = G_IMPLICIT_DEF + ; GFX9-HSA-NEXT: [[UV12:%[0-9]+]]:_(<2 x i16>), [[UV13:%[0-9]+]]:_(<2 x i16>), [[UV14:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<6 x i16>) + ; GFX9-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV12]](<2 x i16>) + ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-HSA-NEXT: $vgpr0 = COPY [[UV4]](<2 x 
i16>) + ; GFX9-HSA-NEXT: $vgpr1 = COPY [[UV9]](<2 x i16>) + ; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR]](<2 x i16>) ; ; GFX9-MESA-LABEL: name: test_load_global_v5s16_align16 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) - ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-MESA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) - ; GFX9-MESA-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) - ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) - ; GFX9-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) - ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-MESA-NEXT: $vgpr0 = COPY [[UV4]](<2 x s16>) - ; GFX9-MESA-NEXT: $vgpr1 = COPY [[UV9]](<2 x s16>) - ; GFX9-MESA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), addrspace 1) + ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i16>) = G_BITCAST [[LOAD]](<4 x i32>) + ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x i16>) + ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9-MESA-NEXT: [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>), [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x i16>) + ; GFX9-MESA-NEXT: [[UV8:%[0-9]+]]:_(<2 x i16>), [[UV9:%[0-9]+]]:_(<2 x i16>), [[UV10:%[0-9]+]]:_(<2 x i16>), [[UV11:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x i16>) + ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x i16>) = G_IMPLICIT_DEF + ; GFX9-MESA-NEXT: [[UV12:%[0-9]+]]:_(<2 x i16>), [[UV13:%[0-9]+]]:_(<2 x i16>), [[UV14:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<6 x i16>) + ; GFX9-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV12]](<2 x i16>) + ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-MESA-NEXT: $vgpr0 = COPY [[UV4]](<2 x i16>) + ; GFX9-MESA-NEXT: $vgpr1 = COPY [[UV9]](<2 x i16>) + ; GFX9-MESA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR]](<2 x i16>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<5 x s16>) = G_LOAD %0 :: (load (<5 x s16>), align 16, addrspace 1) - %2:_(<5 x s16>) = G_IMPLICIT_DEF - %3:_(<10 x s16>) = G_CONCAT_VECTORS %1, %2 - 
%4:_(<2 x s16>), %5:_(<2 x s16>), %6:_(<2 x s16>), %7:_(<2 x s16>), %8:_(<2 x s16>) = G_UNMERGE_VALUES %3 - $vgpr0 = COPY %4 - $vgpr1 = COPY %5 - $vgpr2 = COPY %6 + %1:_(<5 x i16>) = G_LOAD %0(p1) :: (load (<5 x i16>), align 16, addrspace 1) + %2:_(<5 x i16>) = G_IMPLICIT_DEF + %3:_(<10 x i16>) = G_CONCAT_VECTORS %1(<5 x i16>), %2(<5 x i16>) + %4:_(<2 x i16>), %5:_(<2 x i16>), %6:_(<2 x i16>), %7:_(<2 x i16>), %8:_(<2 x i16>) = G_UNMERGE_VALUES %3(<10 x i16>) + $vgpr0 = COPY %4(<2 x i16>) + $vgpr1 = COPY %5(<2 x i16>) + $vgpr2 = COPY %6(<2 x i16>) ... @@ -6698,221 +6698,221 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1) - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; SI-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; SI-NEXT: $vgpr2 = COPY [[BITCAST1]](<2 x s16>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p1) :: (load (<4 x i16>), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 8, align 8, addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[LOAD]](<4 x i16>) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<6 x i16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<6 x i16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C2]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: $vgpr0 = COPY [[UV]](<2 x i16>) + ; SI-NEXT: $vgpr1 = COPY [[UV1]](<2 x i16>) + ; SI-NEXT: $vgpr2 = COPY [[BITCAST1]](<2 x i16>) ; ; CI-HSA-LABEL: name: test_load_global_v5s16_align8 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), 
align 8, addrspace 1) - ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) - ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; CI-HSA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-HSA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-HSA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-HSA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1) - ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) - ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]] - ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]] - ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32) - ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]] - ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]] - ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32) - ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]] - ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]] - ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32) - ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 8, addrspace 1) + ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, align 4, addrspace 1) + ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; CI-HSA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-HSA-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; 
CI-HSA-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-HSA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; CI-HSA-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p1) :: (load (i16) from unknown-address + 8, align 8, addrspace 1) + ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x i16>) = G_IMPLICIT_DEF + ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<6 x i16>) + ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C4]] + ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C4]] + ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C5]](i32) + ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C4]] + ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[LOAD3]], [[C4]] + ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C5]](i32) + ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[LOAD4]], [[C4]] + ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C4]] + ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND5]], [[C5]](i32) + ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[AND4]], [[SHL2]] + ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) + ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x i16>) + ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x i16>) ; ; CI-MESA-LABEL: name: test_load_global_v5s16_align8 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) - ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1) - ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) - ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; 
CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]] - ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]] - ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32) - ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]] - ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]] - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32) - ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]] - ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]] - ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32) - ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 8, addrspace 1) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, align 4, addrspace 1) + ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; CI-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p1) :: (load (i16) from unknown-address + 8, align 8, addrspace 1) + ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x i16>) = G_IMPLICIT_DEF + ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<6 x i16>) + ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C4]] + ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C4]] + ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C5]](i32) + ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C4]] + ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[LOAD3]], [[C4]] + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C5]](i32) + ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; 
CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[LOAD4]], [[C4]] + ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C4]] + ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND5]], [[C5]](i32) + ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[AND4]], [[SHL2]] + ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) + ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x i16>) + ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x i16>) ; ; VI-LABEL: name: test_load_global_v5s16_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]] - ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]] - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; VI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 8, addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD 
[[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, align 4, addrspace 1) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p1) :: (load (i16) from unknown-address + 8, align 8, addrspace 1) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<6 x i16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<6 x i16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C4]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C4]] + ; VI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C5]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C4]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[LOAD3]], [[C4]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C5]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; VI-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[LOAD4]], [[C4]] + ; VI-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C4]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND5]], [[C5]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[AND4]], [[SHL2]] + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) + ; VI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x i16>) + ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x i16>) ; ; GFX9-HSA-LABEL: name: test_load_global_v5s16_align8 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1) - ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) - ; GFX9-HSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; GFX9-HSA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-HSA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD 
[[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; GFX9-HSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9-HSA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-HSA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-HSA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1) - ; GFX9-HSA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) - ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-HSA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-HSA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - ; GFX9-HSA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 8, addrspace 1) + ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; GFX9-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, align 4, addrspace 1) + ; GFX9-HSA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; GFX9-HSA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-HSA-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; GFX9-HSA-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD3]](i32) + ; GFX9-HSA-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; GFX9-HSA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; GFX9-HSA-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p1) :: (load (i16) from unknown-address + 8, align 8, addrspace 1) + ; GFX9-HSA-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD4]](i32) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x i16>) = G_IMPLICIT_DEF + ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<6 x i16>) + ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX9-HSA-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; 
GFX9-HSA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) + ; GFX9-HSA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x i16>) + ; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x i16>) ; ; GFX9-MESA-LABEL: name: test_load_global_v5s16_align8 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1) - ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) - ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1) - ; GFX9-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) - ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-MESA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - ; GFX9-MESA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-MESA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 8, addrspace 1) + ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, align 4, addrspace 
1) + ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD3]](i32) + ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; GFX9-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p1) :: (load (i16) from unknown-address + 8, align 8, addrspace 1) + ; GFX9-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD4]](i32) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x i16>) = G_IMPLICIT_DEF + ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<6 x i16>) + ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX9-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX9-MESA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) + ; GFX9-MESA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x i16>) + ; GFX9-MESA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x i16>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<5 x s16>) = G_LOAD %0 :: (load (<5 x s16>), align 8, addrspace 1) - %2:_(<5 x s16>) = G_IMPLICIT_DEF - %3:_(<10 x s16>) = G_CONCAT_VECTORS %1, %2 - %4:_(<2 x s16>), %5:_(<2 x s16>), %6:_(<2 x s16>), %7:_(<2 x s16>), %8:_(<2 x s16>) = G_UNMERGE_VALUES %3 - $vgpr0 = COPY %4 - $vgpr1 = COPY %5 - $vgpr2 = COPY %6 + %1:_(<5 x i16>) = G_LOAD %0(p1) :: (load (<5 x i16>), align 8, addrspace 1) + %2:_(<5 x i16>) = G_IMPLICIT_DEF + %3:_(<10 x i16>) = G_CONCAT_VECTORS %1(<5 x i16>), %2(<5 x i16>) + %4:_(<2 x i16>), %5:_(<2 x i16>), %6:_(<2 x i16>), %7:_(<2 x i16>), %8:_(<2 x i16>) = G_UNMERGE_VALUES %3(<10 x i16>) + $vgpr0 = COPY %4(<2 x i16>) + $vgpr1 = COPY %5(<2 x i16>) + $vgpr2 = COPY %6(<2 x i16>) ... 
@@ -6926,221 +6926,221 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1) - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; SI-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; SI-NEXT: $vgpr2 = COPY [[BITCAST1]](<2 x s16>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p1) :: (load (<4 x i16>), align 4, addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 8, align 4, addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[LOAD]](<4 x i16>) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<6 x i16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<6 x i16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C2]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: $vgpr0 = COPY [[UV]](<2 x i16>) + ; SI-NEXT: $vgpr1 = COPY [[UV1]](<2 x i16>) + ; SI-NEXT: $vgpr2 = COPY [[BITCAST1]](<2 x i16>) ; ; CI-HSA-LABEL: name: test_load_global_v5s16_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) - ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 
1) - ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; CI-HSA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-HSA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-HSA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-HSA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1) - ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) - ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]] - ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]] - ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32) - ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]] - ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]] - ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32) - ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]] - ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]] - ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32) - ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 4, addrspace 1) + ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, align 4, addrspace 1) + ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; CI-HSA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-HSA-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-HSA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; CI-HSA-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p1) :: (load (i16) from unknown-address + 8, align 4, addrspace 1) + ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x i16>) = G_IMPLICIT_DEF + ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<6 x i16>) + ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x 
i16>) + ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C4]] + ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C4]] + ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C5]](i32) + ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C4]] + ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[LOAD3]], [[C4]] + ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C5]](i32) + ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[LOAD4]], [[C4]] + ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C4]] + ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND5]], [[C5]](i32) + ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[AND4]], [[SHL2]] + ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) + ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x i16>) + ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x i16>) ; ; CI-MESA-LABEL: name: test_load_global_v5s16_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) - ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1) - ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) - ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]] - ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]] - ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32) - ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]] - ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]] - ; 
CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32) - ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]] - ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]] - ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32) - ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 4, addrspace 1) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, align 4, addrspace 1) + ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; CI-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p1) :: (load (i16) from unknown-address + 8, align 4, addrspace 1) + ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x i16>) = G_IMPLICIT_DEF + ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<6 x i16>) + ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C4]] + ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C4]] + ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C5]](i32) + ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C4]] + ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[LOAD3]], [[C4]] + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C5]](i32) + ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[LOAD4]], [[C4]] + ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C4]] + ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND5]], [[C5]](i32) + ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[AND4]], [[SHL2]] + ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) + ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x i16>) + ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x i16>) ; ; VI-LABEL: name: 
test_load_global_v5s16_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]] - ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]] - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; VI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 4, addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, align 4, addrspace 1) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from 
unknown-address + 6, addrspace 1) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p1) :: (load (i16) from unknown-address + 8, align 4, addrspace 1) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<6 x i16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<6 x i16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C4]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C4]] + ; VI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C5]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C4]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[LOAD3]], [[C4]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C5]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; VI-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[LOAD4]], [[C4]] + ; VI-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C4]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND5]], [[C5]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[AND4]], [[SHL2]] + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) + ; VI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x i16>) + ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x i16>) ; ; GFX9-HSA-LABEL: name: test_load_global_v5s16_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) - ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) - ; GFX9-HSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; GFX9-HSA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-HSA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; GFX9-HSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9-HSA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-HSA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-HSA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1) - ; GFX9-HSA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = 
G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) - ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-HSA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-HSA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - ; GFX9-HSA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 4, addrspace 1) + ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; GFX9-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, align 4, addrspace 1) + ; GFX9-HSA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; GFX9-HSA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-HSA-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; GFX9-HSA-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD3]](i32) + ; GFX9-HSA-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; GFX9-HSA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; GFX9-HSA-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p1) :: (load (i16) from unknown-address + 8, align 4, addrspace 1) + ; GFX9-HSA-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD4]](i32) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x i16>) = G_IMPLICIT_DEF + ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<6 x i16>) + ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX9-HSA-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX9-HSA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) + ; GFX9-HSA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x i16>) + ; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x i16>) ; ; GFX9-MESA-LABEL: name: test_load_global_v5s16_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) - ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; 
GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) - ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1) - ; GFX9-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) - ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-MESA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - ; GFX9-MESA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-MESA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 4, addrspace 1) + ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, align 4, addrspace 1) + ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD3]](i32) + ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD 
[[COPY]], [[C3]](i64) + ; GFX9-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p1) :: (load (i16) from unknown-address + 8, align 4, addrspace 1) + ; GFX9-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD4]](i32) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x i16>) = G_IMPLICIT_DEF + ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<6 x i16>) + ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX9-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX9-MESA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) + ; GFX9-MESA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x i16>) + ; GFX9-MESA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x i16>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<5 x s16>) = G_LOAD %0 :: (load (<5 x s16>), align 4, addrspace 1) - %2:_(<5 x s16>) = G_IMPLICIT_DEF - %3:_(<10 x s16>) = G_CONCAT_VECTORS %1, %2 - %4:_(<2 x s16>), %5:_(<2 x s16>), %6:_(<2 x s16>), %7:_(<2 x s16>), %8:_(<2 x s16>) = G_UNMERGE_VALUES %3 - $vgpr0 = COPY %4 - $vgpr1 = COPY %5 - $vgpr2 = COPY %6 + %1:_(<5 x i16>) = G_LOAD %0(p1) :: (load (<5 x i16>), align 4, addrspace 1) + %2:_(<5 x i16>) = G_IMPLICIT_DEF + %3:_(<10 x i16>) = G_CONCAT_VECTORS %1(<5 x i16>), %2(<5 x i16>) + %4:_(<2 x i16>), %5:_(<2 x i16>), %6:_(<2 x i16>), %7:_(<2 x i16>), %8:_(<2 x i16>) = G_UNMERGE_VALUES %3(<10 x i16>) + $vgpr0 = COPY %4(<2 x i16>) + $vgpr1 = COPY %5(<2 x i16>) + $vgpr2 = COPY %6(<2 x i16>) ... 
@@ -7154,239 +7154,239 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]] - ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; SI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; SI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, addrspace 1) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; SI-NEXT: 
[[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p1) :: (load (i16) from unknown-address + 8, addrspace 1) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<6 x i16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<6 x i16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C4]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C4]] + ; SI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C5]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C4]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[LOAD3]], [[C4]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C5]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[LOAD4]], [[C4]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C4]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND5]], [[C5]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[AND4]], [[SHL2]] + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) + ; SI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x i16>) + ; SI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x i16>) ; ; CI-HSA-LABEL: name: test_load_global_v5s16_align2 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) - ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; CI-HSA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-HSA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-HSA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-HSA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1) - ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) - ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]] - ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]] - ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT 
i32 16 - ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32) - ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]] - ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]] - ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32) - ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]] - ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]] - ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32) - ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, addrspace 1) + ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; CI-HSA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-HSA-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-HSA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; CI-HSA-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p1) :: (load (i16) from unknown-address + 8, addrspace 1) + ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x i16>) = G_IMPLICIT_DEF + ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<6 x i16>) + ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C4]] + ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C4]] + ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C5]](i32) + ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C4]] + ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[LOAD3]], [[C4]] + ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C5]](i32) + ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[LOAD4]], [[C4]] + ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C4]] + ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND5]], [[C5]](i32) + ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[AND4]], 
[[SHL2]] + ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) + ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x i16>) + ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x i16>) ; ; CI-MESA-LABEL: name: test_load_global_v5s16_align2 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) - ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1) - ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) - ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]] - ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]] - ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32) - ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]] - ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]] - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32) - ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]] - ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]] - ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32) - ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; CI-MESA-NEXT: 
[[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, addrspace 1) + ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; CI-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p1) :: (load (i16) from unknown-address + 8, addrspace 1) + ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x i16>) = G_IMPLICIT_DEF + ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<6 x i16>) + ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C4]] + ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C4]] + ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C5]](i32) + ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C4]] + ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[LOAD3]], [[C4]] + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C5]](i32) + ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[LOAD4]], [[C4]] + ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C4]] + ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND5]], [[C5]](i32) + ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[AND4]], [[SHL2]] + ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) + ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x i16>) + ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x i16>) ; ; VI-LABEL: name: test_load_global_v5s16_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) 
:: (load (s16) from unknown-address + 8, addrspace 1) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]] - ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]] - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; VI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, addrspace 1) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p1) :: (load (i16) from unknown-address + 8, addrspace 1) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<6 x i16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<6 x i16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C4]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C4]] + ; VI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C5]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C4]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[LOAD3]], [[C4]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C5]](i32) + ; VI-NEXT: 
[[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; VI-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[LOAD4]], [[C4]] + ; VI-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C4]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND5]], [[C5]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[AND4]], [[SHL2]] + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) + ; VI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x i16>) + ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x i16>) ; ; GFX9-HSA-LABEL: name: test_load_global_v5s16_align2 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) - ; GFX9-HSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; GFX9-HSA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-HSA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; GFX9-HSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9-HSA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-HSA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-HSA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1) - ; GFX9-HSA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) - ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-HSA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-HSA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - ; GFX9-HSA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 
1) + ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; GFX9-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, addrspace 1) + ; GFX9-HSA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; GFX9-HSA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-HSA-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; GFX9-HSA-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD3]](i32) + ; GFX9-HSA-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; GFX9-HSA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; GFX9-HSA-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p1) :: (load (i16) from unknown-address + 8, addrspace 1) + ; GFX9-HSA-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD4]](i32) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x i16>) = G_IMPLICIT_DEF + ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<6 x i16>) + ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX9-HSA-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX9-HSA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) + ; GFX9-HSA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x i16>) + ; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x i16>) ; ; GFX9-MESA-LABEL: name: test_load_global_v5s16_align2 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) - ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, 
addrspace 1) - ; GFX9-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) - ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-MESA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - ; GFX9-MESA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-MESA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, addrspace 1) + ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD3]](i32) + ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; GFX9-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p1) :: (load (i16) from unknown-address + 8, addrspace 1) + ; GFX9-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD4]](i32) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x i16>) = G_IMPLICIT_DEF + ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<6 x i16>) + ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX9-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX9-MESA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) + ; GFX9-MESA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x i16>) + ; GFX9-MESA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x i16>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<5 x s16>) = G_LOAD %0 :: (load (<5 x s16>), align 2, addrspace 1) - %2:_(<5 x s16>) = G_IMPLICIT_DEF - %3:_(<10 x s16>) = G_CONCAT_VECTORS %1, %2 - %4:_(<2 x s16>), 
%5:_(<2 x s16>), %6:_(<2 x s16>), %7:_(<2 x s16>), %8:_(<2 x s16>) = G_UNMERGE_VALUES %3 - $vgpr0 = COPY %4 - $vgpr1 = COPY %5 - $vgpr2 = COPY %6 + %1:_(<5 x i16>) = G_LOAD %0(p1) :: (load (<5 x i16>), align 2, addrspace 1) + %2:_(<5 x i16>) = G_IMPLICIT_DEF + %3:_(<10 x i16>) = G_CONCAT_VECTORS %1(<5 x i16>), %2(<5 x i16>) + %4:_(<2 x i16>), %5:_(<2 x i16>), %6:_(<2 x i16>), %7:_(<2 x i16>), %8:_(<2 x i16>) = G_UNMERGE_VALUES %3(<10 x i16>) + $vgpr0 = COPY %4(<2 x i16>) + $vgpr1 = COPY %5(<2 x i16>) + $vgpr2 = COPY %6(<2 x i16>) ... @@ -7400,327 +7400,327 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) - ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD4]] - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<6 
x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C6]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C6]] - ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL5]] - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C6]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C6]] - ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) - ; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL6]] - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C6]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] - ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) - ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL7]] - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) - ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; SI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; SI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, addrspace 1) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: 
(load (i8) from unknown-address + 7, addrspace 1) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; SI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (i8) from unknown-address + 8, addrspace 1) + ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD8]](p1) :: (load (i8) from unknown-address + 9, addrspace 1) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD4]] + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<6 x i16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<6 x i16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; SI-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[OR]], [[C6]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[OR1]], [[C6]] + ; SI-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C7]](i32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL5]] + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR5]](i32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[OR2]], [[C6]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[OR3]], [[C6]] + ; SI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C7]](i32) + ; SI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL6]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR6]](i32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[OR4]], [[C6]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C6]] + ; SI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[AND5]], [[C7]](i32) + ; SI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[AND4]], [[SHL7]] + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR7]](i32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) + ; SI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x i16>) + ; SI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x i16>) ; ; CI-HSA-LABEL: name: test_load_global_v5s16_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1) - ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, align 1, addrspace 1) - ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 1, addrspace 1) - ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; CI-HSA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-HSA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, align 1, addrspace 1) - ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-HSA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-HSA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) 
from unknown-address + 8, align 1, addrspace 1) - ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) - ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]] - ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]] - ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32) - ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]] - ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]] - ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32) - ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]] - ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]] - ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32) - ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 1, addrspace 1) + ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, align 1, addrspace 1) + ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, align 1, addrspace 1) + ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; CI-HSA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-HSA-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, align 1, addrspace 1) + ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-HSA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; CI-HSA-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p1) :: (load (i16) from unknown-address + 8, align 1, addrspace 1) + ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x i16>) = G_IMPLICIT_DEF + ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<6 x i16>) + ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C4]] + ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C4]] + ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C5]](i32) + ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST 
[[OR]](i32) + ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C4]] + ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[LOAD3]], [[C4]] + ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C5]](i32) + ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[LOAD4]], [[C4]] + ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C4]] + ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND5]], [[C5]](i32) + ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[AND4]], [[SHL2]] + ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) + ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x i16>) + ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x i16>) ; ; CI-MESA-LABEL: name: test_load_global_v5s16_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = 
G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) - ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD4]] - ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) - ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C6]] - ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C6]] - ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) - ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL5]] - ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C6]] - ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C6]] - ; CI-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) - ; CI-MESA-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL6]] - ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) - ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C6]] - ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] - ; CI-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) - ; CI-MESA-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL7]] - ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) - ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 
4, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (i8) from unknown-address + 8, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD8]](p1) :: (load (i8) from unknown-address + 9, addrspace 1) + ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD4]] + ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x i16>) = G_IMPLICIT_DEF + ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<6 x i16>) + ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[OR]], [[C6]] + ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[OR1]], [[C6]] + ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C7]](i32) + ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL5]] + ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR5]](i32) + ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[OR2]], [[C6]] + ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[OR3]], [[C6]] + ; CI-MESA-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C7]](i32) + ; CI-MESA-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL6]] + ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR6]](i32) + ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[OR4]], [[C6]] + ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C6]] + ; CI-MESA-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[AND5]], [[C7]](i32) + ; CI-MESA-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[AND4]], [[SHL7]] + ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR7]](i32) + ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) + ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x i16>) + ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x i16>) ; ; VI-LABEL: name: test_load_global_v5s16_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) 
= G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD4]] - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C6]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C6]] - ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL5]] - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C6]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C6]] - ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) - ; 
VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL6]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C6]] - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] - ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) - ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL7]] - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) - ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; VI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, addrspace 1) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (i8) from unknown-address + 8, addrspace 1) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD8]](p1) :: (load (i8) from unknown-address + 9, addrspace 1) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD4]] + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<6 x i16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x 
i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<6 x i16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; VI-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[OR]], [[C6]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[OR1]], [[C6]] + ; VI-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C7]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL5]] + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR5]](i32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[OR2]], [[C6]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[OR3]], [[C6]] + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C7]](i32) + ; VI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL6]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR6]](i32) + ; VI-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[OR4]], [[C6]] + ; VI-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C6]] + ; VI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[AND5]], [[C7]](i32) + ; VI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[AND4]], [[SHL7]] + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR7]](i32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) + ; VI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x i16>) + ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x i16>) ; ; GFX9-HSA-LABEL: name: test_load_global_v5s16_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1) - ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, align 1, addrspace 1) - ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 1, addrspace 1) - ; GFX9-HSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; GFX9-HSA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-HSA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, align 1, addrspace 1) - ; GFX9-HSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9-HSA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-HSA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-HSA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 1, addrspace 1) - ; GFX9-HSA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) - ; 
GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-HSA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-HSA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - ; GFX9-HSA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 1, addrspace 1) + ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, align 1, addrspace 1) + ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; GFX9-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, align 1, addrspace 1) + ; GFX9-HSA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; GFX9-HSA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-HSA-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, align 1, addrspace 1) + ; GFX9-HSA-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD3]](i32) + ; GFX9-HSA-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; GFX9-HSA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; GFX9-HSA-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p1) :: (load (i16) from unknown-address + 8, align 1, addrspace 1) + ; GFX9-HSA-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD4]](i32) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x i16>) = G_IMPLICIT_DEF + ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<6 x i16>) + ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX9-HSA-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX9-HSA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) + ; GFX9-HSA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x i16>) + ; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x i16>) ; ; GFX9-MESA-LABEL: name: test_load_global_v5s16_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-MESA-NEXT: 
[[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32) - ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[OR3]](s32) - ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD4]] - ; GFX9-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[OR4]](s32) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) - ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) 
= G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-MESA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - ; GFX9-MESA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-MESA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>) + ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR]](i32) + ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[OR2]](i32) + ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[OR3]](i32) + ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; GFX9-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (i8) from unknown-address + 8, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD8]](p1) :: (load (i8) from unknown-address + 9, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; GFX9-MESA-NEXT: 
[[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD4]] + ; GFX9-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[OR4]](i32) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x i16>) = G_IMPLICIT_DEF + ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<6 x i16>) + ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX9-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX9-MESA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) + ; GFX9-MESA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x i16>) + ; GFX9-MESA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x i16>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<5 x s16>) = G_LOAD %0 :: (load (<5 x s16>), align 1, addrspace 1) - %2:_(<5 x s16>) = G_IMPLICIT_DEF - %3:_(<10 x s16>) = G_CONCAT_VECTORS %1, %2 - %4:_(<2 x s16>), %5:_(<2 x s16>), %6:_(<2 x s16>), %7:_(<2 x s16>), %8:_(<2 x s16>) = G_UNMERGE_VALUES %3 - $vgpr0 = COPY %4 - $vgpr1 = COPY %5 - $vgpr2 = COPY %6 + %1:_(<5 x i16>) = G_LOAD %0(p1) :: (load (<5 x i16>), align 1, addrspace 1) + %2:_(<5 x i16>) = G_IMPLICIT_DEF + %3:_(<10 x i16>) = G_CONCAT_VECTORS %1(<5 x i16>), %2(<5 x i16>) + %4:_(<2 x i16>), %5:_(<2 x i16>), %6:_(<2 x i16>), %7:_(<2 x i16>), %8:_(<2 x i16>) = G_UNMERGE_VALUES %3(<10 x i16>) + $vgpr0 = COPY %4(<2 x i16>) + $vgpr1 = COPY %5(<2 x i16>) + $vgpr2 = COPY %6(<2 x i16>) ... @@ -7734,54 +7734,54 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<4 x i32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x i16>) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x i16>) ; ; CI-HSA-LABEL: name: test_load_global_v6s16_align16 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1) - ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 16, addrspace 1) + ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x i16>) = G_BITCAST [[LOAD]](<3 x i32>) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY 
[[BITCAST]](<6 x i16>) ; ; CI-MESA-LABEL: name: test_load_global_v6s16_align16 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1) - ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 16, addrspace 1) + ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x i16>) = G_BITCAST [[LOAD]](<3 x i32>) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x i16>) ; ; VI-LABEL: name: test_load_global_v6s16_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 16, addrspace 1) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x i16>) = G_BITCAST [[LOAD]](<3 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x i16>) ; ; GFX9-HSA-LABEL: name: test_load_global_v6s16_align16 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1) - ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 16, addrspace 1) + ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x i16>) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x i16>) ; ; GFX9-MESA-LABEL: name: test_load_global_v6s16_align16 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1) - ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 16, addrspace 1) + ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x i16>) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x i16>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<6 x s16>) = G_LOAD %0 :: (load (<6 x s16>), align 16, addrspace 1) - $vgpr0_vgpr1_vgpr2 = COPY %1 + %1:_(<6 x i16>) = G_LOAD %0(p1) :: (load (<6 x i16>), align 16, addrspace 1) + $vgpr0_vgpr1_vgpr2 = COPY %1(<6 x i16>) ... 
--- @@ -7794,57 +7794,57 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from unknown-address + 8, align 8, addrspace 1) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load (<2 x i32>), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i32) from unknown-address + 8, align 8, addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<2 x i32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[LOAD1]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x i16>) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x i16>) ; ; CI-HSA-LABEL: name: test_load_global_v6s16_align8 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 8, addrspace 1) - ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 8, addrspace 1) + ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x i16>) = G_BITCAST [[LOAD]](<3 x i32>) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x i16>) ; ; CI-MESA-LABEL: name: test_load_global_v6s16_align8 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 8, addrspace 1) - ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 8, addrspace 1) + ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x i16>) = G_BITCAST [[LOAD]](<3 x i32>) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x i16>) ; ; VI-LABEL: name: test_load_global_v6s16_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 8, addrspace 1) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 8, addrspace 1) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x i16>) = G_BITCAST [[LOAD]](<3 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 
= COPY [[BITCAST]](<6 x i16>) ; ; GFX9-HSA-LABEL: name: test_load_global_v6s16_align8 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 8, addrspace 1) - ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 8, addrspace 1) + ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x i16>) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x i16>) ; ; GFX9-MESA-LABEL: name: test_load_global_v6s16_align8 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 8, addrspace 1) - ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 8, addrspace 1) + ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x i16>) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x i16>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<6 x s16>) = G_LOAD %0 :: (load (<6 x s16>), align 8, addrspace 1) - $vgpr0_vgpr1_vgpr2 = COPY %1 + %1:_(<6 x i16>) = G_LOAD %0(p1) :: (load (<6 x i16>), align 8, addrspace 1) + $vgpr0_vgpr1_vgpr2 = COPY %1(<6 x i16>) ... --- @@ -7857,57 +7857,57 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from unknown-address + 8, addrspace 1) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load (<2 x i32>), align 4, addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i32) from unknown-address + 8, addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<2 x i32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[LOAD1]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x i16>) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x i16>) ; ; CI-HSA-LABEL: name: test_load_global_v6s16_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1) - ; 
CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 4, addrspace 1) + ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x i16>) = G_BITCAST [[LOAD]](<3 x i32>) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x i16>) ; ; CI-MESA-LABEL: name: test_load_global_v6s16_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1) - ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 4, addrspace 1) + ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x i16>) = G_BITCAST [[LOAD]](<3 x i32>) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x i16>) ; ; VI-LABEL: name: test_load_global_v6s16_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 4, addrspace 1) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x i16>) = G_BITCAST [[LOAD]](<3 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x i16>) ; ; GFX9-HSA-LABEL: name: test_load_global_v6s16_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1) - ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 4, addrspace 1) + ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x i16>) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x i16>) ; ; GFX9-MESA-LABEL: name: test_load_global_v6s16_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1) - ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 4, addrspace 1) + ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x i16>) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x i16>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<6 x s16>) = G_LOAD %0 :: (load (<6 x s16>), align 4, addrspace 1) - $vgpr0_vgpr1_vgpr2 = COPY %1 + %1:_(<6 x i16>) = G_LOAD %0(p1) :: (load (<6 x i16>), align 4, addrspace 1) + $vgpr0_vgpr1_vgpr2 = COPY %1(<6 x i16>) ... 
--- @@ -7920,136 +7920,136 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, addrspace 1) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i16) from unknown-address + 8, addrspace 1) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; SI-NEXT: 
[[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p1) :: (load (i16) from unknown-address + 10, addrspace 1) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x i16>) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x i16>) ; ; CI-HSA-LABEL: name: test_load_global_v6s16_align2 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 2, addrspace 1) - ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 2, addrspace 1) + ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x i16>) = G_BITCAST [[LOAD]](<3 x i32>) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x i16>) ; ; CI-MESA-LABEL: name: test_load_global_v6s16_align2 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) - ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) - ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; CI-MESA-NEXT: 
[[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i16) from unknown-address + 8, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p1) :: (load (i16) from unknown-address + 10, addrspace 1) + ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32) + ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x i16>) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x i16>) ; ; VI-LABEL: name: test_load_global_v6s16_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI-NEXT: 
[[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, addrspace 1) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i16) from unknown-address + 8, addrspace 1) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p1) :: (load (i16) from unknown-address + 10, addrspace 1) + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x i16>) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x i16>) ; ; GFX9-HSA-LABEL: name: test_load_global_v6s16_align2 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 2, addrspace 1) - ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 2, addrspace 1) + ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x i16>) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x i16>) ; ; GFX9-MESA-LABEL: name: test_load_global_v6s16_align2 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD 
[[COPY]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) - ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i16) from unknown-address + 8, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p1) :: (load (i16) from 
unknown-address + 10, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32) + ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x i16>) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x i16>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<6 x s16>) = G_LOAD %0 :: (load (<6 x s16>), align 2, addrspace 1) - $vgpr0_vgpr1_vgpr2 = COPY %1 + %1:_(<6 x i16>) = G_LOAD %0(p1) :: (load (<6 x i16>), align 2, addrspace 1) + $vgpr0_vgpr1_vgpr2 = COPY %1(<6 x i16>) ... --- @@ -8062,240 +8062,240 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: 
[[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) - ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; SI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) - ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) - ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) - ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, addrspace 1) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; SI-NEXT: 
[[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; SI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; SI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (i8) from unknown-address + 8, addrspace 1) + ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; SI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (i8) from unknown-address + 9, addrspace 1) + ; SI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; SI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (i8) from unknown-address + 10, addrspace 1) + ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p1) :: (load (i8) from unknown-address + 11, addrspace 1) + ; SI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; SI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; SI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; SI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x i16>) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x i16>) ; ; CI-HSA-LABEL: name: test_load_global_v6s16_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 1, addrspace 1) - ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 1, addrspace 1) + ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x i16>) = G_BITCAST [[LOAD]](<3 x i32>) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x i16>) ; ; CI-MESA-LABEL: name: test_load_global_v6s16_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from 
unknown-address + 2, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) - ; CI-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; CI-MESA-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) - ; CI-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; CI-MESA-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; CI-MESA-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) 
= G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (i8) from unknown-address + 8, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (i8) from unknown-address + 9, addrspace 1) + ; CI-MESA-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; CI-MESA-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (i8) from unknown-address + 10, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = 
G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p1) :: (load (i8) from unknown-address + 11, addrspace 1) + ; CI-MESA-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; CI-MESA-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; CI-MESA-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x i16>) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x i16>) ; ; VI-LABEL: name: test_load_global_v6s16_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: 
[[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) - ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) - ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, addrspace 1) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; VI-NEXT: 
[[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; VI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (i8) from unknown-address + 8, addrspace 1) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (i8) from unknown-address + 9, addrspace 1) + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; VI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (i8) from unknown-address + 10, addrspace 1) + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p1) :: (load (i8) from unknown-address + 11, addrspace 1) + ; VI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; VI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; VI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x i16>) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x i16>) ; ; GFX9-HSA-LABEL: name: test_load_global_v6s16_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 1, addrspace 1) - ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 1, addrspace 1) + ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x i16>) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x i16>) ; ; GFX9-MESA-LABEL: name: test_load_global_v6s16_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD 
[[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX9-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; GFX9-MESA-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; GFX9-MESA-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x 
s32>) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX9-MESA-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; GFX9-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (i8) from unknown-address + 8, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (i8) from unknown-address + 9, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX9-MESA-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD 
[[PTR_ADD7]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (i8) from unknown-address + 10, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p1) :: (load (i8) from unknown-address + 11, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX9-MESA-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x i16>) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x i16>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<6 x s16>) = G_LOAD %0 :: (load (<6 x s16>), align 1, addrspace 1) - $vgpr0_vgpr1_vgpr2 = COPY %1 + %1:_(<6 x i16>) = G_LOAD %0(p1) :: (load (<6 x i16>), align 1, addrspace 1) + $vgpr0_vgpr1_vgpr2 = COPY %1(<6 x i16>) ... --- @@ -8308,146 +8308,146 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C]] - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; SI-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; SI-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>) - ; SI-NEXT: $vgpr3 = COPY [[BITCAST3]](<2 x s16>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), addrspace 1) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i16>) = G_BITCAST [[LOAD]](<4 x i32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x i16>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<8 x i16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>), [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<8 x i16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C]] + ; SI-NEXT: 
[[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: $vgpr0 = COPY [[UV]](<2 x i16>) + ; SI-NEXT: $vgpr1 = COPY [[UV1]](<2 x i16>) + ; SI-NEXT: $vgpr2 = COPY [[UV2]](<2 x i16>) + ; SI-NEXT: $vgpr3 = COPY [[BITCAST3]](<2 x i16>) ; ; CI-HSA-LABEL: name: test_load_global_v7s16_align16 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) - ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) - ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CI-HSA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C]] - ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C]] - ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-HSA-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CI-HSA-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CI-HSA-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>) - ; CI-HSA-NEXT: $vgpr3 = COPY [[BITCAST3]](<2 x s16>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), addrspace 1) + ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i16>) = G_BITCAST [[LOAD]](<4 x i32>) + ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x i16>) + ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x i16>) = G_IMPLICIT_DEF + ; CI-HSA-NEXT: [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>), [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<8 x i16>) + ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C]] + ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C]] + ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C1]](i32) + ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CI-HSA-NEXT: $vgpr0 = COPY [[UV]](<2 x i16>) + ; CI-HSA-NEXT: $vgpr1 = COPY [[UV1]](<2 x i16>) + ; CI-HSA-NEXT: $vgpr2 = COPY [[UV2]](<2 x i16>) + ; CI-HSA-NEXT: $vgpr3 = COPY [[BITCAST3]](<2 x i16>) ; ; CI-MESA-LABEL: name: 
test_load_global_v7s16_align16 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) - ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) - ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CI-MESA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C]] - ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C]] - ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-MESA-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CI-MESA-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CI-MESA-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>) - ; CI-MESA-NEXT: $vgpr3 = COPY [[BITCAST3]](<2 x s16>) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), addrspace 1) + ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i16>) = G_BITCAST [[LOAD]](<4 x i32>) + ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x i16>) + ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x i16>) = G_IMPLICIT_DEF + ; CI-MESA-NEXT: [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>), [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<8 x i16>) + ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C]] + ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C]] + ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CI-MESA-NEXT: $vgpr0 = COPY [[UV]](<2 x i16>) + ; CI-MESA-NEXT: $vgpr1 = COPY [[UV1]](<2 x i16>) + ; CI-MESA-NEXT: $vgpr2 = COPY [[UV2]](<2 x i16>) + ; CI-MESA-NEXT: $vgpr3 = COPY [[BITCAST3]](<2 x i16>) ; ; VI-LABEL: name: test_load_global_v7s16_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) - ; VI-NEXT: 
[[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C]] - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; VI-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; VI-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>) - ; VI-NEXT: $vgpr3 = COPY [[BITCAST3]](<2 x s16>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), addrspace 1) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i16>) = G_BITCAST [[LOAD]](<4 x i32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x i16>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<8 x i16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>), [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<8 x i16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: $vgpr0 = COPY [[UV]](<2 x i16>) + ; VI-NEXT: $vgpr1 = COPY [[UV1]](<2 x i16>) + ; VI-NEXT: $vgpr2 = COPY [[UV2]](<2 x i16>) + ; VI-NEXT: $vgpr3 = COPY [[BITCAST3]](<2 x i16>) ; ; GFX9-HSA-LABEL: name: test_load_global_v7s16_align16 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) - ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-HSA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) - ; GFX9-HSA-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) - ; GFX9-HSA-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES 
[[BITCAST]](<8 x s16>) - ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; GFX9-HSA-NEXT: [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; GFX9-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV16]](<2 x s16>) - ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-HSA-NEXT: $vgpr0 = COPY [[UV4]](<2 x s16>) - ; GFX9-HSA-NEXT: $vgpr1 = COPY [[UV9]](<2 x s16>) - ; GFX9-HSA-NEXT: $vgpr2 = COPY [[UV14]](<2 x s16>) - ; GFX9-HSA-NEXT: $vgpr3 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), addrspace 1) + ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i16>) = G_BITCAST [[LOAD]](<4 x i32>) + ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x i16>) + ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9-HSA-NEXT: [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>), [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x i16>) + ; GFX9-HSA-NEXT: [[UV8:%[0-9]+]]:_(<2 x i16>), [[UV9:%[0-9]+]]:_(<2 x i16>), [[UV10:%[0-9]+]]:_(<2 x i16>), [[UV11:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x i16>) + ; GFX9-HSA-NEXT: [[UV12:%[0-9]+]]:_(<2 x i16>), [[UV13:%[0-9]+]]:_(<2 x i16>), [[UV14:%[0-9]+]]:_(<2 x i16>), [[UV15:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x i16>) + ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x i16>) = G_IMPLICIT_DEF + ; GFX9-HSA-NEXT: [[UV16:%[0-9]+]]:_(<2 x i16>), [[UV17:%[0-9]+]]:_(<2 x i16>), [[UV18:%[0-9]+]]:_(<2 x i16>), [[UV19:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<8 x i16>) + ; GFX9-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV16]](<2 x i16>) + ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-HSA-NEXT: $vgpr0 = COPY [[UV4]](<2 x i16>) + ; GFX9-HSA-NEXT: $vgpr1 = COPY [[UV9]](<2 x i16>) + ; GFX9-HSA-NEXT: $vgpr2 = COPY [[UV14]](<2 x i16>) + ; GFX9-HSA-NEXT: $vgpr3 = COPY [[BUILD_VECTOR]](<2 x i16>) ; ; GFX9-MESA-LABEL: name: test_load_global_v7s16_align16 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) - ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-MESA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) - ; GFX9-MESA-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) 
= G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) - ; GFX9-MESA-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) - ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA-NEXT: [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; GFX9-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV16]](<2 x s16>) - ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-MESA-NEXT: $vgpr0 = COPY [[UV4]](<2 x s16>) - ; GFX9-MESA-NEXT: $vgpr1 = COPY [[UV9]](<2 x s16>) - ; GFX9-MESA-NEXT: $vgpr2 = COPY [[UV14]](<2 x s16>) - ; GFX9-MESA-NEXT: $vgpr3 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), addrspace 1) + ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i16>) = G_BITCAST [[LOAD]](<4 x i32>) + ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x i16>) + ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9-MESA-NEXT: [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>), [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x i16>) + ; GFX9-MESA-NEXT: [[UV8:%[0-9]+]]:_(<2 x i16>), [[UV9:%[0-9]+]]:_(<2 x i16>), [[UV10:%[0-9]+]]:_(<2 x i16>), [[UV11:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x i16>) + ; GFX9-MESA-NEXT: [[UV12:%[0-9]+]]:_(<2 x i16>), [[UV13:%[0-9]+]]:_(<2 x i16>), [[UV14:%[0-9]+]]:_(<2 x i16>), [[UV15:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x i16>) + ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x i16>) = G_IMPLICIT_DEF + ; GFX9-MESA-NEXT: [[UV16:%[0-9]+]]:_(<2 x i16>), [[UV17:%[0-9]+]]:_(<2 x i16>), [[UV18:%[0-9]+]]:_(<2 x i16>), [[UV19:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<8 x i16>) + ; GFX9-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV16]](<2 x i16>) + ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-MESA-NEXT: $vgpr0 = COPY [[UV4]](<2 x i16>) + ; GFX9-MESA-NEXT: $vgpr1 = COPY [[UV9]](<2 x i16>) + ; GFX9-MESA-NEXT: $vgpr2 = COPY [[UV14]](<2 x i16>) + ; GFX9-MESA-NEXT: $vgpr3 = COPY [[BUILD_VECTOR]](<2 x i16>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<7 x s16>) = G_LOAD %0 :: (load (<7 x s16>), align 16, addrspace 1) - %2:_(<7 x s16>) = G_IMPLICIT_DEF - %3:_(<14 x s16>) = G_CONCAT_VECTORS %1, %2 - %4:_(<2 x s16>), %5:_(<2 x s16>), %6:_(<2 x s16>), %7:_(<2 x s16>), %8:_(<2 x s16>), %9:_(<2 x s16>), %10:_(<2 x s16>) = G_UNMERGE_VALUES %3 - $vgpr0 = COPY %4 - $vgpr1 = COPY %5 - $vgpr2 = COPY %6 - $vgpr3 = COPY %7 + %1:_(<7 x i16>) = G_LOAD %0(p1) :: (load (<7 x i16>), align 16, addrspace 1) + %2:_(<7 x i16>) = G_IMPLICIT_DEF + %3:_(<14 x i16>) = G_CONCAT_VECTORS %1(<7 x i16>), %2(<7 x i16>) + %4:_(<2 x i16>), %5:_(<2 x i16>), %6:_(<2 x i16>), %7:_(<2 x i16>), %8:_(<2 x i16>), %9:_(<2 x i16>), %10:_(<2 x i16>) = G_UNMERGE_VALUES %3(<14 x i16>) + $vgpr0 = COPY %4(<2 x i16>) + $vgpr1 = COPY %5(<2 x 
i16>) + $vgpr2 = COPY %6(<2 x i16>) + $vgpr3 = COPY %7(<2 x i16>) ... @@ -8461,308 +8461,308 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) - ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] - ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; SI-NEXT: $vgpr0 = COPY 
[[BITCAST1]](<2 x s16>) - ; SI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; SI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) - ; SI-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 8, addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, align 4, addrspace 1) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p1) :: (load (i16) from unknown-address + 8, align 8, addrspace 1) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 10 + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p1) :: (load (i16) from unknown-address + 10, addrspace 1) + ; SI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p1) :: (load (i16) from unknown-address + 12, align 4, addrspace 1) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<8 x i16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<8 x i16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; SI-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C6]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C6]] + ; SI-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C7]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C6]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[LOAD3]], [[C6]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C7]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[LOAD4]], [[C6]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[LOAD5]], [[C6]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND5]], [[C7]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[AND4]], [[SHL2]] + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(i32) = G_AND [[LOAD6]], [[C6]] + ; SI-NEXT: [[AND7:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C6]] + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[AND7]], [[C7]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[AND6]], [[SHL3]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR3]](i32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) + ; SI-NEXT: 
$vgpr1 = COPY [[BITCAST2]](<2 x i16>) + ; SI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x i16>) + ; SI-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x i16>) ; ; CI-HSA-LABEL: name: test_load_global_v7s16_align8 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1) - ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) - ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; CI-HSA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-HSA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-HSA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-HSA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1) - ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 - ; CI-HSA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-HSA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) - ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CI-HSA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI-HSA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1) - ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] - ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] - ; CI-HSA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) - ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]] - ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]] - ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) - ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] - ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] - ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) - ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-HSA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] - ; CI-HSA-NEXT: [[AND7:%[0-9]+]]:_(s32) 
= G_AND [[BITCAST]], [[C6]] - ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) - ; CI-HSA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] - ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) - ; CI-HSA-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 8, addrspace 1) + ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, align 4, addrspace 1) + ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; CI-HSA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-HSA-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-HSA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; CI-HSA-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p1) :: (load (i16) from unknown-address + 8, align 8, addrspace 1) + ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 10 + ; CI-HSA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; CI-HSA-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p1) :: (load (i16) from unknown-address + 10, addrspace 1) + ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; CI-HSA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; CI-HSA-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p1) :: (load (i16) from unknown-address + 12, align 4, addrspace 1) + ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x i16>) = G_IMPLICIT_DEF + ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<8 x i16>) + ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C6]] + ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C6]] + ; CI-HSA-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C7]](i32) + ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C6]] + ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[LOAD3]], [[C6]] + ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C7]](i32) + ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[LOAD4]], [[C6]] + ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[LOAD5]], [[C6]] + ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND5]], [[C7]](i32) + ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[AND4]], [[SHL2]] + ; CI-HSA-NEXT: 
[[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; CI-HSA-NEXT: [[AND6:%[0-9]+]]:_(i32) = G_AND [[LOAD6]], [[C6]] + ; CI-HSA-NEXT: [[AND7:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C6]] + ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[AND7]], [[C7]](i32) + ; CI-HSA-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[AND6]], [[SHL3]] + ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR3]](i32) + ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) + ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x i16>) + ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x i16>) + ; CI-HSA-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x i16>) ; ; CI-MESA-LABEL: name: test_load_global_v7s16_align8 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) - ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1) - ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 - ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) - ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1) - ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] - ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] - ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) - ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]] - ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]] - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) - ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; 
CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] - ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] - ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) - ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] - ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] - ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) - ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] - ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) - ; CI-MESA-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 8, addrspace 1) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, align 4, addrspace 1) + ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; CI-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p1) :: (load (i16) from unknown-address + 8, align 8, addrspace 1) + ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 10 + ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; CI-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p1) :: (load (i16) from unknown-address + 10, addrspace 1) + ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; CI-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p1) :: (load (i16) from unknown-address + 12, align 4, addrspace 1) + ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x i16>) = G_IMPLICIT_DEF + ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<8 x i16>) + ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C6]] + ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C6]] + ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C7]](i32) + ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C6]] + ; 
CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[LOAD3]], [[C6]] + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C7]](i32) + ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[LOAD4]], [[C6]] + ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[LOAD5]], [[C6]] + ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND5]], [[C7]](i32) + ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[AND4]], [[SHL2]] + ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(i32) = G_AND [[LOAD6]], [[C6]] + ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C6]] + ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[AND7]], [[C7]](i32) + ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[AND6]], [[SHL3]] + ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR3]](i32) + ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) + ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x i16>) + ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x i16>) + ; CI-MESA-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x i16>) ; ; VI-LABEL: name: test_load_global_v7s16_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] - ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], 
[[C7]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] - ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; VI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) - ; VI-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 8, addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, align 4, addrspace 1) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p1) :: (load (i16) from unknown-address + 8, align 8, addrspace 1) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 10 + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p1) :: (load (i16) from unknown-address + 10, addrspace 1) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p1) :: (load (i16) from unknown-address + 12, align 4, addrspace 1) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<8 x i16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<8 x i16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; VI-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C6]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C6]] + ; VI-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C7]](i32) + ; VI-NEXT: 
[[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C6]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[LOAD3]], [[C6]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C7]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; VI-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[LOAD4]], [[C6]] + ; VI-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[LOAD5]], [[C6]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND5]], [[C7]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[AND4]], [[SHL2]] + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; VI-NEXT: [[AND6:%[0-9]+]]:_(i32) = G_AND [[LOAD6]], [[C6]] + ; VI-NEXT: [[AND7:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C6]] + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[AND7]], [[C7]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[AND6]], [[SHL3]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR3]](i32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) + ; VI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x i16>) + ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x i16>) + ; VI-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x i16>) ; ; GFX9-HSA-LABEL: name: test_load_global_v7s16_align8 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1) - ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) - ; GFX9-HSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; GFX9-HSA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-HSA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; GFX9-HSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9-HSA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-HSA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-HSA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1) - ; GFX9-HSA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) - ; GFX9-HSA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 - ; GFX9-HSA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-HSA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) - ; GFX9-HSA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; GFX9-HSA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; GFX9-HSA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; GFX9-HSA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 
(s16) from unknown-address + 12, align 4, addrspace 1) - ; GFX9-HSA-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-HSA-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX9-HSA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - ; GFX9-HSA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-HSA-NEXT: $vgpr3 = COPY [[BUILD_VECTOR3]](<2 x s16>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 8, addrspace 1) + ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; GFX9-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, align 4, addrspace 1) + ; GFX9-HSA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; GFX9-HSA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-HSA-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; GFX9-HSA-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD3]](i32) + ; GFX9-HSA-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; GFX9-HSA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; GFX9-HSA-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p1) :: (load (i16) from unknown-address + 8, align 8, addrspace 1) + ; GFX9-HSA-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD4]](i32) + ; GFX9-HSA-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 10 + ; GFX9-HSA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; GFX9-HSA-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p1) :: (load (i16) from unknown-address + 10, addrspace 1) + ; GFX9-HSA-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD5]](i32) + ; GFX9-HSA-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; GFX9-HSA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; GFX9-HSA-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p1) :: (load (i16) from unknown-address + 12, align 4, addrspace 1) + ; GFX9-HSA-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD6]](i32) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x 
i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x i16>) = G_IMPLICIT_DEF + ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<8 x i16>) + ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX9-HSA-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC6]](i16), [[TRUNC7]](i16) + ; GFX9-HSA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) + ; GFX9-HSA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x i16>) + ; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-HSA-NEXT: $vgpr3 = COPY [[BUILD_VECTOR3]](<2 x i16>) ; ; GFX9-MESA-LABEL: name: test_load_global_v7s16_align8 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1) - ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) - ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1) - ; GFX9-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) - ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 - ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) - ; GFX9-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; GFX9-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1) - ; GFX9-MESA-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-MESA-NEXT: 
[[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-MESA-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX9-MESA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - ; GFX9-MESA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-MESA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-MESA-NEXT: $vgpr3 = COPY [[BUILD_VECTOR3]](<2 x s16>) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 8, addrspace 1) + ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, align 4, addrspace 1) + ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD3]](i32) + ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; GFX9-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p1) :: (load (i16) from unknown-address + 8, align 8, addrspace 1) + ; GFX9-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD4]](i32) + ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 10 + ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; GFX9-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p1) :: (load (i16) from unknown-address + 10, addrspace 1) + ; GFX9-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD5]](i32) + ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; GFX9-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p1) :: (load (i16) from unknown-address + 12, align 4, addrspace 1) + ; GFX9-MESA-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD6]](i32) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x i16>) = G_IMPLICIT_DEF + ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), 
[[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<8 x i16>) + ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX9-MESA-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC6]](i16), [[TRUNC7]](i16) + ; GFX9-MESA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) + ; GFX9-MESA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x i16>) + ; GFX9-MESA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-MESA-NEXT: $vgpr3 = COPY [[BUILD_VECTOR3]](<2 x i16>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<7 x s16>) = G_LOAD %0 :: (load (<7 x s16>), align 8, addrspace 1) - %2:_(<7 x s16>) = G_IMPLICIT_DEF - %3:_(<14 x s16>) = G_CONCAT_VECTORS %1, %2 - %4:_(<2 x s16>), %5:_(<2 x s16>), %6:_(<2 x s16>), %7:_(<2 x s16>), %8:_(<2 x s16>), %9:_(<2 x s16>), %10:_(<2 x s16>) = G_UNMERGE_VALUES %3 - $vgpr0 = COPY %4 - $vgpr1 = COPY %5 - $vgpr2 = COPY %6 - $vgpr3 = COPY %7 + %1:_(<7 x i16>) = G_LOAD %0(p1) :: (load (<7 x i16>), align 8, addrspace 1) + %2:_(<7 x i16>) = G_IMPLICIT_DEF + %3:_(<14 x i16>) = G_CONCAT_VECTORS %1(<7 x i16>), %2(<7 x i16>) + %4:_(<2 x i16>), %5:_(<2 x i16>), %6:_(<2 x i16>), %7:_(<2 x i16>), %8:_(<2 x i16>), %9:_(<2 x i16>), %10:_(<2 x i16>) = G_UNMERGE_VALUES %3(<14 x i16>) + $vgpr0 = COPY %4(<2 x i16>) + $vgpr1 = COPY %5(<2 x i16>) + $vgpr2 = COPY %6(<2 x i16>) + $vgpr3 = COPY %7(<2 x i16>) ... @@ -8776,308 +8776,308 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) - ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 
65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] - ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; SI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; SI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) - ; SI-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 4, addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, align 4, addrspace 1) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p1) :: (load (i16) from unknown-address + 8, align 4, addrspace 1) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 10 + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p1) :: (load (i16) from unknown-address + 10, addrspace 1) + ; SI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p1) :: (load (i16) from unknown-address + 12, align 4, addrspace 1) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<8 x i16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<8 x i16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; SI-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: 
[[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C6]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C6]] + ; SI-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C7]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C6]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[LOAD3]], [[C6]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C7]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[LOAD4]], [[C6]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[LOAD5]], [[C6]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND5]], [[C7]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[AND4]], [[SHL2]] + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(i32) = G_AND [[LOAD6]], [[C6]] + ; SI-NEXT: [[AND7:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C6]] + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[AND7]], [[C7]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[AND6]], [[SHL3]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR3]](i32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) + ; SI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x i16>) + ; SI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x i16>) + ; SI-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x i16>) ; ; CI-HSA-LABEL: name: test_load_global_v7s16_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) - ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) - ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; CI-HSA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-HSA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-HSA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-HSA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1) - ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 - ; CI-HSA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-HSA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) - ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CI-HSA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI-HSA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1) - ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x 
s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] - ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] - ; CI-HSA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) - ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]] - ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]] - ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) - ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] - ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] - ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) - ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-HSA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] - ; CI-HSA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] - ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) - ; CI-HSA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] - ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) - ; CI-HSA-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 4, addrspace 1) + ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, align 4, addrspace 1) + ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; CI-HSA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-HSA-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-HSA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; CI-HSA-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p1) :: (load (i16) from unknown-address + 8, align 4, addrspace 1) + ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 10 + ; CI-HSA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; CI-HSA-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p1) :: (load (i16) from unknown-address + 10, addrspace 1) + ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; CI-HSA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; CI-HSA-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p1) :: (load (i16) from unknown-address + 12, align 4, addrspace 1) + ; 
CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x i16>) = G_IMPLICIT_DEF + ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<8 x i16>) + ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C6]] + ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C6]] + ; CI-HSA-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C7]](i32) + ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C6]] + ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[LOAD3]], [[C6]] + ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C7]](i32) + ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[LOAD4]], [[C6]] + ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[LOAD5]], [[C6]] + ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND5]], [[C7]](i32) + ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[AND4]], [[SHL2]] + ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; CI-HSA-NEXT: [[AND6:%[0-9]+]]:_(i32) = G_AND [[LOAD6]], [[C6]] + ; CI-HSA-NEXT: [[AND7:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C6]] + ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[AND7]], [[C7]](i32) + ; CI-HSA-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[AND6]], [[SHL3]] + ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR3]](i32) + ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) + ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x i16>) + ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x i16>) + ; CI-HSA-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x i16>) ; ; CI-MESA-LABEL: name: test_load_global_v7s16_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) - ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1) - ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 - ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD 
[[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) - ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1) - ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] - ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] - ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) - ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]] - ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]] - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) - ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] - ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] - ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) - ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] - ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] - ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) - ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] - ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) - ; CI-MESA-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 4, addrspace 1) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, align 4, addrspace 1) + ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; CI-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p1) :: (load (i16) from unknown-address + 8, align 4, 
addrspace 1) + ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 10 + ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; CI-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p1) :: (load (i16) from unknown-address + 10, addrspace 1) + ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; CI-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p1) :: (load (i16) from unknown-address + 12, align 4, addrspace 1) + ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x i16>) = G_IMPLICIT_DEF + ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<8 x i16>) + ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C6]] + ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C6]] + ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C7]](i32) + ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C6]] + ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[LOAD3]], [[C6]] + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C7]](i32) + ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[LOAD4]], [[C6]] + ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[LOAD5]], [[C6]] + ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND5]], [[C7]](i32) + ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[AND4]], [[SHL2]] + ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(i32) = G_AND [[LOAD6]], [[C6]] + ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C6]] + ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[AND7]], [[C7]](i32) + ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[AND6]], [[SHL3]] + ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR3]](i32) + ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) + ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x i16>) + ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x i16>) + ; CI-MESA-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x i16>) ; ; VI-LABEL: name: test_load_global_v7s16_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: 
(load (s16) from unknown-address + 6, addrspace 1) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] - ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] - ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; VI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) - ; VI-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 4, addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, align 4, addrspace 1) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 
6, addrspace 1) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p1) :: (load (i16) from unknown-address + 8, align 4, addrspace 1) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 10 + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p1) :: (load (i16) from unknown-address + 10, addrspace 1) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p1) :: (load (i16) from unknown-address + 12, align 4, addrspace 1) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<8 x i16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<8 x i16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; VI-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C6]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C6]] + ; VI-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C7]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C6]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[LOAD3]], [[C6]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C7]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; VI-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[LOAD4]], [[C6]] + ; VI-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[LOAD5]], [[C6]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND5]], [[C7]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[AND4]], [[SHL2]] + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; VI-NEXT: [[AND6:%[0-9]+]]:_(i32) = G_AND [[LOAD6]], [[C6]] + ; VI-NEXT: [[AND7:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C6]] + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[AND7]], [[C7]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[AND6]], [[SHL3]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR3]](i32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) + ; VI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x i16>) + ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x i16>) + ; VI-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x i16>) ; ; GFX9-HSA-LABEL: name: test_load_global_v7s16_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) - ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = 
G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) - ; GFX9-HSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; GFX9-HSA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-HSA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; GFX9-HSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9-HSA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-HSA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-HSA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1) - ; GFX9-HSA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) - ; GFX9-HSA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 - ; GFX9-HSA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-HSA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) - ; GFX9-HSA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; GFX9-HSA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; GFX9-HSA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; GFX9-HSA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1) - ; GFX9-HSA-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-HSA-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX9-HSA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - ; GFX9-HSA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-HSA-NEXT: $vgpr3 = COPY [[BUILD_VECTOR3]](<2 x s16>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 4, addrspace 1) + ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; GFX9-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, align 4, addrspace 1) + ; GFX9-HSA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; GFX9-HSA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], 
[[C2]](i64) + ; GFX9-HSA-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; GFX9-HSA-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD3]](i32) + ; GFX9-HSA-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; GFX9-HSA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; GFX9-HSA-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p1) :: (load (i16) from unknown-address + 8, align 4, addrspace 1) + ; GFX9-HSA-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD4]](i32) + ; GFX9-HSA-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 10 + ; GFX9-HSA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; GFX9-HSA-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p1) :: (load (i16) from unknown-address + 10, addrspace 1) + ; GFX9-HSA-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD5]](i32) + ; GFX9-HSA-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; GFX9-HSA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; GFX9-HSA-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p1) :: (load (i16) from unknown-address + 12, align 4, addrspace 1) + ; GFX9-HSA-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD6]](i32) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x i16>) = G_IMPLICIT_DEF + ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<8 x i16>) + ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX9-HSA-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC6]](i16), [[TRUNC7]](i16) + ; GFX9-HSA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) + ; GFX9-HSA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x i16>) + ; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-HSA-NEXT: $vgpr3 = COPY [[BUILD_VECTOR3]](<2 x i16>) ; ; GFX9-MESA-LABEL: name: test_load_global_v7s16_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) - ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) - ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) 
from unknown-address + 6, addrspace 1) - ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1) - ; GFX9-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) - ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 - ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) - ; GFX9-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; GFX9-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1) - ; GFX9-MESA-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-MESA-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX9-MESA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - ; GFX9-MESA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-MESA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-MESA-NEXT: $vgpr3 = COPY [[BUILD_VECTOR3]](<2 x s16>) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 4, addrspace 1) + ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, align 4, addrspace 1) + ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD3]](i32) + ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], 
[[C3]](i64) + ; GFX9-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p1) :: (load (i16) from unknown-address + 8, align 4, addrspace 1) + ; GFX9-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD4]](i32) + ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 10 + ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; GFX9-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p1) :: (load (i16) from unknown-address + 10, addrspace 1) + ; GFX9-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD5]](i32) + ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; GFX9-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p1) :: (load (i16) from unknown-address + 12, align 4, addrspace 1) + ; GFX9-MESA-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD6]](i32) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x i16>) = G_IMPLICIT_DEF + ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<8 x i16>) + ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX9-MESA-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC6]](i16), [[TRUNC7]](i16) + ; GFX9-MESA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) + ; GFX9-MESA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x i16>) + ; GFX9-MESA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-MESA-NEXT: $vgpr3 = COPY [[BUILD_VECTOR3]](<2 x i16>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<7 x s16>) = G_LOAD %0 :: (load (<7 x s16>), align 4, addrspace 1) - %2:_(<7 x s16>) = G_IMPLICIT_DEF - %3:_(<14 x s16>) = G_CONCAT_VECTORS %1, %2 - %4:_(<2 x s16>), %5:_(<2 x s16>), %6:_(<2 x s16>), %7:_(<2 x s16>), %8:_(<2 x s16>), %9:_(<2 x s16>), %10:_(<2 x s16>) = G_UNMERGE_VALUES %3 - $vgpr0 = COPY %4 - $vgpr1 = COPY %5 - $vgpr2 = COPY %6 - $vgpr3 = COPY %7 + %1:_(<7 x i16>) = G_LOAD %0(p1) :: (load (<7 x i16>), align 4, addrspace 1) + %2:_(<7 x i16>) = G_IMPLICIT_DEF + %3:_(<14 x i16>) = G_CONCAT_VECTORS %1(<7 x i16>), %2(<7 x i16>) + %4:_(<2 x i16>), %5:_(<2 x i16>), %6:_(<2 x i16>), %7:_(<2 x i16>), %8:_(<2 x i16>), %9:_(<2 x i16>), %10:_(<2 x i16>) = G_UNMERGE_VALUES %3(<14 x i16>) + $vgpr0 = COPY %4(<2 x i16>) + $vgpr1 = COPY %5(<2 x i16>) + $vgpr2 = COPY %6(<2 x i16>) + $vgpr3 = COPY %7(<2 x i16>) ... 
@@ -9091,308 +9091,308 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) - ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] - ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; SI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; SI-NEXT: $vgpr2 = COPY 
[[BITCAST3]](<2 x s16>) - ; SI-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, addrspace 1) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p1) :: (load (i16) from unknown-address + 8, addrspace 1) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 10 + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p1) :: (load (i16) from unknown-address + 10, addrspace 1) + ; SI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p1) :: (load (i16) from unknown-address + 12, addrspace 1) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<8 x i16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<8 x i16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; SI-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C6]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C6]] + ; SI-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C7]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C6]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[LOAD3]], [[C6]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C7]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[LOAD4]], [[C6]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[LOAD5]], [[C6]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND5]], [[C7]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[AND4]], [[SHL2]] + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(i32) = G_AND [[LOAD6]], [[C6]] + ; SI-NEXT: [[AND7:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C6]] + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[AND7]], [[C7]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[AND6]], [[SHL3]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR3]](i32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) + ; SI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x i16>) + ; SI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x i16>) + ; SI-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x 
i16>) ; ; CI-HSA-LABEL: name: test_load_global_v7s16_align2 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) - ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; CI-HSA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-HSA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-HSA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-HSA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1) - ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 - ; CI-HSA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-HSA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) - ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CI-HSA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI-HSA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1) - ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] - ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] - ; CI-HSA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) - ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]] - ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]] - ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) - ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] - ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] - ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) - ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-HSA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] - ; CI-HSA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] - ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) - ; CI-HSA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] - ; 
CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) - ; CI-HSA-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, addrspace 1) + ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; CI-HSA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-HSA-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-HSA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; CI-HSA-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p1) :: (load (i16) from unknown-address + 8, addrspace 1) + ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 10 + ; CI-HSA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; CI-HSA-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p1) :: (load (i16) from unknown-address + 10, addrspace 1) + ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; CI-HSA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; CI-HSA-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p1) :: (load (i16) from unknown-address + 12, addrspace 1) + ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x i16>) = G_IMPLICIT_DEF + ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<8 x i16>) + ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C6]] + ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C6]] + ; CI-HSA-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C7]](i32) + ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C6]] + ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[LOAD3]], [[C6]] + ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C7]](i32) + ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[LOAD4]], [[C6]] + ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[LOAD5]], [[C6]] + ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND5]], [[C7]](i32) + ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[AND4]], [[SHL2]] + ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; CI-HSA-NEXT: [[AND6:%[0-9]+]]:_(i32) = G_AND [[LOAD6]], [[C6]] + ; CI-HSA-NEXT: [[AND7:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C6]] + ; CI-HSA-NEXT: 
[[SHL3:%[0-9]+]]:_(i32) = G_SHL [[AND7]], [[C7]](i32) + ; CI-HSA-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[AND6]], [[SHL3]] + ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR3]](i32) + ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) + ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x i16>) + ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x i16>) + ; CI-HSA-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x i16>) ; ; CI-MESA-LABEL: name: test_load_global_v7s16_align2 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) - ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1) - ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 - ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) - ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1) - ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] - ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] - ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) - ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]] - ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]] - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) - ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] - ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] - ; CI-MESA-NEXT: 
[[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) - ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] - ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] - ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) - ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] - ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) - ; CI-MESA-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, addrspace 1) + ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; CI-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p1) :: (load (i16) from unknown-address + 8, addrspace 1) + ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 10 + ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; CI-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p1) :: (load (i16) from unknown-address + 10, addrspace 1) + ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; CI-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p1) :: (load (i16) from unknown-address + 12, addrspace 1) + ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x i16>) = G_IMPLICIT_DEF + ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<8 x i16>) + ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C6]] + ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C6]] + ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C7]](i32) + ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C6]] + ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[LOAD3]], [[C6]] + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C7]](i32) + ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST 
[[OR1]](i32) + ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[LOAD4]], [[C6]] + ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[LOAD5]], [[C6]] + ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND5]], [[C7]](i32) + ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[AND4]], [[SHL2]] + ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(i32) = G_AND [[LOAD6]], [[C6]] + ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C6]] + ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[AND7]], [[C7]](i32) + ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[AND6]], [[SHL3]] + ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR3]](i32) + ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) + ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x i16>) + ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x i16>) + ; CI-MESA-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x i16>) ; ; VI-LABEL: name: test_load_global_v7s16_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] - ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = 
G_SHL [[AND3]], [[C7]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] - ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; VI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) - ; VI-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, addrspace 1) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p1) :: (load (i16) from unknown-address + 8, addrspace 1) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 10 + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p1) :: (load (i16) from unknown-address + 10, addrspace 1) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p1) :: (load (i16) from unknown-address + 12, addrspace 1) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<8 x i16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<8 x i16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; VI-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C6]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C6]] + ; VI-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C7]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C6]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[LOAD3]], [[C6]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C7]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR 
[[AND2]], [[SHL1]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; VI-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[LOAD4]], [[C6]] + ; VI-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[LOAD5]], [[C6]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND5]], [[C7]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[AND4]], [[SHL2]] + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; VI-NEXT: [[AND6:%[0-9]+]]:_(i32) = G_AND [[LOAD6]], [[C6]] + ; VI-NEXT: [[AND7:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C6]] + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[AND7]], [[C7]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[AND6]], [[SHL3]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR3]](i32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) + ; VI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x i16>) + ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x i16>) + ; VI-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x i16>) ; ; GFX9-HSA-LABEL: name: test_load_global_v7s16_align2 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) - ; GFX9-HSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; GFX9-HSA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-HSA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; GFX9-HSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9-HSA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-HSA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-HSA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1) - ; GFX9-HSA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) - ; GFX9-HSA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 - ; GFX9-HSA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-HSA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) - ; GFX9-HSA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; GFX9-HSA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; GFX9-HSA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; GFX9-HSA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1) - ; GFX9-HSA-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 
x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-HSA-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX9-HSA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - ; GFX9-HSA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-HSA-NEXT: $vgpr3 = COPY [[BUILD_VECTOR3]](<2 x s16>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; GFX9-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, addrspace 1) + ; GFX9-HSA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; GFX9-HSA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-HSA-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; GFX9-HSA-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD3]](i32) + ; GFX9-HSA-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; GFX9-HSA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; GFX9-HSA-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p1) :: (load (i16) from unknown-address + 8, addrspace 1) + ; GFX9-HSA-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD4]](i32) + ; GFX9-HSA-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 10 + ; GFX9-HSA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; GFX9-HSA-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p1) :: (load (i16) from unknown-address + 10, addrspace 1) + ; GFX9-HSA-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD5]](i32) + ; GFX9-HSA-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; GFX9-HSA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; GFX9-HSA-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p1) :: (load (i16) from unknown-address + 12, addrspace 1) + ; GFX9-HSA-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD6]](i32) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x i16>) = G_IMPLICIT_DEF + ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<8 x i16>) + ; GFX9-HSA-NEXT: 
[[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX9-HSA-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC6]](i16), [[TRUNC7]](i16) + ; GFX9-HSA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) + ; GFX9-HSA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x i16>) + ; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-HSA-NEXT: $vgpr3 = COPY [[BUILD_VECTOR3]](<2 x i16>) ; ; GFX9-MESA-LABEL: name: test_load_global_v7s16_align2 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) - ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1) - ; GFX9-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) - ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 - ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) - ; GFX9-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; GFX9-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1) - ; GFX9-MESA-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-MESA-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC 
[[BITCAST]](s32) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX9-MESA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - ; GFX9-MESA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-MESA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-MESA-NEXT: $vgpr3 = COPY [[BUILD_VECTOR3]](<2 x s16>) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, addrspace 1) + ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD3]](i32) + ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; GFX9-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p1) :: (load (i16) from unknown-address + 8, addrspace 1) + ; GFX9-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD4]](i32) + ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 10 + ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; GFX9-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p1) :: (load (i16) from unknown-address + 10, addrspace 1) + ; GFX9-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD5]](i32) + ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; GFX9-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p1) :: (load (i16) from unknown-address + 12, addrspace 1) + ; GFX9-MESA-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD6]](i32) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x i16>) = G_IMPLICIT_DEF + ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<8 x i16>) + ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX9-MESA-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC6]](i16), [[TRUNC7]](i16) + ; GFX9-MESA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) + ; GFX9-MESA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x i16>) + ; GFX9-MESA-NEXT: $vgpr2 = COPY 
[[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-MESA-NEXT: $vgpr3 = COPY [[BUILD_VECTOR3]](<2 x i16>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<7 x s16>) = G_LOAD %0 :: (load (<7 x s16>), align 2, addrspace 1) - %2:_(<7 x s16>) = G_IMPLICIT_DEF - %3:_(<14 x s16>) = G_CONCAT_VECTORS %1, %2 - %4:_(<2 x s16>), %5:_(<2 x s16>), %6:_(<2 x s16>), %7:_(<2 x s16>), %8:_(<2 x s16>), %9:_(<2 x s16>), %10:_(<2 x s16>) = G_UNMERGE_VALUES %3 - $vgpr0 = COPY %4 - $vgpr1 = COPY %5 - $vgpr2 = COPY %6 - $vgpr3 = COPY %7 + %1:_(<7 x i16>) = G_LOAD %0(p1) :: (load (<7 x i16>), align 2, addrspace 1) + %2:_(<7 x i16>) = G_IMPLICIT_DEF + %3:_(<14 x i16>) = G_CONCAT_VECTORS %1(<7 x i16>), %2(<7 x i16>) + %4:_(<2 x i16>), %5:_(<2 x i16>), %6:_(<2 x i16>), %7:_(<2 x i16>), %8:_(<2 x i16>), %9:_(<2 x i16>), %10:_(<2 x i16>) = G_UNMERGE_VALUES %3(<14 x i16>) + $vgpr0 = COPY %4(<2 x i16>) + $vgpr1 = COPY %5(<2 x i16>) + $vgpr2 = COPY %6(<2 x i16>) + $vgpr3 = COPY %7(<2 x i16>) ... @@ -9406,428 +9406,428 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: 
(load (s8) from unknown-address + 8, addrspace 1) - ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD4]] - ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 - ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) - ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] - ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; SI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1) - ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1) - ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LOAD6]], [[C1]](s32) - ; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C8]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C8]] - ; SI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C9]](s32) - ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL7]] - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C8]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C8]] - ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C9]](s32) - ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL8]] - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C8]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[OR5]], [[C8]] - ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C9]](s32) - ; SI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL9]] - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[OR6]], [[C8]] - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C8]] - ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C9]](s32) - ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL10]] - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) - ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; SI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; SI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) - ; SI-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), 
addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, addrspace 1) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; SI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (i8) from unknown-address + 8, addrspace 1) + ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD8]](p1) :: (load (i8) from unknown-address + 9, addrspace 1) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD4]] + ; SI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 10 + ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (i8) from unknown-address + 10, addrspace 1) + ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p1) :: (load (i8) from unknown-address + 11, addrspace 1) + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[ZEXTLOAD5]] + ; SI-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](i64) + ; SI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p1) 
:: (load (i8) from unknown-address + 12, addrspace 1) + ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD12]](p1) :: (load (i8) from unknown-address + 13, addrspace 1) + ; SI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[LOAD6]], [[C1]](i32) + ; SI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<8 x i16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<8 x i16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; SI-NEXT: [[C8:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[OR]], [[C8]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[OR1]], [[C8]] + ; SI-NEXT: [[C9:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C9]](i32) + ; SI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL7]] + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR7]](i32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[OR2]], [[C8]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[OR3]], [[C8]] + ; SI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C9]](i32) + ; SI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL8]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR8]](i32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[OR4]], [[C8]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[OR5]], [[C8]] + ; SI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[AND5]], [[C9]](i32) + ; SI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[AND4]], [[SHL9]] + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR9]](i32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(i32) = G_AND [[OR6]], [[C8]] + ; SI-NEXT: [[AND7:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C8]] + ; SI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[AND7]], [[C9]](i32) + ; SI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[AND6]], [[SHL10]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR10]](i32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) + ; SI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x i16>) + ; SI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x i16>) + ; SI-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x i16>) ; ; CI-HSA-LABEL: name: test_load_global_v7s16_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1) - ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, align 1, addrspace 1) - ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 1, addrspace 1) - ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; CI-HSA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-HSA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, align 1, addrspace 1) - ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-HSA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-HSA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD 
[[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 1, addrspace 1) - ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 - ; CI-HSA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-HSA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, align 1, addrspace 1) - ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CI-HSA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI-HSA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 1, addrspace 1) - ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] - ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] - ; CI-HSA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) - ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]] - ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]] - ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) - ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] - ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] - ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) - ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-HSA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] - ; CI-HSA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] - ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) - ; CI-HSA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] - ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) - ; CI-HSA-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 1, addrspace 1) + ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, align 1, addrspace 1) + ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, align 1, addrspace 1) + ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; CI-HSA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-HSA-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, align 1, addrspace 1) + ; 
CI-HSA-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-HSA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; CI-HSA-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p1) :: (load (i16) from unknown-address + 8, align 1, addrspace 1) + ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 10 + ; CI-HSA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; CI-HSA-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p1) :: (load (i16) from unknown-address + 10, align 1, addrspace 1) + ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; CI-HSA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; CI-HSA-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p1) :: (load (i16) from unknown-address + 12, align 1, addrspace 1) + ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x i16>) = G_IMPLICIT_DEF + ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<8 x i16>) + ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C6]] + ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C6]] + ; CI-HSA-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C7]](i32) + ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C6]] + ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[LOAD3]], [[C6]] + ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C7]](i32) + ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[LOAD4]], [[C6]] + ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[LOAD5]], [[C6]] + ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND5]], [[C7]](i32) + ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[AND4]], [[SHL2]] + ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; CI-HSA-NEXT: [[AND6:%[0-9]+]]:_(i32) = G_AND [[LOAD6]], [[C6]] + ; CI-HSA-NEXT: [[AND7:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C6]] + ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[AND7]], [[C7]](i32) + ; CI-HSA-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[AND6]], [[SHL3]] + ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR3]](i32) + ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) + ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x i16>) + ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x i16>) + ; CI-HSA-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x i16>) ; ; CI-MESA-LABEL: name: test_load_global_v7s16_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], 
[[ZEXTLOAD]] - ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) - ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD4]] - ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 - ; CI-MESA-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) - ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] - ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CI-MESA-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from 
unknown-address + 13, addrspace 1) - ; CI-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LOAD6]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-MESA-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C8]] - ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C8]] - ; CI-MESA-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C9]](s32) - ; CI-MESA-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL7]] - ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) - ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C8]] - ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C8]] - ; CI-MESA-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C9]](s32) - ; CI-MESA-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL8]] - ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) - ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C8]] - ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[OR5]], [[C8]] - ; CI-MESA-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C9]](s32) - ; CI-MESA-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL9]] - ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32) - ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[OR6]], [[C8]] - ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C8]] - ; CI-MESA-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C9]](s32) - ; CI-MESA-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL10]] - ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) - ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) - ; CI-MESA-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; 
CI-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (i8) from unknown-address + 8, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD8]](p1) :: (load (i8) from unknown-address + 9, addrspace 1) + ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD4]] + ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 10 + ; CI-MESA-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (i8) from unknown-address + 10, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p1) :: (load (i8) from unknown-address + 11, addrspace 1) + ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[ZEXTLOAD5]] + ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; CI-MESA-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p1) :: (load (i8) from unknown-address + 12, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD12]](p1) :: (load (i8) from unknown-address + 13, addrspace 1) + ; CI-MESA-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[LOAD6]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x i16>) = G_IMPLICIT_DEF + ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<8 x i16>) + ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CI-MESA-NEXT: [[C8:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[OR]], [[C8]] + ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[OR1]], [[C8]] + ; CI-MESA-NEXT: [[C9:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: 
[[SHL7:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C9]](i32) + ; CI-MESA-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL7]] + ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR7]](i32) + ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[OR2]], [[C8]] + ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[OR3]], [[C8]] + ; CI-MESA-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C9]](i32) + ; CI-MESA-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL8]] + ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR8]](i32) + ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[OR4]], [[C8]] + ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[OR5]], [[C8]] + ; CI-MESA-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[AND5]], [[C9]](i32) + ; CI-MESA-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[AND4]], [[SHL9]] + ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR9]](i32) + ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(i32) = G_AND [[OR6]], [[C8]] + ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C8]] + ; CI-MESA-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[AND7]], [[C9]](i32) + ; CI-MESA-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[AND6]], [[SHL10]] + ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR10]](i32) + ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) + ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x i16>) + ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x i16>) + ; CI-MESA-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x i16>) ; ; VI-LABEL: name: test_load_global_v7s16_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, 
addrspace 1) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD4]] - ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] - ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1) - ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1) - ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LOAD6]], [[C1]](s32) - ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C8]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C8]] - ; VI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C9]](s32) - ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL7]] - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C8]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C8]] - ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C9]](s32) - ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL8]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C8]] - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[OR5]], [[C8]] - ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C9]](s32) - ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL9]] - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32) - ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[OR6]], [[C8]] - ; VI-NEXT: 
[[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C8]] - ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C9]](s32) - ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL10]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) - ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; VI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) - ; VI-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, addrspace 1) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (i8) from unknown-address + 8, addrspace 1) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD8]](p1) :: (load (i8) from unknown-address + 9, addrspace 1) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD4]] + ; VI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 10 + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (i8) from unknown-address + 10, 
addrspace 1) + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p1) :: (load (i8) from unknown-address + 11, addrspace 1) + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[ZEXTLOAD5]] + ; VI-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](i64) + ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p1) :: (load (i8) from unknown-address + 12, addrspace 1) + ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD12]](p1) :: (load (i8) from unknown-address + 13, addrspace 1) + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[LOAD6]], [[C1]](i32) + ; VI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<8 x i16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<8 x i16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; VI-NEXT: [[C8:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[OR]], [[C8]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[OR1]], [[C8]] + ; VI-NEXT: [[C9:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C9]](i32) + ; VI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL7]] + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR7]](i32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[OR2]], [[C8]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[OR3]], [[C8]] + ; VI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C9]](i32) + ; VI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL8]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR8]](i32) + ; VI-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[OR4]], [[C8]] + ; VI-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[OR5]], [[C8]] + ; VI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[AND5]], [[C9]](i32) + ; VI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[AND4]], [[SHL9]] + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR9]](i32) + ; VI-NEXT: [[AND6:%[0-9]+]]:_(i32) = G_AND [[OR6]], [[C8]] + ; VI-NEXT: [[AND7:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C8]] + ; VI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[AND7]], [[C9]](i32) + ; VI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[AND6]], [[SHL10]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR10]](i32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) + ; VI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x i16>) + ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x i16>) + ; VI-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x i16>) ; ; GFX9-HSA-LABEL: name: test_load_global_v7s16_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1) - ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, align 1, addrspace 1) - ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-HSA-NEXT: 
[[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 1, addrspace 1) - ; GFX9-HSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; GFX9-HSA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-HSA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, align 1, addrspace 1) - ; GFX9-HSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9-HSA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-HSA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-HSA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 1, addrspace 1) - ; GFX9-HSA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) - ; GFX9-HSA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 - ; GFX9-HSA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-HSA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, align 1, addrspace 1) - ; GFX9-HSA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; GFX9-HSA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; GFX9-HSA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; GFX9-HSA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 1, addrspace 1) - ; GFX9-HSA-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-HSA-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX9-HSA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - ; GFX9-HSA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-HSA-NEXT: $vgpr3 = COPY [[BUILD_VECTOR3]](<2 x s16>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 1, addrspace 1) + ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, align 1, addrspace 1) + ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; GFX9-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, align 1, addrspace 1) + ; 
GFX9-HSA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; GFX9-HSA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-HSA-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, align 1, addrspace 1) + ; GFX9-HSA-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD3]](i32) + ; GFX9-HSA-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; GFX9-HSA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; GFX9-HSA-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p1) :: (load (i16) from unknown-address + 8, align 1, addrspace 1) + ; GFX9-HSA-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD4]](i32) + ; GFX9-HSA-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 10 + ; GFX9-HSA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; GFX9-HSA-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p1) :: (load (i16) from unknown-address + 10, align 1, addrspace 1) + ; GFX9-HSA-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD5]](i32) + ; GFX9-HSA-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; GFX9-HSA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; GFX9-HSA-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p1) :: (load (i16) from unknown-address + 12, align 1, addrspace 1) + ; GFX9-HSA-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD6]](i32) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x i16>) = G_IMPLICIT_DEF + ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<8 x i16>) + ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX9-HSA-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC6]](i16), [[TRUNC7]](i16) + ; GFX9-HSA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) + ; GFX9-HSA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x i16>) + ; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-HSA-NEXT: $vgpr3 = COPY [[BUILD_VECTOR3]](<2 x i16>) ; ; GFX9-MESA-LABEL: name: test_load_global_v7s16_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) 
from unknown-address + 2, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32) - ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[OR3]](s32) - ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD4]] - ; GFX9-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[OR4]](s32) - ; GFX9-MESA-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 - ; GFX9-MESA-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] - ; GFX9-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[OR5]](s32) - ; GFX9-MESA-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; GFX9-MESA-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1) - ; 
GFX9-MESA-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LOAD6]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; GFX9-MESA-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[OR6]](s32) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-MESA-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX9-MESA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - ; GFX9-MESA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-MESA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-MESA-NEXT: $vgpr3 = COPY [[BUILD_VECTOR3]](<2 x s16>) + ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR]](i32) + ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[OR2]](i32) + ; 
GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[OR3]](i32) + ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; GFX9-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (i8) from unknown-address + 8, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD8]](p1) :: (load (i8) from unknown-address + 9, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD4]] + ; GFX9-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[OR4]](i32) + ; GFX9-MESA-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 10 + ; GFX9-MESA-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (i8) from unknown-address + 10, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p1) :: (load (i8) from unknown-address + 11, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[ZEXTLOAD5]] + ; GFX9-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[OR5]](i32) + ; GFX9-MESA-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; GFX9-MESA-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p1) :: (load (i8) from unknown-address + 12, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD12]](p1) :: (load (i8) from unknown-address + 13, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[LOAD6]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX9-MESA-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[OR6]](i32) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x i16>) = G_IMPLICIT_DEF + ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<8 x i16>) + ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX9-MESA-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-MESA-NEXT: 
[[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC6]](i16), [[TRUNC7]](i16) + ; GFX9-MESA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) + ; GFX9-MESA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x i16>) + ; GFX9-MESA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-MESA-NEXT: $vgpr3 = COPY [[BUILD_VECTOR3]](<2 x i16>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<7 x s16>) = G_LOAD %0 :: (load (<7 x s16>), align 1, addrspace 1) - %2:_(<7 x s16>) = G_IMPLICIT_DEF - %3:_(<14 x s16>) = G_CONCAT_VECTORS %1, %2 - %4:_(<2 x s16>), %5:_(<2 x s16>), %6:_(<2 x s16>), %7:_(<2 x s16>), %8:_(<2 x s16>), %9:_(<2 x s16>), %10:_(<2 x s16>) = G_UNMERGE_VALUES %3 - $vgpr0 = COPY %4 - $vgpr1 = COPY %5 - $vgpr2 = COPY %6 - $vgpr3 = COPY %7 + %1:_(<7 x i16>) = G_LOAD %0(p1) :: (load (<7 x i16>), align 1, addrspace 1) + %2:_(<7 x i16>) = G_IMPLICIT_DEF + %3:_(<14 x i16>) = G_CONCAT_VECTORS %1(<7 x i16>), %2(<7 x i16>) + %4:_(<2 x i16>), %5:_(<2 x i16>), %6:_(<2 x i16>), %7:_(<2 x i16>), %8:_(<2 x i16>), %9:_(<2 x i16>), %10:_(<2 x i16>) = G_UNMERGE_VALUES %3(<14 x i16>) + $vgpr0 = COPY %4(<2 x i16>) + $vgpr1 = COPY %5(<2 x i16>) + $vgpr2 = COPY %6(<2 x i16>) + $vgpr3 = COPY %7(<2 x i16>) ... @@ -9841,52 +9841,52 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), addrspace 1) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i16>) = G_BITCAST [[LOAD]](<4 x i32>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x i16>) ; ; CI-HSA-LABEL: name: test_load_global_v8s16_align16 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), addrspace 1) + ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i16>) = G_BITCAST [[LOAD]](<4 x i32>) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x i16>) ; ; CI-MESA-LABEL: name: test_load_global_v8s16_align16 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), addrspace 1) + ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i16>) = G_BITCAST [[LOAD]](<4 x i32>) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x i16>) ; ; VI-LABEL: name: test_load_global_v8s16_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) - ; VI-NEXT: 
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), addrspace 1) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i16>) = G_BITCAST [[LOAD]](<4 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x i16>) ; ; GFX9-HSA-LABEL: name: test_load_global_v8s16_align16 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), addrspace 1) + ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i16>) = G_BITCAST [[LOAD]](<4 x i32>) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x i16>) ; ; GFX9-MESA-LABEL: name: test_load_global_v8s16_align16 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), addrspace 1) + ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i16>) = G_BITCAST [[LOAD]](<4 x i32>) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x i16>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<8 x s16>) = G_LOAD %0 :: (load (<8 x s16>), align 16, addrspace 1) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<8 x i16>) = G_LOAD %0(p1) :: (load (<8 x i16>), addrspace 1) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<8 x i16>) ... 
--- @@ -9899,52 +9899,52 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), align 8, addrspace 1) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i16>) = G_BITCAST [[LOAD]](<4 x i32>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x i16>) ; ; CI-HSA-LABEL: name: test_load_global_v8s16_align8 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1) - ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), align 8, addrspace 1) + ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i16>) = G_BITCAST [[LOAD]](<4 x i32>) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x i16>) ; ; CI-MESA-LABEL: name: test_load_global_v8s16_align8 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1) - ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), align 8, addrspace 1) + ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i16>) = G_BITCAST [[LOAD]](<4 x i32>) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x i16>) ; ; VI-LABEL: name: test_load_global_v8s16_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), align 8, addrspace 1) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i16>) = G_BITCAST [[LOAD]](<4 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x i16>) ; ; GFX9-HSA-LABEL: name: test_load_global_v8s16_align8 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1) - ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), align 8, addrspace 1) + ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i16>) = G_BITCAST [[LOAD]](<4 x i32>) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x i16>) ; ; GFX9-MESA-LABEL: name: test_load_global_v8s16_align8 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; 
GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1) - ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), align 8, addrspace 1) + ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i16>) = G_BITCAST [[LOAD]](<4 x i32>) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x i16>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<8 x s16>) = G_LOAD %0 :: (load (<8 x s16>), align 8, addrspace 1) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<8 x i16>) = G_LOAD %0(p1) :: (load (<8 x i16>), align 8, addrspace 1) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<8 x i16>) ... --- @@ -9957,46 +9957,46 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load (<2 x i32>), addrspace 1) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; CI-HSA-LABEL: name: test_load_global_v2s32_align8 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1) - ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load (<2 x i32>), addrspace 1) + ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; CI-MESA-LABEL: name: test_load_global_v2s32_align8 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1) - ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load (<2 x i32>), addrspace 1) + ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; VI-LABEL: name: test_load_global_v2s32_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load (<2 x i32>), addrspace 1) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX9-HSA-LABEL: name: test_load_global_v2s32_align8 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load (<2 x i32>), addrspace 1) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX9-MESA-LABEL: name: test_load_global_v2s32_align8 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1) - 
; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load (<2 x i32>), addrspace 1) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 8, addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:_(<2 x i32>) = G_LOAD %0(p1) :: (load (<2 x i32>), addrspace 1) + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... --- @@ -10009,46 +10009,46 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load (<2 x i32>), align 4, addrspace 1) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; CI-HSA-LABEL: name: test_load_global_v2s32_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1) - ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load (<2 x i32>), align 4, addrspace 1) + ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; CI-MESA-LABEL: name: test_load_global_v2s32_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1) - ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load (<2 x i32>), align 4, addrspace 1) + ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; VI-LABEL: name: test_load_global_v2s32_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load (<2 x i32>), align 4, addrspace 1) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX9-HSA-LABEL: name: test_load_global_v2s32_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load (<2 x i32>), align 4, addrspace 1) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX9-MESA-LABEL: name: test_load_global_v2s32_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load (<2 x i32>), align 4, addrspace 1) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), 
align 4, addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:_(<2 x i32>) = G_LOAD %0(p1) :: (load (<2 x i32>), align 4, addrspace 1) + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... --- @@ -10061,102 +10061,102 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, addrspace 1) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; CI-HSA-LABEL: name: test_load_global_v2s32_align2 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 2, addrspace 1) - ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load (<2 x i32>), align 2, addrspace 1) + ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; CI-MESA-LABEL: name: test_load_global_v2s32_align2 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = 
G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) - ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32) + ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; VI-LABEL: name: test_load_global_v2s32_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, 
addrspace 1) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, addrspace 1) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX9-HSA-LABEL: name: test_load_global_v2s32_align2 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 2, addrspace 1) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load (<2 x i32>), align 2, addrspace 1) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX9-MESA-LABEL: name: test_load_global_v2s32_align2 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; GFX9-MESA-NEXT: 
[[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 2, addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:_(<2 x i32>) = G_LOAD %0(p1) :: (load (<2 x i32>), align 2, addrspace 1) + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... 
--- @@ -10169,174 +10169,174 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = 
G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, addrspace 1) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; CI-HSA-LABEL: name: test_load_global_v2s32_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 1, addrspace 1) - ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load (<2 x i32>), align 1, addrspace 1) + ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; CI-MESA-LABEL: name: test_load_global_v2s32_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) 
= G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) - ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; CI-MESA-NEXT: 
[[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32) + ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; VI-LABEL: name: test_load_global_v2s32_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from 
unknown-address + 6, addrspace 1) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, addrspace 1) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX9-HSA-LABEL: name: test_load_global_v2s32_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: 
[[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 1, addrspace 1) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load (<2 x i32>), align 1, addrspace 1) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX9-MESA-LABEL: name: test_load_global_v2s32_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX9-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; 
GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX9-MESA-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 1, addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:_(<2 x i32>) = G_LOAD %0(p1) :: (load (<2 x i32>), align 1, addrspace 1) + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... 
--- @@ -10349,48 +10349,48 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<4 x i32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) ; ; CI-HSA-LABEL: name: test_load_global_v3s32_align16 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1) - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 16, addrspace 1) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) ; ; CI-MESA-LABEL: name: test_load_global_v3s32_align16 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1) - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 16, addrspace 1) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) ; ; VI-LABEL: name: test_load_global_v3s32_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 16, addrspace 1) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) ; ; GFX9-HSA-LABEL: name: test_load_global_v3s32_align16 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 16, addrspace 1) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) ; ; GFX9-MESA-LABEL: name: test_load_global_v3s32_align16 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 16, addrspace 1) + ; GFX9-MESA-NEXT: 
$vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 16, addrspace 1) - $vgpr0_vgpr1_vgpr2 = COPY %1 + %1:_(<3 x i32>) = G_LOAD %0(p1) :: (load (<3 x i32>), align 16, addrspace 1) + $vgpr0_vgpr1_vgpr2 = COPY %1(<3 x i32>) ... --- @@ -10403,51 +10403,51 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from unknown-address + 8, addrspace 1) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load (<2 x i32>), align 4, addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i32) from unknown-address + 8, addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<2 x i32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[LOAD1]](i32) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) ; ; CI-HSA-LABEL: name: test_load_global_v3s32_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1) - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 4, addrspace 1) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) ; ; CI-MESA-LABEL: name: test_load_global_v3s32_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1) - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 4, addrspace 1) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) ; ; VI-LABEL: name: test_load_global_v3s32_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 4, addrspace 1) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) ; ; GFX9-HSA-LABEL: name: test_load_global_v3s32_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = 
COPY [[LOAD]](<3 x s32>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 4, addrspace 1) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) ; ; GFX9-MESA-LABEL: name: test_load_global_v3s32_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 4, addrspace 1) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 4, addrspace 1) - $vgpr0_vgpr1_vgpr2 = COPY %1 + %1:_(<3 x i32>) = G_LOAD %0(p1) :: (load (<3 x i32>), align 4, addrspace 1) + $vgpr0_vgpr1_vgpr2 = COPY %1(<3 x i32>) ... --- @@ -10460,46 +10460,46 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), addrspace 1) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; CI-HSA-LABEL: name: test_load_global_v4s32_align16 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), addrspace 1) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; CI-MESA-LABEL: name: test_load_global_v4s32_align16 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), addrspace 1) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; VI-LABEL: name: test_load_global_v4s32_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), addrspace 1) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; GFX9-HSA-LABEL: name: test_load_global_v4s32_align16 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), addrspace 1) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; GFX9-MESA-LABEL: name: 
test_load_global_v4s32_align16 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), addrspace 1) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 16, addrspace 1) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<4 x i32>) = G_LOAD %0(p1) :: (load (<4 x i32>), addrspace 1) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<4 x i32>) ... --- @@ -10512,46 +10512,46 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), align 8, addrspace 1) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; CI-HSA-LABEL: name: test_load_global_v4s32_align8 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1) - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), align 8, addrspace 1) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; CI-MESA-LABEL: name: test_load_global_v4s32_align8 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1) - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), align 8, addrspace 1) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; VI-LABEL: name: test_load_global_v4s32_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), align 8, addrspace 1) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; GFX9-HSA-LABEL: name: test_load_global_v4s32_align8 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), align 8, addrspace 1) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; GFX9-MESA-LABEL: name: test_load_global_v4s32_align8 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; 
GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), align 8, addrspace 1) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 8, addrspace 1) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<4 x i32>) = G_LOAD %0(p1) :: (load (<4 x i32>), align 8, addrspace 1) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<4 x i32>) ... --- @@ -10564,46 +10564,46 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), align 4, addrspace 1) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; CI-HSA-LABEL: name: test_load_global_v4s32_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), align 4, addrspace 1) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; CI-MESA-LABEL: name: test_load_global_v4s32_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), align 4, addrspace 1) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; VI-LABEL: name: test_load_global_v4s32_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), align 4, addrspace 1) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; GFX9-HSA-LABEL: name: test_load_global_v4s32_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), align 4, addrspace 1) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; GFX9-MESA-LABEL: name: test_load_global_v4s32_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) - ; GFX9-MESA-NEXT: 
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), align 4, addrspace 1) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 4, addrspace 1) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<4 x i32>) = G_LOAD %0(p1) :: (load (<4 x i32>), align 4, addrspace 1) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<4 x i32>) ... --- @@ -10616,46 +10616,46 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x i32>) = G_LOAD [[COPY]](p1) :: (load (<8 x i32>), addrspace 1) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x i32>) ; ; CI-HSA-LABEL: name: test_load_global_v8s32_align32 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1) - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x i32>) = G_LOAD [[COPY]](p1) :: (load (<8 x i32>), addrspace 1) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x i32>) ; ; CI-MESA-LABEL: name: test_load_global_v8s32_align32 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1) - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x i32>) = G_LOAD [[COPY]](p1) :: (load (<8 x i32>), addrspace 1) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x i32>) ; ; VI-LABEL: name: test_load_global_v8s32_align32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x i32>) = G_LOAD [[COPY]](p1) :: (load (<8 x i32>), addrspace 1) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x i32>) ; ; GFX9-HSA-LABEL: name: test_load_global_v8s32_align32 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x i32>) = G_LOAD [[COPY]](p1) :: (load (<8 x i32>), addrspace 1) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x i32>) ; ; GFX9-MESA-LABEL: name: test_load_global_v8s32_align32 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1) - ; 
GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x i32>) = G_LOAD [[COPY]](p1) :: (load (<8 x i32>), addrspace 1) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x i32>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<8 x s32>) = G_LOAD %0 :: (load (<8 x s32>), align 32, addrspace 1) - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 + %1:_(<8 x i32>) = G_LOAD %0(p1) :: (load (<8 x i32>), addrspace 1) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1(<8 x i32>) ... --- @@ -10668,46 +10668,46 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load (<16 x s32>), align 32, addrspace 1) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<16 x i32>) = G_LOAD [[COPY]](p1) :: (load (<16 x i32>), align 32, addrspace 1) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x i32>) ; ; CI-HSA-LABEL: name: test_load_global_v16s32_align32 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load (<16 x s32>), align 32, addrspace 1) - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<16 x i32>) = G_LOAD [[COPY]](p1) :: (load (<16 x i32>), align 32, addrspace 1) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x i32>) ; ; CI-MESA-LABEL: name: test_load_global_v16s32_align32 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load (<16 x s32>), align 32, addrspace 1) - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<16 x i32>) = G_LOAD [[COPY]](p1) :: (load (<16 x i32>), align 32, addrspace 1) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x i32>) ; ; VI-LABEL: name: test_load_global_v16s32_align32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load (<16 x s32>), align 32, addrspace 1) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<16 x i32>) = G_LOAD [[COPY]](p1) :: (load (<16 x i32>), align 32, addrspace 1) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x i32>) ; ; GFX9-HSA-LABEL: name: test_load_global_v16s32_align32 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD 
[[COPY]](p1) :: (load (<16 x s32>), align 32, addrspace 1) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<16 x i32>) = G_LOAD [[COPY]](p1) :: (load (<16 x i32>), align 32, addrspace 1) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x i32>) ; ; GFX9-MESA-LABEL: name: test_load_global_v16s32_align32 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load (<16 x s32>), align 32, addrspace 1) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<16 x i32>) = G_LOAD [[COPY]](p1) :: (load (<16 x i32>), align 32, addrspace 1) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x i32>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<16 x s32>) = G_LOAD %0 :: (load (<16 x s32>), align 32, addrspace 1) - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1 + %1:_(<16 x i32>) = G_LOAD %0(p1) :: (load (<16 x i32>), align 32, addrspace 1) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1(<16 x i32>) ... --- @@ -10720,46 +10720,46 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), addrspace 1) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p1) :: (load (<2 x i64>), addrspace 1) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) ; ; CI-HSA-LABEL: name: test_load_global_v2s64_align16 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), addrspace 1) - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p1) :: (load (<2 x i64>), addrspace 1) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) ; ; CI-MESA-LABEL: name: test_load_global_v2s64_align16 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), addrspace 1) - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p1) :: (load (<2 x i64>), addrspace 1) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) ; ; VI-LABEL: name: test_load_global_v2s64_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), addrspace 1) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p1) :: (load (<2 x i64>), addrspace 1) + ; 
VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) ; ; GFX9-HSA-LABEL: name: test_load_global_v2s64_align16 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), addrspace 1) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p1) :: (load (<2 x i64>), addrspace 1) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) ; ; GFX9-MESA-LABEL: name: test_load_global_v2s64_align16 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), addrspace 1) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p1) :: (load (<2 x i64>), addrspace 1) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 16, addrspace 1) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<2 x i64>) = G_LOAD %0(p1) :: (load (<2 x i64>), addrspace 1) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x i64>) ... --- @@ -10772,46 +10772,46 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p1) :: (load (<2 x i64>), align 8, addrspace 1) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) ; ; CI-HSA-LABEL: name: test_load_global_v2s64_align8 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1) - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p1) :: (load (<2 x i64>), align 8, addrspace 1) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) ; ; CI-MESA-LABEL: name: test_load_global_v2s64_align8 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1) - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p1) :: (load (<2 x i64>), align 8, addrspace 1) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) ; ; VI-LABEL: name: test_load_global_v2s64_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p1) :: (load (<2 x i64>), align 8, addrspace 1) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) ; ; GFX9-HSA-LABEL: name: test_load_global_v2s64_align8 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} 
; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p1) :: (load (<2 x i64>), align 8, addrspace 1) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) ; ; GFX9-MESA-LABEL: name: test_load_global_v2s64_align8 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p1) :: (load (<2 x i64>), align 8, addrspace 1) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 8, addrspace 1) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<2 x i64>) = G_LOAD %0(p1) :: (load (<2 x i64>), align 8, addrspace 1) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x i64>) ... --- @@ -10824,46 +10824,46 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 4, addrspace 1) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p1) :: (load (<2 x i64>), align 4, addrspace 1) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) ; ; CI-HSA-LABEL: name: test_load_global_v2s64_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 4, addrspace 1) - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p1) :: (load (<2 x i64>), align 4, addrspace 1) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) ; ; CI-MESA-LABEL: name: test_load_global_v2s64_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 4, addrspace 1) - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p1) :: (load (<2 x i64>), align 4, addrspace 1) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) ; ; VI-LABEL: name: test_load_global_v2s64_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 4, addrspace 1) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p1) :: (load (<2 x i64>), align 4, addrspace 1) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) ; ; GFX9-HSA-LABEL: name: test_load_global_v2s64_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) 
:: (load (<2 x s64>), align 4, addrspace 1) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p1) :: (load (<2 x i64>), align 4, addrspace 1) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) ; ; GFX9-MESA-LABEL: name: test_load_global_v2s64_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 4, addrspace 1) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p1) :: (load (<2 x i64>), align 4, addrspace 1) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 4, addrspace 1) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<2 x i64>) = G_LOAD %0(p1) :: (load (<2 x i64>), align 4, addrspace 1) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x i64>) ... --- @@ -10876,194 +10876,194 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD2]] - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR3]](s32) - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1) - ; SI-NEXT: 
[[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s16) from unknown-address + 14, addrspace 1) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD3]] - ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR4]](s32) - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[SHL5]], [[ZEXT1]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR2]](s64), [[OR5]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR]](i32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, addrspace 1) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR1]](i32) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C3]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i64) = G_OR [[SHL2]], [[ZEXT]] + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i16) from unknown-address + 8, addrspace 1) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p1) :: (load (i16) from unknown-address + 10, addrspace 1) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD2]] + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[OR3]](i32) + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i16) from unknown-address + 12, addrspace 1) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i16) from unknown-address + 14, addrspace 1) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD3]] + ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[OR4]](i32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C3]](i32) + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT1]], [[COPY1]](i32) + ; SI-NEXT: 
[[OR5:%[0-9]+]]:_(i64) = G_OR [[SHL5]], [[ZEXT1]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[OR2]](i64), [[OR5]](i64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; CI-HSA-LABEL: name: test_load_global_v2s64_align2 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 2, addrspace 1) - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p1) :: (load (<2 x i64>), align 2, addrspace 1) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) ; ; CI-MESA-LABEL: name: test_load_global_v2s64_align2 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) - ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; CI-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) - ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) - ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] - ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) - ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD2]] - ; CI-MESA-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR3]](s32) - ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s16) from unknown-address + 14, addrspace 1) - ; CI-MESA-NEXT: 
[[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD3]] - ; CI-MESA-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR4]](s32) - ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) - ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[SHL5]], [[ZEXT1]] - ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR2]](s64), [[OR5]](s64) - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR]](i32) + ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; CI-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR1]](i32) + ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C3]](i32) + ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(i64) = G_OR [[SHL2]], [[ZEXT]] + ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i16) from unknown-address + 8, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p1) :: (load (i16) from unknown-address + 10, addrspace 1) + ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD2]] + ; CI-MESA-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[OR3]](i32) + ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i16) from unknown-address + 12, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i16) from unknown-address + 14, addrspace 1) + ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD3]] + ; CI-MESA-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[OR4]](i32) + ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C3]](i32) + ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT1]], [[COPY1]](i32) + ; 
CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(i64) = G_OR [[SHL5]], [[ZEXT1]] + ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[OR2]](i64), [[OR5]](i64) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; VI-LABEL: name: test_load_global_v2s64_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD2]] - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR3]](s32) - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s16) from unknown-address + 14, addrspace 1) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD3]] - ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR4]](s32) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[SHL5]], [[ZEXT1]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR2]](s64), [[OR5]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT 
i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR]](i32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, addrspace 1) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR1]](i32) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i64) = G_OR [[SHL2]], [[ZEXT]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i16) from unknown-address + 8, addrspace 1) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p1) :: (load (i16) from unknown-address + 10, addrspace 1) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD2]] + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[OR3]](i32) + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i16) from unknown-address + 12, addrspace 1) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i16) from unknown-address + 14, addrspace 1) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD3]] + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[OR4]](i32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C3]](i32) + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT1]], [[COPY1]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i64) = G_OR [[SHL5]], [[ZEXT1]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[OR2]](i64), [[OR5]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; GFX9-HSA-LABEL: name: test_load_global_v2s64_align2 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 2, addrspace 1) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p1) :: (load (<2 x i64>), align 2, addrspace 1) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) ; ; GFX9-MESA-LABEL: name: test_load_global_v2s64_align2 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; 
GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) - ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; GFX9-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) - ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] - ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD2]] - ; GFX9-MESA-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR3]](s32) - ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s16) from unknown-address + 14, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD3]] - ; GFX9-MESA-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR4]](s32) - ; GFX9-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; GFX9-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) - ; GFX9-MESA-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[SHL5]], [[ZEXT1]] - ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR2]](s64), [[OR5]](s64) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-MESA-NEXT: 
[[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR]](i32) + ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX9-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR1]](i32) + ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(i64) = G_OR [[SHL2]], [[ZEXT]] + ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i16) from unknown-address + 8, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p1) :: (load (i16) from unknown-address + 10, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD2]] + ; GFX9-MESA-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[OR3]](i32) + ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i16) from unknown-address + 12, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i16) from unknown-address + 14, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD3]] + ; GFX9-MESA-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[OR4]](i32) + ; GFX9-MESA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C3]](i32) + ; GFX9-MESA-NEXT: [[SHL5:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT1]], [[COPY1]](i32) + ; GFX9-MESA-NEXT: [[OR5:%[0-9]+]]:_(i64) = G_OR [[SHL5]], [[ZEXT1]] + ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[OR2]](i64), [[OR5]](i64) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 2, addrspace 1) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<2 x i64>) = G_LOAD %0(p1) :: (load (<2 x i64>), align 2, addrspace 1) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x i64>) ... 
--- @@ -11076,330 +11076,330 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; SI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) - ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; SI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) - ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; SI-NEXT: 
[[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD6]] - ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) - ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) - ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[ZEXTLOAD8]] - ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[OR8]], [[C3]](s32) - ; SI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[OR7]] - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR9]](s32) - ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; SI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1) - ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; SI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1) - ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD9]] - ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1) - ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1) - ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; SI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[ZEXTLOAD11]] - ; SI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) - ; SI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] - ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) - ; SI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + 
; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, addrspace 1) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; SI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; SI-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; SI-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; SI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; SI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (i8) from unknown-address + 8, addrspace 1) + ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; SI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (i8) from unknown-address + 9, addrspace 1) + ; SI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; SI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD6]] + ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (i8) from unknown-address + 10, addrspace 1) + ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p1) :: (load (i8) from unknown-address + 11, addrspace 1) + ; SI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; SI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[ZEXTLOAD8]] + ; SI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[OR8]], [[C3]](i32) + ; SI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[OR7]] + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[OR9]](i32) + ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i64) + ; SI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p1) :: (load (i8) from unknown-address + 12, addrspace 1) + ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; SI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p1) :: (load (i8) from unknown-address + 13, addrspace 1) + ; SI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; 
SI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD9]] + ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p1) :: (load (i8) from unknown-address + 14, addrspace 1) + ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p1) :: (load (i8) from unknown-address + 15, addrspace 1) + ; SI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; SI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[ZEXTLOAD11]] + ; SI-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[OR11]], [[C3]](i32) + ; SI-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[OR10]] + ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[OR12]](i32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; SI-NEXT: [[SHL13:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT1]], [[COPY1]](i32) + ; SI-NEXT: [[OR13:%[0-9]+]]:_(i64) = G_OR [[SHL13]], [[ZEXT1]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[OR6]](i64), [[OR13]](i64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; CI-HSA-LABEL: name: test_load_global_v2s64_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 1, addrspace 1) - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p1) :: (load (<2 x i64>), align 1, addrspace 1) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) ; ; CI-MESA-LABEL: name: test_load_global_v2s64_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from 
unknown-address + 4, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CI-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; CI-MESA-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) - ; CI-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD6]] - ; CI-MESA-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) - ; CI-MESA-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[ZEXTLOAD8]] - ; CI-MESA-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[OR8]], [[C3]](s32) - ; CI-MESA-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[OR7]] - ; CI-MESA-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR9]](s32) - ; CI-MESA-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1) - ; CI-MESA-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD9]] - ; CI-MESA-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, 
addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1) - ; CI-MESA-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[ZEXTLOAD11]] - ; CI-MESA-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) - ; CI-MESA-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] - ; CI-MESA-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CI-MESA-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) - ; CI-MESA-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] - ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-MESA-NEXT: 
[[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; CI-MESA-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; CI-MESA-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (i8) from unknown-address + 8, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (i8) from unknown-address + 9, addrspace 1) + ; CI-MESA-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD6]] + ; CI-MESA-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (i8) from unknown-address + 10, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p1) :: (load (i8) from unknown-address + 11, addrspace 1) + ; CI-MESA-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[ZEXTLOAD8]] + ; CI-MESA-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[OR8]], [[C3]](i32) + ; CI-MESA-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[OR7]] + ; CI-MESA-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[OR9]](i32) + ; CI-MESA-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p1) :: (load (i8) from unknown-address + 12, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p1) :: (load (i8) from unknown-address + 13, addrspace 1) + ; CI-MESA-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD9]] + ; CI-MESA-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p1) :: (load (i8) from unknown-address + 14, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p1) :: (load (i8) from unknown-address + 15, addrspace 1) + ; CI-MESA-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[ZEXTLOAD11]] + ; CI-MESA-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[OR11]], [[C3]](i32) + ; CI-MESA-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[OR10]] + ; CI-MESA-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[OR12]](i32) + ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; CI-MESA-NEXT: [[SHL13:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT1]], [[COPY1]](i32) + ; CI-MESA-NEXT: [[OR13:%[0-9]+]]:_(i64) = G_OR [[SHL13]], [[ZEXT1]] + ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[OR6]](i64), [[OR13]](i64) + ; 
CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; VI-LABEL: name: test_load_global_v2s64_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; VI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) - ; VI-NEXT: 
[[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD6]] - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) - ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[ZEXTLOAD8]] - ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[OR8]], [[C3]](s32) - ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[OR7]] - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR9]](s32) - ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1) - ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1) - ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD9]] - ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1) - ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1) - ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[ZEXTLOAD11]] - ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) - ; VI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] - ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) - ; VI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], 
[[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, addrspace 1) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; VI-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; VI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (i8) from unknown-address + 8, addrspace 1) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (i8) from unknown-address + 9, addrspace 1) + ; VI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; VI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD6]] + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (i8) from unknown-address + 10, addrspace 1) + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p1) :: (load (i8) from unknown-address + 11, addrspace 1) + ; VI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[ZEXTLOAD8]] + ; VI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[OR8]], [[C3]](i32) + ; VI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[OR7]] + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[OR9]](i32) + ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p1) :: (load (i8) from unknown-address + 12, addrspace 1) + ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p1) :: (load (i8) from unknown-address + 13, addrspace 1) + ; 
VI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; VI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD9]] + ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p1) :: (load (i8) from unknown-address + 14, addrspace 1) + ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p1) :: (load (i8) from unknown-address + 15, addrspace 1) + ; VI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; VI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[ZEXTLOAD11]] + ; VI-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[OR11]], [[C3]](i32) + ; VI-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[OR10]] + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[OR12]](i32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; VI-NEXT: [[SHL13:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT1]], [[COPY1]](i32) + ; VI-NEXT: [[OR13:%[0-9]+]]:_(i64) = G_OR [[SHL13]], [[ZEXT1]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[OR6]](i64), [[OR13]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; GFX9-HSA-LABEL: name: test_load_global_v2s64_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 1, addrspace 1) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p1) :: (load (<2 x i64>), align 1, addrspace 1) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) ; ; GFX9-MESA-LABEL: name: test_load_global_v2s64_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX9-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA-NEXT: 
[[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX9-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX9-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX9-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; GFX9-MESA-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; GFX9-MESA-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD6]] - ; GFX9-MESA-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[ZEXTLOAD8]] - ; GFX9-MESA-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[OR8]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[OR7]] - ; GFX9-MESA-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR9]](s32) - ; GFX9-MESA-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR10:%[0-9]+]]:_(s32) = 
G_OR [[SHL10]], [[ZEXTLOAD9]] - ; GFX9-MESA-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[ZEXTLOAD11]] - ; GFX9-MESA-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] - ; GFX9-MESA-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; GFX9-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX9-MESA-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) - ; GFX9-MESA-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] - ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX9-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from 
unknown-address + 6, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX9-MESA-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX9-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX9-MESA-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; GFX9-MESA-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; GFX9-MESA-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; GFX9-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (i8) from unknown-address + 8, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (i8) from unknown-address + 9, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD6]] + ; GFX9-MESA-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (i8) from unknown-address + 10, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p1) :: (load (i8) from unknown-address + 11, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[ZEXTLOAD8]] + ; GFX9-MESA-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[OR8]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[OR7]] + ; GFX9-MESA-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[OR9]](i32) + ; GFX9-MESA-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p1) :: (load (i8) from unknown-address + 12, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p1) :: (load (i8) from unknown-address + 13, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD9]] + ; GFX9-MESA-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p1) :: (load (i8) from unknown-address + 14, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p1) :: (load (i8) from unknown-address + 15, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[ZEXTLOAD11]] + ; GFX9-MESA-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[OR11]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR 
[[SHL12]], [[OR10]] + ; GFX9-MESA-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[OR12]](i32) + ; GFX9-MESA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; GFX9-MESA-NEXT: [[SHL13:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT1]], [[COPY1]](i32) + ; GFX9-MESA-NEXT: [[OR13:%[0-9]+]]:_(i64) = G_OR [[SHL13]], [[ZEXT1]] + ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[OR6]](i64), [[OR13]](i64) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 1, addrspace 1) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<2 x i64>) = G_LOAD %0(p1) :: (load (<2 x i64>), align 1, addrspace 1) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x i64>) ... --- @@ -11412,52 +11412,52 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), addrspace 1) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; CI-HSA-LABEL: name: test_load_global_v2sp1_align16 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), addrspace 1) + ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; CI-MESA-LABEL: name: test_load_global_v2sp1_align16 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), addrspace 1) + ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; VI-LABEL: name: test_load_global_v2sp1_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), addrspace 1) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; GFX9-HSA-LABEL: name: test_load_global_v2sp1_align16 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load 
(<4 x i32>), addrspace 1) + ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; GFX9-MESA-LABEL: name: test_load_global_v2sp1_align16 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), addrspace 1) + ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 16, addrspace 1) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<2 x p1>) = G_LOAD %0(p1) :: (load (<2 x p1>), addrspace 1) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x p1>) ... --- @@ -11470,72 +11470,72 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i64>) = G_LOAD [[COPY]](p1) :: (load (<4 x i64>), addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64), [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[LOAD]](<4 x i64>) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64), [[UV6:%[0-9]+]]:_(i64), [[UV7:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[UV]](i64), [[UV1]](i64), [[UV2]](i64), [[UV7]](i64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; CI-HSA-LABEL: name: test_load_global_v3s64_align32 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1) - ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>) - ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-HSA-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64) - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i64>) = G_LOAD [[COPY]](p1) :: (load (<4 x i64>), addrspace 1) + ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(i64), 
[[UV1:%[0-9]+]]:_(i64), [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[LOAD]](<4 x i64>) + ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; CI-HSA-NEXT: [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64), [[UV6:%[0-9]+]]:_(i64), [[UV7:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[UV]](i64), [[UV1]](i64), [[UV2]](i64), [[UV7]](i64) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; CI-MESA-LABEL: name: test_load_global_v3s64_align32 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1) - ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>) - ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-MESA-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64) - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i64>) = G_LOAD [[COPY]](p1) :: (load (<4 x i64>), addrspace 1) + ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64), [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[LOAD]](<4 x i64>) + ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; CI-MESA-NEXT: [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64), [[UV6:%[0-9]+]]:_(i64), [[UV7:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[UV]](i64), [[UV1]](i64), [[UV2]](i64), [[UV7]](i64) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; VI-LABEL: name: test_load_global_v3s64_align32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1) - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i64>) = G_LOAD [[COPY]](p1) :: (load (<4 x i64>), addrspace 1) + ; VI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64), [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[LOAD]](<4 x i64>) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64), [[UV6:%[0-9]+]]:_(i64), [[UV7:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[UV]](i64), [[UV1]](i64), [[UV2]](i64), [[UV7]](i64) + ; VI-NEXT: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; GFX9-HSA-LABEL: name: test_load_global_v3s64_align32 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1) - ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>) - ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-HSA-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i64>) = G_LOAD [[COPY]](p1) :: (load (<4 x i64>), addrspace 1) + ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64), [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[LOAD]](<4 x i64>) + ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; GFX9-HSA-NEXT: [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64), [[UV6:%[0-9]+]]:_(i64), [[UV7:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[UV]](i64), [[UV1]](i64), [[UV2]](i64), [[UV7]](i64) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; GFX9-MESA-LABEL: name: test_load_global_v3s64_align32 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1) - ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>) - ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-MESA-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i64>) = G_LOAD [[COPY]](p1) :: (load (<4 x i64>), addrspace 1) + ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64), [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[LOAD]](<4 x i64>) + ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; GFX9-MESA-NEXT: [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64), [[UV6:%[0-9]+]]:_(i64), [[UV7:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[UV]](i64), [[UV1]](i64), [[UV2]](i64), [[UV7]](i64) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 32, addrspace 1) - %2:_(<4 x s64>) = G_IMPLICIT_DEF - %3:_(<4 x s64>) = G_INSERT %2, %1, 0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3 + %1:_(<3 x i64>) = G_LOAD %0(p1) :: 
(load (<3 x i64>), align 32, addrspace 1) + %2:_(<4 x i64>) = G_IMPLICIT_DEF + %3:_(<4 x i64>) = G_INSERT %2, %1(<3 x i64>), 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3(<4 x i64>) ... --- @@ -11548,90 +11548,90 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, addrspace 1) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p1) :: (load (<2 x i64>), align 8, addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD]](p1) :: (load (i64) from unknown-address + 16, addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[LOAD]](<2 x i64>) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64), [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[UV]](i64), [[UV1]](i64), [[LOAD1]](i64), [[UV5]](i64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; CI-HSA-LABEL: name: test_load_global_v3s64_align8 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1) - ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, addrspace 1) - ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-HSA-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p1) :: (load (<2 x i64>), align 8, addrspace 1) + ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD]](p1) :: (load (i64) from unknown-address + 16, addrspace 1) + ; 
CI-HSA-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[LOAD]](<2 x i64>) + ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; CI-HSA-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64), [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[UV]](i64), [[UV1]](i64), [[LOAD1]](i64), [[UV5]](i64) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; CI-MESA-LABEL: name: test_load_global_v3s64_align8 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, addrspace 1) - ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-MESA-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p1) :: (load (<2 x i64>), align 8, addrspace 1) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD]](p1) :: (load (i64) from unknown-address + 16, addrspace 1) + ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[LOAD]](<2 x i64>) + ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; CI-MESA-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64), [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[UV]](i64), [[UV1]](i64), [[LOAD1]](i64), [[UV5]](i64) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; VI-LABEL: name: test_load_global_v3s64_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, addrspace 1) - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY 
[[BUILD_VECTOR]](<4 x s64>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p1) :: (load (<2 x i64>), align 8, addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD]](p1) :: (load (i64) from unknown-address + 16, addrspace 1) + ; VI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[LOAD]](<2 x i64>) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64), [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[UV]](i64), [[UV1]](i64), [[LOAD1]](i64), [[UV5]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; GFX9-HSA-LABEL: name: test_load_global_v3s64_align8 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1) - ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, addrspace 1) - ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-HSA-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p1) :: (load (<2 x i64>), align 8, addrspace 1) + ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD]](p1) :: (load (i64) from unknown-address + 16, addrspace 1) + ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[LOAD]](<2 x i64>) + ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; GFX9-HSA-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64), [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[UV]](i64), [[UV1]](i64), [[LOAD1]](i64), [[UV5]](i64) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; GFX9-MESA-LABEL: name: test_load_global_v3s64_align8 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, addrspace 1) - ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(s64), 
[[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-MESA-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p1) :: (load (<2 x i64>), align 8, addrspace 1) + ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD]](p1) :: (load (i64) from unknown-address + 16, addrspace 1) + ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[LOAD]](<2 x i64>) + ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; GFX9-MESA-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64), [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[UV]](i64), [[UV1]](i64), [[LOAD1]](i64), [[UV5]](i64) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 8, addrspace 1) - %2:_(<4 x s64>) = G_IMPLICIT_DEF - %3:_(<4 x s64>) = G_INSERT %2, %1, 0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3 + %1:_(<3 x i64>) = G_LOAD %0(p1) :: (load (<3 x i64>), align 8, addrspace 1) + %2:_(<4 x i64>) = G_IMPLICIT_DEF + %3:_(<4 x i64>) = G_INSERT %2, %1(<3 x i64>), 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3(<4 x i64>) ... 
--- @@ -11644,490 +11644,490 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; SI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) - ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; SI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) - ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; SI-NEXT: 
[[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD6]] - ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) - ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) - ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[ZEXTLOAD8]] - ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[OR8]], [[C3]](s32) - ; SI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[OR7]] - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR9]](s32) - ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; SI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1) - ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; SI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1) - ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD9]] - ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1) - ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1) - ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; SI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[ZEXTLOAD11]] - ; SI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) - ; SI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] - ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) - ; SI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] - ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; SI-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD15]](p1) :: (load (s8) from unknown-address + 16, addrspace 1) - ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; SI-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD16]](p1) :: (load (s8) from unknown-address + 17, addrspace 1) - ; SI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD13]], [[C1]](s32) - ; SI-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[SHL14]], [[ZEXTLOAD12]] - ; SI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD17]](p1) :: (load (s8) from unknown-address + 18, addrspace 1) - ; SI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD17]], [[C]](s64) - ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load (s8) from unknown-address + 19, addrspace 1) - ; SI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; SI-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[SHL15]], [[ZEXTLOAD14]] - ; SI-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[OR15]], [[C3]](s32) - ; SI-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[SHL16]], 
[[OR14]] - ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s64) = G_ZEXT [[OR16]](s32) - ; SI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; SI-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD19]](p1) :: (load (s8) from unknown-address + 20, addrspace 1) - ; SI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64) - ; SI-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD20]](p1) :: (load (s8) from unknown-address + 21, addrspace 1) - ; SI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD16]], [[C1]](s32) - ; SI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[ZEXTLOAD15]] - ; SI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD21]](p1) :: (load (s8) from unknown-address + 22, addrspace 1) - ; SI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD21]], [[C]](s64) - ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load (s8) from unknown-address + 23, addrspace 1) - ; SI-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; SI-NEXT: [[OR18:%[0-9]+]]:_(s32) = G_OR [[SHL18]], [[ZEXTLOAD17]] - ; SI-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) - ; SI-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] - ; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) - ; SI-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load 
(i8) from unknown-address + 4, addrspace 1) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, addrspace 1) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; SI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; SI-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; SI-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; SI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; SI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (i8) from unknown-address + 8, addrspace 1) + ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; SI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (i8) from unknown-address + 9, addrspace 1) + ; SI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; SI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD6]] + ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (i8) from unknown-address + 10, addrspace 1) + ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p1) :: (load (i8) from unknown-address + 11, addrspace 1) + ; SI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; SI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[ZEXTLOAD8]] + ; SI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[OR8]], [[C3]](i32) + ; SI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[OR7]] + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[OR9]](i32) + ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i64) + ; SI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p1) :: (load (i8) from unknown-address + 12, addrspace 1) + ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; SI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p1) :: (load (i8) from unknown-address + 13, addrspace 1) + ; SI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; SI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD9]] + ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p1) :: (load (i8) from unknown-address + 14, addrspace 1) + ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p1) :: (load (i8) from unknown-address + 
15, addrspace 1) + ; SI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; SI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[ZEXTLOAD11]] + ; SI-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[OR11]], [[C3]](i32) + ; SI-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[OR10]] + ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[OR12]](i32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; SI-NEXT: [[SHL13:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT1]], [[COPY1]](i32) + ; SI-NEXT: [[OR13:%[0-9]+]]:_(i64) = G_OR [[SHL13]], [[ZEXT1]] + ; SI-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](i64) + ; SI-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD15]](p1) :: (load (i8) from unknown-address + 16, addrspace 1) + ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C]](i64) + ; SI-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD16]](p1) :: (load (i8) from unknown-address + 17, addrspace 1) + ; SI-NEXT: [[SHL14:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD13]], [[C1]](i32) + ; SI-NEXT: [[OR14:%[0-9]+]]:_(i32) = G_OR [[SHL14]], [[ZEXTLOAD12]] + ; SI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD17]](p1) :: (load (i8) from unknown-address + 18, addrspace 1) + ; SI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD17]], [[C]](i64) + ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD18]](p1) :: (load (i8) from unknown-address + 19, addrspace 1) + ; SI-NEXT: [[SHL15:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; SI-NEXT: [[OR15:%[0-9]+]]:_(i32) = G_OR [[SHL15]], [[ZEXTLOAD14]] + ; SI-NEXT: [[SHL16:%[0-9]+]]:_(i32) = G_SHL [[OR15]], [[C3]](i32) + ; SI-NEXT: [[OR16:%[0-9]+]]:_(i32) = G_OR [[SHL16]], [[OR14]] + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(i64) = G_ZEXT [[OR16]](i32) + ; SI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](i64) + ; SI-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD19]](p1) :: (load (i8) from unknown-address + 20, addrspace 1) + ; SI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C]](i64) + ; SI-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD20]](p1) :: (load (i8) from unknown-address + 21, addrspace 1) + ; SI-NEXT: [[SHL17:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD16]], [[C1]](i32) + ; SI-NEXT: [[OR17:%[0-9]+]]:_(i32) = G_OR [[SHL17]], [[ZEXTLOAD15]] + ; SI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD21]](p1) :: (load (i8) from unknown-address + 22, addrspace 1) + ; SI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD21]], [[C]](i64) + ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD22]](p1) :: (load (i8) from unknown-address + 23, addrspace 1) + ; SI-NEXT: [[SHL18:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; SI-NEXT: [[OR18:%[0-9]+]]:_(i32) = G_OR [[SHL18]], [[ZEXTLOAD17]] + ; SI-NEXT: [[SHL19:%[0-9]+]]:_(i32) = G_SHL [[OR18]], [[C3]](i32) + ; SI-NEXT: [[OR19:%[0-9]+]]:_(i32) = G_OR [[SHL19]], [[OR17]] + ; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(i64) = G_ANYEXT [[OR19]](i32) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; SI-NEXT: [[SHL20:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT2]], [[COPY2]](i32) + ; SI-NEXT: [[OR20:%[0-9]+]]:_(i64) = G_OR [[SHL20]], [[ZEXT2]] + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64), [[UV2:%[0-9]+]]:_(i64), 
[[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[OR6]](i64), [[OR13]](i64), [[OR20]](i64), [[UV3]](i64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; CI-HSA-LABEL: name: test_load_global_v3s64_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 1, addrspace 1) - ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, align 1, addrspace 1) - ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-HSA-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p1) :: (load (<2 x i64>), align 1, addrspace 1) + ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD]](p1) :: (load (i64) from unknown-address + 16, align 1, addrspace 1) + ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[LOAD]](<2 x i64>) + ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; CI-HSA-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64), [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[UV]](i64), [[UV1]](i64), [[LOAD1]](i64), [[UV5]](i64) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; CI-MESA-LABEL: name: test_load_global_v3s64_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL 
[[LOAD]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CI-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; CI-MESA-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) - ; CI-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD6]] - ; CI-MESA-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) - ; CI-MESA-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[ZEXTLOAD8]] - ; CI-MESA-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[OR8]], [[C3]](s32) - ; CI-MESA-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[OR7]] - ; CI-MESA-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR9]](s32) - ; CI-MESA-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, 
addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1) - ; CI-MESA-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD9]] - ; CI-MESA-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1) - ; CI-MESA-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[ZEXTLOAD11]] - ; CI-MESA-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) - ; CI-MESA-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] - ; CI-MESA-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CI-MESA-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) - ; CI-MESA-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] - ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-MESA-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD15]](p1) :: (load (s8) from unknown-address + 16, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD16]](p1) :: (load (s8) from unknown-address + 17, addrspace 1) - ; CI-MESA-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD13]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[SHL14]], [[ZEXTLOAD12]] - ; CI-MESA-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD17]](p1) :: (load (s8) from unknown-address + 18, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD17]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load (s8) from unknown-address + 19, addrspace 1) - ; CI-MESA-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[SHL15]], [[ZEXTLOAD14]] - ; CI-MESA-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[OR15]], [[C3]](s32) - ; CI-MESA-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[SHL16]], [[OR14]] - ; CI-MESA-NEXT: [[ZEXT2:%[0-9]+]]:_(s64) = G_ZEXT [[OR16]](s32) - ; CI-MESA-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD19]](p1) :: (load (s8) from unknown-address + 20, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD20]](p1) :: (load (s8) from unknown-address + 21, addrspace 1) - ; CI-MESA-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD16]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[ZEXTLOAD15]] - ; CI-MESA-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(s32) = G_ZEXTLOAD 
[[PTR_ADD21]](p1) :: (load (s8) from unknown-address + 22, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD21]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load (s8) from unknown-address + 23, addrspace 1) - ; CI-MESA-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR18:%[0-9]+]]:_(s32) = G_OR [[SHL18]], [[ZEXTLOAD17]] - ; CI-MESA-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) - ; CI-MESA-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] - ; CI-MESA-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) - ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CI-MESA-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) - ; CI-MESA-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] - ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64) - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from 
unknown-address + 6, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; CI-MESA-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; CI-MESA-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (i8) from unknown-address + 8, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (i8) from unknown-address + 9, addrspace 1) + ; CI-MESA-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD6]] + ; CI-MESA-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (i8) from unknown-address + 10, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p1) :: (load (i8) from unknown-address + 11, addrspace 1) + ; CI-MESA-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[ZEXTLOAD8]] + ; CI-MESA-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[OR8]], [[C3]](i32) + ; CI-MESA-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[OR7]] + ; CI-MESA-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[OR9]](i32) + ; CI-MESA-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p1) :: (load (i8) from unknown-address + 12, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p1) :: (load (i8) from unknown-address + 13, addrspace 1) + ; CI-MESA-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD9]] + ; CI-MESA-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p1) :: (load (i8) from unknown-address + 14, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p1) :: (load (i8) from unknown-address + 15, addrspace 1) + ; CI-MESA-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[ZEXTLOAD11]] + ; CI-MESA-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[OR11]], [[C3]](i32) + ; CI-MESA-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[OR10]] + ; CI-MESA-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT 
[[OR12]](i32) + ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; CI-MESA-NEXT: [[SHL13:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT1]], [[COPY1]](i32) + ; CI-MESA-NEXT: [[OR13:%[0-9]+]]:_(i64) = G_OR [[SHL13]], [[ZEXT1]] + ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-MESA-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD15]](p1) :: (load (i8) from unknown-address + 16, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD16]](p1) :: (load (i8) from unknown-address + 17, addrspace 1) + ; CI-MESA-NEXT: [[SHL14:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD13]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR14:%[0-9]+]]:_(i32) = G_OR [[SHL14]], [[ZEXTLOAD12]] + ; CI-MESA-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD17]](p1) :: (load (i8) from unknown-address + 18, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD17]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD18]](p1) :: (load (i8) from unknown-address + 19, addrspace 1) + ; CI-MESA-NEXT: [[SHL15:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR15:%[0-9]+]]:_(i32) = G_OR [[SHL15]], [[ZEXTLOAD14]] + ; CI-MESA-NEXT: [[SHL16:%[0-9]+]]:_(i32) = G_SHL [[OR15]], [[C3]](i32) + ; CI-MESA-NEXT: [[OR16:%[0-9]+]]:_(i32) = G_OR [[SHL16]], [[OR14]] + ; CI-MESA-NEXT: [[ZEXT2:%[0-9]+]]:_(i64) = G_ZEXT [[OR16]](i32) + ; CI-MESA-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD19]](p1) :: (load (i8) from unknown-address + 20, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD20]](p1) :: (load (i8) from unknown-address + 21, addrspace 1) + ; CI-MESA-NEXT: [[SHL17:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD16]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR17:%[0-9]+]]:_(i32) = G_OR [[SHL17]], [[ZEXTLOAD15]] + ; CI-MESA-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD21]](p1) :: (load (i8) from unknown-address + 22, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD21]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD22]](p1) :: (load (i8) from unknown-address + 23, addrspace 1) + ; CI-MESA-NEXT: [[SHL18:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR18:%[0-9]+]]:_(i32) = G_OR [[SHL18]], [[ZEXTLOAD17]] + ; CI-MESA-NEXT: [[SHL19:%[0-9]+]]:_(i32) = G_SHL [[OR18]], [[C3]](i32) + ; CI-MESA-NEXT: [[OR19:%[0-9]+]]:_(i32) = G_OR [[SHL19]], [[OR17]] + ; CI-MESA-NEXT: [[ANYEXT2:%[0-9]+]]:_(i64) = G_ANYEXT [[OR19]](i32) + ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; CI-MESA-NEXT: [[SHL20:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT2]], [[COPY2]](i32) + ; CI-MESA-NEXT: [[OR20:%[0-9]+]]:_(i64) = G_OR [[SHL20]], [[ZEXT2]] + ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64), [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[OR6]](i64), [[OR13]](i64), 
[[OR20]](i64), [[UV3]](i64) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; VI-LABEL: name: test_load_global_v3s64_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; VI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (s8) 
from unknown-address + 9, addrspace 1) - ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD6]] - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) - ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[ZEXTLOAD8]] - ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[OR8]], [[C3]](s32) - ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[OR7]] - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR9]](s32) - ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1) - ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1) - ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD9]] - ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1) - ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1) - ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[ZEXTLOAD11]] - ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) - ; VI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] - ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) - ; VI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] - ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; VI-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD15]](p1) :: (load (s8) from unknown-address + 16, addrspace 1) - ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD16]](p1) :: (load (s8) from unknown-address + 17, addrspace 1) - ; VI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD13]], [[C1]](s32) - ; VI-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[SHL14]], [[ZEXTLOAD12]] - ; VI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD17]](p1) :: (load (s8) from unknown-address + 18, addrspace 1) - ; VI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD17]], [[C]](s64) - ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load (s8) from unknown-address + 19, addrspace 1) - ; VI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[SHL15]], [[ZEXTLOAD14]] - ; 
VI-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[OR15]], [[C3]](s32) - ; VI-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[SHL16]], [[OR14]] - ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s64) = G_ZEXT [[OR16]](s32) - ; VI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD19]](p1) :: (load (s8) from unknown-address + 20, addrspace 1) - ; VI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD20]](p1) :: (load (s8) from unknown-address + 21, addrspace 1) - ; VI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD16]], [[C1]](s32) - ; VI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[ZEXTLOAD15]] - ; VI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD21]](p1) :: (load (s8) from unknown-address + 22, addrspace 1) - ; VI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD21]], [[C]](s64) - ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load (s8) from unknown-address + 23, addrspace 1) - ; VI-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; VI-NEXT: [[OR18:%[0-9]+]]:_(s32) = G_OR [[SHL18]], [[ZEXTLOAD17]] - ; VI-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) - ; VI-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] - ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; VI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) - ; VI-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: 
[[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, addrspace 1) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; VI-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; VI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (i8) from unknown-address + 8, addrspace 1) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (i8) from unknown-address + 9, addrspace 1) + ; VI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; VI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD6]] + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (i8) from unknown-address + 10, addrspace 1) + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p1) :: (load (i8) from unknown-address + 11, addrspace 1) + ; VI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[ZEXTLOAD8]] + ; VI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[OR8]], [[C3]](i32) + ; VI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[OR7]] + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[OR9]](i32) + ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p1) :: (load (i8) from unknown-address + 12, addrspace 1) + ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p1) :: (load (i8) from unknown-address + 13, addrspace 1) + ; VI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; VI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD9]] + ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p1) :: (load (i8) from unknown-address + 14, addrspace 1) + ; VI-NEXT: 
[[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p1) :: (load (i8) from unknown-address + 15, addrspace 1) + ; VI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; VI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[ZEXTLOAD11]] + ; VI-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[OR11]], [[C3]](i32) + ; VI-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[OR10]] + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[OR12]](i32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; VI-NEXT: [[SHL13:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT1]], [[COPY1]](i32) + ; VI-NEXT: [[OR13:%[0-9]+]]:_(i64) = G_OR [[SHL13]], [[ZEXT1]] + ; VI-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](i64) + ; VI-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD15]](p1) :: (load (i8) from unknown-address + 16, addrspace 1) + ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD16]](p1) :: (load (i8) from unknown-address + 17, addrspace 1) + ; VI-NEXT: [[SHL14:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD13]], [[C1]](i32) + ; VI-NEXT: [[OR14:%[0-9]+]]:_(i32) = G_OR [[SHL14]], [[ZEXTLOAD12]] + ; VI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD17]](p1) :: (load (i8) from unknown-address + 18, addrspace 1) + ; VI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD17]], [[C]](i64) + ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD18]](p1) :: (load (i8) from unknown-address + 19, addrspace 1) + ; VI-NEXT: [[SHL15:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR15:%[0-9]+]]:_(i32) = G_OR [[SHL15]], [[ZEXTLOAD14]] + ; VI-NEXT: [[SHL16:%[0-9]+]]:_(i32) = G_SHL [[OR15]], [[C3]](i32) + ; VI-NEXT: [[OR16:%[0-9]+]]:_(i32) = G_OR [[SHL16]], [[OR14]] + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(i64) = G_ZEXT [[OR16]](i32) + ; VI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD19]](p1) :: (load (i8) from unknown-address + 20, addrspace 1) + ; VI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD20]](p1) :: (load (i8) from unknown-address + 21, addrspace 1) + ; VI-NEXT: [[SHL17:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD16]], [[C1]](i32) + ; VI-NEXT: [[OR17:%[0-9]+]]:_(i32) = G_OR [[SHL17]], [[ZEXTLOAD15]] + ; VI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD21]](p1) :: (load (i8) from unknown-address + 22, addrspace 1) + ; VI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD21]], [[C]](i64) + ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD22]](p1) :: (load (i8) from unknown-address + 23, addrspace 1) + ; VI-NEXT: [[SHL18:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; VI-NEXT: [[OR18:%[0-9]+]]:_(i32) = G_OR [[SHL18]], [[ZEXTLOAD17]] + ; VI-NEXT: [[SHL19:%[0-9]+]]:_(i32) = G_SHL [[OR18]], [[C3]](i32) + ; VI-NEXT: [[OR19:%[0-9]+]]:_(i32) = G_OR [[SHL19]], [[OR17]] + ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(i64) = G_ANYEXT [[OR19]](i32) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; VI-NEXT: [[SHL20:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT2]], [[COPY2]](i32) + ; VI-NEXT: [[OR20:%[0-9]+]]:_(i64) = G_OR [[SHL20]], 
[[ZEXT2]] + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64), [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[OR6]](i64), [[OR13]](i64), [[OR20]](i64), [[UV3]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; GFX9-HSA-LABEL: name: test_load_global_v3s64_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 1, addrspace 1) - ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, align 1, addrspace 1) - ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-HSA-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p1) :: (load (<2 x i64>), align 1, addrspace 1) + ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD]](p1) :: (load (i64) from unknown-address + 16, align 1, addrspace 1) + ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[LOAD]](<2 x i64>) + ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; GFX9-HSA-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64), [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[UV]](i64), [[UV1]](i64), [[LOAD1]](i64), [[UV5]](i64) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; GFX9-MESA-LABEL: name: test_load_global_v3s64_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; GFX9-MESA-NEXT: 
[[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX9-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX9-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX9-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX9-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; GFX9-MESA-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; GFX9-MESA-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD6]] - ; GFX9-MESA-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[ZEXTLOAD8]] - ; GFX9-MESA-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[OR8]], [[C3]](s32) - ; GFX9-MESA-NEXT: 
[[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[OR7]] - ; GFX9-MESA-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR9]](s32) - ; GFX9-MESA-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD9]] - ; GFX9-MESA-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[ZEXTLOAD11]] - ; GFX9-MESA-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] - ; GFX9-MESA-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; GFX9-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX9-MESA-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) - ; GFX9-MESA-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] - ; GFX9-MESA-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-MESA-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD15]](p1) :: (load (s8) from unknown-address + 16, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD16]](p1) :: (load (s8) from unknown-address + 17, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD13]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[SHL14]], [[ZEXTLOAD12]] - ; GFX9-MESA-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD17]](p1) :: (load (s8) from unknown-address + 18, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD17]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load (s8) from unknown-address + 19, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[SHL15]], [[ZEXTLOAD14]] - ; GFX9-MESA-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[OR15]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[SHL16]], [[OR14]] - ; GFX9-MESA-NEXT: [[ZEXT2:%[0-9]+]]:_(s64) = G_ZEXT [[OR16]](s32) - ; GFX9-MESA-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD19]](p1) :: (load (s8) from unknown-address + 20, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(s32) = 
G_ZEXTLOAD [[PTR_ADD20]](p1) :: (load (s8) from unknown-address + 21, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD16]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[ZEXTLOAD15]] - ; GFX9-MESA-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD21]](p1) :: (load (s8) from unknown-address + 22, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD21]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load (s8) from unknown-address + 23, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR18:%[0-9]+]]:_(s32) = G_OR [[SHL18]], [[ZEXTLOAD17]] - ; GFX9-MESA-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] - ; GFX9-MESA-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) - ; GFX9-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX9-MESA-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) - ; GFX9-MESA-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] - ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX9-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) 
+ ; GFX9-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX9-MESA-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX9-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX9-MESA-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; GFX9-MESA-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; GFX9-MESA-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; GFX9-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (i8) from unknown-address + 8, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (i8) from unknown-address + 9, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD6]] + ; GFX9-MESA-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (i8) from unknown-address + 10, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p1) :: (load (i8) from unknown-address + 11, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[ZEXTLOAD8]] + ; GFX9-MESA-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[OR8]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[OR7]] + ; GFX9-MESA-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[OR9]](i32) + ; GFX9-MESA-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p1) :: (load (i8) from unknown-address + 12, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p1) :: (load (i8) from unknown-address + 13, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD9]] + ; GFX9-MESA-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p1) :: (load (i8) from unknown-address + 14, addrspace 1) + ; GFX9-MESA-NEXT: 
[[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p1) :: (load (i8) from unknown-address + 15, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[ZEXTLOAD11]] + ; GFX9-MESA-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[OR11]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[OR10]] + ; GFX9-MESA-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[OR12]](i32) + ; GFX9-MESA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; GFX9-MESA-NEXT: [[SHL13:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT1]], [[COPY1]](i32) + ; GFX9-MESA-NEXT: [[OR13:%[0-9]+]]:_(i64) = G_OR [[SHL13]], [[ZEXT1]] + ; GFX9-MESA-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9-MESA-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD15]](p1) :: (load (i8) from unknown-address + 16, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD16]](p1) :: (load (i8) from unknown-address + 17, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL14:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD13]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR14:%[0-9]+]]:_(i32) = G_OR [[SHL14]], [[ZEXTLOAD12]] + ; GFX9-MESA-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD17]](p1) :: (load (i8) from unknown-address + 18, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD17]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD18]](p1) :: (load (i8) from unknown-address + 19, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL15:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR15:%[0-9]+]]:_(i32) = G_OR [[SHL15]], [[ZEXTLOAD14]] + ; GFX9-MESA-NEXT: [[SHL16:%[0-9]+]]:_(i32) = G_SHL [[OR15]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[OR16:%[0-9]+]]:_(i32) = G_OR [[SHL16]], [[OR14]] + ; GFX9-MESA-NEXT: [[ZEXT2:%[0-9]+]]:_(i64) = G_ZEXT [[OR16]](i32) + ; GFX9-MESA-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD19]](p1) :: (load (i8) from unknown-address + 20, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD20]](p1) :: (load (i8) from unknown-address + 21, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL17:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD16]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR17:%[0-9]+]]:_(i32) = G_OR [[SHL17]], [[ZEXTLOAD15]] + ; GFX9-MESA-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD21]](p1) :: (load (i8) from unknown-address + 22, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD21]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD22]](p1) :: (load (i8) from unknown-address + 23, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL18:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR18:%[0-9]+]]:_(i32) = G_OR [[SHL18]], [[ZEXTLOAD17]] + ; GFX9-MESA-NEXT: [[SHL19:%[0-9]+]]:_(i32) = G_SHL [[OR18]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[OR19:%[0-9]+]]:_(i32) = G_OR 
[[SHL19]], [[OR17]] + ; GFX9-MESA-NEXT: [[ANYEXT2:%[0-9]+]]:_(i64) = G_ANYEXT [[OR19]](i32) + ; GFX9-MESA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; GFX9-MESA-NEXT: [[SHL20:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT2]], [[COPY2]](i32) + ; GFX9-MESA-NEXT: [[OR20:%[0-9]+]]:_(i64) = G_OR [[SHL20]], [[ZEXT2]] + ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64), [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[OR6]](i64), [[OR13]](i64), [[OR20]](i64), [[UV3]](i64) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 1, addrspace 1) - %2:_(<4 x s64>) = G_IMPLICIT_DEF - %3:_(<4 x s64>) = G_INSERT %2, %1, 0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3 + %1:_(<3 x i64>) = G_LOAD %0(p1) :: (load (<3 x i64>), align 1, addrspace 1) + %2:_(<4 x i64>) = G_IMPLICIT_DEF + %3:_(<4 x i64>) = G_INSERT %2, %1(<3 x i64>), 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3(<4 x i64>) ... --- @@ -12140,46 +12140,46 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i64>) = G_LOAD [[COPY]](p1) :: (load (<4 x i64>), addrspace 1) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x i64>) ; ; CI-HSA-LABEL: name: test_load_global_v4s64_align32 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1) - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i64>) = G_LOAD [[COPY]](p1) :: (load (<4 x i64>), addrspace 1) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x i64>) ; ; CI-MESA-LABEL: name: test_load_global_v4s64_align32 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1) - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i64>) = G_LOAD [[COPY]](p1) :: (load (<4 x i64>), addrspace 1) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x i64>) ; ; VI-LABEL: name: test_load_global_v4s64_align32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i64>) = G_LOAD [[COPY]](p1) :: (load (<4 x i64>), addrspace 1) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x i64>) ; ; GFX9-HSA-LABEL: name: test_load_global_v4s64_align32 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: 
[[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i64>) = G_LOAD [[COPY]](p1) :: (load (<4 x i64>), addrspace 1) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x i64>) ; ; GFX9-MESA-LABEL: name: test_load_global_v4s64_align32 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i64>) = G_LOAD [[COPY]](p1) :: (load (<4 x i64>), addrspace 1) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x i64>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<4 x s64>) = G_LOAD %0 :: (load (<4 x s64>), align 32, addrspace 1) - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 + %1:_(<4 x i64>) = G_LOAD %0(p1) :: (load (<4 x i64>), addrspace 1) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1(<4 x i64>) ... --- @@ -12192,46 +12192,46 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), align 8, addrspace 1) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i64>) = G_LOAD [[COPY]](p1) :: (load (<4 x i64>), align 8, addrspace 1) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x i64>) ; ; CI-HSA-LABEL: name: test_load_global_v4s64_align8 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), align 8, addrspace 1) - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i64>) = G_LOAD [[COPY]](p1) :: (load (<4 x i64>), align 8, addrspace 1) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x i64>) ; ; CI-MESA-LABEL: name: test_load_global_v4s64_align8 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), align 8, addrspace 1) - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i64>) = G_LOAD [[COPY]](p1) :: (load (<4 x i64>), align 8, addrspace 1) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x i64>) ; ; VI-LABEL: name: test_load_global_v4s64_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), align 8, addrspace 1) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i64>) = G_LOAD [[COPY]](p1) :: (load (<4 x i64>), align 8, addrspace 1) + ; VI-NEXT: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x i64>) ; ; GFX9-HSA-LABEL: name: test_load_global_v4s64_align8 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), align 8, addrspace 1) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i64>) = G_LOAD [[COPY]](p1) :: (load (<4 x i64>), align 8, addrspace 1) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x i64>) ; ; GFX9-MESA-LABEL: name: test_load_global_v4s64_align8 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), align 8, addrspace 1) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i64>) = G_LOAD [[COPY]](p1) :: (load (<4 x i64>), align 8, addrspace 1) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x i64>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<4 x s64>) = G_LOAD %0 :: (load (<4 x s64>), align 8, addrspace 1) - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 + %1:_(<4 x i64>) = G_LOAD %0(p1) :: (load (<4 x i64>), align 8, addrspace 1) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1(<4 x i64>) ... --- @@ -12244,602 +12244,602 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; SI-NEXT: 
[[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; SI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) - ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; SI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) - ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD6]] - ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) - ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) - ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[ZEXTLOAD8]] - ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[OR8]], [[C3]](s32) - ; SI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[OR7]] - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR9]](s32) - ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; SI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1) - ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; SI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1) - ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD9]] - ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1) - ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1) - ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; SI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[ZEXTLOAD11]] - ; SI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) - ; SI-NEXT: 
[[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] - ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) - ; SI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] - ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; SI-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD15]](p1) :: (load (s8) from unknown-address + 16, addrspace 1) - ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; SI-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD16]](p1) :: (load (s8) from unknown-address + 17, addrspace 1) - ; SI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD13]], [[C1]](s32) - ; SI-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[SHL14]], [[ZEXTLOAD12]] - ; SI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD17]](p1) :: (load (s8) from unknown-address + 18, addrspace 1) - ; SI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD17]], [[C]](s64) - ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load (s8) from unknown-address + 19, addrspace 1) - ; SI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; SI-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[SHL15]], [[ZEXTLOAD14]] - ; SI-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[OR15]], [[C3]](s32) - ; SI-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[SHL16]], [[OR14]] - ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s64) = G_ZEXT [[OR16]](s32) - ; SI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; SI-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD19]](p1) :: (load (s8) from unknown-address + 20, addrspace 1) - ; SI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64) - ; SI-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD20]](p1) :: (load (s8) from unknown-address + 21, addrspace 1) - ; SI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD16]], [[C1]](s32) - ; SI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[ZEXTLOAD15]] - ; SI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD21]](p1) :: (load (s8) from unknown-address + 22, addrspace 1) - ; SI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD21]], [[C]](s64) - ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load (s8) from unknown-address + 23, addrspace 1) - ; SI-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; SI-NEXT: [[OR18:%[0-9]+]]:_(s32) = G_OR [[SHL18]], [[ZEXTLOAD17]] - ; SI-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) - ; SI-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] - ; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) - ; SI-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] - ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; SI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; SI-NEXT: [[ZEXTLOAD18:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD23]](p1) :: (load (s8) from unknown-address + 24, addrspace 1) - ; SI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C]](s64) - ; SI-NEXT: [[ZEXTLOAD19:%[0-9]+]]:_(s32) = G_ZEXTLOAD 
[[PTR_ADD24]](p1) :: (load (s8) from unknown-address + 25, addrspace 1) - ; SI-NEXT: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD19]], [[C1]](s32) - ; SI-NEXT: [[OR21:%[0-9]+]]:_(s32) = G_OR [[SHL21]], [[ZEXTLOAD18]] - ; SI-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD20:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD25]](p1) :: (load (s8) from unknown-address + 26, addrspace 1) - ; SI-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD25]], [[C]](s64) - ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p1) :: (load (s8) from unknown-address + 27, addrspace 1) - ; SI-NEXT: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[LOAD6]], [[C1]](s32) - ; SI-NEXT: [[OR22:%[0-9]+]]:_(s32) = G_OR [[SHL22]], [[ZEXTLOAD20]] - ; SI-NEXT: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[OR22]], [[C3]](s32) - ; SI-NEXT: [[OR23:%[0-9]+]]:_(s32) = G_OR [[SHL23]], [[OR21]] - ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s64) = G_ZEXT [[OR23]](s32) - ; SI-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64) - ; SI-NEXT: [[ZEXTLOAD21:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD27]](p1) :: (load (s8) from unknown-address + 28, addrspace 1) - ; SI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C]](s64) - ; SI-NEXT: [[ZEXTLOAD22:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD28]](p1) :: (load (s8) from unknown-address + 29, addrspace 1) - ; SI-NEXT: [[SHL24:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD22]], [[C1]](s32) - ; SI-NEXT: [[OR24:%[0-9]+]]:_(s32) = G_OR [[SHL24]], [[ZEXTLOAD21]] - ; SI-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD23:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD29]](p1) :: (load (s8) from unknown-address + 30, addrspace 1) - ; SI-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD29]], [[C]](s64) - ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p1) :: (load (s8) from unknown-address + 31, addrspace 1) - ; SI-NEXT: [[SHL25:%[0-9]+]]:_(s32) = G_SHL [[LOAD7]], [[C1]](s32) - ; SI-NEXT: [[OR25:%[0-9]+]]:_(s32) = G_OR [[SHL25]], [[ZEXTLOAD23]] - ; SI-NEXT: [[SHL26:%[0-9]+]]:_(s32) = G_SHL [[OR25]], [[C3]](s32) - ; SI-NEXT: [[OR26:%[0-9]+]]:_(s32) = G_OR [[SHL26]], [[OR24]] - ; SI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[OR26]](s32) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI-NEXT: [[SHL27:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT3]], [[COPY3]](s32) - ; SI-NEXT: [[OR27:%[0-9]+]]:_(s64) = G_OR [[SHL27]], [[ZEXT3]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[OR27]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; SI-NEXT: 
[[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, addrspace 1) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; SI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; SI-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; SI-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; SI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; SI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (i8) from unknown-address + 8, addrspace 1) + ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; SI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (i8) from unknown-address + 9, addrspace 1) + ; SI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; SI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD6]] + ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (i8) from unknown-address + 10, addrspace 1) + ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p1) :: (load (i8) from unknown-address + 11, addrspace 1) + ; SI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; SI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[ZEXTLOAD8]] + ; SI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[OR8]], [[C3]](i32) + ; SI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[OR7]] + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[OR9]](i32) + ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i64) + ; SI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p1) :: (load (i8) from unknown-address + 12, addrspace 1) + ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD 
[[PTR_ADD11]], [[C]](i64) + ; SI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p1) :: (load (i8) from unknown-address + 13, addrspace 1) + ; SI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; SI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD9]] + ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p1) :: (load (i8) from unknown-address + 14, addrspace 1) + ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p1) :: (load (i8) from unknown-address + 15, addrspace 1) + ; SI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; SI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[ZEXTLOAD11]] + ; SI-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[OR11]], [[C3]](i32) + ; SI-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[OR10]] + ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[OR12]](i32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; SI-NEXT: [[SHL13:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT1]], [[COPY1]](i32) + ; SI-NEXT: [[OR13:%[0-9]+]]:_(i64) = G_OR [[SHL13]], [[ZEXT1]] + ; SI-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](i64) + ; SI-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD15]](p1) :: (load (i8) from unknown-address + 16, addrspace 1) + ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C]](i64) + ; SI-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD16]](p1) :: (load (i8) from unknown-address + 17, addrspace 1) + ; SI-NEXT: [[SHL14:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD13]], [[C1]](i32) + ; SI-NEXT: [[OR14:%[0-9]+]]:_(i32) = G_OR [[SHL14]], [[ZEXTLOAD12]] + ; SI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD17]](p1) :: (load (i8) from unknown-address + 18, addrspace 1) + ; SI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD17]], [[C]](i64) + ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD18]](p1) :: (load (i8) from unknown-address + 19, addrspace 1) + ; SI-NEXT: [[SHL15:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; SI-NEXT: [[OR15:%[0-9]+]]:_(i32) = G_OR [[SHL15]], [[ZEXTLOAD14]] + ; SI-NEXT: [[SHL16:%[0-9]+]]:_(i32) = G_SHL [[OR15]], [[C3]](i32) + ; SI-NEXT: [[OR16:%[0-9]+]]:_(i32) = G_OR [[SHL16]], [[OR14]] + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(i64) = G_ZEXT [[OR16]](i32) + ; SI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](i64) + ; SI-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD19]](p1) :: (load (i8) from unknown-address + 20, addrspace 1) + ; SI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C]](i64) + ; SI-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD20]](p1) :: (load (i8) from unknown-address + 21, addrspace 1) + ; SI-NEXT: [[SHL17:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD16]], [[C1]](i32) + ; SI-NEXT: [[OR17:%[0-9]+]]:_(i32) = G_OR [[SHL17]], [[ZEXTLOAD15]] + ; SI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD21]](p1) :: (load (i8) from unknown-address + 22, addrspace 1) + ; SI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD21]], [[C]](i64) + ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD22]](p1) :: (load (i8) from unknown-address + 23, addrspace 1) + ; 
SI-NEXT: [[SHL18:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; SI-NEXT: [[OR18:%[0-9]+]]:_(i32) = G_OR [[SHL18]], [[ZEXTLOAD17]] + ; SI-NEXT: [[SHL19:%[0-9]+]]:_(i32) = G_SHL [[OR18]], [[C3]](i32) + ; SI-NEXT: [[OR19:%[0-9]+]]:_(i32) = G_OR [[SHL19]], [[OR17]] + ; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(i64) = G_ANYEXT [[OR19]](i32) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; SI-NEXT: [[SHL20:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT2]], [[COPY2]](i32) + ; SI-NEXT: [[OR20:%[0-9]+]]:_(i64) = G_OR [[SHL20]], [[ZEXT2]] + ; SI-NEXT: [[C8:%[0-9]+]]:_(i64) = G_CONSTANT i64 24 + ; SI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](i64) + ; SI-NEXT: [[ZEXTLOAD18:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD23]](p1) :: (load (i8) from unknown-address + 24, addrspace 1) + ; SI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C]](i64) + ; SI-NEXT: [[ZEXTLOAD19:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD24]](p1) :: (load (i8) from unknown-address + 25, addrspace 1) + ; SI-NEXT: [[SHL21:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD19]], [[C1]](i32) + ; SI-NEXT: [[OR21:%[0-9]+]]:_(i32) = G_OR [[SHL21]], [[ZEXTLOAD18]] + ; SI-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD20:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD25]](p1) :: (load (i8) from unknown-address + 26, addrspace 1) + ; SI-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD25]], [[C]](i64) + ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD26]](p1) :: (load (i8) from unknown-address + 27, addrspace 1) + ; SI-NEXT: [[SHL22:%[0-9]+]]:_(i32) = G_SHL [[LOAD6]], [[C1]](i32) + ; SI-NEXT: [[OR22:%[0-9]+]]:_(i32) = G_OR [[SHL22]], [[ZEXTLOAD20]] + ; SI-NEXT: [[SHL23:%[0-9]+]]:_(i32) = G_SHL [[OR22]], [[C3]](i32) + ; SI-NEXT: [[OR23:%[0-9]+]]:_(i32) = G_OR [[SHL23]], [[OR21]] + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(i64) = G_ZEXT [[OR23]](i32) + ; SI-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C4]](i64) + ; SI-NEXT: [[ZEXTLOAD21:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD27]](p1) :: (load (i8) from unknown-address + 28, addrspace 1) + ; SI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C]](i64) + ; SI-NEXT: [[ZEXTLOAD22:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD28]](p1) :: (load (i8) from unknown-address + 29, addrspace 1) + ; SI-NEXT: [[SHL24:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD22]], [[C1]](i32) + ; SI-NEXT: [[OR24:%[0-9]+]]:_(i32) = G_OR [[SHL24]], [[ZEXTLOAD21]] + ; SI-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD23:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD29]](p1) :: (load (i8) from unknown-address + 30, addrspace 1) + ; SI-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD29]], [[C]](i64) + ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD30]](p1) :: (load (i8) from unknown-address + 31, addrspace 1) + ; SI-NEXT: [[SHL25:%[0-9]+]]:_(i32) = G_SHL [[LOAD7]], [[C1]](i32) + ; SI-NEXT: [[OR25:%[0-9]+]]:_(i32) = G_OR [[SHL25]], [[ZEXTLOAD23]] + ; SI-NEXT: [[SHL26:%[0-9]+]]:_(i32) = G_SHL [[OR25]], [[C3]](i32) + ; SI-NEXT: [[OR26:%[0-9]+]]:_(i32) = G_OR [[SHL26]], [[OR24]] + ; SI-NEXT: [[ANYEXT3:%[0-9]+]]:_(i64) = G_ANYEXT [[OR26]](i32) + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; SI-NEXT: [[SHL27:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT3]], [[COPY3]](i32) + ; SI-NEXT: [[OR27:%[0-9]+]]:_(i64) = G_OR [[SHL27]], [[ZEXT3]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[OR6]](i64), [[OR13]](i64), [[OR20]](i64), [[OR27]](i64) + ; SI-NEXT: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; CI-HSA-LABEL: name: test_load_global_v4s64_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), align 1, addrspace 1) - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i64>) = G_LOAD [[COPY]](p1) :: (load (<4 x i64>), align 1, addrspace 1) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x i64>) ; ; CI-MESA-LABEL: name: test_load_global_v4s64_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-MESA-NEXT: 
[[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CI-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; CI-MESA-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) - ; CI-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD6]] - ; CI-MESA-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) - ; CI-MESA-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[ZEXTLOAD8]] - ; CI-MESA-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[OR8]], [[C3]](s32) - ; CI-MESA-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[OR7]] - ; CI-MESA-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR9]](s32) - ; CI-MESA-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1) - ; CI-MESA-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD9]] - ; CI-MESA-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1) - ; CI-MESA-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[ZEXTLOAD11]] - ; CI-MESA-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) - ; CI-MESA-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] - ; CI-MESA-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CI-MESA-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) - ; CI-MESA-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] - ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-MESA-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD15]](p1) :: (load (s8) from unknown-address + 
16, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD16]](p1) :: (load (s8) from unknown-address + 17, addrspace 1) - ; CI-MESA-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD13]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[SHL14]], [[ZEXTLOAD12]] - ; CI-MESA-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD17]](p1) :: (load (s8) from unknown-address + 18, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD17]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load (s8) from unknown-address + 19, addrspace 1) - ; CI-MESA-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[SHL15]], [[ZEXTLOAD14]] - ; CI-MESA-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[OR15]], [[C3]](s32) - ; CI-MESA-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[SHL16]], [[OR14]] - ; CI-MESA-NEXT: [[ZEXT2:%[0-9]+]]:_(s64) = G_ZEXT [[OR16]](s32) - ; CI-MESA-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD19]](p1) :: (load (s8) from unknown-address + 20, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD20]](p1) :: (load (s8) from unknown-address + 21, addrspace 1) - ; CI-MESA-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD16]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[ZEXTLOAD15]] - ; CI-MESA-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD21]](p1) :: (load (s8) from unknown-address + 22, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD21]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load (s8) from unknown-address + 23, addrspace 1) - ; CI-MESA-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR18:%[0-9]+]]:_(s32) = G_OR [[SHL18]], [[ZEXTLOAD17]] - ; CI-MESA-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) - ; CI-MESA-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] - ; CI-MESA-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) - ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CI-MESA-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) - ; CI-MESA-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] - ; CI-MESA-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; CI-MESA-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD18:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD23]](p1) :: (load (s8) from unknown-address + 24, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD19:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD24]](p1) :: (load (s8) from unknown-address + 25, addrspace 1) - ; CI-MESA-NEXT: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD19]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR21:%[0-9]+]]:_(s32) = G_OR [[SHL21]], [[ZEXTLOAD18]] - ; CI-MESA-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD20:%[0-9]+]]:_(s32) = G_ZEXTLOAD 
[[PTR_ADD25]](p1) :: (load (s8) from unknown-address + 26, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD25]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p1) :: (load (s8) from unknown-address + 27, addrspace 1) - ; CI-MESA-NEXT: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[LOAD6]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR22:%[0-9]+]]:_(s32) = G_OR [[SHL22]], [[ZEXTLOAD20]] - ; CI-MESA-NEXT: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[OR22]], [[C3]](s32) - ; CI-MESA-NEXT: [[OR23:%[0-9]+]]:_(s32) = G_OR [[SHL23]], [[OR21]] - ; CI-MESA-NEXT: [[ZEXT3:%[0-9]+]]:_(s64) = G_ZEXT [[OR23]](s32) - ; CI-MESA-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD21:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD27]](p1) :: (load (s8) from unknown-address + 28, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD22:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD28]](p1) :: (load (s8) from unknown-address + 29, addrspace 1) - ; CI-MESA-NEXT: [[SHL24:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD22]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR24:%[0-9]+]]:_(s32) = G_OR [[SHL24]], [[ZEXTLOAD21]] - ; CI-MESA-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD23:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD29]](p1) :: (load (s8) from unknown-address + 30, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD29]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p1) :: (load (s8) from unknown-address + 31, addrspace 1) - ; CI-MESA-NEXT: [[SHL25:%[0-9]+]]:_(s32) = G_SHL [[LOAD7]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR25:%[0-9]+]]:_(s32) = G_OR [[SHL25]], [[ZEXTLOAD23]] - ; CI-MESA-NEXT: [[SHL26:%[0-9]+]]:_(s32) = G_SHL [[OR25]], [[C3]](s32) - ; CI-MESA-NEXT: [[OR26:%[0-9]+]]:_(s32) = G_OR [[SHL26]], [[OR24]] - ; CI-MESA-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[OR26]](s32) - ; CI-MESA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CI-MESA-NEXT: [[SHL27:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT3]], [[COPY3]](s32) - ; CI-MESA-NEXT: [[OR27:%[0-9]+]]:_(s64) = G_OR [[SHL27]], [[ZEXT3]] - ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[OR27]](s64) - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL 
[[LOAD]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; CI-MESA-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; CI-MESA-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (i8) from unknown-address + 8, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (i8) from unknown-address + 9, addrspace 1) + ; CI-MESA-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD6]] + ; CI-MESA-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (i8) from unknown-address + 10, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p1) :: (load (i8) from unknown-address + 11, addrspace 1) + ; CI-MESA-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[ZEXTLOAD8]] + ; CI-MESA-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[OR8]], [[C3]](i32) + ; CI-MESA-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[OR7]] + ; CI-MESA-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[OR9]](i32) + ; CI-MESA-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p1) :: (load (i8) from unknown-address + 12, 
addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p1) :: (load (i8) from unknown-address + 13, addrspace 1) + ; CI-MESA-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD9]] + ; CI-MESA-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p1) :: (load (i8) from unknown-address + 14, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p1) :: (load (i8) from unknown-address + 15, addrspace 1) + ; CI-MESA-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[ZEXTLOAD11]] + ; CI-MESA-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[OR11]], [[C3]](i32) + ; CI-MESA-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[OR10]] + ; CI-MESA-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[OR12]](i32) + ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; CI-MESA-NEXT: [[SHL13:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT1]], [[COPY1]](i32) + ; CI-MESA-NEXT: [[OR13:%[0-9]+]]:_(i64) = G_OR [[SHL13]], [[ZEXT1]] + ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-MESA-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD15]](p1) :: (load (i8) from unknown-address + 16, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD16]](p1) :: (load (i8) from unknown-address + 17, addrspace 1) + ; CI-MESA-NEXT: [[SHL14:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD13]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR14:%[0-9]+]]:_(i32) = G_OR [[SHL14]], [[ZEXTLOAD12]] + ; CI-MESA-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD17]](p1) :: (load (i8) from unknown-address + 18, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD17]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD18]](p1) :: (load (i8) from unknown-address + 19, addrspace 1) + ; CI-MESA-NEXT: [[SHL15:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR15:%[0-9]+]]:_(i32) = G_OR [[SHL15]], [[ZEXTLOAD14]] + ; CI-MESA-NEXT: [[SHL16:%[0-9]+]]:_(i32) = G_SHL [[OR15]], [[C3]](i32) + ; CI-MESA-NEXT: [[OR16:%[0-9]+]]:_(i32) = G_OR [[SHL16]], [[OR14]] + ; CI-MESA-NEXT: [[ZEXT2:%[0-9]+]]:_(i64) = G_ZEXT [[OR16]](i32) + ; CI-MESA-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD19]](p1) :: (load (i8) from unknown-address + 20, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD20]](p1) :: (load (i8) from unknown-address + 21, addrspace 1) + ; CI-MESA-NEXT: [[SHL17:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD16]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR17:%[0-9]+]]:_(i32) = G_OR [[SHL17]], [[ZEXTLOAD15]] + ; CI-MESA-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(i32) = G_ZEXTLOAD 
[[PTR_ADD21]](p1) :: (load (i8) from unknown-address + 22, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD21]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD22]](p1) :: (load (i8) from unknown-address + 23, addrspace 1) + ; CI-MESA-NEXT: [[SHL18:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR18:%[0-9]+]]:_(i32) = G_OR [[SHL18]], [[ZEXTLOAD17]] + ; CI-MESA-NEXT: [[SHL19:%[0-9]+]]:_(i32) = G_SHL [[OR18]], [[C3]](i32) + ; CI-MESA-NEXT: [[OR19:%[0-9]+]]:_(i32) = G_OR [[SHL19]], [[OR17]] + ; CI-MESA-NEXT: [[ANYEXT2:%[0-9]+]]:_(i64) = G_ANYEXT [[OR19]](i32) + ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; CI-MESA-NEXT: [[SHL20:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT2]], [[COPY2]](i32) + ; CI-MESA-NEXT: [[OR20:%[0-9]+]]:_(i64) = G_OR [[SHL20]], [[ZEXT2]] + ; CI-MESA-NEXT: [[C8:%[0-9]+]]:_(i64) = G_CONSTANT i64 24 + ; CI-MESA-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD18:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD23]](p1) :: (load (i8) from unknown-address + 24, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD19:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD24]](p1) :: (load (i8) from unknown-address + 25, addrspace 1) + ; CI-MESA-NEXT: [[SHL21:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD19]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR21:%[0-9]+]]:_(i32) = G_OR [[SHL21]], [[ZEXTLOAD18]] + ; CI-MESA-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD20:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD25]](p1) :: (load (i8) from unknown-address + 26, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD25]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD26]](p1) :: (load (i8) from unknown-address + 27, addrspace 1) + ; CI-MESA-NEXT: [[SHL22:%[0-9]+]]:_(i32) = G_SHL [[LOAD6]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR22:%[0-9]+]]:_(i32) = G_OR [[SHL22]], [[ZEXTLOAD20]] + ; CI-MESA-NEXT: [[SHL23:%[0-9]+]]:_(i32) = G_SHL [[OR22]], [[C3]](i32) + ; CI-MESA-NEXT: [[OR23:%[0-9]+]]:_(i32) = G_OR [[SHL23]], [[OR21]] + ; CI-MESA-NEXT: [[ZEXT3:%[0-9]+]]:_(i64) = G_ZEXT [[OR23]](i32) + ; CI-MESA-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C4]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD21:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD27]](p1) :: (load (i8) from unknown-address + 28, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD22:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD28]](p1) :: (load (i8) from unknown-address + 29, addrspace 1) + ; CI-MESA-NEXT: [[SHL24:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD22]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR24:%[0-9]+]]:_(i32) = G_OR [[SHL24]], [[ZEXTLOAD21]] + ; CI-MESA-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD23:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD29]](p1) :: (load (i8) from unknown-address + 30, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD29]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD7:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD30]](p1) :: (load (i8) from unknown-address + 31, addrspace 1) + ; CI-MESA-NEXT: [[SHL25:%[0-9]+]]:_(i32) = G_SHL [[LOAD7]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR25:%[0-9]+]]:_(i32) = G_OR [[SHL25]], [[ZEXTLOAD23]] + ; CI-MESA-NEXT: [[SHL26:%[0-9]+]]:_(i32) = G_SHL [[OR25]], [[C3]](i32) + ; CI-MESA-NEXT: 
[[OR26:%[0-9]+]]:_(i32) = G_OR [[SHL26]], [[OR24]] + ; CI-MESA-NEXT: [[ANYEXT3:%[0-9]+]]:_(i64) = G_ANYEXT [[OR26]](i32) + ; CI-MESA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; CI-MESA-NEXT: [[SHL27:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT3]], [[COPY3]](i32) + ; CI-MESA-NEXT: [[OR27:%[0-9]+]]:_(i64) = G_OR [[SHL27]], [[ZEXT3]] + ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[OR6]](i64), [[OR13]](i64), [[OR20]](i64), [[OR27]](i64) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; VI-LABEL: name: test_load_global_v4s64_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; VI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR 
[[SHL6]], [[ZEXT]] - ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) - ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD6]] - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) - ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[ZEXTLOAD8]] - ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[OR8]], [[C3]](s32) - ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[OR7]] - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR9]](s32) - ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1) - ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1) - ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD9]] - ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1) - ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1) - ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[ZEXTLOAD11]] - ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) - ; VI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] - ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) - ; VI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] - ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; VI-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD15]](p1) :: (load (s8) from unknown-address + 16, addrspace 1) - ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD16]](p1) :: (load (s8) from unknown-address + 17, addrspace 1) - ; VI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD13]], [[C1]](s32) - ; VI-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[SHL14]], [[ZEXTLOAD12]] - ; VI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; VI-NEXT: 
[[ZEXTLOAD14:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD17]](p1) :: (load (s8) from unknown-address + 18, addrspace 1) - ; VI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD17]], [[C]](s64) - ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load (s8) from unknown-address + 19, addrspace 1) - ; VI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[SHL15]], [[ZEXTLOAD14]] - ; VI-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[OR15]], [[C3]](s32) - ; VI-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[SHL16]], [[OR14]] - ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s64) = G_ZEXT [[OR16]](s32) - ; VI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD19]](p1) :: (load (s8) from unknown-address + 20, addrspace 1) - ; VI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD20]](p1) :: (load (s8) from unknown-address + 21, addrspace 1) - ; VI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD16]], [[C1]](s32) - ; VI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[ZEXTLOAD15]] - ; VI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD21]](p1) :: (load (s8) from unknown-address + 22, addrspace 1) - ; VI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD21]], [[C]](s64) - ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load (s8) from unknown-address + 23, addrspace 1) - ; VI-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; VI-NEXT: [[OR18:%[0-9]+]]:_(s32) = G_OR [[SHL18]], [[ZEXTLOAD17]] - ; VI-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) - ; VI-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] - ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; VI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) - ; VI-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] - ; VI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; VI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; VI-NEXT: [[ZEXTLOAD18:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD23]](p1) :: (load (s8) from unknown-address + 24, addrspace 1) - ; VI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD19:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD24]](p1) :: (load (s8) from unknown-address + 25, addrspace 1) - ; VI-NEXT: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD19]], [[C1]](s32) - ; VI-NEXT: [[OR21:%[0-9]+]]:_(s32) = G_OR [[SHL21]], [[ZEXTLOAD18]] - ; VI-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD20:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD25]](p1) :: (load (s8) from unknown-address + 26, addrspace 1) - ; VI-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD25]], [[C]](s64) - ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p1) :: (load (s8) from unknown-address + 27, addrspace 1) - ; VI-NEXT: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[LOAD6]], [[C1]](s32) - ; VI-NEXT: [[OR22:%[0-9]+]]:_(s32) = G_OR [[SHL22]], [[ZEXTLOAD20]] - ; VI-NEXT: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[OR22]], [[C3]](s32) - ; VI-NEXT: [[OR23:%[0-9]+]]:_(s32) = G_OR [[SHL23]], [[OR21]] - ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s64) = G_ZEXT [[OR23]](s32) - ; VI-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD 
[[PTR_ADD23]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD21:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD27]](p1) :: (load (s8) from unknown-address + 28, addrspace 1) - ; VI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD22:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD28]](p1) :: (load (s8) from unknown-address + 29, addrspace 1) - ; VI-NEXT: [[SHL24:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD22]], [[C1]](s32) - ; VI-NEXT: [[OR24:%[0-9]+]]:_(s32) = G_OR [[SHL24]], [[ZEXTLOAD21]] - ; VI-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD23:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD29]](p1) :: (load (s8) from unknown-address + 30, addrspace 1) - ; VI-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD29]], [[C]](s64) - ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p1) :: (load (s8) from unknown-address + 31, addrspace 1) - ; VI-NEXT: [[SHL25:%[0-9]+]]:_(s32) = G_SHL [[LOAD7]], [[C1]](s32) - ; VI-NEXT: [[OR25:%[0-9]+]]:_(s32) = G_OR [[SHL25]], [[ZEXTLOAD23]] - ; VI-NEXT: [[SHL26:%[0-9]+]]:_(s32) = G_SHL [[OR25]], [[C3]](s32) - ; VI-NEXT: [[OR26:%[0-9]+]]:_(s32) = G_OR [[SHL26]], [[OR24]] - ; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[OR26]](s32) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; VI-NEXT: [[SHL27:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT3]], [[COPY3]](s32) - ; VI-NEXT: [[OR27:%[0-9]+]]:_(s64) = G_OR [[SHL27]], [[ZEXT3]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[OR27]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; 
VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, addrspace 1) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; VI-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; VI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (i8) from unknown-address + 8, addrspace 1) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (i8) from unknown-address + 9, addrspace 1) + ; VI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; VI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD6]] + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (i8) from unknown-address + 10, addrspace 1) + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p1) :: (load (i8) from unknown-address + 11, addrspace 1) + ; VI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[ZEXTLOAD8]] + ; VI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[OR8]], [[C3]](i32) + ; VI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[OR7]] + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[OR9]](i32) + ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p1) :: (load (i8) from unknown-address + 12, addrspace 1) + ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p1) :: (load (i8) from unknown-address + 13, addrspace 1) + ; VI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; VI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD9]] + ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p1) :: (load (i8) from unknown-address + 14, addrspace 1) + ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p1) :: (load (i8) from unknown-address + 15, addrspace 1) + ; VI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; VI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[ZEXTLOAD11]] + ; VI-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[OR11]], [[C3]](i32) + ; VI-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[OR10]] + ; VI-NEXT: 
[[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[OR12]](i32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; VI-NEXT: [[SHL13:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT1]], [[COPY1]](i32) + ; VI-NEXT: [[OR13:%[0-9]+]]:_(i64) = G_OR [[SHL13]], [[ZEXT1]] + ; VI-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](i64) + ; VI-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD15]](p1) :: (load (i8) from unknown-address + 16, addrspace 1) + ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD16]](p1) :: (load (i8) from unknown-address + 17, addrspace 1) + ; VI-NEXT: [[SHL14:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD13]], [[C1]](i32) + ; VI-NEXT: [[OR14:%[0-9]+]]:_(i32) = G_OR [[SHL14]], [[ZEXTLOAD12]] + ; VI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD17]](p1) :: (load (i8) from unknown-address + 18, addrspace 1) + ; VI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD17]], [[C]](i64) + ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD18]](p1) :: (load (i8) from unknown-address + 19, addrspace 1) + ; VI-NEXT: [[SHL15:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR15:%[0-9]+]]:_(i32) = G_OR [[SHL15]], [[ZEXTLOAD14]] + ; VI-NEXT: [[SHL16:%[0-9]+]]:_(i32) = G_SHL [[OR15]], [[C3]](i32) + ; VI-NEXT: [[OR16:%[0-9]+]]:_(i32) = G_OR [[SHL16]], [[OR14]] + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(i64) = G_ZEXT [[OR16]](i32) + ; VI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD19]](p1) :: (load (i8) from unknown-address + 20, addrspace 1) + ; VI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD20]](p1) :: (load (i8) from unknown-address + 21, addrspace 1) + ; VI-NEXT: [[SHL17:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD16]], [[C1]](i32) + ; VI-NEXT: [[OR17:%[0-9]+]]:_(i32) = G_OR [[SHL17]], [[ZEXTLOAD15]] + ; VI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD21]](p1) :: (load (i8) from unknown-address + 22, addrspace 1) + ; VI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD21]], [[C]](i64) + ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD22]](p1) :: (load (i8) from unknown-address + 23, addrspace 1) + ; VI-NEXT: [[SHL18:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; VI-NEXT: [[OR18:%[0-9]+]]:_(i32) = G_OR [[SHL18]], [[ZEXTLOAD17]] + ; VI-NEXT: [[SHL19:%[0-9]+]]:_(i32) = G_SHL [[OR18]], [[C3]](i32) + ; VI-NEXT: [[OR19:%[0-9]+]]:_(i32) = G_OR [[SHL19]], [[OR17]] + ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(i64) = G_ANYEXT [[OR19]](i32) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; VI-NEXT: [[SHL20:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT2]], [[COPY2]](i32) + ; VI-NEXT: [[OR20:%[0-9]+]]:_(i64) = G_OR [[SHL20]], [[ZEXT2]] + ; VI-NEXT: [[C8:%[0-9]+]]:_(i64) = G_CONSTANT i64 24 + ; VI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](i64) + ; VI-NEXT: [[ZEXTLOAD18:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD23]](p1) :: (load (i8) from unknown-address + 24, addrspace 1) + ; VI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD19:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD24]](p1) :: (load (i8) from unknown-address + 25, addrspace 
1) + ; VI-NEXT: [[SHL21:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD19]], [[C1]](i32) + ; VI-NEXT: [[OR21:%[0-9]+]]:_(i32) = G_OR [[SHL21]], [[ZEXTLOAD18]] + ; VI-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD20:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD25]](p1) :: (load (i8) from unknown-address + 26, addrspace 1) + ; VI-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD25]], [[C]](i64) + ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD26]](p1) :: (load (i8) from unknown-address + 27, addrspace 1) + ; VI-NEXT: [[SHL22:%[0-9]+]]:_(i32) = G_SHL [[LOAD6]], [[C1]](i32) + ; VI-NEXT: [[OR22:%[0-9]+]]:_(i32) = G_OR [[SHL22]], [[ZEXTLOAD20]] + ; VI-NEXT: [[SHL23:%[0-9]+]]:_(i32) = G_SHL [[OR22]], [[C3]](i32) + ; VI-NEXT: [[OR23:%[0-9]+]]:_(i32) = G_OR [[SHL23]], [[OR21]] + ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(i64) = G_ZEXT [[OR23]](i32) + ; VI-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD21:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD27]](p1) :: (load (i8) from unknown-address + 28, addrspace 1) + ; VI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD22:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD28]](p1) :: (load (i8) from unknown-address + 29, addrspace 1) + ; VI-NEXT: [[SHL24:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD22]], [[C1]](i32) + ; VI-NEXT: [[OR24:%[0-9]+]]:_(i32) = G_OR [[SHL24]], [[ZEXTLOAD21]] + ; VI-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD23:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD29]](p1) :: (load (i8) from unknown-address + 30, addrspace 1) + ; VI-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD29]], [[C]](i64) + ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD30]](p1) :: (load (i8) from unknown-address + 31, addrspace 1) + ; VI-NEXT: [[SHL25:%[0-9]+]]:_(i32) = G_SHL [[LOAD7]], [[C1]](i32) + ; VI-NEXT: [[OR25:%[0-9]+]]:_(i32) = G_OR [[SHL25]], [[ZEXTLOAD23]] + ; VI-NEXT: [[SHL26:%[0-9]+]]:_(i32) = G_SHL [[OR25]], [[C3]](i32) + ; VI-NEXT: [[OR26:%[0-9]+]]:_(i32) = G_OR [[SHL26]], [[OR24]] + ; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(i64) = G_ANYEXT [[OR26]](i32) + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; VI-NEXT: [[SHL27:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT3]], [[COPY3]](i32) + ; VI-NEXT: [[OR27:%[0-9]+]]:_(i64) = G_OR [[SHL27]], [[ZEXT3]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[OR6]](i64), [[OR13]](i64), [[OR20]](i64), [[OR27]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; GFX9-HSA-LABEL: name: test_load_global_v4s64_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), align 1, addrspace 1) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i64>) = G_LOAD [[COPY]](p1) :: (load (<4 x i64>), align 1, addrspace 1) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x i64>) ; ; GFX9-MESA-LABEL: name: test_load_global_v4s64_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; 
GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX9-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX9-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX9-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX9-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; GFX9-MESA-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; GFX9-MESA-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; GFX9-MESA-NEXT: 
[[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD6]] - ; GFX9-MESA-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[ZEXTLOAD8]] - ; GFX9-MESA-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[OR8]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[OR7]] - ; GFX9-MESA-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR9]](s32) - ; GFX9-MESA-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD9]] - ; GFX9-MESA-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[ZEXTLOAD11]] - ; GFX9-MESA-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] - ; GFX9-MESA-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; GFX9-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX9-MESA-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) - ; GFX9-MESA-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] - ; GFX9-MESA-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-MESA-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD15]](p1) :: (load (s8) from unknown-address + 16, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD16]](p1) :: (load (s8) from unknown-address + 17, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD13]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[SHL14]], [[ZEXTLOAD12]] - ; GFX9-MESA-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD17]](p1) :: (load (s8) from unknown-address + 18, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD17]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load (s8) from unknown-address + 19, addrspace 1) - 
; GFX9-MESA-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[SHL15]], [[ZEXTLOAD14]] - ; GFX9-MESA-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[OR15]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[SHL16]], [[OR14]] - ; GFX9-MESA-NEXT: [[ZEXT2:%[0-9]+]]:_(s64) = G_ZEXT [[OR16]](s32) - ; GFX9-MESA-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD19]](p1) :: (load (s8) from unknown-address + 20, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD20]](p1) :: (load (s8) from unknown-address + 21, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD16]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[ZEXTLOAD15]] - ; GFX9-MESA-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD21]](p1) :: (load (s8) from unknown-address + 22, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD21]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load (s8) from unknown-address + 23, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR18:%[0-9]+]]:_(s32) = G_OR [[SHL18]], [[ZEXTLOAD17]] - ; GFX9-MESA-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] - ; GFX9-MESA-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) - ; GFX9-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX9-MESA-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) - ; GFX9-MESA-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] - ; GFX9-MESA-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; GFX9-MESA-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD18:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD23]](p1) :: (load (s8) from unknown-address + 24, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD19:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD24]](p1) :: (load (s8) from unknown-address + 25, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD19]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR21:%[0-9]+]]:_(s32) = G_OR [[SHL21]], [[ZEXTLOAD18]] - ; GFX9-MESA-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD20:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD25]](p1) :: (load (s8) from unknown-address + 26, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD25]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p1) :: (load (s8) from unknown-address + 27, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[LOAD6]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR22:%[0-9]+]]:_(s32) = G_OR [[SHL22]], [[ZEXTLOAD20]] - ; GFX9-MESA-NEXT: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[OR22]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[OR23:%[0-9]+]]:_(s32) = G_OR [[SHL23]], [[OR21]] - ; GFX9-MESA-NEXT: [[ZEXT3:%[0-9]+]]:_(s64) = G_ZEXT [[OR23]](s32) - ; GFX9-MESA-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64) - ; GFX9-MESA-NEXT: 
[[ZEXTLOAD21:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD27]](p1) :: (load (s8) from unknown-address + 28, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD22:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD28]](p1) :: (load (s8) from unknown-address + 29, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL24:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD22]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR24:%[0-9]+]]:_(s32) = G_OR [[SHL24]], [[ZEXTLOAD21]] - ; GFX9-MESA-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD23:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD29]](p1) :: (load (s8) from unknown-address + 30, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD29]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p1) :: (load (s8) from unknown-address + 31, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL25:%[0-9]+]]:_(s32) = G_SHL [[LOAD7]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR25:%[0-9]+]]:_(s32) = G_OR [[SHL25]], [[ZEXTLOAD23]] - ; GFX9-MESA-NEXT: [[SHL26:%[0-9]+]]:_(s32) = G_SHL [[OR25]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[OR26:%[0-9]+]]:_(s32) = G_OR [[SHL26]], [[OR24]] - ; GFX9-MESA-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[OR26]](s32) - ; GFX9-MESA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX9-MESA-NEXT: [[SHL27:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT3]], [[COPY3]](s32) - ; GFX9-MESA-NEXT: [[OR27:%[0-9]+]]:_(s64) = G_OR [[SHL27]], [[ZEXT3]] - ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[OR27]](s64) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX9-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = 
G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX9-MESA-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX9-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX9-MESA-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; GFX9-MESA-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; GFX9-MESA-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; GFX9-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (i8) from unknown-address + 8, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (i8) from unknown-address + 9, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD6]] + ; GFX9-MESA-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (i8) from unknown-address + 10, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p1) :: (load (i8) from unknown-address + 11, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[ZEXTLOAD8]] + ; GFX9-MESA-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[OR8]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[OR7]] + ; GFX9-MESA-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[OR9]](i32) + ; GFX9-MESA-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p1) :: (load (i8) from unknown-address + 12, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p1) :: (load (i8) from unknown-address + 13, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD9]] + ; GFX9-MESA-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p1) :: (load (i8) from unknown-address + 14, 
addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p1) :: (load (i8) from unknown-address + 15, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[ZEXTLOAD11]] + ; GFX9-MESA-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[OR11]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[OR10]] + ; GFX9-MESA-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[OR12]](i32) + ; GFX9-MESA-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; GFX9-MESA-NEXT: [[SHL13:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT1]], [[COPY1]](i32) + ; GFX9-MESA-NEXT: [[OR13:%[0-9]+]]:_(i64) = G_OR [[SHL13]], [[ZEXT1]] + ; GFX9-MESA-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9-MESA-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD15]](p1) :: (load (i8) from unknown-address + 16, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD16]](p1) :: (load (i8) from unknown-address + 17, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL14:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD13]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR14:%[0-9]+]]:_(i32) = G_OR [[SHL14]], [[ZEXTLOAD12]] + ; GFX9-MESA-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD17]](p1) :: (load (i8) from unknown-address + 18, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD17]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD18]](p1) :: (load (i8) from unknown-address + 19, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL15:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR15:%[0-9]+]]:_(i32) = G_OR [[SHL15]], [[ZEXTLOAD14]] + ; GFX9-MESA-NEXT: [[SHL16:%[0-9]+]]:_(i32) = G_SHL [[OR15]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[OR16:%[0-9]+]]:_(i32) = G_OR [[SHL16]], [[OR14]] + ; GFX9-MESA-NEXT: [[ZEXT2:%[0-9]+]]:_(i64) = G_ZEXT [[OR16]](i32) + ; GFX9-MESA-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD19]](p1) :: (load (i8) from unknown-address + 20, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD20]](p1) :: (load (i8) from unknown-address + 21, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL17:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD16]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR17:%[0-9]+]]:_(i32) = G_OR [[SHL17]], [[ZEXTLOAD15]] + ; GFX9-MESA-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD21]](p1) :: (load (i8) from unknown-address + 22, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD21]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD22]](p1) :: (load (i8) from unknown-address + 23, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL18:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR18:%[0-9]+]]:_(i32) = G_OR [[SHL18]], [[ZEXTLOAD17]] + ; GFX9-MESA-NEXT: [[SHL19:%[0-9]+]]:_(i32) = G_SHL [[OR18]], [[C3]](i32) + ; GFX9-MESA-NEXT: 
[[OR19:%[0-9]+]]:_(i32) = G_OR [[SHL19]], [[OR17]] + ; GFX9-MESA-NEXT: [[ANYEXT2:%[0-9]+]]:_(i64) = G_ANYEXT [[OR19]](i32) + ; GFX9-MESA-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; GFX9-MESA-NEXT: [[SHL20:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT2]], [[COPY2]](i32) + ; GFX9-MESA-NEXT: [[OR20:%[0-9]+]]:_(i64) = G_OR [[SHL20]], [[ZEXT2]] + ; GFX9-MESA-NEXT: [[C8:%[0-9]+]]:_(i64) = G_CONSTANT i64 24 + ; GFX9-MESA-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD18:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD23]](p1) :: (load (i8) from unknown-address + 24, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD19:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD24]](p1) :: (load (i8) from unknown-address + 25, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL21:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD19]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR21:%[0-9]+]]:_(i32) = G_OR [[SHL21]], [[ZEXTLOAD18]] + ; GFX9-MESA-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD20:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD25]](p1) :: (load (i8) from unknown-address + 26, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD25]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD26]](p1) :: (load (i8) from unknown-address + 27, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL22:%[0-9]+]]:_(i32) = G_SHL [[LOAD6]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR22:%[0-9]+]]:_(i32) = G_OR [[SHL22]], [[ZEXTLOAD20]] + ; GFX9-MESA-NEXT: [[SHL23:%[0-9]+]]:_(i32) = G_SHL [[OR22]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[OR23:%[0-9]+]]:_(i32) = G_OR [[SHL23]], [[OR21]] + ; GFX9-MESA-NEXT: [[ZEXT3:%[0-9]+]]:_(i64) = G_ZEXT [[OR23]](i32) + ; GFX9-MESA-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C4]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD21:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD27]](p1) :: (load (i8) from unknown-address + 28, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD22:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD28]](p1) :: (load (i8) from unknown-address + 29, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL24:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD22]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR24:%[0-9]+]]:_(i32) = G_OR [[SHL24]], [[ZEXTLOAD21]] + ; GFX9-MESA-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD23:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD29]](p1) :: (load (i8) from unknown-address + 30, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD29]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD7:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD30]](p1) :: (load (i8) from unknown-address + 31, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL25:%[0-9]+]]:_(i32) = G_SHL [[LOAD7]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR25:%[0-9]+]]:_(i32) = G_OR [[SHL25]], [[ZEXTLOAD23]] + ; GFX9-MESA-NEXT: [[SHL26:%[0-9]+]]:_(i32) = G_SHL [[OR25]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[OR26:%[0-9]+]]:_(i32) = G_OR [[SHL26]], [[OR24]] + ; GFX9-MESA-NEXT: [[ANYEXT3:%[0-9]+]]:_(i64) = G_ANYEXT [[OR26]](i32) + ; GFX9-MESA-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; GFX9-MESA-NEXT: [[SHL27:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT3]], [[COPY3]](i32) + ; GFX9-MESA-NEXT: [[OR27:%[0-9]+]]:_(i64) = G_OR [[SHL27]], [[ZEXT3]] + ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[OR6]](i64), [[OR13]](i64), [[OR20]](i64), [[OR27]](i64) 
+ ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<4 x s64>) = G_LOAD %0 :: (load (<4 x s64>), align 1, addrspace 1) - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 + %1:_(<4 x i64>) = G_LOAD %0(p1) :: (load (<4 x i64>), align 1, addrspace 1) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1(<4 x i64>) ... --- @@ -12852,52 +12852,52 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x i32>) = G_LOAD [[COPY]](p1) :: (load (<8 x i32>), addrspace 1) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i128>) = G_BITCAST [[LOAD]](<8 x i32>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x i128>) ; ; CI-HSA-LABEL: name: test_load_global_v2s128_align32 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1) - ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>) - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x i32>) = G_LOAD [[COPY]](p1) :: (load (<8 x i32>), addrspace 1) + ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i128>) = G_BITCAST [[LOAD]](<8 x i32>) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x i128>) ; ; CI-MESA-LABEL: name: test_load_global_v2s128_align32 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1) - ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>) - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x i32>) = G_LOAD [[COPY]](p1) :: (load (<8 x i32>), addrspace 1) + ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i128>) = G_BITCAST [[LOAD]](<8 x i32>) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x i128>) ; ; VI-LABEL: name: test_load_global_v2s128_align32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x i32>) = G_LOAD [[COPY]](p1) :: (load (<8 x i32>), addrspace 1) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i128>) = G_BITCAST [[LOAD]](<8 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x i128>) ; ; GFX9-HSA-LABEL: name: test_load_global_v2s128_align32 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), 
addrspace 1) - ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x i32>) = G_LOAD [[COPY]](p1) :: (load (<8 x i32>), addrspace 1) + ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i128>) = G_BITCAST [[LOAD]](<8 x i32>) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x i128>) ; ; GFX9-MESA-LABEL: name: test_load_global_v2s128_align32 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1) - ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x i32>) = G_LOAD [[COPY]](p1) :: (load (<8 x i32>), addrspace 1) + ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i128>) = G_BITCAST [[LOAD]](<8 x i32>) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x i128>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s128>) = G_LOAD %0 :: (load (<2 x s128>), align 32, addrspace 1) - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 + %1:_(<2 x i128>) = G_LOAD %0(p1) :: (load (<2 x i128>), addrspace 1) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1(<2 x i128>) ... --- @@ -12910,52 +12910,52 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), addrspace 1) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; CI-HSA-LABEL: name: test_load_global_v2p1_align16 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), addrspace 1) + ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; CI-MESA-LABEL: name: test_load_global_v2p1_align16 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), addrspace 1) + ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; VI-LABEL: name: test_load_global_v2p1_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD 
[[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), addrspace 1) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; GFX9-HSA-LABEL: name: test_load_global_v2p1_align16 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), addrspace 1) + ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; GFX9-MESA-LABEL: name: test_load_global_v2p1_align16 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), addrspace 1) + ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 16, addrspace 1) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<2 x p1>) = G_LOAD %0(p1) :: (load (<2 x p1>), addrspace 1) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x p1>) ... 
--- @@ -12968,52 +12968,52 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), align 8, addrspace 1) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; CI-HSA-LABEL: name: test_load_global_v2p1_align8 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1) - ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), align 8, addrspace 1) + ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; CI-MESA-LABEL: name: test_load_global_v2p1_align8 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1) - ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), align 8, addrspace 1) + ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; VI-LABEL: name: test_load_global_v2p1_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), align 8, addrspace 1) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; GFX9-HSA-LABEL: name: test_load_global_v2p1_align8 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1) - ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), align 8, addrspace 1) + ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; GFX9-MESA-LABEL: name: test_load_global_v2p1_align8 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1) - ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), align 8, addrspace 1) + ; 
GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 8, addrspace 1) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<2 x p1>) = G_LOAD %0(p1) :: (load (<2 x p1>), align 8, addrspace 1) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x p1>) ... --- @@ -13026,52 +13026,52 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), align 4, addrspace 1) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; CI-HSA-LABEL: name: test_load_global_v2p1_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) - ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), align 4, addrspace 1) + ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; CI-MESA-LABEL: name: test_load_global_v2p1_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) - ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), align 4, addrspace 1) + ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; VI-LABEL: name: test_load_global_v2p1_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), align 4, addrspace 1) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; GFX9-HSA-LABEL: name: test_load_global_v2p1_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) - ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), align 4, addrspace 1) + ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; GFX9-MESA-LABEL: name: test_load_global_v2p1_align4 ; GFX9-MESA: liveins: 
$vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) - ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), align 4, addrspace 1) + ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 4, addrspace 1) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<2 x p1>) = G_LOAD %0(p1) :: (load (<2 x p1>), align 4, addrspace 1) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x p1>) ... --- @@ -13084,300 +13084,300 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; SI-NEXT: 
[[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) - ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; SI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) - ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) - ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) - ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1) - ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; SI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1) - ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; SI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1) - ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1) - ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; SI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; SI-NEXT: 
[[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, addrspace 1) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; SI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; SI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (i8) from unknown-address + 8, addrspace 1) + ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; SI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (i8) from unknown-address + 9, addrspace 1) + ; SI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; SI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (i8) from unknown-address + 10, addrspace 1) + ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p1) :: (load (i8) from unknown-address + 11, addrspace 1) + ; SI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; SI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; SI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; SI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; SI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; SI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p1) :: (load (i8) from unknown-address + 12, addrspace 1) + ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; SI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p1) :: (load (i8) from unknown-address + 13, addrspace 1) + ; SI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; SI-NEXT: 
[[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p1) :: (load (i8) from unknown-address + 14, addrspace 1) + ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p1) :: (load (i8) from unknown-address + 15, addrspace 1) + ; SI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; SI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; SI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; SI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; CI-HSA-LABEL: name: test_load_global_v2p1_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 1, addrspace 1) - ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), align 1, addrspace 1) + ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; CI-MESA-LABEL: name: test_load_global_v2p1_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-MESA-NEXT: 
[[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) - ; CI-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; CI-MESA-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) - ; CI-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; CI-MESA-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; CI-MESA-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CI-MESA-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1) - ; CI-MESA-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; CI-MESA-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1) - ; CI-MESA-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; CI-MESA-NEXT: [[SHL11:%[0-9]+]]:_(s32) = 
G_SHL [[OR10]], [[C3]](s32) - ; CI-MESA-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) + ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (i8) from unknown-address + 8, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (i8) from unknown-address + 9, addrspace 1) + ; CI-MESA-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL 
[[ZEXTLOAD7]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; CI-MESA-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (i8) from unknown-address + 10, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p1) :: (load (i8) from unknown-address + 11, addrspace 1) + ; CI-MESA-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; CI-MESA-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; CI-MESA-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; CI-MESA-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p1) :: (load (i8) from unknown-address + 12, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p1) :: (load (i8) from unknown-address + 13, addrspace 1) + ; CI-MESA-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; CI-MESA-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p1) :: (load (i8) from unknown-address + 14, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p1) :: (load (i8) from unknown-address + 15, addrspace 1) + ; CI-MESA-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; CI-MESA-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; CI-MESA-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; VI-LABEL: name: test_load_global_v2p1_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from 
unknown-address + 3, addrspace 1) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) - ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) - ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1) - ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1) - ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; 
VI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1) - ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1) - ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, addrspace 1) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; VI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: 
(load (i8) from unknown-address + 8, addrspace 1) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (i8) from unknown-address + 9, addrspace 1) + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; VI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (i8) from unknown-address + 10, addrspace 1) + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p1) :: (load (i8) from unknown-address + 11, addrspace 1) + ; VI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; VI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; VI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; VI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; VI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p1) :: (load (i8) from unknown-address + 12, addrspace 1) + ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p1) :: (load (i8) from unknown-address + 13, addrspace 1) + ; VI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; VI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p1) :: (load (i8) from unknown-address + 14, addrspace 1) + ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p1) :: (load (i8) from unknown-address + 15, addrspace 1) + ; VI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; VI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; VI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; VI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; GFX9-HSA-LABEL: name: test_load_global_v2p1_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 1, addrspace 1) - ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), align 1, addrspace 1) + ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; GFX9-MESA-LABEL: name: test_load_global_v2p1_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GFX9-MESA-NEXT: 
[[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX9-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; GFX9-MESA-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) - ; GFX9-MESA-NEXT: 
[[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; GFX9-MESA-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; GFX9-MESA-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; GFX9-MESA-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; GFX9-MESA-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; GFX9-MESA-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) + ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(i64) = 
G_CONSTANT i64 4 + ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX9-MESA-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; GFX9-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (i8) from unknown-address + 8, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (i8) from unknown-address + 9, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX9-MESA-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (i8) from unknown-address + 10, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p1) :: (load (i8) from unknown-address + 11, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX9-MESA-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; GFX9-MESA-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; GFX9-MESA-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p1) :: (load (i8) from unknown-address + 12, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p1) :: (load (i8) from unknown-address + 13, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; GFX9-MESA-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p1) :: (load (i8) from unknown-address + 14, addrspace 1) + ; 
GFX9-MESA-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p1) :: (load (i8) from unknown-address + 15, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; GFX9-MESA-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 1, addrspace 1) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<2 x p1>) = G_LOAD %0(p1) :: (load (<2 x p1>), align 1, addrspace 1) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x p1>) ... --- @@ -13390,52 +13390,52 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), align 8, addrspace 1) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[LOAD]](<8 x s32>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x i32>) = G_LOAD [[COPY]](p1) :: (load (<8 x i32>), align 8, addrspace 1) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[LOAD]](<8 x i32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>) ; ; CI-HSA-LABEL: name: test_load_global_v4p1_align8 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), align 8, addrspace 1) - ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[LOAD]](<8 x s32>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x i32>) = G_LOAD [[COPY]](p1) :: (load (<8 x i32>), align 8, addrspace 1) + ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[LOAD]](<8 x i32>) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>) ; ; CI-MESA-LABEL: name: test_load_global_v4p1_align8 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), align 8, addrspace 1) - ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[LOAD]](<8 x s32>) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x i32>) = G_LOAD [[COPY]](p1) :: (load (<8 x i32>), align 8, addrspace 1) + ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[LOAD]](<8 x i32>) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>) ; ; VI-LABEL: name: test_load_global_v4p1_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), align 8, addrspace 1) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[LOAD]](<8 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x i32>) = G_LOAD [[COPY]](p1) :: (load (<8 x i32>), align 8, addrspace 1) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[LOAD]](<8 x i32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY 
[[BITCAST]](<4 x p1>) ; ; GFX9-HSA-LABEL: name: test_load_global_v4p1_align8 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), align 8, addrspace 1) - ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[LOAD]](<8 x s32>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x i32>) = G_LOAD [[COPY]](p1) :: (load (<8 x i32>), align 8, addrspace 1) + ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[LOAD]](<8 x i32>) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>) ; ; GFX9-MESA-LABEL: name: test_load_global_v4p1_align8 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), align 8, addrspace 1) - ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[LOAD]](<8 x s32>) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x i32>) = G_LOAD [[COPY]](p1) :: (load (<8 x i32>), align 8, addrspace 1) + ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[LOAD]](<8 x i32>) ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<4 x p1>) = G_LOAD %0 :: (load (<4 x p1>), align 8, addrspace 1) - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 + %1:_(<4 x p1>) = G_LOAD %0(p1) :: (load (<4 x p1>), align 8, addrspace 1) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1(<4 x p1>) ... --- @@ -13448,52 +13448,52 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load (<2 x i32>), addrspace 1) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x i32>) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) ; ; CI-HSA-LABEL: name: test_load_global_v2p3_align8 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1) - ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load (<2 x i32>), addrspace 1) + ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x i32>) ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) ; ; CI-MESA-LABEL: name: test_load_global_v2p3_align8 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1) - ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load (<2 x i32>), addrspace 1) + ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x i32>) ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) ; ; VI-LABEL: name: test_load_global_v2p3_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY 
$vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load (<2 x i32>), addrspace 1) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x i32>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) ; ; GFX9-HSA-LABEL: name: test_load_global_v2p3_align8 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1) - ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load (<2 x i32>), addrspace 1) + ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x i32>) ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) ; ; GFX9-MESA-LABEL: name: test_load_global_v2p3_align8 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1) - ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load (<2 x i32>), addrspace 1) + ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x i32>) ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 8, addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:_(<2 x p3>) = G_LOAD %0(p1) :: (load (<2 x p3>), addrspace 1) + $vgpr0_vgpr1 = COPY %1(<2 x p3>) ... 
--- @@ -13506,52 +13506,52 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load (<2 x i32>), align 4, addrspace 1) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x i32>) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) ; ; CI-HSA-LABEL: name: test_load_global_v2p3_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1) - ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load (<2 x i32>), align 4, addrspace 1) + ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x i32>) ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) ; ; CI-MESA-LABEL: name: test_load_global_v2p3_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1) - ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load (<2 x i32>), align 4, addrspace 1) + ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x i32>) ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) ; ; VI-LABEL: name: test_load_global_v2p3_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load (<2 x i32>), align 4, addrspace 1) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x i32>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) ; ; GFX9-HSA-LABEL: name: test_load_global_v2p3_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1) - ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load (<2 x i32>), align 4, addrspace 1) + ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x i32>) ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) ; ; GFX9-MESA-LABEL: name: test_load_global_v2p3_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1) - ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load (<2 x i32>), align 4, addrspace 1) + ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST 
[[LOAD]](<2 x i32>) ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 4, addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:_(<2 x p3>) = G_LOAD %0(p1) :: (load (<2 x p3>), align 4, addrspace 1) + $vgpr0_vgpr1 = COPY %1(<2 x p3>) ... --- @@ -13564,180 +13564,180 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; SI-NEXT: 
[[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, addrspace 1) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x i32>) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) ; ; CI-HSA-LABEL: name: test_load_global_v2p3_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 1, addrspace 1) - ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load (<2 x i32>), align 1, addrspace 1) + ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x i32>) ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) ; ; CI-MESA-LABEL: name: test_load_global_v2p3_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, 
addrspace 1) - ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) - ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>) + ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from 
unknown-address + 3, addrspace 1) + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32) + ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x i32>) ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) ; ; VI-LABEL: name: test_load_global_v2p3_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: 
[[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, addrspace 1) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from 
unknown-address + 7, addrspace 1) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x i32>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) ; ; GFX9-HSA-LABEL: name: test_load_global_v2p3_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 1, addrspace 1) - ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load (<2 x i32>), align 1, addrspace 1) + ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x i32>) ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) ; ; GFX9-MESA-LABEL: name: test_load_global_v2p3_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) 
:: (load (s8) from unknown-address + 6, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX9-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) - ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX9-MESA-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX9-MESA-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32) + ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x i32>) ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 1, addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:_(<2 x p3>) = G_LOAD %0(p1) :: (load (<2 x p3>), align 1, addrspace 1) + $vgpr0_vgpr1 = COPY %1(<2 x p3>) ... --- @@ -13750,46 +13750,46 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) - ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i8), align 4, addrspace 1) + ; SI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-HSA-LABEL: name: test_ext_load_global_s32_from_1_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) - ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i8), align 4, addrspace 1) + ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-MESA-LABEL: name: test_ext_load_global_s32_from_1_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) - ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i8), align 4, addrspace 1) + ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; VI-LABEL: name: test_ext_load_global_s32_from_1_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i8), align 4, addrspace 1) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-HSA-LABEL: name: test_ext_load_global_s32_from_1_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) - ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i8), align 4, addrspace 1) + ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-MESA-LABEL: name: test_ext_load_global_s32_from_1_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) - ; GFX9-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i8), align 4, addrspace 1) + ; GFX9-MESA-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_LOAD %0 :: (load (s8), align 4, addrspace 1) - $vgpr0 = COPY %1 + %1:_(i32) = G_LOAD %0(p1) :: (load (i8), align 4, addrspace 1) + $vgpr0 = COPY %1(i32) ... 
--- @@ -13802,46 +13802,46 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) - ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 4, addrspace 1) + ; SI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-HSA-LABEL: name: test_ext_load_global_s32_from_2_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) - ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 4, addrspace 1) + ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-MESA-LABEL: name: test_ext_load_global_s32_from_2_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) - ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 4, addrspace 1) + ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; VI-LABEL: name: test_ext_load_global_s32_from_2_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 4, addrspace 1) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-HSA-LABEL: name: test_ext_load_global_s32_from_2_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) - ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 4, addrspace 1) + ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-MESA-LABEL: name: test_ext_load_global_s32_from_2_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) - ; GFX9-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 4, addrspace 1) + ; GFX9-MESA-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_LOAD %0 :: (load (s16), align 4, addrspace 1) - $vgpr0 = COPY %1 + %1:_(i32) = G_LOAD %0(p1) :: (load (i16), align 4, addrspace 1) + $vgpr0 = COPY %1(i32) ... 
--- @@ -13854,106 +13854,106 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] - ; SI-NEXT: $vgpr0 = COPY [[OR1]](s32) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C3]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[OR]] + ; SI-NEXT: $vgpr0 = COPY [[OR1]](i32) ; ; CI-HSA-LABEL: name: test_ext_load_global_s32_from_s24_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1) - ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-HSA-NEXT: $vgpr0 = COPY [[OR]](s32) + ; CI-HSA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), align 1, addrspace 1) + ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-HSA-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; CI-MESA-LABEL: name: 
test_ext_load_global_s32_from_s24_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) - ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] - ; CI-MESA-NEXT: $vgpr0 = COPY [[OR1]](s32) + ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C3]](i32) + ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[OR]] + ; CI-MESA-NEXT: $vgpr0 = COPY [[OR1]](i32) ; ; VI-LABEL: name: test_ext_load_global_s32_from_s24_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] - ; VI-NEXT: $vgpr0 = COPY [[OR1]](s32) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; 
VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C3]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[OR]] + ; VI-NEXT: $vgpr0 = COPY [[OR1]](i32) ; ; GFX9-HSA-LABEL: name: test_ext_load_global_s32_from_s24_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1) - ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-HSA-NEXT: $vgpr0 = COPY [[OR]](s32) + ; GFX9-HSA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), align 1, addrspace 1) + ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-HSA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-HSA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-HSA-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; GFX9-MESA-LABEL: name: test_ext_load_global_s32_from_s24_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], 
[[OR]] - ; GFX9-MESA-NEXT: $vgpr0 = COPY [[OR1]](s32) + ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[OR]] + ; GFX9-MESA-NEXT: $vgpr0 = COPY [[OR1]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_LOAD %0 :: (load (s24), align 1, addrspace 1) - $vgpr0 = COPY %1 + %1:_(i32) = G_LOAD %0(p1) :: (load (i24), align 1, addrspace 1) + $vgpr0 = COPY %1(i32) ... --- name: test_ext_load_global_s32_from_s24_align2 @@ -13965,82 +13965,82 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 2, align 2, addrspace 1) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; CI-HSA-LABEL: name: test_ext_load_global_s32_from_s24_align2 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1) - ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-HSA-NEXT: $vgpr0 = COPY [[OR]](s32) + ; CI-HSA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 
1) + ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 2, align 2, addrspace 1) + ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-HSA-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; CI-MESA-LABEL: name: test_ext_load_global_s32_from_s24_align2 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1) - ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-MESA-NEXT: $vgpr0 = COPY [[OR]](s32) + ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 2, align 2, addrspace 1) + ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-MESA-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; VI-LABEL: name: test_ext_load_global_s32_from_s24_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 2, align 2, addrspace 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; GFX9-HSA-LABEL: name: test_ext_load_global_s32_from_s24_align2 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; 
GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1) - ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-HSA-NEXT: $vgpr0 = COPY [[OR]](s32) + ; GFX9-HSA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 2, align 2, addrspace 1) + ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-HSA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-HSA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-HSA-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; GFX9-MESA-LABEL: name: test_ext_load_global_s32_from_s24_align2 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1) - ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-MESA-NEXT: $vgpr0 = COPY [[OR]](s32) + ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 2, align 2, addrspace 1) + ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-MESA-NEXT: $vgpr0 = COPY [[OR]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_LOAD %0 :: (load (s24), align 2, addrspace 1) - $vgpr0 = COPY %1 + %1:_(i32) = G_LOAD %0(p1) :: (load (i24), align 2, addrspace 1) + $vgpr0 = COPY %1(i32) ... 
--- @@ -14053,46 +14053,46 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; SI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-HSA-LABEL: name: test_ext_load_global_s32_from_s24_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-MESA-LABEL: name: test_ext_load_global_s32_from_s24_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; VI-LABEL: name: test_ext_load_global_s32_from_s24_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-HSA-LABEL: name: test_ext_load_global_s32_from_s24_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-MESA-LABEL: name: test_ext_load_global_s32_from_s24_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; GFX9-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; GFX9-MESA-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_LOAD %0 :: (load (s24), align 4, addrspace 1) - $vgpr0 = COPY %1 + %1:_(i32) = G_LOAD %0(p1) :: (load (i24), align 4, addrspace 1) + $vgpr0 = COPY %1(i32) ... 
--- @@ -14106,52 +14106,52 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i8), align 4, addrspace 1) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; CI-HSA-LABEL: name: test_ext_load_global_s64_from_1_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) - ; CI-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i8), align 4, addrspace 1) + ; CI-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; CI-MESA-LABEL: name: test_ext_load_global_s64_from_1_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) - ; CI-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i8), align 4, addrspace 1) + ; CI-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; VI-LABEL: name: test_ext_load_global_s64_from_1_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i8), align 4, addrspace 1) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX9-HSA-LABEL: name: test_ext_load_global_s64_from_1_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) - ; GFX9-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i8), align 4, addrspace 1) + ; GFX9-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX9-MESA-LABEL: name: test_ext_load_global_s64_from_1_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) - ; GFX9-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i8), align 4, addrspace 1) + ; GFX9-MESA-NEXT: 
[[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_LOAD %0 :: (load (s8), align 4, addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_LOAD %0(p1) :: (load (i8), align 4, addrspace 1) + $vgpr0_vgpr1 = COPY %1(i64) ... --- @@ -14164,52 +14164,52 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 4, addrspace 1) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; CI-HSA-LABEL: name: test_ext_load_global_s64_from_2_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) - ; CI-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 4, addrspace 1) + ; CI-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; CI-MESA-LABEL: name: test_ext_load_global_s64_from_2_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) - ; CI-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 4, addrspace 1) + ; CI-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; VI-LABEL: name: test_ext_load_global_s64_from_2_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 4, addrspace 1) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX9-HSA-LABEL: name: test_ext_load_global_s64_from_2_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) - ; GFX9-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 4, addrspace 1) + ; GFX9-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX9-MESA-LABEL: name: test_ext_load_global_s64_from_2_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: 
[[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) - ; GFX9-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 4, addrspace 1) + ; GFX9-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_LOAD %0 :: (load (s16), align 4, addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_LOAD %0(p1) :: (load (i16), align 4, addrspace 1) + $vgpr0_vgpr1 = COPY %1(i64) ... --- @@ -14222,52 +14222,52 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; CI-HSA-LABEL: name: test_ext_load_global_s64_from_4_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; CI-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; CI-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; CI-MESA-LABEL: name: test_ext_load_global_s64_from_4_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; CI-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; CI-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; VI-LABEL: name: test_ext_load_global_s64_from_4_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX9-HSA-LABEL: name: test_ext_load_global_s64_from_4_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; GFX9-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; GFX9-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; 
GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX9-MESA-LABEL: name: test_ext_load_global_s64_from_4_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; GFX9-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; GFX9-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_LOAD %0 :: (load (s32), align 4, addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_LOAD %0(p1) :: (load (i32), addrspace 1) + $vgpr0_vgpr1 = COPY %1(i64) ... --- @@ -14280,70 +14280,70 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) - ; SI-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; SI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; SI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[DEF]](i32) + ; SI-NEXT: [[DEF1:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; SI-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[MV]](i64), [[DEF1]](i64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](i128) ; ; CI-HSA-LABEL: name: test_ext_load_global_s128_from_4_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CI-HSA-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) - ; CI-HSA-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CI-HSA-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CI-HSA-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[DEF]](i32) + ; CI-HSA-NEXT: [[DEF1:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; CI-HSA-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[MV]](i64), [[DEF1]](i64) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](i128) ; ; CI-MESA-LABEL: name: test_ext_load_global_s128_from_4_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CI-MESA-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) - ; CI-MESA-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CI-MESA-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; CI-MESA-NEXT: 
[[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CI-MESA-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[DEF]](i32) + ; CI-MESA-NEXT: [[DEF1:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; CI-MESA-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[MV]](i64), [[DEF1]](i64) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](i128) ; ; VI-LABEL: name: test_ext_load_global_s128_from_4_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) - ; VI-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; VI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; VI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[DEF]](i32) + ; VI-NEXT: [[DEF1:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; VI-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[MV]](i64), [[DEF1]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](i128) ; ; GFX9-HSA-LABEL: name: test_ext_load_global_s128_from_4_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-HSA-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) - ; GFX9-HSA-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; GFX9-HSA-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX9-HSA-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[DEF]](i32) + ; GFX9-HSA-NEXT: [[DEF1:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; GFX9-HSA-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[MV]](i64), [[DEF1]](i64) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](i128) ; ; GFX9-MESA-LABEL: name: test_ext_load_global_s128_from_4_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-MESA-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) - ; GFX9-MESA-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; GFX9-MESA-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX9-MESA-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[DEF]](i32) + ; GFX9-MESA-NEXT: [[DEF1:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; GFX9-MESA-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[MV]](i64), [[DEF1]](i64) + ; 
GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](i128) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s128) = G_LOAD %0 :: (load (s32), align 4, addrspace 1) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(i128) = G_LOAD %0(p1) :: (load (i32), addrspace 1) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(i128) ... --- @@ -14356,52 +14356,52 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 4, addrspace 1) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; CI-HSA-LABEL: name: test_ext_load_global_s64_from_2_align2 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) - ; CI-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 4, addrspace 1) + ; CI-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; CI-MESA-LABEL: name: test_ext_load_global_s64_from_2_align2 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) - ; CI-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 4, addrspace 1) + ; CI-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; VI-LABEL: name: test_ext_load_global_s64_from_2_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 4, addrspace 1) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX9-HSA-LABEL: name: test_ext_load_global_s64_from_2_align2 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) - ; GFX9-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 4, addrspace 1) + ; GFX9-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX9-MESA-LABEL: name: test_ext_load_global_s64_from_2_align2 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = 
G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) - ; GFX9-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 4, addrspace 1) + ; GFX9-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_LOAD %0 :: (load (s16), align 4, addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_LOAD %0(p1) :: (load (i16), align 4, addrspace 1) + $vgpr0_vgpr1 = COPY %1(i64) ... --- @@ -14414,52 +14414,52 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i8), align 4, addrspace 1) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; CI-HSA-LABEL: name: test_ext_load_global_s64_from_1_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) - ; CI-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i8), align 4, addrspace 1) + ; CI-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; CI-MESA-LABEL: name: test_ext_load_global_s64_from_1_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) - ; CI-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i8), align 4, addrspace 1) + ; CI-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; VI-LABEL: name: test_ext_load_global_s64_from_1_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i8), align 4, addrspace 1) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX9-HSA-LABEL: name: test_ext_load_global_s64_from_1_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) - ; GFX9-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i8), align 4, addrspace 1) + ; GFX9-HSA-NEXT: 
[[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX9-MESA-LABEL: name: test_ext_load_global_s64_from_1_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) - ; GFX9-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i8), align 4, addrspace 1) + ; GFX9-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_LOAD %0 :: (load (s8), align 4, addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_LOAD %0(p1) :: (load (i8), align 4, addrspace 1) + $vgpr0_vgpr1 = COPY %1(i64) ... --- @@ -14472,46 +14472,46 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 1, addrspace 1) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load (<2 x i16>), align 1, addrspace 1) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; CI-HSA-LABEL: name: test_extload_global_v2s32_from_v2s16_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 1, addrspace 1) - ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load (<2 x i16>), align 1, addrspace 1) + ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; CI-MESA-LABEL: name: test_extload_global_v2s32_from_v2s16_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 1, addrspace 1) - ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load (<2 x i16>), align 1, addrspace 1) + ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; VI-LABEL: name: test_extload_global_v2s32_from_v2s16_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 1, addrspace 1) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load (<2 x i16>), align 1, addrspace 1) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX9-HSA-LABEL: name: test_extload_global_v2s32_from_v2s16_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 1, addrspace 1) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load (<2 x i16>), align 1, addrspace 1) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX9-MESA-LABEL: name: 
test_extload_global_v2s32_from_v2s16_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 1, addrspace 1) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load (<2 x i16>), align 1, addrspace 1) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s16>), align 1, addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:_(<2 x i32>) = G_LOAD %0(p1) :: (load (<2 x i16>), align 1, addrspace 1) + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... --- @@ -14524,46 +14524,46 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 2, addrspace 1) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load (<2 x i16>), align 2, addrspace 1) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; CI-HSA-LABEL: name: test_extload_global_v2s32_from_v2s16_align2 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 2, addrspace 1) - ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load (<2 x i16>), align 2, addrspace 1) + ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; CI-MESA-LABEL: name: test_extload_global_v2s32_from_v2s16_align2 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 2, addrspace 1) - ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load (<2 x i16>), align 2, addrspace 1) + ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; VI-LABEL: name: test_extload_global_v2s32_from_v2s16_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 2, addrspace 1) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load (<2 x i16>), align 2, addrspace 1) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX9-HSA-LABEL: name: test_extload_global_v2s32_from_v2s16_align2 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 2, addrspace 1) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load (<2 x i16>), align 2, addrspace 1) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX9-MESA-LABEL: name: test_extload_global_v2s32_from_v2s16_align2 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD 
[[COPY]](p1) :: (load (<2 x s16>), align 2, addrspace 1) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load (<2 x i16>), align 2, addrspace 1) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s16>), align 2, addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:_(<2 x i32>) = G_LOAD %0(p1) :: (load (<2 x i16>), align 2, addrspace 1) + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... --- @@ -14576,46 +14576,46 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load (<2 x i16>), addrspace 1) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; CI-HSA-LABEL: name: test_extload_global_v2s32_from_v2s16_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1) - ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load (<2 x i16>), addrspace 1) + ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; CI-MESA-LABEL: name: test_extload_global_v2s32_from_v2s16_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1) - ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load (<2 x i16>), addrspace 1) + ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; VI-LABEL: name: test_extload_global_v2s32_from_v2s16_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load (<2 x i16>), addrspace 1) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX9-HSA-LABEL: name: test_extload_global_v2s32_from_v2s16_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load (<2 x i16>), addrspace 1) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX9-MESA-LABEL: name: test_extload_global_v2s32_from_v2s16_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load (<2 x i16>), addrspace 1) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x 
s32>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:_(<2 x i32>) = G_LOAD %0(p1) :: (load (<2 x i16>), addrspace 1) + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... --- @@ -14628,46 +14628,46 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 4, addrspace 1) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i16>), align 4, addrspace 1) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) ; ; CI-HSA-LABEL: name: test_extload_global_v3s32_from_v3s16_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 4, addrspace 1) - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i16>), align 4, addrspace 1) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) ; ; CI-MESA-LABEL: name: test_extload_global_v3s32_from_v3s16_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 4, addrspace 1) - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i16>), align 4, addrspace 1) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) ; ; VI-LABEL: name: test_extload_global_v3s32_from_v3s16_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 4, addrspace 1) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i16>), align 4, addrspace 1) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) ; ; GFX9-HSA-LABEL: name: test_extload_global_v3s32_from_v3s16_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 4, addrspace 1) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i16>), align 4, addrspace 1) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) ; ; GFX9-MESA-LABEL: name: test_extload_global_v3s32_from_v3s16_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 4, addrspace 1) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i16>), align 4, addrspace 1) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s16>), align 4, addrspace 1) - $vgpr0_vgpr1_vgpr2 = COPY %1 + %1:_(<3 x i32>) = G_LOAD %0(p1) :: (load (<3 x i16>), align 4, addrspace 
1) + $vgpr0_vgpr1_vgpr2 = COPY %1(<3 x i32>) ... --- @@ -14680,46 +14680,46 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i16>), align 4, addrspace 1) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; CI-HSA-LABEL: name: test_extload_global_v4s32_from_v4s16_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1) - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i16>), align 4, addrspace 1) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; CI-MESA-LABEL: name: test_extload_global_v4s32_from_v4s16_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1) - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i16>), align 4, addrspace 1) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; VI-LABEL: name: test_extload_global_v4s32_from_v4s16_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i16>), align 4, addrspace 1) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; GFX9-HSA-LABEL: name: test_extload_global_v4s32_from_v4s16_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i16>), align 4, addrspace 1) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; GFX9-MESA-LABEL: name: test_extload_global_v4s32_from_v4s16_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i16>), align 4, addrspace 1) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s16>), align 4, addrspace 1) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<4 x i32>) = G_LOAD %0(p1) :: (load (<4 x i16>), align 4, addrspace 1) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<4 x i32>) ... 
--- @@ -14732,449 +14732,449 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) - ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; SI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) - ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) - ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], 
[[C]](s64) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) - ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1) - ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; SI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1) - ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; SI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1) - ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1) - ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; SI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) - ; SI-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD15]](p1) :: (load (s8) from unknown-address + 16, addrspace 1) - ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; SI-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD16]](p1) :: (load (s8) from unknown-address + 17, addrspace 1) - ; SI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD13]], [[C1]](s32) - ; SI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[ZEXTLOAD12]] - ; SI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD17]](p1) :: (load (s8) from unknown-address + 18, addrspace 1) - ; SI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD17]], [[C]](s64) - ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load (s8) from unknown-address + 19, addrspace 1) - ; SI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; SI-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[SHL13]], [[ZEXTLOAD14]] - ; SI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[OR13]], [[C3]](s32) - ; SI-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[SHL14]], [[OR12]] - ; SI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64) - ; SI-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD19]](p1) :: (load (s8) from unknown-address + 20, addrspace 1) - ; SI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64) - ; SI-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD20]](p1) :: (load (s8) from unknown-address + 21, addrspace 1) - ; SI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = 
G_SHL [[ZEXTLOAD16]], [[C1]](s32) - ; SI-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[SHL15]], [[ZEXTLOAD15]] - ; SI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD21]](p1) :: (load (s8) from unknown-address + 22, addrspace 1) - ; SI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD21]], [[C]](s64) - ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load (s8) from unknown-address + 23, addrspace 1) - ; SI-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; SI-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[SHL16]], [[ZEXTLOAD17]] - ; SI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[OR16]], [[C3]](s32) - ; SI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] - ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, addrspace 1) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], 
[[C1]](i32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; SI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; SI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (i8) from unknown-address + 8, addrspace 1) + ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; SI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (i8) from unknown-address + 9, addrspace 1) + ; SI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; SI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (i8) from unknown-address + 10, addrspace 1) + ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p1) :: (load (i8) from unknown-address + 11, addrspace 1) + ; SI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; SI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; SI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; SI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; SI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; SI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p1) :: (load (i8) from unknown-address + 12, addrspace 1) + ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; SI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p1) :: (load (i8) from unknown-address + 13, addrspace 1) + ; SI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; SI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p1) :: (load (i8) from unknown-address + 14, addrspace 1) + ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p1) :: (load (i8) from unknown-address + 15, addrspace 1) + ; SI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; SI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; SI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; SI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](i64) + ; SI-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD15]](p1) :: (load (i8) from unknown-address + 16, addrspace 1) + ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C]](i64) + ; SI-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD16]](p1) :: (load (i8) from unknown-address + 17, addrspace 1) + ; SI-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD13]], [[C1]](i32) + ; SI-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[ZEXTLOAD12]] + ; SI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD 
[[PTR_ADD15]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD17]](p1) :: (load (i8) from unknown-address + 18, addrspace 1) + ; SI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD17]], [[C]](i64) + ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD18]](p1) :: (load (i8) from unknown-address + 19, addrspace 1) + ; SI-NEXT: [[SHL13:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; SI-NEXT: [[OR13:%[0-9]+]]:_(i32) = G_OR [[SHL13]], [[ZEXTLOAD14]] + ; SI-NEXT: [[SHL14:%[0-9]+]]:_(i32) = G_SHL [[OR13]], [[C3]](i32) + ; SI-NEXT: [[OR14:%[0-9]+]]:_(i32) = G_OR [[SHL14]], [[OR12]] + ; SI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](i64) + ; SI-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD19]](p1) :: (load (i8) from unknown-address + 20, addrspace 1) + ; SI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C]](i64) + ; SI-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD20]](p1) :: (load (i8) from unknown-address + 21, addrspace 1) + ; SI-NEXT: [[SHL15:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD16]], [[C1]](i32) + ; SI-NEXT: [[OR15:%[0-9]+]]:_(i32) = G_OR [[SHL15]], [[ZEXTLOAD15]] + ; SI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD21]](p1) :: (load (i8) from unknown-address + 22, addrspace 1) + ; SI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD21]], [[C]](i64) + ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD22]](p1) :: (load (i8) from unknown-address + 23, addrspace 1) + ; SI-NEXT: [[SHL16:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; SI-NEXT: [[OR16:%[0-9]+]]:_(i32) = G_OR [[SHL16]], [[ZEXTLOAD17]] + ; SI-NEXT: [[SHL17:%[0-9]+]]:_(i32) = G_SHL [[OR16]], [[C3]](i32) + ; SI-NEXT: [[OR17:%[0-9]+]]:_(i32) = G_OR [[SHL17]], [[OR15]] + ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR11]](i32), [[OR14]](i32), [[OR17]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; CI-HSA-LABEL: name: test_global_v2s96_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 1, addrspace 1) - ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 1, addrspace 1) - ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; CI-HSA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-HSA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 1, addrspace 1) + ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], 
[[C]](i64) + ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x i32>) from unknown-address + 12, align 1, addrspace 1) + ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD1]](<3 x i32>) + ; CI-HSA-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; CI-HSA-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; CI-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; CI-MESA-LABEL: name: test_global_v2s96_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD 
[[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) - ; CI-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; CI-MESA-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) - ; CI-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; CI-MESA-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; CI-MESA-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CI-MESA-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1) - ; CI-MESA-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; CI-MESA-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1) - ; CI-MESA-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; CI-MESA-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; CI-MESA-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; CI-MESA-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD15]](p1) :: (load (s8) from unknown-address + 16, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD16]](p1) :: (load (s8) from unknown-address + 17, addrspace 1) - ; CI-MESA-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD13]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[ZEXTLOAD12]] - ; CI-MESA-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD17]](p1) :: (load (s8) from unknown-address + 18, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD 
[[PTR_ADD17]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load (s8) from unknown-address + 19, addrspace 1) - ; CI-MESA-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[SHL13]], [[ZEXTLOAD14]] - ; CI-MESA-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[OR13]], [[C3]](s32) - ; CI-MESA-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[SHL14]], [[OR12]] - ; CI-MESA-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD19]](p1) :: (load (s8) from unknown-address + 20, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD20]](p1) :: (load (s8) from unknown-address + 21, addrspace 1) - ; CI-MESA-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD16]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[SHL15]], [[ZEXTLOAD15]] - ; CI-MESA-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD21]](p1) :: (load (s8) from unknown-address + 22, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD21]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load (s8) from unknown-address + 23, addrspace 1) - ; CI-MESA-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[SHL16]], [[ZEXTLOAD17]] - ; CI-MESA-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[OR16]], [[C3]](s32) - ; CI-MESA-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] - ; CI-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) - ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-MESA-NEXT: 
[[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (i8) from unknown-address + 8, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (i8) from unknown-address + 9, addrspace 1) + ; CI-MESA-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; CI-MESA-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (i8) from unknown-address + 10, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p1) :: (load (i8) from unknown-address + 11, addrspace 1) + ; CI-MESA-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; CI-MESA-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; CI-MESA-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; CI-MESA-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p1) :: (load (i8) from unknown-address + 12, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p1) :: (load (i8) from unknown-address + 13, addrspace 1) + ; CI-MESA-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; 
CI-MESA-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p1) :: (load (i8) from unknown-address + 14, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p1) :: (load (i8) from unknown-address + 15, addrspace 1) + ; CI-MESA-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; CI-MESA-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; CI-MESA-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; CI-MESA-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD15]](p1) :: (load (i8) from unknown-address + 16, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD16]](p1) :: (load (i8) from unknown-address + 17, addrspace 1) + ; CI-MESA-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD13]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[ZEXTLOAD12]] + ; CI-MESA-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD17]](p1) :: (load (i8) from unknown-address + 18, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD17]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD18]](p1) :: (load (i8) from unknown-address + 19, addrspace 1) + ; CI-MESA-NEXT: [[SHL13:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR13:%[0-9]+]]:_(i32) = G_OR [[SHL13]], [[ZEXTLOAD14]] + ; CI-MESA-NEXT: [[SHL14:%[0-9]+]]:_(i32) = G_SHL [[OR13]], [[C3]](i32) + ; CI-MESA-NEXT: [[OR14:%[0-9]+]]:_(i32) = G_OR [[SHL14]], [[OR12]] + ; CI-MESA-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD19]](p1) :: (load (i8) from unknown-address + 20, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD20]](p1) :: (load (i8) from unknown-address + 21, addrspace 1) + ; CI-MESA-NEXT: [[SHL15:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD16]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR15:%[0-9]+]]:_(i32) = G_OR [[SHL15]], [[ZEXTLOAD15]] + ; CI-MESA-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD21]](p1) :: (load (i8) from unknown-address + 22, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD21]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD22]](p1) :: (load (i8) from unknown-address + 23, addrspace 1) + ; CI-MESA-NEXT: [[SHL16:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR16:%[0-9]+]]:_(i32) = G_OR [[SHL16]], [[ZEXTLOAD17]] + ; CI-MESA-NEXT: [[SHL17:%[0-9]+]]:_(i32) = G_SHL [[OR16]], [[C3]](i32) + ; CI-MESA-NEXT: [[OR17:%[0-9]+]]:_(i32) = G_OR [[SHL17]], [[OR15]] + ; CI-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR11]](i32), [[OR14]](i32), [[OR17]](i32) + ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; CI-MESA-NEXT: 
[[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; CI-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; VI-LABEL: name: test_global_v2s96_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) - ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; VI-NEXT: 
[[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) - ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1) - ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1) - ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1) - ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1) - ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD15]](p1) :: (load (s8) from unknown-address + 16, addrspace 1) - ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD16]](p1) :: (load (s8) from unknown-address + 17, addrspace 1) - ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD13]], [[C1]](s32) - ; VI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[ZEXTLOAD12]] - ; VI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD17]](p1) :: (load (s8) from unknown-address + 18, addrspace 1) - ; VI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD17]], [[C]](s64) - ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load (s8) from unknown-address + 19, addrspace 1) - ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[SHL13]], [[ZEXTLOAD14]] - ; VI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[OR13]], [[C3]](s32) - ; VI-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[SHL14]], [[OR12]] - ; VI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64) - ; VI-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD19]](p1) :: (load (s8) from unknown-address + 20, 
addrspace 1) - ; VI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64) - ; VI-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD20]](p1) :: (load (s8) from unknown-address + 21, addrspace 1) - ; VI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD16]], [[C1]](s32) - ; VI-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[SHL15]], [[ZEXTLOAD15]] - ; VI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD21]](p1) :: (load (s8) from unknown-address + 22, addrspace 1) - ; VI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD21]], [[C]](s64) - ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load (s8) from unknown-address + 23, addrspace 1) - ; VI-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; VI-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[SHL16]], [[ZEXTLOAD17]] - ; VI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[OR16]], [[C3]](s32) - ; VI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] - ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i8) from unknown-address + 2, addrspace 1) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, 
addrspace 1) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; VI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (i8) from unknown-address + 8, addrspace 1) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (i8) from unknown-address + 9, addrspace 1) + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; VI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (i8) from unknown-address + 10, addrspace 1) + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p1) :: (load (i8) from unknown-address + 11, addrspace 1) + ; VI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; VI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; VI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; VI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; VI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p1) :: (load (i8) from unknown-address + 12, addrspace 1) + ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p1) :: (load (i8) from unknown-address + 13, addrspace 1) + ; VI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; VI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p1) :: (load (i8) from unknown-address + 14, addrspace 1) + ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p1) :: (load (i8) from unknown-address + 15, addrspace 1) + ; VI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; VI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; VI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; VI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD15]](p1) :: (load (i8) from unknown-address + 16, addrspace 1) + ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(i32) = G_ZEXTLOAD 
[[PTR_ADD16]](p1) :: (load (i8) from unknown-address + 17, addrspace 1) + ; VI-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD13]], [[C1]](i32) + ; VI-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[ZEXTLOAD12]] + ; VI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD17]](p1) :: (load (i8) from unknown-address + 18, addrspace 1) + ; VI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD17]], [[C]](i64) + ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD18]](p1) :: (load (i8) from unknown-address + 19, addrspace 1) + ; VI-NEXT: [[SHL13:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR13:%[0-9]+]]:_(i32) = G_OR [[SHL13]], [[ZEXTLOAD14]] + ; VI-NEXT: [[SHL14:%[0-9]+]]:_(i32) = G_SHL [[OR13]], [[C3]](i32) + ; VI-NEXT: [[OR14:%[0-9]+]]:_(i32) = G_OR [[SHL14]], [[OR12]] + ; VI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](i64) + ; VI-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD19]](p1) :: (load (i8) from unknown-address + 20, addrspace 1) + ; VI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C]](i64) + ; VI-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD20]](p1) :: (load (i8) from unknown-address + 21, addrspace 1) + ; VI-NEXT: [[SHL15:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD16]], [[C1]](i32) + ; VI-NEXT: [[OR15:%[0-9]+]]:_(i32) = G_OR [[SHL15]], [[ZEXTLOAD15]] + ; VI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD21]](p1) :: (load (i8) from unknown-address + 22, addrspace 1) + ; VI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD21]], [[C]](i64) + ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD22]](p1) :: (load (i8) from unknown-address + 23, addrspace 1) + ; VI-NEXT: [[SHL16:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; VI-NEXT: [[OR16:%[0-9]+]]:_(i32) = G_OR [[SHL16]], [[ZEXTLOAD17]] + ; VI-NEXT: [[SHL17:%[0-9]+]]:_(i32) = G_SHL [[OR16]], [[C3]](i32) + ; VI-NEXT: [[OR17:%[0-9]+]]:_(i32) = G_OR [[SHL17]], [[OR15]] + ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR11]](i32), [[OR14]](i32), [[OR17]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; GFX9-HSA-LABEL: name: test_global_v2s96_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 1, addrspace 1) - ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 1, addrspace 1) - ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX9-HSA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-HSA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; 
GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 1, addrspace 1) + ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x i32>) from unknown-address + 12, align 1, addrspace 1) + ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD1]](<3 x i32>) + ; GFX9-HSA-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; GFX9-HSA-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; GFX9-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; GFX9-MESA-LABEL: name: test_global_v2s96_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; 
GFX9-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX9-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; GFX9-MESA-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; GFX9-MESA-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX9-MESA-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; GFX9-MESA-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; GFX9-MESA-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; GFX9-MESA-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; GFX9-MESA-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD15]](p1) :: (load (s8) from unknown-address + 16, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD16]](p1) :: 
(load (s8) from unknown-address + 17, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD13]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[ZEXTLOAD12]] - ; GFX9-MESA-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD17]](p1) :: (load (s8) from unknown-address + 18, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD17]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load (s8) from unknown-address + 19, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[SHL13]], [[ZEXTLOAD14]] - ; GFX9-MESA-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[OR13]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[SHL14]], [[OR12]] - ; GFX9-MESA-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD19]](p1) :: (load (s8) from unknown-address + 20, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD20]](p1) :: (load (s8) from unknown-address + 21, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD16]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[SHL15]], [[ZEXTLOAD15]] - ; GFX9-MESA-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD21]](p1) :: (load (s8) from unknown-address + 22, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD21]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load (s8) from unknown-address + 23, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[SHL16]], [[ZEXTLOAD17]] - ; GFX9-MESA-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[OR16]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] - ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) - ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX9-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: 
(load (i8) from unknown-address + 2, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i8) from unknown-address + 3, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i8) from unknown-address + 4, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p1) :: (load (i8) from unknown-address + 5, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i8) from unknown-address + 6, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i8) from unknown-address + 7, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX9-MESA-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; GFX9-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (i8) from unknown-address + 8, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p1) :: (load (i8) from unknown-address + 9, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX9-MESA-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (i8) from unknown-address + 10, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p1) :: (load (i8) from unknown-address + 11, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX9-MESA-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX9-MESA-NEXT: 
[[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; GFX9-MESA-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p1) :: (load (i8) from unknown-address + 12, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p1) :: (load (i8) from unknown-address + 13, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; GFX9-MESA-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p1) :: (load (i8) from unknown-address + 14, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p1) :: (load (i8) from unknown-address + 15, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; GFX9-MESA-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; GFX9-MESA-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD15]](p1) :: (load (i8) from unknown-address + 16, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD16]](p1) :: (load (i8) from unknown-address + 17, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD13]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[ZEXTLOAD12]] + ; GFX9-MESA-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD17]](p1) :: (load (i8) from unknown-address + 18, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD17]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD18]](p1) :: (load (i8) from unknown-address + 19, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL13:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR13:%[0-9]+]]:_(i32) = G_OR [[SHL13]], [[ZEXTLOAD14]] + ; GFX9-MESA-NEXT: [[SHL14:%[0-9]+]]:_(i32) = G_SHL [[OR13]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[OR14:%[0-9]+]]:_(i32) = G_OR [[SHL14]], [[OR12]] + ; GFX9-MESA-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD19]](p1) :: (load (i8) from unknown-address + 20, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD20]](p1) :: (load (i8) from unknown-address + 21, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL15:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD16]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR15:%[0-9]+]]:_(i32) = G_OR [[SHL15]], [[ZEXTLOAD15]] + ; GFX9-MESA-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD21]](p1) :: (load (i8) from unknown-address + 22, addrspace 1) + ; GFX9-MESA-NEXT: 
[[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD21]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD22]](p1) :: (load (i8) from unknown-address + 23, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL16:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR16:%[0-9]+]]:_(i32) = G_OR [[SHL16]], [[ZEXTLOAD17]] + ; GFX9-MESA-NEXT: [[SHL17:%[0-9]+]]:_(i32) = G_SHL [[OR16]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[OR17:%[0-9]+]]:_(i32) = G_OR [[SHL17]], [[OR15]] + ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR11]](i32), [[OR14]](i32), [[OR17]](i32) + ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; GFX9-MESA-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; GFX9-MESA-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; GFX9-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 1, addrspace 1) - %2:_(s96) = G_EXTRACT %1, 0 - %3:_(s96) = G_EXTRACT %1, 96 - $vgpr0_vgpr1_vgpr2 = COPY %2 - $vgpr3_vgpr4_vgpr5 = COPY %3 + %1:_(<2 x i96>) = G_LOAD %0(p1) :: (load (<2 x i96>), align 1, addrspace 1) + %2:_(i96) = G_EXTRACT %1(<2 x i96>), 0 + %3:_(i96) = G_EXTRACT %1(<2 x i96>), 96 + $vgpr0_vgpr1_vgpr2 = COPY %2(i96) + $vgpr3_vgpr4_vgpr5 = COPY %3(i96) ... --- @@ -15187,249 +15187,249 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD 
[[COPY]], [[C4]](s64) - ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s16) from unknown-address + 14, addrspace 1) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C2]](s64) - ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (s16) from unknown-address + 16, addrspace 1) - ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s16) from unknown-address + 18, addrspace 1) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD4]] - ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s64) - ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (s16) from unknown-address + 20, addrspace 1) - ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s16) from unknown-address + 22, addrspace 1) - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] - ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, addrspace 1) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i16) from unknown-address + 8, addrspace 1) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p1) :: (load (i16) from unknown-address + 10, addrspace 1) + ; SI-NEXT: 
[[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i16) from unknown-address + 12, addrspace 1) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i16) from unknown-address + 14, addrspace 1) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C2]](i64) + ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (i16) from unknown-address + 16, addrspace 1) + ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD8]](p1) :: (load (i16) from unknown-address + 18, addrspace 1) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD4]] + ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C3]](i64) + ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (i16) from unknown-address + 20, addrspace 1) + ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p1) :: (load (i16) from unknown-address + 22, addrspace 1) + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[ZEXTLOAD5]] + ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR3]](i32), [[OR4]](i32), [[OR5]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; CI-HSA-LABEL: name: test_global_v2s96_align2 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 2, addrspace 1) - ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 2, addrspace 1) - ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; CI-HSA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-HSA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 2, addrspace 1) + ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; 
CI-HSA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x i32>) from unknown-address + 12, align 2, addrspace 1) + ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD1]](<3 x i32>) + ; CI-HSA-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; CI-HSA-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; CI-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; CI-MESA-LABEL: name: test_global_v2s96_align2 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) - ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) - ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s16) from unknown-address + 14, addrspace 1) - ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C2]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load 
(s16) from unknown-address + 16, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s16) from unknown-address + 18, addrspace 1) - ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD4]] - ; CI-MESA-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s64) - ; CI-MESA-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (s16) from unknown-address + 20, addrspace 1) - ; CI-MESA-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s16) from unknown-address + 22, addrspace 1) - ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] - ; CI-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) - ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i16) from unknown-address + 8, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p1) :: (load (i16) from unknown-address + 10, addrspace 1) + ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32) + ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + 
; CI-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i16) from unknown-address + 12, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i16) from unknown-address + 14, addrspace 1) + ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C2]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (i16) from unknown-address + 16, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD8]](p1) :: (load (i16) from unknown-address + 18, addrspace 1) + ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD4]] + ; CI-MESA-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C3]](i64) + ; CI-MESA-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (i16) from unknown-address + 20, addrspace 1) + ; CI-MESA-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p1) :: (load (i16) from unknown-address + 22, addrspace 1) + ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[ZEXTLOAD5]] + ; CI-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR3]](i32), [[OR4]](i32), [[OR5]](i32) + ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; CI-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; VI-LABEL: name: test_global_v2s96_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s16) from 
unknown-address + 8, addrspace 1) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s16) from unknown-address + 14, addrspace 1) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C2]](s64) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (s16) from unknown-address + 16, addrspace 1) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s16) from unknown-address + 18, addrspace 1) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD4]] - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s64) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (s16) from unknown-address + 20, addrspace 1) - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s16) from unknown-address + 22, addrspace 1) - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] - ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, addrspace 1) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD 
[[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i16) from unknown-address + 8, addrspace 1) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p1) :: (load (i16) from unknown-address + 10, addrspace 1) + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i16) from unknown-address + 12, addrspace 1) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i16) from unknown-address + 14, addrspace 1) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C2]](i64) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (i16) from unknown-address + 16, addrspace 1) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD8]](p1) :: (load (i16) from unknown-address + 18, addrspace 1) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD4]] + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C3]](i64) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (i16) from unknown-address + 20, addrspace 1) + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p1) :: (load (i16) from unknown-address + 22, addrspace 1) + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[ZEXTLOAD5]] + ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR3]](i32), [[OR4]](i32), [[OR5]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; GFX9-HSA-LABEL: name: test_global_v2s96_align2 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 2, addrspace 1) - ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], 
[[C]](s64) - ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 2, addrspace 1) - ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX9-HSA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-HSA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 2, addrspace 1) + ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x i32>) from unknown-address + 12, align 2, addrspace 1) + ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD1]](<3 x i32>) + ; GFX9-HSA-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; GFX9-HSA-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; GFX9-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; GFX9-MESA-LABEL: name: test_global_v2s96_align2 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) - ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) - ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX9-MESA-NEXT: 
[[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s16) from unknown-address + 14, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX9-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C2]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (s16) from unknown-address + 16, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s16) from unknown-address + 18, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD4]] - ; GFX9-MESA-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s64) - ; GFX9-MESA-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (s16) from unknown-address + 20, addrspace 1) - ; GFX9-MESA-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s16) from unknown-address + 22, addrspace 1) - ; GFX9-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] - ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) - ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX9-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i16) from unknown-address + 2, addrspace 1) + ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p1) :: (load (i16) from unknown-address + 4, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i16) from unknown-address + 6, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = 
G_PTR_ADD [[COPY]], [[C3]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (i16) from unknown-address + 8, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p1) :: (load (i16) from unknown-address + 10, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32) + ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p1) :: (load (i16) from unknown-address + 12, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p1) :: (load (i16) from unknown-address + 14, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX9-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C2]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p1) :: (load (i16) from unknown-address + 16, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD8]](p1) :: (load (i16) from unknown-address + 18, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD4]] + ; GFX9-MESA-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C3]](i64) + ; GFX9-MESA-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p1) :: (load (i16) from unknown-address + 20, addrspace 1) + ; GFX9-MESA-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p1) :: (load (i16) from unknown-address + 22, addrspace 1) + ; GFX9-MESA-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[ZEXTLOAD5]] + ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR3]](i32), [[OR4]](i32), [[OR5]](i32) + ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; GFX9-MESA-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; GFX9-MESA-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; GFX9-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 2, addrspace 1) - %2:_(s96) = G_EXTRACT %1, 0 - %3:_(s96) = G_EXTRACT %1, 96 - $vgpr0_vgpr1_vgpr2 = COPY %2 - $vgpr3_vgpr4_vgpr5 = COPY %3 + %1:_(<2 x i96>) = G_LOAD %0(p1) :: (load (<2 x i96>), align 2, addrspace 1) + %2:_(i96) = G_EXTRACT %1(<2 x i96>), 0 + %3:_(i96) = G_EXTRACT %1(<2 x i96>), 96 + $vgpr0_vgpr1_vgpr2 = COPY %2(i96) + $vgpr3_vgpr4_vgpr5 = COPY %3(i96) ... 
--- @@ -15442,106 +15442,106 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from unknown-address + 8, addrspace 1) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p1) :: (load (<2 x s32>) from unknown-address + 12, align 4, addrspace 1) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s32) from unknown-address + 20, addrspace 1) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<2 x s32>) - ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[LOAD3]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p1) :: (load (<2 x i32>), align 4, addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i32) from unknown-address + 8, addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<2 x i32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[LOAD1]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[PTR_ADD1]](p1) :: (load (<2 x i32>) from unknown-address + 12, align 4, addrspace 1) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](i64) + ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i32) from unknown-address + 20, addrspace 1) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD2]](<2 x i32>) + ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[UV2]](i32), [[UV3]](i32), [[LOAD3]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; CI-HSA-LABEL: name: test_global_v2s96_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY 
$vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1) - ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1) - ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; CI-HSA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-HSA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 4, addrspace 1) + ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x i32>) from unknown-address + 12, align 4, addrspace 1) + ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD1]](<3 x i32>) + ; CI-HSA-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; CI-HSA-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; CI-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; CI-MESA-LABEL: name: test_global_v2s96_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1) - ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1) - ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 4, addrspace 1) + ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x i32>) from unknown-address + 12, align 4, addrspace 1) + ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD1]](<3 x i32>) + ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; CI-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; VI-LABEL: name: test_global_v2s96_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x 
s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 4, addrspace 1) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x i32>) from unknown-address + 12, align 4, addrspace 1) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD1]](<3 x i32>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; GFX9-HSA-LABEL: name: test_global_v2s96_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1) - ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1) - ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX9-HSA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-HSA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 4, addrspace 1) + ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x i32>) from unknown-address + 12, align 4, addrspace 1) + ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD1]](<3 x i32>) + ; GFX9-HSA-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; GFX9-HSA-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; GFX9-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; GFX9-MESA-LABEL: name: test_global_v2s96_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1) - 
; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1) - ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX9-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 4, addrspace 1) + ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x i32>) from unknown-address + 12, align 4, addrspace 1) + ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD1]](<3 x i32>) + ; GFX9-MESA-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; GFX9-MESA-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; GFX9-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 4, addrspace 1) - %2:_(s96) = G_EXTRACT %1, 0 - %3:_(s96) = G_EXTRACT %1, 96 - $vgpr0_vgpr1_vgpr2 = COPY %2 - $vgpr3_vgpr4_vgpr5 = COPY %3 + %1:_(<2 x i96>) = G_LOAD %0(p1) :: (load (<2 x i96>), align 4, addrspace 1) + %2:_(i96) = G_EXTRACT %1(<2 x i96>), 0 + %3:_(i96) = G_EXTRACT %1(<2 x i96>), 96 + $vgpr0_vgpr1_vgpr2 = COPY %2(i96) + $vgpr3_vgpr4_vgpr5 = COPY %3(i96) ... 
--- @@ -15554,104 +15554,104 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<2 x s32>) from unknown-address + 12, align 4, addrspace 1) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C1]](s64) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s32) from unknown-address + 20, addrspace 1) - ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<2 x s32>) - ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[LOAD2]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p1) :: (load (<4 x i32>), addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<4 x i32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<2 x i32>) from unknown-address + 12, align 4, addrspace 1) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C1]](i64) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i32) from unknown-address + 20, addrspace 1) + ; SI-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD1]](<2 x i32>) + ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[UV4]](i32), [[UV5]](i32), [[LOAD2]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; CI-HSA-LABEL: name: test_global_v2s96_align16 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1) - ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = 
G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1) - ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; CI-HSA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-HSA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 16, addrspace 1) + ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x i32>) from unknown-address + 12, align 4, addrspace 1) + ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD1]](<3 x i32>) + ; CI-HSA-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; CI-HSA-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; CI-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; CI-MESA-LABEL: name: test_global_v2s96_align16 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1) - ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1) - ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 16, addrspace 1) + ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x i32>) from unknown-address + 12, align 4, addrspace 1) + ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD1]](<3 x i32>) + ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; CI-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; VI-LABEL: name: test_global_v2s96_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: 
[[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 16, addrspace 1) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x i32>) from unknown-address + 12, align 4, addrspace 1) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD1]](<3 x i32>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; GFX9-HSA-LABEL: name: test_global_v2s96_align16 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1) - ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1) - ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX9-HSA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-HSA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 16, addrspace 1) + ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x i32>) from unknown-address + 12, align 4, addrspace 1) + ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD1]](<3 x i32>) + ; GFX9-HSA-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; GFX9-HSA-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; GFX9-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; GFX9-MESA-LABEL: name: test_global_v2s96_align16 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1) - ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD 
[[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1) - ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX9-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p1) :: (load (<3 x i32>), align 16, addrspace 1) + ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x i32>) from unknown-address + 12, align 4, addrspace 1) + ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD1]](<3 x i32>) + ; GFX9-MESA-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; GFX9-MESA-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; GFX9-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 16, addrspace 1) - %2:_(s96) = G_EXTRACT %1, 0 - %3:_(s96) = G_EXTRACT %1, 96 - $vgpr0_vgpr1_vgpr2 = COPY %2 - $vgpr3_vgpr4_vgpr5 = COPY %3 + %1:_(<2 x i96>) = G_LOAD %0(p1) :: (load (<2 x i96>), align 16, addrspace 1) + %2:_(i96) = G_EXTRACT %1(<2 x i96>), 0 + %3:_(i96) = G_EXTRACT %1(<2 x i96>), 96 + $vgpr0_vgpr1_vgpr2 = COPY %2(i96) + $vgpr3_vgpr4_vgpr5 = COPY %3(i96) ... --- @@ -15664,526 +15664,526 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32) - ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C5]](s32) - ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C6]](s32) - ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C7]](s32) - ; SI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C8]](s32) - ; SI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C9]](s32) - ; SI-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C10]](s32) - ; SI-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C11]](s32) - ; SI-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 - ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C12]](s32) 
- ; SI-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 14 - ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C13]](s32) - ; SI-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 - ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C14]](s32) - ; SI-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C15]](s32) - ; SI-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 - ; SI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C16]](s32) - ; SI-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 18 - ; SI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C17]](s32) - ; SI-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 19 - ; SI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C18]](s32) - ; SI-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; SI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C19]](s32) - ; SI-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 21 - ; SI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C20]](s32) - ; SI-NEXT: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 22 - ; SI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C21]](s32) - ; SI-NEXT: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 - ; SI-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C22]](s32) - ; SI-NEXT: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; SI-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C23]](s32) - ; SI-NEXT: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 - ; SI-NEXT: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C24]](s32) - ; SI-NEXT: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 26 - ; SI-NEXT: [[LSHR25:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C25]](s32) - ; SI-NEXT: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 27 - ; SI-NEXT: [[LSHR26:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C26]](s32) - ; SI-NEXT: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; SI-NEXT: [[LSHR27:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C27]](s32) - ; SI-NEXT: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 29 - ; SI-NEXT: [[LSHR28:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C28]](s32) - ; SI-NEXT: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 30 - ; SI-NEXT: [[LSHR29:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C29]](s32) - ; SI-NEXT: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; SI-NEXT: [[LSHR30:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C30]](s32) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32), [[LSHR3]](s32), [[LSHR4]](s32), [[LSHR5]](s32), [[LSHR6]](s32), [[LSHR7]](s32), [[LSHR8]](s32), [[LSHR9]](s32), [[LSHR10]](s32), [[LSHR11]](s32), [[LSHR12]](s32), [[LSHR13]](s32), [[LSHR14]](s32), [[LSHR15]](s32), [[LSHR16]](s32), [[LSHR17]](s32), [[LSHR18]](s32), [[LSHR19]](s32), [[LSHR20]](s32), [[LSHR21]](s32), [[LSHR22]](s32), [[LSHR23]](s32), [[LSHR24]](s32), [[LSHR25]](s32), [[LSHR26]](s32), [[LSHR27]](s32), [[LSHR28]](s32), [[LSHR29]](s32), [[LSHR30]](s32) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(<32 x s1>) = G_TRUNC [[BUILD_VECTOR]](<32 x s32>) - ; SI-NEXT: $vgpr0 = COPY [[TRUNC]](<32 x s1>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C2]](i32) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: 
[[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C3]](i32) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 5 + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C4]](i32) + ; SI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 6 + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C5]](i32) + ; SI-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 7 + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C6]](i32) + ; SI-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C7]](i32) + ; SI-NEXT: [[C8:%[0-9]+]]:_(i32) = G_CONSTANT i32 9 + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C8]](i32) + ; SI-NEXT: [[C9:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C9]](i32) + ; SI-NEXT: [[C10:%[0-9]+]]:_(i32) = G_CONSTANT i32 11 + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C10]](i32) + ; SI-NEXT: [[C11:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C11]](i32) + ; SI-NEXT: [[C12:%[0-9]+]]:_(i32) = G_CONSTANT i32 13 + ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C12]](i32) + ; SI-NEXT: [[C13:%[0-9]+]]:_(i32) = G_CONSTANT i32 14 + ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C13]](i32) + ; SI-NEXT: [[C14:%[0-9]+]]:_(i32) = G_CONSTANT i32 15 + ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C14]](i32) + ; SI-NEXT: [[C15:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C15]](i32) + ; SI-NEXT: [[C16:%[0-9]+]]:_(i32) = G_CONSTANT i32 17 + ; SI-NEXT: [[LSHR16:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C16]](i32) + ; SI-NEXT: [[C17:%[0-9]+]]:_(i32) = G_CONSTANT i32 18 + ; SI-NEXT: [[LSHR17:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C17]](i32) + ; SI-NEXT: [[C18:%[0-9]+]]:_(i32) = G_CONSTANT i32 19 + ; SI-NEXT: [[LSHR18:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C18]](i32) + ; SI-NEXT: [[C19:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; SI-NEXT: [[LSHR19:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C19]](i32) + ; SI-NEXT: [[C20:%[0-9]+]]:_(i32) = G_CONSTANT i32 21 + ; SI-NEXT: [[LSHR20:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C20]](i32) + ; SI-NEXT: [[C21:%[0-9]+]]:_(i32) = G_CONSTANT i32 22 + ; SI-NEXT: [[LSHR21:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C21]](i32) + ; SI-NEXT: [[C22:%[0-9]+]]:_(i32) = G_CONSTANT i32 23 + ; SI-NEXT: [[LSHR22:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C22]](i32) + ; SI-NEXT: [[C23:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; SI-NEXT: [[LSHR23:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C23]](i32) + ; SI-NEXT: [[C24:%[0-9]+]]:_(i32) = G_CONSTANT i32 25 + ; SI-NEXT: [[LSHR24:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C24]](i32) + ; SI-NEXT: [[C25:%[0-9]+]]:_(i32) = G_CONSTANT i32 26 + ; SI-NEXT: [[LSHR25:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C25]](i32) + ; SI-NEXT: [[C26:%[0-9]+]]:_(i32) = G_CONSTANT i32 27 + ; SI-NEXT: [[LSHR26:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C26]](i32) + ; SI-NEXT: [[C27:%[0-9]+]]:_(i32) = G_CONSTANT i32 28 + ; SI-NEXT: [[LSHR27:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C27]](i32) + ; SI-NEXT: [[C28:%[0-9]+]]:_(i32) = G_CONSTANT i32 29 + ; SI-NEXT: [[LSHR28:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C28]](i32) + ; SI-NEXT: [[C29:%[0-9]+]]:_(i32) = G_CONSTANT i32 30 + ; SI-NEXT: [[LSHR29:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C29]](i32) + ; SI-NEXT: [[C30:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; SI-NEXT: [[LSHR30:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C30]](i32) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), 
[[LSHR]](i32), [[LSHR1]](i32), [[LSHR2]](i32), [[LSHR3]](i32), [[LSHR4]](i32), [[LSHR5]](i32), [[LSHR6]](i32), [[LSHR7]](i32), [[LSHR8]](i32), [[LSHR9]](i32), [[LSHR10]](i32), [[LSHR11]](i32), [[LSHR12]](i32), [[LSHR13]](i32), [[LSHR14]](i32), [[LSHR15]](i32), [[LSHR16]](i32), [[LSHR17]](i32), [[LSHR18]](i32), [[LSHR19]](i32), [[LSHR20]](i32), [[LSHR21]](i32), [[LSHR22]](i32), [[LSHR23]](i32), [[LSHR24]](i32), [[LSHR25]](i32), [[LSHR26]](i32), [[LSHR27]](i32), [[LSHR28]](i32), [[LSHR29]](i32), [[LSHR30]](i32) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(<32 x i1>) = G_TRUNC [[BUILD_VECTOR]](<32 x i32>) + ; SI-NEXT: $vgpr0 = COPY [[TRUNC]](<32 x i1>) ; ; CI-HSA-LABEL: name: test_load_global_v32s1_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CI-HSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) - ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-HSA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32) - ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; CI-HSA-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32) - ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 - ; CI-HSA-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C5]](s32) - ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; CI-HSA-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C6]](s32) - ; CI-HSA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-HSA-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C7]](s32) - ; CI-HSA-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 - ; CI-HSA-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C8]](s32) - ; CI-HSA-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CI-HSA-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C9]](s32) - ; CI-HSA-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 - ; CI-HSA-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C10]](s32) - ; CI-HSA-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CI-HSA-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C11]](s32) - ; CI-HSA-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 - ; CI-HSA-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C12]](s32) - ; CI-HSA-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 14 - ; CI-HSA-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C13]](s32) - ; CI-HSA-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 - ; CI-HSA-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C14]](s32) - ; CI-HSA-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-HSA-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C15]](s32) - ; CI-HSA-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 - ; CI-HSA-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C16]](s32) - ; CI-HSA-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 18 - ; CI-HSA-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C17]](s32) - ; CI-HSA-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 19 - ; CI-HSA-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C18]](s32) - ; CI-HSA-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CI-HSA-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], 
[[C19]](s32) - ; CI-HSA-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 21 - ; CI-HSA-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C20]](s32) - ; CI-HSA-NEXT: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 22 - ; CI-HSA-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C21]](s32) - ; CI-HSA-NEXT: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 - ; CI-HSA-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C22]](s32) - ; CI-HSA-NEXT: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CI-HSA-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C23]](s32) - ; CI-HSA-NEXT: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 - ; CI-HSA-NEXT: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C24]](s32) - ; CI-HSA-NEXT: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 26 - ; CI-HSA-NEXT: [[LSHR25:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C25]](s32) - ; CI-HSA-NEXT: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 27 - ; CI-HSA-NEXT: [[LSHR26:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C26]](s32) - ; CI-HSA-NEXT: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; CI-HSA-NEXT: [[LSHR27:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C27]](s32) - ; CI-HSA-NEXT: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 29 - ; CI-HSA-NEXT: [[LSHR28:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C28]](s32) - ; CI-HSA-NEXT: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 30 - ; CI-HSA-NEXT: [[LSHR29:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C29]](s32) - ; CI-HSA-NEXT: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; CI-HSA-NEXT: [[LSHR30:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C30]](s32) - ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32), [[LSHR3]](s32), [[LSHR4]](s32), [[LSHR5]](s32), [[LSHR6]](s32), [[LSHR7]](s32), [[LSHR8]](s32), [[LSHR9]](s32), [[LSHR10]](s32), [[LSHR11]](s32), [[LSHR12]](s32), [[LSHR13]](s32), [[LSHR14]](s32), [[LSHR15]](s32), [[LSHR16]](s32), [[LSHR17]](s32), [[LSHR18]](s32), [[LSHR19]](s32), [[LSHR20]](s32), [[LSHR21]](s32), [[LSHR22]](s32), [[LSHR23]](s32), [[LSHR24]](s32), [[LSHR25]](s32), [[LSHR26]](s32), [[LSHR27]](s32), [[LSHR28]](s32), [[LSHR29]](s32), [[LSHR30]](s32) - ; CI-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(<32 x s1>) = G_TRUNC [[BUILD_VECTOR]](<32 x s32>) - ; CI-HSA-NEXT: $vgpr0 = COPY [[TRUNC]](<32 x s1>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-HSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; CI-HSA-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C2]](i32) + ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-HSA-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C3]](i32) + ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 5 + ; CI-HSA-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C4]](i32) + ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 6 + ; CI-HSA-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C5]](i32) + ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 7 + ; CI-HSA-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C6]](i32) + ; CI-HSA-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-HSA-NEXT: [[LSHR7:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C7]](i32) + ; CI-HSA-NEXT: [[C8:%[0-9]+]]:_(i32) = G_CONSTANT i32 9 + ; CI-HSA-NEXT: [[LSHR8:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C8]](i32) + ; CI-HSA-NEXT: [[C9:%[0-9]+]]:_(i32) = 
G_CONSTANT i32 10 + ; CI-HSA-NEXT: [[LSHR9:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C9]](i32) + ; CI-HSA-NEXT: [[C10:%[0-9]+]]:_(i32) = G_CONSTANT i32 11 + ; CI-HSA-NEXT: [[LSHR10:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C10]](i32) + ; CI-HSA-NEXT: [[C11:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CI-HSA-NEXT: [[LSHR11:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C11]](i32) + ; CI-HSA-NEXT: [[C12:%[0-9]+]]:_(i32) = G_CONSTANT i32 13 + ; CI-HSA-NEXT: [[LSHR12:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C12]](i32) + ; CI-HSA-NEXT: [[C13:%[0-9]+]]:_(i32) = G_CONSTANT i32 14 + ; CI-HSA-NEXT: [[LSHR13:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C13]](i32) + ; CI-HSA-NEXT: [[C14:%[0-9]+]]:_(i32) = G_CONSTANT i32 15 + ; CI-HSA-NEXT: [[LSHR14:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C14]](i32) + ; CI-HSA-NEXT: [[C15:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-HSA-NEXT: [[LSHR15:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C15]](i32) + ; CI-HSA-NEXT: [[C16:%[0-9]+]]:_(i32) = G_CONSTANT i32 17 + ; CI-HSA-NEXT: [[LSHR16:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C16]](i32) + ; CI-HSA-NEXT: [[C17:%[0-9]+]]:_(i32) = G_CONSTANT i32 18 + ; CI-HSA-NEXT: [[LSHR17:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C17]](i32) + ; CI-HSA-NEXT: [[C18:%[0-9]+]]:_(i32) = G_CONSTANT i32 19 + ; CI-HSA-NEXT: [[LSHR18:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C18]](i32) + ; CI-HSA-NEXT: [[C19:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CI-HSA-NEXT: [[LSHR19:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C19]](i32) + ; CI-HSA-NEXT: [[C20:%[0-9]+]]:_(i32) = G_CONSTANT i32 21 + ; CI-HSA-NEXT: [[LSHR20:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C20]](i32) + ; CI-HSA-NEXT: [[C21:%[0-9]+]]:_(i32) = G_CONSTANT i32 22 + ; CI-HSA-NEXT: [[LSHR21:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C21]](i32) + ; CI-HSA-NEXT: [[C22:%[0-9]+]]:_(i32) = G_CONSTANT i32 23 + ; CI-HSA-NEXT: [[LSHR22:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C22]](i32) + ; CI-HSA-NEXT: [[C23:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; CI-HSA-NEXT: [[LSHR23:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C23]](i32) + ; CI-HSA-NEXT: [[C24:%[0-9]+]]:_(i32) = G_CONSTANT i32 25 + ; CI-HSA-NEXT: [[LSHR24:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C24]](i32) + ; CI-HSA-NEXT: [[C25:%[0-9]+]]:_(i32) = G_CONSTANT i32 26 + ; CI-HSA-NEXT: [[LSHR25:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C25]](i32) + ; CI-HSA-NEXT: [[C26:%[0-9]+]]:_(i32) = G_CONSTANT i32 27 + ; CI-HSA-NEXT: [[LSHR26:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C26]](i32) + ; CI-HSA-NEXT: [[C27:%[0-9]+]]:_(i32) = G_CONSTANT i32 28 + ; CI-HSA-NEXT: [[LSHR27:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C27]](i32) + ; CI-HSA-NEXT: [[C28:%[0-9]+]]:_(i32) = G_CONSTANT i32 29 + ; CI-HSA-NEXT: [[LSHR28:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C28]](i32) + ; CI-HSA-NEXT: [[C29:%[0-9]+]]:_(i32) = G_CONSTANT i32 30 + ; CI-HSA-NEXT: [[LSHR29:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C29]](i32) + ; CI-HSA-NEXT: [[C30:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; CI-HSA-NEXT: [[LSHR30:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C30]](i32) + ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LSHR]](i32), [[LSHR1]](i32), [[LSHR2]](i32), [[LSHR3]](i32), [[LSHR4]](i32), [[LSHR5]](i32), [[LSHR6]](i32), [[LSHR7]](i32), [[LSHR8]](i32), [[LSHR9]](i32), [[LSHR10]](i32), [[LSHR11]](i32), [[LSHR12]](i32), [[LSHR13]](i32), [[LSHR14]](i32), [[LSHR15]](i32), [[LSHR16]](i32), [[LSHR17]](i32), [[LSHR18]](i32), [[LSHR19]](i32), [[LSHR20]](i32), [[LSHR21]](i32), [[LSHR22]](i32), [[LSHR23]](i32), [[LSHR24]](i32), [[LSHR25]](i32), [[LSHR26]](i32), [[LSHR27]](i32), [[LSHR28]](i32), [[LSHR29]](i32), [[LSHR30]](i32) + ; CI-HSA-NEXT: 
[[TRUNC:%[0-9]+]]:_(<32 x i1>) = G_TRUNC [[BUILD_VECTOR]](<32 x i32>) + ; CI-HSA-NEXT: $vgpr0 = COPY [[TRUNC]](<32 x i1>) ; ; CI-MESA-LABEL: name: test_load_global_v32s1_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CI-MESA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) - ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-MESA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32) - ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; CI-MESA-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32) - ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 - ; CI-MESA-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C5]](s32) - ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; CI-MESA-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C6]](s32) - ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C7]](s32) - ; CI-MESA-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 - ; CI-MESA-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C8]](s32) - ; CI-MESA-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CI-MESA-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C9]](s32) - ; CI-MESA-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 - ; CI-MESA-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C10]](s32) - ; CI-MESA-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CI-MESA-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C11]](s32) - ; CI-MESA-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 - ; CI-MESA-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C12]](s32) - ; CI-MESA-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 14 - ; CI-MESA-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C13]](s32) - ; CI-MESA-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 - ; CI-MESA-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C14]](s32) - ; CI-MESA-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C15]](s32) - ; CI-MESA-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 - ; CI-MESA-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C16]](s32) - ; CI-MESA-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 18 - ; CI-MESA-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C17]](s32) - ; CI-MESA-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 19 - ; CI-MESA-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C18]](s32) - ; CI-MESA-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CI-MESA-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C19]](s32) - ; CI-MESA-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 21 - ; CI-MESA-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C20]](s32) - ; CI-MESA-NEXT: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 22 - ; CI-MESA-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C21]](s32) - ; CI-MESA-NEXT: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 - ; CI-MESA-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C22]](s32) - ; CI-MESA-NEXT: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 
- ; CI-MESA-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C23]](s32) - ; CI-MESA-NEXT: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 - ; CI-MESA-NEXT: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C24]](s32) - ; CI-MESA-NEXT: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 26 - ; CI-MESA-NEXT: [[LSHR25:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C25]](s32) - ; CI-MESA-NEXT: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 27 - ; CI-MESA-NEXT: [[LSHR26:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C26]](s32) - ; CI-MESA-NEXT: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; CI-MESA-NEXT: [[LSHR27:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C27]](s32) - ; CI-MESA-NEXT: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 29 - ; CI-MESA-NEXT: [[LSHR28:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C28]](s32) - ; CI-MESA-NEXT: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 30 - ; CI-MESA-NEXT: [[LSHR29:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C29]](s32) - ; CI-MESA-NEXT: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; CI-MESA-NEXT: [[LSHR30:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C30]](s32) - ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32), [[LSHR3]](s32), [[LSHR4]](s32), [[LSHR5]](s32), [[LSHR6]](s32), [[LSHR7]](s32), [[LSHR8]](s32), [[LSHR9]](s32), [[LSHR10]](s32), [[LSHR11]](s32), [[LSHR12]](s32), [[LSHR13]](s32), [[LSHR14]](s32), [[LSHR15]](s32), [[LSHR16]](s32), [[LSHR17]](s32), [[LSHR18]](s32), [[LSHR19]](s32), [[LSHR20]](s32), [[LSHR21]](s32), [[LSHR22]](s32), [[LSHR23]](s32), [[LSHR24]](s32), [[LSHR25]](s32), [[LSHR26]](s32), [[LSHR27]](s32), [[LSHR28]](s32), [[LSHR29]](s32), [[LSHR30]](s32) - ; CI-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(<32 x s1>) = G_TRUNC [[BUILD_VECTOR]](<32 x s32>) - ; CI-MESA-NEXT: $vgpr0 = COPY [[TRUNC]](<32 x s1>) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-MESA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; CI-MESA-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C2]](i32) + ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-MESA-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C3]](i32) + ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 5 + ; CI-MESA-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C4]](i32) + ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 6 + ; CI-MESA-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C5]](i32) + ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 7 + ; CI-MESA-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C6]](i32) + ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-MESA-NEXT: [[LSHR7:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C7]](i32) + ; CI-MESA-NEXT: [[C8:%[0-9]+]]:_(i32) = G_CONSTANT i32 9 + ; CI-MESA-NEXT: [[LSHR8:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C8]](i32) + ; CI-MESA-NEXT: [[C9:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CI-MESA-NEXT: [[LSHR9:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C9]](i32) + ; CI-MESA-NEXT: [[C10:%[0-9]+]]:_(i32) = G_CONSTANT i32 11 + ; CI-MESA-NEXT: [[LSHR10:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C10]](i32) + ; CI-MESA-NEXT: [[C11:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CI-MESA-NEXT: [[LSHR11:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C11]](i32) + ; CI-MESA-NEXT: [[C12:%[0-9]+]]:_(i32) = G_CONSTANT i32 13 + ; CI-MESA-NEXT: 
[[LSHR12:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C12]](i32) + ; CI-MESA-NEXT: [[C13:%[0-9]+]]:_(i32) = G_CONSTANT i32 14 + ; CI-MESA-NEXT: [[LSHR13:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C13]](i32) + ; CI-MESA-NEXT: [[C14:%[0-9]+]]:_(i32) = G_CONSTANT i32 15 + ; CI-MESA-NEXT: [[LSHR14:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C14]](i32) + ; CI-MESA-NEXT: [[C15:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[LSHR15:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C15]](i32) + ; CI-MESA-NEXT: [[C16:%[0-9]+]]:_(i32) = G_CONSTANT i32 17 + ; CI-MESA-NEXT: [[LSHR16:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C16]](i32) + ; CI-MESA-NEXT: [[C17:%[0-9]+]]:_(i32) = G_CONSTANT i32 18 + ; CI-MESA-NEXT: [[LSHR17:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C17]](i32) + ; CI-MESA-NEXT: [[C18:%[0-9]+]]:_(i32) = G_CONSTANT i32 19 + ; CI-MESA-NEXT: [[LSHR18:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C18]](i32) + ; CI-MESA-NEXT: [[C19:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CI-MESA-NEXT: [[LSHR19:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C19]](i32) + ; CI-MESA-NEXT: [[C20:%[0-9]+]]:_(i32) = G_CONSTANT i32 21 + ; CI-MESA-NEXT: [[LSHR20:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C20]](i32) + ; CI-MESA-NEXT: [[C21:%[0-9]+]]:_(i32) = G_CONSTANT i32 22 + ; CI-MESA-NEXT: [[LSHR21:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C21]](i32) + ; CI-MESA-NEXT: [[C22:%[0-9]+]]:_(i32) = G_CONSTANT i32 23 + ; CI-MESA-NEXT: [[LSHR22:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C22]](i32) + ; CI-MESA-NEXT: [[C23:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; CI-MESA-NEXT: [[LSHR23:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C23]](i32) + ; CI-MESA-NEXT: [[C24:%[0-9]+]]:_(i32) = G_CONSTANT i32 25 + ; CI-MESA-NEXT: [[LSHR24:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C24]](i32) + ; CI-MESA-NEXT: [[C25:%[0-9]+]]:_(i32) = G_CONSTANT i32 26 + ; CI-MESA-NEXT: [[LSHR25:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C25]](i32) + ; CI-MESA-NEXT: [[C26:%[0-9]+]]:_(i32) = G_CONSTANT i32 27 + ; CI-MESA-NEXT: [[LSHR26:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C26]](i32) + ; CI-MESA-NEXT: [[C27:%[0-9]+]]:_(i32) = G_CONSTANT i32 28 + ; CI-MESA-NEXT: [[LSHR27:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C27]](i32) + ; CI-MESA-NEXT: [[C28:%[0-9]+]]:_(i32) = G_CONSTANT i32 29 + ; CI-MESA-NEXT: [[LSHR28:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C28]](i32) + ; CI-MESA-NEXT: [[C29:%[0-9]+]]:_(i32) = G_CONSTANT i32 30 + ; CI-MESA-NEXT: [[LSHR29:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C29]](i32) + ; CI-MESA-NEXT: [[C30:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; CI-MESA-NEXT: [[LSHR30:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C30]](i32) + ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LSHR]](i32), [[LSHR1]](i32), [[LSHR2]](i32), [[LSHR3]](i32), [[LSHR4]](i32), [[LSHR5]](i32), [[LSHR6]](i32), [[LSHR7]](i32), [[LSHR8]](i32), [[LSHR9]](i32), [[LSHR10]](i32), [[LSHR11]](i32), [[LSHR12]](i32), [[LSHR13]](i32), [[LSHR14]](i32), [[LSHR15]](i32), [[LSHR16]](i32), [[LSHR17]](i32), [[LSHR18]](i32), [[LSHR19]](i32), [[LSHR20]](i32), [[LSHR21]](i32), [[LSHR22]](i32), [[LSHR23]](i32), [[LSHR24]](i32), [[LSHR25]](i32), [[LSHR26]](i32), [[LSHR27]](i32), [[LSHR28]](i32), [[LSHR29]](i32), [[LSHR30]](i32) + ; CI-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(<32 x i1>) = G_TRUNC [[BUILD_VECTOR]](<32 x i32>) + ; CI-MESA-NEXT: $vgpr0 = COPY [[TRUNC]](<32 x i1>) ; ; VI-LABEL: name: test_load_global_v32s1_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT 
i32 1 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 - ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C5]](s32) - ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C6]](s32) - ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C7]](s32) - ; VI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 - ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C8]](s32) - ; VI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C9]](s32) - ; VI-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 - ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C10]](s32) - ; VI-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C11]](s32) - ; VI-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 - ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C12]](s32) - ; VI-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 14 - ; VI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C13]](s32) - ; VI-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 - ; VI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C14]](s32) - ; VI-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C15]](s32) - ; VI-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 - ; VI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C16]](s32) - ; VI-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 18 - ; VI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C17]](s32) - ; VI-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 19 - ; VI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C18]](s32) - ; VI-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; VI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C19]](s32) - ; VI-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 21 - ; VI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C20]](s32) - ; VI-NEXT: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 22 - ; VI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C21]](s32) - ; VI-NEXT: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 - ; VI-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C22]](s32) - ; VI-NEXT: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; VI-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C23]](s32) - ; VI-NEXT: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 - ; VI-NEXT: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C24]](s32) - ; VI-NEXT: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 26 - ; VI-NEXT: [[LSHR25:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C25]](s32) - ; VI-NEXT: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 27 - ; VI-NEXT: [[LSHR26:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C26]](s32) - ; VI-NEXT: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; VI-NEXT: [[LSHR27:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C27]](s32) - ; VI-NEXT: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 29 - ; VI-NEXT: 
[[LSHR28:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C28]](s32) - ; VI-NEXT: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 30 - ; VI-NEXT: [[LSHR29:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C29]](s32) - ; VI-NEXT: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; VI-NEXT: [[LSHR30:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C30]](s32) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32), [[LSHR3]](s32), [[LSHR4]](s32), [[LSHR5]](s32), [[LSHR6]](s32), [[LSHR7]](s32), [[LSHR8]](s32), [[LSHR9]](s32), [[LSHR10]](s32), [[LSHR11]](s32), [[LSHR12]](s32), [[LSHR13]](s32), [[LSHR14]](s32), [[LSHR15]](s32), [[LSHR16]](s32), [[LSHR17]](s32), [[LSHR18]](s32), [[LSHR19]](s32), [[LSHR20]](s32), [[LSHR21]](s32), [[LSHR22]](s32), [[LSHR23]](s32), [[LSHR24]](s32), [[LSHR25]](s32), [[LSHR26]](s32), [[LSHR27]](s32), [[LSHR28]](s32), [[LSHR29]](s32), [[LSHR30]](s32) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(<32 x s1>) = G_TRUNC [[BUILD_VECTOR]](<32 x s32>) - ; VI-NEXT: $vgpr0 = COPY [[TRUNC]](<32 x s1>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C2]](i32) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C3]](i32) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 5 + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C4]](i32) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 6 + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C5]](i32) + ; VI-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 7 + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C6]](i32) + ; VI-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C7]](i32) + ; VI-NEXT: [[C8:%[0-9]+]]:_(i32) = G_CONSTANT i32 9 + ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C8]](i32) + ; VI-NEXT: [[C9:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C9]](i32) + ; VI-NEXT: [[C10:%[0-9]+]]:_(i32) = G_CONSTANT i32 11 + ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C10]](i32) + ; VI-NEXT: [[C11:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C11]](i32) + ; VI-NEXT: [[C12:%[0-9]+]]:_(i32) = G_CONSTANT i32 13 + ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C12]](i32) + ; VI-NEXT: [[C13:%[0-9]+]]:_(i32) = G_CONSTANT i32 14 + ; VI-NEXT: [[LSHR13:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C13]](i32) + ; VI-NEXT: [[C14:%[0-9]+]]:_(i32) = G_CONSTANT i32 15 + ; VI-NEXT: [[LSHR14:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C14]](i32) + ; VI-NEXT: [[C15:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR15:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C15]](i32) + ; VI-NEXT: [[C16:%[0-9]+]]:_(i32) = G_CONSTANT i32 17 + ; VI-NEXT: [[LSHR16:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C16]](i32) + ; VI-NEXT: [[C17:%[0-9]+]]:_(i32) = G_CONSTANT i32 18 + ; VI-NEXT: [[LSHR17:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C17]](i32) + ; VI-NEXT: [[C18:%[0-9]+]]:_(i32) = G_CONSTANT i32 19 + ; VI-NEXT: [[LSHR18:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C18]](i32) + ; VI-NEXT: [[C19:%[0-9]+]]:_(i32) = G_CONSTANT i32 
20 + ; VI-NEXT: [[LSHR19:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C19]](i32) + ; VI-NEXT: [[C20:%[0-9]+]]:_(i32) = G_CONSTANT i32 21 + ; VI-NEXT: [[LSHR20:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C20]](i32) + ; VI-NEXT: [[C21:%[0-9]+]]:_(i32) = G_CONSTANT i32 22 + ; VI-NEXT: [[LSHR21:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C21]](i32) + ; VI-NEXT: [[C22:%[0-9]+]]:_(i32) = G_CONSTANT i32 23 + ; VI-NEXT: [[LSHR22:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C22]](i32) + ; VI-NEXT: [[C23:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; VI-NEXT: [[LSHR23:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C23]](i32) + ; VI-NEXT: [[C24:%[0-9]+]]:_(i32) = G_CONSTANT i32 25 + ; VI-NEXT: [[LSHR24:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C24]](i32) + ; VI-NEXT: [[C25:%[0-9]+]]:_(i32) = G_CONSTANT i32 26 + ; VI-NEXT: [[LSHR25:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C25]](i32) + ; VI-NEXT: [[C26:%[0-9]+]]:_(i32) = G_CONSTANT i32 27 + ; VI-NEXT: [[LSHR26:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C26]](i32) + ; VI-NEXT: [[C27:%[0-9]+]]:_(i32) = G_CONSTANT i32 28 + ; VI-NEXT: [[LSHR27:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C27]](i32) + ; VI-NEXT: [[C28:%[0-9]+]]:_(i32) = G_CONSTANT i32 29 + ; VI-NEXT: [[LSHR28:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C28]](i32) + ; VI-NEXT: [[C29:%[0-9]+]]:_(i32) = G_CONSTANT i32 30 + ; VI-NEXT: [[LSHR29:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C29]](i32) + ; VI-NEXT: [[C30:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; VI-NEXT: [[LSHR30:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C30]](i32) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LSHR]](i32), [[LSHR1]](i32), [[LSHR2]](i32), [[LSHR3]](i32), [[LSHR4]](i32), [[LSHR5]](i32), [[LSHR6]](i32), [[LSHR7]](i32), [[LSHR8]](i32), [[LSHR9]](i32), [[LSHR10]](i32), [[LSHR11]](i32), [[LSHR12]](i32), [[LSHR13]](i32), [[LSHR14]](i32), [[LSHR15]](i32), [[LSHR16]](i32), [[LSHR17]](i32), [[LSHR18]](i32), [[LSHR19]](i32), [[LSHR20]](i32), [[LSHR21]](i32), [[LSHR22]](i32), [[LSHR23]](i32), [[LSHR24]](i32), [[LSHR25]](i32), [[LSHR26]](i32), [[LSHR27]](i32), [[LSHR28]](i32), [[LSHR29]](i32), [[LSHR30]](i32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(<32 x i1>) = G_TRUNC [[BUILD_VECTOR]](<32 x i32>) + ; VI-NEXT: $vgpr0 = COPY [[TRUNC]](<32 x i1>) ; ; GFX9-HSA-LABEL: name: test_load_global_v32s1_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX9-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; GFX9-HSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) - ; GFX9-HSA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-HSA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32) - ; GFX9-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; GFX9-HSA-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32) - ; GFX9-HSA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 - ; GFX9-HSA-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C5]](s32) - ; GFX9-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; GFX9-HSA-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C6]](s32) - ; GFX9-HSA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-HSA-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C7]](s32) - ; 
GFX9-HSA-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 - ; GFX9-HSA-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C8]](s32) - ; GFX9-HSA-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GFX9-HSA-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C9]](s32) - ; GFX9-HSA-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 - ; GFX9-HSA-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C10]](s32) - ; GFX9-HSA-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX9-HSA-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C11]](s32) - ; GFX9-HSA-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 - ; GFX9-HSA-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C12]](s32) - ; GFX9-HSA-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 14 - ; GFX9-HSA-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C13]](s32) - ; GFX9-HSA-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 - ; GFX9-HSA-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C14]](s32) - ; GFX9-HSA-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-HSA-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C15]](s32) - ; GFX9-HSA-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 - ; GFX9-HSA-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C16]](s32) - ; GFX9-HSA-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 18 - ; GFX9-HSA-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C17]](s32) - ; GFX9-HSA-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 19 - ; GFX9-HSA-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C18]](s32) - ; GFX9-HSA-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GFX9-HSA-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C19]](s32) - ; GFX9-HSA-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 21 - ; GFX9-HSA-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C20]](s32) - ; GFX9-HSA-NEXT: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 22 - ; GFX9-HSA-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C21]](s32) - ; GFX9-HSA-NEXT: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 - ; GFX9-HSA-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C22]](s32) - ; GFX9-HSA-NEXT: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9-HSA-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C23]](s32) - ; GFX9-HSA-NEXT: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 - ; GFX9-HSA-NEXT: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C24]](s32) - ; GFX9-HSA-NEXT: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 26 - ; GFX9-HSA-NEXT: [[LSHR25:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C25]](s32) - ; GFX9-HSA-NEXT: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 27 - ; GFX9-HSA-NEXT: [[LSHR26:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C26]](s32) - ; GFX9-HSA-NEXT: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; GFX9-HSA-NEXT: [[LSHR27:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C27]](s32) - ; GFX9-HSA-NEXT: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 29 - ; GFX9-HSA-NEXT: [[LSHR28:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C28]](s32) - ; GFX9-HSA-NEXT: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 30 - ; GFX9-HSA-NEXT: [[LSHR29:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C29]](s32) - ; GFX9-HSA-NEXT: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX9-HSA-NEXT: [[LSHR30:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C30]](s32) - ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-HSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-HSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-HSA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX9-HSA-NEXT: 
[[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; GFX9-HSA-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; GFX9-HSA-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; GFX9-HSA-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; GFX9-HSA-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) - ; GFX9-HSA-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) - ; GFX9-HSA-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR10]](s32) - ; GFX9-HSA-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) - ; GFX9-HSA-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR12]](s32) - ; GFX9-HSA-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR13]](s32) - ; GFX9-HSA-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR14]](s32) - ; GFX9-HSA-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR15]](s32) - ; GFX9-HSA-NEXT: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR16]](s32) - ; GFX9-HSA-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR17]](s32) - ; GFX9-HSA-NEXT: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR18]](s32) - ; GFX9-HSA-NEXT: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR19]](s32) - ; GFX9-HSA-NEXT: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR20]](s32) - ; GFX9-HSA-NEXT: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR21]](s32) - ; GFX9-HSA-NEXT: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR22]](s32) - ; GFX9-HSA-NEXT: [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR23]](s32) - ; GFX9-HSA-NEXT: [[TRUNC25:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR24]](s32) - ; GFX9-HSA-NEXT: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR25]](s32) - ; GFX9-HSA-NEXT: [[TRUNC27:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR26]](s32) - ; GFX9-HSA-NEXT: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR27]](s32) - ; GFX9-HSA-NEXT: [[TRUNC29:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR28]](s32) - ; GFX9-HSA-NEXT: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR29]](s32) - ; GFX9-HSA-NEXT: [[TRUNC31:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR30]](s32) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC8]](s16), [[TRUNC9]](s16) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC10]](s16), [[TRUNC11]](s16) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC12]](s16), [[TRUNC13]](s16) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC14]](s16), [[TRUNC15]](s16) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR8:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC16]](s16), [[TRUNC17]](s16) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR9:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC18]](s16), [[TRUNC19]](s16) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR10:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC20]](s16), [[TRUNC21]](s16) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR11:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC22]](s16), [[TRUNC23]](s16) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR12:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC24]](s16), [[TRUNC25]](s16) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR13:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC26]](s16), [[TRUNC27]](s16) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR14:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC28]](s16), 
[[TRUNC29]](s16) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR15:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC30]](s16), [[TRUNC31]](s16) - ; GFX9-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), [[BUILD_VECTOR6]](<2 x s16>), [[BUILD_VECTOR7]](<2 x s16>), [[BUILD_VECTOR8]](<2 x s16>), [[BUILD_VECTOR9]](<2 x s16>), [[BUILD_VECTOR10]](<2 x s16>), [[BUILD_VECTOR11]](<2 x s16>), [[BUILD_VECTOR12]](<2 x s16>), [[BUILD_VECTOR13]](<2 x s16>), [[BUILD_VECTOR14]](<2 x s16>), [[BUILD_VECTOR15]](<2 x s16>) - ; GFX9-HSA-NEXT: [[TRUNC32:%[0-9]+]]:_(<32 x s1>) = G_TRUNC [[CONCAT_VECTORS]](<32 x s16>) - ; GFX9-HSA-NEXT: $vgpr0 = COPY [[TRUNC32]](<32 x s1>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX9-HSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX9-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; GFX9-HSA-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C2]](i32) + ; GFX9-HSA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-HSA-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C3]](i32) + ; GFX9-HSA-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 5 + ; GFX9-HSA-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C4]](i32) + ; GFX9-HSA-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 6 + ; GFX9-HSA-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C5]](i32) + ; GFX9-HSA-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 7 + ; GFX9-HSA-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C6]](i32) + ; GFX9-HSA-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-HSA-NEXT: [[LSHR7:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C7]](i32) + ; GFX9-HSA-NEXT: [[C8:%[0-9]+]]:_(i32) = G_CONSTANT i32 9 + ; GFX9-HSA-NEXT: [[LSHR8:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C8]](i32) + ; GFX9-HSA-NEXT: [[C9:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GFX9-HSA-NEXT: [[LSHR9:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C9]](i32) + ; GFX9-HSA-NEXT: [[C10:%[0-9]+]]:_(i32) = G_CONSTANT i32 11 + ; GFX9-HSA-NEXT: [[LSHR10:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C10]](i32) + ; GFX9-HSA-NEXT: [[C11:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX9-HSA-NEXT: [[LSHR11:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C11]](i32) + ; GFX9-HSA-NEXT: [[C12:%[0-9]+]]:_(i32) = G_CONSTANT i32 13 + ; GFX9-HSA-NEXT: [[LSHR12:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C12]](i32) + ; GFX9-HSA-NEXT: [[C13:%[0-9]+]]:_(i32) = G_CONSTANT i32 14 + ; GFX9-HSA-NEXT: [[LSHR13:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C13]](i32) + ; GFX9-HSA-NEXT: [[C14:%[0-9]+]]:_(i32) = G_CONSTANT i32 15 + ; GFX9-HSA-NEXT: [[LSHR14:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C14]](i32) + ; GFX9-HSA-NEXT: [[C15:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-HSA-NEXT: [[LSHR15:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C15]](i32) + ; GFX9-HSA-NEXT: [[C16:%[0-9]+]]:_(i32) = G_CONSTANT i32 17 + ; GFX9-HSA-NEXT: [[LSHR16:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C16]](i32) + ; GFX9-HSA-NEXT: [[C17:%[0-9]+]]:_(i32) = G_CONSTANT i32 18 + ; GFX9-HSA-NEXT: [[LSHR17:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C17]](i32) + ; GFX9-HSA-NEXT: [[C18:%[0-9]+]]:_(i32) = G_CONSTANT i32 19 + ; GFX9-HSA-NEXT: [[LSHR18:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C18]](i32) + ; GFX9-HSA-NEXT: 
[[C19:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GFX9-HSA-NEXT: [[LSHR19:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C19]](i32) + ; GFX9-HSA-NEXT: [[C20:%[0-9]+]]:_(i32) = G_CONSTANT i32 21 + ; GFX9-HSA-NEXT: [[LSHR20:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C20]](i32) + ; GFX9-HSA-NEXT: [[C21:%[0-9]+]]:_(i32) = G_CONSTANT i32 22 + ; GFX9-HSA-NEXT: [[LSHR21:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C21]](i32) + ; GFX9-HSA-NEXT: [[C22:%[0-9]+]]:_(i32) = G_CONSTANT i32 23 + ; GFX9-HSA-NEXT: [[LSHR22:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C22]](i32) + ; GFX9-HSA-NEXT: [[C23:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; GFX9-HSA-NEXT: [[LSHR23:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C23]](i32) + ; GFX9-HSA-NEXT: [[C24:%[0-9]+]]:_(i32) = G_CONSTANT i32 25 + ; GFX9-HSA-NEXT: [[LSHR24:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C24]](i32) + ; GFX9-HSA-NEXT: [[C25:%[0-9]+]]:_(i32) = G_CONSTANT i32 26 + ; GFX9-HSA-NEXT: [[LSHR25:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C25]](i32) + ; GFX9-HSA-NEXT: [[C26:%[0-9]+]]:_(i32) = G_CONSTANT i32 27 + ; GFX9-HSA-NEXT: [[LSHR26:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C26]](i32) + ; GFX9-HSA-NEXT: [[C27:%[0-9]+]]:_(i32) = G_CONSTANT i32 28 + ; GFX9-HSA-NEXT: [[LSHR27:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C27]](i32) + ; GFX9-HSA-NEXT: [[C28:%[0-9]+]]:_(i32) = G_CONSTANT i32 29 + ; GFX9-HSA-NEXT: [[LSHR28:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C28]](i32) + ; GFX9-HSA-NEXT: [[C29:%[0-9]+]]:_(i32) = G_CONSTANT i32 30 + ; GFX9-HSA-NEXT: [[LSHR29:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C29]](i32) + ; GFX9-HSA-NEXT: [[C30:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX9-HSA-NEXT: [[LSHR30:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C30]](i32) + ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-HSA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-HSA-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX9-HSA-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX9-HSA-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR4]](i32) + ; GFX9-HSA-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR5]](i32) + ; GFX9-HSA-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR6]](i32) + ; GFX9-HSA-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR7]](i32) + ; GFX9-HSA-NEXT: [[TRUNC9:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR8]](i32) + ; GFX9-HSA-NEXT: [[TRUNC10:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR9]](i32) + ; GFX9-HSA-NEXT: [[TRUNC11:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR10]](i32) + ; GFX9-HSA-NEXT: [[TRUNC12:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR11]](i32) + ; GFX9-HSA-NEXT: [[TRUNC13:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR12]](i32) + ; GFX9-HSA-NEXT: [[TRUNC14:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR13]](i32) + ; GFX9-HSA-NEXT: [[TRUNC15:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR14]](i32) + ; GFX9-HSA-NEXT: [[TRUNC16:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR15]](i32) + ; GFX9-HSA-NEXT: [[TRUNC17:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR16]](i32) + ; GFX9-HSA-NEXT: [[TRUNC18:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR17]](i32) + ; GFX9-HSA-NEXT: [[TRUNC19:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR18]](i32) + ; GFX9-HSA-NEXT: [[TRUNC20:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR19]](i32) + ; GFX9-HSA-NEXT: [[TRUNC21:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR20]](i32) + ; GFX9-HSA-NEXT: [[TRUNC22:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR21]](i32) + ; GFX9-HSA-NEXT: [[TRUNC23:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR22]](i32) + ; GFX9-HSA-NEXT: [[TRUNC24:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR23]](i32) + ; GFX9-HSA-NEXT: [[TRUNC25:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR24]](i32) + ; GFX9-HSA-NEXT: [[TRUNC26:%[0-9]+]]:_(i16) = G_TRUNC 
[[LSHR25]](i32) + ; GFX9-HSA-NEXT: [[TRUNC27:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR26]](i32) + ; GFX9-HSA-NEXT: [[TRUNC28:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR27]](i32) + ; GFX9-HSA-NEXT: [[TRUNC29:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR28]](i32) + ; GFX9-HSA-NEXT: [[TRUNC30:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR29]](i32) + ; GFX9-HSA-NEXT: [[TRUNC31:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR30]](i32) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC6]](i16), [[TRUNC7]](i16) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC8]](i16), [[TRUNC9]](i16) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC10]](i16), [[TRUNC11]](i16) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC12]](i16), [[TRUNC13]](i16) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC14]](i16), [[TRUNC15]](i16) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR8:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC16]](i16), [[TRUNC17]](i16) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR9:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC18]](i16), [[TRUNC19]](i16) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR10:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC20]](i16), [[TRUNC21]](i16) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR11:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC22]](i16), [[TRUNC23]](i16) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR12:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC24]](i16), [[TRUNC25]](i16) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR13:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC26]](i16), [[TRUNC27]](i16) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR14:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC28]](i16), [[TRUNC29]](i16) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR15:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC30]](i16), [[TRUNC31]](i16) + ; GFX9-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR5]](<2 x i16>), [[BUILD_VECTOR6]](<2 x i16>), [[BUILD_VECTOR7]](<2 x i16>), [[BUILD_VECTOR8]](<2 x i16>), [[BUILD_VECTOR9]](<2 x i16>), [[BUILD_VECTOR10]](<2 x i16>), [[BUILD_VECTOR11]](<2 x i16>), [[BUILD_VECTOR12]](<2 x i16>), [[BUILD_VECTOR13]](<2 x i16>), [[BUILD_VECTOR14]](<2 x i16>), [[BUILD_VECTOR15]](<2 x i16>) + ; GFX9-HSA-NEXT: [[TRUNC32:%[0-9]+]]:_(<32 x i1>) = G_TRUNC [[CONCAT_VECTORS]](<32 x i16>) + ; GFX9-HSA-NEXT: $vgpr0 = COPY [[TRUNC32]](<32 x i1>) ; ; GFX9-MESA-LABEL: name: test_load_global_v32s1_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX9-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; GFX9-MESA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) - ; GFX9-MESA-NEXT: 
[[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-MESA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; GFX9-MESA-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32) - ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 - ; GFX9-MESA-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C5]](s32) - ; GFX9-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; GFX9-MESA-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C6]](s32) - ; GFX9-MESA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C7]](s32) - ; GFX9-MESA-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 - ; GFX9-MESA-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C8]](s32) - ; GFX9-MESA-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GFX9-MESA-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C9]](s32) - ; GFX9-MESA-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 - ; GFX9-MESA-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C10]](s32) - ; GFX9-MESA-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX9-MESA-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C11]](s32) - ; GFX9-MESA-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 - ; GFX9-MESA-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C12]](s32) - ; GFX9-MESA-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 14 - ; GFX9-MESA-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C13]](s32) - ; GFX9-MESA-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 - ; GFX9-MESA-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C14]](s32) - ; GFX9-MESA-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C15]](s32) - ; GFX9-MESA-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 - ; GFX9-MESA-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C16]](s32) - ; GFX9-MESA-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 18 - ; GFX9-MESA-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C17]](s32) - ; GFX9-MESA-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 19 - ; GFX9-MESA-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C18]](s32) - ; GFX9-MESA-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GFX9-MESA-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C19]](s32) - ; GFX9-MESA-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 21 - ; GFX9-MESA-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C20]](s32) - ; GFX9-MESA-NEXT: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 22 - ; GFX9-MESA-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C21]](s32) - ; GFX9-MESA-NEXT: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 - ; GFX9-MESA-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C22]](s32) - ; GFX9-MESA-NEXT: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9-MESA-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C23]](s32) - ; GFX9-MESA-NEXT: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 - ; GFX9-MESA-NEXT: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C24]](s32) - ; GFX9-MESA-NEXT: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 26 - ; GFX9-MESA-NEXT: [[LSHR25:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C25]](s32) - ; GFX9-MESA-NEXT: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 27 - ; GFX9-MESA-NEXT: [[LSHR26:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C26]](s32) - ; GFX9-MESA-NEXT: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; GFX9-MESA-NEXT: [[LSHR27:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C27]](s32) - ; GFX9-MESA-NEXT: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 29 - ; 
GFX9-MESA-NEXT: [[LSHR28:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C28]](s32) - ; GFX9-MESA-NEXT: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 30 - ; GFX9-MESA-NEXT: [[LSHR29:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C29]](s32) - ; GFX9-MESA-NEXT: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX9-MESA-NEXT: [[LSHR30:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C30]](s32) - ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX9-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; GFX9-MESA-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; GFX9-MESA-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; GFX9-MESA-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; GFX9-MESA-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) - ; GFX9-MESA-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) - ; GFX9-MESA-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR10]](s32) - ; GFX9-MESA-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) - ; GFX9-MESA-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR12]](s32) - ; GFX9-MESA-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR13]](s32) - ; GFX9-MESA-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR14]](s32) - ; GFX9-MESA-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR15]](s32) - ; GFX9-MESA-NEXT: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR16]](s32) - ; GFX9-MESA-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR17]](s32) - ; GFX9-MESA-NEXT: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR18]](s32) - ; GFX9-MESA-NEXT: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR19]](s32) - ; GFX9-MESA-NEXT: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR20]](s32) - ; GFX9-MESA-NEXT: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR21]](s32) - ; GFX9-MESA-NEXT: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR22]](s32) - ; GFX9-MESA-NEXT: [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR23]](s32) - ; GFX9-MESA-NEXT: [[TRUNC25:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR24]](s32) - ; GFX9-MESA-NEXT: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR25]](s32) - ; GFX9-MESA-NEXT: [[TRUNC27:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR26]](s32) - ; GFX9-MESA-NEXT: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR27]](s32) - ; GFX9-MESA-NEXT: [[TRUNC29:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR28]](s32) - ; GFX9-MESA-NEXT: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR29]](s32) - ; GFX9-MESA-NEXT: [[TRUNC31:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR30]](s32) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC8]](s16), [[TRUNC9]](s16) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC10]](s16), [[TRUNC11]](s16) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC12]](s16), [[TRUNC13]](s16) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC14]](s16), [[TRUNC15]](s16) - ; 
GFX9-MESA-NEXT: [[BUILD_VECTOR8:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC16]](s16), [[TRUNC17]](s16) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR9:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC18]](s16), [[TRUNC19]](s16) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR10:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC20]](s16), [[TRUNC21]](s16) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR11:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC22]](s16), [[TRUNC23]](s16) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR12:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC24]](s16), [[TRUNC25]](s16) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR13:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC26]](s16), [[TRUNC27]](s16) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR14:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC28]](s16), [[TRUNC29]](s16) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR15:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC30]](s16), [[TRUNC31]](s16) - ; GFX9-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), [[BUILD_VECTOR6]](<2 x s16>), [[BUILD_VECTOR7]](<2 x s16>), [[BUILD_VECTOR8]](<2 x s16>), [[BUILD_VECTOR9]](<2 x s16>), [[BUILD_VECTOR10]](<2 x s16>), [[BUILD_VECTOR11]](<2 x s16>), [[BUILD_VECTOR12]](<2 x s16>), [[BUILD_VECTOR13]](<2 x s16>), [[BUILD_VECTOR14]](<2 x s16>), [[BUILD_VECTOR15]](<2 x s16>) - ; GFX9-MESA-NEXT: [[TRUNC32:%[0-9]+]]:_(<32 x s1>) = G_TRUNC [[CONCAT_VECTORS]](<32 x s16>) - ; GFX9-MESA-NEXT: $vgpr0 = COPY [[TRUNC32]](<32 x s1>) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX9-MESA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX9-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; GFX9-MESA-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C2]](i32) + ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-MESA-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 5 + ; GFX9-MESA-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C4]](i32) + ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 6 + ; GFX9-MESA-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C5]](i32) + ; GFX9-MESA-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 7 + ; GFX9-MESA-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C6]](i32) + ; GFX9-MESA-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-MESA-NEXT: [[LSHR7:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C7]](i32) + ; GFX9-MESA-NEXT: [[C8:%[0-9]+]]:_(i32) = G_CONSTANT i32 9 + ; GFX9-MESA-NEXT: [[LSHR8:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C8]](i32) + ; GFX9-MESA-NEXT: [[C9:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GFX9-MESA-NEXT: [[LSHR9:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C9]](i32) + ; GFX9-MESA-NEXT: [[C10:%[0-9]+]]:_(i32) = G_CONSTANT i32 11 + ; GFX9-MESA-NEXT: [[LSHR10:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C10]](i32) + ; GFX9-MESA-NEXT: [[C11:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX9-MESA-NEXT: [[LSHR11:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C11]](i32) + ; GFX9-MESA-NEXT: [[C12:%[0-9]+]]:_(i32) = G_CONSTANT i32 13 + ; GFX9-MESA-NEXT: [[LSHR12:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C12]](i32) + ; GFX9-MESA-NEXT: [[C13:%[0-9]+]]:_(i32) = G_CONSTANT i32 14 
+ ; GFX9-MESA-NEXT: [[LSHR13:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C13]](i32) + ; GFX9-MESA-NEXT: [[C14:%[0-9]+]]:_(i32) = G_CONSTANT i32 15 + ; GFX9-MESA-NEXT: [[LSHR14:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C14]](i32) + ; GFX9-MESA-NEXT: [[C15:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-MESA-NEXT: [[LSHR15:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C15]](i32) + ; GFX9-MESA-NEXT: [[C16:%[0-9]+]]:_(i32) = G_CONSTANT i32 17 + ; GFX9-MESA-NEXT: [[LSHR16:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C16]](i32) + ; GFX9-MESA-NEXT: [[C17:%[0-9]+]]:_(i32) = G_CONSTANT i32 18 + ; GFX9-MESA-NEXT: [[LSHR17:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C17]](i32) + ; GFX9-MESA-NEXT: [[C18:%[0-9]+]]:_(i32) = G_CONSTANT i32 19 + ; GFX9-MESA-NEXT: [[LSHR18:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C18]](i32) + ; GFX9-MESA-NEXT: [[C19:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GFX9-MESA-NEXT: [[LSHR19:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C19]](i32) + ; GFX9-MESA-NEXT: [[C20:%[0-9]+]]:_(i32) = G_CONSTANT i32 21 + ; GFX9-MESA-NEXT: [[LSHR20:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C20]](i32) + ; GFX9-MESA-NEXT: [[C21:%[0-9]+]]:_(i32) = G_CONSTANT i32 22 + ; GFX9-MESA-NEXT: [[LSHR21:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C21]](i32) + ; GFX9-MESA-NEXT: [[C22:%[0-9]+]]:_(i32) = G_CONSTANT i32 23 + ; GFX9-MESA-NEXT: [[LSHR22:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C22]](i32) + ; GFX9-MESA-NEXT: [[C23:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; GFX9-MESA-NEXT: [[LSHR23:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C23]](i32) + ; GFX9-MESA-NEXT: [[C24:%[0-9]+]]:_(i32) = G_CONSTANT i32 25 + ; GFX9-MESA-NEXT: [[LSHR24:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C24]](i32) + ; GFX9-MESA-NEXT: [[C25:%[0-9]+]]:_(i32) = G_CONSTANT i32 26 + ; GFX9-MESA-NEXT: [[LSHR25:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C25]](i32) + ; GFX9-MESA-NEXT: [[C26:%[0-9]+]]:_(i32) = G_CONSTANT i32 27 + ; GFX9-MESA-NEXT: [[LSHR26:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C26]](i32) + ; GFX9-MESA-NEXT: [[C27:%[0-9]+]]:_(i32) = G_CONSTANT i32 28 + ; GFX9-MESA-NEXT: [[LSHR27:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C27]](i32) + ; GFX9-MESA-NEXT: [[C28:%[0-9]+]]:_(i32) = G_CONSTANT i32 29 + ; GFX9-MESA-NEXT: [[LSHR28:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C28]](i32) + ; GFX9-MESA-NEXT: [[C29:%[0-9]+]]:_(i32) = G_CONSTANT i32 30 + ; GFX9-MESA-NEXT: [[LSHR29:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C29]](i32) + ; GFX9-MESA-NEXT: [[C30:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX9-MESA-NEXT: [[LSHR30:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C30]](i32) + ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX9-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX9-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR4]](i32) + ; GFX9-MESA-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR5]](i32) + ; GFX9-MESA-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR6]](i32) + ; GFX9-MESA-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR7]](i32) + ; GFX9-MESA-NEXT: [[TRUNC9:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR8]](i32) + ; GFX9-MESA-NEXT: [[TRUNC10:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR9]](i32) + ; GFX9-MESA-NEXT: [[TRUNC11:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR10]](i32) + ; GFX9-MESA-NEXT: [[TRUNC12:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR11]](i32) + ; GFX9-MESA-NEXT: [[TRUNC13:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR12]](i32) + ; GFX9-MESA-NEXT: [[TRUNC14:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR13]](i32) + ; 
GFX9-MESA-NEXT: [[TRUNC15:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR14]](i32) + ; GFX9-MESA-NEXT: [[TRUNC16:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR15]](i32) + ; GFX9-MESA-NEXT: [[TRUNC17:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR16]](i32) + ; GFX9-MESA-NEXT: [[TRUNC18:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR17]](i32) + ; GFX9-MESA-NEXT: [[TRUNC19:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR18]](i32) + ; GFX9-MESA-NEXT: [[TRUNC20:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR19]](i32) + ; GFX9-MESA-NEXT: [[TRUNC21:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR20]](i32) + ; GFX9-MESA-NEXT: [[TRUNC22:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR21]](i32) + ; GFX9-MESA-NEXT: [[TRUNC23:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR22]](i32) + ; GFX9-MESA-NEXT: [[TRUNC24:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR23]](i32) + ; GFX9-MESA-NEXT: [[TRUNC25:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR24]](i32) + ; GFX9-MESA-NEXT: [[TRUNC26:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR25]](i32) + ; GFX9-MESA-NEXT: [[TRUNC27:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR26]](i32) + ; GFX9-MESA-NEXT: [[TRUNC28:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR27]](i32) + ; GFX9-MESA-NEXT: [[TRUNC29:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR28]](i32) + ; GFX9-MESA-NEXT: [[TRUNC30:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR29]](i32) + ; GFX9-MESA-NEXT: [[TRUNC31:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR30]](i32) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC6]](i16), [[TRUNC7]](i16) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC8]](i16), [[TRUNC9]](i16) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC10]](i16), [[TRUNC11]](i16) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC12]](i16), [[TRUNC13]](i16) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC14]](i16), [[TRUNC15]](i16) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR8:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC16]](i16), [[TRUNC17]](i16) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR9:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC18]](i16), [[TRUNC19]](i16) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR10:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC20]](i16), [[TRUNC21]](i16) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR11:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC22]](i16), [[TRUNC23]](i16) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR12:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC24]](i16), [[TRUNC25]](i16) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR13:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC26]](i16), [[TRUNC27]](i16) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR14:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC28]](i16), [[TRUNC29]](i16) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR15:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC30]](i16), [[TRUNC31]](i16) + ; GFX9-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR5]](<2 x i16>), [[BUILD_VECTOR6]](<2 x i16>), [[BUILD_VECTOR7]](<2 x i16>), [[BUILD_VECTOR8]](<2 x i16>), [[BUILD_VECTOR9]](<2 x i16>), [[BUILD_VECTOR10]](<2 x i16>), [[BUILD_VECTOR11]](<2 x i16>), [[BUILD_VECTOR12]](<2 x i16>), [[BUILD_VECTOR13]](<2 x i16>), [[BUILD_VECTOR14]](<2 x i16>), 
[[BUILD_VECTOR15]](<2 x i16>) + ; GFX9-MESA-NEXT: [[TRUNC32:%[0-9]+]]:_(<32 x i1>) = G_TRUNC [[CONCAT_VECTORS]](<32 x i16>) + ; GFX9-MESA-NEXT: $vgpr0 = COPY [[TRUNC32]](<32 x i1>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<32 x s1>) = G_LOAD %0 :: (load (<32 x s1>), align 4, addrspace 1) - $vgpr0 = COPY %1 + %1:_(<32 x i1>) = G_LOAD %0(p1) :: (load (<32 x i1>), addrspace 1) + $vgpr0 = COPY %1(<32 x i1>) ... --- @@ -16196,166 +16196,166 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32) - ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C5]](s32) - ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C6]](s32) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32), [[LSHR3]](s32), [[LSHR4]](s32), [[LSHR5]](s32), [[LSHR6]](s32) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(<8 x s4>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>) - ; SI-NEXT: $vgpr0 = COPY [[TRUNC]](<8 x s4>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C2]](i32) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C3]](i32) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C4]](i32) + ; SI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C5]](i32) + ; SI-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 28 + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C6]](i32) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LSHR]](i32), [[LSHR1]](i32), [[LSHR2]](i32), [[LSHR3]](i32), [[LSHR4]](i32), [[LSHR5]](i32), [[LSHR6]](i32) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(<8 x i4>) = G_TRUNC [[BUILD_VECTOR]](<8 x i32>) + ; SI-NEXT: $vgpr0 = COPY [[TRUNC]](<8 x i4>) ; ; CI-HSA-LABEL: name: test_load_global_v8s4_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-HSA-NEXT: 
[[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CI-HSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) - ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-HSA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32) - ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CI-HSA-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32) - ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CI-HSA-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C5]](s32) - ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; CI-HSA-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C6]](s32) - ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32), [[LSHR3]](s32), [[LSHR4]](s32), [[LSHR5]](s32), [[LSHR6]](s32) - ; CI-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(<8 x s4>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>) - ; CI-HSA-NEXT: $vgpr0 = COPY [[TRUNC]](<8 x s4>) + ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-HSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CI-HSA-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C2]](i32) + ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-HSA-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C3]](i32) + ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CI-HSA-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C4]](i32) + ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; CI-HSA-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C5]](i32) + ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 28 + ; CI-HSA-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C6]](i32) + ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LSHR]](i32), [[LSHR1]](i32), [[LSHR2]](i32), [[LSHR3]](i32), [[LSHR4]](i32), [[LSHR5]](i32), [[LSHR6]](i32) + ; CI-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(<8 x i4>) = G_TRUNC [[BUILD_VECTOR]](<8 x i32>) + ; CI-HSA-NEXT: $vgpr0 = COPY [[TRUNC]](<8 x i4>) ; ; CI-MESA-LABEL: name: test_load_global_v8s4_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CI-MESA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) - ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32) - ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CI-MESA-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32) - ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CI-MESA-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C5]](s32) - ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; CI-MESA-NEXT: 
[[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C6]](s32) - ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32), [[LSHR3]](s32), [[LSHR4]](s32), [[LSHR5]](s32), [[LSHR6]](s32) - ; CI-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(<8 x s4>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>) - ; CI-MESA-NEXT: $vgpr0 = COPY [[TRUNC]](<8 x s4>) + ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-MESA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CI-MESA-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C2]](i32) + ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C3]](i32) + ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CI-MESA-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C4]](i32) + ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; CI-MESA-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C5]](i32) + ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 28 + ; CI-MESA-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C6]](i32) + ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LSHR]](i32), [[LSHR1]](i32), [[LSHR2]](i32), [[LSHR3]](i32), [[LSHR4]](i32), [[LSHR5]](i32), [[LSHR6]](i32) + ; CI-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(<8 x i4>) = G_TRUNC [[BUILD_VECTOR]](<8 x i32>) + ; CI-MESA-NEXT: $vgpr0 = COPY [[TRUNC]](<8 x i4>) ; ; VI-LABEL: name: test_load_global_v8s4_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C5]](s32) - ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C6]](s32) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32), [[LSHR3]](s32), [[LSHR4]](s32), [[LSHR5]](s32), [[LSHR6]](s32) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(<8 x s4>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>) - ; VI-NEXT: $vgpr0 = COPY [[TRUNC]](<8 x s4>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT 
i32 12 + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C2]](i32) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C3]](i32) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C4]](i32) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C5]](i32) + ; VI-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 28 + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C6]](i32) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LSHR]](i32), [[LSHR1]](i32), [[LSHR2]](i32), [[LSHR3]](i32), [[LSHR4]](i32), [[LSHR5]](i32), [[LSHR6]](i32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(<8 x i4>) = G_TRUNC [[BUILD_VECTOR]](<8 x i32>) + ; VI-NEXT: $vgpr0 = COPY [[TRUNC]](<8 x i4>) ; ; GFX9-HSA-LABEL: name: test_load_global_v8s4_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX9-HSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) - ; GFX9-HSA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-HSA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32) - ; GFX9-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GFX9-HSA-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32) - ; GFX9-HSA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9-HSA-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C5]](s32) - ; GFX9-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; GFX9-HSA-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C6]](s32) - ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-HSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-HSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-HSA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX9-HSA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; GFX9-HSA-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; GFX9-HSA-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX9-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX9-HSA-NEXT: [[TRUNC8:%[0-9]+]]:_(<8 x s4>) = G_TRUNC [[CONCAT_VECTORS]](<8 x s16>) - ; GFX9-HSA-NEXT: $vgpr0 = COPY [[TRUNC8]](<8 x s4>) + ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; GFX9-HSA-NEXT: 
[[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-HSA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX9-HSA-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C2]](i32) + ; GFX9-HSA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-HSA-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C3]](i32) + ; GFX9-HSA-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GFX9-HSA-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C4]](i32) + ; GFX9-HSA-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; GFX9-HSA-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C5]](i32) + ; GFX9-HSA-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 28 + ; GFX9-HSA-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C6]](i32) + ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-HSA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-HSA-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX9-HSA-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX9-HSA-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR4]](i32) + ; GFX9-HSA-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR5]](i32) + ; GFX9-HSA-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR6]](i32) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC6]](i16), [[TRUNC7]](i16) + ; GFX9-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>) + ; GFX9-HSA-NEXT: [[TRUNC8:%[0-9]+]]:_(<8 x i4>) = G_TRUNC [[CONCAT_VECTORS]](<8 x i16>) + ; GFX9-HSA-NEXT: $vgpr0 = COPY [[TRUNC8]](<8 x i4>) ; ; GFX9-MESA-LABEL: name: test_load_global_v8s4_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX9-MESA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) - ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32) - ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GFX9-MESA-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32) - ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9-MESA-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C5]](s32) - ; GFX9-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; GFX9-MESA-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C6]](s32) - ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC 
[[LOAD]](s32) - ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX9-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; GFX9-MESA-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; GFX9-MESA-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX9-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX9-MESA-NEXT: [[TRUNC8:%[0-9]+]]:_(<8 x s4>) = G_TRUNC [[CONCAT_VECTORS]](<8 x s16>) - ; GFX9-MESA-NEXT: $vgpr0 = COPY [[TRUNC8]](<8 x s4>) + ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-MESA-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX9-MESA-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C2]](i32) + ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-MESA-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C3]](i32) + ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GFX9-MESA-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C4]](i32) + ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; GFX9-MESA-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C5]](i32) + ; GFX9-MESA-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 28 + ; GFX9-MESA-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C6]](i32) + ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX9-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX9-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR4]](i32) + ; GFX9-MESA-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR5]](i32) + ; GFX9-MESA-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR6]](i32) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC6]](i16), [[TRUNC7]](i16) + ; GFX9-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>), [[BUILD_VECTOR3]](<2 x i16>) + ; 
GFX9-MESA-NEXT: [[TRUNC8:%[0-9]+]]:_(<8 x i4>) = G_TRUNC [[CONCAT_VECTORS]](<8 x i16>)
+    ; GFX9-MESA-NEXT: $vgpr0 = COPY [[TRUNC8]](<8 x i4>)
     %0:_(p1) = COPY $vgpr0_vgpr1
-    %1:_(<8 x s4>) = G_LOAD %0 :: (load (<8 x s4>), align 4, addrspace 1)
-    $vgpr0 = COPY %1
+    %1:_(<8 x i4>) = G_LOAD %0(p1) :: (load (<8 x i4>), addrspace 1)
+    $vgpr0 = COPY %1(<8 x i4>)
 ...

 # Make sure there's no crash on very high alignments
@@ -16369,45 +16369,45 @@ body: |
     ; SI: liveins: $vgpr0_vgpr1
     ; SI-NEXT: {{ $}}
     ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 536870912, addrspace 1)
-    ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 536870912, addrspace 1)
+    ; SI-NEXT: $vgpr0 = COPY [[LOAD]](i32)
     ;
     ; CI-HSA-LABEL: name: test_load_global_s32_align536870912
     ; CI-HSA: liveins: $vgpr0_vgpr1
     ; CI-HSA-NEXT: {{ $}}
     ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 536870912, addrspace 1)
-    ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 536870912, addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](i32)
     ;
     ; CI-MESA-LABEL: name: test_load_global_s32_align536870912
     ; CI-MESA: liveins: $vgpr0_vgpr1
     ; CI-MESA-NEXT: {{ $}}
     ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 536870912, addrspace 1)
-    ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 536870912, addrspace 1)
+    ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](i32)
     ;
     ; VI-LABEL: name: test_load_global_s32_align536870912
    ; VI: liveins: $vgpr0_vgpr1
     ; VI-NEXT: {{ $}}
     ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 536870912, addrspace 1)
-    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 536870912, addrspace 1)
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](i32)
     ;
     ; GFX9-HSA-LABEL: name: test_load_global_s32_align536870912
     ; GFX9-HSA: liveins: $vgpr0_vgpr1
     ; GFX9-HSA-NEXT: {{ $}}
     ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 536870912, addrspace 1)
-    ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 536870912, addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](i32)
     ;
     ; GFX9-MESA-LABEL: name: test_load_global_s32_align536870912
     ; GFX9-MESA: liveins: $vgpr0_vgpr1
     ; GFX9-MESA-NEXT: {{ $}}
     ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 536870912, addrspace 1)
-    ; GFX9-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i16), align 536870912, addrspace 1)
+    ; GFX9-MESA-NEXT: $vgpr0 = COPY [[LOAD]](i32)
     %0:_(p1) = COPY $vgpr0_vgpr1
-    %1:_(s32) = G_LOAD %0 :: (load (s16), align 536870912, addrspace 1)
-    $vgpr0 = COPY %1
+    %1:_(i32) = G_LOAD %0(p1) :: (load (i16), align 536870912, addrspace 1)
+    $vgpr0 = COPY %1(i32)
 ...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir index 1608234d6b2bc..a8410d533fe3b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir @@ -22,95 +22,95 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; SI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; SI-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; CI-LABEL: name: test_load_local_s1_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; CI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; CI-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; CI-DS128-LABEL: name: test_load_local_s1_align1 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; CI-DS128-NEXT: $vgpr0 = COPY [[AND]](s32) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; CI-DS128-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; VI-LABEL: name: test_load_local_s1_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; VI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; VI-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; GFX9-LABEL: name: test_load_local_s1_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; GFX9-NEXT: $vgpr0 = COPY [[AND]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; GFX9-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_s1_align1 ; GFX9-UNALIGNED: liveins: $vgpr0 ; 
GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[AND]](s32) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX9-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; GFX10-LABEL: name: test_load_local_s1_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; GFX10-NEXT: $vgpr0 = COPY [[AND]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; GFX10-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s1_align1 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[AND]](s32) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; GFX11-LABEL: name: test_load_local_s1_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX11-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; GFX11-NEXT: $vgpr0 = COPY [[AND]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX11-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; GFX11-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s1_align1 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX11-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[AND]](s32) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX11-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[AND]](i32) %0:_(p3) = COPY $vgpr0 - %1:_(s1) = G_LOAD %0 :: 
(load (s1), align 1, addrspace 3) - %2:_(s32) = G_ZEXT %1 - $vgpr0 = COPY %2 + %1:_(i1) = G_LOAD %0(p3) :: (load (i1), addrspace 3) + %2:_(i32) = G_ZEXT %1(i1) + $vgpr0 = COPY %2(i32) ... --- @@ -123,95 +123,95 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; SI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; SI-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; CI-LABEL: name: test_load_local_s2_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; CI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; CI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; CI-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; CI-DS128-LABEL: name: test_load_local_s2_align1 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; CI-DS128-NEXT: $vgpr0 = COPY [[AND]](s32) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; CI-DS128-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; VI-LABEL: name: test_load_local_s2_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; VI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; VI-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; GFX9-LABEL: name: test_load_local_s2_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; GFX9-NEXT: $vgpr0 = COPY [[AND]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; GFX9-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_s2_align1 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: 
[[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; GFX9-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[AND]](s32) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; GFX9-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; GFX10-LABEL: name: test_load_local_s2_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; GFX10-NEXT: $vgpr0 = COPY [[AND]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; GFX10-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; GFX10-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s2_align1 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[AND]](s32) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; GFX11-LABEL: name: test_load_local_s2_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; GFX11-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; GFX11-NEXT: $vgpr0 = COPY [[AND]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; GFX11-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; GFX11-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s2_align1 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; GFX11-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[AND]](s32) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; GFX11-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[AND]](i32) %0:_(p3) = COPY $vgpr0 - %1:_(s2) = G_LOAD %0 :: (load (s2), align 1, addrspace 3) - %2:_(s32) = G_ZEXT %1 - $vgpr0 = COPY %2 + %1:_(i2) = G_LOAD %0(p3) :: (load 
(i2), addrspace 3) + %2:_(i32) = G_ZEXT %1(i2) + $vgpr0 = COPY %2(i32) ... --- @@ -224,75 +224,75 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) - ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), align 4, addrspace 3) + ; SI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-LABEL: name: test_load_local_s8_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) - ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), align 4, addrspace 3) + ; CI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-DS128-LABEL: name: test_load_local_s8_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) - ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), align 4, addrspace 3) + ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; VI-LABEL: name: test_load_local_s8_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), align 4, addrspace 3) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-LABEL: name: test_load_local_s8_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), align 4, addrspace 3) + ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_s8_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) - ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), align 4, addrspace 3) + ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX10-LABEL: name: test_load_local_s8_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) - ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), align 4, addrspace 3) + ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s8_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) - ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), align 4, 
addrspace 3) + ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX11-LABEL: name: test_load_local_s8_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) - ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), align 4, addrspace 3) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s8_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) - ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), align 4, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p3) = COPY $vgpr0 - %1:_(s8) = G_LOAD %0 :: (load (s8), align 4, addrspace 3) - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + %1:_(i8) = G_LOAD %0(p3) :: (load (i8), align 4, addrspace 3) + %2:_(i32) = G_ANYEXT %1(i8) + $vgpr0 = COPY %2(i32) ... --- @@ -305,75 +305,75 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; SI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-LABEL: name: test_load_local_s8_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; CI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-DS128-LABEL: name: test_load_local_s8_align1 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; VI-LABEL: name: test_load_local_s8_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-LABEL: name: test_load_local_s8_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_s8_align1 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; 
GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX10-LABEL: name: test_load_local_s8_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s8_align1 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX11-LABEL: name: test_load_local_s8_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s8_align1 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p3) = COPY $vgpr0 - %1:_(s8) = G_LOAD %0 :: (load (s8), align 1, addrspace 3) - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + %1:_(i8) = G_LOAD %0(p3) :: (load (i8), addrspace 3) + %2:_(i32) = G_ANYEXT %1(i8) + $vgpr0 = COPY %2(i32) ... 
--- @@ -386,75 +386,75 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) - ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), align 4, addrspace 3) + ; SI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-LABEL: name: test_load_local_s16_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) - ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), align 4, addrspace 3) + ; CI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-DS128-LABEL: name: test_load_local_s16_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) - ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), align 4, addrspace 3) + ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; VI-LABEL: name: test_load_local_s16_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), align 4, addrspace 3) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-LABEL: name: test_load_local_s16_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), align 4, addrspace 3) + ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_s16_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) - ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), align 4, addrspace 3) + ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX10-LABEL: name: test_load_local_s16_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) - ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), align 4, addrspace 3) + ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s16_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) - ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), align 4, addrspace 3) + ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY 
[[LOAD]](i32) ; ; GFX11-LABEL: name: test_load_local_s16_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) - ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), align 4, addrspace 3) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s16_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) - ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), align 4, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p3) = COPY $vgpr0 - %1:_(s16) = G_LOAD %0 :: (load (s16), align 4, addrspace 3) - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + %1:_(i16) = G_LOAD %0(p3) :: (load (i16), align 4, addrspace 3) + %2:_(i32) = G_ANYEXT %1(i16) + $vgpr0 = COPY %2(i32) ... --- @@ -467,75 +467,75 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; SI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-LABEL: name: test_load_local_s16_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; CI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-DS128-LABEL: name: test_load_local_s16_align2 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; VI-LABEL: name: test_load_local_s16_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-LABEL: name: test_load_local_s16_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_s16_align2 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX9-UNALIGNED-NEXT: $vgpr0 = 
COPY [[LOAD]](s32) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX10-LABEL: name: test_load_local_s16_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s16_align2 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX11-LABEL: name: test_load_local_s16_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s16_align2 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p3) = COPY $vgpr0 - %1:_(s16) = G_LOAD %0 :: (load (s16), align 2, addrspace 3) - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + %1:_(i16) = G_LOAD %0(p3) :: (load (i16), addrspace 3) + %2:_(i32) = G_ANYEXT %1(i16) + $vgpr0 = COPY %2(i32) ... 
--- @@ -548,117 +548,117 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; CI-LABEL: name: test_load_local_s16_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; CI-DS128-LABEL: name: test_load_local_s16_align1 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-DS128-NEXT: $vgpr0 = COPY [[OR]](s32) + ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD 
[[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-DS128-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; VI-LABEL: name: test_load_local_s16_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; GFX9-LABEL: name: test_load_local_s16_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: $vgpr0 = COPY [[OR]](s32) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_s16_align1 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 1, addrspace 3) - ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), align 1, addrspace 3) + ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX10-LABEL: name: test_load_local_s16_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: 
[[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX10-NEXT: $vgpr0 = COPY [[OR]](s32) + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s16_align1 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 1, addrspace 3) - ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), align 1, addrspace 3) + ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX11-LABEL: name: test_load_local_s16_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX11-NEXT: $vgpr0 = COPY [[OR]](s32) + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s16_align1 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 1, addrspace 3) - ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), align 1, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p3) = COPY $vgpr0 - %1:_(s16) = G_LOAD %0 :: (load (s16), 
align 1, addrspace 3) - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + %1:_(i16) = G_LOAD %0(p3) :: (load (i16), align 1, addrspace 3) + %2:_(i32) = G_ANYEXT %1(i16) + $vgpr0 = COPY %2(i32) ... --- @@ -671,74 +671,74 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; SI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-LABEL: name: test_load_local_s32_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; CI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-DS128-LABEL: name: test_load_local_s32_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; VI-LABEL: name: test_load_local_s32_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-LABEL: name: test_load_local_s32_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_s32_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX10-LABEL: name: test_load_local_s32_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s32_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 
3) + ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX11-LABEL: name: test_load_local_s32_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s32_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p3) = COPY $vgpr0 - %1:_(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 3) - $vgpr0 = COPY %1 + %1:_(i32) = G_LOAD %0(p3) :: (load (i32), addrspace 3) + $vgpr0 = COPY %1(i32) ... --- @@ -751,116 +751,116 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; CI-LABEL: name: test_load_local_s32_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], 
[[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; CI-DS128-LABEL: name: test_load_local_s32_align2 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-DS128-NEXT: $vgpr0 = COPY [[OR]](s32) + ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-DS128-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; VI-LABEL: name: test_load_local_s32_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; GFX9-LABEL: name: test_load_local_s32_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: $vgpr0 = COPY [[OR]](s32) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), 
addrspace 3) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_s32_align2 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 2, addrspace 3) - ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), align 2, addrspace 3) + ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX10-LABEL: name: test_load_local_s32_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX10-NEXT: $vgpr0 = COPY [[OR]](s32) + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s32_align2 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 2, addrspace 3) - ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), align 2, addrspace 3) + ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX11-LABEL: name: test_load_local_s32_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX11-NEXT: $vgpr0 = COPY 
[[OR]](s32) + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s32_align2 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 2, addrspace 3) - ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), align 2, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p3) = COPY $vgpr0 - %1:_(s32) = G_LOAD %0 :: (load (s32), align 2, addrspace 3) - $vgpr0 = COPY %1 + %1:_(i32) = G_LOAD %0(p3) :: (load (i32), align 2, addrspace 3) + $vgpr0 = COPY %1(i32) ... --- @@ -873,186 +873,186 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; SI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) 
+ ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; SI-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; CI-LABEL: name: test_load_local_s32_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; CI-DS128-LABEL: name: test_load_local_s32_align1 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: 
[[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-DS128-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-DS128-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; VI-LABEL: name: test_load_local_s32_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: 
[[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; GFX9-LABEL: name: test_load_local_s32_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX9-NEXT: 
[[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_s32_align1 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 1, addrspace 3) - ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), align 1, addrspace 3) + ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX10-LABEL: name: test_load_local_s32_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX10-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = 
G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX10-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s32_align1 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 1, addrspace 3) - ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), align 1, addrspace 3) + ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX11-LABEL: name: test_load_local_s32_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX11-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; 
GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX11-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s32_align1 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 1, addrspace 3) - ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), align 1, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p3) = COPY $vgpr0 - %1:_(s32) = G_LOAD %0 :: (load (s32), align 1, addrspace 3) - $vgpr0 = COPY %1 + %1:_(i32) = G_LOAD %0(p3) :: (load (i32), align 1, addrspace 3) + $vgpr0 = COPY %1(i32) ... 
--- @@ -1065,75 +1065,75 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3) - ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), align 8, addrspace 3) + ; SI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-LABEL: name: test_load_local_s24_align8 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3) - ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), align 8, addrspace 3) + ; CI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-DS128-LABEL: name: test_load_local_s24_align8 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3) - ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), align 8, addrspace 3) + ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; VI-LABEL: name: test_load_local_s24_align8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), align 8, addrspace 3) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-LABEL: name: test_load_local_s24_align8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), align 8, addrspace 3) + ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_s24_align8 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3) - ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), align 8, addrspace 3) + ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX10-LABEL: name: test_load_local_s24_align8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3) - ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), align 8, addrspace 3) + ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s24_align8 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3) - ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), align 8, addrspace 3) + ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY 
[[LOAD]](i32) ; ; GFX11-LABEL: name: test_load_local_s24_align8 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3) - ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), align 8, addrspace 3) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s24_align8 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3) - ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), align 8, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p3) = COPY $vgpr0 - %1:_(s24) = G_LOAD %0 :: (load (s24), align 8, addrspace 3) - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + %1:_(i24) = G_LOAD %0(p3) :: (load (i24), align 8, addrspace 3) + %2:_(i32) = G_ANYEXT %1(i24) + $vgpr0 = COPY %2(i32) ... --- @@ -1146,75 +1146,75 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; SI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-LABEL: name: test_load_local_s24_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; CI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-DS128-LABEL: name: test_load_local_s24_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; VI-LABEL: name: test_load_local_s24_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-LABEL: name: test_load_local_s24_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_s24_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; GFX9-UNALIGNED-NEXT: $vgpr0 = 
COPY [[LOAD]](s32) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX10-LABEL: name: test_load_local_s24_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s24_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX11-LABEL: name: test_load_local_s24_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s24_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p3) = COPY $vgpr0 - %1:_(s24) = G_LOAD %0 :: (load (s24), align 4, addrspace 3) - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + %1:_(i24) = G_LOAD %0(p3) :: (load (i24), align 4, addrspace 3) + %2:_(i32) = G_ANYEXT %1(i24) + $vgpr0 = COPY %2(i32) ... 
--- @@ -1227,135 +1227,135 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 2, align 2, addrspace 3) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 2, align 2, addrspace 3) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; CI-LABEL: name: test_load_local_s24_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 2, align 2, addrspace 3) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 2, align 2, addrspace 3) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; CI-DS128-LABEL: name: test_load_local_s24_align2 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 2, align 2, addrspace 3) - ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-DS128-NEXT: $vgpr0 = COPY [[OR]](s32) + ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + 
; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 2, align 2, addrspace 3) + ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-DS128-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; VI-LABEL: name: test_load_local_s24_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 2, align 2, addrspace 3) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 2, align 2, addrspace 3) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; GFX9-LABEL: name: test_load_local_s24_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 2, align 2, addrspace 3) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: $vgpr0 = COPY [[OR]](s32) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 2, align 2, addrspace 3) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_s24_align2 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX9-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 2, align 2, addrspace 3) - ; 
GFX9-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[OR]](s32) + ; GFX9-UNALIGNED-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX9-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 2, align 2, addrspace 3) + ; GFX9-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; GFX10-LABEL: name: test_load_local_s24_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 2, align 2, addrspace 3) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX10-NEXT: $vgpr0 = COPY [[OR]](s32) + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 2, align 2, addrspace 3) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s24_align2 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 2, align 2, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[OR]](s32) + ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 2, align 2, addrspace 3) + ; GFX10-UNALIGNED-NEXT: 
[[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; GFX11-LABEL: name: test_load_local_s24_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 2, align 2, addrspace 3) - ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX11-NEXT: $vgpr0 = COPY [[OR]](s32) + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 2, align 2, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s24_align2 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX11-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 2, align 2, addrspace 3) - ; GFX11-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX11-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[OR]](s32) + ; GFX11-UNALIGNED-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX11-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 2, align 2, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX11-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[OR]](i32) %0:_(p3) = COPY $vgpr0 - %1:_(s24) = G_LOAD %0 :: (load (s24), align 2, addrspace 3) - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + %1:_(i24) = G_LOAD %0(p3) :: (load (i24), align 2, addrspace 3) + %2:_(i32) = G_ANYEXT %1(i24) + $vgpr0 = COPY %2(i32) ... 
--- @@ -1368,177 +1368,177 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] - ; SI-NEXT: $vgpr0 = COPY [[OR1]](s32) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C3]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[OR]] + ; SI-NEXT: $vgpr0 = COPY [[OR1]](i32) ; ; CI-LABEL: name: test_load_local_s24_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] - ; CI-NEXT: $vgpr0 = COPY [[OR1]](s32) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: 
(load (i8) from unknown-address + 1, addrspace 3) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C3]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[OR]] + ; CI-NEXT: $vgpr0 = COPY [[OR1]](i32) ; ; CI-DS128-LABEL: name: test_load_local_s24_align1 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) - ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] - ; CI-DS128-NEXT: $vgpr0 = COPY [[OR1]](s32) + ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C3]](i32) + ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[OR]] + ; CI-DS128-NEXT: $vgpr0 = COPY [[OR1]](i32) ; ; VI-LABEL: name: test_load_local_s24_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] - ; VI-NEXT: $vgpr0 = COPY [[OR1]](s32) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C3]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[OR]] + ; VI-NEXT: $vgpr0 = COPY [[OR1]](i32) ; ; GFX9-LABEL: name: test_load_local_s24_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] - ; GFX9-NEXT: $vgpr0 = COPY [[OR1]](s32) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i8) from 
unknown-address + 2, addrspace 3) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C3]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[OR]] + ; GFX9-NEXT: $vgpr0 = COPY [[OR1]](i32) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_s24_align1 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), align 1, addrspace 3) - ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX9-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX9-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[OR]](s32) + ; GFX9-UNALIGNED-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), align 1, addrspace 3) + ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX9-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX9-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; GFX10-LABEL: name: test_load_local_s24_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) - ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] - ; GFX10-NEXT: $vgpr0 = COPY [[OR1]](s32) + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: 
[[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C3]](i32) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[OR]] + ; GFX10-NEXT: $vgpr0 = COPY [[OR1]](i32) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s24_align1 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), align 1, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[OR]](s32) + ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), align 1, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; GFX11-LABEL: name: test_load_local_s24_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) - ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] - ; GFX11-NEXT: $vgpr0 = COPY [[OR1]](s32) + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; 
GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C3]](i32) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[OR]] + ; GFX11-NEXT: $vgpr0 = COPY [[OR1]](i32) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s24_align1 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), align 1, addrspace 3) - ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX11-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX11-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX11-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[OR]](s32) + ; GFX11-UNALIGNED-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), align 1, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX11-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX11-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[OR]](i32) %0:_(p3) = COPY $vgpr0 - %1:_(s24) = G_LOAD %0 :: (load (s24), align 1, addrspace 3) - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + %1:_(i24) = G_LOAD %0(p3) :: (load (i24), align 1, addrspace 3) + %2:_(i32) = G_ANYEXT %1(i24) + $vgpr0 = COPY %2(i32) ... 
--- @@ -1551,75 +1551,75 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p3) :: (load (i64), addrspace 3) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; CI-LABEL: name: test_load_local_s48_align8 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p3) :: (load (i64), addrspace 3) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; CI-DS128-LABEL: name: test_load_local_s48_align8 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) - ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p3) :: (load (i64), addrspace 3) + ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; VI-LABEL: name: test_load_local_s48_align8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p3) :: (load (i64), addrspace 3) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; GFX9-LABEL: name: test_load_local_s48_align8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p3) :: (load (i64), addrspace 3) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_s48_align8 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p3) :: (load (i64), addrspace 3) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; GFX10-LABEL: name: test_load_local_s48_align8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p3) :: (load (i64), addrspace 3) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s48_align8 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p3) :: (load (i64), addrspace 3) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; GFX11-LABEL: name: 
test_load_local_s48_align8 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p3) :: (load (i64), addrspace 3) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s48_align8 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p3) :: (load (i64), addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) %0:_(p3) = COPY $vgpr0 - %1:_(s48) = G_LOAD %0 :: (load (s48), align 8, addrspace 3) - %2:_(s64) = G_ANYEXT %1 - $vgpr0_vgpr1 = COPY %2 + %1:_(i48) = G_LOAD %0(p3) :: (load (i48), align 8, addrspace 3) + %2:_(i64) = G_ANYEXT %1(i48) + $vgpr0_vgpr1 = COPY %2(i64) ... --- @@ -1632,74 +1632,74 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p3) :: (load (i64), addrspace 3) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; CI-LABEL: name: test_load_local_s64_align8 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p3) :: (load (i64), addrspace 3) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; CI-DS128-LABEL: name: test_load_local_s64_align8 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) - ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p3) :: (load (i64), addrspace 3) + ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; VI-LABEL: name: test_load_local_s64_align8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p3) :: (load (i64), addrspace 3) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; GFX9-LABEL: name: test_load_local_s64_align8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p3) :: (load (i64), addrspace 3) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_s64_align8 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) - ; 
GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p3) :: (load (i64), addrspace 3) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; GFX10-LABEL: name: test_load_local_s64_align8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p3) :: (load (i64), addrspace 3) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s64_align8 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p3) :: (load (i64), addrspace 3) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; GFX11-LABEL: name: test_load_local_s64_align8 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p3) :: (load (i64), addrspace 3) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s64_align8 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p3) :: (load (i64), addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) %0:_(p3) = COPY $vgpr0 - %1:_(s64) = G_LOAD %0 :: (load (s64), align 8, addrspace 3) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_LOAD %0(p3) :: (load (i64), addrspace 3) + $vgpr0_vgpr1 = COPY %1(i64) ... 
--- @@ -1712,74 +1712,74 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p3) :: (load (i64), align 4, addrspace 3) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; CI-LABEL: name: test_load_local_s64_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p3) :: (load (i64), align 4, addrspace 3) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; CI-DS128-LABEL: name: test_load_local_s64_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) - ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p3) :: (load (i64), align 4, addrspace 3) + ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; VI-LABEL: name: test_load_local_s64_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p3) :: (load (i64), align 4, addrspace 3) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; GFX9-LABEL: name: test_load_local_s64_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p3) :: (load (i64), align 4, addrspace 3) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_s64_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p3) :: (load (i64), align 4, addrspace 3) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; GFX10-LABEL: name: test_load_local_s64_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p3) :: (load (i64), align 4, addrspace 3) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s64_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD 
[[COPY]](p3) :: (load (i64), align 4, addrspace 3) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; GFX11-LABEL: name: test_load_local_s64_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p3) :: (load (i64), align 4, addrspace 3) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s64_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p3) :: (load (i64), align 4, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) %0:_(p3) = COPY $vgpr0 - %1:_(s64) = G_LOAD %0 :: (load (s64), align 4, addrspace 3) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_LOAD %0(p3) :: (load (i64), align 4, addrspace 3) + $vgpr0_vgpr1 = COPY %1(i64) ... --- @@ -1792,208 +1792,208 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](s64) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR]](i32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) 
= G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, addrspace 3) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR1]](i32) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C3]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i64) = G_OR [[SHL2]], [[ZEXT]] + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](i64) ; ; CI-LABEL: name: test_load_local_s64_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](s64) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR]](i32) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, addrspace 3) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; 
CI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR1]](i32) + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i64) = G_OR [[SHL2]], [[ZEXT]] + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](i64) ; ; CI-DS128-LABEL: name: test_load_local_s64_align2 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-DS128-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) - ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) - ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; CI-DS128-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) - ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) - ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] - ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](s64) + ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-DS128-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR]](i32) + ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, addrspace 3) + ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; CI-DS128-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR1]](i32) + ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C3]](i32) + ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(i64) = G_OR [[SHL2]], 
[[ZEXT]] + ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](i64) ; ; VI-LABEL: name: test_load_local_s64_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](s64) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR]](i32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, addrspace 3) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR1]](i32) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i64) = G_OR [[SHL2]], [[ZEXT]] + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](i64) ; ; GFX9-LABEL: name: test_load_local_s64_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: 
(load (s16) from unknown-address + 2, addrspace 3) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) - ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](s64) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR]](i32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, addrspace 3) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR1]](i32) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C3]](i32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i64) = G_OR [[SHL2]], [[ZEXT]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](i64) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_s64_align2 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 2, addrspace 3) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p3) :: (load (i64), align 2, addrspace 3) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; GFX10-LABEL: name: test_load_local_s64_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: 
[[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) - ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) - ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](s64) + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR]](i32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, addrspace 3) + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR1]](i32) + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C3]](i32) + ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(i64) = G_OR [[SHL2]], [[ZEXT]] + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](i64) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s64_align2 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 2, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32) - ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, align 2, addrspace 3) - ; GFX10-UNALIGNED-NEXT: 
[[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD1]](s32) - ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C1]](s32) - ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[ZEXT]] - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), align 2, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[LOAD]](i32) + ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, align 2, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD1]](i32) + ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C1]](i32) + ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[SHL]], [[ZEXT]] + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[OR]](i64) ; ; GFX11-LABEL: name: test_load_local_s64_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX11-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) - ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) - ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) - ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) - ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](s64) + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR]](i32) + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], 
[[C2]](i32) + ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, addrspace 3) + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR1]](i32) + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C3]](i32) + ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(i64) = G_OR [[SHL2]], [[ZEXT]] + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](i64) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s64_align2 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 2, addrspace 3) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p3) :: (load (i64), align 2, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) %0:_(p3) = COPY $vgpr0 - %1:_(s64) = G_LOAD %0 :: (load (s64), align 2, addrspace 3) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_LOAD %0(p3) :: (load (i64), align 2, addrspace 3) + $vgpr0_vgpr1 = COPY %1(i64) ... --- @@ -2006,334 +2006,334 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; SI-NEXT: 
[[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; SI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](s64) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + 
; SI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; SI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; SI-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; SI-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](i64) ; ; CI-LABEL: name: test_load_local_s64_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; CI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](s64) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-NEXT: 
[[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; CI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; CI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; CI-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; CI-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](i64) ; ; CI-DS128-LABEL: name: test_load_local_s64_align1 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-DS128-NEXT: 
[[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-DS128-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-DS128-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-DS128-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; CI-DS128-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-DS128-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-DS128-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-DS128-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; CI-DS128-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CI-DS128-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; CI-DS128-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](s64) + ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; CI-DS128-NEXT: 
[[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-DS128-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-DS128-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-DS128-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; CI-DS128-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-DS128-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-DS128-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-DS128-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; CI-DS128-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; CI-DS128-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; CI-DS128-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](i64) ; ; VI-LABEL: name: test_load_local_s64_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; 
VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; VI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](s64) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD 
[[PTR_ADD3]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; VI-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](i64) ; ; GFX9-LABEL: name: test_load_local_s64_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) 
= G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](s64) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](i64) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_s64_align1 ; GFX9-UNALIGNED: liveins: $vgpr0 ; 
GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 1, addrspace 3) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p3) :: (load (i64), align 1, addrspace 3) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; GFX10-LABEL: name: test_load_local_s64_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX10-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX10-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; GFX10-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; GFX10-NEXT: 
$vgpr0_vgpr1 = COPY [[OR6]](s64) + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX10-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX10-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; GFX10-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](i64) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s64_align1 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 1, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32) - ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD 
[[COPY]], [[C]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, align 1, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD1]](s32) - ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C1]](s32) - ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[ZEXT]] - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), align 1, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[LOAD]](i32) + ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, align 1, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD1]](i32) + ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C1]](i32) + ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[SHL]], [[ZEXT]] + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[OR]](i64) ; ; GFX11-LABEL: name: test_load_local_s64_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX11-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX11-NEXT: 
[[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX11-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; GFX11-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX11-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; GFX11-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](s64) + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX11-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; GFX11-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; GFX11-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX11-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; GFX11-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX11-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; 
GFX11-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX11-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; GFX11-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX11-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; GFX11-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](i64) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s64_align1 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 1, addrspace 3) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p3) :: (load (i64), align 1, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) %0:_(p3) = COPY $vgpr0 - %1:_(s64) = G_LOAD %0 :: (load (s64), align 1, addrspace 3) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_LOAD %0(p3) :: (load (i64), align 1, addrspace 3) + $vgpr0_vgpr1 = COPY %1(i64) ... --- @@ -2346,413 +2346,413 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from 
unknown-address + 7, addrspace 3) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], 
[[C1]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; SI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; SI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; SI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; SI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; SI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; SI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; SI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; CI-LABEL: name: test_load_local_s96_align16 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], 
[[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; CI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, 
addrspace 3) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; CI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; CI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; CI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; CI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; CI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; CI-DS128-LABEL: name: test_load_local_s96_align16 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; 
CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-DS128-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-DS128-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; CI-DS128-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-DS128-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-DS128-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-DS128-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; CI-DS128-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; CI-DS128-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from 
unknown-address + 11, addrspace 3) - ; CI-DS128-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; CI-DS128-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; CI-DS128-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-DS128-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-DS128-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; CI-DS128-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-DS128-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-DS128-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-DS128-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; CI-DS128-NEXT: 
[[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; CI-DS128-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; CI-DS128-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; CI-DS128-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; CI-DS128-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; CI-DS128-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; VI-LABEL: name: test_load_local_s96_align16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; 
VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; VI-NEXT: 
[[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; VI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; VI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; VI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; VI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX9-LABEL: name: test_load_local_s96_align16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL 
[[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX9-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX9-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX9-NEXT: 
[[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX9-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; GFX9-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; GFX9-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX9-NEXT: 
$vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_s96_align16 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 1, addrspace 3) - ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p3) :: (load (<3 x i32>), align 1, addrspace 3) + ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX10-LABEL: name: test_load_local_s96_align16 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX10-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR 
[[SHL5]], [[OR3]] - ; GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX10-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; GFX10-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; GFX10-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX10-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; GFX10-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX10-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; GFX10-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; GFX10-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], 
[[C2]](i32) + ; GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX10-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; GFX10-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; GFX10-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; GFX10-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; GFX10-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; GFX10-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX10-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX10-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; GFX10-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s96_align16 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 1, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, align 1, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, align 1, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), align 1, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; 
GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, align 1, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 8, align 1, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX11-LABEL: name: test_load_local_s96_align16 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX11-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = 
G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX11-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX11-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; GFX11-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; GFX11-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX11-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; GFX11-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX11-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; GFX11-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; GFX11-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX11-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; GFX11-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX11-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD 
[[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; GFX11-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX11-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX11-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX11-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX11-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; GFX11-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; GFX11-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; GFX11-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; GFX11-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; GFX11-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX11-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX11-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; GFX11-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s96_align16 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 1, addrspace 3) - ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p3) :: (load (<3 x i32>), align 1, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) %0:_(p3) = COPY $vgpr0 - %1:_(s96) = G_LOAD %0 :: (load (s96), align 1, addrspace 3) - $vgpr0_vgpr1_vgpr2 = COPY %1 + %1:_(i96) = G_LOAD %0(p3) :: (load (i96), align 1, addrspace 3) + $vgpr0_vgpr1_vgpr2 = COPY %1(i96) ... 
--- @@ -2765,136 +2765,136 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, align 8, addrspace 3) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), addrspace 3) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 8, align 8, addrspace 3) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<2 x i32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[LOAD1]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; CI-LABEL: name: test_load_local_s96_align8 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, align 8, addrspace 3) - ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), addrspace 3) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 8, align 8, addrspace 3) + ; CI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<2 x i32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[LOAD1]](i32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; CI-DS128-LABEL: name: test_load_local_s96_align8 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3) - ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, 
addrspace 3) - ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, align 8, addrspace 3) - ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), align 8, addrspace 3) + ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, addrspace 3) + ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 8, align 8, addrspace 3) + ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; VI-LABEL: name: test_load_local_s96_align8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, align 8, addrspace 3) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), align 8, addrspace 3) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, addrspace 3) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 8, align 8, addrspace 3) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX9-LABEL: name: test_load_local_s96_align8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3) - ; GFX9-NEXT: 
[[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, align 8, addrspace 3) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), align 8, addrspace 3) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, addrspace 3) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 8, align 8, addrspace 3) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_s96_align8 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 8, addrspace 3) - ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p3) :: (load (<3 x i32>), align 8, addrspace 3) + ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX10-LABEL: name: test_load_local_s96_align8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, align 8, addrspace 3) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), align 8, addrspace 3) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: 
[[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, addrspace 3) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 8, align 8, addrspace 3) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s96_align8 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, align 8, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), align 8, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 8, align 8, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX11-LABEL: name: test_load_local_s96_align8 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) - ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) 
from unknown-address + 8, align 8, addrspace 3) - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), align 8, addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 8, align 8, addrspace 3) + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s96_align8 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 8, addrspace 3) - ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p3) :: (load (<3 x i32>), align 8, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) %0:_(p3) = COPY $vgpr0 - %1:_(s96) = G_LOAD %0 :: (load (s96), align 8, addrspace 3) - $vgpr0_vgpr1_vgpr2 = COPY %1 + %1:_(i96) = G_LOAD %0(p3) :: (load (i96), align 8, addrspace 3) + $vgpr0_vgpr1_vgpr2 = COPY %1(i96) ... 
--- @@ -2907,136 +2907,136 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), align 4, addrspace 3) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 8, addrspace 3) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<2 x i32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[LOAD1]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; CI-LABEL: name: test_load_local_s96_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) - ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), align 4, addrspace 3) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 8, addrspace 3) + ; CI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<2 x i32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[LOAD1]](i32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; CI-DS128-LABEL: name: test_load_local_s96_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) 
- ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) - ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, addrspace 3) + ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 8, addrspace 3) + ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; VI-LABEL: name: test_load_local_s96_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, addrspace 3) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 8, addrspace 3) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX9-LABEL: name: test_load_local_s96_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD 
[[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, addrspace 3) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 8, addrspace 3) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_s96_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 4, addrspace 3) - ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p3) :: (load (<3 x i32>), align 4, addrspace 3) + ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX10-LABEL: name: test_load_local_s96_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, 
addrspace 3) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 8, addrspace 3) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s96_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 8, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX11-LABEL: name: test_load_local_s96_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) - ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST 
[[BUILD_VECTOR]](<3 x s32>) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 8, addrspace 3) + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s96_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 4, addrspace 3) - ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p3) :: (load (<3 x i32>), align 4, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) %0:_(p3) = COPY $vgpr0 - %1:_(s96) = G_LOAD %0 :: (load (s96), align 4, addrspace 3) - $vgpr0_vgpr1_vgpr2 = COPY %1 + %1:_(i96) = G_LOAD %0(p3) :: (load (i96), align 4, addrspace 3) + $vgpr0_vgpr1_vgpr2 = COPY %1(i96) ... 
--- @@ -3049,238 +3049,238 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3) - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, addrspace 3) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i16) from unknown-address + 8, addrspace 3) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD 
[[PTR_ADD4]](p3) :: (load (i16) from unknown-address + 10, addrspace 3) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; CI-LABEL: name: test_load_local_s96_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3) - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, addrspace 3) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], 
[[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i16) from unknown-address + 8, addrspace 3) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p3) :: (load (i16) from unknown-address + 10, addrspace 3) + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; CI-DS128-LABEL: name: test_load_local_s96_align2 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) - ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-DS128-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3) - ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) - ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(i32) = 
G_CONSTANT i32 16 + ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, addrspace 3) + ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-DS128-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i16) from unknown-address + 8, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p3) :: (load (i16) from unknown-address + 10, addrspace 3) + ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32) + ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; VI-LABEL: name: test_load_local_s96_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3) - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = 
G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, addrspace 3) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i16) from unknown-address + 8, addrspace 3) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p3) :: (load (i16) from unknown-address + 10, addrspace 3) + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX9-LABEL: name: test_load_local_s96_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX9-NEXT: 
[[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3) - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, addrspace 3) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i16) from unknown-address + 8, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p3) :: (load (i16) from unknown-address + 10, addrspace 3) + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_s96_align2 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 2, addrspace 3) - ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p3) :: (load (<3 x i32>), align 2, addrspace 3) + ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX10-LABEL: name: test_load_local_s96_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY 
$vgpr0 - ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) - ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3) - ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, addrspace 3) + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i16) from unknown-address + 8, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD 
[[PTR_ADD4]](p3) :: (load (i16) from unknown-address + 10, addrspace 3) + ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s96_align2 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 2, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, align 2, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, align 2, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), align 2, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, align 2, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 8, align 2, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX11-LABEL: name: test_load_local_s96_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address 
+ 4, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) - ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3) - ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, addrspace 3) + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i16) from unknown-address + 8, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p3) :: (load (i16) from unknown-address + 10, addrspace 3) + ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s96_align2 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 2, addrspace 3) - ; 
GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p3) :: (load (<3 x i32>), align 2, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) %0:_(p3) = COPY $vgpr0 - %1:_(s96) = G_LOAD %0 :: (load (s96), align 2, addrspace 3) - $vgpr0_vgpr1_vgpr2 = COPY %1 + %1:_(i96) = G_LOAD %0(p3) :: (load (i96), align 2, addrspace 3) + $vgpr0_vgpr1_vgpr2 = COPY %1(i96) ... --- @@ -3293,413 +3293,413 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; SI-NEXT: 
[[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; SI-NEXT: 
[[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; SI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; SI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; SI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; SI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; SI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; SI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; SI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; CI-LABEL: name: test_load_local_s96_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from 
unknown-address + 5, addrspace 3) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; CI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = 
G_OR [[SHL2]], [[OR]] + ; CI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; CI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; CI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; CI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; CI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; CI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; CI-DS128-LABEL: name: test_load_local_s96_align1 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-DS128-NEXT: 
[[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-DS128-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-DS128-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; CI-DS128-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-DS128-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-DS128-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-DS128-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; CI-DS128-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; CI-DS128-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; CI-DS128-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; CI-DS128-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; CI-DS128-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST 
[[BUILD_VECTOR]](<3 x s32>) - ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-DS128-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-DS128-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; CI-DS128-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-DS128-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-DS128-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-DS128-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; CI-DS128-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; CI-DS128-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD 
[[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; CI-DS128-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; CI-DS128-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; CI-DS128-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; VI-LABEL: name: test_load_local_s96_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = 
G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD 
[[PTR_ADD5]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; VI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; VI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; VI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; VI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX9-LABEL: name: test_load_local_s96_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from 
unknown-address + 4, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX9-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX9-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; 
GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX9-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; GFX9-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; GFX9-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_s96_align1 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 1, addrspace 3) - ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; 
GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p3) :: (load (<3 x i32>), align 1, addrspace 3) + ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX10-LABEL: name: test_load_local_s96_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX10-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX10-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; GFX10-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; 
GFX10-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX10-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; GFX10-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX10-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; GFX10-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; GFX10-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX10-NEXT: 
[[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; GFX10-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; GFX10-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; GFX10-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; GFX10-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; GFX10-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX10-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX10-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; GFX10-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s96_align1 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 1, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, align 1, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, align 1, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), align 1, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, align 1, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 8, align 1, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = 
G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX11-LABEL: name: test_load_local_s96_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX11-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX11-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX11-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; GFX11-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; GFX11-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR 
[[SHL6]], [[ZEXTLOAD6]] - ; GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX11-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; GFX11-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX11-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; GFX11-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; GFX11-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX11-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; GFX11-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX11-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; GFX11-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX11-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX11-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; 
GFX11-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX11-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; GFX11-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; GFX11-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; GFX11-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; GFX11-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; GFX11-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX11-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX11-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; GFX11-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s96_align1 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 1, addrspace 3) - ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p3) :: (load (<3 x i32>), align 1, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) %0:_(p3) = COPY $vgpr0 - %1:_(s96) = G_LOAD %0 :: (load (s96), align 1, addrspace 3) - $vgpr0_vgpr1_vgpr2 = COPY %1 + %1:_(i96) = G_LOAD %0(p3) :: (load (i96), align 1, addrspace 3) + $vgpr0_vgpr1_vgpr2 = COPY %1(i96) ... 
--- @@ -3712,523 +3712,523 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) - ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = 
G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32) - ; SI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) - ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) - ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; SI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) - ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) - ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; SI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR8]](s32), [[OR11]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, 
addrspace 3) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32) + ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; SI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; SI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; SI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; SI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; SI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; SI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; SI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i32) + ; SI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (i8) from unknown-address + 12, addrspace 3) + ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (i8) from unknown-address + 13, addrspace 3) + ; SI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; SI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (i8) from unknown-address + 14, addrspace 3) + ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p3) :: (load (i8) from unknown-address + 15, addrspace 3) + ; SI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; SI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; SI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; SI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; SI-NEXT: 
[[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR8]](i32), [[OR11]](i32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i32>), [[BUILD_VECTOR1]](<2 x i32>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[CONCAT_VECTORS]](<4 x i32>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; CI-LABEL: name: test_load_local_s128_align16 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) - ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; CI-NEXT: 
[[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; CI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32) - ; CI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) - ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) - ; CI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; CI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) - ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) - ; CI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; CI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; CI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; CI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR8]](s32), [[OR11]](s32) - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT 
i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32) + ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; CI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; CI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; CI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; CI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; CI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i32) + ; CI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (i8) from unknown-address + 12, addrspace 3) + ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (i8) from unknown-address + 13, addrspace 3) + ; CI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; CI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (i8) from unknown-address + 14, addrspace 3) + ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; CI-NEXT: 
[[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p3) :: (load (i8) from unknown-address + 15, addrspace 3) + ; CI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; CI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; CI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; CI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR8]](i32), [[OR11]](i32) + ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i32>), [[BUILD_VECTOR1]](<2 x i32>) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[CONCAT_VECTORS]](<4 x i32>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; CI-DS128-LABEL: name: test_load_local_s128_align16 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-DS128-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-DS128-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; CI-DS128-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-DS128-NEXT: [[SHL5:%[0-9]+]]:_(s32) = 
G_SHL [[OR4]], [[C3]](s32) - ; CI-DS128-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-DS128-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; CI-DS128-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; CI-DS128-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; CI-DS128-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; CI-DS128-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; CI-DS128-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; CI-DS128-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CI-DS128-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) - ; CI-DS128-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; CI-DS128-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) - ; CI-DS128-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; CI-DS128-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; CI-DS128-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; 
CI-DS128-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-DS128-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-DS128-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; CI-DS128-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-DS128-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-DS128-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-DS128-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; CI-DS128-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; CI-DS128-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; CI-DS128-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; CI-DS128-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; CI-DS128-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; CI-DS128-NEXT: 
[[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CI-DS128-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (i8) from unknown-address + 12, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (i8) from unknown-address + 13, addrspace 3) + ; CI-DS128-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; CI-DS128-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (i8) from unknown-address + 14, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p3) :: (load (i8) from unknown-address + 15, addrspace 3) + ; CI-DS128-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; CI-DS128-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; CI-DS128-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; VI-LABEL: name: test_load_local_s128_align16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; VI-NEXT: 
[[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; VI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) - ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) - ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) - ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) - ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; VI-NEXT: 
[[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; VI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; VI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; 
VI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; VI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; VI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; VI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; VI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (i8) from unknown-address + 12, addrspace 3) + ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (i8) from unknown-address + 13, addrspace 3) + ; VI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; VI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (i8) from unknown-address + 14, addrspace 3) + ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p3) :: (load (i8) from unknown-address + 15, addrspace 3) + ; VI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; VI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; VI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; VI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX9-LABEL: name: test_load_local_s128_align16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; 
GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX9-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX9-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; GFX9-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; GFX9-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) - ; GFX9-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; GFX9-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; GFX9-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) - ; GFX9-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; GFX9-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; GFX9-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR 
[[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX9-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; GFX9-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; GFX9-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from 
unknown-address + 10, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; GFX9-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (i8) from unknown-address + 12, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; GFX9-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (i8) from unknown-address + 13, addrspace 3) + ; GFX9-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; GFX9-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; GFX9-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (i8) from unknown-address + 14, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p3) :: (load (i8) from unknown-address + 15, addrspace 3) + ; GFX9-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; GFX9-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; GFX9-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_s128_align16 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 1, addrspace 3) - ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i32>), align 1, addrspace 3) + ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX10-LABEL: name: test_load_local_s128_align16 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; 
GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX10-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX10-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; GFX10-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; GFX10-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX10-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; GFX10-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX10-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; GFX10-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; GFX10-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX10-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; GFX10-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load 
(s8) from unknown-address + 12, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; GFX10-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) - ; GFX10-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; GFX10-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; GFX10-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; GFX10-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) - ; GFX10-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; GFX10-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; GFX10-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; GFX10-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; 
GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX10-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; GFX10-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; GFX10-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; GFX10-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; GFX10-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; GFX10-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX10-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX10-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; GFX10-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX10-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; GFX10-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (i8) from unknown-address + 12, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; GFX10-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (i8) from unknown-address + 13, addrspace 3) + ; GFX10-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; GFX10-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; GFX10-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; GFX10-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (i8) from unknown-address + 14, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p3) :: (load (i8) from unknown-address + 15, addrspace 3) + ; GFX10-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; GFX10-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; GFX10-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; GFX10-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s128_align16 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: 
[[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 1, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, align 1, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, align 1, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, align 1, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), align 1, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, align 1, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 8, align 1, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i32) from unknown-address + 12, align 1, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX11-LABEL: name: test_load_local_s128_align16 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; 
GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX11-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX11-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX11-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; GFX11-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; GFX11-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX11-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; GFX11-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX11-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; GFX11-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; GFX11-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; GFX11-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX11-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; GFX11-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; GFX11-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) - ; 
GFX11-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; GFX11-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; GFX11-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; GFX11-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) - ; GFX11-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; GFX11-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; GFX11-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; GFX11-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX11-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; GFX11-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX11-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; GFX11-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], 
[[C1]](i32) + ; GFX11-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX11-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX11-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX11-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; GFX11-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; GFX11-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; GFX11-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; GFX11-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; GFX11-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX11-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX11-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; GFX11-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; GFX11-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX11-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; GFX11-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (i8) from unknown-address + 12, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; GFX11-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (i8) from unknown-address + 13, addrspace 3) + ; GFX11-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; GFX11-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; GFX11-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; GFX11-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (i8) from unknown-address + 14, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p3) :: (load (i8) from unknown-address + 15, addrspace 3) + ; GFX11-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; GFX11-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; GFX11-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; GFX11-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s128_align16 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 1, addrspace 3) - ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; 
GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i32>), align 1, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) %0:_(p3) = COPY $vgpr0 - %1:_(s128) = G_LOAD %0 :: (load (s128), align 1, addrspace 3) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(i128) = G_LOAD %0(p3) :: (load (i128), align 1, addrspace 3) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(i128) ... --- @@ -4241,112 +4241,112 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 8, addrspace 3) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), addrspace 3) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x i32>) from unknown-address + 8, addrspace 3) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i32>) = G_CONCAT_VECTORS [[LOAD]](<2 x i32>), [[LOAD1]](<2 x i32>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[CONCAT_VECTORS]](<4 x i32>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; CI-LABEL: name: test_load_local_s128_align8 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 8, addrspace 3) - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), addrspace 3) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x i32>) from unknown-address + 8, addrspace 3) + ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i32>) = G_CONCAT_VECTORS [[LOAD]](<2 x i32>), [[LOAD1]](<2 x i32>) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[CONCAT_VECTORS]](<4 x i32>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; CI-DS128-LABEL: name: test_load_local_s128_align8 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3) - ; CI-DS128-NEXT: 
[[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i32>), align 8, addrspace 3) + ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; VI-LABEL: name: test_load_local_s128_align8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i32>), align 8, addrspace 3) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX9-LABEL: name: test_load_local_s128_align8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i32>), align 8, addrspace 3) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_s128_align8 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3) - ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i32>), align 8, addrspace 3) + ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX10-LABEL: name: test_load_local_s128_align8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, align 8, addrspace 3) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), 
[[LOAD3]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), align 8, addrspace 3) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, addrspace 3) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 8, align 8, addrspace 3) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i32) from unknown-address + 12, addrspace 3) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s128_align8 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, align 8, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), align 8, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 8, align 8, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; 
GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i32) from unknown-address + 12, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX11-LABEL: name: test_load_local_s128_align8 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i32>), align 8, addrspace 3) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s128_align8 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3) - ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i32>), align 8, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) %0:_(p3) = COPY $vgpr0 - %1:_(s128) = G_LOAD %0 :: (load (s128), align 8, addrspace 3) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(i128) = G_LOAD %0(p3) :: (load (i128), align 8, addrspace 3) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(i128) ... 
--- @@ -4359,152 +4359,152 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 8, align 4, addrspace 3) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), align 4, addrspace 3) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x i32>) from unknown-address + 8, align 4, addrspace 3) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i32>) = G_CONCAT_VECTORS [[LOAD]](<2 x i32>), [[LOAD1]](<2 x i32>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[CONCAT_VECTORS]](<4 x i32>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; CI-LABEL: name: test_load_local_s128_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 8, align 4, addrspace 3) - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), align 4, addrspace 3) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x i32>) from unknown-address + 8, align 4, addrspace 3) + ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i32>) = G_CONCAT_VECTORS [[LOAD]](<2 x i32>), [[LOAD1]](<2 x i32>) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[CONCAT_VECTORS]](<4 x i32>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; CI-DS128-LABEL: name: test_load_local_s128_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) - ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, 
addrspace 3) - ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) - ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, addrspace 3) + ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 8, addrspace 3) + ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i32) from unknown-address + 12, addrspace 3) + ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; VI-LABEL: name: test_load_local_s128_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, addrspace 3) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 8, addrspace 
3) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i32) from unknown-address + 12, addrspace 3) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX9-LABEL: name: test_load_local_s128_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, addrspace 3) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 8, addrspace 3) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i32) from unknown-address + 12, addrspace 3) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_s128_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 4, addrspace 3) - ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i32>), align 4, addrspace 3) + ; GFX9-UNALIGNED-NEXT: 
[[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX10-LABEL: name: test_load_local_s128_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, addrspace 3) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 8, addrspace 3) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i32) from unknown-address + 12, addrspace 3) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s128_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], 
[[C2]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 8, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i32) from unknown-address + 12, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX11-LABEL: name: test_load_local_s128_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) - ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) - ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: 
(load (i32) from unknown-address + 8, addrspace 3) + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i32) from unknown-address + 12, addrspace 3) + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s128_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 4, addrspace 3) - ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i32>), align 4, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) %0:_(p3) = COPY $vgpr0 - %1:_(s128) = G_LOAD %0 :: (load (s128), align 4, addrspace 3) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(i128) = G_LOAD %0(p3) :: (load (i128), align 4, addrspace 3) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(i128) ... --- @@ -4517,292 +4517,292 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3) - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD 
[[PTR_ADD3]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR3]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, addrspace 3) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i16) from unknown-address + 8, addrspace 3) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p3) :: (load (i16) from unknown-address + 10, addrspace 3) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i16) from unknown-address + 12, addrspace 3) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i16) from unknown-address + 14, addrspace 3) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR3]](i32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i32>), [[BUILD_VECTOR1]](<2 x i32>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[CONCAT_VECTORS]](<4 x i32>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; 
CI-LABEL: name: test_load_local_s128_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3) - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR3]](s32) - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: 
(load (i16) from unknown-address + 4, addrspace 3) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, addrspace 3) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32) + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i16) from unknown-address + 8, addrspace 3) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p3) :: (load (i16) from unknown-address + 10, addrspace 3) + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i16) from unknown-address + 12, addrspace 3) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i16) from unknown-address + 14, addrspace 3) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR3]](i32) + ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i32>), [[BUILD_VECTOR1]](<2 x i32>) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[CONCAT_VECTORS]](<4 x i32>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; CI-DS128-LABEL: name: test_load_local_s128_align2 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) - ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-DS128-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s16) 
from unknown-address + 8, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3) - ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CI-DS128-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3) - ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) - ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, addrspace 3) + ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-DS128-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i16) from unknown-address + 8, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p3) :: (load (i16) from unknown-address + 10, addrspace 3) + ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CI-DS128-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i16) from unknown-address + 12, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: 
(load (i16) from unknown-address + 14, addrspace 3) + ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32), [[OR3]](i32) + ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; VI-LABEL: name: test_load_local_s128_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3) - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 
+ ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, addrspace 3) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i16) from unknown-address + 8, addrspace 3) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p3) :: (load (i16) from unknown-address + 10, addrspace 3) + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i16) from unknown-address + 12, addrspace 3) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i16) from unknown-address + 14, addrspace 3) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32), [[OR3]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX9-LABEL: name: test_load_local_s128_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], 
[[C3]](s32) - ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3) - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3) - ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, addrspace 3) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i16) from unknown-address + 8, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p3) :: (load (i16) from unknown-address + 10, addrspace 3) + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i16) from unknown-address + 12, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i16) from unknown-address + 14, addrspace 3) + ; 
GFX9-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32), [[OR3]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_s128_align2 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 2, addrspace 3) - ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i32>), align 2, addrspace 3) + ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX10-LABEL: name: test_load_local_s128_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) - ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3) - ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3) - ; GFX10-NEXT: 
[[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, addrspace 3) + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i16) from unknown-address + 8, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p3) :: (load (i16) from unknown-address + 10, addrspace 3) + ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i16) from unknown-address + 12, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i16) from unknown-address + 14, addrspace 3) + ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32), [[OR3]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s128_align2 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 2, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD 
[[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, align 2, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, align 2, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, align 2, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), align 2, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, align 2, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 8, align 2, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i32) from unknown-address + 12, align 2, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX11-LABEL: name: test_load_local_s128_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) - ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = 
G_OR [[SHL1]], [[ZEXTLOAD1]] - ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3) - ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3) - ; GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, addrspace 3) + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i16) from unknown-address + 8, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p3) :: (load (i16) from unknown-address + 10, addrspace 3) + ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; GFX11-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i16) from unknown-address + 12, addrspace 3) + ; GFX11-NEXT: 
[[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i16) from unknown-address + 14, addrspace 3) + ; GFX11-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; GFX11-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32), [[OR3]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s128_align2 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 2, addrspace 3) - ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i32>), align 2, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) %0:_(p3) = COPY $vgpr0 - %1:_(s128) = G_LOAD %0 :: (load (s128), align 2, addrspace 3) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(i128) = G_LOAD %0(p3) :: (load (i128), align 2, addrspace 3) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(i128) ... --- @@ -4815,523 +4815,523 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; 
SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) - ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32) - ; SI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) - ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) - ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; SI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) - ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) - ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; SI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR8]](s32), [[OR11]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x 
s32>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32) + ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; SI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; SI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; SI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; SI-NEXT: 
[[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; SI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; SI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; SI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; SI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i32) + ; SI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (i8) from unknown-address + 12, addrspace 3) + ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (i8) from unknown-address + 13, addrspace 3) + ; SI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; SI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (i8) from unknown-address + 14, addrspace 3) + ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p3) :: (load (i8) from unknown-address + 15, addrspace 3) + ; SI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; SI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; SI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; SI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR8]](i32), [[OR11]](i32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i32>), [[BUILD_VECTOR1]](<2 x i32>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[CONCAT_VECTORS]](<4 x i32>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; CI-LABEL: name: test_load_local_s128_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-NEXT: 
[[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) - ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; CI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32) - ; CI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) - ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) - ; CI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; CI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) - ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) - ; CI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; CI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; CI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL 
[[OR10]], [[C3]](s32) - ; CI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR8]](s32), [[OR11]](s32) - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32) + ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; CI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL 
[[ZEXTLOAD7]], [[C1]](i32) + ; CI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; CI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; CI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; CI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i32) + ; CI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (i8) from unknown-address + 12, addrspace 3) + ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (i8) from unknown-address + 13, addrspace 3) + ; CI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; CI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (i8) from unknown-address + 14, addrspace 3) + ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p3) :: (load (i8) from unknown-address + 15, addrspace 3) + ; CI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; CI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; CI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; CI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR8]](i32), [[OR11]](i32) + ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i32>), [[BUILD_VECTOR1]](<2 x i32>) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[CONCAT_VECTORS]](<4 x i32>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; CI-DS128-LABEL: name: test_load_local_s128_align1 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 
3, addrspace 3) - ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-DS128-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-DS128-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; CI-DS128-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-DS128-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-DS128-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-DS128-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; CI-DS128-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; CI-DS128-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; CI-DS128-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; CI-DS128-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; CI-DS128-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; CI-DS128-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CI-DS128-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) - ; CI-DS128-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], 
[[C1]](s32) - ; CI-DS128-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; CI-DS128-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) - ; CI-DS128-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; CI-DS128-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; CI-DS128-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-DS128-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-DS128-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; 
CI-DS128-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-DS128-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-DS128-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-DS128-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; CI-DS128-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; CI-DS128-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; CI-DS128-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; CI-DS128-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; CI-DS128-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; CI-DS128-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CI-DS128-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (i8) from unknown-address + 12, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (i8) from unknown-address + 13, addrspace 3) + ; CI-DS128-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; CI-DS128-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (i8) from unknown-address + 14, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p3) :: (load (i8) from unknown-address + 15, addrspace 3) + ; CI-DS128-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; CI-DS128-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; CI-DS128-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; VI-LABEL: name: test_load_local_s128_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = 
G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], 
[[OR6]] - ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; VI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) - ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) - ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) - ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) - ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; VI-NEXT: 
[[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; VI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; VI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; VI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; VI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; VI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; VI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (i8) from unknown-address + 12, addrspace 3) + ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (i8) from unknown-address + 13, addrspace 3) + ; VI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; VI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (i8) from unknown-address + 14, addrspace 3) + ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p3) :: (load (i8) from unknown-address + 15, addrspace 3) + ; VI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; VI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; VI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; VI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX9-LABEL: name: test_load_local_s128_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), 
addrspace 3) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX9-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX9-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], 
[[C1]](s32) - ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; GFX9-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; GFX9-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) - ; GFX9-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; GFX9-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; GFX9-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) - ; GFX9-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; GFX9-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; GFX9-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from 
unknown-address + 5, addrspace 3) + ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX9-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; GFX9-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; GFX9-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; GFX9-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (i8) from unknown-address + 12, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; GFX9-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (i8) from unknown-address + 13, addrspace 3) + ; GFX9-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; GFX9-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; GFX9-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (i8) from unknown-address + 14, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p3) :: (load (i8) from unknown-address + 15, addrspace 3) + ; GFX9-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; GFX9-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; GFX9-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; GFX9-NEXT: 
[[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_s128_align1 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 1, addrspace 3) - ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i32>), align 1, addrspace 3) + ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX10-LABEL: name: test_load_local_s128_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX10-NEXT: 
[[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX10-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; GFX10-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; GFX10-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX10-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; GFX10-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX10-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; GFX10-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; GFX10-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX10-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; GFX10-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; GFX10-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) - ; GFX10-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; GFX10-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; GFX10-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; GFX10-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) - ; GFX10-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; GFX10-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; GFX10-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; GFX10-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: 
[[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX10-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; GFX10-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; GFX10-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; GFX10-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; GFX10-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; GFX10-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX10-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX10-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; GFX10-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX10-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; GFX10-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (i8) from 
unknown-address + 12, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; GFX10-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (i8) from unknown-address + 13, addrspace 3) + ; GFX10-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; GFX10-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; GFX10-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; GFX10-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (i8) from unknown-address + 14, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p3) :: (load (i8) from unknown-address + 15, addrspace 3) + ; GFX10-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; GFX10-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; GFX10-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; GFX10-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s128_align1 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 1, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, align 1, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, align 1, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, align 1, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), align 1, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, align 1, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 8, align 1, addrspace 
3) + ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i32) from unknown-address + 12, align 1, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX11-LABEL: name: test_load_local_s128_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX11-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX11-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) 
:: (load (s8) from unknown-address + 8, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX11-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; GFX11-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; GFX11-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX11-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; GFX11-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX11-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; GFX11-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; GFX11-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; GFX11-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX11-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; GFX11-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; GFX11-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) - ; GFX11-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; GFX11-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; GFX11-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; GFX11-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) - ; GFX11-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; GFX11-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; GFX11-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; GFX11-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) 
= G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX11-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; GFX11-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX11-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; GFX11-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX11-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX11-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX11-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX11-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; GFX11-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; GFX11-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; GFX11-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; GFX11-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; GFX11-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX11-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX11-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; GFX11-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; GFX11-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX11-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; GFX11-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (i8) from unknown-address + 12, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; GFX11-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (i8) from unknown-address + 13, addrspace 3) + ; GFX11-NEXT: [[SHL9:%[0-9]+]]:_(i32) = 
G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; GFX11-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; GFX11-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; GFX11-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (i8) from unknown-address + 14, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p3) :: (load (i8) from unknown-address + 15, addrspace 3) + ; GFX11-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; GFX11-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; GFX11-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; GFX11-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s128_align1 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 1, addrspace 3) - ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i32>), align 1, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) %0:_(p3) = COPY $vgpr0 - %1:_(s128) = G_LOAD %0 :: (load (s128), align 1, addrspace 3) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(i128) = G_LOAD %0(p3) :: (load (i128), align 1, addrspace 3) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(i128) ... --- @@ -5410,8 +5410,8 @@ body: | ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), addrspace 3) ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) %0:_(p3) = COPY $vgpr0 - %1:_(p1) = G_LOAD %0 :: (load (p1), align 8, addrspace 3) - $vgpr0_vgpr1 = COPY %1 + %1:_(p1) = G_LOAD %0(p3) :: (load (p1), addrspace 3) + $vgpr0_vgpr1 = COPY %1(p1) ... 
--- @@ -5424,16 +5424,16 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD1]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[ZEXT]] - ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR]](s64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[LOAD]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, addrspace 3) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD1]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[SHL]], [[ZEXT]] + ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR]](i64) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) ; ; CI-LABEL: name: test_load_local_p1_align4 @@ -5475,32 +5475,32 @@ body: | ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) - ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD1]](s32) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C1]](s32) - ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[ZEXT]] - ; GFX10-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR]](s64) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[LOAD]](i32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, addrspace 3) + ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD1]](i32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C1]](i32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[SHL]], [[ZEXT]] + ; GFX10-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR]](i64) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_p1_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 
3) - ; GFX10-UNALIGNED-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32) - ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD1]](s32) - ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C1]](s32) - ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[ZEXT]] - ; GFX10-UNALIGNED-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR]](s64) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[LOAD]](i32) + ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD1]](i32) + ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C1]](i32) + ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[SHL]], [[ZEXT]] + ; GFX10-UNALIGNED-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR]](i64) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) ; ; GFX11-LABEL: name: test_load_local_p1_align4 @@ -5517,8 +5517,8 @@ body: | ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 4, addrspace 3) ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) %0:_(p3) = COPY $vgpr0 - %1:_(p1) = G_LOAD %0 :: (load (p1), align 4, addrspace 3) - $vgpr0_vgpr1 = COPY %1 + %1:_(p1) = G_LOAD %0(p3) :: (load (p1), align 4, addrspace 3) + $vgpr0_vgpr1 = COPY %1(p1) ... 
--- @@ -5531,130 +5531,130 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] - ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR2]](s64) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR]](i32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, addrspace 3) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR1]](i32) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C3]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i64) = G_OR [[SHL2]], [[ZEXT]] + ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR2]](i64) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) ; ; CI-LABEL: name: test_load_local_p1_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: 
(load (s16) from unknown-address + 2, addrspace 3) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] - ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR2]](s64) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR]](i32) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, addrspace 3) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR1]](i32) + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i64) = G_OR [[SHL2]], [[ZEXT]] + ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR2]](i64) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) ; ; CI-DS128-LABEL: name: test_load_local_p1_align2 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-DS128-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) - ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 4 - ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) - ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; CI-DS128-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) - ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) - ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] - ; CI-DS128-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR2]](s64) + ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-DS128-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR]](i32) + ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, addrspace 3) + ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; CI-DS128-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR1]](i32) + ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C3]](i32) + ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(i64) = G_OR [[SHL2]], [[ZEXT]] + ; CI-DS128-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR2]](i64) ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) ; ; VI-LABEL: name: test_load_local_p1_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; 
VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] - ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR2]](s64) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR]](i32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, addrspace 3) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR1]](i32) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i64) = G_OR [[SHL2]], [[ZEXT]] + ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR2]](i64) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) ; ; GFX9-LABEL: name: test_load_local_p1_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = 
G_ANYEXT [[OR1]](s32) - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) - ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] - ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR2]](s64) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR]](i32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, addrspace 3) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR1]](i32) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C3]](i32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i64) = G_OR [[SHL2]], [[ZEXT]] + ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR2]](i64) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_p1_align2 @@ -5668,68 +5668,68 @@ body: | ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) - ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) - ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] - ; GFX10-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = 
G_INTTOPTR [[OR2]](s64) + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR]](i32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, addrspace 3) + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR1]](i32) + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C3]](i32) + ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(i64) = G_OR [[SHL2]], [[ZEXT]] + ; GFX10-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR2]](i64) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_p1_align2 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 2, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32) - ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, align 2, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD1]](s32) - ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C1]](s32) - ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[ZEXT]] - ; GFX10-UNALIGNED-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR]](s64) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), align 2, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[LOAD]](i32) + ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, align 2, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD1]](i32) + ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C1]](i32) + ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[SHL]], [[ZEXT]] + ; GFX10-UNALIGNED-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR]](i64) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY 
[[INTTOPTR]](p1) ; ; GFX11-LABEL: name: test_load_local_p1_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX11-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) - ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) - ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) - ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) - ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] - ; GFX11-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR2]](s64) + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR]](i32) + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, addrspace 3) + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR1]](i32) + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C3]](i32) + ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(i64) = G_OR [[SHL2]], [[ZEXT]] + ; GFX11-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR2]](i64) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_p1_align2 @@ -5739,8 +5739,8 @@ body: | ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 2, addrspace 3) ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) %0:_(p3) = 
COPY $vgpr0 - %1:_(p1) = G_LOAD %0 :: (load (p1), align 2, addrspace 3) - $vgpr0_vgpr1 = COPY %1 + %1:_(p1) = G_LOAD %0(p3) :: (load (p1), align 2, addrspace 3) + $vgpr0_vgpr1 = COPY %1(p1) ... --- @@ -5753,220 +5753,220 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; SI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](s64) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from 
unknown-address + 1, addrspace 3) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; SI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; SI-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; SI-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](i64) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) ; ; CI-LABEL: name: test_load_local_p1_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from 
unknown-address + 2, addrspace 3) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; CI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](s64) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; CI-NEXT: 
[[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; CI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; CI-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; CI-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](i64) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) ; ; CI-DS128-LABEL: name: test_load_local_p1_align1 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-DS128-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-DS128-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD 
[[PTR_ADD3]], [[C]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-DS128-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; CI-DS128-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-DS128-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-DS128-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-DS128-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; CI-DS128-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CI-DS128-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; CI-DS128-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; CI-DS128-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](s64) + ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-DS128-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-DS128-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-DS128-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; 
CI-DS128-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; CI-DS128-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-DS128-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-DS128-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-DS128-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; CI-DS128-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; CI-DS128-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; CI-DS128-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; CI-DS128-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](i64) ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) ; ; VI-LABEL: name: test_load_local_p1_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], 
[[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; VI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](s64) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; VI-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](i64) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) ; ; GFX9-LABEL: name: test_load_local_p1_align1 ; GFX9: liveins: 
$vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](s64) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) 
+ ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](i64) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_p1_align1 @@ -5980,104 +5980,104 @@ body: | ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 
2, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX10-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX10-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; GFX10-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; GFX10-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](s64) + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX10-NEXT: 
[[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX10-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX10-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; GFX10-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; GFX10-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](i64) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_p1_align1 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 1, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32) - ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, align 1, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD1]](s32) - ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C1]](s32) - ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[ZEXT]] - ; GFX10-UNALIGNED-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR]](s64) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), align 1, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[LOAD]](i32) + ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, align 1, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD1]](i32) + ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C1]](i32) + ; GFX10-UNALIGNED-NEXT: 
[[OR:%[0-9]+]]:_(i64) = G_OR [[SHL]], [[ZEXT]] + ; GFX10-UNALIGNED-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR]](i64) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) ; ; GFX11-LABEL: name: test_load_local_p1_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX11-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX11-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; GFX11-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX11-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; GFX11-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; GFX11-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](s64) + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX11-NEXT: 
[[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX11-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; GFX11-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; GFX11-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX11-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; GFX11-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX11-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX11-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX11-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; GFX11-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX11-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; GFX11-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; GFX11-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](i64) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_p1_align1 @@ -6087,8 +6087,8 @@ body: | ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 1, addrspace 3) ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) %0:_(p3) = COPY $vgpr0 - %1:_(p1) = G_LOAD %0 :: (load (p1), align 1, addrspace 3) - $vgpr0_vgpr1 = COPY %1 + %1:_(p1) = G_LOAD %0(p3) :: (load (p1), align 1, addrspace 3) + $vgpr0_vgpr1 = COPY %1(p1) ... 
--- @@ -6167,8 +6167,8 @@ body: | ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), addrspace 3) ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](p3) %0:_(p3) = COPY $vgpr0 - %1:_(p3) = G_LOAD %0 :: (load (p3), align 4, addrspace 3) - $vgpr0 = COPY %1 + %1:_(p3) = G_LOAD %0(p3) :: (load (p3), addrspace 3) + $vgpr0 = COPY %1(p3) ... --- @@ -6181,70 +6181,70 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](i32) ; SI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) ; ; CI-LABEL: name: test_load_local_p3_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](i32) ; CI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) ; ; CI-DS128-LABEL: name: test_load_local_p3_align2 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from 
unknown-address + 2, addrspace 3) - ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-DS128-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32) + ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-DS128-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](i32) ; CI-DS128-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) ; ; VI-LABEL: name: test_load_local_p3_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](i32) ; VI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) ; ; GFX9-LABEL: name: test_load_local_p3_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], 
[[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](i32) ; GFX9-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_p3_align2 @@ -6258,14 +6258,14 @@ body: | ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX10-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32) + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](i32) ; GFX10-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_p3_align2 @@ -6279,14 +6279,14 @@ body: | ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX11-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32) + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](i32) ; GFX11-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_p3_align2 @@ -6296,8 +6296,8 @@ body: | ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), align 2, addrspace 3) ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](p3) %0:_(p3) = COPY $vgpr0 - %1:_(p3) = G_LOAD %0 :: (load (p3), align 2, addrspace 3) - $vgpr0 = COPY %1 + %1:_(p3) = G_LOAD %0(p3) :: (load (p3), align 2, addrspace 3) + $vgpr0 = COPY %1(p3) ... 
--- @@ -6310,120 +6310,120 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](i32) ; SI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) ; ; CI-LABEL: name: test_load_local_p3_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] 
- ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](i32) ; CI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) ; ; CI-DS128-LABEL: name: test_load_local_p3_align1 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = 
G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-DS128-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) + ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-DS128-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](i32) ; CI-DS128-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) ; ; VI-LABEL: name: test_load_local_p3_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: 
[[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](i32) ; VI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) ; ; GFX9-LABEL: name: test_load_local_p3_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) 
= G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](i32) ; GFX9-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_p3_align1 @@ -6437,24 +6437,24 @@ body: | ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX10-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX10-NEXT: 
[[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](i32) ; GFX10-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_p3_align1 @@ -6468,24 +6468,24 @@ body: | ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX11-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX11-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](i32) ; GFX11-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_p3_align1 @@ -6495,8 +6495,8 @@ body: | ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), align 1, addrspace 3) ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](p3) %0:_(p3) = COPY $vgpr0 - %1:_(p3) = G_LOAD %0 :: (load (p3), align 1, addrspace 3) - $vgpr0 = COPY %1 + %1:_(p3) = G_LOAD %0(p3) :: (load (p3), align 
1, addrspace 3) + $vgpr0 = COPY %1(p3) ... --- @@ -6575,8 +6575,8 @@ body: | ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), addrspace 3) ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](p5) %0:_(p3) = COPY $vgpr0 - %1:_(p5) = G_LOAD %0 :: (load (p5), align 4, addrspace 3) - $vgpr0 = COPY %1 + %1:_(p5) = G_LOAD %0(p3) :: (load (p5), addrspace 3) + $vgpr0 = COPY %1(p5) ... --- @@ -6589,70 +6589,70 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](i32) ; SI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) ; ; CI-LABEL: name: test_load_local_p5_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](i32) ; CI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) ; ; CI-DS128-LABEL: name: test_load_local_p5_align2 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD 
[[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-DS128-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) + ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-DS128-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](i32) ; CI-DS128-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) ; ; VI-LABEL: name: test_load_local_p5_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](i32) ; VI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) ; ; GFX9-LABEL: name: test_load_local_p5_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: 
[[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](i32) ; GFX9-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_p5_align2 @@ -6666,14 +6666,14 @@ body: | ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX10-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](i32) ; GFX10-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_p5_align2 @@ -6687,14 +6687,14 @@ body: | ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX11-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](i32) ; GFX11-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_p5_align2 @@ -6704,8 +6704,8 @@ body: | ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), align 2, addrspace 3) ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](p5) %0:_(p3) = COPY $vgpr0 - %1:_(p5) = G_LOAD %0 :: (load (p5), align 2, addrspace 3) - $vgpr0 = COPY %1 + %1:_(p5) = G_LOAD %0(p3) :: (load (p5), align 2, addrspace 3) + 
$vgpr0 = COPY %1(p5) ... --- @@ -6718,120 +6718,120 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](i32) ; SI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) ; ; CI-LABEL: name: test_load_local_p5_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = 
G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](i32) ; CI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) ; ; CI-DS128-LABEL: name: test_load_local_p5_align1 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; 
CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-DS128-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) + ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-DS128-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](i32) ; CI-DS128-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) ; ; VI-LABEL: name: test_load_local_p5_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD 
[[COPY]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](i32) ; VI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) ; ; GFX9-LABEL: name: test_load_local_p5_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; 
GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](i32) ; GFX9-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_p5_align1 @@ -6845,24 +6845,24 @@ body: | ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX10-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(i32) = 
G_OR [[SHL2]], [[OR]] + ; GFX10-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](i32) ; GFX10-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_p5_align1 @@ -6876,24 +6876,24 @@ body: | ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX11-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX11-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](i32) ; GFX11-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_p5_align1 @@ -6903,8 +6903,8 @@ body: | ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), align 1, addrspace 3) ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](p5) %0:_(p3) = COPY $vgpr0 - %1:_(p5) = G_LOAD %0 :: (load (p5), align 1, addrspace 3) - $vgpr0 = COPY %1 + 
%1:_(p5) = G_LOAD %0(p3) :: (load (p5), align 1, addrspace 3) + $vgpr0 = COPY %1(p5) ... --- @@ -6917,76 +6917,76 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; SI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-LABEL: name: test_load_local_v2s8_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; CI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-DS128-LABEL: name: test_load_local_v2s8_align2 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; VI-LABEL: name: test_load_local_v2s8_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-LABEL: name: test_load_local_v2s8_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s8_align2 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX10-LABEL: name: test_load_local_v2s8_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s8_align2 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX11-LABEL: name: 
test_load_local_v2s8_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s8_align2 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p3) = COPY $vgpr0 - %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), align 2, addrspace 3) - %2:_(s16) = G_BITCAST %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + %1:_(<2 x i8>) = G_LOAD %0(p3) :: (load (<2 x i8>), addrspace 3) + %2:_(i16) = G_BITCAST %1(<2 x i8>) + %3:_(i32) = G_ANYEXT %2(i16) + $vgpr0 = COPY %3(i32) ... --- @@ -6999,140 +6999,140 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[LSHR]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[OR]], [[C1]](i32) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[LSHR]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; CI-LABEL: name: test_load_local_v2s8_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) - 
; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[LSHR]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[OR]], [[C1]](i32) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[LSHR]](i32) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; CI-DS128-LABEL: name: test_load_local_v2s8_align1 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-DS128-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) - ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[LSHR]](s32) - ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-DS128-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[OR]], [[C1]](i32) + ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[LSHR]](i32) + ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; VI-LABEL: name: test_load_local_v2s8_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[LSHR]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: 
[[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[OR]], [[C1]](i32) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[LSHR]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX9-LABEL: name: test_load_local_v2s8_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[LSHR]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[OR]], [[C1]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[LSHR]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s8_align1 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 1, addrspace 3) - ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), align 1, addrspace 3) + ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LSHR]](i32) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX10-LABEL: name: test_load_local_v2s8_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; 
GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[LSHR]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[OR]], [[C1]](i32) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[LSHR]](i32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s8_align1 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 1, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), align 1, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LSHR]](i32) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX11-LABEL: name: test_load_local_v2s8_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[LSHR]](s32) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY 
[[BUILD_VECTOR]](<2 x s32>) + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[OR]], [[C1]](i32) + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[LSHR]](i32) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s8_align1 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 1, addrspace 3) - ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX11-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), align 1, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX11-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LSHR]](i32) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) %0:_(p3) = COPY $vgpr0 - %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), align 1, addrspace 3) - %2:_(<2 x s32>) = G_ANYEXT %1 - $vgpr0_vgpr1 = COPY %2 + %1:_(<2 x i8>) = G_LOAD %0(p3) :: (load (<2 x i8>), align 1, addrspace 3) + %2:_(<2 x i32>) = G_ANYEXT %1(<2 x i8>) + $vgpr0_vgpr1 = COPY %2(<2 x i32>) ... 
--- @@ -7145,312 +7145,312 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) - ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; SI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C2]] + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C]](i32) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LSHR]], [[C3]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[COPY1]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[SHL]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[TRUNC1]] + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C2]] + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C]](i32) + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[DEF]], [[C3]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[COPY2]](i32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[SHL1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[TRUNC3]] + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL2]] + ; SI-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; CI-LABEL: name: test_load_local_v3s8_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: 
[[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 1) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) - ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]] - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) - ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; CI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 1) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; CI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C2]] + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C]](i32) + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; CI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LSHR]], [[C3]] + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[COPY1]](i32) + ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[SHL]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[TRUNC1]] + ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; CI-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C2]] + ; CI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C]](i32) + ; CI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[DEF]], [[C3]] + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[COPY2]](i32) + ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[SHL1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[TRUNC3]] + ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL2]] + ; CI-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; CI-DS128-LABEL: name: test_load_local_v3s8_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: 
[[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 1) - ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-DS128-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-DS128-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; CI-DS128-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CI-DS128-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; CI-DS128-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) - ; CI-DS128-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; CI-DS128-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; CI-DS128-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CI-DS128-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]] - ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) - ; CI-DS128-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-DS128-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; CI-DS128-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; CI-DS128-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 1) + ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-DS128-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-DS128-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; CI-DS128-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CI-DS128-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C2]] + ; CI-DS128-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C]](i32) + ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LSHR]], [[C3]] + ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[COPY1]](i32) + ; CI-DS128-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[SHL]](i32) + ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[TRUNC1]] + ; CI-DS128-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C2]] + ; CI-DS128-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C]](i32) + ; CI-DS128-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[DEF]], [[C3]] + ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[COPY2]](i32) + ; CI-DS128-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[SHL1]](i32) + ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[TRUNC3]] + ; CI-DS128-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; CI-DS128-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; CI-DS128-NEXT: 
[[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL2]] + ; CI-DS128-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; VI-LABEL: name: test_load_local_v3s8_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C2]] + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C3]](i16) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL]] + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C2]] + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C2]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C3]](i16) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL1]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL2]] + ; VI-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; GFX9-LABEL: name: test_load_local_v3s8_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 1) - ; 
GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]] - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 1) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C2]] + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C2]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C3]](i16) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL]] + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C2]] + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C2]] + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C3]](i16) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL1]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s8_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 1) - ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; GFX9-UNALIGNED-NEXT: 
[[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-UNALIGNED-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; GFX9-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; GFX9-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] - ; GFX9-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16) - ; GFX9-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX9-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-UNALIGNED-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; GFX9-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; GFX9-UNALIGNED-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]] - ; GFX9-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16) - ; GFX9-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9-UNALIGNED-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; GFX9-UNALIGNED-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; GFX9-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; GFX9-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 1) + ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; GFX9-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-UNALIGNED-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; GFX9-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX9-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX9-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX9-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C2]] + ; GFX9-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C2]] + ; GFX9-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX9-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C3]](i16) + ; GFX9-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL]] + ; GFX9-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-UNALIGNED-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C2]] + ; GFX9-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; GFX9-UNALIGNED-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C2]] + ; GFX9-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C3]](i16) + ; GFX9-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL1]] + ; GFX9-UNALIGNED-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; GFX9-UNALIGNED-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; GFX9-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; GFX9-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; GFX10-LABEL: name: test_load_local_v3s8_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = 
COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 1) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16) - ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; GFX10-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]] - ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16) - ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; GFX10-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 1) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX10-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C2]] + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C3]](i16) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL]] + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C2]] + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; GFX10-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C2]] + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C3]](i16) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL1]] + ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX10-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s8_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 1) - ; 
GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-UNALIGNED-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; GFX10-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX10-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; GFX10-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] - ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16) - ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX10-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10-UNALIGNED-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; GFX10-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; GFX10-UNALIGNED-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]] - ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16) - ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX10-UNALIGNED-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; GFX10-UNALIGNED-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 1) + ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-UNALIGNED-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; GFX10-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX10-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C2]] + ; GFX10-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C2]] + ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C3]](i16) + ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL]] + ; GFX10-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX10-UNALIGNED-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C2]] + ; GFX10-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; GFX10-UNALIGNED-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C2]] + ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C3]](i16) + ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL1]] + ; GFX10-UNALIGNED-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; GFX10-UNALIGNED-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(i32) = 
G_OR [[ZEXT]], [[SHL2]] + ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; GFX11-LABEL: name: test_load_local_v3s8_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 1) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX11-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX11-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] - ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16) - ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX11-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; GFX11-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]] - ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16) - ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX11-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; GFX11-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; GFX11-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 1) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX11-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C2]] + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX11-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C2]] + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C3]](i16) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL]] + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX11-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C2]] + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; GFX11-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C2]] + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C3]](i16) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL1]] + ; GFX11-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; GFX11-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX11-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v3s8_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 
; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 1) - ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX11-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; GFX11-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-UNALIGNED-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; GFX11-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX11-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX11-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX11-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; GFX11-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX11-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] - ; GFX11-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX11-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16) - ; GFX11-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX11-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX11-UNALIGNED-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; GFX11-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; GFX11-UNALIGNED-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]] - ; GFX11-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16) - ; GFX11-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX11-UNALIGNED-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; GFX11-UNALIGNED-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; GFX11-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; GFX11-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 1) + ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX11-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; GFX11-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11-UNALIGNED-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; GFX11-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX11-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX11-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX11-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C2]] + ; GFX11-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX11-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C2]] + ; GFX11-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX11-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C3]](i16) + ; GFX11-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL]] + ; GFX11-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX11-UNALIGNED-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C2]] + ; GFX11-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; GFX11-UNALIGNED-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C2]] + ; GFX11-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C3]](i16) + ; GFX11-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL1]] + ; GFX11-UNALIGNED-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; 
GFX11-UNALIGNED-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; GFX11-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; GFX11-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[OR2]](i32) %0:_(p3) = COPY $vgpr0 - %1:_(<3 x s8>) = G_LOAD %0 :: (load (<3 x s8>), addrspace 1, align 4) - %2:_(s24) = G_BITCAST %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + %1:_(<3 x i8>) = G_LOAD %0(p3) :: (load (<3 x i8>), align 4, addrspace 1) + %2:_(i24) = G_BITCAST %1(<3 x i8>) + %3:_(i32) = G_ANYEXT %2(i24) + $vgpr0 = COPY %3(i32) ... --- @@ -7463,397 +7463,397 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C5]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C5]] - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) - ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; SI-NEXT: $vgpr0 = COPY [[OR4]](s32) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = 
G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C3]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[OR]] + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[OR1]], [[C1]](i32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[OR1]], [[C3]](i32) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C4]] + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C1]](i32) + ; SI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LSHR]], [[C5]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[COPY1]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[SHL2]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i16) = G_OR [[AND]], [[TRUNC1]] + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C4]] + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C1]](i32) + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[DEF]], [[C5]] + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[COPY2]](i32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[SHL3]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[TRUNC3]] + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR2]](i16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR3]](i16) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C3]](i32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL4]] + ; SI-NEXT: $vgpr0 = COPY [[OR4]](i32) ; ; CI-LABEL: name: test_load_local_v3s8_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32) - ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32) - ; CI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; CI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY 
[[C1]](s32) - ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C5]] - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) - ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C5]] - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) - ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; CI-NEXT: $vgpr0 = COPY [[OR4]](s32) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C3]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[OR]] + ; CI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[OR1]], [[C1]](i32) + ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[OR1]], [[C3]](i32) + ; CI-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; CI-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; CI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C4]] + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C1]](i32) + ; CI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; CI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LSHR]], [[C5]] + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[COPY1]](i32) + ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[SHL2]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i16) = G_OR [[AND]], [[TRUNC1]] + ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; CI-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C4]] + ; CI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C1]](i32) + ; CI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[DEF]], [[C5]] + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[COPY2]](i32) + ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[SHL3]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[TRUNC3]] + ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR2]](i16) + ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR3]](i16) + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C3]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL4]] + ; CI-NEXT: $vgpr0 = COPY [[OR4]](i32) ; ; CI-DS128-LABEL: name: test_load_local_v3s8_align1 ; CI-DS128: 
liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) - ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] - ; CI-DS128-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32) - ; CI-DS128-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32) - ; CI-DS128-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CI-DS128-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] - ; CI-DS128-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CI-DS128-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C5]] - ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) - ; CI-DS128-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; CI-DS128-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] - ; CI-DS128-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CI-DS128-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C5]] - ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) - ; CI-DS128-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CI-DS128-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-DS128-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; CI-DS128-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; CI-DS128-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) - ; CI-DS128-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; CI-DS128-NEXT: $vgpr0 = COPY [[OR4]](s32) + ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i8) from 
unknown-address + 2, addrspace 3) + ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C3]](i32) + ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[OR]] + ; CI-DS128-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[OR1]], [[C1]](i32) + ; CI-DS128-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[OR1]], [[C3]](i32) + ; CI-DS128-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CI-DS128-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C4]] + ; CI-DS128-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C1]](i32) + ; CI-DS128-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LSHR]], [[C5]] + ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[COPY1]](i32) + ; CI-DS128-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[SHL2]](i32) + ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(i16) = G_OR [[AND]], [[TRUNC1]] + ; CI-DS128-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C4]] + ; CI-DS128-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C1]](i32) + ; CI-DS128-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[DEF]], [[C5]] + ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[COPY2]](i32) + ; CI-DS128-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[SHL3]](i32) + ; CI-DS128-NEXT: [[OR3:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[TRUNC3]] + ; CI-DS128-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR2]](i16) + ; CI-DS128-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR3]](i16) + ; CI-DS128-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C3]](i32) + ; CI-DS128-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL4]] + ; CI-DS128-NEXT: $vgpr0 = COPY [[OR4]](i32) ; ; VI-LABEL: name: test_load_local_v3s8_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]] - ; VI-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND1]], 
[[C5]](s16) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL2]] - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C4]] - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C5]](s16) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL3]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; VI-NEXT: $vgpr0 = COPY [[OR4]](s32) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C3]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[OR]] + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[OR1]], [[C1]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[OR1]], [[C3]](i32) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C4]] + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C4]] + ; VI-NEXT: [[C5:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C5]](i16) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL2]] + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C4]] + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C4]] + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C5]](i16) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL3]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR2]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR3]](i16) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C3]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL4]] + ; VI-NEXT: $vgpr0 = COPY [[OR4]](i32) ; ; GFX9-LABEL: name: test_load_local_v3s8_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; 
GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]] - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C5]](s16) - ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL2]] - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C4]] - ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C5]](s16) - ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL3]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) - ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; GFX9-NEXT: $vgpr0 = COPY [[OR4]](s32) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C3]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[OR]] + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[OR1]], [[C1]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[OR1]], [[C3]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C4]] + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C4]] + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(i16) = 
G_CONSTANT i16 8 + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C5]](i16) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL2]] + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C4]] + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C4]] + ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C5]](i16) + ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL3]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR2]](i16) + ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR3]](i16) + ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C3]](i32) + ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX9-NEXT: $vgpr0 = COPY [[OR4]](i32) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s8_align1 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), align 1, addrspace 3) - ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX9-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX9-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C2]](s32) - ; GFX9-UNALIGNED-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) - ; GFX9-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; GFX9-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; GFX9-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] - ; GFX9-UNALIGNED-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C4]](s16) - ; GFX9-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL1]] - ; GFX9-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-UNALIGNED-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] - ; GFX9-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; GFX9-UNALIGNED-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] - ; GFX9-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) - ; GFX9-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL2]] - ; GFX9-UNALIGNED-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; GFX9-UNALIGNED-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; GFX9-UNALIGNED-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; GFX9-UNALIGNED-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] - ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[OR3]](s32) + ; GFX9-UNALIGNED-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), align 1, addrspace 3) + ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX9-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = 
G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX9-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[OR]], [[C2]](i32) + ; GFX9-UNALIGNED-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[OR]], [[C1]](i32) + ; GFX9-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX9-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR]](i32) + ; GFX9-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX9-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C3]] + ; GFX9-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C3]] + ; GFX9-UNALIGNED-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX9-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C4]](i16) + ; GFX9-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL1]] + ; GFX9-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-UNALIGNED-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C3]] + ; GFX9-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; GFX9-UNALIGNED-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C3]] + ; GFX9-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C4]](i16) + ; GFX9-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL2]] + ; GFX9-UNALIGNED-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; GFX9-UNALIGNED-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR2]](i16) + ; GFX9-UNALIGNED-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; GFX9-UNALIGNED-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL3]] + ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[OR3]](i32) ; ; GFX10-LABEL: name: test_load_local_v3s8_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) - ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] - ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32) - ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX10-NEXT: 
[[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]] - ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C5]](s16) - ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL2]] - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; GFX10-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C4]] - ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C5]](s16) - ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL3]] - ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) - ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; GFX10-NEXT: $vgpr0 = COPY [[OR4]](s32) + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C3]](i32) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[OR]] + ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[OR1]], [[C1]](i32) + ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[OR1]], [[C3]](i32) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX10-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C4]] + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C4]] + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C5]](i16) + ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL2]] + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C4]] + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; GFX10-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C4]] + ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C5]](i16) + ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL3]] + ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR2]](i16) + ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR3]](i16) + ; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C3]](i32) + ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX10-NEXT: $vgpr0 = COPY [[OR4]](i32) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s8_align1 ; 
GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), align 1, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C2]](s32) - ; GFX10-UNALIGNED-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) - ; GFX10-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX10-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; GFX10-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] - ; GFX10-UNALIGNED-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C4]](s16) - ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL1]] - ; GFX10-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10-UNALIGNED-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] - ; GFX10-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; GFX10-UNALIGNED-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] - ; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) - ; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL2]] - ; GFX10-UNALIGNED-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; GFX10-UNALIGNED-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; GFX10-UNALIGNED-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; GFX10-UNALIGNED-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] - ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[OR3]](s32) + ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), align 1, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[OR]], [[C2]](i32) + ; GFX10-UNALIGNED-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[OR]], [[C1]](i32) + ; GFX10-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX10-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR]](i32) + ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(i16) = 
G_AND [[TRUNC]], [[C3]] + ; GFX10-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C3]] + ; GFX10-UNALIGNED-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C4]](i16) + ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL1]] + ; GFX10-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX10-UNALIGNED-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C3]] + ; GFX10-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; GFX10-UNALIGNED-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C3]] + ; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C4]](i16) + ; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL2]] + ; GFX10-UNALIGNED-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; GFX10-UNALIGNED-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR2]](i16) + ; GFX10-UNALIGNED-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; GFX10-UNALIGNED-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL3]] + ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[OR3]](i32) ; ; GFX11-LABEL: name: test_load_local_v3s8_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) - ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] - ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32) - ; GFX11-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32) - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; GFX11-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX11-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX11-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]] - ; GFX11-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C5]](s16) - ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL2]] - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX11-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; GFX11-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C4]] - ; GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C5]](s16) - ; GFX11-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL3]] - ; GFX11-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; GFX11-NEXT: 
[[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) - ; GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; GFX11-NEXT: $vgpr0 = COPY [[OR4]](s32) + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C3]](i32) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[OR]] + ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[OR1]], [[C1]](i32) + ; GFX11-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[OR1]], [[C3]](i32) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; GFX11-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX11-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C4]] + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX11-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C4]] + ; GFX11-NEXT: [[C5:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C5]](i16) + ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL2]] + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX11-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C4]] + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; GFX11-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C4]] + ; GFX11-NEXT: [[SHL3:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C5]](i16) + ; GFX11-NEXT: [[OR3:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL3]] + ; GFX11-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR2]](i16) + ; GFX11-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR3]](i16) + ; GFX11-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C3]](i32) + ; GFX11-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX11-NEXT: $vgpr0 = COPY [[OR4]](i32) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v3s8_align1 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), align 1, addrspace 3) - ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX11-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX11-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX11-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX11-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX11-UNALIGNED-NEXT: 
[[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C2]](s32) - ; GFX11-UNALIGNED-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) - ; GFX11-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX11-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; GFX11-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX11-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; GFX11-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX11-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] - ; GFX11-UNALIGNED-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX11-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C4]](s16) - ; GFX11-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL1]] - ; GFX11-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX11-UNALIGNED-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] - ; GFX11-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; GFX11-UNALIGNED-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] - ; GFX11-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) - ; GFX11-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL2]] - ; GFX11-UNALIGNED-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; GFX11-UNALIGNED-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; GFX11-UNALIGNED-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; GFX11-UNALIGNED-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] - ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[OR3]](s32) + ; GFX11-UNALIGNED-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), align 1, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX11-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX11-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX11-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[OR]], [[C2]](i32) + ; GFX11-UNALIGNED-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[OR]], [[C1]](i32) + ; GFX11-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX11-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR]](i32) + ; GFX11-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX11-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C3]] + ; GFX11-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX11-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C3]] + ; GFX11-UNALIGNED-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX11-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C4]](i16) + ; GFX11-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL1]] + ; GFX11-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX11-UNALIGNED-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C3]] + ; GFX11-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; GFX11-UNALIGNED-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C3]] + ; GFX11-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C4]](i16) + ; GFX11-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(i16) = G_OR 
[[AND2]], [[SHL2]] + ; GFX11-UNALIGNED-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; GFX11-UNALIGNED-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR2]](i16) + ; GFX11-UNALIGNED-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; GFX11-UNALIGNED-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL3]] + ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[OR3]](i32) %0:_(p3) = COPY $vgpr0 - %1:_(<3 x s8>) = G_LOAD %0 :: (load (<3 x s8>), align 1, addrspace 3) - %2:_(s24) = G_BITCAST %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + %1:_(<3 x i8>) = G_LOAD %0(p3) :: (load (<3 x i8>), align 1, addrspace 3) + %2:_(i24) = G_BITCAST %1(<3 x i8>) + %3:_(i32) = G_ANYEXT %2(i24) + $vgpr0 = COPY %3(i32) ... --- @@ -7866,75 +7866,75 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; SI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-LABEL: name: test_load_local_v4s8_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; CI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-DS128-LABEL: name: test_load_local_v4s8_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; VI-LABEL: name: test_load_local_v4s8_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-LABEL: name: test_load_local_v4s8_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s8_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX10-LABEL: name: test_load_local_v4s8_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = 
G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s8_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX11-LABEL: name: test_load_local_v4s8_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v4s8_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p3) = COPY $vgpr0 - %1:_(<4 x s8>) = G_LOAD %0 :: (load (<4 x s8>), align 4, addrspace 3) - %2:_(s32) = G_BITCAST %1 - $vgpr0 = COPY %2 + %1:_(<4 x i8>) = G_LOAD %0(p3) :: (load (<4 x i8>), addrspace 3) + %2:_(i32) = G_BITCAST %1(<4 x i8>) + $vgpr0 = COPY %2(i32) ... 
--- @@ -7947,75 +7947,75 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), addrspace 3) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; CI-LABEL: name: test_load_local_v8s8_align8 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), addrspace 3) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; CI-DS128-LABEL: name: test_load_local_v8s8_align8 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3) - ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), addrspace 3) + ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; VI-LABEL: name: test_load_local_v8s8_align8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), addrspace 3) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX9-LABEL: name: test_load_local_v8s8_align8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), addrspace 3) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v8s8_align8 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), addrspace 3) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX10-LABEL: name: test_load_local_v8s8_align8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), addrspace 3) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v8s8_align8 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), 
addrspace 3) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), addrspace 3) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX11-LABEL: name: test_load_local_v8s8_align8 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), addrspace 3) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v8s8_align8 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) %0:_(p3) = COPY $vgpr0 - %1:_(<8 x s8>) = G_LOAD %0 :: (load (<8 x s8>), align 8, addrspace 3) - %2:_(<2 x s32>) = G_BITCAST %1 - $vgpr0_vgpr1 = COPY %2 + %1:_(<8 x i8>) = G_LOAD %0(p3) :: (load (<8 x i8>), addrspace 3) + %2:_(<2 x i32>) = G_BITCAST %1(<8 x i8>) + $vgpr0_vgpr1 = COPY %2(<2 x i32>) ... --- @@ -8028,510 +8028,510 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; SI-NEXT: 
[[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32) - ; SI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) - ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) - ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; SI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) - ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) - ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; SI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], 
[[C]](i32) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; SI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; SI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; SI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; SI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; SI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; SI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; SI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; SI-NEXT: 
[[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i32) + ; SI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (i8) from unknown-address + 12, addrspace 3) + ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (i8) from unknown-address + 13, addrspace 3) + ; SI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; SI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (i8) from unknown-address + 14, addrspace 3) + ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p3) :: (load (i8) from unknown-address + 15, addrspace 3) + ; SI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; SI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; SI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; SI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; CI-LABEL: name: test_load_local_v16s8_align16 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI-NEXT: 
[[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; CI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32) - ; CI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) - ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) - ; CI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; CI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) - ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) - ; CI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; CI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; CI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; CI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; CI-NEXT: 
[[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; CI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; CI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; CI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; CI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; CI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i32) + ; CI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load 
(i8) from unknown-address + 12, addrspace 3) + ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (i8) from unknown-address + 13, addrspace 3) + ; CI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; CI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (i8) from unknown-address + 14, addrspace 3) + ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p3) :: (load (i8) from unknown-address + 15, addrspace 3) + ; CI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; CI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; CI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; CI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; CI-DS128-LABEL: name: test_load_local_v16s8_align16 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-DS128-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-DS128-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; 
CI-DS128-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; CI-DS128-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-DS128-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-DS128-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-DS128-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; CI-DS128-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; CI-DS128-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; CI-DS128-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; CI-DS128-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; CI-DS128-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; CI-DS128-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CI-DS128-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) - ; CI-DS128-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; CI-DS128-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) - ; CI-DS128-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; CI-DS128-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; CI-DS128-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD 
[[COPY]](p3) :: (load (i8), addrspace 3) + ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-DS128-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-DS128-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; CI-DS128-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-DS128-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-DS128-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-DS128-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; CI-DS128-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; CI-DS128-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) 
+ ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; CI-DS128-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; CI-DS128-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; CI-DS128-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; CI-DS128-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CI-DS128-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (i8) from unknown-address + 12, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (i8) from unknown-address + 13, addrspace 3) + ; CI-DS128-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; CI-DS128-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (i8) from unknown-address + 14, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p3) :: (load (i8) from unknown-address + 15, addrspace 3) + ; CI-DS128-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; CI-DS128-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; CI-DS128-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; VI-LABEL: name: test_load_local_v16s8_align16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; 
VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; VI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) - ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) - ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) - ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) - ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; VI-NEXT: 
[[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; VI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; VI-NEXT: 
[[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; VI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; VI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; VI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; VI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; VI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (i8) from unknown-address + 12, addrspace 3) + ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (i8) from unknown-address + 13, addrspace 3) + ; VI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; VI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (i8) from unknown-address + 14, addrspace 3) + ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p3) :: (load (i8) from unknown-address + 15, addrspace 3) + ; VI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; VI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; VI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; VI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; GFX9-LABEL: name: test_load_local_v16s8_align16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX9-NEXT: 
[[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX9-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX9-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; GFX9-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; GFX9-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) - ; GFX9-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; GFX9-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; GFX9-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) - ; GFX9-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; GFX9-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; 
GFX9-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX9-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; GFX9-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD 
[[PTR_ADD7]], [[C2]](i32) + ; GFX9-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; GFX9-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (i8) from unknown-address + 12, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; GFX9-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (i8) from unknown-address + 13, addrspace 3) + ; GFX9-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; GFX9-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; GFX9-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (i8) from unknown-address + 14, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p3) :: (load (i8) from unknown-address + 15, addrspace 3) + ; GFX9-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; GFX9-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; GFX9-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v16s8_align16 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 1, addrspace 3) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i32>), align 1, addrspace 3) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; GFX10-LABEL: name: test_load_local_v16s8_align16 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], 
[[C2]](s32) - ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX10-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX10-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; GFX10-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; GFX10-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX10-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; GFX10-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX10-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; GFX10-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; GFX10-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX10-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; GFX10-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], 
[[C]](s32) - ; GFX10-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) - ; GFX10-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; GFX10-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; GFX10-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; GFX10-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) - ; GFX10-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; GFX10-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; GFX10-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; GFX10-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, 
addrspace 3) + ; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX10-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; GFX10-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; GFX10-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; GFX10-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; GFX10-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; GFX10-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX10-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX10-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; GFX10-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX10-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; GFX10-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (i8) from unknown-address + 12, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; GFX10-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (i8) from unknown-address + 13, addrspace 3) + ; GFX10-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; GFX10-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; GFX10-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; GFX10-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (i8) from unknown-address + 14, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p3) :: (load (i8) from unknown-address + 15, addrspace 3) + ; GFX10-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; GFX10-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; GFX10-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; GFX10-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v16s8_align16 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 1, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED-NEXT: 
[[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, align 1, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, align 1, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, align 1, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), align 1, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, align 1, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 8, align 1, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i32) from unknown-address + 12, align 1, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; GFX11-LABEL: name: test_load_local_v16s8_align16 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) 
= G_SHL [[OR1]], [[C3]](s32) - ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX11-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX11-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX11-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; GFX11-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; GFX11-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX11-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; GFX11-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX11-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; GFX11-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; GFX11-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; GFX11-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX11-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; GFX11-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; GFX11-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) - ; GFX11-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; GFX11-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; GFX11-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; GFX11-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], 
[[C]](s32) - ; GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) - ; GFX11-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; GFX11-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; GFX11-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; GFX11-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX11-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; GFX11-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX11-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; GFX11-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX11-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX11-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX11-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX11-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; 
GFX11-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; GFX11-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; GFX11-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; GFX11-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; GFX11-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX11-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX11-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; GFX11-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; GFX11-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX11-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; GFX11-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (i8) from unknown-address + 12, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; GFX11-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (i8) from unknown-address + 13, addrspace 3) + ; GFX11-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; GFX11-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; GFX11-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; GFX11-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (i8) from unknown-address + 14, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p3) :: (load (i8) from unknown-address + 15, addrspace 3) + ; GFX11-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; GFX11-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; GFX11-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; GFX11-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v16s8_align16 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 1, addrspace 3) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i32>), align 1, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) %0:_(p3) = COPY $vgpr0 - %1:_(<16 x s8>) = G_LOAD %0 :: (load (<16 x s8>), align 1, addrspace 3) - %2:_(<4 x s32>) = G_BITCAST %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + %1:_(<16 x i8>) = G_LOAD %0(p3) :: (load (<16 x i8>), align 1, addrspace 3) + %2:_(<4 x i32>) = G_BITCAST %1(<16 x i8>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<4 x i32>) ... 
--- @@ -8544,74 +8544,74 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3) - ; SI-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p3) :: (load (<2 x i16>), addrspace 3) + ; SI-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; ; CI-LABEL: name: test_load_local_v2s16_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3) - ; CI-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p3) :: (load (<2 x i16>), addrspace 3) + ; CI-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; ; CI-DS128-LABEL: name: test_load_local_v2s16_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3) - ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p3) :: (load (<2 x i16>), addrspace 3) + ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; ; VI-LABEL: name: test_load_local_v2s16_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p3) :: (load (<2 x i16>), addrspace 3) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; ; GFX9-LABEL: name: test_load_local_v2s16_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p3) :: (load (<2 x i16>), addrspace 3) + ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s16_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3) - ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p3) :: (load (<2 x i16>), addrspace 3) + ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; ; GFX10-LABEL: name: test_load_local_v2s16_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3) - ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p3) :: (load (<2 x i16>), addrspace 3) + ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s16_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3) - ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; 
GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p3) :: (load (<2 x i16>), addrspace 3) + ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; ; GFX11-LABEL: name: test_load_local_v2s16_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3) - ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p3) :: (load (<2 x i16>), addrspace 3) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s16_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3) - ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p3) :: (load (<2 x i16>), addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) %0:_(p3) = COPY $vgpr0 - %1:_(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 3) - $vgpr0 = COPY %1 + %1:_(<2 x i16>) = G_LOAD %0(p3) :: (load (<2 x i16>), addrspace 3) + $vgpr0 = COPY %1(<2 x i16>) ... --- @@ -8624,132 +8624,132 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C1]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C2]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x i16>) ; ; CI-LABEL: name: test_load_local_v2s16_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 
(s16) from unknown-address + 2, addrspace 3) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]] - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C1]] + ; CI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C1]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C2]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x i16>) ; ; CI-DS128-LABEL: name: test_load_local_v2s16_align2 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]] - ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]] - ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-DS128-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C1]] + ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C1]] + ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C2]](i32) + ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CI-DS128-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x i16>) ; ; VI-LABEL: name: test_load_local_v2s16_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; 
VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C1]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C1]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C2]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x i16>) ; ; GFX9-LABEL: name: test_load_local_v2s16_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s16_align2 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3) - ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p3) :: (load (<2 x i16>), align 2, addrspace 3) + ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; ; GFX10-LABEL: name: test_load_local_v2s16_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: 
[[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX10-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s16_align2 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3) - ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p3) :: (load (<2 x i16>), align 2, addrspace 3) + ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; ; GFX11-LABEL: name: test_load_local_v2s16_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX11-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s16_align2 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load 
(<2 x s16>), align 2, addrspace 3) - ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p3) :: (load (<2 x i16>), align 2, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) %0:_(p3) = COPY $vgpr0 - %1:_(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 2, addrspace 3) - $vgpr0 = COPY %1 + %1:_(<2 x i16>) = G_LOAD %0(p3) :: (load (<2 x i16>), align 2, addrspace 3) + $vgpr0 = COPY %1(<2 x i16>) ... --- @@ -8762,202 +8762,202 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C3]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C3]] - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL2]] - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[OR]], [[C3]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[OR1]], [[C3]] + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; 
SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C4]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL2]] + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x i16>) ; ; CI-LABEL: name: test_load_local_v2s16_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C3]] - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C3]] - ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL2]] - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[OR]], [[C3]] + ; CI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[OR1]], [[C3]] + ; CI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C4]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL2]] + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; CI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x i16>) ; ; 
CI-DS128-LABEL: name: test_load_local_v2s16_align1 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C3]] - ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C3]] - ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL2]] - ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-DS128-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[OR]], [[C3]] + ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[OR1]], [[C3]] + ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C4]](i32) + ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL2]] + ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; CI-DS128-NEXT: 
$vgpr0 = COPY [[BITCAST]](<2 x i16>) ; ; VI-LABEL: name: test_load_local_v2s16_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C3]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C3]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL2]] - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[OR]], [[C3]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[OR1]], [[C3]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C4]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL2]] + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x i16>) ; ; GFX9-LABEL: name: test_load_local_v2s16_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) 
:: (load (s8), addrspace 3) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR]](i32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s16_align1 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3) - ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p3) :: (load (<2 x i16>), align 1, addrspace 3) + ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; ; GFX10-LABEL: name: test_load_local_v2s16_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; 
GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR]](i32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX10-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s16_align1 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3) - ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p3) :: (load (<2 x i16>), align 1, addrspace 3) + ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; ; GFX11-LABEL: name: test_load_local_v2s16_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), 
addrspace 3) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR]](i32) + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX11-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s16_align1 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3) - ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p3) :: (load (<2 x i16>), align 1, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) %0:_(p3) = COPY $vgpr0 - %1:_(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 1, addrspace 3) - $vgpr0 = COPY %1 + %1:_(<2 x i16>) = G_LOAD %0(p3) :: (load (<2 x i16>), align 1, addrspace 3) + $vgpr0 = COPY %1(<2 x 
i16>) ... --- @@ -8970,242 +8970,242 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p3) :: (load (<4 x i16>), addrspace 3) + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[LOAD]](<4 x i16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C1]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND2]], [[C]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL1]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[UV]](<2 x i16>), [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; CI-LABEL: name: test_load_local_v3s16_align8 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) - ; CI-NEXT: 
[[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] - ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p3) :: (load (<4 x i16>), addrspace 3) + ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[LOAD]](<4 x i16>) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; CI-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; CI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; CI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C1]] + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C1]] + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND2]], [[C]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL1]] + ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[UV]](<2 x i16>), [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; CI-DS128-LABEL: name: test_load_local_v3s16_align8 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) - ; CI-DS128-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT 
i32 16 - ; CI-DS128-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-DS128-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CI-DS128-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; CI-DS128-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CI-DS128-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-DS128-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] - ; CI-DS128-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-DS128-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p3) :: (load (<4 x i16>), addrspace 3) + ; CI-DS128-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[LOAD]](<4 x i16>) + ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-DS128-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; CI-DS128-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; CI-DS128-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; CI-DS128-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; CI-DS128-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C1]] + ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CI-DS128-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C1]] + ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND2]], [[C]](i32) + ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL1]] + ; CI-DS128-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CI-DS128-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[UV]](<2 x i16>), [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) + ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; VI-LABEL: name: test_load_local_v3s16_align8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: 
[[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p3) :: (load (<4 x i16>), addrspace 3) + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[LOAD]](<4 x i16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C1]] + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C1]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND2]], [[C]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL1]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[UV]](<2 x i16>), [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX9-LABEL: name: test_load_local_v3s16_align8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: 
[[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p3) :: (load (<4 x i16>), addrspace 3) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[LOAD]](<4 x i16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[UV]](<2 x i16>), [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s16_align8 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) - ; GFX9-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-UNALIGNED-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX9-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-UNALIGNED-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-UNALIGNED-NEXT: 
[[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p3) :: (load (<4 x i16>), addrspace 3) + ; GFX9-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[LOAD]](<4 x i16>) + ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX9-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX9-UNALIGNED-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX9-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; GFX9-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX9-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-UNALIGNED-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX9-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[UV]](<2 x i16>), [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX10-LABEL: name: test_load_local_v3s16_align8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x 
s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p3) :: (load (<4 x i16>), addrspace 3) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[LOAD]](<4 x i16>) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[UV]](<2 x i16>), [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s16_align8 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX10-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX10-UNALIGNED-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX10-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX10-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX10-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX10-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10-UNALIGNED-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX10-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p3) :: (load (<4 x i16>), addrspace 3) + ; 
GFX10-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[LOAD]](<4 x i16>) + ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX10-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX10-UNALIGNED-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX10-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; GFX10-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX10-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX10-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10-UNALIGNED-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX10-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[UV]](<2 x i16>), [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX11-LABEL: name: test_load_local_v3s16_align8 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p3) :: (load (<4 x i16>), addrspace 3) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[LOAD]](<4 x i16>) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11-NEXT: 
[[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[UV]](<2 x i16>), [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v3s16_align8 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) - ; GFX11-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX11-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX11-UNALIGNED-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX11-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX11-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX11-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX11-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX11-UNALIGNED-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX11-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p3) :: (load (<4 x i16>), addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[LOAD]](<4 x i16>) + ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX11-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX11-UNALIGNED-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX11-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = 
G_BITCAST [[UV2]](<2 x i16>) + ; GFX11-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX11-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX11-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX11-UNALIGNED-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX11-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX11-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[UV]](<2 x i16>), [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) %0:_(p3) = COPY $vgpr0 - %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 8, addrspace 3) - %2:_(<3 x s16>) = G_IMPLICIT_DEF - %3:_(<6 x s16>) = G_CONCAT_VECTORS %1, %2 - $vgpr0_vgpr1_vgpr2 = COPY %3 + %1:_(<3 x i16>) = G_LOAD %0(p3) :: (load (<3 x i16>), align 8, addrspace 3) + %2:_(<3 x i16>) = G_IMPLICIT_DEF + %3:_(<6 x i16>) = G_CONCAT_VECTORS %1(<3 x i16>), %2(<3 x i16>) + $vgpr0_vgpr1_vgpr2 = COPY %3(<6 x i16>) ... --- @@ -9218,320 +9218,320 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), 
[[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C3]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C3]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C2]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C3]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C3]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C2]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C3]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND4]], [[C2]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL2]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x i16>), [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; CI-LABEL: name: test_load_local_v3s16_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-NEXT: 
[[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]] - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]] - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]] - ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]] - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] - ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C3]] + ; CI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C3]] + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C2]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C3]] + ; CI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C3]] + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C2]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CI-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C3]] + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND4]], [[C2]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL2]] + ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x i16>), [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; CI-DS128-LABEL: name: test_load_local_v3s16_align2 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 
(s16), addrspace 3) - ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; CI-DS128-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-DS128-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-DS128-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; CI-DS128-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]] - ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]] - ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-DS128-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]] - ; CI-DS128-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]] - ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) - ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-DS128-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-DS128-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) - ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] - ; CI-DS128-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-DS128-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; CI-DS128-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; CI-DS128-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-DS128-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; CI-DS128-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C3]] + ; CI-DS128-NEXT: 
[[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C3]] + ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C2]](i32) + ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CI-DS128-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C3]] + ; CI-DS128-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C3]] + ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C2]](i32) + ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; CI-DS128-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CI-DS128-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C3]] + ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND4]], [[C2]](i32) + ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL2]] + ; CI-DS128-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; CI-DS128-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x i16>), [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) + ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; VI-LABEL: name: test_load_local_v3s16_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), 
addrspace 3) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C3]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C3]] + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C2]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C3]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C3]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C2]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; VI-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C3]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND4]], [[C2]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL2]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x i16>), [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX9-LABEL: name: test_load_local_v3s16_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC 
[[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s16_align2 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX9-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX9-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; GFX9-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; 
GFX9-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; GFX9-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; GFX9-UNALIGNED-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-UNALIGNED-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; GFX9-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX9-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; GFX9-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX9-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX9-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; GFX9-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; GFX9-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX9-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX9-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; GFX9-UNALIGNED-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX9-UNALIGNED-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX9-UNALIGNED-NEXT: 
[[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX10-LABEL: name: test_load_local_v3s16_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; 
GFX10-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s16_align2 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX10-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX10-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX10-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; GFX10-UNALIGNED-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX10-UNALIGNED-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) 
= G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; GFX10-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX10-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX10-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; GFX10-UNALIGNED-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX10-UNALIGNED-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX11-LABEL: name: test_load_local_v3s16_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - 
; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v3s16_align2 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX11-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX11-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; GFX11-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX11-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX11-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX11-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 
3) - ; GFX11-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX11-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX11-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX11-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX11-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; GFX11-UNALIGNED-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX11-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX11-UNALIGNED-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX11-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX11-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX11-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX11-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX11-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; GFX11-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX11-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX11-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX11-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; GFX11-UNALIGNED-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX11-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX11-UNALIGNED-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX11-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x 
i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) %0:_(p3) = COPY $vgpr0 - %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 2, addrspace 3) - %2:_(<3 x s16>) = G_IMPLICIT_DEF - %3:_(<6 x s16>) = G_CONCAT_VECTORS %1, %2 - $vgpr0_vgpr1_vgpr2 = COPY %3 + %1:_(<3 x i16>) = G_LOAD %0(p3) :: (load (<3 x i16>), align 2, addrspace 3) + %2:_(<3 x i16>) = G_IMPLICIT_DEF + %3:_(<6 x i16>) = G_CONCAT_VECTORS %1(<3 x i16>), %2(<3 x i16>) + $vgpr0_vgpr1_vgpr2 = COPY %3(<6 x i16>) ... --- @@ -9544,418 +9544,418 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C5]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C5]] - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL3]] - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C5]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]] - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND 
[[BITCAST1]], [[C5]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]] - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C4]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; SI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[OR]], [[C5]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[OR1]], [[C5]] + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C4]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL3]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR3]](i32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[OR2]], [[C5]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C5]] + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C4]](i32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL4]] + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR4]](i32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C5]] + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[AND4]], [[C4]](i32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL5]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR5]](i32) + ; SI-NEXT: 
[[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x i16>), [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; CI-LABEL: name: test_load_local_v3s16_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) - ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C5]] - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C5]] - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL3]] - ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C5]] - ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]] - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] - ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]] - ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), 
[[BITCAST4]](<2 x s16>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C4]](i32) + ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[OR]], [[C5]] + ; CI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[OR1]], [[C5]] + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C4]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL3]] + ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR3]](i32) + ; CI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[OR2]], [[C5]] + ; CI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C5]] + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C4]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL4]] + ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR4]](i32) + ; CI-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C5]] + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[AND4]], [[C4]](i32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL5]] + ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR5]](i32) + ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x i16>), [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; CI-DS128-LABEL: name: test_load_local_v3s16_align1 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY 
$vgpr0 - ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-DS128-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; CI-DS128-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-DS128-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-DS128-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) - ; CI-DS128-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CI-DS128-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C5]] - ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C5]] - ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; CI-DS128-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL3]] - ; CI-DS128-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C5]] - ; CI-DS128-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]] - ; CI-DS128-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) - ; CI-DS128-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] - ; CI-DS128-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CI-DS128-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] - ; CI-DS128-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32) - ; CI-DS128-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]] - ; CI-DS128-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; CI-DS128-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY 
[[CONCAT_VECTORS]](<6 x s16>) + ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-DS128-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; CI-DS128-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; CI-DS128-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-DS128-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C4]](i32) + ; CI-DS128-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CI-DS128-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[OR]], [[C5]] + ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[OR1]], [[C5]] + ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C4]](i32) + ; CI-DS128-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL3]] + ; CI-DS128-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR3]](i32) + ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[OR2]], [[C5]] + ; CI-DS128-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C5]] + ; CI-DS128-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C4]](i32) + ; CI-DS128-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL4]] + ; CI-DS128-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR4]](i32) + ; CI-DS128-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C5]] + ; CI-DS128-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[AND4]], [[C4]](i32) + ; CI-DS128-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL5]] + ; CI-DS128-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR5]](i32) + ; CI-DS128-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x i16>), [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) + ; CI-DS128-NEXT: 
$vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; VI-LABEL: name: test_load_local_v3s16_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C5]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C5]] - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL3]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C5]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]] - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD 
[[COPY]](p3) :: (load (i8), addrspace 3) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C4]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[OR]], [[C5]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[OR1]], [[C5]] + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C4]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL3]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR3]](i32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[OR2]], [[C5]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C5]] + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C4]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL4]] + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR4]](i32) + ; VI-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[AND4]], [[C4]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL5]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR5]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x i16>), [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX9-LABEL: name: test_load_local_v3s16_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; 
GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR]](i32) + ; 
GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[OR2]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C4]](i32) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s16_align1 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 1, addrspace 3) - ; GFX9-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX9-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, align 1, addrspace 3) - ; GFX9-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, align 1, addrspace 3) - ; 
GFX9-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; GFX9-UNALIGNED-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-UNALIGNED-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), align 1, addrspace 3) + ; GFX9-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX9-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, align 1, addrspace 3) + ; GFX9-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX9-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX9-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, align 1, addrspace 3) + ; GFX9-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; GFX9-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX9-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX9-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; GFX9-UNALIGNED-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX9-UNALIGNED-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX9-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), 
[[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX10-LABEL: name: test_load_local_v3s16_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) - ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT 
i32 1 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR]](i32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[OR2]](i32) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C4]](i32) + ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s16_align1 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 1, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED-NEXT: 
[[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, align 1, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, align 1, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX10-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX10-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX10-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; GFX10-UNALIGNED-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX10-UNALIGNED-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), align 1, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, align 1, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, align 1, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; GFX10-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX10-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX10-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; GFX10-UNALIGNED-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX10-UNALIGNED-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = 
G_TRUNC [[BITCAST1]](i32) + ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX11-LABEL: name: test_load_local_v3s16_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32) - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) - ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR 
[[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR]](i32) + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[OR2]](i32) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX11-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C4]](i32) + ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX11-UNALIGNED-LABEL: name: 
test_load_local_v3s16_align1 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 1, addrspace 3) - ; GFX11-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX11-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, align 1, addrspace 3) - ; GFX11-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX11-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX11-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX11-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, align 1, addrspace 3) - ; GFX11-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX11-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX11-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX11-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX11-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; GFX11-UNALIGNED-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX11-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX11-UNALIGNED-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX11-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), align 1, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX11-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, align 1, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX11-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX11-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX11-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, align 1, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; GFX11-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX11-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; 
GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX11-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX11-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; GFX11-UNALIGNED-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX11-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX11-UNALIGNED-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX11-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) %0:_(p3) = COPY $vgpr0 - %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 1, addrspace 3) - %2:_(<3 x s16>) = G_IMPLICIT_DEF - %3:_(<6 x s16>) = G_CONCAT_VECTORS %1, %2 - $vgpr0_vgpr1_vgpr2 = COPY %3 + %1:_(<3 x i16>) = G_LOAD %0(p3) :: (load (<3 x i16>), align 1, addrspace 3) + %2:_(<3 x i16>) = G_IMPLICIT_DEF + %3:_(<6 x i16>) = G_CONCAT_VECTORS %1(<3 x i16>), %2(<3 x i16>) + $vgpr0_vgpr1_vgpr2 = COPY %3(<6 x i16>) ... --- @@ -9967,74 +9967,74 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p3) :: (load (<4 x i16>), addrspace 3) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; CI-LABEL: name: test_load_local_v4s16_align8 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p3) :: (load (<4 x i16>), addrspace 3) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; CI-DS128-LABEL: name: test_load_local_v4s16_align8 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) - ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p3) :: (load (<4 x i16>), addrspace 3) + ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; VI-LABEL: name: test_load_local_v4s16_align8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p3) :: (load (<4 x i16>), addrspace 3) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; GFX9-LABEL: name: test_load_local_v4s16_align8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: 
[[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p3) :: (load (<4 x i16>), addrspace 3) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s16_align8 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p3) :: (load (<4 x i16>), addrspace 3) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; GFX10-LABEL: name: test_load_local_v4s16_align8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p3) :: (load (<4 x i16>), addrspace 3) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s16_align8 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p3) :: (load (<4 x i16>), addrspace 3) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; GFX11-LABEL: name: test_load_local_v4s16_align8 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p3) :: (load (<4 x i16>), addrspace 3) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v4s16_align8 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p3) :: (load (<4 x i16>), addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) %0:_(p3) = COPY $vgpr0 - %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 8, addrspace 3) - $vgpr0_vgpr1 = COPY %1 + %1:_(<4 x i16>) = G_LOAD %0(p3) :: (load (<4 x i16>), addrspace 3) + $vgpr0_vgpr1 = COPY %1(<4 x i16>) ... 
--- @@ -10047,128 +10047,128 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, align 4, addrspace 3) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]] - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), align 4, addrspace 3) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, align 4, addrspace 3) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 6 + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, addrspace 3) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C3]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C3]] + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C4]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C3]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[LOAD3]], [[C3]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C4]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 
x i16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x i16>), [[BITCAST1]](<2 x i16>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; CI-LABEL: name: test_load_local_v4s16_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p3) :: (load (<4 x i16>), align 4, addrspace 3) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; CI-DS128-LABEL: name: test_load_local_v4s16_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3) - ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p3) :: (load (<4 x i16>), align 4, addrspace 3) + ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; VI-LABEL: name: test_load_local_v4s16_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p3) :: (load (<4 x i16>), align 4, addrspace 3) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; GFX9-LABEL: name: test_load_local_v4s16_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p3) :: (load (<4 x i16>), align 4, addrspace 3) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s16_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p3) :: (load (<4 x i16>), align 4, addrspace 3) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; GFX10-LABEL: name: test_load_local_v4s16_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, align 4, addrspace 3) - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC 
[[LOAD2]](s32) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 - ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), align 4, addrspace 3) + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, align 4, addrspace 3) + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 6 + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, addrspace 3) + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD3]](i32) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s16_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, align 4, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; 
GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), align 4, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, align 4, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 6 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD3]](i32) + ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX11-LABEL: name: test_load_local_v4s16_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p3) :: (load (<4 x i16>), align 4, addrspace 3) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v4s16_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p3) :: (load (<4 x i16>), align 4, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) %0:_(p3) = COPY $vgpr0 - %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 4, addrspace 3) - $vgpr0_vgpr1 = COPY %1 + 
%1:_(<4 x i16>) = G_LOAD %0(p3) :: (load (<4 x i16>), align 4, addrspace 3) + $vgpr0_vgpr1 = COPY %1(<4 x i16>) ... --- @@ -10180,226 +10180,226 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]] - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 6 + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, addrspace 3) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C3]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C3]] + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C4]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C3]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[LOAD3]], [[C3]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C4]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; SI-NEXT: 
[[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x i16>), [[BITCAST1]](<2 x i16>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; CI-LABEL: name: test_load_local_v4s16_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]] - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]] - ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]] - ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]] - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 6 + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, addrspace 3) + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C3]] + ; CI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C3]] + ; CI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C4]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C3]] + ; CI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[LOAD3]], [[C3]] + ; CI-NEXT: 
[[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C4]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x i16>), [[BITCAST1]](<2 x i16>) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; CI-DS128-LABEL: name: test_load_local_v4s16_align2 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 - ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) - ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]] - ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]] - ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]] - ; CI-DS128-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]] - ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) - ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-DS128-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-DS128-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) - ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 6 + ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, addrspace 3) + ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C3]] + ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C3]] + ; CI-DS128-NEXT: 
[[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C4]](i32) + ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C3]] + ; CI-DS128-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[LOAD3]], [[C3]] + ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C4]](i32) + ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; CI-DS128-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CI-DS128-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x i16>), [[BITCAST1]](<2 x i16>) + ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; VI-LABEL: name: test_load_local_v4s16_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 6 + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, addrspace 3) + ; VI-NEXT: 
[[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C3]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C3]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C4]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C3]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[LOAD3]], [[C3]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C4]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x i16>), [[BITCAST1]](<2 x i16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX9-LABEL: name: test_load_local_v4s16_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 6 + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD 
[[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, addrspace 3) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD3]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s16_align2 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 2, addrspace 3) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p3) :: (load (<4 x i16>), align 2, addrspace 3) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; GFX10-LABEL: name: test_load_local_v4s16_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 - ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC 
[[LOAD2]](i32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 6 + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, addrspace 3) + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD3]](i32) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s16_align2 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; 
GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 6 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD3]](i32) + ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX11-LABEL: name: test_load_local_v4s16_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 - ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 6 + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, addrspace 3) + ; GFX11-NEXT: 
[[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD3]](i32) + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v4s16_align2 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 2, addrspace 3) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p3) :: (load (<4 x i16>), align 2, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) %0:_(p3) = COPY $vgpr0 - %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 2, addrspace 3) - $vgpr0_vgpr1 = COPY %1 + %1:_(<4 x i16>) = G_LOAD %0(p3) :: (load (<4 x i16>), align 2, addrspace 3) + $vgpr0_vgpr1 = COPY %1(<4 x i16>) ... --- @@ -10412,352 +10412,352 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; SI-NEXT: 
[[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C5]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C5]] - ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL4]] - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C5]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C5]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL5]] - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 6 + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; SI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[OR]], [[C5]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[OR1]], [[C5]] + ; SI-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 
16 + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C6]](i32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL4]] + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR4]](i32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[OR2]], [[C5]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[OR3]], [[C5]] + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C6]](i32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL5]] + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR5]](i32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x i16>), [[BITCAST1]](<2 x i16>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; CI-LABEL: name: test_load_local_v4s16_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C5]] - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C5]] - ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL4]] - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CI-NEXT: 
[[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C5]] - ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL5]] - ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 6 + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[OR]], [[C5]] + ; CI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[OR1]], [[C5]] + ; CI-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C6]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL4]] + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR4]](i32) + ; CI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[OR2]], [[C5]] + ; CI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[OR3]], [[C5]] + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C6]](i32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL5]] + ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR5]](i32) + ; CI-NEXT: 
[[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x i16>), [[BITCAST1]](<2 x i16>) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; CI-DS128-LABEL: name: test_load_local_v4s16_align1 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-DS128-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 - ; CI-DS128-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-DS128-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C5]] - ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C5]] - ; CI-DS128-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-DS128-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) - ; CI-DS128-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL4]] - ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C5]] - ; CI-DS128-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C5]] - ; CI-DS128-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) - ; CI-DS128-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL5]] - ; CI-DS128-NEXT: 
[[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; CI-DS128-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) - ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-DS128-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 6 + ; CI-DS128-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-DS128-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[OR]], [[C5]] + ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[OR1]], [[C5]] + ; CI-DS128-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-DS128-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C6]](i32) + ; CI-DS128-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL4]] + ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR4]](i32) + ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[OR2]], [[C5]] + ; CI-DS128-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[OR3]], [[C5]] + ; CI-DS128-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C6]](i32) + ; CI-DS128-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL5]] + ; CI-DS128-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR5]](i32) + ; CI-DS128-NEXT: 
[[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x i16>), [[BITCAST1]](<2 x i16>) + ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; VI-LABEL: name: test_load_local_v4s16_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C5]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C5]] - ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL4]] - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C5]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL5]] - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: 
[[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 6 + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[OR]], [[C5]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[OR1]], [[C5]] + ; VI-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C6]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL4]] + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR4]](i32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[OR2]], [[C5]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[OR3]], [[C5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C6]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL5]] + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR5]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x i16>), [[BITCAST1]](<2 x i16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX9-LABEL: name: test_load_local_v4s16_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], 
[[C]](s32) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32) - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 - ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[OR3]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR]](i32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD 
[[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[OR2]](i32) + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 6 + ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[OR3]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s16_align1 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 1, addrspace 3) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p3) :: (load (<4 x i16>), align 1, addrspace 3) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; GFX10-LABEL: name: test_load_local_v4s16_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT 
i32 2 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32) - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 - ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[OR3]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR]](i32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], 
[[ZEXTLOAD1]] + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[OR2]](i32) + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 6 + ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[OR3]](i32) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s16_align1 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 1, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, align 1, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, align 1, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, align 1, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; 
GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), align 1, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, align 1, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, align 1, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 6 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, align 1, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD3]](i32) + ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX11-LABEL: name: test_load_local_v4s16_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) 
= G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32) - ; GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 - ; GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[OR3]](s32) - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR]](i32) + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[OR2]](i32) + ; GFX11-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT 
i32 6 + ; GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; GFX11-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; GFX11-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[OR3]](i32) + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v4s16_align1 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 1, addrspace 3) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p3) :: (load (<4 x i16>), align 1, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) %0:_(p3) = COPY $vgpr0 - %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 1, addrspace 3) - $vgpr0_vgpr1 = COPY %1 + %1:_(<4 x i16>) = G_LOAD %0(p3) :: (load (<4 x i16>), align 1, addrspace 3) + $vgpr0_vgpr1 = COPY %1(<4 x i16>) ... 
--- @@ -10770,74 +10770,74 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), addrspace 3) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; CI-LABEL: name: test_load_local_v2s32_align8 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), addrspace 3) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; CI-DS128-LABEL: name: test_load_local_v2s32_align8 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3) - ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), addrspace 3) + ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; VI-LABEL: name: test_load_local_v2s32_align8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), addrspace 3) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX9-LABEL: name: test_load_local_v2s32_align8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), addrspace 3) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s32_align8 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), addrspace 3) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX10-LABEL: name: test_load_local_v2s32_align8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), addrspace 3) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s32_align8 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 
x s32>), addrspace 3) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), addrspace 3) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX11-LABEL: name: test_load_local_v2s32_align8 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), addrspace 3) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s32_align8 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) %0:_(p3) = COPY $vgpr0 - %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 8, addrspace 3) - $vgpr0_vgpr1 = COPY %1 + %1:_(<2 x i32>) = G_LOAD %0(p3) :: (load (<2 x i32>), addrspace 3) + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... --- @@ -10850,74 +10850,74 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), align 4, addrspace 3) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; CI-LABEL: name: test_load_local_v2s32_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), align 4, addrspace 3) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; CI-DS128-LABEL: name: test_load_local_v2s32_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3) - ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), align 4, addrspace 3) + ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; VI-LABEL: name: test_load_local_v2s32_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), align 4, addrspace 3) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX9-LABEL: name: test_load_local_v2s32_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: 
[[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), align 4, addrspace 3) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s32_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), align 4, addrspace 3) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX10-LABEL: name: test_load_local_v2s32_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), align 4, addrspace 3) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s32_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), align 4, addrspace 3) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX11-LABEL: name: test_load_local_v2s32_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), align 4, addrspace 3) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s32_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), align 4, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) %0:_(p3) = COPY $vgpr0 - %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 4, addrspace 3) - $vgpr0_vgpr1 = COPY %1 + %1:_(<2 x i32>) = G_LOAD %0(p3) :: (load (<2 x i32>), align 4, addrspace 3) + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... 
--- @@ -10930,176 +10930,176 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, addrspace 3) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; CI-LABEL: name: test_load_local_v2s32_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, 
addrspace 3) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, addrspace 3) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; CI-DS128-LABEL: name: test_load_local_v2s32_align2 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) - ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) - ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-DS128-NEXT: 
[[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, addrspace 3) + ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32) + ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; VI-LABEL: name: test_load_local_v2s32_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, addrspace 3) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: 
[[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX9-LABEL: name: test_load_local_v2s32_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, addrspace 3) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s32_align2 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 2, addrspace 3) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), align 2, addrspace 3) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX10-LABEL: name: test_load_local_v2s32_align2 ; 
GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) - ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, addrspace 3) + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s32_align2 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 2, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, align 2, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD 
[[COPY]](p3) :: (load (i32), align 2, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, align 2, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX11-LABEL: name: test_load_local_v2s32_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) - ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, addrspace 3) + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s32_align2 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD 
[[COPY]](p3) :: (load (<2 x s32>), align 2, addrspace 3) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), align 2, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) %0:_(p3) = COPY $vgpr0 - %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 2, addrspace 3) - $vgpr0_vgpr1 = COPY %1 + %1:_(<2 x i32>) = G_LOAD %0(p3) :: (load (<2 x i32>), align 2, addrspace 3) + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... --- @@ -11112,302 +11112,302 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: 
[[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; CI-LABEL: name: test_load_local_v2s32_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-NEXT: 
[[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from 
unknown-address + 5, addrspace 3) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; CI-DS128-LABEL: name: test_load_local_v2s32_align1 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-DS128-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-DS128-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: 
(load (s8) from unknown-address + 7, addrspace 3) - ; CI-DS128-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-DS128-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-DS128-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) - ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-DS128-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-DS128-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; CI-DS128-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-DS128-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-DS128-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32) + ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; VI-LABEL: name: test_load_local_v2s32_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; 
VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], 
[[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX9-LABEL: name: test_load_local_v2s32_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from 
unknown-address + 4, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, 
addrspace 3) + ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s32_align1 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 1, addrspace 3) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), align 1, addrspace 3) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX10-LABEL: name: test_load_local_v2s32_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; GFX10-NEXT: 
[[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX10-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX10-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s32_align1 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 1, addrspace 3) - ; GFX10-UNALIGNED-NEXT: 
[[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, align 1, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), align 1, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, align 1, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX11-LABEL: name: test_load_local_v2s32_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - 
; GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX11-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX11-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; GFX11-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX11-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; GFX11-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX11-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX11-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX11-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s32_align1 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 1, addrspace 3) - ; 
GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), align 1, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) %0:_(p3) = COPY $vgpr0 - %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 1, addrspace 3) - $vgpr0_vgpr1 = COPY %1 + %1:_(<2 x i32>) = G_LOAD %0(p3) :: (load (<2 x i32>), align 1, addrspace 3) + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... --- @@ -11420,403 +11420,403 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, 
addrspace 3) - ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; SI-NEXT: 
[[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; SI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; SI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; SI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; SI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; SI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; SI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; SI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) ; ; CI-LABEL: name: test_load_local_v3s32_align16 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load 
(s8) from unknown-address + 6, addrspace 3) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; CI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = 
G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; CI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; CI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; CI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; CI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; CI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) ; ; CI-DS128-LABEL: name: test_load_local_v3s32_align16 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; 
CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-DS128-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-DS128-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; CI-DS128-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-DS128-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-DS128-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-DS128-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; CI-DS128-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; CI-DS128-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; CI-DS128-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; CI-DS128-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; CI-DS128-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-DS128-NEXT: 
[[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-DS128-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-DS128-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; CI-DS128-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-DS128-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-DS128-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-DS128-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; CI-DS128-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; CI-DS128-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; CI-DS128-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; CI-DS128-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; CI-DS128-NEXT: [[OR8:%[0-9]+]]:_(i32) 
= G_OR [[SHL8]], [[OR6]] + ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) ; ; VI-LABEL: name: test_load_local_v3s32_align16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; VI-NEXT: 
[[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD 
[[PTR_ADD7]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; VI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; VI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; VI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; VI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) ; ; GFX9-LABEL: name: test_load_local_v3s32_align16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD 
[[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX9-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX9-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; 
GFX9-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX9-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; GFX9-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; GFX9-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s32_align16 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 1, addrspace 3) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p3) :: (load (<3 x i32>), align 1, addrspace 3) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) ; ; GFX10-LABEL: name: test_load_local_v3s32_align16 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 
2 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX10-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX10-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; GFX10-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; GFX10-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX10-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; GFX10-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX10-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; GFX10-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; GFX10-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), 
addrspace 3) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX10-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; GFX10-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; GFX10-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; GFX10-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; GFX10-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; GFX10-NEXT: 
[[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX10-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX10-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; GFX10-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s32_align16 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 1, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, align 1, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, align 1, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), align 1, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, align 1, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 8, align 1, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) ; ; GFX11-LABEL: name: test_load_local_v3s32_align16 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 
(s8) from unknown-address + 3, addrspace 3) - ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX11-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX11-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX11-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; GFX11-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; GFX11-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX11-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; GFX11-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX11-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; GFX11-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; GFX11-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = 
G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX11-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; GFX11-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX11-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; GFX11-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX11-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX11-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX11-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX11-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; GFX11-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; GFX11-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; GFX11-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; GFX11-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; GFX11-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX11-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX11-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; GFX11-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; 
GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v3s32_align16 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 1, addrspace 3) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p3) :: (load (<3 x i32>), align 1, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) %0:_(p3) = COPY $vgpr0 - %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 1, addrspace 3) - $vgpr0_vgpr1_vgpr2 = COPY %1 + %1:_(<3 x i32>) = G_LOAD %0(p3) :: (load (<3 x i32>), align 1, addrspace 3) + $vgpr0_vgpr1_vgpr2 = COPY %1(<3 x i32>) ... --- @@ -11829,126 +11829,126 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), align 4, addrspace 3) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 8, addrspace 3) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<2 x i32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[LOAD1]](i32) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) ; ; CI-LABEL: name: test_load_local_v3s32_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) - ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), align 4, addrspace 3) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 8, addrspace 3) + ; CI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<2 x i32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = 
G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[LOAD1]](i32) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) ; ; CI-DS128-LABEL: name: test_load_local_v3s32_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) - ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) - ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, addrspace 3) + ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 8, addrspace 3) + ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) ; ; VI-LABEL: name: test_load_local_v3s32_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, addrspace 3) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 8, addrspace 3) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x 
i32>) ; ; GFX9-LABEL: name: test_load_local_v3s32_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, addrspace 3) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 8, addrspace 3) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s32_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 4, addrspace 3) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p3) :: (load (<3 x i32>), align 4, addrspace 3) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) ; ; GFX10-LABEL: name: test_load_local_v3s32_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, addrspace 3) + ; 
GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 8, addrspace 3) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s32_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 8, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) ; ; GFX11-LABEL: name: test_load_local_v3s32_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) - ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: 
[[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 8, addrspace 3) + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v3s32_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 4, addrspace 3) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p3) :: (load (<3 x i32>), align 4, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) %0:_(p3) = COPY $vgpr0 - %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 4, addrspace 3) - $vgpr0_vgpr1_vgpr2 = COPY %1 + %1:_(<3 x i32>) = G_LOAD %0(p3) :: (load (<3 x i32>), align 4, addrspace 3) + $vgpr0_vgpr1_vgpr2 = COPY %1(<3 x i32>) ... --- @@ -11961,82 +11961,82 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 16, addrspace 3) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 8, addrspace 3) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), align 16, addrspace 3) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x i32>) from unknown-address + 8, addrspace 3) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i32>) = G_CONCAT_VECTORS [[LOAD]](<2 x i32>), [[LOAD1]](<2 x i32>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x i32>) ; ; CI-LABEL: name: test_load_local_v4s32_align16 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 16, addrspace 3) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 8, addrspace 3) - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), align 16, addrspace 3) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: 
[[LOAD1:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x i32>) from unknown-address + 8, addrspace 3) + ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i32>) = G_CONCAT_VECTORS [[LOAD]](<2 x i32>), [[LOAD1]](<2 x i32>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x i32>) ; ; CI-DS128-LABEL: name: test_load_local_v4s32_align16 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), addrspace 3) - ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i32>), addrspace 3) + ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; VI-LABEL: name: test_load_local_v4s32_align16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), addrspace 3) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i32>), addrspace 3) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; GFX9-LABEL: name: test_load_local_v4s32_align16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i32>), addrspace 3) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s32_align16 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), addrspace 3) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i32>), addrspace 3) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; GFX10-LABEL: name: test_load_local_v4s32_align16 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), addrspace 3) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i32>), addrspace 3) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s32_align16 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), addrspace 3) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i32>), addrspace 3) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; GFX11-LABEL: name: test_load_local_v4s32_align16 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; 
GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), addrspace 3) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i32>), addrspace 3) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v4s32_align16 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), addrspace 3) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i32>), addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) %0:_(p3) = COPY $vgpr0 - %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 16, addrspace 3) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<4 x i32>) = G_LOAD %0(p3) :: (load (<4 x i32>), addrspace 3) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<4 x i32>) ... --- @@ -12049,102 +12049,102 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 8, addrspace 3) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), addrspace 3) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x i32>) from unknown-address + 8, addrspace 3) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i32>) = G_CONCAT_VECTORS [[LOAD]](<2 x i32>), [[LOAD1]](<2 x i32>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x i32>) ; ; CI-LABEL: name: test_load_local_v4s32_align8 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 8, addrspace 3) - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), addrspace 3) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x i32>) from unknown-address + 8, addrspace 3) + ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i32>) = G_CONCAT_VECTORS [[LOAD]](<2 x i32>), [[LOAD1]](<2 x i32>) + ; CI-NEXT: 
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x i32>) ; ; CI-DS128-LABEL: name: test_load_local_v4s32_align8 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3) - ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i32>), align 8, addrspace 3) + ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; VI-LABEL: name: test_load_local_v4s32_align8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i32>), align 8, addrspace 3) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; GFX9-LABEL: name: test_load_local_v4s32_align8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i32>), align 8, addrspace 3) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s32_align8 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i32>), align 8, addrspace 3) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; GFX10-LABEL: name: test_load_local_v4s32_align8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, align 8, addrspace 3) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), align 8, addrspace 3) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; 
GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, addrspace 3) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 8, align 8, addrspace 3) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i32) from unknown-address + 12, addrspace 3) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s32_align8 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, align 8, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), align 8, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 8, align 8, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i32) from unknown-address + 12, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; GFX11-LABEL: name: 
test_load_local_v4s32_align8 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i32>), align 8, addrspace 3) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v4s32_align8 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i32>), align 8, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) %0:_(p3) = COPY $vgpr0 - %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 8, addrspace 3) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<4 x i32>) = G_LOAD %0(p3) :: (load (<4 x i32>), align 8, addrspace 3) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<4 x i32>) ... --- @@ -12157,142 +12157,142 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 8, align 4, addrspace 3) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), align 4, addrspace 3) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x i32>) from unknown-address + 8, align 4, addrspace 3) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i32>) = G_CONCAT_VECTORS [[LOAD]](<2 x i32>), [[LOAD1]](<2 x i32>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x i32>) ; ; CI-LABEL: name: test_load_local_v4s32_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 8, align 4, addrspace 3) - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), align 4, addrspace 3) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: 
[[LOAD1:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x i32>) from unknown-address + 8, align 4, addrspace 3) + ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i32>) = G_CONCAT_VECTORS [[LOAD]](<2 x i32>), [[LOAD1]](<2 x i32>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x i32>) ; ; CI-DS128-LABEL: name: test_load_local_v4s32_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) - ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) - ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) - ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, addrspace 3) + ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 8, addrspace 3) + ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i32) from unknown-address + 12, addrspace 3) + ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; VI-LABEL: name: test_load_local_v4s32_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = 
G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, addrspace 3) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 8, addrspace 3) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i32) from unknown-address + 12, addrspace 3) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; GFX9-LABEL: name: test_load_local_v4s32_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, addrspace 3) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 8, addrspace 3) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i32) from unknown-address + 12, addrspace 3) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x 
i32>) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s32_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 4, addrspace 3) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i32>), align 4, addrspace 3) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; GFX10-LABEL: name: test_load_local_v4s32_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, addrspace 3) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 8, addrspace 3) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i32) from unknown-address + 12, addrspace 3) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s32_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = 
G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 8, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i32) from unknown-address + 12, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; GFX11-LABEL: name: test_load_local_v4s32_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) - ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) - ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; 
GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 8, addrspace 3) + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i32) from unknown-address + 12, addrspace 3) + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v4s32_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 4, addrspace 3) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i32>), align 4, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) %0:_(p3) = COPY $vgpr0 - %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 4, addrspace 3) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<4 x i32>) = G_LOAD %0(p3) :: (load (<4 x i32>), align 4, addrspace 3) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<4 x i32>) ... --- @@ -12305,282 +12305,282 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3) - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3) 
- ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR3]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, addrspace 3) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i16) from unknown-address + 8, addrspace 3) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p3) :: (load (i16) from unknown-address + 10, addrspace 3) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i16) from unknown-address + 12, addrspace 3) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i16) from unknown-address + 14, addrspace 3) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR3]](i32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i32>), [[BUILD_VECTOR1]](<2 x i32>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x i32>) ; ; CI-LABEL: name: test_load_local_v4s32_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: 
[[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3) - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR3]](s32) - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, addrspace 3) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], 
[[ZEXTLOAD1]] + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32) + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i16) from unknown-address + 8, addrspace 3) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p3) :: (load (i16) from unknown-address + 10, addrspace 3) + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i16) from unknown-address + 12, addrspace 3) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i16) from unknown-address + 14, addrspace 3) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR3]](i32) + ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i32>), [[BUILD_VECTOR1]](<2 x i32>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x i32>) ; ; CI-DS128-LABEL: name: test_load_local_v4s32_align2 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) - ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-DS128-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3) - ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 12 - ; CI-DS128-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3) - ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) - ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, addrspace 3) + ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-DS128-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i16) from unknown-address + 8, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p3) :: (load (i16) from unknown-address + 10, addrspace 3) + ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CI-DS128-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i16) from unknown-address + 12, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i16) from unknown-address + 14, addrspace 3) + ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32), [[OR3]](i32) + ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; VI-LABEL: name: test_load_local_v4s32_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: 
[[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3) - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, addrspace 3) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; 
VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i16) from unknown-address + 8, addrspace 3) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p3) :: (load (i16) from unknown-address + 10, addrspace 3) + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i16) from unknown-address + 12, addrspace 3) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i16) from unknown-address + 14, addrspace 3) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32), [[OR3]](i32) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; GFX9-LABEL: name: test_load_local_v4s32_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3) - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 
12, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3) - ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, addrspace 3) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i16) from unknown-address + 8, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p3) :: (load (i16) from unknown-address + 10, addrspace 3) + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i16) from unknown-address + 12, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i16) from unknown-address + 14, addrspace 3) + ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32), [[OR3]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s32_align2 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 2, addrspace 3) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: 
(load (<4 x i32>), align 2, addrspace 3) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; GFX10-LABEL: name: test_load_local_v4s32_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) - ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3) - ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3) - ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = 
G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, addrspace 3) + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i16) from unknown-address + 8, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p3) :: (load (i16) from unknown-address + 10, addrspace 3) + ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i16) from unknown-address + 12, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i16) from unknown-address + 14, addrspace 3) + ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32), [[OR3]](i32) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s32_align2 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 2, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, align 2, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, align 2, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, align 2, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), align 2, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-UNALIGNED-NEXT: 
[[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, align 2, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 8, align 2, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i32) from unknown-address + 12, align 2, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; GFX11-LABEL: name: test_load_local_v4s32_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) - ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3) - ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3) - ; GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), 
[[OR3]](s32) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, addrspace 3) + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i16) from unknown-address + 8, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p3) :: (load (i16) from unknown-address + 10, addrspace 3) + ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; GFX11-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i16) from unknown-address + 12, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i16) from unknown-address + 14, addrspace 3) + ; GFX11-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; GFX11-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32), [[OR3]](i32) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v4s32_align2 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 2, addrspace 3) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i32>), align 2, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) %0:_(p3) = COPY $vgpr0 - %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 2, addrspace 3) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<4 x i32>) = G_LOAD %0(p3) :: (load (<4 x i32>), align 2, addrspace 3) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<4 x i32>) ... 
--- @@ -12593,513 +12593,513 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) - ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) 
= G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32) - ; SI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) - ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) - ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; SI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) - ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) - ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; SI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR8]](s32), [[OR11]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD 
[[PTR_ADD3]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32) + ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; SI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; SI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; SI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; SI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; SI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; SI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; SI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i32) + ; SI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (i8) from unknown-address + 12, addrspace 3) + ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (i8) from unknown-address + 13, addrspace 3) + ; SI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; SI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (i8) from unknown-address + 14, addrspace 3) + ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p3) :: (load (i8) from unknown-address + 15, addrspace 3) + ; SI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; SI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; SI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; SI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR 
[[OR8]](i32), [[OR11]](i32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i32>), [[BUILD_VECTOR1]](<2 x i32>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x i32>) ; ; CI-LABEL: name: test_load_local_v4s32_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) - ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; CI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - 
; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32) - ; CI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) - ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) - ; CI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; CI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) - ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) - ; CI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; CI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; CI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; CI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR8]](s32), [[OR11]](s32) - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: 
[[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32) + ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; CI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; CI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; CI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; CI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; CI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i32) + ; CI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (i8) from unknown-address + 12, addrspace 3) + ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (i8) from unknown-address + 13, addrspace 3) + ; CI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; CI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (i8) from unknown-address + 14, addrspace 3) + ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p3) :: (load (i8) from unknown-address + 15, addrspace 3) + ; CI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; CI-NEXT: [[OR10:%[0-9]+]]:_(i32) = 
G_OR [[SHL10]], [[ZEXTLOAD11]] + ; CI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; CI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR8]](i32), [[OR11]](i32) + ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i32>), [[BUILD_VECTOR1]](<2 x i32>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x i32>) ; ; CI-DS128-LABEL: name: test_load_local_v4s32_align1 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-DS128-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-DS128-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; CI-DS128-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-DS128-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-DS128-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-DS128-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from 
unknown-address + 8, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; CI-DS128-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; CI-DS128-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; CI-DS128-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; CI-DS128-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; CI-DS128-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; CI-DS128-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CI-DS128-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) - ; CI-DS128-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; CI-DS128-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) - ; CI-DS128-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; CI-DS128-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; CI-DS128-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; CI-DS128-NEXT: 
[[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-DS128-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-DS128-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; CI-DS128-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-DS128-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-DS128-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-DS128-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; CI-DS128-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; CI-DS128-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; CI-DS128-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; CI-DS128-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; CI-DS128-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; CI-DS128-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CI-DS128-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (i8) from unknown-address + 12, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; CI-DS128-NEXT: 
[[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (i8) from unknown-address + 13, addrspace 3) + ; CI-DS128-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; CI-DS128-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (i8) from unknown-address + 14, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p3) :: (load (i8) from unknown-address + 15, addrspace 3) + ; CI-DS128-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; CI-DS128-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; CI-DS128-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; VI-LABEL: name: test_load_local_v4s32_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD 
[[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; VI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) - ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) - ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) - ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) - ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; 
VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; VI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; VI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; VI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; VI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; VI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; VI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (i8) from unknown-address + 12, addrspace 3) + ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; 
VI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (i8) from unknown-address + 13, addrspace 3) + ; VI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; VI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (i8) from unknown-address + 14, addrspace 3) + ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p3) :: (load (i8) from unknown-address + 15, addrspace 3) + ; VI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; VI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; VI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; VI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; GFX9-LABEL: name: test_load_local_v4s32_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD 
[[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX9-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX9-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; GFX9-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; GFX9-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) - ; GFX9-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; GFX9-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; GFX9-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) - ; GFX9-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; GFX9-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; GFX9-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], 
[[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX9-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; GFX9-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; GFX9-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; GFX9-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = 
G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (i8) from unknown-address + 12, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; GFX9-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (i8) from unknown-address + 13, addrspace 3) + ; GFX9-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; GFX9-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; GFX9-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (i8) from unknown-address + 14, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p3) :: (load (i8) from unknown-address + 15, addrspace 3) + ; GFX9-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; GFX9-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; GFX9-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s32_align1 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 1, addrspace 3) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i32>), align 1, addrspace 3) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; GFX10-LABEL: name: test_load_local_v4s32_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; 
GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX10-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX10-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; GFX10-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; GFX10-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX10-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; GFX10-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX10-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; GFX10-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; GFX10-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX10-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; GFX10-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; GFX10-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) - ; GFX10-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; GFX10-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; GFX10-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; GFX10-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) - ; GFX10-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; GFX10-NEXT: 
[[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; GFX10-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; GFX10-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX10-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; GFX10-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; GFX10-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; GFX10-NEXT: 
[[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; GFX10-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; GFX10-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX10-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX10-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; GFX10-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX10-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; GFX10-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (i8) from unknown-address + 12, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; GFX10-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (i8) from unknown-address + 13, addrspace 3) + ; GFX10-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; GFX10-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; GFX10-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; GFX10-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (i8) from unknown-address + 14, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p3) :: (load (i8) from unknown-address + 15, addrspace 3) + ; GFX10-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; GFX10-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; GFX10-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; GFX10-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s32_align1 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 1, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, align 1, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, align 1, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, align 1, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), 
[[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), align 1, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, align 1, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 8, align 1, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i32) from unknown-address + 12, align 1, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; GFX11-LABEL: name: test_load_local_v4s32_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; 
GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX11-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX11-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX11-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; GFX11-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; GFX11-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX11-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; GFX11-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX11-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; GFX11-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; GFX11-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; GFX11-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX11-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; GFX11-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; GFX11-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) - ; GFX11-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; GFX11-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; GFX11-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; GFX11-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) - ; GFX11-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; GFX11-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; GFX11-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; GFX11-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX11-NEXT: 
[[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX11-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; GFX11-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX11-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; GFX11-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX11-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX11-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX11-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX11-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; GFX11-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; GFX11-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; GFX11-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; GFX11-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; GFX11-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX11-NEXT: [[OR7:%[0-9]+]]:_(i32) 
= G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX11-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; GFX11-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; GFX11-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX11-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; GFX11-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (i8) from unknown-address + 12, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; GFX11-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (i8) from unknown-address + 13, addrspace 3) + ; GFX11-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; GFX11-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; GFX11-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; GFX11-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (i8) from unknown-address + 14, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p3) :: (load (i8) from unknown-address + 15, addrspace 3) + ; GFX11-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; GFX11-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; GFX11-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; GFX11-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v4s32_align1 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 1, addrspace 3) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i32>), align 1, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) %0:_(p3) = COPY $vgpr0 - %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 1, addrspace 3) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<4 x i32>) = G_LOAD %0(p3) :: (load (<4 x i32>), align 1, addrspace 3) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<4 x i32>) ... 
--- @@ -13112,126 +13112,126 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 32, addrspace 3) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 8, addrspace 3) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<2 x s32>) from unknown-address + 16, align 16, addrspace 3) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<2 x s32>) from unknown-address + 24, addrspace 3) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>), [[LOAD2]](<2 x s32>), [[LOAD3]](<2 x s32>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), align 32, addrspace 3) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x i32>) from unknown-address + 8, addrspace 3) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<2 x i32>) from unknown-address + 16, align 16, addrspace 3) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<2 x i32>) from unknown-address + 24, addrspace 3) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i32>) = G_CONCAT_VECTORS [[LOAD]](<2 x i32>), [[LOAD1]](<2 x i32>), [[LOAD2]](<2 x i32>), [[LOAD3]](<2 x i32>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x i32>) ; ; CI-LABEL: name: test_load_local_v8s32_align32 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 32, addrspace 3) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 8, addrspace 3) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<2 x s32>) from unknown-address + 16, align 16, addrspace 3) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<2 x s32>) from unknown-address + 24, addrspace 3) - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), 
[[LOAD1]](<2 x s32>), [[LOAD2]](<2 x s32>), [[LOAD3]](<2 x s32>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), align 32, addrspace 3) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x i32>) from unknown-address + 8, addrspace 3) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<2 x i32>) from unknown-address + 16, align 16, addrspace 3) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<2 x i32>) from unknown-address + 24, addrspace 3) + ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i32>) = G_CONCAT_VECTORS [[LOAD]](<2 x i32>), [[LOAD1]](<2 x i32>), [[LOAD2]](<2 x i32>), [[LOAD3]](<2 x i32>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x i32>) ; ; CI-DS128-LABEL: name: test_load_local_v8s32_align32 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 32, addrspace 3) - ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x s32>) from unknown-address + 16, addrspace 3) - ; CI-DS128-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) - ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i32>), align 32, addrspace 3) + ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x i32>) from unknown-address + 16, addrspace 3) + ; CI-DS128-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>) + ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x i32>) ; ; VI-LABEL: name: test_load_local_v8s32_align32 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 32, addrspace 3) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x s32>) from unknown-address + 16, addrspace 3) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i32>), align 32, addrspace 3) + ; VI-NEXT: 
[[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x i32>) from unknown-address + 16, addrspace 3) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x i32>) ; ; GFX9-LABEL: name: test_load_local_v8s32_align32 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 32, addrspace 3) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x s32>) from unknown-address + 16, addrspace 3) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i32>), align 32, addrspace 3) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x i32>) from unknown-address + 16, addrspace 3) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x i32>) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v8s32_align32 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 32, addrspace 3) - ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x s32>) from unknown-address + 16, addrspace 3) - ; GFX9-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i32>), align 32, addrspace 3) + ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x i32>) from unknown-address + 16, addrspace 3) + ; GFX9-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x i32>) ; ; GFX10-LABEL: name: test_load_local_v8s32_align32 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 32, addrspace 3) - ; GFX10-NEXT: 
[[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x s32>) from unknown-address + 16, addrspace 3) - ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i32>), align 32, addrspace 3) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x i32>) from unknown-address + 16, addrspace 3) + ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x i32>) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v8s32_align32 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 32, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x s32>) from unknown-address + 16, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i32>), align 32, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x i32>) from unknown-address + 16, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x i32>) ; ; GFX11-LABEL: name: test_load_local_v8s32_align32 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 32, addrspace 3) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x s32>) from unknown-address + 16, addrspace 3) - ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i32>), align 32, addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], 
[[C]](i32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x i32>) from unknown-address + 16, addrspace 3) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x i32>) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v8s32_align32 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 32, addrspace 3) - ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x s32>) from unknown-address + 16, addrspace 3) - ; GFX11-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i32>), align 32, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x i32>) from unknown-address + 16, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x i32>) %0:_(p3) = COPY $vgpr0 - %1:_(<8 x s32>) = G_LOAD %0 :: (load (<8 x s32>), align 32, addrspace 3) - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 + %1:_(<8 x i32>) = G_LOAD %0(p3) :: (load (<8 x i32>), addrspace 3) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1(<8 x i32>) ... 
--- @@ -13244,198 +13244,198 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 32, addrspace 3) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 8, addrspace 3) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<2 x s32>) from unknown-address + 16, align 16, addrspace 3) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<2 x s32>) from unknown-address + 24, addrspace 3) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD3]](p3) :: (load (<2 x s32>) from unknown-address + 32, align 32, addrspace 3) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 40 - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD4]](p3) :: (load (<2 x s32>) from unknown-address + 40, addrspace 3) - ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD5]](p3) :: (load (<2 x s32>) from unknown-address + 48, align 16, addrspace 3) - ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 56 - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32) - ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD6]](p3) :: (load (<2 x s32>) from unknown-address + 56, addrspace 3) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>), [[LOAD2]](<2 x s32>), [[LOAD3]](<2 x s32>), [[LOAD4]](<2 x s32>), [[LOAD5]](<2 x s32>), [[LOAD6]](<2 x s32>), [[LOAD7]](<2 x s32>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), align 32, addrspace 3) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x i32>) from unknown-address + 8, addrspace 3) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<2 x i32>) from unknown-address + 16, align 16, addrspace 3) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<2 x i32>) from unknown-address + 24, addrspace 3) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(<2 x i32>) = 
G_LOAD [[PTR_ADD3]](p3) :: (load (<2 x i32>) from unknown-address + 32, align 32, addrspace 3) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 40 + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[PTR_ADD4]](p3) :: (load (<2 x i32>) from unknown-address + 40, addrspace 3) + ; SI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 48 + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[PTR_ADD5]](p3) :: (load (<2 x i32>) from unknown-address + 48, align 16, addrspace 3) + ; SI-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 56 + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](i32) + ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[PTR_ADD6]](p3) :: (load (<2 x i32>) from unknown-address + 56, addrspace 3) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x i32>) = G_CONCAT_VECTORS [[LOAD]](<2 x i32>), [[LOAD1]](<2 x i32>), [[LOAD2]](<2 x i32>), [[LOAD3]](<2 x i32>), [[LOAD4]](<2 x i32>), [[LOAD5]](<2 x i32>), [[LOAD6]](<2 x i32>), [[LOAD7]](<2 x i32>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x i32>) ; ; CI-LABEL: name: test_load_local_v16s32_align32 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 32, addrspace 3) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 8, addrspace 3) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<2 x s32>) from unknown-address + 16, align 16, addrspace 3) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<2 x s32>) from unknown-address + 24, addrspace 3) - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD3]](p3) :: (load (<2 x s32>) from unknown-address + 32, align 32, addrspace 3) - ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 40 - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD4]](p3) :: (load (<2 x s32>) from unknown-address + 40, addrspace 3) - ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD5]](p3) :: (load (<2 x s32>) from unknown-address + 48, align 16, addrspace 3) - ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 56 - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32) - ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD6]](p3) :: (load (<2 x s32>) from unknown-address + 56, addrspace 3) - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>), [[LOAD2]](<2 x s32>), [[LOAD3]](<2 x s32>), [[LOAD4]](<2 x 
s32>), [[LOAD5]](<2 x s32>), [[LOAD6]](<2 x s32>), [[LOAD7]](<2 x s32>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), align 32, addrspace 3) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x i32>) from unknown-address + 8, addrspace 3) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<2 x i32>) from unknown-address + 16, align 16, addrspace 3) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<2 x i32>) from unknown-address + 24, addrspace 3) + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[PTR_ADD3]](p3) :: (load (<2 x i32>) from unknown-address + 32, align 32, addrspace 3) + ; CI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 40 + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[PTR_ADD4]](p3) :: (load (<2 x i32>) from unknown-address + 40, addrspace 3) + ; CI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 48 + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[PTR_ADD5]](p3) :: (load (<2 x i32>) from unknown-address + 48, align 16, addrspace 3) + ; CI-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 56 + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](i32) + ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[PTR_ADD6]](p3) :: (load (<2 x i32>) from unknown-address + 56, addrspace 3) + ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x i32>) = G_CONCAT_VECTORS [[LOAD]](<2 x i32>), [[LOAD1]](<2 x i32>), [[LOAD2]](<2 x i32>), [[LOAD3]](<2 x i32>), [[LOAD4]](<2 x i32>), [[LOAD5]](<2 x i32>), [[LOAD6]](<2 x i32>), [[LOAD7]](<2 x i32>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x i32>) ; ; CI-DS128-LABEL: name: test_load_local_v16s32_align32 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 32, addrspace 3) - ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x s32>) from unknown-address + 16, addrspace 3) - ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<4 x s32>) from unknown-address + 32, align 32, addrspace 3) - ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 - ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], 
[[C2]](s32) - ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<4 x s32>) from unknown-address + 48, addrspace 3) - ; CI-DS128-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) - ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i32>), align 32, addrspace 3) + ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x i32>) from unknown-address + 16, addrspace 3) + ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<4 x i32>) from unknown-address + 32, align 32, addrspace 3) + ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 48 + ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<4 x i32>) from unknown-address + 48, addrspace 3) + ; CI-DS128-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>), [[LOAD2]](<4 x i32>), [[LOAD3]](<4 x i32>) + ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x i32>) ; ; VI-LABEL: name: test_load_local_v16s32_align32 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 32, addrspace 3) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x s32>) from unknown-address + 16, addrspace 3) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<4 x s32>) from unknown-address + 32, align 32, addrspace 3) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<4 x s32>) from unknown-address + 48, addrspace 3) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i32>), align 32, addrspace 3) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x i32>) from unknown-address + 16, addrspace 3) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; VI-NEXT: 
[[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<4 x i32>) from unknown-address + 32, align 32, addrspace 3) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 48 + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<4 x i32>) from unknown-address + 48, addrspace 3) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>), [[LOAD2]](<4 x i32>), [[LOAD3]](<4 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x i32>) ; ; GFX9-LABEL: name: test_load_local_v16s32_align32 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 32, addrspace 3) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x s32>) from unknown-address + 16, addrspace 3) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<4 x s32>) from unknown-address + 32, align 32, addrspace 3) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<4 x s32>) from unknown-address + 48, addrspace 3) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i32>), align 32, addrspace 3) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x i32>) from unknown-address + 16, addrspace 3) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<4 x i32>) from unknown-address + 32, align 32, addrspace 3) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 48 + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<4 x i32>) from unknown-address + 48, addrspace 3) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>), [[LOAD2]](<4 x i32>), [[LOAD3]](<4 x i32>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x i32>) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v16s32_align32 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: 
[[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 32, addrspace 3) - ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x s32>) from unknown-address + 16, addrspace 3) - ; GFX9-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX9-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<4 x s32>) from unknown-address + 32, align 32, addrspace 3) - ; GFX9-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 - ; GFX9-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<4 x s32>) from unknown-address + 48, addrspace 3) - ; GFX9-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i32>), align 32, addrspace 3) + ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x i32>) from unknown-address + 16, addrspace 3) + ; GFX9-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX9-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX9-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<4 x i32>) from unknown-address + 32, align 32, addrspace 3) + ; GFX9-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 48 + ; GFX9-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX9-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<4 x i32>) from unknown-address + 48, addrspace 3) + ; GFX9-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>), [[LOAD2]](<4 x i32>), [[LOAD3]](<4 x i32>) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x i32>) ; ; GFX10-LABEL: name: test_load_local_v16s32_align32 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 32, addrspace 3) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x s32>) from unknown-address + 16, addrspace 3) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<4 x s32>) from unknown-address + 32, align 32, addrspace 3) - ; 
GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 - ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<4 x s32>) from unknown-address + 48, addrspace 3) - ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i32>), align 32, addrspace 3) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x i32>) from unknown-address + 16, addrspace 3) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<4 x i32>) from unknown-address + 32, align 32, addrspace 3) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 48 + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<4 x i32>) from unknown-address + 48, addrspace 3) + ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>), [[LOAD2]](<4 x i32>), [[LOAD3]](<4 x i32>) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x i32>) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v16s32_align32 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 32, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x s32>) from unknown-address + 16, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<4 x s32>) from unknown-address + 32, align 32, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<4 x s32>) from unknown-address + 48, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i32>), align 32, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) 
= G_CONSTANT i32 16 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x i32>) from unknown-address + 16, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<4 x i32>) from unknown-address + 32, align 32, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 48 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<4 x i32>) from unknown-address + 48, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>), [[LOAD2]](<4 x i32>), [[LOAD3]](<4 x i32>) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x i32>) ; ; GFX11-LABEL: name: test_load_local_v16s32_align32 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 32, addrspace 3) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x s32>) from unknown-address + 16, addrspace 3) - ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<4 x s32>) from unknown-address + 32, align 32, addrspace 3) - ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 - ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<4 x s32>) from unknown-address + 48, addrspace 3) - ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i32>), align 32, addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x i32>) from unknown-address + 16, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<4 x i32>) from unknown-address + 32, align 32, addrspace 3) + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 48 + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<4 x i32>) from unknown-address + 48, addrspace 3) + ; GFX11-NEXT: 
[[CONCAT_VECTORS:%[0-9]+]]:_(<16 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>), [[LOAD2]](<4 x i32>), [[LOAD3]](<4 x i32>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x i32>) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v16s32_align32 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 32, addrspace 3) - ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x s32>) from unknown-address + 16, addrspace 3) - ; GFX11-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX11-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX11-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<4 x s32>) from unknown-address + 32, align 32, addrspace 3) - ; GFX11-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 - ; GFX11-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX11-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<4 x s32>) from unknown-address + 48, addrspace 3) - ; GFX11-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i32>), align 32, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x i32>) from unknown-address + 16, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX11-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX11-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<4 x i32>) from unknown-address + 32, align 32, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 48 + ; GFX11-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX11-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<4 x i32>) from unknown-address + 48, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>), [[LOAD2]](<4 x i32>), [[LOAD3]](<4 x i32>) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x i32>) %0:_(p3) = COPY $vgpr0 - %1:_(<16 x s32>) = G_LOAD %0 :: (load (<16 x s32>), align 32, addrspace 3) - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1 + %1:_(<16 x i32>) = G_LOAD %0(p3) :: (load (<16 x i32>), align 32, addrspace 3) + 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1(<16 x i32>) ... --- @@ -13448,106 +13448,106 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, align 4, addrspace 3) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p3) :: (load (i64), align 4, addrspace 3) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD]](p3) :: (load (i64) from unknown-address + 8, align 4, addrspace 3) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[LOAD]](i64), [[LOAD1]](i64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; CI-LABEL: name: test_load_local_v2s64_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, align 4, addrspace 3) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p3) :: (load (i64), align 4, addrspace 3) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD]](p3) :: (load (i64) from unknown-address + 8, align 4, addrspace 3) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[LOAD]](i64), [[LOAD1]](i64) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; CI-DS128-LABEL: name: test_load_local_v2s64_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) - ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, align 4, addrspace 3) - ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64) - ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p3) :: (load (i64), align 4, addrspace 3) + ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD]](p3) :: (load (i64) from unknown-address + 8, align 4, addrspace 
3) + ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[LOAD]](i64), [[LOAD1]](i64) + ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; VI-LABEL: name: test_load_local_v2s64_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, align 4, addrspace 3) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p3) :: (load (i64), align 4, addrspace 3) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD]](p3) :: (load (i64) from unknown-address + 8, align 4, addrspace 3) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[LOAD]](i64), [[LOAD1]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; GFX9-LABEL: name: test_load_local_v2s64_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, align 4, addrspace 3) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p3) :: (load (i64), align 4, addrspace 3) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD]](p3) :: (load (i64) from unknown-address + 8, align 4, addrspace 3) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[LOAD]](i64), [[LOAD1]](i64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s64_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 4, addrspace 3) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p3) :: (load (<2 x i64>), align 4, addrspace 3) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) ; ; GFX10-LABEL: name: test_load_local_v2s64_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: 
[[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, align 4, addrspace 3) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p3) :: (load (i64), align 4, addrspace 3) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD]](p3) :: (load (i64) from unknown-address + 8, align 4, addrspace 3) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[LOAD]](i64), [[LOAD1]](i64) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s64_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, align 4, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p3) :: (load (i64), align 4, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD]](p3) :: (load (i64) from unknown-address + 8, align 4, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[LOAD]](i64), [[LOAD1]](i64) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; GFX11-LABEL: name: test_load_local_v2s64_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, align 4, addrspace 3) - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p3) :: (load (i64), align 4, addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD]](p3) :: (load (i64) from unknown-address + 8, align 4, addrspace 3) + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[LOAD]](i64), [[LOAD1]](i64) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s64_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: 
{{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 4, addrspace 3) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p3) :: (load (<2 x i64>), align 4, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) %0:_(p3) = COPY $vgpr0 - %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 4, addrspace 3) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<2 x i64>) = G_LOAD %0(p3) :: (load (<2 x i64>), align 4, addrspace 3) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x i64>) ... --- @@ -13560,583 +13560,583 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; SI-NEXT: 
[[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; SI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD6]] - ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[ZEXTLOAD8]] - ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[OR8]], [[C3]](s32) - ; SI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[OR7]] - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR9]](s32) - ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32) - ; SI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) - ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) - ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD9]] - ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) - ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) - ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; SI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[ZEXTLOAD11]] - ; SI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) - ; SI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] - ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) - ; SI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL 
[[ZEXTLOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; SI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; SI-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; SI-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; SI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; SI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; SI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD6]] + ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; SI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; SI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[ZEXTLOAD8]] + ; SI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[OR8]], [[C3]](i32) 
+ ; SI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[OR7]] + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[OR9]](i32) + ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i32) + ; SI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (i8) from unknown-address + 12, addrspace 3) + ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (i8) from unknown-address + 13, addrspace 3) + ; SI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; SI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD9]] + ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (i8) from unknown-address + 14, addrspace 3) + ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p3) :: (load (i8) from unknown-address + 15, addrspace 3) + ; SI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; SI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[ZEXTLOAD11]] + ; SI-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[OR11]], [[C3]](i32) + ; SI-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[OR10]] + ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[OR12]](i32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; SI-NEXT: [[SHL13:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT1]], [[COPY1]](i32) + ; SI-NEXT: [[OR13:%[0-9]+]]:_(i64) = G_OR [[SHL13]], [[ZEXT1]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[OR6]](i64), [[OR13]](i64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; CI-LABEL: name: test_load_local_v2s64_align16 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; CI-NEXT: 
[[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; CI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; CI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD6]] - ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[ZEXTLOAD8]] - ; CI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[OR8]], [[C3]](s32) - ; CI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[OR7]] - ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR9]](s32) - ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32) - ; CI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) - ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) - ; CI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; CI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD9]] - ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) - ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) - ; CI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; CI-NEXT: 
[[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[ZEXTLOAD11]] - ; CI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) - ; CI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] - ; CI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) - ; CI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; CI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; CI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; CI-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; CI-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; 
CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; CI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; CI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD6]] + ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; CI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[ZEXTLOAD8]] + ; CI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[OR8]], [[C3]](i32) + ; CI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[OR7]] + ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[OR9]](i32) + ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i32) + ; CI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (i8) from unknown-address + 12, addrspace 3) + ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (i8) from unknown-address + 13, addrspace 3) + ; CI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; CI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD9]] + ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (i8) from unknown-address + 14, addrspace 3) + ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p3) :: (load (i8) from unknown-address + 15, addrspace 3) + ; CI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; CI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[ZEXTLOAD11]] + ; CI-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[OR11]], [[C3]](i32) + ; CI-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[OR10]] + ; CI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[OR12]](i32) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; CI-NEXT: [[SHL13:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT1]], [[COPY1]](i32) + ; CI-NEXT: [[OR13:%[0-9]+]]:_(i64) = G_OR [[SHL13]], [[ZEXT1]] + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[OR6]](i64), [[OR13]](i64) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; CI-DS128-LABEL: name: test_load_local_v2s64_align16 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-DS128-NEXT: 
[[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-DS128-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-DS128-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-DS128-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; CI-DS128-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-DS128-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-DS128-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-DS128-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; CI-DS128-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CI-DS128-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; CI-DS128-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; CI-DS128-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; CI-DS128-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD6]] - ; CI-DS128-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) 
- ; CI-DS128-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[ZEXTLOAD8]] - ; CI-DS128-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[OR8]], [[C3]](s32) - ; CI-DS128-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[OR7]] - ; CI-DS128-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR9]](s32) - ; CI-DS128-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) - ; CI-DS128-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD9]] - ; CI-DS128-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) - ; CI-DS128-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[ZEXTLOAD11]] - ; CI-DS128-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) - ; CI-DS128-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] - ; CI-DS128-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; CI-DS128-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CI-DS128-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) - ; CI-DS128-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] - ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) - ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-DS128-NEXT: 
[[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-DS128-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-DS128-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; CI-DS128-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-DS128-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-DS128-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-DS128-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; CI-DS128-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; CI-DS128-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; CI-DS128-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; CI-DS128-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; CI-DS128-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD6]] + ; CI-DS128-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; CI-DS128-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[ZEXTLOAD8]] + ; CI-DS128-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[OR8]], [[C3]](i32) + ; CI-DS128-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[OR7]] + ; CI-DS128-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[OR9]](i32) + ; CI-DS128-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (i8) from unknown-address + 12, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (i8) from unknown-address + 13, addrspace 3) + ; CI-DS128-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; CI-DS128-NEXT: 
[[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD9]] + ; CI-DS128-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (i8) from unknown-address + 14, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p3) :: (load (i8) from unknown-address + 15, addrspace 3) + ; CI-DS128-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[ZEXTLOAD11]] + ; CI-DS128-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[OR11]], [[C3]](i32) + ; CI-DS128-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[OR10]] + ; CI-DS128-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[OR12]](i32) + ; CI-DS128-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; CI-DS128-NEXT: [[SHL13:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT1]], [[COPY1]](i32) + ; CI-DS128-NEXT: [[OR13:%[0-9]+]]:_(i64) = G_OR [[SHL13]], [[ZEXT1]] + ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[OR6]](i64), [[OR13]](i64) + ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; VI-LABEL: name: test_load_local_v2s64_align16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; VI-NEXT: 
[[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; VI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD6]] - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[ZEXTLOAD8]] - ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[OR8]], [[C3]](s32) - ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[OR7]] - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR9]](s32) - ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32) - ; VI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) - ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) - ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD9]] - ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) - ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) - ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[ZEXTLOAD11]] - ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) - ; VI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] - ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) - ; VI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) - ; 
VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; VI-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; VI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; VI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD6]] + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; VI-NEXT: 
[[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; VI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[ZEXTLOAD8]] + ; VI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[OR8]], [[C3]](i32) + ; VI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[OR7]] + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[OR9]](i32) + ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i32) + ; VI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (i8) from unknown-address + 12, addrspace 3) + ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (i8) from unknown-address + 13, addrspace 3) + ; VI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; VI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD9]] + ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (i8) from unknown-address + 14, addrspace 3) + ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p3) :: (load (i8) from unknown-address + 15, addrspace 3) + ; VI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; VI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[ZEXTLOAD11]] + ; VI-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[OR11]], [[C3]](i32) + ; VI-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[OR10]] + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[OR12]](i32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; VI-NEXT: [[SHL13:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT1]], [[COPY1]](i32) + ; VI-NEXT: [[OR13:%[0-9]+]]:_(i64) = G_OR [[SHL13]], [[ZEXT1]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[OR6]](i64), [[OR13]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; GFX9-LABEL: name: test_load_local_v2s64_align16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = 
G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX9-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD6]] - ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX9-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[ZEXTLOAD8]] - ; GFX9-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[OR8]], [[C3]](s32) - ; GFX9-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[OR7]] - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR9]](s32) - ; GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32) - ; GFX9-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; GFX9-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) - ; GFX9-NEXT: 
[[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; GFX9-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD9]] - ; GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; GFX9-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) - ; GFX9-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[ZEXTLOAD11]] - ; GFX9-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) - ; GFX9-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] - ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX9-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) - ; GFX9-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; 
GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX9-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; GFX9-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD6]] + ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; GFX9-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[ZEXTLOAD8]] + ; GFX9-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[OR8]], [[C3]](i32) + ; GFX9-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[OR7]] + ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[OR9]](i32) + ; GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i32) + ; GFX9-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (i8) from unknown-address + 12, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; GFX9-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (i8) from unknown-address + 13, addrspace 3) + ; GFX9-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; GFX9-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD9]] + ; GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; GFX9-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (i8) from unknown-address + 14, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p3) :: (load (i8) from unknown-address + 15, addrspace 3) + ; GFX9-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[ZEXTLOAD11]] + ; GFX9-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[OR11]], [[C3]](i32) + ; GFX9-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[OR10]] + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[OR12]](i32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; GFX9-NEXT: [[SHL13:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT1]], [[COPY1]](i32) + ; GFX9-NEXT: [[OR13:%[0-9]+]]:_(i64) = G_OR [[SHL13]], [[ZEXT1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[OR6]](i64), 
[[OR13]](i64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s64_align16 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 1, addrspace 3) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p3) :: (load (<2 x i64>), align 1, addrspace 3) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) ; ; GFX10-LABEL: name: test_load_local_v2s64_align16 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX10-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX10-NEXT: 
[[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX10-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; GFX10-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX10-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; GFX10-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; GFX10-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD6]] - ; GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX10-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; GFX10-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX10-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[ZEXTLOAD8]] - ; GFX10-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[OR8]], [[C3]](s32) - ; GFX10-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[OR7]] - ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR9]](s32) - ; GFX10-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32) - ; GFX10-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; GFX10-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) - ; GFX10-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; GFX10-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD9]] - ; GFX10-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; GFX10-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) - ; GFX10-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; GFX10-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[ZEXTLOAD11]] - ; GFX10-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) - ; GFX10-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] - ; GFX10-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX10-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) - ; GFX10-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = 
G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX10-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX10-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; GFX10-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; GFX10-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; GFX10-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; GFX10-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD6]] + ; GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; GFX10-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; GFX10-NEXT: 
[[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; GFX10-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX10-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[ZEXTLOAD8]] + ; GFX10-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[OR8]], [[C3]](i32) + ; GFX10-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[OR7]] + ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[OR9]](i32) + ; GFX10-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i32) + ; GFX10-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (i8) from unknown-address + 12, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; GFX10-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (i8) from unknown-address + 13, addrspace 3) + ; GFX10-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; GFX10-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD9]] + ; GFX10-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; GFX10-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (i8) from unknown-address + 14, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p3) :: (load (i8) from unknown-address + 15, addrspace 3) + ; GFX10-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; GFX10-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[ZEXTLOAD11]] + ; GFX10-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[OR11]], [[C3]](i32) + ; GFX10-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[OR10]] + ; GFX10-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[OR12]](i32) + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; GFX10-NEXT: [[SHL13:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT1]], [[COPY1]](i32) + ; GFX10-NEXT: [[OR13:%[0-9]+]]:_(i64) = G_OR [[SHL13]], [[ZEXT1]] + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[OR6]](i64), [[OR13]](i64) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s64_align16 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 1, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32) - ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, align 1, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD1]](s32) - ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C1]](s32) - ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[ZEXT]] - ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, align 1, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD2]](s32) - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = 
G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, align 1, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD3]](s32) - ; GFX10-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) - ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[SHL1]], [[ZEXT1]] - ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR]](s64), [[OR1]](s64) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), align 1, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[LOAD]](i32) + ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, align 1, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD1]](i32) + ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C1]](i32) + ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[SHL]], [[ZEXT]] + ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 8, align 1, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[LOAD2]](i32) + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i32) from unknown-address + 12, align 1, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD3]](i32) + ; GFX10-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C1]](i32) + ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT1]], [[COPY1]](i32) + ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(i64) = G_OR [[SHL1]], [[ZEXT1]] + ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[OR]](i64), [[OR1]](i64) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; GFX11-LABEL: name: test_load_local_v2s64_align16 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX11-NEXT: 
[[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX11-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX11-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; GFX11-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX11-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; GFX11-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX11-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX11-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; GFX11-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; GFX11-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD6]] - ; GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX11-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; GFX11-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX11-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[ZEXTLOAD8]] - ; GFX11-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[OR8]], [[C3]](s32) - ; GFX11-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[OR7]] - ; GFX11-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR9]](s32) - ; GFX11-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32) - ; GFX11-NEXT: 
[[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; GFX11-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) - ; GFX11-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; GFX11-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD9]] - ; GFX11-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; GFX11-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) - ; GFX11-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; GFX11-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[ZEXTLOAD11]] - ; GFX11-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) - ; GFX11-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] - ; GFX11-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX11-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) - ; GFX11-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX11-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; GFX11-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; GFX11-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL 
[[ZEXTLOAD4]], [[C1]](i32) + ; GFX11-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; GFX11-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX11-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX11-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX11-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; GFX11-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX11-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; GFX11-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX11-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; GFX11-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; GFX11-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; GFX11-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD6]] + ; GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; GFX11-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; GFX11-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX11-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[ZEXTLOAD8]] + ; GFX11-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[OR8]], [[C3]](i32) + ; GFX11-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[OR7]] + ; GFX11-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[OR9]](i32) + ; GFX11-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i32) + ; GFX11-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (i8) from unknown-address + 12, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; GFX11-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (i8) from unknown-address + 13, addrspace 3) + ; GFX11-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; GFX11-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD9]] + ; GFX11-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; GFX11-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (i8) from unknown-address + 14, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p3) :: (load (i8) from unknown-address + 15, addrspace 3) + ; GFX11-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; GFX11-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[ZEXTLOAD11]] + ; GFX11-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[OR11]], 
[[C3]](i32) + ; GFX11-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[OR10]] + ; GFX11-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[OR12]](i32) + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; GFX11-NEXT: [[SHL13:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT1]], [[COPY1]](i32) + ; GFX11-NEXT: [[OR13:%[0-9]+]]:_(i64) = G_OR [[SHL13]], [[ZEXT1]] + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[OR6]](i64), [[OR13]](i64) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s64_align16 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 1, addrspace 3) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p3) :: (load (<2 x i64>), align 1, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) %0:_(p3) = COPY $vgpr0 - %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 1, addrspace 3) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<2 x i64>) = G_LOAD %0(p3) :: (load (<2 x i64>), align 1, addrspace 3) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x i64>) ... --- @@ -14149,150 +14149,150 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 32, addrspace 3) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, addrspace 3) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64), [[UV3]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p3) :: (load (i64), align 32, addrspace 3) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD]](p3) :: (load (i64) from unknown-address + 8, addrspace 3) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD1]](p3) :: (load (i64) from unknown-address + 16, align 16, addrspace 3) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64), [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[LOAD]](i64), [[LOAD1]](i64), [[LOAD2]](i64), [[UV3]](i64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 
x i64>) ; ; CI-LABEL: name: test_load_local_v3s64_align32 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 32, addrspace 3) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, addrspace 3) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) - ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64), [[UV3]](s64) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p3) :: (load (i64), align 32, addrspace 3) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD]](p3) :: (load (i64) from unknown-address + 8, addrspace 3) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD1]](p3) :: (load (i64) from unknown-address + 16, align 16, addrspace 3) + ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; CI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64), [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[LOAD]](i64), [[LOAD1]](i64), [[LOAD2]](i64), [[UV3]](i64) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; CI-DS128-LABEL: name: test_load_local_v3s64_align32 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3) - ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) - ; CI-DS128-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; CI-DS128-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-DS128-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) - ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p3) :: (load (<2 x i64>), align 32, addrspace 3) + ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], 
[[C]](i32) + ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD]](p3) :: (load (i64) from unknown-address + 16, align 16, addrspace 3) + ; CI-DS128-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[LOAD]](<2 x i64>) + ; CI-DS128-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; CI-DS128-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64), [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[UV]](i64), [[UV1]](i64), [[LOAD1]](i64), [[UV5]](i64) + ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; VI-LABEL: name: test_load_local_v3s64_align32 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p3) :: (load (<2 x i64>), align 32, addrspace 3) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD]](p3) :: (load (i64) from unknown-address + 16, align 16, addrspace 3) + ; VI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[LOAD]](<2 x i64>) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64), [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[UV]](i64), [[UV1]](i64), [[LOAD1]](i64), [[UV5]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; GFX9-LABEL: name: test_load_local_v3s64_align32 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), 
[[UV5]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p3) :: (load (<2 x i64>), align 32, addrspace 3) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD]](p3) :: (load (i64) from unknown-address + 16, align 16, addrspace 3) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[LOAD]](<2 x i64>) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64), [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[UV]](i64), [[UV1]](i64), [[LOAD1]](i64), [[UV5]](i64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s64_align32 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3) - ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) - ; GFX9-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX9-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-UNALIGNED-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p3) :: (load (<2 x i64>), align 32, addrspace 3) + ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD]](p3) :: (load (i64) from unknown-address + 16, align 16, addrspace 3) + ; GFX9-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[LOAD]](<2 x i64>) + ; GFX9-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; GFX9-UNALIGNED-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64), [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[UV]](i64), [[UV1]](i64), [[LOAD1]](i64), [[UV5]](i64) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; GFX10-LABEL: name: test_load_local_v3s64_align32 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: 
[[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p3) :: (load (<2 x i64>), align 32, addrspace 3) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD]](p3) :: (load (i64) from unknown-address + 16, align 16, addrspace 3) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[LOAD]](<2 x i64>) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64), [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[UV]](i64), [[UV1]](i64), [[LOAD1]](i64), [[UV5]](i64) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s64_align32 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX10-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX10-UNALIGNED-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p3) :: (load (<2 x i64>), align 32, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD]](p3) :: (load (i64) from unknown-address + 16, align 16, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[LOAD]](<2 x i64>) + ; GFX10-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; GFX10-UNALIGNED-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64), [[UV4:%[0-9]+]]:_(i64), 
[[UV5:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[UV]](i64), [[UV1]](i64), [[LOAD1]](i64), [[UV5]](i64) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; GFX11-LABEL: name: test_load_local_v3s64_align32 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p3) :: (load (<2 x i64>), align 32, addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD]](p3) :: (load (i64) from unknown-address + 16, align 16, addrspace 3) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[LOAD]](<2 x i64>) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64), [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[UV]](i64), [[UV1]](i64), [[LOAD1]](i64), [[UV5]](i64) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v3s64_align32 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3) - ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) - ; GFX11-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX11-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX11-UNALIGNED-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD 
[[COPY]](p3) :: (load (<2 x i64>), align 32, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD]](p3) :: (load (i64) from unknown-address + 16, align 16, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[LOAD]](<2 x i64>) + ; GFX11-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; GFX11-UNALIGNED-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64), [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[UV]](i64), [[UV1]](i64), [[LOAD1]](i64), [[UV5]](i64) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) %0:_(p3) = COPY $vgpr0 - %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 32, addrspace 3) - %2:_(<4 x s64>) = G_IMPLICIT_DEF - %3:_(<4 x s64>) = G_INSERT %2, %1, 0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3 + %1:_(<3 x i64>) = G_LOAD %0(p3) :: (load (<3 x i64>), align 32, addrspace 3) + %2:_(<4 x i64>) = G_IMPLICIT_DEF + %3:_(<4 x i64>) = G_INSERT %2, %1(<3 x i64>), 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3(<4 x i64>) ... --- @@ -14305,126 +14305,126 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 32, addrspace 3) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, addrspace 3) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD2]](p3) :: (load (s64) from unknown-address + 24, addrspace 3) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64), [[LOAD3]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p3) :: (load (i64), align 32, addrspace 3) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD]](p3) :: (load (i64) from unknown-address + 8, addrspace 3) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD1]](p3) :: (load (i64) from unknown-address + 16, align 16, addrspace 3) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD2]](p3) :: (load (i64) from unknown-address + 24, addrspace 3) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[LOAD]](i64), 
[[LOAD1]](i64), [[LOAD2]](i64), [[LOAD3]](i64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; CI-LABEL: name: test_load_local_v4s64_align32 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 32, addrspace 3) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, addrspace 3) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD2]](p3) :: (load (s64) from unknown-address + 24, addrspace 3) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64), [[LOAD3]](s64) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p3) :: (load (i64), align 32, addrspace 3) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD]](p3) :: (load (i64) from unknown-address + 8, addrspace 3) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD1]](p3) :: (load (i64) from unknown-address + 16, align 16, addrspace 3) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD2]](p3) :: (load (i64) from unknown-address + 24, addrspace 3) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[LOAD]](i64), [[LOAD1]](i64), [[LOAD2]](i64), [[LOAD3]](i64) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; CI-DS128-LABEL: name: test_load_local_v4s64_align32 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3) - ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s64>) from unknown-address + 16, addrspace 3) - ; CI-DS128-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) - ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p3) :: (load (<2 x i64>), align 32, addrspace 3) + ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x i64>) 
from unknown-address + 16, addrspace 3) + ; CI-DS128-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i64>) = G_CONCAT_VECTORS [[LOAD]](<2 x i64>), [[LOAD1]](<2 x i64>) + ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x i64>) ; ; VI-LABEL: name: test_load_local_v4s64_align32 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s64>) from unknown-address + 16, addrspace 3) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p3) :: (load (<2 x i64>), align 32, addrspace 3) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x i64>) from unknown-address + 16, addrspace 3) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i64>) = G_CONCAT_VECTORS [[LOAD]](<2 x i64>), [[LOAD1]](<2 x i64>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x i64>) ; ; GFX9-LABEL: name: test_load_local_v4s64_align32 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s64>) from unknown-address + 16, addrspace 3) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p3) :: (load (<2 x i64>), align 32, addrspace 3) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x i64>) from unknown-address + 16, addrspace 3) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i64>) = G_CONCAT_VECTORS [[LOAD]](<2 x i64>), [[LOAD1]](<2 x i64>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x i64>) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s64_align32 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3) - ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s64>) from unknown-address + 16, addrspace 3) - ; GFX9-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) 
= G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p3) :: (load (<2 x i64>), align 32, addrspace 3) + ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x i64>) from unknown-address + 16, addrspace 3) + ; GFX9-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i64>) = G_CONCAT_VECTORS [[LOAD]](<2 x i64>), [[LOAD1]](<2 x i64>) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x i64>) ; ; GFX10-LABEL: name: test_load_local_v4s64_align32 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s64>) from unknown-address + 16, addrspace 3) - ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p3) :: (load (<2 x i64>), align 32, addrspace 3) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x i64>) from unknown-address + 16, addrspace 3) + ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i64>) = G_CONCAT_VECTORS [[LOAD]](<2 x i64>), [[LOAD1]](<2 x i64>) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x i64>) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s64_align32 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s64>) from unknown-address + 16, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p3) :: (load (<2 x i64>), align 32, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x i64>) from unknown-address + 16, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i64>) = G_CONCAT_VECTORS [[LOAD]](<2 x i64>), [[LOAD1]](<2 x 
i64>) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x i64>) ; ; GFX11-LABEL: name: test_load_local_v4s64_align32 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s64>) from unknown-address + 16, addrspace 3) - ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p3) :: (load (<2 x i64>), align 32, addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x i64>) from unknown-address + 16, addrspace 3) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i64>) = G_CONCAT_VECTORS [[LOAD]](<2 x i64>), [[LOAD1]](<2 x i64>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x i64>) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v4s64_align32 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3) - ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s64>) from unknown-address + 16, addrspace 3) - ; GFX11-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p3) :: (load (<2 x i64>), align 32, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x i64>) from unknown-address + 16, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i64>) = G_CONCAT_VECTORS [[LOAD]](<2 x i64>), [[LOAD1]](<2 x i64>) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x i64>) %0:_(p3) = COPY $vgpr0 - %1:_(<4 x s64>) = G_LOAD %0 :: (load (<4 x s64>), align 32, addrspace 3) - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 + %1:_(<4 x i64>) = G_LOAD %0(p3) :: (load (<4 x i64>), addrspace 3) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1(<4 x i64>) ... 
--- @@ -14437,152 +14437,152 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 8, align 4, addrspace 3) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), align 4, addrspace 3) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x i32>) from unknown-address + 8, align 4, addrspace 3) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i32>) = G_CONCAT_VECTORS [[LOAD]](<2 x i32>), [[LOAD1]](<2 x i32>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[CONCAT_VECTORS]](<4 x i32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; CI-LABEL: name: test_load_local_v2p1_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 8, align 4, addrspace 3) - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), align 4, addrspace 3) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x i32>) from unknown-address + 8, align 4, addrspace 3) + ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i32>) = G_CONCAT_VECTORS [[LOAD]](<2 x i32>), [[LOAD1]](<2 x i32>) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[CONCAT_VECTORS]](<4 x i32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; CI-DS128-LABEL: name: test_load_local_v2p1_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) - ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) - ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CI-DS128-NEXT: 
[[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) - ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, addrspace 3) + ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 8, addrspace 3) + ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i32) from unknown-address + 12, addrspace 3) + ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; VI-LABEL: name: test_load_local_v2p1_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, addrspace 3) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 8, addrspace 3) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i32) 
from unknown-address + 12, addrspace 3) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; GFX9-LABEL: name: test_load_local_v2p1_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, addrspace 3) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 8, addrspace 3) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i32) from unknown-address + 12, addrspace 3) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2p1_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 4, addrspace 3) - ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i32>), align 4, addrspace 3) + ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; GFX10-LABEL: name: test_load_local_v2p1_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: 
(load (s32), addrspace 3) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, addrspace 3) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 8, addrspace 3) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i32) from unknown-address + 12, addrspace 3) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2p1_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) + ; 
GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 8, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i32) from unknown-address + 12, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; GFX11-LABEL: name: test_load_local_v2p1_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) - ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) - ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 8, addrspace 3) + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i32) from unknown-address + 12, addrspace 3) + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST 
[[BUILD_VECTOR]](<4 x i32>) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2p1_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 4, addrspace 3) - ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i32>), align 4, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) %0:_(p3) = COPY $vgpr0 - %1:_(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 4, addrspace 3) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<2 x p1>) = G_LOAD %0(p3) :: (load (<2 x p1>), align 4, addrspace 3) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x p1>) ... --- @@ -14595,84 +14595,84 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), addrspace 3) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x i32>) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) ; ; CI-LABEL: name: test_load_local_v2p3_align8 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), addrspace 3) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x i32>) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) ; ; CI-DS128-LABEL: name: test_load_local_v2p3_align8 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3) - ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), addrspace 3) + ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x i32>) ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) ; ; VI-LABEL: name: test_load_local_v2p3_align8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), addrspace 3) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x i32>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) ; ; GFX9-LABEL: name: test_load_local_v2p3_align8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) 
= G_BITCAST [[LOAD]](<2 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), addrspace 3) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x i32>) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2p3_align8 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3) - ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), addrspace 3) + ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x i32>) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) ; ; GFX10-LABEL: name: test_load_local_v2p3_align8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), addrspace 3) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x i32>) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2p3_align8 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x i32>) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) ; ; GFX11-LABEL: name: test_load_local_v2p3_align8 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), addrspace 3) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x i32>) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2p3_align8 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3) - ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x i32>) ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) %0:_(p3) = COPY $vgpr0 - %1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 8, addrspace 3) - $vgpr0_vgpr1 = COPY %1 + %1:_(<2 x p3>) = G_LOAD %0(p3) :: (load (<2 x p3>), 
addrspace 3) + $vgpr0_vgpr1 = COPY %1(<2 x p3>) ... --- @@ -14685,74 +14685,74 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) - ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), align 4, addrspace 3) + ; SI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-LABEL: name: test_extload_local_s32_from_1_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) - ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), align 4, addrspace 3) + ; CI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-DS128-LABEL: name: test_extload_local_s32_from_1_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) - ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), align 4, addrspace 3) + ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; VI-LABEL: name: test_extload_local_s32_from_1_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), align 4, addrspace 3) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-LABEL: name: test_extload_local_s32_from_1_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), align 4, addrspace 3) + ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-UNALIGNED-LABEL: name: test_extload_local_s32_from_1_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) - ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), align 4, addrspace 3) + ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX10-LABEL: name: test_extload_local_s32_from_1_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) - ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), align 4, addrspace 3) + ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX10-UNALIGNED-LABEL: name: test_extload_local_s32_from_1_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) - ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX10-UNALIGNED-NEXT: 
[[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), align 4, addrspace 3) + ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX11-LABEL: name: test_extload_local_s32_from_1_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) - ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), align 4, addrspace 3) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX11-UNALIGNED-LABEL: name: test_extload_local_s32_from_1_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) - ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), align 4, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p3) = COPY $vgpr0 - %1:_(s32) = G_LOAD %0 :: (load (s8), align 4, addrspace 3) - $vgpr0 = COPY %1 + %1:_(i32) = G_LOAD %0(p3) :: (load (i8), align 4, addrspace 3) + $vgpr0 = COPY %1(i32) ... --- @@ -14765,74 +14765,74 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) - ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), align 4, addrspace 3) + ; SI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-LABEL: name: test_extload_local_s32_from_2_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) - ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), align 4, addrspace 3) + ; CI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-DS128-LABEL: name: test_extload_local_s32_from_2_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) - ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), align 4, addrspace 3) + ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; VI-LABEL: name: test_extload_local_s32_from_2_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), align 4, addrspace 3) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-LABEL: name: test_extload_local_s32_from_2_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), align 4, addrspace 3) + ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-UNALIGNED-LABEL: name: test_extload_local_s32_from_2_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; 
GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) - ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), align 4, addrspace 3) + ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX10-LABEL: name: test_extload_local_s32_from_2_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) - ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), align 4, addrspace 3) + ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX10-UNALIGNED-LABEL: name: test_extload_local_s32_from_2_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) - ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), align 4, addrspace 3) + ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX11-LABEL: name: test_extload_local_s32_from_2_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) - ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), align 4, addrspace 3) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX11-UNALIGNED-LABEL: name: test_extload_local_s32_from_2_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) - ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), align 4, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p3) = COPY $vgpr0 - %1:_(s32) = G_LOAD %0 :: (load (s16), align 4, addrspace 3) - $vgpr0 = COPY %1 + %1:_(i32) = G_LOAD %0(p3) :: (load (i16), align 4, addrspace 3) + $vgpr0 = COPY %1(i32) ... 
--- @@ -14846,84 +14846,84 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), align 4, addrspace 3) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; CI-LABEL: name: test_extload_local_s64_from_1_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) - ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), align 4, addrspace 3) + ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; CI-DS128-LABEL: name: test_extload_local_s64_from_1_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) - ; CI-DS128-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), align 4, addrspace 3) + ; CI-DS128-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; VI-LABEL: name: test_extload_local_s64_from_1_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), align 4, addrspace 3) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX9-LABEL: name: test_extload_local_s64_from_1_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), align 4, addrspace 3) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX9-UNALIGNED-LABEL: name: test_extload_local_s64_from_1_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) - ; GFX9-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), align 4, addrspace 3) + ; GFX9-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY 
[[ANYEXT]](i64) ; ; GFX10-LABEL: name: test_extload_local_s64_from_1_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) - ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), align 4, addrspace 3) + ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX10-UNALIGNED-LABEL: name: test_extload_local_s64_from_1_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), align 4, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX11-LABEL: name: test_extload_local_s64_from_1_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) - ; GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), align 4, addrspace 3) + ; GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX11-UNALIGNED-LABEL: name: test_extload_local_s64_from_1_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) - ; GFX11-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), align 4, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) %0:_(p3) = COPY $vgpr0 - %1:_(s64) = G_LOAD %0 :: (load (s8), align 4, addrspace 3) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_LOAD %0(p3) :: (load (i8), align 4, addrspace 3) + $vgpr0_vgpr1 = COPY %1(i64) ... 
--- @@ -14936,84 +14936,84 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), align 4, addrspace 3) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; CI-LABEL: name: test_extload_local_s64_from_2_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) - ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), align 4, addrspace 3) + ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; CI-DS128-LABEL: name: test_extload_local_s64_from_2_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) - ; CI-DS128-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), align 4, addrspace 3) + ; CI-DS128-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; VI-LABEL: name: test_extload_local_s64_from_2_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), align 4, addrspace 3) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX9-LABEL: name: test_extload_local_s64_from_2_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), align 4, addrspace 3) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX9-UNALIGNED-LABEL: name: test_extload_local_s64_from_2_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) - ; GFX9-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), align 4, addrspace 3) + ; GFX9-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY 
[[ANYEXT]](i64) ; ; GFX10-LABEL: name: test_extload_local_s64_from_2_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) - ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), align 4, addrspace 3) + ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX10-UNALIGNED-LABEL: name: test_extload_local_s64_from_2_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), align 4, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX11-LABEL: name: test_extload_local_s64_from_2_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) - ; GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), align 4, addrspace 3) + ; GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX11-UNALIGNED-LABEL: name: test_extload_local_s64_from_2_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) - ; GFX11-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), align 4, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) %0:_(p3) = COPY $vgpr0 - %1:_(s64) = G_LOAD %0 :: (load (s16), align 4, addrspace 3) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_LOAD %0(p3) :: (load (i16), align 4, addrspace 3) + $vgpr0_vgpr1 = COPY %1(i64) ... 
--- @@ -15026,84 +15026,84 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; CI-LABEL: name: test_extload_local_s64_from_4_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; CI-DS128-LABEL: name: test_extload_local_s64_from_4_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; CI-DS128-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; CI-DS128-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; VI-LABEL: name: test_extload_local_s64_from_4_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX9-LABEL: name: test_extload_local_s64_from_4_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX9-UNALIGNED-LABEL: name: test_extload_local_s64_from_4_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; GFX9-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; GFX9-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX10-LABEL: name: test_extload_local_s64_from_4_align4 ; GFX10: liveins: $vgpr0 ; 
GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX10-UNALIGNED-LABEL: name: test_extload_local_s64_from_4_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX11-LABEL: name: test_extload_local_s64_from_4_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX11-UNALIGNED-LABEL: name: test_extload_local_s64_from_4_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; GFX11-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) %0:_(p3) = COPY $vgpr0 - %1:_(s64) = G_LOAD %0 :: (load (s32), align 4, addrspace 3) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_LOAD %0(p3) :: (load (i32), addrspace 3) + $vgpr0_vgpr1 = COPY %1(i64) ... 
--- @@ -15116,114 +15116,114 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) - ; SI-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; SI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; SI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[DEF]](i32) + ; SI-NEXT: [[DEF1:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; SI-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[MV]](i64), [[DEF1]](i64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](i128) ; ; CI-LABEL: name: test_extload_local_s128_from_4_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; CI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) - ; CI-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; CI-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[DEF]](i32) + ; CI-NEXT: [[DEF1:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; CI-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[MV]](i64), [[DEF1]](i64) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](i128) ; ; CI-DS128-LABEL: name: test_extload_local_s128_from_4_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; CI-DS128-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CI-DS128-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) - ; CI-DS128-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CI-DS128-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) - ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; CI-DS128-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CI-DS128-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[DEF]](i32) + ; CI-DS128-NEXT: [[DEF1:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; CI-DS128-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[MV]](i64), [[DEF1]](i64) + ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](i128) ; ; VI-LABEL: name: test_extload_local_s128_from_4_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) - ; VI-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; VI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY 
[[MV1]](s128) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; VI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[DEF]](i32) + ; VI-NEXT: [[DEF1:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; VI-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[MV]](i64), [[DEF1]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](i128) ; ; GFX9-LABEL: name: test_extload_local_s128_from_4_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[DEF]](i32) + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[MV]](i64), [[DEF1]](i64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](i128) ; ; GFX9-UNALIGNED-LABEL: name: test_extload_local_s128_from_4_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; GFX9-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-UNALIGNED-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) - ; GFX9-UNALIGNED-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; GFX9-UNALIGNED-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; GFX9-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX9-UNALIGNED-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[DEF]](i32) + ; GFX9-UNALIGNED-NEXT: [[DEF1:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; GFX9-UNALIGNED-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[MV]](i64), [[DEF1]](i64) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](i128) ; ; GFX10-LABEL: name: test_extload_local_s128_from_4_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) - ; GFX10-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[DEF]](i32) + ; GFX10-NEXT: [[DEF1:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[MV]](i64), 
[[DEF1]](i64) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](i128) ; ; GFX10-UNALIGNED-LABEL: name: test_extload_local_s128_from_4_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX10-UNALIGNED-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) - ; GFX10-UNALIGNED-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; GFX10-UNALIGNED-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX10-UNALIGNED-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[DEF]](i32) + ; GFX10-UNALIGNED-NEXT: [[DEF1:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; GFX10-UNALIGNED-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[MV]](i64), [[DEF1]](i64) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](i128) ; ; GFX11-LABEL: name: test_extload_local_s128_from_4_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) - ; GFX11-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[DEF]](i32) + ; GFX11-NEXT: [[DEF1:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[MV]](i64), [[DEF1]](i64) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](i128) ; ; GFX11-UNALIGNED-LABEL: name: test_extload_local_s128_from_4_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; GFX11-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX11-UNALIGNED-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) - ; GFX11-UNALIGNED-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; GFX11-UNALIGNED-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX11-UNALIGNED-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[DEF]](i32) + ; GFX11-UNALIGNED-NEXT: [[DEF1:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; GFX11-UNALIGNED-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[MV]](i64), [[DEF1]](i64) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](i128) %0:_(p3) = COPY $vgpr0 - %1:_(s128) = G_LOAD %0 :: (load (s32), align 4, addrspace 3) 
- $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(i128) = G_LOAD %0(p3) :: (load (i32), addrspace 3) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(i128) ... --- @@ -15236,84 +15236,84 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), align 4, addrspace 3) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; CI-LABEL: name: test_extload_local_s64_from_2_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) - ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), align 4, addrspace 3) + ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; CI-DS128-LABEL: name: test_extload_local_s64_from_2_align2 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) - ; CI-DS128-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), align 4, addrspace 3) + ; CI-DS128-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; VI-LABEL: name: test_extload_local_s64_from_2_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), align 4, addrspace 3) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX9-LABEL: name: test_extload_local_s64_from_2_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), align 4, addrspace 3) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX9-UNALIGNED-LABEL: name: test_extload_local_s64_from_2_align2 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) - ; GFX9-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), 
align 4, addrspace 3) + ; GFX9-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX10-LABEL: name: test_extload_local_s64_from_2_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) - ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), align 4, addrspace 3) + ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX10-UNALIGNED-LABEL: name: test_extload_local_s64_from_2_align2 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), align 4, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX11-LABEL: name: test_extload_local_s64_from_2_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) - ; GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), align 4, addrspace 3) + ; GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX11-UNALIGNED-LABEL: name: test_extload_local_s64_from_2_align2 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) - ; GFX11-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i16), align 4, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) %0:_(p3) = COPY $vgpr0 - %1:_(s64) = G_LOAD %0 :: (load (s16), align 4, addrspace 3) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_LOAD %0(p3) :: (load (i16), align 4, addrspace 3) + $vgpr0_vgpr1 = COPY %1(i64) ... 
--- @@ -15326,84 +15326,84 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), align 4, addrspace 3) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; CI-LABEL: name: test_extload_local_s64_from_1_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) - ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), align 4, addrspace 3) + ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; CI-DS128-LABEL: name: test_extload_local_s64_from_1_align1 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) - ; CI-DS128-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), align 4, addrspace 3) + ; CI-DS128-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; VI-LABEL: name: test_extload_local_s64_from_1_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), align 4, addrspace 3) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX9-LABEL: name: test_extload_local_s64_from_1_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), align 4, addrspace 3) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX9-UNALIGNED-LABEL: name: test_extload_local_s64_from_1_align1 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) - ; GFX9-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), align 4, addrspace 3) + ; GFX9-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY 
[[ANYEXT]](i64) ; ; GFX10-LABEL: name: test_extload_local_s64_from_1_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) - ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), align 4, addrspace 3) + ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX10-UNALIGNED-LABEL: name: test_extload_local_s64_from_1_align1 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), align 4, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX11-LABEL: name: test_extload_local_s64_from_1_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) - ; GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), align 4, addrspace 3) + ; GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX11-UNALIGNED-LABEL: name: test_extload_local_s64_from_1_align1 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) - ; GFX11-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i8), align 4, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) %0:_(p3) = COPY $vgpr0 - %1:_(s64) = G_LOAD %0 :: (load (s8), align 4, addrspace 3) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_LOAD %0(p3) :: (load (i8), align 4, addrspace 3) + $vgpr0_vgpr1 = COPY %1(i64) ... 
--- @@ -15416,74 +15416,74 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i16>), align 1, addrspace 3) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; CI-LABEL: name: test_extload_local_v2s32_from_4_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i16>), align 1, addrspace 3) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; CI-DS128-LABEL: name: test_extload_local_v2s32_from_4_align1 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3) - ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i16>), align 1, addrspace 3) + ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; VI-LABEL: name: test_extload_local_v2s32_from_4_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i16>), align 1, addrspace 3) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX9-LABEL: name: test_extload_local_v2s32_from_4_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i16>), align 1, addrspace 3) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX9-UNALIGNED-LABEL: name: test_extload_local_v2s32_from_4_align1 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i16>), align 1, addrspace 3) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX10-LABEL: name: test_extload_local_v2s32_from_4_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i16>), align 1, addrspace 3) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX10-UNALIGNED-LABEL: name: test_extload_local_v2s32_from_4_align1 ; GFX10-UNALIGNED: 
liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i16>), align 1, addrspace 3) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX11-LABEL: name: test_extload_local_v2s32_from_4_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i16>), align 1, addrspace 3) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX11-UNALIGNED-LABEL: name: test_extload_local_v2s32_from_4_align1 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i16>), align 1, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) %0:_(p3) = COPY $vgpr0 - %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s16>), align 1, addrspace 3) - $vgpr0_vgpr1 = COPY %1 + %1:_(<2 x i32>) = G_LOAD %0(p3) :: (load (<2 x i16>), align 1, addrspace 3) + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... 
--- @@ -15496,74 +15496,74 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i16>), align 2, addrspace 3) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; CI-LABEL: name: test_extload_local_v2s32_from_4_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i16>), align 2, addrspace 3) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; CI-DS128-LABEL: name: test_extload_local_v2s32_from_4_align2 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3) - ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i16>), align 2, addrspace 3) + ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; VI-LABEL: name: test_extload_local_v2s32_from_4_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i16>), align 2, addrspace 3) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX9-LABEL: name: test_extload_local_v2s32_from_4_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i16>), align 2, addrspace 3) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX9-UNALIGNED-LABEL: name: test_extload_local_v2s32_from_4_align2 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i16>), align 2, addrspace 3) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX10-LABEL: name: test_extload_local_v2s32_from_4_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i16>), align 2, addrspace 3) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX10-UNALIGNED-LABEL: name: test_extload_local_v2s32_from_4_align2 ; GFX10-UNALIGNED: 
liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i16>), align 2, addrspace 3) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX11-LABEL: name: test_extload_local_v2s32_from_4_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i16>), align 2, addrspace 3) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX11-UNALIGNED-LABEL: name: test_extload_local_v2s32_from_4_align2 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i16>), align 2, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) %0:_(p3) = COPY $vgpr0 - %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s16>), align 2, addrspace 3) - $vgpr0_vgpr1 = COPY %1 + %1:_(<2 x i32>) = G_LOAD %0(p3) :: (load (<2 x i16>), align 2, addrspace 3) + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... 
--- @@ -15576,74 +15576,74 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i16>), addrspace 3) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; CI-LABEL: name: test_extload_local_v2s32_from_4_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i16>), addrspace 3) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; CI-DS128-LABEL: name: test_extload_local_v2s32_from_4_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3) - ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i16>), addrspace 3) + ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; VI-LABEL: name: test_extload_local_v2s32_from_4_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i16>), addrspace 3) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX9-LABEL: name: test_extload_local_v2s32_from_4_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i16>), addrspace 3) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX9-UNALIGNED-LABEL: name: test_extload_local_v2s32_from_4_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i16>), addrspace 3) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX10-LABEL: name: test_extload_local_v2s32_from_4_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i16>), addrspace 3) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX10-UNALIGNED-LABEL: name: test_extload_local_v2s32_from_4_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; 
GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i16>), addrspace 3) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX11-LABEL: name: test_extload_local_v2s32_from_4_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i16>), addrspace 3) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX11-UNALIGNED-LABEL: name: test_extload_local_v2s32_from_4_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i16>), addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) %0:_(p3) = COPY $vgpr0 - %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 3) - $vgpr0_vgpr1 = COPY %1 + %1:_(<2 x i32>) = G_LOAD %0(p3) :: (load (<2 x i16>), addrspace 3) + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... --- @@ -15656,74 +15656,74 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 4, addrspace 3) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p3) :: (load (<3 x i16>), align 4, addrspace 3) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) ; ; CI-LABEL: name: test_extload_local_v3s32_from_6_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 4, addrspace 3) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p3) :: (load (<3 x i16>), align 4, addrspace 3) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) ; ; CI-DS128-LABEL: name: test_extload_local_v3s32_from_6_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 4, addrspace 3) - ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p3) :: (load (<3 x i16>), align 4, addrspace 3) + ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) ; ; VI-LABEL: name: test_extload_local_v3s32_from_6_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 4, addrspace 3) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p3) :: (load (<3 x i16>), align 4, addrspace 3) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = 
COPY [[LOAD]](<3 x i32>) ; ; GFX9-LABEL: name: test_extload_local_v3s32_from_6_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 4, addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p3) :: (load (<3 x i16>), align 4, addrspace 3) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) ; ; GFX9-UNALIGNED-LABEL: name: test_extload_local_v3s32_from_6_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 4, addrspace 3) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p3) :: (load (<3 x i16>), align 4, addrspace 3) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) ; ; GFX10-LABEL: name: test_extload_local_v3s32_from_6_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 4, addrspace 3) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p3) :: (load (<3 x i16>), align 4, addrspace 3) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) ; ; GFX10-UNALIGNED-LABEL: name: test_extload_local_v3s32_from_6_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 4, addrspace 3) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p3) :: (load (<3 x i16>), align 4, addrspace 3) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) ; ; GFX11-LABEL: name: test_extload_local_v3s32_from_6_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 4, addrspace 3) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p3) :: (load (<3 x i16>), align 4, addrspace 3) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) ; ; GFX11-UNALIGNED-LABEL: name: test_extload_local_v3s32_from_6_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 4, addrspace 3) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p3) :: (load (<3 x i16>), align 4, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) %0:_(p3) = COPY $vgpr0 - %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s16>), align 4, addrspace 3) - $vgpr0_vgpr1_vgpr2 = COPY %1 + %1:_(<3 x i32>) = G_LOAD %0(p3) :: (load (<3 x i16>), align 4, addrspace 3) + $vgpr0_vgpr1_vgpr2 = COPY %1(<3 x i32>) ... 
--- @@ -15736,74 +15736,74 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i16>), align 4, addrspace 3) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; CI-LABEL: name: test_extload_local_v4s32_from_8_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i16>), align 4, addrspace 3) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; CI-DS128-LABEL: name: test_extload_local_v4s32_from_8_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3) - ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i16>), align 4, addrspace 3) + ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; VI-LABEL: name: test_extload_local_v4s32_from_8_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i16>), align 4, addrspace 3) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; GFX9-LABEL: name: test_extload_local_v4s32_from_8_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i16>), align 4, addrspace 3) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; GFX9-UNALIGNED-LABEL: name: test_extload_local_v4s32_from_8_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i16>), align 4, addrspace 3) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; GFX10-LABEL: name: test_extload_local_v4s32_from_8_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i16>), align 4, 
addrspace 3) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; GFX10-UNALIGNED-LABEL: name: test_extload_local_v4s32_from_8_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i16>), align 4, addrspace 3) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; GFX11-LABEL: name: test_extload_local_v4s32_from_8_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i16>), align 4, addrspace 3) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; GFX11-UNALIGNED-LABEL: name: test_extload_local_v4s32_from_8_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p3) :: (load (<4 x i16>), align 4, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) %0:_(p3) = COPY $vgpr0 - %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s16>), align 4, addrspace 3) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<4 x i32>) = G_LOAD %0(p3) :: (load (<4 x i16>), align 4, addrspace 3) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<4 x i32>) ... 
--- @@ -15816,778 +15816,778 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD 
[[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; SI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) - ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) - ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; SI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) - ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) - ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; SI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s32) - ; SI-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD15]](p3) :: (load (s8) from unknown-address + 16, addrspace 3) - ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD16]](p3) :: (load (s8) from unknown-address + 17, addrspace 3) - ; SI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD13]], [[C1]](s32) - ; SI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[ZEXTLOAD12]] - ; SI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD17]](p3) :: (load (s8) from unknown-address + 18, addrspace 3) - ; SI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD17]], [[C]](s32) - ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p3) :: (load (s8) from unknown-address + 19, addrspace 3) - ; SI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; SI-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[SHL13]], [[ZEXTLOAD14]] - ; SI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[OR13]], [[C3]](s32) - ; SI-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[SHL14]], [[OR12]] - ; SI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; SI-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD19]](p3) :: (load (s8) from unknown-address + 20, addrspace 3) - ; SI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD20]](p3) :: (load (s8) from unknown-address + 21, addrspace 3) - ; SI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD16]], [[C1]](s32) - ; SI-NEXT: 
[[OR15:%[0-9]+]]:_(s32) = G_OR [[SHL15]], [[ZEXTLOAD15]] - ; SI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD21]](p3) :: (load (s8) from unknown-address + 22, addrspace 3) - ; SI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD21]], [[C]](s32) - ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p3) :: (load (s8) from unknown-address + 23, addrspace 3) - ; SI-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; SI-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[SHL16]], [[ZEXTLOAD17]] - ; SI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[OR16]], [[C3]](s32) - ; SI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] - ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i32) = 
G_OR [[SHL4]], [[ZEXTLOAD5]] + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; SI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; SI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; SI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; SI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; SI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; SI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; SI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; SI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; SI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (i8) from unknown-address + 12, addrspace 3) + ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (i8) from unknown-address + 13, addrspace 3) + ; SI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; SI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (i8) from unknown-address + 14, addrspace 3) + ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p3) :: (load (i8) from unknown-address + 15, addrspace 3) + ; SI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; SI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; SI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; SI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C4]](i32) + ; SI-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD15]](p3) :: (load (i8) from unknown-address + 16, addrspace 3) + ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD16]](p3) :: (load (i8) from unknown-address + 17, addrspace 3) + ; SI-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD13]], [[C1]](i32) + ; SI-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[ZEXTLOAD12]] + ; SI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD17]](p3) :: (load (i8) 
from unknown-address + 18, addrspace 3) + ; SI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD17]], [[C]](i32) + ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD18]](p3) :: (load (i8) from unknown-address + 19, addrspace 3) + ; SI-NEXT: [[SHL13:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; SI-NEXT: [[OR13:%[0-9]+]]:_(i32) = G_OR [[SHL13]], [[ZEXTLOAD14]] + ; SI-NEXT: [[SHL14:%[0-9]+]]:_(i32) = G_SHL [[OR13]], [[C3]](i32) + ; SI-NEXT: [[OR14:%[0-9]+]]:_(i32) = G_OR [[SHL14]], [[OR12]] + ; SI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](i32) + ; SI-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD19]](p3) :: (load (i8) from unknown-address + 20, addrspace 3) + ; SI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD20]](p3) :: (load (i8) from unknown-address + 21, addrspace 3) + ; SI-NEXT: [[SHL15:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD16]], [[C1]](i32) + ; SI-NEXT: [[OR15:%[0-9]+]]:_(i32) = G_OR [[SHL15]], [[ZEXTLOAD15]] + ; SI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD21]](p3) :: (load (i8) from unknown-address + 22, addrspace 3) + ; SI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD21]], [[C]](i32) + ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD22]](p3) :: (load (i8) from unknown-address + 23, addrspace 3) + ; SI-NEXT: [[SHL16:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; SI-NEXT: [[OR16:%[0-9]+]]:_(i32) = G_OR [[SHL16]], [[ZEXTLOAD17]] + ; SI-NEXT: [[SHL17:%[0-9]+]]:_(i32) = G_SHL [[OR16]], [[C3]](i32) + ; SI-NEXT: [[OR17:%[0-9]+]]:_(i32) = G_OR [[SHL17]], [[OR15]] + ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR11]](i32), [[OR14]](i32), [[OR17]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; CI-LABEL: name: test_load_local_v2s96_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; 
CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; CI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; CI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) - ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) - ; CI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; CI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) - ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) 
= G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) - ; CI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; CI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; CI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; CI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; CI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s32) - ; CI-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD15]](p3) :: (load (s8) from unknown-address + 16, addrspace 3) - ; CI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD16]](p3) :: (load (s8) from unknown-address + 17, addrspace 3) - ; CI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD13]], [[C1]](s32) - ; CI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[ZEXTLOAD12]] - ; CI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD17]](p3) :: (load (s8) from unknown-address + 18, addrspace 3) - ; CI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD17]], [[C]](s32) - ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p3) :: (load (s8) from unknown-address + 19, addrspace 3) - ; CI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; CI-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[SHL13]], [[ZEXTLOAD14]] - ; CI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[OR13]], [[C3]](s32) - ; CI-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[SHL14]], [[OR12]] - ; CI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; CI-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD19]](p3) :: (load (s8) from unknown-address + 20, addrspace 3) - ; CI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD20]](p3) :: (load (s8) from unknown-address + 21, addrspace 3) - ; CI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD16]], [[C1]](s32) - ; CI-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[SHL15]], [[ZEXTLOAD15]] - ; CI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD21]](p3) :: (load (s8) from unknown-address + 22, addrspace 3) - ; CI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD21]], [[C]](s32) - ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p3) :: (load (s8) from unknown-address + 23, addrspace 3) - ; CI-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; CI-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[SHL16]], [[ZEXTLOAD17]] - ; CI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[OR16]], [[C3]](s32) - ; CI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] - ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) - ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; 
CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; CI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; CI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; CI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; CI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; CI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; CI-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; CI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; CI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (i8) from unknown-address + 12, addrspace 3) + ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (i8) from unknown-address + 13, addrspace 3) + ; CI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; CI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (i8) from unknown-address + 14, addrspace 3) + ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p3) :: (load (i8) from unknown-address + 15, addrspace 3) + ; CI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; CI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; CI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; CI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; CI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C4]](i32) + ; CI-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD15]](p3) :: (load (i8) from unknown-address + 16, addrspace 3) + ; CI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD16]](p3) :: (load (i8) from unknown-address + 17, addrspace 3) + ; CI-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD13]], [[C1]](i32) + ; CI-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[ZEXTLOAD12]] + ; CI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD17]](p3) :: (load (i8) from unknown-address + 18, addrspace 3) + ; CI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD17]], [[C]](i32) + ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD18]](p3) :: (load (i8) from unknown-address + 19, addrspace 3) + ; CI-NEXT: [[SHL13:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; CI-NEXT: [[OR13:%[0-9]+]]:_(i32) = G_OR [[SHL13]], [[ZEXTLOAD14]] + ; CI-NEXT: [[SHL14:%[0-9]+]]:_(i32) = G_SHL [[OR13]], [[C3]](i32) + ; CI-NEXT: [[OR14:%[0-9]+]]:_(i32) = G_OR [[SHL14]], [[OR12]] + ; CI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](i32) + ; CI-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD19]](p3) :: (load (i8) from unknown-address + 20, addrspace 3) + ; CI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD20]](p3) :: (load (i8) from unknown-address + 21, addrspace 3) + ; CI-NEXT: [[SHL15:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD16]], [[C1]](i32) + ; CI-NEXT: [[OR15:%[0-9]+]]:_(i32) = G_OR [[SHL15]], [[ZEXTLOAD15]] + ; CI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD21]](p3) :: (load (i8) from unknown-address + 22, addrspace 3) + ; CI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD21]], [[C]](i32) + ; CI-NEXT: 
[[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD22]](p3) :: (load (i8) from unknown-address + 23, addrspace 3) + ; CI-NEXT: [[SHL16:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; CI-NEXT: [[OR16:%[0-9]+]]:_(i32) = G_OR [[SHL16]], [[ZEXTLOAD17]] + ; CI-NEXT: [[SHL17:%[0-9]+]]:_(i32) = G_SHL [[OR16]], [[C3]](i32) + ; CI-NEXT: [[OR17:%[0-9]+]]:_(i32) = G_OR [[SHL17]], [[OR15]] + ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR11]](i32), [[OR14]](i32), [[OR17]](i32) + ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; CI-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; CI-DS128-LABEL: name: test_load_local_v2s96_align1 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-DS128-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-DS128-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; CI-DS128-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], 
[[ZEXTLOAD5]] - ; CI-DS128-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-DS128-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-DS128-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; CI-DS128-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; CI-DS128-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; CI-DS128-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; CI-DS128-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; CI-DS128-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; CI-DS128-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CI-DS128-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) - ; CI-DS128-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; CI-DS128-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) - ; CI-DS128-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; CI-DS128-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; CI-DS128-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; CI-DS128-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD15]](p3) :: (load (s8) from unknown-address + 16, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD16]](p3) :: (load (s8) from unknown-address + 17, addrspace 3) - ; CI-DS128-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD13]], [[C1]](s32) - ; CI-DS128-NEXT: 
[[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[ZEXTLOAD12]] - ; CI-DS128-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD17]](p3) :: (load (s8) from unknown-address + 18, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD17]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p3) :: (load (s8) from unknown-address + 19, addrspace 3) - ; CI-DS128-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[SHL13]], [[ZEXTLOAD14]] - ; CI-DS128-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[OR13]], [[C3]](s32) - ; CI-DS128-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[SHL14]], [[OR12]] - ; CI-DS128-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD19]](p3) :: (load (s8) from unknown-address + 20, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD20]](p3) :: (load (s8) from unknown-address + 21, addrspace 3) - ; CI-DS128-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD16]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[SHL15]], [[ZEXTLOAD15]] - ; CI-DS128-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD21]](p3) :: (load (s8) from unknown-address + 22, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD21]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p3) :: (load (s8) from unknown-address + 23, addrspace 3) - ; CI-DS128-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[SHL16]], [[ZEXTLOAD17]] - ; CI-DS128-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[OR16]], [[C3]](s32) - ; CI-DS128-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] - ; CI-DS128-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) - ; CI-DS128-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; CI-DS128-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-DS128-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-DS128-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD 
[[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-DS128-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-DS128-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; CI-DS128-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-DS128-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-DS128-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-DS128-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; CI-DS128-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; CI-DS128-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; CI-DS128-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; CI-DS128-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; CI-DS128-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; CI-DS128-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CI-DS128-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (i8) from unknown-address + 12, addrspace 3) + ; CI-DS128-NEXT: 
[[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (i8) from unknown-address + 13, addrspace 3) + ; CI-DS128-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; CI-DS128-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (i8) from unknown-address + 14, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p3) :: (load (i8) from unknown-address + 15, addrspace 3) + ; CI-DS128-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; CI-DS128-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; CI-DS128-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; CI-DS128-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C4]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD15]](p3) :: (load (i8) from unknown-address + 16, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD16]](p3) :: (load (i8) from unknown-address + 17, addrspace 3) + ; CI-DS128-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD13]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[ZEXTLOAD12]] + ; CI-DS128-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C2]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD17]](p3) :: (load (i8) from unknown-address + 18, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD17]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD18]](p3) :: (load (i8) from unknown-address + 19, addrspace 3) + ; CI-DS128-NEXT: [[SHL13:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR13:%[0-9]+]]:_(i32) = G_OR [[SHL13]], [[ZEXTLOAD14]] + ; CI-DS128-NEXT: [[SHL14:%[0-9]+]]:_(i32) = G_SHL [[OR13]], [[C3]](i32) + ; CI-DS128-NEXT: [[OR14:%[0-9]+]]:_(i32) = G_OR [[SHL14]], [[OR12]] + ; CI-DS128-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD19]](p3) :: (load (i8) from unknown-address + 20, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD20]](p3) :: (load (i8) from unknown-address + 21, addrspace 3) + ; CI-DS128-NEXT: [[SHL15:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD16]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR15:%[0-9]+]]:_(i32) = G_OR [[SHL15]], [[ZEXTLOAD15]] + ; CI-DS128-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C2]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD21]](p3) :: (load (i8) from unknown-address + 22, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD21]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD22]](p3) :: (load (i8) from unknown-address + 23, addrspace 3) + ; CI-DS128-NEXT: [[SHL16:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR16:%[0-9]+]]:_(i32) = G_OR 
[[SHL16]], [[ZEXTLOAD17]] + ; CI-DS128-NEXT: [[SHL17:%[0-9]+]]:_(i32) = G_SHL [[OR16]], [[C3]](i32) + ; CI-DS128-NEXT: [[OR17:%[0-9]+]]:_(i32) = G_OR [[SHL17]], [[OR15]] + ; CI-DS128-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR11]](i32), [[OR14]](i32), [[OR17]](i32) + ; CI-DS128-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; CI-DS128-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; CI-DS128-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; CI-DS128-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; VI-LABEL: name: test_load_local_v2s96_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; VI-NEXT: 
[[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; VI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) - ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) - ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) - ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) - ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s32) - ; VI-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD15]](p3) :: (load (s8) from unknown-address + 16, addrspace 3) - ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD16]](p3) :: (load (s8) from unknown-address + 17, addrspace 3) - ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD13]], [[C1]](s32) - ; VI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[ZEXTLOAD12]] - ; VI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD17]](p3) :: (load (s8) from unknown-address + 18, addrspace 3) - ; VI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD17]], [[C]](s32) - ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p3) :: (load (s8) from unknown-address + 19, addrspace 3) - ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[SHL13]], 
[[ZEXTLOAD14]] - ; VI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[OR13]], [[C3]](s32) - ; VI-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[SHL14]], [[OR12]] - ; VI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; VI-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD19]](p3) :: (load (s8) from unknown-address + 20, addrspace 3) - ; VI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD20]](p3) :: (load (s8) from unknown-address + 21, addrspace 3) - ; VI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD16]], [[C1]](s32) - ; VI-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[SHL15]], [[ZEXTLOAD15]] - ; VI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD21]](p3) :: (load (s8) from unknown-address + 22, addrspace 3) - ; VI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD21]], [[C]](s32) - ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p3) :: (load (s8) from unknown-address + 23, addrspace 3) - ; VI-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; VI-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[SHL16]], [[ZEXTLOAD17]] - ; VI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[OR16]], [[C3]](s32) - ; VI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] - ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, 
addrspace 3) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; VI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; VI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; VI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; VI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; VI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (i8) from unknown-address + 12, addrspace 3) + ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (i8) from unknown-address + 13, addrspace 3) + ; VI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; VI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (i8) from unknown-address + 14, addrspace 3) + ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p3) :: (load (i8) from unknown-address + 15, addrspace 3) + ; VI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; VI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; VI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; VI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], 
[[C4]](i32) + ; VI-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD15]](p3) :: (load (i8) from unknown-address + 16, addrspace 3) + ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD16]](p3) :: (load (i8) from unknown-address + 17, addrspace 3) + ; VI-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD13]], [[C1]](i32) + ; VI-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[ZEXTLOAD12]] + ; VI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD17]](p3) :: (load (i8) from unknown-address + 18, addrspace 3) + ; VI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD17]], [[C]](i32) + ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD18]](p3) :: (load (i8) from unknown-address + 19, addrspace 3) + ; VI-NEXT: [[SHL13:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR13:%[0-9]+]]:_(i32) = G_OR [[SHL13]], [[ZEXTLOAD14]] + ; VI-NEXT: [[SHL14:%[0-9]+]]:_(i32) = G_SHL [[OR13]], [[C3]](i32) + ; VI-NEXT: [[OR14:%[0-9]+]]:_(i32) = G_OR [[SHL14]], [[OR12]] + ; VI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](i32) + ; VI-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD19]](p3) :: (load (i8) from unknown-address + 20, addrspace 3) + ; VI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD20]](p3) :: (load (i8) from unknown-address + 21, addrspace 3) + ; VI-NEXT: [[SHL15:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD16]], [[C1]](i32) + ; VI-NEXT: [[OR15:%[0-9]+]]:_(i32) = G_OR [[SHL15]], [[ZEXTLOAD15]] + ; VI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD21]](p3) :: (load (i8) from unknown-address + 22, addrspace 3) + ; VI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD21]], [[C]](i32) + ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD22]](p3) :: (load (i8) from unknown-address + 23, addrspace 3) + ; VI-NEXT: [[SHL16:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; VI-NEXT: [[OR16:%[0-9]+]]:_(i32) = G_OR [[SHL16]], [[ZEXTLOAD17]] + ; VI-NEXT: [[SHL17:%[0-9]+]]:_(i32) = G_SHL [[OR16]], [[C3]](i32) + ; VI-NEXT: [[OR17:%[0-9]+]]:_(i32) = G_OR [[SHL17]], [[OR15]] + ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR11]](i32), [[OR14]](i32), [[OR17]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; GFX9-LABEL: name: test_load_local_v2s96_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: 
[[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX9-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX9-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX9-NEXT: 
[[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; GFX9-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; GFX9-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) - ; GFX9-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; GFX9-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; GFX9-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) - ; GFX9-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; GFX9-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; GFX9-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; GFX9-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s32) - ; GFX9-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD15]](p3) :: (load (s8) from unknown-address + 16, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C]](s32) - ; GFX9-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD16]](p3) :: (load (s8) from unknown-address + 17, addrspace 3) - ; GFX9-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD13]], [[C1]](s32) - ; GFX9-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[ZEXTLOAD12]] - ; GFX9-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s32) - ; GFX9-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD17]](p3) :: (load (s8) from unknown-address + 18, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD17]], [[C]](s32) - ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p3) :: (load (s8) from unknown-address + 19, addrspace 3) - ; GFX9-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; GFX9-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[SHL13]], [[ZEXTLOAD14]] - ; GFX9-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[OR13]], [[C3]](s32) - ; GFX9-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[SHL14]], [[OR12]] - ; GFX9-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; GFX9-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD19]](p3) :: (load (s8) from unknown-address + 20, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C]](s32) - ; GFX9-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD20]](p3) :: (load (s8) from unknown-address + 21, addrspace 3) - ; GFX9-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD16]], [[C1]](s32) - ; GFX9-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[SHL15]], [[ZEXTLOAD15]] - ; GFX9-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s32) - ; GFX9-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD21]](p3) :: (load (s8) from unknown-address + 22, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD21]], [[C]](s32) - ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p3) :: (load (s8) from unknown-address + 23, addrspace 3) - ; GFX9-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], 
[[C1]](s32) - ; GFX9-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[SHL16]], [[ZEXTLOAD17]] - ; GFX9-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[OR16]], [[C3]](s32) - ; GFX9-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX9-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + 
; GFX9-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; GFX9-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; GFX9-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (i8) from unknown-address + 12, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; GFX9-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (i8) from unknown-address + 13, addrspace 3) + ; GFX9-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; GFX9-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; GFX9-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (i8) from unknown-address + 14, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p3) :: (load (i8) from unknown-address + 15, addrspace 3) + ; GFX9-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; GFX9-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; GFX9-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; GFX9-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C4]](i32) + ; GFX9-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD15]](p3) :: (load (i8) from unknown-address + 16, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C]](i32) + ; GFX9-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD16]](p3) :: (load (i8) from unknown-address + 17, addrspace 3) + ; GFX9-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD13]], [[C1]](i32) + ; GFX9-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[ZEXTLOAD12]] + ; GFX9-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C2]](i32) + ; GFX9-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD17]](p3) :: (load (i8) from unknown-address + 18, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD17]], [[C]](i32) + ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD18]](p3) :: (load (i8) from unknown-address + 19, addrspace 3) + ; GFX9-NEXT: [[SHL13:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; GFX9-NEXT: [[OR13:%[0-9]+]]:_(i32) = G_OR 
[[SHL13]], [[ZEXTLOAD14]] + ; GFX9-NEXT: [[SHL14:%[0-9]+]]:_(i32) = G_SHL [[OR13]], [[C3]](i32) + ; GFX9-NEXT: [[OR14:%[0-9]+]]:_(i32) = G_OR [[SHL14]], [[OR12]] + ; GFX9-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](i32) + ; GFX9-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD19]](p3) :: (load (i8) from unknown-address + 20, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C]](i32) + ; GFX9-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD20]](p3) :: (load (i8) from unknown-address + 21, addrspace 3) + ; GFX9-NEXT: [[SHL15:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD16]], [[C1]](i32) + ; GFX9-NEXT: [[OR15:%[0-9]+]]:_(i32) = G_OR [[SHL15]], [[ZEXTLOAD15]] + ; GFX9-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C2]](i32) + ; GFX9-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD21]](p3) :: (load (i8) from unknown-address + 22, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD21]], [[C]](i32) + ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD22]](p3) :: (load (i8) from unknown-address + 23, addrspace 3) + ; GFX9-NEXT: [[SHL16:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; GFX9-NEXT: [[OR16:%[0-9]+]]:_(i32) = G_OR [[SHL16]], [[ZEXTLOAD17]] + ; GFX9-NEXT: [[SHL17:%[0-9]+]]:_(i32) = G_SHL [[OR16]], [[C3]](i32) + ; GFX9-NEXT: [[OR17:%[0-9]+]]:_(i32) = G_OR [[SHL17]], [[OR15]] + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR11]](i32), [[OR14]](i32), [[OR17]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s96_align1 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 1, addrspace 3) - ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX9-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<3 x s32>) from unknown-address + 12, align 1, addrspace 3) - ; GFX9-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX9-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p3) :: (load (<3 x i32>), align 1, addrspace 3) + ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX9-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<3 x i32>) from unknown-address + 12, align 1, addrspace 3) + ; GFX9-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD1]](<3 x i32>) + ; GFX9-UNALIGNED-NEXT: 
[[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; GFX9-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; GFX9-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; GFX10-LABEL: name: test_load_local_v2s96_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX10-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX10-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; GFX10-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; GFX10-NEXT: 
[[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX10-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; GFX10-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX10-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; GFX10-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; GFX10-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX10-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; GFX10-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; GFX10-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) - ; GFX10-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; GFX10-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; GFX10-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; GFX10-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) - ; GFX10-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; GFX10-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; GFX10-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; GFX10-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; GFX10-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s32) - ; GFX10-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD15]](p3) :: (load (s8) from unknown-address + 16, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C]](s32) - ; GFX10-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD16]](p3) :: (load (s8) from unknown-address + 17, addrspace 3) - ; GFX10-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD13]], [[C1]](s32) - ; GFX10-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[ZEXTLOAD12]] - ; GFX10-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s32) - ; GFX10-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD17]](p3) :: (load (s8) from unknown-address + 18, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD17]], [[C]](s32) - ; GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p3) :: (load (s8) from unknown-address + 19, addrspace 3) - ; GFX10-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; GFX10-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[SHL13]], [[ZEXTLOAD14]] - ; GFX10-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[OR13]], [[C3]](s32) - ; GFX10-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[SHL14]], [[OR12]] - ; GFX10-NEXT: 
[[PTR_ADD19:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; GFX10-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD19]](p3) :: (load (s8) from unknown-address + 20, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C]](s32) - ; GFX10-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD20]](p3) :: (load (s8) from unknown-address + 21, addrspace 3) - ; GFX10-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD16]], [[C1]](s32) - ; GFX10-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[SHL15]], [[ZEXTLOAD15]] - ; GFX10-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s32) - ; GFX10-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD21]](p3) :: (load (s8) from unknown-address + 22, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD21]], [[C]](s32) - ; GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p3) :: (load (s8) from unknown-address + 23, addrspace 3) - ; GFX10-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; GFX10-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[SHL16]], [[ZEXTLOAD17]] - ; GFX10-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[OR16]], [[C3]](s32) - ; GFX10-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; GFX10-NEXT: 
[[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX10-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; GFX10-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; GFX10-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; GFX10-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; GFX10-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; GFX10-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX10-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX10-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; GFX10-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX10-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; GFX10-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (i8) from unknown-address + 12, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; GFX10-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (i8) from unknown-address + 13, addrspace 3) + ; GFX10-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; GFX10-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; GFX10-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; GFX10-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (i8) from unknown-address + 14, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p3) :: (load (i8) from unknown-address + 15, addrspace 3) + ; GFX10-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; GFX10-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; GFX10-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; GFX10-NEXT: [[OR11:%[0-9]+]]:_(i32) = 
G_OR [[SHL11]], [[OR9]] + ; GFX10-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C4]](i32) + ; GFX10-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD15]](p3) :: (load (i8) from unknown-address + 16, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C]](i32) + ; GFX10-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD16]](p3) :: (load (i8) from unknown-address + 17, addrspace 3) + ; GFX10-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD13]], [[C1]](i32) + ; GFX10-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[ZEXTLOAD12]] + ; GFX10-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C2]](i32) + ; GFX10-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD17]](p3) :: (load (i8) from unknown-address + 18, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD17]], [[C]](i32) + ; GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD18]](p3) :: (load (i8) from unknown-address + 19, addrspace 3) + ; GFX10-NEXT: [[SHL13:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; GFX10-NEXT: [[OR13:%[0-9]+]]:_(i32) = G_OR [[SHL13]], [[ZEXTLOAD14]] + ; GFX10-NEXT: [[SHL14:%[0-9]+]]:_(i32) = G_SHL [[OR13]], [[C3]](i32) + ; GFX10-NEXT: [[OR14:%[0-9]+]]:_(i32) = G_OR [[SHL14]], [[OR12]] + ; GFX10-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](i32) + ; GFX10-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD19]](p3) :: (load (i8) from unknown-address + 20, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C]](i32) + ; GFX10-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD20]](p3) :: (load (i8) from unknown-address + 21, addrspace 3) + ; GFX10-NEXT: [[SHL15:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD16]], [[C1]](i32) + ; GFX10-NEXT: [[OR15:%[0-9]+]]:_(i32) = G_OR [[SHL15]], [[ZEXTLOAD15]] + ; GFX10-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C2]](i32) + ; GFX10-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD21]](p3) :: (load (i8) from unknown-address + 22, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD21]], [[C]](i32) + ; GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD22]](p3) :: (load (i8) from unknown-address + 23, addrspace 3) + ; GFX10-NEXT: [[SHL16:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; GFX10-NEXT: [[OR16:%[0-9]+]]:_(i32) = G_OR [[SHL16]], [[ZEXTLOAD17]] + ; GFX10-NEXT: [[SHL17:%[0-9]+]]:_(i32) = G_SHL [[OR16]], [[C3]](i32) + ; GFX10-NEXT: [[OR17:%[0-9]+]]:_(i32) = G_OR [[SHL17]], [[OR15]] + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR11]](i32), [[OR14]](i32), [[OR17]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s96_align1 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 1, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: 
(load (s32) from unknown-address + 4, align 1, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, align 1, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, align 1, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD2]], [[C]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s32) from unknown-address + 16, align 1, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD2]], [[C1]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s32) from unknown-address + 20, align 1, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) - ; GFX10-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX10-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX10-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX10-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), align 1, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, align 1, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 8, align 1, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i32) from unknown-address + 12, align 1, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD2]], [[C]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p3) :: (load (i32) from unknown-address + 16, align 1, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD2]], [[C1]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p3) :: (load (i32) from unknown-address + 20, align 1, addrspace 3) + ; GFX10-UNALIGNED-NEXT: 
[[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32) + ; GFX10-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; GFX10-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; GFX10-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; GFX10-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; GFX11-LABEL: name: test_load_local_v2s96_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) - ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) - ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) - ; GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) - ; GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; GFX11-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX11-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], 
[[C]](s32) - ; GFX11-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) - ; GFX11-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; GFX11-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX11-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) - ; GFX11-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX11-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; GFX11-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; GFX11-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX11-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX11-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; GFX11-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; GFX11-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) - ; GFX11-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; GFX11-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; GFX11-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; GFX11-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) - ; GFX11-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; GFX11-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; GFX11-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; GFX11-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; GFX11-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s32) - ; GFX11-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD15]](p3) :: (load (s8) from unknown-address + 16, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C]](s32) - ; GFX11-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD16]](p3) :: (load (s8) from unknown-address + 17, addrspace 3) - ; GFX11-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD13]], [[C1]](s32) - ; GFX11-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[ZEXTLOAD12]] - ; GFX11-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s32) - ; GFX11-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD17]](p3) :: (load (s8) from unknown-address + 18, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD17]], [[C]](s32) - ; GFX11-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p3) :: (load (s8) from unknown-address + 19, addrspace 3) - ; GFX11-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], 
[[C1]](s32) - ; GFX11-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[SHL13]], [[ZEXTLOAD14]] - ; GFX11-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[OR13]], [[C3]](s32) - ; GFX11-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[SHL14]], [[OR12]] - ; GFX11-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; GFX11-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD19]](p3) :: (load (s8) from unknown-address + 20, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C]](s32) - ; GFX11-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD20]](p3) :: (load (s8) from unknown-address + 21, addrspace 3) - ; GFX11-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD16]], [[C1]](s32) - ; GFX11-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[SHL15]], [[ZEXTLOAD15]] - ; GFX11-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s32) - ; GFX11-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD21]](p3) :: (load (s8) from unknown-address + 22, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD21]], [[C]](s32) - ; GFX11-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p3) :: (load (s8) from unknown-address + 23, addrspace 3) - ; GFX11-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; GFX11-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[SHL16]], [[ZEXTLOAD17]] - ; GFX11-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[OR16]], [[C3]](s32) - ; GFX11-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (i8) from unknown-address + 1, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i8) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i8) from unknown-address + 3, addrspace 3) + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; GFX11-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i8) from unknown-address + 4, addrspace 3) 
+ ; GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (i8) from unknown-address + 5, addrspace 3) + ; GFX11-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; GFX11-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i8) from unknown-address + 6, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i8) from unknown-address + 7, addrspace 3) + ; GFX11-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX11-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX11-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; GFX11-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX11-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i8) from unknown-address + 8, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; GFX11-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (i8) from unknown-address + 9, addrspace 3) + ; GFX11-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; GFX11-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; GFX11-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i8) from unknown-address + 10, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i8) from unknown-address + 11, addrspace 3) + ; GFX11-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX11-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX11-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; GFX11-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX11-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX11-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; GFX11-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (i8) from unknown-address + 12, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; GFX11-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (i8) from unknown-address + 13, addrspace 3) + ; GFX11-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; GFX11-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; GFX11-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; GFX11-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (i8) from unknown-address + 14, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p3) :: (load (i8) from unknown-address + 15, addrspace 3) + ; GFX11-NEXT: 
[[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; GFX11-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; GFX11-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; GFX11-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; GFX11-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C4]](i32) + ; GFX11-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD15]](p3) :: (load (i8) from unknown-address + 16, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C]](i32) + ; GFX11-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD16]](p3) :: (load (i8) from unknown-address + 17, addrspace 3) + ; GFX11-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD13]], [[C1]](i32) + ; GFX11-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[ZEXTLOAD12]] + ; GFX11-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C2]](i32) + ; GFX11-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD17]](p3) :: (load (i8) from unknown-address + 18, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD17]], [[C]](i32) + ; GFX11-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD18]](p3) :: (load (i8) from unknown-address + 19, addrspace 3) + ; GFX11-NEXT: [[SHL13:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; GFX11-NEXT: [[OR13:%[0-9]+]]:_(i32) = G_OR [[SHL13]], [[ZEXTLOAD14]] + ; GFX11-NEXT: [[SHL14:%[0-9]+]]:_(i32) = G_SHL [[OR13]], [[C3]](i32) + ; GFX11-NEXT: [[OR14:%[0-9]+]]:_(i32) = G_OR [[SHL14]], [[OR12]] + ; GFX11-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](i32) + ; GFX11-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD19]](p3) :: (load (i8) from unknown-address + 20, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C]](i32) + ; GFX11-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD20]](p3) :: (load (i8) from unknown-address + 21, addrspace 3) + ; GFX11-NEXT: [[SHL15:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD16]], [[C1]](i32) + ; GFX11-NEXT: [[OR15:%[0-9]+]]:_(i32) = G_OR [[SHL15]], [[ZEXTLOAD15]] + ; GFX11-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C2]](i32) + ; GFX11-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD21]](p3) :: (load (i8) from unknown-address + 22, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD21]], [[C]](i32) + ; GFX11-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD22]](p3) :: (load (i8) from unknown-address + 23, addrspace 3) + ; GFX11-NEXT: [[SHL16:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; GFX11-NEXT: [[OR16:%[0-9]+]]:_(i32) = G_OR [[SHL16]], [[ZEXTLOAD17]] + ; GFX11-NEXT: [[SHL17:%[0-9]+]]:_(i32) = G_SHL [[OR16]], [[C3]](i32) + ; GFX11-NEXT: [[OR17:%[0-9]+]]:_(i32) = G_OR [[SHL17]], [[OR15]] + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR11]](i32), [[OR14]](i32), [[OR17]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s96_align1 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x 
s32>), align 1, addrspace 3) - ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX11-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<3 x s32>) from unknown-address + 12, align 1, addrspace 3) - ; GFX11-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX11-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX11-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX11-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p3) :: (load (<3 x i32>), align 1, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX11-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<3 x i32>) from unknown-address + 12, align 1, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD1]](<3 x i32>) + ; GFX11-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; GFX11-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; GFX11-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) %0:_(p3) = COPY $vgpr0 - %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 1, addrspace 3) - %2:_(s96) = G_EXTRACT %1, 0 - %3:_(s96) = G_EXTRACT %1, 96 - $vgpr0_vgpr1_vgpr2 = COPY %2 - $vgpr3_vgpr4_vgpr5 = COPY %3 + %1:_(<2 x i96>) = G_LOAD %0(p3) :: (load (<2 x i96>), align 1, addrspace 3) + %2:_(i96) = G_EXTRACT %1(<2 x i96>), 0 + %3:_(i96) = G_EXTRACT %1(<2 x i96>), 96 + $vgpr0_vgpr1_vgpr2 = COPY %2(i96) + $vgpr3_vgpr4_vgpr5 = COPY %3(i96) ... 
--- @@ -16600,435 +16600,435 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3) - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s16) from unknown-address + 16, addrspace 3) - ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s16) from unknown-address + 18, addrspace 3) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD4]] - ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s32) - ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s16) from unknown-address + 20, addrspace 3) - ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s16) from unknown-address + 22, addrspace 3) - ; SI-NEXT: 
[[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] - ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, addrspace 3) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i16) from unknown-address + 8, addrspace 3) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p3) :: (load (i16) from unknown-address + 10, addrspace 3) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i16) from unknown-address + 12, addrspace 3) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i16) from unknown-address + 14, addrspace 3) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i16) from unknown-address + 16, addrspace 3) + ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD8]](p3) :: (load (i16) from unknown-address + 18, addrspace 3) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD4]] + ; 
SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C3]](i32) + ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i16) from unknown-address + 20, addrspace 3) + ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i16) from unknown-address + 22, addrspace 3) + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[ZEXTLOAD5]] + ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR3]](i32), [[OR4]](i32), [[OR5]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; CI-LABEL: name: test_load_local_v2s96_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3) - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], 
[[ZEXTLOAD3]] - ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s16) from unknown-address + 16, addrspace 3) - ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s16) from unknown-address + 18, addrspace 3) - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD4]] - ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s32) - ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s16) from unknown-address + 20, addrspace 3) - ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s16) from unknown-address + 22, addrspace 3) - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] - ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) - ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, addrspace 3) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i16) from unknown-address + 8, addrspace 3) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p3) :: (load (i16) from unknown-address + 10, addrspace 3) + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; CI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; CI-NEXT: 
[[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i16) from unknown-address + 12, addrspace 3) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i16) from unknown-address + 14, addrspace 3) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i16) from unknown-address + 16, addrspace 3) + ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD8]](p3) :: (load (i16) from unknown-address + 18, addrspace 3) + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD4]] + ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C3]](i32) + ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i16) from unknown-address + 20, addrspace 3) + ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i16) from unknown-address + 22, addrspace 3) + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[ZEXTLOAD5]] + ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR3]](i32), [[OR4]](i32), [[OR5]](i32) + ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; CI-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; CI-DS128-LABEL: name: test_load_local_v2s96_align2 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) - ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-DS128-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, 
addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3) - ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) - ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CI-DS128-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3) - ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-DS128-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C2]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s16) from unknown-address + 16, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s16) from unknown-address + 18, addrspace 3) - ; CI-DS128-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD4]] - ; CI-DS128-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s32) - ; CI-DS128-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s16) from unknown-address + 20, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s16) from unknown-address + 22, addrspace 3) - ; CI-DS128-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; CI-DS128-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] - ; CI-DS128-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) - ; CI-DS128-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; CI-DS128-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-DS128-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-DS128-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-DS128-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-DS128-NEXT: 
[[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, addrspace 3) + ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-DS128-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i16) from unknown-address + 8, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p3) :: (load (i16) from unknown-address + 10, addrspace 3) + ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32) + ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CI-DS128-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i16) from unknown-address + 12, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i16) from unknown-address + 14, addrspace 3) + ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-DS128-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C2]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i16) from unknown-address + 16, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD8]](p3) :: (load (i16) from unknown-address + 18, addrspace 3) + ; CI-DS128-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD4]] + ; CI-DS128-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C3]](i32) + ; CI-DS128-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i16) from unknown-address + 20, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i16) from unknown-address + 22, addrspace 3) + ; CI-DS128-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; CI-DS128-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[ZEXTLOAD5]] + ; CI-DS128-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR3]](i32), [[OR4]](i32), [[OR5]](i32) + ; CI-DS128-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; CI-DS128-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; CI-DS128-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; CI-DS128-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; VI-LABEL: name: 
test_load_local_v2s96_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3) - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s16) from unknown-address + 16, addrspace 3) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s16) from unknown-address + 18, addrspace 3) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD4]] - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s32) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s16) from unknown-address + 20, addrspace 3) - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s16) from unknown-address + 22, addrspace 3) - ; VI-NEXT: 
[[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] - ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, addrspace 3) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i16) from unknown-address + 8, addrspace 3) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p3) :: (load (i16) from unknown-address + 10, addrspace 3) + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i16) from unknown-address + 12, addrspace 3) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i16) from unknown-address + 14, addrspace 3) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i16) from unknown-address + 16, addrspace 3) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD8]](p3) :: (load (i16) from unknown-address + 18, addrspace 3) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD4]] + ; 
VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C3]](i32) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i16) from unknown-address + 20, addrspace 3) + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i16) from unknown-address + 22, addrspace 3) + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[ZEXTLOAD5]] + ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR3]](i32), [[OR4]](i32), [[OR5]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; GFX9-LABEL: name: test_load_local_v2s96_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3) - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3) - ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], 
[[C1]](s32) - ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C2]](s32) - ; GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s16) from unknown-address + 16, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s16) from unknown-address + 18, addrspace 3) - ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD4]] - ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s32) - ; GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s16) from unknown-address + 20, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s16) from unknown-address + 22, addrspace 3) - ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, addrspace 3) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i16) from unknown-address + 8, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p3) :: (load (i16) from unknown-address + 10, addrspace 3) + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; 
GFX9-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i16) from unknown-address + 12, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i16) from unknown-address + 14, addrspace 3) + ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C2]](i32) + ; GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i16) from unknown-address + 16, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD8]](p3) :: (load (i16) from unknown-address + 18, addrspace 3) + ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD4]] + ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C3]](i32) + ; GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i16) from unknown-address + 20, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i16) from unknown-address + 22, addrspace 3) + ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[ZEXTLOAD5]] + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR3]](i32), [[OR4]](i32), [[OR5]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s96_align2 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 2, addrspace 3) - ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX9-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<3 x s32>) from unknown-address + 12, align 2, addrspace 3) - ; GFX9-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX9-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p3) :: (load (<3 x i32>), align 2, addrspace 3) + ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX9-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; 
GFX9-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<3 x i32>) from unknown-address + 12, align 2, addrspace 3) + ; GFX9-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD1]](<3 x i32>) + ; GFX9-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; GFX9-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; GFX9-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; GFX10-LABEL: name: test_load_local_v2s96_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) - ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3) - ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3) - ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C2]](s32) - ; GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s16) from unknown-address + 16, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = 
G_LOAD [[PTR_ADD8]](p3) :: (load (s16) from unknown-address + 18, addrspace 3) - ; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD4]] - ; GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s32) - ; GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s16) from unknown-address + 20, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s16) from unknown-address + 22, addrspace 3) - ; GFX10-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, addrspace 3) + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i16) from unknown-address + 8, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p3) :: (load (i16) from unknown-address + 10, addrspace 3) + ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i16) from unknown-address + 12, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = 
G_LOAD [[PTR_ADD6]](p3) :: (load (i16) from unknown-address + 14, addrspace 3) + ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C2]](i32) + ; GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i16) from unknown-address + 16, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD8]](p3) :: (load (i16) from unknown-address + 18, addrspace 3) + ; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD4]] + ; GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C3]](i32) + ; GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i16) from unknown-address + 20, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i16) from unknown-address + 22, addrspace 3) + ; GFX10-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[ZEXTLOAD5]] + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR3]](i32), [[OR4]](i32), [[OR5]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s96_align2 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 2, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, align 2, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, align 2, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, align 2, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD2]], [[C]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s32) from unknown-address + 16, align 2, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD2]], [[C1]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD 
[[PTR_ADD4]](p3) :: (load (s32) from unknown-address + 20, align 2, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) - ; GFX10-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX10-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX10-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX10-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), align 2, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, align 2, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 8, align 2, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i32) from unknown-address + 12, align 2, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD2]], [[C]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p3) :: (load (i32) from unknown-address + 16, align 2, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD2]], [[C1]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p3) :: (load (i32) from unknown-address + 20, align 2, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32) + ; GFX10-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; GFX10-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; GFX10-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; GFX10-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; GFX11-LABEL: name: test_load_local_v2s96_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) - ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX11-NEXT: 
[[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) - ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3) - ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3) - ; GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C2]](s32) - ; GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s16) from unknown-address + 16, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX11-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s16) from unknown-address + 18, addrspace 3) - ; GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD4]] - ; GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s32) - ; GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s16) from unknown-address + 20, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; GFX11-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s16) from unknown-address + 22, addrspace 3) - ; GFX11-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD 
[[COPY]](p3) :: (load (i16), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i16) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (i16) from unknown-address + 4, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i16) from unknown-address + 6, addrspace 3) + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (i16) from unknown-address + 8, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p3) :: (load (i16) from unknown-address + 10, addrspace 3) + ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX11-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (i16) from unknown-address + 12, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p3) :: (load (i16) from unknown-address + 14, addrspace 3) + ; GFX11-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; GFX11-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C2]](i32) + ; GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (i16) from unknown-address + 16, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; GFX11-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD8]](p3) :: (load (i16) from unknown-address + 18, addrspace 3) + ; GFX11-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; GFX11-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD4]] + ; GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C3]](i32) + ; GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (i16) from unknown-address + 20, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; GFX11-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p3) :: (load (i16) from unknown-address + 22, addrspace 3) + ; GFX11-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; GFX11-NEXT: 
[[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[ZEXTLOAD5]] + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR3]](i32), [[OR4]](i32), [[OR5]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s96_align2 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 2, addrspace 3) - ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX11-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<3 x s32>) from unknown-address + 12, align 2, addrspace 3) - ; GFX11-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX11-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX11-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX11-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p3) :: (load (<3 x i32>), align 2, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX11-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<3 x i32>) from unknown-address + 12, align 2, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD1]](<3 x i32>) + ; GFX11-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; GFX11-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; GFX11-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) %0:_(p3) = COPY $vgpr0 - %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 2, addrspace 3) - %2:_(s96) = G_EXTRACT %1, 0 - %3:_(s96) = G_EXTRACT %1, 96 - $vgpr0_vgpr1_vgpr2 = COPY %2 - $vgpr3_vgpr4_vgpr5 = COPY %3 + %1:_(<2 x i96>) = G_LOAD %0(p3) :: (load (<2 x i96>), align 2, addrspace 3) + %2:_(i96) = G_EXTRACT %1(<2 x i96>), 0 + %3:_(i96) = G_EXTRACT %1(<2 x i96>), 96 + $vgpr0_vgpr1_vgpr2 = COPY %2(i96) + $vgpr3_vgpr4_vgpr5 = COPY %3(i96) ... 
--- @@ -17041,247 +17041,247 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<2 x s32>) from unknown-address + 12, align 4, addrspace 3) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 20, addrspace 3) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<2 x s32>) - ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[LOAD3]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), align 4, addrspace 3) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 8, addrspace 3) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<2 x i32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[LOAD1]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<2 x i32>) from unknown-address + 12, align 4, addrspace 3) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i32) from unknown-address + 20, addrspace 3) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD2]](<2 x i32>) + ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[UV2]](i32), [[UV3]](i32), [[LOAD3]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; CI-LABEL: name: test_load_local_v2s96_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-NEXT: 
[[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) - ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<2 x s32>) from unknown-address + 12, align 4, addrspace 3) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 20, addrspace 3) - ; CI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<2 x s32>) - ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[LOAD3]](s32) - ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), align 4, addrspace 3) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 8, addrspace 3) + ; CI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<2 x i32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[LOAD1]](i32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<2 x i32>) from unknown-address + 12, align 4, addrspace 3) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i32) from unknown-address + 20, addrspace 3) + ; CI-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD2]](<2 x i32>) + ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[UV2]](i32), [[UV3]](i32), [[LOAD3]](i32) + ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; CI-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; CI-DS128-LABEL: name: test_load_local_v2s96_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 4 - ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) - ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) - ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD2]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s32) from unknown-address + 16, addrspace 3) - ; CI-DS128-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD2]], [[C1]](s32) - ; CI-DS128-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s32) from unknown-address + 20, addrspace 3) - ; CI-DS128-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) - ; CI-DS128-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; CI-DS128-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-DS128-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-DS128-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, addrspace 3) + ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 8, addrspace 3) + ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i32) from unknown-address + 12, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD2]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p3) :: (load (i32) from unknown-address + 16, addrspace 3) + ; CI-DS128-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD2]], [[C1]](i32) + ; CI-DS128-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p3) :: (load (i32) from unknown-address + 20, addrspace 3) + ; CI-DS128-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32) + ; CI-DS128-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) 
+ ; CI-DS128-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; CI-DS128-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; CI-DS128-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; VI-LABEL: name: test_load_local_v2s96_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD2]], [[C]](s32) - ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s32) from unknown-address + 16, addrspace 3) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD2]], [[C1]](s32) - ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s32) from unknown-address + 20, addrspace 3) - ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, addrspace 3) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 8, addrspace 3) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i32) from unknown-address + 12, addrspace 3) + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD2]], [[C]](i32) + ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p3) :: (load (i32) from unknown-address + 16, addrspace 3) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD2]], [[C1]](i32) + ; VI-NEXT: 
[[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p3) :: (load (i32) from unknown-address + 20, addrspace 3) + ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; GFX9-LABEL: name: test_load_local_v2s96_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD2]], [[C]](s32) - ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s32) from unknown-address + 16, addrspace 3) - ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD2]], [[C1]](s32) - ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s32) from unknown-address + 20, addrspace 3) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, addrspace 3) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 8, addrspace 3) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) 
:: (load (i32) from unknown-address + 12, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD2]], [[C]](i32) + ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p3) :: (load (i32) from unknown-address + 16, addrspace 3) + ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD2]], [[C1]](i32) + ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p3) :: (load (i32) from unknown-address + 20, addrspace 3) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s96_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 4, addrspace 3) - ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX9-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 3) - ; GFX9-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX9-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p3) :: (load (<3 x i32>), align 4, addrspace 3) + ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX9-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<3 x i32>) from unknown-address + 12, align 4, addrspace 3) + ; GFX9-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD1]](<3 x i32>) + ; GFX9-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; GFX9-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; GFX9-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; GFX10-LABEL: name: test_load_local_v2s96_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from 
unknown-address + 8, addrspace 3) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD2]], [[C]](s32) - ; GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s32) from unknown-address + 16, addrspace 3) - ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD2]], [[C1]](s32) - ; GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s32) from unknown-address + 20, addrspace 3) - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, addrspace 3) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 8, addrspace 3) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i32) from unknown-address + 12, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD2]], [[C]](i32) + ; GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p3) :: (load (i32) from unknown-address + 16, addrspace 3) + ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD2]], [[C1]](i32) + ; GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p3) :: (load (i32) from unknown-address + 20, addrspace 3) + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s96_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; 
GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD2]], [[C]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s32) from unknown-address + 16, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD2]], [[C1]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s32) from unknown-address + 20, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) - ; GFX10-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX10-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX10-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX10-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 8, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i32) from unknown-address + 12, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD2]], [[C]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p3) :: (load (i32) from unknown-address + 16, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD2]], [[C1]](i32) + ; GFX10-UNALIGNED-NEXT: 
[[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p3) :: (load (i32) from unknown-address + 20, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32) + ; GFX10-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; GFX10-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; GFX10-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; GFX10-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; GFX11-LABEL: name: test_load_local_v2s96_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) - ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD2]], [[C]](s32) - ; GFX11-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s32) from unknown-address + 16, addrspace 3) - ; GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD2]], [[C1]](s32) - ; GFX11-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s32) from unknown-address + 20, addrspace 3) - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 4, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 8, addrspace 3) + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX11-NEXT: 
[[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i32) from unknown-address + 12, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD2]], [[C]](i32) + ; GFX11-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p3) :: (load (i32) from unknown-address + 16, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD2]], [[C1]](i32) + ; GFX11-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p3) :: (load (i32) from unknown-address + 20, addrspace 3) + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s96_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 4, addrspace 3) - ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX11-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 3) - ; GFX11-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX11-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX11-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX11-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p3) :: (load (<3 x i32>), align 4, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX11-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<3 x i32>) from unknown-address + 12, align 4, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD1]](<3 x i32>) + ; GFX11-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; GFX11-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; GFX11-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) %0:_(p3) = COPY $vgpr0 - %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 4, addrspace 3) - %2:_(s96) = G_EXTRACT %1, 0 - %3:_(s96) = G_EXTRACT %1, 96 - $vgpr0_vgpr1_vgpr2 = COPY %2 - $vgpr3_vgpr4_vgpr5 = COPY %3 + %1:_(<2 x i96>) = G_LOAD %0(p3) :: (load (<2 x i96>), align 4, addrspace 3) + %2:_(i96) = G_EXTRACT %1(<2 x i96>), 0 + %3:_(i96) = G_EXTRACT %1(<2 x i96>), 96 + $vgpr0_vgpr1_vgpr2 = COPY %2(i96) + $vgpr3_vgpr4_vgpr5 = COPY %3(i96) ... 
--- @@ -17294,215 +17294,215 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 16, addrspace 3) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, align 8, addrspace 3) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<2 x s32>) from unknown-address + 12, align 4, addrspace 3) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 20, addrspace 3) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<2 x s32>) - ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[LOAD3]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), align 16, addrspace 3) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 8, align 8, addrspace 3) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<2 x i32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[LOAD1]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<2 x i32>) from unknown-address + 12, align 4, addrspace 3) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i32) from unknown-address + 20, addrspace 3) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD2]](<2 x i32>) + ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[UV2]](i32), [[UV3]](i32), [[LOAD3]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; CI-LABEL: name: test_load_local_v2s96_align16 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; 
CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 16, addrspace 3) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, align 8, addrspace 3) - ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<2 x s32>) from unknown-address + 12, align 4, addrspace 3) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 20, addrspace 3) - ; CI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<2 x s32>) - ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[LOAD3]](s32) - ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p3) :: (load (<2 x i32>), align 16, addrspace 3) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 8, align 8, addrspace 3) + ; CI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD]](<2 x i32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[LOAD1]](i32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<2 x i32>) from unknown-address + 12, align 4, addrspace 3) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i32) from unknown-address + 20, addrspace 3) + ; CI-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[LOAD2]](<2 x i32>) + ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[UV2]](i32), [[UV3]](i32), [[LOAD3]](i32) + ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; CI-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; CI-DS128-LABEL: name: test_load_local_v2s96_align16 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 16, addrspace 
3) - ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) - ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD]], [[C1]](s32) - ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 16, addrspace 3) - ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD]], [[C2]](s32) - ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 20, addrspace 3) - ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; CI-DS128-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; CI-DS128-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-DS128-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-DS128-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p3) :: (load (<3 x i32>), align 16, addrspace 3) + ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 12, addrspace 3) + ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD]], [[C1]](i32) + ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 16, addrspace 3) + ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD]], [[C2]](i32) + ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i32) from unknown-address + 20, addrspace 3) + ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; CI-DS128-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; CI-DS128-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; CI-DS128-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; CI-DS128-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; VI-LABEL: name: test_load_local_v2s96_align16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 16, addrspace 3) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD]], [[C1]](s32) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD 
[[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 16, addrspace 3) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD]], [[C2]](s32) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 20, addrspace 3) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p3) :: (load (<3 x i32>), align 16, addrspace 3) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 12, addrspace 3) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD]], [[C1]](i32) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 16, addrspace 3) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD]], [[C2]](i32) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i32) from unknown-address + 20, addrspace 3) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; GFX9-LABEL: name: test_load_local_v2s96_align16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 16, addrspace 3) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD]], [[C1]](s32) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 16, addrspace 3) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD]], [[C2]](s32) - ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 20, addrspace 3) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-NEXT: 
$vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p3) :: (load (<3 x i32>), align 16, addrspace 3) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 12, addrspace 3) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD]], [[C1]](i32) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 16, addrspace 3) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD]], [[C2]](i32) + ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i32) from unknown-address + 20, addrspace 3) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s96_align16 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 16, addrspace 3) - ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX9-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 3) - ; GFX9-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX9-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p3) :: (load (<3 x i32>), align 16, addrspace 3) + ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX9-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<3 x i32>) from unknown-address + 12, align 4, addrspace 3) + ; GFX9-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD1]](<3 x i32>) + ; GFX9-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; GFX9-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; GFX9-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; GFX10-LABEL: name: test_load_local_v2s96_align16 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: 
[[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 16, addrspace 3) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD]], [[C1]](s32) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 16, addrspace 3) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD]], [[C2]](s32) - ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 20, addrspace 3) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p3) :: (load (<3 x i32>), align 16, addrspace 3) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 12, addrspace 3) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD]], [[C1]](i32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 16, addrspace 3) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD]], [[C2]](i32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i32) from unknown-address + 20, addrspace 3) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s96_align16 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 16, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) - ; 
GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD]], [[C1]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 16, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD]], [[C2]](s32) - ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 20, addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; GFX10-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX10-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX10-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX10-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p3) :: (load (<3 x i32>), align 16, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 12, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD]], [[C1]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 16, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD]], [[C2]](i32) + ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i32) from unknown-address + 20, addrspace 3) + ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; GFX10-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX10-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; GFX10-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; GFX10-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; GFX11-LABEL: name: test_load_local_v2s96_align16 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 16, addrspace 3) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) - ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD]], [[C1]](s32) - ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 16, addrspace 3) - ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 8 - ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD]], [[C2]](s32) - ; GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 20, addrspace 3) - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p3) :: (load (<3 x i32>), align 16, addrspace 3) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p3) :: (load (i32) from unknown-address + 12, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD]], [[C1]](i32) + ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p3) :: (load (i32) from unknown-address + 16, addrspace 3) + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD]], [[C2]](i32) + ; GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p3) :: (load (i32) from unknown-address + 20, addrspace 3) + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s96_align16 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 16, addrspace 3) - ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX11-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 3) - ; GFX11-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX11-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX11-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX11-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p3) :: (load (<3 x i32>), align 16, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX11-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<3 
x i32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<3 x i32>) from unknown-address + 12, align 4, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD1]](<3 x i32>) + ; GFX11-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; GFX11-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; GFX11-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) %0:_(p3) = COPY $vgpr0 - %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 16, addrspace 3) - %2:_(s96) = G_EXTRACT %1, 0 - %3:_(s96) = G_EXTRACT %1, 96 - $vgpr0_vgpr1_vgpr2 = COPY %2 - $vgpr3_vgpr4_vgpr5 = COPY %3 + %1:_(<2 x i96>) = G_LOAD %0(p3) :: (load (<2 x i96>), align 16, addrspace 3) + %2:_(i96) = G_EXTRACT %1(<2 x i96>), 0 + %3:_(i96) = G_EXTRACT %1(<2 x i96>), 96 + $vgpr0_vgpr1_vgpr2 = COPY %2(i96) + $vgpr3_vgpr4_vgpr5 = COPY %3(i96) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-memory-metadata.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-memory-metadata.mir index da88d5f13c1eb..106ce4cf3c151 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-memory-metadata.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-memory-metadata.mir @@ -44,14 +44,14 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), !tbaa !1, addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; SI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), !tbaa !1, addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16777215 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; SI-NEXT: $vgpr0 = COPY [[AND]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s24) = G_LOAD %0 :: (load (s24), align 4, addrspace 1, !range !0, !tbaa !1) - %2:_(s32) = G_ZEXT %1 - $vgpr0 = COPY %2 + %1:_(i24) = G_LOAD %0(p1) :: (load (i24), align 4, addrspace 1, !range !0, !tbaa !1) + %2:_(i32) = G_ZEXT %1(i24) + $vgpr0 = COPY %2(i32) ... @@ -65,11 +65,11 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), !tbaa !1, addrspace 1) - ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), !tbaa !1, addrspace 1) + ; SI-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_LOAD %0 :: (load (s24), align 4, addrspace 1, !range !3, !tbaa !1) - $vgpr0 = COPY %1 + %1:_(i32) = G_LOAD %0(p1) :: (load (i24), align 4, addrspace 1, !range !3, !tbaa !1) + $vgpr0 = COPY %1(i32) ... 
--- @@ -81,14 +81,14 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), !tbaa !1, addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; SI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), !tbaa !1, addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16777215 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; SI-NEXT: $vgpr0 = COPY [[AND]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s24) = G_LOAD %0 :: (load (s24), align 4, addrspace 1, !tbaa !1) - %2:_(s32) = G_ZEXT %1 - $vgpr0 = COPY %2 + %1:_(i24) = G_LOAD %0(p1) :: (load (i24), align 4, addrspace 1, !tbaa !1) + %2:_(i32) = G_ZEXT %1(i24) + $vgpr0 = COPY %2(i32) ... @@ -102,10 +102,10 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), !tbaa !1, addrspace 1) - ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), !tbaa !1, addrspace 1) + ; SI-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_LOAD %0 :: (load (s24), align 4, addrspace 1, !tbaa !1) - $vgpr0 = COPY %1 + %1:_(i32) = G_LOAD %0(p1) :: (load (i24), align 4, addrspace 1, !tbaa !1) + $vgpr0 = COPY %1(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir index 8835956e11a4e..815bee481c55f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir @@ -25,104 +25,104 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; SI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; SI-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; CI-LABEL: name: test_load_private_s1_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; CI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; CI-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; VI-LABEL: name: test_load_private_s1_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; VI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; 
VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; VI-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; GFX9-LABEL: name: test_load_private_s1_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; GFX9-NEXT: $vgpr0 = COPY [[AND]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; GFX9-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; GFX10-LABEL: name: test_load_private_s1_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; GFX10-NEXT: $vgpr0 = COPY [[AND]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; GFX10-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; GFX11-LABEL: name: test_load_private_s1_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX11-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; GFX11-NEXT: $vgpr0 = COPY [[AND]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX11-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; GFX11-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; GFX12-LABEL: name: test_load_private_s1_align1 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX12-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; GFX12-NEXT: $vgpr0 = COPY [[AND]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX12-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; GFX12-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_s1_align1 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; UNALIGNED_GFX9-NEXT: $vgpr0 = COPY [[AND]](s32) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX9-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; UNALIGNED_GFX9-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; UNALIGNED_GFX10-LABEL: name: 
test_load_private_s1_align1 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; UNALIGNED_GFX10-NEXT: $vgpr0 = COPY [[AND]](s32) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX10-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; UNALIGNED_GFX10-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_s1_align1 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX11-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[AND]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX11-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_s1_align1 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX12-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[AND]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX12-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[AND]](i32) %0:_(p5) = COPY $vgpr0 - %1:_(s1) = G_LOAD %0 :: (load (s1), align 1, addrspace 5) - %2:_(s32) = G_ZEXT %1 - $vgpr0 = COPY %2 + %1:_(i1) = G_LOAD %0(p5) :: (load (i1), addrspace 5) + %2:_(i32) = G_ZEXT %1(i1) + $vgpr0 = COPY %2(i32) ... 
--- @@ -135,104 +135,104 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; SI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; SI-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; CI-LABEL: name: test_load_private_s2_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; CI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; CI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; CI-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; VI-LABEL: name: test_load_private_s2_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; VI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; VI-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; GFX9-LABEL: name: test_load_private_s2_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; GFX9-NEXT: $vgpr0 = COPY [[AND]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; GFX9-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; GFX10-LABEL: name: test_load_private_s2_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; GFX10-NEXT: $vgpr0 = COPY [[AND]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; GFX10-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; GFX10-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; GFX11-LABEL: name: test_load_private_s2_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; GFX11-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; GFX11-NEXT: $vgpr0 = COPY [[AND]](s32) + ; GFX11-NEXT: 
[[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; GFX11-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; GFX11-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; GFX12-LABEL: name: test_load_private_s2_align1 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; GFX12-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; GFX12-NEXT: $vgpr0 = COPY [[AND]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; GFX12-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; GFX12-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_s2_align1 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; UNALIGNED_GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; UNALIGNED_GFX9-NEXT: $vgpr0 = COPY [[AND]](s32) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; UNALIGNED_GFX9-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; UNALIGNED_GFX9-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_s2_align1 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; UNALIGNED_GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; UNALIGNED_GFX10-NEXT: $vgpr0 = COPY [[AND]](s32) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; UNALIGNED_GFX10-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; UNALIGNED_GFX10-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_s2_align1 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; UNALIGNED_GFX11-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[AND]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; UNALIGNED_GFX11-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_s2_align1 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; 
UNALIGNED_GFX12-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[AND]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; UNALIGNED_GFX12-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C]] + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[AND]](i32) %0:_(p5) = COPY $vgpr0 - %1:_(s2) = G_LOAD %0 :: (load (s2), align 1, addrspace 5) - %2:_(s32) = G_ZEXT %1 - $vgpr0 = COPY %2 + %1:_(i2) = G_LOAD %0(p5) :: (load (i2), addrspace 5) + %2:_(i32) = G_ZEXT %1(i2) + $vgpr0 = COPY %2(i32) ... --- @@ -245,82 +245,82 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) - ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), align 4, addrspace 5) + ; SI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-LABEL: name: test_load_private_s8_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) - ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), align 4, addrspace 5) + ; CI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; VI-LABEL: name: test_load_private_s8_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), align 4, addrspace 5) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-LABEL: name: test_load_private_s8_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), align 4, addrspace 5) + ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX10-LABEL: name: test_load_private_s8_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) - ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), align 4, addrspace 5) + ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX11-LABEL: name: test_load_private_s8_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), align 4, addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX12-LABEL: name: test_load_private_s8_align4 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) - ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), align 4, addrspace 5) + ; 
GFX12-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_s8_align4 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), align 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_s8_align4 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) - ; UNALIGNED_GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), align 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_s8_align4 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) - ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), align 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_s8_align4 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), align 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p5) = COPY $vgpr0 - %1:_(s8) = G_LOAD %0 :: (load (s8), align 4, addrspace 5) - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + %1:_(i8) = G_LOAD %0(p5) :: (load (i8), align 4, addrspace 5) + %2:_(i32) = G_ANYEXT %1(i8) + $vgpr0 = COPY %2(i32) ... 
--- @@ -333,82 +333,82 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; SI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-LABEL: name: test_load_private_s8_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; CI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; VI-LABEL: name: test_load_private_s8_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-LABEL: name: test_load_private_s8_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX10-LABEL: name: test_load_private_s8_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX11-LABEL: name: test_load_private_s8_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX12-LABEL: name: test_load_private_s8_align1 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_s8_align1 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX9-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_s8_align1 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD 
[[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX10-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_s8_align1 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_s8_align1 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p5) = COPY $vgpr0 - %1:_(s8) = G_LOAD %0 :: (load (s8), align 1, addrspace 5) - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + %1:_(i8) = G_LOAD %0(p5) :: (load (i8), addrspace 5) + %2:_(i32) = G_ANYEXT %1(i8) + $vgpr0 = COPY %2(i32) ... --- @@ -421,82 +421,82 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) - ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), align 4, addrspace 5) + ; SI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-LABEL: name: test_load_private_s16_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) - ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), align 4, addrspace 5) + ; CI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; VI-LABEL: name: test_load_private_s16_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), align 4, addrspace 5) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-LABEL: name: test_load_private_s16_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), align 4, addrspace 5) + ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX10-LABEL: name: test_load_private_s16_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) - ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), align 4, addrspace 5) + ; GFX10-NEXT: $vgpr0 = 
COPY [[LOAD]](i32) ; ; GFX11-LABEL: name: test_load_private_s16_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), align 4, addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX12-LABEL: name: test_load_private_s16_align4 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) - ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), align 4, addrspace 5) + ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_s16_align4 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), align 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_s16_align4 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) - ; UNALIGNED_GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), align 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_s16_align4 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) - ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), align 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_s16_align4 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), align 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p5) = COPY $vgpr0 - %1:_(s16) = G_LOAD %0 :: (load (s16), align 4, addrspace 5) - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + %1:_(i16) = G_LOAD %0(p5) :: (load (i16), align 4, addrspace 5) + %2:_(i32) = G_ANYEXT %1(i16) + $vgpr0 = COPY %2(i32) ... 
--- @@ -509,82 +509,82 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; SI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-LABEL: name: test_load_private_s16_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; CI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; VI-LABEL: name: test_load_private_s16_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-LABEL: name: test_load_private_s16_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX10-LABEL: name: test_load_private_s16_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX11-LABEL: name: test_load_private_s16_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX12-LABEL: name: test_load_private_s16_align2 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_s16_align2 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX9-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_s16_align2 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: 
[[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX10-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_s16_align2 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_s16_align2 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p5) = COPY $vgpr0 - %1:_(s16) = G_LOAD %0 :: (load (s16), align 2, addrspace 5) - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + %1:_(i16) = G_LOAD %0(p5) :: (load (i16), addrspace 5) + %2:_(i32) = G_ANYEXT %1(i16) + $vgpr0 = COPY %2(i32) ... --- @@ -597,124 +597,124 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; CI-LABEL: name: test_load_private_s16_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 
5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; VI-LABEL: name: test_load_private_s16_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; GFX9-LABEL: name: test_load_private_s16_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 1, addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), align 1, addrspace 5) + ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX10-LABEL: name: test_load_private_s16_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 1, addrspace 5) - ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), align 1, addrspace 5) + ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX11-LABEL: name: test_load_private_s16_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 1, addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), align 1, addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX12-LABEL: name: test_load_private_s16_align1 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 1, addrspace 5) - ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), align 1, addrspace 5) + ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_s16_align1 ; UNALIGNED_GFX9: liveins: $vgpr0 
; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX9-NEXT: $vgpr0 = COPY [[OR]](s32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX9-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_s16_align1 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX10-NEXT: $vgpr0 = COPY [[OR]](s32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX10-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_s16_align1 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 
- ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[OR]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_s16_align1 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[OR]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[OR]](i32) %0:_(p5) = COPY $vgpr0 - %1:_(s16) = G_LOAD %0 :: (load (s16), align 1, addrspace 5) - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + %1:_(i16) = G_LOAD %0(p5) :: (load (i16), align 1, addrspace 5) + %2:_(i32) = G_ANYEXT %1(i16) + $vgpr0 = COPY %2(i32) ... 
--- @@ -727,81 +727,81 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; SI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-LABEL: name: test_load_private_s32_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; CI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; VI-LABEL: name: test_load_private_s32_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-LABEL: name: test_load_private_s32_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX10-LABEL: name: test_load_private_s32_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX11-LABEL: name: test_load_private_s32_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX12-LABEL: name: test_load_private_s32_align4 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_s32_align4 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; UNALIGNED_GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; UNALIGNED_GFX9-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_s32_align4 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: 
[[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; UNALIGNED_GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; UNALIGNED_GFX10-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_s32_align4 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_s32_align4 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p5) = COPY $vgpr0 - %1:_(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 5) - $vgpr0 = COPY %1 + %1:_(i32) = G_LOAD %0(p5) :: (load (i32), addrspace 5) + $vgpr0 = COPY %1(i32) ... --- @@ -814,123 +814,123 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; CI-LABEL: name: test_load_private_s32_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = 
G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; VI-LABEL: name: test_load_private_s32_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; GFX9-LABEL: name: test_load_private_s32_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 2, addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 2, addrspace 5) + ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX10-LABEL: name: test_load_private_s32_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 2, addrspace 5) - ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 2, addrspace 5) + ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX11-LABEL: name: test_load_private_s32_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 2, addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 2, addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX12-LABEL: name: test_load_private_s32_align2 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 2, addrspace 5) - ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 2, addrspace 5) + ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_s32_align2 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; 
UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX9-NEXT: $vgpr0 = COPY [[OR]](s32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX9-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_s32_align2 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX10-NEXT: $vgpr0 = COPY [[OR]](s32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX10-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_s32_align2 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; 
UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[OR]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_s32_align2 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[OR]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[OR]](i32) %0:_(p5) = COPY $vgpr0 - %1:_(s32) = G_LOAD %0 :: (load (s32), align 2, addrspace 5) - $vgpr0 = COPY %1 + %1:_(i32) = G_LOAD %0(p5) :: (load (i32), align 2, addrspace 5) + $vgpr0 = COPY %1(i32) ... 
--- @@ -943,193 +943,193 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; SI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; SI-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; CI-LABEL: name: test_load_private_s32_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: 
[[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; VI-LABEL: name: test_load_private_s32_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; VI-NEXT: 
[[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; GFX9-LABEL: name: test_load_private_s32_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 1, addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 1, addrspace 5) + ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX10-LABEL: name: test_load_private_s32_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 1, addrspace 5) - ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 1, addrspace 5) + ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX11-LABEL: name: test_load_private_s32_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 1, addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 1, addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX12-LABEL: name: test_load_private_s32_align1 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 1, addrspace 5) - ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 1, addrspace 5) + ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_s32_align1 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: 
[[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX9-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_s32_align1 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) 
- ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX10-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX10-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX10-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_s32_align1 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], 
[[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_s32_align1 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], 
[[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[OR2]](i32) %0:_(p5) = COPY $vgpr0 - %1:_(s32) = G_LOAD %0 :: (load (s32), align 1, addrspace 5) - $vgpr0 = COPY %1 + %1:_(i32) = G_LOAD %0(p5) :: (load (i32), align 1, addrspace 5) + $vgpr0 = COPY %1(i32) ... 
--- @@ -1142,82 +1142,82 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; SI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-LABEL: name: test_load_private_s24_align8 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; CI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; VI-LABEL: name: test_load_private_s24_align8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-LABEL: name: test_load_private_s24_align8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX10-LABEL: name: test_load_private_s24_align8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX11-LABEL: name: test_load_private_s24_align8 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX12-LABEL: name: test_load_private_s24_align8 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_s24_align8 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; UNALIGNED_GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; UNALIGNED_GFX9-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_s24_align8 ; 
UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; UNALIGNED_GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; UNALIGNED_GFX10-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_s24_align8 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_s24_align8 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p5) = COPY $vgpr0 - %1:_(s24) = G_LOAD %0 :: (load (s24), align 8, addrspace 5) - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + %1:_(i24) = G_LOAD %0(p5) :: (load (i24), align 8, addrspace 5) + %2:_(i32) = G_ANYEXT %1(i24) + $vgpr0 = COPY %2(i32) ... --- @@ -1230,82 +1230,82 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; SI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-LABEL: name: test_load_private_s24_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; CI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; VI-LABEL: name: test_load_private_s24_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-LABEL: name: test_load_private_s24_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX10-LABEL: name: test_load_private_s24_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), 
addrspace 5) - ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX11-LABEL: name: test_load_private_s24_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX12-LABEL: name: test_load_private_s24_align4 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_s24_align4 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; UNALIGNED_GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; UNALIGNED_GFX9-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_s24_align4 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; UNALIGNED_GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; UNALIGNED_GFX10-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_s24_align4 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_s24_align4 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p5) = COPY $vgpr0 - %1:_(s24) = G_LOAD %0 :: (load (s24), align 4, addrspace 5) - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + %1:_(i24) = G_LOAD %0(p5) :: (load (i24), align 4, addrspace 5) + %2:_(i32) = G_ANYEXT %1(i24) + $vgpr0 = COPY %2(i32) ... 
--- @@ -1318,148 +1318,148 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 2, align 2, addrspace 5) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 2, align 2, addrspace 5) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; CI-LABEL: name: test_load_private_s24_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 2, align 2, addrspace 5) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 2, align 2, addrspace 5) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; VI-LABEL: name: test_load_private_s24_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 2, align 2, addrspace 5) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i8) from 
unknown-address + 2, align 2, addrspace 5) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; GFX9-LABEL: name: test_load_private_s24_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 2, align 2, addrspace 5) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: $vgpr0 = COPY [[OR]](s32) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 2, align 2, addrspace 5) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; GFX10-LABEL: name: test_load_private_s24_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 2, align 2, addrspace 5) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX10-NEXT: $vgpr0 = COPY [[OR]](s32) + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 2, align 2, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; GFX11-LABEL: name: test_load_private_s24_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 2, align 2, addrspace 5) - ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; 
GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX11-NEXT: $vgpr0 = COPY [[OR]](s32) + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 2, align 2, addrspace 5) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; GFX12-LABEL: name: test_load_private_s24_align2 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 2, align 2, addrspace 5) - ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX12-NEXT: $vgpr0 = COPY [[OR]](s32) + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 2, align 2, addrspace 5) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX12-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_s24_align2 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 2, align 2, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX9-NEXT: $vgpr0 = COPY [[OR]](s32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 2, align 2, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX9-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; 
UNALIGNED_GFX10-LABEL: name: test_load_private_s24_align2 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 2, align 2, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX10-NEXT: $vgpr0 = COPY [[OR]](s32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 2, align 2, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX10-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_s24_align2 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 2, align 2, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[OR]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 2, align 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_s24_align2 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: 
[[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 2, align 2, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[OR]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 2, align 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[OR]](i32) %0:_(p5) = COPY $vgpr0 - %1:_(s24) = G_LOAD %0 :: (load (s24), align 2, addrspace 5) - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + %1:_(i24) = G_LOAD %0(p5) :: (load (i24), align 2, addrspace 5) + %2:_(i32) = G_ANYEXT %1(i24) + $vgpr0 = COPY %2(i32) ... --- @@ -1472,190 +1472,190 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] - ; SI-NEXT: $vgpr0 = COPY [[OR1]](s32) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C3]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[OR]] + ; SI-NEXT: $vgpr0 = COPY [[OR1]](i32) ; ; CI-LABEL: name: test_load_private_s24_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: 
[[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] - ; CI-NEXT: $vgpr0 = COPY [[OR1]](s32) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C3]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[OR]] + ; CI-NEXT: $vgpr0 = COPY [[OR1]](i32) ; ; VI-LABEL: name: test_load_private_s24_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] - ; VI-NEXT: $vgpr0 = COPY [[OR1]](s32) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = 
G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C3]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[OR]] + ; VI-NEXT: $vgpr0 = COPY [[OR1]](i32) ; ; GFX9-LABEL: name: test_load_private_s24_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), align 1, addrspace 5) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: $vgpr0 = COPY [[OR]](s32) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), align 1, addrspace 5) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; GFX10-LABEL: name: test_load_private_s24_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), align 1, addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX10-NEXT: $vgpr0 = COPY [[OR]](s32) + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), align 1, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; GFX11-LABEL: name: test_load_private_s24_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), align 1, addrspace 5) - ; GFX11-NEXT: 
[[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX11-NEXT: $vgpr0 = COPY [[OR]](s32) + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), align 1, addrspace 5) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; GFX12-LABEL: name: test_load_private_s24_align1 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), align 1, addrspace 5) - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX12-NEXT: $vgpr0 = COPY [[OR]](s32) + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), align 1, addrspace 5) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX12-NEXT: $vgpr0 = COPY [[OR]](i32) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_s24_align1 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; 
UNALIGNED_GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] - ; UNALIGNED_GFX9-NEXT: $vgpr0 = COPY [[OR1]](s32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[OR]] + ; UNALIGNED_GFX9-NEXT: $vgpr0 = COPY [[OR1]](i32) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_s24_align1 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] - ; UNALIGNED_GFX10-NEXT: $vgpr0 = COPY [[OR1]](s32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: 
[[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[OR]] + ; UNALIGNED_GFX10-NEXT: $vgpr0 = COPY [[OR1]](i32) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_s24_align1 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] - ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[OR1]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[OR]] + ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[OR1]](i32) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_s24_align1 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX12-NEXT: 
[[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[OR1]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[OR]] + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[OR1]](i32) %0:_(p5) = COPY $vgpr0 - %1:_(s24) = G_LOAD %0 :: (load (s24), align 1, addrspace 5) - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + %1:_(i24) = G_LOAD %0(p5) :: (load (i24), align 1, addrspace 5) + %2:_(i32) = G_ANYEXT %1(i24) + $vgpr0 = COPY %2(i32) ... 
--- @@ -1668,180 +1668,180 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 4, align 4, addrspace 5) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] - ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 4, align 4, addrspace 5) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C2]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[C3]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND1]], [[SHL1]] + ; SI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR]](i32), [[OR1]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) ; ; CI-LABEL: name: test_load_private_s48_align8 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 4, align 4, addrspace 5) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]] - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] - ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD 
[[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 4, align 4, addrspace 5) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C2]] + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[C3]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND1]], [[SHL1]] + ; CI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR]](i32), [[OR1]](i32) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) ; ; VI-LABEL: name: test_load_private_s48_align8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 4, align 4, addrspace 5) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] - ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 4, align 4, addrspace 5) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C2]] + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[C3]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND1]], [[SHL1]] + ; VI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR]](i32), [[OR1]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) ; ; GFX9-LABEL: name: test_load_private_s48_align8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 
(s32), align 8, addrspace 5) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 4, align 4, addrspace 5) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]] - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 4, align 4, addrspace 5) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C2]] + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C2]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[C3]], [[C1]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND1]], [[SHL1]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR]](i32), [[OR1]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) ; ; GFX10-LABEL: name: test_load_private_s48_align8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 4, align 4, addrspace 5) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]] - ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) - ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32) - ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; GFX10-NEXT: 
[[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 4, align 4, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX10-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C2]] + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C1]](i32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[C3]], [[C1]](i32) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND1]], [[SHL1]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR]](i32), [[OR1]](i32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) ; ; GFX11-LABEL: name: test_load_private_s48_align8 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load (s64), addrspace 5) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p5) :: (load (i64), addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; GFX12-LABEL: name: test_load_private_s48_align8 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load (s64), addrspace 5) - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p5) :: (load (i64), addrspace 5) + ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_s48_align8 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 4, align 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNALIGNED_GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]] - ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNALIGNED_GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] - ; UNALIGNED_GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNALIGNED_GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] - ; UNALIGNED_GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) - ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX9-NEXT: 
[[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 4, align 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; UNALIGNED_GFX9-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C2]] + ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; UNALIGNED_GFX9-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C2]] + ; UNALIGNED_GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; UNALIGNED_GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[C3]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND1]], [[SHL1]] + ; UNALIGNED_GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR]](i32), [[OR1]](i32) + ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_s48_align8 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 4, align 4, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNALIGNED_GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]] - ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNALIGNED_GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] - ; UNALIGNED_GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNALIGNED_GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] - ; UNALIGNED_GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) - ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 4, align 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX10-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; UNALIGNED_GFX10-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C2]] + ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; UNALIGNED_GFX10-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C2]] + ; UNALIGNED_GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; 
UNALIGNED_GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[C3]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND1]], [[SHL1]] + ; UNALIGNED_GFX10-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR]](i32), [[OR1]](i32) + ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_s48_align8 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load (s64), addrspace 5) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p5) :: (load (i64), addrspace 5) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_s48_align8 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load (s64), addrspace 5) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p5) :: (load (i64), addrspace 5) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) %0:_(p5) = COPY $vgpr0 - %1:_(s48) = G_LOAD %0 :: (load (s48), align 8, addrspace 5) - %2:_(s64) = G_ANYEXT %1 - $vgpr0_vgpr1 = COPY %2 + %1:_(i48) = G_LOAD %0(p5) :: (load (i48), align 8, addrspace 5) + %2:_(i64) = G_ANYEXT %1(i48) + $vgpr0_vgpr1 = COPY %2(i64) ... --- @@ -1854,109 +1854,109 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; SI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) ; ; CI-LABEL: name: test_load_private_s64_align8 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from 
unknown-address + 4, addrspace 5) + ; CI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) ; ; VI-LABEL: name: test_load_private_s64_align8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; VI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) ; ; GFX9-LABEL: name: test_load_private_s64_align8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) ; ; GFX10-LABEL: name: test_load_private_s64_align8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) ; ; GFX11-LABEL: name: test_load_private_s64_align8 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; 
GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load (s64), addrspace 5) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p5) :: (load (i64), addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; GFX12-LABEL: name: test_load_private_s64_align8 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load (s64), addrspace 5) - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p5) :: (load (i64), addrspace 5) + ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_s64_align8 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) + ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_s64_align8 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) + ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_s64_align8 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD 
[[COPY]](p5) :: (load (s64), addrspace 5) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p5) :: (load (i64), addrspace 5) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_s64_align8 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load (s64), addrspace 5) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p5) :: (load (i64), addrspace 5) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) %0:_(p5) = COPY $vgpr0 - %1:_(s64) = G_LOAD %0 :: (load (s64), align 8, addrspace 5) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_LOAD %0(p5) :: (load (i64), addrspace 5) + $vgpr0_vgpr1 = COPY %1(i64) ... --- @@ -1969,109 +1969,109 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; SI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) ; ; CI-LABEL: name: test_load_private_s64_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; CI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) ; ; VI-LABEL: name: test_load_private_s64_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; VI-NEXT: 
[[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; VI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) ; ; GFX9-LABEL: name: test_load_private_s64_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) ; ; GFX10-LABEL: name: test_load_private_s64_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) ; ; GFX11-LABEL: name: test_load_private_s64_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load (s64), align 4, addrspace 5) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p5) :: (load (i64), align 4, addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; GFX12-LABEL: name: test_load_private_s64_align4 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load (s64), align 4, addrspace 5) - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD 
[[COPY]](p5) :: (load (i64), align 4, addrspace 5) + ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_s64_align4 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) + ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_s64_align4 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) + ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_s64_align4 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load (s64), align 4, addrspace 5) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p5) :: (load (i64), align 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_s64_align4 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load (s64), align 4, addrspace 5) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD 
[[COPY]](p5) :: (load (i64), align 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) %0:_(p5) = COPY $vgpr0 - %1:_(s64) = G_LOAD %0 :: (load (s64), align 4, addrspace 5) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_LOAD %0(p5) :: (load (i64), align 4, addrspace 5) + $vgpr0_vgpr1 = COPY %1(i64) ... --- @@ -2084,195 +2084,195 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i16) from unknown-address + 6, addrspace 5) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; SI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR]](i32), [[OR1]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) ; ; CI-LABEL: name: test_load_private_s64_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], 
[[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i16) from unknown-address + 6, addrspace 5) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; CI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR]](i32), [[OR1]](i32) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) ; ; VI-LABEL: name: test_load_private_s64_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: 
[[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i16) from unknown-address + 6, addrspace 5) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; VI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR]](i32), [[OR1]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) ; ; GFX9-LABEL: name: test_load_private_s64_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 2, addrspace 5) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, align 2, addrspace 5) - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 2, addrspace 5) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, align 2, addrspace 5) + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) ; ; GFX10-LABEL: name: test_load_private_s64_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 2, addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, align 2, addrspace 5) - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 2, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, align 2, addrspace 5) + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) ; ; GFX11-LABEL: name: test_load_private_s64_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load (s64), align 2, addrspace 5) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; 
GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p5) :: (load (i64), align 2, addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; GFX12-LABEL: name: test_load_private_s64_align2 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load (s64), align 2, addrspace 5) - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p5) :: (load (i64), align 2, addrspace 5) + ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_s64_align2 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; UNALIGNED_GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) - ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i16) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR]](i32), 
[[OR1]](i32) + ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_s64_align2 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; UNALIGNED_GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) - ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i16) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX10-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR]](i32), [[OR1]](i32) + ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_s64_align2 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = 
G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) - ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; UNALIGNED_GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) - ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](s64) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR]](i32) + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i16) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR1]](i32) + ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(i64) = G_OR [[SHL2]], [[ZEXT]] + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](i64) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_s64_align2 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) 
= G_CONSTANT i32 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) - ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR]](i32) + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i16) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR1]](i32) + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(i64) = G_OR [[SHL2]], [[ZEXT]] + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](i64) %0:_(p5) = COPY $vgpr0 - %1:_(s64) = G_LOAD %0 :: (load (s64), align 2, addrspace 5) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_LOAD %0(p5) :: (load (i64), align 2, addrspace 5) + $vgpr0_vgpr1 = COPY %1(i64) ... 
--- @@ -2285,321 +2285,321 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from 
unknown-address + 2, addrspace 5) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; SI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR2]](i32), [[OR5]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) ; ; CI-LABEL: name: test_load_private_s64_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: 
(load (s8) from unknown-address + 4, addrspace 5) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: 
[[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR2]](i32), [[OR5]](i32) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) ; ; VI-LABEL: name: test_load_private_s64_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL 
[[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; VI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR2]](i32), [[OR5]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) ; ; GFX9-LABEL: name: test_load_private_s64_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 1, addrspace 5) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, align 1, addrspace 5) - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 1, addrspace 5) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, align 1, addrspace 5) + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) ; ; GFX10-LABEL: name: test_load_private_s64_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD 
[[COPY]](p5) :: (load (s32), align 1, addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, align 1, addrspace 5) - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 1, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, align 1, addrspace 5) + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) ; ; GFX11-LABEL: name: test_load_private_s64_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load (s64), align 1, addrspace 5) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p5) :: (load (i64), align 1, addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; GFX12-LABEL: name: test_load_private_s64_align1 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load (s64), align 1, addrspace 5) - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p5) :: (load (i64), align 1, addrspace 5) + ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](i64) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_s64_align1 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX9-NEXT: 
[[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) - ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX9-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 
5, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX9-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR2]](i32), [[OR5]](i32) + ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_s64_align1 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], 
[[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX10-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) - ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX10-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX10-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], 
[[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX10-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX10-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR2]](i32), [[OR5]](i32) + ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_s64_align1 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX11-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; UNALIGNED_GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX11-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; 
UNALIGNED_GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; UNALIGNED_GFX11-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; UNALIGNED_GFX11-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](s64) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX11-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; UNALIGNED_GFX11-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX11-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; 
UNALIGNED_GFX11-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX11-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](i64) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_s64_align1 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX12-NEXT: 
[[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; UNALIGNED_GFX12-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; UNALIGNED_GFX12-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX12-NEXT: 
[[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](i64) %0:_(p5) = COPY $vgpr0 - %1:_(s64) = G_LOAD %0 :: (load (s64), align 1, addrspace 5) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_LOAD %0(p5) :: (load (i64), align 1, addrspace 5) + $vgpr0_vgpr1 = COPY %1(i64) ... --- @@ -2612,428 +2612,428 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; 
SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; SI-NEXT: 
[[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; SI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; SI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; SI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; SI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; SI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; SI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; SI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; CI-LABEL: name: test_load_private_s96_align16 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI-NEXT: 
[[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; CI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, 
addrspace 5) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; CI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; CI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; CI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; CI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; CI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; VI-LABEL: name: test_load_private_s96_align16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from 
unknown-address + 3, addrspace 5) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) 
= G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; VI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; VI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; VI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; VI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX9-LABEL: name: test_load_private_s96_align16 ; GFX9: 
liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 1, addrspace 5) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, align 1, addrspace 5) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 1, addrspace 5) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 1, addrspace 5) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, align 1, addrspace 5) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 1, addrspace 5) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX10-LABEL: name: test_load_private_s96_align16 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 1, addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, align 1, addrspace 5) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 1, addrspace 5) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 1, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, align 1, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 1, addrspace 5) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = 
G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX11-LABEL: name: test_load_private_s96_align16 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s32>), align 1, addrspace 5) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p5) :: (load (<3 x i32>), align 1, addrspace 5) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX12-LABEL: name: test_load_private_s96_align16 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s32>), align 1, addrspace 5) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p5) :: (load (<3 x i32>), align 1, addrspace 5) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_s96_align16 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; 
UNALIGNED_GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; UNALIGNED_GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; UNALIGNED_GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD 
[[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX9-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX9-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX9-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR 
[[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; UNALIGNED_GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_s96_align16 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX10-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from 
unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; UNALIGNED_GFX10-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; UNALIGNED_GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX10-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX10-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX10-NEXT: 
[[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX10-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX10-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; UNALIGNED_GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_s96_align16 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; 
UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX11-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; UNALIGNED_GFX11-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR 
[[SHL8]], [[OR6]] - ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX11-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX11-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD 
[[PTR_ADD7]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX11-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_s96_align16 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: 
(load (s8) from unknown-address + 5, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; UNALIGNED_GFX12-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, 
addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX12-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), 
[[OR8]](i32) + ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) %0:_(p5) = COPY $vgpr0 - %1:_(s96) = G_LOAD %0 :: (load (s96), align 1, addrspace 5) - $vgpr0_vgpr1_vgpr2 = COPY %1 + %1:_(i96) = G_LOAD %0(p5) :: (load (i96), align 1, addrspace 5) + $vgpr0_vgpr1_vgpr2 = COPY %1(i96) ... --- @@ -3046,141 +3046,141 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; CI-LABEL: name: test_load_private_s96_align8 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; CI-NEXT: 
[[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; VI-LABEL: name: test_load_private_s96_align8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX9-LABEL: name: test_load_private_s96_align8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) 
+ ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX10-LABEL: name: test_load_private_s96_align8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX11-LABEL: name: test_load_private_s96_align8 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s32>), align 8, addrspace 5) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p5) :: (load (<3 x i32>), align 8, addrspace 5) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX12-LABEL: name: test_load_private_s96_align8 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s32>), align 8, addrspace 5) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY 
[[BITCAST]](s96) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p5) :: (load (<3 x i32>), align 8, addrspace 5) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_s96_align8 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; UNALIGNED_GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; UNALIGNED_GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_s96_align8 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; UNALIGNED_GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; 
UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; UNALIGNED_GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_s96_align8 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s32>), align 8, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p5) :: (load (<3 x i32>), align 8, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_s96_align8 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s32>), align 8, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p5) :: (load (<3 x i32>), align 8, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) %0:_(p5) = COPY $vgpr0 - %1:_(s96) = G_LOAD %0 :: (load (s96), align 8, addrspace 5) - $vgpr0_vgpr1_vgpr2 = COPY %1 + %1:_(i96) = G_LOAD %0(p5) :: (load (i96), align 8, addrspace 5) + $vgpr0_vgpr1_vgpr2 = COPY %1(i96) ... 
--- @@ -3193,141 +3193,141 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, addrspace 5) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; CI-LABEL: name: test_load_private_s96_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, addrspace 5) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY 
[[BITCAST]](i96) ; ; VI-LABEL: name: test_load_private_s96_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, addrspace 5) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX9-LABEL: name: test_load_private_s96_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, addrspace 5) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = 
G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX10-LABEL: name: test_load_private_s96_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, addrspace 5) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX11-LABEL: name: test_load_private_s96_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s32>), align 4, addrspace 5) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p5) :: (load (<3 x i32>), align 4, addrspace 5) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX12-LABEL: name: test_load_private_s96_align4 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s32>), align 4, addrspace 5) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p5) :: (load (<3 x i32>), align 4, addrspace 5) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_s96_align4 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; UNALIGNED_GFX9-NEXT: 
[[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; UNALIGNED_GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; UNALIGNED_GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_s96_align4 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; UNALIGNED_GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from 
unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; UNALIGNED_GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_s96_align4 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s32>), align 4, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p5) :: (load (<3 x i32>), align 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_s96_align4 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s32>), align 4, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p5) :: (load (<3 x i32>), align 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) %0:_(p5) = COPY $vgpr0 - %1:_(s96) = G_LOAD %0 :: (load (s96), align 4, addrspace 5) - $vgpr0_vgpr1_vgpr2 = COPY %1 + %1:_(i96) = G_LOAD %0(p5) :: (load (i96), align 4, addrspace 5) + $vgpr0_vgpr1_vgpr2 = COPY %1(i96) ... 
--- @@ -3340,253 +3340,253 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5) - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i16) from unknown-address + 6, addrspace 5) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i16) from unknown-address + 8, addrspace 5) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD 
[[PTR_ADD4]](p5) :: (load (i16) from unknown-address + 10, addrspace 5) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; CI-LABEL: name: test_load_private_s96_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5) - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i16) from unknown-address + 6, addrspace 5) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], 
[[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i16) from unknown-address + 8, addrspace 5) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i16) from unknown-address + 10, addrspace 5) + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; VI-LABEL: name: test_load_private_s96_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5) - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 
4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i16) from unknown-address + 6, addrspace 5) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i16) from unknown-address + 8, addrspace 5) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i16) from unknown-address + 10, addrspace 5) + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX9-LABEL: name: test_load_private_s96_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 2, addrspace 5) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, align 2, addrspace 5) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 2, addrspace 5) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 2, addrspace 5) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, align 2, addrspace 5) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 2, addrspace 5) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX10-LABEL: name: test_load_private_s96_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 2, addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 
4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, align 2, addrspace 5) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 2, addrspace 5) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 2, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, align 2, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 2, addrspace 5) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX11-LABEL: name: test_load_private_s96_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s32>), align 2, addrspace 5) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p5) :: (load (<3 x i32>), align 2, addrspace 5) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX12-LABEL: name: test_load_private_s96_align2 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s32>), align 2, addrspace 5) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p5) :: (load (<3 x i32>), align 2, addrspace 5) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_s96_align2 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; 
UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; UNALIGNED_GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) - ; UNALIGNED_GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i16) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i16) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i16) from unknown-address + 10, addrspace 5) + ; 
UNALIGNED_GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32) + ; UNALIGNED_GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_s96_align2 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; UNALIGNED_GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) - ; UNALIGNED_GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; 
UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i16) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i16) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i16) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32) + ; UNALIGNED_GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_s96_align2 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5) 
- ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) - ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i16) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i16) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i16) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32) + ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_s96_align2 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT 
i32 4 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) - ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i16) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i16) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i16) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; 
UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32) + ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) %0:_(p5) = COPY $vgpr0 - %1:_(s96) = G_LOAD %0 :: (load (s96), align 2, addrspace 5) - $vgpr0_vgpr1_vgpr2 = COPY %1 + %1:_(i96) = G_LOAD %0(p5) :: (load (i96), align 2, addrspace 5) + $vgpr0_vgpr1_vgpr2 = COPY %1(i96) ... --- @@ -3599,428 +3599,428 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from 
unknown-address + 9, addrspace 5) - ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], 
[[C3]](i32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; SI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; SI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; SI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; SI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; SI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; SI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; SI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; CI-LABEL: name: test_load_private_s96_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; 
CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; CI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; CI-NEXT: 
[[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; CI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; CI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; CI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; CI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; CI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; VI-LABEL: name: test_load_private_s96_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], 
[[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: 
[[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; VI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; VI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; VI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; VI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; VI-NEXT: 
$vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX9-LABEL: name: test_load_private_s96_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 1, addrspace 5) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, align 1, addrspace 5) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 1, addrspace 5) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 1, addrspace 5) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, align 1, addrspace 5) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 1, addrspace 5) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX10-LABEL: name: test_load_private_s96_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 1, addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, align 1, addrspace 5) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 1, addrspace 5) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 1, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, align 1, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from 
unknown-address + 8, align 1, addrspace 5) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX11-LABEL: name: test_load_private_s96_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s32>), align 1, addrspace 5) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p5) :: (load (<3 x i32>), align 1, addrspace 5) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; GFX12-LABEL: name: test_load_private_s96_align1 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s32>), align 1, addrspace 5) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p5) :: (load (<3 x i32>), align 1, addrspace 5) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_s96_align1 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 
5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; UNALIGNED_GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; UNALIGNED_GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD 
[[COPY]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX9-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX9-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX9-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] 
+ ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; UNALIGNED_GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_s96_align1 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX10-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: 
[[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; UNALIGNED_GFX10-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; UNALIGNED_GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX10-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX10-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) 
from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX10-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX10-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; UNALIGNED_GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_s96_align1 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX11-NEXT: 
[[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX11-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; UNALIGNED_GFX11-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], 
[[C3]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX11-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX11-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 
5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX11-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_s96_align1 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; 
UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; UNALIGNED_GFX12-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: 
[[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX12-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; 
UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](i96) %0:_(p5) = COPY $vgpr0 - %1:_(s96) = G_LOAD %0 :: (load (s96), align 1, addrspace 5) - $vgpr0_vgpr1_vgpr2 = COPY %1 + %1:_(i96) = G_LOAD %0(p5) :: (load (i96), align 1, addrspace 5) + $vgpr0_vgpr1_vgpr2 = COPY %1(i96) ... --- @@ -4033,539 +4033,539 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from 
unknown-address + 9, addrspace 5) - ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; SI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) - ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) - ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; SI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) - ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) - ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; SI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + 
; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; SI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; SI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; SI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; SI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; SI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; SI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; SI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; SI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; SI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (i8) from unknown-address + 12, addrspace 5) + ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (i8) from unknown-address + 13, addrspace 5) + ; SI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; SI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (i8) from unknown-address + 14, addrspace 5) + ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p5) :: (load (i8) from 
unknown-address + 15, addrspace 5) + ; SI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; SI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; SI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; SI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; CI-LABEL: name: test_load_private_s128_align16 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI-NEXT: 
[[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; CI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; CI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) - ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) - ; CI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; CI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) - ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) - ; CI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; CI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; CI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; CI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR 
[[SHL1]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; CI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; CI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; CI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; CI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; CI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; CI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; CI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (i8) from unknown-address + 12, addrspace 5) + ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (i8) from unknown-address + 13, addrspace 5) + ; CI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; CI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (i8) from unknown-address + 14, addrspace 5) + ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + 
; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p5) :: (load (i8) from unknown-address + 15, addrspace 5) + ; CI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; CI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; CI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; CI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; VI-LABEL: name: test_load_private_s128_align16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; VI-NEXT: 
[[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; VI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) - ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) - ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) - ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) - ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) 
= G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; VI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; VI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; VI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; VI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; VI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; VI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (i8) from unknown-address + 12, addrspace 5) + ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (i8) from unknown-address + 13, addrspace 5) + ; VI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; VI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (i8) from unknown-address + 14, addrspace 5) + ; 
VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p5) :: (load (i8) from unknown-address + 15, addrspace 5) + ; VI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; VI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; VI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; VI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX9-LABEL: name: test_load_private_s128_align16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 1, addrspace 5) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, align 1, addrspace 5) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 1, addrspace 5) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, align 1, addrspace 5) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 1, addrspace 5) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, align 1, addrspace 5) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 1, addrspace 5) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, align 1, addrspace 5) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX10-LABEL: name: test_load_private_s128_align16 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 1, addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = 
G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, align 1, addrspace 5) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 1, addrspace 5) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, align 1, addrspace 5) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 1, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, align 1, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 1, addrspace 5) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, align 1, addrspace 5) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX11-LABEL: name: test_load_private_s128_align16 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 1, addrspace 5) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p5) :: (load (<4 x i32>), align 1, addrspace 5) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX12-LABEL: name: test_load_private_s128_align16 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 1, addrspace 5) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p5) :: (load (<4 x i32>), align 1, addrspace 5) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_s128_align16 ; UNALIGNED_GFX9: liveins: 
$vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], 
[[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; UNALIGNED_GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; UNALIGNED_GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; UNALIGNED_GFX9-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; UNALIGNED_GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX9-NEXT: 
[[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX9-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX9-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX9-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX9-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; UNALIGNED_GFX9-NEXT: 
[[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (i8) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (i8) from unknown-address + 13, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (i8) from unknown-address + 14, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p5) :: (load (i8) from unknown-address + 15, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX9-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; UNALIGNED_GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_s128_align16 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from 
unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX10-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; UNALIGNED_GFX10-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; UNALIGNED_GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) - ; UNALIGNED_GFX10-NEXT: 
[[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; UNALIGNED_GFX10-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; UNALIGNED_GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX10-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX10-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], 
[[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX10-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX10-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX10-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (i8) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (i8) from unknown-address + 13, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (i8) from unknown-address + 14, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p5) :: (load (i8) from unknown-address + 15, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX10-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; UNALIGNED_GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_s128_align16 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; 
UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX11-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], 
[[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; UNALIGNED_GFX11-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; UNALIGNED_GFX11-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; UNALIGNED_GFX11-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 
5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX11-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX11-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX11-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX11-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX11-NEXT: 
[[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (i8) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (i8) from unknown-address + 13, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (i8) from unknown-address + 14, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p5) :: (load (i8) from unknown-address + 15, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX11-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_s128_align16 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; 
UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; UNALIGNED_GFX12-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; UNALIGNED_GFX12-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: 
(load (s8) from unknown-address + 14, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; UNALIGNED_GFX12-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + 
; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX12-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX12-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (i8) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (i8) from unknown-address + 13, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (i8) from unknown-address + 14, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p5) :: (load (i8) from unknown-address + 15, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX12-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) %0:_(p5) = COPY $vgpr0 - %1:_(s128) = G_LOAD %0 :: (load (s128), align 1, 
addrspace 5) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(i128) = G_LOAD %0(p5) :: (load (i128), align 1, addrspace 5) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(i128) ... --- @@ -4578,162 +4578,162 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; CI-LABEL: name: test_load_private_s128_align8 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) 
= G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; VI-LABEL: name: test_load_private_s128_align8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), 
[[LOAD3]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX9-LABEL: name: test_load_private_s128_align8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX10-LABEL: name: test_load_private_s128_align8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD 
[[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX11-LABEL: name: test_load_private_s128_align8 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 8, addrspace 5) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p5) :: (load (<4 x i32>), align 8, addrspace 5) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX12-LABEL: name: test_load_private_s128_align8 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 8, addrspace 5) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p5) :: (load (<4 x i32>), align 8, addrspace 5) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_s128_align8 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; 
UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; UNALIGNED_GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; UNALIGNED_GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_s128_align8 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; UNALIGNED_GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: 
(load (i32), align 8, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; UNALIGNED_GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_s128_align8 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 8, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p5) :: (load (<4 x i32>), align 8, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_s128_align8 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 8, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p5) :: (load (<4 x i32>), align 8, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) %0:_(p5) = COPY $vgpr0 - %1:_(s128) = G_LOAD %0 :: (load (s128), align 8, addrspace 5) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(i128) = G_LOAD %0(p5) :: (load (i128), align 8, addrspace 5) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(i128) ... 
--- @@ -4746,162 +4746,162 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, addrspace 5) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; CI-LABEL: name: test_load_private_s128_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY 
[[BITCAST]](s128) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, addrspace 5) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; VI-LABEL: name: test_load_private_s128_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, addrspace 5) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX9-LABEL: name: test_load_private_s128_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; 
GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, addrspace 5) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX10-LABEL: name: test_load_private_s128_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; GFX10-NEXT: 
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, addrspace 5) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX11-LABEL: name: test_load_private_s128_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 4, addrspace 5) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p5) :: (load (<4 x i32>), align 4, addrspace 5) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX12-LABEL: name: test_load_private_s128_align4 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 4, addrspace 5) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p5) :: (load (<4 x i32>), align 4, addrspace 5) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_s128_align4 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = 
G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; UNALIGNED_GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; UNALIGNED_GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_s128_align4 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; UNALIGNED_GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; 
UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; UNALIGNED_GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_s128_align4 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 4, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p5) :: (load (<4 x i32>), align 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_s128_align4 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 4, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p5) :: (load (<4 x i32>), align 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) %0:_(p5) = COPY $vgpr0 - %1:_(s128) = G_LOAD %0 :: (load (s128), align 4, addrspace 5) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(i128) = G_LOAD %0(p5) :: (load (i128), align 4, addrspace 5) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(i128) ... 
--- @@ -4914,308 +4914,308 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5) - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; SI-NEXT: 
[[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i16) from unknown-address + 6, addrspace 5) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i16) from unknown-address + 8, addrspace 5) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i16) from unknown-address + 10, addrspace 5) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i16) from unknown-address + 12, addrspace 5) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i16) from unknown-address + 14, addrspace 5) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32), [[OR3]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; CI-LABEL: name: test_load_private_s128_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5) - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT 
i32 12 - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i16) from unknown-address + 6, addrspace 5) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i16) from unknown-address + 8, addrspace 5) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i16) from unknown-address + 10, addrspace 5) + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i16) from unknown-address + 12, addrspace 5) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i16) from unknown-address + 14, addrspace 5) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32), [[OR3]](i32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; VI-LABEL: name: test_load_private_s128_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) 
:: (load (s16), addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5) - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i16) from unknown-address + 6, addrspace 5) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = 
G_OR [[SHL1]], [[ZEXTLOAD1]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i16) from unknown-address + 8, addrspace 5) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i16) from unknown-address + 10, addrspace 5) + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i16) from unknown-address + 12, addrspace 5) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i16) from unknown-address + 14, addrspace 5) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32), [[OR3]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX9-LABEL: name: test_load_private_s128_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 2, addrspace 5) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, align 2, addrspace 5) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 2, addrspace 5) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, align 2, addrspace 5) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 2, addrspace 5) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, align 2, addrspace 5) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 2, addrspace 5) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX9-NEXT: 
[[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, align 2, addrspace 5) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX10-LABEL: name: test_load_private_s128_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 2, addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, align 2, addrspace 5) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 2, addrspace 5) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, align 2, addrspace 5) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 2, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, align 2, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 2, addrspace 5) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, align 2, addrspace 5) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX11-LABEL: name: test_load_private_s128_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 2, addrspace 5) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p5) :: (load (<4 x i32>), align 2, addrspace 5) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; 
GFX12-LABEL: name: test_load_private_s128_align2 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 2, addrspace 5) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p5) :: (load (<4 x i32>), align 2, addrspace 5) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_s128_align2 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; UNALIGNED_GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), 
[[OR1]](s32), [[OR2]](s32), [[OR3]](s32) - ; UNALIGNED_GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i16) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i16) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i16) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX9-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i16) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i16) from unknown-address + 14, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32), [[OR3]](i32) + ; UNALIGNED_GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_s128_align2 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD 
[[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; UNALIGNED_GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) - ; UNALIGNED_GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD 
[[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i16) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i16) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i16) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX10-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i16) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i16) from unknown-address + 14, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32), [[OR3]](i32) + ; UNALIGNED_GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_s128_align2 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: 
[[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) - ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i16) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i16) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i16) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX11-NEXT: 
[[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i16) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i16) from unknown-address + 14, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32), [[OR3]](i32) + ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_s128_align2 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, 
addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) - ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i16) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i16) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i16) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i16) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i16) from unknown-address + 14, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32), [[OR3]](i32) + ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST 
[[BUILD_VECTOR]](<4 x i32>) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) %0:_(p5) = COPY $vgpr0 - %1:_(s128) = G_LOAD %0 :: (load (s128), align 2, addrspace 5) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(i128) = G_LOAD %0(p5) :: (load (i128), align 2, addrspace 5) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(i128) ... --- @@ -5228,539 +5228,539 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], 
[[ZEXTLOAD6]] - ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; SI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) - ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) - ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; SI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) - ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) - ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; SI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) 
= G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; SI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; SI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; SI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; SI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; SI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; SI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; SI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; SI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; SI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (i8) from unknown-address + 12, addrspace 5) + ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (i8) from unknown-address + 13, addrspace 5) + ; SI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; SI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (i8) from unknown-address + 14, addrspace 5) + ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p5) :: (load (i8) from unknown-address + 15, addrspace 5) + ; SI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; SI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], 
[[ZEXTLOAD11]] + ; SI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; SI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; CI-LABEL: name: test_load_private_s128_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL 
[[ZEXTLOAD7]], [[C1]](s32) - ; CI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; CI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) - ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) - ; CI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; CI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) - ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) - ; CI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; CI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; CI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; CI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-NEXT: 
[[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; CI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; CI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; CI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; CI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; CI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; CI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; CI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (i8) from unknown-address + 12, addrspace 5) + ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (i8) from unknown-address + 13, addrspace 5) + ; CI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; CI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (i8) from unknown-address + 14, addrspace 5) + ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p5) :: (load (i8) from unknown-address + 15, addrspace 5) + ; CI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = 
G_SHL [[LOAD3]], [[C1]](i32) + ; CI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; CI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; CI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; VI-LABEL: name: test_load_private_s128_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from 
unknown-address + 9, addrspace 5) - ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; VI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) - ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) - ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) - ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) - ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + 
; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; VI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; VI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; VI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; VI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; VI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; VI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (i8) from unknown-address + 12, addrspace 5) + ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (i8) from unknown-address + 13, addrspace 5) + ; VI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; VI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (i8) from unknown-address + 14, addrspace 5) + ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p5) :: (load (i8) from 
unknown-address + 15, addrspace 5) + ; VI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; VI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; VI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; VI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX9-LABEL: name: test_load_private_s128_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 1, addrspace 5) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, align 1, addrspace 5) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 1, addrspace 5) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, align 1, addrspace 5) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 1, addrspace 5) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, align 1, addrspace 5) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 1, addrspace 5) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, align 1, addrspace 5) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX10-LABEL: name: test_load_private_s128_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 1, addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, align 1, addrspace 5) - ; 
GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 1, addrspace 5) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, align 1, addrspace 5) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 1, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, align 1, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 1, addrspace 5) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, align 1, addrspace 5) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX11-LABEL: name: test_load_private_s128_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 1, addrspace 5) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p5) :: (load (<4 x i32>), align 1, addrspace 5) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; GFX12-LABEL: name: test_load_private_s128_align1 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 1, addrspace 5) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p5) :: (load (<4 x i32>), align 1, addrspace 5) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[LOAD]](<4 x i32>) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_s128_align1 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD 
[[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], 
[[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; UNALIGNED_GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; UNALIGNED_GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; UNALIGNED_GFX9-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; UNALIGNED_GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, 
addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX9-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX9-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX9-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX9-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (i8) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD 
[[PTR_ADD11]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (i8) from unknown-address + 13, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (i8) from unknown-address + 14, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p5) :: (load (i8) from unknown-address + 15, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX9-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; UNALIGNED_GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_s128_align1 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = 
G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX10-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; UNALIGNED_GFX10-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; UNALIGNED_GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) - ; 
UNALIGNED_GFX10-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; UNALIGNED_GFX10-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; UNALIGNED_GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX10-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX10-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX10-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: 
[[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX10-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX10-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (i8) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (i8) from unknown-address + 13, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (i8) from unknown-address + 14, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p5) :: (load (i8) from unknown-address + 15, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX10-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; UNALIGNED_GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_s128_align1 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX11-NEXT: 
[[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX11-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; 
UNALIGNED_GFX11-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; UNALIGNED_GFX11-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; UNALIGNED_GFX11-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; UNALIGNED_GFX11-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from 
unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX11-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX11-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX11-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX11-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (i8) from unknown-address + 12, addrspace 5) + ; 
UNALIGNED_GFX11-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (i8) from unknown-address + 13, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (i8) from unknown-address + 14, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p5) :: (load (i8) from unknown-address + 15, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX11-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_s128_align1 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD 
[[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; UNALIGNED_GFX12-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; UNALIGNED_GFX12-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = 
G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; UNALIGNED_GFX12-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX12-NEXT: 
[[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX12-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX12-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (i8) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (i8) from unknown-address + 13, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (i8) from unknown-address + 14, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p5) :: (load (i8) from unknown-address + 15, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX12-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i128) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](i128) %0:_(p5) = COPY $vgpr0 - %1:_(s128) = G_LOAD %0 :: (load (s128), align 1, addrspace 5) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(i128) = G_LOAD %0(p5) :: (load (i128), align 1, addrspace 5) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(i128) ... 
--- @@ -5773,55 +5773,55 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; SI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; SI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) ; ; CI-LABEL: name: test_load_private_p1_align8 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; CI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; CI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) ; ; VI-LABEL: name: test_load_private_p1_align8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; VI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; VI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) ; ; GFX9-LABEL: name: test_load_private_p1_align8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; GFX9-NEXT: 
[[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) ; ; GFX10-LABEL: name: test_load_private_p1_align8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) ; ; GFX11-LABEL: name: test_load_private_p1_align8 @@ -5842,22 +5842,22 @@ body: | ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_p1_align8 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = 
G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_p1_align8 @@ -5874,8 +5874,8 @@ body: | ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p5) :: (load (p1), addrspace 5) ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) %0:_(p5) = COPY $vgpr0 - %1:_(p1) = G_LOAD %0 :: (load (p1), align 8, addrspace 5) - $vgpr0_vgpr1 = COPY %1 + %1:_(p1) = G_LOAD %0(p5) :: (load (p1), addrspace 5) + $vgpr0_vgpr1 = COPY %1(p1) ... --- @@ -5888,55 +5888,55 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; SI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; SI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) ; ; CI-LABEL: name: test_load_private_p1_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; CI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; CI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) ; ; VI-LABEL: name: test_load_private_p1_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; VI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + 
; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; VI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) ; ; GFX9-LABEL: name: test_load_private_p1_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) ; ; GFX10-LABEL: name: test_load_private_p1_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) ; ; GFX11-LABEL: name: test_load_private_p1_align4 @@ -5957,22 +5957,22 @@ body: | ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[MV:%[0-9]+]]:_(p1) = 
G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_p1_align4 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_p1_align4 @@ -5989,8 +5989,8 @@ body: | ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p5) :: (load (p1), align 4, addrspace 5) ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) %0:_(p5) = COPY $vgpr0 - %1:_(p1) = G_LOAD %0 :: (load (p1), align 4, addrspace 5) - $vgpr0_vgpr1 = COPY %1 + %1:_(p1) = G_LOAD %0(p5) :: (load (p1), align 4, addrspace 5) + $vgpr0_vgpr1 = COPY %1(p1) ... --- @@ -6003,85 +6003,85 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; SI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = 
G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i16) from unknown-address + 6, addrspace 5) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; SI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](i32), [[OR1]](i32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) ; ; CI-LABEL: name: test_load_private_p1_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; CI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i16) from unknown-address + 6, addrspace 5) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; CI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](i32), [[OR1]](i32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) ; ; VI-LABEL: name: test_load_private_p1_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT 
i32 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; VI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i16) from unknown-address + 6, addrspace 5) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; VI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](i32), [[OR1]](i32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) ; ; GFX9-LABEL: name: test_load_private_p1_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 2, addrspace 5) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, align 2, addrspace 5) - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 2, addrspace 5) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, align 2, addrspace 5) + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) ; ; GFX10-LABEL: name: test_load_private_p1_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 
(s32), align 2, addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, align 2, addrspace 5) - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 2, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, align 2, addrspace 5) + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) ; ; GFX11-LABEL: name: test_load_private_p1_align2 @@ -6102,98 +6102,98 @@ body: | ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; UNALIGNED_GFX9-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i16) from unknown-address + 6, addrspace 5) + ; 
UNALIGNED_GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX9-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](i32), [[OR1]](i32) ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_p1_align2 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; UNALIGNED_GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i16) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](i32), [[OR1]](i32) ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_p1_align2 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD 
[[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) - ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; UNALIGNED_GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) - ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] - ; UNALIGNED_GFX11-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR2]](s64) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR]](i32) + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i16) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR1]](i32) + ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(i64) = G_OR [[SHL2]], [[ZEXT]] + ; UNALIGNED_GFX11-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR2]](i64) ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_p1_align2 ; UNALIGNED_GFX12: 
liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) - ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] - ; UNALIGNED_GFX12-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR2]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR]](i32) + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i16) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR1]](i32) + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(i64) = G_OR [[SHL2]], [[ZEXT]] + ; UNALIGNED_GFX12-NEXT: 
[[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR2]](i64) ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) %0:_(p5) = COPY $vgpr0 - %1:_(p1) = G_LOAD %0 :: (load (p1), align 2, addrspace 5) - $vgpr0_vgpr1 = COPY %1 + %1:_(p1) = G_LOAD %0(p5) :: (load (p1), align 2, addrspace 5) + $vgpr0_vgpr1 = COPY %1(p1) ... --- @@ -6206,139 +6206,139 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; SI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; SI-NEXT: 
[[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; SI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR2]](i32), [[OR5]](i32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) ; ; CI-LABEL: name: test_load_private_p1_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) 
- ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], 
[[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR2]](i32), [[OR5]](i32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) ; ; VI-LABEL: name: test_load_private_p1_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD 
[[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; VI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR2]](i32), [[OR5]](i32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) ; ; GFX9-LABEL: name: test_load_private_p1_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 1, addrspace 5) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, align 1, addrspace 5) - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 1, addrspace 5) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, align 1, addrspace 5) + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) ; ; GFX10-LABEL: name: test_load_private_p1_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} 
; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 1, addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, align 1, addrspace 5) - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 1, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, align 1, addrspace 5) + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) ; ; GFX11-LABEL: name: test_load_private_p1_align1 @@ -6359,170 +6359,170 @@ body: | ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load 
(s8) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX9-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX9-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX9-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: 
[[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX9-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR2]](i32), [[OR5]](i32) ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_p1_align1 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX10-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; 
UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX10-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX10-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX10-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR2]](i32), [[OR5]](i32) ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_p1_align1 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from 
unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX11-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; UNALIGNED_GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX11-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; UNALIGNED_GFX11-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; UNALIGNED_GFX11-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; UNALIGNED_GFX11-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](s64) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11-NEXT: 
[[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX11-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; UNALIGNED_GFX11-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX11-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; UNALIGNED_GFX11-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX11-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; UNALIGNED_GFX11-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](i64) ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_p1_align1 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 
1, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; UNALIGNED_GFX12-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; UNALIGNED_GFX12-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL 
[[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; UNALIGNED_GFX12-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; UNALIGNED_GFX12-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](i64) ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) %0:_(p5) = COPY $vgpr0 - %1:_(p1) = G_LOAD %0 :: (load (p1), align 1, addrspace 5) - $vgpr0_vgpr1 = COPY %1 + %1:_(p1) = G_LOAD %0(p5) :: (load (p1), align 1, addrspace 5) + $vgpr0_vgpr1 = COPY %1(p1) ... --- @@ -6608,8 +6608,8 @@ body: | ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), addrspace 5) ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](p3) %0:_(p5) = COPY $vgpr0 - %1:_(p3) = G_LOAD %0 :: (load (p3), align 4, addrspace 5) - $vgpr0 = COPY %1 + %1:_(p3) = G_LOAD %0(p5) :: (load (p3), addrspace 5) + $vgpr0 = COPY %1(p3) ... 
--- @@ -6622,42 +6622,42 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](i32) ; SI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) ; ; CI-LABEL: name: test_load_private_p3_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](i32) ; CI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) ; ; VI-LABEL: name: test_load_private_p3_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = 
G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](i32) ; VI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) ; ; GFX9-LABEL: name: test_load_private_p3_align2 @@ -6692,60 +6692,60 @@ body: | ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](i32) ; UNALIGNED_GFX9-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_p3_align2 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX10-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; 
UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX10-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](i32) ; UNALIGNED_GFX10-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_p3_align2 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](i32) ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_p3_align2 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](i32) ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) %0:_(p5) = COPY $vgpr0 - %1:_(p3) = G_LOAD %0 :: (load (p3), align 2, addrspace 5) - $vgpr0 = COPY 
%1 + %1:_(p3) = G_LOAD %0(p5) :: (load (p3), align 2, addrspace 5) + $vgpr0 = COPY %1(p3) ... --- @@ -6758,72 +6758,72 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](i32) ; SI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) ; ; CI-LABEL: name: test_load_private_p3_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = 
G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](i32) ; CI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) ; ; VI-LABEL: name: test_load_private_p3_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR 
[[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](i32) ; VI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) ; ; GFX9-LABEL: name: test_load_private_p3_align1 @@ -6858,100 +6858,100 @@ body: | ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), 
addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](i32) ; UNALIGNED_GFX9-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_p3_align1 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX10-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = 
G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX10-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX10-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](i32) ; UNALIGNED_GFX10-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_p3_align1 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX11-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load 
(i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX11-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](i32) ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_p3_align1 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX12-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; 
UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX12-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](i32) ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) %0:_(p5) = COPY $vgpr0 - %1:_(p3) = G_LOAD %0 :: (load (p3), align 1, addrspace 5) - $vgpr0 = COPY %1 + %1:_(p3) = G_LOAD %0(p5) :: (load (p3), align 1, addrspace 5) + $vgpr0 = COPY %1(p3) ... --- @@ -7037,8 +7037,8 @@ body: | ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p5) :: (load (p5), addrspace 5) ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](p5) %0:_(p5) = COPY $vgpr0 - %1:_(p5) = G_LOAD %0 :: (load (p5), align 4, addrspace 5) - $vgpr0 = COPY %1 + %1:_(p5) = G_LOAD %0(p5) :: (load (p5), addrspace 5) + $vgpr0 = COPY %1(p5) ... --- @@ -7051,42 +7051,42 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](i32) ; SI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) ; ; CI-LABEL: name: test_load_private_p5_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) 
- ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](i32) ; CI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) ; ; VI-LABEL: name: test_load_private_p5_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](i32) ; VI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) ; ; GFX9-LABEL: name: test_load_private_p5_align2 @@ -7121,60 +7121,60 @@ body: | ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = 
G_CONSTANT i32 16 + ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](i32) ; UNALIGNED_GFX9-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_p5_align2 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX10-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX10-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](i32) ; UNALIGNED_GFX10-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_p5_align2 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](i32) ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) ; 
; UNALIGNED_GFX12-LABEL: name: test_load_private_p5_align2 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](i32) ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) %0:_(p5) = COPY $vgpr0 - %1:_(p5) = G_LOAD %0 :: (load (p5), align 2, addrspace 5) - $vgpr0 = COPY %1 + %1:_(p5) = G_LOAD %0(p5) :: (load (p5), align 2, addrspace 5) + $vgpr0 = COPY %1(p5) ... 
--- @@ -7187,72 +7187,72 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](i32) ; SI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) ; ; CI-LABEL: name: test_load_private_p5_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] 
- ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](i32) ; CI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) ; ; VI-LABEL: name: test_load_private_p5_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: 
[[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](i32) ; VI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) ; ; GFX9-LABEL: name: test_load_private_p5_align1 @@ -7287,100 +7287,100 @@ body: | ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; 
UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](i32) ; UNALIGNED_GFX9-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_p5_align1 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX10-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = 
G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX10-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX10-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](i32) ; UNALIGNED_GFX10-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_p5_align1 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX11-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX11-NEXT: 
[[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX11-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](i32) ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_p5_align1 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX12-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], 
[[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX12-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](i32) ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) %0:_(p5) = COPY $vgpr0 - %1:_(p5) = G_LOAD %0 :: (load (p5), align 1, addrspace 5) - $vgpr0 = COPY %1 + %1:_(p5) = G_LOAD %0(p5) :: (load (p5), align 1, addrspace 5) + $vgpr0 = COPY %1(p5) ... --- @@ -7393,83 +7393,83 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; SI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-LABEL: name: test_load_private_v2s8_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; CI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; VI-LABEL: name: test_load_private_v2s8_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-LABEL: name: test_load_private_v2s8_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX10-LABEL: name: test_load_private_v2s8_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX11-LABEL: name: test_load_private_v2s8_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) 
- ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX12-LABEL: name: test_load_private_v2s8_align2 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_v2s8_align2 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX9-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_v2s8_align2 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX10-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_v2s8_align2 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_v2s8_align2 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p5) = COPY $vgpr0 - %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), align 2, addrspace 5) - %2:_(s16) = G_BITCAST %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + %1:_(<2 x i8>) = G_LOAD %0(p5) :: (load (<2 x i8>), addrspace 5) + %2:_(i16) = G_BITCAST %1(<2 x i8>) + %3:_(i32) = G_ANYEXT %2(i16) + $vgpr0 = COPY %3(i32) ... 
--- @@ -7482,150 +7482,150 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[LSHR]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[OR]], [[C1]](i32) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[LSHR]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; CI-LABEL: name: test_load_private_v2s8_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[LSHR]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[OR]], [[C1]](i32) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[LSHR]](i32) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; VI-LABEL: name: test_load_private_v2s8_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = 
G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[LSHR]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[OR]], [[C1]](i32) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[LSHR]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX9-LABEL: name: test_load_private_v2s8_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 1, addrspace 5) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), align 1, addrspace 5) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LSHR]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX10-LABEL: name: test_load_private_v2s8_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 1, addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), align 1, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LSHR]](i32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX11-LABEL: name: test_load_private_v2s8_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 1, addrspace 5) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; GFX11-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), align 1, addrspace 5) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LSHR]](i32) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX12-LABEL: name: test_load_private_v2s8_align1 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 1, addrspace 5) - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32) - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), align 1, addrspace 5) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LSHR]](i32) + ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_v2s8_align1 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[LSHR]](s32) - ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[OR]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[LSHR]](i32) + ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_v2s8_align1 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = 
COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[LSHR]](s32) - ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX10-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[OR]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[LSHR]](i32) + ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_v2s8_align1 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[LSHR]](s32) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[OR]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[LSHR]](i32) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_v2s8_align1 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[LSHR]](s32) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[OR]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[LSHR]](i32) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) %0:_(p5) = COPY $vgpr0 - %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), align 1, addrspace 5) - %2:_(<2 x s32>) = G_ANYEXT %1 - $vgpr0_vgpr1 = COPY %2 + %1:_(<2 x i8>) = G_LOAD %0(p5) :: (load (<2 x i8>), align 1, addrspace 5) + %2:_(<2 x i32>) = G_ANYEXT %1(<2 x i8>) + $vgpr0_vgpr1 = COPY %2(<2 x i32>) ... 
--- @@ -7638,340 +7638,340 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) - ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; SI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C2]] + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C]](i32) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LSHR]], [[C3]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[COPY1]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[SHL]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[TRUNC1]] + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C2]] + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C]](i32) + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[DEF]], [[C3]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[COPY2]](i32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[SHL1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[TRUNC3]] + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL2]] + ; SI-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; CI-LABEL: name: test_load_private_v3s8_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: 
[[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) - ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]] - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) - ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; CI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; CI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C2]] + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C]](i32) + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; CI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LSHR]], [[C3]] + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[COPY1]](i32) + ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[SHL]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[TRUNC1]] + ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; CI-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C2]] + ; CI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C]](i32) + ; CI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[DEF]], [[C3]] + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[COPY2]](i32) + ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[SHL1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[TRUNC3]] + ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL2]] + ; CI-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; VI-LABEL: name: test_load_private_v3s8_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD 
[[COPY]](p5) :: (load (s32), addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C2]] + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C3]](i16) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL]] + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C2]] + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C2]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C3]](i16) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL1]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL2]] + ; VI-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; GFX9-LABEL: name: test_load_private_v3s8_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: 
[[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]] - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C2]] + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C2]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C3]](i16) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL]] + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C2]] + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C2]] + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C3]](i16) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL1]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; GFX10-LABEL: name: test_load_private_v3s8_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND 
[[TRUNC]], [[C2]] - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16) - ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; GFX10-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]] - ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16) - ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; GFX10-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX10-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C2]] + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C3]](i16) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL]] + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C2]] + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; GFX10-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C2]] + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C3]](i16) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL1]] + ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX10-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; GFX11-LABEL: name: test_load_private_v3s8_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX11-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX11-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] - ; GFX11-NEXT: 
[[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16) - ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX11-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; GFX11-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]] - ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16) - ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX11-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; GFX11-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; GFX11-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX11-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C2]] + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX11-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C2]] + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C3]](i16) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL]] + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX11-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C2]] + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; GFX11-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C2]] + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C3]](i16) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL1]] + ; GFX11-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; GFX11-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX11-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; GFX12-LABEL: name: test_load_private_v3s8_align4 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX12-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] - ; GFX12-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16) - ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], 
[[SHL]] - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX12-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; GFX12-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]] - ; GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16) - ; GFX12-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; GFX12-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; GFX12-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX12-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C2]] + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C2]] + ; GFX12-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C3]](i16) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL]] + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX12-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C2]] + ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; GFX12-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C2]] + ; GFX12-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C3]](i16) + ; GFX12-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL1]] + ; GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; GFX12-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; GFX12-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; GFX12-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX12-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_v3s8_align4 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; UNALIGNED_GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; UNALIGNED_GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; UNALIGNED_GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; UNALIGNED_GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] - ; UNALIGNED_GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16) - ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] 
- ; UNALIGNED_GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; UNALIGNED_GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; UNALIGNED_GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; UNALIGNED_GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]] - ; UNALIGNED_GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16) - ; UNALIGNED_GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; UNALIGNED_GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; UNALIGNED_GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; UNALIGNED_GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; UNALIGNED_GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; UNALIGNED_GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; UNALIGNED_GFX9-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C2]] + ; UNALIGNED_GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; UNALIGNED_GFX9-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C2]] + ; UNALIGNED_GFX9-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C3]](i16) + ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL]] + ; UNALIGNED_GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; UNALIGNED_GFX9-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C2]] + ; UNALIGNED_GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; UNALIGNED_GFX9-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C2]] + ; UNALIGNED_GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C3]](i16) + ; UNALIGNED_GFX9-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL1]] + ; UNALIGNED_GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; UNALIGNED_GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; UNALIGNED_GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL2]] + ; UNALIGNED_GFX9-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_v3s8_align4 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; UNALIGNED_GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; UNALIGNED_GFX10-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; UNALIGNED_GFX10-NEXT: 
[[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; UNALIGNED_GFX10-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] - ; UNALIGNED_GFX10-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16) - ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; UNALIGNED_GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; UNALIGNED_GFX10-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; UNALIGNED_GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; UNALIGNED_GFX10-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]] - ; UNALIGNED_GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16) - ; UNALIGNED_GFX10-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; UNALIGNED_GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; UNALIGNED_GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; UNALIGNED_GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; UNALIGNED_GFX10-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX10-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; UNALIGNED_GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; UNALIGNED_GFX10-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C2]] + ; UNALIGNED_GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; UNALIGNED_GFX10-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C2]] + ; UNALIGNED_GFX10-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C3]](i16) + ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL]] + ; UNALIGNED_GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; UNALIGNED_GFX10-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C2]] + ; UNALIGNED_GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; UNALIGNED_GFX10-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C2]] + ; UNALIGNED_GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C3]](i16) + ; UNALIGNED_GFX10-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL1]] + ; UNALIGNED_GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; UNALIGNED_GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; UNALIGNED_GFX10-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL2]] + ; UNALIGNED_GFX10-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_v3s8_align4 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; 
UNALIGNED_GFX11-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; UNALIGNED_GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; UNALIGNED_GFX11-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; UNALIGNED_GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; UNALIGNED_GFX11-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] - ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16) - ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; UNALIGNED_GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; UNALIGNED_GFX11-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; UNALIGNED_GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; UNALIGNED_GFX11-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]] - ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16) - ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; UNALIGNED_GFX11-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; UNALIGNED_GFX11-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; UNALIGNED_GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; UNALIGNED_GFX11-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C2]] + ; UNALIGNED_GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; UNALIGNED_GFX11-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C2]] + ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C3]](i16) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL]] + ; UNALIGNED_GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; UNALIGNED_GFX11-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C2]] + ; UNALIGNED_GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; UNALIGNED_GFX11-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C2]] + ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C3]](i16) + ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL1]] + ; UNALIGNED_GFX11-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; UNALIGNED_GFX11-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL2]] + ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_v3s8_align4 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; 
UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; UNALIGNED_GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; UNALIGNED_GFX12-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; UNALIGNED_GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; UNALIGNED_GFX12-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] - ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; UNALIGNED_GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; UNALIGNED_GFX12-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; UNALIGNED_GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; UNALIGNED_GFX12-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]] - ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16) - ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; UNALIGNED_GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; UNALIGNED_GFX12-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; UNALIGNED_GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; UNALIGNED_GFX12-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C2]] + ; UNALIGNED_GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; UNALIGNED_GFX12-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C2]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C3]](i16) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL]] + ; UNALIGNED_GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; UNALIGNED_GFX12-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C2]] + ; UNALIGNED_GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; UNALIGNED_GFX12-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C2]] + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C3]](i16) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL1]] + ; UNALIGNED_GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; UNALIGNED_GFX12-NEXT: 
[[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL2]] + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[OR2]](i32) %0:_(p5) = COPY $vgpr0 - %1:_(<3 x s8>) = G_LOAD %0 :: (load (<3 x s8>), addrspace 5, align 4) - %2:_(s24) = G_BITCAST %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + %1:_(<3 x i8>) = G_LOAD %0(p5) :: (load (<3 x i8>), align 4, addrspace 5) + %2:_(i24) = G_BITCAST %1(<3 x i8>) + %3:_(i32) = G_ANYEXT %2(i24) + $vgpr0 = COPY %3(i32) ... --- @@ -7984,430 +7984,430 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C5]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C5]] - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) - ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; SI-NEXT: $vgpr0 = COPY [[OR4]](s32) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; 
SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C3]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[OR]] + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[OR1]], [[C1]](i32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[OR1]], [[C3]](i32) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C4]] + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C1]](i32) + ; SI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LSHR]], [[C5]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[COPY1]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[SHL2]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i16) = G_OR [[AND]], [[TRUNC1]] + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C4]] + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C1]](i32) + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[DEF]], [[C5]] + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[COPY2]](i32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[SHL3]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[TRUNC3]] + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR2]](i16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR3]](i16) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C3]](i32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL4]] + ; SI-NEXT: $vgpr0 = COPY [[OR4]](i32) ; ; CI-LABEL: name: test_load_private_v3s8_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32) - ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32) - ; CI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; CI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; 
CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C5]] - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) - ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C5]] - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) - ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; CI-NEXT: $vgpr0 = COPY [[OR4]](s32) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C3]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[OR]] + ; CI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[OR1]], [[C1]](i32) + ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[OR1]], [[C3]](i32) + ; CI-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; CI-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; CI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C4]] + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C1]](i32) + ; CI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; CI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LSHR]], [[C5]] + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[COPY1]](i32) + ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[SHL2]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i16) = G_OR [[AND]], [[TRUNC1]] + ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; CI-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C4]] + ; CI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C1]](i32) + ; CI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[DEF]], [[C5]] + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[COPY2]](i32) + ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[SHL3]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[TRUNC3]] + ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR2]](i16) + ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR3]](i16) + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C3]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL4]] + ; CI-NEXT: $vgpr0 = COPY [[OR4]](i32) ; ; VI-LABEL: name: test_load_private_v3s8_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: 
{{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]] - ; VI-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C5]](s16) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL2]] - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C4]] - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C5]](s16) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL3]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; VI-NEXT: $vgpr0 = COPY [[OR4]](s32) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C3]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[OR]] + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[OR1]], [[C1]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[OR1]], [[C3]](i32) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = 
G_TRUNC [[OR1]](i32) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C4]] + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C4]] + ; VI-NEXT: [[C5:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C5]](i16) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL2]] + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C4]] + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C4]] + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C5]](i16) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL3]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR2]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR3]](i16) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C3]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL4]] + ; VI-NEXT: $vgpr0 = COPY [[OR4]](i32) ; ; GFX9-LABEL: name: test_load_private_v3s8_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), align 1, addrspace 5) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C2]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C4]](s16) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL1]] - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) - ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL2]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] - ; GFX9-NEXT: $vgpr0 = COPY [[OR3]](s32) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), align 1, addrspace 5) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) 
:: (load (i8) from unknown-address + 2, addrspace 5) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[OR]], [[C2]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[OR]], [[C1]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR]](i32) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C3]] + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C3]] + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C4]](i16) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL1]] + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C3]] + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C3]] + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C4]](i16) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL2]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR2]](i16) + ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL3]] + ; GFX9-NEXT: $vgpr0 = COPY [[OR3]](i32) ; ; GFX10-LABEL: name: test_load_private_v3s8_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), align 1, addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C2]](s32) - ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C4]](s16) - ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL1]] - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; GFX10-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] - ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) - ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL2]] 
- ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] - ; GFX10-NEXT: $vgpr0 = COPY [[OR3]](s32) + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), align 1, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[OR]], [[C2]](i32) + ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[OR]], [[C1]](i32) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR]](i32) + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX10-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C3]] + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C3]] + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C4]](i16) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL1]] + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C3]] + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; GFX10-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C3]] + ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C4]](i16) + ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL2]] + ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR2]](i16) + ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL3]] + ; GFX10-NEXT: $vgpr0 = COPY [[OR3]](i32) ; ; GFX11-LABEL: name: test_load_private_v3s8_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), align 1, addrspace 5) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C2]](s32) - ; GFX11-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX11-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX11-NEXT: 
[[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] - ; GFX11-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C4]](s16) - ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL1]] - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX11-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; GFX11-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] - ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) - ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL2]] - ; GFX11-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; GFX11-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] - ; GFX11-NEXT: $vgpr0 = COPY [[OR3]](s32) + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), align 1, addrspace 5) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[OR]], [[C2]](i32) + ; GFX11-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[OR]], [[C1]](i32) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR]](i32) + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX11-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C3]] + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX11-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C3]] + ; GFX11-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C4]](i16) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL1]] + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX11-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C3]] + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; GFX11-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C3]] + ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C4]](i16) + ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL2]] + ; GFX11-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; GFX11-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR2]](i16) + ; GFX11-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; GFX11-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL3]] + ; GFX11-NEXT: $vgpr0 = COPY [[OR3]](i32) ; ; GFX12-LABEL: name: test_load_private_v3s8_align1 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), align 1, addrspace 5) - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX12-NEXT: 
[[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C2]](s32) - ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; GFX12-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX12-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] - ; GFX12-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C4]](s16) - ; GFX12-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL1]] - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX12-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] - ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; GFX12-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] - ; GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) - ; GFX12-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL2]] - ; GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; GFX12-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] - ; GFX12-NEXT: $vgpr0 = COPY [[OR3]](s32) + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), align 1, addrspace 5) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[OR]], [[C2]](i32) + ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[OR]], [[C1]](i32) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR]](i32) + ; GFX12-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX12-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C3]] + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C3]] + ; GFX12-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX12-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C4]](i16) + ; GFX12-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL1]] + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX12-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C3]] + ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; GFX12-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C3]] + ; GFX12-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C4]](i16) + ; GFX12-NEXT: [[OR2:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL2]] + ; GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; GFX12-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR2]](i16) + ; GFX12-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; GFX12-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], 
[[SHL3]] + ; GFX12-NEXT: $vgpr0 = COPY [[OR3]](i32) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_v3s8_align1 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] - ; UNALIGNED_GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; UNALIGNED_GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; UNALIGNED_GFX9-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; UNALIGNED_GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] - ; UNALIGNED_GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; UNALIGNED_GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]] - ; UNALIGNED_GFX9-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; UNALIGNED_GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C5]](s16) - ; UNALIGNED_GFX9-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL2]] - ; UNALIGNED_GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; UNALIGNED_GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] - ; UNALIGNED_GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; UNALIGNED_GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C4]] - ; UNALIGNED_GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C5]](s16) - ; UNALIGNED_GFX9-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL3]] - ; UNALIGNED_GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; UNALIGNED_GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; UNALIGNED_GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; UNALIGNED_GFX9-NEXT: $vgpr0 = COPY [[OR4]](s32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = 
G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[OR]] + ; UNALIGNED_GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[OR1]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; UNALIGNED_GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; UNALIGNED_GFX9-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; UNALIGNED_GFX9-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C4]] + ; UNALIGNED_GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; UNALIGNED_GFX9-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C4]] + ; UNALIGNED_GFX9-NEXT: [[C5:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; UNALIGNED_GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C5]](i16) + ; UNALIGNED_GFX9-NEXT: [[OR2:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL2]] + ; UNALIGNED_GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; UNALIGNED_GFX9-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C4]] + ; UNALIGNED_GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; UNALIGNED_GFX9-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C4]] + ; UNALIGNED_GFX9-NEXT: [[SHL3:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C5]](i16) + ; UNALIGNED_GFX9-NEXT: [[OR3:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL3]] + ; UNALIGNED_GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR2]](i16) + ; UNALIGNED_GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR3]](i16) + ; UNALIGNED_GFX9-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL4]] + ; UNALIGNED_GFX9-NEXT: $vgpr0 = COPY [[OR4]](i32) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_v3s8_align1 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] - ; UNALIGNED_GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32) - ; 
UNALIGNED_GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; UNALIGNED_GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; UNALIGNED_GFX10-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; UNALIGNED_GFX10-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] - ; UNALIGNED_GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; UNALIGNED_GFX10-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]] - ; UNALIGNED_GFX10-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; UNALIGNED_GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C5]](s16) - ; UNALIGNED_GFX10-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL2]] - ; UNALIGNED_GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; UNALIGNED_GFX10-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] - ; UNALIGNED_GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; UNALIGNED_GFX10-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C4]] - ; UNALIGNED_GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C5]](s16) - ; UNALIGNED_GFX10-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL3]] - ; UNALIGNED_GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; UNALIGNED_GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; UNALIGNED_GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; UNALIGNED_GFX10-NEXT: $vgpr0 = COPY [[OR4]](s32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[OR]] + ; UNALIGNED_GFX10-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[OR1]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; UNALIGNED_GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; UNALIGNED_GFX10-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; UNALIGNED_GFX10-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C4]] + ; UNALIGNED_GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; UNALIGNED_GFX10-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C4]] + ; UNALIGNED_GFX10-NEXT: [[C5:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; UNALIGNED_GFX10-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C5]](i16) + ; UNALIGNED_GFX10-NEXT: [[OR2:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL2]] + ; UNALIGNED_GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = 
G_TRUNC [[LSHR1]](i32) + ; UNALIGNED_GFX10-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C4]] + ; UNALIGNED_GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; UNALIGNED_GFX10-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C4]] + ; UNALIGNED_GFX10-NEXT: [[SHL3:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C5]](i16) + ; UNALIGNED_GFX10-NEXT: [[OR3:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL3]] + ; UNALIGNED_GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR2]](i16) + ; UNALIGNED_GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR3]](i16) + ; UNALIGNED_GFX10-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL4]] + ; UNALIGNED_GFX10-NEXT: $vgpr0 = COPY [[OR4]](i32) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_v3s8_align1 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] - ; UNALIGNED_GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX11-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; UNALIGNED_GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; UNALIGNED_GFX11-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; UNALIGNED_GFX11-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] - ; UNALIGNED_GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; UNALIGNED_GFX11-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]] - ; UNALIGNED_GFX11-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C5]](s16) - ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL2]] - ; UNALIGNED_GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; UNALIGNED_GFX11-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] - ; UNALIGNED_GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; UNALIGNED_GFX11-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C4]] - ; UNALIGNED_GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C5]](s16) - ; UNALIGNED_GFX11-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL3]] - ; UNALIGNED_GFX11-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; UNALIGNED_GFX11-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; UNALIGNED_GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) 
= G_SHL [[ZEXT1]], [[C3]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[OR4]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[OR]] + ; UNALIGNED_GFX11-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[OR1]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; UNALIGNED_GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; UNALIGNED_GFX11-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; UNALIGNED_GFX11-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C4]] + ; UNALIGNED_GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; UNALIGNED_GFX11-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C4]] + ; UNALIGNED_GFX11-NEXT: [[C5:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C5]](i16) + ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL2]] + ; UNALIGNED_GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; UNALIGNED_GFX11-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C4]] + ; UNALIGNED_GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; UNALIGNED_GFX11-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C4]] + ; UNALIGNED_GFX11-NEXT: [[SHL3:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C5]](i16) + ; UNALIGNED_GFX11-NEXT: [[OR3:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL3]] + ; UNALIGNED_GFX11-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR2]](i16) + ; UNALIGNED_GFX11-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR3]](i16) + ; UNALIGNED_GFX11-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL4]] + ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[OR4]](i32) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_v3s8_align1 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) 
= G_CONSTANT i32 8 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] - ; UNALIGNED_GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; UNALIGNED_GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; UNALIGNED_GFX12-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] - ; UNALIGNED_GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; UNALIGNED_GFX12-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]] - ; UNALIGNED_GFX12-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C5]](s16) - ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL2]] - ; UNALIGNED_GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; UNALIGNED_GFX12-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] - ; UNALIGNED_GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; UNALIGNED_GFX12-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C4]] - ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C5]](s16) - ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL3]] - ; UNALIGNED_GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; UNALIGNED_GFX12-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[OR4]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[OR]] + ; UNALIGNED_GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[OR1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: 
[[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; UNALIGNED_GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; UNALIGNED_GFX12-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C4]] + ; UNALIGNED_GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; UNALIGNED_GFX12-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C4]] + ; UNALIGNED_GFX12-NEXT: [[C5:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C5]](i16) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL2]] + ; UNALIGNED_GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; UNALIGNED_GFX12-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C4]] + ; UNALIGNED_GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; UNALIGNED_GFX12-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C4]] + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C5]](i16) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL3]] + ; UNALIGNED_GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR2]](i16) + ; UNALIGNED_GFX12-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR3]](i16) + ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL4]] + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[OR4]](i32) %0:_(p5) = COPY $vgpr0 - %1:_(<3 x s8>) = G_LOAD %0 :: (load (<3 x s8>), align 1, addrspace 5) - %2:_(s24) = G_BITCAST %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + %1:_(<3 x i8>) = G_LOAD %0(p5) :: (load (<3 x i8>), align 1, addrspace 5) + %2:_(i24) = G_BITCAST %1(<3 x i8>) + %3:_(i32) = G_ANYEXT %2(i24) + $vgpr0 = COPY %3(i32) ... 
--- @@ -8420,82 +8420,82 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; SI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-LABEL: name: test_load_private_v4s8_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; CI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; VI-LABEL: name: test_load_private_v4s8_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-LABEL: name: test_load_private_v4s8_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX10-LABEL: name: test_load_private_v4s8_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX11-LABEL: name: test_load_private_v4s8_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX12-LABEL: name: test_load_private_v4s8_align4 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_v4s8_align4 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; UNALIGNED_GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; UNALIGNED_GFX9-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_v4s8_align4 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: 
[[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; UNALIGNED_GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; UNALIGNED_GFX10-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_v4s8_align4 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_v4s8_align4 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p5) = COPY $vgpr0 - %1:_(<4 x s8>) = G_LOAD %0 :: (load (<4 x s8>), align 4, addrspace 5) - %2:_(s32) = G_BITCAST %1 - $vgpr0 = COPY %2 + %1:_(<4 x i8>) = G_LOAD %0(p5) :: (load (<4 x i8>), addrspace 5) + %2:_(i32) = G_BITCAST %1(<4 x i8>) + $vgpr0 = COPY %2(i32) ... --- @@ -8508,110 +8508,110 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; CI-LABEL: name: test_load_private_v8s8_align8 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: 
[[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; VI-LABEL: name: test_load_private_v8s8_align8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX9-LABEL: name: test_load_private_v8s8_align8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX10-LABEL: name: test_load_private_v8s8_align8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from 
unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX11-LABEL: name: test_load_private_v8s8_align8 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s32>), addrspace 5) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p5) :: (load (<2 x i32>), addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX12-LABEL: name: test_load_private_v8s8_align8 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s32>), addrspace 5) - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p5) :: (load (<2 x i32>), addrspace 5) + ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_v8s8_align8 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) - ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32) + ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_v8s8_align8 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) - ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD 
[[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32) + ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_v8s8_align8 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s32>), addrspace 5) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p5) :: (load (<2 x i32>), addrspace 5) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_v8s8_align8 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s32>), addrspace 5) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p5) :: (load (<2 x i32>), addrspace 5) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) %0:_(p5) = COPY $vgpr0 - %1:_(<8 x s8>) = G_LOAD %0 :: (load (<8 x s8>), align 8, addrspace 5) - %2:_(<2 x s32>) = G_BITCAST %1 - $vgpr0_vgpr1 = COPY %2 + %1:_(<8 x i8>) = G_LOAD %0(p5) :: (load (<8 x i8>), addrspace 5) + %2:_(<2 x i32>) = G_BITCAST %1(<8 x i8>) + $vgpr0_vgpr1 = COPY %2(<2 x i32>) ... --- @@ -8624,529 +8624,529 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; SI-NEXT: 
[[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; SI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) - ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) - ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; SI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) - ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) - ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; SI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), 
addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; SI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; SI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; SI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; SI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; SI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; SI-NEXT: 
[[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; SI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; SI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; SI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (i8) from unknown-address + 12, addrspace 5) + ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (i8) from unknown-address + 13, addrspace 5) + ; SI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; SI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (i8) from unknown-address + 14, addrspace 5) + ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p5) :: (load (i8) from unknown-address + 15, addrspace 5) + ; SI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; SI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; SI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; SI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; CI-LABEL: name: test_load_private_v16s8_align16 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - 
; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; CI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; CI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) - ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) - ; CI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; CI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) - ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) - ; CI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; CI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; CI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; CI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + 
; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; CI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; CI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; CI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; CI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; CI-NEXT: 
[[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; CI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; CI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (i8) from unknown-address + 12, addrspace 5) + ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (i8) from unknown-address + 13, addrspace 5) + ; CI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; CI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (i8) from unknown-address + 14, addrspace 5) + ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p5) :: (load (i8) from unknown-address + 15, addrspace 5) + ; CI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; CI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; CI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; CI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; VI-LABEL: name: test_load_private_v16s8_align16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - 
; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; VI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) - ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) - ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) - ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) - ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], 
[[C]](i32) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; VI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; VI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; VI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; VI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; VI-NEXT: 
[[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; VI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (i8) from unknown-address + 12, addrspace 5) + ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (i8) from unknown-address + 13, addrspace 5) + ; VI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; VI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (i8) from unknown-address + 14, addrspace 5) + ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p5) :: (load (i8) from unknown-address + 15, addrspace 5) + ; VI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; VI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; VI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; VI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; GFX9-LABEL: name: test_load_private_v16s8_align16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 1, addrspace 5) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, align 1, addrspace 5) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 1, addrspace 5) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, align 1, addrspace 5) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 1, addrspace 5) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, align 1, addrspace 5) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 1, addrspace 5) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 
12, align 1, addrspace 5) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; GFX10-LABEL: name: test_load_private_v16s8_align16 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 1, addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, align 1, addrspace 5) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 1, addrspace 5) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, align 1, addrspace 5) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 1, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, align 1, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 1, addrspace 5) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, align 1, addrspace 5) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; GFX11-LABEL: name: test_load_private_v16s8_align16 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 1, addrspace 5) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p5) :: (load (<4 x i32>), align 1, addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; GFX12-LABEL: name: test_load_private_v16s8_align16 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 1, addrspace 5) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p5) :: (load (<4 x i32>), align 1, addrspace 5) + ; GFX12-NEXT: 
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_v16s8_align16 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD 
[[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; UNALIGNED_GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; UNALIGNED_GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; UNALIGNED_GFX9-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from 
unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX9-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX9-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX9-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX9-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = 
G_PTR_ADD [[COPY]], [[C5]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (i8) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (i8) from unknown-address + 13, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (i8) from unknown-address + 14, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p5) :: (load (i8) from unknown-address + 15, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX9-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_v16s8_align16 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 
5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX10-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; UNALIGNED_GFX10-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; UNALIGNED_GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], 
[[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; UNALIGNED_GFX10-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX10-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX10-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX10-NEXT: [[SHL5:%[0-9]+]]:_(i32) = 
G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX10-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX10-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (i8) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (i8) from unknown-address + 13, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (i8) from unknown-address + 14, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p5) :: (load (i8) from unknown-address + 15, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX10-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_v16s8_align16 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; 
UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX11-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = 
G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; UNALIGNED_GFX11-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; UNALIGNED_GFX11-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; UNALIGNED_GFX11-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; 
UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX11-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX11-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX11-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX11-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (i8) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: 
[[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (i8) from unknown-address + 13, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (i8) from unknown-address + 14, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p5) :: (load (i8) from unknown-address + 15, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX11-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_v16s8_align16 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; UNALIGNED_GFX12-NEXT: 
[[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; UNALIGNED_GFX12-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; UNALIGNED_GFX12-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: 
[[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; UNALIGNED_GFX12-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD 
[[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX12-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX12-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (i8) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (i8) from unknown-address + 13, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (i8) from unknown-address + 14, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p5) :: (load (i8) from unknown-address + 15, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX12-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) %0:_(p5) = COPY $vgpr0 - %1:_(<16 x s8>) = G_LOAD %0 :: (load (<16 x s8>), align 1, addrspace 5) - %2:_(<4 x s32>) = G_BITCAST %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + %1:_(<16 x i8>) = G_LOAD %0(p5) :: (load (<16 x i8>), align 1, addrspace 5) + %2:_(<4 x i32>) = G_BITCAST %1(<16 x i8>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<4 x i32>) ... 
--- @@ -9159,81 +9159,81 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5) - ; SI-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), addrspace 5) + ; SI-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; ; CI-LABEL: name: test_load_private_v2s16_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5) - ; CI-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), addrspace 5) + ; CI-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; ; VI-LABEL: name: test_load_private_v2s16_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), addrspace 5) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; ; GFX9-LABEL: name: test_load_private_v2s16_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), addrspace 5) + ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; ; GFX10-LABEL: name: test_load_private_v2s16_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5) - ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), addrspace 5) + ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; ; GFX11-LABEL: name: test_load_private_v2s16_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; ; GFX12-LABEL: name: test_load_private_v2s16_align4 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5) - ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), addrspace 5) + ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_v2s16_align4 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5) - ; UNALIGNED_GFX9-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p5) :: (load (<2 x 
i16>), addrspace 5) + ; UNALIGNED_GFX9-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_v2s16_align4 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5) - ; UNALIGNED_GFX10-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), addrspace 5) + ; UNALIGNED_GFX10-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_v2s16_align4 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5) - ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), addrspace 5) + ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_v2s16_align4 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5) - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), addrspace 5) + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) %0:_(p5) = COPY $vgpr0 - %1:_(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 5) - $vgpr0 = COPY %1 + %1:_(<2 x i16>) = G_LOAD %0(p5) :: (load (<2 x i16>), addrspace 5) + $vgpr0 = COPY %1(<2 x i16>) ... 
--- @@ -9246,135 +9246,135 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C1]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C2]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x i16>) ; ; CI-LABEL: name: test_load_private_v2s16_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]] - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C1]] + ; CI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C1]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C2]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x i16>) ; ; VI-LABEL: name: 
test_load_private_v2s16_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C1]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C1]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C2]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x i16>) ; ; GFX9-LABEL: name: test_load_private_v2s16_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 2, addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), align 2, addrspace 5) + ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; ; GFX10-LABEL: name: test_load_private_v2s16_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 2, addrspace 5) - ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), align 2, addrspace 5) + ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; ; GFX11-LABEL: name: test_load_private_v2s16_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 2, addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), align 2, addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; ; GFX12-LABEL: name: test_load_private_v2s16_align2 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 2, addrspace 5) - ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p5) :: (load (<2 
x i16>), align 2, addrspace 5) + ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_v2s16_align2 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; UNALIGNED_GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; UNALIGNED_GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_v2s16_align2 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; UNALIGNED_GFX10-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; UNALIGNED_GFX10-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_v2s16_align2 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD 
[[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_v2s16_align2 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) %0:_(p5) = COPY $vgpr0 - %1:_(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 2, addrspace 5) - $vgpr0 = COPY %1 + %1:_(<2 x i16>) = G_LOAD %0(p5) :: (load (<2 x i16>), align 2, addrspace 5) + $vgpr0 = COPY %1(<2 x i16>) ... 
--- @@ -9387,205 +9387,205 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C3]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C3]] - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL2]] - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[OR]], [[C3]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[OR1]], [[C3]] + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C4]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL2]] + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x i16>) ; ; CI-LABEL: name: test_load_private_v2s16_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; CI-NEXT: 
[[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C3]] - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C3]] - ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL2]] - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[OR]], [[C3]] + ; CI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[OR1]], [[C3]] + ; CI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C4]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL2]] + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; CI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x i16>) ; ; VI-LABEL: name: test_load_private_v2s16_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; VI-NEXT: 
[[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C3]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C3]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL2]] - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[OR]], [[C3]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[OR1]], [[C3]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C4]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL2]] + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x i16>) ; ; GFX9-LABEL: name: test_load_private_v2s16_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 1, addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), align 1, addrspace 5) + ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; ; GFX10-LABEL: name: test_load_private_v2s16_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 1, addrspace 5) - ; 
GFX10-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), align 1, addrspace 5) + ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; ; GFX11-LABEL: name: test_load_private_v2s16_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 1, addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), align 1, addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; ; GFX12-LABEL: name: test_load_private_v2s16_align1 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 1, addrspace 5) - ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), align 1, addrspace 5) + ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](<2 x i16>) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_v2s16_align1 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; UNALIGNED_GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; UNALIGNED_GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; 
UNALIGNED_GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR]](i32) + ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; UNALIGNED_GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_v2s16_align1 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; UNALIGNED_GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; UNALIGNED_GFX10-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR]](i32) + ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; 
UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; UNALIGNED_GFX10-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_v2s16_align1 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; UNALIGNED_GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR]](i32) + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = 
G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_v2s16_align1 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; UNALIGNED_GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR]](i32) + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = 
G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) %0:_(p5) = COPY $vgpr0 - %1:_(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 1, addrspace 5) - $vgpr0 = COPY %1 + %1:_(<2 x i16>) = G_LOAD %0(p5) :: (load (<2 x i16>), align 1, addrspace 5) + $vgpr0 = COPY %1(<2 x i16>) ... --- @@ -9598,305 +9598,305 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 8, addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 4, align 4, addrspace 5) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LOAD]](<2 x s16>) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]] - ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), align 8, addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 4, align 4, addrspace 5) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[LOAD]](<2 x i16>) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], 
[[C1]](i32) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C2]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C2]] + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C2]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND2]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND1]], [[SHL1]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C2]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C1]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR1]], [[SHL2]] + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>), [[BITCAST5]](<2 x i16>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; CI-LABEL: name: test_load_private_v3s16_align8 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 8, addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 4, align 4, addrspace 5) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LOAD]](<2 x s16>) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]] - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] - ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] - ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]] - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]] - ; CI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-NEXT: 
[[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), align 8, addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 4, align 4, addrspace 5) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[LOAD]](<2 x i16>) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C1]](i32) + ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C2]] + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C2]] + ; CI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C2]] + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND2]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND1]], [[SHL1]] + ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C2]] + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C1]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR1]], [[SHL2]] + ; CI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>), [[BITCAST5]](<2 x i16>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; VI-LABEL: name: test_load_private_v3s16_align8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 8, addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 4, align 4, addrspace 5) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LOAD]](<2 x s16>) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) - ; 
VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]] - ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), align 8, addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 4, align 4, addrspace 5) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[LOAD]](<2 x i16>) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C1]](i32) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C2]] + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C2]] + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C2]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND2]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND1]], [[SHL1]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C2]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C1]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR1]], [[SHL2]] + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>), [[BITCAST5]](<2 x i16>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX9-LABEL: name: test_load_private_v3s16_align8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 8, addrspace 5) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 4, align 4, addrspace 5) - ; GFX9-NEXT: 
[[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LOAD]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), align 8, addrspace 5) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 4, align 4, addrspace 5) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[LOAD]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C1]](i32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC1]](i16), [[TRUNC2]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC3]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX10-LABEL: name: test_load_private_v3s16_align8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: 
[[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 8, addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 4, align 4, addrspace 5) - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LOAD]](<2 x s16>) - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), align 8, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 4, align 4, addrspace 5) + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[LOAD]](<2 x i16>) + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C1]](i32) + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC1]](i16), [[TRUNC2]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), 
[[TRUNC3]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX11-LABEL: name: test_load_private_v3s16_align8 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), addrspace 5) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p5) :: (load (<4 x i16>), addrspace 5) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[LOAD]](<4 x i16>) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[UV]](<2 x i16>), [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX12-LABEL: name: test_load_private_v3s16_align8 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY 
$vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), addrspace 5) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p5) :: (load (<4 x i16>), addrspace 5) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[LOAD]](<4 x i16>) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[UV]](<2 x i16>), [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_v3s16_align8 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 8, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 4, align 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: 
[[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; UNALIGNED_GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LOAD]](<2 x s16>) - ; UNALIGNED_GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; UNALIGNED_GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNALIGNED_GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; UNALIGNED_GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; UNALIGNED_GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; UNALIGNED_GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; UNALIGNED_GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; UNALIGNED_GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC3]](s16) - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; UNALIGNED_GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), align 8, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 4, align 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; UNALIGNED_GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[LOAD]](<2 x i16>) + ; UNALIGNED_GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; UNALIGNED_GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; UNALIGNED_GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; UNALIGNED_GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; UNALIGNED_GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; UNALIGNED_GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; UNALIGNED_GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; UNALIGNED_GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC1]](i16), [[TRUNC2]](i16) + ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC3]](i16) + ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = 
G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; UNALIGNED_GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_v3s16_align8 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 8, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 4, align 4, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; UNALIGNED_GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LOAD]](<2 x s16>) - ; UNALIGNED_GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; UNALIGNED_GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNALIGNED_GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; UNALIGNED_GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; UNALIGNED_GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; UNALIGNED_GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; UNALIGNED_GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; UNALIGNED_GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC3]](s16) - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; UNALIGNED_GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), align 8, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 4, align 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; UNALIGNED_GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[LOAD]](<2 x i16>) + ; UNALIGNED_GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX10-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; 
UNALIGNED_GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; UNALIGNED_GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; UNALIGNED_GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; UNALIGNED_GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; UNALIGNED_GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; UNALIGNED_GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; UNALIGNED_GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC1]](i16), [[TRUNC2]](i16) + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC3]](i16) + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; UNALIGNED_GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_v3s16_align8 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; UNALIGNED_GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNALIGNED_GFX11-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; UNALIGNED_GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; UNALIGNED_GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; UNALIGNED_GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; UNALIGNED_GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; UNALIGNED_GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; UNALIGNED_GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p5) :: (load (<4 x i16>), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[LOAD]](<4 x i16>) + ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; UNALIGNED_GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 
16 + ; UNALIGNED_GFX11-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; UNALIGNED_GFX11-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; UNALIGNED_GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; UNALIGNED_GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; UNALIGNED_GFX11-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; UNALIGNED_GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; UNALIGNED_GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; UNALIGNED_GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[UV]](<2 x i16>), [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_v3s16_align8 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; UNALIGNED_GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNALIGNED_GFX12-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; UNALIGNED_GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; UNALIGNED_GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; UNALIGNED_GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; UNALIGNED_GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; UNALIGNED_GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p5) :: (load (<4 x i16>), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[LOAD]](<4 x i16>) + ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; UNALIGNED_GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; UNALIGNED_GFX12-NEXT: 
[[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; UNALIGNED_GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; UNALIGNED_GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; UNALIGNED_GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; UNALIGNED_GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; UNALIGNED_GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[UV]](<2 x i16>), [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) %0:_(p5) = COPY $vgpr0 - %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 8, addrspace 5) - %2:_(<3 x s16>) = G_IMPLICIT_DEF - %3:_(<6 x s16>) = G_CONCAT_VECTORS %1, %2 - $vgpr0_vgpr1_vgpr2 = COPY %3 + %1:_(<3 x i16>) = G_LOAD %0(p5) :: (load (<3 x i16>), align 8, addrspace 5) + %2:_(<3 x i16>) = G_IMPLICIT_DEF + %3:_(<6 x i16>) = G_CONCAT_VECTORS %1(<3 x i16>), %2(<3 x i16>) + $vgpr0_vgpr1_vgpr2 = COPY %3(<6 x i16>) ... --- @@ -9909,341 +9909,341 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] - ; SI-NEXT: 
[[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C3]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C3]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C2]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C3]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C3]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C2]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C3]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND4]], [[C2]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL2]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x i16>), [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; CI-LABEL: name: test_load_private_v3s16_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) 
- ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]] - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]] - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]] - ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]] - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] - ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C3]] + ; CI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C3]] + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C2]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C3]] + ; CI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C3]] + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C2]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CI-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C3]] + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND4]], [[C2]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL2]] + ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x i16>), [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; VI-LABEL: name: test_load_private_v3s16_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; 
VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C3]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C3]] + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C2]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR 
[[AND]], [[SHL]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C3]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C3]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C2]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; VI-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C3]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND4]], [[C2]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL2]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x i16>), [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX9-LABEL: name: test_load_private_v3s16_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 2, addrspace 5) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LOAD]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), align 2, addrspace 5) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[LOAD]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = 
G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C1]](i32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC1]](i16), [[TRUNC2]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC3]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX10-LABEL: name: test_load_private_v3s16_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 2, addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LOAD]](<2 x s16>) - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), align 2, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; 
GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[LOAD]](<2 x i16>) + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C1]](i32) + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC1]](i16), [[TRUNC2]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC3]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX11-LABEL: name: test_load_private_v3s16_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), 
[[TRUNC3]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX12-LABEL: name: test_load_private_v3s16_align2 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - 
; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_v3s16_align2 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 
(s16) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; UNALIGNED_GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNALIGNED_GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; UNALIGNED_GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; UNALIGNED_GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; UNALIGNED_GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; UNALIGNED_GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; UNALIGNED_GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; UNALIGNED_GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; UNALIGNED_GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; UNALIGNED_GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; UNALIGNED_GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; UNALIGNED_GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; UNALIGNED_GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; 
UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; UNALIGNED_GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_v3s16_align2 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; UNALIGNED_GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNALIGNED_GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; UNALIGNED_GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; UNALIGNED_GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; UNALIGNED_GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; UNALIGNED_GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; UNALIGNED_GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX10-NEXT: 
[[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; UNALIGNED_GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; UNALIGNED_GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; UNALIGNED_GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; UNALIGNED_GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX10-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; UNALIGNED_GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; UNALIGNED_GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; UNALIGNED_GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_v3s16_align2 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; UNALIGNED_GFX11-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNALIGNED_GFX11-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; UNALIGNED_GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; UNALIGNED_GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; UNALIGNED_GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), 
[[TRUNC1]](s16) - ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; UNALIGNED_GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; UNALIGNED_GFX11-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; UNALIGNED_GFX11-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; UNALIGNED_GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; UNALIGNED_GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; UNALIGNED_GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; UNALIGNED_GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_v3s16_align2 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; 
UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNALIGNED_GFX12-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; UNALIGNED_GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; UNALIGNED_GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; UNALIGNED_GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; UNALIGNED_GFX12-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; UNALIGNED_GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; UNALIGNED_GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; UNALIGNED_GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; UNALIGNED_GFX12-NEXT: 
[[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) %0:_(p5) = COPY $vgpr0 - %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 2, addrspace 5) - %2:_(<3 x s16>) = G_IMPLICIT_DEF - %3:_(<6 x s16>) = G_CONCAT_VECTORS %1, %2 - $vgpr0_vgpr1_vgpr2 = COPY %3 + %1:_(<3 x i16>) = G_LOAD %0(p5) :: (load (<3 x i16>), align 2, addrspace 5) + %2:_(<3 x i16>) = G_IMPLICIT_DEF + %3:_(<6 x i16>) = G_CONCAT_VECTORS %1(<3 x i16>), %2(<3 x i16>) + $vgpr0_vgpr1_vgpr2 = COPY %3(<6 x i16>) ... --- @@ -10256,439 +10256,439 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C5]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C5]] - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL3]] - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C5]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]] - ; SI-NEXT: 
[[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]] - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C4]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; SI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[OR]], [[C5]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[OR1]], [[C5]] + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C4]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL3]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR3]](i32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[OR2]], [[C5]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C5]] + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C4]](i32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL4]] + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR4]](i32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], 
[[C5]] + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[AND4]], [[C4]](i32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL5]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR5]](i32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x i16>), [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; CI-LABEL: name: test_load_private_v3s16_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) - ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C5]] - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C5]] - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL3]] - ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C5]] - ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]] - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] - ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) 
= G_OR [[LSHR]], [[SHL5]] - ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C4]](i32) + ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[OR]], [[C5]] + ; CI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[OR1]], [[C5]] + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C4]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL3]] + ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR3]](i32) + ; CI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[OR2]], [[C5]] + ; CI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C5]] + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C4]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL4]] + ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR4]](i32) + ; CI-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C5]] + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[AND4]], [[C4]](i32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL5]] + ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR5]](i32) + ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x i16>), [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) + ; 
CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; VI-LABEL: name: test_load_private_v3s16_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C5]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C5]] - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL3]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C5]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]] - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = 
G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C4]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[OR]], [[C5]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[OR1]], [[C5]] + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C4]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL3]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR3]](i32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[OR2]], [[C5]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C5]] + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C4]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL4]] + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR4]](i32) + ; VI-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[AND4]], [[C4]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL5]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR5]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x i16>), [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX9-LABEL: name: test_load_private_v3s16_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 1, addrspace 5) - ; GFX9-NEXT: 
[[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 4, align 1, addrspace 5) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LOAD]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), align 1, addrspace 5) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 4, align 1, addrspace 5) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[LOAD]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C1]](i32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC1]](i16), [[TRUNC2]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC3]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS 
[[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX10-LABEL: name: test_load_private_v3s16_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 1, addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 4, align 1, addrspace 5) - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LOAD]](<2 x s16>) - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), align 1, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 4, align 1, addrspace 5) + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[LOAD]](<2 x i16>) + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C1]](i32) + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x 
i16>) + ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC1]](i16), [[TRUNC2]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC3]](i16) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX11-LABEL: name: test_load_private_v3s16_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 1, addrspace 5) - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, align 1, addrspace 5) - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, align 1, addrspace 5) - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), align 1, addrspace 5) + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, align 1, addrspace 5) + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, align 1, 
addrspace 5) + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX12-LABEL: name: test_load_private_v3s16_align1 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 1, addrspace 5) - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, align 1, addrspace 5) - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, align 1, addrspace 5) - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), align 1, 
addrspace 5) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, align 1, addrspace 5) + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, align 1, addrspace 5) + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_v3s16_align1 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; UNALIGNED_GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; 
UNALIGNED_GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32) - ; UNALIGNED_GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNALIGNED_GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; UNALIGNED_GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; UNALIGNED_GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; UNALIGNED_GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) - ; UNALIGNED_GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; UNALIGNED_GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; UNALIGNED_GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; UNALIGNED_GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR]](i32) + ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; UNALIGNED_GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; 
UNALIGNED_GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[OR2]](i32) + ; UNALIGNED_GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; UNALIGNED_GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; UNALIGNED_GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; UNALIGNED_GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; UNALIGNED_GFX9-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C4]](i32) + ; UNALIGNED_GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; UNALIGNED_GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; UNALIGNED_GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; UNALIGNED_GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_v3s16_align1 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; 
UNALIGNED_GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; UNALIGNED_GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32) - ; UNALIGNED_GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNALIGNED_GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; UNALIGNED_GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; UNALIGNED_GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; UNALIGNED_GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) - ; UNALIGNED_GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; UNALIGNED_GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; UNALIGNED_GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; UNALIGNED_GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR]](i32) + ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX10-NEXT: 
[[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; UNALIGNED_GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[OR2]](i32) + ; UNALIGNED_GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; UNALIGNED_GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; UNALIGNED_GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; UNALIGNED_GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; UNALIGNED_GFX10-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX10-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C4]](i32) + ; UNALIGNED_GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; UNALIGNED_GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; UNALIGNED_GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; UNALIGNED_GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_v3s16_align1 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; 
UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; UNALIGNED_GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32) - ; UNALIGNED_GFX11-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNALIGNED_GFX11-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; UNALIGNED_GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; UNALIGNED_GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) - ; UNALIGNED_GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; UNALIGNED_GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; UNALIGNED_GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; UNALIGNED_GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR]](i32) + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX11-NEXT: 
[[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[OR2]](i32) + ; UNALIGNED_GFX11-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; UNALIGNED_GFX11-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; UNALIGNED_GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; UNALIGNED_GFX11-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C4]](i32) + ; UNALIGNED_GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; UNALIGNED_GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; UNALIGNED_GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; UNALIGNED_GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_v3s16_align1 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], 
[[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; UNALIGNED_GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32) - ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNALIGNED_GFX12-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; UNALIGNED_GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) - ; UNALIGNED_GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; UNALIGNED_GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; UNALIGNED_GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR]](i32) + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; 
UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[OR2]](i32) + ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; UNALIGNED_GFX12-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; UNALIGNED_GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C4]](i32) + ; UNALIGNED_GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; UNALIGNED_GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; UNALIGNED_GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) %0:_(p5) = COPY $vgpr0 - %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 1, addrspace 5) - %2:_(<3 x s16>) = G_IMPLICIT_DEF - %3:_(<6 x s16>) = G_CONCAT_VECTORS %1, %2 - $vgpr0_vgpr1_vgpr2 = COPY %3 + %1:_(<3 x i16>) = G_LOAD %0(p5) :: (load (<3 x i16>), align 1, addrspace 5) + %2:_(<3 x i16>) = G_IMPLICIT_DEF + %3:_(<6 x i16>) = G_CONCAT_VECTORS %1(<3 x i16>), %2(<3 x i16>) + $vgpr0_vgpr1_vgpr2 = COPY %3(<6 x i16>) ... 
--- @@ -10700,109 +10700,109 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 8, addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s16>) from unknown-address + 4, addrspace 5) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), align 8, addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x i16>) from unknown-address + 4, addrspace 5) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[LOAD]](<2 x i16>), [[LOAD1]](<2 x i16>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; CI-LABEL: name: test_load_private_v4s16_align8 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 8, addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s16>) from unknown-address + 4, addrspace 5) - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), align 8, addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x i16>) from unknown-address + 4, addrspace 5) + ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[LOAD]](<2 x i16>), [[LOAD1]](<2 x i16>) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; VI-LABEL: name: test_load_private_v4s16_align8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 8, addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s16>) from unknown-address + 4, addrspace 5) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), align 8, addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x i16>) from unknown-address + 4, addrspace 5) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[LOAD]](<2 x 
i16>), [[LOAD1]](<2 x i16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX9-LABEL: name: test_load_private_v4s16_align8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 8, addrspace 5) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s16>) from unknown-address + 4, addrspace 5) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), align 8, addrspace 5) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x i16>) from unknown-address + 4, addrspace 5) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[LOAD]](<2 x i16>), [[LOAD1]](<2 x i16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX10-LABEL: name: test_load_private_v4s16_align8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 8, addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s16>) from unknown-address + 4, addrspace 5) - ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), align 8, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x i16>) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[LOAD]](<2 x i16>), [[LOAD1]](<2 x i16>) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX11-LABEL: name: test_load_private_v4s16_align8 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), addrspace 5) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p5) :: (load (<4 x i16>), addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; GFX12-LABEL: name: test_load_private_v4s16_align8 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), addrspace 5) - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p5) :: (load (<4 x i16>), addrspace 5) + ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; 
UNALIGNED_GFX9-LABEL: name: test_load_private_v4s16_align8 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 8, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s16>) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>) - ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), align 8, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x i16>) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[LOAD]](<2 x i16>), [[LOAD1]](<2 x i16>) + ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_v4s16_align8 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 8, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s16>) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>) - ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), align 8, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x i16>) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[LOAD]](<2 x i16>), [[LOAD1]](<2 x i16>) + ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_v4s16_align8 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), addrspace 5) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p5) :: (load (<4 x i16>), addrspace 5) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_v4s16_align8 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: 
[[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), addrspace 5) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p5) :: (load (<4 x i16>), addrspace 5) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) %0:_(p5) = COPY $vgpr0 - %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 8, addrspace 5) - $vgpr0_vgpr1 = COPY %1 + %1:_(<4 x i16>) = G_LOAD %0(p5) :: (load (<4 x i16>), addrspace 5) + $vgpr0_vgpr1 = COPY %1(<4 x i16>) ... --- @@ -10815,109 +10815,109 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s16>) from unknown-address + 4, addrspace 5) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x i16>) from unknown-address + 4, addrspace 5) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[LOAD]](<2 x i16>), [[LOAD1]](<2 x i16>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; CI-LABEL: name: test_load_private_v4s16_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s16>) from unknown-address + 4, addrspace 5) - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x i16>) from unknown-address + 4, addrspace 5) + ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[LOAD]](<2 x i16>), [[LOAD1]](<2 x i16>) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; VI-LABEL: name: test_load_private_v4s16_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s16>) from unknown-address + 4, addrspace 5) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 
x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x i16>) from unknown-address + 4, addrspace 5) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[LOAD]](<2 x i16>), [[LOAD1]](<2 x i16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX9-LABEL: name: test_load_private_v4s16_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s16>) from unknown-address + 4, addrspace 5) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), addrspace 5) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x i16>) from unknown-address + 4, addrspace 5) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[LOAD]](<2 x i16>), [[LOAD1]](<2 x i16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX10-LABEL: name: test_load_private_v4s16_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s16>) from unknown-address + 4, addrspace 5) - ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x i16>) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[LOAD]](<2 x i16>), [[LOAD1]](<2 x i16>) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX11-LABEL: name: test_load_private_v4s16_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), align 4, addrspace 5) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p5) :: (load (<4 x i16>), 
align 4, addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; GFX12-LABEL: name: test_load_private_v4s16_align4 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), align 4, addrspace 5) - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p5) :: (load (<4 x i16>), align 4, addrspace 5) + ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_v4s16_align4 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s16>) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>) - ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x i16>) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[LOAD]](<2 x i16>), [[LOAD1]](<2 x i16>) + ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_v4s16_align4 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s16>) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>) - ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x i16>) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[LOAD]](<2 x i16>), [[LOAD1]](<2 x i16>) + ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_v4s16_align4 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY 
$vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), align 4, addrspace 5) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p5) :: (load (<4 x i16>), align 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_v4s16_align4 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), align 4, addrspace 5) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p5) :: (load (<4 x i16>), align 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) %0:_(p5) = COPY $vgpr0 - %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 4, addrspace 5) - $vgpr0_vgpr1 = COPY %1 + %1:_(<4 x i16>) = G_LOAD %0(p5) :: (load (<4 x i16>), align 4, addrspace 5) + $vgpr0_vgpr1 = COPY %1(<4 x i16>) ... --- @@ -10929,214 +10929,214 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], [[C1]] + ; 
SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C2]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i16) from unknown-address + 6, addrspace 5) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[LOAD3]], [[C1]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C2]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x i16>), [[BITCAST1]](<2 x i16>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; CI-LABEL: name: test_load_private_v4s16_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]] - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) - ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]] - ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]] - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C1]] + ; CI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LOAD1]], 
[[C1]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C2]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i16) from unknown-address + 6, addrspace 5) + ; CI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C1]] + ; CI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[LOAD3]], [[C1]] + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C2]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x i16>), [[BITCAST1]](<2 x i16>) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; VI-LABEL: name: test_load_private_v4s16_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], [[C1]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND 
[[LOAD1]], [[C1]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C2]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i16) from unknown-address + 6, addrspace 5) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LOAD2]], [[C1]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[LOAD3]], [[C1]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C2]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x i16>), [[BITCAST1]](<2 x i16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX9-LABEL: name: test_load_private_v4s16_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 2, addrspace 5) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s16>) from unknown-address + 4, align 2, addrspace 5) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), align 2, addrspace 5) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x i16>) from unknown-address + 4, align 2, addrspace 5) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[LOAD]](<2 x i16>), [[LOAD1]](<2 x i16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX10-LABEL: name: test_load_private_v4s16_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 2, addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s16>) from unknown-address + 4, align 2, addrspace 5) - ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), align 2, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x i16>) from 
unknown-address + 4, align 2, addrspace 5) + ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[LOAD]](<2 x i16>), [[LOAD1]](<2 x i16>) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX11-LABEL: name: test_load_private_v4s16_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), align 2, addrspace 5) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p5) :: (load (<4 x i16>), align 2, addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; GFX12-LABEL: name: test_load_private_v4s16_align2 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), align 2, addrspace 5) - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p5) :: (load (<4 x i16>), align 2, addrspace 5) + ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_v4s16_align2 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; UNALIGNED_GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; 
UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i16) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD3]](i32) + ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; UNALIGNED_GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>) + ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_v4s16_align2 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; UNALIGNED_GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), 
[[TRUNC1]](i16) + ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i16) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD3]](i32) + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; UNALIGNED_GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>) + ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_v4s16_align2 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; UNALIGNED_GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX11-NEXT: 
[[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 6 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i16) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD3]](i32) + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; UNALIGNED_GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_v4s16_align2 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD]](i32) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD1]](i32) + ; UNALIGNED_GFX12-NEXT: 
[[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD2]](i32) + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 6 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i16) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LOAD3]](i32) + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) %0:_(p5) = COPY $vgpr0 - %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 2, addrspace 5) - $vgpr0_vgpr1 = COPY %1 + %1:_(<4 x i16>) = G_LOAD %0(p5) :: (load (<4 x i16>), align 2, addrspace 5) + $vgpr0_vgpr1 = COPY %1(<4 x i16>) ... --- @@ -11149,340 +11149,340 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C3]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C3]] - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL2]] - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], 
[[C1]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD2]] - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD3]] - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C3]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C3]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL5]] - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[OR]], [[C3]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[OR1]], [[C3]] + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C4]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL2]] + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; SI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD2]] + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = 
G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD3]] + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[OR3]], [[C3]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[OR4]], [[C3]] + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C4]](i32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL5]] + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR5]](i32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x i16>), [[BITCAST1]](<2 x i16>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; CI-LABEL: name: test_load_private_v4s16_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C3]] - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C3]] - ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL2]] - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD2]] - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD3]] - ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND 
[[OR3]], [[C3]] - ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C3]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL5]] - ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[OR]], [[C3]] + ; CI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[OR1]], [[C3]] + ; CI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C4]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL2]] + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; CI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD2]] + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD3]] + ; CI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[OR3]], [[C3]] + ; CI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[OR4]], [[C3]] + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C4]](i32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL5]] + ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR5]](i32) + ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x i16>), [[BITCAST1]](<2 x i16>) + ; 
CI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; VI-LABEL: name: test_load_private_v4s16_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C3]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C3]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL2]] - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD2]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD3]] - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C3]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C3]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL5]] - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD 
[[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[OR]], [[C3]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[OR1]], [[C3]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C4]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL2]] + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD2]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD3]] + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[OR3]], [[C3]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[OR4]], [[C3]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C4]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL5]] + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR5]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x i16>), [[BITCAST1]](<2 x i16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX9-LABEL: name: test_load_private_v4s16_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 1, addrspace 5) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s16>) from unknown-address + 4, align 1, addrspace 5) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS 
[[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), align 1, addrspace 5) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x i16>) from unknown-address + 4, align 1, addrspace 5) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[LOAD]](<2 x i16>), [[LOAD1]](<2 x i16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX10-LABEL: name: test_load_private_v4s16_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 1, addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s16>) from unknown-address + 4, align 1, addrspace 5) - ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), align 1, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x i16>) from unknown-address + 4, align 1, addrspace 5) + ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[LOAD]](<2 x i16>), [[LOAD1]](<2 x i16>) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX11-LABEL: name: test_load_private_v4s16_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), align 1, addrspace 5) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p5) :: (load (<4 x i16>), align 1, addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; GFX12-LABEL: name: test_load_private_v4s16_align1 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), align 1, addrspace 5) - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i16>) = G_LOAD [[COPY]](p5) :: (load (<4 x i16>), align 1, addrspace 5) + ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x i16>) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_v4s16_align1 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 8 - ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; UNALIGNED_GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; UNALIGNED_GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[OR3]](s32) - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; UNALIGNED_GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR]](i32) + ; UNALIGNED_GFX9-NEXT: 
[[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; UNALIGNED_GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[OR2]](i32) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[OR3]](i32) + ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; UNALIGNED_GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>) + ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_v4s16_align1 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX10-NEXT: 
[[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; UNALIGNED_GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; UNALIGNED_GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[OR3]](s32) - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; UNALIGNED_GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR]](i32) + ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX10-NEXT: 
[[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; UNALIGNED_GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[OR2]](i32) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[OR3]](i32) + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; UNALIGNED_GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>) + ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_v4s16_align1 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], 
[[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; UNALIGNED_GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32) - ; UNALIGNED_GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[OR3]](s32) - ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; UNALIGNED_GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR]](i32) + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX11-NEXT: 
[[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[OR2]](i32) + ; UNALIGNED_GFX11-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 6 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[OR3]](i32) + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; UNALIGNED_GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_v4s16_align1 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; 
UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; UNALIGNED_GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32) - ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[OR3]](s32) - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[OR]](i32) + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; 
UNALIGNED_GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[OR1]](i32) + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[OR2]](i32) + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 6 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[OR3]](i32) + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) %0:_(p5) = COPY $vgpr0 - %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 1, addrspace 5) - $vgpr0_vgpr1 = COPY %1 + %1:_(<4 x i16>) = G_LOAD %0(p5) :: (load (<4 x i16>), align 1, addrspace 5) + $vgpr0_vgpr1 = COPY %1(<4 x i16>) ... 
--- @@ -11495,109 +11495,109 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; CI-LABEL: name: test_load_private_v2s32_align8 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; VI-LABEL: name: test_load_private_v2s32_align8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX9-LABEL: name: test_load_private_v2s32_align8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; 
GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX10-LABEL: name: test_load_private_v2s32_align8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX11-LABEL: name: test_load_private_v2s32_align8 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s32>), addrspace 5) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p5) :: (load (<2 x i32>), addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX12-LABEL: name: test_load_private_v2s32_align8 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s32>), addrspace 5) - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p5) :: (load (<2 x i32>), addrspace 5) + ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_v2s32_align8 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = 
G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) - ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32) + ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_v2s32_align8 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) - ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32) + ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_v2s32_align8 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s32>), addrspace 5) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p5) :: (load (<2 x i32>), addrspace 5) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_v2s32_align8 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s32>), addrspace 5) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p5) :: (load (<2 x i32>), addrspace 5) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) %0:_(p5) = COPY $vgpr0 - %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 8, addrspace 5) - $vgpr0_vgpr1 = COPY %1 + %1:_(<2 x i32>) = G_LOAD %0(p5) :: (load (<2 x i32>), addrspace 5) + 
$vgpr0_vgpr1 = COPY %1(<2 x i32>) ... --- @@ -11610,109 +11610,109 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; CI-LABEL: name: test_load_private_v2s32_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; VI-LABEL: name: test_load_private_v2s32_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX9-LABEL: name: test_load_private_v2s32_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: 
[[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX10-LABEL: name: test_load_private_v2s32_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX11-LABEL: name: test_load_private_v2s32_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s32>), align 4, addrspace 5) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p5) :: (load (<2 x i32>), align 4, addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX12-LABEL: name: test_load_private_v2s32_align4 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s32>), align 4, addrspace 5) - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p5) :: (load (<2 x i32>), align 4, addrspace 5) + ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_v2s32_align4 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], 
[[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) - ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32) + ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_v2s32_align4 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) - ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32) + ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_v2s32_align4 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s32>), align 4, addrspace 5) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p5) :: (load (<2 x i32>), align 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_v2s32_align4 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s32>), align 4, addrspace 5) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p5) :: (load (<2 x i32>), align 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) %0:_(p5) = COPY $vgpr0 - %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 4, addrspace 5) - $vgpr0_vgpr1 = COPY %1 + %1:_(<2 x i32>) = G_LOAD %0(p5) :: (load (<2 x i32>), align 4, addrspace 5) + 
$vgpr0_vgpr1 = COPY %1(<2 x i32>) ... --- @@ -11725,187 +11725,187 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i16) from unknown-address + 6, addrspace 5) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; CI-LABEL: name: test_load_private_v2s32_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: 
(load (s16) from unknown-address + 4, addrspace 5) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i16) from unknown-address + 6, addrspace 5) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; VI-LABEL: name: test_load_private_v2s32_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; VI-NEXT: 
[[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i16) from unknown-address + 6, addrspace 5) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX9-LABEL: name: test_load_private_v2s32_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 2, addrspace 5) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, align 2, addrspace 5) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 2, addrspace 5) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, align 2, addrspace 5) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX10-LABEL: name: test_load_private_v2s32_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 2, addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, align 2, addrspace 5) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 2, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, align 2, addrspace 5) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX11-LABEL: name: test_load_private_v2s32_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s32>), align 2, 
addrspace 5) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p5) :: (load (<2 x i32>), align 2, addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX12-LABEL: name: test_load_private_v2s32_align2 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s32>), align 2, addrspace 5) - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p5) :: (load (<2 x i32>), align 2, addrspace 5) + ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_v2s32_align2 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) - ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i16) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], 
[[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32) + ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_v2s32_align2 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) - ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i16) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32) + ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_v2s32_align2 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: 
[[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i16) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_v2s32_align2 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) 
- ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i16) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) %0:_(p5) = COPY $vgpr0 - %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 2, addrspace 5) - $vgpr0_vgpr1 = COPY %1 + %1:_(<2 x i32>) = G_LOAD %0(p5) :: (load (<2 x i32>), align 2, addrspace 5) + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... 
--- @@ -11918,313 +11918,313 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD 
[[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; CI-LABEL: name: test_load_private_v2s32_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; 
CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, 
addrspace 5) + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; VI-LABEL: name: test_load_private_v2s32_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD 
[[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX9-LABEL: name: test_load_private_v2s32_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 1, addrspace 5) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, align 1, addrspace 5) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 1, addrspace 5) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, align 1, addrspace 5) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32) + ; 
GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX10-LABEL: name: test_load_private_v2s32_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 1, addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, align 1, addrspace 5) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 1, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, align 1, addrspace 5) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX11-LABEL: name: test_load_private_v2s32_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s32>), align 1, addrspace 5) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p5) :: (load (<2 x i32>), align 1, addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX12-LABEL: name: test_load_private_v2s32_align1 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s32>), align 1, addrspace 5) - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p5) :: (load (<2 x i32>), align 1, addrspace 5) + ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_v2s32_align1 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; 
UNALIGNED_GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) - ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX9-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; 
UNALIGNED_GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX9-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32) + ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_v2s32_align1 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; 
UNALIGNED_GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX10-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) - ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX10-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX10-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; 
UNALIGNED_GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX10-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32) + ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_v2s32_align1 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, 
addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX11-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX11-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; 
UNALIGNED_GFX11-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_v2s32_align1 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = 
G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) %0:_(p5) = COPY $vgpr0 - %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 1, addrspace 5) - $vgpr0_vgpr1 = COPY %1 + %1:_(<2 x i32>) = G_LOAD %0(p5) :: (load (<2 x i32>), align 1, addrspace 5) + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... 
--- @@ -12237,417 +12237,417 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD 
[[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; SI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; SI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; SI-NEXT: 
[[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; SI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; SI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; SI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; SI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) ; ; CI-LABEL: name: test_load_private_v3s32_align16 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) 
= G_OR [[SHL5]], [[OR3]] - ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; CI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD 
[[PTR_ADD5]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; CI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; CI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; CI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; CI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; CI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) ; ; VI-LABEL: name: test_load_private_v3s32_align16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; 
VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: 
[[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; VI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; VI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; VI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; VI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) ; ; GFX9-LABEL: name: test_load_private_v3s32_align16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 1, addrspace 5) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, align 1, addrspace 5) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 1, addrspace 5) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), 
[[LOAD2]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 1, addrspace 5) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, align 1, addrspace 5) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 1, addrspace 5) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) ; ; GFX10-LABEL: name: test_load_private_v3s32_align16 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 1, addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, align 1, addrspace 5) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 1, addrspace 5) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 1, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, align 1, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 1, addrspace 5) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) ; ; GFX11-LABEL: name: test_load_private_v3s32_align16 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s32>), align 1, addrspace 5) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p5) :: (load (<3 x i32>), align 1, addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) ; ; GFX12-LABEL: name: test_load_private_v3s32_align16 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s32>), align 1, addrspace 5) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p5) 
:: (load (<3 x i32>), align 1, addrspace 5) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_v3s32_align16 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; 
UNALIGNED_GFX9-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; UNALIGNED_GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX9-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: 
[[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX9-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX9-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_v3s32_align16 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; 
UNALIGNED_GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX10-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; UNALIGNED_GFX10-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX10-NEXT: 
[[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX10-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX10-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX10-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; 
UNALIGNED_GFX10-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX10-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_v3s32_align16 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = 
G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX11-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; UNALIGNED_GFX11-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: 
[[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX11-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX11-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX11-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_v3s32_align16 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; 
UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; 
UNALIGNED_GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; UNALIGNED_GFX12-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; 
UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX12-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) %0:_(p5) = COPY $vgpr0 - %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 1, addrspace 5) - $vgpr0_vgpr1_vgpr2 = COPY %1 + %1:_(<3 x i32>) = G_LOAD %0(p5) :: (load (<3 x i32>), align 1, addrspace 5) + $vgpr0_vgpr1_vgpr2 = COPY %1(<3 x i32>) ... 
--- @@ -12660,130 +12660,130 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, addrspace 5) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) ; ; CI-LABEL: name: test_load_private_v3s32_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, addrspace 5) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) ; ; VI-LABEL: name: test_load_private_v3s32_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; VI-NEXT: 
[[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, addrspace 5) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) ; ; GFX9-LABEL: name: test_load_private_v3s32_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, addrspace 5) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) ; ; GFX10-LABEL: name: test_load_private_v3s32_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: 
(load (s32) from unknown-address + 4, addrspace 5) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, addrspace 5) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) ; ; GFX11-LABEL: name: test_load_private_v3s32_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s32>), align 4, addrspace 5) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p5) :: (load (<3 x i32>), align 4, addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) ; ; GFX12-LABEL: name: test_load_private_v3s32_align4 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s32>), align 4, addrspace 5) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p5) :: (load (<3 x i32>), align 4, addrspace 5) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_v3s32_align4 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD 
[[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_v3s32_align4 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_v3s32_align4 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s32>), align 4, addrspace 5) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p5) :: (load (<3 x i32>), align 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_v3s32_align4 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s32>), align 4, addrspace 5) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; 
UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p5) :: (load (<3 x i32>), align 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) %0:_(p5) = COPY $vgpr0 - %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 4, addrspace 5) - $vgpr0_vgpr1_vgpr2 = COPY %1 + %1:_(<3 x i32>) = G_LOAD %0(p5) :: (load (<3 x i32>), align 4, addrspace 5) + $vgpr0_vgpr1_vgpr2 = COPY %1(<3 x i32>) ... --- @@ -12796,528 +12796,528 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) 
= G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; SI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) - ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) - ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; SI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) - ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) - ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; SI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; SI-NEXT: 
[[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; SI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; SI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; SI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; SI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; SI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; SI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; SI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; SI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; SI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (i8) from unknown-address + 12, addrspace 5) + ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (i8) from unknown-address + 13, addrspace 5) + ; SI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; SI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (i8) from unknown-address + 14, addrspace 5) + ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p5) :: (load (i8) from unknown-address + 15, addrspace 5) + ; SI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; SI-NEXT: 
[[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; SI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; SI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; CI-LABEL: name: test_load_private_v4s32_align16 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; 
CI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; CI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) - ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) - ; CI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; CI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) - ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) - ; CI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; CI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; CI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; CI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; 
CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; CI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; CI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; CI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; CI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; CI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; CI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; CI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (i8) from unknown-address + 12, addrspace 5) + ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (i8) from unknown-address + 13, addrspace 5) + ; CI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; CI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (i8) from unknown-address + 14, addrspace 5) + ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p5) :: (load (i8) from unknown-address + 15, addrspace 5) + ; CI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; CI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; 
CI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; CI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; VI-LABEL: name: test_load_private_v4s32_align16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; 
VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; VI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) - ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) - ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) - ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) - ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], 
[[C4]](i32) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; VI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; VI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; VI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; VI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; VI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; VI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (i8) from unknown-address + 12, addrspace 5) + ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (i8) from unknown-address + 13, addrspace 5) + ; VI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; VI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (i8) from unknown-address + 14, addrspace 5) + ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p5) :: (load (i8) from unknown-address + 15, addrspace 5) + ; VI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; VI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; VI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], 
[[C3]](i32) + ; VI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; GFX9-LABEL: name: test_load_private_v4s32_align16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 1, addrspace 5) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, align 1, addrspace 5) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 1, addrspace 5) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, align 1, addrspace 5) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 1, addrspace 5) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, align 1, addrspace 5) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 1, addrspace 5) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, align 1, addrspace 5) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; GFX10-LABEL: name: test_load_private_v4s32_align16 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 1, addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, align 1, addrspace 5) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 1, addrspace 5) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD 
[[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, align 1, addrspace 5) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 1, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, align 1, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 1, addrspace 5) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, align 1, addrspace 5) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; GFX11-LABEL: name: test_load_private_v4s32_align16 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 1, addrspace 5) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p5) :: (load (<4 x i32>), align 1, addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; GFX12-LABEL: name: test_load_private_v4s32_align16 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 1, addrspace 5) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p5) :: (load (<4 x i32>), align 1, addrspace 5) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_v4s32_align16 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX9-NEXT: 
[[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; UNALIGNED_GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; UNALIGNED_GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; UNALIGNED_GFX9-NEXT: 
[[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; UNALIGNED_GFX9-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX9-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX9-NEXT: 
[[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX9-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX9-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX9-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (i8) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (i8) from unknown-address + 13, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (i8) from unknown-address + 14, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p5) :: (load (i8) from unknown-address + 15, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], 
[[ZEXTLOAD11]] + ; UNALIGNED_GFX9-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_v4s32_align16 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX10-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; 
UNALIGNED_GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; UNALIGNED_GFX10-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; UNALIGNED_GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; UNALIGNED_GFX10-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; 
UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX10-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX10-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX10-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, 
addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX10-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX10-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (i8) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (i8) from unknown-address + 13, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (i8) from unknown-address + 14, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p5) :: (load (i8) from unknown-address + 15, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX10-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_v4s32_align16 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX11-NEXT: 
[[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX11-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; UNALIGNED_GFX11-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; UNALIGNED_GFX11-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 
5) - ; UNALIGNED_GFX11-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; UNALIGNED_GFX11-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX11-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: 
(load (i8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX11-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX11-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX11-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (i8) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (i8) from unknown-address + 13, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (i8) from unknown-address + 14, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p5) :: (load (i8) from unknown-address + 15, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX11-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), 
[[OR8]](i32), [[OR11]](i32) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_v4s32_align16 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = 
G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; UNALIGNED_GFX12-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; UNALIGNED_GFX12-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; UNALIGNED_GFX12-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = 
G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX12-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: 
[[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX12-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (i8) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (i8) from unknown-address + 13, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (i8) from unknown-address + 14, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p5) :: (load (i8) from unknown-address + 15, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX12-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) %0:_(p5) = COPY $vgpr0 - %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 1, addrspace 5) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<4 x i32>) = G_LOAD %0(p5) :: (load (<4 x i32>), align 1, addrspace 5) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<4 x i32>) ... 
--- @@ -13330,151 +13330,151 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; CI-LABEL: name: test_load_private_v4s32_align8 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + 
; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; VI-LABEL: name: test_load_private_v4s32_align8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; GFX9-LABEL: name: test_load_private_v4s32_align8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD 
[[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; GFX10-LABEL: name: test_load_private_v4s32_align8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = 
G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; GFX11-LABEL: name: test_load_private_v4s32_align8 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 8, addrspace 5) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p5) :: (load (<4 x i32>), align 8, addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; GFX12-LABEL: name: test_load_private_v4s32_align8 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 8, addrspace 5) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p5) :: (load (<4 x i32>), align 8, addrspace 5) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_v4s32_align8 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], 
[[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_v4s32_align8 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_v4s32_align8 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 8, 
addrspace 5) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p5) :: (load (<4 x i32>), align 8, addrspace 5) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_v4s32_align8 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 8, addrspace 5) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p5) :: (load (<4 x i32>), align 8, addrspace 5) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) %0:_(p5) = COPY $vgpr0 - %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 8, addrspace 5) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<4 x i32>) = G_LOAD %0(p5) :: (load (<4 x i32>), align 8, addrspace 5) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<4 x i32>) ... --- @@ -13487,151 +13487,151 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, addrspace 5) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; CI-LABEL: name: test_load_private_v4s32_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) 
- ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, addrspace 5) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; VI-LABEL: name: test_load_private_v4s32_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: 
[[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, addrspace 5) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; GFX9-LABEL: name: test_load_private_v4s32_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, addrspace 5) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; GFX10-LABEL: name: test_load_private_v4s32_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = 
G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, addrspace 5) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; GFX11-LABEL: name: test_load_private_v4s32_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 4, addrspace 5) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p5) :: (load (<4 x i32>), align 4, addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; GFX12-LABEL: name: test_load_private_v4s32_align4 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 4, addrspace 5) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p5) :: (load (<4 x i32>), align 4, addrspace 5) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_v4s32_align4 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; 
UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_v4s32_align4 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: 
[[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_v4s32_align4 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 4, addrspace 5) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p5) :: (load (<4 x i32>), align 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_v4s32_align4 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 4, addrspace 5) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p5) :: (load (<4 x i32>), align 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) %0:_(p5) = COPY $vgpr0 - %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 4, addrspace 5) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<4 x i32>) = G_LOAD %0(p5) :: (load (<4 x i32>), align 4, addrspace 5) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<4 x i32>) ... 
--- @@ -13644,297 +13644,297 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5) - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i16) from unknown-address 
+ 6, addrspace 5) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i16) from unknown-address + 8, addrspace 5) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i16) from unknown-address + 10, addrspace 5) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i16) from unknown-address + 12, addrspace 5) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i16) from unknown-address + 14, addrspace 5) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32), [[OR3]](i32) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; CI-LABEL: name: test_load_private_v4s32_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5) - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: 
(load (s16) from unknown-address + 12, addrspace 5) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i16) from unknown-address + 6, addrspace 5) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i16) from unknown-address + 8, addrspace 5) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i16) from unknown-address + 10, addrspace 5) + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i16) from unknown-address + 12, addrspace 5) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i16) from unknown-address + 14, addrspace 5) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32), [[OR3]](i32) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; VI-LABEL: name: test_load_private_v4s32_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; VI-NEXT: 
[[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5) - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i16) from unknown-address + 6, addrspace 5) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i16) from unknown-address + 8, addrspace 5) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], 
[[C]](i32) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i16) from unknown-address + 10, addrspace 5) + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i16) from unknown-address + 12, addrspace 5) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i16) from unknown-address + 14, addrspace 5) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32), [[OR3]](i32) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; GFX9-LABEL: name: test_load_private_v4s32_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 2, addrspace 5) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, align 2, addrspace 5) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 2, addrspace 5) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, align 2, addrspace 5) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 2, addrspace 5) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, align 2, addrspace 5) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 2, addrspace 5) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, align 2, addrspace 5) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; GFX10-LABEL: name: test_load_private_v4s32_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: 
[[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 2, addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, align 2, addrspace 5) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 2, addrspace 5) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, align 2, addrspace 5) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 2, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, align 2, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 2, addrspace 5) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, align 2, addrspace 5) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; GFX11-LABEL: name: test_load_private_v4s32_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 2, addrspace 5) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p5) :: (load (<4 x i32>), align 2, addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; GFX12-LABEL: name: test_load_private_v4s32_align2 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 2, addrspace 5) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p5) :: (load (<4 x i32>), align 2, addrspace 5) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_v4s32_align2 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 2 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; UNALIGNED_GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) - ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address 
+ 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i16) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i16) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i16) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX9-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i16) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i16) from unknown-address + 14, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32), [[OR3]](i32) + ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_v4s32_align2 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; UNALIGNED_GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) 
= G_PTR_ADD [[COPY]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) - ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i16) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i16) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i16) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX10-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD 
[[COPY]], [[C4]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i16) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i16) from unknown-address + 14, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32), [[OR3]](i32) + ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_v4s32_align2 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: 
[[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i16) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i16) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i16) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i16) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i16) from unknown-address + 14, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32), [[OR3]](i32) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_v4s32_align2 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX12-NEXT: 
[[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, 
addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i16) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i16) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i16) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i16) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i16) from unknown-address + 14, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32), [[OR3]](i32) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) %0:_(p5) = COPY $vgpr0 - %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 2, addrspace 5) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<4 x i32>) = G_LOAD %0(p5) :: (load (<4 x i32>), align 2, addrspace 5) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<4 x i32>) ... 
--- @@ -13947,528 +13947,528 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD 
[[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; SI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) - ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) - ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; SI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) - ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) - ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; SI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, 
addrspace 5) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; SI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; SI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; SI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; SI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; SI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; SI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; SI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; SI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; SI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (i8) from unknown-address + 12, addrspace 5) + ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (i8) from unknown-address + 13, addrspace 5) + ; SI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; SI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (i8) from unknown-address + 14, addrspace 5) + ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p5) :: (load (i8) from unknown-address + 15, addrspace 5) + ; SI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; SI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; SI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; SI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; CI-LABEL: name: test_load_private_v4s32_align1 ; CI: 
liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; CI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 
11, addrspace 5) - ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; CI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) - ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) - ; CI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; CI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) - ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) - ; CI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; CI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; CI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; CI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL 
[[ZEXTLOAD4]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; CI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; CI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; CI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; CI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; CI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; CI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; CI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (i8) from unknown-address + 12, addrspace 5) + ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (i8) from unknown-address + 13, addrspace 5) + ; CI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; CI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (i8) from unknown-address + 14, addrspace 5) + ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p5) :: (load (i8) from unknown-address + 15, addrspace 5) + ; CI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; CI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; CI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; CI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; VI-LABEL: name: test_load_private_v4s32_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: 
[[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; VI-NEXT: 
[[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; VI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) - ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) - ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) - ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) - ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; 
VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; VI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; VI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; VI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; VI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; VI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; VI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (i8) from unknown-address + 12, addrspace 5) + ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (i8) from unknown-address + 13, addrspace 5) + ; VI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; VI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (i8) from unknown-address + 14, addrspace 5) + ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p5) :: (load (i8) from unknown-address + 15, addrspace 5) + ; VI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; VI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; VI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; VI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; GFX9-LABEL: name: test_load_private_v4s32_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; 
GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 1, addrspace 5) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, align 1, addrspace 5) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 1, addrspace 5) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, align 1, addrspace 5) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 1, addrspace 5) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, align 1, addrspace 5) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 1, addrspace 5) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, align 1, addrspace 5) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; GFX10-LABEL: name: test_load_private_v4s32_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 1, addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, align 1, addrspace 5) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 1, addrspace 5) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, align 1, addrspace 5) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 1, addrspace 5) + ; GFX10-NEXT: 
[[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, align 1, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 1, addrspace 5) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, align 1, addrspace 5) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; GFX11-LABEL: name: test_load_private_v4s32_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 1, addrspace 5) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p5) :: (load (<4 x i32>), align 1, addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; GFX12-LABEL: name: test_load_private_v4s32_align1 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 1, addrspace 5) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p5) :: (load (<4 x i32>), align 1, addrspace 5) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_v4s32_align1 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; 
UNALIGNED_GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; UNALIGNED_GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; UNALIGNED_GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], 
[[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; UNALIGNED_GFX9-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX9-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD 
[[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX9-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX9-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX9-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (i8) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (i8) from unknown-address + 13, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (i8) from unknown-address + 14, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p5) :: (load (i8) from unknown-address + 15, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX9-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; UNALIGNED_GFX10-LABEL: name: 
test_load_private_v4s32_align1 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX10-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from 
unknown-address + 9, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; UNALIGNED_GFX10-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; UNALIGNED_GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; UNALIGNED_GFX10-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from 
unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX10-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX10-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX10-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX10-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX10-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; 
UNALIGNED_GFX10-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (i8) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (i8) from unknown-address + 13, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (i8) from unknown-address + 14, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p5) :: (load (i8) from unknown-address + 15, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX10-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_v4s32_align1 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD 
[[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX11-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; UNALIGNED_GFX11-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; UNALIGNED_GFX11-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) - ; 
UNALIGNED_GFX11-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; UNALIGNED_GFX11-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX11-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR 
[[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX11-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX11-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX11-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (i8) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (i8) from unknown-address + 13, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (i8) from unknown-address + 14, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p5) :: (load (i8) from unknown-address + 15, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX11-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_v4s32_align1 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), 
addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD 
[[PTR_ADD7]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; UNALIGNED_GFX12-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; UNALIGNED_GFX12-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; UNALIGNED_GFX12-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; 
UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX12-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX12-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (i8) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX12-NEXT: 
[[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (i8) from unknown-address + 13, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (i8) from unknown-address + 14, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p5) :: (load (i8) from unknown-address + 15, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX12-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32), [[OR11]](i32) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) %0:_(p5) = COPY $vgpr0 - %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 1, addrspace 5) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<4 x i32>) = G_LOAD %0(p5) :: (load (<4 x i32>), align 1, addrspace 5) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<4 x i32>) ... --- @@ -14481,251 +14481,251 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) - ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5) - ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32) - ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD 
[[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 32, addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p5) :: (load (i32) from unknown-address + 16, align 16, addrspace 5) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i32) from unknown-address + 20, addrspace 5) + ; SI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p5) :: (load (i32) from unknown-address + 24, align 8, addrspace 5) + ; SI-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 28 + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](i32) + ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i32) from unknown-address + 28, addrspace 5) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32), [[LOAD6]](i32), [[LOAD7]](i32) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x i32>) ; ; CI-LABEL: name: test_load_private_v8s32_align32 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; 
CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5) - ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) - ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5) - ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32) - ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 32, addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p5) :: (load (i32) from unknown-address + 16, align 16, addrspace 5) + ; CI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i32) from unknown-address + 20, addrspace 5) + ; CI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p5) :: (load (i32) from unknown-address + 24, align 8, addrspace 5) + ; CI-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 28 + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](i32) + ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i32) from unknown-address + 28, addrspace 5) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32), [[LOAD6]](i32), [[LOAD7]](i32) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x i32>) ; ; VI-LABEL: name: test_load_private_v8s32_align32 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: 
[[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5) - ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32) - ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 32, addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p5) :: (load (i32) from unknown-address + 16, align 16, addrspace 5) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i32) from unknown-address + 20, addrspace 5) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; VI-NEXT: 
[[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p5) :: (load (i32) from unknown-address + 24, align 8, addrspace 5) + ; VI-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 28 + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](i32) + ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i32) from unknown-address + 28, addrspace 5) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32), [[LOAD6]](i32), [[LOAD7]](i32) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x i32>) ; ; GFX9-LABEL: name: test_load_private_v8s32_align32 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5) - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5) - ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32) - ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 32, addrspace 5) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; 
GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p5) :: (load (i32) from unknown-address + 16, align 16, addrspace 5) + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i32) from unknown-address + 20, addrspace 5) + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p5) :: (load (i32) from unknown-address + 24, align 8, addrspace 5) + ; GFX9-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 28 + ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](i32) + ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i32) from unknown-address + 28, addrspace 5) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32), [[LOAD6]](i32), [[LOAD7]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x i32>) ; ; GFX10-LABEL: name: test_load_private_v8s32_align32 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5) - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) - ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; GFX10-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5) - ; GFX10-NEXT: 
[[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32) - ; GFX10-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 32, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p5) :: (load (i32) from unknown-address + 16, align 16, addrspace 5) + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i32) from unknown-address + 20, addrspace 5) + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; GFX10-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p5) :: (load (i32) from unknown-address + 24, align 8, addrspace 5) + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 28 + ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](i32) + ; GFX10-NEXT: [[LOAD7:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i32) from unknown-address + 28, addrspace 5) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32), [[LOAD6]](i32), [[LOAD7]](i32) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x i32>) ; ; GFX11-LABEL: name: test_load_private_v8s32_align32 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 32, addrspace 5) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<4 x s32>) from unknown-address + 16, addrspace 5) - ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = 
G_LOAD [[COPY]](p5) :: (load (<4 x i32>), align 32, addrspace 5) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<4 x i32>) from unknown-address + 16, addrspace 5) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x i32>) ; ; GFX12-LABEL: name: test_load_private_v8s32_align32 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 32, addrspace 5) - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<4 x s32>) from unknown-address + 16, addrspace 5) - ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p5) :: (load (<4 x i32>), align 32, addrspace 5) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<4 x i32>) from unknown-address + 16, addrspace 5) + ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x i32>) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_v8s32_align32 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD 
[[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) - ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 32, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p5) :: (load (i32) from unknown-address + 16, align 16, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i32) from unknown-address + 20, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p5) :: (load (i32) from unknown-address + 24, align 8, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 28 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i32) from unknown-address + 28, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32), [[LOAD6]](i32), [[LOAD7]](i32) + ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x i32>) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_v8s32_align32 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: 
[[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) - ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 32, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; 
UNALIGNED_GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p5) :: (load (i32) from unknown-address + 16, align 16, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i32) from unknown-address + 20, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p5) :: (load (i32) from unknown-address + 24, align 8, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 28 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD7:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i32) from unknown-address + 28, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32), [[LOAD6]](i32), [[LOAD7]](i32) + ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x i32>) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_v8s32_align32 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 32, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<4 x s32>) from unknown-address + 16, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p5) :: (load (<4 x i32>), align 32, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<4 x i32>) from unknown-address + 16, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x i32>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_v8s32_align32 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 32, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<4 x s32>) from unknown-address + 16, addrspace 5) - ; UNALIGNED_GFX12-NEXT: 
[[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p5) :: (load (<4 x i32>), align 32, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<4 x i32>) from unknown-address + 16, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x i32>) %0:_(p5) = COPY $vgpr0 - %1:_(<8 x s32>) = G_LOAD %0 :: (load (<8 x s32>), align 32, addrspace 5) - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 + %1:_(<8 x i32>) = G_LOAD %0(p5) :: (load (<8 x i32>), addrspace 5) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1(<8 x i32>) ... --- @@ -14738,443 +14738,443 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) - ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5) - ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32) - ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5) - ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; SI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s32) from unknown-address + 32, align 32, addrspace 5) - ; SI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 36 - ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32) - ; 
SI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s32) from unknown-address + 36, addrspace 5) - ; SI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 40 - ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C9]](s32) - ; SI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s32) from unknown-address + 40, align 8, addrspace 5) - ; SI-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 44 - ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C10]](s32) - ; SI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s32) from unknown-address + 44, addrspace 5) - ; SI-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 - ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C11]](s32) - ; SI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s32) from unknown-address + 48, align 16, addrspace 5) - ; SI-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 52 - ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C12]](s32) - ; SI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s32) from unknown-address + 52, addrspace 5) - ; SI-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 56 - ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C13]](s32) - ; SI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s32) from unknown-address + 56, align 8, addrspace 5) - ; SI-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 60 - ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C14]](s32) - ; SI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s32) from unknown-address + 60, addrspace 5) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BUILD_VECTOR]](<16 x s32>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 32, addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p5) :: (load (i32) from unknown-address + 16, align 16, addrspace 5) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i32) from unknown-address + 20, addrspace 5) + ; SI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; SI-NEXT: 
[[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p5) :: (load (i32) from unknown-address + 24, align 8, addrspace 5) + ; SI-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 28 + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](i32) + ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i32) from unknown-address + 28, addrspace 5) + ; SI-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](i32) + ; SI-NEXT: [[LOAD8:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD7]](p5) :: (load (i32) from unknown-address + 32, align 32, addrspace 5) + ; SI-NEXT: [[C8:%[0-9]+]]:_(i32) = G_CONSTANT i32 36 + ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](i32) + ; SI-NEXT: [[LOAD9:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD8]](p5) :: (load (i32) from unknown-address + 36, addrspace 5) + ; SI-NEXT: [[C9:%[0-9]+]]:_(i32) = G_CONSTANT i32 40 + ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C9]](i32) + ; SI-NEXT: [[LOAD10:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD9]](p5) :: (load (i32) from unknown-address + 40, align 8, addrspace 5) + ; SI-NEXT: [[C10:%[0-9]+]]:_(i32) = G_CONSTANT i32 44 + ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C10]](i32) + ; SI-NEXT: [[LOAD11:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i32) from unknown-address + 44, addrspace 5) + ; SI-NEXT: [[C11:%[0-9]+]]:_(i32) = G_CONSTANT i32 48 + ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C11]](i32) + ; SI-NEXT: [[LOAD12:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD11]](p5) :: (load (i32) from unknown-address + 48, align 16, addrspace 5) + ; SI-NEXT: [[C12:%[0-9]+]]:_(i32) = G_CONSTANT i32 52 + ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C12]](i32) + ; SI-NEXT: [[LOAD13:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD12]](p5) :: (load (i32) from unknown-address + 52, addrspace 5) + ; SI-NEXT: [[C13:%[0-9]+]]:_(i32) = G_CONSTANT i32 56 + ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C13]](i32) + ; SI-NEXT: [[LOAD14:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD13]](p5) :: (load (i32) from unknown-address + 56, align 8, addrspace 5) + ; SI-NEXT: [[C14:%[0-9]+]]:_(i32) = G_CONSTANT i32 60 + ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C14]](i32) + ; SI-NEXT: [[LOAD15:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p5) :: (load (i32) from unknown-address + 60, addrspace 5) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32), [[LOAD6]](i32), [[LOAD7]](i32), [[LOAD8]](i32), [[LOAD9]](i32), [[LOAD10]](i32), [[LOAD11]](i32), [[LOAD12]](i32), [[LOAD13]](i32), [[LOAD14]](i32), [[LOAD15]](i32) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BUILD_VECTOR]](<16 x i32>) ; ; CI-LABEL: name: test_load_private_v16s32_align32 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD 
[[COPY]], [[C1]](s32) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5) - ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) - ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5) - ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32) - ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5) - ; CI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; CI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s32) from unknown-address + 32, align 32, addrspace 5) - ; CI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 36 - ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32) - ; CI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s32) from unknown-address + 36, addrspace 5) - ; CI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 40 - ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C9]](s32) - ; CI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s32) from unknown-address + 40, align 8, addrspace 5) - ; CI-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 44 - ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C10]](s32) - ; CI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s32) from unknown-address + 44, addrspace 5) - ; CI-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 - ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C11]](s32) - ; CI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s32) from unknown-address + 48, align 16, addrspace 5) - ; CI-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 52 - ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C12]](s32) - ; CI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s32) from unknown-address + 52, addrspace 5) - ; CI-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 56 - ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C13]](s32) - ; CI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s32) from unknown-address + 56, align 8, addrspace 5) - ; CI-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 60 - ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C14]](s32) - ; CI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s32) from unknown-address + 60, addrspace 5) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), 
[[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BUILD_VECTOR]](<16 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 32, addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p5) :: (load (i32) from unknown-address + 16, align 16, addrspace 5) + ; CI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i32) from unknown-address + 20, addrspace 5) + ; CI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p5) :: (load (i32) from unknown-address + 24, align 8, addrspace 5) + ; CI-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 28 + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](i32) + ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i32) from unknown-address + 28, addrspace 5) + ; CI-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](i32) + ; CI-NEXT: [[LOAD8:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD7]](p5) :: (load (i32) from unknown-address + 32, align 32, addrspace 5) + ; CI-NEXT: [[C8:%[0-9]+]]:_(i32) = G_CONSTANT i32 36 + ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](i32) + ; CI-NEXT: [[LOAD9:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD8]](p5) :: (load (i32) from unknown-address + 36, addrspace 5) + ; CI-NEXT: [[C9:%[0-9]+]]:_(i32) = G_CONSTANT i32 40 + ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C9]](i32) + ; CI-NEXT: [[LOAD10:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD9]](p5) :: (load (i32) from unknown-address + 40, align 8, addrspace 5) + ; CI-NEXT: [[C10:%[0-9]+]]:_(i32) = G_CONSTANT i32 44 + ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C10]](i32) + ; CI-NEXT: [[LOAD11:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i32) from unknown-address + 44, addrspace 5) + ; CI-NEXT: [[C11:%[0-9]+]]:_(i32) = G_CONSTANT i32 48 + ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C11]](i32) + ; CI-NEXT: [[LOAD12:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD11]](p5) :: (load (i32) from unknown-address + 48, align 16, addrspace 5) + ; CI-NEXT: 
[[C12:%[0-9]+]]:_(i32) = G_CONSTANT i32 52 + ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C12]](i32) + ; CI-NEXT: [[LOAD13:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD12]](p5) :: (load (i32) from unknown-address + 52, addrspace 5) + ; CI-NEXT: [[C13:%[0-9]+]]:_(i32) = G_CONSTANT i32 56 + ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C13]](i32) + ; CI-NEXT: [[LOAD14:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD13]](p5) :: (load (i32) from unknown-address + 56, align 8, addrspace 5) + ; CI-NEXT: [[C14:%[0-9]+]]:_(i32) = G_CONSTANT i32 60 + ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C14]](i32) + ; CI-NEXT: [[LOAD15:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p5) :: (load (i32) from unknown-address + 60, addrspace 5) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32), [[LOAD6]](i32), [[LOAD7]](i32), [[LOAD8]](i32), [[LOAD9]](i32), [[LOAD10]](i32), [[LOAD11]](i32), [[LOAD12]](i32), [[LOAD13]](i32), [[LOAD14]](i32), [[LOAD15]](i32) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BUILD_VECTOR]](<16 x i32>) ; ; VI-LABEL: name: test_load_private_v16s32_align32 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5) - ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32) - ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5) - ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; VI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s32) from unknown-address + 32, align 32, addrspace 5) - ; VI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 36 - ; 
VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32) - ; VI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s32) from unknown-address + 36, addrspace 5) - ; VI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 40 - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C9]](s32) - ; VI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s32) from unknown-address + 40, align 8, addrspace 5) - ; VI-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 44 - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C10]](s32) - ; VI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s32) from unknown-address + 44, addrspace 5) - ; VI-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 - ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C11]](s32) - ; VI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s32) from unknown-address + 48, align 16, addrspace 5) - ; VI-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 52 - ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C12]](s32) - ; VI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s32) from unknown-address + 52, addrspace 5) - ; VI-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 56 - ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C13]](s32) - ; VI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s32) from unknown-address + 56, align 8, addrspace 5) - ; VI-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 60 - ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C14]](s32) - ; VI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s32) from unknown-address + 60, addrspace 5) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BUILD_VECTOR]](<16 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 32, addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p5) :: (load (i32) from unknown-address + 16, align 16, addrspace 5) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i32) from unknown-address + 20, 
addrspace 5) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p5) :: (load (i32) from unknown-address + 24, align 8, addrspace 5) + ; VI-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 28 + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](i32) + ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i32) from unknown-address + 28, addrspace 5) + ; VI-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](i32) + ; VI-NEXT: [[LOAD8:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD7]](p5) :: (load (i32) from unknown-address + 32, align 32, addrspace 5) + ; VI-NEXT: [[C8:%[0-9]+]]:_(i32) = G_CONSTANT i32 36 + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](i32) + ; VI-NEXT: [[LOAD9:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD8]](p5) :: (load (i32) from unknown-address + 36, addrspace 5) + ; VI-NEXT: [[C9:%[0-9]+]]:_(i32) = G_CONSTANT i32 40 + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C9]](i32) + ; VI-NEXT: [[LOAD10:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD9]](p5) :: (load (i32) from unknown-address + 40, align 8, addrspace 5) + ; VI-NEXT: [[C10:%[0-9]+]]:_(i32) = G_CONSTANT i32 44 + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C10]](i32) + ; VI-NEXT: [[LOAD11:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i32) from unknown-address + 44, addrspace 5) + ; VI-NEXT: [[C11:%[0-9]+]]:_(i32) = G_CONSTANT i32 48 + ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C11]](i32) + ; VI-NEXT: [[LOAD12:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD11]](p5) :: (load (i32) from unknown-address + 48, align 16, addrspace 5) + ; VI-NEXT: [[C12:%[0-9]+]]:_(i32) = G_CONSTANT i32 52 + ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C12]](i32) + ; VI-NEXT: [[LOAD13:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD12]](p5) :: (load (i32) from unknown-address + 52, addrspace 5) + ; VI-NEXT: [[C13:%[0-9]+]]:_(i32) = G_CONSTANT i32 56 + ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C13]](i32) + ; VI-NEXT: [[LOAD14:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD13]](p5) :: (load (i32) from unknown-address + 56, align 8, addrspace 5) + ; VI-NEXT: [[C14:%[0-9]+]]:_(i32) = G_CONSTANT i32 60 + ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C14]](i32) + ; VI-NEXT: [[LOAD15:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p5) :: (load (i32) from unknown-address + 60, addrspace 5) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32), [[LOAD6]](i32), [[LOAD7]](i32), [[LOAD8]](i32), [[LOAD9]](i32), [[LOAD10]](i32), [[LOAD11]](i32), [[LOAD12]](i32), [[LOAD13]](i32), [[LOAD14]](i32), [[LOAD15]](i32) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BUILD_VECTOR]](<16 x i32>) ; ; GFX9-LABEL: name: test_load_private_v16s32_align32 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; 
GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5) - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5) - ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32) - ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5) - ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; GFX9-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s32) from unknown-address + 32, align 32, addrspace 5) - ; GFX9-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 36 - ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32) - ; GFX9-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s32) from unknown-address + 36, addrspace 5) - ; GFX9-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 40 - ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C9]](s32) - ; GFX9-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s32) from unknown-address + 40, align 8, addrspace 5) - ; GFX9-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 44 - ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C10]](s32) - ; GFX9-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s32) from unknown-address + 44, addrspace 5) - ; GFX9-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 - ; GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C11]](s32) - ; GFX9-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s32) from unknown-address + 48, align 16, addrspace 5) - ; GFX9-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 52 - ; GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C12]](s32) - ; GFX9-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s32) from unknown-address + 52, addrspace 5) - ; GFX9-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 56 - ; GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C13]](s32) - ; GFX9-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s32) from unknown-address + 56, align 8, addrspace 5) - ; GFX9-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 60 - ; GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C14]](s32) - ; GFX9-NEXT: 
[[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s32) from unknown-address + 60, addrspace 5) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BUILD_VECTOR]](<16 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 32, addrspace 5) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p5) :: (load (i32) from unknown-address + 16, align 16, addrspace 5) + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i32) from unknown-address + 20, addrspace 5) + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p5) :: (load (i32) from unknown-address + 24, align 8, addrspace 5) + ; GFX9-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 28 + ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](i32) + ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i32) from unknown-address + 28, addrspace 5) + ; GFX9-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](i32) + ; GFX9-NEXT: [[LOAD8:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD7]](p5) :: (load (i32) from unknown-address + 32, align 32, addrspace 5) + ; GFX9-NEXT: [[C8:%[0-9]+]]:_(i32) = G_CONSTANT i32 36 + ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](i32) + ; GFX9-NEXT: [[LOAD9:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD8]](p5) :: (load (i32) from unknown-address + 36, addrspace 5) + ; GFX9-NEXT: [[C9:%[0-9]+]]:_(i32) = G_CONSTANT i32 40 + ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C9]](i32) + ; GFX9-NEXT: [[LOAD10:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD9]](p5) :: (load (i32) from unknown-address + 40, align 8, addrspace 5) + ; GFX9-NEXT: [[C10:%[0-9]+]]:_(i32) = G_CONSTANT i32 44 + ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C10]](i32) + ; GFX9-NEXT: [[LOAD11:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i32) from unknown-address + 44, addrspace 5) + ; GFX9-NEXT: 
[[C11:%[0-9]+]]:_(i32) = G_CONSTANT i32 48 + ; GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C11]](i32) + ; GFX9-NEXT: [[LOAD12:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD11]](p5) :: (load (i32) from unknown-address + 48, align 16, addrspace 5) + ; GFX9-NEXT: [[C12:%[0-9]+]]:_(i32) = G_CONSTANT i32 52 + ; GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C12]](i32) + ; GFX9-NEXT: [[LOAD13:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD12]](p5) :: (load (i32) from unknown-address + 52, addrspace 5) + ; GFX9-NEXT: [[C13:%[0-9]+]]:_(i32) = G_CONSTANT i32 56 + ; GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C13]](i32) + ; GFX9-NEXT: [[LOAD14:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD13]](p5) :: (load (i32) from unknown-address + 56, align 8, addrspace 5) + ; GFX9-NEXT: [[C14:%[0-9]+]]:_(i32) = G_CONSTANT i32 60 + ; GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C14]](i32) + ; GFX9-NEXT: [[LOAD15:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p5) :: (load (i32) from unknown-address + 60, addrspace 5) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32), [[LOAD6]](i32), [[LOAD7]](i32), [[LOAD8]](i32), [[LOAD9]](i32), [[LOAD10]](i32), [[LOAD11]](i32), [[LOAD12]](i32), [[LOAD13]](i32), [[LOAD14]](i32), [[LOAD15]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BUILD_VECTOR]](<16 x i32>) ; ; GFX10-LABEL: name: test_load_private_v16s32_align32 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5) - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) - ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; GFX10-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5) - ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32) - ; GFX10-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 
(s32) from unknown-address + 28, addrspace 5) - ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; GFX10-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s32) from unknown-address + 32, align 32, addrspace 5) - ; GFX10-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 36 - ; GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32) - ; GFX10-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s32) from unknown-address + 36, addrspace 5) - ; GFX10-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 40 - ; GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C9]](s32) - ; GFX10-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s32) from unknown-address + 40, align 8, addrspace 5) - ; GFX10-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 44 - ; GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C10]](s32) - ; GFX10-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s32) from unknown-address + 44, addrspace 5) - ; GFX10-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 - ; GFX10-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C11]](s32) - ; GFX10-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s32) from unknown-address + 48, align 16, addrspace 5) - ; GFX10-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 52 - ; GFX10-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C12]](s32) - ; GFX10-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s32) from unknown-address + 52, addrspace 5) - ; GFX10-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 56 - ; GFX10-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C13]](s32) - ; GFX10-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s32) from unknown-address + 56, align 8, addrspace 5) - ; GFX10-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 60 - ; GFX10-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C14]](s32) - ; GFX10-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s32) from unknown-address + 60, addrspace 5) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BUILD_VECTOR]](<16 x s32>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 32, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; GFX10-NEXT: 
[[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p5) :: (load (i32) from unknown-address + 16, align 16, addrspace 5) + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i32) from unknown-address + 20, addrspace 5) + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; GFX10-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p5) :: (load (i32) from unknown-address + 24, align 8, addrspace 5) + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 28 + ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](i32) + ; GFX10-NEXT: [[LOAD7:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i32) from unknown-address + 28, addrspace 5) + ; GFX10-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](i32) + ; GFX10-NEXT: [[LOAD8:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD7]](p5) :: (load (i32) from unknown-address + 32, align 32, addrspace 5) + ; GFX10-NEXT: [[C8:%[0-9]+]]:_(i32) = G_CONSTANT i32 36 + ; GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](i32) + ; GFX10-NEXT: [[LOAD9:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD8]](p5) :: (load (i32) from unknown-address + 36, addrspace 5) + ; GFX10-NEXT: [[C9:%[0-9]+]]:_(i32) = G_CONSTANT i32 40 + ; GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C9]](i32) + ; GFX10-NEXT: [[LOAD10:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD9]](p5) :: (load (i32) from unknown-address + 40, align 8, addrspace 5) + ; GFX10-NEXT: [[C10:%[0-9]+]]:_(i32) = G_CONSTANT i32 44 + ; GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C10]](i32) + ; GFX10-NEXT: [[LOAD11:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i32) from unknown-address + 44, addrspace 5) + ; GFX10-NEXT: [[C11:%[0-9]+]]:_(i32) = G_CONSTANT i32 48 + ; GFX10-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C11]](i32) + ; GFX10-NEXT: [[LOAD12:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD11]](p5) :: (load (i32) from unknown-address + 48, align 16, addrspace 5) + ; GFX10-NEXT: [[C12:%[0-9]+]]:_(i32) = G_CONSTANT i32 52 + ; GFX10-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C12]](i32) + ; GFX10-NEXT: [[LOAD13:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD12]](p5) :: (load (i32) from unknown-address + 52, addrspace 5) + ; GFX10-NEXT: [[C13:%[0-9]+]]:_(i32) = G_CONSTANT i32 56 + ; GFX10-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C13]](i32) + ; GFX10-NEXT: [[LOAD14:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD13]](p5) :: (load (i32) from unknown-address + 56, align 8, addrspace 5) + ; GFX10-NEXT: [[C14:%[0-9]+]]:_(i32) = G_CONSTANT i32 60 + ; GFX10-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C14]](i32) + ; GFX10-NEXT: [[LOAD15:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p5) :: (load (i32) from unknown-address + 60, addrspace 5) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32), [[LOAD6]](i32), [[LOAD7]](i32), [[LOAD8]](i32), [[LOAD9]](i32), [[LOAD10]](i32), [[LOAD11]](i32), [[LOAD12]](i32), [[LOAD13]](i32), [[LOAD14]](i32), [[LOAD15]](i32) + ; GFX10-NEXT: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BUILD_VECTOR]](<16 x i32>) ; ; GFX11-LABEL: name: test_load_private_v16s32_align32 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 32, addrspace 5) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<4 x s32>) from unknown-address + 16, addrspace 5) - ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p5) :: (load (<4 x s32>) from unknown-address + 32, align 32, addrspace 5) - ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 - ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p5) :: (load (<4 x s32>) from unknown-address + 48, addrspace 5) - ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p5) :: (load (<4 x i32>), align 32, addrspace 5) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<4 x i32>) from unknown-address + 16, addrspace 5) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD1]](p5) :: (load (<4 x i32>) from unknown-address + 32, align 32, addrspace 5) + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 48 + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD2]](p5) :: (load (<4 x i32>) from unknown-address + 48, addrspace 5) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>), [[LOAD2]](<4 x i32>), [[LOAD3]](<4 x i32>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x i32>) ; ; GFX12-LABEL: name: test_load_private_v16s32_align32 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 32, addrspace 5) - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<4 x s32>) from unknown-address + 16, addrspace 5) - ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p5) :: (load (<4 x s32>) from 
unknown-address + 32, align 32, addrspace 5) - ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 - ; GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p5) :: (load (<4 x s32>) from unknown-address + 48, addrspace 5) - ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p5) :: (load (<4 x i32>), align 32, addrspace 5) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<4 x i32>) from unknown-address + 16, addrspace 5) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD1]](p5) :: (load (<4 x i32>) from unknown-address + 32, align 32, addrspace 5) + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 48 + ; GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD2]](p5) :: (load (<4 x i32>) from unknown-address + 48, addrspace 5) + ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>), [[LOAD2]](<4 x i32>), [[LOAD3]](<4 x i32>) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x i32>) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_v16s32_align32 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) 
:: (load (s32) from unknown-address + 20, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s32) from unknown-address + 32, align 32, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 36 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s32) from unknown-address + 36, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 40 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C9]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s32) from unknown-address + 40, align 8, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 44 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C10]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s32) from unknown-address + 44, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C11]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s32) from unknown-address + 48, align 16, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 52 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C12]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s32) from unknown-address + 52, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 56 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C13]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s32) from unknown-address + 56, align 8, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 60 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C14]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s32) from unknown-address + 60, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32) - ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BUILD_VECTOR]](<16 x s32>) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load 
(i32), align 32, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p5) :: (load (i32) from unknown-address + 16, align 16, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i32) from unknown-address + 20, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p5) :: (load (i32) from unknown-address + 24, align 8, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 28 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i32) from unknown-address + 28, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD8:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD7]](p5) :: (load (i32) from unknown-address + 32, align 32, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C8:%[0-9]+]]:_(i32) = G_CONSTANT i32 36 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD9:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD8]](p5) :: (load (i32) from unknown-address + 36, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C9:%[0-9]+]]:_(i32) = G_CONSTANT i32 40 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C9]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD10:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD9]](p5) :: (load (i32) from unknown-address + 40, align 8, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C10:%[0-9]+]]:_(i32) = G_CONSTANT i32 44 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C10]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD11:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i32) from unknown-address + 44, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C11:%[0-9]+]]:_(i32) = G_CONSTANT i32 48 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C11]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD12:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD11]](p5) :: (load (i32) from unknown-address + 48, align 16, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C12:%[0-9]+]]:_(i32) = 
G_CONSTANT i32 52 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C12]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD13:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD12]](p5) :: (load (i32) from unknown-address + 52, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C13:%[0-9]+]]:_(i32) = G_CONSTANT i32 56 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C13]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD14:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD13]](p5) :: (load (i32) from unknown-address + 56, align 8, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C14:%[0-9]+]]:_(i32) = G_CONSTANT i32 60 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C14]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD15:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p5) :: (load (i32) from unknown-address + 60, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32), [[LOAD6]](i32), [[LOAD7]](i32), [[LOAD8]](i32), [[LOAD9]](i32), [[LOAD10]](i32), [[LOAD11]](i32), [[LOAD12]](i32), [[LOAD13]](i32), [[LOAD14]](i32), [[LOAD15]](i32) + ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BUILD_VECTOR]](<16 x i32>) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_v16s32_align32 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32) - ; 
UNALIGNED_GFX10-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s32) from unknown-address + 32, align 32, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 36 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s32) from unknown-address + 36, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 40 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C9]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s32) from unknown-address + 40, align 8, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 44 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C10]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s32) from unknown-address + 44, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C11]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s32) from unknown-address + 48, align 16, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 52 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C12]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s32) from unknown-address + 52, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 56 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C13]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s32) from unknown-address + 56, align 8, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 60 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C14]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s32) from unknown-address + 60, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32) - ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BUILD_VECTOR]](<16 x s32>) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 32, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; 
UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p5) :: (load (i32) from unknown-address + 16, align 16, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i32) from unknown-address + 20, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p5) :: (load (i32) from unknown-address + 24, align 8, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 28 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD7:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i32) from unknown-address + 28, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD8:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD7]](p5) :: (load (i32) from unknown-address + 32, align 32, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C8:%[0-9]+]]:_(i32) = G_CONSTANT i32 36 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD9:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD8]](p5) :: (load (i32) from unknown-address + 36, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C9:%[0-9]+]]:_(i32) = G_CONSTANT i32 40 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C9]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD10:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD9]](p5) :: (load (i32) from unknown-address + 40, align 8, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C10:%[0-9]+]]:_(i32) = G_CONSTANT i32 44 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C10]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD11:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i32) from unknown-address + 44, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C11:%[0-9]+]]:_(i32) = G_CONSTANT i32 48 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C11]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD12:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD11]](p5) :: (load (i32) from unknown-address + 48, align 16, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C12:%[0-9]+]]:_(i32) = G_CONSTANT i32 52 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C12]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD13:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD12]](p5) :: (load (i32) from unknown-address + 52, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C13:%[0-9]+]]:_(i32) = G_CONSTANT i32 56 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C13]](i32) + ; UNALIGNED_GFX10-NEXT: 
[[LOAD14:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD13]](p5) :: (load (i32) from unknown-address + 56, align 8, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C14:%[0-9]+]]:_(i32) = G_CONSTANT i32 60 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C14]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD15:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p5) :: (load (i32) from unknown-address + 60, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32), [[LOAD6]](i32), [[LOAD7]](i32), [[LOAD8]](i32), [[LOAD9]](i32), [[LOAD10]](i32), [[LOAD11]](i32), [[LOAD12]](i32), [[LOAD13]](i32), [[LOAD14]](i32), [[LOAD15]](i32) + ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BUILD_VECTOR]](<16 x i32>) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_v16s32_align32 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 32, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<4 x s32>) from unknown-address + 16, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p5) :: (load (<4 x s32>) from unknown-address + 32, align 32, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p5) :: (load (<4 x s32>) from unknown-address + 48, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p5) :: (load (<4 x i32>), align 32, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<4 x i32>) from unknown-address + 16, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD1]](p5) :: (load (<4 x i32>) from unknown-address + 32, align 32, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 48 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD2]](p5) :: (load (<4 x i32>) from unknown-address + 48, addrspace 5) + ; UNALIGNED_GFX11-NEXT: 
[[CONCAT_VECTORS:%[0-9]+]]:_(<16 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>), [[LOAD2]](<4 x i32>), [[LOAD3]](<4 x i32>) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x i32>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_v16s32_align32 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 32, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<4 x s32>) from unknown-address + 16, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p5) :: (load (<4 x s32>) from unknown-address + 32, align 32, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p5) :: (load (<4 x s32>) from unknown-address + 48, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p5) :: (load (<4 x i32>), align 32, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<4 x i32>) from unknown-address + 16, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD1]](p5) :: (load (<4 x i32>) from unknown-address + 32, align 32, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 48 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD2]](p5) :: (load (<4 x i32>) from unknown-address + 48, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>), [[LOAD2]](<4 x i32>), [[LOAD3]](<4 x i32>) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x i32>) %0:_(p5) = COPY $vgpr0 - %1:_(<16 x s32>) = G_LOAD %0 :: (load (<16 x s32>), align 32, addrspace 5) - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1 + %1:_(<16 x i32>) = G_LOAD %0(p5) :: (load (<16 x i32>), align 32, addrspace 5) + 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1(<16 x i32>) ... --- @@ -15187,158 +15187,158 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; SI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; SI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, addrspace 5) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; SI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD2]](i32), [[LOAD3]](i32) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; CI-LABEL: name: test_load_private_v2s64_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; CI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) - ; 
CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; CI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, addrspace 5) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; CI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD2]](i32), [[LOAD3]](i32) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; VI-LABEL: name: test_load_private_v2s64_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; VI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, addrspace 5) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; VI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD2]](i32), [[LOAD3]](i32) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64) + ; 
VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; GFX9-LABEL: name: test_load_private_v2s64_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, addrspace 5) + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD2]](i32), [[LOAD3]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; GFX10-LABEL: name: test_load_private_v2s64_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) - ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = 
G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD2]](i32), [[LOAD3]](i32) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; GFX11-LABEL: name: test_load_private_v2s64_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p5) :: (load (<2 x s64>), align 4, addrspace 5) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p5) :: (load (<2 x i64>), align 4, addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) ; ; GFX12-LABEL: name: test_load_private_v2s64_align4 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p5) :: (load (<2 x s64>), align 4, addrspace 5) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p5) :: (load (<2 x i64>), align 4, addrspace 5) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_v2s64_align4 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; 
UNALIGNED_GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) + ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD2]](i32), [[LOAD3]](i32) + ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64) + ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_v2s64_align4 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) + ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + 
; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD2]](i32), [[LOAD3]](i32) + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64) + ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_v2s64_align4 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p5) :: (load (<2 x s64>), align 4, addrspace 5) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p5) :: (load (<2 x i64>), align 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_v2s64_align4 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p5) :: (load (<2 x s64>), align 4, addrspace 5) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p5) :: (load (<2 x i64>), align 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) %0:_(p5) = COPY $vgpr0 - %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 4, addrspace 5) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<2 x i64>) = G_LOAD %0(p5) :: (load (<2 x i64>), align 4, addrspace 5) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x i64>) ... 
--- @@ -15351,553 +15351,553 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) - ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD 
[[PTR_ADD9]], [[C]](s32) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32) - ; SI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) - ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) - ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; SI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) - ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) - ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; SI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; SI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s32), [[OR11]](s32) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = 
G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; SI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR2]](i32), [[OR5]](i32) + ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; SI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; SI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; SI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; SI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; SI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; SI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; SI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i32) + ; SI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (i8) from unknown-address + 12, addrspace 5) + ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (i8) from unknown-address + 13, addrspace 5) + ; SI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; SI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (i8) from unknown-address + 14, addrspace 5) + ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p5) :: (load (i8) from unknown-address + 15, addrspace 5) + ; SI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; SI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; SI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; SI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; SI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR8]](i32), [[OR11]](i32) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[MV]](i64), 
[[MV1]](i64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; CI-LABEL: name: test_load_private_v2s64_align16 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) - ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; CI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load 
(s8) from unknown-address + 10, addrspace 5) - ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32) - ; CI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) - ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) - ; CI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; CI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) - ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) - ; CI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; CI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; CI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; CI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; CI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s32), [[OR11]](s32) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; CI-NEXT: 
[[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR2]](i32), [[OR5]](i32) + ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; CI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; CI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; CI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; CI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; CI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i32) + ; CI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (i8) from unknown-address + 12, addrspace 5) + ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (i8) from unknown-address + 13, addrspace 5) + ; CI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; CI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (i8) from unknown-address + 14, addrspace 5) + ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p5) :: (load (i8) from unknown-address + 15, addrspace 5) + ; CI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; CI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; CI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; CI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; CI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES 
[[OR8]](i32), [[OR11]](i32) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; VI-LABEL: name: test_load_private_v2s64_align16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD 
[[PTR_ADD7]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32) - ; VI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) - ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) - ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) - ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) - ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s32), [[OR11]](s32) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = 
G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; VI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR2]](i32), [[OR5]](i32) + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; VI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; VI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; VI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; VI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i32) + ; VI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (i8) from unknown-address + 12, addrspace 5) + ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (i8) from unknown-address + 13, addrspace 5) + ; VI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; VI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (i8) from unknown-address + 14, addrspace 5) + ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p5) :: (load (i8) from unknown-address + 15, addrspace 5) + ; VI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; VI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; VI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; VI-NEXT: 
[[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; VI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR8]](i32), [[OR11]](i32) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; GFX9-LABEL: name: test_load_private_v2s64_align16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 1, addrspace 5) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, align 1, addrspace 5) - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 1, addrspace 5) - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, align 1, addrspace 5) - ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 1, addrspace 5) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, align 1, addrspace 5) + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 1, addrspace 5) + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, align 1, addrspace 5) + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD2]](i32), [[LOAD3]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; GFX10-LABEL: name: test_load_private_v2s64_align16 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 1, addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, align 1, addrspace 5) - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = 
G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 1, addrspace 5) - ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, align 1, addrspace 5) - ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 1, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, align 1, addrspace 5) + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 1, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, align 1, addrspace 5) + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD2]](i32), [[LOAD3]](i32) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; GFX11-LABEL: name: test_load_private_v2s64_align16 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p5) :: (load (<2 x s64>), align 1, addrspace 5) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p5) :: (load (<2 x i64>), align 1, addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) ; ; GFX12-LABEL: name: test_load_private_v2s64_align16 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p5) :: (load (<2 x s64>), align 1, addrspace 5) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p5) :: (load (<2 x i64>), align 1, addrspace 5) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x i64>) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_v2s64_align16 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], 
[[ZEXTLOAD]] - ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], 
[[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; UNALIGNED_GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; UNALIGNED_GFX9-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; UNALIGNED_GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s32), [[OR11]](s32) - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX9-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD 
[[COPY]], [[C4]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX9-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR2]](i32), [[OR5]](i32) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX9-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (i8) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (i8) from unknown-address + 13, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD 
[[PTR_ADD13]](p5) :: (load (i8) from unknown-address + 14, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p5) :: (load (i8) from unknown-address + 15, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX9-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; UNALIGNED_GFX9-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR8]](i32), [[OR11]](i32) + ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64) + ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_v2s64_align16 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX10-NEXT: 
[[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX10-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; UNALIGNED_GFX10-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; UNALIGNED_GFX10-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; UNALIGNED_GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s32), [[OR11]](s32) - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) 
= G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX10-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX10-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX10-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX10-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR2]](i32), [[OR5]](i32) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) 
= G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX10-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (i8) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (i8) from unknown-address + 13, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (i8) from unknown-address + 14, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p5) :: (load (i8) from unknown-address + 15, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX10-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; UNALIGNED_GFX10-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR8]](i32), [[OR11]](i32) + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64) + ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_v2s64_align16 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11-NEXT: 
[[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX11-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; UNALIGNED_GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX11-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; UNALIGNED_GFX11-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; UNALIGNED_GFX11-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD6]] - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; 
UNALIGNED_GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[ZEXTLOAD8]] - ; UNALIGNED_GFX11-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[OR8]], [[C3]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[OR7]] - ; UNALIGNED_GFX11-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR9]](s32) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD9]] - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[ZEXTLOAD11]] - ; UNALIGNED_GFX11-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] - ; UNALIGNED_GFX11-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; UNALIGNED_GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; UNALIGNED_GFX11-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] - ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; 
UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX11-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR2]](i32) + ; UNALIGNED_GFX11-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX11-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; UNALIGNED_GFX11-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX11-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], 
[[ZEXTLOAD8]] + ; UNALIGNED_GFX11-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[OR8]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[OR7]] + ; UNALIGNED_GFX11-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[OR9]](i32) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (i8) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (i8) from unknown-address + 13, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (i8) from unknown-address + 14, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p5) :: (load (i8) from unknown-address + 15, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX11-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[OR11]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[OR10]] + ; UNALIGNED_GFX11-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[OR12]](i32) + ; UNALIGNED_GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; UNALIGNED_GFX11-NEXT: [[SHL13:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT1]], [[COPY1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR13:%[0-9]+]]:_(i64) = G_OR [[SHL13]], [[ZEXT1]] + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[OR6]](i64), [[OR13]](i64) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_v2s64_align16 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL 
[[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) - ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) - ; UNALIGNED_GFX12-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD6]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[ZEXTLOAD8]] - ; UNALIGNED_GFX12-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[OR8]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[OR7]] - ; UNALIGNED_GFX12-NEXT: 
[[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR9]](s32) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD9]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[ZEXTLOAD11]] - ; UNALIGNED_GFX12-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] - ; UNALIGNED_GFX12-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; UNALIGNED_GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; UNALIGNED_GFX12-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT 
[[OR2]](i32) + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR5]](i32) + ; UNALIGNED_GFX12-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT]], [[C5]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(i64) = G_OR [[SHL6]], [[ZEXT]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX12-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[OR8]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[OR7]] + ; UNALIGNED_GFX12-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[OR9]](i32) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (i8) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; 
UNALIGNED_GFX12-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (i8) from unknown-address + 13, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (i8) from unknown-address + 14, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p5) :: (load (i8) from unknown-address + 15, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX12-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[OR11]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[OR10]] + ; UNALIGNED_GFX12-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[OR12]](i32) + ; UNALIGNED_GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; UNALIGNED_GFX12-NEXT: [[SHL13:%[0-9]+]]:_(i64) = G_SHL [[ANYEXT1]], [[COPY1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR13:%[0-9]+]]:_(i64) = G_OR [[SHL13]], [[ZEXT1]] + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[OR6]](i64), [[OR13]](i64) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) %0:_(p5) = COPY $vgpr0 - %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 1, addrspace 5) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<2 x i64>) = G_LOAD %0(p5) :: (load (<2 x i64>), align 1, addrspace 5) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x i64>) ... 
--- @@ -15910,244 +15910,244 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; SI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) - ; SI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 32, addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; SI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; SI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD2]](i32), [[LOAD3]](i32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p5) :: (load (i32) from unknown-address + 16, align 16, addrspace 5) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i32) from unknown-address + 20, addrspace 5) + ; SI-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD4]](i32), [[LOAD5]](i32) + ; SI-NEXT: 
[[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64), [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64), [[UV3]](i64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; CI-LABEL: name: test_load_private_v3s64_align32 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; CI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) - ; CI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32) - ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 32, addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; CI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; CI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD2]](i32), [[LOAD3]](i32) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) 
+ ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p5) :: (load (i32) from unknown-address + 16, align 16, addrspace 5) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i32) from unknown-address + 20, addrspace 5) + ; CI-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD4]](i32), [[LOAD5]](i32) + ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; CI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64), [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64), [[UV3]](i64) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; VI-LABEL: name: test_load_private_v3s64_align32 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) - ; VI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 32, addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; VI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, 
addrspace 5) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; VI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD2]](i32), [[LOAD3]](i32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p5) :: (load (i32) from unknown-address + 16, align 16, addrspace 5) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i32) from unknown-address + 20, addrspace 5) + ; VI-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD4]](i32), [[LOAD5]](i32) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64), [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64), [[UV3]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; GFX9-LABEL: name: test_load_private_v3s64_align32 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5) - ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) - ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 32, addrspace 5) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: 
[[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD2]](i32), [[LOAD3]](i32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p5) :: (load (i32) from unknown-address + 16, align 16, addrspace 5) + ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i32) from unknown-address + 20, addrspace 5) + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD4]](i32), [[LOAD5]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64), [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64), [[UV3]](i64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; GFX10-LABEL: name: test_load_private_v3s64_align32 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5) - ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) - ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; 
GFX10-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 32, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD2]](i32), [[LOAD3]](i32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p5) :: (load (i32) from unknown-address + 16, align 16, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i32) from unknown-address + 20, addrspace 5) + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD4]](i32), [[LOAD5]](i32) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64), [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64), [[UV3]](i64) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; GFX11-LABEL: name: test_load_private_v3s64_align32 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p5) :: (load (<2 x s64>), align 32, addrspace 5) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p5) :: (load (s64) from unknown-address + 16, align 16, addrspace 5) - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p5) :: (load (<2 x i64>), align 32, addrspace 5) + ; GFX11-NEXT: 
[[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD]](p5) :: (load (i64) from unknown-address + 16, align 16, addrspace 5) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[LOAD]](<2 x i64>) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64), [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[UV]](i64), [[UV1]](i64), [[LOAD1]](i64), [[UV5]](i64) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; GFX12-LABEL: name: test_load_private_v3s64_align32 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p5) :: (load (<2 x s64>), align 32, addrspace 5) - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p5) :: (load (s64) from unknown-address + 16, align 16, addrspace 5) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p5) :: (load (<2 x i64>), align 32, addrspace 5) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD]](p5) :: (load (i64) from unknown-address + 16, align 16, addrspace 5) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[LOAD]](<2 x i64>) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64), [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[UV]](i64), [[UV1]](i64), [[LOAD1]](i64), [[UV5]](i64) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_v3s64_align32 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9-NEXT: 
[[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) - ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32) - ; UNALIGNED_GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; UNALIGNED_GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64) - ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 32, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) + ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD2]](i32), [[LOAD3]](i32) + ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p5) :: (load (i32) from unknown-address + 16, align 16, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i32) from unknown-address + 20, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD4]](i32), [[LOAD5]](i32) + ; UNALIGNED_GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; UNALIGNED_GFX9-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64), [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; UNALIGNED_GFX9-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64), [[UV3]](i64) + ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_v3s64_align32 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) - ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32) - ; UNALIGNED_GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; UNALIGNED_GFX10-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64) - ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 32, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) + ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: 
(load (i32) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD2]](i32), [[LOAD3]](i32) + ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p5) :: (load (i32) from unknown-address + 16, align 16, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i32) from unknown-address + 20, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD4]](i32), [[LOAD5]](i32) + ; UNALIGNED_GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; UNALIGNED_GFX10-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64), [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64), [[UV3]](i64) + ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_v3s64_align32 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p5) :: (load (<2 x s64>), align 32, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p5) :: (load (s64) from unknown-address + 16, align 16, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; UNALIGNED_GFX11-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; UNALIGNED_GFX11-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p5) :: (load (<2 x i64>), align 32, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD]](p5) :: (load (i64) from unknown-address + 16, align 16, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[LOAD]](<2 x i64>) + ; UNALIGNED_GFX11-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; UNALIGNED_GFX11-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64), [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[UV]](i64), [[UV1]](i64), [[LOAD1]](i64), [[UV5]](i64) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_v3s64_align32 ; UNALIGNED_GFX12: liveins: $vgpr0 ; 
UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p5) :: (load (<2 x s64>), align 32, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p5) :: (load (s64) from unknown-address + 16, align 16, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; UNALIGNED_GFX12-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p5) :: (load (<2 x i64>), align 32, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i64) = G_LOAD [[PTR_ADD]](p5) :: (load (i64) from unknown-address + 16, align 16, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[LOAD]](<2 x i64>) + ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; UNALIGNED_GFX12-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64), [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[UV]](i64), [[UV1]](i64), [[LOAD1]](i64), [[UV5]](i64) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) %0:_(p5) = COPY $vgpr0 - %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 32, addrspace 5) - %2:_(<4 x s64>) = G_IMPLICIT_DEF - %3:_(<4 x s64>) = G_INSERT %2, %1, 0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3 + %1:_(<3 x i64>) = G_LOAD %0(p5) :: (load (<3 x i64>), align 32, addrspace 5) + %2:_(<4 x i64>) = G_IMPLICIT_DEF + %3:_(<4 x i64>) = G_INSERT %2, %1(<3 x i64>), 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3(<4 x i64>) ... 
--- @@ -16160,258 +16160,258 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; SI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) - ; SI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5) - ; SI-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD6]](s32), [[LOAD7]](s32) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 32, addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; SI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; SI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD2]](i32), [[LOAD3]](i32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p5) :: (load (i32) from 
unknown-address + 16, align 16, addrspace 5) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i32) from unknown-address + 20, addrspace 5) + ; SI-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD4]](i32), [[LOAD5]](i32) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p5) :: (load (i32) from unknown-address + 24, align 8, addrspace 5) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i32) from unknown-address + 28, addrspace 5) + ; SI-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD6]](i32), [[LOAD7]](i32) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64), [[MV3]](i64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; CI-LABEL: name: test_load_private_v4s64_align32 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; CI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) - ; CI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32) - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5) - ; CI-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD6]](s32), [[LOAD7]](s32) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 
32, addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; CI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; CI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD2]](i32), [[LOAD3]](i32) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p5) :: (load (i32) from unknown-address + 16, align 16, addrspace 5) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i32) from unknown-address + 20, addrspace 5) + ; CI-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD4]](i32), [[LOAD5]](i32) + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p5) :: (load (i32) from unknown-address + 24, align 8, addrspace 5) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i32) from unknown-address + 28, addrspace 5) + ; CI-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD6]](i32), [[LOAD7]](i32) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64), [[MV3]](i64) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; VI-LABEL: name: test_load_private_v4s64_align32 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5) - ; VI-NEXT: 
[[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) - ; VI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5) - ; VI-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD6]](s32), [[LOAD7]](s32) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 32, addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; VI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; VI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD2]](i32), [[LOAD3]](i32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p5) :: (load (i32) from unknown-address + 16, align 16, addrspace 5) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i32) from unknown-address + 20, addrspace 5) + ; VI-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD4]](i32), [[LOAD5]](i32) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p5) :: (load (i32) from unknown-address + 24, align 8, addrspace 5) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i32) from unknown-address + 28, addrspace 5) + ; VI-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD6]](i32), [[LOAD7]](i32) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64), [[MV3]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; GFX9-LABEL: name: test_load_private_v4s64_align32 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5) - ; GFX9-NEXT: 
[[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5) - ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) - ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32) - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5) - ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5) - ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD6]](s32), [[LOAD7]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 32, addrspace 5) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD2]](i32), [[LOAD3]](i32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p5) :: (load (i32) from unknown-address + 16, align 16, addrspace 5) + ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = 
G_LOAD [[PTR_ADD4]](p5) :: (load (i32) from unknown-address + 20, addrspace 5) + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD4]](i32), [[LOAD5]](i32) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p5) :: (load (i32) from unknown-address + 24, align 8, addrspace 5) + ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i32) from unknown-address + 28, addrspace 5) + ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD6]](i32), [[LOAD7]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64), [[MV3]](i64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; GFX10-LABEL: name: test_load_private_v4s64_align32 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5) - ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) - ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32) - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX10-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5) - ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX10-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5) - ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD6]](s32), [[LOAD7]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 32, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = 
G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD2]](i32), [[LOAD3]](i32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p5) :: (load (i32) from unknown-address + 16, align 16, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i32) from unknown-address + 20, addrspace 5) + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD4]](i32), [[LOAD5]](i32) + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; GFX10-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p5) :: (load (i32) from unknown-address + 24, align 8, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; GFX10-NEXT: [[LOAD7:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i32) from unknown-address + 28, addrspace 5) + ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD6]](i32), [[LOAD7]](i32) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64), [[MV3]](i64) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; GFX11-LABEL: name: test_load_private_v4s64_align32 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p5) :: (load (<2 x s64>), align 32, addrspace 5) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s64>) from unknown-address + 16, addrspace 5) - ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p5) :: (load (<2 x i64>), align 32, addrspace 5) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x i64>) from unknown-address + 16, addrspace 5) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i64>) = G_CONCAT_VECTORS [[LOAD]](<2 x i64>), [[LOAD1]](<2 x i64>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x i64>) ; ; GFX12-LABEL: name: 
test_load_private_v4s64_align32 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p5) :: (load (<2 x s64>), align 32, addrspace 5) - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s64>) from unknown-address + 16, addrspace 5) - ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p5) :: (load (<2 x i64>), align 32, addrspace 5) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x i64>) from unknown-address + 16, addrspace 5) + ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i64>) = G_CONCAT_VECTORS [[LOAD]](<2 x i64>), [[LOAD1]](<2 x i64>) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x i64>) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_v4s64_align32 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) - ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32) - ; UNALIGNED_GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) 
= G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD6]](s32), [[LOAD7]](s32) - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) - ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 32, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) + ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD2]](i32), [[LOAD3]](i32) + ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p5) :: (load (i32) from unknown-address + 16, align 16, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i32) from unknown-address + 20, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD4]](i32), [[LOAD5]](i32) + ; UNALIGNED_GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p5) :: (load (i32) from unknown-address + 24, align 8, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i32) from unknown-address + 28, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD6]](i32), [[LOAD7]](i32) + ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64), [[MV3]](i64) + ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_v4s64_align32 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: 
[[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) - ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32) - ; UNALIGNED_GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD6]](s32), [[LOAD7]](s32) - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) - ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 32, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[LOAD1]](i32) + ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD2]](i32), [[LOAD3]](i32) + ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; 
UNALIGNED_GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p5) :: (load (i32) from unknown-address + 16, align 16, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i32) from unknown-address + 20, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD4]](i32), [[LOAD5]](i32) + ; UNALIGNED_GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p5) :: (load (i32) from unknown-address + 24, align 8, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD7:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i32) from unknown-address + 28, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD6]](i32), [[LOAD7]](i32) + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64), [[MV3]](i64) + ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_v4s64_align32 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p5) :: (load (<2 x s64>), align 32, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s64>) from unknown-address + 16, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p5) :: (load (<2 x i64>), align 32, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x i64>) from unknown-address + 16, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i64>) = G_CONCAT_VECTORS [[LOAD]](<2 x i64>), [[LOAD1]](<2 x i64>) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x i64>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_v4s64_align32 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p5) :: (load (<2 x s64>), align 32, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s64>) from unknown-address + 16, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x 
s64>) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[COPY]](p5) :: (load (<2 x i64>), align 32, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x i64>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x i64>) from unknown-address + 16, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i64>) = G_CONCAT_VECTORS [[LOAD]](<2 x i64>), [[LOAD1]](<2 x i64>) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x i64>) %0:_(p5) = COPY $vgpr0 - %1:_(<4 x s64>) = G_LOAD %0 :: (load (<4 x s64>), align 32, addrspace 5) - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 + %1:_(<4 x i64>) = G_LOAD %0(p5) :: (load (<4 x i64>), addrspace 5) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1(<4 x i64>) ... --- @@ -16424,162 +16424,162 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, addrspace 5) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; CI-LABEL: name: test_load_private_v2p1_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: 
(load (s32), addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, addrspace 5) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; VI-LABEL: name: test_load_private_v2p1_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from 
unknown-address + 4, addrspace 5) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, addrspace 5) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; GFX9-LABEL: name: test_load_private_v2p1_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, addrspace 5) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; GFX10-LABEL: name: test_load_private_v2p1_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: 
[[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, addrspace 5) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; GFX11-LABEL: name: test_load_private_v2p1_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 4, addrspace 5) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p5) :: (load (<4 x i32>), align 4, addrspace 5) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; GFX12-LABEL: name: test_load_private_v2p1_align4 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 4, addrspace 5) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p5) :: (load (<4 x i32>), align 4, addrspace 5) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_v2p1_align4 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; 
UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; UNALIGNED_GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; UNALIGNED_GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_v2p1_align4 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), 
[[LOAD3]](s32) - ; UNALIGNED_GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32) + ; UNALIGNED_GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x i32>) ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_v2p1_align4 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 4, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p5) :: (load (<4 x i32>), align 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_v2p1_align4 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 4, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p5) :: (load (<4 x i32>), align 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x i32>) ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) %0:_(p5) = COPY $vgpr0 - %1:_(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 4, addrspace 5) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<2 x p1>) = G_LOAD %0(p5) :: (load (<2 x p1>), align 4, addrspace 5) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x p1>) ... 
--- @@ -16592,262 +16592,262 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 8, addrspace 5) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) - ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5) - ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32) - ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[BUILD_VECTOR]](<8 x s32>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p5) :: (load (i32) from unknown-address + 16, align 8, addrspace 5) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: 
(load (i32) from unknown-address + 20, addrspace 5) + ; SI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p5) :: (load (i32) from unknown-address + 24, align 8, addrspace 5) + ; SI-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 28 + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](i32) + ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i32) from unknown-address + 28, addrspace 5) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32), [[LOAD6]](i32), [[LOAD7]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[BUILD_VECTOR]](<8 x i32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>) ; ; CI-LABEL: name: test_load_private_v4p1_align8 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 8, addrspace 5) - ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) - ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5) - ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32) - ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[BUILD_VECTOR]](<8 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; CI-NEXT: 
[[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p5) :: (load (i32) from unknown-address + 16, align 8, addrspace 5) + ; CI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i32) from unknown-address + 20, addrspace 5) + ; CI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p5) :: (load (i32) from unknown-address + 24, align 8, addrspace 5) + ; CI-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 28 + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](i32) + ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i32) from unknown-address + 28, addrspace 5) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32), [[LOAD6]](i32), [[LOAD7]](i32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[BUILD_VECTOR]](<8 x i32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>) ; ; VI-LABEL: name: test_load_private_v4p1_align8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 8, addrspace 5) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 
(s32) from unknown-address + 24, align 8, addrspace 5) - ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32) - ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[BUILD_VECTOR]](<8 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p5) :: (load (i32) from unknown-address + 16, align 8, addrspace 5) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i32) from unknown-address + 20, addrspace 5) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p5) :: (load (i32) from unknown-address + 24, align 8, addrspace 5) + ; VI-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 28 + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](i32) + ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i32) from unknown-address + 28, addrspace 5) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32), [[LOAD6]](i32), [[LOAD7]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[BUILD_VECTOR]](<8 x i32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>) ; ; GFX9-LABEL: name: test_load_private_v4p1_align8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 
5) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 8, addrspace 5) - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5) - ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32) - ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[BUILD_VECTOR]](<8 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p5) :: (load (i32) from unknown-address + 16, align 8, addrspace 5) + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i32) from unknown-address + 20, addrspace 5) + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p5) :: (load (i32) from unknown-address + 24, align 8, addrspace 5) + ; GFX9-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 28 + ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](i32) + ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i32) from unknown-address + 28, addrspace 5) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), 
[[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32), [[LOAD6]](i32), [[LOAD7]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[BUILD_VECTOR]](<8 x i32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>) ; ; GFX10-LABEL: name: test_load_private_v4p1_align8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 8, addrspace 5) - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) - ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; GFX10-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5) - ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32) - ; GFX10-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[BUILD_VECTOR]](<8 x s32>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = 
G_CONSTANT i32 16 + ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p5) :: (load (i32) from unknown-address + 16, align 8, addrspace 5) + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i32) from unknown-address + 20, addrspace 5) + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; GFX10-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p5) :: (load (i32) from unknown-address + 24, align 8, addrspace 5) + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 28 + ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](i32) + ; GFX10-NEXT: [[LOAD7:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i32) from unknown-address + 28, addrspace 5) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32), [[LOAD6]](i32), [[LOAD7]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[BUILD_VECTOR]](<8 x i32>) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>) ; ; GFX11-LABEL: name: test_load_private_v4p1_align8 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 8, addrspace 5) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<4 x s32>) from unknown-address + 16, align 8, addrspace 5) - ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s32>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p5) :: (load (<4 x i32>), align 8, addrspace 5) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<4 x i32>) from unknown-address + 16, align 8, addrspace 5) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[CONCAT_VECTORS]](<8 x i32>) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>) ; ; GFX12-LABEL: name: test_load_private_v4p1_align8 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 8, addrspace 5) - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<4 x s32>) from unknown-address + 16, align 8, addrspace 5) - ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s32>) 
+ ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p5) :: (load (<4 x i32>), align 8, addrspace 5) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<4 x i32>) from unknown-address + 16, align 8, addrspace 5) + ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[CONCAT_VECTORS]](<8 x i32>) ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_v4p1_align8 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 8, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) - ; UNALIGNED_GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[BUILD_VECTOR]](<8 x s32>) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; 
UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p5) :: (load (i32) from unknown-address + 16, align 8, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i32) from unknown-address + 20, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p5) :: (load (i32) from unknown-address + 24, align 8, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 28 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i32) from unknown-address + 28, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32), [[LOAD6]](i32), [[LOAD7]](i32) + ; UNALIGNED_GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[BUILD_VECTOR]](<8 x i32>) ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_v4p1_align8 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD 
[[COPY]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 8, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) - ; UNALIGNED_GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[BUILD_VECTOR]](<8 x s32>) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p5) :: (load (i32) from unknown-address + 16, align 8, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i32) from unknown-address + 20, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD6:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD5]](p5) :: (load (i32) from unknown-address + 24, align 8, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 28 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD7:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i32) from unknown-address + 28, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = 
G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32), [[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32), [[LOAD6]](i32), [[LOAD7]](i32) + ; UNALIGNED_GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[BUILD_VECTOR]](<8 x i32>) ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_v4p1_align8 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 8, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<4 x s32>) from unknown-address + 16, align 8, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) - ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s32>) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p5) :: (load (<4 x i32>), align 8, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<4 x i32>) from unknown-address + 16, align 8, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>) + ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[CONCAT_VECTORS]](<8 x i32>) ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_v4p1_align8 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 8, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<4 x s32>) from unknown-address + 16, align 8, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) - ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s32>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p5) :: (load (<4 x i32>), align 8, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<4 x i32>) from unknown-address + 16, align 8, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>) + ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[CONCAT_VECTORS]](<8 x i32>) ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>) %0:_(p5) = COPY 
$vgpr0 - %1:_(<4 x p1>) = G_LOAD %0 :: (load (<4 x p1>), align 8, addrspace 5) - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 + %1:_(<4 x p1>) = G_LOAD %0(p5) :: (load (<4 x p1>), align 8, addrspace 5) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1(<4 x p1>) ... --- @@ -16860,120 +16860,120 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x i32>) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) ; ; CI-LABEL: name: test_load_private_v2p3_align8 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x i32>) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) ; ; VI-LABEL: name: test_load_private_v2p3_align8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT 
i32 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x i32>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) ; ; GFX9-LABEL: name: test_load_private_v2p3_align8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x i32>) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) ; ; GFX10-LABEL: name: test_load_private_v2p3_align8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x i32>) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) ; ; GFX11-LABEL: name: test_load_private_v2p3_align8 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s32>), addrspace 5) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p5) :: (load (<2 x i32>), addrspace 5) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x i32>) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x 
p3>) ; ; GFX12-LABEL: name: test_load_private_v2p3_align8 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s32>), addrspace 5) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p5) :: (load (<2 x i32>), addrspace 5) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x i32>) ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_v2p3_align8 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) - ; UNALIGNED_GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32) + ; UNALIGNED_GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x i32>) ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_v2p3_align8 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) - ; UNALIGNED_GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 8, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32) + ; UNALIGNED_GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x i32>) ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_v2p3_align8 ; UNALIGNED_GFX11: 
liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s32>), addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p5) :: (load (<2 x i32>), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x i32>) ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_v2p3_align8 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s32>), addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p5) :: (load (<2 x i32>), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x i32>) ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>) %0:_(p5) = COPY $vgpr0 - %1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 8, addrspace 5) - $vgpr0_vgpr1 = COPY %1 + %1:_(<2 x p3>) = G_LOAD %0(p5) :: (load (<2 x p3>), addrspace 5) + $vgpr0_vgpr1 = COPY %1(<2 x p3>) ... --- @@ -16986,81 +16986,81 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) - ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), align 4, addrspace 5) + ; SI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-LABEL: name: test_ext_load_private_s32_from_1_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) - ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), align 4, addrspace 5) + ; CI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; VI-LABEL: name: test_ext_load_private_s32_from_1_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), align 4, addrspace 5) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-LABEL: name: test_ext_load_private_s32_from_1_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), align 4, addrspace 5) + ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX10-LABEL: name: test_ext_load_private_s32_from_1_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) - ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), align 4, addrspace 5) + 
; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX11-LABEL: name: test_ext_load_private_s32_from_1_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), align 4, addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX12-LABEL: name: test_ext_load_private_s32_from_1_align4 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) - ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), align 4, addrspace 5) + ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX9-LABEL: name: test_ext_load_private_s32_from_1_align4 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), align 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX10-LABEL: name: test_ext_load_private_s32_from_1_align4 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) - ; UNALIGNED_GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), align 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX11-LABEL: name: test_ext_load_private_s32_from_1_align4 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) - ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), align 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX12-LABEL: name: test_ext_load_private_s32_from_1_align4 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), align 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p5) = COPY $vgpr0 - %1:_(s32) = G_LOAD %0 :: (load (s8), align 4, addrspace 5) - $vgpr0 = COPY %1 + %1:_(i32) = G_LOAD %0(p5) :: (load (i8), align 4, addrspace 5) + $vgpr0 = COPY %1(i32) ... 
--- @@ -17073,81 +17073,81 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) - ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), align 4, addrspace 5) + ; SI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; CI-LABEL: name: test_ext_load_private_s32_from_2_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) - ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), align 4, addrspace 5) + ; CI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; VI-LABEL: name: test_ext_load_private_s32_from_2_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) - ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), align 4, addrspace 5) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX9-LABEL: name: test_ext_load_private_s32_from_2_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), align 4, addrspace 5) + ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX10-LABEL: name: test_ext_load_private_s32_from_2_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) - ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), align 4, addrspace 5) + ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX11-LABEL: name: test_ext_load_private_s32_from_2_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), align 4, addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; GFX12-LABEL: name: test_ext_load_private_s32_from_2_align4 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) - ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), align 4, addrspace 5) + ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX9-LABEL: name: test_ext_load_private_s32_from_2_align4 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), align 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: $vgpr0 = COPY [[LOAD]](i32) 
; ; UNALIGNED_GFX10-LABEL: name: test_ext_load_private_s32_from_2_align4 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) - ; UNALIGNED_GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), align 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX11-LABEL: name: test_ext_load_private_s32_from_2_align4 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) - ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), align 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[LOAD]](i32) ; ; UNALIGNED_GFX12-LABEL: name: test_ext_load_private_s32_from_2_align4 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), align 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](i32) %0:_(p5) = COPY $vgpr0 - %1:_(s32) = G_LOAD %0 :: (load (s16), align 4, addrspace 5) - $vgpr0 = COPY %1 + %1:_(i32) = G_LOAD %0(p5) :: (load (i16), align 4, addrspace 5) + $vgpr0 = COPY %1(i32) ... 
--- @@ -17161,92 +17161,92 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), align 4, addrspace 5) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; CI-LABEL: name: test_ext_load_private_s64_from_1_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) - ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), align 4, addrspace 5) + ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; VI-LABEL: name: test_ext_load_private_s64_from_1_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), align 4, addrspace 5) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX9-LABEL: name: test_ext_load_private_s64_from_1_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), align 4, addrspace 5) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX10-LABEL: name: test_ext_load_private_s64_from_1_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) - ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), align 4, addrspace 5) + ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX11-LABEL: name: test_ext_load_private_s64_from_1_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) - ; GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), align 4, addrspace 5) + ; GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX12-LABEL: name: test_ext_load_private_s64_from_1_align4 ; GFX12: liveins: $vgpr0 ; 
GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) - ; GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), align 4, addrspace 5) + ; GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; UNALIGNED_GFX9-LABEL: name: test_ext_load_private_s64_from_1_align4 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), align 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; UNALIGNED_GFX10-LABEL: name: test_ext_load_private_s64_from_1_align4 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), align 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; UNALIGNED_GFX11-LABEL: name: test_ext_load_private_s64_from_1_align4 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), align 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; UNALIGNED_GFX12-LABEL: name: test_ext_load_private_s64_from_1_align4 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), align 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) %0:_(p5) = COPY $vgpr0 - %1:_(s64) = G_LOAD %0 :: (load (s8), align 4, addrspace 5) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_LOAD %0(p5) :: (load (i8), align 4, addrspace 5) + $vgpr0_vgpr1 = COPY %1(i64) ... 
--- @@ -17259,92 +17259,92 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), align 4, addrspace 5) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; CI-LABEL: name: test_ext_load_private_s64_from_2_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) - ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), align 4, addrspace 5) + ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; VI-LABEL: name: test_ext_load_private_s64_from_2_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), align 4, addrspace 5) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX9-LABEL: name: test_ext_load_private_s64_from_2_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), align 4, addrspace 5) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX10-LABEL: name: test_ext_load_private_s64_from_2_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) - ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), align 4, addrspace 5) + ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX11-LABEL: name: test_ext_load_private_s64_from_2_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) - ; GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), align 4, addrspace 5) + ; GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX12-LABEL: name: test_ext_load_private_s64_from_2_align4 ; GFX12: liveins: $vgpr0 
; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) - ; GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), align 4, addrspace 5) + ; GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; UNALIGNED_GFX9-LABEL: name: test_ext_load_private_s64_from_2_align4 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), align 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; UNALIGNED_GFX10-LABEL: name: test_ext_load_private_s64_from_2_align4 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), align 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; UNALIGNED_GFX11-LABEL: name: test_ext_load_private_s64_from_2_align4 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), align 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; UNALIGNED_GFX12-LABEL: name: test_ext_load_private_s64_from_2_align4 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), align 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) %0:_(p5) = COPY $vgpr0 - %1:_(s64) = G_LOAD %0 :: (load (s16), align 4, addrspace 5) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_LOAD %0(p5) :: (load (i16), align 4, addrspace 5) + $vgpr0_vgpr1 = COPY %1(i64) ... 
--- @@ -17357,92 +17357,92 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; CI-LABEL: name: test_ext_load_private_s64_from_4_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; VI-LABEL: name: test_ext_load_private_s64_from_4_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX9-LABEL: name: test_ext_load_private_s64_from_4_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX10-LABEL: name: test_ext_load_private_s64_from_4_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX11-LABEL: name: test_ext_load_private_s64_from_4_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX12-LABEL: name: test_ext_load_private_s64_from_4_align4 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: 
[[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; UNALIGNED_GFX9-LABEL: name: test_ext_load_private_s64_from_4_align4 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; UNALIGNED_GFX10-LABEL: name: test_ext_load_private_s64_from_4_align4 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; UNALIGNED_GFX11-LABEL: name: test_ext_load_private_s64_from_4_align4 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; UNALIGNED_GFX12-LABEL: name: test_ext_load_private_s64_from_4_align4 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) %0:_(p5) = COPY $vgpr0 - %1:_(s64) = G_LOAD %0 :: (load (s32), align 4, addrspace 5) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_LOAD %0(p5) :: (load (i32), addrspace 5) + $vgpr0_vgpr1 = COPY %1(i64) ... 
--- @@ -17455,125 +17455,125 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) - ; SI-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; SI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; SI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[DEF]](i32) + ; SI-NEXT: [[DEF1:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; SI-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[MV]](i64), [[DEF1]](i64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](i128) ; ; CI-LABEL: name: test_ext_load_private_s128_from_4_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; CI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) - ; CI-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; CI-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[DEF]](i32) + ; CI-NEXT: [[DEF1:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; CI-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[MV]](i64), [[DEF1]](i64) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](i128) ; ; VI-LABEL: name: test_ext_load_private_s128_from_4_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) - ; VI-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; VI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; VI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[DEF]](i32) + ; VI-NEXT: [[DEF1:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; VI-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[MV]](i64), [[DEF1]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](i128) ; ; GFX9-LABEL: name: test_ext_load_private_s128_from_4_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD 
[[COPY]](p5) :: (load (i32), addrspace 5) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[DEF]](i32) + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[MV]](i64), [[DEF1]](i64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](i128) ; ; GFX10-LABEL: name: test_ext_load_private_s128_from_4_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) - ; GFX10-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[DEF]](i32) + ; GFX10-NEXT: [[DEF1:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[MV]](i64), [[DEF1]](i64) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](i128) ; ; GFX11-LABEL: name: test_ext_load_private_s128_from_4_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) - ; GFX11-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[DEF]](i32) + ; GFX11-NEXT: [[DEF1:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[MV]](i64), [[DEF1]](i64) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](i128) ; ; GFX12-LABEL: name: test_ext_load_private_s128_from_4_align4 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) - ; GFX12-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[DEF]](i32) + ; GFX12-NEXT: [[DEF1:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[MV]](i64), [[DEF1]](i64) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](i128) ; ; UNALIGNED_GFX9-LABEL: name: test_ext_load_private_s128_from_4_align4 ; UNALIGNED_GFX9: 
liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; UNALIGNED_GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) - ; UNALIGNED_GFX9-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; UNALIGNED_GFX9-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) - ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; UNALIGNED_GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[DEF]](i32) + ; UNALIGNED_GFX9-NEXT: [[DEF1:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; UNALIGNED_GFX9-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[MV]](i64), [[DEF1]](i64) + ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](i128) ; ; UNALIGNED_GFX10-LABEL: name: test_ext_load_private_s128_from_4_align4 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; UNALIGNED_GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) - ; UNALIGNED_GFX10-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; UNALIGNED_GFX10-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) - ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; UNALIGNED_GFX10-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[DEF]](i32) + ; UNALIGNED_GFX10-NEXT: [[DEF1:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; UNALIGNED_GFX10-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[MV]](i64), [[DEF1]](i64) + ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](i128) ; ; UNALIGNED_GFX11-LABEL: name: test_ext_load_private_s128_from_4_align4 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; UNALIGNED_GFX11-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) - ; UNALIGNED_GFX11-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; UNALIGNED_GFX11-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; UNALIGNED_GFX11-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[DEF]](i32) + ; UNALIGNED_GFX11-NEXT: [[DEF1:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; UNALIGNED_GFX11-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[MV]](i64), [[DEF1]](i64) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](i128) ; ; UNALIGNED_GFX12-LABEL: name: test_ext_load_private_s128_from_4_align4 ; UNALIGNED_GFX12: 
liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; UNALIGNED_GFX12-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) - ; UNALIGNED_GFX12-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; UNALIGNED_GFX12-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; UNALIGNED_GFX12-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[LOAD]](i32), [[DEF]](i32) + ; UNALIGNED_GFX12-NEXT: [[DEF1:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; UNALIGNED_GFX12-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[MV]](i64), [[DEF1]](i64) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](i128) %0:_(p5) = COPY $vgpr0 - %1:_(s128) = G_LOAD %0 :: (load (s32), align 4, addrspace 5) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(i128) = G_LOAD %0(p5) :: (load (i32), addrspace 5) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(i128) ... --- @@ -17586,92 +17586,92 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), align 4, addrspace 5) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; CI-LABEL: name: test_ext_load_private_s64_from_2_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) - ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), align 4, addrspace 5) + ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; VI-LABEL: name: test_ext_load_private_s64_from_2_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), align 4, addrspace 5) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX9-LABEL: name: test_ext_load_private_s64_from_2_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), align 4, addrspace 5) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; 
GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX10-LABEL: name: test_ext_load_private_s64_from_2_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) - ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), align 4, addrspace 5) + ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX11-LABEL: name: test_ext_load_private_s64_from_2_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) - ; GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), align 4, addrspace 5) + ; GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX12-LABEL: name: test_ext_load_private_s64_from_2_align2 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) - ; GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), align 4, addrspace 5) + ; GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; UNALIGNED_GFX9-LABEL: name: test_ext_load_private_s64_from_2_align2 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), align 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; UNALIGNED_GFX10-LABEL: name: test_ext_load_private_s64_from_2_align2 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), align 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; UNALIGNED_GFX11-LABEL: name: test_ext_load_private_s64_from_2_align2 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) - ; 
UNALIGNED_GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), align 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; UNALIGNED_GFX12-LABEL: name: test_ext_load_private_s64_from_2_align2 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i16), align 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) %0:_(p5) = COPY $vgpr0 - %1:_(s64) = G_LOAD %0 :: (load (s16), align 4, addrspace 5) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_LOAD %0(p5) :: (load (i16), align 4, addrspace 5) + $vgpr0_vgpr1 = COPY %1(i64) ... --- @@ -17684,92 +17684,92 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), align 4, addrspace 5) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; CI-LABEL: name: test_ext_load_private_s64_from_1_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) - ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), align 4, addrspace 5) + ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; VI-LABEL: name: test_ext_load_private_s64_from_1_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), align 4, addrspace 5) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX9-LABEL: name: test_ext_load_private_s64_from_1_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), align 4, addrspace 5) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) 
; ; GFX10-LABEL: name: test_ext_load_private_s64_from_1_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) - ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), align 4, addrspace 5) + ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX11-LABEL: name: test_ext_load_private_s64_from_1_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) - ; GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), align 4, addrspace 5) + ; GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; GFX12-LABEL: name: test_ext_load_private_s64_from_1_align1 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) - ; GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), align 4, addrspace 5) + ; GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; UNALIGNED_GFX9-LABEL: name: test_ext_load_private_s64_from_1_align1 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), align 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; UNALIGNED_GFX10-LABEL: name: test_ext_load_private_s64_from_1_align1 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), align 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; UNALIGNED_GFX11-LABEL: name: test_ext_load_private_s64_from_1_align1 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT 
[[LOAD]](s32) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), align 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) ; ; UNALIGNED_GFX12-LABEL: name: test_ext_load_private_s64_from_1_align1 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i8), align 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[LOAD]](i32) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](i64) %0:_(p5) = COPY $vgpr0 - %1:_(s64) = G_LOAD %0 :: (load (s8), align 4, addrspace 5) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_LOAD %0(p5) :: (load (i8), align 4, addrspace 5) + $vgpr0_vgpr1 = COPY %1(i64) ... --- @@ -17782,81 +17782,81 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 1, addrspace 5) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), align 1, addrspace 5) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; CI-LABEL: name: test_extload_private_v2s32_from_4_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 1, addrspace 5) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), align 1, addrspace 5) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; VI-LABEL: name: test_extload_private_v2s32_from_4_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 1, addrspace 5) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), align 1, addrspace 5) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX9-LABEL: name: test_extload_private_v2s32_from_4_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 1, addrspace 5) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), align 1, addrspace 5) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX10-LABEL: name: test_extload_private_v2s32_from_4_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 1, addrspace 5) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), align 1, addrspace 5) 
+ ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX11-LABEL: name: test_extload_private_v2s32_from_4_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 1, addrspace 5) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), align 1, addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX12-LABEL: name: test_extload_private_v2s32_from_4_align1 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 1, addrspace 5) - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), align 1, addrspace 5) + ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; UNALIGNED_GFX9-LABEL: name: test_extload_private_v2s32_from_4_align1 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 1, addrspace 5) - ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), align 1, addrspace 5) + ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; UNALIGNED_GFX10-LABEL: name: test_extload_private_v2s32_from_4_align1 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 1, addrspace 5) - ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), align 1, addrspace 5) + ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; UNALIGNED_GFX11-LABEL: name: test_extload_private_v2s32_from_4_align1 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 1, addrspace 5) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), align 1, addrspace 5) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; UNALIGNED_GFX12-LABEL: name: test_extload_private_v2s32_from_4_align1 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 1, addrspace 5) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), align 1, addrspace 5) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) %0:_(p5) = COPY $vgpr0 - %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s16>), align 1, addrspace 5) - $vgpr0_vgpr1 = COPY %1 + %1:_(<2 x i32>) = G_LOAD %0(p5) :: (load (<2 x i16>), align 1, addrspace 5) + 
$vgpr0_vgpr1 = COPY %1(<2 x i32>) ... --- @@ -17869,81 +17869,81 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 2, addrspace 5) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), align 2, addrspace 5) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; CI-LABEL: name: test_extload_private_v2s32_from_4_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 2, addrspace 5) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), align 2, addrspace 5) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; VI-LABEL: name: test_extload_private_v2s32_from_4_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 2, addrspace 5) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), align 2, addrspace 5) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX9-LABEL: name: test_extload_private_v2s32_from_4_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 2, addrspace 5) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), align 2, addrspace 5) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX10-LABEL: name: test_extload_private_v2s32_from_4_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 2, addrspace 5) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), align 2, addrspace 5) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX11-LABEL: name: test_extload_private_v2s32_from_4_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 2, addrspace 5) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), align 2, addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX12-LABEL: name: test_extload_private_v2s32_from_4_align2 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 2, addrspace 5) - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), align 2, addrspace 5) + ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; UNALIGNED_GFX9-LABEL: name: test_extload_private_v2s32_from_4_align2 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; 
UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 2, addrspace 5) - ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), align 2, addrspace 5) + ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; UNALIGNED_GFX10-LABEL: name: test_extload_private_v2s32_from_4_align2 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 2, addrspace 5) - ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), align 2, addrspace 5) + ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; UNALIGNED_GFX11-LABEL: name: test_extload_private_v2s32_from_4_align2 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 2, addrspace 5) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), align 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; UNALIGNED_GFX12-LABEL: name: test_extload_private_v2s32_from_4_align2 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 2, addrspace 5) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), align 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) %0:_(p5) = COPY $vgpr0 - %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s16>), align 2, addrspace 5) - $vgpr0_vgpr1 = COPY %1 + %1:_(<2 x i32>) = G_LOAD %0(p5) :: (load (<2 x i16>), align 2, addrspace 5) + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... 
--- @@ -17956,81 +17956,81 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), addrspace 5) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; CI-LABEL: name: test_extload_private_v2s32_from_4_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), addrspace 5) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; VI-LABEL: name: test_extload_private_v2s32_from_4_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), addrspace 5) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX9-LABEL: name: test_extload_private_v2s32_from_4_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), addrspace 5) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX10-LABEL: name: test_extload_private_v2s32_from_4_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), addrspace 5) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX11-LABEL: name: test_extload_private_v2s32_from_4_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; GFX12-LABEL: name: test_extload_private_v2s32_from_4_align4 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5) - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), addrspace 5) + ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; UNALIGNED_GFX9-LABEL: name: test_extload_private_v2s32_from_4_align4 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), 
addrspace 5) - ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), addrspace 5) + ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; UNALIGNED_GFX10-LABEL: name: test_extload_private_v2s32_from_4_align4 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5) - ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), addrspace 5) + ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; UNALIGNED_GFX11-LABEL: name: test_extload_private_v2s32_from_4_align4 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), addrspace 5) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) ; ; UNALIGNED_GFX12-LABEL: name: test_extload_private_v2s32_from_4_align4 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x i32>) = G_LOAD [[COPY]](p5) :: (load (<2 x i16>), addrspace 5) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x i32>) %0:_(p5) = COPY $vgpr0 - %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 5) - $vgpr0_vgpr1 = COPY %1 + %1:_(<2 x i32>) = G_LOAD %0(p5) :: (load (<2 x i16>), addrspace 5) + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... 
--- @@ -18043,81 +18043,81 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s16>), align 4, addrspace 5) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p5) :: (load (<3 x i16>), align 4, addrspace 5) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) ; ; CI-LABEL: name: test_extload_private_v3s32_from_6_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s16>), align 4, addrspace 5) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p5) :: (load (<3 x i16>), align 4, addrspace 5) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) ; ; VI-LABEL: name: test_extload_private_v3s32_from_6_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s16>), align 4, addrspace 5) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p5) :: (load (<3 x i16>), align 4, addrspace 5) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) ; ; GFX9-LABEL: name: test_extload_private_v3s32_from_6_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s16>), align 4, addrspace 5) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p5) :: (load (<3 x i16>), align 4, addrspace 5) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) ; ; GFX10-LABEL: name: test_extload_private_v3s32_from_6_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s16>), align 4, addrspace 5) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p5) :: (load (<3 x i16>), align 4, addrspace 5) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) ; ; GFX11-LABEL: name: test_extload_private_v3s32_from_6_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s16>), align 4, addrspace 5) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p5) :: (load (<3 x i16>), align 4, addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) ; ; GFX12-LABEL: name: test_extload_private_v3s32_from_6_align4 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s16>), align 4, addrspace 5) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p5) :: (load (<3 x i16>), align 4, addrspace 5) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) ; ; UNALIGNED_GFX9-LABEL: name: test_extload_private_v3s32_from_6_align4 ; UNALIGNED_GFX9: 
liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s16>), align 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p5) :: (load (<3 x i16>), align 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) ; ; UNALIGNED_GFX10-LABEL: name: test_extload_private_v3s32_from_6_align4 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s16>), align 4, addrspace 5) - ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p5) :: (load (<3 x i16>), align 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) ; ; UNALIGNED_GFX11-LABEL: name: test_extload_private_v3s32_from_6_align4 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s16>), align 4, addrspace 5) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p5) :: (load (<3 x i16>), align 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) ; ; UNALIGNED_GFX12-LABEL: name: test_extload_private_v3s32_from_6_align4 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s16>), align 4, addrspace 5) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p5) :: (load (<3 x i16>), align 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x i32>) %0:_(p5) = COPY $vgpr0 - %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s16>), align 4, addrspace 5) - $vgpr0_vgpr1_vgpr2 = COPY %1 + %1:_(<3 x i32>) = G_LOAD %0(p5) :: (load (<3 x i16>), align 4, addrspace 5) + $vgpr0_vgpr1_vgpr2 = COPY %1(<3 x i32>) ... 
--- @@ -18130,81 +18130,81 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), align 4, addrspace 5) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p5) :: (load (<4 x i16>), align 4, addrspace 5) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; CI-LABEL: name: test_extload_private_v4s32_from_8_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), align 4, addrspace 5) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p5) :: (load (<4 x i16>), align 4, addrspace 5) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; VI-LABEL: name: test_extload_private_v4s32_from_8_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), align 4, addrspace 5) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p5) :: (load (<4 x i16>), align 4, addrspace 5) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; GFX9-LABEL: name: test_extload_private_v4s32_from_8_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), align 4, addrspace 5) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p5) :: (load (<4 x i16>), align 4, addrspace 5) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; GFX10-LABEL: name: test_extload_private_v4s32_from_8_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), align 4, addrspace 5) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p5) :: (load (<4 x i16>), align 4, addrspace 5) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; GFX11-LABEL: name: test_extload_private_v4s32_from_8_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), align 4, addrspace 5) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p5) :: (load (<4 x i16>), align 4, addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; GFX12-LABEL: name: test_extload_private_v4s32_from_8_align4 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), align 4, addrspace 5) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p5) :: (load (<4 x i16>), align 4, addrspace 5) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; 
UNALIGNED_GFX9-LABEL: name: test_extload_private_v4s32_from_8_align4 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), align 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p5) :: (load (<4 x i16>), align 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; UNALIGNED_GFX10-LABEL: name: test_extload_private_v4s32_from_8_align4 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), align 4, addrspace 5) - ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p5) :: (load (<4 x i16>), align 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; UNALIGNED_GFX11-LABEL: name: test_extload_private_v4s32_from_8_align4 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), align 4, addrspace 5) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p5) :: (load (<4 x i16>), align 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) ; ; UNALIGNED_GFX12-LABEL: name: test_extload_private_v4s32_from_8_align4 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), align 4, addrspace 5) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x i32>) = G_LOAD [[COPY]](p5) :: (load (<4 x i16>), align 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x i32>) %0:_(p5) = COPY $vgpr0 - %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s16>), align 4, addrspace 5) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<4 x i32>) = G_LOAD %0(p5) :: (load (<4 x i16>), align 4, addrspace 5) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<4 x i32>) ... 
--- @@ -18217,805 +18217,805 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD 
[[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; SI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) - ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) - ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; SI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) - ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) - ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; SI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s32) - ; SI-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD15]](p5) :: (load (s8) from unknown-address + 16, addrspace 5) - ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD16]](p5) :: (load (s8) from unknown-address + 17, addrspace 5) - ; SI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD13]], [[C1]](s32) - ; SI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[ZEXTLOAD12]] - ; SI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD17]](p5) :: (load (s8) from unknown-address + 18, addrspace 5) - ; SI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD17]], [[C]](s32) - ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p5) :: (load (s8) from unknown-address + 19, addrspace 5) - ; SI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; SI-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[SHL13]], [[ZEXTLOAD14]] - ; SI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[OR13]], [[C3]](s32) - ; SI-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[SHL14]], [[OR12]] - ; SI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; SI-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD19]](p5) :: (load (s8) from unknown-address + 20, addrspace 5) - ; SI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C]](s32) - ; SI-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD20]](p5) :: (load (s8) from unknown-address + 21, addrspace 5) - ; SI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD16]], [[C1]](s32) - ; SI-NEXT: 
[[OR15:%[0-9]+]]:_(s32) = G_OR [[SHL15]], [[ZEXTLOAD15]] - ; SI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD21]](p5) :: (load (s8) from unknown-address + 22, addrspace 5) - ; SI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD21]], [[C]](s32) - ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p5) :: (load (s8) from unknown-address + 23, addrspace 5) - ; SI-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; SI-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[SHL16]], [[ZEXTLOAD17]] - ; SI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[OR16]], [[C3]](s32) - ; SI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] - ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i32) = 
G_OR [[SHL4]], [[ZEXTLOAD5]] + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; SI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; SI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; SI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; SI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; SI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; SI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; SI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; SI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; SI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (i8) from unknown-address + 12, addrspace 5) + ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (i8) from unknown-address + 13, addrspace 5) + ; SI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; SI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (i8) from unknown-address + 14, addrspace 5) + ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p5) :: (load (i8) from unknown-address + 15, addrspace 5) + ; SI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; SI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; SI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; SI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C4]](i32) + ; SI-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD15]](p5) :: (load (i8) from unknown-address + 16, addrspace 5) + ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD16]](p5) :: (load (i8) from unknown-address + 17, addrspace 5) + ; SI-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD13]], [[C1]](i32) + ; SI-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[ZEXTLOAD12]] + ; SI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD17]](p5) :: (load (i8) 
from unknown-address + 18, addrspace 5) + ; SI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD17]], [[C]](i32) + ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD18]](p5) :: (load (i8) from unknown-address + 19, addrspace 5) + ; SI-NEXT: [[SHL13:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; SI-NEXT: [[OR13:%[0-9]+]]:_(i32) = G_OR [[SHL13]], [[ZEXTLOAD14]] + ; SI-NEXT: [[SHL14:%[0-9]+]]:_(i32) = G_SHL [[OR13]], [[C3]](i32) + ; SI-NEXT: [[OR14:%[0-9]+]]:_(i32) = G_OR [[SHL14]], [[OR12]] + ; SI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](i32) + ; SI-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD19]](p5) :: (load (i8) from unknown-address + 20, addrspace 5) + ; SI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C]](i32) + ; SI-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD20]](p5) :: (load (i8) from unknown-address + 21, addrspace 5) + ; SI-NEXT: [[SHL15:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD16]], [[C1]](i32) + ; SI-NEXT: [[OR15:%[0-9]+]]:_(i32) = G_OR [[SHL15]], [[ZEXTLOAD15]] + ; SI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD21]](p5) :: (load (i8) from unknown-address + 22, addrspace 5) + ; SI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD21]], [[C]](i32) + ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD22]](p5) :: (load (i8) from unknown-address + 23, addrspace 5) + ; SI-NEXT: [[SHL16:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; SI-NEXT: [[OR16:%[0-9]+]]:_(i32) = G_OR [[SHL16]], [[ZEXTLOAD17]] + ; SI-NEXT: [[SHL17:%[0-9]+]]:_(i32) = G_SHL [[OR16]], [[C3]](i32) + ; SI-NEXT: [[OR17:%[0-9]+]]:_(i32) = G_OR [[SHL17]], [[OR15]] + ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR11]](i32), [[OR14]](i32), [[OR17]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; CI-LABEL: name: test_load_private_v2s96_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; 
CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; CI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; CI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) - ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) - ; CI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; CI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) - ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) 
= G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) - ; CI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; CI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; CI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; CI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; CI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s32) - ; CI-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD15]](p5) :: (load (s8) from unknown-address + 16, addrspace 5) - ; CI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD16]](p5) :: (load (s8) from unknown-address + 17, addrspace 5) - ; CI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD13]], [[C1]](s32) - ; CI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[ZEXTLOAD12]] - ; CI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD17]](p5) :: (load (s8) from unknown-address + 18, addrspace 5) - ; CI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD17]], [[C]](s32) - ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p5) :: (load (s8) from unknown-address + 19, addrspace 5) - ; CI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; CI-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[SHL13]], [[ZEXTLOAD14]] - ; CI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[OR13]], [[C3]](s32) - ; CI-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[SHL14]], [[OR12]] - ; CI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; CI-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD19]](p5) :: (load (s8) from unknown-address + 20, addrspace 5) - ; CI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD20]](p5) :: (load (s8) from unknown-address + 21, addrspace 5) - ; CI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD16]], [[C1]](s32) - ; CI-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[SHL15]], [[ZEXTLOAD15]] - ; CI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD21]](p5) :: (load (s8) from unknown-address + 22, addrspace 5) - ; CI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD21]], [[C]](s32) - ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p5) :: (load (s8) from unknown-address + 23, addrspace 5) - ; CI-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; CI-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[SHL16]], [[ZEXTLOAD17]] - ; CI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[OR16]], [[C3]](s32) - ; CI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] - ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) - ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; 
CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; CI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; CI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; CI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; CI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; CI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; CI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; CI-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; CI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; CI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (i8) from unknown-address + 12, addrspace 5) + ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (i8) from unknown-address + 13, addrspace 5) + ; CI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; CI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (i8) from unknown-address + 14, addrspace 5) + ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p5) :: (load (i8) from unknown-address + 15, addrspace 5) + ; CI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; CI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; CI-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; CI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; CI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C4]](i32) + ; CI-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD15]](p5) :: (load (i8) from unknown-address + 16, addrspace 5) + ; CI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD16]](p5) :: (load (i8) from unknown-address + 17, addrspace 5) + ; CI-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD13]], [[C1]](i32) + ; CI-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[ZEXTLOAD12]] + ; CI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD17]](p5) :: (load (i8) from unknown-address + 18, addrspace 5) + ; CI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD17]], [[C]](i32) + ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD18]](p5) :: (load (i8) from unknown-address + 19, addrspace 5) + ; CI-NEXT: [[SHL13:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; CI-NEXT: [[OR13:%[0-9]+]]:_(i32) = G_OR [[SHL13]], [[ZEXTLOAD14]] + ; CI-NEXT: [[SHL14:%[0-9]+]]:_(i32) = G_SHL [[OR13]], [[C3]](i32) + ; CI-NEXT: [[OR14:%[0-9]+]]:_(i32) = G_OR [[SHL14]], [[OR12]] + ; CI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](i32) + ; CI-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD19]](p5) :: (load (i8) from unknown-address + 20, addrspace 5) + ; CI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD20]](p5) :: (load (i8) from unknown-address + 21, addrspace 5) + ; CI-NEXT: [[SHL15:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD16]], [[C1]](i32) + ; CI-NEXT: [[OR15:%[0-9]+]]:_(i32) = G_OR [[SHL15]], [[ZEXTLOAD15]] + ; CI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD21]](p5) :: (load (i8) from unknown-address + 22, addrspace 5) + ; CI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD21]], [[C]](i32) + ; CI-NEXT: 
[[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD22]](p5) :: (load (i8) from unknown-address + 23, addrspace 5) + ; CI-NEXT: [[SHL16:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; CI-NEXT: [[OR16:%[0-9]+]]:_(i32) = G_OR [[SHL16]], [[ZEXTLOAD17]] + ; CI-NEXT: [[SHL17:%[0-9]+]]:_(i32) = G_SHL [[OR16]], [[C3]](i32) + ; CI-NEXT: [[OR17:%[0-9]+]]:_(i32) = G_OR [[SHL17]], [[OR15]] + ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR11]](i32), [[OR14]](i32), [[OR17]](i32) + ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; CI-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; VI-LABEL: name: test_load_private_v2s96_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], 
[[C1]](s32) - ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; VI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) - ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) - ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) - ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) - ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s32) - ; VI-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD15]](p5) :: (load (s8) from unknown-address + 16, addrspace 5) - ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD16]](p5) :: (load (s8) from unknown-address + 17, addrspace 5) - ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD13]], [[C1]](s32) - ; VI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[ZEXTLOAD12]] - ; VI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD17]](p5) :: (load (s8) from unknown-address + 18, addrspace 5) - ; VI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD17]], [[C]](s32) - ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p5) :: (load (s8) from 
unknown-address + 19, addrspace 5) - ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[SHL13]], [[ZEXTLOAD14]] - ; VI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[OR13]], [[C3]](s32) - ; VI-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[SHL14]], [[OR12]] - ; VI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; VI-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD19]](p5) :: (load (s8) from unknown-address + 20, addrspace 5) - ; VI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C]](s32) - ; VI-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD20]](p5) :: (load (s8) from unknown-address + 21, addrspace 5) - ; VI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD16]], [[C1]](s32) - ; VI-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[SHL15]], [[ZEXTLOAD15]] - ; VI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD21]](p5) :: (load (s8) from unknown-address + 22, addrspace 5) - ; VI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD21]], [[C]](s32) - ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p5) :: (load (s8) from unknown-address + 23, addrspace 5) - ; VI-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; VI-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[SHL16]], [[ZEXTLOAD17]] - ; VI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[OR16]], [[C3]](s32) - ; VI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] - ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; VI-NEXT: 
[[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; VI-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; VI-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; VI-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; VI-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; VI-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; VI-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (i8) from unknown-address + 12, addrspace 5) + ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (i8) from unknown-address + 13, addrspace 5) + ; VI-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; VI-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (i8) from unknown-address + 14, addrspace 5) + ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p5) :: (load (i8) from unknown-address + 15, addrspace 5) + ; VI-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; VI-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; VI-NEXT: 
[[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; VI-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C4]](i32) + ; VI-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD15]](p5) :: (load (i8) from unknown-address + 16, addrspace 5) + ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD16]](p5) :: (load (i8) from unknown-address + 17, addrspace 5) + ; VI-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD13]], [[C1]](i32) + ; VI-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[ZEXTLOAD12]] + ; VI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD17]](p5) :: (load (i8) from unknown-address + 18, addrspace 5) + ; VI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD17]], [[C]](i32) + ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD18]](p5) :: (load (i8) from unknown-address + 19, addrspace 5) + ; VI-NEXT: [[SHL13:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR13:%[0-9]+]]:_(i32) = G_OR [[SHL13]], [[ZEXTLOAD14]] + ; VI-NEXT: [[SHL14:%[0-9]+]]:_(i32) = G_SHL [[OR13]], [[C3]](i32) + ; VI-NEXT: [[OR14:%[0-9]+]]:_(i32) = G_OR [[SHL14]], [[OR12]] + ; VI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](i32) + ; VI-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD19]](p5) :: (load (i8) from unknown-address + 20, addrspace 5) + ; VI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C]](i32) + ; VI-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD20]](p5) :: (load (i8) from unknown-address + 21, addrspace 5) + ; VI-NEXT: [[SHL15:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD16]], [[C1]](i32) + ; VI-NEXT: [[OR15:%[0-9]+]]:_(i32) = G_OR [[SHL15]], [[ZEXTLOAD15]] + ; VI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD21]](p5) :: (load (i8) from unknown-address + 22, addrspace 5) + ; VI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD21]], [[C]](i32) + ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD22]](p5) :: (load (i8) from unknown-address + 23, addrspace 5) + ; VI-NEXT: [[SHL16:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; VI-NEXT: [[OR16:%[0-9]+]]:_(i32) = G_OR [[SHL16]], [[ZEXTLOAD17]] + ; VI-NEXT: [[SHL17:%[0-9]+]]:_(i32) = G_SHL [[OR16]], [[C3]](i32) + ; VI-NEXT: [[OR17:%[0-9]+]]:_(i32) = G_OR [[SHL17]], [[OR15]] + ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR11]](i32), [[OR14]](i32), [[OR17]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; GFX9-LABEL: name: test_load_private_v2s96_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 1, addrspace 5) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, align 1, addrspace 5) - ; GFX9-NEXT: 
[[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 1, addrspace 5) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, align 1, addrspace 5) - ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](s32) - ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 1, addrspace 5) - ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](s32) - ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, align 1, addrspace 5) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 1, addrspace 5) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, align 1, addrspace 5) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 1, addrspace 5) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, align 1, addrspace 5) + ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](i32) + ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p5) :: (load (i32) from unknown-address + 16, align 1, addrspace 5) + ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](i32) + ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i32) from unknown-address + 20, align 1, addrspace 5) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; GFX10-LABEL: name: 
test_load_private_v2s96_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 1, addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, align 1, addrspace 5) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 1, addrspace 5) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, align 1, addrspace 5) - ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](s32) - ; GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 1, addrspace 5) - ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](s32) - ; GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, align 1, addrspace 5) - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 1, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, align 1, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 1, addrspace 5) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, align 1, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](i32) + ; GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p5) :: (load (i32) from unknown-address + 16, align 1, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](i32) + ; GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i32) 
from unknown-address + 20, align 1, addrspace 5) + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; GFX11-LABEL: name: test_load_private_v2s96_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s32>), align 1, addrspace 5) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<3 x s32>) from unknown-address + 12, align 1, addrspace 5) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p5) :: (load (<3 x i32>), align 1, addrspace 5) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<3 x i32>) from unknown-address + 12, align 1, addrspace 5) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD1]](<3 x i32>) + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; GFX12-LABEL: name: test_load_private_v2s96_align1 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s32>), align 1, addrspace 5) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<3 x s32>) from unknown-address + 12, align 1, addrspace 5) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX12-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p5) :: (load (<3 x i32>), align 1, addrspace 5) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<3 x 
i32>) from unknown-address + 12, align 1, addrspace 5) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD1]](<3 x i32>) + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; GFX12-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_v2s96_align1 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = 
G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; UNALIGNED_GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; UNALIGNED_GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; UNALIGNED_GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; UNALIGNED_GFX9-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD15]](p5) :: (load (s8) from unknown-address + 16, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD16]](p5) :: (load (s8) from unknown-address + 17, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD13]], [[C1]](s32) - ; 
UNALIGNED_GFX9-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[ZEXTLOAD12]] - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD17]](p5) :: (load (s8) from unknown-address + 18, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD17]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p5) :: (load (s8) from unknown-address + 19, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[SHL13]], [[ZEXTLOAD14]] - ; UNALIGNED_GFX9-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[OR13]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[SHL14]], [[OR12]] - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD19]](p5) :: (load (s8) from unknown-address + 20, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD20]](p5) :: (load (s8) from unknown-address + 21, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD16]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[SHL15]], [[ZEXTLOAD15]] - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD21]](p5) :: (load (s8) from unknown-address + 22, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD21]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p5) :: (load (s8) from unknown-address + 23, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[SHL16]], [[ZEXTLOAD17]] - ; UNALIGNED_GFX9-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[OR16]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) - ; UNALIGNED_GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; UNALIGNED_GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; UNALIGNED_GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; UNALIGNED_GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: 
[[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX9-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX9-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX9-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX9-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; UNALIGNED_GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; UNALIGNED_GFX9-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (i8) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (i8) from unknown-address + 13, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (i8) from unknown-address + 14, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p5) :: (load (i8) from unknown-address + 15, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX9-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C4]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD15]](p5) :: (load (i8) from unknown-address + 16, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD16]](p5) :: (load (i8) from unknown-address + 17, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD13]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[ZEXTLOAD12]] + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD17]](p5) :: (load (i8) from unknown-address + 18, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD17]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD18]](p5) :: (load (i8) from unknown-address + 19, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL13:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR13:%[0-9]+]]:_(i32) = G_OR [[SHL13]], [[ZEXTLOAD14]] + ; UNALIGNED_GFX9-NEXT: [[SHL14:%[0-9]+]]:_(i32) = G_SHL [[OR13]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR14:%[0-9]+]]:_(i32) = G_OR [[SHL14]], [[OR12]] + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD19]](p5) :: (load (i8) from unknown-address + 20, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD20]](p5) :: (load (i8) from unknown-address + 21, addrspace 5) + ; UNALIGNED_GFX9-NEXT: 
[[SHL15:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD16]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR15:%[0-9]+]]:_(i32) = G_OR [[SHL15]], [[ZEXTLOAD15]] + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD21]](p5) :: (load (i8) from unknown-address + 22, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD21]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD22]](p5) :: (load (i8) from unknown-address + 23, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL16:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR16:%[0-9]+]]:_(i32) = G_OR [[SHL16]], [[ZEXTLOAD17]] + ; UNALIGNED_GFX9-NEXT: [[SHL17:%[0-9]+]]:_(i32) = G_SHL [[OR16]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR17:%[0-9]+]]:_(i32) = G_OR [[SHL17]], [[OR15]] + ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR11]](i32), [[OR14]](i32), [[OR17]](i32) + ; UNALIGNED_GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; UNALIGNED_GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; UNALIGNED_GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; UNALIGNED_GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_v2s96_align1 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: 
[[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX10-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; UNALIGNED_GFX10-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; UNALIGNED_GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; UNALIGNED_GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 
14, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; UNALIGNED_GFX10-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD15]](p5) :: (load (s8) from unknown-address + 16, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD16]](p5) :: (load (s8) from unknown-address + 17, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD13]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[ZEXTLOAD12]] - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD17]](p5) :: (load (s8) from unknown-address + 18, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD17]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p5) :: (load (s8) from unknown-address + 19, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[SHL13]], [[ZEXTLOAD14]] - ; UNALIGNED_GFX10-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[OR13]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[SHL14]], [[OR12]] - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD19]](p5) :: (load (s8) from unknown-address + 20, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD20]](p5) :: (load (s8) from unknown-address + 21, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD16]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[SHL15]], [[ZEXTLOAD15]] - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD21]](p5) :: (load (s8) from unknown-address + 22, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD21]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p5) :: (load (s8) from unknown-address + 23, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[SHL16]], [[ZEXTLOAD17]] - ; UNALIGNED_GFX10-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[OR16]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) - ; UNALIGNED_GFX10-NEXT: 
[[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; UNALIGNED_GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; UNALIGNED_GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; UNALIGNED_GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX10-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX10-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX10-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; 
UNALIGNED_GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX10-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; UNALIGNED_GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; UNALIGNED_GFX10-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (i8) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (i8) from unknown-address + 13, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (i8) from unknown-address + 14, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p5) :: (load (i8) from unknown-address + 15, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX10-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C4]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD15]](p5) :: (load (i8) from unknown-address + 16, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD16]](p5) :: (load (i8) from unknown-address + 17, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD13]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[ZEXTLOAD12]] + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p5) = G_PTR_ADD 
[[PTR_ADD15]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD17]](p5) :: (load (i8) from unknown-address + 18, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD17]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD18]](p5) :: (load (i8) from unknown-address + 19, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL13:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR13:%[0-9]+]]:_(i32) = G_OR [[SHL13]], [[ZEXTLOAD14]] + ; UNALIGNED_GFX10-NEXT: [[SHL14:%[0-9]+]]:_(i32) = G_SHL [[OR13]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR14:%[0-9]+]]:_(i32) = G_OR [[SHL14]], [[OR12]] + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD19]](p5) :: (load (i8) from unknown-address + 20, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD20]](p5) :: (load (i8) from unknown-address + 21, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL15:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD16]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR15:%[0-9]+]]:_(i32) = G_OR [[SHL15]], [[ZEXTLOAD15]] + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD21]](p5) :: (load (i8) from unknown-address + 22, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD21]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD22]](p5) :: (load (i8) from unknown-address + 23, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL16:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR16:%[0-9]+]]:_(i32) = G_OR [[SHL16]], [[ZEXTLOAD17]] + ; UNALIGNED_GFX10-NEXT: [[SHL17:%[0-9]+]]:_(i32) = G_SHL [[OR16]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR17:%[0-9]+]]:_(i32) = G_OR [[SHL17]], [[OR15]] + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR11]](i32), [[OR14]](i32), [[OR17]](i32) + ; UNALIGNED_GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; UNALIGNED_GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; UNALIGNED_GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; UNALIGNED_GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_v2s96_align1 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX11-NEXT: 
[[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX11-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; UNALIGNED_GFX11-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], 
[[C3]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; UNALIGNED_GFX11-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; UNALIGNED_GFX11-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD15]](p5) :: (load (s8) from unknown-address + 16, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD16]](p5) :: (load (s8) from unknown-address + 17, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD13]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[ZEXTLOAD12]] - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD17]](p5) :: (load (s8) from unknown-address + 18, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD17]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p5) :: (load (s8) from unknown-address + 19, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[SHL13]], [[ZEXTLOAD14]] - ; UNALIGNED_GFX11-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[OR13]], [[C3]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[SHL14]], [[OR12]] - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD19]](p5) :: (load (s8) from unknown-address + 20, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: 
[[ZEXTLOAD16:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD20]](p5) :: (load (s8) from unknown-address + 21, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD16]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[SHL15]], [[ZEXTLOAD15]] - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD21]](p5) :: (load (s8) from unknown-address + 22, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD21]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p5) :: (load (s8) from unknown-address + 23, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[SHL16]], [[ZEXTLOAD17]] - ; UNALIGNED_GFX11-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[OR16]], [[C3]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] - ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) - ; UNALIGNED_GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; UNALIGNED_GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; UNALIGNED_GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; UNALIGNED_GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX11-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) 
:: (load (i8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX11-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX11-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; UNALIGNED_GFX11-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (i8) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (i8) from unknown-address + 13, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (i8) from unknown-address + 14, addrspace 5) + ; UNALIGNED_GFX11-NEXT: 
[[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p5) :: (load (i8) from unknown-address + 15, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX11-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C4]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD15]](p5) :: (load (i8) from unknown-address + 16, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD16]](p5) :: (load (i8) from unknown-address + 17, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD13]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[ZEXTLOAD12]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD17]](p5) :: (load (i8) from unknown-address + 18, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD17]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD18]](p5) :: (load (i8) from unknown-address + 19, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL13:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR13:%[0-9]+]]:_(i32) = G_OR [[SHL13]], [[ZEXTLOAD14]] + ; UNALIGNED_GFX11-NEXT: [[SHL14:%[0-9]+]]:_(i32) = G_SHL [[OR13]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR14:%[0-9]+]]:_(i32) = G_OR [[SHL14]], [[OR12]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD19]](p5) :: (load (i8) from unknown-address + 20, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD20]](p5) :: (load (i8) from unknown-address + 21, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL15:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD16]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR15:%[0-9]+]]:_(i32) = G_OR [[SHL15]], [[ZEXTLOAD15]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD21]](p5) :: (load (i8) from unknown-address + 22, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD21]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD22]](p5) :: (load (i8) from unknown-address + 23, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL16:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR16:%[0-9]+]]:_(i32) = G_OR [[SHL16]], [[ZEXTLOAD17]] + ; UNALIGNED_GFX11-NEXT: [[SHL17:%[0-9]+]]:_(i32) = G_SHL [[OR16]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR17:%[0-9]+]]:_(i32) = G_OR [[SHL17]], [[OR15]] + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR11]](i32), [[OR14]](i32), [[OR17]](i32) + ; UNALIGNED_GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST 
[[BUILD_VECTOR1]](<3 x i32>) + ; UNALIGNED_GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; UNALIGNED_GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; UNALIGNED_GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_v2s96_align1 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] - ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) 
- ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] - ; UNALIGNED_GFX12-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; UNALIGNED_GFX12-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] - ; UNALIGNED_GFX12-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD15]](p5) :: (load (s8) from unknown-address + 16, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD16]](p5) :: (load (s8) from unknown-address + 17, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD13]], [[C1]](s32) - ; 
UNALIGNED_GFX12-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[ZEXTLOAD12]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD17]](p5) :: (load (s8) from unknown-address + 18, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD17]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p5) :: (load (s8) from unknown-address + 19, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[SHL13]], [[ZEXTLOAD14]] - ; UNALIGNED_GFX12-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[OR13]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[SHL14]], [[OR12]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD19]](p5) :: (load (s8) from unknown-address + 20, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD20]](p5) :: (load (s8) from unknown-address + 21, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD16]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[SHL15]], [[ZEXTLOAD15]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD21]](p5) :: (load (s8) from unknown-address + 22, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD21]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p5) :: (load (s8) from unknown-address + 23, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[SHL16]], [[ZEXTLOAD17]] - ; UNALIGNED_GFX12-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[OR16]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) - ; UNALIGNED_GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; UNALIGNED_GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; UNALIGNED_GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; UNALIGNED_GFX12-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (i8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], 
[[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[OR1]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (i8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD4]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[OR4]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i8) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (i8) from unknown-address + 9, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD7]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i8) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i8) from unknown-address + 11, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX12-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[OR7]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: 
[[OR8:%[0-9]+]]:_(i32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR2]](i32), [[OR5]](i32), [[OR8]](i32) + ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; UNALIGNED_GFX12-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (i8) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (i8) from unknown-address + 13, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD10]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (i8) from unknown-address + 14, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD14]](p5) :: (load (i8) from unknown-address + 15, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX12-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[OR10]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[SHL11]], [[OR9]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C4]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD15]](p5) :: (load (i8) from unknown-address + 16, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD16]](p5) :: (load (i8) from unknown-address + 17, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD13]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[SHL12]], [[ZEXTLOAD12]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD17]](p5) :: (load (i8) from unknown-address + 18, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD17]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD18]](p5) :: (load (i8) from unknown-address + 19, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL13:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR13:%[0-9]+]]:_(i32) = G_OR [[SHL13]], [[ZEXTLOAD14]] + ; UNALIGNED_GFX12-NEXT: [[SHL14:%[0-9]+]]:_(i32) = G_SHL [[OR13]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR14:%[0-9]+]]:_(i32) = G_OR [[SHL14]], [[OR12]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD19]](p5) :: (load (i8) from unknown-address + 20, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(i32) = G_ZEXTLOAD 
[[PTR_ADD20]](p5) :: (load (i8) from unknown-address + 21, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL15:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD16]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR15:%[0-9]+]]:_(i32) = G_OR [[SHL15]], [[ZEXTLOAD15]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD21]](p5) :: (load (i8) from unknown-address + 22, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD21]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD22]](p5) :: (load (i8) from unknown-address + 23, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL16:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR16:%[0-9]+]]:_(i32) = G_OR [[SHL16]], [[ZEXTLOAD17]] + ; UNALIGNED_GFX12-NEXT: [[SHL17:%[0-9]+]]:_(i32) = G_SHL [[OR16]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR17:%[0-9]+]]:_(i32) = G_OR [[SHL17]], [[OR15]] + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR11]](i32), [[OR14]](i32), [[OR17]](i32) + ; UNALIGNED_GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; UNALIGNED_GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; UNALIGNED_GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; UNALIGNED_GFX12-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) %0:_(p5) = COPY $vgpr0 - %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 1, addrspace 5) - %2:_(s96) = G_EXTRACT %1, 0 - %3:_(s96) = G_EXTRACT %1, 96 - $vgpr0_vgpr1_vgpr2 = COPY %2 - $vgpr3_vgpr4_vgpr5 = COPY %3 + %1:_(<2 x i96>) = G_LOAD %0(p5) :: (load (<2 x i96>), align 1, addrspace 5) + %2:_(i96) = G_EXTRACT %1(<2 x i96>), 0 + %3:_(i96) = G_EXTRACT %1(<2 x i96>), 96 + $vgpr0_vgpr1_vgpr2 = COPY %2(i96) + $vgpr3_vgpr4_vgpr5 = COPY %3(i96) ... 
--- @@ -19028,462 +19028,462 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5) - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C2]](s32) - ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s16) from unknown-address + 16, addrspace 5) - ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s16) from unknown-address + 18, addrspace 5) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD4]] - ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s32) - ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s16) from unknown-address + 20, addrspace 5) - ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s16) from unknown-address + 22, addrspace 5) - ; SI-NEXT: 
[[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] - ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i16) from unknown-address + 6, addrspace 5) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; SI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i16) from unknown-address + 8, addrspace 5) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i16) from unknown-address + 10, addrspace 5) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i16) from unknown-address + 12, addrspace 5) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i16) from unknown-address + 14, addrspace 5) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C2]](i32) + ; SI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i16) from unknown-address + 16, addrspace 5) + ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD8]](p5) :: (load (i16) from unknown-address + 18, addrspace 5) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD4]] + ; 
SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C3]](i32) + ; SI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i16) from unknown-address + 20, addrspace 5) + ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i16) from unknown-address + 22, addrspace 5) + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[ZEXTLOAD5]] + ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR3]](i32), [[OR4]](i32), [[OR5]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; CI-LABEL: name: test_load_private_v2s96_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5) - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5) - ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5) - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], 
[[ZEXTLOAD3]] - ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C2]](s32) - ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s16) from unknown-address + 16, addrspace 5) - ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s16) from unknown-address + 18, addrspace 5) - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD4]] - ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s32) - ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s16) from unknown-address + 20, addrspace 5) - ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s16) from unknown-address + 22, addrspace 5) - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] - ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) - ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i16) from unknown-address + 6, addrspace 5) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i16) from unknown-address + 8, addrspace 5) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i16) from unknown-address + 10, addrspace 5) + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; CI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; CI-NEXT: 
[[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i16) from unknown-address + 12, addrspace 5) + ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i16) from unknown-address + 14, addrspace 5) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C2]](i32) + ; CI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i16) from unknown-address + 16, addrspace 5) + ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD8]](p5) :: (load (i16) from unknown-address + 18, addrspace 5) + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; CI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD4]] + ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C3]](i32) + ; CI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i16) from unknown-address + 20, addrspace 5) + ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i16) from unknown-address + 22, addrspace 5) + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[ZEXTLOAD5]] + ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR3]](i32), [[OR4]](i32), [[OR5]](i32) + ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; CI-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; VI-LABEL: name: test_load_private_v2s96_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI-NEXT: 
[[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5) - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C2]](s32) - ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s16) from unknown-address + 16, addrspace 5) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s16) from unknown-address + 18, addrspace 5) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD4]] - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s32) - ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s16) from unknown-address + 20, addrspace 5) - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s16) from unknown-address + 22, addrspace 5) - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] - ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i16) from unknown-address + 6, addrspace 5) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) 
+ ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; VI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i16) from unknown-address + 8, addrspace 5) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i16) from unknown-address + 10, addrspace 5) + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i16) from unknown-address + 12, addrspace 5) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i16) from unknown-address + 14, addrspace 5) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C2]](i32) + ; VI-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i16) from unknown-address + 16, addrspace 5) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD8]](p5) :: (load (i16) from unknown-address + 18, addrspace 5) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD4]] + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C3]](i32) + ; VI-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i16) from unknown-address + 20, addrspace 5) + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i16) from unknown-address + 22, addrspace 5) + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[ZEXTLOAD5]] + ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR3]](i32), [[OR4]](i32), [[OR5]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; GFX9-LABEL: name: test_load_private_v2s96_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 2, addrspace 5) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, align 2, addrspace 5) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], 
[[C1]](s32) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 2, addrspace 5) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, align 2, addrspace 5) - ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](s32) - ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 2, addrspace 5) - ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](s32) - ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, align 2, addrspace 5) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 2, addrspace 5) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, align 2, addrspace 5) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 2, addrspace 5) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, align 2, addrspace 5) + ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](i32) + ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p5) :: (load (i32) from unknown-address + 16, align 2, addrspace 5) + ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](i32) + ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i32) from unknown-address + 20, align 2, addrspace 5) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; GFX10-LABEL: name: test_load_private_v2s96_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 
- ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 2, addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, align 2, addrspace 5) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 2, addrspace 5) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, align 2, addrspace 5) - ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](s32) - ; GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 2, addrspace 5) - ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](s32) - ; GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, align 2, addrspace 5) - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 2, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, align 2, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 2, addrspace 5) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, align 2, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](i32) + ; GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p5) :: (load (i32) from unknown-address + 16, align 2, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](i32) + ; GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i32) from unknown-address + 20, align 2, addrspace 5) + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR 
[[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; GFX11-LABEL: name: test_load_private_v2s96_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s32>), align 2, addrspace 5) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<3 x s32>) from unknown-address + 12, align 2, addrspace 5) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p5) :: (load (<3 x i32>), align 2, addrspace 5) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<3 x i32>) from unknown-address + 12, align 2, addrspace 5) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD1]](<3 x i32>) + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; GFX12-LABEL: name: test_load_private_v2s96_align2 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s32>), align 2, addrspace 5) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<3 x s32>) from unknown-address + 12, align 2, addrspace 5) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX12-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p5) :: (load (<3 x i32>), align 2, addrspace 5) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<3 x i32>) from unknown-address + 12, align 2, addrspace 5) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST 
[[LOAD1]](<3 x i32>) + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; GFX12-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_v2s96_align2 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; UNALIGNED_GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) - ; UNALIGNED_GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; UNALIGNED_GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s16) from unknown-address + 16, addrspace 5) - ; UNALIGNED_GFX9-NEXT: 
[[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s16) from unknown-address + 18, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD4]] - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s32) - ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s16) from unknown-address + 20, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s16) from unknown-address + 22, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) - ; UNALIGNED_GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; UNALIGNED_GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; UNALIGNED_GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; UNALIGNED_GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i16) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i16) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i16) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32) + ; UNALIGNED_GFX9-NEXT: 
[[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; UNALIGNED_GFX9-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i16) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i16) from unknown-address + 14, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i16) from unknown-address + 16, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD8]](p5) :: (load (i16) from unknown-address + 18, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD4]] + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C3]](i32) + ; UNALIGNED_GFX9-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i16) from unknown-address + 20, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i16) from unknown-address + 22, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR3]](i32), [[OR4]](i32), [[OR5]](i32) + ; UNALIGNED_GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; UNALIGNED_GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; UNALIGNED_GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; UNALIGNED_GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_v2s96_align2 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; 
UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; UNALIGNED_GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) - ; UNALIGNED_GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; UNALIGNED_GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s16) from unknown-address + 16, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s16) from unknown-address + 18, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD4]] - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s32) - ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s16) from unknown-address + 20, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s16) from unknown-address + 22, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) - ; UNALIGNED_GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; UNALIGNED_GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; UNALIGNED_GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; 
UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; UNALIGNED_GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX10-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i16) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i16) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i16) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32) + ; UNALIGNED_GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; UNALIGNED_GFX10-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i16) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i16) from unknown-address + 14, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i16) from unknown-address + 16, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD8]](p5) :: (load (i16) from unknown-address + 18, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: 
[[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD4]] + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C3]](i32) + ; UNALIGNED_GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i16) from unknown-address + 20, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i16) from unknown-address + 22, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR3]](i32), [[OR4]](i32), [[OR5]](i32) + ; UNALIGNED_GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; UNALIGNED_GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; UNALIGNED_GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; UNALIGNED_GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_v2s96_align2 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) - ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; 
UNALIGNED_GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s16) from unknown-address + 16, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s16) from unknown-address + 18, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD4]] - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s32) - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s16) from unknown-address + 20, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s16) from unknown-address + 22, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) - ; UNALIGNED_GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; UNALIGNED_GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; UNALIGNED_GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; UNALIGNED_GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i16) from unknown-address + 6, addrspace 5) + ; 
UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i16) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i16) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32) + ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; UNALIGNED_GFX11-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (i16) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i16) from unknown-address + 14, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C2]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i16) from unknown-address + 16, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD8]](p5) :: (load (i16) from unknown-address + 18, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD4]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C3]](i32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i16) from unknown-address + 20, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i16) from unknown-address + 22, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; UNALIGNED_GFX11-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR3]](i32), [[OR4]](i32), [[OR5]](i32) + ; UNALIGNED_GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; UNALIGNED_GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; UNALIGNED_GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; UNALIGNED_GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_v2s96_align2 ; UNALIGNED_GFX12: liveins: 
$vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] - ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) - ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s16) from unknown-address + 16, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s16) from unknown-address + 18, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], 
[[ZEXTLOAD4]] - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s32) - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s16) from unknown-address + 20, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s16) from unknown-address + 22, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) - ; UNALIGNED_GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; UNALIGNED_GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; UNALIGNED_GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; UNALIGNED_GFX12-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LOAD]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (i16) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i16) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LOAD1]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (i16) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i16) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LOAD2]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32) + ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: 
(load (i16) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD6]](p5) :: (load (i16) from unknown-address + 14, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LOAD3]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C2]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (i16) from unknown-address + 16, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD8]](p5) :: (load (i16) from unknown-address + 18, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LOAD4]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[SHL4]], [[ZEXTLOAD4]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C3]](i32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (i16) from unknown-address + 20, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD10]](p5) :: (load (i16) from unknown-address + 22, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[LOAD5]], [[C1]](i32) + ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[SHL5]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR3]](i32), [[OR4]](i32), [[OR5]](i32) + ; UNALIGNED_GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; UNALIGNED_GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; UNALIGNED_GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; UNALIGNED_GFX12-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) %0:_(p5) = COPY $vgpr0 - %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 2, addrspace 5) - %2:_(s96) = G_EXTRACT %1, 0 - %3:_(s96) = G_EXTRACT %1, 96 - $vgpr0_vgpr1_vgpr2 = COPY %2 - $vgpr3_vgpr4_vgpr5 = COPY %3 + %1:_(<2 x i96>) = G_LOAD %0(p5) :: (load (<2 x i96>), align 2, addrspace 5) + %2:_(i96) = G_EXTRACT %1(<2 x i96>), 0 + %3:_(i96) = G_EXTRACT %1(<2 x i96>), 96 + $vgpr0_vgpr1_vgpr2 = COPY %2(i96) + $vgpr3_vgpr4_vgpr5 = COPY %3(i96) ... 
--- @@ -19496,256 +19496,256 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](s32) - ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, addrspace 5) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](s32) - ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) - ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, addrspace 5) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](i32) + ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p5) :: (load (i32) from unknown-address + 16, addrspace 5) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](i32) + ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i32) from unknown-address + 20, addrspace 5) + ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST 
[[BUILD_VECTOR1]](<3 x i32>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; CI-LABEL: name: test_load_private_v2s96_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](s32) - ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, addrspace 5) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](s32) - ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) - ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) - ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, addrspace 5) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](i32) + ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p5) :: (load (i32) from unknown-address + 16, addrspace 5) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](i32) + ; CI-NEXT: 
[[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i32) from unknown-address + 20, addrspace 5) + ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32) + ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; CI-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; VI-LABEL: name: test_load_private_v2s96_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](s32) - ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, addrspace 5) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](s32) - ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) - ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, addrspace 5) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; VI-NEXT: 
[[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](i32) + ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p5) :: (load (i32) from unknown-address + 16, addrspace 5) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](i32) + ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i32) from unknown-address + 20, addrspace 5) + ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; GFX9-LABEL: name: test_load_private_v2s96_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](s32) - ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, addrspace 5) - ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](s32) - ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, addrspace 5) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; 
GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](i32) + ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p5) :: (load (i32) from unknown-address + 16, addrspace 5) + ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](i32) + ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i32) from unknown-address + 20, addrspace 5) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; GFX10-LABEL: name: test_load_private_v2s96_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](s32) - ; GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, addrspace 5) - ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](s32) - ; GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from 
unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, addrspace 5) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](i32) + ; GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p5) :: (load (i32) from unknown-address + 16, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](i32) + ; GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i32) from unknown-address + 20, addrspace 5) + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; GFX11-LABEL: name: test_load_private_v2s96_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s32>), align 4, addrspace 5) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 5) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p5) :: (load (<3 x i32>), align 4, addrspace 5) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<3 x i32>) from unknown-address + 12, align 4, addrspace 5) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD1]](<3 x i32>) + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; GFX12-LABEL: name: test_load_private_v2s96_align4 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x 
s32>), align 4, addrspace 5) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 5) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX12-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p5) :: (load (<3 x i32>), align 4, addrspace 5) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<3 x i32>) from unknown-address + 12, align 4, addrspace 5) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD1]](<3 x i32>) + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; GFX12-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_v2s96_align4 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; UNALIGNED_GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) - ; UNALIGNED_GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; UNALIGNED_GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY 
[[BITCAST]](s96) - ; UNALIGNED_GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; UNALIGNED_GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; UNALIGNED_GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p5) :: (load (i32) from unknown-address + 16, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i32) from unknown-address + 20, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32) + ; UNALIGNED_GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; UNALIGNED_GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; UNALIGNED_GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; UNALIGNED_GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_v2s96_align4 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; UNALIGNED_GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], 
[[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) - ; UNALIGNED_GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; UNALIGNED_GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; UNALIGNED_GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; UNALIGNED_GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; UNALIGNED_GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p5) :: (load (i32) from unknown-address + 16, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i32) from unknown-address + 20, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32) + ; UNALIGNED_GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; UNALIGNED_GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; UNALIGNED_GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; UNALIGNED_GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_v2s96_align4 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s32>), align 4, addrspace 5) - ; 
UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; UNALIGNED_GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; UNALIGNED_GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; UNALIGNED_GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p5) :: (load (<3 x i32>), align 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<3 x i32>) from unknown-address + 12, align 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD1]](<3 x i32>) + ; UNALIGNED_GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; UNALIGNED_GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; UNALIGNED_GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_v2s96_align4 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s32>), align 4, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; UNALIGNED_GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; UNALIGNED_GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; UNALIGNED_GFX12-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p5) :: (load (<3 x i32>), align 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<3 x i32>) from unknown-address + 12, align 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD1]](<3 x i32>) + ; UNALIGNED_GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; UNALIGNED_GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; 
UNALIGNED_GFX12-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) %0:_(p5) = COPY $vgpr0 - %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 4, addrspace 5) - %2:_(s96) = G_EXTRACT %1, 0 - %3:_(s96) = G_EXTRACT %1, 96 - $vgpr0_vgpr1_vgpr2 = COPY %2 - $vgpr3_vgpr4_vgpr5 = COPY %3 + %1:_(<2 x i96>) = G_LOAD %0(p5) :: (load (<2 x i96>), align 4, addrspace 5) + %2:_(i96) = G_EXTRACT %1(<2 x i96>), 0 + %3:_(i96) = G_EXTRACT %1(<2 x i96>), 96 + $vgpr0_vgpr1_vgpr2 = COPY %2(i96) + $vgpr3_vgpr4_vgpr5 = COPY %3(i96) ... --- @@ -19758,254 +19758,254 @@ body: | ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 16, addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](s32) - ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, addrspace 5) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](s32) - ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) - ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 16, addrspace 5) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; 
SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](i32) + ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p5) :: (load (i32) from unknown-address + 16, addrspace 5) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](i32) + ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i32) from unknown-address + 20, addrspace 5) + ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; CI-LABEL: name: test_load_private_v2s96_align16 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 16, addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](s32) - ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, addrspace 5) - ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](s32) - ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) - ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) - ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 16, addrspace 5) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; CI-NEXT: 
[[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](i32) + ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p5) :: (load (i32) from unknown-address + 16, addrspace 5) + ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](i32) + ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i32) from unknown-address + 20, addrspace 5) + ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32) + ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; CI-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; VI-LABEL: name: test_load_private_v2s96_align16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 16, addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](s32) - ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, addrspace 5) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](s32) - ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) - ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 16, addrspace 5) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: 
[[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](i32) + ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p5) :: (load (i32) from unknown-address + 16, addrspace 5) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](i32) + ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i32) from unknown-address + 20, addrspace 5) + ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; GFX9-LABEL: name: test_load_private_v2s96_align16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 16, addrspace 5) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](s32) - ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, addrspace 5) - ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](s32) - ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD 
[[COPY]](p5) :: (load (i32), align 16, addrspace 5) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](i32) + ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p5) :: (load (i32) from unknown-address + 16, addrspace 5) + ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](i32) + ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i32) from unknown-address + 20, addrspace 5) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; GFX10-LABEL: name: test_load_private_v2s96_align16 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 16, addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](s32) - ; GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, addrspace 5) - ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](s32) - ; GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = 
G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 16, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](i32) + ; GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p5) :: (load (i32) from unknown-address + 16, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](i32) + ; GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i32) from unknown-address + 20, addrspace 5) + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; GFX11-LABEL: name: test_load_private_v2s96_align16 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s32>), align 16, addrspace 5) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 5) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p5) :: (load (<3 x i32>), align 16, addrspace 5) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD 
[[COPY]], [[C]](i32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<3 x i32>) from unknown-address + 12, align 4, addrspace 5) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD1]](<3 x i32>) + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; GFX12-LABEL: name: test_load_private_v2s96_align16 ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s32>), align 16, addrspace 5) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 5) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX12-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p5) :: (load (<3 x i32>), align 16, addrspace 5) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<3 x i32>) from unknown-address + 12, align 4, addrspace 5) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD1]](<3 x i32>) + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; GFX12-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_v2s96_align16 ; UNALIGNED_GFX9: liveins: $vgpr0 ; UNALIGNED_GFX9-NEXT: {{ $}} ; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 16, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; UNALIGNED_GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address 
+ 12, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) - ; UNALIGNED_GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; UNALIGNED_GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; UNALIGNED_GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; UNALIGNED_GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 16, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; UNALIGNED_GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p5) :: (load (i32) from unknown-address + 16, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](i32) + ; UNALIGNED_GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i32) from unknown-address + 20, addrspace 5) + ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32) + ; UNALIGNED_GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; UNALIGNED_GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; UNALIGNED_GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; UNALIGNED_GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_v2s96_align16 ; UNALIGNED_GFX10: liveins: $vgpr0 ; UNALIGNED_GFX10-NEXT: {{ $}} ; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 16, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; 
UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) - ; UNALIGNED_GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) - ; UNALIGNED_GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; UNALIGNED_GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; UNALIGNED_GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; UNALIGNED_GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), align 16, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD]](p5) :: (load (i32) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD1]](p5) :: (load (i32) from unknown-address + 8, align 8, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD]](i32), [[LOAD1]](i32), [[LOAD2]](i32) + ; UNALIGNED_GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR]](<3 x i32>) + ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD2]](p5) :: (load (i32) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD3]](p5) :: (load (i32) from unknown-address + 16, addrspace 5) + ; UNALIGNED_GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](i32) + ; UNALIGNED_GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(i32) = G_LOAD [[PTR_ADD4]](p5) :: (load (i32) from unknown-address + 20, addrspace 5) + ; UNALIGNED_GFX10-NEXT: 
[[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LOAD3]](i32), [[LOAD4]](i32), [[LOAD5]](i32) + ; UNALIGNED_GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[BUILD_VECTOR1]](<3 x i32>) + ; UNALIGNED_GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; UNALIGNED_GFX10-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; UNALIGNED_GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_v2s96_align16 ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s32>), align 16, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; UNALIGNED_GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; UNALIGNED_GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; UNALIGNED_GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p5) :: (load (<3 x i32>), align 16, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<3 x i32>) from unknown-address + 12, align 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD1]](<3 x i32>) + ; UNALIGNED_GFX11-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; UNALIGNED_GFX11-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; UNALIGNED_GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_v2s96_align16 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s32>), align 16, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; UNALIGNED_GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; UNALIGNED_GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; UNALIGNED_GFX12-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; UNALIGNED_GFX12-NEXT: 
[[LOAD:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[COPY]](p5) :: (load (<3 x i32>), align 16, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD]](<3 x i32>) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](i32) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x i32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<3 x i32>) from unknown-address + 12, align 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(i96) = G_BITCAST [[LOAD1]](<3 x i32>) + ; UNALIGNED_GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY [[BITCAST]](i96) + ; UNALIGNED_GFX12-NEXT: [[COPY2:%[0-9]+]]:_(i96) = COPY [[BITCAST1]](i96) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](i96) + ; UNALIGNED_GFX12-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](i96) %0:_(p5) = COPY $vgpr0 - %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 16, addrspace 5) - %2:_(s96) = G_EXTRACT %1, 0 - %3:_(s96) = G_EXTRACT %1, 96 - $vgpr0_vgpr1_vgpr2 = COPY %2 - $vgpr3_vgpr4_vgpr5 = COPY %3 + %1:_(<2 x i96>) = G_LOAD %0(p5) :: (load (<2 x i96>), align 16, addrspace 5) + %2:_(i96) = G_EXTRACT %1(<2 x i96>), 0 + %3:_(i96) = G_EXTRACT %1(<2 x i96>), 96 + $vgpr0_vgpr1_vgpr2 = COPY %2(i96) + $vgpr3_vgpr4_vgpr5 = COPY %3(i96) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir index 3ec9a48b02e69..3cd78304eeee5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir @@ -14,30 +14,30 @@ body: | ; SI-LABEL: name: test_lshr_s32_s32 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[COPY1]](s32) - ; SI-NEXT: $vgpr0 = COPY [[LSHR]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[COPY1]](i32) + ; SI-NEXT: $vgpr0 = COPY [[LSHR]](i32) ; ; VI-LABEL: name: test_lshr_s32_s32 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[COPY1]](s32) - ; VI-NEXT: $vgpr0 = COPY [[LSHR]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[COPY1]](i32) + ; VI-NEXT: $vgpr0 = COPY [[LSHR]](i32) ; ; GFX9-LABEL: name: test_lshr_s32_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[COPY1]](s32) - ; GFX9-NEXT: $vgpr0 = COPY [[LSHR]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_LSHR %0, %1 - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[COPY1]](i32) + ; GFX9-NEXT: $vgpr0 = COPY [[LSHR]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_LSHR %0, %1(i32) + $vgpr0 = COPY %2(i32) ... 
--- name: test_lshr_s64_s64 @@ -48,33 +48,33 @@ body: | ; SI-LABEL: name: test_lshr_s64_s64 ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY]], [[TRUNC]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LSHR]](s64) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY1]](i64) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[COPY]], [[TRUNC]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LSHR]](i64) ; ; VI-LABEL: name: test_lshr_s64_s64 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY]], [[TRUNC]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LSHR]](s64) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY1]](i64) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[COPY]], [[TRUNC]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LSHR]](i64) ; ; GFX9-LABEL: name: test_lshr_s64_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY]], [[TRUNC]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LSHR]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = G_LSHR %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY1]](i64) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[COPY]], [[TRUNC]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LSHR]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64) = G_LSHR %0, %1(i64) + $vgpr0_vgpr1 = COPY %2(i64) ... 
--- name: test_lshr_s64_s32 @@ -85,30 +85,30 @@ body: | ; SI-LABEL: name: test_lshr_s64_s32 ; SI: liveins: $vgpr0_vgpr1, $vgpr2 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY]], [[COPY1]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LSHR]](s64) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[COPY]], [[COPY1]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LSHR]](i64) ; ; VI-LABEL: name: test_lshr_s64_s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY]], [[COPY1]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LSHR]](s64) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[COPY]], [[COPY1]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LSHR]](i64) ; ; GFX9-LABEL: name: test_lshr_s64_s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY]], [[COPY1]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LSHR]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s64) = G_LSHR %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[COPY]], [[COPY1]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LSHR]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = COPY $vgpr2 + %2:_(i64) = G_LSHR %0, %1(i32) + $vgpr0_vgpr1 = COPY %2(i64) ... 
--- name: test_lshr_s64_s16 @@ -119,37 +119,37 @@ body: | ; SI-LABEL: name: test_lshr_s64_s16 ; SI: liveins: $vgpr0_vgpr1, $vgpr2 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY]], [[AND]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LSHR]](s64) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[COPY]], [[AND]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LSHR]](i64) ; ; VI-LABEL: name: test_lshr_s64_s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY]], [[AND]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LSHR]](s64) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[COPY]], [[AND]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LSHR]](i64) ; ; GFX9-LABEL: name: test_lshr_s64_s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY]], [[AND]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LSHR]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s16) = G_TRUNC %1 - %3:_(s64) = G_LSHR %0, %2 - $vgpr0_vgpr1 = COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[COPY]], [[AND]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LSHR]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = COPY $vgpr2 + %2:_(i16) = G_TRUNC %1(i32) + %3:_(i64) = G_LSHR %0, %2(i16) + $vgpr0_vgpr1 = COPY %3(i64) ... 
--- @@ -161,40 +161,40 @@ body: | ; SI-LABEL: name: test_lshr_s16_s32 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY1]](s32) - ; SI-NEXT: $vgpr0 = COPY [[LSHR]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[AND]], [[COPY1]](i32) + ; SI-NEXT: $vgpr0 = COPY [[LSHR]](i32) ; ; VI-LABEL: name: test_lshr_s16_s32 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[TRUNC1]](s16) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC]], [[TRUNC1]](i16) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR]](i16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: test_lshr_s16_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[TRUNC1]](s16) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_LSHR %2, %1 - %4:_(s32) = G_ANYEXT %3 - $vgpr0 = COPY %4 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC]], [[TRUNC1]](i16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_LSHR %2, %1(i32) + %4:_(i32) = G_ANYEXT %3(i16) + $vgpr0 = COPY %4(i32) ... 
--- @@ -206,42 +206,42 @@ body: | ; SI-LABEL: name: test_lshr_s16_s16 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[AND]](s32) - ; SI-NEXT: $vgpr0 = COPY [[LSHR]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[AND1]], [[AND]](i32) + ; SI-NEXT: $vgpr0 = COPY [[LSHR]](i32) ; ; VI-LABEL: name: test_lshr_s16_s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[TRUNC1]](s16) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC]], [[TRUNC1]](i16) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR]](i16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: test_lshr_s16_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[TRUNC1]](s16) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s16) = G_LSHR %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC]], [[TRUNC1]](i16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i16) = G_LSHR %2, %3(i16) + %5:_(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... 
--- @@ -253,47 +253,47 @@ body: | ; SI-LABEL: name: test_lshr_s16_i8 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[AND]](s32) - ; SI-NEXT: $vgpr0 = COPY [[LSHR]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C1]] + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[AND1]], [[AND]](i32) + ; SI-NEXT: $vgpr0 = COPY [[LSHR]](i32) ; ; VI-LABEL: name: test_lshr_s16_i8 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[AND]](s16) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C]] + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC]], [[AND]](i16) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR]](i16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: test_lshr_s16_i8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[AND]](s16) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s8) = G_TRUNC %1 - %4:_(s16) = G_LSHR %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C]] + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC]], [[AND]](i16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + 
%2:_(i16) = G_TRUNC %0(i32) + %3:_(i8) = G_TRUNC %1(i32) + %4:_(i16) = G_LSHR %2, %3(i8) + %5:_(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... --- @@ -305,48 +305,48 @@ body: | ; SI-LABEL: name: test_lshr_i8_i8 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[AND]](s32) - ; SI-NEXT: $vgpr0 = COPY [[LSHR]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[AND1]], [[AND]](i32) + ; SI-NEXT: $vgpr0 = COPY [[LSHR]](i32) ; ; VI-LABEL: name: test_lshr_i8_i8 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND1]], [[AND]](s16) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C]] + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C]] + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i16) = G_LSHR [[AND1]], [[AND]](i16) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR]](i16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: test_lshr_i8_i8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND1]], [[AND]](s16) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s8) = G_TRUNC %0 - %3:_(s8) = G_TRUNC %1 - %4:_(s8) = G_LSHR %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C]] + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: 
[[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C]] + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i16) = G_LSHR [[AND1]], [[AND]](i16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i8) = G_TRUNC %0(i32) + %3:_(i8) = G_TRUNC %1(i32) + %4:_(i8) = G_LSHR %2, %3(i8) + %5:_(i32) = G_ANYEXT %4(i8) + $vgpr0 = COPY %5(i32) ... --- @@ -358,42 +358,42 @@ body: | ; SI-LABEL: name: test_lshr_v2s32_v2s32 ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[UV2]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[UV3]](s32) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LSHR]](s32), [[LSHR1]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[UV2]](i32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[UV3]](i32) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[LSHR]](i32), [[LSHR1]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; VI-LABEL: name: test_lshr_v2s32_v2s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[UV2]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[UV3]](s32) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LSHR]](s32), [[LSHR1]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[UV2]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[UV3]](i32) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[LSHR]](i32), [[LSHR1]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX9-LABEL: name: test_lshr_v2s32_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = 
G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[UV2]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[UV3]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LSHR]](s32), [[LSHR1]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = G_LSHR %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[UV2]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[UV3]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[LSHR]](i32), [[LSHR1]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i32>) = G_LSHR %0, %1(<2 x i32>) + $vgpr0_vgpr1 = COPY %2(<2 x i32>) ... --- @@ -405,45 +405,45 @@ body: | ; SI-LABEL: name: test_lshr_v3s32_v3s32 ; SI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[UV3]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[UV4]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[UV5]](s32) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; SI-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[UV3]](i32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[UV4]](i32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[UV2]], [[UV5]](i32) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LSHR]](i32), [[LSHR1]](i32), [[LSHR2]](i32) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) ; ; VI-LABEL: name: test_lshr_v3s32_v3s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[UV3]](s32) - ; VI-NEXT: 
[[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[UV4]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[UV5]](s32) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; VI-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[UV3]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[UV4]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[UV2]], [[UV5]](i32) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LSHR]](i32), [[LSHR1]](i32), [[LSHR2]](i32) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) ; ; GFX9-LABEL: name: test_lshr_v3s32_v3s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[UV3]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[UV4]](s32) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[UV5]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - %2:_(<3 x s32>) = G_LSHR %0, %1 - $vgpr0_vgpr1_vgpr2 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[UV3]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[UV4]](i32) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[UV2]], [[UV5]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LSHR]](i32), [[LSHR1]](i32), [[LSHR2]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(<3 x i32>) = G_LSHR %0, %1(<3 x i32>) + $vgpr0_vgpr1_vgpr2 = COPY %2(<3 x i32>) ... 
--- @@ -455,42 +455,42 @@ body: | ; SI-LABEL: name: test_lshr_v2s64_v2s32 ; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[UV2]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[UV3]](s32) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr4_vgpr5 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[UV2]](i32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i64) = G_LSHR [[UV1]], [[UV3]](i32) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[LSHR]](i64), [[LSHR1]](i64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; VI-LABEL: name: test_lshr_v2s64_v2s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[UV2]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[UV3]](s32) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr4_vgpr5 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[UV2]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i64) = G_LSHR [[UV1]], [[UV3]](i32) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[LSHR]](i64), [[LSHR1]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; GFX9-LABEL: name: test_lshr_v2s64_v2s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[UV2]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[UV3]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64) - ; 
GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s32>) = COPY $vgpr4_vgpr5 - %2:_(<2 x s64>) = G_LSHR %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr4_vgpr5 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[UV2]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i64) = G_LSHR [[UV1]], [[UV3]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[LSHR]](i64), [[LSHR1]](i64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x i32>) = COPY $vgpr4_vgpr5 + %2:_(<2 x i64>) = G_LSHR %0, %1(<2 x i32>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<2 x i64>) ... --- @@ -502,54 +502,54 @@ body: | ; SI-LABEL: name: test_lshr_v3s64_v3s32 ; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10 - ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[UV4]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[UV5]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[UV6]](s32) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64), [[LSHR2]](s64), [[UV10]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<4 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64), [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<4 x i64>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr8_vgpr9_vgpr10 + ; SI-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[UV4]](i32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i64) = G_LSHR [[UV1]], [[UV5]](i32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i64) = G_LSHR [[UV2]], [[UV6]](i32) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV7:%[0-9]+]]:_(i64), [[UV8:%[0-9]+]]:_(i64), [[UV9:%[0-9]+]]:_(i64), [[UV10:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[LSHR]](i64), [[LSHR1]](i64), [[LSHR2]](i64), [[UV10]](i64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; VI-LABEL: name: test_lshr_v3s64_v3s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; VI-NEXT: {{ $}} - ; 
VI-NEXT: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10 - ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[UV4]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[UV5]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[UV6]](s32) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64), [[LSHR2]](s64), [[UV10]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<4 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64), [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<4 x i64>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr8_vgpr9_vgpr10 + ; VI-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[UV4]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i64) = G_LSHR [[UV1]], [[UV5]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i64) = G_LSHR [[UV2]], [[UV6]](i32) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV7:%[0-9]+]]:_(i64), [[UV8:%[0-9]+]]:_(i64), [[UV9:%[0-9]+]]:_(i64), [[UV10:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[LSHR]](i64), [[LSHR1]](i64), [[LSHR2]](i64), [[UV10]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; GFX9-LABEL: name: test_lshr_v3s64_v3s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10 - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[UV4]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[UV5]](s32) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[UV6]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64), [[LSHR2]](s64), [[UV10]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) - %0:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - %1:_(<3 x s64>) = G_EXTRACT %0, 0 - %2:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10 
- %3:_(<3 x s64>) = G_LSHR %1, %2 - %4:_(<4 x s64>) = G_IMPLICIT_DEF - %5:_(<4 x s64>) = G_INSERT %4, %3, 0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64), [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<4 x i64>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr8_vgpr9_vgpr10 + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[UV4]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i64) = G_LSHR [[UV1]], [[UV5]](i32) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i64) = G_LSHR [[UV2]], [[UV6]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV7:%[0-9]+]]:_(i64), [[UV8:%[0-9]+]]:_(i64), [[UV9:%[0-9]+]]:_(i64), [[UV10:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[LSHR]](i64), [[LSHR1]](i64), [[LSHR2]](i64), [[UV10]](i64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) + %0:_(<4 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(<3 x i64>) = G_EXTRACT %0(<4 x i64>), 0 + %2:_(<3 x i32>) = COPY $vgpr8_vgpr9_vgpr10 + %3:_(<3 x i64>) = G_LSHR %1, %2(<3 x i32>) + %4:_(<4 x i64>) = G_IMPLICIT_DEF + %5:_(<4 x i64>) = G_INSERT %4, %3(<3 x i64>), 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %5(<4 x i64>) ... --- @@ -561,59 +561,59 @@ body: | ; SI-LABEL: name: test_lshr_v2s16_v2s16 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[AND]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[LSHR1]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]] - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND 
[[BITCAST]], [[C1]] + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[AND1]], [[AND]](i32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[LSHR]], [[LSHR1]](i32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LSHR2]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[LSHR3]], [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) ; ; VI-LABEL: name: test_lshr_v2s16_v2s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[TRUNC2]](s16) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[TRUNC3]](s16) - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR2]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR3]](s16) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC]], [[TRUNC2]](i16) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC1]], [[TRUNC3]](i16) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[LSHR2]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[LSHR3]](i16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) ; ; GFX9-LABEL: name: test_lshr_v2s16_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>) - ; GFX9-NEXT: $vgpr0 = COPY [[LSHR]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_LSHR %0, %1 - $vgpr0 = 
COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(<2 x i16>) = G_LSHR [[COPY]], [[COPY1]](<2 x i16>) + ; GFX9-NEXT: $vgpr0 = COPY [[LSHR]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = G_LSHR %0, %1(<2 x i16>) + $vgpr0 = COPY %2(<2 x i16>) ... --- @@ -625,66 +625,66 @@ body: | ; SI-LABEL: name: test_lshr_v2s16_v2s32 ; SI: liveins: $vgpr0, $vgpr0_vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[UV]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[UV1]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]] - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[AND]], [[UV]](i32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[LSHR]], [[UV1]](i32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LSHR1]], [[C1]] + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[LSHR2]], [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND2]], [[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND1]], [[SHL]] + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) ; ; VI-LABEL: name: test_lshr_v2s16_v2s32 ; VI: liveins: $vgpr0, $vgpr0_vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[TRUNC2]](s16) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[TRUNC3]](s16) - ; VI-NEXT: 
[[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR1]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR2]](s16) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[UV]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC]], [[TRUNC2]](i16) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[UV1]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC1]], [[TRUNC3]](i16) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[LSHR1]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[LSHR2]](i16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) ; ; GFX9-LABEL: name: test_lshr_v2s16_v2s32 ; GFX9: liveins: $vgpr0, $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[TRUNC2]](s16) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[TRUNC3]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[LSHR1]](s16), [[LSHR2]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %2:_(<2 x s16>) = G_LSHR %0, %1 - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[UV]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC]], [[TRUNC2]](i16) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[UV1]](i32) + ; GFX9-NEXT: 
[[LSHR2:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC1]], [[TRUNC3]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[LSHR1]](i16), [[LSHR2]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %2:_(<2 x i16>) = G_LSHR %0, %1(<2 x i32>) + $vgpr0 = COPY %2(<2 x i16>) ... --- @@ -696,117 +696,117 @@ body: | ; SI-LABEL: name: test_lshr_v3s16_v3s16 ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[AND]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[LSHR1]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[AND2]](s32) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]] - ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR5]], [[C]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL1]] - ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES 
[[COPY1]](<4 x i16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[AND1]], [[AND]](i32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[LSHR]], [[LSHR1]](i32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[BITCAST3]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C1]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[AND3]], [[AND2]](i32) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[UV5]](<2 x i16>) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[LSHR2]], [[C1]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[LSHR3]], [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND5]], [[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND4]], [[SHL]] + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(i32) = G_AND [[LSHR4]], [[C1]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LSHR5]], [[C]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND6]], [[SHL1]] + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x i16>), [[BITCAST6]](<2 x i16>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; VI-LABEL: name: test_lshr_v3s16_v3s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[TRUNC3]](s16) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[TRUNC4]](s16) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[TRUNC5]](s16) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES 
[[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR2]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR3]](s16) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR4]](s16) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR5]], [[C]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST3]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC]], [[TRUNC3]](i16) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC1]], [[TRUNC4]](i16) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC2]], [[TRUNC5]](i16) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[UV5]](<2 x i16>) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[LSHR2]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[LSHR3]](i16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[LSHR4]](i16) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LSHR5]], [[C]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x i16>), [[BITCAST6]](<2 x i16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX9-LABEL: name: test_lshr_v3s16_v3s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; 
GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[UV4]], [[UV6]](<2 x s16>) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[TRUNC1]](s16) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[LSHR1]](s16), [[TRUNC4]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 - %2:_(<3 x s16>) = G_EXTRACT %0, 0 - %3:_(<3 x s16>) = G_EXTRACT %1, 0 - %4:_(<3 x s16>) = G_LSHR %2, %3 - %5:_(<4 x s16>) = G_IMPLICIT_DEF - %6:_(<4 x s16>) = G_INSERT %5, %4, 0 - $vgpr0_vgpr1 = COPY %6 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(<2 x i16>) = G_LSHR [[UV4]], [[UV6]](<2 x i16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC]], [[TRUNC1]](i16) + ; GFX9-NEXT: 
[[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[LSHR]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(<2 x i16>), [[UV9:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV9]](<2 x i16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[LSHR1]](i16), [[TRUNC4]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = COPY $vgpr2_vgpr3 + %2:_(<3 x i16>) = G_EXTRACT %0(<4 x i16>), 0 + %3:_(<3 x i16>) = G_EXTRACT %1(<4 x i16>), 0 + %4:_(<3 x i16>) = G_LSHR %2, %3(<3 x i16>) + %5:_(<4 x i16>) = G_IMPLICIT_DEF + %6:_(<4 x i16>) = G_INSERT %5, %4(<3 x i16>), 0 + $vgpr0_vgpr1 = COPY %6(<4 x i16>) ... --- @@ -817,134 +817,134 @@ body: | ; SI-LABEL: name: test_ashr_v3s16_v3s16 ; SI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[AND]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[LSHR1]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[AND2]](s32) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], 
[[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]] - ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL1]] - ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR5]], [[SHL2]] - ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr3_vgpr4_vgpr5 + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; SI-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<6 x i16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[AND1]], [[AND]](i32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[LSHR]], [[LSHR1]](i32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[BITCAST3]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C1]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[AND3]], [[AND2]](i32) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[UV6]](<2 x i16>) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[UV7]](<2 x i16>) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[LSHR2]], [[C1]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[LSHR3]], [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND5]], [[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND4]], [[SHL]] + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(i32) = G_AND [[LSHR4]], [[C1]] + ; SI-NEXT: [[AND7:%[0-9]+]]:_(i32) = G_AND [[BITCAST4]], [[C1]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND7]], [[C]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND6]], [[SHL1]] + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; SI-NEXT: [[AND8:%[0-9]+]]:_(i32) = G_AND [[BITCAST5]], [[C1]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND8]], 
[[C]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR5]], [[SHL2]] + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x i16>), [[BITCAST7]](<2 x i16>), [[BITCAST8]](<2 x i16>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; VI-LABEL: name: test_ashr_v3s16_v3s16 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[TRUNC3]](s16) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[TRUNC4]](s16) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[TRUNC5]](s16) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR2]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR3]](s16) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR4]](s16) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR5]], [[SHL2]] - ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY 
[[CONCAT_VECTORS]](<6 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr3_vgpr4_vgpr5 + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; VI-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<6 x i16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST3]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC]], [[TRUNC3]](i16) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC1]], [[TRUNC4]](i16) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC2]], [[TRUNC5]](i16) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[UV6]](<2 x i16>) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[UV7]](<2 x i16>) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[LSHR2]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[LSHR3]](i16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[LSHR4]](i16) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST4]], [[C1]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND]], [[C]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST5]], [[C1]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR5]], [[SHL2]] + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x i16>), [[BITCAST7]](<2 x i16>), [[BITCAST8]](<2 x i16>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX9-LABEL: name: test_ashr_v3s16_v3s16 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9-NEXT: 
[[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[UV3]], [[UV9]](<2 x s16>) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[TRUNC1]](s16) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[LSHR1]](s16), [[TRUNC4]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[TRUNC6]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) - %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - %2:_(<3 x s16>), %3:_(<3 x s16>) = G_UNMERGE_VALUES %0 - %4:_(<3 x s16>), %5:_(<3 x s16>) = G_UNMERGE_VALUES %1 - %6:_(<3 x s16>) = G_LSHR %2, %4 - %7:_(<3 x s16>) = G_IMPLICIT_DEF - %8:_(<6 x s16>) = G_CONCAT_VECTORS %6, %7 - $vgpr0_vgpr1_vgpr2 = COPY %8 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>), [[UV8:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<6 x i16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV7]](<2 x i16>) + ; GFX9-NEXT: 
[[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9-NEXT: [[UV9:%[0-9]+]]:_(<2 x i16>), [[UV10:%[0-9]+]]:_(<2 x i16>), [[UV11:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<6 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(<2 x i16>) = G_LSHR [[UV3]], [[UV9]](<2 x i16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC]], [[TRUNC1]](i16) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[LSHR]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(<2 x i16>), [[UV13:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV12]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST3]](i32) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[UV13]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[LSHR1]](i16), [[TRUNC4]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC5]](i16), [[TRUNC6]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) + %0:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<6 x i16>) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(<3 x i16>), %3:_(<3 x i16>) = G_UNMERGE_VALUES %0(<6 x i16>) + %4:_(<3 x i16>), %5:_(<3 x i16>) = G_UNMERGE_VALUES %1(<6 x i16>) + %6:_(<3 x i16>) = G_LSHR %2, %4(<3 x i16>) + %7:_(<3 x i16>) = G_IMPLICIT_DEF + %8:_(<6 x i16>) = G_CONCAT_VECTORS %6(<3 x i16>), %7(<3 x i16>) + $vgpr0_vgpr1_vgpr2 = COPY %8(<6 x i16>) ... 
--- @@ -956,97 +956,97 @@ body: | ; SI-LABEL: name: test_lshr_v4s16_v4s16 ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[AND]](s32) - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[LSHR2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[AND2]](s32) - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[LSHR3]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]] - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C1]] - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C1]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL1]] - ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; SI-NEXT: 
[[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[AND1]], [[AND]](i32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[LSHR]], [[LSHR2]](i32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[BITCAST3]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C1]] + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[AND3]], [[AND2]](i32) + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(i32) = G_LSHR [[LSHR1]], [[LSHR3]](i32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[LSHR4]], [[C1]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[LSHR5]], [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND5]], [[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND4]], [[SHL]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(i32) = G_AND [[LSHR6]], [[C1]] + ; SI-NEXT: [[AND7:%[0-9]+]]:_(i32) = G_AND [[LSHR7]], [[C1]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND7]], [[C]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND6]], [[SHL1]] + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x i16>), [[BITCAST5]](<2 x i16>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; VI-LABEL: name: test_lshr_v4s16_v4s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[TRUNC4]](s16) - ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[TRUNC5]](s16) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[TRUNC6]](s16) - ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[TRUNC7]](s16) - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR4]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR5]](s16) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR 
[[ZEXT]], [[SHL]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR6]](s16) - ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR7]](s16) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST3]](i32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC]], [[TRUNC4]](i16) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC1]], [[TRUNC5]](i16) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC2]], [[TRUNC6]](i16) + ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC3]], [[TRUNC7]](i16) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[LSHR4]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[LSHR5]](i16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[LSHR6]](i16) + ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[LSHR7]](i16) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x i16>), [[BITCAST5]](<2 x i16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX9-LABEL: name: test_lshr_v4s16_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES 
[[COPY]](<4 x s16>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[UV]], [[UV2]](<2 x s16>) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[UV1]], [[UV3]](<2 x s16>) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LSHR]](<2 x s16>), [[LSHR1]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 - %2:_(<4 x s16>) = G_LSHR %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(<2 x i16>) = G_LSHR [[UV]], [[UV2]](<2 x i16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(<2 x i16>) = G_LSHR [[UV1]], [[UV3]](<2 x i16>) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[LSHR]](<2 x i16>), [[LSHR1]](<2 x i16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = COPY $vgpr2_vgpr3 + %2:_(<4 x i16>) = G_LSHR %0, %1(<4 x i16>) + $vgpr0_vgpr1 = COPY %2(<4 x i16>) ... --- @@ -1058,79 +1058,79 @@ body: | ; SI-LABEL: name: test_lshr_s128_s128 ; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] - ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]] - ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[COPY1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[COPY1]](s32) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[SUB1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR1]], [[SHL]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[SUB]](s32) - ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[LSHR2]] - ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV]], [[SELECT]] - ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[LSHR]], [[C2]] - ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; SI-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[COPY1]], [[C]] + ; SI-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C]], [[COPY1]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[COPY1]](i32), [[C]] + ; SI-NEXT: 
[[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C1]] + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV1]], [[COPY1]](i32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[COPY1]](i32) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[SUB1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR1]], [[SHL]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i64) = G_LSHR [[UV1]], [[SUB]](i32) + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR]], [[LSHR2]] + ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[UV]], [[SELECT]] + ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[LSHR]], [[C2]] + ; SI-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT1]](i64), [[SELECT2]](i64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) ; ; VI-LABEL: name: test_lshr_s128_s128 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] - ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]] - ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[COPY1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[COPY1]](s32) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[SUB1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR1]], [[SHL]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[SUB]](s32) - ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[LSHR2]] - ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV]], [[SELECT]] - ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[LSHR]], [[C2]] - ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; VI-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[COPY1]], [[C]] + ; VI-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C]], [[COPY1]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[COPY1]](i32), [[C]] + ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C1]] + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV1]], [[COPY1]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[COPY1]](i32) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[SUB1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR1]], [[SHL]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i64) = G_LSHR [[UV1]], [[SUB]](i32) + ; VI-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR]], [[LSHR2]] + ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[UV]], [[SELECT]] + ; VI-NEXT: 
[[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[LSHR]], [[C2]] + ; VI-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT1]](i64), [[SELECT2]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) ; ; GFX9-LABEL: name: test_lshr_s128_s128 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] - ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[COPY1]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[COPY1]](s32) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[SUB1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR1]], [[SHL]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[SUB]](s32) - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[LSHR2]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV]], [[SELECT]] - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[LSHR]], [[C2]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = COPY $vgpr4 - %2:_(s128) = G_ZEXT %1 - %3:_(s128) = G_LSHR %0, %2 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[COPY1]], [[C]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C]], [[COPY1]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[COPY1]](i32), [[C]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C1]] + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV1]], [[COPY1]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[COPY1]](i32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[SUB1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR1]], [[SHL]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i64) = G_LSHR [[UV1]], [[SUB]](i32) + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR]], [[LSHR2]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[UV]], [[SELECT]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[LSHR]], [[C2]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT1]](i64), [[SELECT2]](i64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i32) = COPY $vgpr4 + %2:_(i128) = G_ZEXT %1(i32) + %3:_(i128) = G_LSHR %0, %2(i128) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3(i128) ... 
--- @@ -1142,78 +1142,78 @@ body: | ; SI-LABEL: name: test_lshr_s128_s132 ; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] - ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]] - ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[COPY1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[COPY1]](s32) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[SUB1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR1]], [[SHL]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[SUB]](s32) - ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[LSHR2]] - ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV]], [[SELECT]] - ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[LSHR]], [[C2]] - ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; SI-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[COPY1]], [[C]] + ; SI-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C]], [[COPY1]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[COPY1]](i32), [[C]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C1]] + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV1]], [[COPY1]](i32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[COPY1]](i32) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[SUB1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR1]], [[SHL]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i64) = G_LSHR [[UV1]], [[SUB]](i32) + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR]], [[LSHR2]] + ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[UV]], [[SELECT]] + ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[LSHR]], [[C2]] + ; SI-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT1]](i64), [[SELECT2]](i64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) ; ; VI-LABEL: name: test_lshr_s128_s132 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] - ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), 
[[COPY1]](s32), [[C]] - ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[COPY1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[COPY1]](s32) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[SUB1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR1]], [[SHL]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[SUB]](s32) - ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[LSHR2]] - ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV]], [[SELECT]] - ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[LSHR]], [[C2]] - ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; VI-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[COPY1]], [[C]] + ; VI-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C]], [[COPY1]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[COPY1]](i32), [[C]] + ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C1]] + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV1]], [[COPY1]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[COPY1]](i32) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[SUB1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR1]], [[SHL]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i64) = G_LSHR [[UV1]], [[SUB]](i32) + ; VI-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR]], [[LSHR2]] + ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[UV]], [[SELECT]] + ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[LSHR]], [[C2]] + ; VI-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT1]](i64), [[SELECT2]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) ; ; GFX9-LABEL: name: test_lshr_s128_s132 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] - ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[COPY1]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[COPY1]](s32) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[SUB1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR1]], [[SHL]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[SUB]](s32) - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[LSHR2]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = 
G_SELECT [[ICMP1]](s1), [[UV]], [[SELECT]] - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[LSHR]], [[C2]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = COPY $vgpr4 - %2:_(s128) = G_LSHR %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[COPY1]], [[C]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C]], [[COPY1]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[COPY1]](i32), [[C]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C1]] + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV1]], [[COPY1]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[COPY1]](i32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[SUB1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR1]], [[SHL]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i64) = G_LSHR [[UV1]], [[SUB]](i32) + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR]], [[LSHR2]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[UV]], [[SELECT]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[LSHR]], [[C2]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT1]](i64), [[SELECT2]](i64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i32) = COPY $vgpr4 + %2:_(i128) = G_LSHR %0, %1(i32) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(i128) ... --- @@ -1225,24 +1225,24 @@ body: | ; SI-LABEL: name: test_lshr_s128_s32_0 ; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]](s128) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]](i128) ; ; VI-LABEL: name: test_lshr_s128_s32_0 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]](s128) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]](i128) ; ; GFX9-LABEL: name: test_lshr_s128_s32_0 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]](s128) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = G_CONSTANT i32 0 - %3:_(s128) = G_LSHR %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]](i128) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i32) = G_CONSTANT i32 0 + %2:_(i128) = G_LSHR %0, %1(i32) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(i128) ... 
@@ -1255,48 +1255,48 @@ body: | ; SI-LABEL: name: test_lshr_s128_s32_23 ; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 41 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[C]](s32) - ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[LSHR1]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 23 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 41 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL]] + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i64) = G_LSHR [[UV1]], [[C]](i32) + ; SI-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[OR]](i64), [[LSHR1]](i64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) ; ; VI-LABEL: name: test_lshr_s128_s32_23 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 41 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[C]](s32) - ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[LSHR1]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 23 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 41 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL]] + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i64) = G_LSHR [[UV1]], [[C]](i32) + ; VI-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[OR]](i64), [[LSHR1]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) ; ; GFX9-LABEL: name: test_lshr_s128_s32_23 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[C]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 41 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = 
G_LSHR [[UV1]], [[C]](s32) - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[LSHR1]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = G_CONSTANT i32 23 - %3:_(s128) = G_LSHR %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 23 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[C]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 41 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL]] + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i64) = G_LSHR [[UV1]], [[C]](i32) + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[OR]](i64), [[LSHR1]](i64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i32) = G_CONSTANT i32 23 + %2:_(i128) = G_LSHR %0, %1(i32) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(i128) ... --- @@ -1308,48 +1308,48 @@ body: | ; SI-LABEL: name: test_lshr_s128_s32_31 ; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 33 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[C]](s32) - ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[LSHR1]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 33 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL]] + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i64) = G_LSHR [[UV1]], [[C]](i32) + ; SI-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[OR]](i64), [[LSHR1]](i64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) ; ; VI-LABEL: name: test_lshr_s128_s32_31 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 33 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[C]](s32) - ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[LSHR1]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; VI-NEXT: 
[[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 33 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL]] + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i64) = G_LSHR [[UV1]], [[C]](i32) + ; VI-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[OR]](i64), [[LSHR1]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) ; ; GFX9-LABEL: name: test_lshr_s128_s32_31 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[C]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 33 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[C]](s32) - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[LSHR1]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = G_CONSTANT i32 31 - %3:_(s128) = G_LSHR %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[C]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 33 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL]] + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i64) = G_LSHR [[UV1]], [[C]](i32) + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[OR]](i64), [[LSHR1]](i64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i32) = G_CONSTANT i32 31 + %2:_(i128) = G_LSHR %0, %1(i32) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(i128) ... 
--- @@ -1361,45 +1361,45 @@ body: | ; SI-LABEL: name: test_lshr_s128_s32_32 ; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[C]](s32) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[C]](s32) - ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[LSHR1]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[C]](i32) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL]] + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i64) = G_LSHR [[UV1]], [[C]](i32) + ; SI-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[OR]](i64), [[LSHR1]](i64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) ; ; VI-LABEL: name: test_lshr_s128_s32_32 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[C]](s32) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[C]](s32) - ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[LSHR1]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[C]](i32) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL]] + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i64) = G_LSHR [[UV1]], [[C]](i32) + ; VI-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[OR]](i64), [[LSHR1]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) ; ; GFX9-LABEL: name: test_lshr_s128_s32_32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[C]](s32) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[C]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[C]](s32) - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[LSHR1]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = G_CONSTANT i32 32 - %3:_(s128) = G_LSHR %0, %1 - 
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[C]](i32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[C]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL]] + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i64) = G_LSHR [[UV1]], [[C]](i32) + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[OR]](i64), [[LSHR1]](i64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i32) = G_CONSTANT i32 32 + %2:_(i128) = G_LSHR %0, %1(i32) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(i128) ... --- @@ -1411,48 +1411,48 @@ body: | ; SI-LABEL: name: test_lshr_s128_s32_33 ; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 33 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[C]](s32) - ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[LSHR1]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 33 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL]] + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i64) = G_LSHR [[UV1]], [[C]](i32) + ; SI-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[OR]](i64), [[LSHR1]](i64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) ; ; VI-LABEL: name: test_lshr_s128_s32_33 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 33 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[C]](s32) - ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[LSHR1]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 33 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL]] + ; VI-NEXT: 
[[LSHR1:%[0-9]+]]:_(i64) = G_LSHR [[UV1]], [[C]](i32) + ; VI-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[OR]](i64), [[LSHR1]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) ; ; GFX9-LABEL: name: test_lshr_s128_s32_33 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 33 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[C]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[C]](s32) - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[LSHR1]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = G_CONSTANT i32 33 - %3:_(s128) = G_LSHR %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 33 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[C]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL]] + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i64) = G_LSHR [[UV1]], [[C]](i32) + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[OR]](i64), [[LSHR1]](i64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i32) = G_CONSTANT i32 33 + %2:_(i128) = G_LSHR %0, %1(i32) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(i128) ... 
--- @@ -1464,39 +1464,39 @@ body: | ; SI-LABEL: name: test_lshr_s128_s32_127 ; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[LSHR]](s64), [[C1]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV1]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; SI-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[LSHR]](i64), [[C1]](i64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) ; ; VI-LABEL: name: test_lshr_s128_s32_127 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[LSHR]](s64), [[C1]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV1]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; VI-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[LSHR]](i64), [[C1]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) ; ; GFX9-LABEL: name: test_lshr_s128_s32_127 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[C]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[LSHR]](s64), [[C1]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = G_CONSTANT i32 127 - %3:_(s128) = G_LSHR %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV1]], [[C]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[LSHR]](i64), [[C1]](i64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i32) = G_CONSTANT i32 127 + %2:_(i128) = G_LSHR %0, %1(i32) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(i128) ... 
--- @@ -1508,247 +1508,247 @@ body: | ; SI-LABEL: name: test_lshr_s256_s256 ; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](s256) - ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] - ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]] - ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; SI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] - ; SI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] - ; SI-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] - ; SI-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV3]], [[COPY1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[COPY1]](s32) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV3]], [[SUB3]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR1]], [[SHL]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV3]], [[SUB2]](s32) - ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[OR]], [[LSHR2]] - ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV2]], [[SELECT]] - ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[LSHR]], [[C3]] - ; SI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; SI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] - ; SI-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] - ; SI-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] - ; SI-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[COPY1]](s32) - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s64) = G_LSHR [[UV4]], [[COPY1]](s32) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV5]], [[SUB5]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[LSHR4]], [[SHL1]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[SUB4]](s32) - ; SI-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[OR1]], [[LSHR5]] - ; SI-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP5]](s1), [[UV4]], [[SELECT3]] - ; SI-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR3]], [[C3]] - ; SI-NEXT: [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; SI-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C2]] - ; SI-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB1]] - ; SI-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C2]] - ; SI-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB1]](s32), [[C1]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB1]](s32) - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s64) = G_LSHR [[UV6]], [[SUB7]](s32) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s64) = G_SHL [[UV7]], [[SUB1]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[LSHR6]], [[SHL3]] - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s64) 
= G_SHL [[UV6]], [[SUB6]](s32) - ; SI-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL2]], [[C3]] - ; SI-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[OR2]], [[SHL4]] - ; SI-NEXT: [[SELECT8:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[UV7]], [[SELECT7]] - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s64) = G_OR [[SELECT4]], [[SELECT6]] - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s64) = G_OR [[SELECT5]], [[SELECT8]] - ; SI-NEXT: [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; SI-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C2]] - ; SI-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB]] - ; SI-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C2]] - ; SI-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB]](s32), [[C1]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s64) = G_LSHR [[UV9]], [[SUB]](s32) - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s64) = G_LSHR [[UV8]], [[SUB]](s32) - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[UV9]], [[SUB9]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[LSHR8]], [[SHL5]] - ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s64) = G_LSHR [[UV9]], [[SUB8]](s32) - ; SI-NEXT: [[SELECT9:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[OR5]], [[LSHR9]] - ; SI-NEXT: [[SELECT10:%[0-9]+]]:_(s64) = G_SELECT [[ICMP9]](s1), [[UV8]], [[SELECT9]] - ; SI-NEXT: [[SELECT11:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[LSHR7]], [[C3]] - ; SI-NEXT: [[SELECT12:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR3]], [[SELECT10]] - ; SI-NEXT: [[SELECT13:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR4]], [[SELECT11]] - ; SI-NEXT: [[UV10:%[0-9]+]]:_(s64), [[UV11:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; SI-NEXT: [[SELECT14:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV10]], [[SELECT12]] - ; SI-NEXT: [[SELECT15:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV11]], [[SELECT13]] - ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT14]](s64), [[SELECT15]](s64) - ; SI-NEXT: [[SELECT16:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT1]], [[C3]] - ; SI-NEXT: [[SELECT17:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT2]], [[C3]] - ; SI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT16]](s64), [[SELECT17]](s64) - ; SI-NEXT: [[MV2:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV]](s128), [[MV1]](s128) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV2]](s256) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 128 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i128), [[UV1:%[0-9]+]]:_(i128) = G_UNMERGE_VALUES [[COPY]](i256) + ; SI-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[COPY1]], [[C]] + ; SI-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C]], [[COPY1]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[COPY1]](i32), [[C]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C1]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; SI-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV1]](i128) + ; SI-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[COPY1]], [[C2]] + ; SI-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[COPY1]] + ; SI-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[COPY1]](i32), [[C2]] + ; SI-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C1]] + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV3]], [[COPY1]](i32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i64) = G_LSHR 
[[UV2]], [[COPY1]](i32) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV3]], [[SUB3]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR1]], [[SHL]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i64) = G_LSHR [[UV3]], [[SUB2]](i32) + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP2]](i1), [[OR]], [[LSHR2]] + ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP3]](i1), [[UV2]], [[SELECT]] + ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP2]](i1), [[LSHR]], [[C3]] + ; SI-NEXT: [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV]](i128) + ; SI-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[COPY1]], [[C2]] + ; SI-NEXT: [[SUB5:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[COPY1]] + ; SI-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[COPY1]](i32), [[C2]] + ; SI-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C1]] + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(i64) = G_LSHR [[UV5]], [[COPY1]](i32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(i64) = G_LSHR [[UV4]], [[COPY1]](i32) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[UV5]], [[SUB5]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i64) = G_OR [[LSHR4]], [[SHL1]] + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(i64) = G_LSHR [[UV5]], [[SUB4]](i32) + ; SI-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP4]](i1), [[OR1]], [[LSHR5]] + ; SI-NEXT: [[SELECT4:%[0-9]+]]:_(i64) = G_SELECT [[ICMP5]](i1), [[UV4]], [[SELECT3]] + ; SI-NEXT: [[SELECT5:%[0-9]+]]:_(i64) = G_SELECT [[ICMP4]](i1), [[LSHR3]], [[C3]] + ; SI-NEXT: [[UV6:%[0-9]+]]:_(i64), [[UV7:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV1]](i128) + ; SI-NEXT: [[SUB6:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[C2]] + ; SI-NEXT: [[SUB7:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[SUB1]] + ; SI-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[SUB1]](i32), [[C2]] + ; SI-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[SUB1]](i32), [[C1]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[UV6]], [[SUB1]](i32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(i64) = G_LSHR [[UV6]], [[SUB7]](i32) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i64) = G_SHL [[UV7]], [[SUB1]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i64) = G_OR [[LSHR6]], [[SHL3]] + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i64) = G_SHL [[UV6]], [[SUB6]](i32) + ; SI-NEXT: [[SELECT6:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[SHL2]], [[C3]] + ; SI-NEXT: [[SELECT7:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[OR2]], [[SHL4]] + ; SI-NEXT: [[SELECT8:%[0-9]+]]:_(i64) = G_SELECT [[ICMP7]](i1), [[UV7]], [[SELECT7]] + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i64) = G_OR [[SELECT4]], [[SELECT6]] + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i64) = G_OR [[SELECT5]], [[SELECT8]] + ; SI-NEXT: [[UV8:%[0-9]+]]:_(i64), [[UV9:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV1]](i128) + ; SI-NEXT: [[SUB8:%[0-9]+]]:_(i32) = G_SUB [[SUB]], [[C2]] + ; SI-NEXT: [[SUB9:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[SUB]] + ; SI-NEXT: [[ICMP8:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[SUB]](i32), [[C2]] + ; SI-NEXT: [[ICMP9:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[SUB]](i32), [[C1]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(i64) = G_LSHR [[UV9]], [[SUB]](i32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(i64) = G_LSHR [[UV8]], [[SUB]](i32) + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i64) = G_SHL [[UV9]], [[SUB9]](i32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(i64) = G_OR [[LSHR8]], [[SHL5]] + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(i64) = G_LSHR [[UV9]], [[SUB8]](i32) + ; SI-NEXT: [[SELECT9:%[0-9]+]]:_(i64) = G_SELECT [[ICMP8]](i1), [[OR5]], [[LSHR9]] + ; SI-NEXT: [[SELECT10:%[0-9]+]]:_(i64) = G_SELECT [[ICMP9]](i1), [[UV8]], [[SELECT9]] + ; SI-NEXT: [[SELECT11:%[0-9]+]]:_(i64) = 
G_SELECT [[ICMP8]](i1), [[LSHR7]], [[C3]] + ; SI-NEXT: [[SELECT12:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR3]], [[SELECT10]] + ; SI-NEXT: [[SELECT13:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR4]], [[SELECT11]] + ; SI-NEXT: [[UV10:%[0-9]+]]:_(i64), [[UV11:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV]](i128) + ; SI-NEXT: [[SELECT14:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[UV10]], [[SELECT12]] + ; SI-NEXT: [[SELECT15:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[UV11]], [[SELECT13]] + ; SI-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT14]](i64), [[SELECT15]](i64) + ; SI-NEXT: [[SELECT16:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[SELECT1]], [[C3]] + ; SI-NEXT: [[SELECT17:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[SELECT2]], [[C3]] + ; SI-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT16]](i64), [[SELECT17]](i64) + ; SI-NEXT: [[MV2:%[0-9]+]]:_(i256) = G_MERGE_VALUES [[MV]](i128), [[MV1]](i128) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV2]](i256) ; ; VI-LABEL: name: test_lshr_s256_s256 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](s256) - ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] - ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]] - ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; VI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; VI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] - ; VI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] - ; VI-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] - ; VI-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV3]], [[COPY1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[COPY1]](s32) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV3]], [[SUB3]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR1]], [[SHL]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV3]], [[SUB2]](s32) - ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[OR]], [[LSHR2]] - ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV2]], [[SELECT]] - ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[LSHR]], [[C3]] - ; VI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; VI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] - ; VI-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] - ; VI-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] - ; VI-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[COPY1]](s32) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s64) = G_LSHR [[UV4]], [[COPY1]](s32) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV5]], [[SUB5]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[LSHR4]], [[SHL1]] - ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s64) 
= G_LSHR [[UV5]], [[SUB4]](s32) - ; VI-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[OR1]], [[LSHR5]] - ; VI-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP5]](s1), [[UV4]], [[SELECT3]] - ; VI-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR3]], [[C3]] - ; VI-NEXT: [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; VI-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C2]] - ; VI-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB1]] - ; VI-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C2]] - ; VI-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB1]](s32), [[C1]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB1]](s32) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s64) = G_LSHR [[UV6]], [[SUB7]](s32) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s64) = G_SHL [[UV7]], [[SUB1]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[LSHR6]], [[SHL3]] - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB6]](s32) - ; VI-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL2]], [[C3]] - ; VI-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[OR2]], [[SHL4]] - ; VI-NEXT: [[SELECT8:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[UV7]], [[SELECT7]] - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s64) = G_OR [[SELECT4]], [[SELECT6]] - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s64) = G_OR [[SELECT5]], [[SELECT8]] - ; VI-NEXT: [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; VI-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C2]] - ; VI-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB]] - ; VI-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C2]] - ; VI-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB]](s32), [[C1]] - ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s64) = G_LSHR [[UV9]], [[SUB]](s32) - ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s64) = G_LSHR [[UV8]], [[SUB]](s32) - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[UV9]], [[SUB9]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[LSHR8]], [[SHL5]] - ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s64) = G_LSHR [[UV9]], [[SUB8]](s32) - ; VI-NEXT: [[SELECT9:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[OR5]], [[LSHR9]] - ; VI-NEXT: [[SELECT10:%[0-9]+]]:_(s64) = G_SELECT [[ICMP9]](s1), [[UV8]], [[SELECT9]] - ; VI-NEXT: [[SELECT11:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[LSHR7]], [[C3]] - ; VI-NEXT: [[SELECT12:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR3]], [[SELECT10]] - ; VI-NEXT: [[SELECT13:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR4]], [[SELECT11]] - ; VI-NEXT: [[UV10:%[0-9]+]]:_(s64), [[UV11:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; VI-NEXT: [[SELECT14:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV10]], [[SELECT12]] - ; VI-NEXT: [[SELECT15:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV11]], [[SELECT13]] - ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT14]](s64), [[SELECT15]](s64) - ; VI-NEXT: [[SELECT16:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT1]], [[C3]] - ; VI-NEXT: [[SELECT17:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT2]], [[C3]] - ; VI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT16]](s64), [[SELECT17]](s64) - ; VI-NEXT: [[MV2:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV]](s128), [[MV1]](s128) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV2]](s256) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 128 + ; VI-NEXT: 
[[UV:%[0-9]+]]:_(i128), [[UV1:%[0-9]+]]:_(i128) = G_UNMERGE_VALUES [[COPY]](i256) + ; VI-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[COPY1]], [[C]] + ; VI-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C]], [[COPY1]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[COPY1]](i32), [[C]] + ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C1]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; VI-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV1]](i128) + ; VI-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[COPY1]], [[C2]] + ; VI-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[COPY1]] + ; VI-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[COPY1]](i32), [[C2]] + ; VI-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C1]] + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV3]], [[COPY1]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i64) = G_LSHR [[UV2]], [[COPY1]](i32) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV3]], [[SUB3]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR1]], [[SHL]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i64) = G_LSHR [[UV3]], [[SUB2]](i32) + ; VI-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP2]](i1), [[OR]], [[LSHR2]] + ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP3]](i1), [[UV2]], [[SELECT]] + ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP2]](i1), [[LSHR]], [[C3]] + ; VI-NEXT: [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV]](i128) + ; VI-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[COPY1]], [[C2]] + ; VI-NEXT: [[SUB5:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[COPY1]] + ; VI-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[COPY1]](i32), [[C2]] + ; VI-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C1]] + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i64) = G_LSHR [[UV5]], [[COPY1]](i32) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(i64) = G_LSHR [[UV4]], [[COPY1]](i32) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[UV5]], [[SUB5]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i64) = G_OR [[LSHR4]], [[SHL1]] + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(i64) = G_LSHR [[UV5]], [[SUB4]](i32) + ; VI-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP4]](i1), [[OR1]], [[LSHR5]] + ; VI-NEXT: [[SELECT4:%[0-9]+]]:_(i64) = G_SELECT [[ICMP5]](i1), [[UV4]], [[SELECT3]] + ; VI-NEXT: [[SELECT5:%[0-9]+]]:_(i64) = G_SELECT [[ICMP4]](i1), [[LSHR3]], [[C3]] + ; VI-NEXT: [[UV6:%[0-9]+]]:_(i64), [[UV7:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV1]](i128) + ; VI-NEXT: [[SUB6:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[C2]] + ; VI-NEXT: [[SUB7:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[SUB1]] + ; VI-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[SUB1]](i32), [[C2]] + ; VI-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[SUB1]](i32), [[C1]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[UV6]], [[SUB1]](i32) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(i64) = G_LSHR [[UV6]], [[SUB7]](i32) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i64) = G_SHL [[UV7]], [[SUB1]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i64) = G_OR [[LSHR6]], [[SHL3]] + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i64) = G_SHL [[UV6]], [[SUB6]](i32) + ; VI-NEXT: [[SELECT6:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[SHL2]], [[C3]] + ; VI-NEXT: [[SELECT7:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[OR2]], [[SHL4]] + ; VI-NEXT: [[SELECT8:%[0-9]+]]:_(i64) = G_SELECT [[ICMP7]](i1), [[UV7]], [[SELECT7]] + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i64) = G_OR [[SELECT4]], [[SELECT6]] + ; VI-NEXT: 
[[OR4:%[0-9]+]]:_(i64) = G_OR [[SELECT5]], [[SELECT8]] + ; VI-NEXT: [[UV8:%[0-9]+]]:_(i64), [[UV9:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV1]](i128) + ; VI-NEXT: [[SUB8:%[0-9]+]]:_(i32) = G_SUB [[SUB]], [[C2]] + ; VI-NEXT: [[SUB9:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[SUB]] + ; VI-NEXT: [[ICMP8:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[SUB]](i32), [[C2]] + ; VI-NEXT: [[ICMP9:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[SUB]](i32), [[C1]] + ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(i64) = G_LSHR [[UV9]], [[SUB]](i32) + ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(i64) = G_LSHR [[UV8]], [[SUB]](i32) + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i64) = G_SHL [[UV9]], [[SUB9]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i64) = G_OR [[LSHR8]], [[SHL5]] + ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(i64) = G_LSHR [[UV9]], [[SUB8]](i32) + ; VI-NEXT: [[SELECT9:%[0-9]+]]:_(i64) = G_SELECT [[ICMP8]](i1), [[OR5]], [[LSHR9]] + ; VI-NEXT: [[SELECT10:%[0-9]+]]:_(i64) = G_SELECT [[ICMP9]](i1), [[UV8]], [[SELECT9]] + ; VI-NEXT: [[SELECT11:%[0-9]+]]:_(i64) = G_SELECT [[ICMP8]](i1), [[LSHR7]], [[C3]] + ; VI-NEXT: [[SELECT12:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR3]], [[SELECT10]] + ; VI-NEXT: [[SELECT13:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR4]], [[SELECT11]] + ; VI-NEXT: [[UV10:%[0-9]+]]:_(i64), [[UV11:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV]](i128) + ; VI-NEXT: [[SELECT14:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[UV10]], [[SELECT12]] + ; VI-NEXT: [[SELECT15:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[UV11]], [[SELECT13]] + ; VI-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT14]](i64), [[SELECT15]](i64) + ; VI-NEXT: [[SELECT16:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[SELECT1]], [[C3]] + ; VI-NEXT: [[SELECT17:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[SELECT2]], [[C3]] + ; VI-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT16]](i64), [[SELECT17]](i64) + ; VI-NEXT: [[MV2:%[0-9]+]]:_(i256) = G_MERGE_VALUES [[MV]](i128), [[MV1]](i128) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV2]](i256) ; ; GFX9-LABEL: name: test_lshr_s256_s256 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](s256) - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] - ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] - ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] - ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] - ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV3]], [[COPY1]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[COPY1]](s32) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV3]], [[SUB3]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR1]], [[SHL]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s64) = 
G_CONSTANT i64 0 - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV3]], [[SUB2]](s32) - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[OR]], [[LSHR2]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV2]], [[SELECT]] - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[LSHR]], [[C3]] - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] - ; GFX9-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] - ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] - ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[COPY1]](s32) - ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s64) = G_LSHR [[UV4]], [[COPY1]](s32) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV5]], [[SUB5]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[LSHR4]], [[SHL1]] - ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[SUB4]](s32) - ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[OR1]], [[LSHR5]] - ; GFX9-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP5]](s1), [[UV4]], [[SELECT3]] - ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR3]], [[C3]] - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; GFX9-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C2]] - ; GFX9-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB1]] - ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C2]] - ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB1]](s32), [[C1]] - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB1]](s32) - ; GFX9-NEXT: [[LSHR6:%[0-9]+]]:_(s64) = G_LSHR [[UV6]], [[SUB7]](s32) - ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s64) = G_SHL [[UV7]], [[SUB1]](s32) - ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[LSHR6]], [[SHL3]] - ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB6]](s32) - ; GFX9-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL2]], [[C3]] - ; GFX9-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[OR2]], [[SHL4]] - ; GFX9-NEXT: [[SELECT8:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[UV7]], [[SELECT7]] - ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s64) = G_OR [[SELECT4]], [[SELECT6]] - ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s64) = G_OR [[SELECT5]], [[SELECT8]] - ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; GFX9-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C2]] - ; GFX9-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB]] - ; GFX9-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C2]] - ; GFX9-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB]](s32), [[C1]] - ; GFX9-NEXT: [[LSHR7:%[0-9]+]]:_(s64) = G_LSHR [[UV9]], [[SUB]](s32) - ; GFX9-NEXT: [[LSHR8:%[0-9]+]]:_(s64) = G_LSHR [[UV8]], [[SUB]](s32) - ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[UV9]], [[SUB9]](s32) - ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[LSHR8]], [[SHL5]] - ; GFX9-NEXT: [[LSHR9:%[0-9]+]]:_(s64) = G_LSHR [[UV9]], [[SUB8]](s32) - ; GFX9-NEXT: [[SELECT9:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[OR5]], [[LSHR9]] - ; GFX9-NEXT: [[SELECT10:%[0-9]+]]:_(s64) = G_SELECT [[ICMP9]](s1), [[UV8]], [[SELECT9]] - ; GFX9-NEXT: [[SELECT11:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[LSHR7]], [[C3]] - ; GFX9-NEXT: [[SELECT12:%[0-9]+]]:_(s64) = G_SELECT 
[[ICMP]](s1), [[OR3]], [[SELECT10]] - ; GFX9-NEXT: [[SELECT13:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR4]], [[SELECT11]] - ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s64), [[UV11:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; GFX9-NEXT: [[SELECT14:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV10]], [[SELECT12]] - ; GFX9-NEXT: [[SELECT15:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV11]], [[SELECT13]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT14]](s64), [[SELECT15]](s64) - ; GFX9-NEXT: [[SELECT16:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT1]], [[C3]] - ; GFX9-NEXT: [[SELECT17:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT2]], [[C3]] - ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT16]](s64), [[SELECT17]](s64) - ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV]](s128), [[MV1]](s128) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV2]](s256) - %0:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - %1:_(s32) = COPY $vgpr8 - %2:_(s256) = G_ZEXT %1 - %3:_(s256) = G_LSHR %0, %2 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 128 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i128), [[UV1:%[0-9]+]]:_(i128) = G_UNMERGE_VALUES [[COPY]](i256) + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[COPY1]], [[C]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C]], [[COPY1]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[COPY1]](i32), [[C]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C1]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV1]](i128) + ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[COPY1]], [[C2]] + ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[COPY1]] + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[COPY1]](i32), [[C2]] + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C1]] + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV3]], [[COPY1]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i64) = G_LSHR [[UV2]], [[COPY1]](i32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV3]], [[SUB3]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR1]], [[SHL]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i64) = G_LSHR [[UV3]], [[SUB2]](i32) + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP2]](i1), [[OR]], [[LSHR2]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP3]](i1), [[UV2]], [[SELECT]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP2]](i1), [[LSHR]], [[C3]] + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV]](i128) + ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[COPY1]], [[C2]] + ; GFX9-NEXT: [[SUB5:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[COPY1]] + ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[COPY1]](i32), [[C2]] + ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C1]] + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(i64) = G_LSHR [[UV5]], [[COPY1]](i32) + ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(i64) = G_LSHR [[UV4]], [[COPY1]](i32) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[UV5]], [[SUB5]](i32) + ; GFX9-NEXT: 
[[OR1:%[0-9]+]]:_(i64) = G_OR [[LSHR4]], [[SHL1]] + ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(i64) = G_LSHR [[UV5]], [[SUB4]](i32) + ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP4]](i1), [[OR1]], [[LSHR5]] + ; GFX9-NEXT: [[SELECT4:%[0-9]+]]:_(i64) = G_SELECT [[ICMP5]](i1), [[UV4]], [[SELECT3]] + ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(i64) = G_SELECT [[ICMP4]](i1), [[LSHR3]], [[C3]] + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(i64), [[UV7:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV1]](i128) + ; GFX9-NEXT: [[SUB6:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[C2]] + ; GFX9-NEXT: [[SUB7:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[SUB1]] + ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[SUB1]](i32), [[C2]] + ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[SUB1]](i32), [[C1]] + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[UV6]], [[SUB1]](i32) + ; GFX9-NEXT: [[LSHR6:%[0-9]+]]:_(i64) = G_LSHR [[UV6]], [[SUB7]](i32) + ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(i64) = G_SHL [[UV7]], [[SUB1]](i32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i64) = G_OR [[LSHR6]], [[SHL3]] + ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(i64) = G_SHL [[UV6]], [[SUB6]](i32) + ; GFX9-NEXT: [[SELECT6:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[SHL2]], [[C3]] + ; GFX9-NEXT: [[SELECT7:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[OR2]], [[SHL4]] + ; GFX9-NEXT: [[SELECT8:%[0-9]+]]:_(i64) = G_SELECT [[ICMP7]](i1), [[UV7]], [[SELECT7]] + ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(i64) = G_OR [[SELECT4]], [[SELECT6]] + ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(i64) = G_OR [[SELECT5]], [[SELECT8]] + ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(i64), [[UV9:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV1]](i128) + ; GFX9-NEXT: [[SUB8:%[0-9]+]]:_(i32) = G_SUB [[SUB]], [[C2]] + ; GFX9-NEXT: [[SUB9:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[SUB]] + ; GFX9-NEXT: [[ICMP8:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[SUB]](i32), [[C2]] + ; GFX9-NEXT: [[ICMP9:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[SUB]](i32), [[C1]] + ; GFX9-NEXT: [[LSHR7:%[0-9]+]]:_(i64) = G_LSHR [[UV9]], [[SUB]](i32) + ; GFX9-NEXT: [[LSHR8:%[0-9]+]]:_(i64) = G_LSHR [[UV8]], [[SUB]](i32) + ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(i64) = G_SHL [[UV9]], [[SUB9]](i32) + ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(i64) = G_OR [[LSHR8]], [[SHL5]] + ; GFX9-NEXT: [[LSHR9:%[0-9]+]]:_(i64) = G_LSHR [[UV9]], [[SUB8]](i32) + ; GFX9-NEXT: [[SELECT9:%[0-9]+]]:_(i64) = G_SELECT [[ICMP8]](i1), [[OR5]], [[LSHR9]] + ; GFX9-NEXT: [[SELECT10:%[0-9]+]]:_(i64) = G_SELECT [[ICMP9]](i1), [[UV8]], [[SELECT9]] + ; GFX9-NEXT: [[SELECT11:%[0-9]+]]:_(i64) = G_SELECT [[ICMP8]](i1), [[LSHR7]], [[C3]] + ; GFX9-NEXT: [[SELECT12:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR3]], [[SELECT10]] + ; GFX9-NEXT: [[SELECT13:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR4]], [[SELECT11]] + ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(i64), [[UV11:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV]](i128) + ; GFX9-NEXT: [[SELECT14:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[UV10]], [[SELECT12]] + ; GFX9-NEXT: [[SELECT15:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[UV11]], [[SELECT13]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT14]](i64), [[SELECT15]](i64) + ; GFX9-NEXT: [[SELECT16:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[SELECT1]], [[C3]] + ; GFX9-NEXT: [[SELECT17:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[SELECT2]], [[C3]] + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT16]](i64), [[SELECT17]](i64) + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(i256) = G_MERGE_VALUES [[MV]](i128), [[MV1]](i128) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV2]](i256) + %0:_(i256) = COPY 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(i32) = COPY $vgpr8 + %2:_(i256) = G_ZEXT %1(i32) + %3:_(i256) = G_LSHR %0, %2(i256) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3(i256) ... --- @@ -1760,129 +1760,129 @@ body: | ; SI-LABEL: name: test_lshr_v2s128_v2s32 ; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr4_vgpr5 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](<2 x s128>) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; SI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV2]], [[C]] - ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UV2]] - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[UV2]](s32), [[C]] - ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV2]](s32), [[C1]] - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[UV2]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV4]], [[UV2]](s32) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV5]], [[SUB1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR1]], [[SHL]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[SUB]](s32) - ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[LSHR2]] - ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV4]], [[SELECT]] - ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[LSHR]], [[C2]] - ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) - ; SI-NEXT: [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; SI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[UV3]], [[C]] - ; SI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UV3]] - ; SI-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[UV3]](s32), [[C]] - ; SI-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV3]](s32), [[C1]] - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[UV7]], [[UV3]](s32) - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s64) = G_LSHR [[UV6]], [[UV3]](s32) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV7]], [[SUB3]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[LSHR4]], [[SHL1]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s64) = G_LSHR [[UV7]], [[SUB2]](s32) - ; SI-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[OR1]], [[LSHR5]] - ; SI-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV6]], [[SELECT3]] - ; SI-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[LSHR3]], [[C2]] - ; SI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT4]](s64), [[SELECT5]](s64) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s128>) = G_BUILD_VECTOR [[MV]](s128), [[MV1]](s128) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<2 x s128>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr4_vgpr5 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i128), [[UV1:%[0-9]+]]:_(i128) = G_UNMERGE_VALUES [[COPY]](<2 x i128>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES 
[[COPY1]](<2 x i32>) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; SI-NEXT: [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV]](i128) + ; SI-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV2]], [[C]] + ; SI-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C]], [[UV2]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[UV2]](i32), [[C]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[UV2]](i32), [[C1]] + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV5]], [[UV2]](i32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i64) = G_LSHR [[UV4]], [[UV2]](i32) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV5]], [[SUB1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR1]], [[SHL]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i64) = G_LSHR [[UV5]], [[SUB]](i32) + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR]], [[LSHR2]] + ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[UV4]], [[SELECT]] + ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[LSHR]], [[C2]] + ; SI-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT1]](i64), [[SELECT2]](i64) + ; SI-NEXT: [[UV6:%[0-9]+]]:_(i64), [[UV7:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV1]](i128) + ; SI-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[UV3]], [[C]] + ; SI-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[C]], [[UV3]] + ; SI-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[UV3]](i32), [[C]] + ; SI-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[UV3]](i32), [[C1]] + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(i64) = G_LSHR [[UV7]], [[UV3]](i32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(i64) = G_LSHR [[UV6]], [[UV3]](i32) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[UV7]], [[SUB3]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i64) = G_OR [[LSHR4]], [[SHL1]] + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(i64) = G_LSHR [[UV7]], [[SUB2]](i32) + ; SI-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP2]](i1), [[OR1]], [[LSHR5]] + ; SI-NEXT: [[SELECT4:%[0-9]+]]:_(i64) = G_SELECT [[ICMP3]](i1), [[UV6]], [[SELECT3]] + ; SI-NEXT: [[SELECT5:%[0-9]+]]:_(i64) = G_SELECT [[ICMP2]](i1), [[LSHR3]], [[C2]] + ; SI-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT4]](i64), [[SELECT5]](i64) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i128>) = G_BUILD_VECTOR [[MV]](i128), [[MV1]](i128) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<2 x i128>) ; ; VI-LABEL: name: test_lshr_v2s128_v2s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr4_vgpr5 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](<2 x s128>) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; VI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV2]], [[C]] - ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UV2]] - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[UV2]](s32), [[C]] - ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV2]](s32), [[C1]] - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[UV2]](s32) - ; VI-NEXT: 
[[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV4]], [[UV2]](s32) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV5]], [[SUB1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR1]], [[SHL]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[SUB]](s32) - ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[LSHR2]] - ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV4]], [[SELECT]] - ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[LSHR]], [[C2]] - ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) - ; VI-NEXT: [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; VI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[UV3]], [[C]] - ; VI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UV3]] - ; VI-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[UV3]](s32), [[C]] - ; VI-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV3]](s32), [[C1]] - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[UV7]], [[UV3]](s32) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s64) = G_LSHR [[UV6]], [[UV3]](s32) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV7]], [[SUB3]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[LSHR4]], [[SHL1]] - ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s64) = G_LSHR [[UV7]], [[SUB2]](s32) - ; VI-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[OR1]], [[LSHR5]] - ; VI-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV6]], [[SELECT3]] - ; VI-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[LSHR3]], [[C2]] - ; VI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT4]](s64), [[SELECT5]](s64) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s128>) = G_BUILD_VECTOR [[MV]](s128), [[MV1]](s128) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<2 x s128>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr4_vgpr5 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i128), [[UV1:%[0-9]+]]:_(i128) = G_UNMERGE_VALUES [[COPY]](<2 x i128>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; VI-NEXT: [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV]](i128) + ; VI-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV2]], [[C]] + ; VI-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C]], [[UV2]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[UV2]](i32), [[C]] + ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[UV2]](i32), [[C1]] + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV5]], [[UV2]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i64) = G_LSHR [[UV4]], [[UV2]](i32) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV5]], [[SUB1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR1]], [[SHL]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i64) = G_LSHR [[UV5]], [[SUB]](i32) + ; VI-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR]], [[LSHR2]] + ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[UV4]], [[SELECT]] + ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[LSHR]], [[C2]] + ; VI-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT1]](i64), [[SELECT2]](i64) + ; VI-NEXT: [[UV6:%[0-9]+]]:_(i64), [[UV7:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES 
[[UV1]](i128) + ; VI-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[UV3]], [[C]] + ; VI-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[C]], [[UV3]] + ; VI-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[UV3]](i32), [[C]] + ; VI-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[UV3]](i32), [[C1]] + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i64) = G_LSHR [[UV7]], [[UV3]](i32) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(i64) = G_LSHR [[UV6]], [[UV3]](i32) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[UV7]], [[SUB3]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i64) = G_OR [[LSHR4]], [[SHL1]] + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(i64) = G_LSHR [[UV7]], [[SUB2]](i32) + ; VI-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP2]](i1), [[OR1]], [[LSHR5]] + ; VI-NEXT: [[SELECT4:%[0-9]+]]:_(i64) = G_SELECT [[ICMP3]](i1), [[UV6]], [[SELECT3]] + ; VI-NEXT: [[SELECT5:%[0-9]+]]:_(i64) = G_SELECT [[ICMP2]](i1), [[LSHR3]], [[C2]] + ; VI-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT4]](i64), [[SELECT5]](i64) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i128>) = G_BUILD_VECTOR [[MV]](i128), [[MV1]](i128) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<2 x i128>) ; ; GFX9-LABEL: name: test_lshr_v2s128_v2s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](<2 x s128>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV2]], [[C]] - ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UV2]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[UV2]](s32), [[C]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV2]](s32), [[C1]] - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[UV2]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV4]], [[UV2]](s32) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV5]], [[SUB1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR1]], [[SHL]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[SUB]](s32) - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[LSHR2]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV4]], [[SELECT]] - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[LSHR]], [[C2]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[UV3]], [[C]] - ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UV3]] - ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[UV3]](s32), [[C]] - ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV3]](s32), [[C1]] - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[UV7]], [[UV3]](s32) - ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s64) = G_LSHR [[UV6]], [[UV3]](s32) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV7]], [[SUB3]](s32) - ; GFX9-NEXT: 
[[OR1:%[0-9]+]]:_(s64) = G_OR [[LSHR4]], [[SHL1]] - ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s64) = G_LSHR [[UV7]], [[SUB2]](s32) - ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[OR1]], [[LSHR5]] - ; GFX9-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV6]], [[SELECT3]] - ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[LSHR3]], [[C2]] - ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT4]](s64), [[SELECT5]](s64) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s128>) = G_BUILD_VECTOR [[MV]](s128), [[MV1]](s128) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<2 x s128>) - %0:_(<2 x s128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - %1:_(<2 x s32>) = COPY $vgpr4_vgpr5 - %2:_(<2 x s128>) = G_LSHR %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr4_vgpr5 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i128), [[UV1:%[0-9]+]]:_(i128) = G_UNMERGE_VALUES [[COPY]](<2 x i128>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV]](i128) + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV2]], [[C]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C]], [[UV2]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[UV2]](i32), [[C]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[UV2]](i32), [[C1]] + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV5]], [[UV2]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i64) = G_LSHR [[UV4]], [[UV2]](i32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV5]], [[SUB1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR1]], [[SHL]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i64) = G_LSHR [[UV5]], [[SUB]](i32) + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR]], [[LSHR2]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[UV4]], [[SELECT]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[LSHR]], [[C2]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT1]](i64), [[SELECT2]](i64) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(i64), [[UV7:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV1]](i128) + ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[UV3]], [[C]] + ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[C]], [[UV3]] + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[UV3]](i32), [[C]] + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[UV3]](i32), [[C1]] + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(i64) = G_LSHR [[UV7]], [[UV3]](i32) + ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(i64) = G_LSHR [[UV6]], [[UV3]](i32) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[UV7]], [[SUB3]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i64) = G_OR [[LSHR4]], [[SHL1]] + ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(i64) = G_LSHR [[UV7]], [[SUB2]](i32) + ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP2]](i1), [[OR1]], [[LSHR5]] + ; GFX9-NEXT: [[SELECT4:%[0-9]+]]:_(i64) = G_SELECT [[ICMP3]](i1), [[UV6]], [[SELECT3]] + ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(i64) = G_SELECT [[ICMP2]](i1), [[LSHR3]], [[C2]] + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES 
[[SELECT4]](i64), [[SELECT5]](i64) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i128>) = G_BUILD_VECTOR [[MV]](i128), [[MV1]](i128) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<2 x i128>) + %0:_(<2 x i128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(<2 x i32>) = COPY $vgpr4_vgpr5 + %2:_(<2 x i128>) = G_LSHR %0, %1(<2 x i32>) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %2(<2 x i128>) ... --- @@ -1894,107 +1894,107 @@ body: | ; SI-LABEL: name: test_lshr_s65_s32 ; SI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %23(s64) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) - ; SI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[MV]], [[C]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[MV1]], [[C1]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C2]] - ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[TRUNC]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C2]] - ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s32), [[C3]] - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND1]], [[TRUNC]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[AND]], [[TRUNC]](s32) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[AND1]], [[SUB1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR1]], [[SHL]] - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[AND1]], [[SUB]](s32) - ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[LSHR2]] - ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[AND]], [[SELECT]] - ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[LSHR]], [[C4]] - ; SI-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](s96) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC %23(i64) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i96) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; SI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV]](i32), [[UV1]](i32) + ; SI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV2]](i32), [[DEF]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 -1 + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[MV]], [[C]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[MV1]], [[C1]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; SI-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[TRUNC]], [[C2]] + ; SI-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[TRUNC]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; SI-NEXT: 
[[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[TRUNC]](i32), [[C2]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[TRUNC]](i32), [[C3]] + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[AND1]], [[TRUNC]](i32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i64) = G_LSHR [[AND]], [[TRUNC]](i32) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[AND1]], [[SUB1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR1]], [[SHL]] + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i64) = G_LSHR [[AND1]], [[SUB]](i32) + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR]], [[LSHR2]] + ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[AND]], [[SELECT]] + ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[LSHR]], [[C4]] + ; SI-NEXT: [[MV2:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT1]](i64), [[SELECT2]](i64) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i96) = G_TRUNC [[MV2]](i128) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](i96) ; ; VI-LABEL: name: test_lshr_s65_s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %23(s64) - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) - ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[MV]], [[C]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[MV1]], [[C1]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C2]] - ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[TRUNC]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C2]] - ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s32), [[C3]] - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND1]], [[TRUNC]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[AND]], [[TRUNC]](s32) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[AND1]], [[SUB1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR1]], [[SHL]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[AND1]], [[SUB]](s32) - ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[LSHR2]] - ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[AND]], [[SELECT]] - ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[LSHR]], [[C4]] - ; VI-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](s96) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC %23(i64) + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i96) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; VI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV]](i32), [[UV1]](i32) + ; VI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES 
[[UV2]](i32), [[DEF]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 -1 + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[MV]], [[C]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[MV1]], [[C1]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; VI-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[TRUNC]], [[C2]] + ; VI-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[TRUNC]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[TRUNC]](i32), [[C2]] + ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[TRUNC]](i32), [[C3]] + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[AND1]], [[TRUNC]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i64) = G_LSHR [[AND]], [[TRUNC]](i32) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[AND1]], [[SUB1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR1]], [[SHL]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i64) = G_LSHR [[AND1]], [[SUB]](i32) + ; VI-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR]], [[LSHR2]] + ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[AND]], [[SELECT]] + ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[LSHR]], [[C4]] + ; VI-NEXT: [[MV2:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT1]](i64), [[SELECT2]](i64) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i96) = G_TRUNC [[MV2]](i128) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](i96) ; ; GFX9-LABEL: name: test_lshr_s65_s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %23(s64) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) - ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[MV]], [[C]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[MV1]], [[C1]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C2]] - ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[TRUNC]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C2]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s32), [[C3]] - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND1]], [[TRUNC]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[AND]], [[TRUNC]](s32) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[AND1]], [[SUB1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR1]], [[SHL]] - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[AND1]], [[SUB]](s32) - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[LSHR2]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[AND]], [[SELECT]] - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[LSHR]], [[C4]] - ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) - ; GFX9-NEXT: 
[[TRUNC1:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](s96) - %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(s32) = COPY $vgpr3 - %2:_(s65) = G_TRUNC %0 - %3:_(s65) = G_LSHR %2, %3 - %4:_(s96) = G_ANYEXT %3 - $vgpr0_vgpr1_vgpr2 = COPY %4 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC %23(i64) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i96) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV]](i32), [[UV1]](i32) + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV2]](i32), [[DEF]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 -1 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[MV]], [[C]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[MV1]], [[C1]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[TRUNC]], [[C2]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[TRUNC]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[TRUNC]](i32), [[C2]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[TRUNC]](i32), [[C3]] + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[AND1]], [[TRUNC]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i64) = G_LSHR [[AND]], [[TRUNC]](i32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[AND1]], [[SUB1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR1]], [[SHL]] + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i64) = G_LSHR [[AND1]], [[SUB]](i32) + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR]], [[LSHR2]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[AND]], [[SELECT]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[LSHR]], [[C4]] + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT1]](i64), [[SELECT2]](i64) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i96) = G_TRUNC [[MV2]](i128) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](i96) + %0:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(i32) = COPY $vgpr3 + %2:_(i65) = G_TRUNC %0(i96) + %3:_(i65) = G_LSHR %2, %3(i65) + %4:_(i96) = G_ANYEXT %3(i65) + $vgpr0_vgpr1_vgpr2 = COPY %4(i96) ... 
--- @@ -2006,104 +2006,104 @@ body: | ; SI-LABEL: name: test_lshr_s65_s32_constant8 ; SI: liveins: $vgpr0_vgpr1_vgpr2 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %23(s64) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) - ; SI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[MV]], [[C]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[MV1]], [[C1]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C2]] - ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[TRUNC]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C2]] - ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s32), [[C3]] - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND1]], [[TRUNC]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[AND]], [[TRUNC]](s32) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[AND1]], [[SUB1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR1]], [[SHL]] - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[AND1]], [[SUB]](s32) - ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[LSHR2]] - ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[AND]], [[SELECT]] - ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[LSHR]], [[C4]] - ; SI-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](s96) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC %23(i64) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i96) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; SI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV]](i32), [[UV1]](i32) + ; SI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV2]](i32), [[DEF]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 -1 + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[MV]], [[C]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[MV1]], [[C1]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; SI-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[TRUNC]], [[C2]] + ; SI-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[TRUNC]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[TRUNC]](i32), [[C2]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[TRUNC]](i32), [[C3]] + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[AND1]], [[TRUNC]](i32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i64) = G_LSHR [[AND]], [[TRUNC]](i32) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[AND1]], [[SUB1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR1]], [[SHL]] + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i64) = G_LSHR [[AND1]], [[SUB]](i32) + ; SI-NEXT: 
[[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR]], [[LSHR2]] + ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[AND]], [[SELECT]] + ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[LSHR]], [[C4]] + ; SI-NEXT: [[MV2:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT1]](i64), [[SELECT2]](i64) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i96) = G_TRUNC [[MV2]](i128) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](i96) ; ; VI-LABEL: name: test_lshr_s65_s32_constant8 ; VI: liveins: $vgpr0_vgpr1_vgpr2 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %23(s64) - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) - ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[MV]], [[C]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[MV1]], [[C1]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C2]] - ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[TRUNC]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C2]] - ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s32), [[C3]] - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND1]], [[TRUNC]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[AND]], [[TRUNC]](s32) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[AND1]], [[SUB1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR1]], [[SHL]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[AND1]], [[SUB]](s32) - ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[LSHR2]] - ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[AND]], [[SELECT]] - ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[LSHR]], [[C4]] - ; VI-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](s96) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC %23(i64) + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i96) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; VI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV]](i32), [[UV1]](i32) + ; VI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV2]](i32), [[DEF]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 -1 + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[MV]], [[C]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[MV1]], [[C1]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; VI-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[TRUNC]], [[C2]] + ; VI-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[TRUNC]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[TRUNC]](i32), [[C2]] + ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[TRUNC]](i32), [[C3]] + ; VI-NEXT: 
[[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[AND1]], [[TRUNC]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i64) = G_LSHR [[AND]], [[TRUNC]](i32) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[AND1]], [[SUB1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR1]], [[SHL]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i64) = G_LSHR [[AND1]], [[SUB]](i32) + ; VI-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR]], [[LSHR2]] + ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[AND]], [[SELECT]] + ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[LSHR]], [[C4]] + ; VI-NEXT: [[MV2:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT1]](i64), [[SELECT2]](i64) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i96) = G_TRUNC [[MV2]](i128) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](i96) ; ; GFX9-LABEL: name: test_lshr_s65_s32_constant8 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %23(s64) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) - ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[MV]], [[C]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[MV1]], [[C1]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C2]] - ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[TRUNC]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C2]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s32), [[C3]] - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND1]], [[TRUNC]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[AND]], [[TRUNC]](s32) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[AND1]], [[SUB1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR1]], [[SHL]] - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[AND1]], [[SUB]](s32) - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[LSHR2]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[AND]], [[SELECT]] - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[LSHR]], [[C4]] - ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](s96) - %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(s32) = G_CONSTANT i32 8 - %2:_(s65) = G_TRUNC %0 - %3:_(s65) = G_LSHR %2, %3 - %4:_(s96) = G_ANYEXT %3 - $vgpr0_vgpr1_vgpr2 = COPY %4 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC %23(i64) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i96) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV]](i32), [[UV1]](i32) + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV2]](i32), 
[[DEF]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 -1 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[MV]], [[C]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[MV1]], [[C1]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[TRUNC]], [[C2]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[TRUNC]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[TRUNC]](i32), [[C2]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[TRUNC]](i32), [[C3]] + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[AND1]], [[TRUNC]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i64) = G_LSHR [[AND]], [[TRUNC]](i32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[AND1]], [[SUB1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR1]], [[SHL]] + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i64) = G_LSHR [[AND1]], [[SUB]](i32) + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR]], [[LSHR2]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[AND]], [[SELECT]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[LSHR]], [[C4]] + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT1]](i64), [[SELECT2]](i64) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i96) = G_TRUNC [[MV2]](i128) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](i96) + %0:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(i32) = G_CONSTANT i32 8 + %2:_(i65) = G_TRUNC %0(i96) + %3:_(i65) = G_LSHR %2, %3(i65) + %4:_(i96) = G_ANYEXT %3(i65) + $vgpr0_vgpr1_vgpr2 = COPY %4(i96) ... --- @@ -2115,110 +2115,110 @@ body: | ; SI-LABEL: name: test_lshr_s65_s32_known_pow2 ; SI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[COPY1]](s32) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) - ; SI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[MV]], [[C1]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[MV1]], [[C2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[C3]] - ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SHL]] - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SHL]](s32), [[C3]] - ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SHL]](s32), [[C4]] - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND1]], [[SHL]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[AND]], [[SHL]](s32) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[AND1]], [[SUB1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR1]], [[SHL1]] - ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[AND1]], [[SUB]](s32) - ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[LSHR2]] - ; SI-NEXT: 
[[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[AND]], [[SELECT]] - ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[LSHR]], [[C5]] - ; SI-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC]](s96) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[C]], [[COPY1]](i32) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i96) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; SI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV]](i32), [[UV1]](i32) + ; SI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV2]](i32), [[DEF]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 -1 + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[MV]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[MV1]], [[C2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; SI-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[SHL]], [[C3]] + ; SI-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C3]], [[SHL]] + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[SHL]](i32), [[C3]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[SHL]](i32), [[C4]] + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[AND1]], [[SHL]](i32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i64) = G_LSHR [[AND]], [[SHL]](i32) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[AND1]], [[SUB1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR1]], [[SHL1]] + ; SI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i64) = G_LSHR [[AND1]], [[SUB]](i32) + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR]], [[LSHR2]] + ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[AND]], [[SELECT]] + ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[LSHR]], [[C5]] + ; SI-NEXT: [[MV2:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT1]](i64), [[SELECT2]](i64) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i96) = G_TRUNC [[MV2]](i128) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC]](i96) ; ; VI-LABEL: name: test_lshr_s65_s32_known_pow2 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[COPY1]](s32) - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) - ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[MV]], [[C1]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[MV1]], [[C2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[C3]] - ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SHL]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = 
G_ICMP intpred(ult), [[SHL]](s32), [[C3]] - ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SHL]](s32), [[C4]] - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND1]], [[SHL]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[AND]], [[SHL]](s32) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[AND1]], [[SUB1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR1]], [[SHL1]] - ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[AND1]], [[SUB]](s32) - ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[LSHR2]] - ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[AND]], [[SELECT]] - ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[LSHR]], [[C5]] - ; VI-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC]](s96) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[C]], [[COPY1]](i32) + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i96) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; VI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV]](i32), [[UV1]](i32) + ; VI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV2]](i32), [[DEF]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 -1 + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[MV]], [[C1]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[MV1]], [[C2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; VI-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[SHL]], [[C3]] + ; VI-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C3]], [[SHL]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[SHL]](i32), [[C3]] + ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[SHL]](i32), [[C4]] + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[AND1]], [[SHL]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i64) = G_LSHR [[AND]], [[SHL]](i32) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[AND1]], [[SUB1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR1]], [[SHL1]] + ; VI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i64) = G_LSHR [[AND1]], [[SUB]](i32) + ; VI-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR]], [[LSHR2]] + ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[AND]], [[SELECT]] + ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[LSHR]], [[C5]] + ; VI-NEXT: [[MV2:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT1]](i64), [[SELECT2]](i64) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i96) = G_TRUNC [[MV2]](i128) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC]](i96) ; ; GFX9-LABEL: name: test_lshr_s65_s32_known_pow2 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[COPY1]](s32) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-NEXT: 
[[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) - ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[MV]], [[C1]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[MV1]], [[C2]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[C3]] - ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SHL]] - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SHL]](s32), [[C3]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SHL]](s32), [[C4]] - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND1]], [[SHL]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[AND]], [[SHL]](s32) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[AND1]], [[SUB1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR1]], [[SHL1]] - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[AND1]], [[SUB]](s32) - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[LSHR2]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[AND]], [[SELECT]] - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[LSHR]], [[C5]] - ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC]](s96) - %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(s32) = COPY $vgpr3 - %2:_(s32) = G_CONSTANT i32 1 - %3:_(s32) = G_SHL %2, %1 - %4:_(s65) = G_TRUNC %0 - %5:_(s65) = G_LSHR %4, %3 - %6:_(s96) = G_ANYEXT %5 - $vgpr0_vgpr1_vgpr2 = COPY %6 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[C]], [[COPY1]](i32) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i96) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV]](i32), [[UV1]](i32) + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV2]](i32), [[DEF]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 -1 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[MV]], [[C1]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[MV1]], [[C2]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[SHL]], [[C3]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C3]], [[SHL]] + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[SHL]](i32), [[C3]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[SHL]](i32), [[C4]] + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[AND1]], [[SHL]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i64) = G_LSHR [[AND]], [[SHL]](i32) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[AND1]], [[SUB1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR1]], [[SHL1]] + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i64) = G_LSHR [[AND1]], [[SUB]](i32) + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i64) = 
G_SELECT [[ICMP]](i1), [[OR]], [[LSHR2]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[AND]], [[SELECT]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[LSHR]], [[C5]] + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT1]](i64), [[SELECT2]](i64) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i96) = G_TRUNC [[MV2]](i128) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC]](i96) + %0:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(i32) = COPY $vgpr3 + %2:_(i32) = G_CONSTANT i32 1 + %3:_(i32) = G_SHL %2, %1(i32) + %4:_(i65) = G_TRUNC %0(i96) + %5:_(i65) = G_LSHR %4, %3(i32) + %6:_(i96) = G_ANYEXT %5(i65) + $vgpr0_vgpr1_vgpr2 = COPY %6(i96) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memcpy.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memcpy.mir index be3fe91407fdf..955a85f464233 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memcpy.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memcpy.mir @@ -10,24 +10,24 @@ body: | ; CHECK-LABEL: name: memcpy_test ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV1]](p0) :: (load (s8)) - ; CHECK-NEXT: G_STORE [[LOAD]](s32), [[MV]](p0) :: (store (s8)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[MV1]](p0) :: (load (i8)) + ; CHECK-NEXT: G_STORE [[LOAD]](i32), [[MV]](p0) :: (store (i8)) ; CHECK-NEXT: S_ENDPGM 0 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(p0) = G_MERGE_VALUES %0:_(s32), %1:_(s32) - %3:_(s32) = COPY $vgpr2 - %4:_(s32) = COPY $vgpr3 - %5:_(p0) = G_MERGE_VALUES %3:_(s32), %4:_(s32) - %6:_(s32) = G_CONSTANT i32 1 - %7:_(s64) = G_ZEXT %6:_(s32) - G_MEMCPY %2:_(p0), %5:_(p0), %7:_(s64), 0 :: (store (s8)), (load (s8)) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(p0) = G_MERGE_VALUES %0(i32), %1(i32) + %3:_(i32) = COPY $vgpr2 + %4:_(i32) = COPY $vgpr3 + %5:_(p0) = G_MERGE_VALUES %3(i32), %4(i32) + %6:_(i32) = G_CONSTANT i32 1 + %7:_(i64) = G_ZEXT %6(i32) + G_MEMCPY %2(p0), %5(p0), %7(i64), 0 :: (store (i8)), (load (i8)) S_ENDPGM 0 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memcpyinline.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memcpyinline.mir index a82ca30209820..c9767faafcade 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memcpyinline.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memcpyinline.mir @@ -10,24 +10,24 @@ body: | ; CHECK-LABEL: name: memcpyinline_test ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV1]](p0) :: (load (s8)) - ; CHECK-NEXT: G_STORE [[LOAD]](s32), [[MV]](p0) :: (store (s8)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[MV1]](p0) :: (load (i8)) + ; CHECK-NEXT: G_STORE [[LOAD]](i32), [[MV]](p0) :: (store (i8)) ; CHECK-NEXT: S_ENDPGM 0 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(p0) = G_MERGE_VALUES %0:_(s32), %1:_(s32) - %3:_(s32) = COPY $vgpr2 - %4:_(s32) = COPY $vgpr3 - %5:_(p0) = G_MERGE_VALUES %3:_(s32), %4:_(s32) - %6:_(s32) = G_CONSTANT i32 1 - %7:_(s64) = G_ZEXT %6:_(s32) - G_MEMCPY_INLINE %2:_(p0), %5:_(p0), %7:_(s64) :: (store (s8)), (load (s8)) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(p0) = G_MERGE_VALUES %0(i32), %1(i32) + %3:_(i32) = COPY $vgpr2 + %4:_(i32) = COPY $vgpr3 + %5:_(p0) = G_MERGE_VALUES %3(i32), %4(i32) + %6:_(i32) = G_CONSTANT i32 1 + %7:_(i64) = G_ZEXT %6(i32) + G_MEMCPY_INLINE %2(p0), %5(p0), %7(i64) :: (store (i8)), (load (i8)) S_ENDPGM 0 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memmove.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memmove.mir index e7cfaab135beb..a306a93b44a99 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memmove.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memmove.mir @@ -10,24 +10,24 @@ body: | ; CHECK-LABEL: name: memmove_test ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV1]](p0) :: (load (s8)) - ; CHECK-NEXT: G_STORE [[LOAD]](s32), [[MV]](p0) :: (store (s8)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[MV1]](p0) :: (load (i8)) + ; CHECK-NEXT: G_STORE [[LOAD]](i32), [[MV]](p0) :: (store (i8)) ; CHECK-NEXT: S_ENDPGM 0 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(p0) = G_MERGE_VALUES %0:_(s32), %1:_(s32) - %3:_(s32) = COPY $vgpr2 - %4:_(s32) = COPY $vgpr3 - %5:_(p0) = G_MERGE_VALUES %3:_(s32), %4:_(s32) - %6:_(s32) = G_CONSTANT i32 1 - %7:_(s64) = G_ZEXT %6:_(s32) - G_MEMMOVE %2:_(p0), %5:_(p0), %7:_(s64), 0 :: (store (s8)), (load (s8)) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(p0) = G_MERGE_VALUES %0(i32), %1(i32) + %3:_(i32) = COPY $vgpr2 + %4:_(i32) = COPY $vgpr3 + %5:_(p0) = G_MERGE_VALUES %3(i32), %4(i32) + %6:_(i32) = G_CONSTANT i32 1 + %7:_(i64) = G_ZEXT %6(i32) + G_MEMMOVE %2(p0), %5(p0), %7(i64), 0 :: (store (i8)), (load (i8)) S_ENDPGM 0 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memset.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memset.mir index 021cebbb6cb49..67899f4ab6556 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memset.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memset.mir @@ -10,23 +10,23 @@ body: | ; CHECK-LABEL: name: memset_test ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) - ; CHECK-NEXT: G_STORE [[COPY2]](s32), [[MV]](p0) :: (store (s8)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i8) = G_TRUNC [[COPY2]](i32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i8) = COPY [[TRUNC]](i8) + ; CHECK-NEXT: G_STORE [[COPY2]](i32), [[MV]](p0) :: (store (i8)) ; CHECK-NEXT: S_ENDPGM 0 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(p0) = G_MERGE_VALUES %0:_(s32), %1:_(s32) - %3:_(s32) = COPY $vgpr2 - %4:_(s16) = G_TRUNC %3:_(s32) - %5:_(s8) = G_TRUNC %4:_(s16) - %6:_(s32) = G_CONSTANT i32 1 - %7:_(s64) = G_ZEXT %6:_(s32) - G_MEMSET %2:_(p0), %5:_(s8), %7:_(s64), 0 :: (store (s8)) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(p0) = G_MERGE_VALUES %0(i32), %1(i32) + %3:_(i32) = COPY $vgpr2 + %4:_(i16) = G_TRUNC %3(i32) + %5:_(i8) = G_TRUNC %4(i16) + %6:_(i32) = G_CONSTANT i32 1 + %7:_(i64) = G_ZEXT %6(i32) + G_MEMSET %2(p0), %5(i8), %7(i64), 0 :: (store (i8)) S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values-build-vector.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values-build-vector.mir index 123454a26af2b..9b30bc7515a60 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values-build-vector.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values-build-vector.mir @@ -6,14 +6,14 @@ name: test_merge_s32_s32_s64 body: | bb.0: ; CHECK-LABEL: name: test_merge_s32_s32_s64 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) - %0:_(s32) = G_CONSTANT i32 0 - %1:_(s32) = G_CONSTANT i32 1 - %2:_(s64) = G_MERGE_VALUES %0:_(s32), %1:_(s32) - $vgpr0_vgpr1 = COPY %2(s64) + ; CHECK: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[C]](i32), [[C1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + %0:_(i32) = G_CONSTANT i32 0 + %1:_(i32) = G_CONSTANT i32 1 + %2:_(i64) = G_MERGE_VALUES %0(i32), %1(i32) + $vgpr0_vgpr1 = COPY %2(i64) ... 
--- @@ -21,14 +21,14 @@ name: test_merge_s32_s32_v2s32 body: | bb.0: ; CHECK-LABEL: name: test_merge_s32_s32_v2s32 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(s32) = G_CONSTANT i32 0 - %1:_(s32) = G_CONSTANT i32 1 - %2:_(<2 x s32>) = G_BUILD_VECTOR %0:_(s32), %1:_(s32) - $vgpr0_vgpr1 = COPY %2(<2 x s32>) + ; CHECK: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[C]](i32), [[C1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(i32) = G_CONSTANT i32 0 + %1:_(i32) = G_CONSTANT i32 1 + %2:_(<2 x i32>) = G_BUILD_VECTOR %0(i32), %1(i32) + $vgpr0_vgpr1 = COPY %2(<2 x i32>) ... --- @@ -36,16 +36,16 @@ name: test_merge_s32_s32_s32_v3s32 body: | bb.0: ; CHECK-LABEL: name: test_merge_s32_s32_s32_v3s32 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - %0:_(s32) = G_CONSTANT i32 0 - %1:_(s32) = G_CONSTANT i32 1 - %2:_(s32) = G_CONSTANT i32 2 - %3:_(<3 x s32>) = G_BUILD_VECTOR %0:_(s32), %1:_(s32), %2:_(s32) - $vgpr0_vgpr1_vgpr2 = COPY %3(<3 x s32>) + ; CHECK: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[C]](i32), [[C1]](i32), [[C2]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) + %0:_(i32) = G_CONSTANT i32 0 + %1:_(i32) = G_CONSTANT i32 1 + %2:_(i32) = G_CONSTANT i32 2 + %3:_(<3 x i32>) = G_BUILD_VECTOR %0(i32), %1(i32), %2(i32) + $vgpr0_vgpr1_vgpr2 = COPY %3(<3 x i32>) ... --- @@ -53,14 +53,14 @@ name: test_merge_s64_s64_s128 body: | bb.0: ; CHECK-LABEL: name: test_merge_s64_s64_s128 - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C1]](s64) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - %0:_(s64) = G_CONSTANT i64 0 - %1:_(s64) = G_CONSTANT i64 1 - %2:_(<2 x s64>) = G_BUILD_VECTOR %0(s64), %1(s64) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<2 x s64>) + ; CHECK: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[C]](i64), [[C1]](i64) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) + %0:_(i64) = G_CONSTANT i64 0 + %1:_(i64) = G_CONSTANT i64 1 + %2:_(<2 x i64>) = G_BUILD_VECTOR %0(i64), %1(i64) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<2 x i64>) ... 
--- @@ -68,18 +68,18 @@ name: test_merge_s64_s64_s64_s64_v4s64 body: | bb.0: ; CHECK-LABEL: name: test_merge_s64_s64_s64_s64_v4s64 - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C1]](s64), [[C2]](s64), [[C3]](s64) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) - %0:_(s64) = G_CONSTANT i64 0 - %1:_(s64) = G_CONSTANT i64 1 - %2:_(s64) = G_CONSTANT i64 2 - %3:_(s64) = G_CONSTANT i64 3 - %4:_(<4 x s64>) = G_BUILD_VECTOR %0(s64), %1(s64), %2(s64), %3(s64) - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %4(<4 x s64>) + ; CHECK: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[C]](i64), [[C1]](i64), [[C2]](i64), [[C3]](i64) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) + %0:_(i64) = G_CONSTANT i64 0 + %1:_(i64) = G_CONSTANT i64 1 + %2:_(i64) = G_CONSTANT i64 2 + %3:_(i64) = G_CONSTANT i64 3 + %4:_(<4 x i64>) = G_BUILD_VECTOR %0(i64), %1(i64), %2(i64), %3(i64) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %4(<4 x i64>) ... # FIXME: Should be split up @@ -87,28 +87,28 @@ body: | # name: test_merge_17_x_i32 # body: | # bb.0: -# %0:_(s32) = G_CONSTANT i32 0 -# %1:_(s32) = G_CONSTANT i32 1 -# %2:_(s32) = G_CONSTANT i32 2 -# %3:_(s32) = G_CONSTANT i32 3 +# %0:_(i32) = G_CONSTANT i32 0 +# %1:_(i32) = G_CONSTANT i32 1 +# %2:_(i32) = G_CONSTANT i32 2 +# %3:_(i32) = G_CONSTANT i32 3 -# %4:_(s32) = G_CONSTANT i32 4 -# %5:_(s32) = G_CONSTANT i32 5 -# %6:_(s32) = G_CONSTANT i32 6 -# %7:_(s32) = G_CONSTANT i32 7 +# %4:_(i32) = G_CONSTANT i32 4 +# %5:_(i32) = G_CONSTANT i32 5 +# %6:_(i32) = G_CONSTANT i32 6 +# %7:_(i32) = G_CONSTANT i32 7 -# %8:_(s32) = G_CONSTANT i32 8 -# %9:_(s32) = G_CONSTANT i32 9 -# %10:_(s32) = G_CONSTANT i32 10 -# %11:_(s32) = G_CONSTANT i32 11 +# %8:_(i32) = G_CONSTANT i32 8 +# %9:_(i32) = G_CONSTANT i32 9 +# %10:_(i32) = G_CONSTANT i32 10 +# %11:_(i32) = G_CONSTANT i32 11 -# %12:_(s32) = G_CONSTANT i32 12 -# %13:_(s32) = G_CONSTANT i32 13 -# %14:_(s32) = G_CONSTANT i32 14 -# %15:_(s32) = G_CONSTANT i32 15 +# %12:_(i32) = G_CONSTANT i32 12 +# %13:_(i32) = G_CONSTANT i32 13 +# %14:_(i32) = G_CONSTANT i32 14 +# %15:_(i32) = G_CONSTANT i32 15 -# %16:_(s32) = G_CONSTANT i32 16 +# %16:_(i32) = G_CONSTANT i32 16 -# %17:_(<17 x s32>) = G_BUILD_VECTOR %0:_(s32), %1:_(s32), %2:_(s32), %3:_(s32), %4:_(s32), %5:_(s32), %6:_(s32), %7:_(s32), %8:_(s32), %9:_(s32), %10:_(s32), %11:_(s32), %12:_(s32), %13:_(s32), %14:_(s32), %15:_(s32), %16:_(s32) -# S_ENDPGM implicit %17(<17 x s32>) +# %17:_(<17 x i32>) = G_BUILD_VECTOR %0:_(i32), %1:_(i32), %2:_(i32), %3:_(i32), %4:_(i32), %5:_(i32), %6:_(i32), %7:_(i32), %8:_(i32), %9:_(i32), %10:_(i32), %11:_(i32), %12:_(i32), %13:_(i32), %14:_(i32), %15:_(i32), %16:_(i32) +# S_ENDPGM implicit %17(<17 x i32>) # ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values.mir index f47c9b89a81d0..4128f4bdb6004 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values.mir @@ -3,7 +3,7 @@ # FileCheck -check-prefix=ERR %s < %t # ERR-NOT: remark: -# ERR: remark: :0:0: unable to legalize instruction: %197:_(s136) = G_INSERT %209:_, %206:_(s8), 128 (in function: test_merge_s68_s17_s17_s17_s17) +# ERR: remark: :0:0: unable to legalize instruction: %197:_(i136) = G_INSERT %209:_, %206:_(i8), 128 (in function: test_merge_s68_s17_s17_s17_s17) # ERR-NOT: remark: @@ -15,73 +15,73 @@ body: | ; CHECK-LABEL: name: test_merge_p1_s8 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY8]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY9]](s32) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY10]](s32) - ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] - ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C]] - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C2]] - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C1]](s32) - ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) - ; CHECK-NEXT: 
[[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C3]](s32) - ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY [[C1]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C2]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[COPY8]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[SHL]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[TRUNC1]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY [[C1]](i32) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[COPY3]], [[C2]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[COPY9]](i32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[SHL1]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[TRUNC3]] + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY4]](i32) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(i16) = G_AND [[TRUNC4]], [[C]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY [[C1]](i32) + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[COPY5]], [[C2]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND5]], [[COPY10]](i32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[SHL2]](i32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(i16) = G_OR [[AND4]], [[TRUNC5]] + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[COPY6]](i32) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(i16) = G_AND [[TRUNC6]], [[C]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(i32) = G_AND [[COPY7]], [[C2]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[AND7]], [[C1]](i32) + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[SHL3]](i32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(i16) = G_OR [[AND6]], [[TRUNC7]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C3]](i32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL4]] + ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[OR2]](i16) + ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[OR3]](i16) + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C3]](i32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL5]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](i32), [[OR5]](i32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY 
$vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = COPY $vgpr3 - %4:_(s32) = COPY $vgpr4 - %5:_(s32) = COPY $vgpr5 - %6:_(s32) = COPY $vgpr6 - %7:_(s32) = COPY $vgpr7 - %8:_(s8) = G_TRUNC %0 - %9:_(s8) = G_TRUNC %1 - %10:_(s8) = G_TRUNC %2 - %11:_(s8) = G_TRUNC %3 - %12:_(s8) = G_TRUNC %4 - %13:_(s8) = G_TRUNC %5 - %14:_(s8) = G_TRUNC %6 - %15:_(s8) = G_TRUNC %7 - %16:_(p1) = G_MERGE_VALUES %8, %9, %10, %11, %12, %13, %14, %15 - $vgpr0_vgpr1 = COPY %16 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 + %4:_(i32) = COPY $vgpr4 + %5:_(i32) = COPY $vgpr5 + %6:_(i32) = COPY $vgpr6 + %7:_(i32) = COPY $vgpr7 + %8:_(i8) = G_TRUNC %0(i32) + %9:_(i8) = G_TRUNC %1(i32) + %10:_(i8) = G_TRUNC %2(i32) + %11:_(i8) = G_TRUNC %3(i32) + %12:_(i8) = G_TRUNC %4(i32) + %13:_(i8) = G_TRUNC %5(i32) + %14:_(i8) = G_TRUNC %6(i32) + %15:_(i8) = G_TRUNC %7(i32) + %16:_(p1) = G_MERGE_VALUES %8(i8), %9(i8), %10(i8), %11(i8), %12(i8), %13(i8), %14(i8), %15(i8) + $vgpr0_vgpr1 = COPY %16(p1) ... @@ -90,19 +90,19 @@ name: test_merge_s16_s8_s8 body: | bb.0: ; CHECK-LABEL: name: test_merge_s16_s8_s8 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C2]](s32) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[C1]], [[TRUNC]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s8) = G_CONSTANT i8 0 - %1:_(s8) = G_CONSTANT i8 1 - %2:_(s16) = G_MERGE_VALUES %0, %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; CHECK: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 0 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C2]](i32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[SHL]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[C1]], [[TRUNC]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[OR]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i8) = G_CONSTANT i8 0 + %1:_(i8) = G_CONSTANT i8 1 + %2:_(i16) = G_MERGE_VALUES %0(i8), %1(i8) + %3:_(i32) = G_ANYEXT %2(i16) + $vgpr0 = COPY %3(i32) ... 
--- @@ -110,32 +110,32 @@ name: test_merge_s24_s8_s8_s8 body: | bb.0: ; CHECK-LABEL: name: test_merge_s24_s8_s8_s8 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[COPY]](s32) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[C1]], [[TRUNC]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 2 - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C4]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C2]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[C3]], [[TRUNC1]] - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; CHECK-NEXT: $vgpr0 = COPY [[OR2]](s32) - %0:_(s8) = G_CONSTANT i8 0 - %1:_(s8) = G_CONSTANT i8 1 - %2:_(s8) = G_CONSTANT i8 2 - %3:_(s24) = G_MERGE_VALUES %0, %1, %2 - %4:_(s32) = G_ANYEXT %3 - $vgpr0 = COPY %4 + ; CHECK: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 0 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[C]], [[COPY]](i32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[SHL]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[C1]], [[TRUNC]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 2 + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[DEF]], [[C4]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND]], [[C2]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[SHL1]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[C3]], [[TRUNC1]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C5]](i32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL2]] + ; CHECK-NEXT: $vgpr0 = COPY [[OR2]](i32) + %0:_(i8) = G_CONSTANT i8 0 + %1:_(i8) = G_CONSTANT i8 1 + %2:_(i8) = G_CONSTANT i8 2 + %3:_(i24) = G_MERGE_VALUES %0(i8), %1(i8), %2(i8) + %4:_(i32) = G_ANYEXT %3(i24) + $vgpr0 = COPY %4(i32) ... 
--- @@ -143,26 +143,26 @@ name: test_merge_s32_s8_s8_s8_s8 body: | bb.0: ; CHECK-LABEL: name: test_merge_s32_s8_s8_s8_s8 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C4]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]] - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C5]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C6]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; CHECK-NEXT: $vgpr0 = COPY [[OR2]](s32) - %0:_(s8) = G_CONSTANT i8 0 - %1:_(s8) = G_CONSTANT i8 1 - %2:_(s8) = G_CONSTANT i8 2 - %3:_(s8) = G_CONSTANT i8 3 - %4:_(s32) = G_MERGE_VALUES %0, %1, %2, %3 - $vgpr0 = COPY %4 + ; CHECK: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[C1]], [[C4]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[C]], [[SHL]] + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[C2]], [[C5]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[C3]], [[C6]](i32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[OR1]], [[SHL2]] + ; CHECK-NEXT: $vgpr0 = COPY [[OR2]](i32) + %0:_(i8) = G_CONSTANT i8 0 + %1:_(i8) = G_CONSTANT i8 1 + %2:_(i8) = G_CONSTANT i8 2 + %3:_(i8) = G_CONSTANT i8 3 + %4:_(i32) = G_MERGE_VALUES %0(i8), %1(i8), %2(i8), %3(i8) + $vgpr0 = COPY %4(i32) ... --- @@ -173,14 +173,14 @@ body: | ; CHECK-LABEL: name: test_merge_s64_s32_s32 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: $vgpr1_vgpr2 = COPY [[MV]](s64) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s64) = G_MERGE_VALUES %0, %1 - $vgpr1_vgpr2 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: $vgpr1_vgpr2 = COPY [[MV]](i64) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i64) = G_MERGE_VALUES %0(i32), %1(i32) + $vgpr1_vgpr2 = COPY %2(i64) ... 
--- @@ -191,32 +191,32 @@ body: | ; CHECK-LABEL: name: test_merge_s64_s16_s16_s16_s16 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) - ; CHECK-NEXT: $vgpr1_vgpr2 = COPY [[MV]](s64) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = COPY $vgpr3 - %4:_(s16) = G_TRUNC %0 - %5:_(s16) = G_TRUNC %1 - %6:_(s16) = G_TRUNC %2 - %7:_(s16) = G_TRUNC %3 - %8:_(s64) = G_MERGE_VALUES %4, %5, %6, %7 - $vgpr1_vgpr2 = COPY %8 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[COPY2]], [[C]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[COPY3]], [[C]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C1]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR]](i32), [[OR1]](i32) + ; CHECK-NEXT: $vgpr1_vgpr2 = COPY [[MV]](i64) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 + %4:_(i16) = G_TRUNC %0(i32) + %5:_(i16) = G_TRUNC %1(i32) + %6:_(i16) = G_TRUNC %2(i32) + %7:_(i16) = G_TRUNC %3(i32) + %8:_(i64) = G_MERGE_VALUES %4(i16), %5(i16), %6(i16), %7(i16) + $vgpr1_vgpr2 = COPY %8(i64) ... 
--- @@ -225,57 +225,57 @@ name: test_merge_s24_s4_s4_s4_s4_s4_s4 body: | bb.0: ; CHECK-LABEL: name: test_merge_s24_s4_s4_s4_s4_s4_s4 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 4 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[COPY]](s32) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[C5]], [[TRUNC]] - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C7]](s32) - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[COPY1]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[OR]], [[TRUNC1]] - ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[COPY2]](s32) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[OR1]], [[TRUNC2]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[C6]](s16) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[COPY4]](s32) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[COPY3]], [[TRUNC3]] - ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C9]] - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C7]](s32) - ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[OR3]], [[TRUNC4]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C9]] - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C8]](s32) - ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s16) = G_OR [[OR4]], [[TRUNC5]] - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) - ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) - ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL6]] - ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s24) = G_TRUNC [[OR6]](s32) - ; CHECK-NEXT: S_NOP 0, implicit [[TRUNC6]](s24) - %0:_(s4) = G_CONSTANT i4 0 - %1:_(s4) = G_CONSTANT i4 1 - %2:_(s4) = G_CONSTANT i4 2 - %3:_(s4) = G_CONSTANT i4 3 - %4:_(s4) = G_CONSTANT i4 4 - %5:_(s4) = G_CONSTANT i4 5 - %6:_(s24) = G_MERGE_VALUES %0, %1, %2, %3, %4, %5 - S_NOP 0, implicit %6 + ; CHECK: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 5 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(i16) = G_CONSTANT i16 0 + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(i16) = G_CONSTANT i16 4 + ; CHECK-NEXT: 
[[COPY:%[0-9]+]]:_(i32) = COPY [[C3]](i32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[C]], [[COPY]](i32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[SHL]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[C5]], [[TRUNC]] + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C7]](i32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[C1]], [[COPY1]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[SHL1]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[OR]], [[TRUNC1]] + ; CHECK-NEXT: [[C8:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C8]](i32) + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[C2]], [[COPY2]](i32) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[SHL2]](i32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(i16) = G_OR [[OR1]], [[TRUNC2]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i16) = COPY [[C6]](i16) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[C3]](i32) + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[C4]], [[COPY4]](i32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[SHL3]](i32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(i16) = G_OR [[COPY3]], [[TRUNC3]] + ; CHECK-NEXT: [[C9:%[0-9]+]]:_(i32) = G_CONSTANT i32 15 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[DEF]], [[C9]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[AND]], [[C7]](i32) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[SHL4]](i32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(i16) = G_OR [[OR3]], [[TRUNC4]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[DEF]], [[C9]] + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C8]](i32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[SHL5]](i32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(i16) = G_OR [[OR4]], [[TRUNC5]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR2]](i16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR5]](i16) + ; CHECK-NEXT: [[C10:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C10]](i32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL6]] + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(i24) = G_TRUNC [[OR6]](i32) + ; CHECK-NEXT: S_NOP 0, implicit [[TRUNC6]](i24) + %0:_(i4) = G_CONSTANT i4 0 + %1:_(i4) = G_CONSTANT i4 1 + %2:_(i4) = G_CONSTANT i4 2 + %3:_(i4) = G_CONSTANT i4 3 + %4:_(i4) = G_CONSTANT i4 4 + %5:_(i4) = G_CONSTANT i4 5 + %6:_(i24) = G_MERGE_VALUES %0(i4), %1(i4), %2(i4), %3(i4), %4(i4), %5(i4) + S_NOP 0, implicit %6(i24) ... 
--- @@ -283,58 +283,58 @@ name: test_merge_s28_s4_s4_s4_s4_s4_s4_s4 body: | bb.0: ; CHECK-LABEL: name: test_merge_s28_s4_s4_s4_s4_s4_s4_s4 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 4 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[COPY]](s32) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[C6]], [[TRUNC]] - ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[COPY1]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[OR]], [[TRUNC1]] - ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C9]](s32) - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[COPY2]](s32) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[OR1]], [[TRUNC2]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[C7]](s16) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[COPY4]](s32) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[COPY3]], [[TRUNC3]] - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C5]], [[C8]](s32) - ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[OR3]], [[TRUNC4]] - ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C10]] - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C9]](s32) - ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s16) = G_OR [[OR4]], [[TRUNC5]] - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) - ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C11]](s32) - ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL6]] - ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s28) = G_TRUNC [[OR6]](s32) - ; CHECK-NEXT: S_NOP 0, implicit [[TRUNC6]](s28) - %0:_(s4) = G_CONSTANT i4 0 - %1:_(s4) = G_CONSTANT i4 1 - %2:_(s4) = G_CONSTANT i4 2 - %3:_(s4) = G_CONSTANT i4 3 - %4:_(s4) = G_CONSTANT i4 4 - %5:_(s4) = G_CONSTANT i4 5 - %6:_(s4) = G_CONSTANT i4 6 - %7:_(s28) = G_MERGE_VALUES %0, %1, %2, %3, %4, %5, %6 - S_NOP 0, implicit %7 + ; CHECK: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 5 + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 6 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(i16) = G_CONSTANT i16 0 
+ ; CHECK-NEXT: [[C7:%[0-9]+]]:_(i16) = G_CONSTANT i16 4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY [[C3]](i32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[C]], [[COPY]](i32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[SHL]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[C6]], [[TRUNC]] + ; CHECK-NEXT: [[C8:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C8]](i32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[C1]], [[COPY1]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[SHL1]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[OR]], [[TRUNC1]] + ; CHECK-NEXT: [[C9:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C9]](i32) + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[C2]], [[COPY2]](i32) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[SHL2]](i32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(i16) = G_OR [[OR1]], [[TRUNC2]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i16) = COPY [[C7]](i16) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[C3]](i32) + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[C4]], [[COPY4]](i32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[SHL3]](i32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(i16) = G_OR [[COPY3]], [[TRUNC3]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[C5]], [[C8]](i32) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[SHL4]](i32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(i16) = G_OR [[OR3]], [[TRUNC4]] + ; CHECK-NEXT: [[C10:%[0-9]+]]:_(i32) = G_CONSTANT i32 15 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[DEF]], [[C10]] + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[AND]], [[C9]](i32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[SHL5]](i32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(i16) = G_OR [[OR4]], [[TRUNC5]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR2]](i16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR5]](i16) + ; CHECK-NEXT: [[C11:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C11]](i32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL6]] + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(i28) = G_TRUNC [[OR6]](i32) + ; CHECK-NEXT: S_NOP 0, implicit [[TRUNC6]](i28) + %0:_(i4) = G_CONSTANT i4 0 + %1:_(i4) = G_CONSTANT i4 1 + %2:_(i4) = G_CONSTANT i4 2 + %3:_(i4) = G_CONSTANT i4 3 + %4:_(i4) = G_CONSTANT i4 4 + %5:_(i4) = G_CONSTANT i4 5 + %6:_(i4) = G_CONSTANT i4 6 + %7:_(i28) = G_MERGE_VALUES %0(i4), %1(i4), %2(i4), %3(i4), %4(i4), %5(i4), %6(i4) + S_NOP 0, implicit %7(i28) ... 
--- @@ -342,45 +342,45 @@ name: test_merge_s32_s4_s4_s4_s4_s4_s4_s4_s4 body: | bb.0: ; CHECK-LABEL: name: test_merge_s32_s4_s4_s4_s4_s4_s4_s4_s4 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C4]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]] - ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C8]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C9]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C10]](s32) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR2]], [[SHL3]] - ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C5]], [[C11]](s32) - ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] - ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C12]](s32) - ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] - ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[C7]], [[C13]](s32) - ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[OR5]], [[SHL6]] - ; CHECK-NEXT: S_NOP 0, implicit [[OR6]](s32) - %0:_(s4) = G_CONSTANT i4 0 - %1:_(s4) = G_CONSTANT i4 1 - %2:_(s4) = G_CONSTANT i4 2 - %3:_(s4) = G_CONSTANT i4 3 - %4:_(s4) = G_CONSTANT i4 4 - %5:_(s4) = G_CONSTANT i4 5 - %6:_(s4) = G_CONSTANT i4 6 - %7:_(s4) = G_CONSTANT i4 7 - %8:_(s32) = G_MERGE_VALUES %0, %1, %2, %3, %4, %5, %6, %7 - S_NOP 0, implicit %8 + ; CHECK: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 5 + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 6 + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[C1]], [[C4]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[C]], [[SHL]] + ; CHECK-NEXT: [[C8:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[C2]], [[C8]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[C9:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[C3]], [[C9]](i32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[OR1]], [[SHL2]] + ; CHECK-NEXT: [[C10:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[C4]], [[C10]](i32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[OR2]], [[SHL3]] + ; CHECK-NEXT: [[C11:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[C5]], [[C11]](i32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR 
[[OR3]], [[SHL4]] + ; CHECK-NEXT: [[C12:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[C6]], [[C12]](i32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[OR4]], [[SHL5]] + ; CHECK-NEXT: [[C13:%[0-9]+]]:_(i32) = G_CONSTANT i32 28 + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[C7]], [[C13]](i32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[OR5]], [[SHL6]] + ; CHECK-NEXT: S_NOP 0, implicit [[OR6]](i32) + %0:_(i4) = G_CONSTANT i4 0 + %1:_(i4) = G_CONSTANT i4 1 + %2:_(i4) = G_CONSTANT i4 2 + %3:_(i4) = G_CONSTANT i4 3 + %4:_(i4) = G_CONSTANT i4 4 + %5:_(i4) = G_CONSTANT i4 5 + %6:_(i4) = G_CONSTANT i4 6 + %7:_(i4) = G_CONSTANT i4 7 + %8:_(i32) = G_MERGE_VALUES %0(i4), %1(i4), %2(i4), %3(i4), %4(i4), %5(i4), %6(i4), %7(i4) + S_NOP 0, implicit %8(i32) ... --- @@ -388,51 +388,51 @@ name: test_merge_s64_s8_s8_s8_s8_s8_s8_s8_s8 body: | bb.0: ; CHECK-LABEL: name: test_merge_s64_s8_s8_s8_s8_s8_s8_s8_s8 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[COPY]](s32) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[C4]], [[TRUNC]] - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[COPY1]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[C6]], [[TRUNC1]] - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 4 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[COPY2]](s32) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[C7]], [[TRUNC2]] - ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 6 - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C5]](s32) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[C8]], [[TRUNC3]] - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) - ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) - ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) - %0:_(s8) = G_CONSTANT i8 0 - %1:_(s8) = G_CONSTANT i8 1 - %2:_(s8) = G_CONSTANT i8 2 - %3:_(s8) = G_CONSTANT i8 3 - %4:_(s8) = G_CONSTANT i8 4 - %5:_(s8) = G_CONSTANT i8 5 - %6:_(s8) = G_CONSTANT i8 6 - %7:_(s8) = G_CONSTANT i8 7 - %8:_(s64) = G_MERGE_VALUES %0, %1, %2, %3, %4, %5, %6, %7 - $vgpr0_vgpr1 = COPY %8 + ; CHECK: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = 
G_CONSTANT i32 3 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 5 + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 0 + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[C]], [[COPY]](i32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[SHL]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[C4]], [[TRUNC]] + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(i16) = G_CONSTANT i16 2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[C1]], [[COPY1]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[SHL1]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[C6]], [[TRUNC1]] + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(i16) = G_CONSTANT i16 4 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C5]](i32) + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[C2]], [[COPY2]](i32) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[SHL2]](i32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(i16) = G_OR [[C7]], [[TRUNC2]] + ; CHECK-NEXT: [[C8:%[0-9]+]]:_(i16) = G_CONSTANT i16 6 + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[C3]], [[C5]](i32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[SHL3]](i32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(i16) = G_OR [[C8]], [[TRUNC3]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; CHECK-NEXT: [[C9:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C9]](i32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL4]] + ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[OR2]](i16) + ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[OR3]](i16) + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C9]](i32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL5]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR4]](i32), [[OR5]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + %0:_(i8) = G_CONSTANT i8 0 + %1:_(i8) = G_CONSTANT i8 1 + %2:_(i8) = G_CONSTANT i8 2 + %3:_(i8) = G_CONSTANT i8 3 + %4:_(i8) = G_CONSTANT i8 4 + %5:_(i8) = G_CONSTANT i8 5 + %6:_(i8) = G_CONSTANT i8 6 + %7:_(i8) = G_CONSTANT i8 7 + %8:_(i64) = G_MERGE_VALUES %0(i8), %1(i8), %2(i8), %3(i8), %4(i8), %5(i8), %6(i8), %7(i8) + $vgpr0_vgpr1 = COPY %8(i64) ... 
--- @@ -440,74 +440,74 @@ name: test_merge_s96_s8_s8_s8_s8_s8_s8_s8_s8_s8_s8_s8_s8 body: | bb.0: ; CHECK-LABEL: name: test_merge_s96_s8_s8_s8_s8_s8_s8_s8_s8_s8_s8_s8_s8 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[COPY]](s32) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[C7]], [[TRUNC]] - ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s16) = G_CONSTANT i16 2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[COPY1]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[C9]], [[TRUNC1]] - ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s16) = G_CONSTANT i16 4 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[COPY2]](s32) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[C10]], [[TRUNC2]] - ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s16) = G_CONSTANT i16 6 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[COPY3]](s32) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[C11]], [[TRUNC3]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY [[C8]](s16) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C5]], [[COPY5]](s32) - ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[COPY4]], [[TRUNC4]] - ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s16) = G_CONSTANT i16 10 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[COPY6]](s32) - ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s16) = G_OR [[C12]], [[TRUNC5]] - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C13]](s32) - ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL6]] - ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C13]](s32) - ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL7]] - ; CHECK-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) - ; CHECK-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) - ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C13]](s32) - ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR6]](s32), [[OR7]](s32), [[OR8]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) - %0:_(s8) = 
G_CONSTANT i8 0 - %1:_(s8) = G_CONSTANT i8 1 - %2:_(s8) = G_CONSTANT i8 2 - %3:_(s8) = G_CONSTANT i8 3 - %4:_(s8) = G_CONSTANT i8 4 - %5:_(s8) = G_CONSTANT i8 5 - %6:_(s8) = G_CONSTANT i8 6 - %7:_(s8) = G_CONSTANT i8 7 - %9:_(s8) = G_CONSTANT i8 8 - %10:_(s8) = G_CONSTANT i8 9 - %11:_(s8) = G_CONSTANT i8 10 - %12:_(s8) = G_CONSTANT i8 11 - - %13:_(s96) = G_MERGE_VALUES %0, %1, %2, %3, %4, %5, %6, %7, %9, %10, %11, %12 - $vgpr0_vgpr1_vgpr2 = COPY %13 + ; CHECK: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 5 + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 9 + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 11 + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(i16) = G_CONSTANT i16 0 + ; CHECK-NEXT: [[C8:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[C]], [[COPY]](i32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[SHL]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[C7]], [[TRUNC]] + ; CHECK-NEXT: [[C9:%[0-9]+]]:_(i16) = G_CONSTANT i16 2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[C1]], [[COPY1]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[SHL1]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[C9]], [[TRUNC1]] + ; CHECK-NEXT: [[C10:%[0-9]+]]:_(i16) = G_CONSTANT i16 4 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[C2]], [[COPY2]](i32) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[SHL2]](i32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(i16) = G_OR [[C10]], [[TRUNC2]] + ; CHECK-NEXT: [[C11:%[0-9]+]]:_(i16) = G_CONSTANT i16 6 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[C3]], [[COPY3]](i32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[SHL3]](i32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(i16) = G_OR [[C11]], [[TRUNC3]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i16) = COPY [[C8]](i16) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[C5]], [[COPY5]](i32) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[SHL4]](i32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(i16) = G_OR [[COPY4]], [[TRUNC4]] + ; CHECK-NEXT: [[C12:%[0-9]+]]:_(i16) = G_CONSTANT i16 10 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[C6]], [[COPY6]](i32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[SHL5]](i32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(i16) = G_OR [[C12]], [[TRUNC5]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; CHECK-NEXT: [[C13:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C13]](i32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL6]] + ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[OR2]](i16) + ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[OR3]](i16) + ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C13]](i32) + ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL7]] + ; CHECK-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[OR4]](i16) + ; CHECK-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[OR5]](i16) + ; CHECK-NEXT: 
[[SHL8:%[0-9]+]]:_(i32) = G_SHL [[ZEXT5]], [[C13]](i32) + ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[ZEXT4]], [[SHL8]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i96) = G_MERGE_VALUES [[OR6]](i32), [[OR7]](i32), [[OR8]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](i96) + %0:_(i8) = G_CONSTANT i8 0 + %1:_(i8) = G_CONSTANT i8 1 + %2:_(i8) = G_CONSTANT i8 2 + %3:_(i8) = G_CONSTANT i8 3 + %4:_(i8) = G_CONSTANT i8 4 + %5:_(i8) = G_CONSTANT i8 5 + %6:_(i8) = G_CONSTANT i8 6 + %7:_(i8) = G_CONSTANT i8 7 + %8:_(i8) = G_CONSTANT i8 8 + %9:_(i8) = G_CONSTANT i8 9 + %10:_(i8) = G_CONSTANT i8 10 + %11:_(i8) = G_CONSTANT i8 11 + %12:_(i96) = G_MERGE_VALUES %0(i8), %1(i8), %2(i8), %3(i8), %4(i8), %5(i8), %6(i8), %7(i8), %8(i8), %9(i8), %10(i8), %11(i8) + $vgpr0_vgpr1_vgpr2 = COPY %12(i96) + ... --- @@ -515,29 +515,29 @@ name: test_merge_s96_s16_s16_s16_s16_s16_s16 body: | bb.0: ; CHECK-LABEL: name: test_merge_s96_s16_s16_s16_s16_s16_s16 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C2]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C3]], [[SHL1]] - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C2]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C5]], [[SHL2]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32), [[OR2]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) - %0:_(s16) = G_CONSTANT i16 0 - %1:_(s16) = G_CONSTANT i16 1 - %2:_(s16) = G_CONSTANT i16 2 - %3:_(s16) = G_CONSTANT i16 3 - %4:_(s16) = G_CONSTANT i16 4 - %5:_(s16) = G_CONSTANT i16 5 - %8:_(s96) = G_MERGE_VALUES %0, %1, %2, %3, %4, %5 - $vgpr0_vgpr1_vgpr2 = COPY %8 + ; CHECK: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[C1]], [[C2]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[C]], [[SHL]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[C4]], [[C2]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[C3]], [[SHL1]] + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 5 + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[C6]], [[C2]](i32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[C5]], [[SHL2]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i96) = G_MERGE_VALUES [[OR]](i32), [[OR1]](i32), [[OR2]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](i96) + %0:_(i16) = G_CONSTANT i16 0 + %1:_(i16) = G_CONSTANT i16 1 + %2:_(i16) = G_CONSTANT i16 2 + %3:_(i16) = G_CONSTANT i16 3 + %4:_(i16) = G_CONSTANT i16 4 + %5:_(i16) = G_CONSTANT i16 5 + %6:_(i96) = G_MERGE_VALUES %0(i16), %1(i16), %2(i16), %3(i16), %4(i16), %5(i16) + $vgpr0_vgpr1_vgpr2 = COPY %6(i96) ... 
--- @@ -545,53 +545,53 @@ name: test_merge_s56_s8_s8_s8_s8_s8_s8_s8 body: | bb.0: ; CHECK-LABEL: name: test_merge_s56_s8_s8_s8_s8_s8_s8_s8 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[COPY]](s32) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[C3]], [[TRUNC]] - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[COPY1]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[C5]], [[TRUNC1]] - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 4 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[COPY2]](s32) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[C6]], [[TRUNC2]] - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 6 - ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C8]] - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C4]](s32) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[C7]], [[TRUNC3]] - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) - ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) - ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) - ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s56) = G_TRUNC [[MV]](s64) - ; CHECK-NEXT: S_NOP 0, implicit [[TRUNC4]](s56) - %0:_(s8) = G_CONSTANT i8 0 - %1:_(s8) = G_CONSTANT i8 1 - %2:_(s8) = G_CONSTANT i8 2 - %3:_(s8) = G_CONSTANT i8 3 - %4:_(s8) = G_CONSTANT i8 4 - %5:_(s8) = G_CONSTANT i8 5 - %6:_(s8) = G_CONSTANT i8 6 - %7:_(s56) = G_MERGE_VALUES %0, %1, %2, %3, %4, %5, %6 - S_NOP 0, implicit %7 + ; CHECK: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 5 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 0 + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[C]], [[COPY]](i32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[SHL]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[C3]], [[TRUNC]] + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(i16) = G_CONSTANT i16 2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = 
G_SHL [[C1]], [[COPY1]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[SHL1]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[C5]], [[TRUNC1]] + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(i16) = G_CONSTANT i16 4 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[C2]], [[COPY2]](i32) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[SHL2]](i32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(i16) = G_OR [[C6]], [[TRUNC2]] + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(i16) = G_CONSTANT i16 6 + ; CHECK-NEXT: [[C8:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[DEF]], [[C8]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[AND]], [[C4]](i32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[SHL3]](i32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(i16) = G_OR [[C7]], [[TRUNC3]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; CHECK-NEXT: [[C9:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C9]](i32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL4]] + ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[OR2]](i16) + ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[OR3]](i16) + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C9]](i32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL5]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR4]](i32), [[OR5]](i32) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(i56) = G_TRUNC [[MV]](i64) + ; CHECK-NEXT: S_NOP 0, implicit [[TRUNC4]](i56) + %0:_(i8) = G_CONSTANT i8 0 + %1:_(i8) = G_CONSTANT i8 1 + %2:_(i8) = G_CONSTANT i8 2 + %3:_(i8) = G_CONSTANT i8 3 + %4:_(i8) = G_CONSTANT i8 4 + %5:_(i8) = G_CONSTANT i8 5 + %6:_(i8) = G_CONSTANT i8 6 + %7:_(i56) = G_MERGE_VALUES %0(i8), %1(i8), %2(i8), %3(i8), %4(i8), %5(i8), %6(i8) + S_NOP 0, implicit %7(i56) ... 
--- @@ -599,252 +599,252 @@ name: test_merge_s68_s17_s17_s17_s17 body: | bb.0: ; CHECK-LABEL: name: test_merge_s68_s17_s17_s17_s17 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C2]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C3]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C4]](s32) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR2]], [[SHL3]] - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C5]](s32) - ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C6]](s32) - ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C7]](s32) - ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[OR5]], [[SHL6]] - ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C8]](s32) - ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]] - ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 - ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C9]](s32) - ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] - ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C10]](s32) - ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[OR8]], [[SHL9]] - ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 - ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C11]](s32) - ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]] - ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C12]](s32) - ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]] - ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 - ; CHECK-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C13]](s32) - ; CHECK-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[OR11]], [[SHL12]] - ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 14 - ; CHECK-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C14]](s32) - ; CHECK-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]] - ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 - ; CHECK-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C15]](s32) - ; CHECK-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]] - ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C16]](s32) - ; CHECK-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[OR14]], [[SHL15]] - ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 - ; CHECK-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C17]](s32) - ; CHECK-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]] - ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 18 - ; CHECK-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C18]](s32) - 
; CHECK-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]] - ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 19 - ; CHECK-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C19]](s32) - ; CHECK-NEXT: [[OR18:%[0-9]+]]:_(s32) = G_OR [[OR17]], [[SHL18]] - ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C20]](s32) - ; CHECK-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[OR18]], [[SHL19]] - ; CHECK-NEXT: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 21 - ; CHECK-NEXT: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C21]](s32) - ; CHECK-NEXT: [[OR20:%[0-9]+]]:_(s32) = G_OR [[OR19]], [[SHL20]] - ; CHECK-NEXT: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 22 - ; CHECK-NEXT: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C22]](s32) - ; CHECK-NEXT: [[OR21:%[0-9]+]]:_(s32) = G_OR [[OR20]], [[SHL21]] - ; CHECK-NEXT: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 - ; CHECK-NEXT: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C23]](s32) - ; CHECK-NEXT: [[OR22:%[0-9]+]]:_(s32) = G_OR [[OR21]], [[SHL22]] - ; CHECK-NEXT: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C24]](s32) - ; CHECK-NEXT: [[OR23:%[0-9]+]]:_(s32) = G_OR [[OR22]], [[SHL23]] - ; CHECK-NEXT: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 - ; CHECK-NEXT: [[SHL24:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C25]](s32) - ; CHECK-NEXT: [[OR24:%[0-9]+]]:_(s32) = G_OR [[OR23]], [[SHL24]] - ; CHECK-NEXT: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 26 - ; CHECK-NEXT: [[SHL25:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C26]](s32) - ; CHECK-NEXT: [[OR25:%[0-9]+]]:_(s32) = G_OR [[OR24]], [[SHL25]] - ; CHECK-NEXT: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 27 - ; CHECK-NEXT: [[SHL26:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C27]](s32) - ; CHECK-NEXT: [[OR26:%[0-9]+]]:_(s32) = G_OR [[OR25]], [[SHL26]] - ; CHECK-NEXT: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; CHECK-NEXT: [[SHL27:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C28]](s32) - ; CHECK-NEXT: [[OR27:%[0-9]+]]:_(s32) = G_OR [[OR26]], [[SHL27]] - ; CHECK-NEXT: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 29 - ; CHECK-NEXT: [[SHL28:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C29]](s32) - ; CHECK-NEXT: [[OR28:%[0-9]+]]:_(s32) = G_OR [[OR27]], [[SHL28]] - ; CHECK-NEXT: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 30 - ; CHECK-NEXT: [[SHL29:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C30]](s32) - ; CHECK-NEXT: [[OR29:%[0-9]+]]:_(s32) = G_OR [[OR28]], [[SHL29]] - ; CHECK-NEXT: [[C31:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; CHECK-NEXT: [[SHL30:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C31]](s32) - ; CHECK-NEXT: [[OR30:%[0-9]+]]:_(s32) = G_OR [[OR29]], [[SHL30]] - ; CHECK-NEXT: [[SHL31:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) - ; CHECK-NEXT: [[OR31:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL31]] - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL32:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C2]](s32) - ; CHECK-NEXT: [[OR32:%[0-9]+]]:_(s32) = G_OR [[OR31]], [[SHL32]] - ; CHECK-NEXT: [[SHL33:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C3]](s32) - ; CHECK-NEXT: [[OR33:%[0-9]+]]:_(s32) = G_OR [[OR32]], [[SHL33]] - ; CHECK-NEXT: [[SHL34:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C4]](s32) - ; CHECK-NEXT: [[OR34:%[0-9]+]]:_(s32) = G_OR [[OR33]], [[SHL34]] - ; CHECK-NEXT: [[SHL35:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C5]](s32) - ; CHECK-NEXT: [[OR35:%[0-9]+]]:_(s32) = G_OR [[OR34]], [[SHL35]] - ; CHECK-NEXT: [[SHL36:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C6]](s32) - ; CHECK-NEXT: [[OR36:%[0-9]+]]:_(s32) = G_OR [[OR35]], [[SHL36]] - ; CHECK-NEXT: [[SHL37:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C7]](s32) - ; CHECK-NEXT: 
[[OR37:%[0-9]+]]:_(s32) = G_OR [[OR36]], [[SHL37]] - ; CHECK-NEXT: [[SHL38:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C8]](s32) - ; CHECK-NEXT: [[OR38:%[0-9]+]]:_(s32) = G_OR [[OR37]], [[SHL38]] - ; CHECK-NEXT: [[SHL39:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C9]](s32) - ; CHECK-NEXT: [[OR39:%[0-9]+]]:_(s32) = G_OR [[OR38]], [[SHL39]] - ; CHECK-NEXT: [[SHL40:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C10]](s32) - ; CHECK-NEXT: [[OR40:%[0-9]+]]:_(s32) = G_OR [[OR39]], [[SHL40]] - ; CHECK-NEXT: [[SHL41:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C11]](s32) - ; CHECK-NEXT: [[OR41:%[0-9]+]]:_(s32) = G_OR [[OR40]], [[SHL41]] - ; CHECK-NEXT: [[SHL42:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C12]](s32) - ; CHECK-NEXT: [[OR42:%[0-9]+]]:_(s32) = G_OR [[OR41]], [[SHL42]] - ; CHECK-NEXT: [[SHL43:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C13]](s32) - ; CHECK-NEXT: [[OR43:%[0-9]+]]:_(s32) = G_OR [[OR42]], [[SHL43]] - ; CHECK-NEXT: [[SHL44:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C14]](s32) - ; CHECK-NEXT: [[OR44:%[0-9]+]]:_(s32) = G_OR [[OR43]], [[SHL44]] - ; CHECK-NEXT: [[SHL45:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C15]](s32) - ; CHECK-NEXT: [[OR45:%[0-9]+]]:_(s32) = G_OR [[OR44]], [[SHL45]] - ; CHECK-NEXT: [[SHL46:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C16]](s32) - ; CHECK-NEXT: [[OR46:%[0-9]+]]:_(s32) = G_OR [[OR45]], [[SHL46]] - ; CHECK-NEXT: [[SHL47:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C17]](s32) - ; CHECK-NEXT: [[OR47:%[0-9]+]]:_(s32) = G_OR [[OR46]], [[SHL47]] - ; CHECK-NEXT: [[SHL48:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C18]](s32) - ; CHECK-NEXT: [[OR48:%[0-9]+]]:_(s32) = G_OR [[OR47]], [[SHL48]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[SHL49:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C19]](s32) - ; CHECK-NEXT: [[OR49:%[0-9]+]]:_(s32) = G_OR [[OR48]], [[SHL49]] - ; CHECK-NEXT: [[SHL50:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C20]](s32) - ; CHECK-NEXT: [[OR50:%[0-9]+]]:_(s32) = G_OR [[OR49]], [[SHL50]] - ; CHECK-NEXT: [[SHL51:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C21]](s32) - ; CHECK-NEXT: [[OR51:%[0-9]+]]:_(s32) = G_OR [[OR50]], [[SHL51]] - ; CHECK-NEXT: [[SHL52:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C22]](s32) - ; CHECK-NEXT: [[OR52:%[0-9]+]]:_(s32) = G_OR [[OR51]], [[SHL52]] - ; CHECK-NEXT: [[SHL53:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C23]](s32) - ; CHECK-NEXT: [[OR53:%[0-9]+]]:_(s32) = G_OR [[OR52]], [[SHL53]] - ; CHECK-NEXT: [[SHL54:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C24]](s32) - ; CHECK-NEXT: [[OR54:%[0-9]+]]:_(s32) = G_OR [[OR53]], [[SHL54]] - ; CHECK-NEXT: [[SHL55:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C25]](s32) - ; CHECK-NEXT: [[OR55:%[0-9]+]]:_(s32) = G_OR [[OR54]], [[SHL55]] - ; CHECK-NEXT: [[SHL56:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C26]](s32) - ; CHECK-NEXT: [[OR56:%[0-9]+]]:_(s32) = G_OR [[OR55]], [[SHL56]] - ; CHECK-NEXT: [[SHL57:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C27]](s32) - ; CHECK-NEXT: [[OR57:%[0-9]+]]:_(s32) = G_OR [[OR56]], [[SHL57]] - ; CHECK-NEXT: [[SHL58:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C28]](s32) - ; CHECK-NEXT: [[OR58:%[0-9]+]]:_(s32) = G_OR [[OR57]], [[SHL58]] - ; CHECK-NEXT: [[SHL59:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C29]](s32) - ; CHECK-NEXT: [[OR59:%[0-9]+]]:_(s32) = G_OR [[OR58]], [[SHL59]] - ; CHECK-NEXT: [[SHL60:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C30]](s32) - ; CHECK-NEXT: [[OR60:%[0-9]+]]:_(s32) = G_OR [[OR59]], [[SHL60]] - ; CHECK-NEXT: [[SHL61:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C31]](s32) - ; CHECK-NEXT: [[OR61:%[0-9]+]]:_(s32) = G_OR [[OR60]], [[SHL61]] - ; CHECK-NEXT: [[SHL62:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) - ; CHECK-NEXT: [[OR62:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL62]] - ; CHECK-NEXT: [[SHL63:%[0-9]+]]:_(s32) = 
G_SHL [[C]], [[C2]](s32) - ; CHECK-NEXT: [[OR63:%[0-9]+]]:_(s32) = G_OR [[OR62]], [[SHL63]] - ; CHECK-NEXT: [[SHL64:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C3]](s32) - ; CHECK-NEXT: [[OR64:%[0-9]+]]:_(s32) = G_OR [[OR63]], [[SHL64]] - ; CHECK-NEXT: [[SHL65:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C4]](s32) - ; CHECK-NEXT: [[OR65:%[0-9]+]]:_(s32) = G_OR [[OR64]], [[SHL65]] - ; CHECK-NEXT: [[SHL66:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C5]](s32) - ; CHECK-NEXT: [[OR66:%[0-9]+]]:_(s32) = G_OR [[OR65]], [[SHL66]] - ; CHECK-NEXT: [[SHL67:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C6]](s32) - ; CHECK-NEXT: [[OR67:%[0-9]+]]:_(s32) = G_OR [[OR66]], [[SHL67]] - ; CHECK-NEXT: [[SHL68:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C7]](s32) - ; CHECK-NEXT: [[OR68:%[0-9]+]]:_(s32) = G_OR [[OR67]], [[SHL68]] - ; CHECK-NEXT: [[SHL69:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C8]](s32) - ; CHECK-NEXT: [[OR69:%[0-9]+]]:_(s32) = G_OR [[OR68]], [[SHL69]] - ; CHECK-NEXT: [[SHL70:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C9]](s32) - ; CHECK-NEXT: [[OR70:%[0-9]+]]:_(s32) = G_OR [[OR69]], [[SHL70]] - ; CHECK-NEXT: [[SHL71:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C10]](s32) - ; CHECK-NEXT: [[OR71:%[0-9]+]]:_(s32) = G_OR [[OR70]], [[SHL71]] - ; CHECK-NEXT: [[SHL72:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C11]](s32) - ; CHECK-NEXT: [[OR72:%[0-9]+]]:_(s32) = G_OR [[OR71]], [[SHL72]] - ; CHECK-NEXT: [[SHL73:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C12]](s32) - ; CHECK-NEXT: [[OR73:%[0-9]+]]:_(s32) = G_OR [[OR72]], [[SHL73]] - ; CHECK-NEXT: [[SHL74:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C13]](s32) - ; CHECK-NEXT: [[OR74:%[0-9]+]]:_(s32) = G_OR [[OR73]], [[SHL74]] - ; CHECK-NEXT: [[SHL75:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C14]](s32) - ; CHECK-NEXT: [[OR75:%[0-9]+]]:_(s32) = G_OR [[OR74]], [[SHL75]] - ; CHECK-NEXT: [[SHL76:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C15]](s32) - ; CHECK-NEXT: [[OR76:%[0-9]+]]:_(s32) = G_OR [[OR75]], [[SHL76]] - ; CHECK-NEXT: [[SHL77:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C16]](s32) - ; CHECK-NEXT: [[OR77:%[0-9]+]]:_(s32) = G_OR [[OR76]], [[SHL77]] - ; CHECK-NEXT: [[SHL78:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C17]](s32) - ; CHECK-NEXT: [[OR78:%[0-9]+]]:_(s32) = G_OR [[OR77]], [[SHL78]] - ; CHECK-NEXT: [[SHL79:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C18]](s32) - ; CHECK-NEXT: [[OR79:%[0-9]+]]:_(s32) = G_OR [[OR78]], [[SHL79]] - ; CHECK-NEXT: [[SHL80:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C19]](s32) - ; CHECK-NEXT: [[OR80:%[0-9]+]]:_(s32) = G_OR [[OR79]], [[SHL80]] - ; CHECK-NEXT: [[SHL81:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C20]](s32) - ; CHECK-NEXT: [[OR81:%[0-9]+]]:_(s32) = G_OR [[OR80]], [[SHL81]] - ; CHECK-NEXT: [[SHL82:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C21]](s32) - ; CHECK-NEXT: [[OR82:%[0-9]+]]:_(s32) = G_OR [[OR81]], [[SHL82]] - ; CHECK-NEXT: [[SHL83:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C22]](s32) - ; CHECK-NEXT: [[OR83:%[0-9]+]]:_(s32) = G_OR [[OR82]], [[SHL83]] - ; CHECK-NEXT: [[SHL84:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C23]](s32) - ; CHECK-NEXT: [[OR84:%[0-9]+]]:_(s32) = G_OR [[OR83]], [[SHL84]] - ; CHECK-NEXT: [[SHL85:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C24]](s32) - ; CHECK-NEXT: [[OR85:%[0-9]+]]:_(s32) = G_OR [[OR84]], [[SHL85]] - ; CHECK-NEXT: [[SHL86:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C25]](s32) - ; CHECK-NEXT: [[OR86:%[0-9]+]]:_(s32) = G_OR [[OR85]], [[SHL86]] - ; CHECK-NEXT: [[SHL87:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C26]](s32) - ; CHECK-NEXT: [[OR87:%[0-9]+]]:_(s32) = G_OR [[OR86]], [[SHL87]] - ; CHECK-NEXT: [[SHL88:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C27]](s32) - ; CHECK-NEXT: [[OR88:%[0-9]+]]:_(s32) = G_OR [[OR87]], [[SHL88]] - ; CHECK-NEXT: [[SHL89:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C28]](s32) 
- ; CHECK-NEXT: [[OR89:%[0-9]+]]:_(s32) = G_OR [[OR88]], [[SHL89]] - ; CHECK-NEXT: [[SHL90:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C29]](s32) - ; CHECK-NEXT: [[OR90:%[0-9]+]]:_(s32) = G_OR [[OR89]], [[SHL90]] - ; CHECK-NEXT: [[SHL91:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C30]](s32) - ; CHECK-NEXT: [[OR91:%[0-9]+]]:_(s32) = G_OR [[OR90]], [[SHL91]] - ; CHECK-NEXT: [[SHL92:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C31]](s32) - ; CHECK-NEXT: [[OR92:%[0-9]+]]:_(s32) = G_OR [[OR91]], [[SHL92]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR30]](s32), [[OR61]](s32), [[OR92]](s32) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s68) = G_TRUNC [[MV]](s96) - ; CHECK-NEXT: S_NOP 0, implicit [[TRUNC]](s68) - %0:_(s17) = G_CONSTANT i17 0 - %1:_(s17) = G_CONSTANT i17 1 - %2:_(s17) = G_CONSTANT i17 2 - %3:_(s17) = G_CONSTANT i17 3 - %4:_(s68) = G_MERGE_VALUES %0, %1, %2, %3 - S_NOP 0, implicit %4 + ; CHECK: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[C]], [[SHL]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C2]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C3]](i32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[OR1]], [[SHL2]] + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C4]](i32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[OR2]], [[SHL3]] + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 5 + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C5]](i32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[OR3]], [[SHL4]] + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 6 + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C6]](i32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[OR4]], [[SHL5]] + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C7]](i32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[OR5]], [[SHL6]] + ; CHECK-NEXT: [[C8:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C8]](i32) + ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[OR6]], [[SHL7]] + ; CHECK-NEXT: [[C9:%[0-9]+]]:_(i32) = G_CONSTANT i32 9 + ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C9]](i32) + ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[OR7]], [[SHL8]] + ; CHECK-NEXT: [[C10:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C10]](i32) + ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[OR8]], [[SHL9]] + ; CHECK-NEXT: [[C11:%[0-9]+]]:_(i32) = G_CONSTANT i32 11 + ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C11]](i32) + ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[OR9]], [[SHL10]] + ; CHECK-NEXT: [[C12:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C12]](i32) + ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[OR10]], [[SHL11]] + ; CHECK-NEXT: [[C13:%[0-9]+]]:_(i32) = G_CONSTANT i32 13 + ; CHECK-NEXT: [[SHL12:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C13]](i32) + ; CHECK-NEXT: [[OR12:%[0-9]+]]:_(i32) = G_OR [[OR11]], [[SHL12]] + ; CHECK-NEXT: [[C14:%[0-9]+]]:_(i32) = G_CONSTANT i32 14 + ; CHECK-NEXT: [[SHL13:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C14]](i32) + ; CHECK-NEXT: 
[[OR13:%[0-9]+]]:_(i32) = G_OR [[OR12]], [[SHL13]] + ; CHECK-NEXT: [[C15:%[0-9]+]]:_(i32) = G_CONSTANT i32 15 + ; CHECK-NEXT: [[SHL14:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C15]](i32) + ; CHECK-NEXT: [[OR14:%[0-9]+]]:_(i32) = G_OR [[OR13]], [[SHL14]] + ; CHECK-NEXT: [[C16:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL15:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C16]](i32) + ; CHECK-NEXT: [[OR15:%[0-9]+]]:_(i32) = G_OR [[OR14]], [[SHL15]] + ; CHECK-NEXT: [[C17:%[0-9]+]]:_(i32) = G_CONSTANT i32 17 + ; CHECK-NEXT: [[SHL16:%[0-9]+]]:_(i32) = G_SHL [[C1]], [[C17]](i32) + ; CHECK-NEXT: [[OR16:%[0-9]+]]:_(i32) = G_OR [[OR15]], [[SHL16]] + ; CHECK-NEXT: [[C18:%[0-9]+]]:_(i32) = G_CONSTANT i32 18 + ; CHECK-NEXT: [[SHL17:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C18]](i32) + ; CHECK-NEXT: [[OR17:%[0-9]+]]:_(i32) = G_OR [[OR16]], [[SHL17]] + ; CHECK-NEXT: [[C19:%[0-9]+]]:_(i32) = G_CONSTANT i32 19 + ; CHECK-NEXT: [[SHL18:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C19]](i32) + ; CHECK-NEXT: [[OR18:%[0-9]+]]:_(i32) = G_OR [[OR17]], [[SHL18]] + ; CHECK-NEXT: [[C20:%[0-9]+]]:_(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL19:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C20]](i32) + ; CHECK-NEXT: [[OR19:%[0-9]+]]:_(i32) = G_OR [[OR18]], [[SHL19]] + ; CHECK-NEXT: [[C21:%[0-9]+]]:_(i32) = G_CONSTANT i32 21 + ; CHECK-NEXT: [[SHL20:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C21]](i32) + ; CHECK-NEXT: [[OR20:%[0-9]+]]:_(i32) = G_OR [[OR19]], [[SHL20]] + ; CHECK-NEXT: [[C22:%[0-9]+]]:_(i32) = G_CONSTANT i32 22 + ; CHECK-NEXT: [[SHL21:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C22]](i32) + ; CHECK-NEXT: [[OR21:%[0-9]+]]:_(i32) = G_OR [[OR20]], [[SHL21]] + ; CHECK-NEXT: [[C23:%[0-9]+]]:_(i32) = G_CONSTANT i32 23 + ; CHECK-NEXT: [[SHL22:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C23]](i32) + ; CHECK-NEXT: [[OR22:%[0-9]+]]:_(i32) = G_OR [[OR21]], [[SHL22]] + ; CHECK-NEXT: [[C24:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[SHL23:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C24]](i32) + ; CHECK-NEXT: [[OR23:%[0-9]+]]:_(i32) = G_OR [[OR22]], [[SHL23]] + ; CHECK-NEXT: [[C25:%[0-9]+]]:_(i32) = G_CONSTANT i32 25 + ; CHECK-NEXT: [[SHL24:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C25]](i32) + ; CHECK-NEXT: [[OR24:%[0-9]+]]:_(i32) = G_OR [[OR23]], [[SHL24]] + ; CHECK-NEXT: [[C26:%[0-9]+]]:_(i32) = G_CONSTANT i32 26 + ; CHECK-NEXT: [[SHL25:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C26]](i32) + ; CHECK-NEXT: [[OR25:%[0-9]+]]:_(i32) = G_OR [[OR24]], [[SHL25]] + ; CHECK-NEXT: [[C27:%[0-9]+]]:_(i32) = G_CONSTANT i32 27 + ; CHECK-NEXT: [[SHL26:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C27]](i32) + ; CHECK-NEXT: [[OR26:%[0-9]+]]:_(i32) = G_OR [[OR25]], [[SHL26]] + ; CHECK-NEXT: [[C28:%[0-9]+]]:_(i32) = G_CONSTANT i32 28 + ; CHECK-NEXT: [[SHL27:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C28]](i32) + ; CHECK-NEXT: [[OR27:%[0-9]+]]:_(i32) = G_OR [[OR26]], [[SHL27]] + ; CHECK-NEXT: [[C29:%[0-9]+]]:_(i32) = G_CONSTANT i32 29 + ; CHECK-NEXT: [[SHL28:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C29]](i32) + ; CHECK-NEXT: [[OR28:%[0-9]+]]:_(i32) = G_OR [[OR27]], [[SHL28]] + ; CHECK-NEXT: [[C30:%[0-9]+]]:_(i32) = G_CONSTANT i32 30 + ; CHECK-NEXT: [[SHL29:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C30]](i32) + ; CHECK-NEXT: [[OR29:%[0-9]+]]:_(i32) = G_OR [[OR28]], [[SHL29]] + ; CHECK-NEXT: [[C31:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; CHECK-NEXT: [[SHL30:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C31]](i32) + ; CHECK-NEXT: [[OR30:%[0-9]+]]:_(i32) = G_OR [[OR29]], [[SHL30]] + ; CHECK-NEXT: [[SHL31:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C1]](i32) + ; CHECK-NEXT: [[OR31:%[0-9]+]]:_(i32) = G_OR [[C]], [[SHL31]] + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY 
[[C]](i32) + ; CHECK-NEXT: [[SHL32:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[C2]](i32) + ; CHECK-NEXT: [[OR32:%[0-9]+]]:_(i32) = G_OR [[OR31]], [[SHL32]] + ; CHECK-NEXT: [[SHL33:%[0-9]+]]:_(i32) = G_SHL [[C1]], [[C3]](i32) + ; CHECK-NEXT: [[OR33:%[0-9]+]]:_(i32) = G_OR [[OR32]], [[SHL33]] + ; CHECK-NEXT: [[SHL34:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C4]](i32) + ; CHECK-NEXT: [[OR34:%[0-9]+]]:_(i32) = G_OR [[OR33]], [[SHL34]] + ; CHECK-NEXT: [[SHL35:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C5]](i32) + ; CHECK-NEXT: [[OR35:%[0-9]+]]:_(i32) = G_OR [[OR34]], [[SHL35]] + ; CHECK-NEXT: [[SHL36:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C6]](i32) + ; CHECK-NEXT: [[OR36:%[0-9]+]]:_(i32) = G_OR [[OR35]], [[SHL36]] + ; CHECK-NEXT: [[SHL37:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C7]](i32) + ; CHECK-NEXT: [[OR37:%[0-9]+]]:_(i32) = G_OR [[OR36]], [[SHL37]] + ; CHECK-NEXT: [[SHL38:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C8]](i32) + ; CHECK-NEXT: [[OR38:%[0-9]+]]:_(i32) = G_OR [[OR37]], [[SHL38]] + ; CHECK-NEXT: [[SHL39:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C9]](i32) + ; CHECK-NEXT: [[OR39:%[0-9]+]]:_(i32) = G_OR [[OR38]], [[SHL39]] + ; CHECK-NEXT: [[SHL40:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C10]](i32) + ; CHECK-NEXT: [[OR40:%[0-9]+]]:_(i32) = G_OR [[OR39]], [[SHL40]] + ; CHECK-NEXT: [[SHL41:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C11]](i32) + ; CHECK-NEXT: [[OR41:%[0-9]+]]:_(i32) = G_OR [[OR40]], [[SHL41]] + ; CHECK-NEXT: [[SHL42:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C12]](i32) + ; CHECK-NEXT: [[OR42:%[0-9]+]]:_(i32) = G_OR [[OR41]], [[SHL42]] + ; CHECK-NEXT: [[SHL43:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C13]](i32) + ; CHECK-NEXT: [[OR43:%[0-9]+]]:_(i32) = G_OR [[OR42]], [[SHL43]] + ; CHECK-NEXT: [[SHL44:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C14]](i32) + ; CHECK-NEXT: [[OR44:%[0-9]+]]:_(i32) = G_OR [[OR43]], [[SHL44]] + ; CHECK-NEXT: [[SHL45:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C15]](i32) + ; CHECK-NEXT: [[OR45:%[0-9]+]]:_(i32) = G_OR [[OR44]], [[SHL45]] + ; CHECK-NEXT: [[SHL46:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C16]](i32) + ; CHECK-NEXT: [[OR46:%[0-9]+]]:_(i32) = G_OR [[OR45]], [[SHL46]] + ; CHECK-NEXT: [[SHL47:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C17]](i32) + ; CHECK-NEXT: [[OR47:%[0-9]+]]:_(i32) = G_OR [[OR46]], [[SHL47]] + ; CHECK-NEXT: [[SHL48:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C18]](i32) + ; CHECK-NEXT: [[OR48:%[0-9]+]]:_(i32) = G_OR [[OR47]], [[SHL48]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C1]](i32) + ; CHECK-NEXT: [[SHL49:%[0-9]+]]:_(i32) = G_SHL [[COPY1]], [[C19]](i32) + ; CHECK-NEXT: [[OR49:%[0-9]+]]:_(i32) = G_OR [[OR48]], [[SHL49]] + ; CHECK-NEXT: [[SHL50:%[0-9]+]]:_(i32) = G_SHL [[C1]], [[C20]](i32) + ; CHECK-NEXT: [[OR50:%[0-9]+]]:_(i32) = G_OR [[OR49]], [[SHL50]] + ; CHECK-NEXT: [[SHL51:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C21]](i32) + ; CHECK-NEXT: [[OR51:%[0-9]+]]:_(i32) = G_OR [[OR50]], [[SHL51]] + ; CHECK-NEXT: [[SHL52:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C22]](i32) + ; CHECK-NEXT: [[OR52:%[0-9]+]]:_(i32) = G_OR [[OR51]], [[SHL52]] + ; CHECK-NEXT: [[SHL53:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C23]](i32) + ; CHECK-NEXT: [[OR53:%[0-9]+]]:_(i32) = G_OR [[OR52]], [[SHL53]] + ; CHECK-NEXT: [[SHL54:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C24]](i32) + ; CHECK-NEXT: [[OR54:%[0-9]+]]:_(i32) = G_OR [[OR53]], [[SHL54]] + ; CHECK-NEXT: [[SHL55:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C25]](i32) + ; CHECK-NEXT: [[OR55:%[0-9]+]]:_(i32) = G_OR [[OR54]], [[SHL55]] + ; CHECK-NEXT: [[SHL56:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C26]](i32) + ; CHECK-NEXT: [[OR56:%[0-9]+]]:_(i32) = G_OR [[OR55]], [[SHL56]] + ; CHECK-NEXT: [[SHL57:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C27]](i32) + ; 
CHECK-NEXT: [[OR57:%[0-9]+]]:_(i32) = G_OR [[OR56]], [[SHL57]] + ; CHECK-NEXT: [[SHL58:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C28]](i32) + ; CHECK-NEXT: [[OR58:%[0-9]+]]:_(i32) = G_OR [[OR57]], [[SHL58]] + ; CHECK-NEXT: [[SHL59:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C29]](i32) + ; CHECK-NEXT: [[OR59:%[0-9]+]]:_(i32) = G_OR [[OR58]], [[SHL59]] + ; CHECK-NEXT: [[SHL60:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C30]](i32) + ; CHECK-NEXT: [[OR60:%[0-9]+]]:_(i32) = G_OR [[OR59]], [[SHL60]] + ; CHECK-NEXT: [[SHL61:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C31]](i32) + ; CHECK-NEXT: [[OR61:%[0-9]+]]:_(i32) = G_OR [[OR60]], [[SHL61]] + ; CHECK-NEXT: [[SHL62:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C1]](i32) + ; CHECK-NEXT: [[OR62:%[0-9]+]]:_(i32) = G_OR [[C]], [[SHL62]] + ; CHECK-NEXT: [[SHL63:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C2]](i32) + ; CHECK-NEXT: [[OR63:%[0-9]+]]:_(i32) = G_OR [[OR62]], [[SHL63]] + ; CHECK-NEXT: [[SHL64:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C3]](i32) + ; CHECK-NEXT: [[OR64:%[0-9]+]]:_(i32) = G_OR [[OR63]], [[SHL64]] + ; CHECK-NEXT: [[SHL65:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C4]](i32) + ; CHECK-NEXT: [[OR65:%[0-9]+]]:_(i32) = G_OR [[OR64]], [[SHL65]] + ; CHECK-NEXT: [[SHL66:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C5]](i32) + ; CHECK-NEXT: [[OR66:%[0-9]+]]:_(i32) = G_OR [[OR65]], [[SHL66]] + ; CHECK-NEXT: [[SHL67:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C6]](i32) + ; CHECK-NEXT: [[OR67:%[0-9]+]]:_(i32) = G_OR [[OR66]], [[SHL67]] + ; CHECK-NEXT: [[SHL68:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C7]](i32) + ; CHECK-NEXT: [[OR68:%[0-9]+]]:_(i32) = G_OR [[OR67]], [[SHL68]] + ; CHECK-NEXT: [[SHL69:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C8]](i32) + ; CHECK-NEXT: [[OR69:%[0-9]+]]:_(i32) = G_OR [[OR68]], [[SHL69]] + ; CHECK-NEXT: [[SHL70:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C9]](i32) + ; CHECK-NEXT: [[OR70:%[0-9]+]]:_(i32) = G_OR [[OR69]], [[SHL70]] + ; CHECK-NEXT: [[SHL71:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C10]](i32) + ; CHECK-NEXT: [[OR71:%[0-9]+]]:_(i32) = G_OR [[OR70]], [[SHL71]] + ; CHECK-NEXT: [[SHL72:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C11]](i32) + ; CHECK-NEXT: [[OR72:%[0-9]+]]:_(i32) = G_OR [[OR71]], [[SHL72]] + ; CHECK-NEXT: [[SHL73:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C12]](i32) + ; CHECK-NEXT: [[OR73:%[0-9]+]]:_(i32) = G_OR [[OR72]], [[SHL73]] + ; CHECK-NEXT: [[SHL74:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C13]](i32) + ; CHECK-NEXT: [[OR74:%[0-9]+]]:_(i32) = G_OR [[OR73]], [[SHL74]] + ; CHECK-NEXT: [[SHL75:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C14]](i32) + ; CHECK-NEXT: [[OR75:%[0-9]+]]:_(i32) = G_OR [[OR74]], [[SHL75]] + ; CHECK-NEXT: [[SHL76:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C15]](i32) + ; CHECK-NEXT: [[OR76:%[0-9]+]]:_(i32) = G_OR [[OR75]], [[SHL76]] + ; CHECK-NEXT: [[SHL77:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C16]](i32) + ; CHECK-NEXT: [[OR77:%[0-9]+]]:_(i32) = G_OR [[OR76]], [[SHL77]] + ; CHECK-NEXT: [[SHL78:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C17]](i32) + ; CHECK-NEXT: [[OR78:%[0-9]+]]:_(i32) = G_OR [[OR77]], [[SHL78]] + ; CHECK-NEXT: [[SHL79:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C18]](i32) + ; CHECK-NEXT: [[OR79:%[0-9]+]]:_(i32) = G_OR [[OR78]], [[SHL79]] + ; CHECK-NEXT: [[SHL80:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C19]](i32) + ; CHECK-NEXT: [[OR80:%[0-9]+]]:_(i32) = G_OR [[OR79]], [[SHL80]] + ; CHECK-NEXT: [[SHL81:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C20]](i32) + ; CHECK-NEXT: [[OR81:%[0-9]+]]:_(i32) = G_OR [[OR80]], [[SHL81]] + ; CHECK-NEXT: [[SHL82:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C21]](i32) + ; CHECK-NEXT: [[OR82:%[0-9]+]]:_(i32) = G_OR [[OR81]], [[SHL82]] + ; CHECK-NEXT: [[SHL83:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C22]](i32) + ; CHECK-NEXT: 
[[OR83:%[0-9]+]]:_(i32) = G_OR [[OR82]], [[SHL83]] + ; CHECK-NEXT: [[SHL84:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C23]](i32) + ; CHECK-NEXT: [[OR84:%[0-9]+]]:_(i32) = G_OR [[OR83]], [[SHL84]] + ; CHECK-NEXT: [[SHL85:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C24]](i32) + ; CHECK-NEXT: [[OR85:%[0-9]+]]:_(i32) = G_OR [[OR84]], [[SHL85]] + ; CHECK-NEXT: [[SHL86:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C25]](i32) + ; CHECK-NEXT: [[OR86:%[0-9]+]]:_(i32) = G_OR [[OR85]], [[SHL86]] + ; CHECK-NEXT: [[SHL87:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C26]](i32) + ; CHECK-NEXT: [[OR87:%[0-9]+]]:_(i32) = G_OR [[OR86]], [[SHL87]] + ; CHECK-NEXT: [[SHL88:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C27]](i32) + ; CHECK-NEXT: [[OR88:%[0-9]+]]:_(i32) = G_OR [[OR87]], [[SHL88]] + ; CHECK-NEXT: [[SHL89:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C28]](i32) + ; CHECK-NEXT: [[OR89:%[0-9]+]]:_(i32) = G_OR [[OR88]], [[SHL89]] + ; CHECK-NEXT: [[SHL90:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C29]](i32) + ; CHECK-NEXT: [[OR90:%[0-9]+]]:_(i32) = G_OR [[OR89]], [[SHL90]] + ; CHECK-NEXT: [[SHL91:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C30]](i32) + ; CHECK-NEXT: [[OR91:%[0-9]+]]:_(i32) = G_OR [[OR90]], [[SHL91]] + ; CHECK-NEXT: [[SHL92:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C31]](i32) + ; CHECK-NEXT: [[OR92:%[0-9]+]]:_(i32) = G_OR [[OR91]], [[SHL92]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i96) = G_MERGE_VALUES [[OR30]](i32), [[OR61]](i32), [[OR92]](i32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i68) = G_TRUNC [[MV]](i96) + ; CHECK-NEXT: S_NOP 0, implicit [[TRUNC]](i68) + %0:_(i17) = G_CONSTANT i17 0 + %1:_(i17) = G_CONSTANT i17 1 + %2:_(i17) = G_CONSTANT i17 2 + %3:_(i17) = G_CONSTANT i17 3 + %4:_(i68) = G_MERGE_VALUES %0(i17), %1(i17), %2(i17), %3(i17) + S_NOP 0, implicit %4(i68) ... --- name: test_merge_p3_s16_s16 body: | bb.0: ; CHECK-LABEL: name: test_merge_p3_s16_s16 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]] - ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32) + ; CHECK: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[C1]], [[C2]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[C]], [[SHL]] + ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](i32) ; CHECK-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) - %0:_(s16) = G_CONSTANT i16 0 - %1:_(s16) = G_CONSTANT i16 1 - %2:_(p3) = G_MERGE_VALUES %0, %1 - $vgpr0 = COPY %2 + %0:_(i16) = G_CONSTANT i16 0 + %1:_(i16) = G_CONSTANT i16 1 + %2:_(p3) = G_MERGE_VALUES %0(i16), %1(i16) + $vgpr0 = COPY %2(p3) ... 
--- @@ -856,21 +856,21 @@ body: | ; CHECK-LABEL: name: test_merge_s32_s16_s16 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: $vgpr0 = COPY [[OR]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s32) = G_MERGE_VALUES %2, %3 - $vgpr0 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: $vgpr0 = COPY [[OR]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i32) = G_MERGE_VALUES %2(i16), %3(i16) + $vgpr0 = COPY %4(i32) ... --- @@ -882,32 +882,32 @@ body: | ; CHECK-LABEL: name: test_merge_s48_s16_s16_s16 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - - %3:_(s16) = G_TRUNC %0 - %4:_(s16) = G_TRUNC %1 - %5:_(s16) = G_TRUNC %2 - - %6:_(s48) = G_MERGE_VALUES %3, %4, %5 - %7:_(s64) = G_ANYEXT %6 - $vgpr0_vgpr1 = COPY %7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[COPY2]], [[C]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: 
[[SHL1:%[0-9]+]]:_(i32) = G_SHL [[C2]], [[C1]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR]](i32), [[OR1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i16) = G_TRUNC %0(i32) + %4:_(i16) = G_TRUNC %1(i32) + %5:_(i16) = G_TRUNC %2(i32) + %6:_(i48) = G_MERGE_VALUES %3(i16), %4(i16), %5(i16) + %7:_(i64) = G_ANYEXT %6(i48) + $vgpr0_vgpr1 = COPY %7(i64) + + ... --- @@ -920,14 +920,14 @@ body: | ; CHECK-LABEL: name: test_merge_s256_s128 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[COPY]](s128), [[COPY1]](s128) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV]](s256) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - %2:_(s256) = G_MERGE_VALUES %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i256) = G_MERGE_VALUES [[COPY]](i128), [[COPY1]](i128) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV]](i256) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(i256) = G_MERGE_VALUES %0(i128), %1(i128) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %2(i256) ... --- @@ -940,14 +940,14 @@ body: | ; CHECK-LABEL: name: test_merge_s512_s256 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s512) = G_MERGE_VALUES [[COPY]](s256), [[COPY1]](s256) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[MV]](s512) - %0:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - %1:_(s256) = COPY $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - %2:_(s512) = G_MERGE_VALUES %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i256) = COPY $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i512) = G_MERGE_VALUES [[COPY]](i256), [[COPY1]](i256) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[MV]](i512) + %0:_(i256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(i256) = COPY $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %2:_(i512) = G_MERGE_VALUES %0(i256), %1(i256) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %2(i512) ... 
--- @@ -960,13 +960,13 @@ body: | ; CHECK-LABEL: name: test_merge_s1024_s512 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s512) = COPY $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[COPY]](s512), [[COPY1]](s512) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[MV]](s1024) - %0:_(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - %1:_(s512) = COPY $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - %2:_(s1024) = G_MERGE_VALUES %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i512) = COPY $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i1024) = G_MERGE_VALUES [[COPY]](i512), [[COPY1]](i512) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[MV]](i1024) + %0:_(i512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:_(i512) = COPY $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + %2:_(i1024) = G_MERGE_VALUES %0(i512), %1(i512) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %2(i1024) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir index 2bf8649e76242..661ef4a9d682e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir @@ -15,14 +15,14 @@ body: | ; GCN-LABEL: name: test_mul_s32 ; GCN: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[COPY1]] - ; GCN-NEXT: $vgpr0 = COPY [[MUL]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_MUL %0, %1 - $vgpr0 = COPY %2 + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GCN-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[COPY]], [[COPY1]] + ; GCN-NEXT: $vgpr0 = COPY [[MUL]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_MUL %0, %1 + $vgpr0 = COPY %2(i32) ... --- @@ -34,18 +34,18 @@ body: | ; GCN-LABEL: name: test_mul_v2s32 ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GCN-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GCN-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV2]] - ; GCN-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MUL]](s32), [[MUL1]](s32) - ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = G_MUL %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GCN-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GCN-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GCN-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[UV]], [[UV2]] + ; GCN-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UV1]], [[UV3]] + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[MUL]](i32), [[MUL1]](i32) + ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i32>) = G_MUL %0, %1 + $vgpr0_vgpr1 = COPY %2(<2 x i32>) ... 
--- @@ -57,64 +57,64 @@ body: | ; GFX6-LABEL: name: test_mul_s64 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV2]] - ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]] - ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] - ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL]](s32), [[ADD1]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[UV]], [[UV2]] + ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UV1]], [[UV2]] + ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[UV]], [[UV3]] + ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[UV]], [[UV2]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ADD]], [[UMULH]] + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[MUL]](i32), [[ADD1]](i32) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) ; ; GFX89-LABEL: name: test_mul_s64 ; GFX89: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX89-NEXT: {{ $}} - ; GFX89-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX89-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX89-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX89-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX89-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX89-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV]](s32), [[UV2]], [[C]] - ; GFX89-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) - ; GFX89-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV5]](s32) - ; GFX89-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV]](s32), [[UV3]], [[ANYEXT]] - ; GFX89-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV1]](s32), [[UV2]], [[AMDGPU_MAD_U64_U32_2]] - ; GFX89-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX89-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV4]](s32), [[UV6]](s32) - ; GFX89-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; GFX89-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX89-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX89-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX89-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX89-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX89-NEXT: 
[[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV]](i32), [[UV2]], [[C]] + ; GFX89-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](i64) + ; GFX89-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[UV5]](i32) + ; GFX89-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV]](i32), [[UV3]], [[ANYEXT]] + ; GFX89-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV1]](i32), [[UV2]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX89-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](i64) + ; GFX89-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV4]](i32), [[UV6]](i32) + ; GFX89-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) ; ; GFX1011-LABEL: name: test_mul_s64 ; GFX1011: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX1011-NEXT: {{ $}} - ; GFX1011-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX1011-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX1011-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX1011-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX1011-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX1011-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV]](s32), [[UV2]], [[C]] - ; GFX1011-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) - ; GFX1011-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] - ; GFX1011-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV5]], [[MUL]] - ; GFX1011-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]] - ; GFX1011-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]] - ; GFX1011-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV4]](s32), [[ADD1]](s32) - ; GFX1011-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; GFX1011-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX1011-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX1011-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX1011-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX1011-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX1011-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV]](i32), [[UV2]], [[C]] + ; GFX1011-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](i64) + ; GFX1011-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[UV]], [[UV3]] + ; GFX1011-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[UV5]], [[MUL]] + ; GFX1011-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UV1]], [[UV2]] + ; GFX1011-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ADD]], [[MUL1]] + ; GFX1011-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV4]](i32), [[ADD1]](i32) + ; GFX1011-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) ; ; GFX12-LABEL: name: test_mul_s64 ; GFX12: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX12-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[COPY]], [[COPY1]] - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[MUL]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = G_MUL %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i64) = 
COPY $vgpr0_vgpr1 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX12-NEXT: [[MUL:%[0-9]+]]:_(i64) = G_MUL [[COPY]], [[COPY1]] + ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[MUL]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64) = G_MUL %0, %1 + $vgpr0_vgpr1 = COPY %2(i64) ... --- @@ -126,104 +126,104 @@ body: | ; GFX6-LABEL: name: test_mul_v2s64 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV6]] - ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]] - ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]] - ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]] - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL]](s32), [[ADD1]](s32) - ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX6-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX6-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV10]] - ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UV10]] - ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV11]] - ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UV10]] - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[MUL4]], [[MUL5]] - ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ADD2]], [[UMULH1]] - ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL3]](s32), [[ADD3]](s32) - ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY1]](<2 x i64>) + ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV]](i64) + ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[UV4]], [[UV6]] + ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UV5]], [[UV6]] + ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[UV4]], [[UV7]] + ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[UV4]], [[UV6]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ADD]], [[UMULH]] + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[MUL]](i32), [[ADD1]](i32) + ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV1]](i64) + ; GFX6-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV3]](i64) + ; 
GFX6-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UV8]], [[UV10]] + ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(i32) = G_MUL [[UV9]], [[UV10]] + ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(i32) = G_MUL [[UV8]], [[UV11]] + ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[UV8]], [[UV10]] + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[MUL4]], [[MUL5]] + ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[ADD2]], [[UMULH1]] + ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[MUL3]](i32), [[ADD3]](i32) + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; GFX89-LABEL: name: test_mul_v2s64 ; GFX89: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX89-NEXT: {{ $}} - ; GFX89-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX89-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX89-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX89-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX89-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX89-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX89-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX89-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV4]](s32), [[UV6]], [[C]] - ; GFX89-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) - ; GFX89-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV9]](s32) - ; GFX89-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV4]](s32), [[UV7]], [[ANYEXT]] - ; GFX89-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV5]](s32), [[UV6]], [[AMDGPU_MAD_U64_U32_2]] - ; GFX89-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX89-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV8]](s32), [[UV10]](s32) - ; GFX89-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX89-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX89-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV12]](s32), [[UV14]], [[C]] - ; GFX89-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) - ; GFX89-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV17]](s32) - ; GFX89-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV12]](s32), [[UV15]], [[ANYEXT1]] - ; GFX89-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV13]](s32), [[UV14]], [[AMDGPU_MAD_U64_U32_8]] - ; GFX89-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX89-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV16]](s32), [[UV18]](s32) - ; GFX89-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; GFX89-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX89-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX89-NEXT: 
[[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX89-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; GFX89-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY1]](<2 x i64>) + ; GFX89-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV]](i64) + ; GFX89-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX89-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX89-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV4]](i32), [[UV6]], [[C]] + ; GFX89-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](i64) + ; GFX89-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[UV9]](i32) + ; GFX89-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV4]](i32), [[UV7]], [[ANYEXT]] + ; GFX89-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV5]](i32), [[UV6]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX89-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](i64) + ; GFX89-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV8]](i32), [[UV10]](i32) + ; GFX89-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV1]](i64) + ; GFX89-NEXT: [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV3]](i64) + ; GFX89-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV12]](i32), [[UV14]], [[C]] + ; GFX89-NEXT: [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](i64) + ; GFX89-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[UV17]](i32) + ; GFX89-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV12]](i32), [[UV15]], [[ANYEXT1]] + ; GFX89-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV13]](i32), [[UV14]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX89-NEXT: [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](i64) + ; GFX89-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV16]](i32), [[UV18]](i32) + ; GFX89-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64) + ; GFX89-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; GFX1011-LABEL: name: test_mul_v2s64 ; GFX1011: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX1011-NEXT: {{ $}} - ; GFX1011-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX1011-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX1011-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX1011-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX1011-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX1011-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX1011-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX1011-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV4]](s32), [[UV6]], [[C]] - ; GFX1011-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = 
G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) - ; GFX1011-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]] - ; GFX1011-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[MUL]] - ; GFX1011-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]] - ; GFX1011-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]] - ; GFX1011-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV8]](s32), [[ADD1]](s32) - ; GFX1011-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX1011-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX1011-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV10]](s32), [[UV12]], [[C]] - ; GFX1011-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64) - ; GFX1011-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UV13]] - ; GFX1011-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UV15]], [[MUL2]] - ; GFX1011-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UV12]] - ; GFX1011-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ADD2]], [[MUL3]] - ; GFX1011-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV14]](s32), [[ADD3]](s32) - ; GFX1011-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; GFX1011-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX1011-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX1011-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX1011-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; GFX1011-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY1]](<2 x i64>) + ; GFX1011-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV]](i64) + ; GFX1011-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX1011-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX1011-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV4]](i32), [[UV6]], [[C]] + ; GFX1011-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](i64) + ; GFX1011-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[UV4]], [[UV7]] + ; GFX1011-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[UV9]], [[MUL]] + ; GFX1011-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UV5]], [[UV6]] + ; GFX1011-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ADD]], [[MUL1]] + ; GFX1011-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV8]](i32), [[ADD1]](i32) + ; GFX1011-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV1]](i64) + ; GFX1011-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV3]](i64) + ; GFX1011-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV10]](i32), [[UV12]], [[C]] + ; GFX1011-NEXT: [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](i64) + ; GFX1011-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[UV10]], [[UV13]] + ; GFX1011-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[UV15]], [[MUL2]] + ; GFX1011-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UV11]], [[UV12]] + ; GFX1011-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[ADD2]], [[MUL3]] + ; GFX1011-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV14]](i32), [[ADD3]](i32) + ; GFX1011-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[MV]](i64), 
[[MV1]](i64) + ; GFX1011-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; GFX12-LABEL: name: test_mul_v2s64 ; GFX12: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX12-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX12-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[UV]], [[UV2]] - ; GFX12-NEXT: [[MUL1:%[0-9]+]]:_(s64) = G_MUL [[UV1]], [[UV3]] - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MUL]](s64), [[MUL1]](s64) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - %2:_(<2 x s64>) = G_MUL %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; GFX12-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY1]](<2 x i64>) + ; GFX12-NEXT: [[MUL:%[0-9]+]]:_(i64) = G_MUL [[UV]], [[UV2]] + ; GFX12-NEXT: [[MUL1:%[0-9]+]]:_(i64) = G_MUL [[UV1]], [[UV3]] + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[MUL]](i64), [[MUL1]](i64) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(<2 x i64>) = G_MUL %0, %1 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<2 x i64>) ... 
--- @@ -235,30 +235,30 @@ body: | ; GFX6-LABEL: name: test_mul_s16 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[COPY1]] - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] - ; GFX6-NEXT: $vgpr0 = COPY [[AND]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[COPY]], [[COPY1]] + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[MUL]], [[C]] + ; GFX6-NEXT: $vgpr0 = COPY [[AND]](i32) ; ; GFX8PLUS-LABEL: name: test_mul_s16 ; GFX8PLUS: liveins: $vgpr0, $vgpr1 ; GFX8PLUS-NEXT: {{ $}} - ; GFX8PLUS-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8PLUS-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8PLUS-NEXT: [[MUL:%[0-9]+]]:_(s16) = G_MUL [[TRUNC]], [[TRUNC1]] - ; GFX8PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[MUL]](s16) - ; GFX8PLUS-NEXT: $vgpr0 = COPY [[ZEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s16) = G_MUL %2, %3 - %5:_(s32) = G_ZEXT %4 - $vgpr0 = COPY %5 + ; GFX8PLUS-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8PLUS-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX8PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX8PLUS-NEXT: [[MUL:%[0-9]+]]:_(i16) = G_MUL [[TRUNC]], [[TRUNC1]] + ; GFX8PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[MUL]](i16) + ; GFX8PLUS-NEXT: $vgpr0 = COPY [[ZEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i16) = G_MUL %2, %3 + %5:_(i32) = G_ZEXT %4(i16) + $vgpr0 = COPY %5(i32) ... 
--- @@ -270,57 +270,57 @@ body: | ; GFX6-LABEL: name: test_mul_v2s16 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[BITCAST]], [[BITCAST1]] - ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[LSHR]], [[LSHR1]] - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C1]] - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C1]] - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[BITCAST]], [[BITCAST1]] + ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[LSHR]], [[LSHR1]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[MUL]], [[C1]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[MUL1]], [[C1]] + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) ; ; GFX8-LABEL: name: test_mul_v2s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s16) = G_MUL [[TRUNC]], [[TRUNC2]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s16) = G_MUL [[TRUNC1]], [[TRUNC3]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[MUL]](s16) - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[MUL1]](s16) - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: $vgpr0 = COPY 
[[BITCAST2]](<2 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i16) = G_MUL [[TRUNC]], [[TRUNC2]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(i16) = G_MUL [[TRUNC1]], [[TRUNC3]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[MUL]](i16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[MUL1]](i16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) ; ; GFX9PLUS-LABEL: name: test_mul_v2s16 ; GFX9PLUS: liveins: $vgpr0, $vgpr1 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9PLUS-NEXT: [[MUL:%[0-9]+]]:_(<2 x s16>) = G_MUL [[COPY]], [[COPY1]] - ; GFX9PLUS-NEXT: $vgpr0 = COPY [[MUL]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_MUL %0, %1 - $vgpr0 = COPY %2 + ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9PLUS-NEXT: [[MUL:%[0-9]+]]:_(<2 x i16>) = G_MUL [[COPY]], [[COPY1]] + ; GFX9PLUS-NEXT: $vgpr0 = COPY [[MUL]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = G_MUL %0, %1 + $vgpr0 = COPY %2(<2 x i16>) ... 
--- @@ -331,87 +331,87 @@ body: | ; GFX6-LABEL: name: test_mul_v3s16 ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[COPY3]] - ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[MUL]](s32) - ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[COPY1]], [[COPY4]] - ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[MUL1]](s32) - ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[COPY2]], [[COPY5]] - ; GFX6-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[MUL2]](s32) - ; GFX6-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s16), implicit [[TRUNC1]](s16), implicit [[TRUNC2]](s16) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[COPY]], [[COPY3]] + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[MUL]](i32) + ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[COPY1]], [[COPY4]] + ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[MUL1]](i32) + ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[COPY2]], [[COPY5]] + ; GFX6-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[MUL2]](i32) + ; GFX6-NEXT: S_ENDPGM 0, implicit [[TRUNC]](i16), implicit [[TRUNC1]](i16), implicit [[TRUNC2]](i16) ; ; GFX8-LABEL: name: test_mul_v3s16 ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) - ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s16) = G_MUL [[TRUNC]], [[TRUNC3]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s16) = G_MUL [[TRUNC1]], [[TRUNC4]] - ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s16) = G_MUL [[TRUNC2]], [[TRUNC5]] - ; GFX8-NEXT: S_ENDPGM 0, implicit [[MUL]](s16), implicit [[MUL1]](s16), implicit [[MUL2]](s16) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY3]](i32) + ; GFX8-NEXT: 
[[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY4]](i32) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY5]](i32) + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i16) = G_MUL [[TRUNC]], [[TRUNC3]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(i16) = G_MUL [[TRUNC1]], [[TRUNC4]] + ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(i16) = G_MUL [[TRUNC2]], [[TRUNC5]] + ; GFX8-NEXT: S_ENDPGM 0, implicit [[MUL]](i16), implicit [[MUL1]](i16), implicit [[MUL2]](i16) ; ; GFX9PLUS-LABEL: name: test_mul_v3s16 ; GFX9PLUS: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9PLUS-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9PLUS-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9PLUS-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9PLUS-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; GFX9PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) - ; GFX9PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) - ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX9PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16) - ; GFX9PLUS-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[DEF]](s16) - ; GFX9PLUS-NEXT: [[MUL:%[0-9]+]]:_(<2 x s16>) = G_MUL [[BUILD_VECTOR]], [[BUILD_VECTOR2]] - ; GFX9PLUS-NEXT: [[MUL1:%[0-9]+]]:_(<2 x s16>) = G_MUL [[BUILD_VECTOR1]], [[BUILD_VECTOR3]] - ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[MUL]](<2 x s16>) - ; GFX9PLUS-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9PLUS-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[MUL1]](<2 x s16>) - ; GFX9PLUS-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[TRUNC6]](s16), implicit [[TRUNC7]](s16), implicit [[TRUNC8]](s16) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = COPY $vgpr3 - %4:_(s32) = COPY $vgpr4 - %5:_(s32) = COPY $vgpr5 - %6:_(s16) = G_TRUNC %0 - %7:_(s16) = G_TRUNC %1 - %8:_(s16) = G_TRUNC %2 - %9:_(s16) = G_TRUNC %3 - %10:_(s16) = G_TRUNC %4 - %11:_(s16) = G_TRUNC %5 - %12:_(<3 x s16>) = G_BUILD_VECTOR %6, %7, %8 - %13:_(<3 x s16>) = G_BUILD_VECTOR %9, %10, %11 - %14:_(<3 x s16>) = G_MUL %12, %13 - %15:_(s16), %16:_(s16), %17:_(s16) = G_UNMERGE_VALUES %14 - S_ENDPGM 0, implicit %15, implicit %16, implicit %17 + ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9PLUS-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9PLUS-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9PLUS-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9PLUS-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = 
G_TRUNC [[COPY]](i32) + ; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY3]](i32) + ; GFX9PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY4]](i32) + ; GFX9PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY5]](i32) + ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[DEF]](i16) + ; GFX9PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC3]](i16), [[TRUNC4]](i16) + ; GFX9PLUS-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC5]](i16), [[DEF]](i16) + ; GFX9PLUS-NEXT: [[MUL:%[0-9]+]]:_(<2 x i16>) = G_MUL [[BUILD_VECTOR]], [[BUILD_VECTOR2]] + ; GFX9PLUS-NEXT: [[MUL1:%[0-9]+]]:_(<2 x i16>) = G_MUL [[BUILD_VECTOR1]], [[BUILD_VECTOR3]] + ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[MUL]](<2 x i16>) + ; GFX9PLUS-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9PLUS-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[MUL1]](<2 x i16>) + ; GFX9PLUS-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[TRUNC6]](i16), implicit [[TRUNC7]](i16), implicit [[TRUNC8]](i16) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 + %4:_(i32) = COPY $vgpr4 + %5:_(i32) = COPY $vgpr5 + %6:_(i16) = G_TRUNC %0(i32) + %7:_(i16) = G_TRUNC %1(i32) + %8:_(i16) = G_TRUNC %2(i32) + %9:_(i16) = G_TRUNC %3(i32) + %10:_(i16) = G_TRUNC %4(i32) + %11:_(i16) = G_TRUNC %5(i32) + %12:_(<3 x i16>) = G_BUILD_VECTOR %6(i16), %7(i16), %8(i16) + %13:_(<3 x i16>) = G_BUILD_VECTOR %9(i16), %10(i16), %11(i16) + %14:_(<3 x i16>) = G_MUL %12, %13 + %15:_(i16), %16:_(i16), %17:_(i16) = G_UNMERGE_VALUES %14(<3 x i16>) + S_ENDPGM 0, implicit %15(i16), implicit %16(i16), implicit %17(i16) ... 
--- @@ -423,93 +423,93 @@ body: | ; GFX6-LABEL: name: test_mul_v4s16 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[BITCAST]], [[BITCAST2]] - ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[LSHR]], [[LSHR2]] - ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[BITCAST1]], [[BITCAST3]] - ; GFX6-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[LSHR1]], [[LSHR3]] - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C1]] - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C1]] - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL2]], [[C1]] - ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[MUL3]], [[C1]] - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[BITCAST]], [[BITCAST2]] + ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[LSHR]], [[LSHR2]] + ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[BITCAST1]], [[BITCAST3]] + ; GFX6-NEXT: 
[[MUL3:%[0-9]+]]:_(i32) = G_MUL [[LSHR1]], [[LSHR3]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[MUL]], [[C1]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[MUL1]], [[C1]] + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[MUL2]], [[C1]] + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[MUL3]], [[C1]] + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C]](i32) + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x i16>), [[BITCAST5]](<2 x i16>) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX8-LABEL: name: test_mul_v4s16 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s16) = G_MUL [[TRUNC]], [[TRUNC4]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s16) = G_MUL [[TRUNC1]], [[TRUNC5]] - ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s16) = G_MUL [[TRUNC2]], [[TRUNC6]] - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s16) = G_MUL [[TRUNC3]], [[TRUNC7]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[MUL]](s16) - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[MUL1]](s16) - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[MUL2]](s16) - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[MUL3]](s16) - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8-NEXT: 
[[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST3]](i32) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i16) = G_MUL [[TRUNC]], [[TRUNC4]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(i16) = G_MUL [[TRUNC1]], [[TRUNC5]] + ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(i16) = G_MUL [[TRUNC2]], [[TRUNC6]] + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(i16) = G_MUL [[TRUNC3]], [[TRUNC7]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[MUL]](i16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[MUL1]](i16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[MUL2]](i16) + ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[MUL3]](i16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x i16>), [[BITCAST5]](<2 x i16>) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX9PLUS-LABEL: name: test_mul_v4s16 ; GFX9PLUS: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9PLUS-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9PLUS-NEXT: [[MUL:%[0-9]+]]:_(<2 x s16>) = G_MUL [[UV]], [[UV2]] - ; GFX9PLUS-NEXT: [[MUL1:%[0-9]+]]:_(<2 x s16>) = G_MUL [[UV1]], [[UV3]] - ; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS 
[[MUL]](<2 x s16>), [[MUL1]](<2 x s16>) - ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 - %2:_(<4 x s16>) = G_MUL %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; GFX9PLUS-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; GFX9PLUS-NEXT: [[MUL:%[0-9]+]]:_(<2 x i16>) = G_MUL [[UV]], [[UV2]] + ; GFX9PLUS-NEXT: [[MUL1:%[0-9]+]]:_(<2 x i16>) = G_MUL [[UV1]], [[UV3]] + ; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[MUL]](<2 x i16>), [[MUL1]](<2 x i16>) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = COPY $vgpr2_vgpr3 + %2:_(<4 x i16>) = G_MUL %0, %1 + $vgpr0_vgpr1 = COPY %2(<4 x i16>) ... --- @@ -521,17 +521,17 @@ body: | ; GCN-LABEL: name: test_mul_s24 ; GCN: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[COPY1]] - ; GCN-NEXT: $vgpr0 = COPY [[MUL]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s24) = G_TRUNC %0 - %3:_(s24) = G_TRUNC %1 - %4:_(s24) = G_MUL %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GCN-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[COPY]], [[COPY1]] + ; GCN-NEXT: $vgpr0 = COPY [[MUL]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i24) = G_TRUNC %0(i32) + %3:_(i24) = G_TRUNC %1(i32) + %4:_(i24) = G_MUL %2, %3 + %5:_(i32) = G_ANYEXT %4(i24) + $vgpr0 = COPY %5(i32) ... 
--- @@ -542,67 +542,67 @@ body: | ; GFX6-LABEL: name: test_mul_s33 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV2]] - ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]] - ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] - ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL]](s32), [[ADD1]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[UV]], [[UV2]] + ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UV1]], [[UV2]] + ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[UV]], [[UV3]] + ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[UV]], [[UV2]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ADD]], [[UMULH]] + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[MUL]](i32), [[ADD1]](i32) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) ; ; GFX89-LABEL: name: test_mul_s33 ; GFX89: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX89-NEXT: {{ $}} - ; GFX89-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX89-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX89-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX89-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX89-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX89-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV]](s32), [[UV2]], [[C]] - ; GFX89-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) - ; GFX89-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV5]](s32) - ; GFX89-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV]](s32), [[UV3]], [[ANYEXT]] - ; GFX89-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV1]](s32), [[UV2]], [[AMDGPU_MAD_U64_U32_2]] - ; GFX89-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX89-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV4]](s32), [[UV6]](s32) - ; GFX89-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; GFX89-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX89-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX89-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX89-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX89-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX89-NEXT: 
[[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV]](i32), [[UV2]], [[C]] + ; GFX89-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](i64) + ; GFX89-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[UV5]](i32) + ; GFX89-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV]](i32), [[UV3]], [[ANYEXT]] + ; GFX89-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV1]](i32), [[UV2]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX89-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](i64) + ; GFX89-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV4]](i32), [[UV6]](i32) + ; GFX89-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) ; ; GFX1011-LABEL: name: test_mul_s33 ; GFX1011: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX1011-NEXT: {{ $}} - ; GFX1011-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX1011-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX1011-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX1011-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX1011-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX1011-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV]](s32), [[UV2]], [[C]] - ; GFX1011-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) - ; GFX1011-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] - ; GFX1011-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV5]], [[MUL]] - ; GFX1011-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]] - ; GFX1011-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]] - ; GFX1011-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV4]](s32), [[ADD1]](s32) - ; GFX1011-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; GFX1011-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX1011-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX1011-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX1011-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX1011-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX1011-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV]](i32), [[UV2]], [[C]] + ; GFX1011-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](i64) + ; GFX1011-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[UV]], [[UV3]] + ; GFX1011-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[UV5]], [[MUL]] + ; GFX1011-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UV1]], [[UV2]] + ; GFX1011-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ADD]], [[MUL1]] + ; GFX1011-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV4]](i32), [[ADD1]](i32) + ; GFX1011-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) ; ; GFX12-LABEL: name: test_mul_s33 ; GFX12: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX12-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[COPY]], [[COPY1]] - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[MUL]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s33) = G_TRUNC %0 - %3:_(s33) = G_TRUNC %1 - %4:_(s33) = G_MUL %2, %3 - %5:_(s64) = 
G_ANYEXT %4 - $vgpr0_vgpr1 = COPY %5 + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX12-NEXT: [[MUL:%[0-9]+]]:_(i64) = G_MUL [[COPY]], [[COPY1]] + ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[MUL]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i33) = G_TRUNC %0(i64) + %3:_(i33) = G_TRUNC %1(i64) + %4:_(i33) = G_MUL %2, %3 + %5:_(i64) = G_ANYEXT %4(i33) + $vgpr0_vgpr1 = COPY %5(i64) ... --- @@ -614,98 +614,98 @@ body: | ; GFX6-LABEL: name: test_mul_s96 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) - ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s96) - ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] - ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] - ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV4]] - ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV3]] - ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL1]], [[MUL2]] - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX6-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV2]], [[UV3]] - ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV4]] - ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV5]] - ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] - ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV4]] - ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[MUL3]], [[MUL4]] - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[MUL5]] - ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ADD2]], [[UMULH1]] - ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[UMULH2]] - ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[ADD]] - ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[MUL]](s32), [[UADDO2]](s32), [[ADD5]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i96) + ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i96) + ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[UV]], [[UV3]] + ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UV1]], [[UV3]] + ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[UV]], [[UV4]] + ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[UV]], [[UV3]] + ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[MUL1]], [[MUL2]] + ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO1]](i1) + ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UADDO]], [[UMULH]] + ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO3]](i1) + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX6-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UV2]], [[UV3]] + ; GFX6-NEXT: 
[[MUL4:%[0-9]+]]:_(i32) = G_MUL [[UV1]], [[UV4]] + ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(i32) = G_MUL [[UV]], [[UV5]] + ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[UV1]], [[UV3]] + ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[UV]], [[UV4]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[MUL3]], [[MUL4]] + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[ADD1]], [[MUL5]] + ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[ADD2]], [[UMULH1]] + ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[ADD3]], [[UMULH2]] + ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[ADD4]], [[ADD]] + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(i96) = G_MERGE_VALUES [[MUL]](i32), [[UADDO2]](i32), [[ADD5]](i32) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](i96) ; ; GFX89-LABEL: name: test_mul_s96 ; GFX89: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX89-NEXT: {{ $}} - ; GFX89-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX89-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX89-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) - ; GFX89-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s96) - ; GFX89-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX89-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV]](s32), [[UV3]], [[C]] - ; GFX89-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) - ; GFX89-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV]](s32), [[UV5]], [[C]] - ; GFX89-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV1]](s32), [[UV4]], [[AMDGPU_MAD_U64_U32_2]] - ; GFX89-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV2]](s32), [[UV3]], [[AMDGPU_MAD_U64_U32_4]] - ; GFX89-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) - ; GFX89-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV7]](s32), [[UV8]](s32) - ; GFX89-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV]](s32), [[UV4]], [[MV]] - ; GFX89-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV1]](s32), [[UV3]], [[AMDGPU_MAD_U64_U32_8]] - ; GFX89-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX89-NEXT: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[UV6]](s32), [[UV10]](s32), [[UV11]](s32) - ; GFX89-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV1]](s96) + ; GFX89-NEXT: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX89-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX89-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i96) + ; GFX89-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i96) + ; GFX89-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX89-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV]](i32), [[UV3]], [[C]] + ; GFX89-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](i64) + ; GFX89-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(i64), 
[[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV]](i32), [[UV5]], [[C]] + ; GFX89-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV1]](i32), [[UV4]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX89-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV2]](i32), [[UV3]], [[AMDGPU_MAD_U64_U32_4]] + ; GFX89-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](i64) + ; GFX89-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV7]](i32), [[UV8]](i32) + ; GFX89-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV]](i32), [[UV4]], [[MV]] + ; GFX89-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV1]](i32), [[UV3]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX89-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](i64) + ; GFX89-NEXT: [[MV1:%[0-9]+]]:_(i96) = G_MERGE_VALUES [[UV6]](i32), [[UV10]](i32), [[UV11]](i32) + ; GFX89-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV1]](i96) ; ; GFX1011-LABEL: name: test_mul_s96 ; GFX1011: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX1011-NEXT: {{ $}} - ; GFX1011-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX1011-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX1011-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) - ; GFX1011-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s96) - ; GFX1011-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX1011-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV]](s32), [[UV3]], [[C]] - ; GFX1011-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) - ; GFX1011-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV5]] - ; GFX1011-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV4]] - ; GFX1011-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL]], [[MUL1]] - ; GFX1011-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV2]], [[UV3]] - ; GFX1011-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL2]] - ; GFX1011-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV7]](s32), [[ADD1]](s32) - ; GFX1011-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV]](s32), [[UV4]], [[MV]] - ; GFX1011-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV1]](s32), [[UV3]], [[AMDGPU_MAD_U64_U32_2]] - ; GFX1011-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX1011-NEXT: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[UV6]](s32), [[UV8]](s32), [[UV9]](s32) - ; GFX1011-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV1]](s96) + ; GFX1011-NEXT: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX1011-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX1011-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i96) + ; GFX1011-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i96) + ; GFX1011-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX1011-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(i64), 
[[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV]](i32), [[UV3]], [[C]] + ; GFX1011-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](i64) + ; GFX1011-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[UV]], [[UV5]] + ; GFX1011-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UV1]], [[UV4]] + ; GFX1011-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[MUL]], [[MUL1]] + ; GFX1011-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[UV2]], [[UV3]] + ; GFX1011-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ADD]], [[MUL2]] + ; GFX1011-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV7]](i32), [[ADD1]](i32) + ; GFX1011-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV]](i32), [[UV4]], [[MV]] + ; GFX1011-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV1]](i32), [[UV3]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX1011-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](i64) + ; GFX1011-NEXT: [[MV1:%[0-9]+]]:_(i96) = G_MERGE_VALUES [[UV6]](i32), [[UV8]](i32), [[UV9]](i32) + ; GFX1011-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV1]](i96) ; ; GFX12-LABEL: name: test_mul_s96 ; GFX12: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) - ; GFX12-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s96) - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX12-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV]](s32), [[UV3]], [[C]] - ; GFX12-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) - ; GFX12-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV5]] - ; GFX12-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV4]] - ; GFX12-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL]], [[MUL1]] - ; GFX12-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV2]], [[UV3]] - ; GFX12-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL2]] - ; GFX12-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV7]](s32), [[ADD1]](s32) - ; GFX12-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV]](s32), [[UV4]], [[MV]] - ; GFX12-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV1]](s32), [[UV3]], [[AMDGPU_MAD_U64_U32_2]] - ; GFX12-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX12-NEXT: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[UV6]](s32), [[UV8]](s32), [[UV9]](s32) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV1]](s96) - %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(s96) = COPY $vgpr3_vgpr4_vgpr5 - %2:_(s96) = G_MUL %0, %1 - $vgpr0_vgpr1_vgpr2 = COPY %2 + ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i96) + ; GFX12-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i96) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT 
i64 0 + ; GFX12-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV]](i32), [[UV3]], [[C]] + ; GFX12-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](i64) + ; GFX12-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[UV]], [[UV5]] + ; GFX12-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UV1]], [[UV4]] + ; GFX12-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[MUL]], [[MUL1]] + ; GFX12-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[UV2]], [[UV3]] + ; GFX12-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ADD]], [[MUL2]] + ; GFX12-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV7]](i32), [[ADD1]](i32) + ; GFX12-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV]](i32), [[UV4]], [[MV]] + ; GFX12-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV1]](i32), [[UV3]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX12-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](i64) + ; GFX12-NEXT: [[MV1:%[0-9]+]]:_(i96) = G_MERGE_VALUES [[UV6]](i32), [[UV8]](i32), [[UV9]](i32) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV1]](i96) + %0:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(i96) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(i96) = G_MUL %0, %1 + $vgpr0_vgpr1_vgpr2 = COPY %2(i96) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir index ef7759f5120f9..1e48add33607d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir @@ -10,14 +10,14 @@ body: | ; CHECK-LABEL: name: test_or_s32 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[OR]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_OR %0, %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[OR]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_OR %0, %1 + $vgpr0 = COPY %2(i32) ... --- @@ -29,17 +29,17 @@ body: | ; CHECK-LABEL: name: test_or_s1 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[COPY1]] - ; CHECK-NEXT: S_NOP 0, implicit [[OR]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_CONSTANT i32 0 - %3:_(s1) = G_ICMP intpred(ne), %0, %2 - %4:_(s1) = G_ICMP intpred(ne), %1, %2 - %5:_(s32) = G_OR %0, %1 - S_NOP 0, implicit %5 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY]], [[COPY1]] + ; CHECK-NEXT: S_NOP 0, implicit [[OR]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_CONSTANT i32 0 + %3:_(i1) = G_ICMP intpred(ne), %0(i32), %2 + %4:_(i1) = G_ICMP intpred(ne), %1(i32), %2 + %5:_(i32) = G_OR %0, %1 + S_NOP 0, implicit %5(i32) ... 
--- @@ -51,31 +51,31 @@ body: | ; CHECK-LABEL: name: test_or_v2s1 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV2]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV3]] - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV4]](s32), [[UV6]] - ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV5]](s32), [[UV7]] - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP]], [[ICMP2]] - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s1) = G_OR [[ICMP1]], [[ICMP3]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s1) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = COPY $vgpr4_vgpr5 - %3:_(<2 x s1>) = G_ICMP intpred(ne), %0, %1 - %4:_(<2 x s1>) = G_ICMP intpred(ne), %0, %2 - %5:_(<2 x s1>) = G_OR %3, %4 - %6:_(<2 x s32>) = G_ANYEXT %5 - $vgpr0_vgpr1 = COPY %6 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr4_vgpr5 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV]](i32), [[UV2]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV1]](i32), [[UV3]] + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY2]](<2 x i32>) + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV4]](i32), [[UV6]] + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV5]](i32), [[UV7]] + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i1) = G_OR [[ICMP]], [[ICMP2]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i1) = G_OR [[ICMP1]], [[ICMP3]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[OR]](i1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[OR1]](i1) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i32>) = COPY $vgpr4_vgpr5 + %3:_(<2 x i1>) = G_ICMP intpred(ne), %0(<2 x i32>), %1 + %4:_(<2 x i1>) = G_ICMP intpred(ne), %0(<2 x i32>), %2 + %5:_(<2 x i1>) = G_OR %3, %4 + %6:_(<2 x i32>) = G_ANYEXT %5(<2 x i1>) + $vgpr0_vgpr1 = COPY %6(<2 x i32>) ... 
--- @@ -87,35 +87,35 @@ body: | ; CHECK-LABEL: name: test_or_v3s1 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5, $vgpr6_vgpr7_vgpr8 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV3]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV4]] - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[UV5]] - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) - ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV6]](s32), [[UV9]] - ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV7]](s32), [[UV10]] - ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV8]](s32), [[UV11]] - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP]], [[ICMP3]] - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s1) = G_OR [[ICMP1]], [[ICMP4]] - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s1) = G_OR [[ICMP2]], [[ICMP5]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s1) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s1) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - %2:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 - %3:_(<3 x s1>) = G_ICMP intpred(ne), %0, %1 - %4:_(<3 x s1>) = G_ICMP intpred(ne), %0, %2 - %5:_(<3 x s1>) = G_OR %3, %4 - %6:_(<3 x s32>) = G_ANYEXT %5 - $vgpr0_vgpr1_vgpr2 = COPY %6 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr6_vgpr7_vgpr8 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV]](i32), [[UV3]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV1]](i32), [[UV4]] + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV2]](i32), [[UV5]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; CHECK-NEXT: [[UV9:%[0-9]+]]:_(i32), [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY2]](<3 x i32>) + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV6]](i32), [[UV9]] + ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV7]](i32), [[UV10]] + ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV8]](i32), [[UV11]] + ; CHECK-NEXT: 
[[OR:%[0-9]+]]:_(i1) = G_OR [[ICMP]], [[ICMP3]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i1) = G_OR [[ICMP1]], [[ICMP4]] + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(i1) = G_OR [[ICMP2]], [[ICMP5]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[OR]](i1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[OR1]](i1) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[OR2]](i1) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32), [[ANYEXT2]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(<3 x i32>) = COPY $vgpr6_vgpr7_vgpr8 + %3:_(<3 x i1>) = G_ICMP intpred(ne), %0(<3 x i32>), %1 + %4:_(<3 x i1>) = G_ICMP intpred(ne), %0(<3 x i32>), %2 + %5:_(<3 x i1>) = G_OR %3, %4 + %6:_(<3 x i32>) = G_ANYEXT %5(<3 x i1>) + $vgpr0_vgpr1_vgpr2 = COPY %6(<3 x i32>) ... --- @@ -127,14 +127,14 @@ body: | ; CHECK-LABEL: name: test_or_s64 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = G_OR %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[OR]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64) = G_OR %0, %1 + $vgpr0_vgpr1 = COPY %2(i64) ... --- @@ -146,21 +146,21 @@ body: | ; CHECK-LABEL: name: test_or_s96 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY]](s96), 0 - ; CHECK-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s96), 64 - ; CHECK-NEXT: [[EXTRACT2:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY1]](s96), 0 - ; CHECK-NEXT: [[EXTRACT3:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](s96), 64 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[EXTRACT]], [[EXTRACT2]] - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[EXTRACT1]], [[EXTRACT3]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[OR]](s64) - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32), [[OR1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) - %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(s96) = COPY $vgpr3_vgpr4_vgpr5 - %2:_(s96) = G_OR %0, %1 - $vgpr0_vgpr1_vgpr2 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY $vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(i64) = G_EXTRACT [[COPY]](i96), 0 + ; CHECK-NEXT: [[EXTRACT1:%[0-9]+]]:_(i32) = G_EXTRACT [[COPY]](i96), 64 + ; CHECK-NEXT: [[EXTRACT2:%[0-9]+]]:_(i64) = G_EXTRACT [[COPY1]](i96), 0 + ; CHECK-NEXT: [[EXTRACT3:%[0-9]+]]:_(i32) = G_EXTRACT [[COPY1]](i96), 64 + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[EXTRACT]], [[EXTRACT2]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[EXTRACT1]], [[EXTRACT3]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[OR]](i64) + ; CHECK-NEXT: 
[[MV:%[0-9]+]]:_(i96) = G_MERGE_VALUES [[UV]](i32), [[UV1]](i32), [[OR1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](i96) + %0:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(i96) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(i96) = G_OR %0, %1 + $vgpr0_vgpr1_vgpr2 = COPY %2(i96) ... --- @@ -172,18 +172,18 @@ body: | ; CHECK-LABEL: name: test_or_128 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](s128) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[UV]], [[UV2]] - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[UV1]], [[UV3]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[OR1]](s64) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - %2:_(s128) = G_OR %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY1]](i128) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[UV]], [[UV2]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i64) = G_OR [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[OR]](i64), [[OR1]](i64) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(i128) = G_OR %0, %1 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(i128) ... --- @@ -195,17 +195,17 @@ body: | ; CHECK-LABEL: name: test_or_s7 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[OR]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s7) = G_TRUNC %0 - %3:_(s7) = G_TRUNC %1 - %4:_(s7) = G_OR %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[OR]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i7) = G_TRUNC %0(i32) + %3:_(i7) = G_TRUNC %1(i32) + %4:_(i7) = G_OR %2, %3 + %5:_(i32) = G_ANYEXT %4(i7) + $vgpr0 = COPY %5(i32) ... 
--- @@ -217,17 +217,17 @@ body: | ; CHECK-LABEL: name: test_or_s8 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[OR]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s8) = G_TRUNC %0 - %3:_(s8) = G_TRUNC %1 - %4:_(s8) = G_OR %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[OR]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i8) = G_TRUNC %0(i32) + %3:_(i8) = G_TRUNC %1(i32) + %4:_(i8) = G_OR %2, %3 + %5:_(i32) = G_ANYEXT %4(i8) + $vgpr0 = COPY %5(i32) ... --- @@ -239,20 +239,20 @@ body: | ; CHECK-LABEL: name: test_or_s16 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC]], [[TRUNC1]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s16) = G_OR %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[OR]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i16) = G_OR %2, %3 + %5:_(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... 
--- @@ -264,20 +264,20 @@ body: | ; CHECK-LABEL: name: test_or_s24 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC]], [[TRUNC1]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s16) = G_OR %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[OR]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i16) = G_OR %2, %3 + %5:_(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... --- @@ -289,17 +289,17 @@ body: | ; CHECK-LABEL: name: test_or_s48 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s48) = G_TRUNC %0 - %3:_(s48) = G_TRUNC %1 - %4:_(s48) = G_OR %2, %3 - %5:_(s64) = G_ANYEXT %4 - $vgpr0_vgpr1 = COPY %5 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[OR]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i48) = G_TRUNC %0(i64) + %3:_(i48) = G_TRUNC %1(i64) + %4:_(i48) = G_OR %2, %3 + %5:_(i64) = G_ANYEXT %4(i48) + $vgpr0_vgpr1 = COPY %5(i64) ... --- @@ -311,14 +311,14 @@ body: | ; CHECK-LABEL: name: test_or_v2s32 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[OR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = G_OR %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x i32>) = G_OR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[OR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i32>) = G_OR %0, %1 + $vgpr0_vgpr1 = COPY %2(<2 x i32>) ... 
--- @@ -330,21 +330,21 @@ body: | ; CHECK-LABEL: name: test_or_v3s32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) - ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[UV2]], [[UV5]] - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[OR]](<2 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[UV7]](s32), [[OR1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR2]](<3 x s32>) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - %2:_(<3 x s32>) = G_OR %0, %1 - $vgpr0_vgpr1_vgpr2 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[UV3]](i32), [[UV4]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x i32>) = G_OR [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[UV2]], [[UV5]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[OR]](<2 x i32>) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[UV6]](i32), [[UV7]](i32), [[OR1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR2]](<3 x i32>) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(<3 x i32>) = G_OR %0, %1 + $vgpr0_vgpr1_vgpr2 = COPY %2(<3 x i32>) ... 
--- @@ -356,18 +356,18 @@ body: | ; CHECK-LABEL: name: test_or_v4s32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[UV]], [[UV2]] - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(<2 x s32>) = G_OR [[UV1]], [[UV3]] - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[OR]](<2 x s32>), [[OR1]](<2 x s32>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - %2:_(<4 x s32>) = G_OR %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i32>), [[UV1:%[0-9]+]]:_(<2 x i32>) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x i32>), [[UV3:%[0-9]+]]:_(<2 x i32>) = G_UNMERGE_VALUES [[COPY1]](<4 x i32>) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x i32>) = G_OR [[UV]], [[UV2]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(<2 x i32>) = G_OR [[UV1]], [[UV3]] + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i32>) = G_CONCAT_VECTORS [[OR]](<2 x i32>), [[OR1]](<2 x i32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x i32>) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<4 x i32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(<4 x i32>) = G_OR %0, %1 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<4 x i32>) ... 
--- @@ -376,29 +376,29 @@ body: | bb.0: ; CHECK-LABEL: name: test_or_v5s32 - ; CHECK: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<5 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32) - ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<5 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[UV6]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV7]](s32), [[UV8]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR]], [[BUILD_VECTOR2]] - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR1]], [[BUILD_VECTOR3]] - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[UV4]], [[UV9]] - ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[OR]](<2 x s32>) - ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[OR1]](<2 x s32>) - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF2]](<8 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[UV10]](s32), [[UV11]](s32), [[UV12]](s32), [[UV13]](s32), [[OR2]](s32), [[UV19]](s32), [[UV20]](s32), [[UV21]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR4]](<8 x s32>) - %0:_(<5 x s32>) = G_IMPLICIT_DEF - %1:_(<5 x s32>) = G_IMPLICIT_DEF - %2:_(<5 x s32>) = G_OR %0, %1 - %3:_(<8 x s32>) = G_IMPLICIT_DEF - %4:_(<8 x s32>) = G_INSERT %3, %2, 0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %4 + ; CHECK: [[DEF:%[0-9]+]]:_(<5 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<5 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF]](<5 x i32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[UV2]](i32), [[UV3]](i32) + ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF1]](<5 x i32>) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[UV5]](i32), [[UV6]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[UV7]](i32), [[UV8]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x i32>) = G_OR [[BUILD_VECTOR]], [[BUILD_VECTOR2]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(<2 x i32>) = G_OR [[BUILD_VECTOR1]], [[BUILD_VECTOR3]] + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[UV4]], [[UV9]] + ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[OR]](<2 x i32>) + ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[OR1]](<2 x i32>) + ; CHECK-NEXT: 
[[DEF2:%[0-9]+]]:_(<8 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32), [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32), [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32), [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF2]](<8 x i32>) + ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[UV10]](i32), [[UV11]](i32), [[UV12]](i32), [[UV13]](i32), [[OR2]](i32), [[UV19]](i32), [[UV20]](i32), [[UV21]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR4]](<8 x i32>) + %0:_(<5 x i32>) = G_IMPLICIT_DEF + %1:_(<5 x i32>) = G_IMPLICIT_DEF + %2:_(<5 x i32>) = G_OR %0, %1 + %3:_(<8 x i32>) = G_IMPLICIT_DEF + %4:_(<8 x i32>) = G_INSERT %3, %2(<5 x i32>), 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %4(<8 x i32>) ... --- @@ -410,18 +410,18 @@ body: | ; CHECK-LABEL: name: test_or_v2s64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[UV]], [[UV2]] - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[UV1]], [[UV3]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR]](s64), [[OR1]](s64) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - %2:_(<2 x s64>) = G_OR %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY1]](<2 x i64>) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[UV]], [[UV2]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i64) = G_OR [[UV1]], [[UV3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[OR]](i64), [[OR1]](i64) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(<2 x i64>) = G_OR %0, %1 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<2 x i64>) ... --- @@ -433,14 +433,14 @@ body: | ; CHECK-LABEL: name: test_or_v2s16 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s16>) = G_OR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[OR]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_OR %0, %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x i16>) = G_OR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[OR]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = G_OR %0, %1 + $vgpr0 = COPY %2(<2 x i16>) ... 
--- @@ -451,41 +451,41 @@ body: | ; CHECK-LABEL: name: test_or_v3s16 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV3]](<2 x s16>), [[UV4]](<2 x s16>) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[OR]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV6]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>) - %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - %2:_(<3 x s16>), %3:_(<3 x s16>) = G_UNMERGE_VALUES %0 - %4:_(<3 x s16>), %5:_(<3 x s16>) = G_UNMERGE_VALUES %1 - %6:_(<3 x s16>) = G_OR %2, %4 - %7:_(<3 x s16>) = G_IMPLICIT_DEF - %8:_(<6 x s16>) = G_CONCAT_VECTORS %6, %7 - $vgpr0_vgpr1_vgpr2 = COPY %8 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[UV]](<2 x i16>), [[UV1]](<2 x i16>) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<6 x i16>) + ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[UV3]](<2 x i16>), [[UV4]](<2 x i16>) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x i16>) = G_OR [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] + ; CHECK-NEXT: 
[[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[OR]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV7]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x i16>), [[UV9:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV8]](<2 x i16>) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV9]](<2 x i16>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND2]], [[C]](i32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[UV6]](<2 x i16>), [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x i16>) + %0:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<6 x i16>) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(<3 x i16>), %3:_(<3 x i16>) = G_UNMERGE_VALUES %0(<6 x i16>) + %4:_(<3 x i16>), %5:_(<3 x i16>) = G_UNMERGE_VALUES %1(<6 x i16>) + %6:_(<3 x i16>) = G_OR %2, %4 + %7:_(<3 x i16>) = G_IMPLICIT_DEF + %8:_(<6 x i16>) = G_CONCAT_VECTORS %6(<3 x i16>), %7(<3 x i16>) + $vgpr0_vgpr1_vgpr2 = COPY %8(<6 x i16>) ... @@ -498,14 +498,14 @@ body: | ; CHECK-LABEL: name: test_or_v4s16 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[OR]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 - %2:_(<4 x s16>) = G_OR %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x i16>) = G_OR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[OR]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = COPY $vgpr2_vgpr3 + %2:_(<4 x i16>) = G_OR %0, %1 + $vgpr0_vgpr1 = COPY %2(<4 x i16>) ... 
--- @@ -514,70 +514,70 @@ body: | bb.0: ; CHECK-LABEL: name: test_or_v5s16 - ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<6 x s16>) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV3]](<2 x s16>), [[UV4]](<2 x s16>) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[OR]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]] - ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]] - ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) - ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL3]] - ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>) - ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(<4 x s16>) = G_OR [[CONCAT_VECTORS2]], [[CONCAT_VECTORS3]] - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[OR5]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C]](s32) - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES 
[[DEF2]](<8 x s16>) - ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) - ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST11]], [[C]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) - ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] - ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C1]] - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) - ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL5]] - ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) - ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR4]], [[C]](s32) - ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL6]] - ; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS4:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>), [[BITCAST14]](<2 x s16>), [[UV13]](<2 x s16>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<8 x s16>) - %0:_(<5 x s16>) = G_IMPLICIT_DEF - %1:_(<5 x s16>) = G_IMPLICIT_DEF - %2:_(<5 x s16>) = G_OR %0, %1 - %3:_(<8 x s16>) = G_IMPLICIT_DEF - %4:_(<8 x s16>) = G_INSERT %3, %2, 0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %4 + ; CHECK: [[DEF:%[0-9]+]]:_(<6 x i16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<6 x i16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<6 x i16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF1]](<6 x i16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV5]](<2 x i16>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[UV]](<2 x i16>), [[UV1]](<2 x i16>) + ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[UV3]](<2 x i16>), [[UV4]](<2 x i16>) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x i16>) = G_OR [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[OR]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[UV6]](<2 x i16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[UV7]](<2 x i16>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND]], [[C]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL]] + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[C2]], [[C]](i32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR 
[[C2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x i16>), [[BITCAST7]](<2 x i16>) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST3]], [[C1]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[LSHR1]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR3]](i32) + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[C2]], [[C]](i32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[C2]], [[SHL3]] + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR4]](i32) + ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x i16>), [[BITCAST9]](<2 x i16>) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(<4 x i16>) = G_OR [[CONCAT_VECTORS2]], [[CONCAT_VECTORS3]] + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x i16>), [[UV9:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[OR5]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[UV8]](<2 x i16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST10]], [[C]](i32) + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<8 x i16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<2 x i16>), [[UV11:%[0-9]+]]:_(<2 x i16>), [[UV12:%[0-9]+]]:_(<2 x i16>), [[UV13:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF2]](<8 x i16>) + ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(i32) = G_BITCAST [[UV12]](<2 x i16>) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST11]], [[C]](i32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[BITCAST4]], [[C1]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LSHR2]], [[C]](i32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL4]] + ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR6]](i32) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST5]], [[C1]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[BITCAST10]], [[C1]] + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[AND4]], [[C]](i32) + ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[AND3]], [[SHL5]] + ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR7]](i32) + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[LSHR4]], [[C]](i32) + ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[LSHR3]], [[SHL6]] + ; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR8]](i32) + ; CHECK-NEXT: [[CONCAT_VECTORS4:%[0-9]+]]:_(<8 x i16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x i16>), [[BITCAST13]](<2 x i16>), [[BITCAST14]](<2 x i16>), [[UV13]](<2 x i16>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<8 x i16>) + %0:_(<5 x i16>) = G_IMPLICIT_DEF + %1:_(<5 x i16>) = G_IMPLICIT_DEF + %2:_(<5 x i16>) = G_OR %0, %1 + %3:_(<8 x i16>) = G_IMPLICIT_DEF + %4:_(<8 x i16>) = G_INSERT %3, %2(<5 x i16>), 0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %4(<8 x i16>) ... 
--- @@ -586,20 +586,20 @@ body: | bb.0: ; CHECK-LABEL: name: test_or_v3s8 - ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<4 x s32>) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV]], [[UV4]] - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[UV1]], [[UV5]] - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[UV2]], [[UV6]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - %0:_(<3 x s8>) = G_IMPLICIT_DEF - %1:_(<3 x s8>) = G_IMPLICIT_DEF - %2:_(<3 x s8>) = G_OR %0, %1 - %3:_(<3 x s32>) = G_ANYEXT %2 - $vgpr0_vgpr1_vgpr2 = COPY %3 + ; CHECK: [[DEF:%[0-9]+]]:_(<4 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF]](<4 x i32>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF1]](<4 x i32>) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[UV]], [[UV4]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[UV1]], [[UV5]] + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[UV2]], [[UV6]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) + %0:_(<3 x i8>) = G_IMPLICIT_DEF + %1:_(<3 x i8>) = G_IMPLICIT_DEF + %2:_(<3 x i8>) = G_OR %0, %1 + %3:_(<3 x i32>) = G_ANYEXT %2(<3 x i8>) + $vgpr0_vgpr1_vgpr2 = COPY %3(<3 x i32>) ... 
--- @@ -608,19 +608,19 @@ body: | bb.0: ; CHECK-LABEL: name: test_or_v4s8 - ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<4 x s32>) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV]], [[UV4]] - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[UV1]], [[UV5]] - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[UV2]], [[UV6]] - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UV7]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) - %0:_(<4 x s8>) = G_IMPLICIT_DEF - %1:_(<4 x s8>) = G_IMPLICIT_DEF - %2:_(<4 x s8>) = G_OR %0, %1 - %3:_(<4 x s32>) = G_ANYEXT %2 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; CHECK: [[DEF:%[0-9]+]]:_(<4 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF]](<4 x i32>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF1]](<4 x i32>) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[UV]], [[UV4]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[UV1]], [[UV5]] + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[UV2]], [[UV6]] + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[UV3]], [[UV7]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32), [[OR2]](i32), [[OR3]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) + %0:_(<4 x i8>) = G_IMPLICIT_DEF + %1:_(<4 x i8>) = G_IMPLICIT_DEF + %2:_(<4 x i8>) = G_OR %0, %1 + %3:_(<4 x i32>) = G_ANYEXT %2(<4 x i8>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3(<4 x i32>) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir index f1c4994c6f76f..77e1f14cd6134 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir @@ -11,45 +11,49 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](i1), %bb.1 ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY]] + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[COPY]], [[COPY]] ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI [[COPY]](s32), %bb.0, [[ADD]](s32), %bb.1 - ; CHECK-NEXT: $vgpr0 = COPY [[PHI]](s32) + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(i32) = G_PHI [[COPY]](i32), %bb.0, [[ADD]](i32), %bb.1 + ; CHECK-NEXT: $vgpr0 = COPY [[PHI]](i32) ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0, $vgpr1 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_CONSTANT i32 0 - %3:_(s1) = G_ICMP intpred(eq), %1, %2 - G_BRCOND %3, %bb.1 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_CONSTANT i32 0 + %3:_(i1) = G_ICMP intpred(eq), %1(i32), %2 + G_BRCOND %3(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %4:_(s32) = G_ADD %0, %0 + %4:_(i32) = G_ADD %0, %0 G_BR %bb.2 bb.2: - %5:_(s32) = G_PHI %0, %bb.0, %4, %bb.1 - $vgpr0 = COPY %5 + %5:_(i32) = G_PHI %0(i32), %bb.0, %4(i32), %bb.1 + $vgpr0 = COPY %5(i32) S_SETPC_B64 undef $sgpr30_sgpr31 + + + + ... 
--- name: test_phi_v2s16 @@ -61,56 +65,60 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](i1), %bb.1 ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[BITCAST]], [[BITCAST1]] - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[LSHR1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C2]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C2]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C1]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[BITCAST]], [[BITCAST1]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[LSHR]], [[LSHR1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[ADD]], [[C2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[ADD1]], [[C2]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<2 x s16>) = G_PHI [[COPY]](<2 x s16>), %bb.0, [[BITCAST2]](<2 x s16>), %bb.1 - ; CHECK-NEXT: $vgpr0 = COPY [[PHI]](<2 x s16>) + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<2 x i16>) = G_PHI [[COPY]](<2 x i16>), %bb.0, [[BITCAST2]](<2 x i16>), %bb.1 + ; CHECK-NEXT: $vgpr0 = COPY [[PHI]](<2 x i16>) ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0, $vgpr1 - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_CONSTANT i32 0 - %3:_(s1) = G_ICMP intpred(eq), %1, %2 - G_BRCOND %3, %bb.1 + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_CONSTANT i32 0 + %3:_(i1) = G_ICMP intpred(eq), %1(i32), %2 + G_BRCOND %3(i1), %bb.1 G_BR %bb.2 
bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %5:_(<2 x s16>) = G_ADD %0, %0 + %4:_(<2 x i16>) = G_ADD %0, %0 G_BR %bb.2 bb.2: - %6:_(<2 x s16>) = G_PHI %0, %bb.0, %5, %bb.1 - $vgpr0 = COPY %6 + %5:_(<2 x i16>) = G_PHI %0(<2 x i16>), %bb.0, %4(<2 x i16>), %bb.1 + $vgpr0 = COPY %5(<2 x i16>) S_SETPC_B64 undef $sgpr30_sgpr31 + + + + ... --- @@ -123,92 +131,96 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: liveins: $vgpr0_vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST2]](<2 x s16>) - ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C1]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C2]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[UV]](<2 x i16>), [[BITCAST2]](<2 x i16>) + ; CHECK-NEXT: G_BRCOND [[ICMP]](i1), %bb.1 ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[BITCAST]], [[BITCAST]] - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[LSHR]] - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[BITCAST1]], [[BITCAST1]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C3]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C3]] - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C4]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] - ; CHECK-NEXT: 
[[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C3]] - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C5]], [[C4]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[BITCAST]], [[BITCAST]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[LSHR]], [[LSHR]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[BITCAST1]], [[BITCAST1]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[ADD]], [[C3]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[ADD1]], [[C3]] + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND2]], [[C4]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND1]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[ADD2]], [[C3]] + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[C5]], [[C4]](i32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[AND3]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<4 x s16>) = G_PHI [[CONCAT_VECTORS]](<4 x s16>), %bb.0, [[CONCAT_VECTORS1]](<4 x s16>), %bb.1 - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[PHI]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C6]](s32) - ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C7]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C7]] - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] - ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C7]] - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32) - ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL4]] - ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV2]](<2 x s16>), [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>) + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<4 x i16>) = G_PHI [[CONCAT_VECTORS]](<4 x i16>), %bb.0, [[CONCAT_VECTORS1]](<4 x i16>), %bb.1 + ; CHECK-NEXT: 
[[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[PHI]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST6]], [[C6]](i32) + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[UV5]](<2 x i16>) + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[BITCAST5]], [[C7]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[BITCAST6]], [[C7]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[AND5]], [[C6]](i32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[AND4]], [[SHL3]] + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR3]](i32) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(i32) = G_AND [[BITCAST7]], [[C7]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[AND6]], [[C6]](i32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[LSHR1]], [[SHL4]] + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR4]](i32) + ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[UV2]](<2 x i16>), [[BITCAST8]](<2 x i16>), [[BITCAST9]](<2 x i16>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x i16>) ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0_vgpr1, $vgpr2 - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s32) = G_CONSTANT i32 0 - %3:_(s1) = G_ICMP intpred(eq), %1, %2 - %4:_(<3 x s16>) = G_EXTRACT %0, 0 - G_BRCOND %3, %bb.1 + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(i32) = COPY $vgpr2 + %2:_(i32) = G_CONSTANT i32 0 + %3:_(i1) = G_ICMP intpred(eq), %1(i32), %2 + %4:_(<3 x i16>) = G_EXTRACT %0(<4 x i16>), 0 + G_BRCOND %3(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %5:_(<3 x s16>) = G_ADD %4, %4 + %5:_(<3 x i16>) = G_ADD %4, %4 G_BR %bb.2 bb.2: - %6:_(<3 x s16>) = G_PHI %4, %bb.0, %5, %bb.1 - %7:_(<3 x s16>) = G_IMPLICIT_DEF - %8:_(<6 x s16>) = G_CONCAT_VECTORS %6, %7 - $vgpr0_vgpr1_vgpr2 = COPY %8 + %6:_(<3 x i16>) = G_PHI %4(<3 x i16>), %bb.0, %5(<3 x i16>), %bb.1 + %7:_(<3 x i16>) = G_IMPLICIT_DEF + %8:_(<6 x i16>) = G_CONCAT_VECTORS %6(<3 x i16>), %7(<3 x i16>) + $vgpr0_vgpr1_vgpr2 = COPY %8(<6 x i16>) S_SETPC_B64 undef $sgpr30_sgpr31 + + + + ... 
--- @@ -222,70 +234,74 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: liveins: $vgpr0_vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](i1), %bb.1 ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[BITCAST]], [[BITCAST2]] - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[LSHR2]] - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[BITCAST1]], [[BITCAST3]] - ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[LSHR1]], [[LSHR3]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C2]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C2]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C2]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD3]], [[C2]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C1]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C1]](i32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = 
G_LSHR [[BITCAST3]], [[C1]](i32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[BITCAST]], [[BITCAST2]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[LSHR]], [[LSHR2]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[BITCAST1]], [[BITCAST3]] + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[LSHR1]], [[LSHR3]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[ADD]], [[C2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[ADD1]], [[C2]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[ADD2]], [[C2]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[ADD3]], [[C2]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C1]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x i16>), [[BITCAST5]](<2 x i16>) ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<4 x s16>) = G_PHI [[COPY]](<4 x s16>), %bb.0, [[CONCAT_VECTORS]](<4 x s16>), %bb.1 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PHI]](<4 x s16>) + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<4 x i16>) = G_PHI [[COPY]](<4 x i16>), %bb.0, [[CONCAT_VECTORS]](<4 x i16>), %bb.1 + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PHI]](<4 x i16>) ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0_vgpr1, $vgpr2 - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s32) = G_CONSTANT i32 0 - %3:_(s1) = G_ICMP intpred(eq), %1, %2 - G_BRCOND %3, %bb.1 + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(i32) = COPY $vgpr2 + %2:_(i32) = G_CONSTANT i32 0 + %3:_(i1) = G_ICMP intpred(eq), %1(i32), %2 + G_BRCOND %3(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %4:_(<4 x s16>) = G_ADD %0, %0 + %4:_(<4 x i16>) = G_ADD %0, %0 G_BR %bb.2 bb.2: - %5:_(<4 x s16>) = G_PHI %0, %bb.0, %4, %bb.1 - $vgpr0_vgpr1 = COPY %5 + %5:_(<4 x i16>) = G_PHI %0(<4 x i16>), %bb.0, %4(<4 x i16>), %bb.1 + $vgpr0_vgpr1 = COPY %5(<4 x i16>) S_SETPC_B64 undef $sgpr30_sgpr31 + + + + ... 
--- name: test_phi_v2s32 @@ -297,49 +313,53 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: liveins: $vgpr0_vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](i1), %bb.1 ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV2]] - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV3]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[UV]], [[UV2]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[UV1]], [[UV3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[ADD]](i32), [[ADD1]](i32) ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<2 x s32>) = G_PHI [[COPY]](<2 x s32>), %bb.0, [[BUILD_VECTOR]](<2 x s32>), %bb.1 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PHI]](<2 x s32>) + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<2 x i32>) = G_PHI [[COPY]](<2 x i32>), %bb.0, [[BUILD_VECTOR]](<2 x i32>), %bb.1 + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PHI]](<2 x i32>) ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0_vgpr1, $vgpr2 - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s32) = G_CONSTANT i32 0 - %3:_(s1) = G_ICMP intpred(eq), %1, %2 - G_BRCOND %3, %bb.1 + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(i32) = COPY $vgpr2 + %2:_(i32) = G_CONSTANT i32 0 + %3:_(i1) = G_ICMP intpred(eq), %1(i32), %2 + G_BRCOND %3(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %4:_(<2 x s32>) = G_ADD %0, %0 + %4:_(<2 x i32>) = G_ADD %0, %0 G_BR %bb.2 bb.2: - %5:_(<2 x s32>) = G_PHI %0, %bb.0, %4, %bb.1 - $vgpr0_vgpr1 = COPY %5 + %5:_(<2 x i32>) = G_PHI %0(<2 x i32>), %bb.0, %4(<2 x i32>), %bb.1 + $vgpr0_vgpr1 = COPY %5(<2 x i32>) S_SETPC_B64 undef $sgpr30_sgpr31 + + + + ... 
--- name: test_phi_v3s32 @@ -351,50 +371,54 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](i1), %bb.1 ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV3]] - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV4]] - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[UV5]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32), [[ADD2]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[UV]], [[UV3]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[UV1]], [[UV4]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[UV2]], [[UV5]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[ADD]](i32), [[ADD1]](i32), [[ADD2]](i32) ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<3 x s32>) = G_PHI [[COPY]](<3 x s32>), %bb.0, [[BUILD_VECTOR]](<3 x s32>), %bb.1 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PHI]](<3 x s32>) + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<3 x i32>) = G_PHI [[COPY]](<3 x i32>), %bb.0, [[BUILD_VECTOR]](<3 x i32>), %bb.1 + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PHI]](<3 x i32>) ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0_vgpr1_vgpr2, $vgpr3 - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(s32) = COPY $vgpr3 - %2:_(s32) = G_CONSTANT i32 0 - %3:_(s1) = G_ICMP intpred(eq), %1, %2 - G_BRCOND %3, %bb.1 + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(i32) = COPY $vgpr3 + %2:_(i32) = G_CONSTANT i32 0 + %3:_(i1) = G_ICMP intpred(eq), %1(i32), %2 + G_BRCOND %3(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %4:_(<3 x s32>) = G_ADD %0, %0 + %4:_(<3 x i32>) = G_ADD %0, %0 G_BR %bb.2 bb.2: - %5:_(<3 x s32>) = G_PHI %0, %bb.0, %4, %bb.1 - $vgpr0_vgpr1_vgpr2 = COPY %5 + %5:_(<3 x i32>) = G_PHI %0(<3 x i32>), %bb.0, %4(<3 x i32>), %bb.1 + $vgpr0_vgpr1_vgpr2 = COPY %5(<3 x i32>) S_SETPC_B64 undef $sgpr30_sgpr31 + + + + ... 
--- name: test_phi_v4s32 @@ -406,51 +430,55 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](i1), %bb.1 ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV4]] - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV5]] - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[UV6]] - ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[UV7]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32), [[ADD2]](s32), [[ADD3]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[UV]], [[UV4]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[UV1]], [[UV5]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[UV2]], [[UV6]] + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UV3]], [[UV7]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[ADD]](i32), [[ADD1]](i32), [[ADD2]](i32), [[ADD3]](i32) ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<4 x s32>) = G_PHI [[COPY]](<4 x s32>), %bb.0, [[BUILD_VECTOR]](<4 x s32>), %bb.1 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[PHI]](<4 x s32>) + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<4 x i32>) = G_PHI [[COPY]](<4 x i32>), %bb.0, [[BUILD_VECTOR]](<4 x i32>), %bb.1 + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[PHI]](<4 x i32>) ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = COPY $vgpr4 - %2:_(s32) = G_CONSTANT i32 0 - %3:_(s1) = G_ICMP intpred(eq), %1, %2 - G_BRCOND %3, %bb.1 + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i32) = COPY $vgpr4 + %2:_(i32) = G_CONSTANT i32 0 + %3:_(i1) = G_ICMP intpred(eq), %1(i32), %2 + G_BRCOND %3(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %4:_(<4 x s32>) = G_ADD %0, %0 + %4:_(<4 x i32>) = G_ADD %0, %0 G_BR %bb.2 bb.2: - %5:_(<4 x s32>) = G_PHI %0, %bb.0, %4, %bb.1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %5 + %5:_(<4 x i32>) = G_PHI %0(<4 x 
i32>), %bb.0, %4(<4 x i32>), %bb.1 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %5(<4 x i32>) S_SETPC_B64 undef $sgpr30_sgpr31 + + + + ... --- name: test_phi_v8s32 @@ -462,55 +490,59 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](i1), %bb.1 ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV8]] - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV9]] - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[UV10]] - ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[UV11]] - ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV4]], [[UV12]] - ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV5]], [[UV13]] - ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV6]], [[UV14]] - ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV7]], [[UV15]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32), [[ADD2]](s32), [[ADD3]](s32), [[ADD4]](s32), [[ADD5]](s32), [[ADD6]](s32), [[ADD7]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<8 x i32>) + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32), [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32), [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32), [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<8 x i32>) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[UV]], [[UV8]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[UV1]], [[UV9]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[UV2]], [[UV10]] + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UV3]], [[UV11]] + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[UV4]], [[UV12]] + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[UV5]], [[UV13]] + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[UV6]], [[UV14]] + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[UV7]], [[UV15]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[ADD]](i32), [[ADD1]](i32), [[ADD2]](i32), [[ADD3]](i32), [[ADD4]](i32), [[ADD5]](i32), 
[[ADD6]](i32), [[ADD7]](i32) ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<8 x s32>) = G_PHI [[COPY]](<8 x s32>), %bb.0, [[BUILD_VECTOR]](<8 x s32>), %bb.1 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[PHI]](<8 x s32>) + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<8 x i32>) = G_PHI [[COPY]](<8 x i32>), %bb.0, [[BUILD_VECTOR]](<8 x i32>), %bb.1 + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[PHI]](<8 x i32>) ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8 - %0:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - %1:_(s32) = COPY $vgpr8 - %2:_(s32) = G_CONSTANT i32 0 - %3:_(s1) = G_ICMP intpred(eq), %1, %2 - G_BRCOND %3, %bb.1 + %0:_(<8 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(i32) = COPY $vgpr8 + %2:_(i32) = G_CONSTANT i32 0 + %3:_(i1) = G_ICMP intpred(eq), %1(i32), %2 + G_BRCOND %3(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %4:_(<8 x s32>) = G_ADD %0, %0 + %4:_(<8 x i32>) = G_ADD %0, %0 G_BR %bb.2 bb.2: - %5:_(<8 x s32>) = G_PHI %0, %bb.0, %4, %bb.1 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %5 + %5:_(<8 x i32>) = G_PHI %0(<8 x i32>), %bb.0, %4(<8 x i32>), %bb.1 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %5(<8 x i32>) S_SETPC_B64 undef $sgpr30_sgpr31 + + + + ... --- name: test_phi_v16s32 @@ -522,60 +554,64 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<16 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY]](i32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](i1), %bb.1 ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV16]] - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV17]] - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD 
[[UV2]], [[UV18]] - ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[UV19]] - ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV4]], [[UV20]] - ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV5]], [[UV21]] - ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV6]], [[UV22]] - ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV7]], [[UV23]] - ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UV8]], [[UV24]] - ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[UV25]] - ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[UV10]], [[UV26]] - ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UV11]], [[UV27]] - ; CHECK-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UV12]], [[UV28]] - ; CHECK-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[UV13]], [[UV29]] - ; CHECK-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[UV14]], [[UV30]] - ; CHECK-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UV15]], [[UV31]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32), [[ADD2]](s32), [[ADD3]](s32), [[ADD4]](s32), [[ADD5]](s32), [[ADD6]](s32), [[ADD7]](s32), [[ADD8]](s32), [[ADD9]](s32), [[ADD10]](s32), [[ADD11]](s32), [[ADD12]](s32), [[ADD13]](s32), [[ADD14]](s32), [[ADD15]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32), [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32), [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32), [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF]](<16 x i32>) + ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32), [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32), [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32), [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32), [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32), [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32), [[UV28:%[0-9]+]]:_(i32), [[UV29:%[0-9]+]]:_(i32), [[UV30:%[0-9]+]]:_(i32), [[UV31:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF]](<16 x i32>) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[UV]], [[UV16]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[UV1]], [[UV17]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[UV2]], [[UV18]] + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UV3]], [[UV19]] + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[UV4]], [[UV20]] + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[UV5]], [[UV21]] + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[UV6]], [[UV22]] + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[UV7]], [[UV23]] + ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(i32) = G_ADD [[UV8]], [[UV24]] + ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(i32) = G_ADD [[UV9]], [[UV25]] + ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(i32) = G_ADD [[UV10]], [[UV26]] + ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(i32) = G_ADD [[UV11]], [[UV27]] + ; CHECK-NEXT: [[ADD12:%[0-9]+]]:_(i32) = G_ADD [[UV12]], [[UV28]] + ; CHECK-NEXT: [[ADD13:%[0-9]+]]:_(i32) = G_ADD [[UV13]], [[UV29]] + ; CHECK-NEXT: [[ADD14:%[0-9]+]]:_(i32) = G_ADD [[UV14]], [[UV30]] + ; CHECK-NEXT: [[ADD15:%[0-9]+]]:_(i32) = G_ADD [[UV15]], [[UV31]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x i32>) = G_BUILD_VECTOR [[ADD]](i32), [[ADD1]](i32), [[ADD2]](i32), [[ADD3]](i32), [[ADD4]](i32), [[ADD5]](i32), [[ADD6]](i32), [[ADD7]](i32), [[ADD8]](i32), [[ADD9]](i32), [[ADD10]](i32), [[ADD11]](i32), [[ADD12]](i32), [[ADD13]](i32), [[ADD14]](i32), [[ADD15]](i32) ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; 
CHECK-NEXT: [[PHI:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF]](<16 x s32>), %bb.0, [[BUILD_VECTOR]](<16 x s32>), %bb.1 - ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[PHI]](<16 x s32>) + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<16 x i32>) = G_PHI [[DEF]](<16 x i32>), %bb.0, [[BUILD_VECTOR]](<16 x i32>), %bb.1 + ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[PHI]](<16 x i32>) bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 - %0:_(<16 x s32>) = G_IMPLICIT_DEF - %1:_(s32) = COPY $vgpr4 - %2:_(s32) = G_CONSTANT i32 0 - %3:_(s1) = G_ICMP intpred(eq), %1, %2 - G_BRCOND %3, %bb.1 + %0:_(<16 x i32>) = G_IMPLICIT_DEF + %1:_(i32) = COPY $vgpr4 + %2:_(i32) = G_CONSTANT i32 0 + %3:_(i1) = G_ICMP intpred(eq), %1(i32), %2 + G_BRCOND %3(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %4:_(<16 x s32>) = G_ADD %0, %0 + %4:_(<16 x i32>) = G_ADD %0, %0 G_BR %bb.2 bb.2: - %5:_(<16 x s32>) = G_PHI %0, %bb.0, %4, %bb.1 - S_SETPC_B64 undef $sgpr30_sgpr31, implicit %5 + %5:_(<16 x i32>) = G_PHI %0(<16 x i32>), %bb.0, %4(<16 x i32>), %bb.1 + S_SETPC_B64 undef $sgpr30_sgpr31, implicit %5(<16 x i32>) + + + + ... @@ -589,76 +625,80 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<32 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<32 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY]](i32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](i1), %bb.1 ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) - ; CHECK-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32), [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), 
[[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV32]] - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV33]] - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[UV34]] - ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[UV35]] - ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV4]], [[UV36]] - ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV5]], [[UV37]] - ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV6]], [[UV38]] - ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV7]], [[UV39]] - ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UV8]], [[UV40]] - ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[UV41]] - ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[UV10]], [[UV42]] - ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UV11]], [[UV43]] - ; CHECK-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UV12]], [[UV44]] - ; CHECK-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[UV13]], [[UV45]] - ; CHECK-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[UV14]], [[UV46]] - ; CHECK-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UV15]], [[UV47]] - ; CHECK-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV16]], [[UV48]] - ; CHECK-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[UV49]] - ; CHECK-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[UV18]], [[UV50]] - ; CHECK-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UV19]], [[UV51]] - ; CHECK-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[UV20]], [[UV52]] - ; CHECK-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[UV21]], [[UV53]] - ; CHECK-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[UV22]], [[UV54]] - ; CHECK-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UV23]], [[UV55]] - ; CHECK-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UV24]], [[UV56]] - ; CHECK-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[UV25]], [[UV57]] - ; CHECK-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[UV26]], [[UV58]] - ; CHECK-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[UV27]], [[UV59]] - ; CHECK-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[UV28]], [[UV60]] - ; CHECK-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UV29]], [[UV61]] - ; CHECK-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[UV30]], [[UV62]] - ; CHECK-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[UV31]], [[UV63]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32), [[ADD2]](s32), [[ADD3]](s32), [[ADD4]](s32), [[ADD5]](s32), [[ADD6]](s32), [[ADD7]](s32), [[ADD8]](s32), [[ADD9]](s32), [[ADD10]](s32), [[ADD11]](s32), [[ADD12]](s32), [[ADD13]](s32), [[ADD14]](s32), [[ADD15]](s32), [[ADD16]](s32), [[ADD17]](s32), [[ADD18]](s32), [[ADD19]](s32), [[ADD20]](s32), [[ADD21]](s32), [[ADD22]](s32), [[ADD23]](s32), [[ADD24]](s32), [[ADD25]](s32), [[ADD26]](s32), [[ADD27]](s32), [[ADD28]](s32), [[ADD29]](s32), [[ADD30]](s32), [[ADD31]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32), [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32), [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32), [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32), [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32), [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32), [[UV20:%[0-9]+]]:_(i32), 
[[UV21:%[0-9]+]]:_(i32), [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32), [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32), [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32), [[UV28:%[0-9]+]]:_(i32), [[UV29:%[0-9]+]]:_(i32), [[UV30:%[0-9]+]]:_(i32), [[UV31:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF]](<32 x i32>) + ; CHECK-NEXT: [[UV32:%[0-9]+]]:_(i32), [[UV33:%[0-9]+]]:_(i32), [[UV34:%[0-9]+]]:_(i32), [[UV35:%[0-9]+]]:_(i32), [[UV36:%[0-9]+]]:_(i32), [[UV37:%[0-9]+]]:_(i32), [[UV38:%[0-9]+]]:_(i32), [[UV39:%[0-9]+]]:_(i32), [[UV40:%[0-9]+]]:_(i32), [[UV41:%[0-9]+]]:_(i32), [[UV42:%[0-9]+]]:_(i32), [[UV43:%[0-9]+]]:_(i32), [[UV44:%[0-9]+]]:_(i32), [[UV45:%[0-9]+]]:_(i32), [[UV46:%[0-9]+]]:_(i32), [[UV47:%[0-9]+]]:_(i32), [[UV48:%[0-9]+]]:_(i32), [[UV49:%[0-9]+]]:_(i32), [[UV50:%[0-9]+]]:_(i32), [[UV51:%[0-9]+]]:_(i32), [[UV52:%[0-9]+]]:_(i32), [[UV53:%[0-9]+]]:_(i32), [[UV54:%[0-9]+]]:_(i32), [[UV55:%[0-9]+]]:_(i32), [[UV56:%[0-9]+]]:_(i32), [[UV57:%[0-9]+]]:_(i32), [[UV58:%[0-9]+]]:_(i32), [[UV59:%[0-9]+]]:_(i32), [[UV60:%[0-9]+]]:_(i32), [[UV61:%[0-9]+]]:_(i32), [[UV62:%[0-9]+]]:_(i32), [[UV63:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF]](<32 x i32>) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[UV]], [[UV32]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[UV1]], [[UV33]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[UV2]], [[UV34]] + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UV3]], [[UV35]] + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[UV4]], [[UV36]] + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[UV5]], [[UV37]] + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[UV6]], [[UV38]] + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[UV7]], [[UV39]] + ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(i32) = G_ADD [[UV8]], [[UV40]] + ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(i32) = G_ADD [[UV9]], [[UV41]] + ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(i32) = G_ADD [[UV10]], [[UV42]] + ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(i32) = G_ADD [[UV11]], [[UV43]] + ; CHECK-NEXT: [[ADD12:%[0-9]+]]:_(i32) = G_ADD [[UV12]], [[UV44]] + ; CHECK-NEXT: [[ADD13:%[0-9]+]]:_(i32) = G_ADD [[UV13]], [[UV45]] + ; CHECK-NEXT: [[ADD14:%[0-9]+]]:_(i32) = G_ADD [[UV14]], [[UV46]] + ; CHECK-NEXT: [[ADD15:%[0-9]+]]:_(i32) = G_ADD [[UV15]], [[UV47]] + ; CHECK-NEXT: [[ADD16:%[0-9]+]]:_(i32) = G_ADD [[UV16]], [[UV48]] + ; CHECK-NEXT: [[ADD17:%[0-9]+]]:_(i32) = G_ADD [[UV17]], [[UV49]] + ; CHECK-NEXT: [[ADD18:%[0-9]+]]:_(i32) = G_ADD [[UV18]], [[UV50]] + ; CHECK-NEXT: [[ADD19:%[0-9]+]]:_(i32) = G_ADD [[UV19]], [[UV51]] + ; CHECK-NEXT: [[ADD20:%[0-9]+]]:_(i32) = G_ADD [[UV20]], [[UV52]] + ; CHECK-NEXT: [[ADD21:%[0-9]+]]:_(i32) = G_ADD [[UV21]], [[UV53]] + ; CHECK-NEXT: [[ADD22:%[0-9]+]]:_(i32) = G_ADD [[UV22]], [[UV54]] + ; CHECK-NEXT: [[ADD23:%[0-9]+]]:_(i32) = G_ADD [[UV23]], [[UV55]] + ; CHECK-NEXT: [[ADD24:%[0-9]+]]:_(i32) = G_ADD [[UV24]], [[UV56]] + ; CHECK-NEXT: [[ADD25:%[0-9]+]]:_(i32) = G_ADD [[UV25]], [[UV57]] + ; CHECK-NEXT: [[ADD26:%[0-9]+]]:_(i32) = G_ADD [[UV26]], [[UV58]] + ; CHECK-NEXT: [[ADD27:%[0-9]+]]:_(i32) = G_ADD [[UV27]], [[UV59]] + ; CHECK-NEXT: [[ADD28:%[0-9]+]]:_(i32) = G_ADD [[UV28]], [[UV60]] + ; CHECK-NEXT: [[ADD29:%[0-9]+]]:_(i32) = G_ADD [[UV29]], [[UV61]] + ; CHECK-NEXT: [[ADD30:%[0-9]+]]:_(i32) = G_ADD [[UV30]], [[UV62]] + ; CHECK-NEXT: [[ADD31:%[0-9]+]]:_(i32) = G_ADD [[UV31]], [[UV63]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x i32>) = G_BUILD_VECTOR [[ADD]](i32), [[ADD1]](i32), [[ADD2]](i32), [[ADD3]](i32), [[ADD4]](i32), [[ADD5]](i32), [[ADD6]](i32), [[ADD7]](i32), [[ADD8]](i32), [[ADD9]](i32), 
[[ADD10]](i32), [[ADD11]](i32), [[ADD12]](i32), [[ADD13]](i32), [[ADD14]](i32), [[ADD15]](i32), [[ADD16]](i32), [[ADD17]](i32), [[ADD18]](i32), [[ADD19]](i32), [[ADD20]](i32), [[ADD21]](i32), [[ADD22]](i32), [[ADD23]](i32), [[ADD24]](i32), [[ADD25]](i32), [[ADD26]](i32), [[ADD27]](i32), [[ADD28]](i32), [[ADD29]](i32), [[ADD30]](i32), [[ADD31]](i32) ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<32 x s32>) = G_PHI [[DEF]](<32 x s32>), %bb.0, [[BUILD_VECTOR]](<32 x s32>), %bb.1 - ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[PHI]](<32 x s32>) + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<32 x i32>) = G_PHI [[DEF]](<32 x i32>), %bb.0, [[BUILD_VECTOR]](<32 x i32>), %bb.1 + ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[PHI]](<32 x i32>) bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 - %0:_(<32 x s32>) = G_IMPLICIT_DEF - %1:_(s32) = COPY $vgpr4 - %2:_(s32) = G_CONSTANT i32 0 - %3:_(s1) = G_ICMP intpred(eq), %1, %2 - G_BRCOND %3, %bb.1 + %0:_(<32 x i32>) = G_IMPLICIT_DEF + %1:_(i32) = COPY $vgpr4 + %2:_(i32) = G_CONSTANT i32 0 + %3:_(i1) = G_ICMP intpred(eq), %1(i32), %2 + G_BRCOND %3(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %4:_(<32 x s32>) = G_ADD %0, %0 + %4:_(<32 x i32>) = G_ADD %0, %0 G_BR %bb.2 bb.2: - %5:_(<32 x s32>) = G_PHI %0, %bb.0, %4, %bb.1 - S_SETPC_B64 undef $sgpr30_sgpr31, implicit %5 + %5:_(<32 x i32>) = G_PHI %0(<32 x i32>), %bb.0, %4(<32 x i32>), %bb.1 + S_SETPC_B64 undef $sgpr30_sgpr31, implicit %5(<32 x i32>) + + + + ... @@ -672,119 +712,123 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<32 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<16 x s32>), [[UV1:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<16 x s32>), [[UV3:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) - ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<32 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY]](i32), [[C]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<16 x i32>), [[UV1:%[0-9]+]]:_(<16 x i32>) = G_UNMERGE_VALUES [[DEF]](<32 x i32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<16 x i32>), [[UV3:%[0-9]+]]:_(<16 x i32>) = G_UNMERGE_VALUES [[DEF]](<32 x i32>) + ; CHECK-NEXT: G_BRCOND [[ICMP]](i1), %bb.1 ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), 
[[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) - ; CHECK-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32), [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32), [[UV64:%[0-9]+]]:_(s32), [[UV65:%[0-9]+]]:_(s32), [[UV66:%[0-9]+]]:_(s32), [[UV67:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) - ; CHECK-NEXT: [[UV68:%[0-9]+]]:_(s32), [[UV69:%[0-9]+]]:_(s32), [[UV70:%[0-9]+]]:_(s32), [[UV71:%[0-9]+]]:_(s32), [[UV72:%[0-9]+]]:_(s32), [[UV73:%[0-9]+]]:_(s32), [[UV74:%[0-9]+]]:_(s32), [[UV75:%[0-9]+]]:_(s32), [[UV76:%[0-9]+]]:_(s32), [[UV77:%[0-9]+]]:_(s32), [[UV78:%[0-9]+]]:_(s32), [[UV79:%[0-9]+]]:_(s32), [[UV80:%[0-9]+]]:_(s32), [[UV81:%[0-9]+]]:_(s32), [[UV82:%[0-9]+]]:_(s32), [[UV83:%[0-9]+]]:_(s32), [[UV84:%[0-9]+]]:_(s32), [[UV85:%[0-9]+]]:_(s32), [[UV86:%[0-9]+]]:_(s32), [[UV87:%[0-9]+]]:_(s32), [[UV88:%[0-9]+]]:_(s32), [[UV89:%[0-9]+]]:_(s32), [[UV90:%[0-9]+]]:_(s32), [[UV91:%[0-9]+]]:_(s32), [[UV92:%[0-9]+]]:_(s32), [[UV93:%[0-9]+]]:_(s32), [[UV94:%[0-9]+]]:_(s32), [[UV95:%[0-9]+]]:_(s32), [[UV96:%[0-9]+]]:_(s32), [[UV97:%[0-9]+]]:_(s32), [[UV98:%[0-9]+]]:_(s32), [[UV99:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) - ; CHECK-NEXT: [[UV100:%[0-9]+]]:_(s32), [[UV101:%[0-9]+]]:_(s32), [[UV102:%[0-9]+]]:_(s32), [[UV103:%[0-9]+]]:_(s32), [[UV104:%[0-9]+]]:_(s32), [[UV105:%[0-9]+]]:_(s32), [[UV106:%[0-9]+]]:_(s32), [[UV107:%[0-9]+]]:_(s32), [[UV108:%[0-9]+]]:_(s32), [[UV109:%[0-9]+]]:_(s32), [[UV110:%[0-9]+]]:_(s32), [[UV111:%[0-9]+]]:_(s32), [[UV112:%[0-9]+]]:_(s32), [[UV113:%[0-9]+]]:_(s32), [[UV114:%[0-9]+]]:_(s32), [[UV115:%[0-9]+]]:_(s32), [[UV116:%[0-9]+]]:_(s32), [[UV117:%[0-9]+]]:_(s32), [[UV118:%[0-9]+]]:_(s32), [[UV119:%[0-9]+]]:_(s32), [[UV120:%[0-9]+]]:_(s32), [[UV121:%[0-9]+]]:_(s32), [[UV122:%[0-9]+]]:_(s32), [[UV123:%[0-9]+]]:_(s32), [[UV124:%[0-9]+]]:_(s32), [[UV125:%[0-9]+]]:_(s32), [[UV126:%[0-9]+]]:_(s32), [[UV127:%[0-9]+]]:_(s32), [[UV128:%[0-9]+]]:_(s32), [[UV129:%[0-9]+]]:_(s32), [[UV130:%[0-9]+]]:_(s32), [[UV131:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV4]], [[UV68]] - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV5]], [[UV69]] - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UV6]], [[UV70]] - ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV7]], [[UV71]] - ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV8]], [[UV72]] - ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[UV73]] - ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV10]], [[UV74]] - ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV11]], [[UV75]] - ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UV12]], [[UV76]] - ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD 
[[UV13]], [[UV77]] - ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[UV14]], [[UV78]] - ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UV15]], [[UV79]] - ; CHECK-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UV16]], [[UV80]] - ; CHECK-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[UV81]] - ; CHECK-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[UV18]], [[UV82]] - ; CHECK-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UV19]], [[UV83]] - ; CHECK-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV20]], [[UV84]] - ; CHECK-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[UV21]], [[UV85]] - ; CHECK-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[UV22]], [[UV86]] - ; CHECK-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UV23]], [[UV87]] - ; CHECK-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[UV24]], [[UV88]] - ; CHECK-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[UV25]], [[UV89]] - ; CHECK-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[UV26]], [[UV90]] - ; CHECK-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UV27]], [[UV91]] - ; CHECK-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UV28]], [[UV92]] - ; CHECK-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[UV29]], [[UV93]] - ; CHECK-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[UV30]], [[UV94]] - ; CHECK-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[UV31]], [[UV95]] - ; CHECK-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[UV32]], [[UV96]] - ; CHECK-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UV33]], [[UV97]] - ; CHECK-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[UV34]], [[UV98]] - ; CHECK-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[UV35]], [[UV99]] - ; CHECK-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[UV36]], [[UV100]] - ; CHECK-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UV37]], [[UV101]] - ; CHECK-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[UV38]], [[UV102]] - ; CHECK-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[UV39]], [[UV103]] - ; CHECK-NEXT: [[ADD36:%[0-9]+]]:_(s32) = G_ADD [[UV40]], [[UV104]] - ; CHECK-NEXT: [[ADD37:%[0-9]+]]:_(s32) = G_ADD [[UV41]], [[UV105]] - ; CHECK-NEXT: [[ADD38:%[0-9]+]]:_(s32) = G_ADD [[UV42]], [[UV106]] - ; CHECK-NEXT: [[ADD39:%[0-9]+]]:_(s32) = G_ADD [[UV43]], [[UV107]] - ; CHECK-NEXT: [[ADD40:%[0-9]+]]:_(s32) = G_ADD [[UV44]], [[UV108]] - ; CHECK-NEXT: [[ADD41:%[0-9]+]]:_(s32) = G_ADD [[UV45]], [[UV109]] - ; CHECK-NEXT: [[ADD42:%[0-9]+]]:_(s32) = G_ADD [[UV46]], [[UV110]] - ; CHECK-NEXT: [[ADD43:%[0-9]+]]:_(s32) = G_ADD [[UV47]], [[UV111]] - ; CHECK-NEXT: [[ADD44:%[0-9]+]]:_(s32) = G_ADD [[UV48]], [[UV112]] - ; CHECK-NEXT: [[ADD45:%[0-9]+]]:_(s32) = G_ADD [[UV49]], [[UV113]] - ; CHECK-NEXT: [[ADD46:%[0-9]+]]:_(s32) = G_ADD [[UV50]], [[UV114]] - ; CHECK-NEXT: [[ADD47:%[0-9]+]]:_(s32) = G_ADD [[UV51]], [[UV115]] - ; CHECK-NEXT: [[ADD48:%[0-9]+]]:_(s32) = G_ADD [[UV52]], [[UV116]] - ; CHECK-NEXT: [[ADD49:%[0-9]+]]:_(s32) = G_ADD [[UV53]], [[UV117]] - ; CHECK-NEXT: [[ADD50:%[0-9]+]]:_(s32) = G_ADD [[UV54]], [[UV118]] - ; CHECK-NEXT: [[ADD51:%[0-9]+]]:_(s32) = G_ADD [[UV55]], [[UV119]] - ; CHECK-NEXT: [[ADD52:%[0-9]+]]:_(s32) = G_ADD [[UV56]], [[UV120]] - ; CHECK-NEXT: [[ADD53:%[0-9]+]]:_(s32) = G_ADD [[UV57]], [[UV121]] - ; CHECK-NEXT: [[ADD54:%[0-9]+]]:_(s32) = G_ADD [[UV58]], [[UV122]] - ; CHECK-NEXT: [[ADD55:%[0-9]+]]:_(s32) = G_ADD [[UV59]], [[UV123]] - ; CHECK-NEXT: [[ADD56:%[0-9]+]]:_(s32) = G_ADD [[UV60]], [[UV124]] - ; CHECK-NEXT: [[ADD57:%[0-9]+]]:_(s32) = G_ADD [[UV61]], [[UV125]] - ; CHECK-NEXT: [[ADD58:%[0-9]+]]:_(s32) = G_ADD [[UV62]], [[UV126]] - ; CHECK-NEXT: [[ADD59:%[0-9]+]]:_(s32) = G_ADD [[UV63]], [[UV127]] - ; CHECK-NEXT: [[ADD60:%[0-9]+]]:_(s32) = G_ADD [[UV64]], [[UV128]] - ; CHECK-NEXT: 
[[ADD61:%[0-9]+]]:_(s32) = G_ADD [[UV65]], [[UV129]] - ; CHECK-NEXT: [[ADD62:%[0-9]+]]:_(s32) = G_ADD [[UV66]], [[UV130]] - ; CHECK-NEXT: [[ADD63:%[0-9]+]]:_(s32) = G_ADD [[UV67]], [[UV131]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32), [[ADD2]](s32), [[ADD3]](s32), [[ADD4]](s32), [[ADD5]](s32), [[ADD6]](s32), [[ADD7]](s32), [[ADD8]](s32), [[ADD9]](s32), [[ADD10]](s32), [[ADD11]](s32), [[ADD12]](s32), [[ADD13]](s32), [[ADD14]](s32), [[ADD15]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ADD16]](s32), [[ADD17]](s32), [[ADD18]](s32), [[ADD19]](s32), [[ADD20]](s32), [[ADD21]](s32), [[ADD22]](s32), [[ADD23]](s32), [[ADD24]](s32), [[ADD25]](s32), [[ADD26]](s32), [[ADD27]](s32), [[ADD28]](s32), [[ADD29]](s32), [[ADD30]](s32), [[ADD31]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ADD32]](s32), [[ADD33]](s32), [[ADD34]](s32), [[ADD35]](s32), [[ADD36]](s32), [[ADD37]](s32), [[ADD38]](s32), [[ADD39]](s32), [[ADD40]](s32), [[ADD41]](s32), [[ADD42]](s32), [[ADD43]](s32), [[ADD44]](s32), [[ADD45]](s32), [[ADD46]](s32), [[ADD47]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ADD48]](s32), [[ADD49]](s32), [[ADD50]](s32), [[ADD51]](s32), [[ADD52]](s32), [[ADD53]](s32), [[ADD54]](s32), [[ADD55]](s32), [[ADD56]](s32), [[ADD57]](s32), [[ADD58]](s32), [[ADD59]](s32), [[ADD60]](s32), [[ADD61]](s32), [[ADD62]](s32), [[ADD63]](s32) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32), [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32), [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32), [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32), [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32), [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32), [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32), [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32), [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32), [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32), [[UV28:%[0-9]+]]:_(i32), [[UV29:%[0-9]+]]:_(i32), [[UV30:%[0-9]+]]:_(i32), [[UV31:%[0-9]+]]:_(i32), [[UV32:%[0-9]+]]:_(i32), [[UV33:%[0-9]+]]:_(i32), [[UV34:%[0-9]+]]:_(i32), [[UV35:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF]](<32 x i32>) + ; CHECK-NEXT: [[UV36:%[0-9]+]]:_(i32), [[UV37:%[0-9]+]]:_(i32), [[UV38:%[0-9]+]]:_(i32), [[UV39:%[0-9]+]]:_(i32), [[UV40:%[0-9]+]]:_(i32), [[UV41:%[0-9]+]]:_(i32), [[UV42:%[0-9]+]]:_(i32), [[UV43:%[0-9]+]]:_(i32), [[UV44:%[0-9]+]]:_(i32), [[UV45:%[0-9]+]]:_(i32), [[UV46:%[0-9]+]]:_(i32), [[UV47:%[0-9]+]]:_(i32), [[UV48:%[0-9]+]]:_(i32), [[UV49:%[0-9]+]]:_(i32), [[UV50:%[0-9]+]]:_(i32), [[UV51:%[0-9]+]]:_(i32), [[UV52:%[0-9]+]]:_(i32), [[UV53:%[0-9]+]]:_(i32), [[UV54:%[0-9]+]]:_(i32), [[UV55:%[0-9]+]]:_(i32), [[UV56:%[0-9]+]]:_(i32), [[UV57:%[0-9]+]]:_(i32), [[UV58:%[0-9]+]]:_(i32), [[UV59:%[0-9]+]]:_(i32), [[UV60:%[0-9]+]]:_(i32), [[UV61:%[0-9]+]]:_(i32), [[UV62:%[0-9]+]]:_(i32), [[UV63:%[0-9]+]]:_(i32), [[UV64:%[0-9]+]]:_(i32), [[UV65:%[0-9]+]]:_(i32), [[UV66:%[0-9]+]]:_(i32), [[UV67:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF]](<32 x i32>) + ; CHECK-NEXT: [[UV68:%[0-9]+]]:_(i32), [[UV69:%[0-9]+]]:_(i32), [[UV70:%[0-9]+]]:_(i32), [[UV71:%[0-9]+]]:_(i32), [[UV72:%[0-9]+]]:_(i32), [[UV73:%[0-9]+]]:_(i32), [[UV74:%[0-9]+]]:_(i32), [[UV75:%[0-9]+]]:_(i32), [[UV76:%[0-9]+]]:_(i32), [[UV77:%[0-9]+]]:_(i32), [[UV78:%[0-9]+]]:_(i32), [[UV79:%[0-9]+]]:_(i32), [[UV80:%[0-9]+]]:_(i32), 
[[UV81:%[0-9]+]]:_(i32), [[UV82:%[0-9]+]]:_(i32), [[UV83:%[0-9]+]]:_(i32), [[UV84:%[0-9]+]]:_(i32), [[UV85:%[0-9]+]]:_(i32), [[UV86:%[0-9]+]]:_(i32), [[UV87:%[0-9]+]]:_(i32), [[UV88:%[0-9]+]]:_(i32), [[UV89:%[0-9]+]]:_(i32), [[UV90:%[0-9]+]]:_(i32), [[UV91:%[0-9]+]]:_(i32), [[UV92:%[0-9]+]]:_(i32), [[UV93:%[0-9]+]]:_(i32), [[UV94:%[0-9]+]]:_(i32), [[UV95:%[0-9]+]]:_(i32), [[UV96:%[0-9]+]]:_(i32), [[UV97:%[0-9]+]]:_(i32), [[UV98:%[0-9]+]]:_(i32), [[UV99:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF]](<32 x i32>) + ; CHECK-NEXT: [[UV100:%[0-9]+]]:_(i32), [[UV101:%[0-9]+]]:_(i32), [[UV102:%[0-9]+]]:_(i32), [[UV103:%[0-9]+]]:_(i32), [[UV104:%[0-9]+]]:_(i32), [[UV105:%[0-9]+]]:_(i32), [[UV106:%[0-9]+]]:_(i32), [[UV107:%[0-9]+]]:_(i32), [[UV108:%[0-9]+]]:_(i32), [[UV109:%[0-9]+]]:_(i32), [[UV110:%[0-9]+]]:_(i32), [[UV111:%[0-9]+]]:_(i32), [[UV112:%[0-9]+]]:_(i32), [[UV113:%[0-9]+]]:_(i32), [[UV114:%[0-9]+]]:_(i32), [[UV115:%[0-9]+]]:_(i32), [[UV116:%[0-9]+]]:_(i32), [[UV117:%[0-9]+]]:_(i32), [[UV118:%[0-9]+]]:_(i32), [[UV119:%[0-9]+]]:_(i32), [[UV120:%[0-9]+]]:_(i32), [[UV121:%[0-9]+]]:_(i32), [[UV122:%[0-9]+]]:_(i32), [[UV123:%[0-9]+]]:_(i32), [[UV124:%[0-9]+]]:_(i32), [[UV125:%[0-9]+]]:_(i32), [[UV126:%[0-9]+]]:_(i32), [[UV127:%[0-9]+]]:_(i32), [[UV128:%[0-9]+]]:_(i32), [[UV129:%[0-9]+]]:_(i32), [[UV130:%[0-9]+]]:_(i32), [[UV131:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF]](<32 x i32>) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[UV4]], [[UV68]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[UV5]], [[UV69]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[UV6]], [[UV70]] + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UV7]], [[UV71]] + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[UV8]], [[UV72]] + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[UV9]], [[UV73]] + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[UV10]], [[UV74]] + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[UV11]], [[UV75]] + ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(i32) = G_ADD [[UV12]], [[UV76]] + ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(i32) = G_ADD [[UV13]], [[UV77]] + ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(i32) = G_ADD [[UV14]], [[UV78]] + ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(i32) = G_ADD [[UV15]], [[UV79]] + ; CHECK-NEXT: [[ADD12:%[0-9]+]]:_(i32) = G_ADD [[UV16]], [[UV80]] + ; CHECK-NEXT: [[ADD13:%[0-9]+]]:_(i32) = G_ADD [[UV17]], [[UV81]] + ; CHECK-NEXT: [[ADD14:%[0-9]+]]:_(i32) = G_ADD [[UV18]], [[UV82]] + ; CHECK-NEXT: [[ADD15:%[0-9]+]]:_(i32) = G_ADD [[UV19]], [[UV83]] + ; CHECK-NEXT: [[ADD16:%[0-9]+]]:_(i32) = G_ADD [[UV20]], [[UV84]] + ; CHECK-NEXT: [[ADD17:%[0-9]+]]:_(i32) = G_ADD [[UV21]], [[UV85]] + ; CHECK-NEXT: [[ADD18:%[0-9]+]]:_(i32) = G_ADD [[UV22]], [[UV86]] + ; CHECK-NEXT: [[ADD19:%[0-9]+]]:_(i32) = G_ADD [[UV23]], [[UV87]] + ; CHECK-NEXT: [[ADD20:%[0-9]+]]:_(i32) = G_ADD [[UV24]], [[UV88]] + ; CHECK-NEXT: [[ADD21:%[0-9]+]]:_(i32) = G_ADD [[UV25]], [[UV89]] + ; CHECK-NEXT: [[ADD22:%[0-9]+]]:_(i32) = G_ADD [[UV26]], [[UV90]] + ; CHECK-NEXT: [[ADD23:%[0-9]+]]:_(i32) = G_ADD [[UV27]], [[UV91]] + ; CHECK-NEXT: [[ADD24:%[0-9]+]]:_(i32) = G_ADD [[UV28]], [[UV92]] + ; CHECK-NEXT: [[ADD25:%[0-9]+]]:_(i32) = G_ADD [[UV29]], [[UV93]] + ; CHECK-NEXT: [[ADD26:%[0-9]+]]:_(i32) = G_ADD [[UV30]], [[UV94]] + ; CHECK-NEXT: [[ADD27:%[0-9]+]]:_(i32) = G_ADD [[UV31]], [[UV95]] + ; CHECK-NEXT: [[ADD28:%[0-9]+]]:_(i32) = G_ADD [[UV32]], [[UV96]] + ; CHECK-NEXT: [[ADD29:%[0-9]+]]:_(i32) = G_ADD [[UV33]], [[UV97]] + ; CHECK-NEXT: [[ADD30:%[0-9]+]]:_(i32) = G_ADD [[UV34]], [[UV98]] + ; CHECK-NEXT: [[ADD31:%[0-9]+]]:_(i32) = G_ADD [[UV35]], 
[[UV99]] + ; CHECK-NEXT: [[ADD32:%[0-9]+]]:_(i32) = G_ADD [[UV36]], [[UV100]] + ; CHECK-NEXT: [[ADD33:%[0-9]+]]:_(i32) = G_ADD [[UV37]], [[UV101]] + ; CHECK-NEXT: [[ADD34:%[0-9]+]]:_(i32) = G_ADD [[UV38]], [[UV102]] + ; CHECK-NEXT: [[ADD35:%[0-9]+]]:_(i32) = G_ADD [[UV39]], [[UV103]] + ; CHECK-NEXT: [[ADD36:%[0-9]+]]:_(i32) = G_ADD [[UV40]], [[UV104]] + ; CHECK-NEXT: [[ADD37:%[0-9]+]]:_(i32) = G_ADD [[UV41]], [[UV105]] + ; CHECK-NEXT: [[ADD38:%[0-9]+]]:_(i32) = G_ADD [[UV42]], [[UV106]] + ; CHECK-NEXT: [[ADD39:%[0-9]+]]:_(i32) = G_ADD [[UV43]], [[UV107]] + ; CHECK-NEXT: [[ADD40:%[0-9]+]]:_(i32) = G_ADD [[UV44]], [[UV108]] + ; CHECK-NEXT: [[ADD41:%[0-9]+]]:_(i32) = G_ADD [[UV45]], [[UV109]] + ; CHECK-NEXT: [[ADD42:%[0-9]+]]:_(i32) = G_ADD [[UV46]], [[UV110]] + ; CHECK-NEXT: [[ADD43:%[0-9]+]]:_(i32) = G_ADD [[UV47]], [[UV111]] + ; CHECK-NEXT: [[ADD44:%[0-9]+]]:_(i32) = G_ADD [[UV48]], [[UV112]] + ; CHECK-NEXT: [[ADD45:%[0-9]+]]:_(i32) = G_ADD [[UV49]], [[UV113]] + ; CHECK-NEXT: [[ADD46:%[0-9]+]]:_(i32) = G_ADD [[UV50]], [[UV114]] + ; CHECK-NEXT: [[ADD47:%[0-9]+]]:_(i32) = G_ADD [[UV51]], [[UV115]] + ; CHECK-NEXT: [[ADD48:%[0-9]+]]:_(i32) = G_ADD [[UV52]], [[UV116]] + ; CHECK-NEXT: [[ADD49:%[0-9]+]]:_(i32) = G_ADD [[UV53]], [[UV117]] + ; CHECK-NEXT: [[ADD50:%[0-9]+]]:_(i32) = G_ADD [[UV54]], [[UV118]] + ; CHECK-NEXT: [[ADD51:%[0-9]+]]:_(i32) = G_ADD [[UV55]], [[UV119]] + ; CHECK-NEXT: [[ADD52:%[0-9]+]]:_(i32) = G_ADD [[UV56]], [[UV120]] + ; CHECK-NEXT: [[ADD53:%[0-9]+]]:_(i32) = G_ADD [[UV57]], [[UV121]] + ; CHECK-NEXT: [[ADD54:%[0-9]+]]:_(i32) = G_ADD [[UV58]], [[UV122]] + ; CHECK-NEXT: [[ADD55:%[0-9]+]]:_(i32) = G_ADD [[UV59]], [[UV123]] + ; CHECK-NEXT: [[ADD56:%[0-9]+]]:_(i32) = G_ADD [[UV60]], [[UV124]] + ; CHECK-NEXT: [[ADD57:%[0-9]+]]:_(i32) = G_ADD [[UV61]], [[UV125]] + ; CHECK-NEXT: [[ADD58:%[0-9]+]]:_(i32) = G_ADD [[UV62]], [[UV126]] + ; CHECK-NEXT: [[ADD59:%[0-9]+]]:_(i32) = G_ADD [[UV63]], [[UV127]] + ; CHECK-NEXT: [[ADD60:%[0-9]+]]:_(i32) = G_ADD [[UV64]], [[UV128]] + ; CHECK-NEXT: [[ADD61:%[0-9]+]]:_(i32) = G_ADD [[UV65]], [[UV129]] + ; CHECK-NEXT: [[ADD62:%[0-9]+]]:_(i32) = G_ADD [[UV66]], [[UV130]] + ; CHECK-NEXT: [[ADD63:%[0-9]+]]:_(i32) = G_ADD [[UV67]], [[UV131]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x i32>) = G_BUILD_VECTOR [[ADD]](i32), [[ADD1]](i32), [[ADD2]](i32), [[ADD3]](i32), [[ADD4]](i32), [[ADD5]](i32), [[ADD6]](i32), [[ADD7]](i32), [[ADD8]](i32), [[ADD9]](i32), [[ADD10]](i32), [[ADD11]](i32), [[ADD12]](i32), [[ADD13]](i32), [[ADD14]](i32), [[ADD15]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x i32>) = G_BUILD_VECTOR [[ADD16]](i32), [[ADD17]](i32), [[ADD18]](i32), [[ADD19]](i32), [[ADD20]](i32), [[ADD21]](i32), [[ADD22]](i32), [[ADD23]](i32), [[ADD24]](i32), [[ADD25]](i32), [[ADD26]](i32), [[ADD27]](i32), [[ADD28]](i32), [[ADD29]](i32), [[ADD30]](i32), [[ADD31]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x i32>) = G_BUILD_VECTOR [[ADD32]](i32), [[ADD33]](i32), [[ADD34]](i32), [[ADD35]](i32), [[ADD36]](i32), [[ADD37]](i32), [[ADD38]](i32), [[ADD39]](i32), [[ADD40]](i32), [[ADD41]](i32), [[ADD42]](i32), [[ADD43]](i32), [[ADD44]](i32), [[ADD45]](i32), [[ADD46]](i32), [[ADD47]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<16 x i32>) = G_BUILD_VECTOR [[ADD48]](i32), [[ADD49]](i32), [[ADD50]](i32), [[ADD51]](i32), [[ADD52]](i32), [[ADD53]](i32), [[ADD54]](i32), [[ADD55]](i32), [[ADD56]](i32), [[ADD57]](i32), [[ADD58]](i32), [[ADD59]](i32), [[ADD60]](i32), [[ADD61]](i32), [[ADD62]](i32), [[ADD63]](i32) ; CHECK-NEXT: G_BR %bb.2 ; 
CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<16 x s32>) = G_PHI [[UV]](<16 x s32>), %bb.0, [[BUILD_VECTOR]](<16 x s32>), %bb.1 - ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(<16 x s32>) = G_PHI [[UV1]](<16 x s32>), %bb.0, [[BUILD_VECTOR1]](<16 x s32>), %bb.1 - ; CHECK-NEXT: [[PHI2:%[0-9]+]]:_(<16 x s32>) = G_PHI [[UV2]](<16 x s32>), %bb.0, [[BUILD_VECTOR2]](<16 x s32>), %bb.1 - ; CHECK-NEXT: [[PHI3:%[0-9]+]]:_(<16 x s32>) = G_PHI [[UV3]](<16 x s32>), %bb.0, [[BUILD_VECTOR3]](<16 x s32>), %bb.1 - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<64 x s32>) = G_CONCAT_VECTORS [[PHI]](<16 x s32>), [[PHI1]](<16 x s32>), [[PHI2]](<16 x s32>), [[PHI3]](<16 x s32>) - ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[CONCAT_VECTORS]](<64 x s32>) + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<16 x i32>) = G_PHI [[UV]](<16 x i32>), %bb.0, [[BUILD_VECTOR]](<16 x i32>), %bb.1 + ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(<16 x i32>) = G_PHI [[UV1]](<16 x i32>), %bb.0, [[BUILD_VECTOR1]](<16 x i32>), %bb.1 + ; CHECK-NEXT: [[PHI2:%[0-9]+]]:_(<16 x i32>) = G_PHI [[UV2]](<16 x i32>), %bb.0, [[BUILD_VECTOR2]](<16 x i32>), %bb.1 + ; CHECK-NEXT: [[PHI3:%[0-9]+]]:_(<16 x i32>) = G_PHI [[UV3]](<16 x i32>), %bb.0, [[BUILD_VECTOR3]](<16 x i32>), %bb.1 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<64 x i32>) = G_CONCAT_VECTORS [[PHI]](<16 x i32>), [[PHI1]](<16 x i32>), [[PHI2]](<16 x i32>), [[PHI3]](<16 x i32>) + ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[CONCAT_VECTORS]](<64 x i32>) bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 - %0:_(<64 x s32>) = G_IMPLICIT_DEF - %1:_(s32) = COPY $vgpr4 - %2:_(s32) = G_CONSTANT i32 0 - %3:_(s1) = G_ICMP intpred(eq), %1, %2 - G_BRCOND %3, %bb.1 + %0:_(<64 x i32>) = G_IMPLICIT_DEF + %1:_(i32) = COPY $vgpr4 + %2:_(i32) = G_CONSTANT i32 0 + %3:_(i1) = G_ICMP intpred(eq), %1(i32), %2 + G_BRCOND %3(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %4:_(<64 x s32>) = G_ADD %0, %0 + %4:_(<64 x i32>) = G_ADD %0, %0 G_BR %bb.2 bb.2: - %5:_(<64 x s32>) = G_PHI %0, %bb.0, %4, %bb.1 - S_SETPC_B64 undef $sgpr30_sgpr31, implicit %5 + %5:_(<64 x i32>) = G_PHI %0(<64 x i32>), %bb.0, %4(<64 x i32>), %bb.1 + S_SETPC_B64 undef $sgpr30_sgpr31, implicit %5(<64 x i32>) + + + + ... 
@@ -798,49 +842,53 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: liveins: $vgpr0_vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](i1), %bb.1 ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV]], [[UV2]] + ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s64) = G_PHI [[COPY]](s64), %bb.0, [[MV]](s64), %bb.1 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PHI]](s64) + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(i64) = G_PHI [[COPY]](i64), %bb.0, [[MV]](i64), %bb.1 + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PHI]](i64) ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0_vgpr1, $vgpr2 - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s32) = G_CONSTANT i32 0 - %3:_(s1) = G_ICMP intpred(eq), %1, %2 - G_BRCOND %3, %bb.1 + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = COPY $vgpr2 + %2:_(i32) = G_CONSTANT i32 0 + %3:_(i1) = G_ICMP intpred(eq), %1(i32), %2 + G_BRCOND %3(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %4:_(s64) = G_ADD %0, %0 + %4:_(i64) = G_ADD %0, %0 G_BR %bb.2 bb.2: - %5:_(s64) = G_PHI %0, %bb.0, %4, %bb.1 - $vgpr0_vgpr1 = COPY %5 + %5:_(i64) = G_PHI %0(i64), %bb.0, %4(i64), %bb.1 + $vgpr0_vgpr1 = COPY %5(i64) S_SETPC_B64 undef $sgpr30_sgpr31 + + + + ... 
--- name: test_phi_v2s64 @@ -852,57 +900,61 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](i1), %bb.1 ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; CHECK-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] - ; CHECK-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV]](i64) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV4]], [[UV6]] + ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV1]](i64) + ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV3]](i64) + ; CHECK-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UV8]], [[UV10]] + ; CHECK-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO2]](i32), [[UADDE2]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64) ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; 
CHECK-NEXT: [[PHI:%[0-9]+]]:_(<2 x s64>) = G_PHI [[COPY]](<2 x s64>), %bb.0, [[BUILD_VECTOR]](<2 x s64>), %bb.1 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[PHI]](<2 x s64>) + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<2 x i64>) = G_PHI [[COPY]](<2 x i64>), %bb.0, [[BUILD_VECTOR]](<2 x i64>), %bb.1 + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[PHI]](<2 x i64>) ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = COPY $vgpr4 - %2:_(s32) = G_CONSTANT i32 0 - %3:_(s1) = G_ICMP intpred(eq), %1, %2 - G_BRCOND %3, %bb.1 + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i32) = COPY $vgpr4 + %2:_(i32) = G_CONSTANT i32 0 + %3:_(i1) = G_ICMP intpred(eq), %1(i32), %2 + G_BRCOND %3(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %4:_(<2 x s64>) = G_ADD %0, %0 + %4:_(<2 x i64>) = G_ADD %0, %0 G_BR %bb.2 bb.2: - %5:_(<2 x s64>) = G_PHI %0, %bb.0, %4, %bb.1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %5 + %5:_(<2 x i64>) = G_PHI %0(<2 x i64>), %bb.0, %4(<2 x i64>), %bb.1 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %5(<2 x i64>) S_SETPC_B64 undef $sgpr30_sgpr31 + + + + ... --- name: test_phi_v3s64 @@ -914,69 +966,73 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64) - ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64), [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<4 x i64>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i64>) = G_BUILD_VECTOR [[UV]](i64), [[UV1]](i64), [[UV2]](i64) + ; CHECK-NEXT: G_BRCOND [[ICMP]](i1), %bb.1 ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; CHECK-NEXT: [[UADDO2:%[0-9]+]]:_(s32), 
[[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] - ; CHECK-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; CHECK-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; CHECK-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[UV12]], [[UV14]] - ; CHECK-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UV13]], [[UV15]], [[UADDO5]] - ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO4]](s32), [[UADDE4]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV]](i64) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV]](i64) + ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV4]], [[UV6]] + ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV1]](i64) + ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV1]](i64) + ; CHECK-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UV8]], [[UV10]] + ; CHECK-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO2]](i32), [[UADDE2]](i32) + ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; CHECK-NEXT: [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; CHECK-NEXT: [[UADDO4:%[0-9]+]]:_(i32), [[UADDO5:%[0-9]+]]:_(i1) = G_UADDO [[UV12]], [[UV14]] + ; CHECK-NEXT: [[UADDE4:%[0-9]+]]:_(i32), [[UADDE5:%[0-9]+]]:_(i1) = G_UADDE [[UV13]], [[UV15]], [[UADDO5]] + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO4]](i32), [[UADDE4]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64), [[MV2]](i64) ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<3 x s64>) = G_PHI [[BUILD_VECTOR]](<3 x s64>), %bb.0, [[BUILD_VECTOR1]](<3 x s64>), %bb.1 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s64), [[UV17:%[0-9]+]]:_(s64), [[UV18:%[0-9]+]]:_(s64), [[UV19:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; CHECK-NEXT: [[UV20:%[0-9]+]]:_(s64), [[UV21:%[0-9]+]]:_(s64), [[UV22:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[PHI]](<3 x s64>) - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV20]](s64), [[UV21]](s64), [[UV22]](s64), [[UV19]](s64) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR2]](<4 x s64>) + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<3 x i64>) = G_PHI [[BUILD_VECTOR]](<3 x i64>), %bb.0, [[BUILD_VECTOR1]](<3 x i64>), %bb.1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(i64), [[UV17:%[0-9]+]]:_(i64), [[UV18:%[0-9]+]]:_(i64), [[UV19:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; CHECK-NEXT: 
[[UV20:%[0-9]+]]:_(i64), [[UV21:%[0-9]+]]:_(i64), [[UV22:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[PHI]](<3 x i64>) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[UV20]](i64), [[UV21]](i64), [[UV22]](i64), [[UV19]](i64) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR2]](<4 x i64>) ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8 - %0:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - %1:_(s32) = COPY $vgpr8 - %2:_(s32) = G_CONSTANT i32 0 - %3:_(s1) = G_ICMP intpred(eq), %1, %2 - %4:_(<3 x s64>) = G_EXTRACT %0, 0 - G_BRCOND %3, %bb.1 + %0:_(<4 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(i32) = COPY $vgpr8 + %2:_(i32) = G_CONSTANT i32 0 + %3:_(i1) = G_ICMP intpred(eq), %1(i32), %2 + %4:_(<3 x i64>) = G_EXTRACT %0(<4 x i64>), 0 + G_BRCOND %3(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %5:_(<3 x s64>) = G_ADD %4, %4 + %5:_(<3 x i64>) = G_ADD %4, %4 G_BR %bb.2 bb.2: - %6:_(<3 x s64>) = G_PHI %4, %bb.0, %5, %bb.1 - %7:_(<4 x s64>) = G_IMPLICIT_DEF - %8:_(<4 x s64>) = G_INSERT %7, %6, 0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %8 + %6:_(<3 x i64>) = G_PHI %4(<3 x i64>), %bb.0, %5(<3 x i64>), %bb.1 + %7:_(<4 x i64>) = G_IMPLICIT_DEF + %8:_(<4 x i64>) = G_INSERT %7, %6(<3 x i64>), 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %8(<4 x i64>) S_SETPC_B64 undef $sgpr30_sgpr31 + + + + ... --- name: test_phi_p3 @@ -989,17 +1045,17 @@ body: | ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](i1), %bb.1 ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](i32) ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: @@ -1007,28 +1063,32 @@ body: | ; CHECK-NEXT: $vgpr0 = COPY [[PHI]](p3) ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0, $vgpr1 %0:_(p3) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_CONSTANT i32 0 - %3:_(s1) = G_ICMP intpred(eq), %1, %2 - G_BRCOND %3, %bb.1 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_CONSTANT i32 0 + %3:_(i1) = G_ICMP intpred(eq), %1(i32), %2 + G_BRCOND %3(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %4:_(s32) = G_CONSTANT i32 8 - %5:_(p3) = G_PTR_ADD %0, %4 + %4:_(i32) = G_CONSTANT i32 8 + %5:_(p3) = G_PTR_ADD %0, %4(i32) G_BR %bb.2 bb.2: - %6:_(p3) = G_PHI %0, %bb.0, %5, %bb.1 - $vgpr0 = COPY %6 + %6:_(p3) = G_PHI %0(p3), %bb.0, %5(p3), %bb.1 + $vgpr0 = 
COPY %6(p3) S_SETPC_B64 undef $sgpr30_sgpr31 + + + + ... --- name: test_phi_p5 @@ -1041,17 +1101,17 @@ body: | ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](i1), %bb.1 ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](i32) ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: @@ -1059,28 +1119,32 @@ body: | ; CHECK-NEXT: $vgpr0 = COPY [[PHI]](p5) ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0, $vgpr1 %0:_(p5) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_CONSTANT i32 0 - %3:_(s1) = G_ICMP intpred(eq), %1, %2 - G_BRCOND %3, %bb.1 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_CONSTANT i32 0 + %3:_(i1) = G_ICMP intpred(eq), %1(i32), %2 + G_BRCOND %3(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %4:_(s32) = G_CONSTANT i32 8 - %5:_(p5) = G_PTR_ADD %0, %4 + %4:_(i32) = G_CONSTANT i32 8 + %5:_(p5) = G_PTR_ADD %0, %4(i32) G_BR %bb.2 bb.2: - %6:_(p5) = G_PHI %0, %bb.0, %5, %bb.1 - $vgpr0 = COPY %6 + %6:_(p5) = G_PHI %0(p5), %bb.0, %5(p5), %bb.1 + $vgpr0 = COPY %6(p5) S_SETPC_B64 undef $sgpr30_sgpr31 + + + + ... 
--- name: test_phi_p0 @@ -1093,17 +1157,17 @@ body: | ; CHECK-NEXT: liveins: $vgpr0_vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](i1), %bb.1 ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](i64) ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: @@ -1111,28 +1175,32 @@ body: | ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PHI]](p0) ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0_vgpr1, $vgpr2 %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s32) = G_CONSTANT i32 0 - %3:_(s1) = G_ICMP intpred(eq), %1, %2 - G_BRCOND %3, %bb.1 + %1:_(i32) = COPY $vgpr2 + %2:_(i32) = G_CONSTANT i32 0 + %3:_(i1) = G_ICMP intpred(eq), %1(i32), %2 + G_BRCOND %3(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %4:_(s64) = G_CONSTANT i64 8 - %5:_(p0) = G_PTR_ADD %0, %4 + %4:_(i64) = G_CONSTANT i64 8 + %5:_(p0) = G_PTR_ADD %0, %4(i64) G_BR %bb.2 bb.2: - %6:_(p0) = G_PHI %0, %bb.0, %5, %bb.1 - $vgpr0_vgpr1 = COPY %6 + %6:_(p0) = G_PHI %0(p0), %bb.0, %5(p0), %bb.1 + $vgpr0_vgpr1 = COPY %6(p0) S_SETPC_B64 undef $sgpr30_sgpr31 + + + + ... 
--- name: test_phi_p1 @@ -1145,17 +1213,17 @@ body: | ; CHECK-NEXT: liveins: $vgpr0_vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](i1), %bb.1 ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: @@ -1163,28 +1231,32 @@ body: | ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PHI]](p1) ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0_vgpr1, $vgpr2 %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s32) = G_CONSTANT i32 0 - %3:_(s1) = G_ICMP intpred(eq), %1, %2 - G_BRCOND %3, %bb.1 + %1:_(i32) = COPY $vgpr2 + %2:_(i32) = G_CONSTANT i32 0 + %3:_(i1) = G_ICMP intpred(eq), %1(i32), %2 + G_BRCOND %3(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %4:_(s64) = G_CONSTANT i64 8 - %5:_(p1) = G_PTR_ADD %0, %4 + %4:_(i64) = G_CONSTANT i64 8 + %5:_(p1) = G_PTR_ADD %0, %4(i64) G_BR %bb.2 bb.2: - %6:_(p1) = G_PHI %0, %bb.0, %5, %bb.1 - $vgpr0_vgpr1 = COPY %6 + %6:_(p1) = G_PHI %0(p1), %bb.0, %5(p1), %bb.1 + $vgpr0_vgpr1 = COPY %6(p1) S_SETPC_B64 undef $sgpr30_sgpr31 + + + + ... 
--- name: test_phi_p4 @@ -1197,17 +1269,17 @@ body: | ; CHECK-NEXT: liveins: $vgpr0_vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](i1), %bb.1 ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](i64) ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: @@ -1215,28 +1287,32 @@ body: | ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PHI]](p4) ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0_vgpr1, $vgpr2 %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s32) = G_CONSTANT i32 0 - %3:_(s1) = G_ICMP intpred(eq), %1, %2 - G_BRCOND %3, %bb.1 + %1:_(i32) = COPY $vgpr2 + %2:_(i32) = G_CONSTANT i32 0 + %3:_(i1) = G_ICMP intpred(eq), %1(i32), %2 + G_BRCOND %3(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %4:_(s64) = G_CONSTANT i64 8 - %5:_(p4) = G_PTR_ADD %0, %4 + %4:_(i64) = G_CONSTANT i64 8 + %5:_(p4) = G_PTR_ADD %0, %4(i64) G_BR %bb.2 bb.2: - %6:_(p4) = G_PHI %0, %bb.0, %5, %bb.1 - $vgpr0_vgpr1 = COPY %6 + %6:_(p4) = G_PHI %0(p4), %bb.0, %5(p4), %bb.1 + $vgpr0_vgpr1 = COPY %6(p4) S_SETPC_B64 undef $sgpr30_sgpr31 + + + + ... 
--- name: test_phi_p9999 @@ -1249,10 +1325,10 @@ body: | ; CHECK-NEXT: liveins: $vgpr0_vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p9999) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](i1), %bb.1 ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: @@ -1266,27 +1342,31 @@ body: | ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PHI]](p9999) ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0_vgpr1, $vgpr2 %0:_(p9999) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s32) = G_CONSTANT i32 0 - %3:_(s1) = G_ICMP intpred(eq), %1, %2 - G_BRCOND %3, %bb.1 + %1:_(i32) = COPY $vgpr2 + %2:_(i32) = G_CONSTANT i32 0 + %3:_(i1) = G_ICMP intpred(eq), %1(i32), %2 + G_BRCOND %3(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) %4:_(p9999) = G_IMPLICIT_DEF G_BR %bb.2 bb.2: - %5:_(p9999) = G_PHI %0, %bb.0, %4, %bb.1 - $vgpr0_vgpr1 = COPY %5 + %5:_(p9999) = G_PHI %0(p9999), %bb.0, %4(p9999), %bb.1 + $vgpr0_vgpr1 = COPY %5(p9999) S_SETPC_B64 undef $sgpr30_sgpr31 + + + + ... --- name: test_phi_s1 @@ -1298,49 +1378,53 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i1) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: G_BRCOND [[ICMP]](i1), %bb.1 ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s1) = G_PHI [[TRUNC]](s1), %bb.0, [[DEF]](s1), %bb.1 - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[PHI]](s1) - ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(i1) = G_PHI [[TRUNC]](i1), %bb.0, [[DEF]](i1), %bb.1 + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[PHI]](i1) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](i32) ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0, $vgpr1 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_CONSTANT i32 0 - %3:_(s1) = G_ICMP intpred(eq), %1, %2 - %4:_(s1) = G_TRUNC %1 - G_BRCOND %3, %bb.1 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_CONSTANT i32 0 + 
%3:_(i1) = G_ICMP intpred(eq), %1(i32), %2 + %4:_(i1) = G_TRUNC %1(i32) + G_BRCOND %3(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %5:_(s1) = G_IMPLICIT_DEF + %5:_(i1) = G_IMPLICIT_DEF G_BR %bb.2 bb.2: - %6:_(s1) = G_PHI %4, %bb.0, %5, %bb.1 - %7:_(s32) = G_ZEXT %6 - $vgpr0 = COPY %7 + %6:_(i1) = G_PHI %4(i1), %bb.0, %5(i1), %bb.1 + %7:_(i32) = G_ZEXT %6(i1) + $vgpr0 = COPY %7(i32) S_SETPC_B64 undef $sgpr30_sgpr31 + + + + ... --- name: test_phi_s7 @@ -1352,52 +1436,56 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: G_BRCOND [[ICMP]](i1), %bb.1 ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s16) = G_PHI [[TRUNC]](s16), %bb.0, [[TRUNC1]](s16), %bb.1 - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI]](s16) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] - ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(i16) = G_PHI [[TRUNC]](i16), %bb.0, [[TRUNC1]](i16), %bb.1 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[PHI]](i16) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 127 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[ANYEXT]], [[C1]] + ; CHECK-NEXT: $vgpr0 = COPY [[AND]](i32) ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0, $vgpr1 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_CONSTANT i32 0 - %3:_(s1) = G_ICMP intpred(eq), %1, %2 - %4:_(s7) = G_TRUNC %1 - G_BRCOND %3, %bb.1 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_CONSTANT i32 0 + %3:_(i1) = G_ICMP intpred(eq), %1(i32), %2 + %4:_(i7) = G_TRUNC %1(i32) + G_BRCOND %3(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %5:_(s7) = G_IMPLICIT_DEF + %5:_(i7) = G_IMPLICIT_DEF G_BR %bb.2 bb.2: - %6:_(s7) = G_PHI %4, %bb.0, %5, %bb.1 - %7:_(s32) = G_ZEXT %6 - $vgpr0 = COPY %7 + %6:_(i7) = G_PHI %4(i7), %bb.0, %5(i7), %bb.1 + %7:_(i32) = G_ZEXT %6(i7) + $vgpr0 = COPY %7(i32) S_SETPC_B64 undef $sgpr30_sgpr31 + + + + ... 
--- name: test_phi_s8 @@ -1409,52 +1497,56 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: G_BRCOND [[ICMP]](i1), %bb.1 ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s16) = G_PHI [[TRUNC]](s16), %bb.0, [[TRUNC1]](s16), %bb.1 - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI]](s16) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] - ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(i16) = G_PHI [[TRUNC]](i16), %bb.0, [[TRUNC1]](i16), %bb.1 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[PHI]](i16) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[ANYEXT]], [[C1]] + ; CHECK-NEXT: $vgpr0 = COPY [[AND]](i32) ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0, $vgpr1 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_CONSTANT i32 0 - %3:_(s1) = G_ICMP intpred(eq), %1, %2 - %4:_(s8) = G_TRUNC %1 - G_BRCOND %3, %bb.1 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_CONSTANT i32 0 + %3:_(i1) = G_ICMP intpred(eq), %1(i32), %2 + %4:_(i8) = G_TRUNC %1(i32) + G_BRCOND %3(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %5:_(s8) = G_IMPLICIT_DEF + %5:_(i8) = G_IMPLICIT_DEF G_BR %bb.2 bb.2: - %6:_(s8) = G_PHI %4, %bb.0, %5, %bb.1 - %7:_(s32) = G_ZEXT %6 - $vgpr0 = COPY %7 + %6:_(i8) = G_PHI %4(i8), %bb.0, %5(i8), %bb.1 + %7:_(i32) = G_ZEXT %6(i8) + $vgpr0 = COPY %7(i32) S_SETPC_B64 undef $sgpr30_sgpr31 + + + + ... 
--- name: test_phi_s16 @@ -1466,49 +1558,53 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: G_BRCOND [[ICMP]](i1), %bb.1 ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s16) = G_PHI [[TRUNC]](s16), %bb.0, [[DEF]](s16), %bb.1 - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[PHI]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(i16) = G_PHI [[TRUNC]](i16), %bb.0, [[DEF]](i16), %bb.1 + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[PHI]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](i32) ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0, $vgpr1 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_CONSTANT i32 0 - %3:_(s1) = G_ICMP intpred(eq), %1, %2 - %4:_(s16) = G_TRUNC %1 - G_BRCOND %3, %bb.1 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_CONSTANT i32 0 + %3:_(i1) = G_ICMP intpred(eq), %1(i32), %2 + %4:_(i16) = G_TRUNC %1(i32) + G_BRCOND %3(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %5:_(s16) = G_IMPLICIT_DEF + %5:_(i16) = G_IMPLICIT_DEF G_BR %bb.2 bb.2: - %6:_(s16) = G_PHI %4, %bb.0, %5, %bb.1 - %7:_(s32) = G_ZEXT %6 - $vgpr0 = COPY %7 + %6:_(i16) = G_PHI %4(i16), %bb.0, %5(i16), %bb.1 + %7:_(i32) = G_ZEXT %6(i16) + $vgpr0 = COPY %7(i32) S_SETPC_B64 undef $sgpr30_sgpr31 + + + + ... 
--- name: test_phi_s128 @@ -1520,51 +1616,55 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](i1), %bb.1 ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s128) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s128) - ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV4]] - ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV5]], [[UADDO1]] - ; CHECK-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV2]], [[UV6]], [[UADDE1]] - ; CHECK-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UV3]], [[UV7]], [[UADDE3]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32), [[UADDE2]](s32), [[UADDE4]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i128) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i128) + ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV]], [[UV4]] + ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV1]], [[UV5]], [[UADDO1]] + ; CHECK-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UV2]], [[UV6]], [[UADDE1]] + ; CHECK-NEXT: [[UADDE4:%[0-9]+]]:_(i32), [[UADDE5:%[0-9]+]]:_(i1) = G_UADDE [[UV3]], [[UV7]], [[UADDE3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32), [[UADDE2]](i32), [[UADDE4]](i32) ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s128) = G_PHI [[COPY]](s128), %bb.0, [[MV]](s128), %bb.1 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[PHI]](s128) + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(i128) = G_PHI [[COPY]](i128), %bb.0, [[MV]](i128), %bb.1 + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[PHI]](i128) ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = COPY $vgpr4 - %2:_(s32) = G_CONSTANT i32 0 - %3:_(s1) = G_ICMP intpred(eq), %1, %2 - G_BRCOND %3, %bb.1 + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i32) = COPY $vgpr4 + %2:_(i32) = G_CONSTANT i32 0 + %3:_(i1) = G_ICMP intpred(eq), %1(i32), %2 + G_BRCOND %3(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - 
%4:_(s128) = G_ADD %0, %0 + %4:_(i128) = G_ADD %0, %0 G_BR %bb.2 bb.2: - %5:_(s128) = G_PHI %0, %bb.0, %4, %bb.1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %5 + %5:_(i128) = G_PHI %0(i128), %bb.0, %4(i128), %bb.1 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %5(i128) S_SETPC_B64 undef $sgpr30_sgpr31 + + + + ... --- name: test_phi_s256 @@ -1576,55 +1676,59 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](i1), %bb.1 ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s256) - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s256) - ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV8]] - ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV9]], [[UADDO1]] - ; CHECK-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV2]], [[UV10]], [[UADDE1]] - ; CHECK-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UV3]], [[UV11]], [[UADDE3]] - ; CHECK-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UV4]], [[UV12]], [[UADDE5]] - ; CHECK-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV13]], [[UADDE7]] - ; CHECK-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UV6]], [[UV14]], [[UADDE9]] - ; CHECK-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UV7]], [[UV15]], [[UADDE11]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32), [[UADDE2]](s32), [[UADDE4]](s32), [[UADDE6]](s32), [[UADDE8]](s32), [[UADDE10]](s32), [[UADDE12]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i256) + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32), [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32), [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32), [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i256) + ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV]], [[UV8]] + ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV1]], [[UV9]], [[UADDO1]] + ; CHECK-NEXT: 
[[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UV2]], [[UV10]], [[UADDE1]] + ; CHECK-NEXT: [[UADDE4:%[0-9]+]]:_(i32), [[UADDE5:%[0-9]+]]:_(i1) = G_UADDE [[UV3]], [[UV11]], [[UADDE3]] + ; CHECK-NEXT: [[UADDE6:%[0-9]+]]:_(i32), [[UADDE7:%[0-9]+]]:_(i1) = G_UADDE [[UV4]], [[UV12]], [[UADDE5]] + ; CHECK-NEXT: [[UADDE8:%[0-9]+]]:_(i32), [[UADDE9:%[0-9]+]]:_(i1) = G_UADDE [[UV5]], [[UV13]], [[UADDE7]] + ; CHECK-NEXT: [[UADDE10:%[0-9]+]]:_(i32), [[UADDE11:%[0-9]+]]:_(i1) = G_UADDE [[UV6]], [[UV14]], [[UADDE9]] + ; CHECK-NEXT: [[UADDE12:%[0-9]+]]:_(i32), [[UADDE13:%[0-9]+]]:_(i1) = G_UADDE [[UV7]], [[UV15]], [[UADDE11]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i256) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32), [[UADDE2]](i32), [[UADDE4]](i32), [[UADDE6]](i32), [[UADDE8]](i32), [[UADDE10]](i32), [[UADDE12]](i32) ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s256) = G_PHI [[COPY]](s256), %bb.0, [[MV]](s256), %bb.1 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[PHI]](s256) + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(i256) = G_PHI [[COPY]](i256), %bb.0, [[MV]](i256), %bb.1 + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[PHI]](i256) ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8 - %0:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - %1:_(s32) = COPY $vgpr8 - %2:_(s32) = G_CONSTANT i32 0 - %3:_(s1) = G_ICMP intpred(eq), %1, %2 - G_BRCOND %3, %bb.1 + %0:_(i256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(i32) = COPY $vgpr8 + %2:_(i32) = G_CONSTANT i32 0 + %3:_(i1) = G_ICMP intpred(eq), %1(i32), %2 + G_BRCOND %3(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %4:_(s256) = G_ADD %0, %0 + %4:_(i256) = G_ADD %0, %0 G_BR %bb.2 bb.2: - %5:_(s256) = G_PHI %0, %bb.0, %4, %bb.1 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %5 + %5:_(i256) = G_PHI %0(i256), %bb.0, %4(i256), %bb.1 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %5(i256) S_SETPC_B64 undef $sgpr30_sgpr31 + + + + ... 
--- name: test_phi_v2s1 @@ -1636,74 +1740,78 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND]](s32), [[AND1]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[LSHR]](s32), [[LSHR1]] - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY3]](s32), [[C]] - ; CHECK-NEXT: G_BRCOND [[ICMP2]](s1), %bb.1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C1]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C2]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[AND]](i32), [[AND1]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[LSHR]](i32), [[LSHR1]] + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY3]](i32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP2]](i1), %bb.1 ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C3]](s32) - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C3]](s32) - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C4]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]] - ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[AND2]](s32), [[AND3]] - ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LSHR2]](s32), [[LSHR3]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR 
[[BITCAST2]], [[C3]](i32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY2]](<2 x i16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C3]](i32) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C4]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST3]], [[C4]] + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[AND2]](i32), [[AND3]] + ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[LSHR2]](i32), [[LSHR3]] ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s1) = G_PHI [[ICMP]](s1), %bb.0, [[ICMP3]](s1), %bb.1 - ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s1) = G_PHI [[ICMP1]](s1), %bb.0, [[ICMP4]](s1), %bb.1 - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI1]](s1) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C5]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C5]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND4]](s32), [[AND5]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(i1) = G_PHI [[ICMP]](i1), %bb.0, [[ICMP3]](i1), %bb.1 + ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(i1) = G_PHI [[ICMP1]](i1), %bb.0, [[ICMP4]](i1), %bb.1 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[PHI]](i1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[PHI1]](i1) + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[ANYEXT]], [[C5]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[ANYEXT1]], [[C5]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[AND4]](i32), [[AND5]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = COPY $vgpr2 - %3:_(s32) = COPY $vgpr1 - %4:_(s32) = G_CONSTANT i32 0 - %5:_(<2 x s1>) = G_ICMP intpred(eq), %0, %1 - %6:_(s1) = G_ICMP intpred(eq), %3, %4 - G_BRCOND %6, %bb.1 + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr1 + %4:_(i32) = G_CONSTANT i32 0 + %5:_(<2 x i1>) = G_ICMP intpred(eq), %0(<2 x i16>), %1 + %6:_(i1) = G_ICMP intpred(eq), %3(i32), %4 + G_BRCOND %6(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %7:_(<2 x s1>) = G_ICMP intpred(ne), %0, %2 + %7:_(<2 x i1>) = G_ICMP intpred(ne), %0(<2 x i16>), %2 G_BR %bb.2 bb.2: - %8:_(<2 x s1>) = G_PHI %5, %bb.0, %7, %bb.1 - %9:_(<2 x s32>) = G_ZEXT %8 - $vgpr0_vgpr1 = COPY %9 + %8:_(<2 x i1>) = G_PHI %5(<2 x i1>), %bb.0, %7(<2 x i1>), %bb.1 + %9:_(<2 x i32>) = G_ZEXT %8(<2 x i1>) + $vgpr0_vgpr1 = COPY %9(<2 x i32>) S_SETPC_B64 undef $sgpr30_sgpr31 + + + + ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ptr-add.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ptr-add.mir index 09e1109c36293..44e51616cf411 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ptr-add.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ptr-add.mir @@ -11,14 +11,14 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[COPY1]](s64) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[COPY1]](i64) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PTR_ADD]](p1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(p1) = G_PTR_ADD %0, %1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(p1) = G_PTR_ADD %0, %1(i64) + $vgpr0_vgpr1 = COPY %2(p1) - $vgpr0_vgpr1 = COPY %2 ... --- @@ -31,14 +31,14 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[COPY1]](s64) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[COPY1]](i64) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PTR_ADD]](p0) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(p0) = G_PTR_ADD %0, %1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(p0) = G_PTR_ADD %0, %1(i64) + $vgpr0_vgpr1 = COPY %2(p0) - $vgpr0_vgpr1 = COPY %2 ... --- @@ -51,14 +51,14 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[COPY1]](s64) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[COPY1]](i64) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PTR_ADD]](p4) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(p4) = G_PTR_ADD %0, %1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(p4) = G_PTR_ADD %0, %1(i64) + $vgpr0_vgpr1 = COPY %2(p4) - $vgpr0_vgpr1 = COPY %2 ... --- @@ -71,14 +71,14 @@ body: | ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[COPY1]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[COPY1]](i32) ; CHECK-NEXT: $vgpr0 = COPY [[PTR_ADD]](p3) %0:_(p3) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(p3) = G_PTR_ADD %0, %1 + %1:_(i32) = COPY $vgpr1 + %2:_(p3) = G_PTR_ADD %0, %1(i32) + $vgpr0 = COPY %2(p3) - $vgpr0 = COPY %2 ... 
--- @@ -91,14 +91,14 @@ body: | ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[COPY1]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[COPY1]](i32) ; CHECK-NEXT: $vgpr0 = COPY [[PTR_ADD]](p5) %0:_(p5) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(p5) = G_PTR_ADD %0, %1 + %1:_(i32) = COPY $vgpr1 + %2:_(p5) = G_PTR_ADD %0, %1(i32) + $vgpr0 = COPY %2(p5) - $vgpr0 = COPY %2 ... --- @@ -111,14 +111,14 @@ body: | ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p6) = G_PTR_ADD [[COPY]], [[COPY1]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p6) = G_PTR_ADD [[COPY]], [[COPY1]](i32) ; CHECK-NEXT: $sgpr0 = COPY [[PTR_ADD]](p6) %0:_(p6) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(p6) = G_PTR_ADD %0, %1 + %1:_(i32) = COPY $sgpr1 + %2:_(p6) = G_PTR_ADD %0, %1(i32) + $sgpr0 = COPY %2(p6) - $sgpr0 = COPY %2 ... --- @@ -131,14 +131,14 @@ body: | ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p2) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p2) = G_PTR_ADD [[COPY]], [[COPY1]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p2) = G_PTR_ADD [[COPY]], [[COPY1]](i32) ; CHECK-NEXT: $vgpr0 = COPY [[PTR_ADD]](p2) %0:_(p2) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(p2) = G_PTR_ADD %0, %1 + %1:_(i32) = COPY $vgpr1 + %2:_(p2) = G_PTR_ADD %0, %1(i32) + $vgpr0 = COPY %2(p2) - $vgpr0 = COPY %2 ... --- @@ -151,13 +151,13 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p999) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p999) = G_PTR_ADD [[COPY]], [[COPY1]](s64) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p999) = G_PTR_ADD [[COPY]], [[COPY1]](i64) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PTR_ADD]](p999) %0:_(p999) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(p999) = G_PTR_ADD %0, %1 - $vgpr0_vgpr1 = COPY %2 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(p999) = G_PTR_ADD %0, %1(i64) + $vgpr0_vgpr1 = COPY %2(p999) ... 
--- @@ -170,17 +170,17 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x p1>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(p1), [[UV1:%[0-9]+]]:_(p1) = G_UNMERGE_VALUES [[COPY]](<2 x p1>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[UV]], [[UV2]](s64) - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[UV1]], [[UV3]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY1]](<2 x i64>) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[UV]], [[UV2]](i64) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[UV1]], [[UV3]](i64) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[PTR_ADD]](p1), [[PTR_ADD1]](p1) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p1>) %0:_(<2 x p1>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - %2:_(<2 x p1>) = G_PTR_ADD %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + %1:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(<2 x p1>) = G_PTR_ADD %0, %1(<2 x i64>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<2 x p1>) ... --- @@ -193,15 +193,15 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY]](<2 x p3>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[UV]], [[UV2]](s32) - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[UV1]], [[UV3]](s32) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[UV]], [[UV2]](i32) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[UV1]], [[UV3]](i32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[PTR_ADD]](p3), [[PTR_ADD1]](p3) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) %0:_(<2 x p3>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x p3>) = G_PTR_ADD %0, %1 - $vgpr0_vgpr1 = COPY %2 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x p3>) = G_PTR_ADD %0, %1(<2 x i32>) + $vgpr0_vgpr1 = COPY %2(<2 x p3>) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ptrmask.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ptrmask.mir index 80d1aa4d8889d..de967ac0b947d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ptrmask.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ptrmask.mir @@ -11,17 +11,17 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY1]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]] - ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p1) = G_PTRMASK [[COPY]], [[AND]](s64) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[COPY1]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[ANYEXT]], [[C]] + ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p1) = G_PTRMASK [[COPY]], [[AND]](i64) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PTRMASK]](p1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s16) = G_TRUNC %1 - %3:_(p1) = G_PTRMASK %0, %2 - $vgpr0_vgpr1 = COPY %3 + %1:_(i32) = COPY $vgpr2 + %2:_(i16) = G_TRUNC %1(i32) + %3:_(p1) = G_PTRMASK %0, %2(i16) + $vgpr0_vgpr1 = COPY %3(p1) ... --- @@ -34,14 +34,14 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY1]](s32) - ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p1) = G_PTRMASK [[COPY]], [[ZEXT]](s64) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[COPY1]](i32) + ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p1) = G_PTRMASK [[COPY]], [[ZEXT]](i64) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PTRMASK]](p1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(p1) = G_PTRMASK %0, %1 - $vgpr0_vgpr1 = COPY %2 + %1:_(i32) = COPY $vgpr2 + %2:_(p1) = G_PTRMASK %0, %1(i32) + $vgpr0_vgpr1 = COPY %2(p1) ... --- @@ -54,13 +54,13 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p1) = G_PTRMASK [[COPY]], [[COPY1]](s64) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p1) = G_PTRMASK [[COPY]], [[COPY1]](i64) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PTRMASK]](p1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(p1) = G_PTRMASK %0, %1 - $vgpr0_vgpr1 = COPY %2 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(p1) = G_PTRMASK %0, %1(i64) + $vgpr0_vgpr1 = COPY %2(p1) ... 
--- @@ -73,14 +73,14 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[COPY1]](s96) - ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p1) = G_PTRMASK [[COPY]], [[TRUNC]](s64) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY $vgpr2_vgpr3_vgpr4 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i64) = G_TRUNC [[COPY1]](i96) + ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p1) = G_PTRMASK [[COPY]], [[TRUNC]](i64) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PTRMASK]](p1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s96) = COPY $vgpr2_vgpr3_vgpr4 - %2:_(p1) = G_PTRMASK %0, %1 - $vgpr0_vgpr1 = COPY %2 + %1:_(i96) = COPY $vgpr2_vgpr3_vgpr4 + %2:_(p1) = G_PTRMASK %0, %1(i96) + $vgpr0_vgpr1 = COPY %2(p1) ... --- @@ -93,17 +93,17 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY1]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]] - ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[COPY]], [[AND]](s64) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[COPY1]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[ANYEXT]], [[C]] + ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[COPY]], [[AND]](i64) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PTRMASK]](p0) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s16) = G_TRUNC %1 - %3:_(p0) = G_PTRMASK %0, %2 - $vgpr0_vgpr1 = COPY %3 + %1:_(i32) = COPY $vgpr2 + %2:_(i16) = G_TRUNC %1(i32) + %3:_(p0) = G_PTRMASK %0, %2(i16) + $vgpr0_vgpr1 = COPY %3(p0) ... --- @@ -116,14 +116,14 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY1]](s32) - ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[COPY]], [[ZEXT]](s64) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[COPY1]](i32) + ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[COPY]], [[ZEXT]](i64) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PTRMASK]](p0) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(p0) = G_PTRMASK %0, %1 - $vgpr0_vgpr1 = COPY %2 + %1:_(i32) = COPY $vgpr2 + %2:_(p0) = G_PTRMASK %0, %1(i32) + $vgpr0_vgpr1 = COPY %2(p0) ... --- @@ -136,13 +136,13 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[COPY]], [[COPY1]](s64) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[COPY]], [[COPY1]](i64) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PTRMASK]](p0) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(p0) = G_PTRMASK %0, %1 - $vgpr0_vgpr1 = COPY %2 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(p0) = G_PTRMASK %0, %1(i64) + $vgpr0_vgpr1 = COPY %2(p0) ... 
--- @@ -155,14 +155,14 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[COPY1]](s96) - ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[COPY]], [[TRUNC]](s64) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY $vgpr2_vgpr3_vgpr4 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i64) = G_TRUNC [[COPY1]](i96) + ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[COPY]], [[TRUNC]](i64) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PTRMASK]](p0) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s96) = COPY $vgpr2_vgpr3_vgpr4 - %2:_(p0) = G_PTRMASK %0, %1 - $vgpr0_vgpr1 = COPY %2 + %1:_(i96) = COPY $vgpr2_vgpr3_vgpr4 + %2:_(p0) = G_PTRMASK %0, %1(i96) + $vgpr0_vgpr1 = COPY %2(p0) ... --- @@ -175,16 +175,16 @@ body: | ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[AND]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[AND]](i32) ; CHECK-NEXT: $vgpr0 = COPY [[PTRMASK]](p3) %0:_(p3) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %1 - %3:_(p3) = G_PTRMASK %0, %2 - $vgpr0 = COPY %3 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %1(i32) + %3:_(p3) = G_PTRMASK %0, %2(i16) + $vgpr0 = COPY %3(p3) ... --- @@ -197,13 +197,13 @@ body: | ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[COPY1]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[COPY1]](i32) ; CHECK-NEXT: $vgpr0 = COPY [[PTRMASK]](p3) %0:_(p3) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(p3) = G_PTRMASK %0, %1 - $vgpr0 = COPY %2 + %1:_(i32) = COPY $vgpr1 + %2:_(p3) = G_PTRMASK %0, %1(i32) + $vgpr0 = COPY %2(p3) ... --- @@ -216,14 +216,14 @@ body: | ; CHECK: liveins: $vgpr0, $vgpr1_vgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr1_vgpr2 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[TRUNC]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr1_vgpr2 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY1]](i64) + ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[TRUNC]](i32) ; CHECK-NEXT: $vgpr0 = COPY [[PTRMASK]](p3) %0:_(p3) = COPY $vgpr0 - %1:_(s64) = COPY $vgpr1_vgpr2 - %2:_(p3) = G_PTRMASK %0, %1 - $vgpr0 = COPY %2 + %1:_(i64) = COPY $vgpr1_vgpr2 + %2:_(p3) = G_PTRMASK %0, %1(i64) + $vgpr0 = COPY %2(p3) ... 
--- @@ -236,12 +236,12 @@ body: | ; CHECK: liveins: $vgpr0, $vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s96) - ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[TRUNC]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY $vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY1]](i96) + ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[TRUNC]](i32) ; CHECK-NEXT: $vgpr0 = COPY [[PTRMASK]](p3) %0:_(p3) = COPY $vgpr0 - %1:_(s96) = COPY $vgpr1_vgpr2_vgpr3 - %2:_(p3) = G_PTRMASK %0, %1 - $vgpr0 = COPY %2 + %1:_(i96) = COPY $vgpr1_vgpr2_vgpr3 + %2:_(p3) = G_PTRMASK %0, %1(i96) + $vgpr0 = COPY %2(p3) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ptrtoint.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ptrtoint.mir index 59945720ebab5..53317a285043c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ptrtoint.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ptrtoint.mir @@ -11,11 +11,11 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY]](p0) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PTRTOINT]](s64) + ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(i64) = G_PTRTOINT [[COPY]](p0) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PTRTOINT]](i64) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_PTRTOINT %0 - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_PTRTOINT %0(p0) + $vgpr0_vgpr1 = COPY %1(i64) ... --- @@ -28,11 +28,11 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY]](p1) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PTRTOINT]](s64) + ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(i64) = G_PTRTOINT [[COPY]](p1) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PTRTOINT]](i64) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_PTRTOINT %0 - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_PTRTOINT %0(p1) + $vgpr0_vgpr1 = COPY %1(i64) ... --- @@ -45,11 +45,11 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY]](p4) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PTRTOINT]](s64) + ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(i64) = G_PTRTOINT [[COPY]](p4) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PTRTOINT]](i64) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_PTRTOINT %0 - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_PTRTOINT %0(p4) + $vgpr0_vgpr1 = COPY %1(i64) ... --- @@ -62,11 +62,11 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p3) - ; CHECK-NEXT: $vgpr0 = COPY [[PTRTOINT]](s32) + ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(i32) = G_PTRTOINT [[COPY]](p3) + ; CHECK-NEXT: $vgpr0 = COPY [[PTRTOINT]](i32) %0:_(p3) = COPY $vgpr0 - %1:_(s32) = G_PTRTOINT %0 - $vgpr0 = COPY %1 + %1:_(i32) = G_PTRTOINT %0(p3) + $vgpr0 = COPY %1(i32) ... 
--- @@ -79,11 +79,11 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p5) - ; CHECK-NEXT: $vgpr0 = COPY [[PTRTOINT]](s32) + ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(i32) = G_PTRTOINT [[COPY]](p5) + ; CHECK-NEXT: $vgpr0 = COPY [[PTRTOINT]](i32) %0:_(p5) = COPY $vgpr0 - %1:_(s32) = G_PTRTOINT %0 - $vgpr0 = COPY %1 + %1:_(i32) = G_PTRTOINT %0(p5) + $vgpr0 = COPY %1(i32) ... --- @@ -96,11 +96,11 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p999) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY]](p999) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PTRTOINT]](s64) + ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(i64) = G_PTRTOINT [[COPY]](p999) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PTRTOINT]](i64) %0:_(p999) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_PTRTOINT %0 - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_PTRTOINT %0(p999) + $vgpr0_vgpr1 = COPY %1(i64) ... --- @@ -113,12 +113,12 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY]](p0) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[PTRTOINT]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[TRUNC]](s32) + ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(i64) = G_PTRTOINT [[COPY]](p0) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[PTRTOINT]](i64) + ; CHECK-NEXT: $vgpr0 = COPY [[TRUNC]](i32) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_PTRTOINT %0 - $vgpr0 = COPY %1 + %1:_(i32) = G_PTRTOINT %0(p0) + $vgpr0 = COPY %1(i32) ... --- @@ -131,13 +131,13 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY]](p0) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[PTRTOINT]](s64), [[C]](s64) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(i64) = G_PTRTOINT [[COPY]](p0) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[PTRTOINT]](i64), [[C]](i64) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s128) = G_PTRTOINT %0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(i128) = G_PTRTOINT %0(p0) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(i128) ... 
--- @@ -151,13 +151,13 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(p0), [[UV1:%[0-9]+]]:_(p0) = G_UNMERGE_VALUES [[COPY]](<2 x p0>) - ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[UV]](p0) - ; CHECK-NEXT: [[PTRTOINT1:%[0-9]+]]:_(s64) = G_PTRTOINT [[UV1]](p0) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[PTRTOINT]](s64), [[PTRTOINT1]](s64) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(i64) = G_PTRTOINT [[UV]](p0) + ; CHECK-NEXT: [[PTRTOINT1:%[0-9]+]]:_(i64) = G_PTRTOINT [[UV1]](p0) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[PTRTOINT]](i64), [[PTRTOINT1]](i64) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) %0:_(<2 x p0>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s64>) = G_PTRTOINT %0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<2 x i64>) = G_PTRTOINT %0(<2 x p0>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x i64>) ... --- @@ -171,15 +171,15 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(p0), [[UV1:%[0-9]+]]:_(p0) = G_UNMERGE_VALUES [[COPY]](<2 x p0>) - ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[UV]](p0) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[PTRTOINT]](s64) - ; CHECK-NEXT: [[PTRTOINT1:%[0-9]+]]:_(s64) = G_PTRTOINT [[UV1]](p0) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[PTRTOINT1]](s64) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[TRUNC]](s32), [[TRUNC1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(i64) = G_PTRTOINT [[UV]](p0) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[PTRTOINT]](i64) + ; CHECK-NEXT: [[PTRTOINT1:%[0-9]+]]:_(i64) = G_PTRTOINT [[UV1]](p0) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i32) = G_TRUNC [[PTRTOINT1]](i64) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[TRUNC]](i32), [[TRUNC1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) %0:_(<2 x p0>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s32>) = G_PTRTOINT %0 - $vgpr0_vgpr1 = COPY %1 + %1:_(<2 x i32>) = G_PTRTOINT %0(<2 x p0>) + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... --- @@ -192,12 +192,12 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p3) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s29) = G_TRUNC [[PTRTOINT]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s29) + ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(i32) = G_PTRTOINT [[COPY]](p3) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i29) = G_TRUNC [[PTRTOINT]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](i29) %0:_(p3) = COPY $vgpr0 - %1:_(s29) = G_PTRTOINT %0 - S_ENDPGM 0, implicit %1 + %1:_(i29) = G_PTRTOINT %0(p3) + S_ENDPGM 0, implicit %1(i29) ... 
--- @@ -210,10 +210,10 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p3) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s33) = G_ZEXT [[PTRTOINT]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[ZEXT]](s33) + ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(i32) = G_PTRTOINT [[COPY]](p3) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i33) = G_ZEXT [[PTRTOINT]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[ZEXT]](i33) %0:_(p3) = COPY $vgpr0 - %1:_(s33) = G_PTRTOINT %0 - S_ENDPGM 0, implicit %1 + %1:_(i33) = G_PTRTOINT %0(p3) + S_ENDPGM 0, implicit %1(i33) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-rotl-rotr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-rotl-rotr.mir index 7fdee12315754..f74ba0596e117 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-rotl-rotr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-rotl-rotr.mir @@ -15,93 +15,93 @@ body: | ; GFX6-LABEL: name: rotl_i15 ; GFX6: liveins: $sgpr0, $sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 14 - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] - ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[C1]](s32) - ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] - ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[C1]] - ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[C1]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[C1]] - ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C1]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[C1]] - ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[C1]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND1]](s32) - ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] - ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C2]] - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C5]](s32) - ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[AND3]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[LSHR1]] - ; GFX6-NEXT: $sgpr0 = COPY [[OR]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = 
G_CONSTANT i32 14 + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 15 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 32767 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C2]] + ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[C1]](i32) + ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] + ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C4]], [[C1]] + ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[AND]], [[ADD]] + ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[C1]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[AND]], [[MUL1]] + ; GFX6-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[C1]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[C1]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT]](i32), [[C1]] + ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SELECT]], [[C1]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SUB3]], [[SELECT]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[SELECT1]], [[C2]] + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[AND1]](i32) + ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[C]], [[SELECT1]] + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C2]] + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[AND2]], [[C5]](i32) + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[SUB4]], [[C2]] + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LSHR]], [[AND3]](i32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[LSHR1]] + ; GFX6-NEXT: $sgpr0 = COPY [[OR]](i32) ; ; GFX8-LABEL: name: rotl_i15 ; GFX8: liveins: $sgpr0, $sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[C]](s32) - ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] - ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[C]] - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[C]] - ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[C]] - ; GFX8-NEXT: 
[[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[C]] - ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[C]] - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SELECT1]](s32) - ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[AND1]](s16) - ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 14 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) - ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(s16) = G_SUB [[C5]], [[COPY2]] - ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]] - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND2]], [[C6]](s16) - ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[SUB4]], [[C4]] - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[LSHR]], [[AND3]](s16) - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) - ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ANYEXT]], [[ANYEXT1]] - ; GFX8-NEXT: $sgpr0 = COPY [[OR]](s32) - %2:_(s32) = COPY $sgpr0 - %0:_(s15) = G_TRUNC %2(s32) - %3:_(s32) = COPY $sgpr1 - %1:_(s15) = G_TRUNC %3(s32) - %5:_(s15) = G_ROTL %0, %1(s15) - %4:_(s32) = G_ANYEXT %5(s15) - $sgpr0 = COPY %4 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 15 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 32767 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C1]] + ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[C]](i32) + ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C3]], [[C]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[AND]], [[ADD]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[C]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[AND]], [[MUL1]] + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[C]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[C]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT]](i32), [[C]] + ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SELECT]], [[C]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SUB3]], [[SELECT]] + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[SELECT1]](i32) + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C4]] + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + 
; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[AND1]](i16) + ; GFX8-NEXT: [[C5:%[0-9]+]]:_(i16) = G_CONSTANT i16 14 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(i16) = COPY [[TRUNC]](i16) + ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(i16) = G_SUB [[C5]], [[COPY2]] + ; GFX8-NEXT: [[C6:%[0-9]+]]:_(i16) = G_CONSTANT i16 1 + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C4]] + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i16) = G_LSHR [[AND2]], [[C6]](i16) + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[SUB4]], [[C4]] + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i16) = G_LSHR [[LSHR]], [[AND3]](i16) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[SHL]](i16) + ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR1]](i16) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ANYEXT]], [[ANYEXT1]] + ; GFX8-NEXT: $sgpr0 = COPY [[OR]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i15) = G_TRUNC %0(i32) + %2:_(i32) = COPY $sgpr1 + %3:_(i15) = G_TRUNC %2(i32) + %4:_(i15) = G_ROTL %1, %3(i15) + %5:_(i32) = G_ANYEXT %4(i15) + $sgpr0 = COPY %5(i32) ... --- @@ -117,51 +117,51 @@ body: | ; GFX6-LABEL: name: rotl_i16 ; GFX6: liveins: $sgpr0, $sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[COPY1]] - ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SUB]](s32) - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16) - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[ZEXT]](s32) - ; GFX6-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C2]] - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[ZEXT1]](s32) - ; GFX6-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC2]], [[TRUNC3]] - ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX6-NEXT: $sgpr0 = COPY [[ANYEXT]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr1 + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 15 + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C1]], [[COPY1]] + ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[SUB]](i32) + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C]] + ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[AND]](i16) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[ZEXT]](i32) + ; GFX6-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[SHL]](i32) + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C]] + ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[AND1]](i16) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C2]] + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[AND2]], [[ZEXT1]](i32) + ; GFX6-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[TRUNC2]], [[TRUNC3]] + ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = 
G_ANYEXT [[OR]](i16) + ; GFX6-NEXT: $sgpr0 = COPY [[ANYEXT]](i32) ; ; GFX8-LABEL: name: rotl_i16 ; GFX8: liveins: $sgpr0, $sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[C]], [[TRUNC1]] - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s16) - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[SUB]], [[C1]] - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[AND1]](s16) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[SHL]], [[LSHR]] - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX8-NEXT: $sgpr0 = COPY [[ANYEXT]](s32) - %2:_(s32) = COPY $sgpr0 - %0:_(s16) = G_TRUNC %2(s32) - %3:_(s32) = COPY $sgpr1 - %1:_(s16) = G_TRUNC %3(s32) - %5:_(s16) = G_ROTL %0, %1(s16) - %4:_(s32) = G_ANYEXT %5(s16) - $sgpr0 = COPY %4 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr1 + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 0 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 15 + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i16) = G_SUB [[C]], [[TRUNC1]] + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C1]] + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[AND]](i16) + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[SUB]], [[C1]] + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC]], [[AND1]](i16) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[SHL]], [[LSHR]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[OR]](i16) + ; GFX8-NEXT: $sgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(i32) = COPY $sgpr1 + %3:_(i16) = G_TRUNC %2(i32) + %4:_(i16) = G_ROTL %1, %3(i16) + %5:_(i32) = G_ANYEXT %4(i16) + $sgpr0 = COPY %5(i32) ... --- @@ -177,16 +177,16 @@ body: | ; GFX-LABEL: name: rotl_i32 ; GFX: liveins: $sgpr0, $sgpr1 ; GFX-NEXT: {{ $}} - ; GFX-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; GFX-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; GFX-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] - ; GFX-NEXT: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[COPY]], [[COPY]], [[SUB]](s32) - ; GFX-NEXT: $sgpr0 = COPY [[FSHR]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ROTL %0, %1(s32) - $sgpr0 = COPY %2 + ; GFX-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; GFX-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr1 + ; GFX-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C]], [[COPY1]] + ; GFX-NEXT: [[FSHR:%[0-9]+]]:_(i32) = G_FSHR [[COPY]], [[COPY]], [[SUB]](i32) + ; GFX-NEXT: $sgpr0 = COPY [[FSHR]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_ROTL %0, %1(i32) + $sgpr0 = COPY %2(i32) ... 
--- @@ -202,48 +202,48 @@ body: | ; GFX-LABEL: name: rotl_i31 ; GFX: liveins: $sgpr0, $sgpr1 ; GFX-NEXT: {{ $}} - ; GFX-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; GFX-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; GFX-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 30 - ; GFX-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] - ; GFX-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[C1]](s32) - ; GFX-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] - ; GFX-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[C1]] - ; GFX-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; GFX-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[C1]] - ; GFX-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[C1]] - ; GFX-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C1]] - ; GFX-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[C1]] - ; GFX-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[C1]] - ; GFX-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] - ; GFX-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND1]](s32) - ; GFX-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] - ; GFX-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C2]] - ; GFX-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C5]](s32) - ; GFX-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] - ; GFX-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[AND3]](s32) - ; GFX-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[LSHR1]] - ; GFX-NEXT: $sgpr0 = COPY [[OR]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s31) = G_TRUNC %0(s32) - %3:_(s31) = G_TRUNC %1(s32) - %4:_(s31) = G_ROTL %2, %3(s31) - %5:_(s32) = G_ANYEXT %4(s31) - $sgpr0 = COPY %5 + ; GFX-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; GFX-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr1 + ; GFX-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 30 + ; GFX-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; GFX-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C2]] + ; GFX-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[C1]](i32) + ; GFX-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] + ; GFX-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C4]], [[C1]] + ; GFX-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX-NEXT: [[UMULH:%[0-9]+]]:_(i32) = 
G_UMULH [[FPTOUI]], [[MUL]] + ; GFX-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[AND]], [[ADD]] + ; GFX-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[C1]] + ; GFX-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[AND]], [[MUL1]] + ; GFX-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[C1]] + ; GFX-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[C1]] + ; GFX-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT]](i32), [[C1]] + ; GFX-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SELECT]], [[C1]] + ; GFX-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SUB3]], [[SELECT]] + ; GFX-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[SELECT1]], [[C2]] + ; GFX-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[AND1]](i32) + ; GFX-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[C]], [[SELECT1]] + ; GFX-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C2]] + ; GFX-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[AND2]], [[C5]](i32) + ; GFX-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[SUB4]], [[C2]] + ; GFX-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[LSHR]], [[AND3]](i32) + ; GFX-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[LSHR1]] + ; GFX-NEXT: $sgpr0 = COPY [[OR]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i31) = G_TRUNC %0(i32) + %3:_(i31) = G_TRUNC %1(i32) + %4:_(i31) = G_ROTL %2, %3(i31) + %5:_(i32) = G_ANYEXT %4(i31) + $sgpr0 = COPY %5(i32) ... --- @@ -259,27 +259,27 @@ body: | ; GFX-LABEL: name: rotl_i64 ; GFX: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX-NEXT: {{ $}} - ; GFX-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $sgpr0_sgpr1 - ; GFX-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $sgpr2_sgpr3 - ; GFX-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 - ; GFX-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64) - ; GFX-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] - ; GFX-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] - ; GFX-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) - ; GFX-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C1]] - ; GFX-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) - ; GFX-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC]](s32) - ; GFX-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[MV]], [[C1]] - ; GFX-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64) - ; GFX-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY]], [[TRUNC1]](s32) - ; GFX-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[LSHR]] - ; GFX-NEXT: $sgpr0_sgpr1 = COPY [[OR]](s64) - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s64) = COPY $sgpr2_sgpr3 - %2:_(s64) = G_ROTL %0, %1(s64) - $sgpr0_sgpr1 = COPY %2 + ; GFX-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $sgpr0_sgpr1 + ; GFX-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $sgpr2_sgpr3 + ; GFX-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 63 + ; GFX-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C]](i64) + ; GFX-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV]], [[UV2]] + ; GFX-NEXT: 
[[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] + ; GFX-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO]](i32), [[USUBE]](i32) + ; GFX-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[COPY1]], [[C1]] + ; GFX-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[AND]](i64) + ; GFX-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[COPY]], [[TRUNC]](i32) + ; GFX-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[MV]], [[C1]] + ; GFX-NEXT: [[TRUNC1:%[0-9]+]]:_(i32) = G_TRUNC [[AND1]](i64) + ; GFX-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[COPY]], [[TRUNC1]](i32) + ; GFX-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[SHL]], [[LSHR]] + ; GFX-NEXT: $sgpr0_sgpr1 = COPY [[OR]](i64) + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(i64) = COPY $sgpr2_sgpr3 + %2:_(i64) = G_ROTL %0, %1(i64) + $sgpr0_sgpr1 = COPY %2(i64) ... --- @@ -295,25 +295,25 @@ body: | ; GFX-LABEL: name: rotl_v4i32 ; GFX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7 ; GFX-NEXT: {{ $}} - ; GFX-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr4_sgpr5_sgpr6_sgpr7 - ; GFX-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; GFX-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) - ; GFX-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UV4]] - ; GFX-NEXT: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[UV]], [[UV]], [[SUB]](s32) - ; GFX-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UV5]] - ; GFX-NEXT: [[FSHR1:%[0-9]+]]:_(s32) = G_FSHR [[UV1]], [[UV1]], [[SUB1]](s32) - ; GFX-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UV6]] - ; GFX-NEXT: [[FSHR2:%[0-9]+]]:_(s32) = G_FSHR [[UV2]], [[UV2]], [[SUB2]](s32) - ; GFX-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UV7]] - ; GFX-NEXT: [[FSHR3:%[0-9]+]]:_(s32) = G_FSHR [[UV3]], [[UV3]], [[SUB3]](s32) - ; GFX-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FSHR]](s32), [[FSHR1]](s32), [[FSHR2]](s32), [[FSHR3]](s32) - ; GFX-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) - %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %1:_(<4 x s32>) = COPY $sgpr4_sgpr5_sgpr6_sgpr7 - %2:_(<4 x s32>) = G_ROTL %0, %1(<4 x s32>) - $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %2 + ; GFX-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; GFX-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<4 x i32>) + ; GFX-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C]], [[UV4]] + ; GFX-NEXT: [[FSHR:%[0-9]+]]:_(i32) = G_FSHR [[UV]], [[UV]], [[SUB]](i32) + ; GFX-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C]], [[UV5]] + ; GFX-NEXT: [[FSHR1:%[0-9]+]]:_(i32) = G_FSHR [[UV1]], [[UV1]], [[SUB1]](i32) + ; GFX-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[C]], [[UV6]] + ; GFX-NEXT: [[FSHR2:%[0-9]+]]:_(i32) = G_FSHR [[UV2]], [[UV2]], [[SUB2]](i32) + ; GFX-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[C]], [[UV7]] + ; GFX-NEXT: [[FSHR3:%[0-9]+]]:_(i32) = G_FSHR [[UV3]], [[UV3]], [[SUB3]](i32) + ; GFX-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[FSHR]](i32), 
[[FSHR1]](i32), [[FSHR2]](i32), [[FSHR3]](i32) + ; GFX-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) + %0:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %1:_(<4 x i32>) = COPY $sgpr4_sgpr5_sgpr6_sgpr7 + %2:_(<4 x i32>) = G_ROTL %0, %1(<4 x i32>) + $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %2(<4 x i32>) ... --- @@ -329,51 +329,51 @@ body: | ; GFX6-LABEL: name: rotr_i16 ; GFX6: liveins: $sgpr0, $sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[COPY1]] - ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SUB]](s32) - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C2]] - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[ZEXT]](s32) - ; GFX6-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND2]](s16) - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[ZEXT1]](s32) - ; GFX6-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC2]], [[TRUNC3]] - ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX6-NEXT: $sgpr0 = COPY [[ANYEXT]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr1 + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 15 + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C1]], [[COPY1]] + ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[SUB]](i32) + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C]] + ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[AND]](i16) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C2]] + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[AND1]], [[ZEXT]](i32) + ; GFX6-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C]] + ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[AND2]](i16) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[ZEXT1]](i32) + ; GFX6-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[SHL]](i32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[TRUNC2]], [[TRUNC3]] + ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[OR]](i16) + ; GFX6-NEXT: $sgpr0 = COPY [[ANYEXT]](i32) ; ; GFX8-LABEL: name: rotr_i16 ; GFX8: liveins: $sgpr0, $sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[C]], [[TRUNC1]] - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[AND]](s16) - ; GFX8-NEXT: 
[[AND1:%[0-9]+]]:_(s16) = G_AND [[SUB]], [[C1]] - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND1]](s16) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[LSHR]], [[SHL]] - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX8-NEXT: $sgpr0 = COPY [[ANYEXT]](s32) - %2:_(s32) = COPY $sgpr0 - %0:_(s16) = G_TRUNC %2(s32) - %3:_(s32) = COPY $sgpr1 - %1:_(s16) = G_TRUNC %3(s32) - %5:_(s16) = G_ROTR %0, %1(s16) - %4:_(s32) = G_ANYEXT %5(s16) - $sgpr0 = COPY %4 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr1 + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 0 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 15 + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i16) = G_SUB [[C]], [[TRUNC1]] + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C1]] + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC]], [[AND]](i16) + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[SUB]], [[C1]] + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[AND1]](i16) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[LSHR]], [[SHL]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[OR]](i16) + ; GFX8-NEXT: $sgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(i32) = COPY $sgpr1 + %3:_(i16) = G_TRUNC %2(i32) + %4:_(i16) = G_ROTR %1, %3(i16) + %5:_(i32) = G_ANYEXT %4(i16) + $sgpr0 = COPY %5(i32) ... --- @@ -389,14 +389,14 @@ body: | ; GFX-LABEL: name: rotr_i32 ; GFX: liveins: $sgpr0, $sgpr1 ; GFX-NEXT: {{ $}} - ; GFX-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; GFX-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; GFX-NEXT: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[COPY]], [[COPY]], [[COPY1]](s32) - ; GFX-NEXT: $sgpr0 = COPY [[FSHR]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ROTR %0, %1(s32) - $sgpr0 = COPY %2 + ; GFX-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $sgpr0 + ; GFX-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $sgpr1 + ; GFX-NEXT: [[FSHR:%[0-9]+]]:_(i32) = G_FSHR [[COPY]], [[COPY]], [[COPY1]](i32) + ; GFX-NEXT: $sgpr0 = COPY [[FSHR]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_ROTR %0, %1(i32) + $sgpr0 = COPY %2(i32) ... 
--- @@ -412,27 +412,27 @@ body: | ; GFX-LABEL: name: rotr_i64 ; GFX: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX-NEXT: {{ $}} - ; GFX-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $sgpr0_sgpr1 - ; GFX-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $sgpr2_sgpr3 - ; GFX-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 - ; GFX-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64) - ; GFX-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] - ; GFX-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] - ; GFX-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) - ; GFX-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C1]] - ; GFX-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) - ; GFX-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY]], [[TRUNC]](s32) - ; GFX-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[MV]], [[C1]] - ; GFX-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64) - ; GFX-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC1]](s32) - ; GFX-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] - ; GFX-NEXT: $sgpr0_sgpr1 = COPY [[OR]](s64) - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s64) = COPY $sgpr2_sgpr3 - %2:_(s64) = G_ROTR %0, %1(s64) - $sgpr0_sgpr1 = COPY %2 + ; GFX-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $sgpr0_sgpr1 + ; GFX-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $sgpr2_sgpr3 + ; GFX-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 63 + ; GFX-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C]](i64) + ; GFX-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV]], [[UV2]] + ; GFX-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] + ; GFX-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO]](i32), [[USUBE]](i32) + ; GFX-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[COPY1]], [[C1]] + ; GFX-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[AND]](i64) + ; GFX-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[COPY]], [[TRUNC]](i32) + ; GFX-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[MV]], [[C1]] + ; GFX-NEXT: [[TRUNC1:%[0-9]+]]:_(i32) = G_TRUNC [[AND1]](i64) + ; GFX-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[COPY]], [[TRUNC1]](i32) + ; GFX-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL]] + ; GFX-NEXT: $sgpr0_sgpr1 = COPY [[OR]](i64) + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(i64) = COPY $sgpr2_sgpr3 + %2:_(i64) = G_ROTR %0, %1(i64) + $sgpr0_sgpr1 = COPY %2(i64) ... 
--- @@ -448,19 +448,19 @@ body: | ; GFX-LABEL: name: rotr_v4i32 ; GFX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7 ; GFX-NEXT: {{ $}} - ; GFX-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr4_sgpr5_sgpr6_sgpr7 - ; GFX-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; GFX-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) - ; GFX-NEXT: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[UV]], [[UV]], [[UV4]](s32) - ; GFX-NEXT: [[FSHR1:%[0-9]+]]:_(s32) = G_FSHR [[UV1]], [[UV1]], [[UV5]](s32) - ; GFX-NEXT: [[FSHR2:%[0-9]+]]:_(s32) = G_FSHR [[UV2]], [[UV2]], [[UV6]](s32) - ; GFX-NEXT: [[FSHR3:%[0-9]+]]:_(s32) = G_FSHR [[UV3]], [[UV3]], [[UV7]](s32) - ; GFX-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FSHR]](s32), [[FSHR1]](s32), [[FSHR2]](s32), [[FSHR3]](s32) - ; GFX-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) - %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %1:_(<4 x s32>) = COPY $sgpr4_sgpr5_sgpr6_sgpr7 - %2:_(<4 x s32>) = G_ROTR %0, %1(<4 x s32>) - $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %2 + ; GFX-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i32>) = COPY $sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; GFX-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<4 x i32>) + ; GFX-NEXT: [[FSHR:%[0-9]+]]:_(i32) = G_FSHR [[UV]], [[UV]], [[UV4]](i32) + ; GFX-NEXT: [[FSHR1:%[0-9]+]]:_(i32) = G_FSHR [[UV1]], [[UV1]], [[UV5]](i32) + ; GFX-NEXT: [[FSHR2:%[0-9]+]]:_(i32) = G_FSHR [[UV2]], [[UV2]], [[UV6]](i32) + ; GFX-NEXT: [[FSHR3:%[0-9]+]]:_(i32) = G_FSHR [[UV3]], [[UV3]], [[UV7]](i32) + ; GFX-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[FSHR]](i32), [[FSHR1]](i32), [[FSHR2]](i32), [[FSHR3]](i32) + ; GFX-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) + %0:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %1:_(<4 x i32>) = COPY $sgpr4_sgpr5_sgpr6_sgpr7 + %2:_(<4 x i32>) = G_ROTR %0, %1(<4 x i32>) + $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %2(<4 x i32>) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sadde.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sadde.mir index f977ad3327195..ecd2954d8b52d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sadde.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sadde.mir @@ -10,24 +10,24 @@ body: | ; CHECK-LABEL: name: test_sadde_s32 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; CHECK-NEXT: [[SADDE:%[0-9]+]]:_(s32), [[SADDE1:%[0-9]+]]:_(s1) = G_SADDE [[COPY]], [[COPY1]], [[ICMP]] - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SADDE1]](s1) - ; CHECK-NEXT: $vgpr0 = COPY [[SADDE]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(eq), %2, %3 - %5:_(s32), %6:_(s1) = G_SADDE %0, %1, %4 - %7:_(s32) = G_ZEXT %6 - $vgpr0 = COPY %5 - $vgpr1 = COPY %7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; CHECK-NEXT: [[SADDE:%[0-9]+]]:_(i32), [[SADDE1:%[0-9]+]]:_(i1) = G_SADDE [[COPY]], [[COPY1]], [[ICMP]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[SADDE1]](i1) + ; CHECK-NEXT: $vgpr0 = COPY [[SADDE]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + %5:_(i32), %6:_(i1) = G_SADDE %0, %1, %4 + %7:_(i32) = G_ZEXT %6(i1) + $vgpr0 = COPY %5(i32) + $vgpr1 = COPY %7(i32) ... 
--- @@ -39,36 +39,36 @@ body: | ; CHECK-LABEL: name: test_sadde_v2s32 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV]](s32), [[C]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV1]](s32), [[C]] - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK-NEXT: [[SADDE:%[0-9]+]]:_(s32), [[SADDE1:%[0-9]+]]:_(s1) = G_SADDE [[UV2]], [[UV4]], [[ICMP]] - ; CHECK-NEXT: [[SADDE2:%[0-9]+]]:_(s32), [[SADDE3:%[0-9]+]]:_(s1) = G_SADDE [[UV3]], [[UV5]], [[ICMP1]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SADDE]](s32), [[SADDE2]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SADDE1]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SADDE3]](s1) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - ; CHECK-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = COPY $vgpr4_vgpr5 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(<2 x s32>) = G_BUILD_VECTOR %3, %3 - %5:_(<2 x s1>) = G_ICMP intpred(eq), %2, %4 - %6:_(<2 x s32>), %7:_(<2 x s1>) = G_SADDE %0, %1, %5 - %8:_(<2 x s32>) = G_ZEXT %7 - $vgpr0_vgpr1 = COPY %6 - $vgpr2_vgpr3 = COPY %8 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr4_vgpr5 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY2]](<2 x i32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[UV]](i32), [[C]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[UV1]](i32), [[C]] + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; CHECK-NEXT: [[SADDE:%[0-9]+]]:_(i32), [[SADDE1:%[0-9]+]]:_(i1) = G_SADDE [[UV2]], [[UV4]], [[ICMP]] + ; CHECK-NEXT: [[SADDE2:%[0-9]+]]:_(i32), [[SADDE3:%[0-9]+]]:_(i1) = G_SADDE [[UV3]], [[UV5]], [[ICMP1]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SADDE]](i32), [[SADDE2]](i32) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[SADDE1]](i1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[SADDE3]](i1) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[ANYEXT]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[ANYEXT1]], [[C1]] + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR 
[[AND]](i32), [[AND1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + ; CHECK-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i32>) = COPY $vgpr4_vgpr5 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(<2 x i32>) = G_BUILD_VECTOR %3(i32), %3(i32) + %5:_(<2 x i1>) = G_ICMP intpred(eq), %2(<2 x i32>), %4 + %6:_(<2 x i32>), %7:_(<2 x i1>) = G_SADDE %0, %1, %5 + %8:_(<2 x i32>) = G_ZEXT %7(<2 x i1>) + $vgpr0_vgpr1 = COPY %6(<2 x i32>) + $vgpr2_vgpr3 = COPY %8(<2 x i32>) ... --- @@ -80,30 +80,30 @@ body: | ; CHECK-LABEL: name: test_sadde_s16 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 - ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG %13, 16 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG1]](s32) - ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[SEXT_INREG]], [[COPY2]], [[ICMP]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UADDE]](s32), [[SEXT_INREG1]] - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP1]](s1) - ; CHECK-NEXT: $vgpr0 = COPY [[UADDE]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(eq), %2, %3 - %5:_(s16) = G_TRUNC %0 - %6:_(s16) = G_TRUNC %1 - %7:_(s16), %8:_(s1) = G_SADDE %6, %7, %4 - %9:_(s32) = G_ANYEXT %7 - %10:_(s32) = G_ZEXT %8 - $vgpr0 = COPY %9 - $vgpr1 = COPY %10 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C]] + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 16 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG %13, 16 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[SEXT_INREG1]](i32) + ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[SEXT_INREG]], [[COPY2]], [[ICMP]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UADDE]](i32), [[SEXT_INREG1]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[ICMP1]](i1) + ; CHECK-NEXT: $vgpr0 = COPY [[UADDE]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + %5:_(i16) = G_TRUNC %0(i32) + %6:_(i16) = G_TRUNC %1(i32) + %7:_(i16), %8:_(i1) = G_SADDE %6, %7, %4 + %9:_(i32) = G_ANYEXT %7(i16) + %10:_(i32) = G_ZEXT %8(i1) + $vgpr0 = COPY %9(i32) + $vgpr1 = COPY %10(i32) ... 
--- @@ -115,26 +115,26 @@ body: | ; CHECK-LABEL: name: test_sadde_s64 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV]], [[UV2]], [[ICMP]] - ; CHECK-NEXT: [[SADDE:%[0-9]+]]:_(s32), [[SADDE1:%[0-9]+]]:_(s1) = G_SADDE [[UV1]], [[UV3]], [[UADDE1]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE]](s32), [[SADDE]](s32) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SADDE1]](s1) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) - ; CHECK-NEXT: $vgpr2 = COPY [[ZEXT]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s32) = COPY $vgpr4 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(eq), %2, %3 - %5:_(s64), %6:_(s1) = G_SADDE %0, %1, %4 - %7:_(s32) = G_ZEXT %6 - $vgpr0_vgpr1 = COPY %5 - $vgpr2 = COPY %7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV]], [[UV2]], [[ICMP]] + ; CHECK-NEXT: [[SADDE:%[0-9]+]]:_(i32), [[SADDE1:%[0-9]+]]:_(i1) = G_SADDE [[UV1]], [[UV3]], [[UADDE1]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDE]](i32), [[SADDE]](i32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[SADDE1]](i1) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + ; CHECK-NEXT: $vgpr2 = COPY [[ZEXT]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i32) = COPY $vgpr4 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + %5:_(i64), %6:_(i1) = G_SADDE %0, %1, %4 + %7:_(i32) = G_ZEXT %6(i1) + $vgpr0_vgpr1 = COPY %5(i64) + $vgpr2 = COPY %7(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir index 16ad07e0df58e..956ba865e4b88 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir @@ -10,30 +10,30 @@ body: | ; CHECK-LABEL: name: test_saddo_s7 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD]], 7 - ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 7 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] - ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 7 - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[C]] - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C1]] - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1) - ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s7) = G_TRUNC %0 - %3:_(s7) = G_TRUNC %1 - %4:_(s7), %5:_(s1) = G_SADDO %2, %3 - %6:_(s32) = G_ZEXT %4 - %7:_(s32) = G_ZEXT %5 - $vgpr0 = COPY %6 - $vgpr1 = COPY %7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[ADD]], 7 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 7 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SEXT_INREG]](i32), [[SEXT_INREG1]] + ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY1]], 7 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SEXT_INREG2]](i32), [[C]] + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(i1) = G_XOR [[ICMP1]], [[ICMP]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 127 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[ADD]], [[C1]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[XOR]](i1) + ; CHECK-NEXT: $vgpr0 = COPY [[AND]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i7) = G_TRUNC %0(i32) + %3:_(i7) = G_TRUNC %1(i32) + %4:_(i7), %5:_(i1) = G_SADDO %2, %3 + %6:_(i32) = G_ZEXT %4(i7) + %7:_(i32) = G_ZEXT %5(i1) + $vgpr0 = COPY %6(i32) + $vgpr1 = COPY %7(i32) ... 
--- @@ -45,28 +45,28 @@ body: | ; CHECK-LABEL: name: test_saddo_s16 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD]], 16 - ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] - ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 16 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[C]] - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1) - ; CHECK-NEXT: $vgpr0 = COPY [[ADD]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s16), %5:_(s1) = G_SADDO %2, %3 - %6:_(s32) = G_ANYEXT %4 - %7:_(s32) = G_ZEXT %5 - $vgpr0 = COPY %6 - $vgpr1 = COPY %7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[ADD]], 16 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 16 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SEXT_INREG]](i32), [[SEXT_INREG1]] + ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY1]], 16 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SEXT_INREG2]](i32), [[C]] + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(i1) = G_XOR [[ICMP1]], [[ICMP]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[XOR]](i1) + ; CHECK-NEXT: $vgpr0 = COPY [[ADD]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i16), %5:_(i1) = G_SADDO %2, %3 + %6:_(i32) = G_ANYEXT %4(i16) + %7:_(i32) = G_ZEXT %5(i1) + $vgpr0 = COPY %6(i32) + $vgpr1 = COPY %7(i32) ... 
--- @@ -78,23 +78,23 @@ body: | ; CHECK-LABEL: name: test_saddo_s32 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[ADD]](s32), [[COPY]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s32), [[C]] - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ADD]](s32) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32), %3:_(s1) = G_SADDO %0, %1 - %4:_(s32) = G_ZEXT %3 - $vgpr0 = COPY %2 - $vgpr1 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[ADD]](i32), [[COPY]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[COPY1]](i32), [[C]] + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(i1) = G_XOR [[ICMP1]], [[ICMP]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[ADD]](i32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[XOR]](i1) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32), %3:_(i1) = G_SADDO %0, %1 + %4:_(i32) = G_ZEXT %3(i1) + $vgpr0 = COPY %2(i32) + $vgpr1 = COPY %4(i32) ... 
--- @@ -106,27 +106,27 @@ body: | ; CHECK-LABEL: name: test_saddo_s64 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[COPY]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s64), [[C]] - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[MV]](s64) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY2]](s64) - ; CHECK-NEXT: $vgpr2 = COPY [[ZEXT]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64), %3:_(s1) = G_SADDO %0, %1 - %4:_(s32) = G_ZEXT %3 - $vgpr0_vgpr1 = COPY %2 - $vgpr2 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV]], [[UV2]] + ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[MV]](i64), [[COPY]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[COPY1]](i64), [[C]] + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(i1) = G_XOR [[ICMP1]], [[ICMP]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i64) = COPY [[MV]](i64) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[XOR]](i1) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY2]](i64) + ; CHECK-NEXT: $vgpr2 = COPY [[ZEXT]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64), %3:_(i1) = G_SADDO %0, %1 + %4:_(i32) = G_ZEXT %3(i1) + $vgpr0_vgpr1 = COPY %2(i64) + $vgpr2 = COPY %4(i32) ... 
--- @@ -138,54 +138,54 @@ body: | ; CHECK-LABEL: name: test_saddo_v2s16 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[BITCAST]], [[BITCAST1]] - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[LSHR1]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD]], 16 - ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST3]], 16 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] - ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD1]], 16 - ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR2]], 16 - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[SEXT_INREG3]] - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; CHECK-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST4]], 16 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG4]](s32), [[COPY2]] - ; CHECK-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR3]], 16 - ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG5]](s32), [[C2]] - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP2]], [[ICMP]] - ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP1]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY [[BITCAST2]](<2 x s16>) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND2]](s32), [[AND3]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY3]](<2 x s16>) - ; CHECK-NEXT: $vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>), %3:_(<2 x s1>) = G_SADDO %0, %1 - %4:_(<2 x s32>) = G_ZEXT %3 - $vgpr0 = COPY %2 - $vgpr1_vgpr2 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: 
[[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[BITCAST]], [[BITCAST1]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[LSHR]], [[LSHR1]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[ADD]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[ADD1]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[ADD]], 16 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST3]], 16 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SEXT_INREG]](i32), [[SEXT_INREG1]] + ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[ADD1]], 16 + ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR2]], 16 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SEXT_INREG2]](i32), [[SEXT_INREG3]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; CHECK-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST4]], 16 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SEXT_INREG4]](i32), [[COPY2]] + ; CHECK-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR3]], 16 + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SEXT_INREG5]](i32), [[C2]] + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(i1) = G_XOR [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(i1) = G_XOR [[ICMP3]], [[ICMP1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[XOR]](i1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[XOR1]](i1) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i16>) = COPY [[BITCAST2]](<2 x i16>) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[ANYEXT]], [[C3]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[ANYEXT1]], [[C3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[AND2]](i32), [[AND3]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY3]](<2 x i16>) + ; CHECK-NEXT: $vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>), %3:_(<2 x i1>) = G_SADDO %0, %1 + %4:_(<2 x i32>) = G_ZEXT %3(<2 x i1>) + $vgpr0 = COPY %2(<2 x i16>) + $vgpr1_vgpr2 = COPY %4(<2 x i32>) ... 
--- @@ -196,90 +196,90 @@ body: | ; CHECK-LABEL: name: test_saddo_v3s16 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[BITCAST]], [[BITCAST2]] - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[LSHR1]] - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[BITCAST1]], [[BITCAST3]] - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD]], 16 - ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST4]], 16 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] - ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD1]], 16 - ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR2]], 16 - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[SEXT_INREG3]] - ; CHECK-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD2]], 16 - ; CHECK-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST5]], 16 - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG4]](s32), [[SEXT_INREG5]] - ; CHECK-NEXT: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) - ; CHECK-NEXT: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST6]], 16 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG6]](s32), [[COPY2]] - ; CHECK-NEXT: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR3]], 16 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG7]](s32), [[COPY3]] - ; CHECK-NEXT: [[SEXT_INREG8:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST7]], 16 - ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG8]](s32), [[C1]] - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], 
[[ICMP]] - ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP4]], [[ICMP1]] - ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(s1) = G_XOR [[ICMP5]], [[ICMP2]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR2]](s1) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) - ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C2]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C2]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C2]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST8]], [[C2]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C2]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR4]], [[SHL2]] - ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]] - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]] - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C3]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND5]](s32), [[AND6]](s32), [[AND7]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - %2:_(<3 x s16>), %3:_(<3 x s16>) = G_UNMERGE_VALUES %0 - %4:_(<3 x s16>), %5:_(<3 x s16>) = G_UNMERGE_VALUES %1 - %6:_(<3 x s16>), %7:_(<3 x s1>) = G_SADDO %2, %4 - %8:_(<3 x s16>) = G_IMPLICIT_DEF - %9:_(<6 x s16>) = G_CONCAT_VECTORS %6, %8 - %10:_(<3 x s32>) = G_ZEXT %7 - $vgpr0_vgpr1_vgpr2 = COPY %9 - $vgpr0_vgpr1_vgpr2 = COPY %10 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES 
[[COPY1]](<6 x i16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[BITCAST]], [[BITCAST2]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[LSHR]], [[LSHR1]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[BITCAST1]], [[BITCAST3]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>), [[UV8:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[UV6]](<2 x i16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[UV7]](<2 x i16>) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[ADD]], 16 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST4]], 16 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SEXT_INREG]](i32), [[SEXT_INREG1]] + ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[ADD1]], 16 + ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR2]], 16 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SEXT_INREG2]](i32), [[SEXT_INREG3]] + ; CHECK-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(i32) = G_SEXT_INREG [[ADD2]], 16 + ; CHECK-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST5]], 16 + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SEXT_INREG4]](i32), [[SEXT_INREG5]] + ; CHECK-NEXT: [[UV9:%[0-9]+]]:_(<2 x i16>), [[UV10:%[0-9]+]]:_(<2 x i16>), [[UV11:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<6 x i16>) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[UV9]](<2 x i16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST6]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[UV10]](<2 x i16>) + ; CHECK-NEXT: [[SEXT_INREG6:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST6]], 16 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C1]](i32) + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SEXT_INREG6]](i32), [[COPY2]] + ; CHECK-NEXT: [[SEXT_INREG7:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR3]], 16 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[C1]](i32) + ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SEXT_INREG7]](i32), [[COPY3]] + ; CHECK-NEXT: [[SEXT_INREG8:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST7]], 16 + ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SEXT_INREG8]](i32), [[C1]] + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(i1) = G_XOR [[ICMP3]], [[ICMP]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(i1) = G_XOR [[ICMP4]], [[ICMP1]] + ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(i1) = G_XOR [[ICMP5]], [[ICMP2]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[XOR]](i1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[XOR1]](i1) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[XOR2]](i1) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(<2 x i16>), [[UV13:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[UV12]](<2 x i16>) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST8]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[UV13]](<2 x i16>) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: 
[[AND:%[0-9]+]]:_(i32) = G_AND [[ADD]], [[C2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[ADD1]], [[C2]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[ADD2]], [[C2]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST8]], [[C2]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[BITCAST9]], [[C2]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND4]], [[C]](i32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR4]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x i16>), [[BITCAST11]](<2 x i16>), [[BITCAST12]](<2 x i16>) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[ANYEXT]], [[C3]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(i32) = G_AND [[ANYEXT1]], [[C3]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(i32) = G_AND [[ANYEXT2]], [[C3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[AND5]](i32), [[AND6]](i32), [[AND7]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) + %0:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<6 x i16>) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(<3 x i16>), %3:_(<3 x i16>) = G_UNMERGE_VALUES %0(<6 x i16>) + %4:_(<3 x i16>), %5:_(<3 x i16>) = G_UNMERGE_VALUES %1(<6 x i16>) + %6:_(<3 x i16>), %7:_(<3 x i1>) = G_SADDO %2, %4 + %8:_(<3 x i16>) = G_IMPLICIT_DEF + %9:_(<6 x i16>) = G_CONCAT_VECTORS %6(<3 x i16>), %8(<3 x i16>) + %10:_(<3 x i32>) = G_ZEXT %7(<3 x i1>) + $vgpr0_vgpr1_vgpr2 = COPY %9(<6 x i16>) + $vgpr0_vgpr1_vgpr2 = COPY %10(<3 x i32>) ... 
--- @@ -291,92 +291,92 @@ body: | ; CHECK-LABEL: name: test_saddo_v4s16 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr1_vgpr2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[BITCAST]], [[BITCAST2]] - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[LSHR2]] - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[BITCAST1]], [[BITCAST3]] - ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[LSHR1]], [[LSHR3]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD3]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD]], 16 - ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST6]], 16 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] - ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD1]], 16 - ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR4]], 16 - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[SEXT_INREG3]] - ; CHECK-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD2]], 16 - ; CHECK-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST7]], 16 - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG4]](s32), [[SEXT_INREG5]] - ; CHECK-NEXT: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD3]], 16 - ; 
CHECK-NEXT: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR5]], 16 - ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG6]](s32), [[SEXT_INREG7]] - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32) - ; CHECK-NEXT: [[SEXT_INREG8:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST8]], 16 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG8]](s32), [[COPY2]] - ; CHECK-NEXT: [[SEXT_INREG9:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR6]], 16 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG9]](s32), [[COPY3]] - ; CHECK-NEXT: [[SEXT_INREG10:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST9]], 16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG10]](s32), [[COPY4]] - ; CHECK-NEXT: [[SEXT_INREG11:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR7]], 16 - ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG11]](s32), [[C2]] - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP4]], [[ICMP]] - ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP5]], [[ICMP1]] - ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(s1) = G_XOR [[ICMP6]], [[ICMP2]] - ; CHECK-NEXT: [[XOR3:%[0-9]+]]:_(s1) = G_XOR [[ICMP7]], [[ICMP3]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR2]](s1) - ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR3]](s1) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<4 x s16>) = COPY [[CONCAT_VECTORS]](<4 x s16>) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]] - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C3]] - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C3]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND4]](s32), [[AND5]](s32), [[AND6]](s32), [[AND7]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY5]](<4 x s16>) - ; CHECK-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5 = COPY [[BUILD_VECTOR]](<4 x s32>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $vgpr1_vgpr2 - %2:_(<4 x s16>), %3:_(<4 x s1>) = G_SADDO %0, %1 - %4:_(<4 x s32>) = G_ZEXT %3 - $vgpr0_vgpr1 = COPY %2 - $vgpr2_vgpr3_vgpr4_vgpr5 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr1_vgpr2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; 
CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[BITCAST]], [[BITCAST2]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[LSHR]], [[LSHR2]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[BITCAST1]], [[BITCAST3]] + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[LSHR1]], [[LSHR3]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[ADD]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[ADD1]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[ADD2]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[ADD3]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x i16>), [[BITCAST5]](<2 x i16>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST6]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[UV5]](<2 x i16>) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST7]], [[C]](i32) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[ADD]], 16 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST6]], 16 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SEXT_INREG]](i32), [[SEXT_INREG1]] + ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[ADD1]], 16 + ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR4]], 16 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SEXT_INREG2]](i32), [[SEXT_INREG3]] + ; CHECK-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(i32) = G_SEXT_INREG [[ADD2]], 16 + ; CHECK-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST7]], 16 + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SEXT_INREG4]](i32), [[SEXT_INREG5]] + ; CHECK-NEXT: [[SEXT_INREG6:%[0-9]+]]:_(i32) = G_SEXT_INREG [[ADD3]], 16 + ; CHECK-NEXT: [[SEXT_INREG7:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR5]], 16 + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SEXT_INREG6]](i32), [[SEXT_INREG7]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[UV6]](<2 x i16>) + ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST8]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[UV7]](<2 x i16>) + ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST9]], [[C]](i32) + ; CHECK-NEXT: [[SEXT_INREG8:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST8]], 16 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: 
[[COPY2:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SEXT_INREG8]](i32), [[COPY2]] + ; CHECK-NEXT: [[SEXT_INREG9:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR6]], 16 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SEXT_INREG9]](i32), [[COPY3]] + ; CHECK-NEXT: [[SEXT_INREG10:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST9]], 16 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SEXT_INREG10]](i32), [[COPY4]] + ; CHECK-NEXT: [[SEXT_INREG11:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR7]], 16 + ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SEXT_INREG11]](i32), [[C2]] + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(i1) = G_XOR [[ICMP4]], [[ICMP]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(i1) = G_XOR [[ICMP5]], [[ICMP1]] + ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(i1) = G_XOR [[ICMP6]], [[ICMP2]] + ; CHECK-NEXT: [[XOR3:%[0-9]+]]:_(i1) = G_XOR [[ICMP7]], [[ICMP3]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[XOR]](i1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[XOR1]](i1) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[XOR2]](i1) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(i32) = G_ANYEXT [[XOR3]](i1) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<4 x i16>) = COPY [[CONCAT_VECTORS]](<4 x i16>) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[ANYEXT]], [[C3]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[ANYEXT1]], [[C3]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(i32) = G_AND [[ANYEXT2]], [[C3]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(i32) = G_AND [[ANYEXT3]], [[C3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[AND4]](i32), [[AND5]](i32), [[AND6]](i32), [[AND7]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY5]](<4 x i16>) + ; CHECK-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5 = COPY [[BUILD_VECTOR]](<4 x i32>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = COPY $vgpr1_vgpr2 + %2:_(<4 x i16>), %3:_(<4 x i1>) = G_SADDO %0, %1 + %4:_(<4 x i32>) = G_ZEXT %3(<4 x i1>) + $vgpr0_vgpr1 = COPY %2(<4 x i16>) + $vgpr2_vgpr3_vgpr4_vgpr5 = COPY %4(<4 x i32>) ... 
--- @@ -388,35 +388,35 @@ body: | ; CHECK-LABEL: name: test_saddo_v2s32 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV2]] - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV3]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[ADD]](s32), [[UV4]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[ADD1]](s32), [[UV5]] - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV6]](s32), [[C]] - ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV7]](s32), [[C]] - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP2]], [[ICMP]] - ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP1]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY [[BUILD_VECTOR]](<2 x s32>) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY2]](<2 x s32>) - ; CHECK-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>), %3:_(<2 x s1>) = G_SADDO %0, %1 - %4:_(<2 x s32>) = G_ZEXT %3 - $vgpr0_vgpr1 = COPY %2 - $vgpr2_vgpr3 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[UV]], [[UV2]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[UV1]], [[UV3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[ADD]](i32), [[ADD1]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[ADD]](i32), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[ADD1]](i32), [[UV5]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[UV6]](i32), [[C]] + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[UV7]](i32), [[C]] + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(i1) = G_XOR [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(i1) = G_XOR [[ICMP3]], 
[[ICMP1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[XOR]](i1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[XOR1]](i1) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i32>) = COPY [[BUILD_VECTOR]](<2 x i32>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[ANYEXT]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[ANYEXT1]], [[C1]] + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[AND]](i32), [[AND1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY2]](<2 x i32>) + ; CHECK-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i32>), %3:_(<2 x i1>) = G_SADDO %0, %1 + %4:_(<2 x i32>) = G_ZEXT %3(<2 x i1>) + $vgpr0_vgpr1 = COPY %2(<2 x i32>) + $vgpr2_vgpr3 = COPY %4(<2 x i32>) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir index 80b3166108ad8..866606deada3b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir @@ -14,69 +14,69 @@ body: | ; GFX6-LABEL: name: saddsat_s7 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C3]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX]] - ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C3]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMIN]] - ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[SHL1]] - ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB]] - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[SMIN1]] - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[ADD]], [[C]](s32) - ; GFX6-NEXT: $vgpr0 = COPY [[ASHR]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 25 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[C]](i32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY1]], [[C]](i32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(i32) = G_SMAX [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C1]], [[SMAX]] + ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(i32) = G_SMIN [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[SMIN]] + ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(i32) = G_SMAX [[SUB1]], [[SHL1]] + ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(i32) = G_SMIN [[SMAX1]], [[SUB]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[SHL]], [[SMIN1]] + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[ADD]], [[C]](i32) + ; GFX6-NEXT: $vgpr0 = COPY [[ASHR]](i32) ; ; GFX8-LABEL: name: saddsat_s7 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: 
[[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[SHL]], [[C3]] - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[C1]], [[SMAX]] - ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[SHL]], [[C3]] - ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMIN]] - ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB1]], [[SHL1]] - ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB]] - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[SHL]], [[SMIN1]] - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[ADD]], [[C]](s16) - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) - ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 9 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[C]](i16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[C]](i16) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 0 + ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(i16) = G_SMAX [[SHL]], [[C3]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i16) = G_SUB [[C1]], [[SMAX]] + ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(i16) = G_SMIN [[SHL]], [[C3]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(i16) = G_SUB [[C2]], [[SMIN]] + ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(i16) = G_SMAX [[SUB1]], [[SHL1]] + ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(i16) = G_SMIN [[SMAX1]], [[SUB]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i16) = G_ADD [[SHL]], [[SMIN1]] + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i16) = G_ASHR [[ADD]], [[C]](i16) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ASHR]](i16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: saddsat_s7 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) - ; GFX9-NEXT: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[SHL]], [[SHL1]] - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SADDSAT]], [[C]](s16) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s7) = G_TRUNC %0 - %3:_(s7) = G_TRUNC %1 - %4:_(s7) = G_SADDSAT %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: 
[[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 9 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[C]](i16) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[C]](i16) + ; GFX9-NEXT: [[SADDSAT:%[0-9]+]]:_(i16) = G_SADDSAT [[SHL]], [[SHL1]] + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(i16) = G_ASHR [[SADDSAT]], [[C]](i16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ASHR]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i7) = G_TRUNC %0(i32) + %3:_(i7) = G_TRUNC %1(i32) + %4:_(i7) = G_SADDSAT %2, %3 + %5:_(i32) = G_ANYEXT %4(i7) + $vgpr0 = COPY %5(i32) ... --- @@ -88,69 +88,69 @@ body: | ; GFX6-LABEL: name: saddsat_s8 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C3]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX]] - ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C3]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMIN]] - ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[SHL1]] - ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB]] - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[SMIN1]] - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[ADD]], [[C]](s32) - ; GFX6-NEXT: $vgpr0 = COPY [[ASHR]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[C]](i32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY1]], [[C]](i32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(i32) = G_SMAX [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C1]], [[SMAX]] + ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(i32) = G_SMIN [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[SMIN]] + ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(i32) = G_SMAX [[SUB1]], [[SHL1]] + ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(i32) = G_SMIN [[SMAX1]], [[SUB]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[SHL]], [[SMIN1]] + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[ADD]], [[C]](i32) + ; GFX6-NEXT: $vgpr0 = COPY [[ASHR]](i32) ; ; GFX8-LABEL: name: saddsat_s8 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 
- ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[SHL]], [[C3]] - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[C1]], [[SMAX]] - ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[SHL]], [[C3]] - ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMIN]] - ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB1]], [[SHL1]] - ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB]] - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[SHL]], [[SMIN1]] - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[ADD]], [[C]](s16) - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) - ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[C]](i16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[C]](i16) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 0 + ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(i16) = G_SMAX [[SHL]], [[C3]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i16) = G_SUB [[C1]], [[SMAX]] + ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(i16) = G_SMIN [[SHL]], [[C3]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(i16) = G_SUB [[C2]], [[SMIN]] + ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(i16) = G_SMAX [[SUB1]], [[SHL1]] + ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(i16) = G_SMIN [[SMAX1]], [[SUB]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i16) = G_ADD [[SHL]], [[SMIN1]] + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i16) = G_ASHR [[ADD]], [[C]](i16) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ASHR]](i16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: saddsat_s8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) - ; GFX9-NEXT: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[SHL]], [[SHL1]] - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SADDSAT]], [[C]](s16) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s8) = G_TRUNC %0 - %3:_(s8) = G_TRUNC %1 - %4:_(s8) = G_SADDSAT %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[C]](i16) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[C]](i16) + ; GFX9-NEXT: [[SADDSAT:%[0-9]+]]:_(i16) = G_SADDSAT [[SHL]], [[SHL1]] + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(i16) = G_ASHR [[SADDSAT]], [[C]](i16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ASHR]](i16) + ; GFX9-NEXT: $vgpr0 = COPY 
[[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i8) = G_TRUNC %0(i32) + %3:_(i8) = G_TRUNC %1(i32) + %4:_(i8) = G_SADDSAT %2, %3 + %5:_(i32) = G_ANYEXT %4(i8) + $vgpr0 = COPY %5(i32) ... --- @@ -162,133 +162,133 @@ body: | ; GFX6-LABEL: name: saddsat_v2s8 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C1]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C4]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMAX]] - ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C4]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SMIN]] - ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[SHL1]] - ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB]] - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[SMIN1]] - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[ADD]], [[C1]](s32) - ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) - ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C1]](s32) - ; GFX6-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SHL2]], [[C4]] - ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMAX2]] - ; GFX6-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SHL2]], [[C4]] - ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SMIN2]] - ; GFX6-NEXT: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SUB3]], [[SHL3]] - ; GFX6-NEXT: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB2]] - ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SHL2]], [[SMIN3]] - ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[ADD1]], [[C1]](s32) - ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR]](s32) - ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C5]] - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C6]] - ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY2]](s32) - ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY1]], [[C]](i32) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[C1]](i32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY1]], [[C1]](i32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: 
[[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(i32) = G_SMAX [[SHL]], [[C4]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[SMAX]] + ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(i32) = G_SMIN [[SHL]], [[C4]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C3]], [[SMIN]] + ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(i32) = G_SMAX [[SUB1]], [[SHL1]] + ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(i32) = G_SMIN [[SMAX1]], [[SUB]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[SHL]], [[SMIN1]] + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[ADD]], [[C1]](i32) + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C1]](i32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LSHR1]], [[C1]](i32) + ; GFX6-NEXT: [[SMAX2:%[0-9]+]]:_(i32) = G_SMAX [[SHL2]], [[C4]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[SMAX2]] + ; GFX6-NEXT: [[SMIN2:%[0-9]+]]:_(i32) = G_SMIN [[SHL2]], [[C4]] + ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[C3]], [[SMIN2]] + ; GFX6-NEXT: [[SMAX3:%[0-9]+]]:_(i32) = G_SMAX [[SUB3]], [[SHL3]] + ; GFX6-NEXT: [[SMIN3:%[0-9]+]]:_(i32) = G_SMIN [[SMAX3]], [[SUB2]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[SHL2]], [[SMIN3]] + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[ADD1]], [[C1]](i32) + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[ASHR]](i32) + ; GFX6-NEXT: [[C5:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C5]] + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C]](i32) + ; GFX6-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[ASHR1]], [[C6]] + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[COPY2]](i32) + ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[SHL4]](i32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[TRUNC1]] + ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[OR]](i16) + ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX8-LABEL: name: saddsat_v2s8 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C1]](s16) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[SHL]], [[C4]] - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMAX]] - ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[SHL]], [[C4]] - ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[C3]], [[SMIN]] - ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB1]], [[SHL1]] - ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB]] - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[SHL]], [[SMIN1]] - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[ADD]], [[C1]](s16) - ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[C1]](s16) - ; GFX8-NEXT: 
[[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[C1]](s16) - ; GFX8-NEXT: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[SHL2]], [[C4]] - ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMAX2]] - ; GFX8-NEXT: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[SHL2]], [[C4]] - ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s16) = G_SUB [[C3]], [[SMIN2]] - ; GFX8-NEXT: [[SMAX3:%[0-9]+]]:_(s16) = G_SMAX [[SUB3]], [[SHL3]] - ; GFX8-NEXT: [[SMIN3:%[0-9]+]]:_(s16) = G_SMIN [[SMAX3]], [[SUB2]] - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[SHL2]], [[SMIN3]] - ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[ADD1]], [[C1]](s16) - ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[ASHR]], [[C5]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C5]] - ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL4]] - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY1]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[C1]](i16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[C1]](i16) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 0 + ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(i16) = G_SMAX [[SHL]], [[C4]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i16) = G_SUB [[C2]], [[SMAX]] + ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(i16) = G_SMIN [[SHL]], [[C4]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(i16) = G_SUB [[C3]], [[SMIN]] + ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(i16) = G_SMAX [[SUB1]], [[SHL1]] + ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(i16) = G_SMIN [[SMAX1]], [[SUB]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i16) = G_ADD [[SHL]], [[SMIN1]] + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i16) = G_ASHR [[ADD]], [[C1]](i16) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[TRUNC2]], [[C1]](i16) + ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(i16) = G_SHL [[TRUNC3]], [[C1]](i16) + ; GFX8-NEXT: [[SMAX2:%[0-9]+]]:_(i16) = G_SMAX [[SHL2]], [[C4]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(i16) = G_SUB [[C2]], [[SMAX2]] + ; GFX8-NEXT: [[SMIN2:%[0-9]+]]:_(i16) = G_SMIN [[SHL2]], [[C4]] + ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(i16) = G_SUB [[C3]], [[SMIN2]] + ; GFX8-NEXT: [[SMAX3:%[0-9]+]]:_(i16) = G_SMAX [[SUB3]], [[SHL3]] + ; GFX8-NEXT: [[SMIN3:%[0-9]+]]:_(i16) = G_SMIN [[SMAX3]], [[SUB2]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(i16) = G_ADD [[SHL2]], [[SMIN3]] + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(i16) = G_ASHR [[ADD1]], [[C1]](i16) + ; GFX8-NEXT: [[C5:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[ASHR]], [[C5]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[ASHR1]], [[C5]] + ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C1]](i16) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL4]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[OR]](i16) + ; GFX8-NEXT: 
$vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: saddsat_v2s8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C2]](s16), [[C2]](s16) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR]], [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR1]], [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[SADDSAT:%[0-9]+]]:_(<2 x s16>) = G_SADDSAT [[SHL]], [[SHL1]] - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SADDSAT]], [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[ASHR]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL2]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(<2 x s8>) = G_BITCAST %2 - %5:_(<2 x s8>) = G_BITCAST %3 - %6:_(<2 x s8>) = G_SADDSAT %4, %5 - %7:_(s16) = G_BITCAST %6 - %8:_(s32) = G_ANYEXT %7 - $vgpr0 = COPY %8 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY1]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[C2]](i16), [[C2]](i16) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x i16>) = G_SHL [[BUILD_VECTOR]], [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: 
[[SHL1:%[0-9]+]]:_(<2 x i16>) = G_SHL [[BUILD_VECTOR1]], [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: [[SADDSAT:%[0-9]+]]:_(<2 x i16>) = G_SADDSAT [[SHL]], [[SHL1]] + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(<2 x i16>) = G_ASHR [[SADDSAT]], [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[ASHR]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C1]](i32) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC4]], [[C3]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC5]], [[C3]] + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C2]](i16) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL2]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[OR]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(<2 x i8>) = G_BITCAST %2(i16) + %5:_(<2 x i8>) = G_BITCAST %3(i16) + %6:_(<2 x i8>) = G_SADDSAT %4, %5 + %7:_(i16) = G_BITCAST %6(<2 x i8>) + %8:_(i32) = G_ANYEXT %7(i16) + $vgpr0 = COPY %8(i32) ... --- @@ -300,61 +300,61 @@ body: | ; GFX6-LABEL: name: saddsat_s16 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C3]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX]] - ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C3]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMIN]] - ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[SHL1]] - ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB]] - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[SMIN1]] - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[ADD]], [[C]](s32) - ; GFX6-NEXT: $vgpr0 = COPY [[ASHR]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[C]](i32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY1]], [[C]](i32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(i32) = G_SMAX [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C1]], [[SMAX]] + ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(i32) = G_SMIN [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[SMIN]] + ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(i32) = G_SMAX [[SUB1]], [[SHL1]] + ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(i32) = G_SMIN [[SMAX1]], [[SUB]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[SHL]], [[SMIN1]] + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[ADD]], [[C]](i32) + ; GFX6-NEXT: $vgpr0 = COPY [[ASHR]](i32) ; ; GFX8-LABEL: name: saddsat_s16 ; GFX8: 
liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[C2]] - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[C]], [[SMAX]] - ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[C2]] - ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[C1]], [[SMIN]] - ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB1]], [[TRUNC1]] - ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB]] - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[SMIN1]] - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) - ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 0 + ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(i16) = G_SMAX [[TRUNC]], [[C2]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i16) = G_SUB [[C]], [[SMAX]] + ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(i16) = G_SMIN [[TRUNC]], [[C2]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(i16) = G_SUB [[C1]], [[SMIN]] + ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(i16) = G_SMAX [[SUB1]], [[TRUNC1]] + ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(i16) = G_SMIN [[SMAX1]], [[SUB]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i16) = G_ADD [[TRUNC]], [[SMIN1]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ADD]](i16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: saddsat_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[TRUNC]], [[TRUNC1]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SADDSAT]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s16) = G_SADDSAT %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[SADDSAT:%[0-9]+]]:_(i16) = G_SADDSAT [[TRUNC]], [[TRUNC1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[SADDSAT]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i16) = G_SADDSAT %2, %3 + %5:_(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... 
--- @@ -366,93 +366,93 @@ body: | ; GFX6-LABEL: name: saddsat_v2s16 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C3]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX]] - ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C3]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMIN]] - ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[SHL1]] - ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB]] - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[SMIN1]] - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[ADD]], [[C]](s32) - ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; GFX6-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SHL2]], [[C3]] - ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX2]] - ; GFX6-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SHL2]], [[C3]] - ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMIN2]] - ; GFX6-NEXT: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SUB3]], [[SHL3]] - ; GFX6-NEXT: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB2]] - ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SHL2]], [[SMIN3]] - ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[ADD1]], [[C]](s32) - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C4]] - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C4]] - ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL4]] - ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[BITCAST]], [[C]](i32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[BITCAST1]], [[C]](i32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(i32) = G_SMAX [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C1]], [[SMAX]] + ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(i32) = G_SMIN [[SHL]], [[C3]] + 
; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[SMIN]] + ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(i32) = G_SMAX [[SUB1]], [[SHL1]] + ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(i32) = G_SMIN [[SMAX1]], [[SUB]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[SHL]], [[SMIN1]] + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[ADD]], [[C]](i32) + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C]](i32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LSHR1]], [[C]](i32) + ; GFX6-NEXT: [[SMAX2:%[0-9]+]]:_(i32) = G_SMAX [[SHL2]], [[C3]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[C1]], [[SMAX2]] + ; GFX6-NEXT: [[SMIN2:%[0-9]+]]:_(i32) = G_SMIN [[SHL2]], [[C3]] + ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[SMIN2]] + ; GFX6-NEXT: [[SMAX3:%[0-9]+]]:_(i32) = G_SMAX [[SUB3]], [[SHL3]] + ; GFX6-NEXT: [[SMIN3:%[0-9]+]]:_(i32) = G_SMIN [[SMAX3]], [[SUB2]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[SHL2]], [[SMIN3]] + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[ADD1]], [[C]](i32) + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[ASHR]], [[C4]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[ASHR1]], [[C4]] + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL4]] + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) ; ; GFX8-LABEL: name: saddsat_v2s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[C3]] - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[C1]], [[SMAX]] - ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[C3]] - ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMIN]] - ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB1]], [[TRUNC2]] - ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB]] - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[SMIN1]] - ; GFX8-NEXT: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC1]], [[C3]] - ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[C1]], [[SMAX2]] - ; GFX8-NEXT: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC1]], [[C3]] - ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMIN2]] - ; GFX8-NEXT: [[SMAX3:%[0-9]+]]:_(s16) = G_SMAX [[SUB3]], [[TRUNC3]] - ; GFX8-NEXT: [[SMIN3:%[0-9]+]]:_(s16) = G_SMIN [[SMAX3]], [[SUB2]] - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[SMIN3]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = 
G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 0 + ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(i16) = G_SMAX [[TRUNC]], [[C3]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i16) = G_SUB [[C1]], [[SMAX]] + ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(i16) = G_SMIN [[TRUNC]], [[C3]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(i16) = G_SUB [[C2]], [[SMIN]] + ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(i16) = G_SMAX [[SUB1]], [[TRUNC2]] + ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(i16) = G_SMIN [[SMAX1]], [[SUB]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i16) = G_ADD [[TRUNC]], [[SMIN1]] + ; GFX8-NEXT: [[SMAX2:%[0-9]+]]:_(i16) = G_SMAX [[TRUNC1]], [[C3]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(i16) = G_SUB [[C1]], [[SMAX2]] + ; GFX8-NEXT: [[SMIN2:%[0-9]+]]:_(i16) = G_SMIN [[TRUNC1]], [[C3]] + ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(i16) = G_SUB [[C2]], [[SMIN2]] + ; GFX8-NEXT: [[SMAX3:%[0-9]+]]:_(i16) = G_SMAX [[SUB3]], [[TRUNC3]] + ; GFX8-NEXT: [[SMIN3:%[0-9]+]]:_(i16) = G_SMIN [[SMAX3]], [[SUB2]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(i16) = G_ADD [[TRUNC1]], [[SMIN3]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[ADD]](i16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[ADD1]](i16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) ; ; GFX9-LABEL: name: saddsat_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[SADDSAT:%[0-9]+]]:_(<2 x s16>) = G_SADDSAT [[COPY]], [[COPY1]] - ; GFX9-NEXT: $vgpr0 = COPY [[SADDSAT]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_SADDSAT %0, %1 - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[SADDSAT:%[0-9]+]]:_(<2 x i16>) = G_SADDSAT [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[SADDSAT]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = G_SADDSAT %0, %1 + $vgpr0 = COPY %2(<2 x i16>) ... 
--- @@ -464,181 +464,181 @@ body: | ; GFX6-LABEL: name: saddsat_v3s16 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C3]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX]] - ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C3]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMIN]] - ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[SHL1]] - ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB]] - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[SMIN1]] - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[ADD]], [[C]](s32) - ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[C]](s32) - ; GFX6-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SHL2]], [[C3]] - ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX2]] - ; GFX6-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SHL2]], [[C3]] - ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMIN2]] - ; GFX6-NEXT: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SUB3]], [[SHL3]] - ; GFX6-NEXT: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB2]] - ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SHL2]], [[SMIN3]] - ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[ADD1]], [[C]](s32) - ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) - ; GFX6-NEXT: [[SMAX4:%[0-9]+]]:_(s32) = G_SMAX [[SHL4]], [[C3]] - ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX4]] - ; GFX6-NEXT: [[SMIN4:%[0-9]+]]:_(s32) = G_SMIN [[SHL4]], [[C3]] - ; GFX6-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMIN4]] - ; GFX6-NEXT: [[SMAX5:%[0-9]+]]:_(s32) = G_SMAX [[SUB5]], [[SHL5]] - ; GFX6-NEXT: [[SMIN5:%[0-9]+]]:_(s32) = G_SMIN [[SMAX5]], [[SUB4]] - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SHL4]], [[SMIN5]] - ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[ADD2]], [[C]](s32) - ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ASHR]], 
[[C4]] - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C4]] - ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL6]] - ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR2]], [[C4]] - ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]] - ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL7]] - ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] - ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) - ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL8]] - ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[BITCAST]], [[C]](i32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LSHR1]], [[C]](i32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(i32) = G_SMAX [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C1]], [[SMAX]] + ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(i32) = G_SMIN [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[SMIN]] + ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(i32) = G_SMAX [[SUB1]], [[SHL1]] + ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(i32) = G_SMIN [[SMAX1]], [[SUB]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[SHL]], [[SMIN1]] + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[ADD]], [[C]](i32) + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C]](i32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[BITCAST2]], [[C]](i32) + ; GFX6-NEXT: [[SMAX2:%[0-9]+]]:_(i32) = G_SMAX [[SHL2]], [[C3]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[C1]], [[SMAX2]] + ; GFX6-NEXT: [[SMIN2:%[0-9]+]]:_(i32) = G_SMIN [[SHL2]], [[C3]] + ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[SMIN2]] + ; GFX6-NEXT: [[SMAX3:%[0-9]+]]:_(i32) = G_SMAX [[SUB3]], [[SHL3]] + ; GFX6-NEXT: [[SMIN3:%[0-9]+]]:_(i32) = G_SMIN [[SMAX3]], [[SUB2]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[SHL2]], [[SMIN3]] + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[ADD1]], [[C]](i32) + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[BITCAST1]], [[C]](i32) + ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[LSHR2]], [[C]](i32) + ; GFX6-NEXT: [[SMAX4:%[0-9]+]]:_(i32) = G_SMAX [[SHL4]], [[C3]] + ; 
GFX6-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[C1]], [[SMAX4]] + ; GFX6-NEXT: [[SMIN4:%[0-9]+]]:_(i32) = G_SMIN [[SHL4]], [[C3]] + ; GFX6-NEXT: [[SUB5:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[SMIN4]] + ; GFX6-NEXT: [[SMAX5:%[0-9]+]]:_(i32) = G_SMAX [[SUB5]], [[SHL5]] + ; GFX6-NEXT: [[SMIN5:%[0-9]+]]:_(i32) = G_SMIN [[SMAX5]], [[SUB4]] + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[SHL4]], [[SMIN5]] + ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(i32) = G_ASHR [[ADD2]], [[C]](i32) + ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[ASHR]], [[C4]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[ASHR1]], [[C4]] + ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL6]] + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[ASHR2]], [[C4]] + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST3]], [[C4]] + ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C]](i32) + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL7]] + ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[BITCAST4]], [[C4]] + ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[AND4]], [[C]](i32) + ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR3]], [[SHL8]] + ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x i16>), [[BITCAST6]](<2 x i16>), [[BITCAST7]](<2 x i16>) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX8-LABEL: name: saddsat_v3s16 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[C3]] - ; GFX8-NEXT: 
[[SUB:%[0-9]+]]:_(s16) = G_SUB [[C1]], [[SMAX]] - ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[C3]] - ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMIN]] - ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB1]], [[TRUNC3]] - ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB]] - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[SMIN1]] - ; GFX8-NEXT: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC1]], [[C3]] - ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[C1]], [[SMAX2]] - ; GFX8-NEXT: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC1]], [[C3]] - ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMIN2]] - ; GFX8-NEXT: [[SMAX3:%[0-9]+]]:_(s16) = G_SMAX [[SUB3]], [[TRUNC4]] - ; GFX8-NEXT: [[SMIN3:%[0-9]+]]:_(s16) = G_SMIN [[SMAX3]], [[SUB2]] - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[SMIN3]] - ; GFX8-NEXT: [[SMAX4:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC2]], [[C3]] - ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(s16) = G_SUB [[C1]], [[SMAX4]] - ; GFX8-NEXT: [[SMIN4:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC2]], [[C3]] - ; GFX8-NEXT: [[SUB5:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMIN4]] - ; GFX8-NEXT: [[SMAX5:%[0-9]+]]:_(s16) = G_SMAX [[SUB5]], [[TRUNC5]] - ; GFX8-NEXT: [[SMIN5:%[0-9]+]]:_(s16) = G_SMIN [[SMAX5]], [[SUB4]] - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[SMIN5]] - ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ADD2]](s16) - ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]] - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] - ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL2]] - ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 
x i16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 0 + ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(i16) = G_SMAX [[TRUNC]], [[C3]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i16) = G_SUB [[C1]], [[SMAX]] + ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(i16) = G_SMIN [[TRUNC]], [[C3]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(i16) = G_SUB [[C2]], [[SMIN]] + ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(i16) = G_SMAX [[SUB1]], [[TRUNC3]] + ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(i16) = G_SMIN [[SMAX1]], [[SUB]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i16) = G_ADD [[TRUNC]], [[SMIN1]] + ; GFX8-NEXT: [[SMAX2:%[0-9]+]]:_(i16) = G_SMAX [[TRUNC1]], [[C3]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(i16) = G_SUB [[C1]], [[SMAX2]] + ; GFX8-NEXT: [[SMIN2:%[0-9]+]]:_(i16) = G_SMIN [[TRUNC1]], [[C3]] + ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(i16) = G_SUB [[C2]], [[SMIN2]] + ; GFX8-NEXT: [[SMAX3:%[0-9]+]]:_(i16) = G_SMAX [[SUB3]], [[TRUNC4]] + ; GFX8-NEXT: [[SMIN3:%[0-9]+]]:_(i16) = G_SMIN [[SMAX3]], [[SUB2]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(i16) = G_ADD [[TRUNC1]], [[SMIN3]] + ; GFX8-NEXT: [[SMAX4:%[0-9]+]]:_(i16) = G_SMAX [[TRUNC2]], [[C3]] + ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(i16) = G_SUB [[C1]], [[SMAX4]] + ; GFX8-NEXT: [[SMIN4:%[0-9]+]]:_(i16) = G_SMIN [[TRUNC2]], [[C3]] + ; GFX8-NEXT: [[SUB5:%[0-9]+]]:_(i16) = G_SUB [[C2]], [[SMIN4]] + ; GFX8-NEXT: [[SMAX5:%[0-9]+]]:_(i16) = G_SMAX [[SUB5]], [[TRUNC5]] + ; GFX8-NEXT: [[SMIN5:%[0-9]+]]:_(i16) = G_SMIN [[SMAX5]], [[SUB4]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(i16) = G_ADD [[TRUNC2]], [[SMIN5]] + ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[ADD]](i16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[ADD1]](i16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[ADD2]](i16) + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST3]], [[C4]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND]], [[C]](i32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST4]], [[C4]] + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR3]], [[SHL2]] + ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = 
G_CONCAT_VECTORS [[BITCAST5]](<2 x i16>), [[BITCAST6]](<2 x i16>), [[BITCAST7]](<2 x i16>) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX9-LABEL: name: saddsat_v3s16 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[SADDSAT:%[0-9]+]]:_(<2 x s16>) = G_SADDSAT [[UV]], [[BUILD_VECTOR1]] - ; GFX9-NEXT: [[SADDSAT1:%[0-9]+]]:_(<2 x s16>) = G_SADDSAT [[BUILD_VECTOR]], [[BUILD_VECTOR2]] - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[SADDSAT]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[SADDSAT1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC8]](s16), [[TRUNC9]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) - %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0 - %3:_(<3 x s16>) = G_SADDSAT %1, %2 - %4:_(<3 x s16>) = G_IMPLICIT_DEF - %5:_(<6 x s16>) = G_CONCAT_VECTORS %3, %4 - $vgpr0_vgpr1_vgpr2 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9-NEXT: 
[[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC1]](i16), [[TRUNC2]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC3]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[SADDSAT:%[0-9]+]]:_(<2 x i16>) = G_SADDSAT [[UV]], [[BUILD_VECTOR1]] + ; GFX9-NEXT: [[SADDSAT1:%[0-9]+]]:_(<2 x i16>) = G_SADDSAT [[BUILD_VECTOR]], [[BUILD_VECTOR2]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[SADDSAT]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[SADDSAT1]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST3]](i32) + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF1]](<4 x i16>) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC9:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST5]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC6]](i16), [[TRUNC7]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC8]](i16), [[TRUNC9]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR5]](<2 x i16>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) + %0:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x i16>), %2:_(<3 x i16>) = G_UNMERGE_VALUES %0(<6 x i16>) + %3:_(<3 x i16>) = G_SADDSAT %1, %2 + %4:_(<3 x i16>) = G_IMPLICIT_DEF + %5:_(<6 x i16>) = G_CONCAT_VECTORS %3(<3 x i16>), %4(<3 x i16>) + $vgpr0_vgpr1_vgpr2 = COPY %5(<6 x i16>) ... 
--- @@ -650,159 +650,159 @@ body: | ; GFX6-LABEL: name: saddsat_v4s16 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[C]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C3]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX]] - ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C3]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMIN]] - ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[SHL1]] - ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB]] - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[SMIN1]] - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[ADD]], [[C]](s32) - ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) - ; GFX6-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SHL2]], [[C3]] - ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX2]] - ; GFX6-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SHL2]], [[C3]] - ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMIN2]] - ; GFX6-NEXT: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SUB3]], [[SHL3]] - ; GFX6-NEXT: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB2]] - ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SHL2]], [[SMIN3]] - ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[ADD1]], [[C]](s32) - ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[C]](s32) - ; GFX6-NEXT: [[SMAX4:%[0-9]+]]:_(s32) = G_SMAX [[SHL4]], [[C3]] - ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX4]] - ; GFX6-NEXT: [[SMIN4:%[0-9]+]]:_(s32) = G_SMIN [[SHL4]], [[C3]] - ; GFX6-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMIN4]] - ; GFX6-NEXT: [[SMAX5:%[0-9]+]]:_(s32) = G_SMAX [[SUB5]], [[SHL5]] - ; GFX6-NEXT: [[SMIN5:%[0-9]+]]:_(s32) = G_SMIN [[SMAX5]], [[SUB4]] - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SHL4]], [[SMIN5]] - ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[ADD2]], [[C]](s32) - ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LSHR3]], [[C]](s32) - ; GFX6-NEXT: [[SMAX6:%[0-9]+]]:_(s32) = G_SMAX [[SHL6]], [[C3]] - ; 
GFX6-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX6]] - ; GFX6-NEXT: [[SMIN6:%[0-9]+]]:_(s32) = G_SMIN [[SHL6]], [[C3]] - ; GFX6-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMIN6]] - ; GFX6-NEXT: [[SMAX7:%[0-9]+]]:_(s32) = G_SMAX [[SUB7]], [[SHL7]] - ; GFX6-NEXT: [[SMIN7:%[0-9]+]]:_(s32) = G_SMIN [[SMAX7]], [[SUB6]] - ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SHL6]], [[SMIN7]] - ; GFX6-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[ADD3]], [[C]](s32) - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C4]] - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C4]] - ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL8]] - ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR2]], [[C4]] - ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ASHR3]], [[C4]] - ; GFX6-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL9]] - ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[BITCAST]], [[C]](i32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[BITCAST2]], [[C]](i32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(i32) = G_SMAX [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C1]], [[SMAX]] + ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(i32) = G_SMIN [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[SMIN]] + ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(i32) = G_SMAX [[SUB1]], [[SHL1]] + ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(i32) = G_SMIN [[SMAX1]], [[SUB]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[SHL]], [[SMIN1]] + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[ADD]], [[C]](i32) + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C]](i32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LSHR2]], [[C]](i32) + ; GFX6-NEXT: [[SMAX2:%[0-9]+]]:_(i32) = G_SMAX [[SHL2]], [[C3]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[C1]], [[SMAX2]] + ; GFX6-NEXT: [[SMIN2:%[0-9]+]]:_(i32) = G_SMIN 
[[SHL2]], [[C3]] + ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[SMIN2]] + ; GFX6-NEXT: [[SMAX3:%[0-9]+]]:_(i32) = G_SMAX [[SUB3]], [[SHL3]] + ; GFX6-NEXT: [[SMIN3:%[0-9]+]]:_(i32) = G_SMIN [[SMAX3]], [[SUB2]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[SHL2]], [[SMIN3]] + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[ADD1]], [[C]](i32) + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[BITCAST1]], [[C]](i32) + ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[BITCAST3]], [[C]](i32) + ; GFX6-NEXT: [[SMAX4:%[0-9]+]]:_(i32) = G_SMAX [[SHL4]], [[C3]] + ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[C1]], [[SMAX4]] + ; GFX6-NEXT: [[SMIN4:%[0-9]+]]:_(i32) = G_SMIN [[SHL4]], [[C3]] + ; GFX6-NEXT: [[SUB5:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[SMIN4]] + ; GFX6-NEXT: [[SMAX5:%[0-9]+]]:_(i32) = G_SMAX [[SUB5]], [[SHL5]] + ; GFX6-NEXT: [[SMIN5:%[0-9]+]]:_(i32) = G_SMIN [[SMAX5]], [[SUB4]] + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[SHL4]], [[SMIN5]] + ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(i32) = G_ASHR [[ADD2]], [[C]](i32) + ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[LSHR1]], [[C]](i32) + ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LSHR3]], [[C]](i32) + ; GFX6-NEXT: [[SMAX6:%[0-9]+]]:_(i32) = G_SMAX [[SHL6]], [[C3]] + ; GFX6-NEXT: [[SUB6:%[0-9]+]]:_(i32) = G_SUB [[C1]], [[SMAX6]] + ; GFX6-NEXT: [[SMIN6:%[0-9]+]]:_(i32) = G_SMIN [[SHL6]], [[C3]] + ; GFX6-NEXT: [[SUB7:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[SMIN6]] + ; GFX6-NEXT: [[SMAX7:%[0-9]+]]:_(i32) = G_SMAX [[SUB7]], [[SHL7]] + ; GFX6-NEXT: [[SMIN7:%[0-9]+]]:_(i32) = G_SMIN [[SMAX7]], [[SUB6]] + ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[SHL6]], [[SMIN7]] + ; GFX6-NEXT: [[ASHR3:%[0-9]+]]:_(i32) = G_ASHR [[ADD3]], [[C]](i32) + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[ASHR]], [[C4]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[ASHR1]], [[C4]] + ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL8]] + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[ASHR2]], [[C4]] + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[ASHR3]], [[C4]] + ; GFX6-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C]](i32) + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL9]] + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x i16>), [[BITCAST5]](<2 x i16>) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX8-LABEL: name: saddsat_v4s16 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8-NEXT: 
[[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[C3]] - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[C1]], [[SMAX]] - ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[C3]] - ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMIN]] - ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB1]], [[TRUNC4]] - ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB]] - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[SMIN1]] - ; GFX8-NEXT: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC1]], [[C3]] - ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[C1]], [[SMAX2]] - ; GFX8-NEXT: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC1]], [[C3]] - ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMIN2]] - ; GFX8-NEXT: [[SMAX3:%[0-9]+]]:_(s16) = G_SMAX [[SUB3]], [[TRUNC5]] - ; GFX8-NEXT: [[SMIN3:%[0-9]+]]:_(s16) = G_SMIN [[SMAX3]], [[SUB2]] - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[SMIN3]] - ; GFX8-NEXT: [[SMAX4:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC2]], [[C3]] - ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(s16) = G_SUB [[C1]], [[SMAX4]] - ; GFX8-NEXT: [[SMIN4:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC2]], [[C3]] - ; GFX8-NEXT: [[SUB5:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMIN4]] - ; GFX8-NEXT: [[SMAX5:%[0-9]+]]:_(s16) = G_SMAX [[SUB5]], [[TRUNC6]] - ; GFX8-NEXT: [[SMIN5:%[0-9]+]]:_(s16) = G_SMIN [[SMAX5]], [[SUB4]] - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[SMIN5]] - ; GFX8-NEXT: [[SMAX6:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC3]], [[C3]] - ; GFX8-NEXT: [[SUB6:%[0-9]+]]:_(s16) = G_SUB [[C1]], [[SMAX6]] - ; GFX8-NEXT: [[SMIN6:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC3]], [[C3]] - ; GFX8-NEXT: [[SUB7:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMIN6]] - ; GFX8-NEXT: [[SMAX7:%[0-9]+]]:_(s16) = G_SMAX [[SUB7]], [[TRUNC7]] - ; GFX8-NEXT: [[SMIN7:%[0-9]+]]:_(s16) = G_SMIN [[SMAX7]], [[SUB6]] - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[SMIN7]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ADD2]](s16) - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[ADD3]](s16) - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), 
[[BITCAST5]](<2 x s16>) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST3]](i32) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 0 + ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(i16) = G_SMAX [[TRUNC]], [[C3]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i16) = G_SUB [[C1]], [[SMAX]] + ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(i16) = G_SMIN [[TRUNC]], [[C3]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(i16) = G_SUB [[C2]], [[SMIN]] + ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(i16) = G_SMAX [[SUB1]], [[TRUNC4]] + ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(i16) = G_SMIN [[SMAX1]], [[SUB]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i16) = G_ADD [[TRUNC]], [[SMIN1]] + ; GFX8-NEXT: [[SMAX2:%[0-9]+]]:_(i16) = G_SMAX [[TRUNC1]], [[C3]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(i16) = G_SUB [[C1]], [[SMAX2]] + ; GFX8-NEXT: [[SMIN2:%[0-9]+]]:_(i16) = G_SMIN [[TRUNC1]], [[C3]] + ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(i16) = G_SUB [[C2]], [[SMIN2]] + ; GFX8-NEXT: [[SMAX3:%[0-9]+]]:_(i16) = G_SMAX [[SUB3]], [[TRUNC5]] + ; GFX8-NEXT: [[SMIN3:%[0-9]+]]:_(i16) = G_SMIN [[SMAX3]], [[SUB2]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(i16) = G_ADD [[TRUNC1]], [[SMIN3]] + ; GFX8-NEXT: [[SMAX4:%[0-9]+]]:_(i16) = G_SMAX [[TRUNC2]], [[C3]] + ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(i16) = G_SUB [[C1]], [[SMAX4]] + ; GFX8-NEXT: [[SMIN4:%[0-9]+]]:_(i16) = G_SMIN [[TRUNC2]], [[C3]] + ; GFX8-NEXT: [[SUB5:%[0-9]+]]:_(i16) = G_SUB [[C2]], [[SMIN4]] + ; GFX8-NEXT: [[SMAX5:%[0-9]+]]:_(i16) = G_SMAX [[SUB5]], [[TRUNC6]] + ; GFX8-NEXT: [[SMIN5:%[0-9]+]]:_(i16) = G_SMIN [[SMAX5]], [[SUB4]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(i16) = G_ADD [[TRUNC2]], [[SMIN5]] + ; GFX8-NEXT: [[SMAX6:%[0-9]+]]:_(i16) = G_SMAX [[TRUNC3]], [[C3]] + ; GFX8-NEXT: [[SUB6:%[0-9]+]]:_(i16) = G_SUB [[C1]], [[SMAX6]] + ; GFX8-NEXT: [[SMIN6:%[0-9]+]]:_(i16) = G_SMIN [[TRUNC3]], [[C3]] + ; GFX8-NEXT: [[SUB7:%[0-9]+]]:_(i16) = G_SUB [[C2]], [[SMIN6]] + ; GFX8-NEXT: [[SMAX7:%[0-9]+]]:_(i16) = G_SMAX [[SUB7]], [[TRUNC7]] + 
; GFX8-NEXT: [[SMIN7:%[0-9]+]]:_(i16) = G_SMIN [[SMAX7]], [[SUB6]] + ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(i16) = G_ADD [[TRUNC3]], [[SMIN7]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[ADD]](i16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[ADD1]](i16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[ADD2]](i16) + ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[ADD3]](i16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x i16>), [[BITCAST5]](<2 x i16>) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX9-LABEL: name: saddsat_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9-NEXT: [[SADDSAT:%[0-9]+]]:_(<2 x s16>) = G_SADDSAT [[UV]], [[UV2]] - ; GFX9-NEXT: [[SADDSAT1:%[0-9]+]]:_(<2 x s16>) = G_SADDSAT [[UV1]], [[UV3]] - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[SADDSAT]](<2 x s16>), [[SADDSAT1]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 - %2:_(<4 x s16>) = G_SADDSAT %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; GFX9-NEXT: [[SADDSAT:%[0-9]+]]:_(<2 x i16>) = G_SADDSAT [[UV]], [[UV2]] + ; GFX9-NEXT: [[SADDSAT1:%[0-9]+]]:_(<2 x i16>) = G_SADDSAT [[UV1]], [[UV3]] + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[SADDSAT]](<2 x i16>), [[SADDSAT1]](<2 x i16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = COPY $vgpr2_vgpr3 + %2:_(<4 x i16>) = G_SADDSAT %0, %1 + $vgpr0_vgpr1 = COPY %2(<4 x i16>) ... 
--- @@ -814,48 +814,48 @@ body: | ; GFX6-LABEL: name: saddsat_s32 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[COPY]], [[C2]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SMAX]] - ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[COPY]], [[C2]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMIN]] - ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[COPY1]] - ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB]] - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[SMIN1]] - ; GFX6-NEXT: $vgpr0 = COPY [[ADD]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(i32) = G_SMAX [[COPY]], [[C2]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C]], [[SMAX]] + ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(i32) = G_SMIN [[COPY]], [[C2]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C1]], [[SMIN]] + ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(i32) = G_SMAX [[SUB1]], [[COPY1]] + ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(i32) = G_SMIN [[SMAX1]], [[SUB]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[COPY]], [[SMIN1]] + ; GFX6-NEXT: $vgpr0 = COPY [[ADD]](i32) ; ; GFX8-LABEL: name: saddsat_s32 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[COPY]], [[C2]] - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SMAX]] - ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[COPY]], [[C2]] - ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMIN]] - ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[COPY1]] - ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB]] - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[SMIN1]] - ; GFX8-NEXT: $vgpr0 = COPY [[ADD]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(i32) = G_SMAX [[COPY]], [[C2]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C]], [[SMAX]] + ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(i32) = G_SMIN [[COPY]], [[C2]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C1]], [[SMIN]] + ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(i32) = G_SMAX [[SUB1]], [[COPY1]] + ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(i32) = G_SMIN [[SMAX1]], [[SUB]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[COPY]], [[SMIN1]] + ; GFX8-NEXT: $vgpr0 = COPY [[ADD]](i32) ; ; GFX9-LABEL: name: saddsat_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: 
[[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[SADDSAT:%[0-9]+]]:_(s32) = G_SADDSAT [[COPY]], [[COPY1]] - ; GFX9-NEXT: $vgpr0 = COPY [[SADDSAT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_SADDSAT %0, %1 - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[SADDSAT:%[0-9]+]]:_(i32) = G_SADDSAT [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[SADDSAT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_SADDSAT %0, %1 + $vgpr0 = COPY %2(i32) ... --- @@ -867,72 +867,72 @@ body: | ; GFX6-LABEL: name: saddsat_v2s32 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[UV]], [[C2]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SMAX]] - ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[UV]], [[C2]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMIN]] - ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[UV2]] - ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB]] - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[SMIN1]] - ; GFX6-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[UV1]], [[C2]] - ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SMAX2]] - ; GFX6-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[UV1]], [[C2]] - ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMIN2]] - ; GFX6-NEXT: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SUB3]], [[UV3]] - ; GFX6-NEXT: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB2]] - ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[SMIN3]] - ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(i32) = G_SMAX [[UV]], [[C2]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C]], [[SMAX]] + ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(i32) = G_SMIN [[UV]], [[C2]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C1]], [[SMIN]] + ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(i32) = G_SMAX [[SUB1]], [[UV2]] + ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(i32) = G_SMIN [[SMAX1]], [[SUB]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[UV]], [[SMIN1]] + ; GFX6-NEXT: [[SMAX2:%[0-9]+]]:_(i32) = G_SMAX [[UV1]], [[C2]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[C]], [[SMAX2]] + ; GFX6-NEXT: [[SMIN2:%[0-9]+]]:_(i32) = G_SMIN [[UV1]], [[C2]] + ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[C1]], [[SMIN2]] + 
; GFX6-NEXT: [[SMAX3:%[0-9]+]]:_(i32) = G_SMAX [[SUB3]], [[UV3]] + ; GFX6-NEXT: [[SMIN3:%[0-9]+]]:_(i32) = G_SMIN [[SMAX3]], [[SUB2]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[UV1]], [[SMIN3]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[ADD]](i32), [[ADD1]](i32) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX8-LABEL: name: saddsat_v2s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[UV]], [[C2]] - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SMAX]] - ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[UV]], [[C2]] - ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMIN]] - ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[UV2]] - ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB]] - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[SMIN1]] - ; GFX8-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[UV1]], [[C2]] - ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SMAX2]] - ; GFX8-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[UV1]], [[C2]] - ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMIN2]] - ; GFX8-NEXT: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SUB3]], [[UV3]] - ; GFX8-NEXT: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB2]] - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[SMIN3]] - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(i32) = G_SMAX [[UV]], [[C2]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C]], [[SMAX]] + ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(i32) = G_SMIN [[UV]], [[C2]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C1]], [[SMIN]] + ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(i32) = G_SMAX [[SUB1]], [[UV2]] + ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(i32) = G_SMIN [[SMAX1]], [[SUB]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[UV]], [[SMIN1]] + ; GFX8-NEXT: [[SMAX2:%[0-9]+]]:_(i32) = G_SMAX [[UV1]], [[C2]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[C]], [[SMAX2]] + ; GFX8-NEXT: [[SMIN2:%[0-9]+]]:_(i32) = G_SMIN [[UV1]], [[C2]] + ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[C1]], [[SMIN2]] + ; GFX8-NEXT: [[SMAX3:%[0-9]+]]:_(i32) = G_SMAX [[SUB3]], [[UV3]] + ; GFX8-NEXT: [[SMIN3:%[0-9]+]]:_(i32) = G_SMIN [[SMAX3]], [[SUB2]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[UV1]], [[SMIN3]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR 
[[ADD]](i32), [[ADD1]](i32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX9-LABEL: name: saddsat_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9-NEXT: [[SADDSAT:%[0-9]+]]:_(s32) = G_SADDSAT [[UV]], [[UV2]] - ; GFX9-NEXT: [[SADDSAT1:%[0-9]+]]:_(s32) = G_SADDSAT [[UV1]], [[UV3]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SADDSAT]](s32), [[SADDSAT1]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = G_SADDSAT %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX9-NEXT: [[SADDSAT:%[0-9]+]]:_(i32) = G_SADDSAT [[UV]], [[UV2]] + ; GFX9-NEXT: [[SADDSAT1:%[0-9]+]]:_(i32) = G_SADDSAT [[UV1]], [[UV3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SADDSAT]](i32), [[SADDSAT1]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i32>) = G_SADDSAT %0, %1 + $vgpr0_vgpr1 = COPY %2(<2 x i32>) ... --- @@ -944,84 +944,84 @@ body: | ; GFX6-LABEL: name: saddsat_s64 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] - ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[COPY]] - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s64), [[C]] - ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[MV]](s64) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY2]], [[C1]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) - ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] - ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; GFX6-NEXT: 
[[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[COPY2]] - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV]], [[UV2]] + ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[MV]](i64), [[COPY]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[COPY1]](i64), [[C]] + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(i1) = G_XOR [[ICMP1]], [[ICMP]] + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(i64) = COPY [[MV]](i64) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[COPY2]], [[C1]](i32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 -9223372036854775808 + ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR]](i64) + ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C2]](i64) + ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UV4]], [[UV6]] + ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] + ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO2]](i32), [[UADDE2]](i32) + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[XOR]](i1), [[MV1]], [[COPY2]] + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](i64) ; ; GFX8-LABEL: name: saddsat_s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] - ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[COPY]] - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s64), [[C]] - ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[MV]](s64) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY2]], [[C1]](s32) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) - ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] - ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES 
[[UADDO2]](s32), [[UADDE2]](s32) - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[COPY2]] - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV]], [[UV2]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[MV]](i64), [[COPY]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[COPY1]](i64), [[C]] + ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(i1) = G_XOR [[ICMP1]], [[ICMP]] + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(i64) = COPY [[MV]](i64) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[COPY2]], [[C1]](i32) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 -9223372036854775808 + ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR]](i64) + ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C2]](i64) + ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UV4]], [[UV6]] + ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] + ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO2]](i32), [[UADDE2]](i32) + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[XOR]](i1), [[MV1]], [[COPY2]] + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](i64) ; ; GFX9-LABEL: name: saddsat_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[COPY]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s64), [[C]] - ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[MV]](s64) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY2]], [[C1]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) - ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] - ; 
GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[COPY2]] - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = G_SADDSAT %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV]], [[UV2]] + ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[MV]](i64), [[COPY]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[COPY1]](i64), [[C]] + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(i1) = G_XOR [[ICMP1]], [[ICMP]] + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i64) = COPY [[MV]](i64) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[COPY2]], [[C1]](i32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 -9223372036854775808 + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR]](i64) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C2]](i64) + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UV4]], [[UV6]] + ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO2]](i32), [[UADDE2]](i32) + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[XOR]](i1), [[MV1]], [[COPY2]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64) = G_SADDSAT %0, %1 + $vgpr0_vgpr1 = COPY %2(i64) ... 
--- @@ -1033,139 +1033,139 @@ body: | ; GFX6-LABEL: name: saddsat_v2s64 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] - ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[UV]] - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV2]](s64), [[C]] - ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[MV]](s64) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY2]], [[C1]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX6-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) - ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] - ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] - ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[COPY2]] - ; GFX6-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX6-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[UV12]], [[UV14]] - ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UV13]], [[UV15]], [[UADDO5]] - ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO4]](s32), [[UADDE4]](s32) - ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV2]](s64), [[UV1]] - ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV3]](s64), [[C]] - ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP2]] - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY [[MV2]](s64) - ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[COPY3]], [[C1]](s32) - ; GFX6-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) - ; GFX6-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) - ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UV16]], [[UV18]] - ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UV17]], [[UV19]], [[UADDO7]] - ; GFX6-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO6]](s32), [[UADDE6]](s32) - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[XOR1]](s1), [[MV3]], [[COPY3]] - ; GFX6-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY1]](<2 x i64>) + ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV]](i64) + ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV4]], [[UV6]] + ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[MV]](i64), [[UV]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[UV2]](i64), [[C]] + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(i1) = G_XOR [[ICMP1]], [[ICMP]] + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(i64) = COPY [[MV]](i64) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[COPY2]], [[C1]](i32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 -9223372036854775808 + ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR]](i64) + ; GFX6-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C2]](i64) + ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UV8]], [[UV10]] + ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] + ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO2]](i32), [[UADDE2]](i32) + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[XOR]](i1), [[MV1]], [[COPY2]] + ; GFX6-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV1]](i64) + ; GFX6-NEXT: [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV3]](i64) + ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(i32), [[UADDO5:%[0-9]+]]:_(i1) = G_UADDO [[UV12]], [[UV14]] + ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(i32), [[UADDE5:%[0-9]+]]:_(i1) = G_UADDE [[UV13]], [[UV15]], [[UADDO5]] + ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO4]](i32), [[UADDE4]](i32) + ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[MV2]](i64), [[UV1]] + ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[UV3]](i64), [[C]] + ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(i1) = G_XOR [[ICMP3]], [[ICMP2]] + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:_(i64) = COPY [[MV2]](i64) + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[COPY3]], [[C1]](i32) + ; GFX6-NEXT: [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR1]](i64) + ; GFX6-NEXT: [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C2]](i64) + ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(i32), [[UADDO7:%[0-9]+]]:_(i1) = G_UADDO [[UV16]], [[UV18]] + ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(i32), [[UADDE7:%[0-9]+]]:_(i1) = G_UADDE [[UV17]], [[UV19]], [[UADDO7]] + ; GFX6-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO6]](i32), [[UADDE6]](i32) + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[XOR1]](i1), [[MV3]], [[COPY3]] + ; GFX6-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[SELECT]](i64), [[SELECT1]](i64) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; GFX8-LABEL: name: saddsat_v2s64 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] - ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[UV]] - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV2]](s64), [[C]] - ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[MV]](s64) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY2]], [[C1]](s32) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) - ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] - ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] - ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[COPY2]] - ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[UV12]], [[UV14]] - ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UV13]], [[UV15]], [[UADDO5]] - ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO4]](s32), [[UADDE4]](s32) - ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV2]](s64), [[UV1]] - ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV3]](s64), [[C]] - ; GFX8-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP2]] - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY [[MV2]](s64) - ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[COPY3]], [[C1]](s32) - ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) - ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UV16]], [[UV18]] - ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UV17]], [[UV19]], [[UADDO7]] - ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO6]](s32), 
[[UADDE6]](s32) - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[XOR1]](s1), [[MV3]], [[COPY3]] - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY1]](<2 x i64>) + ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV]](i64) + ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV4]], [[UV6]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[MV]](i64), [[UV]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[UV2]](i64), [[C]] + ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(i1) = G_XOR [[ICMP1]], [[ICMP]] + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(i64) = COPY [[MV]](i64) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[COPY2]], [[C1]](i32) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 -9223372036854775808 + ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR]](i64) + ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C2]](i64) + ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UV8]], [[UV10]] + ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] + ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO2]](i32), [[UADDE2]](i32) + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[XOR]](i1), [[MV1]], [[COPY2]] + ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV1]](i64) + ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV3]](i64) + ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(i32), [[UADDO5:%[0-9]+]]:_(i1) = G_UADDO [[UV12]], [[UV14]] + ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(i32), [[UADDE5:%[0-9]+]]:_(i1) = G_UADDE [[UV13]], [[UV15]], [[UADDO5]] + ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO4]](i32), [[UADDE4]](i32) + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[MV2]](i64), [[UV1]] + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[UV3]](i64), [[C]] + ; GFX8-NEXT: [[XOR1:%[0-9]+]]:_(i1) = G_XOR [[ICMP3]], [[ICMP2]] + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(i64) = COPY [[MV2]](i64) + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[COPY3]], [[C1]](i32) + ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR1]](i64) + ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C2]](i64) + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(i32), [[UADDO7:%[0-9]+]]:_(i1) = G_UADDO [[UV16]], [[UV18]] + ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(i32), [[UADDE7:%[0-9]+]]:_(i1) = G_UADDE [[UV17]], [[UV19]], [[UADDO7]] + ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO6]](i32), [[UADDE6]](i32) + ; 
GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[XOR1]](i1), [[MV3]], [[COPY3]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[SELECT]](i64), [[SELECT1]](i64) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; GFX9-LABEL: name: saddsat_v2s64 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[UV]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV2]](s64), [[C]] - ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[MV]](s64) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY2]], [[C1]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) - ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] - ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] - ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[COPY2]] - ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[UV12]], [[UV14]] - ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UV13]], [[UV15]], [[UADDO5]] - ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO4]](s32), [[UADDE4]](s32) - ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV2]](s64), [[UV1]] - ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV3]](s64), [[C]] - ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP2]] - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY [[MV2]](s64) - ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[COPY3]], [[C1]](s32) - ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) - ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UV16]], [[UV18]] - ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UV17]], 
[[UV19]], [[UADDO7]] - ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO6]](s32), [[UADDE6]](s32) - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[XOR1]](s1), [[MV3]], [[COPY3]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - %2:_(<2 x s64>) = G_SADDSAT %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY1]](<2 x i64>) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV]](i64) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV4]], [[UV6]] + ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[MV]](i64), [[UV]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[UV2]](i64), [[C]] + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(i1) = G_XOR [[ICMP1]], [[ICMP]] + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i64) = COPY [[MV]](i64) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[COPY2]], [[C1]](i32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 -9223372036854775808 + ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR]](i64) + ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C2]](i64) + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UV8]], [[UV10]] + ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO2]](i32), [[UADDE2]](i32) + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[XOR]](i1), [[MV1]], [[COPY2]] + ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV1]](i64) + ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV3]](i64) + ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(i32), [[UADDO5:%[0-9]+]]:_(i1) = G_UADDO [[UV12]], [[UV14]] + ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(i32), [[UADDE5:%[0-9]+]]:_(i1) = G_UADDE [[UV13]], [[UV15]], [[UADDO5]] + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO4]](i32), [[UADDE4]](i32) + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[MV2]](i64), [[UV1]] + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[UV3]](i64), [[C]] + ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(i1) = G_XOR [[ICMP3]], [[ICMP2]] + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i64) = COPY [[MV2]](i64) + ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[COPY3]], [[C1]](i32) + ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR1]](i64) + ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C2]](i64) + ; GFX9-NEXT: 
[[UADDO6:%[0-9]+]]:_(i32), [[UADDO7:%[0-9]+]]:_(i1) = G_UADDO [[UV16]], [[UV18]] + ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(i32), [[UADDE7:%[0-9]+]]:_(i1) = G_UADDE [[UV17]], [[UV19]], [[UADDO7]] + ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO6]](i32), [[UADDE6]](i32) + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[XOR1]](i1), [[MV3]], [[COPY3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[SELECT]](i64), [[SELECT1]](i64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(<2 x i64>) = G_SADDSAT %0, %1 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<2 x i64>) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sbfx.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sbfx.mir index 0e6b692cbcfb8..f63b8a2347be6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sbfx.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sbfx.mir @@ -12,16 +12,16 @@ body: | ; GCN-LABEL: name: test_sbfx_s32 ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: %copy:_(s32) = COPY $vgpr0 - ; GCN-NEXT: %offset:_(s32) = COPY $vgpr1 - ; GCN-NEXT: %width:_(s32) = COPY $vgpr2 - ; GCN-NEXT: %sbfx:_(s32) = G_SBFX %copy, %offset(s32), %width - ; GCN-NEXT: $vgpr0 = COPY %sbfx(s32) - %copy:_(s32) = COPY $vgpr0 - %offset:_(s32) = COPY $vgpr1 - %width:_(s32) = COPY $vgpr2 - %sbfx:_(s32) = G_SBFX %copy, %offset(s32), %width - $vgpr0 = COPY %sbfx(s32) + ; GCN-NEXT: %copy:_(i32) = COPY $vgpr0 + ; GCN-NEXT: %offset:_(i32) = COPY $vgpr1 + ; GCN-NEXT: %width:_(i32) = COPY $vgpr2 + ; GCN-NEXT: %sbfx:_(i32) = G_SBFX %copy, %offset(i32), %width + ; GCN-NEXT: $vgpr0 = COPY %sbfx(i32) + %copy:_(i32) = COPY $vgpr0 + %offset:_(i32) = COPY $vgpr1 + %width:_(i32) = COPY $vgpr2 + %sbfx:_(i32) = G_SBFX %copy, %offset(i32), %width + $vgpr0 = COPY %sbfx(i32) ... --- @@ -33,16 +33,16 @@ body: | ; GCN-LABEL: name: test_sbfx_s64 ; GCN: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: %copy:_(s64) = COPY $vgpr0_vgpr1 - ; GCN-NEXT: %offset:_(s32) = COPY $vgpr2 - ; GCN-NEXT: %width:_(s32) = COPY $vgpr3 - ; GCN-NEXT: %sbfx:_(s64) = G_SBFX %copy, %offset(s32), %width - ; GCN-NEXT: $vgpr0_vgpr1 = COPY %sbfx(s64) - %copy:_(s64) = COPY $vgpr0_vgpr1 - %offset:_(s32) = COPY $vgpr2 - %width:_(s32) = COPY $vgpr3 - %sbfx:_(s64) = G_SBFX %copy, %offset(s32), %width - $vgpr0_vgpr1 = COPY %sbfx(s64) + ; GCN-NEXT: %copy:_(i64) = COPY $vgpr0_vgpr1 + ; GCN-NEXT: %offset:_(i32) = COPY $vgpr2 + ; GCN-NEXT: %width:_(i32) = COPY $vgpr3 + ; GCN-NEXT: %sbfx:_(i64) = G_SBFX %copy, %offset(i32), %width + ; GCN-NEXT: $vgpr0_vgpr1 = COPY %sbfx(i64) + %copy:_(i64) = COPY $vgpr0_vgpr1 + %offset:_(i32) = COPY $vgpr2 + %width:_(i32) = COPY $vgpr3 + %sbfx:_(i64) = G_SBFX %copy, %offset(i32), %width + $vgpr0_vgpr1 = COPY %sbfx(i64) ... 
--- @@ -54,24 +54,24 @@ body: | ; GCN-LABEL: name: test_sbfx_s8 ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GCN-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GCN-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; GCN-NEXT: [[SBFX:%[0-9]+]]:_(s32) = G_SBFX [[COPY]], [[AND]](s32), [[AND1]] - ; GCN-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SBFX]], 8 - ; GCN-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %copy:_(s8) = G_TRUNC %0 - %offset:_(s8) = G_TRUNC %1 - %width:_(s8) = G_TRUNC %2 - %sbfx:_(s8) = G_SBFX %copy, %offset, %width - %4:_(s32) = G_SEXT %sbfx - $vgpr0 = COPY %4 + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GCN-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; GCN-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GCN-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY2]], [[C]] + ; GCN-NEXT: [[SBFX:%[0-9]+]]:_(i32) = G_SBFX [[COPY]], [[AND]](i32), [[AND1]] + ; GCN-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[SBFX]], 8 + ; GCN-NEXT: $vgpr0 = COPY [[SEXT_INREG]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %copy:_(i8) = G_TRUNC %0(i32) + %offset:_(i8) = G_TRUNC %1(i32) + %width:_(i8) = G_TRUNC %2(i32) + %sbfx:_(i8) = G_SBFX %copy, %offset(i8), %width + %7:_(i32) = G_SEXT %sbfx(i8) + $vgpr0 = COPY %7(i32) ... --- @@ -83,22 +83,22 @@ body: | ; GCN-LABEL: name: test_sbfx_s16 ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GCN-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GCN-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; GCN-NEXT: [[SBFX:%[0-9]+]]:_(s32) = G_SBFX [[COPY]], [[AND]](s32), [[AND1]] - ; GCN-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SBFX]], 16 - ; GCN-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %copy:_(s16) = G_TRUNC %0 - %offset:_(s16) = G_TRUNC %1 - %width:_(s16) = G_TRUNC %2 - %sbfx:_(s16) = G_SBFX %copy, %offset, %width - %4:_(s32) = G_SEXT %sbfx - $vgpr0 = COPY %4 + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GCN-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GCN-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GCN-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY2]], [[C]] + ; GCN-NEXT: [[SBFX:%[0-9]+]]:_(i32) = G_SBFX [[COPY]], [[AND]](i32), [[AND1]] + ; GCN-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[SBFX]], 16 + ; GCN-NEXT: $vgpr0 = COPY [[SEXT_INREG]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %copy:_(i16) = G_TRUNC %0(i32) + %offset:_(i16) = G_TRUNC %1(i32) + %width:_(i16) = G_TRUNC %2(i32) + %sbfx:_(i16) = G_SBFX %copy, %offset(i16), %width + %7:_(i32) = G_SEXT %sbfx(i16) + $vgpr0 = COPY %7(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir index f9ec3bca78931..9b0cc8848c3d2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir @@ -14,159 +14,162 @@ body: | ; GFX6-LABEL: name: test_sdiv_s32 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) - ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[COPY1]], [[C]](s32) - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[ASHR]] - ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[ASHR1]] - ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] - ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] - ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) - ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] - ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] - ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] - ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]] - ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]] - ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] - ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]] - ; GFX6-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] - ; GFX6-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] - ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] - ; GFX6-NEXT: $vgpr0 = COPY [[SUB3]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[COPY]], [[C]](i32) + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[COPY1]], [[C]](i32) + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[COPY]], [[ASHR]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[COPY1]], [[ASHR1]] + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[ADD]], [[ASHR]] + ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[XOR1]](i32) + ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX6-NEXT: 
[[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[XOR1]] + ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[XOR]], [[MUL1]] + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[XOR1]] + ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UMULH1]], [[C3]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[ADD3]], [[UMULH1]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT1]](i32), [[XOR1]] + ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[SELECT]], [[C3]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[ADD4]], [[SELECT]] + ; GFX6-NEXT: [[XOR2:%[0-9]+]]:_(i32) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX6-NEXT: [[XOR3:%[0-9]+]]:_(i32) = G_XOR [[SELECT2]], [[XOR2]] + ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX6-NEXT: $vgpr0 = COPY [[SUB3]](i32) + ; ; GFX8-LABEL: name: test_sdiv_s32 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) - ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[COPY1]], [[C]](s32) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[ASHR]] - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[ASHR1]] - ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] - ; GFX8-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] - ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) - ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] - ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]] - ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s32) 
= G_SUB [[SUB1]], [[XOR1]] - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]] - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] - ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]] - ; GFX8-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] - ; GFX8-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] - ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] - ; GFX8-NEXT: $vgpr0 = COPY [[SUB3]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[COPY]], [[C]](i32) + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[COPY1]], [[C]](i32) + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[COPY]], [[ASHR]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[COPY1]], [[ASHR1]] + ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[ADD]], [[ASHR]] + ; GFX8-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[XOR1]](i32) + ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[XOR1]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[XOR]], [[MUL1]] + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[XOR1]] + ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UMULH1]], [[C3]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[ADD3]], [[UMULH1]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT1]](i32), [[XOR1]] + ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[SELECT]], [[C3]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[ADD4]], [[SELECT]] + ; GFX8-NEXT: [[XOR2:%[0-9]+]]:_(i32) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX8-NEXT: [[XOR3:%[0-9]+]]:_(i32) = G_XOR [[SELECT2]], [[XOR2]] + ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX8-NEXT: $vgpr0 = COPY [[SUB3]](i32) + ; ; GFX9-LABEL: name: test_sdiv_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[COPY1]], [[C]](s32) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[ASHR]] - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[ASHR1]] - ; GFX9-NEXT: 
[[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] - ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] - ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) - ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] - ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]] - ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]] - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]] - ; GFX9-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] - ; GFX9-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] - ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] - ; GFX9-NEXT: $vgpr0 = COPY [[SUB3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[COPY1]], [[C]](i32) + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[COPY]], [[ASHR]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[COPY1]], [[ASHR1]] + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[ADD]], [[ASHR]] + ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[XOR1]](i32) + ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[XOR1]] + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[XOR]], [[MUL1]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), 
[[XOR1]] + ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UMULH1]], [[C3]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[ADD3]], [[UMULH1]] + ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT1]](i32), [[XOR1]] + ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[SELECT]], [[C3]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[ADD4]], [[SELECT]] + ; GFX9-NEXT: [[XOR2:%[0-9]+]]:_(i32) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX9-NEXT: [[XOR3:%[0-9]+]]:_(i32) = G_XOR [[SELECT2]], [[XOR2]] + ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX9-NEXT: $vgpr0 = COPY [[SUB3]](i32) + ; ; GFX10-LABEL: name: test_sdiv_s32 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[COPY1]], [[C]](s32) - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[ASHR]] - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[ASHR1]] - ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] - ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] - ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) - ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] - ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] - ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] - ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]] - ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] - ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]] - ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] - ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]] - ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] - ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] - ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] - ; GFX10-NEXT: $vgpr0 = COPY [[SUB3]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_SDIV %0, %1 - $vgpr0 = COPY %2 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; 
GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[COPY1]], [[C]](i32) + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[COPY]], [[ASHR]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[COPY1]], [[ASHR1]] + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[ADD]], [[ASHR]] + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[XOR1]](i32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[XOR1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[XOR]], [[MUL1]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[XOR1]] + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UMULH1]], [[C3]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[ADD3]], [[UMULH1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT1]](i32), [[XOR1]] + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[SELECT]], [[C3]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[ADD4]], [[SELECT]] + ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(i32) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(i32) = G_XOR [[SELECT2]], [[XOR2]] + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX10-NEXT: $vgpr0 = COPY [[SUB3]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_SDIV %0, %1 + $vgpr0 = COPY %2(i32) ... 
--- @@ -178,283 +181,286 @@ body: | ; GFX6-LABEL: name: test_sdiv_v2s32 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV]], [[C]](s32) - ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[UV2]], [[C]](s32) - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[ASHR]] - ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[ASHR1]] - ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] - ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] - ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) - ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] - ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] - ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] - ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]] - ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]] - ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] - ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]] - ; GFX6-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] - ; GFX6-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] - ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] - ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32) - ; GFX6-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[UV3]], [[C]](s32) - ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[ASHR2]] - ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[ASHR3]] - ; GFX6-NEXT: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR2]] - ; GFX6-NEXT: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[ADD6]], [[ASHR3]] - ; GFX6-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR5]](s32) - ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) - ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C1]] - ; GFX6-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR5]] - ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB4]], [[FPTOUI1]] - 
; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] - ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] - ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[XOR4]], [[ADD7]] - ; GFX6-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[XOR5]] - ; GFX6-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[XOR4]], [[MUL3]] - ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[XOR5]] - ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[C3]] - ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[ADD8]], [[UMULH3]] - ; GFX6-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[XOR5]] - ; GFX6-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB6]], [[SUB5]] - ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[XOR5]] - ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[SELECT3]], [[C3]] - ; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD9]], [[SELECT3]] - ; GFX6-NEXT: [[XOR6:%[0-9]+]]:_(s32) = G_XOR [[ASHR2]], [[ASHR3]] - ; GFX6-NEXT: [[XOR7:%[0-9]+]]:_(s32) = G_XOR [[SELECT5]], [[XOR6]] - ; GFX6-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]] - ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB3]](s32), [[SUB7]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[UV]], [[C]](i32) + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[UV2]], [[C]](i32) + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[UV]], [[ASHR]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[UV2]], [[ASHR1]] + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[ADD]], [[ASHR]] + ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[XOR1]](i32) + ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[XOR1]] + ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[XOR]], [[MUL1]] + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[XOR1]] + ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UMULH1]], [[C3]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[ADD3]], [[UMULH1]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), 
[[SELECT1]](i32), [[XOR1]] + ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[SELECT]], [[C3]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[ADD4]], [[SELECT]] + ; GFX6-NEXT: [[XOR2:%[0-9]+]]:_(i32) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX6-NEXT: [[XOR3:%[0-9]+]]:_(i32) = G_XOR [[SELECT2]], [[XOR2]] + ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(i32) = G_ASHR [[UV1]], [[C]](i32) + ; GFX6-NEXT: [[ASHR3:%[0-9]+]]:_(i32) = G_ASHR [[UV3]], [[C]](i32) + ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[UV1]], [[ASHR2]] + ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[UV3]], [[ASHR3]] + ; GFX6-NEXT: [[XOR4:%[0-9]+]]:_(i32) = G_XOR [[ADD5]], [[ASHR2]] + ; GFX6-NEXT: [[XOR5:%[0-9]+]]:_(i32) = G_XOR [[ADD6]], [[ASHR3]] + ; GFX6-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[XOR5]](i32) + ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](f32) + ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C1]] + ; GFX6-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL1]](f32) + ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[XOR5]] + ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[SUB4]], [[FPTOUI1]] + ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[XOR4]], [[ADD7]] + ; GFX6-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UMULH3]], [[XOR5]] + ; GFX6-NEXT: [[SUB5:%[0-9]+]]:_(i32) = G_SUB [[XOR4]], [[MUL3]] + ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB5]](i32), [[XOR5]] + ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(i32) = G_ADD [[UMULH3]], [[C3]] + ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[ADD8]], [[UMULH3]] + ; GFX6-NEXT: [[SUB6:%[0-9]+]]:_(i32) = G_SUB [[SUB5]], [[XOR5]] + ; GFX6-NEXT: [[SELECT4:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SUB6]], [[SUB5]] + ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT4]](i32), [[XOR5]] + ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(i32) = G_ADD [[SELECT3]], [[C3]] + ; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(i32) = G_SELECT [[ICMP3]](i1), [[ADD9]], [[SELECT3]] + ; GFX6-NEXT: [[XOR6:%[0-9]+]]:_(i32) = G_XOR [[ASHR2]], [[ASHR3]] + ; GFX6-NEXT: [[XOR7:%[0-9]+]]:_(i32) = G_XOR [[SELECT5]], [[XOR6]] + ; GFX6-NEXT: [[SUB7:%[0-9]+]]:_(i32) = G_SUB [[XOR7]], [[XOR6]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SUB3]](i32), [[SUB7]](i32) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + ; ; GFX8-LABEL: name: test_sdiv_v2s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV]], [[C]](s32) - ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[UV2]], [[C]](s32) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[ASHR]] - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[ASHR1]] - ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] - ; GFX8-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] - ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) - ; GFX8-NEXT: 
[[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] - ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]] - ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]] - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] - ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]] - ; GFX8-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] - ; GFX8-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] - ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] - ; GFX8-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32) - ; GFX8-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[UV3]], [[C]](s32) - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[ASHR2]] - ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[ASHR3]] - ; GFX8-NEXT: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR2]] - ; GFX8-NEXT: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[ADD6]], [[ASHR3]] - ; GFX8-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR5]](s32) - ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) - ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C1]] - ; GFX8-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR5]] - ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB4]], [[FPTOUI1]] - ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] - ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] - ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[XOR4]], [[ADD7]] - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[XOR5]] - ; GFX8-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[XOR4]], [[MUL3]] - ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[XOR5]] - ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[C3]] - ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[ADD8]], [[UMULH3]] - ; GFX8-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[XOR5]] - ; GFX8-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB6]], [[SUB5]] - ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[XOR5]] - ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[SELECT3]], [[C3]] - ; GFX8-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD9]], [[SELECT3]] - ; GFX8-NEXT: 
[[XOR6:%[0-9]+]]:_(s32) = G_XOR [[ASHR2]], [[ASHR3]] - ; GFX8-NEXT: [[XOR7:%[0-9]+]]:_(s32) = G_XOR [[SELECT5]], [[XOR6]] - ; GFX8-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]] - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB3]](s32), [[SUB7]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[UV]], [[C]](i32) + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[UV2]], [[C]](i32) + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[UV]], [[ASHR]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[UV2]], [[ASHR1]] + ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[ADD]], [[ASHR]] + ; GFX8-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[XOR1]](i32) + ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[XOR1]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[XOR]], [[MUL1]] + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[XOR1]] + ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UMULH1]], [[C3]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[ADD3]], [[UMULH1]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT1]](i32), [[XOR1]] + ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[SELECT]], [[C3]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[ADD4]], [[SELECT]] + ; GFX8-NEXT: [[XOR2:%[0-9]+]]:_(i32) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX8-NEXT: [[XOR3:%[0-9]+]]:_(i32) = G_XOR [[SELECT2]], [[XOR2]] + ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX8-NEXT: [[ASHR2:%[0-9]+]]:_(i32) = G_ASHR [[UV1]], [[C]](i32) + ; GFX8-NEXT: [[ASHR3:%[0-9]+]]:_(i32) = G_ASHR [[UV3]], [[C]](i32) + ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[UV1]], [[ASHR2]] + ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[UV3]], [[ASHR3]] + ; GFX8-NEXT: [[XOR4:%[0-9]+]]:_(i32) = G_XOR [[ADD5]], [[ASHR2]] + ; GFX8-NEXT: [[XOR5:%[0-9]+]]:_(i32) = G_XOR [[ADD6]], [[ASHR3]] + ; GFX8-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[XOR5]](i32) + ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](f32) + ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL 
[[AMDGPU_RCP_IFLAG1]], [[C1]] + ; GFX8-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL1]](f32) + ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[XOR5]] + ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[SUB4]], [[FPTOUI1]] + ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[XOR4]], [[ADD7]] + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UMULH3]], [[XOR5]] + ; GFX8-NEXT: [[SUB5:%[0-9]+]]:_(i32) = G_SUB [[XOR4]], [[MUL3]] + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB5]](i32), [[XOR5]] + ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(i32) = G_ADD [[UMULH3]], [[C3]] + ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[ADD8]], [[UMULH3]] + ; GFX8-NEXT: [[SUB6:%[0-9]+]]:_(i32) = G_SUB [[SUB5]], [[XOR5]] + ; GFX8-NEXT: [[SELECT4:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SUB6]], [[SUB5]] + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT4]](i32), [[XOR5]] + ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(i32) = G_ADD [[SELECT3]], [[C3]] + ; GFX8-NEXT: [[SELECT5:%[0-9]+]]:_(i32) = G_SELECT [[ICMP3]](i1), [[ADD9]], [[SELECT3]] + ; GFX8-NEXT: [[XOR6:%[0-9]+]]:_(i32) = G_XOR [[ASHR2]], [[ASHR3]] + ; GFX8-NEXT: [[XOR7:%[0-9]+]]:_(i32) = G_XOR [[SELECT5]], [[XOR6]] + ; GFX8-NEXT: [[SUB7:%[0-9]+]]:_(i32) = G_SUB [[XOR7]], [[XOR6]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SUB3]](i32), [[SUB7]](i32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + ; ; GFX9-LABEL: name: test_sdiv_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV]], [[C]](s32) - ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[UV2]], [[C]](s32) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[ASHR]] - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[ASHR1]] - ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] - ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] - ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) - ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] - ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = 
G_ADD [[UMULH1]], [[C3]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]] - ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]] - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]] - ; GFX9-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] - ; GFX9-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] - ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] - ; GFX9-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32) - ; GFX9-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[UV3]], [[C]](s32) - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[ASHR2]] - ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[ASHR3]] - ; GFX9-NEXT: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR2]] - ; GFX9-NEXT: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[ADD6]], [[ASHR3]] - ; GFX9-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR5]](s32) - ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) - ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C1]] - ; GFX9-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR5]] - ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB4]], [[FPTOUI1]] - ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] - ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] - ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[XOR4]], [[ADD7]] - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[XOR5]] - ; GFX9-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[XOR4]], [[MUL3]] - ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[XOR5]] - ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[C3]] - ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[ADD8]], [[UMULH3]] - ; GFX9-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[XOR5]] - ; GFX9-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB6]], [[SUB5]] - ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[XOR5]] - ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[SELECT3]], [[C3]] - ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD9]], [[SELECT3]] - ; GFX9-NEXT: [[XOR6:%[0-9]+]]:_(s32) = G_XOR [[ASHR2]], [[ASHR3]] - ; GFX9-NEXT: [[XOR7:%[0-9]+]]:_(s32) = G_XOR [[SELECT5]], [[XOR6]] - ; GFX9-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB3]](s32), [[SUB7]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[UV]], [[C]](i32) + ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[UV2]], [[C]](i32) + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[UV]], [[ASHR]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[UV2]], 
[[ASHR1]] + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[ADD]], [[ASHR]] + ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[XOR1]](i32) + ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[XOR1]] + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[XOR]], [[MUL1]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[XOR1]] + ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UMULH1]], [[C3]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[ADD3]], [[UMULH1]] + ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT1]](i32), [[XOR1]] + ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[SELECT]], [[C3]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[ADD4]], [[SELECT]] + ; GFX9-NEXT: [[XOR2:%[0-9]+]]:_(i32) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX9-NEXT: [[XOR3:%[0-9]+]]:_(i32) = G_XOR [[SELECT2]], [[XOR2]] + ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX9-NEXT: [[ASHR2:%[0-9]+]]:_(i32) = G_ASHR [[UV1]], [[C]](i32) + ; GFX9-NEXT: [[ASHR3:%[0-9]+]]:_(i32) = G_ASHR [[UV3]], [[C]](i32) + ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[UV1]], [[ASHR2]] + ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[UV3]], [[ASHR3]] + ; GFX9-NEXT: [[XOR4:%[0-9]+]]:_(i32) = G_XOR [[ADD5]], [[ASHR2]] + ; GFX9-NEXT: [[XOR5:%[0-9]+]]:_(i32) = G_XOR [[ADD6]], [[ASHR3]] + ; GFX9-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[XOR5]](i32) + ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](f32) + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C1]] + ; GFX9-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL1]](f32) + ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[XOR5]] + ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[SUB4]], [[FPTOUI1]] + ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[XOR4]], [[ADD7]] + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UMULH3]], [[XOR5]] + ; GFX9-NEXT: [[SUB5:%[0-9]+]]:_(i32) = G_SUB [[XOR4]], [[MUL3]] + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB5]](i32), [[XOR5]] + ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(i32) = G_ADD [[UMULH3]], [[C3]] + ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[ADD8]], [[UMULH3]] + ; GFX9-NEXT: [[SUB6:%[0-9]+]]:_(i32) = G_SUB [[SUB5]], [[XOR5]] + ; GFX9-NEXT: [[SELECT4:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SUB6]], [[SUB5]] + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(i1) 
= G_ICMP intpred(uge), [[SELECT4]](i32), [[XOR5]] + ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(i32) = G_ADD [[SELECT3]], [[C3]] + ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(i32) = G_SELECT [[ICMP3]](i1), [[ADD9]], [[SELECT3]] + ; GFX9-NEXT: [[XOR6:%[0-9]+]]:_(i32) = G_XOR [[ASHR2]], [[ASHR3]] + ; GFX9-NEXT: [[XOR7:%[0-9]+]]:_(i32) = G_XOR [[SELECT5]], [[XOR6]] + ; GFX9-NEXT: [[SUB7:%[0-9]+]]:_(i32) = G_SUB [[XOR7]], [[XOR6]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SUB3]](i32), [[SUB7]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + ; ; GFX10-LABEL: name: test_sdiv_v2s32 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV]], [[C]](s32) - ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[UV2]], [[C]](s32) - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[ASHR]] - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[ASHR1]] - ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] - ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] - ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) - ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] - ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] - ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] - ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]] - ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] - ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]] - ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] - ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]] - ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] - ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] - ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] - ; GFX10-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32) - ; GFX10-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[UV3]], [[C]](s32) - ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[ASHR2]] - ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV3]], 
[[ASHR3]] - ; GFX10-NEXT: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR2]] - ; GFX10-NEXT: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[ADD6]], [[ASHR3]] - ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR5]](s32) - ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) - ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C1]] - ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR5]] - ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB4]], [[FPTOUI1]] - ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] - ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] - ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[XOR4]], [[ADD7]] - ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[XOR5]] - ; GFX10-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[XOR4]], [[MUL3]] - ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[XOR5]] - ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[C3]] - ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[ADD8]], [[UMULH3]] - ; GFX10-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[XOR5]] - ; GFX10-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB6]], [[SUB5]] - ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[XOR5]] - ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[SELECT3]], [[C3]] - ; GFX10-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD9]], [[SELECT3]] - ; GFX10-NEXT: [[XOR6:%[0-9]+]]:_(s32) = G_XOR [[ASHR2]], [[ASHR3]] - ; GFX10-NEXT: [[XOR7:%[0-9]+]]:_(s32) = G_XOR [[SELECT5]], [[XOR6]] - ; GFX10-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]] - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB3]](s32), [[SUB7]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = G_SDIV %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[UV]], [[C]](i32) + ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[UV2]], [[C]](i32) + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[UV]], [[ASHR]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[UV2]], [[ASHR1]] + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[ADD]], [[ASHR]] + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[XOR1]](i32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[XOR1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: 
[[ADD2:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[XOR]], [[MUL1]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[XOR1]] + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UMULH1]], [[C3]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[ADD3]], [[UMULH1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT1]](i32), [[XOR1]] + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[SELECT]], [[C3]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[ADD4]], [[SELECT]] + ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(i32) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(i32) = G_XOR [[SELECT2]], [[XOR2]] + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX10-NEXT: [[ASHR2:%[0-9]+]]:_(i32) = G_ASHR [[UV1]], [[C]](i32) + ; GFX10-NEXT: [[ASHR3:%[0-9]+]]:_(i32) = G_ASHR [[UV3]], [[C]](i32) + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[UV1]], [[ASHR2]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[UV3]], [[ASHR3]] + ; GFX10-NEXT: [[XOR4:%[0-9]+]]:_(i32) = G_XOR [[ADD5]], [[ASHR2]] + ; GFX10-NEXT: [[XOR5:%[0-9]+]]:_(i32) = G_XOR [[ADD6]], [[ASHR3]] + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[XOR5]](i32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](f32) + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C1]] + ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL1]](f32) + ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[XOR5]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[SUB4]], [[FPTOUI1]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[XOR4]], [[ADD7]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UMULH3]], [[XOR5]] + ; GFX10-NEXT: [[SUB5:%[0-9]+]]:_(i32) = G_SUB [[XOR4]], [[MUL3]] + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB5]](i32), [[XOR5]] + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(i32) = G_ADD [[UMULH3]], [[C3]] + ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[ADD8]], [[UMULH3]] + ; GFX10-NEXT: [[SUB6:%[0-9]+]]:_(i32) = G_SUB [[SUB5]], [[XOR5]] + ; GFX10-NEXT: [[SELECT4:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SUB6]], [[SUB5]] + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT4]](i32), [[XOR5]] + ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(i32) = G_ADD [[SELECT3]], [[C3]] + ; GFX10-NEXT: [[SELECT5:%[0-9]+]]:_(i32) = G_SELECT [[ICMP3]](i1), [[ADD9]], [[SELECT3]] + ; GFX10-NEXT: [[XOR6:%[0-9]+]]:_(i32) = G_XOR [[ASHR2]], [[ASHR3]] + ; GFX10-NEXT: [[XOR7:%[0-9]+]]:_(i32) = G_XOR [[SELECT5]], [[XOR6]] + ; GFX10-NEXT: [[SUB7:%[0-9]+]]:_(i32) = G_SUB [[XOR7]], [[XOR6]] + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SUB3]](i32), [[SUB7]](i32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i32>) = G_SDIV %0, %1 + $vgpr0_vgpr1 = COPY %2(<2 x i32>) ... 
--- @@ -466,695 +472,698 @@ body: | ; GFX6-LABEL: name: test_sdiv_s64 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32) - ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[COPY1]], [[C]](s32) - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] - ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) - ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] - ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] - ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]] - ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV8]](s32) - ; GFX6-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV9]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] - ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 - ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; GFX6-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] - ; GFX6-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; GFX6-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] - ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] - ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) - ; GFX6-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) - ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX6-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) - ; GFX6-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]] - ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] - ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] - ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] - ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] - ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] - ; GFX6-NEXT: 
[[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; GFX6-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] - ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] - ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] - ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] - ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] - ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] - ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] - ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] - ; GFX6-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] - ; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] - ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] - ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] - ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] - ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] - ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] - ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] - ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] - ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] - ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] - ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX6-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] - ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] - ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] - ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] - ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] - ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], 
[[ZEXT8]] - ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] - ; GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] - ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] - ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] - ; GFX6-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX6-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]] - ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE6]] - ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]] - ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] - ; GFX6-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE6]] - ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]] - ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE6]] - ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] - ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX6-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] - ; GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX6-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] - ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE6]] - ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] - ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD15]](s32) - ; GFX6-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]] - ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]] - ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD15]] - ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]] - ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] - ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]] - ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD17]], [[USUBO3]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD17]] - ; GFX6-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]] - ; GFX6-NEXT: 
[[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV20]] - ; GFX6-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV21]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX6-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV20]] - ; GFX6-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV21]], [[USUBO3]] - ; GFX6-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] - ; GFX6-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX6-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX6-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV22]] - ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV23]], [[UADDO39]] - ; GFX6-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) - ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV21]] - ; GFX6-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX6-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV20]] - ; GFX6-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX6-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV21]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX6-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX6-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV24]] - ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV25]], [[UADDO41]] - ; GFX6-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) - ; GFX6-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] - ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] - ; GFX6-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] - ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] - ; GFX6-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[ASHR1]] - ; GFX6-NEXT: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[XOR2]] - ; GFX6-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) - ; GFX6-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) - ; GFX6-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV28]] - ; GFX6-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV29]], [[USUBO7]] - ; GFX6-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE8]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[COPY]], [[C]](i32) + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[COPY1]], [[C]](i32) + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR]](i64) + ; GFX6-NEXT: 
[[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV]], [[UV2]] + ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR1]](i64) + ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UV4]], [[UV6]] + ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] + ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO2]](i32), [[UADDE2]](i32) + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(i64) = G_XOR [[MV]], [[ASHR]] + ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(i64) = G_XOR [[MV1]], [[ASHR1]] + ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[UV8]](i32) + ; GFX6-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[UV9]](i32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD]](f32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX6-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX6-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX6-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD1]](f32) + ; GFX6-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC]](f32) + ; GFX6-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX6-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C5]](i64) + ; GFX6-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV10]], [[UV12]] + ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] + ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ADD]], [[UMULH]] + ; GFX6-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(i32), [[UADDO5:%[0-9]+]]:_(i1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO5]](i1) + ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(i32), [[UADDO7:%[0-9]+]]:_(i1) = G_UADDO [[UADDO4]], [[UMULH1]] + ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO7]](i1) + ; GFX6-NEXT: 
[[ADD2:%[0-9]+]]:_(i32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(i32), [[UADDO9:%[0-9]+]]:_(i1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO9]](i1) + ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(i32), [[UADDO11:%[0-9]+]]:_(i1) = G_UADDO [[UADDO8]], [[UMULH3]] + ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO11]](i1) + ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(i32), [[UADDO13:%[0-9]+]]:_(i1) = G_UADDO [[UADDO10]], [[ADD2]] + ; GFX6-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO13]](i1) + ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(i32), [[UADDO15:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI]], [[UADDO12]] + ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(i32), [[UADDE5:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] + ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[UADDO14]] + ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(i32) = G_MUL [[USUBE]], [[UADDO14]] + ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[UADDE4]] + ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(i32) = G_UMULH [[USUBO]], [[UADDO14]] + ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[ADD6]], [[UMULH5]] + ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(i32) = G_MUL [[UADDE4]], [[MUL6]] + ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(i32) = G_MUL [[UADDO14]], [[ADD7]] + ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(i32) = G_UMULH [[UADDO14]], [[MUL6]] + ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(i32), [[UADDO17:%[0-9]+]]:_(i1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO17]](i1) + ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(i32), [[UADDO19:%[0-9]+]]:_(i1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX6-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO19]](i1) + ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(i32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(i32) = G_MUL [[UADDE4]], [[ADD7]] + ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(i32) = G_UMULH [[UADDE4]], [[MUL6]] + ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(i32) = G_UMULH [[UADDO14]], [[ADD7]] + ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(i32), [[UADDO21:%[0-9]+]]:_(i1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO21]](i1) + ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(i32), [[UADDO23:%[0-9]+]]:_(i1) = G_UADDO [[UADDO20]], [[UMULH8]] + ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO23]](i1) + ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(i32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(i32), [[UADDO25:%[0-9]+]]:_(i1) = G_UADDO [[UADDO22]], [[ADD8]] + ; GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO25]](i1) + ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(i32) = G_ADD [[ADD9]], [[ZEXT9]] + ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(i32) = G_UMULH [[UADDE4]], [[ADD7]] + ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(i32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX6-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(i32), [[UADDO27:%[0-9]+]]:_(i1) = G_UADDO [[UADDO14]], [[UADDO24]] + ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(i32), [[UADDE7:%[0-9]+]]:_(i1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] + ; 
GFX6-NEXT: [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR]](i64) + ; GFX6-NEXT: [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR]](i64) + ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(i32) = G_MUL [[UV17]], [[UADDO26]] + ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(i32) = G_MUL [[UV16]], [[UADDE6]] + ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(i32) = G_UMULH [[UV16]], [[UADDO26]] + ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(i32), [[UADDO29:%[0-9]+]]:_(i1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX6-NEXT: [[ZEXT10:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO29]](i1) + ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(i32), [[UADDO31:%[0-9]+]]:_(i1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO31]](i1) + ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(i32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(i32) = G_MUL [[UV17]], [[UADDE6]] + ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(i32) = G_UMULH [[UV17]], [[UADDO26]] + ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(i32) = G_UMULH [[UV16]], [[UADDE6]] + ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(i32), [[UADDO33:%[0-9]+]]:_(i1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO33]](i1) + ; GFX6-NEXT: [[UADDO34:%[0-9]+]]:_(i32), [[UADDO35:%[0-9]+]]:_(i1) = G_UADDO [[UADDO32]], [[UMULH12]] + ; GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO35]](i1) + ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(i32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX6-NEXT: [[UADDO36:%[0-9]+]]:_(i32), [[UADDO37:%[0-9]+]]:_(i1) = G_UADDO [[UADDO34]], [[ADD12]] + ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO37]](i1) + ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(i32) = G_ADD [[ADD13]], [[ZEXT14]] + ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(i32) = G_UMULH [[UV17]], [[UADDE6]] + ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(i32) = G_ADD [[UMULH13]], [[ADD14]] + ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO36]](i32), [[ADD15]](i32) + ; GFX6-NEXT: [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(i32) = G_MUL [[UV18]], [[UADDO36]] + ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(i32) = G_MUL [[UV19]], [[UADDO36]] + ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(i32) = G_MUL [[UV18]], [[ADD15]] + ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(i32) = G_UMULH [[UV18]], [[UADDO36]] + ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(i32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(i32) = G_ADD [[ADD16]], [[UMULH14]] + ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(i32), [[USUBO3:%[0-9]+]]:_(i1) = G_USUBO [[UV14]], [[MUL15]] + ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV15]], [[ADD17]], [[USUBO3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV15]], [[ADD17]] + ; GFX6-NEXT: [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE2]](i32), [[UV21]] + ; GFX6-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO2]](i32), [[UV20]] + ; GFX6-NEXT: [[SEXT1:%[0-9]+]]:_(i32) = G_SEXT [[ICMP1]](i1) + ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE2]](i32), [[UV21]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SEXT1]], [[SEXT]] + ; GFX6-NEXT: [[USUBO4:%[0-9]+]]:_(i32), [[USUBO5:%[0-9]+]]:_(i1) = G_USUBO [[USUBO2]], [[UV20]] + ; GFX6-NEXT: [[USUBE4:%[0-9]+]]:_(i32), [[USUBE5:%[0-9]+]]:_(i1) = G_USUBE [[SUB]], [[UV21]], [[USUBO3]] + ; GFX6-NEXT: [[USUBE6:%[0-9]+]]:_(i32), [[USUBE7:%[0-9]+]]:_(i1) = G_USUBE 
[[USUBE4]], [[C6]], [[USUBO5]] + ; GFX6-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX6-NEXT: [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C7]](i64) + ; GFX6-NEXT: [[UADDO38:%[0-9]+]]:_(i32), [[UADDO39:%[0-9]+]]:_(i1) = G_UADDO [[UADDO36]], [[UV22]] + ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(i32), [[UADDE9:%[0-9]+]]:_(i1) = G_UADDE [[ADD15]], [[UV23]], [[UADDO39]] + ; GFX6-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO38]](i32), [[UADDE8]](i32) + ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE6]](i32), [[UV21]] + ; GFX6-NEXT: [[SEXT2:%[0-9]+]]:_(i32) = G_SEXT [[ICMP3]](i1) + ; GFX6-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO4]](i32), [[UV20]] + ; GFX6-NEXT: [[SEXT3:%[0-9]+]]:_(i32) = G_SEXT [[ICMP4]](i1) + ; GFX6-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE6]](i32), [[UV21]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP5]](i1), [[SEXT3]], [[SEXT2]] + ; GFX6-NEXT: [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C7]](i64) + ; GFX6-NEXT: [[UADDO40:%[0-9]+]]:_(i32), [[UADDO41:%[0-9]+]]:_(i1) = G_UADDO [[UADDO38]], [[UV24]] + ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(i32), [[UADDE11:%[0-9]+]]:_(i1) = G_UADDE [[UADDE8]], [[UV25]], [[UADDO41]] + ; GFX6-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO40]](i32), [[UADDE10]](i32) + ; GFX6-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT1]](i32), [[C6]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[MV4]], [[MV3]] + ; GFX6-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT]](i32), [[C6]] + ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP7]](i1), [[SELECT2]], [[MV2]] + ; GFX6-NEXT: [[XOR2:%[0-9]+]]:_(i64) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX6-NEXT: [[XOR3:%[0-9]+]]:_(i64) = G_XOR [[SELECT3]], [[XOR2]] + ; GFX6-NEXT: [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR3]](i64) + ; GFX6-NEXT: [[UV28:%[0-9]+]]:_(i32), [[UV29:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR2]](i64) + ; GFX6-NEXT: [[USUBO6:%[0-9]+]]:_(i32), [[USUBO7:%[0-9]+]]:_(i1) = G_USUBO [[UV26]], [[UV28]] + ; GFX6-NEXT: [[USUBE8:%[0-9]+]]:_(i32), [[USUBE9:%[0-9]+]]:_(i1) = G_USUBE [[UV27]], [[UV29]], [[USUBO7]] + ; GFX6-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO6]](i32), [[USUBE8]](i32) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](i64) + ; ; GFX8-LABEL: name: test_sdiv_s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32) - ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[COPY1]], [[C]](s32) - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] - ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) - ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; 
GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] - ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] - ; GFX8-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]] - ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV8]](s32) - ; GFX8-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV9]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] - ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] - ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 - ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; GFX8-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] - ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] - ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; GFX8-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] - ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] - ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) - ; GFX8-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) - ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) - ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX8-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]] - ; GFX8-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C5]] - ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV15]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] - ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV14]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV16]] - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV14]] - ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV16]] - ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV14]] - ; 
GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV16]] - ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD]] - ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] - ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV16]] - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] - ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) - ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV19]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE4]], [[ANYEXT1]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]] - ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV18]] - ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[UV20]] - ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV18]] - ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] - ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV20]] - ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV18]] - ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV20]] - ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] - ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD4]] - ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV20]] - ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) 
= G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]] - ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO26]] - ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE6]] - ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO26]] - ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] - ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE6]] - ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO26]] - ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE6]] - ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] - ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD8]] - ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE6]] - ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] - ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD11]](s32) - ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDO36]], [[C5]] - ; GFX8-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) - ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV29]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]] - ; GFX8-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) - ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV28]] - ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV30]], [[USUBO3]] - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV23]], [[UV30]] - ; GFX8-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV33]] - ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV32]] - ; GFX8-NEXT: 
[[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV33]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX8-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV32]] - ; GFX8-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV33]], [[USUBO3]] - ; GFX8-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] - ; GFX8-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX8-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX8-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV34]] - ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD11]], [[UV35]], [[UADDO39]] - ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) - ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV33]] - ; GFX8-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV32]] - ; GFX8-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV33]] - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX8-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX8-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV36]] - ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV37]], [[UADDO41]] - ; GFX8-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) - ; GFX8-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] - ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] - ; GFX8-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] - ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] - ; GFX8-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[ASHR1]] - ; GFX8-NEXT: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[XOR2]] - ; GFX8-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) - ; GFX8-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) - ; GFX8-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[UV38]], [[UV40]] - ; GFX8-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV39]], [[UV41]], [[USUBO7]] - ; GFX8-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE8]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[COPY]], [[C]](i32) + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[COPY1]], [[C]](i32) + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR]](i64) + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV]], [[UV2]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE 
[[UV1]], [[UV3]], [[UADDO1]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR1]](i64) + ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UV4]], [[UV6]] + ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] + ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO2]](i32), [[UADDE2]](i32) + ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(i64) = G_XOR [[MV]], [[ASHR]] + ; GFX8-NEXT: [[XOR1:%[0-9]+]]:_(i64) = G_XOR [[MV1]], [[ASHR1]] + ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[UV8]](i32) + ; GFX8-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[UV9]](i32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD]](f32) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX8-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX8-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD1]](f32) + ; GFX8-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC]](f32) + ; GFX8-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C5]](i64) + ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX8-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV10]], [[UV12]] + ; GFX8-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[FPTOUI]], [[C5]] + ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](i64) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[UV15]](i32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[FPTOUI1]], [[ANYEXT]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](i32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](i64) + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[UV14]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI]], [[UV16]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[UV14]] + ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(i32), [[UADDO5:%[0-9]+]]:_(i1) = G_UADDO [[MUL]], [[MUL1]] + ; GFX8-NEXT: 
[[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO5]](i1) + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(i32), [[UADDO7:%[0-9]+]]:_(i1) = G_UADDO [[UADDO4]], [[UMULH]] + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO7]](i1) + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[UV16]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[UV14]] + ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[UV16]] + ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(i32), [[UADDO9:%[0-9]+]]:_(i1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO9]](i1) + ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(i32), [[UADDO11:%[0-9]+]]:_(i1) = G_UADDO [[UADDO8]], [[UMULH2]] + ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO11]](i1) + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(i32), [[UADDO13:%[0-9]+]]:_(i1) = G_UADDO [[UADDO10]], [[ADD]] + ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO13]](i1) + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[UV16]] + ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UMULH3]], [[ADD2]] + ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(i32), [[UADDO15:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI]], [[UADDO12]] + ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(i32), [[UADDE5:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[UADDO14]], [[C5]] + ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](i64) + ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[UV19]](i32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[UADDE4]], [[ANYEXT1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](i32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](i64) + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UADDE4]], [[UV18]] + ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(i32) = G_MUL [[UADDO14]], [[UV20]] + ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(i32) = G_UMULH [[UADDO14]], [[UV18]] + ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(i32), [[UADDO17:%[0-9]+]]:_(i1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO17]](i1) + ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(i32), [[UADDO19:%[0-9]+]]:_(i1) = G_UADDO [[UADDO16]], [[UMULH4]] + ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO19]](i1) + ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(i32) = G_MUL [[UADDE4]], [[UV20]] + ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(i32) = G_UMULH [[UADDE4]], [[UV18]] + ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(i32) = G_UMULH [[UADDO14]], [[UV20]] + ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(i32), [[UADDO21:%[0-9]+]]:_(i1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO21]](i1) + ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(i32), [[UADDO23:%[0-9]+]]:_(i1) = G_UADDO [[UADDO20]], [[UMULH6]] + ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO23]](i1) + ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(i32), [[UADDO25:%[0-9]+]]:_(i1) = 
G_UADDO [[UADDO22]], [[ADD4]] + ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO25]](i1) + ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(i32) = G_UMULH [[UADDE4]], [[UV20]] + ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[UMULH7]], [[ADD6]] + ; GFX8-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(i32), [[UADDO27:%[0-9]+]]:_(i1) = G_UADDO [[UADDO14]], [[UADDO24]] + ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(i32), [[UADDE7:%[0-9]+]]:_(i1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]] + ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR]](i64) + ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR]](i64) + ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(i32) = G_MUL [[UV25]], [[UADDO26]] + ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(i32) = G_MUL [[UV24]], [[UADDE6]] + ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(i32) = G_UMULH [[UV24]], [[UADDO26]] + ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(i32), [[UADDO29:%[0-9]+]]:_(i1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO29]](i1) + ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(i32), [[UADDO31:%[0-9]+]]:_(i1) = G_UADDO [[UADDO28]], [[UMULH8]] + ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO31]](i1) + ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(i32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(i32) = G_MUL [[UV25]], [[UADDE6]] + ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(i32) = G_UMULH [[UV25]], [[UADDO26]] + ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(i32) = G_UMULH [[UV24]], [[UADDE6]] + ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(i32), [[UADDO33:%[0-9]+]]:_(i1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO33]](i1) + ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(i32), [[UADDO35:%[0-9]+]]:_(i1) = G_UADDO [[UADDO32]], [[UMULH10]] + ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO35]](i1) + ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(i32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(i32), [[UADDO37:%[0-9]+]]:_(i1) = G_UADDO [[UADDO34]], [[ADD8]] + ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO37]](i1) + ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(i32) = G_ADD [[ADD9]], [[ZEXT14]] + ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(i32) = G_UMULH [[UV25]], [[UADDE6]] + ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(i32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO36]](i32), [[ADD11]](i32) + ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV26]](i32), [[UADDO36]], [[C5]] + ; GFX8-NEXT: [[UV28:%[0-9]+]]:_(i32), [[UV29:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](i64) + ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(i64) = G_ANYEXT [[UV29]](i32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV26]](i32), [[ADD11]], [[ANYEXT2]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV27]](i32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX8-NEXT: [[UV30:%[0-9]+]]:_(i32), [[UV31:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](i64) + ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(i32), [[USUBO3:%[0-9]+]]:_(i1) = G_USUBO [[UV22]], [[UV28]] + ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV23]], [[UV30]], [[USUBO3]] + ; 
GFX8-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV23]], [[UV30]] + ; GFX8-NEXT: [[UV32:%[0-9]+]]:_(i32), [[UV33:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE2]](i32), [[UV33]] + ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO2]](i32), [[UV32]] + ; GFX8-NEXT: [[SEXT1:%[0-9]+]]:_(i32) = G_SEXT [[ICMP1]](i1) + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE2]](i32), [[UV33]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SEXT1]], [[SEXT]] + ; GFX8-NEXT: [[USUBO4:%[0-9]+]]:_(i32), [[USUBO5:%[0-9]+]]:_(i1) = G_USUBO [[USUBO2]], [[UV32]] + ; GFX8-NEXT: [[USUBE4:%[0-9]+]]:_(i32), [[USUBE5:%[0-9]+]]:_(i1) = G_USUBE [[SUB]], [[UV33]], [[USUBO3]] + ; GFX8-NEXT: [[USUBE6:%[0-9]+]]:_(i32), [[USUBE7:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX8-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX8-NEXT: [[UV34:%[0-9]+]]:_(i32), [[UV35:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C7]](i64) + ; GFX8-NEXT: [[UADDO38:%[0-9]+]]:_(i32), [[UADDO39:%[0-9]+]]:_(i1) = G_UADDO [[UADDO36]], [[UV34]] + ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(i32), [[UADDE9:%[0-9]+]]:_(i1) = G_UADDE [[ADD11]], [[UV35]], [[UADDO39]] + ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO38]](i32), [[UADDE8]](i32) + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE6]](i32), [[UV33]] + ; GFX8-NEXT: [[SEXT2:%[0-9]+]]:_(i32) = G_SEXT [[ICMP3]](i1) + ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO4]](i32), [[UV32]] + ; GFX8-NEXT: [[SEXT3:%[0-9]+]]:_(i32) = G_SEXT [[ICMP4]](i1) + ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE6]](i32), [[UV33]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP5]](i1), [[SEXT3]], [[SEXT2]] + ; GFX8-NEXT: [[UV36:%[0-9]+]]:_(i32), [[UV37:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C7]](i64) + ; GFX8-NEXT: [[UADDO40:%[0-9]+]]:_(i32), [[UADDO41:%[0-9]+]]:_(i1) = G_UADDO [[UADDO38]], [[UV36]] + ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(i32), [[UADDE11:%[0-9]+]]:_(i1) = G_UADDE [[UADDE8]], [[UV37]], [[UADDO41]] + ; GFX8-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO40]](i32), [[UADDE10]](i32) + ; GFX8-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT1]](i32), [[C6]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[MV4]], [[MV3]] + ; GFX8-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT]](i32), [[C6]] + ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP7]](i1), [[SELECT2]], [[MV2]] + ; GFX8-NEXT: [[XOR2:%[0-9]+]]:_(i64) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX8-NEXT: [[XOR3:%[0-9]+]]:_(i64) = G_XOR [[SELECT3]], [[XOR2]] + ; GFX8-NEXT: [[UV38:%[0-9]+]]:_(i32), [[UV39:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR3]](i64) + ; GFX8-NEXT: [[UV40:%[0-9]+]]:_(i32), [[UV41:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR2]](i64) + ; GFX8-NEXT: [[USUBO6:%[0-9]+]]:_(i32), [[USUBO7:%[0-9]+]]:_(i1) = G_USUBO [[UV38]], [[UV40]] + ; GFX8-NEXT: [[USUBE8:%[0-9]+]]:_(i32), [[USUBE9:%[0-9]+]]:_(i1) = G_USUBE [[UV39]], [[UV41]], [[USUBO7]] + ; GFX8-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO6]](i32), [[USUBE8]](i32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](i64) + ; ; GFX9-LABEL: name: test_sdiv_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 63 - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[COPY1]], [[C]](s32) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) - ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] - ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] - ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]] - ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV8]](s32) - ; GFX9-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV9]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] - ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 - ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] - ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; GFX9-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] - ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] - ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) - ; GFX9-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) - ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX9-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]] - ; GFX9-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C5]] - ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV15]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = 
G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] - ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV14]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV16]] - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV14]] - ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV16]] - ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV14]] - ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV16]] - ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] - ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD]] - ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] - ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV16]] - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] - ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) - ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV19]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE4]], [[ANYEXT1]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]] - ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV18]] - ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[UV20]] - ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV18]] - ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] - ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV20]] - ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV18]] - ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = 
G_UMULH [[UADDO14]], [[UV20]] - ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] - ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD4]] - ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV20]] - ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]] - ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO26]] - ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE6]] - ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO26]] - ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] - ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE6]] - ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO26]] - ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE6]] - ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] - ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD8]] - ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE6]] - ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] - ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD11]](s32) - ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDO36]], [[C5]] - ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) - ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV29]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD11]], 
[[ANYEXT2]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]] - ; GFX9-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) - ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV28]] - ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV30]], [[USUBO3]] - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV23]], [[UV30]] - ; GFX9-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV33]] - ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV32]] - ; GFX9-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV33]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX9-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV32]] - ; GFX9-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV33]], [[USUBO3]] - ; GFX9-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] - ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX9-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV34]] - ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD11]], [[UV35]], [[UADDO39]] - ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) - ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV33]] - ; GFX9-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV32]] - ; GFX9-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV33]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX9-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX9-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV36]] - ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV37]], [[UADDO41]] - ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) - ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] - ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] - ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] - ; GFX9-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[ASHR1]] - ; GFX9-NEXT: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[XOR2]] - ; GFX9-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) - ; GFX9-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) - ; GFX9-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[UV38]], [[UV40]] - ; 
GFX9-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV39]], [[UV41]], [[USUBO7]] - ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE8]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[COPY1]], [[C]](i32) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR]](i64) + ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV]], [[UV2]] + ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR1]](i64) + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UV4]], [[UV6]] + ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO2]](i32), [[UADDE2]](i32) + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(i64) = G_XOR [[MV]], [[ASHR]] + ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(i64) = G_XOR [[MV1]], [[ASHR1]] + ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[UV8]](i32) + ; GFX9-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[UV9]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD]](f32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX9-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD1]](f32) + ; GFX9-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC]](f32) + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C5]](i64) + ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX9-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV10]], [[UV12]] + ; GFX9-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[FPTOUI]], [[C5]] + ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(i32), 
[[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](i64) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[UV15]](i32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[FPTOUI1]], [[ANYEXT]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](i32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](i64) + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[UV14]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI]], [[UV16]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[UV14]] + ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(i32), [[UADDO5:%[0-9]+]]:_(i1) = G_UADDO [[MUL]], [[MUL1]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO5]](i1) + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(i32), [[UADDO7:%[0-9]+]]:_(i1) = G_UADDO [[UADDO4]], [[UMULH]] + ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO7]](i1) + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[UV16]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[UV14]] + ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[UV16]] + ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(i32), [[UADDO9:%[0-9]+]]:_(i1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO9]](i1) + ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(i32), [[UADDO11:%[0-9]+]]:_(i1) = G_UADDO [[UADDO8]], [[UMULH2]] + ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO11]](i1) + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(i32), [[UADDO13:%[0-9]+]]:_(i1) = G_UADDO [[UADDO10]], [[ADD]] + ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO13]](i1) + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[UV16]] + ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UMULH3]], [[ADD2]] + ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(i32), [[UADDO15:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI]], [[UADDO12]] + ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(i32), [[UADDE5:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[UADDO14]], [[C5]] + ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](i64) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[UV19]](i32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[UADDE4]], [[ANYEXT1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](i32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](i64) + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UADDE4]], [[UV18]] + ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(i32) = G_MUL [[UADDO14]], [[UV20]] + ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(i32) = G_UMULH [[UADDO14]], [[UV18]] + ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(i32), [[UADDO17:%[0-9]+]]:_(i1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO17]](i1) + ; GFX9-NEXT: 
[[UADDO18:%[0-9]+]]:_(i32), [[UADDO19:%[0-9]+]]:_(i1) = G_UADDO [[UADDO16]], [[UMULH4]] + ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO19]](i1) + ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(i32) = G_MUL [[UADDE4]], [[UV20]] + ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(i32) = G_UMULH [[UADDE4]], [[UV18]] + ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(i32) = G_UMULH [[UADDO14]], [[UV20]] + ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(i32), [[UADDO21:%[0-9]+]]:_(i1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO21]](i1) + ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(i32), [[UADDO23:%[0-9]+]]:_(i1) = G_UADDO [[UADDO20]], [[UMULH6]] + ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO23]](i1) + ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(i32), [[UADDO25:%[0-9]+]]:_(i1) = G_UADDO [[UADDO22]], [[ADD4]] + ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO25]](i1) + ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(i32) = G_UMULH [[UADDE4]], [[UV20]] + ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[UMULH7]], [[ADD6]] + ; GFX9-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(i32), [[UADDO27:%[0-9]+]]:_(i1) = G_UADDO [[UADDO14]], [[UADDO24]] + ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(i32), [[UADDE7:%[0-9]+]]:_(i1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]] + ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR]](i64) + ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR]](i64) + ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(i32) = G_MUL [[UV25]], [[UADDO26]] + ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(i32) = G_MUL [[UV24]], [[UADDE6]] + ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(i32) = G_UMULH [[UV24]], [[UADDO26]] + ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(i32), [[UADDO29:%[0-9]+]]:_(i1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO29]](i1) + ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(i32), [[UADDO31:%[0-9]+]]:_(i1) = G_UADDO [[UADDO28]], [[UMULH8]] + ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO31]](i1) + ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(i32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(i32) = G_MUL [[UV25]], [[UADDE6]] + ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(i32) = G_UMULH [[UV25]], [[UADDO26]] + ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(i32) = G_UMULH [[UV24]], [[UADDE6]] + ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(i32), [[UADDO33:%[0-9]+]]:_(i1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO33]](i1) + ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(i32), [[UADDO35:%[0-9]+]]:_(i1) = G_UADDO [[UADDO32]], [[UMULH10]] + ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO35]](i1) + ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(i32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(i32), [[UADDO37:%[0-9]+]]:_(i1) = G_UADDO [[UADDO34]], [[ADD8]] + ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO37]](i1) + ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(i32) = G_ADD [[ADD9]], [[ZEXT14]] + ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(i32) = G_UMULH [[UV25]], [[UADDE6]] + ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(i32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO36]](i32), [[ADD11]](i32) + ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(i64), 
[[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV26]](i32), [[UADDO36]], [[C5]] + ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(i32), [[UV29:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](i64) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(i64) = G_ANYEXT [[UV29]](i32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV26]](i32), [[ADD11]], [[ANYEXT2]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV27]](i32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX9-NEXT: [[UV30:%[0-9]+]]:_(i32), [[UV31:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](i64) + ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(i32), [[USUBO3:%[0-9]+]]:_(i1) = G_USUBO [[UV22]], [[UV28]] + ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV23]], [[UV30]], [[USUBO3]] + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV23]], [[UV30]] + ; GFX9-NEXT: [[UV32:%[0-9]+]]:_(i32), [[UV33:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE2]](i32), [[UV33]] + ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO2]](i32), [[UV32]] + ; GFX9-NEXT: [[SEXT1:%[0-9]+]]:_(i32) = G_SEXT [[ICMP1]](i1) + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE2]](i32), [[UV33]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SEXT1]], [[SEXT]] + ; GFX9-NEXT: [[USUBO4:%[0-9]+]]:_(i32), [[USUBO5:%[0-9]+]]:_(i1) = G_USUBO [[USUBO2]], [[UV32]] + ; GFX9-NEXT: [[USUBE4:%[0-9]+]]:_(i32), [[USUBE5:%[0-9]+]]:_(i1) = G_USUBE [[SUB]], [[UV33]], [[USUBO3]] + ; GFX9-NEXT: [[USUBE6:%[0-9]+]]:_(i32), [[USUBE7:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX9-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX9-NEXT: [[UV34:%[0-9]+]]:_(i32), [[UV35:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C7]](i64) + ; GFX9-NEXT: [[UADDO38:%[0-9]+]]:_(i32), [[UADDO39:%[0-9]+]]:_(i1) = G_UADDO [[UADDO36]], [[UV34]] + ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(i32), [[UADDE9:%[0-9]+]]:_(i1) = G_UADDE [[ADD11]], [[UV35]], [[UADDO39]] + ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO38]](i32), [[UADDE8]](i32) + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE6]](i32), [[UV33]] + ; GFX9-NEXT: [[SEXT2:%[0-9]+]]:_(i32) = G_SEXT [[ICMP3]](i1) + ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO4]](i32), [[UV32]] + ; GFX9-NEXT: [[SEXT3:%[0-9]+]]:_(i32) = G_SEXT [[ICMP4]](i1) + ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE6]](i32), [[UV33]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP5]](i1), [[SEXT3]], [[SEXT2]] + ; GFX9-NEXT: [[UV36:%[0-9]+]]:_(i32), [[UV37:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C7]](i64) + ; GFX9-NEXT: [[UADDO40:%[0-9]+]]:_(i32), [[UADDO41:%[0-9]+]]:_(i1) = G_UADDO [[UADDO38]], [[UV36]] + ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(i32), [[UADDE11:%[0-9]+]]:_(i1) = G_UADDE [[UADDE8]], [[UV37]], [[UADDO41]] + ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO40]](i32), [[UADDE10]](i32) + ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT1]](i32), [[C6]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[MV4]], [[MV3]] + ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT]](i32), [[C6]] + ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP7]](i1), [[SELECT2]], [[MV2]] + ; 
GFX9-NEXT: [[XOR2:%[0-9]+]]:_(i64) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX9-NEXT: [[XOR3:%[0-9]+]]:_(i64) = G_XOR [[SELECT3]], [[XOR2]] + ; GFX9-NEXT: [[UV38:%[0-9]+]]:_(i32), [[UV39:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR3]](i64) + ; GFX9-NEXT: [[UV40:%[0-9]+]]:_(i32), [[UV41:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR2]](i64) + ; GFX9-NEXT: [[USUBO6:%[0-9]+]]:_(i32), [[USUBO7:%[0-9]+]]:_(i1) = G_USUBO [[UV38]], [[UV40]] + ; GFX9-NEXT: [[USUBE8:%[0-9]+]]:_(i32), [[USUBE9:%[0-9]+]]:_(i1) = G_USUBE [[UV39]], [[UV41]], [[USUBO7]] + ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO6]](i32), [[USUBE8]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](i64) + ; ; GFX10-LABEL: name: test_sdiv_s64 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[COPY1]], [[C]](s32) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) - ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] - ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] - ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]] - ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV8]](s32) - ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV9]](s32) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] - ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] - ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 - ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] - ; GFX10-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] - ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] - ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) - ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) - ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX10-NEXT: 
[[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) - ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]] - ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C5]] - ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV15]], [[MUL]] - ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]] - ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV14]] - ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV14]] - ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[MUL3]] - ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] - ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV14]] - ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] - ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] - ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] - ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] - ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD4]] - ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] - ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64) - ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] - ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL5]] - ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] - ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[MUL6]] - ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV16]] - ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] - ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV16]] - ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), 
[[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] - ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] - ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] - ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV16]] - ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] - ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] - ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] - ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] - ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] - ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD10]] - ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] - ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO26]] - ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE6]] - ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO26]] - ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] - ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] - ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE6]] - ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO26]] - ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE6]] - ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] - ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] - ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] - ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE6]] - ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD14]] - ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), 
[[ADD15]](s32) - ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDO36]], [[C5]] - ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD15]] - ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV25]], [[MUL13]] - ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDO36]] - ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL14]] - ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[UV24]] - ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD17]], [[USUBO3]] - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD17]] - ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV27]] - ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV26]] - ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV27]] - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV26]] - ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV27]], [[USUBO3]] - ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] - ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX10-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV28]] - ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV29]], [[UADDO39]] - ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) - ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV27]] - ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV26]] - ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV27]] - ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX10-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV30]] - ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV31]], [[UADDO41]] - ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) - ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] - ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] - ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] - ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] - ; GFX10-NEXT: 
[[XOR2:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[ASHR1]] - ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[XOR2]] - ; GFX10-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) - ; GFX10-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) - ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[UV32]], [[UV34]] - ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV33]], [[UV35]], [[USUBO7]] - ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE8]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = G_SDIV %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[COPY1]], [[C]](i32) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR]](i64) + ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV]], [[UV2]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR1]](i64) + ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UV4]], [[UV6]] + ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO2]](i32), [[UADDE2]](i32) + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(i64) = G_XOR [[MV]], [[ASHR]] + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(i64) = G_XOR [[MV1]], [[ASHR1]] + ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[UV8]](i32) + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[UV9]](i32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD]](f32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX10-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD1]](f32) + ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC]](f32) + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX10-NEXT: 
[[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C5]](i64) + ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV10]], [[UV12]] + ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[FPTOUI]], [[C5]] + ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](i64) + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[UV15]], [[MUL]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ADD]], [[MUL1]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[UV14]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[UV14]] + ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(i32), [[UADDO5:%[0-9]+]]:_(i1) = G_UADDO [[MUL2]], [[MUL3]] + ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO5]](i1) + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(i32), [[UADDO7:%[0-9]+]]:_(i1) = G_UADDO [[UADDO4]], [[UMULH]] + ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO7]](i1) + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[UV14]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(i32), [[UADDO9:%[0-9]+]]:_(i1) = G_UADDO [[MUL4]], [[UMULH1]] + ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO9]](i1) + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(i32), [[UADDO11:%[0-9]+]]:_(i1) = G_UADDO [[UADDO8]], [[UMULH2]] + ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO11]](i1) + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(i32), [[UADDO13:%[0-9]+]]:_(i1) = G_UADDO [[UADDO10]], [[ADD2]] + ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO13]](i1) + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[UMULH3]], [[ADD4]] + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(i32), [[UADDO15:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI]], [[UADDO12]] + ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(i32), [[UADDE5:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[UADDO14]], [[C5]] + ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](i64) + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[UADDE4]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[UV17]], [[MUL5]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(i32) = G_MUL [[USUBE]], [[UADDO14]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[ADD6]], [[MUL6]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(i32) = G_MUL [[UADDE4]], [[UV16]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(i32) = G_MUL [[UADDO14]], [[ADD7]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(i32) = G_UMULH [[UADDO14]], [[UV16]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(i32), 
[[UADDO17:%[0-9]+]]:_(i1) = G_UADDO [[MUL7]], [[MUL8]] + ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO17]](i1) + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(i32), [[UADDO19:%[0-9]+]]:_(i1) = G_UADDO [[UADDO16]], [[UMULH4]] + ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO19]](i1) + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(i32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(i32) = G_MUL [[UADDE4]], [[ADD7]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(i32) = G_UMULH [[UADDE4]], [[UV16]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(i32) = G_UMULH [[UADDO14]], [[ADD7]] + ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(i32), [[UADDO21:%[0-9]+]]:_(i1) = G_UADDO [[MUL9]], [[UMULH5]] + ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO21]](i1) + ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(i32), [[UADDO23:%[0-9]+]]:_(i1) = G_UADDO [[UADDO20]], [[UMULH6]] + ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO23]](i1) + ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(i32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(i32), [[UADDO25:%[0-9]+]]:_(i1) = G_UADDO [[UADDO22]], [[ADD8]] + ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO25]](i1) + ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(i32) = G_ADD [[ADD9]], [[ZEXT9]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(i32) = G_UMULH [[UADDE4]], [[ADD7]] + ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(i32) = G_ADD [[UMULH7]], [[ADD10]] + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(i32), [[UADDO27:%[0-9]+]]:_(i1) = G_UADDO [[UADDO14]], [[UADDO24]] + ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(i32), [[UADDE7:%[0-9]+]]:_(i1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] + ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR]](i64) + ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR]](i64) + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(i32) = G_MUL [[UV21]], [[UADDO26]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(i32) = G_MUL [[UV20]], [[UADDE6]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(i32) = G_UMULH [[UV20]], [[UADDO26]] + ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(i32), [[UADDO29:%[0-9]+]]:_(i1) = G_UADDO [[MUL10]], [[MUL11]] + ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO29]](i1) + ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(i32), [[UADDO31:%[0-9]+]]:_(i1) = G_UADDO [[UADDO28]], [[UMULH8]] + ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO31]](i1) + ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(i32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(i32) = G_MUL [[UV21]], [[UADDE6]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(i32) = G_UMULH [[UV21]], [[UADDO26]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(i32) = G_UMULH [[UV20]], [[UADDE6]] + ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(i32), [[UADDO33:%[0-9]+]]:_(i1) = G_UADDO [[MUL12]], [[UMULH9]] + ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO33]](i1) + ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(i32), [[UADDO35:%[0-9]+]]:_(i1) = G_UADDO [[UADDO32]], [[UMULH10]] + ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO35]](i1) + ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(i32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(i32), [[UADDO37:%[0-9]+]]:_(i1) = G_UADDO [[UADDO34]], [[ADD12]] + ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO37]](i1) + ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(i32) = G_ADD [[ADD13]], [[ZEXT14]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(i32) = G_UMULH [[UV21]], [[UADDE6]] + ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(i32) = G_ADD [[UMULH11]], [[ADD14]] + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO36]](i32), 
[[ADD15]](i32) + ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV22]](i32), [[UADDO36]], [[C5]] + ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](i64) + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(i32) = G_MUL [[UV22]], [[ADD15]] + ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(i32) = G_ADD [[UV25]], [[MUL13]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(i32) = G_MUL [[UV23]], [[UADDO36]] + ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(i32) = G_ADD [[ADD16]], [[MUL14]] + ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(i32), [[USUBO3:%[0-9]+]]:_(i1) = G_USUBO [[UV18]], [[UV24]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV19]], [[ADD17]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV19]], [[ADD17]] + ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE2]](i32), [[UV27]] + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO2]](i32), [[UV26]] + ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(i32) = G_SEXT [[ICMP1]](i1) + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE2]](i32), [[UV27]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SEXT1]], [[SEXT]] + ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(i32), [[USUBO5:%[0-9]+]]:_(i1) = G_USUBO [[USUBO2]], [[UV26]] + ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(i32), [[USUBE5:%[0-9]+]]:_(i1) = G_USUBE [[SUB]], [[UV27]], [[USUBO3]] + ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(i32), [[USUBE7:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX10-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX10-NEXT: [[UV28:%[0-9]+]]:_(i32), [[UV29:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C7]](i64) + ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(i32), [[UADDO39:%[0-9]+]]:_(i1) = G_UADDO [[UADDO36]], [[UV28]] + ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(i32), [[UADDE9:%[0-9]+]]:_(i1) = G_UADDE [[ADD15]], [[UV29]], [[UADDO39]] + ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO38]](i32), [[UADDE8]](i32) + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE6]](i32), [[UV27]] + ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(i32) = G_SEXT [[ICMP3]](i1) + ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO4]](i32), [[UV26]] + ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(i32) = G_SEXT [[ICMP4]](i1) + ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE6]](i32), [[UV27]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP5]](i1), [[SEXT3]], [[SEXT2]] + ; GFX10-NEXT: [[UV30:%[0-9]+]]:_(i32), [[UV31:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C7]](i64) + ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(i32), [[UADDO41:%[0-9]+]]:_(i1) = G_UADDO [[UADDO38]], [[UV30]] + ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(i32), [[UADDE11:%[0-9]+]]:_(i1) = G_UADDE [[UADDE8]], [[UV31]], [[UADDO41]] + ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO40]](i32), [[UADDE10]](i32) + ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT1]](i32), [[C6]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[MV4]], [[MV3]] + ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT]](i32), [[C6]] + ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP7]](i1), [[SELECT2]], [[MV2]] + ; GFX10-NEXT: 
[[XOR2:%[0-9]+]]:_(i64) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(i64) = G_XOR [[SELECT3]], [[XOR2]] + ; GFX10-NEXT: [[UV32:%[0-9]+]]:_(i32), [[UV33:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR3]](i64) + ; GFX10-NEXT: [[UV34:%[0-9]+]]:_(i32), [[UV35:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR2]](i64) + ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(i32), [[USUBO7:%[0-9]+]]:_(i1) = G_USUBO [[UV32]], [[UV34]] + ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(i32), [[USUBE9:%[0-9]+]]:_(i1) = G_USUBE [[UV33]], [[UV35]], [[USUBO7]] + ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO6]](i32), [[USUBE8]](i32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64) = G_SDIV %0, %1 + $vgpr0_vgpr1 = COPY %2(i64) ... --- @@ -1166,1339 +1175,1342 @@ body: | ; GFX6-LABEL: name: test_sdiv_v2s64 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[C]](s32) - ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[C]](s32) - ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] - ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX6-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) - ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] - ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] - ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] - ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]] - ; GFX6-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV12]](s32) - ; GFX6-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV13]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] - ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 - ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; GFX6-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] - ; GFX6-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 
0xC1F0000000000000 - ; GFX6-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] - ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] - ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) - ; GFX6-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) - ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX6-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) - ; GFX6-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV16]] - ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV17]], [[USUBO1]] - ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] - ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] - ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] - ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; GFX6-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] - ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] - ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] - ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] - ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] - ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] - ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] - ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] - ; GFX6-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] - ; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] - ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] - ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] - ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] - ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] - ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] - ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] - ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] - ; GFX6-NEXT: 
[[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] - ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] - ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX6-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] - ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] - ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] - ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] - ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] - ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] - ; GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] - ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] - ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] - ; GFX6-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX6-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO26]] - ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE6]] - ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO26]] - ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] - ; GFX6-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE6]] - ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO26]] - ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE6]] - ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] - ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX6-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] - ; GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX6-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] - ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE6]] - ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] - ; 
GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD15]](s32) - ; GFX6-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[UADDO36]] - ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDO36]] - ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD15]] - ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV22]], [[UADDO36]] - ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] - ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[MUL15]] - ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD17]], [[USUBO3]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD17]] - ; GFX6-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV25]] - ; GFX6-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV24]] - ; GFX6-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV25]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX6-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV24]] - ; GFX6-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV25]], [[USUBO3]] - ; GFX6-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] - ; GFX6-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX6-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX6-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV26]] - ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV27]], [[UADDO39]] - ; GFX6-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) - ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV25]] - ; GFX6-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX6-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV24]] - ; GFX6-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX6-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV25]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX6-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX6-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV28]] - ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV29]], [[UADDO41]] - ; GFX6-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) - ; GFX6-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] - ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] - ; GFX6-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] - ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] - ; GFX6-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[ASHR1]] - ; GFX6-NEXT: [[XOR3:%[0-9]+]]:_(s64) = G_XOR 
[[SELECT3]], [[XOR2]] - ; GFX6-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) - ; GFX6-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) - ; GFX6-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[UV30]], [[UV32]] - ; GFX6-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV31]], [[UV33]], [[USUBO7]] - ; GFX6-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE8]](s32) - ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) - ; GFX6-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C]](s32) - ; GFX6-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX6-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64) - ; GFX6-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UV34]], [[UV36]] - ; GFX6-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UV35]], [[UV37]], [[UADDO43]] - ; GFX6-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO42]](s32), [[UADDE12]](s32) - ; GFX6-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX6-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64) - ; GFX6-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UV38]], [[UV40]] - ; GFX6-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UV39]], [[UV41]], [[UADDO45]] - ; GFX6-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO44]](s32), [[UADDE14]](s32) - ; GFX6-NEXT: [[XOR4:%[0-9]+]]:_(s64) = G_XOR [[MV6]], [[ASHR2]] - ; GFX6-NEXT: [[XOR5:%[0-9]+]]:_(s64) = G_XOR [[MV7]], [[ASHR3]] - ; GFX6-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) - ; GFX6-NEXT: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV42]](s32) - ; GFX6-NEXT: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV43]](s32) - ; GFX6-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C1]] - ; GFX6-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[UITOFP2]] - ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD2]](s32) - ; GFX6-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] - ; GFX6-NEXT: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[FMUL5]], [[C3]] - ; GFX6-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL6]] - ; GFX6-NEXT: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C4]] - ; GFX6-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]] - ; GFX6-NEXT: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD3]](s32) - ; GFX6-NEXT: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32) - ; GFX6-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) - ; GFX6-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) - ; GFX6-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV44]], [[UV46]] - ; GFX6-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[UV45]], [[UV47]], [[USUBO9]] - ; GFX6-NEXT: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI2]] - ; GFX6-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE10]], [[FPTOUI2]] - ; GFX6-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI3]] - ; GFX6-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[FPTOUI2]] - ; GFX6-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]] - ; GFX6-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD 
[[ADD18]], [[UMULH15]] - ; GFX6-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]] - ; GFX6-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD19]] - ; GFX6-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]] - ; GFX6-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]] - ; GFX6-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) - ; GFX6-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH16]] - ; GFX6-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1) - ; GFX6-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] - ; GFX6-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD19]] - ; GFX6-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]] - ; GFX6-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD19]] - ; GFX6-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]] - ; GFX6-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) - ; GFX6-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH18]] - ; GFX6-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1) - ; GFX6-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] - ; GFX6-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[ADD20]] - ; GFX6-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) - ; GFX6-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT19]] - ; GFX6-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD19]] - ; GFX6-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD22]] - ; GFX6-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO54]] - ; GFX6-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO57]] - ; GFX6-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDO56]] - ; GFX6-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE10]], [[UADDO56]] - ; GFX6-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDE16]] - ; GFX6-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[UADDO56]] - ; GFX6-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]] - ; GFX6-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[UMULH20]] - ; GFX6-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[MUL24]] - ; GFX6-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO56]], [[ADD25]] - ; GFX6-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[MUL24]] - ; GFX6-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]] - ; GFX6-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) - ; GFX6-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH21]] - ; GFX6-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1) - ; GFX6-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] - ; GFX6-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[ADD25]] - ; GFX6-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[MUL24]] - ; GFX6-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[ADD25]] - ; GFX6-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]] - ; GFX6-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) - ; GFX6-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH23]] - ; GFX6-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1) - ; GFX6-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD 
[[ZEXT22]], [[ZEXT23]] - ; GFX6-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[UADDO64]], [[ADD26]] - ; GFX6-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) - ; GFX6-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ADD27]], [[ZEXT24]] - ; GFX6-NEXT: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[ADD25]] - ; GFX6-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD28]] - ; GFX6-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO56]], [[UADDO66]] - ; GFX6-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD29]], [[UADDO69]] - ; GFX6-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) - ; GFX6-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) - ; GFX6-NEXT: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDO68]] - ; GFX6-NEXT: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV50]], [[UADDE18]] - ; GFX6-NEXT: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDO68]] - ; GFX6-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]] - ; GFX6-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1) - ; GFX6-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH25]] - ; GFX6-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO73]](s1) - ; GFX6-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] - ; GFX6-NEXT: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDE18]] - ; GFX6-NEXT: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDO68]] - ; GFX6-NEXT: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDE18]] - ; GFX6-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]] - ; GFX6-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO75]](s1) - ; GFX6-NEXT: [[UADDO76:%[0-9]+]]:_(s32), [[UADDO77:%[0-9]+]]:_(s1) = G_UADDO [[UADDO74]], [[UMULH27]] - ; GFX6-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO77]](s1) - ; GFX6-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] - ; GFX6-NEXT: [[UADDO78:%[0-9]+]]:_(s32), [[UADDO79:%[0-9]+]]:_(s1) = G_UADDO [[UADDO76]], [[ADD30]] - ; GFX6-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO79]](s1) - ; GFX6-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ADD31]], [[ZEXT29]] - ; GFX6-NEXT: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDE18]] - ; GFX6-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD32]] - ; GFX6-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO78]](s32), [[ADD33]](s32) - ; GFX6-NEXT: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) - ; GFX6-NEXT: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV52]], [[UADDO78]] - ; GFX6-NEXT: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV53]], [[UADDO78]] - ; GFX6-NEXT: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV52]], [[ADD33]] - ; GFX6-NEXT: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV52]], [[UADDO78]] - ; GFX6-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]] - ; GFX6-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[ADD34]], [[UMULH29]] - ; GFX6-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV48]], [[MUL33]] - ; GFX6-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV49]], [[ADD35]], [[USUBO11]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV49]], [[ADD35]] - ; GFX6-NEXT: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) - ; GFX6-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE12]](s32), [[UV55]] - ; GFX6-NEXT: 
[[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1) - ; GFX6-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV54]] - ; GFX6-NEXT: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1) - ; GFX6-NEXT: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE12]](s32), [[UV55]] - ; GFX6-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]] - ; GFX6-NEXT: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[USUBO10]], [[UV54]] - ; GFX6-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV55]], [[USUBO11]] - ; GFX6-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[USUBE14]], [[C6]], [[USUBO13]] - ; GFX6-NEXT: [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX6-NEXT: [[UADDO80:%[0-9]+]]:_(s32), [[UADDO81:%[0-9]+]]:_(s1) = G_UADDO [[UADDO78]], [[UV56]] - ; GFX6-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[ADD33]], [[UV57]], [[UADDO81]] - ; GFX6-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO80]](s32), [[UADDE20]](s32) - ; GFX6-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE16]](s32), [[UV55]] - ; GFX6-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) - ; GFX6-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV54]] - ; GFX6-NEXT: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1) - ; GFX6-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE16]](s32), [[UV55]] - ; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] - ; GFX6-NEXT: [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX6-NEXT: [[UADDO82:%[0-9]+]]:_(s32), [[UADDO83:%[0-9]+]]:_(s1) = G_UADDO [[UADDO80]], [[UV58]] - ; GFX6-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[UV59]], [[UADDO83]] - ; GFX6-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO82]](s32), [[UADDE22]](s32) - ; GFX6-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C6]] - ; GFX6-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV10]], [[MV9]] - ; GFX6-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C6]] - ; GFX6-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV8]] - ; GFX6-NEXT: [[XOR6:%[0-9]+]]:_(s64) = G_XOR [[ASHR2]], [[ASHR3]] - ; GFX6-NEXT: [[XOR7:%[0-9]+]]:_(s64) = G_XOR [[SELECT7]], [[XOR6]] - ; GFX6-NEXT: [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR7]](s64) - ; GFX6-NEXT: [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR6]](s64) - ; GFX6-NEXT: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[UV60]], [[UV62]] - ; GFX6-NEXT: [[USUBE18:%[0-9]+]]:_(s32), [[USUBE19:%[0-9]+]]:_(s1) = G_USUBE [[UV61]], [[UV63]], [[USUBO15]] - ; GFX6-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO14]](s32), [[USUBE18]](s32) - ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV5]](s64), [[MV11]](s64) - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY1]](<2 x i64>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 
63 + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[UV]], [[C]](i32) + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[UV2]], [[C]](i32) + ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV]](i64) + ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR]](i64) + ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV4]], [[UV6]] + ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX6-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR1]](i64) + ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UV8]], [[UV10]] + ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] + ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO2]](i32), [[UADDE2]](i32) + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(i64) = G_XOR [[MV]], [[ASHR]] + ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(i64) = G_XOR [[MV1]], [[ASHR1]] + ; GFX6-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[UV12]](i32) + ; GFX6-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[UV13]](i32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD]](f32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX6-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX6-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX6-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD1]](f32) + ; GFX6-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC]](f32) + ; GFX6-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX6-NEXT: [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C5]](i64) + ; GFX6-NEXT: [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV14]], [[UV16]] + ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV15]], [[UV17]], [[USUBO1]] + ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ADD]], [[UMULH]] + ; GFX6-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], 
[[MUL]] + ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(i32), [[UADDO5:%[0-9]+]]:_(i1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO5]](i1) + ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(i32), [[UADDO7:%[0-9]+]]:_(i1) = G_UADDO [[UADDO4]], [[UMULH1]] + ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO7]](i1) + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(i32), [[UADDO9:%[0-9]+]]:_(i1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO9]](i1) + ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(i32), [[UADDO11:%[0-9]+]]:_(i1) = G_UADDO [[UADDO8]], [[UMULH3]] + ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO11]](i1) + ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(i32), [[UADDO13:%[0-9]+]]:_(i1) = G_UADDO [[UADDO10]], [[ADD2]] + ; GFX6-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO13]](i1) + ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(i32), [[UADDO15:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI]], [[UADDO12]] + ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(i32), [[UADDE5:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] + ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[UADDO14]] + ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(i32) = G_MUL [[USUBE]], [[UADDO14]] + ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[UADDE4]] + ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(i32) = G_UMULH [[USUBO]], [[UADDO14]] + ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[ADD6]], [[UMULH5]] + ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(i32) = G_MUL [[UADDE4]], [[MUL6]] + ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(i32) = G_MUL [[UADDO14]], [[ADD7]] + ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(i32) = G_UMULH [[UADDO14]], [[MUL6]] + ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(i32), [[UADDO17:%[0-9]+]]:_(i1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO17]](i1) + ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(i32), [[UADDO19:%[0-9]+]]:_(i1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX6-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO19]](i1) + ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(i32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(i32) = G_MUL [[UADDE4]], [[ADD7]] + ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(i32) = G_UMULH [[UADDE4]], [[MUL6]] + ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(i32) = G_UMULH [[UADDO14]], [[ADD7]] + ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(i32), [[UADDO21:%[0-9]+]]:_(i1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO21]](i1) + ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(i32), [[UADDO23:%[0-9]+]]:_(i1) = G_UADDO [[UADDO20]], [[UMULH8]] + ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO23]](i1) + ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(i32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(i32), [[UADDO25:%[0-9]+]]:_(i1) = G_UADDO [[UADDO22]], [[ADD8]] + ; GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO25]](i1) + ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(i32) = G_ADD [[ADD9]], [[ZEXT9]] + ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(i32) = G_UMULH [[UADDE4]], [[ADD7]] + ; 
GFX6-NEXT: [[ADD11:%[0-9]+]]:_(i32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX6-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(i32), [[UADDO27:%[0-9]+]]:_(i1) = G_UADDO [[UADDO14]], [[UADDO24]] + ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(i32), [[UADDE7:%[0-9]+]]:_(i1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] + ; GFX6-NEXT: [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR]](i64) + ; GFX6-NEXT: [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR]](i64) + ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(i32) = G_MUL [[UV21]], [[UADDO26]] + ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(i32) = G_MUL [[UV20]], [[UADDE6]] + ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(i32) = G_UMULH [[UV20]], [[UADDO26]] + ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(i32), [[UADDO29:%[0-9]+]]:_(i1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX6-NEXT: [[ZEXT10:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO29]](i1) + ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(i32), [[UADDO31:%[0-9]+]]:_(i1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO31]](i1) + ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(i32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(i32) = G_MUL [[UV21]], [[UADDE6]] + ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(i32) = G_UMULH [[UV21]], [[UADDO26]] + ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(i32) = G_UMULH [[UV20]], [[UADDE6]] + ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(i32), [[UADDO33:%[0-9]+]]:_(i1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO33]](i1) + ; GFX6-NEXT: [[UADDO34:%[0-9]+]]:_(i32), [[UADDO35:%[0-9]+]]:_(i1) = G_UADDO [[UADDO32]], [[UMULH12]] + ; GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO35]](i1) + ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(i32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX6-NEXT: [[UADDO36:%[0-9]+]]:_(i32), [[UADDO37:%[0-9]+]]:_(i1) = G_UADDO [[UADDO34]], [[ADD12]] + ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO37]](i1) + ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(i32) = G_ADD [[ADD13]], [[ZEXT14]] + ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(i32) = G_UMULH [[UV21]], [[UADDE6]] + ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(i32) = G_ADD [[UMULH13]], [[ADD14]] + ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO36]](i32), [[ADD15]](i32) + ; GFX6-NEXT: [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(i32) = G_MUL [[UV22]], [[UADDO36]] + ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(i32) = G_MUL [[UV23]], [[UADDO36]] + ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(i32) = G_MUL [[UV22]], [[ADD15]] + ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(i32) = G_UMULH [[UV22]], [[UADDO36]] + ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(i32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(i32) = G_ADD [[ADD16]], [[UMULH14]] + ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(i32), [[USUBO3:%[0-9]+]]:_(i1) = G_USUBO [[UV18]], [[MUL15]] + ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV19]], [[ADD17]], [[USUBO3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV19]], [[ADD17]] + ; GFX6-NEXT: [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE2]](i32), [[UV25]] + ; GFX6-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO2]](i32), [[UV24]] + ; GFX6-NEXT: [[SEXT1:%[0-9]+]]:_(i32) = G_SEXT [[ICMP1]](i1) + ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE2]](i32), [[UV25]] + ; GFX6-NEXT: 
[[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SEXT1]], [[SEXT]] + ; GFX6-NEXT: [[USUBO4:%[0-9]+]]:_(i32), [[USUBO5:%[0-9]+]]:_(i1) = G_USUBO [[USUBO2]], [[UV24]] + ; GFX6-NEXT: [[USUBE4:%[0-9]+]]:_(i32), [[USUBE5:%[0-9]+]]:_(i1) = G_USUBE [[SUB]], [[UV25]], [[USUBO3]] + ; GFX6-NEXT: [[USUBE6:%[0-9]+]]:_(i32), [[USUBE7:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX6-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX6-NEXT: [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C7]](i64) + ; GFX6-NEXT: [[UADDO38:%[0-9]+]]:_(i32), [[UADDO39:%[0-9]+]]:_(i1) = G_UADDO [[UADDO36]], [[UV26]] + ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(i32), [[UADDE9:%[0-9]+]]:_(i1) = G_UADDE [[ADD15]], [[UV27]], [[UADDO39]] + ; GFX6-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO38]](i32), [[UADDE8]](i32) + ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE6]](i32), [[UV25]] + ; GFX6-NEXT: [[SEXT2:%[0-9]+]]:_(i32) = G_SEXT [[ICMP3]](i1) + ; GFX6-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO4]](i32), [[UV24]] + ; GFX6-NEXT: [[SEXT3:%[0-9]+]]:_(i32) = G_SEXT [[ICMP4]](i1) + ; GFX6-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE6]](i32), [[UV25]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP5]](i1), [[SEXT3]], [[SEXT2]] + ; GFX6-NEXT: [[UV28:%[0-9]+]]:_(i32), [[UV29:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C7]](i64) + ; GFX6-NEXT: [[UADDO40:%[0-9]+]]:_(i32), [[UADDO41:%[0-9]+]]:_(i1) = G_UADDO [[UADDO38]], [[UV28]] + ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(i32), [[UADDE11:%[0-9]+]]:_(i1) = G_UADDE [[UADDE8]], [[UV29]], [[UADDO41]] + ; GFX6-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO40]](i32), [[UADDE10]](i32) + ; GFX6-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT1]](i32), [[C6]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[MV4]], [[MV3]] + ; GFX6-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT]](i32), [[C6]] + ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP7]](i1), [[SELECT2]], [[MV2]] + ; GFX6-NEXT: [[XOR2:%[0-9]+]]:_(i64) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX6-NEXT: [[XOR3:%[0-9]+]]:_(i64) = G_XOR [[SELECT3]], [[XOR2]] + ; GFX6-NEXT: [[UV30:%[0-9]+]]:_(i32), [[UV31:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR3]](i64) + ; GFX6-NEXT: [[UV32:%[0-9]+]]:_(i32), [[UV33:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR2]](i64) + ; GFX6-NEXT: [[USUBO6:%[0-9]+]]:_(i32), [[USUBO7:%[0-9]+]]:_(i1) = G_USUBO [[UV30]], [[UV32]] + ; GFX6-NEXT: [[USUBE8:%[0-9]+]]:_(i32), [[USUBE9:%[0-9]+]]:_(i1) = G_USUBE [[UV31]], [[UV33]], [[USUBO7]] + ; GFX6-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO6]](i32), [[USUBE8]](i32) + ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(i64) = G_ASHR [[UV1]], [[C]](i32) + ; GFX6-NEXT: [[ASHR3:%[0-9]+]]:_(i64) = G_ASHR [[UV3]], [[C]](i32) + ; GFX6-NEXT: [[UV34:%[0-9]+]]:_(i32), [[UV35:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV1]](i64) + ; GFX6-NEXT: [[UV36:%[0-9]+]]:_(i32), [[UV37:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR2]](i64) + ; GFX6-NEXT: [[UADDO42:%[0-9]+]]:_(i32), [[UADDO43:%[0-9]+]]:_(i1) = G_UADDO [[UV34]], [[UV36]] + ; GFX6-NEXT: [[UADDE12:%[0-9]+]]:_(i32), [[UADDE13:%[0-9]+]]:_(i1) = G_UADDE [[UV35]], [[UV37]], [[UADDO43]] + ; GFX6-NEXT: [[MV6:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO42]](i32), [[UADDE12]](i32) + ; GFX6-NEXT: [[UV38:%[0-9]+]]:_(i32), [[UV39:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV3]](i64) + ; GFX6-NEXT: [[UV40:%[0-9]+]]:_(i32), [[UV41:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR3]](i64) + ; GFX6-NEXT: [[UADDO44:%[0-9]+]]:_(i32), 
[[UADDO45:%[0-9]+]]:_(i1) = G_UADDO [[UV38]], [[UV40]] + ; GFX6-NEXT: [[UADDE14:%[0-9]+]]:_(i32), [[UADDE15:%[0-9]+]]:_(i1) = G_UADDE [[UV39]], [[UV41]], [[UADDO45]] + ; GFX6-NEXT: [[MV7:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO44]](i32), [[UADDE14]](i32) + ; GFX6-NEXT: [[XOR4:%[0-9]+]]:_(i64) = G_XOR [[MV6]], [[ASHR2]] + ; GFX6-NEXT: [[XOR5:%[0-9]+]]:_(i64) = G_XOR [[MV7]], [[ASHR3]] + ; GFX6-NEXT: [[UV42:%[0-9]+]]:_(i32), [[UV43:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR5]](i64) + ; GFX6-NEXT: [[UITOFP2:%[0-9]+]]:_(f32) = G_UITOFP [[UV42]](i32) + ; GFX6-NEXT: [[UITOFP3:%[0-9]+]]:_(f32) = G_UITOFP [[UV43]](i32) + ; GFX6-NEXT: [[FMUL4:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP3]], [[C1]] + ; GFX6-NEXT: [[FADD2:%[0-9]+]]:_(f32) = G_FADD [[FMUL4]], [[UITOFP2]] + ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD2]](f32) + ; GFX6-NEXT: [[FMUL5:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] + ; GFX6-NEXT: [[FMUL6:%[0-9]+]]:_(f32) = G_FMUL [[FMUL5]], [[C3]] + ; GFX6-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL6]] + ; GFX6-NEXT: [[FMUL7:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C4]] + ; GFX6-NEXT: [[FADD3:%[0-9]+]]:_(f32) = G_FADD [[FMUL7]], [[FMUL5]] + ; GFX6-NEXT: [[FPTOUI2:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD3]](f32) + ; GFX6-NEXT: [[FPTOUI3:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC1]](f32) + ; GFX6-NEXT: [[UV44:%[0-9]+]]:_(i32), [[UV45:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C5]](i64) + ; GFX6-NEXT: [[UV46:%[0-9]+]]:_(i32), [[UV47:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR5]](i64) + ; GFX6-NEXT: [[USUBO8:%[0-9]+]]:_(i32), [[USUBO9:%[0-9]+]]:_(i1) = G_USUBO [[UV44]], [[UV46]] + ; GFX6-NEXT: [[USUBE10:%[0-9]+]]:_(i32), [[USUBE11:%[0-9]+]]:_(i1) = G_USUBE [[UV45]], [[UV47]], [[USUBO9]] + ; GFX6-NEXT: [[MUL18:%[0-9]+]]:_(i32) = G_MUL [[USUBO8]], [[FPTOUI2]] + ; GFX6-NEXT: [[MUL19:%[0-9]+]]:_(i32) = G_MUL [[USUBE10]], [[FPTOUI2]] + ; GFX6-NEXT: [[MUL20:%[0-9]+]]:_(i32) = G_MUL [[USUBO8]], [[FPTOUI3]] + ; GFX6-NEXT: [[UMULH15:%[0-9]+]]:_(i32) = G_UMULH [[USUBO8]], [[FPTOUI2]] + ; GFX6-NEXT: [[ADD18:%[0-9]+]]:_(i32) = G_ADD [[MUL19]], [[MUL20]] + ; GFX6-NEXT: [[ADD19:%[0-9]+]]:_(i32) = G_ADD [[ADD18]], [[UMULH15]] + ; GFX6-NEXT: [[MUL21:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI3]], [[MUL18]] + ; GFX6-NEXT: [[MUL22:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI2]], [[ADD19]] + ; GFX6-NEXT: [[UMULH16:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI2]], [[MUL18]] + ; GFX6-NEXT: [[UADDO46:%[0-9]+]]:_(i32), [[UADDO47:%[0-9]+]]:_(i1) = G_UADDO [[MUL21]], [[MUL22]] + ; GFX6-NEXT: [[ZEXT15:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO47]](i1) + ; GFX6-NEXT: [[UADDO48:%[0-9]+]]:_(i32), [[UADDO49:%[0-9]+]]:_(i1) = G_UADDO [[UADDO46]], [[UMULH16]] + ; GFX6-NEXT: [[ZEXT16:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO49]](i1) + ; GFX6-NEXT: [[ADD20:%[0-9]+]]:_(i32) = G_ADD [[ZEXT15]], [[ZEXT16]] + ; GFX6-NEXT: [[MUL23:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI3]], [[ADD19]] + ; GFX6-NEXT: [[UMULH17:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI3]], [[MUL18]] + ; GFX6-NEXT: [[UMULH18:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI2]], [[ADD19]] + ; GFX6-NEXT: [[UADDO50:%[0-9]+]]:_(i32), [[UADDO51:%[0-9]+]]:_(i1) = G_UADDO [[MUL23]], [[UMULH17]] + ; GFX6-NEXT: [[ZEXT17:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO51]](i1) + ; GFX6-NEXT: [[UADDO52:%[0-9]+]]:_(i32), [[UADDO53:%[0-9]+]]:_(i1) = G_UADDO [[UADDO50]], [[UMULH18]] + ; GFX6-NEXT: [[ZEXT18:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO53]](i1) + ; GFX6-NEXT: [[ADD21:%[0-9]+]]:_(i32) = G_ADD [[ZEXT17]], [[ZEXT18]] + ; GFX6-NEXT: [[UADDO54:%[0-9]+]]:_(i32), [[UADDO55:%[0-9]+]]:_(i1) = G_UADDO [[UADDO52]], 
[[ADD20]] + ; GFX6-NEXT: [[ZEXT19:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO55]](i1) + ; GFX6-NEXT: [[ADD22:%[0-9]+]]:_(i32) = G_ADD [[ADD21]], [[ZEXT19]] + ; GFX6-NEXT: [[UMULH19:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI3]], [[ADD19]] + ; GFX6-NEXT: [[ADD23:%[0-9]+]]:_(i32) = G_ADD [[UMULH19]], [[ADD22]] + ; GFX6-NEXT: [[UADDO56:%[0-9]+]]:_(i32), [[UADDO57:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI2]], [[UADDO54]] + ; GFX6-NEXT: [[UADDE16:%[0-9]+]]:_(i32), [[UADDE17:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO57]] + ; GFX6-NEXT: [[MUL24:%[0-9]+]]:_(i32) = G_MUL [[USUBO8]], [[UADDO56]] + ; GFX6-NEXT: [[MUL25:%[0-9]+]]:_(i32) = G_MUL [[USUBE10]], [[UADDO56]] + ; GFX6-NEXT: [[MUL26:%[0-9]+]]:_(i32) = G_MUL [[USUBO8]], [[UADDE16]] + ; GFX6-NEXT: [[UMULH20:%[0-9]+]]:_(i32) = G_UMULH [[USUBO8]], [[UADDO56]] + ; GFX6-NEXT: [[ADD24:%[0-9]+]]:_(i32) = G_ADD [[MUL25]], [[MUL26]] + ; GFX6-NEXT: [[ADD25:%[0-9]+]]:_(i32) = G_ADD [[ADD24]], [[UMULH20]] + ; GFX6-NEXT: [[MUL27:%[0-9]+]]:_(i32) = G_MUL [[UADDE16]], [[MUL24]] + ; GFX6-NEXT: [[MUL28:%[0-9]+]]:_(i32) = G_MUL [[UADDO56]], [[ADD25]] + ; GFX6-NEXT: [[UMULH21:%[0-9]+]]:_(i32) = G_UMULH [[UADDO56]], [[MUL24]] + ; GFX6-NEXT: [[UADDO58:%[0-9]+]]:_(i32), [[UADDO59:%[0-9]+]]:_(i1) = G_UADDO [[MUL27]], [[MUL28]] + ; GFX6-NEXT: [[ZEXT20:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO59]](i1) + ; GFX6-NEXT: [[UADDO60:%[0-9]+]]:_(i32), [[UADDO61:%[0-9]+]]:_(i1) = G_UADDO [[UADDO58]], [[UMULH21]] + ; GFX6-NEXT: [[ZEXT21:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO61]](i1) + ; GFX6-NEXT: [[ADD26:%[0-9]+]]:_(i32) = G_ADD [[ZEXT20]], [[ZEXT21]] + ; GFX6-NEXT: [[MUL29:%[0-9]+]]:_(i32) = G_MUL [[UADDE16]], [[ADD25]] + ; GFX6-NEXT: [[UMULH22:%[0-9]+]]:_(i32) = G_UMULH [[UADDE16]], [[MUL24]] + ; GFX6-NEXT: [[UMULH23:%[0-9]+]]:_(i32) = G_UMULH [[UADDO56]], [[ADD25]] + ; GFX6-NEXT: [[UADDO62:%[0-9]+]]:_(i32), [[UADDO63:%[0-9]+]]:_(i1) = G_UADDO [[MUL29]], [[UMULH22]] + ; GFX6-NEXT: [[ZEXT22:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO63]](i1) + ; GFX6-NEXT: [[UADDO64:%[0-9]+]]:_(i32), [[UADDO65:%[0-9]+]]:_(i1) = G_UADDO [[UADDO62]], [[UMULH23]] + ; GFX6-NEXT: [[ZEXT23:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO65]](i1) + ; GFX6-NEXT: [[ADD27:%[0-9]+]]:_(i32) = G_ADD [[ZEXT22]], [[ZEXT23]] + ; GFX6-NEXT: [[UADDO66:%[0-9]+]]:_(i32), [[UADDO67:%[0-9]+]]:_(i1) = G_UADDO [[UADDO64]], [[ADD26]] + ; GFX6-NEXT: [[ZEXT24:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO67]](i1) + ; GFX6-NEXT: [[ADD28:%[0-9]+]]:_(i32) = G_ADD [[ADD27]], [[ZEXT24]] + ; GFX6-NEXT: [[UMULH24:%[0-9]+]]:_(i32) = G_UMULH [[UADDE16]], [[ADD25]] + ; GFX6-NEXT: [[ADD29:%[0-9]+]]:_(i32) = G_ADD [[UMULH24]], [[ADD28]] + ; GFX6-NEXT: [[UADDO68:%[0-9]+]]:_(i32), [[UADDO69:%[0-9]+]]:_(i1) = G_UADDO [[UADDO56]], [[UADDO66]] + ; GFX6-NEXT: [[UADDE18:%[0-9]+]]:_(i32), [[UADDE19:%[0-9]+]]:_(i1) = G_UADDE [[UADDE16]], [[ADD29]], [[UADDO69]] + ; GFX6-NEXT: [[UV48:%[0-9]+]]:_(i32), [[UV49:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR4]](i64) + ; GFX6-NEXT: [[UV50:%[0-9]+]]:_(i32), [[UV51:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR4]](i64) + ; GFX6-NEXT: [[MUL30:%[0-9]+]]:_(i32) = G_MUL [[UV51]], [[UADDO68]] + ; GFX6-NEXT: [[MUL31:%[0-9]+]]:_(i32) = G_MUL [[UV50]], [[UADDE18]] + ; GFX6-NEXT: [[UMULH25:%[0-9]+]]:_(i32) = G_UMULH [[UV50]], [[UADDO68]] + ; GFX6-NEXT: [[UADDO70:%[0-9]+]]:_(i32), [[UADDO71:%[0-9]+]]:_(i1) = G_UADDO [[MUL30]], [[MUL31]] + ; GFX6-NEXT: [[ZEXT25:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO71]](i1) + ; GFX6-NEXT: [[UADDO72:%[0-9]+]]:_(i32), [[UADDO73:%[0-9]+]]:_(i1) = G_UADDO [[UADDO70]], [[UMULH25]] + ; GFX6-NEXT: [[ZEXT26:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO73]](i1) + ; 
GFX6-NEXT: [[ADD30:%[0-9]+]]:_(i32) = G_ADD [[ZEXT25]], [[ZEXT26]] + ; GFX6-NEXT: [[MUL32:%[0-9]+]]:_(i32) = G_MUL [[UV51]], [[UADDE18]] + ; GFX6-NEXT: [[UMULH26:%[0-9]+]]:_(i32) = G_UMULH [[UV51]], [[UADDO68]] + ; GFX6-NEXT: [[UMULH27:%[0-9]+]]:_(i32) = G_UMULH [[UV50]], [[UADDE18]] + ; GFX6-NEXT: [[UADDO74:%[0-9]+]]:_(i32), [[UADDO75:%[0-9]+]]:_(i1) = G_UADDO [[MUL32]], [[UMULH26]] + ; GFX6-NEXT: [[ZEXT27:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO75]](i1) + ; GFX6-NEXT: [[UADDO76:%[0-9]+]]:_(i32), [[UADDO77:%[0-9]+]]:_(i1) = G_UADDO [[UADDO74]], [[UMULH27]] + ; GFX6-NEXT: [[ZEXT28:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO77]](i1) + ; GFX6-NEXT: [[ADD31:%[0-9]+]]:_(i32) = G_ADD [[ZEXT27]], [[ZEXT28]] + ; GFX6-NEXT: [[UADDO78:%[0-9]+]]:_(i32), [[UADDO79:%[0-9]+]]:_(i1) = G_UADDO [[UADDO76]], [[ADD30]] + ; GFX6-NEXT: [[ZEXT29:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO79]](i1) + ; GFX6-NEXT: [[ADD32:%[0-9]+]]:_(i32) = G_ADD [[ADD31]], [[ZEXT29]] + ; GFX6-NEXT: [[UMULH28:%[0-9]+]]:_(i32) = G_UMULH [[UV51]], [[UADDE18]] + ; GFX6-NEXT: [[ADD33:%[0-9]+]]:_(i32) = G_ADD [[UMULH28]], [[ADD32]] + ; GFX6-NEXT: [[MV8:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO78]](i32), [[ADD33]](i32) + ; GFX6-NEXT: [[UV52:%[0-9]+]]:_(i32), [[UV53:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR5]](i64) + ; GFX6-NEXT: [[MUL33:%[0-9]+]]:_(i32) = G_MUL [[UV52]], [[UADDO78]] + ; GFX6-NEXT: [[MUL34:%[0-9]+]]:_(i32) = G_MUL [[UV53]], [[UADDO78]] + ; GFX6-NEXT: [[MUL35:%[0-9]+]]:_(i32) = G_MUL [[UV52]], [[ADD33]] + ; GFX6-NEXT: [[UMULH29:%[0-9]+]]:_(i32) = G_UMULH [[UV52]], [[UADDO78]] + ; GFX6-NEXT: [[ADD34:%[0-9]+]]:_(i32) = G_ADD [[MUL34]], [[MUL35]] + ; GFX6-NEXT: [[ADD35:%[0-9]+]]:_(i32) = G_ADD [[ADD34]], [[UMULH29]] + ; GFX6-NEXT: [[USUBO10:%[0-9]+]]:_(i32), [[USUBO11:%[0-9]+]]:_(i1) = G_USUBO [[UV48]], [[MUL33]] + ; GFX6-NEXT: [[USUBE12:%[0-9]+]]:_(i32), [[USUBE13:%[0-9]+]]:_(i1) = G_USUBE [[UV49]], [[ADD35]], [[USUBO11]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[UV49]], [[ADD35]] + ; GFX6-NEXT: [[UV54:%[0-9]+]]:_(i32), [[UV55:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR5]](i64) + ; GFX6-NEXT: [[ICMP8:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE12]](i32), [[UV55]] + ; GFX6-NEXT: [[SEXT4:%[0-9]+]]:_(i32) = G_SEXT [[ICMP8]](i1) + ; GFX6-NEXT: [[ICMP9:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO10]](i32), [[UV54]] + ; GFX6-NEXT: [[SEXT5:%[0-9]+]]:_(i32) = G_SEXT [[ICMP9]](i1) + ; GFX6-NEXT: [[ICMP10:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE12]](i32), [[UV55]] + ; GFX6-NEXT: [[SELECT4:%[0-9]+]]:_(i32) = G_SELECT [[ICMP10]](i1), [[SEXT5]], [[SEXT4]] + ; GFX6-NEXT: [[USUBO12:%[0-9]+]]:_(i32), [[USUBO13:%[0-9]+]]:_(i1) = G_USUBO [[USUBO10]], [[UV54]] + ; GFX6-NEXT: [[USUBE14:%[0-9]+]]:_(i32), [[USUBE15:%[0-9]+]]:_(i1) = G_USUBE [[SUB1]], [[UV55]], [[USUBO11]] + ; GFX6-NEXT: [[USUBE16:%[0-9]+]]:_(i32), [[USUBE17:%[0-9]+]]:_(i1) = G_USUBE [[USUBE14]], [[C6]], [[USUBO13]] + ; GFX6-NEXT: [[UV56:%[0-9]+]]:_(i32), [[UV57:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C7]](i64) + ; GFX6-NEXT: [[UADDO80:%[0-9]+]]:_(i32), [[UADDO81:%[0-9]+]]:_(i1) = G_UADDO [[UADDO78]], [[UV56]] + ; GFX6-NEXT: [[UADDE20:%[0-9]+]]:_(i32), [[UADDE21:%[0-9]+]]:_(i1) = G_UADDE [[ADD33]], [[UV57]], [[UADDO81]] + ; GFX6-NEXT: [[MV9:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO80]](i32), [[UADDE20]](i32) + ; GFX6-NEXT: [[ICMP11:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE16]](i32), [[UV55]] + ; GFX6-NEXT: [[SEXT6:%[0-9]+]]:_(i32) = G_SEXT [[ICMP11]](i1) + ; GFX6-NEXT: [[ICMP12:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO12]](i32), [[UV54]] + ; GFX6-NEXT: [[SEXT7:%[0-9]+]]:_(i32) = 
G_SEXT [[ICMP12]](i1)
+ ; GFX6-NEXT: [[ICMP13:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE16]](i32), [[UV55]]
+ ; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(i32) = G_SELECT [[ICMP13]](i1), [[SEXT7]], [[SEXT6]]
+ ; GFX6-NEXT: [[UV58:%[0-9]+]]:_(i32), [[UV59:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C7]](i64)
+ ; GFX6-NEXT: [[UADDO82:%[0-9]+]]:_(i32), [[UADDO83:%[0-9]+]]:_(i1) = G_UADDO [[UADDO80]], [[UV58]]
+ ; GFX6-NEXT: [[UADDE22:%[0-9]+]]:_(i32), [[UADDE23:%[0-9]+]]:_(i1) = G_UADDE [[UADDE20]], [[UV59]], [[UADDO83]]
+ ; GFX6-NEXT: [[MV10:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO82]](i32), [[UADDE22]](i32)
+ ; GFX6-NEXT: [[ICMP14:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT5]](i32), [[C6]]
+ ; GFX6-NEXT: [[SELECT6:%[0-9]+]]:_(i64) = G_SELECT [[ICMP14]](i1), [[MV10]], [[MV9]]
+ ; GFX6-NEXT: [[ICMP15:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT4]](i32), [[C6]]
+ ; GFX6-NEXT: [[SELECT7:%[0-9]+]]:_(i64) = G_SELECT [[ICMP15]](i1), [[SELECT6]], [[MV8]]
+ ; GFX6-NEXT: [[XOR6:%[0-9]+]]:_(i64) = G_XOR [[ASHR2]], [[ASHR3]]
+ ; GFX6-NEXT: [[XOR7:%[0-9]+]]:_(i64) = G_XOR [[SELECT7]], [[XOR6]]
+ ; GFX6-NEXT: [[UV60:%[0-9]+]]:_(i32), [[UV61:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR7]](i64)
+ ; GFX6-NEXT: [[UV62:%[0-9]+]]:_(i32), [[UV63:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR6]](i64)
+ ; GFX6-NEXT: [[USUBO14:%[0-9]+]]:_(i32), [[USUBO15:%[0-9]+]]:_(i1) = G_USUBO [[UV60]], [[UV62]]
+ ; GFX6-NEXT: [[USUBE18:%[0-9]+]]:_(i32), [[USUBE19:%[0-9]+]]:_(i1) = G_USUBE [[UV61]], [[UV63]], [[USUBO15]]
+ ; GFX6-NEXT: [[MV11:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO14]](i32), [[USUBE18]](i32)
+ ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[MV5]](i64), [[MV11]](i64)
+ ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>)
+ ;
 ; GFX8-LABEL: name: test_sdiv_v2s64
 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7
 ; GFX8-NEXT: {{ $}}
- ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
- ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
- ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
- ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
- ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
- ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[C]](s32)
- ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[C]](s32)
- ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
- ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
- ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]]
- ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]]
- ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
- ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
- ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64)
- ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]]
- ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]]
- ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32)
- ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]]
- ; GFX8-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]]
- ; GFX8-NEXT:
[[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV12]](s32) - ; GFX8-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV13]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] - ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] - ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 - ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; GFX8-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] - ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] - ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; GFX8-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] - ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] - ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) - ; GFX8-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) - ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) - ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX8-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV16]] - ; GFX8-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV17]], [[USUBO1]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C5]] - ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV19]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] - ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV18]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV20]] - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV18]] - ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV20]] - ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV18]] - ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV20]] - ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] - 
; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD]] - ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] - ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV20]] - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] - ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) - ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV23]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE4]], [[ANYEXT1]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]] - ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV22]] - ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[UV24]] - ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV22]] - ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] - ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV24]] - ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV22]] - ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV24]] - ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] - ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD4]] - ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV24]] - ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]] - ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX8-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES 
[[XOR]](s64) - ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDO26]] - ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[UADDE6]] - ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDO26]] - ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] - ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDE6]] - ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDO26]] - ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDE6]] - ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] - ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD8]] - ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDE6]] - ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] - ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD11]](s32) - ; GFX8-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[UADDO36]], [[C5]] - ; GFX8-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) - ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV33]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV31]](s32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]] - ; GFX8-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) - ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV32]] - ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV34]], [[USUBO3]] - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV27]], [[UV34]] - ; GFX8-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV37]] - ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV36]] - ; GFX8-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV37]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX8-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV36]] - ; GFX8-NEXT: 
[[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV37]], [[USUBO3]] - ; GFX8-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] - ; GFX8-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX8-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX8-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV38]] - ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD11]], [[UV39]], [[UADDO39]] - ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) - ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV37]] - ; GFX8-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV36]] - ; GFX8-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV37]] - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX8-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX8-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV40]] - ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV41]], [[UADDO41]] - ; GFX8-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) - ; GFX8-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] - ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] - ; GFX8-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] - ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] - ; GFX8-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[ASHR1]] - ; GFX8-NEXT: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[XOR2]] - ; GFX8-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) - ; GFX8-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) - ; GFX8-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[UV42]], [[UV44]] - ; GFX8-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV43]], [[UV45]], [[USUBO7]] - ; GFX8-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE8]](s32) - ; GFX8-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) - ; GFX8-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C]](s32) - ; GFX8-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX8-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64) - ; GFX8-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UV46]], [[UV48]] - ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UV47]], [[UV49]], [[UADDO43]] - ; GFX8-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO42]](s32), [[UADDE12]](s32) - ; GFX8-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX8-NEXT: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64) - ; GFX8-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UV50]], [[UV52]] - ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UV51]], [[UV53]], [[UADDO45]] - ; GFX8-NEXT: 
[[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO44]](s32), [[UADDE14]](s32) - ; GFX8-NEXT: [[XOR4:%[0-9]+]]:_(s64) = G_XOR [[MV6]], [[ASHR2]] - ; GFX8-NEXT: [[XOR5:%[0-9]+]]:_(s64) = G_XOR [[MV7]], [[ASHR3]] - ; GFX8-NEXT: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) - ; GFX8-NEXT: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV54]](s32) - ; GFX8-NEXT: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV55]](s32) - ; GFX8-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C1]] - ; GFX8-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[UITOFP2]] - ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD2]](s32) - ; GFX8-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] - ; GFX8-NEXT: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[FMUL5]], [[C3]] - ; GFX8-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL6]] - ; GFX8-NEXT: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C4]] - ; GFX8-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]] - ; GFX8-NEXT: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD3]](s32) - ; GFX8-NEXT: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32) - ; GFX8-NEXT: [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) - ; GFX8-NEXT: [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) - ; GFX8-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV56]], [[UV58]] - ; GFX8-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[UV57]], [[UV59]], [[USUBO9]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_18:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_19:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[FPTOUI2]], [[C5]] - ; GFX8-NEXT: [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_18]](s64) - ; GFX8-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[UV61]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_20:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_21:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[FPTOUI3]], [[ANYEXT3]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_22:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_23:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE10]](s32), [[FPTOUI2]], [[AMDGPU_MAD_U64_U32_20]] - ; GFX8-NEXT: [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_22]](s64) - ; GFX8-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV60]] - ; GFX8-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[UV62]] - ; GFX8-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV60]] - ; GFX8-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] - ; GFX8-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) - ; GFX8-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH12]] - ; GFX8-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1) - ; GFX8-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] - ; GFX8-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV62]] - ; GFX8-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV60]] - ; GFX8-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV62]] - ; GFX8-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]] - ; GFX8-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) - ; GFX8-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH14]] - ; GFX8-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1) - 
; GFX8-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] - ; GFX8-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[ADD12]] - ; GFX8-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) - ; GFX8-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT19]] - ; GFX8-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV62]] - ; GFX8-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[ADD14]] - ; GFX8-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO54]] - ; GFX8-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD15]], [[UADDO57]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDO56]], [[C5]] - ; GFX8-NEXT: [[UV64:%[0-9]+]]:_(s32), [[UV65:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_24]](s64) - ; GFX8-NEXT: [[ANYEXT4:%[0-9]+]]:_(s64) = G_ANYEXT [[UV65]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDE16]], [[ANYEXT4]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE10]](s32), [[UADDO56]], [[AMDGPU_MAD_U64_U32_26]] - ; GFX8-NEXT: [[UV66:%[0-9]+]]:_(s32), [[UV67:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_28]](s64) - ; GFX8-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV64]] - ; GFX8-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO56]], [[UV66]] - ; GFX8-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[UV64]] - ; GFX8-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] - ; GFX8-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) - ; GFX8-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH16]] - ; GFX8-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1) - ; GFX8-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] - ; GFX8-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV66]] - ; GFX8-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV64]] - ; GFX8-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[UV66]] - ; GFX8-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]] - ; GFX8-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) - ; GFX8-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH18]] - ; GFX8-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1) - ; GFX8-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] - ; GFX8-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[UADDO64]], [[ADD16]] - ; GFX8-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) - ; GFX8-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[ZEXT24]] - ; GFX8-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV66]] - ; GFX8-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD18]] - ; GFX8-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO56]], [[UADDO66]] - ; GFX8-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD19]], [[UADDO69]] - ; GFX8-NEXT: [[UV68:%[0-9]+]]:_(s32), [[UV69:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) - ; GFX8-NEXT: [[UV70:%[0-9]+]]:_(s32), [[UV71:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) - ; GFX8-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL 
[[UV71]], [[UADDO68]] - ; GFX8-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV70]], [[UADDE18]] - ; GFX8-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV70]], [[UADDO68]] - ; GFX8-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]] - ; GFX8-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1) - ; GFX8-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH20]] - ; GFX8-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO73]](s1) - ; GFX8-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] - ; GFX8-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV71]], [[UADDE18]] - ; GFX8-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV71]], [[UADDO68]] - ; GFX8-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV70]], [[UADDE18]] - ; GFX8-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]] - ; GFX8-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO75]](s1) - ; GFX8-NEXT: [[UADDO76:%[0-9]+]]:_(s32), [[UADDO77:%[0-9]+]]:_(s1) = G_UADDO [[UADDO74]], [[UMULH22]] - ; GFX8-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO77]](s1) - ; GFX8-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] - ; GFX8-NEXT: [[UADDO78:%[0-9]+]]:_(s32), [[UADDO79:%[0-9]+]]:_(s1) = G_UADDO [[UADDO76]], [[ADD20]] - ; GFX8-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO79]](s1) - ; GFX8-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT29]] - ; GFX8-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV71]], [[UADDE18]] - ; GFX8-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[ADD22]] - ; GFX8-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO78]](s32), [[ADD23]](s32) - ; GFX8-NEXT: [[UV72:%[0-9]+]]:_(s32), [[UV73:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV72]](s32), [[UADDO78]], [[C5]] - ; GFX8-NEXT: [[UV74:%[0-9]+]]:_(s32), [[UV75:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_30]](s64) - ; GFX8-NEXT: [[ANYEXT5:%[0-9]+]]:_(s64) = G_ANYEXT [[UV75]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV72]](s32), [[ADD23]], [[ANYEXT5]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV73]](s32), [[UADDO78]], [[AMDGPU_MAD_U64_U32_32]] - ; GFX8-NEXT: [[UV76:%[0-9]+]]:_(s32), [[UV77:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_34]](s64) - ; GFX8-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV68]], [[UV74]] - ; GFX8-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV69]], [[UV76]], [[USUBO11]] - ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV69]], [[UV76]] - ; GFX8-NEXT: [[UV78:%[0-9]+]]:_(s32), [[UV79:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) - ; GFX8-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE12]](s32), [[UV79]] - ; GFX8-NEXT: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1) - ; GFX8-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV78]] - ; GFX8-NEXT: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1) - ; GFX8-NEXT: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE12]](s32), [[UV79]] - ; GFX8-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]] - ; GFX8-NEXT: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[USUBO10]], [[UV78]] - ; GFX8-NEXT: [[USUBE14:%[0-9]+]]:_(s32), 
[[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV79]], [[USUBO11]] - ; GFX8-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[USUBE14]], [[C6]], [[USUBO13]] - ; GFX8-NEXT: [[UV80:%[0-9]+]]:_(s32), [[UV81:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX8-NEXT: [[UADDO80:%[0-9]+]]:_(s32), [[UADDO81:%[0-9]+]]:_(s1) = G_UADDO [[UADDO78]], [[UV80]] - ; GFX8-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[ADD23]], [[UV81]], [[UADDO81]] - ; GFX8-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO80]](s32), [[UADDE20]](s32) - ; GFX8-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE16]](s32), [[UV79]] - ; GFX8-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) - ; GFX8-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV78]] - ; GFX8-NEXT: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1) - ; GFX8-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE16]](s32), [[UV79]] - ; GFX8-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] - ; GFX8-NEXT: [[UV82:%[0-9]+]]:_(s32), [[UV83:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX8-NEXT: [[UADDO82:%[0-9]+]]:_(s32), [[UADDO83:%[0-9]+]]:_(s1) = G_UADDO [[UADDO80]], [[UV82]] - ; GFX8-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[UV83]], [[UADDO83]] - ; GFX8-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO82]](s32), [[UADDE22]](s32) - ; GFX8-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C6]] - ; GFX8-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV10]], [[MV9]] - ; GFX8-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C6]] - ; GFX8-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV8]] - ; GFX8-NEXT: [[XOR6:%[0-9]+]]:_(s64) = G_XOR [[ASHR2]], [[ASHR3]] - ; GFX8-NEXT: [[XOR7:%[0-9]+]]:_(s64) = G_XOR [[SELECT7]], [[XOR6]] - ; GFX8-NEXT: [[UV84:%[0-9]+]]:_(s32), [[UV85:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR7]](s64) - ; GFX8-NEXT: [[UV86:%[0-9]+]]:_(s32), [[UV87:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR6]](s64) - ; GFX8-NEXT: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[UV84]], [[UV86]] - ; GFX8-NEXT: [[USUBE18:%[0-9]+]]:_(s32), [[USUBE19:%[0-9]+]]:_(s1) = G_USUBE [[UV85]], [[UV87]], [[USUBO15]] - ; GFX8-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO14]](s32), [[USUBE18]](s32) - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV5]](s64), [[MV11]](s64) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY1]](<2 x i64>) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[UV]], [[C]](i32) + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[UV2]], [[C]](i32) + ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV]](i64) + ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR]](i64) + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV4]], [[UV6]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV5]], [[UV7]], 
[[UADDO1]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR1]](i64) + ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UV8]], [[UV10]] + ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] + ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO2]](i32), [[UADDE2]](i32) + ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(i64) = G_XOR [[MV]], [[ASHR]] + ; GFX8-NEXT: [[XOR1:%[0-9]+]]:_(i64) = G_XOR [[MV1]], [[ASHR1]] + ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[UV12]](i32) + ; GFX8-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[UV13]](i32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD]](f32) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX8-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX8-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD1]](f32) + ; GFX8-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC]](f32) + ; GFX8-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C5]](i64) + ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX8-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV14]], [[UV16]] + ; GFX8-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV15]], [[UV17]], [[USUBO1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[FPTOUI]], [[C5]] + ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](i64) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[UV19]](i32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[FPTOUI1]], [[ANYEXT]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](i32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](i64) + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[UV18]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI]], [[UV20]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[UV18]] + ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(i32), [[UADDO5:%[0-9]+]]:_(i1) = G_UADDO [[MUL]], [[MUL1]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT 
[[UADDO5]](i1) + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(i32), [[UADDO7:%[0-9]+]]:_(i1) = G_UADDO [[UADDO4]], [[UMULH]] + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO7]](i1) + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[UV20]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[UV18]] + ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[UV20]] + ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(i32), [[UADDO9:%[0-9]+]]:_(i1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO9]](i1) + ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(i32), [[UADDO11:%[0-9]+]]:_(i1) = G_UADDO [[UADDO8]], [[UMULH2]] + ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO11]](i1) + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(i32), [[UADDO13:%[0-9]+]]:_(i1) = G_UADDO [[UADDO10]], [[ADD]] + ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO13]](i1) + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[UV20]] + ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UMULH3]], [[ADD2]] + ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(i32), [[UADDO15:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI]], [[UADDO12]] + ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(i32), [[UADDE5:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[UADDO14]], [[C5]] + ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](i64) + ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[UV23]](i32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[UADDE4]], [[ANYEXT1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](i32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](i64) + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UADDE4]], [[UV22]] + ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(i32) = G_MUL [[UADDO14]], [[UV24]] + ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(i32) = G_UMULH [[UADDO14]], [[UV22]] + ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(i32), [[UADDO17:%[0-9]+]]:_(i1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO17]](i1) + ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(i32), [[UADDO19:%[0-9]+]]:_(i1) = G_UADDO [[UADDO16]], [[UMULH4]] + ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO19]](i1) + ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(i32) = G_MUL [[UADDE4]], [[UV24]] + ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(i32) = G_UMULH [[UADDE4]], [[UV22]] + ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(i32) = G_UMULH [[UADDO14]], [[UV24]] + ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(i32), [[UADDO21:%[0-9]+]]:_(i1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO21]](i1) + ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(i32), [[UADDO23:%[0-9]+]]:_(i1) = G_UADDO [[UADDO20]], [[UMULH6]] + ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO23]](i1) + ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(i32), [[UADDO25:%[0-9]+]]:_(i1) = G_UADDO [[UADDO22]], [[ADD4]] + ; 
GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO25]](i1) + ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(i32) = G_UMULH [[UADDE4]], [[UV24]] + ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[UMULH7]], [[ADD6]] + ; GFX8-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(i32), [[UADDO27:%[0-9]+]]:_(i1) = G_UADDO [[UADDO14]], [[UADDO24]] + ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(i32), [[UADDE7:%[0-9]+]]:_(i1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]] + ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR]](i64) + ; GFX8-NEXT: [[UV28:%[0-9]+]]:_(i32), [[UV29:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR]](i64) + ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(i32) = G_MUL [[UV29]], [[UADDO26]] + ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(i32) = G_MUL [[UV28]], [[UADDE6]] + ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(i32) = G_UMULH [[UV28]], [[UADDO26]] + ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(i32), [[UADDO29:%[0-9]+]]:_(i1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO29]](i1) + ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(i32), [[UADDO31:%[0-9]+]]:_(i1) = G_UADDO [[UADDO28]], [[UMULH8]] + ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO31]](i1) + ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(i32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(i32) = G_MUL [[UV29]], [[UADDE6]] + ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(i32) = G_UMULH [[UV29]], [[UADDO26]] + ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(i32) = G_UMULH [[UV28]], [[UADDE6]] + ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(i32), [[UADDO33:%[0-9]+]]:_(i1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO33]](i1) + ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(i32), [[UADDO35:%[0-9]+]]:_(i1) = G_UADDO [[UADDO32]], [[UMULH10]] + ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO35]](i1) + ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(i32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(i32), [[UADDO37:%[0-9]+]]:_(i1) = G_UADDO [[UADDO34]], [[ADD8]] + ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO37]](i1) + ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(i32) = G_ADD [[ADD9]], [[ZEXT14]] + ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(i32) = G_UMULH [[UV29]], [[UADDE6]] + ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(i32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO36]](i32), [[ADD11]](i32) + ; GFX8-NEXT: [[UV30:%[0-9]+]]:_(i32), [[UV31:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV30]](i32), [[UADDO36]], [[C5]] + ; GFX8-NEXT: [[UV32:%[0-9]+]]:_(i32), [[UV33:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](i64) + ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(i64) = G_ANYEXT [[UV33]](i32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV30]](i32), [[ADD11]], [[ANYEXT2]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV31]](i32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX8-NEXT: [[UV34:%[0-9]+]]:_(i32), [[UV35:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](i64) + ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(i32), [[USUBO3:%[0-9]+]]:_(i1) = G_USUBO [[UV26]], [[UV32]] + ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV27]], [[UV34]], [[USUBO3]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i32) = 
G_SUB [[UV27]], [[UV34]] + ; GFX8-NEXT: [[UV36:%[0-9]+]]:_(i32), [[UV37:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE2]](i32), [[UV37]] + ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO2]](i32), [[UV36]] + ; GFX8-NEXT: [[SEXT1:%[0-9]+]]:_(i32) = G_SEXT [[ICMP1]](i1) + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE2]](i32), [[UV37]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SEXT1]], [[SEXT]] + ; GFX8-NEXT: [[USUBO4:%[0-9]+]]:_(i32), [[USUBO5:%[0-9]+]]:_(i1) = G_USUBO [[USUBO2]], [[UV36]] + ; GFX8-NEXT: [[USUBE4:%[0-9]+]]:_(i32), [[USUBE5:%[0-9]+]]:_(i1) = G_USUBE [[SUB]], [[UV37]], [[USUBO3]] + ; GFX8-NEXT: [[USUBE6:%[0-9]+]]:_(i32), [[USUBE7:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX8-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX8-NEXT: [[UV38:%[0-9]+]]:_(i32), [[UV39:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C7]](i64) + ; GFX8-NEXT: [[UADDO38:%[0-9]+]]:_(i32), [[UADDO39:%[0-9]+]]:_(i1) = G_UADDO [[UADDO36]], [[UV38]] + ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(i32), [[UADDE9:%[0-9]+]]:_(i1) = G_UADDE [[ADD11]], [[UV39]], [[UADDO39]] + ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO38]](i32), [[UADDE8]](i32) + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE6]](i32), [[UV37]] + ; GFX8-NEXT: [[SEXT2:%[0-9]+]]:_(i32) = G_SEXT [[ICMP3]](i1) + ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO4]](i32), [[UV36]] + ; GFX8-NEXT: [[SEXT3:%[0-9]+]]:_(i32) = G_SEXT [[ICMP4]](i1) + ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE6]](i32), [[UV37]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP5]](i1), [[SEXT3]], [[SEXT2]] + ; GFX8-NEXT: [[UV40:%[0-9]+]]:_(i32), [[UV41:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C7]](i64) + ; GFX8-NEXT: [[UADDO40:%[0-9]+]]:_(i32), [[UADDO41:%[0-9]+]]:_(i1) = G_UADDO [[UADDO38]], [[UV40]] + ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(i32), [[UADDE11:%[0-9]+]]:_(i1) = G_UADDE [[UADDE8]], [[UV41]], [[UADDO41]] + ; GFX8-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO40]](i32), [[UADDE10]](i32) + ; GFX8-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT1]](i32), [[C6]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[MV4]], [[MV3]] + ; GFX8-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT]](i32), [[C6]] + ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP7]](i1), [[SELECT2]], [[MV2]] + ; GFX8-NEXT: [[XOR2:%[0-9]+]]:_(i64) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX8-NEXT: [[XOR3:%[0-9]+]]:_(i64) = G_XOR [[SELECT3]], [[XOR2]] + ; GFX8-NEXT: [[UV42:%[0-9]+]]:_(i32), [[UV43:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR3]](i64) + ; GFX8-NEXT: [[UV44:%[0-9]+]]:_(i32), [[UV45:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR2]](i64) + ; GFX8-NEXT: [[USUBO6:%[0-9]+]]:_(i32), [[USUBO7:%[0-9]+]]:_(i1) = G_USUBO [[UV42]], [[UV44]] + ; GFX8-NEXT: [[USUBE8:%[0-9]+]]:_(i32), [[USUBE9:%[0-9]+]]:_(i1) = G_USUBE [[UV43]], [[UV45]], [[USUBO7]] + ; GFX8-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO6]](i32), [[USUBE8]](i32) + ; GFX8-NEXT: [[ASHR2:%[0-9]+]]:_(i64) = G_ASHR [[UV1]], [[C]](i32) + ; GFX8-NEXT: [[ASHR3:%[0-9]+]]:_(i64) = G_ASHR [[UV3]], [[C]](i32) + ; GFX8-NEXT: [[UV46:%[0-9]+]]:_(i32), [[UV47:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV1]](i64) + ; GFX8-NEXT: [[UV48:%[0-9]+]]:_(i32), [[UV49:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR2]](i64) + ; GFX8-NEXT: 
[[UADDO42:%[0-9]+]]:_(i32), [[UADDO43:%[0-9]+]]:_(i1) = G_UADDO [[UV46]], [[UV48]] + ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(i32), [[UADDE13:%[0-9]+]]:_(i1) = G_UADDE [[UV47]], [[UV49]], [[UADDO43]] + ; GFX8-NEXT: [[MV6:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO42]](i32), [[UADDE12]](i32) + ; GFX8-NEXT: [[UV50:%[0-9]+]]:_(i32), [[UV51:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV3]](i64) + ; GFX8-NEXT: [[UV52:%[0-9]+]]:_(i32), [[UV53:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR3]](i64) + ; GFX8-NEXT: [[UADDO44:%[0-9]+]]:_(i32), [[UADDO45:%[0-9]+]]:_(i1) = G_UADDO [[UV50]], [[UV52]] + ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(i32), [[UADDE15:%[0-9]+]]:_(i1) = G_UADDE [[UV51]], [[UV53]], [[UADDO45]] + ; GFX8-NEXT: [[MV7:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO44]](i32), [[UADDE14]](i32) + ; GFX8-NEXT: [[XOR4:%[0-9]+]]:_(i64) = G_XOR [[MV6]], [[ASHR2]] + ; GFX8-NEXT: [[XOR5:%[0-9]+]]:_(i64) = G_XOR [[MV7]], [[ASHR3]] + ; GFX8-NEXT: [[UV54:%[0-9]+]]:_(i32), [[UV55:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR5]](i64) + ; GFX8-NEXT: [[UITOFP2:%[0-9]+]]:_(f32) = G_UITOFP [[UV54]](i32) + ; GFX8-NEXT: [[UITOFP3:%[0-9]+]]:_(f32) = G_UITOFP [[UV55]](i32) + ; GFX8-NEXT: [[FMUL4:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP3]], [[C1]] + ; GFX8-NEXT: [[FADD2:%[0-9]+]]:_(f32) = G_FADD [[FMUL4]], [[UITOFP2]] + ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD2]](f32) + ; GFX8-NEXT: [[FMUL5:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] + ; GFX8-NEXT: [[FMUL6:%[0-9]+]]:_(f32) = G_FMUL [[FMUL5]], [[C3]] + ; GFX8-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL6]] + ; GFX8-NEXT: [[FMUL7:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C4]] + ; GFX8-NEXT: [[FADD3:%[0-9]+]]:_(f32) = G_FADD [[FMUL7]], [[FMUL5]] + ; GFX8-NEXT: [[FPTOUI2:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD3]](f32) + ; GFX8-NEXT: [[FPTOUI3:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC1]](f32) + ; GFX8-NEXT: [[UV56:%[0-9]+]]:_(i32), [[UV57:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C5]](i64) + ; GFX8-NEXT: [[UV58:%[0-9]+]]:_(i32), [[UV59:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR5]](i64) + ; GFX8-NEXT: [[USUBO8:%[0-9]+]]:_(i32), [[USUBO9:%[0-9]+]]:_(i1) = G_USUBO [[UV56]], [[UV58]] + ; GFX8-NEXT: [[USUBE10:%[0-9]+]]:_(i32), [[USUBE11:%[0-9]+]]:_(i1) = G_USUBE [[UV57]], [[UV59]], [[USUBO9]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_18:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_19:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](i32), [[FPTOUI2]], [[C5]] + ; GFX8-NEXT: [[UV60:%[0-9]+]]:_(i32), [[UV61:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_18]](i64) + ; GFX8-NEXT: [[ANYEXT3:%[0-9]+]]:_(i64) = G_ANYEXT [[UV61]](i32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_20:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_21:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](i32), [[FPTOUI3]], [[ANYEXT3]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_22:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_23:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE10]](i32), [[FPTOUI2]], [[AMDGPU_MAD_U64_U32_20]] + ; GFX8-NEXT: [[UV62:%[0-9]+]]:_(i32), [[UV63:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_22]](i64) + ; GFX8-NEXT: [[MUL9:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI3]], [[UV60]] + ; GFX8-NEXT: [[MUL10:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI2]], [[UV62]] + ; GFX8-NEXT: [[UMULH12:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI2]], [[UV60]] + ; GFX8-NEXT: [[UADDO46:%[0-9]+]]:_(i32), [[UADDO47:%[0-9]+]]:_(i1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX8-NEXT: [[ZEXT15:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO47]](i1) + ; GFX8-NEXT: [[UADDO48:%[0-9]+]]:_(i32), [[UADDO49:%[0-9]+]]:_(i1) = G_UADDO [[UADDO46]], 
[[UMULH12]] + ; GFX8-NEXT: [[ZEXT16:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO49]](i1) + ; GFX8-NEXT: [[ADD12:%[0-9]+]]:_(i32) = G_ADD [[ZEXT15]], [[ZEXT16]] + ; GFX8-NEXT: [[MUL11:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI3]], [[UV62]] + ; GFX8-NEXT: [[UMULH13:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI3]], [[UV60]] + ; GFX8-NEXT: [[UMULH14:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI2]], [[UV62]] + ; GFX8-NEXT: [[UADDO50:%[0-9]+]]:_(i32), [[UADDO51:%[0-9]+]]:_(i1) = G_UADDO [[MUL11]], [[UMULH13]] + ; GFX8-NEXT: [[ZEXT17:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO51]](i1) + ; GFX8-NEXT: [[UADDO52:%[0-9]+]]:_(i32), [[UADDO53:%[0-9]+]]:_(i1) = G_UADDO [[UADDO50]], [[UMULH14]] + ; GFX8-NEXT: [[ZEXT18:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO53]](i1) + ; GFX8-NEXT: [[ADD13:%[0-9]+]]:_(i32) = G_ADD [[ZEXT17]], [[ZEXT18]] + ; GFX8-NEXT: [[UADDO54:%[0-9]+]]:_(i32), [[UADDO55:%[0-9]+]]:_(i1) = G_UADDO [[UADDO52]], [[ADD12]] + ; GFX8-NEXT: [[ZEXT19:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO55]](i1) + ; GFX8-NEXT: [[ADD14:%[0-9]+]]:_(i32) = G_ADD [[ADD13]], [[ZEXT19]] + ; GFX8-NEXT: [[UMULH15:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI3]], [[UV62]] + ; GFX8-NEXT: [[ADD15:%[0-9]+]]:_(i32) = G_ADD [[UMULH15]], [[ADD14]] + ; GFX8-NEXT: [[UADDO56:%[0-9]+]]:_(i32), [[UADDO57:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI2]], [[UADDO54]] + ; GFX8-NEXT: [[UADDE16:%[0-9]+]]:_(i32), [[UADDE17:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI3]], [[ADD15]], [[UADDO57]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](i32), [[UADDO56]], [[C5]] + ; GFX8-NEXT: [[UV64:%[0-9]+]]:_(i32), [[UV65:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_24]](i64) + ; GFX8-NEXT: [[ANYEXT4:%[0-9]+]]:_(i64) = G_ANYEXT [[UV65]](i32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](i32), [[UADDE16]], [[ANYEXT4]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE10]](i32), [[UADDO56]], [[AMDGPU_MAD_U64_U32_26]] + ; GFX8-NEXT: [[UV66:%[0-9]+]]:_(i32), [[UV67:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_28]](i64) + ; GFX8-NEXT: [[MUL12:%[0-9]+]]:_(i32) = G_MUL [[UADDE16]], [[UV64]] + ; GFX8-NEXT: [[MUL13:%[0-9]+]]:_(i32) = G_MUL [[UADDO56]], [[UV66]] + ; GFX8-NEXT: [[UMULH16:%[0-9]+]]:_(i32) = G_UMULH [[UADDO56]], [[UV64]] + ; GFX8-NEXT: [[UADDO58:%[0-9]+]]:_(i32), [[UADDO59:%[0-9]+]]:_(i1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX8-NEXT: [[ZEXT20:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO59]](i1) + ; GFX8-NEXT: [[UADDO60:%[0-9]+]]:_(i32), [[UADDO61:%[0-9]+]]:_(i1) = G_UADDO [[UADDO58]], [[UMULH16]] + ; GFX8-NEXT: [[ZEXT21:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO61]](i1) + ; GFX8-NEXT: [[ADD16:%[0-9]+]]:_(i32) = G_ADD [[ZEXT20]], [[ZEXT21]] + ; GFX8-NEXT: [[MUL14:%[0-9]+]]:_(i32) = G_MUL [[UADDE16]], [[UV66]] + ; GFX8-NEXT: [[UMULH17:%[0-9]+]]:_(i32) = G_UMULH [[UADDE16]], [[UV64]] + ; GFX8-NEXT: [[UMULH18:%[0-9]+]]:_(i32) = G_UMULH [[UADDO56]], [[UV66]] + ; GFX8-NEXT: [[UADDO62:%[0-9]+]]:_(i32), [[UADDO63:%[0-9]+]]:_(i1) = G_UADDO [[MUL14]], [[UMULH17]] + ; GFX8-NEXT: [[ZEXT22:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO63]](i1) + ; GFX8-NEXT: [[UADDO64:%[0-9]+]]:_(i32), [[UADDO65:%[0-9]+]]:_(i1) = G_UADDO [[UADDO62]], [[UMULH18]] + ; GFX8-NEXT: [[ZEXT23:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO65]](i1) + ; GFX8-NEXT: [[ADD17:%[0-9]+]]:_(i32) = G_ADD [[ZEXT22]], [[ZEXT23]] + ; GFX8-NEXT: [[UADDO66:%[0-9]+]]:_(i32), [[UADDO67:%[0-9]+]]:_(i1) = G_UADDO [[UADDO64]], [[ADD16]] + ; GFX8-NEXT: 
[[ZEXT24:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO67]](i1) + ; GFX8-NEXT: [[ADD18:%[0-9]+]]:_(i32) = G_ADD [[ADD17]], [[ZEXT24]] + ; GFX8-NEXT: [[UMULH19:%[0-9]+]]:_(i32) = G_UMULH [[UADDE16]], [[UV66]] + ; GFX8-NEXT: [[ADD19:%[0-9]+]]:_(i32) = G_ADD [[UMULH19]], [[ADD18]] + ; GFX8-NEXT: [[UADDO68:%[0-9]+]]:_(i32), [[UADDO69:%[0-9]+]]:_(i1) = G_UADDO [[UADDO56]], [[UADDO66]] + ; GFX8-NEXT: [[UADDE18:%[0-9]+]]:_(i32), [[UADDE19:%[0-9]+]]:_(i1) = G_UADDE [[UADDE16]], [[ADD19]], [[UADDO69]] + ; GFX8-NEXT: [[UV68:%[0-9]+]]:_(i32), [[UV69:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR4]](i64) + ; GFX8-NEXT: [[UV70:%[0-9]+]]:_(i32), [[UV71:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR4]](i64) + ; GFX8-NEXT: [[MUL15:%[0-9]+]]:_(i32) = G_MUL [[UV71]], [[UADDO68]] + ; GFX8-NEXT: [[MUL16:%[0-9]+]]:_(i32) = G_MUL [[UV70]], [[UADDE18]] + ; GFX8-NEXT: [[UMULH20:%[0-9]+]]:_(i32) = G_UMULH [[UV70]], [[UADDO68]] + ; GFX8-NEXT: [[UADDO70:%[0-9]+]]:_(i32), [[UADDO71:%[0-9]+]]:_(i1) = G_UADDO [[MUL15]], [[MUL16]] + ; GFX8-NEXT: [[ZEXT25:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO71]](i1) + ; GFX8-NEXT: [[UADDO72:%[0-9]+]]:_(i32), [[UADDO73:%[0-9]+]]:_(i1) = G_UADDO [[UADDO70]], [[UMULH20]] + ; GFX8-NEXT: [[ZEXT26:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO73]](i1) + ; GFX8-NEXT: [[ADD20:%[0-9]+]]:_(i32) = G_ADD [[ZEXT25]], [[ZEXT26]] + ; GFX8-NEXT: [[MUL17:%[0-9]+]]:_(i32) = G_MUL [[UV71]], [[UADDE18]] + ; GFX8-NEXT: [[UMULH21:%[0-9]+]]:_(i32) = G_UMULH [[UV71]], [[UADDO68]] + ; GFX8-NEXT: [[UMULH22:%[0-9]+]]:_(i32) = G_UMULH [[UV70]], [[UADDE18]] + ; GFX8-NEXT: [[UADDO74:%[0-9]+]]:_(i32), [[UADDO75:%[0-9]+]]:_(i1) = G_UADDO [[MUL17]], [[UMULH21]] + ; GFX8-NEXT: [[ZEXT27:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO75]](i1) + ; GFX8-NEXT: [[UADDO76:%[0-9]+]]:_(i32), [[UADDO77:%[0-9]+]]:_(i1) = G_UADDO [[UADDO74]], [[UMULH22]] + ; GFX8-NEXT: [[ZEXT28:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO77]](i1) + ; GFX8-NEXT: [[ADD21:%[0-9]+]]:_(i32) = G_ADD [[ZEXT27]], [[ZEXT28]] + ; GFX8-NEXT: [[UADDO78:%[0-9]+]]:_(i32), [[UADDO79:%[0-9]+]]:_(i1) = G_UADDO [[UADDO76]], [[ADD20]] + ; GFX8-NEXT: [[ZEXT29:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO79]](i1) + ; GFX8-NEXT: [[ADD22:%[0-9]+]]:_(i32) = G_ADD [[ADD21]], [[ZEXT29]] + ; GFX8-NEXT: [[UMULH23:%[0-9]+]]:_(i32) = G_UMULH [[UV71]], [[UADDE18]] + ; GFX8-NEXT: [[ADD23:%[0-9]+]]:_(i32) = G_ADD [[UMULH23]], [[ADD22]] + ; GFX8-NEXT: [[MV8:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO78]](i32), [[ADD23]](i32) + ; GFX8-NEXT: [[UV72:%[0-9]+]]:_(i32), [[UV73:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR5]](i64) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV72]](i32), [[UADDO78]], [[C5]] + ; GFX8-NEXT: [[UV74:%[0-9]+]]:_(i32), [[UV75:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_30]](i64) + ; GFX8-NEXT: [[ANYEXT5:%[0-9]+]]:_(i64) = G_ANYEXT [[UV75]](i32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV72]](i32), [[ADD23]], [[ANYEXT5]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV73]](i32), [[UADDO78]], [[AMDGPU_MAD_U64_U32_32]] + ; GFX8-NEXT: [[UV76:%[0-9]+]]:_(i32), [[UV77:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_34]](i64) + ; GFX8-NEXT: [[USUBO10:%[0-9]+]]:_(i32), [[USUBO11:%[0-9]+]]:_(i1) = G_USUBO [[UV68]], [[UV74]] + ; GFX8-NEXT: [[USUBE12:%[0-9]+]]:_(i32), [[USUBE13:%[0-9]+]]:_(i1) = G_USUBE [[UV69]], [[UV76]], [[USUBO11]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[UV69]], [[UV76]] + 
; GFX8-NEXT: [[UV78:%[0-9]+]]:_(i32), [[UV79:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR5]](i64) + ; GFX8-NEXT: [[ICMP8:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE12]](i32), [[UV79]] + ; GFX8-NEXT: [[SEXT4:%[0-9]+]]:_(i32) = G_SEXT [[ICMP8]](i1) + ; GFX8-NEXT: [[ICMP9:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO10]](i32), [[UV78]] + ; GFX8-NEXT: [[SEXT5:%[0-9]+]]:_(i32) = G_SEXT [[ICMP9]](i1) + ; GFX8-NEXT: [[ICMP10:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE12]](i32), [[UV79]] + ; GFX8-NEXT: [[SELECT4:%[0-9]+]]:_(i32) = G_SELECT [[ICMP10]](i1), [[SEXT5]], [[SEXT4]] + ; GFX8-NEXT: [[USUBO12:%[0-9]+]]:_(i32), [[USUBO13:%[0-9]+]]:_(i1) = G_USUBO [[USUBO10]], [[UV78]] + ; GFX8-NEXT: [[USUBE14:%[0-9]+]]:_(i32), [[USUBE15:%[0-9]+]]:_(i1) = G_USUBE [[SUB1]], [[UV79]], [[USUBO11]] + ; GFX8-NEXT: [[USUBE16:%[0-9]+]]:_(i32), [[USUBE17:%[0-9]+]]:_(i1) = G_USUBE [[USUBE14]], [[C6]], [[USUBO13]] + ; GFX8-NEXT: [[UV80:%[0-9]+]]:_(i32), [[UV81:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C7]](i64) + ; GFX8-NEXT: [[UADDO80:%[0-9]+]]:_(i32), [[UADDO81:%[0-9]+]]:_(i1) = G_UADDO [[UADDO78]], [[UV80]] + ; GFX8-NEXT: [[UADDE20:%[0-9]+]]:_(i32), [[UADDE21:%[0-9]+]]:_(i1) = G_UADDE [[ADD23]], [[UV81]], [[UADDO81]] + ; GFX8-NEXT: [[MV9:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO80]](i32), [[UADDE20]](i32) + ; GFX8-NEXT: [[ICMP11:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE16]](i32), [[UV79]] + ; GFX8-NEXT: [[SEXT6:%[0-9]+]]:_(i32) = G_SEXT [[ICMP11]](i1) + ; GFX8-NEXT: [[ICMP12:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO12]](i32), [[UV78]] + ; GFX8-NEXT: [[SEXT7:%[0-9]+]]:_(i32) = G_SEXT [[ICMP12]](i1) + ; GFX8-NEXT: [[ICMP13:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE16]](i32), [[UV79]] + ; GFX8-NEXT: [[SELECT5:%[0-9]+]]:_(i32) = G_SELECT [[ICMP13]](i1), [[SEXT7]], [[SEXT6]] + ; GFX8-NEXT: [[UV82:%[0-9]+]]:_(i32), [[UV83:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C7]](i64) + ; GFX8-NEXT: [[UADDO82:%[0-9]+]]:_(i32), [[UADDO83:%[0-9]+]]:_(i1) = G_UADDO [[UADDO80]], [[UV82]] + ; GFX8-NEXT: [[UADDE22:%[0-9]+]]:_(i32), [[UADDE23:%[0-9]+]]:_(i1) = G_UADDE [[UADDE20]], [[UV83]], [[UADDO83]] + ; GFX8-NEXT: [[MV10:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO82]](i32), [[UADDE22]](i32) + ; GFX8-NEXT: [[ICMP14:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT5]](i32), [[C6]] + ; GFX8-NEXT: [[SELECT6:%[0-9]+]]:_(i64) = G_SELECT [[ICMP14]](i1), [[MV10]], [[MV9]] + ; GFX8-NEXT: [[ICMP15:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT4]](i32), [[C6]] + ; GFX8-NEXT: [[SELECT7:%[0-9]+]]:_(i64) = G_SELECT [[ICMP15]](i1), [[SELECT6]], [[MV8]] + ; GFX8-NEXT: [[XOR6:%[0-9]+]]:_(i64) = G_XOR [[ASHR2]], [[ASHR3]] + ; GFX8-NEXT: [[XOR7:%[0-9]+]]:_(i64) = G_XOR [[SELECT7]], [[XOR6]] + ; GFX8-NEXT: [[UV84:%[0-9]+]]:_(i32), [[UV85:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR7]](i64) + ; GFX8-NEXT: [[UV86:%[0-9]+]]:_(i32), [[UV87:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR6]](i64) + ; GFX8-NEXT: [[USUBO14:%[0-9]+]]:_(i32), [[USUBO15:%[0-9]+]]:_(i1) = G_USUBO [[UV84]], [[UV86]] + ; GFX8-NEXT: [[USUBE18:%[0-9]+]]:_(i32), [[USUBE19:%[0-9]+]]:_(i1) = G_USUBE [[UV85]], [[UV87]], [[USUBO15]] + ; GFX8-NEXT: [[MV11:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO14]](i32), [[USUBE18]](i32) + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[MV5]](i64), [[MV11]](i64) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) + ; ; GFX9-LABEL: name: test_sdiv_v2s64 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; 
GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[C]](s32) - ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[C]](s32) - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) - ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] - ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] - ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] - ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]] - ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV12]](s32) - ; GFX9-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV13]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] - ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 - ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] - ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; GFX9-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] - ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] - ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) - ; GFX9-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) - ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX9-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV16]] - ; GFX9-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV17]], [[USUBO1]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C5]] - ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) - ; GFX9-NEXT: 
[[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV19]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] - ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV18]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV20]] - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV18]] - ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV20]] - ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV18]] - ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV20]] - ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] - ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD]] - ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] - ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV20]] - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] - ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) - ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV23]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE4]], [[ANYEXT1]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]] - ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV22]] - ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[UV24]] - ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV22]] - ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] - ; 
GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV24]] - ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV22]] - ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV24]] - ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] - ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD4]] - ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV24]] - ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]] - ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDO26]] - ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[UADDE6]] - ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDO26]] - ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] - ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDE6]] - ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDO26]] - ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDE6]] - ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] - ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD8]] - ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDE6]] - ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] - ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD11]](s32) - ; GFX9-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[UADDO36]], [[C5]] - ; GFX9-NEXT: 
[[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) - ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV33]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV31]](s32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]] - ; GFX9-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) - ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV32]] - ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV34]], [[USUBO3]] - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV27]], [[UV34]] - ; GFX9-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV37]] - ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV36]] - ; GFX9-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV37]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX9-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV36]] - ; GFX9-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV37]], [[USUBO3]] - ; GFX9-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] - ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX9-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV38]] - ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD11]], [[UV39]], [[UADDO39]] - ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) - ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV37]] - ; GFX9-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV36]] - ; GFX9-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV37]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX9-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX9-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV40]] - ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV41]], [[UADDO41]] - ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) - ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] - ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] - ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] - ; GFX9-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[ASHR1]] - ; GFX9-NEXT: [[XOR3:%[0-9]+]]:_(s64) = G_XOR 
[[SELECT3]], [[XOR2]] - ; GFX9-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) - ; GFX9-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) - ; GFX9-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[UV42]], [[UV44]] - ; GFX9-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV43]], [[UV45]], [[USUBO7]] - ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE8]](s32) - ; GFX9-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) - ; GFX9-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C]](s32) - ; GFX9-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX9-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64) - ; GFX9-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UV46]], [[UV48]] - ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UV47]], [[UV49]], [[UADDO43]] - ; GFX9-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO42]](s32), [[UADDE12]](s32) - ; GFX9-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX9-NEXT: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64) - ; GFX9-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UV50]], [[UV52]] - ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UV51]], [[UV53]], [[UADDO45]] - ; GFX9-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO44]](s32), [[UADDE14]](s32) - ; GFX9-NEXT: [[XOR4:%[0-9]+]]:_(s64) = G_XOR [[MV6]], [[ASHR2]] - ; GFX9-NEXT: [[XOR5:%[0-9]+]]:_(s64) = G_XOR [[MV7]], [[ASHR3]] - ; GFX9-NEXT: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) - ; GFX9-NEXT: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV54]](s32) - ; GFX9-NEXT: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV55]](s32) - ; GFX9-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C1]] - ; GFX9-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[UITOFP2]] - ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD2]](s32) - ; GFX9-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] - ; GFX9-NEXT: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[FMUL5]], [[C3]] - ; GFX9-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL6]] - ; GFX9-NEXT: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C4]] - ; GFX9-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]] - ; GFX9-NEXT: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD3]](s32) - ; GFX9-NEXT: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32) - ; GFX9-NEXT: [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) - ; GFX9-NEXT: [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) - ; GFX9-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV56]], [[UV58]] - ; GFX9-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[UV57]], [[UV59]], [[USUBO9]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_18:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_19:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[FPTOUI2]], [[C5]] - ; GFX9-NEXT: [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_18]](s64) - ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[UV61]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_20:%[0-9]+]]:_(s64), 
[[AMDGPU_MAD_U64_U32_21:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[FPTOUI3]], [[ANYEXT3]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_22:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_23:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE10]](s32), [[FPTOUI2]], [[AMDGPU_MAD_U64_U32_20]] - ; GFX9-NEXT: [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_22]](s64) - ; GFX9-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV60]] - ; GFX9-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[UV62]] - ; GFX9-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV60]] - ; GFX9-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] - ; GFX9-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) - ; GFX9-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH12]] - ; GFX9-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1) - ; GFX9-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] - ; GFX9-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV62]] - ; GFX9-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV60]] - ; GFX9-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV62]] - ; GFX9-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]] - ; GFX9-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) - ; GFX9-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH14]] - ; GFX9-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1) - ; GFX9-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] - ; GFX9-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[ADD12]] - ; GFX9-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) - ; GFX9-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT19]] - ; GFX9-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV62]] - ; GFX9-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[ADD14]] - ; GFX9-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO54]] - ; GFX9-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD15]], [[UADDO57]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDO56]], [[C5]] - ; GFX9-NEXT: [[UV64:%[0-9]+]]:_(s32), [[UV65:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_24]](s64) - ; GFX9-NEXT: [[ANYEXT4:%[0-9]+]]:_(s64) = G_ANYEXT [[UV65]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDE16]], [[ANYEXT4]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE10]](s32), [[UADDO56]], [[AMDGPU_MAD_U64_U32_26]] - ; GFX9-NEXT: [[UV66:%[0-9]+]]:_(s32), [[UV67:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_28]](s64) - ; GFX9-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV64]] - ; GFX9-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO56]], [[UV66]] - ; GFX9-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[UV64]] - ; GFX9-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] - ; GFX9-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) - ; GFX9-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH16]] - ; GFX9-NEXT: 
[[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1) - ; GFX9-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] - ; GFX9-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV66]] - ; GFX9-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV64]] - ; GFX9-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[UV66]] - ; GFX9-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]] - ; GFX9-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) - ; GFX9-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH18]] - ; GFX9-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1) - ; GFX9-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] - ; GFX9-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[UADDO64]], [[ADD16]] - ; GFX9-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) - ; GFX9-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[ZEXT24]] - ; GFX9-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV66]] - ; GFX9-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD18]] - ; GFX9-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO56]], [[UADDO66]] - ; GFX9-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD19]], [[UADDO69]] - ; GFX9-NEXT: [[UV68:%[0-9]+]]:_(s32), [[UV69:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) - ; GFX9-NEXT: [[UV70:%[0-9]+]]:_(s32), [[UV71:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) - ; GFX9-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV71]], [[UADDO68]] - ; GFX9-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV70]], [[UADDE18]] - ; GFX9-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV70]], [[UADDO68]] - ; GFX9-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]] - ; GFX9-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1) - ; GFX9-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH20]] - ; GFX9-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO73]](s1) - ; GFX9-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] - ; GFX9-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV71]], [[UADDE18]] - ; GFX9-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV71]], [[UADDO68]] - ; GFX9-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV70]], [[UADDE18]] - ; GFX9-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]] - ; GFX9-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO75]](s1) - ; GFX9-NEXT: [[UADDO76:%[0-9]+]]:_(s32), [[UADDO77:%[0-9]+]]:_(s1) = G_UADDO [[UADDO74]], [[UMULH22]] - ; GFX9-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO77]](s1) - ; GFX9-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] - ; GFX9-NEXT: [[UADDO78:%[0-9]+]]:_(s32), [[UADDO79:%[0-9]+]]:_(s1) = G_UADDO [[UADDO76]], [[ADD20]] - ; GFX9-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO79]](s1) - ; GFX9-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT29]] - ; GFX9-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV71]], [[UADDE18]] - ; GFX9-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[ADD22]] - ; GFX9-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO78]](s32), [[ADD23]](s32) - ; GFX9-NEXT: [[UV72:%[0-9]+]]:_(s32), [[UV73:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV72]](s32), [[UADDO78]], [[C5]] - ; GFX9-NEXT: 
[[UV74:%[0-9]+]]:_(s32), [[UV75:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_30]](s64) - ; GFX9-NEXT: [[ANYEXT5:%[0-9]+]]:_(s64) = G_ANYEXT [[UV75]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV72]](s32), [[ADD23]], [[ANYEXT5]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV73]](s32), [[UADDO78]], [[AMDGPU_MAD_U64_U32_32]] - ; GFX9-NEXT: [[UV76:%[0-9]+]]:_(s32), [[UV77:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_34]](s64) - ; GFX9-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV68]], [[UV74]] - ; GFX9-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV69]], [[UV76]], [[USUBO11]] - ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV69]], [[UV76]] - ; GFX9-NEXT: [[UV78:%[0-9]+]]:_(s32), [[UV79:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) - ; GFX9-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE12]](s32), [[UV79]] - ; GFX9-NEXT: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1) - ; GFX9-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV78]] - ; GFX9-NEXT: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1) - ; GFX9-NEXT: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE12]](s32), [[UV79]] - ; GFX9-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]] - ; GFX9-NEXT: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[USUBO10]], [[UV78]] - ; GFX9-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV79]], [[USUBO11]] - ; GFX9-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[USUBE14]], [[C6]], [[USUBO13]] - ; GFX9-NEXT: [[UV80:%[0-9]+]]:_(s32), [[UV81:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX9-NEXT: [[UADDO80:%[0-9]+]]:_(s32), [[UADDO81:%[0-9]+]]:_(s1) = G_UADDO [[UADDO78]], [[UV80]] - ; GFX9-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[ADD23]], [[UV81]], [[UADDO81]] - ; GFX9-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO80]](s32), [[UADDE20]](s32) - ; GFX9-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE16]](s32), [[UV79]] - ; GFX9-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) - ; GFX9-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV78]] - ; GFX9-NEXT: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1) - ; GFX9-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE16]](s32), [[UV79]] - ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] - ; GFX9-NEXT: [[UV82:%[0-9]+]]:_(s32), [[UV83:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX9-NEXT: [[UADDO82:%[0-9]+]]:_(s32), [[UADDO83:%[0-9]+]]:_(s1) = G_UADDO [[UADDO80]], [[UV82]] - ; GFX9-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[UV83]], [[UADDO83]] - ; GFX9-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO82]](s32), [[UADDE22]](s32) - ; GFX9-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C6]] - ; GFX9-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV10]], [[MV9]] - ; GFX9-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C6]] - ; GFX9-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV8]] - ; GFX9-NEXT: [[XOR6:%[0-9]+]]:_(s64) = G_XOR [[ASHR2]], [[ASHR3]] - ; GFX9-NEXT: [[XOR7:%[0-9]+]]:_(s64) = G_XOR 
[[SELECT7]], [[XOR6]] - ; GFX9-NEXT: [[UV84:%[0-9]+]]:_(s32), [[UV85:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR7]](s64) - ; GFX9-NEXT: [[UV86:%[0-9]+]]:_(s32), [[UV87:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR6]](s64) - ; GFX9-NEXT: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[UV84]], [[UV86]] - ; GFX9-NEXT: [[USUBE18:%[0-9]+]]:_(s32), [[USUBE19:%[0-9]+]]:_(s1) = G_USUBE [[UV85]], [[UV87]], [[USUBO15]] - ; GFX9-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO14]](s32), [[USUBE18]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV5]](s64), [[MV11]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY1]](<2 x i64>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[UV]], [[C]](i32) + ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[UV2]], [[C]](i32) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV]](i64) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR]](i64) + ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV4]], [[UV6]] + ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR1]](i64) + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UV8]], [[UV10]] + ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO2]](i32), [[UADDE2]](i32) + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(i64) = G_XOR [[MV]], [[ASHR]] + ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(i64) = G_XOR [[MV1]], [[ASHR1]] + ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[UV12]](i32) + ; GFX9-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[UV13]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD]](f32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX9-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD1]](f32) + ; GFX9-NEXT: 
[[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC]](f32) + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C5]](i64) + ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX9-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV14]], [[UV16]] + ; GFX9-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV15]], [[UV17]], [[USUBO1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[FPTOUI]], [[C5]] + ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](i64) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[UV19]](i32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[FPTOUI1]], [[ANYEXT]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](i32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](i64) + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[UV18]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI]], [[UV20]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[UV18]] + ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(i32), [[UADDO5:%[0-9]+]]:_(i1) = G_UADDO [[MUL]], [[MUL1]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO5]](i1) + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(i32), [[UADDO7:%[0-9]+]]:_(i1) = G_UADDO [[UADDO4]], [[UMULH]] + ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO7]](i1) + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[UV20]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[UV18]] + ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[UV20]] + ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(i32), [[UADDO9:%[0-9]+]]:_(i1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO9]](i1) + ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(i32), [[UADDO11:%[0-9]+]]:_(i1) = G_UADDO [[UADDO8]], [[UMULH2]] + ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO11]](i1) + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(i32), [[UADDO13:%[0-9]+]]:_(i1) = G_UADDO [[UADDO10]], [[ADD]] + ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO13]](i1) + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[UV20]] + ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UMULH3]], [[ADD2]] + ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(i32), [[UADDO15:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI]], [[UADDO12]] + ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(i32), [[UADDE5:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[UADDO14]], [[C5]] + ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](i64) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[UV23]](i32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), 
[[UADDE4]], [[ANYEXT1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](i32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](i64) + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UADDE4]], [[UV22]] + ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(i32) = G_MUL [[UADDO14]], [[UV24]] + ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(i32) = G_UMULH [[UADDO14]], [[UV22]] + ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(i32), [[UADDO17:%[0-9]+]]:_(i1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO17]](i1) + ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(i32), [[UADDO19:%[0-9]+]]:_(i1) = G_UADDO [[UADDO16]], [[UMULH4]] + ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO19]](i1) + ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(i32) = G_MUL [[UADDE4]], [[UV24]] + ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(i32) = G_UMULH [[UADDE4]], [[UV22]] + ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(i32) = G_UMULH [[UADDO14]], [[UV24]] + ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(i32), [[UADDO21:%[0-9]+]]:_(i1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO21]](i1) + ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(i32), [[UADDO23:%[0-9]+]]:_(i1) = G_UADDO [[UADDO20]], [[UMULH6]] + ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO23]](i1) + ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(i32), [[UADDO25:%[0-9]+]]:_(i1) = G_UADDO [[UADDO22]], [[ADD4]] + ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO25]](i1) + ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(i32) = G_UMULH [[UADDE4]], [[UV24]] + ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[UMULH7]], [[ADD6]] + ; GFX9-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(i32), [[UADDO27:%[0-9]+]]:_(i1) = G_UADDO [[UADDO14]], [[UADDO24]] + ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(i32), [[UADDE7:%[0-9]+]]:_(i1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]] + ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR]](i64) + ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(i32), [[UV29:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR]](i64) + ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(i32) = G_MUL [[UV29]], [[UADDO26]] + ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(i32) = G_MUL [[UV28]], [[UADDE6]] + ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(i32) = G_UMULH [[UV28]], [[UADDO26]] + ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(i32), [[UADDO29:%[0-9]+]]:_(i1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO29]](i1) + ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(i32), [[UADDO31:%[0-9]+]]:_(i1) = G_UADDO [[UADDO28]], [[UMULH8]] + ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO31]](i1) + ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(i32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(i32) = G_MUL [[UV29]], [[UADDE6]] + ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(i32) = G_UMULH [[UV29]], [[UADDO26]] + ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(i32) = G_UMULH [[UV28]], [[UADDE6]] + ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(i32), [[UADDO33:%[0-9]+]]:_(i1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO33]](i1) + ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(i32), [[UADDO35:%[0-9]+]]:_(i1) = G_UADDO [[UADDO32]], [[UMULH10]] + ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO35]](i1) + ; GFX9-NEXT: 
[[ADD9:%[0-9]+]]:_(i32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(i32), [[UADDO37:%[0-9]+]]:_(i1) = G_UADDO [[UADDO34]], [[ADD8]] + ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO37]](i1) + ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(i32) = G_ADD [[ADD9]], [[ZEXT14]] + ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(i32) = G_UMULH [[UV29]], [[UADDE6]] + ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(i32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO36]](i32), [[ADD11]](i32) + ; GFX9-NEXT: [[UV30:%[0-9]+]]:_(i32), [[UV31:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV30]](i32), [[UADDO36]], [[C5]] + ; GFX9-NEXT: [[UV32:%[0-9]+]]:_(i32), [[UV33:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](i64) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(i64) = G_ANYEXT [[UV33]](i32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV30]](i32), [[ADD11]], [[ANYEXT2]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV31]](i32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX9-NEXT: [[UV34:%[0-9]+]]:_(i32), [[UV35:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](i64) + ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(i32), [[USUBO3:%[0-9]+]]:_(i1) = G_USUBO [[UV26]], [[UV32]] + ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV27]], [[UV34]], [[USUBO3]] + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV27]], [[UV34]] + ; GFX9-NEXT: [[UV36:%[0-9]+]]:_(i32), [[UV37:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE2]](i32), [[UV37]] + ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO2]](i32), [[UV36]] + ; GFX9-NEXT: [[SEXT1:%[0-9]+]]:_(i32) = G_SEXT [[ICMP1]](i1) + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE2]](i32), [[UV37]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SEXT1]], [[SEXT]] + ; GFX9-NEXT: [[USUBO4:%[0-9]+]]:_(i32), [[USUBO5:%[0-9]+]]:_(i1) = G_USUBO [[USUBO2]], [[UV36]] + ; GFX9-NEXT: [[USUBE4:%[0-9]+]]:_(i32), [[USUBE5:%[0-9]+]]:_(i1) = G_USUBE [[SUB]], [[UV37]], [[USUBO3]] + ; GFX9-NEXT: [[USUBE6:%[0-9]+]]:_(i32), [[USUBE7:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX9-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX9-NEXT: [[UV38:%[0-9]+]]:_(i32), [[UV39:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C7]](i64) + ; GFX9-NEXT: [[UADDO38:%[0-9]+]]:_(i32), [[UADDO39:%[0-9]+]]:_(i1) = G_UADDO [[UADDO36]], [[UV38]] + ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(i32), [[UADDE9:%[0-9]+]]:_(i1) = G_UADDE [[ADD11]], [[UV39]], [[UADDO39]] + ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO38]](i32), [[UADDE8]](i32) + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE6]](i32), [[UV37]] + ; GFX9-NEXT: [[SEXT2:%[0-9]+]]:_(i32) = G_SEXT [[ICMP3]](i1) + ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO4]](i32), [[UV36]] + ; GFX9-NEXT: [[SEXT3:%[0-9]+]]:_(i32) = G_SEXT [[ICMP4]](i1) + ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE6]](i32), [[UV37]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP5]](i1), [[SEXT3]], [[SEXT2]] + ; GFX9-NEXT: [[UV40:%[0-9]+]]:_(i32), [[UV41:%[0-9]+]]:_(i32) = 
G_UNMERGE_VALUES [[C7]](i64) + ; GFX9-NEXT: [[UADDO40:%[0-9]+]]:_(i32), [[UADDO41:%[0-9]+]]:_(i1) = G_UADDO [[UADDO38]], [[UV40]] + ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(i32), [[UADDE11:%[0-9]+]]:_(i1) = G_UADDE [[UADDE8]], [[UV41]], [[UADDO41]] + ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO40]](i32), [[UADDE10]](i32) + ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT1]](i32), [[C6]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[MV4]], [[MV3]] + ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT]](i32), [[C6]] + ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP7]](i1), [[SELECT2]], [[MV2]] + ; GFX9-NEXT: [[XOR2:%[0-9]+]]:_(i64) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX9-NEXT: [[XOR3:%[0-9]+]]:_(i64) = G_XOR [[SELECT3]], [[XOR2]] + ; GFX9-NEXT: [[UV42:%[0-9]+]]:_(i32), [[UV43:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR3]](i64) + ; GFX9-NEXT: [[UV44:%[0-9]+]]:_(i32), [[UV45:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR2]](i64) + ; GFX9-NEXT: [[USUBO6:%[0-9]+]]:_(i32), [[USUBO7:%[0-9]+]]:_(i1) = G_USUBO [[UV42]], [[UV44]] + ; GFX9-NEXT: [[USUBE8:%[0-9]+]]:_(i32), [[USUBE9:%[0-9]+]]:_(i1) = G_USUBE [[UV43]], [[UV45]], [[USUBO7]] + ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO6]](i32), [[USUBE8]](i32) + ; GFX9-NEXT: [[ASHR2:%[0-9]+]]:_(i64) = G_ASHR [[UV1]], [[C]](i32) + ; GFX9-NEXT: [[ASHR3:%[0-9]+]]:_(i64) = G_ASHR [[UV3]], [[C]](i32) + ; GFX9-NEXT: [[UV46:%[0-9]+]]:_(i32), [[UV47:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV1]](i64) + ; GFX9-NEXT: [[UV48:%[0-9]+]]:_(i32), [[UV49:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR2]](i64) + ; GFX9-NEXT: [[UADDO42:%[0-9]+]]:_(i32), [[UADDO43:%[0-9]+]]:_(i1) = G_UADDO [[UV46]], [[UV48]] + ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(i32), [[UADDE13:%[0-9]+]]:_(i1) = G_UADDE [[UV47]], [[UV49]], [[UADDO43]] + ; GFX9-NEXT: [[MV6:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO42]](i32), [[UADDE12]](i32) + ; GFX9-NEXT: [[UV50:%[0-9]+]]:_(i32), [[UV51:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV3]](i64) + ; GFX9-NEXT: [[UV52:%[0-9]+]]:_(i32), [[UV53:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR3]](i64) + ; GFX9-NEXT: [[UADDO44:%[0-9]+]]:_(i32), [[UADDO45:%[0-9]+]]:_(i1) = G_UADDO [[UV50]], [[UV52]] + ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(i32), [[UADDE15:%[0-9]+]]:_(i1) = G_UADDE [[UV51]], [[UV53]], [[UADDO45]] + ; GFX9-NEXT: [[MV7:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO44]](i32), [[UADDE14]](i32) + ; GFX9-NEXT: [[XOR4:%[0-9]+]]:_(i64) = G_XOR [[MV6]], [[ASHR2]] + ; GFX9-NEXT: [[XOR5:%[0-9]+]]:_(i64) = G_XOR [[MV7]], [[ASHR3]] + ; GFX9-NEXT: [[UV54:%[0-9]+]]:_(i32), [[UV55:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR5]](i64) + ; GFX9-NEXT: [[UITOFP2:%[0-9]+]]:_(f32) = G_UITOFP [[UV54]](i32) + ; GFX9-NEXT: [[UITOFP3:%[0-9]+]]:_(f32) = G_UITOFP [[UV55]](i32) + ; GFX9-NEXT: [[FMUL4:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP3]], [[C1]] + ; GFX9-NEXT: [[FADD2:%[0-9]+]]:_(f32) = G_FADD [[FMUL4]], [[UITOFP2]] + ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD2]](f32) + ; GFX9-NEXT: [[FMUL5:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] + ; GFX9-NEXT: [[FMUL6:%[0-9]+]]:_(f32) = G_FMUL [[FMUL5]], [[C3]] + ; GFX9-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL6]] + ; GFX9-NEXT: [[FMUL7:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C4]] + ; GFX9-NEXT: [[FADD3:%[0-9]+]]:_(f32) = G_FADD [[FMUL7]], [[FMUL5]] + ; GFX9-NEXT: [[FPTOUI2:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD3]](f32) + ; GFX9-NEXT: [[FPTOUI3:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC1]](f32) + ; GFX9-NEXT: 
[[UV56:%[0-9]+]]:_(i32), [[UV57:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C5]](i64) + ; GFX9-NEXT: [[UV58:%[0-9]+]]:_(i32), [[UV59:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR5]](i64) + ; GFX9-NEXT: [[USUBO8:%[0-9]+]]:_(i32), [[USUBO9:%[0-9]+]]:_(i1) = G_USUBO [[UV56]], [[UV58]] + ; GFX9-NEXT: [[USUBE10:%[0-9]+]]:_(i32), [[USUBE11:%[0-9]+]]:_(i1) = G_USUBE [[UV57]], [[UV59]], [[USUBO9]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_18:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_19:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](i32), [[FPTOUI2]], [[C5]] + ; GFX9-NEXT: [[UV60:%[0-9]+]]:_(i32), [[UV61:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_18]](i64) + ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(i64) = G_ANYEXT [[UV61]](i32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_20:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_21:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](i32), [[FPTOUI3]], [[ANYEXT3]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_22:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_23:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE10]](i32), [[FPTOUI2]], [[AMDGPU_MAD_U64_U32_20]] + ; GFX9-NEXT: [[UV62:%[0-9]+]]:_(i32), [[UV63:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_22]](i64) + ; GFX9-NEXT: [[MUL9:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI3]], [[UV60]] + ; GFX9-NEXT: [[MUL10:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI2]], [[UV62]] + ; GFX9-NEXT: [[UMULH12:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI2]], [[UV60]] + ; GFX9-NEXT: [[UADDO46:%[0-9]+]]:_(i32), [[UADDO47:%[0-9]+]]:_(i1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX9-NEXT: [[ZEXT15:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO47]](i1) + ; GFX9-NEXT: [[UADDO48:%[0-9]+]]:_(i32), [[UADDO49:%[0-9]+]]:_(i1) = G_UADDO [[UADDO46]], [[UMULH12]] + ; GFX9-NEXT: [[ZEXT16:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO49]](i1) + ; GFX9-NEXT: [[ADD12:%[0-9]+]]:_(i32) = G_ADD [[ZEXT15]], [[ZEXT16]] + ; GFX9-NEXT: [[MUL11:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI3]], [[UV62]] + ; GFX9-NEXT: [[UMULH13:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI3]], [[UV60]] + ; GFX9-NEXT: [[UMULH14:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI2]], [[UV62]] + ; GFX9-NEXT: [[UADDO50:%[0-9]+]]:_(i32), [[UADDO51:%[0-9]+]]:_(i1) = G_UADDO [[MUL11]], [[UMULH13]] + ; GFX9-NEXT: [[ZEXT17:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO51]](i1) + ; GFX9-NEXT: [[UADDO52:%[0-9]+]]:_(i32), [[UADDO53:%[0-9]+]]:_(i1) = G_UADDO [[UADDO50]], [[UMULH14]] + ; GFX9-NEXT: [[ZEXT18:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO53]](i1) + ; GFX9-NEXT: [[ADD13:%[0-9]+]]:_(i32) = G_ADD [[ZEXT17]], [[ZEXT18]] + ; GFX9-NEXT: [[UADDO54:%[0-9]+]]:_(i32), [[UADDO55:%[0-9]+]]:_(i1) = G_UADDO [[UADDO52]], [[ADD12]] + ; GFX9-NEXT: [[ZEXT19:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO55]](i1) + ; GFX9-NEXT: [[ADD14:%[0-9]+]]:_(i32) = G_ADD [[ADD13]], [[ZEXT19]] + ; GFX9-NEXT: [[UMULH15:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI3]], [[UV62]] + ; GFX9-NEXT: [[ADD15:%[0-9]+]]:_(i32) = G_ADD [[UMULH15]], [[ADD14]] + ; GFX9-NEXT: [[UADDO56:%[0-9]+]]:_(i32), [[UADDO57:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI2]], [[UADDO54]] + ; GFX9-NEXT: [[UADDE16:%[0-9]+]]:_(i32), [[UADDE17:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI3]], [[ADD15]], [[UADDO57]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](i32), [[UADDO56]], [[C5]] + ; GFX9-NEXT: [[UV64:%[0-9]+]]:_(i32), [[UV65:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_24]](i64) + ; GFX9-NEXT: [[ANYEXT4:%[0-9]+]]:_(i64) = G_ANYEXT [[UV65]](i32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](i32), [[UADDE16]], [[ANYEXT4]] + ; GFX9-NEXT: 
[[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE10]](i32), [[UADDO56]], [[AMDGPU_MAD_U64_U32_26]] + ; GFX9-NEXT: [[UV66:%[0-9]+]]:_(i32), [[UV67:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_28]](i64) + ; GFX9-NEXT: [[MUL12:%[0-9]+]]:_(i32) = G_MUL [[UADDE16]], [[UV64]] + ; GFX9-NEXT: [[MUL13:%[0-9]+]]:_(i32) = G_MUL [[UADDO56]], [[UV66]] + ; GFX9-NEXT: [[UMULH16:%[0-9]+]]:_(i32) = G_UMULH [[UADDO56]], [[UV64]] + ; GFX9-NEXT: [[UADDO58:%[0-9]+]]:_(i32), [[UADDO59:%[0-9]+]]:_(i1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX9-NEXT: [[ZEXT20:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO59]](i1) + ; GFX9-NEXT: [[UADDO60:%[0-9]+]]:_(i32), [[UADDO61:%[0-9]+]]:_(i1) = G_UADDO [[UADDO58]], [[UMULH16]] + ; GFX9-NEXT: [[ZEXT21:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO61]](i1) + ; GFX9-NEXT: [[ADD16:%[0-9]+]]:_(i32) = G_ADD [[ZEXT20]], [[ZEXT21]] + ; GFX9-NEXT: [[MUL14:%[0-9]+]]:_(i32) = G_MUL [[UADDE16]], [[UV66]] + ; GFX9-NEXT: [[UMULH17:%[0-9]+]]:_(i32) = G_UMULH [[UADDE16]], [[UV64]] + ; GFX9-NEXT: [[UMULH18:%[0-9]+]]:_(i32) = G_UMULH [[UADDO56]], [[UV66]] + ; GFX9-NEXT: [[UADDO62:%[0-9]+]]:_(i32), [[UADDO63:%[0-9]+]]:_(i1) = G_UADDO [[MUL14]], [[UMULH17]] + ; GFX9-NEXT: [[ZEXT22:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO63]](i1) + ; GFX9-NEXT: [[UADDO64:%[0-9]+]]:_(i32), [[UADDO65:%[0-9]+]]:_(i1) = G_UADDO [[UADDO62]], [[UMULH18]] + ; GFX9-NEXT: [[ZEXT23:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO65]](i1) + ; GFX9-NEXT: [[ADD17:%[0-9]+]]:_(i32) = G_ADD [[ZEXT22]], [[ZEXT23]] + ; GFX9-NEXT: [[UADDO66:%[0-9]+]]:_(i32), [[UADDO67:%[0-9]+]]:_(i1) = G_UADDO [[UADDO64]], [[ADD16]] + ; GFX9-NEXT: [[ZEXT24:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO67]](i1) + ; GFX9-NEXT: [[ADD18:%[0-9]+]]:_(i32) = G_ADD [[ADD17]], [[ZEXT24]] + ; GFX9-NEXT: [[UMULH19:%[0-9]+]]:_(i32) = G_UMULH [[UADDE16]], [[UV66]] + ; GFX9-NEXT: [[ADD19:%[0-9]+]]:_(i32) = G_ADD [[UMULH19]], [[ADD18]] + ; GFX9-NEXT: [[UADDO68:%[0-9]+]]:_(i32), [[UADDO69:%[0-9]+]]:_(i1) = G_UADDO [[UADDO56]], [[UADDO66]] + ; GFX9-NEXT: [[UADDE18:%[0-9]+]]:_(i32), [[UADDE19:%[0-9]+]]:_(i1) = G_UADDE [[UADDE16]], [[ADD19]], [[UADDO69]] + ; GFX9-NEXT: [[UV68:%[0-9]+]]:_(i32), [[UV69:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR4]](i64) + ; GFX9-NEXT: [[UV70:%[0-9]+]]:_(i32), [[UV71:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR4]](i64) + ; GFX9-NEXT: [[MUL15:%[0-9]+]]:_(i32) = G_MUL [[UV71]], [[UADDO68]] + ; GFX9-NEXT: [[MUL16:%[0-9]+]]:_(i32) = G_MUL [[UV70]], [[UADDE18]] + ; GFX9-NEXT: [[UMULH20:%[0-9]+]]:_(i32) = G_UMULH [[UV70]], [[UADDO68]] + ; GFX9-NEXT: [[UADDO70:%[0-9]+]]:_(i32), [[UADDO71:%[0-9]+]]:_(i1) = G_UADDO [[MUL15]], [[MUL16]] + ; GFX9-NEXT: [[ZEXT25:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO71]](i1) + ; GFX9-NEXT: [[UADDO72:%[0-9]+]]:_(i32), [[UADDO73:%[0-9]+]]:_(i1) = G_UADDO [[UADDO70]], [[UMULH20]] + ; GFX9-NEXT: [[ZEXT26:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO73]](i1) + ; GFX9-NEXT: [[ADD20:%[0-9]+]]:_(i32) = G_ADD [[ZEXT25]], [[ZEXT26]] + ; GFX9-NEXT: [[MUL17:%[0-9]+]]:_(i32) = G_MUL [[UV71]], [[UADDE18]] + ; GFX9-NEXT: [[UMULH21:%[0-9]+]]:_(i32) = G_UMULH [[UV71]], [[UADDO68]] + ; GFX9-NEXT: [[UMULH22:%[0-9]+]]:_(i32) = G_UMULH [[UV70]], [[UADDE18]] + ; GFX9-NEXT: [[UADDO74:%[0-9]+]]:_(i32), [[UADDO75:%[0-9]+]]:_(i1) = G_UADDO [[MUL17]], [[UMULH21]] + ; GFX9-NEXT: [[ZEXT27:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO75]](i1) + ; GFX9-NEXT: [[UADDO76:%[0-9]+]]:_(i32), [[UADDO77:%[0-9]+]]:_(i1) = G_UADDO [[UADDO74]], [[UMULH22]] + ; GFX9-NEXT: [[ZEXT28:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO77]](i1) + ; GFX9-NEXT: [[ADD21:%[0-9]+]]:_(i32) = G_ADD 
[[ZEXT27]], [[ZEXT28]] + ; GFX9-NEXT: [[UADDO78:%[0-9]+]]:_(i32), [[UADDO79:%[0-9]+]]:_(i1) = G_UADDO [[UADDO76]], [[ADD20]] + ; GFX9-NEXT: [[ZEXT29:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO79]](i1) + ; GFX9-NEXT: [[ADD22:%[0-9]+]]:_(i32) = G_ADD [[ADD21]], [[ZEXT29]] + ; GFX9-NEXT: [[UMULH23:%[0-9]+]]:_(i32) = G_UMULH [[UV71]], [[UADDE18]] + ; GFX9-NEXT: [[ADD23:%[0-9]+]]:_(i32) = G_ADD [[UMULH23]], [[ADD22]] + ; GFX9-NEXT: [[MV8:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO78]](i32), [[ADD23]](i32) + ; GFX9-NEXT: [[UV72:%[0-9]+]]:_(i32), [[UV73:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR5]](i64) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV72]](i32), [[UADDO78]], [[C5]] + ; GFX9-NEXT: [[UV74:%[0-9]+]]:_(i32), [[UV75:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_30]](i64) + ; GFX9-NEXT: [[ANYEXT5:%[0-9]+]]:_(i64) = G_ANYEXT [[UV75]](i32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV72]](i32), [[ADD23]], [[ANYEXT5]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV73]](i32), [[UADDO78]], [[AMDGPU_MAD_U64_U32_32]] + ; GFX9-NEXT: [[UV76:%[0-9]+]]:_(i32), [[UV77:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_34]](i64) + ; GFX9-NEXT: [[USUBO10:%[0-9]+]]:_(i32), [[USUBO11:%[0-9]+]]:_(i1) = G_USUBO [[UV68]], [[UV74]] + ; GFX9-NEXT: [[USUBE12:%[0-9]+]]:_(i32), [[USUBE13:%[0-9]+]]:_(i1) = G_USUBE [[UV69]], [[UV76]], [[USUBO11]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[UV69]], [[UV76]] + ; GFX9-NEXT: [[UV78:%[0-9]+]]:_(i32), [[UV79:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR5]](i64) + ; GFX9-NEXT: [[ICMP8:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE12]](i32), [[UV79]] + ; GFX9-NEXT: [[SEXT4:%[0-9]+]]:_(i32) = G_SEXT [[ICMP8]](i1) + ; GFX9-NEXT: [[ICMP9:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO10]](i32), [[UV78]] + ; GFX9-NEXT: [[SEXT5:%[0-9]+]]:_(i32) = G_SEXT [[ICMP9]](i1) + ; GFX9-NEXT: [[ICMP10:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE12]](i32), [[UV79]] + ; GFX9-NEXT: [[SELECT4:%[0-9]+]]:_(i32) = G_SELECT [[ICMP10]](i1), [[SEXT5]], [[SEXT4]] + ; GFX9-NEXT: [[USUBO12:%[0-9]+]]:_(i32), [[USUBO13:%[0-9]+]]:_(i1) = G_USUBO [[USUBO10]], [[UV78]] + ; GFX9-NEXT: [[USUBE14:%[0-9]+]]:_(i32), [[USUBE15:%[0-9]+]]:_(i1) = G_USUBE [[SUB1]], [[UV79]], [[USUBO11]] + ; GFX9-NEXT: [[USUBE16:%[0-9]+]]:_(i32), [[USUBE17:%[0-9]+]]:_(i1) = G_USUBE [[USUBE14]], [[C6]], [[USUBO13]] + ; GFX9-NEXT: [[UV80:%[0-9]+]]:_(i32), [[UV81:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C7]](i64) + ; GFX9-NEXT: [[UADDO80:%[0-9]+]]:_(i32), [[UADDO81:%[0-9]+]]:_(i1) = G_UADDO [[UADDO78]], [[UV80]] + ; GFX9-NEXT: [[UADDE20:%[0-9]+]]:_(i32), [[UADDE21:%[0-9]+]]:_(i1) = G_UADDE [[ADD23]], [[UV81]], [[UADDO81]] + ; GFX9-NEXT: [[MV9:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO80]](i32), [[UADDE20]](i32) + ; GFX9-NEXT: [[ICMP11:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE16]](i32), [[UV79]] + ; GFX9-NEXT: [[SEXT6:%[0-9]+]]:_(i32) = G_SEXT [[ICMP11]](i1) + ; GFX9-NEXT: [[ICMP12:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO12]](i32), [[UV78]] + ; GFX9-NEXT: [[SEXT7:%[0-9]+]]:_(i32) = G_SEXT [[ICMP12]](i1) + ; GFX9-NEXT: [[ICMP13:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE16]](i32), [[UV79]] + ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(i32) = G_SELECT [[ICMP13]](i1), [[SEXT7]], [[SEXT6]] + ; GFX9-NEXT: [[UV82:%[0-9]+]]:_(i32), [[UV83:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C7]](i64) + ; GFX9-NEXT: 
[[UADDO82:%[0-9]+]]:_(i32), [[UADDO83:%[0-9]+]]:_(i1) = G_UADDO [[UADDO80]], [[UV82]] + ; GFX9-NEXT: [[UADDE22:%[0-9]+]]:_(i32), [[UADDE23:%[0-9]+]]:_(i1) = G_UADDE [[UADDE20]], [[UV83]], [[UADDO83]] + ; GFX9-NEXT: [[MV10:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO82]](i32), [[UADDE22]](i32) + ; GFX9-NEXT: [[ICMP14:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT5]](i32), [[C6]] + ; GFX9-NEXT: [[SELECT6:%[0-9]+]]:_(i64) = G_SELECT [[ICMP14]](i1), [[MV10]], [[MV9]] + ; GFX9-NEXT: [[ICMP15:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT4]](i32), [[C6]] + ; GFX9-NEXT: [[SELECT7:%[0-9]+]]:_(i64) = G_SELECT [[ICMP15]](i1), [[SELECT6]], [[MV8]] + ; GFX9-NEXT: [[XOR6:%[0-9]+]]:_(i64) = G_XOR [[ASHR2]], [[ASHR3]] + ; GFX9-NEXT: [[XOR7:%[0-9]+]]:_(i64) = G_XOR [[SELECT7]], [[XOR6]] + ; GFX9-NEXT: [[UV84:%[0-9]+]]:_(i32), [[UV85:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR7]](i64) + ; GFX9-NEXT: [[UV86:%[0-9]+]]:_(i32), [[UV87:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR6]](i64) + ; GFX9-NEXT: [[USUBO14:%[0-9]+]]:_(i32), [[USUBO15:%[0-9]+]]:_(i1) = G_USUBO [[UV84]], [[UV86]] + ; GFX9-NEXT: [[USUBE18:%[0-9]+]]:_(i32), [[USUBE19:%[0-9]+]]:_(i1) = G_USUBE [[UV85]], [[UV87]], [[USUBO15]] + ; GFX9-NEXT: [[MV11:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO14]](i32), [[USUBE18]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[MV5]](i64), [[MV11]](i64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) + ; ; GFX10-LABEL: name: test_sdiv_v2s64 ; GFX10: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[C]](s32) - ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[C]](s32) - ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) - ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] - ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] - ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] - ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]] - ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV12]](s32) - ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV13]](s32) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX10-NEXT: 
[[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] - ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] - ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 - ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] - ; GFX10-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] - ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] - ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) - ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) - ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) - ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV16]] - ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV17]], [[USUBO1]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C5]] - ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV19]], [[MUL]] - ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]] - ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV18]] - ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV18]] - ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[MUL3]] - ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] - ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV18]] - ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] - ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] - ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] - ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] - ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], 
[[ADD4]] - ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] - ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64) - ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] - ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV21]], [[MUL5]] - ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] - ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[MUL6]] - ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV20]] - ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] - ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV20]] - ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] - ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] - ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] - ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV20]] - ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] - ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] - ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] - ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] - ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] - ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD10]] - ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] - ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO26]] - ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE6]] - ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO26]] - ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] - ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] - ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE6]] 
- ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO26]] - ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE6]] - ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] - ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] - ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] - ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE6]] - ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD14]] - ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD15]](s32) - ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDO36]], [[C5]] - ; GFX10-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV26]], [[ADD15]] - ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV29]], [[MUL13]] - ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV27]], [[UADDO36]] - ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL14]] - ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV28]] - ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[ADD17]], [[USUBO3]] - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV23]], [[ADD17]] - ; GFX10-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV31]] - ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV30]] - ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV31]] - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV30]] - ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV31]], [[USUBO3]] - ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] - ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX10-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV32]] - ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV33]], [[UADDO39]] - ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) - ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV31]] - ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV30]] - ; 
GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV31]] - ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX10-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV34]] - ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV35]], [[UADDO41]] - ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) - ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] - ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] - ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] - ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] - ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[ASHR1]] - ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[XOR2]] - ; GFX10-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) - ; GFX10-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) - ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[UV36]], [[UV38]] - ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV37]], [[UV39]], [[USUBO7]] - ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE8]](s32) - ; GFX10-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) - ; GFX10-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C]](s32) - ; GFX10-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX10-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64) - ; GFX10-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UV40]], [[UV42]] - ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UV41]], [[UV43]], [[UADDO43]] - ; GFX10-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO42]](s32), [[UADDE12]](s32) - ; GFX10-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX10-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64) - ; GFX10-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UV44]], [[UV46]] - ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UV45]], [[UV47]], [[UADDO45]] - ; GFX10-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO44]](s32), [[UADDE14]](s32) - ; GFX10-NEXT: [[XOR4:%[0-9]+]]:_(s64) = G_XOR [[MV6]], [[ASHR2]] - ; GFX10-NEXT: [[XOR5:%[0-9]+]]:_(s64) = G_XOR [[MV7]], [[ASHR3]] - ; GFX10-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) - ; GFX10-NEXT: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV48]](s32) - ; GFX10-NEXT: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV49]](s32) - ; GFX10-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C1]] - ; GFX10-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[UITOFP2]] - ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD2]](s32) - ; GFX10-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] - ; GFX10-NEXT: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[FMUL5]], [[C3]] - ; GFX10-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) 
= G_INTRINSIC_TRUNC [[FMUL6]] - ; GFX10-NEXT: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C4]] - ; GFX10-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]] - ; GFX10-NEXT: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD3]](s32) - ; GFX10-NEXT: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32) - ; GFX10-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) - ; GFX10-NEXT: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) - ; GFX10-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV50]], [[UV52]] - ; GFX10-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[UV51]], [[UV53]], [[USUBO9]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[FPTOUI2]], [[C5]] - ; GFX10-NEXT: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) - ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI3]] - ; GFX10-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[UV55]], [[MUL15]] - ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[USUBE10]], [[FPTOUI2]] - ; GFX10-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[MUL16]] - ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV54]] - ; GFX10-NEXT: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD19]] - ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV54]] - ; GFX10-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[MUL18]] - ; GFX10-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) - ; GFX10-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH12]] - ; GFX10-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1) - ; GFX10-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] - ; GFX10-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD19]] - ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV54]] - ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD19]] - ; GFX10-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL19]], [[UMULH13]] - ; GFX10-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) - ; GFX10-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH14]] - ; GFX10-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1) - ; GFX10-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] - ; GFX10-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[ADD20]] - ; GFX10-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) - ; GFX10-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT19]] - ; GFX10-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD19]] - ; GFX10-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[ADD22]] - ; GFX10-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO54]] - ; GFX10-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO57]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDO56]], [[C5]] - ; GFX10-NEXT: [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_8]](s64) - ; GFX10-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDE16]] - ; GFX10-NEXT: [[ADD24:%[0-9]+]]:_(s32) = 
G_ADD [[UV57]], [[MUL20]] - ; GFX10-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[USUBE10]], [[UADDO56]] - ; GFX10-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[MUL21]] - ; GFX10-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV56]] - ; GFX10-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[UADDO56]], [[ADD25]] - ; GFX10-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[UV56]] - ; GFX10-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL22]], [[MUL23]] - ; GFX10-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) - ; GFX10-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH16]] - ; GFX10-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1) - ; GFX10-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] - ; GFX10-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[ADD25]] - ; GFX10-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV56]] - ; GFX10-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[ADD25]] - ; GFX10-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL24]], [[UMULH17]] - ; GFX10-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) - ; GFX10-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH18]] - ; GFX10-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1) - ; GFX10-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] - ; GFX10-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[UADDO64]], [[ADD26]] - ; GFX10-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) - ; GFX10-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ADD27]], [[ZEXT24]] - ; GFX10-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[ADD25]] - ; GFX10-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD28]] - ; GFX10-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO56]], [[UADDO66]] - ; GFX10-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD29]], [[UADDO69]] - ; GFX10-NEXT: [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) - ; GFX10-NEXT: [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) - ; GFX10-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[UV61]], [[UADDO68]] - ; GFX10-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[UV60]], [[UADDE18]] - ; GFX10-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV60]], [[UADDO68]] - ; GFX10-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL25]], [[MUL26]] - ; GFX10-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1) - ; GFX10-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH20]] - ; GFX10-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO73]](s1) - ; GFX10-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] - ; GFX10-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UV61]], [[UADDE18]] - ; GFX10-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV61]], [[UADDO68]] - ; GFX10-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV60]], [[UADDE18]] - ; GFX10-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[UMULH21]] - ; GFX10-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO75]](s1) - ; GFX10-NEXT: [[UADDO76:%[0-9]+]]:_(s32), [[UADDO77:%[0-9]+]]:_(s1) = G_UADDO [[UADDO74]], [[UMULH22]] - ; GFX10-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO77]](s1) - ; GFX10-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] - ; GFX10-NEXT: [[UADDO78:%[0-9]+]]:_(s32), 
[[UADDO79:%[0-9]+]]:_(s1) = G_UADDO [[UADDO76]], [[ADD30]] - ; GFX10-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO79]](s1) - ; GFX10-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ADD31]], [[ZEXT29]] - ; GFX10-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV61]], [[UADDE18]] - ; GFX10-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[ADD32]] - ; GFX10-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO78]](s32), [[ADD33]](s32) - ; GFX10-NEXT: [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV62]](s32), [[UADDO78]], [[C5]] - ; GFX10-NEXT: [[UV64:%[0-9]+]]:_(s32), [[UV65:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX10-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UV62]], [[ADD33]] - ; GFX10-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[UV65]], [[MUL28]] - ; GFX10-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UV63]], [[UADDO78]] - ; GFX10-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[ADD34]], [[MUL29]] - ; GFX10-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV58]], [[UV64]] - ; GFX10-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV59]], [[ADD35]], [[USUBO11]] - ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV59]], [[ADD35]] - ; GFX10-NEXT: [[UV66:%[0-9]+]]:_(s32), [[UV67:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) - ; GFX10-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE12]](s32), [[UV67]] - ; GFX10-NEXT: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1) - ; GFX10-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV66]] - ; GFX10-NEXT: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1) - ; GFX10-NEXT: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE12]](s32), [[UV67]] - ; GFX10-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]] - ; GFX10-NEXT: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[USUBO10]], [[UV66]] - ; GFX10-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV67]], [[USUBO11]] - ; GFX10-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[USUBE14]], [[C6]], [[USUBO13]] - ; GFX10-NEXT: [[UV68:%[0-9]+]]:_(s32), [[UV69:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX10-NEXT: [[UADDO80:%[0-9]+]]:_(s32), [[UADDO81:%[0-9]+]]:_(s1) = G_UADDO [[UADDO78]], [[UV68]] - ; GFX10-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[ADD33]], [[UV69]], [[UADDO81]] - ; GFX10-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO80]](s32), [[UADDE20]](s32) - ; GFX10-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE16]](s32), [[UV67]] - ; GFX10-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) - ; GFX10-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV66]] - ; GFX10-NEXT: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1) - ; GFX10-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE16]](s32), [[UV67]] - ; GFX10-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] - ; GFX10-NEXT: [[UV70:%[0-9]+]]:_(s32), [[UV71:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX10-NEXT: [[UADDO82:%[0-9]+]]:_(s32), [[UADDO83:%[0-9]+]]:_(s1) = G_UADDO [[UADDO80]], [[UV70]] - ; GFX10-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[UV71]], [[UADDO83]] - ; GFX10-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES 
[[UADDO82]](s32), [[UADDE22]](s32) - ; GFX10-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C6]] - ; GFX10-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV10]], [[MV9]] - ; GFX10-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C6]] - ; GFX10-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV8]] - ; GFX10-NEXT: [[XOR6:%[0-9]+]]:_(s64) = G_XOR [[ASHR2]], [[ASHR3]] - ; GFX10-NEXT: [[XOR7:%[0-9]+]]:_(s64) = G_XOR [[SELECT7]], [[XOR6]] - ; GFX10-NEXT: [[UV72:%[0-9]+]]:_(s32), [[UV73:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR7]](s64) - ; GFX10-NEXT: [[UV74:%[0-9]+]]:_(s32), [[UV75:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR6]](s64) - ; GFX10-NEXT: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[UV72]], [[UV74]] - ; GFX10-NEXT: [[USUBE18:%[0-9]+]]:_(s32), [[USUBE19:%[0-9]+]]:_(s1) = G_USUBE [[UV73]], [[UV75]], [[USUBO15]] - ; GFX10-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO14]](s32), [[USUBE18]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV5]](s64), [[MV11]](s64) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - %2:_(<2 x s64>) = G_SDIV %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY1]](<2 x i64>) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[UV]], [[C]](i32) + ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[UV2]], [[C]](i32) + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV]](i64) + ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR]](i64) + ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV4]], [[UV6]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR1]](i64) + ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UV8]], [[UV10]] + ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO2]](i32), [[UADDE2]](i32) + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(i64) = G_XOR [[MV]], [[ASHR]] + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(i64) = G_XOR [[MV1]], [[ASHR1]] + ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[UV12]](i32) + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[UV13]](i32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG 
[[FADD]](f32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX10-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD1]](f32) + ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC]](f32) + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C5]](i64) + ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV14]], [[UV16]] + ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV15]], [[UV17]], [[USUBO1]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[FPTOUI]], [[C5]] + ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](i64) + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[UV19]], [[MUL]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ADD]], [[MUL1]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[UV18]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[UV18]] + ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(i32), [[UADDO5:%[0-9]+]]:_(i1) = G_UADDO [[MUL2]], [[MUL3]] + ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO5]](i1) + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(i32), [[UADDO7:%[0-9]+]]:_(i1) = G_UADDO [[UADDO4]], [[UMULH]] + ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO7]](i1) + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[UV18]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(i32), [[UADDO9:%[0-9]+]]:_(i1) = G_UADDO [[MUL4]], [[UMULH1]] + ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO9]](i1) + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(i32), [[UADDO11:%[0-9]+]]:_(i1) = G_UADDO [[UADDO8]], [[UMULH2]] + ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO11]](i1) + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(i32), [[UADDO13:%[0-9]+]]:_(i1) = G_UADDO [[UADDO10]], [[ADD2]] + ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO13]](i1) + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[UMULH3]], [[ADD4]] + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(i32), [[UADDO15:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI]], [[UADDO12]] + ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(i32), [[UADDE5:%[0-9]+]]:_(i1) = G_UADDE 
[[FPTOUI1]], [[ADD5]], [[UADDO15]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[UADDO14]], [[C5]] + ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](i64) + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[UADDE4]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[UV21]], [[MUL5]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(i32) = G_MUL [[USUBE]], [[UADDO14]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[ADD6]], [[MUL6]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(i32) = G_MUL [[UADDE4]], [[UV20]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(i32) = G_MUL [[UADDO14]], [[ADD7]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(i32) = G_UMULH [[UADDO14]], [[UV20]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(i32), [[UADDO17:%[0-9]+]]:_(i1) = G_UADDO [[MUL7]], [[MUL8]] + ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO17]](i1) + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(i32), [[UADDO19:%[0-9]+]]:_(i1) = G_UADDO [[UADDO16]], [[UMULH4]] + ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO19]](i1) + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(i32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(i32) = G_MUL [[UADDE4]], [[ADD7]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(i32) = G_UMULH [[UADDE4]], [[UV20]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(i32) = G_UMULH [[UADDO14]], [[ADD7]] + ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(i32), [[UADDO21:%[0-9]+]]:_(i1) = G_UADDO [[MUL9]], [[UMULH5]] + ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO21]](i1) + ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(i32), [[UADDO23:%[0-9]+]]:_(i1) = G_UADDO [[UADDO20]], [[UMULH6]] + ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO23]](i1) + ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(i32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(i32), [[UADDO25:%[0-9]+]]:_(i1) = G_UADDO [[UADDO22]], [[ADD8]] + ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO25]](i1) + ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(i32) = G_ADD [[ADD9]], [[ZEXT9]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(i32) = G_UMULH [[UADDE4]], [[ADD7]] + ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(i32) = G_ADD [[UMULH7]], [[ADD10]] + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(i32), [[UADDO27:%[0-9]+]]:_(i1) = G_UADDO [[UADDO14]], [[UADDO24]] + ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(i32), [[UADDE7:%[0-9]+]]:_(i1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] + ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR]](i64) + ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR]](i64) + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(i32) = G_MUL [[UV25]], [[UADDO26]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(i32) = G_MUL [[UV24]], [[UADDE6]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(i32) = G_UMULH [[UV24]], [[UADDO26]] + ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(i32), [[UADDO29:%[0-9]+]]:_(i1) = G_UADDO [[MUL10]], [[MUL11]] + ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO29]](i1) + ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(i32), [[UADDO31:%[0-9]+]]:_(i1) = G_UADDO [[UADDO28]], [[UMULH8]] + ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO31]](i1) + ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(i32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(i32) = G_MUL [[UV25]], [[UADDE6]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(i32) = G_UMULH [[UV25]], [[UADDO26]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(i32) = G_UMULH [[UV24]], [[UADDE6]] + ; GFX10-NEXT: 
[[UADDO32:%[0-9]+]]:_(i32), [[UADDO33:%[0-9]+]]:_(i1) = G_UADDO [[MUL12]], [[UMULH9]] + ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO33]](i1) + ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(i32), [[UADDO35:%[0-9]+]]:_(i1) = G_UADDO [[UADDO32]], [[UMULH10]] + ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO35]](i1) + ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(i32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(i32), [[UADDO37:%[0-9]+]]:_(i1) = G_UADDO [[UADDO34]], [[ADD12]] + ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO37]](i1) + ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(i32) = G_ADD [[ADD13]], [[ZEXT14]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(i32) = G_UMULH [[UV25]], [[UADDE6]] + ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(i32) = G_ADD [[UMULH11]], [[ADD14]] + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO36]](i32), [[ADD15]](i32) + ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV26]](i32), [[UADDO36]], [[C5]] + ; GFX10-NEXT: [[UV28:%[0-9]+]]:_(i32), [[UV29:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](i64) + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(i32) = G_MUL [[UV26]], [[ADD15]] + ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(i32) = G_ADD [[UV29]], [[MUL13]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(i32) = G_MUL [[UV27]], [[UADDO36]] + ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(i32) = G_ADD [[ADD16]], [[MUL14]] + ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(i32), [[USUBO3:%[0-9]+]]:_(i1) = G_USUBO [[UV22]], [[UV28]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV23]], [[ADD17]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV23]], [[ADD17]] + ; GFX10-NEXT: [[UV30:%[0-9]+]]:_(i32), [[UV31:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE2]](i32), [[UV31]] + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO2]](i32), [[UV30]] + ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(i32) = G_SEXT [[ICMP1]](i1) + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE2]](i32), [[UV31]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SEXT1]], [[SEXT]] + ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(i32), [[USUBO5:%[0-9]+]]:_(i1) = G_USUBO [[USUBO2]], [[UV30]] + ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(i32), [[USUBE5:%[0-9]+]]:_(i1) = G_USUBE [[SUB]], [[UV31]], [[USUBO3]] + ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(i32), [[USUBE7:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX10-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX10-NEXT: [[UV32:%[0-9]+]]:_(i32), [[UV33:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C7]](i64) + ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(i32), [[UADDO39:%[0-9]+]]:_(i1) = G_UADDO [[UADDO36]], [[UV32]] + ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(i32), [[UADDE9:%[0-9]+]]:_(i1) = G_UADDE [[ADD15]], [[UV33]], [[UADDO39]] + ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO38]](i32), [[UADDE8]](i32) + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE6]](i32), [[UV31]] + ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(i32) = G_SEXT [[ICMP3]](i1) + ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO4]](i32), [[UV30]] + ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(i32) = G_SEXT [[ICMP4]](i1) + ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE6]](i32), [[UV31]] + ; GFX10-NEXT: 
[[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP5]](i1), [[SEXT3]], [[SEXT2]] + ; GFX10-NEXT: [[UV34:%[0-9]+]]:_(i32), [[UV35:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C7]](i64) + ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(i32), [[UADDO41:%[0-9]+]]:_(i1) = G_UADDO [[UADDO38]], [[UV34]] + ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(i32), [[UADDE11:%[0-9]+]]:_(i1) = G_UADDE [[UADDE8]], [[UV35]], [[UADDO41]] + ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO40]](i32), [[UADDE10]](i32) + ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT1]](i32), [[C6]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[MV4]], [[MV3]] + ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT]](i32), [[C6]] + ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP7]](i1), [[SELECT2]], [[MV2]] + ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(i64) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(i64) = G_XOR [[SELECT3]], [[XOR2]] + ; GFX10-NEXT: [[UV36:%[0-9]+]]:_(i32), [[UV37:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR3]](i64) + ; GFX10-NEXT: [[UV38:%[0-9]+]]:_(i32), [[UV39:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR2]](i64) + ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(i32), [[USUBO7:%[0-9]+]]:_(i1) = G_USUBO [[UV36]], [[UV38]] + ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(i32), [[USUBE9:%[0-9]+]]:_(i1) = G_USUBE [[UV37]], [[UV39]], [[USUBO7]] + ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO6]](i32), [[USUBE8]](i32) + ; GFX10-NEXT: [[ASHR2:%[0-9]+]]:_(i64) = G_ASHR [[UV1]], [[C]](i32) + ; GFX10-NEXT: [[ASHR3:%[0-9]+]]:_(i64) = G_ASHR [[UV3]], [[C]](i32) + ; GFX10-NEXT: [[UV40:%[0-9]+]]:_(i32), [[UV41:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV1]](i64) + ; GFX10-NEXT: [[UV42:%[0-9]+]]:_(i32), [[UV43:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR2]](i64) + ; GFX10-NEXT: [[UADDO42:%[0-9]+]]:_(i32), [[UADDO43:%[0-9]+]]:_(i1) = G_UADDO [[UV40]], [[UV42]] + ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(i32), [[UADDE13:%[0-9]+]]:_(i1) = G_UADDE [[UV41]], [[UV43]], [[UADDO43]] + ; GFX10-NEXT: [[MV6:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO42]](i32), [[UADDE12]](i32) + ; GFX10-NEXT: [[UV44:%[0-9]+]]:_(i32), [[UV45:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV3]](i64) + ; GFX10-NEXT: [[UV46:%[0-9]+]]:_(i32), [[UV47:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR3]](i64) + ; GFX10-NEXT: [[UADDO44:%[0-9]+]]:_(i32), [[UADDO45:%[0-9]+]]:_(i1) = G_UADDO [[UV44]], [[UV46]] + ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(i32), [[UADDE15:%[0-9]+]]:_(i1) = G_UADDE [[UV45]], [[UV47]], [[UADDO45]] + ; GFX10-NEXT: [[MV7:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO44]](i32), [[UADDE14]](i32) + ; GFX10-NEXT: [[XOR4:%[0-9]+]]:_(i64) = G_XOR [[MV6]], [[ASHR2]] + ; GFX10-NEXT: [[XOR5:%[0-9]+]]:_(i64) = G_XOR [[MV7]], [[ASHR3]] + ; GFX10-NEXT: [[UV48:%[0-9]+]]:_(i32), [[UV49:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR5]](i64) + ; GFX10-NEXT: [[UITOFP2:%[0-9]+]]:_(f32) = G_UITOFP [[UV48]](i32) + ; GFX10-NEXT: [[UITOFP3:%[0-9]+]]:_(f32) = G_UITOFP [[UV49]](i32) + ; GFX10-NEXT: [[FMUL4:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP3]], [[C1]] + ; GFX10-NEXT: [[FADD2:%[0-9]+]]:_(f32) = G_FADD [[FMUL4]], [[UITOFP2]] + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD2]](f32) + ; GFX10-NEXT: [[FMUL5:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] + ; GFX10-NEXT: [[FMUL6:%[0-9]+]]:_(f32) = G_FMUL [[FMUL5]], [[C3]] + ; GFX10-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL6]] + ; GFX10-NEXT: [[FMUL7:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C4]] + ; GFX10-NEXT: [[FADD3:%[0-9]+]]:_(f32) = G_FADD 
[[FMUL7]], [[FMUL5]] + ; GFX10-NEXT: [[FPTOUI2:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD3]](f32) + ; GFX10-NEXT: [[FPTOUI3:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC1]](f32) + ; GFX10-NEXT: [[UV50:%[0-9]+]]:_(i32), [[UV51:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C5]](i64) + ; GFX10-NEXT: [[UV52:%[0-9]+]]:_(i32), [[UV53:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR5]](i64) + ; GFX10-NEXT: [[USUBO8:%[0-9]+]]:_(i32), [[USUBO9:%[0-9]+]]:_(i1) = G_USUBO [[UV50]], [[UV52]] + ; GFX10-NEXT: [[USUBE10:%[0-9]+]]:_(i32), [[USUBE11:%[0-9]+]]:_(i1) = G_USUBE [[UV51]], [[UV53]], [[USUBO9]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](i32), [[FPTOUI2]], [[C5]] + ; GFX10-NEXT: [[UV54:%[0-9]+]]:_(i32), [[UV55:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](i64) + ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(i32) = G_MUL [[USUBO8]], [[FPTOUI3]] + ; GFX10-NEXT: [[ADD18:%[0-9]+]]:_(i32) = G_ADD [[UV55]], [[MUL15]] + ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(i32) = G_MUL [[USUBE10]], [[FPTOUI2]] + ; GFX10-NEXT: [[ADD19:%[0-9]+]]:_(i32) = G_ADD [[ADD18]], [[MUL16]] + ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI3]], [[UV54]] + ; GFX10-NEXT: [[MUL18:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI2]], [[ADD19]] + ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI2]], [[UV54]] + ; GFX10-NEXT: [[UADDO46:%[0-9]+]]:_(i32), [[UADDO47:%[0-9]+]]:_(i1) = G_UADDO [[MUL17]], [[MUL18]] + ; GFX10-NEXT: [[ZEXT15:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO47]](i1) + ; GFX10-NEXT: [[UADDO48:%[0-9]+]]:_(i32), [[UADDO49:%[0-9]+]]:_(i1) = G_UADDO [[UADDO46]], [[UMULH12]] + ; GFX10-NEXT: [[ZEXT16:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO49]](i1) + ; GFX10-NEXT: [[ADD20:%[0-9]+]]:_(i32) = G_ADD [[ZEXT15]], [[ZEXT16]] + ; GFX10-NEXT: [[MUL19:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI3]], [[ADD19]] + ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI3]], [[UV54]] + ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI2]], [[ADD19]] + ; GFX10-NEXT: [[UADDO50:%[0-9]+]]:_(i32), [[UADDO51:%[0-9]+]]:_(i1) = G_UADDO [[MUL19]], [[UMULH13]] + ; GFX10-NEXT: [[ZEXT17:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO51]](i1) + ; GFX10-NEXT: [[UADDO52:%[0-9]+]]:_(i32), [[UADDO53:%[0-9]+]]:_(i1) = G_UADDO [[UADDO50]], [[UMULH14]] + ; GFX10-NEXT: [[ZEXT18:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO53]](i1) + ; GFX10-NEXT: [[ADD21:%[0-9]+]]:_(i32) = G_ADD [[ZEXT17]], [[ZEXT18]] + ; GFX10-NEXT: [[UADDO54:%[0-9]+]]:_(i32), [[UADDO55:%[0-9]+]]:_(i1) = G_UADDO [[UADDO52]], [[ADD20]] + ; GFX10-NEXT: [[ZEXT19:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO55]](i1) + ; GFX10-NEXT: [[ADD22:%[0-9]+]]:_(i32) = G_ADD [[ADD21]], [[ZEXT19]] + ; GFX10-NEXT: [[UMULH15:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI3]], [[ADD19]] + ; GFX10-NEXT: [[ADD23:%[0-9]+]]:_(i32) = G_ADD [[UMULH15]], [[ADD22]] + ; GFX10-NEXT: [[UADDO56:%[0-9]+]]:_(i32), [[UADDO57:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI2]], [[UADDO54]] + ; GFX10-NEXT: [[UADDE16:%[0-9]+]]:_(i32), [[UADDE17:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO57]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](i32), [[UADDO56]], [[C5]] + ; GFX10-NEXT: [[UV56:%[0-9]+]]:_(i32), [[UV57:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_8]](i64) + ; GFX10-NEXT: [[MUL20:%[0-9]+]]:_(i32) = G_MUL [[USUBO8]], [[UADDE16]] + ; GFX10-NEXT: [[ADD24:%[0-9]+]]:_(i32) = G_ADD [[UV57]], [[MUL20]] + ; GFX10-NEXT: [[MUL21:%[0-9]+]]:_(i32) = G_MUL [[USUBE10]], [[UADDO56]] + ; GFX10-NEXT: [[ADD25:%[0-9]+]]:_(i32) = G_ADD [[ADD24]], 
[[MUL21]] + ; GFX10-NEXT: [[MUL22:%[0-9]+]]:_(i32) = G_MUL [[UADDE16]], [[UV56]] + ; GFX10-NEXT: [[MUL23:%[0-9]+]]:_(i32) = G_MUL [[UADDO56]], [[ADD25]] + ; GFX10-NEXT: [[UMULH16:%[0-9]+]]:_(i32) = G_UMULH [[UADDO56]], [[UV56]] + ; GFX10-NEXT: [[UADDO58:%[0-9]+]]:_(i32), [[UADDO59:%[0-9]+]]:_(i1) = G_UADDO [[MUL22]], [[MUL23]] + ; GFX10-NEXT: [[ZEXT20:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO59]](i1) + ; GFX10-NEXT: [[UADDO60:%[0-9]+]]:_(i32), [[UADDO61:%[0-9]+]]:_(i1) = G_UADDO [[UADDO58]], [[UMULH16]] + ; GFX10-NEXT: [[ZEXT21:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO61]](i1) + ; GFX10-NEXT: [[ADD26:%[0-9]+]]:_(i32) = G_ADD [[ZEXT20]], [[ZEXT21]] + ; GFX10-NEXT: [[MUL24:%[0-9]+]]:_(i32) = G_MUL [[UADDE16]], [[ADD25]] + ; GFX10-NEXT: [[UMULH17:%[0-9]+]]:_(i32) = G_UMULH [[UADDE16]], [[UV56]] + ; GFX10-NEXT: [[UMULH18:%[0-9]+]]:_(i32) = G_UMULH [[UADDO56]], [[ADD25]] + ; GFX10-NEXT: [[UADDO62:%[0-9]+]]:_(i32), [[UADDO63:%[0-9]+]]:_(i1) = G_UADDO [[MUL24]], [[UMULH17]] + ; GFX10-NEXT: [[ZEXT22:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO63]](i1) + ; GFX10-NEXT: [[UADDO64:%[0-9]+]]:_(i32), [[UADDO65:%[0-9]+]]:_(i1) = G_UADDO [[UADDO62]], [[UMULH18]] + ; GFX10-NEXT: [[ZEXT23:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO65]](i1) + ; GFX10-NEXT: [[ADD27:%[0-9]+]]:_(i32) = G_ADD [[ZEXT22]], [[ZEXT23]] + ; GFX10-NEXT: [[UADDO66:%[0-9]+]]:_(i32), [[UADDO67:%[0-9]+]]:_(i1) = G_UADDO [[UADDO64]], [[ADD26]] + ; GFX10-NEXT: [[ZEXT24:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO67]](i1) + ; GFX10-NEXT: [[ADD28:%[0-9]+]]:_(i32) = G_ADD [[ADD27]], [[ZEXT24]] + ; GFX10-NEXT: [[UMULH19:%[0-9]+]]:_(i32) = G_UMULH [[UADDE16]], [[ADD25]] + ; GFX10-NEXT: [[ADD29:%[0-9]+]]:_(i32) = G_ADD [[UMULH19]], [[ADD28]] + ; GFX10-NEXT: [[UADDO68:%[0-9]+]]:_(i32), [[UADDO69:%[0-9]+]]:_(i1) = G_UADDO [[UADDO56]], [[UADDO66]] + ; GFX10-NEXT: [[UADDE18:%[0-9]+]]:_(i32), [[UADDE19:%[0-9]+]]:_(i1) = G_UADDE [[UADDE16]], [[ADD29]], [[UADDO69]] + ; GFX10-NEXT: [[UV58:%[0-9]+]]:_(i32), [[UV59:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR4]](i64) + ; GFX10-NEXT: [[UV60:%[0-9]+]]:_(i32), [[UV61:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR4]](i64) + ; GFX10-NEXT: [[MUL25:%[0-9]+]]:_(i32) = G_MUL [[UV61]], [[UADDO68]] + ; GFX10-NEXT: [[MUL26:%[0-9]+]]:_(i32) = G_MUL [[UV60]], [[UADDE18]] + ; GFX10-NEXT: [[UMULH20:%[0-9]+]]:_(i32) = G_UMULH [[UV60]], [[UADDO68]] + ; GFX10-NEXT: [[UADDO70:%[0-9]+]]:_(i32), [[UADDO71:%[0-9]+]]:_(i1) = G_UADDO [[MUL25]], [[MUL26]] + ; GFX10-NEXT: [[ZEXT25:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO71]](i1) + ; GFX10-NEXT: [[UADDO72:%[0-9]+]]:_(i32), [[UADDO73:%[0-9]+]]:_(i1) = G_UADDO [[UADDO70]], [[UMULH20]] + ; GFX10-NEXT: [[ZEXT26:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO73]](i1) + ; GFX10-NEXT: [[ADD30:%[0-9]+]]:_(i32) = G_ADD [[ZEXT25]], [[ZEXT26]] + ; GFX10-NEXT: [[MUL27:%[0-9]+]]:_(i32) = G_MUL [[UV61]], [[UADDE18]] + ; GFX10-NEXT: [[UMULH21:%[0-9]+]]:_(i32) = G_UMULH [[UV61]], [[UADDO68]] + ; GFX10-NEXT: [[UMULH22:%[0-9]+]]:_(i32) = G_UMULH [[UV60]], [[UADDE18]] + ; GFX10-NEXT: [[UADDO74:%[0-9]+]]:_(i32), [[UADDO75:%[0-9]+]]:_(i1) = G_UADDO [[MUL27]], [[UMULH21]] + ; GFX10-NEXT: [[ZEXT27:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO75]](i1) + ; GFX10-NEXT: [[UADDO76:%[0-9]+]]:_(i32), [[UADDO77:%[0-9]+]]:_(i1) = G_UADDO [[UADDO74]], [[UMULH22]] + ; GFX10-NEXT: [[ZEXT28:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO77]](i1) + ; GFX10-NEXT: [[ADD31:%[0-9]+]]:_(i32) = G_ADD [[ZEXT27]], [[ZEXT28]] + ; GFX10-NEXT: [[UADDO78:%[0-9]+]]:_(i32), [[UADDO79:%[0-9]+]]:_(i1) = G_UADDO [[UADDO76]], [[ADD30]] + ; GFX10-NEXT: [[ZEXT29:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO79]](i1) + ; GFX10-NEXT: 
[[ADD32:%[0-9]+]]:_(i32) = G_ADD [[ADD31]], [[ZEXT29]] + ; GFX10-NEXT: [[UMULH23:%[0-9]+]]:_(i32) = G_UMULH [[UV61]], [[UADDE18]] + ; GFX10-NEXT: [[ADD33:%[0-9]+]]:_(i32) = G_ADD [[UMULH23]], [[ADD32]] + ; GFX10-NEXT: [[MV8:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO78]](i32), [[ADD33]](i32) + ; GFX10-NEXT: [[UV62:%[0-9]+]]:_(i32), [[UV63:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR5]](i64) + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV62]](i32), [[UADDO78]], [[C5]] + ; GFX10-NEXT: [[UV64:%[0-9]+]]:_(i32), [[UV65:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](i64) + ; GFX10-NEXT: [[MUL28:%[0-9]+]]:_(i32) = G_MUL [[UV62]], [[ADD33]] + ; GFX10-NEXT: [[ADD34:%[0-9]+]]:_(i32) = G_ADD [[UV65]], [[MUL28]] + ; GFX10-NEXT: [[MUL29:%[0-9]+]]:_(i32) = G_MUL [[UV63]], [[UADDO78]] + ; GFX10-NEXT: [[ADD35:%[0-9]+]]:_(i32) = G_ADD [[ADD34]], [[MUL29]] + ; GFX10-NEXT: [[USUBO10:%[0-9]+]]:_(i32), [[USUBO11:%[0-9]+]]:_(i1) = G_USUBO [[UV58]], [[UV64]] + ; GFX10-NEXT: [[USUBE12:%[0-9]+]]:_(i32), [[USUBE13:%[0-9]+]]:_(i1) = G_USUBE [[UV59]], [[ADD35]], [[USUBO11]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[UV59]], [[ADD35]] + ; GFX10-NEXT: [[UV66:%[0-9]+]]:_(i32), [[UV67:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR5]](i64) + ; GFX10-NEXT: [[ICMP8:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE12]](i32), [[UV67]] + ; GFX10-NEXT: [[SEXT4:%[0-9]+]]:_(i32) = G_SEXT [[ICMP8]](i1) + ; GFX10-NEXT: [[ICMP9:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO10]](i32), [[UV66]] + ; GFX10-NEXT: [[SEXT5:%[0-9]+]]:_(i32) = G_SEXT [[ICMP9]](i1) + ; GFX10-NEXT: [[ICMP10:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE12]](i32), [[UV67]] + ; GFX10-NEXT: [[SELECT4:%[0-9]+]]:_(i32) = G_SELECT [[ICMP10]](i1), [[SEXT5]], [[SEXT4]] + ; GFX10-NEXT: [[USUBO12:%[0-9]+]]:_(i32), [[USUBO13:%[0-9]+]]:_(i1) = G_USUBO [[USUBO10]], [[UV66]] + ; GFX10-NEXT: [[USUBE14:%[0-9]+]]:_(i32), [[USUBE15:%[0-9]+]]:_(i1) = G_USUBE [[SUB1]], [[UV67]], [[USUBO11]] + ; GFX10-NEXT: [[USUBE16:%[0-9]+]]:_(i32), [[USUBE17:%[0-9]+]]:_(i1) = G_USUBE [[USUBE14]], [[C6]], [[USUBO13]] + ; GFX10-NEXT: [[UV68:%[0-9]+]]:_(i32), [[UV69:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C7]](i64) + ; GFX10-NEXT: [[UADDO80:%[0-9]+]]:_(i32), [[UADDO81:%[0-9]+]]:_(i1) = G_UADDO [[UADDO78]], [[UV68]] + ; GFX10-NEXT: [[UADDE20:%[0-9]+]]:_(i32), [[UADDE21:%[0-9]+]]:_(i1) = G_UADDE [[ADD33]], [[UV69]], [[UADDO81]] + ; GFX10-NEXT: [[MV9:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO80]](i32), [[UADDE20]](i32) + ; GFX10-NEXT: [[ICMP11:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE16]](i32), [[UV67]] + ; GFX10-NEXT: [[SEXT6:%[0-9]+]]:_(i32) = G_SEXT [[ICMP11]](i1) + ; GFX10-NEXT: [[ICMP12:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO12]](i32), [[UV66]] + ; GFX10-NEXT: [[SEXT7:%[0-9]+]]:_(i32) = G_SEXT [[ICMP12]](i1) + ; GFX10-NEXT: [[ICMP13:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE16]](i32), [[UV67]] + ; GFX10-NEXT: [[SELECT5:%[0-9]+]]:_(i32) = G_SELECT [[ICMP13]](i1), [[SEXT7]], [[SEXT6]] + ; GFX10-NEXT: [[UV70:%[0-9]+]]:_(i32), [[UV71:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C7]](i64) + ; GFX10-NEXT: [[UADDO82:%[0-9]+]]:_(i32), [[UADDO83:%[0-9]+]]:_(i1) = G_UADDO [[UADDO80]], [[UV70]] + ; GFX10-NEXT: [[UADDE22:%[0-9]+]]:_(i32), [[UADDE23:%[0-9]+]]:_(i1) = G_UADDE [[UADDE20]], [[UV71]], [[UADDO83]] + ; GFX10-NEXT: [[MV10:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO82]](i32), [[UADDE22]](i32) + ; GFX10-NEXT: [[ICMP14:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT5]](i32), [[C6]] + ; GFX10-NEXT: 
[[SELECT6:%[0-9]+]]:_(i64) = G_SELECT [[ICMP14]](i1), [[MV10]], [[MV9]] + ; GFX10-NEXT: [[ICMP15:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT4]](i32), [[C6]] + ; GFX10-NEXT: [[SELECT7:%[0-9]+]]:_(i64) = G_SELECT [[ICMP15]](i1), [[SELECT6]], [[MV8]] + ; GFX10-NEXT: [[XOR6:%[0-9]+]]:_(i64) = G_XOR [[ASHR2]], [[ASHR3]] + ; GFX10-NEXT: [[XOR7:%[0-9]+]]:_(i64) = G_XOR [[SELECT7]], [[XOR6]] + ; GFX10-NEXT: [[UV72:%[0-9]+]]:_(i32), [[UV73:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR7]](i64) + ; GFX10-NEXT: [[UV74:%[0-9]+]]:_(i32), [[UV75:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR6]](i64) + ; GFX10-NEXT: [[USUBO14:%[0-9]+]]:_(i32), [[USUBO15:%[0-9]+]]:_(i1) = G_USUBO [[UV72]], [[UV74]] + ; GFX10-NEXT: [[USUBE18:%[0-9]+]]:_(i32), [[USUBE19:%[0-9]+]]:_(i1) = G_USUBE [[UV73]], [[UV75]], [[USUBO15]] + ; GFX10-NEXT: [[MV11:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO14]](i32), [[USUBE18]](i32) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[MV5]](i64), [[MV11]](i64) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(<2 x i64>) = G_SDIV %0, %1 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<2 x i64>) ... --- @@ -2510,170 +2522,173 @@ body: | ; GFX6-LABEL: name: test_sdiv_s16 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 - ; GFX6-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 16 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) - ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32) - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] - ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] - ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] - ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] - ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) - ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] - ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] - ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] - ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]] - ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]] - ; 
GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] - ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]] - ; GFX6-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] - ; GFX6-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] - ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] - ; GFX6-NEXT: $vgpr0 = COPY [[SUB3]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 16 + ; GFX6-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY1]], 16 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG]], [[C]](i32) + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG1]], [[C]](i32) + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[ADD]], [[ASHR]] + ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[XOR1]](i32) + ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[XOR1]] + ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[XOR]], [[MUL1]] + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[XOR1]] + ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UMULH1]], [[C3]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[ADD3]], [[UMULH1]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT1]](i32), [[XOR1]] + ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[SELECT]], [[C3]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[ADD4]], [[SELECT]] + ; GFX6-NEXT: [[XOR2:%[0-9]+]]:_(i32) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX6-NEXT: [[XOR3:%[0-9]+]]:_(i32) = G_XOR [[SELECT2]], [[XOR2]] + ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX6-NEXT: $vgpr0 = COPY [[SUB3]](i32) + ; ; GFX8-LABEL: name: test_sdiv_s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 - ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 16 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) - ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32) - ; GFX8-NEXT: 
[[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] - ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] - ; GFX8-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] - ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) - ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] - ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]] - ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]] - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] - ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]] - ; GFX8-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] - ; GFX8-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] - ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] - ; GFX8-NEXT: $vgpr0 = COPY [[SUB3]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 16 + ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY1]], 16 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG]], [[C]](i32) + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG1]], [[C]](i32) + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[ADD]], [[ASHR]] + ; GFX8-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[XOR1]](i32) + ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[XOR1]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX8-NEXT: 
[[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[XOR]], [[MUL1]] + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[XOR1]] + ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UMULH1]], [[C3]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[ADD3]], [[UMULH1]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT1]](i32), [[XOR1]] + ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[SELECT]], [[C3]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[ADD4]], [[SELECT]] + ; GFX8-NEXT: [[XOR2:%[0-9]+]]:_(i32) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX8-NEXT: [[XOR3:%[0-9]+]]:_(i32) = G_XOR [[SELECT2]], [[XOR2]] + ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX8-NEXT: $vgpr0 = COPY [[SUB3]](i32) + ; ; GFX9-LABEL: name: test_sdiv_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 - ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 16 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) - ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] - ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] - ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] - ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) - ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] - ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]] - ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]] - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]] - ; GFX9-NEXT: 
[[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] - ; GFX9-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] - ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] - ; GFX9-NEXT: $vgpr0 = COPY [[SUB3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 16 + ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY1]], 16 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG]], [[C]](i32) + ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG1]], [[C]](i32) + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[ADD]], [[ASHR]] + ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[XOR1]](i32) + ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[XOR1]] + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[XOR]], [[MUL1]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[XOR1]] + ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UMULH1]], [[C3]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[ADD3]], [[UMULH1]] + ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT1]](i32), [[XOR1]] + ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[SELECT]], [[C3]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[ADD4]], [[SELECT]] + ; GFX9-NEXT: [[XOR2:%[0-9]+]]:_(i32) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX9-NEXT: [[XOR3:%[0-9]+]]:_(i32) = G_XOR [[SELECT2]], [[XOR2]] + ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX9-NEXT: $vgpr0 = COPY [[SUB3]](i32) + ; ; GFX10-LABEL: name: test_sdiv_s16 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 - ; GFX10-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 16 - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) - ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32) - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] - ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s32) = 
G_XOR [[ADD]], [[ASHR]] - ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] - ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) - ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] - ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] - ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] - ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]] - ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] - ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]] - ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] - ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]] - ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] - ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] - ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] - ; GFX10-NEXT: $vgpr0 = COPY [[SUB3]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s16) = G_SDIV %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 16 + ; GFX10-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY1]], 16 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG]], [[C]](i32) + ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG1]], [[C]](i32) + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[ADD]], [[ASHR]] + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[XOR1]](i32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[XOR1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(i32) 
= G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[XOR]], [[MUL1]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[XOR1]] + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UMULH1]], [[C3]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[ADD3]], [[UMULH1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT1]](i32), [[XOR1]] + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[SELECT]], [[C3]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[ADD4]], [[SELECT]] + ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(i32) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(i32) = G_XOR [[SELECT2]], [[XOR2]] + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX10-NEXT: $vgpr0 = COPY [[SUB3]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i16) = G_SDIV %2, %3 + %5:_(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... --- @@ -2685,325 +2700,328 @@ body: | ; GFX6-LABEL: name: test_sdiv_v2s16 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16 - ; GFX6-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16 - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C1]](s32) - ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C1]](s32) - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] - ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] - ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] - ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] - ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) - ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] - ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[XOR1]] - ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] - ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX6-NEXT: 
[[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] - ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C4]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]] - ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]] - ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C4]] - ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]] - ; GFX6-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] - ; GFX6-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] - ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] - ; GFX6-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 - ; GFX6-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 16 - ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[C1]](s32) - ; GFX6-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[C1]](s32) - ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG2]], [[ASHR2]] - ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG3]], [[ASHR3]] - ; GFX6-NEXT: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR2]] - ; GFX6-NEXT: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[ADD6]], [[ASHR3]] - ; GFX6-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR5]](s32) - ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) - ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] - ; GFX6-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[XOR5]] - ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB4]], [[FPTOUI1]] - ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] - ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] - ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[XOR4]], [[ADD7]] - ; GFX6-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[XOR5]] - ; GFX6-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[XOR4]], [[MUL3]] - ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[XOR5]] - ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[C4]] - ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[ADD8]], [[UMULH3]] - ; GFX6-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[XOR5]] - ; GFX6-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB6]], [[SUB5]] - ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[XOR5]] - ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[SELECT3]], [[C4]] - ; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD9]], [[SELECT3]] - ; GFX6-NEXT: [[XOR6:%[0-9]+]]:_(s32) = G_XOR [[ASHR2]], [[ASHR3]] - ; GFX6-NEXT: [[XOR7:%[0-9]+]]:_(s32) = G_XOR [[SELECT5]], [[XOR6]] - ; GFX6-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]] - ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SUB3]], [[C5]] - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SUB7]], [[C5]] - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX6-NEXT: 
[[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST]], 16 + ; GFX6-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST1]], 16 + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG]], [[C1]](i32) + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG1]], [[C1]](i32) + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[ADD]], [[ASHR]] + ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[XOR1]](i32) + ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C3]], [[XOR1]] + ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[XOR]], [[MUL1]] + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[XOR1]] + ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UMULH1]], [[C4]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[ADD3]], [[UMULH1]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT1]](i32), [[XOR1]] + ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[SELECT]], [[C4]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[ADD4]], [[SELECT]] + ; GFX6-NEXT: [[XOR2:%[0-9]+]]:_(i32) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX6-NEXT: [[XOR3:%[0-9]+]]:_(i32) = G_XOR [[SELECT2]], [[XOR2]] + ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX6-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR]], 16 + ; GFX6-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR1]], 16 + ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG2]], [[C1]](i32) + ; GFX6-NEXT: [[ASHR3:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG3]], [[C1]](i32) + ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG2]], [[ASHR2]] + ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG3]], [[ASHR3]] + ; GFX6-NEXT: [[XOR4:%[0-9]+]]:_(i32) = G_XOR [[ADD5]], [[ASHR2]] + ; GFX6-NEXT: [[XOR5:%[0-9]+]]:_(i32) = G_XOR [[ADD6]], [[ASHR3]] + ; GFX6-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[XOR5]](i32) + ; 
GFX6-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](f32) + ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] + ; GFX6-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL1]](f32) + ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[C3]], [[XOR5]] + ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[SUB4]], [[FPTOUI1]] + ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[XOR4]], [[ADD7]] + ; GFX6-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UMULH3]], [[XOR5]] + ; GFX6-NEXT: [[SUB5:%[0-9]+]]:_(i32) = G_SUB [[XOR4]], [[MUL3]] + ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB5]](i32), [[XOR5]] + ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(i32) = G_ADD [[UMULH3]], [[C4]] + ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[ADD8]], [[UMULH3]] + ; GFX6-NEXT: [[SUB6:%[0-9]+]]:_(i32) = G_SUB [[SUB5]], [[XOR5]] + ; GFX6-NEXT: [[SELECT4:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SUB6]], [[SUB5]] + ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT4]](i32), [[XOR5]] + ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(i32) = G_ADD [[SELECT3]], [[C4]] + ; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(i32) = G_SELECT [[ICMP3]](i1), [[ADD9]], [[SELECT3]] + ; GFX6-NEXT: [[XOR6:%[0-9]+]]:_(i32) = G_XOR [[ASHR2]], [[ASHR3]] + ; GFX6-NEXT: [[XOR7:%[0-9]+]]:_(i32) = G_XOR [[SELECT5]], [[XOR6]] + ; GFX6-NEXT: [[SUB7:%[0-9]+]]:_(i32) = G_SUB [[XOR7]], [[XOR6]] + ; GFX6-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[SUB3]], [[C5]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[SUB7]], [[C5]] + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) + ; ; GFX8-LABEL: name: test_sdiv_v2s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16 - ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16 - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C1]](s32) - ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C1]](s32) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] - ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] - ; GFX8-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] - ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) - ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] - ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = 
G_FPTOUI [[FMUL]](s32) - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[XOR1]] - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] - ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C4]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]] - ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]] - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C4]] - ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]] - ; GFX8-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] - ; GFX8-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] - ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] - ; GFX8-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 - ; GFX8-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 16 - ; GFX8-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[C1]](s32) - ; GFX8-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[C1]](s32) - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG2]], [[ASHR2]] - ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG3]], [[ASHR3]] - ; GFX8-NEXT: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR2]] - ; GFX8-NEXT: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[ADD6]], [[ASHR3]] - ; GFX8-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR5]](s32) - ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) - ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] - ; GFX8-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[XOR5]] - ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB4]], [[FPTOUI1]] - ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] - ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] - ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[XOR4]], [[ADD7]] - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[XOR5]] - ; GFX8-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[XOR4]], [[MUL3]] - ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[XOR5]] - ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[C4]] - ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[ADD8]], [[UMULH3]] - ; GFX8-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[XOR5]] - ; GFX8-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB6]], [[SUB5]] - ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[XOR5]] - ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[SELECT3]], [[C4]] - ; GFX8-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD9]], [[SELECT3]] - ; GFX8-NEXT: [[XOR6:%[0-9]+]]:_(s32) = G_XOR [[ASHR2]], [[ASHR3]] - ; GFX8-NEXT: 
[[XOR7:%[0-9]+]]:_(s32) = G_XOR [[SELECT5]], [[XOR6]] - ; GFX8-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]] - ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SUB3]], [[C5]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SUB7]], [[C5]] - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST]], 16 + ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST1]], 16 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG]], [[C1]](i32) + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG1]], [[C1]](i32) + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[ADD]], [[ASHR]] + ; GFX8-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[XOR1]](i32) + ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C3]], [[XOR1]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[XOR]], [[MUL1]] + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[XOR1]] + ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UMULH1]], [[C4]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[ADD3]], [[UMULH1]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT1]](i32), [[XOR1]] + ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[SELECT]], [[C4]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[ADD4]], [[SELECT]] + ; GFX8-NEXT: [[XOR2:%[0-9]+]]:_(i32) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX8-NEXT: [[XOR3:%[0-9]+]]:_(i32) = G_XOR [[SELECT2]], [[XOR2]] + ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX8-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR]], 16 + ; GFX8-NEXT: 
[[SEXT_INREG3:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR1]], 16 + ; GFX8-NEXT: [[ASHR2:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG2]], [[C1]](i32) + ; GFX8-NEXT: [[ASHR3:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG3]], [[C1]](i32) + ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG2]], [[ASHR2]] + ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG3]], [[ASHR3]] + ; GFX8-NEXT: [[XOR4:%[0-9]+]]:_(i32) = G_XOR [[ADD5]], [[ASHR2]] + ; GFX8-NEXT: [[XOR5:%[0-9]+]]:_(i32) = G_XOR [[ADD6]], [[ASHR3]] + ; GFX8-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[XOR5]](i32) + ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](f32) + ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] + ; GFX8-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL1]](f32) + ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[C3]], [[XOR5]] + ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[SUB4]], [[FPTOUI1]] + ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[XOR4]], [[ADD7]] + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UMULH3]], [[XOR5]] + ; GFX8-NEXT: [[SUB5:%[0-9]+]]:_(i32) = G_SUB [[XOR4]], [[MUL3]] + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB5]](i32), [[XOR5]] + ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(i32) = G_ADD [[UMULH3]], [[C4]] + ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[ADD8]], [[UMULH3]] + ; GFX8-NEXT: [[SUB6:%[0-9]+]]:_(i32) = G_SUB [[SUB5]], [[XOR5]] + ; GFX8-NEXT: [[SELECT4:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SUB6]], [[SUB5]] + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT4]](i32), [[XOR5]] + ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(i32) = G_ADD [[SELECT3]], [[C4]] + ; GFX8-NEXT: [[SELECT5:%[0-9]+]]:_(i32) = G_SELECT [[ICMP3]](i1), [[ADD9]], [[SELECT3]] + ; GFX8-NEXT: [[XOR6:%[0-9]+]]:_(i32) = G_XOR [[ASHR2]], [[ASHR3]] + ; GFX8-NEXT: [[XOR7:%[0-9]+]]:_(i32) = G_XOR [[SELECT5]], [[XOR6]] + ; GFX8-NEXT: [[SUB7:%[0-9]+]]:_(i32) = G_SUB [[XOR7]], [[XOR6]] + ; GFX8-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[SUB3]], [[C5]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[SUB7]], [[C5]] + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) + ; ; GFX9-LABEL: name: test_sdiv_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16 - ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16 - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C1]](s32) - ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C1]](s32) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD 
[[SEXT_INREG]], [[ASHR]] - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] - ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] - ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] - ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) - ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] - ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[XOR1]] - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] - ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C4]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]] - ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]] - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C4]] - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]] - ; GFX9-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] - ; GFX9-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] - ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SUB3]](s32) - ; GFX9-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 - ; GFX9-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 16 - ; GFX9-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[C1]](s32) - ; GFX9-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[C1]](s32) - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG2]], [[ASHR2]] - ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG3]], [[ASHR3]] - ; GFX9-NEXT: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR2]] - ; GFX9-NEXT: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[ADD6]], [[ASHR3]] - ; GFX9-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR5]](s32) - ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) - ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] - ; GFX9-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[XOR5]] - ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB4]], [[FPTOUI1]] - ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] - ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] - ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[XOR4]], [[ADD7]] - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[XOR5]] - ; GFX9-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[XOR4]], [[MUL3]] - ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[XOR5]] - ; 
GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[C4]] - ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[ADD8]], [[UMULH3]] - ; GFX9-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[XOR5]] - ; GFX9-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB6]], [[SUB5]] - ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[XOR5]] - ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[SELECT3]], [[C4]] - ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD9]], [[SELECT3]] - ; GFX9-NEXT: [[XOR6:%[0-9]+]]:_(s32) = G_XOR [[ASHR2]], [[ASHR3]] - ; GFX9-NEXT: [[XOR7:%[0-9]+]]:_(s32) = G_XOR [[SELECT5]], [[XOR6]] - ; GFX9-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]] - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SUB7]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST]], 16 + ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST1]], 16 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG]], [[C1]](i32) + ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG1]], [[C1]](i32) + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[ADD]], [[ASHR]] + ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[XOR1]](i32) + ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C3]], [[XOR1]] + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[XOR]], [[MUL1]] + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[XOR1]] + ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UMULH1]], [[C4]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[ADD3]], [[UMULH1]] + ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT1]](i32), [[XOR1]] + ; GFX9-NEXT: 
[[ADD4:%[0-9]+]]:_(i32) = G_ADD [[SELECT]], [[C4]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[ADD4]], [[SELECT]] + ; GFX9-NEXT: [[XOR2:%[0-9]+]]:_(i32) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX9-NEXT: [[XOR3:%[0-9]+]]:_(i32) = G_XOR [[SELECT2]], [[XOR2]] + ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[SUB3]](i32) + ; GFX9-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR]], 16 + ; GFX9-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR1]], 16 + ; GFX9-NEXT: [[ASHR2:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG2]], [[C1]](i32) + ; GFX9-NEXT: [[ASHR3:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG3]], [[C1]](i32) + ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG2]], [[ASHR2]] + ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG3]], [[ASHR3]] + ; GFX9-NEXT: [[XOR4:%[0-9]+]]:_(i32) = G_XOR [[ADD5]], [[ASHR2]] + ; GFX9-NEXT: [[XOR5:%[0-9]+]]:_(i32) = G_XOR [[ADD6]], [[ASHR3]] + ; GFX9-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[XOR5]](i32) + ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](f32) + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] + ; GFX9-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL1]](f32) + ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[C3]], [[XOR5]] + ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[SUB4]], [[FPTOUI1]] + ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[XOR4]], [[ADD7]] + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UMULH3]], [[XOR5]] + ; GFX9-NEXT: [[SUB5:%[0-9]+]]:_(i32) = G_SUB [[XOR4]], [[MUL3]] + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB5]](i32), [[XOR5]] + ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(i32) = G_ADD [[UMULH3]], [[C4]] + ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[ADD8]], [[UMULH3]] + ; GFX9-NEXT: [[SUB6:%[0-9]+]]:_(i32) = G_SUB [[SUB5]], [[XOR5]] + ; GFX9-NEXT: [[SELECT4:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SUB6]], [[SUB5]] + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT4]](i32), [[XOR5]] + ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(i32) = G_ADD [[SELECT3]], [[C4]] + ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(i32) = G_SELECT [[ICMP3]](i1), [[ADD9]], [[SELECT3]] + ; GFX9-NEXT: [[XOR6:%[0-9]+]]:_(i32) = G_XOR [[ASHR2]], [[ASHR3]] + ; GFX9-NEXT: [[XOR7:%[0-9]+]]:_(i32) = G_XOR [[SELECT5]], [[XOR6]] + ; GFX9-NEXT: [[SUB7:%[0-9]+]]:_(i32) = G_SUB [[XOR7]], [[XOR6]] + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[SUB7]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) + ; ; GFX10-LABEL: name: test_sdiv_v2s16 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX10-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16 - ; GFX10-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = 
G_SEXT_INREG [[BITCAST1]], 16 - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C1]](s32) - ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C1]](s32) - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] - ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] - ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] - ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) - ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] - ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[XOR1]] - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] - ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] - ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] - ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C4]] - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]] - ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] - ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]] - ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C4]] - ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]] - ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] - ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] - ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SUB3]](s32) - ; GFX10-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 - ; GFX10-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 16 - ; GFX10-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[C1]](s32) - ; GFX10-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[C1]](s32) - ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG2]], [[ASHR2]] - ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG3]], [[ASHR3]] - ; GFX10-NEXT: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR2]] - ; GFX10-NEXT: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[ADD6]], [[ASHR3]] - ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR5]](s32) - ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) - ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] - ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[XOR5]] - ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB4]], [[FPTOUI1]] - ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] - ; GFX10-NEXT: 
[[ADD7:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] - ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[XOR4]], [[ADD7]] - ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[XOR5]] - ; GFX10-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[XOR4]], [[MUL3]] - ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[XOR5]] - ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[C4]] - ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[ADD8]], [[UMULH3]] - ; GFX10-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[XOR5]] - ; GFX10-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB6]], [[SUB5]] - ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[XOR5]] - ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[SELECT3]], [[C4]] - ; GFX10-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD9]], [[SELECT3]] - ; GFX10-NEXT: [[XOR6:%[0-9]+]]:_(s32) = G_XOR [[ASHR2]], [[ASHR3]] - ; GFX10-NEXT: [[XOR7:%[0-9]+]]:_(s32) = G_XOR [[SELECT5]], [[XOR6]] - ; GFX10-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]] - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SUB7]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_SDIV %0, %1 - $vgpr0 = COPY %2 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX10-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST]], 16 + ; GFX10-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST1]], 16 + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG]], [[C1]](i32) + ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG1]], [[C1]](i32) + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[ADD]], [[ASHR]] + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[XOR1]](i32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C3]], [[XOR1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[XOR]], [[MUL1]] + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT 
i32 1 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[XOR1]] + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UMULH1]], [[C4]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[ADD3]], [[UMULH1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT1]](i32), [[XOR1]] + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[SELECT]], [[C4]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[ADD4]], [[SELECT]] + ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(i32) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(i32) = G_XOR [[SELECT2]], [[XOR2]] + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[SUB3]](i32) + ; GFX10-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR]], 16 + ; GFX10-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR1]], 16 + ; GFX10-NEXT: [[ASHR2:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG2]], [[C1]](i32) + ; GFX10-NEXT: [[ASHR3:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG3]], [[C1]](i32) + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG2]], [[ASHR2]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG3]], [[ASHR3]] + ; GFX10-NEXT: [[XOR4:%[0-9]+]]:_(i32) = G_XOR [[ADD5]], [[ASHR2]] + ; GFX10-NEXT: [[XOR5:%[0-9]+]]:_(i32) = G_XOR [[ADD6]], [[ASHR3]] + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[XOR5]](i32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](f32) + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] + ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL1]](f32) + ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[C3]], [[XOR5]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[SUB4]], [[FPTOUI1]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[XOR4]], [[ADD7]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UMULH3]], [[XOR5]] + ; GFX10-NEXT: [[SUB5:%[0-9]+]]:_(i32) = G_SUB [[XOR4]], [[MUL3]] + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB5]](i32), [[XOR5]] + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(i32) = G_ADD [[UMULH3]], [[C4]] + ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[ADD8]], [[UMULH3]] + ; GFX10-NEXT: [[SUB6:%[0-9]+]]:_(i32) = G_SUB [[SUB5]], [[XOR5]] + ; GFX10-NEXT: [[SELECT4:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SUB6]], [[SUB5]] + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT4]](i32), [[XOR5]] + ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(i32) = G_ADD [[SELECT3]], [[C4]] + ; GFX10-NEXT: [[SELECT5:%[0-9]+]]:_(i32) = G_SELECT [[ICMP3]](i1), [[ADD9]], [[SELECT3]] + ; GFX10-NEXT: [[XOR6:%[0-9]+]]:_(i32) = G_XOR [[ASHR2]], [[ASHR3]] + ; GFX10-NEXT: [[XOR7:%[0-9]+]]:_(i32) = G_XOR [[SELECT5]], [[XOR6]] + ; GFX10-NEXT: [[SUB7:%[0-9]+]]:_(i32) = G_SUB [[XOR7]], [[XOR6]] + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[SUB7]](i32) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX10-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = G_SDIV %0, %1 + $vgpr0 = COPY %2(<2 x i16>) ... 
--- @@ -3015,170 +3033,173 @@ body: | ; GFX6-LABEL: name: test_sdiv_s7 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 7 - ; GFX6-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 7 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) - ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32) - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] - ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] - ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] - ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] - ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) - ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] - ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] - ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] - ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]] - ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]] - ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] - ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]] - ; GFX6-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] - ; GFX6-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] - ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] - ; GFX6-NEXT: $vgpr0 = COPY [[SUB3]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 7 + ; GFX6-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY1]], 7 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG]], [[C]](i32) + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG1]], [[C]](i32) + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[ADD]], [[ASHR]] + ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[XOR1]](i32) + ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = 
G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[XOR1]] + ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[XOR]], [[MUL1]] + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[XOR1]] + ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UMULH1]], [[C3]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[ADD3]], [[UMULH1]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT1]](i32), [[XOR1]] + ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[SELECT]], [[C3]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[ADD4]], [[SELECT]] + ; GFX6-NEXT: [[XOR2:%[0-9]+]]:_(i32) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX6-NEXT: [[XOR3:%[0-9]+]]:_(i32) = G_XOR [[SELECT2]], [[XOR2]] + ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX6-NEXT: $vgpr0 = COPY [[SUB3]](i32) + ; ; GFX8-LABEL: name: test_sdiv_s7 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 7 - ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 7 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) - ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] - ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] - ; GFX8-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] - ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) - ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] - ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP 
intpred(uge), [[SUB1]](s32), [[XOR1]] - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]] - ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]] - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] - ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]] - ; GFX8-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] - ; GFX8-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] - ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] - ; GFX8-NEXT: $vgpr0 = COPY [[SUB3]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 7 + ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY1]], 7 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG]], [[C]](i32) + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG1]], [[C]](i32) + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[ADD]], [[ASHR]] + ; GFX8-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[XOR1]](i32) + ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[XOR1]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[XOR]], [[MUL1]] + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[XOR1]] + ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UMULH1]], [[C3]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[ADD3]], [[UMULH1]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT1]](i32), [[XOR1]] + ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[SELECT]], [[C3]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[ADD4]], [[SELECT]] + ; GFX8-NEXT: [[XOR2:%[0-9]+]]:_(i32) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX8-NEXT: [[XOR3:%[0-9]+]]:_(i32) = G_XOR [[SELECT2]], [[XOR2]] + ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX8-NEXT: $vgpr0 = COPY [[SUB3]](i32) + ; ; GFX9-LABEL: name: test_sdiv_s7 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; 
GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 7 - ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 7 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) - ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] - ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] - ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] - ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) - ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] - ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]] - ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]] - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]] - ; GFX9-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] - ; GFX9-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] - ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] - ; GFX9-NEXT: $vgpr0 = COPY [[SUB3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 7 + ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY1]], 7 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG]], [[C]](i32) + ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG1]], [[C]](i32) + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[ADD]], [[ASHR]] + ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[XOR1]](i32) + ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], 
[[C1]] + ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[XOR1]] + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[XOR]], [[MUL1]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[XOR1]] + ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UMULH1]], [[C3]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[ADD3]], [[UMULH1]] + ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT1]](i32), [[XOR1]] + ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[SELECT]], [[C3]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[ADD4]], [[SELECT]] + ; GFX9-NEXT: [[XOR2:%[0-9]+]]:_(i32) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX9-NEXT: [[XOR3:%[0-9]+]]:_(i32) = G_XOR [[SELECT2]], [[XOR2]] + ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX9-NEXT: $vgpr0 = COPY [[SUB3]](i32) + ; ; GFX10-LABEL: name: test_sdiv_s7 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 7 - ; GFX10-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 7 - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) - ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32) - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] - ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] - ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] - ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) - ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] - ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] - ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] - ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT 
[[ICMP]](s1), [[ADD3]], [[UMULH1]] - ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] - ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]] - ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] - ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]] - ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] - ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] - ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] - ; GFX10-NEXT: $vgpr0 = COPY [[SUB3]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s7) = G_TRUNC %0 - %3:_(s7) = G_TRUNC %1 - %4:_(s7) = G_SDIV %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 7 + ; GFX10-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY1]], 7 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG]], [[C]](i32) + ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG1]], [[C]](i32) + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[ADD]], [[ASHR]] + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[XOR1]](i32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[XOR1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[XOR]], [[MUL1]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[XOR1]] + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UMULH1]], [[C3]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[ADD3]], [[UMULH1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT1]](i32), [[XOR1]] + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[SELECT]], [[C3]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[ADD4]], [[SELECT]] + ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(i32) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(i32) = G_XOR [[SELECT2]], [[XOR2]] + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX10-NEXT: $vgpr0 = COPY [[SUB3]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i7) = G_TRUNC %0(i32) + 
%3:_(i7) = G_TRUNC %1(i32) + %4:_(i7) = G_SDIV %2, %3 + %5:_(i32) = G_ANYEXT %4(i7) + $vgpr0 = COPY %5(i32) ... --- @@ -3190,170 +3211,173 @@ body: | ; GFX6-LABEL: name: test_sdiv_s17 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 17 - ; GFX6-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 17 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) - ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32) - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] - ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] - ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] - ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] - ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) - ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] - ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] - ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] - ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]] - ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]] - ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] - ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]] - ; GFX6-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] - ; GFX6-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] - ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] - ; GFX6-NEXT: $vgpr0 = COPY [[SUB3]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 17 + ; GFX6-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY1]], 17 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG]], [[C]](i32) + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG1]], [[C]](i32) + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[ADD]], [[ASHR]] + ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[ADD1]], [[ASHR1]] + ; 
GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[XOR1]](i32) + ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[XOR1]] + ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[XOR]], [[MUL1]] + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[XOR1]] + ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UMULH1]], [[C3]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[ADD3]], [[UMULH1]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT1]](i32), [[XOR1]] + ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[SELECT]], [[C3]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[ADD4]], [[SELECT]] + ; GFX6-NEXT: [[XOR2:%[0-9]+]]:_(i32) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX6-NEXT: [[XOR3:%[0-9]+]]:_(i32) = G_XOR [[SELECT2]], [[XOR2]] + ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX6-NEXT: $vgpr0 = COPY [[SUB3]](i32) + ; ; GFX8-LABEL: name: test_sdiv_s17 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 17 - ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 17 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) - ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] - ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] - ; GFX8-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] - ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) - ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] - ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], 
[[MUL1]] - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]] - ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]] - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] - ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]] - ; GFX8-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] - ; GFX8-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] - ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] - ; GFX8-NEXT: $vgpr0 = COPY [[SUB3]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 17 + ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY1]], 17 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG]], [[C]](i32) + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG1]], [[C]](i32) + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[ADD]], [[ASHR]] + ; GFX8-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[XOR1]](i32) + ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[XOR1]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[XOR]], [[MUL1]] + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[XOR1]] + ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UMULH1]], [[C3]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[ADD3]], [[UMULH1]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT1]](i32), [[XOR1]] + ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[SELECT]], [[C3]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[ADD4]], [[SELECT]] + ; GFX8-NEXT: [[XOR2:%[0-9]+]]:_(i32) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX8-NEXT: [[XOR3:%[0-9]+]]:_(i32) = G_XOR [[SELECT2]], [[XOR2]] + ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX8-NEXT: $vgpr0 = COPY [[SUB3]](i32) + ; ; GFX9-LABEL: name: test_sdiv_s17 
; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 17 - ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 17 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) - ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] - ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] - ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] - ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) - ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] - ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]] - ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]] - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]] - ; GFX9-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] - ; GFX9-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] - ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] - ; GFX9-NEXT: $vgpr0 = COPY [[SUB3]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 17 + ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY1]], 17 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG]], [[C]](i32) + ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG1]], [[C]](i32) + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[ADD]], [[ASHR]] + ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[XOR1]](i32) + ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(f32) 
= G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[XOR1]] + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[XOR]], [[MUL1]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[XOR1]] + ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UMULH1]], [[C3]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[ADD3]], [[UMULH1]] + ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT1]](i32), [[XOR1]] + ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[SELECT]], [[C3]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[ADD4]], [[SELECT]] + ; GFX9-NEXT: [[XOR2:%[0-9]+]]:_(i32) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX9-NEXT: [[XOR3:%[0-9]+]]:_(i32) = G_XOR [[SELECT2]], [[XOR2]] + ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX9-NEXT: $vgpr0 = COPY [[SUB3]](i32) + ; ; GFX10-LABEL: name: test_sdiv_s17 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 17 - ; GFX10-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 17 - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) - ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32) - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] - ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] - ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] - ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) - ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] - ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] - ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] - ; 
GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]] - ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] - ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]] - ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] - ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]] - ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] - ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] - ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] - ; GFX10-NEXT: $vgpr0 = COPY [[SUB3]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s17) = G_TRUNC %0 - %3:_(s17) = G_TRUNC %1 - %4:_(s17) = G_SDIV %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 17 + ; GFX10-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY1]], 17 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG]], [[C]](i32) + ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG1]], [[C]](i32) + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[ADD]], [[ASHR]] + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[XOR1]](i32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[XOR1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[XOR]], [[MUL1]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[XOR1]] + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UMULH1]], [[C3]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[ADD3]], [[UMULH1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT1]](i32), [[XOR1]] + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[SELECT]], [[C3]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[ADD4]], [[SELECT]] + ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(i32) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(i32) = G_XOR [[SELECT2]], [[XOR2]] + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[XOR3]], [[XOR2]] + ; 
GFX10-NEXT: $vgpr0 = COPY [[SUB3]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i17) = G_TRUNC %0(i32) + %3:_(i17) = G_TRUNC %1(i32) + %4:_(i17) = G_SDIV %2, %3 + %5:_(i32) = G_ANYEXT %4(i17) + $vgpr0 = COPY %5(i32) ... --- @@ -3365,704 +3389,707 @@ body: | ; GFX6-LABEL: name: test_sdiv_s33 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 33 - ; GFX6-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY1]], 33 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32) - ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG1]], [[C]](s32) - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG]](s64) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] - ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG1]](s64) - ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) - ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] - ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] - ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]] - ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV8]](s32) - ; GFX6-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV9]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] - ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 - ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; GFX6-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] - ; GFX6-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; GFX6-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] - ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] - ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) - ; GFX6-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) - ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX6-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) - ; GFX6-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]] 
- ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] - ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] - ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] - ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] - ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; GFX6-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] - ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] - ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] - ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] - ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] - ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] - ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] - ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] - ; GFX6-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] - ; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] - ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] - ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] - ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] - ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] - ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] - ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] - ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] - ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] - ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] - ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX6-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] - ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] - ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] 
- ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] - ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] - ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] - ; GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] - ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] - ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] - ; GFX6-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX6-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]] - ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE6]] - ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]] - ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] - ; GFX6-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE6]] - ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]] - ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE6]] - ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] - ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX6-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] - ; GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX6-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] - ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE6]] - ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] - ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD15]](s32) - ; GFX6-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]] - ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]] - ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD15]] - ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]] - ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] - ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO 
[[UV14]], [[MUL15]] - ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD17]], [[USUBO3]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD17]] - ; GFX6-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]] - ; GFX6-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV20]] - ; GFX6-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV21]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX6-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV20]] - ; GFX6-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV21]], [[USUBO3]] - ; GFX6-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] - ; GFX6-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX6-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX6-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV22]] - ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV23]], [[UADDO39]] - ; GFX6-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) - ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV21]] - ; GFX6-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX6-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV20]] - ; GFX6-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX6-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV21]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX6-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX6-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV24]] - ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV25]], [[UADDO41]] - ; GFX6-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) - ; GFX6-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] - ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] - ; GFX6-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] - ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] - ; GFX6-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[ASHR1]] - ; GFX6-NEXT: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[XOR2]] - ; GFX6-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) - ; GFX6-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) - ; GFX6-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV28]] - ; GFX6-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV29]], [[USUBO7]] - ; GFX6-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE8]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: 
[[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[COPY]], 33 + ; GFX6-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i64) = G_SEXT_INREG [[COPY1]], 33 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG]], [[C]](i32) + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG1]], [[C]](i32) + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SEXT_INREG]](i64) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR]](i64) + ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV]], [[UV2]] + ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SEXT_INREG1]](i64) + ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR1]](i64) + ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UV4]], [[UV6]] + ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] + ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO2]](i32), [[UADDE2]](i32) + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(i64) = G_XOR [[MV]], [[ASHR]] + ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(i64) = G_XOR [[MV1]], [[ASHR1]] + ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[UV8]](i32) + ; GFX6-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[UV9]](i32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD]](f32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX6-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX6-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX6-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD1]](f32) + ; GFX6-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC]](f32) + ; GFX6-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX6-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C5]](i64) + ; GFX6-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV10]], [[UV12]] + ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] + ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ADD]], 
[[UMULH]] + ; GFX6-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(i32), [[UADDO5:%[0-9]+]]:_(i1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO5]](i1) + ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(i32), [[UADDO7:%[0-9]+]]:_(i1) = G_UADDO [[UADDO4]], [[UMULH1]] + ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO7]](i1) + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(i32), [[UADDO9:%[0-9]+]]:_(i1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO9]](i1) + ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(i32), [[UADDO11:%[0-9]+]]:_(i1) = G_UADDO [[UADDO8]], [[UMULH3]] + ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO11]](i1) + ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(i32), [[UADDO13:%[0-9]+]]:_(i1) = G_UADDO [[UADDO10]], [[ADD2]] + ; GFX6-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO13]](i1) + ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(i32), [[UADDO15:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI]], [[UADDO12]] + ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(i32), [[UADDE5:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] + ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[UADDO14]] + ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(i32) = G_MUL [[USUBE]], [[UADDO14]] + ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[UADDE4]] + ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(i32) = G_UMULH [[USUBO]], [[UADDO14]] + ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[ADD6]], [[UMULH5]] + ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(i32) = G_MUL [[UADDE4]], [[MUL6]] + ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(i32) = G_MUL [[UADDO14]], [[ADD7]] + ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(i32) = G_UMULH [[UADDO14]], [[MUL6]] + ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(i32), [[UADDO17:%[0-9]+]]:_(i1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO17]](i1) + ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(i32), [[UADDO19:%[0-9]+]]:_(i1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX6-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO19]](i1) + ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(i32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(i32) = G_MUL [[UADDE4]], [[ADD7]] + ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(i32) = G_UMULH [[UADDE4]], [[MUL6]] + ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(i32) = G_UMULH [[UADDO14]], [[ADD7]] + ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(i32), [[UADDO21:%[0-9]+]]:_(i1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO21]](i1) + ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(i32), [[UADDO23:%[0-9]+]]:_(i1) = G_UADDO [[UADDO20]], [[UMULH8]] + ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO23]](i1) + ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(i32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(i32), [[UADDO25:%[0-9]+]]:_(i1) = G_UADDO [[UADDO22]], [[ADD8]] + ; 
GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO25]](i1) + ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(i32) = G_ADD [[ADD9]], [[ZEXT9]] + ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(i32) = G_UMULH [[UADDE4]], [[ADD7]] + ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(i32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX6-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(i32), [[UADDO27:%[0-9]+]]:_(i1) = G_UADDO [[UADDO14]], [[UADDO24]] + ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(i32), [[UADDE7:%[0-9]+]]:_(i1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] + ; GFX6-NEXT: [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR]](i64) + ; GFX6-NEXT: [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR]](i64) + ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(i32) = G_MUL [[UV17]], [[UADDO26]] + ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(i32) = G_MUL [[UV16]], [[UADDE6]] + ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(i32) = G_UMULH [[UV16]], [[UADDO26]] + ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(i32), [[UADDO29:%[0-9]+]]:_(i1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX6-NEXT: [[ZEXT10:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO29]](i1) + ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(i32), [[UADDO31:%[0-9]+]]:_(i1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO31]](i1) + ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(i32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(i32) = G_MUL [[UV17]], [[UADDE6]] + ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(i32) = G_UMULH [[UV17]], [[UADDO26]] + ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(i32) = G_UMULH [[UV16]], [[UADDE6]] + ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(i32), [[UADDO33:%[0-9]+]]:_(i1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO33]](i1) + ; GFX6-NEXT: [[UADDO34:%[0-9]+]]:_(i32), [[UADDO35:%[0-9]+]]:_(i1) = G_UADDO [[UADDO32]], [[UMULH12]] + ; GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO35]](i1) + ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(i32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX6-NEXT: [[UADDO36:%[0-9]+]]:_(i32), [[UADDO37:%[0-9]+]]:_(i1) = G_UADDO [[UADDO34]], [[ADD12]] + ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO37]](i1) + ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(i32) = G_ADD [[ADD13]], [[ZEXT14]] + ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(i32) = G_UMULH [[UV17]], [[UADDE6]] + ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(i32) = G_ADD [[UMULH13]], [[ADD14]] + ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO36]](i32), [[ADD15]](i32) + ; GFX6-NEXT: [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(i32) = G_MUL [[UV18]], [[UADDO36]] + ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(i32) = G_MUL [[UV19]], [[UADDO36]] + ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(i32) = G_MUL [[UV18]], [[ADD15]] + ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(i32) = G_UMULH [[UV18]], [[UADDO36]] + ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(i32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(i32) = G_ADD [[ADD16]], [[UMULH14]] + ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(i32), [[USUBO3:%[0-9]+]]:_(i1) = G_USUBO [[UV14]], [[MUL15]] + ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV15]], [[ADD17]], [[USUBO3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV15]], [[ADD17]] + ; GFX6-NEXT: [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE2]](i32), [[UV21]] + ; GFX6-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), 
[[USUBO2]](i32), [[UV20]] + ; GFX6-NEXT: [[SEXT1:%[0-9]+]]:_(i32) = G_SEXT [[ICMP1]](i1) + ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE2]](i32), [[UV21]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SEXT1]], [[SEXT]] + ; GFX6-NEXT: [[USUBO4:%[0-9]+]]:_(i32), [[USUBO5:%[0-9]+]]:_(i1) = G_USUBO [[USUBO2]], [[UV20]] + ; GFX6-NEXT: [[USUBE4:%[0-9]+]]:_(i32), [[USUBE5:%[0-9]+]]:_(i1) = G_USUBE [[SUB]], [[UV21]], [[USUBO3]] + ; GFX6-NEXT: [[USUBE6:%[0-9]+]]:_(i32), [[USUBE7:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX6-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX6-NEXT: [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C7]](i64) + ; GFX6-NEXT: [[UADDO38:%[0-9]+]]:_(i32), [[UADDO39:%[0-9]+]]:_(i1) = G_UADDO [[UADDO36]], [[UV22]] + ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(i32), [[UADDE9:%[0-9]+]]:_(i1) = G_UADDE [[ADD15]], [[UV23]], [[UADDO39]] + ; GFX6-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO38]](i32), [[UADDE8]](i32) + ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE6]](i32), [[UV21]] + ; GFX6-NEXT: [[SEXT2:%[0-9]+]]:_(i32) = G_SEXT [[ICMP3]](i1) + ; GFX6-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO4]](i32), [[UV20]] + ; GFX6-NEXT: [[SEXT3:%[0-9]+]]:_(i32) = G_SEXT [[ICMP4]](i1) + ; GFX6-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE6]](i32), [[UV21]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP5]](i1), [[SEXT3]], [[SEXT2]] + ; GFX6-NEXT: [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C7]](i64) + ; GFX6-NEXT: [[UADDO40:%[0-9]+]]:_(i32), [[UADDO41:%[0-9]+]]:_(i1) = G_UADDO [[UADDO38]], [[UV24]] + ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(i32), [[UADDE11:%[0-9]+]]:_(i1) = G_UADDE [[UADDE8]], [[UV25]], [[UADDO41]] + ; GFX6-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO40]](i32), [[UADDE10]](i32) + ; GFX6-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT1]](i32), [[C6]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[MV4]], [[MV3]] + ; GFX6-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT]](i32), [[C6]] + ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP7]](i1), [[SELECT2]], [[MV2]] + ; GFX6-NEXT: [[XOR2:%[0-9]+]]:_(i64) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX6-NEXT: [[XOR3:%[0-9]+]]:_(i64) = G_XOR [[SELECT3]], [[XOR2]] + ; GFX6-NEXT: [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR3]](i64) + ; GFX6-NEXT: [[UV28:%[0-9]+]]:_(i32), [[UV29:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR2]](i64) + ; GFX6-NEXT: [[USUBO6:%[0-9]+]]:_(i32), [[USUBO7:%[0-9]+]]:_(i1) = G_USUBO [[UV26]], [[UV28]] + ; GFX6-NEXT: [[USUBE8:%[0-9]+]]:_(i32), [[USUBE9:%[0-9]+]]:_(i1) = G_USUBE [[UV27]], [[UV29]], [[USUBO7]] + ; GFX6-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO6]](i32), [[USUBE8]](i32) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](i64) + ; ; GFX8-LABEL: name: test_sdiv_s33 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 33 - ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY1]], 33 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32) - ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG1]], [[C]](s32) - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), 
[[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG]](s64) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] - ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG1]](s64) - ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) - ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] - ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] - ; GFX8-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]] - ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV8]](s32) - ; GFX8-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV9]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] - ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] - ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 - ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; GFX8-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] - ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] - ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; GFX8-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] - ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] - ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) - ; GFX8-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) - ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) - ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX8-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]] - ; GFX8-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C5]] - ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV15]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] - ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; 
GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV14]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV16]] - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV14]] - ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV16]] - ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV14]] - ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV16]] - ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD]] - ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] - ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV16]] - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] - ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) - ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV19]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE4]], [[ANYEXT1]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]] - ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV18]] - ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[UV20]] - ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV18]] - ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] - ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV20]] - ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV18]] - ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV20]] - ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT 
[[UADDO21]](s1) - ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] - ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD4]] - ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV20]] - ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]] - ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO26]] - ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE6]] - ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO26]] - ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] - ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE6]] - ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO26]] - ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE6]] - ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] - ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD8]] - ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE6]] - ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] - ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD11]](s32) - ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDO36]], [[C5]] - ; GFX8-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) - ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV29]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]] 
- ; GFX8-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) - ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV28]] - ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV30]], [[USUBO3]] - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV23]], [[UV30]] - ; GFX8-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV33]] - ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV32]] - ; GFX8-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV33]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX8-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV32]] - ; GFX8-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV33]], [[USUBO3]] - ; GFX8-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] - ; GFX8-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX8-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX8-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV34]] - ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD11]], [[UV35]], [[UADDO39]] - ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) - ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV33]] - ; GFX8-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV32]] - ; GFX8-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV33]] - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX8-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX8-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV36]] - ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV37]], [[UADDO41]] - ; GFX8-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) - ; GFX8-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] - ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] - ; GFX8-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] - ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] - ; GFX8-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[ASHR1]] - ; GFX8-NEXT: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[XOR2]] - ; GFX8-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) - ; GFX8-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) - ; GFX8-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[UV38]], [[UV40]] - ; GFX8-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV39]], [[UV41]], [[USUBO7]] - ; GFX8-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), 
[[USUBE8]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[COPY]], 33 + ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i64) = G_SEXT_INREG [[COPY1]], 33 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG]], [[C]](i32) + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG1]], [[C]](i32) + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SEXT_INREG]](i64) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR]](i64) + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV]], [[UV2]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SEXT_INREG1]](i64) + ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR1]](i64) + ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UV4]], [[UV6]] + ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] + ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO2]](i32), [[UADDE2]](i32) + ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(i64) = G_XOR [[MV]], [[ASHR]] + ; GFX8-NEXT: [[XOR1:%[0-9]+]]:_(i64) = G_XOR [[MV1]], [[ASHR1]] + ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[UV8]](i32) + ; GFX8-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[UV9]](i32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD]](f32) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX8-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX8-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD1]](f32) + ; GFX8-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC]](f32) + ; GFX8-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C5]](i64) + ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX8-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV10]], [[UV12]] + ; GFX8-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[FPTOUI]], [[C5]] + ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(i32), 
[[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](i64) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[UV15]](i32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[FPTOUI1]], [[ANYEXT]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](i32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](i64) + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[UV14]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI]], [[UV16]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[UV14]] + ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(i32), [[UADDO5:%[0-9]+]]:_(i1) = G_UADDO [[MUL]], [[MUL1]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO5]](i1) + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(i32), [[UADDO7:%[0-9]+]]:_(i1) = G_UADDO [[UADDO4]], [[UMULH]] + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO7]](i1) + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[UV16]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[UV14]] + ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[UV16]] + ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(i32), [[UADDO9:%[0-9]+]]:_(i1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO9]](i1) + ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(i32), [[UADDO11:%[0-9]+]]:_(i1) = G_UADDO [[UADDO8]], [[UMULH2]] + ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO11]](i1) + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(i32), [[UADDO13:%[0-9]+]]:_(i1) = G_UADDO [[UADDO10]], [[ADD]] + ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO13]](i1) + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[UV16]] + ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UMULH3]], [[ADD2]] + ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(i32), [[UADDO15:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI]], [[UADDO12]] + ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(i32), [[UADDE5:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[UADDO14]], [[C5]] + ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](i64) + ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[UV19]](i32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[UADDE4]], [[ANYEXT1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](i32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](i64) + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UADDE4]], [[UV18]] + ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(i32) = G_MUL [[UADDO14]], [[UV20]] + ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(i32) = G_UMULH [[UADDO14]], [[UV18]] + ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(i32), [[UADDO17:%[0-9]+]]:_(i1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO17]](i1) + ; GFX8-NEXT: 
[[UADDO18:%[0-9]+]]:_(i32), [[UADDO19:%[0-9]+]]:_(i1) = G_UADDO [[UADDO16]], [[UMULH4]] + ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO19]](i1) + ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(i32) = G_MUL [[UADDE4]], [[UV20]] + ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(i32) = G_UMULH [[UADDE4]], [[UV18]] + ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(i32) = G_UMULH [[UADDO14]], [[UV20]] + ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(i32), [[UADDO21:%[0-9]+]]:_(i1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO21]](i1) + ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(i32), [[UADDO23:%[0-9]+]]:_(i1) = G_UADDO [[UADDO20]], [[UMULH6]] + ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO23]](i1) + ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(i32), [[UADDO25:%[0-9]+]]:_(i1) = G_UADDO [[UADDO22]], [[ADD4]] + ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO25]](i1) + ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(i32) = G_UMULH [[UADDE4]], [[UV20]] + ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[UMULH7]], [[ADD6]] + ; GFX8-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(i32), [[UADDO27:%[0-9]+]]:_(i1) = G_UADDO [[UADDO14]], [[UADDO24]] + ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(i32), [[UADDE7:%[0-9]+]]:_(i1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]] + ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR]](i64) + ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR]](i64) + ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(i32) = G_MUL [[UV25]], [[UADDO26]] + ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(i32) = G_MUL [[UV24]], [[UADDE6]] + ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(i32) = G_UMULH [[UV24]], [[UADDO26]] + ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(i32), [[UADDO29:%[0-9]+]]:_(i1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO29]](i1) + ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(i32), [[UADDO31:%[0-9]+]]:_(i1) = G_UADDO [[UADDO28]], [[UMULH8]] + ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO31]](i1) + ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(i32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(i32) = G_MUL [[UV25]], [[UADDE6]] + ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(i32) = G_UMULH [[UV25]], [[UADDO26]] + ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(i32) = G_UMULH [[UV24]], [[UADDE6]] + ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(i32), [[UADDO33:%[0-9]+]]:_(i1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO33]](i1) + ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(i32), [[UADDO35:%[0-9]+]]:_(i1) = G_UADDO [[UADDO32]], [[UMULH10]] + ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO35]](i1) + ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(i32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(i32), [[UADDO37:%[0-9]+]]:_(i1) = G_UADDO [[UADDO34]], [[ADD8]] + ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO37]](i1) + ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(i32) = G_ADD [[ADD9]], [[ZEXT14]] + ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(i32) = G_UMULH [[UV25]], [[UADDE6]] + ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(i32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO36]](i32), [[ADD11]](i32) + ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(i64), 
[[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV26]](i32), [[UADDO36]], [[C5]] + ; GFX8-NEXT: [[UV28:%[0-9]+]]:_(i32), [[UV29:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](i64) + ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(i64) = G_ANYEXT [[UV29]](i32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV26]](i32), [[ADD11]], [[ANYEXT2]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV27]](i32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX8-NEXT: [[UV30:%[0-9]+]]:_(i32), [[UV31:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](i64) + ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(i32), [[USUBO3:%[0-9]+]]:_(i1) = G_USUBO [[UV22]], [[UV28]] + ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV23]], [[UV30]], [[USUBO3]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV23]], [[UV30]] + ; GFX8-NEXT: [[UV32:%[0-9]+]]:_(i32), [[UV33:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE2]](i32), [[UV33]] + ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO2]](i32), [[UV32]] + ; GFX8-NEXT: [[SEXT1:%[0-9]+]]:_(i32) = G_SEXT [[ICMP1]](i1) + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE2]](i32), [[UV33]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SEXT1]], [[SEXT]] + ; GFX8-NEXT: [[USUBO4:%[0-9]+]]:_(i32), [[USUBO5:%[0-9]+]]:_(i1) = G_USUBO [[USUBO2]], [[UV32]] + ; GFX8-NEXT: [[USUBE4:%[0-9]+]]:_(i32), [[USUBE5:%[0-9]+]]:_(i1) = G_USUBE [[SUB]], [[UV33]], [[USUBO3]] + ; GFX8-NEXT: [[USUBE6:%[0-9]+]]:_(i32), [[USUBE7:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX8-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX8-NEXT: [[UV34:%[0-9]+]]:_(i32), [[UV35:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C7]](i64) + ; GFX8-NEXT: [[UADDO38:%[0-9]+]]:_(i32), [[UADDO39:%[0-9]+]]:_(i1) = G_UADDO [[UADDO36]], [[UV34]] + ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(i32), [[UADDE9:%[0-9]+]]:_(i1) = G_UADDE [[ADD11]], [[UV35]], [[UADDO39]] + ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO38]](i32), [[UADDE8]](i32) + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE6]](i32), [[UV33]] + ; GFX8-NEXT: [[SEXT2:%[0-9]+]]:_(i32) = G_SEXT [[ICMP3]](i1) + ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO4]](i32), [[UV32]] + ; GFX8-NEXT: [[SEXT3:%[0-9]+]]:_(i32) = G_SEXT [[ICMP4]](i1) + ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE6]](i32), [[UV33]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP5]](i1), [[SEXT3]], [[SEXT2]] + ; GFX8-NEXT: [[UV36:%[0-9]+]]:_(i32), [[UV37:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C7]](i64) + ; GFX8-NEXT: [[UADDO40:%[0-9]+]]:_(i32), [[UADDO41:%[0-9]+]]:_(i1) = G_UADDO [[UADDO38]], [[UV36]] + ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(i32), [[UADDE11:%[0-9]+]]:_(i1) = G_UADDE [[UADDE8]], [[UV37]], [[UADDO41]] + ; GFX8-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO40]](i32), [[UADDE10]](i32) + ; GFX8-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT1]](i32), [[C6]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[MV4]], [[MV3]] + ; GFX8-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT]](i32), [[C6]] + ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP7]](i1), [[SELECT2]], [[MV2]] + ; 
GFX8-NEXT: [[XOR2:%[0-9]+]]:_(i64) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX8-NEXT: [[XOR3:%[0-9]+]]:_(i64) = G_XOR [[SELECT3]], [[XOR2]] + ; GFX8-NEXT: [[UV38:%[0-9]+]]:_(i32), [[UV39:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR3]](i64) + ; GFX8-NEXT: [[UV40:%[0-9]+]]:_(i32), [[UV41:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR2]](i64) + ; GFX8-NEXT: [[USUBO6:%[0-9]+]]:_(i32), [[USUBO7:%[0-9]+]]:_(i1) = G_USUBO [[UV38]], [[UV40]] + ; GFX8-NEXT: [[USUBE8:%[0-9]+]]:_(i32), [[USUBE9:%[0-9]+]]:_(i1) = G_USUBE [[UV39]], [[UV41]], [[USUBO7]] + ; GFX8-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO6]](i32), [[USUBE8]](i32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](i64) + ; ; GFX9-LABEL: name: test_sdiv_s33 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 33 - ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY1]], 33 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32) - ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG1]], [[C]](s32) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG]](s64) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG1]](s64) - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) - ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] - ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] - ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]] - ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV8]](s32) - ; GFX9-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV9]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] - ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 - ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] - ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; GFX9-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] - ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] - ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) - ; GFX9-NEXT: 
[[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) - ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX9-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]] - ; GFX9-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C5]] - ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV15]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] - ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV14]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV16]] - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV14]] - ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV16]] - ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV14]] - ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV16]] - ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] - ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD]] - ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] - ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV16]] - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] - ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) - ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV19]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), 
[[UADDE4]], [[ANYEXT1]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]] - ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV18]] - ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[UV20]] - ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV18]] - ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] - ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV20]] - ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV18]] - ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV20]] - ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] - ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD4]] - ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV20]] - ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]] - ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO26]] - ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE6]] - ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO26]] - ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] - ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE6]] - ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO26]] - ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE6]] - ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] - ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX9-NEXT: 
[[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD8]] - ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE6]] - ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] - ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD11]](s32) - ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDO36]], [[C5]] - ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) - ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV29]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]] - ; GFX9-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) - ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV28]] - ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV30]], [[USUBO3]] - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV23]], [[UV30]] - ; GFX9-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV33]] - ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV32]] - ; GFX9-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV33]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX9-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV32]] - ; GFX9-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV33]], [[USUBO3]] - ; GFX9-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] - ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX9-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV34]] - ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD11]], [[UV35]], [[UADDO39]] - ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) - ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV33]] - ; GFX9-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV32]] - ; GFX9-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV33]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX9-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = 
G_UNMERGE_VALUES [[C7]](s64) - ; GFX9-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV36]] - ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV37]], [[UADDO41]] - ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) - ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] - ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] - ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] - ; GFX9-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[ASHR1]] - ; GFX9-NEXT: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[XOR2]] - ; GFX9-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) - ; GFX9-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) - ; GFX9-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[UV38]], [[UV40]] - ; GFX9-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV39]], [[UV41]], [[USUBO7]] - ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE8]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[COPY]], 33 + ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i64) = G_SEXT_INREG [[COPY1]], 33 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG]], [[C]](i32) + ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG1]], [[C]](i32) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SEXT_INREG]](i64) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR]](i64) + ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV]], [[UV2]] + ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SEXT_INREG1]](i64) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR1]](i64) + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UV4]], [[UV6]] + ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO2]](i32), [[UADDE2]](i32) + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(i64) = G_XOR [[MV]], [[ASHR]] + ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(i64) = G_XOR [[MV1]], [[ASHR1]] + ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[UV8]](i32) + ; GFX9-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[UV9]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD]](f32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX9-NEXT: 
[[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX9-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD1]](f32) + ; GFX9-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC]](f32) + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C5]](i64) + ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX9-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV10]], [[UV12]] + ; GFX9-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[FPTOUI]], [[C5]] + ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](i64) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[UV15]](i32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[FPTOUI1]], [[ANYEXT]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](i32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](i64) + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[UV14]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI]], [[UV16]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[UV14]] + ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(i32), [[UADDO5:%[0-9]+]]:_(i1) = G_UADDO [[MUL]], [[MUL1]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO5]](i1) + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(i32), [[UADDO7:%[0-9]+]]:_(i1) = G_UADDO [[UADDO4]], [[UMULH]] + ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO7]](i1) + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[UV16]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[UV14]] + ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[UV16]] + ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(i32), [[UADDO9:%[0-9]+]]:_(i1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO9]](i1) + ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(i32), [[UADDO11:%[0-9]+]]:_(i1) = G_UADDO [[UADDO8]], [[UMULH2]] + ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO11]](i1) + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(i32), [[UADDO13:%[0-9]+]]:_(i1) = G_UADDO [[UADDO10]], [[ADD]] + ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO13]](i1) + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[UV16]] + ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UMULH3]], [[ADD2]] + ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(i32), [[UADDO15:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI]], 
[[UADDO12]] + ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(i32), [[UADDE5:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[UADDO14]], [[C5]] + ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](i64) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[UV19]](i32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[UADDE4]], [[ANYEXT1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](i32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](i64) + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UADDE4]], [[UV18]] + ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(i32) = G_MUL [[UADDO14]], [[UV20]] + ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(i32) = G_UMULH [[UADDO14]], [[UV18]] + ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(i32), [[UADDO17:%[0-9]+]]:_(i1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO17]](i1) + ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(i32), [[UADDO19:%[0-9]+]]:_(i1) = G_UADDO [[UADDO16]], [[UMULH4]] + ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO19]](i1) + ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(i32) = G_MUL [[UADDE4]], [[UV20]] + ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(i32) = G_UMULH [[UADDE4]], [[UV18]] + ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(i32) = G_UMULH [[UADDO14]], [[UV20]] + ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(i32), [[UADDO21:%[0-9]+]]:_(i1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO21]](i1) + ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(i32), [[UADDO23:%[0-9]+]]:_(i1) = G_UADDO [[UADDO20]], [[UMULH6]] + ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO23]](i1) + ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(i32), [[UADDO25:%[0-9]+]]:_(i1) = G_UADDO [[UADDO22]], [[ADD4]] + ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO25]](i1) + ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(i32) = G_UMULH [[UADDE4]], [[UV20]] + ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[UMULH7]], [[ADD6]] + ; GFX9-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(i32), [[UADDO27:%[0-9]+]]:_(i1) = G_UADDO [[UADDO14]], [[UADDO24]] + ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(i32), [[UADDE7:%[0-9]+]]:_(i1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]] + ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR]](i64) + ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR]](i64) + ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(i32) = G_MUL [[UV25]], [[UADDO26]] + ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(i32) = G_MUL [[UV24]], [[UADDE6]] + ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(i32) = G_UMULH [[UV24]], [[UADDO26]] + ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(i32), [[UADDO29:%[0-9]+]]:_(i1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO29]](i1) + ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(i32), [[UADDO31:%[0-9]+]]:_(i1) = G_UADDO [[UADDO28]], [[UMULH8]] + ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO31]](i1) + ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(i32) = G_ADD 
[[ZEXT10]], [[ZEXT11]] + ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(i32) = G_MUL [[UV25]], [[UADDE6]] + ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(i32) = G_UMULH [[UV25]], [[UADDO26]] + ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(i32) = G_UMULH [[UV24]], [[UADDE6]] + ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(i32), [[UADDO33:%[0-9]+]]:_(i1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO33]](i1) + ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(i32), [[UADDO35:%[0-9]+]]:_(i1) = G_UADDO [[UADDO32]], [[UMULH10]] + ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO35]](i1) + ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(i32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(i32), [[UADDO37:%[0-9]+]]:_(i1) = G_UADDO [[UADDO34]], [[ADD8]] + ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO37]](i1) + ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(i32) = G_ADD [[ADD9]], [[ZEXT14]] + ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(i32) = G_UMULH [[UV25]], [[UADDE6]] + ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(i32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO36]](i32), [[ADD11]](i32) + ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV26]](i32), [[UADDO36]], [[C5]] + ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(i32), [[UV29:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](i64) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(i64) = G_ANYEXT [[UV29]](i32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV26]](i32), [[ADD11]], [[ANYEXT2]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV27]](i32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX9-NEXT: [[UV30:%[0-9]+]]:_(i32), [[UV31:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](i64) + ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(i32), [[USUBO3:%[0-9]+]]:_(i1) = G_USUBO [[UV22]], [[UV28]] + ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV23]], [[UV30]], [[USUBO3]] + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV23]], [[UV30]] + ; GFX9-NEXT: [[UV32:%[0-9]+]]:_(i32), [[UV33:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE2]](i32), [[UV33]] + ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO2]](i32), [[UV32]] + ; GFX9-NEXT: [[SEXT1:%[0-9]+]]:_(i32) = G_SEXT [[ICMP1]](i1) + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE2]](i32), [[UV33]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SEXT1]], [[SEXT]] + ; GFX9-NEXT: [[USUBO4:%[0-9]+]]:_(i32), [[USUBO5:%[0-9]+]]:_(i1) = G_USUBO [[USUBO2]], [[UV32]] + ; GFX9-NEXT: [[USUBE4:%[0-9]+]]:_(i32), [[USUBE5:%[0-9]+]]:_(i1) = G_USUBE [[SUB]], [[UV33]], [[USUBO3]] + ; GFX9-NEXT: [[USUBE6:%[0-9]+]]:_(i32), [[USUBE7:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX9-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX9-NEXT: [[UV34:%[0-9]+]]:_(i32), [[UV35:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C7]](i64) + ; GFX9-NEXT: [[UADDO38:%[0-9]+]]:_(i32), [[UADDO39:%[0-9]+]]:_(i1) = G_UADDO [[UADDO36]], [[UV34]] + ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(i32), [[UADDE9:%[0-9]+]]:_(i1) = G_UADDE [[ADD11]], [[UV35]], [[UADDO39]] + ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(i64) = 
G_MERGE_VALUES [[UADDO38]](i32), [[UADDE8]](i32) + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE6]](i32), [[UV33]] + ; GFX9-NEXT: [[SEXT2:%[0-9]+]]:_(i32) = G_SEXT [[ICMP3]](i1) + ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO4]](i32), [[UV32]] + ; GFX9-NEXT: [[SEXT3:%[0-9]+]]:_(i32) = G_SEXT [[ICMP4]](i1) + ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE6]](i32), [[UV33]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP5]](i1), [[SEXT3]], [[SEXT2]] + ; GFX9-NEXT: [[UV36:%[0-9]+]]:_(i32), [[UV37:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C7]](i64) + ; GFX9-NEXT: [[UADDO40:%[0-9]+]]:_(i32), [[UADDO41:%[0-9]+]]:_(i1) = G_UADDO [[UADDO38]], [[UV36]] + ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(i32), [[UADDE11:%[0-9]+]]:_(i1) = G_UADDE [[UADDE8]], [[UV37]], [[UADDO41]] + ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO40]](i32), [[UADDE10]](i32) + ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT1]](i32), [[C6]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[MV4]], [[MV3]] + ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT]](i32), [[C6]] + ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP7]](i1), [[SELECT2]], [[MV2]] + ; GFX9-NEXT: [[XOR2:%[0-9]+]]:_(i64) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX9-NEXT: [[XOR3:%[0-9]+]]:_(i64) = G_XOR [[SELECT3]], [[XOR2]] + ; GFX9-NEXT: [[UV38:%[0-9]+]]:_(i32), [[UV39:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR3]](i64) + ; GFX9-NEXT: [[UV40:%[0-9]+]]:_(i32), [[UV41:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR2]](i64) + ; GFX9-NEXT: [[USUBO6:%[0-9]+]]:_(i32), [[USUBO7:%[0-9]+]]:_(i1) = G_USUBO [[UV38]], [[UV40]] + ; GFX9-NEXT: [[USUBE8:%[0-9]+]]:_(i32), [[USUBE9:%[0-9]+]]:_(i1) = G_USUBE [[UV39]], [[UV41]], [[USUBO7]] + ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO6]](i32), [[USUBE8]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](i64) + ; ; GFX10-LABEL: name: test_sdiv_s33 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 33 - ; GFX10-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY1]], 33 - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32) - ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG1]], [[C]](s32) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG]](s64) - ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG1]](s64) - ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) - ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] - ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR 
[[MV]], [[ASHR]] - ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]] - ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV8]](s32) - ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV9]](s32) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] - ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] - ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 - ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] - ; GFX10-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] - ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] - ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) - ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) - ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) - ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]] - ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C5]] - ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV15]], [[MUL]] - ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]] - ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV14]] - ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV14]] - ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[MUL3]] - ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] - ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV14]] - ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] - ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] - ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX10-NEXT: 
[[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] - ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] - ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD4]] - ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] - ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64) - ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] - ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL5]] - ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] - ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[MUL6]] - ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV16]] - ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] - ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV16]] - ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] - ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] - ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] - ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV16]] - ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] - ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] - ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] - ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] - ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] - ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD10]] - ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] - ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO26]] - ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE6]] - ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO26]] - ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), 
[[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] - ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] - ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE6]] - ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO26]] - ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE6]] - ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] - ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] - ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] - ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE6]] - ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD14]] - ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD15]](s32) - ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDO36]], [[C5]] - ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD15]] - ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV25]], [[MUL13]] - ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDO36]] - ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL14]] - ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[UV24]] - ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD17]], [[USUBO3]] - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD17]] - ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV27]] - ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV26]] - ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV27]] - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV26]] - ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV27]], [[USUBO3]] - ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] - ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX10-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV28]] - ; 
GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV29]], [[UADDO39]] - ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) - ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV27]] - ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV26]] - ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV27]] - ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX10-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV30]] - ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV31]], [[UADDO41]] - ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) - ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] - ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] - ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] - ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] - ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[ASHR1]] - ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[XOR2]] - ; GFX10-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) - ; GFX10-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) - ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[UV32]], [[UV34]] - ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV33]], [[UV35]], [[USUBO7]] - ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE8]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s33) = G_TRUNC %0 - %3:_(s33) = G_TRUNC %1 - %4:_(s33) = G_SDIV %2, %3 - %5:_(s64) = G_ANYEXT %4 - $vgpr0_vgpr1 = COPY %5 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[COPY]], 33 + ; GFX10-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i64) = G_SEXT_INREG [[COPY1]], 33 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG]], [[C]](i32) + ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG1]], [[C]](i32) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SEXT_INREG]](i64) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR]](i64) + ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV]], [[UV2]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SEXT_INREG1]](i64) + ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR1]](i64) + ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UV4]], 
[[UV6]] + ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO2]](i32), [[UADDE2]](i32) + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(i64) = G_XOR [[MV]], [[ASHR]] + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(i64) = G_XOR [[MV1]], [[ASHR1]] + ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[UV8]](i32) + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[UV9]](i32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD]](f32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX10-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD1]](f32) + ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC]](f32) + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C5]](i64) + ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV10]], [[UV12]] + ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[FPTOUI]], [[C5]] + ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](i64) + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[UV15]], [[MUL]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ADD]], [[MUL1]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[UV14]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[UV14]] + ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(i32), [[UADDO5:%[0-9]+]]:_(i1) = G_UADDO [[MUL2]], [[MUL3]] + ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO5]](i1) + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(i32), [[UADDO7:%[0-9]+]]:_(i1) = G_UADDO [[UADDO4]], [[UMULH]] + ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO7]](i1) + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[UV14]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(i32), [[UADDO9:%[0-9]+]]:_(i1) = G_UADDO [[MUL4]], [[UMULH1]] + ; GFX10-NEXT: 
[[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO9]](i1) + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(i32), [[UADDO11:%[0-9]+]]:_(i1) = G_UADDO [[UADDO8]], [[UMULH2]] + ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO11]](i1) + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(i32), [[UADDO13:%[0-9]+]]:_(i1) = G_UADDO [[UADDO10]], [[ADD2]] + ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO13]](i1) + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[UMULH3]], [[ADD4]] + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(i32), [[UADDO15:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI]], [[UADDO12]] + ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(i32), [[UADDE5:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[UADDO14]], [[C5]] + ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](i64) + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[UADDE4]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[UV17]], [[MUL5]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(i32) = G_MUL [[USUBE]], [[UADDO14]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[ADD6]], [[MUL6]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(i32) = G_MUL [[UADDE4]], [[UV16]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(i32) = G_MUL [[UADDO14]], [[ADD7]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(i32) = G_UMULH [[UADDO14]], [[UV16]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(i32), [[UADDO17:%[0-9]+]]:_(i1) = G_UADDO [[MUL7]], [[MUL8]] + ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO17]](i1) + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(i32), [[UADDO19:%[0-9]+]]:_(i1) = G_UADDO [[UADDO16]], [[UMULH4]] + ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO19]](i1) + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(i32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(i32) = G_MUL [[UADDE4]], [[ADD7]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(i32) = G_UMULH [[UADDE4]], [[UV16]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(i32) = G_UMULH [[UADDO14]], [[ADD7]] + ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(i32), [[UADDO21:%[0-9]+]]:_(i1) = G_UADDO [[MUL9]], [[UMULH5]] + ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO21]](i1) + ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(i32), [[UADDO23:%[0-9]+]]:_(i1) = G_UADDO [[UADDO20]], [[UMULH6]] + ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO23]](i1) + ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(i32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(i32), [[UADDO25:%[0-9]+]]:_(i1) = G_UADDO [[UADDO22]], [[ADD8]] + ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO25]](i1) + ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(i32) = G_ADD [[ADD9]], [[ZEXT9]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(i32) = G_UMULH [[UADDE4]], [[ADD7]] + ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(i32) = G_ADD [[UMULH7]], [[ADD10]] + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(i32), [[UADDO27:%[0-9]+]]:_(i1) = G_UADDO [[UADDO14]], [[UADDO24]] + ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(i32), [[UADDE7:%[0-9]+]]:_(i1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] + ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR]](i64) + ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR]](i64) + ; GFX10-NEXT: 
[[MUL10:%[0-9]+]]:_(i32) = G_MUL [[UV21]], [[UADDO26]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(i32) = G_MUL [[UV20]], [[UADDE6]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(i32) = G_UMULH [[UV20]], [[UADDO26]] + ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(i32), [[UADDO29:%[0-9]+]]:_(i1) = G_UADDO [[MUL10]], [[MUL11]] + ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO29]](i1) + ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(i32), [[UADDO31:%[0-9]+]]:_(i1) = G_UADDO [[UADDO28]], [[UMULH8]] + ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO31]](i1) + ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(i32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(i32) = G_MUL [[UV21]], [[UADDE6]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(i32) = G_UMULH [[UV21]], [[UADDO26]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(i32) = G_UMULH [[UV20]], [[UADDE6]] + ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(i32), [[UADDO33:%[0-9]+]]:_(i1) = G_UADDO [[MUL12]], [[UMULH9]] + ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO33]](i1) + ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(i32), [[UADDO35:%[0-9]+]]:_(i1) = G_UADDO [[UADDO32]], [[UMULH10]] + ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO35]](i1) + ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(i32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(i32), [[UADDO37:%[0-9]+]]:_(i1) = G_UADDO [[UADDO34]], [[ADD12]] + ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO37]](i1) + ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(i32) = G_ADD [[ADD13]], [[ZEXT14]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(i32) = G_UMULH [[UV21]], [[UADDE6]] + ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(i32) = G_ADD [[UMULH11]], [[ADD14]] + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO36]](i32), [[ADD15]](i32) + ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV22]](i32), [[UADDO36]], [[C5]] + ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](i64) + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(i32) = G_MUL [[UV22]], [[ADD15]] + ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(i32) = G_ADD [[UV25]], [[MUL13]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(i32) = G_MUL [[UV23]], [[UADDO36]] + ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(i32) = G_ADD [[ADD16]], [[MUL14]] + ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(i32), [[USUBO3:%[0-9]+]]:_(i1) = G_USUBO [[UV18]], [[UV24]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV19]], [[ADD17]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV19]], [[ADD17]] + ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE2]](i32), [[UV27]] + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO2]](i32), [[UV26]] + ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(i32) = G_SEXT [[ICMP1]](i1) + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE2]](i32), [[UV27]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SEXT1]], [[SEXT]] + ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(i32), [[USUBO5:%[0-9]+]]:_(i1) = G_USUBO [[USUBO2]], [[UV26]] + ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(i32), [[USUBE5:%[0-9]+]]:_(i1) = G_USUBE [[SUB]], [[UV27]], [[USUBO3]] + ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(i32), [[USUBE7:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX10-NEXT: 
[[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX10-NEXT: [[UV28:%[0-9]+]]:_(i32), [[UV29:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C7]](i64) + ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(i32), [[UADDO39:%[0-9]+]]:_(i1) = G_UADDO [[UADDO36]], [[UV28]] + ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(i32), [[UADDE9:%[0-9]+]]:_(i1) = G_UADDE [[ADD15]], [[UV29]], [[UADDO39]] + ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO38]](i32), [[UADDE8]](i32) + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE6]](i32), [[UV27]] + ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(i32) = G_SEXT [[ICMP3]](i1) + ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO4]](i32), [[UV26]] + ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(i32) = G_SEXT [[ICMP4]](i1) + ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE6]](i32), [[UV27]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP5]](i1), [[SEXT3]], [[SEXT2]] + ; GFX10-NEXT: [[UV30:%[0-9]+]]:_(i32), [[UV31:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C7]](i64) + ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(i32), [[UADDO41:%[0-9]+]]:_(i1) = G_UADDO [[UADDO38]], [[UV30]] + ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(i32), [[UADDE11:%[0-9]+]]:_(i1) = G_UADDE [[UADDE8]], [[UV31]], [[UADDO41]] + ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO40]](i32), [[UADDE10]](i32) + ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT1]](i32), [[C6]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[MV4]], [[MV3]] + ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT]](i32), [[C6]] + ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP7]](i1), [[SELECT2]], [[MV2]] + ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(i64) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(i64) = G_XOR [[SELECT3]], [[XOR2]] + ; GFX10-NEXT: [[UV32:%[0-9]+]]:_(i32), [[UV33:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR3]](i64) + ; GFX10-NEXT: [[UV34:%[0-9]+]]:_(i32), [[UV35:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR2]](i64) + ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(i32), [[USUBO7:%[0-9]+]]:_(i1) = G_USUBO [[UV32]], [[UV34]] + ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(i32), [[USUBE9:%[0-9]+]]:_(i1) = G_USUBE [[UV33]], [[UV35]], [[USUBO7]] + ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO6]](i32), [[USUBE8]](i32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i33) = G_TRUNC %0(i64) + %3:_(i33) = G_TRUNC %1(i64) + %4:_(i33) = G_SDIV %2, %3 + %5:_(i64) = G_ANYEXT %4(i33) + $vgpr0_vgpr1 = COPY %5(i64) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir index 12bcecd6db586..af0c42379e0b3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir @@ -9,21 +9,21 @@ body: | ; CHECK-LABEL: name: test_select_s32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[COPY]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] - ; CHECK-NEXT: $vgpr0 = COPY [[SELECT]](s32) - %0:_(s32) = G_CONSTANT i32 0 - %1:_(s32) = COPY $vgpr0 - - %2:_(s1) = G_ICMP intpred(ne), %0, %1 - %3:_(s32) = G_CONSTANT i32 1 - %4:_(s32) = G_CONSTANT i32 2 - %5:_(s32) = G_SELECT %2, %3, %4 - $vgpr0 = COPY %5 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[C]](i32), [[COPY]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[C1]], [[C2]] + ; CHECK-NEXT: $vgpr0 = COPY [[SELECT]](i32) + %0:_(i32) = G_CONSTANT i32 0 + %1:_(i32) = COPY $vgpr0 + %2:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %3:_(i32) = G_CONSTANT i32 1 + %4:_(i32) = G_CONSTANT i32 2 + %5:_(i32) = G_SELECT %2(i1), %3, %4 + $vgpr0 = COPY %5(i32) + ... @@ -35,21 +35,21 @@ body: | ; CHECK-LABEL: name: test_select_s64 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[COPY]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) - %0:_(s32) = G_CONSTANT i32 0 - %1:_(s32) = COPY $vgpr0 - - %2:_(s1) = G_ICMP intpred(ne), %0, %1 - %3:_(s64) = G_CONSTANT i64 1 - %4:_(s64) = G_CONSTANT i64 2 - %5:_(s64) = G_SELECT %2, %3, %4 - $vgpr0_vgpr1 = COPY %5 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[C]](i32), [[COPY]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[C1]], [[C2]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](i64) + %0:_(i32) = G_CONSTANT i32 0 + %1:_(i32) = COPY $vgpr0 + %2:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %3:_(i64) = G_CONSTANT i64 1 + %4:_(i64) = G_CONSTANT i64 2 + %5:_(i64) = G_SELECT %2(i1), %3, %4 + $vgpr0_vgpr1 = COPY %5(i64) + ... 
@@ -61,22 +61,22 @@ body: | ; CHECK-LABEL: name: test_select_s48 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[COPY]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) - %0:_(s32) = G_CONSTANT i32 0 - %1:_(s32) = COPY $vgpr0 - - %2:_(s1) = G_ICMP intpred(ne), %0, %1 - %3:_(s48) = G_CONSTANT i48 1 - %4:_(s48) = G_CONSTANT i48 2 - %5:_(s48) = G_SELECT %2, %3, %4 - %6:_(s64) = G_ANYEXT %5 - $vgpr0_vgpr1 = COPY %6 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[C]](i32), [[COPY]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[C1]], [[C2]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](i64) + %0:_(i32) = G_CONSTANT i32 0 + %1:_(i32) = COPY $vgpr0 + %2:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %3:_(i48) = G_CONSTANT i48 1 + %4:_(i48) = G_CONSTANT i48 2 + %5:_(i48) = G_SELECT %2(i1), %3, %4 + %6:_(i64) = G_ANYEXT %5(i48) + $vgpr0_vgpr1 = COPY %6(i64) + ... @@ -88,23 +88,23 @@ body: | ; CHECK-LABEL: name: test_select_s16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[COPY]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 2 - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = G_CONSTANT i32 0 - %1:_(s32) = COPY $vgpr0 - - %2:_(s1) = G_ICMP intpred(ne), %0, %1 - %3:_(s16) = G_CONSTANT i16 1 - %4:_(s16) = G_CONSTANT i16 2 - %5:_(s16) = G_SELECT %2, %3, %4 - %6:_(s32) = G_ANYEXT %5 - $vgpr0 = COPY %6 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[C]](i32), [[COPY]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 2 + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(i16) = G_SELECT [[ICMP]](i1), [[C1]], [[C2]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[SELECT]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = G_CONSTANT i32 0 + %1:_(i32) = COPY $vgpr0 + %2:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %3:_(i16) = G_CONSTANT i16 1 + %4:_(i16) = G_CONSTANT i16 2 + %5:_(i16) = G_SELECT %2(i1), %3, %4 + %6:_(i32) = G_ANYEXT %5(i16) + $vgpr0 = COPY %6(i32) + ... 
@@ -116,23 +116,23 @@ body: | ; CHECK-LABEL: name: test_select_s8 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[COPY]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 2 - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = G_CONSTANT i32 0 - %1:_(s32) = COPY $vgpr0 - - %2:_(s1) = G_ICMP intpred(ne), %0, %1 - %3:_(s8) = G_CONSTANT i8 1 - %4:_(s8) = G_CONSTANT i8 2 - %5:_(s8) = G_SELECT %2, %3, %4 - %6:_(s32) = G_ANYEXT %5 - $vgpr0 = COPY %6 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[C]](i32), [[COPY]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 2 + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(i16) = G_SELECT [[ICMP]](i1), [[C1]], [[C2]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[SELECT]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = G_CONSTANT i32 0 + %1:_(i32) = COPY $vgpr0 + %2:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %3:_(i8) = G_CONSTANT i8 1 + %4:_(i8) = G_CONSTANT i8 2 + %5:_(i8) = G_SELECT %2(i1), %3, %4 + %6:_(i32) = G_ANYEXT %5(i8) + $vgpr0 = COPY %6(i32) + ... @@ -144,23 +144,23 @@ body: | ; CHECK-LABEL: name: test_select_s7 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[COPY]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 2 - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = G_CONSTANT i32 0 - %1:_(s32) = COPY $vgpr0 - - %2:_(s1) = G_ICMP intpred(ne), %0, %1 - %3:_(s7) = G_CONSTANT i7 1 - %4:_(s7) = G_CONSTANT i7 2 - %5:_(s7) = G_SELECT %2, %3, %4 - %6:_(s32) = G_ANYEXT %5 - $vgpr0 = COPY %6 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[C]](i32), [[COPY]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 2 + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(i16) = G_SELECT [[ICMP]](i1), [[C1]], [[C2]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[SELECT]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = G_CONSTANT i32 0 + %1:_(i32) = COPY $vgpr0 + %2:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %3:_(i7) = G_CONSTANT i7 1 + %4:_(i7) = G_CONSTANT i7 2 + %5:_(i7) = G_SELECT %2(i1), %3, %4 + %6:_(i32) = G_ANYEXT %5(i7) + $vgpr0 = COPY %6(i32) + ... 
@@ -172,28 +172,28 @@ body: | ; CHECK-LABEL: name: test_select_s96 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5, $vgpr6 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY2]](s32), [[C]] - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY]](s96), 0 - ; CHECK-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s96), 64 - ; CHECK-NEXT: [[EXTRACT2:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY1]](s96), 0 - ; CHECK-NEXT: [[EXTRACT3:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](s96), 64 - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[EXTRACT]], [[EXTRACT2]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[EXTRACT1]], [[EXTRACT3]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SELECT]](s64) - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32), [[SELECT1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) - %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(s96) = COPY $vgpr3_vgpr4_vgpr5 - %2:_(s32) = COPY $vgpr6 - %3:_(s32) = G_CONSTANT i32 0 - - %4:_(s1) = G_ICMP intpred(ne), %2, %3 - %5:_(s96) = G_SELECT %4, %0, %1 - $vgpr0_vgpr1_vgpr2 = COPY %5 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY $vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[COPY2]](i32), [[C]] + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(i64) = G_EXTRACT [[COPY]](i96), 0 + ; CHECK-NEXT: [[EXTRACT1:%[0-9]+]]:_(i32) = G_EXTRACT [[COPY]](i96), 64 + ; CHECK-NEXT: [[EXTRACT2:%[0-9]+]]:_(i64) = G_EXTRACT [[COPY1]](i96), 0 + ; CHECK-NEXT: [[EXTRACT3:%[0-9]+]]:_(i32) = G_EXTRACT [[COPY1]](i96), 64 + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[EXTRACT]], [[EXTRACT2]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[EXTRACT1]], [[EXTRACT3]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SELECT]](i64) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i96) = G_MERGE_VALUES [[UV]](i32), [[UV1]](i32), [[SELECT1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](i96) + %0:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(i96) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(i32) = COPY $vgpr6 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(ne), %2(i32), %3 + %5:_(i96) = G_SELECT %4(i1), %0, %1 + $vgpr0_vgpr1_vgpr2 = COPY %5(i96) + ... 
@@ -206,24 +206,24 @@ body: | ; CHECK-LABEL: name: test_select_s128 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY2]](s32), [[C]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](s128) - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV3]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT]](s64), [[SELECT1]](s64) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - %2:_(s32) = COPY $vgpr8 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(ne), %2, %3 - %5:_(s128) = G_SELECT %4, %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %5 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[COPY2]](i32), [[C]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY1]](i128) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[UV]], [[UV2]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT]](i64), [[SELECT1]](i64) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(i32) = COPY $vgpr8 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(ne), %2(i32), %3 + %5:_(i128) = G_SELECT %4(i1), %0, %1 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %5(i128) ... 
@@ -236,34 +236,34 @@ body: | ; CHECK-LABEL: name: test_select_v2s8 ; CHECK: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr1_vgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr3_vgpr4 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[C]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[TRUNC]], [[TRUNC1]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[TRUNC2]], [[TRUNC3]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_CONSTANT i32 0 - %2:_(<2 x s32>) = COPY $vgpr1_vgpr2 - %3:_(<2 x s32>) = COPY $vgpr3_vgpr4 - %4:_(<2 x s8>) = G_TRUNC %2 - %5:_(<2 x s8>) = G_TRUNC %3 - - %6:_(s1) = G_ICMP intpred(ne), %0, %1 - %7:_(<2 x s8>) = G_SELECT %6, %4, %5 - %8:_(<2 x s32>) = G_ANYEXT %7 - $vgpr0_vgpr1 = COPY %8 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr1_vgpr2 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr3_vgpr4 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[COPY]](i32), [[C]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY2]](<2 x i32>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[UV]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[UV2]](i32) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(i16) = G_SELECT [[ICMP]](i1), [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[UV1]](i32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[UV3]](i32) + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(i16) = G_SELECT [[ICMP]](i1), [[TRUNC2]], [[TRUNC3]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[SELECT]](i16) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[SELECT1]](i16) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_CONSTANT i32 0 + %2:_(<2 x i32>) = COPY $vgpr1_vgpr2 + %3:_(<2 x i32>) = COPY $vgpr3_vgpr4 + %4:_(<2 x i8>) = G_TRUNC %2(<2 x i32>) + %5:_(<2 x i8>) = G_TRUNC %3(<2 x i32>) + %6:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %7:_(<2 x i8>) = G_SELECT %6(i1), %4, %5 + %8:_(<2 x i32>) = G_ANYEXT %7(<2 x i8>) + $vgpr0_vgpr1 = COPY %8(<2 x i32>) + ... 
@@ -276,38 +276,38 @@ body: | ; CHECK-LABEL: name: test_select_v3s8 ; CHECK: liveins: $vgpr0, $vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr4_vgpr5_vgpr6 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[C]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[TRUNC]], [[TRUNC1]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[TRUNC2]], [[TRUNC3]] - ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[TRUNC4]], [[TRUNC5]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT2]](s16) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_CONSTANT i32 0 - %2:_(<3 x s32>) = COPY $vgpr1_vgpr2_vgpr3 - %3:_(<3 x s32>) = COPY $vgpr4_vgpr5_vgpr6 - %4:_(<3 x s8>) = G_TRUNC %2 - %5:_(<3 x s8>) = G_TRUNC %3 - - %6:_(s1) = G_ICMP intpred(ne), %0, %1 - %7:_(<3 x s8>) = G_SELECT %6, %4, %5 - %8:_(<3 x s32>) = G_ANYEXT %7 - $vgpr0_vgpr1_vgpr2 = COPY %8 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr4_vgpr5_vgpr6 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[COPY]](i32), [[C]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY2]](<3 x i32>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[UV]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[UV3]](i32) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(i16) = G_SELECT [[ICMP]](i1), [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[UV1]](i32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[UV4]](i32) + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(i16) = G_SELECT [[ICMP]](i1), [[TRUNC2]], [[TRUNC3]] + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[UV2]](i32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[UV5]](i32) + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(i16) = G_SELECT [[ICMP]](i1), [[TRUNC4]], [[TRUNC5]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[SELECT]](i16) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[SELECT1]](i16) + ; 
CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[SELECT2]](i16) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32), [[ANYEXT2]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_CONSTANT i32 0 + %2:_(<3 x i32>) = COPY $vgpr1_vgpr2_vgpr3 + %3:_(<3 x i32>) = COPY $vgpr4_vgpr5_vgpr6 + %4:_(<3 x i8>) = G_TRUNC %2(<3 x i32>) + %5:_(<3 x i8>) = G_TRUNC %3(<3 x i32>) + %6:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %7:_(<3 x i8>) = G_SELECT %6(i1), %4, %5 + %8:_(<3 x i32>) = G_ANYEXT %7(<3 x i8>) + $vgpr0_vgpr1_vgpr2 = COPY %8(<3 x i32>) + ... @@ -320,42 +320,42 @@ body: | ; CHECK-LABEL: name: test_select_v4s8 ; CHECK: liveins: $vgpr0, $vgpr1_vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7_vgpr8 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr1_vgpr2_vgpr3_vgpr4 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr5_vgpr6_vgpr7_vgpr8 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[C]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[TRUNC]], [[TRUNC1]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[TRUNC2]], [[TRUNC3]] - ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[TRUNC4]], [[TRUNC5]] - ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32) - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[TRUNC6]], [[TRUNC7]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT2]](s16) - ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT3]](s16) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_CONSTANT i32 0 - %2:_(<4 x s32>) = COPY $vgpr1_vgpr2_vgpr3_vgpr4 - %3:_(<4 x s32>) = COPY $vgpr5_vgpr6_vgpr7_vgpr8 - %4:_(<4 x s8>) = G_TRUNC %2 - %5:_(<4 x s8>) = G_TRUNC %3 - - %6:_(s1) = G_ICMP intpred(ne), %0, %1 - %7:_(<4 x s8>) = G_SELECT %6, %4, %5 - %8:_(<4 x s32>) = G_ANYEXT %7 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %8 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr1_vgpr2_vgpr3_vgpr4 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr5_vgpr6_vgpr7_vgpr8 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP 
intpred(ne), [[COPY]](i32), [[C]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<4 x i32>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY2]](<4 x i32>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[UV]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[UV4]](i32) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(i16) = G_SELECT [[ICMP]](i1), [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[UV1]](i32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[UV5]](i32) + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(i16) = G_SELECT [[ICMP]](i1), [[TRUNC2]], [[TRUNC3]] + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[UV2]](i32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[UV6]](i32) + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(i16) = G_SELECT [[ICMP]](i1), [[TRUNC4]], [[TRUNC5]] + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[UV3]](i32) + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[UV7]](i32) + ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(i16) = G_SELECT [[ICMP]](i1), [[TRUNC6]], [[TRUNC7]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[SELECT]](i16) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[SELECT1]](i16) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[SELECT2]](i16) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(i32) = G_ANYEXT [[SELECT3]](i16) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32), [[ANYEXT2]](i32), [[ANYEXT3]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_CONSTANT i32 0 + %2:_(<4 x i32>) = COPY $vgpr1_vgpr2_vgpr3_vgpr4 + %3:_(<4 x i32>) = COPY $vgpr5_vgpr6_vgpr7_vgpr8 + %4:_(<4 x i8>) = G_TRUNC %2(<4 x i32>) + %5:_(<4 x i8>) = G_TRUNC %3(<4 x i32>) + %6:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %7:_(<4 x i8>) = G_SELECT %6(i1), %4, %5 + %8:_(<4 x i32>) = G_ANYEXT %7(<4 x i8>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %8(<4 x i32>) + ... 
@@ -367,21 +367,21 @@ body: | ; CHECK-LABEL: name: test_select_v2s16 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY2]](s32), [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(<2 x s16>) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[SELECT]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = G_CONSTANT i32 0 - - %4:_(s1) = G_ICMP intpred(ne), %2, %3 - %5:_(<2 x s16>) = G_SELECT %4, %0, %1 - $vgpr0 = COPY %5 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[COPY2]](i32), [[C]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(<2 x i16>) = G_SELECT [[ICMP]](i1), [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[SELECT]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(ne), %2(i32), %3 + %5:_(<2 x i16>) = G_SELECT %4(i1), %0, %1 + $vgpr0 = COPY %5(<2 x i16>) + ... @@ -393,47 +393,47 @@ body: | ; CHECK-LABEL: name: test_select_v3s16 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5, $vgpr6 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY2]](s32), [[C]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV3]](<2 x s16>), [[UV4]](<2 x s16>) - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(<4 x s16>) = G_SELECT [[ICMP]](s1), [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[SELECT]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], 
[[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV6]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>) - %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - %2:_(s32) = COPY $vgpr6 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(ne), %2, %3 - %5:_(<3 x s16>), %6:_(<3 x s16>) = G_UNMERGE_VALUES %0 - %7:_(<3 x s16>), %8:_(<3 x s16>) = G_UNMERGE_VALUES %1 - %9:_(<3 x s16>) = G_SELECT %4, %5, %7 - %10:_(<3 x s16>) = G_IMPLICIT_DEF - %11:_(<6 x s16>) = G_CONCAT_VECTORS %9, %10 - $vgpr0_vgpr1_vgpr2 = COPY %11 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[COPY2]](i32), [[C]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[UV]](<2 x i16>), [[UV1]](<2 x i16>) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<6 x i16>) + ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[UV3]](<2 x i16>), [[UV4]](<2 x i16>) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(<4 x i16>) = G_SELECT [[ICMP]](i1), [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[SELECT]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV7]](<2 x i16>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x i16>), [[UV9:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV8]](<2 x i16>) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV9]](<2 x i16>) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C2]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C2]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND2]], [[C1]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[UV6]](<2 x i16>), [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) + ; 
CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x i16>) + %0:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<6 x i16>) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(i32) = COPY $vgpr6 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(ne), %2(i32), %3 + %5:_(<3 x i16>), %6:_(<3 x i16>) = G_UNMERGE_VALUES %0(<6 x i16>) + %7:_(<3 x i16>), %8:_(<3 x i16>) = G_UNMERGE_VALUES %1(<6 x i16>) + %9:_(<3 x i16>) = G_SELECT %4(i1), %5, %7 + %10:_(<3 x i16>) = G_IMPLICIT_DEF + %11:_(<6 x i16>) = G_CONCAT_VECTORS %9(<3 x i16>), %10(<3 x i16>) + $vgpr0_vgpr1_vgpr2 = COPY %11(<6 x i16>) ... @@ -445,21 +445,21 @@ body: | ; CHECK-LABEL: name: test_select_v4s16 ; CHECK: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr1_vgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr3_vgpr4 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(<4 x s16>) = G_SELECT [[ICMP]](s1), [[COPY1]], [[COPY2]] - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](<4 x s16>) - %0:_(s32) = COPY $vgpr0 - %1:_(<4 x s16>) = COPY $vgpr1_vgpr2 - %2:_(<4 x s16>) = COPY $vgpr3_vgpr4 - %4:_(s32) = G_CONSTANT i32 0 - - %5:_(s1) = G_ICMP intpred(ne), %0, %4 - %6:_(<4 x s16>) = G_SELECT %5, %1, %2 - $vgpr0_vgpr1 = COPY %6 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr1_vgpr2 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr3_vgpr4 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[COPY]](i32), [[C]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(<4 x i16>) = G_SELECT [[ICMP]](i1), [[COPY1]], [[COPY2]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](<4 x i16>) + %0:_(i32) = COPY $vgpr0 + %1:_(<4 x i16>) = COPY $vgpr1_vgpr2 + %2:_(<4 x i16>) = COPY $vgpr3_vgpr4 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(ne), %0(i32), %3 + %5:_(<4 x i16>) = G_SELECT %4(i1), %1, %2 + $vgpr0_vgpr1 = COPY %5(<4 x i16>) + ... 
@@ -471,21 +471,21 @@ body: | ; CHECK-LABEL: name: test_select_v2s32 ; CHECK: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr1_vgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr3_vgpr4 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(<2 x s32>) = G_SELECT [[ICMP]](s1), [[COPY1]], [[COPY2]] - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](<2 x s32>) - %0:_(s32) = COPY $vgpr0 - %1:_(<2 x s32>) = COPY $vgpr1_vgpr2 - %2:_(<2 x s32>) = COPY $vgpr3_vgpr4 - %4:_(s32) = G_CONSTANT i32 0 - - %5:_(s1) = G_ICMP intpred(ne), %0, %4 - %6:_(<2 x s32>) = G_SELECT %5, %1, %2 - $vgpr0_vgpr1 = COPY %6 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr1_vgpr2 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr3_vgpr4 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[COPY]](i32), [[C]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(<2 x i32>) = G_SELECT [[ICMP]](i1), [[COPY1]], [[COPY2]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](<2 x i32>) + %0:_(i32) = COPY $vgpr0 + %1:_(<2 x i32>) = COPY $vgpr1_vgpr2 + %2:_(<2 x i32>) = COPY $vgpr3_vgpr4 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(ne), %0(i32), %3 + %5:_(<2 x i32>) = G_SELECT %4(i1), %1, %2 + $vgpr0_vgpr1 = COPY %5(<2 x i32>) + ... @@ -497,26 +497,26 @@ body: | ; CHECK-LABEL: name: test_select_v3s32 ; CHECK: liveins: $vgpr0, $vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr4_vgpr5_vgpr6 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[COPY]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[UV]], [[UV3]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV4]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[UV2]], [[UV5]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32), [[SELECT2]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - %0:_(s32) = G_CONSTANT i32 0 - %1:_(s32) = COPY $vgpr0 - %2:_(<3 x s32>) = COPY $vgpr1_vgpr2_vgpr3 - %3:_(<3 x s32>) = COPY $vgpr4_vgpr5_vgpr6 - - %4:_(s1) = G_ICMP intpred(ne), %0, %1 - %5:_(<3 x s32>) = G_SELECT %4, %2, %3 - $vgpr0_vgpr1_vgpr2 = COPY %5 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr4_vgpr5_vgpr6 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[C]](i32), [[COPY]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; CHECK-NEXT: 
[[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY2]](<3 x i32>) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[UV]], [[UV3]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[UV1]], [[UV4]] + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[UV2]], [[UV5]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[SELECT]](i32), [[SELECT1]](i32), [[SELECT2]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) + %0:_(i32) = G_CONSTANT i32 0 + %1:_(i32) = COPY $vgpr0 + %2:_(<3 x i32>) = COPY $vgpr1_vgpr2_vgpr3 + %3:_(<3 x i32>) = COPY $vgpr4_vgpr5_vgpr6 + %4:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %5:_(<3 x i32>) = G_SELECT %4(i1), %2, %3 + $vgpr0_vgpr1_vgpr2 = COPY %5(<3 x i32>) + ... @@ -528,25 +528,25 @@ body: | ; CHECK-LABEL: name: test_select_v4s32 ; CHECK: liveins: $vgpr0, $vgpr1_vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7_vgpr8 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr1_vgpr2_vgpr3_vgpr4 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr5_vgpr6_vgpr7_vgpr8 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[COPY]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>) - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(<2 x s32>) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(<2 x s32>) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV3]] - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[SELECT]](<2 x s32>), [[SELECT1]](<2 x s32>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) - %0:_(s32) = G_CONSTANT i32 0 - %1:_(s32) = COPY $vgpr0 - %2:_(<4 x s32>) = COPY $vgpr1_vgpr2_vgpr3_vgpr4 - %3:_(<4 x s32>) = COPY $vgpr5_vgpr6_vgpr7_vgpr8 - - %4:_(s1) = G_ICMP intpred(ne), %0, %1 - %5:_(<4 x s32>) = G_SELECT %4, %2, %3 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %5 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr1_vgpr2_vgpr3_vgpr4 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr5_vgpr6_vgpr7_vgpr8 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[C]](i32), [[COPY]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i32>), [[UV1:%[0-9]+]]:_(<2 x i32>) = G_UNMERGE_VALUES [[COPY1]](<4 x i32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x i32>), [[UV3:%[0-9]+]]:_(<2 x i32>) = G_UNMERGE_VALUES [[COPY2]](<4 x i32>) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(<2 x i32>) = G_SELECT [[ICMP]](i1), [[UV]], [[UV2]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(<2 x i32>) = G_SELECT [[ICMP]](i1), [[UV1]], [[UV3]] + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i32>) = G_CONCAT_VECTORS [[SELECT]](<2 x i32>), [[SELECT1]](<2 x i32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x i32>) + %0:_(i32) = G_CONSTANT i32 0 + %1:_(i32) = COPY $vgpr0 + %2:_(<4 x i32>) = COPY $vgpr1_vgpr2_vgpr3_vgpr4 + %3:_(<4 x i32>) = COPY $vgpr5_vgpr6_vgpr7_vgpr8 + %4:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %5:_(<4 x i32>) = G_SELECT %4(i1), %2, %3 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %5(<4 x i32>) + ... 
@@ -558,25 +558,25 @@ body: | ; CHECK-LABEL: name: test_select_v2s64 ; CHECK: liveins: $vgpr0, $vgpr1_vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7_vgpr8 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr1_vgpr2_vgpr3_vgpr4 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr5_vgpr6_vgpr7_vgpr8 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[COPY]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY2]](<2 x s64>) - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV3]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - %0:_(s32) = G_CONSTANT i32 0 - %1:_(s32) = COPY $vgpr0 - %2:_(<2 x s64>) = COPY $vgpr1_vgpr2_vgpr3_vgpr4 - %3:_(<2 x s64>) = COPY $vgpr5_vgpr6_vgpr7_vgpr8 - - %4:_(s1) = G_ICMP intpred(ne), %0, %1 - %5:_(<2 x s64>) = G_SELECT %4, %2, %3 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %5 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr1_vgpr2_vgpr3_vgpr4 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr5_vgpr6_vgpr7_vgpr8 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[C]](i32), [[COPY]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY1]](<2 x i64>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY2]](<2 x i64>) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[UV]], [[UV2]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[UV1]], [[UV3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[SELECT]](i64), [[SELECT1]](i64) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) + %0:_(i32) = G_CONSTANT i32 0 + %1:_(i32) = COPY $vgpr0 + %2:_(<2 x i64>) = COPY $vgpr1_vgpr2_vgpr3_vgpr4 + %3:_(<2 x i64>) = COPY $vgpr5_vgpr6_vgpr7_vgpr8 + %4:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %5:_(<2 x i64>) = G_SELECT %4(i1), %2, %3 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %5(<2 x i64>) + ... 
@@ -588,21 +588,21 @@ body: | ; CHECK-LABEL: name: test_select_p0 ; CHECK: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr1_vgpr2 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY $vgpr3_vgpr4 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[COPY]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[COPY1]], [[COPY2]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[C]](i32), [[COPY]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](i1), [[COPY1]], [[COPY2]] ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](p0) - %0:_(s32) = G_CONSTANT i32 0 - %1:_(s32) = COPY $vgpr0 + %0:_(i32) = G_CONSTANT i32 0 + %1:_(i32) = COPY $vgpr0 %2:_(p0) = COPY $vgpr1_vgpr2 %3:_(p0) = COPY $vgpr3_vgpr4 + %4:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %5:_(p0) = G_SELECT %4(i1), %2, %3 + $vgpr0_vgpr1 = COPY %5(p0) - %4:_(s1) = G_ICMP intpred(ne), %0, %1 - %5:_(p0) = G_SELECT %4, %2, %3 - $vgpr0_vgpr1 = COPY %5 ... @@ -614,21 +614,21 @@ body: | ; CHECK-LABEL: name: test_select_p1 ; CHECK: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr1_vgpr2 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p1) = COPY $vgpr3_vgpr4 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[COPY]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(p1) = G_SELECT [[ICMP]](s1), [[COPY1]], [[COPY2]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[C]](i32), [[COPY]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(p1) = G_SELECT [[ICMP]](i1), [[COPY1]], [[COPY2]] ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](p1) - %0:_(s32) = G_CONSTANT i32 0 - %1:_(s32) = COPY $vgpr0 + %0:_(i32) = G_CONSTANT i32 0 + %1:_(i32) = COPY $vgpr0 %2:_(p1) = COPY $vgpr1_vgpr2 %3:_(p1) = COPY $vgpr3_vgpr4 + %4:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %5:_(p1) = G_SELECT %4(i1), %2, %3 + $vgpr0_vgpr1 = COPY %5(p1) - %4:_(s1) = G_ICMP intpred(ne), %0, %1 - %5:_(p1) = G_SELECT %4, %2, %3 - $vgpr0_vgpr1 = COPY %5 ... 
@@ -640,21 +640,21 @@ body: | ; CHECK-LABEL: name: test_select_p2 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p2) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p2) = COPY $vgpr2 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[COPY]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(p2) = G_SELECT [[ICMP]](s1), [[COPY1]], [[COPY2]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[C]](i32), [[COPY]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(p2) = G_SELECT [[ICMP]](i1), [[COPY1]], [[COPY2]] ; CHECK-NEXT: $vgpr0 = COPY [[SELECT]](p2) - %0:_(s32) = G_CONSTANT i32 0 - %1:_(s32) = COPY $vgpr0 + %0:_(i32) = G_CONSTANT i32 0 + %1:_(i32) = COPY $vgpr0 %2:_(p2) = COPY $vgpr1 %3:_(p2) = COPY $vgpr2 + %4:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %5:_(p2) = G_SELECT %4(i1), %2, %3 + $vgpr0 = COPY %5(p2) - %4:_(s1) = G_ICMP intpred(ne), %0, %1 - %5:_(p2) = G_SELECT %4, %2, %3 - $vgpr0 = COPY %5 ... @@ -666,21 +666,21 @@ body: | ; CHECK-LABEL: name: test_select_p3 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[COPY]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(p3) = G_SELECT [[ICMP]](s1), [[COPY1]], [[COPY2]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[C]](i32), [[COPY]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(p3) = G_SELECT [[ICMP]](i1), [[COPY1]], [[COPY2]] ; CHECK-NEXT: $vgpr0 = COPY [[SELECT]](p3) - %0:_(s32) = G_CONSTANT i32 0 - %1:_(s32) = COPY $vgpr0 + %0:_(i32) = G_CONSTANT i32 0 + %1:_(i32) = COPY $vgpr0 %2:_(p3) = COPY $vgpr1 %3:_(p3) = COPY $vgpr2 + %4:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %5:_(p3) = G_SELECT %4(i1), %2, %3 + $vgpr0 = COPY %5(p3) - %4:_(s1) = G_ICMP intpred(ne), %0, %1 - %5:_(p3) = G_SELECT %4, %2, %3 - $vgpr0 = COPY %5 ... 
@@ -692,21 +692,21 @@ body: | ; CHECK-LABEL: name: test_select_p4 ; CHECK: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p4) = COPY $vgpr1_vgpr2 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p4) = COPY $vgpr3_vgpr4 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[COPY]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(p4) = G_SELECT [[ICMP]](s1), [[COPY1]], [[COPY2]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[C]](i32), [[COPY]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(p4) = G_SELECT [[ICMP]](i1), [[COPY1]], [[COPY2]] ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](p4) - %0:_(s32) = G_CONSTANT i32 0 - %1:_(s32) = COPY $vgpr0 + %0:_(i32) = G_CONSTANT i32 0 + %1:_(i32) = COPY $vgpr0 %2:_(p4) = COPY $vgpr1_vgpr2 %3:_(p4) = COPY $vgpr3_vgpr4 + %4:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %5:_(p4) = G_SELECT %4(i1), %2, %3 + $vgpr0_vgpr1 = COPY %5(p4) - %4:_(s1) = G_ICMP intpred(ne), %0, %1 - %5:_(p4) = G_SELECT %4, %2, %3 - $vgpr0_vgpr1 = COPY %5 ... @@ -719,20 +719,20 @@ body: | ; CHECK-LABEL: name: test_select_p5 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p5) = COPY $vgpr2 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[COPY]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(p5) = G_SELECT [[ICMP]](s1), [[COPY1]], [[COPY2]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[C]](i32), [[COPY]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(p5) = G_SELECT [[ICMP]](i1), [[COPY1]], [[COPY2]] ; CHECK-NEXT: $vgpr0 = COPY [[SELECT]](p5) - %0:_(s32) = G_CONSTANT i32 0 - %1:_(s32) = COPY $vgpr0 + %0:_(i32) = G_CONSTANT i32 0 + %1:_(i32) = COPY $vgpr0 %2:_(p5) = COPY $vgpr1 %3:_(p5) = COPY $vgpr2 - %4:_(s1) = G_ICMP intpred(ne), %0, %1 - %5:_(p5) = G_SELECT %4, %2, %3 - $vgpr0 = COPY %5 + %4:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %5:_(p5) = G_SELECT %4(i1), %2, %3 + $vgpr0 = COPY %5(p5) ... 
@@ -745,20 +745,20 @@ body: | ; CHECK-LABEL: name: test_select_p999 ; CHECK: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr1_vgpr2 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p999) = COPY $vgpr3_vgpr4 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[COPY]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(p999) = G_SELECT [[ICMP]](s1), [[COPY1]], [[COPY2]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[C]](i32), [[COPY]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(p999) = G_SELECT [[ICMP]](i1), [[COPY1]], [[COPY2]] ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](p999) - %0:_(s32) = G_CONSTANT i32 0 - %1:_(s32) = COPY $vgpr0 + %0:_(i32) = G_CONSTANT i32 0 + %1:_(i32) = COPY $vgpr0 %2:_(p999) = COPY $vgpr1_vgpr2 %3:_(p999) = COPY $vgpr3_vgpr4 - %4:_(s1) = G_ICMP intpred(ne), %0, %1 - %5:_(p999) = G_SELECT %4, %2, %3 - $vgpr0_vgpr1 = COPY %5 + %4:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %5:_(p999) = G_SELECT %4(i1), %2, %3 + $vgpr0_vgpr1 = COPY %5(p999) ... @@ -772,20 +772,20 @@ body: | ; CHECK-LABEL: name: test_select_v2p3 ; CHECK: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr1_vgpr2 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr3_vgpr4 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(<2 x p3>) = G_SELECT [[ICMP]](s1), [[COPY1]], [[COPY2]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[COPY]](i32), [[C]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(<2 x p3>) = G_SELECT [[ICMP]](i1), [[COPY1]], [[COPY2]] ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](<2 x p3>) - %0:_(s32) = COPY $vgpr0 + %0:_(i32) = COPY $vgpr0 %1:_(<2 x p3>) = COPY $vgpr1_vgpr2 %2:_(<2 x p3>) = COPY $vgpr3_vgpr4 - %4:_(s32) = G_CONSTANT i32 0 - %5:_(s1) = G_ICMP intpred(ne), %0, %4 - %6:_(<2 x p3>) = G_SELECT %5, %1, %2 - $vgpr0_vgpr1 = COPY %6 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(ne), %0(i32), %3 + %5:_(<2 x p3>) = G_SELECT %4(i1), %1, %2 + $vgpr0_vgpr1 = COPY %5(<2 x p3>) ... 
@@ -798,26 +798,26 @@ body: | ; CHECK-LABEL: name: test_select_v3p3 ; CHECK: liveins: $vgpr0, $vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x p3>) = COPY $vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<3 x p3>) = COPY $vgpr4_vgpr5_vgpr6 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[COPY]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[C]](i32), [[COPY]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3), [[UV2:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY1]](<3 x p3>) ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(p3), [[UV4:%[0-9]+]]:_(p3), [[UV5:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY2]](<3 x p3>) - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(p3) = G_SELECT [[ICMP]](s1), [[UV]], [[UV3]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(p3) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV4]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(p3) = G_SELECT [[ICMP]](s1), [[UV2]], [[UV5]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(p3) = G_SELECT [[ICMP]](i1), [[UV]], [[UV3]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(p3) = G_SELECT [[ICMP]](i1), [[UV1]], [[UV4]] + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(p3) = G_SELECT [[ICMP]](i1), [[UV2]], [[UV5]] ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x p3>) = G_BUILD_VECTOR [[SELECT]](p3), [[SELECT1]](p3), [[SELECT2]](p3) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x p3>) - %0:_(s32) = G_CONSTANT i32 0 - %1:_(s32) = COPY $vgpr0 + %0:_(i32) = G_CONSTANT i32 0 + %1:_(i32) = COPY $vgpr0 %2:_(<3 x p3>) = COPY $vgpr1_vgpr2_vgpr3 %3:_(<3 x p3>) = COPY $vgpr4_vgpr5_vgpr6 + %4:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %5:_(<3 x p3>) = G_SELECT %4(i1), %2, %3 + $vgpr0_vgpr1_vgpr2 = COPY %5(<3 x p3>) - %4:_(s1) = G_ICMP intpred(ne), %0, %1 - %5:_(<3 x p3>) = G_SELECT %4, %2, %3 - $vgpr0_vgpr1_vgpr2 = COPY %5 ... 
@@ -830,25 +830,25 @@ body: | ; CHECK-LABEL: name: test_select_v4p3 ; CHECK: liveins: $vgpr0, $vgpr1_vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7_vgpr8 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x p3>) = COPY $vgpr1_vgpr2_vgpr3_vgpr4 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x p3>) = COPY $vgpr5_vgpr6_vgpr7_vgpr8 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[COPY]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[C]](i32), [[COPY]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x p3>), [[UV1:%[0-9]+]]:_(<2 x p3>) = G_UNMERGE_VALUES [[COPY1]](<4 x p3>) ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x p3>), [[UV3:%[0-9]+]]:_(<2 x p3>) = G_UNMERGE_VALUES [[COPY2]](<4 x p3>) - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(<2 x p3>) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(<2 x p3>) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV3]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(<2 x p3>) = G_SELECT [[ICMP]](i1), [[UV]], [[UV2]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(<2 x p3>) = G_SELECT [[ICMP]](i1), [[UV1]], [[UV3]] ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x p3>) = G_CONCAT_VECTORS [[SELECT]](<2 x p3>), [[SELECT1]](<2 x p3>) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x p3>) - %0:_(s32) = G_CONSTANT i32 0 - %1:_(s32) = COPY $vgpr0 + %0:_(i32) = G_CONSTANT i32 0 + %1:_(i32) = COPY $vgpr0 %2:_(<4 x p3>) = COPY $vgpr1_vgpr2_vgpr3_vgpr4 %3:_(<4 x p3>) = COPY $vgpr5_vgpr6_vgpr7_vgpr8 + %4:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %5:_(<4 x p3>) = G_SELECT %4(i1), %2, %3 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %5(<4 x p3>) - %4:_(s1) = G_ICMP intpred(ne), %0, %1 - %5:_(<4 x p3>) = G_SELECT %4, %2, %3 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %5 ... 
@@ -861,25 +861,25 @@ body: | ; CHECK-LABEL: name: test_select_v4p5 ; CHECK: liveins: $vgpr0, $vgpr1_vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7_vgpr8 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x p5>) = COPY $vgpr1_vgpr2_vgpr3_vgpr4 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x p5>) = COPY $vgpr5_vgpr6_vgpr7_vgpr8 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[COPY]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[C]](i32), [[COPY]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x p5>), [[UV1:%[0-9]+]]:_(<2 x p5>) = G_UNMERGE_VALUES [[COPY1]](<4 x p5>) ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x p5>), [[UV3:%[0-9]+]]:_(<2 x p5>) = G_UNMERGE_VALUES [[COPY2]](<4 x p5>) - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(<2 x p5>) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(<2 x p5>) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV3]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(<2 x p5>) = G_SELECT [[ICMP]](i1), [[UV]], [[UV2]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(<2 x p5>) = G_SELECT [[ICMP]](i1), [[UV1]], [[UV3]] ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x p5>) = G_CONCAT_VECTORS [[SELECT]](<2 x p5>), [[SELECT1]](<2 x p5>) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x p5>) - %0:_(s32) = G_CONSTANT i32 0 - %1:_(s32) = COPY $vgpr0 + %0:_(i32) = G_CONSTANT i32 0 + %1:_(i32) = COPY $vgpr0 %2:_(<4 x p5>) = COPY $vgpr1_vgpr2_vgpr3_vgpr4 %3:_(<4 x p5>) = COPY $vgpr5_vgpr6_vgpr7_vgpr8 + %4:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %5:_(<4 x p5>) = G_SELECT %4(i1), %2, %3 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %5(<4 x p5>) - %4:_(s1) = G_ICMP intpred(ne), %0, %1 - %5:_(<4 x p5>) = G_SELECT %4, %2, %3 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %5 ... 
@@ -892,25 +892,25 @@ body: | ; CHECK-LABEL: name: test_select_v2p0 ; CHECK: liveins: $vgpr0, $vgpr1_vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7_vgpr8 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr1_vgpr2_vgpr3_vgpr4 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr5_vgpr6_vgpr7_vgpr8 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[COPY]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[C]](i32), [[COPY]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(p0), [[UV1:%[0-9]+]]:_(p0) = G_UNMERGE_VALUES [[COPY1]](<2 x p0>) ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(p0), [[UV3:%[0-9]+]]:_(p0) = G_UNMERGE_VALUES [[COPY2]](<2 x p0>) - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV3]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](i1), [[UV]], [[UV2]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](i1), [[UV1]], [[UV3]] ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[SELECT]](p0), [[SELECT1]](p0) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p0>) - %0:_(s32) = G_CONSTANT i32 0 - %1:_(s32) = COPY $vgpr0 + %0:_(i32) = G_CONSTANT i32 0 + %1:_(i32) = COPY $vgpr0 %2:_(<2 x p0>) = COPY $vgpr1_vgpr2_vgpr3_vgpr4 %3:_(<2 x p0>) = COPY $vgpr5_vgpr6_vgpr7_vgpr8 + %4:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %5:_(<2 x p0>) = G_SELECT %4(i1), %2, %3 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %5(<2 x p0>) - %4:_(s1) = G_ICMP intpred(ne), %0, %1 - %5:_(<2 x p0>) = G_SELECT %4, %2, %3 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %5 ... 
@@ -923,27 +923,27 @@ body: | ; CHECK-LABEL: name: test_select_v4p0 ; CHECK: liveins: $vgpr0, $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8, $vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x p0>) = COPY $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x p0>) = COPY $vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[COPY]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[C]](i32), [[COPY]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(p0), [[UV1:%[0-9]+]]:_(p0), [[UV2:%[0-9]+]]:_(p0), [[UV3:%[0-9]+]]:_(p0) = G_UNMERGE_VALUES [[COPY1]](<4 x p0>) ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(p0), [[UV5:%[0-9]+]]:_(p0), [[UV6:%[0-9]+]]:_(p0), [[UV7:%[0-9]+]]:_(p0) = G_UNMERGE_VALUES [[COPY2]](<4 x p0>) - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[UV]], [[UV4]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV5]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[UV2]], [[UV6]] - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[UV3]], [[UV7]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](i1), [[UV]], [[UV4]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](i1), [[UV1]], [[UV5]] + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](i1), [[UV2]], [[UV6]] + ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](i1), [[UV3]], [[UV7]] ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x p0>) = G_BUILD_VECTOR [[SELECT]](p0), [[SELECT1]](p0), [[SELECT2]](p0), [[SELECT3]](p0) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x p0>) - %0:_(s32) = G_CONSTANT i32 0 - %1:_(s32) = COPY $vgpr0 + %0:_(i32) = G_CONSTANT i32 0 + %1:_(i32) = COPY $vgpr0 %2:_(<4 x p0>) = COPY $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 %3:_(<4 x p0>) = COPY $vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16 + %4:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %5:_(<4 x p0>) = G_SELECT %4(i1), %2, %3 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %5(<4 x p0>) - %4:_(s1) = G_ICMP intpred(ne), %0, %1 - %5:_(<4 x p0>) = G_SELECT %4, %2, %3 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %5 ... 
@@ -956,38 +956,38 @@ body: | ; CHECK-LABEL: name: test_select_v2s96 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s96>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s96>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[C]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s96), [[UV1:%[0-9]+]]:_(s96) = G_UNMERGE_VALUES [[DEF]](<2 x s96>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s96), [[UV3:%[0-9]+]]:_(s96) = G_UNMERGE_VALUES [[DEF1]](<2 x s96>) - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[UV]](s96), 0 - ; CHECK-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[UV]](s96), 64 - ; CHECK-NEXT: [[EXTRACT2:%[0-9]+]]:_(s64) = G_EXTRACT [[UV2]](s96), 0 - ; CHECK-NEXT: [[EXTRACT3:%[0-9]+]]:_(s32) = G_EXTRACT [[UV2]](s96), 64 - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[EXTRACT]], [[EXTRACT2]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[EXTRACT1]], [[EXTRACT3]] - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SELECT]](s64) - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[UV4]](s32), [[UV5]](s32), [[SELECT1]](s32) - ; CHECK-NEXT: [[EXTRACT4:%[0-9]+]]:_(s64) = G_EXTRACT [[UV1]](s96), 0 - ; CHECK-NEXT: [[EXTRACT5:%[0-9]+]]:_(s32) = G_EXTRACT [[UV1]](s96), 64 - ; CHECK-NEXT: [[EXTRACT6:%[0-9]+]]:_(s64) = G_EXTRACT [[UV3]](s96), 0 - ; CHECK-NEXT: [[EXTRACT7:%[0-9]+]]:_(s32) = G_EXTRACT [[UV3]](s96), 64 - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[EXTRACT4]], [[EXTRACT6]] - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[EXTRACT5]], [[EXTRACT7]] - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SELECT2]](s64) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[UV6]](s32), [[UV7]](s32), [[SELECT3]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s96>) = G_BUILD_VECTOR [[MV]](s96), [[MV1]](s96) - ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s96>) - %0:_(<2 x s96>) = G_IMPLICIT_DEF - %1:_(<2 x s96>) = G_IMPLICIT_DEF - %2:_(s32) = COPY $vgpr0 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(ne), %2, %3 - %5:_(<2 x s96>) = G_SELECT %4, %0, %1 - S_NOP 0, implicit %5 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x i96>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<2 x i96>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[COPY]](i32), [[C]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i96), [[UV1:%[0-9]+]]:_(i96) = G_UNMERGE_VALUES [[DEF]](<2 x i96>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i96), [[UV3:%[0-9]+]]:_(i96) = G_UNMERGE_VALUES [[DEF1]](<2 x i96>) + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(i64) = G_EXTRACT [[UV]](i96), 0 + ; CHECK-NEXT: [[EXTRACT1:%[0-9]+]]:_(i32) = G_EXTRACT [[UV]](i96), 64 + ; CHECK-NEXT: [[EXTRACT2:%[0-9]+]]:_(i64) = G_EXTRACT [[UV2]](i96), 0 + ; CHECK-NEXT: [[EXTRACT3:%[0-9]+]]:_(i32) = G_EXTRACT [[UV2]](i96), 64 + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[EXTRACT]], [[EXTRACT2]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[EXTRACT1]], [[EXTRACT3]] + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SELECT]](i64) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i96) = G_MERGE_VALUES [[UV4]](i32), [[UV5]](i32), 
[[SELECT1]](i32) + ; CHECK-NEXT: [[EXTRACT4:%[0-9]+]]:_(i64) = G_EXTRACT [[UV1]](i96), 0 + ; CHECK-NEXT: [[EXTRACT5:%[0-9]+]]:_(i32) = G_EXTRACT [[UV1]](i96), 64 + ; CHECK-NEXT: [[EXTRACT6:%[0-9]+]]:_(i64) = G_EXTRACT [[UV3]](i96), 0 + ; CHECK-NEXT: [[EXTRACT7:%[0-9]+]]:_(i32) = G_EXTRACT [[UV3]](i96), 64 + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[EXTRACT4]], [[EXTRACT6]] + ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[EXTRACT5]], [[EXTRACT7]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SELECT2]](i64) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i96) = G_MERGE_VALUES [[UV6]](i32), [[UV7]](i32), [[SELECT3]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i96>) = G_BUILD_VECTOR [[MV]](i96), [[MV1]](i96) + ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x i96>) + %0:_(<2 x i96>) = G_IMPLICIT_DEF + %1:_(<2 x i96>) = G_IMPLICIT_DEF + %2:_(i32) = COPY $vgpr0 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(ne), %2(i32), %3 + %5:_(<2 x i96>) = G_SELECT %4(i1), %0, %1 + S_NOP 0, implicit %5(<2 x i96>) ... @@ -1001,31 +1001,31 @@ body: | ; CHECK-LABEL: name: test_select_v8p0 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<8 x p0>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<8 x p0>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[COPY]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[C]](i32), [[COPY]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(p0), [[UV1:%[0-9]+]]:_(p0), [[UV2:%[0-9]+]]:_(p0), [[UV3:%[0-9]+]]:_(p0), [[UV4:%[0-9]+]]:_(p0), [[UV5:%[0-9]+]]:_(p0), [[UV6:%[0-9]+]]:_(p0), [[UV7:%[0-9]+]]:_(p0) = G_UNMERGE_VALUES [[DEF]](<8 x p0>) ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(p0), [[UV9:%[0-9]+]]:_(p0), [[UV10:%[0-9]+]]:_(p0), [[UV11:%[0-9]+]]:_(p0), [[UV12:%[0-9]+]]:_(p0), [[UV13:%[0-9]+]]:_(p0), [[UV14:%[0-9]+]]:_(p0), [[UV15:%[0-9]+]]:_(p0) = G_UNMERGE_VALUES [[DEF1]](<8 x p0>) - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[UV]], [[UV8]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV9]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[UV2]], [[UV10]] - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[UV3]], [[UV11]] - ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[UV4]], [[UV12]] - ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[UV5]], [[UV13]] - ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[UV6]], [[UV14]] - ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[UV7]], [[UV15]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](i1), [[UV]], [[UV8]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](i1), [[UV1]], [[UV9]] + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](i1), [[UV2]], [[UV10]] + ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](i1), [[UV3]], [[UV11]] + ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](i1), [[UV4]], [[UV12]] + ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](i1), [[UV5]], [[UV13]] + ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](i1), [[UV6]], [[UV14]] + ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](i1), [[UV7]], [[UV15]] 
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x p0>) = G_BUILD_VECTOR [[SELECT]](p0), [[SELECT1]](p0), [[SELECT2]](p0), [[SELECT3]](p0), [[SELECT4]](p0), [[SELECT5]](p0), [[SELECT6]](p0), [[SELECT7]](p0) ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<8 x p0>) - %0:_(s32) = G_CONSTANT i32 0 - %1:_(s32) = COPY $vgpr0 + %0:_(i32) = G_CONSTANT i32 0 + %1:_(i32) = COPY $vgpr0 %2:_(<8 x p0>) = G_IMPLICIT_DEF %3:_(<8 x p0>) = G_IMPLICIT_DEF + %4:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %5:_(<8 x p0>) = G_SELECT %4(i1), %2, %3 + S_NOP 0, implicit %5(<8 x p0>) - %4:_(s1) = G_ICMP intpred(ne), %0, %1 - %5:_(<8 x p0>) = G_SELECT %4, %2, %3 - S_NOP 0, implicit %5 ... --- name: test_select_v2s128 @@ -1036,32 +1036,32 @@ body: | ; CHECK-LABEL: name: test_select_v2s128 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s128>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY2]](s32), [[C]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](<2 x s128>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s128), [[UV3:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY1]](<2 x s128>) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV2]](s128) - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[UV4]], [[UV6]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[UV5]], [[UV7]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT]](s64), [[SELECT1]](s64) - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s64), [[UV11:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV3]](s128) - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[UV8]], [[UV10]] - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[UV9]], [[UV11]] - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT2]](s64), [[SELECT3]](s64) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s128>) = G_BUILD_VECTOR [[MV]](s128), [[MV1]](s128) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<2 x s128>) - %0:_(<2 x s128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 , - %1:_(<2 x s128>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - %2:_(s32) = COPY $vgpr16 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(ne), %2, %3 - %5:_(<2 x s128>) = G_SELECT %4, %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %5 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i128>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr16 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[COPY2]](i32), [[C]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i128), [[UV1:%[0-9]+]]:_(i128) = G_UNMERGE_VALUES [[COPY]](<2 x i128>) + ; CHECK-NEXT: 
[[UV2:%[0-9]+]]:_(i128), [[UV3:%[0-9]+]]:_(i128) = G_UNMERGE_VALUES [[COPY1]](<2 x i128>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV]](i128) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(i64), [[UV7:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV2]](i128) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[UV4]], [[UV6]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[UV5]], [[UV7]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT]](i64), [[SELECT1]](i64) + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(i64), [[UV9:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV1]](i128) + ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(i64), [[UV11:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV3]](i128) + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[UV8]], [[UV10]] + ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[UV9]], [[UV11]] + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT2]](i64), [[SELECT3]](i64) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i128>) = G_BUILD_VECTOR [[MV]](i128), [[MV1]](i128) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<2 x i128>) + %0:_(<2 x i128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(<2 x i128>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %2:_(i32) = COPY $vgpr16 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(ne), %2(i32), %3 + %5:_(<2 x i128>) = G_SELECT %4(i1), %0, %1 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %5(<2 x i128>) ... @@ -1073,28 +1073,28 @@ body: | ; CHECK-LABEL: name: test_vselect_v2s32 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr6_vgpr7 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV2]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV3]] - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<2 x s32>) - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[UV4]], [[UV6]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[UV5]], [[UV7]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = COPY $vgpr4_vgpr5 - %3:_(<2 x s32>) = COPY $vgpr6_vgpr7 - - %4:_(<2 x s1>) = G_ICMP intpred(ne), %0, %1 - %5:_(<2 x s32>) = G_SELECT %4, %2, %3 - $vgpr0_vgpr1 = COPY %5 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr4_vgpr5 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr6_vgpr7 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x 
i32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV]](i32), [[UV2]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV1]](i32), [[UV3]] + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY2]](<2 x i32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY3]](<2 x i32>) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[UV4]], [[UV6]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[UV5]], [[UV7]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SELECT]](i32), [[SELECT1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i32>) = COPY $vgpr4_vgpr5 + %3:_(<2 x i32>) = COPY $vgpr6_vgpr7 + %4:_(<2 x i1>) = G_ICMP intpred(ne), %0(<2 x i32>), %1 + %5:_(<2 x i32>) = G_SELECT %4(<2 x i1>), %2, %3 + $vgpr0_vgpr1 = COPY %5(<2 x i32>) + ... @@ -1106,28 +1106,28 @@ body: | ; CHECK-LABEL: name: test_vselect_v3s32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5, $vgpr6_vgpr7_vgpr8 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV3]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV4]] - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[UV5]] - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; CHECK-NEXT: [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[UV6]], [[UV9]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[UV7]], [[UV10]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[UV8]], [[UV11]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32), [[SELECT2]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - %2:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 - - %3:_(<3 x s1>) = G_ICMP intpred(ne), %0, %1 - %4:_(<3 x s32>) = G_SELECT %3, %1, %2 - $vgpr0_vgpr1_vgpr2 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr6_vgpr7_vgpr8 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV]](i32), [[UV3]] + ; CHECK-NEXT: 
[[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV1]](i32), [[UV4]] + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV2]](i32), [[UV5]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; CHECK-NEXT: [[UV9:%[0-9]+]]:_(i32), [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY2]](<3 x i32>) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[UV6]], [[UV9]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[UV7]], [[UV10]] + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[UV8]], [[UV11]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[SELECT]](i32), [[SELECT1]](i32), [[SELECT2]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(<3 x i32>) = COPY $vgpr6_vgpr7_vgpr8 + %3:_(<3 x i1>) = G_ICMP intpred(ne), %0(<3 x i32>), %1 + %4:_(<3 x i32>) = G_SELECT %3(<3 x i1>), %1, %2 + $vgpr0_vgpr1_vgpr2 = COPY %4(<3 x i32>) + ... @@ -1139,30 +1139,30 @@ body: | ; CHECK-LABEL: name: test_vselect_v4s32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV4]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV5]] - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[UV6]] - ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV3]](s32), [[UV7]] - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) - ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>) - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[UV8]], [[UV12]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[UV9]], [[UV13]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[UV10]], [[UV14]] - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[UV11]], [[UV15]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32), [[SELECT2]](s32), [[SELECT3]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - %2:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - - %3:_(<4 x s1>) = G_ICMP intpred(ne), %0, %1 - %4:_(<4 x s32>) = G_SELECT %3, %1, %2 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 
x i32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<4 x i32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV]](i32), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV1]](i32), [[UV5]] + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV2]](i32), [[UV6]] + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV3]](i32), [[UV7]] + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32), [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<4 x i32>) + ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32), [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY2]](<4 x i32>) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[UV8]], [[UV12]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[UV9]], [[UV13]] + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[UV10]], [[UV14]] + ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(i32) = G_SELECT [[ICMP3]](i1), [[UV11]], [[UV15]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[SELECT]](i32), [[SELECT1]](i32), [[SELECT2]](i32), [[SELECT3]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<4 x i32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(<4 x i32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + %3:_(<4 x i1>) = G_ICMP intpred(ne), %0(<4 x i32>), %1 + %4:_(<4 x i32>) = G_SELECT %3(<4 x i1>), %1, %2 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %4(<4 x i32>) + ... 
@@ -1174,26 +1174,26 @@ body: | ; CHECK-LABEL: name: test_vselect_v2s64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s64), [[UV2]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s64), [[UV3]] - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY2]](<2 x s64>) - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[UV4]], [[UV6]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV5]], [[UV7]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - %2:_(<2 x s64>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - - %3:_(<2 x s1>) = G_ICMP intpred(ne), %0, %1 - %4:_(<2 x s64>) = G_SELECT %3, %1, %2 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY1]](<2 x i64>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV]](i64), [[UV2]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV1]](i64), [[UV3]] + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY1]](<2 x i64>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(i64), [[UV7:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY2]](<2 x i64>) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[UV4]], [[UV6]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[UV5]], [[UV7]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[SELECT]](i64), [[SELECT1]](i64) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(<2 x i64>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + %3:_(<2 x i1>) = G_ICMP intpred(ne), %0(<2 x i64>), %1 + %4:_(<2 x i64>) = G_SELECT %3(<2 x i1>), %1, %2 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %4(<2 x i64>) + ... 
@@ -1205,28 +1205,28 @@ body: | ; CHECK-LABEL: name: test_vselect_v2p3 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr4_vgpr5 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr6_vgpr7 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV2]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV3]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV]](i32), [[UV2]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV1]](i32), [[UV3]] ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(p3), [[UV5:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY2]](<2 x p3>) ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(p3), [[UV7:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY3]](<2 x p3>) - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(p3) = G_SELECT [[ICMP]](s1), [[UV4]], [[UV6]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(p3) = G_SELECT [[ICMP1]](s1), [[UV5]], [[UV7]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(p3) = G_SELECT [[ICMP]](i1), [[UV4]], [[UV6]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(p3) = G_SELECT [[ICMP1]](i1), [[UV5]], [[UV7]] ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[SELECT]](p3), [[SELECT1]](p3) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 %2:_(<2 x p3>) = COPY $vgpr4_vgpr5 %3:_(<2 x p3>) = COPY $vgpr6_vgpr7 + %4:_(<2 x i1>) = G_ICMP intpred(ne), %0(<2 x i32>), %1 + %5:_(<2 x p3>) = G_SELECT %4(<2 x i1>), %2, %3 + $vgpr0_vgpr1 = COPY %5(<2 x p3>) - %4:_(<2 x s1>) = G_ICMP intpred(ne), %0, %1 - %5:_(<2 x p3>) = G_SELECT %4, %2, %3 - $vgpr0_vgpr1 = COPY %5 ... 
@@ -1239,28 +1239,28 @@ body: | ; CHECK-LABEL: name: test_vselect_v2p0 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11, $vgpr12_vgpr13_vgpr14_vgpr15 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr12_vgpr13_vgpr14_vgpr15 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s64), [[UV2]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s64), [[UV3]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY1]](<2 x i64>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV]](i64), [[UV2]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV1]](i64), [[UV3]] ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(p0), [[UV5:%[0-9]+]]:_(p0) = G_UNMERGE_VALUES [[COPY2]](<2 x p0>) ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(p0), [[UV7:%[0-9]+]]:_(p0) = G_UNMERGE_VALUES [[COPY3]](<2 x p0>) - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[UV4]], [[UV6]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(p0) = G_SELECT [[ICMP1]](s1), [[UV5]], [[UV7]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](i1), [[UV4]], [[UV6]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(p0) = G_SELECT [[ICMP1]](i1), [[UV5]], [[UV7]] ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[SELECT]](p0), [[SELECT1]](p0) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p0>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %2:_(<2 x p0>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 %3:_(<2 x p0>) = COPY $vgpr12_vgpr13_vgpr14_vgpr15 + %4:_(<2 x i1>) = G_ICMP intpred(ne), %0(<2 x i64>), %1 + %5:_(<2 x p0>) = G_SELECT %4(<2 x i1>), %2, %3 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %5(<2 x p0>) - %4:_(<2 x s1>) = G_ICMP intpred(ne), %0, %1 - %5:_(<2 x p0>) = G_SELECT %4, %2, %3 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %5 ... 
@@ -1272,41 +1272,41 @@ body: | ; CHECK-LABEL: name: test_vselect_v3s8 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5, $vgpr6_vgpr7_vgpr8, $vgpr9_vgpr10_vgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr9_vgpr10_vgpr11 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV3]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV4]] - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[UV5]] - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) - ; CHECK-NEXT: [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV9]](s32) - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[TRUNC]], [[TRUNC1]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV10]](s32) - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[TRUNC2]], [[TRUNC3]] - ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV8]](s32) - ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV11]](s32) - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[TRUNC4]], [[TRUNC5]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT2]](s16) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - %2:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 - %3:_(<3 x s32>) = COPY $vgpr9_vgpr10_vgpr11 - %4:_(<3 x s1>) = G_ICMP intpred(ne), %0, %1 - %5:_(<3 x s8>) = G_TRUNC %2 - %6:_(<3 x s8>) = G_TRUNC %3 - %7:_(<3 x s8>) = G_SELECT %4, %5, %6 - %8:_(<3 x s32>) = G_ANYEXT %7 - $vgpr0_vgpr1_vgpr2 = COPY %8 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr6_vgpr7_vgpr8 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr9_vgpr10_vgpr11 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV]](i32), [[UV3]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV1]](i32), [[UV4]] + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV2]](i32), [[UV5]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), 
[[UV8:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY2]](<3 x i32>) + ; CHECK-NEXT: [[UV9:%[0-9]+]]:_(i32), [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY3]](<3 x i32>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[UV6]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[UV9]](i32) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(i16) = G_SELECT [[ICMP]](i1), [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[UV7]](i32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[UV10]](i32) + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(i16) = G_SELECT [[ICMP1]](i1), [[TRUNC2]], [[TRUNC3]] + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[UV8]](i32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[UV11]](i32) + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(i16) = G_SELECT [[ICMP2]](i1), [[TRUNC4]], [[TRUNC5]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[SELECT]](i16) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[SELECT1]](i16) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[SELECT2]](i16) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32), [[ANYEXT2]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(<3 x i32>) = COPY $vgpr6_vgpr7_vgpr8 + %3:_(<3 x i32>) = COPY $vgpr9_vgpr10_vgpr11 + %4:_(<3 x i1>) = G_ICMP intpred(ne), %0(<3 x i32>), %1 + %5:_(<3 x i8>) = G_TRUNC %2(<3 x i32>) + %6:_(<3 x i8>) = G_TRUNC %3(<3 x i32>) + %7:_(<3 x i8>) = G_SELECT %4(<3 x i1>), %5, %6 + %8:_(<3 x i32>) = G_ANYEXT %7(<3 x i8>) + $vgpr0_vgpr1_vgpr2 = COPY %8(<3 x i32>) ... @@ -1318,66 +1318,66 @@ body: | ; CHECK-LABEL: name: test_vselect_v3s16 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5, $vgpr6_vgpr7_vgpr8, $vgpr9_vgpr10_vgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr9_vgpr10_vgpr11 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) - ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV3]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV4]] - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[UV5]] - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; CHECK-NEXT: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) - ; 
CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) - ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[TRUNC]], [[TRUNC3]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[TRUNC1]], [[TRUNC4]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[TRUNC2]], [[TRUNC5]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT]](s16) - ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT1]](s16) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT2]](s16) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]] - ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) - %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - %2:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 - %3:_(<3 x s32>) = COPY $vgpr9_vgpr10_vgpr11 - %4:_(<3 x s1>) = G_ICMP intpred(ne), %2, %3 - %5:_(<3 x s16>), %6:_(<3 x s16>) = G_UNMERGE_VALUES %0 - %7:_(<3 x s16>), %8:_(<3 x s16>) = G_UNMERGE_VALUES %1 - %9:_(<3 x s16>) = G_SELECT %4, %5, %7 - %10:_(<3 x s16>) = G_IMPLICIT_DEF - %11:_(<6 x s16>) = G_CONCAT_VECTORS %9, %10 - $vgpr0_vgpr1_vgpr2 = COPY %11 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr6_vgpr7_vgpr8 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr9_vgpr10_vgpr11 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY2]](<3 x i32>) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY3]](<3 x i32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV]](i32), [[UV3]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV1]](i32), [[UV4]] + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV2]](i32), 
[[UV5]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>), [[UV8:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV6]](<2 x i16>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV7]](<2 x i16>) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; CHECK-NEXT: [[UV9:%[0-9]+]]:_(<2 x i16>), [[UV10:%[0-9]+]]:_(<2 x i16>), [[UV11:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<6 x i16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV9]](<2 x i16>) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV10]](<2 x i16>) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST3]](i32) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(i16) = G_SELECT [[ICMP]](i1), [[TRUNC]], [[TRUNC3]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(i16) = G_SELECT [[ICMP1]](i1), [[TRUNC1]], [[TRUNC4]] + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(i16) = G_SELECT [[ICMP2]](i1), [[TRUNC2]], [[TRUNC5]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(<2 x i16>), [[UV13:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[UV12]](<2 x i16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[UV13]](<2 x i16>) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[SELECT]](i16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[SELECT1]](i16) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[SELECT2]](i16) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST4]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND]], [[C]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST5]], [[C1]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR2]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x i16>), [[BITCAST7]](<2 x i16>), [[BITCAST8]](<2 x i16>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) + %0:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<6 x i16>) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(<3 x i32>) = COPY $vgpr6_vgpr7_vgpr8 + %3:_(<3 x i32>) = COPY $vgpr9_vgpr10_vgpr11 + %4:_(<3 x i1>) = G_ICMP intpred(ne), %2(<3 x i32>), %3 + %5:_(<3 x i16>), %6:_(<3 x i16>) = G_UNMERGE_VALUES %0(<6 x i16>) + %7:_(<3 x i16>), %8:_(<3 x i16>) = G_UNMERGE_VALUES %1(<6 x i16>) + %9:_(<3 x i16>) = G_SELECT %4(<3 x i1>), %5, 
%7 + %10:_(<3 x i16>) = G_IMPLICIT_DEF + %11:_(<6 x i16>) = G_CONCAT_VECTORS %9(<3 x i16>), %10(<3 x i16>) + $vgpr0_vgpr1_vgpr2 = COPY %11(<6 x i16>) ... --- @@ -1389,30 +1389,30 @@ body: | ; CHECK-LABEL: name: test_select_s1 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[C]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[C]] - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY2]](s32), [[C]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[ICMP1]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[ICMP2]](s1) - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[ANYEXT]], [[ANYEXT1]] - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C1]] - ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(ne), %0, %3 - %5:_(s1) = G_ICMP intpred(ne), %1, %3 - %6:_(s1) = G_ICMP intpred(ne), %2, %3 - %7:_(s1) = G_SELECT %4, %5, %6 - %8:_(s32) = G_ZEXT %7 - $vgpr0 = COPY %8 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[COPY]](i32), [[C]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[COPY1]](i32), [[C]] + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[COPY2]](i32), [[C]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i16) = G_ANYEXT [[ICMP1]](i1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i16) = G_ANYEXT [[ICMP2]](i1) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(i16) = G_SELECT [[ICMP]](i1), [[ANYEXT]], [[ANYEXT1]] + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[SELECT]](i16) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[ANYEXT2]], [[C1]] + ; CHECK-NEXT: $vgpr0 = COPY [[AND]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(ne), %0(i32), %3 + %5:_(i1) = G_ICMP intpred(ne), %1(i32), %3 + %6:_(i1) = G_ICMP intpred(ne), %2(i32), %3 + %7:_(i1) = G_SELECT %4(i1), %5, %6 + %8:_(i32) = G_ZEXT %7(i1) + $vgpr0 = COPY %8(i32) ... 
@@ -1425,45 +1425,45 @@ body: | ; CHECK-LABEL: name: test_select_v2s1 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr6_vgpr7 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<2 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV2]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV3]] - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<2 x s32>) - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV4]](s32), [[UV6]] - ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV5]](s32), [[UV7]] - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<2 x s32>) - ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV8]](s32), [[UV10]] - ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV9]](s32), [[UV11]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[ICMP2]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[ICMP4]](s1) - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[ANYEXT]], [[ANYEXT1]] - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[ICMP3]](s1) - ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[ICMP5]](s1) - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[ANYEXT2]], [[ANYEXT3]] - ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) - ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT4]], [[C]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT5]], [[C]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = COPY $vgpr4_vgpr5 - %3:_(<2 x s32>) = COPY $vgpr6_vgpr7 - %4:_(<2 x s1>) = G_ICMP intpred(ne), %0, %3 - %5:_(<2 x s1>) = G_ICMP intpred(ne), %1, %3 - %6:_(<2 x s1>) = G_ICMP intpred(ne), %2, %3 - %7:_(<2 x s1>) = G_SELECT %4, %5, %6 - %8:_(<2 x s32>) = G_ZEXT %7 - $vgpr0_vgpr1 = COPY %8 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr4_vgpr5 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr6_vgpr7 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY3]](<2 x i32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV]](i32), [[UV2]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV1]](i32), [[UV3]] + ; CHECK-NEXT: 
[[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY3]](<2 x i32>) + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV4]](i32), [[UV6]] + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV5]](i32), [[UV7]] + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY2]](<2 x i32>) + ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY3]](<2 x i32>) + ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV8]](i32), [[UV10]] + ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV9]](i32), [[UV11]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i16) = G_ANYEXT [[ICMP2]](i1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i16) = G_ANYEXT [[ICMP4]](i1) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(i16) = G_SELECT [[ICMP]](i1), [[ANYEXT]], [[ANYEXT1]] + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(i16) = G_ANYEXT [[ICMP3]](i1) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(i16) = G_ANYEXT [[ICMP5]](i1) + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(i16) = G_SELECT [[ICMP1]](i1), [[ANYEXT2]], [[ANYEXT3]] + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(i32) = G_ANYEXT [[SELECT]](i16) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(i32) = G_ANYEXT [[SELECT1]](i16) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[ANYEXT4]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[ANYEXT5]], [[C]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[AND]](i32), [[AND1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i32>) = COPY $vgpr4_vgpr5 + %3:_(<2 x i32>) = COPY $vgpr6_vgpr7 + %4:_(<2 x i1>) = G_ICMP intpred(ne), %0(<2 x i32>), %3 + %5:_(<2 x i1>) = G_ICMP intpred(ne), %1(<2 x i32>), %3 + %6:_(<2 x i1>) = G_ICMP intpred(ne), %2(<2 x i32>), %3 + %7:_(<2 x i1>) = G_SELECT %4(<2 x i1>), %5, %6 + %8:_(<2 x i32>) = G_ZEXT %7(<2 x i1>) + $vgpr0_vgpr1 = COPY %8(<2 x i32>) ... 
@@ -1476,52 +1476,52 @@ body: | ; CHECK-LABEL: name: test_select_v3s1 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5, $vgpr6_vgpr7_vgpr8, $vgpr9_vgpr10_vgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr9_vgpr10_vgpr11 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV3]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV4]] - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[UV5]] - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; CHECK-NEXT: [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) - ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV6]](s32), [[UV9]] - ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV7]](s32), [[UV10]] - ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV8]](s32), [[UV11]] - ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) - ; CHECK-NEXT: [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) - ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV12]](s32), [[UV15]] - ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV13]](s32), [[UV16]] - ; CHECK-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV14]](s32), [[UV17]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[ICMP3]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[ICMP6]](s1) - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[ANYEXT]], [[ANYEXT1]] - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[ICMP4]](s1) - ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[ICMP7]](s1) - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[ANYEXT2]], [[ANYEXT3]] - ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[ICMP5]](s1) - ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s16) = G_ANYEXT [[ICMP8]](s1) - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[ANYEXT4]], [[ANYEXT5]] - ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) - ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16) - ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT2]](s16) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT6]], [[C]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT7]], [[C]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT8]], [[C]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - %2:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 - %3:_(<3 x s32>) = COPY $vgpr9_vgpr10_vgpr11 - %4:_(<3 x s1>) = G_ICMP 
intpred(ne), %0, %3 - %5:_(<3 x s1>) = G_ICMP intpred(ne), %1, %3 - %6:_(<3 x s1>) = G_ICMP intpred(ne), %2, %3 - %7:_(<3 x s1>) = G_SELECT %4, %5, %6 - %8:_(<3 x s32>) = G_ZEXT %7 - $vgpr0_vgpr1_vgpr2 = COPY %8 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr6_vgpr7_vgpr8 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr9_vgpr10_vgpr11 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY3]](<3 x i32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV]](i32), [[UV3]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV1]](i32), [[UV4]] + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV2]](i32), [[UV5]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; CHECK-NEXT: [[UV9:%[0-9]+]]:_(i32), [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY3]](<3 x i32>) + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV6]](i32), [[UV9]] + ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV7]](i32), [[UV10]] + ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV8]](i32), [[UV11]] + ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32), [[UV14:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY2]](<3 x i32>) + ; CHECK-NEXT: [[UV15:%[0-9]+]]:_(i32), [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY3]](<3 x i32>) + ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV12]](i32), [[UV15]] + ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV13]](i32), [[UV16]] + ; CHECK-NEXT: [[ICMP8:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV14]](i32), [[UV17]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i16) = G_ANYEXT [[ICMP3]](i1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i16) = G_ANYEXT [[ICMP6]](i1) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(i16) = G_SELECT [[ICMP]](i1), [[ANYEXT]], [[ANYEXT1]] + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(i16) = G_ANYEXT [[ICMP4]](i1) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(i16) = G_ANYEXT [[ICMP7]](i1) + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(i16) = G_SELECT [[ICMP1]](i1), [[ANYEXT2]], [[ANYEXT3]] + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(i16) = G_ANYEXT [[ICMP5]](i1) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(i16) = G_ANYEXT [[ICMP8]](i1) + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(i16) = G_SELECT [[ICMP2]](i1), [[ANYEXT4]], [[ANYEXT5]] + ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(i32) = G_ANYEXT [[SELECT]](i16) + ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(i32) = G_ANYEXT [[SELECT1]](i16) + ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(i32) = G_ANYEXT [[SELECT2]](i16) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[ANYEXT6]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[ANYEXT7]], [[C]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[ANYEXT8]], [[C]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[AND]](i32), [[AND1]](i32), [[AND2]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(<3 x i32>) = COPY $vgpr6_vgpr7_vgpr8 + %3:_(<3 x i32>) = COPY $vgpr9_vgpr10_vgpr11 + 
%4:_(<3 x i1>) = G_ICMP intpred(ne), %0(<3 x i32>), %3 + %5:_(<3 x i1>) = G_ICMP intpred(ne), %1(<3 x i32>), %3 + %6:_(<3 x i1>) = G_ICMP intpred(ne), %2(<3 x i32>), %3 + %7:_(<3 x i1>) = G_SELECT %4(<3 x i1>), %5, %6 + %8:_(<3 x i32>) = G_ZEXT %7(<3 x i1>) + $vgpr0_vgpr1_vgpr2 = COPY %8(<3 x i32>) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir index 40c48e10f933f..f469280ca86bb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir @@ -14,26 +14,26 @@ body: | ; GFX9-LABEL: name: test_sext_inreg_s32_1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 1 - ; GFX9-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 1 + ; GFX9-NEXT: $vgpr0 = COPY [[SEXT_INREG]](i32) ; ; GFX8-LABEL: name: test_sext_inreg_s32_1 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 1 - ; GFX8-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 1 + ; GFX8-NEXT: $vgpr0 = COPY [[SEXT_INREG]](i32) ; ; GFX6-LABEL: name: test_sext_inreg_s32_1 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 1 - ; GFX6-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_SEXT_INREG %0, 1 - $vgpr0 = COPY %1 + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 1 + ; GFX6-NEXT: $vgpr0 = COPY [[SEXT_INREG]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_SEXT_INREG %0, 1 + $vgpr0 = COPY %1(i32) ... 
--- @@ -45,26 +45,26 @@ body: | ; GFX9-LABEL: name: test_sext_inreg_s32_2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 2 - ; GFX9-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 2 + ; GFX9-NEXT: $vgpr0 = COPY [[SEXT_INREG]](i32) ; ; GFX8-LABEL: name: test_sext_inreg_s32_2 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 2 - ; GFX8-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 2 + ; GFX8-NEXT: $vgpr0 = COPY [[SEXT_INREG]](i32) ; ; GFX6-LABEL: name: test_sext_inreg_s32_2 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 2 - ; GFX6-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_SEXT_INREG %0, 2 - $vgpr0 = COPY %1 + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 2 + ; GFX6-NEXT: $vgpr0 = COPY [[SEXT_INREG]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_SEXT_INREG %0, 2 + $vgpr0 = COPY %1(i32) ... --- @@ -76,26 +76,26 @@ body: | ; GFX9-LABEL: name: test_sext_inreg_s32_8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 - ; GFX9-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 8 + ; GFX9-NEXT: $vgpr0 = COPY [[SEXT_INREG]](i32) ; ; GFX8-LABEL: name: test_sext_inreg_s32_8 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 - ; GFX8-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 8 + ; GFX8-NEXT: $vgpr0 = COPY [[SEXT_INREG]](i32) ; ; GFX6-LABEL: name: test_sext_inreg_s32_8 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 - ; GFX6-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_SEXT_INREG %0, 8 - $vgpr0 = COPY %1 + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 8 + ; GFX6-NEXT: $vgpr0 = COPY [[SEXT_INREG]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_SEXT_INREG %0, 8 + $vgpr0 = COPY %1(i32) ... 
--- @@ -107,26 +107,26 @@ body: | ; GFX9-LABEL: name: test_sext_inreg_s32_16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 - ; GFX9-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 8 + ; GFX9-NEXT: $vgpr0 = COPY [[SEXT_INREG]](i32) ; ; GFX8-LABEL: name: test_sext_inreg_s32_16 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 - ; GFX8-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 8 + ; GFX8-NEXT: $vgpr0 = COPY [[SEXT_INREG]](i32) ; ; GFX6-LABEL: name: test_sext_inreg_s32_16 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 - ; GFX6-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_SEXT_INREG %0, 8 - $vgpr0 = COPY %1 + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 8 + ; GFX6-NEXT: $vgpr0 = COPY [[SEXT_INREG]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_SEXT_INREG %0, 8 + $vgpr0 = COPY %1(i32) ... --- @@ -138,26 +138,26 @@ body: | ; GFX9-LABEL: name: test_sext_inreg_s32_31 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 31 - ; GFX9-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 31 + ; GFX9-NEXT: $vgpr0 = COPY [[SEXT_INREG]](i32) ; ; GFX8-LABEL: name: test_sext_inreg_s32_31 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 31 - ; GFX8-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 31 + ; GFX8-NEXT: $vgpr0 = COPY [[SEXT_INREG]](i32) ; ; GFX6-LABEL: name: test_sext_inreg_s32_31 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 31 - ; GFX6-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_SEXT_INREG %0, 31 - $vgpr0 = COPY %1 + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 31 + ; GFX6-NEXT: $vgpr0 = COPY [[SEXT_INREG]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_SEXT_INREG %0, 31 + $vgpr0 = COPY %1(i32) ... 
--- @@ -169,26 +169,26 @@ body: | ; GFX9-LABEL: name: test_sext_inreg_s64_1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 1 - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[COPY]], 1 + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](i64) ; ; GFX8-LABEL: name: test_sext_inreg_s64_1 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 1 - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[COPY]], 1 + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](i64) ; ; GFX6-LABEL: name: test_sext_inreg_s64_1 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 1 - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_SEXT_INREG %0, 1 - $vgpr0_vgpr1 = COPY %1 + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[COPY]], 1 + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = G_SEXT_INREG %0, 1 + $vgpr0_vgpr1 = COPY %1(i64) ... --- @@ -200,26 +200,26 @@ body: | ; GFX9-LABEL: name: test_sext_inreg_s64_2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 2 - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[COPY]], 2 + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](i64) ; ; GFX8-LABEL: name: test_sext_inreg_s64_2 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 2 - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[COPY]], 2 + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](i64) ; ; GFX6-LABEL: name: test_sext_inreg_s64_2 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 2 - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_SEXT_INREG %0, 2 - $vgpr0_vgpr1 = COPY %1 + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[COPY]], 2 + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = G_SEXT_INREG %0, 2 + $vgpr0_vgpr1 = COPY %1(i64) ... 
--- @@ -231,26 +231,26 @@ body: | ; GFX9-LABEL: name: test_sext_inreg_s64_8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 8 - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[COPY]], 8 + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](i64) ; ; GFX8-LABEL: name: test_sext_inreg_s64_8 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 8 - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[COPY]], 8 + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](i64) ; ; GFX6-LABEL: name: test_sext_inreg_s64_8 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 8 - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_SEXT_INREG %0, 8 - $vgpr0_vgpr1 = COPY %1 + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[COPY]], 8 + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = G_SEXT_INREG %0, 8 + $vgpr0_vgpr1 = COPY %1(i64) ... --- @@ -262,26 +262,26 @@ body: | ; GFX9-LABEL: name: test_sext_inreg_s64_16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 8 - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[COPY]], 8 + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](i64) ; ; GFX8-LABEL: name: test_sext_inreg_s64_16 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 8 - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[COPY]], 8 + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](i64) ; ; GFX6-LABEL: name: test_sext_inreg_s64_16 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 8 - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_SEXT_INREG %0, 8 - $vgpr0_vgpr1 = COPY %1 + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[COPY]], 8 + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = G_SEXT_INREG %0, 8 + $vgpr0_vgpr1 = COPY %1(i64) ... 
--- @@ -293,26 +293,26 @@ body: | ; GFX9-LABEL: name: test_sext_inreg_s64_31 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 31 - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[COPY]], 31 + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](i64) ; ; GFX8-LABEL: name: test_sext_inreg_s64_31 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 31 - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[COPY]], 31 + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](i64) ; ; GFX6-LABEL: name: test_sext_inreg_s64_31 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 31 - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_SEXT_INREG %0, 31 - $vgpr0_vgpr1 = COPY %1 + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[COPY]], 31 + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = G_SEXT_INREG %0, 31 + $vgpr0_vgpr1 = COPY %1(i64) ... --- @@ -324,26 +324,26 @@ body: | ; GFX9-LABEL: name: test_sext_inreg_s64_32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 32 - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[COPY]], 32 + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](i64) ; ; GFX8-LABEL: name: test_sext_inreg_s64_32 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 32 - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[COPY]], 32 + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](i64) ; ; GFX6-LABEL: name: test_sext_inreg_s64_32 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 32 - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_SEXT_INREG %0, 32 - $vgpr0_vgpr1 = COPY %1 + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[COPY]], 32 + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = G_SEXT_INREG %0, 32 + $vgpr0_vgpr1 = COPY %1(i64) ... 
--- @@ -355,26 +355,26 @@ body: | ; GFX9-LABEL: name: test_sext_inreg_s64_33 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 33 - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[COPY]], 33 + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](i64) ; ; GFX8-LABEL: name: test_sext_inreg_s64_33 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 33 - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[COPY]], 33 + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](i64) ; ; GFX6-LABEL: name: test_sext_inreg_s64_33 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 33 - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_SEXT_INREG %0, 33 - $vgpr0_vgpr1 = COPY %1 + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[COPY]], 33 + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = G_SEXT_INREG %0, 33 + $vgpr0_vgpr1 = COPY %1(i64) ... --- @@ -386,26 +386,26 @@ body: | ; GFX9-LABEL: name: test_sext_inreg_s64_63 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 63 - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[COPY]], 63 + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](i64) ; ; GFX8-LABEL: name: test_sext_inreg_s64_63 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 63 - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[COPY]], 63 + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](i64) ; ; GFX6-LABEL: name: test_sext_inreg_s64_63 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 63 - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_SEXT_INREG %0, 63 - $vgpr0_vgpr1 = COPY %1 + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[COPY]], 63 + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = G_SEXT_INREG %0, 63 + $vgpr0_vgpr1 = COPY %1(i64) ... 
--- @@ -417,32 +417,32 @@ body: | ; GFX9-LABEL: name: test_sext_inreg_s16_1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG]](s32) - ; GFX9-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s16) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[SEXT_INREG]](i32) + ; GFX9-NEXT: S_ENDPGM 0, implicit [[TRUNC]](i16) ; ; GFX8-LABEL: name: test_sext_inreg_s16_1 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[C]](s16) - ; GFX8-NEXT: S_ENDPGM 0, implicit [[ASHR]](s16) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 15 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[C]](i16) + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i16) = G_ASHR [[SHL]], [[C]](i16) + ; GFX8-NEXT: S_ENDPGM 0, implicit [[ASHR]](i16) ; ; GFX6-LABEL: name: test_sext_inreg_s16_1 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 1 - ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG]](s32) - ; GFX6-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s16) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s16) = G_SEXT_INREG %1, 1 - S_ENDPGM 0, implicit %2 + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 1 + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[SEXT_INREG]](i32) + ; GFX6-NEXT: S_ENDPGM 0, implicit [[TRUNC]](i16) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(i16) = G_SEXT_INREG %1, 1 + S_ENDPGM 0, implicit %2(i16) ... 
@@ -455,32 +455,32 @@ body: | ; GFX9-LABEL: name: test_sext_inreg_s16_15 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 15 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG]](s32) - ; GFX9-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s16) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 15 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[SEXT_INREG]](i32) + ; GFX9-NEXT: S_ENDPGM 0, implicit [[TRUNC]](i16) ; ; GFX8-LABEL: name: test_sext_inreg_s16_15 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[C]](s16) - ; GFX8-NEXT: S_ENDPGM 0, implicit [[ASHR]](s16) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 1 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[C]](i16) + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i16) = G_ASHR [[SHL]], [[C]](i16) + ; GFX8-NEXT: S_ENDPGM 0, implicit [[ASHR]](i16) ; ; GFX6-LABEL: name: test_sext_inreg_s16_15 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 15 - ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG]](s32) - ; GFX6-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s16) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s16) = G_SEXT_INREG %1, 15 - S_ENDPGM 0, implicit %2 + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 15 + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[SEXT_INREG]](i32) + ; GFX6-NEXT: S_ENDPGM 0, implicit [[TRUNC]](i16) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(i16) = G_SEXT_INREG %1, 15 + S_ENDPGM 0, implicit %2(i16) ... 
@@ -493,47 +493,47 @@ body: | ; GFX9-LABEL: name: test_sext_inreg_s96_8 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[COPY]](s96) - ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[TRUNC]], 8 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG]](s64) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32) - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR]](s32), [[ASHR]](s32) - ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[SEXT_INREG]](s64), [[MV]](s64), [[MV]](s64) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s96) = G_TRUNC [[MV1]](s192) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](s96) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i64) = G_TRUNC [[COPY]](i96) + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[TRUNC]], 8 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SEXT_INREG]](i64) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[UV1]], [[C]](i32) + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[ASHR]](i32), [[ASHR]](i32) + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(i192) = G_MERGE_VALUES [[SEXT_INREG]](i64), [[MV]](i64), [[MV]](i64) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i96) = G_TRUNC [[MV1]](i192) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](i96) ; ; GFX8-LABEL: name: test_sext_inreg_s96_8 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[COPY]](s96) - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[TRUNC]], 8 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG]](s64) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32) - ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR]](s32), [[ASHR]](s32) - ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[SEXT_INREG]](s64), [[MV]](s64), [[MV]](s64) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s96) = G_TRUNC [[MV1]](s192) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](s96) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i64) = G_TRUNC [[COPY]](i96) + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[TRUNC]], 8 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SEXT_INREG]](i64) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[UV1]], [[C]](i32) + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[ASHR]](i32), [[ASHR]](i32) + ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(i192) = G_MERGE_VALUES [[SEXT_INREG]](i64), [[MV]](i64), [[MV]](i64) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i96) = G_TRUNC [[MV1]](i192) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](i96) ; ; GFX6-LABEL: name: test_sext_inreg_s96_8 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[COPY]](s96) - ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[TRUNC]], 8 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES 
[[SEXT_INREG]](s64) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32) - ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR]](s32), [[ASHR]](s32) - ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[SEXT_INREG]](s64), [[MV]](s64), [[MV]](s64) - ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s96) = G_TRUNC [[MV1]](s192) - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](s96) - %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(s96) = G_SEXT_INREG %0, 8 - $vgpr0_vgpr1_vgpr2 = COPY %1 + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(i64) = G_TRUNC [[COPY]](i96) + ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[TRUNC]], 8 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SEXT_INREG]](i64) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[UV1]], [[C]](i32) + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[ASHR]](i32), [[ASHR]](i32) + ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(i192) = G_MERGE_VALUES [[SEXT_INREG]](i64), [[MV]](i64), [[MV]](i64) + ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(i96) = G_TRUNC [[MV1]](i192) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](i96) + %0:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(i96) = G_SEXT_INREG %0, 8 + $vgpr0_vgpr1_vgpr2 = COPY %1(i96) ... --- @@ -545,38 +545,38 @@ body: | ; GFX9-LABEL: name: test_sext_inreg_s128_8 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[COPY]](s128) - ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[TRUNC]], 8 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32) - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SEXT_INREG]](s64), [[ASHR]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i64) = G_TRUNC [[COPY]](i128) + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[TRUNC]], 8 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG]], [[C]](i32) + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SEXT_INREG]](i64), [[ASHR]](i64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) ; ; GFX8-LABEL: name: test_sext_inreg_s128_8 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[COPY]](s128) - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[TRUNC]], 8 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32) - ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SEXT_INREG]](s64), [[ASHR]](s64) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i64) = G_TRUNC [[COPY]](i128) + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[TRUNC]], 8 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG]], [[C]](i32) + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SEXT_INREG]](i64), [[ASHR]](i64) + ; 
GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) ; ; GFX6-LABEL: name: test_sext_inreg_s128_8 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[COPY]](s128) - ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[TRUNC]], 8 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32) - ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SEXT_INREG]](s64), [[ASHR]](s64) - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s128) = G_SEXT_INREG %0, 8 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(i64) = G_TRUNC [[COPY]](i128) + ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[TRUNC]], 8 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG]], [[C]](i32) + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SEXT_INREG]](i64), [[ASHR]](i64) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i128) = G_SEXT_INREG %0, 8 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(i128) ... --- @@ -588,47 +588,47 @@ body: | ; GFX9-LABEL: name: test_sext_inreg_s160_8 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s160) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[COPY]](s160) - ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[TRUNC]], 8 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG]](s64) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32) - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR]](s32), [[ASHR]](s32) - ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s320) = G_MERGE_VALUES [[SEXT_INREG]](s64), [[MV]](s64), [[MV]](s64), [[MV]](s64), [[MV]](s64) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s160) = G_TRUNC [[MV1]](s320) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY [[TRUNC1]](s160) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i160) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i64) = G_TRUNC [[COPY]](i160) + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[TRUNC]], 8 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SEXT_INREG]](i64) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[UV1]], [[C]](i32) + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[ASHR]](i32), [[ASHR]](i32) + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(i320) = G_MERGE_VALUES [[SEXT_INREG]](i64), [[MV]](i64), [[MV]](i64), [[MV]](i64), [[MV]](i64) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i160) = G_TRUNC [[MV1]](i320) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY [[TRUNC1]](i160) ; ; GFX8-LABEL: name: test_sext_inreg_s160_8 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s160) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[COPY]](s160) - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[TRUNC]], 8 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG]](s64) - ; GFX8-NEXT: 
[[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32) - ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR]](s32), [[ASHR]](s32) - ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s320) = G_MERGE_VALUES [[SEXT_INREG]](s64), [[MV]](s64), [[MV]](s64), [[MV]](s64), [[MV]](s64) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s160) = G_TRUNC [[MV1]](s320) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY [[TRUNC1]](s160) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i160) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i64) = G_TRUNC [[COPY]](i160) + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[TRUNC]], 8 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SEXT_INREG]](i64) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[UV1]], [[C]](i32) + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[ASHR]](i32), [[ASHR]](i32) + ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(i320) = G_MERGE_VALUES [[SEXT_INREG]](i64), [[MV]](i64), [[MV]](i64), [[MV]](i64), [[MV]](i64) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i160) = G_TRUNC [[MV1]](i320) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY [[TRUNC1]](i160) ; ; GFX6-LABEL: name: test_sext_inreg_s160_8 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s160) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[COPY]](s160) - ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[TRUNC]], 8 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG]](s64) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32) - ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR]](s32), [[ASHR]](s32) - ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s320) = G_MERGE_VALUES [[SEXT_INREG]](s64), [[MV]](s64), [[MV]](s64), [[MV]](s64), [[MV]](s64) - ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s160) = G_TRUNC [[MV1]](s320) - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY [[TRUNC1]](s160) - %0:_(s160) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - %1:_(s160) = G_SEXT_INREG %0, 8 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY %1 + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i160) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(i64) = G_TRUNC [[COPY]](i160) + ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[TRUNC]], 8 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SEXT_INREG]](i64) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[UV1]], [[C]](i32) + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[ASHR]](i32), [[ASHR]](i32) + ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(i320) = G_MERGE_VALUES [[SEXT_INREG]](i64), [[MV]](i64), [[MV]](i64), [[MV]](i64), [[MV]](i64) + ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(i160) = G_TRUNC [[MV1]](i320) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY [[TRUNC1]](i160) + %0:_(i160) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + %1:_(i160) = G_SEXT_INREG %0, 8 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY %1(i160) ... 
--- @@ -640,38 +640,38 @@ body: | ; GFX9-LABEL: name: test_sext_inreg_256_8 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[COPY]](s256) - ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[TRUNC]], 8 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32) - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[SEXT_INREG]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV]](s256) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i64) = G_TRUNC [[COPY]](i256) + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[TRUNC]], 8 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG]], [[C]](i32) + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i256) = G_MERGE_VALUES [[SEXT_INREG]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV]](i256) ; ; GFX8-LABEL: name: test_sext_inreg_256_8 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[COPY]](s256) - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[TRUNC]], 8 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32) - ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[SEXT_INREG]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV]](s256) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i64) = G_TRUNC [[COPY]](i256) + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[TRUNC]], 8 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG]], [[C]](i32) + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(i256) = G_MERGE_VALUES [[SEXT_INREG]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV]](i256) ; ; GFX6-LABEL: name: test_sext_inreg_256_8 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[COPY]](s256) - ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[TRUNC]], 8 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32) - ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[SEXT_INREG]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64) - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV]](s256) - %0:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - %1:_(s256) = G_SEXT_INREG %0, 8 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX6-NEXT: 
[[TRUNC:%[0-9]+]]:_(i64) = G_TRUNC [[COPY]](i256) + ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[TRUNC]], 8 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG]], [[C]](i32) + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(i256) = G_MERGE_VALUES [[SEXT_INREG]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV]](i256) + %0:_(i256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(i256) = G_SEXT_INREG %0, 8 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1(i256) ... --- @@ -683,38 +683,38 @@ body: | ; GFX9-LABEL: name: test_sext_inreg_512_8 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[COPY]](s512) - ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[TRUNC]], 8 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32) - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s512) = G_MERGE_VALUES [[SEXT_INREG]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[MV]](s512) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i64) = G_TRUNC [[COPY]](i512) + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[TRUNC]], 8 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG]], [[C]](i32) + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i512) = G_MERGE_VALUES [[SEXT_INREG]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[MV]](i512) ; ; GFX8-LABEL: name: test_sext_inreg_512_8 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[COPY]](s512) - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[TRUNC]], 8 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32) - ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s512) = G_MERGE_VALUES [[SEXT_INREG]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[MV]](s512) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i64) = G_TRUNC [[COPY]](i512) + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[TRUNC]], 8 + ; GFX8-NEXT: 
[[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG]], [[C]](i32) + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(i512) = G_MERGE_VALUES [[SEXT_INREG]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[MV]](i512) ; ; GFX6-LABEL: name: test_sext_inreg_512_8 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[COPY]](s512) - ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[TRUNC]], 8 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32) - ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s512) = G_MERGE_VALUES [[SEXT_INREG]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64) - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[MV]](s512) - %0:_(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - %1:_(s512) = G_SEXT_INREG %0, 8 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1 + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(i64) = G_TRUNC [[COPY]](i512) + ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[TRUNC]], 8 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG]], [[C]](i32) + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(i512) = G_MERGE_VALUES [[SEXT_INREG]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[MV]](i512) + %0:_(i512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:_(i512) = G_SEXT_INREG %0, 8 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1(i512) ... 
--- @@ -726,38 +726,38 @@ body: | ; GFX9-LABEL: name: test_sext_inreg_1024_8 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s1024) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[COPY]](s1024) - ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[TRUNC]], 8 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32) - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[SEXT_INREG]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[MV]](s1024) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i1024) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i64) = G_TRUNC [[COPY]](i1024) + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[TRUNC]], 8 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG]], [[C]](i32) + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i1024) = G_MERGE_VALUES [[SEXT_INREG]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[MV]](i1024) ; ; GFX8-LABEL: name: test_sext_inreg_1024_8 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s1024) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[COPY]](s1024) - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[TRUNC]], 8 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32) - ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[SEXT_INREG]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), 
[[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[MV]](s1024) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i1024) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i64) = G_TRUNC [[COPY]](i1024) + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[TRUNC]], 8 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG]], [[C]](i32) + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(i1024) = G_MERGE_VALUES [[SEXT_INREG]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[MV]](i1024) ; ; GFX6-LABEL: name: test_sext_inreg_1024_8 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s1024) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[COPY]](s1024) - ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[TRUNC]], 8 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32) - ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[SEXT_INREG]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64) - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[MV]](s1024) - %0:_(s1024) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - %1:_(s1024) = G_SEXT_INREG %0, 8 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %1 + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i1024) = COPY 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(i64) = G_TRUNC [[COPY]](i1024) + ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[TRUNC]], 8 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG]], [[C]](i32) + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(i1024) = G_MERGE_VALUES [[SEXT_INREG]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[MV]](i1024) + %0:_(i1024) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + %1:_(i1024) = G_SEXT_INREG %0, 8 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %1(i1024) ... --- @@ -769,35 +769,35 @@ body: | ; GFX9-LABEL: name: test_sext_inreg_v2s32_1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV]], 1 - ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV1]], 1 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[UV]], 1 + ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[UV1]], 1 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SEXT_INREG]](i32), [[SEXT_INREG1]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX8-LABEL: name: test_sext_inreg_v2s32_1 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV]], 1 - ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV1]], 1 - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[UV]], 1 + ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[UV1]], 1 + ; GFX8-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SEXT_INREG]](i32), [[SEXT_INREG1]](i32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX6-LABEL: name: test_sext_inreg_v2s32_1 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV]], 1 - ; GFX6-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV1]], 1 - ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_SEXT_INREG %0, 1 - $vgpr0_vgpr1 = COPY %1 + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[UV]], 1 + ; GFX6-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[UV1]], 1 + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SEXT_INREG]](i32), [[SEXT_INREG1]](i32) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = G_SEXT_INREG %0, 1 + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... --- @@ -809,53 +809,53 @@ body: | ; GFX9-LABEL: name: test_sext_inreg_v2s16_1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[COPY]], [[BUILD_VECTOR]](<2 x s16>) - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SHL]], [[BUILD_VECTOR]](<2 x s16>) - ; GFX9-NEXT: $vgpr0 = COPY [[ASHR]](<2 x s16>) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 15 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[C]](i16), [[C]](i16) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x i16>) = G_SHL [[COPY]], [[BUILD_VECTOR]](<2 x i16>) + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(<2 x i16>) = G_ASHR [[SHL]], [[BUILD_VECTOR]](<2 x i16>) + ; GFX9-NEXT: $vgpr0 = COPY [[ASHR]](<2 x i16>) ; ; GFX8-LABEL: name: test_sext_inreg_v2s16_1 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[C1]](s16) - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C1]](s16) - ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[C1]](s16) - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR]](s16) - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR1]](s16) - ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; GFX8-NEXT: 
[[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 15 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[C1]](i16) + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i16) = G_ASHR [[SHL]], [[C1]](i16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[C1]](i16) + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(i16) = G_ASHR [[SHL1]], [[C1]](i16) + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[ASHR]](i16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[ASHR1]](i16) + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) ; ; GFX6-LABEL: name: test_sext_inreg_v2s16_1 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 1 - ; GFX6-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 1 - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG]], [[C1]] - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG1]], [[C1]] - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = G_SEXT_INREG %0, 1 - $vgpr0 = COPY %1 + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST]], 1 + ; GFX6-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR]], 1 + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[SEXT_INREG]], [[C1]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[SEXT_INREG1]], [[C1]] + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = G_SEXT_INREG %0, 1 + $vgpr0 = COPY %1(<2 x i16>) ... 
--- @@ -866,123 +866,123 @@ body: | ; GFX9-LABEL: name: test_sext_inreg_v3s16_1 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C1]](s16), [[C1]](s16) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV3]], [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SHL]], [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C1]](s16), [[C1]](s16) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR]], [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SHL1]], [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[ASHR]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[ASHR1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[TRUNC6]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), 
[[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 15 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[C1]](i16), [[C1]](i16) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x i16>) = G_SHL [[UV3]], [[BUILD_VECTOR1]](<2 x i16>) + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(<2 x i16>) = G_ASHR [[SHL]], [[BUILD_VECTOR1]](<2 x i16>) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[C1]](i16), [[C1]](i16) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x i16>) = G_SHL [[BUILD_VECTOR]], [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(<2 x i16>) = G_ASHR [[SHL1]], [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[ASHR]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[ASHR1]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF1]](<4 x i16>) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV6]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST3]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[UV7]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC1]](i16), [[TRUNC2]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC3]](i16), [[TRUNC4]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC5]](i16), [[TRUNC6]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR5]](<2 x i16>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX8-LABEL: name: test_sext_inreg_v3s16_1 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[C1]](s16) - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C1]](s16) - ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[C1]](s16) - ; GFX8-NEXT: 
[[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[C1]](s16) - ; GFX8-NEXT: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[SHL2]], [[C1]](s16) - ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR]](s16) - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR1]](s16) - ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] - ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR2]](s16) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]] - ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] - ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C2]] - ; GFX8-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL5]] - ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 15 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[C1]](i16) + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i16) = G_ASHR [[SHL]], [[C1]](i16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[C1]](i16) + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(i16) = G_ASHR [[SHL1]], [[C1]](i16) + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[TRUNC2]], [[C1]](i16) + ; GFX8-NEXT: [[ASHR2:%[0-9]+]]:_(i16) = G_ASHR [[SHL2]], [[C1]](i16) + ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[ASHR]](i16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[ASHR1]](i16) + ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], 
[[SHL3]] + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[ASHR2]](i16) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C2]] + ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[AND]], [[C]](i32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL4]] + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST3]], [[C2]] + ; GFX8-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR1]], [[SHL5]] + ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x i16>), [[BITCAST5]](<2 x i16>), [[BITCAST6]](<2 x i16>) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX6-LABEL: name: test_sext_inreg_v3s16_1 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 1 - ; GFX6-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 1 - ; GFX6-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 1 - ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG]], [[C1]] - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG1]], [[C1]] - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG2]], [[C1]] - ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) - ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]] - ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) - %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0 - %3:_(<3 x s16>) = 
G_SEXT_INREG %1, 1 - %4:_(<3 x s16>) = G_IMPLICIT_DEF - %5:_(<6 x s16>) = G_CONCAT_VECTORS %3, %4 - $vgpr0_vgpr1_vgpr2 = COPY %5 + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST]], 1 + ; GFX6-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR]], 1 + ; GFX6-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST1]], 1 + ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[SEXT_INREG]], [[C1]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[SEXT_INREG1]], [[C1]] + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[SEXT_INREG2]], [[C1]] + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C1]] + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C]](i32) + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[BITCAST3]], [[C1]] + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND4]], [[C]](i32) + ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR1]], [[SHL2]] + ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x i16>), [[BITCAST5]](<2 x i16>), [[BITCAST6]](<2 x i16>) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) + %0:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x i16>), %2:_(<3 x i16>) = G_UNMERGE_VALUES %0(<6 x i16>) + %3:_(<3 x i16>) = G_SEXT_INREG %1, 1 + %4:_(<3 x i16>) = G_IMPLICIT_DEF + %5:_(<6 x i16>) = G_CONCAT_VECTORS %3(<3 x i16>), %4(<3 x i16>) + $vgpr0_vgpr1_vgpr2 = COPY %5(<6 x i16>) ... 
--- @@ -994,38 +994,38 @@ body: | ; GFX9-LABEL: name: test_sext_inreg_v3s32_1 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV]], 1 - ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV1]], 1 - ; GFX9-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV2]], 1 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32), [[SEXT_INREG2]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[UV]], 1 + ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[UV1]], 1 + ; GFX9-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[UV2]], 1 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[SEXT_INREG]](i32), [[SEXT_INREG1]](i32), [[SEXT_INREG2]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) ; ; GFX8-LABEL: name: test_sext_inreg_v3s32_1 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV]], 1 - ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV1]], 1 - ; GFX8-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV2]], 1 - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32), [[SEXT_INREG2]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[UV]], 1 + ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[UV1]], 1 + ; GFX8-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[UV2]], 1 + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[SEXT_INREG]](i32), [[SEXT_INREG1]](i32), [[SEXT_INREG2]](i32) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) ; ; GFX6-LABEL: name: test_sext_inreg_v3s32_1 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV]], 1 - ; GFX6-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV1]], 1 - ; GFX6-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV2]], 1 - ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32), [[SEXT_INREG2]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s32>) = G_SEXT_INREG %0, 1 - $vgpr0_vgpr1_vgpr2 = COPY %1 + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<3 x 
i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[UV]], 1 + ; GFX6-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[UV1]], 1 + ; GFX6-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[UV2]], 1 + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[SEXT_INREG]](i32), [[SEXT_INREG1]](i32), [[SEXT_INREG2]](i32) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x i32>) = G_SEXT_INREG %0, 1 + $vgpr0_vgpr1_vgpr2 = COPY %1(<3 x i32>) ... --- @@ -1037,41 +1037,41 @@ body: | ; GFX9-LABEL: name: test_sext_inreg_v4s32_1 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV]], 1 - ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV1]], 1 - ; GFX9-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV2]], 1 - ; GFX9-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV3]], 1 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32), [[SEXT_INREG2]](s32), [[SEXT_INREG3]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[UV]], 1 + ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[UV1]], 1 + ; GFX9-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[UV2]], 1 + ; GFX9-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(i32) = G_SEXT_INREG [[UV3]], 1 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[SEXT_INREG]](i32), [[SEXT_INREG1]](i32), [[SEXT_INREG2]](i32), [[SEXT_INREG3]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; GFX8-LABEL: name: test_sext_inreg_v4s32_1 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV]], 1 - ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV1]], 1 - ; GFX8-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV2]], 1 - ; GFX8-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV3]], 1 - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32), [[SEXT_INREG2]](s32), [[SEXT_INREG3]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[UV]], 1 + ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[UV1]], 1 + ; 
GFX8-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[UV2]], 1 + ; GFX8-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(i32) = G_SEXT_INREG [[UV3]], 1 + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[SEXT_INREG]](i32), [[SEXT_INREG1]](i32), [[SEXT_INREG2]](i32), [[SEXT_INREG3]](i32) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) ; ; GFX6-LABEL: name: test_sext_inreg_v4s32_1 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV]], 1 - ; GFX6-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV1]], 1 - ; GFX6-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV2]], 1 - ; GFX6-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV3]], 1 - ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32), [[SEXT_INREG2]](s32), [[SEXT_INREG3]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<4 x s32>) = G_SEXT_INREG %0, 1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[UV]], 1 + ; GFX6-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[UV1]], 1 + ; GFX6-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[UV2]], 1 + ; GFX6-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(i32) = G_SEXT_INREG [[UV3]], 1 + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[SEXT_INREG]](i32), [[SEXT_INREG1]](i32), [[SEXT_INREG2]](i32), [[SEXT_INREG3]](i32) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<4 x i32>) = G_SEXT_INREG %0, 1 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<4 x i32>) ... 
--- @@ -1083,84 +1083,84 @@ body: | ; GFX9-LABEL: name: test_sext_inreg_v4s16_1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV]], [[BUILD_VECTOR]](<2 x s16>) - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SHL]], [[BUILD_VECTOR]](<2 x s16>) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV1]], [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SHL1]], [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[ASHR]](<2 x s16>), [[ASHR1]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 15 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[C]](i16), [[C]](i16) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x i16>) = G_SHL [[UV]], [[BUILD_VECTOR]](<2 x i16>) + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(<2 x i16>) = G_ASHR [[SHL]], [[BUILD_VECTOR]](<2 x i16>) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[C]](i16), [[C]](i16) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x i16>) = G_SHL [[UV1]], [[BUILD_VECTOR1]](<2 x i16>) + ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(<2 x i16>) = G_ASHR [[SHL1]], [[BUILD_VECTOR1]](<2 x i16>) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[ASHR]](<2 x i16>), [[ASHR1]](<2 x i16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX8-LABEL: name: test_sext_inreg_v4s16_1 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[C1]](s16) - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C1]](s16) - ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[C1]](s16) - ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[C1]](s16) - ; GFX8-NEXT: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[SHL2]], [[C1]](s16) - ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[C1]](s16) - ; GFX8-NEXT: [[ASHR3:%[0-9]+]]:_(s16) = G_ASHR 
[[SHL3]], [[C1]](s16) - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR]](s16) - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR1]](s16) - ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR2]](s16) - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR3]](s16) - ; GFX8-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] - ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 15 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[C1]](i16) + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i16) = G_ASHR [[SHL]], [[C1]](i16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[C1]](i16) + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(i16) = G_ASHR [[SHL1]], [[C1]](i16) + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[TRUNC2]], [[C1]](i16) + ; GFX8-NEXT: [[ASHR2:%[0-9]+]]:_(i16) = G_ASHR [[SHL2]], [[C1]](i16) + ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(i16) = G_SHL [[TRUNC3]], [[C1]](i16) + ; GFX8-NEXT: [[ASHR3:%[0-9]+]]:_(i16) = G_ASHR [[SHL3]], [[C1]](i16) + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[ASHR]](i16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[ASHR1]](i16) + ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[ASHR2]](i16) + ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[ASHR3]](i16) + ; GFX8-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x i16>), [[BITCAST3]](<2 x i16>) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX6-LABEL: name: test_sext_inreg_v4s16_1 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; 
GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 1 - ; GFX6-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 1 - ; GFX6-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 1 - ; GFX6-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 1 - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG]], [[C1]] - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG1]], [[C1]] - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG2]], [[C1]] - ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG3]], [[C1]] - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = G_SEXT_INREG %0, 1 - $vgpr0_vgpr1 = COPY %1 + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST]], 1 + ; GFX6-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR]], 1 + ; GFX6-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST1]], 1 + ; GFX6-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR1]], 1 + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[SEXT_INREG]], [[C1]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[SEXT_INREG1]], [[C1]] + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[SEXT_INREG2]], [[C1]] + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[SEXT_INREG3]], [[C1]] + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C]](i32) + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x i16>), [[BITCAST3]](<2 x i16>) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = G_SEXT_INREG %0, 1 + $vgpr0_vgpr1 = COPY %1(<4 x i16>) ... 
--- @@ -1169,105 +1169,105 @@ body: | bb.0: ; GFX9-LABEL: name: test_sext_inreg_v6s16_1 - ; GFX9: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV]], [[BUILD_VECTOR]](<2 x s16>) - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SHL]], [[BUILD_VECTOR]](<2 x s16>) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV1]], [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SHL1]], [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV2]], [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[ASHR2:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SHL2]], [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[ASHR]](<2 x s16>), [[ASHR1]](<2 x s16>), [[ASHR2]](<2 x s16>) - ; GFX9-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9: [[DEF:%[0-9]+]]:_(<6 x i16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<6 x i16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 15 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[C]](i16), [[C]](i16) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x i16>) = G_SHL [[UV]], [[BUILD_VECTOR]](<2 x i16>) + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(<2 x i16>) = G_ASHR [[SHL]], [[BUILD_VECTOR]](<2 x i16>) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[C]](i16), [[C]](i16) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x i16>) = G_SHL [[UV1]], [[BUILD_VECTOR1]](<2 x i16>) + ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(<2 x i16>) = G_ASHR [[SHL1]], [[BUILD_VECTOR1]](<2 x i16>) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[C]](i16), [[C]](i16) + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(<2 x i16>) = G_SHL [[UV2]], [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: [[ASHR2:%[0-9]+]]:_(<2 x i16>) = G_ASHR [[SHL2]], [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[ASHR]](<2 x i16>), [[ASHR1]](<2 x i16>), [[ASHR2]](<2 x i16>) + ; GFX9-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX8-LABEL: name: test_sext_inreg_v6s16_1 - ; GFX8: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: 
[[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[C1]](s16) - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C1]](s16) - ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[C1]](s16) - ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[C1]](s16) - ; GFX8-NEXT: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[SHL2]], [[C1]](s16) - ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[C1]](s16) - ; GFX8-NEXT: [[ASHR3:%[0-9]+]]:_(s16) = G_ASHR [[SHL3]], [[C1]](s16) - ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[TRUNC4]], [[C1]](s16) - ; GFX8-NEXT: [[ASHR4:%[0-9]+]]:_(s16) = G_ASHR [[SHL4]], [[C1]](s16) - ; GFX8-NEXT: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[TRUNC5]], [[C1]](s16) - ; GFX8-NEXT: [[ASHR5:%[0-9]+]]:_(s16) = G_ASHR [[SHL5]], [[C1]](s16) - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR]](s16) - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR1]](s16) - ; GFX8-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL6]] - ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR2]](s16) - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR3]](s16) - ; GFX8-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL7]] - ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR4]](s16) - ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR5]](s16) - ; GFX8-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C]](s32) - ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] - ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; GFX8-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<6 x s16>) + ; GFX8: [[DEF:%[0-9]+]]:_(<6 x i16>) = G_IMPLICIT_DEF + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<6 x i16>) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 15 + ; 
GFX8-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[C1]](i16) + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i16) = G_ASHR [[SHL]], [[C1]](i16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[C1]](i16) + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(i16) = G_ASHR [[SHL1]], [[C1]](i16) + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[TRUNC2]], [[C1]](i16) + ; GFX8-NEXT: [[ASHR2:%[0-9]+]]:_(i16) = G_ASHR [[SHL2]], [[C1]](i16) + ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(i16) = G_SHL [[TRUNC3]], [[C1]](i16) + ; GFX8-NEXT: [[ASHR3:%[0-9]+]]:_(i16) = G_ASHR [[SHL3]], [[C1]](i16) + ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(i16) = G_SHL [[TRUNC4]], [[C1]](i16) + ; GFX8-NEXT: [[ASHR4:%[0-9]+]]:_(i16) = G_ASHR [[SHL4]], [[C1]](i16) + ; GFX8-NEXT: [[SHL5:%[0-9]+]]:_(i16) = G_SHL [[TRUNC5]], [[C1]](i16) + ; GFX8-NEXT: [[ASHR5:%[0-9]+]]:_(i16) = G_ASHR [[SHL5]], [[C1]](i16) + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[ASHR]](i16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[ASHR1]](i16) + ; GFX8-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL6]] + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[ASHR2]](i16) + ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[ASHR3]](i16) + ; GFX8-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL7]] + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[ASHR4]](i16) + ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[ASHR5]](i16) + ; GFX8-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[ZEXT5]], [[C]](i32) + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT4]], [[SHL8]] + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>), [[BITCAST5]](<2 x i16>) + ; GFX8-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX6-LABEL: name: test_sext_inreg_v6s16_1 - ; GFX6: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) - ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 1 - ; GFX6-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 1 - ; GFX6-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 1 - ; GFX6-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 1 - ; GFX6-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST2]], 1 - ; GFX6-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR2]], 1 - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG]], [[C1]] - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG1]], [[C1]] - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) 
= G_OR [[AND]], [[SHL]] - ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG2]], [[C1]] - ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG3]], [[C1]] - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG4]], [[C1]] - ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG5]], [[C1]] - ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; GFX6-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<6 x s16>) - %0:_(<6 x s16>) = G_IMPLICIT_DEF - %1:_(<6 x s16>) = G_SEXT_INREG %0, 1 - S_ENDPGM 0, implicit %1 + ; GFX6: [[DEF:%[0-9]+]]:_(<6 x i16>) = G_IMPLICIT_DEF + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<6 x i16>) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST]], 1 + ; GFX6-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR]], 1 + ; GFX6-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST1]], 1 + ; GFX6-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR1]], 1 + ; GFX6-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST2]], 1 + ; GFX6-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR2]], 1 + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[SEXT_INREG]], [[C1]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[SEXT_INREG1]], [[C1]] + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[SEXT_INREG2]], [[C1]] + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[SEXT_INREG3]], [[C1]] + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C]](i32) + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[SEXT_INREG4]], [[C1]] + ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[SEXT_INREG5]], [[C1]] + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND5]], [[C]](i32) + ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[AND4]], [[SHL2]] + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>), [[BITCAST5]](<2 x i16>) + ; GFX6-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<6 x 
i16>) + %0:_(<6 x i16>) = G_IMPLICIT_DEF + %1:_(<6 x i16>) = G_SEXT_INREG %0, 1 + S_ENDPGM 0, implicit %1(<6 x i16>) ... @@ -1281,57 +1281,57 @@ body: | ; GFX9-LABEL: name: test_sext_inreg_v2s128_1 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](<2 x s128>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[UV]](s128) - ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[TRUNC]], 1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32) - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SEXT_INREG]](s64), [[ASHR]](s64) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s64) = G_TRUNC [[UV1]](s128) - ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[TRUNC1]], 1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG1]], [[COPY1]](s32) - ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SEXT_INREG1]](s64), [[ASHR1]](s64) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s128>) = G_BUILD_VECTOR [[MV]](s128), [[MV1]](s128) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<2 x s128>) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i128), [[UV1:%[0-9]+]]:_(i128) = G_UNMERGE_VALUES [[COPY]](<2 x i128>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i64) = G_TRUNC [[UV]](i128) + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[TRUNC]], 1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG]], [[C]](i32) + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SEXT_INREG]](i64), [[ASHR]](i64) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i64) = G_TRUNC [[UV1]](i128) + ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i64) = G_SEXT_INREG [[TRUNC1]], 1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C]](i32) + ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG1]], [[COPY1]](i32) + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SEXT_INREG1]](i64), [[ASHR1]](i64) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i128>) = G_BUILD_VECTOR [[MV]](i128), [[MV1]](i128) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<2 x i128>) ; ; GFX8-LABEL: name: test_sext_inreg_v2s128_1 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](<2 x s128>) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[UV]](s128) - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[TRUNC]], 1 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32) - ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SEXT_INREG]](s64), [[ASHR]](s64) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s64) = G_TRUNC [[UV1]](s128) - ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[TRUNC1]], 1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG1]], [[COPY1]](s32) - ; GFX8-NEXT: 
[[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SEXT_INREG1]](s64), [[ASHR1]](s64) - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s128>) = G_BUILD_VECTOR [[MV]](s128), [[MV1]](s128) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<2 x s128>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i128), [[UV1:%[0-9]+]]:_(i128) = G_UNMERGE_VALUES [[COPY]](<2 x i128>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i64) = G_TRUNC [[UV]](i128) + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[TRUNC]], 1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG]], [[C]](i32) + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SEXT_INREG]](i64), [[ASHR]](i64) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i64) = G_TRUNC [[UV1]](i128) + ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i64) = G_SEXT_INREG [[TRUNC1]], 1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C]](i32) + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG1]], [[COPY1]](i32) + ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SEXT_INREG1]](i64), [[ASHR1]](i64) + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i128>) = G_BUILD_VECTOR [[MV]](i128), [[MV1]](i128) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<2 x i128>) ; ; GFX6-LABEL: name: test_sext_inreg_v2s128_1 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x s128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](<2 x s128>) - ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[UV]](s128) - ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[TRUNC]], 1 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32) - ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SEXT_INREG]](s64), [[ASHR]](s64) - ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s64) = G_TRUNC [[UV1]](s128) - ; GFX6-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[TRUNC1]], 1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG1]], [[COPY1]](s32) - ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SEXT_INREG1]](s64), [[ASHR1]](s64) - ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s128>) = G_BUILD_VECTOR [[MV]](s128), [[MV1]](s128) - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<2 x s128>) - %0:_(<2 x s128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - %1:_(<2 x s128>) = G_SEXT_INREG %0, 1 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x i128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i128), [[UV1:%[0-9]+]]:_(i128) = G_UNMERGE_VALUES [[COPY]](<2 x i128>) + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(i64) = G_TRUNC [[UV]](i128) + ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[TRUNC]], 1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG]], [[C]](i32) + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SEXT_INREG]](i64), [[ASHR]](i64) + ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(i64) = G_TRUNC [[UV1]](i128) + ; GFX6-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i64) = G_SEXT_INREG [[TRUNC1]], 1 
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C]](i32) + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG1]], [[COPY1]](i32) + ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SEXT_INREG1]](i64), [[ASHR1]](i64) + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i128>) = G_BUILD_VECTOR [[MV]](i128), [[MV1]](i128) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<2 x i128>) + %0:_(<2 x i128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(<2 x i128>) = G_SEXT_INREG %0, 1 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1(<2 x i128>) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext.mir index 847ffc8aadc07..cf3af3e515ada 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext.mir @@ -10,12 +10,12 @@ body: | ; CHECK-LABEL: name: test_sext_s32_to_s64 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64) - %0:_(s32) = COPY $vgpr0 - %1:_(s64) = G_SEXT %0 - $vgpr0_vgpr1 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(i64) = G_SEXT [[COPY]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](i64) + %0:_(i32) = COPY $vgpr0 + %1:_(i64) = G_SEXT %0(i32) + $vgpr0_vgpr1 = COPY %1(i64) ... --- @@ -27,14 +27,14 @@ body: | ; CHECK-LABEL: name: test_sext_s16_to_s64 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s32) - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[ANYEXT]], 16 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s64) = G_SEXT %1 - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[COPY]](i32) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[ANYEXT]], 16 + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](i64) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(i64) = G_SEXT %1(i16) + $vgpr0_vgpr1 = COPY %2(i64) ... --- @@ -46,13 +46,13 @@ body: | ; CHECK-LABEL: name: test_sext_s16_to_s32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 - ; CHECK-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s32) = G_SEXT %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 16 + ; CHECK-NEXT: $vgpr0 = COPY [[SEXT_INREG]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(i32) = G_SEXT %1(i16) + $vgpr0 = COPY %2(i32) ... 
--- @@ -64,13 +64,13 @@ body: | ; CHECK-LABEL: name: test_sext_s24_to_s32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 24 - ; CHECK-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s24) = G_TRUNC %0 - %2:_(s32) = G_SEXT %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 24 + ; CHECK-NEXT: $vgpr0 = COPY [[SEXT_INREG]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i24) = G_TRUNC %0(i32) + %2:_(i32) = G_SEXT %1(i24) + $vgpr0 = COPY %2(i32) ... --- @@ -82,13 +82,13 @@ body: | ; CHECK-LABEL: name: test_sext_i1_to_s32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 1 - ; CHECK-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s1) = G_TRUNC %0 - %2:_(s32) = G_SEXT %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 1 + ; CHECK-NEXT: $vgpr0 = COPY [[SEXT_INREG]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i1) = G_TRUNC %0(i32) + %2:_(i32) = G_SEXT %1(i1) + $vgpr0 = COPY %2(i32) ... --- @@ -100,17 +100,17 @@ body: | ; CHECK-LABEL: name: test_sext_v2s16_to_v2s32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16 - ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s32>) = G_SEXT %0 - $vgpr0_vgpr1 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST]], 16 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR]], 16 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SEXT_INREG]](i32), [[SEXT_INREG1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i32>) = G_SEXT %0(<2 x i16>) + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... 
--- @@ -122,21 +122,21 @@ body: | ; CHECK-LABEL: name: test_sext_v3s16_to_v3s32 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16 - ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 - ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32), [[SEXT_INREG2]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<3 x s16>) = G_EXTRACT %0, 0 - %2:_(<3 x s32>) = G_SEXT %1 - $vgpr0_vgpr1_vgpr2 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST]], 16 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR]], 16 + ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST1]], 16 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[SEXT_INREG]](i32), [[SEXT_INREG1]](i32), [[SEXT_INREG2]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<3 x i16>) = G_EXTRACT %0(<4 x i16>), 0 + %2:_(<3 x i32>) = G_SEXT %1(<3 x i16>) + $vgpr0_vgpr1_vgpr2 = COPY %2(<3 x i32>) ... 
--- @@ -148,22 +148,22 @@ body: | ; CHECK-LABEL: name: test_sext_v4s16_to_v4s32 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16 - ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 - ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16 - ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 16 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32), [[SEXT_INREG2]](s32), [[SEXT_INREG3]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s32>) = G_SEXT %0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST]], 16 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR]], 16 + ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST1]], 16 + ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR1]], 16 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[SEXT_INREG]](i32), [[SEXT_INREG1]](i32), [[SEXT_INREG2]](i32), [[SEXT_INREG3]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i32>) = G_SEXT %0(<4 x i16>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<4 x i32>) ... 
--- @@ -175,15 +175,15 @@ body: | ; CHECK-LABEL: name: test_sext_v2s32_to_v2s64 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[UV]](s32) - ; CHECK-NEXT: [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT [[UV1]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SEXT]](s64), [[SEXT1]](s64) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s64>) = G_SEXT %0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(i64) = G_SEXT [[UV]](i32) + ; CHECK-NEXT: [[SEXT1:%[0-9]+]]:_(i64) = G_SEXT [[UV1]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[SEXT]](i64), [[SEXT1]](i64) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i64>) = G_SEXT %0(<2 x i32>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x i64>) ... --- @@ -195,16 +195,16 @@ body: | ; CHECK-LABEL: name: test_sext_v3s32_to_v3s64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[UV]](s32) - ; CHECK-NEXT: [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT [[UV1]](s32) - ; CHECK-NEXT: [[SEXT2:%[0-9]+]]:_(s64) = G_SEXT [[UV2]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[SEXT]](s64), [[SEXT1]](s64), [[SEXT2]](s64) - ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s64>) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s64>) = G_SEXT %0 - S_NOP 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(i64) = G_SEXT [[UV]](i32) + ; CHECK-NEXT: [[SEXT1:%[0-9]+]]:_(i64) = G_SEXT [[UV1]](i32) + ; CHECK-NEXT: [[SEXT2:%[0-9]+]]:_(i64) = G_SEXT [[UV2]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i64>) = G_BUILD_VECTOR [[SEXT]](i64), [[SEXT1]](i64), [[SEXT2]](i64) + ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x i64>) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x i64>) = G_SEXT %0(<3 x i32>) + S_NOP 0, implicit %1(<3 x i64>) ... 
@@ -217,17 +217,17 @@ body: | ; CHECK-LABEL: name: test_sext_v4s32_to_v4s64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[UV]](s32) - ; CHECK-NEXT: [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT [[UV1]](s32) - ; CHECK-NEXT: [[SEXT2:%[0-9]+]]:_(s64) = G_SEXT [[UV2]](s32) - ; CHECK-NEXT: [[SEXT3:%[0-9]+]]:_(s64) = G_SEXT [[UV3]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[SEXT]](s64), [[SEXT1]](s64), [[SEXT2]](s64), [[SEXT3]](s64) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<4 x s64>) = G_SEXT %0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(i64) = G_SEXT [[UV]](i32) + ; CHECK-NEXT: [[SEXT1:%[0-9]+]]:_(i64) = G_SEXT [[UV1]](i32) + ; CHECK-NEXT: [[SEXT2:%[0-9]+]]:_(i64) = G_SEXT [[UV2]](i32) + ; CHECK-NEXT: [[SEXT3:%[0-9]+]]:_(i64) = G_SEXT [[UV3]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[SEXT]](i64), [[SEXT1]](i64), [[SEXT2]](i64), [[SEXT3]](i64) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<4 x i64>) = G_SEXT %0(<4 x i32>) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1(<4 x i64>) ... --- @@ -239,16 +239,16 @@ body: | ; CHECK-LABEL: name: test_sext_s8_to_s16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[C]](s16) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[ASHR]](s16) - %0:_(s32) = COPY $vgpr0 - %1:_(s8) = G_TRUNC %0 - %2:_(s16) = G_SEXT %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[C]](i16) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(i16) = G_ASHR [[SHL]], [[C]](i16) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[ASHR]](i16) + %0:_(i32) = COPY $vgpr0 + %1:_(i8) = G_TRUNC %0(i32) + %2:_(i16) = G_SEXT %1(i8) + S_ENDPGM 0, implicit %2(i16) ... 
--- @@ -260,14 +260,14 @@ body: | ; CHECK-LABEL: name: test_sext_s8_to_s24 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[SEXT_INREG]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s24) - %0:_(s32) = COPY $vgpr0 - %1:_(s8) = G_TRUNC %0 - %2:_(s24) = G_SEXT %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 8 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i24) = G_TRUNC [[SEXT_INREG]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](i24) + %0:_(i32) = COPY $vgpr0 + %1:_(i8) = G_TRUNC %0(i32) + %2:_(i24) = G_SEXT %1(i8) + S_ENDPGM 0, implicit %2(i24) ... --- @@ -279,13 +279,13 @@ body: | ; CHECK-LABEL: name: test_sext_s7_to_s32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 7 - ; CHECK-NEXT: S_ENDPGM 0, implicit [[SEXT_INREG]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s7) = G_TRUNC %0 - %2:_(s32) = G_SEXT %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 7 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[SEXT_INREG]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i7) = G_TRUNC %0(i32) + %2:_(i32) = G_SEXT %1(i7) + S_ENDPGM 0, implicit %2(i32) ... --- @@ -297,13 +297,13 @@ body: | ; CHECK-LABEL: name: test_sext_s8_to_s32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 - ; CHECK-NEXT: S_ENDPGM 0, implicit [[SEXT_INREG]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s8) = G_TRUNC %0 - %2:_(s32) = G_SEXT %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 8 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[SEXT_INREG]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i8) = G_TRUNC %0(i32) + %2:_(i32) = G_SEXT %1(i8) + S_ENDPGM 0, implicit %2(i32) ... 
--- @@ -315,17 +315,17 @@ body: | ; CHECK-LABEL: name: test_sext_s32_to_s96 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[ASHR]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR]](s32), [[ASHR]](s32) - ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64), [[MV1]](s64) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s192) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s96) - %0:_(s32) = COPY $vgpr0 - %1:_(s96) = G_SEXT %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[COPY]], [[C]](i32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[ASHR]](i32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[ASHR]](i32), [[ASHR]](i32) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(i192) = G_MERGE_VALUES [[MV]](i64), [[MV1]](i64), [[MV1]](i64) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i96) = G_TRUNC [[MV2]](i192) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](i96) + %0:_(i32) = COPY $vgpr0 + %1:_(i96) = G_SEXT %0(i32) + S_ENDPGM 0, implicit %1(i96) ... --- @@ -337,16 +337,16 @@ body: | ; CHECK-LABEL: name: test_sext_s32_to_s128 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[ASHR]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR]](s32), [[ASHR]](s32) - ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV2]](s128) - %0:_(s32) = COPY $vgpr0 - %1:_(s128) = G_SEXT %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[COPY]], [[C]](i32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[ASHR]](i32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[ASHR]](i32), [[ASHR]](i32) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[MV]](i64), [[MV1]](i64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV2]](i128) + %0:_(i32) = COPY $vgpr0 + %1:_(i128) = G_SEXT %0(i32) + S_ENDPGM 0, implicit %1(i128) ... 
--- @@ -358,17 +358,17 @@ body: | ; CHECK-LABEL: name: test_sext_s32_to_s160 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[ASHR]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR]](s32), [[ASHR]](s32) - ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s320) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s160) = G_TRUNC [[MV2]](s320) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s160) - %0:_(s32) = COPY $vgpr0 - %1:_(s160) = G_SEXT %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[COPY]], [[C]](i32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[ASHR]](i32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[ASHR]](i32), [[ASHR]](i32) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(i320) = G_MERGE_VALUES [[MV]](i64), [[MV1]](i64), [[MV1]](i64), [[MV1]](i64), [[MV1]](i64) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i160) = G_TRUNC [[MV2]](i320) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](i160) + %0:_(i32) = COPY $vgpr0 + %1:_(i160) = G_SEXT %0(i32) + S_ENDPGM 0, implicit %1(i160) ... --- @@ -380,16 +380,16 @@ body: | ; CHECK-LABEL: name: test_sext_s32_to_s192 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[ASHR]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR]](s32), [[ASHR]](s32) - ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64), [[MV1]](s64) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV2]](s192) - %0:_(s32) = COPY $vgpr0 - %1:_(s192) = G_SEXT %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[COPY]], [[C]](i32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[ASHR]](i32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[ASHR]](i32), [[ASHR]](i32) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(i192) = G_MERGE_VALUES [[MV]](i64), [[MV1]](i64), [[MV1]](i64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV2]](i192) + %0:_(i32) = COPY $vgpr0 + %1:_(i192) = G_SEXT %0(i32) + S_ENDPGM 0, implicit %1(i192) ... 
--- @@ -401,17 +401,17 @@ body: | ; CHECK-LABEL: name: test_sext_s32_to_s224 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[ASHR]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR]](s32), [[ASHR]](s32) - ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s448) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s224) = G_TRUNC [[MV2]](s448) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s224) - %0:_(s32) = COPY $vgpr0 - %1:_(s224) = G_SEXT %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[COPY]], [[C]](i32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[ASHR]](i32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[ASHR]](i32), [[ASHR]](i32) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(i448) = G_MERGE_VALUES [[MV]](i64), [[MV1]](i64), [[MV1]](i64), [[MV1]](i64), [[MV1]](i64), [[MV1]](i64), [[MV1]](i64) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i224) = G_TRUNC [[MV2]](i448) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](i224) + %0:_(i32) = COPY $vgpr0 + %1:_(i224) = G_SEXT %0(i32) + S_ENDPGM 0, implicit %1(i224) ... --- @@ -423,16 +423,16 @@ body: | ; CHECK-LABEL: name: test_sext_s32_to_s256 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[ASHR]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR]](s32), [[ASHR]](s32) - ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV2]](s256) - %0:_(s32) = COPY $vgpr0 - %1:_(s256) = G_SEXT %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[COPY]], [[C]](i32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[ASHR]](i32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[ASHR]](i32), [[ASHR]](i32) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(i256) = G_MERGE_VALUES [[MV]](i64), [[MV1]](i64), [[MV1]](i64), [[MV1]](i64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV2]](i256) + %0:_(i32) = COPY $vgpr0 + %1:_(i256) = G_SEXT %0(i32) + S_ENDPGM 0, implicit %1(i256) ... 
--- @@ -444,16 +444,16 @@ body: | ; CHECK-LABEL: name: test_sext_s32_to_s512 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[ASHR]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR]](s32), [[ASHR]](s32) - ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s512) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV2]](s512) - %0:_(s32) = COPY $vgpr0 - %1:_(s512) = G_SEXT %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[COPY]], [[C]](i32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[ASHR]](i32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[ASHR]](i32), [[ASHR]](i32) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(i512) = G_MERGE_VALUES [[MV]](i64), [[MV1]](i64), [[MV1]](i64), [[MV1]](i64), [[MV1]](i64), [[MV1]](i64), [[MV1]](i64), [[MV1]](i64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV2]](i512) + %0:_(i32) = COPY $vgpr0 + %1:_(i512) = G_SEXT %0(i32) + S_ENDPGM 0, implicit %1(i512) ... --- @@ -465,17 +465,17 @@ body: | ; CHECK-LABEL: name: test_sext_s32_to_s992 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[ASHR]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR]](s32), [[ASHR]](s32) - ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s448) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s224) = G_TRUNC [[MV2]](s448) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s224) - %0:_(s32) = COPY $vgpr0 - %1:_(s224) = G_SEXT %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[COPY]], [[C]](i32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[ASHR]](i32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[ASHR]](i32), [[ASHR]](i32) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(i448) = G_MERGE_VALUES [[MV]](i64), [[MV1]](i64), [[MV1]](i64), [[MV1]](i64), [[MV1]](i64), [[MV1]](i64), [[MV1]](i64) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i224) = G_TRUNC [[MV2]](i448) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](i224) + %0:_(i32) = COPY $vgpr0 + %1:_(i224) = G_SEXT %0(i32) + S_ENDPGM 0, implicit %1(i224) ... 
--- @@ -488,16 +488,16 @@ body: | ; CHECK-LABEL: name: test_sext_s32_to_s1024 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[ASHR]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR]](s32), [[ASHR]](s32) - ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV2]](s1024) - %0:_(s32) = COPY $vgpr0 - %1:_(s1024) = G_SEXT %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[COPY]], [[C]](i32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[ASHR]](i32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[ASHR]](i32), [[ASHR]](i32) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(i1024) = G_MERGE_VALUES [[MV]](i64), [[MV1]](i64), [[MV1]](i64), [[MV1]](i64), [[MV1]](i64), [[MV1]](i64), [[MV1]](i64), [[MV1]](i64), [[MV1]](i64), [[MV1]](i64), [[MV1]](i64), [[MV1]](i64), [[MV1]](i64), [[MV1]](i64), [[MV1]](i64), [[MV1]](i64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV2]](i1024) + %0:_(i32) = COPY $vgpr0 + %1:_(i1024) = G_SEXT %0(i32) + S_ENDPGM 0, implicit %1(i1024) ... --- @@ -509,14 +509,14 @@ body: | ; CHECK-LABEL: name: test_sext_s64_to_s128 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY]](s64), [[ASHR]](s64) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s128) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s128) = G_SEXT %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[COPY]], [[C]](i32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[COPY]](i64), [[ASHR]](i64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](i128) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i128) = G_SEXT %0(i64) + S_ENDPGM 0, implicit %1(i128) ... 
--- @@ -528,14 +528,14 @@ body: | ; CHECK-LABEL: name: test_sext_s64_to_s192 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[COPY]](s64), [[ASHR]](s64), [[ASHR]](s64) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s192) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s192) = G_SEXT %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[COPY]], [[C]](i32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i192) = G_MERGE_VALUES [[COPY]](i64), [[ASHR]](i64), [[ASHR]](i64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](i192) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i192) = G_SEXT %0(i64) + S_ENDPGM 0, implicit %1(i192) ... --- @@ -547,14 +547,14 @@ body: | ; CHECK-LABEL: name: test_sext_s64_to_s256 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[COPY]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s256) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s256) = G_SEXT %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[COPY]], [[C]](i32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i256) = G_MERGE_VALUES [[COPY]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](i256) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i256) = G_SEXT %0(i64) + S_ENDPGM 0, implicit %1(i256) ... --- @@ -566,14 +566,14 @@ body: | ; CHECK-LABEL: name: test_sext_s64_to_s512 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s512) = G_MERGE_VALUES [[COPY]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s512) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s512) = G_SEXT %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[COPY]], [[C]](i32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i512) = G_MERGE_VALUES [[COPY]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](i512) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i512) = G_SEXT %0(i64) + S_ENDPGM 0, implicit %1(i512) ... 
--- @@ -585,14 +585,14 @@ body: | ; CHECK-LABEL: name: test_sext_s64_to_s1024 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[COPY]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s1024) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s1024) = G_SEXT %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[COPY]], [[C]](i32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i1024) = G_MERGE_VALUES [[COPY]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64), [[ASHR]](i64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](i1024) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i1024) = G_SEXT %0(i64) + S_ENDPGM 0, implicit %1(i1024) ... --- @@ -604,17 +604,17 @@ body: | ; CHECK-LABEL: name: test_sext_s96_to_s128 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV2]], [[C]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[ASHR]](s32) - ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV2]](s128) - %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(s128) = G_SEXT %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i96) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[UV2]], [[C]](i32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV]](i32), [[UV1]](i32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV2]](i32), [[ASHR]](i32) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[MV]](i64), [[MV1]](i64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV2]](i128) + %0:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(i128) = G_SEXT %0(i96) + S_ENDPGM 0, implicit %1(i128) ... 
--- @@ -626,15 +626,15 @@ body: | ; CHECK-LABEL: name: test_sext_s128_to_s256 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[UV]](s64), [[UV1]](s64), [[ASHR]](s64), [[ASHR]](s64) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s256) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s256) = G_SEXT %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[UV1]], [[C]](i32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i256) = G_MERGE_VALUES [[UV]](i64), [[UV1]](i64), [[ASHR]](i64), [[ASHR]](i64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](i256) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i256) = G_SEXT %0(i128) + S_ENDPGM 0, implicit %1(i256) ... --- @@ -646,80 +646,80 @@ body: | ; CHECK-LABEL: name: test_sext_s32_to_s88 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C3]](s16) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[C3]](s16) - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 7 - ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[ASHR]], [[C4]](s16) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C5]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C5]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL1]] - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C5]] - ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C5]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL2]] - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C5]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C5]] - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C3]](s16) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C5]] - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C5]] - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C3]](s16) - ; 
CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]] - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL5]] - ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C1]](s32) - ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL6]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C5]] - ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C5]] - ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C3]](s16) - ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL7]] - ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C5]] - ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C5]] - ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C3]](s16) - ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL8]] - ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C5]] - ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C5]] - ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C3]](s16) - ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL9]] - ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C5]] - ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C5]] - ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C3]](s16) - ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL10]] - ; CHECK-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) - ; CHECK-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) - ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C1]](s32) - ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL11]] - ; CHECK-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) - ; CHECK-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) - ; CHECK-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C1]](s32) - ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL12]] - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) - ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s704) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64) - ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s88) = G_TRUNC [[MV2]](s704) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC5]](s88) - %0:_(s32) = COPY $vgpr0 - %1:_(s88) = G_SEXT %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C1]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C2]](i32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[C3]](i16) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(i16) = G_ASHR [[SHL]], [[C3]](i16) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 7 + ; CHECK-NEXT: 
[[ASHR1:%[0-9]+]]:_(i16) = G_ASHR [[ASHR]], [[C4]](i16) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C5]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C5]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C3]](i16) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL1]] + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C5]] + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC4]], [[C5]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C3]](i16) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL2]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(i16) = G_AND [[ASHR1]], [[C5]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(i16) = G_AND [[ASHR1]], [[C5]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(i16) = G_SHL [[AND5]], [[C3]](i16) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(i16) = G_OR [[AND4]], [[SHL3]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(i16) = G_AND [[ASHR1]], [[C5]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(i16) = G_AND [[ASHR1]], [[C5]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(i16) = G_SHL [[AND7]], [[C3]](i16) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(i16) = G_OR [[AND6]], [[SHL4]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL5]] + ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[OR2]](i16) + ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[OR3]](i16) + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C1]](i32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL6]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR4]](i32), [[OR5]](i32) + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(i16) = G_AND [[ASHR1]], [[C5]] + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(i16) = G_AND [[ASHR1]], [[C5]] + ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(i16) = G_SHL [[AND9]], [[C3]](i16) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(i16) = G_OR [[AND8]], [[SHL7]] + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(i16) = G_AND [[ASHR1]], [[C5]] + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(i16) = G_AND [[ASHR1]], [[C5]] + ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(i16) = G_SHL [[AND11]], [[C3]](i16) + ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(i16) = G_OR [[AND10]], [[SHL8]] + ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(i16) = G_AND [[ASHR1]], [[C5]] + ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(i16) = G_AND [[ASHR1]], [[C5]] + ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(i16) = G_SHL [[AND13]], [[C3]](i16) + ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(i16) = G_OR [[AND12]], [[SHL9]] + ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(i16) = G_AND [[ASHR1]], [[C5]] + ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(i16) = G_AND [[ASHR1]], [[C5]] + ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(i16) = G_SHL [[AND15]], [[C3]](i16) + ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(i16) = G_OR [[AND14]], [[SHL10]] + ; CHECK-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[OR6]](i16) + ; CHECK-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[OR7]](i16) + ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(i32) = G_SHL [[ZEXT5]], [[C1]](i32) + ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[ZEXT4]], [[SHL11]] + ; CHECK-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[OR8]](i16) + ; CHECK-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[OR9]](i16) + ; CHECK-NEXT: 
[[SHL12:%[0-9]+]]:_(i32) = G_SHL [[ZEXT7]], [[C1]](i32) + ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(i32) = G_OR [[ZEXT6]], [[SHL12]] + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR10]](i32), [[OR11]](i32) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(i704) = G_MERGE_VALUES [[MV]](i64), [[MV1]](i64), [[MV1]](i64), [[MV1]](i64), [[MV1]](i64), [[MV1]](i64), [[MV1]](i64), [[MV1]](i64), [[MV1]](i64), [[MV1]](i64), [[MV1]](i64) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(i88) = G_TRUNC [[MV2]](i704) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC5]](i88) + %0:_(i32) = COPY $vgpr0 + %1:_(i88) = G_SEXT %0(i32) + S_ENDPGM 0, implicit %1(i88) ... # The instruction count blows up for this and takes too long to @@ -731,8 +731,8 @@ body: | # bb.0: # liveins: $vgpr0 -# %0:_(s32) = COPY $vgpr0 -# %1:_(s65) = G_SEXT %0 +# %0:_(i32) = COPY $vgpr0 +# %1:_(i65) = G_SEXT %0 # S_ENDPGM 0, implicit %1 # ... @@ -744,9 +744,9 @@ body: | # bb.0: # liveins: $vgpr0 -# %0:_(s32) = COPY $vgpr0 -# %1:_(s2) = G_TRUNC %0 -# %2:_(s112) = G_SEXT %1 +# %0:_(i32) = COPY $vgpr0 +# %1:_(i2) = G_TRUNC %0 +# %2:_(i112) = G_SEXT %1 # S_ENDPGM 0, implicit %2 # ... @@ -758,13 +758,13 @@ body: | ; CHECK-LABEL: name: test_sext_s112_to_s128 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[UV1]], 48 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[UV]](s64), [[SEXT_INREG]](s64) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s128) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s112) = G_TRUNC %0 - %2:_(s128) = G_SEXT %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[UV1]], 48 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[UV]](i64), [[SEXT_INREG]](i64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](i128) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i112) = G_TRUNC %0(i128) + %2:_(i128) = G_SEXT %1(i112) + S_ENDPGM 0, implicit %2(i128) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-constant-32bit.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-constant-32bit.mir index d87212d64d625..dd457a545a8be 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-constant-32bit.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-constant-32bit.mir @@ -12,15 +12,15 @@ body: | ; CI: liveins: $sgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0 - ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load (s32), addrspace 6) - ; CI-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64) + ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(i32) = G_PTRTOINT [[COPY]](p6) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](i32), [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[MV]](p4) :: (load (i32), addrspace 6) + ; CI-NEXT: [[SEXT:%[0-9]+]]:_(i64) = G_SEXT [[LOAD]](i32) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](i64) %0:_(p6) = COPY $sgpr0 - %1:_(s64) = G_SEXTLOAD %0 :: (load (s32), align 4, addrspace 6) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_SEXTLOAD %0(p6) :: (load (i32), addrspace 6) + $vgpr0_vgpr1 = COPY %1(i64) ... --- @@ -33,15 +33,15 @@ body: | ; CI: liveins: $sgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0 - ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load (s32), align 2, addrspace 6) - ; CI-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64) + ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(i32) = G_PTRTOINT [[COPY]](p6) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](i32), [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[MV]](p4) :: (load (i32), align 2, addrspace 6) + ; CI-NEXT: [[SEXT:%[0-9]+]]:_(i64) = G_SEXT [[LOAD]](i32) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](i64) %0:_(p6) = COPY $sgpr0 - %1:_(s64) = G_SEXTLOAD %0 :: (load (s32), align 2, addrspace 6) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_SEXTLOAD %0(p6) :: (load (i32), align 2, addrspace 6) + $vgpr0_vgpr1 = COPY %1(i64) ... 
--- @@ -54,15 +54,15 @@ body: | ; CI: liveins: $sgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0 - ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load (s32), align 1, addrspace 6) - ; CI-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64) + ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(i32) = G_PTRTOINT [[COPY]](p6) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](i32), [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[MV]](p4) :: (load (i32), align 1, addrspace 6) + ; CI-NEXT: [[SEXT:%[0-9]+]]:_(i64) = G_SEXT [[LOAD]](i32) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](i64) %0:_(p6) = COPY $sgpr0 - %1:_(s64) = G_SEXTLOAD %0 :: (load (s32), align 1, addrspace 6) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_SEXTLOAD %0(p6) :: (load (i32), align 1, addrspace 6) + $vgpr0_vgpr1 = COPY %1(i64) ... --- @@ -75,14 +75,14 @@ body: | ; CI: liveins: $sgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0 - ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32) - ; CI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[MV]](p4) :: (load (s8), addrspace 6) - ; CI-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32) + ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(i32) = G_PTRTOINT [[COPY]](p6) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](i32), [[C]](i32) + ; CI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(i32) = G_SEXTLOAD [[MV]](p4) :: (load (i8), addrspace 6) + ; CI-NEXT: $vgpr0 = COPY [[SEXTLOAD]](i32) %0:_(p6) = COPY $sgpr0 - %1:_(s32) = G_SEXTLOAD %0 :: (load (s8), align 1, addrspace 6) - $vgpr0 = COPY %1 + %1:_(i32) = G_SEXTLOAD %0(p6) :: (load (i8), addrspace 6) + $vgpr0 = COPY %1(i32) ... --- @@ -95,14 +95,14 @@ body: | ; CI: liveins: $sgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0 - ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32) - ; CI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[MV]](p4) :: (load (s16), addrspace 6) - ; CI-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32) + ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(i32) = G_PTRTOINT [[COPY]](p6) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](i32), [[C]](i32) + ; CI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(i32) = G_SEXTLOAD [[MV]](p4) :: (load (i16), addrspace 6) + ; CI-NEXT: $vgpr0 = COPY [[SEXTLOAD]](i32) %0:_(p6) = COPY $sgpr0 - %1:_(s32) = G_SEXTLOAD %0 :: (load (s16), align 2, addrspace 6) - $vgpr0 = COPY %1 + %1:_(i32) = G_SEXTLOAD %0(p6) :: (load (i16), addrspace 6) + $vgpr0 = COPY %1(i32) ... 
--- @@ -115,12 +115,12 @@ body: | ; CI: liveins: $sgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0 - ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32) - ; CI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[MV]](p4) :: (load (s16), align 1, addrspace 6) - ; CI-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32) + ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(i32) = G_PTRTOINT [[COPY]](p6) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](i32), [[C]](i32) + ; CI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(i32) = G_SEXTLOAD [[MV]](p4) :: (load (i16), align 1, addrspace 6) + ; CI-NEXT: $vgpr0 = COPY [[SEXTLOAD]](i32) %0:_(p6) = COPY $sgpr0 - %1:_(s32) = G_SEXTLOAD %0 :: (load (s16), align 1, addrspace 6) - $vgpr0 = COPY %1 + %1:_(i32) = G_SEXTLOAD %0(p6) :: (load (i16), align 1, addrspace 6) + $vgpr0 = COPY %1(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-flat.mir index 24243e278a8a6..80281054b609d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-flat.mir @@ -11,17 +11,18 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s8)) - ; SI-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32) + ; SI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(i32) = G_SEXTLOAD [[COPY]](p0) :: (load (i8)) + ; SI-NEXT: $vgpr0 = COPY [[SEXTLOAD]](i32) + ; ; VI-LABEL: name: test_sextload_flat_i32_i8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s8)) - ; VI-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32) + ; VI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(i32) = G_SEXTLOAD [[COPY]](p0) :: (load (i8)) + ; VI-NEXT: $vgpr0 = COPY [[SEXTLOAD]](i32) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_SEXTLOAD %0 :: (load (s8), addrspace 0) - $vgpr0 = COPY %1 + %1:_(i32) = G_SEXTLOAD %0(p0) :: (load (i8)) + $vgpr0 = COPY %1(i32) ... --- name: test_sextload_flat_i32_i16 @@ -33,17 +34,18 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s16)) - ; SI-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32) + ; SI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(i32) = G_SEXTLOAD [[COPY]](p0) :: (load (i16)) + ; SI-NEXT: $vgpr0 = COPY [[SEXTLOAD]](i32) + ; ; VI-LABEL: name: test_sextload_flat_i32_i16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s16)) - ; VI-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32) - %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_SEXTLOAD %0 :: (load (s16), addrspace 0) - $vgpr0 = COPY %1 + ; VI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(i32) = G_SEXTLOAD [[COPY]](p0) :: (load (i16)) + ; VI-NEXT: $vgpr0 = COPY [[SEXTLOAD]](i32) + %0:_(p0) = COPY $vgpr0_vgpr1 + %1:_(i32) = G_SEXTLOAD %0(p0) :: (load (i16)) + $vgpr0 = COPY %1(i32) ... 
--- name: test_sextload_flat_i31_i8 @@ -55,18 +57,19 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s8)) - ; SI-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32) + ; SI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(i32) = G_SEXTLOAD [[COPY]](p0) :: (load (i8)) + ; SI-NEXT: $vgpr0 = COPY [[SEXTLOAD]](i32) + ; ; VI-LABEL: name: test_sextload_flat_i31_i8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s8)) - ; VI-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32) + ; VI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(i32) = G_SEXTLOAD [[COPY]](p0) :: (load (i8)) + ; VI-NEXT: $vgpr0 = COPY [[SEXTLOAD]](i32) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s31) = G_SEXTLOAD %0 :: (load (s8), addrspace 0) - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + %1:_(i31) = G_SEXTLOAD %0(p0) :: (load (i8)) + %2:_(i32) = G_ANYEXT %1(i31) + $vgpr0 = COPY %2(i32) ... --- name: test_sextload_flat_i64_i8 @@ -78,19 +81,20 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s8)) - ; SI-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64) + ; SI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(i32) = G_SEXTLOAD [[COPY]](p0) :: (load (i8)) + ; SI-NEXT: [[SEXT:%[0-9]+]]:_(i64) = G_SEXT [[SEXTLOAD]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](i64) + ; ; VI-LABEL: name: test_sextload_flat_i64_i8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s8)) - ; VI-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64) + ; VI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(i32) = G_SEXTLOAD [[COPY]](p0) :: (load (i8)) + ; VI-NEXT: [[SEXT:%[0-9]+]]:_(i64) = G_SEXT [[SEXTLOAD]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](i64) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_SEXTLOAD %0 :: (load (s8), addrspace 0) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_SEXTLOAD %0(p0) :: (load (i8)) + $vgpr0_vgpr1 = COPY %1(i64) ... 
--- name: test_sextload_flat_i64_i16 @@ -102,19 +106,20 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s16)) - ; SI-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64) + ; SI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(i32) = G_SEXTLOAD [[COPY]](p0) :: (load (i16)) + ; SI-NEXT: [[SEXT:%[0-9]+]]:_(i64) = G_SEXT [[SEXTLOAD]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](i64) + ; ; VI-LABEL: name: test_sextload_flat_i64_i16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s16)) - ; VI-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64) + ; VI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(i32) = G_SEXTLOAD [[COPY]](p0) :: (load (i16)) + ; VI-NEXT: [[SEXT:%[0-9]+]]:_(i64) = G_SEXT [[SEXTLOAD]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](i64) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_SEXTLOAD %0 :: (load (s16), addrspace 0) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_SEXTLOAD %0(p0) :: (load (i16)) + $vgpr0_vgpr1 = COPY %1(i64) ... --- name: test_sextload_flat_i64_i32 @@ -126,17 +131,18 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; SI-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; SI-NEXT: [[SEXT:%[0-9]+]]:_(i64) = G_SEXT [[LOAD]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](i64) + ; ; VI-LABEL: name: test_sextload_flat_i64_i32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; VI-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; VI-NEXT: [[SEXT:%[0-9]+]]:_(i64) = G_SEXT [[LOAD]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](i64) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_SEXTLOAD %0 :: (load (s32), addrspace 0) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_SEXTLOAD %0(p0) :: (load (i32)) + $vgpr0_vgpr1 = COPY %1(i64) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-global.mir index 181cd132e91d9..bbc5b35fdb6fe 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-global.mir @@ -6,12 +6,12 @@ # FIXME: Run with and without unaligned access turned on # ERR-NOT: remark -# ERR: remark: :0:0: unable to legalize instruction: %1:_(<2 x s16>) = G_SEXTLOAD %0:_(p1) :: (load (<2 x s8>), addrspace 1) (in function: test_sextload_global_v2i16_from_v2s8) -# ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(<2 x s32>) = G_SEXTLOAD %0:_(p1) :: (load (<2 x s8>), addrspace 1) (in function: test_sextload_global_v2i32_from_v2s8) -# ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(<2 x s32>) = G_SEXTLOAD %0:_(p1) :: (load (<2 x s16>), addrspace 1) (in function: test_sextload_global_v2i32_from_v2s16) -# ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(<2 x s64>) = G_SEXTLOAD %0:_(p1) :: (load (<2 x s16>), addrspace 1) (in function: test_sextload_global_v2i64_from_v2s16) -# ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(<2 x s64>) = G_SEXTLOAD %0:_(p1) :: (load (<2 x s32>), addrspace 1) (in function: test_sextload_global_v2i64_from_v2s32) -# ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(s128) = G_SEXTLOAD %0:_(p1) :: (load (s64), addrspace 1) (in function: test_sextload_global_s128_8) +# ERR: remark: :0:0: unable to legalize instruction: %1:_(<2 x i16>) = G_SEXTLOAD %0:_(p1) :: (load (<2 x i8>), addrspace 1) (in function: test_sextload_global_v2i16_from_v2s8) +# ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(<2 x i32>) = G_SEXTLOAD %0:_(p1) :: (load (<2 x i8>), addrspace 1) (in function: test_sextload_global_v2i32_from_v2s8) +# ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(<2 x i32>) = G_SEXTLOAD %0:_(p1) :: (load (<2 x i16>), addrspace 1) (in function: test_sextload_global_v2i32_from_v2s16) +# ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(<2 x i64>) = G_SEXTLOAD %0:_(p1) :: (load (<2 x i16>), addrspace 1) (in function: test_sextload_global_v2i64_from_v2s16) +# ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(<2 x i64>) = G_SEXTLOAD %0:_(p1) :: (load (<2 x i32>), addrspace 1) (in function: test_sextload_global_v2i64_from_v2s32) +# ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(i128) = G_SEXTLOAD %0:_(p1) :: (load (i64), addrspace 1) (in function: test_sextload_global_s128_8) # ERR-NOT: remark --- @@ -24,19 +24,20 @@ body: | ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 1 - ; GFX8-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32) + ; GFX8-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LOAD]], 1 + ; GFX8-NEXT: $vgpr0 = COPY [[SEXT_INREG]](i32) + ; ; GFX6-LABEL: name: test_sextload_global_i32_i1 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 1 - ; GFX6-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32) + ; GFX6-NEXT: 
[[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LOAD]], 1 + ; GFX6-NEXT: $vgpr0 = COPY [[SEXT_INREG]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_SEXTLOAD %0 :: (load (s1), addrspace 1) - $vgpr0 = COPY %1 + %1:_(i32) = G_SEXTLOAD %0(p1) :: (load (i1), addrspace 1) + $vgpr0 = COPY %1(i32) ... --- @@ -49,19 +50,20 @@ body: | ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 7 - ; GFX8-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32) + ; GFX8-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LOAD]], 7 + ; GFX8-NEXT: $vgpr0 = COPY [[SEXT_INREG]](i32) + ; ; GFX6-LABEL: name: test_sextload_global_i32_i7 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 7 - ; GFX6-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32) + ; GFX6-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LOAD]], 7 + ; GFX6-NEXT: $vgpr0 = COPY [[SEXT_INREG]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_SEXTLOAD %0 :: (load (s7), addrspace 1) - $vgpr0 = COPY %1 + %1:_(i32) = G_SEXTLOAD %0(p1) :: (load (i7), addrspace 1) + $vgpr0 = COPY %1(i32) ... --- name: test_sextload_global_i32_i24 @@ -73,29 +75,30 @@ body: | ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX8-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX8-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[SEXTLOAD]], [[C1]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX8-NEXT: $vgpr0 = COPY [[OR]](s32) + ; GFX8-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), align 4, addrspace 1) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX8-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX8-NEXT: [[SEXTLOAD:%[0-9]+]]:_(i32) = G_SEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 2, align 2, addrspace 1) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[SEXTLOAD]], [[C1]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX8-NEXT: $vgpr0 = COPY [[OR]](i32) + ; ; GFX6-LABEL: name: test_sextload_global_i32_i24 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX6-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX6-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD 
[[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[SEXTLOAD]], [[C1]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX6-NEXT: $vgpr0 = COPY [[OR]](s32) + ; GFX6-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), align 4, addrspace 1) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX6-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX6-NEXT: [[SEXTLOAD:%[0-9]+]]:_(i32) = G_SEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 2, align 2, addrspace 1) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[SEXTLOAD]], [[C1]](i32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX6-NEXT: $vgpr0 = COPY [[OR]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_SEXTLOAD %0 :: (load (s24), addrspace 1) - $vgpr0 = COPY %1 + %1:_(i32) = G_SEXTLOAD %0(p1) :: (load (i24), align 4, addrspace 1) + $vgpr0 = COPY %1(i32) ... --- @@ -108,19 +111,20 @@ body: | ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 30 - ; GFX8-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32) + ; GFX8-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LOAD]], 30 + ; GFX8-NEXT: $vgpr0 = COPY [[SEXT_INREG]](i32) + ; ; GFX6-LABEL: name: test_sextload_global_i32_i30 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 30 - ; GFX6-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32) + ; GFX6-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LOAD]], 30 + ; GFX6-NEXT: $vgpr0 = COPY [[SEXT_INREG]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_SEXTLOAD %0 :: (load (s30), addrspace 1) - $vgpr0 = COPY %1 + %1:_(i32) = G_SEXTLOAD %0(p1) :: (load (i30), addrspace 1) + $vgpr0 = COPY %1(i32) ... 
--- @@ -133,19 +137,20 @@ body: | ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 31 - ; GFX8-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32) + ; GFX8-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LOAD]], 31 + ; GFX8-NEXT: $vgpr0 = COPY [[SEXT_INREG]](i32) + ; ; GFX6-LABEL: name: test_sextload_global_i32_i31 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 31 - ; GFX6-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32) + ; GFX6-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LOAD]], 31 + ; GFX6-NEXT: $vgpr0 = COPY [[SEXT_INREG]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_SEXTLOAD %0 :: (load (s31), addrspace 1) - $vgpr0 = COPY %1 + %1:_(i32) = G_SEXTLOAD %0(p1) :: (load (i31), addrspace 1) + $vgpr0 = COPY %1(i32) ... --- @@ -158,17 +163,18 @@ body: | ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GFX8-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32) + ; GFX8-NEXT: [[SEXTLOAD:%[0-9]+]]:_(i32) = G_SEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GFX8-NEXT: $vgpr0 = COPY [[SEXTLOAD]](i32) + ; ; GFX6-LABEL: name: test_sextload_global_i32_i8 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32) + ; GFX6-NEXT: [[SEXTLOAD:%[0-9]+]]:_(i32) = G_SEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GFX6-NEXT: $vgpr0 = COPY [[SEXTLOAD]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_SEXTLOAD %0 :: (load (s8), addrspace 1) - $vgpr0 = COPY %1 + %1:_(i32) = G_SEXTLOAD %0(p1) :: (load (i8), addrspace 1) + $vgpr0 = COPY %1(i32) ... --- @@ -181,17 +187,18 @@ body: | ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; GFX8-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32) + ; GFX8-NEXT: [[SEXTLOAD:%[0-9]+]]:_(i32) = G_SEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; GFX8-NEXT: $vgpr0 = COPY [[SEXTLOAD]](i32) + ; ; GFX6-LABEL: name: test_sextload_global_i32_i16 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32) + ; GFX6-NEXT: [[SEXTLOAD:%[0-9]+]]:_(i32) = G_SEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; GFX6-NEXT: $vgpr0 = COPY [[SEXTLOAD]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_SEXTLOAD %0 :: (load (s16), addrspace 1) - $vgpr0 = COPY %1 + %1:_(i32) = G_SEXTLOAD %0(p1) :: (load (i16), addrspace 1) + $vgpr0 = COPY %1(i32) ... 
--- name: test_sextload_global_i31_i8 @@ -203,18 +210,19 @@ body: | ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GFX8-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32) + ; GFX8-NEXT: [[SEXTLOAD:%[0-9]+]]:_(i32) = G_SEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GFX8-NEXT: $vgpr0 = COPY [[SEXTLOAD]](i32) + ; ; GFX6-LABEL: name: test_sextload_global_i31_i8 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32) + ; GFX6-NEXT: [[SEXTLOAD:%[0-9]+]]:_(i32) = G_SEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GFX6-NEXT: $vgpr0 = COPY [[SEXTLOAD]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s31) = G_SEXTLOAD %0 :: (load (s8), addrspace 1) - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + %1:_(i31) = G_SEXTLOAD %0(p1) :: (load (i8), addrspace 1) + %2:_(i32) = G_ANYEXT %1(i31) + $vgpr0 = COPY %2(i32) ... --- name: test_sextload_global_i64_i8 @@ -226,19 +234,20 @@ body: | ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64) + ; GFX8-NEXT: [[SEXTLOAD:%[0-9]+]]:_(i32) = G_SEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(i64) = G_SEXT [[SEXTLOAD]](i32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](i64) + ; ; GFX6-LABEL: name: test_sextload_global_i64_i8 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GFX6-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64) + ; GFX6-NEXT: [[SEXTLOAD:%[0-9]+]]:_(i32) = G_SEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GFX6-NEXT: [[SEXT:%[0-9]+]]:_(i64) = G_SEXT [[SEXTLOAD]](i32) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](i64) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_SEXTLOAD %0 :: (load (s8), addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_SEXTLOAD %0(p1) :: (load (i8), addrspace 1) + $vgpr0_vgpr1 = COPY %1(i64) ... 
--- name: test_sextload_global_i64_i16 @@ -250,19 +259,20 @@ body: | ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64) + ; GFX8-NEXT: [[SEXTLOAD:%[0-9]+]]:_(i32) = G_SEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(i64) = G_SEXT [[SEXTLOAD]](i32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](i64) + ; ; GFX6-LABEL: name: test_sextload_global_i64_i16 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; GFX6-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64) + ; GFX6-NEXT: [[SEXTLOAD:%[0-9]+]]:_(i32) = G_SEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; GFX6-NEXT: [[SEXT:%[0-9]+]]:_(i64) = G_SEXT [[SEXTLOAD]](i32) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](i64) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_SEXTLOAD %0 :: (load (s16), addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_SEXTLOAD %0(p1) :: (load (i16), addrspace 1) + $vgpr0_vgpr1 = COPY %1(i64) ... --- name: test_sextload_global_i64_i32 @@ -274,19 +284,20 @@ body: | ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64) + ; GFX8-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(i64) = G_SEXT [[LOAD]](i32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](i64) + ; ; GFX6-LABEL: name: test_sextload_global_i64_i32 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; GFX6-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64) + ; GFX6-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; GFX6-NEXT: [[SEXT:%[0-9]+]]:_(i64) = G_SEXT [[LOAD]](i32) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](i64) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_SEXTLOAD %0 :: (load (s32), addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_SEXTLOAD %0(p1) :: (load (i32), addrspace 1) + $vgpr0_vgpr1 = COPY %1(i64) ... 
--- @@ -299,23 +310,24 @@ body: | ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1) - ; GFX8-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32) + ; GFX8-NEXT: [[SEXTLOAD:%[0-9]+]]:_(i32) = G_SEXTLOAD [[COPY]](p1) :: (load (i16), align 1, addrspace 1) + ; GFX8-NEXT: $vgpr0 = COPY [[SEXTLOAD]](i32) + ; ; GFX6-LABEL: name: test_sextload_global_s32_from_2_align1 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX6-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX6-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[SEXTLOAD]], [[C1]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX6-NEXT: $vgpr0 = COPY [[OR]](s32) + ; GFX6-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX6-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX6-NEXT: [[SEXTLOAD:%[0-9]+]]:_(i32) = G_SEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[SEXTLOAD]], [[C1]](i32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX6-NEXT: $vgpr0 = COPY [[OR]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_SEXTLOAD %0 :: (load (s16), align 1, addrspace 1) - $vgpr0 = COPY %1 + %1:_(i32) = G_SEXTLOAD %0(p1) :: (load (i16), align 1, addrspace 1) + $vgpr0 = COPY %1(i32) ... 
--- @@ -328,25 +340,26 @@ body: | ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1) - ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64) + ; GFX8-NEXT: [[SEXTLOAD:%[0-9]+]]:_(i32) = G_SEXTLOAD [[COPY]](p1) :: (load (i16), align 1, addrspace 1) + ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(i64) = G_SEXT [[SEXTLOAD]](i32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](i64) + ; ; GFX6-LABEL: name: test_sextload_global_s64_from_2_align1 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX6-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX6-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[SEXTLOAD]], [[C1]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX6-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[OR]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64) + ; GFX6-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX6-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX6-NEXT: [[SEXTLOAD:%[0-9]+]]:_(i32) = G_SEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[SEXTLOAD]], [[C1]](i32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX6-NEXT: [[SEXT:%[0-9]+]]:_(i64) = G_SEXT [[OR]](i32) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](i64) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_SEXTLOAD %0 :: (load (s16), align 1, addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_SEXTLOAD %0(p1) :: (load (i16), align 1, addrspace 1) + $vgpr0_vgpr1 = COPY %1(i64) ... --- @@ -359,17 +372,18 @@ body: | ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[SEXTLOAD:%[0-9]+]]:_(<2 x s16>) = G_SEXTLOAD [[COPY]](p1) :: (load (<2 x s8>), addrspace 1) - ; GFX8-NEXT: $vgpr0 = COPY [[SEXTLOAD]](<2 x s16>) + ; GFX8-NEXT: [[SEXTLOAD:%[0-9]+]]:_(<2 x i16>) = G_SEXTLOAD [[COPY]](p1) :: (load (<2 x i8>), addrspace 1) + ; GFX8-NEXT: $vgpr0 = COPY [[SEXTLOAD]](<2 x i16>) + ; ; GFX6-LABEL: name: test_sextload_global_v2i16_from_v2s8 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[SEXTLOAD:%[0-9]+]]:_(<2 x s16>) = G_SEXTLOAD [[COPY]](p1) :: (load (<2 x s8>), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[SEXTLOAD]](<2 x s16>) + ; GFX6-NEXT: [[SEXTLOAD:%[0-9]+]]:_(<2 x i16>) = G_SEXTLOAD [[COPY]](p1) :: (load (<2 x i8>), addrspace 1) + ; GFX6-NEXT: $vgpr0 = COPY [[SEXTLOAD]](<2 x i16>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s16>) = G_SEXTLOAD %0 :: (load (<2 x s8>), addrspace 1) - $vgpr0 = COPY %1 + %1:_(<2 x i16>) = G_SEXTLOAD %0(p1) :: (load (<2 x i8>), addrspace 1) + $vgpr0 = COPY %1(<2 x i16>) ... 
--- @@ -382,17 +396,18 @@ body: | ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[SEXTLOAD:%[0-9]+]]:_(<2 x s32>) = G_SEXTLOAD [[COPY]](p1) :: (load (<2 x s8>), addrspace 1) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SEXTLOAD]](<2 x s32>) + ; GFX8-NEXT: [[SEXTLOAD:%[0-9]+]]:_(<2 x i32>) = G_SEXTLOAD [[COPY]](p1) :: (load (<2 x i8>), addrspace 1) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SEXTLOAD]](<2 x i32>) + ; ; GFX6-LABEL: name: test_sextload_global_v2i32_from_v2s8 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[SEXTLOAD:%[0-9]+]]:_(<2 x s32>) = G_SEXTLOAD [[COPY]](p1) :: (load (<2 x s8>), addrspace 1) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SEXTLOAD]](<2 x s32>) + ; GFX6-NEXT: [[SEXTLOAD:%[0-9]+]]:_(<2 x i32>) = G_SEXTLOAD [[COPY]](p1) :: (load (<2 x i8>), addrspace 1) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SEXTLOAD]](<2 x i32>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_SEXTLOAD %0 :: (load (<2 x s8>), addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:_(<2 x i32>) = G_SEXTLOAD %0(p1) :: (load (<2 x i8>), addrspace 1) + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... --- @@ -405,17 +420,18 @@ body: | ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[SEXTLOAD:%[0-9]+]]:_(<2 x s32>) = G_SEXTLOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SEXTLOAD]](<2 x s32>) + ; GFX8-NEXT: [[SEXTLOAD:%[0-9]+]]:_(<2 x i32>) = G_SEXTLOAD [[COPY]](p1) :: (load (<2 x i16>), addrspace 1) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SEXTLOAD]](<2 x i32>) + ; ; GFX6-LABEL: name: test_sextload_global_v2i32_from_v2s16 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[SEXTLOAD:%[0-9]+]]:_(<2 x s32>) = G_SEXTLOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SEXTLOAD]](<2 x s32>) + ; GFX6-NEXT: [[SEXTLOAD:%[0-9]+]]:_(<2 x i32>) = G_SEXTLOAD [[COPY]](p1) :: (load (<2 x i16>), addrspace 1) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SEXTLOAD]](<2 x i32>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_SEXTLOAD %0 :: (load (<2 x s16>), addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:_(<2 x i32>) = G_SEXTLOAD %0(p1) :: (load (<2 x i16>), addrspace 1) + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... 
--- @@ -428,17 +444,18 @@ body: | ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[SEXTLOAD:%[0-9]+]]:_(<2 x s64>) = G_SEXTLOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[SEXTLOAD]](<2 x s64>) + ; GFX8-NEXT: [[SEXTLOAD:%[0-9]+]]:_(<2 x i64>) = G_SEXTLOAD [[COPY]](p1) :: (load (<2 x i16>), addrspace 1) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[SEXTLOAD]](<2 x i64>) + ; ; GFX6-LABEL: name: test_sextload_global_v2i64_from_v2s16 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[SEXTLOAD:%[0-9]+]]:_(<2 x s64>) = G_SEXTLOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1) - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[SEXTLOAD]](<2 x s64>) + ; GFX6-NEXT: [[SEXTLOAD:%[0-9]+]]:_(<2 x i64>) = G_SEXTLOAD [[COPY]](p1) :: (load (<2 x i16>), addrspace 1) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[SEXTLOAD]](<2 x i64>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s64>) = G_SEXTLOAD %0 :: (load (<2 x s16>), addrspace 1) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<2 x i64>) = G_SEXTLOAD %0(p1) :: (load (<2 x i16>), addrspace 1) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x i64>) ... --- @@ -451,17 +468,18 @@ body: | ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[SEXTLOAD:%[0-9]+]]:_(<2 x s64>) = G_SEXTLOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[SEXTLOAD]](<2 x s64>) + ; GFX8-NEXT: [[SEXTLOAD:%[0-9]+]]:_(<2 x i64>) = G_SEXTLOAD [[COPY]](p1) :: (load (<2 x i32>), addrspace 1) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[SEXTLOAD]](<2 x i64>) + ; ; GFX6-LABEL: name: test_sextload_global_v2i64_from_v2s32 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[SEXTLOAD:%[0-9]+]]:_(<2 x s64>) = G_SEXTLOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1) - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[SEXTLOAD]](<2 x s64>) + ; GFX6-NEXT: [[SEXTLOAD:%[0-9]+]]:_(<2 x i64>) = G_SEXTLOAD [[COPY]](p1) :: (load (<2 x i32>), addrspace 1) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[SEXTLOAD]](<2 x i64>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s64>) = G_SEXTLOAD %0 :: (load (<2 x s32>), addrspace 1) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<2 x i64>) = G_SEXTLOAD %0(p1) :: (load (<2 x i32>), addrspace 1) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x i64>) ... 
--- @@ -474,15 +492,16 @@ body: | ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s128) = G_SEXTLOAD [[COPY]](p1) :: (load (s64), addrspace 1) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[SEXTLOAD]](s128) + ; GFX8-NEXT: [[SEXTLOAD:%[0-9]+]]:_(i128) = G_SEXTLOAD [[COPY]](p1) :: (load (i64), addrspace 1) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[SEXTLOAD]](i128) + ; ; GFX6-LABEL: name: test_sextload_global_s128_8 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s128) = G_SEXTLOAD [[COPY]](p1) :: (load (s64), addrspace 1) - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[SEXTLOAD]](s128) + ; GFX6-NEXT: [[SEXTLOAD:%[0-9]+]]:_(i128) = G_SEXTLOAD [[COPY]](p1) :: (load (i64), addrspace 1) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[SEXTLOAD]](i128) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s128) = G_SEXTLOAD %0 :: (load (s64), addrspace 1) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(i128) = G_SEXTLOAD %0(p1) :: (load (i64), addrspace 1) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(i128) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-local.mir index dbcdbb7bc2dd0..7c0decf5fbe12 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-local.mir @@ -11,11 +11,11 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; CHECK-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32) + ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(i32) = G_SEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; CHECK-NEXT: $vgpr0 = COPY [[SEXTLOAD]](i32) %0:_(p3) = COPY $vgpr0 - %1:_(s32) = G_SEXTLOAD %0 :: (load (s8), addrspace 3) - $vgpr0 = COPY %1 + %1:_(i32) = G_SEXTLOAD %0(p3) :: (load (i8), addrspace 3) + $vgpr0 = COPY %1(i32) ... --- name: test_sextload_local_i32_i16 @@ -27,11 +27,11 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; CHECK-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32) + ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(i32) = G_SEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; CHECK-NEXT: $vgpr0 = COPY [[SEXTLOAD]](i32) %0:_(p3) = COPY $vgpr0 - %1:_(s32) = G_SEXTLOAD %0 :: (load (s16), addrspace 3) - $vgpr0 = COPY %1 + %1:_(i32) = G_SEXTLOAD %0(p3) :: (load (i16), addrspace 3) + $vgpr0 = COPY %1(i32) ... --- name: test_sextload_local_i31_i8 @@ -43,12 +43,12 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; CHECK-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32) + ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(i32) = G_SEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; CHECK-NEXT: $vgpr0 = COPY [[SEXTLOAD]](i32) %0:_(p3) = COPY $vgpr0 - %1:_(s31) = G_SEXTLOAD %0 :: (load (s8), addrspace 3) - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + %1:_(i31) = G_SEXTLOAD %0(p3) :: (load (i8), addrspace 3) + %2:_(i32) = G_ANYEXT %1(i31) + $vgpr0 = COPY %2(i32) ... 
--- name: test_sextload_local_i64_i8 @@ -60,12 +60,12 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64) + ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(i32) = G_SEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(i64) = G_SEXT [[SEXTLOAD]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](i64) %0:_(p3) = COPY $vgpr0 - %1:_(s64) = G_SEXTLOAD %0 :: (load (s8), addrspace 3) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_SEXTLOAD %0(p3) :: (load (i8), addrspace 3) + $vgpr0_vgpr1 = COPY %1(i64) ... --- name: test_sextload_local_i64_i16 @@ -77,12 +77,12 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64) + ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(i32) = G_SEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(i64) = G_SEXT [[SEXTLOAD]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](i64) %0:_(p3) = COPY $vgpr0 - %1:_(s64) = G_SEXTLOAD %0 :: (load (s16), addrspace 3) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_SEXTLOAD %0(p3) :: (load (i16), addrspace 3) + $vgpr0_vgpr1 = COPY %1(i64) ... --- name: test_sextload_local_i64_i32 @@ -94,10 +94,10 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(i64) = G_SEXT [[LOAD]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](i64) %0:_(p3) = COPY $vgpr0 - %1:_(s64) = G_SEXTLOAD %0 :: (load (s32), addrspace 3) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_SEXTLOAD %0(p3) :: (load (i32), addrspace 3) + $vgpr0_vgpr1 = COPY %1(i64) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-private.mir index c30ec41c351dc..3d0ade48f8364 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-private.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-private.mir @@ -12,12 +12,12 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; CHECK-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32) + ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(i32) = G_SEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; CHECK-NEXT: $vgpr0 = COPY [[SEXTLOAD]](i32) %0:_(p5) = COPY $vgpr0 - %1:_(s32) = G_SEXTLOAD %0 :: (load (s8), addrspace 5) + %1:_(i32) = G_SEXTLOAD %0(p5) :: (load (i8), addrspace 5) + $vgpr0 = COPY %1(i32) - $vgpr0 = COPY %1 ... 
--- name: test_sextload_private_i32_i16 @@ -29,11 +29,11 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; CHECK-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32) + ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(i32) = G_SEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; CHECK-NEXT: $vgpr0 = COPY [[SEXTLOAD]](i32) %0:_(p5) = COPY $vgpr0 - %1:_(s32) = G_SEXTLOAD %0 :: (load (s16), addrspace 5) - $vgpr0 = COPY %1 + %1:_(i32) = G_SEXTLOAD %0(p5) :: (load (i16), addrspace 5) + $vgpr0 = COPY %1(i32) ... --- name: test_sextload_private_i31_i8 @@ -45,12 +45,12 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; CHECK-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32) + ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(i32) = G_SEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; CHECK-NEXT: $vgpr0 = COPY [[SEXTLOAD]](i32) %0:_(p5) = COPY $vgpr0 - %1:_(s31) = G_SEXTLOAD %0 :: (load (s8), addrspace 5) - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + %1:_(i31) = G_SEXTLOAD %0(p5) :: (load (i8), addrspace 5) + %2:_(i32) = G_ANYEXT %1(i31) + $vgpr0 = COPY %2(i32) ... --- name: test_sextload_private_i64_i8 @@ -62,12 +62,12 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64) + ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(i32) = G_SEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(i64) = G_SEXT [[SEXTLOAD]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](i64) %0:_(p5) = COPY $vgpr0 - %1:_(s64) = G_SEXTLOAD %0 :: (load (s8), addrspace 5) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_SEXTLOAD %0(p5) :: (load (i8), addrspace 5) + $vgpr0_vgpr1 = COPY %1(i64) ... --- name: test_sextload_private_i64_i16 @@ -79,12 +79,12 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64) + ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(i32) = G_SEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(i64) = G_SEXT [[SEXTLOAD]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](i64) %0:_(p5) = COPY $vgpr0 - %1:_(s64) = G_SEXTLOAD %0 :: (load (s16), addrspace 5) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_SEXTLOAD %0(p5) :: (load (i16), addrspace 5) + $vgpr0_vgpr1 = COPY %1(i64) ... 
--- name: test_sextload_private_i64_i32 @@ -96,10 +96,10 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(i64) = G_SEXT [[LOAD]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](i64) %0:_(p5) = COPY $vgpr0 - %1:_(s64) = G_SEXTLOAD %0 :: (load (s32), addrspace 5) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_SEXTLOAD %0(p5) :: (load (i32), addrspace 5) + $vgpr0_vgpr1 = COPY %1(i64) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir index b1de5dbdff9f1..6bf5d9e0ef854 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir @@ -14,30 +14,30 @@ body: | ; SI-LABEL: name: test_shl_s32_s32 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) - ; SI-NEXT: $vgpr0 = COPY [[SHL]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[COPY1]](i32) + ; SI-NEXT: $vgpr0 = COPY [[SHL]](i32) ; ; VI-LABEL: name: test_shl_s32_s32 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) - ; VI-NEXT: $vgpr0 = COPY [[SHL]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[COPY1]](i32) + ; VI-NEXT: $vgpr0 = COPY [[SHL]](i32) ; ; GFX9-LABEL: name: test_shl_s32_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) - ; GFX9-NEXT: $vgpr0 = COPY [[SHL]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_SHL %0, %1 - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[COPY1]](i32) + ; GFX9-NEXT: $vgpr0 = COPY [[SHL]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_SHL %0, %1(i32) + $vgpr0 = COPY %2(i32) ... 
--- name: test_shl_s64_s64 @@ -48,33 +48,33 @@ body: | ; SI-LABEL: name: test_shl_s64_s64 ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[SHL]](s64) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY1]](i64) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[COPY]], [[TRUNC]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[SHL]](i64) ; ; VI-LABEL: name: test_shl_s64_s64 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[SHL]](s64) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY1]](i64) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[COPY]], [[TRUNC]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[SHL]](i64) ; ; GFX9-LABEL: name: test_shl_s64_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SHL]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = G_SHL %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY1]](i64) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[COPY]], [[TRUNC]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SHL]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64) = G_SHL %0, %1(i64) + $vgpr0_vgpr1 = COPY %2(i64) ... 
--- name: test_shl_s64_s32 @@ -85,30 +85,30 @@ body: | ; SI-LABEL: name: test_shl_s64_s32 ; SI: liveins: $vgpr0_vgpr1, $vgpr2 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[COPY1]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[SHL]](s64) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[COPY]], [[COPY1]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[SHL]](i64) ; ; VI-LABEL: name: test_shl_s64_s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[COPY1]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[SHL]](s64) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[COPY]], [[COPY1]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[SHL]](i64) ; ; GFX9-LABEL: name: test_shl_s64_s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[COPY1]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SHL]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s64) = G_SHL %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[COPY]], [[COPY1]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SHL]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = COPY $vgpr2 + %2:_(i64) = G_SHL %0, %1(i32) + $vgpr0_vgpr1 = COPY %2(i64) ... 
--- name: test_shl_s64_s16 @@ -119,37 +119,37 @@ body: | ; SI-LABEL: name: test_shl_s64_s16 ; SI: liveins: $vgpr0_vgpr1, $vgpr2 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[AND]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[SHL]](s64) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[COPY]], [[AND]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[SHL]](i64) ; ; VI-LABEL: name: test_shl_s64_s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[AND]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[SHL]](s64) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[COPY]], [[AND]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[SHL]](i64) ; ; GFX9-LABEL: name: test_shl_s64_s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[AND]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SHL]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s16) = G_TRUNC %1 - %3:_(s64) = G_SHL %0, %2 - $vgpr0_vgpr1 = COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[COPY]], [[AND]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SHL]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = COPY $vgpr2 + %2:_(i16) = G_TRUNC %1(i32) + %3:_(i64) = G_SHL %0, %2(i16) + $vgpr0_vgpr1 = COPY %3(i64) ... 
--- @@ -161,38 +161,38 @@ body: | ; SI-LABEL: name: test_shl_s16_s32 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) - ; SI-NEXT: $vgpr0 = COPY [[SHL]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[COPY1]](i32) + ; SI-NEXT: $vgpr0 = COPY [[SHL]](i32) ; ; VI-LABEL: name: test_shl_s16_s32 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[TRUNC1]](i16) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[SHL]](i16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: test_shl_s16_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_SHL %2, %1 - %4:_(s32) = G_ANYEXT %3 - $vgpr0 = COPY %4 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[TRUNC1]](i16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[SHL]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_SHL %2, %1(i32) + %4:_(i32) = G_ANYEXT %3(i16) + $vgpr0 = COPY %4(i32) ... 
--- @@ -204,41 +204,41 @@ body: | ; SI-LABEL: name: test_shl_s16_s16 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND]](s32) - ; SI-NEXT: $vgpr0 = COPY [[SHL]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[AND]](i32) + ; SI-NEXT: $vgpr0 = COPY [[SHL]](i32) ; ; VI-LABEL: name: test_shl_s16_s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[TRUNC1]](i16) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[SHL]](i16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: test_shl_s16_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s16) = G_SHL %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[TRUNC1]](i16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[SHL]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i16) = G_SHL %2, %3(i16) + %5:_(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... 
--- @@ -250,45 +250,45 @@ body: | ; SI-LABEL: name: test_shl_s16_i8 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND]](s32) - ; SI-NEXT: $vgpr0 = COPY [[SHL]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[AND]](i32) + ; SI-NEXT: $vgpr0 = COPY [[SHL]](i32) ; ; VI-LABEL: name: test_shl_s16_i8 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s16) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C]] + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[AND]](i16) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[SHL]](i16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: test_shl_s16_i8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s16) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s8) = G_TRUNC %1 - %4:_(s16) = G_SHL %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C]] + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[AND]](i16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[SHL]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i8) = G_TRUNC %1(i32) + %4:_(i16) = G_SHL %2, %3(i8) + %5:_(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... 
--- @@ -300,45 +300,45 @@ body: | ; SI-LABEL: name: test_shl_i8_i8 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND]](s32) - ; SI-NEXT: $vgpr0 = COPY [[SHL]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[AND]](i32) + ; SI-NEXT: $vgpr0 = COPY [[SHL]](i32) ; ; VI-LABEL: name: test_shl_i8_i8 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[AND]](s16) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C]] + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[AND]](i16) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[SHL]](i16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: test_shl_i8_i8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[AND]](s16) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s8) = G_TRUNC %0 - %3:_(s8) = G_TRUNC %1 - %4:_(s8) = G_SHL %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C]] + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[AND]](i16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[SHL]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i8) = G_TRUNC %0(i32) + %3:_(i8) = G_TRUNC %1(i32) + %4:_(i8) = G_SHL %2, %3(i8) + %5:_(i32) = G_ANYEXT %4(i8) + $vgpr0 = COPY %5(i32) ... 
--- @@ -351,42 +351,42 @@ body: | ; SI-LABEL: name: test_shl_v2s32_v2s32 ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[UV2]](s32) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[UV3]](s32) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SHL]](s32), [[SHL1]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[UV]], [[UV2]](i32) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[UV1]], [[UV3]](i32) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SHL]](i32), [[SHL1]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; VI-LABEL: name: test_shl_v2s32_v2s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[UV2]](s32) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[UV3]](s32) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SHL]](s32), [[SHL1]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[UV]], [[UV2]](i32) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[UV1]], [[UV3]](i32) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SHL]](i32), [[SHL1]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX9-LABEL: name: test_shl_v2s32_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[UV2]](s32) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[UV3]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SHL]](s32), [[SHL1]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = G_SHL %0, %1 - $vgpr0_vgpr1 = 
COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[UV]], [[UV2]](i32) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[UV1]], [[UV3]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SHL]](i32), [[SHL1]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i32>) = G_SHL %0, %1(<2 x i32>) + $vgpr0_vgpr1 = COPY %2(<2 x i32>) ... --- @@ -398,45 +398,45 @@ body: | ; SI-LABEL: name: test_shl_v3s32_v3s32 ; SI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[UV3]](s32) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[UV4]](s32) - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[UV2]], [[UV5]](s32) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SHL]](s32), [[SHL1]](s32), [[SHL2]](s32) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; SI-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[UV]], [[UV3]](i32) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[UV1]], [[UV4]](i32) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[UV2]], [[UV5]](i32) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[SHL]](i32), [[SHL1]](i32), [[SHL2]](i32) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) ; ; VI-LABEL: name: test_shl_v3s32_v3s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[UV3]](s32) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[UV4]](s32) - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[UV2]], [[UV5]](s32) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SHL]](s32), [[SHL1]](s32), [[SHL2]](s32) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), 
[[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; VI-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[UV]], [[UV3]](i32) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[UV1]], [[UV4]](i32) + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[UV2]], [[UV5]](i32) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[SHL]](i32), [[SHL1]](i32), [[SHL2]](i32) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) ; ; GFX9-LABEL: name: test_shl_v3s32_v3s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[UV3]](s32) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[UV4]](s32) - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[UV2]], [[UV5]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SHL]](s32), [[SHL1]](s32), [[SHL2]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - %2:_(<3 x s32>) = G_SHL %0, %1 - $vgpr0_vgpr1_vgpr2 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[UV]], [[UV3]](i32) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[UV1]], [[UV4]](i32) + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[UV2]], [[UV5]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[SHL]](i32), [[SHL1]](i32), [[SHL2]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(<3 x i32>) = G_SHL %0, %1(<3 x i32>) + $vgpr0_vgpr1_vgpr2 = COPY %2(<3 x i32>) ... 
--- @@ -448,42 +448,42 @@ body: | ; SI-LABEL: name: test_shl_v2s64_v2s32 ; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[UV2]](s32) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UV3]](s32) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SHL]](s64), [[SHL1]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr4_vgpr5 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV]], [[UV2]](i32) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[UV3]](i32) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[SHL]](i64), [[SHL1]](i64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; VI-LABEL: name: test_shl_v2s64_v2s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[UV2]](s32) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UV3]](s32) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SHL]](s64), [[SHL1]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr4_vgpr5 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV]], [[UV2]](i32) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[UV3]](i32) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[SHL]](i64), [[SHL1]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; GFX9-LABEL: name: test_shl_v2s64_v2s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[UV2]](s32) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UV3]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SHL]](s64), [[SHL1]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY 
[[BUILD_VECTOR]](<2 x s64>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s32>) = COPY $vgpr4_vgpr5 - %2:_(<2 x s64>) = G_SHL %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr4_vgpr5 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV]], [[UV2]](i32) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[UV3]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[SHL]](i64), [[SHL1]](i64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x i32>) = COPY $vgpr4_vgpr5 + %2:_(<2 x i64>) = G_SHL %0, %1(<2 x i32>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<2 x i64>) ... --- @@ -495,54 +495,54 @@ body: | ; SI-LABEL: name: test_shl_v3s64_v3s32 ; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10 - ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[UV4]](s32) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UV5]](s32) - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV2]], [[UV6]](s32) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[SHL]](s64), [[SHL1]](s64), [[SHL2]](s64), [[UV10]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<4 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64), [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<4 x i64>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr8_vgpr9_vgpr10 + ; SI-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV]], [[UV4]](i32) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[UV5]](i32) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[UV2]], [[UV6]](i32) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV7:%[0-9]+]]:_(i64), [[UV8:%[0-9]+]]:_(i64), [[UV9:%[0-9]+]]:_(i64), [[UV10:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[SHL]](i64), [[SHL1]](i64), [[SHL2]](i64), [[UV10]](i64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; VI-LABEL: name: test_shl_v3s64_v3s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10 - ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[UV4]](s32) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UV5]](s32) - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV2]], [[UV6]](s32) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[SHL]](s64), [[SHL1]](s64), [[SHL2]](s64), [[UV10]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<4 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64), [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<4 x i64>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr8_vgpr9_vgpr10 + ; VI-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV]], [[UV4]](i32) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[UV5]](i32) + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[UV2]], [[UV6]](i32) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV7:%[0-9]+]]:_(i64), [[UV8:%[0-9]+]]:_(i64), [[UV9:%[0-9]+]]:_(i64), [[UV10:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[SHL]](i64), [[SHL1]](i64), [[SHL2]](i64), [[UV10]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) ; ; GFX9-LABEL: name: test_shl_v3s64_v3s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10 - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[UV4]](s32) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UV5]](s32) - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV2]], [[UV6]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[SHL]](s64), [[SHL1]](s64), [[SHL2]](s64), [[UV10]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) - %0:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - %1:_(<3 x s64>) = G_EXTRACT %0, 0 - %2:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10 - %3:_(<3 x s64>) = G_SHL %1, %2 - %4:_(<4 x s64>) = G_IMPLICIT_DEF - 
%5:_(<4 x s64>) = G_INSERT %4, %3, 0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64), [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<4 x i64>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr8_vgpr9_vgpr10 + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV]], [[UV4]](i32) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[UV5]](i32) + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[UV2]], [[UV6]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x i64>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV7:%[0-9]+]]:_(i64), [[UV8:%[0-9]+]]:_(i64), [[UV9:%[0-9]+]]:_(i64), [[UV10:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[DEF]](<4 x i64>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[SHL]](i64), [[SHL1]](i64), [[SHL2]](i64), [[UV10]](i64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) + %0:_(<4 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(<3 x i64>) = G_EXTRACT %0(<4 x i64>), 0 + %2:_(<3 x i32>) = COPY $vgpr8_vgpr9_vgpr10 + %3:_(<3 x i64>) = G_SHL %1, %2(<3 x i32>) + %4:_(<4 x i64>) = G_IMPLICIT_DEF + %5:_(<4 x i64>) = G_INSERT %4, %3(<3 x i64>), 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %5(<4 x i64>) ... --- @@ -554,58 +554,58 @@ body: | ; SI-LABEL: name: test_shl_v2s16_v2s16 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[AND]](s32) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[LSHR1]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SHL]], [[C1]] - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SHL1]], [[C1]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[BITCAST]], [[AND]](i32) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[LSHR1]](i32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[SHL]], [[C1]] + 
; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[SHL1]], [[C1]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND2]], [[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND1]], [[SHL2]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) ; ; VI-LABEL: name: test_shl_v2s16_v2s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC2]](s16) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC3]](s16) - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SHL]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SHL1]](s16) - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[TRUNC2]](i16) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[TRUNC3]](i16) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[SHL]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[SHL1]](i16) + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL2]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) ; ; GFX9-LABEL: name: test_shl_v2s16_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>) - ; GFX9-NEXT: $vgpr0 = COPY [[SHL]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_SHL %0, %1 - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x i16>) = G_SHL [[COPY]], [[COPY1]](<2 x i16>) + ; GFX9-NEXT: $vgpr0 = COPY 
[[SHL]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = G_SHL %0, %1(<2 x i16>) + $vgpr0 = COPY %2(<2 x i16>) ... --- @@ -617,65 +617,65 @@ body: | ; SI-LABEL: name: test_shl_v2s16_v2s32 ; SI: liveins: $vgpr0, $vgpr0_vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[UV]](s32) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[UV1]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SHL]], [[C1]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SHL1]], [[C1]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL2]] - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[BITCAST]], [[UV]](i32) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[UV1]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[SHL]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[SHL1]], [[C1]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL2]] + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) ; ; VI-LABEL: name: test_shl_v2s16_v2s32 ; VI: liveins: $vgpr0, $vgpr0_vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC2]](s16) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC3]](s16) - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SHL]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SHL1]](s16) - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; VI-NEXT: 
[[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[UV]](i32) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[TRUNC2]](i16) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[UV1]](i32) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[TRUNC3]](i16) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[SHL]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[SHL1]](i16) + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL2]] + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) ; ; GFX9-LABEL: name: test_shl_v2s16_v2s32 ; GFX9: liveins: $vgpr0, $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC2]](s16) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC3]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SHL]](s16), [[SHL1]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %2:_(<2 x s16>) = G_SHL %0, %1 - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[UV]](i32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[TRUNC2]](i16) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[UV1]](i32) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[TRUNC3]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[SHL]](i16), [[SHL1]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %2:_(<2 x i16>) = G_SHL %0, %1(<2 x i32>) + $vgpr0 = COPY %2(<2 x i16>) ... 
--- @@ -686,132 +686,132 @@ body: | ; SI-LABEL: name: test_shl_v3s16_v3s16 ; SI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[AND]](s32) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[LSHR1]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[AND1]](s32) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SHL]], [[C1]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SHL1]], [[C1]] - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL3]] - ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[SHL2]], [[C1]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL4]] - ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL5]] - ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr3_vgpr4_vgpr5 + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; SI-NEXT: [[UV3:%[0-9]+]]:_(<2 x 
i16>), [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<6 x i16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[BITCAST]], [[AND]](i32) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[LSHR1]](i32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST3]], [[C1]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[BITCAST1]], [[AND1]](i32) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[UV6]](<2 x i16>) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[UV7]](<2 x i16>) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[SHL]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[SHL1]], [[C1]] + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL3]] + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[SHL2]], [[C1]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[BITCAST4]], [[C1]] + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[AND5]], [[C]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND4]], [[SHL4]] + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(i32) = G_AND [[BITCAST5]], [[C1]] + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[AND6]], [[C]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR2]], [[SHL5]] + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x i16>), [[BITCAST7]](<2 x i16>), [[BITCAST8]](<2 x i16>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; VI-LABEL: name: test_shl_v3s16_v3s16 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = 
G_BITCAST [[UV4]](<2 x s16>) - ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC3]](s16) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC4]](s16) - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC5]](s16) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SHL]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SHL1]](s16) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] - ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SHL2]](s16) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] - ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL5]] - ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr3_vgpr4_vgpr5 + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; VI-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<6 x i16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST3]](i32) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[TRUNC3]](i16) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[TRUNC4]](i16) + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[TRUNC2]], [[TRUNC5]](i16) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES 
[[DEF]](<4 x i16>) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[UV6]](<2 x i16>) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[UV7]](<2 x i16>) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[SHL]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[SHL1]](i16) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL3]] + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[SHL2]](i16) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST4]], [[C1]] + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[AND]], [[C]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL4]] + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST5]], [[C1]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR2]], [[SHL5]] + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x i16>), [[BITCAST7]](<2 x i16>), [[BITCAST8]](<2 x i16>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX9-LABEL: name: test_shl_v3s16_v3s16 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV3]], [[UV9]](<2 x s16>) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[SHL]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST 
[[UV13]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SHL1]](s16), [[TRUNC4]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[TRUNC6]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) - %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - %2:_(<3 x s16>), %3:_(<3 x s16>) = G_UNMERGE_VALUES %0 - %4:_(<3 x s16>), %5:_(<3 x s16>) = G_UNMERGE_VALUES %1 - %6:_(<3 x s16>) = G_SHL %2, %4 - %7:_(<3 x s16>) = G_IMPLICIT_DEF - %8:_(<6 x s16>) = G_CONCAT_VECTORS %6, %7 - $vgpr0_vgpr1_vgpr2 = COPY %8 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>), [[UV8:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<6 x i16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV7]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9-NEXT: [[UV9:%[0-9]+]]:_(<2 x i16>), [[UV10:%[0-9]+]]:_(<2 x i16>), [[UV11:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<6 x i16>) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x i16>) = G_SHL [[UV3]], [[UV9]](<2 x i16>) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[TRUNC1]](i16) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[SHL]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(<2 x i16>), [[UV13:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV12]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST3]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[UV13]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[SHL1]](i16), [[TRUNC4]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC5]](i16), [[TRUNC6]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; 
GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) + %0:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<6 x i16>) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(<3 x i16>), %3:_(<3 x i16>) = G_UNMERGE_VALUES %0(<6 x i16>) + %4:_(<3 x i16>), %5:_(<3 x i16>) = G_UNMERGE_VALUES %1(<6 x i16>) + %6:_(<3 x i16>) = G_SHL %2, %4(<3 x i16>) + %7:_(<3 x i16>) = G_IMPLICIT_DEF + %8:_(<6 x i16>) = G_CONCAT_VECTORS %6(<3 x i16>), %7(<3 x i16>) + $vgpr0_vgpr1_vgpr2 = COPY %8(<6 x i16>) ... @@ -824,95 +824,95 @@ body: | ; SI-LABEL: name: test_shl_v4s16_v4s16 ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[AND]](s32) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[LSHR2]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[AND1]](s32) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[LSHR3]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SHL]], [[C1]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SHL1]], [[C1]] - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[SHL2]], [[C1]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[SHL3]], [[C1]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL5]] - ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES 
[[COPY1]](<4 x i16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[BITCAST]], [[AND]](i32) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[LSHR2]](i32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST3]], [[C1]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[BITCAST1]], [[AND1]](i32) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LSHR1]], [[LSHR3]](i32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[SHL]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[SHL1]], [[C1]] + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL4]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[SHL2]], [[C1]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[SHL3]], [[C1]] + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[AND5]], [[C]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND4]], [[SHL5]] + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x i16>), [[BITCAST5]](<2 x i16>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; VI-LABEL: name: test_shl_v4s16_v4s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC4]](s16) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC5]](s16) - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC6]](s16) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[TRUNC7]](s16) - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT 
[[SHL]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SHL1]](s16) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SHL2]](s16) - ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[SHL3]](s16) - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] - ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST3]](i32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[TRUNC4]](i16) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[TRUNC5]](i16) + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[TRUNC2]], [[TRUNC6]](i16) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i16) = G_SHL [[TRUNC3]], [[TRUNC7]](i16) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[SHL]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[SHL1]](i16) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL4]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[SHL2]](i16) + ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[SHL3]](i16) + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x i16>), [[BITCAST5]](<2 x i16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX9-LABEL: name: test_shl_v4s16_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY 
$vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV]], [[UV2]](<2 x s16>) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV1]], [[UV3]](<2 x s16>) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[SHL]](<2 x s16>), [[SHL1]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 - %2:_(<4 x s16>) = G_SHL %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x i16>) = G_SHL [[UV]], [[UV2]](<2 x i16>) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x i16>) = G_SHL [[UV1]], [[UV3]](<2 x i16>) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[SHL]](<2 x i16>), [[SHL1]](<2 x i16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = COPY $vgpr2_vgpr3 + %2:_(<4 x i16>) = G_SHL %0, %1(<4 x i16>) + $vgpr0_vgpr1 = COPY %2(<4 x i16>) ... --- @@ -924,45 +924,45 @@ body: | ; SI-LABEL: name: test_shl_s7_s7 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND]](s32) - ; SI-NEXT: $vgpr0 = COPY [[SHL]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 127 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[AND]](i32) + ; SI-NEXT: $vgpr0 = COPY [[SHL]](i32) ; ; VI-LABEL: name: test_shl_s7_s7 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[AND]](s16) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 127 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C]] + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[AND]](i16) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[SHL]](i16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: 
test_shl_s7_s7 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[AND]](s16) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s7) = G_TRUNC %0 - %3:_(s7) = G_TRUNC %1 - %4:_(s7) = G_SHL %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 127 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C]] + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[AND]](i16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[SHL]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i7) = G_TRUNC %0(i32) + %3:_(i7) = G_TRUNC %1(i32) + %4:_(i7) = G_SHL %2, %3(i7) + %5:_(i32) = G_ANYEXT %4(i7) + $vgpr0 = COPY %5(i32) ... --- @@ -974,32 +974,32 @@ body: | ; SI-LABEL: name: test_shl_i24_i32 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) - ; SI-NEXT: $vgpr0 = COPY [[SHL]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[COPY1]](i32) + ; SI-NEXT: $vgpr0 = COPY [[SHL]](i32) ; ; VI-LABEL: name: test_shl_i24_i32 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) - ; VI-NEXT: $vgpr0 = COPY [[SHL]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[COPY1]](i32) + ; VI-NEXT: $vgpr0 = COPY [[SHL]](i32) ; ; GFX9-LABEL: name: test_shl_i24_i32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) - ; GFX9-NEXT: $vgpr0 = COPY [[SHL]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s24) = G_TRUNC %0 - %3:_(s24) = G_SHL %2, %1 - %4:_(s32) = G_ANYEXT %3 - $vgpr0 = COPY %4 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[COPY1]](i32) + ; GFX9-NEXT: $vgpr0 = COPY [[SHL]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i24) = G_TRUNC %0(i32) + %3:_(i24) = G_SHL %2, %1(i32) + %4:_(i32) = G_ANYEXT %3(i24) + $vgpr0 = COPY %4(i32) ... 
--- @@ -1011,79 +1011,79 @@ body: | ; SI-LABEL: name: test_shl_s128_s128 ; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] - ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]] - ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[COPY1]](s32) - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[SUB1]](s32) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[COPY1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL1]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[SUB]](s32) - ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SHL]], [[C2]] - ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[SHL2]] - ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV1]], [[SELECT1]] - ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT]](s64), [[SELECT2]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; SI-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[COPY1]], [[C]] + ; SI-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C]], [[COPY1]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[COPY1]](i32), [[C]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV]], [[COPY1]](i32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[SUB1]](i32) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[COPY1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL1]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[UV]], [[SUB]](i32) + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[SHL]], [[C2]] + ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR]], [[SHL2]] + ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[UV1]], [[SELECT1]] + ; SI-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT]](i64), [[SELECT2]](i64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) ; ; VI-LABEL: name: test_shl_s128_s128 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] - ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]] - ; 
VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[COPY1]](s32) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[SUB1]](s32) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[COPY1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL1]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[SUB]](s32) - ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SHL]], [[C2]] - ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[SHL2]] - ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV1]], [[SELECT1]] - ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT]](s64), [[SELECT2]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; VI-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[COPY1]], [[C]] + ; VI-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C]], [[COPY1]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[COPY1]](i32), [[C]] + ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C1]] + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV]], [[COPY1]](i32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[SUB1]](i32) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[COPY1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL1]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[UV]], [[SUB]](i32) + ; VI-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[SHL]], [[C2]] + ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR]], [[SHL2]] + ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[UV1]], [[SELECT1]] + ; VI-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT]](i64), [[SELECT2]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) ; ; GFX9-LABEL: name: test_shl_s128_s128 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] - ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[COPY1]](s32) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[SUB1]](s32) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[COPY1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL1]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[SUB]](s32) - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SHL]], [[C2]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[SHL2]] - ; 
GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV1]], [[SELECT1]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT]](s64), [[SELECT2]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = COPY $vgpr4 - %2:_(s128) = G_ZEXT %1 - %3:_(s128) = G_SHL %0, %2 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[COPY1]], [[C]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C]], [[COPY1]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[COPY1]](i32), [[C]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C1]] + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV]], [[COPY1]](i32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[SUB1]](i32) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[COPY1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL1]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[UV]], [[SUB]](i32) + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[SHL]], [[C2]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR]], [[SHL2]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[UV1]], [[SELECT1]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT]](i64), [[SELECT2]](i64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i32) = COPY $vgpr4 + %2:_(i128) = G_ZEXT %1(i32) + %3:_(i128) = G_SHL %0, %2(i128) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3(i128) ... 
--- @@ -1095,78 +1095,78 @@ body: | ; SI-LABEL: name: test_shl_s128_s132 ; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] - ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]] - ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[COPY1]](s32) - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[SUB1]](s32) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[COPY1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL1]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[SUB]](s32) - ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SHL]], [[C2]] - ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[SHL2]] - ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV1]], [[SELECT1]] - ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT]](s64), [[SELECT2]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; SI-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[COPY1]], [[C]] + ; SI-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C]], [[COPY1]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[COPY1]](i32), [[C]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV]], [[COPY1]](i32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[SUB1]](i32) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[COPY1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL1]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[UV]], [[SUB]](i32) + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[SHL]], [[C2]] + ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR]], [[SHL2]] + ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[UV1]], [[SELECT1]] + ; SI-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT]](i64), [[SELECT2]](i64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) ; ; VI-LABEL: name: test_shl_s128_s132 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] - ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]] - ; 
VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[COPY1]](s32) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[SUB1]](s32) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[COPY1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL1]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[SUB]](s32) - ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SHL]], [[C2]] - ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[SHL2]] - ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV1]], [[SELECT1]] - ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT]](s64), [[SELECT2]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; VI-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[COPY1]], [[C]] + ; VI-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C]], [[COPY1]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[COPY1]](i32), [[C]] + ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C1]] + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV]], [[COPY1]](i32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[SUB1]](i32) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[COPY1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL1]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[UV]], [[SUB]](i32) + ; VI-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[SHL]], [[C2]] + ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR]], [[SHL2]] + ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[UV1]], [[SELECT1]] + ; VI-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT]](i64), [[SELECT2]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) ; ; GFX9-LABEL: name: test_shl_s128_s132 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] - ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[COPY1]](s32) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[SUB1]](s32) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[COPY1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL1]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[SUB]](s32) - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SHL]], [[C2]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[SHL2]] - ; 
GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV1]], [[SELECT1]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT]](s64), [[SELECT2]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = COPY $vgpr4 - %2:_(s128) = G_SHL %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[COPY1]], [[C]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C]], [[COPY1]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[COPY1]](i32), [[C]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C1]] + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV]], [[COPY1]](i32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[SUB1]](i32) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[COPY1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL1]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[UV]], [[SUB]](i32) + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[SHL]], [[C2]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR]], [[SHL2]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[UV1]], [[SELECT1]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT]](i64), [[SELECT2]](i64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i32) = COPY $vgpr4 + %2:_(i128) = G_SHL %0, %1(i32) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(i128) ... --- @@ -1178,24 +1178,24 @@ body: | ; SI-LABEL: name: test_shl_s128_s32_0 ; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]](s128) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]](i128) ; ; VI-LABEL: name: test_shl_s128_s32_0 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]](s128) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]](i128) ; ; GFX9-LABEL: name: test_shl_s128_s32_0 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]](s128) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = G_CONSTANT i32 0 - %3:_(s128) = G_SHL %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]](i128) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i32) = G_CONSTANT i32 0 + %2:_(i128) = G_SHL %0, %1(i32) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(i128) ... 
@@ -1208,48 +1208,48 @@ body: | ; SI-LABEL: name: test_shl_s128_s32_23 ; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[C]](s32) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 41 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL1]], [[LSHR]] - ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SHL]](s64), [[OR]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 23 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV]], [[C]](i32) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 41 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[SHL1]], [[LSHR]] + ; SI-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SHL]](i64), [[OR]](i64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) ; ; VI-LABEL: name: test_shl_s128_s32_23 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[C]](s32) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 41 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL1]], [[LSHR]] - ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SHL]](s64), [[OR]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 23 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV]], [[C]](i32) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 41 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[SHL1]], [[LSHR]] + ; VI-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SHL]](i64), [[OR]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) ; ; GFX9-LABEL: name: test_shl_s128_s32_23 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[C]](s32) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[C]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 41 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL1]], 
[[LSHR]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SHL]](s64), [[OR]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = G_CONSTANT i32 23 - %3:_(s128) = G_SHL %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 23 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV]], [[C]](i32) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[C]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 41 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[SHL1]], [[LSHR]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SHL]](i64), [[OR]](i64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i32) = G_CONSTANT i32 23 + %2:_(i128) = G_SHL %0, %1(i32) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(i128) ... --- @@ -1261,48 +1261,48 @@ body: | ; SI-LABEL: name: test_shl_s128_s32_31 ; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[C]](s32) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 33 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL1]], [[LSHR]] - ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SHL]](s64), [[OR]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV]], [[C]](i32) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 33 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[SHL1]], [[LSHR]] + ; SI-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SHL]](i64), [[OR]](i64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) ; ; VI-LABEL: name: test_shl_s128_s32_31 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[C]](s32) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 33 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL1]], [[LSHR]] - ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SHL]](s64), [[OR]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i64), 
[[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV]], [[C]](i32) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 33 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[SHL1]], [[LSHR]] + ; VI-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SHL]](i64), [[OR]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) ; ; GFX9-LABEL: name: test_shl_s128_s32_31 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[C]](s32) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[C]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 33 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL1]], [[LSHR]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SHL]](s64), [[OR]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = G_CONSTANT i32 31 - %3:_(s128) = G_SHL %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV]], [[C]](i32) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[C]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 33 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[SHL1]], [[LSHR]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SHL]](i64), [[OR]](i64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i32) = G_CONSTANT i32 31 + %2:_(i128) = G_SHL %0, %1(i32) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(i128) ... 
--- @@ -1314,45 +1314,45 @@ body: | ; SI-LABEL: name: test_shl_s128_s32_32 ; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[C]](s32) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[C]](s32) - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL1]], [[LSHR]] - ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SHL]](s64), [[OR]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV]], [[C]](i32) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[C]](i32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[SHL1]], [[LSHR]] + ; SI-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SHL]](i64), [[OR]](i64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) ; ; VI-LABEL: name: test_shl_s128_s32_32 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[C]](s32) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[C]](s32) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL1]], [[LSHR]] - ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SHL]](s64), [[OR]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV]], [[C]](i32) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[C]](i32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[SHL1]], [[LSHR]] + ; VI-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SHL]](i64), [[OR]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) ; ; GFX9-LABEL: name: test_shl_s128_s32_32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[C]](s32) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[C]](s32) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[C]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL1]], [[LSHR]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SHL]](s64), [[OR]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = G_CONSTANT i32 32 - %3:_(s128) = G_SHL %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = 
COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV]], [[C]](i32) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[C]](i32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[C]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[SHL1]], [[LSHR]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SHL]](i64), [[OR]](i64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i32) = G_CONSTANT i32 32 + %2:_(i128) = G_SHL %0, %1(i32) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(i128) ... --- @@ -1364,48 +1364,48 @@ body: | ; SI-LABEL: name: test_shl_s128_s32_33 ; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 33 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[C]](s32) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL1]], [[LSHR]] - ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SHL]](s64), [[OR]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 33 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV]], [[C]](i32) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[SHL1]], [[LSHR]] + ; SI-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SHL]](i64), [[OR]](i64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) ; ; VI-LABEL: name: test_shl_s128_s32_33 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 33 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[C]](s32) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL1]], [[LSHR]] - ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SHL]](s64), [[OR]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 33 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV]], [[C]](i32) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[SHL1]], 
[[LSHR]] + ; VI-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SHL]](i64), [[OR]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) ; ; GFX9-LABEL: name: test_shl_s128_s32_33 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 33 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[C]](s32) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[C]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[C1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL1]], [[LSHR]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SHL]](s64), [[OR]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = G_CONSTANT i32 33 - %3:_(s128) = G_SHL %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 33 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV]], [[C]](i32) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[C]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV]], [[C1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[SHL1]], [[LSHR]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SHL]](i64), [[OR]](i64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i32) = G_CONSTANT i32 33 + %2:_(i128) = G_SHL %0, %1(i32) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(i128) ... 
--- @@ -1417,39 +1417,39 @@ body: | ; SI-LABEL: name: test_shl_s128_s32_127 ; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[C1]](s32) - ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C]](s64), [[SHL]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV]], [[C1]](i32) + ; SI-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[C]](i64), [[SHL]](i64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) ; ; VI-LABEL: name: test_shl_s128_s32_127 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[C1]](s32) - ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C]](s64), [[SHL]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV]], [[C1]](i32) + ; VI-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[C]](i64), [[SHL]](i64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) ; ; GFX9-LABEL: name: test_shl_s128_s32_127 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[C1]](s32) - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C]](s64), [[SHL]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = G_CONSTANT i32 127 - %3:_(s128) = G_SHL %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV]], [[C1]](i32) + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[C]](i64), [[SHL]](i64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i32) = G_CONSTANT i32 127 + %2:_(i128) = G_SHL %0, %1(i32) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(i128) ... 
--- @@ -1461,247 +1461,247 @@ body: | ; SI-LABEL: name: test_shl_s256_s256 ; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](s256) - ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] - ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]] - ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; SI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] - ; SI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] - ; SI-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] - ; SI-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV2]], [[COPY1]](s32) - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[SUB3]](s32) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV3]], [[COPY1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL1]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV2]], [[SUB2]](s32) - ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[SHL]], [[C3]] - ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[OR]], [[SHL2]] - ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV3]], [[SELECT1]] - ; SI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; SI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C2]] - ; SI-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB1]] - ; SI-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C2]] - ; SI-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB1]](s32), [[C1]] - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[SUB1]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV4]], [[SUB1]](s32) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s64) = G_SHL [[UV5]], [[SUB5]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[LSHR2]], [[SHL3]] - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[SUB4]](s32) - ; SI-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[OR1]], [[LSHR3]] - ; SI-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP5]](s1), [[UV4]], [[SELECT3]] - ; SI-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR1]], [[C3]] - ; SI-NEXT: [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; SI-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] - ; SI-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] - ; SI-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] - ; SI-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[COPY1]](s32) - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s64) = G_LSHR [[UV6]], [[SUB7]](s32) - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[UV7]], [[COPY1]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[LSHR4]], [[SHL5]] - ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL 
[[UV6]], [[SUB6]](s32) - ; SI-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL4]], [[C3]] - ; SI-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[OR2]], [[SHL6]] - ; SI-NEXT: [[SELECT8:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[UV7]], [[SELECT7]] - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s64) = G_OR [[SELECT4]], [[SELECT6]] - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s64) = G_OR [[SELECT5]], [[SELECT8]] - ; SI-NEXT: [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; SI-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C2]] - ; SI-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB]] - ; SI-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C2]] - ; SI-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB]](s32), [[C1]] - ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s64) = G_SHL [[UV8]], [[SUB]](s32) - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s64) = G_LSHR [[UV8]], [[SUB9]](s32) - ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s64) = G_SHL [[UV9]], [[SUB]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[LSHR5]], [[SHL8]] - ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s64) = G_SHL [[UV8]], [[SUB8]](s32) - ; SI-NEXT: [[SELECT9:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[SHL7]], [[C3]] - ; SI-NEXT: [[SELECT10:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[OR5]], [[SHL9]] - ; SI-NEXT: [[SELECT11:%[0-9]+]]:_(s64) = G_SELECT [[ICMP9]](s1), [[UV9]], [[SELECT10]] - ; SI-NEXT: [[SELECT12:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT]], [[C3]] - ; SI-NEXT: [[SELECT13:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT2]], [[C3]] - ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT12]](s64), [[SELECT13]](s64) - ; SI-NEXT: [[SELECT14:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR3]], [[SELECT9]] - ; SI-NEXT: [[SELECT15:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR4]], [[SELECT11]] - ; SI-NEXT: [[UV10:%[0-9]+]]:_(s64), [[UV11:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; SI-NEXT: [[SELECT16:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV10]], [[SELECT14]] - ; SI-NEXT: [[SELECT17:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV11]], [[SELECT15]] - ; SI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT16]](s64), [[SELECT17]](s64) - ; SI-NEXT: [[MV2:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV]](s128), [[MV1]](s128) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV2]](s256) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 128 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i128), [[UV1:%[0-9]+]]:_(i128) = G_UNMERGE_VALUES [[COPY]](i256) + ; SI-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[COPY1]], [[C]] + ; SI-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C]], [[COPY1]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[COPY1]](i32), [[C]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C1]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; SI-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV]](i128) + ; SI-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[COPY1]], [[C2]] + ; SI-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[COPY1]] + ; SI-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[COPY1]](i32), [[C2]] + ; SI-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV2]], [[COPY1]](i32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV2]], 
[[SUB3]](i32) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[UV3]], [[COPY1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL1]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[UV2]], [[SUB2]](i32) + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP2]](i1), [[SHL]], [[C3]] + ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP2]](i1), [[OR]], [[SHL2]] + ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP3]](i1), [[UV3]], [[SELECT1]] + ; SI-NEXT: [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV]](i128) + ; SI-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[C2]] + ; SI-NEXT: [[SUB5:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[SUB1]] + ; SI-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[SUB1]](i32), [[C2]] + ; SI-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[SUB1]](i32), [[C1]] + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i64) = G_LSHR [[UV5]], [[SUB1]](i32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i64) = G_LSHR [[UV4]], [[SUB1]](i32) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i64) = G_SHL [[UV5]], [[SUB5]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i64) = G_OR [[LSHR2]], [[SHL3]] + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(i64) = G_LSHR [[UV5]], [[SUB4]](i32) + ; SI-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP4]](i1), [[OR1]], [[LSHR3]] + ; SI-NEXT: [[SELECT4:%[0-9]+]]:_(i64) = G_SELECT [[ICMP5]](i1), [[UV4]], [[SELECT3]] + ; SI-NEXT: [[SELECT5:%[0-9]+]]:_(i64) = G_SELECT [[ICMP4]](i1), [[LSHR1]], [[C3]] + ; SI-NEXT: [[UV6:%[0-9]+]]:_(i64), [[UV7:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV1]](i128) + ; SI-NEXT: [[SUB6:%[0-9]+]]:_(i32) = G_SUB [[COPY1]], [[C2]] + ; SI-NEXT: [[SUB7:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[COPY1]] + ; SI-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[COPY1]](i32), [[C2]] + ; SI-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C1]] + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i64) = G_SHL [[UV6]], [[COPY1]](i32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(i64) = G_LSHR [[UV6]], [[SUB7]](i32) + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i64) = G_SHL [[UV7]], [[COPY1]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i64) = G_OR [[LSHR4]], [[SHL5]] + ; SI-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[UV6]], [[SUB6]](i32) + ; SI-NEXT: [[SELECT6:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[SHL4]], [[C3]] + ; SI-NEXT: [[SELECT7:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[OR2]], [[SHL6]] + ; SI-NEXT: [[SELECT8:%[0-9]+]]:_(i64) = G_SELECT [[ICMP7]](i1), [[UV7]], [[SELECT7]] + ; SI-NEXT: [[OR3:%[0-9]+]]:_(i64) = G_OR [[SELECT4]], [[SELECT6]] + ; SI-NEXT: [[OR4:%[0-9]+]]:_(i64) = G_OR [[SELECT5]], [[SELECT8]] + ; SI-NEXT: [[UV8:%[0-9]+]]:_(i64), [[UV9:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV]](i128) + ; SI-NEXT: [[SUB8:%[0-9]+]]:_(i32) = G_SUB [[SUB]], [[C2]] + ; SI-NEXT: [[SUB9:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[SUB]] + ; SI-NEXT: [[ICMP8:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[SUB]](i32), [[C2]] + ; SI-NEXT: [[ICMP9:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[SUB]](i32), [[C1]] + ; SI-NEXT: [[SHL7:%[0-9]+]]:_(i64) = G_SHL [[UV8]], [[SUB]](i32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(i64) = G_LSHR [[UV8]], [[SUB9]](i32) + ; SI-NEXT: [[SHL8:%[0-9]+]]:_(i64) = G_SHL [[UV9]], [[SUB]](i32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(i64) = G_OR [[LSHR5]], [[SHL8]] + ; SI-NEXT: [[SHL9:%[0-9]+]]:_(i64) = G_SHL [[UV8]], [[SUB8]](i32) + ; SI-NEXT: [[SELECT9:%[0-9]+]]:_(i64) = G_SELECT [[ICMP8]](i1), [[SHL7]], [[C3]] + ; SI-NEXT: [[SELECT10:%[0-9]+]]:_(i64) = G_SELECT [[ICMP8]](i1), [[OR5]], [[SHL9]] + ; SI-NEXT: [[SELECT11:%[0-9]+]]:_(i64) = G_SELECT [[ICMP9]](i1), 
[[UV9]], [[SELECT10]] + ; SI-NEXT: [[SELECT12:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[SELECT]], [[C3]] + ; SI-NEXT: [[SELECT13:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[SELECT2]], [[C3]] + ; SI-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT12]](i64), [[SELECT13]](i64) + ; SI-NEXT: [[SELECT14:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR3]], [[SELECT9]] + ; SI-NEXT: [[SELECT15:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR4]], [[SELECT11]] + ; SI-NEXT: [[UV10:%[0-9]+]]:_(i64), [[UV11:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV1]](i128) + ; SI-NEXT: [[SELECT16:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[UV10]], [[SELECT14]] + ; SI-NEXT: [[SELECT17:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[UV11]], [[SELECT15]] + ; SI-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT16]](i64), [[SELECT17]](i64) + ; SI-NEXT: [[MV2:%[0-9]+]]:_(i256) = G_MERGE_VALUES [[MV]](i128), [[MV1]](i128) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV2]](i256) ; ; VI-LABEL: name: test_shl_s256_s256 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](s256) - ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] - ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]] - ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; VI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; VI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] - ; VI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] - ; VI-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] - ; VI-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV2]], [[COPY1]](s32) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[SUB3]](s32) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV3]], [[COPY1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL1]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV2]], [[SUB2]](s32) - ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[SHL]], [[C3]] - ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[OR]], [[SHL2]] - ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV3]], [[SELECT1]] - ; VI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; VI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C2]] - ; VI-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB1]] - ; VI-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C2]] - ; VI-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB1]](s32), [[C1]] - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[SUB1]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV4]], [[SUB1]](s32) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s64) = G_SHL [[UV5]], [[SUB5]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[LSHR2]], [[SHL3]] - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[SUB4]](s32) - 
; VI-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[OR1]], [[LSHR3]] - ; VI-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP5]](s1), [[UV4]], [[SELECT3]] - ; VI-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR1]], [[C3]] - ; VI-NEXT: [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; VI-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] - ; VI-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] - ; VI-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] - ; VI-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[COPY1]](s32) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s64) = G_LSHR [[UV6]], [[SUB7]](s32) - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[UV7]], [[COPY1]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[LSHR4]], [[SHL5]] - ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB6]](s32) - ; VI-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL4]], [[C3]] - ; VI-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[OR2]], [[SHL6]] - ; VI-NEXT: [[SELECT8:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[UV7]], [[SELECT7]] - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s64) = G_OR [[SELECT4]], [[SELECT6]] - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s64) = G_OR [[SELECT5]], [[SELECT8]] - ; VI-NEXT: [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; VI-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C2]] - ; VI-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB]] - ; VI-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C2]] - ; VI-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB]](s32), [[C1]] - ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s64) = G_SHL [[UV8]], [[SUB]](s32) - ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s64) = G_LSHR [[UV8]], [[SUB9]](s32) - ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s64) = G_SHL [[UV9]], [[SUB]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[LSHR5]], [[SHL8]] - ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s64) = G_SHL [[UV8]], [[SUB8]](s32) - ; VI-NEXT: [[SELECT9:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[SHL7]], [[C3]] - ; VI-NEXT: [[SELECT10:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[OR5]], [[SHL9]] - ; VI-NEXT: [[SELECT11:%[0-9]+]]:_(s64) = G_SELECT [[ICMP9]](s1), [[UV9]], [[SELECT10]] - ; VI-NEXT: [[SELECT12:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT]], [[C3]] - ; VI-NEXT: [[SELECT13:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT2]], [[C3]] - ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT12]](s64), [[SELECT13]](s64) - ; VI-NEXT: [[SELECT14:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR3]], [[SELECT9]] - ; VI-NEXT: [[SELECT15:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR4]], [[SELECT11]] - ; VI-NEXT: [[UV10:%[0-9]+]]:_(s64), [[UV11:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; VI-NEXT: [[SELECT16:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV10]], [[SELECT14]] - ; VI-NEXT: [[SELECT17:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV11]], [[SELECT15]] - ; VI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT16]](s64), [[SELECT17]](s64) - ; VI-NEXT: [[MV2:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV]](s128), [[MV1]](s128) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV2]](s256) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 128 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i128), 
[[UV1:%[0-9]+]]:_(i128) = G_UNMERGE_VALUES [[COPY]](i256) + ; VI-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[COPY1]], [[C]] + ; VI-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C]], [[COPY1]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[COPY1]](i32), [[C]] + ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C1]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; VI-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV]](i128) + ; VI-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[COPY1]], [[C2]] + ; VI-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[COPY1]] + ; VI-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[COPY1]](i32), [[C2]] + ; VI-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C1]] + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV2]], [[COPY1]](i32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV2]], [[SUB3]](i32) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[UV3]], [[COPY1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL1]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[UV2]], [[SUB2]](i32) + ; VI-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP2]](i1), [[SHL]], [[C3]] + ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP2]](i1), [[OR]], [[SHL2]] + ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP3]](i1), [[UV3]], [[SELECT1]] + ; VI-NEXT: [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV]](i128) + ; VI-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[C2]] + ; VI-NEXT: [[SUB5:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[SUB1]] + ; VI-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[SUB1]](i32), [[C2]] + ; VI-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[SUB1]](i32), [[C1]] + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i64) = G_LSHR [[UV5]], [[SUB1]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i64) = G_LSHR [[UV4]], [[SUB1]](i32) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i64) = G_SHL [[UV5]], [[SUB5]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i64) = G_OR [[LSHR2]], [[SHL3]] + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i64) = G_LSHR [[UV5]], [[SUB4]](i32) + ; VI-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP4]](i1), [[OR1]], [[LSHR3]] + ; VI-NEXT: [[SELECT4:%[0-9]+]]:_(i64) = G_SELECT [[ICMP5]](i1), [[UV4]], [[SELECT3]] + ; VI-NEXT: [[SELECT5:%[0-9]+]]:_(i64) = G_SELECT [[ICMP4]](i1), [[LSHR1]], [[C3]] + ; VI-NEXT: [[UV6:%[0-9]+]]:_(i64), [[UV7:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV1]](i128) + ; VI-NEXT: [[SUB6:%[0-9]+]]:_(i32) = G_SUB [[COPY1]], [[C2]] + ; VI-NEXT: [[SUB7:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[COPY1]] + ; VI-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[COPY1]](i32), [[C2]] + ; VI-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C1]] + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i64) = G_SHL [[UV6]], [[COPY1]](i32) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(i64) = G_LSHR [[UV6]], [[SUB7]](i32) + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i64) = G_SHL [[UV7]], [[COPY1]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i64) = G_OR [[LSHR4]], [[SHL5]] + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[UV6]], [[SUB6]](i32) + ; VI-NEXT: [[SELECT6:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[SHL4]], [[C3]] + ; VI-NEXT: [[SELECT7:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[OR2]], [[SHL6]] + ; VI-NEXT: [[SELECT8:%[0-9]+]]:_(i64) = G_SELECT [[ICMP7]](i1), [[UV7]], [[SELECT7]] + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i64) = G_OR [[SELECT4]], [[SELECT6]] + ; VI-NEXT: [[OR4:%[0-9]+]]:_(i64) = G_OR [[SELECT5]], 
[[SELECT8]] + ; VI-NEXT: [[UV8:%[0-9]+]]:_(i64), [[UV9:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV]](i128) + ; VI-NEXT: [[SUB8:%[0-9]+]]:_(i32) = G_SUB [[SUB]], [[C2]] + ; VI-NEXT: [[SUB9:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[SUB]] + ; VI-NEXT: [[ICMP8:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[SUB]](i32), [[C2]] + ; VI-NEXT: [[ICMP9:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[SUB]](i32), [[C1]] + ; VI-NEXT: [[SHL7:%[0-9]+]]:_(i64) = G_SHL [[UV8]], [[SUB]](i32) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(i64) = G_LSHR [[UV8]], [[SUB9]](i32) + ; VI-NEXT: [[SHL8:%[0-9]+]]:_(i64) = G_SHL [[UV9]], [[SUB]](i32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(i64) = G_OR [[LSHR5]], [[SHL8]] + ; VI-NEXT: [[SHL9:%[0-9]+]]:_(i64) = G_SHL [[UV8]], [[SUB8]](i32) + ; VI-NEXT: [[SELECT9:%[0-9]+]]:_(i64) = G_SELECT [[ICMP8]](i1), [[SHL7]], [[C3]] + ; VI-NEXT: [[SELECT10:%[0-9]+]]:_(i64) = G_SELECT [[ICMP8]](i1), [[OR5]], [[SHL9]] + ; VI-NEXT: [[SELECT11:%[0-9]+]]:_(i64) = G_SELECT [[ICMP9]](i1), [[UV9]], [[SELECT10]] + ; VI-NEXT: [[SELECT12:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[SELECT]], [[C3]] + ; VI-NEXT: [[SELECT13:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[SELECT2]], [[C3]] + ; VI-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT12]](i64), [[SELECT13]](i64) + ; VI-NEXT: [[SELECT14:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR3]], [[SELECT9]] + ; VI-NEXT: [[SELECT15:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR4]], [[SELECT11]] + ; VI-NEXT: [[UV10:%[0-9]+]]:_(i64), [[UV11:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV1]](i128) + ; VI-NEXT: [[SELECT16:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[UV10]], [[SELECT14]] + ; VI-NEXT: [[SELECT17:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[UV11]], [[SELECT15]] + ; VI-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT16]](i64), [[SELECT17]](i64) + ; VI-NEXT: [[MV2:%[0-9]+]]:_(i256) = G_MERGE_VALUES [[MV]](i128), [[MV1]](i128) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV2]](i256) ; ; GFX9-LABEL: name: test_shl_s256_s256 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](s256) - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] - ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] - ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] - ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] - ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV2]], [[COPY1]](s32) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[SUB3]](s32) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV3]], [[COPY1]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL1]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s64) 
= G_SHL [[UV2]], [[SUB2]](s32) - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[SHL]], [[C3]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[OR]], [[SHL2]] - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV3]], [[SELECT1]] - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C2]] - ; GFX9-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB1]] - ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C2]] - ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB1]](s32), [[C1]] - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[SUB1]](s32) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV4]], [[SUB1]](s32) - ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s64) = G_SHL [[UV5]], [[SUB5]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[LSHR2]], [[SHL3]] - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[SUB4]](s32) - ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[OR1]], [[LSHR3]] - ; GFX9-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP5]](s1), [[UV4]], [[SELECT3]] - ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR1]], [[C3]] - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; GFX9-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] - ; GFX9-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] - ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] - ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[COPY1]](s32) - ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s64) = G_LSHR [[UV6]], [[SUB7]](s32) - ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[UV7]], [[COPY1]](s32) - ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[LSHR4]], [[SHL5]] - ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB6]](s32) - ; GFX9-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL4]], [[C3]] - ; GFX9-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[OR2]], [[SHL6]] - ; GFX9-NEXT: [[SELECT8:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[UV7]], [[SELECT7]] - ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s64) = G_OR [[SELECT4]], [[SELECT6]] - ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s64) = G_OR [[SELECT5]], [[SELECT8]] - ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; GFX9-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C2]] - ; GFX9-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB]] - ; GFX9-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C2]] - ; GFX9-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB]](s32), [[C1]] - ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s64) = G_SHL [[UV8]], [[SUB]](s32) - ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s64) = G_LSHR [[UV8]], [[SUB9]](s32) - ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s64) = G_SHL [[UV9]], [[SUB]](s32) - ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[LSHR5]], [[SHL8]] - ; GFX9-NEXT: [[SHL9:%[0-9]+]]:_(s64) = G_SHL [[UV8]], [[SUB8]](s32) - ; GFX9-NEXT: [[SELECT9:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[SHL7]], [[C3]] - ; GFX9-NEXT: [[SELECT10:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[OR5]], [[SHL9]] - ; GFX9-NEXT: [[SELECT11:%[0-9]+]]:_(s64) = G_SELECT [[ICMP9]](s1), [[UV9]], [[SELECT10]] - ; GFX9-NEXT: [[SELECT12:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT]], [[C3]] - ; GFX9-NEXT: 
[[SELECT13:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT2]], [[C3]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT12]](s64), [[SELECT13]](s64) - ; GFX9-NEXT: [[SELECT14:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR3]], [[SELECT9]] - ; GFX9-NEXT: [[SELECT15:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR4]], [[SELECT11]] - ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s64), [[UV11:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; GFX9-NEXT: [[SELECT16:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV10]], [[SELECT14]] - ; GFX9-NEXT: [[SELECT17:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV11]], [[SELECT15]] - ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT16]](s64), [[SELECT17]](s64) - ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV]](s128), [[MV1]](s128) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV2]](s256) - %0:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - %1:_(s32) = COPY $vgpr8 - %2:_(s256) = G_ZEXT %1 - %3:_(s256) = G_SHL %0, %2 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 128 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i128), [[UV1:%[0-9]+]]:_(i128) = G_UNMERGE_VALUES [[COPY]](i256) + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[COPY1]], [[C]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C]], [[COPY1]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[COPY1]](i32), [[C]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C1]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV]](i128) + ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[COPY1]], [[C2]] + ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[COPY1]] + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[COPY1]](i32), [[C2]] + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C1]] + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV2]], [[COPY1]](i32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV2]], [[SUB3]](i32) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[UV3]], [[COPY1]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL1]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[UV2]], [[SUB2]](i32) + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP2]](i1), [[SHL]], [[C3]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP2]](i1), [[OR]], [[SHL2]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP3]](i1), [[UV3]], [[SELECT1]] + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV]](i128) + ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[C2]] + ; GFX9-NEXT: [[SUB5:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[SUB1]] + ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[SUB1]](i32), [[C2]] + ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[SUB1]](i32), [[C1]] + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i64) = G_LSHR [[UV5]], [[SUB1]](i32) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i64) = G_LSHR [[UV4]], [[SUB1]](i32) + ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(i64) = G_SHL [[UV5]], [[SUB5]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i64) = G_OR [[LSHR2]], [[SHL3]] + ; GFX9-NEXT: 
[[LSHR3:%[0-9]+]]:_(i64) = G_LSHR [[UV5]], [[SUB4]](i32) + ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP4]](i1), [[OR1]], [[LSHR3]] + ; GFX9-NEXT: [[SELECT4:%[0-9]+]]:_(i64) = G_SELECT [[ICMP5]](i1), [[UV4]], [[SELECT3]] + ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(i64) = G_SELECT [[ICMP4]](i1), [[LSHR1]], [[C3]] + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(i64), [[UV7:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV1]](i128) + ; GFX9-NEXT: [[SUB6:%[0-9]+]]:_(i32) = G_SUB [[COPY1]], [[C2]] + ; GFX9-NEXT: [[SUB7:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[COPY1]] + ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[COPY1]](i32), [[C2]] + ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C1]] + ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(i64) = G_SHL [[UV6]], [[COPY1]](i32) + ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(i64) = G_LSHR [[UV6]], [[SUB7]](i32) + ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(i64) = G_SHL [[UV7]], [[COPY1]](i32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i64) = G_OR [[LSHR4]], [[SHL5]] + ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(i64) = G_SHL [[UV6]], [[SUB6]](i32) + ; GFX9-NEXT: [[SELECT6:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[SHL4]], [[C3]] + ; GFX9-NEXT: [[SELECT7:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[OR2]], [[SHL6]] + ; GFX9-NEXT: [[SELECT8:%[0-9]+]]:_(i64) = G_SELECT [[ICMP7]](i1), [[UV7]], [[SELECT7]] + ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(i64) = G_OR [[SELECT4]], [[SELECT6]] + ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(i64) = G_OR [[SELECT5]], [[SELECT8]] + ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(i64), [[UV9:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV]](i128) + ; GFX9-NEXT: [[SUB8:%[0-9]+]]:_(i32) = G_SUB [[SUB]], [[C2]] + ; GFX9-NEXT: [[SUB9:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[SUB]] + ; GFX9-NEXT: [[ICMP8:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[SUB]](i32), [[C2]] + ; GFX9-NEXT: [[ICMP9:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[SUB]](i32), [[C1]] + ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(i64) = G_SHL [[UV8]], [[SUB]](i32) + ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(i64) = G_LSHR [[UV8]], [[SUB9]](i32) + ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(i64) = G_SHL [[UV9]], [[SUB]](i32) + ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(i64) = G_OR [[LSHR5]], [[SHL8]] + ; GFX9-NEXT: [[SHL9:%[0-9]+]]:_(i64) = G_SHL [[UV8]], [[SUB8]](i32) + ; GFX9-NEXT: [[SELECT9:%[0-9]+]]:_(i64) = G_SELECT [[ICMP8]](i1), [[SHL7]], [[C3]] + ; GFX9-NEXT: [[SELECT10:%[0-9]+]]:_(i64) = G_SELECT [[ICMP8]](i1), [[OR5]], [[SHL9]] + ; GFX9-NEXT: [[SELECT11:%[0-9]+]]:_(i64) = G_SELECT [[ICMP9]](i1), [[UV9]], [[SELECT10]] + ; GFX9-NEXT: [[SELECT12:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[SELECT]], [[C3]] + ; GFX9-NEXT: [[SELECT13:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[SELECT2]], [[C3]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT12]](i64), [[SELECT13]](i64) + ; GFX9-NEXT: [[SELECT14:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR3]], [[SELECT9]] + ; GFX9-NEXT: [[SELECT15:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR4]], [[SELECT11]] + ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(i64), [[UV11:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV1]](i128) + ; GFX9-NEXT: [[SELECT16:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[UV10]], [[SELECT14]] + ; GFX9-NEXT: [[SELECT17:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[UV11]], [[SELECT15]] + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT16]](i64), [[SELECT17]](i64) + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(i256) = G_MERGE_VALUES [[MV]](i128), [[MV1]](i128) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV2]](i256) + %0:_(i256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(i32) = COPY $vgpr8 + %2:_(i256) = 
G_ZEXT %1(i32) + %3:_(i256) = G_SHL %0, %2(i256) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3(i256) ... --- @@ -1713,129 +1713,129 @@ body: | ; SI-LABEL: name: test_shl_v2s128_v2s32 ; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr4_vgpr5 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](<2 x s128>) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; SI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV2]], [[C]] - ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UV2]] - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[UV2]](s32), [[C]] - ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV2]](s32), [[C1]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV4]], [[UV2]](s32) - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV4]], [[SUB1]](s32) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV5]], [[UV2]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL1]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV4]], [[SUB]](s32) - ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SHL]], [[C2]] - ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[SHL2]] - ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV5]], [[SELECT1]] - ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT]](s64), [[SELECT2]](s64) - ; SI-NEXT: [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; SI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[UV3]], [[C]] - ; SI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UV3]] - ; SI-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[UV3]](s32), [[C]] - ; SI-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV3]](s32), [[C1]] - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[UV3]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV6]], [[SUB3]](s32) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s64) = G_SHL [[UV7]], [[UV3]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[LSHR1]], [[SHL4]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB2]](s32) - ; SI-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[SHL3]], [[C2]] - ; SI-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[OR1]], [[SHL5]] - ; SI-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV7]], [[SELECT4]] - ; SI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT3]](s64), [[SELECT5]](s64) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s128>) = G_BUILD_VECTOR [[MV]](s128), [[MV1]](s128) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<2 x s128>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr4_vgpr5 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i128), [[UV1:%[0-9]+]]:_(i128) = G_UNMERGE_VALUES [[COPY]](<2 x i128>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; SI-NEXT: 
[[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV]](i128) + ; SI-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV2]], [[C]] + ; SI-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C]], [[UV2]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[UV2]](i32), [[C]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[UV2]](i32), [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV4]], [[UV2]](i32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV4]], [[SUB1]](i32) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[UV5]], [[UV2]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL1]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[UV4]], [[SUB]](i32) + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[SHL]], [[C2]] + ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR]], [[SHL2]] + ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[UV5]], [[SELECT1]] + ; SI-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT]](i64), [[SELECT2]](i64) + ; SI-NEXT: [[UV6:%[0-9]+]]:_(i64), [[UV7:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV1]](i128) + ; SI-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[UV3]], [[C]] + ; SI-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[C]], [[UV3]] + ; SI-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[UV3]](i32), [[C]] + ; SI-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[UV3]](i32), [[C1]] + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i64) = G_SHL [[UV6]], [[UV3]](i32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i64) = G_LSHR [[UV6]], [[SUB3]](i32) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(i64) = G_SHL [[UV7]], [[UV3]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i64) = G_OR [[LSHR1]], [[SHL4]] + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(i64) = G_SHL [[UV6]], [[SUB2]](i32) + ; SI-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP2]](i1), [[SHL3]], [[C2]] + ; SI-NEXT: [[SELECT4:%[0-9]+]]:_(i64) = G_SELECT [[ICMP2]](i1), [[OR1]], [[SHL5]] + ; SI-NEXT: [[SELECT5:%[0-9]+]]:_(i64) = G_SELECT [[ICMP3]](i1), [[UV7]], [[SELECT4]] + ; SI-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT3]](i64), [[SELECT5]](i64) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i128>) = G_BUILD_VECTOR [[MV]](i128), [[MV1]](i128) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<2 x i128>) ; ; VI-LABEL: name: test_shl_v2s128_v2s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr4_vgpr5 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](<2 x s128>) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; VI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV2]], [[C]] - ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UV2]] - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[UV2]](s32), [[C]] - ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV2]](s32), [[C1]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV4]], [[UV2]](s32) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV4]], [[SUB1]](s32) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV5]], 
[[UV2]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL1]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV4]], [[SUB]](s32) - ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SHL]], [[C2]] - ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[SHL2]] - ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV5]], [[SELECT1]] - ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT]](s64), [[SELECT2]](s64) - ; VI-NEXT: [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; VI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[UV3]], [[C]] - ; VI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UV3]] - ; VI-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[UV3]](s32), [[C]] - ; VI-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV3]](s32), [[C1]] - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[UV3]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV6]], [[SUB3]](s32) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s64) = G_SHL [[UV7]], [[UV3]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[LSHR1]], [[SHL4]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB2]](s32) - ; VI-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[SHL3]], [[C2]] - ; VI-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[OR1]], [[SHL5]] - ; VI-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV7]], [[SELECT4]] - ; VI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT3]](s64), [[SELECT5]](s64) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s128>) = G_BUILD_VECTOR [[MV]](s128), [[MV1]](s128) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<2 x s128>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr4_vgpr5 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i128), [[UV1:%[0-9]+]]:_(i128) = G_UNMERGE_VALUES [[COPY]](<2 x i128>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; VI-NEXT: [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV]](i128) + ; VI-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV2]], [[C]] + ; VI-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C]], [[UV2]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[UV2]](i32), [[C]] + ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[UV2]](i32), [[C1]] + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV4]], [[UV2]](i32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV4]], [[SUB1]](i32) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[UV5]], [[UV2]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL1]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[UV4]], [[SUB]](i32) + ; VI-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[SHL]], [[C2]] + ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR]], [[SHL2]] + ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[UV5]], [[SELECT1]] + ; VI-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT]](i64), [[SELECT2]](i64) + ; VI-NEXT: [[UV6:%[0-9]+]]:_(i64), [[UV7:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV1]](i128) + ; VI-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[UV3]], [[C]] + ; VI-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[C]], [[UV3]] + 
; VI-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[UV3]](i32), [[C]] + ; VI-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[UV3]](i32), [[C1]] + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i64) = G_SHL [[UV6]], [[UV3]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i64) = G_LSHR [[UV6]], [[SUB3]](i32) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(i64) = G_SHL [[UV7]], [[UV3]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i64) = G_OR [[LSHR1]], [[SHL4]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(i64) = G_SHL [[UV6]], [[SUB2]](i32) + ; VI-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP2]](i1), [[SHL3]], [[C2]] + ; VI-NEXT: [[SELECT4:%[0-9]+]]:_(i64) = G_SELECT [[ICMP2]](i1), [[OR1]], [[SHL5]] + ; VI-NEXT: [[SELECT5:%[0-9]+]]:_(i64) = G_SELECT [[ICMP3]](i1), [[UV7]], [[SELECT4]] + ; VI-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT3]](i64), [[SELECT5]](i64) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i128>) = G_BUILD_VECTOR [[MV]](i128), [[MV1]](i128) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<2 x i128>) ; ; GFX9-LABEL: name: test_shl_v2s128_v2s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](<2 x s128>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV2]], [[C]] - ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UV2]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[UV2]](s32), [[C]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV2]](s32), [[C1]] - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV4]], [[UV2]](s32) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV4]], [[SUB1]](s32) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV5]], [[UV2]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL1]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV4]], [[SUB]](s32) - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SHL]], [[C2]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[SHL2]] - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV5]], [[SELECT1]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT]](s64), [[SELECT2]](s64) - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[UV3]], [[C]] - ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UV3]] - ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[UV3]](s32), [[C]] - ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV3]](s32), [[C1]] - ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[UV3]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV6]], [[SUB3]](s32) - ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s64) = G_SHL [[UV7]], [[UV3]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[LSHR1]], [[SHL4]] - ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB2]](s32) - ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = 
G_SELECT [[ICMP2]](s1), [[SHL3]], [[C2]] - ; GFX9-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[OR1]], [[SHL5]] - ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV7]], [[SELECT4]] - ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT3]](s64), [[SELECT5]](s64) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s128>) = G_BUILD_VECTOR [[MV]](s128), [[MV1]](s128) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<2 x s128>) - %0:_(<2 x s128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - %1:_(<2 x s32>) = COPY $vgpr4_vgpr5 - %2:_(<2 x s128>) = G_SHL %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr4_vgpr5 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i128), [[UV1:%[0-9]+]]:_(i128) = G_UNMERGE_VALUES [[COPY]](<2 x i128>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(i64), [[UV5:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV]](i128) + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV2]], [[C]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C]], [[UV2]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[UV2]](i32), [[C]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[UV2]](i32), [[C1]] + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV4]], [[UV2]](i32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[UV4]], [[SUB1]](i32) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[UV5]], [[UV2]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL1]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[UV4]], [[SUB]](i32) + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[SHL]], [[C2]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR]], [[SHL2]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[UV5]], [[SELECT1]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT]](i64), [[SELECT2]](i64) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(i64), [[UV7:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[UV1]](i128) + ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[UV3]], [[C]] + ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[C]], [[UV3]] + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[UV3]](i32), [[C]] + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[UV3]](i32), [[C1]] + ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(i64) = G_SHL [[UV6]], [[UV3]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i64) = G_LSHR [[UV6]], [[SUB3]](i32) + ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(i64) = G_SHL [[UV7]], [[UV3]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i64) = G_OR [[LSHR1]], [[SHL4]] + ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(i64) = G_SHL [[UV6]], [[SUB2]](i32) + ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP2]](i1), [[SHL3]], [[C2]] + ; GFX9-NEXT: [[SELECT4:%[0-9]+]]:_(i64) = G_SELECT [[ICMP2]](i1), [[OR1]], [[SHL5]] + ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(i64) = G_SELECT [[ICMP3]](i1), [[UV7]], [[SELECT4]] + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT3]](i64), [[SELECT5]](i64) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i128>) = G_BUILD_VECTOR [[MV]](i128), [[MV1]](i128) + ; GFX9-NEXT: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<2 x i128>) + %0:_(<2 x i128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(<2 x i32>) = COPY $vgpr4_vgpr5 + %2:_(<2 x i128>) = G_SHL %0, %1(<2 x i32>) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %2(<2 x i128>) ... --- @@ -1847,95 +1847,95 @@ body: | ; SI-LABEL: name: test_shl_s65_s32 ; SI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %22(s64) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) - ; SI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C]] - ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[TRUNC]] - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C]] - ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s32), [[C1]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[MV]], [[TRUNC]](s32) - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[MV]], [[SUB1]](s32) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[MV1]], [[TRUNC]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL1]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[MV]], [[SUB]](s32) - ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SHL]], [[C2]] - ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[SHL2]] - ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[MV1]], [[SELECT1]] - ; SI-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT]](s64), [[SELECT2]](s64) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](s96) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC %22(i64) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i96) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; SI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV]](i32), [[UV1]](i32) + ; SI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV2]](i32), [[DEF]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; SI-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[TRUNC]], [[C]] + ; SI-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C]], [[TRUNC]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[TRUNC]](i32), [[C]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[TRUNC]](i32), [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[MV]], [[TRUNC]](i32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[MV]], [[SUB1]](i32) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[MV1]], [[TRUNC]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL1]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[MV]], [[SUB]](i32) + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[SHL]], [[C2]] + ; SI-NEXT: 
[[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR]], [[SHL2]] + ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[MV1]], [[SELECT1]] + ; SI-NEXT: [[MV2:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT]](i64), [[SELECT2]](i64) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i96) = G_TRUNC [[MV2]](i128) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](i96) ; ; VI-LABEL: name: test_shl_s65_s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %22(s64) - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) - ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C]] - ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[TRUNC]] - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C]] - ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s32), [[C1]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[MV]], [[TRUNC]](s32) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[MV]], [[SUB1]](s32) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[MV1]], [[TRUNC]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL1]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[MV]], [[SUB]](s32) - ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SHL]], [[C2]] - ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[SHL2]] - ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[MV1]], [[SELECT1]] - ; VI-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT]](s64), [[SELECT2]](s64) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](s96) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC %22(i64) + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i96) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; VI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV]](i32), [[UV1]](i32) + ; VI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV2]](i32), [[DEF]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; VI-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[TRUNC]], [[C]] + ; VI-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C]], [[TRUNC]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[TRUNC]](i32), [[C]] + ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[TRUNC]](i32), [[C1]] + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[MV]], [[TRUNC]](i32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[MV]], [[SUB1]](i32) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[MV1]], [[TRUNC]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL1]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[MV]], [[SUB]](i32) + ; VI-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[SHL]], [[C2]] + ; 
VI-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR]], [[SHL2]] + ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[MV1]], [[SELECT1]] + ; VI-NEXT: [[MV2:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT]](i64), [[SELECT2]](i64) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i96) = G_TRUNC [[MV2]](i128) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](i96) ; ; GFX9-LABEL: name: test_shl_s65_s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %22(s64) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) - ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C]] - ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[TRUNC]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s32), [[C1]] - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[MV]], [[TRUNC]](s32) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[MV]], [[SUB1]](s32) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[MV1]], [[TRUNC]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL1]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[MV]], [[SUB]](s32) - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SHL]], [[C2]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[SHL2]] - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[MV1]], [[SELECT1]] - ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT]](s64), [[SELECT2]](s64) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](s96) - %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(s32) = COPY $vgpr3 - %2:_(s65) = G_TRUNC %0 - %3:_(s65) = G_SHL %2, %3 - %4:_(s96) = G_ANYEXT %3 - $vgpr0_vgpr1_vgpr2 = COPY %4 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC %22(i64) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i96) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV]](i32), [[UV1]](i32) + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV2]](i32), [[DEF]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[TRUNC]], [[C]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C]], [[TRUNC]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[TRUNC]](i32), [[C]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[TRUNC]](i32), [[C1]] + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[MV]], [[TRUNC]](i32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[MV]], [[SUB1]](i32) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[MV1]], 
[[TRUNC]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL1]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[MV]], [[SUB]](i32) + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[SHL]], [[C2]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR]], [[SHL2]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[MV1]], [[SELECT1]] + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT]](i64), [[SELECT2]](i64) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i96) = G_TRUNC [[MV2]](i128) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](i96) + %0:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(i32) = COPY $vgpr3 + %2:_(i65) = G_TRUNC %0(i96) + %3:_(i65) = G_SHL %2, %3(i65) + %4:_(i96) = G_ANYEXT %3(i65) + $vgpr0_vgpr1_vgpr2 = COPY %4(i96) ... --- @@ -1947,92 +1947,92 @@ body: | ; SI-LABEL: name: test_shl_s65_s32_constant8 ; SI: liveins: $vgpr0_vgpr1_vgpr2 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %22(s64) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) - ; SI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C]] - ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[TRUNC]] - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C]] - ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s32), [[C1]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[MV]], [[TRUNC]](s32) - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[MV]], [[SUB1]](s32) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[MV1]], [[TRUNC]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL1]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[MV]], [[SUB]](s32) - ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SHL]], [[C2]] - ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[SHL2]] - ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[MV1]], [[SELECT1]] - ; SI-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT]](s64), [[SELECT2]](s64) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](s96) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC %22(i64) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i96) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; SI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV]](i32), [[UV1]](i32) + ; SI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV2]](i32), [[DEF]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; SI-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[TRUNC]], [[C]] + ; SI-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C]], [[TRUNC]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[TRUNC]](i32), [[C]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[TRUNC]](i32), [[C1]] + ; SI-NEXT: 
[[SHL:%[0-9]+]]:_(i64) = G_SHL [[MV]], [[TRUNC]](i32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[MV]], [[SUB1]](i32) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[MV1]], [[TRUNC]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL1]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[MV]], [[SUB]](i32) + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[SHL]], [[C2]] + ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR]], [[SHL2]] + ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[MV1]], [[SELECT1]] + ; SI-NEXT: [[MV2:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT]](i64), [[SELECT2]](i64) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i96) = G_TRUNC [[MV2]](i128) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](i96) ; ; VI-LABEL: name: test_shl_s65_s32_constant8 ; VI: liveins: $vgpr0_vgpr1_vgpr2 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %22(s64) - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) - ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C]] - ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[TRUNC]] - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C]] - ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s32), [[C1]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[MV]], [[TRUNC]](s32) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[MV]], [[SUB1]](s32) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[MV1]], [[TRUNC]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL1]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[MV]], [[SUB]](s32) - ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SHL]], [[C2]] - ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[SHL2]] - ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[MV1]], [[SELECT1]] - ; VI-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT]](s64), [[SELECT2]](s64) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](s96) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC %22(i64) + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i96) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; VI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV]](i32), [[UV1]](i32) + ; VI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV2]](i32), [[DEF]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; VI-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[TRUNC]], [[C]] + ; VI-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C]], [[TRUNC]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[TRUNC]](i32), [[C]] + ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[TRUNC]](i32), [[C1]] + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[MV]], [[TRUNC]](i32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = 
G_LSHR [[MV]], [[SUB1]](i32) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[MV1]], [[TRUNC]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL1]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[MV]], [[SUB]](i32) + ; VI-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[SHL]], [[C2]] + ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR]], [[SHL2]] + ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[MV1]], [[SELECT1]] + ; VI-NEXT: [[MV2:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT]](i64), [[SELECT2]](i64) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i96) = G_TRUNC [[MV2]](i128) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](i96) ; ; GFX9-LABEL: name: test_shl_s65_s32_constant8 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %22(s64) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) - ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C]] - ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[TRUNC]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s32), [[C1]] - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[MV]], [[TRUNC]](s32) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[MV]], [[SUB1]](s32) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[MV1]], [[TRUNC]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL1]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[MV]], [[SUB]](s32) - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SHL]], [[C2]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[SHL2]] - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[MV1]], [[SELECT1]] - ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT]](s64), [[SELECT2]](s64) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](s96) - %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(s32) = G_CONSTANT i32 8 - %2:_(s65) = G_TRUNC %0 - %3:_(s65) = G_SHL %2, %3 - %4:_(s96) = G_ANYEXT %3 - $vgpr0_vgpr1_vgpr2 = COPY %4 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC %22(i64) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i96) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV]](i32), [[UV1]](i32) + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV2]](i32), [[DEF]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[TRUNC]], [[C]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C]], [[TRUNC]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[TRUNC]](i32), [[C]] + ; GFX9-NEXT: 
[[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[TRUNC]](i32), [[C1]] + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[MV]], [[TRUNC]](i32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[MV]], [[SUB1]](i32) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[MV1]], [[TRUNC]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL1]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[MV]], [[SUB]](i32) + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[SHL]], [[C2]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR]], [[SHL2]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[MV1]], [[SELECT1]] + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT]](i64), [[SELECT2]](i64) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i96) = G_TRUNC [[MV2]](i128) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](i96) + %0:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(i32) = G_CONSTANT i32 8 + %2:_(i65) = G_TRUNC %0(i96) + %3:_(i65) = G_SHL %2, %3(i65) + %4:_(i96) = G_ANYEXT %3(i65) + $vgpr0_vgpr1_vgpr2 = COPY %4(i96) ... --- @@ -2044,98 +2044,98 @@ body: | ; SI-LABEL: name: test_shl_s65_s32_known_pow2 ; SI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[COPY1]](s32) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) - ; SI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[C1]] - ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SHL]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SHL]](s32), [[C1]] - ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SHL]](s32), [[C2]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[MV]], [[SHL]](s32) - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[MV]], [[SUB1]](s32) - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[MV1]], [[SHL]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s64) = G_SHL [[MV]], [[SUB]](s32) - ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SHL1]], [[C3]] - ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[SHL3]] - ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[MV1]], [[SELECT1]] - ; SI-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT]](s64), [[SELECT2]](s64) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC]](s96) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[C]], [[COPY1]](i32) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i96) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; SI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV]](i32), [[UV1]](i32) + ; 
SI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV2]](i32), [[DEF]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; SI-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[SHL]], [[C1]] + ; SI-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C1]], [[SHL]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[SHL]](i32), [[C1]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[SHL]](i32), [[C2]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[MV]], [[SHL]](i32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[MV]], [[SUB1]](i32) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[MV1]], [[SHL]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(i64) = G_SHL [[MV]], [[SUB]](i32) + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[SHL1]], [[C3]] + ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR]], [[SHL3]] + ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[MV1]], [[SELECT1]] + ; SI-NEXT: [[MV2:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT]](i64), [[SELECT2]](i64) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i96) = G_TRUNC [[MV2]](i128) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC]](i96) ; ; VI-LABEL: name: test_shl_s65_s32_known_pow2 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[COPY1]](s32) - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) - ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[C1]] - ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SHL]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SHL]](s32), [[C1]] - ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SHL]](s32), [[C2]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[MV]], [[SHL]](s32) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[MV]], [[SUB1]](s32) - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[MV1]], [[SHL]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s64) = G_SHL [[MV]], [[SUB]](s32) - ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SHL1]], [[C3]] - ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[SHL3]] - ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[MV1]], [[SELECT1]] - ; VI-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT]](s64), [[SELECT2]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC]](s96) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[C]], [[COPY1]](i32) + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i96) + ; 
VI-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; VI-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV]](i32), [[UV1]](i32) + ; VI-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV2]](i32), [[DEF]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; VI-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[SHL]], [[C1]] + ; VI-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C1]], [[SHL]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[SHL]](i32), [[C1]] + ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[SHL]](i32), [[C2]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[MV]], [[SHL]](i32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[MV]], [[SUB1]](i32) + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[MV1]], [[SHL]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i64) = G_SHL [[MV]], [[SUB]](i32) + ; VI-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[SHL1]], [[C3]] + ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR]], [[SHL3]] + ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[MV1]], [[SELECT1]] + ; VI-NEXT: [[MV2:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT]](i64), [[SELECT2]](i64) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i96) = G_TRUNC [[MV2]](i128) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC]](i96) ; ; GFX9-LABEL: name: test_shl_s65_s32_known_pow2 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[COPY1]](s32) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) - ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[C1]] - ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SHL]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SHL]](s32), [[C1]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SHL]](s32), [[C2]] - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[MV]], [[SHL]](s32) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[MV]], [[SUB1]](s32) - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[MV1]], [[SHL]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL2]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s64) = G_SHL [[MV]], [[SUB]](s32) - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SHL1]], [[C3]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[SHL3]] - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[MV1]], [[SELECT1]] - ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT]](s64), [[SELECT2]](s64) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC]](s96) - %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(s32) = COPY $vgpr3 - %2:_(s32) = G_CONSTANT i32 1 - %3:_(s32) = G_SHL %2, %1 - %4:_(s65) = G_TRUNC %0 - %5:_(s65) = G_SHL 
%4, %3 - %6:_(s96) = G_ANYEXT %5 - $vgpr0_vgpr1_vgpr2 = COPY %6 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[C]], [[COPY1]](i32) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i96) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV]](i32), [[UV1]](i32) + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV2]](i32), [[DEF]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 64 + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[SHL]], [[C1]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C1]], [[SHL]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[SHL]](i32), [[C1]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[SHL]](i32), [[C2]] + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[MV]], [[SHL]](i32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[MV]], [[SUB1]](i32) + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i64) = G_SHL [[MV1]], [[SHL]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL2]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(i64) = G_SHL [[MV]], [[SUB]](i32) + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[SHL1]], [[C3]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[OR]], [[SHL3]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[MV1]], [[SELECT1]] + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[SELECT]](i64), [[SELECT2]](i64) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i96) = G_TRUNC [[MV2]](i128) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC]](i96) + %0:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(i32) = COPY $vgpr3 + %2:_(i32) = G_CONSTANT i32 1 + %3:_(i32) = G_SHL %2, %1(i32) + %4:_(i65) = G_TRUNC %0(i96) + %5:_(i65) = G_SHL %4, %3(i32) + %6:_(i96) = G_ANYEXT %5(i65) + $vgpr0_vgpr1_vgpr2 = COPY %6(i96) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir index c8bd8ab33f18c..00faf5a488d4a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir @@ -12,14 +12,14 @@ body: | ; CHECK-LABEL: name: shufflevector_scalar_src ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(<2 x s32>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, 1) - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(<2 x i32>) = G_SHUFFLE_VECTOR %0(i32), %1, shufflemask(0, 1) + $vgpr0_vgpr1 = COPY %2(<2 x i32>) ... 
@@ -34,14 +34,14 @@ body: | ; CHECK-LABEL: name: shufflevector_scalar_src_dst ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_SHUFFLE_VECTOR %0, %1, shufflemask(1) - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_SHUFFLE_VECTOR %0(i32), %1, shufflemask(1) + $vgpr0 = COPY %2(i32) ... @@ -56,16 +56,16 @@ body: | ; CHECK-LABEL: name: shufflevector_scalar_dst ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY3]](s32) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(s32) = G_SHUFFLE_VECTOR %0, %1, shufflemask(2) - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY3]](i32) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(i32) = G_SHUFFLE_VECTOR %0(<2 x i32>), %1, shufflemask(2) + $vgpr0 = COPY %2(i32) ... 
@@ -80,18 +80,18 @@ body: | ; CHECK-LABEL: name: shufflevector_v2s32_0_1 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, 1) - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[UV3]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i32>) = G_SHUFFLE_VECTOR %0(<2 x i32>), %1, shufflemask(0, 1) + $vgpr0_vgpr1 = COPY %2(<2 x i32>) ... @@ -106,18 +106,18 @@ body: | ; CHECK-LABEL: name: shufflevector_v2s32_1_0 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(1, 0) - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[UV2]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i32>) = G_SHUFFLE_VECTOR %0(<2 x i32>), %1, shufflemask(1, 0) + $vgpr0_vgpr1 = COPY %2(<2 x i32>) ... 
@@ -132,18 +132,18 @@ body: | ; CHECK-LABEL: name: shufflevector_v2s32_0_0 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, 0) - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[UV2]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i32>) = G_SHUFFLE_VECTOR %0(<2 x i32>), %1, shufflemask(0, 0) + $vgpr0_vgpr1 = COPY %2(<2 x i32>) ... @@ -158,15 +158,15 @@ body: | ; CHECK-LABEL: name: shufflevector_v2s32_undef_undef ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, undef) - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[DEF]](i32), [[DEF]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i32>) = G_SHUFFLE_VECTOR %0(<2 x i32>), %1, shufflemask(undef, undef) + $vgpr0_vgpr1 = COPY %2(<2 x i32>) ... 
@@ -181,17 +181,17 @@ body: | ; CHECK-LABEL: name: shufflevector_v2s32_undef_0 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[COPY2]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, 0) - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[DEF]](i32), [[COPY2]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i32>) = G_SHUFFLE_VECTOR %0(<2 x i32>), %1, shufflemask(undef, 0) + $vgpr0_vgpr1 = COPY %2(<2 x i32>) ... @@ -206,13 +206,13 @@ body: | ; CHECK-LABEL: name: shufflevector_v2s32_0_undef ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, undef) - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i32>) = G_SHUFFLE_VECTOR %0(<2 x i32>), %1, shufflemask(0, undef) + $vgpr0_vgpr1 = COPY %2(<2 x i32>) ... 
@@ -227,20 +227,20 @@ body: | ; CHECK-LABEL: name: shufflevector_v3s32_3_2_1 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - %2:_(<3 x s32>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(3, 2, 1) - $vgpr0_vgpr1_vgpr2 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[UV5]](i32) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[UV7]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(<3 x i32>) = G_SHUFFLE_VECTOR %0(<3 x i32>), %1, shufflemask(3, 2, 1) + $vgpr0_vgpr1_vgpr2 = COPY %2(<3 x i32>) ... 
@@ -255,18 +255,18 @@ body: | ; CHECK-LABEL: name: shufflevector_v3s32_3_2_1_smaller ; CHECK: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - %2:_(<2 x s32>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(2, 1) - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV2]](i32) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[UV4]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(<2 x i32>) = G_SHUFFLE_VECTOR %0(<3 x i32>), %1, shufflemask(2, 1) + $vgpr0_vgpr1 = COPY %2(<2 x i32>) ... 
@@ -281,24 +281,24 @@ body: | ; CHECK-LABEL: name: shufflevector_v2s16_0_1 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, 1) - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LSHR]], [[C2]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LSHR1]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = G_SHUFFLE_VECTOR %0(<2 x i16>), %1, shufflemask(0, 1) + $vgpr0 = COPY %2(<2 x i16>) ... 
@@ -313,24 +313,24 @@ body: | ; CHECK-LABEL: name: shufflevector_v2s16_1_0 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]] - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(1, 0) - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C1]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LSHR1]], [[C2]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = G_SHUFFLE_VECTOR %0(<2 x i16>), %1, shufflemask(1, 0) + $vgpr0 = COPY %2(<2 x i16>) ... 
@@ -345,32 +345,32 @@ body: | ; CHECK-LABEL: name: shufflevector_v3s16_2_0 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 - %2:_(<3 x s16>) = G_EXTRACT %0, 0 - %3:_(<3 x s16>) = G_EXTRACT %1, 0 - %4:_(<4 x s16>) = G_SHUFFLE_VECTOR %2, %3, shufflemask(5, 1, 3, 0) - $vgpr0_vgpr1 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND2]], [[C]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND1]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) + 
%0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = COPY $vgpr2_vgpr3 + %2:_(<3 x i16>) = G_EXTRACT %0(<4 x i16>), 0 + %3:_(<3 x i16>) = G_EXTRACT %1(<4 x i16>), 0 + %4:_(<4 x i16>) = G_SHUFFLE_VECTOR %2(<3 x i16>), %3, shufflemask(5, 1, 3, 0) + $vgpr0_vgpr1 = COPY %4(<4 x i16>) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.s16.mir index 62d3fb8b3db72..ff12d46567dc9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.s16.mir @@ -15,27 +15,27 @@ body: | ; GFX8-LABEL: name: shufflevector_v2s16_v2s16_undef_undef ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]] - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C1]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[C]], [[SHL]] + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x i16>) ; ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_undef_undef ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, undef) - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[DEF]](i16), [[DEF]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = G_SHUFFLE_VECTOR %0(<2 x i16>), %1, shufflemask(undef, undef) + $vgpr0 = COPY %2(<2 x i16>) ... 
@@ -50,35 +50,35 @@ body: | ; GFX8-LABEL: name: shufflevector_v2s16_v2s16_0_undef ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C2]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LSHR]], [[C1]] + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C2]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) ; ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_0_undef ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, undef) - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = G_SHUFFLE_VECTOR %0(<2 x i16>), %1, shufflemask(0, undef) + $vgpr0 = COPY %2(<2 x i16>) ... 
@@ -93,35 +93,35 @@ body: | ; GFX8-LABEL: name: shufflevector_v2s16_v2s16_undef_0 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C2]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]] - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LSHR]], [[C1]] + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND]], [[C2]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[C]], [[SHL]] + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) ; ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_undef_0 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[TRUNC]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, 0) - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[DEF]](i16), [[TRUNC]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = G_SHUFFLE_VECTOR %0(<2 x i16>), %1, shufflemask(undef, 0) + $vgpr0 = COPY %2(<2 x i16>) ... 
@@ -136,40 +136,40 @@ body: | ; GFX8-LABEL: name: shufflevector_v2s16_v2s16_0_1 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C1]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LSHR]], [[C2]] + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LSHR1]], [[C1]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) ; ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_0_1 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, 1) - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: 
[[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = G_SHUFFLE_VECTOR %0(<2 x i16>), %1, shufflemask(0, 1) + $vgpr0 = COPY %2(<2 x i16>) ... @@ -184,40 +184,40 @@ body: | ; GFX8-LABEL: name: shufflevector_v2s16_v2s16_1_0 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]] - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C1]](i32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LSHR1]], [[C2]] + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND]], [[C1]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL]] + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) ; ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_1_0 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(1, 0) - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: 
[[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C1]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = G_SHUFFLE_VECTOR %0(<2 x i16>), %1, shufflemask(1, 0) + $vgpr0 = COPY %2(<2 x i16>) ... @@ -232,40 +232,40 @@ body: | ; GFX8-LABEL: name: shufflevector_v2s16_v2s16_0_0 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LSHR]], [[C1]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LSHR1]], [[C1]] + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C2]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) ; ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_0_0 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; 
GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, 0) - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = G_SHUFFLE_VECTOR %0(<2 x i16>), %1, shufflemask(0, 0) + $vgpr0 = COPY %2(<2 x i16>) ... @@ -280,36 +280,36 @@ body: | ; GFX8-LABEL: name: shufflevector_v2s16_v2s16_1_1 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]] - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LSHR1]], [[C]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL]] + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) ; ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_1_1 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9-NEXT: 
[[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(1, 1) - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = G_SHUFFLE_VECTOR %0(<2 x i16>), %1, shufflemask(1, 1) + $vgpr0 = COPY %2(<2 x i16>) ... @@ -324,40 +324,40 @@ body: | ; GFX8-LABEL: name: shufflevector_v2s16_v2s16_2_2 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LSHR]], [[C1]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LSHR1]], [[C1]] + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C2]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) ; ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_2_2 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: 
[[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(2, 2) - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = G_SHUFFLE_VECTOR %0(<2 x i16>), %1, shufflemask(2, 2) + $vgpr0 = COPY %2(<2 x i16>) ... 
@@ -372,35 +372,35 @@ body: | ; GFX8-LABEL: name: shufflevector_v2s16_v2s16_2_undef ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C2]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LSHR]], [[C1]] + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[C]], [[C2]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) ; ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_2_undef ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(2, undef) - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = G_SHUFFLE_VECTOR %0(<2 x i16>), %1, shufflemask(2, undef) + $vgpr0 = COPY %2(<2 x i16>) ... 
@@ -415,35 +415,35 @@ body: | ; GFX8-LABEL: name: shufflevector_v2s16_v2s16_undef_2 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C2]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]] - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LSHR]], [[C1]] + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND]], [[C2]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[C]], [[SHL]] + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) ; ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_undef_2 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[TRUNC]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, 2) - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[DEF]](i16), [[TRUNC]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = G_SHUFFLE_VECTOR %0(<2 x i16>), %1, shufflemask(undef, 2) + $vgpr0 = COPY %2(<2 x i16>) ... 
@@ -458,40 +458,40 @@ body: | ; GFX8-LABEL: name: shufflevector_v2s16_v2s16_2_3 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C1]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LSHR]], [[C2]] + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LSHR1]], [[C1]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) ; ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_2_3 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(2, 3) - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: 
[[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = G_SHUFFLE_VECTOR %0(<2 x i16>), %1, shufflemask(2, 3) + $vgpr0 = COPY %2(<2 x i16>) ... @@ -506,40 +506,40 @@ body: | ; GFX8-LABEL: name: shufflevector_v2s16_v2s16_3_2 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]] - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C1]](i32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LSHR1]], [[C2]] + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND]], [[C1]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL]] + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) ; ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_3_2 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(3, 2) - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: 
[[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C1]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = G_SHUFFLE_VECTOR %0(<2 x i16>), %1, shufflemask(3, 2) + $vgpr0 = COPY %2(<2 x i16>) ... @@ -554,33 +554,33 @@ body: | ; GFX8-LABEL: name: shufflevector_v2s16_v2s16_undef_3 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]] - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[C1]], [[SHL]] + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) ; ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_undef_3 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[TRUNC]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, 3) - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; 
GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[DEF]](i16), [[TRUNC]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = G_SHUFFLE_VECTOR %0(<2 x i16>), %1, shufflemask(undef, 3) + $vgpr0 = COPY %2(<2 x i16>) ... @@ -595,33 +595,33 @@ body: | ; GFX8-LABEL: name: shufflevector_v2s16_v2s16_3_undef ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]] - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[C1]], [[C]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL]] + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x i16>) ; ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_3_undef ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(3, undef) - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = G_SHUFFLE_VECTOR %0(<2 x i16>), %1, shufflemask(3, undef) + $vgpr0 = COPY %2(<2 x i16>) ... 
@@ -636,36 +636,36 @@ body: | ; GFX8-LABEL: name: shufflevector_v2s16_v2s16_3_3 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]] - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LSHR1]], [[C]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL]] + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) ; ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_3_3 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(3, 3) - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = G_SHUFFLE_VECTOR %0(<2 x i16>), %1, shufflemask(3, 3) + $vgpr0 = COPY %2(<2 x i16>) ... 
@@ -680,40 +680,40 @@ body: | ; GFX8-LABEL: name: shufflevector_v2s16_v2s16_0_2 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LSHR]], [[C1]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LSHR1]], [[C1]] + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C2]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) ; ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_0_2 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, 2) - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; 
GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = G_SHUFFLE_VECTOR %0(<2 x i16>), %1, shufflemask(0, 2) + $vgpr0 = COPY %2(<2 x i16>) ... @@ -728,40 +728,40 @@ body: | ; GFX8-LABEL: name: shufflevector_v2s16_v2s16_2_0 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LSHR]], [[C1]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LSHR1]], [[C1]] + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C2]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) ; ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_2_0 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, 0) - $vgpr0 = COPY %2 + ; 
GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = G_SHUFFLE_VECTOR %0(<2 x i16>), %1, shufflemask(0, 0) + $vgpr0 = COPY %2(<2 x i16>) ... @@ -776,40 +776,40 @@ body: | ; GFX8-LABEL: name: shufflevector_v2s16_v2s16_3_0 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]] - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C1]](i32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LSHR1]], [[C2]] + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND]], [[C1]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL]] + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) ; ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_3_0 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR 
[[BITCAST1]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(3, 0) - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C1]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = G_SHUFFLE_VECTOR %0(<2 x i16>), %1, shufflemask(3, 0) + $vgpr0 = COPY %2(<2 x i16>) ... @@ -824,40 +824,40 @@ body: | ; GFX8-LABEL: name: shufflevector_v2s16_v2s16_0_3 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C1]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LSHR]], [[C2]] + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LSHR1]], [[C1]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) ; ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_0_3 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, 3) - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = G_SHUFFLE_VECTOR %0(<2 x i16>), %1, shufflemask(0, 3) + $vgpr0 = COPY %2(<2 x i16>) ... 
@@ -872,40 +872,40 @@ body: | ; GFX8-LABEL: name: shufflevector_v2s16_v2s16_1_2 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]] - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C1]](i32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LSHR1]], [[C2]] + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND]], [[C1]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL]] + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) ; ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_1_2 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(1, 2) - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C1]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX9-NEXT: 
[[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = G_SHUFFLE_VECTOR %0(<2 x i16>), %1, shufflemask(1, 2) + $vgpr0 = COPY %2(<2 x i16>) ... @@ -920,40 +920,40 @@ body: | ; GFX8-LABEL: name: shufflevector_v2s16_v2s16_2_1 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C1]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LSHR]], [[C2]] + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LSHR1]], [[C1]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) ; ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_2_1 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(2, 1) - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: 
[[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = G_SHUFFLE_VECTOR %0(<2 x i16>), %1, shufflemask(2, 1) + $vgpr0 = COPY %2(<2 x i16>) ... @@ -968,54 +968,54 @@ body: | ; GFX8-LABEL: name: shufflevector_v4s16_v3s16_2_0 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] - ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C1]] + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C]](i32) + ; GFX8-NEXT: 
[[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C1]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND2]], [[C]](i32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND1]], [[SHL1]] + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX9-LABEL: name: shufflevector_v4s16_v3s16_2_0 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 - %2:_(<3 x s16>) = G_EXTRACT %0, 0 - %3:_(<3 x s16>) = G_EXTRACT %1, 0 - %4:_(<4 x s16>) = G_SHUFFLE_VECTOR %2, %3, shufflemask(5, 1, 3, 0) - $vgpr0_vgpr1 = COPY %4 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC3]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), 
[[TRUNC]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = COPY $vgpr2_vgpr3 + %2:_(<3 x i16>) = G_EXTRACT %0(<4 x i16>), 0 + %3:_(<3 x i16>) = G_EXTRACT %1(<4 x i16>), 0 + %4:_(<4 x i16>) = G_SHUFFLE_VECTOR %2(<3 x i16>), %3, shufflemask(5, 1, 3, 0) + $vgpr0_vgpr1 = COPY %4(<4 x i16>) ... @@ -1030,48 +1030,48 @@ body: | ; GFX8-LABEL: name: shufflevector_v2s16_v4s16_1_0 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<2 x s32>) - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C1]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x s32>) - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]] - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[COPY]](<4 x i16>) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<2 x i32>) + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C1]](i32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[COPY]](<4 x i16>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x i32>) + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[UV2]](i32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C]](i32) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LSHR1]], [[C2]] + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND]], [[C1]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL]] + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) ; ; GFX9-LABEL: name: shufflevector_v2s16_v4s16_1_0 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: 
[[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<2 x s32>) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x s32>) - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(1, 0) - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[COPY]](<4 x i16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<2 x i32>) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C1]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[COPY]](<4 x i16>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x i32>) + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[UV2]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i16>) = G_SHUFFLE_VECTOR %0(<4 x i16>), %1, shufflemask(1, 0) + $vgpr0 = COPY %2(<2 x i16>) ... 
@@ -1086,43 +1086,43 @@ body: | ; GFX8-LABEL: name: shufflevector_v2s16_v4s16_1_3 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<2 x s32>) - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x s32>) - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]] - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[COPY]](<4 x i16>) + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<2 x i32>) + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[COPY]](<4 x i16>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x i32>) + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[UV3]](i32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C]](i32) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LSHR1]], [[C]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL]] + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) ; ; GFX9-LABEL: name: shufflevector_v2s16_v4s16_1_3 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<2 x s32>) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x s32>) - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - 
%1:_(<4 x s16>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(1, 3) - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[COPY]](<4 x i16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<2 x i32>) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[COPY]](<4 x i16>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x i32>) + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[UV3]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i16>) = G_SHUFFLE_VECTOR %0(<4 x i16>), %1, shufflemask(1, 3) + $vgpr0 = COPY %2(<2 x i16>) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sitofp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sitofp.mir index 4cbdea64f1c00..81525aa205e1c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sitofp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sitofp.mir @@ -11,18 +11,22 @@ body: | ; GFX6-LABEL: name: test_sitofp_s32_to_s32 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[COPY]](s32) - ; GFX6-NEXT: $vgpr0 = COPY [[SITOFP]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[SITOFP:%[0-9]+]]:_(f32) = G_SITOFP [[COPY]](i32) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[SITOFP]](f32) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST]](i32) + ; ; GFX8-LABEL: name: test_sitofp_s32_to_s32 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[COPY]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[SITOFP]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_SITOFP %0 - $vgpr0 = COPY %1 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[SITOFP:%[0-9]+]]:_(f32) = G_SITOFP [[COPY]](i32) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[SITOFP]](f32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_SITOFP %0(i32) + %2:_(i32) = G_BITCAST %1(f32) + $vgpr0 = COPY %2(i32) ... 
--- @@ -34,18 +38,22 @@ body: | ; GFX6-LABEL: name: test_sitofp_s32_to_s64 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[SITOFP:%[0-9]+]]:_(s64) = G_SITOFP [[COPY]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SITOFP]](s64) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[SITOFP:%[0-9]+]]:_(f64) = G_SITOFP [[COPY]](i32) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i64) = G_BITCAST [[SITOFP]](f64) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](i64) + ; ; GFX8-LABEL: name: test_sitofp_s32_to_s64 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[SITOFP:%[0-9]+]]:_(s64) = G_SITOFP [[COPY]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SITOFP]](s64) - %0:_(s32) = COPY $vgpr0 - %1:_(s64) = G_SITOFP %0 - $vgpr0_vgpr1 = COPY %1 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[SITOFP:%[0-9]+]]:_(f64) = G_SITOFP [[COPY]](i32) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i64) = G_BITCAST [[SITOFP]](f64) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](i64) + %0:_(i32) = COPY $vgpr0 + %1:_(f64) = G_SITOFP %0(i32) + %2:_(i64) = G_BITCAST %1(f64) + $vgpr0_vgpr1 = COPY %2(i64) ... --- @@ -57,24 +65,28 @@ body: | ; GFX6-LABEL: name: test_sitofp_v2s32_to_v2s32 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX6-NEXT: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[UV]](s32) - ; GFX6-NEXT: [[SITOFP1:%[0-9]+]]:_(s32) = G_SITOFP [[UV1]](s32) - ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SITOFP]](s32), [[SITOFP1]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX6-NEXT: [[SITOFP:%[0-9]+]]:_(f32) = G_SITOFP [[UV]](i32) + ; GFX6-NEXT: [[SITOFP1:%[0-9]+]]:_(f32) = G_SITOFP [[UV1]](i32) + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[SITOFP]](f32), [[SITOFP1]](f32) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x i32>) + ; ; GFX8-LABEL: name: test_sitofp_v2s32_to_v2s32 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX8-NEXT: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[UV]](s32) - ; GFX8-NEXT: [[SITOFP1:%[0-9]+]]:_(s32) = G_SITOFP [[UV1]](s32) - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SITOFP]](s32), [[SITOFP1]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_SITOFP %0 - $vgpr0_vgpr1 = COPY %1 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX8-NEXT: [[SITOFP:%[0-9]+]]:_(f32) = G_SITOFP [[UV]](i32) + ; GFX8-NEXT: [[SITOFP1:%[0-9]+]]:_(f32) = G_SITOFP [[UV1]](i32) + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[SITOFP]](f32), [[SITOFP1]](f32) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY 
[[BITCAST]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x f32>) = G_SITOFP %0(<2 x i32>) + %2:_(<2 x i32>) = G_BITCAST %1(<2 x f32>) + $vgpr0_vgpr1 = COPY %2(<2 x i32>) ... --- @@ -86,24 +98,28 @@ body: | ; GFX6-LABEL: name: test_sitofp_v2s32_to_v2s64 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX6-NEXT: [[SITOFP:%[0-9]+]]:_(s64) = G_SITOFP [[UV]](s32) - ; GFX6-NEXT: [[SITOFP1:%[0-9]+]]:_(s64) = G_SITOFP [[UV1]](s32) - ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SITOFP]](s64), [[SITOFP1]](s64) - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX6-NEXT: [[SITOFP:%[0-9]+]]:_(f64) = G_SITOFP [[UV]](i32) + ; GFX6-NEXT: [[SITOFP1:%[0-9]+]]:_(f64) = G_SITOFP [[UV1]](i32) + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f64>) = G_BUILD_VECTOR [[SITOFP]](f64), [[SITOFP1]](f64) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BUILD_VECTOR]](<2 x f64>) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x i64>) + ; ; GFX8-LABEL: name: test_sitofp_v2s32_to_v2s64 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX8-NEXT: [[SITOFP:%[0-9]+]]:_(s64) = G_SITOFP [[UV]](s32) - ; GFX8-NEXT: [[SITOFP1:%[0-9]+]]:_(s64) = G_SITOFP [[UV1]](s32) - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SITOFP]](s64), [[SITOFP1]](s64) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s64>) = G_SITOFP %0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX8-NEXT: [[SITOFP:%[0-9]+]]:_(f64) = G_SITOFP [[UV]](i32) + ; GFX8-NEXT: [[SITOFP1:%[0-9]+]]:_(f64) = G_SITOFP [[UV1]](i32) + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f64>) = G_BUILD_VECTOR [[SITOFP]](f64), [[SITOFP1]](f64) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i64>) = G_BITCAST [[BUILD_VECTOR]](<2 x f64>) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x i64>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x f64>) = G_SITOFP %0(<2 x i32>) + %2:_(<2 x i64>) = G_BITCAST %1(<2 x f64>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<2 x i64>) ... 
--- @@ -115,50 +131,54 @@ body: | ; GFX6-LABEL: name: test_sitofp_s64_to_s32 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV]], [[UV1]] - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[XOR]], [[C2]](s32) - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[C]], [[ASHR]] - ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV1]](s32) - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C1]] - ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SUB]], [[ADD]] - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[UMIN]](s32) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64) - ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[C1]], [[UV2]] - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UMIN1]] - ; GFX6-NEXT: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[OR]](s32) - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]] - ; GFX6-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[SITOFP]], [[SUB1]](s32) - ; GFX6-NEXT: $vgpr0 = COPY [[FLDEXP]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[UV]], [[UV1]] + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[XOR]], [[C2]](i32) + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[C]], [[ASHR]] + ; GFX6-NEXT: [[INT:%[0-9]+]]:_(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV1]](i32) + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[INT]], [[C1]] + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[SUB]], [[ADD]] + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[COPY]], [[UMIN]](i32) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SHL]](i64) + ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(i32) = G_UMIN [[C1]], [[UV2]] + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[UV3]], [[UMIN1]] + ; GFX6-NEXT: [[SITOFP:%[0-9]+]]:_(f32) = G_SITOFP [[OR]](i32) + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C]], [[UMIN]] + ; GFX6-NEXT: [[FLDEXP:%[0-9]+]]:_(f32) = G_FLDEXP [[SITOFP]], [[SUB1]](i32) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[FLDEXP]](f32) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST]](i32) + ; ; GFX8-LABEL: name: test_sitofp_s64_to_s32 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV]], [[UV1]] - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[XOR]], [[C2]](s32) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[C]], [[ASHR]] - ; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV1]](s32) - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C1]] - ; GFX8-NEXT: 
[[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SUB]], [[ADD]] - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[UMIN]](s32) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64) - ; GFX8-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[C1]], [[UV2]] - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UMIN1]] - ; GFX8-NEXT: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[OR]](s32) - ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]] - ; GFX8-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[SITOFP]], [[SUB1]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[FLDEXP]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_SITOFP %0 - $vgpr0 = COPY %1 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[UV]], [[UV1]] + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[XOR]], [[C2]](i32) + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[C]], [[ASHR]] + ; GFX8-NEXT: [[INT:%[0-9]+]]:_(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV1]](i32) + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[INT]], [[C1]] + ; GFX8-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[SUB]], [[ADD]] + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[COPY]], [[UMIN]](i32) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SHL]](i64) + ; GFX8-NEXT: [[UMIN1:%[0-9]+]]:_(i32) = G_UMIN [[C1]], [[UV2]] + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[UV3]], [[UMIN1]] + ; GFX8-NEXT: [[SITOFP:%[0-9]+]]:_(f32) = G_SITOFP [[OR]](i32) + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C]], [[UMIN]] + ; GFX8-NEXT: [[FLDEXP:%[0-9]+]]:_(f32) = G_FLDEXP [[SITOFP]], [[SUB1]](i32) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[FLDEXP]](f32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(f32) = G_SITOFP %0(i64) + %2:_(i32) = G_BITCAST %1(f32) + $vgpr0 = COPY %2(i32) ... 
--- @@ -170,28 +190,32 @@ body: | ; GFX6-LABEL: name: test_sitofp_s64_to_s64 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX6-NEXT: [[SITOFP:%[0-9]+]]:_(s64) = G_SITOFP [[UV1]](s32) - ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s64) = G_UITOFP [[UV]](s32) - ; GFX6-NEXT: [[FLDEXP:%[0-9]+]]:_(s64) = G_FLDEXP [[SITOFP]], [[C]](s32) - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[FLDEXP]], [[UITOFP]] - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[FADD]](s64) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX6-NEXT: [[SITOFP:%[0-9]+]]:_(f64) = G_SITOFP [[UV1]](i32) + ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(f64) = G_UITOFP [[UV]](i32) + ; GFX6-NEXT: [[FLDEXP:%[0-9]+]]:_(f64) = G_FLDEXP [[SITOFP]], [[C]](i32) + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(f64) = G_FADD [[FLDEXP]], [[UITOFP]] + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i64) = G_BITCAST [[FADD]](f64) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](i64) + ; ; GFX8-LABEL: name: test_sitofp_s64_to_s64 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX8-NEXT: [[SITOFP:%[0-9]+]]:_(s64) = G_SITOFP [[UV1]](s32) - ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s64) = G_UITOFP [[UV]](s32) - ; GFX8-NEXT: [[FLDEXP:%[0-9]+]]:_(s64) = G_FLDEXP [[SITOFP]], [[C]](s32) - ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[FLDEXP]], [[UITOFP]] - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[FADD]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_SITOFP %0 - $vgpr0_vgpr1 = COPY %1 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX8-NEXT: [[SITOFP:%[0-9]+]]:_(f64) = G_SITOFP [[UV1]](i32) + ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(f64) = G_UITOFP [[UV]](i32) + ; GFX8-NEXT: [[FLDEXP:%[0-9]+]]:_(f64) = G_FLDEXP [[SITOFP]], [[C]](i32) + ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(f64) = G_FADD [[FLDEXP]], [[UITOFP]] + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i64) = G_BITCAST [[FADD]](f64) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(f64) = G_SITOFP %0(i64) + %2:_(i64) = G_BITCAST %1(f64) + $vgpr0_vgpr1 = COPY %2(i64) ... 
--- @@ -203,24 +227,28 @@ body: | ; GFX6-LABEL: name: test_sitofp_s16_to_s16 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 - ; GFX6-NEXT: [[SITOFP:%[0-9]+]]:_(s16) = G_SITOFP [[SEXT_INREG]](s32) - ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SITOFP]](s16) - ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 16 + ; GFX6-NEXT: [[SITOFP:%[0-9]+]]:_(f16) = G_SITOFP [[SEXT_INREG]](i32) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i16) = G_BITCAST [[SITOFP]](f16) + ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST]](i16) + ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + ; ; GFX8-LABEL: name: test_sitofp_s16_to_s16 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[SITOFP:%[0-9]+]]:_(s16) = G_SITOFP [[TRUNC]](s16) - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SITOFP]](s16) - ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s16) = G_SITOFP %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[SITOFP:%[0-9]+]]:_(f16) = G_SITOFP [[TRUNC]](i16) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i16) = G_BITCAST [[SITOFP]](f16) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST]](i16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(f16) = G_SITOFP %1(i16) + %3:_(i16) = G_BITCAST %2(f16) + %4:_(i32) = G_ANYEXT %3(i16) + $vgpr0 = COPY %4(i32) ... --- @@ -232,21 +260,25 @@ body: | ; GFX6-LABEL: name: test_sitofp_s16_to_s32 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 - ; GFX6-NEXT: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[SEXT_INREG]](s32) - ; GFX6-NEXT: $vgpr0 = COPY [[SITOFP]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 16 + ; GFX6-NEXT: [[SITOFP:%[0-9]+]]:_(f32) = G_SITOFP [[SEXT_INREG]](i32) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[SITOFP]](f32) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST]](i32) + ; ; GFX8-LABEL: name: test_sitofp_s16_to_s32 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 - ; GFX8-NEXT: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[SEXT_INREG]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[SITOFP]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s32) = G_SITOFP %1 - $vgpr0 = COPY %2 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 16 + ; GFX8-NEXT: [[SITOFP:%[0-9]+]]:_(f32) = G_SITOFP [[SEXT_INREG]](i32) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[SITOFP]](f32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(f32) = G_SITOFP %1(i16) + %3:_(i32) = G_BITCAST %2(f32) + $vgpr0 = COPY %3(i32) ... 
--- @@ -258,21 +290,25 @@ body: | ; GFX6-LABEL: name: test_sitofp_s16_to_s64 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 - ; GFX6-NEXT: [[SITOFP:%[0-9]+]]:_(s64) = G_SITOFP [[SEXT_INREG]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SITOFP]](s64) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 16 + ; GFX6-NEXT: [[SITOFP:%[0-9]+]]:_(f64) = G_SITOFP [[SEXT_INREG]](i32) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i64) = G_BITCAST [[SITOFP]](f64) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](i64) + ; ; GFX8-LABEL: name: test_sitofp_s16_to_s64 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 - ; GFX8-NEXT: [[SITOFP:%[0-9]+]]:_(s64) = G_SITOFP [[SEXT_INREG]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SITOFP]](s64) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s64) = G_SITOFP %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 16 + ; GFX8-NEXT: [[SITOFP:%[0-9]+]]:_(f64) = G_SITOFP [[SEXT_INREG]](i32) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i64) = G_BITCAST [[SITOFP]](f64) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](i64) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(f64) = G_SITOFP %1(i16) + %3:_(i64) = G_BITCAST %2(f64) + $vgpr0_vgpr1 = COPY %3(i64) ... --- @@ -284,24 +320,28 @@ body: | ; GFX6-LABEL: name: test_sitofp_s8_to_s16 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 - ; GFX6-NEXT: [[SITOFP:%[0-9]+]]:_(s16) = G_SITOFP [[SEXT_INREG]](s32) - ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SITOFP]](s16) - ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 8 + ; GFX6-NEXT: [[SITOFP:%[0-9]+]]:_(f16) = G_SITOFP [[SEXT_INREG]](i32) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i16) = G_BITCAST [[SITOFP]](f16) + ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST]](i16) + ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + ; ; GFX8-LABEL: name: test_sitofp_s8_to_s16 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 - ; GFX8-NEXT: [[SITOFP:%[0-9]+]]:_(s16) = G_SITOFP [[SEXT_INREG]](s32) - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SITOFP]](s16) - ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s8) = G_TRUNC %0 - %2:_(s16) = G_SITOFP %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 8 + ; GFX8-NEXT: [[SITOFP:%[0-9]+]]:_(f16) = G_SITOFP [[SEXT_INREG]](i32) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i16) = G_BITCAST [[SITOFP]](f16) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST]](i16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i8) = G_TRUNC %0(i32) + %2:_(f16) = G_SITOFP %1(i8) + %3:_(i16) = G_BITCAST %2(f16) + %4:_(i32) = G_ANYEXT %3(i16) + $vgpr0 = COPY %4(i32) ... 
--- @@ -313,21 +353,25 @@ body: | ; GFX6-LABEL: name: test_sitofp_s8_to_s32 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 - ; GFX6-NEXT: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[SEXT_INREG]](s32) - ; GFX6-NEXT: $vgpr0 = COPY [[SITOFP]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 8 + ; GFX6-NEXT: [[SITOFP:%[0-9]+]]:_(f32) = G_SITOFP [[SEXT_INREG]](i32) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[SITOFP]](f32) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST]](i32) + ; ; GFX8-LABEL: name: test_sitofp_s8_to_s32 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 - ; GFX8-NEXT: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[SEXT_INREG]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[SITOFP]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s8) = G_TRUNC %0 - %2:_(s32) = G_SITOFP %1 - $vgpr0 = COPY %2 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 8 + ; GFX8-NEXT: [[SITOFP:%[0-9]+]]:_(f32) = G_SITOFP [[SEXT_INREG]](i32) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[SITOFP]](f32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i8) = G_TRUNC %0(i32) + %2:_(f32) = G_SITOFP %1(i8) + %3:_(i32) = G_BITCAST %2(f32) + $vgpr0 = COPY %3(i32) ... --- @@ -339,21 +383,25 @@ body: | ; GFX6-LABEL: name: test_sitofp_s8_to_s64 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 - ; GFX6-NEXT: [[SITOFP:%[0-9]+]]:_(s64) = G_SITOFP [[SEXT_INREG]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SITOFP]](s64) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 8 + ; GFX6-NEXT: [[SITOFP:%[0-9]+]]:_(f64) = G_SITOFP [[SEXT_INREG]](i32) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i64) = G_BITCAST [[SITOFP]](f64) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](i64) + ; ; GFX8-LABEL: name: test_sitofp_s8_to_s64 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 - ; GFX8-NEXT: [[SITOFP:%[0-9]+]]:_(s64) = G_SITOFP [[SEXT_INREG]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SITOFP]](s64) - %0:_(s32) = COPY $vgpr0 - %1:_(s8) = G_TRUNC %0 - %2:_(s64) = G_SITOFP %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 8 + ; GFX8-NEXT: [[SITOFP:%[0-9]+]]:_(f64) = G_SITOFP [[SEXT_INREG]](i32) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i64) = G_BITCAST [[SITOFP]](f64) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](i64) + %0:_(i32) = COPY $vgpr0 + %1:_(i8) = G_TRUNC %0(i32) + %2:_(f64) = G_SITOFP %1(i8) + %3:_(i64) = G_BITCAST %2(f64) + $vgpr0_vgpr1 = COPY %3(i64) ... 
--- @@ -365,28 +413,32 @@ body: | ; GFX6-LABEL: name: test_sitofp_s1_to_s16 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xHBC00 - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[TRUNC]](s1), [[C]], [[C1]] - ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) - ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(i1) = G_TRUNC [[COPY]](i32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xHBC00 + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH0000 + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(f16) = G_SELECT [[TRUNC]](i1), [[C]], [[C1]] + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i16) = G_BITCAST [[SELECT]](f16) + ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST]](i16) + ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + ; ; GFX8-LABEL: name: test_sitofp_s1_to_s16 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xHBC00 - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[TRUNC]](s1), [[C]], [[C1]] - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) - ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s1) = G_TRUNC %0 - %2:_(s16) = G_SITOFP %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i1) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xHBC00 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH0000 + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(f16) = G_SELECT [[TRUNC]](i1), [[C]], [[C1]] + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i16) = G_BITCAST [[SELECT]](f16) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST]](i16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i1) = G_TRUNC %0(i32) + %2:_(f16) = G_SITOFP %1(i1) + %3:_(i16) = G_BITCAST %2(f16) + %4:_(i32) = G_ANYEXT %3(i16) + $vgpr0 = COPY %4(i32) ... 
--- @@ -398,25 +450,29 @@ body: | ; GFX6-LABEL: name: test_sitofp_s1_to_s32 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.000000e+00 - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC]](s1), [[C]], [[C1]] - ; GFX6-NEXT: $vgpr0 = COPY [[SELECT]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(i1) = G_TRUNC [[COPY]](i32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float -1.000000e+00 + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(f32) = G_SELECT [[TRUNC]](i1), [[C]], [[C1]] + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[SELECT]](f32) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST]](i32) + ; ; GFX8-LABEL: name: test_sitofp_s1_to_s32 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.000000e+00 - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC]](s1), [[C]], [[C1]] - ; GFX8-NEXT: $vgpr0 = COPY [[SELECT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s1) = G_TRUNC %0 - %2:_(s32) = G_SITOFP %1 - $vgpr0 = COPY %2 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i1) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float -1.000000e+00 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(f32) = G_SELECT [[TRUNC]](i1), [[C]], [[C1]] + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[SELECT]](f32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i1) = G_TRUNC %0(i32) + %2:_(f32) = G_SITOFP %1(i1) + %3:_(i32) = G_BITCAST %2(f32) + $vgpr0 = COPY %3(i32) ... 
--- @@ -428,25 +484,29 @@ body: | ; GFX6-LABEL: name: test_sitofp_s1_to_s64 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double -1.000000e+00 - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00 - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[TRUNC]](s1), [[C]], [[C1]] - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(i1) = G_TRUNC [[COPY]](i32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(f64) = G_FCONSTANT double -1.000000e+00 + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(f64) = G_FCONSTANT double 0.000000e+00 + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(f64) = G_SELECT [[TRUNC]](i1), [[C]], [[C1]] + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i64) = G_BITCAST [[SELECT]](f64) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](i64) + ; ; GFX8-LABEL: name: test_sitofp_s1_to_s64 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double -1.000000e+00 - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00 - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[TRUNC]](s1), [[C]], [[C1]] - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) - %0:_(s32) = COPY $vgpr0 - %1:_(s1) = G_TRUNC %0 - %2:_(s64) = G_SITOFP %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i1) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(f64) = G_FCONSTANT double -1.000000e+00 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(f64) = G_FCONSTANT double 0.000000e+00 + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(f64) = G_SELECT [[TRUNC]](i1), [[C]], [[C1]] + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i64) = G_BITCAST [[SELECT]](f64) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](i64) + %0:_(i32) = COPY $vgpr0 + %1:_(i1) = G_TRUNC %0(i32) + %2:_(f64) = G_SITOFP %1(i1) + %3:_(i64) = G_BITCAST %2(f64) + $vgpr0_vgpr1 = COPY %3(i64) ... 
--- @@ -458,53 +518,57 @@ body: | ; GFX6-LABEL: name: test_sitofp_s33_to_s32 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 33 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG]](s64) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV]], [[UV1]] - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[XOR]], [[C2]](s32) - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[C]], [[ASHR]] - ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV1]](s32) - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C1]] - ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SUB]], [[ADD]] - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT_INREG]], [[UMIN]](s32) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64) - ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[C1]], [[UV2]] - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UMIN1]] - ; GFX6-NEXT: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[OR]](s32) - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]] - ; GFX6-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[SITOFP]], [[SUB1]](s32) - ; GFX6-NEXT: $vgpr0 = COPY [[FLDEXP]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[COPY]], 33 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SEXT_INREG]](i64) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[UV]], [[UV1]] + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[XOR]], [[C2]](i32) + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[C]], [[ASHR]] + ; GFX6-NEXT: [[INT:%[0-9]+]]:_(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV1]](i32) + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[INT]], [[C1]] + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[SUB]], [[ADD]] + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[SEXT_INREG]], [[UMIN]](i32) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SHL]](i64) + ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(i32) = G_UMIN [[C1]], [[UV2]] + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[UV3]], [[UMIN1]] + ; GFX6-NEXT: [[SITOFP:%[0-9]+]]:_(f32) = G_SITOFP [[OR]](i32) + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C]], [[UMIN]] + ; GFX6-NEXT: [[FLDEXP:%[0-9]+]]:_(f32) = G_FLDEXP [[SITOFP]], [[SUB1]](i32) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[FLDEXP]](f32) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST]](i32) + ; ; GFX8-LABEL: name: test_sitofp_s33_to_s32 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 33 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG]](s64) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV]], [[UV1]] - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[XOR]], 
[[C2]](s32) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[C]], [[ASHR]] - ; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV1]](s32) - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C1]] - ; GFX8-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SUB]], [[ADD]] - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT_INREG]], [[UMIN]](s32) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64) - ; GFX8-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[C1]], [[UV2]] - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UMIN1]] - ; GFX8-NEXT: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[OR]](s32) - ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]] - ; GFX8-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[SITOFP]], [[SUB1]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[FLDEXP]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s33) = G_TRUNC %0 - %2:_(s32) = G_SITOFP %1 - $vgpr0 = COPY %2 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[COPY]], 33 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SEXT_INREG]](i64) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[UV]], [[UV1]] + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[XOR]], [[C2]](i32) + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[C]], [[ASHR]] + ; GFX8-NEXT: [[INT:%[0-9]+]]:_(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV1]](i32) + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[INT]], [[C1]] + ; GFX8-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[SUB]], [[ADD]] + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[SEXT_INREG]], [[UMIN]](i32) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SHL]](i64) + ; GFX8-NEXT: [[UMIN1:%[0-9]+]]:_(i32) = G_UMIN [[C1]], [[UV2]] + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[UV3]], [[UMIN1]] + ; GFX8-NEXT: [[SITOFP:%[0-9]+]]:_(f32) = G_SITOFP [[OR]](i32) + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C]], [[UMIN]] + ; GFX8-NEXT: [[FLDEXP:%[0-9]+]]:_(f32) = G_FLDEXP [[SITOFP]], [[SUB1]](i32) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[FLDEXP]](f32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i33) = G_TRUNC %0(i64) + %2:_(f32) = G_SITOFP %1(i33) + %3:_(i32) = G_BITCAST %2(f32) + $vgpr0 = COPY %3(i32) ... 
--- @@ -516,55 +580,59 @@ body: | ; GFX6-LABEL: name: test_sitofp_s64_to_s16 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV]], [[UV1]] - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[XOR]], [[C2]](s32) - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[C]], [[ASHR]] - ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV1]](s32) - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C1]] - ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SUB]], [[ADD]] - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[UMIN]](s32) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64) - ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[C1]], [[UV2]] - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UMIN1]] - ; GFX6-NEXT: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[OR]](s32) - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]] - ; GFX6-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[SITOFP]], [[SUB1]](s32) - ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FLDEXP]](s32) - ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[UV]], [[UV1]] + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[XOR]], [[C2]](i32) + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[C]], [[ASHR]] + ; GFX6-NEXT: [[INT:%[0-9]+]]:_(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV1]](i32) + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[INT]], [[C1]] + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[SUB]], [[ADD]] + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[COPY]], [[UMIN]](i32) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SHL]](i64) + ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(i32) = G_UMIN [[C1]], [[UV2]] + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[UV3]], [[UMIN1]] + ; GFX6-NEXT: [[SITOFP:%[0-9]+]]:_(f32) = G_SITOFP [[OR]](i32) + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C]], [[UMIN]] + ; GFX6-NEXT: [[FLDEXP:%[0-9]+]]:_(f32) = G_FLDEXP [[SITOFP]], [[SUB1]](i32) + ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FLDEXP]](f32) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST]](i16) + ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + ; ; GFX8-LABEL: name: test_sitofp_s64_to_s16 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV]], [[UV1]] - ; GFX8-NEXT: 
[[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[XOR]], [[C2]](s32) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[C]], [[ASHR]] - ; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV1]](s32) - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C1]] - ; GFX8-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SUB]], [[ADD]] - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[UMIN]](s32) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64) - ; GFX8-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[C1]], [[UV2]] - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UMIN1]] - ; GFX8-NEXT: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[OR]](s32) - ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]] - ; GFX8-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[SITOFP]], [[SUB1]](s32) - ; GFX8-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FLDEXP]](s32) - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s16) = G_SITOFP %0 - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[UV]], [[UV1]] + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[XOR]], [[C2]](i32) + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[C]], [[ASHR]] + ; GFX8-NEXT: [[INT:%[0-9]+]]:_(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV1]](i32) + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[INT]], [[C1]] + ; GFX8-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[SUB]], [[ADD]] + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[COPY]], [[UMIN]](i32) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SHL]](i64) + ; GFX8-NEXT: [[UMIN1:%[0-9]+]]:_(i32) = G_UMIN [[C1]], [[UV2]] + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[UV3]], [[UMIN1]] + ; GFX8-NEXT: [[SITOFP:%[0-9]+]]:_(f32) = G_SITOFP [[OR]](i32) + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C]], [[UMIN]] + ; GFX8-NEXT: [[FLDEXP:%[0-9]+]]:_(f32) = G_FLDEXP [[SITOFP]], [[SUB1]](i32) + ; GFX8-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FLDEXP]](f32) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST]](i16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(f16) = G_SITOFP %0(i64) + %2:_(i16) = G_BITCAST %1(f16) + %3:_(i32) = G_ANYEXT %2(i16) + $vgpr0 = COPY %3(i32) ... 
--- @@ -576,94 +644,102 @@ body: | ; GFX6-LABEL: name: test_sitofp_v2s64_to_v2s16 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV2]], [[UV3]] - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[XOR]], [[C2]](s32) - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[C]], [[ASHR]] - ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV3]](s32) - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C1]] - ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SUB]], [[ADD]] - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[UMIN]](s32) - ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64) - ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[C1]], [[UV4]] - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV5]], [[UMIN1]] - ; GFX6-NEXT: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[OR]](s32) - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]] - ; GFX6-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[SITOFP]], [[SUB1]](s32) - ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FLDEXP]](s32) - ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[UV6]], [[UV7]] - ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[XOR1]], [[C2]](s32) - ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[C]], [[ASHR1]] - ; GFX6-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV7]](s32) - ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[INT1]], [[C1]] - ; GFX6-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[SUB2]], [[ADD1]] - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UMIN2]](s32) - ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL1]](s64) - ; GFX6-NEXT: [[UMIN3:%[0-9]+]]:_(s32) = G_UMIN [[C1]], [[UV8]] - ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[UV9]], [[UMIN3]] - ; GFX6-NEXT: [[SITOFP1:%[0-9]+]]:_(s32) = G_SITOFP [[OR1]](s32) - ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN2]] - ; GFX6-NEXT: [[FLDEXP1:%[0-9]+]]:_(s32) = G_FLDEXP [[SITOFP1]], [[SUB3]](s32) - ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FLDEXP1]](s32) - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) - ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV]](i64) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; 
GFX6-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[UV2]], [[UV3]] + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[XOR]], [[C2]](i32) + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[C]], [[ASHR]] + ; GFX6-NEXT: [[INT:%[0-9]+]]:_(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV3]](i32) + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[INT]], [[C1]] + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[SUB]], [[ADD]] + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV]], [[UMIN]](i32) + ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SHL]](i64) + ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(i32) = G_UMIN [[C1]], [[UV4]] + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[UV5]], [[UMIN1]] + ; GFX6-NEXT: [[SITOFP:%[0-9]+]]:_(f32) = G_SITOFP [[OR]](i32) + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C]], [[UMIN]] + ; GFX6-NEXT: [[FLDEXP:%[0-9]+]]:_(f32) = G_FLDEXP [[SITOFP]], [[SUB1]](i32) + ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FLDEXP]](f32) + ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV1]](i64) + ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[UV6]], [[UV7]] + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[XOR1]], [[C2]](i32) + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[C]], [[ASHR1]] + ; GFX6-NEXT: [[INT1:%[0-9]+]]:_(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV7]](i32) + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[INT1]], [[C1]] + ; GFX6-NEXT: [[UMIN2:%[0-9]+]]:_(i32) = G_UMIN [[SUB2]], [[ADD1]] + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[UMIN2]](i32) + ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SHL1]](i64) + ; GFX6-NEXT: [[UMIN3:%[0-9]+]]:_(i32) = G_UMIN [[C1]], [[UV8]] + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[UV9]], [[UMIN3]] + ; GFX6-NEXT: [[SITOFP1:%[0-9]+]]:_(f32) = G_SITOFP [[OR1]](i32) + ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[C]], [[UMIN2]] + ; GFX6-NEXT: [[FLDEXP1:%[0-9]+]]:_(f32) = G_FLDEXP [[SITOFP1]], [[SUB3]](i32) + ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[FLDEXP1]](f32) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST]](i16) + ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST1]](i16) + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C3]](i32) + ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR2]](i32) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST2]](<2 x f16>) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x i16>) + ; ; GFX8-LABEL: name: test_sitofp_v2s64_to_v2s16 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV2]], [[UV3]] - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[XOR]], [[C2]](s32) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[C]], [[ASHR]] - ; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.sffbh), [[UV3]](s32) - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C1]] - ; GFX8-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SUB]], [[ADD]] - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[UMIN]](s32) - ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64) - ; GFX8-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[C1]], [[UV4]] - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV5]], [[UMIN1]] - ; GFX8-NEXT: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[OR]](s32) - ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]] - ; GFX8-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[SITOFP]], [[SUB1]](s32) - ; GFX8-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FLDEXP]](s32) - ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX8-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[UV6]], [[UV7]] - ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[XOR1]], [[C2]](s32) - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[C]], [[ASHR1]] - ; GFX8-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV7]](s32) - ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[INT1]], [[C1]] - ; GFX8-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[SUB2]], [[ADD1]] - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UMIN2]](s32) - ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL1]](s64) - ; GFX8-NEXT: [[UMIN3:%[0-9]+]]:_(s32) = G_UMIN [[C1]], [[UV8]] - ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[UV9]], [[UMIN3]] - ; GFX8-NEXT: [[SITOFP1:%[0-9]+]]:_(s32) = G_SITOFP [[OR1]](s32) - ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN2]] - ; GFX8-NEXT: [[FLDEXP1:%[0-9]+]]:_(s32) = G_FLDEXP [[SITOFP1]], [[SUB3]](s32) - ; GFX8-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FLDEXP1]](s32) - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) - ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s16>) = G_SITOFP %0 - $vgpr0 = COPY %1 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV]](i64) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[UV2]], [[UV3]] + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[XOR]], [[C2]](i32) + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[C]], [[ASHR]] + ; GFX8-NEXT: [[INT:%[0-9]+]]:_(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV3]](i32) + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[INT]], [[C1]] + ; GFX8-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[SUB]], [[ADD]] + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV]], [[UMIN]](i32) + ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SHL]](i64) + ; GFX8-NEXT: [[UMIN1:%[0-9]+]]:_(i32) = G_UMIN [[C1]], [[UV4]] + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[UV5]], [[UMIN1]] + ; GFX8-NEXT: 
[[SITOFP:%[0-9]+]]:_(f32) = G_SITOFP [[OR]](i32) + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C]], [[UMIN]] + ; GFX8-NEXT: [[FLDEXP:%[0-9]+]]:_(f32) = G_FLDEXP [[SITOFP]], [[SUB1]](i32) + ; GFX8-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FLDEXP]](f32) + ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV1]](i64) + ; GFX8-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[UV6]], [[UV7]] + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[XOR1]], [[C2]](i32) + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[C]], [[ASHR1]] + ; GFX8-NEXT: [[INT1:%[0-9]+]]:_(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV7]](i32) + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[INT1]], [[C1]] + ; GFX8-NEXT: [[UMIN2:%[0-9]+]]:_(i32) = G_UMIN [[SUB2]], [[ADD1]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[UMIN2]](i32) + ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SHL1]](i64) + ; GFX8-NEXT: [[UMIN3:%[0-9]+]]:_(i32) = G_UMIN [[C1]], [[UV8]] + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[UV9]], [[UMIN3]] + ; GFX8-NEXT: [[SITOFP1:%[0-9]+]]:_(f32) = G_SITOFP [[OR1]](i32) + ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[C]], [[UMIN2]] + ; GFX8-NEXT: [[FLDEXP1:%[0-9]+]]:_(f32) = G_FLDEXP [[SITOFP1]], [[SUB3]](i32) + ; GFX8-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[FLDEXP1]](f32) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST]](i16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST1]](i16) + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C3]](i32) + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR2]](i32) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST2]](<2 x f16>) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x i16>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x f16>) = G_SITOFP %0(<2 x i64>) + %2:_(<2 x i16>) = G_BITCAST %1(<2 x f16>) + $vgpr0 = COPY %2(<2 x i16>) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir index 34daf8e8a358d..671a5cfce90ad 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir @@ -14,28 +14,30 @@ body: | ; SI-LABEL: name: test_smax_s32 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[COPY]], [[COPY1]] - ; SI-NEXT: $vgpr0 = COPY [[SMAX]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[SMAX:%[0-9]+]]:_(i32) = G_SMAX [[COPY]], [[COPY1]] + ; SI-NEXT: $vgpr0 = COPY [[SMAX]](i32) + ; ; VI-LABEL: name: test_smax_s32 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[COPY]], [[COPY1]] - ; VI-NEXT: $vgpr0 = COPY [[SMAX]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[SMAX:%[0-9]+]]:_(i32) = G_SMAX [[COPY]], [[COPY1]] + ; VI-NEXT: $vgpr0 = COPY [[SMAX]](i32) + ; ; GFX9-LABEL: name: test_smax_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[COPY]], [[COPY1]] - ; GFX9-NEXT: $vgpr0 = COPY [[SMAX]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_SMAX %0, %1 - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[SMAX:%[0-9]+]]:_(i32) = G_SMAX [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[SMAX]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_SMAX %0, %1 + $vgpr0 = COPY %2(i32) ... 
--- @@ -47,31 +49,33 @@ body: | ; SI-LABEL: name: test_smax_s64 ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY]](s64), [[COPY1]] - ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(sgt), [[COPY]](i64), [[COPY1]] + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[COPY]], [[COPY1]] + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](i64) + ; ; VI-LABEL: name: test_smax_s64 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY]](s64), [[COPY1]] - ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(sgt), [[COPY]](i64), [[COPY1]] + ; VI-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[COPY]], [[COPY1]] + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](i64) + ; ; GFX9-LABEL: name: test_smax_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY]](s64), [[COPY1]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = G_SMAX %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(sgt), [[COPY]](i64), [[COPY1]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64) = G_SMAX %0, %1 + $vgpr0_vgpr1 = COPY %2(i64) ... 
--- @@ -83,39 +87,41 @@ body: | ; SI-LABEL: name: test_smax_s16 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 - ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 16 - ; SI-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]] - ; SI-NEXT: $vgpr0 = COPY [[SMAX]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 16 + ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY1]], 16 + ; SI-NEXT: [[SMAX:%[0-9]+]]:_(i32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]] + ; SI-NEXT: $vgpr0 = COPY [[SMAX]](i32) + ; ; VI-LABEL: name: test_smax_s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[TRUNC1]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX]](s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; VI-NEXT: [[SMAX:%[0-9]+]]:_(i16) = G_SMAX [[TRUNC]], [[TRUNC1]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[SMAX]](i16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + ; ; GFX9-LABEL: name: test_smax_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[TRUNC1]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s16) = G_SMAX %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[SMAX:%[0-9]+]]:_(i16) = G_SMAX [[TRUNC]], [[TRUNC1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[SMAX]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i16) = G_SMAX %2, %3 + %5:_(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... 
--- @@ -127,46 +133,48 @@ body: | ; SI-LABEL: name: test_smax_s8 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 - ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 8 - ; SI-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]] - ; SI-NEXT: $vgpr0 = COPY [[SMAX]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 8 + ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY1]], 8 + ; SI-NEXT: [[SMAX:%[0-9]+]]:_(i32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]] + ; SI-NEXT: $vgpr0 = COPY [[SMAX]](i32) + ; ; VI-LABEL: name: test_smax_s8 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[C]](s16) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) - ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[C]](s16) - ; VI-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[ASHR]], [[ASHR1]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX]](s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[C]](i16) + ; VI-NEXT: [[ASHR:%[0-9]+]]:_(i16) = G_ASHR [[SHL]], [[C]](i16) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[C]](i16) + ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(i16) = G_ASHR [[SHL1]], [[C]](i16) + ; VI-NEXT: [[SMAX:%[0-9]+]]:_(i16) = G_SMAX [[ASHR]], [[ASHR1]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[SMAX]](i16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + ; ; GFX9-LABEL: name: test_smax_s8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG]](s32) - ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 8 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG1]](s32) - ; GFX9-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[TRUNC1]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s8) = G_TRUNC %0 - %3:_(s8) = G_TRUNC %1 - %4:_(s8) = G_SMAX %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 8 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[SEXT_INREG]](i32) + ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY1]], 8 + ; GFX9-NEXT: 
[[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[SEXT_INREG1]](i32) + ; GFX9-NEXT: [[SMAX:%[0-9]+]]:_(i16) = G_SMAX [[TRUNC]], [[TRUNC1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[SMAX]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i8) = G_TRUNC %0(i32) + %3:_(i8) = G_TRUNC %1(i32) + %4:_(i8) = G_SMAX %2, %3 + %5:_(i32) = G_ANYEXT %4(i8) + $vgpr0 = COPY %5(i32) ... --- @@ -178,37 +186,39 @@ body: | ; SI-LABEL: name: test_smax_s17 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 17 - ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 17 - ; SI-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]] - ; SI-NEXT: $vgpr0 = COPY [[SMAX]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 17 + ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY1]], 17 + ; SI-NEXT: [[SMAX:%[0-9]+]]:_(i32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]] + ; SI-NEXT: $vgpr0 = COPY [[SMAX]](i32) + ; ; VI-LABEL: name: test_smax_s17 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 17 - ; VI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 17 - ; VI-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]] - ; VI-NEXT: $vgpr0 = COPY [[SMAX]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 17 + ; VI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY1]], 17 + ; VI-NEXT: [[SMAX:%[0-9]+]]:_(i32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]] + ; VI-NEXT: $vgpr0 = COPY [[SMAX]](i32) + ; ; GFX9-LABEL: name: test_smax_s17 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 17 - ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 17 - ; GFX9-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]] - ; GFX9-NEXT: $vgpr0 = COPY [[SMAX]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s17) = G_TRUNC %0 - %3:_(s17) = G_TRUNC %1 - %4:_(s17) = G_SMAX %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 17 + ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY1]], 17 + ; GFX9-NEXT: [[SMAX:%[0-9]+]]:_(i32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]] + ; GFX9-NEXT: $vgpr0 = COPY [[SMAX]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i17) = G_TRUNC %0(i32) + %3:_(i17) = G_TRUNC %1(i32) + %4:_(i17) = G_SMAX %2, %3 + %5:_(i32) = G_ANYEXT %4(i17) + $vgpr0 = COPY %5(i32) ... 
--- @@ -220,40 +230,42 @@ body: | ; SI-LABEL: name: test_smax_v2s32 ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[UV]], [[UV2]] - ; SI-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[UV1]], [[UV3]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SMAX]](s32), [[SMAX1]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; SI-NEXT: [[SMAX:%[0-9]+]]:_(i32) = G_SMAX [[UV]], [[UV2]] + ; SI-NEXT: [[SMAX1:%[0-9]+]]:_(i32) = G_SMAX [[UV1]], [[UV3]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SMAX]](i32), [[SMAX1]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + ; ; VI-LABEL: name: test_smax_v2s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[UV]], [[UV2]] - ; VI-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[UV1]], [[UV3]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SMAX]](s32), [[SMAX1]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; VI-NEXT: [[SMAX:%[0-9]+]]:_(i32) = G_SMAX [[UV]], [[UV2]] + ; VI-NEXT: [[SMAX1:%[0-9]+]]:_(i32) = G_SMAX [[UV1]], [[UV3]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SMAX]](i32), [[SMAX1]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + ; ; GFX9-LABEL: name: test_smax_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[UV]], [[UV2]] - ; GFX9-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[UV1]], [[UV3]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SMAX]](s32), [[SMAX1]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = G_SMAX %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: 
[[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX9-NEXT: [[SMAX:%[0-9]+]]:_(i32) = G_SMAX [[UV]], [[UV2]] + ; GFX9-NEXT: [[SMAX1:%[0-9]+]]:_(i32) = G_SMAX [[UV1]], [[UV3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SMAX]](i32), [[SMAX1]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i32>) = G_SMAX %0, %1 + $vgpr0_vgpr1 = COPY %2(<2 x i32>) ... --- @@ -265,43 +277,45 @@ body: | ; SI-LABEL: name: test_smax_v3s32 ; SI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; SI-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[UV]], [[UV3]] - ; SI-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[UV1]], [[UV4]] - ; SI-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[UV2]], [[UV5]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SMAX]](s32), [[SMAX1]](s32), [[SMAX2]](s32) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; SI-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; SI-NEXT: [[SMAX:%[0-9]+]]:_(i32) = G_SMAX [[UV]], [[UV3]] + ; SI-NEXT: [[SMAX1:%[0-9]+]]:_(i32) = G_SMAX [[UV1]], [[UV4]] + ; SI-NEXT: [[SMAX2:%[0-9]+]]:_(i32) = G_SMAX [[UV2]], [[UV5]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[SMAX]](i32), [[SMAX1]](i32), [[SMAX2]](i32) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) + ; ; VI-LABEL: name: test_smax_v3s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; VI-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[UV]], [[UV3]] - ; VI-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[UV1]], [[UV4]] - ; VI-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[UV2]], [[UV5]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SMAX]](s32), [[SMAX1]](s32), [[SMAX2]](s32) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; VI-NEXT: 
[[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; VI-NEXT: [[SMAX:%[0-9]+]]:_(i32) = G_SMAX [[UV]], [[UV3]] + ; VI-NEXT: [[SMAX1:%[0-9]+]]:_(i32) = G_SMAX [[UV1]], [[UV4]] + ; VI-NEXT: [[SMAX2:%[0-9]+]]:_(i32) = G_SMAX [[UV2]], [[UV5]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[SMAX]](i32), [[SMAX1]](i32), [[SMAX2]](i32) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) + ; ; GFX9-LABEL: name: test_smax_v3s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; GFX9-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[UV]], [[UV3]] - ; GFX9-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[UV1]], [[UV4]] - ; GFX9-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[UV2]], [[UV5]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SMAX]](s32), [[SMAX1]](s32), [[SMAX2]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - %2:_(<3 x s32>) = G_SMAX %0, %1 - $vgpr0_vgpr1_vgpr2 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; GFX9-NEXT: [[SMAX:%[0-9]+]]:_(i32) = G_SMAX [[UV]], [[UV3]] + ; GFX9-NEXT: [[SMAX1:%[0-9]+]]:_(i32) = G_SMAX [[UV1]], [[UV4]] + ; GFX9-NEXT: [[SMAX2:%[0-9]+]]:_(i32) = G_SMAX [[UV2]], [[UV5]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[SMAX]](i32), [[SMAX1]](i32), [[SMAX2]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(<3 x i32>) = G_SMAX %0, %1 + $vgpr0_vgpr1_vgpr2 = COPY %2(<3 x i32>) ... 
--- @@ -313,59 +327,61 @@ body: | ; SI-LABEL: name: test_smax_v2s16 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16 - ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16 - ; SI-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]] - ; SI-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 - ; SI-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 16 - ; SI-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG2]], [[SEXT_INREG3]] - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SMAX]], [[C1]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SMAX1]], [[C1]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST]], 16 + ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST1]], 16 + ; SI-NEXT: [[SMAX:%[0-9]+]]:_(i32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]] + ; SI-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR]], 16 + ; SI-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR1]], 16 + ; SI-NEXT: [[SMAX1:%[0-9]+]]:_(i32) = G_SMAX [[SEXT_INREG2]], [[SEXT_INREG3]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[SMAX]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[SMAX1]], [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) + ; ; VI-LABEL: name: test_smax_v2s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI-NEXT: 
[[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[TRUNC2]] - ; VI-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC1]], [[TRUNC3]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SMAX]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SMAX1]](s16) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[SMAX:%[0-9]+]]:_(i16) = G_SMAX [[TRUNC]], [[TRUNC2]] + ; VI-NEXT: [[SMAX1:%[0-9]+]]:_(i16) = G_SMAX [[TRUNC1]], [[TRUNC3]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[SMAX]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[SMAX1]](i16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) + ; ; GFX9-LABEL: name: test_smax_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[SMAX:%[0-9]+]]:_(<2 x s16>) = G_SMAX [[COPY]], [[COPY1]] - ; GFX9-NEXT: $vgpr0 = COPY [[SMAX]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_SMAX %0, %1 - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[SMAX:%[0-9]+]]:_(<2 x i16>) = G_SMAX [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[SMAX]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = G_SMAX %0, %1 + $vgpr0 = COPY %2(<2 x i16>) ... 
--- @@ -377,80 +393,82 @@ body: | ; SI-LABEL: name: test_smax_v3s16 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16 - ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST2]], 16 - ; SI-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]] - ; SI-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 - ; SI-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 16 - ; SI-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG2]], [[SEXT_INREG3]] - ; SI-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16 - ; SI-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST3]], 16 - ; SI-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG4]], [[SEXT_INREG5]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SMAX]](s32), [[SMAX1]](s32), [[SMAX2]](s32) - ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF1]](<4 x i16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST]], 16 + ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST2]], 16 + ; SI-NEXT: [[SMAX:%[0-9]+]]:_(i32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]] + ; SI-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR]], 16 + ; SI-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR1]], 16 + ; SI-NEXT: [[SMAX1:%[0-9]+]]:_(i32) = G_SMAX [[SEXT_INREG2]], [[SEXT_INREG3]] + ; SI-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST1]], 16 + ; SI-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST3]], 16 + ; SI-NEXT: [[SMAX2:%[0-9]+]]:_(i32) = G_SMAX [[SEXT_INREG4]], [[SEXT_INREG5]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[SMAX]](i32), [[SMAX1]](i32), [[SMAX2]](i32) + ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x i32>) + ; ; VI-LABEL: name: test_smax_v3s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: 
{{ $}} - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[TRUNC3]] - ; VI-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC1]], [[TRUNC4]] - ; VI-NEXT: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC2]], [[TRUNC5]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX]](s16) - ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX1]](s16) - ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX2]](s16) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF1]](<4 x i16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST3]](i32) + ; VI-NEXT: [[SMAX:%[0-9]+]]:_(i16) = G_SMAX [[TRUNC]], [[TRUNC3]] + ; VI-NEXT: [[SMAX1:%[0-9]+]]:_(i16) = G_SMAX [[TRUNC1]], [[TRUNC4]] + ; VI-NEXT: [[SMAX2:%[0-9]+]]:_(i16) = G_SMAX [[TRUNC2]], [[TRUNC5]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[SMAX]](i16) + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[SMAX1]](i16) + ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[SMAX2]](i16) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32), [[ANYEXT2]](i32) + ; VI-NEXT: 
S_NOP 0, implicit [[BUILD_VECTOR]](<3 x i32>) + ; ; GFX9-LABEL: name: test_smax_v3s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[SMAX:%[0-9]+]]:_(<2 x s16>) = G_SMAX [[UV]], [[UV2]] - ; GFX9-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[TRUNC1]] - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[SMAX]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST2]](s32), [[LSHR]](s32), [[ANYEXT]](s32) - ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) - %0:_(<3 x s16>) = G_IMPLICIT_DEF - %1:_(<3 x s16>) = G_IMPLICIT_DEF - %2:_(<3 x s16>) = G_SMAX %0, %1 - %3:_(<3 x s32>) = G_ANYEXT %2 - S_NOP 0, implicit %3 + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF1]](<4 x i16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9-NEXT: [[SMAX:%[0-9]+]]:_(<2 x i16>) = G_SMAX [[UV]], [[UV2]] + ; GFX9-NEXT: [[SMAX1:%[0-9]+]]:_(i16) = G_SMAX [[TRUNC]], [[TRUNC1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[SMAX]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[SMAX1]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[BITCAST2]](i32), [[LSHR]](i32), [[ANYEXT]](i32) + ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x i32>) + %0:_(<3 x i16>) = G_IMPLICIT_DEF + %1:_(<3 x i16>) = G_IMPLICIT_DEF + %2:_(<3 x i16>) = G_SMAX %0, %1 + %3:_(<3 x i32>) = G_ANYEXT %2(<3 x i16>) + S_NOP 0, implicit %3(<3 x i32>) ... 
--- @@ -462,97 +480,99 @@ body: | ; SI-LABEL: name: test_smax_v4s16 ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16 - ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST2]], 16 - ; SI-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]] - ; SI-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 - ; SI-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR2]], 16 - ; SI-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG2]], [[SEXT_INREG3]] - ; SI-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16 - ; SI-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST3]], 16 - ; SI-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG4]], [[SEXT_INREG5]] - ; SI-NEXT: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 16 - ; SI-NEXT: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR3]], 16 - ; SI-NEXT: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG6]], [[SEXT_INREG7]] - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SMAX]], [[C1]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SMAX1]], [[C1]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SMAX2]], [[C1]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SMAX3]], [[C1]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = 
G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST]], 16 + ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST2]], 16 + ; SI-NEXT: [[SMAX:%[0-9]+]]:_(i32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]] + ; SI-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR]], 16 + ; SI-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR2]], 16 + ; SI-NEXT: [[SMAX1:%[0-9]+]]:_(i32) = G_SMAX [[SEXT_INREG2]], [[SEXT_INREG3]] + ; SI-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST1]], 16 + ; SI-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST3]], 16 + ; SI-NEXT: [[SMAX2:%[0-9]+]]:_(i32) = G_SMAX [[SEXT_INREG4]], [[SEXT_INREG5]] + ; SI-NEXT: [[SEXT_INREG6:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR1]], 16 + ; SI-NEXT: [[SEXT_INREG7:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR3]], 16 + ; SI-NEXT: [[SMAX3:%[0-9]+]]:_(i32) = G_SMAX [[SEXT_INREG6]], [[SEXT_INREG7]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[SMAX]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[SMAX1]], [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[SMAX2]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[SMAX3]], [[C1]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x i16>), [[BITCAST5]](<2 x i16>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) + ; ; VI-LABEL: name: test_smax_v4s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) 
= G_TRUNC [[BITCAST3]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; VI-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[TRUNC4]] - ; VI-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC1]], [[TRUNC5]] - ; VI-NEXT: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC2]], [[TRUNC6]] - ; VI-NEXT: [[SMAX3:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC3]], [[TRUNC7]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SMAX]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SMAX1]](s16) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SMAX2]](s16) - ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[SMAX3]](s16) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST3]](i32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; VI-NEXT: [[SMAX:%[0-9]+]]:_(i16) = G_SMAX [[TRUNC]], [[TRUNC4]] + ; VI-NEXT: [[SMAX1:%[0-9]+]]:_(i16) = G_SMAX [[TRUNC1]], [[TRUNC5]] + ; VI-NEXT: [[SMAX2:%[0-9]+]]:_(i16) = G_SMAX [[TRUNC2]], [[TRUNC6]] + ; VI-NEXT: [[SMAX3:%[0-9]+]]:_(i16) = G_SMAX [[TRUNC3]], [[TRUNC7]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[SMAX]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[SMAX1]](i16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[SMAX2]](i16) + ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[SMAX3]](i16) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) 
+ ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x i16>), [[BITCAST5]](<2 x i16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) + ; ; GFX9-LABEL: name: test_smax_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9-NEXT: [[SMAX:%[0-9]+]]:_(<2 x s16>) = G_SMAX [[UV]], [[UV2]] - ; GFX9-NEXT: [[SMAX1:%[0-9]+]]:_(<2 x s16>) = G_SMAX [[UV1]], [[UV3]] - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[SMAX]](<2 x s16>), [[SMAX1]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 - %2:_(<4 x s16>) = G_SMAX %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; GFX9-NEXT: [[SMAX:%[0-9]+]]:_(<2 x i16>) = G_SMAX [[UV]], [[UV2]] + ; GFX9-NEXT: [[SMAX1:%[0-9]+]]:_(<2 x i16>) = G_SMAX [[UV1]], [[UV3]] + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[SMAX]](<2 x i16>), [[SMAX1]](<2 x i16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = COPY $vgpr2_vgpr3 + %2:_(<4 x i16>) = G_SMAX %0, %1 + $vgpr0_vgpr1 = COPY %2(<4 x i16>) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir index 90bb0122c3889..fe7f5ca2ad1f9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir @@ -14,28 +14,30 @@ body: | ; SI-LABEL: name: test_smin_s32 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[COPY]], [[COPY1]] - ; SI-NEXT: $vgpr0 = COPY [[SMIN]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[SMIN:%[0-9]+]]:_(i32) = G_SMIN [[COPY]], [[COPY1]] + ; SI-NEXT: $vgpr0 = COPY [[SMIN]](i32) + ; ; VI-LABEL: name: test_smin_s32 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[COPY]], [[COPY1]] - ; VI-NEXT: $vgpr0 = COPY [[SMIN]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[SMIN:%[0-9]+]]:_(i32) = G_SMIN [[COPY]], [[COPY1]] + ; VI-NEXT: $vgpr0 = COPY [[SMIN]](i32) + ; ; GFX9-LABEL: name: test_smin_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[COPY]], [[COPY1]] - ; GFX9-NEXT: $vgpr0 = COPY [[SMIN]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_SMIN %0, %1 - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[SMIN:%[0-9]+]]:_(i32) = G_SMIN [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[SMIN]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_SMIN %0, %1 + $vgpr0 = COPY %2(i32) ... 
--- @@ -47,31 +49,33 @@ body: | ; SI-LABEL: name: test_smin_s64 ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s64), [[COPY1]] - ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[COPY]](i64), [[COPY1]] + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[COPY]], [[COPY1]] + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](i64) + ; ; VI-LABEL: name: test_smin_s64 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s64), [[COPY1]] - ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[COPY]](i64), [[COPY1]] + ; VI-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[COPY]], [[COPY1]] + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](i64) + ; ; GFX9-LABEL: name: test_smin_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s64), [[COPY1]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = G_SMIN %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[COPY]](i64), [[COPY1]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64) = G_SMIN %0, %1 + $vgpr0_vgpr1 = COPY %2(i64) ... 
--- @@ -83,39 +87,41 @@ body: | ; SI-LABEL: name: test_smin_s16 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 - ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 16 - ; SI-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]] - ; SI-NEXT: $vgpr0 = COPY [[SMIN]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 16 + ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY1]], 16 + ; SI-NEXT: [[SMIN:%[0-9]+]]:_(i32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]] + ; SI-NEXT: $vgpr0 = COPY [[SMIN]](i32) + ; ; VI-LABEL: name: test_smin_s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[TRUNC1]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN]](s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; VI-NEXT: [[SMIN:%[0-9]+]]:_(i16) = G_SMIN [[TRUNC]], [[TRUNC1]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[SMIN]](i16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + ; ; GFX9-LABEL: name: test_smin_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[TRUNC1]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s16) = G_SMIN %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[SMIN:%[0-9]+]]:_(i16) = G_SMIN [[TRUNC]], [[TRUNC1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[SMIN]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i16) = G_SMIN %2, %3 + %5:_(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... 
--- @@ -127,46 +133,48 @@ body: | ; SI-LABEL: name: test_smin_s8 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 - ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 8 - ; SI-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]] - ; SI-NEXT: $vgpr0 = COPY [[SMIN]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 8 + ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY1]], 8 + ; SI-NEXT: [[SMIN:%[0-9]+]]:_(i32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]] + ; SI-NEXT: $vgpr0 = COPY [[SMIN]](i32) + ; ; VI-LABEL: name: test_smin_s8 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[C]](s16) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) - ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[C]](s16) - ; VI-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[ASHR]], [[ASHR1]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN]](s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[C]](i16) + ; VI-NEXT: [[ASHR:%[0-9]+]]:_(i16) = G_ASHR [[SHL]], [[C]](i16) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[C]](i16) + ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(i16) = G_ASHR [[SHL1]], [[C]](i16) + ; VI-NEXT: [[SMIN:%[0-9]+]]:_(i16) = G_SMIN [[ASHR]], [[ASHR1]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[SMIN]](i16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + ; ; GFX9-LABEL: name: test_smin_s8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG]](s32) - ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 8 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG1]](s32) - ; GFX9-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[TRUNC1]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s8) = G_TRUNC %0 - %3:_(s8) = G_TRUNC %1 - %4:_(s8) = G_SMIN %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 8 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[SEXT_INREG]](i32) + ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY1]], 8 + ; GFX9-NEXT: 
[[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[SEXT_INREG1]](i32) + ; GFX9-NEXT: [[SMIN:%[0-9]+]]:_(i16) = G_SMIN [[TRUNC]], [[TRUNC1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[SMIN]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i8) = G_TRUNC %0(i32) + %3:_(i8) = G_TRUNC %1(i32) + %4:_(i8) = G_SMIN %2, %3 + %5:_(i32) = G_ANYEXT %4(i8) + $vgpr0 = COPY %5(i32) ... --- @@ -178,37 +186,39 @@ body: | ; SI-LABEL: name: test_smin_s17 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 17 - ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 17 - ; SI-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]] - ; SI-NEXT: $vgpr0 = COPY [[SMIN]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 17 + ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY1]], 17 + ; SI-NEXT: [[SMIN:%[0-9]+]]:_(i32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]] + ; SI-NEXT: $vgpr0 = COPY [[SMIN]](i32) + ; ; VI-LABEL: name: test_smin_s17 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 17 - ; VI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 17 - ; VI-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]] - ; VI-NEXT: $vgpr0 = COPY [[SMIN]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 17 + ; VI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY1]], 17 + ; VI-NEXT: [[SMIN:%[0-9]+]]:_(i32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]] + ; VI-NEXT: $vgpr0 = COPY [[SMIN]](i32) + ; ; GFX9-LABEL: name: test_smin_s17 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 17 - ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 17 - ; GFX9-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]] - ; GFX9-NEXT: $vgpr0 = COPY [[SMIN]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s17) = G_TRUNC %0 - %3:_(s17) = G_TRUNC %1 - %4:_(s17) = G_SMIN %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 17 + ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY1]], 17 + ; GFX9-NEXT: [[SMIN:%[0-9]+]]:_(i32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]] + ; GFX9-NEXT: $vgpr0 = COPY [[SMIN]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i17) = G_TRUNC %0(i32) + %3:_(i17) = G_TRUNC %1(i32) + %4:_(i17) = G_SMIN %2, %3 + %5:_(i32) = G_ANYEXT %4(i17) + $vgpr0 = COPY %5(i32) ... 
--- @@ -220,40 +230,42 @@ body: | ; SI-LABEL: name: test_smin_v2s32 ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[UV]], [[UV2]] - ; SI-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[UV1]], [[UV3]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SMIN]](s32), [[SMIN1]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; SI-NEXT: [[SMIN:%[0-9]+]]:_(i32) = G_SMIN [[UV]], [[UV2]] + ; SI-NEXT: [[SMIN1:%[0-9]+]]:_(i32) = G_SMIN [[UV1]], [[UV3]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SMIN]](i32), [[SMIN1]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + ; ; VI-LABEL: name: test_smin_v2s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[UV]], [[UV2]] - ; VI-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[UV1]], [[UV3]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SMIN]](s32), [[SMIN1]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; VI-NEXT: [[SMIN:%[0-9]+]]:_(i32) = G_SMIN [[UV]], [[UV2]] + ; VI-NEXT: [[SMIN1:%[0-9]+]]:_(i32) = G_SMIN [[UV1]], [[UV3]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SMIN]](i32), [[SMIN1]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + ; ; GFX9-LABEL: name: test_smin_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[UV]], [[UV2]] - ; GFX9-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[UV1]], [[UV3]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SMIN]](s32), [[SMIN1]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = G_SMIN %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: 
[[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX9-NEXT: [[SMIN:%[0-9]+]]:_(i32) = G_SMIN [[UV]], [[UV2]] + ; GFX9-NEXT: [[SMIN1:%[0-9]+]]:_(i32) = G_SMIN [[UV1]], [[UV3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SMIN]](i32), [[SMIN1]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i32>) = G_SMIN %0, %1 + $vgpr0_vgpr1 = COPY %2(<2 x i32>) ... --- @@ -265,43 +277,45 @@ body: | ; SI-LABEL: name: test_smin_v3s32 ; SI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; SI-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[UV]], [[UV3]] - ; SI-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[UV1]], [[UV4]] - ; SI-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[UV2]], [[UV5]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SMIN]](s32), [[SMIN1]](s32), [[SMIN2]](s32) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; SI-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; SI-NEXT: [[SMIN:%[0-9]+]]:_(i32) = G_SMIN [[UV]], [[UV3]] + ; SI-NEXT: [[SMIN1:%[0-9]+]]:_(i32) = G_SMIN [[UV1]], [[UV4]] + ; SI-NEXT: [[SMIN2:%[0-9]+]]:_(i32) = G_SMIN [[UV2]], [[UV5]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[SMIN]](i32), [[SMIN1]](i32), [[SMIN2]](i32) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) + ; ; VI-LABEL: name: test_smin_v3s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; VI-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[UV]], [[UV3]] - ; VI-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[UV1]], [[UV4]] - ; VI-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[UV2]], [[UV5]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SMIN]](s32), [[SMIN1]](s32), [[SMIN2]](s32) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; VI-NEXT: 
[[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; VI-NEXT: [[SMIN:%[0-9]+]]:_(i32) = G_SMIN [[UV]], [[UV3]] + ; VI-NEXT: [[SMIN1:%[0-9]+]]:_(i32) = G_SMIN [[UV1]], [[UV4]] + ; VI-NEXT: [[SMIN2:%[0-9]+]]:_(i32) = G_SMIN [[UV2]], [[UV5]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[SMIN]](i32), [[SMIN1]](i32), [[SMIN2]](i32) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) + ; ; GFX9-LABEL: name: test_smin_v3s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; GFX9-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[UV]], [[UV3]] - ; GFX9-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[UV1]], [[UV4]] - ; GFX9-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[UV2]], [[UV5]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SMIN]](s32), [[SMIN1]](s32), [[SMIN2]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - %2:_(<3 x s32>) = G_SMIN %0, %1 - $vgpr0_vgpr1_vgpr2 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; GFX9-NEXT: [[SMIN:%[0-9]+]]:_(i32) = G_SMIN [[UV]], [[UV3]] + ; GFX9-NEXT: [[SMIN1:%[0-9]+]]:_(i32) = G_SMIN [[UV1]], [[UV4]] + ; GFX9-NEXT: [[SMIN2:%[0-9]+]]:_(i32) = G_SMIN [[UV2]], [[UV5]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[SMIN]](i32), [[SMIN1]](i32), [[SMIN2]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(<3 x i32>) = G_SMIN %0, %1 + $vgpr0_vgpr1_vgpr2 = COPY %2(<3 x i32>) ... 
--- @@ -313,59 +327,61 @@ body: | ; SI-LABEL: name: test_smin_v2s16 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16 - ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16 - ; SI-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]] - ; SI-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 - ; SI-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 16 - ; SI-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG2]], [[SEXT_INREG3]] - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SMIN]], [[C1]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SMIN1]], [[C1]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST]], 16 + ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST1]], 16 + ; SI-NEXT: [[SMIN:%[0-9]+]]:_(i32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]] + ; SI-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR]], 16 + ; SI-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR1]], 16 + ; SI-NEXT: [[SMIN1:%[0-9]+]]:_(i32) = G_SMIN [[SEXT_INREG2]], [[SEXT_INREG3]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[SMIN]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[SMIN1]], [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) + ; ; VI-LABEL: name: test_smin_v2s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI-NEXT: 
[[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[TRUNC2]] - ; VI-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC1]], [[TRUNC3]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SMIN]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SMIN1]](s16) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[SMIN:%[0-9]+]]:_(i16) = G_SMIN [[TRUNC]], [[TRUNC2]] + ; VI-NEXT: [[SMIN1:%[0-9]+]]:_(i16) = G_SMIN [[TRUNC1]], [[TRUNC3]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[SMIN]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[SMIN1]](i16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) + ; ; GFX9-LABEL: name: test_smin_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[SMIN:%[0-9]+]]:_(<2 x s16>) = G_SMIN [[COPY]], [[COPY1]] - ; GFX9-NEXT: $vgpr0 = COPY [[SMIN]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_SMIN %0, %1 - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[SMIN:%[0-9]+]]:_(<2 x i16>) = G_SMIN [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[SMIN]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = G_SMIN %0, %1 + $vgpr0 = COPY %2(<2 x i16>) ... 
--- @@ -377,80 +393,82 @@ body: | ; SI-LABEL: name: test_smin_v3s16 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16 - ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST2]], 16 - ; SI-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]] - ; SI-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 - ; SI-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 16 - ; SI-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG2]], [[SEXT_INREG3]] - ; SI-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16 - ; SI-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST3]], 16 - ; SI-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG4]], [[SEXT_INREG5]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SMIN]](s32), [[SMIN1]](s32), [[SMIN2]](s32) - ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF1]](<4 x i16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST]], 16 + ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST2]], 16 + ; SI-NEXT: [[SMIN:%[0-9]+]]:_(i32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]] + ; SI-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR]], 16 + ; SI-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR1]], 16 + ; SI-NEXT: [[SMIN1:%[0-9]+]]:_(i32) = G_SMIN [[SEXT_INREG2]], [[SEXT_INREG3]] + ; SI-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST1]], 16 + ; SI-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST3]], 16 + ; SI-NEXT: [[SMIN2:%[0-9]+]]:_(i32) = G_SMIN [[SEXT_INREG4]], [[SEXT_INREG5]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[SMIN]](i32), [[SMIN1]](i32), [[SMIN2]](i32) + ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x i32>) + ; ; VI-LABEL: name: test_smin_v3s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: 
{{ $}} - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[TRUNC3]] - ; VI-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC1]], [[TRUNC4]] - ; VI-NEXT: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC2]], [[TRUNC5]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN]](s16) - ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN1]](s16) - ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN2]](s16) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF1]](<4 x i16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST3]](i32) + ; VI-NEXT: [[SMIN:%[0-9]+]]:_(i16) = G_SMIN [[TRUNC]], [[TRUNC3]] + ; VI-NEXT: [[SMIN1:%[0-9]+]]:_(i16) = G_SMIN [[TRUNC1]], [[TRUNC4]] + ; VI-NEXT: [[SMIN2:%[0-9]+]]:_(i16) = G_SMIN [[TRUNC2]], [[TRUNC5]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[SMIN]](i16) + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[SMIN1]](i16) + ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[SMIN2]](i16) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32), [[ANYEXT2]](i32) + ; VI-NEXT: 
S_NOP 0, implicit [[BUILD_VECTOR]](<3 x i32>) + ; ; GFX9-LABEL: name: test_smin_v3s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[SMIN:%[0-9]+]]:_(<2 x s16>) = G_SMIN [[UV]], [[UV2]] - ; GFX9-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[TRUNC1]] - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[SMIN]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST2]](s32), [[LSHR]](s32), [[ANYEXT]](s32) - ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) - %0:_(<3 x s16>) = G_IMPLICIT_DEF - %1:_(<3 x s16>) = G_IMPLICIT_DEF - %2:_(<3 x s16>) = G_SMIN %0, %1 - %3:_(<3 x s32>) = G_ANYEXT %2 - S_NOP 0, implicit %3 + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF1]](<4 x i16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9-NEXT: [[SMIN:%[0-9]+]]:_(<2 x i16>) = G_SMIN [[UV]], [[UV2]] + ; GFX9-NEXT: [[SMIN1:%[0-9]+]]:_(i16) = G_SMIN [[TRUNC]], [[TRUNC1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[SMIN]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[SMIN1]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[BITCAST2]](i32), [[LSHR]](i32), [[ANYEXT]](i32) + ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x i32>) + %0:_(<3 x i16>) = G_IMPLICIT_DEF + %1:_(<3 x i16>) = G_IMPLICIT_DEF + %2:_(<3 x i16>) = G_SMIN %0, %1 + %3:_(<3 x i32>) = G_ANYEXT %2(<3 x i16>) + S_NOP 0, implicit %3(<3 x i32>) ... 
--- @@ -462,97 +480,99 @@ body: | ; SI-LABEL: name: test_smin_v4s16 ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16 - ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST2]], 16 - ; SI-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]] - ; SI-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 - ; SI-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR2]], 16 - ; SI-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG2]], [[SEXT_INREG3]] - ; SI-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16 - ; SI-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST3]], 16 - ; SI-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG4]], [[SEXT_INREG5]] - ; SI-NEXT: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 16 - ; SI-NEXT: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR3]], 16 - ; SI-NEXT: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG6]], [[SEXT_INREG7]] - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SMIN]], [[C1]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SMIN1]], [[C1]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SMIN2]], [[C1]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SMIN3]], [[C1]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = 
G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST]], 16 + ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST2]], 16 + ; SI-NEXT: [[SMIN:%[0-9]+]]:_(i32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]] + ; SI-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR]], 16 + ; SI-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR2]], 16 + ; SI-NEXT: [[SMIN1:%[0-9]+]]:_(i32) = G_SMIN [[SEXT_INREG2]], [[SEXT_INREG3]] + ; SI-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST1]], 16 + ; SI-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST3]], 16 + ; SI-NEXT: [[SMIN2:%[0-9]+]]:_(i32) = G_SMIN [[SEXT_INREG4]], [[SEXT_INREG5]] + ; SI-NEXT: [[SEXT_INREG6:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR1]], 16 + ; SI-NEXT: [[SEXT_INREG7:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR3]], 16 + ; SI-NEXT: [[SMIN3:%[0-9]+]]:_(i32) = G_SMIN [[SEXT_INREG6]], [[SEXT_INREG7]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[SMIN]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[SMIN1]], [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[SMIN2]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[SMIN3]], [[C1]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x i16>), [[BITCAST5]](<2 x i16>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) + ; ; VI-LABEL: name: test_smin_v4s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) 
= G_TRUNC [[BITCAST3]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; VI-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[TRUNC4]] - ; VI-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC1]], [[TRUNC5]] - ; VI-NEXT: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC2]], [[TRUNC6]] - ; VI-NEXT: [[SMIN3:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC3]], [[TRUNC7]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SMIN]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SMIN1]](s16) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SMIN2]](s16) - ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[SMIN3]](s16) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST3]](i32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; VI-NEXT: [[SMIN:%[0-9]+]]:_(i16) = G_SMIN [[TRUNC]], [[TRUNC4]] + ; VI-NEXT: [[SMIN1:%[0-9]+]]:_(i16) = G_SMIN [[TRUNC1]], [[TRUNC5]] + ; VI-NEXT: [[SMIN2:%[0-9]+]]:_(i16) = G_SMIN [[TRUNC2]], [[TRUNC6]] + ; VI-NEXT: [[SMIN3:%[0-9]+]]:_(i16) = G_SMIN [[TRUNC3]], [[TRUNC7]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[SMIN]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[SMIN1]](i16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[SMIN2]](i16) + ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[SMIN3]](i16) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) 
+ ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x i16>), [[BITCAST5]](<2 x i16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) + ; ; GFX9-LABEL: name: test_smin_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9-NEXT: [[SMIN:%[0-9]+]]:_(<2 x s16>) = G_SMIN [[UV]], [[UV2]] - ; GFX9-NEXT: [[SMIN1:%[0-9]+]]:_(<2 x s16>) = G_SMIN [[UV1]], [[UV3]] - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[SMIN]](<2 x s16>), [[SMIN1]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 - %2:_(<4 x s16>) = G_SMIN %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; GFX9-NEXT: [[SMIN:%[0-9]+]]:_(<2 x i16>) = G_SMIN [[UV]], [[UV2]] + ; GFX9-NEXT: [[SMIN1:%[0-9]+]]:_(<2 x i16>) = G_SMIN [[UV1]], [[UV3]] + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[SMIN]](<2 x i16>), [[SMIN1]](<2 x i16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = COPY $vgpr2_vgpr3 + %2:_(<4 x i16>) = G_SMIN %0, %1 + $vgpr0_vgpr1 = COPY %2(<4 x i16>) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smulh.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smulh.mir index 51fffb7551a12..77c11da2c108c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smulh.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smulh.mir @@ -11,22 +11,22 @@ body: | ; GFX8-LABEL: name: test_smulh_s32 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[SMULH:%[0-9]+]]:_(s32) = G_SMULH [[COPY]], [[COPY1]] - ; GFX8-NEXT: $vgpr0 = COPY [[SMULH]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[SMULH:%[0-9]+]]:_(i32) = G_SMULH [[COPY]], [[COPY1]] + ; GFX8-NEXT: $vgpr0 = COPY [[SMULH]](i32) ; ; GFX9-LABEL: name: test_smulh_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[SMULH:%[0-9]+]]:_(s32) = G_SMULH [[COPY]], [[COPY1]] - ; GFX9-NEXT: $vgpr0 = COPY [[SMULH]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_SMULH %0, %1 - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[SMULH:%[0-9]+]]:_(i32) = G_SMULH [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[SMULH]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_SMULH %0, %1 + $vgpr0 = COPY %2(i32) ... --- @@ -38,30 +38,30 @@ body: | ; GFX8-LABEL: name: test_smulh_v2s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX8-NEXT: [[SMULH:%[0-9]+]]:_(s32) = G_SMULH [[UV]], [[UV2]] - ; GFX8-NEXT: [[SMULH1:%[0-9]+]]:_(s32) = G_SMULH [[UV1]], [[UV3]] - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SMULH]](s32), [[SMULH1]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX8-NEXT: [[SMULH:%[0-9]+]]:_(i32) = G_SMULH [[UV]], [[UV2]] + ; GFX8-NEXT: [[SMULH1:%[0-9]+]]:_(i32) = G_SMULH [[UV1]], [[UV3]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SMULH]](i32), [[SMULH1]](i32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX9-LABEL: name: test_smulh_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9-NEXT: [[SMULH:%[0-9]+]]:_(s32) = G_SMULH [[UV]], [[UV2]] - ; GFX9-NEXT: [[SMULH1:%[0-9]+]]:_(s32) = G_SMULH [[UV1]], [[UV3]] - 
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SMULH]](s32), [[SMULH1]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = G_SMULH %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX9-NEXT: [[SMULH:%[0-9]+]]:_(i32) = G_SMULH [[UV]], [[UV2]] + ; GFX9-NEXT: [[SMULH1:%[0-9]+]]:_(i32) = G_SMULH [[UV1]], [[UV3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SMULH]](i32), [[SMULH1]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i32>) = G_SMULH %0, %1 + $vgpr0_vgpr1 = COPY %2(<2 x i32>) ... --- @@ -73,35 +73,35 @@ body: | ; GFX8-LABEL: name: test_smulh_s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 - ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 16 - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SEXT_INREG]], [[SEXT_INREG1]] - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[MUL]], [[C]](s32) - ; GFX8-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ASHR]], 16 - ; GFX8-NEXT: $vgpr0 = COPY [[SEXT_INREG2]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 16 + ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY1]], 16 + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SEXT_INREG]], [[SEXT_INREG1]] + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[MUL]], [[C]](i32) + ; GFX8-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[ASHR]], 16 + ; GFX8-NEXT: $vgpr0 = COPY [[SEXT_INREG2]](i32) ; ; GFX9-LABEL: name: test_smulh_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 - ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 16 - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SEXT_INREG]], [[SEXT_INREG1]] - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[MUL]], [[C]](s32) - ; GFX9-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ASHR]], 16 - ; GFX9-NEXT: $vgpr0 = COPY [[SEXT_INREG2]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s16) = G_SMULH %2, %3 - %5:_(s32) = G_SEXT %4 - $vgpr0 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 16 + ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY1]], 16 + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SEXT_INREG]], [[SEXT_INREG1]] + ; GFX9-NEXT: 
[[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[MUL]], [[C]](i32) + ; GFX9-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[ASHR]], 16 + ; GFX9-NEXT: $vgpr0 = COPY [[SEXT_INREG2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i16) = G_SMULH %2, %3 + %5:_(i32) = G_SEXT %4(i16) + $vgpr0 = COPY %5(i32) ... --- @@ -113,43 +113,43 @@ body: | ; GFX8-LABEL: name: test_smulh_s8 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[C]](s16) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) - ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[C]](s16) - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s16) = G_MUL [[ASHR]], [[ASHR1]] - ; GFX8-NEXT: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[MUL]], [[C]](s16) - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR2]](s16) - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT]], 8 - ; GFX8-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[C]](i16) + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i16) = G_ASHR [[SHL]], [[C]](i16) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[C]](i16) + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(i16) = G_ASHR [[SHL1]], [[C]](i16) + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i16) = G_MUL [[ASHR]], [[ASHR1]] + ; GFX8-NEXT: [[ASHR2:%[0-9]+]]:_(i16) = G_ASHR [[MUL]], [[C]](i16) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ASHR2]](i16) + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[ANYEXT]], 8 + ; GFX8-NEXT: $vgpr0 = COPY [[SEXT_INREG]](i32) ; ; GFX9-LABEL: name: test_smulh_s8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG]](s32) - ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 8 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG1]](s32) - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s16) = G_MUL [[TRUNC]], [[TRUNC1]] - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[MUL]], [[C]](s16) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) - ; GFX9-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT]], 8 - ; GFX9-NEXT: $vgpr0 = COPY [[SEXT_INREG2]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s8) = G_TRUNC %0 - %3:_(s8) = G_TRUNC %1 - %4:_(s8) = G_SMULH %2, %3 - %5:_(s32) = G_SEXT %4 - $vgpr0 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 8 + 
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[SEXT_INREG]](i32) + ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY1]], 8 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[SEXT_INREG1]](i32) + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i16) = G_MUL [[TRUNC]], [[TRUNC1]] + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(i16) = G_ASHR [[MUL]], [[C]](i16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ASHR]](i16) + ; GFX9-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[ANYEXT]], 8 + ; GFX9-NEXT: $vgpr0 = COPY [[SEXT_INREG2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i8) = G_TRUNC %0(i32) + %3:_(i8) = G_TRUNC %1(i32) + %4:_(i8) = G_SMULH %2, %3 + %5:_(i32) = G_SEXT %4(i8) + $vgpr0 = COPY %5(i32) ... --- @@ -160,59 +160,51 @@ body: | ; GFX8-LABEL: name: test_smulh_v2s16 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV]], 16 - ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV2]], 16 - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SEXT_INREG]], [[SEXT_INREG1]] - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[MUL]], [[C]](s32) - ; GFX8-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV1]], 16 - ; GFX8-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV3]], 16 - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[SEXT_INREG2]], [[SEXT_INREG3]] - ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[MUL1]], [[C]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C1]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C1]] - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>) - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16 - ; GFX8-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG4]](s32), [[SEXT_INREG5]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[UV]], 16 + ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[UV2]], 16 + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SEXT_INREG]], [[SEXT_INREG1]] + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[MUL]], [[C]](i32) + ; GFX8-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[UV1]], 16 + ; GFX8-NEXT: 
[[SEXT_INREG3:%[0-9]+]]:_(i32) = G_SEXT_INREG [[UV3]], 16 + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[SEXT_INREG2]], [[SEXT_INREG3]] + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[MUL1]], [[C]](i32) + ; GFX8-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(i32) = G_SEXT_INREG [[ASHR]], 16 + ; GFX8-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(i32) = G_SEXT_INREG [[ASHR1]], 16 + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SEXT_INREG4]](i32), [[SEXT_INREG5]](i32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX9-LABEL: name: test_smulh_v2s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV]], 16 - ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV2]], 16 - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SEXT_INREG]], [[SEXT_INREG1]] - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[MUL]], [[C]](s32) - ; GFX9-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV1]], 16 - ; GFX9-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV3]], 16 - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[SEXT_INREG2]], [[SEXT_INREG3]] - ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[MUL1]], [[C]](s32) - ; GFX9-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ASHR]], 16 - ; GFX9-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ASHR1]], 16 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG4]](s32), [[SEXT_INREG5]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s16>) = G_TRUNC %0 - %3:_(<2 x s16>) = G_TRUNC %1 - %4:_(<2 x s16>) = G_SMULH %2, %3 - %5:_(<2 x s32>) = G_SEXT %4 - $vgpr0_vgpr1 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[UV]], 16 + ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[UV2]], 16 + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SEXT_INREG]], [[SEXT_INREG1]] + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[MUL]], [[C]](i32) + ; GFX9-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[UV1]], 16 + ; GFX9-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(i32) = G_SEXT_INREG [[UV3]], 16 + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[SEXT_INREG2]], [[SEXT_INREG3]] + ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[MUL1]], [[C]](i32) + ; GFX9-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(i32) = G_SEXT_INREG [[ASHR]], 16 + ; GFX9-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(i32) = G_SEXT_INREG [[ASHR1]], 16 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SEXT_INREG4]](i32), [[SEXT_INREG5]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x 
i16>) = G_TRUNC %0(<2 x i32>) + %3:_(<2 x i16>) = G_TRUNC %1(<2 x i32>) + %4:_(<2 x i16>) = G_SMULH %2, %3 + %5:_(<2 x i32>) = G_SEXT %4(<2 x i16>) + $vgpr0_vgpr1 = COPY %5(<2 x i32>) ... --- @@ -223,83 +215,83 @@ body: | ; GFX8-LABEL: name: test_smulh_v2s8 ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[C]](s16) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) - ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[C]](s16) - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s16) = G_MUL [[ASHR]], [[ASHR1]] - ; GFX8-NEXT: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[MUL]], [[C]](s16) - ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[C]](s16) - ; GFX8-NEXT: [[ASHR3:%[0-9]+]]:_(s16) = G_ASHR [[SHL2]], [[C]](s16) - ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[C]](s16) - ; GFX8-NEXT: [[ASHR4:%[0-9]+]]:_(s16) = G_ASHR [[SHL3]], [[C]](s16) - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s16) = G_MUL [[ASHR3]], [[ASHR4]] - ; GFX8-NEXT: [[ASHR5:%[0-9]+]]:_(s16) = G_ASHR [[MUL1]], [[C]](s16) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[ASHR2]], [[C1]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[ASHR5]], [[C1]] - ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C]](s16) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL4]] - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[C]](i16) + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i16) = G_ASHR [[SHL]], [[C]](i16) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[C]](i16) + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(i16) = G_ASHR [[SHL1]], [[C]](i16) + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i16) = G_MUL [[ASHR]], [[ASHR1]] + ; GFX8-NEXT: [[ASHR2:%[0-9]+]]:_(i16) = G_ASHR [[MUL]], [[C]](i16) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[TRUNC2]], [[C]](i16) + ; GFX8-NEXT: [[ASHR3:%[0-9]+]]:_(i16) = G_ASHR [[SHL2]], [[C]](i16) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY3]](i32) + ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(i16) = G_SHL [[TRUNC3]], [[C]](i16) + ; GFX8-NEXT: [[ASHR4:%[0-9]+]]:_(i16) = G_ASHR [[SHL3]], [[C]](i16) + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(i16) = G_MUL [[ASHR3]], [[ASHR4]] + ; GFX8-NEXT: [[ASHR5:%[0-9]+]]:_(i16) = G_ASHR [[MUL1]], [[C]](i16) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX8-NEXT: 
[[AND:%[0-9]+]]:_(i16) = G_AND [[ASHR2]], [[C1]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[ASHR5]], [[C1]] + ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C]](i16) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL4]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[OR]](i16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: test_smulh_v2s8 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG]](s32) - ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 8 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 8 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG2]](s32) - ; GFX9-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 8 - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG3]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(<2 x s16>) = G_MUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[MUL]], [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[ASHR]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C2]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C2]] - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C]](s16) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = COPY $vgpr3 - %5:_(s8) = G_TRUNC %0 - %6:_(s8) = G_TRUNC %1 - %7:_(s8) = G_TRUNC %2 - %8:_(s8) = G_TRUNC %3 - %11:_(<2 x s8>) = G_BUILD_VECTOR %5, %6 - %12:_(<2 x s8>) = G_BUILD_VECTOR %7, %8 - %13:_(<2 x s8>) = G_SMULH %11, %12 - %14:_(s8), %15:_(s8) = G_UNMERGE_VALUES %13 - %17:_(s16) = G_MERGE_VALUES %14, %15 - %18:_(s32) = G_ANYEXT %17 - $vgpr0 = COPY %18 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 8 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[SEXT_INREG]](i32) + ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY1]], 8 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[SEXT_INREG1]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 
x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY2]], 8 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[SEXT_INREG2]](i32) + ; GFX9-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY3]], 8 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[SEXT_INREG3]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(<2 x i16>) = G_MUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[C]](i16), [[C]](i16) + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(<2 x i16>) = G_ASHR [[MUL]], [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[ASHR]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C1]](i32) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC4]], [[C2]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC5]], [[C2]] + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C]](i16) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[OR]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 + %4:_(i8) = G_TRUNC %0(i32) + %5:_(i8) = G_TRUNC %1(i32) + %6:_(i8) = G_TRUNC %2(i32) + %7:_(i8) = G_TRUNC %3(i32) + %8:_(<2 x i8>) = G_BUILD_VECTOR %4(i8), %5(i8) + %9:_(<2 x i8>) = G_BUILD_VECTOR %6(i8), %7(i8) + %10:_(<2 x i8>) = G_SMULH %8, %9 + %11:_(i8), %12:_(i8) = G_UNMERGE_VALUES %10(<2 x i8>) + %13:_(i16) = G_MERGE_VALUES %11(i8), %12(i8) + %14:_(i32) = G_ANYEXT %13(i16) + $vgpr0 = COPY %14(i32) ... 
@@ -311,132 +303,132 @@ body: | ; GFX8-LABEL: name: test_smulh_v4s8 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32) - ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX8-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C1]](s32) - ; GFX8-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C3]](s16) - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[C3]](s16) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C3]](s16) - ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[C3]](s16) - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s16) = G_MUL [[ASHR]], [[ASHR1]] - ; GFX8-NEXT: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[MUL]], [[C3]](s16) - ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[C3]](s16) - ; GFX8-NEXT: [[ASHR3:%[0-9]+]]:_(s16) = G_ASHR [[SHL2]], [[C3]](s16) - ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[C3]](s16) - ; GFX8-NEXT: [[ASHR4:%[0-9]+]]:_(s16) = G_ASHR [[SHL3]], [[C3]](s16) - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s16) = G_MUL [[ASHR3]], [[ASHR4]] - ; GFX8-NEXT: [[ASHR5:%[0-9]+]]:_(s16) = G_ASHR [[MUL1]], [[C3]](s16) - ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[TRUNC4]], [[C3]](s16) - ; GFX8-NEXT: [[ASHR6:%[0-9]+]]:_(s16) = G_ASHR [[SHL4]], [[C3]](s16) - ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; GFX8-NEXT: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[TRUNC5]], [[C3]](s16) - ; GFX8-NEXT: [[ASHR7:%[0-9]+]]:_(s16) = G_ASHR [[SHL5]], [[C3]](s16) - ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s16) = G_MUL [[ASHR6]], [[ASHR7]] - ; GFX8-NEXT: [[ASHR8:%[0-9]+]]:_(s16) = G_ASHR [[MUL2]], [[C3]](s16) - ; GFX8-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX8-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[TRUNC6]], [[C3]](s16) - ; GFX8-NEXT: [[ASHR9:%[0-9]+]]:_(s16) = G_ASHR [[SHL6]], [[C3]](s16) - ; GFX8-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; GFX8-NEXT: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[TRUNC7]], [[C3]](s16) - ; GFX8-NEXT: [[ASHR10:%[0-9]+]]:_(s16) = G_ASHR [[SHL7]], [[C3]](s16) - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s16) = G_MUL [[ASHR9]], [[ASHR10]] - ; GFX8-NEXT: [[ASHR11:%[0-9]+]]:_(s16) = G_ASHR [[MUL3]], [[C3]](s16) - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR2]](s16) - ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C4]] - ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR5]](s16) - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C4]] - ; GFX8-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL8]] - ; 
GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR8]](s16) - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C4]] - ; GFX8-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32) - ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL9]] - ; GFX8-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR11]](s16) - ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C4]] - ; GFX8-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) - ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL10]] - ; GFX8-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C1]](i32) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C2]](i32) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[COPY1]], [[C]](i32) + ; GFX8-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[COPY1]], [[C1]](i32) + ; GFX8-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[COPY1]], [[C2]](i32) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[C3]](i16) + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i16) = G_ASHR [[SHL]], [[C3]](i16) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[C3]](i16) + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(i16) = G_ASHR [[SHL1]], [[C3]](i16) + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i16) = G_MUL [[ASHR]], [[ASHR1]] + ; GFX8-NEXT: [[ASHR2:%[0-9]+]]:_(i16) = G_ASHR [[MUL]], [[C3]](i16) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[TRUNC2]], [[C3]](i16) + ; GFX8-NEXT: [[ASHR3:%[0-9]+]]:_(i16) = G_ASHR [[SHL2]], [[C3]](i16) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(i16) = G_SHL [[TRUNC3]], [[C3]](i16) + ; GFX8-NEXT: [[ASHR4:%[0-9]+]]:_(i16) = G_ASHR [[SHL3]], [[C3]](i16) + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(i16) = G_MUL [[ASHR3]], [[ASHR4]] + ; GFX8-NEXT: [[ASHR5:%[0-9]+]]:_(i16) = G_ASHR [[MUL1]], [[C3]](i16) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(i16) = G_SHL [[TRUNC4]], [[C3]](i16) + ; GFX8-NEXT: [[ASHR6:%[0-9]+]]:_(i16) = G_ASHR [[SHL4]], [[C3]](i16) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR4]](i32) + ; GFX8-NEXT: [[SHL5:%[0-9]+]]:_(i16) = G_SHL [[TRUNC5]], [[C3]](i16) + ; GFX8-NEXT: [[ASHR7:%[0-9]+]]:_(i16) = G_ASHR [[SHL5]], [[C3]](i16) + ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(i16) = G_MUL [[ASHR6]], [[ASHR7]] + ; GFX8-NEXT: [[ASHR8:%[0-9]+]]:_(i16) = G_ASHR [[MUL2]], [[C3]](i16) + ; GFX8-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX8-NEXT: [[SHL6:%[0-9]+]]:_(i16) = G_SHL [[TRUNC6]], [[C3]](i16) + ; GFX8-NEXT: [[ASHR9:%[0-9]+]]:_(i16) = G_ASHR [[SHL6]], [[C3]](i16) + ; GFX8-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR5]](i32) + ; GFX8-NEXT: [[SHL7:%[0-9]+]]:_(i16) = G_SHL [[TRUNC7]], [[C3]](i16) + ; GFX8-NEXT: [[ASHR10:%[0-9]+]]:_(i16) = G_ASHR [[SHL7]], [[C3]](i16) + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(i16) = G_MUL [[ASHR9]], [[ASHR10]] + ; GFX8-NEXT: [[ASHR11:%[0-9]+]]:_(i16) = G_ASHR [[MUL3]], [[C3]](i16) + ; 
GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ASHR2]](i16) + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[ANYEXT]], [[C4]] + ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[ASHR5]](i16) + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[ANYEXT1]], [[C4]] + ; GFX8-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL8]] + ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[ASHR8]](i16) + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[ANYEXT2]], [[C4]] + ; GFX8-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[AND2]], [[C1]](i32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL9]] + ; GFX8-NEXT: [[ANYEXT3:%[0-9]+]]:_(i32) = G_ANYEXT [[ASHR11]](i16) + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[ANYEXT3]], [[C4]] + ; GFX8-NEXT: [[SHL10:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C2]](i32) + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[OR1]], [[SHL10]] + ; GFX8-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; GFX9-LABEL: name: test_smulh_v4s8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C1]](s32) - ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32) - ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG]](s32) - ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 8 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 8 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG2]](s32) - ; GFX9-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR3]], 8 - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG3]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(<2 x s16>) = G_MUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C3]](s16), [[C3]](s16) - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[MUL]], [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 8 - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG4]](s32) - ; GFX9-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR2]], 8 - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG5]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-NEXT: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR4]], 8 - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG6]](s32) - ; GFX9-NEXT: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR5]], 8 
- ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG7]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(<2 x s16>) = G_MUL [[BUILD_VECTOR3]], [[BUILD_VECTOR4]] - ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C3]](s16), [[C3]](s16) - ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[MUL1]], [[BUILD_VECTOR5]](<2 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[ASHR]](<2 x s16>) - ; GFX9-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[ASHR1]](<2 x s16>) - ; GFX9-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C4]] - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C4]] - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C4]] - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) - ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8) = G_UNMERGE_VALUES %0 - %6:_(s8), %7:_(s8), %8:_(s8), %9:_(s8) = G_UNMERGE_VALUES %1 - %10:_(<4 x s8>) = G_BUILD_VECTOR %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8) - %11:_(<4 x s8>) = G_BUILD_VECTOR %6:_(s8), %7:_(s8), %8:_(s8), %9:_(s8) - %12:_(<4 x s8>) = G_SMULH %10:_, %11:_ - %13:_(s8), %14:_(s8), %15:_(s8), %16:_(s8) = G_UNMERGE_VALUES %12:_(<4 x s8>) - %17:_(s32) = G_MERGE_VALUES %13, %14, %15, %16 - $vgpr0 = COPY %17 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C1]](i32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C2]](i32) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[COPY1]], [[C]](i32) + ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[COPY1]], [[C1]](i32) + ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[COPY1]], [[C2]](i32) + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 8 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[SEXT_INREG]](i32) + ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR]], 8 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[SEXT_INREG1]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY1]], 8 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[SEXT_INREG2]](i32) + ; GFX9-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR3]], 8 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[SEXT_INREG3]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(<2 x 
i16>) = G_MUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[C3]](i16), [[C3]](i16) + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(<2 x i16>) = G_ASHR [[MUL]], [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR1]], 8 + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[SEXT_INREG4]](i32) + ; GFX9-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR2]], 8 + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[SEXT_INREG5]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX9-NEXT: [[SEXT_INREG6:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR4]], 8 + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[SEXT_INREG6]](i32) + ; GFX9-NEXT: [[SEXT_INREG7:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR5]], 8 + ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[SEXT_INREG7]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC6]](i16), [[TRUNC7]](i16) + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(<2 x i16>) = G_MUL [[BUILD_VECTOR3]], [[BUILD_VECTOR4]] + ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[C3]](i16), [[C3]](i16) + ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(<2 x i16>) = G_ASHR [[MUL1]], [[BUILD_VECTOR5]](<2 x i16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[ASHR]](<2 x i16>) + ; GFX9-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C1]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[ASHR1]](<2 x i16>) + ; GFX9-NEXT: [[LSHR7:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C4]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LSHR6]], [[C4]] + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C4]] + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND2]], [[C1]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[LSHR7]], [[C4]] + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C2]](i32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[OR1]], [[SHL2]] + ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i8), %3:_(i8), %4:_(i8), %5:_(i8) = G_UNMERGE_VALUES %0(i32) + %6:_(i8), %7:_(i8), %8:_(i8), %9:_(i8) = G_UNMERGE_VALUES %1(i32) + %10:_(<4 x i8>) = G_BUILD_VECTOR %2(i8), %3(i8), %4(i8), %5(i8) + %11:_(<4 x i8>) = G_BUILD_VECTOR %6(i8), %7(i8), %8(i8), %9(i8) + %12:_(<4 x i8>) = G_SMULH %10, %11 + %13:_(i8), %14:_(i8), %15:_(i8), %16:_(i8) = G_UNMERGE_VALUES %12(<4 x i8>) + %17:_(i32) = G_MERGE_VALUES %13(i8), %14(i8), %15(i8), %16(i8) + $vgpr0 = COPY %17(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smulo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smulo.mir index cd75462271a36..3a152ad274578 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smulo.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smulo.mir @@ -11,36 +11,36 @@ body: | ; GFX8-LABEL: name: test_smulo_s32 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[SMULH:%[0-9]+]]:_(s32) = G_SMULH [[COPY]], [[COPY1]] - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[COPY1]] - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[MUL]], [[C]](s32) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SMULH]](s32), [[ASHR]] - ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX8-NEXT: $vgpr0 = COPY [[MUL]](s32) - ; GFX8-NEXT: $vgpr1 = COPY [[SEXT]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[SMULH:%[0-9]+]]:_(i32) = G_SMULH [[COPY]], [[COPY1]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[COPY]], [[COPY1]] + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[MUL]], [[C]](i32) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SMULH]](i32), [[ASHR]] + ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX8-NEXT: $vgpr0 = COPY [[MUL]](i32) + ; GFX8-NEXT: $vgpr1 = COPY [[SEXT]](i32) ; ; GFX9-LABEL: name: test_smulo_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[SMULH:%[0-9]+]]:_(s32) = G_SMULH [[COPY]], [[COPY1]] - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[COPY1]] - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[MUL]], [[C]](s32) - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SMULH]](s32), [[ASHR]] - ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX9-NEXT: $vgpr0 = COPY [[MUL]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[SEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32), %3:_(s1) = G_SMULO %0, %1 - %4:_(s32) = G_SEXT %3 - $vgpr0 = COPY %2 - $vgpr1 = COPY %4 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[SMULH:%[0-9]+]]:_(i32) = G_SMULH [[COPY]], [[COPY1]] + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[COPY]], [[COPY1]] + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[MUL]], [[C]](i32) + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SMULH]](i32), [[ASHR]] + ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX9-NEXT: $vgpr0 = COPY [[MUL]](i32) + ; GFX9-NEXT: $vgpr1 = COPY [[SEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32), %3:_(i1) = G_SMULO %0, %1 + %4:_(i32) = G_SEXT %3(i1) + $vgpr0 = COPY %2(i32) + $vgpr1 = COPY %4(i32) ... 
--- @@ -52,58 +52,58 @@ body: | ; GFX8-LABEL: name: test_smulo_v2s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX8-NEXT: [[SMULH:%[0-9]+]]:_(s32) = G_SMULH [[UV]], [[UV2]] - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV2]] - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[MUL]], [[C]](s32) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SMULH]](s32), [[ASHR]] - ; GFX8-NEXT: [[SMULH1:%[0-9]+]]:_(s32) = G_SMULH [[UV1]], [[UV3]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] - ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[MUL1]], [[C]](s32) - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SMULH1]](s32), [[ASHR1]] - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MUL]](s32), [[MUL1]](s32) - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT]], 1 - ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT1]], 1 - ; GFX8-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - ; GFX8-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX8-NEXT: [[SMULH:%[0-9]+]]:_(i32) = G_SMULH [[UV]], [[UV2]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[UV]], [[UV2]] + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[MUL]], [[C]](i32) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SMULH]](i32), [[ASHR]] + ; GFX8-NEXT: [[SMULH1:%[0-9]+]]:_(i32) = G_SMULH [[UV1]], [[UV3]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UV1]], [[UV3]] + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[MUL1]], [[C]](i32) + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SMULH1]](i32), [[ASHR1]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[MUL]](i32), [[MUL1]](i32) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP]](i1) + ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP1]](i1) + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[ANYEXT]], 1 + ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[ANYEXT1]], 1 + ; GFX8-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SEXT_INREG]](i32), [[SEXT_INREG1]](i32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + ; GFX8-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x i32>) ; ; GFX9-LABEL: name: test_smulo_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: 
[[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9-NEXT: [[SMULH:%[0-9]+]]:_(s32) = G_SMULH [[UV]], [[UV2]] - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV2]] - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[MUL]], [[C]](s32) - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SMULH]](s32), [[ASHR]] - ; GFX9-NEXT: [[SMULH1:%[0-9]+]]:_(s32) = G_SMULH [[UV1]], [[UV3]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] - ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[MUL1]], [[C]](s32) - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SMULH1]](s32), [[ASHR1]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MUL]](s32), [[MUL1]](s32) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT]], 1 - ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT1]], 1 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - ; GFX9-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>), %3:_(<2 x s1>) = G_SMULO %0, %1 - %4:_(<2 x s32>) = G_SEXT %3 - $vgpr0_vgpr1 = COPY %2 - $vgpr2_vgpr3 = COPY %4 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX9-NEXT: [[SMULH:%[0-9]+]]:_(i32) = G_SMULH [[UV]], [[UV2]] + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[UV]], [[UV2]] + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[MUL]], [[C]](i32) + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SMULH]](i32), [[ASHR]] + ; GFX9-NEXT: [[SMULH1:%[0-9]+]]:_(i32) = G_SMULH [[UV1]], [[UV3]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UV1]], [[UV3]] + ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[MUL1]], [[C]](i32) + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SMULH1]](i32), [[ASHR1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[MUL]](i32), [[MUL1]](i32) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP]](i1) + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[ANYEXT]], 1 + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP1]](i1) + ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[ANYEXT1]], 1 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SEXT_INREG]](i32), [[SEXT_INREG1]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + ; GFX9-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i32>), %3:_(<2 x i1>) = G_SMULO %0, %1 + %4:_(<2 x i32>) = G_SEXT %3(<2 x i1>) + $vgpr0_vgpr1 = COPY %2(<2 x i32>) + $vgpr2_vgpr3 = COPY %4(<2 x i32>) ... 
--- @@ -115,41 +115,41 @@ body: | ; GFX8-LABEL: name: test_smulo_s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 - ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 16 - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SEXT_INREG]], [[SEXT_INREG1]] - ; GFX8-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[MUL]], 16 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[SEXT_INREG2]] - ; GFX8-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[MUL]], 16 - ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX8-NEXT: $vgpr0 = COPY [[SEXT_INREG3]](s32) - ; GFX8-NEXT: $vgpr1 = COPY [[SEXT]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 16 + ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY1]], 16 + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SEXT_INREG]], [[SEXT_INREG1]] + ; GFX8-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[MUL]], 16 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[MUL]](i32), [[SEXT_INREG2]] + ; GFX8-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(i32) = G_SEXT_INREG [[MUL]], 16 + ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX8-NEXT: $vgpr0 = COPY [[SEXT_INREG3]](i32) + ; GFX8-NEXT: $vgpr1 = COPY [[SEXT]](i32) ; ; GFX9-LABEL: name: test_smulo_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 - ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 16 - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SEXT_INREG]], [[SEXT_INREG1]] - ; GFX9-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[MUL]], 16 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[SEXT_INREG2]] - ; GFX9-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[MUL]], 16 - ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX9-NEXT: $vgpr0 = COPY [[SEXT_INREG3]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[SEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s16), %6:_(s1) = G_SMULO %2, %3 - %5:_(s32) = G_SEXT %4 - %7:_(s32) = G_SEXT %6 - $vgpr0 = COPY %5 - $vgpr1 = COPY %7 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 16 + ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY1]], 16 + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SEXT_INREG]], [[SEXT_INREG1]] + ; GFX9-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[MUL]], 16 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[MUL]](i32), [[SEXT_INREG2]] + ; GFX9-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(i32) = G_SEXT_INREG [[MUL]], 16 + ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX9-NEXT: $vgpr0 = COPY [[SEXT_INREG3]](i32) + ; GFX9-NEXT: $vgpr1 = COPY [[SEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i16), %5:_(i1) = G_SMULO %2, %3 + %6:_(i32) = G_SEXT %4(i16) + %7:_(i32) = G_SEXT %5(i1) + $vgpr0 = COPY 
%6(i32) + $vgpr1 = COPY %7(i32) ... --- @@ -161,41 +161,41 @@ body: | ; GFX8-LABEL: name: test_smulo_s8 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 - ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 8 - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SEXT_INREG]], [[SEXT_INREG1]] - ; GFX8-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[MUL]], 8 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[SEXT_INREG2]] - ; GFX8-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[MUL]], 8 - ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX8-NEXT: $vgpr0 = COPY [[SEXT_INREG3]](s32) - ; GFX8-NEXT: $vgpr1 = COPY [[SEXT]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 8 + ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY1]], 8 + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SEXT_INREG]], [[SEXT_INREG1]] + ; GFX8-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[MUL]], 8 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[MUL]](i32), [[SEXT_INREG2]] + ; GFX8-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(i32) = G_SEXT_INREG [[MUL]], 8 + ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX8-NEXT: $vgpr0 = COPY [[SEXT_INREG3]](i32) + ; GFX8-NEXT: $vgpr1 = COPY [[SEXT]](i32) ; ; GFX9-LABEL: name: test_smulo_s8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 - ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 8 - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SEXT_INREG]], [[SEXT_INREG1]] - ; GFX9-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[MUL]], 8 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[SEXT_INREG2]] - ; GFX9-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[MUL]], 8 - ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX9-NEXT: $vgpr0 = COPY [[SEXT_INREG3]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[SEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s8) = G_TRUNC %0 - %3:_(s8) = G_TRUNC %1 - %4:_(s8), %6:_(s1) = G_SMULO %2, %3 - %5:_(s32) = G_SEXT %4 - %7:_(s32) = G_SEXT %6 - $vgpr0 = COPY %5 - $vgpr1 = COPY %7 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 8 + ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY1]], 8 + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SEXT_INREG]], [[SEXT_INREG1]] + ; GFX9-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[MUL]], 8 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[MUL]](i32), [[SEXT_INREG2]] + ; GFX9-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(i32) = G_SEXT_INREG [[MUL]], 8 + ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX9-NEXT: $vgpr0 = COPY [[SEXT_INREG3]](i32) + ; GFX9-NEXT: $vgpr1 = COPY [[SEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i8) = G_TRUNC %0(i32) + %3:_(i8) = G_TRUNC %1(i32) + %4:_(i8), %5:_(i1) = G_SMULO %2, %3 + %6:_(i32) = G_SEXT %4(i8) + %7:_(i32) = G_SEXT %5(i1) + 
$vgpr0 = COPY %6(i32) + $vgpr1 = COPY %7(i32) ... --- @@ -206,76 +206,67 @@ body: | ; GFX8-LABEL: name: test_smulo_v2s16 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV]], 16 - ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV2]], 16 - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SEXT_INREG]], [[SEXT_INREG1]] - ; GFX8-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[MUL]], 16 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[SEXT_INREG2]] - ; GFX8-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV1]], 16 - ; GFX8-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV3]], 16 - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[SEXT_INREG3]], [[SEXT_INREG4]] - ; GFX8-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[MUL1]], 16 - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL1]](s32), [[SEXT_INREG5]] - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C]] - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX8-NEXT: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT]], 1 - ; GFX8-NEXT: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT1]], 1 - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG6]](s32), [[SEXT_INREG7]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>) - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX8-NEXT: [[SEXT_INREG8:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16 - ; GFX8-NEXT: [[SEXT_INREG9:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 - ; GFX8-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG8]](s32), [[SEXT_INREG9]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s32>) - ; GFX8-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[UV]], 16 + ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[UV2]], 16 + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SEXT_INREG]], [[SEXT_INREG1]] + ; GFX8-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[MUL]], 16 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[MUL]](i32), [[SEXT_INREG2]] + ; GFX8-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(i32) = G_SEXT_INREG [[UV1]], 16 + ; GFX8-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(i32) = G_SEXT_INREG [[UV3]], 16 + ; 
GFX8-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[SEXT_INREG3]], [[SEXT_INREG4]] + ; GFX8-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(i32) = G_SEXT_INREG [[MUL1]], 16 + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[MUL1]](i32), [[SEXT_INREG5]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP]](i1) + ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP1]](i1) + ; GFX8-NEXT: [[SEXT_INREG6:%[0-9]+]]:_(i32) = G_SEXT_INREG [[ANYEXT]], 1 + ; GFX8-NEXT: [[SEXT_INREG7:%[0-9]+]]:_(i32) = G_SEXT_INREG [[ANYEXT1]], 1 + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SEXT_INREG6]](i32), [[SEXT_INREG7]](i32) + ; GFX8-NEXT: [[SEXT_INREG8:%[0-9]+]]:_(i32) = G_SEXT_INREG [[MUL]], 16 + ; GFX8-NEXT: [[SEXT_INREG9:%[0-9]+]]:_(i32) = G_SEXT_INREG [[MUL1]], 16 + ; GFX8-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SEXT_INREG8]](i32), [[SEXT_INREG9]](i32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR1]](<2 x i32>) + ; GFX8-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX9-LABEL: name: test_smulo_v2s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV]], 16 - ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV2]], 16 - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SEXT_INREG]], [[SEXT_INREG1]] - ; GFX9-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[MUL]], 16 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[SEXT_INREG2]] - ; GFX9-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV1]], 16 - ; GFX9-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV3]], 16 - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[SEXT_INREG3]], [[SEXT_INREG4]] - ; GFX9-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[MUL1]], 16 - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL1]](s32), [[SEXT_INREG5]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX9-NEXT: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT]], 1 - ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX9-NEXT: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT1]], 1 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG6]](s32), [[SEXT_INREG7]](s32) - ; GFX9-NEXT: [[SEXT_INREG8:%[0-9]+]]:_(s32) = G_SEXT_INREG [[MUL]], 16 - ; GFX9-NEXT: [[SEXT_INREG9:%[0-9]+]]:_(s32) = G_SEXT_INREG [[MUL1]], 16 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG8]](s32), [[SEXT_INREG9]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s32>) - ; GFX9-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s16>) = G_TRUNC %0 - %3:_(<2 x s16>) = G_TRUNC %1 - %4:_(<2 x s16>), %6:_(<2 x s1>) = G_SMULO %2, %3 - %7:_(<2 x s32>) = G_SEXT %6 - %5:_(<2 x s32>) = G_SEXT %4 - $vgpr0_vgpr1 = COPY %5 - $vgpr2_vgpr3 = COPY %7 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + 
; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[UV]], 16 + ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[UV2]], 16 + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SEXT_INREG]], [[SEXT_INREG1]] + ; GFX9-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[MUL]], 16 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[MUL]](i32), [[SEXT_INREG2]] + ; GFX9-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(i32) = G_SEXT_INREG [[UV1]], 16 + ; GFX9-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(i32) = G_SEXT_INREG [[UV3]], 16 + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[SEXT_INREG3]], [[SEXT_INREG4]] + ; GFX9-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(i32) = G_SEXT_INREG [[MUL1]], 16 + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[MUL1]](i32), [[SEXT_INREG5]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP]](i1) + ; GFX9-NEXT: [[SEXT_INREG6:%[0-9]+]]:_(i32) = G_SEXT_INREG [[ANYEXT]], 1 + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP1]](i1) + ; GFX9-NEXT: [[SEXT_INREG7:%[0-9]+]]:_(i32) = G_SEXT_INREG [[ANYEXT1]], 1 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SEXT_INREG6]](i32), [[SEXT_INREG7]](i32) + ; GFX9-NEXT: [[SEXT_INREG8:%[0-9]+]]:_(i32) = G_SEXT_INREG [[MUL]], 16 + ; GFX9-NEXT: [[SEXT_INREG9:%[0-9]+]]:_(i32) = G_SEXT_INREG [[MUL1]], 16 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SEXT_INREG8]](i32), [[SEXT_INREG9]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR1]](<2 x i32>) + ; GFX9-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i16>) = G_TRUNC %0(<2 x i32>) + %3:_(<2 x i16>) = G_TRUNC %1(<2 x i32>) + %4:_(<2 x i16>), %5:_(<2 x i1>) = G_SMULO %2, %3 + %6:_(<2 x i32>) = G_SEXT %5(<2 x i1>) + %7:_(<2 x i32>) = G_SEXT %4(<2 x i16>) + $vgpr0_vgpr1 = COPY %7(<2 x i32>) + $vgpr2_vgpr3 = COPY %6(<2 x i32>) ... 
@@ -287,87 +278,87 @@ body: | ; GFX8-LABEL: name: test_smulo_v2s8 ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 - ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 8 - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SEXT_INREG]], [[SEXT_INREG1]] - ; GFX8-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[MUL]], 8 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[SEXT_INREG2]] - ; GFX8-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 8 - ; GFX8-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 8 - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[SEXT_INREG3]], [[SEXT_INREG4]] - ; GFX8-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[MUL1]], 8 - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL1]](s32), [[SEXT_INREG5]] - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[MUL]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[MUL1]](s32) - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX8-NEXT: $vgpr1 = COPY [[ANYEXT1]](s32) - ; GFX8-NEXT: $vgpr2 = COPY [[ANYEXT2]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 8 + ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY2]], 8 + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SEXT_INREG]], [[SEXT_INREG1]] + ; GFX8-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[MUL]], 8 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[MUL]](i32), [[SEXT_INREG2]] + ; GFX8-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY1]], 8 + ; GFX8-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY3]], 8 + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[SEXT_INREG3]], [[SEXT_INREG4]] + ; GFX8-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(i32) = G_SEXT_INREG [[MUL1]], 8 + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[MUL1]](i32), [[SEXT_INREG5]] + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[MUL]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C]] + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[MUL1]](i32) + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C1]](i16) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[OR]](i16) + ; GFX8-NEXT: 
[[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP]](i1) + ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP1]](i1) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + ; GFX8-NEXT: $vgpr1 = COPY [[ANYEXT1]](i32) + ; GFX8-NEXT: $vgpr2 = COPY [[ANYEXT2]](i32) ; ; GFX9-LABEL: name: test_smulo_v2s8 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 - ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 8 - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SEXT_INREG]], [[SEXT_INREG1]] - ; GFX9-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[MUL]], 8 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[SEXT_INREG2]] - ; GFX9-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 8 - ; GFX9-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 8 - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[SEXT_INREG3]], [[SEXT_INREG4]] - ; GFX9-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[MUL1]], 8 - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL1]](s32), [[SEXT_INREG5]] - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[MUL]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[MUL1]](s32) - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[ANYEXT1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[ANYEXT2]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = COPY $vgpr3 - %5:_(s8) = G_TRUNC %0 - %6:_(s8) = G_TRUNC %1 - %7:_(s8) = G_TRUNC %2 - %8:_(s8) = G_TRUNC %3 - %11:_(<2 x s8>) = G_BUILD_VECTOR %5, %6 - %12:_(<2 x s8>) = G_BUILD_VECTOR %7, %8 - %13:_(<2 x s8>), %19:_(<2 x s1>) = G_SMULO %11, %12 - %20:_(<2 x s32>) = G_SEXT %19 - %14:_(s8), %15:_(s8) = G_UNMERGE_VALUES %13 - %21:_(s1), %22:_(s1) = G_UNMERGE_VALUES %19 - %17:_(s16) = G_MERGE_VALUES %14, %15 - %18:_(s32) = G_ANYEXT %17 - %23:_(s32) = G_ANYEXT %21 - %24:_(s32) = G_ANYEXT %22 - $vgpr0 = COPY %18 - $vgpr1 = COPY %23 - $vgpr2 = COPY %24 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 8 + ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY2]], 8 + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SEXT_INREG]], [[SEXT_INREG1]] + ; GFX9-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[MUL]], 8 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[MUL]](i32), [[SEXT_INREG2]] + ; GFX9-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY1]], 8 + ; GFX9-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(i32) = 
G_SEXT_INREG [[COPY3]], 8 + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[SEXT_INREG3]], [[SEXT_INREG4]] + ; GFX9-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(i32) = G_SEXT_INREG [[MUL1]], 8 + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[MUL1]](i32), [[SEXT_INREG5]] + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[MUL]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C]] + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[MUL1]](i32) + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C1]](i16) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[OR]](i16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP]](i1) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP1]](i1) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + ; GFX9-NEXT: $vgpr1 = COPY [[ANYEXT1]](i32) + ; GFX9-NEXT: $vgpr2 = COPY [[ANYEXT2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 + %4:_(i8) = G_TRUNC %0(i32) + %5:_(i8) = G_TRUNC %1(i32) + %6:_(i8) = G_TRUNC %2(i32) + %7:_(i8) = G_TRUNC %3(i32) + %8:_(<2 x i8>) = G_BUILD_VECTOR %4(i8), %5(i8) + %9:_(<2 x i8>) = G_BUILD_VECTOR %6(i8), %7(i8) + %10:_(<2 x i8>), %11:_(<2 x i1>) = G_SMULO %8, %9 + %12:_(<2 x i32>) = G_SEXT %11(<2 x i1>) + %13:_(i8), %14:_(i8) = G_UNMERGE_VALUES %10(<2 x i8>) + %15:_(i1), %16:_(i1) = G_UNMERGE_VALUES %11(<2 x i1>) + %17:_(i16) = G_MERGE_VALUES %13(i8), %14(i8) + %18:_(i32) = G_ANYEXT %17(i16) + %19:_(i32) = G_ANYEXT %15(i1) + %20:_(i32) = G_ANYEXT %16(i1) + $vgpr0 = COPY %18(i32) + $vgpr1 = COPY %19(i32) + $vgpr2 = COPY %20(i32) ... 
--- @@ -378,101 +369,101 @@ body: | ; GFX8-LABEL: name: test_smulo_v4s8 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32) - ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX8-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C1]](s32) - ; GFX8-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32) - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 - ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 8 - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SEXT_INREG]], [[SEXT_INREG1]] - ; GFX8-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[MUL]], 8 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[SEXT_INREG2]] - ; GFX8-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 8 - ; GFX8-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR3]], 8 - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[SEXT_INREG3]], [[SEXT_INREG4]] - ; GFX8-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 8 - ; GFX8-NEXT: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR4]], 8 - ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SEXT_INREG5]], [[SEXT_INREG6]] - ; GFX8-NEXT: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR2]], 8 - ; GFX8-NEXT: [[SEXT_INREG8:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR5]], 8 - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[SEXT_INREG7]], [[SEXT_INREG8]] - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C3]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C3]] - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL2]], [[C3]] - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32) - ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[MUL3]], [[C3]] - ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) - ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX8-NEXT: $vgpr0 = COPY [[OR2]](s32) - ; GFX8-NEXT: $vgpr1 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C1]](i32) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C2]](i32) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[COPY1]], [[C]](i32) + ; GFX8-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[COPY1]], [[C1]](i32) + ; GFX8-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[COPY1]], [[C2]](i32) + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 8 + ; GFX8-NEXT: 
[[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY1]], 8 + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SEXT_INREG]], [[SEXT_INREG1]] + ; GFX8-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[MUL]], 8 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[MUL]](i32), [[SEXT_INREG2]] + ; GFX8-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR]], 8 + ; GFX8-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR3]], 8 + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[SEXT_INREG3]], [[SEXT_INREG4]] + ; GFX8-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR1]], 8 + ; GFX8-NEXT: [[SEXT_INREG6:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR4]], 8 + ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[SEXT_INREG5]], [[SEXT_INREG6]] + ; GFX8-NEXT: [[SEXT_INREG7:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR2]], 8 + ; GFX8-NEXT: [[SEXT_INREG8:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR5]], 8 + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[SEXT_INREG7]], [[SEXT_INREG8]] + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[MUL]], [[C3]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[MUL1]], [[C3]] + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[MUL2]], [[C3]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND2]], [[C1]](i32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[MUL3]], [[C3]] + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C2]](i32) + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[OR1]], [[SHL2]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP]](i1) + ; GFX8-NEXT: $vgpr0 = COPY [[OR2]](i32) + ; GFX8-NEXT: $vgpr1 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: test_smulo_v4s8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C1]](s32) - ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32) - ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 - ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 8 - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SEXT_INREG]], [[SEXT_INREG1]] - ; GFX9-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[MUL]], 8 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[SEXT_INREG2]] - ; GFX9-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 8 - ; GFX9-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR3]], 8 - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[SEXT_INREG3]], [[SEXT_INREG4]] - ; GFX9-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 8 - ; GFX9-NEXT: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR4]], 8 - ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SEXT_INREG5]], [[SEXT_INREG6]] - ; GFX9-NEXT: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR2]], 8 - ; 
GFX9-NEXT: [[SEXT_INREG8:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR5]], 8 - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[SEXT_INREG7]], [[SEXT_INREG8]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C3]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C3]] - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL2]], [[C3]] - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[MUL3]], [[C3]] - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) - ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8) = G_UNMERGE_VALUES %0 - %6:_(s8), %7:_(s8), %8:_(s8), %9:_(s8) = G_UNMERGE_VALUES %1 - %10:_(<4 x s8>) = G_BUILD_VECTOR %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8) - %11:_(<4 x s8>) = G_BUILD_VECTOR %6:_(s8), %7:_(s8), %8:_(s8), %9:_(s8) - %12:_(<4 x s8>), %18:_(<4 x s1>) = G_SMULO %10:_, %11:_ - %13:_(s8), %14:_(s8), %15:_(s8), %16:_(s8) = G_UNMERGE_VALUES %12:_(<4 x s8>) - %19:_(s1), %20:_(s1), %21:_(s1), %22:_(s1) = G_UNMERGE_VALUES %18:_(<4 x s1>) - %17:_(s32) = G_MERGE_VALUES %13, %14, %15, %16 - %23:_(s32) = G_ANYEXT %19 - $vgpr0 = COPY %17 - $vgpr1 = COPY %23 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C1]](i32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C2]](i32) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[COPY1]], [[C]](i32) + ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[COPY1]], [[C1]](i32) + ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[COPY1]], [[C2]](i32) + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 8 + ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY1]], 8 + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SEXT_INREG]], [[SEXT_INREG1]] + ; GFX9-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[MUL]], 8 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[MUL]](i32), [[SEXT_INREG2]] + ; GFX9-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR]], 8 + ; GFX9-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR3]], 8 + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[SEXT_INREG3]], [[SEXT_INREG4]] + ; GFX9-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR1]], 8 + ; GFX9-NEXT: [[SEXT_INREG6:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR4]], 8 + ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[SEXT_INREG5]], [[SEXT_INREG6]] + ; GFX9-NEXT: [[SEXT_INREG7:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR2]], 8 + ; GFX9-NEXT: [[SEXT_INREG8:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR5]], 8 + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[SEXT_INREG7]], [[SEXT_INREG8]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[MUL]], 
[[C3]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[MUL1]], [[C3]] + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[MUL2]], [[C3]] + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND2]], [[C1]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[MUL3]], [[C3]] + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C2]](i32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[OR1]], [[SHL2]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP]](i1) + ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](i32) + ; GFX9-NEXT: $vgpr1 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i8), %3:_(i8), %4:_(i8), %5:_(i8) = G_UNMERGE_VALUES %0(i32) + %6:_(i8), %7:_(i8), %8:_(i8), %9:_(i8) = G_UNMERGE_VALUES %1(i32) + %10:_(<4 x i8>) = G_BUILD_VECTOR %2(i8), %3(i8), %4(i8), %5(i8) + %11:_(<4 x i8>) = G_BUILD_VECTOR %6(i8), %7(i8), %8(i8), %9(i8) + %12:_(<4 x i8>), %13:_(<4 x i1>) = G_SMULO %10, %11 + %14:_(i8), %15:_(i8), %16:_(i8), %17:_(i8) = G_UNMERGE_VALUES %12(<4 x i8>) + %18:_(i1), %19:_(i1), %20:_(i1), %21:_(i1) = G_UNMERGE_VALUES %13(<4 x i1>) + %22:_(i32) = G_MERGE_VALUES %14(i8), %15(i8), %16(i8), %17(i8) + %23:_(i32) = G_ANYEXT %18(i1) + $vgpr0 = COPY %22(i32) + $vgpr1 = COPY %23(i32) ... --- name: test_smulo_s24 @@ -483,50 +474,49 @@ body: | ; GFX8-LABEL: name: test_smulo_s24 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 24 - ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 24 - ; GFX8-NEXT: [[SMULH:%[0-9]+]]:_(s32) = G_SMULH [[SEXT_INREG]], [[SEXT_INREG1]] - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SEXT_INREG]], [[SEXT_INREG1]] - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[MUL]], [[C]](s32) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SMULH]](s32), [[ASHR]] - ; GFX8-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[MUL]], 24 - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[SEXT_INREG2]] - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP]], [[ICMP1]] - ; GFX8-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[MUL]], 24 - ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[OR]](s1) - ; GFX8-NEXT: $vgpr0 = COPY [[SEXT_INREG3]](s32) - ; GFX8-NEXT: $vgpr1 = COPY [[SEXT]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 24 + ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY1]], 24 + ; GFX8-NEXT: [[SMULH:%[0-9]+]]:_(i32) = G_SMULH [[SEXT_INREG]], [[SEXT_INREG1]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SEXT_INREG]], [[SEXT_INREG1]] + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[MUL]], [[C]](i32) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SMULH]](i32), [[ASHR]] + ; GFX8-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[MUL]], 24 + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[MUL]](i32), [[SEXT_INREG2]] + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i1) = G_OR [[ICMP]], [[ICMP1]] + ; GFX8-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(i32) = G_SEXT_INREG 
[[MUL]], 24 + ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[OR]](i1) + ; GFX8-NEXT: $vgpr0 = COPY [[SEXT_INREG3]](i32) + ; GFX8-NEXT: $vgpr1 = COPY [[SEXT]](i32) ; ; GFX9-LABEL: name: test_smulo_s24 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 24 - ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 24 - ; GFX9-NEXT: [[SMULH:%[0-9]+]]:_(s32) = G_SMULH [[SEXT_INREG]], [[SEXT_INREG1]] - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SEXT_INREG]], [[SEXT_INREG1]] - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[MUL]], [[C]](s32) - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SMULH]](s32), [[ASHR]] - ; GFX9-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[MUL]], 24 - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[SEXT_INREG2]] - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP]], [[ICMP1]] - ; GFX9-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[MUL]], 24 - ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[OR]](s1) - ; GFX9-NEXT: $vgpr0 = COPY [[SEXT_INREG3]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[SEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s24) = G_TRUNC %0 - %3:_(s24) = G_TRUNC %1 - %4:_(s24), %6:_(s1) = G_SMULO %2, %3 - %5:_(s32) = G_SEXT %4 - %7:_(s32) = G_SEXT %6 - $vgpr0 = COPY %5 - $vgpr1 = COPY %7 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 24 + ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY1]], 24 + ; GFX9-NEXT: [[SMULH:%[0-9]+]]:_(i32) = G_SMULH [[SEXT_INREG]], [[SEXT_INREG1]] + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SEXT_INREG]], [[SEXT_INREG1]] + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[MUL]], [[C]](i32) + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SMULH]](i32), [[ASHR]] + ; GFX9-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[MUL]], 24 + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[MUL]](i32), [[SEXT_INREG2]] + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i1) = G_OR [[ICMP]], [[ICMP1]] + ; GFX9-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(i32) = G_SEXT_INREG [[MUL]], 24 + ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[OR]](i1) + ; GFX9-NEXT: $vgpr0 = COPY [[SEXT_INREG3]](i32) + ; GFX9-NEXT: $vgpr1 = COPY [[SEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i24) = G_TRUNC %0(i32) + %3:_(i24) = G_TRUNC %1(i32) + %4:_(i24), %5:_(i1) = G_SMULO %2, %3 + %6:_(i32) = G_SEXT %4(i24) + %7:_(i32) = G_SEXT %5(i1) + $vgpr0 = COPY %6(i32) + $vgpr1 = COPY %7(i32) ... 
- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir index 08bb589b6ded2..583723e73d5aa 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir @@ -14,143 +14,146 @@ body: | ; GFX6-LABEL: name: test_srem_s32 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) - ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[COPY1]], [[C]](s32) - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[ASHR]] - ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[ASHR1]] - ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] - ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] - ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) - ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] - ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] - ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] - ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]] - ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX6-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] - ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] - ; GFX6-NEXT: $vgpr0 = COPY [[SUB4]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[COPY]], [[C]](i32) + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[COPY1]], [[C]](i32) + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[COPY]], [[ASHR]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[COPY1]], [[ASHR1]] + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[ADD]], [[ASHR]] + ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[XOR1]](i32) + ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: 
[[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[XOR1]] + ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[XOR]], [[MUL1]] + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[XOR1]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT]](i32), [[XOR1]] + ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SELECT]], [[XOR1]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SUB3]], [[SELECT]] + ; GFX6-NEXT: [[XOR2:%[0-9]+]]:_(i32) = G_XOR [[SELECT1]], [[ASHR]] + ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[XOR2]], [[ASHR]] + ; GFX6-NEXT: $vgpr0 = COPY [[SUB4]](i32) + ; ; GFX8-LABEL: name: test_srem_s32 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) - ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[COPY1]], [[C]](s32) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[ASHR]] - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[ASHR1]] - ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] - ; GFX8-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] - ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) - ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] - ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] - ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]] - ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]] - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX8-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] - ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] - ; GFX8-NEXT: $vgpr0 = COPY [[SUB4]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[COPY]], 
[[C]](i32) + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[COPY1]], [[C]](i32) + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[COPY]], [[ASHR]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[COPY1]], [[ASHR1]] + ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[ADD]], [[ASHR]] + ; GFX8-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[XOR1]](i32) + ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[XOR1]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[XOR]], [[MUL1]] + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[XOR1]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT]](i32), [[XOR1]] + ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SELECT]], [[XOR1]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SUB3]], [[SELECT]] + ; GFX8-NEXT: [[XOR2:%[0-9]+]]:_(i32) = G_XOR [[SELECT1]], [[ASHR]] + ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[XOR2]], [[ASHR]] + ; GFX8-NEXT: $vgpr0 = COPY [[SUB4]](i32) + ; ; GFX9-LABEL: name: test_srem_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[COPY1]], [[C]](s32) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[ASHR]] - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[ASHR1]] - ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] - ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] - ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) - ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] - ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] - ; 
GFX9-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]] - ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX9-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] - ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] - ; GFX9-NEXT: $vgpr0 = COPY [[SUB4]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[COPY1]], [[C]](i32) + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[COPY]], [[ASHR]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[COPY1]], [[ASHR1]] + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[ADD]], [[ASHR]] + ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[XOR1]](i32) + ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[XOR1]] + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[XOR]], [[MUL1]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[XOR1]] + ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT]](i32), [[XOR1]] + ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SELECT]], [[XOR1]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SUB3]], [[SELECT]] + ; GFX9-NEXT: [[XOR2:%[0-9]+]]:_(i32) = G_XOR [[SELECT1]], [[ASHR]] + ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[XOR2]], [[ASHR]] + ; GFX9-NEXT: $vgpr0 = COPY [[SUB4]](i32) + ; ; GFX10-LABEL: name: test_srem_s32 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[COPY1]], [[C]](s32) - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[ASHR]] - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[ASHR1]] - ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] - ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] - ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) - ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX10-NEXT: 
[[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] - ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] - ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] - ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]] - ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]] - ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] - ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] - ; GFX10-NEXT: $vgpr0 = COPY [[SUB4]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_SREM %0, %1 - $vgpr0 = COPY %2 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[COPY1]], [[C]](i32) + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[COPY]], [[ASHR]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[COPY1]], [[ASHR1]] + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[ADD]], [[ASHR]] + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[XOR1]](i32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[XOR1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[XOR]], [[MUL1]] + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[XOR1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT]](i32), [[XOR1]] + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SELECT]], [[XOR1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SUB3]], [[SELECT]] + ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(i32) = G_XOR [[SELECT1]], [[ASHR]] + 
; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[XOR2]], [[ASHR]] + ; GFX10-NEXT: $vgpr0 = COPY [[SUB4]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_SREM %0, %1 + $vgpr0 = COPY %2(i32) ... --- @@ -162,255 +165,258 @@ body: | ; GFX6-LABEL: name: test_srem_v2s32 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV]], [[C]](s32) - ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[UV2]], [[C]](s32) - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[ASHR]] - ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[ASHR1]] - ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] - ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] - ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) - ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] - ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] - ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] - ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]] - ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX6-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] - ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] - ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32) - ; GFX6-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[UV3]], [[C]](s32) - ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[ASHR2]] - ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[ASHR3]] - ; GFX6-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ADD3]], [[ASHR2]] - ; GFX6-NEXT: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR3]] - ; GFX6-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR4]](s32) - ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) - ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C1]] - ; GFX6-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX6-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR4]] - ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB5]], [[FPTOUI1]] - ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH 
[[FPTOUI1]], [[MUL2]] - ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] - ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[XOR3]], [[ADD5]] - ; GFX6-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[XOR4]] - ; GFX6-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[MUL3]] - ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[XOR4]] - ; GFX6-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SUB6]], [[XOR4]] - ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB7]], [[SUB6]] - ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[XOR4]] - ; GFX6-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[XOR4]] - ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB8]], [[SELECT2]] - ; GFX6-NEXT: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR2]] - ; GFX6-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR5]], [[ASHR2]] - ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB4]](s32), [[SUB9]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[UV]], [[C]](i32) + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[UV2]], [[C]](i32) + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[UV]], [[ASHR]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[UV2]], [[ASHR1]] + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[ADD]], [[ASHR]] + ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[XOR1]](i32) + ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[XOR1]] + ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[XOR]], [[MUL1]] + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[XOR1]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT]](i32), [[XOR1]] + ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SELECT]], [[XOR1]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SUB3]], [[SELECT]] + ; GFX6-NEXT: [[XOR2:%[0-9]+]]:_(i32) = G_XOR [[SELECT1]], [[ASHR]] + ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[XOR2]], [[ASHR]] + ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(i32) = G_ASHR [[UV1]], [[C]](i32) + ; GFX6-NEXT: [[ASHR3:%[0-9]+]]:_(i32) = G_ASHR [[UV3]], [[C]](i32) + ; GFX6-NEXT: 
[[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UV1]], [[ASHR2]] + ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[UV3]], [[ASHR3]] + ; GFX6-NEXT: [[XOR3:%[0-9]+]]:_(i32) = G_XOR [[ADD3]], [[ASHR2]] + ; GFX6-NEXT: [[XOR4:%[0-9]+]]:_(i32) = G_XOR [[ADD4]], [[ASHR3]] + ; GFX6-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[XOR4]](i32) + ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](f32) + ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C1]] + ; GFX6-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL1]](f32) + ; GFX6-NEXT: [[SUB5:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[XOR4]] + ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[SUB5]], [[FPTOUI1]] + ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[XOR3]], [[ADD5]] + ; GFX6-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UMULH3]], [[XOR4]] + ; GFX6-NEXT: [[SUB6:%[0-9]+]]:_(i32) = G_SUB [[XOR3]], [[MUL3]] + ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB6]](i32), [[XOR4]] + ; GFX6-NEXT: [[SUB7:%[0-9]+]]:_(i32) = G_SUB [[SUB6]], [[XOR4]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SUB7]], [[SUB6]] + ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT2]](i32), [[XOR4]] + ; GFX6-NEXT: [[SUB8:%[0-9]+]]:_(i32) = G_SUB [[SELECT2]], [[XOR4]] + ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(i32) = G_SELECT [[ICMP3]](i1), [[SUB8]], [[SELECT2]] + ; GFX6-NEXT: [[XOR5:%[0-9]+]]:_(i32) = G_XOR [[SELECT3]], [[ASHR2]] + ; GFX6-NEXT: [[SUB9:%[0-9]+]]:_(i32) = G_SUB [[XOR5]], [[ASHR2]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SUB4]](i32), [[SUB9]](i32) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + ; ; GFX8-LABEL: name: test_srem_v2s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV]], [[C]](s32) - ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[UV2]], [[C]](s32) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[ASHR]] - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[ASHR1]] - ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] - ; GFX8-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] - ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) - ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] - ; 
GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] - ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]] - ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]] - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX8-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] - ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] - ; GFX8-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32) - ; GFX8-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[UV3]], [[C]](s32) - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[ASHR2]] - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[ASHR3]] - ; GFX8-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ADD3]], [[ASHR2]] - ; GFX8-NEXT: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR3]] - ; GFX8-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR4]](s32) - ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) - ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C1]] - ; GFX8-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX8-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR4]] - ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB5]], [[FPTOUI1]] - ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] - ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[XOR3]], [[ADD5]] - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[XOR4]] - ; GFX8-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[MUL3]] - ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[XOR4]] - ; GFX8-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SUB6]], [[XOR4]] - ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB7]], [[SUB6]] - ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[XOR4]] - ; GFX8-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[XOR4]] - ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB8]], [[SELECT2]] - ; GFX8-NEXT: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR2]] - ; GFX8-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR5]], [[ASHR2]] - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB4]](s32), [[SUB9]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[UV]], [[C]](i32) + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[UV2]], [[C]](i32) + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[UV]], [[ASHR]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[UV2]], [[ASHR1]] + ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[ADD]], [[ASHR]] + ; GFX8-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[XOR1]](i32) + ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) 
= G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[XOR1]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[XOR]], [[MUL1]] + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[XOR1]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT]](i32), [[XOR1]] + ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SELECT]], [[XOR1]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SUB3]], [[SELECT]] + ; GFX8-NEXT: [[XOR2:%[0-9]+]]:_(i32) = G_XOR [[SELECT1]], [[ASHR]] + ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[XOR2]], [[ASHR]] + ; GFX8-NEXT: [[ASHR2:%[0-9]+]]:_(i32) = G_ASHR [[UV1]], [[C]](i32) + ; GFX8-NEXT: [[ASHR3:%[0-9]+]]:_(i32) = G_ASHR [[UV3]], [[C]](i32) + ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UV1]], [[ASHR2]] + ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[UV3]], [[ASHR3]] + ; GFX8-NEXT: [[XOR3:%[0-9]+]]:_(i32) = G_XOR [[ADD3]], [[ASHR2]] + ; GFX8-NEXT: [[XOR4:%[0-9]+]]:_(i32) = G_XOR [[ADD4]], [[ASHR3]] + ; GFX8-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[XOR4]](i32) + ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](f32) + ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C1]] + ; GFX8-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL1]](f32) + ; GFX8-NEXT: [[SUB5:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[XOR4]] + ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[SUB5]], [[FPTOUI1]] + ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[XOR3]], [[ADD5]] + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UMULH3]], [[XOR4]] + ; GFX8-NEXT: [[SUB6:%[0-9]+]]:_(i32) = G_SUB [[XOR3]], [[MUL3]] + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB6]](i32), [[XOR4]] + ; GFX8-NEXT: [[SUB7:%[0-9]+]]:_(i32) = G_SUB [[SUB6]], [[XOR4]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SUB7]], [[SUB6]] + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT2]](i32), [[XOR4]] + ; GFX8-NEXT: [[SUB8:%[0-9]+]]:_(i32) = G_SUB [[SELECT2]], [[XOR4]] + ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(i32) = G_SELECT [[ICMP3]](i1), [[SUB8]], [[SELECT2]] + ; GFX8-NEXT: [[XOR5:%[0-9]+]]:_(i32) = G_XOR [[SELECT3]], [[ASHR2]] + ; GFX8-NEXT: [[SUB9:%[0-9]+]]:_(i32) = G_SUB [[XOR5]], [[ASHR2]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SUB4]](i32), [[SUB9]](i32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + ; ; GFX9-LABEL: name: test_srem_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; 
GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV]], [[C]](s32) - ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[UV2]], [[C]](s32) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[ASHR]] - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[ASHR1]] - ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] - ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] - ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) - ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] - ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] - ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]] - ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX9-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] - ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] - ; GFX9-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32) - ; GFX9-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[UV3]], [[C]](s32) - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[ASHR2]] - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[ASHR3]] - ; GFX9-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ADD3]], [[ASHR2]] - ; GFX9-NEXT: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR3]] - ; GFX9-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR4]](s32) - ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) - ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C1]] - ; GFX9-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX9-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR4]] - ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB5]], [[FPTOUI1]] - ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] - ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[XOR3]], [[ADD5]] - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[XOR4]] - ; GFX9-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[MUL3]] - ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[XOR4]] - ; GFX9-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB 
[[SUB6]], [[XOR4]] - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB7]], [[SUB6]] - ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[XOR4]] - ; GFX9-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[XOR4]] - ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB8]], [[SELECT2]] - ; GFX9-NEXT: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR2]] - ; GFX9-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR5]], [[ASHR2]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB4]](s32), [[SUB9]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[UV]], [[C]](i32) + ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[UV2]], [[C]](i32) + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[UV]], [[ASHR]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[UV2]], [[ASHR1]] + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[ADD]], [[ASHR]] + ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[XOR1]](i32) + ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[XOR1]] + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[XOR]], [[MUL1]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[XOR1]] + ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT]](i32), [[XOR1]] + ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SELECT]], [[XOR1]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SUB3]], [[SELECT]] + ; GFX9-NEXT: [[XOR2:%[0-9]+]]:_(i32) = G_XOR [[SELECT1]], [[ASHR]] + ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[XOR2]], [[ASHR]] + ; GFX9-NEXT: [[ASHR2:%[0-9]+]]:_(i32) = G_ASHR [[UV1]], [[C]](i32) + ; GFX9-NEXT: [[ASHR3:%[0-9]+]]:_(i32) = G_ASHR [[UV3]], [[C]](i32) + ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UV1]], [[ASHR2]] + ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[UV3]], [[ASHR3]] + ; GFX9-NEXT: [[XOR3:%[0-9]+]]:_(i32) = G_XOR [[ADD3]], [[ASHR2]] + ; GFX9-NEXT: [[XOR4:%[0-9]+]]:_(i32) = G_XOR [[ADD4]], [[ASHR3]] + ; GFX9-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[XOR4]](i32) + ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](f32) + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(f32) 
= G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C1]] + ; GFX9-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL1]](f32) + ; GFX9-NEXT: [[SUB5:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[XOR4]] + ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[SUB5]], [[FPTOUI1]] + ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[XOR3]], [[ADD5]] + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UMULH3]], [[XOR4]] + ; GFX9-NEXT: [[SUB6:%[0-9]+]]:_(i32) = G_SUB [[XOR3]], [[MUL3]] + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB6]](i32), [[XOR4]] + ; GFX9-NEXT: [[SUB7:%[0-9]+]]:_(i32) = G_SUB [[SUB6]], [[XOR4]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SUB7]], [[SUB6]] + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT2]](i32), [[XOR4]] + ; GFX9-NEXT: [[SUB8:%[0-9]+]]:_(i32) = G_SUB [[SELECT2]], [[XOR4]] + ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(i32) = G_SELECT [[ICMP3]](i1), [[SUB8]], [[SELECT2]] + ; GFX9-NEXT: [[XOR5:%[0-9]+]]:_(i32) = G_XOR [[SELECT3]], [[ASHR2]] + ; GFX9-NEXT: [[SUB9:%[0-9]+]]:_(i32) = G_SUB [[XOR5]], [[ASHR2]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SUB4]](i32), [[SUB9]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + ; ; GFX10-LABEL: name: test_srem_v2s32 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV]], [[C]](s32) - ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[UV2]], [[C]](s32) - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[ASHR]] - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[ASHR1]] - ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] - ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] - ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) - ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] - ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] - ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] - ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]] - ; GFX10-NEXT: 
[[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]] - ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] - ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] - ; GFX10-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32) - ; GFX10-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[UV3]], [[C]](s32) - ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[ASHR2]] - ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[ASHR3]] - ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ADD3]], [[ASHR2]] - ; GFX10-NEXT: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR3]] - ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR4]](s32) - ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) - ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C1]] - ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX10-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR4]] - ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB5]], [[FPTOUI1]] - ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] - ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] - ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[XOR3]], [[ADD5]] - ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[XOR4]] - ; GFX10-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[MUL3]] - ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[XOR4]] - ; GFX10-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SUB6]], [[XOR4]] - ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB7]], [[SUB6]] - ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[XOR4]] - ; GFX10-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[XOR4]] - ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB8]], [[SELECT2]] - ; GFX10-NEXT: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR2]] - ; GFX10-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR5]], [[ASHR2]] - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB4]](s32), [[SUB9]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = G_SREM %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[UV]], [[C]](i32) + ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[UV2]], [[C]](i32) + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[UV]], [[ASHR]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[UV2]], [[ASHR1]] + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[ADD]], [[ASHR]] + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[XOR1]](i32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX10-NEXT: 
[[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[XOR1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[XOR]], [[MUL1]] + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[XOR1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT]](i32), [[XOR1]] + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SELECT]], [[XOR1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SUB3]], [[SELECT]] + ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(i32) = G_XOR [[SELECT1]], [[ASHR]] + ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[XOR2]], [[ASHR]] + ; GFX10-NEXT: [[ASHR2:%[0-9]+]]:_(i32) = G_ASHR [[UV1]], [[C]](i32) + ; GFX10-NEXT: [[ASHR3:%[0-9]+]]:_(i32) = G_ASHR [[UV3]], [[C]](i32) + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UV1]], [[ASHR2]] + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[UV3]], [[ASHR3]] + ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(i32) = G_XOR [[ADD3]], [[ASHR2]] + ; GFX10-NEXT: [[XOR4:%[0-9]+]]:_(i32) = G_XOR [[ADD4]], [[ASHR3]] + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[XOR4]](i32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](f32) + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C1]] + ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL1]](f32) + ; GFX10-NEXT: [[SUB5:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[XOR4]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[SUB5]], [[FPTOUI1]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[XOR3]], [[ADD5]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UMULH3]], [[XOR4]] + ; GFX10-NEXT: [[SUB6:%[0-9]+]]:_(i32) = G_SUB [[XOR3]], [[MUL3]] + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB6]](i32), [[XOR4]] + ; GFX10-NEXT: [[SUB7:%[0-9]+]]:_(i32) = G_SUB [[SUB6]], [[XOR4]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SUB7]], [[SUB6]] + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT2]](i32), [[XOR4]] + ; GFX10-NEXT: [[SUB8:%[0-9]+]]:_(i32) = G_SUB [[SELECT2]], [[XOR4]] + ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(i32) = G_SELECT [[ICMP3]](i1), [[SUB8]], [[SELECT2]] + ; GFX10-NEXT: [[XOR5:%[0-9]+]]:_(i32) = G_XOR [[SELECT3]], [[ASHR2]] + ; GFX10-NEXT: [[SUB9:%[0-9]+]]:_(i32) = G_SUB [[XOR5]], [[ASHR2]] + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SUB4]](i32), [[SUB9]](i32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i32>) = G_SREM %0, %1 + $vgpr0_vgpr1 = COPY %2(<2 x i32>) ... 
--- @@ -422,675 +428,678 @@ body: | ; GFX6-LABEL: name: test_srem_s64 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32) - ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[COPY1]], [[C]](s32) - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] - ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) - ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] - ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] - ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]] - ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV8]](s32) - ; GFX6-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV9]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] - ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 - ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; GFX6-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] - ; GFX6-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; GFX6-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] - ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] - ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) - ; GFX6-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) - ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX6-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) - ; GFX6-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]] - ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] - ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] - ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] - ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] - ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] - ; GFX6-NEXT: 
[[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; GFX6-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] - ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] - ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] - ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] - ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] - ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] - ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] - ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] - ; GFX6-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] - ; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] - ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] - ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] - ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] - ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] - ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] - ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] - ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] - ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] - ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] - ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX6-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] - ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] - ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] - ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] - ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] - ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], 
[[ZEXT8]] - ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] - ; GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] - ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] - ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] - ; GFX6-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX6-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]] - ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE6]] - ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]] - ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] - ; GFX6-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE6]] - ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]] - ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE6]] - ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] - ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX6-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] - ; GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX6-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] - ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE6]] - ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] - ; GFX6-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]] - ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]] - ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD15]] - ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]] - ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] - ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]] - ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD17]], [[USUBO3]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD17]] - ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; GFX6-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]] - ; GFX6-NEXT: 
[[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV20]] - ; GFX6-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV21]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX6-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV20]] - ; GFX6-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV21]], [[USUBO3]] - ; GFX6-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] - ; GFX6-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) - ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV21]] - ; GFX6-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX6-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV20]] - ; GFX6-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX6-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV21]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX6-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV20]] - ; GFX6-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV21]], [[USUBO5]] - ; GFX6-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] - ; GFX6-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) - ; GFX6-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] - ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] - ; GFX6-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] - ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] - ; GFX6-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[ASHR]] - ; GFX6-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) - ; GFX6-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX6-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV24]] - ; GFX6-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV25]], [[USUBO9]] - ; GFX6-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[COPY]], [[C]](i32) + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[COPY1]], [[C]](i32) + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR]](i64) + ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV]], [[UV2]] + ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = 
G_UNMERGE_VALUES [[ASHR1]](i64) + ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UV4]], [[UV6]] + ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] + ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO2]](i32), [[UADDE2]](i32) + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(i64) = G_XOR [[MV]], [[ASHR]] + ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(i64) = G_XOR [[MV1]], [[ASHR1]] + ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[UV8]](i32) + ; GFX6-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[UV9]](i32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD]](f32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX6-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX6-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX6-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD1]](f32) + ; GFX6-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC]](f32) + ; GFX6-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX6-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C5]](i64) + ; GFX6-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV10]], [[UV12]] + ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] + ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ADD]], [[UMULH]] + ; GFX6-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(i32), [[UADDO5:%[0-9]+]]:_(i1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO5]](i1) + ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(i32), [[UADDO7:%[0-9]+]]:_(i1) = G_UADDO [[UADDO4]], [[UMULH1]] + ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO7]](i1) + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(i32), [[UADDO9:%[0-9]+]]:_(i1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO9]](i1) + ; 
GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(i32), [[UADDO11:%[0-9]+]]:_(i1) = G_UADDO [[UADDO8]], [[UMULH3]] + ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO11]](i1) + ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(i32), [[UADDO13:%[0-9]+]]:_(i1) = G_UADDO [[UADDO10]], [[ADD2]] + ; GFX6-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO13]](i1) + ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(i32), [[UADDO15:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI]], [[UADDO12]] + ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(i32), [[UADDE5:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] + ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[UADDO14]] + ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(i32) = G_MUL [[USUBE]], [[UADDO14]] + ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[UADDE4]] + ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(i32) = G_UMULH [[USUBO]], [[UADDO14]] + ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[ADD6]], [[UMULH5]] + ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(i32) = G_MUL [[UADDE4]], [[MUL6]] + ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(i32) = G_MUL [[UADDO14]], [[ADD7]] + ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(i32) = G_UMULH [[UADDO14]], [[MUL6]] + ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(i32), [[UADDO17:%[0-9]+]]:_(i1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO17]](i1) + ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(i32), [[UADDO19:%[0-9]+]]:_(i1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX6-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO19]](i1) + ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(i32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(i32) = G_MUL [[UADDE4]], [[ADD7]] + ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(i32) = G_UMULH [[UADDE4]], [[MUL6]] + ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(i32) = G_UMULH [[UADDO14]], [[ADD7]] + ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(i32), [[UADDO21:%[0-9]+]]:_(i1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO21]](i1) + ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(i32), [[UADDO23:%[0-9]+]]:_(i1) = G_UADDO [[UADDO20]], [[UMULH8]] + ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO23]](i1) + ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(i32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(i32), [[UADDO25:%[0-9]+]]:_(i1) = G_UADDO [[UADDO22]], [[ADD8]] + ; GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO25]](i1) + ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(i32) = G_ADD [[ADD9]], [[ZEXT9]] + ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(i32) = G_UMULH [[UADDE4]], [[ADD7]] + ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(i32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX6-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(i32), [[UADDO27:%[0-9]+]]:_(i1) = G_UADDO [[UADDO14]], [[UADDO24]] + ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(i32), [[UADDE7:%[0-9]+]]:_(i1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] + ; GFX6-NEXT: [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR]](i64) + ; GFX6-NEXT: [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR]](i64) + ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(i32) = G_MUL [[UV17]], [[UADDO26]] + ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(i32) = G_MUL [[UV16]], [[UADDE6]] + ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(i32) = G_UMULH [[UV16]], [[UADDO26]] + ; GFX6-NEXT: 
[[UADDO28:%[0-9]+]]:_(i32), [[UADDO29:%[0-9]+]]:_(i1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX6-NEXT: [[ZEXT10:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO29]](i1) + ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(i32), [[UADDO31:%[0-9]+]]:_(i1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO31]](i1) + ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(i32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(i32) = G_MUL [[UV17]], [[UADDE6]] + ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(i32) = G_UMULH [[UV17]], [[UADDO26]] + ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(i32) = G_UMULH [[UV16]], [[UADDE6]] + ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(i32), [[UADDO33:%[0-9]+]]:_(i1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO33]](i1) + ; GFX6-NEXT: [[UADDO34:%[0-9]+]]:_(i32), [[UADDO35:%[0-9]+]]:_(i1) = G_UADDO [[UADDO32]], [[UMULH12]] + ; GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO35]](i1) + ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(i32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX6-NEXT: [[UADDO36:%[0-9]+]]:_(i32), [[UADDO37:%[0-9]+]]:_(i1) = G_UADDO [[UADDO34]], [[ADD12]] + ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO37]](i1) + ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(i32) = G_ADD [[ADD13]], [[ZEXT14]] + ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(i32) = G_UMULH [[UV17]], [[UADDE6]] + ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(i32) = G_ADD [[UMULH13]], [[ADD14]] + ; GFX6-NEXT: [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(i32) = G_MUL [[UV18]], [[UADDO36]] + ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(i32) = G_MUL [[UV19]], [[UADDO36]] + ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(i32) = G_MUL [[UV18]], [[ADD15]] + ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(i32) = G_UMULH [[UV18]], [[UADDO36]] + ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(i32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(i32) = G_ADD [[ADD16]], [[UMULH14]] + ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(i32), [[USUBO3:%[0-9]+]]:_(i1) = G_USUBO [[UV14]], [[MUL15]] + ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV15]], [[ADD17]], [[USUBO3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV15]], [[ADD17]] + ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO2]](i32), [[USUBE2]](i32) + ; GFX6-NEXT: [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE2]](i32), [[UV21]] + ; GFX6-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO2]](i32), [[UV20]] + ; GFX6-NEXT: [[SEXT1:%[0-9]+]]:_(i32) = G_SEXT [[ICMP1]](i1) + ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE2]](i32), [[UV21]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SEXT1]], [[SEXT]] + ; GFX6-NEXT: [[USUBO4:%[0-9]+]]:_(i32), [[USUBO5:%[0-9]+]]:_(i1) = G_USUBO [[USUBO2]], [[UV20]] + ; GFX6-NEXT: [[USUBE4:%[0-9]+]]:_(i32), [[USUBE5:%[0-9]+]]:_(i1) = G_USUBE [[SUB]], [[UV21]], [[USUBO3]] + ; GFX6-NEXT: [[USUBE6:%[0-9]+]]:_(i32), [[USUBE7:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX6-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO4]](i32), [[USUBE6]](i32) + ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE6]](i32), [[UV21]] + ; GFX6-NEXT: [[SEXT2:%[0-9]+]]:_(i32) = G_SEXT [[ICMP3]](i1) + ; GFX6-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO4]](i32), [[UV20]] + ; GFX6-NEXT: [[SEXT3:%[0-9]+]]:_(i32) = G_SEXT [[ICMP4]](i1) + ; 
GFX6-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE6]](i32), [[UV21]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP5]](i1), [[SEXT3]], [[SEXT2]] + ; GFX6-NEXT: [[USUBO6:%[0-9]+]]:_(i32), [[USUBO7:%[0-9]+]]:_(i1) = G_USUBO [[USUBO4]], [[UV20]] + ; GFX6-NEXT: [[USUBE8:%[0-9]+]]:_(i32), [[USUBE9:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[UV21]], [[USUBO5]] + ; GFX6-NEXT: [[USUBE10:%[0-9]+]]:_(i32), [[USUBE11:%[0-9]+]]:_(i1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] + ; GFX6-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO6]](i32), [[USUBE10]](i32) + ; GFX6-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT1]](i32), [[C6]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[MV4]], [[MV3]] + ; GFX6-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT]](i32), [[C6]] + ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP7]](i1), [[SELECT2]], [[MV2]] + ; GFX6-NEXT: [[XOR2:%[0-9]+]]:_(i64) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX6-NEXT: [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR2]](i64) + ; GFX6-NEXT: [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR]](i64) + ; GFX6-NEXT: [[USUBO8:%[0-9]+]]:_(i32), [[USUBO9:%[0-9]+]]:_(i1) = G_USUBO [[UV22]], [[UV24]] + ; GFX6-NEXT: [[USUBE12:%[0-9]+]]:_(i32), [[USUBE13:%[0-9]+]]:_(i1) = G_USUBE [[UV23]], [[UV25]], [[USUBO9]] + ; GFX6-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO8]](i32), [[USUBE12]](i32) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](i64) + ; ; GFX8-LABEL: name: test_srem_s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32) - ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[COPY1]], [[C]](s32) - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] - ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) - ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] - ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] - ; GFX8-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]] - ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV8]](s32) - ; GFX8-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV9]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] - ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] - ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) - 
; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 - ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; GFX8-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] - ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] - ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; GFX8-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] - ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] - ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) - ; GFX8-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) - ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) - ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX8-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]] - ; GFX8-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C5]] - ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV15]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] - ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV14]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV16]] - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV14]] - ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV16]] - ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV14]] - ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV16]] - ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD]] - ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] - ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV16]] - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; 
GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] - ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) - ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV19]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE4]], [[ANYEXT1]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]] - ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV18]] - ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[UV20]] - ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV18]] - ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] - ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV20]] - ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV18]] - ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV20]] - ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] - ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD4]] - ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV20]] - ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]] - ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO26]] - ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE6]] - ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO26]] - ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] - ; GFX8-NEXT: 
[[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE6]] - ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO26]] - ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE6]] - ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] - ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD8]] - ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE6]] - ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] - ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDO36]], [[C5]] - ; GFX8-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) - ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV29]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]] - ; GFX8-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) - ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV28]] - ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV30]], [[USUBO3]] - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV23]], [[UV30]] - ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; GFX8-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV33]] - ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV32]] - ; GFX8-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV33]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX8-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV32]] - ; GFX8-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV33]], [[USUBO3]] - ; GFX8-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] - ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) - ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV33]] - ; GFX8-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), 
[[USUBO4]](s32), [[UV32]] - ; GFX8-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV33]] - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX8-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV32]] - ; GFX8-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV33]], [[USUBO5]] - ; GFX8-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] - ; GFX8-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) - ; GFX8-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] - ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] - ; GFX8-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] - ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] - ; GFX8-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[ASHR]] - ; GFX8-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) - ; GFX8-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX8-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV34]], [[UV36]] - ; GFX8-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV35]], [[UV37]], [[USUBO9]] - ; GFX8-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[COPY]], [[C]](i32) + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[COPY1]], [[C]](i32) + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR]](i64) + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV]], [[UV2]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR1]](i64) + ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UV4]], [[UV6]] + ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] + ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO2]](i32), [[UADDE2]](i32) + ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(i64) = G_XOR [[MV]], [[ASHR]] + ; GFX8-NEXT: [[XOR1:%[0-9]+]]:_(i64) = G_XOR [[MV1]], [[ASHR1]] + ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[UV8]](i32) + ; GFX8-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[UV9]](i32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD]](f32) + ; 
GFX8-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX8-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX8-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD1]](f32) + ; GFX8-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC]](f32) + ; GFX8-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C5]](i64) + ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX8-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV10]], [[UV12]] + ; GFX8-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[FPTOUI]], [[C5]] + ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](i64) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[UV15]](i32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[FPTOUI1]], [[ANYEXT]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](i32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](i64) + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[UV14]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI]], [[UV16]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[UV14]] + ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(i32), [[UADDO5:%[0-9]+]]:_(i1) = G_UADDO [[MUL]], [[MUL1]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO5]](i1) + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(i32), [[UADDO7:%[0-9]+]]:_(i1) = G_UADDO [[UADDO4]], [[UMULH]] + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO7]](i1) + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[UV16]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[UV14]] + ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[UV16]] + ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(i32), [[UADDO9:%[0-9]+]]:_(i1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO9]](i1) + ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(i32), [[UADDO11:%[0-9]+]]:_(i1) = G_UADDO [[UADDO8]], [[UMULH2]] + ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO11]](i1) + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(i32), [[UADDO13:%[0-9]+]]:_(i1) = G_UADDO [[UADDO10]], [[ADD]] + ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO13]](i1) + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[UV16]] + ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UMULH3]], [[ADD2]] + ; 
GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(i32), [[UADDO15:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI]], [[UADDO12]] + ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(i32), [[UADDE5:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[UADDO14]], [[C5]] + ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](i64) + ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[UV19]](i32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[UADDE4]], [[ANYEXT1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](i32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](i64) + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UADDE4]], [[UV18]] + ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(i32) = G_MUL [[UADDO14]], [[UV20]] + ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(i32) = G_UMULH [[UADDO14]], [[UV18]] + ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(i32), [[UADDO17:%[0-9]+]]:_(i1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO17]](i1) + ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(i32), [[UADDO19:%[0-9]+]]:_(i1) = G_UADDO [[UADDO16]], [[UMULH4]] + ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO19]](i1) + ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(i32) = G_MUL [[UADDE4]], [[UV20]] + ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(i32) = G_UMULH [[UADDE4]], [[UV18]] + ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(i32) = G_UMULH [[UADDO14]], [[UV20]] + ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(i32), [[UADDO21:%[0-9]+]]:_(i1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO21]](i1) + ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(i32), [[UADDO23:%[0-9]+]]:_(i1) = G_UADDO [[UADDO20]], [[UMULH6]] + ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO23]](i1) + ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(i32), [[UADDO25:%[0-9]+]]:_(i1) = G_UADDO [[UADDO22]], [[ADD4]] + ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO25]](i1) + ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(i32) = G_UMULH [[UADDE4]], [[UV20]] + ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[UMULH7]], [[ADD6]] + ; GFX8-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(i32), [[UADDO27:%[0-9]+]]:_(i1) = G_UADDO [[UADDO14]], [[UADDO24]] + ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(i32), [[UADDE7:%[0-9]+]]:_(i1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]] + ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR]](i64) + ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR]](i64) + ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(i32) = G_MUL [[UV25]], [[UADDO26]] + ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(i32) = G_MUL [[UV24]], [[UADDE6]] + ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(i32) = G_UMULH [[UV24]], [[UADDO26]] + ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(i32), [[UADDO29:%[0-9]+]]:_(i1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO29]](i1) + ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(i32), [[UADDO31:%[0-9]+]]:_(i1) = G_UADDO [[UADDO28]], [[UMULH8]] + ; GFX8-NEXT: 
[[ZEXT11:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO31]](i1) + ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(i32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(i32) = G_MUL [[UV25]], [[UADDE6]] + ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(i32) = G_UMULH [[UV25]], [[UADDO26]] + ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(i32) = G_UMULH [[UV24]], [[UADDE6]] + ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(i32), [[UADDO33:%[0-9]+]]:_(i1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO33]](i1) + ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(i32), [[UADDO35:%[0-9]+]]:_(i1) = G_UADDO [[UADDO32]], [[UMULH10]] + ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO35]](i1) + ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(i32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(i32), [[UADDO37:%[0-9]+]]:_(i1) = G_UADDO [[UADDO34]], [[ADD8]] + ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO37]](i1) + ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(i32) = G_ADD [[ADD9]], [[ZEXT14]] + ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(i32) = G_UMULH [[UV25]], [[UADDE6]] + ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(i32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV26]](i32), [[UADDO36]], [[C5]] + ; GFX8-NEXT: [[UV28:%[0-9]+]]:_(i32), [[UV29:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](i64) + ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(i64) = G_ANYEXT [[UV29]](i32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV26]](i32), [[ADD11]], [[ANYEXT2]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV27]](i32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX8-NEXT: [[UV30:%[0-9]+]]:_(i32), [[UV31:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](i64) + ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(i32), [[USUBO3:%[0-9]+]]:_(i1) = G_USUBO [[UV22]], [[UV28]] + ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV23]], [[UV30]], [[USUBO3]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV23]], [[UV30]] + ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO2]](i32), [[USUBE2]](i32) + ; GFX8-NEXT: [[UV32:%[0-9]+]]:_(i32), [[UV33:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE2]](i32), [[UV33]] + ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO2]](i32), [[UV32]] + ; GFX8-NEXT: [[SEXT1:%[0-9]+]]:_(i32) = G_SEXT [[ICMP1]](i1) + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE2]](i32), [[UV33]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SEXT1]], [[SEXT]] + ; GFX8-NEXT: [[USUBO4:%[0-9]+]]:_(i32), [[USUBO5:%[0-9]+]]:_(i1) = G_USUBO [[USUBO2]], [[UV32]] + ; GFX8-NEXT: [[USUBE4:%[0-9]+]]:_(i32), [[USUBE5:%[0-9]+]]:_(i1) = G_USUBE [[SUB]], [[UV33]], [[USUBO3]] + ; GFX8-NEXT: [[USUBE6:%[0-9]+]]:_(i32), [[USUBE7:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO4]](i32), [[USUBE6]](i32) + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE6]](i32), [[UV33]] + ; GFX8-NEXT: [[SEXT2:%[0-9]+]]:_(i32) = G_SEXT [[ICMP3]](i1) + ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), 
[[USUBO4]](i32), [[UV32]] + ; GFX8-NEXT: [[SEXT3:%[0-9]+]]:_(i32) = G_SEXT [[ICMP4]](i1) + ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE6]](i32), [[UV33]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP5]](i1), [[SEXT3]], [[SEXT2]] + ; GFX8-NEXT: [[USUBO6:%[0-9]+]]:_(i32), [[USUBO7:%[0-9]+]]:_(i1) = G_USUBO [[USUBO4]], [[UV32]] + ; GFX8-NEXT: [[USUBE8:%[0-9]+]]:_(i32), [[USUBE9:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[UV33]], [[USUBO5]] + ; GFX8-NEXT: [[USUBE10:%[0-9]+]]:_(i32), [[USUBE11:%[0-9]+]]:_(i1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] + ; GFX8-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO6]](i32), [[USUBE10]](i32) + ; GFX8-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT1]](i32), [[C6]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[MV4]], [[MV3]] + ; GFX8-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT]](i32), [[C6]] + ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP7]](i1), [[SELECT2]], [[MV2]] + ; GFX8-NEXT: [[XOR2:%[0-9]+]]:_(i64) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX8-NEXT: [[UV34:%[0-9]+]]:_(i32), [[UV35:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR2]](i64) + ; GFX8-NEXT: [[UV36:%[0-9]+]]:_(i32), [[UV37:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR]](i64) + ; GFX8-NEXT: [[USUBO8:%[0-9]+]]:_(i32), [[USUBO9:%[0-9]+]]:_(i1) = G_USUBO [[UV34]], [[UV36]] + ; GFX8-NEXT: [[USUBE12:%[0-9]+]]:_(i32), [[USUBE13:%[0-9]+]]:_(i1) = G_USUBE [[UV35]], [[UV37]], [[USUBO9]] + ; GFX8-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO8]](i32), [[USUBE12]](i32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](i64) + ; ; GFX9-LABEL: name: test_srem_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[COPY1]], [[C]](s32) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) - ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] - ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] - ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]] - ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV8]](s32) - ; GFX9-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV9]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], 
[[UITOFP]] - ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 - ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] - ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; GFX9-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] - ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] - ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) - ; GFX9-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) - ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX9-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]] - ; GFX9-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C5]] - ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV15]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] - ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV14]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV16]] - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV14]] - ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV16]] - ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV14]] - ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV16]] - ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] - ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD]] - ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] - ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH 
[[FPTOUI1]], [[UV16]] - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] - ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) - ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV19]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE4]], [[ANYEXT1]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]] - ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV18]] - ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[UV20]] - ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV18]] - ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] - ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV20]] - ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV18]] - ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV20]] - ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] - ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD4]] - ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV20]] - ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]] - ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO26]] - ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE6]] - ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO26]] - ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX9-NEXT: 
[[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] - ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE6]] - ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO26]] - ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE6]] - ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] - ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD8]] - ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE6]] - ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] - ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDO36]], [[C5]] - ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) - ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV29]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]] - ; GFX9-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) - ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV28]] - ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV30]], [[USUBO3]] - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV23]], [[UV30]] - ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; GFX9-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV33]] - ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV32]] - ; GFX9-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV33]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX9-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV32]] - ; GFX9-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV33]], [[USUBO3]] - ; GFX9-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] - ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) - ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV33]] - ; GFX9-NEXT: 
[[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV32]] - ; GFX9-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV33]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX9-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV32]] - ; GFX9-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV33]], [[USUBO5]] - ; GFX9-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] - ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) - ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] - ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] - ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] - ; GFX9-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[ASHR]] - ; GFX9-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) - ; GFX9-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX9-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV34]], [[UV36]] - ; GFX9-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV35]], [[UV37]], [[USUBO9]] - ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[COPY1]], [[C]](i32) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR]](i64) + ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV]], [[UV2]] + ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR1]](i64) + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UV4]], [[UV6]] + ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO2]](i32), [[UADDE2]](i32) + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(i64) = G_XOR [[MV]], [[ASHR]] + ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(i64) = G_XOR [[MV1]], [[ASHR1]] + ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[UV8]](i32) + ; GFX9-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[UV9]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD 
[[FMUL]], [[UITOFP]] + ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD]](f32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX9-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD1]](f32) + ; GFX9-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC]](f32) + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C5]](i64) + ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX9-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV10]], [[UV12]] + ; GFX9-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[FPTOUI]], [[C5]] + ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](i64) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[UV15]](i32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[FPTOUI1]], [[ANYEXT]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](i32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](i64) + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[UV14]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI]], [[UV16]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[UV14]] + ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(i32), [[UADDO5:%[0-9]+]]:_(i1) = G_UADDO [[MUL]], [[MUL1]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO5]](i1) + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(i32), [[UADDO7:%[0-9]+]]:_(i1) = G_UADDO [[UADDO4]], [[UMULH]] + ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO7]](i1) + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[UV16]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[UV14]] + ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[UV16]] + ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(i32), [[UADDO9:%[0-9]+]]:_(i1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO9]](i1) + ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(i32), [[UADDO11:%[0-9]+]]:_(i1) = G_UADDO [[UADDO8]], [[UMULH2]] + ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO11]](i1) + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(i32), [[UADDO13:%[0-9]+]]:_(i1) = G_UADDO [[UADDO10]], [[ADD]] + ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO13]](i1) + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = 
G_UMULH [[FPTOUI1]], [[UV16]] + ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UMULH3]], [[ADD2]] + ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(i32), [[UADDO15:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI]], [[UADDO12]] + ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(i32), [[UADDE5:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[UADDO14]], [[C5]] + ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](i64) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[UV19]](i32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[UADDE4]], [[ANYEXT1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](i32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](i64) + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UADDE4]], [[UV18]] + ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(i32) = G_MUL [[UADDO14]], [[UV20]] + ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(i32) = G_UMULH [[UADDO14]], [[UV18]] + ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(i32), [[UADDO17:%[0-9]+]]:_(i1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO17]](i1) + ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(i32), [[UADDO19:%[0-9]+]]:_(i1) = G_UADDO [[UADDO16]], [[UMULH4]] + ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO19]](i1) + ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(i32) = G_MUL [[UADDE4]], [[UV20]] + ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(i32) = G_UMULH [[UADDE4]], [[UV18]] + ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(i32) = G_UMULH [[UADDO14]], [[UV20]] + ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(i32), [[UADDO21:%[0-9]+]]:_(i1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO21]](i1) + ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(i32), [[UADDO23:%[0-9]+]]:_(i1) = G_UADDO [[UADDO20]], [[UMULH6]] + ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO23]](i1) + ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(i32), [[UADDO25:%[0-9]+]]:_(i1) = G_UADDO [[UADDO22]], [[ADD4]] + ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO25]](i1) + ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(i32) = G_UMULH [[UADDE4]], [[UV20]] + ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[UMULH7]], [[ADD6]] + ; GFX9-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(i32), [[UADDO27:%[0-9]+]]:_(i1) = G_UADDO [[UADDO14]], [[UADDO24]] + ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(i32), [[UADDE7:%[0-9]+]]:_(i1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]] + ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR]](i64) + ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR]](i64) + ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(i32) = G_MUL [[UV25]], [[UADDO26]] + ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(i32) = G_MUL [[UV24]], [[UADDE6]] + ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(i32) = G_UMULH [[UV24]], [[UADDO26]] + ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(i32), [[UADDO29:%[0-9]+]]:_(i1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO29]](i1) + ; GFX9-NEXT: 
[[UADDO30:%[0-9]+]]:_(i32), [[UADDO31:%[0-9]+]]:_(i1) = G_UADDO [[UADDO28]], [[UMULH8]] + ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO31]](i1) + ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(i32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(i32) = G_MUL [[UV25]], [[UADDE6]] + ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(i32) = G_UMULH [[UV25]], [[UADDO26]] + ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(i32) = G_UMULH [[UV24]], [[UADDE6]] + ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(i32), [[UADDO33:%[0-9]+]]:_(i1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO33]](i1) + ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(i32), [[UADDO35:%[0-9]+]]:_(i1) = G_UADDO [[UADDO32]], [[UMULH10]] + ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO35]](i1) + ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(i32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(i32), [[UADDO37:%[0-9]+]]:_(i1) = G_UADDO [[UADDO34]], [[ADD8]] + ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO37]](i1) + ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(i32) = G_ADD [[ADD9]], [[ZEXT14]] + ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(i32) = G_UMULH [[UV25]], [[UADDE6]] + ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(i32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV26]](i32), [[UADDO36]], [[C5]] + ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(i32), [[UV29:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](i64) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(i64) = G_ANYEXT [[UV29]](i32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV26]](i32), [[ADD11]], [[ANYEXT2]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV27]](i32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX9-NEXT: [[UV30:%[0-9]+]]:_(i32), [[UV31:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](i64) + ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(i32), [[USUBO3:%[0-9]+]]:_(i1) = G_USUBO [[UV22]], [[UV28]] + ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV23]], [[UV30]], [[USUBO3]] + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV23]], [[UV30]] + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO2]](i32), [[USUBE2]](i32) + ; GFX9-NEXT: [[UV32:%[0-9]+]]:_(i32), [[UV33:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE2]](i32), [[UV33]] + ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO2]](i32), [[UV32]] + ; GFX9-NEXT: [[SEXT1:%[0-9]+]]:_(i32) = G_SEXT [[ICMP1]](i1) + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE2]](i32), [[UV33]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SEXT1]], [[SEXT]] + ; GFX9-NEXT: [[USUBO4:%[0-9]+]]:_(i32), [[USUBO5:%[0-9]+]]:_(i1) = G_USUBO [[USUBO2]], [[UV32]] + ; GFX9-NEXT: [[USUBE4:%[0-9]+]]:_(i32), [[USUBE5:%[0-9]+]]:_(i1) = G_USUBE [[SUB]], [[UV33]], [[USUBO3]] + ; GFX9-NEXT: [[USUBE6:%[0-9]+]]:_(i32), [[USUBE7:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO4]](i32), [[USUBE6]](i32) + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE6]](i32), [[UV33]] + ; GFX9-NEXT: 
[[SEXT2:%[0-9]+]]:_(i32) = G_SEXT [[ICMP3]](i1) + ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO4]](i32), [[UV32]] + ; GFX9-NEXT: [[SEXT3:%[0-9]+]]:_(i32) = G_SEXT [[ICMP4]](i1) + ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE6]](i32), [[UV33]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP5]](i1), [[SEXT3]], [[SEXT2]] + ; GFX9-NEXT: [[USUBO6:%[0-9]+]]:_(i32), [[USUBO7:%[0-9]+]]:_(i1) = G_USUBO [[USUBO4]], [[UV32]] + ; GFX9-NEXT: [[USUBE8:%[0-9]+]]:_(i32), [[USUBE9:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[UV33]], [[USUBO5]] + ; GFX9-NEXT: [[USUBE10:%[0-9]+]]:_(i32), [[USUBE11:%[0-9]+]]:_(i1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] + ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO6]](i32), [[USUBE10]](i32) + ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT1]](i32), [[C6]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[MV4]], [[MV3]] + ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT]](i32), [[C6]] + ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP7]](i1), [[SELECT2]], [[MV2]] + ; GFX9-NEXT: [[XOR2:%[0-9]+]]:_(i64) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX9-NEXT: [[UV34:%[0-9]+]]:_(i32), [[UV35:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR2]](i64) + ; GFX9-NEXT: [[UV36:%[0-9]+]]:_(i32), [[UV37:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR]](i64) + ; GFX9-NEXT: [[USUBO8:%[0-9]+]]:_(i32), [[USUBO9:%[0-9]+]]:_(i1) = G_USUBO [[UV34]], [[UV36]] + ; GFX9-NEXT: [[USUBE12:%[0-9]+]]:_(i32), [[USUBE13:%[0-9]+]]:_(i1) = G_USUBE [[UV35]], [[UV37]], [[USUBO9]] + ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO8]](i32), [[USUBE12]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](i64) + ; ; GFX10-LABEL: name: test_srem_s64 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32) - ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[COPY1]], [[C]](s32) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) - ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] - ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] - ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]] - ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV8]](s32) - ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV9]](s32) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 
0x41F0000000000000 - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] - ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] - ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 - ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] - ; GFX10-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] - ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] - ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) - ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) - ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) - ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]] - ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C5]] - ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV15]], [[MUL]] - ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]] - ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV14]] - ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV14]] - ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[MUL3]] - ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] - ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV14]] - ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] - ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] - ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] - ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] - ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: 
[[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD4]] - ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] - ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64) - ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] - ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL5]] - ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] - ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[MUL6]] - ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV16]] - ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] - ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV16]] - ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] - ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] - ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] - ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV16]] - ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] - ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] - ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] - ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] - ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] - ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD10]] - ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] - ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO26]] - ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE6]] - ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO26]] - ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] - ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] - ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX10-NEXT: 
[[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE6]] - ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO26]] - ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE6]] - ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] - ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] - ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] - ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE6]] - ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD14]] - ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDO36]], [[C5]] - ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD15]] - ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV25]], [[MUL13]] - ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDO36]] - ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL14]] - ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[UV24]] - ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD17]], [[USUBO3]] - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD17]] - ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV27]] - ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV26]] - ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV27]] - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV26]] - ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV27]], [[USUBO3]] - ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] - ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) - ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV27]] - ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV26]] - ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV27]] - ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO 
[[USUBO4]], [[UV26]] - ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV27]], [[USUBO5]] - ; GFX10-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] - ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) - ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] - ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] - ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] - ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] - ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[ASHR]] - ; GFX10-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) - ; GFX10-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX10-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV28]], [[UV30]] - ; GFX10-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV29]], [[UV31]], [[USUBO9]] - ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = G_SREM %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[COPY]], [[C]](i32) + ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[COPY1]], [[C]](i32) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR]](i64) + ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV]], [[UV2]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR1]](i64) + ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UV4]], [[UV6]] + ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO2]](i32), [[UADDE2]](i32) + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(i64) = G_XOR [[MV]], [[ASHR]] + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(i64) = G_XOR [[MV1]], [[ASHR1]] + ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[UV8]](i32) + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[UV9]](i32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD]](f32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(f32) = 
G_FCONSTANT float 0x3DF0000000000000 + ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX10-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD1]](f32) + ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC]](f32) + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C5]](i64) + ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV10]], [[UV12]] + ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[FPTOUI]], [[C5]] + ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](i64) + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[UV15]], [[MUL]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ADD]], [[MUL1]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[UV14]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[UV14]] + ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(i32), [[UADDO5:%[0-9]+]]:_(i1) = G_UADDO [[MUL2]], [[MUL3]] + ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO5]](i1) + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(i32), [[UADDO7:%[0-9]+]]:_(i1) = G_UADDO [[UADDO4]], [[UMULH]] + ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO7]](i1) + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[UV14]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(i32), [[UADDO9:%[0-9]+]]:_(i1) = G_UADDO [[MUL4]], [[UMULH1]] + ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO9]](i1) + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(i32), [[UADDO11:%[0-9]+]]:_(i1) = G_UADDO [[UADDO8]], [[UMULH2]] + ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO11]](i1) + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(i32), [[UADDO13:%[0-9]+]]:_(i1) = G_UADDO [[UADDO10]], [[ADD2]] + ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO13]](i1) + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[UMULH3]], [[ADD4]] + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(i32), [[UADDO15:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI]], [[UADDO12]] + ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(i32), [[UADDE5:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[UADDO14]], [[C5]] + ; GFX10-NEXT: 
[[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](i64) + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[UADDE4]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[UV17]], [[MUL5]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(i32) = G_MUL [[USUBE]], [[UADDO14]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[ADD6]], [[MUL6]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(i32) = G_MUL [[UADDE4]], [[UV16]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(i32) = G_MUL [[UADDO14]], [[ADD7]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(i32) = G_UMULH [[UADDO14]], [[UV16]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(i32), [[UADDO17:%[0-9]+]]:_(i1) = G_UADDO [[MUL7]], [[MUL8]] + ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO17]](i1) + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(i32), [[UADDO19:%[0-9]+]]:_(i1) = G_UADDO [[UADDO16]], [[UMULH4]] + ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO19]](i1) + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(i32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(i32) = G_MUL [[UADDE4]], [[ADD7]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(i32) = G_UMULH [[UADDE4]], [[UV16]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(i32) = G_UMULH [[UADDO14]], [[ADD7]] + ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(i32), [[UADDO21:%[0-9]+]]:_(i1) = G_UADDO [[MUL9]], [[UMULH5]] + ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO21]](i1) + ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(i32), [[UADDO23:%[0-9]+]]:_(i1) = G_UADDO [[UADDO20]], [[UMULH6]] + ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO23]](i1) + ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(i32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(i32), [[UADDO25:%[0-9]+]]:_(i1) = G_UADDO [[UADDO22]], [[ADD8]] + ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO25]](i1) + ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(i32) = G_ADD [[ADD9]], [[ZEXT9]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(i32) = G_UMULH [[UADDE4]], [[ADD7]] + ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(i32) = G_ADD [[UMULH7]], [[ADD10]] + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(i32), [[UADDO27:%[0-9]+]]:_(i1) = G_UADDO [[UADDO14]], [[UADDO24]] + ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(i32), [[UADDE7:%[0-9]+]]:_(i1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] + ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR]](i64) + ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR]](i64) + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(i32) = G_MUL [[UV21]], [[UADDO26]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(i32) = G_MUL [[UV20]], [[UADDE6]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(i32) = G_UMULH [[UV20]], [[UADDO26]] + ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(i32), [[UADDO29:%[0-9]+]]:_(i1) = G_UADDO [[MUL10]], [[MUL11]] + ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO29]](i1) + ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(i32), [[UADDO31:%[0-9]+]]:_(i1) = G_UADDO [[UADDO28]], [[UMULH8]] + ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO31]](i1) + ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(i32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(i32) = G_MUL [[UV21]], [[UADDE6]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(i32) = G_UMULH [[UV21]], [[UADDO26]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(i32) = G_UMULH [[UV20]], [[UADDE6]] + ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(i32), [[UADDO33:%[0-9]+]]:_(i1) = G_UADDO [[MUL12]], [[UMULH9]] + ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO33]](i1) + ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(i32), [[UADDO35:%[0-9]+]]:_(i1) = G_UADDO 
[[UADDO32]], [[UMULH10]] + ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO35]](i1) + ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(i32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(i32), [[UADDO37:%[0-9]+]]:_(i1) = G_UADDO [[UADDO34]], [[ADD12]] + ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO37]](i1) + ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(i32) = G_ADD [[ADD13]], [[ZEXT14]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(i32) = G_UMULH [[UV21]], [[UADDE6]] + ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(i32) = G_ADD [[UMULH11]], [[ADD14]] + ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV22]](i32), [[UADDO36]], [[C5]] + ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](i64) + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(i32) = G_MUL [[UV22]], [[ADD15]] + ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(i32) = G_ADD [[UV25]], [[MUL13]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(i32) = G_MUL [[UV23]], [[UADDO36]] + ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(i32) = G_ADD [[ADD16]], [[MUL14]] + ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(i32), [[USUBO3:%[0-9]+]]:_(i1) = G_USUBO [[UV18]], [[UV24]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV19]], [[ADD17]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV19]], [[ADD17]] + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO2]](i32), [[USUBE2]](i32) + ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE2]](i32), [[UV27]] + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO2]](i32), [[UV26]] + ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(i32) = G_SEXT [[ICMP1]](i1) + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE2]](i32), [[UV27]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SEXT1]], [[SEXT]] + ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(i32), [[USUBO5:%[0-9]+]]:_(i1) = G_USUBO [[USUBO2]], [[UV26]] + ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(i32), [[USUBE5:%[0-9]+]]:_(i1) = G_USUBE [[SUB]], [[UV27]], [[USUBO3]] + ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(i32), [[USUBE7:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO4]](i32), [[USUBE6]](i32) + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE6]](i32), [[UV27]] + ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(i32) = G_SEXT [[ICMP3]](i1) + ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO4]](i32), [[UV26]] + ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(i32) = G_SEXT [[ICMP4]](i1) + ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE6]](i32), [[UV27]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP5]](i1), [[SEXT3]], [[SEXT2]] + ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(i32), [[USUBO7:%[0-9]+]]:_(i1) = G_USUBO [[USUBO4]], [[UV26]] + ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(i32), [[USUBE9:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[UV27]], [[USUBO5]] + ; GFX10-NEXT: [[USUBE10:%[0-9]+]]:_(i32), [[USUBE11:%[0-9]+]]:_(i1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] + ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO6]](i32), [[USUBE10]](i32) + ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT1]](i32), [[C6]] + ; GFX10-NEXT: 
[[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[MV4]], [[MV3]] + ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT]](i32), [[C6]] + ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP7]](i1), [[SELECT2]], [[MV2]] + ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(i64) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX10-NEXT: [[UV28:%[0-9]+]]:_(i32), [[UV29:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR2]](i64) + ; GFX10-NEXT: [[UV30:%[0-9]+]]:_(i32), [[UV31:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR]](i64) + ; GFX10-NEXT: [[USUBO8:%[0-9]+]]:_(i32), [[USUBO9:%[0-9]+]]:_(i1) = G_USUBO [[UV28]], [[UV30]] + ; GFX10-NEXT: [[USUBE12:%[0-9]+]]:_(i32), [[USUBE13:%[0-9]+]]:_(i1) = G_USUBE [[UV29]], [[UV31]], [[USUBO9]] + ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO8]](i32), [[USUBE12]](i32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64) = G_SREM %0, %1 + $vgpr0_vgpr1 = COPY %2(i64) ... --- @@ -1102,1303 +1111,1306 @@ body: | ; GFX6-LABEL: name: test_srem_v2s64 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[C]](s32) - ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[C]](s32) - ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] - ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX6-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) - ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] - ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] - ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] - ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]] - ; GFX6-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV12]](s32) - ; GFX6-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV13]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] - ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 - ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; GFX6-NEXT: 
[[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] - ; GFX6-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; GFX6-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] - ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] - ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) - ; GFX6-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) - ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX6-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) - ; GFX6-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV16]] - ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV17]], [[USUBO1]] - ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] - ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] - ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] - ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; GFX6-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] - ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] - ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] - ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] - ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] - ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] - ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] - ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] - ; GFX6-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] - ; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] - ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] - ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] - ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] - ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] - ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] - ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD 
[[ADD6]], [[UMULH5]] - ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] - ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] - ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] - ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] - ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX6-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] - ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] - ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] - ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] - ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] - ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] - ; GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] - ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] - ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] - ; GFX6-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX6-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO26]] - ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE6]] - ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO26]] - ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] - ; GFX6-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE6]] - ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO26]] - ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE6]] - ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] - ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX6-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] - ; GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX6-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] - ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD 
[[ADD13]], [[ZEXT14]] - ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE6]] - ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] - ; GFX6-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[UADDO36]] - ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDO36]] - ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD15]] - ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV22]], [[UADDO36]] - ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] - ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[MUL15]] - ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD17]], [[USUBO3]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD17]] - ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; GFX6-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV25]] - ; GFX6-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV24]] - ; GFX6-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV25]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX6-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV24]] - ; GFX6-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV25]], [[USUBO3]] - ; GFX6-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] - ; GFX6-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) - ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV25]] - ; GFX6-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX6-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV24]] - ; GFX6-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX6-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV25]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX6-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV24]] - ; GFX6-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV25]], [[USUBO5]] - ; GFX6-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] - ; GFX6-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) - ; GFX6-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] - ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] - ; GFX6-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] - ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] - ; GFX6-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[ASHR]] - ; GFX6-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) - ; GFX6-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX6-NEXT: [[USUBO8:%[0-9]+]]:_(s32), 
[[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV28]] - ; GFX6-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV29]], [[USUBO9]] - ; GFX6-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32) - ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) - ; GFX6-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C]](s32) - ; GFX6-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX6-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64) - ; GFX6-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UV30]], [[UV32]] - ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UV31]], [[UV33]], [[UADDO39]] - ; GFX6-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) - ; GFX6-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX6-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64) - ; GFX6-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UV34]], [[UV36]] - ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UV35]], [[UV37]], [[UADDO41]] - ; GFX6-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) - ; GFX6-NEXT: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[MV6]], [[ASHR2]] - ; GFX6-NEXT: [[XOR4:%[0-9]+]]:_(s64) = G_XOR [[MV7]], [[ASHR3]] - ; GFX6-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) - ; GFX6-NEXT: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV38]](s32) - ; GFX6-NEXT: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV39]](s32) - ; GFX6-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C1]] - ; GFX6-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[UITOFP2]] - ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD2]](s32) - ; GFX6-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] - ; GFX6-NEXT: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[FMUL5]], [[C3]] - ; GFX6-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL6]] - ; GFX6-NEXT: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C4]] - ; GFX6-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]] - ; GFX6-NEXT: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD3]](s32) - ; GFX6-NEXT: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32) - ; GFX6-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) - ; GFX6-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) - ; GFX6-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV40]], [[UV42]] - ; GFX6-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV41]], [[UV43]], [[USUBO11]] - ; GFX6-NEXT: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[FPTOUI2]] - ; GFX6-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE14]], [[FPTOUI2]] - ; GFX6-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[FPTOUI3]] - ; GFX6-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO10]], [[FPTOUI2]] - ; GFX6-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]] - ; GFX6-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[UMULH15]] - ; GFX6-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]] - ; GFX6-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD19]] - ; GFX6-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]] - ; 
GFX6-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]] - ; GFX6-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1) - ; GFX6-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH16]] - ; GFX6-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO45]](s1) - ; GFX6-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] - ; GFX6-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD19]] - ; GFX6-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]] - ; GFX6-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD19]] - ; GFX6-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]] - ; GFX6-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) - ; GFX6-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH18]] - ; GFX6-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1) - ; GFX6-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] - ; GFX6-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[UADDO48]], [[ADD20]] - ; GFX6-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) - ; GFX6-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT19]] - ; GFX6-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD19]] - ; GFX6-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD22]] - ; GFX6-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO50]] - ; GFX6-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO53]] - ; GFX6-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[UADDO52]] - ; GFX6-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE14]], [[UADDO52]] - ; GFX6-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[UADDE12]] - ; GFX6-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO10]], [[UADDO52]] - ; GFX6-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]] - ; GFX6-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[UMULH20]] - ; GFX6-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[MUL24]] - ; GFX6-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO52]], [[ADD25]] - ; GFX6-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[MUL24]] - ; GFX6-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]] - ; GFX6-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) - ; GFX6-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH21]] - ; GFX6-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO57]](s1) - ; GFX6-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] - ; GFX6-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[ADD25]] - ; GFX6-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[MUL24]] - ; GFX6-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[ADD25]] - ; GFX6-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]] - ; GFX6-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) - ; GFX6-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH23]] - ; GFX6-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1) - ; GFX6-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] - ; GFX6-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[UADDO60]], [[ADD26]] - ; GFX6-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) - ; GFX6-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ADD27]], 
[[ZEXT24]] - ; GFX6-NEXT: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[ADD25]] - ; GFX6-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD28]] - ; GFX6-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[UADDO62]] - ; GFX6-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD29]], [[UADDO65]] - ; GFX6-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) - ; GFX6-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) - ; GFX6-NEXT: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDO64]] - ; GFX6-NEXT: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV46]], [[UADDE14]] - ; GFX6-NEXT: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDO64]] - ; GFX6-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]] - ; GFX6-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) - ; GFX6-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH25]] - ; GFX6-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO69]](s1) - ; GFX6-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] - ; GFX6-NEXT: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDE14]] - ; GFX6-NEXT: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDO64]] - ; GFX6-NEXT: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDE14]] - ; GFX6-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]] - ; GFX6-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1) - ; GFX6-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH27]] - ; GFX6-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO73]](s1) - ; GFX6-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] - ; GFX6-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[ADD30]] - ; GFX6-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO75]](s1) - ; GFX6-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ADD31]], [[ZEXT29]] - ; GFX6-NEXT: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDE14]] - ; GFX6-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD32]] - ; GFX6-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) - ; GFX6-NEXT: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV48]], [[UADDO74]] - ; GFX6-NEXT: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV49]], [[UADDO74]] - ; GFX6-NEXT: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV48]], [[ADD33]] - ; GFX6-NEXT: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV48]], [[UADDO74]] - ; GFX6-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]] - ; GFX6-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[ADD34]], [[UMULH29]] - ; GFX6-NEXT: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[UV44]], [[MUL33]] - ; GFX6-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[UV45]], [[ADD35]], [[USUBO13]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV45]], [[ADD35]] - ; GFX6-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO12]](s32), [[USUBE16]](s32) - ; GFX6-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) - ; GFX6-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE16]](s32), [[UV51]] - ; GFX6-NEXT: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1) - ; GFX6-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV50]] - ; GFX6-NEXT: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1) - ; GFX6-NEXT: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), 
[[USUBE16]](s32), [[UV51]] - ; GFX6-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]] - ; GFX6-NEXT: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[USUBO12]], [[UV50]] - ; GFX6-NEXT: [[USUBE18:%[0-9]+]]:_(s32), [[USUBE19:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV51]], [[USUBO13]] - ; GFX6-NEXT: [[USUBE20:%[0-9]+]]:_(s32), [[USUBE21:%[0-9]+]]:_(s1) = G_USUBE [[USUBE18]], [[C6]], [[USUBO15]] - ; GFX6-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO14]](s32), [[USUBE20]](s32) - ; GFX6-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE20]](s32), [[UV51]] - ; GFX6-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) - ; GFX6-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO14]](s32), [[UV50]] - ; GFX6-NEXT: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1) - ; GFX6-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE20]](s32), [[UV51]] - ; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] - ; GFX6-NEXT: [[USUBO16:%[0-9]+]]:_(s32), [[USUBO17:%[0-9]+]]:_(s1) = G_USUBO [[USUBO14]], [[UV50]] - ; GFX6-NEXT: [[USUBE22:%[0-9]+]]:_(s32), [[USUBE23:%[0-9]+]]:_(s1) = G_USUBE [[USUBE18]], [[UV51]], [[USUBO15]] - ; GFX6-NEXT: [[USUBE24:%[0-9]+]]:_(s32), [[USUBE25:%[0-9]+]]:_(s1) = G_USUBE [[USUBE22]], [[C6]], [[USUBO17]] - ; GFX6-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO16]](s32), [[USUBE24]](s32) - ; GFX6-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C6]] - ; GFX6-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV10]], [[MV9]] - ; GFX6-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C6]] - ; GFX6-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV8]] - ; GFX6-NEXT: [[XOR5:%[0-9]+]]:_(s64) = G_XOR [[SELECT7]], [[ASHR2]] - ; GFX6-NEXT: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) - ; GFX6-NEXT: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64) - ; GFX6-NEXT: [[USUBO18:%[0-9]+]]:_(s32), [[USUBO19:%[0-9]+]]:_(s1) = G_USUBO [[UV52]], [[UV54]] - ; GFX6-NEXT: [[USUBE26:%[0-9]+]]:_(s32), [[USUBE27:%[0-9]+]]:_(s1) = G_USUBE [[UV53]], [[UV55]], [[USUBO19]] - ; GFX6-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO18]](s32), [[USUBE26]](s32) - ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV5]](s64), [[MV11]](s64) - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY1]](<2 x i64>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[UV]], [[C]](i32) + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[UV2]], [[C]](i32) + ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV]](i64) + ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR]](i64) + ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV4]], [[UV6]] + ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; 
GFX6-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX6-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR1]](i64) + ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UV8]], [[UV10]] + ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] + ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO2]](i32), [[UADDE2]](i32) + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(i64) = G_XOR [[MV]], [[ASHR]] + ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(i64) = G_XOR [[MV1]], [[ASHR1]] + ; GFX6-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[UV12]](i32) + ; GFX6-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[UV13]](i32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD]](f32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX6-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX6-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX6-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD1]](f32) + ; GFX6-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC]](f32) + ; GFX6-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX6-NEXT: [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C5]](i64) + ; GFX6-NEXT: [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV14]], [[UV16]] + ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV15]], [[UV17]], [[USUBO1]] + ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ADD]], [[UMULH]] + ; GFX6-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(i32), [[UADDO5:%[0-9]+]]:_(i1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO5]](i1) + ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(i32), [[UADDO7:%[0-9]+]]:_(i1) = G_UADDO [[UADDO4]], [[UMULH1]] + ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO7]](i1) + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; 
GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(i32), [[UADDO9:%[0-9]+]]:_(i1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO9]](i1) + ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(i32), [[UADDO11:%[0-9]+]]:_(i1) = G_UADDO [[UADDO8]], [[UMULH3]] + ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO11]](i1) + ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(i32), [[UADDO13:%[0-9]+]]:_(i1) = G_UADDO [[UADDO10]], [[ADD2]] + ; GFX6-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO13]](i1) + ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(i32), [[UADDO15:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI]], [[UADDO12]] + ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(i32), [[UADDE5:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] + ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[UADDO14]] + ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(i32) = G_MUL [[USUBE]], [[UADDO14]] + ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[UADDE4]] + ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(i32) = G_UMULH [[USUBO]], [[UADDO14]] + ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[ADD6]], [[UMULH5]] + ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(i32) = G_MUL [[UADDE4]], [[MUL6]] + ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(i32) = G_MUL [[UADDO14]], [[ADD7]] + ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(i32) = G_UMULH [[UADDO14]], [[MUL6]] + ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(i32), [[UADDO17:%[0-9]+]]:_(i1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO17]](i1) + ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(i32), [[UADDO19:%[0-9]+]]:_(i1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX6-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO19]](i1) + ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(i32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(i32) = G_MUL [[UADDE4]], [[ADD7]] + ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(i32) = G_UMULH [[UADDE4]], [[MUL6]] + ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(i32) = G_UMULH [[UADDO14]], [[ADD7]] + ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(i32), [[UADDO21:%[0-9]+]]:_(i1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO21]](i1) + ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(i32), [[UADDO23:%[0-9]+]]:_(i1) = G_UADDO [[UADDO20]], [[UMULH8]] + ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO23]](i1) + ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(i32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(i32), [[UADDO25:%[0-9]+]]:_(i1) = G_UADDO [[UADDO22]], [[ADD8]] + ; GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO25]](i1) + ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(i32) = G_ADD [[ADD9]], [[ZEXT9]] + ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(i32) = G_UMULH [[UADDE4]], [[ADD7]] + ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(i32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX6-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(i32), [[UADDO27:%[0-9]+]]:_(i1) = G_UADDO [[UADDO14]], [[UADDO24]] + ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(i32), [[UADDE7:%[0-9]+]]:_(i1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] + ; GFX6-NEXT: [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR]](i64) + ; GFX6-NEXT: [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR]](i64) + ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(i32) = G_MUL [[UV21]], [[UADDO26]] + ; GFX6-NEXT: 
[[MUL13:%[0-9]+]]:_(i32) = G_MUL [[UV20]], [[UADDE6]] + ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(i32) = G_UMULH [[UV20]], [[UADDO26]] + ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(i32), [[UADDO29:%[0-9]+]]:_(i1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX6-NEXT: [[ZEXT10:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO29]](i1) + ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(i32), [[UADDO31:%[0-9]+]]:_(i1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO31]](i1) + ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(i32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(i32) = G_MUL [[UV21]], [[UADDE6]] + ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(i32) = G_UMULH [[UV21]], [[UADDO26]] + ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(i32) = G_UMULH [[UV20]], [[UADDE6]] + ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(i32), [[UADDO33:%[0-9]+]]:_(i1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO33]](i1) + ; GFX6-NEXT: [[UADDO34:%[0-9]+]]:_(i32), [[UADDO35:%[0-9]+]]:_(i1) = G_UADDO [[UADDO32]], [[UMULH12]] + ; GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO35]](i1) + ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(i32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX6-NEXT: [[UADDO36:%[0-9]+]]:_(i32), [[UADDO37:%[0-9]+]]:_(i1) = G_UADDO [[UADDO34]], [[ADD12]] + ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO37]](i1) + ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(i32) = G_ADD [[ADD13]], [[ZEXT14]] + ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(i32) = G_UMULH [[UV21]], [[UADDE6]] + ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(i32) = G_ADD [[UMULH13]], [[ADD14]] + ; GFX6-NEXT: [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(i32) = G_MUL [[UV22]], [[UADDO36]] + ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(i32) = G_MUL [[UV23]], [[UADDO36]] + ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(i32) = G_MUL [[UV22]], [[ADD15]] + ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(i32) = G_UMULH [[UV22]], [[UADDO36]] + ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(i32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(i32) = G_ADD [[ADD16]], [[UMULH14]] + ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(i32), [[USUBO3:%[0-9]+]]:_(i1) = G_USUBO [[UV18]], [[MUL15]] + ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV19]], [[ADD17]], [[USUBO3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV19]], [[ADD17]] + ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO2]](i32), [[USUBE2]](i32) + ; GFX6-NEXT: [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE2]](i32), [[UV25]] + ; GFX6-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO2]](i32), [[UV24]] + ; GFX6-NEXT: [[SEXT1:%[0-9]+]]:_(i32) = G_SEXT [[ICMP1]](i1) + ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE2]](i32), [[UV25]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SEXT1]], [[SEXT]] + ; GFX6-NEXT: [[USUBO4:%[0-9]+]]:_(i32), [[USUBO5:%[0-9]+]]:_(i1) = G_USUBO [[USUBO2]], [[UV24]] + ; GFX6-NEXT: [[USUBE4:%[0-9]+]]:_(i32), [[USUBE5:%[0-9]+]]:_(i1) = G_USUBE [[SUB]], [[UV25]], [[USUBO3]] + ; GFX6-NEXT: [[USUBE6:%[0-9]+]]:_(i32), [[USUBE7:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX6-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO4]](i32), [[USUBE6]](i32) + ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE6]](i32), [[UV25]] + ; GFX6-NEXT: [[SEXT2:%[0-9]+]]:_(i32) = G_SEXT [[ICMP3]](i1) + ; 
GFX6-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO4]](i32), [[UV24]] + ; GFX6-NEXT: [[SEXT3:%[0-9]+]]:_(i32) = G_SEXT [[ICMP4]](i1) + ; GFX6-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE6]](i32), [[UV25]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP5]](i1), [[SEXT3]], [[SEXT2]] + ; GFX6-NEXT: [[USUBO6:%[0-9]+]]:_(i32), [[USUBO7:%[0-9]+]]:_(i1) = G_USUBO [[USUBO4]], [[UV24]] + ; GFX6-NEXT: [[USUBE8:%[0-9]+]]:_(i32), [[USUBE9:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[UV25]], [[USUBO5]] + ; GFX6-NEXT: [[USUBE10:%[0-9]+]]:_(i32), [[USUBE11:%[0-9]+]]:_(i1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] + ; GFX6-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO6]](i32), [[USUBE10]](i32) + ; GFX6-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT1]](i32), [[C6]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[MV4]], [[MV3]] + ; GFX6-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT]](i32), [[C6]] + ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP7]](i1), [[SELECT2]], [[MV2]] + ; GFX6-NEXT: [[XOR2:%[0-9]+]]:_(i64) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX6-NEXT: [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR2]](i64) + ; GFX6-NEXT: [[UV28:%[0-9]+]]:_(i32), [[UV29:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR]](i64) + ; GFX6-NEXT: [[USUBO8:%[0-9]+]]:_(i32), [[USUBO9:%[0-9]+]]:_(i1) = G_USUBO [[UV26]], [[UV28]] + ; GFX6-NEXT: [[USUBE12:%[0-9]+]]:_(i32), [[USUBE13:%[0-9]+]]:_(i1) = G_USUBE [[UV27]], [[UV29]], [[USUBO9]] + ; GFX6-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO8]](i32), [[USUBE12]](i32) + ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(i64) = G_ASHR [[UV1]], [[C]](i32) + ; GFX6-NEXT: [[ASHR3:%[0-9]+]]:_(i64) = G_ASHR [[UV3]], [[C]](i32) + ; GFX6-NEXT: [[UV30:%[0-9]+]]:_(i32), [[UV31:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV1]](i64) + ; GFX6-NEXT: [[UV32:%[0-9]+]]:_(i32), [[UV33:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR2]](i64) + ; GFX6-NEXT: [[UADDO38:%[0-9]+]]:_(i32), [[UADDO39:%[0-9]+]]:_(i1) = G_UADDO [[UV30]], [[UV32]] + ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(i32), [[UADDE9:%[0-9]+]]:_(i1) = G_UADDE [[UV31]], [[UV33]], [[UADDO39]] + ; GFX6-NEXT: [[MV6:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO38]](i32), [[UADDE8]](i32) + ; GFX6-NEXT: [[UV34:%[0-9]+]]:_(i32), [[UV35:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV3]](i64) + ; GFX6-NEXT: [[UV36:%[0-9]+]]:_(i32), [[UV37:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR3]](i64) + ; GFX6-NEXT: [[UADDO40:%[0-9]+]]:_(i32), [[UADDO41:%[0-9]+]]:_(i1) = G_UADDO [[UV34]], [[UV36]] + ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(i32), [[UADDE11:%[0-9]+]]:_(i1) = G_UADDE [[UV35]], [[UV37]], [[UADDO41]] + ; GFX6-NEXT: [[MV7:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO40]](i32), [[UADDE10]](i32) + ; GFX6-NEXT: [[XOR3:%[0-9]+]]:_(i64) = G_XOR [[MV6]], [[ASHR2]] + ; GFX6-NEXT: [[XOR4:%[0-9]+]]:_(i64) = G_XOR [[MV7]], [[ASHR3]] + ; GFX6-NEXT: [[UV38:%[0-9]+]]:_(i32), [[UV39:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR4]](i64) + ; GFX6-NEXT: [[UITOFP2:%[0-9]+]]:_(f32) = G_UITOFP [[UV38]](i32) + ; GFX6-NEXT: [[UITOFP3:%[0-9]+]]:_(f32) = G_UITOFP [[UV39]](i32) + ; GFX6-NEXT: [[FMUL4:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP3]], [[C1]] + ; GFX6-NEXT: [[FADD2:%[0-9]+]]:_(f32) = G_FADD [[FMUL4]], [[UITOFP2]] + ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD2]](f32) + ; GFX6-NEXT: [[FMUL5:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] + ; GFX6-NEXT: [[FMUL6:%[0-9]+]]:_(f32) = G_FMUL [[FMUL5]], [[C3]] + ; GFX6-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(f32) = 
G_INTRINSIC_TRUNC [[FMUL6]] + ; GFX6-NEXT: [[FMUL7:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C4]] + ; GFX6-NEXT: [[FADD3:%[0-9]+]]:_(f32) = G_FADD [[FMUL7]], [[FMUL5]] + ; GFX6-NEXT: [[FPTOUI2:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD3]](f32) + ; GFX6-NEXT: [[FPTOUI3:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC1]](f32) + ; GFX6-NEXT: [[UV40:%[0-9]+]]:_(i32), [[UV41:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C5]](i64) + ; GFX6-NEXT: [[UV42:%[0-9]+]]:_(i32), [[UV43:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR4]](i64) + ; GFX6-NEXT: [[USUBO10:%[0-9]+]]:_(i32), [[USUBO11:%[0-9]+]]:_(i1) = G_USUBO [[UV40]], [[UV42]] + ; GFX6-NEXT: [[USUBE14:%[0-9]+]]:_(i32), [[USUBE15:%[0-9]+]]:_(i1) = G_USUBE [[UV41]], [[UV43]], [[USUBO11]] + ; GFX6-NEXT: [[MUL18:%[0-9]+]]:_(i32) = G_MUL [[USUBO10]], [[FPTOUI2]] + ; GFX6-NEXT: [[MUL19:%[0-9]+]]:_(i32) = G_MUL [[USUBE14]], [[FPTOUI2]] + ; GFX6-NEXT: [[MUL20:%[0-9]+]]:_(i32) = G_MUL [[USUBO10]], [[FPTOUI3]] + ; GFX6-NEXT: [[UMULH15:%[0-9]+]]:_(i32) = G_UMULH [[USUBO10]], [[FPTOUI2]] + ; GFX6-NEXT: [[ADD18:%[0-9]+]]:_(i32) = G_ADD [[MUL19]], [[MUL20]] + ; GFX6-NEXT: [[ADD19:%[0-9]+]]:_(i32) = G_ADD [[ADD18]], [[UMULH15]] + ; GFX6-NEXT: [[MUL21:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI3]], [[MUL18]] + ; GFX6-NEXT: [[MUL22:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI2]], [[ADD19]] + ; GFX6-NEXT: [[UMULH16:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI2]], [[MUL18]] + ; GFX6-NEXT: [[UADDO42:%[0-9]+]]:_(i32), [[UADDO43:%[0-9]+]]:_(i1) = G_UADDO [[MUL21]], [[MUL22]] + ; GFX6-NEXT: [[ZEXT15:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO43]](i1) + ; GFX6-NEXT: [[UADDO44:%[0-9]+]]:_(i32), [[UADDO45:%[0-9]+]]:_(i1) = G_UADDO [[UADDO42]], [[UMULH16]] + ; GFX6-NEXT: [[ZEXT16:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO45]](i1) + ; GFX6-NEXT: [[ADD20:%[0-9]+]]:_(i32) = G_ADD [[ZEXT15]], [[ZEXT16]] + ; GFX6-NEXT: [[MUL23:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI3]], [[ADD19]] + ; GFX6-NEXT: [[UMULH17:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI3]], [[MUL18]] + ; GFX6-NEXT: [[UMULH18:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI2]], [[ADD19]] + ; GFX6-NEXT: [[UADDO46:%[0-9]+]]:_(i32), [[UADDO47:%[0-9]+]]:_(i1) = G_UADDO [[MUL23]], [[UMULH17]] + ; GFX6-NEXT: [[ZEXT17:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO47]](i1) + ; GFX6-NEXT: [[UADDO48:%[0-9]+]]:_(i32), [[UADDO49:%[0-9]+]]:_(i1) = G_UADDO [[UADDO46]], [[UMULH18]] + ; GFX6-NEXT: [[ZEXT18:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO49]](i1) + ; GFX6-NEXT: [[ADD21:%[0-9]+]]:_(i32) = G_ADD [[ZEXT17]], [[ZEXT18]] + ; GFX6-NEXT: [[UADDO50:%[0-9]+]]:_(i32), [[UADDO51:%[0-9]+]]:_(i1) = G_UADDO [[UADDO48]], [[ADD20]] + ; GFX6-NEXT: [[ZEXT19:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO51]](i1) + ; GFX6-NEXT: [[ADD22:%[0-9]+]]:_(i32) = G_ADD [[ADD21]], [[ZEXT19]] + ; GFX6-NEXT: [[UMULH19:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI3]], [[ADD19]] + ; GFX6-NEXT: [[ADD23:%[0-9]+]]:_(i32) = G_ADD [[UMULH19]], [[ADD22]] + ; GFX6-NEXT: [[UADDO52:%[0-9]+]]:_(i32), [[UADDO53:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI2]], [[UADDO50]] + ; GFX6-NEXT: [[UADDE12:%[0-9]+]]:_(i32), [[UADDE13:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO53]] + ; GFX6-NEXT: [[MUL24:%[0-9]+]]:_(i32) = G_MUL [[USUBO10]], [[UADDO52]] + ; GFX6-NEXT: [[MUL25:%[0-9]+]]:_(i32) = G_MUL [[USUBE14]], [[UADDO52]] + ; GFX6-NEXT: [[MUL26:%[0-9]+]]:_(i32) = G_MUL [[USUBO10]], [[UADDE12]] + ; GFX6-NEXT: [[UMULH20:%[0-9]+]]:_(i32) = G_UMULH [[USUBO10]], [[UADDO52]] + ; GFX6-NEXT: [[ADD24:%[0-9]+]]:_(i32) = G_ADD [[MUL25]], [[MUL26]] + ; GFX6-NEXT: [[ADD25:%[0-9]+]]:_(i32) = G_ADD [[ADD24]], [[UMULH20]] + ; GFX6-NEXT: [[MUL27:%[0-9]+]]:_(i32) = G_MUL [[UADDE12]], [[MUL24]] + ; GFX6-NEXT: 
[[MUL28:%[0-9]+]]:_(i32) = G_MUL [[UADDO52]], [[ADD25]] + ; GFX6-NEXT: [[UMULH21:%[0-9]+]]:_(i32) = G_UMULH [[UADDO52]], [[MUL24]] + ; GFX6-NEXT: [[UADDO54:%[0-9]+]]:_(i32), [[UADDO55:%[0-9]+]]:_(i1) = G_UADDO [[MUL27]], [[MUL28]] + ; GFX6-NEXT: [[ZEXT20:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO55]](i1) + ; GFX6-NEXT: [[UADDO56:%[0-9]+]]:_(i32), [[UADDO57:%[0-9]+]]:_(i1) = G_UADDO [[UADDO54]], [[UMULH21]] + ; GFX6-NEXT: [[ZEXT21:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO57]](i1) + ; GFX6-NEXT: [[ADD26:%[0-9]+]]:_(i32) = G_ADD [[ZEXT20]], [[ZEXT21]] + ; GFX6-NEXT: [[MUL29:%[0-9]+]]:_(i32) = G_MUL [[UADDE12]], [[ADD25]] + ; GFX6-NEXT: [[UMULH22:%[0-9]+]]:_(i32) = G_UMULH [[UADDE12]], [[MUL24]] + ; GFX6-NEXT: [[UMULH23:%[0-9]+]]:_(i32) = G_UMULH [[UADDO52]], [[ADD25]] + ; GFX6-NEXT: [[UADDO58:%[0-9]+]]:_(i32), [[UADDO59:%[0-9]+]]:_(i1) = G_UADDO [[MUL29]], [[UMULH22]] + ; GFX6-NEXT: [[ZEXT22:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO59]](i1) + ; GFX6-NEXT: [[UADDO60:%[0-9]+]]:_(i32), [[UADDO61:%[0-9]+]]:_(i1) = G_UADDO [[UADDO58]], [[UMULH23]] + ; GFX6-NEXT: [[ZEXT23:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO61]](i1) + ; GFX6-NEXT: [[ADD27:%[0-9]+]]:_(i32) = G_ADD [[ZEXT22]], [[ZEXT23]] + ; GFX6-NEXT: [[UADDO62:%[0-9]+]]:_(i32), [[UADDO63:%[0-9]+]]:_(i1) = G_UADDO [[UADDO60]], [[ADD26]] + ; GFX6-NEXT: [[ZEXT24:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO63]](i1) + ; GFX6-NEXT: [[ADD28:%[0-9]+]]:_(i32) = G_ADD [[ADD27]], [[ZEXT24]] + ; GFX6-NEXT: [[UMULH24:%[0-9]+]]:_(i32) = G_UMULH [[UADDE12]], [[ADD25]] + ; GFX6-NEXT: [[ADD29:%[0-9]+]]:_(i32) = G_ADD [[UMULH24]], [[ADD28]] + ; GFX6-NEXT: [[UADDO64:%[0-9]+]]:_(i32), [[UADDO65:%[0-9]+]]:_(i1) = G_UADDO [[UADDO52]], [[UADDO62]] + ; GFX6-NEXT: [[UADDE14:%[0-9]+]]:_(i32), [[UADDE15:%[0-9]+]]:_(i1) = G_UADDE [[UADDE12]], [[ADD29]], [[UADDO65]] + ; GFX6-NEXT: [[UV44:%[0-9]+]]:_(i32), [[UV45:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR3]](i64) + ; GFX6-NEXT: [[UV46:%[0-9]+]]:_(i32), [[UV47:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR3]](i64) + ; GFX6-NEXT: [[MUL30:%[0-9]+]]:_(i32) = G_MUL [[UV47]], [[UADDO64]] + ; GFX6-NEXT: [[MUL31:%[0-9]+]]:_(i32) = G_MUL [[UV46]], [[UADDE14]] + ; GFX6-NEXT: [[UMULH25:%[0-9]+]]:_(i32) = G_UMULH [[UV46]], [[UADDO64]] + ; GFX6-NEXT: [[UADDO66:%[0-9]+]]:_(i32), [[UADDO67:%[0-9]+]]:_(i1) = G_UADDO [[MUL30]], [[MUL31]] + ; GFX6-NEXT: [[ZEXT25:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO67]](i1) + ; GFX6-NEXT: [[UADDO68:%[0-9]+]]:_(i32), [[UADDO69:%[0-9]+]]:_(i1) = G_UADDO [[UADDO66]], [[UMULH25]] + ; GFX6-NEXT: [[ZEXT26:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO69]](i1) + ; GFX6-NEXT: [[ADD30:%[0-9]+]]:_(i32) = G_ADD [[ZEXT25]], [[ZEXT26]] + ; GFX6-NEXT: [[MUL32:%[0-9]+]]:_(i32) = G_MUL [[UV47]], [[UADDE14]] + ; GFX6-NEXT: [[UMULH26:%[0-9]+]]:_(i32) = G_UMULH [[UV47]], [[UADDO64]] + ; GFX6-NEXT: [[UMULH27:%[0-9]+]]:_(i32) = G_UMULH [[UV46]], [[UADDE14]] + ; GFX6-NEXT: [[UADDO70:%[0-9]+]]:_(i32), [[UADDO71:%[0-9]+]]:_(i1) = G_UADDO [[MUL32]], [[UMULH26]] + ; GFX6-NEXT: [[ZEXT27:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO71]](i1) + ; GFX6-NEXT: [[UADDO72:%[0-9]+]]:_(i32), [[UADDO73:%[0-9]+]]:_(i1) = G_UADDO [[UADDO70]], [[UMULH27]] + ; GFX6-NEXT: [[ZEXT28:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO73]](i1) + ; GFX6-NEXT: [[ADD31:%[0-9]+]]:_(i32) = G_ADD [[ZEXT27]], [[ZEXT28]] + ; GFX6-NEXT: [[UADDO74:%[0-9]+]]:_(i32), [[UADDO75:%[0-9]+]]:_(i1) = G_UADDO [[UADDO72]], [[ADD30]] + ; GFX6-NEXT: [[ZEXT29:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO75]](i1) + ; GFX6-NEXT: [[ADD32:%[0-9]+]]:_(i32) = G_ADD [[ADD31]], [[ZEXT29]] + ; GFX6-NEXT: [[UMULH28:%[0-9]+]]:_(i32) = G_UMULH [[UV47]], [[UADDE14]] + ; GFX6-NEXT: 
[[ADD33:%[0-9]+]]:_(i32) = G_ADD [[UMULH28]], [[ADD32]] + ; GFX6-NEXT: [[UV48:%[0-9]+]]:_(i32), [[UV49:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR4]](i64) + ; GFX6-NEXT: [[MUL33:%[0-9]+]]:_(i32) = G_MUL [[UV48]], [[UADDO74]] + ; GFX6-NEXT: [[MUL34:%[0-9]+]]:_(i32) = G_MUL [[UV49]], [[UADDO74]] + ; GFX6-NEXT: [[MUL35:%[0-9]+]]:_(i32) = G_MUL [[UV48]], [[ADD33]] + ; GFX6-NEXT: [[UMULH29:%[0-9]+]]:_(i32) = G_UMULH [[UV48]], [[UADDO74]] + ; GFX6-NEXT: [[ADD34:%[0-9]+]]:_(i32) = G_ADD [[MUL34]], [[MUL35]] + ; GFX6-NEXT: [[ADD35:%[0-9]+]]:_(i32) = G_ADD [[ADD34]], [[UMULH29]] + ; GFX6-NEXT: [[USUBO12:%[0-9]+]]:_(i32), [[USUBO13:%[0-9]+]]:_(i1) = G_USUBO [[UV44]], [[MUL33]] + ; GFX6-NEXT: [[USUBE16:%[0-9]+]]:_(i32), [[USUBE17:%[0-9]+]]:_(i1) = G_USUBE [[UV45]], [[ADD35]], [[USUBO13]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[UV45]], [[ADD35]] + ; GFX6-NEXT: [[MV8:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO12]](i32), [[USUBE16]](i32) + ; GFX6-NEXT: [[UV50:%[0-9]+]]:_(i32), [[UV51:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR4]](i64) + ; GFX6-NEXT: [[ICMP8:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE16]](i32), [[UV51]] + ; GFX6-NEXT: [[SEXT4:%[0-9]+]]:_(i32) = G_SEXT [[ICMP8]](i1) + ; GFX6-NEXT: [[ICMP9:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO12]](i32), [[UV50]] + ; GFX6-NEXT: [[SEXT5:%[0-9]+]]:_(i32) = G_SEXT [[ICMP9]](i1) + ; GFX6-NEXT: [[ICMP10:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE16]](i32), [[UV51]] + ; GFX6-NEXT: [[SELECT4:%[0-9]+]]:_(i32) = G_SELECT [[ICMP10]](i1), [[SEXT5]], [[SEXT4]] + ; GFX6-NEXT: [[USUBO14:%[0-9]+]]:_(i32), [[USUBO15:%[0-9]+]]:_(i1) = G_USUBO [[USUBO12]], [[UV50]] + ; GFX6-NEXT: [[USUBE18:%[0-9]+]]:_(i32), [[USUBE19:%[0-9]+]]:_(i1) = G_USUBE [[SUB1]], [[UV51]], [[USUBO13]] + ; GFX6-NEXT: [[USUBE20:%[0-9]+]]:_(i32), [[USUBE21:%[0-9]+]]:_(i1) = G_USUBE [[USUBE18]], [[C6]], [[USUBO15]] + ; GFX6-NEXT: [[MV9:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO14]](i32), [[USUBE20]](i32) + ; GFX6-NEXT: [[ICMP11:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE20]](i32), [[UV51]] + ; GFX6-NEXT: [[SEXT6:%[0-9]+]]:_(i32) = G_SEXT [[ICMP11]](i1) + ; GFX6-NEXT: [[ICMP12:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO14]](i32), [[UV50]] + ; GFX6-NEXT: [[SEXT7:%[0-9]+]]:_(i32) = G_SEXT [[ICMP12]](i1) + ; GFX6-NEXT: [[ICMP13:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE20]](i32), [[UV51]] + ; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(i32) = G_SELECT [[ICMP13]](i1), [[SEXT7]], [[SEXT6]] + ; GFX6-NEXT: [[USUBO16:%[0-9]+]]:_(i32), [[USUBO17:%[0-9]+]]:_(i1) = G_USUBO [[USUBO14]], [[UV50]] + ; GFX6-NEXT: [[USUBE22:%[0-9]+]]:_(i32), [[USUBE23:%[0-9]+]]:_(i1) = G_USUBE [[USUBE18]], [[UV51]], [[USUBO15]] + ; GFX6-NEXT: [[USUBE24:%[0-9]+]]:_(i32), [[USUBE25:%[0-9]+]]:_(i1) = G_USUBE [[USUBE22]], [[C6]], [[USUBO17]] + ; GFX6-NEXT: [[MV10:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO16]](i32), [[USUBE24]](i32) + ; GFX6-NEXT: [[ICMP14:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT5]](i32), [[C6]] + ; GFX6-NEXT: [[SELECT6:%[0-9]+]]:_(i64) = G_SELECT [[ICMP14]](i1), [[MV10]], [[MV9]] + ; GFX6-NEXT: [[ICMP15:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT4]](i32), [[C6]] + ; GFX6-NEXT: [[SELECT7:%[0-9]+]]:_(i64) = G_SELECT [[ICMP15]](i1), [[SELECT6]], [[MV8]] + ; GFX6-NEXT: [[XOR5:%[0-9]+]]:_(i64) = G_XOR [[SELECT7]], [[ASHR2]] + ; GFX6-NEXT: [[UV52:%[0-9]+]]:_(i32), [[UV53:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR5]](i64) + ; GFX6-NEXT: [[UV54:%[0-9]+]]:_(i32), [[UV55:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR2]](i64) + ; GFX6-NEXT: [[USUBO18:%[0-9]+]]:_(i32), [[USUBO19:%[0-9]+]]:_(i1) = G_USUBO [[UV52]], 
[[UV54]] + ; GFX6-NEXT: [[USUBE26:%[0-9]+]]:_(i32), [[USUBE27:%[0-9]+]]:_(i1) = G_USUBE [[UV53]], [[UV55]], [[USUBO19]] + ; GFX6-NEXT: [[MV11:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO18]](i32), [[USUBE26]](i32) + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[MV5]](i64), [[MV11]](i64) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) + ; ; GFX8-LABEL: name: test_srem_v2s64 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[C]](s32) - ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[C]](s32) - ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] - ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) - ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] - ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] - ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] - ; GFX8-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]] - ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV12]](s32) - ; GFX8-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV13]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] - ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] - ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 - ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; GFX8-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] - ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] - ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; GFX8-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] - ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] - ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) - ; GFX8-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) - ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = 
G_UNMERGE_VALUES [[C5]](s64) - ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX8-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV16]] - ; GFX8-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV17]], [[USUBO1]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C5]] - ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV19]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] - ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV18]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV20]] - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV18]] - ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV20]] - ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV18]] - ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV20]] - ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD]] - ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] - ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV20]] - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] - ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) - ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV23]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE4]], [[ANYEXT1]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO14]], 
[[AMDGPU_MAD_U64_U32_8]] - ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV22]] - ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[UV24]] - ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV22]] - ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] - ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV24]] - ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV22]] - ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV24]] - ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] - ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD4]] - ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV24]] - ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]] - ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX8-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDO26]] - ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[UADDE6]] - ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDO26]] - ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] - ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDE6]] - ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDO26]] - ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDE6]] - ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] - ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD8]] - ; GFX8-NEXT: 
[[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDE6]] - ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] - ; GFX8-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[UADDO36]], [[C5]] - ; GFX8-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) - ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV33]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV31]](s32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]] - ; GFX8-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) - ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV32]] - ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV34]], [[USUBO3]] - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV27]], [[UV34]] - ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; GFX8-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV37]] - ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV36]] - ; GFX8-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV37]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX8-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV36]] - ; GFX8-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV37]], [[USUBO3]] - ; GFX8-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] - ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) - ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV37]] - ; GFX8-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV36]] - ; GFX8-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV37]] - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX8-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV36]] - ; GFX8-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV37]], [[USUBO5]] - ; GFX8-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] - ; GFX8-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) - ; GFX8-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] - ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] - ; GFX8-NEXT: 
[[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] - ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] - ; GFX8-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[ASHR]] - ; GFX8-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) - ; GFX8-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX8-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV38]], [[UV40]] - ; GFX8-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV39]], [[UV41]], [[USUBO9]] - ; GFX8-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32) - ; GFX8-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) - ; GFX8-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C]](s32) - ; GFX8-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX8-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64) - ; GFX8-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UV42]], [[UV44]] - ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UV43]], [[UV45]], [[UADDO39]] - ; GFX8-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) - ; GFX8-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX8-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64) - ; GFX8-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UV46]], [[UV48]] - ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UV47]], [[UV49]], [[UADDO41]] - ; GFX8-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) - ; GFX8-NEXT: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[MV6]], [[ASHR2]] - ; GFX8-NEXT: [[XOR4:%[0-9]+]]:_(s64) = G_XOR [[MV7]], [[ASHR3]] - ; GFX8-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) - ; GFX8-NEXT: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV50]](s32) - ; GFX8-NEXT: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV51]](s32) - ; GFX8-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C1]] - ; GFX8-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[UITOFP2]] - ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD2]](s32) - ; GFX8-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] - ; GFX8-NEXT: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[FMUL5]], [[C3]] - ; GFX8-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL6]] - ; GFX8-NEXT: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C4]] - ; GFX8-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]] - ; GFX8-NEXT: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD3]](s32) - ; GFX8-NEXT: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32) - ; GFX8-NEXT: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) - ; GFX8-NEXT: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) - ; GFX8-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV52]], [[UV54]] - ; GFX8-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV53]], [[UV55]], [[USUBO11]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_18:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_19:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](s32), [[FPTOUI2]], [[C5]] - ; GFX8-NEXT: [[UV56:%[0-9]+]]:_(s32), 
[[UV57:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_18]](s64) - ; GFX8-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[UV57]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_20:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_21:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](s32), [[FPTOUI3]], [[ANYEXT3]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_22:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_23:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE14]](s32), [[FPTOUI2]], [[AMDGPU_MAD_U64_U32_20]] - ; GFX8-NEXT: [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_22]](s64) - ; GFX8-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV56]] - ; GFX8-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[UV58]] - ; GFX8-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV56]] - ; GFX8-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] - ; GFX8-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1) - ; GFX8-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH12]] - ; GFX8-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO45]](s1) - ; GFX8-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] - ; GFX8-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV58]] - ; GFX8-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV56]] - ; GFX8-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV58]] - ; GFX8-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]] - ; GFX8-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) - ; GFX8-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH14]] - ; GFX8-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1) - ; GFX8-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] - ; GFX8-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[UADDO48]], [[ADD12]] - ; GFX8-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) - ; GFX8-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT19]] - ; GFX8-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV58]] - ; GFX8-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[ADD14]] - ; GFX8-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO50]] - ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD15]], [[UADDO53]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](s32), [[UADDO52]], [[C5]] - ; GFX8-NEXT: [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_24]](s64) - ; GFX8-NEXT: [[ANYEXT4:%[0-9]+]]:_(s64) = G_ANYEXT [[UV61]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](s32), [[UADDE12]], [[ANYEXT4]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE14]](s32), [[UADDO52]], [[AMDGPU_MAD_U64_U32_26]] - ; GFX8-NEXT: [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_28]](s64) - ; GFX8-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV60]] - ; GFX8-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO52]], [[UV62]] - ; GFX8-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[UV60]] - ; GFX8-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] - ; 
GFX8-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) - ; GFX8-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH16]] - ; GFX8-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO57]](s1) - ; GFX8-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] - ; GFX8-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV62]] - ; GFX8-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV60]] - ; GFX8-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[UV62]] - ; GFX8-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]] - ; GFX8-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) - ; GFX8-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH18]] - ; GFX8-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1) - ; GFX8-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] - ; GFX8-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[UADDO60]], [[ADD16]] - ; GFX8-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) - ; GFX8-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[ZEXT24]] - ; GFX8-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV62]] - ; GFX8-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD18]] - ; GFX8-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[UADDO62]] - ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD19]], [[UADDO65]] - ; GFX8-NEXT: [[UV64:%[0-9]+]]:_(s32), [[UV65:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) - ; GFX8-NEXT: [[UV66:%[0-9]+]]:_(s32), [[UV67:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) - ; GFX8-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV67]], [[UADDO64]] - ; GFX8-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV66]], [[UADDE14]] - ; GFX8-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV66]], [[UADDO64]] - ; GFX8-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]] - ; GFX8-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) - ; GFX8-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH20]] - ; GFX8-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO69]](s1) - ; GFX8-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] - ; GFX8-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV67]], [[UADDE14]] - ; GFX8-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV67]], [[UADDO64]] - ; GFX8-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV66]], [[UADDE14]] - ; GFX8-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]] - ; GFX8-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1) - ; GFX8-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH22]] - ; GFX8-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO73]](s1) - ; GFX8-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] - ; GFX8-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[ADD20]] - ; GFX8-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO75]](s1) - ; GFX8-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT29]] - ; GFX8-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV67]], [[UADDE14]] - ; GFX8-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[ADD22]] - ; GFX8-NEXT: [[UV68:%[0-9]+]]:_(s32), [[UV69:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = 
G_AMDGPU_MAD_U64_U32 [[UV68]](s32), [[UADDO74]], [[C5]] - ; GFX8-NEXT: [[UV70:%[0-9]+]]:_(s32), [[UV71:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_30]](s64) - ; GFX8-NEXT: [[ANYEXT5:%[0-9]+]]:_(s64) = G_ANYEXT [[UV71]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV68]](s32), [[ADD23]], [[ANYEXT5]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV69]](s32), [[UADDO74]], [[AMDGPU_MAD_U64_U32_32]] - ; GFX8-NEXT: [[UV72:%[0-9]+]]:_(s32), [[UV73:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_34]](s64) - ; GFX8-NEXT: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[UV64]], [[UV70]] - ; GFX8-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[UV65]], [[UV72]], [[USUBO13]] - ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV65]], [[UV72]] - ; GFX8-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO12]](s32), [[USUBE16]](s32) - ; GFX8-NEXT: [[UV74:%[0-9]+]]:_(s32), [[UV75:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) - ; GFX8-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE16]](s32), [[UV75]] - ; GFX8-NEXT: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1) - ; GFX8-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV74]] - ; GFX8-NEXT: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1) - ; GFX8-NEXT: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE16]](s32), [[UV75]] - ; GFX8-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]] - ; GFX8-NEXT: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[USUBO12]], [[UV74]] - ; GFX8-NEXT: [[USUBE18:%[0-9]+]]:_(s32), [[USUBE19:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV75]], [[USUBO13]] - ; GFX8-NEXT: [[USUBE20:%[0-9]+]]:_(s32), [[USUBE21:%[0-9]+]]:_(s1) = G_USUBE [[USUBE18]], [[C6]], [[USUBO15]] - ; GFX8-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO14]](s32), [[USUBE20]](s32) - ; GFX8-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE20]](s32), [[UV75]] - ; GFX8-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) - ; GFX8-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO14]](s32), [[UV74]] - ; GFX8-NEXT: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1) - ; GFX8-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE20]](s32), [[UV75]] - ; GFX8-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] - ; GFX8-NEXT: [[USUBO16:%[0-9]+]]:_(s32), [[USUBO17:%[0-9]+]]:_(s1) = G_USUBO [[USUBO14]], [[UV74]] - ; GFX8-NEXT: [[USUBE22:%[0-9]+]]:_(s32), [[USUBE23:%[0-9]+]]:_(s1) = G_USUBE [[USUBE18]], [[UV75]], [[USUBO15]] - ; GFX8-NEXT: [[USUBE24:%[0-9]+]]:_(s32), [[USUBE25:%[0-9]+]]:_(s1) = G_USUBE [[USUBE22]], [[C6]], [[USUBO17]] - ; GFX8-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO16]](s32), [[USUBE24]](s32) - ; GFX8-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C6]] - ; GFX8-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV10]], [[MV9]] - ; GFX8-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C6]] - ; GFX8-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV8]] - ; GFX8-NEXT: [[XOR5:%[0-9]+]]:_(s64) = G_XOR [[SELECT7]], [[ASHR2]] - ; GFX8-NEXT: [[UV76:%[0-9]+]]:_(s32), [[UV77:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) - ; GFX8-NEXT: [[UV78:%[0-9]+]]:_(s32), [[UV79:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES 
[[ASHR2]](s64) - ; GFX8-NEXT: [[USUBO18:%[0-9]+]]:_(s32), [[USUBO19:%[0-9]+]]:_(s1) = G_USUBO [[UV76]], [[UV78]] - ; GFX8-NEXT: [[USUBE26:%[0-9]+]]:_(s32), [[USUBE27:%[0-9]+]]:_(s1) = G_USUBE [[UV77]], [[UV79]], [[USUBO19]] - ; GFX8-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO18]](s32), [[USUBE26]](s32) - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV5]](s64), [[MV11]](s64) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY1]](<2 x i64>) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[UV]], [[C]](i32) + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[UV2]], [[C]](i32) + ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV]](i64) + ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR]](i64) + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV4]], [[UV6]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR1]](i64) + ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UV8]], [[UV10]] + ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] + ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO2]](i32), [[UADDE2]](i32) + ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(i64) = G_XOR [[MV]], [[ASHR]] + ; GFX8-NEXT: [[XOR1:%[0-9]+]]:_(i64) = G_XOR [[MV1]], [[ASHR1]] + ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[UV12]](i32) + ; GFX8-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[UV13]](i32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD]](f32) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX8-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX8-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD1]](f32) + ; GFX8-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC]](f32) + ; GFX8-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C5]](i64) + ; 
GFX8-NEXT: [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX8-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV14]], [[UV16]] + ; GFX8-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV15]], [[UV17]], [[USUBO1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[FPTOUI]], [[C5]] + ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](i64) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[UV19]](i32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[FPTOUI1]], [[ANYEXT]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](i32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](i64) + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[UV18]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI]], [[UV20]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[UV18]] + ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(i32), [[UADDO5:%[0-9]+]]:_(i1) = G_UADDO [[MUL]], [[MUL1]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO5]](i1) + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(i32), [[UADDO7:%[0-9]+]]:_(i1) = G_UADDO [[UADDO4]], [[UMULH]] + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO7]](i1) + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[UV20]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[UV18]] + ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[UV20]] + ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(i32), [[UADDO9:%[0-9]+]]:_(i1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO9]](i1) + ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(i32), [[UADDO11:%[0-9]+]]:_(i1) = G_UADDO [[UADDO8]], [[UMULH2]] + ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO11]](i1) + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(i32), [[UADDO13:%[0-9]+]]:_(i1) = G_UADDO [[UADDO10]], [[ADD]] + ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO13]](i1) + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[UV20]] + ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UMULH3]], [[ADD2]] + ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(i32), [[UADDO15:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI]], [[UADDO12]] + ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(i32), [[UADDE5:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[UADDO14]], [[C5]] + ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](i64) + ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[UV23]](i32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[UADDE4]], [[ANYEXT1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](i32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX8-NEXT: 
[[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](i64) + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UADDE4]], [[UV22]] + ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(i32) = G_MUL [[UADDO14]], [[UV24]] + ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(i32) = G_UMULH [[UADDO14]], [[UV22]] + ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(i32), [[UADDO17:%[0-9]+]]:_(i1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO17]](i1) + ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(i32), [[UADDO19:%[0-9]+]]:_(i1) = G_UADDO [[UADDO16]], [[UMULH4]] + ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO19]](i1) + ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(i32) = G_MUL [[UADDE4]], [[UV24]] + ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(i32) = G_UMULH [[UADDE4]], [[UV22]] + ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(i32) = G_UMULH [[UADDO14]], [[UV24]] + ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(i32), [[UADDO21:%[0-9]+]]:_(i1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO21]](i1) + ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(i32), [[UADDO23:%[0-9]+]]:_(i1) = G_UADDO [[UADDO20]], [[UMULH6]] + ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO23]](i1) + ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(i32), [[UADDO25:%[0-9]+]]:_(i1) = G_UADDO [[UADDO22]], [[ADD4]] + ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO25]](i1) + ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(i32) = G_UMULH [[UADDE4]], [[UV24]] + ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[UMULH7]], [[ADD6]] + ; GFX8-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(i32), [[UADDO27:%[0-9]+]]:_(i1) = G_UADDO [[UADDO14]], [[UADDO24]] + ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(i32), [[UADDE7:%[0-9]+]]:_(i1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]] + ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR]](i64) + ; GFX8-NEXT: [[UV28:%[0-9]+]]:_(i32), [[UV29:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR]](i64) + ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(i32) = G_MUL [[UV29]], [[UADDO26]] + ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(i32) = G_MUL [[UV28]], [[UADDE6]] + ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(i32) = G_UMULH [[UV28]], [[UADDO26]] + ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(i32), [[UADDO29:%[0-9]+]]:_(i1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO29]](i1) + ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(i32), [[UADDO31:%[0-9]+]]:_(i1) = G_UADDO [[UADDO28]], [[UMULH8]] + ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO31]](i1) + ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(i32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(i32) = G_MUL [[UV29]], [[UADDE6]] + ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(i32) = G_UMULH [[UV29]], [[UADDO26]] + ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(i32) = G_UMULH [[UV28]], [[UADDE6]] + ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(i32), [[UADDO33:%[0-9]+]]:_(i1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO33]](i1) + ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(i32), [[UADDO35:%[0-9]+]]:_(i1) = G_UADDO [[UADDO32]], [[UMULH10]] + ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO35]](i1) + ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(i32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(i32), [[UADDO37:%[0-9]+]]:_(i1) = G_UADDO [[UADDO34]], [[ADD8]] + ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(i32) = G_ZEXT 
[[UADDO37]](i1) + ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(i32) = G_ADD [[ADD9]], [[ZEXT14]] + ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(i32) = G_UMULH [[UV29]], [[UADDE6]] + ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(i32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX8-NEXT: [[UV30:%[0-9]+]]:_(i32), [[UV31:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV30]](i32), [[UADDO36]], [[C5]] + ; GFX8-NEXT: [[UV32:%[0-9]+]]:_(i32), [[UV33:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](i64) + ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(i64) = G_ANYEXT [[UV33]](i32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV30]](i32), [[ADD11]], [[ANYEXT2]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV31]](i32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX8-NEXT: [[UV34:%[0-9]+]]:_(i32), [[UV35:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](i64) + ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(i32), [[USUBO3:%[0-9]+]]:_(i1) = G_USUBO [[UV26]], [[UV32]] + ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV27]], [[UV34]], [[USUBO3]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV27]], [[UV34]] + ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO2]](i32), [[USUBE2]](i32) + ; GFX8-NEXT: [[UV36:%[0-9]+]]:_(i32), [[UV37:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE2]](i32), [[UV37]] + ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO2]](i32), [[UV36]] + ; GFX8-NEXT: [[SEXT1:%[0-9]+]]:_(i32) = G_SEXT [[ICMP1]](i1) + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE2]](i32), [[UV37]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SEXT1]], [[SEXT]] + ; GFX8-NEXT: [[USUBO4:%[0-9]+]]:_(i32), [[USUBO5:%[0-9]+]]:_(i1) = G_USUBO [[USUBO2]], [[UV36]] + ; GFX8-NEXT: [[USUBE4:%[0-9]+]]:_(i32), [[USUBE5:%[0-9]+]]:_(i1) = G_USUBE [[SUB]], [[UV37]], [[USUBO3]] + ; GFX8-NEXT: [[USUBE6:%[0-9]+]]:_(i32), [[USUBE7:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO4]](i32), [[USUBE6]](i32) + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE6]](i32), [[UV37]] + ; GFX8-NEXT: [[SEXT2:%[0-9]+]]:_(i32) = G_SEXT [[ICMP3]](i1) + ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO4]](i32), [[UV36]] + ; GFX8-NEXT: [[SEXT3:%[0-9]+]]:_(i32) = G_SEXT [[ICMP4]](i1) + ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE6]](i32), [[UV37]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP5]](i1), [[SEXT3]], [[SEXT2]] + ; GFX8-NEXT: [[USUBO6:%[0-9]+]]:_(i32), [[USUBO7:%[0-9]+]]:_(i1) = G_USUBO [[USUBO4]], [[UV36]] + ; GFX8-NEXT: [[USUBE8:%[0-9]+]]:_(i32), [[USUBE9:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[UV37]], [[USUBO5]] + ; GFX8-NEXT: [[USUBE10:%[0-9]+]]:_(i32), [[USUBE11:%[0-9]+]]:_(i1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] + ; GFX8-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO6]](i32), [[USUBE10]](i32) + ; GFX8-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT1]](i32), [[C6]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[MV4]], [[MV3]] + ; GFX8-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), 
[[SELECT]](i32), [[C6]] + ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP7]](i1), [[SELECT2]], [[MV2]] + ; GFX8-NEXT: [[XOR2:%[0-9]+]]:_(i64) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX8-NEXT: [[UV38:%[0-9]+]]:_(i32), [[UV39:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR2]](i64) + ; GFX8-NEXT: [[UV40:%[0-9]+]]:_(i32), [[UV41:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR]](i64) + ; GFX8-NEXT: [[USUBO8:%[0-9]+]]:_(i32), [[USUBO9:%[0-9]+]]:_(i1) = G_USUBO [[UV38]], [[UV40]] + ; GFX8-NEXT: [[USUBE12:%[0-9]+]]:_(i32), [[USUBE13:%[0-9]+]]:_(i1) = G_USUBE [[UV39]], [[UV41]], [[USUBO9]] + ; GFX8-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO8]](i32), [[USUBE12]](i32) + ; GFX8-NEXT: [[ASHR2:%[0-9]+]]:_(i64) = G_ASHR [[UV1]], [[C]](i32) + ; GFX8-NEXT: [[ASHR3:%[0-9]+]]:_(i64) = G_ASHR [[UV3]], [[C]](i32) + ; GFX8-NEXT: [[UV42:%[0-9]+]]:_(i32), [[UV43:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV1]](i64) + ; GFX8-NEXT: [[UV44:%[0-9]+]]:_(i32), [[UV45:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR2]](i64) + ; GFX8-NEXT: [[UADDO38:%[0-9]+]]:_(i32), [[UADDO39:%[0-9]+]]:_(i1) = G_UADDO [[UV42]], [[UV44]] + ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(i32), [[UADDE9:%[0-9]+]]:_(i1) = G_UADDE [[UV43]], [[UV45]], [[UADDO39]] + ; GFX8-NEXT: [[MV6:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO38]](i32), [[UADDE8]](i32) + ; GFX8-NEXT: [[UV46:%[0-9]+]]:_(i32), [[UV47:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV3]](i64) + ; GFX8-NEXT: [[UV48:%[0-9]+]]:_(i32), [[UV49:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR3]](i64) + ; GFX8-NEXT: [[UADDO40:%[0-9]+]]:_(i32), [[UADDO41:%[0-9]+]]:_(i1) = G_UADDO [[UV46]], [[UV48]] + ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(i32), [[UADDE11:%[0-9]+]]:_(i1) = G_UADDE [[UV47]], [[UV49]], [[UADDO41]] + ; GFX8-NEXT: [[MV7:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO40]](i32), [[UADDE10]](i32) + ; GFX8-NEXT: [[XOR3:%[0-9]+]]:_(i64) = G_XOR [[MV6]], [[ASHR2]] + ; GFX8-NEXT: [[XOR4:%[0-9]+]]:_(i64) = G_XOR [[MV7]], [[ASHR3]] + ; GFX8-NEXT: [[UV50:%[0-9]+]]:_(i32), [[UV51:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR4]](i64) + ; GFX8-NEXT: [[UITOFP2:%[0-9]+]]:_(f32) = G_UITOFP [[UV50]](i32) + ; GFX8-NEXT: [[UITOFP3:%[0-9]+]]:_(f32) = G_UITOFP [[UV51]](i32) + ; GFX8-NEXT: [[FMUL4:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP3]], [[C1]] + ; GFX8-NEXT: [[FADD2:%[0-9]+]]:_(f32) = G_FADD [[FMUL4]], [[UITOFP2]] + ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD2]](f32) + ; GFX8-NEXT: [[FMUL5:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] + ; GFX8-NEXT: [[FMUL6:%[0-9]+]]:_(f32) = G_FMUL [[FMUL5]], [[C3]] + ; GFX8-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL6]] + ; GFX8-NEXT: [[FMUL7:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C4]] + ; GFX8-NEXT: [[FADD3:%[0-9]+]]:_(f32) = G_FADD [[FMUL7]], [[FMUL5]] + ; GFX8-NEXT: [[FPTOUI2:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD3]](f32) + ; GFX8-NEXT: [[FPTOUI3:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC1]](f32) + ; GFX8-NEXT: [[UV52:%[0-9]+]]:_(i32), [[UV53:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C5]](i64) + ; GFX8-NEXT: [[UV54:%[0-9]+]]:_(i32), [[UV55:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR4]](i64) + ; GFX8-NEXT: [[USUBO10:%[0-9]+]]:_(i32), [[USUBO11:%[0-9]+]]:_(i1) = G_USUBO [[UV52]], [[UV54]] + ; GFX8-NEXT: [[USUBE14:%[0-9]+]]:_(i32), [[USUBE15:%[0-9]+]]:_(i1) = G_USUBE [[UV53]], [[UV55]], [[USUBO11]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_18:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_19:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](i32), [[FPTOUI2]], [[C5]] + ; GFX8-NEXT: [[UV56:%[0-9]+]]:_(i32), [[UV57:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES 
[[AMDGPU_MAD_U64_U32_18]](i64) + ; GFX8-NEXT: [[ANYEXT3:%[0-9]+]]:_(i64) = G_ANYEXT [[UV57]](i32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_20:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_21:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](i32), [[FPTOUI3]], [[ANYEXT3]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_22:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_23:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE14]](i32), [[FPTOUI2]], [[AMDGPU_MAD_U64_U32_20]] + ; GFX8-NEXT: [[UV58:%[0-9]+]]:_(i32), [[UV59:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_22]](i64) + ; GFX8-NEXT: [[MUL9:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI3]], [[UV56]] + ; GFX8-NEXT: [[MUL10:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI2]], [[UV58]] + ; GFX8-NEXT: [[UMULH12:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI2]], [[UV56]] + ; GFX8-NEXT: [[UADDO42:%[0-9]+]]:_(i32), [[UADDO43:%[0-9]+]]:_(i1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX8-NEXT: [[ZEXT15:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO43]](i1) + ; GFX8-NEXT: [[UADDO44:%[0-9]+]]:_(i32), [[UADDO45:%[0-9]+]]:_(i1) = G_UADDO [[UADDO42]], [[UMULH12]] + ; GFX8-NEXT: [[ZEXT16:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO45]](i1) + ; GFX8-NEXT: [[ADD12:%[0-9]+]]:_(i32) = G_ADD [[ZEXT15]], [[ZEXT16]] + ; GFX8-NEXT: [[MUL11:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI3]], [[UV58]] + ; GFX8-NEXT: [[UMULH13:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI3]], [[UV56]] + ; GFX8-NEXT: [[UMULH14:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI2]], [[UV58]] + ; GFX8-NEXT: [[UADDO46:%[0-9]+]]:_(i32), [[UADDO47:%[0-9]+]]:_(i1) = G_UADDO [[MUL11]], [[UMULH13]] + ; GFX8-NEXT: [[ZEXT17:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO47]](i1) + ; GFX8-NEXT: [[UADDO48:%[0-9]+]]:_(i32), [[UADDO49:%[0-9]+]]:_(i1) = G_UADDO [[UADDO46]], [[UMULH14]] + ; GFX8-NEXT: [[ZEXT18:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO49]](i1) + ; GFX8-NEXT: [[ADD13:%[0-9]+]]:_(i32) = G_ADD [[ZEXT17]], [[ZEXT18]] + ; GFX8-NEXT: [[UADDO50:%[0-9]+]]:_(i32), [[UADDO51:%[0-9]+]]:_(i1) = G_UADDO [[UADDO48]], [[ADD12]] + ; GFX8-NEXT: [[ZEXT19:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO51]](i1) + ; GFX8-NEXT: [[ADD14:%[0-9]+]]:_(i32) = G_ADD [[ADD13]], [[ZEXT19]] + ; GFX8-NEXT: [[UMULH15:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI3]], [[UV58]] + ; GFX8-NEXT: [[ADD15:%[0-9]+]]:_(i32) = G_ADD [[UMULH15]], [[ADD14]] + ; GFX8-NEXT: [[UADDO52:%[0-9]+]]:_(i32), [[UADDO53:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI2]], [[UADDO50]] + ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(i32), [[UADDE13:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI3]], [[ADD15]], [[UADDO53]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](i32), [[UADDO52]], [[C5]] + ; GFX8-NEXT: [[UV60:%[0-9]+]]:_(i32), [[UV61:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_24]](i64) + ; GFX8-NEXT: [[ANYEXT4:%[0-9]+]]:_(i64) = G_ANYEXT [[UV61]](i32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](i32), [[UADDE12]], [[ANYEXT4]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE14]](i32), [[UADDO52]], [[AMDGPU_MAD_U64_U32_26]] + ; GFX8-NEXT: [[UV62:%[0-9]+]]:_(i32), [[UV63:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_28]](i64) + ; GFX8-NEXT: [[MUL12:%[0-9]+]]:_(i32) = G_MUL [[UADDE12]], [[UV60]] + ; GFX8-NEXT: [[MUL13:%[0-9]+]]:_(i32) = G_MUL [[UADDO52]], [[UV62]] + ; GFX8-NEXT: [[UMULH16:%[0-9]+]]:_(i32) = G_UMULH [[UADDO52]], [[UV60]] + ; GFX8-NEXT: [[UADDO54:%[0-9]+]]:_(i32), [[UADDO55:%[0-9]+]]:_(i1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX8-NEXT: [[ZEXT20:%[0-9]+]]:_(i32) = 
G_ZEXT [[UADDO55]](i1) + ; GFX8-NEXT: [[UADDO56:%[0-9]+]]:_(i32), [[UADDO57:%[0-9]+]]:_(i1) = G_UADDO [[UADDO54]], [[UMULH16]] + ; GFX8-NEXT: [[ZEXT21:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO57]](i1) + ; GFX8-NEXT: [[ADD16:%[0-9]+]]:_(i32) = G_ADD [[ZEXT20]], [[ZEXT21]] + ; GFX8-NEXT: [[MUL14:%[0-9]+]]:_(i32) = G_MUL [[UADDE12]], [[UV62]] + ; GFX8-NEXT: [[UMULH17:%[0-9]+]]:_(i32) = G_UMULH [[UADDE12]], [[UV60]] + ; GFX8-NEXT: [[UMULH18:%[0-9]+]]:_(i32) = G_UMULH [[UADDO52]], [[UV62]] + ; GFX8-NEXT: [[UADDO58:%[0-9]+]]:_(i32), [[UADDO59:%[0-9]+]]:_(i1) = G_UADDO [[MUL14]], [[UMULH17]] + ; GFX8-NEXT: [[ZEXT22:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO59]](i1) + ; GFX8-NEXT: [[UADDO60:%[0-9]+]]:_(i32), [[UADDO61:%[0-9]+]]:_(i1) = G_UADDO [[UADDO58]], [[UMULH18]] + ; GFX8-NEXT: [[ZEXT23:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO61]](i1) + ; GFX8-NEXT: [[ADD17:%[0-9]+]]:_(i32) = G_ADD [[ZEXT22]], [[ZEXT23]] + ; GFX8-NEXT: [[UADDO62:%[0-9]+]]:_(i32), [[UADDO63:%[0-9]+]]:_(i1) = G_UADDO [[UADDO60]], [[ADD16]] + ; GFX8-NEXT: [[ZEXT24:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO63]](i1) + ; GFX8-NEXT: [[ADD18:%[0-9]+]]:_(i32) = G_ADD [[ADD17]], [[ZEXT24]] + ; GFX8-NEXT: [[UMULH19:%[0-9]+]]:_(i32) = G_UMULH [[UADDE12]], [[UV62]] + ; GFX8-NEXT: [[ADD19:%[0-9]+]]:_(i32) = G_ADD [[UMULH19]], [[ADD18]] + ; GFX8-NEXT: [[UADDO64:%[0-9]+]]:_(i32), [[UADDO65:%[0-9]+]]:_(i1) = G_UADDO [[UADDO52]], [[UADDO62]] + ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(i32), [[UADDE15:%[0-9]+]]:_(i1) = G_UADDE [[UADDE12]], [[ADD19]], [[UADDO65]] + ; GFX8-NEXT: [[UV64:%[0-9]+]]:_(i32), [[UV65:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR3]](i64) + ; GFX8-NEXT: [[UV66:%[0-9]+]]:_(i32), [[UV67:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR3]](i64) + ; GFX8-NEXT: [[MUL15:%[0-9]+]]:_(i32) = G_MUL [[UV67]], [[UADDO64]] + ; GFX8-NEXT: [[MUL16:%[0-9]+]]:_(i32) = G_MUL [[UV66]], [[UADDE14]] + ; GFX8-NEXT: [[UMULH20:%[0-9]+]]:_(i32) = G_UMULH [[UV66]], [[UADDO64]] + ; GFX8-NEXT: [[UADDO66:%[0-9]+]]:_(i32), [[UADDO67:%[0-9]+]]:_(i1) = G_UADDO [[MUL15]], [[MUL16]] + ; GFX8-NEXT: [[ZEXT25:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO67]](i1) + ; GFX8-NEXT: [[UADDO68:%[0-9]+]]:_(i32), [[UADDO69:%[0-9]+]]:_(i1) = G_UADDO [[UADDO66]], [[UMULH20]] + ; GFX8-NEXT: [[ZEXT26:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO69]](i1) + ; GFX8-NEXT: [[ADD20:%[0-9]+]]:_(i32) = G_ADD [[ZEXT25]], [[ZEXT26]] + ; GFX8-NEXT: [[MUL17:%[0-9]+]]:_(i32) = G_MUL [[UV67]], [[UADDE14]] + ; GFX8-NEXT: [[UMULH21:%[0-9]+]]:_(i32) = G_UMULH [[UV67]], [[UADDO64]] + ; GFX8-NEXT: [[UMULH22:%[0-9]+]]:_(i32) = G_UMULH [[UV66]], [[UADDE14]] + ; GFX8-NEXT: [[UADDO70:%[0-9]+]]:_(i32), [[UADDO71:%[0-9]+]]:_(i1) = G_UADDO [[MUL17]], [[UMULH21]] + ; GFX8-NEXT: [[ZEXT27:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO71]](i1) + ; GFX8-NEXT: [[UADDO72:%[0-9]+]]:_(i32), [[UADDO73:%[0-9]+]]:_(i1) = G_UADDO [[UADDO70]], [[UMULH22]] + ; GFX8-NEXT: [[ZEXT28:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO73]](i1) + ; GFX8-NEXT: [[ADD21:%[0-9]+]]:_(i32) = G_ADD [[ZEXT27]], [[ZEXT28]] + ; GFX8-NEXT: [[UADDO74:%[0-9]+]]:_(i32), [[UADDO75:%[0-9]+]]:_(i1) = G_UADDO [[UADDO72]], [[ADD20]] + ; GFX8-NEXT: [[ZEXT29:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO75]](i1) + ; GFX8-NEXT: [[ADD22:%[0-9]+]]:_(i32) = G_ADD [[ADD21]], [[ZEXT29]] + ; GFX8-NEXT: [[UMULH23:%[0-9]+]]:_(i32) = G_UMULH [[UV67]], [[UADDE14]] + ; GFX8-NEXT: [[ADD23:%[0-9]+]]:_(i32) = G_ADD [[UMULH23]], [[ADD22]] + ; GFX8-NEXT: [[UV68:%[0-9]+]]:_(i32), [[UV69:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR4]](i64) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV68]](i32), 
[[UADDO74]], [[C5]] + ; GFX8-NEXT: [[UV70:%[0-9]+]]:_(i32), [[UV71:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_30]](i64) + ; GFX8-NEXT: [[ANYEXT5:%[0-9]+]]:_(i64) = G_ANYEXT [[UV71]](i32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV68]](i32), [[ADD23]], [[ANYEXT5]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV69]](i32), [[UADDO74]], [[AMDGPU_MAD_U64_U32_32]] + ; GFX8-NEXT: [[UV72:%[0-9]+]]:_(i32), [[UV73:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_34]](i64) + ; GFX8-NEXT: [[USUBO12:%[0-9]+]]:_(i32), [[USUBO13:%[0-9]+]]:_(i1) = G_USUBO [[UV64]], [[UV70]] + ; GFX8-NEXT: [[USUBE16:%[0-9]+]]:_(i32), [[USUBE17:%[0-9]+]]:_(i1) = G_USUBE [[UV65]], [[UV72]], [[USUBO13]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[UV65]], [[UV72]] + ; GFX8-NEXT: [[MV8:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO12]](i32), [[USUBE16]](i32) + ; GFX8-NEXT: [[UV74:%[0-9]+]]:_(i32), [[UV75:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR4]](i64) + ; GFX8-NEXT: [[ICMP8:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE16]](i32), [[UV75]] + ; GFX8-NEXT: [[SEXT4:%[0-9]+]]:_(i32) = G_SEXT [[ICMP8]](i1) + ; GFX8-NEXT: [[ICMP9:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO12]](i32), [[UV74]] + ; GFX8-NEXT: [[SEXT5:%[0-9]+]]:_(i32) = G_SEXT [[ICMP9]](i1) + ; GFX8-NEXT: [[ICMP10:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE16]](i32), [[UV75]] + ; GFX8-NEXT: [[SELECT4:%[0-9]+]]:_(i32) = G_SELECT [[ICMP10]](i1), [[SEXT5]], [[SEXT4]] + ; GFX8-NEXT: [[USUBO14:%[0-9]+]]:_(i32), [[USUBO15:%[0-9]+]]:_(i1) = G_USUBO [[USUBO12]], [[UV74]] + ; GFX8-NEXT: [[USUBE18:%[0-9]+]]:_(i32), [[USUBE19:%[0-9]+]]:_(i1) = G_USUBE [[SUB1]], [[UV75]], [[USUBO13]] + ; GFX8-NEXT: [[USUBE20:%[0-9]+]]:_(i32), [[USUBE21:%[0-9]+]]:_(i1) = G_USUBE [[USUBE18]], [[C6]], [[USUBO15]] + ; GFX8-NEXT: [[MV9:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO14]](i32), [[USUBE20]](i32) + ; GFX8-NEXT: [[ICMP11:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE20]](i32), [[UV75]] + ; GFX8-NEXT: [[SEXT6:%[0-9]+]]:_(i32) = G_SEXT [[ICMP11]](i1) + ; GFX8-NEXT: [[ICMP12:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO14]](i32), [[UV74]] + ; GFX8-NEXT: [[SEXT7:%[0-9]+]]:_(i32) = G_SEXT [[ICMP12]](i1) + ; GFX8-NEXT: [[ICMP13:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE20]](i32), [[UV75]] + ; GFX8-NEXT: [[SELECT5:%[0-9]+]]:_(i32) = G_SELECT [[ICMP13]](i1), [[SEXT7]], [[SEXT6]] + ; GFX8-NEXT: [[USUBO16:%[0-9]+]]:_(i32), [[USUBO17:%[0-9]+]]:_(i1) = G_USUBO [[USUBO14]], [[UV74]] + ; GFX8-NEXT: [[USUBE22:%[0-9]+]]:_(i32), [[USUBE23:%[0-9]+]]:_(i1) = G_USUBE [[USUBE18]], [[UV75]], [[USUBO15]] + ; GFX8-NEXT: [[USUBE24:%[0-9]+]]:_(i32), [[USUBE25:%[0-9]+]]:_(i1) = G_USUBE [[USUBE22]], [[C6]], [[USUBO17]] + ; GFX8-NEXT: [[MV10:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO16]](i32), [[USUBE24]](i32) + ; GFX8-NEXT: [[ICMP14:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT5]](i32), [[C6]] + ; GFX8-NEXT: [[SELECT6:%[0-9]+]]:_(i64) = G_SELECT [[ICMP14]](i1), [[MV10]], [[MV9]] + ; GFX8-NEXT: [[ICMP15:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT4]](i32), [[C6]] + ; GFX8-NEXT: [[SELECT7:%[0-9]+]]:_(i64) = G_SELECT [[ICMP15]](i1), [[SELECT6]], [[MV8]] + ; GFX8-NEXT: [[XOR5:%[0-9]+]]:_(i64) = G_XOR [[SELECT7]], [[ASHR2]] + ; GFX8-NEXT: [[UV76:%[0-9]+]]:_(i32), [[UV77:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR5]](i64) + ; GFX8-NEXT: [[UV78:%[0-9]+]]:_(i32), [[UV79:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR2]](i64) + ; GFX8-NEXT: 
[[USUBO18:%[0-9]+]]:_(i32), [[USUBO19:%[0-9]+]]:_(i1) = G_USUBO [[UV76]], [[UV78]] + ; GFX8-NEXT: [[USUBE26:%[0-9]+]]:_(i32), [[USUBE27:%[0-9]+]]:_(i1) = G_USUBE [[UV77]], [[UV79]], [[USUBO19]] + ; GFX8-NEXT: [[MV11:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO18]](i32), [[USUBE26]](i32) + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[MV5]](i64), [[MV11]](i64) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) + ; ; GFX9-LABEL: name: test_srem_v2s64 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[C]](s32) - ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[C]](s32) - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) - ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] - ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] - ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] - ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]] - ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV12]](s32) - ; GFX9-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV13]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] - ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 - ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] - ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; GFX9-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] - ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] - ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) - ; GFX9-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; 
GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) - ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX9-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV16]] - ; GFX9-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV17]], [[USUBO1]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C5]] - ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV19]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] - ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV18]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV20]] - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV18]] - ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV20]] - ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV18]] - ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV20]] - ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] - ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD]] - ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] - ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV20]] - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] - ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) - ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV23]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE4]], [[ANYEXT1]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = 
G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]] - ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV22]] - ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[UV24]] - ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV22]] - ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] - ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV24]] - ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV22]] - ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV24]] - ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] - ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD4]] - ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV24]] - ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]] - ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDO26]] - ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[UADDE6]] - ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDO26]] - ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] - ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDE6]] - ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDO26]] - ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDE6]] - ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] - ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = 
G_UADDO [[UADDO34]], [[ADD8]] - ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDE6]] - ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] - ; GFX9-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[UADDO36]], [[C5]] - ; GFX9-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) - ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV33]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV31]](s32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]] - ; GFX9-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) - ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV32]] - ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV34]], [[USUBO3]] - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV27]], [[UV34]] - ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; GFX9-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV37]] - ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV36]] - ; GFX9-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV37]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX9-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV36]] - ; GFX9-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV37]], [[USUBO3]] - ; GFX9-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] - ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) - ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV37]] - ; GFX9-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV36]] - ; GFX9-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV37]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX9-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV36]] - ; GFX9-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV37]], [[USUBO5]] - ; GFX9-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] - ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) - ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), 
[[MV4]], [[MV3]] - ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] - ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] - ; GFX9-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[ASHR]] - ; GFX9-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) - ; GFX9-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX9-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV38]], [[UV40]] - ; GFX9-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV39]], [[UV41]], [[USUBO9]] - ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32) - ; GFX9-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) - ; GFX9-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C]](s32) - ; GFX9-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX9-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64) - ; GFX9-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UV42]], [[UV44]] - ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UV43]], [[UV45]], [[UADDO39]] - ; GFX9-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) - ; GFX9-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX9-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64) - ; GFX9-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UV46]], [[UV48]] - ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UV47]], [[UV49]], [[UADDO41]] - ; GFX9-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) - ; GFX9-NEXT: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[MV6]], [[ASHR2]] - ; GFX9-NEXT: [[XOR4:%[0-9]+]]:_(s64) = G_XOR [[MV7]], [[ASHR3]] - ; GFX9-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) - ; GFX9-NEXT: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV50]](s32) - ; GFX9-NEXT: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV51]](s32) - ; GFX9-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C1]] - ; GFX9-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[UITOFP2]] - ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD2]](s32) - ; GFX9-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] - ; GFX9-NEXT: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[FMUL5]], [[C3]] - ; GFX9-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL6]] - ; GFX9-NEXT: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C4]] - ; GFX9-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]] - ; GFX9-NEXT: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD3]](s32) - ; GFX9-NEXT: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32) - ; GFX9-NEXT: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) - ; GFX9-NEXT: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) - ; GFX9-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV52]], [[UV54]] - ; GFX9-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV53]], [[UV55]], [[USUBO11]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_18:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_19:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](s32), [[FPTOUI2]], [[C5]] - ; 
GFX9-NEXT: [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_18]](s64) - ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[UV57]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_20:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_21:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](s32), [[FPTOUI3]], [[ANYEXT3]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_22:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_23:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE14]](s32), [[FPTOUI2]], [[AMDGPU_MAD_U64_U32_20]] - ; GFX9-NEXT: [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_22]](s64) - ; GFX9-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV56]] - ; GFX9-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[UV58]] - ; GFX9-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV56]] - ; GFX9-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] - ; GFX9-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1) - ; GFX9-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH12]] - ; GFX9-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO45]](s1) - ; GFX9-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] - ; GFX9-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV58]] - ; GFX9-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV56]] - ; GFX9-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV58]] - ; GFX9-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]] - ; GFX9-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) - ; GFX9-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH14]] - ; GFX9-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1) - ; GFX9-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] - ; GFX9-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[UADDO48]], [[ADD12]] - ; GFX9-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) - ; GFX9-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT19]] - ; GFX9-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV58]] - ; GFX9-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[ADD14]] - ; GFX9-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO50]] - ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD15]], [[UADDO53]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](s32), [[UADDO52]], [[C5]] - ; GFX9-NEXT: [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_24]](s64) - ; GFX9-NEXT: [[ANYEXT4:%[0-9]+]]:_(s64) = G_ANYEXT [[UV61]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](s32), [[UADDE12]], [[ANYEXT4]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE14]](s32), [[UADDO52]], [[AMDGPU_MAD_U64_U32_26]] - ; GFX9-NEXT: [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_28]](s64) - ; GFX9-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV60]] - ; GFX9-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO52]], [[UV62]] - ; GFX9-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[UV60]] - ; GFX9-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) 
= G_UADDO [[MUL12]], [[MUL13]] - ; GFX9-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) - ; GFX9-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH16]] - ; GFX9-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO57]](s1) - ; GFX9-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] - ; GFX9-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV62]] - ; GFX9-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV60]] - ; GFX9-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[UV62]] - ; GFX9-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]] - ; GFX9-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) - ; GFX9-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH18]] - ; GFX9-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1) - ; GFX9-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] - ; GFX9-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[UADDO60]], [[ADD16]] - ; GFX9-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) - ; GFX9-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[ZEXT24]] - ; GFX9-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV62]] - ; GFX9-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD18]] - ; GFX9-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[UADDO62]] - ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD19]], [[UADDO65]] - ; GFX9-NEXT: [[UV64:%[0-9]+]]:_(s32), [[UV65:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) - ; GFX9-NEXT: [[UV66:%[0-9]+]]:_(s32), [[UV67:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) - ; GFX9-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV67]], [[UADDO64]] - ; GFX9-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV66]], [[UADDE14]] - ; GFX9-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV66]], [[UADDO64]] - ; GFX9-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]] - ; GFX9-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) - ; GFX9-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH20]] - ; GFX9-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO69]](s1) - ; GFX9-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] - ; GFX9-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV67]], [[UADDE14]] - ; GFX9-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV67]], [[UADDO64]] - ; GFX9-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV66]], [[UADDE14]] - ; GFX9-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]] - ; GFX9-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1) - ; GFX9-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH22]] - ; GFX9-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO73]](s1) - ; GFX9-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] - ; GFX9-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[ADD20]] - ; GFX9-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO75]](s1) - ; GFX9-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT29]] - ; GFX9-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV67]], [[UADDE14]] - ; GFX9-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[ADD22]] - ; GFX9-NEXT: [[UV68:%[0-9]+]]:_(s32), [[UV69:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), 
[[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV68]](s32), [[UADDO74]], [[C5]] - ; GFX9-NEXT: [[UV70:%[0-9]+]]:_(s32), [[UV71:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_30]](s64) - ; GFX9-NEXT: [[ANYEXT5:%[0-9]+]]:_(s64) = G_ANYEXT [[UV71]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV68]](s32), [[ADD23]], [[ANYEXT5]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV69]](s32), [[UADDO74]], [[AMDGPU_MAD_U64_U32_32]] - ; GFX9-NEXT: [[UV72:%[0-9]+]]:_(s32), [[UV73:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_34]](s64) - ; GFX9-NEXT: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[UV64]], [[UV70]] - ; GFX9-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[UV65]], [[UV72]], [[USUBO13]] - ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV65]], [[UV72]] - ; GFX9-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO12]](s32), [[USUBE16]](s32) - ; GFX9-NEXT: [[UV74:%[0-9]+]]:_(s32), [[UV75:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) - ; GFX9-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE16]](s32), [[UV75]] - ; GFX9-NEXT: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1) - ; GFX9-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV74]] - ; GFX9-NEXT: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1) - ; GFX9-NEXT: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE16]](s32), [[UV75]] - ; GFX9-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]] - ; GFX9-NEXT: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[USUBO12]], [[UV74]] - ; GFX9-NEXT: [[USUBE18:%[0-9]+]]:_(s32), [[USUBE19:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV75]], [[USUBO13]] - ; GFX9-NEXT: [[USUBE20:%[0-9]+]]:_(s32), [[USUBE21:%[0-9]+]]:_(s1) = G_USUBE [[USUBE18]], [[C6]], [[USUBO15]] - ; GFX9-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO14]](s32), [[USUBE20]](s32) - ; GFX9-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE20]](s32), [[UV75]] - ; GFX9-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) - ; GFX9-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO14]](s32), [[UV74]] - ; GFX9-NEXT: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1) - ; GFX9-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE20]](s32), [[UV75]] - ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] - ; GFX9-NEXT: [[USUBO16:%[0-9]+]]:_(s32), [[USUBO17:%[0-9]+]]:_(s1) = G_USUBO [[USUBO14]], [[UV74]] - ; GFX9-NEXT: [[USUBE22:%[0-9]+]]:_(s32), [[USUBE23:%[0-9]+]]:_(s1) = G_USUBE [[USUBE18]], [[UV75]], [[USUBO15]] - ; GFX9-NEXT: [[USUBE24:%[0-9]+]]:_(s32), [[USUBE25:%[0-9]+]]:_(s1) = G_USUBE [[USUBE22]], [[C6]], [[USUBO17]] - ; GFX9-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO16]](s32), [[USUBE24]](s32) - ; GFX9-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C6]] - ; GFX9-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV10]], [[MV9]] - ; GFX9-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C6]] - ; GFX9-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV8]] - ; GFX9-NEXT: [[XOR5:%[0-9]+]]:_(s64) = G_XOR [[SELECT7]], [[ASHR2]] - ; GFX9-NEXT: [[UV76:%[0-9]+]]:_(s32), [[UV77:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) - ; GFX9-NEXT: [[UV78:%[0-9]+]]:_(s32), 
[[UV79:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64) - ; GFX9-NEXT: [[USUBO18:%[0-9]+]]:_(s32), [[USUBO19:%[0-9]+]]:_(s1) = G_USUBO [[UV76]], [[UV78]] - ; GFX9-NEXT: [[USUBE26:%[0-9]+]]:_(s32), [[USUBE27:%[0-9]+]]:_(s1) = G_USUBE [[UV77]], [[UV79]], [[USUBO19]] - ; GFX9-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO18]](s32), [[USUBE26]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV5]](s64), [[MV11]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY1]](<2 x i64>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[UV]], [[C]](i32) + ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[UV2]], [[C]](i32) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV]](i64) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR]](i64) + ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV4]], [[UV6]] + ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR1]](i64) + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UV8]], [[UV10]] + ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO2]](i32), [[UADDE2]](i32) + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(i64) = G_XOR [[MV]], [[ASHR]] + ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(i64) = G_XOR [[MV1]], [[ASHR1]] + ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[UV12]](i32) + ; GFX9-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[UV13]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD]](f32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX9-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD1]](f32) + ; GFX9-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC]](f32) + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(i32), 
[[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C5]](i64) + ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX9-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV14]], [[UV16]] + ; GFX9-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV15]], [[UV17]], [[USUBO1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[FPTOUI]], [[C5]] + ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](i64) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[UV19]](i32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[FPTOUI1]], [[ANYEXT]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](i32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](i64) + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[UV18]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI]], [[UV20]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[UV18]] + ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(i32), [[UADDO5:%[0-9]+]]:_(i1) = G_UADDO [[MUL]], [[MUL1]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO5]](i1) + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(i32), [[UADDO7:%[0-9]+]]:_(i1) = G_UADDO [[UADDO4]], [[UMULH]] + ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO7]](i1) + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[UV20]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[UV18]] + ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[UV20]] + ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(i32), [[UADDO9:%[0-9]+]]:_(i1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO9]](i1) + ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(i32), [[UADDO11:%[0-9]+]]:_(i1) = G_UADDO [[UADDO8]], [[UMULH2]] + ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO11]](i1) + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(i32), [[UADDO13:%[0-9]+]]:_(i1) = G_UADDO [[UADDO10]], [[ADD]] + ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO13]](i1) + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[UV20]] + ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UMULH3]], [[ADD2]] + ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(i32), [[UADDO15:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI]], [[UADDO12]] + ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(i32), [[UADDE5:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[UADDO14]], [[C5]] + ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](i64) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[UV23]](i32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[UADDE4]], [[ANYEXT1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](i32), 
[[UADDO14]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](i64) + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UADDE4]], [[UV22]] + ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(i32) = G_MUL [[UADDO14]], [[UV24]] + ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(i32) = G_UMULH [[UADDO14]], [[UV22]] + ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(i32), [[UADDO17:%[0-9]+]]:_(i1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO17]](i1) + ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(i32), [[UADDO19:%[0-9]+]]:_(i1) = G_UADDO [[UADDO16]], [[UMULH4]] + ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO19]](i1) + ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(i32) = G_MUL [[UADDE4]], [[UV24]] + ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(i32) = G_UMULH [[UADDE4]], [[UV22]] + ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(i32) = G_UMULH [[UADDO14]], [[UV24]] + ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(i32), [[UADDO21:%[0-9]+]]:_(i1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO21]](i1) + ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(i32), [[UADDO23:%[0-9]+]]:_(i1) = G_UADDO [[UADDO20]], [[UMULH6]] + ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO23]](i1) + ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(i32), [[UADDO25:%[0-9]+]]:_(i1) = G_UADDO [[UADDO22]], [[ADD4]] + ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO25]](i1) + ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(i32) = G_UMULH [[UADDE4]], [[UV24]] + ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[UMULH7]], [[ADD6]] + ; GFX9-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(i32), [[UADDO27:%[0-9]+]]:_(i1) = G_UADDO [[UADDO14]], [[UADDO24]] + ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(i32), [[UADDE7:%[0-9]+]]:_(i1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]] + ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR]](i64) + ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(i32), [[UV29:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR]](i64) + ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(i32) = G_MUL [[UV29]], [[UADDO26]] + ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(i32) = G_MUL [[UV28]], [[UADDE6]] + ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(i32) = G_UMULH [[UV28]], [[UADDO26]] + ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(i32), [[UADDO29:%[0-9]+]]:_(i1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO29]](i1) + ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(i32), [[UADDO31:%[0-9]+]]:_(i1) = G_UADDO [[UADDO28]], [[UMULH8]] + ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO31]](i1) + ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(i32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(i32) = G_MUL [[UV29]], [[UADDE6]] + ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(i32) = G_UMULH [[UV29]], [[UADDO26]] + ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(i32) = G_UMULH [[UV28]], [[UADDE6]] + ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(i32), [[UADDO33:%[0-9]+]]:_(i1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO33]](i1) + ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(i32), [[UADDO35:%[0-9]+]]:_(i1) = G_UADDO [[UADDO32]], [[UMULH10]] + ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO35]](i1) + ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(i32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(i32), [[UADDO37:%[0-9]+]]:_(i1) = G_UADDO [[UADDO34]], [[ADD8]] + ; 
GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO37]](i1) + ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(i32) = G_ADD [[ADD9]], [[ZEXT14]] + ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(i32) = G_UMULH [[UV29]], [[UADDE6]] + ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(i32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX9-NEXT: [[UV30:%[0-9]+]]:_(i32), [[UV31:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV30]](i32), [[UADDO36]], [[C5]] + ; GFX9-NEXT: [[UV32:%[0-9]+]]:_(i32), [[UV33:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](i64) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(i64) = G_ANYEXT [[UV33]](i32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV30]](i32), [[ADD11]], [[ANYEXT2]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV31]](i32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX9-NEXT: [[UV34:%[0-9]+]]:_(i32), [[UV35:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](i64) + ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(i32), [[USUBO3:%[0-9]+]]:_(i1) = G_USUBO [[UV26]], [[UV32]] + ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV27]], [[UV34]], [[USUBO3]] + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV27]], [[UV34]] + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO2]](i32), [[USUBE2]](i32) + ; GFX9-NEXT: [[UV36:%[0-9]+]]:_(i32), [[UV37:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE2]](i32), [[UV37]] + ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO2]](i32), [[UV36]] + ; GFX9-NEXT: [[SEXT1:%[0-9]+]]:_(i32) = G_SEXT [[ICMP1]](i1) + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE2]](i32), [[UV37]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SEXT1]], [[SEXT]] + ; GFX9-NEXT: [[USUBO4:%[0-9]+]]:_(i32), [[USUBO5:%[0-9]+]]:_(i1) = G_USUBO [[USUBO2]], [[UV36]] + ; GFX9-NEXT: [[USUBE4:%[0-9]+]]:_(i32), [[USUBE5:%[0-9]+]]:_(i1) = G_USUBE [[SUB]], [[UV37]], [[USUBO3]] + ; GFX9-NEXT: [[USUBE6:%[0-9]+]]:_(i32), [[USUBE7:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO4]](i32), [[USUBE6]](i32) + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE6]](i32), [[UV37]] + ; GFX9-NEXT: [[SEXT2:%[0-9]+]]:_(i32) = G_SEXT [[ICMP3]](i1) + ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO4]](i32), [[UV36]] + ; GFX9-NEXT: [[SEXT3:%[0-9]+]]:_(i32) = G_SEXT [[ICMP4]](i1) + ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE6]](i32), [[UV37]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP5]](i1), [[SEXT3]], [[SEXT2]] + ; GFX9-NEXT: [[USUBO6:%[0-9]+]]:_(i32), [[USUBO7:%[0-9]+]]:_(i1) = G_USUBO [[USUBO4]], [[UV36]] + ; GFX9-NEXT: [[USUBE8:%[0-9]+]]:_(i32), [[USUBE9:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[UV37]], [[USUBO5]] + ; GFX9-NEXT: [[USUBE10:%[0-9]+]]:_(i32), [[USUBE11:%[0-9]+]]:_(i1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] + ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO6]](i32), [[USUBE10]](i32) + ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT1]](i32), [[C6]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[MV4]], [[MV3]] + ; GFX9-NEXT: 
[[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT]](i32), [[C6]] + ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP7]](i1), [[SELECT2]], [[MV2]] + ; GFX9-NEXT: [[XOR2:%[0-9]+]]:_(i64) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX9-NEXT: [[UV38:%[0-9]+]]:_(i32), [[UV39:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR2]](i64) + ; GFX9-NEXT: [[UV40:%[0-9]+]]:_(i32), [[UV41:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR]](i64) + ; GFX9-NEXT: [[USUBO8:%[0-9]+]]:_(i32), [[USUBO9:%[0-9]+]]:_(i1) = G_USUBO [[UV38]], [[UV40]] + ; GFX9-NEXT: [[USUBE12:%[0-9]+]]:_(i32), [[USUBE13:%[0-9]+]]:_(i1) = G_USUBE [[UV39]], [[UV41]], [[USUBO9]] + ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO8]](i32), [[USUBE12]](i32) + ; GFX9-NEXT: [[ASHR2:%[0-9]+]]:_(i64) = G_ASHR [[UV1]], [[C]](i32) + ; GFX9-NEXT: [[ASHR3:%[0-9]+]]:_(i64) = G_ASHR [[UV3]], [[C]](i32) + ; GFX9-NEXT: [[UV42:%[0-9]+]]:_(i32), [[UV43:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV1]](i64) + ; GFX9-NEXT: [[UV44:%[0-9]+]]:_(i32), [[UV45:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR2]](i64) + ; GFX9-NEXT: [[UADDO38:%[0-9]+]]:_(i32), [[UADDO39:%[0-9]+]]:_(i1) = G_UADDO [[UV42]], [[UV44]] + ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(i32), [[UADDE9:%[0-9]+]]:_(i1) = G_UADDE [[UV43]], [[UV45]], [[UADDO39]] + ; GFX9-NEXT: [[MV6:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO38]](i32), [[UADDE8]](i32) + ; GFX9-NEXT: [[UV46:%[0-9]+]]:_(i32), [[UV47:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV3]](i64) + ; GFX9-NEXT: [[UV48:%[0-9]+]]:_(i32), [[UV49:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR3]](i64) + ; GFX9-NEXT: [[UADDO40:%[0-9]+]]:_(i32), [[UADDO41:%[0-9]+]]:_(i1) = G_UADDO [[UV46]], [[UV48]] + ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(i32), [[UADDE11:%[0-9]+]]:_(i1) = G_UADDE [[UV47]], [[UV49]], [[UADDO41]] + ; GFX9-NEXT: [[MV7:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO40]](i32), [[UADDE10]](i32) + ; GFX9-NEXT: [[XOR3:%[0-9]+]]:_(i64) = G_XOR [[MV6]], [[ASHR2]] + ; GFX9-NEXT: [[XOR4:%[0-9]+]]:_(i64) = G_XOR [[MV7]], [[ASHR3]] + ; GFX9-NEXT: [[UV50:%[0-9]+]]:_(i32), [[UV51:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR4]](i64) + ; GFX9-NEXT: [[UITOFP2:%[0-9]+]]:_(f32) = G_UITOFP [[UV50]](i32) + ; GFX9-NEXT: [[UITOFP3:%[0-9]+]]:_(f32) = G_UITOFP [[UV51]](i32) + ; GFX9-NEXT: [[FMUL4:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP3]], [[C1]] + ; GFX9-NEXT: [[FADD2:%[0-9]+]]:_(f32) = G_FADD [[FMUL4]], [[UITOFP2]] + ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD2]](f32) + ; GFX9-NEXT: [[FMUL5:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] + ; GFX9-NEXT: [[FMUL6:%[0-9]+]]:_(f32) = G_FMUL [[FMUL5]], [[C3]] + ; GFX9-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL6]] + ; GFX9-NEXT: [[FMUL7:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C4]] + ; GFX9-NEXT: [[FADD3:%[0-9]+]]:_(f32) = G_FADD [[FMUL7]], [[FMUL5]] + ; GFX9-NEXT: [[FPTOUI2:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD3]](f32) + ; GFX9-NEXT: [[FPTOUI3:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC1]](f32) + ; GFX9-NEXT: [[UV52:%[0-9]+]]:_(i32), [[UV53:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C5]](i64) + ; GFX9-NEXT: [[UV54:%[0-9]+]]:_(i32), [[UV55:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR4]](i64) + ; GFX9-NEXT: [[USUBO10:%[0-9]+]]:_(i32), [[USUBO11:%[0-9]+]]:_(i1) = G_USUBO [[UV52]], [[UV54]] + ; GFX9-NEXT: [[USUBE14:%[0-9]+]]:_(i32), [[USUBE15:%[0-9]+]]:_(i1) = G_USUBE [[UV53]], [[UV55]], [[USUBO11]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_18:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_19:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](i32), [[FPTOUI2]], [[C5]] + ; GFX9-NEXT: [[UV56:%[0-9]+]]:_(i32), 
[[UV57:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_18]](i64) + ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(i64) = G_ANYEXT [[UV57]](i32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_20:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_21:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](i32), [[FPTOUI3]], [[ANYEXT3]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_22:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_23:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE14]](i32), [[FPTOUI2]], [[AMDGPU_MAD_U64_U32_20]] + ; GFX9-NEXT: [[UV58:%[0-9]+]]:_(i32), [[UV59:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_22]](i64) + ; GFX9-NEXT: [[MUL9:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI3]], [[UV56]] + ; GFX9-NEXT: [[MUL10:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI2]], [[UV58]] + ; GFX9-NEXT: [[UMULH12:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI2]], [[UV56]] + ; GFX9-NEXT: [[UADDO42:%[0-9]+]]:_(i32), [[UADDO43:%[0-9]+]]:_(i1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX9-NEXT: [[ZEXT15:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO43]](i1) + ; GFX9-NEXT: [[UADDO44:%[0-9]+]]:_(i32), [[UADDO45:%[0-9]+]]:_(i1) = G_UADDO [[UADDO42]], [[UMULH12]] + ; GFX9-NEXT: [[ZEXT16:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO45]](i1) + ; GFX9-NEXT: [[ADD12:%[0-9]+]]:_(i32) = G_ADD [[ZEXT15]], [[ZEXT16]] + ; GFX9-NEXT: [[MUL11:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI3]], [[UV58]] + ; GFX9-NEXT: [[UMULH13:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI3]], [[UV56]] + ; GFX9-NEXT: [[UMULH14:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI2]], [[UV58]] + ; GFX9-NEXT: [[UADDO46:%[0-9]+]]:_(i32), [[UADDO47:%[0-9]+]]:_(i1) = G_UADDO [[MUL11]], [[UMULH13]] + ; GFX9-NEXT: [[ZEXT17:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO47]](i1) + ; GFX9-NEXT: [[UADDO48:%[0-9]+]]:_(i32), [[UADDO49:%[0-9]+]]:_(i1) = G_UADDO [[UADDO46]], [[UMULH14]] + ; GFX9-NEXT: [[ZEXT18:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO49]](i1) + ; GFX9-NEXT: [[ADD13:%[0-9]+]]:_(i32) = G_ADD [[ZEXT17]], [[ZEXT18]] + ; GFX9-NEXT: [[UADDO50:%[0-9]+]]:_(i32), [[UADDO51:%[0-9]+]]:_(i1) = G_UADDO [[UADDO48]], [[ADD12]] + ; GFX9-NEXT: [[ZEXT19:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO51]](i1) + ; GFX9-NEXT: [[ADD14:%[0-9]+]]:_(i32) = G_ADD [[ADD13]], [[ZEXT19]] + ; GFX9-NEXT: [[UMULH15:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI3]], [[UV58]] + ; GFX9-NEXT: [[ADD15:%[0-9]+]]:_(i32) = G_ADD [[UMULH15]], [[ADD14]] + ; GFX9-NEXT: [[UADDO52:%[0-9]+]]:_(i32), [[UADDO53:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI2]], [[UADDO50]] + ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(i32), [[UADDE13:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI3]], [[ADD15]], [[UADDO53]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](i32), [[UADDO52]], [[C5]] + ; GFX9-NEXT: [[UV60:%[0-9]+]]:_(i32), [[UV61:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_24]](i64) + ; GFX9-NEXT: [[ANYEXT4:%[0-9]+]]:_(i64) = G_ANYEXT [[UV61]](i32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](i32), [[UADDE12]], [[ANYEXT4]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE14]](i32), [[UADDO52]], [[AMDGPU_MAD_U64_U32_26]] + ; GFX9-NEXT: [[UV62:%[0-9]+]]:_(i32), [[UV63:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_28]](i64) + ; GFX9-NEXT: [[MUL12:%[0-9]+]]:_(i32) = G_MUL [[UADDE12]], [[UV60]] + ; GFX9-NEXT: [[MUL13:%[0-9]+]]:_(i32) = G_MUL [[UADDO52]], [[UV62]] + ; GFX9-NEXT: [[UMULH16:%[0-9]+]]:_(i32) = G_UMULH [[UADDO52]], [[UV60]] + ; GFX9-NEXT: [[UADDO54:%[0-9]+]]:_(i32), [[UADDO55:%[0-9]+]]:_(i1) = G_UADDO [[MUL12]], [[MUL13]] + ; 
GFX9-NEXT: [[ZEXT20:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO55]](i1) + ; GFX9-NEXT: [[UADDO56:%[0-9]+]]:_(i32), [[UADDO57:%[0-9]+]]:_(i1) = G_UADDO [[UADDO54]], [[UMULH16]] + ; GFX9-NEXT: [[ZEXT21:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO57]](i1) + ; GFX9-NEXT: [[ADD16:%[0-9]+]]:_(i32) = G_ADD [[ZEXT20]], [[ZEXT21]] + ; GFX9-NEXT: [[MUL14:%[0-9]+]]:_(i32) = G_MUL [[UADDE12]], [[UV62]] + ; GFX9-NEXT: [[UMULH17:%[0-9]+]]:_(i32) = G_UMULH [[UADDE12]], [[UV60]] + ; GFX9-NEXT: [[UMULH18:%[0-9]+]]:_(i32) = G_UMULH [[UADDO52]], [[UV62]] + ; GFX9-NEXT: [[UADDO58:%[0-9]+]]:_(i32), [[UADDO59:%[0-9]+]]:_(i1) = G_UADDO [[MUL14]], [[UMULH17]] + ; GFX9-NEXT: [[ZEXT22:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO59]](i1) + ; GFX9-NEXT: [[UADDO60:%[0-9]+]]:_(i32), [[UADDO61:%[0-9]+]]:_(i1) = G_UADDO [[UADDO58]], [[UMULH18]] + ; GFX9-NEXT: [[ZEXT23:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO61]](i1) + ; GFX9-NEXT: [[ADD17:%[0-9]+]]:_(i32) = G_ADD [[ZEXT22]], [[ZEXT23]] + ; GFX9-NEXT: [[UADDO62:%[0-9]+]]:_(i32), [[UADDO63:%[0-9]+]]:_(i1) = G_UADDO [[UADDO60]], [[ADD16]] + ; GFX9-NEXT: [[ZEXT24:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO63]](i1) + ; GFX9-NEXT: [[ADD18:%[0-9]+]]:_(i32) = G_ADD [[ADD17]], [[ZEXT24]] + ; GFX9-NEXT: [[UMULH19:%[0-9]+]]:_(i32) = G_UMULH [[UADDE12]], [[UV62]] + ; GFX9-NEXT: [[ADD19:%[0-9]+]]:_(i32) = G_ADD [[UMULH19]], [[ADD18]] + ; GFX9-NEXT: [[UADDO64:%[0-9]+]]:_(i32), [[UADDO65:%[0-9]+]]:_(i1) = G_UADDO [[UADDO52]], [[UADDO62]] + ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(i32), [[UADDE15:%[0-9]+]]:_(i1) = G_UADDE [[UADDE12]], [[ADD19]], [[UADDO65]] + ; GFX9-NEXT: [[UV64:%[0-9]+]]:_(i32), [[UV65:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR3]](i64) + ; GFX9-NEXT: [[UV66:%[0-9]+]]:_(i32), [[UV67:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR3]](i64) + ; GFX9-NEXT: [[MUL15:%[0-9]+]]:_(i32) = G_MUL [[UV67]], [[UADDO64]] + ; GFX9-NEXT: [[MUL16:%[0-9]+]]:_(i32) = G_MUL [[UV66]], [[UADDE14]] + ; GFX9-NEXT: [[UMULH20:%[0-9]+]]:_(i32) = G_UMULH [[UV66]], [[UADDO64]] + ; GFX9-NEXT: [[UADDO66:%[0-9]+]]:_(i32), [[UADDO67:%[0-9]+]]:_(i1) = G_UADDO [[MUL15]], [[MUL16]] + ; GFX9-NEXT: [[ZEXT25:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO67]](i1) + ; GFX9-NEXT: [[UADDO68:%[0-9]+]]:_(i32), [[UADDO69:%[0-9]+]]:_(i1) = G_UADDO [[UADDO66]], [[UMULH20]] + ; GFX9-NEXT: [[ZEXT26:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO69]](i1) + ; GFX9-NEXT: [[ADD20:%[0-9]+]]:_(i32) = G_ADD [[ZEXT25]], [[ZEXT26]] + ; GFX9-NEXT: [[MUL17:%[0-9]+]]:_(i32) = G_MUL [[UV67]], [[UADDE14]] + ; GFX9-NEXT: [[UMULH21:%[0-9]+]]:_(i32) = G_UMULH [[UV67]], [[UADDO64]] + ; GFX9-NEXT: [[UMULH22:%[0-9]+]]:_(i32) = G_UMULH [[UV66]], [[UADDE14]] + ; GFX9-NEXT: [[UADDO70:%[0-9]+]]:_(i32), [[UADDO71:%[0-9]+]]:_(i1) = G_UADDO [[MUL17]], [[UMULH21]] + ; GFX9-NEXT: [[ZEXT27:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO71]](i1) + ; GFX9-NEXT: [[UADDO72:%[0-9]+]]:_(i32), [[UADDO73:%[0-9]+]]:_(i1) = G_UADDO [[UADDO70]], [[UMULH22]] + ; GFX9-NEXT: [[ZEXT28:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO73]](i1) + ; GFX9-NEXT: [[ADD21:%[0-9]+]]:_(i32) = G_ADD [[ZEXT27]], [[ZEXT28]] + ; GFX9-NEXT: [[UADDO74:%[0-9]+]]:_(i32), [[UADDO75:%[0-9]+]]:_(i1) = G_UADDO [[UADDO72]], [[ADD20]] + ; GFX9-NEXT: [[ZEXT29:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO75]](i1) + ; GFX9-NEXT: [[ADD22:%[0-9]+]]:_(i32) = G_ADD [[ADD21]], [[ZEXT29]] + ; GFX9-NEXT: [[UMULH23:%[0-9]+]]:_(i32) = G_UMULH [[UV67]], [[UADDE14]] + ; GFX9-NEXT: [[ADD23:%[0-9]+]]:_(i32) = G_ADD [[UMULH23]], [[ADD22]] + ; GFX9-NEXT: [[UV68:%[0-9]+]]:_(i32), [[UV69:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR4]](i64) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(i1) = 
G_AMDGPU_MAD_U64_U32 [[UV68]](i32), [[UADDO74]], [[C5]] + ; GFX9-NEXT: [[UV70:%[0-9]+]]:_(i32), [[UV71:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_30]](i64) + ; GFX9-NEXT: [[ANYEXT5:%[0-9]+]]:_(i64) = G_ANYEXT [[UV71]](i32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV68]](i32), [[ADD23]], [[ANYEXT5]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV69]](i32), [[UADDO74]], [[AMDGPU_MAD_U64_U32_32]] + ; GFX9-NEXT: [[UV72:%[0-9]+]]:_(i32), [[UV73:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_34]](i64) + ; GFX9-NEXT: [[USUBO12:%[0-9]+]]:_(i32), [[USUBO13:%[0-9]+]]:_(i1) = G_USUBO [[UV64]], [[UV70]] + ; GFX9-NEXT: [[USUBE16:%[0-9]+]]:_(i32), [[USUBE17:%[0-9]+]]:_(i1) = G_USUBE [[UV65]], [[UV72]], [[USUBO13]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[UV65]], [[UV72]] + ; GFX9-NEXT: [[MV8:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO12]](i32), [[USUBE16]](i32) + ; GFX9-NEXT: [[UV74:%[0-9]+]]:_(i32), [[UV75:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR4]](i64) + ; GFX9-NEXT: [[ICMP8:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE16]](i32), [[UV75]] + ; GFX9-NEXT: [[SEXT4:%[0-9]+]]:_(i32) = G_SEXT [[ICMP8]](i1) + ; GFX9-NEXT: [[ICMP9:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO12]](i32), [[UV74]] + ; GFX9-NEXT: [[SEXT5:%[0-9]+]]:_(i32) = G_SEXT [[ICMP9]](i1) + ; GFX9-NEXT: [[ICMP10:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE16]](i32), [[UV75]] + ; GFX9-NEXT: [[SELECT4:%[0-9]+]]:_(i32) = G_SELECT [[ICMP10]](i1), [[SEXT5]], [[SEXT4]] + ; GFX9-NEXT: [[USUBO14:%[0-9]+]]:_(i32), [[USUBO15:%[0-9]+]]:_(i1) = G_USUBO [[USUBO12]], [[UV74]] + ; GFX9-NEXT: [[USUBE18:%[0-9]+]]:_(i32), [[USUBE19:%[0-9]+]]:_(i1) = G_USUBE [[SUB1]], [[UV75]], [[USUBO13]] + ; GFX9-NEXT: [[USUBE20:%[0-9]+]]:_(i32), [[USUBE21:%[0-9]+]]:_(i1) = G_USUBE [[USUBE18]], [[C6]], [[USUBO15]] + ; GFX9-NEXT: [[MV9:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO14]](i32), [[USUBE20]](i32) + ; GFX9-NEXT: [[ICMP11:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE20]](i32), [[UV75]] + ; GFX9-NEXT: [[SEXT6:%[0-9]+]]:_(i32) = G_SEXT [[ICMP11]](i1) + ; GFX9-NEXT: [[ICMP12:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO14]](i32), [[UV74]] + ; GFX9-NEXT: [[SEXT7:%[0-9]+]]:_(i32) = G_SEXT [[ICMP12]](i1) + ; GFX9-NEXT: [[ICMP13:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE20]](i32), [[UV75]] + ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(i32) = G_SELECT [[ICMP13]](i1), [[SEXT7]], [[SEXT6]] + ; GFX9-NEXT: [[USUBO16:%[0-9]+]]:_(i32), [[USUBO17:%[0-9]+]]:_(i1) = G_USUBO [[USUBO14]], [[UV74]] + ; GFX9-NEXT: [[USUBE22:%[0-9]+]]:_(i32), [[USUBE23:%[0-9]+]]:_(i1) = G_USUBE [[USUBE18]], [[UV75]], [[USUBO15]] + ; GFX9-NEXT: [[USUBE24:%[0-9]+]]:_(i32), [[USUBE25:%[0-9]+]]:_(i1) = G_USUBE [[USUBE22]], [[C6]], [[USUBO17]] + ; GFX9-NEXT: [[MV10:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO16]](i32), [[USUBE24]](i32) + ; GFX9-NEXT: [[ICMP14:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT5]](i32), [[C6]] + ; GFX9-NEXT: [[SELECT6:%[0-9]+]]:_(i64) = G_SELECT [[ICMP14]](i1), [[MV10]], [[MV9]] + ; GFX9-NEXT: [[ICMP15:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT4]](i32), [[C6]] + ; GFX9-NEXT: [[SELECT7:%[0-9]+]]:_(i64) = G_SELECT [[ICMP15]](i1), [[SELECT6]], [[MV8]] + ; GFX9-NEXT: [[XOR5:%[0-9]+]]:_(i64) = G_XOR [[SELECT7]], [[ASHR2]] + ; GFX9-NEXT: [[UV76:%[0-9]+]]:_(i32), [[UV77:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR5]](i64) + ; GFX9-NEXT: [[UV78:%[0-9]+]]:_(i32), [[UV79:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES 
[[ASHR2]](i64) + ; GFX9-NEXT: [[USUBO18:%[0-9]+]]:_(i32), [[USUBO19:%[0-9]+]]:_(i1) = G_USUBO [[UV76]], [[UV78]] + ; GFX9-NEXT: [[USUBE26:%[0-9]+]]:_(i32), [[USUBE27:%[0-9]+]]:_(i1) = G_USUBE [[UV77]], [[UV79]], [[USUBO19]] + ; GFX9-NEXT: [[MV11:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO18]](i32), [[USUBE26]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[MV5]](i64), [[MV11]](i64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) + ; ; GFX10-LABEL: name: test_srem_v2s64 ; GFX10: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[C]](s32) - ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[C]](s32) - ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) - ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] - ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] - ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] - ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]] - ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV12]](s32) - ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV13]](s32) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] - ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] - ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 - ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] - ; GFX10-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] - ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] - ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) - ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI 
[[INTRINSIC_TRUNC]](s32) - ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) - ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV16]] - ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV17]], [[USUBO1]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C5]] - ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV19]], [[MUL]] - ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]] - ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV18]] - ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV18]] - ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[MUL3]] - ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] - ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV18]] - ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] - ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] - ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] - ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] - ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD4]] - ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] - ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64) - ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] - ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV21]], [[MUL5]] - ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] - ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[MUL6]] - ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV20]] - ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] - ; GFX10-NEXT: 
[[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV20]] - ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] - ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] - ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] - ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV20]] - ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] - ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] - ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] - ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] - ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] - ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD10]] - ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] - ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO26]] - ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE6]] - ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO26]] - ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] - ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] - ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE6]] - ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO26]] - ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE6]] - ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] - ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] - ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] - ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE6]] - ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD 
[[UMULH11]], [[ADD14]] - ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDO36]], [[C5]] - ; GFX10-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV26]], [[ADD15]] - ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV29]], [[MUL13]] - ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV27]], [[UADDO36]] - ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL14]] - ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV28]] - ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[ADD17]], [[USUBO3]] - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV23]], [[ADD17]] - ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; GFX10-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV31]] - ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV30]] - ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV31]] - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV30]] - ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV31]], [[USUBO3]] - ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] - ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) - ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV31]] - ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV30]] - ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV31]] - ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV30]] - ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV31]], [[USUBO5]] - ; GFX10-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] - ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) - ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] - ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] - ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] - ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] - ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[ASHR]] - ; GFX10-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) - ; GFX10-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX10-NEXT: 
[[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV32]], [[UV34]] - ; GFX10-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV33]], [[UV35]], [[USUBO9]] - ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32) - ; GFX10-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) - ; GFX10-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C]](s32) - ; GFX10-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX10-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64) - ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UV36]], [[UV38]] - ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UV37]], [[UV39]], [[UADDO39]] - ; GFX10-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) - ; GFX10-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX10-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64) - ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UV40]], [[UV42]] - ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UV41]], [[UV43]], [[UADDO41]] - ; GFX10-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) - ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[MV6]], [[ASHR2]] - ; GFX10-NEXT: [[XOR4:%[0-9]+]]:_(s64) = G_XOR [[MV7]], [[ASHR3]] - ; GFX10-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) - ; GFX10-NEXT: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV44]](s32) - ; GFX10-NEXT: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV45]](s32) - ; GFX10-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C1]] - ; GFX10-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[UITOFP2]] - ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD2]](s32) - ; GFX10-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] - ; GFX10-NEXT: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[FMUL5]], [[C3]] - ; GFX10-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL6]] - ; GFX10-NEXT: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C4]] - ; GFX10-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]] - ; GFX10-NEXT: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD3]](s32) - ; GFX10-NEXT: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32) - ; GFX10-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) - ; GFX10-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) - ; GFX10-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV46]], [[UV48]] - ; GFX10-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV47]], [[UV49]], [[USUBO11]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](s32), [[FPTOUI2]], [[C5]] - ; GFX10-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) - ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[FPTOUI3]] - ; GFX10-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[UV51]], [[MUL15]] - ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[USUBE14]], [[FPTOUI2]] - ; GFX10-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[MUL16]] - ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = 
G_MUL [[FPTOUI3]], [[UV50]] - ; GFX10-NEXT: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD19]] - ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV50]] - ; GFX10-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[MUL18]] - ; GFX10-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1) - ; GFX10-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH12]] - ; GFX10-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO45]](s1) - ; GFX10-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] - ; GFX10-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD19]] - ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV50]] - ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD19]] - ; GFX10-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL19]], [[UMULH13]] - ; GFX10-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) - ; GFX10-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH14]] - ; GFX10-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1) - ; GFX10-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] - ; GFX10-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[UADDO48]], [[ADD20]] - ; GFX10-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) - ; GFX10-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT19]] - ; GFX10-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD19]] - ; GFX10-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[ADD22]] - ; GFX10-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO50]] - ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO53]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](s32), [[UADDO52]], [[C5]] - ; GFX10-NEXT: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_8]](s64) - ; GFX10-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[UADDE12]] - ; GFX10-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UV53]], [[MUL20]] - ; GFX10-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[USUBE14]], [[UADDO52]] - ; GFX10-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[MUL21]] - ; GFX10-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV52]] - ; GFX10-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[UADDO52]], [[ADD25]] - ; GFX10-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[UV52]] - ; GFX10-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL22]], [[MUL23]] - ; GFX10-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) - ; GFX10-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH16]] - ; GFX10-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO57]](s1) - ; GFX10-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] - ; GFX10-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[ADD25]] - ; GFX10-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV52]] - ; GFX10-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[ADD25]] - ; GFX10-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL24]], [[UMULH17]] - ; GFX10-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) - ; GFX10-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH18]] - ; GFX10-NEXT: 
[[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1) - ; GFX10-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] - ; GFX10-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[UADDO60]], [[ADD26]] - ; GFX10-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) - ; GFX10-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ADD27]], [[ZEXT24]] - ; GFX10-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[ADD25]] - ; GFX10-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD28]] - ; GFX10-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[UADDO62]] - ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD29]], [[UADDO65]] - ; GFX10-NEXT: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) - ; GFX10-NEXT: [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) - ; GFX10-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[UV57]], [[UADDO64]] - ; GFX10-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[UV56]], [[UADDE14]] - ; GFX10-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV56]], [[UADDO64]] - ; GFX10-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL25]], [[MUL26]] - ; GFX10-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) - ; GFX10-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH20]] - ; GFX10-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO69]](s1) - ; GFX10-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] - ; GFX10-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UV57]], [[UADDE14]] - ; GFX10-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV57]], [[UADDO64]] - ; GFX10-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV56]], [[UADDE14]] - ; GFX10-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[UMULH21]] - ; GFX10-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1) - ; GFX10-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH22]] - ; GFX10-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO73]](s1) - ; GFX10-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] - ; GFX10-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[ADD30]] - ; GFX10-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO75]](s1) - ; GFX10-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ADD31]], [[ZEXT29]] - ; GFX10-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV57]], [[UADDE14]] - ; GFX10-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[ADD32]] - ; GFX10-NEXT: [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV58]](s32), [[UADDO74]], [[C5]] - ; GFX10-NEXT: [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX10-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UV58]], [[ADD33]] - ; GFX10-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[UV61]], [[MUL28]] - ; GFX10-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UV59]], [[UADDO74]] - ; GFX10-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[ADD34]], [[MUL29]] - ; GFX10-NEXT: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[UV54]], [[UV60]] - ; GFX10-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[UV55]], [[ADD35]], [[USUBO13]] - ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV55]], [[ADD35]] - ; GFX10-NEXT: [[MV8:%[0-9]+]]:_(s64) = 
G_MERGE_VALUES [[USUBO12]](s32), [[USUBE16]](s32) - ; GFX10-NEXT: [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) - ; GFX10-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE16]](s32), [[UV63]] - ; GFX10-NEXT: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1) - ; GFX10-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV62]] - ; GFX10-NEXT: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1) - ; GFX10-NEXT: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE16]](s32), [[UV63]] - ; GFX10-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]] - ; GFX10-NEXT: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[USUBO12]], [[UV62]] - ; GFX10-NEXT: [[USUBE18:%[0-9]+]]:_(s32), [[USUBE19:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV63]], [[USUBO13]] - ; GFX10-NEXT: [[USUBE20:%[0-9]+]]:_(s32), [[USUBE21:%[0-9]+]]:_(s1) = G_USUBE [[USUBE18]], [[C6]], [[USUBO15]] - ; GFX10-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO14]](s32), [[USUBE20]](s32) - ; GFX10-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE20]](s32), [[UV63]] - ; GFX10-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) - ; GFX10-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO14]](s32), [[UV62]] - ; GFX10-NEXT: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1) - ; GFX10-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE20]](s32), [[UV63]] - ; GFX10-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] - ; GFX10-NEXT: [[USUBO16:%[0-9]+]]:_(s32), [[USUBO17:%[0-9]+]]:_(s1) = G_USUBO [[USUBO14]], [[UV62]] - ; GFX10-NEXT: [[USUBE22:%[0-9]+]]:_(s32), [[USUBE23:%[0-9]+]]:_(s1) = G_USUBE [[USUBE18]], [[UV63]], [[USUBO15]] - ; GFX10-NEXT: [[USUBE24:%[0-9]+]]:_(s32), [[USUBE25:%[0-9]+]]:_(s1) = G_USUBE [[USUBE22]], [[C6]], [[USUBO17]] - ; GFX10-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO16]](s32), [[USUBE24]](s32) - ; GFX10-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C6]] - ; GFX10-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV10]], [[MV9]] - ; GFX10-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C6]] - ; GFX10-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV8]] - ; GFX10-NEXT: [[XOR5:%[0-9]+]]:_(s64) = G_XOR [[SELECT7]], [[ASHR2]] - ; GFX10-NEXT: [[UV64:%[0-9]+]]:_(s32), [[UV65:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) - ; GFX10-NEXT: [[UV66:%[0-9]+]]:_(s32), [[UV67:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64) - ; GFX10-NEXT: [[USUBO18:%[0-9]+]]:_(s32), [[USUBO19:%[0-9]+]]:_(s1) = G_USUBO [[UV64]], [[UV66]] - ; GFX10-NEXT: [[USUBE26:%[0-9]+]]:_(s32), [[USUBE27:%[0-9]+]]:_(s1) = G_USUBE [[UV65]], [[UV67]], [[USUBO19]] - ; GFX10-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO18]](s32), [[USUBE26]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV5]](s64), [[MV11]](s64) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - %2:_(<2 x s64>) = G_SREM %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(i64), 
[[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY1]](<2 x i64>) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[UV]], [[C]](i32) + ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[UV2]], [[C]](i32) + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV]](i64) + ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR]](i64) + ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV4]], [[UV6]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR1]](i64) + ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UV8]], [[UV10]] + ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO2]](i32), [[UADDE2]](i32) + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(i64) = G_XOR [[MV]], [[ASHR]] + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(i64) = G_XOR [[MV1]], [[ASHR1]] + ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[UV12]](i32) + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[UV13]](i32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD]](f32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX10-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD1]](f32) + ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC]](f32) + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C5]](i64) + ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV14]], [[UV16]] + ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV15]], [[UV17]], [[USUBO1]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[FPTOUI]], [[C5]] + ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](i64) + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[UV19]], [[MUL]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL 
[[USUBE]], [[FPTOUI]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ADD]], [[MUL1]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[UV18]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[UV18]] + ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(i32), [[UADDO5:%[0-9]+]]:_(i1) = G_UADDO [[MUL2]], [[MUL3]] + ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO5]](i1) + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(i32), [[UADDO7:%[0-9]+]]:_(i1) = G_UADDO [[UADDO4]], [[UMULH]] + ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO7]](i1) + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[UV18]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(i32), [[UADDO9:%[0-9]+]]:_(i1) = G_UADDO [[MUL4]], [[UMULH1]] + ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO9]](i1) + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(i32), [[UADDO11:%[0-9]+]]:_(i1) = G_UADDO [[UADDO8]], [[UMULH2]] + ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO11]](i1) + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(i32), [[UADDO13:%[0-9]+]]:_(i1) = G_UADDO [[UADDO10]], [[ADD2]] + ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO13]](i1) + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[UMULH3]], [[ADD4]] + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(i32), [[UADDO15:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI]], [[UADDO12]] + ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(i32), [[UADDE5:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[UADDO14]], [[C5]] + ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](i64) + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[UADDE4]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[UV21]], [[MUL5]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(i32) = G_MUL [[USUBE]], [[UADDO14]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[ADD6]], [[MUL6]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(i32) = G_MUL [[UADDE4]], [[UV20]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(i32) = G_MUL [[UADDO14]], [[ADD7]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(i32) = G_UMULH [[UADDO14]], [[UV20]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(i32), [[UADDO17:%[0-9]+]]:_(i1) = G_UADDO [[MUL7]], [[MUL8]] + ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO17]](i1) + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(i32), [[UADDO19:%[0-9]+]]:_(i1) = G_UADDO [[UADDO16]], [[UMULH4]] + ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO19]](i1) + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(i32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(i32) = G_MUL [[UADDE4]], [[ADD7]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(i32) = G_UMULH [[UADDE4]], [[UV20]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(i32) = G_UMULH [[UADDO14]], [[ADD7]] + ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(i32), [[UADDO21:%[0-9]+]]:_(i1) = G_UADDO [[MUL9]], [[UMULH5]] + ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO21]](i1) + ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(i32), [[UADDO23:%[0-9]+]]:_(i1) = G_UADDO [[UADDO20]], 
[[UMULH6]] + ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO23]](i1) + ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(i32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(i32), [[UADDO25:%[0-9]+]]:_(i1) = G_UADDO [[UADDO22]], [[ADD8]] + ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO25]](i1) + ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(i32) = G_ADD [[ADD9]], [[ZEXT9]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(i32) = G_UMULH [[UADDE4]], [[ADD7]] + ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(i32) = G_ADD [[UMULH7]], [[ADD10]] + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(i32), [[UADDO27:%[0-9]+]]:_(i1) = G_UADDO [[UADDO14]], [[UADDO24]] + ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(i32), [[UADDE7:%[0-9]+]]:_(i1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] + ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR]](i64) + ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR]](i64) + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(i32) = G_MUL [[UV25]], [[UADDO26]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(i32) = G_MUL [[UV24]], [[UADDE6]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(i32) = G_UMULH [[UV24]], [[UADDO26]] + ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(i32), [[UADDO29:%[0-9]+]]:_(i1) = G_UADDO [[MUL10]], [[MUL11]] + ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO29]](i1) + ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(i32), [[UADDO31:%[0-9]+]]:_(i1) = G_UADDO [[UADDO28]], [[UMULH8]] + ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO31]](i1) + ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(i32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(i32) = G_MUL [[UV25]], [[UADDE6]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(i32) = G_UMULH [[UV25]], [[UADDO26]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(i32) = G_UMULH [[UV24]], [[UADDE6]] + ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(i32), [[UADDO33:%[0-9]+]]:_(i1) = G_UADDO [[MUL12]], [[UMULH9]] + ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO33]](i1) + ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(i32), [[UADDO35:%[0-9]+]]:_(i1) = G_UADDO [[UADDO32]], [[UMULH10]] + ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO35]](i1) + ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(i32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(i32), [[UADDO37:%[0-9]+]]:_(i1) = G_UADDO [[UADDO34]], [[ADD12]] + ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO37]](i1) + ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(i32) = G_ADD [[ADD13]], [[ZEXT14]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(i32) = G_UMULH [[UV25]], [[UADDE6]] + ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(i32) = G_ADD [[UMULH11]], [[ADD14]] + ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV26]](i32), [[UADDO36]], [[C5]] + ; GFX10-NEXT: [[UV28:%[0-9]+]]:_(i32), [[UV29:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](i64) + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(i32) = G_MUL [[UV26]], [[ADD15]] + ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(i32) = G_ADD [[UV29]], [[MUL13]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(i32) = G_MUL [[UV27]], [[UADDO36]] + ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(i32) = G_ADD [[ADD16]], [[MUL14]] + ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(i32), [[USUBO3:%[0-9]+]]:_(i1) = G_USUBO [[UV22]], [[UV28]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV23]], [[ADD17]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV23]], 
[[ADD17]] + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO2]](i32), [[USUBE2]](i32) + ; GFX10-NEXT: [[UV30:%[0-9]+]]:_(i32), [[UV31:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE2]](i32), [[UV31]] + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO2]](i32), [[UV30]] + ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(i32) = G_SEXT [[ICMP1]](i1) + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE2]](i32), [[UV31]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SEXT1]], [[SEXT]] + ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(i32), [[USUBO5:%[0-9]+]]:_(i1) = G_USUBO [[USUBO2]], [[UV30]] + ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(i32), [[USUBE5:%[0-9]+]]:_(i1) = G_USUBE [[SUB]], [[UV31]], [[USUBO3]] + ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(i32), [[USUBE7:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO4]](i32), [[USUBE6]](i32) + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE6]](i32), [[UV31]] + ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(i32) = G_SEXT [[ICMP3]](i1) + ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO4]](i32), [[UV30]] + ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(i32) = G_SEXT [[ICMP4]](i1) + ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE6]](i32), [[UV31]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP5]](i1), [[SEXT3]], [[SEXT2]] + ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(i32), [[USUBO7:%[0-9]+]]:_(i1) = G_USUBO [[USUBO4]], [[UV30]] + ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(i32), [[USUBE9:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[UV31]], [[USUBO5]] + ; GFX10-NEXT: [[USUBE10:%[0-9]+]]:_(i32), [[USUBE11:%[0-9]+]]:_(i1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] + ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO6]](i32), [[USUBE10]](i32) + ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT1]](i32), [[C6]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[MV4]], [[MV3]] + ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT]](i32), [[C6]] + ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP7]](i1), [[SELECT2]], [[MV2]] + ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(i64) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX10-NEXT: [[UV32:%[0-9]+]]:_(i32), [[UV33:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR2]](i64) + ; GFX10-NEXT: [[UV34:%[0-9]+]]:_(i32), [[UV35:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR]](i64) + ; GFX10-NEXT: [[USUBO8:%[0-9]+]]:_(i32), [[USUBO9:%[0-9]+]]:_(i1) = G_USUBO [[UV32]], [[UV34]] + ; GFX10-NEXT: [[USUBE12:%[0-9]+]]:_(i32), [[USUBE13:%[0-9]+]]:_(i1) = G_USUBE [[UV33]], [[UV35]], [[USUBO9]] + ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO8]](i32), [[USUBE12]](i32) + ; GFX10-NEXT: [[ASHR2:%[0-9]+]]:_(i64) = G_ASHR [[UV1]], [[C]](i32) + ; GFX10-NEXT: [[ASHR3:%[0-9]+]]:_(i64) = G_ASHR [[UV3]], [[C]](i32) + ; GFX10-NEXT: [[UV36:%[0-9]+]]:_(i32), [[UV37:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV1]](i64) + ; GFX10-NEXT: [[UV38:%[0-9]+]]:_(i32), [[UV39:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR2]](i64) + ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(i32), [[UADDO39:%[0-9]+]]:_(i1) = G_UADDO [[UV36]], [[UV38]] + ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(i32), [[UADDE9:%[0-9]+]]:_(i1) = G_UADDE [[UV37]], [[UV39]], [[UADDO39]] + ; GFX10-NEXT: [[MV6:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO38]](i32), [[UADDE8]](i32) + ; GFX10-NEXT: 
[[UV40:%[0-9]+]]:_(i32), [[UV41:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV3]](i64) + ; GFX10-NEXT: [[UV42:%[0-9]+]]:_(i32), [[UV43:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR3]](i64) + ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(i32), [[UADDO41:%[0-9]+]]:_(i1) = G_UADDO [[UV40]], [[UV42]] + ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(i32), [[UADDE11:%[0-9]+]]:_(i1) = G_UADDE [[UV41]], [[UV43]], [[UADDO41]] + ; GFX10-NEXT: [[MV7:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO40]](i32), [[UADDE10]](i32) + ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(i64) = G_XOR [[MV6]], [[ASHR2]] + ; GFX10-NEXT: [[XOR4:%[0-9]+]]:_(i64) = G_XOR [[MV7]], [[ASHR3]] + ; GFX10-NEXT: [[UV44:%[0-9]+]]:_(i32), [[UV45:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR4]](i64) + ; GFX10-NEXT: [[UITOFP2:%[0-9]+]]:_(f32) = G_UITOFP [[UV44]](i32) + ; GFX10-NEXT: [[UITOFP3:%[0-9]+]]:_(f32) = G_UITOFP [[UV45]](i32) + ; GFX10-NEXT: [[FMUL4:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP3]], [[C1]] + ; GFX10-NEXT: [[FADD2:%[0-9]+]]:_(f32) = G_FADD [[FMUL4]], [[UITOFP2]] + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD2]](f32) + ; GFX10-NEXT: [[FMUL5:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] + ; GFX10-NEXT: [[FMUL6:%[0-9]+]]:_(f32) = G_FMUL [[FMUL5]], [[C3]] + ; GFX10-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL6]] + ; GFX10-NEXT: [[FMUL7:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C4]] + ; GFX10-NEXT: [[FADD3:%[0-9]+]]:_(f32) = G_FADD [[FMUL7]], [[FMUL5]] + ; GFX10-NEXT: [[FPTOUI2:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD3]](f32) + ; GFX10-NEXT: [[FPTOUI3:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC1]](f32) + ; GFX10-NEXT: [[UV46:%[0-9]+]]:_(i32), [[UV47:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C5]](i64) + ; GFX10-NEXT: [[UV48:%[0-9]+]]:_(i32), [[UV49:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR4]](i64) + ; GFX10-NEXT: [[USUBO10:%[0-9]+]]:_(i32), [[USUBO11:%[0-9]+]]:_(i1) = G_USUBO [[UV46]], [[UV48]] + ; GFX10-NEXT: [[USUBE14:%[0-9]+]]:_(i32), [[USUBE15:%[0-9]+]]:_(i1) = G_USUBE [[UV47]], [[UV49]], [[USUBO11]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](i32), [[FPTOUI2]], [[C5]] + ; GFX10-NEXT: [[UV50:%[0-9]+]]:_(i32), [[UV51:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](i64) + ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(i32) = G_MUL [[USUBO10]], [[FPTOUI3]] + ; GFX10-NEXT: [[ADD18:%[0-9]+]]:_(i32) = G_ADD [[UV51]], [[MUL15]] + ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(i32) = G_MUL [[USUBE14]], [[FPTOUI2]] + ; GFX10-NEXT: [[ADD19:%[0-9]+]]:_(i32) = G_ADD [[ADD18]], [[MUL16]] + ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI3]], [[UV50]] + ; GFX10-NEXT: [[MUL18:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI2]], [[ADD19]] + ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI2]], [[UV50]] + ; GFX10-NEXT: [[UADDO42:%[0-9]+]]:_(i32), [[UADDO43:%[0-9]+]]:_(i1) = G_UADDO [[MUL17]], [[MUL18]] + ; GFX10-NEXT: [[ZEXT15:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO43]](i1) + ; GFX10-NEXT: [[UADDO44:%[0-9]+]]:_(i32), [[UADDO45:%[0-9]+]]:_(i1) = G_UADDO [[UADDO42]], [[UMULH12]] + ; GFX10-NEXT: [[ZEXT16:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO45]](i1) + ; GFX10-NEXT: [[ADD20:%[0-9]+]]:_(i32) = G_ADD [[ZEXT15]], [[ZEXT16]] + ; GFX10-NEXT: [[MUL19:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI3]], [[ADD19]] + ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI3]], [[UV50]] + ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI2]], [[ADD19]] + ; GFX10-NEXT: [[UADDO46:%[0-9]+]]:_(i32), [[UADDO47:%[0-9]+]]:_(i1) = G_UADDO [[MUL19]], [[UMULH13]] + ; GFX10-NEXT: 
[[ZEXT17:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO47]](i1) + ; GFX10-NEXT: [[UADDO48:%[0-9]+]]:_(i32), [[UADDO49:%[0-9]+]]:_(i1) = G_UADDO [[UADDO46]], [[UMULH14]] + ; GFX10-NEXT: [[ZEXT18:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO49]](i1) + ; GFX10-NEXT: [[ADD21:%[0-9]+]]:_(i32) = G_ADD [[ZEXT17]], [[ZEXT18]] + ; GFX10-NEXT: [[UADDO50:%[0-9]+]]:_(i32), [[UADDO51:%[0-9]+]]:_(i1) = G_UADDO [[UADDO48]], [[ADD20]] + ; GFX10-NEXT: [[ZEXT19:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO51]](i1) + ; GFX10-NEXT: [[ADD22:%[0-9]+]]:_(i32) = G_ADD [[ADD21]], [[ZEXT19]] + ; GFX10-NEXT: [[UMULH15:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI3]], [[ADD19]] + ; GFX10-NEXT: [[ADD23:%[0-9]+]]:_(i32) = G_ADD [[UMULH15]], [[ADD22]] + ; GFX10-NEXT: [[UADDO52:%[0-9]+]]:_(i32), [[UADDO53:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI2]], [[UADDO50]] + ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(i32), [[UADDE13:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO53]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](i32), [[UADDO52]], [[C5]] + ; GFX10-NEXT: [[UV52:%[0-9]+]]:_(i32), [[UV53:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_8]](i64) + ; GFX10-NEXT: [[MUL20:%[0-9]+]]:_(i32) = G_MUL [[USUBO10]], [[UADDE12]] + ; GFX10-NEXT: [[ADD24:%[0-9]+]]:_(i32) = G_ADD [[UV53]], [[MUL20]] + ; GFX10-NEXT: [[MUL21:%[0-9]+]]:_(i32) = G_MUL [[USUBE14]], [[UADDO52]] + ; GFX10-NEXT: [[ADD25:%[0-9]+]]:_(i32) = G_ADD [[ADD24]], [[MUL21]] + ; GFX10-NEXT: [[MUL22:%[0-9]+]]:_(i32) = G_MUL [[UADDE12]], [[UV52]] + ; GFX10-NEXT: [[MUL23:%[0-9]+]]:_(i32) = G_MUL [[UADDO52]], [[ADD25]] + ; GFX10-NEXT: [[UMULH16:%[0-9]+]]:_(i32) = G_UMULH [[UADDO52]], [[UV52]] + ; GFX10-NEXT: [[UADDO54:%[0-9]+]]:_(i32), [[UADDO55:%[0-9]+]]:_(i1) = G_UADDO [[MUL22]], [[MUL23]] + ; GFX10-NEXT: [[ZEXT20:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO55]](i1) + ; GFX10-NEXT: [[UADDO56:%[0-9]+]]:_(i32), [[UADDO57:%[0-9]+]]:_(i1) = G_UADDO [[UADDO54]], [[UMULH16]] + ; GFX10-NEXT: [[ZEXT21:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO57]](i1) + ; GFX10-NEXT: [[ADD26:%[0-9]+]]:_(i32) = G_ADD [[ZEXT20]], [[ZEXT21]] + ; GFX10-NEXT: [[MUL24:%[0-9]+]]:_(i32) = G_MUL [[UADDE12]], [[ADD25]] + ; GFX10-NEXT: [[UMULH17:%[0-9]+]]:_(i32) = G_UMULH [[UADDE12]], [[UV52]] + ; GFX10-NEXT: [[UMULH18:%[0-9]+]]:_(i32) = G_UMULH [[UADDO52]], [[ADD25]] + ; GFX10-NEXT: [[UADDO58:%[0-9]+]]:_(i32), [[UADDO59:%[0-9]+]]:_(i1) = G_UADDO [[MUL24]], [[UMULH17]] + ; GFX10-NEXT: [[ZEXT22:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO59]](i1) + ; GFX10-NEXT: [[UADDO60:%[0-9]+]]:_(i32), [[UADDO61:%[0-9]+]]:_(i1) = G_UADDO [[UADDO58]], [[UMULH18]] + ; GFX10-NEXT: [[ZEXT23:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO61]](i1) + ; GFX10-NEXT: [[ADD27:%[0-9]+]]:_(i32) = G_ADD [[ZEXT22]], [[ZEXT23]] + ; GFX10-NEXT: [[UADDO62:%[0-9]+]]:_(i32), [[UADDO63:%[0-9]+]]:_(i1) = G_UADDO [[UADDO60]], [[ADD26]] + ; GFX10-NEXT: [[ZEXT24:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO63]](i1) + ; GFX10-NEXT: [[ADD28:%[0-9]+]]:_(i32) = G_ADD [[ADD27]], [[ZEXT24]] + ; GFX10-NEXT: [[UMULH19:%[0-9]+]]:_(i32) = G_UMULH [[UADDE12]], [[ADD25]] + ; GFX10-NEXT: [[ADD29:%[0-9]+]]:_(i32) = G_ADD [[UMULH19]], [[ADD28]] + ; GFX10-NEXT: [[UADDO64:%[0-9]+]]:_(i32), [[UADDO65:%[0-9]+]]:_(i1) = G_UADDO [[UADDO52]], [[UADDO62]] + ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(i32), [[UADDE15:%[0-9]+]]:_(i1) = G_UADDE [[UADDE12]], [[ADD29]], [[UADDO65]] + ; GFX10-NEXT: [[UV54:%[0-9]+]]:_(i32), [[UV55:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR3]](i64) + ; GFX10-NEXT: [[UV56:%[0-9]+]]:_(i32), [[UV57:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR3]](i64) + ; GFX10-NEXT: 
[[MUL25:%[0-9]+]]:_(i32) = G_MUL [[UV57]], [[UADDO64]] + ; GFX10-NEXT: [[MUL26:%[0-9]+]]:_(i32) = G_MUL [[UV56]], [[UADDE14]] + ; GFX10-NEXT: [[UMULH20:%[0-9]+]]:_(i32) = G_UMULH [[UV56]], [[UADDO64]] + ; GFX10-NEXT: [[UADDO66:%[0-9]+]]:_(i32), [[UADDO67:%[0-9]+]]:_(i1) = G_UADDO [[MUL25]], [[MUL26]] + ; GFX10-NEXT: [[ZEXT25:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO67]](i1) + ; GFX10-NEXT: [[UADDO68:%[0-9]+]]:_(i32), [[UADDO69:%[0-9]+]]:_(i1) = G_UADDO [[UADDO66]], [[UMULH20]] + ; GFX10-NEXT: [[ZEXT26:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO69]](i1) + ; GFX10-NEXT: [[ADD30:%[0-9]+]]:_(i32) = G_ADD [[ZEXT25]], [[ZEXT26]] + ; GFX10-NEXT: [[MUL27:%[0-9]+]]:_(i32) = G_MUL [[UV57]], [[UADDE14]] + ; GFX10-NEXT: [[UMULH21:%[0-9]+]]:_(i32) = G_UMULH [[UV57]], [[UADDO64]] + ; GFX10-NEXT: [[UMULH22:%[0-9]+]]:_(i32) = G_UMULH [[UV56]], [[UADDE14]] + ; GFX10-NEXT: [[UADDO70:%[0-9]+]]:_(i32), [[UADDO71:%[0-9]+]]:_(i1) = G_UADDO [[MUL27]], [[UMULH21]] + ; GFX10-NEXT: [[ZEXT27:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO71]](i1) + ; GFX10-NEXT: [[UADDO72:%[0-9]+]]:_(i32), [[UADDO73:%[0-9]+]]:_(i1) = G_UADDO [[UADDO70]], [[UMULH22]] + ; GFX10-NEXT: [[ZEXT28:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO73]](i1) + ; GFX10-NEXT: [[ADD31:%[0-9]+]]:_(i32) = G_ADD [[ZEXT27]], [[ZEXT28]] + ; GFX10-NEXT: [[UADDO74:%[0-9]+]]:_(i32), [[UADDO75:%[0-9]+]]:_(i1) = G_UADDO [[UADDO72]], [[ADD30]] + ; GFX10-NEXT: [[ZEXT29:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO75]](i1) + ; GFX10-NEXT: [[ADD32:%[0-9]+]]:_(i32) = G_ADD [[ADD31]], [[ZEXT29]] + ; GFX10-NEXT: [[UMULH23:%[0-9]+]]:_(i32) = G_UMULH [[UV57]], [[UADDE14]] + ; GFX10-NEXT: [[ADD33:%[0-9]+]]:_(i32) = G_ADD [[UMULH23]], [[ADD32]] + ; GFX10-NEXT: [[UV58:%[0-9]+]]:_(i32), [[UV59:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR4]](i64) + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV58]](i32), [[UADDO74]], [[C5]] + ; GFX10-NEXT: [[UV60:%[0-9]+]]:_(i32), [[UV61:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](i64) + ; GFX10-NEXT: [[MUL28:%[0-9]+]]:_(i32) = G_MUL [[UV58]], [[ADD33]] + ; GFX10-NEXT: [[ADD34:%[0-9]+]]:_(i32) = G_ADD [[UV61]], [[MUL28]] + ; GFX10-NEXT: [[MUL29:%[0-9]+]]:_(i32) = G_MUL [[UV59]], [[UADDO74]] + ; GFX10-NEXT: [[ADD35:%[0-9]+]]:_(i32) = G_ADD [[ADD34]], [[MUL29]] + ; GFX10-NEXT: [[USUBO12:%[0-9]+]]:_(i32), [[USUBO13:%[0-9]+]]:_(i1) = G_USUBO [[UV54]], [[UV60]] + ; GFX10-NEXT: [[USUBE16:%[0-9]+]]:_(i32), [[USUBE17:%[0-9]+]]:_(i1) = G_USUBE [[UV55]], [[ADD35]], [[USUBO13]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[UV55]], [[ADD35]] + ; GFX10-NEXT: [[MV8:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO12]](i32), [[USUBE16]](i32) + ; GFX10-NEXT: [[UV62:%[0-9]+]]:_(i32), [[UV63:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR4]](i64) + ; GFX10-NEXT: [[ICMP8:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE16]](i32), [[UV63]] + ; GFX10-NEXT: [[SEXT4:%[0-9]+]]:_(i32) = G_SEXT [[ICMP8]](i1) + ; GFX10-NEXT: [[ICMP9:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO12]](i32), [[UV62]] + ; GFX10-NEXT: [[SEXT5:%[0-9]+]]:_(i32) = G_SEXT [[ICMP9]](i1) + ; GFX10-NEXT: [[ICMP10:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE16]](i32), [[UV63]] + ; GFX10-NEXT: [[SELECT4:%[0-9]+]]:_(i32) = G_SELECT [[ICMP10]](i1), [[SEXT5]], [[SEXT4]] + ; GFX10-NEXT: [[USUBO14:%[0-9]+]]:_(i32), [[USUBO15:%[0-9]+]]:_(i1) = G_USUBO [[USUBO12]], [[UV62]] + ; GFX10-NEXT: [[USUBE18:%[0-9]+]]:_(i32), [[USUBE19:%[0-9]+]]:_(i1) = G_USUBE [[SUB1]], [[UV63]], [[USUBO13]] + ; GFX10-NEXT: [[USUBE20:%[0-9]+]]:_(i32), [[USUBE21:%[0-9]+]]:_(i1) = G_USUBE [[USUBE18]], 
[[C6]], [[USUBO15]] + ; GFX10-NEXT: [[MV9:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO14]](i32), [[USUBE20]](i32) + ; GFX10-NEXT: [[ICMP11:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE20]](i32), [[UV63]] + ; GFX10-NEXT: [[SEXT6:%[0-9]+]]:_(i32) = G_SEXT [[ICMP11]](i1) + ; GFX10-NEXT: [[ICMP12:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO14]](i32), [[UV62]] + ; GFX10-NEXT: [[SEXT7:%[0-9]+]]:_(i32) = G_SEXT [[ICMP12]](i1) + ; GFX10-NEXT: [[ICMP13:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE20]](i32), [[UV63]] + ; GFX10-NEXT: [[SELECT5:%[0-9]+]]:_(i32) = G_SELECT [[ICMP13]](i1), [[SEXT7]], [[SEXT6]] + ; GFX10-NEXT: [[USUBO16:%[0-9]+]]:_(i32), [[USUBO17:%[0-9]+]]:_(i1) = G_USUBO [[USUBO14]], [[UV62]] + ; GFX10-NEXT: [[USUBE22:%[0-9]+]]:_(i32), [[USUBE23:%[0-9]+]]:_(i1) = G_USUBE [[USUBE18]], [[UV63]], [[USUBO15]] + ; GFX10-NEXT: [[USUBE24:%[0-9]+]]:_(i32), [[USUBE25:%[0-9]+]]:_(i1) = G_USUBE [[USUBE22]], [[C6]], [[USUBO17]] + ; GFX10-NEXT: [[MV10:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO16]](i32), [[USUBE24]](i32) + ; GFX10-NEXT: [[ICMP14:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT5]](i32), [[C6]] + ; GFX10-NEXT: [[SELECT6:%[0-9]+]]:_(i64) = G_SELECT [[ICMP14]](i1), [[MV10]], [[MV9]] + ; GFX10-NEXT: [[ICMP15:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT4]](i32), [[C6]] + ; GFX10-NEXT: [[SELECT7:%[0-9]+]]:_(i64) = G_SELECT [[ICMP15]](i1), [[SELECT6]], [[MV8]] + ; GFX10-NEXT: [[XOR5:%[0-9]+]]:_(i64) = G_XOR [[SELECT7]], [[ASHR2]] + ; GFX10-NEXT: [[UV64:%[0-9]+]]:_(i32), [[UV65:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR5]](i64) + ; GFX10-NEXT: [[UV66:%[0-9]+]]:_(i32), [[UV67:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR2]](i64) + ; GFX10-NEXT: [[USUBO18:%[0-9]+]]:_(i32), [[USUBO19:%[0-9]+]]:_(i1) = G_USUBO [[UV64]], [[UV66]] + ; GFX10-NEXT: [[USUBE26:%[0-9]+]]:_(i32), [[USUBE27:%[0-9]+]]:_(i1) = G_USUBE [[UV65]], [[UV67]], [[USUBO19]] + ; GFX10-NEXT: [[MV11:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO18]](i32), [[USUBE26]](i32) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[MV5]](i64), [[MV11]](i64) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(<2 x i64>) = G_SREM %0, %1 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<2 x i64>) ... 
--- @@ -2410,162 +2422,165 @@ body: | ; GFX6-LABEL: name: test_srem_s16 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 - ; GFX6-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 16 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) - ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32) - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] - ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] - ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] - ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] - ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) - ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] - ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] - ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] - ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]] - ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX6-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] - ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C3]] - ; GFX6-NEXT: $vgpr0 = COPY [[AND]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 16 + ; GFX6-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY1]], 16 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG]], [[C]](i32) + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG1]], [[C]](i32) + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[ADD]], [[ASHR]] + ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[XOR1]](i32) + ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = 
G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[XOR1]] + ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[XOR]], [[MUL1]] + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[XOR1]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT]](i32), [[XOR1]] + ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SELECT]], [[XOR1]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SUB3]], [[SELECT]] + ; GFX6-NEXT: [[XOR2:%[0-9]+]]:_(i32) = G_XOR [[SELECT1]], [[ASHR]] + ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[XOR2]], [[ASHR]] + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[SUB4]], [[C3]] + ; GFX6-NEXT: $vgpr0 = COPY [[AND]](i32) + ; ; GFX8-LABEL: name: test_srem_s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 - ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 16 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) - ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] - ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] - ; GFX8-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] - ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) - ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] - ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] - ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]] - ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]] - ; GFX8-NEXT: 
[[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX8-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] - ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C3]] - ; GFX8-NEXT: $vgpr0 = COPY [[AND]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 16 + ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY1]], 16 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG]], [[C]](i32) + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG1]], [[C]](i32) + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[ADD]], [[ASHR]] + ; GFX8-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[XOR1]](i32) + ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[XOR1]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[XOR]], [[MUL1]] + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[XOR1]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT]](i32), [[XOR1]] + ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SELECT]], [[XOR1]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SUB3]], [[SELECT]] + ; GFX8-NEXT: [[XOR2:%[0-9]+]]:_(i32) = G_XOR [[SELECT1]], [[ASHR]] + ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[XOR2]], [[ASHR]] + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[SUB4]], [[C3]] + ; GFX8-NEXT: $vgpr0 = COPY [[AND]](i32) + ; ; GFX9-LABEL: name: test_srem_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 - ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 16 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) - ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] - ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] 
- ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] - ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) - ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] - ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] - ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]] - ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX9-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] - ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C3]] - ; GFX9-NEXT: $vgpr0 = COPY [[AND]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 16 + ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY1]], 16 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG]], [[C]](i32) + ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG1]], [[C]](i32) + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[ADD]], [[ASHR]] + ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[XOR1]](i32) + ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[XOR1]] + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[XOR]], [[MUL1]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[XOR1]] + ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX9-NEXT: 
[[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT]](i32), [[XOR1]] + ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SELECT]], [[XOR1]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SUB3]], [[SELECT]] + ; GFX9-NEXT: [[XOR2:%[0-9]+]]:_(i32) = G_XOR [[SELECT1]], [[ASHR]] + ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[XOR2]], [[ASHR]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[SUB4]], [[C3]] + ; GFX9-NEXT: $vgpr0 = COPY [[AND]](i32) + ; ; GFX10-LABEL: name: test_srem_s16 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 - ; GFX10-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 16 - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) - ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32) - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] - ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] - ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] - ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) - ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] - ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] - ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] - ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]] - ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]] - ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] - ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C3]] - ; GFX10-NEXT: $vgpr0 = COPY [[AND]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s16) = G_SREM %2, %3 - %5:_(s32) = G_ZEXT %4 - $vgpr0 = COPY %5 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 16 + ; GFX10-NEXT: 
[[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY1]], 16 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG]], [[C]](i32) + ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG1]], [[C]](i32) + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[ADD]], [[ASHR]] + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[XOR1]](i32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[XOR1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[XOR]], [[MUL1]] + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[XOR1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT]](i32), [[XOR1]] + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SELECT]], [[XOR1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SUB3]], [[SELECT]] + ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(i32) = G_XOR [[SELECT1]], [[ASHR]] + ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[XOR2]], [[ASHR]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX10-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[SUB4]], [[C3]] + ; GFX10-NEXT: $vgpr0 = COPY [[AND]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i16) = G_SREM %2, %3 + %5:_(i32) = G_ZEXT %4(i16) + $vgpr0 = COPY %5(i32) ... 
--- @@ -2577,297 +2592,300 @@ body: | ; GFX6-LABEL: name: test_srem_v2s16 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16 - ; GFX6-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16 - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C1]](s32) - ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C1]](s32) - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] - ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] - ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] - ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] - ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) - ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] - ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[XOR1]] - ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] - ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] - ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]] - ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX6-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] - ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] - ; GFX6-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 - ; GFX6-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 16 - ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[C1]](s32) - ; GFX6-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[C1]](s32) - ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG2]], [[ASHR2]] - ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG3]], [[ASHR3]] - ; GFX6-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ADD3]], [[ASHR2]] - ; GFX6-NEXT: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR3]] - ; GFX6-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR4]](s32) - ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) - ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL 
[[AMDGPU_RCP_IFLAG1]], [[C2]] - ; GFX6-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX6-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[XOR4]] - ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB5]], [[FPTOUI1]] - ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] - ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] - ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[XOR3]], [[ADD5]] - ; GFX6-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[XOR4]] - ; GFX6-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[MUL3]] - ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[XOR4]] - ; GFX6-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SUB6]], [[XOR4]] - ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB7]], [[SUB6]] - ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[XOR4]] - ; GFX6-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[XOR4]] - ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB8]], [[SELECT2]] - ; GFX6-NEXT: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR2]] - ; GFX6-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR5]], [[ASHR2]] - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C4]] - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SUB9]], [[C4]] - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST]], 16 + ; GFX6-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST1]], 16 + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG]], [[C1]](i32) + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG1]], [[C1]](i32) + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[ADD]], [[ASHR]] + ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[XOR1]](i32) + ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C3]], [[XOR1]] + ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX6-NEXT: 
[[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[XOR]], [[MUL1]] + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[XOR1]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT]](i32), [[XOR1]] + ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SELECT]], [[XOR1]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SUB3]], [[SELECT]] + ; GFX6-NEXT: [[XOR2:%[0-9]+]]:_(i32) = G_XOR [[SELECT1]], [[ASHR]] + ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[XOR2]], [[ASHR]] + ; GFX6-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR]], 16 + ; GFX6-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR1]], 16 + ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG2]], [[C1]](i32) + ; GFX6-NEXT: [[ASHR3:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG3]], [[C1]](i32) + ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG2]], [[ASHR2]] + ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG3]], [[ASHR3]] + ; GFX6-NEXT: [[XOR3:%[0-9]+]]:_(i32) = G_XOR [[ADD3]], [[ASHR2]] + ; GFX6-NEXT: [[XOR4:%[0-9]+]]:_(i32) = G_XOR [[ADD4]], [[ASHR3]] + ; GFX6-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[XOR4]](i32) + ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](f32) + ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] + ; GFX6-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL1]](f32) + ; GFX6-NEXT: [[SUB5:%[0-9]+]]:_(i32) = G_SUB [[C3]], [[XOR4]] + ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[SUB5]], [[FPTOUI1]] + ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[XOR3]], [[ADD5]] + ; GFX6-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UMULH3]], [[XOR4]] + ; GFX6-NEXT: [[SUB6:%[0-9]+]]:_(i32) = G_SUB [[XOR3]], [[MUL3]] + ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB6]](i32), [[XOR4]] + ; GFX6-NEXT: [[SUB7:%[0-9]+]]:_(i32) = G_SUB [[SUB6]], [[XOR4]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SUB7]], [[SUB6]] + ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT2]](i32), [[XOR4]] + ; GFX6-NEXT: [[SUB8:%[0-9]+]]:_(i32) = G_SUB [[SELECT2]], [[XOR4]] + ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(i32) = G_SELECT [[ICMP3]](i1), [[SUB8]], [[SELECT2]] + ; GFX6-NEXT: [[XOR5:%[0-9]+]]:_(i32) = G_XOR [[SELECT3]], [[ASHR2]] + ; GFX6-NEXT: [[SUB9:%[0-9]+]]:_(i32) = G_SUB [[XOR5]], [[ASHR2]] + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[SUB4]], [[C4]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[SUB9]], [[C4]] + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) + ; ; GFX8-LABEL: name: test_srem_v2s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = 
G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16 - ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16 - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C1]](s32) - ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C1]](s32) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] - ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] - ; GFX8-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] - ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) - ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] - ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[XOR1]] - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] - ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] - ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]] - ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]] - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX8-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] - ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] - ; GFX8-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 - ; GFX8-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 16 - ; GFX8-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[C1]](s32) - ; GFX8-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[C1]](s32) - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG2]], [[ASHR2]] - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG3]], [[ASHR3]] - ; GFX8-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ADD3]], [[ASHR2]] - ; GFX8-NEXT: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR3]] - ; GFX8-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR4]](s32) - ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) - ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] - ; GFX8-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX8-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[XOR4]] - ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB5]], [[FPTOUI1]] - ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] - ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = 
G_UMULH [[XOR3]], [[ADD5]] - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[XOR4]] - ; GFX8-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[MUL3]] - ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[XOR4]] - ; GFX8-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SUB6]], [[XOR4]] - ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB7]], [[SUB6]] - ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[XOR4]] - ; GFX8-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[XOR4]] - ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB8]], [[SELECT2]] - ; GFX8-NEXT: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR2]] - ; GFX8-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR5]], [[ASHR2]] - ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C4]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SUB9]], [[C4]] - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST]], 16 + ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST1]], 16 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG]], [[C1]](i32) + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG1]], [[C1]](i32) + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[ADD]], [[ASHR]] + ; GFX8-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[XOR1]](i32) + ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C3]], [[XOR1]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[XOR]], [[MUL1]] + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[XOR1]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT]](i32), 
[[XOR1]] + ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SELECT]], [[XOR1]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SUB3]], [[SELECT]] + ; GFX8-NEXT: [[XOR2:%[0-9]+]]:_(i32) = G_XOR [[SELECT1]], [[ASHR]] + ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[XOR2]], [[ASHR]] + ; GFX8-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR]], 16 + ; GFX8-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR1]], 16 + ; GFX8-NEXT: [[ASHR2:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG2]], [[C1]](i32) + ; GFX8-NEXT: [[ASHR3:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG3]], [[C1]](i32) + ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG2]], [[ASHR2]] + ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG3]], [[ASHR3]] + ; GFX8-NEXT: [[XOR3:%[0-9]+]]:_(i32) = G_XOR [[ADD3]], [[ASHR2]] + ; GFX8-NEXT: [[XOR4:%[0-9]+]]:_(i32) = G_XOR [[ADD4]], [[ASHR3]] + ; GFX8-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[XOR4]](i32) + ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](f32) + ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] + ; GFX8-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL1]](f32) + ; GFX8-NEXT: [[SUB5:%[0-9]+]]:_(i32) = G_SUB [[C3]], [[XOR4]] + ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[SUB5]], [[FPTOUI1]] + ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[XOR3]], [[ADD5]] + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UMULH3]], [[XOR4]] + ; GFX8-NEXT: [[SUB6:%[0-9]+]]:_(i32) = G_SUB [[XOR3]], [[MUL3]] + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB6]](i32), [[XOR4]] + ; GFX8-NEXT: [[SUB7:%[0-9]+]]:_(i32) = G_SUB [[SUB6]], [[XOR4]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SUB7]], [[SUB6]] + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT2]](i32), [[XOR4]] + ; GFX8-NEXT: [[SUB8:%[0-9]+]]:_(i32) = G_SUB [[SELECT2]], [[XOR4]] + ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(i32) = G_SELECT [[ICMP3]](i1), [[SUB8]], [[SELECT2]] + ; GFX8-NEXT: [[XOR5:%[0-9]+]]:_(i32) = G_XOR [[SELECT3]], [[ASHR2]] + ; GFX8-NEXT: [[SUB9:%[0-9]+]]:_(i32) = G_SUB [[XOR5]], [[ASHR2]] + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[SUB4]], [[C4]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[SUB9]], [[C4]] + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) + ; ; GFX9-LABEL: name: test_srem_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16 - ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16 - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s32) = 
G_ASHR [[SEXT_INREG]], [[C1]](s32) - ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C1]](s32) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] - ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] - ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] - ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) - ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] - ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[XOR1]] - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] - ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] - ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]] - ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX9-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] - ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SUB4]](s32) - ; GFX9-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 - ; GFX9-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 16 - ; GFX9-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[C1]](s32) - ; GFX9-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[C1]](s32) - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG2]], [[ASHR2]] - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG3]], [[ASHR3]] - ; GFX9-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ADD3]], [[ASHR2]] - ; GFX9-NEXT: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR3]] - ; GFX9-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR4]](s32) - ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) - ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] - ; GFX9-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX9-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[XOR4]] - ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB5]], [[FPTOUI1]] - ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] - ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[XOR3]], [[ADD5]] - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[XOR4]] - ; GFX9-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[MUL3]] - ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[XOR4]] - ; GFX9-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SUB6]], [[XOR4]] - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT 
[[ICMP2]](s1), [[SUB7]], [[SUB6]] - ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[XOR4]] - ; GFX9-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[XOR4]] - ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB8]], [[SELECT2]] - ; GFX9-NEXT: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR2]] - ; GFX9-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR5]], [[ASHR2]] - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SUB9]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST]], 16 + ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST1]], 16 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG]], [[C1]](i32) + ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG1]], [[C1]](i32) + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[ADD]], [[ASHR]] + ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[XOR1]](i32) + ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C3]], [[XOR1]] + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[XOR]], [[MUL1]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[XOR1]] + ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT]](i32), [[XOR1]] + ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SELECT]], [[XOR1]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SUB3]], [[SELECT]] + ; GFX9-NEXT: [[XOR2:%[0-9]+]]:_(i32) = G_XOR [[SELECT1]], [[ASHR]] + ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[XOR2]], [[ASHR]] + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[SUB4]](i32) + ; GFX9-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR]], 16 + ; GFX9-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR1]], 16 + ; GFX9-NEXT: [[ASHR2:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG2]], 
[[C1]](i32) + ; GFX9-NEXT: [[ASHR3:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG3]], [[C1]](i32) + ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG2]], [[ASHR2]] + ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG3]], [[ASHR3]] + ; GFX9-NEXT: [[XOR3:%[0-9]+]]:_(i32) = G_XOR [[ADD3]], [[ASHR2]] + ; GFX9-NEXT: [[XOR4:%[0-9]+]]:_(i32) = G_XOR [[ADD4]], [[ASHR3]] + ; GFX9-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[XOR4]](i32) + ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](f32) + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] + ; GFX9-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL1]](f32) + ; GFX9-NEXT: [[SUB5:%[0-9]+]]:_(i32) = G_SUB [[C3]], [[XOR4]] + ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[SUB5]], [[FPTOUI1]] + ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[XOR3]], [[ADD5]] + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UMULH3]], [[XOR4]] + ; GFX9-NEXT: [[SUB6:%[0-9]+]]:_(i32) = G_SUB [[XOR3]], [[MUL3]] + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB6]](i32), [[XOR4]] + ; GFX9-NEXT: [[SUB7:%[0-9]+]]:_(i32) = G_SUB [[SUB6]], [[XOR4]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SUB7]], [[SUB6]] + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT2]](i32), [[XOR4]] + ; GFX9-NEXT: [[SUB8:%[0-9]+]]:_(i32) = G_SUB [[SELECT2]], [[XOR4]] + ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(i32) = G_SELECT [[ICMP3]](i1), [[SUB8]], [[SELECT2]] + ; GFX9-NEXT: [[XOR5:%[0-9]+]]:_(i32) = G_XOR [[SELECT3]], [[ASHR2]] + ; GFX9-NEXT: [[SUB9:%[0-9]+]]:_(i32) = G_SUB [[XOR5]], [[ASHR2]] + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[SUB9]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) + ; ; GFX10-LABEL: name: test_srem_v2s16 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX10-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16 - ; GFX10-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16 - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C1]](s32) - ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C1]](s32) - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] - ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] - ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] - ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) - ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] - ; GFX10-NEXT: 
[[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[XOR1]] - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] - ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] - ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] - ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]] - ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]] - ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] - ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SUB4]](s32) - ; GFX10-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 - ; GFX10-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 16 - ; GFX10-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[C1]](s32) - ; GFX10-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[C1]](s32) - ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG2]], [[ASHR2]] - ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG3]], [[ASHR3]] - ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ADD3]], [[ASHR2]] - ; GFX10-NEXT: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR3]] - ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR4]](s32) - ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) - ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] - ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX10-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[XOR4]] - ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB5]], [[FPTOUI1]] - ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] - ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] - ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[XOR3]], [[ADD5]] - ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[XOR4]] - ; GFX10-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[MUL3]] - ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[XOR4]] - ; GFX10-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SUB6]], [[XOR4]] - ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB7]], [[SUB6]] - ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[XOR4]] - ; GFX10-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[XOR4]] - ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB8]], [[SELECT2]] - ; GFX10-NEXT: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR2]] - ; GFX10-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR5]], [[ASHR2]] - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SUB9]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - %0:_(<2 x s16>) = COPY 
$vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_SREM %0, %1 - $vgpr0 = COPY %2 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX10-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST]], 16 + ; GFX10-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST1]], 16 + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG]], [[C1]](i32) + ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG1]], [[C1]](i32) + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[ADD]], [[ASHR]] + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[XOR1]](i32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C3]], [[XOR1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[XOR]], [[MUL1]] + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[XOR1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT]](i32), [[XOR1]] + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SELECT]], [[XOR1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SUB3]], [[SELECT]] + ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(i32) = G_XOR [[SELECT1]], [[ASHR]] + ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[XOR2]], [[ASHR]] + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[SUB4]](i32) + ; GFX10-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR]], 16 + ; GFX10-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR1]], 16 + ; GFX10-NEXT: [[ASHR2:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG2]], [[C1]](i32) + ; GFX10-NEXT: [[ASHR3:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG3]], [[C1]](i32) + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG2]], [[ASHR2]] + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG3]], [[ASHR3]] + ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(i32) = G_XOR [[ADD3]], [[ASHR2]] + ; GFX10-NEXT: [[XOR4:%[0-9]+]]:_(i32) = G_XOR [[ADD4]], [[ASHR3]] + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[XOR4]](i32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(f32) = 
G_AMDGPU_RCP_IFLAG [[UITOFP1]](f32) + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] + ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL1]](f32) + ; GFX10-NEXT: [[SUB5:%[0-9]+]]:_(i32) = G_SUB [[C3]], [[XOR4]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[SUB5]], [[FPTOUI1]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[XOR3]], [[ADD5]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UMULH3]], [[XOR4]] + ; GFX10-NEXT: [[SUB6:%[0-9]+]]:_(i32) = G_SUB [[XOR3]], [[MUL3]] + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB6]](i32), [[XOR4]] + ; GFX10-NEXT: [[SUB7:%[0-9]+]]:_(i32) = G_SUB [[SUB6]], [[XOR4]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SUB7]], [[SUB6]] + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT2]](i32), [[XOR4]] + ; GFX10-NEXT: [[SUB8:%[0-9]+]]:_(i32) = G_SUB [[SELECT2]], [[XOR4]] + ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(i32) = G_SELECT [[ICMP3]](i1), [[SUB8]], [[SELECT2]] + ; GFX10-NEXT: [[XOR5:%[0-9]+]]:_(i32) = G_XOR [[SELECT3]], [[ASHR2]] + ; GFX10-NEXT: [[SUB9:%[0-9]+]]:_(i32) = G_SUB [[XOR5]], [[ASHR2]] + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[SUB9]](i32) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX10-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = G_SREM %0, %1 + $vgpr0 = COPY %2(<2 x i16>) ... --- @@ -2879,154 +2897,157 @@ body: | ; GFX6-LABEL: name: test_srem_s7 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 7 - ; GFX6-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 7 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) - ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32) - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] - ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] - ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] - ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] - ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) - ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] - ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] - ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] - ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB 
[[SUB1]], [[XOR1]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]] - ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX6-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] - ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] - ; GFX6-NEXT: $vgpr0 = COPY [[SUB4]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 7 + ; GFX6-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY1]], 7 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG]], [[C]](i32) + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG1]], [[C]](i32) + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[ADD]], [[ASHR]] + ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[XOR1]](i32) + ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[XOR1]] + ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[XOR]], [[MUL1]] + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[XOR1]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT]](i32), [[XOR1]] + ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SELECT]], [[XOR1]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SUB3]], [[SELECT]] + ; GFX6-NEXT: [[XOR2:%[0-9]+]]:_(i32) = G_XOR [[SELECT1]], [[ASHR]] + ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[XOR2]], [[ASHR]] + ; GFX6-NEXT: $vgpr0 = COPY [[SUB4]](i32) + ; ; GFX8-LABEL: name: test_srem_s7 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 7 - ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 7 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) - ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] - ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(s32) 
= G_XOR [[ADD]], [[ASHR]] - ; GFX8-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] - ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) - ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] - ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] - ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]] - ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]] - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX8-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] - ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] - ; GFX8-NEXT: $vgpr0 = COPY [[SUB4]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 7 + ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY1]], 7 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG]], [[C]](i32) + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG1]], [[C]](i32) + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[ADD]], [[ASHR]] + ; GFX8-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[XOR1]](i32) + ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[XOR1]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[XOR]], [[MUL1]] + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[XOR1]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX8-NEXT: 
[[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT]](i32), [[XOR1]] + ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SELECT]], [[XOR1]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SUB3]], [[SELECT]] + ; GFX8-NEXT: [[XOR2:%[0-9]+]]:_(i32) = G_XOR [[SELECT1]], [[ASHR]] + ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[XOR2]], [[ASHR]] + ; GFX8-NEXT: $vgpr0 = COPY [[SUB4]](i32) + ; ; GFX9-LABEL: name: test_srem_s7 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 7 - ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 7 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) - ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] - ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] - ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] - ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) - ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] - ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] - ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]] - ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX9-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] - ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] - ; GFX9-NEXT: $vgpr0 = COPY [[SUB4]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 7 + ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY1]], 7 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG]], [[C]](i32) + ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG1]], [[C]](i32) + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[ADD]], [[ASHR]] + ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX9-NEXT: 
[[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[XOR1]](i32) + ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[XOR1]] + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[XOR]], [[MUL1]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[XOR1]] + ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT]](i32), [[XOR1]] + ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SELECT]], [[XOR1]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SUB3]], [[SELECT]] + ; GFX9-NEXT: [[XOR2:%[0-9]+]]:_(i32) = G_XOR [[SELECT1]], [[ASHR]] + ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[XOR2]], [[ASHR]] + ; GFX9-NEXT: $vgpr0 = COPY [[SUB4]](i32) + ; ; GFX10-LABEL: name: test_srem_s7 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 7 - ; GFX10-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 7 - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) - ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32) - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] - ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] - ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] - ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) - ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] - ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] - ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] - ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX10-NEXT: 
[[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]] - ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]] - ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] - ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] - ; GFX10-NEXT: $vgpr0 = COPY [[SUB4]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s7) = G_TRUNC %0 - %3:_(s7) = G_TRUNC %1 - %4:_(s7) = G_SREM %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 7 + ; GFX10-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY1]], 7 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG]], [[C]](i32) + ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG1]], [[C]](i32) + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[ADD]], [[ASHR]] + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[XOR1]](i32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[XOR1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[XOR]], [[MUL1]] + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[XOR1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT]](i32), [[XOR1]] + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SELECT]], [[XOR1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SUB3]], [[SELECT]] + ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(i32) = G_XOR [[SELECT1]], [[ASHR]] + ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[XOR2]], [[ASHR]] + ; GFX10-NEXT: $vgpr0 = COPY [[SUB4]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i7) = G_TRUNC %0(i32) + %3:_(i7) = G_TRUNC %1(i32) + %4:_(i7) = G_SREM %2, %3 + %5:_(i32) = G_ANYEXT %4(i7) + $vgpr0 = COPY %5(i32) ... 
--- @@ -3038,154 +3059,157 @@ body: | ; GFX6-LABEL: name: test_srem_s17 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 17 - ; GFX6-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 17 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) - ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32) - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] - ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] - ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] - ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] - ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) - ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] - ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] - ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] - ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]] - ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX6-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] - ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] - ; GFX6-NEXT: $vgpr0 = COPY [[SUB4]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 17 + ; GFX6-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY1]], 17 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG]], [[C]](i32) + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG1]], [[C]](i32) + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[ADD]], [[ASHR]] + ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[XOR1]](i32) + ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX6-NEXT: 
[[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[XOR1]] + ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[XOR]], [[MUL1]] + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[XOR1]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT]](i32), [[XOR1]] + ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SELECT]], [[XOR1]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SUB3]], [[SELECT]] + ; GFX6-NEXT: [[XOR2:%[0-9]+]]:_(i32) = G_XOR [[SELECT1]], [[ASHR]] + ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[XOR2]], [[ASHR]] + ; GFX6-NEXT: $vgpr0 = COPY [[SUB4]](i32) + ; ; GFX8-LABEL: name: test_srem_s17 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 17 - ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 17 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) - ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] - ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] - ; GFX8-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] - ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) - ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] - ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] - ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]] - ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]] - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX8-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] - ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] - ; GFX8-NEXT: $vgpr0 = COPY 
[[SUB4]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 17 + ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY1]], 17 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG]], [[C]](i32) + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG1]], [[C]](i32) + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[ADD]], [[ASHR]] + ; GFX8-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[XOR1]](i32) + ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[XOR1]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[XOR]], [[MUL1]] + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[XOR1]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT]](i32), [[XOR1]] + ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SELECT]], [[XOR1]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SUB3]], [[SELECT]] + ; GFX8-NEXT: [[XOR2:%[0-9]+]]:_(i32) = G_XOR [[SELECT1]], [[ASHR]] + ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[XOR2]], [[ASHR]] + ; GFX8-NEXT: $vgpr0 = COPY [[SUB4]](i32) + ; ; GFX9-LABEL: name: test_srem_s17 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 17 - ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 17 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) - ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] - ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] - ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] - ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) - ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 0 - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] - ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] - ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]] - ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX9-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] - ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] - ; GFX9-NEXT: $vgpr0 = COPY [[SUB4]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 17 + ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY1]], 17 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG]], [[C]](i32) + ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG1]], [[C]](i32) + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[ADD]], [[ASHR]] + ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[XOR1]](i32) + ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[XOR1]] + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[XOR]], [[MUL1]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[XOR1]] + ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT]](i32), [[XOR1]] + ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SELECT]], [[XOR1]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SUB3]], [[SELECT]] + ; GFX9-NEXT: [[XOR2:%[0-9]+]]:_(i32) = G_XOR [[SELECT1]], [[ASHR]] + ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[XOR2]], [[ASHR]] + ; GFX9-NEXT: $vgpr0 = COPY [[SUB4]](i32) + ; ; GFX10-LABEL: name: test_srem_s17 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: 
[[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 17 - ; GFX10-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 17 - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) - ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32) - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] - ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] - ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] - ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) - ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] - ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] - ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] - ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]] - ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]] - ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] - ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] - ; GFX10-NEXT: $vgpr0 = COPY [[SUB4]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s17) = G_TRUNC %0 - %3:_(s17) = G_TRUNC %1 - %4:_(s17) = G_SREM %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 17 + ; GFX10-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY1]], 17 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG]], [[C]](i32) + ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[SEXT_INREG1]], [[C]](i32) + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[ADD]], [[ASHR]] + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[XOR1]](i32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; 
GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[XOR1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[XOR]], [[MUL1]] + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[XOR1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT]](i32), [[XOR1]] + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SELECT]], [[XOR1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SUB3]], [[SELECT]] + ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(i32) = G_XOR [[SELECT1]], [[ASHR]] + ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[XOR2]], [[ASHR]] + ; GFX10-NEXT: $vgpr0 = COPY [[SUB4]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i17) = G_TRUNC %0(i32) + %3:_(i17) = G_TRUNC %1(i32) + %4:_(i17) = G_SREM %2, %3 + %5:_(i32) = G_ANYEXT %4(i17) + $vgpr0 = COPY %5(i32) ... --- @@ -3197,684 +3221,687 @@ body: | ; GFX6-LABEL: name: test_srem_s33 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 33 - ; GFX6-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY1]], 33 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32) - ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG1]], [[C]](s32) - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG]](s64) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] - ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG1]](s64) - ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) - ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] - ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] - ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]] - ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV8]](s32) - ; GFX6-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV9]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] - ; GFX6-NEXT: 
[[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] - ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 - ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; GFX6-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] - ; GFX6-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; GFX6-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] - ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] - ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) - ; GFX6-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) - ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX6-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) - ; GFX6-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]] - ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] - ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] - ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] - ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] - ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; GFX6-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] - ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] - ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] - ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] - ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] - ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] - ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] - ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] - ; GFX6-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] - ; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] - ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] - ; 
GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] - ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] - ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] - ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] - ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] - ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] - ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] - ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] - ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX6-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] - ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] - ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] - ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] - ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] - ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] - ; GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] - ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] - ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] - ; GFX6-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX6-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]] - ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE6]] - ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]] - ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] - ; GFX6-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE6]] - ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]] - ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE6]] - ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] - ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX6-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = 
G_UADDO [[UADDO32]], [[UMULH12]] - ; GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX6-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] - ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE6]] - ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] - ; GFX6-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]] - ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]] - ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD15]] - ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]] - ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] - ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]] - ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD17]], [[USUBO3]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD17]] - ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; GFX6-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]] - ; GFX6-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV20]] - ; GFX6-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV21]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX6-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV20]] - ; GFX6-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV21]], [[USUBO3]] - ; GFX6-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] - ; GFX6-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) - ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV21]] - ; GFX6-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX6-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV20]] - ; GFX6-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX6-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV21]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX6-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV20]] - ; GFX6-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV21]], [[USUBO5]] - ; GFX6-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] - ; GFX6-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) - ; GFX6-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] - ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] - ; GFX6-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] - ; 
GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] - ; GFX6-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[ASHR]] - ; GFX6-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) - ; GFX6-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX6-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV24]] - ; GFX6-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV25]], [[USUBO9]] - ; GFX6-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[COPY]], 33 + ; GFX6-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i64) = G_SEXT_INREG [[COPY1]], 33 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG]], [[C]](i32) + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG1]], [[C]](i32) + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SEXT_INREG]](i64) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR]](i64) + ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV]], [[UV2]] + ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SEXT_INREG1]](i64) + ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR1]](i64) + ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UV4]], [[UV6]] + ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] + ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO2]](i32), [[UADDE2]](i32) + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(i64) = G_XOR [[MV]], [[ASHR]] + ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(i64) = G_XOR [[MV1]], [[ASHR1]] + ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[UV8]](i32) + ; GFX6-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[UV9]](i32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD]](f32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX6-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX6-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX6-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD1]](f32) + ; GFX6-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC]](f32) + ; GFX6-NEXT: 
[[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX6-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C5]](i64) + ; GFX6-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV10]], [[UV12]] + ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] + ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ADD]], [[UMULH]] + ; GFX6-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(i32), [[UADDO5:%[0-9]+]]:_(i1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO5]](i1) + ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(i32), [[UADDO7:%[0-9]+]]:_(i1) = G_UADDO [[UADDO4]], [[UMULH1]] + ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO7]](i1) + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(i32), [[UADDO9:%[0-9]+]]:_(i1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO9]](i1) + ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(i32), [[UADDO11:%[0-9]+]]:_(i1) = G_UADDO [[UADDO8]], [[UMULH3]] + ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO11]](i1) + ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(i32), [[UADDO13:%[0-9]+]]:_(i1) = G_UADDO [[UADDO10]], [[ADD2]] + ; GFX6-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO13]](i1) + ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(i32), [[UADDO15:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI]], [[UADDO12]] + ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(i32), [[UADDE5:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] + ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[UADDO14]] + ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(i32) = G_MUL [[USUBE]], [[UADDO14]] + ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[UADDE4]] + ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(i32) = G_UMULH [[USUBO]], [[UADDO14]] + ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[ADD6]], [[UMULH5]] + ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(i32) = G_MUL [[UADDE4]], [[MUL6]] + ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(i32) = G_MUL [[UADDO14]], [[ADD7]] + ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(i32) = G_UMULH [[UADDO14]], [[MUL6]] + ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(i32), [[UADDO17:%[0-9]+]]:_(i1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO17]](i1) + ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(i32), [[UADDO19:%[0-9]+]]:_(i1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX6-NEXT: 
[[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO19]](i1) + ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(i32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(i32) = G_MUL [[UADDE4]], [[ADD7]] + ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(i32) = G_UMULH [[UADDE4]], [[MUL6]] + ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(i32) = G_UMULH [[UADDO14]], [[ADD7]] + ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(i32), [[UADDO21:%[0-9]+]]:_(i1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO21]](i1) + ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(i32), [[UADDO23:%[0-9]+]]:_(i1) = G_UADDO [[UADDO20]], [[UMULH8]] + ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO23]](i1) + ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(i32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(i32), [[UADDO25:%[0-9]+]]:_(i1) = G_UADDO [[UADDO22]], [[ADD8]] + ; GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO25]](i1) + ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(i32) = G_ADD [[ADD9]], [[ZEXT9]] + ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(i32) = G_UMULH [[UADDE4]], [[ADD7]] + ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(i32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX6-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(i32), [[UADDO27:%[0-9]+]]:_(i1) = G_UADDO [[UADDO14]], [[UADDO24]] + ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(i32), [[UADDE7:%[0-9]+]]:_(i1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] + ; GFX6-NEXT: [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR]](i64) + ; GFX6-NEXT: [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR]](i64) + ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(i32) = G_MUL [[UV17]], [[UADDO26]] + ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(i32) = G_MUL [[UV16]], [[UADDE6]] + ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(i32) = G_UMULH [[UV16]], [[UADDO26]] + ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(i32), [[UADDO29:%[0-9]+]]:_(i1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX6-NEXT: [[ZEXT10:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO29]](i1) + ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(i32), [[UADDO31:%[0-9]+]]:_(i1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO31]](i1) + ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(i32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(i32) = G_MUL [[UV17]], [[UADDE6]] + ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(i32) = G_UMULH [[UV17]], [[UADDO26]] + ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(i32) = G_UMULH [[UV16]], [[UADDE6]] + ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(i32), [[UADDO33:%[0-9]+]]:_(i1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO33]](i1) + ; GFX6-NEXT: [[UADDO34:%[0-9]+]]:_(i32), [[UADDO35:%[0-9]+]]:_(i1) = G_UADDO [[UADDO32]], [[UMULH12]] + ; GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO35]](i1) + ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(i32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX6-NEXT: [[UADDO36:%[0-9]+]]:_(i32), [[UADDO37:%[0-9]+]]:_(i1) = G_UADDO [[UADDO34]], [[ADD12]] + ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO37]](i1) + ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(i32) = G_ADD [[ADD13]], [[ZEXT14]] + ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(i32) = G_UMULH [[UV17]], [[UADDE6]] + ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(i32) = G_ADD [[UMULH13]], [[ADD14]] + ; GFX6-NEXT: [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(i32) = G_MUL [[UV18]], [[UADDO36]] + ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(i32) = G_MUL [[UV19]], [[UADDO36]] + ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(i32) = G_MUL [[UV18]], [[ADD15]] + ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(i32) = 
G_UMULH [[UV18]], [[UADDO36]] + ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(i32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(i32) = G_ADD [[ADD16]], [[UMULH14]] + ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(i32), [[USUBO3:%[0-9]+]]:_(i1) = G_USUBO [[UV14]], [[MUL15]] + ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV15]], [[ADD17]], [[USUBO3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV15]], [[ADD17]] + ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO2]](i32), [[USUBE2]](i32) + ; GFX6-NEXT: [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE2]](i32), [[UV21]] + ; GFX6-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO2]](i32), [[UV20]] + ; GFX6-NEXT: [[SEXT1:%[0-9]+]]:_(i32) = G_SEXT [[ICMP1]](i1) + ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE2]](i32), [[UV21]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SEXT1]], [[SEXT]] + ; GFX6-NEXT: [[USUBO4:%[0-9]+]]:_(i32), [[USUBO5:%[0-9]+]]:_(i1) = G_USUBO [[USUBO2]], [[UV20]] + ; GFX6-NEXT: [[USUBE4:%[0-9]+]]:_(i32), [[USUBE5:%[0-9]+]]:_(i1) = G_USUBE [[SUB]], [[UV21]], [[USUBO3]] + ; GFX6-NEXT: [[USUBE6:%[0-9]+]]:_(i32), [[USUBE7:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX6-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO4]](i32), [[USUBE6]](i32) + ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE6]](i32), [[UV21]] + ; GFX6-NEXT: [[SEXT2:%[0-9]+]]:_(i32) = G_SEXT [[ICMP3]](i1) + ; GFX6-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO4]](i32), [[UV20]] + ; GFX6-NEXT: [[SEXT3:%[0-9]+]]:_(i32) = G_SEXT [[ICMP4]](i1) + ; GFX6-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE6]](i32), [[UV21]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP5]](i1), [[SEXT3]], [[SEXT2]] + ; GFX6-NEXT: [[USUBO6:%[0-9]+]]:_(i32), [[USUBO7:%[0-9]+]]:_(i1) = G_USUBO [[USUBO4]], [[UV20]] + ; GFX6-NEXT: [[USUBE8:%[0-9]+]]:_(i32), [[USUBE9:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[UV21]], [[USUBO5]] + ; GFX6-NEXT: [[USUBE10:%[0-9]+]]:_(i32), [[USUBE11:%[0-9]+]]:_(i1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] + ; GFX6-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO6]](i32), [[USUBE10]](i32) + ; GFX6-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT1]](i32), [[C6]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[MV4]], [[MV3]] + ; GFX6-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT]](i32), [[C6]] + ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP7]](i1), [[SELECT2]], [[MV2]] + ; GFX6-NEXT: [[XOR2:%[0-9]+]]:_(i64) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX6-NEXT: [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR2]](i64) + ; GFX6-NEXT: [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR]](i64) + ; GFX6-NEXT: [[USUBO8:%[0-9]+]]:_(i32), [[USUBO9:%[0-9]+]]:_(i1) = G_USUBO [[UV22]], [[UV24]] + ; GFX6-NEXT: [[USUBE12:%[0-9]+]]:_(i32), [[USUBE13:%[0-9]+]]:_(i1) = G_USUBE [[UV23]], [[UV25]], [[USUBO9]] + ; GFX6-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO8]](i32), [[USUBE12]](i32) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](i64) + ; ; GFX8-LABEL: name: test_srem_s33 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY 
$vgpr2_vgpr3 - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 33 - ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY1]], 33 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32) - ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG1]], [[C]](s32) - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG]](s64) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] - ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG1]](s64) - ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) - ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] - ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] - ; GFX8-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]] - ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV8]](s32) - ; GFX8-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV9]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] - ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] - ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 - ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; GFX8-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] - ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] - ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; GFX8-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] - ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] - ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) - ; GFX8-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) - ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) - ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX8-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]] - ; GFX8-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C5]] - ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV15]](s32) - ; GFX8-NEXT: 
[[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] - ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV14]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV16]] - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV14]] - ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV16]] - ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV14]] - ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV16]] - ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD]] - ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] - ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV16]] - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] - ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) - ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV19]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE4]], [[ANYEXT1]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]] - ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV18]] - ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[UV20]] - ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV18]] - ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] - ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; 
GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV20]] - ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV18]] - ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV20]] - ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] - ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD4]] - ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV20]] - ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]] - ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO26]] - ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE6]] - ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO26]] - ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] - ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE6]] - ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO26]] - ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE6]] - ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] - ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD8]] - ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE6]] - ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] - ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDO36]], [[C5]] - ; GFX8-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) - ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT 
[[UV29]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]] - ; GFX8-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) - ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV28]] - ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV30]], [[USUBO3]] - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV23]], [[UV30]] - ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; GFX8-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV33]] - ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV32]] - ; GFX8-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV33]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX8-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV32]] - ; GFX8-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV33]], [[USUBO3]] - ; GFX8-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] - ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) - ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV33]] - ; GFX8-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV32]] - ; GFX8-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV33]] - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX8-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV32]] - ; GFX8-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV33]], [[USUBO5]] - ; GFX8-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] - ; GFX8-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) - ; GFX8-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] - ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] - ; GFX8-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] - ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] - ; GFX8-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[ASHR]] - ; GFX8-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) - ; GFX8-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX8-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV34]], [[UV36]] - ; GFX8-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV35]], [[UV37]], [[USUBO9]] - ; GFX8-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES 
[[USUBO8]](s32), [[USUBE12]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[COPY]], 33 + ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i64) = G_SEXT_INREG [[COPY1]], 33 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG]], [[C]](i32) + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG1]], [[C]](i32) + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SEXT_INREG]](i64) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR]](i64) + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV]], [[UV2]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SEXT_INREG1]](i64) + ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR1]](i64) + ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UV4]], [[UV6]] + ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] + ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO2]](i32), [[UADDE2]](i32) + ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(i64) = G_XOR [[MV]], [[ASHR]] + ; GFX8-NEXT: [[XOR1:%[0-9]+]]:_(i64) = G_XOR [[MV1]], [[ASHR1]] + ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[UV8]](i32) + ; GFX8-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[UV9]](i32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD]](f32) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX8-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX8-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD1]](f32) + ; GFX8-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC]](f32) + ; GFX8-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C5]](i64) + ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX8-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV10]], [[UV12]] + ; GFX8-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[FPTOUI]], [[C5]] + ; GFX8-NEXT: 
[[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](i64) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[UV15]](i32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[FPTOUI1]], [[ANYEXT]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](i32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](i64) + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[UV14]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI]], [[UV16]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[UV14]] + ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(i32), [[UADDO5:%[0-9]+]]:_(i1) = G_UADDO [[MUL]], [[MUL1]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO5]](i1) + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(i32), [[UADDO7:%[0-9]+]]:_(i1) = G_UADDO [[UADDO4]], [[UMULH]] + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO7]](i1) + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[UV16]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[UV14]] + ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[UV16]] + ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(i32), [[UADDO9:%[0-9]+]]:_(i1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO9]](i1) + ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(i32), [[UADDO11:%[0-9]+]]:_(i1) = G_UADDO [[UADDO8]], [[UMULH2]] + ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO11]](i1) + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(i32), [[UADDO13:%[0-9]+]]:_(i1) = G_UADDO [[UADDO10]], [[ADD]] + ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO13]](i1) + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[UV16]] + ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UMULH3]], [[ADD2]] + ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(i32), [[UADDO15:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI]], [[UADDO12]] + ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(i32), [[UADDE5:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[UADDO14]], [[C5]] + ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](i64) + ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[UV19]](i32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[UADDE4]], [[ANYEXT1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](i32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](i64) + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UADDE4]], [[UV18]] + ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(i32) = G_MUL [[UADDO14]], [[UV20]] + ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(i32) = G_UMULH [[UADDO14]], [[UV18]] + ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(i32), [[UADDO17:%[0-9]+]]:_(i1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO17]](i1) + ; 
GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(i32), [[UADDO19:%[0-9]+]]:_(i1) = G_UADDO [[UADDO16]], [[UMULH4]] + ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO19]](i1) + ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(i32) = G_MUL [[UADDE4]], [[UV20]] + ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(i32) = G_UMULH [[UADDE4]], [[UV18]] + ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(i32) = G_UMULH [[UADDO14]], [[UV20]] + ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(i32), [[UADDO21:%[0-9]+]]:_(i1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO21]](i1) + ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(i32), [[UADDO23:%[0-9]+]]:_(i1) = G_UADDO [[UADDO20]], [[UMULH6]] + ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO23]](i1) + ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(i32), [[UADDO25:%[0-9]+]]:_(i1) = G_UADDO [[UADDO22]], [[ADD4]] + ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO25]](i1) + ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(i32) = G_UMULH [[UADDE4]], [[UV20]] + ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[UMULH7]], [[ADD6]] + ; GFX8-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(i32), [[UADDO27:%[0-9]+]]:_(i1) = G_UADDO [[UADDO14]], [[UADDO24]] + ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(i32), [[UADDE7:%[0-9]+]]:_(i1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]] + ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR]](i64) + ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR]](i64) + ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(i32) = G_MUL [[UV25]], [[UADDO26]] + ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(i32) = G_MUL [[UV24]], [[UADDE6]] + ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(i32) = G_UMULH [[UV24]], [[UADDO26]] + ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(i32), [[UADDO29:%[0-9]+]]:_(i1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO29]](i1) + ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(i32), [[UADDO31:%[0-9]+]]:_(i1) = G_UADDO [[UADDO28]], [[UMULH8]] + ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO31]](i1) + ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(i32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(i32) = G_MUL [[UV25]], [[UADDE6]] + ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(i32) = G_UMULH [[UV25]], [[UADDO26]] + ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(i32) = G_UMULH [[UV24]], [[UADDE6]] + ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(i32), [[UADDO33:%[0-9]+]]:_(i1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO33]](i1) + ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(i32), [[UADDO35:%[0-9]+]]:_(i1) = G_UADDO [[UADDO32]], [[UMULH10]] + ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO35]](i1) + ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(i32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(i32), [[UADDO37:%[0-9]+]]:_(i1) = G_UADDO [[UADDO34]], [[ADD8]] + ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO37]](i1) + ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(i32) = G_ADD [[ADD9]], [[ZEXT14]] + ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(i32) = G_UMULH [[UV25]], [[UADDE6]] + ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(i32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV26]](i32), [[UADDO36]], [[C5]] + ; 
GFX8-NEXT: [[UV28:%[0-9]+]]:_(i32), [[UV29:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](i64) + ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(i64) = G_ANYEXT [[UV29]](i32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV26]](i32), [[ADD11]], [[ANYEXT2]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV27]](i32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX8-NEXT: [[UV30:%[0-9]+]]:_(i32), [[UV31:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](i64) + ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(i32), [[USUBO3:%[0-9]+]]:_(i1) = G_USUBO [[UV22]], [[UV28]] + ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV23]], [[UV30]], [[USUBO3]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV23]], [[UV30]] + ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO2]](i32), [[USUBE2]](i32) + ; GFX8-NEXT: [[UV32:%[0-9]+]]:_(i32), [[UV33:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE2]](i32), [[UV33]] + ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO2]](i32), [[UV32]] + ; GFX8-NEXT: [[SEXT1:%[0-9]+]]:_(i32) = G_SEXT [[ICMP1]](i1) + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE2]](i32), [[UV33]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SEXT1]], [[SEXT]] + ; GFX8-NEXT: [[USUBO4:%[0-9]+]]:_(i32), [[USUBO5:%[0-9]+]]:_(i1) = G_USUBO [[USUBO2]], [[UV32]] + ; GFX8-NEXT: [[USUBE4:%[0-9]+]]:_(i32), [[USUBE5:%[0-9]+]]:_(i1) = G_USUBE [[SUB]], [[UV33]], [[USUBO3]] + ; GFX8-NEXT: [[USUBE6:%[0-9]+]]:_(i32), [[USUBE7:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO4]](i32), [[USUBE6]](i32) + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE6]](i32), [[UV33]] + ; GFX8-NEXT: [[SEXT2:%[0-9]+]]:_(i32) = G_SEXT [[ICMP3]](i1) + ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO4]](i32), [[UV32]] + ; GFX8-NEXT: [[SEXT3:%[0-9]+]]:_(i32) = G_SEXT [[ICMP4]](i1) + ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE6]](i32), [[UV33]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP5]](i1), [[SEXT3]], [[SEXT2]] + ; GFX8-NEXT: [[USUBO6:%[0-9]+]]:_(i32), [[USUBO7:%[0-9]+]]:_(i1) = G_USUBO [[USUBO4]], [[UV32]] + ; GFX8-NEXT: [[USUBE8:%[0-9]+]]:_(i32), [[USUBE9:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[UV33]], [[USUBO5]] + ; GFX8-NEXT: [[USUBE10:%[0-9]+]]:_(i32), [[USUBE11:%[0-9]+]]:_(i1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] + ; GFX8-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO6]](i32), [[USUBE10]](i32) + ; GFX8-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT1]](i32), [[C6]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[MV4]], [[MV3]] + ; GFX8-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT]](i32), [[C6]] + ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP7]](i1), [[SELECT2]], [[MV2]] + ; GFX8-NEXT: [[XOR2:%[0-9]+]]:_(i64) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX8-NEXT: [[UV34:%[0-9]+]]:_(i32), [[UV35:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR2]](i64) + ; GFX8-NEXT: [[UV36:%[0-9]+]]:_(i32), [[UV37:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR]](i64) + ; GFX8-NEXT: [[USUBO8:%[0-9]+]]:_(i32), [[USUBO9:%[0-9]+]]:_(i1) = G_USUBO [[UV34]], [[UV36]] + ; GFX8-NEXT: 
[[USUBE12:%[0-9]+]]:_(i32), [[USUBE13:%[0-9]+]]:_(i1) = G_USUBE [[UV35]], [[UV37]], [[USUBO9]] + ; GFX8-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO8]](i32), [[USUBE12]](i32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](i64) + ; ; GFX9-LABEL: name: test_srem_s33 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 33 - ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY1]], 33 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32) - ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG1]], [[C]](s32) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG]](s64) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG1]](s64) - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) - ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] - ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] - ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]] - ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV8]](s32) - ; GFX9-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV9]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] - ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 - ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] - ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; GFX9-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] - ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] - ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) - ; GFX9-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) - ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX9-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]] - ; GFX9-NEXT: [[USUBE:%[0-9]+]]:_(s32), 
[[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C5]] - ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV15]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] - ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV14]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV16]] - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV14]] - ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV16]] - ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV14]] - ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV16]] - ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] - ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD]] - ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] - ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV16]] - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] - ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) - ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV19]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE4]], [[ANYEXT1]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]] - ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV18]] - ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[UV20]] - ; 
GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV18]] - ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] - ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV20]] - ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV18]] - ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV20]] - ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] - ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD4]] - ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV20]] - ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]] - ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO26]] - ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE6]] - ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO26]] - ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] - ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE6]] - ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO26]] - ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE6]] - ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] - ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD8]] - ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE6]] - ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] - ; GFX9-NEXT: 
[[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDO36]], [[C5]] - ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) - ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV29]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]] - ; GFX9-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) - ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV28]] - ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV30]], [[USUBO3]] - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV23]], [[UV30]] - ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; GFX9-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV33]] - ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV32]] - ; GFX9-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV33]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX9-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV32]] - ; GFX9-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV33]], [[USUBO3]] - ; GFX9-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] - ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) - ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV33]] - ; GFX9-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV32]] - ; GFX9-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV33]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX9-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV32]] - ; GFX9-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV33]], [[USUBO5]] - ; GFX9-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] - ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) - ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] - ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] - ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] - ; GFX9-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[ASHR]] - ; GFX9-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) 
= G_UNMERGE_VALUES [[XOR2]](s64) - ; GFX9-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX9-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV34]], [[UV36]] - ; GFX9-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV35]], [[UV37]], [[USUBO9]] - ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[COPY]], 33 + ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i64) = G_SEXT_INREG [[COPY1]], 33 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG]], [[C]](i32) + ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG1]], [[C]](i32) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SEXT_INREG]](i64) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR]](i64) + ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV]], [[UV2]] + ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SEXT_INREG1]](i64) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR1]](i64) + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UV4]], [[UV6]] + ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO2]](i32), [[UADDE2]](i32) + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(i64) = G_XOR [[MV]], [[ASHR]] + ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(i64) = G_XOR [[MV1]], [[ASHR1]] + ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[UV8]](i32) + ; GFX9-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[UV9]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD]](f32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX9-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD1]](f32) + ; GFX9-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC]](f32) + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C5]](i64) + ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES 
[[XOR1]](i64) + ; GFX9-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV10]], [[UV12]] + ; GFX9-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[FPTOUI]], [[C5]] + ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](i64) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[UV15]](i32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[FPTOUI1]], [[ANYEXT]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](i32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](i64) + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[UV14]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI]], [[UV16]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[UV14]] + ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(i32), [[UADDO5:%[0-9]+]]:_(i1) = G_UADDO [[MUL]], [[MUL1]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO5]](i1) + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(i32), [[UADDO7:%[0-9]+]]:_(i1) = G_UADDO [[UADDO4]], [[UMULH]] + ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO7]](i1) + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[UV16]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[UV14]] + ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[UV16]] + ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(i32), [[UADDO9:%[0-9]+]]:_(i1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO9]](i1) + ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(i32), [[UADDO11:%[0-9]+]]:_(i1) = G_UADDO [[UADDO8]], [[UMULH2]] + ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO11]](i1) + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(i32), [[UADDO13:%[0-9]+]]:_(i1) = G_UADDO [[UADDO10]], [[ADD]] + ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO13]](i1) + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[UV16]] + ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UMULH3]], [[ADD2]] + ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(i32), [[UADDO15:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI]], [[UADDO12]] + ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(i32), [[UADDE5:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[UADDO14]], [[C5]] + ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](i64) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[UV19]](i32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[UADDE4]], [[ANYEXT1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](i32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES 
[[AMDGPU_MAD_U64_U32_10]](i64) + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UADDE4]], [[UV18]] + ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(i32) = G_MUL [[UADDO14]], [[UV20]] + ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(i32) = G_UMULH [[UADDO14]], [[UV18]] + ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(i32), [[UADDO17:%[0-9]+]]:_(i1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO17]](i1) + ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(i32), [[UADDO19:%[0-9]+]]:_(i1) = G_UADDO [[UADDO16]], [[UMULH4]] + ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO19]](i1) + ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(i32) = G_MUL [[UADDE4]], [[UV20]] + ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(i32) = G_UMULH [[UADDE4]], [[UV18]] + ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(i32) = G_UMULH [[UADDO14]], [[UV20]] + ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(i32), [[UADDO21:%[0-9]+]]:_(i1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO21]](i1) + ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(i32), [[UADDO23:%[0-9]+]]:_(i1) = G_UADDO [[UADDO20]], [[UMULH6]] + ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO23]](i1) + ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(i32), [[UADDO25:%[0-9]+]]:_(i1) = G_UADDO [[UADDO22]], [[ADD4]] + ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO25]](i1) + ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(i32) = G_UMULH [[UADDE4]], [[UV20]] + ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[UMULH7]], [[ADD6]] + ; GFX9-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(i32), [[UADDO27:%[0-9]+]]:_(i1) = G_UADDO [[UADDO14]], [[UADDO24]] + ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(i32), [[UADDE7:%[0-9]+]]:_(i1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]] + ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR]](i64) + ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR]](i64) + ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(i32) = G_MUL [[UV25]], [[UADDO26]] + ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(i32) = G_MUL [[UV24]], [[UADDE6]] + ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(i32) = G_UMULH [[UV24]], [[UADDO26]] + ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(i32), [[UADDO29:%[0-9]+]]:_(i1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO29]](i1) + ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(i32), [[UADDO31:%[0-9]+]]:_(i1) = G_UADDO [[UADDO28]], [[UMULH8]] + ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO31]](i1) + ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(i32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(i32) = G_MUL [[UV25]], [[UADDE6]] + ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(i32) = G_UMULH [[UV25]], [[UADDO26]] + ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(i32) = G_UMULH [[UV24]], [[UADDE6]] + ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(i32), [[UADDO33:%[0-9]+]]:_(i1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO33]](i1) + ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(i32), [[UADDO35:%[0-9]+]]:_(i1) = G_UADDO [[UADDO32]], [[UMULH10]] + ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO35]](i1) + ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(i32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(i32), [[UADDO37:%[0-9]+]]:_(i1) = G_UADDO [[UADDO34]], [[ADD8]] + ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO37]](i1) + ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(i32) = G_ADD [[ADD9]], 
[[ZEXT14]] + ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(i32) = G_UMULH [[UV25]], [[UADDE6]] + ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(i32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV26]](i32), [[UADDO36]], [[C5]] + ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(i32), [[UV29:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](i64) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(i64) = G_ANYEXT [[UV29]](i32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV26]](i32), [[ADD11]], [[ANYEXT2]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV27]](i32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX9-NEXT: [[UV30:%[0-9]+]]:_(i32), [[UV31:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](i64) + ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(i32), [[USUBO3:%[0-9]+]]:_(i1) = G_USUBO [[UV22]], [[UV28]] + ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV23]], [[UV30]], [[USUBO3]] + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV23]], [[UV30]] + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO2]](i32), [[USUBE2]](i32) + ; GFX9-NEXT: [[UV32:%[0-9]+]]:_(i32), [[UV33:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE2]](i32), [[UV33]] + ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO2]](i32), [[UV32]] + ; GFX9-NEXT: [[SEXT1:%[0-9]+]]:_(i32) = G_SEXT [[ICMP1]](i1) + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE2]](i32), [[UV33]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SEXT1]], [[SEXT]] + ; GFX9-NEXT: [[USUBO4:%[0-9]+]]:_(i32), [[USUBO5:%[0-9]+]]:_(i1) = G_USUBO [[USUBO2]], [[UV32]] + ; GFX9-NEXT: [[USUBE4:%[0-9]+]]:_(i32), [[USUBE5:%[0-9]+]]:_(i1) = G_USUBE [[SUB]], [[UV33]], [[USUBO3]] + ; GFX9-NEXT: [[USUBE6:%[0-9]+]]:_(i32), [[USUBE7:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO4]](i32), [[USUBE6]](i32) + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE6]](i32), [[UV33]] + ; GFX9-NEXT: [[SEXT2:%[0-9]+]]:_(i32) = G_SEXT [[ICMP3]](i1) + ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO4]](i32), [[UV32]] + ; GFX9-NEXT: [[SEXT3:%[0-9]+]]:_(i32) = G_SEXT [[ICMP4]](i1) + ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE6]](i32), [[UV33]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP5]](i1), [[SEXT3]], [[SEXT2]] + ; GFX9-NEXT: [[USUBO6:%[0-9]+]]:_(i32), [[USUBO7:%[0-9]+]]:_(i1) = G_USUBO [[USUBO4]], [[UV32]] + ; GFX9-NEXT: [[USUBE8:%[0-9]+]]:_(i32), [[USUBE9:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[UV33]], [[USUBO5]] + ; GFX9-NEXT: [[USUBE10:%[0-9]+]]:_(i32), [[USUBE11:%[0-9]+]]:_(i1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] + ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO6]](i32), [[USUBE10]](i32) + ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT1]](i32), [[C6]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[MV4]], [[MV3]] + ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT]](i32), [[C6]] + ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = 
G_SELECT [[ICMP7]](i1), [[SELECT2]], [[MV2]] + ; GFX9-NEXT: [[XOR2:%[0-9]+]]:_(i64) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX9-NEXT: [[UV34:%[0-9]+]]:_(i32), [[UV35:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR2]](i64) + ; GFX9-NEXT: [[UV36:%[0-9]+]]:_(i32), [[UV37:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR]](i64) + ; GFX9-NEXT: [[USUBO8:%[0-9]+]]:_(i32), [[USUBO9:%[0-9]+]]:_(i1) = G_USUBO [[UV34]], [[UV36]] + ; GFX9-NEXT: [[USUBE12:%[0-9]+]]:_(i32), [[USUBE13:%[0-9]+]]:_(i1) = G_USUBE [[UV35]], [[UV37]], [[USUBO9]] + ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO8]](i32), [[USUBE12]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](i64) + ; ; GFX10-LABEL: name: test_srem_s33 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 33 - ; GFX10-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY1]], 33 - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32) - ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG1]], [[C]](s32) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG]](s64) - ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG1]](s64) - ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) - ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] - ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] - ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]] - ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV8]](s32) - ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV9]](s32) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] - ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] - ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 - ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] - ; GFX10-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] - ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] - ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) - ; 
GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) - ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) - ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]] - ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C5]] - ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV15]], [[MUL]] - ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]] - ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV14]] - ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV14]] - ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[MUL3]] - ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] - ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV14]] - ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] - ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] - ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] - ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] - ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD4]] - ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] - ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64) - ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] - ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL5]] - ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] - ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[MUL6]] - ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV16]] - ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL 
[[UADDO14]], [[ADD7]] - ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV16]] - ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] - ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] - ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] - ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV16]] - ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] - ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] - ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] - ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] - ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] - ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD10]] - ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] - ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO26]] - ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE6]] - ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO26]] - ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] - ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] - ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE6]] - ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO26]] - ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE6]] - ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] - ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] - ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] - ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE6]] - ; 
GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD14]] - ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDO36]], [[C5]] - ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD15]] - ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV25]], [[MUL13]] - ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDO36]] - ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL14]] - ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[UV24]] - ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD17]], [[USUBO3]] - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD17]] - ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV27]] - ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV26]] - ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV27]] - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV26]] - ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV27]], [[USUBO3]] - ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] - ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) - ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV27]] - ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV26]] - ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV27]] - ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV26]] - ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV27]], [[USUBO5]] - ; GFX10-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] - ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) - ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] - ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] - ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] - ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] - ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[ASHR]] - ; GFX10-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) - ; GFX10-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES 
[[ASHR]](s64) - ; GFX10-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV28]], [[UV30]] - ; GFX10-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV29]], [[UV31]], [[USUBO9]] - ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s33) = G_TRUNC %0 - %3:_(s33) = G_TRUNC %1 - %4:_(s33) = G_SREM %2, %3 - %5:_(s64) = G_ANYEXT %4 - $vgpr0_vgpr1 = COPY %5 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[COPY]], 33 + ; GFX10-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i64) = G_SEXT_INREG [[COPY1]], 33 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG]], [[C]](i32) + ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[SEXT_INREG1]], [[C]](i32) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SEXT_INREG]](i64) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR]](i64) + ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV]], [[UV2]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SEXT_INREG1]](i64) + ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR1]](i64) + ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UV4]], [[UV6]] + ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO2]](i32), [[UADDE2]](i32) + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(i64) = G_XOR [[MV]], [[ASHR]] + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(i64) = G_XOR [[MV1]], [[ASHR1]] + ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[UV8]](i32) + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[UV9]](i32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD]](f32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX10-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD1]](f32) + ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC]](f32) + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = 
G_UNMERGE_VALUES [[C5]](i64) + ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV10]], [[UV12]] + ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[FPTOUI]], [[C5]] + ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](i64) + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[UV15]], [[MUL]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ADD]], [[MUL1]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[UV14]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[UV14]] + ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(i32), [[UADDO5:%[0-9]+]]:_(i1) = G_UADDO [[MUL2]], [[MUL3]] + ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO5]](i1) + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(i32), [[UADDO7:%[0-9]+]]:_(i1) = G_UADDO [[UADDO4]], [[UMULH]] + ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO7]](i1) + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[UV14]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(i32), [[UADDO9:%[0-9]+]]:_(i1) = G_UADDO [[MUL4]], [[UMULH1]] + ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO9]](i1) + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(i32), [[UADDO11:%[0-9]+]]:_(i1) = G_UADDO [[UADDO8]], [[UMULH2]] + ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO11]](i1) + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(i32), [[UADDO13:%[0-9]+]]:_(i1) = G_UADDO [[UADDO10]], [[ADD2]] + ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO13]](i1) + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[UMULH3]], [[ADD4]] + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(i32), [[UADDO15:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI]], [[UADDO12]] + ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(i32), [[UADDE5:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[UADDO14]], [[C5]] + ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](i64) + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[UADDE4]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[UV17]], [[MUL5]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(i32) = G_MUL [[USUBE]], [[UADDO14]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[ADD6]], [[MUL6]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(i32) = G_MUL [[UADDE4]], [[UV16]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(i32) = G_MUL [[UADDO14]], [[ADD7]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(i32) = G_UMULH [[UADDO14]], [[UV16]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(i32), [[UADDO17:%[0-9]+]]:_(i1) = G_UADDO [[MUL7]], [[MUL8]] + ; 
GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO17]](i1) + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(i32), [[UADDO19:%[0-9]+]]:_(i1) = G_UADDO [[UADDO16]], [[UMULH4]] + ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO19]](i1) + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(i32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(i32) = G_MUL [[UADDE4]], [[ADD7]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(i32) = G_UMULH [[UADDE4]], [[UV16]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(i32) = G_UMULH [[UADDO14]], [[ADD7]] + ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(i32), [[UADDO21:%[0-9]+]]:_(i1) = G_UADDO [[MUL9]], [[UMULH5]] + ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO21]](i1) + ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(i32), [[UADDO23:%[0-9]+]]:_(i1) = G_UADDO [[UADDO20]], [[UMULH6]] + ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO23]](i1) + ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(i32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(i32), [[UADDO25:%[0-9]+]]:_(i1) = G_UADDO [[UADDO22]], [[ADD8]] + ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO25]](i1) + ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(i32) = G_ADD [[ADD9]], [[ZEXT9]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(i32) = G_UMULH [[UADDE4]], [[ADD7]] + ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(i32) = G_ADD [[UMULH7]], [[ADD10]] + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(i32), [[UADDO27:%[0-9]+]]:_(i1) = G_UADDO [[UADDO14]], [[UADDO24]] + ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(i32), [[UADDE7:%[0-9]+]]:_(i1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] + ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR]](i64) + ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR]](i64) + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(i32) = G_MUL [[UV21]], [[UADDO26]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(i32) = G_MUL [[UV20]], [[UADDE6]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(i32) = G_UMULH [[UV20]], [[UADDO26]] + ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(i32), [[UADDO29:%[0-9]+]]:_(i1) = G_UADDO [[MUL10]], [[MUL11]] + ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO29]](i1) + ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(i32), [[UADDO31:%[0-9]+]]:_(i1) = G_UADDO [[UADDO28]], [[UMULH8]] + ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO31]](i1) + ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(i32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(i32) = G_MUL [[UV21]], [[UADDE6]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(i32) = G_UMULH [[UV21]], [[UADDO26]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(i32) = G_UMULH [[UV20]], [[UADDE6]] + ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(i32), [[UADDO33:%[0-9]+]]:_(i1) = G_UADDO [[MUL12]], [[UMULH9]] + ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO33]](i1) + ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(i32), [[UADDO35:%[0-9]+]]:_(i1) = G_UADDO [[UADDO32]], [[UMULH10]] + ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO35]](i1) + ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(i32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(i32), [[UADDO37:%[0-9]+]]:_(i1) = G_UADDO [[UADDO34]], [[ADD12]] + ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO37]](i1) + ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(i32) = G_ADD [[ADD13]], [[ZEXT14]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(i32) = G_UMULH [[UV21]], [[UADDE6]] + ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(i32) = G_ADD [[UMULH11]], [[ADD14]] + ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX10-NEXT: 
[[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV22]](i32), [[UADDO36]], [[C5]] + ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](i64) + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(i32) = G_MUL [[UV22]], [[ADD15]] + ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(i32) = G_ADD [[UV25]], [[MUL13]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(i32) = G_MUL [[UV23]], [[UADDO36]] + ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(i32) = G_ADD [[ADD16]], [[MUL14]] + ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(i32), [[USUBO3:%[0-9]+]]:_(i1) = G_USUBO [[UV18]], [[UV24]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV19]], [[ADD17]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV19]], [[ADD17]] + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO2]](i32), [[USUBE2]](i32) + ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](i64) + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE2]](i32), [[UV27]] + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO2]](i32), [[UV26]] + ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(i32) = G_SEXT [[ICMP1]](i1) + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE2]](i32), [[UV27]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SEXT1]], [[SEXT]] + ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(i32), [[USUBO5:%[0-9]+]]:_(i1) = G_USUBO [[USUBO2]], [[UV26]] + ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(i32), [[USUBE5:%[0-9]+]]:_(i1) = G_USUBE [[SUB]], [[UV27]], [[USUBO3]] + ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(i32), [[USUBE7:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO4]](i32), [[USUBE6]](i32) + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE6]](i32), [[UV27]] + ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(i32) = G_SEXT [[ICMP3]](i1) + ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO4]](i32), [[UV26]] + ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(i32) = G_SEXT [[ICMP4]](i1) + ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE6]](i32), [[UV27]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP5]](i1), [[SEXT3]], [[SEXT2]] + ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(i32), [[USUBO7:%[0-9]+]]:_(i1) = G_USUBO [[USUBO4]], [[UV26]] + ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(i32), [[USUBE9:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[UV27]], [[USUBO5]] + ; GFX10-NEXT: [[USUBE10:%[0-9]+]]:_(i32), [[USUBE11:%[0-9]+]]:_(i1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] + ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO6]](i32), [[USUBE10]](i32) + ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT1]](i32), [[C6]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[MV4]], [[MV3]] + ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT]](i32), [[C6]] + ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP7]](i1), [[SELECT2]], [[MV2]] + ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(i64) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX10-NEXT: [[UV28:%[0-9]+]]:_(i32), [[UV29:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR2]](i64) + ; GFX10-NEXT: [[UV30:%[0-9]+]]:_(i32), [[UV31:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR]](i64) + ; GFX10-NEXT: [[USUBO8:%[0-9]+]]:_(i32), [[USUBO9:%[0-9]+]]:_(i1) = G_USUBO [[UV28]], [[UV30]] + ; GFX10-NEXT: [[USUBE12:%[0-9]+]]:_(i32), [[USUBE13:%[0-9]+]]:_(i1) = 
G_USUBE [[UV29]], [[UV31]], [[USUBO9]] + ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO8]](i32), [[USUBE12]](i32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i33) = G_TRUNC %0(i64) + %3:_(i33) = G_TRUNC %1(i64) + %4:_(i33) = G_SREM %2, %3 + %5:_(i64) = G_ANYEXT %4(i33) + $vgpr0_vgpr1 = COPY %5(i64) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir index 18672a1f5fc0b..7ed3423553c32 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir @@ -14,78 +14,78 @@ body: | ; GFX6-LABEL: name: sshlsat_s7 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[AND]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s32), [[C4]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]] - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[ASHR]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] - ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SELECT1]], [[C1]](s32) - ; GFX6-NEXT: $vgpr0 = COPY [[ASHR1]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 127 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 25 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[C1]](i32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[SHL]], [[AND]](i32) + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SHL1]], [[AND]](i32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SHL]](i32), [[C4]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[C2]], [[C3]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SHL]](i32), [[ASHR]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SELECT]], [[SHL1]] + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[SELECT1]], [[C1]](i32) + ; GFX6-NEXT: $vgpr0 = COPY [[ASHR1]](i32) ; ; GFX8-LABEL: name: sshlsat_s7 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = 
G_CONSTANT i16 9 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND]](s16) - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[AND]](s16) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s16), [[C4]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]] - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s16), [[ASHR]] - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] - ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SELECT1]], [[C1]](s16) - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR1]](s16) - ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 127 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 9 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[C1]](i16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[SHL]], [[AND]](i16) + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i16) = G_ASHR [[SHL1]], [[AND]](i16) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 0 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SHL]](i16), [[C4]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i16) = G_SELECT [[ICMP]](i1), [[C2]], [[C3]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SHL]](i16), [[ASHR]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(i16) = G_SELECT [[ICMP1]](i1), [[SELECT]], [[SHL1]] + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(i16) = G_ASHR [[SELECT1]], [[C1]](i16) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ASHR1]](i16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: sshlsat_s7 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND]](s16) - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[AND]](s16) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s16), [[C4]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s16), [[ASHR]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] - ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SELECT1]], [[C1]](s16) - ; 
GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR1]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s7) = G_TRUNC %0 - %3:_(s7) = G_TRUNC %1 - %4:_(s7) = G_SSHLSAT %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 127 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 9 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[C1]](i16) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[SHL]], [[AND]](i16) + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(i16) = G_ASHR [[SHL1]], [[AND]](i16) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 -32768 + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 32767 + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SHL]](i16), [[C4]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i16) = G_SELECT [[ICMP]](i1), [[C2]], [[C3]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SHL]](i16), [[ASHR]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i16) = G_SELECT [[ICMP1]](i1), [[SELECT]], [[SHL1]] + ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(i16) = G_ASHR [[SELECT1]], [[C1]](i16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ASHR1]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i7) = G_TRUNC %0(i32) + %3:_(i7) = G_TRUNC %1(i32) + %4:_(i7) = G_SSHLSAT %2, %3(i7) + %5:_(i32) = G_ANYEXT %4(i7) + $vgpr0 = COPY %5(i32) ... 
--- @@ -97,78 +97,78 @@ body: | ; GFX6-LABEL: name: sshlsat_s8 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[AND]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s32), [[C4]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]] - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[ASHR]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] - ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SELECT1]], [[C1]](s32) - ; GFX6-NEXT: $vgpr0 = COPY [[ASHR1]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[C1]](i32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[SHL]], [[AND]](i32) + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SHL1]], [[AND]](i32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SHL]](i32), [[C4]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[C2]], [[C3]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SHL]](i32), [[ASHR]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SELECT]], [[SHL1]] + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[SELECT1]], [[C1]](i32) + ; GFX6-NEXT: $vgpr0 = COPY [[ASHR1]](i32) ; ; GFX8-LABEL: name: sshlsat_s8 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND]](s16) - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[AND]](s16) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s16), [[C4]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]] - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s16), [[ASHR]] - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s16) 
= G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] - ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SELECT1]], [[C1]](s16) - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR1]](s16) - ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[C1]](i16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[SHL]], [[AND]](i16) + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i16) = G_ASHR [[SHL1]], [[AND]](i16) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 0 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SHL]](i16), [[C4]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i16) = G_SELECT [[ICMP]](i1), [[C2]], [[C3]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SHL]](i16), [[ASHR]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(i16) = G_SELECT [[ICMP1]](i1), [[SELECT]], [[SHL1]] + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(i16) = G_ASHR [[SELECT1]], [[C1]](i16) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ASHR1]](i16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: sshlsat_s8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND]](s16) - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[AND]](s16) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s16), [[C4]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s16), [[ASHR]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] - ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SELECT1]], [[C1]](s16) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR1]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s8) = G_TRUNC %0 - %3:_(s8) = G_TRUNC %1 - %4:_(s8) = G_SSHLSAT %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; 
GFX9-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[C1]](i16) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[SHL]], [[AND]](i16) + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(i16) = G_ASHR [[SHL1]], [[AND]](i16) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 -32768 + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 32767 + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SHL]](i16), [[C4]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i16) = G_SELECT [[ICMP]](i1), [[C2]], [[C3]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SHL]](i16), [[ASHR]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i16) = G_SELECT [[ICMP1]](i1), [[SELECT]], [[SHL1]] + ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(i16) = G_ASHR [[SELECT1]], [[C1]](i16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ASHR1]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i8) = G_TRUNC %0(i32) + %3:_(i8) = G_TRUNC %1(i32) + %4:_(i8) = G_SSHLSAT %2, %3(i8) + %5:_(i32) = G_ANYEXT %4(i8) + $vgpr0 = COPY %5(i32) ... --- @@ -180,138 +180,138 @@ body: | ; GFX6-LABEL: name: sshlsat_v2s8 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[AND]](s32) - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s32), [[C5]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C3]], [[C4]] - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[ASHR]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] - ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SELECT1]], [[C1]](s32) - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) - ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND1]](s32) - ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[AND1]](s32) - ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL2]](s32), [[C5]] - ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C3]], [[C4]] - ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s32), [[ASHR2]] - ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL3]] - ; GFX6-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SELECT3]], [[C1]](s32) - ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR1]](s32) - ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C6]] - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ASHR3]], [[C2]] - ; GFX6-NEXT: 
[[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) - ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC1]] - ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY1]], [[C]](i32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C2]] + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[C1]](i32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[SHL]], [[AND]](i32) + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SHL1]], [[AND]](i32) + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SHL]](i32), [[C5]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[C3]], [[C4]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SHL]](i32), [[ASHR]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SELECT]], [[SHL1]] + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[SELECT1]], [[C1]](i32) + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LSHR1]], [[C2]] + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C1]](i32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[SHL2]], [[AND1]](i32) + ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(i32) = G_ASHR [[SHL3]], [[AND1]](i32) + ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SHL2]](i32), [[C5]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[C3]], [[C4]] + ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SHL2]](i32), [[ASHR2]] + ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(i32) = G_SELECT [[ICMP3]](i1), [[SELECT2]], [[SHL3]] + ; GFX6-NEXT: [[ASHR3:%[0-9]+]]:_(i32) = G_ASHR [[SELECT3]], [[C1]](i32) + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[ASHR1]](i32) + ; GFX6-NEXT: [[C6:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C6]] + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C]](i32) + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[ASHR3]], [[C2]] + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[COPY2]](i32) + ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[SHL4]](i32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[TRUNC1]] + ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[OR]](i16) + ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX8-LABEL: name: sshlsat_v2s8 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; 
GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C2]](s16) - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND]](s16) - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[AND]](s16) - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s16), [[C5]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C3]], [[C4]] - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s16), [[ASHR]] - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] - ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SELECT1]], [[C2]](s16) - ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] - ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[C2]](s16) - ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[SHL2]], [[AND1]](s16) - ; GFX8-NEXT: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[SHL3]], [[AND1]](s16) - ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL2]](s16), [[C5]] - ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C3]], [[C4]] - ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s16), [[ASHR2]] - ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL3]] - ; GFX8-NEXT: [[ASHR3:%[0-9]+]]:_(s16) = G_ASHR [[SELECT3]], [[C2]](s16) - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C1]] - ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[ASHR3]], [[C1]] - ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL4]] - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY1]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C1]] + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[C2]](i16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[SHL]], [[AND]](i16) + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i16) = G_ASHR [[SHL1]], [[AND]](i16) + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C5:%[0-9]+]]:_(i16) = G_CONSTANT i16 0 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SHL]](i16), [[C5]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i16) = G_SELECT [[ICMP]](i1), [[C3]], [[C4]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SHL]](i16), [[ASHR]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(i16) = G_SELECT [[ICMP1]](i1), [[SELECT]], [[SHL1]] + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(i16) = G_ASHR [[SELECT1]], [[C2]](i16) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX8-NEXT: 
[[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C1]] + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[TRUNC2]], [[C2]](i16) + ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(i16) = G_SHL [[SHL2]], [[AND1]](i16) + ; GFX8-NEXT: [[ASHR2:%[0-9]+]]:_(i16) = G_ASHR [[SHL3]], [[AND1]](i16) + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SHL2]](i16), [[C5]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(i16) = G_SELECT [[ICMP2]](i1), [[C3]], [[C4]] + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SHL2]](i16), [[ASHR2]] + ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(i16) = G_SELECT [[ICMP3]](i1), [[SELECT2]], [[SHL3]] + ; GFX8-NEXT: [[ASHR3:%[0-9]+]]:_(i16) = G_ASHR [[SELECT3]], [[C2]](i16) + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[ASHR1]], [[C1]] + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[ASHR3]], [[C1]] + ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C2]](i16) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL4]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[OR]](i16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: sshlsat_v2s8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C2]](s16) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND]](s16) - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[AND]](s16) - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s16), [[C5]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C3]], [[C4]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s16), [[ASHR]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] - ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SELECT1]], [[C2]](s16) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[C2]](s16) - ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[SHL2]], [[AND1]](s16) - ; GFX9-NEXT: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[SHL3]], [[AND1]](s16) - ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL2]](s16), [[C5]] - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C3]], [[C4]] - ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s16), [[ASHR2]] - ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL3]] - ; GFX9-NEXT: [[ASHR3:%[0-9]+]]:_(s16) = G_ASHR [[SELECT3]], [[C2]](s16) - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C1]] - ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND 
[[ASHR3]], [[C1]] - ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL4]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(<2 x s8>) = G_BITCAST %2 - %5:_(<2 x s8>) = G_BITCAST %3 - %6:_(<2 x s8>) = G_SSHLSAT %4, %5 - %7:_(s16) = G_BITCAST %6 - %8:_(s32) = G_ANYEXT %7 - $vgpr0 = COPY %8 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY1]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C1]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[C2]](i16) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[SHL]], [[AND]](i16) + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(i16) = G_ASHR [[SHL1]], [[AND]](i16) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 -32768 + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 32767 + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(i16) = G_CONSTANT i16 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SHL]](i16), [[C5]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i16) = G_SELECT [[ICMP]](i1), [[C3]], [[C4]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SHL]](i16), [[ASHR]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i16) = G_SELECT [[ICMP1]](i1), [[SELECT]], [[SHL1]] + ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(i16) = G_ASHR [[SELECT1]], [[C2]](i16) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C1]] + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[TRUNC2]], [[C2]](i16) + ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(i16) = G_SHL [[SHL2]], [[AND1]](i16) + ; GFX9-NEXT: [[ASHR2:%[0-9]+]]:_(i16) = G_ASHR [[SHL3]], [[AND1]](i16) + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SHL2]](i16), [[C5]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(i16) = G_SELECT [[ICMP2]](i1), [[C3]], [[C4]] + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SHL2]](i16), [[ASHR2]] + ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(i16) = G_SELECT [[ICMP3]](i1), [[SELECT2]], [[SHL3]] + ; GFX9-NEXT: [[ASHR3:%[0-9]+]]:_(i16) = G_ASHR [[SELECT3]], [[C2]](i16) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[ASHR1]], [[C1]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[ASHR3]], [[C1]] + ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C2]](i16) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL4]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[OR]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(<2 x i8>) = G_BITCAST %2(i16) + %5:_(<2 x i8>) = G_BITCAST %3(i16) + %6:_(<2 x i8>) = G_SSHLSAT %4, %5(<2 x i8>) + %7:_(i16) = G_BITCAST %6(<2 x i8>) + %8:_(i32) = G_ANYEXT %7(i16) + $vgpr0 = COPY %8(i32) ... 
--- @@ -323,68 +323,68 @@ body: | ; GFX6-LABEL: name: sshlsat_s16 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[AND]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s32), [[C4]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]] - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[ASHR]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] - ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SELECT1]], [[C1]](s32) - ; GFX6-NEXT: $vgpr0 = COPY [[ASHR1]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[C1]](i32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[SHL]], [[AND]](i32) + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SHL1]], [[AND]](i32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SHL]](i32), [[C4]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[C2]], [[C3]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SHL]](i32), [[ASHR]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SELECT]], [[SHL1]] + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[SELECT1]], [[C1]](i32) + ; GFX6-NEXT: $vgpr0 = COPY [[ASHR1]](i32) ; ; GFX8-LABEL: name: sshlsat_s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC1]](s16) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC]](s16), [[C2]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[ASHR]] - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16) - ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: 
[[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[TRUNC1]](i16) + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i16) = G_ASHR [[SHL]], [[TRUNC1]](i16) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 0 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[TRUNC]](i16), [[C2]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i16) = G_SELECT [[ICMP]](i1), [[C]], [[C1]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[TRUNC]](i16), [[ASHR]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(i16) = G_SELECT [[ICMP1]](i1), [[SELECT]], [[SHL]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[SELECT1]](i16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: sshlsat_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC1]](s16) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC]](s16), [[C2]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[ASHR]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s16) = G_SSHLSAT %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[TRUNC1]](i16) + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(i16) = G_ASHR [[SHL]], [[TRUNC1]](i16) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 -32768 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 32767 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[TRUNC]](i16), [[C2]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i16) = G_SELECT [[ICMP]](i1), [[C]], [[C1]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[TRUNC]](i16), [[ASHR]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i16) = G_SELECT [[ICMP1]](i1), [[SELECT]], [[SHL]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[SELECT1]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i16) = G_SSHLSAT %2, %3(i16) + %5:_(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... 
--- @@ -396,112 +396,112 @@ body: | ; GFX6-LABEL: name: sshlsat_v2s16 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[AND]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s32), [[C4]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]] - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[ASHR]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] - ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SELECT1]], [[C]](s32) - ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[LSHR1]](s32) - ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[LSHR1]](s32) - ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL2]](s32), [[C4]] - ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C2]], [[C3]] - ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s32), [[ASHR2]] - ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL3]] - ; GFX6-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SELECT3]], [[C]](s32) - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C1]] - ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR3]], [[C1]] - ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL4]] - ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C1]] + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[BITCAST]], [[C]](i32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[SHL]], [[AND]](i32) + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SHL1]], [[AND]](i32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: 
[[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SHL]](i32), [[C4]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[C2]], [[C3]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SHL]](i32), [[ASHR]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SELECT]], [[SHL1]] + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[SELECT1]], [[C]](i32) + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C]](i32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[SHL2]], [[LSHR1]](i32) + ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(i32) = G_ASHR [[SHL3]], [[LSHR1]](i32) + ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SHL2]](i32), [[C4]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[C2]], [[C3]] + ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SHL2]](i32), [[ASHR2]] + ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(i32) = G_SELECT [[ICMP3]](i1), [[SELECT2]], [[SHL3]] + ; GFX6-NEXT: [[ASHR3:%[0-9]+]]:_(i32) = G_ASHR [[SELECT3]], [[C]](i32) + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[ASHR1]], [[C1]] + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[ASHR3]], [[C1]] + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[AND2]], [[C]](i32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND1]], [[SHL4]] + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) ; ; GFX8-LABEL: name: sshlsat_v2s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC2]](s16) - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC2]](s16) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC]](s16), [[C3]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[ASHR]] - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC3]](s16) - ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[TRUNC3]](s16) - ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC1]](s16), [[C3]] - ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[C2]] - ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[ASHR1]] - ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT1]](s16) - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT3]](s16) - ; GFX8-NEXT: 
[[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[TRUNC2]](i16) + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i16) = G_ASHR [[SHL]], [[TRUNC2]](i16) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 0 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[TRUNC]](i16), [[C3]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i16) = G_SELECT [[ICMP]](i1), [[C1]], [[C2]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[TRUNC]](i16), [[ASHR]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(i16) = G_SELECT [[ICMP1]](i1), [[SELECT]], [[SHL]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[TRUNC3]](i16) + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(i16) = G_ASHR [[SHL1]], [[TRUNC3]](i16) + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[TRUNC1]](i16), [[C3]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(i16) = G_SELECT [[ICMP2]](i1), [[C1]], [[C2]] + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[TRUNC1]](i16), [[ASHR1]] + ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(i16) = G_SELECT [[ICMP3]](i1), [[SELECT2]], [[SHL1]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[SELECT1]](i16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[SELECT3]](i16) + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) ; ; GFX9-LABEL: name: sshlsat_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC2]](s16) - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC2]](s16) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 
-32768 - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC]](s16), [[C3]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[ASHR]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC3]](s16) - ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[TRUNC3]](s16) - ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC1]](s16), [[C3]] - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[C2]] - ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[ASHR1]] - ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SELECT1]](s16), [[SELECT3]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_SSHLSAT %0, %1 - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[TRUNC2]](i16) + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(i16) = G_ASHR [[SHL]], [[TRUNC2]](i16) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 -32768 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 32767 + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[TRUNC]](i16), [[C3]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i16) = G_SELECT [[ICMP]](i1), [[C1]], [[C2]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[TRUNC]](i16), [[ASHR]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i16) = G_SELECT [[ICMP1]](i1), [[SELECT]], [[SHL]] + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[TRUNC3]](i16) + ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(i16) = G_ASHR [[SHL1]], [[TRUNC3]](i16) + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[TRUNC1]](i16), [[C3]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(i16) = G_SELECT [[ICMP2]](i1), [[C1]], [[C2]] + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[TRUNC1]](i16), [[ASHR1]] + ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(i16) = G_SELECT [[ICMP3]](i1), [[SELECT2]], [[SHL1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[SELECT1]](i16), [[SELECT3]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = G_SSHLSAT %0, %1(<2 x i16>) + $vgpr0 = COPY %2(<2 x i16>) ... 
--- @@ -513,186 +513,186 @@ body: | ; GFX6-LABEL: name: sshlsat_v3s16 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[LSHR1]](s32) - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[LSHR1]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s32), [[C3]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[ASHR]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] - ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SELECT1]], [[C]](s32) - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C4]] - ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND]](s32) - ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[AND]](s32) - ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL2]](s32), [[C3]] - ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C1]], [[C2]] - ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s32), [[ASHR2]] - ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL3]] - ; GFX6-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SELECT3]], [[C]](s32) - ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[SHL4]], [[LSHR2]](s32) - ; GFX6-NEXT: [[ASHR4:%[0-9]+]]:_(s32) = G_ASHR [[SHL5]], [[LSHR2]](s32) - ; GFX6-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL4]](s32), [[C3]] - ; GFX6-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP4]](s1), [[C1]], [[C2]] - ; GFX6-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL4]](s32), [[ASHR4]] - ; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT4]], [[SHL5]] - ; GFX6-NEXT: [[ASHR5:%[0-9]+]]:_(s32) = G_ASHR [[SELECT5]], [[C]](s32) - ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C4]] - ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR3]], 
[[C4]] - ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL6]] - ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ASHR5]], [[C4]] - ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]] - ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) - ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL7]] - ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] - ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL8]] - ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[BITCAST]], [[C]](i32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[SHL]], [[LSHR1]](i32) + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SHL1]], [[LSHR1]](i32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SHL]](i32), [[C3]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[C1]], [[C2]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SHL]](i32), [[ASHR]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SELECT]], [[SHL1]] + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[SELECT1]], [[C]](i32) + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C4]] + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C]](i32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[SHL2]], [[AND]](i32) + ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(i32) = G_ASHR [[SHL3]], [[AND]](i32) + ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SHL2]](i32), [[C3]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[C1]], [[C2]] + ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SHL2]](i32), [[ASHR2]] + ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(i32) = G_SELECT [[ICMP3]](i1), [[SELECT2]], [[SHL3]] + ; GFX6-NEXT: [[ASHR3:%[0-9]+]]:_(i32) = G_ASHR [[SELECT3]], [[C]](i32) + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[BITCAST1]], [[C]](i32) + ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[SHL4]], [[LSHR2]](i32) + ; GFX6-NEXT: [[ASHR4:%[0-9]+]]:_(i32) = G_ASHR [[SHL5]], [[LSHR2]](i32) + ; GFX6-NEXT: 
[[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SHL4]](i32), [[C3]] + ; GFX6-NEXT: [[SELECT4:%[0-9]+]]:_(i32) = G_SELECT [[ICMP4]](i1), [[C1]], [[C2]] + ; GFX6-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SHL4]](i32), [[ASHR4]] + ; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(i32) = G_SELECT [[ICMP5]](i1), [[SELECT4]], [[SHL5]] + ; GFX6-NEXT: [[ASHR5:%[0-9]+]]:_(i32) = G_ASHR [[SELECT5]], [[C]](i32) + ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[ASHR1]], [[C4]] + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[ASHR3]], [[C4]] + ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[AND2]], [[C]](i32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND1]], [[SHL6]] + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[ASHR5]], [[C4]] + ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[BITCAST3]], [[C4]] + ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[AND4]], [[C]](i32) + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND3]], [[SHL7]] + ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[BITCAST4]], [[C4]] + ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[AND5]], [[C]](i32) + ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR3]], [[SHL8]] + ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x i16>), [[BITCAST6]](<2 x i16>), [[BITCAST7]](<2 x i16>) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX8-LABEL: name: sshlsat_v3s16 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC3]](s16) - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC3]](s16) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), 
[[TRUNC]](s16), [[C3]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[ASHR]] - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC4]](s16) - ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[TRUNC4]](s16) - ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC1]](s16), [[C3]] - ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[C2]] - ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[ASHR1]] - ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] - ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC5]](s16) - ; GFX8-NEXT: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[SHL2]], [[TRUNC5]](s16) - ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC2]](s16), [[C3]] - ; GFX8-NEXT: [[SELECT4:%[0-9]+]]:_(s16) = G_SELECT [[ICMP4]](s1), [[C1]], [[C2]] - ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[ASHR2]] - ; GFX8-NEXT: [[SELECT5:%[0-9]+]]:_(s16) = G_SELECT [[ICMP5]](s1), [[SELECT4]], [[SHL2]] - ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT1]](s16) - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT3]](s16) - ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] - ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT5]](s16) - ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]] - ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] - ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] - ; GFX8-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL5]] - ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX8-NEXT: 
[[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[TRUNC3]](i16) + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i16) = G_ASHR [[SHL]], [[TRUNC3]](i16) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 0 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[TRUNC]](i16), [[C3]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i16) = G_SELECT [[ICMP]](i1), [[C1]], [[C2]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[TRUNC]](i16), [[ASHR]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(i16) = G_SELECT [[ICMP1]](i1), [[SELECT]], [[SHL]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[TRUNC4]](i16) + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(i16) = G_ASHR [[SHL1]], [[TRUNC4]](i16) + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[TRUNC1]](i16), [[C3]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(i16) = G_SELECT [[ICMP2]](i1), [[C1]], [[C2]] + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[TRUNC1]](i16), [[ASHR1]] + ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(i16) = G_SELECT [[ICMP3]](i1), [[SELECT2]], [[SHL1]] + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[TRUNC2]], [[TRUNC5]](i16) + ; GFX8-NEXT: [[ASHR2:%[0-9]+]]:_(i16) = G_ASHR [[SHL2]], [[TRUNC5]](i16) + ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[TRUNC2]](i16), [[C3]] + ; GFX8-NEXT: [[SELECT4:%[0-9]+]]:_(i16) = G_SELECT [[ICMP4]](i1), [[C1]], [[C2]] + ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[TRUNC2]](i16), [[ASHR2]] + ; GFX8-NEXT: [[SELECT5:%[0-9]+]]:_(i16) = G_SELECT [[ICMP5]](i1), [[SELECT4]], [[SHL2]] + ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[SELECT1]](i16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[SELECT3]](i16) + ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL3]] + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[SELECT5]](i16) + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST3]], [[C4]] + ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[AND]], [[C]](i32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL4]] + ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST4]], [[C4]] + ; GFX8-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR3]], [[SHL5]] + ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; GFX8-NEXT: 
[[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x i16>), [[BITCAST6]](<2 x i16>), [[BITCAST7]](<2 x i16>) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX9-LABEL: name: sshlsat_v3s16 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC3]](s16) - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC3]](s16) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC]](s16), [[C3]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[ASHR]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC4]](s16) - ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[TRUNC4]](s16) - ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC1]](s16), [[C3]] - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[C2]] - ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[ASHR1]] - ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC5]](s16) - ; GFX9-NEXT: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[SHL2]], [[TRUNC5]](s16) - ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC2]](s16), [[C3]] - ; GFX9-NEXT: [[SELECT4:%[0-9]+]]:_(s16) = G_SELECT [[ICMP4]](s1), [[C1]], [[C2]] - ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[ASHR2]] - ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s16) = G_SELECT [[ICMP5]](s1), [[SELECT4]], [[SHL2]] - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX9-NEXT: 
[[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SELECT1]](s16), [[SELECT3]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SELECT5]](s16), [[TRUNC6]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC7]](s16), [[TRUNC8]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) - %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0 - %3:_(<3 x s16>) = G_SSHLSAT %1, %2 - %4:_(<3 x s16>) = G_IMPLICIT_DEF - %5:_(<6 x s16>) = G_CONCAT_VECTORS %3, %4 - $vgpr0_vgpr1_vgpr2 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[TRUNC3]](i16) + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(i16) = G_ASHR [[SHL]], [[TRUNC3]](i16) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 -32768 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 32767 + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[TRUNC]](i16), [[C3]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i16) = G_SELECT [[ICMP]](i1), [[C1]], [[C2]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[TRUNC]](i16), [[ASHR]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i16) = G_SELECT [[ICMP1]](i1), [[SELECT]], [[SHL]] + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[TRUNC4]](i16) + ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(i16) = G_ASHR [[SHL1]], [[TRUNC4]](i16) + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[TRUNC1]](i16), [[C3]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(i16) = G_SELECT [[ICMP2]](i1), [[C1]], [[C2]] + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[TRUNC1]](i16), [[ASHR1]] + ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(i16) = G_SELECT [[ICMP3]](i1), [[SELECT2]], [[SHL1]] + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[TRUNC2]], [[TRUNC5]](i16) + ; GFX9-NEXT: [[ASHR2:%[0-9]+]]:_(i16) = G_ASHR [[SHL2]], [[TRUNC5]](i16) + ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[TRUNC2]](i16), [[C3]] + ; GFX9-NEXT: [[SELECT4:%[0-9]+]]:_(i16) = G_SELECT [[ICMP4]](i1), [[C1]], [[C2]] + ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = 
G_ICMP intpred(ne), [[TRUNC2]](i16), [[ASHR2]] + ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(i16) = G_SELECT [[ICMP5]](i1), [[SELECT4]], [[SHL2]] + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST3]](i32) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[SELECT1]](i16), [[SELECT3]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[SELECT5]](i16), [[TRUNC6]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC7]](i16), [[TRUNC8]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) + %0:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x i16>), %2:_(<3 x i16>) = G_UNMERGE_VALUES %0(<6 x i16>) + %3:_(<3 x i16>) = G_SSHLSAT %1, %2(<3 x i16>) + %4:_(<3 x i16>) = G_IMPLICIT_DEF + %5:_(<6 x i16>) = G_CONCAT_VECTORS %3(<3 x i16>), %4(<3 x i16>) + $vgpr0_vgpr1_vgpr2 = COPY %5(<6 x i16>) ... --- @@ -704,193 +704,193 @@ body: | ; GFX6-LABEL: name: sshlsat_v4s16 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[AND]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s32), [[C4]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]] - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[ASHR]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT 
[[ICMP1]](s1), [[SELECT]], [[SHL1]] - ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SELECT1]], [[C]](s32) - ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[LSHR2]](s32) - ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[LSHR2]](s32) - ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL2]](s32), [[C4]] - ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C2]], [[C3]] - ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s32), [[ASHR2]] - ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL3]] - ; GFX6-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SELECT3]], [[C]](s32) - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[SHL4]], [[AND1]](s32) - ; GFX6-NEXT: [[ASHR4:%[0-9]+]]:_(s32) = G_ASHR [[SHL5]], [[AND1]](s32) - ; GFX6-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL4]](s32), [[C4]] - ; GFX6-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP4]](s1), [[C2]], [[C3]] - ; GFX6-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL4]](s32), [[ASHR4]] - ; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT4]], [[SHL5]] - ; GFX6-NEXT: [[ASHR5:%[0-9]+]]:_(s32) = G_ASHR [[SELECT5]], [[C]](s32) - ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[SHL6]], [[LSHR3]](s32) - ; GFX6-NEXT: [[ASHR6:%[0-9]+]]:_(s32) = G_ASHR [[SHL7]], [[LSHR3]](s32) - ; GFX6-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL6]](s32), [[C4]] - ; GFX6-NEXT: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[ICMP6]](s1), [[C2]], [[C3]] - ; GFX6-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL6]](s32), [[ASHR6]] - ; GFX6-NEXT: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP7]](s1), [[SELECT6]], [[SHL7]] - ; GFX6-NEXT: [[ASHR7:%[0-9]+]]:_(s32) = G_ASHR [[SELECT7]], [[C]](s32) - ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C1]] - ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ASHR3]], [[C1]] - ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL8]] - ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ASHR5]], [[C1]] - ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ASHR7]], [[C1]] - ; GFX6-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL9]] - ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(<2 
x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C1]] + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[BITCAST]], [[C]](i32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[SHL]], [[AND]](i32) + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SHL1]], [[AND]](i32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SHL]](i32), [[C4]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[C2]], [[C3]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SHL]](i32), [[ASHR]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SELECT]], [[SHL1]] + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[SELECT1]], [[C]](i32) + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C]](i32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[SHL2]], [[LSHR2]](i32) + ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(i32) = G_ASHR [[SHL3]], [[LSHR2]](i32) + ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SHL2]](i32), [[C4]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[C2]], [[C3]] + ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SHL2]](i32), [[ASHR2]] + ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(i32) = G_SELECT [[ICMP3]](i1), [[SELECT2]], [[SHL3]] + ; GFX6-NEXT: [[ASHR3:%[0-9]+]]:_(i32) = G_ASHR [[SELECT3]], [[C]](i32) + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST3]], [[C1]] + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[BITCAST1]], [[C]](i32) + ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[SHL4]], [[AND1]](i32) + ; GFX6-NEXT: [[ASHR4:%[0-9]+]]:_(i32) = G_ASHR [[SHL5]], [[AND1]](i32) + ; GFX6-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SHL4]](i32), [[C4]] + ; GFX6-NEXT: [[SELECT4:%[0-9]+]]:_(i32) = G_SELECT [[ICMP4]](i1), [[C2]], [[C3]] + ; GFX6-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SHL4]](i32), [[ASHR4]] + ; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(i32) = G_SELECT [[ICMP5]](i1), [[SELECT4]], [[SHL5]] + ; GFX6-NEXT: [[ASHR5:%[0-9]+]]:_(i32) = G_ASHR [[SELECT5]], [[C]](i32) + ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[LSHR1]], [[C]](i32) + ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[SHL6]], [[LSHR3]](i32) + ; GFX6-NEXT: [[ASHR6:%[0-9]+]]:_(i32) = G_ASHR [[SHL7]], [[LSHR3]](i32) + ; GFX6-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SHL6]](i32), [[C4]] + ; GFX6-NEXT: [[SELECT6:%[0-9]+]]:_(i32) = G_SELECT [[ICMP6]](i1), [[C2]], [[C3]] + ; GFX6-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SHL6]](i32), [[ASHR6]] + ; GFX6-NEXT: [[SELECT7:%[0-9]+]]:_(i32) = G_SELECT [[ICMP7]](i1), [[SELECT6]], [[SHL7]] + ; GFX6-NEXT: [[ASHR7:%[0-9]+]]:_(i32) = G_ASHR [[SELECT7]], [[C]](i32) + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[ASHR1]], [[C1]] + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[ASHR3]], [[C1]] + ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C]](i32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL8]] + ; GFX6-NEXT: 
[[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[ASHR5]], [[C1]] + ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[ASHR7]], [[C1]] + ; GFX6-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[AND5]], [[C]](i32) + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND4]], [[SHL9]] + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x i16>), [[BITCAST5]](<2 x i16>) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX8-LABEL: name: sshlsat_v4s16 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC4]](s16) - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC4]](s16) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC]](s16), [[C3]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[ASHR]] - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC5]](s16) - ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[TRUNC5]](s16) - ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC1]](s16), [[C3]] - ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[C2]] - ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[ASHR1]] - ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] - ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC6]](s16) - ; GFX8-NEXT: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[SHL2]], [[TRUNC6]](s16) - ; GFX8-NEXT: 
[[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC2]](s16), [[C3]] - ; GFX8-NEXT: [[SELECT4:%[0-9]+]]:_(s16) = G_SELECT [[ICMP4]](s1), [[C1]], [[C2]] - ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[ASHR2]] - ; GFX8-NEXT: [[SELECT5:%[0-9]+]]:_(s16) = G_SELECT [[ICMP5]](s1), [[SELECT4]], [[SHL2]] - ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[TRUNC7]](s16) - ; GFX8-NEXT: [[ASHR3:%[0-9]+]]:_(s16) = G_ASHR [[SHL3]], [[TRUNC7]](s16) - ; GFX8-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC3]](s16), [[C3]] - ; GFX8-NEXT: [[SELECT6:%[0-9]+]]:_(s16) = G_SELECT [[ICMP6]](s1), [[C1]], [[C2]] - ; GFX8-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC3]](s16), [[ASHR3]] - ; GFX8-NEXT: [[SELECT7:%[0-9]+]]:_(s16) = G_SELECT [[ICMP7]](s1), [[SELECT6]], [[SHL3]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT1]](s16) - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT3]](s16) - ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT5]](s16) - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT7]](s16) - ; GFX8-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] - ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST3]](i32) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[TRUNC4]](i16) + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i16) = G_ASHR [[SHL]], [[TRUNC4]](i16) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 0 
+ ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[TRUNC]](i16), [[C3]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i16) = G_SELECT [[ICMP]](i1), [[C1]], [[C2]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[TRUNC]](i16), [[ASHR]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(i16) = G_SELECT [[ICMP1]](i1), [[SELECT]], [[SHL]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[TRUNC5]](i16) + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(i16) = G_ASHR [[SHL1]], [[TRUNC5]](i16) + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[TRUNC1]](i16), [[C3]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(i16) = G_SELECT [[ICMP2]](i1), [[C1]], [[C2]] + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[TRUNC1]](i16), [[ASHR1]] + ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(i16) = G_SELECT [[ICMP3]](i1), [[SELECT2]], [[SHL1]] + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[TRUNC2]], [[TRUNC6]](i16) + ; GFX8-NEXT: [[ASHR2:%[0-9]+]]:_(i16) = G_ASHR [[SHL2]], [[TRUNC6]](i16) + ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[TRUNC2]](i16), [[C3]] + ; GFX8-NEXT: [[SELECT4:%[0-9]+]]:_(i16) = G_SELECT [[ICMP4]](i1), [[C1]], [[C2]] + ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[TRUNC2]](i16), [[ASHR2]] + ; GFX8-NEXT: [[SELECT5:%[0-9]+]]:_(i16) = G_SELECT [[ICMP5]](i1), [[SELECT4]], [[SHL2]] + ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(i16) = G_SHL [[TRUNC3]], [[TRUNC7]](i16) + ; GFX8-NEXT: [[ASHR3:%[0-9]+]]:_(i16) = G_ASHR [[SHL3]], [[TRUNC7]](i16) + ; GFX8-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[TRUNC3]](i16), [[C3]] + ; GFX8-NEXT: [[SELECT6:%[0-9]+]]:_(i16) = G_SELECT [[ICMP6]](i1), [[C1]], [[C2]] + ; GFX8-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[TRUNC3]](i16), [[ASHR3]] + ; GFX8-NEXT: [[SELECT7:%[0-9]+]]:_(i16) = G_SELECT [[ICMP7]](i1), [[SELECT6]], [[SHL3]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[SELECT1]](i16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[SELECT3]](i16) + ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[SELECT5]](i16) + ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[SELECT7]](i16) + ; GFX8-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x i16>), [[BITCAST5]](<2 x i16>) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX9-LABEL: name: sshlsat_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = 
G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC4]](s16) - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC4]](s16) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC]](s16), [[C3]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[ASHR]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC5]](s16) - ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[TRUNC5]](s16) - ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC1]](s16), [[C3]] - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[C2]] - ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[ASHR1]] - ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC6]](s16) - ; GFX9-NEXT: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[SHL2]], [[TRUNC6]](s16) - ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC2]](s16), [[C3]] - ; GFX9-NEXT: [[SELECT4:%[0-9]+]]:_(s16) = G_SELECT [[ICMP4]](s1), [[C1]], [[C2]] - ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[ASHR2]] - ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s16) = G_SELECT [[ICMP5]](s1), [[SELECT4]], [[SHL2]] - ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[TRUNC7]](s16) - ; GFX9-NEXT: [[ASHR3:%[0-9]+]]:_(s16) = G_ASHR [[SHL3]], [[TRUNC7]](s16) - ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC3]](s16), [[C3]] - ; GFX9-NEXT: [[SELECT6:%[0-9]+]]:_(s16) = G_SELECT [[ICMP6]](s1), [[C1]], [[C2]] - ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC3]](s16), [[ASHR3]] - ; GFX9-NEXT: [[SELECT7:%[0-9]+]]:_(s16) = G_SELECT [[ICMP7]](s1), [[SELECT6]], [[SHL3]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SELECT1]](s16), [[SELECT3]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SELECT5]](s16), [[SELECT7]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 - %2:_(<4 x s16>) = G_SSHLSAT %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; 
GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST3]](i32) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[TRUNC4]](i16) + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(i16) = G_ASHR [[SHL]], [[TRUNC4]](i16) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 -32768 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 32767 + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[TRUNC]](i16), [[C3]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i16) = G_SELECT [[ICMP]](i1), [[C1]], [[C2]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[TRUNC]](i16), [[ASHR]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i16) = G_SELECT [[ICMP1]](i1), [[SELECT]], [[SHL]] + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[TRUNC5]](i16) + ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(i16) = G_ASHR [[SHL1]], [[TRUNC5]](i16) + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[TRUNC1]](i16), [[C3]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(i16) = G_SELECT [[ICMP2]](i1), [[C1]], [[C2]] + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[TRUNC1]](i16), [[ASHR1]] + ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(i16) = G_SELECT [[ICMP3]](i1), [[SELECT2]], [[SHL1]] + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[TRUNC2]], [[TRUNC6]](i16) + ; GFX9-NEXT: [[ASHR2:%[0-9]+]]:_(i16) = G_ASHR [[SHL2]], [[TRUNC6]](i16) + ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[TRUNC2]](i16), [[C3]] + ; GFX9-NEXT: [[SELECT4:%[0-9]+]]:_(i16) = G_SELECT [[ICMP4]](i1), [[C1]], [[C2]] + ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[TRUNC2]](i16), [[ASHR2]] + ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(i16) = G_SELECT [[ICMP5]](i1), [[SELECT4]], [[SHL2]] + ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(i16) = G_SHL [[TRUNC3]], [[TRUNC7]](i16) + ; GFX9-NEXT: [[ASHR3:%[0-9]+]]:_(i16) = G_ASHR [[SHL3]], [[TRUNC7]](i16) + ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[TRUNC3]](i16), [[C3]] + ; GFX9-NEXT: [[SELECT6:%[0-9]+]]:_(i16) = G_SELECT [[ICMP6]](i1), [[C1]], [[C2]] + ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[TRUNC3]](i16), [[ASHR3]] + ; GFX9-NEXT: 
[[SELECT7:%[0-9]+]]:_(i16) = G_SELECT [[ICMP7]](i1), [[SELECT6]], [[SHL3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[SELECT1]](i16), [[SELECT3]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[SELECT5]](i16), [[SELECT7]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = COPY $vgpr2_vgpr3 + %2:_(<4 x i16>) = G_SSHLSAT %0, %1(<4 x i16>) + $vgpr0_vgpr1 = COPY %2(<4 x i16>) ... --- @@ -902,54 +902,54 @@ body: | ; GFX6-LABEL: name: sshlsat_s32 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[COPY1]](s32) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s32), [[C2]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[ASHR]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] - ; GFX6-NEXT: $vgpr0 = COPY [[SELECT1]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[COPY1]](i32) + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SHL]], [[COPY1]](i32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[COPY]](i32), [[C2]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[C]], [[C1]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[COPY]](i32), [[ASHR]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SELECT]], [[SHL]] + ; GFX6-NEXT: $vgpr0 = COPY [[SELECT1]](i32) ; ; GFX8-LABEL: name: sshlsat_s32 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[COPY1]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s32), [[C2]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[ASHR]] - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] - ; GFX8-NEXT: $vgpr0 = COPY [[SELECT1]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[COPY1]](i32) + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SHL]], [[COPY1]](i32) 
+ ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[COPY]](i32), [[C2]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[C]], [[C1]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[COPY]](i32), [[ASHR]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SELECT]], [[SHL]] + ; GFX8-NEXT: $vgpr0 = COPY [[SELECT1]](i32) ; ; GFX9-LABEL: name: sshlsat_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[COPY1]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s32), [[C2]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[ASHR]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] - ; GFX9-NEXT: $vgpr0 = COPY [[SELECT1]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_SSHLSAT %0, %1 - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[COPY1]](i32) + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SHL]], [[COPY1]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[COPY]](i32), [[C2]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[C]], [[C1]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[COPY]](i32), [[ASHR]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SELECT]], [[SHL]] + ; GFX9-NEXT: $vgpr0 = COPY [[SELECT1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_SSHLSAT %0, %1(i32) + $vgpr0 = COPY %2(i32) ... 
--- @@ -961,81 +961,81 @@ body: | ; GFX6-LABEL: name: sshlsat_v2s32 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[UV2]](s32) - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[UV2]](s32) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV]](s32), [[C2]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[ASHR]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[UV3]](s32) - ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[UV3]](s32) - ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV1]](s32), [[C2]] - ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C]], [[C1]] - ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[ASHR1]] - ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] - ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT1]](s32), [[SELECT3]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[UV]], [[UV2]](i32) + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SHL]], [[UV2]](i32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[UV]](i32), [[C2]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[C]], [[C1]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV]](i32), [[ASHR]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SELECT]], [[SHL]] + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[UV1]], [[UV3]](i32) + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[SHL1]], [[UV3]](i32) + ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[UV1]](i32), [[C2]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[C]], [[C1]] + ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV1]](i32), [[ASHR1]] + ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(i32) = G_SELECT [[ICMP3]](i1), [[SELECT2]], [[SHL1]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SELECT1]](i32), [[SELECT3]](i32) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX8-LABEL: name: sshlsat_v2s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = 
COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[UV2]](s32) - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[UV2]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV]](s32), [[C2]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[ASHR]] - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[UV3]](s32) - ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[UV3]](s32) - ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV1]](s32), [[C2]] - ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C]], [[C1]] - ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[ASHR1]] - ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT1]](s32), [[SELECT3]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[UV]], [[UV2]](i32) + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SHL]], [[UV2]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[UV]](i32), [[C2]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[C]], [[C1]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV]](i32), [[ASHR]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SELECT]], [[SHL]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[UV1]], [[UV3]](i32) + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[SHL1]], [[UV3]](i32) + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[UV1]](i32), [[C2]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[C]], [[C1]] + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV1]](i32), [[ASHR1]] + ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(i32) = G_SELECT [[ICMP3]](i1), [[SELECT2]], [[SHL1]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SELECT1]](i32), [[SELECT3]](i32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX9-LABEL: name: sshlsat_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x 
s32>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[UV2]](s32) - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[UV2]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV]](s32), [[C2]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[ASHR]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[UV3]](s32) - ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[UV3]](s32) - ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV1]](s32), [[C2]] - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C]], [[C1]] - ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[ASHR1]] - ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT1]](s32), [[SELECT3]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = G_SSHLSAT %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[UV]], [[UV2]](i32) + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SHL]], [[UV2]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[UV]](i32), [[C2]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[C]], [[C1]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV]](i32), [[ASHR]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SELECT]], [[SHL]] + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[UV1]], [[UV3]](i32) + ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[SHL1]], [[UV3]](i32) + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[UV1]](i32), [[C2]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[C]], [[C1]] + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV1]](i32), [[ASHR1]] + ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(i32) = G_SELECT [[ICMP3]](i1), [[SELECT2]], [[SHL1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SELECT1]](i32), [[SELECT3]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i32>) = G_SSHLSAT %0, %1(<2 x i32>) + $vgpr0_vgpr1 = COPY %2(<2 x i32>) ... 
--- @@ -1047,57 +1047,57 @@ body: | ; GFX6-LABEL: name: sshlsat_s64 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC]](s32) - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[TRUNC]](s32) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s64), [[C2]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), [[ASHR]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SELECT1]](s64) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY1]](i64) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[COPY]], [[TRUNC]](i32) + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[SHL]], [[TRUNC]](i32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 -9223372036854775808 + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 9223372036854775807 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[COPY]](i64), [[C2]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[C]], [[C1]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[COPY]](i64), [[ASHR]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[SELECT]], [[SHL]] + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SELECT1]](i64) ; ; GFX8-LABEL: name: sshlsat_s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC]](s32) - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[TRUNC]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s64), [[C2]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), [[ASHR]] - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT1]](s64) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY1]](i64) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[COPY]], [[TRUNC]](i32) + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[SHL]], [[TRUNC]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 -9223372036854775808 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 9223372036854775807 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX8-NEXT: 
[[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[COPY]](i64), [[C2]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[C]], [[C1]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[COPY]](i64), [[ASHR]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[SELECT]], [[SHL]] + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT1]](i64) ; ; GFX9-LABEL: name: sshlsat_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC]](s32) - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[TRUNC]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s64), [[C2]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), [[ASHR]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT1]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = G_SSHLSAT %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY1]](i64) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[COPY]], [[TRUNC]](i32) + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[SHL]], [[TRUNC]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 -9223372036854775808 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 9223372036854775807 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[COPY]](i64), [[C2]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[C]], [[C1]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[COPY]](i64), [[ASHR]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[SELECT]], [[SHL]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT1]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64) = G_SSHLSAT %0, %1(i64) + $vgpr0_vgpr1 = COPY %2(i64) ... 
--- @@ -1109,85 +1109,85 @@ body: | ; GFX6-LABEL: name: sshlsat_v2s64 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64) - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[TRUNC]](s32) - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[TRUNC]](s32) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV]](s64), [[C2]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s64), [[ASHR]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] - ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[TRUNC1]](s32) - ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SHL1]], [[TRUNC1]](s32) - ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV1]](s64), [[C2]] - ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[C]], [[C1]] - ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s64), [[ASHR1]] - ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] - ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT1]](s64), [[SELECT3]](s64) - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY1]](<2 x i64>) + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[UV2]](i64) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV]], [[TRUNC]](i32) + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[SHL]], [[TRUNC]](i32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 -9223372036854775808 + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 9223372036854775807 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[UV]](i64), [[C2]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[C]], [[C1]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV]](i64), [[ASHR]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[SELECT]], [[SHL]] + ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(i32) = G_TRUNC [[UV3]](i64) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[TRUNC1]](i32) + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[SHL1]], [[TRUNC1]](i32) + ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[UV1]](i64), [[C2]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP2]](i1), [[C]], [[C1]] + ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV1]](i64), [[ASHR1]] + ; 
GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP3]](i1), [[SELECT2]], [[SHL1]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[SELECT1]](i64), [[SELECT3]](i64) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; GFX8-LABEL: name: sshlsat_v2s64 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64) - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[TRUNC]](s32) - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[TRUNC]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV]](s64), [[C2]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s64), [[ASHR]] - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[TRUNC1]](s32) - ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SHL1]], [[TRUNC1]](s32) - ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV1]](s64), [[C2]] - ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[C]], [[C1]] - ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s64), [[ASHR1]] - ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT1]](s64), [[SELECT3]](s64) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY1]](<2 x i64>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[UV2]](i64) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV]], [[TRUNC]](i32) + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[SHL]], [[TRUNC]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 -9223372036854775808 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 9223372036854775807 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[UV]](i64), [[C2]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[C]], [[C1]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV]](i64), [[ASHR]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[SELECT]], [[SHL]] + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i32) = G_TRUNC [[UV3]](i64) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[TRUNC1]](i32) + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[SHL1]], [[TRUNC1]](i32) + ; GFX8-NEXT: 
[[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[UV1]](i64), [[C2]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP2]](i1), [[C]], [[C1]] + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV1]](i64), [[ASHR1]] + ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP3]](i1), [[SELECT2]], [[SHL1]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[SELECT1]](i64), [[SELECT3]](i64) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; GFX9-LABEL: name: sshlsat_v2s64 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[TRUNC]](s32) - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[TRUNC]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV]](s64), [[C2]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s64), [[ASHR]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[TRUNC1]](s32) - ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SHL1]], [[TRUNC1]](s32) - ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV1]](s64), [[C2]] - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[C]], [[C1]] - ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s64), [[ASHR1]] - ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT1]](s64), [[SELECT3]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - %2:_(<2 x s64>) = G_SSHLSAT %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY1]](<2 x i64>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[UV2]](i64) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV]], [[TRUNC]](i32) + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[SHL]], [[TRUNC]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 -9223372036854775808 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 9223372036854775807 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[UV]](i64), [[C2]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[C]], 
[[C1]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV]](i64), [[ASHR]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[SELECT]], [[SHL]] + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i32) = G_TRUNC [[UV3]](i64) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[TRUNC1]](i32) + ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[SHL1]], [[TRUNC1]](i32) + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[UV1]](i64), [[C2]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP2]](i1), [[C]], [[C1]] + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV1]](i64), [[ASHR1]] + ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP3]](i1), [[SELECT2]], [[SHL1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[SELECT1]](i64), [[SELECT3]](i64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(<2 x i64>) = G_SSHLSAT %0, %1(<2 x i64>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<2 x i64>) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssube.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssube.mir index 49c947e3fc39a..ea92a34f96bad 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssube.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssube.mir @@ -10,24 +10,24 @@ body: | ; CHECK-LABEL: name: test_ssube_s32 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; CHECK-NEXT: [[SSUBE:%[0-9]+]]:_(s32), [[SSUBE1:%[0-9]+]]:_(s1) = G_SSUBE [[COPY]], [[COPY1]], [[ICMP]] - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SSUBE1]](s1) - ; CHECK-NEXT: $vgpr0 = COPY [[SSUBE]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(eq), %2, %3 - %5:_(s32), %6:_(s1) = G_SSUBE %0, %1, %4 - %7:_(s32) = G_ZEXT %6 - $vgpr0 = COPY %5 - $vgpr1 = COPY %7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; CHECK-NEXT: [[SSUBE:%[0-9]+]]:_(i32), [[SSUBE1:%[0-9]+]]:_(i1) = G_SSUBE [[COPY]], [[COPY1]], [[ICMP]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[SSUBE1]](i1) + ; CHECK-NEXT: $vgpr0 = COPY [[SSUBE]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + %5:_(i32), %6:_(i1) = G_SSUBE %0, %1, %4 + %7:_(i32) = G_ZEXT %6(i1) + $vgpr0 = COPY %5(i32) + $vgpr1 = COPY %7(i32) ... 
--- @@ -39,36 +39,36 @@ body: | ; CHECK-LABEL: name: test_ssube_v2s32 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV]](s32), [[C]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV1]](s32), [[C]] - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK-NEXT: [[SSUBE:%[0-9]+]]:_(s32), [[SSUBE1:%[0-9]+]]:_(s1) = G_SSUBE [[UV2]], [[UV4]], [[ICMP]] - ; CHECK-NEXT: [[SSUBE2:%[0-9]+]]:_(s32), [[SSUBE3:%[0-9]+]]:_(s1) = G_SSUBE [[UV3]], [[UV5]], [[ICMP1]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SSUBE]](s32), [[SSUBE2]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SSUBE1]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SSUBE3]](s1) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - ; CHECK-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = COPY $vgpr4_vgpr5 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(<2 x s32>) = G_BUILD_VECTOR %3, %3 - %5:_(<2 x s1>) = G_ICMP intpred(eq), %2, %4 - %6:_(<2 x s32>), %7:_(<2 x s1>) = G_SSUBE %0, %1, %5 - %8:_(<2 x s32>) = G_ZEXT %7 - $vgpr0_vgpr1 = COPY %6 - $vgpr2_vgpr3 = COPY %8 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr4_vgpr5 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY2]](<2 x i32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[UV]](i32), [[C]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[UV1]](i32), [[C]] + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; CHECK-NEXT: [[SSUBE:%[0-9]+]]:_(i32), [[SSUBE1:%[0-9]+]]:_(i1) = G_SSUBE [[UV2]], [[UV4]], [[ICMP]] + ; CHECK-NEXT: [[SSUBE2:%[0-9]+]]:_(i32), [[SSUBE3:%[0-9]+]]:_(i1) = G_SSUBE [[UV3]], [[UV5]], [[ICMP1]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SSUBE]](i32), [[SSUBE2]](i32) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[SSUBE1]](i1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[SSUBE3]](i1) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[ANYEXT]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[ANYEXT1]], [[C1]] + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR 
[[AND]](i32), [[AND1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + ; CHECK-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i32>) = COPY $vgpr4_vgpr5 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(<2 x i32>) = G_BUILD_VECTOR %3(i32), %3(i32) + %5:_(<2 x i1>) = G_ICMP intpred(eq), %2(<2 x i32>), %4 + %6:_(<2 x i32>), %7:_(<2 x i1>) = G_SSUBE %0, %1, %5 + %8:_(<2 x i32>) = G_ZEXT %7(<2 x i1>) + $vgpr0_vgpr1 = COPY %6(<2 x i32>) + $vgpr2_vgpr3 = COPY %8(<2 x i32>) ... --- name: test_ssube_s16 @@ -79,30 +79,30 @@ body: | ; CHECK-LABEL: name: test_ssube_s16 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 - ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG %13, 16 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG1]](s32) - ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[SEXT_INREG]], [[COPY2]], [[ICMP]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[USUBE]](s32), [[SEXT_INREG1]] - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP1]](s1) - ; CHECK-NEXT: $vgpr0 = COPY [[USUBE]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(eq), %2, %3 - %5:_(s16) = G_TRUNC %0 - %6:_(s16) = G_TRUNC %1 - %7:_(s16), %8:_(s1) = G_SSUBE %6, %7, %4 - %9:_(s32) = G_ANYEXT %7 - %10:_(s32) = G_ZEXT %8 - $vgpr0 = COPY %9 - $vgpr1 = COPY %10 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C]] + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 16 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG %13, 16 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[SEXT_INREG1]](i32) + ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[SEXT_INREG]], [[COPY2]], [[ICMP]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[USUBE]](i32), [[SEXT_INREG1]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[ICMP1]](i1) + ; CHECK-NEXT: $vgpr0 = COPY [[USUBE]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + %5:_(i16) = G_TRUNC %0(i32) + %6:_(i16) = G_TRUNC %1(i32) + %7:_(i16), %8:_(i1) = G_SSUBE %6, %7, %4 + %9:_(i32) = G_ANYEXT %7(i16) + %10:_(i32) = G_ZEXT %8(i1) + $vgpr0 = COPY %9(i32) + $vgpr1 = COPY %10(i32) ... 
--- @@ -114,26 +114,26 @@ body: | ; CHECK-LABEL: name: test_ssube_s64 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV]], [[UV2]], [[ICMP]] - ; CHECK-NEXT: [[SSUBE:%[0-9]+]]:_(s32), [[SSUBE1:%[0-9]+]]:_(s1) = G_SSUBE [[UV1]], [[UV3]], [[USUBE1]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBE]](s32), [[SSUBE]](s32) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SSUBE1]](s1) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) - ; CHECK-NEXT: $vgpr2 = COPY [[ZEXT]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s32) = COPY $vgpr4 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(eq), %2, %3 - %5:_(s64), %6:_(s1) = G_SSUBE %0, %1, %4 - %7:_(s32) = G_ZEXT %6 - $vgpr0_vgpr1 = COPY %5 - $vgpr2 = COPY %7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV]], [[UV2]], [[ICMP]] + ; CHECK-NEXT: [[SSUBE:%[0-9]+]]:_(i32), [[SSUBE1:%[0-9]+]]:_(i1) = G_SSUBE [[UV1]], [[UV3]], [[USUBE1]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBE]](i32), [[SSUBE]](i32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[SSUBE1]](i1) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + ; CHECK-NEXT: $vgpr2 = COPY [[ZEXT]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i32) = COPY $vgpr4 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + %5:_(i64), %6:_(i1) = G_SSUBE %0, %1, %4 + %7:_(i32) = G_ZEXT %6(i1) + $vgpr0_vgpr1 = COPY %5(i64) + $vgpr2 = COPY %7(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir index aa59de0118ad6..83009c9ac8d5e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir @@ -10,30 +10,30 @@ body: | ; CHECK-LABEL: name: test_ssubo_s7 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB]], 7 - ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 7 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] - ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 7 - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG2]](s32), [[C]] - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C1]] - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1) - ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s7) = G_TRUNC %0 - %3:_(s7) = G_TRUNC %1 - %4:_(s7), %5:_(s1) = G_SSUBO %2, %3 - %6:_(s32) = G_ZEXT %4 - %7:_(s32) = G_ZEXT %5 - $vgpr0 = COPY %6 - $vgpr1 = COPY %7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[SUB]], 7 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 7 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SEXT_INREG]](i32), [[SEXT_INREG1]] + ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY1]], 7 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(sgt), [[SEXT_INREG2]](i32), [[C]] + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(i1) = G_XOR [[ICMP1]], [[ICMP]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 127 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[SUB]], [[C1]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[XOR]](i1) + ; CHECK-NEXT: $vgpr0 = COPY [[AND]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i7) = G_TRUNC %0(i32) + %3:_(i7) = G_TRUNC %1(i32) + %4:_(i7), %5:_(i1) = G_SSUBO %2, %3 + %6:_(i32) = G_ZEXT %4(i7) + %7:_(i32) = G_ZEXT %5(i1) + $vgpr0 = COPY %6(i32) + $vgpr1 = COPY %7(i32) ... 
--- @@ -45,28 +45,28 @@ body: | ; CHECK-LABEL: name: test_ssubo_s16 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB]], 16 - ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] - ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 16 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG2]](s32), [[C]] - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1) - ; CHECK-NEXT: $vgpr0 = COPY [[SUB]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s16), %5:_(s1) = G_SSUBO %2, %3 - %6:_(s32) = G_ANYEXT %4 - %7:_(s32) = G_ZEXT %5 - $vgpr0 = COPY %6 - $vgpr1 = COPY %7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[SUB]], 16 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY]], 16 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SEXT_INREG]](i32), [[SEXT_INREG1]] + ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[COPY1]], 16 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(sgt), [[SEXT_INREG2]](i32), [[C]] + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(i1) = G_XOR [[ICMP1]], [[ICMP]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[XOR]](i1) + ; CHECK-NEXT: $vgpr0 = COPY [[SUB]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i16), %5:_(i1) = G_SSUBO %2, %3 + %6:_(i32) = G_ANYEXT %4(i16) + %7:_(i32) = G_ZEXT %5(i1) + $vgpr0 = COPY %6(i32) + $vgpr1 = COPY %7(i32) ... 
--- @@ -78,23 +78,23 @@ body: | ; CHECK-LABEL: name: test_ssubo_s32 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SUB]](s32), [[COPY]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY1]](s32), [[C]] - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32), %3:_(s1) = G_SSUBO %0, %1 - %4:_(s32) = G_ZEXT %3 - $vgpr0 = COPY %2 - $vgpr1 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SUB]](i32), [[COPY]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(sgt), [[COPY1]](i32), [[C]] + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(i1) = G_XOR [[ICMP1]], [[ICMP]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[SUB]](i32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[XOR]](i1) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32), %3:_(i1) = G_SSUBO %0, %1 + %4:_(i32) = G_ZEXT %3(i1) + $vgpr0 = COPY %2(i32) + $vgpr1 = COPY %4(i32) ... 
--- @@ -106,27 +106,27 @@ body: | ; CHECK-LABEL: name: test_ssubo_s64 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; CHECK-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] - ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[COPY]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY1]](s64), [[C]] - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[MV]](s64) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY2]](s64) - ; CHECK-NEXT: $vgpr2 = COPY [[ZEXT]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64), %3:_(s1) = G_SSUBO %0, %1 - %4:_(s32) = G_ZEXT %3 - $vgpr0_vgpr1 = COPY %2 - $vgpr2 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; CHECK-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV]], [[UV2]] + ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO]](i32), [[USUBE]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[MV]](i64), [[COPY]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(sgt), [[COPY1]](i64), [[C]] + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(i1) = G_XOR [[ICMP1]], [[ICMP]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i64) = COPY [[MV]](i64) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[XOR]](i1) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY2]](i64) + ; CHECK-NEXT: $vgpr2 = COPY [[ZEXT]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64), %3:_(i1) = G_SSUBO %0, %1 + %4:_(i32) = G_ZEXT %3(i1) + $vgpr0_vgpr1 = COPY %2(i64) + $vgpr2 = COPY %4(i32) ... 
--- @@ -138,54 +138,54 @@ body: | ; CHECK-LABEL: name: test_ssubo_v2s16 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[BITCAST]], [[BITCAST1]] - ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[LSHR]], [[LSHR1]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SUB1]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB]], 16 - ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST3]], 16 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] - ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB1]], 16 - ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR2]], 16 - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[SEXT_INREG3]] - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; CHECK-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST4]], 16 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG4]](s32), [[COPY2]] - ; CHECK-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR3]], 16 - ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG5]](s32), [[C2]] - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP2]], [[ICMP]] - ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP1]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY [[BITCAST2]](<2 x s16>) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND2]](s32), [[AND3]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY3]](<2 x s16>) - ; CHECK-NEXT: $vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>), %3:_(<2 x s1>) = G_SSUBO %0, %1 - %4:_(<2 x s32>) = G_ZEXT %3 - $vgpr0 = COPY %2 - $vgpr1_vgpr2 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: 
[[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[BITCAST]], [[BITCAST1]] + ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[LSHR]], [[LSHR1]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[SUB]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[SUB1]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[SUB]], 16 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST3]], 16 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SEXT_INREG]](i32), [[SEXT_INREG1]] + ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[SUB1]], 16 + ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR2]], 16 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SEXT_INREG2]](i32), [[SEXT_INREG3]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; CHECK-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST4]], 16 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(sgt), [[SEXT_INREG4]](i32), [[COPY2]] + ; CHECK-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR3]], 16 + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(sgt), [[SEXT_INREG5]](i32), [[C2]] + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(i1) = G_XOR [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(i1) = G_XOR [[ICMP3]], [[ICMP1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[XOR]](i1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[XOR1]](i1) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i16>) = COPY [[BITCAST2]](<2 x i16>) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[ANYEXT]], [[C3]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[ANYEXT1]], [[C3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[AND2]](i32), [[AND3]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY3]](<2 x i16>) + ; CHECK-NEXT: $vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>), %3:_(<2 x i1>) = G_SSUBO %0, %1 + %4:_(<2 x i32>) = G_ZEXT %3(<2 x i1>) + $vgpr0 = COPY %2(<2 x i16>) + $vgpr1_vgpr2 = COPY %4(<2 x i32>) ... 
--- @@ -196,90 +196,90 @@ body: | ; CHECK-LABEL: name: test_ssubo_v3s16 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[BITCAST]], [[BITCAST2]] - ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[LSHR]], [[LSHR1]] - ; CHECK-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[BITCAST1]], [[BITCAST3]] - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB]], 16 - ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST4]], 16 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] - ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB1]], 16 - ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR2]], 16 - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[SEXT_INREG3]] - ; CHECK-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB2]], 16 - ; CHECK-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST5]], 16 - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG4]](s32), [[SEXT_INREG5]] - ; CHECK-NEXT: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) - ; CHECK-NEXT: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST6]], 16 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG6]](s32), [[COPY2]] - ; CHECK-NEXT: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR3]], 16 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG7]](s32), [[COPY3]] - ; CHECK-NEXT: [[SEXT_INREG8:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST7]], 16 - ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG8]](s32), [[C1]] - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], 
[[ICMP]] - ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP4]], [[ICMP1]] - ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(s1) = G_XOR [[ICMP5]], [[ICMP2]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR2]](s1) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) - ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C2]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SUB1]], [[C2]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SUB2]], [[C2]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST8]], [[C2]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C2]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR4]], [[SHL2]] - ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]] - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]] - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C3]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND5]](s32), [[AND6]](s32), [[AND7]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - %2:_(<3 x s16>), %3:_(<3 x s16>) = G_UNMERGE_VALUES %0 - %4:_(<3 x s16>), %5:_(<3 x s16>) = G_UNMERGE_VALUES %1 - %6:_(<3 x s16>), %7:_(<3 x s1>) = G_SSUBO %2, %4 - %8:_(<3 x s16>) = G_IMPLICIT_DEF - %9:_(<6 x s16>) = G_CONCAT_VECTORS %6, %8 - %10:_(<3 x s32>) = G_ZEXT %7 - $vgpr0_vgpr1_vgpr2 = COPY %9 - $vgpr0_vgpr1_vgpr2 = COPY %10 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES 
[[COPY1]](<6 x i16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[BITCAST]], [[BITCAST2]] + ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[LSHR]], [[LSHR1]] + ; CHECK-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[BITCAST1]], [[BITCAST3]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>), [[UV8:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[UV6]](<2 x i16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[UV7]](<2 x i16>) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[SUB]], 16 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST4]], 16 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SEXT_INREG]](i32), [[SEXT_INREG1]] + ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[SUB1]], 16 + ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR2]], 16 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SEXT_INREG2]](i32), [[SEXT_INREG3]] + ; CHECK-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(i32) = G_SEXT_INREG [[SUB2]], 16 + ; CHECK-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST5]], 16 + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SEXT_INREG4]](i32), [[SEXT_INREG5]] + ; CHECK-NEXT: [[UV9:%[0-9]+]]:_(<2 x i16>), [[UV10:%[0-9]+]]:_(<2 x i16>), [[UV11:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<6 x i16>) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[UV9]](<2 x i16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST6]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[UV10]](<2 x i16>) + ; CHECK-NEXT: [[SEXT_INREG6:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST6]], 16 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C1]](i32) + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(sgt), [[SEXT_INREG6]](i32), [[COPY2]] + ; CHECK-NEXT: [[SEXT_INREG7:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR3]], 16 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[C1]](i32) + ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(sgt), [[SEXT_INREG7]](i32), [[COPY3]] + ; CHECK-NEXT: [[SEXT_INREG8:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST7]], 16 + ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(sgt), [[SEXT_INREG8]](i32), [[C1]] + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(i1) = G_XOR [[ICMP3]], [[ICMP]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(i1) = G_XOR [[ICMP4]], [[ICMP1]] + ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(i1) = G_XOR [[ICMP5]], [[ICMP2]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[XOR]](i1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[XOR1]](i1) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[XOR2]](i1) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(<2 x i16>), [[UV13:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[UV12]](<2 x i16>) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST8]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[UV13]](<2 x i16>) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: 
[[AND:%[0-9]+]]:_(i32) = G_AND [[SUB]], [[C2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[SUB1]], [[C2]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[SUB2]], [[C2]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST8]], [[C2]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[BITCAST9]], [[C2]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND4]], [[C]](i32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR4]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x i16>), [[BITCAST11]](<2 x i16>), [[BITCAST12]](<2 x i16>) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[ANYEXT]], [[C3]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(i32) = G_AND [[ANYEXT1]], [[C3]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(i32) = G_AND [[ANYEXT2]], [[C3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[AND5]](i32), [[AND6]](i32), [[AND7]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) + %0:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<6 x i16>) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(<3 x i16>), %3:_(<3 x i16>) = G_UNMERGE_VALUES %0(<6 x i16>) + %4:_(<3 x i16>), %5:_(<3 x i16>) = G_UNMERGE_VALUES %1(<6 x i16>) + %6:_(<3 x i16>), %7:_(<3 x i1>) = G_SSUBO %2, %4 + %8:_(<3 x i16>) = G_IMPLICIT_DEF + %9:_(<6 x i16>) = G_CONCAT_VECTORS %6(<3 x i16>), %8(<3 x i16>) + %10:_(<3 x i32>) = G_ZEXT %7(<3 x i1>) + $vgpr0_vgpr1_vgpr2 = COPY %9(<6 x i16>) + $vgpr0_vgpr1_vgpr2 = COPY %10(<3 x i32>) ... 
--- @@ -291,92 +291,92 @@ body: | ; CHECK-LABEL: name: test_ssubo_v4s16 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr1_vgpr2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[BITCAST]], [[BITCAST2]] - ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[LSHR]], [[LSHR2]] - ; CHECK-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[BITCAST1]], [[BITCAST3]] - ; CHECK-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[LSHR1]], [[LSHR3]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SUB1]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SUB2]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB3]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB]], 16 - ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST6]], 16 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] - ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB1]], 16 - ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR4]], 16 - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[SEXT_INREG3]] - ; CHECK-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB2]], 16 - ; CHECK-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST7]], 16 - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG4]](s32), [[SEXT_INREG5]] - ; CHECK-NEXT: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB3]], 16 - ; 
CHECK-NEXT: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR5]], 16 - ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG6]](s32), [[SEXT_INREG7]] - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32) - ; CHECK-NEXT: [[SEXT_INREG8:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST8]], 16 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG8]](s32), [[COPY2]] - ; CHECK-NEXT: [[SEXT_INREG9:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR6]], 16 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG9]](s32), [[COPY3]] - ; CHECK-NEXT: [[SEXT_INREG10:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST9]], 16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG10]](s32), [[COPY4]] - ; CHECK-NEXT: [[SEXT_INREG11:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR7]], 16 - ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG11]](s32), [[C2]] - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP4]], [[ICMP]] - ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP5]], [[ICMP1]] - ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(s1) = G_XOR [[ICMP6]], [[ICMP2]] - ; CHECK-NEXT: [[XOR3:%[0-9]+]]:_(s1) = G_XOR [[ICMP7]], [[ICMP3]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR2]](s1) - ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR3]](s1) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<4 x s16>) = COPY [[CONCAT_VECTORS]](<4 x s16>) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]] - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C3]] - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C3]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND4]](s32), [[AND5]](s32), [[AND6]](s32), [[AND7]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY5]](<4 x s16>) - ; CHECK-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5 = COPY [[BUILD_VECTOR]](<4 x s32>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $vgpr1_vgpr2 - %2:_(<4 x s16>), %3:_(<4 x s1>) = G_SSUBO %0, %1 - %4:_(<4 x s32>) = G_ZEXT %3 - $vgpr0_vgpr1 = COPY %2 - $vgpr2_vgpr3_vgpr4_vgpr5 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr1_vgpr2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; 
CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[BITCAST]], [[BITCAST2]] + ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[LSHR]], [[LSHR2]] + ; CHECK-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[BITCAST1]], [[BITCAST3]] + ; CHECK-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[LSHR1]], [[LSHR3]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[SUB]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[SUB1]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[SUB2]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[SUB3]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x i16>), [[BITCAST5]](<2 x i16>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST6]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[UV5]](<2 x i16>) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST7]], [[C]](i32) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[SUB]], 16 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST6]], 16 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SEXT_INREG]](i32), [[SEXT_INREG1]] + ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(i32) = G_SEXT_INREG [[SUB1]], 16 + ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR4]], 16 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SEXT_INREG2]](i32), [[SEXT_INREG3]] + ; CHECK-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(i32) = G_SEXT_INREG [[SUB2]], 16 + ; CHECK-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST7]], 16 + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SEXT_INREG4]](i32), [[SEXT_INREG5]] + ; CHECK-NEXT: [[SEXT_INREG6:%[0-9]+]]:_(i32) = G_SEXT_INREG [[SUB3]], 16 + ; CHECK-NEXT: [[SEXT_INREG7:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR5]], 16 + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SEXT_INREG6]](i32), [[SEXT_INREG7]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(i32) = G_BITCAST [[UV6]](<2 x i16>) + ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST8]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[UV7]](<2 x i16>) + ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST9]], [[C]](i32) + ; CHECK-NEXT: [[SEXT_INREG8:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST8]], 16 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: 
[[COPY2:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(sgt), [[SEXT_INREG8]](i32), [[COPY2]] + ; CHECK-NEXT: [[SEXT_INREG9:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR6]], 16 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(sgt), [[SEXT_INREG9]](i32), [[COPY3]] + ; CHECK-NEXT: [[SEXT_INREG10:%[0-9]+]]:_(i32) = G_SEXT_INREG [[BITCAST9]], 16 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(sgt), [[SEXT_INREG10]](i32), [[COPY4]] + ; CHECK-NEXT: [[SEXT_INREG11:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LSHR7]], 16 + ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(sgt), [[SEXT_INREG11]](i32), [[C2]] + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(i1) = G_XOR [[ICMP4]], [[ICMP]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(i1) = G_XOR [[ICMP5]], [[ICMP1]] + ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(i1) = G_XOR [[ICMP6]], [[ICMP2]] + ; CHECK-NEXT: [[XOR3:%[0-9]+]]:_(i1) = G_XOR [[ICMP7]], [[ICMP3]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[XOR]](i1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[XOR1]](i1) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[XOR2]](i1) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(i32) = G_ANYEXT [[XOR3]](i1) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<4 x i16>) = COPY [[CONCAT_VECTORS]](<4 x i16>) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[ANYEXT]], [[C3]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[ANYEXT1]], [[C3]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(i32) = G_AND [[ANYEXT2]], [[C3]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(i32) = G_AND [[ANYEXT3]], [[C3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[AND4]](i32), [[AND5]](i32), [[AND6]](i32), [[AND7]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY5]](<4 x i16>) + ; CHECK-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5 = COPY [[BUILD_VECTOR]](<4 x i32>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = COPY $vgpr1_vgpr2 + %2:_(<4 x i16>), %3:_(<4 x i1>) = G_SSUBO %0, %1 + %4:_(<4 x i32>) = G_ZEXT %3(<4 x i1>) + $vgpr0_vgpr1 = COPY %2(<4 x i16>) + $vgpr2_vgpr3_vgpr4_vgpr5 = COPY %4(<4 x i32>) ... 
--- @@ -388,35 +388,35 @@ body: | ; CHECK-LABEL: name: test_ssubo_v2s32 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[UV2]] - ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[UV3]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB]](s32), [[SUB1]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SUB]](s32), [[UV4]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SUB1]](s32), [[UV5]] - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[UV6]](s32), [[C]] - ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[UV7]](s32), [[C]] - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP2]], [[ICMP]] - ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP1]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY [[BUILD_VECTOR]](<2 x s32>) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY2]](<2 x s32>) - ; CHECK-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>), %3:_(<2 x s1>) = G_SSUBO %0, %1 - %4:_(<2 x s32>) = G_ZEXT %3 - $vgpr0_vgpr1 = COPY %2 - $vgpr2_vgpr3 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV]], [[UV2]] + ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[UV1]], [[UV3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SUB]](i32), [[SUB1]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SUB]](i32), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[SUB1]](i32), [[UV5]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(sgt), [[UV6]](i32), [[C]] + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(sgt), [[UV7]](i32), [[C]] + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(i1) = G_XOR [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(i1) = G_XOR [[ICMP3]], 
[[ICMP1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[XOR]](i1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[XOR1]](i1) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i32>) = COPY [[BUILD_VECTOR]](<2 x i32>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[ANYEXT]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[ANYEXT1]], [[C1]] + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[AND]](i32), [[AND1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY2]](<2 x i32>) + ; CHECK-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i32>), %3:_(<2 x i1>) = G_SSUBO %0, %1 + %4:_(<2 x i32>) = G_ZEXT %3(<2 x i1>) + $vgpr0_vgpr1 = COPY %2(<2 x i32>) + $vgpr2_vgpr3 = COPY %4(<2 x i32>) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir index 49fb6e9bdaf35..5750df4c09b42 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir @@ -14,69 +14,69 @@ body: | ; GFX6-LABEL: name: ssubsat_s7 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C3]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SMAX]], [[C1]] - ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C3]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SMIN]], [[C2]] - ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[SHL1]] - ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB1]] - ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[SMIN1]] - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SUB2]], [[C]](s32) - ; GFX6-NEXT: $vgpr0 = COPY [[ASHR]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 25 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[C]](i32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY1]], [[C]](i32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(i32) = G_SMAX [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[SMAX]], [[C1]] + ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(i32) = G_SMIN [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[SMIN]], [[C2]] + ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(i32) = G_SMAX [[SUB]], [[SHL1]] + ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(i32) = G_SMIN [[SMAX1]], [[SUB1]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SHL]], [[SMIN1]] + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SUB2]], [[C]](i32) + ; GFX6-NEXT: $vgpr0 = COPY [[ASHR]](i32) ; ; GFX8-LABEL: name: ssubsat_s7 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: 
[[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[SHL]], [[C3]] - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[SMAX]], [[C1]] - ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[SHL]], [[C3]] - ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[SMIN]], [[C2]] - ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB]], [[SHL1]] - ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB1]] - ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[SHL]], [[SMIN1]] - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SUB2]], [[C]](s16) - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) - ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 9 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[C]](i16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[C]](i16) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 -1 + ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(i16) = G_SMAX [[SHL]], [[C3]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i16) = G_SUB [[SMAX]], [[C1]] + ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(i16) = G_SMIN [[SHL]], [[C3]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(i16) = G_SUB [[SMIN]], [[C2]] + ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(i16) = G_SMAX [[SUB]], [[SHL1]] + ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(i16) = G_SMIN [[SMAX1]], [[SUB1]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(i16) = G_SUB [[SHL]], [[SMIN1]] + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i16) = G_ASHR [[SUB2]], [[C]](i16) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ASHR]](i16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: ssubsat_s7 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) - ; GFX9-NEXT: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[SHL]], [[SHL1]] - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SSUBSAT]], [[C]](s16) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s7) = G_TRUNC %0 - %3:_(s7) = G_TRUNC %1 - %4:_(s7) = G_SSUBSAT %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: 
[[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 9 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[C]](i16) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[C]](i16) + ; GFX9-NEXT: [[SSUBSAT:%[0-9]+]]:_(i16) = G_SSUBSAT [[SHL]], [[SHL1]] + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(i16) = G_ASHR [[SSUBSAT]], [[C]](i16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ASHR]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i7) = G_TRUNC %0(i32) + %3:_(i7) = G_TRUNC %1(i32) + %4:_(i7) = G_SSUBSAT %2, %3 + %5:_(i32) = G_ANYEXT %4(i7) + $vgpr0 = COPY %5(i32) ... --- @@ -88,69 +88,69 @@ body: | ; GFX6-LABEL: name: ssubsat_s8 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C3]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SMAX]], [[C1]] - ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C3]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SMIN]], [[C2]] - ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[SHL1]] - ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB1]] - ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[SMIN1]] - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SUB2]], [[C]](s32) - ; GFX6-NEXT: $vgpr0 = COPY [[ASHR]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[C]](i32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY1]], [[C]](i32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(i32) = G_SMAX [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[SMAX]], [[C1]] + ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(i32) = G_SMIN [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[SMIN]], [[C2]] + ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(i32) = G_SMAX [[SUB]], [[SHL1]] + ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(i32) = G_SMIN [[SMAX1]], [[SUB1]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SHL]], [[SMIN1]] + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SUB2]], [[C]](i32) + ; GFX6-NEXT: $vgpr0 = COPY [[ASHR]](i32) ; ; GFX8-LABEL: name: ssubsat_s8 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 
-32768 - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[SHL]], [[C3]] - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[SMAX]], [[C1]] - ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[SHL]], [[C3]] - ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[SMIN]], [[C2]] - ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB]], [[SHL1]] - ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB1]] - ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[SHL]], [[SMIN1]] - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SUB2]], [[C]](s16) - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) - ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[C]](i16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[C]](i16) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 -1 + ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(i16) = G_SMAX [[SHL]], [[C3]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i16) = G_SUB [[SMAX]], [[C1]] + ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(i16) = G_SMIN [[SHL]], [[C3]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(i16) = G_SUB [[SMIN]], [[C2]] + ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(i16) = G_SMAX [[SUB]], [[SHL1]] + ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(i16) = G_SMIN [[SMAX1]], [[SUB1]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(i16) = G_SUB [[SHL]], [[SMIN1]] + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i16) = G_ASHR [[SUB2]], [[C]](i16) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ASHR]](i16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: ssubsat_s8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) - ; GFX9-NEXT: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[SHL]], [[SHL1]] - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SSUBSAT]], [[C]](s16) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s8) = G_TRUNC %0 - %3:_(s8) = G_TRUNC %1 - %4:_(s8) = G_SSUBSAT %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[C]](i16) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[C]](i16) + ; GFX9-NEXT: [[SSUBSAT:%[0-9]+]]:_(i16) = G_SSUBSAT [[SHL]], [[SHL1]] + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(i16) = G_ASHR [[SSUBSAT]], [[C]](i16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ASHR]](i16) + ; GFX9-NEXT: $vgpr0 = 
COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i8) = G_TRUNC %0(i32) + %3:_(i8) = G_TRUNC %1(i32) + %4:_(i8) = G_SSUBSAT %2, %3 + %5:_(i32) = G_ANYEXT %4(i8) + $vgpr0 = COPY %5(i32) ... --- @@ -162,133 +162,133 @@ body: | ; GFX6-LABEL: name: ssubsat_v2s8 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C1]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C4]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SMAX]], [[C2]] - ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C4]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SMIN]], [[C3]] - ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[SHL1]] - ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB1]] - ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[SMIN1]] - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SUB2]], [[C1]](s32) - ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) - ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C1]](s32) - ; GFX6-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SHL2]], [[C4]] - ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SMAX2]], [[C2]] - ; GFX6-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SHL2]], [[C4]] - ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SMIN2]], [[C3]] - ; GFX6-NEXT: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SUB3]], [[SHL3]] - ; GFX6-NEXT: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB4]] - ; GFX6-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SHL2]], [[SMIN3]] - ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SUB5]], [[C1]](s32) - ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR]](s32) - ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C5]] - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C6]] - ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY2]](s32) - ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY1]], [[C]](i32) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[C1]](i32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY1]], [[C1]](i32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: 
[[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(i32) = G_SMAX [[SHL]], [[C4]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[SMAX]], [[C2]] + ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(i32) = G_SMIN [[SHL]], [[C4]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[SMIN]], [[C3]] + ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(i32) = G_SMAX [[SUB]], [[SHL1]] + ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(i32) = G_SMIN [[SMAX1]], [[SUB1]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SHL]], [[SMIN1]] + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SUB2]], [[C1]](i32) + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C1]](i32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LSHR1]], [[C1]](i32) + ; GFX6-NEXT: [[SMAX2:%[0-9]+]]:_(i32) = G_SMAX [[SHL2]], [[C4]] + ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SMAX2]], [[C2]] + ; GFX6-NEXT: [[SMIN2:%[0-9]+]]:_(i32) = G_SMIN [[SHL2]], [[C4]] + ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[SMIN2]], [[C3]] + ; GFX6-NEXT: [[SMAX3:%[0-9]+]]:_(i32) = G_SMAX [[SUB3]], [[SHL3]] + ; GFX6-NEXT: [[SMIN3:%[0-9]+]]:_(i32) = G_SMIN [[SMAX3]], [[SUB4]] + ; GFX6-NEXT: [[SUB5:%[0-9]+]]:_(i32) = G_SUB [[SHL2]], [[SMIN3]] + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[SUB5]], [[C1]](i32) + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[ASHR]](i32) + ; GFX6-NEXT: [[C5:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C5]] + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C]](i32) + ; GFX6-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[ASHR1]], [[C6]] + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[COPY2]](i32) + ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[SHL4]](i32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[TRUNC1]] + ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[OR]](i16) + ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX8-LABEL: name: ssubsat_v2s8 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C1]](s16) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[SHL]], [[C4]] - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[SMAX]], [[C2]] - ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[SHL]], [[C4]] - ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[SMIN]], [[C3]] - ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB]], [[SHL1]] - ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB1]] - ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[SHL]], [[SMIN1]] - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SUB2]], [[C1]](s16) - ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[C1]](s16) - ; 
GFX8-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[C1]](s16) - ; GFX8-NEXT: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[SHL2]], [[C4]] - ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s16) = G_SUB [[SMAX2]], [[C2]] - ; GFX8-NEXT: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[SHL2]], [[C4]] - ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(s16) = G_SUB [[SMIN2]], [[C3]] - ; GFX8-NEXT: [[SMAX3:%[0-9]+]]:_(s16) = G_SMAX [[SUB3]], [[SHL3]] - ; GFX8-NEXT: [[SMIN3:%[0-9]+]]:_(s16) = G_SMIN [[SMAX3]], [[SUB4]] - ; GFX8-NEXT: [[SUB5:%[0-9]+]]:_(s16) = G_SUB [[SHL2]], [[SMIN3]] - ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SUB5]], [[C1]](s16) - ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[ASHR]], [[C5]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C5]] - ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL4]] - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY1]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[C1]](i16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[C1]](i16) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 -1 + ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(i16) = G_SMAX [[SHL]], [[C4]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i16) = G_SUB [[SMAX]], [[C2]] + ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(i16) = G_SMIN [[SHL]], [[C4]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(i16) = G_SUB [[SMIN]], [[C3]] + ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(i16) = G_SMAX [[SUB]], [[SHL1]] + ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(i16) = G_SMIN [[SMAX1]], [[SUB1]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(i16) = G_SUB [[SHL]], [[SMIN1]] + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i16) = G_ASHR [[SUB2]], [[C1]](i16) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[TRUNC2]], [[C1]](i16) + ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(i16) = G_SHL [[TRUNC3]], [[C1]](i16) + ; GFX8-NEXT: [[SMAX2:%[0-9]+]]:_(i16) = G_SMAX [[SHL2]], [[C4]] + ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(i16) = G_SUB [[SMAX2]], [[C2]] + ; GFX8-NEXT: [[SMIN2:%[0-9]+]]:_(i16) = G_SMIN [[SHL2]], [[C4]] + ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(i16) = G_SUB [[SMIN2]], [[C3]] + ; GFX8-NEXT: [[SMAX3:%[0-9]+]]:_(i16) = G_SMAX [[SUB3]], [[SHL3]] + ; GFX8-NEXT: [[SMIN3:%[0-9]+]]:_(i16) = G_SMIN [[SMAX3]], [[SUB4]] + ; GFX8-NEXT: [[SUB5:%[0-9]+]]:_(i16) = G_SUB [[SHL2]], [[SMIN3]] + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(i16) = G_ASHR [[SUB5]], [[C1]](i16) + ; GFX8-NEXT: [[C5:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[ASHR]], [[C5]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[ASHR1]], [[C5]] + ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C1]](i16) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL4]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[OR]](i16) 
+ ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: ssubsat_v2s8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C2]](s16), [[C2]](s16) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR]], [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR1]], [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[SSUBSAT:%[0-9]+]]:_(<2 x s16>) = G_SSUBSAT [[SHL]], [[SHL1]] - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SSUBSAT]], [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[ASHR]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL2]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(<2 x s8>) = G_BITCAST %2 - %5:_(<2 x s8>) = G_BITCAST %3 - %6:_(<2 x s8>) = G_SSUBSAT %4, %5 - %7:_(s16) = G_BITCAST %6 - %8:_(s32) = G_ANYEXT %7 - $vgpr0 = COPY %8 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY1]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[C2]](i16), [[C2]](i16) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x i16>) = G_SHL [[BUILD_VECTOR]], [[BUILD_VECTOR2]](<2 x i16>) + ; 
GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x i16>) = G_SHL [[BUILD_VECTOR1]], [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: [[SSUBSAT:%[0-9]+]]:_(<2 x i16>) = G_SSUBSAT [[SHL]], [[SHL1]] + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(<2 x i16>) = G_ASHR [[SSUBSAT]], [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[ASHR]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C1]](i32) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC4]], [[C3]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC5]], [[C3]] + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C2]](i16) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL2]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[OR]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(<2 x i8>) = G_BITCAST %2(i16) + %5:_(<2 x i8>) = G_BITCAST %3(i16) + %6:_(<2 x i8>) = G_SSUBSAT %4, %5 + %7:_(i16) = G_BITCAST %6(<2 x i8>) + %8:_(i32) = G_ANYEXT %7(i16) + $vgpr0 = COPY %8(i32) ... --- @@ -300,61 +300,61 @@ body: | ; GFX6-LABEL: name: ssubsat_s16 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C3]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SMAX]], [[C1]] - ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C3]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SMIN]], [[C2]] - ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[SHL1]] - ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB1]] - ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[SMIN1]] - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SUB2]], [[C]](s32) - ; GFX6-NEXT: $vgpr0 = COPY [[ASHR]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[C]](i32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY1]], [[C]](i32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(i32) = G_SMAX [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[SMAX]], [[C1]] + ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(i32) = G_SMIN [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[SMIN]], [[C2]] + ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(i32) = G_SMAX [[SUB]], [[SHL1]] + ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(i32) = G_SMIN [[SMAX1]], [[SUB1]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SHL]], [[SMIN1]] + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SUB2]], [[C]](i32) + ; GFX6-NEXT: $vgpr0 = COPY [[ASHR]](i32) ; ; GFX8-LABEL: name: 
ssubsat_s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[C2]] - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[SMAX]], [[C]] - ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[C2]] - ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[SMIN]], [[C1]] - ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB]], [[TRUNC1]] - ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB1]] - ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[TRUNC]], [[SMIN1]] - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SUB2]](s16) - ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 -1 + ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(i16) = G_SMAX [[TRUNC]], [[C2]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i16) = G_SUB [[SMAX]], [[C]] + ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(i16) = G_SMIN [[TRUNC]], [[C2]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(i16) = G_SUB [[SMIN]], [[C1]] + ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(i16) = G_SMAX [[SUB]], [[TRUNC1]] + ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(i16) = G_SMIN [[SMAX1]], [[SUB1]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(i16) = G_SUB [[TRUNC]], [[SMIN1]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[SUB2]](i16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: ssubsat_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[TRUNC]], [[TRUNC1]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SSUBSAT]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s16) = G_SSUBSAT %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[SSUBSAT:%[0-9]+]]:_(i16) = G_SSUBSAT [[TRUNC]], [[TRUNC1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[SSUBSAT]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i16) = G_SSUBSAT %2, %3 + %5:_(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... 
--- @@ -366,93 +366,93 @@ body: | ; GFX6-LABEL: name: ssubsat_v2s16 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C3]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SMAX]], [[C1]] - ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C3]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SMIN]], [[C2]] - ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[SHL1]] - ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB1]] - ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[SMIN1]] - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SUB2]], [[C]](s32) - ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; GFX6-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SHL2]], [[C3]] - ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SMAX2]], [[C1]] - ; GFX6-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SHL2]], [[C3]] - ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SMIN2]], [[C2]] - ; GFX6-NEXT: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SUB3]], [[SHL3]] - ; GFX6-NEXT: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB4]] - ; GFX6-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SHL2]], [[SMIN3]] - ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SUB5]], [[C]](s32) - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C4]] - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C4]] - ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL4]] - ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[BITCAST]], [[C]](i32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[BITCAST1]], [[C]](i32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(i32) = G_SMAX [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[SMAX]], [[C1]] + ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(i32) = G_SMIN [[SHL]], 
[[C3]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[SMIN]], [[C2]] + ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(i32) = G_SMAX [[SUB]], [[SHL1]] + ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(i32) = G_SMIN [[SMAX1]], [[SUB1]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SHL]], [[SMIN1]] + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SUB2]], [[C]](i32) + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C]](i32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LSHR1]], [[C]](i32) + ; GFX6-NEXT: [[SMAX2:%[0-9]+]]:_(i32) = G_SMAX [[SHL2]], [[C3]] + ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SMAX2]], [[C1]] + ; GFX6-NEXT: [[SMIN2:%[0-9]+]]:_(i32) = G_SMIN [[SHL2]], [[C3]] + ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[SMIN2]], [[C2]] + ; GFX6-NEXT: [[SMAX3:%[0-9]+]]:_(i32) = G_SMAX [[SUB3]], [[SHL3]] + ; GFX6-NEXT: [[SMIN3:%[0-9]+]]:_(i32) = G_SMIN [[SMAX3]], [[SUB4]] + ; GFX6-NEXT: [[SUB5:%[0-9]+]]:_(i32) = G_SUB [[SHL2]], [[SMIN3]] + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[SUB5]], [[C]](i32) + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[ASHR]], [[C4]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[ASHR1]], [[C4]] + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL4]] + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) ; ; GFX8-LABEL: name: ssubsat_v2s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[C3]] - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[SMAX]], [[C1]] - ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[C3]] - ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[SMIN]], [[C2]] - ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB]], [[TRUNC2]] - ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB1]] - ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[TRUNC]], [[SMIN1]] - ; GFX8-NEXT: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC1]], [[C3]] - ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s16) = G_SUB [[SMAX2]], [[C1]] - ; GFX8-NEXT: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC1]], [[C3]] - ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(s16) = G_SUB [[SMIN2]], [[C2]] - ; GFX8-NEXT: [[SMAX3:%[0-9]+]]:_(s16) = G_SMAX [[SUB3]], [[TRUNC3]] - ; GFX8-NEXT: [[SMIN3:%[0-9]+]]:_(s16) = G_SMIN [[SMAX3]], [[SUB4]] - ; GFX8-NEXT: [[SUB5:%[0-9]+]]:_(s16) = G_SUB [[TRUNC1]], [[SMIN3]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SUB2]](s16) - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SUB5]](s16) - ; GFX8-NEXT: 
[[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 -1 + ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(i16) = G_SMAX [[TRUNC]], [[C3]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i16) = G_SUB [[SMAX]], [[C1]] + ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(i16) = G_SMIN [[TRUNC]], [[C3]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(i16) = G_SUB [[SMIN]], [[C2]] + ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(i16) = G_SMAX [[SUB]], [[TRUNC2]] + ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(i16) = G_SMIN [[SMAX1]], [[SUB1]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(i16) = G_SUB [[TRUNC]], [[SMIN1]] + ; GFX8-NEXT: [[SMAX2:%[0-9]+]]:_(i16) = G_SMAX [[TRUNC1]], [[C3]] + ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(i16) = G_SUB [[SMAX2]], [[C1]] + ; GFX8-NEXT: [[SMIN2:%[0-9]+]]:_(i16) = G_SMIN [[TRUNC1]], [[C3]] + ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(i16) = G_SUB [[SMIN2]], [[C2]] + ; GFX8-NEXT: [[SMAX3:%[0-9]+]]:_(i16) = G_SMAX [[SUB3]], [[TRUNC3]] + ; GFX8-NEXT: [[SMIN3:%[0-9]+]]:_(i16) = G_SMIN [[SMAX3]], [[SUB4]] + ; GFX8-NEXT: [[SUB5:%[0-9]+]]:_(i16) = G_SUB [[TRUNC1]], [[SMIN3]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[SUB2]](i16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[SUB5]](i16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) ; ; GFX9-LABEL: name: ssubsat_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[SSUBSAT:%[0-9]+]]:_(<2 x s16>) = G_SSUBSAT [[COPY]], [[COPY1]] - ; GFX9-NEXT: $vgpr0 = COPY [[SSUBSAT]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_SSUBSAT %0, %1 - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[SSUBSAT:%[0-9]+]]:_(<2 x i16>) = G_SSUBSAT [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[SSUBSAT]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = G_SSUBSAT %0, %1 + $vgpr0 = COPY %2(<2 x i16>) ... 
--- @@ -464,181 +464,181 @@ body: | ; GFX6-LABEL: name: ssubsat_v3s16 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C3]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SMAX]], [[C1]] - ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C3]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SMIN]], [[C2]] - ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[SHL1]] - ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB1]] - ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[SMIN1]] - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SUB2]], [[C]](s32) - ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[C]](s32) - ; GFX6-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SHL2]], [[C3]] - ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SMAX2]], [[C1]] - ; GFX6-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SHL2]], [[C3]] - ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SMIN2]], [[C2]] - ; GFX6-NEXT: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SUB3]], [[SHL3]] - ; GFX6-NEXT: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB4]] - ; GFX6-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SHL2]], [[SMIN3]] - ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SUB5]], [[C]](s32) - ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) - ; GFX6-NEXT: [[SMAX4:%[0-9]+]]:_(s32) = G_SMAX [[SHL4]], [[C3]] - ; GFX6-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SMAX4]], [[C1]] - ; GFX6-NEXT: [[SMIN4:%[0-9]+]]:_(s32) = G_SMIN [[SHL4]], [[C3]] - ; GFX6-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SMIN4]], [[C2]] - ; GFX6-NEXT: [[SMAX5:%[0-9]+]]:_(s32) = G_SMAX [[SUB6]], [[SHL5]] - ; GFX6-NEXT: [[SMIN5:%[0-9]+]]:_(s32) = G_SMIN [[SMAX5]], [[SUB7]] - ; GFX6-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SHL4]], [[SMIN5]] - ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SUB8]], [[C]](s32) - ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ASHR]], 
[[C4]] - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C4]] - ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL6]] - ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR2]], [[C4]] - ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]] - ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL7]] - ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] - ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) - ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL8]] - ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[BITCAST]], [[C]](i32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LSHR1]], [[C]](i32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(i32) = G_SMAX [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[SMAX]], [[C1]] + ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(i32) = G_SMIN [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[SMIN]], [[C2]] + ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(i32) = G_SMAX [[SUB]], [[SHL1]] + ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(i32) = G_SMIN [[SMAX1]], [[SUB1]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SHL]], [[SMIN1]] + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SUB2]], [[C]](i32) + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C]](i32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[BITCAST2]], [[C]](i32) + ; GFX6-NEXT: [[SMAX2:%[0-9]+]]:_(i32) = G_SMAX [[SHL2]], [[C3]] + ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SMAX2]], [[C1]] + ; GFX6-NEXT: [[SMIN2:%[0-9]+]]:_(i32) = G_SMIN [[SHL2]], [[C3]] + ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[SMIN2]], [[C2]] + ; GFX6-NEXT: [[SMAX3:%[0-9]+]]:_(i32) = G_SMAX [[SUB3]], [[SHL3]] + ; GFX6-NEXT: [[SMIN3:%[0-9]+]]:_(i32) = G_SMIN [[SMAX3]], [[SUB4]] + ; GFX6-NEXT: [[SUB5:%[0-9]+]]:_(i32) = G_SUB [[SHL2]], [[SMIN3]] + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[SUB5]], [[C]](i32) + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[BITCAST1]], [[C]](i32) + ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[LSHR2]], [[C]](i32) + ; GFX6-NEXT: [[SMAX4:%[0-9]+]]:_(i32) = G_SMAX [[SHL4]], [[C3]] + 
; GFX6-NEXT: [[SUB6:%[0-9]+]]:_(i32) = G_SUB [[SMAX4]], [[C1]] + ; GFX6-NEXT: [[SMIN4:%[0-9]+]]:_(i32) = G_SMIN [[SHL4]], [[C3]] + ; GFX6-NEXT: [[SUB7:%[0-9]+]]:_(i32) = G_SUB [[SMIN4]], [[C2]] + ; GFX6-NEXT: [[SMAX5:%[0-9]+]]:_(i32) = G_SMAX [[SUB6]], [[SHL5]] + ; GFX6-NEXT: [[SMIN5:%[0-9]+]]:_(i32) = G_SMIN [[SMAX5]], [[SUB7]] + ; GFX6-NEXT: [[SUB8:%[0-9]+]]:_(i32) = G_SUB [[SHL4]], [[SMIN5]] + ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(i32) = G_ASHR [[SUB8]], [[C]](i32) + ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[ASHR]], [[C4]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[ASHR1]], [[C4]] + ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL6]] + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[ASHR2]], [[C4]] + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST3]], [[C4]] + ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C]](i32) + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL7]] + ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[BITCAST4]], [[C4]] + ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[AND4]], [[C]](i32) + ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR3]], [[SHL8]] + ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x i16>), [[BITCAST6]](<2 x i16>), [[BITCAST7]](<2 x i16>) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX8-LABEL: name: ssubsat_v3s16 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[C3]] - ; GFX8-NEXT: 
[[SUB:%[0-9]+]]:_(s16) = G_SUB [[SMAX]], [[C1]] - ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[C3]] - ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[SMIN]], [[C2]] - ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB]], [[TRUNC3]] - ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB1]] - ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[TRUNC]], [[SMIN1]] - ; GFX8-NEXT: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC1]], [[C3]] - ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s16) = G_SUB [[SMAX2]], [[C1]] - ; GFX8-NEXT: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC1]], [[C3]] - ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(s16) = G_SUB [[SMIN2]], [[C2]] - ; GFX8-NEXT: [[SMAX3:%[0-9]+]]:_(s16) = G_SMAX [[SUB3]], [[TRUNC4]] - ; GFX8-NEXT: [[SMIN3:%[0-9]+]]:_(s16) = G_SMIN [[SMAX3]], [[SUB4]] - ; GFX8-NEXT: [[SUB5:%[0-9]+]]:_(s16) = G_SUB [[TRUNC1]], [[SMIN3]] - ; GFX8-NEXT: [[SMAX4:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC2]], [[C3]] - ; GFX8-NEXT: [[SUB6:%[0-9]+]]:_(s16) = G_SUB [[SMAX4]], [[C1]] - ; GFX8-NEXT: [[SMIN4:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC2]], [[C3]] - ; GFX8-NEXT: [[SUB7:%[0-9]+]]:_(s16) = G_SUB [[SMIN4]], [[C2]] - ; GFX8-NEXT: [[SMAX5:%[0-9]+]]:_(s16) = G_SMAX [[SUB6]], [[TRUNC5]] - ; GFX8-NEXT: [[SMIN5:%[0-9]+]]:_(s16) = G_SMIN [[SMAX5]], [[SUB7]] - ; GFX8-NEXT: [[SUB8:%[0-9]+]]:_(s16) = G_SUB [[TRUNC2]], [[SMIN5]] - ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SUB2]](s16) - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SUB5]](s16) - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SUB8]](s16) - ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]] - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] - ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL2]] - ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST 
[[UV1]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 -1 + ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(i16) = G_SMAX [[TRUNC]], [[C3]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i16) = G_SUB [[SMAX]], [[C1]] + ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(i16) = G_SMIN [[TRUNC]], [[C3]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(i16) = G_SUB [[SMIN]], [[C2]] + ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(i16) = G_SMAX [[SUB]], [[TRUNC3]] + ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(i16) = G_SMIN [[SMAX1]], [[SUB1]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(i16) = G_SUB [[TRUNC]], [[SMIN1]] + ; GFX8-NEXT: [[SMAX2:%[0-9]+]]:_(i16) = G_SMAX [[TRUNC1]], [[C3]] + ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(i16) = G_SUB [[SMAX2]], [[C1]] + ; GFX8-NEXT: [[SMIN2:%[0-9]+]]:_(i16) = G_SMIN [[TRUNC1]], [[C3]] + ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(i16) = G_SUB [[SMIN2]], [[C2]] + ; GFX8-NEXT: [[SMAX3:%[0-9]+]]:_(i16) = G_SMAX [[SUB3]], [[TRUNC4]] + ; GFX8-NEXT: [[SMIN3:%[0-9]+]]:_(i16) = G_SMIN [[SMAX3]], [[SUB4]] + ; GFX8-NEXT: [[SUB5:%[0-9]+]]:_(i16) = G_SUB [[TRUNC1]], [[SMIN3]] + ; GFX8-NEXT: [[SMAX4:%[0-9]+]]:_(i16) = G_SMAX [[TRUNC2]], [[C3]] + ; GFX8-NEXT: [[SUB6:%[0-9]+]]:_(i16) = G_SUB [[SMAX4]], [[C1]] + ; GFX8-NEXT: [[SMIN4:%[0-9]+]]:_(i16) = G_SMIN [[TRUNC2]], [[C3]] + ; GFX8-NEXT: [[SUB7:%[0-9]+]]:_(i16) = G_SUB [[SMIN4]], [[C2]] + ; GFX8-NEXT: [[SMAX5:%[0-9]+]]:_(i16) = G_SMAX [[SUB6]], [[TRUNC5]] + ; GFX8-NEXT: [[SMIN5:%[0-9]+]]:_(i16) = G_SMIN [[SMAX5]], [[SUB7]] + ; GFX8-NEXT: [[SUB8:%[0-9]+]]:_(i16) = G_SUB [[TRUNC2]], [[SMIN5]] + ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[SUB2]](i16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[SUB5]](i16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[SUB8]](i16) + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST3]], [[C4]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND]], [[C]](i32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST4]], [[C4]] + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR3]], [[SHL2]] + ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = 
G_CONCAT_VECTORS [[BITCAST5]](<2 x i16>), [[BITCAST6]](<2 x i16>), [[BITCAST7]](<2 x i16>) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX9-LABEL: name: ssubsat_v3s16 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[SSUBSAT:%[0-9]+]]:_(<2 x s16>) = G_SSUBSAT [[UV]], [[BUILD_VECTOR1]] - ; GFX9-NEXT: [[SSUBSAT1:%[0-9]+]]:_(<2 x s16>) = G_SSUBSAT [[BUILD_VECTOR]], [[BUILD_VECTOR2]] - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[SSUBSAT]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[SSUBSAT1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC8]](s16), [[TRUNC9]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) - %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0 - %3:_(<3 x s16>) = G_SSUBSAT %1, %2 - %4:_(<3 x s16>) = G_IMPLICIT_DEF - %5:_(<6 x s16>) = G_CONCAT_VECTORS %3, %4 - $vgpr0_vgpr1_vgpr2 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9-NEXT: 
[[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC1]](i16), [[TRUNC2]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC3]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[SSUBSAT:%[0-9]+]]:_(<2 x i16>) = G_SSUBSAT [[UV]], [[BUILD_VECTOR1]] + ; GFX9-NEXT: [[SSUBSAT1:%[0-9]+]]:_(<2 x i16>) = G_SSUBSAT [[BUILD_VECTOR]], [[BUILD_VECTOR2]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[SSUBSAT]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[SSUBSAT1]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST3]](i32) + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF1]](<4 x i16>) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC9:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST5]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC6]](i16), [[TRUNC7]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC8]](i16), [[TRUNC9]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR5]](<2 x i16>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) + %0:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x i16>), %2:_(<3 x i16>) = G_UNMERGE_VALUES %0(<6 x i16>) + %3:_(<3 x i16>) = G_SSUBSAT %1, %2 + %4:_(<3 x i16>) = G_IMPLICIT_DEF + %5:_(<6 x i16>) = G_CONCAT_VECTORS %3(<3 x i16>), %4(<3 x i16>) + $vgpr0_vgpr1_vgpr2 = COPY %5(<6 x i16>) ... 
--- @@ -650,159 +650,159 @@ body: | ; GFX6-LABEL: name: ssubsat_v4s16 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[C]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C3]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SMAX]], [[C1]] - ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C3]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SMIN]], [[C2]] - ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[SHL1]] - ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB1]] - ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[SMIN1]] - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SUB2]], [[C]](s32) - ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) - ; GFX6-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SHL2]], [[C3]] - ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SMAX2]], [[C1]] - ; GFX6-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SHL2]], [[C3]] - ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SMIN2]], [[C2]] - ; GFX6-NEXT: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SUB3]], [[SHL3]] - ; GFX6-NEXT: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB4]] - ; GFX6-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SHL2]], [[SMIN3]] - ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SUB5]], [[C]](s32) - ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[C]](s32) - ; GFX6-NEXT: [[SMAX4:%[0-9]+]]:_(s32) = G_SMAX [[SHL4]], [[C3]] - ; GFX6-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SMAX4]], [[C1]] - ; GFX6-NEXT: [[SMIN4:%[0-9]+]]:_(s32) = G_SMIN [[SHL4]], [[C3]] - ; GFX6-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SMIN4]], [[C2]] - ; GFX6-NEXT: [[SMAX5:%[0-9]+]]:_(s32) = G_SMAX [[SUB6]], [[SHL5]] - ; GFX6-NEXT: [[SMIN5:%[0-9]+]]:_(s32) = G_SMIN [[SMAX5]], [[SUB7]] - ; GFX6-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SHL4]], [[SMIN5]] - ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SUB8]], [[C]](s32) - ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LSHR3]], [[C]](s32) - ; GFX6-NEXT: [[SMAX6:%[0-9]+]]:_(s32) = G_SMAX [[SHL6]], [[C3]] - ; 
GFX6-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[SMAX6]], [[C1]] - ; GFX6-NEXT: [[SMIN6:%[0-9]+]]:_(s32) = G_SMIN [[SHL6]], [[C3]] - ; GFX6-NEXT: [[SUB10:%[0-9]+]]:_(s32) = G_SUB [[SMIN6]], [[C2]] - ; GFX6-NEXT: [[SMAX7:%[0-9]+]]:_(s32) = G_SMAX [[SUB9]], [[SHL7]] - ; GFX6-NEXT: [[SMIN7:%[0-9]+]]:_(s32) = G_SMIN [[SMAX7]], [[SUB10]] - ; GFX6-NEXT: [[SUB11:%[0-9]+]]:_(s32) = G_SUB [[SHL6]], [[SMIN7]] - ; GFX6-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SUB11]], [[C]](s32) - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C4]] - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C4]] - ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL8]] - ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR2]], [[C4]] - ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ASHR3]], [[C4]] - ; GFX6-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL9]] - ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[BITCAST]], [[C]](i32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[BITCAST2]], [[C]](i32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(i32) = G_SMAX [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[SMAX]], [[C1]] + ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(i32) = G_SMIN [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[SMIN]], [[C2]] + ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(i32) = G_SMAX [[SUB]], [[SHL1]] + ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(i32) = G_SMIN [[SMAX1]], [[SUB1]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SHL]], [[SMIN1]] + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[SUB2]], [[C]](i32) + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C]](i32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LSHR2]], [[C]](i32) + ; GFX6-NEXT: [[SMAX2:%[0-9]+]]:_(i32) = G_SMAX [[SHL2]], [[C3]] + ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SMAX2]], [[C1]] + ; GFX6-NEXT: [[SMIN2:%[0-9]+]]:_(i32) = 
G_SMIN [[SHL2]], [[C3]] + ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[SMIN2]], [[C2]] + ; GFX6-NEXT: [[SMAX3:%[0-9]+]]:_(i32) = G_SMAX [[SUB3]], [[SHL3]] + ; GFX6-NEXT: [[SMIN3:%[0-9]+]]:_(i32) = G_SMIN [[SMAX3]], [[SUB4]] + ; GFX6-NEXT: [[SUB5:%[0-9]+]]:_(i32) = G_SUB [[SHL2]], [[SMIN3]] + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[SUB5]], [[C]](i32) + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[BITCAST1]], [[C]](i32) + ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[BITCAST3]], [[C]](i32) + ; GFX6-NEXT: [[SMAX4:%[0-9]+]]:_(i32) = G_SMAX [[SHL4]], [[C3]] + ; GFX6-NEXT: [[SUB6:%[0-9]+]]:_(i32) = G_SUB [[SMAX4]], [[C1]] + ; GFX6-NEXT: [[SMIN4:%[0-9]+]]:_(i32) = G_SMIN [[SHL4]], [[C3]] + ; GFX6-NEXT: [[SUB7:%[0-9]+]]:_(i32) = G_SUB [[SMIN4]], [[C2]] + ; GFX6-NEXT: [[SMAX5:%[0-9]+]]:_(i32) = G_SMAX [[SUB6]], [[SHL5]] + ; GFX6-NEXT: [[SMIN5:%[0-9]+]]:_(i32) = G_SMIN [[SMAX5]], [[SUB7]] + ; GFX6-NEXT: [[SUB8:%[0-9]+]]:_(i32) = G_SUB [[SHL4]], [[SMIN5]] + ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(i32) = G_ASHR [[SUB8]], [[C]](i32) + ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[LSHR1]], [[C]](i32) + ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LSHR3]], [[C]](i32) + ; GFX6-NEXT: [[SMAX6:%[0-9]+]]:_(i32) = G_SMAX [[SHL6]], [[C3]] + ; GFX6-NEXT: [[SUB9:%[0-9]+]]:_(i32) = G_SUB [[SMAX6]], [[C1]] + ; GFX6-NEXT: [[SMIN6:%[0-9]+]]:_(i32) = G_SMIN [[SHL6]], [[C3]] + ; GFX6-NEXT: [[SUB10:%[0-9]+]]:_(i32) = G_SUB [[SMIN6]], [[C2]] + ; GFX6-NEXT: [[SMAX7:%[0-9]+]]:_(i32) = G_SMAX [[SUB9]], [[SHL7]] + ; GFX6-NEXT: [[SMIN7:%[0-9]+]]:_(i32) = G_SMIN [[SMAX7]], [[SUB10]] + ; GFX6-NEXT: [[SUB11:%[0-9]+]]:_(i32) = G_SUB [[SHL6]], [[SMIN7]] + ; GFX6-NEXT: [[ASHR3:%[0-9]+]]:_(i32) = G_ASHR [[SUB11]], [[C]](i32) + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[ASHR]], [[C4]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[ASHR1]], [[C4]] + ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL8]] + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[ASHR2]], [[C4]] + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[ASHR3]], [[C4]] + ; GFX6-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C]](i32) + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL9]] + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x i16>), [[BITCAST5]](<2 x i16>) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX8-LABEL: name: ssubsat_v4s16 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8-NEXT: 
[[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[C3]] - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[SMAX]], [[C1]] - ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[C3]] - ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[SMIN]], [[C2]] - ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB]], [[TRUNC4]] - ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB1]] - ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[TRUNC]], [[SMIN1]] - ; GFX8-NEXT: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC1]], [[C3]] - ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s16) = G_SUB [[SMAX2]], [[C1]] - ; GFX8-NEXT: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC1]], [[C3]] - ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(s16) = G_SUB [[SMIN2]], [[C2]] - ; GFX8-NEXT: [[SMAX3:%[0-9]+]]:_(s16) = G_SMAX [[SUB3]], [[TRUNC5]] - ; GFX8-NEXT: [[SMIN3:%[0-9]+]]:_(s16) = G_SMIN [[SMAX3]], [[SUB4]] - ; GFX8-NEXT: [[SUB5:%[0-9]+]]:_(s16) = G_SUB [[TRUNC1]], [[SMIN3]] - ; GFX8-NEXT: [[SMAX4:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC2]], [[C3]] - ; GFX8-NEXT: [[SUB6:%[0-9]+]]:_(s16) = G_SUB [[SMAX4]], [[C1]] - ; GFX8-NEXT: [[SMIN4:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC2]], [[C3]] - ; GFX8-NEXT: [[SUB7:%[0-9]+]]:_(s16) = G_SUB [[SMIN4]], [[C2]] - ; GFX8-NEXT: [[SMAX5:%[0-9]+]]:_(s16) = G_SMAX [[SUB6]], [[TRUNC6]] - ; GFX8-NEXT: [[SMIN5:%[0-9]+]]:_(s16) = G_SMIN [[SMAX5]], [[SUB7]] - ; GFX8-NEXT: [[SUB8:%[0-9]+]]:_(s16) = G_SUB [[TRUNC2]], [[SMIN5]] - ; GFX8-NEXT: [[SMAX6:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC3]], [[C3]] - ; GFX8-NEXT: [[SUB9:%[0-9]+]]:_(s16) = G_SUB [[SMAX6]], [[C1]] - ; GFX8-NEXT: [[SMIN6:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC3]], [[C3]] - ; GFX8-NEXT: [[SUB10:%[0-9]+]]:_(s16) = G_SUB [[SMIN6]], [[C2]] - ; GFX8-NEXT: [[SMAX7:%[0-9]+]]:_(s16) = G_SMAX [[SUB9]], [[TRUNC7]] - ; GFX8-NEXT: [[SMIN7:%[0-9]+]]:_(s16) = G_SMIN [[SMAX7]], [[SUB10]] - ; GFX8-NEXT: [[SUB11:%[0-9]+]]:_(s16) = G_SUB [[TRUNC3]], [[SMIN7]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SUB2]](s16) - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SUB5]](s16) - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SUB8]](s16) - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[SUB11]](s16) - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x 
s16>), [[BITCAST5]](<2 x s16>) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST3]](i32) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 -1 + ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(i16) = G_SMAX [[TRUNC]], [[C3]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i16) = G_SUB [[SMAX]], [[C1]] + ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(i16) = G_SMIN [[TRUNC]], [[C3]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(i16) = G_SUB [[SMIN]], [[C2]] + ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(i16) = G_SMAX [[SUB]], [[TRUNC4]] + ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(i16) = G_SMIN [[SMAX1]], [[SUB1]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(i16) = G_SUB [[TRUNC]], [[SMIN1]] + ; GFX8-NEXT: [[SMAX2:%[0-9]+]]:_(i16) = G_SMAX [[TRUNC1]], [[C3]] + ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(i16) = G_SUB [[SMAX2]], [[C1]] + ; GFX8-NEXT: [[SMIN2:%[0-9]+]]:_(i16) = G_SMIN [[TRUNC1]], [[C3]] + ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(i16) = G_SUB [[SMIN2]], [[C2]] + ; GFX8-NEXT: [[SMAX3:%[0-9]+]]:_(i16) = G_SMAX [[SUB3]], [[TRUNC5]] + ; GFX8-NEXT: [[SMIN3:%[0-9]+]]:_(i16) = G_SMIN [[SMAX3]], [[SUB4]] + ; GFX8-NEXT: [[SUB5:%[0-9]+]]:_(i16) = G_SUB [[TRUNC1]], [[SMIN3]] + ; GFX8-NEXT: [[SMAX4:%[0-9]+]]:_(i16) = G_SMAX [[TRUNC2]], [[C3]] + ; GFX8-NEXT: [[SUB6:%[0-9]+]]:_(i16) = G_SUB [[SMAX4]], [[C1]] + ; GFX8-NEXT: [[SMIN4:%[0-9]+]]:_(i16) = G_SMIN [[TRUNC2]], [[C3]] + ; GFX8-NEXT: [[SUB7:%[0-9]+]]:_(i16) = G_SUB [[SMIN4]], [[C2]] + ; GFX8-NEXT: [[SMAX5:%[0-9]+]]:_(i16) = G_SMAX [[SUB6]], [[TRUNC6]] + ; GFX8-NEXT: [[SMIN5:%[0-9]+]]:_(i16) = G_SMIN [[SMAX5]], [[SUB7]] + ; GFX8-NEXT: [[SUB8:%[0-9]+]]:_(i16) = G_SUB [[TRUNC2]], [[SMIN5]] + ; GFX8-NEXT: [[SMAX6:%[0-9]+]]:_(i16) = G_SMAX [[TRUNC3]], [[C3]] + ; GFX8-NEXT: [[SUB9:%[0-9]+]]:_(i16) = G_SUB [[SMAX6]], [[C1]] + ; GFX8-NEXT: [[SMIN6:%[0-9]+]]:_(i16) = G_SMIN [[TRUNC3]], [[C3]] + ; GFX8-NEXT: [[SUB10:%[0-9]+]]:_(i16) = G_SUB [[SMIN6]], [[C2]] + ; GFX8-NEXT: [[SMAX7:%[0-9]+]]:_(i16) = G_SMAX [[SUB9]], 
[[TRUNC7]] + ; GFX8-NEXT: [[SMIN7:%[0-9]+]]:_(i16) = G_SMIN [[SMAX7]], [[SUB10]] + ; GFX8-NEXT: [[SUB11:%[0-9]+]]:_(i16) = G_SUB [[TRUNC3]], [[SMIN7]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[SUB2]](i16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[SUB5]](i16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[SUB8]](i16) + ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[SUB11]](i16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x i16>), [[BITCAST5]](<2 x i16>) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX9-LABEL: name: ssubsat_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9-NEXT: [[SSUBSAT:%[0-9]+]]:_(<2 x s16>) = G_SSUBSAT [[UV]], [[UV2]] - ; GFX9-NEXT: [[SSUBSAT1:%[0-9]+]]:_(<2 x s16>) = G_SSUBSAT [[UV1]], [[UV3]] - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[SSUBSAT]](<2 x s16>), [[SSUBSAT1]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 - %2:_(<4 x s16>) = G_SSUBSAT %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; GFX9-NEXT: [[SSUBSAT:%[0-9]+]]:_(<2 x i16>) = G_SSUBSAT [[UV]], [[UV2]] + ; GFX9-NEXT: [[SSUBSAT1:%[0-9]+]]:_(<2 x i16>) = G_SSUBSAT [[UV1]], [[UV3]] + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[SSUBSAT]](<2 x i16>), [[SSUBSAT1]](<2 x i16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = COPY $vgpr2_vgpr3 + %2:_(<4 x i16>) = G_SSUBSAT %0, %1 + $vgpr0_vgpr1 = COPY %2(<4 x i16>) ... 
--- @@ -814,48 +814,48 @@ body: | ; GFX6-LABEL: name: ssubsat_s32 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[COPY]], [[C2]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SMAX]], [[C]] - ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[COPY]], [[C2]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SMIN]], [[C1]] - ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[COPY1]] - ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB1]] - ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[SMIN1]] - ; GFX6-NEXT: $vgpr0 = COPY [[SUB2]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(i32) = G_SMAX [[COPY]], [[C2]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[SMAX]], [[C]] + ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(i32) = G_SMIN [[COPY]], [[C2]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[SMIN]], [[C1]] + ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(i32) = G_SMAX [[SUB]], [[COPY1]] + ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(i32) = G_SMIN [[SMAX1]], [[SUB1]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[COPY]], [[SMIN1]] + ; GFX6-NEXT: $vgpr0 = COPY [[SUB2]](i32) ; ; GFX8-LABEL: name: ssubsat_s32 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[COPY]], [[C2]] - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SMAX]], [[C]] - ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[COPY]], [[C2]] - ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SMIN]], [[C1]] - ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[COPY1]] - ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB1]] - ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[SMIN1]] - ; GFX8-NEXT: $vgpr0 = COPY [[SUB2]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(i32) = G_SMAX [[COPY]], [[C2]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[SMAX]], [[C]] + ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(i32) = G_SMIN [[COPY]], [[C2]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[SMIN]], [[C1]] + ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(i32) = G_SMAX [[SUB]], [[COPY1]] + ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(i32) = G_SMIN [[SMAX1]], [[SUB1]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[COPY]], [[SMIN1]] + ; GFX8-NEXT: $vgpr0 = COPY [[SUB2]](i32) ; ; GFX9-LABEL: name: ssubsat_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: 
[[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[SSUBSAT:%[0-9]+]]:_(s32) = G_SSUBSAT [[COPY]], [[COPY1]] - ; GFX9-NEXT: $vgpr0 = COPY [[SSUBSAT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_SSUBSAT %0, %1 - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[SSUBSAT:%[0-9]+]]:_(i32) = G_SSUBSAT [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[SSUBSAT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_SSUBSAT %0, %1 + $vgpr0 = COPY %2(i32) ... --- @@ -867,72 +867,72 @@ body: | ; GFX6-LABEL: name: ssubsat_v2s32 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[UV]], [[C2]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SMAX]], [[C]] - ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[UV]], [[C2]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SMIN]], [[C1]] - ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[UV2]] - ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB1]] - ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[SMIN1]] - ; GFX6-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[UV1]], [[C2]] - ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SMAX2]], [[C]] - ; GFX6-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[UV1]], [[C2]] - ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SMIN2]], [[C1]] - ; GFX6-NEXT: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SUB3]], [[UV3]] - ; GFX6-NEXT: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB4]] - ; GFX6-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[SMIN3]] - ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB2]](s32), [[SUB5]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(i32) = G_SMAX [[UV]], [[C2]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[SMAX]], [[C]] + ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(i32) = G_SMIN [[UV]], [[C2]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[SMIN]], [[C1]] + ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(i32) = G_SMAX [[SUB]], [[UV2]] + ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(i32) = G_SMIN [[SMAX1]], [[SUB1]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[UV]], [[SMIN1]] + ; GFX6-NEXT: [[SMAX2:%[0-9]+]]:_(i32) = G_SMAX [[UV1]], [[C2]] + ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SMAX2]], [[C]] + ; GFX6-NEXT: [[SMIN2:%[0-9]+]]:_(i32) = G_SMIN [[UV1]], [[C2]] + ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[SMIN2]], 
[[C1]] + ; GFX6-NEXT: [[SMAX3:%[0-9]+]]:_(i32) = G_SMAX [[SUB3]], [[UV3]] + ; GFX6-NEXT: [[SMIN3:%[0-9]+]]:_(i32) = G_SMIN [[SMAX3]], [[SUB4]] + ; GFX6-NEXT: [[SUB5:%[0-9]+]]:_(i32) = G_SUB [[UV1]], [[SMIN3]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SUB2]](i32), [[SUB5]](i32) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX8-LABEL: name: ssubsat_v2s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[UV]], [[C2]] - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SMAX]], [[C]] - ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[UV]], [[C2]] - ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SMIN]], [[C1]] - ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[UV2]] - ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB1]] - ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[SMIN1]] - ; GFX8-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[UV1]], [[C2]] - ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SMAX2]], [[C]] - ; GFX8-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[UV1]], [[C2]] - ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SMIN2]], [[C1]] - ; GFX8-NEXT: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SUB3]], [[UV3]] - ; GFX8-NEXT: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB4]] - ; GFX8-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[SMIN3]] - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB2]](s32), [[SUB5]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2147483647 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 -2147483648 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(i32) = G_SMAX [[UV]], [[C2]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[SMAX]], [[C]] + ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(i32) = G_SMIN [[UV]], [[C2]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[SMIN]], [[C1]] + ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(i32) = G_SMAX [[SUB]], [[UV2]] + ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(i32) = G_SMIN [[SMAX1]], [[SUB1]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[UV]], [[SMIN1]] + ; GFX8-NEXT: [[SMAX2:%[0-9]+]]:_(i32) = G_SMAX [[UV1]], [[C2]] + ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SMAX2]], [[C]] + ; GFX8-NEXT: [[SMIN2:%[0-9]+]]:_(i32) = G_SMIN [[UV1]], [[C2]] + ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[SMIN2]], [[C1]] + ; GFX8-NEXT: [[SMAX3:%[0-9]+]]:_(i32) = G_SMAX [[SUB3]], [[UV3]] + ; GFX8-NEXT: [[SMIN3:%[0-9]+]]:_(i32) = G_SMIN [[SMAX3]], [[SUB4]] + ; GFX8-NEXT: [[SUB5:%[0-9]+]]:_(i32) = G_SUB [[UV1]], [[SMIN3]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = 
G_BUILD_VECTOR [[SUB2]](i32), [[SUB5]](i32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX9-LABEL: name: ssubsat_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9-NEXT: [[SSUBSAT:%[0-9]+]]:_(s32) = G_SSUBSAT [[UV]], [[UV2]] - ; GFX9-NEXT: [[SSUBSAT1:%[0-9]+]]:_(s32) = G_SSUBSAT [[UV1]], [[UV3]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SSUBSAT]](s32), [[SSUBSAT1]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = G_SSUBSAT %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX9-NEXT: [[SSUBSAT:%[0-9]+]]:_(i32) = G_SSUBSAT [[UV]], [[UV2]] + ; GFX9-NEXT: [[SSUBSAT1:%[0-9]+]]:_(i32) = G_SSUBSAT [[UV1]], [[UV3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SSUBSAT]](i32), [[SSUBSAT1]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i32>) = G_SSUBSAT %0, %1 + $vgpr0_vgpr1 = COPY %2(<2 x i32>) ... 
--- @@ -944,84 +944,84 @@ body: | ; GFX6-LABEL: name: ssubsat_s64 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] - ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] - ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[COPY]] - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY1]](s64), [[C]] - ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[MV]](s64) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY2]], [[C1]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) - ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] - ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[COPY2]] - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV]], [[UV2]] + ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO]](i32), [[USUBE]](i32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[MV]](i64), [[COPY]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(sgt), [[COPY1]](i64), [[C]] + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(i1) = G_XOR [[ICMP1]], [[ICMP]] + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(i64) = COPY [[MV]](i64) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[COPY2]], [[C1]](i32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 -9223372036854775808 + ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR]](i64) + ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C2]](i64) + ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV4]], [[UV6]] + ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] + ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[XOR]](i1), 
[[MV1]], [[COPY2]] + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](i64) ; ; GFX8-LABEL: name: ssubsat_s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX8-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] - ; GFX8-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] - ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[COPY]] - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY1]](s64), [[C]] - ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[MV]](s64) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY2]], [[C1]](s32) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) - ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] - ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[COPY2]] - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX8-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV]], [[UV2]] + ; GFX8-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO]](i32), [[USUBE]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[MV]](i64), [[COPY]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(sgt), [[COPY1]](i64), [[C]] + ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(i1) = G_XOR [[ICMP1]], [[ICMP]] + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(i64) = COPY [[MV]](i64) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[COPY2]], [[C1]](i32) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 -9223372036854775808 + ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR]](i64) + ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C2]](i64) + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV4]], [[UV6]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] + ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; GFX8-NEXT: 
[[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[XOR]](i1), [[MV1]], [[COPY2]] + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](i64) ; ; GFX9-LABEL: name: ssubsat_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX9-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] - ; GFX9-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[COPY]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY1]](s64), [[C]] - ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[MV]](s64) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY2]], [[C1]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) - ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] - ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[COPY2]] - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = G_SSUBSAT %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX9-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV]], [[UV2]] + ; GFX9-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO]](i32), [[USUBE]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[MV]](i64), [[COPY]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(sgt), [[COPY1]](i64), [[C]] + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(i1) = G_XOR [[ICMP1]], [[ICMP]] + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i64) = COPY [[MV]](i64) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[COPY2]], [[C1]](i32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 -9223372036854775808 + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR]](i64) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C2]](i64) + ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV4]], [[UV6]] + ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(i32), 
[[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[XOR]](i1), [[MV1]], [[COPY2]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64) = G_SSUBSAT %0, %1 + $vgpr0_vgpr1 = COPY %2(i64) ... --- @@ -1033,139 +1033,139 @@ body: | ; GFX6-LABEL: name: ssubsat_v2s64 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV4]], [[UV6]] - ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV5]], [[UV7]], [[USUBO1]] - ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[UV]] - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[UV2]](s64), [[C]] - ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[MV]](s64) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY2]], [[C1]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX6-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) - ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] - ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO1]] - ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[COPY2]] - ; GFX6-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX6-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV12]], [[UV14]] - ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV13]], [[UV15]], [[USUBO3]] - ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV2]](s64), [[UV1]] - ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[UV3]](s64), [[C]] - ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP2]] - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY [[MV2]](s64) - ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[COPY3]], [[C1]](s32) - ; GFX6-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) - ; GFX6-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) - ; 
GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV16]], [[UV18]] - ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV17]], [[UV19]], [[UADDO3]] - ; GFX6-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[XOR1]](s1), [[MV3]], [[COPY3]] - ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY1]](<2 x i64>) + ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV]](i64) + ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV4]], [[UV6]] + ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV5]], [[UV7]], [[USUBO1]] + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO]](i32), [[USUBE]](i32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[MV]](i64), [[UV]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(sgt), [[UV2]](i64), [[C]] + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(i1) = G_XOR [[ICMP1]], [[ICMP]] + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(i64) = COPY [[MV]](i64) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[COPY2]], [[C1]](i32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 -9223372036854775808 + ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR]](i64) + ; GFX6-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C2]](i64) + ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV8]], [[UV10]] + ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV9]], [[UV11]], [[UADDO1]] + ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[XOR]](i1), [[MV1]], [[COPY2]] + ; GFX6-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV1]](i64) + ; GFX6-NEXT: [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV3]](i64) + ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(i32), [[USUBO3:%[0-9]+]]:_(i1) = G_USUBO [[UV12]], [[UV14]] + ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV13]], [[UV15]], [[USUBO3]] + ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO2]](i32), [[USUBE2]](i32) + ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[MV2]](i64), [[UV1]] + ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(sgt), [[UV3]](i64), [[C]] + ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(i1) = G_XOR [[ICMP3]], [[ICMP2]] + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:_(i64) = COPY [[MV2]](i64) + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[COPY3]], [[C1]](i32) + ; GFX6-NEXT: [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR1]](i64) + ; GFX6-NEXT: [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C2]](i64) + ; GFX6-NEXT: 
[[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UV16]], [[UV18]] + ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UV17]], [[UV19]], [[UADDO3]] + ; GFX6-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO2]](i32), [[UADDE2]](i32) + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[XOR1]](i1), [[MV3]], [[COPY3]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[SELECT]](i64), [[SELECT1]](i64) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; GFX8-LABEL: name: ssubsat_v2s64 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX8-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV4]], [[UV6]] - ; GFX8-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV5]], [[UV7]], [[USUBO1]] - ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[UV]] - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[UV2]](s64), [[C]] - ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[MV]](s64) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY2]], [[C1]](s32) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) - ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] - ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO1]] - ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[COPY2]] - ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV12]], [[UV14]] - ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV13]], [[UV15]], [[USUBO3]] - ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV2]](s64), [[UV1]] - ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[UV3]](s64), [[C]] - ; GFX8-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP2]] - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY [[MV2]](s64) - ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[COPY3]], [[C1]](s32) - ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) - ; 
GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) - ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV16]], [[UV18]] - ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV17]], [[UV19]], [[UADDO3]] - ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[XOR1]](s1), [[MV3]], [[COPY3]] - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY1]](<2 x i64>) + ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV]](i64) + ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX8-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV4]], [[UV6]] + ; GFX8-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV5]], [[UV7]], [[USUBO1]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO]](i32), [[USUBE]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[MV]](i64), [[UV]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(sgt), [[UV2]](i64), [[C]] + ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(i1) = G_XOR [[ICMP1]], [[ICMP]] + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(i64) = COPY [[MV]](i64) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[COPY2]], [[C1]](i32) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 -9223372036854775808 + ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR]](i64) + ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C2]](i64) + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV8]], [[UV10]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV9]], [[UV11]], [[UADDO1]] + ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[XOR]](i1), [[MV1]], [[COPY2]] + ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV1]](i64) + ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV3]](i64) + ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(i32), [[USUBO3:%[0-9]+]]:_(i1) = G_USUBO [[UV12]], [[UV14]] + ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV13]], [[UV15]], [[USUBO3]] + ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO2]](i32), [[USUBE2]](i32) + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[MV2]](i64), [[UV1]] + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(sgt), [[UV3]](i64), [[C]] + ; GFX8-NEXT: [[XOR1:%[0-9]+]]:_(i1) = G_XOR [[ICMP3]], [[ICMP2]] + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(i64) = COPY [[MV2]](i64) + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[COPY3]], [[C1]](i32) + ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR1]](i64) + ; GFX8-NEXT: 
[[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C2]](i64) + ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UV16]], [[UV18]] + ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UV17]], [[UV19]], [[UADDO3]] + ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO2]](i32), [[UADDE2]](i32) + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[XOR1]](i1), [[MV3]], [[COPY3]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[SELECT]](i64), [[SELECT1]](i64) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; GFX9-LABEL: name: ssubsat_v2s64 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX9-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV4]], [[UV6]] - ; GFX9-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV5]], [[UV7]], [[USUBO1]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[UV]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[UV2]](s64), [[C]] - ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[MV]](s64) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY2]], [[C1]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) - ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] - ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO1]] - ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[COPY2]] - ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV12]], [[UV14]] - ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV13]], [[UV15]], [[USUBO3]] - ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV2]](s64), [[UV1]] - ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[UV3]](s64), [[C]] - ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP2]] - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY [[MV2]](s64) - ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[COPY3]], [[C1]](s32) - ; 
GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) - ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) - ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV16]], [[UV18]] - ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV17]], [[UV19]], [[UADDO3]] - ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[XOR1]](s1), [[MV3]], [[COPY3]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - %2:_(<2 x s64>) = G_SSUBSAT %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY1]](<2 x i64>) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV]](i64) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX9-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV4]], [[UV6]] + ; GFX9-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV5]], [[UV7]], [[USUBO1]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO]](i32), [[USUBE]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[MV]](i64), [[UV]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(sgt), [[UV2]](i64), [[C]] + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(i1) = G_XOR [[ICMP1]], [[ICMP]] + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i64) = COPY [[MV]](i64) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 63 + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[COPY2]], [[C1]](i32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 -9223372036854775808 + ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR]](i64) + ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C2]](i64) + ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV8]], [[UV10]] + ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV9]], [[UV11]], [[UADDO1]] + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[XOR]](i1), [[MV1]], [[COPY2]] + ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV1]](i64) + ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV3]](i64) + ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(i32), [[USUBO3:%[0-9]+]]:_(i1) = G_USUBO [[UV12]], [[UV14]] + ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV13]], [[UV15]], [[USUBO3]] + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO2]](i32), [[USUBE2]](i32) + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(slt), [[MV2]](i64), [[UV1]] + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(sgt), [[UV3]](i64), [[C]] + ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(i1) = G_XOR 
[[ICMP3]], [[ICMP2]] + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i64) = COPY [[MV2]](i64) + ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[COPY3]], [[C1]](i32) + ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[ASHR1]](i64) + ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C2]](i64) + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UV16]], [[UV18]] + ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UV17]], [[UV19]], [[UADDO3]] + ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO2]](i32), [[UADDE2]](i32) + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[XOR1]](i1), [[MV3]], [[COPY3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[SELECT]](i64), [[SELECT1]](i64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(<2 x i64>) = G_SSUBSAT %0, %1 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<2 x i64>) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir index b9c72d39ed45b..8a5371d168cb1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir @@ -17,45 +17,45 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]] - ; SI-NEXT: G_STORE [[AND1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[AND]], [[C]] + ; SI-NEXT: G_STORE [[AND1]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) ; ; CI-LABEL: name: test_store_global_s1_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]] - ; CI-NEXT: G_STORE [[AND1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; CI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[AND]], [[C]] + ; CI-NEXT: G_STORE [[AND1]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) ; ; VI-LABEL: name: test_store_global_s1_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]] - ; VI-NEXT: G_STORE [[AND1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; 
VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[AND]], [[C]] + ; VI-NEXT: G_STORE [[AND1]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) ; ; GFX9-LABEL: name: test_store_global_s1_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]] - ; GFX9-NEXT: G_STORE [[AND1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[AND]], [[C]] + ; GFX9-NEXT: G_STORE [[AND1]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s1) = G_TRUNC %1 - G_STORE %2, %0 :: (store (s1), align 1, addrspace 1) + %1:_(i32) = COPY $vgpr2 + %2:_(i1) = G_TRUNC %1(i32) + G_STORE %2(i1), %0(p1) :: (store (i1), addrspace 1) ... --- @@ -68,41 +68,41 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; SI-NEXT: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 127 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; SI-NEXT: G_STORE [[AND]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) ; ; CI-LABEL: name: test_store_global_s7_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CI-NEXT: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 127 + ; CI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; CI-NEXT: G_STORE [[AND]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) ; ; VI-LABEL: name: test_store_global_s7_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; VI-NEXT: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 127 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; VI-NEXT: G_STORE [[AND]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) ; ; GFX9-LABEL: name: test_store_global_s7_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX9-NEXT: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = 
COPY $vgpr2 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 127 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GFX9-NEXT: G_STORE [[AND]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s7) = G_TRUNC %1 - G_STORE %2, %0 :: (store (s7), align 1, addrspace 1) + %1:_(i32) = COPY $vgpr2 + %2:_(i7) = G_TRUNC %1(i32) + G_STORE %2(i7), %0(p1) :: (store (i7), addrspace 1) ... --- @@ -115,33 +115,33 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; SI-NEXT: G_STORE [[COPY1]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) ; ; CI-LABEL: name: test_store_global_s8_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CI-NEXT: G_STORE [[COPY1]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) ; ; VI-LABEL: name: test_store_global_s8_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; VI-NEXT: G_STORE [[COPY1]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) ; ; GFX9-LABEL: name: test_store_global_s8_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: G_STORE [[COPY1]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s8) = G_TRUNC %1 - G_STORE %2, %0 :: (store (s8), align 1, addrspace 1) + %1:_(i32) = COPY $vgpr2 + %2:_(i8) = G_TRUNC %1(i32) + G_STORE %2(i8), %0(p1) :: (store (i8), addrspace 1) ... 
--- @@ -154,47 +154,47 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C]](s32) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) - ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C1]] + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[AND]], [[C]](i32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; SI-NEXT: G_STORE [[COPY1]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) + ; SI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 1, addrspace 1) ; ; CI-LABEL: name: test_store_global_s16_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s16), align 1, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CI-NEXT: G_STORE [[COPY1]](i32), [[COPY]](p1) :: (store (i16), align 1, addrspace 1) ; ; VI-LABEL: name: test_store_global_s16_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C]](s16) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) - ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC]], [[C]](i16) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY1]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR]](i16) + ; VI-NEXT: G_STORE [[ANYEXT]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 1, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_s16_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s16), align 1, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: 
G_STORE [[COPY1]](i32), [[COPY]](p1) :: (store (i16), align 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s16) = G_TRUNC %1 - G_STORE %2, %0 :: (store (s16), align 1, addrspace 1) + %1:_(i32) = COPY $vgpr2 + %2:_(i16) = G_TRUNC %1(i32) + G_STORE %2(i16), %0(p1) :: (store (i16), align 1, addrspace 1) ... --- @@ -207,33 +207,33 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; SI-NEXT: G_STORE [[COPY1]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) ; ; CI-LABEL: name: test_store_global_s16_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CI-NEXT: G_STORE [[COPY1]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) ; ; VI-LABEL: name: test_store_global_s16_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; VI-NEXT: G_STORE [[COPY1]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) ; ; GFX9-LABEL: name: test_store_global_s16_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: G_STORE [[COPY1]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s16) = G_TRUNC %1 - G_STORE %2, %0 :: (store (s16), align 2, addrspace 1) + %1:_(i32) = COPY $vgpr2 + %2:_(i16) = G_TRUNC %1(i32) + G_STORE %2(i16), %0(p1) :: (store (i16), addrspace 1) ... 
--- @@ -246,33 +246,33 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; SI-NEXT: G_STORE [[COPY1]](i32), [[COPY]](p1) :: (store (i16), align 4, addrspace 1) ; ; CI-LABEL: name: test_store_global_s16_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CI-NEXT: G_STORE [[COPY1]](i32), [[COPY]](p1) :: (store (i16), align 4, addrspace 1) ; ; VI-LABEL: name: test_store_global_s16_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; VI-NEXT: G_STORE [[COPY1]](i32), [[COPY]](p1) :: (store (i16), align 4, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_s16_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: G_STORE [[COPY1]](i32), [[COPY]](p1) :: (store (i16), align 4, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s16) = G_TRUNC %1 - G_STORE %2, %0 :: (store (s16), align 4, addrspace 1) + %1:_(i32) = COPY $vgpr2 + %2:_(i16) = G_TRUNC %1(i32) + G_STORE %2(i16), %0(p1) :: (store (i16), align 4, addrspace 1) ... 
--- @@ -285,57 +285,57 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i16), align 4, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 2, align 2, addrspace 1) ; ; CI-LABEL: name: test_store_global_s24_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) - ; CI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i16), align 4, addrspace 1) + ; CI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 2, align 2, addrspace 1) ; ; VI-LABEL: name: test_store_global_s24_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = 
G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i16), align 4, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 2, align 2, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_s24_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) - ; GFX9-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; GFX9-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i16), align 4, addrspace 1) + ; GFX9-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 2, align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s24) = G_TRUNC %1 - G_STORE %2, %0 :: (store (s24), align 4, addrspace 1) + %1:_(i32) = COPY $vgpr2 + %2:_(i24) = G_TRUNC %1(i32) + G_STORE %2(i24), %0(p1) :: (store (i24), align 4, addrspace 1) ... 
--- @@ -348,57 +348,57 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) - ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) + ; SI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 2, align 2, addrspace 1) ; ; CI-LABEL: name: test_store_global_s24_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) - ; CI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) + ; CI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 2, align 2, addrspace 1) ; ; VI-LABEL: name: test_store_global_s24_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) - ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: 
[[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) + ; VI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 2, align 2, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_s24_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) - ; GFX9-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; GFX9-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) + ; GFX9-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 2, align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s24) = G_TRUNC %1 - G_STORE %2, %0 :: (store (s24), align 2, addrspace 1) + %1:_(i32) = COPY $vgpr2 + %2:_(i24) = G_TRUNC %1(i32) + G_STORE %2(i24), %0(p1) :: (store (i24), align 2, addrspace 1) ... 
--- @@ -411,71 +411,71 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s32) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) - ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY2]], [[C3]] + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[AND]], [[C2]](i32) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) + ; SI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD1]](p1) :: (store (i8) into unknown-address + 1, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 2, addrspace 1) ; ; CI-LABEL: name: test_store_global_s24_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 1, addrspace 1) - ; CI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i16), align 1, addrspace 1) + ; CI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 2, addrspace 1) ; ; VI-LABEL: name: test_store_global_s24_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: 
{{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C2]](s16) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) - ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC]], [[C2]](i16) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR1]](i16) + ; VI-NEXT: G_STORE [[ANYEXT]](i32), [[PTR_ADD1]](p1) :: (store (i8) into unknown-address + 1, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 2, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_s24_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 1, addrspace 1) - ; GFX9-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; GFX9-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i16), align 1, addrspace 1) + ; GFX9-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s24) = G_TRUNC %1 - G_STORE %2, %0 :: (store (s24), 
align 1, addrspace 1) + %1:_(i32) = COPY $vgpr2 + %2:_(i24) = G_TRUNC %1(i32) + G_STORE %2(i24), %0(p1) :: (store (i24), align 1, addrspace 1) ... --- @@ -488,41 +488,41 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 33554431 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; SI-NEXT: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s32), addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 33554431 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; SI-NEXT: G_STORE [[AND]](i32), [[COPY]](p1) :: (store (i32), addrspace 1) ; ; CI-LABEL: name: test_store_global_s25_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 33554431 - ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CI-NEXT: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s32), addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 33554431 + ; CI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; CI-NEXT: G_STORE [[AND]](i32), [[COPY]](p1) :: (store (i32), addrspace 1) ; ; VI-LABEL: name: test_store_global_s25_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 33554431 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; VI-NEXT: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s32), addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 33554431 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; VI-NEXT: G_STORE [[AND]](i32), [[COPY]](p1) :: (store (i32), addrspace 1) ; ; GFX9-LABEL: name: test_store_global_s25_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 33554431 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX9-NEXT: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s32), addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 33554431 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GFX9-NEXT: G_STORE [[AND]](i32), [[COPY]](p1) :: (store (i32), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s25) = G_TRUNC %1 - G_STORE %2, %0 :: (store (s25), align 4, addrspace 1) + %1:_(i32) = COPY $vgpr2 + %2:_(i25) = G_TRUNC %1(i32) + G_STORE %2(i25), %0(p1) :: (store (i25), addrspace 1) ... # --- @@ -532,9 +532,9 @@ body: | # liveins: $vgpr0_vgpr1, $vgpr2 # %0:_(p1) = COPY $vgpr0_vgpr1 -# %1:_(s32) = COPY $vgpr2 -# %2:_(s25) = G_TRUNC %1 -# G_STORE %2, %0 :: (store (s25), align 2, addrspace 1) +# %1:_(i32) = COPY $vgpr2 +# %2:_(i25) = G_TRUNC %1 +# G_STORE %2, %0 :: (store (i25), align 2, addrspace 1) # ... 
# --- @@ -544,9 +544,9 @@ body: | # liveins: $vgpr0_vgpr1, $vgpr2 # %0:_(p1) = COPY $vgpr0_vgpr1 -# %1:_(s32) = COPY $vgpr2 -# %2:_(s25) = G_TRUNC %1 -# G_STORE %2, %0 :: (store (s25), align 1, addrspace 1) +# %1:_(i32) = COPY $vgpr2 +# %2:_(i25) = G_TRUNC %1 +# G_STORE %2, %0 :: (store (i25), align 1, addrspace 1) # ... --- @@ -559,67 +559,67 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s32) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) - ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY3]](s32) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY2]], [[C3]] + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[AND]], [[C2]](i32) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) + ; SI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD1]](p1) :: (store (i8) into unknown-address + 1, addrspace 1) + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[LSHR]], [[COPY3]](i32) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 2, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR2]](i32), [[PTR_ADD2]](p1) :: (store (i8) into unknown-address + 3, addrspace 1) ; ; CI-LABEL: name: test_store_global_s32_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s32), align 1, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CI-NEXT: G_STORE [[COPY1]](i32), [[COPY]](p1) :: (store 
(i32), align 1, addrspace 1) ; ; VI-LABEL: name: test_store_global_s32_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C2]](s16) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) - ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) - ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) - ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC]], [[C2]](i16) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR1]](i16) + ; VI-NEXT: G_STORE [[ANYEXT]](i32), [[PTR_ADD1]](p1) :: (store (i8) into unknown-address + 1, addrspace 1) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC1]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 2, addrspace 1) + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR2]](i16) + ; VI-NEXT: G_STORE [[ANYEXT1]](i32), [[PTR_ADD2]](p1) :: (store (i8) into unknown-address + 3, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_s32_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s32), align 1, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: G_STORE [[COPY1]](i32), [[COPY]](p1) :: (store (i32), align 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - G_STORE %1, 
%0 :: (store (s32), align 1, addrspace 1) + %1:_(i32) = COPY $vgpr2 + G_STORE %1(i32), %0(p1) :: (store (i32), align 1, addrspace 1) ... --- @@ -632,44 +632,44 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) - ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) + ; SI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i16) into unknown-address + 2, addrspace 1) ; ; CI-LABEL: name: test_store_global_s32_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s32), align 2, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CI-NEXT: G_STORE [[COPY1]](i32), [[COPY]](p1) :: (store (i32), align 2, addrspace 1) ; ; VI-LABEL: name: test_store_global_s32_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) - ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[COPY1]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) + ; VI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i16) into unknown-address + 2, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_s32_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s32), align 2, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: G_STORE [[COPY1]](i32), [[COPY]](p1) :: (store (i32), align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - 
%1:_(s32) = COPY $vgpr2 - G_STORE %1, %0 :: (store (s32), align 2, addrspace 1) + %1:_(i32) = COPY $vgpr2 + G_STORE %1(i32), %0(p1) :: (store (i32), align 2, addrspace 1) ... --- @@ -682,32 +682,32 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s32), addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; SI-NEXT: G_STORE [[COPY1]](i32), [[COPY]](p1) :: (store (i32), addrspace 1) ; ; CI-LABEL: name: test_store_global_s32_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s32), addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CI-NEXT: G_STORE [[COPY1]](i32), [[COPY]](p1) :: (store (i32), addrspace 1) ; ; VI-LABEL: name: test_store_global_s32_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s32), addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; VI-NEXT: G_STORE [[COPY1]](i32), [[COPY]](p1) :: (store (i32), addrspace 1) ; ; GFX9-LABEL: name: test_store_global_s32_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s32), addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: G_STORE [[COPY1]](i32), [[COPY]](p1) :: (store (i32), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - G_STORE %1, %0 :: (store (s32), align 4, addrspace 1) + %1:_(i32) = COPY $vgpr2 + G_STORE %1(i32), %0(p1) :: (store (i32), addrspace 1) ... 
--- @@ -721,25 +721,25 @@ body: | ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2 - ; SI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY1]](p3) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[PTRTOINT]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s32) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) - ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY3]](s32) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) + ; SI-NEXT: [[PTRTOINT:%[0-9]+]]:_(i32) = G_PTRTOINT [[COPY1]](p3) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[PTRTOINT]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY2]], [[C3]] + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[AND]], [[C2]](i32) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) + ; SI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD1]](p1) :: (store (i8) into unknown-address + 1, addrspace 1) + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[LSHR]], [[COPY3]](i32) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 2, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR2]](i32), [[PTR_ADD2]](p1) :: (store (i8) into unknown-address + 3, addrspace 1) ; ; CI-LABEL: name: test_store_global_p3_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2 @@ -753,26 +753,26 @@ body: | ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2 - ; VI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY1]](p3) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[PTRTOINT]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; 
VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C2]](s16) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) - ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) - ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) - ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) + ; VI-NEXT: [[PTRTOINT:%[0-9]+]]:_(i32) = G_PTRTOINT [[COPY1]](p3) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[PTRTOINT]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC]], [[C2]](i16) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR1]](i16) + ; VI-NEXT: G_STORE [[ANYEXT]](i32), [[PTR_ADD1]](p1) :: (store (i8) into unknown-address + 1, addrspace 1) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC1]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 2, addrspace 1) + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR2]](i16) + ; VI-NEXT: G_STORE [[ANYEXT1]](i32), [[PTR_ADD2]](p1) :: (store (i8) into unknown-address + 3, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_p3_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 @@ -782,7 +782,7 @@ body: | ; GFX9-NEXT: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store (p3), align 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(p3) = COPY $vgpr2 - G_STORE %1, %0 :: (store (p3), align 1, addrspace 1) + G_STORE %1(p3), %0(p1) :: (store (p3), align 1, addrspace 1) ... 
--- @@ -796,14 +796,14 @@ body: | ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2 - ; SI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY1]](p3) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[PTRTOINT]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) - ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) + ; SI-NEXT: [[PTRTOINT:%[0-9]+]]:_(i32) = G_PTRTOINT [[COPY1]](p3) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[PTRTOINT]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) + ; SI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i16) into unknown-address + 2, addrspace 1) ; ; CI-LABEL: name: test_store_global_p3_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2 @@ -817,14 +817,14 @@ body: | ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2 - ; VI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY1]](p3) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[PTRTOINT]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) - ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) + ; VI-NEXT: [[PTRTOINT:%[0-9]+]]:_(i32) = G_PTRTOINT [[COPY1]](p3) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[PTRTOINT]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) + ; VI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i16) into unknown-address + 2, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_p3_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 @@ -834,7 +834,7 @@ body: | ; GFX9-NEXT: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store (p3), align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(p3) = COPY $vgpr2 - G_STORE %1, %0 :: (store (p3), align 2, addrspace 1) + G_STORE %1(p3), %0(p1) :: (store (p3), align 2, addrspace 1) ... --- @@ -872,7 +872,7 @@ body: | ; GFX9-NEXT: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store (p3), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(p3) = COPY $vgpr2 - G_STORE %1, %0 :: (store (p3), align 4, addrspace 1) + G_STORE %1(p3), %0(p1) :: (store (p3), addrspace 1) ... 
--- @@ -885,106 +885,106 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C4]](s32) - ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) - ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[COPY3]](s32) - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C6]](s64) - ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C5]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY4]](s32) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C6]](s64) - ; SI-NEXT: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY1]](i64) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[COPY1]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[TRUNC]](i32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C2]](i32) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY2]], [[C5]] + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[AND]], [[C4]](i32) + ; SI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; SI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) + ; SI-NEXT: G_STORE [[LSHR2]](i32), [[PTR_ADD2]](p1) :: 
(store (i8) into unknown-address + 1, addrspace 1) + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[LSHR1]], [[COPY3]](i32) + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C6]](i64) + ; SI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD1]](p1) :: (store (i8) into unknown-address + 2, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR3]](i32), [[PTR_ADD3]](p1) :: (store (i8) into unknown-address + 3, addrspace 1) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i32) = G_TRUNC [[LSHR]](i64) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[TRUNC1]], [[C5]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[AND1]], [[COPY4]](i32) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C6]](i64) + ; SI-NEXT: G_STORE [[TRUNC1]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR4]](i32), [[PTR_ADD4]](p1) :: (store (i8) into unknown-address + 5, addrspace 1) ; ; CI-LABEL: name: test_store_global_s48_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C]](s32) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s32), align 1, addrspace 1) - ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; CI-NEXT: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, align 1, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY1]](i64) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; CI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[COPY1]], [[C]](i32) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-NEXT: G_STORE [[TRUNC]](i32), [[COPY]](p1) :: (store (i32), align 1, addrspace 1) + ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(i32) = G_TRUNC [[LSHR]](i64) + ; CI-NEXT: G_STORE [[TRUNC1]](i32), [[PTR_ADD]](p1) :: (store (i16) into unknown-address + 4, align 1, addrspace 1) ; ; VI-LABEL: name: test_store_global_s48_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s64) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C4]](s16) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 
1 - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) - ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C4]](s16) - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C5]](s64) - ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) - ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR3]](s16) - ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s64) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C4]](s16) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C5]](s64) - ; VI-NEXT: G_STORE [[TRUNC3]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) - ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) - ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY1]](i64) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[COPY1]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[TRUNC]](i32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C2]](i32) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i64) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC1]], [[C4]](i16) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; VI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR2]](i16) + ; VI-NEXT: G_STORE [[ANYEXT]](i32), [[PTR_ADD2]](p1) :: (store (i8) into unknown-address + 1, addrspace 1) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC2]], [[C4]](i16) + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C5]](i64) + ; VI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD1]](p1) :: (store (i8) into unknown-address + 2, addrspace 1) + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR3]](i16) + ; VI-NEXT: G_STORE [[ANYEXT1]](i32), [[PTR_ADD3]](p1) :: (store (i8) into unknown-address + 3, addrspace 1) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i32) = G_TRUNC [[LSHR]](i64) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i64) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC4]], [[C4]](i16) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C5]](i64) + ; VI-NEXT: G_STORE [[TRUNC3]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 4, addrspace 1) + ; VI-NEXT: 
[[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR4]](i16) + ; VI-NEXT: G_STORE [[ANYEXT2]](i32), [[PTR_ADD4]](p1) :: (store (i8) into unknown-address + 5, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_s48_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-NEXT: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s32), align 1, addrspace 1) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; GFX9-NEXT: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, align 1, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY1]](i64) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[COPY1]], [[C]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; GFX9-NEXT: G_STORE [[TRUNC]](i32), [[COPY]](p1) :: (store (i32), align 1, addrspace 1) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i32) = G_TRUNC [[LSHR]](i64) + ; GFX9-NEXT: G_STORE [[TRUNC1]](i32), [[PTR_ADD]](p1) :: (store (i16) into unknown-address + 4, align 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s48) = G_TRUNC %1 - G_STORE %2, %0 :: (store (s48), align 1, addrspace 1) + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i48) = G_TRUNC %1(i64) + G_STORE %2(i48), %0(p1) :: (store (i48), align 1, addrspace 1) ... 
--- @@ -997,73 +997,73 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) - ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; SI-NEXT: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY1]](i64) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[COPY1]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[TRUNC]](i32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C2]](i32) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; SI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) + ; SI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD1]](p1) :: (store (i16) into unknown-address + 2, addrspace 1) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i32) = G_TRUNC [[LSHR]](i64) + ; SI-NEXT: G_STORE [[TRUNC1]](i32), [[PTR_ADD]](p1) :: (store (i16) into unknown-address + 4, addrspace 1) ; ; CI-LABEL: name: test_store_global_s48_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C]](s32) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s32), align 2, addrspace 1) - ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; CI-NEXT: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY1]](i64) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; CI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[COPY1]], [[C]](i32) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-NEXT: G_STORE [[TRUNC]](i32), [[COPY]](p1) :: (store (i32), align 2, addrspace 1) + ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(i32) = G_TRUNC [[LSHR]](i64) + 
; CI-NEXT: G_STORE [[TRUNC1]](i32), [[PTR_ADD]](p1) :: (store (i16) into unknown-address + 4, addrspace 1) ; ; VI-LABEL: name: test_store_global_s48_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) - ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; VI-NEXT: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY1]](i64) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[COPY1]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[TRUNC]](i32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C2]](i32) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) + ; VI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD1]](p1) :: (store (i16) into unknown-address + 2, addrspace 1) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i32) = G_TRUNC [[LSHR]](i64) + ; VI-NEXT: G_STORE [[TRUNC1]](i32), [[PTR_ADD]](p1) :: (store (i16) into unknown-address + 4, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_s48_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-NEXT: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s32), align 2, addrspace 1) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; GFX9-NEXT: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY1]](i64) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[COPY1]], [[C]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], 
[[C1]](i64) + ; GFX9-NEXT: G_STORE [[TRUNC]](i32), [[COPY]](p1) :: (store (i32), align 2, addrspace 1) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i32) = G_TRUNC [[LSHR]](i64) + ; GFX9-NEXT: G_STORE [[TRUNC1]](i32), [[PTR_ADD]](p1) :: (store (i16) into unknown-address + 4, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s48) = G_TRUNC %1 - G_STORE %2, %0 :: (store (s48), align 2, addrspace 1) + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i48) = G_TRUNC %1(i64) + G_STORE %2(i48), %0(p1) :: (store (i48), align 2, addrspace 1) ... # --- @@ -1073,9 +1073,9 @@ body: | # liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 # %0:_(p1) = COPY $vgpr0_vgpr1 -# %1:_(s64) = COPY $vgpr2_vgpr3 -# %2:_(s48) = G_TRUNC %1 -# G_STORE %2, %0 :: (store (s48), align 4, addrspace 1) +# %1:_(i64) = COPY $vgpr2_vgpr3 +# %2:_(i48) = G_TRUNC %1 +# G_STORE %2, %0 :: (store (i48), align 4, addrspace 1) # ... # --- @@ -1085,9 +1085,9 @@ body: | # liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 # %0:_(p1) = COPY $vgpr0_vgpr1 -# %1:_(s64) = COPY $vgpr2_vgpr3 -# %2:_(s48) = G_TRUNC %1 -# G_STORE %2, %0 :: (store (s48), align 8, addrspace 1) +# %1:_(i64) = COPY $vgpr2_vgpr3 +# %2:_(i48) = G_TRUNC %1 +# G_STORE %2, %0 :: (store (i48), align 8, addrspace 1) # ... @@ -1101,110 +1101,110 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C4]](s32) - ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) - ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[COPY4]](s32) - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C6]](s64) - ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[TRUNC1]](s32) - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C2]](s32) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR 
[[AND1]], [[COPY6]](s32) - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C6]](s64) - ; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LSHR4]], [[COPY7]](s32) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C6]](s64) - ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i64) = COPY [[COPY1]](i64) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[COPY2]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY2]](i64) + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[TRUNC]](i32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C2]](i32) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY3]], [[C5]] + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[AND]], [[C4]](i32) + ; SI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; SI-NEXT: G_STORE [[COPY3]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) + ; SI-NEXT: G_STORE [[LSHR2]](i32), [[PTR_ADD2]](p1) :: (store (i8) into unknown-address + 1, addrspace 1) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[LSHR1]], [[COPY4]](i32) + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C6]](i64) + ; SI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD1]](p1) :: (store (i8) into unknown-address + 2, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR3]](i32), [[PTR_ADD3]](p1) :: (store (i8) into unknown-address + 3, addrspace 1) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i32) = G_TRUNC [[LSHR]](i64) + ; SI-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[TRUNC1]](i32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[COPY5]], [[C2]](i32) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](i64) + ; SI-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY5]], [[C5]] + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[AND1]], [[COPY6]](i32) + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C6]](i64) + ; SI-NEXT: G_STORE [[COPY5]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR5]](i32), [[PTR_ADD5]](p1) :: (store (i8) into unknown-address + 5, addrspace 1) + ; SI-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[LSHR4]], [[COPY7]](i32) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C6]](i64) + ; SI-NEXT: G_STORE [[LSHR4]](i32), [[PTR_ADD4]](p1) :: (store (i8) into unknown-address + 6, addrspace 1) + ; SI-NEXT: 
G_STORE [[LSHR6]](i32), [[PTR_ADD6]](p1) :: (store (i8) into unknown-address + 7, addrspace 1) ; ; CI-LABEL: name: test_store_global_s64_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CI-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 1, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; CI-NEXT: G_STORE [[COPY1]](i64), [[COPY]](p1) :: (store (i64), align 1, addrspace 1) ; ; VI-LABEL: name: test_store_global_s64_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s64) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C4]](s16) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) - ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C4]](s16) - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C5]](s64) - ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) - ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR3]](s16) - ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[TRUNC3]](s32) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C2]](s32) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s64) - ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C4]](s16) - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C5]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) - ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16) - ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[C4]](s16) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C5]](s64) - ; VI-NEXT: G_STORE [[LSHR4]](s32), 
[[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) - ; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR6]](s16) - ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i64) = COPY [[COPY1]](i64) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[COPY2]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY2]](i64) + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[TRUNC]](i32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C2]](i32) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i64) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC1]], [[C4]](i16) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; VI-NEXT: G_STORE [[COPY3]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR2]](i16) + ; VI-NEXT: G_STORE [[ANYEXT]](i32), [[PTR_ADD2]](p1) :: (store (i8) into unknown-address + 1, addrspace 1) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC2]], [[C4]](i16) + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C5]](i64) + ; VI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD1]](p1) :: (store (i8) into unknown-address + 2, addrspace 1) + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR3]](i16) + ; VI-NEXT: G_STORE [[ANYEXT1]](i32), [[PTR_ADD3]](p1) :: (store (i8) into unknown-address + 3, addrspace 1) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i32) = G_TRUNC [[LSHR]](i64) + ; VI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[TRUNC3]](i32) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C2]](i32) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](i64) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i64) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC4]], [[C4]](i16) + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C5]](i64) + ; VI-NEXT: G_STORE [[COPY4]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 4, addrspace 1) + ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR5]](i16) + ; VI-NEXT: G_STORE [[ANYEXT2]](i32), [[PTR_ADD5]](p1) :: (store (i8) into unknown-address + 5, addrspace 1) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR4]](i32) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC5]], [[C4]](i16) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C5]](i64) + ; VI-NEXT: G_STORE [[LSHR4]](i32), [[PTR_ADD4]](p1) :: (store (i8) into unknown-address + 6, addrspace 1) + ; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR6]](i16) + ; VI-NEXT: G_STORE [[ANYEXT3]](i32), [[PTR_ADD6]](p1) :: (store (i8) into unknown-address + 7, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_s64_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; 
GFX9-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 1, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: G_STORE [[COPY1]](i64), [[COPY]](p1) :: (store (i64), align 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store (s64), align 1, addrspace 1) + %1:_(i64) = COPY $vgpr2_vgpr3 + G_STORE %1(i64), %0(p1) :: (store (i64), align 1, addrspace 1) ... --- @@ -1217,68 +1217,68 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) - ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[TRUNC1]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C2]](s32) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i64) = COPY [[COPY1]](i64) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[COPY2]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY2]](i64) + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[TRUNC]](i32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C2]](i32) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; SI-NEXT: G_STORE [[COPY3]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) + ; SI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD1]](p1) :: (store (i16) into unknown-address + 2, addrspace 1) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i32) = G_TRUNC [[LSHR]](i64) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[TRUNC1]](i32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C2]](i32) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](i64) + ; SI-NEXT: G_STORE [[COPY4]](i32), [[PTR_ADD]](p1) :: (store (i16) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR2]](i32), [[PTR_ADD2]](p1) :: (store (i16) into unknown-address + 6, addrspace 1) ; ; CI-LABEL: name: test_store_global_s64_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} 
; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CI-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 2, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; CI-NEXT: G_STORE [[COPY1]](i64), [[COPY]](p1) :: (store (i64), align 2, addrspace 1) ; ; VI-LABEL: name: test_store_global_s64_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) - ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[TRUNC1]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C2]](s32) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i64) = COPY [[COPY1]](i64) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[COPY2]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY2]](i64) + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[TRUNC]](i32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C2]](i32) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY3]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) + ; VI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD1]](p1) :: (store (i16) into unknown-address + 2, addrspace 1) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i32) = G_TRUNC [[LSHR]](i64) + ; VI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[TRUNC1]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C2]](i32) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY4]](i32), [[PTR_ADD]](p1) :: (store (i16) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR2]](i32), [[PTR_ADD2]](p1) :: (store (i16) into unknown-address + 6, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_s64_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: 
[[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 2, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: G_STORE [[COPY1]](i64), [[COPY]](p1) :: (store (i64), align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store (s64), align 2, addrspace 1) + %1:_(i64) = COPY $vgpr2_vgpr3 + G_STORE %1(i64), %0(p1) :: (store (i64), align 2, addrspace 1) ... --- @@ -1291,32 +1291,32 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 4, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; SI-NEXT: G_STORE [[COPY1]](i64), [[COPY]](p1) :: (store (i64), align 4, addrspace 1) ; ; CI-LABEL: name: test_store_global_s64_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CI-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 4, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; CI-NEXT: G_STORE [[COPY1]](i64), [[COPY]](p1) :: (store (i64), align 4, addrspace 1) ; ; VI-LABEL: name: test_store_global_s64_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 4, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; VI-NEXT: G_STORE [[COPY1]](i64), [[COPY]](p1) :: (store (i64), align 4, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_s64_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 4, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: G_STORE [[COPY1]](i64), [[COPY]](p1) :: (store (i64), align 4, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store (s64), align 4, addrspace 1) + %1:_(i64) = COPY $vgpr2_vgpr3 + G_STORE %1(i64), %0(p1) :: (store (i64), align 4, addrspace 1) ... 
 ---
@@ -1329,32 +1329,32 @@ body: |
     ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
     ; SI-NEXT: {{ $}}
     ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; SI-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3
+    ; SI-NEXT: G_STORE [[COPY1]](i64), [[COPY]](p1) :: (store (i64), addrspace 1)
     ;
     ; CI-LABEL: name: test_store_global_s64_align8
     ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
     ; CI-NEXT: {{ $}}
     ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; CI-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3
+    ; CI-NEXT: G_STORE [[COPY1]](i64), [[COPY]](p1) :: (store (i64), addrspace 1)
     ;
     ; VI-LABEL: name: test_store_global_s64_align8
     ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
     ; VI-NEXT: {{ $}}
     ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; VI-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3
+    ; VI-NEXT: G_STORE [[COPY1]](i64), [[COPY]](p1) :: (store (i64), addrspace 1)
     ;
     ; GFX9-LABEL: name: test_store_global_s64_align8
     ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
     ; GFX9-NEXT: {{ $}}
     ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; GFX9-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3
+    ; GFX9-NEXT: G_STORE [[COPY1]](i64), [[COPY]](p1) :: (store (i64), addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
-    %1:_(s64) = COPY $vgpr2_vgpr3
-    G_STORE %1, %0 :: (store (s64), align 8, addrspace 1)
+    %1:_(i64) = COPY $vgpr2_vgpr3
+    G_STORE %1(i64), %0(p1) :: (store (i64), addrspace 1)
 ...
 
 ---
@@ -1367,32 +1367,32 @@ body: |
     ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
     ; SI-NEXT: {{ $}}
     ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; SI-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 16, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3
+    ; SI-NEXT: G_STORE [[COPY1]](i64), [[COPY]](p1) :: (store (i64), align 16, addrspace 1)
     ;
     ; CI-LABEL: name: test_store_global_s64_align16
     ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
     ; CI-NEXT: {{ $}}
     ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; CI-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 16, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3
+    ; CI-NEXT: G_STORE [[COPY1]](i64), [[COPY]](p1) :: (store (i64), align 16, addrspace 1)
     ;
     ; VI-LABEL: name: test_store_global_s64_align16
     ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
     ; VI-NEXT: {{ $}}
     ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; VI-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 16, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3
+    ; VI-NEXT: G_STORE [[COPY1]](i64), [[COPY]](p1) :: (store (i64), align 16, addrspace 1)
     ;
     ; GFX9-LABEL: name: test_store_global_s64_align16
     ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
     ; GFX9-NEXT: {{ $}}
     ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; GFX9-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 16, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3
+    ; GFX9-NEXT: G_STORE [[COPY1]](i64), [[COPY]](p1) :: (store (i64), align 16, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
-    %1:_(s64) = COPY $vgpr2_vgpr3
-    G_STORE %1, %0 :: (store (s64), align 16, addrspace 1)
+    %1:_(i64) = COPY $vgpr2_vgpr3
+    G_STORE %1(i64), %0(p1) :: (store (i64), align 16, addrspace 1)
 ...
 
--- @@ -1406,46 +1406,46 @@ body: | ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p0) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[PTRTOINT]](s64) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C4]](s32) - ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) - ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[COPY4]](s32) - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C6]](s64) - ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[TRUNC1]](s32) - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C2]](s32) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY6]](s32) - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C6]](s64) - ; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LSHR4]], [[COPY7]](s32) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C6]](s64) - ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) + ; SI-NEXT: [[PTRTOINT:%[0-9]+]]:_(i64) = G_PTRTOINT [[COPY1]](p0) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i64) = COPY [[PTRTOINT]](i64) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[COPY2]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY2]](i64) + ; SI-NEXT: 
[[COPY3:%[0-9]+]]:_(i32) = COPY [[TRUNC]](i32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C2]](i32) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY3]], [[C5]] + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[AND]], [[C4]](i32) + ; SI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; SI-NEXT: G_STORE [[COPY3]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) + ; SI-NEXT: G_STORE [[LSHR2]](i32), [[PTR_ADD2]](p1) :: (store (i8) into unknown-address + 1, addrspace 1) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[LSHR1]], [[COPY4]](i32) + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C6]](i64) + ; SI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD1]](p1) :: (store (i8) into unknown-address + 2, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR3]](i32), [[PTR_ADD3]](p1) :: (store (i8) into unknown-address + 3, addrspace 1) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i32) = G_TRUNC [[LSHR]](i64) + ; SI-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[TRUNC1]](i32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[COPY5]], [[C2]](i32) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](i64) + ; SI-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY5]], [[C5]] + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[AND1]], [[COPY6]](i32) + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C6]](i64) + ; SI-NEXT: G_STORE [[COPY5]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR5]](i32), [[PTR_ADD5]](p1) :: (store (i8) into unknown-address + 5, addrspace 1) + ; SI-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[LSHR4]], [[COPY7]](i32) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C6]](i64) + ; SI-NEXT: G_STORE [[LSHR4]](i32), [[PTR_ADD4]](p1) :: (store (i8) into unknown-address + 6, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR6]](i32), [[PTR_ADD6]](p1) :: (store (i8) into unknown-address + 7, addrspace 1) ; ; CI-LABEL: name: test_store_global_p0_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -1459,48 +1459,48 @@ body: | ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p0) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[PTRTOINT]](s64) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s64) - ; VI-NEXT: 
[[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C4]](s16) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) - ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C4]](s16) - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C5]](s64) - ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) - ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR3]](s16) - ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[TRUNC3]](s32) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C2]](s32) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s64) - ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C4]](s16) - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C5]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) - ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16) - ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[C4]](s16) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C5]](s64) - ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) - ; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR6]](s16) - ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) + ; VI-NEXT: [[PTRTOINT:%[0-9]+]]:_(i64) = G_PTRTOINT [[COPY1]](p0) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i64) = COPY [[PTRTOINT]](i64) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[COPY2]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY2]](i64) + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[TRUNC]](i32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C2]](i32) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i64) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC1]], [[C4]](i16) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; VI-NEXT: G_STORE [[COPY3]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR2]](i16) + ; VI-NEXT: G_STORE [[ANYEXT]](i32), [[PTR_ADD2]](p1) :: (store (i8) into 
unknown-address + 1, addrspace 1) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC2]], [[C4]](i16) + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C5]](i64) + ; VI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD1]](p1) :: (store (i8) into unknown-address + 2, addrspace 1) + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR3]](i16) + ; VI-NEXT: G_STORE [[ANYEXT1]](i32), [[PTR_ADD3]](p1) :: (store (i8) into unknown-address + 3, addrspace 1) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i32) = G_TRUNC [[LSHR]](i64) + ; VI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[TRUNC3]](i32) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C2]](i32) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](i64) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i64) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC4]], [[C4]](i16) + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C5]](i64) + ; VI-NEXT: G_STORE [[COPY4]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 4, addrspace 1) + ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR5]](i16) + ; VI-NEXT: G_STORE [[ANYEXT2]](i32), [[PTR_ADD5]](p1) :: (store (i8) into unknown-address + 5, addrspace 1) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR4]](i32) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC5]], [[C4]](i16) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C5]](i64) + ; VI-NEXT: G_STORE [[LSHR4]](i32), [[PTR_ADD4]](p1) :: (store (i8) into unknown-address + 6, addrspace 1) + ; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR6]](i16) + ; VI-NEXT: G_STORE [[ANYEXT3]](i32), [[PTR_ADD6]](p1) :: (store (i8) into unknown-address + 7, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_p0_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -1510,7 +1510,7 @@ body: | ; GFX9-NEXT: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), align 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(p0) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store (p0), align 1, addrspace 1) + G_STORE %1(p0), %0(p1) :: (store (p0), align 1, addrspace 1) ... 
--- @@ -1524,26 +1524,26 @@ body: | ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p0) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[PTRTOINT]](s64) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) - ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[TRUNC1]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C2]](s32) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) + ; SI-NEXT: [[PTRTOINT:%[0-9]+]]:_(i64) = G_PTRTOINT [[COPY1]](p0) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i64) = COPY [[PTRTOINT]](i64) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[COPY2]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY2]](i64) + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[TRUNC]](i32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C2]](i32) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; SI-NEXT: G_STORE [[COPY3]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) + ; SI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD1]](p1) :: (store (i16) into unknown-address + 2, addrspace 1) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i32) = G_TRUNC [[LSHR]](i64) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[TRUNC1]](i32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C2]](i32) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](i64) + ; SI-NEXT: G_STORE [[COPY4]](i32), [[PTR_ADD]](p1) :: (store (i16) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR2]](i32), [[PTR_ADD2]](p1) :: (store (i16) into unknown-address + 6, addrspace 1) ; ; CI-LABEL: name: test_store_global_p0_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -1557,26 +1557,26 @@ body: | ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p0) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[PTRTOINT]](s64) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) - ; VI-NEXT: 
[[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) - ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[TRUNC1]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C2]](s32) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) + ; VI-NEXT: [[PTRTOINT:%[0-9]+]]:_(i64) = G_PTRTOINT [[COPY1]](p0) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i64) = COPY [[PTRTOINT]](i64) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[COPY2]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY2]](i64) + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[TRUNC]](i32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C2]](i32) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY3]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) + ; VI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD1]](p1) :: (store (i16) into unknown-address + 2, addrspace 1) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i32) = G_TRUNC [[LSHR]](i64) + ; VI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[TRUNC1]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C2]](i32) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY4]](i32), [[PTR_ADD]](p1) :: (store (i16) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR2]](i32), [[PTR_ADD2]](p1) :: (store (i16) into unknown-address + 6, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_p0_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -1586,7 +1586,7 @@ body: | ; GFX9-NEXT: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(p0) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store (p0), align 2, addrspace 1) + G_STORE %1(p0), %0(p1) :: (store (p0), align 2, addrspace 1) ... --- @@ -1624,7 +1624,7 @@ body: | ; GFX9-NEXT: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), align 4, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(p0) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store (p0), align 4, addrspace 1) + G_STORE %1(p0), %0(p1) :: (store (p0), align 4, addrspace 1) ... 
--- @@ -1662,7 +1662,7 @@ body: | ; GFX9-NEXT: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(p0) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store (p0), align 8, addrspace 1) + G_STORE %1(p0), %0(p1) :: (store (p0), addrspace 1) ... --- @@ -1700,7 +1700,7 @@ body: | ; GFX9-NEXT: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), align 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(p0) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store (p0), align 16, addrspace 1) + G_STORE %1(p0), %0(p1) :: (store (p0), align 16, addrspace 1) ... --- @@ -1714,46 +1714,46 @@ body: | ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p999) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[PTRTOINT]](s64) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C4]](s32) - ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) - ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[COPY4]](s32) - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C6]](s64) - ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[TRUNC1]](s32) - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C2]](s32) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY6]](s32) - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C6]](s64) - ; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LSHR4]], [[COPY7]](s32) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C6]](s64) - ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, 
addrspace 1) - ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) + ; SI-NEXT: [[PTRTOINT:%[0-9]+]]:_(i64) = G_PTRTOINT [[COPY1]](p999) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i64) = COPY [[PTRTOINT]](i64) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[COPY2]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY2]](i64) + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[TRUNC]](i32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C2]](i32) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY3]], [[C5]] + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[AND]], [[C4]](i32) + ; SI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; SI-NEXT: G_STORE [[COPY3]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) + ; SI-NEXT: G_STORE [[LSHR2]](i32), [[PTR_ADD2]](p1) :: (store (i8) into unknown-address + 1, addrspace 1) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[LSHR1]], [[COPY4]](i32) + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C6]](i64) + ; SI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD1]](p1) :: (store (i8) into unknown-address + 2, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR3]](i32), [[PTR_ADD3]](p1) :: (store (i8) into unknown-address + 3, addrspace 1) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i32) = G_TRUNC [[LSHR]](i64) + ; SI-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[TRUNC1]](i32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[COPY5]], [[C2]](i32) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](i64) + ; SI-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY5]], [[C5]] + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[AND1]], [[COPY6]](i32) + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C6]](i64) + ; SI-NEXT: G_STORE [[COPY5]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR5]](i32), [[PTR_ADD5]](p1) :: (store (i8) into unknown-address + 5, addrspace 1) + ; SI-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[LSHR4]], [[COPY7]](i32) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C6]](i64) + ; SI-NEXT: G_STORE [[LSHR4]](i32), [[PTR_ADD4]](p1) :: (store (i8) into unknown-address + 6, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR6]](i32), [[PTR_ADD6]](p1) :: (store (i8) into unknown-address + 7, addrspace 1) ; ; CI-LABEL: name: test_store_global_p999_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -1767,48 +1767,48 @@ body: | ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p999) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[PTRTOINT]](s64) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) - ; 
VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s64) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C4]](s16) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) - ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C4]](s16) - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C5]](s64) - ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) - ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR3]](s16) - ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[TRUNC3]](s32) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C2]](s32) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s64) - ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C4]](s16) - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C5]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) - ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16) - ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[C4]](s16) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C5]](s64) - ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) - ; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR6]](s16) - ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) + ; VI-NEXT: [[PTRTOINT:%[0-9]+]]:_(i64) = G_PTRTOINT [[COPY1]](p999) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i64) = COPY [[PTRTOINT]](i64) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[COPY2]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY2]](i64) + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[TRUNC]](i32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C2]](i32) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = 
G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i64) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC1]], [[C4]](i16) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; VI-NEXT: G_STORE [[COPY3]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR2]](i16) + ; VI-NEXT: G_STORE [[ANYEXT]](i32), [[PTR_ADD2]](p1) :: (store (i8) into unknown-address + 1, addrspace 1) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC2]], [[C4]](i16) + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C5]](i64) + ; VI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD1]](p1) :: (store (i8) into unknown-address + 2, addrspace 1) + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR3]](i16) + ; VI-NEXT: G_STORE [[ANYEXT1]](i32), [[PTR_ADD3]](p1) :: (store (i8) into unknown-address + 3, addrspace 1) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i32) = G_TRUNC [[LSHR]](i64) + ; VI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[TRUNC3]](i32) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C2]](i32) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](i64) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i64) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC4]], [[C4]](i16) + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C5]](i64) + ; VI-NEXT: G_STORE [[COPY4]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 4, addrspace 1) + ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR5]](i16) + ; VI-NEXT: G_STORE [[ANYEXT2]](i32), [[PTR_ADD5]](p1) :: (store (i8) into unknown-address + 5, addrspace 1) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR4]](i32) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC5]], [[C4]](i16) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C5]](i64) + ; VI-NEXT: G_STORE [[LSHR4]](i32), [[PTR_ADD4]](p1) :: (store (i8) into unknown-address + 6, addrspace 1) + ; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR6]](i16) + ; VI-NEXT: G_STORE [[ANYEXT3]](i32), [[PTR_ADD6]](p1) :: (store (i8) into unknown-address + 7, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_p999_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -1818,7 +1818,7 @@ body: | ; GFX9-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(p999) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store (p999), align 1, addrspace 1) + G_STORE %1(p999), %0(p1) :: (store (p999), align 1, addrspace 1) ... 
--- @@ -1832,26 +1832,26 @@ body: | ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p999) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[PTRTOINT]](s64) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) - ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[TRUNC1]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C2]](s32) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) + ; SI-NEXT: [[PTRTOINT:%[0-9]+]]:_(i64) = G_PTRTOINT [[COPY1]](p999) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i64) = COPY [[PTRTOINT]](i64) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[COPY2]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY2]](i64) + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[TRUNC]](i32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C2]](i32) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; SI-NEXT: G_STORE [[COPY3]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) + ; SI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD1]](p1) :: (store (i16) into unknown-address + 2, addrspace 1) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i32) = G_TRUNC [[LSHR]](i64) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[TRUNC1]](i32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C2]](i32) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](i64) + ; SI-NEXT: G_STORE [[COPY4]](i32), [[PTR_ADD]](p1) :: (store (i16) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR2]](i32), [[PTR_ADD2]](p1) :: (store (i16) into unknown-address + 6, addrspace 1) ; ; CI-LABEL: name: test_store_global_p999_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -1865,26 +1865,26 @@ body: | ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p999) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[PTRTOINT]](s64) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) - ; 
VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) - ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[TRUNC1]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C2]](s32) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) + ; VI-NEXT: [[PTRTOINT:%[0-9]+]]:_(i64) = G_PTRTOINT [[COPY1]](p999) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i64) = COPY [[PTRTOINT]](i64) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[COPY2]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY2]](i64) + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[TRUNC]](i32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C2]](i32) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY3]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) + ; VI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD1]](p1) :: (store (i16) into unknown-address + 2, addrspace 1) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i32) = G_TRUNC [[LSHR]](i64) + ; VI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[TRUNC1]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C2]](i32) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY4]](i32), [[PTR_ADD]](p1) :: (store (i16) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR2]](i32), [[PTR_ADD2]](p1) :: (store (i16) into unknown-address + 6, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_p999_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -1894,7 +1894,7 @@ body: | ; GFX9-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(p999) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store (p999), align 2, addrspace 1) + G_STORE %1(p999), %0(p1) :: (store (p999), align 2, addrspace 1) ... --- @@ -1932,7 +1932,7 @@ body: | ; GFX9-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 4, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(p999) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store (p999), align 4, addrspace 1) + G_STORE %1(p999), %0(p1) :: (store (p999), align 4, addrspace 1) ... 
--- @@ -1970,7 +1970,7 @@ body: | ; GFX9-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(p999) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store (p999), align 8, addrspace 1) + G_STORE %1(p999), %0(p1) :: (store (p999), addrspace 1) ... --- @@ -2008,7 +2008,7 @@ body: | ; GFX9-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(p999) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store (p999), align 16, addrspace 1) + G_STORE %1(p999), %0(p1) :: (store (p999), align 16, addrspace 1) ... --- @@ -2021,102 +2021,102 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s32) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) - ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY3]](s32) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) - ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY5]](s32) - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR3]], [[COPY6]](s32) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), 
[[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY2]], [[C3]] + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[AND]], [[C2]](i32) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) + ; SI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD1]](p1) :: (store (i8) into unknown-address + 1, addrspace 1) + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[LSHR]], [[COPY3]](i32) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 2, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR2]](i32), [[PTR_ADD2]](p1) :: (store (i8) into unknown-address + 3, addrspace 1) + ; SI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](i64) + ; SI-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY4]], [[C3]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[AND1]], [[COPY5]](i32) + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY4]](i32), [[PTR_ADD3]](p1) :: (store (i8) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR4]](i32), [[PTR_ADD5]](p1) :: (store (i8) into unknown-address + 5, addrspace 1) + ; SI-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[LSHR3]], [[COPY6]](i32) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR3]](i32), [[PTR_ADD4]](p1) :: (store (i8) into unknown-address + 6, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR5]](i32), [[PTR_ADD6]](p1) :: (store (i8) into unknown-address + 7, addrspace 1) ; ; CI-LABEL: name: test_store_global_v2s32_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CI-NEXT: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 1, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; CI-NEXT: G_STORE [[COPY1]](<2 x i32>), [[COPY]](p1) :: (store (<2 x i32>), align 1, addrspace 1) ; ; VI-LABEL: name: test_store_global_v2s32_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: 
[[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C2]](s16) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) - ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) - ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) - ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) - ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) - ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) - ; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16) - ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC]], [[C2]](i16) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR1]](i16) + ; VI-NEXT: G_STORE [[ANYEXT]](i32), [[PTR_ADD1]](p1) 
:: (store (i8) into unknown-address + 1, addrspace 1) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC1]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 2, addrspace 1) + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR2]](i16) + ; VI-NEXT: G_STORE [[ANYEXT1]](i32), [[PTR_ADD2]](p1) :: (store (i8) into unknown-address + 3, addrspace 1) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](i64) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY3]](i32) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC2]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY3]](i32), [[PTR_ADD3]](p1) :: (store (i8) into unknown-address + 4, addrspace 1) + ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR4]](i16) + ; VI-NEXT: G_STORE [[ANYEXT2]](i32), [[PTR_ADD5]](p1) :: (store (i8) into unknown-address + 5, addrspace 1) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC3]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR3]](i32), [[PTR_ADD4]](p1) :: (store (i8) into unknown-address + 6, addrspace 1) + ; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR5]](i16) + ; VI-NEXT: G_STORE [[ANYEXT3]](i32), [[PTR_ADD6]](p1) :: (store (i8) into unknown-address + 7, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v2s32_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 1, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: G_STORE [[COPY1]](<2 x i32>), [[COPY]](p1) :: (store (<2 x i32>), align 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store (<2 x s32>), align 1, addrspace 1) + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + G_STORE %1(<2 x i32>), %0(p1) :: (store (<2 x i32>), align 1, addrspace 1) ... 
--- @@ -2129,60 +2129,60 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) - ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) + ; SI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i16) into unknown-address + 2, addrspace 1) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY3]](i32), [[PTR_ADD1]](p1) :: (store (i16) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD2]](p1) :: (store (i16) into unknown-address + 6, addrspace 1) ; ; CI-LABEL: name: test_store_global_v2s32_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CI-NEXT: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 2, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; CI-NEXT: G_STORE [[COPY1]](<2 x i32>), [[COPY]](p1) :: (store (<2 x i32>), align 2, addrspace 1) ; ; VI-LABEL: name: test_store_global_v2s32_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], 
[[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) - ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) + ; VI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i16) into unknown-address + 2, addrspace 1) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY3]](i32), [[PTR_ADD1]](p1) :: (store (i16) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD2]](p1) :: (store (i16) into unknown-address + 6, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v2s32_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 2, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: G_STORE [[COPY1]](<2 x i32>), [[COPY]](p1) :: (store (<2 x i32>), align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store (<2 x s32>), align 2, addrspace 1) + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + G_STORE %1(<2 x i32>), %0(p1) :: (store (<2 x i32>), align 2, addrspace 1) ... 
--- @@ -2195,32 +2195,32 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; SI-NEXT: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: G_STORE [[COPY1]](<2 x i32>), [[COPY]](p1) :: (store (<2 x i32>), align 4, addrspace 1) ; ; CI-LABEL: name: test_store_global_v2s32_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CI-NEXT: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; CI-NEXT: G_STORE [[COPY1]](<2 x i32>), [[COPY]](p1) :: (store (<2 x i32>), align 4, addrspace 1) ; ; VI-LABEL: name: test_store_global_v2s32_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; VI-NEXT: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: G_STORE [[COPY1]](<2 x i32>), [[COPY]](p1) :: (store (<2 x i32>), align 4, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v2s32_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: G_STORE [[COPY1]](<2 x i32>), [[COPY]](p1) :: (store (<2 x i32>), align 4, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store (<2 x s32>), align 4, addrspace 1) + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + G_STORE %1(<2 x i32>), %0(p1) :: (store (<2 x i32>), align 4, addrspace 1) ... 
--- @@ -2233,32 +2233,32 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; SI-NEXT: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: G_STORE [[COPY1]](<2 x i32>), [[COPY]](p1) :: (store (<2 x i32>), addrspace 1) ; ; CI-LABEL: name: test_store_global_v2s32_align8 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CI-NEXT: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; CI-NEXT: G_STORE [[COPY1]](<2 x i32>), [[COPY]](p1) :: (store (<2 x i32>), addrspace 1) ; ; VI-LABEL: name: test_store_global_v2s32_align8 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; VI-NEXT: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: G_STORE [[COPY1]](<2 x i32>), [[COPY]](p1) :: (store (<2 x i32>), addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v2s32_align8 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: G_STORE [[COPY1]](<2 x i32>), [[COPY]](p1) :: (store (<2 x i32>), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store (<2 x s32>), align 8, addrspace 1) + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + G_STORE %1(<2 x i32>), %0(p1) :: (store (<2 x i32>), addrspace 1) ... 
--- @@ -2271,32 +2271,32 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; SI-NEXT: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 16, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: G_STORE [[COPY1]](<2 x i32>), [[COPY]](p1) :: (store (<2 x i32>), align 16, addrspace 1) ; ; CI-LABEL: name: test_store_global_v2s32_align16 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CI-NEXT: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 16, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; CI-NEXT: G_STORE [[COPY1]](<2 x i32>), [[COPY]](p1) :: (store (<2 x i32>), align 16, addrspace 1) ; ; VI-LABEL: name: test_store_global_v2s32_align16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; VI-NEXT: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 16, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: G_STORE [[COPY1]](<2 x i32>), [[COPY]](p1) :: (store (<2 x i32>), align 16, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v2s32_align16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 16, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: G_STORE [[COPY1]](<2 x i32>), [[COPY]](p1) :: (store (<2 x i32>), align 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store (<2 x s32>), align 16, addrspace 1) + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + G_STORE %1(<2 x i32>), %0(p1) :: (store (<2 x i32>), align 16, addrspace 1) ... 
--- @@ -2310,105 +2310,105 @@ body: | ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<2 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s32) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) - ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY3]](s32) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) - ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY5]](s32) - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR3]], [[COPY6]](s32) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[COPY1]](<2 x p3>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<2 x i32>) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY2]], [[C3]] + ; SI-NEXT: 
[[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[AND]], [[C2]](i32) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) + ; SI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD1]](p1) :: (store (i8) into unknown-address + 1, addrspace 1) + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[LSHR]], [[COPY3]](i32) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 2, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR2]](i32), [[PTR_ADD2]](p1) :: (store (i8) into unknown-address + 3, addrspace 1) + ; SI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](i64) + ; SI-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY4]], [[C3]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[AND1]], [[COPY5]](i32) + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY4]](i32), [[PTR_ADD3]](p1) :: (store (i8) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR4]](i32), [[PTR_ADD5]](p1) :: (store (i8) into unknown-address + 5, addrspace 1) + ; SI-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[LSHR3]], [[COPY6]](i32) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR3]](i32), [[PTR_ADD4]](p1) :: (store (i8) into unknown-address + 6, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR5]](i32), [[PTR_ADD6]](p1) :: (store (i8) into unknown-address + 7, addrspace 1) ; ; CI-LABEL: name: test_store_global_v2p3_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3 - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>) - ; CI-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 1, addrspace 1) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[COPY1]](<2 x p3>) + ; CI-NEXT: G_STORE [[BITCAST]](<2 x i32>), [[COPY]](p1) :: (store (<2 x i32>), align 1, addrspace 1) ; ; VI-LABEL: name: test_store_global_v2p3_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>) - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<2 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C2]](s16) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = 
G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) - ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) - ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) - ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) - ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) - ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) - ; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16) - ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[COPY1]](<2 x p3>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<2 x i32>) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC]], [[C2]](i16) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR1]](i16) + ; VI-NEXT: G_STORE [[ANYEXT]](i32), [[PTR_ADD1]](p1) :: (store (i8) into unknown-address + 1, addrspace 1) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC1]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 2, addrspace 1) + ; VI-NEXT: 
[[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR2]](i16) + ; VI-NEXT: G_STORE [[ANYEXT1]](i32), [[PTR_ADD2]](p1) :: (store (i8) into unknown-address + 3, addrspace 1) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](i64) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY3]](i32) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC2]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY3]](i32), [[PTR_ADD3]](p1) :: (store (i8) into unknown-address + 4, addrspace 1) + ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR4]](i16) + ; VI-NEXT: G_STORE [[ANYEXT2]](i32), [[PTR_ADD5]](p1) :: (store (i8) into unknown-address + 5, addrspace 1) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC3]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR3]](i32), [[PTR_ADD4]](p1) :: (store (i8) into unknown-address + 6, addrspace 1) + ; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR5]](i16) + ; VI-NEXT: G_STORE [[ANYEXT3]](i32), [[PTR_ADD6]](p1) :: (store (i8) into unknown-address + 7, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v2p3_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>) - ; GFX9-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 1, addrspace 1) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[COPY1]](<2 x p3>) + ; GFX9-NEXT: G_STORE [[BITCAST]](<2 x i32>), [[COPY]](p1) :: (store (<2 x i32>), align 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<2 x p3>) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store (<2 x p3>), align 1, addrspace 1) + G_STORE %1(<2 x p3>), %0(p1) :: (store (<2 x p3>), align 1, addrspace 1) ... 
--- @@ -2422,63 +2422,63 @@ body: | ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<2 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) - ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[COPY1]](<2 x p3>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<2 x i32>) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) + ; SI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i16) into unknown-address + 2, addrspace 1) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY3]](i32), [[PTR_ADD1]](p1) :: (store (i16) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD2]](p1) :: (store (i16) into unknown-address + 6, addrspace 1) ; ; CI-LABEL: name: test_store_global_v2p3_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3 - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>) - ; CI-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 2, addrspace 1) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[COPY1]](<2 x p3>) + ; CI-NEXT: G_STORE [[BITCAST]](<2 x i32>), [[COPY]](p1) :: (store (<2 x i32>), align 2, addrspace 1) ; ; VI-LABEL: name: test_store_global_v2p3_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>) - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), 
[[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<2 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) - ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[COPY1]](<2 x p3>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<2 x i32>) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) + ; VI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i16) into unknown-address + 2, addrspace 1) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY3]](i32), [[PTR_ADD1]](p1) :: (store (i16) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD2]](p1) :: (store (i16) into unknown-address + 6, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v2p3_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>) - ; GFX9-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 2, addrspace 1) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[COPY1]](<2 x p3>) + ; GFX9-NEXT: G_STORE [[BITCAST]](<2 x i32>), [[COPY]](p1) :: (store (<2 x i32>), align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<2 x p3>) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store (<2 x p3>), align 2, addrspace 1) + G_STORE %1(<2 x p3>), %0(p1) :: (store (<2 x p3>), align 2, addrspace 1) ... 
--- @@ -2492,35 +2492,35 @@ body: | ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>) - ; SI-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[COPY1]](<2 x p3>) + ; SI-NEXT: G_STORE [[BITCAST]](<2 x i32>), [[COPY]](p1) :: (store (<2 x i32>), align 4, addrspace 1) ; ; CI-LABEL: name: test_store_global_v2p3_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3 - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>) - ; CI-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[COPY1]](<2 x p3>) + ; CI-NEXT: G_STORE [[BITCAST]](<2 x i32>), [[COPY]](p1) :: (store (<2 x i32>), align 4, addrspace 1) ; ; VI-LABEL: name: test_store_global_v2p3_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>) - ; VI-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[COPY1]](<2 x p3>) + ; VI-NEXT: G_STORE [[BITCAST]](<2 x i32>), [[COPY]](p1) :: (store (<2 x i32>), align 4, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v2p3_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>) - ; GFX9-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[COPY1]](<2 x p3>) + ; GFX9-NEXT: G_STORE [[BITCAST]](<2 x i32>), [[COPY]](p1) :: (store (<2 x i32>), align 4, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<2 x p3>) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store (<2 x p3>), align 4, addrspace 1) + G_STORE %1(<2 x p3>), %0(p1) :: (store (<2 x p3>), align 4, addrspace 1) ... 
--- @@ -2534,35 +2534,35 @@ body: | ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>) - ; SI-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[COPY1]](<2 x p3>) + ; SI-NEXT: G_STORE [[BITCAST]](<2 x i32>), [[COPY]](p1) :: (store (<2 x i32>), addrspace 1) ; ; CI-LABEL: name: test_store_global_v2p3_align8 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3 - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>) - ; CI-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[COPY1]](<2 x p3>) + ; CI-NEXT: G_STORE [[BITCAST]](<2 x i32>), [[COPY]](p1) :: (store (<2 x i32>), addrspace 1) ; ; VI-LABEL: name: test_store_global_v2p3_align8 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>) - ; VI-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[COPY1]](<2 x p3>) + ; VI-NEXT: G_STORE [[BITCAST]](<2 x i32>), [[COPY]](p1) :: (store (<2 x i32>), addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v2p3_align8 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>) - ; GFX9-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[COPY1]](<2 x p3>) + ; GFX9-NEXT: G_STORE [[BITCAST]](<2 x i32>), [[COPY]](p1) :: (store (<2 x i32>), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<2 x p3>) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store (<2 x p3>), align 8, addrspace 1) + G_STORE %1(<2 x p3>), %0(p1) :: (store (<2 x p3>), addrspace 1) ... 
--- @@ -2576,35 +2576,35 @@ body: | ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>) - ; SI-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 16, addrspace 1) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[COPY1]](<2 x p3>) + ; SI-NEXT: G_STORE [[BITCAST]](<2 x i32>), [[COPY]](p1) :: (store (<2 x i32>), align 16, addrspace 1) ; ; CI-LABEL: name: test_store_global_v2p3_align16 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3 - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>) - ; CI-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 16, addrspace 1) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[COPY1]](<2 x p3>) + ; CI-NEXT: G_STORE [[BITCAST]](<2 x i32>), [[COPY]](p1) :: (store (<2 x i32>), align 16, addrspace 1) ; ; VI-LABEL: name: test_store_global_v2p3_align16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>) - ; VI-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 16, addrspace 1) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[COPY1]](<2 x p3>) + ; VI-NEXT: G_STORE [[BITCAST]](<2 x i32>), [[COPY]](p1) :: (store (<2 x i32>), align 16, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v2p3_align16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>) - ; GFX9-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 16, addrspace 1) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[COPY1]](<2 x p3>) + ; GFX9-NEXT: G_STORE [[BITCAST]](<2 x i32>), [[COPY]](p1) :: (store (<2 x i32>), align 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<2 x p3>) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store (<2 x p3>), align 16, addrspace 1) + G_STORE %1(<2 x p3>), %0(p1) :: (store (<2 x p3>), align 16, addrspace 1) ... 
--- @@ -2617,106 +2617,106 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C1]](s32) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: G_STORE [[BITCAST]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) - ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY2]](s32) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C3]](s64) - ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) - ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY3]](s32) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - ; SI-NEXT: G_STORE [[BITCAST1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY4]](s32) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s64) - ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = 
G_LSHR [[BITCAST1]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C2]] + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[AND]], [[C1]](i32) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; SI-NEXT: G_STORE [[BITCAST]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) + ; SI-NEXT: G_STORE [[LSHR2]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 1, addrspace 1) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C1]](i32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LSHR]], [[C2]] + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[AND1]], [[COPY2]](i32) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C3]](i64) + ; SI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD1]](p1) :: (store (i8) into unknown-address + 2, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR3]](i32), [[PTR_ADD2]](p1) :: (store (i8) into unknown-address + 3, addrspace 1) + ; SI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[C1]](i32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C2]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[AND2]], [[COPY3]](i32) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](i64) + ; SI-NEXT: G_STORE [[BITCAST1]](i32), [[PTR_ADD3]](p1) :: (store (i8) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR4]](i32), [[PTR_ADD4]](p1) :: (store (i8) into unknown-address + 5, addrspace 1) + ; SI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[C1]](i32) + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[LSHR1]], [[C2]] + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[AND3]], [[COPY4]](i32) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C3]](i64) + ; SI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD5]](p1) :: (store (i8) into unknown-address + 6, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR5]](i32), [[PTR_ADD6]](p1) :: (store (i8) into unknown-address + 7, addrspace 1) ; ; CI-LABEL: name: test_store_global_v4s16_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; CI-NEXT: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 1, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; CI-NEXT: G_STORE [[COPY1]](<4 x i16>), [[COPY]](p1) :: (store (<4 x i16>), align 1, addrspace 1) ; ; VI-LABEL: name: test_store_global_v4s16_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: 
[[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: G_STORE [[BITCAST]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) - ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C2]](s64) - ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) - ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR3]](s16) - ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C1]](s16) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI-NEXT: G_STORE [[BITCAST1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) - ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) - ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C1]](s16) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C2]](s64) - ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) - ; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16) - ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC]], [[C1]](i16) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: 
G_STORE [[BITCAST]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR2]](i16) + ; VI-NEXT: G_STORE [[ANYEXT]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 1, addrspace 1) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC1]], [[C1]](i16) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C2]](i64) + ; VI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD1]](p1) :: (store (i8) into unknown-address + 2, addrspace 1) + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR3]](i16) + ; VI-NEXT: G_STORE [[ANYEXT1]](i32), [[PTR_ADD2]](p1) :: (store (i8) into unknown-address + 3, addrspace 1) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC2]], [[C1]](i16) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](i64) + ; VI-NEXT: G_STORE [[BITCAST1]](i32), [[PTR_ADD3]](p1) :: (store (i8) into unknown-address + 4, addrspace 1) + ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR4]](i16) + ; VI-NEXT: G_STORE [[ANYEXT2]](i32), [[PTR_ADD4]](p1) :: (store (i8) into unknown-address + 5, addrspace 1) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC3]], [[C1]](i16) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C2]](i64) + ; VI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD5]](p1) :: (store (i8) into unknown-address + 6, addrspace 1) + ; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR5]](i16) + ; VI-NEXT: G_STORE [[ANYEXT3]](i32), [[PTR_ADD6]](p1) :: (store (i8) into unknown-address + 7, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v4s16_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 1, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: G_STORE [[COPY1]](<4 x i16>), [[COPY]](p1) :: (store (<4 x i16>), align 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store (<4 x s16>), align 1, addrspace 1) + %1:_(<4 x i16>) = COPY $vgpr2_vgpr3 + G_STORE %1(<4 x i16>), %0(p1) :: (store (<4 x i16>), align 1, addrspace 1) ... 
--- @@ -2729,62 +2729,62 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI-NEXT: G_STORE [[BITCAST]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: G_STORE [[BITCAST1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; SI-NEXT: G_STORE [[BITCAST]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; SI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i16) into unknown-address + 2, addrspace 1) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; SI-NEXT: G_STORE [[BITCAST1]](i32), [[PTR_ADD1]](p1) :: (store (i16) into unknown-address + 4, addrspace 1) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; SI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD2]](p1) :: (store (i16) into unknown-address + 6, addrspace 1) ; ; CI-LABEL: name: test_store_global_v4s16_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; CI-NEXT: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 2, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; CI-NEXT: G_STORE [[COPY1]](<4 x i16>), [[COPY]](p1) :: (store (<4 x i16>), align 2, addrspace 1) ; ; VI-LABEL: name: test_store_global_v4s16_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x 
s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI-NEXT: G_STORE [[BITCAST]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: G_STORE [[BITCAST1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; VI-NEXT: G_STORE [[BITCAST]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i16) into unknown-address + 2, addrspace 1) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: G_STORE [[BITCAST1]](i32), [[PTR_ADD1]](p1) :: (store (i16) into unknown-address + 4, addrspace 1) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 6 + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD2]](p1) :: (store (i16) into unknown-address + 6, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v4s16_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 2, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: G_STORE [[COPY1]](<4 x i16>), [[COPY]](p1) :: (store (<4 x i16>), align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store (<4 x s16>), align 2, addrspace 1) + %1:_(<4 x i16>) = COPY $vgpr2_vgpr3 + G_STORE %1(<4 x i16>), %0(p1) :: (store (<4 x i16>), align 2, addrspace 1) ... 
--- @@ -2797,32 +2797,32 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; SI-NEXT: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 4, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: G_STORE [[COPY1]](<4 x i16>), [[COPY]](p1) :: (store (<4 x i16>), align 4, addrspace 1) ; ; CI-LABEL: name: test_store_global_v4s16_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; CI-NEXT: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 4, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; CI-NEXT: G_STORE [[COPY1]](<4 x i16>), [[COPY]](p1) :: (store (<4 x i16>), align 4, addrspace 1) ; ; VI-LABEL: name: test_store_global_v4s16_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; VI-NEXT: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 4, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: G_STORE [[COPY1]](<4 x i16>), [[COPY]](p1) :: (store (<4 x i16>), align 4, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v4s16_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 4, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: G_STORE [[COPY1]](<4 x i16>), [[COPY]](p1) :: (store (<4 x i16>), align 4, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store (<4 x s16>), align 4, addrspace 1) + %1:_(<4 x i16>) = COPY $vgpr2_vgpr3 + G_STORE %1(<4 x i16>), %0(p1) :: (store (<4 x i16>), align 4, addrspace 1) ... 
--- @@ -2835,32 +2835,32 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; SI-NEXT: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: G_STORE [[COPY1]](<4 x i16>), [[COPY]](p1) :: (store (<4 x i16>), addrspace 1) ; ; CI-LABEL: name: test_store_global_v4s16_align8 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; CI-NEXT: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; CI-NEXT: G_STORE [[COPY1]](<4 x i16>), [[COPY]](p1) :: (store (<4 x i16>), addrspace 1) ; ; VI-LABEL: name: test_store_global_v4s16_align8 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; VI-NEXT: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: G_STORE [[COPY1]](<4 x i16>), [[COPY]](p1) :: (store (<4 x i16>), addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v4s16_align8 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: G_STORE [[COPY1]](<4 x i16>), [[COPY]](p1) :: (store (<4 x i16>), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store (<4 x s16>), align 8, addrspace 1) + %1:_(<4 x i16>) = COPY $vgpr2_vgpr3 + G_STORE %1(<4 x i16>), %0(p1) :: (store (<4 x i16>), addrspace 1) ... 
--- @@ -2873,32 +2873,32 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; SI-NEXT: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 16, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: G_STORE [[COPY1]](<4 x i16>), [[COPY]](p1) :: (store (<4 x i16>), align 16, addrspace 1) ; ; CI-LABEL: name: test_store_global_v4s16_align16 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; CI-NEXT: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 16, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; CI-NEXT: G_STORE [[COPY1]](<4 x i16>), [[COPY]](p1) :: (store (<4 x i16>), align 16, addrspace 1) ; ; VI-LABEL: name: test_store_global_v4s16_align16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; VI-NEXT: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 16, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: G_STORE [[COPY1]](<4 x i16>), [[COPY]](p1) :: (store (<4 x i16>), align 16, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v4s16_align16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 16, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: G_STORE [[COPY1]](<4 x i16>), [[COPY]](p1) :: (store (<4 x i16>), align 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store (<4 x s16>), align 16, addrspace 1) + %1:_(<4 x i16>) = COPY $vgpr2_vgpr3 + G_STORE %1(<4 x i16>), %0(p1) :: (store (<4 x i16>), align 16, addrspace 1) ... 
--- @@ -2911,135 +2911,135 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s32) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) - ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY3]](s32) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) - ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY5]](s32) - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR3]], [[COPY6]](s32) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) - ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY8]](s32) - ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD7]](p1) :: (store (s8) into 
unknown-address + 8, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LSHR6]], [[COPY9]](s32) - ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY2]], [[C3]] + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[AND]], [[C2]](i32) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) + ; SI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD1]](p1) :: (store (i8) into unknown-address + 1, addrspace 1) + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[LSHR]], [[COPY3]](i32) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 2, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR2]](i32), [[PTR_ADD2]](p1) :: (store (i8) into unknown-address + 3, addrspace 1) + ; SI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](i64) + ; SI-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY4]], [[C3]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[AND1]], [[COPY5]](i32) + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY4]](i32), [[PTR_ADD3]](p1) :: (store (i8) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR4]](i32), [[PTR_ADD5]](p1) :: (store (i8) into unknown-address + 5, addrspace 1) + ; SI-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[LSHR3]], [[COPY6]](i32) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR3]](i32), [[PTR_ADD4]](p1) :: (store (i8) into unknown-address + 6, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR5]](i32), [[PTR_ADD6]](p1) :: (store (i8) into unknown-address + 7, addrspace 1) + ; SI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; SI-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY [[UV2]](i32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[COPY7]], [[C]](i32) + ; 
SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](i64) + ; SI-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[COPY7]], [[C3]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(i32) = G_LSHR [[AND2]], [[COPY8]](i32) + ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY7]](i32), [[PTR_ADD7]](p1) :: (store (i8) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR7]](i32), [[PTR_ADD9]](p1) :: (store (i8) into unknown-address + 9, addrspace 1) + ; SI-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(i32) = G_LSHR [[LSHR6]], [[COPY9]](i32) + ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR6]](i32), [[PTR_ADD8]](p1) :: (store (i8) into unknown-address + 10, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR8]](i32), [[PTR_ADD10]](p1) :: (store (i8) into unknown-address + 11, addrspace 1) ; ; CI-LABEL: name: test_store_global_v3s32_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; CI-NEXT: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 1, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + ; CI-NEXT: G_STORE [[COPY1]](<3 x i32>), [[COPY]](p1) :: (store (<3 x i32>), align 1, addrspace 1) ; ; VI-LABEL: name: test_store_global_v3s32_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C2]](s16) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) - ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) - ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) - ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], 
[[C1]](s64) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) - ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) - ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) - ; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16) - ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) - ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) - ; VI-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR7]](s16) - ; VI-NEXT: G_STORE [[ANYEXT4]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) - ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) - ; VI-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR8]](s16) - ; VI-NEXT: G_STORE [[ANYEXT5]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC]], [[C2]](i16) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR1]](i16) + ; VI-NEXT: G_STORE [[ANYEXT]](i32), [[PTR_ADD1]](p1) :: (store (i8) into unknown-address + 1, addrspace 1) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC1]], [[C2]](i16) + ; VI-NEXT: 
[[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 2, addrspace 1) + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR2]](i16) + ; VI-NEXT: G_STORE [[ANYEXT1]](i32), [[PTR_ADD2]](p1) :: (store (i8) into unknown-address + 3, addrspace 1) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](i64) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY3]](i32) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC2]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY3]](i32), [[PTR_ADD3]](p1) :: (store (i8) into unknown-address + 4, addrspace 1) + ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR4]](i16) + ; VI-NEXT: G_STORE [[ANYEXT2]](i32), [[PTR_ADD5]](p1) :: (store (i8) into unknown-address + 5, addrspace 1) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC3]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR3]](i32), [[PTR_ADD4]](p1) :: (store (i8) into unknown-address + 6, addrspace 1) + ; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR5]](i16) + ; VI-NEXT: G_STORE [[ANYEXT3]](i32), [[PTR_ADD6]](p1) :: (store (i8) into unknown-address + 7, addrspace 1) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; VI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[UV2]](i32) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](i64) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY4]](i32) + ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC4]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY4]](i32), [[PTR_ADD7]](p1) :: (store (i8) into unknown-address + 8, addrspace 1) + ; VI-NEXT: [[ANYEXT4:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR7]](i16) + ; VI-NEXT: G_STORE [[ANYEXT4]](i32), [[PTR_ADD9]](p1) :: (store (i8) into unknown-address + 9, addrspace 1) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR6]](i32) + ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC5]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR6]](i32), [[PTR_ADD8]](p1) :: (store (i8) into unknown-address + 10, addrspace 1) + ; VI-NEXT: [[ANYEXT5:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR8]](i16) + ; VI-NEXT: G_STORE [[ANYEXT5]](i32), [[PTR_ADD10]](p1) :: (store (i8) into unknown-address + 11, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v3s32_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; GFX9-NEXT: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 1, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + ; GFX9-NEXT: G_STORE [[COPY1]](<3 x i32>), [[COPY]](p1) :: (store (<3 x i32>), align 1, addrspace 1) %0:_(p1) = COPY 
$vgpr0_vgpr1 - %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - G_STORE %1, %0 :: (store (<3 x s32>), align 1, addrspace 1) + %1:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + G_STORE %1(<3 x i32>), %0(p1) :: (store (<3 x i32>), align 1, addrspace 1) ... --- @@ -3052,74 +3052,74 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) - ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) + ; SI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i16) into unknown-address + 2, addrspace 1) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY3]](i32), [[PTR_ADD1]](p1) :: (store (i16) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD2]](p1) :: (store (i16) into unknown-address + 6, addrspace 1) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) 
+ ; SI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[UV2]](i32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY4]](i32), [[PTR_ADD3]](p1) :: (store (i16) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR2]](i32), [[PTR_ADD4]](p1) :: (store (i16) into unknown-address + 10, addrspace 1) ; ; CI-LABEL: name: test_store_global_v3s32_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; CI-NEXT: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 2, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + ; CI-NEXT: G_STORE [[COPY1]](<3 x i32>), [[COPY]](p1) :: (store (<3 x i32>), align 2, addrspace 1) ; ; VI-LABEL: name: test_store_global_v3s32_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) - ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) + ; VI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i16) into 
unknown-address + 2, addrspace 1) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY3]](i32), [[PTR_ADD1]](p1) :: (store (i16) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD2]](p1) :: (store (i16) into unknown-address + 6, addrspace 1) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[UV2]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY4]](i32), [[PTR_ADD3]](p1) :: (store (i16) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR2]](i32), [[PTR_ADD4]](p1) :: (store (i16) into unknown-address + 10, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v3s32_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; GFX9-NEXT: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 2, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + ; GFX9-NEXT: G_STORE [[COPY1]](<3 x i32>), [[COPY]](p1) :: (store (<3 x i32>), align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - G_STORE %1, %0 :: (store (<3 x s32>), align 2, addrspace 1) + %1:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + G_STORE %1(<3 x i32>), %0(p1) :: (store (<3 x i32>), align 2, addrspace 1) ... 
--- @@ -3132,37 +3132,37 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) - ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<2 x i32>), [[COPY]](p1) :: (store (<2 x i32>), align 4, addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: G_STORE [[UV2]](i32), [[PTR_ADD]](p1) :: (store (i32) into unknown-address + 8, addrspace 1) ; ; CI-LABEL: name: test_store_global_v3s32_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; CI-NEXT: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 4, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + ; CI-NEXT: G_STORE [[COPY1]](<3 x i32>), [[COPY]](p1) :: (store (<3 x i32>), align 4, addrspace 1) ; ; VI-LABEL: name: test_store_global_v3s32_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; VI-NEXT: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 4, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + ; VI-NEXT: G_STORE [[COPY1]](<3 x i32>), [[COPY]](p1) :: (store (<3 x i32>), align 4, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v3s32_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; GFX9-NEXT: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 4, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + ; GFX9-NEXT: G_STORE [[COPY1]](<3 x i32>), [[COPY]](p1) :: (store (<3 x i32>), align 4, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - G_STORE %1, %0 :: (store (<3 x s32>), align 4, addrspace 1) + %1:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + G_STORE %1(<3 x i32>), %0(p1) :: (store (<3 x i32>), align 4, addrspace 1) ... 
--- @@ -3175,37 +3175,37 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) - ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<2 x i32>), [[COPY]](p1) :: (store (<2 x i32>), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: G_STORE [[UV2]](i32), [[PTR_ADD]](p1) :: (store (i32) into unknown-address + 8, align 8, addrspace 1) ; ; CI-LABEL: name: test_store_global_v3s32_align8 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; CI-NEXT: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 8, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + ; CI-NEXT: G_STORE [[COPY1]](<3 x i32>), [[COPY]](p1) :: (store (<3 x i32>), align 8, addrspace 1) ; ; VI-LABEL: name: test_store_global_v3s32_align8 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; VI-NEXT: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 8, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + ; VI-NEXT: G_STORE [[COPY1]](<3 x i32>), [[COPY]](p1) :: (store (<3 x i32>), align 8, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v3s32_align8 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; GFX9-NEXT: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 8, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + ; GFX9-NEXT: G_STORE [[COPY1]](<3 x i32>), [[COPY]](p1) :: (store (<3 x i32>), align 8, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - G_STORE %1, %0 :: (store (<3 x s32>), align 8, addrspace 1) + %1:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + G_STORE %1(<3 x i32>), %0(p1) :: (store (<3 x i32>), align 8, addrspace 1) ... 
--- @@ -3218,37 +3218,37 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) - ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 16, addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<2 x i32>), [[COPY]](p1) :: (store (<2 x i32>), align 16, addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: G_STORE [[UV2]](i32), [[PTR_ADD]](p1) :: (store (i32) into unknown-address + 8, align 8, addrspace 1) ; ; CI-LABEL: name: test_store_global_v3s32_align16 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; CI-NEXT: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 16, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + ; CI-NEXT: G_STORE [[COPY1]](<3 x i32>), [[COPY]](p1) :: (store (<3 x i32>), align 16, addrspace 1) ; ; VI-LABEL: name: test_store_global_v3s32_align16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; VI-NEXT: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 16, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + ; VI-NEXT: G_STORE [[COPY1]](<3 x i32>), [[COPY]](p1) :: (store (<3 x i32>), align 16, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v3s32_align16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; GFX9-NEXT: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 16, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + ; GFX9-NEXT: G_STORE [[COPY1]](<3 x i32>), [[COPY]](p1) :: (store (<3 x i32>), align 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - G_STORE %1, %0 :: (store (<3 x s32>), align 16, addrspace 1) + %1:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + G_STORE %1(<3 x i32>), %0(p1) :: (store (<3 x i32>), align 16, addrspace 1) ... 
--- @@ -3261,168 +3261,168 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s32) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) - ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY3]](s32) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) - ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY5]](s32) - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR3]], [[COPY6]](s32) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) - ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY8]](s32) - ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY7]](s32), 
[[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LSHR6]], [[COPY9]](s32) - ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) - ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY11]](s32) - ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY10]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) - ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LSHR9]], [[COPY12]](s32) - ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<4 x i32>) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY2]], [[C3]] + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[AND]], [[C2]](i32) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) + ; SI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD1]](p1) :: (store (i8) into unknown-address + 1, addrspace 1) + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[LSHR]], [[COPY3]](i32) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 2, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR2]](i32), [[PTR_ADD2]](p1) :: (store (i8) into unknown-address + 3, addrspace 1) + ; SI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = 
COPY [[UV1]](i32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](i64) + ; SI-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY4]], [[C3]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[AND1]], [[COPY5]](i32) + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY4]](i32), [[PTR_ADD3]](p1) :: (store (i8) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR4]](i32), [[PTR_ADD5]](p1) :: (store (i8) into unknown-address + 5, addrspace 1) + ; SI-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[LSHR3]], [[COPY6]](i32) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR3]](i32), [[PTR_ADD4]](p1) :: (store (i8) into unknown-address + 6, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR5]](i32), [[PTR_ADD6]](p1) :: (store (i8) into unknown-address + 7, addrspace 1) + ; SI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; SI-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY [[UV2]](i32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[COPY7]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](i64) + ; SI-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[COPY7]], [[C3]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(i32) = G_LSHR [[AND2]], [[COPY8]](i32) + ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY7]](i32), [[PTR_ADD7]](p1) :: (store (i8) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR7]](i32), [[PTR_ADD9]](p1) :: (store (i8) into unknown-address + 9, addrspace 1) + ; SI-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(i32) = G_LSHR [[LSHR6]], [[COPY9]](i32) + ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR6]](i32), [[PTR_ADD8]](p1) :: (store (i8) into unknown-address + 10, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR8]](i32), [[PTR_ADD10]](p1) :: (store (i8) into unknown-address + 11, addrspace 1) + ; SI-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](i64) + ; SI-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY [[UV3]](i32) + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(i32) = G_LSHR [[COPY10]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](i64) + ; SI-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[COPY10]], [[C3]] + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(i32) = G_LSHR [[AND3]], [[COPY11]](i32) + ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY10]](i32), [[PTR_ADD11]](p1) :: (store (i8) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR10]](i32), [[PTR_ADD13]](p1) :: (store (i8) into unknown-address + 13, addrspace 1) + ; SI-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(i32) = G_LSHR [[LSHR9]], [[COPY12]](i32) + ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR9]](i32), [[PTR_ADD12]](p1) :: (store (i8) into unknown-address + 14, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR11]](i32), [[PTR_ADD14]](p1) :: (store (i8) 
into unknown-address + 15, addrspace 1) ; ; CI-LABEL: name: test_store_global_v4s32_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; CI-NEXT: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; CI-NEXT: G_STORE [[COPY1]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 1, addrspace 1) ; ; VI-LABEL: name: test_store_global_v4s32_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C2]](s16) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) - ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) - ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) - ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) - ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) - ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) - ; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16) 
- ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) - ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) - ; VI-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR7]](s16) - ; VI-NEXT: G_STORE [[ANYEXT4]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) - ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) - ; VI-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR8]](s16) - ; VI-NEXT: G_STORE [[ANYEXT5]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) - ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) - ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC6]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) - ; VI-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR10]](s16) - ; VI-NEXT: G_STORE [[ANYEXT6]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) - ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) - ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC7]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) - ; VI-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR11]](s16) - ; VI-NEXT: G_STORE [[ANYEXT7]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<4 x i32>) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC]], [[C2]](i16) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT 
i64 1 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR1]](i16) + ; VI-NEXT: G_STORE [[ANYEXT]](i32), [[PTR_ADD1]](p1) :: (store (i8) into unknown-address + 1, addrspace 1) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC1]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 2, addrspace 1) + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR2]](i16) + ; VI-NEXT: G_STORE [[ANYEXT1]](i32), [[PTR_ADD2]](p1) :: (store (i8) into unknown-address + 3, addrspace 1) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](i64) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY3]](i32) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC2]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY3]](i32), [[PTR_ADD3]](p1) :: (store (i8) into unknown-address + 4, addrspace 1) + ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR4]](i16) + ; VI-NEXT: G_STORE [[ANYEXT2]](i32), [[PTR_ADD5]](p1) :: (store (i8) into unknown-address + 5, addrspace 1) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC3]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR3]](i32), [[PTR_ADD4]](p1) :: (store (i8) into unknown-address + 6, addrspace 1) + ; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR5]](i16) + ; VI-NEXT: G_STORE [[ANYEXT3]](i32), [[PTR_ADD6]](p1) :: (store (i8) into unknown-address + 7, addrspace 1) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; VI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[UV2]](i32) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](i64) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY4]](i32) + ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC4]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY4]](i32), [[PTR_ADD7]](p1) :: (store (i8) into unknown-address + 8, addrspace 1) + ; VI-NEXT: [[ANYEXT4:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR7]](i16) + ; VI-NEXT: G_STORE [[ANYEXT4]](i32), [[PTR_ADD9]](p1) :: (store (i8) into unknown-address + 9, addrspace 1) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR6]](i32) + ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC5]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR6]](i32), [[PTR_ADD8]](p1) :: (store (i8) into unknown-address + 10, addrspace 1) + ; VI-NEXT: [[ANYEXT5:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR8]](i16) + ; VI-NEXT: G_STORE [[ANYEXT5]](i32), [[PTR_ADD10]](p1) :: (store (i8) into unknown-address + 11, addrspace 1) + ; VI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; VI-NEXT: 
[[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; VI-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[UV3]](i32) + ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(i32) = G_LSHR [[COPY5]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](i64) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[COPY5]](i32) + ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC6]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY5]](i32), [[PTR_ADD11]](p1) :: (store (i8) into unknown-address + 12, addrspace 1) + ; VI-NEXT: [[ANYEXT6:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR10]](i16) + ; VI-NEXT: G_STORE [[ANYEXT6]](i32), [[PTR_ADD13]](p1) :: (store (i8) into unknown-address + 13, addrspace 1) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR9]](i32) + ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC7]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR9]](i32), [[PTR_ADD12]](p1) :: (store (i8) into unknown-address + 14, addrspace 1) + ; VI-NEXT: [[ANYEXT7:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR11]](i16) + ; VI-NEXT: G_STORE [[ANYEXT7]](i32), [[PTR_ADD14]](p1) :: (store (i8) into unknown-address + 15, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v4s32_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: G_STORE [[COPY1]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store (<4 x s32>), align 1, addrspace 1) + %1:_(<4 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + G_STORE %1(<4 x i32>), %0(p1) :: (store (<4 x i32>), align 1, addrspace 1) ... 
--- @@ -3435,88 +3435,88 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) - ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<4 x i32>) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) + ; SI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i16) into unknown-address + 2, addrspace 1) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = 
G_PTR_ADD [[PTR_ADD1]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY3]](i32), [[PTR_ADD1]](p1) :: (store (i16) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD2]](p1) :: (store (i16) into unknown-address + 6, addrspace 1) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[UV2]](i32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY4]](i32), [[PTR_ADD3]](p1) :: (store (i16) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR2]](i32), [[PTR_ADD4]](p1) :: (store (i16) into unknown-address + 10, addrspace 1) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; SI-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[UV3]](i32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[COPY5]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY5]](i32), [[PTR_ADD5]](p1) :: (store (i16) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR3]](i32), [[PTR_ADD6]](p1) :: (store (i16) into unknown-address + 14, addrspace 1) ; ; CI-LABEL: name: test_store_global_v4s32_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; CI-NEXT: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; CI-NEXT: G_STORE [[COPY1]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 2, addrspace 1) ; ; VI-LABEL: name: test_store_global_v4s32_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) - ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: 
[[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<4 x i32>) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) + ; VI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i16) into unknown-address + 2, addrspace 1) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY3]](i32), [[PTR_ADD1]](p1) :: (store (i16) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD2]](p1) :: (store (i16) into unknown-address + 6, addrspace 1) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[UV2]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY4]](i32), [[PTR_ADD3]](p1) :: (store (i16) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR2]](i32), [[PTR_ADD4]](p1) :: (store (i16) into unknown-address + 10, addrspace 1) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[UV3]](i32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[COPY5]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY5]](i32), [[PTR_ADD5]](p1) :: (store (i16) into unknown-address + 12, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR3]](i32), [[PTR_ADD6]](p1) :: (store (i16) into unknown-address + 14, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v4s32_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; 
GFX9-NEXT: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: G_STORE [[COPY1]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store (<4 x s32>), align 2, addrspace 1) + %1:_(<4 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + G_STORE %1(<4 x i32>), %0(p1) :: (store (<4 x i32>), align 2, addrspace 1) ... --- @@ -3529,32 +3529,32 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; SI-NEXT: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; SI-NEXT: G_STORE [[COPY1]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 4, addrspace 1) ; ; CI-LABEL: name: test_store_global_v4s32_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; CI-NEXT: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; CI-NEXT: G_STORE [[COPY1]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 4, addrspace 1) ; ; VI-LABEL: name: test_store_global_v4s32_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; VI-NEXT: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; VI-NEXT: G_STORE [[COPY1]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 4, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v4s32_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: G_STORE [[COPY1]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 4, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store (<4 x s32>), align 4, addrspace 1) + %1:_(<4 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + G_STORE %1(<4 x i32>), %0(p1) :: (store (<4 x i32>), align 4, addrspace 1) ... 
--- @@ -3567,32 +3567,32 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; SI-NEXT: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; SI-NEXT: G_STORE [[COPY1]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 8, addrspace 1) ; ; CI-LABEL: name: test_store_global_v4s32_align8 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; CI-NEXT: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; CI-NEXT: G_STORE [[COPY1]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 8, addrspace 1) ; ; VI-LABEL: name: test_store_global_v4s32_align8 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; VI-NEXT: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; VI-NEXT: G_STORE [[COPY1]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 8, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v4s32_align8 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: G_STORE [[COPY1]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 8, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store (<4 x s32>), align 8, addrspace 1) + %1:_(<4 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + G_STORE %1(<4 x i32>), %0(p1) :: (store (<4 x i32>), align 8, addrspace 1) ... 
--- @@ -3605,32 +3605,32 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; SI-NEXT: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; SI-NEXT: G_STORE [[COPY1]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), addrspace 1) ; ; CI-LABEL: name: test_store_global_v4s32_align16 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; CI-NEXT: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; CI-NEXT: G_STORE [[COPY1]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), addrspace 1) ; ; VI-LABEL: name: test_store_global_v4s32_align16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; VI-NEXT: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; VI-NEXT: G_STORE [[COPY1]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v4s32_align16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: G_STORE [[COPY1]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store (<4 x s32>), align 16, addrspace 1) + %1:_(<4 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + G_STORE %1(<4 x i32>), %0(p1) :: (store (<4 x i32>), addrspace 1) ... 
--- @@ -3643,186 +3643,186 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[UV]](s64) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C4]](s32) - ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) - ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[COPY4]](s32) - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C6]](s64) - ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[TRUNC1]](s32) - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C2]](s32) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY6]](s32) - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C6]](s64) - ; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LSHR4]], [[COPY7]](s32) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C6]](s64) - ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) - ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s64) = COPY [[UV1]](s64) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s64) = G_LSHR [[COPY8]], [[COPY9]](s32) - ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD 
[[PTR_ADD7]], [[C1]](s64) - ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[COPY8]](s64) - ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[TRUNC2]](s32) - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C2]](s32) - ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C5]] - ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY11]](s32) - ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64) - ; SI-NEXT: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) - ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[LSHR8]], [[COPY12]](s32) - ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C6]](s64) - ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) - ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR7]](s64) - ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[TRUNC3]](s32) - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[COPY13]], [[C2]](s32) - ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C3]](s64) - ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C5]] - ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY14]](s32) - ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C6]](s64) - ; SI-NEXT: G_STORE [[COPY13]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) - ; SI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[LSHR11]], [[COPY15]](s32) - ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C6]](s64) - ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR13]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY1]](<2 x i64>) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i64) = COPY [[UV]](i64) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[COPY2]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY2]](i64) + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[TRUNC]](i32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C2]](i32) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY3]], [[C5]] + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[AND]], [[C4]](i32) + ; SI-NEXT: 
[[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; SI-NEXT: G_STORE [[COPY3]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) + ; SI-NEXT: G_STORE [[LSHR2]](i32), [[PTR_ADD2]](p1) :: (store (i8) into unknown-address + 1, addrspace 1) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[LSHR1]], [[COPY4]](i32) + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C6]](i64) + ; SI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD1]](p1) :: (store (i8) into unknown-address + 2, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR3]](i32), [[PTR_ADD3]](p1) :: (store (i8) into unknown-address + 3, addrspace 1) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i32) = G_TRUNC [[LSHR]](i64) + ; SI-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[TRUNC1]](i32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[COPY5]], [[C2]](i32) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](i64) + ; SI-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY5]], [[C5]] + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[AND1]], [[COPY6]](i32) + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C6]](i64) + ; SI-NEXT: G_STORE [[COPY5]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR5]](i32), [[PTR_ADD5]](p1) :: (store (i8) into unknown-address + 5, addrspace 1) + ; SI-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[LSHR4]], [[COPY7]](i32) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C6]](i64) + ; SI-NEXT: G_STORE [[LSHR4]](i32), [[PTR_ADD4]](p1) :: (store (i8) into unknown-address + 6, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR6]](i32), [[PTR_ADD6]](p1) :: (store (i8) into unknown-address + 7, addrspace 1) + ; SI-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](i64) + ; SI-NEXT: [[COPY8:%[0-9]+]]:_(i64) = COPY [[UV1]](i64) + ; SI-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY [[C]](i32) + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(i64) = G_LSHR [[COPY8]], [[COPY9]](i32) + ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](i64) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(i32) = G_TRUNC [[COPY8]](i64) + ; SI-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY [[TRUNC2]](i32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(i32) = G_LSHR [[COPY10]], [[C2]](i32) + ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](i64) + ; SI-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[COPY10]], [[C5]] + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(i32) = G_LSHR [[AND2]], [[COPY11]](i32) + ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C6]](i64) + ; SI-NEXT: G_STORE [[COPY10]](i32), [[PTR_ADD7]](p1) :: (store (i8) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR9]](i32), [[PTR_ADD10]](p1) :: (store (i8) into unknown-address + 9, addrspace 1) + ; SI-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(i32) = G_LSHR [[LSHR8]], [[COPY12]](i32) + ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C6]](i64) + ; SI-NEXT: G_STORE [[LSHR8]](i32), [[PTR_ADD9]](p1) :: (store (i8) into unknown-address + 10, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR10]](i32), [[PTR_ADD11]](p1) :: (store (i8) into unknown-address + 11, addrspace 1) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(i32) = G_TRUNC 
[[LSHR7]](i64) + ; SI-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY [[TRUNC3]](i32) + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(i32) = G_LSHR [[COPY13]], [[C2]](i32) + ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C3]](i64) + ; SI-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[COPY13]], [[C5]] + ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(i32) = G_LSHR [[AND3]], [[COPY14]](i32) + ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C6]](i64) + ; SI-NEXT: G_STORE [[COPY13]](i32), [[PTR_ADD8]](p1) :: (store (i8) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR12]](i32), [[PTR_ADD13]](p1) :: (store (i8) into unknown-address + 13, addrspace 1) + ; SI-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(i32) = G_LSHR [[LSHR11]], [[COPY15]](i32) + ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C6]](i64) + ; SI-NEXT: G_STORE [[LSHR11]](i32), [[PTR_ADD12]](p1) :: (store (i8) into unknown-address + 14, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR13]](i32), [[PTR_ADD14]](p1) :: (store (i8) into unknown-address + 15, addrspace 1) ; ; CI-LABEL: name: test_store_global_v2s64_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; CI-NEXT: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 1, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; CI-NEXT: G_STORE [[COPY1]](<2 x i64>), [[COPY]](p1) :: (store (<2 x i64>), align 1, addrspace 1) ; ; VI-LABEL: name: test_store_global_v2s64_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[UV]](s64) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s64) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C4]](s16) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) - ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C4]](s16) - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C5]](s64) - ; VI-NEXT: G_STORE [[LSHR1]](s32), 
[[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) - ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR3]](s16) - ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[TRUNC3]](s32) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C2]](s32) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s64) - ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C4]](s16) - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C5]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) - ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16) - ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[C4]](s16) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C5]](s64) - ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) - ; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR6]](s16) - ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) - ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s64) = COPY [[UV1]](s64) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s64) = G_LSHR [[COPY5]], [[COPY6]](s32) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s32) = G_TRUNC [[COPY5]](s64) - ; VI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[TRUNC6]](s32) - ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C2]](s32) - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s64) - ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC7]], [[C4]](s16) - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) - ; VI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) - ; VI-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR9]](s16) - ; VI-NEXT: G_STORE [[ANYEXT4]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) - ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) - ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC8]], [[C4]](s16) - ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C5]](s64) - ; VI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) - ; VI-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR10]](s16) - ; VI-NEXT: G_STORE [[ANYEXT5]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) - ; VI-NEXT: [[TRUNC9:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR7]](s64) - ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[TRUNC9]](s32) - ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C2]](s32) - ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C3]](s64) - ; VI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s64) - ; VI-NEXT: 
[[LSHR12:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC10]], [[C4]](s16) - ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C5]](s64) - ; VI-NEXT: G_STORE [[COPY8]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) - ; VI-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR12]](s16) - ; VI-NEXT: G_STORE [[ANYEXT6]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) - ; VI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) - ; VI-NEXT: [[LSHR13:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC11]], [[C4]](s16) - ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C5]](s64) - ; VI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) - ; VI-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR13]](s16) - ; VI-NEXT: G_STORE [[ANYEXT7]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY1]](<2 x i64>) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i64) = COPY [[UV]](i64) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[COPY2]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY2]](i64) + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[TRUNC]](i32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C2]](i32) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i64) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC1]], [[C4]](i16) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; VI-NEXT: G_STORE [[COPY3]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR2]](i16) + ; VI-NEXT: G_STORE [[ANYEXT]](i32), [[PTR_ADD2]](p1) :: (store (i8) into unknown-address + 1, addrspace 1) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC2]], [[C4]](i16) + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C5]](i64) + ; VI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD1]](p1) :: (store (i8) into unknown-address + 2, addrspace 1) + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR3]](i16) + ; VI-NEXT: G_STORE [[ANYEXT1]](i32), [[PTR_ADD3]](p1) :: (store (i8) into unknown-address + 3, addrspace 1) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i32) = G_TRUNC [[LSHR]](i64) + ; VI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[TRUNC3]](i32) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C2]](i32) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](i64) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i64) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC4]], [[C4]](i16) + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C5]](i64) + ; VI-NEXT: G_STORE [[COPY4]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 4, addrspace 1) + ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR5]](i16) + ; VI-NEXT: G_STORE [[ANYEXT2]](i32), 
[[PTR_ADD5]](p1) :: (store (i8) into unknown-address + 5, addrspace 1) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR4]](i32) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC5]], [[C4]](i16) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C5]](i64) + ; VI-NEXT: G_STORE [[LSHR4]](i32), [[PTR_ADD4]](p1) :: (store (i8) into unknown-address + 6, addrspace 1) + ; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR6]](i16) + ; VI-NEXT: G_STORE [[ANYEXT3]](i32), [[PTR_ADD6]](p1) :: (store (i8) into unknown-address + 7, addrspace 1) + ; VI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; VI-NEXT: [[COPY5:%[0-9]+]]:_(i64) = COPY [[UV1]](i64) + ; VI-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY [[C]](i32) + ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(i64) = G_LSHR [[COPY5]], [[COPY6]](i32) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](i64) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(i32) = G_TRUNC [[COPY5]](i64) + ; VI-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY [[TRUNC6]](i32) + ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(i32) = G_LSHR [[COPY7]], [[C2]](i32) + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](i64) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[COPY5]](i64) + ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC7]], [[C4]](i16) + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](i64) + ; VI-NEXT: G_STORE [[COPY7]](i32), [[PTR_ADD7]](p1) :: (store (i8) into unknown-address + 8, addrspace 1) + ; VI-NEXT: [[ANYEXT4:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR9]](i16) + ; VI-NEXT: G_STORE [[ANYEXT4]](i32), [[PTR_ADD10]](p1) :: (store (i8) into unknown-address + 9, addrspace 1) + ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR8]](i32) + ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC8]], [[C4]](i16) + ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C5]](i64) + ; VI-NEXT: G_STORE [[LSHR8]](i32), [[PTR_ADD9]](p1) :: (store (i8) into unknown-address + 10, addrspace 1) + ; VI-NEXT: [[ANYEXT5:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR10]](i16) + ; VI-NEXT: G_STORE [[ANYEXT5]](i32), [[PTR_ADD11]](p1) :: (store (i8) into unknown-address + 11, addrspace 1) + ; VI-NEXT: [[TRUNC9:%[0-9]+]]:_(i32) = G_TRUNC [[LSHR7]](i64) + ; VI-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY [[TRUNC9]](i32) + ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(i32) = G_LSHR [[COPY8]], [[C2]](i32) + ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C3]](i64) + ; VI-NEXT: [[TRUNC10:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR7]](i64) + ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC10]], [[C4]](i16) + ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C5]](i64) + ; VI-NEXT: G_STORE [[COPY8]](i32), [[PTR_ADD8]](p1) :: (store (i8) into unknown-address + 12, addrspace 1) + ; VI-NEXT: [[ANYEXT6:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR12]](i16) + ; VI-NEXT: G_STORE [[ANYEXT6]](i32), [[PTR_ADD13]](p1) :: (store (i8) into unknown-address + 13, addrspace 1) + ; VI-NEXT: [[TRUNC11:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR11]](i32) + ; VI-NEXT: [[LSHR13:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC11]], [[C4]](i16) + ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C5]](i64) + ; VI-NEXT: G_STORE [[LSHR11]](i32), [[PTR_ADD12]](p1) :: (store (i8) into unknown-address + 14, addrspace 1) + ; VI-NEXT: [[ANYEXT7:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR13]](i16) + ; VI-NEXT: G_STORE [[ANYEXT7]](i32), [[PTR_ADD14]](p1) :: (store (i8) into unknown-address + 15, addrspace 1) ; ; GFX9-LABEL: name: 
test_store_global_v2s64_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 1, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: G_STORE [[COPY1]](<2 x i64>), [[COPY]](p1) :: (store (<2 x i64>), align 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store (<2 x s64>), align 1, addrspace 1) + %1:_(<2 x i64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + G_STORE %1(<2 x i64>), %0(p1) :: (store (<2 x i64>), align 1, addrspace 1) ... --- @@ -3835,106 +3835,106 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[UV]](s64) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) - ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[TRUNC1]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C2]](s32) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s64) = COPY [[UV1]](s64) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[COPY5]], [[COPY6]](s32) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[COPY5]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[TRUNC2]](s32) - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C2]](s32) - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - ; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) - ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR3]](s64) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[TRUNC3]](s32) - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR 
[[COPY8]], [[C2]](s32) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C3]](s64) - ; SI-NEXT: G_STORE [[COPY8]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY1]](<2 x i64>) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i64) = COPY [[UV]](i64) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[COPY2]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY2]](i64) + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[TRUNC]](i32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C2]](i32) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; SI-NEXT: G_STORE [[COPY3]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) + ; SI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD1]](p1) :: (store (i16) into unknown-address + 2, addrspace 1) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i32) = G_TRUNC [[LSHR]](i64) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[TRUNC1]](i32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C2]](i32) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](i64) + ; SI-NEXT: G_STORE [[COPY4]](i32), [[PTR_ADD]](p1) :: (store (i16) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR2]](i32), [[PTR_ADD2]](p1) :: (store (i16) into unknown-address + 6, addrspace 1) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; SI-NEXT: [[COPY5:%[0-9]+]]:_(i64) = COPY [[UV1]](i64) + ; SI-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY [[C]](i32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(i64) = G_LSHR [[COPY5]], [[COPY6]](i32) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](i64) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(i32) = G_TRUNC [[COPY5]](i64) + ; SI-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY [[TRUNC2]](i32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[COPY7]], [[C2]](i32) + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](i64) + ; SI-NEXT: G_STORE [[COPY7]](i32), [[PTR_ADD3]](p1) :: (store (i16) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR4]](i32), [[PTR_ADD5]](p1) :: (store (i16) into unknown-address + 10, addrspace 1) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(i32) = G_TRUNC [[LSHR3]](i64) + ; SI-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY [[TRUNC3]](i32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[COPY8]], [[C2]](i32) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C3]](i64) + ; SI-NEXT: G_STORE [[COPY8]](i32), [[PTR_ADD4]](p1) :: (store (i16) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR5]](i32), [[PTR_ADD6]](p1) :: (store (i16) into unknown-address + 14, addrspace 1) ; ; CI-LABEL: name: test_store_global_v2s64_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; CI-NEXT: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store 
(<2 x s64>), align 2, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; CI-NEXT: G_STORE [[COPY1]](<2 x i64>), [[COPY]](p1) :: (store (<2 x i64>), align 2, addrspace 1) ; ; VI-LABEL: name: test_store_global_v2s64_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[UV]](s64) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) - ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[TRUNC1]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C2]](s32) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s64) = COPY [[UV1]](s64) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[COPY5]], [[COPY6]](s32) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[COPY5]](s64) - ; VI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[TRUNC2]](s32) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C2]](s32) - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR3]](s64) - ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[TRUNC3]](s32) - ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C2]](s32) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY8]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY1]](<2 x i64>) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i64) = COPY [[UV]](i64) + ; VI-NEXT: 
[[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[COPY2]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY2]](i64) + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[TRUNC]](i32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C2]](i32) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY3]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) + ; VI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD1]](p1) :: (store (i16) into unknown-address + 2, addrspace 1) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i32) = G_TRUNC [[LSHR]](i64) + ; VI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[TRUNC1]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C2]](i32) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY4]](i32), [[PTR_ADD]](p1) :: (store (i16) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR2]](i32), [[PTR_ADD2]](p1) :: (store (i16) into unknown-address + 6, addrspace 1) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[COPY5:%[0-9]+]]:_(i64) = COPY [[UV1]](i64) + ; VI-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY [[C]](i32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i64) = G_LSHR [[COPY5]], [[COPY6]](i32) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](i64) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i32) = G_TRUNC [[COPY5]](i64) + ; VI-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY [[TRUNC2]](i32) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[COPY7]], [[C2]](i32) + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY7]](i32), [[PTR_ADD3]](p1) :: (store (i16) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR4]](i32), [[PTR_ADD5]](p1) :: (store (i16) into unknown-address + 10, addrspace 1) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i32) = G_TRUNC [[LSHR3]](i64) + ; VI-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY [[TRUNC3]](i32) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[COPY8]], [[C2]](i32) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY8]](i32), [[PTR_ADD4]](p1) :: (store (i16) into unknown-address + 12, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR5]](i32), [[PTR_ADD6]](p1) :: (store (i16) into unknown-address + 14, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v2s64_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 2, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: G_STORE [[COPY1]](<2 x i64>), [[COPY]](p1) :: (store (<2 x i64>), align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store (<2 x s64>), align 2, addrspace 1) + %1:_(<2 x i64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + G_STORE %1(<2 x i64>), %0(p1) :: (store (<2 x i64>), align 2, addrspace 1) ... 
--- @@ -3947,32 +3947,32 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; SI-NEXT: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 4, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; SI-NEXT: G_STORE [[COPY1]](<2 x i64>), [[COPY]](p1) :: (store (<2 x i64>), align 4, addrspace 1) ; ; CI-LABEL: name: test_store_global_v2s64_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; CI-NEXT: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 4, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; CI-NEXT: G_STORE [[COPY1]](<2 x i64>), [[COPY]](p1) :: (store (<2 x i64>), align 4, addrspace 1) ; ; VI-LABEL: name: test_store_global_v2s64_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; VI-NEXT: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 4, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; VI-NEXT: G_STORE [[COPY1]](<2 x i64>), [[COPY]](p1) :: (store (<2 x i64>), align 4, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v2s64_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 4, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: G_STORE [[COPY1]](<2 x i64>), [[COPY]](p1) :: (store (<2 x i64>), align 4, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store (<2 x s64>), align 4, addrspace 1) + %1:_(<2 x i64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + G_STORE %1(<2 x i64>), %0(p1) :: (store (<2 x i64>), align 4, addrspace 1) ... 
--- @@ -3985,32 +3985,32 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; SI-NEXT: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 8, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; SI-NEXT: G_STORE [[COPY1]](<2 x i64>), [[COPY]](p1) :: (store (<2 x i64>), align 8, addrspace 1) ; ; CI-LABEL: name: test_store_global_v2s64_align8 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; CI-NEXT: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 8, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; CI-NEXT: G_STORE [[COPY1]](<2 x i64>), [[COPY]](p1) :: (store (<2 x i64>), align 8, addrspace 1) ; ; VI-LABEL: name: test_store_global_v2s64_align8 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; VI-NEXT: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 8, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; VI-NEXT: G_STORE [[COPY1]](<2 x i64>), [[COPY]](p1) :: (store (<2 x i64>), align 8, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v2s64_align8 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 8, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: G_STORE [[COPY1]](<2 x i64>), [[COPY]](p1) :: (store (<2 x i64>), align 8, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store (<2 x s64>), align 8, addrspace 1) + %1:_(<2 x i64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + G_STORE %1(<2 x i64>), %0(p1) :: (store (<2 x i64>), align 8, addrspace 1) ... 
--- @@ -4023,32 +4023,32 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; SI-NEXT: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; SI-NEXT: G_STORE [[COPY1]](<2 x i64>), [[COPY]](p1) :: (store (<2 x i64>), addrspace 1) ; ; CI-LABEL: name: test_store_global_v2s64_align16 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; CI-NEXT: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; CI-NEXT: G_STORE [[COPY1]](<2 x i64>), [[COPY]](p1) :: (store (<2 x i64>), addrspace 1) ; ; VI-LABEL: name: test_store_global_v2s64_align16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; VI-NEXT: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; VI-NEXT: G_STORE [[COPY1]](<2 x i64>), [[COPY]](p1) :: (store (<2 x i64>), addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v2s64_align16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: G_STORE [[COPY1]](<2 x i64>), [[COPY]](p1) :: (store (<2 x i64>), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store (<2 x s64>), align 16, addrspace 1) + %1:_(<2 x i64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + G_STORE %1(<2 x i64>), %0(p1) :: (store (<2 x i64>), addrspace 1) ... 
--- @@ -4061,172 +4061,172 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s32) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) - ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY3]](s32) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) - ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY5]](s32) - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR3]], [[COPY6]](s32) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) - ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY8]](s32) - ; SI-NEXT: 
[[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LSHR6]], [[COPY9]](s32) - ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) - ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY11]](s32) - ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY10]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) - ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LSHR9]], [[COPY12]](s32) - ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x i16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](<8 x i16>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<4 x i32>) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY2]], [[C3]] + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[AND]], [[C2]](i32) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) + ; SI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD1]](p1) :: (store (i8) into unknown-address + 1, addrspace 1) + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[LSHR]], [[COPY3]](i32) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 2, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR2]](i32), [[PTR_ADD2]](p1) :: (store (i8) into unknown-address + 3, 
addrspace 1) + ; SI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](i64) + ; SI-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY4]], [[C3]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[AND1]], [[COPY5]](i32) + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY4]](i32), [[PTR_ADD3]](p1) :: (store (i8) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR4]](i32), [[PTR_ADD5]](p1) :: (store (i8) into unknown-address + 5, addrspace 1) + ; SI-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[LSHR3]], [[COPY6]](i32) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR3]](i32), [[PTR_ADD4]](p1) :: (store (i8) into unknown-address + 6, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR5]](i32), [[PTR_ADD6]](p1) :: (store (i8) into unknown-address + 7, addrspace 1) + ; SI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; SI-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY [[UV2]](i32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[COPY7]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](i64) + ; SI-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[COPY7]], [[C3]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(i32) = G_LSHR [[AND2]], [[COPY8]](i32) + ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY7]](i32), [[PTR_ADD7]](p1) :: (store (i8) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR7]](i32), [[PTR_ADD9]](p1) :: (store (i8) into unknown-address + 9, addrspace 1) + ; SI-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(i32) = G_LSHR [[LSHR6]], [[COPY9]](i32) + ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR6]](i32), [[PTR_ADD8]](p1) :: (store (i8) into unknown-address + 10, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR8]](i32), [[PTR_ADD10]](p1) :: (store (i8) into unknown-address + 11, addrspace 1) + ; SI-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](i64) + ; SI-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY [[UV3]](i32) + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(i32) = G_LSHR [[COPY10]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](i64) + ; SI-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[COPY10]], [[C3]] + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(i32) = G_LSHR [[AND3]], [[COPY11]](i32) + ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY10]](i32), [[PTR_ADD11]](p1) :: (store (i8) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR10]](i32), [[PTR_ADD13]](p1) :: (store (i8) into unknown-address + 13, addrspace 1) + ; SI-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(i32) = G_LSHR [[LSHR9]], [[COPY12]](i32) + ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C4]](i64) + 
; SI-NEXT: G_STORE [[LSHR9]](i32), [[PTR_ADD12]](p1) :: (store (i8) into unknown-address + 14, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR11]](i32), [[PTR_ADD14]](p1) :: (store (i8) into unknown-address + 15, addrspace 1) ; ; CI-LABEL: name: test_store_global_v8s16_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>) - ; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x i16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](<8 x i16>) + ; CI-NEXT: G_STORE [[BITCAST]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 1, addrspace 1) ; ; VI-LABEL: name: test_store_global_v8s16_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>) - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C2]](s16) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) - ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) - ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) - ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) - ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) - ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD5]](p1) :: (store (s8) 
into unknown-address + 5, addrspace 1) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) - ; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16) - ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) - ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) - ; VI-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR7]](s16) - ; VI-NEXT: G_STORE [[ANYEXT4]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) - ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) - ; VI-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR8]](s16) - ; VI-NEXT: G_STORE [[ANYEXT5]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) - ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) - ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC6]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) - ; VI-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR10]](s16) - ; VI-NEXT: G_STORE [[ANYEXT6]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) - ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) - ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC7]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) - ; VI-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR11]](s16) - ; VI-NEXT: G_STORE [[ANYEXT7]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x i16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](<8 x i16>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<4 x i32>) + ; VI-NEXT: 
[[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC]], [[C2]](i16) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR1]](i16) + ; VI-NEXT: G_STORE [[ANYEXT]](i32), [[PTR_ADD1]](p1) :: (store (i8) into unknown-address + 1, addrspace 1) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC1]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 2, addrspace 1) + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR2]](i16) + ; VI-NEXT: G_STORE [[ANYEXT1]](i32), [[PTR_ADD2]](p1) :: (store (i8) into unknown-address + 3, addrspace 1) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](i64) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY3]](i32) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC2]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY3]](i32), [[PTR_ADD3]](p1) :: (store (i8) into unknown-address + 4, addrspace 1) + ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR4]](i16) + ; VI-NEXT: G_STORE [[ANYEXT2]](i32), [[PTR_ADD5]](p1) :: (store (i8) into unknown-address + 5, addrspace 1) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC3]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR3]](i32), [[PTR_ADD4]](p1) :: (store (i8) into unknown-address + 6, addrspace 1) + ; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR5]](i16) + ; VI-NEXT: G_STORE [[ANYEXT3]](i32), [[PTR_ADD6]](p1) :: (store (i8) into unknown-address + 7, addrspace 1) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; VI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[UV2]](i32) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](i64) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY4]](i32) + ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC4]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY4]](i32), [[PTR_ADD7]](p1) :: (store (i8) into unknown-address + 8, addrspace 1) + ; VI-NEXT: [[ANYEXT4:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR7]](i16) + ; VI-NEXT: G_STORE [[ANYEXT4]](i32), [[PTR_ADD9]](p1) :: (store (i8) into unknown-address + 9, addrspace 1) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = 
G_TRUNC [[LSHR6]](i32) + ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC5]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR6]](i32), [[PTR_ADD8]](p1) :: (store (i8) into unknown-address + 10, addrspace 1) + ; VI-NEXT: [[ANYEXT5:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR8]](i16) + ; VI-NEXT: G_STORE [[ANYEXT5]](i32), [[PTR_ADD10]](p1) :: (store (i8) into unknown-address + 11, addrspace 1) + ; VI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; VI-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[UV3]](i32) + ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(i32) = G_LSHR [[COPY5]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](i64) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[COPY5]](i32) + ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC6]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY5]](i32), [[PTR_ADD11]](p1) :: (store (i8) into unknown-address + 12, addrspace 1) + ; VI-NEXT: [[ANYEXT6:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR10]](i16) + ; VI-NEXT: G_STORE [[ANYEXT6]](i32), [[PTR_ADD13]](p1) :: (store (i8) into unknown-address + 13, addrspace 1) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR9]](i32) + ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC7]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR9]](i32), [[PTR_ADD12]](p1) :: (store (i8) into unknown-address + 14, addrspace 1) + ; VI-NEXT: [[ANYEXT7:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR11]](i16) + ; VI-NEXT: G_STORE [[ANYEXT7]](i32), [[PTR_ADD14]](p1) :: (store (i8) into unknown-address + 15, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v8s16_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>) - ; GFX9-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<8 x i16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](<8 x i16>) + ; GFX9-NEXT: G_STORE [[BITCAST]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store (<8 x s16>), align 1, addrspace 1) + %1:_(<8 x i16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + G_STORE %1(<8 x i16>), %0(p1) :: (store (<8 x i16>), align 1, addrspace 1) ... 
--- @@ -4239,92 +4239,92 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) - ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x i16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](<8 x i16>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<4 x i32>) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) + ; SI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i16) into unknown-address + 2, addrspace 1) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; SI-NEXT: 
[[COPY3:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY3]](i32), [[PTR_ADD1]](p1) :: (store (i16) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD2]](p1) :: (store (i16) into unknown-address + 6, addrspace 1) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[UV2]](i32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY4]](i32), [[PTR_ADD3]](p1) :: (store (i16) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR2]](i32), [[PTR_ADD4]](p1) :: (store (i16) into unknown-address + 10, addrspace 1) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; SI-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[UV3]](i32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[COPY5]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY5]](i32), [[PTR_ADD5]](p1) :: (store (i16) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR3]](i32), [[PTR_ADD6]](p1) :: (store (i16) into unknown-address + 14, addrspace 1) ; ; CI-LABEL: name: test_store_global_v8s16_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>) - ; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x i16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](<8 x i16>) + ; CI-NEXT: G_STORE [[BITCAST]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 2, addrspace 1) ; ; VI-LABEL: name: test_store_global_v8s16_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>) - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) - ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), 
[[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x i16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](<8 x i16>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<4 x i32>) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) + ; VI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i16) into unknown-address + 2, addrspace 1) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY3]](i32), [[PTR_ADD1]](p1) :: (store (i16) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD2]](p1) :: (store (i16) into unknown-address + 6, addrspace 1) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[UV2]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY4]](i32), [[PTR_ADD3]](p1) :: (store (i16) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR2]](i32), [[PTR_ADD4]](p1) :: (store (i16) into unknown-address + 10, addrspace 1) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[UV3]](i32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[COPY5]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](i64) + ; VI-NEXT: G_STORE 
[[COPY5]](i32), [[PTR_ADD5]](p1) :: (store (i16) into unknown-address + 12, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR3]](i32), [[PTR_ADD6]](p1) :: (store (i16) into unknown-address + 14, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v8s16_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>) - ; GFX9-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<8 x i16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](<8 x i16>) + ; GFX9-NEXT: G_STORE [[BITCAST]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store (<8 x s16>), align 2, addrspace 1) + %1:_(<8 x i16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + G_STORE %1(<8 x i16>), %0(p1) :: (store (<8 x i16>), align 2, addrspace 1) ... --- @@ -4337,36 +4337,36 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>) - ; SI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x i16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](<8 x i16>) + ; SI-NEXT: G_STORE [[BITCAST]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 4, addrspace 1) ; ; CI-LABEL: name: test_store_global_v8s16_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>) - ; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x i16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](<8 x i16>) + ; CI-NEXT: G_STORE [[BITCAST]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 4, addrspace 1) ; ; VI-LABEL: name: test_store_global_v8s16_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>) - ; VI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x i16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](<8 x i16>) + ; VI-NEXT: G_STORE [[BITCAST]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 4, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v8s16_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x 
s32>) = G_BITCAST [[COPY1]](<8 x s16>) - ; GFX9-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<8 x i16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](<8 x i16>) + ; GFX9-NEXT: G_STORE [[BITCAST]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 4, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store (<8 x s16>), align 4, addrspace 1) + %1:_(<8 x i16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + G_STORE %1(<8 x i16>), %0(p1) :: (store (<8 x i16>), align 4, addrspace 1) ... --- @@ -4379,36 +4379,36 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>) - ; SI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x i16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](<8 x i16>) + ; SI-NEXT: G_STORE [[BITCAST]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 8, addrspace 1) ; ; CI-LABEL: name: test_store_global_v8s16_align8 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>) - ; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x i16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](<8 x i16>) + ; CI-NEXT: G_STORE [[BITCAST]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 8, addrspace 1) ; ; VI-LABEL: name: test_store_global_v8s16_align8 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>) - ; VI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x i16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](<8 x i16>) + ; VI-NEXT: G_STORE [[BITCAST]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 8, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v8s16_align8 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>) - ; GFX9-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<8 x i16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](<8 x i16>) + ; GFX9-NEXT: G_STORE [[BITCAST]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 8, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 
:: (store (<8 x s16>), align 8, addrspace 1) + %1:_(<8 x i16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + G_STORE %1(<8 x i16>), %0(p1) :: (store (<8 x i16>), align 8, addrspace 1) ... --- @@ -4421,36 +4421,36 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>) - ; SI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x i16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](<8 x i16>) + ; SI-NEXT: G_STORE [[BITCAST]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), addrspace 1) ; ; CI-LABEL: name: test_store_global_v8s16_align16 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>) - ; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x i16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](<8 x i16>) + ; CI-NEXT: G_STORE [[BITCAST]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), addrspace 1) ; ; VI-LABEL: name: test_store_global_v8s16_align16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>) - ; VI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x i16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](<8 x i16>) + ; VI-NEXT: G_STORE [[BITCAST]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v8s16_align16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>) - ; GFX9-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<8 x i16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](<8 x i16>) + ; GFX9-NEXT: G_STORE [[BITCAST]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store (<8 x s16>), align 16, addrspace 1) + %1:_(<8 x i16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + G_STORE %1(<8 x i16>), %0(p1) :: (store (<8 x i16>), addrspace 1) ... 
--- @@ -4464,171 +4464,171 @@ body: | ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s32) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) - ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY3]](s32) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) - ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY5]](s32) - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR3]], [[COPY6]](s32) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) - ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY8]](s32) - ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; SI-NEXT: 
G_STORE [[COPY7]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LSHR6]], [[COPY9]](s32) - ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) - ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY11]](s32) - ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY10]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) - ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LSHR9]], [[COPY12]](s32) - ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](<2 x p0>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<4 x i32>) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY2]], [[C3]] + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[AND]], [[C2]](i32) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) + ; SI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD1]](p1) :: (store (i8) into unknown-address + 1, addrspace 1) + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[LSHR]], [[COPY3]](i32) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 2, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR2]](i32), [[PTR_ADD2]](p1) :: (store (i8) into unknown-address + 3, addrspace 1) + ; SI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; SI-NEXT: 
[[COPY4:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](i64) + ; SI-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY4]], [[C3]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[AND1]], [[COPY5]](i32) + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY4]](i32), [[PTR_ADD3]](p1) :: (store (i8) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR4]](i32), [[PTR_ADD5]](p1) :: (store (i8) into unknown-address + 5, addrspace 1) + ; SI-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[LSHR3]], [[COPY6]](i32) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR3]](i32), [[PTR_ADD4]](p1) :: (store (i8) into unknown-address + 6, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR5]](i32), [[PTR_ADD6]](p1) :: (store (i8) into unknown-address + 7, addrspace 1) + ; SI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; SI-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY [[UV2]](i32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[COPY7]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](i64) + ; SI-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[COPY7]], [[C3]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(i32) = G_LSHR [[AND2]], [[COPY8]](i32) + ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY7]](i32), [[PTR_ADD7]](p1) :: (store (i8) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR7]](i32), [[PTR_ADD9]](p1) :: (store (i8) into unknown-address + 9, addrspace 1) + ; SI-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(i32) = G_LSHR [[LSHR6]], [[COPY9]](i32) + ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR6]](i32), [[PTR_ADD8]](p1) :: (store (i8) into unknown-address + 10, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR8]](i32), [[PTR_ADD10]](p1) :: (store (i8) into unknown-address + 11, addrspace 1) + ; SI-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](i64) + ; SI-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY [[UV3]](i32) + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(i32) = G_LSHR [[COPY10]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](i64) + ; SI-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[COPY10]], [[C3]] + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(i32) = G_LSHR [[AND3]], [[COPY11]](i32) + ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY10]](i32), [[PTR_ADD11]](p1) :: (store (i8) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR10]](i32), [[PTR_ADD13]](p1) :: (store (i8) into unknown-address + 13, addrspace 1) + ; SI-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(i32) = G_LSHR [[LSHR9]], [[COPY12]](i32) + ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR9]](i32), [[PTR_ADD12]](p1) :: (store (i8) into unknown-address + 14, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR11]](i32), 
[[PTR_ADD14]](p1) :: (store (i8) into unknown-address + 15, addrspace 1) ; ; CI-LABEL: name: test_store_global_v2p0_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>) - ; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](<2 x p0>) + ; CI-NEXT: G_STORE [[BITCAST]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 1, addrspace 1) ; ; VI-LABEL: name: test_store_global_v2p0_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>) - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C2]](s16) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) - ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) - ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) - ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) - ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) - ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD 
[[PTR_ADD4]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) - ; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16) - ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) - ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) - ; VI-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR7]](s16) - ; VI-NEXT: G_STORE [[ANYEXT4]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) - ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) - ; VI-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR8]](s16) - ; VI-NEXT: G_STORE [[ANYEXT5]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) - ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) - ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC6]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) - ; VI-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR10]](s16) - ; VI-NEXT: G_STORE [[ANYEXT6]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) - ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) - ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC7]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) - ; VI-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR11]](s16) - ; VI-NEXT: G_STORE [[ANYEXT7]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](<2 x p0>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<4 x i32>) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: 
[[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC]], [[C2]](i16) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR1]](i16) + ; VI-NEXT: G_STORE [[ANYEXT]](i32), [[PTR_ADD1]](p1) :: (store (i8) into unknown-address + 1, addrspace 1) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC1]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 2, addrspace 1) + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR2]](i16) + ; VI-NEXT: G_STORE [[ANYEXT1]](i32), [[PTR_ADD2]](p1) :: (store (i8) into unknown-address + 3, addrspace 1) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](i64) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY3]](i32) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC2]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY3]](i32), [[PTR_ADD3]](p1) :: (store (i8) into unknown-address + 4, addrspace 1) + ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR4]](i16) + ; VI-NEXT: G_STORE [[ANYEXT2]](i32), [[PTR_ADD5]](p1) :: (store (i8) into unknown-address + 5, addrspace 1) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC3]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR3]](i32), [[PTR_ADD4]](p1) :: (store (i8) into unknown-address + 6, addrspace 1) + ; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR5]](i16) + ; VI-NEXT: G_STORE [[ANYEXT3]](i32), [[PTR_ADD6]](p1) :: (store (i8) into unknown-address + 7, addrspace 1) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; VI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[UV2]](i32) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](i64) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY4]](i32) + ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC4]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY4]](i32), [[PTR_ADD7]](p1) :: (store (i8) into unknown-address + 8, addrspace 1) + ; VI-NEXT: [[ANYEXT4:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR7]](i16) + ; VI-NEXT: G_STORE [[ANYEXT4]](i32), [[PTR_ADD9]](p1) :: (store (i8) into unknown-address + 9, addrspace 1) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR6]](i32) + ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC5]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR6]](i32), [[PTR_ADD8]](p1) :: (store (i8) into unknown-address + 10, addrspace 1) + ; VI-NEXT: 
[[ANYEXT5:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR8]](i16) + ; VI-NEXT: G_STORE [[ANYEXT5]](i32), [[PTR_ADD10]](p1) :: (store (i8) into unknown-address + 11, addrspace 1) + ; VI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; VI-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[UV3]](i32) + ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(i32) = G_LSHR [[COPY5]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](i64) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[COPY5]](i32) + ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC6]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY5]](i32), [[PTR_ADD11]](p1) :: (store (i8) into unknown-address + 12, addrspace 1) + ; VI-NEXT: [[ANYEXT6:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR10]](i16) + ; VI-NEXT: G_STORE [[ANYEXT6]](i32), [[PTR_ADD13]](p1) :: (store (i8) into unknown-address + 13, addrspace 1) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR9]](i32) + ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC7]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR9]](i32), [[PTR_ADD12]](p1) :: (store (i8) into unknown-address + 14, addrspace 1) + ; VI-NEXT: [[ANYEXT7:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR11]](i16) + ; VI-NEXT: G_STORE [[ANYEXT7]](i32), [[PTR_ADD14]](p1) :: (store (i8) into unknown-address + 15, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v2p0_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>) - ; GFX9-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](<2 x p0>) + ; GFX9-NEXT: G_STORE [[BITCAST]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store (<2 x p0>), align 1, addrspace 1) + G_STORE %1(<2 x p0>), %0(p1) :: (store (<2 x p0>), align 1, addrspace 1) ... 
--- @@ -4642,91 +4642,91 @@ body: | ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) - ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](<2 x p0>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<4 x i32>) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) + ; SI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i16) into unknown-address + 2, addrspace 1) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C]](i32) + ; SI-NEXT: 
[[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY3]](i32), [[PTR_ADD1]](p1) :: (store (i16) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD2]](p1) :: (store (i16) into unknown-address + 6, addrspace 1) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[UV2]](i32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY4]](i32), [[PTR_ADD3]](p1) :: (store (i16) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR2]](i32), [[PTR_ADD4]](p1) :: (store (i16) into unknown-address + 10, addrspace 1) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; SI-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[UV3]](i32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[COPY5]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY5]](i32), [[PTR_ADD5]](p1) :: (store (i16) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR3]](i32), [[PTR_ADD6]](p1) :: (store (i16) into unknown-address + 14, addrspace 1) ; ; CI-LABEL: name: test_store_global_v2p0_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>) - ; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](<2 x p0>) + ; CI-NEXT: G_STORE [[BITCAST]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 2, addrspace 1) ; ; VI-LABEL: name: test_store_global_v2p0_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>) - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) - ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) 
= G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](<2 x p0>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<4 x i32>) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) + ; VI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i16) into unknown-address + 2, addrspace 1) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY3]](i32), [[PTR_ADD1]](p1) :: (store (i16) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD2]](p1) :: (store (i16) into unknown-address + 6, addrspace 1) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[UV2]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY4]](i32), [[PTR_ADD3]](p1) :: (store (i16) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR2]](i32), [[PTR_ADD4]](p1) :: (store (i16) into unknown-address + 10, addrspace 1) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[UV3]](i32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[COPY5]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY5]](i32), [[PTR_ADD5]](p1) :: (store (i16) into unknown-address + 12, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR3]](i32), [[PTR_ADD6]](p1) :: (store (i16) into unknown-address + 14, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v2p0_align2 ; GFX9: liveins: $vgpr0_vgpr1, 
$vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>) - ; GFX9-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](<2 x p0>) + ; GFX9-NEXT: G_STORE [[BITCAST]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store (<2 x p0>), align 2, addrspace 1) + G_STORE %1(<2 x p0>), %0(p1) :: (store (<2 x p0>), align 2, addrspace 1) ... --- @@ -4740,35 +4740,35 @@ body: | ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>) - ; SI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](<2 x p0>) + ; SI-NEXT: G_STORE [[BITCAST]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 4, addrspace 1) ; ; CI-LABEL: name: test_store_global_v2p0_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>) - ; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](<2 x p0>) + ; CI-NEXT: G_STORE [[BITCAST]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 4, addrspace 1) ; ; VI-LABEL: name: test_store_global_v2p0_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>) - ; VI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](<2 x p0>) + ; VI-NEXT: G_STORE [[BITCAST]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 4, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v2p0_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>) - ; GFX9-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](<2 x p0>) + ; GFX9-NEXT: G_STORE [[BITCAST]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 4, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store (<2 x p0>), align 4, addrspace 1) + G_STORE %1(<2 x p0>), %0(p1) :: (store (<2 x p0>), align 4, addrspace 1) ... 
--- @@ -4782,35 +4782,35 @@ body: | ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>) - ; SI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](<2 x p0>) + ; SI-NEXT: G_STORE [[BITCAST]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 8, addrspace 1) ; ; CI-LABEL: name: test_store_global_v2p0_align8 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>) - ; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](<2 x p0>) + ; CI-NEXT: G_STORE [[BITCAST]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 8, addrspace 1) ; ; VI-LABEL: name: test_store_global_v2p0_align8 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>) - ; VI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](<2 x p0>) + ; VI-NEXT: G_STORE [[BITCAST]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 8, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v2p0_align8 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>) - ; GFX9-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](<2 x p0>) + ; GFX9-NEXT: G_STORE [[BITCAST]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 8, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store (<2 x p0>), align 8, addrspace 1) + G_STORE %1(<2 x p0>), %0(p1) :: (store (<2 x p0>), align 8, addrspace 1) ... 
--- @@ -4824,35 +4824,35 @@ body: | ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>) - ; SI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](<2 x p0>) + ; SI-NEXT: G_STORE [[BITCAST]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), addrspace 1) ; ; CI-LABEL: name: test_store_global_v2p0_align16 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>) - ; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](<2 x p0>) + ; CI-NEXT: G_STORE [[BITCAST]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), addrspace 1) ; ; VI-LABEL: name: test_store_global_v2p0_align16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>) - ; VI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](<2 x p0>) + ; VI-NEXT: G_STORE [[BITCAST]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v2p0_align16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>) - ; GFX9-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](<2 x p0>) + ; GFX9-NEXT: G_STORE [[BITCAST]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store (<2 x p0>), align 16, addrspace 1) + G_STORE %1(<2 x p0>), %0(p1) :: (store (<2 x p0>), addrspace 1) ... 
--- @@ -4865,139 +4865,139 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<3 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s32) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) - ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY3]](s32) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) - ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY5]](s32) - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR3]], [[COPY6]](s32) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) - ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY8]](s32) - ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; 
SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LSHR6]], [[COPY9]](s32) - ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY $vgpr2_vgpr3_vgpr4 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[COPY1]](i96) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<3 x i32>) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY2]], [[C3]] + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[AND]], [[C2]](i32) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) + ; SI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD1]](p1) :: (store (i8) into unknown-address + 1, addrspace 1) + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[LSHR]], [[COPY3]](i32) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 2, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR2]](i32), [[PTR_ADD2]](p1) :: (store (i8) into unknown-address + 3, addrspace 1) + ; SI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](i64) + ; SI-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY4]], [[C3]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[AND1]], [[COPY5]](i32) + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY4]](i32), [[PTR_ADD3]](p1) :: (store (i8) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR4]](i32), [[PTR_ADD5]](p1) :: (store (i8) into unknown-address + 5, addrspace 1) + ; SI-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[LSHR3]], [[COPY6]](i32) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR3]](i32), [[PTR_ADD4]](p1) :: (store (i8) into unknown-address + 6, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR5]](i32), [[PTR_ADD6]](p1) :: (store (i8) into unknown-address + 7, addrspace 1) + ; SI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], 
[[C6]](i64) + ; SI-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY [[UV2]](i32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[COPY7]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](i64) + ; SI-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[COPY7]], [[C3]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(i32) = G_LSHR [[AND2]], [[COPY8]](i32) + ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY7]](i32), [[PTR_ADD7]](p1) :: (store (i8) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR7]](i32), [[PTR_ADD9]](p1) :: (store (i8) into unknown-address + 9, addrspace 1) + ; SI-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(i32) = G_LSHR [[LSHR6]], [[COPY9]](i32) + ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR6]](i32), [[PTR_ADD8]](p1) :: (store (i8) into unknown-address + 10, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR8]](i32), [[PTR_ADD10]](p1) :: (store (i8) into unknown-address + 11, addrspace 1) ; ; CI-LABEL: name: test_store_global_s96_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) - ; CI-NEXT: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 1, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY $vgpr2_vgpr3_vgpr4 + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[COPY1]](i96) + ; CI-NEXT: G_STORE [[BITCAST]](<3 x i32>), [[COPY]](p1) :: (store (<3 x i32>), align 1, addrspace 1) ; ; VI-LABEL: name: test_store_global_s96_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<3 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C2]](s16) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) - ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) - ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) - ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into 
unknown-address + 3, addrspace 1) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) - ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) - ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) - ; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16) - ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) - ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) - ; VI-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR7]](s16) - ; VI-NEXT: G_STORE [[ANYEXT4]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) - ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) - ; VI-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR8]](s16) - ; VI-NEXT: G_STORE [[ANYEXT5]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY $vgpr2_vgpr3_vgpr4 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[COPY1]](i96) + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<3 x i32>) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC]], [[C2]](i16) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD 
[[COPY]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR1]](i16) + ; VI-NEXT: G_STORE [[ANYEXT]](i32), [[PTR_ADD1]](p1) :: (store (i8) into unknown-address + 1, addrspace 1) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC1]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 2, addrspace 1) + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR2]](i16) + ; VI-NEXT: G_STORE [[ANYEXT1]](i32), [[PTR_ADD2]](p1) :: (store (i8) into unknown-address + 3, addrspace 1) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](i64) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY3]](i32) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC2]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY3]](i32), [[PTR_ADD3]](p1) :: (store (i8) into unknown-address + 4, addrspace 1) + ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR4]](i16) + ; VI-NEXT: G_STORE [[ANYEXT2]](i32), [[PTR_ADD5]](p1) :: (store (i8) into unknown-address + 5, addrspace 1) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC3]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR3]](i32), [[PTR_ADD4]](p1) :: (store (i8) into unknown-address + 6, addrspace 1) + ; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR5]](i16) + ; VI-NEXT: G_STORE [[ANYEXT3]](i32), [[PTR_ADD6]](p1) :: (store (i8) into unknown-address + 7, addrspace 1) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; VI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[UV2]](i32) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](i64) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY4]](i32) + ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC4]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY4]](i32), [[PTR_ADD7]](p1) :: (store (i8) into unknown-address + 8, addrspace 1) + ; VI-NEXT: [[ANYEXT4:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR7]](i16) + ; VI-NEXT: G_STORE [[ANYEXT4]](i32), [[PTR_ADD9]](p1) :: (store (i8) into unknown-address + 9, addrspace 1) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR6]](i32) + ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC5]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR6]](i32), [[PTR_ADD8]](p1) :: (store (i8) into unknown-address + 10, addrspace 1) + ; VI-NEXT: [[ANYEXT5:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR8]](i16) + ; VI-NEXT: G_STORE [[ANYEXT5]](i32), [[PTR_ADD10]](p1) :: (store (i8) into unknown-address + 11, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_s96_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX9-NEXT: {{ $}} ; 
GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) - ; GFX9-NEXT: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 1, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY $vgpr2_vgpr3_vgpr4 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[COPY1]](i96) + ; GFX9-NEXT: G_STORE [[BITCAST]](<3 x i32>), [[COPY]](p1) :: (store (<3 x i32>), align 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s96) = COPY $vgpr2_vgpr3_vgpr4 - G_STORE %1, %0 :: (store (s96), align 1, addrspace 1) + %1:_(i96) = COPY $vgpr2_vgpr3_vgpr4 + G_STORE %1(i96), %0(p1) :: (store (i96), align 1, addrspace 1) ... --- @@ -5010,78 +5010,78 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<3 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) - ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY $vgpr2_vgpr3_vgpr4 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[COPY1]](i96) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<3 x i32>) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) + ; SI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i16) into unknown-address + 2, addrspace 1) + ; 
SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY3]](i32), [[PTR_ADD1]](p1) :: (store (i16) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD2]](p1) :: (store (i16) into unknown-address + 6, addrspace 1) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[UV2]](i32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY4]](i32), [[PTR_ADD3]](p1) :: (store (i16) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR2]](i32), [[PTR_ADD4]](p1) :: (store (i16) into unknown-address + 10, addrspace 1) ; ; CI-LABEL: name: test_store_global_s96_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) - ; CI-NEXT: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 2, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY $vgpr2_vgpr3_vgpr4 + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[COPY1]](i96) + ; CI-NEXT: G_STORE [[BITCAST]](<3 x i32>), [[COPY]](p1) :: (store (<3 x i32>), align 2, addrspace 1) ; ; VI-LABEL: name: test_store_global_s96_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<3 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) - ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], 
[[C1]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY $vgpr2_vgpr3_vgpr4 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[COPY1]](i96) + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<3 x i32>) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) + ; VI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i16) into unknown-address + 2, addrspace 1) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY3]](i32), [[PTR_ADD1]](p1) :: (store (i16) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD2]](p1) :: (store (i16) into unknown-address + 6, addrspace 1) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[UV2]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY4]](i32), [[PTR_ADD3]](p1) :: (store (i16) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR2]](i32), [[PTR_ADD4]](p1) :: (store (i16) into unknown-address + 10, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_s96_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) - ; GFX9-NEXT: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 2, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY $vgpr2_vgpr3_vgpr4 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[COPY1]](i96) + ; GFX9-NEXT: G_STORE [[BITCAST]](<3 x i32>), [[COPY]](p1) :: (store (<3 x i32>), align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s96) = COPY $vgpr2_vgpr3_vgpr4 - G_STORE %1, %0 :: (store (s96), align 2, addrspace 1) + %1:_(i96) = COPY $vgpr2_vgpr3_vgpr4 + G_STORE %1(i96), %0(p1) :: (store (i96), align 2, addrspace 1) ... 
--- @@ -5094,41 +5094,41 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<3 x s32>) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) - ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY $vgpr2_vgpr3_vgpr4 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[COPY1]](i96) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<3 x i32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<2 x i32>), [[COPY]](p1) :: (store (<2 x i32>), align 4, addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: G_STORE [[UV2]](i32), [[PTR_ADD]](p1) :: (store (i32) into unknown-address + 8, addrspace 1) ; ; CI-LABEL: name: test_store_global_s96_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) - ; CI-NEXT: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 4, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY $vgpr2_vgpr3_vgpr4 + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[COPY1]](i96) + ; CI-NEXT: G_STORE [[BITCAST]](<3 x i32>), [[COPY]](p1) :: (store (<3 x i32>), align 4, addrspace 1) ; ; VI-LABEL: name: test_store_global_s96_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) - ; VI-NEXT: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 4, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY $vgpr2_vgpr3_vgpr4 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[COPY1]](i96) + ; VI-NEXT: G_STORE [[BITCAST]](<3 x i32>), [[COPY]](p1) :: (store (<3 x i32>), align 4, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_s96_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) - ; GFX9-NEXT: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 4, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY $vgpr2_vgpr3_vgpr4 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[COPY1]](i96) + ; GFX9-NEXT: G_STORE [[BITCAST]](<3 x i32>), [[COPY]](p1) :: (store (<3 x i32>), align 4, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s96) 
= COPY $vgpr2_vgpr3_vgpr4 - G_STORE %1, %0 :: (store (s96), align 4, addrspace 1) + %1:_(i96) = COPY $vgpr2_vgpr3_vgpr4 + G_STORE %1(i96), %0(p1) :: (store (i96), align 4, addrspace 1) ... --- @@ -5141,41 +5141,41 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<3 x s32>) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) - ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY $vgpr2_vgpr3_vgpr4 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[COPY1]](i96) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<3 x i32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<2 x i32>), [[COPY]](p1) :: (store (<2 x i32>), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: G_STORE [[UV2]](i32), [[PTR_ADD]](p1) :: (store (i32) into unknown-address + 8, align 8, addrspace 1) ; ; CI-LABEL: name: test_store_global_s96_align8 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) - ; CI-NEXT: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 8, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY $vgpr2_vgpr3_vgpr4 + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[COPY1]](i96) + ; CI-NEXT: G_STORE [[BITCAST]](<3 x i32>), [[COPY]](p1) :: (store (<3 x i32>), align 8, addrspace 1) ; ; VI-LABEL: name: test_store_global_s96_align8 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) - ; VI-NEXT: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 8, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY $vgpr2_vgpr3_vgpr4 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[COPY1]](i96) + ; VI-NEXT: G_STORE [[BITCAST]](<3 x i32>), [[COPY]](p1) :: (store (<3 x i32>), align 8, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_s96_align8 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) - ; GFX9-NEXT: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 8, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY $vgpr2_vgpr3_vgpr4 + ; GFX9-NEXT: 
[[BITCAST:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[COPY1]](i96) + ; GFX9-NEXT: G_STORE [[BITCAST]](<3 x i32>), [[COPY]](p1) :: (store (<3 x i32>), align 8, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s96) = COPY $vgpr2_vgpr3_vgpr4 - G_STORE %1, %0 :: (store (s96), align 8, addrspace 1) + %1:_(i96) = COPY $vgpr2_vgpr3_vgpr4 + G_STORE %1(i96), %0(p1) :: (store (i96), align 8, addrspace 1) ... --- @@ -5188,41 +5188,41 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<3 x s32>) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) - ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 16, addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY $vgpr2_vgpr3_vgpr4 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[COPY1]](i96) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<3 x i32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<2 x i32>), [[COPY]](p1) :: (store (<2 x i32>), align 16, addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: G_STORE [[UV2]](i32), [[PTR_ADD]](p1) :: (store (i32) into unknown-address + 8, align 8, addrspace 1) ; ; CI-LABEL: name: test_store_global_s96_align16 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) - ; CI-NEXT: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 16, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY $vgpr2_vgpr3_vgpr4 + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[COPY1]](i96) + ; CI-NEXT: G_STORE [[BITCAST]](<3 x i32>), [[COPY]](p1) :: (store (<3 x i32>), align 16, addrspace 1) ; ; VI-LABEL: name: test_store_global_s96_align16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) - ; VI-NEXT: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 16, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY $vgpr2_vgpr3_vgpr4 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[COPY1]](i96) + ; VI-NEXT: G_STORE [[BITCAST]](<3 x i32>), [[COPY]](p1) :: (store (<3 x i32>), align 16, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_s96_align16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x 
s32>) = G_BITCAST [[COPY1]](s96) - ; GFX9-NEXT: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 16, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY $vgpr2_vgpr3_vgpr4 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[COPY1]](i96) + ; GFX9-NEXT: G_STORE [[BITCAST]](<3 x i32>), [[COPY]](p1) :: (store (<3 x i32>), align 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s96) = COPY $vgpr2_vgpr3_vgpr4 - G_STORE %1, %0 :: (store (s96), align 16, addrspace 1) + %1:_(i96) = COPY $vgpr2_vgpr3_vgpr4 + G_STORE %1(i96), %0(p1) :: (store (i96), align 16, addrspace 1) ... --- @@ -5235,172 +5235,172 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s32) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) - ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY3]](s32) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) - ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY5]](s32) - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR3]], [[COPY6]](s32) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: 
(store (s8) into unknown-address + 7, addrspace 1) - ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY8]](s32) - ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LSHR6]], [[COPY9]](s32) - ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) - ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY11]](s32) - ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY10]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) - ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LSHR9]], [[COPY12]](s32) - ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](i128) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<4 x i32>) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY2]], [[C3]] + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[AND]], [[C2]](i32) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; SI-NEXT: 
G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) + ; SI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD1]](p1) :: (store (i8) into unknown-address + 1, addrspace 1) + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[LSHR]], [[COPY3]](i32) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 2, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR2]](i32), [[PTR_ADD2]](p1) :: (store (i8) into unknown-address + 3, addrspace 1) + ; SI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](i64) + ; SI-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY4]], [[C3]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[AND1]], [[COPY5]](i32) + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY4]](i32), [[PTR_ADD3]](p1) :: (store (i8) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR4]](i32), [[PTR_ADD5]](p1) :: (store (i8) into unknown-address + 5, addrspace 1) + ; SI-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[LSHR3]], [[COPY6]](i32) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR3]](i32), [[PTR_ADD4]](p1) :: (store (i8) into unknown-address + 6, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR5]](i32), [[PTR_ADD6]](p1) :: (store (i8) into unknown-address + 7, addrspace 1) + ; SI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; SI-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY [[UV2]](i32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[COPY7]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](i64) + ; SI-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[COPY7]], [[C3]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(i32) = G_LSHR [[AND2]], [[COPY8]](i32) + ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY7]](i32), [[PTR_ADD7]](p1) :: (store (i8) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR7]](i32), [[PTR_ADD9]](p1) :: (store (i8) into unknown-address + 9, addrspace 1) + ; SI-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(i32) = G_LSHR [[LSHR6]], [[COPY9]](i32) + ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR6]](i32), [[PTR_ADD8]](p1) :: (store (i8) into unknown-address + 10, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR8]](i32), [[PTR_ADD10]](p1) :: (store (i8) into unknown-address + 11, addrspace 1) + ; SI-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](i64) + ; SI-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY [[UV3]](i32) + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(i32) = G_LSHR [[COPY10]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](i64) + ; SI-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[COPY10]], [[C3]] + ; 
SI-NEXT: [[LSHR10:%[0-9]+]]:_(i32) = G_LSHR [[AND3]], [[COPY11]](i32) + ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY10]](i32), [[PTR_ADD11]](p1) :: (store (i8) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR10]](i32), [[PTR_ADD13]](p1) :: (store (i8) into unknown-address + 13, addrspace 1) + ; SI-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(i32) = G_LSHR [[LSHR9]], [[COPY12]](i32) + ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR9]](i32), [[PTR_ADD12]](p1) :: (store (i8) into unknown-address + 14, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR11]](i32), [[PTR_ADD14]](p1) :: (store (i8) into unknown-address + 15, addrspace 1) ; ; CI-LABEL: name: test_store_global_s128_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) - ; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](i128) + ; CI-NEXT: G_STORE [[BITCAST]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 1, addrspace 1) ; ; VI-LABEL: name: test_store_global_s128_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C2]](s16) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) - ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) - ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) - ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR 
[[COPY3]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) - ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) - ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) - ; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16) - ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) - ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) - ; VI-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR7]](s16) - ; VI-NEXT: G_STORE [[ANYEXT4]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) - ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) - ; VI-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR8]](s16) - ; VI-NEXT: G_STORE [[ANYEXT5]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) - ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) - ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC6]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) - ; VI-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR10]](s16) - ; VI-NEXT: G_STORE [[ANYEXT6]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) - ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) - ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC7]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) 
into unknown-address + 14, addrspace 1) - ; VI-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR11]](s16) - ; VI-NEXT: G_STORE [[ANYEXT7]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](i128) + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<4 x i32>) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC]], [[C2]](i16) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR1]](i16) + ; VI-NEXT: G_STORE [[ANYEXT]](i32), [[PTR_ADD1]](p1) :: (store (i8) into unknown-address + 1, addrspace 1) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC1]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 2, addrspace 1) + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR2]](i16) + ; VI-NEXT: G_STORE [[ANYEXT1]](i32), [[PTR_ADD2]](p1) :: (store (i8) into unknown-address + 3, addrspace 1) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](i64) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY3]](i32) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC2]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY3]](i32), [[PTR_ADD3]](p1) :: (store (i8) into unknown-address + 4, addrspace 1) + ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR4]](i16) + ; VI-NEXT: G_STORE [[ANYEXT2]](i32), [[PTR_ADD5]](p1) :: (store (i8) into unknown-address + 5, addrspace 1) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC3]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR3]](i32), [[PTR_ADD4]](p1) :: (store (i8) into unknown-address + 6, addrspace 1) + ; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR5]](i16) + ; VI-NEXT: G_STORE [[ANYEXT3]](i32), [[PTR_ADD6]](p1) :: (store (i8) into unknown-address + 7, addrspace 1) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; VI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[UV2]](i32) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](i64) + ; VI-NEXT: 
[[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY4]](i32) + ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC4]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY4]](i32), [[PTR_ADD7]](p1) :: (store (i8) into unknown-address + 8, addrspace 1) + ; VI-NEXT: [[ANYEXT4:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR7]](i16) + ; VI-NEXT: G_STORE [[ANYEXT4]](i32), [[PTR_ADD9]](p1) :: (store (i8) into unknown-address + 9, addrspace 1) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR6]](i32) + ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC5]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR6]](i32), [[PTR_ADD8]](p1) :: (store (i8) into unknown-address + 10, addrspace 1) + ; VI-NEXT: [[ANYEXT5:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR8]](i16) + ; VI-NEXT: G_STORE [[ANYEXT5]](i32), [[PTR_ADD10]](p1) :: (store (i8) into unknown-address + 11, addrspace 1) + ; VI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; VI-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[UV3]](i32) + ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(i32) = G_LSHR [[COPY5]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](i64) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[COPY5]](i32) + ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC6]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY5]](i32), [[PTR_ADD11]](p1) :: (store (i8) into unknown-address + 12, addrspace 1) + ; VI-NEXT: [[ANYEXT6:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR10]](i16) + ; VI-NEXT: G_STORE [[ANYEXT6]](i32), [[PTR_ADD13]](p1) :: (store (i8) into unknown-address + 13, addrspace 1) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR9]](i32) + ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC7]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR9]](i32), [[PTR_ADD12]](p1) :: (store (i8) into unknown-address + 14, addrspace 1) + ; VI-NEXT: [[ANYEXT7:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR11]](i16) + ; VI-NEXT: G_STORE [[ANYEXT7]](i32), [[PTR_ADD14]](p1) :: (store (i8) into unknown-address + 15, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_s128_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) - ; GFX9-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](i128) + ; GFX9-NEXT: G_STORE [[BITCAST]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store (s128), align 1, addrspace 1) + %1:_(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + G_STORE %1(i128), %0(p1) :: (store (i128), align 1, addrspace 1) ... 
--- @@ -5413,92 +5413,92 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) - ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](i128) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<4 x i32>) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) + ; SI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i16) into unknown-address + 2, addrspace 1) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; SI-NEXT: 
[[COPY3:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY3]](i32), [[PTR_ADD1]](p1) :: (store (i16) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD2]](p1) :: (store (i16) into unknown-address + 6, addrspace 1) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[UV2]](i32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY4]](i32), [[PTR_ADD3]](p1) :: (store (i16) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR2]](i32), [[PTR_ADD4]](p1) :: (store (i16) into unknown-address + 10, addrspace 1) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; SI-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[UV3]](i32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[COPY5]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY5]](i32), [[PTR_ADD5]](p1) :: (store (i16) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR3]](i32), [[PTR_ADD6]](p1) :: (store (i16) into unknown-address + 14, addrspace 1) ; ; CI-LABEL: name: test_store_global_s128_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) - ; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](i128) + ; CI-NEXT: G_STORE [[BITCAST]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 2, addrspace 1) ; ; VI-LABEL: name: test_store_global_s128_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) - ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) 
into unknown-address + 4, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](i128) + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<4 x i32>) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) + ; VI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i16) into unknown-address + 2, addrspace 1) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY3]](i32), [[PTR_ADD1]](p1) :: (store (i16) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD2]](p1) :: (store (i16) into unknown-address + 6, addrspace 1) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[UV2]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY4]](i32), [[PTR_ADD3]](p1) :: (store (i16) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR2]](i32), [[PTR_ADD4]](p1) :: (store (i16) into unknown-address + 10, addrspace 1) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[UV3]](i32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[COPY5]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY5]](i32), [[PTR_ADD5]](p1) :: (store (i16) into 
unknown-address + 12, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR3]](i32), [[PTR_ADD6]](p1) :: (store (i16) into unknown-address + 14, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_s128_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) - ; GFX9-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](i128) + ; GFX9-NEXT: G_STORE [[BITCAST]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store (s128), align 2, addrspace 1) + %1:_(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + G_STORE %1(i128), %0(p1) :: (store (i128), align 2, addrspace 1) ... --- @@ -5511,36 +5511,36 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) - ; SI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](i128) + ; SI-NEXT: G_STORE [[BITCAST]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 4, addrspace 1) ; ; CI-LABEL: name: test_store_global_s128_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) - ; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](i128) + ; CI-NEXT: G_STORE [[BITCAST]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 4, addrspace 1) ; ; VI-LABEL: name: test_store_global_s128_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) - ; VI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](i128) + ; VI-NEXT: G_STORE [[BITCAST]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 4, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_s128_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) - ; GFX9-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; GFX9-NEXT: 
[[COPY1:%[0-9]+]]:_(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](i128) + ; GFX9-NEXT: G_STORE [[BITCAST]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 4, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store (s128), align 4, addrspace 1) + %1:_(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + G_STORE %1(i128), %0(p1) :: (store (i128), align 4, addrspace 1) ... --- @@ -5553,36 +5553,36 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) - ; SI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](i128) + ; SI-NEXT: G_STORE [[BITCAST]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 8, addrspace 1) ; ; CI-LABEL: name: test_store_global_s128_align8 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) - ; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](i128) + ; CI-NEXT: G_STORE [[BITCAST]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 8, addrspace 1) ; ; VI-LABEL: name: test_store_global_s128_align8 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) - ; VI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](i128) + ; VI-NEXT: G_STORE [[BITCAST]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 8, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_s128_align8 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) - ; GFX9-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](i128) + ; GFX9-NEXT: G_STORE [[BITCAST]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 8, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store (s128), align 8, addrspace 1) + %1:_(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + G_STORE %1(i128), %0(p1) :: (store (i128), align 8, addrspace 1) ... 
--- @@ -5595,36 +5595,36 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) - ; SI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](i128) + ; SI-NEXT: G_STORE [[BITCAST]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), addrspace 1) ; ; CI-LABEL: name: test_store_global_s128_align16 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) - ; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](i128) + ; CI-NEXT: G_STORE [[BITCAST]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), addrspace 1) ; ; VI-LABEL: name: test_store_global_s128_align16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) - ; VI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](i128) + ; VI-NEXT: G_STORE [[BITCAST]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), addrspace 1) ; ; GFX9-LABEL: name: test_store_global_s128_align16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) - ; GFX9-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](i128) + ; GFX9-NEXT: G_STORE [[BITCAST]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store (s128), align 16, addrspace 1) + %1:_(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + G_STORE %1(i128), %0(p1) :: (store (i128), addrspace 1) ... 
--- @@ -5637,211 +5637,211 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s32) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) - ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY3]](s32) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) - ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY5]](s32) - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR3]], [[COPY6]](s32) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) - ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY8]](s32) - ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - 
; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LSHR6]], [[COPY9]](s32) - ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) - ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY11]](s32) - ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY10]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) - ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LSHR9]], [[COPY12]](s32) - ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) - ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY13]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] - ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY14]](s32) - ; SI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY13]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR13]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 17, addrspace 1) - ; SI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[LSHR12]], [[COPY15]](s32) - ; SI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD16]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 18, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR14]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<5 x i32>) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; 
SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY2]], [[C3]] + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[AND]], [[C2]](i32) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) + ; SI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD1]](p1) :: (store (i8) into unknown-address + 1, addrspace 1) + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[LSHR]], [[COPY3]](i32) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 2, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR2]](i32), [[PTR_ADD2]](p1) :: (store (i8) into unknown-address + 3, addrspace 1) + ; SI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](i64) + ; SI-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY4]], [[C3]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[AND1]], [[COPY5]](i32) + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY4]](i32), [[PTR_ADD3]](p1) :: (store (i8) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR4]](i32), [[PTR_ADD5]](p1) :: (store (i8) into unknown-address + 5, addrspace 1) + ; SI-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[LSHR3]], [[COPY6]](i32) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR3]](i32), [[PTR_ADD4]](p1) :: (store (i8) into unknown-address + 6, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR5]](i32), [[PTR_ADD6]](p1) :: (store (i8) into unknown-address + 7, addrspace 1) + ; SI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; SI-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY [[UV2]](i32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[COPY7]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](i64) + ; SI-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[COPY7]], [[C3]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(i32) = G_LSHR [[AND2]], [[COPY8]](i32) + ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY7]](i32), [[PTR_ADD7]](p1) :: (store (i8) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR7]](i32), [[PTR_ADD9]](p1) :: (store (i8) into unknown-address + 9, addrspace 1) + ; SI-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(i32) = G_LSHR [[LSHR6]], [[COPY9]](i32) + ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR6]](i32), [[PTR_ADD8]](p1) :: (store (i8) into unknown-address + 10, addrspace 1) + ; 
SI-NEXT: G_STORE [[LSHR8]](i32), [[PTR_ADD10]](p1) :: (store (i8) into unknown-address + 11, addrspace 1) + ; SI-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](i64) + ; SI-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY [[UV3]](i32) + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(i32) = G_LSHR [[COPY10]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](i64) + ; SI-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[COPY10]], [[C3]] + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(i32) = G_LSHR [[AND3]], [[COPY11]](i32) + ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY10]](i32), [[PTR_ADD11]](p1) :: (store (i8) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR10]](i32), [[PTR_ADD13]](p1) :: (store (i8) into unknown-address + 13, addrspace 1) + ; SI-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(i32) = G_LSHR [[LSHR9]], [[COPY12]](i32) + ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR9]](i32), [[PTR_ADD12]](p1) :: (store (i8) into unknown-address + 14, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR11]](i32), [[PTR_ADD14]](p1) :: (store (i8) into unknown-address + 15, addrspace 1) + ; SI-NEXT: [[C8:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](i64) + ; SI-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY [[UV4]](i32) + ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(i32) = G_LSHR [[COPY13]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](i64) + ; SI-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[COPY13]], [[C3]] + ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(i32) = G_LSHR [[AND4]], [[COPY14]](i32) + ; SI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY13]](i32), [[PTR_ADD15]](p1) :: (store (i8) into unknown-address + 16, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR13]](i32), [[PTR_ADD17]](p1) :: (store (i8) into unknown-address + 17, addrspace 1) + ; SI-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(i32) = G_LSHR [[LSHR12]], [[COPY15]](i32) + ; SI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD16]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR12]](i32), [[PTR_ADD16]](p1) :: (store (i8) into unknown-address + 18, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR14]](i32), [[PTR_ADD18]](p1) :: (store (i8) into unknown-address + 19, addrspace 1) ; ; CI-LABEL: name: test_store_global_v5s32_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, 
addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + ; CI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<5 x i32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 1, addrspace 1) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: G_STORE [[UV4]](i32), [[PTR_ADD]](p1) :: (store (i32) into unknown-address + 16, align 1, addrspace 1) ; ; VI-LABEL: name: test_store_global_v5s32_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C2]](s16) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) - ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) - ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) - ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) - ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) - ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = 
G_PTR_ADD [[PTR_ADD4]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) - ; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16) - ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) - ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) - ; VI-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR7]](s16) - ; VI-NEXT: G_STORE [[ANYEXT4]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) - ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) - ; VI-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR8]](s16) - ; VI-NEXT: G_STORE [[ANYEXT5]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) - ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) - ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC6]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) - ; VI-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR10]](s16) - ; VI-NEXT: G_STORE [[ANYEXT6]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) - ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) - ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC7]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) - ; VI-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR11]](s16) - ; VI-NEXT: G_STORE [[ANYEXT7]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) - ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) - ; VI-NEXT: [[LSHR13:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC8]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64) - 
; VI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) - ; VI-NEXT: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR13]](s16) - ; VI-NEXT: G_STORE [[ANYEXT8]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 17, addrspace 1) - ; VI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR12]](s32) - ; VI-NEXT: [[LSHR14:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC9]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD16]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 18, addrspace 1) - ; VI-NEXT: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR14]](s16) - ; VI-NEXT: G_STORE [[ANYEXT9]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<5 x i32>) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC]], [[C2]](i16) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR1]](i16) + ; VI-NEXT: G_STORE [[ANYEXT]](i32), [[PTR_ADD1]](p1) :: (store (i8) into unknown-address + 1, addrspace 1) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC1]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 2, addrspace 1) + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR2]](i16) + ; VI-NEXT: G_STORE [[ANYEXT1]](i32), [[PTR_ADD2]](p1) :: (store (i8) into unknown-address + 3, addrspace 1) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](i64) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY3]](i32) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC2]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY3]](i32), [[PTR_ADD3]](p1) :: (store (i8) into unknown-address + 4, addrspace 1) + ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR4]](i16) + ; VI-NEXT: G_STORE [[ANYEXT2]](i32), [[PTR_ADD5]](p1) :: (store (i8) into unknown-address + 5, addrspace 1) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC3]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR3]](i32), [[PTR_ADD4]](p1) :: (store (i8) into unknown-address + 6, 
addrspace 1) + ; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR5]](i16) + ; VI-NEXT: G_STORE [[ANYEXT3]](i32), [[PTR_ADD6]](p1) :: (store (i8) into unknown-address + 7, addrspace 1) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; VI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[UV2]](i32) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](i64) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY4]](i32) + ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC4]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY4]](i32), [[PTR_ADD7]](p1) :: (store (i8) into unknown-address + 8, addrspace 1) + ; VI-NEXT: [[ANYEXT4:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR7]](i16) + ; VI-NEXT: G_STORE [[ANYEXT4]](i32), [[PTR_ADD9]](p1) :: (store (i8) into unknown-address + 9, addrspace 1) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR6]](i32) + ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC5]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR6]](i32), [[PTR_ADD8]](p1) :: (store (i8) into unknown-address + 10, addrspace 1) + ; VI-NEXT: [[ANYEXT5:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR8]](i16) + ; VI-NEXT: G_STORE [[ANYEXT5]](i32), [[PTR_ADD10]](p1) :: (store (i8) into unknown-address + 11, addrspace 1) + ; VI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; VI-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[UV3]](i32) + ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(i32) = G_LSHR [[COPY5]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](i64) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[COPY5]](i32) + ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC6]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY5]](i32), [[PTR_ADD11]](p1) :: (store (i8) into unknown-address + 12, addrspace 1) + ; VI-NEXT: [[ANYEXT6:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR10]](i16) + ; VI-NEXT: G_STORE [[ANYEXT6]](i32), [[PTR_ADD13]](p1) :: (store (i8) into unknown-address + 13, addrspace 1) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR9]](i32) + ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC7]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR9]](i32), [[PTR_ADD12]](p1) :: (store (i8) into unknown-address + 14, addrspace 1) + ; VI-NEXT: [[ANYEXT7:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR11]](i16) + ; VI-NEXT: G_STORE [[ANYEXT7]](i32), [[PTR_ADD14]](p1) :: (store (i8) into unknown-address + 15, addrspace 1) + ; VI-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](i64) + ; VI-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY [[UV4]](i32) + ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(i32) = G_LSHR [[COPY6]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](i64) + ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[COPY6]](i32) + ; VI-NEXT: [[LSHR13:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC8]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY6]](i32), [[PTR_ADD15]](p1) :: (store (i8) into unknown-address + 16, addrspace 1) + ; VI-NEXT: 
[[ANYEXT8:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR13]](i16) + ; VI-NEXT: G_STORE [[ANYEXT8]](i32), [[PTR_ADD17]](p1) :: (store (i8) into unknown-address + 17, addrspace 1) + ; VI-NEXT: [[TRUNC9:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR12]](i32) + ; VI-NEXT: [[LSHR14:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC9]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD16]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR12]](i32), [[PTR_ADD16]](p1) :: (store (i8) into unknown-address + 18, addrspace 1) + ; VI-NEXT: [[ANYEXT9:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR14]](i16) + ; VI-NEXT: G_STORE [[ANYEXT9]](i32), [[PTR_ADD18]](p1) :: (store (i8) into unknown-address + 19, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v5s32_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<5 x i32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 1, addrspace 1) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: G_STORE [[UV4]](i32), [[PTR_ADD]](p1) :: (store (i32) into unknown-address + 16, align 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - G_STORE %1, %0 :: (store (<5 x s32>), align 1, addrspace 1) + %1:_(<5 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + G_STORE %1(<5 x i32>), %0(p1) :: (store (<5 x i32>), align 1, addrspace 1) ... 
--- @@ -5854,112 +5854,112 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) - ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) - ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<5 x i32>) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], 
[[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) + ; SI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i16) into unknown-address + 2, addrspace 1) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY3]](i32), [[PTR_ADD1]](p1) :: (store (i16) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD2]](p1) :: (store (i16) into unknown-address + 6, addrspace 1) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[UV2]](i32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY4]](i32), [[PTR_ADD3]](p1) :: (store (i16) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR2]](i32), [[PTR_ADD4]](p1) :: (store (i16) into unknown-address + 10, addrspace 1) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; SI-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[UV3]](i32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[COPY5]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY5]](i32), [[PTR_ADD5]](p1) :: (store (i16) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR3]](i32), [[PTR_ADD6]](p1) :: (store (i16) into unknown-address + 14, addrspace 1) + ; SI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; SI-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY [[UV4]](i32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[COPY6]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY6]](i32), [[PTR_ADD7]](p1) :: (store (i16) into unknown-address + 16, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR4]](i32), [[PTR_ADD8]](p1) :: (store (i16) into unknown-address + 18, addrspace 1) ; ; CI-LABEL: name: test_store_global_v5s32_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + ; CI-NEXT: 
[[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<5 x i32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 2, addrspace 1) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: G_STORE [[UV4]](i32), [[PTR_ADD]](p1) :: (store (i32) into unknown-address + 16, align 2, addrspace 1) ; ; VI-LABEL: name: test_store_global_v5s32_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) - ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; 
VI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<5 x i32>) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) + ; VI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i16) into unknown-address + 2, addrspace 1) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY3]](i32), [[PTR_ADD1]](p1) :: (store (i16) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD2]](p1) :: (store (i16) into unknown-address + 6, addrspace 1) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[UV2]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY4]](i32), [[PTR_ADD3]](p1) :: (store (i16) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR2]](i32), [[PTR_ADD4]](p1) :: (store (i16) into unknown-address + 10, addrspace 1) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[UV3]](i32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[COPY5]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY5]](i32), [[PTR_ADD5]](p1) :: (store (i16) into unknown-address + 12, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR3]](i32), [[PTR_ADD6]](p1) :: (store (i16) into unknown-address + 14, addrspace 1) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; VI-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY [[UV4]](i32) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[COPY6]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY6]](i32), [[PTR_ADD7]](p1) :: (store (i16) into unknown-address + 16, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR4]](i32), [[PTR_ADD8]](p1) :: (store (i16) into unknown-address + 18, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v5s32_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), 
[[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<5 x i32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 2, addrspace 1) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: G_STORE [[UV4]](i32), [[PTR_ADD]](p1) :: (store (i32) into unknown-address + 16, align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - G_STORE %1, %0 :: (store (<5 x s32>), align 2, addrspace 1) + %1:_(<5 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + G_STORE %1(<5 x i32>), %0(p1) :: (store (<5 x i32>), align 2, addrspace 1) ... --- @@ -5972,52 +5972,52 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<5 x i32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 4, addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: G_STORE [[UV4]](i32), [[PTR_ADD]](p1) :: (store (i32) into unknown-address + 16, addrspace 1) ; ; CI-LABEL: name: test_store_global_v5s32_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), 
[[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + ; CI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<5 x i32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 4, addrspace 1) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: G_STORE [[UV4]](i32), [[PTR_ADD]](p1) :: (store (i32) into unknown-address + 16, addrspace 1) ; ; VI-LABEL: name: test_store_global_v5s32_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<5 x i32>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 4, addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: G_STORE [[UV4]](i32), [[PTR_ADD]](p1) :: (store (i32) into unknown-address + 16, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v5s32_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, 
addrspace 1) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<5 x i32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 4, addrspace 1) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: G_STORE [[UV4]](i32), [[PTR_ADD]](p1) :: (store (i32) into unknown-address + 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - G_STORE %1, %0 :: (store (<5 x s32>), align 4, addrspace 1) + %1:_(<5 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + G_STORE %1(<5 x i32>), %0(p1) :: (store (<5 x i32>), align 4, addrspace 1) ... --- @@ -6030,52 +6030,52 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<5 x i32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 8, addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: G_STORE [[UV4]](i32), [[PTR_ADD]](p1) :: (store (i32) into unknown-address + 16, align 8, addrspace 1) ; ; CI-LABEL: name: test_store_global_v5s32_align8 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) - ; 
CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + ; CI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<5 x i32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 8, addrspace 1) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: G_STORE [[UV4]](i32), [[PTR_ADD]](p1) :: (store (i32) into unknown-address + 16, align 8, addrspace 1) ; ; VI-LABEL: name: test_store_global_v5s32_align8 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<5 x i32>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 8, addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: G_STORE [[UV4]](i32), [[PTR_ADD]](p1) :: (store (i32) into unknown-address + 16, align 8, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v5s32_align8 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) + ; GFX9-NEXT: 
[[COPY1:%[0-9]+]]:_(<5 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<5 x i32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 8, addrspace 1) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: G_STORE [[UV4]](i32), [[PTR_ADD]](p1) :: (store (i32) into unknown-address + 16, align 8, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - G_STORE %1, %0 :: (store (<5 x s32>), align 8, addrspace 1) + %1:_(<5 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + G_STORE %1(<5 x i32>), %0(p1) :: (store (<5 x i32>), align 8, addrspace 1) ... --- @@ -6088,52 +6088,52 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<5 x i32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: G_STORE [[UV4]](i32), [[PTR_ADD]](p1) :: (store (i32) into unknown-address + 16, align 16, addrspace 1) ; ; CI-LABEL: name: test_store_global_v5s32_align16 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x i32>) 
= COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + ; CI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<5 x i32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), addrspace 1) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: G_STORE [[UV4]](i32), [[PTR_ADD]](p1) :: (store (i32) into unknown-address + 16, align 16, addrspace 1) ; ; VI-LABEL: name: test_store_global_v5s32_align16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<5 x i32>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: G_STORE [[UV4]](i32), [[PTR_ADD]](p1) :: (store (i32) into unknown-address + 16, align 16, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v5s32_align16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<5 x i32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), 
[[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), addrspace 1) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: G_STORE [[UV4]](i32), [[PTR_ADD]](p1) :: (store (i32) into unknown-address + 16, align 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - G_STORE %1, %0 :: (store (<5 x s32>), align 16, addrspace 1) + %1:_(<5 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + G_STORE %1(<5 x i32>), %0(p1) :: (store (<5 x i32>), align 16, addrspace 1) ... --- @@ -6146,214 +6146,214 @@ body: | ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s32) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) - ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY3]](s32) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) - ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY5]](s32) - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR3]], [[COPY6]](s32) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) - ; SI-NEXT: 
G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) - ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY8]](s32) - ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LSHR6]], [[COPY9]](s32) - ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) - ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY11]](s32) - ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY10]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) - ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LSHR9]], [[COPY12]](s32) - ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) - ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY13]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] - ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY14]](s32) - ; SI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY13]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR13]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 17, addrspace 1) - ; SI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = 
G_LSHR [[LSHR12]], [[COPY15]](s32) - ; SI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD16]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 18, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR14]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x i32>) = G_BITCAST [[COPY1]](<5 x p3>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<5 x i32>) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY2]], [[C3]] + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[AND]], [[C2]](i32) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) + ; SI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD1]](p1) :: (store (i8) into unknown-address + 1, addrspace 1) + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[LSHR]], [[COPY3]](i32) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 2, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR2]](i32), [[PTR_ADD2]](p1) :: (store (i8) into unknown-address + 3, addrspace 1) + ; SI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](i64) + ; SI-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY4]], [[C3]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[AND1]], [[COPY5]](i32) + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY4]](i32), [[PTR_ADD3]](p1) :: (store (i8) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR4]](i32), [[PTR_ADD5]](p1) :: (store (i8) into unknown-address + 5, addrspace 1) + ; SI-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[LSHR3]], [[COPY6]](i32) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR3]](i32), [[PTR_ADD4]](p1) :: (store (i8) into unknown-address + 6, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR5]](i32), [[PTR_ADD6]](p1) :: (store (i8) into unknown-address + 7, addrspace 1) + ; SI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; SI-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY [[UV2]](i32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[COPY7]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](i64) + ; SI-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND 
[[COPY7]], [[C3]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(i32) = G_LSHR [[AND2]], [[COPY8]](i32) + ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY7]](i32), [[PTR_ADD7]](p1) :: (store (i8) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR7]](i32), [[PTR_ADD9]](p1) :: (store (i8) into unknown-address + 9, addrspace 1) + ; SI-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(i32) = G_LSHR [[LSHR6]], [[COPY9]](i32) + ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR6]](i32), [[PTR_ADD8]](p1) :: (store (i8) into unknown-address + 10, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR8]](i32), [[PTR_ADD10]](p1) :: (store (i8) into unknown-address + 11, addrspace 1) + ; SI-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](i64) + ; SI-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY [[UV3]](i32) + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(i32) = G_LSHR [[COPY10]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](i64) + ; SI-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[COPY10]], [[C3]] + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(i32) = G_LSHR [[AND3]], [[COPY11]](i32) + ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY10]](i32), [[PTR_ADD11]](p1) :: (store (i8) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR10]](i32), [[PTR_ADD13]](p1) :: (store (i8) into unknown-address + 13, addrspace 1) + ; SI-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(i32) = G_LSHR [[LSHR9]], [[COPY12]](i32) + ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR9]](i32), [[PTR_ADD12]](p1) :: (store (i8) into unknown-address + 14, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR11]](i32), [[PTR_ADD14]](p1) :: (store (i8) into unknown-address + 15, addrspace 1) + ; SI-NEXT: [[C8:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](i64) + ; SI-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY [[UV4]](i32) + ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(i32) = G_LSHR [[COPY13]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](i64) + ; SI-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[COPY13]], [[C3]] + ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(i32) = G_LSHR [[AND4]], [[COPY14]](i32) + ; SI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY13]](i32), [[PTR_ADD15]](p1) :: (store (i8) into unknown-address + 16, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR13]](i32), [[PTR_ADD17]](p1) :: (store (i8) into unknown-address + 17, addrspace 1) + ; SI-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(i32) = G_LSHR [[LSHR12]], [[COPY15]](i32) + ; SI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD16]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR12]](i32), [[PTR_ADD16]](p1) :: (store (i8) into unknown-address + 18, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR14]](i32), [[PTR_ADD18]](p1) :: (store (i8) into unknown-address + 19, addrspace 1) ; ; CI-LABEL: name: test_store_global_v5p3_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: 
[[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x i32>) = G_BITCAST [[COPY1]](<5 x p3>) + ; CI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<5 x i32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 1, addrspace 1) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: G_STORE [[UV4]](i32), [[PTR_ADD]](p1) :: (store (i32) into unknown-address + 16, align 1, addrspace 1) ; ; VI-LABEL: name: test_store_global_v5p3_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C2]](s16) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) - ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) - ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) - ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: 
[[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) - ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) - ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) - ; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16) - ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) - ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) - ; VI-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR7]](s16) - ; VI-NEXT: G_STORE [[ANYEXT4]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) - ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) - ; VI-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR8]](s16) - ; VI-NEXT: G_STORE [[ANYEXT5]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) - ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) - ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC6]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) - ; VI-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR10]](s16) - ; VI-NEXT: G_STORE [[ANYEXT6]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) - ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) - ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC7]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD 
[[PTR_ADD12]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) - ; VI-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR11]](s16) - ; VI-NEXT: G_STORE [[ANYEXT7]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) - ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) - ; VI-NEXT: [[LSHR13:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC8]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) - ; VI-NEXT: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR13]](s16) - ; VI-NEXT: G_STORE [[ANYEXT8]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 17, addrspace 1) - ; VI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR12]](s32) - ; VI-NEXT: [[LSHR14:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC9]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD16]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 18, addrspace 1) - ; VI-NEXT: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR14]](s16) - ; VI-NEXT: G_STORE [[ANYEXT9]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x i32>) = G_BITCAST [[COPY1]](<5 x p3>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<5 x i32>) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC]], [[C2]](i16) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR1]](i16) + ; VI-NEXT: G_STORE [[ANYEXT]](i32), [[PTR_ADD1]](p1) :: (store (i8) into unknown-address + 1, addrspace 1) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC1]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 2, addrspace 1) + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR2]](i16) + ; VI-NEXT: G_STORE [[ANYEXT1]](i32), [[PTR_ADD2]](p1) :: (store (i8) into unknown-address + 3, addrspace 1) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C]](i32) + ; VI-NEXT: 
[[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](i64) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY3]](i32) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC2]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY3]](i32), [[PTR_ADD3]](p1) :: (store (i8) into unknown-address + 4, addrspace 1) + ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR4]](i16) + ; VI-NEXT: G_STORE [[ANYEXT2]](i32), [[PTR_ADD5]](p1) :: (store (i8) into unknown-address + 5, addrspace 1) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC3]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR3]](i32), [[PTR_ADD4]](p1) :: (store (i8) into unknown-address + 6, addrspace 1) + ; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR5]](i16) + ; VI-NEXT: G_STORE [[ANYEXT3]](i32), [[PTR_ADD6]](p1) :: (store (i8) into unknown-address + 7, addrspace 1) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; VI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[UV2]](i32) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](i64) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY4]](i32) + ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC4]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY4]](i32), [[PTR_ADD7]](p1) :: (store (i8) into unknown-address + 8, addrspace 1) + ; VI-NEXT: [[ANYEXT4:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR7]](i16) + ; VI-NEXT: G_STORE [[ANYEXT4]](i32), [[PTR_ADD9]](p1) :: (store (i8) into unknown-address + 9, addrspace 1) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR6]](i32) + ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC5]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR6]](i32), [[PTR_ADD8]](p1) :: (store (i8) into unknown-address + 10, addrspace 1) + ; VI-NEXT: [[ANYEXT5:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR8]](i16) + ; VI-NEXT: G_STORE [[ANYEXT5]](i32), [[PTR_ADD10]](p1) :: (store (i8) into unknown-address + 11, addrspace 1) + ; VI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; VI-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[UV3]](i32) + ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(i32) = G_LSHR [[COPY5]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](i64) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[COPY5]](i32) + ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC6]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY5]](i32), [[PTR_ADD11]](p1) :: (store (i8) into unknown-address + 12, addrspace 1) + ; VI-NEXT: [[ANYEXT6:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR10]](i16) + ; VI-NEXT: G_STORE [[ANYEXT6]](i32), [[PTR_ADD13]](p1) :: (store (i8) into unknown-address + 13, addrspace 1) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR9]](i32) + ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC7]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR9]](i32), [[PTR_ADD12]](p1) :: (store (i8) into unknown-address + 14, addrspace 
1) + ; VI-NEXT: [[ANYEXT7:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR11]](i16) + ; VI-NEXT: G_STORE [[ANYEXT7]](i32), [[PTR_ADD14]](p1) :: (store (i8) into unknown-address + 15, addrspace 1) + ; VI-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](i64) + ; VI-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY [[UV4]](i32) + ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(i32) = G_LSHR [[COPY6]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](i64) + ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[COPY6]](i32) + ; VI-NEXT: [[LSHR13:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC8]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY6]](i32), [[PTR_ADD15]](p1) :: (store (i8) into unknown-address + 16, addrspace 1) + ; VI-NEXT: [[ANYEXT8:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR13]](i16) + ; VI-NEXT: G_STORE [[ANYEXT8]](i32), [[PTR_ADD17]](p1) :: (store (i8) into unknown-address + 17, addrspace 1) + ; VI-NEXT: [[TRUNC9:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR12]](i32) + ; VI-NEXT: [[LSHR14:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC9]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD16]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR12]](i32), [[PTR_ADD16]](p1) :: (store (i8) into unknown-address + 18, addrspace 1) + ; VI-NEXT: [[ANYEXT9:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR14]](i16) + ; VI-NEXT: G_STORE [[ANYEXT9]](i32), [[PTR_ADD18]](p1) :: (store (i8) into unknown-address + 19, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v5p3_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x i32>) = G_BITCAST [[COPY1]](<5 x p3>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<5 x i32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 1, addrspace 1) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: G_STORE [[UV4]](i32), [[PTR_ADD]](p1) :: (store (i32) into unknown-address + 16, align 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - G_STORE %1, %0 :: (store (<5 x p3>), align 1, addrspace 1) + G_STORE %1(<5 x p3>), %0(p1) :: (store (<5 x p3>), align 1, addrspace 1) ... 
--- @@ -6367,115 +6367,115 @@ body: | ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) - ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) - ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x i32>) = G_BITCAST [[COPY1]](<5 x p3>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<5 x i32>) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR 
[[COPY2]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) + ; SI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i16) into unknown-address + 2, addrspace 1) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY3]](i32), [[PTR_ADD1]](p1) :: (store (i16) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD2]](p1) :: (store (i16) into unknown-address + 6, addrspace 1) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[UV2]](i32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY4]](i32), [[PTR_ADD3]](p1) :: (store (i16) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR2]](i32), [[PTR_ADD4]](p1) :: (store (i16) into unknown-address + 10, addrspace 1) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; SI-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[UV3]](i32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[COPY5]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY5]](i32), [[PTR_ADD5]](p1) :: (store (i16) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR3]](i32), [[PTR_ADD6]](p1) :: (store (i16) into unknown-address + 14, addrspace 1) + ; SI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; SI-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY [[UV4]](i32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[COPY6]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY6]](i32), [[PTR_ADD7]](p1) :: (store (i16) into unknown-address + 16, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR4]](i32), [[PTR_ADD8]](p1) :: (store (i16) into unknown-address + 18, addrspace 1) ; ; CI-LABEL: name: test_store_global_v5p3_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1) + ; CI-NEXT: 
[[BITCAST:%[0-9]+]]:_(<5 x i32>) = G_BITCAST [[COPY1]](<5 x p3>) + ; CI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<5 x i32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 2, addrspace 1) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: G_STORE [[UV4]](i32), [[PTR_ADD]](p1) :: (store (i32) into unknown-address + 16, align 2, addrspace 1) ; ; VI-LABEL: name: test_store_global_v5p3_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) - ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY 
[[UV4]](s32) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x i32>) = G_BITCAST [[COPY1]](<5 x p3>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<5 x i32>) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) + ; VI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i16) into unknown-address + 2, addrspace 1) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY3]](i32), [[PTR_ADD1]](p1) :: (store (i16) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD2]](p1) :: (store (i16) into unknown-address + 6, addrspace 1) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[UV2]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY4]](i32), [[PTR_ADD3]](p1) :: (store (i16) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR2]](i32), [[PTR_ADD4]](p1) :: (store (i16) into unknown-address + 10, addrspace 1) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[UV3]](i32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[COPY5]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY5]](i32), [[PTR_ADD5]](p1) :: (store (i16) into unknown-address + 12, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR3]](i32), [[PTR_ADD6]](p1) :: (store (i16) into unknown-address + 14, addrspace 1) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; VI-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY [[UV4]](i32) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[COPY6]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY6]](i32), [[PTR_ADD7]](p1) :: (store (i16) into unknown-address + 16, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR4]](i32), [[PTR_ADD8]](p1) :: (store (i16) into unknown-address + 18, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v5p3_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: 
[[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x i32>) = G_BITCAST [[COPY1]](<5 x p3>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<5 x i32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 2, addrspace 1) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: G_STORE [[UV4]](i32), [[PTR_ADD]](p1) :: (store (i32) into unknown-address + 16, align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - G_STORE %1, %0 :: (store (<5 x p3>), align 2, addrspace 1) + G_STORE %1(<5 x p3>), %0(p1) :: (store (<5 x p3>), align 2, addrspace 1) ... --- @@ -6489,55 +6489,55 @@ body: | ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x i32>) = G_BITCAST [[COPY1]](<5 x p3>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<5 x i32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 4, addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: G_STORE [[UV4]](i32), [[PTR_ADD]](p1) :: (store (i32) into unknown-address + 16, addrspace 1) ; ; CI-LABEL: name: test_store_global_v5p3_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY 
$vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x i32>) = G_BITCAST [[COPY1]](<5 x p3>) + ; CI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<5 x i32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 4, addrspace 1) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: G_STORE [[UV4]](i32), [[PTR_ADD]](p1) :: (store (i32) into unknown-address + 16, addrspace 1) ; ; VI-LABEL: name: test_store_global_v5p3_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x i32>) = G_BITCAST [[COPY1]](<5 x p3>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<5 x i32>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 4, addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: G_STORE [[UV4]](i32), [[PTR_ADD]](p1) :: (store (i32) into unknown-address + 16, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v5p3_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x 
p3>) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x i32>) = G_BITCAST [[COPY1]](<5 x p3>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<5 x i32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 4, addrspace 1) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: G_STORE [[UV4]](i32), [[PTR_ADD]](p1) :: (store (i32) into unknown-address + 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - G_STORE %1, %0 :: (store (<5 x p3>), align 4, addrspace 1) + G_STORE %1(<5 x p3>), %0(p1) :: (store (<5 x p3>), align 4, addrspace 1) ... --- @@ -6551,55 +6551,55 @@ body: | ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x i32>) = G_BITCAST [[COPY1]](<5 x p3>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<5 x i32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 8, addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: G_STORE [[UV4]](i32), [[PTR_ADD]](p1) :: (store (i32) into unknown-address + 16, align 8, addrspace 1) ; ; CI-LABEL: name: test_store_global_v5p3_align8 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST 
[[COPY1]](<5 x p3>) - ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x i32>) = G_BITCAST [[COPY1]](<5 x p3>) + ; CI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<5 x i32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 8, addrspace 1) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: G_STORE [[UV4]](i32), [[PTR_ADD]](p1) :: (store (i32) into unknown-address + 16, align 8, addrspace 1) ; ; VI-LABEL: name: test_store_global_v5p3_align8 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x i32>) = G_BITCAST [[COPY1]](<5 x p3>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<5 x i32>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 8, addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: G_STORE [[UV4]](i32), [[PTR_ADD]](p1) :: (store (i32) into unknown-address + 16, align 8, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v5p3_align8 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), 
[[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x i32>) = G_BITCAST [[COPY1]](<5 x p3>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<5 x i32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 8, addrspace 1) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: G_STORE [[UV4]](i32), [[PTR_ADD]](p1) :: (store (i32) into unknown-address + 16, align 8, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - G_STORE %1, %0 :: (store (<5 x p3>), align 8, addrspace 1) + G_STORE %1(<5 x p3>), %0(p1) :: (store (<5 x p3>), align 8, addrspace 1) ... --- @@ -6613,55 +6613,55 @@ body: | ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x i32>) = G_BITCAST [[COPY1]](<5 x p3>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<5 x i32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: G_STORE [[UV4]](i32), [[PTR_ADD]](p1) :: (store (i32) into unknown-address + 16, align 16, addrspace 1) ; ; CI-LABEL: name: test_store_global_v5p3_align16 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), 
[[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x i32>) = G_BITCAST [[COPY1]](<5 x p3>) + ; CI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<5 x i32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), addrspace 1) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: G_STORE [[UV4]](i32), [[PTR_ADD]](p1) :: (store (i32) into unknown-address + 16, align 16, addrspace 1) ; ; VI-LABEL: name: test_store_global_v5p3_align16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x i32>) = G_BITCAST [[COPY1]](<5 x p3>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<5 x i32>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: G_STORE [[UV4]](i32), [[PTR_ADD]](p1) :: (store (i32) into unknown-address + 16, align 16, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v5p3_align16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) - ; GFX9-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x i32>) = G_BITCAST [[COPY1]](<5 x p3>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<5 x i32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), addrspace 1) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: G_STORE [[UV4]](i32), [[PTR_ADD]](p1) :: (store (i32) into unknown-address + 16, align 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - G_STORE %1, %0 :: (store (<5 x p3>), align 16, addrspace 1) + G_STORE %1(<5 x p3>), %0(p1) :: (store (<5 x p3>), align 16, addrspace 1) ... --- @@ -6674,56 +6674,56 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<10 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[DEF]](<10 x s16>) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<10 x i16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x i32>) = G_BITCAST [[DEF]](<10 x i16>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<5 x i32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: G_STORE [[UV4]](i32), [[PTR_ADD]](p1) :: (store (i32) into unknown-address + 16, align 16, addrspace 1) ; ; CI-LABEL: name: test_store_global_v10s16_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[DEF:%[0-9]+]]:_(<10 x s16>) = G_IMPLICIT_DEF - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[DEF]](<10 x s16>) - ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), 
[[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; CI-NEXT: [[DEF:%[0-9]+]]:_(<10 x i16>) = G_IMPLICIT_DEF + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x i32>) = G_BITCAST [[DEF]](<10 x i16>) + ; CI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<5 x i32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), addrspace 1) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: G_STORE [[UV4]](i32), [[PTR_ADD]](p1) :: (store (i32) into unknown-address + 16, align 16, addrspace 1) ; ; VI-LABEL: name: test_store_global_v10s16_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<10 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[DEF]](<10 x s16>) - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<10 x i16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x i32>) = G_BITCAST [[DEF]](<10 x i16>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<5 x i32>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: G_STORE [[UV4]](i32), [[PTR_ADD]](p1) :: (store (i32) into unknown-address + 16, align 16, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v10s16_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<10 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[DEF]](<10 x s16>) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES 
[[BITCAST]](<5 x s32>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<10 x i16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x i32>) = G_BITCAST [[DEF]](<10 x i16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<5 x i32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), addrspace 1) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: G_STORE [[UV4]](i32), [[PTR_ADD]](p1) :: (store (i32) into unknown-address + 16, align 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<10 x s16>) = G_IMPLICIT_DEF - G_STORE %1, %0 :: (store (<10 x s16>), align 16, addrspace 1) + %1:_(<10 x i16>) = G_IMPLICIT_DEF + G_STORE %1(<10 x i16>), %0(p1) :: (store (<10 x i16>), align 16, addrspace 1) ... --- @@ -6736,96 +6736,96 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<12 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[UV2]](<2 x s16>), [[UV3]](<2 x s16>) - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s16>) - ; SI-NEXT: G_STORE [[BITCAST2]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[BITCAST]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 16, align 16, addrspace 1) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C2]](s64) - ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; SI-NEXT: G_STORE [[BITCAST1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<12 x i16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x 
i16>), [[UV5:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<12 x i16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV5]](<2 x i16>) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i16>) = G_CONCAT_VECTORS [[UV]](<2 x i16>), [[UV1]](<2 x i16>), [[UV2]](<2 x i16>), [[UV3]](<2 x i16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[CONCAT_VECTORS]](<8 x i16>) + ; SI-NEXT: G_STORE [[BITCAST2]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), addrspace 1) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; SI-NEXT: G_STORE [[BITCAST]](i32), [[PTR_ADD]](p1) :: (store (i16) into unknown-address + 16, align 16, addrspace 1) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C2]](i64) + ; SI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD1]](p1) :: (store (i16) into unknown-address + 18, addrspace 1) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](i64) + ; SI-NEXT: G_STORE [[BITCAST1]](i32), [[PTR_ADD2]](p1) :: (store (i16) into unknown-address + 20, align 4, addrspace 1) ; ; CI-LABEL: name: test_store_global_v11s16_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF - ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<12 x s16>) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[UV2]](<2 x s16>), [[UV3]](<2 x s16>) - ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s16>) - ; CI-NEXT: G_STORE [[BITCAST2]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: G_STORE [[BITCAST]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 16, align 16, addrspace 1) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C2]](s64) - ; CI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) - ; CI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; CI-NEXT: G_STORE [[BITCAST1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1) + ; CI-NEXT: [[DEF:%[0-9]+]]:_(<12 x i16>) = G_IMPLICIT_DEF + ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<12 x i16>) + ; CI-NEXT: 
[[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV5]](<2 x i16>) + ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i16>) = G_CONCAT_VECTORS [[UV]](<2 x i16>), [[UV1]](<2 x i16>), [[UV2]](<2 x i16>), [[UV3]](<2 x i16>) + ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[CONCAT_VECTORS]](<8 x i16>) + ; CI-NEXT: G_STORE [[BITCAST2]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), addrspace 1) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-NEXT: G_STORE [[BITCAST]](i32), [[PTR_ADD]](p1) :: (store (i16) into unknown-address + 16, align 16, addrspace 1) + ; CI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C2]](i64) + ; CI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD1]](p1) :: (store (i16) into unknown-address + 18, addrspace 1) + ; CI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](i64) + ; CI-NEXT: G_STORE [[BITCAST1]](i32), [[PTR_ADD2]](p1) :: (store (i16) into unknown-address + 20, align 4, addrspace 1) ; ; VI-LABEL: name: test_store_global_v11s16_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<12 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[UV2]](<2 x s16>), [[UV3]](<2 x s16>) - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s16>) - ; VI-NEXT: G_STORE [[BITCAST2]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[BITCAST]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 16, align 16, addrspace 1) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C2]](s64) - ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; VI-NEXT: G_STORE [[BITCAST1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<12 x i16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<12 x i16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT 
i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV5]](<2 x i16>) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i16>) = G_CONCAT_VECTORS [[UV]](<2 x i16>), [[UV1]](<2 x i16>), [[UV2]](<2 x i16>), [[UV3]](<2 x i16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[CONCAT_VECTORS]](<8 x i16>) + ; VI-NEXT: G_STORE [[BITCAST2]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), addrspace 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: G_STORE [[BITCAST]](i32), [[PTR_ADD]](p1) :: (store (i16) into unknown-address + 16, align 16, addrspace 1) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C2]](i64) + ; VI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD1]](p1) :: (store (i16) into unknown-address + 18, addrspace 1) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](i64) + ; VI-NEXT: G_STORE [[BITCAST1]](i32), [[PTR_ADD2]](p1) :: (store (i16) into unknown-address + 20, align 4, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v11s16_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<12 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[UV2]](<2 x s16>), [[UV3]](<2 x s16>) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s16>) - ; GFX9-NEXT: G_STORE [[BITCAST2]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-NEXT: G_STORE [[BITCAST]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 16, align 16, addrspace 1) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C2]](s64) - ; GFX9-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; GFX9-NEXT: G_STORE [[BITCAST1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<12 x i16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<12 x i16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = 
G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV5]](<2 x i16>) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i16>) = G_CONCAT_VECTORS [[UV]](<2 x i16>), [[UV1]](<2 x i16>), [[UV2]](<2 x i16>), [[UV3]](<2 x i16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[CONCAT_VECTORS]](<8 x i16>) + ; GFX9-NEXT: G_STORE [[BITCAST2]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), addrspace 1) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; GFX9-NEXT: G_STORE [[BITCAST]](i32), [[PTR_ADD]](p1) :: (store (i16) into unknown-address + 16, align 16, addrspace 1) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C2]](i64) + ; GFX9-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD1]](p1) :: (store (i16) into unknown-address + 18, addrspace 1) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](i64) + ; GFX9-NEXT: G_STORE [[BITCAST1]](i32), [[PTR_ADD2]](p1) :: (store (i16) into unknown-address + 20, align 4, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<11 x s16>) = G_IMPLICIT_DEF - G_STORE %1, %0 :: (store (<11 x s16>), align 16, addrspace 1) + %1:_(<11 x i16>) = G_IMPLICIT_DEF + G_STORE %1(<11 x i16>), %0(p1) :: (store (<11 x i16>), align 16, addrspace 1) ... --- @@ -6838,56 +6838,56 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s32>) = G_BITCAST [[DEF]](<12 x s16>) - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>), [[UV2:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<6 x s32>) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[UV]](<2 x s32>), [[UV1]](<2 x s32>) - ; SI-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[UV2]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<12 x i16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x i32>) = G_BITCAST [[DEF]](<12 x i16>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x i32>), [[UV1:%[0-9]+]]:_(<2 x i32>), [[UV2:%[0-9]+]]:_(<2 x i32>) = G_UNMERGE_VALUES [[BITCAST]](<6 x i32>) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i32>) = G_CONCAT_VECTORS [[UV]](<2 x i32>), [[UV1]](<2 x i32>) + ; SI-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: G_STORE [[UV2]](<2 x i32>), [[PTR_ADD]](p1) :: (store (<2 x i32>) into unknown-address + 16, align 16, addrspace 1) ; ; CI-LABEL: name: test_store_global_v12s16_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s32>) = G_BITCAST [[DEF]](<12 x s16>) - ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>), [[UV2:%[0-9]+]]:_(<2 x s32>) = 
G_UNMERGE_VALUES [[BITCAST]](<6 x s32>) - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[UV]](<2 x s32>), [[UV1]](<2 x s32>) - ; CI-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[UV2]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1) + ; CI-NEXT: [[DEF:%[0-9]+]]:_(<12 x i16>) = G_IMPLICIT_DEF + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x i32>) = G_BITCAST [[DEF]](<12 x i16>) + ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x i32>), [[UV1:%[0-9]+]]:_(<2 x i32>), [[UV2:%[0-9]+]]:_(<2 x i32>) = G_UNMERGE_VALUES [[BITCAST]](<6 x i32>) + ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i32>) = G_CONCAT_VECTORS [[UV]](<2 x i32>), [[UV1]](<2 x i32>) + ; CI-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), addrspace 1) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: G_STORE [[UV2]](<2 x i32>), [[PTR_ADD]](p1) :: (store (<2 x i32>) into unknown-address + 16, align 16, addrspace 1) ; ; VI-LABEL: name: test_store_global_v12s16_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s32>) = G_BITCAST [[DEF]](<12 x s16>) - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>), [[UV2:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<6 x s32>) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[UV]](<2 x s32>), [[UV1]](<2 x s32>) - ; VI-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: G_STORE [[UV2]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<12 x i16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x i32>) = G_BITCAST [[DEF]](<12 x i16>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x i32>), [[UV1:%[0-9]+]]:_(<2 x i32>), [[UV2:%[0-9]+]]:_(<2 x i32>) = G_UNMERGE_VALUES [[BITCAST]](<6 x i32>) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i32>) = G_CONCAT_VECTORS [[UV]](<2 x i32>), [[UV1]](<2 x i32>) + ; VI-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: G_STORE [[UV2]](<2 x i32>), [[PTR_ADD]](p1) :: (store (<2 x i32>) into unknown-address + 16, align 16, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v12s16_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s32>) = G_BITCAST [[DEF]](<12 x s16>) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>), [[UV2:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<6 x s32>) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[UV]](<2 x s32>), [[UV1]](<2 x s32>) - ; GFX9-NEXT: G_STORE 
[[CONCAT_VECTORS]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[UV2]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<12 x i16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x i32>) = G_BITCAST [[DEF]](<12 x i16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x i32>), [[UV1:%[0-9]+]]:_(<2 x i32>), [[UV2:%[0-9]+]]:_(<2 x i32>) = G_UNMERGE_VALUES [[BITCAST]](<6 x i32>) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i32>) = G_CONCAT_VECTORS [[UV]](<2 x i32>), [[UV1]](<2 x i32>) + ; GFX9-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), addrspace 1) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: G_STORE [[UV2]](<2 x i32>), [[PTR_ADD]](p1) :: (store (<2 x i32>) into unknown-address + 16, align 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<12 x s16>) = G_IMPLICIT_DEF - G_STORE %1, %0 :: (store (<12 x s16>), align 16, addrspace 1) + %1:_(<12 x i16>) = G_IMPLICIT_DEF + G_STORE %1(<12 x i16>), %0(p1) :: (store (<12 x i16>), align 16, addrspace 1) ... --- @@ -6900,215 +6900,215 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s32) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) - ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY3]](s32) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) - ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: 
[[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY5]](s32) - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR3]], [[COPY6]](s32) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) - ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY8]](s32) - ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LSHR6]], [[COPY9]](s32) - ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) - ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY11]](s32) - ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY10]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) - ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LSHR9]], [[COPY12]](s32) - ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) - ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; 
SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY13]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] - ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY14]](s32) - ; SI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY13]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR13]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 17, addrspace 1) - ; SI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[LSHR12]], [[COPY15]](s32) - ; SI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD16]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 18, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR14]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x i32>) = G_BITCAST [[COPY1]](i160) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<5 x i32>) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY2]], [[C3]] + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[AND]], [[C2]](i32) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) + ; SI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD1]](p1) :: (store (i8) into unknown-address + 1, addrspace 1) + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[LSHR]], [[COPY3]](i32) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 2, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR2]](i32), [[PTR_ADD2]](p1) :: (store (i8) into unknown-address + 3, addrspace 1) + ; SI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](i64) + ; SI-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY4]], [[C3]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[AND1]], [[COPY5]](i32) + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY4]](i32), [[PTR_ADD3]](p1) :: (store (i8) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR4]](i32), [[PTR_ADD5]](p1) :: (store (i8) into unknown-address + 5, addrspace 1) + ; SI-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY 
[[C2]](i32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[LSHR3]], [[COPY6]](i32) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR3]](i32), [[PTR_ADD4]](p1) :: (store (i8) into unknown-address + 6, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR5]](i32), [[PTR_ADD6]](p1) :: (store (i8) into unknown-address + 7, addrspace 1) + ; SI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; SI-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY [[UV2]](i32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[COPY7]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](i64) + ; SI-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[COPY7]], [[C3]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(i32) = G_LSHR [[AND2]], [[COPY8]](i32) + ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY7]](i32), [[PTR_ADD7]](p1) :: (store (i8) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR7]](i32), [[PTR_ADD9]](p1) :: (store (i8) into unknown-address + 9, addrspace 1) + ; SI-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(i32) = G_LSHR [[LSHR6]], [[COPY9]](i32) + ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR6]](i32), [[PTR_ADD8]](p1) :: (store (i8) into unknown-address + 10, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR8]](i32), [[PTR_ADD10]](p1) :: (store (i8) into unknown-address + 11, addrspace 1) + ; SI-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](i64) + ; SI-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY [[UV3]](i32) + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(i32) = G_LSHR [[COPY10]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](i64) + ; SI-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[COPY10]], [[C3]] + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(i32) = G_LSHR [[AND3]], [[COPY11]](i32) + ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY10]](i32), [[PTR_ADD11]](p1) :: (store (i8) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR10]](i32), [[PTR_ADD13]](p1) :: (store (i8) into unknown-address + 13, addrspace 1) + ; SI-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(i32) = G_LSHR [[LSHR9]], [[COPY12]](i32) + ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR9]](i32), [[PTR_ADD12]](p1) :: (store (i8) into unknown-address + 14, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR11]](i32), [[PTR_ADD14]](p1) :: (store (i8) into unknown-address + 15, addrspace 1) + ; SI-NEXT: [[C8:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](i64) + ; SI-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY [[UV4]](i32) + ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(i32) = G_LSHR [[COPY13]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](i64) + ; SI-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[COPY13]], [[C3]] + ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(i32) = G_LSHR [[AND4]], [[COPY14]](i32) + ; SI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY13]](i32), 
[[PTR_ADD15]](p1) :: (store (i8) into unknown-address + 16, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR13]](i32), [[PTR_ADD17]](p1) :: (store (i8) into unknown-address + 17, addrspace 1) + ; SI-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(i32) = G_LSHR [[LSHR12]], [[COPY15]](i32) + ; SI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD16]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR12]](i32), [[PTR_ADD16]](p1) :: (store (i8) into unknown-address + 18, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR14]](i32), [[PTR_ADD18]](p1) :: (store (i8) into unknown-address + 19, addrspace 1) ; ; CI-LABEL: name: test_store_global_s160_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) - ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x i32>) = G_BITCAST [[COPY1]](i160) + ; CI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<5 x i32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 1, addrspace 1) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: G_STORE [[UV4]](i32), [[PTR_ADD]](p1) :: (store (i32) into unknown-address + 16, align 1, addrspace 1) ; ; VI-LABEL: name: test_store_global_s160_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C2]](s16) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE 
[[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) - ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) - ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) - ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) - ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) - ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) - ; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16) - ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) - ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) - ; VI-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR7]](s16) - ; VI-NEXT: G_STORE [[ANYEXT4]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) - ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) - ; VI-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR8]](s16) - ; VI-NEXT: G_STORE [[ANYEXT5]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) - ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = 
COPY [[UV3]](s32) - ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) - ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC6]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) - ; VI-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR10]](s16) - ; VI-NEXT: G_STORE [[ANYEXT6]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) - ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) - ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC7]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) - ; VI-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR11]](s16) - ; VI-NEXT: G_STORE [[ANYEXT7]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) - ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) - ; VI-NEXT: [[LSHR13:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC8]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) - ; VI-NEXT: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR13]](s16) - ; VI-NEXT: G_STORE [[ANYEXT8]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 17, addrspace 1) - ; VI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR12]](s32) - ; VI-NEXT: [[LSHR14:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC9]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD16]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 18, addrspace 1) - ; VI-NEXT: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR14]](s16) - ; VI-NEXT: G_STORE [[ANYEXT9]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x i32>) = G_BITCAST [[COPY1]](i160) + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<5 x i32>) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC]], [[C2]](i16) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) + ; 
VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR1]](i16) + ; VI-NEXT: G_STORE [[ANYEXT]](i32), [[PTR_ADD1]](p1) :: (store (i8) into unknown-address + 1, addrspace 1) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC1]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 2, addrspace 1) + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR2]](i16) + ; VI-NEXT: G_STORE [[ANYEXT1]](i32), [[PTR_ADD2]](p1) :: (store (i8) into unknown-address + 3, addrspace 1) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](i64) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY3]](i32) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC2]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY3]](i32), [[PTR_ADD3]](p1) :: (store (i8) into unknown-address + 4, addrspace 1) + ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR4]](i16) + ; VI-NEXT: G_STORE [[ANYEXT2]](i32), [[PTR_ADD5]](p1) :: (store (i8) into unknown-address + 5, addrspace 1) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC3]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR3]](i32), [[PTR_ADD4]](p1) :: (store (i8) into unknown-address + 6, addrspace 1) + ; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR5]](i16) + ; VI-NEXT: G_STORE [[ANYEXT3]](i32), [[PTR_ADD6]](p1) :: (store (i8) into unknown-address + 7, addrspace 1) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; VI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[UV2]](i32) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](i64) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY4]](i32) + ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC4]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY4]](i32), [[PTR_ADD7]](p1) :: (store (i8) into unknown-address + 8, addrspace 1) + ; VI-NEXT: [[ANYEXT4:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR7]](i16) + ; VI-NEXT: G_STORE [[ANYEXT4]](i32), [[PTR_ADD9]](p1) :: (store (i8) into unknown-address + 9, addrspace 1) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR6]](i32) + ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC5]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR6]](i32), [[PTR_ADD8]](p1) :: (store (i8) into unknown-address + 10, addrspace 1) + ; VI-NEXT: [[ANYEXT5:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR8]](i16) + ; VI-NEXT: G_STORE [[ANYEXT5]](i32), [[PTR_ADD10]](p1) :: (store (i8) into unknown-address + 11, addrspace 1) + ; VI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; VI-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[UV3]](i32) + ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(i32) = 
G_LSHR [[COPY5]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](i64) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[COPY5]](i32) + ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC6]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY5]](i32), [[PTR_ADD11]](p1) :: (store (i8) into unknown-address + 12, addrspace 1) + ; VI-NEXT: [[ANYEXT6:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR10]](i16) + ; VI-NEXT: G_STORE [[ANYEXT6]](i32), [[PTR_ADD13]](p1) :: (store (i8) into unknown-address + 13, addrspace 1) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR9]](i32) + ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC7]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR9]](i32), [[PTR_ADD12]](p1) :: (store (i8) into unknown-address + 14, addrspace 1) + ; VI-NEXT: [[ANYEXT7:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR11]](i16) + ; VI-NEXT: G_STORE [[ANYEXT7]](i32), [[PTR_ADD14]](p1) :: (store (i8) into unknown-address + 15, addrspace 1) + ; VI-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](i64) + ; VI-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY [[UV4]](i32) + ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(i32) = G_LSHR [[COPY6]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](i64) + ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[COPY6]](i32) + ; VI-NEXT: [[LSHR13:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC8]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY6]](i32), [[PTR_ADD15]](p1) :: (store (i8) into unknown-address + 16, addrspace 1) + ; VI-NEXT: [[ANYEXT8:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR13]](i16) + ; VI-NEXT: G_STORE [[ANYEXT8]](i32), [[PTR_ADD17]](p1) :: (store (i8) into unknown-address + 17, addrspace 1) + ; VI-NEXT: [[TRUNC9:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR12]](i32) + ; VI-NEXT: [[LSHR14:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC9]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD16]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR12]](i32), [[PTR_ADD16]](p1) :: (store (i8) into unknown-address + 18, addrspace 1) + ; VI-NEXT: [[ANYEXT9:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR14]](i16) + ; VI-NEXT: G_STORE [[ANYEXT9]](i32), [[PTR_ADD18]](p1) :: (store (i8) into unknown-address + 19, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_s160_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + ; 
GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x i32>) = G_BITCAST [[COPY1]](i160) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<5 x i32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 1, addrspace 1) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: G_STORE [[UV4]](i32), [[PTR_ADD]](p1) :: (store (i32) into unknown-address + 16, align 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - G_STORE %1, %0 :: (store (s160), align 1, addrspace 1) + %1:_(i160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + G_STORE %1(i160), %0(p1) :: (store (i160), align 1, addrspace 1) ... --- @@ -7121,116 +7121,116 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) - ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) - ; SI-NEXT: G_STORE 
[[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) - ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x i32>) = G_BITCAST [[COPY1]](i160) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<5 x i32>) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) + ; SI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i16) into unknown-address + 2, addrspace 1) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY3]](i32), [[PTR_ADD1]](p1) :: (store (i16) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD2]](p1) :: (store (i16) into unknown-address + 6, addrspace 1) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[UV2]](i32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY4]](i32), [[PTR_ADD3]](p1) :: (store (i16) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR2]](i32), [[PTR_ADD4]](p1) :: (store (i16) into unknown-address + 10, addrspace 1) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; SI-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[UV3]](i32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[COPY5]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY5]](i32), [[PTR_ADD5]](p1) :: (store (i16) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR3]](i32), [[PTR_ADD6]](p1) :: (store (i16) into unknown-address + 14, addrspace 1) + ; SI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; SI-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY [[UV4]](i32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[COPY6]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY6]](i32), [[PTR_ADD7]](p1) :: (store (i16) into unknown-address + 16, 
addrspace 1) + ; SI-NEXT: G_STORE [[LSHR4]](i32), [[PTR_ADD8]](p1) :: (store (i16) into unknown-address + 18, addrspace 1) ; ; CI-LABEL: name: test_store_global_s160_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) - ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x i32>) = G_BITCAST [[COPY1]](i160) + ; CI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<5 x i32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 2, addrspace 1) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: G_STORE [[UV4]](i32), [[PTR_ADD]](p1) :: (store (i32) into unknown-address + 16, align 2, addrspace 1) ; ; VI-LABEL: name: test_store_global_s160_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) - ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD 
[[COPY]], [[C3]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x i32>) = G_BITCAST [[COPY1]](i160) + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<5 x i32>) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) + ; VI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i16) into unknown-address + 2, addrspace 1) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY3]](i32), [[PTR_ADD1]](p1) :: (store (i16) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD2]](p1) :: (store (i16) into unknown-address + 6, addrspace 1) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[UV2]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY4]](i32), [[PTR_ADD3]](p1) :: (store (i16) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR2]](i32), [[PTR_ADD4]](p1) :: (store (i16) into unknown-address + 10, addrspace 1) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; VI-NEXT: 
[[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[UV3]](i32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[COPY5]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY5]](i32), [[PTR_ADD5]](p1) :: (store (i16) into unknown-address + 12, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR3]](i32), [[PTR_ADD6]](p1) :: (store (i16) into unknown-address + 14, addrspace 1) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; VI-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY [[UV4]](i32) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[COPY6]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY6]](i32), [[PTR_ADD7]](p1) :: (store (i16) into unknown-address + 16, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR4]](i32), [[PTR_ADD8]](p1) :: (store (i16) into unknown-address + 18, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_s160_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x i32>) = G_BITCAST [[COPY1]](i160) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<5 x i32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 2, addrspace 1) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: G_STORE [[UV4]](i32), [[PTR_ADD]](p1) :: (store (i32) into unknown-address + 16, align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - G_STORE %1, %0 :: (store (s160), align 2, addrspace 1) + %1:_(i160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + G_STORE %1(i160), %0(p1) :: (store (i160), align 2, addrspace 1) ... 
--- @@ -7243,56 +7243,56 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x i32>) = G_BITCAST [[COPY1]](i160) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<5 x i32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 4, addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: G_STORE [[UV4]](i32), [[PTR_ADD]](p1) :: (store (i32) into unknown-address + 16, addrspace 1) ; ; CI-LABEL: name: test_store_global_s160_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) - ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x i32>) = G_BITCAST [[COPY1]](i160) + ; CI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<5 x i32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 4, addrspace 1) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: G_STORE [[UV4]](i32), [[PTR_ADD]](p1) :: (store (i32) into unknown-address + 16, addrspace 1) ; ; VI-LABEL: name: test_store_global_s160_align4 ; VI: 
liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x i32>) = G_BITCAST [[COPY1]](i160) + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<5 x i32>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 4, addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: G_STORE [[UV4]](i32), [[PTR_ADD]](p1) :: (store (i32) into unknown-address + 16, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_s160_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x i32>) = G_BITCAST [[COPY1]](i160) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<5 x i32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 4, addrspace 1) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: G_STORE [[UV4]](i32), [[PTR_ADD]](p1) :: (store (i32) into unknown-address + 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s160) = COPY 
$vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - G_STORE %1, %0 :: (store (s160), align 4, addrspace 1) + %1:_(i160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + G_STORE %1(i160), %0(p1) :: (store (i160), align 4, addrspace 1) ... --- @@ -7305,56 +7305,56 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x i32>) = G_BITCAST [[COPY1]](i160) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<5 x i32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 8, addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: G_STORE [[UV4]](i32), [[PTR_ADD]](p1) :: (store (i32) into unknown-address + 16, align 8, addrspace 1) ; ; CI-LABEL: name: test_store_global_s160_align8 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) - ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x i32>) = G_BITCAST [[COPY1]](i160) + ; CI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<5 x i32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 8, addrspace 1) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 
16 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: G_STORE [[UV4]](i32), [[PTR_ADD]](p1) :: (store (i32) into unknown-address + 16, align 8, addrspace 1) ; ; VI-LABEL: name: test_store_global_s160_align8 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x i32>) = G_BITCAST [[COPY1]](i160) + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<5 x i32>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 8, addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: G_STORE [[UV4]](i32), [[PTR_ADD]](p1) :: (store (i32) into unknown-address + 16, align 8, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_s160_align8 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x i32>) = G_BITCAST [[COPY1]](i160) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<5 x i32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 8, addrspace 1) + ; GFX9-NEXT: 
[[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: G_STORE [[UV4]](i32), [[PTR_ADD]](p1) :: (store (i32) into unknown-address + 16, align 8, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - G_STORE %1, %0 :: (store (s160), align 8, addrspace 1) + %1:_(i160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + G_STORE %1(i160), %0(p1) :: (store (i160), align 8, addrspace 1) ... --- @@ -7367,56 +7367,56 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x i32>) = G_BITCAST [[COPY1]](i160) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<5 x i32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: G_STORE [[UV4]](i32), [[PTR_ADD]](p1) :: (store (i32) into unknown-address + 16, align 16, addrspace 1) ; ; CI-LABEL: name: test_store_global_s160_align16 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) - ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x i32>) = G_BITCAST [[COPY1]](i160) + ; CI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<5 x i32>) + ; CI-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), addrspace 1) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: G_STORE [[UV4]](i32), [[PTR_ADD]](p1) :: (store (i32) into unknown-address + 16, align 16, addrspace 1) ; ; VI-LABEL: name: test_store_global_s160_align16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x i32>) = G_BITCAST [[COPY1]](i160) + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<5 x i32>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: G_STORE [[UV4]](i32), [[PTR_ADD]](p1) :: (store (i32) into unknown-address + 16, align 16, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_s160_align16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x i32>) = G_BITCAST [[COPY1]](i160) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<5 x i32>) + ; GFX9-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), addrspace 1) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: G_STORE [[UV4]](i32), [[PTR_ADD]](p1) :: (store (i32) into unknown-address + 16, align 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - G_STORE %1, %0 :: (store (s160), align 16, addrspace 1) + %1:_(i160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + G_STORE %1(i160), %0(p1) :: (store (i160), align 16, addrspace 1) ... --- @@ -7429,304 +7429,304 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s32) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) - ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY3]](s32) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) - ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY5]](s32) - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR3]], [[COPY6]](s32) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], 
[[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) - ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY8]](s32) - ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LSHR6]], [[COPY9]](s32) - ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) - ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY11]](s32) - ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY10]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) - ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LSHR9]], [[COPY12]](s32) - ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) - ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; SI-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) - ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV12]](s32) - ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY13]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] - ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY14]](s32) - ; SI-NEXT: 
[[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY13]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR13]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 17, addrspace 1) - ; SI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[LSHR12]], [[COPY15]](s32) - ; SI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD16]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 18, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR14]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) - ; SI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) - ; SI-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[UV13]](s32) - ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[COPY16]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s64) - ; SI-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]] - ; SI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY17]](s32) - ; SI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY16]](s32), [[PTR_ADD19]](p1) :: (store (s8) into unknown-address + 20, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR16]](s32), [[PTR_ADD21]](p1) :: (store (s8) into unknown-address + 21, addrspace 1) - ; SI-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[LSHR15]], [[COPY18]](s32) - ; SI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD20]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR15]](s32), [[PTR_ADD20]](p1) :: (store (s8) into unknown-address + 22, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR17]](s32), [[PTR_ADD22]](p1) :: (store (s8) into unknown-address + 23, addrspace 1) - ; SI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) - ; SI-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[UV14]](s32) - ; SI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[COPY19]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64) - ; SI-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C3]] - ; SI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY20]](s32) - ; SI-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY19]](s32), [[PTR_ADD23]](p1) :: (store (s8) into unknown-address + 24, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR19]](s32), [[PTR_ADD25]](p1) :: (store (s8) into unknown-address + 25, addrspace 1) - ; SI-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[LSHR18]], [[COPY21]](s32) - ; SI-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD24]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR18]](s32), [[PTR_ADD24]](p1) :: (store (s8) into unknown-address + 26, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR20]](s32), [[PTR_ADD26]](p1) :: (store (s8) into unknown-address + 27, addrspace 1) - ; SI-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C7]](s64) - ; SI-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY [[UV15]](s32) - ; SI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[COPY22]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C1]](s64) - ; SI-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: 
[[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C3]] - ; SI-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY23]](s32) - ; SI-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY22]](s32), [[PTR_ADD27]](p1) :: (store (s8) into unknown-address + 28, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR22]](s32), [[PTR_ADD29]](p1) :: (store (s8) into unknown-address + 29, addrspace 1) - ; SI-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[LSHR21]], [[COPY24]](s32) - ; SI-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD28]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR21]](s32), [[PTR_ADD28]](p1) :: (store (s8) into unknown-address + 30, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR23]](s32), [[PTR_ADD30]](p1) :: (store (s8) into unknown-address + 31, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<8 x i32>) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY2]], [[C3]] + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[AND]], [[C2]](i32) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) + ; SI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD1]](p1) :: (store (i8) into unknown-address + 1, addrspace 1) + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[LSHR]], [[COPY3]](i32) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 2, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR2]](i32), [[PTR_ADD2]](p1) :: (store (i8) into unknown-address + 3, addrspace 1) + ; SI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](i64) + ; SI-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY4]], [[C3]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[AND1]], [[COPY5]](i32) + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY4]](i32), [[PTR_ADD3]](p1) :: (store (i8) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR4]](i32), [[PTR_ADD5]](p1) :: (store (i8) into unknown-address + 5, addrspace 1) + ; SI-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[LSHR3]], [[COPY6]](i32) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR3]](i32), 
[[PTR_ADD4]](p1) :: (store (i8) into unknown-address + 6, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR5]](i32), [[PTR_ADD6]](p1) :: (store (i8) into unknown-address + 7, addrspace 1) + ; SI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; SI-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY [[UV2]](i32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[COPY7]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](i64) + ; SI-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[COPY7]], [[C3]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(i32) = G_LSHR [[AND2]], [[COPY8]](i32) + ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY7]](i32), [[PTR_ADD7]](p1) :: (store (i8) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR7]](i32), [[PTR_ADD9]](p1) :: (store (i8) into unknown-address + 9, addrspace 1) + ; SI-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(i32) = G_LSHR [[LSHR6]], [[COPY9]](i32) + ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR6]](i32), [[PTR_ADD8]](p1) :: (store (i8) into unknown-address + 10, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR8]](i32), [[PTR_ADD10]](p1) :: (store (i8) into unknown-address + 11, addrspace 1) + ; SI-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](i64) + ; SI-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY [[UV3]](i32) + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(i32) = G_LSHR [[COPY10]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](i64) + ; SI-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[COPY10]], [[C3]] + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(i32) = G_LSHR [[AND3]], [[COPY11]](i32) + ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY10]](i32), [[PTR_ADD11]](p1) :: (store (i8) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR10]](i32), [[PTR_ADD13]](p1) :: (store (i8) into unknown-address + 13, addrspace 1) + ; SI-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(i32) = G_LSHR [[LSHR9]], [[COPY12]](i32) + ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR9]](i32), [[PTR_ADD12]](p1) :: (store (i8) into unknown-address + 14, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR11]](i32), [[PTR_ADD14]](p1) :: (store (i8) into unknown-address + 15, addrspace 1) + ; SI-NEXT: [[C8:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](i64) + ; SI-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32), [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32), [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32), [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<8 x i32>) + ; SI-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY [[UV12]](i32) + ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(i32) = G_LSHR [[COPY13]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](i64) + ; SI-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[COPY13]], [[C3]] + ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(i32) = G_LSHR [[AND4]], [[COPY14]](i32) + ; SI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], 
[[C4]](i64) + ; SI-NEXT: G_STORE [[COPY13]](i32), [[PTR_ADD15]](p1) :: (store (i8) into unknown-address + 16, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR13]](i32), [[PTR_ADD17]](p1) :: (store (i8) into unknown-address + 17, addrspace 1) + ; SI-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(i32) = G_LSHR [[LSHR12]], [[COPY15]](i32) + ; SI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD16]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR12]](i32), [[PTR_ADD16]](p1) :: (store (i8) into unknown-address + 18, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR14]](i32), [[PTR_ADD18]](p1) :: (store (i8) into unknown-address + 19, addrspace 1) + ; SI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](i64) + ; SI-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[UV13]](i32) + ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(i32) = G_LSHR [[COPY16]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C1]](i64) + ; SI-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[COPY16]], [[C3]] + ; SI-NEXT: [[LSHR16:%[0-9]+]]:_(i32) = G_LSHR [[AND5]], [[COPY17]](i32) + ; SI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY16]](i32), [[PTR_ADD19]](p1) :: (store (i8) into unknown-address + 20, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR16]](i32), [[PTR_ADD21]](p1) :: (store (i8) into unknown-address + 21, addrspace 1) + ; SI-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR17:%[0-9]+]]:_(i32) = G_LSHR [[LSHR15]], [[COPY18]](i32) + ; SI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD20]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR15]](i32), [[PTR_ADD20]](p1) :: (store (i8) into unknown-address + 22, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR17]](i32), [[PTR_ADD22]](p1) :: (store (i8) into unknown-address + 23, addrspace 1) + ; SI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](i64) + ; SI-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[UV14]](i32) + ; SI-NEXT: [[LSHR18:%[0-9]+]]:_(i32) = G_LSHR [[COPY19]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C1]](i64) + ; SI-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(i32) = G_AND [[COPY19]], [[C3]] + ; SI-NEXT: [[LSHR19:%[0-9]+]]:_(i32) = G_LSHR [[AND6]], [[COPY20]](i32) + ; SI-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY19]](i32), [[PTR_ADD23]](p1) :: (store (i8) into unknown-address + 24, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR19]](i32), [[PTR_ADD25]](p1) :: (store (i8) into unknown-address + 25, addrspace 1) + ; SI-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR20:%[0-9]+]]:_(i32) = G_LSHR [[LSHR18]], [[COPY21]](i32) + ; SI-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD24]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR18]](i32), [[PTR_ADD24]](p1) :: (store (i8) into unknown-address + 26, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR20]](i32), [[PTR_ADD26]](p1) :: (store (i8) into unknown-address + 27, addrspace 1) + ; SI-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C7]](i64) + ; SI-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY [[UV15]](i32) + ; SI-NEXT: [[LSHR21:%[0-9]+]]:_(i32) = G_LSHR [[COPY22]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C1]](i64) + ; SI-NEXT: [[COPY23:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[AND7:%[0-9]+]]:_(i32) = G_AND [[COPY22]], [[C3]] + ; SI-NEXT: 
[[LSHR22:%[0-9]+]]:_(i32) = G_LSHR [[AND7]], [[COPY23]](i32) + ; SI-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY22]](i32), [[PTR_ADD27]](p1) :: (store (i8) into unknown-address + 28, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR22]](i32), [[PTR_ADD29]](p1) :: (store (i8) into unknown-address + 29, addrspace 1) + ; SI-NEXT: [[COPY24:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR23:%[0-9]+]]:_(i32) = G_LSHR [[LSHR21]], [[COPY24]](i32) + ; SI-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD28]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR21]](i32), [[PTR_ADD28]](p1) :: (store (i8) into unknown-address + 30, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR23]](i32), [[PTR_ADD30]](p1) :: (store (i8) into unknown-address + 31, addrspace 1) ; ; CI-LABEL: name: test_store_global_v8s32_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; CI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) - ; CI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 1, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + ; CI-NEXT: [[UV:%[0-9]+]]:_(<4 x i32>), [[UV1:%[0-9]+]]:_(<4 x i32>) = G_UNMERGE_VALUES [[COPY1]](<8 x i32>) + ; CI-NEXT: G_STORE [[UV]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 1, addrspace 1) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: G_STORE [[UV1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, align 1, addrspace 1) ; ; VI-LABEL: name: test_store_global_v8s32_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C2]](s16) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) - ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; 
VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) - ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) - ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) - ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) - ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) - ; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16) - ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) - ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) - ; VI-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR7]](s16) - ; VI-NEXT: G_STORE [[ANYEXT4]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) - ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) - ; VI-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR8]](s16) - ; VI-NEXT: G_STORE [[ANYEXT5]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) - ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) - ; 
VI-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC6]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) - ; VI-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR10]](s16) - ; VI-NEXT: G_STORE [[ANYEXT6]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) - ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) - ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC7]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) - ; VI-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR11]](s16) - ; VI-NEXT: G_STORE [[ANYEXT7]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) - ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; VI-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV12]](s32) - ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) - ; VI-NEXT: [[LSHR13:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC8]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) - ; VI-NEXT: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR13]](s16) - ; VI-NEXT: G_STORE [[ANYEXT8]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 17, addrspace 1) - ; VI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR12]](s32) - ; VI-NEXT: [[LSHR14:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC9]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD16]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 18, addrspace 1) - ; VI-NEXT: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR14]](s16) - ; VI-NEXT: G_STORE [[ANYEXT9]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) - ; VI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; VI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV13]](s32) - ; VI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s64) - ; VI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[COPY7]](s32) - ; VI-NEXT: [[LSHR16:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC10]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD19]](p1) :: (store (s8) into unknown-address + 20, addrspace 1) - ; VI-NEXT: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR16]](s16) - ; VI-NEXT: G_STORE [[ANYEXT10]](s32), [[PTR_ADD21]](p1) :: (store (s8) into unknown-address + 21, addrspace 1) - ; VI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR15]](s32) - ; VI-NEXT: [[LSHR17:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC11]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD20]], 
[[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR15]](s32), [[PTR_ADD20]](p1) :: (store (s8) into unknown-address + 22, addrspace 1) - ; VI-NEXT: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR17]](s16) - ; VI-NEXT: G_STORE [[ANYEXT11]](s32), [[PTR_ADD22]](p1) :: (store (s8) into unknown-address + 23, addrspace 1) - ; VI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) - ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV14]](s32) - ; VI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64) - ; VI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32) - ; VI-NEXT: [[LSHR19:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC12]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY8]](s32), [[PTR_ADD23]](p1) :: (store (s8) into unknown-address + 24, addrspace 1) - ; VI-NEXT: [[ANYEXT12:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR19]](s16) - ; VI-NEXT: G_STORE [[ANYEXT12]](s32), [[PTR_ADD25]](p1) :: (store (s8) into unknown-address + 25, addrspace 1) - ; VI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR18]](s32) - ; VI-NEXT: [[LSHR20:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC13]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD24]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR18]](s32), [[PTR_ADD24]](p1) :: (store (s8) into unknown-address + 26, addrspace 1) - ; VI-NEXT: [[ANYEXT13:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR20]](s16) - ; VI-NEXT: G_STORE [[ANYEXT13]](s32), [[PTR_ADD26]](p1) :: (store (s8) into unknown-address + 27, addrspace 1) - ; VI-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) - ; VI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV15]](s32) - ; VI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[COPY9]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C1]](s64) - ; VI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; VI-NEXT: [[LSHR22:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC14]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY9]](s32), [[PTR_ADD27]](p1) :: (store (s8) into unknown-address + 28, addrspace 1) - ; VI-NEXT: [[ANYEXT14:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR22]](s16) - ; VI-NEXT: G_STORE [[ANYEXT14]](s32), [[PTR_ADD29]](p1) :: (store (s8) into unknown-address + 29, addrspace 1) - ; VI-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR21]](s32) - ; VI-NEXT: [[LSHR23:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC15]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD28]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR21]](s32), [[PTR_ADD28]](p1) :: (store (s8) into unknown-address + 30, addrspace 1) - ; VI-NEXT: [[ANYEXT15:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR23]](s16) - ; VI-NEXT: G_STORE [[ANYEXT15]](s32), [[PTR_ADD30]](p1) :: (store (s8) into unknown-address + 31, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<8 x i32>) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD 
[[COPY]], [[C1]](i64) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC]], [[C2]](i16) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR1]](i16) + ; VI-NEXT: G_STORE [[ANYEXT]](i32), [[PTR_ADD1]](p1) :: (store (i8) into unknown-address + 1, addrspace 1) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC1]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 2, addrspace 1) + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR2]](i16) + ; VI-NEXT: G_STORE [[ANYEXT1]](i32), [[PTR_ADD2]](p1) :: (store (i8) into unknown-address + 3, addrspace 1) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](i64) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY3]](i32) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC2]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY3]](i32), [[PTR_ADD3]](p1) :: (store (i8) into unknown-address + 4, addrspace 1) + ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR4]](i16) + ; VI-NEXT: G_STORE [[ANYEXT2]](i32), [[PTR_ADD5]](p1) :: (store (i8) into unknown-address + 5, addrspace 1) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC3]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR3]](i32), [[PTR_ADD4]](p1) :: (store (i8) into unknown-address + 6, addrspace 1) + ; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR5]](i16) + ; VI-NEXT: G_STORE [[ANYEXT3]](i32), [[PTR_ADD6]](p1) :: (store (i8) into unknown-address + 7, addrspace 1) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; VI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[UV2]](i32) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](i64) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY4]](i32) + ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC4]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY4]](i32), [[PTR_ADD7]](p1) :: (store (i8) into unknown-address + 8, addrspace 1) + ; VI-NEXT: [[ANYEXT4:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR7]](i16) + ; VI-NEXT: G_STORE [[ANYEXT4]](i32), [[PTR_ADD9]](p1) :: (store (i8) into unknown-address + 9, addrspace 1) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR6]](i32) + ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC5]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR6]](i32), [[PTR_ADD8]](p1) :: (store (i8) into unknown-address + 10, 
addrspace 1) + ; VI-NEXT: [[ANYEXT5:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR8]](i16) + ; VI-NEXT: G_STORE [[ANYEXT5]](i32), [[PTR_ADD10]](p1) :: (store (i8) into unknown-address + 11, addrspace 1) + ; VI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; VI-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[UV3]](i32) + ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(i32) = G_LSHR [[COPY5]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](i64) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[COPY5]](i32) + ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC6]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY5]](i32), [[PTR_ADD11]](p1) :: (store (i8) into unknown-address + 12, addrspace 1) + ; VI-NEXT: [[ANYEXT6:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR10]](i16) + ; VI-NEXT: G_STORE [[ANYEXT6]](i32), [[PTR_ADD13]](p1) :: (store (i8) into unknown-address + 13, addrspace 1) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR9]](i32) + ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC7]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR9]](i32), [[PTR_ADD12]](p1) :: (store (i8) into unknown-address + 14, addrspace 1) + ; VI-NEXT: [[ANYEXT7:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR11]](i16) + ; VI-NEXT: G_STORE [[ANYEXT7]](i32), [[PTR_ADD14]](p1) :: (store (i8) into unknown-address + 15, addrspace 1) + ; VI-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](i64) + ; VI-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32), [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32), [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32), [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<8 x i32>) + ; VI-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY [[UV12]](i32) + ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(i32) = G_LSHR [[COPY6]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](i64) + ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[COPY6]](i32) + ; VI-NEXT: [[LSHR13:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC8]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY6]](i32), [[PTR_ADD15]](p1) :: (store (i8) into unknown-address + 16, addrspace 1) + ; VI-NEXT: [[ANYEXT8:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR13]](i16) + ; VI-NEXT: G_STORE [[ANYEXT8]](i32), [[PTR_ADD17]](p1) :: (store (i8) into unknown-address + 17, addrspace 1) + ; VI-NEXT: [[TRUNC9:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR12]](i32) + ; VI-NEXT: [[LSHR14:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC9]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD16]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR12]](i32), [[PTR_ADD16]](p1) :: (store (i8) into unknown-address + 18, addrspace 1) + ; VI-NEXT: [[ANYEXT9:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR14]](i16) + ; VI-NEXT: G_STORE [[ANYEXT9]](i32), [[PTR_ADD18]](p1) :: (store (i8) into unknown-address + 19, addrspace 1) + ; VI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](i64) + ; VI-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY [[UV13]](i32) + ; VI-NEXT: [[LSHR15:%[0-9]+]]:_(i32) = G_LSHR [[COPY7]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C1]](i64) + ; VI-NEXT: [[TRUNC10:%[0-9]+]]:_(i16) = G_TRUNC [[COPY7]](i32) + ; VI-NEXT: [[LSHR16:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC10]], 
[[C2]](i16) + ; VI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY7]](i32), [[PTR_ADD19]](p1) :: (store (i8) into unknown-address + 20, addrspace 1) + ; VI-NEXT: [[ANYEXT10:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR16]](i16) + ; VI-NEXT: G_STORE [[ANYEXT10]](i32), [[PTR_ADD21]](p1) :: (store (i8) into unknown-address + 21, addrspace 1) + ; VI-NEXT: [[TRUNC11:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR15]](i32) + ; VI-NEXT: [[LSHR17:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC11]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD20]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR15]](i32), [[PTR_ADD20]](p1) :: (store (i8) into unknown-address + 22, addrspace 1) + ; VI-NEXT: [[ANYEXT11:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR17]](i16) + ; VI-NEXT: G_STORE [[ANYEXT11]](i32), [[PTR_ADD22]](p1) :: (store (i8) into unknown-address + 23, addrspace 1) + ; VI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](i64) + ; VI-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY [[UV14]](i32) + ; VI-NEXT: [[LSHR18:%[0-9]+]]:_(i32) = G_LSHR [[COPY8]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C1]](i64) + ; VI-NEXT: [[TRUNC12:%[0-9]+]]:_(i16) = G_TRUNC [[COPY8]](i32) + ; VI-NEXT: [[LSHR19:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC12]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY8]](i32), [[PTR_ADD23]](p1) :: (store (i8) into unknown-address + 24, addrspace 1) + ; VI-NEXT: [[ANYEXT12:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR19]](i16) + ; VI-NEXT: G_STORE [[ANYEXT12]](i32), [[PTR_ADD25]](p1) :: (store (i8) into unknown-address + 25, addrspace 1) + ; VI-NEXT: [[TRUNC13:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR18]](i32) + ; VI-NEXT: [[LSHR20:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC13]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD24]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR18]](i32), [[PTR_ADD24]](p1) :: (store (i8) into unknown-address + 26, addrspace 1) + ; VI-NEXT: [[ANYEXT13:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR20]](i16) + ; VI-NEXT: G_STORE [[ANYEXT13]](i32), [[PTR_ADD26]](p1) :: (store (i8) into unknown-address + 27, addrspace 1) + ; VI-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](i64) + ; VI-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY [[UV15]](i32) + ; VI-NEXT: [[LSHR21:%[0-9]+]]:_(i32) = G_LSHR [[COPY9]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C1]](i64) + ; VI-NEXT: [[TRUNC14:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; VI-NEXT: [[LSHR22:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC14]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY9]](i32), [[PTR_ADD27]](p1) :: (store (i8) into unknown-address + 28, addrspace 1) + ; VI-NEXT: [[ANYEXT14:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR22]](i16) + ; VI-NEXT: G_STORE [[ANYEXT14]](i32), [[PTR_ADD29]](p1) :: (store (i8) into unknown-address + 29, addrspace 1) + ; VI-NEXT: [[TRUNC15:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR21]](i32) + ; VI-NEXT: [[LSHR23:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC15]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD28]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR21]](i32), [[PTR_ADD28]](p1) :: (store (i8) into unknown-address + 30, addrspace 1) + ; VI-NEXT: [[ANYEXT15:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR23]](i16) + ; VI-NEXT: G_STORE [[ANYEXT15]](i32), [[PTR_ADD30]](p1) :: (store (i8) into unknown-address + 31, addrspace 1) ; ; GFX9-LABEL: name: 
test_store_global_v8s32_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) - ; GFX9-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 1, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<8 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<4 x i32>), [[UV1:%[0-9]+]]:_(<4 x i32>) = G_UNMERGE_VALUES [[COPY1]](<8 x i32>) + ; GFX9-NEXT: G_STORE [[UV]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 1, addrspace 1) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: G_STORE [[UV1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, align 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - G_STORE %1, %0 :: (store (<8 x s32>), align 1, addrspace 1) + %1:_(<8 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + G_STORE %1(<8 x i32>), %0(p1) :: (store (<8 x i32>), align 1, addrspace 1) ... --- @@ -7739,148 +7739,148 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) - ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], 
[[C1]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) - ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV12]](s32) - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) - ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV13]](s32) - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD9]](p1) :: (store (s16) into unknown-address + 20, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD10]](p1) :: (store (s16) into unknown-address + 22, addrspace 1) - ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV14]](s32) - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY8]](s32), [[PTR_ADD11]](p1) :: (store (s16) into unknown-address + 24, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD12]](p1) :: (store (s16) into unknown-address + 26, addrspace 1) - ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV15]](s32) - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[COPY9]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY9]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD14]](p1) :: (store (s16) into unknown-address + 30, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<8 x i32>) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = 
G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) + ; SI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i16) into unknown-address + 2, addrspace 1) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY3]](i32), [[PTR_ADD1]](p1) :: (store (i16) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD2]](p1) :: (store (i16) into unknown-address + 6, addrspace 1) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[UV2]](i32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY4]](i32), [[PTR_ADD3]](p1) :: (store (i16) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR2]](i32), [[PTR_ADD4]](p1) :: (store (i16) into unknown-address + 10, addrspace 1) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; SI-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[UV3]](i32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[COPY5]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY5]](i32), [[PTR_ADD5]](p1) :: (store (i16) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR3]](i32), [[PTR_ADD6]](p1) :: (store (i16) into unknown-address + 14, addrspace 1) + ; SI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; SI-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32), [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32), [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32), [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<8 x i32>) + ; SI-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY [[UV12]](i32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[COPY6]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY6]](i32), [[PTR_ADD7]](p1) :: (store (i16) into unknown-address + 16, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR4]](i32), [[PTR_ADD8]](p1) :: (store (i16) into unknown-address + 18, addrspace 1) + ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; SI-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY [[UV13]](i32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[COPY7]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY7]](i32), [[PTR_ADD9]](p1) :: (store (i16) into unknown-address + 20, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR5]](i32), [[PTR_ADD10]](p1) :: (store (i16) into unknown-address + 22, addrspace 1) + ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](i64) + ; SI-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY [[UV14]](i32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR 
[[COPY8]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY8]](i32), [[PTR_ADD11]](p1) :: (store (i16) into unknown-address + 24, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR6]](i32), [[PTR_ADD12]](p1) :: (store (i16) into unknown-address + 26, addrspace 1) + ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i64) + ; SI-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY [[UV15]](i32) + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(i32) = G_LSHR [[COPY9]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY9]](i32), [[PTR_ADD13]](p1) :: (store (i16) into unknown-address + 28, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR7]](i32), [[PTR_ADD14]](p1) :: (store (i16) into unknown-address + 30, addrspace 1) ; ; CI-LABEL: name: test_store_global_v8s32_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; CI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) - ; CI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 2, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + ; CI-NEXT: [[UV:%[0-9]+]]:_(<4 x i32>), [[UV1:%[0-9]+]]:_(<4 x i32>) = G_UNMERGE_VALUES [[COPY1]](<8 x i32>) + ; CI-NEXT: G_STORE [[UV]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 2, addrspace 1) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: G_STORE [[UV1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, align 2, addrspace 1) ; ; VI-LABEL: name: test_store_global_v8s32_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) - ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; VI-NEXT: G_STORE 
[[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV12]](s32) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; VI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV13]](s32) - ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD9]](p1) :: (store (s16) into unknown-address + 20, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD10]](p1) :: (store (s16) into unknown-address + 22, addrspace 1) - ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV14]](s32) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY8]](s32), [[PTR_ADD11]](p1) :: (store (s16) into unknown-address + 24, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD12]](p1) :: (store (s16) into unknown-address + 26, addrspace 1) - ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; VI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV15]](s32) - ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[COPY9]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY9]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1) - ; VI-NEXT: G_STORE 
[[LSHR7]](s32), [[PTR_ADD14]](p1) :: (store (s16) into unknown-address + 30, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<8 x i32>) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) + ; VI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i16) into unknown-address + 2, addrspace 1) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY3]](i32), [[PTR_ADD1]](p1) :: (store (i16) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD2]](p1) :: (store (i16) into unknown-address + 6, addrspace 1) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[UV2]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY4]](i32), [[PTR_ADD3]](p1) :: (store (i16) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR2]](i32), [[PTR_ADD4]](p1) :: (store (i16) into unknown-address + 10, addrspace 1) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[UV3]](i32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[COPY5]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY5]](i32), [[PTR_ADD5]](p1) :: (store (i16) into unknown-address + 12, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR3]](i32), [[PTR_ADD6]](p1) :: (store (i16) into unknown-address + 14, addrspace 1) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; VI-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32), [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32), [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32), [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<8 x i32>) + ; VI-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY [[UV12]](i32) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[COPY6]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY6]](i32), [[PTR_ADD7]](p1) :: (store (i16) into unknown-address + 16, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR4]](i32), [[PTR_ADD8]](p1) :: (store (i16) into unknown-address + 18, addrspace 1) + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; VI-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY [[UV13]](i32) + ; VI-NEXT: 
[[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[COPY7]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY7]](i32), [[PTR_ADD9]](p1) :: (store (i16) into unknown-address + 20, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR5]](i32), [[PTR_ADD10]](p1) :: (store (i16) into unknown-address + 22, addrspace 1) + ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](i64) + ; VI-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY [[UV14]](i32) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[COPY8]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY8]](i32), [[PTR_ADD11]](p1) :: (store (i16) into unknown-address + 24, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR6]](i32), [[PTR_ADD12]](p1) :: (store (i16) into unknown-address + 26, addrspace 1) + ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i64) + ; VI-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY [[UV15]](i32) + ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(i32) = G_LSHR [[COPY9]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY9]](i32), [[PTR_ADD13]](p1) :: (store (i16) into unknown-address + 28, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR7]](i32), [[PTR_ADD14]](p1) :: (store (i16) into unknown-address + 30, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v8s32_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) - ; GFX9-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 2, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<8 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<4 x i32>), [[UV1:%[0-9]+]]:_(<4 x i32>) = G_UNMERGE_VALUES [[COPY1]](<8 x i32>) + ; GFX9-NEXT: G_STORE [[UV]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 2, addrspace 1) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: G_STORE [[UV1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - G_STORE %1, %0 :: (store (<8 x s32>), align 2, addrspace 1) + %1:_(<8 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + G_STORE %1(<8 x i32>), %0(p1) :: (store (<8 x i32>), align 2, addrspace 1) ... 
--- @@ -7893,48 +7893,48 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; SI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) - ; SI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + ; SI-NEXT: [[UV:%[0-9]+]]:_(<4 x i32>), [[UV1:%[0-9]+]]:_(<4 x i32>) = G_UNMERGE_VALUES [[COPY1]](<8 x i32>) + ; SI-NEXT: G_STORE [[UV]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 4, addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: G_STORE [[UV1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, align 4, addrspace 1) ; ; CI-LABEL: name: test_store_global_v8s32_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; CI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) - ; CI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + ; CI-NEXT: [[UV:%[0-9]+]]:_(<4 x i32>), [[UV1:%[0-9]+]]:_(<4 x i32>) = G_UNMERGE_VALUES [[COPY1]](<8 x i32>) + ; CI-NEXT: G_STORE [[UV]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 4, addrspace 1) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: G_STORE [[UV1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, align 4, addrspace 1) ; ; VI-LABEL: name: test_store_global_v8s32_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; VI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) - ; VI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + ; VI-NEXT: [[UV:%[0-9]+]]:_(<4 x i32>), 
[[UV1:%[0-9]+]]:_(<4 x i32>) = G_UNMERGE_VALUES [[COPY1]](<8 x i32>) + ; VI-NEXT: G_STORE [[UV]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 4, addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: G_STORE [[UV1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, align 4, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v8s32_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) - ; GFX9-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<8 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<4 x i32>), [[UV1:%[0-9]+]]:_(<4 x i32>) = G_UNMERGE_VALUES [[COPY1]](<8 x i32>) + ; GFX9-NEXT: G_STORE [[UV]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 4, addrspace 1) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: G_STORE [[UV1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, align 4, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - G_STORE %1, %0 :: (store (<8 x s32>), align 4, addrspace 1) + %1:_(<8 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + G_STORE %1(<8 x i32>), %0(p1) :: (store (<8 x i32>), align 4, addrspace 1) ... 
--- @@ -7947,48 +7947,48 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; SI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) - ; SI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + ; SI-NEXT: [[UV:%[0-9]+]]:_(<4 x i32>), [[UV1:%[0-9]+]]:_(<4 x i32>) = G_UNMERGE_VALUES [[COPY1]](<8 x i32>) + ; SI-NEXT: G_STORE [[UV]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 8, addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: G_STORE [[UV1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, align 8, addrspace 1) ; ; CI-LABEL: name: test_store_global_v8s32_align8 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; CI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) - ; CI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + ; CI-NEXT: [[UV:%[0-9]+]]:_(<4 x i32>), [[UV1:%[0-9]+]]:_(<4 x i32>) = G_UNMERGE_VALUES [[COPY1]](<8 x i32>) + ; CI-NEXT: G_STORE [[UV]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 8, addrspace 1) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: G_STORE [[UV1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, align 8, addrspace 1) ; ; VI-LABEL: name: test_store_global_v8s32_align8 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; VI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) - ; VI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + ; VI-NEXT: [[UV:%[0-9]+]]:_(<4 x i32>), 
[[UV1:%[0-9]+]]:_(<4 x i32>) = G_UNMERGE_VALUES [[COPY1]](<8 x i32>) + ; VI-NEXT: G_STORE [[UV]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 8, addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: G_STORE [[UV1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, align 8, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v8s32_align8 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) - ; GFX9-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<8 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<4 x i32>), [[UV1:%[0-9]+]]:_(<4 x i32>) = G_UNMERGE_VALUES [[COPY1]](<8 x i32>) + ; GFX9-NEXT: G_STORE [[UV]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 8, addrspace 1) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: G_STORE [[UV1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, align 8, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - G_STORE %1, %0 :: (store (<8 x s32>), align 8, addrspace 1) + %1:_(<8 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + G_STORE %1(<8 x i32>), %0(p1) :: (store (<8 x i32>), align 8, addrspace 1) ... 
--- @@ -8001,48 +8001,48 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; SI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) - ; SI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + ; SI-NEXT: [[UV:%[0-9]+]]:_(<4 x i32>), [[UV1:%[0-9]+]]:_(<4 x i32>) = G_UNMERGE_VALUES [[COPY1]](<8 x i32>) + ; SI-NEXT: G_STORE [[UV]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: G_STORE [[UV1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, addrspace 1) ; ; CI-LABEL: name: test_store_global_v8s32_align16 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; CI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) - ; CI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + ; CI-NEXT: [[UV:%[0-9]+]]:_(<4 x i32>), [[UV1:%[0-9]+]]:_(<4 x i32>) = G_UNMERGE_VALUES [[COPY1]](<8 x i32>) + ; CI-NEXT: G_STORE [[UV]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), addrspace 1) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: G_STORE [[UV1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, addrspace 1) ; ; VI-LABEL: name: test_store_global_v8s32_align16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; VI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) - ; VI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + ; VI-NEXT: [[UV:%[0-9]+]]:_(<4 x i32>), [[UV1:%[0-9]+]]:_(<4 x i32>) = G_UNMERGE_VALUES [[COPY1]](<8 x i32>) + ; VI-NEXT: G_STORE 
[[UV]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: G_STORE [[UV1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v8s32_align16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) - ; GFX9-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<8 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<4 x i32>), [[UV1:%[0-9]+]]:_(<4 x i32>) = G_UNMERGE_VALUES [[COPY1]](<8 x i32>) + ; GFX9-NEXT: G_STORE [[UV]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), addrspace 1) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: G_STORE [[UV1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - G_STORE %1, %0 :: (store (<8 x s32>), align 16, addrspace 1) + %1:_(<8 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + G_STORE %1(<8 x i32>), %0(p1) :: (store (<8 x i32>), align 16, addrspace 1) ... 
--- @@ -8055,52 +8055,52 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s128>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](<2 x s128>) - ; SI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; SI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 32, addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i128>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i32>) = G_BITCAST [[COPY1]](<2 x i128>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(<4 x i32>), [[UV1:%[0-9]+]]:_(<4 x i32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x i32>) + ; SI-NEXT: G_STORE [[UV]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 32, addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: G_STORE [[UV1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, addrspace 1) ; ; CI-LABEL: name: test_store_global_v2s128_align32 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s128>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](<2 x s128>) - ; CI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; CI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 32, addrspace 1) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i128>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i32>) = G_BITCAST [[COPY1]](<2 x i128>) + ; CI-NEXT: [[UV:%[0-9]+]]:_(<4 x i32>), [[UV1:%[0-9]+]]:_(<4 x i32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x i32>) + ; CI-NEXT: G_STORE [[UV]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 32, addrspace 1) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: G_STORE [[UV1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, addrspace 1) ; ; VI-LABEL: name: test_store_global_v2s128_align32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s128>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](<2 x s128>) - ; VI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; VI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 32, addrspace 1) - ; VI-NEXT: 
[[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i128>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i32>) = G_BITCAST [[COPY1]](<2 x i128>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(<4 x i32>), [[UV1:%[0-9]+]]:_(<4 x i32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x i32>) + ; VI-NEXT: G_STORE [[UV]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 32, addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: G_STORE [[UV1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v2s128_align32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s128>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](<2 x s128>) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; GFX9-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 32, addrspace 1) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i128>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i32>) = G_BITCAST [[COPY1]](<2 x i128>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<4 x i32>), [[UV1:%[0-9]+]]:_(<4 x i32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x i32>) + ; GFX9-NEXT: G_STORE [[UV]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 32, addrspace 1) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: G_STORE [[UV1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s128>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - G_STORE %1, %0 :: (store (<2 x s128>), align 32, addrspace 1) + %1:_(<2 x i128>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + G_STORE %1(<2 x i128>), %0(p1) :: (store (<2 x i128>), addrspace 1) ... 
--- @@ -8113,308 +8113,308 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s32) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) - ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY3]](s32) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) - ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY5]](s32) - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR3]], [[COPY6]](s32) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) - ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) 
= G_AND [[COPY7]], [[C3]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY8]](s32) - ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LSHR6]], [[COPY9]](s32) - ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) - ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY11]](s32) - ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY10]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) - ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LSHR9]], [[COPY12]](s32) - ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) - ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; SI-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV12]](s32) - ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY13]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] - ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY14]](s32) - ; SI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY13]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR13]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 17, addrspace 1) - ; SI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[LSHR12]], [[COPY15]](s32) - ; SI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD16]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 18, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR14]](s32), 
[[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) - ; SI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) - ; SI-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[UV13]](s32) - ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[COPY16]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s64) - ; SI-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]] - ; SI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY17]](s32) - ; SI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY16]](s32), [[PTR_ADD19]](p1) :: (store (s8) into unknown-address + 20, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR16]](s32), [[PTR_ADD21]](p1) :: (store (s8) into unknown-address + 21, addrspace 1) - ; SI-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[LSHR15]], [[COPY18]](s32) - ; SI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD20]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR15]](s32), [[PTR_ADD20]](p1) :: (store (s8) into unknown-address + 22, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR17]](s32), [[PTR_ADD22]](p1) :: (store (s8) into unknown-address + 23, addrspace 1) - ; SI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) - ; SI-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[UV14]](s32) - ; SI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[COPY19]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64) - ; SI-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C3]] - ; SI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY20]](s32) - ; SI-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY19]](s32), [[PTR_ADD23]](p1) :: (store (s8) into unknown-address + 24, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR19]](s32), [[PTR_ADD25]](p1) :: (store (s8) into unknown-address + 25, addrspace 1) - ; SI-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[LSHR18]], [[COPY21]](s32) - ; SI-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD24]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR18]](s32), [[PTR_ADD24]](p1) :: (store (s8) into unknown-address + 26, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR20]](s32), [[PTR_ADD26]](p1) :: (store (s8) into unknown-address + 27, addrspace 1) - ; SI-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C7]](s64) - ; SI-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY [[UV15]](s32) - ; SI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[COPY22]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C1]](s64) - ; SI-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C3]] - ; SI-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY23]](s32) - ; SI-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY22]](s32), [[PTR_ADD27]](p1) :: (store (s8) into unknown-address + 28, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR22]](s32), [[PTR_ADD29]](p1) :: (store (s8) into unknown-address + 29, addrspace 1) - ; SI-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[LSHR21]], [[COPY24]](s32) - ; SI-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD28]], [[C4]](s64) - ; SI-NEXT: G_STORE 
[[LSHR21]](s32), [[PTR_ADD28]](p1) :: (store (s8) into unknown-address + 30, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR23]](s32), [[PTR_ADD30]](p1) :: (store (s8) into unknown-address + 31, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i32>) = G_BITCAST [[COPY1]](i256) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<8 x i32>) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY2]], [[C3]] + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[AND]], [[C2]](i32) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) + ; SI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD1]](p1) :: (store (i8) into unknown-address + 1, addrspace 1) + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[LSHR]], [[COPY3]](i32) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 2, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR2]](i32), [[PTR_ADD2]](p1) :: (store (i8) into unknown-address + 3, addrspace 1) + ; SI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](i64) + ; SI-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY4]], [[C3]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[AND1]], [[COPY5]](i32) + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY4]](i32), [[PTR_ADD3]](p1) :: (store (i8) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR4]](i32), [[PTR_ADD5]](p1) :: (store (i8) into unknown-address + 5, addrspace 1) + ; SI-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[LSHR3]], [[COPY6]](i32) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR3]](i32), [[PTR_ADD4]](p1) :: (store (i8) into unknown-address + 6, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR5]](i32), [[PTR_ADD6]](p1) :: (store (i8) into unknown-address + 7, addrspace 1) + ; SI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; SI-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY [[UV2]](i32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[COPY7]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](i64) + ; SI-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: 
[[AND2:%[0-9]+]]:_(i32) = G_AND [[COPY7]], [[C3]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(i32) = G_LSHR [[AND2]], [[COPY8]](i32) + ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY7]](i32), [[PTR_ADD7]](p1) :: (store (i8) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR7]](i32), [[PTR_ADD9]](p1) :: (store (i8) into unknown-address + 9, addrspace 1) + ; SI-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(i32) = G_LSHR [[LSHR6]], [[COPY9]](i32) + ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR6]](i32), [[PTR_ADD8]](p1) :: (store (i8) into unknown-address + 10, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR8]](i32), [[PTR_ADD10]](p1) :: (store (i8) into unknown-address + 11, addrspace 1) + ; SI-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](i64) + ; SI-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY [[UV3]](i32) + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(i32) = G_LSHR [[COPY10]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](i64) + ; SI-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[COPY10]], [[C3]] + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(i32) = G_LSHR [[AND3]], [[COPY11]](i32) + ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY10]](i32), [[PTR_ADD11]](p1) :: (store (i8) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR10]](i32), [[PTR_ADD13]](p1) :: (store (i8) into unknown-address + 13, addrspace 1) + ; SI-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(i32) = G_LSHR [[LSHR9]], [[COPY12]](i32) + ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR9]](i32), [[PTR_ADD12]](p1) :: (store (i8) into unknown-address + 14, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR11]](i32), [[PTR_ADD14]](p1) :: (store (i8) into unknown-address + 15, addrspace 1) + ; SI-NEXT: [[C8:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](i64) + ; SI-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32), [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32), [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32), [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<8 x i32>) + ; SI-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY [[UV12]](i32) + ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(i32) = G_LSHR [[COPY13]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](i64) + ; SI-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[COPY13]], [[C3]] + ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(i32) = G_LSHR [[AND4]], [[COPY14]](i32) + ; SI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY13]](i32), [[PTR_ADD15]](p1) :: (store (i8) into unknown-address + 16, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR13]](i32), [[PTR_ADD17]](p1) :: (store (i8) into unknown-address + 17, addrspace 1) + ; SI-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(i32) = G_LSHR [[LSHR12]], [[COPY15]](i32) + ; SI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD16]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR12]](i32), [[PTR_ADD16]](p1) :: (store (i8) into unknown-address + 18, addrspace 1) + ; SI-NEXT: G_STORE 
[[LSHR14]](i32), [[PTR_ADD18]](p1) :: (store (i8) into unknown-address + 19, addrspace 1) + ; SI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](i64) + ; SI-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[UV13]](i32) + ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(i32) = G_LSHR [[COPY16]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C1]](i64) + ; SI-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[COPY16]], [[C3]] + ; SI-NEXT: [[LSHR16:%[0-9]+]]:_(i32) = G_LSHR [[AND5]], [[COPY17]](i32) + ; SI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY16]](i32), [[PTR_ADD19]](p1) :: (store (i8) into unknown-address + 20, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR16]](i32), [[PTR_ADD21]](p1) :: (store (i8) into unknown-address + 21, addrspace 1) + ; SI-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR17:%[0-9]+]]:_(i32) = G_LSHR [[LSHR15]], [[COPY18]](i32) + ; SI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD20]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR15]](i32), [[PTR_ADD20]](p1) :: (store (i8) into unknown-address + 22, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR17]](i32), [[PTR_ADD22]](p1) :: (store (i8) into unknown-address + 23, addrspace 1) + ; SI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](i64) + ; SI-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[UV14]](i32) + ; SI-NEXT: [[LSHR18:%[0-9]+]]:_(i32) = G_LSHR [[COPY19]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C1]](i64) + ; SI-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(i32) = G_AND [[COPY19]], [[C3]] + ; SI-NEXT: [[LSHR19:%[0-9]+]]:_(i32) = G_LSHR [[AND6]], [[COPY20]](i32) + ; SI-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY19]](i32), [[PTR_ADD23]](p1) :: (store (i8) into unknown-address + 24, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR19]](i32), [[PTR_ADD25]](p1) :: (store (i8) into unknown-address + 25, addrspace 1) + ; SI-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR20:%[0-9]+]]:_(i32) = G_LSHR [[LSHR18]], [[COPY21]](i32) + ; SI-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD24]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR18]](i32), [[PTR_ADD24]](p1) :: (store (i8) into unknown-address + 26, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR20]](i32), [[PTR_ADD26]](p1) :: (store (i8) into unknown-address + 27, addrspace 1) + ; SI-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C7]](i64) + ; SI-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY [[UV15]](i32) + ; SI-NEXT: [[LSHR21:%[0-9]+]]:_(i32) = G_LSHR [[COPY22]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C1]](i64) + ; SI-NEXT: [[COPY23:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[AND7:%[0-9]+]]:_(i32) = G_AND [[COPY22]], [[C3]] + ; SI-NEXT: [[LSHR22:%[0-9]+]]:_(i32) = G_LSHR [[AND7]], [[COPY23]](i32) + ; SI-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY22]](i32), [[PTR_ADD27]](p1) :: (store (i8) into unknown-address + 28, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR22]](i32), [[PTR_ADD29]](p1) :: (store (i8) into unknown-address + 29, addrspace 1) + ; SI-NEXT: [[COPY24:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR23:%[0-9]+]]:_(i32) = G_LSHR [[LSHR21]], [[COPY24]](i32) + ; SI-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD28]], [[C4]](i64) + ; 
SI-NEXT: G_STORE [[LSHR21]](i32), [[PTR_ADD28]](p1) :: (store (i8) into unknown-address + 30, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR23]](i32), [[PTR_ADD30]](p1) :: (store (i8) into unknown-address + 31, addrspace 1) ; ; CI-LABEL: name: test_store_global_s256_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) - ; CI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; CI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 1, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i32>) = G_BITCAST [[COPY1]](i256) + ; CI-NEXT: [[UV:%[0-9]+]]:_(<4 x i32>), [[UV1:%[0-9]+]]:_(<4 x i32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x i32>) + ; CI-NEXT: G_STORE [[UV]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 1, addrspace 1) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: G_STORE [[UV1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, align 1, addrspace 1) ; ; VI-LABEL: name: test_store_global_s256_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C2]](s16) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) - ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) - ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT 
[[LSHR2]](s16) - ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) - ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) - ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) - ; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16) - ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) - ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) - ; VI-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR7]](s16) - ; VI-NEXT: G_STORE [[ANYEXT4]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) - ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) - ; VI-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR8]](s16) - ; VI-NEXT: G_STORE [[ANYEXT5]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) - ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) - ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC6]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) - ; VI-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR10]](s16) - ; VI-NEXT: G_STORE [[ANYEXT6]](s32), 
[[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) - ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) - ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC7]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) - ; VI-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR11]](s16) - ; VI-NEXT: G_STORE [[ANYEXT7]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) - ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; VI-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV12]](s32) - ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) - ; VI-NEXT: [[LSHR13:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC8]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) - ; VI-NEXT: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR13]](s16) - ; VI-NEXT: G_STORE [[ANYEXT8]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 17, addrspace 1) - ; VI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR12]](s32) - ; VI-NEXT: [[LSHR14:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC9]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD16]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 18, addrspace 1) - ; VI-NEXT: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR14]](s16) - ; VI-NEXT: G_STORE [[ANYEXT9]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) - ; VI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; VI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV13]](s32) - ; VI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s64) - ; VI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[COPY7]](s32) - ; VI-NEXT: [[LSHR16:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC10]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD19]](p1) :: (store (s8) into unknown-address + 20, addrspace 1) - ; VI-NEXT: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR16]](s16) - ; VI-NEXT: G_STORE [[ANYEXT10]](s32), [[PTR_ADD21]](p1) :: (store (s8) into unknown-address + 21, addrspace 1) - ; VI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR15]](s32) - ; VI-NEXT: [[LSHR17:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC11]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD20]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR15]](s32), [[PTR_ADD20]](p1) :: (store (s8) into unknown-address + 22, addrspace 1) - ; VI-NEXT: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR17]](s16) - ; VI-NEXT: G_STORE [[ANYEXT11]](s32), [[PTR_ADD22]](p1) :: (store (s8) into unknown-address + 23, addrspace 1) - ; VI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD 
[[PTR_ADD15]], [[C5]](s64) - ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV14]](s32) - ; VI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64) - ; VI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32) - ; VI-NEXT: [[LSHR19:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC12]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY8]](s32), [[PTR_ADD23]](p1) :: (store (s8) into unknown-address + 24, addrspace 1) - ; VI-NEXT: [[ANYEXT12:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR19]](s16) - ; VI-NEXT: G_STORE [[ANYEXT12]](s32), [[PTR_ADD25]](p1) :: (store (s8) into unknown-address + 25, addrspace 1) - ; VI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR18]](s32) - ; VI-NEXT: [[LSHR20:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC13]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD24]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR18]](s32), [[PTR_ADD24]](p1) :: (store (s8) into unknown-address + 26, addrspace 1) - ; VI-NEXT: [[ANYEXT13:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR20]](s16) - ; VI-NEXT: G_STORE [[ANYEXT13]](s32), [[PTR_ADD26]](p1) :: (store (s8) into unknown-address + 27, addrspace 1) - ; VI-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) - ; VI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV15]](s32) - ; VI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[COPY9]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C1]](s64) - ; VI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; VI-NEXT: [[LSHR22:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC14]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY9]](s32), [[PTR_ADD27]](p1) :: (store (s8) into unknown-address + 28, addrspace 1) - ; VI-NEXT: [[ANYEXT14:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR22]](s16) - ; VI-NEXT: G_STORE [[ANYEXT14]](s32), [[PTR_ADD29]](p1) :: (store (s8) into unknown-address + 29, addrspace 1) - ; VI-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR21]](s32) - ; VI-NEXT: [[LSHR23:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC15]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD28]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR21]](s32), [[PTR_ADD28]](p1) :: (store (s8) into unknown-address + 30, addrspace 1) - ; VI-NEXT: [[ANYEXT15:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR23]](s16) - ; VI-NEXT: G_STORE [[ANYEXT15]](s32), [[PTR_ADD30]](p1) :: (store (s8) into unknown-address + 31, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i32>) = G_BITCAST [[COPY1]](i256) + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<8 x i32>) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC]], [[C2]](i16) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: 
[[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR1]](i16) + ; VI-NEXT: G_STORE [[ANYEXT]](i32), [[PTR_ADD1]](p1) :: (store (i8) into unknown-address + 1, addrspace 1) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC1]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 2, addrspace 1) + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR2]](i16) + ; VI-NEXT: G_STORE [[ANYEXT1]](i32), [[PTR_ADD2]](p1) :: (store (i8) into unknown-address + 3, addrspace 1) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](i64) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY3]](i32) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC2]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY3]](i32), [[PTR_ADD3]](p1) :: (store (i8) into unknown-address + 4, addrspace 1) + ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR4]](i16) + ; VI-NEXT: G_STORE [[ANYEXT2]](i32), [[PTR_ADD5]](p1) :: (store (i8) into unknown-address + 5, addrspace 1) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC3]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR3]](i32), [[PTR_ADD4]](p1) :: (store (i8) into unknown-address + 6, addrspace 1) + ; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR5]](i16) + ; VI-NEXT: G_STORE [[ANYEXT3]](i32), [[PTR_ADD6]](p1) :: (store (i8) into unknown-address + 7, addrspace 1) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; VI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[UV2]](i32) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](i64) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY4]](i32) + ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC4]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY4]](i32), [[PTR_ADD7]](p1) :: (store (i8) into unknown-address + 8, addrspace 1) + ; VI-NEXT: [[ANYEXT4:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR7]](i16) + ; VI-NEXT: G_STORE [[ANYEXT4]](i32), [[PTR_ADD9]](p1) :: (store (i8) into unknown-address + 9, addrspace 1) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR6]](i32) + ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC5]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR6]](i32), [[PTR_ADD8]](p1) :: (store (i8) into unknown-address + 10, addrspace 1) + ; VI-NEXT: [[ANYEXT5:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR8]](i16) + ; VI-NEXT: G_STORE [[ANYEXT5]](i32), [[PTR_ADD10]](p1) :: (store (i8) into unknown-address + 11, addrspace 1) + ; VI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; VI-NEXT: 
[[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; VI-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[UV3]](i32) + ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(i32) = G_LSHR [[COPY5]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](i64) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[COPY5]](i32) + ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC6]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY5]](i32), [[PTR_ADD11]](p1) :: (store (i8) into unknown-address + 12, addrspace 1) + ; VI-NEXT: [[ANYEXT6:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR10]](i16) + ; VI-NEXT: G_STORE [[ANYEXT6]](i32), [[PTR_ADD13]](p1) :: (store (i8) into unknown-address + 13, addrspace 1) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR9]](i32) + ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC7]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR9]](i32), [[PTR_ADD12]](p1) :: (store (i8) into unknown-address + 14, addrspace 1) + ; VI-NEXT: [[ANYEXT7:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR11]](i16) + ; VI-NEXT: G_STORE [[ANYEXT7]](i32), [[PTR_ADD14]](p1) :: (store (i8) into unknown-address + 15, addrspace 1) + ; VI-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](i64) + ; VI-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32), [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32), [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32), [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<8 x i32>) + ; VI-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY [[UV12]](i32) + ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(i32) = G_LSHR [[COPY6]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](i64) + ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[COPY6]](i32) + ; VI-NEXT: [[LSHR13:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC8]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY6]](i32), [[PTR_ADD15]](p1) :: (store (i8) into unknown-address + 16, addrspace 1) + ; VI-NEXT: [[ANYEXT8:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR13]](i16) + ; VI-NEXT: G_STORE [[ANYEXT8]](i32), [[PTR_ADD17]](p1) :: (store (i8) into unknown-address + 17, addrspace 1) + ; VI-NEXT: [[TRUNC9:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR12]](i32) + ; VI-NEXT: [[LSHR14:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC9]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD16]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR12]](i32), [[PTR_ADD16]](p1) :: (store (i8) into unknown-address + 18, addrspace 1) + ; VI-NEXT: [[ANYEXT9:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR14]](i16) + ; VI-NEXT: G_STORE [[ANYEXT9]](i32), [[PTR_ADD18]](p1) :: (store (i8) into unknown-address + 19, addrspace 1) + ; VI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](i64) + ; VI-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY [[UV13]](i32) + ; VI-NEXT: [[LSHR15:%[0-9]+]]:_(i32) = G_LSHR [[COPY7]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C1]](i64) + ; VI-NEXT: [[TRUNC10:%[0-9]+]]:_(i16) = G_TRUNC [[COPY7]](i32) + ; VI-NEXT: [[LSHR16:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC10]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY7]](i32), [[PTR_ADD19]](p1) :: (store (i8) into unknown-address + 20, addrspace 1) + ; VI-NEXT: [[ANYEXT10:%[0-9]+]]:_(i32) = G_ANYEXT 
[[LSHR16]](i16) + ; VI-NEXT: G_STORE [[ANYEXT10]](i32), [[PTR_ADD21]](p1) :: (store (i8) into unknown-address + 21, addrspace 1) + ; VI-NEXT: [[TRUNC11:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR15]](i32) + ; VI-NEXT: [[LSHR17:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC11]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD20]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR15]](i32), [[PTR_ADD20]](p1) :: (store (i8) into unknown-address + 22, addrspace 1) + ; VI-NEXT: [[ANYEXT11:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR17]](i16) + ; VI-NEXT: G_STORE [[ANYEXT11]](i32), [[PTR_ADD22]](p1) :: (store (i8) into unknown-address + 23, addrspace 1) + ; VI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](i64) + ; VI-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY [[UV14]](i32) + ; VI-NEXT: [[LSHR18:%[0-9]+]]:_(i32) = G_LSHR [[COPY8]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C1]](i64) + ; VI-NEXT: [[TRUNC12:%[0-9]+]]:_(i16) = G_TRUNC [[COPY8]](i32) + ; VI-NEXT: [[LSHR19:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC12]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY8]](i32), [[PTR_ADD23]](p1) :: (store (i8) into unknown-address + 24, addrspace 1) + ; VI-NEXT: [[ANYEXT12:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR19]](i16) + ; VI-NEXT: G_STORE [[ANYEXT12]](i32), [[PTR_ADD25]](p1) :: (store (i8) into unknown-address + 25, addrspace 1) + ; VI-NEXT: [[TRUNC13:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR18]](i32) + ; VI-NEXT: [[LSHR20:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC13]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD24]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR18]](i32), [[PTR_ADD24]](p1) :: (store (i8) into unknown-address + 26, addrspace 1) + ; VI-NEXT: [[ANYEXT13:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR20]](i16) + ; VI-NEXT: G_STORE [[ANYEXT13]](i32), [[PTR_ADD26]](p1) :: (store (i8) into unknown-address + 27, addrspace 1) + ; VI-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](i64) + ; VI-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY [[UV15]](i32) + ; VI-NEXT: [[LSHR21:%[0-9]+]]:_(i32) = G_LSHR [[COPY9]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C1]](i64) + ; VI-NEXT: [[TRUNC14:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; VI-NEXT: [[LSHR22:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC14]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY9]](i32), [[PTR_ADD27]](p1) :: (store (i8) into unknown-address + 28, addrspace 1) + ; VI-NEXT: [[ANYEXT14:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR22]](i16) + ; VI-NEXT: G_STORE [[ANYEXT14]](i32), [[PTR_ADD29]](p1) :: (store (i8) into unknown-address + 29, addrspace 1) + ; VI-NEXT: [[TRUNC15:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR21]](i32) + ; VI-NEXT: [[LSHR23:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC15]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD28]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR21]](i32), [[PTR_ADD28]](p1) :: (store (i8) into unknown-address + 30, addrspace 1) + ; VI-NEXT: [[ANYEXT15:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR23]](i16) + ; VI-NEXT: G_STORE [[ANYEXT15]](i32), [[PTR_ADD30]](p1) :: (store (i8) into unknown-address + 31, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_s256_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY 
$vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; GFX9-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 1, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i32>) = G_BITCAST [[COPY1]](i256) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<4 x i32>), [[UV1:%[0-9]+]]:_(<4 x i32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x i32>) + ; GFX9-NEXT: G_STORE [[UV]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 1, addrspace 1) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: G_STORE [[UV1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, align 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - G_STORE %1, %0 :: (store (s256), align 1, addrspace 1) + %1:_(i256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + G_STORE %1(i256), %0(p1) :: (store (i256), align 1, addrspace 1) ... --- @@ -8427,152 +8427,152 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) - ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: G_STORE 
[[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) - ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV12]](s32) - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) - ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV13]](s32) - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD9]](p1) :: (store (s16) into unknown-address + 20, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD10]](p1) :: (store (s16) into unknown-address + 22, addrspace 1) - ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV14]](s32) - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY8]](s32), [[PTR_ADD11]](p1) :: (store (s16) into unknown-address + 24, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD12]](p1) :: (store (s16) into unknown-address + 26, addrspace 1) - ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV15]](s32) - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[COPY9]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY9]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD14]](p1) :: (store (s16) into unknown-address + 30, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i32>) = G_BITCAST [[COPY1]](i256) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<8 x i32>) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) 
+ ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) + ; SI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i16) into unknown-address + 2, addrspace 1) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY3]](i32), [[PTR_ADD1]](p1) :: (store (i16) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD2]](p1) :: (store (i16) into unknown-address + 6, addrspace 1) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[UV2]](i32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY4]](i32), [[PTR_ADD3]](p1) :: (store (i16) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR2]](i32), [[PTR_ADD4]](p1) :: (store (i16) into unknown-address + 10, addrspace 1) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; SI-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[UV3]](i32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[COPY5]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY5]](i32), [[PTR_ADD5]](p1) :: (store (i16) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR3]](i32), [[PTR_ADD6]](p1) :: (store (i16) into unknown-address + 14, addrspace 1) + ; SI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; SI-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32), [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32), [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32), [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<8 x i32>) + ; SI-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY [[UV12]](i32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[COPY6]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY6]](i32), [[PTR_ADD7]](p1) :: (store (i16) into unknown-address + 16, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR4]](i32), [[PTR_ADD8]](p1) :: (store (i16) into unknown-address + 18, addrspace 1) + ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; SI-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY [[UV13]](i32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[COPY7]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY7]](i32), [[PTR_ADD9]](p1) :: (store (i16) into unknown-address + 20, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR5]](i32), [[PTR_ADD10]](p1) :: (store (i16) into unknown-address + 22, addrspace 1) + ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](i64) + ; SI-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY [[UV14]](i32) + ; 
SI-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[COPY8]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY8]](i32), [[PTR_ADD11]](p1) :: (store (i16) into unknown-address + 24, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR6]](i32), [[PTR_ADD12]](p1) :: (store (i16) into unknown-address + 26, addrspace 1) + ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i64) + ; SI-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY [[UV15]](i32) + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(i32) = G_LSHR [[COPY9]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY9]](i32), [[PTR_ADD13]](p1) :: (store (i16) into unknown-address + 28, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR7]](i32), [[PTR_ADD14]](p1) :: (store (i16) into unknown-address + 30, addrspace 1) ; ; CI-LABEL: name: test_store_global_s256_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) - ; CI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; CI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 2, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i32>) = G_BITCAST [[COPY1]](i256) + ; CI-NEXT: [[UV:%[0-9]+]]:_(<4 x i32>), [[UV1:%[0-9]+]]:_(<4 x i32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x i32>) + ; CI-NEXT: G_STORE [[UV]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 2, addrspace 1) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: G_STORE [[UV1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, align 2, addrspace 1) ; ; VI-LABEL: name: test_store_global_s256_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) - ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = 
G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV12]](s32) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; VI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV13]](s32) - ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD9]](p1) :: (store (s16) into unknown-address + 20, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD10]](p1) :: (store (s16) into unknown-address + 22, addrspace 1) - ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV14]](s32) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY8]](s32), [[PTR_ADD11]](p1) :: (store (s16) into unknown-address + 24, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD12]](p1) :: (store (s16) into unknown-address + 26, addrspace 1) - ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; VI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV15]](s32) - ; VI-NEXT: 
[[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[COPY9]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY9]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD14]](p1) :: (store (s16) into unknown-address + 30, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i32>) = G_BITCAST [[COPY1]](i256) + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<8 x i32>) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) + ; VI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i16) into unknown-address + 2, addrspace 1) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY3]](i32), [[PTR_ADD1]](p1) :: (store (i16) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD2]](p1) :: (store (i16) into unknown-address + 6, addrspace 1) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[UV2]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY4]](i32), [[PTR_ADD3]](p1) :: (store (i16) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR2]](i32), [[PTR_ADD4]](p1) :: (store (i16) into unknown-address + 10, addrspace 1) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[UV3]](i32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[COPY5]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY5]](i32), [[PTR_ADD5]](p1) :: (store (i16) into unknown-address + 12, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR3]](i32), [[PTR_ADD6]](p1) :: (store (i16) into unknown-address + 14, addrspace 1) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; VI-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32), [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32), [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32), [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<8 x i32>) + ; VI-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY [[UV12]](i32) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[COPY6]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY6]](i32), 
[[PTR_ADD7]](p1) :: (store (i16) into unknown-address + 16, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR4]](i32), [[PTR_ADD8]](p1) :: (store (i16) into unknown-address + 18, addrspace 1) + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; VI-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY [[UV13]](i32) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[COPY7]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY7]](i32), [[PTR_ADD9]](p1) :: (store (i16) into unknown-address + 20, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR5]](i32), [[PTR_ADD10]](p1) :: (store (i16) into unknown-address + 22, addrspace 1) + ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](i64) + ; VI-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY [[UV14]](i32) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[COPY8]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY8]](i32), [[PTR_ADD11]](p1) :: (store (i16) into unknown-address + 24, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR6]](i32), [[PTR_ADD12]](p1) :: (store (i16) into unknown-address + 26, addrspace 1) + ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i64) + ; VI-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY [[UV15]](i32) + ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(i32) = G_LSHR [[COPY9]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY9]](i32), [[PTR_ADD13]](p1) :: (store (i16) into unknown-address + 28, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR7]](i32), [[PTR_ADD14]](p1) :: (store (i16) into unknown-address + 30, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_s256_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; GFX9-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 2, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i32>) = G_BITCAST [[COPY1]](i256) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<4 x i32>), [[UV1:%[0-9]+]]:_(<4 x i32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x i32>) + ; GFX9-NEXT: G_STORE [[UV]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 2, addrspace 1) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: G_STORE [[UV1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - G_STORE %1, %0 :: (store (s256), align 2, addrspace 1) + %1:_(i256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + G_STORE %1(i256), %0(p1) :: (store (i256), align 2, addrspace 1) ... 
--- @@ -8585,52 +8585,52 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) - ; SI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; SI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i32>) = G_BITCAST [[COPY1]](i256) + ; SI-NEXT: [[UV:%[0-9]+]]:_(<4 x i32>), [[UV1:%[0-9]+]]:_(<4 x i32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x i32>) + ; SI-NEXT: G_STORE [[UV]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 4, addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: G_STORE [[UV1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, align 4, addrspace 1) ; ; CI-LABEL: name: test_store_global_s256_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) - ; CI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; CI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i32>) = G_BITCAST [[COPY1]](i256) + ; CI-NEXT: [[UV:%[0-9]+]]:_(<4 x i32>), [[UV1:%[0-9]+]]:_(<4 x i32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x i32>) + ; CI-NEXT: G_STORE [[UV]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 4, addrspace 1) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: G_STORE [[UV1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, align 4, addrspace 1) ; ; VI-LABEL: name: test_store_global_s256_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) - ; VI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; VI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 
16 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i32>) = G_BITCAST [[COPY1]](i256) + ; VI-NEXT: [[UV:%[0-9]+]]:_(<4 x i32>), [[UV1:%[0-9]+]]:_(<4 x i32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x i32>) + ; VI-NEXT: G_STORE [[UV]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 4, addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: G_STORE [[UV1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, align 4, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_s256_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; GFX9-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i32>) = G_BITCAST [[COPY1]](i256) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<4 x i32>), [[UV1:%[0-9]+]]:_(<4 x i32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x i32>) + ; GFX9-NEXT: G_STORE [[UV]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 4, addrspace 1) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: G_STORE [[UV1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, align 4, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - G_STORE %1, %0 :: (store (s256), align 4, addrspace 1) + %1:_(i256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + G_STORE %1(i256), %0(p1) :: (store (i256), align 4, addrspace 1) ... 
--- @@ -8643,52 +8643,52 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) - ; SI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; SI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i32>) = G_BITCAST [[COPY1]](i256) + ; SI-NEXT: [[UV:%[0-9]+]]:_(<4 x i32>), [[UV1:%[0-9]+]]:_(<4 x i32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x i32>) + ; SI-NEXT: G_STORE [[UV]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 8, addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: G_STORE [[UV1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, align 8, addrspace 1) ; ; CI-LABEL: name: test_store_global_s256_align8 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) - ; CI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; CI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i32>) = G_BITCAST [[COPY1]](i256) + ; CI-NEXT: [[UV:%[0-9]+]]:_(<4 x i32>), [[UV1:%[0-9]+]]:_(<4 x i32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x i32>) + ; CI-NEXT: G_STORE [[UV]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 8, addrspace 1) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: G_STORE [[UV1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, align 8, addrspace 1) ; ; VI-LABEL: name: test_store_global_s256_align8 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) - ; VI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; VI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 
16 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i32>) = G_BITCAST [[COPY1]](i256) + ; VI-NEXT: [[UV:%[0-9]+]]:_(<4 x i32>), [[UV1:%[0-9]+]]:_(<4 x i32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x i32>) + ; VI-NEXT: G_STORE [[UV]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 8, addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: G_STORE [[UV1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, align 8, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_s256_align8 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; GFX9-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i32>) = G_BITCAST [[COPY1]](i256) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<4 x i32>), [[UV1:%[0-9]+]]:_(<4 x i32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x i32>) + ; GFX9-NEXT: G_STORE [[UV]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 8, addrspace 1) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: G_STORE [[UV1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, align 8, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - G_STORE %1, %0 :: (store (s256), align 8, addrspace 1) + %1:_(i256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + G_STORE %1(i256), %0(p1) :: (store (i256), align 8, addrspace 1) ... 
--- @@ -8701,52 +8701,52 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) - ; SI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; SI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i32>) = G_BITCAST [[COPY1]](i256) + ; SI-NEXT: [[UV:%[0-9]+]]:_(<4 x i32>), [[UV1:%[0-9]+]]:_(<4 x i32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x i32>) + ; SI-NEXT: G_STORE [[UV]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: G_STORE [[UV1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, addrspace 1) ; ; CI-LABEL: name: test_store_global_s256_align16 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) - ; CI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; CI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i32>) = G_BITCAST [[COPY1]](i256) + ; CI-NEXT: [[UV:%[0-9]+]]:_(<4 x i32>), [[UV1:%[0-9]+]]:_(<4 x i32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x i32>) + ; CI-NEXT: G_STORE [[UV]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), addrspace 1) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: G_STORE [[UV1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, addrspace 1) ; ; VI-LABEL: name: test_store_global_s256_align16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) - ; VI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; VI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; 
VI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i32>) = G_BITCAST [[COPY1]](i256) + ; VI-NEXT: [[UV:%[0-9]+]]:_(<4 x i32>), [[UV1:%[0-9]+]]:_(<4 x i32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x i32>) + ; VI-NEXT: G_STORE [[UV]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: G_STORE [[UV1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_s256_align16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; GFX9-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i32>) = G_BITCAST [[COPY1]](i256) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<4 x i32>), [[UV1:%[0-9]+]]:_(<4 x i32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x i32>) + ; GFX9-NEXT: G_STORE [[UV]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), addrspace 1) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: G_STORE [[UV1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - G_STORE %1, %0 :: (store (s256), align 16, addrspace 1) + %1:_(i256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + G_STORE %1(i256), %0(p1) :: (store (i256), align 16, addrspace 1) ... 
--- @@ -8759,52 +8759,52 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) - ; SI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; SI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 32, addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i32>) = G_BITCAST [[COPY1]](i256) + ; SI-NEXT: [[UV:%[0-9]+]]:_(<4 x i32>), [[UV1:%[0-9]+]]:_(<4 x i32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x i32>) + ; SI-NEXT: G_STORE [[UV]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 32, addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: G_STORE [[UV1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, addrspace 1) ; ; CI-LABEL: name: test_store_global_s256_align32 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) - ; CI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; CI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 32, addrspace 1) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(i256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i32>) = G_BITCAST [[COPY1]](i256) + ; CI-NEXT: [[UV:%[0-9]+]]:_(<4 x i32>), [[UV1:%[0-9]+]]:_(<4 x i32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x i32>) + ; CI-NEXT: G_STORE [[UV]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 32, addrspace 1) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: G_STORE [[UV1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, addrspace 1) ; ; VI-LABEL: name: test_store_global_s256_align32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) - ; VI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; VI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 32, addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI-NEXT: 
[[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i32>) = G_BITCAST [[COPY1]](i256) + ; VI-NEXT: [[UV:%[0-9]+]]:_(<4 x i32>), [[UV1:%[0-9]+]]:_(<4 x i32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x i32>) + ; VI-NEXT: G_STORE [[UV]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 32, addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: G_STORE [[UV1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_s256_align32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; GFX9-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 32, addrspace 1) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x i32>) = G_BITCAST [[COPY1]](i256) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<4 x i32>), [[UV1:%[0-9]+]]:_(<4 x i32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x i32>) + ; GFX9-NEXT: G_STORE [[UV]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 32, addrspace 1) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: G_STORE [[UV1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - G_STORE %1, %0 :: (store (s256), align 32, addrspace 1) + %1:_(i256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + G_STORE %1(i256), %0(p1) :: (store (i256), addrspace 1) ... 
--- @@ -8817,48 +8817,48 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; SI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) - ; SI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 32, addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + ; SI-NEXT: [[UV:%[0-9]+]]:_(<4 x i32>), [[UV1:%[0-9]+]]:_(<4 x i32>) = G_UNMERGE_VALUES [[COPY1]](<8 x i32>) + ; SI-NEXT: G_STORE [[UV]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 32, addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: G_STORE [[UV1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, addrspace 1) ; ; CI-LABEL: name: test_store_global_v8s32_align32 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; CI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) - ; CI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 32, addrspace 1) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + ; CI-NEXT: [[UV:%[0-9]+]]:_(<4 x i32>), [[UV1:%[0-9]+]]:_(<4 x i32>) = G_UNMERGE_VALUES [[COPY1]](<8 x i32>) + ; CI-NEXT: G_STORE [[UV]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 32, addrspace 1) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: G_STORE [[UV1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, addrspace 1) ; ; VI-LABEL: name: test_store_global_v8s32_align32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; VI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) - ; VI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 32, addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + ; VI-NEXT: [[UV:%[0-9]+]]:_(<4 x i32>), [[UV1:%[0-9]+]]:_(<4 x i32>) = G_UNMERGE_VALUES 
[[COPY1]](<8 x i32>) + ; VI-NEXT: G_STORE [[UV]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 32, addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: G_STORE [[UV1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v8s32_align32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) - ; GFX9-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 32, addrspace 1) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<8 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<4 x i32>), [[UV1:%[0-9]+]]:_(<4 x i32>) = G_UNMERGE_VALUES [[COPY1]](<8 x i32>) + ; GFX9-NEXT: G_STORE [[UV]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 32, addrspace 1) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: G_STORE [[UV1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - G_STORE %1, %0 :: (store (<8 x s32>), align 32, addrspace 1) + %1:_(<8 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + G_STORE %1(<8 x i32>), %0(p1) :: (store (<8 x i32>), addrspace 1) ... 
--- @@ -8871,364 +8871,364 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) - ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s32) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) - ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY5]](s32) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) - ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY7]](s32) - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR3]], [[COPY8]](s32) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) - ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY9]], 
[[C]](s32) - ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY10]](s32) - ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY9]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) - ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LSHR6]], [[COPY11]](s32) - ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) - ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY12]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY13]](s32) - ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY12]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) - ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LSHR9]], [[COPY14]](s32) - ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) - ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; SI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY15]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; SI-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]] - ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY16]](s32) - ; SI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY15]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR13]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 17, addrspace 1) - ; SI-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[LSHR12]], [[COPY17]](s32) - ; SI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD16]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 18, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR14]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) - 
; SI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) - ; SI-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[COPY18]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s64) - ; SI-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C3]] - ; SI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY19]](s32) - ; SI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY18]](s32), [[PTR_ADD19]](p1) :: (store (s8) into unknown-address + 20, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR16]](s32), [[PTR_ADD21]](p1) :: (store (s8) into unknown-address + 21, addrspace 1) - ; SI-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[LSHR15]], [[COPY20]](s32) - ; SI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD20]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR15]](s32), [[PTR_ADD20]](p1) :: (store (s8) into unknown-address + 22, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR17]](s32), [[PTR_ADD22]](p1) :: (store (s8) into unknown-address + 23, addrspace 1) - ; SI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) - ; SI-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) - ; SI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[COPY21]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64) - ; SI-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]] - ; SI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY22]](s32) - ; SI-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY21]](s32), [[PTR_ADD23]](p1) :: (store (s8) into unknown-address + 24, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR19]](s32), [[PTR_ADD25]](p1) :: (store (s8) into unknown-address + 25, addrspace 1) - ; SI-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[LSHR18]], [[COPY23]](s32) - ; SI-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD24]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR18]](s32), [[PTR_ADD24]](p1) :: (store (s8) into unknown-address + 26, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR20]](s32), [[PTR_ADD26]](p1) :: (store (s8) into unknown-address + 27, addrspace 1) - ; SI-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C7]](s64) - ; SI-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) - ; SI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[COPY24]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C1]](s64) - ; SI-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C3]] - ; SI-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY25]](s32) - ; SI-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY24]](s32), [[PTR_ADD27]](p1) :: (store (s8) into unknown-address + 28, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR22]](s32), [[PTR_ADD29]](p1) :: (store (s8) into unknown-address + 29, addrspace 1) - ; SI-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[LSHR21]], [[COPY26]](s32) - ; SI-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD28]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR21]](s32), [[PTR_ADD28]](p1) :: (store (s8) into unknown-address + 30, 
addrspace 1) - ; SI-NEXT: G_STORE [[LSHR23]](s32), [[PTR_ADD30]](p1) :: (store (s8) into unknown-address + 31, addrspace 1) - ; SI-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; SI-NEXT: [[PTR_ADD31:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64) - ; SI-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) - ; SI-NEXT: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[COPY27]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD32:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD31]], [[C1]](s64) - ; SI-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY27]], [[C3]] - ; SI-NEXT: [[LSHR25:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[COPY28]](s32) - ; SI-NEXT: [[PTR_ADD33:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD31]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY27]](s32), [[PTR_ADD31]](p1) :: (store (s8) into unknown-address + 32, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR25]](s32), [[PTR_ADD33]](p1) :: (store (s8) into unknown-address + 33, addrspace 1) - ; SI-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR26:%[0-9]+]]:_(s32) = G_LSHR [[LSHR24]], [[COPY29]](s32) - ; SI-NEXT: [[PTR_ADD34:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD32]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR24]](s32), [[PTR_ADD32]](p1) :: (store (s8) into unknown-address + 34, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR26]](s32), [[PTR_ADD34]](p1) :: (store (s8) into unknown-address + 35, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr5_vgpr6_vgpr7 + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr7_vgpr8_vgpr9 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; SI-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY2]](<3 x i32>) + ; SI-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY3]](<3 x i32>) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY4]], [[C3]] + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[AND]], [[C2]](i32) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY4]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) + ; SI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD1]](p1) :: (store (i8) into unknown-address + 1, addrspace 1) + ; SI-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[LSHR]], [[COPY5]](i32) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 2, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR2]](i32), [[PTR_ADD2]](p1) :: (store (i8) into unknown-address + 3, addrspace 1) + ; SI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; SI-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[COPY6]], [[C]](i32) + ; SI-NEXT: 
[[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](i64) + ; SI-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY6]], [[C3]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[AND1]], [[COPY7]](i32) + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY6]](i32), [[PTR_ADD3]](p1) :: (store (i8) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR4]](i32), [[PTR_ADD5]](p1) :: (store (i8) into unknown-address + 5, addrspace 1) + ; SI-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[LSHR3]], [[COPY8]](i32) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR3]](i32), [[PTR_ADD4]](p1) :: (store (i8) into unknown-address + 6, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR5]](i32), [[PTR_ADD6]](p1) :: (store (i8) into unknown-address + 7, addrspace 1) + ; SI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; SI-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY [[UV2]](i32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[COPY9]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](i64) + ; SI-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[COPY9]], [[C3]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(i32) = G_LSHR [[AND2]], [[COPY10]](i32) + ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY9]](i32), [[PTR_ADD7]](p1) :: (store (i8) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR7]](i32), [[PTR_ADD9]](p1) :: (store (i8) into unknown-address + 9, addrspace 1) + ; SI-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(i32) = G_LSHR [[LSHR6]], [[COPY11]](i32) + ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR6]](i32), [[PTR_ADD8]](p1) :: (store (i8) into unknown-address + 10, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR8]](i32), [[PTR_ADD10]](p1) :: (store (i8) into unknown-address + 11, addrspace 1) + ; SI-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](i64) + ; SI-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY [[UV3]](i32) + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(i32) = G_LSHR [[COPY12]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](i64) + ; SI-NEXT: [[COPY13:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[COPY12]], [[C3]] + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(i32) = G_LSHR [[AND3]], [[COPY13]](i32) + ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY12]](i32), [[PTR_ADD11]](p1) :: (store (i8) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR10]](i32), [[PTR_ADD13]](p1) :: (store (i8) into unknown-address + 13, addrspace 1) + ; SI-NEXT: [[COPY14:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(i32) = G_LSHR [[LSHR9]], [[COPY14]](i32) + ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR9]](i32), [[PTR_ADD12]](p1) :: (store (i8) into unknown-address + 14, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR11]](i32), [[PTR_ADD14]](p1) :: (store (i8) into unknown-address + 15, addrspace 1) + ; SI-NEXT: [[C8:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + 
; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](i64) + ; SI-NEXT: [[COPY15:%[0-9]+]]:_(i32) = COPY [[UV4]](i32) + ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(i32) = G_LSHR [[COPY15]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](i64) + ; SI-NEXT: [[COPY16:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[COPY15]], [[C3]] + ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(i32) = G_LSHR [[AND4]], [[COPY16]](i32) + ; SI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY15]](i32), [[PTR_ADD15]](p1) :: (store (i8) into unknown-address + 16, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR13]](i32), [[PTR_ADD17]](p1) :: (store (i8) into unknown-address + 17, addrspace 1) + ; SI-NEXT: [[COPY17:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(i32) = G_LSHR [[LSHR12]], [[COPY17]](i32) + ; SI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD16]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR12]](i32), [[PTR_ADD16]](p1) :: (store (i8) into unknown-address + 18, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR14]](i32), [[PTR_ADD18]](p1) :: (store (i8) into unknown-address + 19, addrspace 1) + ; SI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](i64) + ; SI-NEXT: [[COPY18:%[0-9]+]]:_(i32) = COPY [[UV5]](i32) + ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(i32) = G_LSHR [[COPY18]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C1]](i64) + ; SI-NEXT: [[COPY19:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[COPY18]], [[C3]] + ; SI-NEXT: [[LSHR16:%[0-9]+]]:_(i32) = G_LSHR [[AND5]], [[COPY19]](i32) + ; SI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY18]](i32), [[PTR_ADD19]](p1) :: (store (i8) into unknown-address + 20, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR16]](i32), [[PTR_ADD21]](p1) :: (store (i8) into unknown-address + 21, addrspace 1) + ; SI-NEXT: [[COPY20:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR17:%[0-9]+]]:_(i32) = G_LSHR [[LSHR15]], [[COPY20]](i32) + ; SI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD20]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR15]](i32), [[PTR_ADD20]](p1) :: (store (i8) into unknown-address + 22, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR17]](i32), [[PTR_ADD22]](p1) :: (store (i8) into unknown-address + 23, addrspace 1) + ; SI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](i64) + ; SI-NEXT: [[COPY21:%[0-9]+]]:_(i32) = COPY [[UV6]](i32) + ; SI-NEXT: [[LSHR18:%[0-9]+]]:_(i32) = G_LSHR [[COPY21]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C1]](i64) + ; SI-NEXT: [[COPY22:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(i32) = G_AND [[COPY21]], [[C3]] + ; SI-NEXT: [[LSHR19:%[0-9]+]]:_(i32) = G_LSHR [[AND6]], [[COPY22]](i32) + ; SI-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY21]](i32), [[PTR_ADD23]](p1) :: (store (i8) into unknown-address + 24, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR19]](i32), [[PTR_ADD25]](p1) :: (store (i8) into unknown-address + 25, addrspace 1) + ; SI-NEXT: [[COPY23:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR20:%[0-9]+]]:_(i32) = G_LSHR [[LSHR18]], [[COPY23]](i32) + ; SI-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD24]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR18]](i32), [[PTR_ADD24]](p1) :: (store (i8) into unknown-address + 26, addrspace 1) 
+ ; SI-NEXT: G_STORE [[LSHR20]](i32), [[PTR_ADD26]](p1) :: (store (i8) into unknown-address + 27, addrspace 1) + ; SI-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C7]](i64) + ; SI-NEXT: [[COPY24:%[0-9]+]]:_(i32) = COPY [[UV7]](i32) + ; SI-NEXT: [[LSHR21:%[0-9]+]]:_(i32) = G_LSHR [[COPY24]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C1]](i64) + ; SI-NEXT: [[COPY25:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[AND7:%[0-9]+]]:_(i32) = G_AND [[COPY24]], [[C3]] + ; SI-NEXT: [[LSHR22:%[0-9]+]]:_(i32) = G_LSHR [[AND7]], [[COPY25]](i32) + ; SI-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY24]](i32), [[PTR_ADD27]](p1) :: (store (i8) into unknown-address + 28, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR22]](i32), [[PTR_ADD29]](p1) :: (store (i8) into unknown-address + 29, addrspace 1) + ; SI-NEXT: [[COPY26:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR23:%[0-9]+]]:_(i32) = G_LSHR [[LSHR21]], [[COPY26]](i32) + ; SI-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD28]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR21]](i32), [[PTR_ADD28]](p1) :: (store (i8) into unknown-address + 30, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR23]](i32), [[PTR_ADD30]](p1) :: (store (i8) into unknown-address + 31, addrspace 1) + ; SI-NEXT: [[C9:%[0-9]+]]:_(i64) = G_CONSTANT i64 32 + ; SI-NEXT: [[PTR_ADD31:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](i64) + ; SI-NEXT: [[COPY27:%[0-9]+]]:_(i32) = COPY [[UV8]](i32) + ; SI-NEXT: [[LSHR24:%[0-9]+]]:_(i32) = G_LSHR [[COPY27]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD32:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD31]], [[C1]](i64) + ; SI-NEXT: [[COPY28:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[AND8:%[0-9]+]]:_(i32) = G_AND [[COPY27]], [[C3]] + ; SI-NEXT: [[LSHR25:%[0-9]+]]:_(i32) = G_LSHR [[AND8]], [[COPY28]](i32) + ; SI-NEXT: [[PTR_ADD33:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD31]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY27]](i32), [[PTR_ADD31]](p1) :: (store (i8) into unknown-address + 32, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR25]](i32), [[PTR_ADD33]](p1) :: (store (i8) into unknown-address + 33, addrspace 1) + ; SI-NEXT: [[COPY29:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR26:%[0-9]+]]:_(i32) = G_LSHR [[LSHR24]], [[COPY29]](i32) + ; SI-NEXT: [[PTR_ADD34:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD32]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR24]](i32), [[PTR_ADD32]](p1) :: (store (i8) into unknown-address + 34, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR26]](i32), [[PTR_ADD34]](p1) :: (store (i8) into unknown-address + 35, addrspace 1) ; ; CI-LABEL: name: test_store_global_v9s32_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 - ; CI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; CI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) - ; CI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; CI-NEXT: 
[[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32) - ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 1, addrspace 1) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 1, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + ; CI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr5_vgpr6_vgpr7 + ; CI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr7_vgpr8_vgpr9 + ; CI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; CI-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY2]](<3 x i32>) + ; CI-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY3]](<3 x i32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV4]](i32), [[UV5]](i32), [[UV6]](i32), [[UV7]](i32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 1, addrspace 1) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, align 1, addrspace 1) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 32 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-NEXT: G_STORE [[UV8]](i32), [[PTR_ADD1]](p1) :: (store (i32) into unknown-address + 32, align 1, addrspace 1) ; ; VI-LABEL: name: test_store_global_v9s32_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) - ; VI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C2]](s16) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) 
= G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) - ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) - ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) - ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) - ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) - ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) - ; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16) - ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) - ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) - ; VI-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR7]](s16) - ; VI-NEXT: G_STORE [[ANYEXT4]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) - ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) - ; VI-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR8]](s16) - ; VI-NEXT: G_STORE [[ANYEXT5]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) - ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD 
[[COPY]], [[C6]](s64) - ; VI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY7]](s32) - ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC6]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) - ; VI-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR10]](s16) - ; VI-NEXT: G_STORE [[ANYEXT6]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) - ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) - ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC7]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) - ; VI-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR11]](s16) - ; VI-NEXT: G_STORE [[ANYEXT7]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) - ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32) - ; VI-NEXT: [[LSHR13:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC8]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY8]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) - ; VI-NEXT: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR13]](s16) - ; VI-NEXT: G_STORE [[ANYEXT8]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 17, addrspace 1) - ; VI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR12]](s32) - ; VI-NEXT: [[LSHR14:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC9]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD16]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 18, addrspace 1) - ; VI-NEXT: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR14]](s16) - ; VI-NEXT: G_STORE [[ANYEXT9]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) - ; VI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; VI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; VI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[COPY9]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s64) - ; VI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; VI-NEXT: [[LSHR16:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC10]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY9]](s32), [[PTR_ADD19]](p1) :: (store (s8) into unknown-address + 20, addrspace 1) - ; VI-NEXT: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR16]](s16) - ; VI-NEXT: G_STORE [[ANYEXT10]](s32), [[PTR_ADD21]](p1) :: (store (s8) into unknown-address + 21, addrspace 1) - ; VI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR15]](s32) - ; VI-NEXT: [[LSHR17:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC11]], [[C2]](s16) - ; VI-NEXT: 
[[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD20]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR15]](s32), [[PTR_ADD20]](p1) :: (store (s8) into unknown-address + 22, addrspace 1) - ; VI-NEXT: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR17]](s16) - ; VI-NEXT: G_STORE [[ANYEXT11]](s32), [[PTR_ADD22]](p1) :: (store (s8) into unknown-address + 23, addrspace 1) - ; VI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) - ; VI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) - ; VI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64) - ; VI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; VI-NEXT: [[LSHR19:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC12]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY10]](s32), [[PTR_ADD23]](p1) :: (store (s8) into unknown-address + 24, addrspace 1) - ; VI-NEXT: [[ANYEXT12:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR19]](s16) - ; VI-NEXT: G_STORE [[ANYEXT12]](s32), [[PTR_ADD25]](p1) :: (store (s8) into unknown-address + 25, addrspace 1) - ; VI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR18]](s32) - ; VI-NEXT: [[LSHR20:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC13]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD24]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR18]](s32), [[PTR_ADD24]](p1) :: (store (s8) into unknown-address + 26, addrspace 1) - ; VI-NEXT: [[ANYEXT13:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR20]](s16) - ; VI-NEXT: G_STORE [[ANYEXT13]](s32), [[PTR_ADD26]](p1) :: (store (s8) into unknown-address + 27, addrspace 1) - ; VI-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) - ; VI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) - ; VI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[COPY11]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C1]](s64) - ; VI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) - ; VI-NEXT: [[LSHR22:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC14]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY11]](s32), [[PTR_ADD27]](p1) :: (store (s8) into unknown-address + 28, addrspace 1) - ; VI-NEXT: [[ANYEXT14:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR22]](s16) - ; VI-NEXT: G_STORE [[ANYEXT14]](s32), [[PTR_ADD29]](p1) :: (store (s8) into unknown-address + 29, addrspace 1) - ; VI-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR21]](s32) - ; VI-NEXT: [[LSHR23:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC15]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD28]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR21]](s32), [[PTR_ADD28]](p1) :: (store (s8) into unknown-address + 30, addrspace 1) - ; VI-NEXT: [[ANYEXT15:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR23]](s16) - ; VI-NEXT: G_STORE [[ANYEXT15]](s32), [[PTR_ADD30]](p1) :: (store (s8) into unknown-address + 31, addrspace 1) - ; VI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; VI-NEXT: [[PTR_ADD31:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; VI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) - ; VI-NEXT: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[COPY12]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD32:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD31]], [[C1]](s64) - ; VI-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; VI-NEXT: [[LSHR25:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC16]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD33:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD31]], [[C3]](s64) - ; VI-NEXT: 
G_STORE [[COPY12]](s32), [[PTR_ADD31]](p1) :: (store (s8) into unknown-address + 32, addrspace 1) - ; VI-NEXT: [[ANYEXT16:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR25]](s16) - ; VI-NEXT: G_STORE [[ANYEXT16]](s32), [[PTR_ADD33]](p1) :: (store (s8) into unknown-address + 33, addrspace 1) - ; VI-NEXT: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR24]](s32) - ; VI-NEXT: [[LSHR26:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC17]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD34:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD32]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR24]](s32), [[PTR_ADD32]](p1) :: (store (s8) into unknown-address + 34, addrspace 1) - ; VI-NEXT: [[ANYEXT17:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR26]](s16) - ; VI-NEXT: G_STORE [[ANYEXT17]](s32), [[PTR_ADD34]](p1) :: (store (s8) into unknown-address + 35, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr5_vgpr6_vgpr7 + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr7_vgpr8_vgpr9 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; VI-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY2]](<3 x i32>) + ; VI-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY3]](<3 x i32>) + ; VI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY4]](i32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC]], [[C2]](i16) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY4]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR1]](i16) + ; VI-NEXT: G_STORE [[ANYEXT]](i32), [[PTR_ADD1]](p1) :: (store (i8) into unknown-address + 1, addrspace 1) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC1]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 2, addrspace 1) + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR2]](i16) + ; VI-NEXT: G_STORE [[ANYEXT1]](i32), [[PTR_ADD2]](p1) :: (store (i8) into unknown-address + 3, addrspace 1) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[COPY5]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](i64) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY5]](i32) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC2]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY5]](i32), [[PTR_ADD3]](p1) :: (store (i8) into unknown-address + 4, addrspace 1) + ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR4]](i16) + ; VI-NEXT: G_STORE [[ANYEXT2]](i32), [[PTR_ADD5]](p1) :: (store (i8) into unknown-address + 5, 
addrspace 1) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC3]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR3]](i32), [[PTR_ADD4]](p1) :: (store (i8) into unknown-address + 6, addrspace 1) + ; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR5]](i16) + ; VI-NEXT: G_STORE [[ANYEXT3]](i32), [[PTR_ADD6]](p1) :: (store (i8) into unknown-address + 7, addrspace 1) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; VI-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY [[UV2]](i32) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[COPY6]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](i64) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY6]](i32) + ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC4]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY6]](i32), [[PTR_ADD7]](p1) :: (store (i8) into unknown-address + 8, addrspace 1) + ; VI-NEXT: [[ANYEXT4:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR7]](i16) + ; VI-NEXT: G_STORE [[ANYEXT4]](i32), [[PTR_ADD9]](p1) :: (store (i8) into unknown-address + 9, addrspace 1) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR6]](i32) + ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC5]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR6]](i32), [[PTR_ADD8]](p1) :: (store (i8) into unknown-address + 10, addrspace 1) + ; VI-NEXT: [[ANYEXT5:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR8]](i16) + ; VI-NEXT: G_STORE [[ANYEXT5]](i32), [[PTR_ADD10]](p1) :: (store (i8) into unknown-address + 11, addrspace 1) + ; VI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; VI-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY [[UV3]](i32) + ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(i32) = G_LSHR [[COPY7]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](i64) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[COPY7]](i32) + ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC6]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY7]](i32), [[PTR_ADD11]](p1) :: (store (i8) into unknown-address + 12, addrspace 1) + ; VI-NEXT: [[ANYEXT6:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR10]](i16) + ; VI-NEXT: G_STORE [[ANYEXT6]](i32), [[PTR_ADD13]](p1) :: (store (i8) into unknown-address + 13, addrspace 1) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR9]](i32) + ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC7]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR9]](i32), [[PTR_ADD12]](p1) :: (store (i8) into unknown-address + 14, addrspace 1) + ; VI-NEXT: [[ANYEXT7:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR11]](i16) + ; VI-NEXT: G_STORE [[ANYEXT7]](i32), [[PTR_ADD14]](p1) :: (store (i8) into unknown-address + 15, addrspace 1) + ; VI-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](i64) + ; VI-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY [[UV4]](i32) + ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(i32) = G_LSHR [[COPY8]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](i64) + ; VI-NEXT: 
[[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[COPY8]](i32) + ; VI-NEXT: [[LSHR13:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC8]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY8]](i32), [[PTR_ADD15]](p1) :: (store (i8) into unknown-address + 16, addrspace 1) + ; VI-NEXT: [[ANYEXT8:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR13]](i16) + ; VI-NEXT: G_STORE [[ANYEXT8]](i32), [[PTR_ADD17]](p1) :: (store (i8) into unknown-address + 17, addrspace 1) + ; VI-NEXT: [[TRUNC9:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR12]](i32) + ; VI-NEXT: [[LSHR14:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC9]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD16]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR12]](i32), [[PTR_ADD16]](p1) :: (store (i8) into unknown-address + 18, addrspace 1) + ; VI-NEXT: [[ANYEXT9:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR14]](i16) + ; VI-NEXT: G_STORE [[ANYEXT9]](i32), [[PTR_ADD18]](p1) :: (store (i8) into unknown-address + 19, addrspace 1) + ; VI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](i64) + ; VI-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY [[UV5]](i32) + ; VI-NEXT: [[LSHR15:%[0-9]+]]:_(i32) = G_LSHR [[COPY9]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C1]](i64) + ; VI-NEXT: [[TRUNC10:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; VI-NEXT: [[LSHR16:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC10]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY9]](i32), [[PTR_ADD19]](p1) :: (store (i8) into unknown-address + 20, addrspace 1) + ; VI-NEXT: [[ANYEXT10:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR16]](i16) + ; VI-NEXT: G_STORE [[ANYEXT10]](i32), [[PTR_ADD21]](p1) :: (store (i8) into unknown-address + 21, addrspace 1) + ; VI-NEXT: [[TRUNC11:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR15]](i32) + ; VI-NEXT: [[LSHR17:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC11]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD20]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR15]](i32), [[PTR_ADD20]](p1) :: (store (i8) into unknown-address + 22, addrspace 1) + ; VI-NEXT: [[ANYEXT11:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR17]](i16) + ; VI-NEXT: G_STORE [[ANYEXT11]](i32), [[PTR_ADD22]](p1) :: (store (i8) into unknown-address + 23, addrspace 1) + ; VI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](i64) + ; VI-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY [[UV6]](i32) + ; VI-NEXT: [[LSHR18:%[0-9]+]]:_(i32) = G_LSHR [[COPY10]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C1]](i64) + ; VI-NEXT: [[TRUNC12:%[0-9]+]]:_(i16) = G_TRUNC [[COPY10]](i32) + ; VI-NEXT: [[LSHR19:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC12]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY10]](i32), [[PTR_ADD23]](p1) :: (store (i8) into unknown-address + 24, addrspace 1) + ; VI-NEXT: [[ANYEXT12:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR19]](i16) + ; VI-NEXT: G_STORE [[ANYEXT12]](i32), [[PTR_ADD25]](p1) :: (store (i8) into unknown-address + 25, addrspace 1) + ; VI-NEXT: [[TRUNC13:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR18]](i32) + ; VI-NEXT: [[LSHR20:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC13]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD24]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR18]](i32), [[PTR_ADD24]](p1) :: (store (i8) into unknown-address + 26, addrspace 1) + ; VI-NEXT: [[ANYEXT13:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR20]](i16) + ; VI-NEXT: G_STORE [[ANYEXT13]](i32), 
[[PTR_ADD26]](p1) :: (store (i8) into unknown-address + 27, addrspace 1) + ; VI-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](i64) + ; VI-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY [[UV7]](i32) + ; VI-NEXT: [[LSHR21:%[0-9]+]]:_(i32) = G_LSHR [[COPY11]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C1]](i64) + ; VI-NEXT: [[TRUNC14:%[0-9]+]]:_(i16) = G_TRUNC [[COPY11]](i32) + ; VI-NEXT: [[LSHR22:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC14]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY11]](i32), [[PTR_ADD27]](p1) :: (store (i8) into unknown-address + 28, addrspace 1) + ; VI-NEXT: [[ANYEXT14:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR22]](i16) + ; VI-NEXT: G_STORE [[ANYEXT14]](i32), [[PTR_ADD29]](p1) :: (store (i8) into unknown-address + 29, addrspace 1) + ; VI-NEXT: [[TRUNC15:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR21]](i32) + ; VI-NEXT: [[LSHR23:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC15]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD28]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR21]](i32), [[PTR_ADD28]](p1) :: (store (i8) into unknown-address + 30, addrspace 1) + ; VI-NEXT: [[ANYEXT15:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR23]](i16) + ; VI-NEXT: G_STORE [[ANYEXT15]](i32), [[PTR_ADD30]](p1) :: (store (i8) into unknown-address + 31, addrspace 1) + ; VI-NEXT: [[C8:%[0-9]+]]:_(i64) = G_CONSTANT i64 32 + ; VI-NEXT: [[PTR_ADD31:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](i64) + ; VI-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY [[UV8]](i32) + ; VI-NEXT: [[LSHR24:%[0-9]+]]:_(i32) = G_LSHR [[COPY12]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD32:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD31]], [[C1]](i64) + ; VI-NEXT: [[TRUNC16:%[0-9]+]]:_(i16) = G_TRUNC [[COPY12]](i32) + ; VI-NEXT: [[LSHR25:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC16]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD33:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD31]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY12]](i32), [[PTR_ADD31]](p1) :: (store (i8) into unknown-address + 32, addrspace 1) + ; VI-NEXT: [[ANYEXT16:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR25]](i16) + ; VI-NEXT: G_STORE [[ANYEXT16]](i32), [[PTR_ADD33]](p1) :: (store (i8) into unknown-address + 33, addrspace 1) + ; VI-NEXT: [[TRUNC17:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR24]](i32) + ; VI-NEXT: [[LSHR26:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC17]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD34:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD32]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR24]](i32), [[PTR_ADD32]](p1) :: (store (i8) into unknown-address + 34, addrspace 1) + ; VI-NEXT: [[ANYEXT17:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR26]](i16) + ; VI-NEXT: G_STORE [[ANYEXT17]](i32), [[PTR_ADD34]](p1) :: (store (i8) into unknown-address + 35, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v9s32_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x 
s32>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32) - ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 1, addrspace 1) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 1, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr5_vgpr6_vgpr7 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr7_vgpr8_vgpr9 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY2]](<3 x i32>) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY3]](<3 x i32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV4]](i32), [[UV5]](i32), [[UV6]](i32), [[UV7]](i32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 1, addrspace 1) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, align 1, addrspace 1) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 32 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; GFX9-NEXT: G_STORE [[UV8]](i32), [[PTR_ADD1]](p1) :: (store (i32) into unknown-address + 32, align 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - %2:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 - %3:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - %4:_(<9 x s32>) = G_CONCAT_VECTORS %1, %2, %3 - G_STORE %4, %0 :: (store (<9 x s32>), align 1, addrspace 1) + %1:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + %2:_(<3 x i32>) = COPY $vgpr5_vgpr6_vgpr7 + %3:_(<3 x i32>) = COPY $vgpr7_vgpr8_vgpr9 + %4:_(<9 x i32>) = G_CONCAT_VECTORS %1(<3 x i32>), %2(<3 x i32>), %3(<3 x i32>) + G_STORE %4(<9 x i32>), %0(p1) :: (store (<9 x i32>), align 1, addrspace 1) ... 
--- @@ -9241,189 +9241,189 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) - ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) - ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) - ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY8]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) - ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], 
[[C2]](s64) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY9]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY9]](s32), [[PTR_ADD9]](p1) :: (store (s16) into unknown-address + 20, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD10]](p1) :: (store (s16) into unknown-address + 22, addrspace 1) - ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY10]](s32), [[PTR_ADD11]](p1) :: (store (s16) into unknown-address + 24, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD12]](p1) :: (store (s16) into unknown-address + 26, addrspace 1) - ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[COPY11]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY11]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD14]](p1) :: (store (s16) into unknown-address + 30, addrspace 1) - ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[COPY12]], [[C]](s32) - ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY12]](s32), [[PTR_ADD15]](p1) :: (store (s16) into unknown-address + 32, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD16]](p1) :: (store (s16) into unknown-address + 34, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr5_vgpr6_vgpr7 + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr7_vgpr8_vgpr9 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; SI-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY2]](<3 x i32>) + ; SI-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY3]](<3 x i32>) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY4]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) + ; SI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i16) into unknown-address + 2, addrspace 1) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; SI-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY5]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY5]](i32), [[PTR_ADD1]](p1) :: (store (i16) into unknown-address + 4, addrspace 1) + ; SI-NEXT: 
G_STORE [[LSHR1]](i32), [[PTR_ADD2]](p1) :: (store (i16) into unknown-address + 6, addrspace 1) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; SI-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY [[UV2]](i32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[COPY6]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY6]](i32), [[PTR_ADD3]](p1) :: (store (i16) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR2]](i32), [[PTR_ADD4]](p1) :: (store (i16) into unknown-address + 10, addrspace 1) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; SI-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY [[UV3]](i32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[COPY7]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY7]](i32), [[PTR_ADD5]](p1) :: (store (i16) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR3]](i32), [[PTR_ADD6]](p1) :: (store (i16) into unknown-address + 14, addrspace 1) + ; SI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; SI-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY [[UV4]](i32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[COPY8]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY8]](i32), [[PTR_ADD7]](p1) :: (store (i16) into unknown-address + 16, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR4]](i32), [[PTR_ADD8]](p1) :: (store (i16) into unknown-address + 18, addrspace 1) + ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; SI-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY [[UV5]](i32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[COPY9]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY9]](i32), [[PTR_ADD9]](p1) :: (store (i16) into unknown-address + 20, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR5]](i32), [[PTR_ADD10]](p1) :: (store (i16) into unknown-address + 22, addrspace 1) + ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](i64) + ; SI-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY [[UV6]](i32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[COPY10]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY10]](i32), [[PTR_ADD11]](p1) :: (store (i16) into unknown-address + 24, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR6]](i32), [[PTR_ADD12]](p1) :: (store (i16) into unknown-address + 26, addrspace 1) + ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i64) + ; SI-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY [[UV7]](i32) + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(i32) = G_LSHR [[COPY11]], [[C]](i32) + ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY11]](i32), [[PTR_ADD13]](p1) :: (store (i16) into unknown-address + 28, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR7]](i32), [[PTR_ADD14]](p1) :: (store (i16) into unknown-address + 30, addrspace 1) + ; SI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 32 + ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; SI-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY [[UV8]](i32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(i32) = G_LSHR [[COPY12]], [[C]](i32) + ; SI-NEXT: 
[[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY12]](i32), [[PTR_ADD15]](p1) :: (store (i16) into unknown-address + 32, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR8]](i32), [[PTR_ADD16]](p1) :: (store (i16) into unknown-address + 34, addrspace 1) ; ; CI-LABEL: name: test_store_global_v9s32_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 - ; CI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; CI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) - ; CI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32) - ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 2, addrspace 1) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 2, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + ; CI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr5_vgpr6_vgpr7 + ; CI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr7_vgpr8_vgpr9 + ; CI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; CI-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY2]](<3 x i32>) + ; CI-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY3]](<3 x i32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV4]](i32), [[UV5]](i32), [[UV6]](i32), [[UV7]](i32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 2, addrspace 1) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, align 2, addrspace 1) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 32 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-NEXT: G_STORE [[UV8]](i32), [[PTR_ADD1]](p1) :: (store (i32) into unknown-address + 32, align 2, addrspace 1) ; ; VI-LABEL: name: test_store_global_v9s32_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; VI-NEXT: {{ $}} ; 
VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) - ; VI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) - ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY8]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) - ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; VI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY9]], [[C]](s32) - ; VI-NEXT: 
[[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY9]](s32), [[PTR_ADD9]](p1) :: (store (s16) into unknown-address + 20, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD10]](p1) :: (store (s16) into unknown-address + 22, addrspace 1) - ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY10]](s32), [[PTR_ADD11]](p1) :: (store (s16) into unknown-address + 24, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD12]](p1) :: (store (s16) into unknown-address + 26, addrspace 1) - ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; VI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) - ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[COPY11]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY11]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD14]](p1) :: (store (s16) into unknown-address + 30, addrspace 1) - ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) - ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[COPY12]], [[C]](s32) - ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY12]](s32), [[PTR_ADD15]](p1) :: (store (s16) into unknown-address + 32, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD16]](p1) :: (store (s16) into unknown-address + 34, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr5_vgpr6_vgpr7 + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr7_vgpr8_vgpr9 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; VI-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY2]](<3 x i32>) + ; VI-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY3]](<3 x i32>) + ; VI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[UV]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY4]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) + ; VI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i16) into unknown-address + 2, addrspace 1) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[UV1]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY5]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY5]](i32), [[PTR_ADD1]](p1) :: (store (i16) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD2]](p1) :: (store (i16) into unknown-address + 6, addrspace 1) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; 
VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY [[UV2]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[COPY6]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY6]](i32), [[PTR_ADD3]](p1) :: (store (i16) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR2]](i32), [[PTR_ADD4]](p1) :: (store (i16) into unknown-address + 10, addrspace 1) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 12 + ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY [[UV3]](i32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[COPY7]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY7]](i32), [[PTR_ADD5]](p1) :: (store (i16) into unknown-address + 12, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR3]](i32), [[PTR_ADD6]](p1) :: (store (i16) into unknown-address + 14, addrspace 1) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](i64) + ; VI-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY [[UV4]](i32) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[COPY8]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY8]](i32), [[PTR_ADD7]](p1) :: (store (i16) into unknown-address + 16, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR4]](i32), [[PTR_ADD8]](p1) :: (store (i16) into unknown-address + 18, addrspace 1) + ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](i64) + ; VI-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY [[UV5]](i32) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[COPY9]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY9]](i32), [[PTR_ADD9]](p1) :: (store (i16) into unknown-address + 20, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR5]](i32), [[PTR_ADD10]](p1) :: (store (i16) into unknown-address + 22, addrspace 1) + ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](i64) + ; VI-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY [[UV6]](i32) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[COPY10]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY10]](i32), [[PTR_ADD11]](p1) :: (store (i16) into unknown-address + 24, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR6]](i32), [[PTR_ADD12]](p1) :: (store (i16) into unknown-address + 26, addrspace 1) + ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](i64) + ; VI-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY [[UV7]](i32) + ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(i32) = G_LSHR [[COPY11]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY11]](i32), [[PTR_ADD13]](p1) :: (store (i16) into unknown-address + 28, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR7]](i32), [[PTR_ADD14]](p1) :: (store (i16) into unknown-address + 30, addrspace 1) + ; VI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 32 + ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; VI-NEXT: [[COPY12:%[0-9]+]]:_(i32) = COPY [[UV8]](i32) + ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(i32) = G_LSHR [[COPY12]], [[C]](i32) + ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY12]](i32), [[PTR_ADD15]](p1) :: (store (i16) into 
unknown-address + 32, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR8]](i32), [[PTR_ADD16]](p1) :: (store (i16) into unknown-address + 34, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v9s32_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32) - ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 2, addrspace 1) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 2, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr5_vgpr6_vgpr7 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr7_vgpr8_vgpr9 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY2]](<3 x i32>) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY3]](<3 x i32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV4]](i32), [[UV5]](i32), [[UV6]](i32), [[UV7]](i32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 2, addrspace 1) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, align 2, addrspace 1) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 32 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; GFX9-NEXT: G_STORE [[UV8]](i32), [[PTR_ADD1]](p1) :: (store (i32) into unknown-address + 32, align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - %2:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 - %3:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - %4:_(<9 x s32>) = G_CONCAT_VECTORS %1, %2, %3 - G_STORE %4, %0 :: (store 
(<9 x s32>), align 2, addrspace 1) + %1:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + %2:_(<3 x i32>) = COPY $vgpr5_vgpr6_vgpr7 + %3:_(<3 x i32>) = COPY $vgpr7_vgpr8_vgpr9 + %4:_(<9 x i32>) = G_CONCAT_VECTORS %1(<3 x i32>), %2(<3 x i32>), %3(<3 x i32>) + G_STORE %4(<9 x i32>), %0(p1) :: (store (<9 x i32>), align 2, addrspace 1) ... --- @@ -9436,87 +9436,87 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) - ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32) - ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr5_vgpr6_vgpr7 + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr7_vgpr8_vgpr9 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; SI-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY2]](<3 x i32>) + ; SI-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY3]](<3 x i32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV4]](i32), [[UV5]](i32), [[UV6]](i32), [[UV7]](i32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 4, addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, align 4, addrspace 1) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 32 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; SI-NEXT: G_STORE [[UV8]](i32), [[PTR_ADD1]](p1) :: (store (i32) into unknown-address + 32, addrspace 1) ; ; CI-LABEL: name: test_store_global_v9s32_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; CI-NEXT: {{ 
$}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 - ; CI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; CI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) - ; CI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32) - ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + ; CI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr5_vgpr6_vgpr7 + ; CI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr7_vgpr8_vgpr9 + ; CI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; CI-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY2]](<3 x i32>) + ; CI-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY3]](<3 x i32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV4]](i32), [[UV5]](i32), [[UV6]](i32), [[UV7]](i32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 4, addrspace 1) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, align 4, addrspace 1) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 32 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-NEXT: G_STORE [[UV8]](i32), [[PTR_ADD1]](p1) :: (store (i32) into unknown-address + 32, addrspace 1) ; ; VI-LABEL: name: test_store_global_v9s32_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), 
[[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) - ; VI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32) - ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr5_vgpr6_vgpr7 + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr7_vgpr8_vgpr9 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; VI-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY2]](<3 x i32>) + ; VI-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY3]](<3 x i32>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV4]](i32), [[UV5]](i32), [[UV6]](i32), [[UV7]](i32) + ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 4, addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, align 4, addrspace 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 32 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: G_STORE [[UV8]](i32), [[PTR_ADD1]](p1) :: (store (i32) into unknown-address + 32, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v9s32_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32) - ; 
GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr5_vgpr6_vgpr7 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr7_vgpr8_vgpr9 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY2]](<3 x i32>) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY3]](<3 x i32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV4]](i32), [[UV5]](i32), [[UV6]](i32), [[UV7]](i32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 4, addrspace 1) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, align 4, addrspace 1) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 32 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; GFX9-NEXT: G_STORE [[UV8]](i32), [[PTR_ADD1]](p1) :: (store (i32) into unknown-address + 32, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - %2:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 - %3:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - %4:_(<9 x s32>) = G_CONCAT_VECTORS %1, %2, %3 - G_STORE %4, %0 :: (store (<9 x s32>), align 4, addrspace 1) + %1:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + %2:_(<3 x i32>) = COPY $vgpr5_vgpr6_vgpr7 + %3:_(<3 x i32>) = COPY $vgpr7_vgpr8_vgpr9 + %4:_(<9 x i32>) = G_CONCAT_VECTORS %1(<3 x i32>), %2(<3 x i32>), %3(<3 x i32>) + G_STORE %4(<9 x i32>), %0(p1) :: (store (<9 x i32>), align 4, addrspace 1) ... 
--- @@ -9529,87 +9529,87 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) - ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32) - ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 8, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr5_vgpr6_vgpr7 + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr7_vgpr8_vgpr9 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; SI-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY2]](<3 x i32>) + ; SI-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY3]](<3 x i32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV4]](i32), [[UV5]](i32), [[UV6]](i32), [[UV7]](i32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 8, addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, align 8, addrspace 1) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 32 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; SI-NEXT: G_STORE [[UV8]](i32), [[PTR_ADD1]](p1) :: (store (i32) into unknown-address + 32, align 8, addrspace 1) ; ; CI-LABEL: name: test_store_global_v9s32_align8 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 - ; CI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), 
[[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; CI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) - ; CI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32) - ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 8, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + ; CI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr5_vgpr6_vgpr7 + ; CI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr7_vgpr8_vgpr9 + ; CI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; CI-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY2]](<3 x i32>) + ; CI-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY3]](<3 x i32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV4]](i32), [[UV5]](i32), [[UV6]](i32), [[UV7]](i32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 8, addrspace 1) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, align 8, addrspace 1) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 32 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-NEXT: G_STORE [[UV8]](i32), [[PTR_ADD1]](p1) :: (store (i32) into unknown-address + 32, align 8, addrspace 1) ; ; VI-LABEL: name: test_store_global_v9s32_align8 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) - ; VI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), 
[[UV3]](s32) - ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32) - ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 8, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr5_vgpr6_vgpr7 + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr7_vgpr8_vgpr9 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; VI-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY2]](<3 x i32>) + ; VI-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY3]](<3 x i32>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV4]](i32), [[UV5]](i32), [[UV6]](i32), [[UV7]](i32) + ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 8, addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, align 8, addrspace 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 32 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: G_STORE [[UV8]](i32), [[PTR_ADD1]](p1) :: (store (i32) into unknown-address + 32, align 8, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v9s32_align8 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32) - ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x 
s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 8, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr5_vgpr6_vgpr7 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr7_vgpr8_vgpr9 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY2]](<3 x i32>) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY3]](<3 x i32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV4]](i32), [[UV5]](i32), [[UV6]](i32), [[UV7]](i32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 8, addrspace 1) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, align 8, addrspace 1) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 32 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; GFX9-NEXT: G_STORE [[UV8]](i32), [[PTR_ADD1]](p1) :: (store (i32) into unknown-address + 32, align 8, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - %2:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 - %3:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - %4:_(<9 x s32>) = G_CONCAT_VECTORS %1, %2, %3 - G_STORE %4, %0 :: (store (<9 x s32>), align 8, addrspace 1) + %1:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + %2:_(<3 x i32>) = COPY $vgpr5_vgpr6_vgpr7 + %3:_(<3 x i32>) = COPY $vgpr7_vgpr8_vgpr9 + %4:_(<9 x i32>) = G_CONCAT_VECTORS %1(<3 x i32>), %2(<3 x i32>), %3(<3 x i32>) + G_STORE %4(<9 x i32>), %0(p1) :: (store (<9 x i32>), align 8, addrspace 1) ... 
--- @@ -9622,85 +9622,85 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) - ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32) - ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 16, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr5_vgpr6_vgpr7 + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr7_vgpr8_vgpr9 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; SI-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY2]](<3 x i32>) + ; SI-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY3]](<3 x i32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV4]](i32), [[UV5]](i32), [[UV6]](i32), [[UV7]](i32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, addrspace 1) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 32 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; SI-NEXT: G_STORE [[UV8]](i32), [[PTR_ADD1]](p1) :: (store (i32) into unknown-address + 32, align 16, addrspace 1) ; ; CI-LABEL: name: test_store_global_v9s32_align16 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 - ; CI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), 
[[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; CI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) - ; CI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32) - ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 16, addrspace 1) + ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + ; CI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr5_vgpr6_vgpr7 + ; CI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr7_vgpr8_vgpr9 + ; CI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; CI-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY2]](<3 x i32>) + ; CI-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY3]](<3 x i32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV4]](i32), [[UV5]](i32), [[UV6]](i32), [[UV7]](i32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), addrspace 1) + ; CI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; CI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, addrspace 1) + ; CI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 32 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; CI-NEXT: G_STORE [[UV8]](i32), [[PTR_ADD1]](p1) :: (store (i32) into unknown-address + 32, align 16, addrspace 1) ; ; VI-LABEL: name: test_store_global_v9s32_align16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) - ; VI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) 
= G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32) - ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 16, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr5_vgpr6_vgpr7 + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr7_vgpr8_vgpr9 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; VI-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY2]](<3 x i32>) + ; VI-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY3]](<3 x i32>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV4]](i32), [[UV5]](i32), [[UV6]](i32), [[UV7]](i32) + ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), addrspace 1) + ; VI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; VI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, addrspace 1) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 32 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: G_STORE [[UV8]](i32), [[PTR_ADD1]](p1) :: (store (i32) into unknown-address + 32, align 16, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_v9s32_align16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32) - ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) - ; GFX9-NEXT: 
[[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 16, addrspace 1) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr5_vgpr6_vgpr7 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr7_vgpr8_vgpr9 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY2]](<3 x i32>) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY3]](<3 x i32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32), [[UV3]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[UV4]](i32), [[UV5]](i32), [[UV6]](i32), [[UV7]](i32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), addrspace 1) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x i32>), [[PTR_ADD]](p1) :: (store (<4 x i32>) into unknown-address + 16, addrspace 1) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 32 + ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; GFX9-NEXT: G_STORE [[UV8]](i32), [[PTR_ADD1]](p1) :: (store (i32) into unknown-address + 32, align 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - %2:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 - %3:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - %4:_(<9 x s32>) = G_CONCAT_VECTORS %1, %2, %3 - G_STORE %4, %0 :: (store (<9 x s32>), align 16, addrspace 1) + %1:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + %2:_(<3 x i32>) = COPY $vgpr5_vgpr6_vgpr7 + %3:_(<3 x i32>) = COPY $vgpr7_vgpr8_vgpr9 + %4:_(<9 x i32>) = G_CONCAT_VECTORS %1(<3 x i32>), %2(<3 x i32>), %3(<3 x i32>) + G_STORE %4(<9 x i32>), %0(p1) :: (store (<9 x i32>), align 16, addrspace 1) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir index 22d792abe3624..b3b13a2489214 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir @@ -5,9 +5,9 @@ # RUN: FileCheck -check-prefix=ERR %s < %t.err # ERR-NOT: remark -# ERR: remark: :0:0: unable to legalize instruction: G_STORE %1:_(<2 x s8>), %0:_(p1) :: (store (<2 x s4>), addrspace 1) (in function: test_truncstore_global_v2s8_to_1_align1) -# ERR-NEXT: remark: :0:0: unable to legalize instruction: G_STORE %2:_(<3 x s8>), %0:_(p1) :: (store (<3 x s2>), addrspace 1) (in function: test_truncstore_global_v3s8_to_1_align1) -# ERR-NEXT: remark: :0:0: unable to legalize instruction: G_STORE %2:_(<3 x s8>), %0:_(p1) :: (store (<3 x s4>), addrspace 1) (in function: test_truncstore_global_v3s8_to_2_align2) +# ERR: remark: :0:0: unable to legalize instruction: G_STORE %1:_(<2 x i8>), %0:_(p1) :: (store (<2 x i4>), addrspace 1) (in function: test_truncstore_global_v2s8_to_1_align1) +# ERR-NEXT: remark: :0:0: unable to legalize instruction: G_STORE %2:_(<3 x i8>), %0:_(p1) :: (store (<3 x i2>), addrspace 1) (in function: test_truncstore_global_v3s8_to_1_align1) +# ERR-NEXT: remark: :0:0: unable to legalize instruction: G_STORE %2:_(<3 x i8>), %0:_(p1) :: (store (<3 x i4>), addrspace 1) (in function: test_truncstore_global_v3s8_to_2_align2) # ERR-NOT: remark --- @@ -20,18 +20,18 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s32), addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; SI-NEXT: G_STORE [[COPY1]](i32), [[COPY]](p1) :: (store (i32), addrspace 1) ; ; VI-LABEL: name: test_store_global_i32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s32), addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; VI-NEXT: G_STORE [[COPY1]](i32), [[COPY]](p1) :: (store (i32), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - G_STORE %1, %0 :: (store (s32), addrspace 1) + %1:_(i32) = COPY $vgpr2 + G_STORE %1(i32), %0(p1) :: (store (i32), addrspace 1) ... --- @@ -44,18 +44,18 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; SI-NEXT: G_STORE [[COPY1]](i64), [[COPY]](p1) :: (store (i64), addrspace 1) ; ; VI-LABEL: name: test_store_global_i64 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; VI-NEXT: G_STORE [[COPY1]](i64), [[COPY]](p1) :: (store (i64), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store (s64), addrspace 1) + %1:_(i64) = COPY $vgpr2_vgpr3 + G_STORE %1(i64), %0(p1) :: (store (i64), addrspace 1) ... 
--- @@ -79,7 +79,7 @@ body: | ; VI-NEXT: G_STORE [[COPY1]](p1), [[COPY]](p1) :: (store (p1), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(p1) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store (p1), addrspace 1) + G_STORE %1(p1), %0(p1) :: (store (p1), addrspace 1) ... --- @@ -103,7 +103,7 @@ body: | ; VI-NEXT: G_STORE [[COPY1]](p4), [[COPY]](p1) :: (store (p4), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(p4) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store (p4), addrspace 1) + G_STORE %1(p4), %0(p1) :: (store (p4), addrspace 1) ... --- @@ -127,7 +127,7 @@ body: | ; VI-NEXT: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store (p3), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(p3) = COPY $vgpr2 - G_STORE %1, %0 :: (store (p3), addrspace 1) + G_STORE %1(p3), %0(p1) :: (store (p3), addrspace 1) ... --- @@ -140,18 +140,18 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; SI-NEXT: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: G_STORE [[COPY1]](<2 x i32>), [[COPY]](p1) :: (store (<2 x i32>), addrspace 1) ; ; VI-LABEL: name: test_store_global_v2s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; VI-NEXT: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: G_STORE [[COPY1]](<2 x i32>), [[COPY]](p1) :: (store (<2 x i32>), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store (<2 x s32>), addrspace 1) + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + G_STORE %1(<2 x i32>), %0(p1) :: (store (<2 x i32>), addrspace 1) ... --- @@ -164,18 +164,18 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; SI-NEXT: G_STORE [[COPY1]](<2 x s16>), [[COPY]](p1) :: (store (<2 x s16>), addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; SI-NEXT: G_STORE [[COPY1]](<2 x i16>), [[COPY]](p1) :: (store (<2 x i16>), addrspace 1) ; ; VI-LABEL: name: test_store_global_v2s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; VI-NEXT: G_STORE [[COPY1]](<2 x s16>), [[COPY]](p1) :: (store (<2 x s16>), addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; VI-NEXT: G_STORE [[COPY1]](<2 x i16>), [[COPY]](p1) :: (store (<2 x i16>), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s16>) = COPY $vgpr2 - G_STORE %1, %0 :: (store (<2 x s16>), addrspace 1) + %1:_(<2 x i16>) = COPY $vgpr2 + G_STORE %1(<2 x i16>), %0(p1) :: (store (<2 x i16>), addrspace 1) ... 
--- @@ -188,23 +188,23 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) - ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<2 x i32>), [[COPY]](p1) :: (store (<2 x i32>), align 4, addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; SI-NEXT: G_STORE [[UV2]](i32), [[PTR_ADD]](p1) :: (store (i32) into unknown-address + 8, addrspace 1) ; ; VI-LABEL: name: test_store_global_v3s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; VI-NEXT: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 4, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + ; VI-NEXT: G_STORE [[COPY1]](<3 x i32>), [[COPY]](p1) :: (store (<3 x i32>), align 4, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - G_STORE %1, %0 :: (store (<3 x s32>), align 4, addrspace 1) + %1:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + G_STORE %1(<3 x i32>), %0(p1) :: (store (<3 x i32>), align 4, addrspace 1) ... --- @@ -217,20 +217,20 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; SI-NEXT: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY1]](i64) + ; SI-NEXT: G_STORE [[TRUNC]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) ; ; VI-LABEL: name: test_truncstore_global_s64_to_s8 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; VI-NEXT: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY1]](i64) + ; VI-NEXT: G_STORE [[TRUNC]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store (s8), addrspace 1) + %1:_(i64) = COPY $vgpr2_vgpr3 + G_STORE %1(i64), %0(p1) :: (store (i8), addrspace 1) ... 
--- @@ -243,20 +243,20 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; SI-NEXT: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY1]](i64) + ; SI-NEXT: G_STORE [[TRUNC]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) ; ; VI-LABEL: name: test_truncstore_global_s64_to_s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; VI-NEXT: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY1]](i64) + ; VI-NEXT: G_STORE [[TRUNC]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store (s16), addrspace 1) + %1:_(i64) = COPY $vgpr2_vgpr3 + G_STORE %1(i64), %0(p1) :: (store (i16), addrspace 1) ... --- @@ -269,34 +269,34 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C1]] - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C]](s32) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) - ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY1]](i64) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[TRUNC]], [[C1]] + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[AND]], [[C]](i32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; SI-NEXT: G_STORE [[TRUNC]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) + ; SI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 1, addrspace 1) ; ; VI-LABEL: name: test_truncstore_global_s64_to_s16_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s64) - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C]](s16) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT 
[[LSHR]](s16) - ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY1]](i64) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i64) + ; VI-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC1]], [[C]](i16) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: G_STORE [[TRUNC]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR]](i16) + ; VI-NEXT: G_STORE [[ANYEXT]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store (s16), addrspace 1, align 1) + %1:_(i64) = COPY $vgpr2_vgpr3 + G_STORE %1(i64), %0(p1) :: (store (i16), align 1, addrspace 1) ... --- @@ -309,20 +309,20 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; SI-NEXT: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s32), addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY1]](i64) + ; SI-NEXT: G_STORE [[TRUNC]](i32), [[COPY]](p1) :: (store (i32), addrspace 1) ; ; VI-LABEL: name: test_truncstore_global_s64_to_s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; VI-NEXT: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s32), addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY1]](i64) + ; VI-NEXT: G_STORE [[TRUNC]](i32), [[COPY]](p1) :: (store (i32), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store (s32), addrspace 1) + %1:_(i64) = COPY $vgpr2_vgpr3 + G_STORE %1(i64), %0(p1) :: (store (i32), addrspace 1) ... 
--- @@ -335,32 +335,32 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) - ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY1]](i64) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[TRUNC]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; SI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) + ; SI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i16) into unknown-address + 2, addrspace 1) ; ; VI-LABEL: name: test_truncstore_global_s64_to_s32_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) - ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY1]](i64) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[TRUNC]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) + ; VI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i16) into unknown-address + 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store (s32), addrspace 1, align 2) + %1:_(i64) = COPY $vgpr2_vgpr3 + G_STORE %1(i64), %0(p1) :: (store (i32), align 2, addrspace 1) ... 
--- @@ -373,55 +373,55 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s32) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) - ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY3]](s32) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) - ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY1]](i64) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[TRUNC]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY2]], [[C3]] + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[AND]], [[C2]](i32) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) + ; SI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD1]](p1) :: (store (i8) into unknown-address + 1, addrspace 1) + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[C2]](i32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[LSHR]], [[COPY3]](i32) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](i64) + ; SI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 2, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR2]](i32), [[PTR_ADD2]](p1) :: (store (i8) into unknown-address + 3, addrspace 1) ; ; VI-LABEL: name: test_truncstore_global_s64_to_s32_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], 
[[C1]](s64) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s64) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C2]](s16) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) - ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C2]](s16) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) - ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) - ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY1]](i64) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[TRUNC]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C]](i32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](i64) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i64) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC1]], [[C2]](i16) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR1]](i16) + ; VI-NEXT: G_STORE [[ANYEXT]](i32), [[PTR_ADD1]](p1) :: (store (i8) into unknown-address + 1, addrspace 1) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC2]], [[C2]](i16) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](i64) + ; VI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 2, addrspace 1) + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR2]](i16) + ; VI-NEXT: G_STORE [[ANYEXT1]](i32), [[PTR_ADD2]](p1) :: (store (i8) into unknown-address + 3, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store (s32), addrspace 1, align 1) + %1:_(i64) = COPY $vgpr2_vgpr3 + G_STORE %1(i64), %0(p1) :: (store (i32), align 1, addrspace 1) ... 
--- @@ -434,20 +434,20 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s128) - ; SI-NEXT: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY1]](i128) + ; SI-NEXT: G_STORE [[TRUNC]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) ; ; VI-LABEL: name: test_truncstore_global_s128_to_s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s128) - ; VI-NEXT: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY1]](i128) + ; VI-NEXT: G_STORE [[TRUNC]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store (s16), addrspace 1) + %1:_(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + G_STORE %1(i128), %0(p1) :: (store (i16), addrspace 1) ... --- @@ -460,20 +460,20 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) - ; SI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](i128) + ; SI-NEXT: G_STORE [[BITCAST]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), addrspace 1) ; ; VI-LABEL: name: test_truncstore_global_s128_to_s8 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) - ; VI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](i128) + ; VI-NEXT: G_STORE [[BITCAST]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store (s128), addrspace 1) + %1:_(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + G_STORE %1(i128), %0(p1) :: (store (i128), addrspace 1) ... 
--- @@ -486,25 +486,25 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]] - ; SI-NEXT: G_STORE [[AND1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[AND]], [[C]] + ; SI-NEXT: G_STORE [[AND1]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) ; ; VI-LABEL: name: test_store_global_i1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]] - ; VI-NEXT: G_STORE [[AND1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[AND]], [[C]] + ; VI-NEXT: G_STORE [[AND1]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s1) = G_TRUNC %1 - G_STORE %2, %0 :: (store (s1), addrspace 1) + %1:_(i32) = COPY $vgpr2 + %2:_(i1) = G_TRUNC %1(i32) + G_STORE %2(i1), %0(p1) :: (store (i1), addrspace 1) ... --- @@ -517,19 +517,19 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; SI-NEXT: G_STORE [[COPY1]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) ; ; VI-LABEL: name: test_store_global_i8 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; VI-NEXT: G_STORE [[COPY1]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s8) = G_TRUNC %1 - G_STORE %2, %0 :: (store (s8), addrspace 1) + %1:_(i32) = COPY $vgpr2 + %2:_(i8) = G_TRUNC %1(i32) + G_STORE %2(i8), %0(p1) :: (store (i8), addrspace 1) ... 
--- @@ -542,19 +542,19 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; SI-NEXT: G_STORE [[COPY1]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) ; ; VI-LABEL: name: test_store_global_i16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; VI-NEXT: G_STORE [[COPY1]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s16) = G_TRUNC %1 - G_STORE %2, %0 :: (store (s16), addrspace 1) + %1:_(i32) = COPY $vgpr2 + %2:_(i16) = G_TRUNC %1(i32) + G_STORE %2(i16), %0(p1) :: (store (i16), addrspace 1) ... --- @@ -566,27 +566,27 @@ body: | ; SI-LABEL: name: test_store_global_96 ; SI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr3_vgpr4 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY]](s96) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<3 x s32>) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) - ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[COPY1]](p1) :: (store (<2 x s32>), align 16, addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C]](s64) - ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[COPY]](i96) + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[BITCAST]](<3 x i32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<2 x i32>), [[COPY1]](p1) :: (store (<2 x i32>), align 16, addrspace 1) + ; SI-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C]](i64) + ; SI-NEXT: G_STORE [[UV2]](i32), [[PTR_ADD]](p1) :: (store (i32) into unknown-address + 8, align 8, addrspace 1) ; ; VI-LABEL: name: test_store_global_96 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr3_vgpr4 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY]](s96) - ; VI-NEXT: G_STORE [[BITCAST]](<3 x s32>), [[COPY1]](p1) :: (store (<3 x s32>), align 16, addrspace 1) - %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x i32>) = G_BITCAST [[COPY]](i96) + ; VI-NEXT: G_STORE [[BITCAST]](<3 x i32>), [[COPY1]](p1) :: (store (<3 x i32>), align 16, addrspace 1) + %0:_(i96) = COPY $vgpr0_vgpr1_vgpr2 %1:_(p1) = COPY $vgpr3_vgpr4 + G_STORE %0(i96), %1(p1) :: (store (i96), align 16, addrspace 1) - G_STORE %0, %1 :: (store (s96), addrspace 1, align 16) ... 
--- @@ -599,20 +599,20 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) - ; SI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](i128) + ; SI-NEXT: G_STORE [[BITCAST]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), addrspace 1) ; ; VI-LABEL: name: test_store_global_i128 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) - ; VI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x i32>) = G_BITCAST [[COPY1]](i128) + ; VI-NEXT: G_STORE [[BITCAST]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store (s128), addrspace 1) + %1:_(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + G_STORE %1(i128), %0(p1) :: (store (i128), addrspace 1) ... --- @@ -625,18 +625,18 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; SI-NEXT: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; SI-NEXT: G_STORE [[COPY1]](<2 x i64>), [[COPY]](p1) :: (store (<2 x i64>), addrspace 1) ; ; VI-LABEL: name: test_store_global_v2s64 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; VI-NEXT: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; VI-NEXT: G_STORE [[COPY1]](<2 x i64>), [[COPY]](p1) :: (store (<2 x i64>), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store (<2 x s64>), addrspace 1) + %1:_(<2 x i64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + G_STORE %1(<2 x i64>), %0(p1) :: (store (<2 x i64>), addrspace 1) ... 
@@ -650,51 +650,51 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]] - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C1]](s32) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) - ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<2 x i32>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF]](<2 x i32>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[UV]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C1]](i32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[UV1]], [[C2]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[COPY1]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[SHL]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[TRUNC1]] + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[OR]](i16) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[ANYEXT]], [[C3]] + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[AND2]], [[C1]](i32) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; SI-NEXT: G_STORE [[ANYEXT]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) + ; SI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 1, addrspace 1) ; ; VI-LABEL: name: test_store_global_v2s8_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) - ; 
VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[OR]], [[C1]](s16) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) - ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) - ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<2 x i32>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF]](<2 x i32>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[UV]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C]] + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[UV1]](i32) + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C1]](i16) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[OR]](i16) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i16) = G_LSHR [[OR]], [[C1]](i16) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](i64) + ; VI-NEXT: G_STORE [[ANYEXT]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR]](i16) + ; VI-NEXT: G_STORE [[ANYEXT1]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s8>) = G_IMPLICIT_DEF - G_STORE %1, %0 :: (store (<2 x s8>), addrspace 1, align 1) + %1:_(<2 x i8>) = G_IMPLICIT_DEF + G_STORE %1(<2 x i8>), %0(p1) :: (store (<2 x i8>), align 1, addrspace 1) ... 
@@ -708,39 +708,39 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; SI-NEXT: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<2 x i32>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF]](<2 x i32>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[UV]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[UV1]], [[C2]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C1]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[SHL]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[TRUNC1]] + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[OR]](i16) + ; SI-NEXT: G_STORE [[ANYEXT]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) ; ; VI-LABEL: name: test_store_global_v2s8_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<2 x i32>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF]](<2 x i32>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[UV]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C]] + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[UV1]](i32) + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C1]](i16) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[OR]](i16) + ; VI-NEXT: G_STORE [[ANYEXT]](i32), [[COPY]](p1) :: (store (i16), 
addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s8>) = G_IMPLICIT_DEF - G_STORE %1, %0 :: (store (<2 x s8>), addrspace 1, align 2) + %1:_(<2 x i8>) = G_IMPLICIT_DEF + G_STORE %1(<2 x i8>), %0(p1) :: (store (<2 x i8>), addrspace 1) ... @@ -754,39 +754,39 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; SI-NEXT: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<2 x i32>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF]](<2 x i32>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[UV]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[UV1]], [[C2]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C1]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[SHL]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[TRUNC1]] + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[OR]](i16) + ; SI-NEXT: G_STORE [[ANYEXT]](i32), [[COPY]](p1) :: (store (i16), align 4, addrspace 1) ; ; VI-LABEL: name: test_store_global_v2s8_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<2 x i32>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF]](<2 x i32>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[UV]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C]] + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[UV1]](i32) + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(i16) = 
G_CONSTANT i16 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C1]](i16) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[OR]](i16) + ; VI-NEXT: G_STORE [[ANYEXT]](i32), [[COPY]](p1) :: (store (i16), align 4, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s8>) = G_IMPLICIT_DEF - G_STORE %1, %0 :: (store (<2 x s8>), addrspace 1, align 4) + %1:_(<2 x i8>) = G_IMPLICIT_DEF + G_STORE %1(<2 x i8>), %0(p1) :: (store (<2 x i8>), align 4, addrspace 1) ... @@ -800,85 +800,85 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY2]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C2]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32) - ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C3]](s32) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]] - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[C1]](s32) - ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) - ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[UV]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C]] 
+ ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C1]](i32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[UV1]], [[C2]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[COPY2]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[SHL]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[TRUNC1]] + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[UV2]](i32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C]] + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[C1]](i32) + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[DEF]], [[C2]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[COPY3]](i32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[SHL1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[TRUNC3]] + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C3]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL2]] + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[OR2]](i32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY4]], [[C3]](i32) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; SI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[COPY4]], [[C5]] + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[AND4]], [[C1]](i32) + ; SI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; SI-NEXT: G_STORE [[COPY4]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) + ; SI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD1]](p1) :: (store (i8) into unknown-address + 1, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 2, addrspace 1) ; ; VI-LABEL: name: test_store_global_v3s8_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C1]](s16) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) - ; 
VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[OR]], [[C1]](s16) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) - ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[UV]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C]] + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[UV1]](i32) + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C1]](i16) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL]] + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[UV2]](i32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C]] + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C1]](i16) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL1]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C2]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL2]] + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[OR2]](i32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C2]](i32) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i16) = G_LSHR [[OR]], [[C1]](i16) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR1]](i16) + ; VI-NEXT: G_STORE [[ANYEXT]](i32), [[PTR_ADD1]](p1) :: (store (i8) into unknown-address + 1, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - %2:_(<3 x s8>) = G_TRUNC %1 - G_STORE %2, %0 :: (store (<3 x s8>), addrspace 1, align 1) + %1:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + %2:_(<3 x i8>) = G_TRUNC %1(<3 x i32>) + G_STORE %2(<3 x i8>), %0(p1) :: (store (<3 x i8>), align 1, addrspace 1) ... 
@@ -892,73 +892,73 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY2]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C2]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) - ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C3]](s32) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) - ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[UV]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C1]](i32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[UV1]], [[C2]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[COPY2]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[SHL]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[TRUNC1]] + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[UV2]](i32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[DEF]], [[C2]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C1]](i32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[SHL1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[TRUNC3]] + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: 
[[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C3]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL2]] + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[OR2]](i32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C3]](i32) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY3]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) + ; SI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 2, align 2, addrspace 1) ; ; VI-LABEL: name: test_store_global_v3s8_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C1]](s16) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) - ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[UV]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C]] + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[UV1]](i32) + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C1]](i16) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL]] + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[UV2]](i32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C]] + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND 
[[TRUNC3]], [[C]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C1]](i16) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL1]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C2]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL2]] + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[OR2]](i32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C2]](i32) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) + ; VI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 2, align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - %2:_(<3 x s8>) = G_TRUNC %1 - G_STORE %2, %0 :: (store (<3 x s8>), addrspace 1, align 2) + %1:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + %2:_(<3 x i8>) = G_TRUNC %1(<3 x i32>) + G_STORE %2(<3 x i8>), %0(p1) :: (store (<3 x i8>), align 2, addrspace 1) ... @@ -972,73 +972,73 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY2]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C2]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) - ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C3]](s32) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) 
+ ; SI-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[UV]](i32) + ; SI-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C1]](i32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[UV1]], [[C2]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[COPY2]](i32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[SHL]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[TRUNC1]] + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[UV2]](i32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[DEF]], [[C2]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C1]](i32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[SHL1]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[TRUNC3]] + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C3]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL2]] + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[OR2]](i32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY3]], [[C3]](i32) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY3]](i32), [[COPY]](p1) :: (store (i16), align 4, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 2, align 2, addrspace 1) ; ; VI-LABEL: name: test_store_global_v3s8_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C1]](s16) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; 
VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) - ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[UV]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C]] + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[UV1]](i32) + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C1]](i16) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL]] + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[UV2]](i32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C]] + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[AND3]], [[C1]](i16) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL1]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C2]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL2]] + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[OR2]](i32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C2]](i32) + ; VI-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](i64) + ; VI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i16), align 4, addrspace 1) + ; VI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 2, align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - %2:_(<3 x s8>) = G_TRUNC %1 - G_STORE %2, %0 :: (store (<3 x s8>), addrspace 1, align 4) + %1:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + %2:_(<3 x i8>) = G_TRUNC %1(<3 x i32>) + G_STORE %2(<3 x i8>), %0(p1) :: (store (<3 x i8>), align 4, addrspace 1) ... 
@@ -1052,87 +1052,87 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]] - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[C1]](s32) - ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) - ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY3]](s32) - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C6]](s64) - ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) - ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<4 x i32>) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[UV]], [[C]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[UV1]], [[C]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[UV2]], [[C]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND2]], [[C2]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[UV3]], [[C]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C3]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[OR1]], [[SHL2]] + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[OR2]](i32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = 
G_LSHR [[COPY2]], [[C2]](i32) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; SI-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[COPY2]], [[C5]] + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[AND4]], [[C1]](i32) + ; SI-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](i64) + ; SI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) + ; SI-NEXT: G_STORE [[LSHR1]](i32), [[PTR_ADD1]](p1) :: (store (i8) into unknown-address + 1, addrspace 1) + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[C1]](i32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[LSHR]], [[COPY3]](i32) + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C6]](i64) + ; SI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 2, addrspace 1) + ; SI-NEXT: G_STORE [[LSHR2]](i32), [[PTR_ADD2]](p1) :: (store (i8) into unknown-address + 3, addrspace 1) ; ; VI-LABEL: name: test_store_global_v4s8_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C5]] - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C5]] - ; VI-NEXT: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C6]](s16) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[OR3]], [[C6]](s16) - ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) - ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = 
G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C6]](s16) - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C7]](s64) - ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) - ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) - ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<4 x i32>) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[UV]], [[C]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[UV1]], [[C]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[UV2]], [[C]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND2]], [[C2]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[UV3]], [[C]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[OR1]], [[SHL2]] + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[OR2]](i32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C2]](i32) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[UV]](i32) + ; VI-NEXT: [[C5:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; VI-NEXT: [[AND4:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C5]] + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[UV1]](i32) + ; VI-NEXT: [[AND5:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C5]] + ; VI-NEXT: [[C6:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(i16) = G_SHL [[AND5]], [[C6]](i16) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(i16) = G_OR [[AND4]], [[SHL3]] + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i16) = G_LSHR [[OR3]], [[C6]](i16) + ; VI-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](i64) + ; VI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i8), addrspace 1) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR1]](i16) + ; VI-NEXT: G_STORE [[ANYEXT]](i32), [[PTR_ADD1]](p1) :: (store (i8) into unknown-address + 1, addrspace 1) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i16) = G_LSHR [[TRUNC2]], [[C6]](i16) + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C7]](i64) + ; VI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i8) into unknown-address + 2, addrspace 1) + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR2]](i16) + ; VI-NEXT: G_STORE [[ANYEXT1]](i32), [[PTR_ADD2]](p1) :: (store (i8) into unknown-address + 3, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - %2:_(<4 x s8>) = G_TRUNC %1 - G_STORE %2, %0 :: (store (<4 x s8>), addrspace 1, align 1) + %1:_(<4 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + %2:_(<4 x i8>) = G_TRUNC %1(<4 x i32>) + G_STORE %2(<4 x i8>), %0(p1) :: (store (<4 x i8>), align 1, addrspace 1) ... 
@@ -1146,59 +1146,59 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) - ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<4 x i32>) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[UV]], [[C]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[UV1]], [[C]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[UV2]], [[C]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND2]], [[C2]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[UV3]], [[C]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C3]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[OR1]], [[SHL2]] + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[OR2]](i32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C2]](i32) + ; SI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; SI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) + ; SI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i16) into unknown-address + 2, addrspace 1) ; ; VI-LABEL: name: test_store_global_v4s8_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x 
s32>) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) - ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<4 x i32>) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[UV]], [[C]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[UV1]], [[C]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[UV2]], [[C]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND2]], [[C2]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[UV3]], [[C]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[OR1]], [[SHL2]] + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[OR2]](i32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY2]], [[C2]](i32) + ; VI-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](i64) + ; VI-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i16), addrspace 1) + ; VI-NEXT: G_STORE [[LSHR]](i32), [[PTR_ADD]](p1) :: (store (i16) into unknown-address + 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - %2:_(<4 x s8>) = G_TRUNC %1 - G_STORE %2, %0 :: (store (<4 x s8>), addrspace 1, align 2) + %1:_(<4 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + %2:_(<4 x i8>) = G_TRUNC %1(<4 x i32>) + G_STORE %2(<4 x i8>), %0(p1) :: (store (<4 x i8>), align 2, addrspace 1) ... 
@@ -1212,49 +1212,49 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; SI-NEXT: G_STORE [[OR2]](s32), [[COPY]](p1) :: (store (s32), addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<4 x i32>) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[UV]], [[C]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[UV1]], [[C]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C1]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[UV2]], [[C]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND2]], [[C2]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[UV3]], [[C]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C3]](i32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[OR1]], [[SHL2]] + ; SI-NEXT: G_STORE [[OR2]](i32), [[COPY]](p1) :: (store (i32), addrspace 1) ; ; VI-LABEL: name: test_store_global_v4s8_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; VI-NEXT: 
[[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; VI-NEXT: G_STORE [[OR2]](s32), [[COPY]](p1) :: (store (s32), addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<4 x i32>) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[UV]], [[C]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[UV1]], [[C]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C1]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; VI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[UV2]], [[C]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND2]], [[C2]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[UV3]], [[C]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C3]](i32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[OR1]], [[SHL2]] + ; VI-NEXT: G_STORE [[OR2]](i32), [[COPY]](p1) :: (store (i32), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - %2:_(<4 x s8>) = G_TRUNC %1 - G_STORE %2, %0 :: (store (<4 x s8>), addrspace 1, align 4) + %1:_(<4 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + %2:_(<4 x i8>) = G_TRUNC %1(<4 x i32>) + G_STORE %2(<4 x i8>), %0(p1) :: (store (<4 x i8>), addrspace 1) ... @@ -1268,18 +1268,18 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<2 x s8>) = G_IMPLICIT_DEF - ; SI-NEXT: G_STORE [[DEF]](<2 x s8>), [[COPY]](p1) :: (store (<2 x s4>), addrspace 1) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<2 x i8>) = G_IMPLICIT_DEF + ; SI-NEXT: G_STORE [[DEF]](<2 x i8>), [[COPY]](p1) :: (store (<2 x i4>), addrspace 1) ; ; VI-LABEL: name: test_truncstore_global_v2s8_to_1_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<2 x s8>) = G_IMPLICIT_DEF - ; VI-NEXT: G_STORE [[DEF]](<2 x s8>), [[COPY]](p1) :: (store (<2 x s4>), addrspace 1) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<2 x i8>) = G_IMPLICIT_DEF + ; VI-NEXT: G_STORE [[DEF]](<2 x i8>), [[COPY]](p1) :: (store (<2 x i4>), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s8>) = G_IMPLICIT_DEF - G_STORE %1, %0 :: (store (<2 x s4>), addrspace 1, align 1) + %1:_(<2 x i8>) = G_IMPLICIT_DEF + G_STORE %1(<2 x i8>), %0(p1) :: (store (<2 x i4>), addrspace 1) ... 
@@ -1293,21 +1293,21 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[COPY1]](<3 x s32>) - ; SI-NEXT: G_STORE [[TRUNC]](<3 x s8>), [[COPY]](p1) :: (store (<3 x s2>), addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(<3 x i8>) = G_TRUNC [[COPY1]](<3 x i32>) + ; SI-NEXT: G_STORE [[TRUNC]](<3 x i8>), [[COPY]](p1) :: (store (<3 x i2>), addrspace 1) ; ; VI-LABEL: name: test_truncstore_global_v3s8_to_1_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[COPY1]](<3 x s32>) - ; VI-NEXT: G_STORE [[TRUNC]](<3 x s8>), [[COPY]](p1) :: (store (<3 x s2>), addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(<3 x i8>) = G_TRUNC [[COPY1]](<3 x i32>) + ; VI-NEXT: G_STORE [[TRUNC]](<3 x i8>), [[COPY]](p1) :: (store (<3 x i2>), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - %2:_(<3 x s8>) = G_TRUNC %1 - G_STORE %2, %0 :: (store (<3 x s2>), addrspace 1, align 1) + %1:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + %2:_(<3 x i8>) = G_TRUNC %1(<3 x i32>) + G_STORE %2(<3 x i8>), %0(p1) :: (store (<3 x i2>), addrspace 1) ... @@ -1321,20 +1321,20 @@ body: | ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[COPY1]](<3 x s32>) - ; SI-NEXT: G_STORE [[TRUNC]](<3 x s8>), [[COPY]](p1) :: (store (<3 x s4>), addrspace 1) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(<3 x i8>) = G_TRUNC [[COPY1]](<3 x i32>) + ; SI-NEXT: G_STORE [[TRUNC]](<3 x i8>), [[COPY]](p1) :: (store (<3 x i4>), addrspace 1) ; ; VI-LABEL: name: test_truncstore_global_v3s8_to_2_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[COPY1]](<3 x s32>) - ; VI-NEXT: G_STORE [[TRUNC]](<3 x s8>), [[COPY]](p1) :: (store (<3 x s4>), addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(<3 x i8>) = G_TRUNC [[COPY1]](<3 x i32>) + ; VI-NEXT: G_STORE [[TRUNC]](<3 x i8>), [[COPY]](p1) :: (store (<3 x i4>), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - %2:_(<3 x s8>) = G_TRUNC %1 - G_STORE %2, %0 :: (store (<3 x s4>), addrspace 1, align 2) + %1:_(<3 x i32>) = COPY $vgpr2_vgpr3_vgpr4 + %2:_(<3 x i8>) = G_TRUNC %1(<3 x i32>) + G_STORE %2(<3 x i8>), %0(p1) :: (store (<3 x i4>), addrspace 1) ... 
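The store-global hunks above are all autogenerated FileCheck updates for the same legalization: sub-byte vector stores are widened element-wise, each lane is masked to 8 bits, shifted into its byte position, and OR'd into a single integer that feeds one or more G_STORE instructions. As a rough orientation aid (not part of the patch), a minimal C++ sketch of that packing step with MachineIRBuilder might look like the following; the helper name and its caller-supplied element registers are assumptions for illustration only.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
using namespace llvm;

// Pack four 32-bit element values into one i32: mask each to 8 bits,
// shift it into its byte lane, and OR the lanes together. The caller
// then emits a single G_STORE of the packed value (or splits it further
// for the align-1/align-2 cases, as in the checks above).
static Register packV4S8(MachineIRBuilder &B, ArrayRef<Register> Elts) {
  const LLT S32 = LLT::scalar(32);
  auto Mask = B.buildConstant(S32, 255);
  Register Packed;
  for (unsigned I = 0; I != 4; ++I) {
    Register Val = B.buildAnd(S32, Elts[I], Mask).getReg(0);
    if (I != 0) {
      auto ShAmt = B.buildConstant(S32, 8 * I);
      Val = B.buildShl(S32, Val, ShAmt).getReg(0);
    }
    Packed = (I == 0) ? Val : B.buildOr(S32, Packed, Val).getReg(0);
  }
  return Packed;
}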
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-strict_fsub.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-strict_fsub.mir index aeec40ab4aac4..b81785ded7bed 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-strict_fsub.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-strict_fsub.mir @@ -10,15 +10,21 @@ body: | ; GCN-LABEL: name: test_strict_fsub_s64 ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GCN-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY1]] - ; GCN-NEXT: [[STRICT_FADD:%[0-9]+]]:_(s64) = G_STRICT_FADD [[COPY]], [[FNEG]] - ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[STRICT_FADD]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = G_STRICT_FSUB %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(f64) = G_BITCAST [[COPY]](i64) + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(f64) = G_BITCAST [[COPY1]](i64) + ; GCN-NEXT: [[FNEG:%[0-9]+]]:_(f64) = G_FNEG [[BITCAST1]] + ; GCN-NEXT: [[STRICT_FADD:%[0-9]+]]:_(f64) = G_STRICT_FADD [[BITCAST]], [[FNEG]] + ; GCN-NEXT: [[BITCAST2:%[0-9]+]]:_(i64) = G_BITCAST [[STRICT_FADD]](f64) + ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST2]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(f64) = G_BITCAST %0(i64) + %3:_(f64) = G_BITCAST %1(i64) + %4:_(f64) = G_STRICT_FSUB %2, %3 + %5:_(i64) = G_BITCAST %4(f64) + $vgpr0_vgpr1 = COPY %5(i64) ... --- @@ -30,13 +36,19 @@ body: | ; GCN-LABEL: name: test_strict_fsub_v2s16 ; GCN: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GCN-NEXT: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[COPY1]] - ; GCN-NEXT: [[STRICT_FADD:%[0-9]+]]:_(<2 x s16>) = G_STRICT_FADD [[COPY]], [[FNEG]] - ; GCN-NEXT: $vgpr0 = COPY [[STRICT_FADD]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_STRICT_FSUB %0, %1 - $vgpr0 = COPY %2 + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY1]](<2 x i16>) + ; GCN-NEXT: [[FNEG:%[0-9]+]]:_(<2 x f16>) = G_FNEG [[BITCAST1]] + ; GCN-NEXT: [[STRICT_FADD:%[0-9]+]]:_(<2 x f16>) = G_STRICT_FADD [[BITCAST]], [[FNEG]] + ; GCN-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[STRICT_FADD]](<2 x f16>) + ; GCN-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %3:_(<2 x f16>) = G_BITCAST %1(<2 x i16>) + %4:_(<2 x f16>) = G_STRICT_FSUB %2, %3 + %5:_(<2 x i16>) = G_BITCAST %4(<2 x f16>) + $vgpr0 = COPY %5(<2 x i16>) ... 
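The strict_fsub checks show the more interesting FPInfo change in these tests: register copies keep integer LLTs, and the floating-point operation is bracketed by G_BITCASTs to and from an FP-typed value. Below is a hedged C++ sketch of that wrapping with MachineIRBuilder; the exact spelling of the FP LLT constructor introduced by this series is not visible in these hunks, so the FPTy parameter is deliberately left to the caller and is an assumption, not the patch's API.

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/TargetOpcodes.h"
using namespace llvm;

// Lower Dst = G_STRICT_FSUB LHS, RHS on integer-typed registers by
// bitcasting to an FP type, negating the RHS, emitting G_STRICT_FADD,
// and bitcasting the result back, mirroring the GCN checks above.
// FPTy is whatever FP LLT matches the integer width (assumed here).
static void buildStrictFSubViaBitcast(MachineIRBuilder &B, Register Dst,
                                      Register LHS, Register RHS, LLT FPTy) {
  auto CastL = B.buildBitcast(FPTy, LHS); // iN -> fN
  auto CastR = B.buildBitcast(FPTy, RHS);
  auto Neg = B.buildFNeg(FPTy, CastR);    // fsub x, y == fadd x, (fneg y)
  auto Add =
      B.buildInstr(TargetOpcode::G_STRICT_FADD, {FPTy}, {CastL, Neg});
  B.buildBitcast(Dst, Add);               // fN -> original integer Dst
}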
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sub.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sub.mir index a2889fd559501..3484f6eba8763 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sub.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sub.mir @@ -14,28 +14,30 @@ body: | ; GFX6-LABEL: name: test_sub_s32 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[COPY1]] - ; GFX6-NEXT: $vgpr0 = COPY [[SUB]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[COPY]], [[COPY1]] + ; GFX6-NEXT: $vgpr0 = COPY [[SUB]](i32) + ; ; GFX8-LABEL: name: test_sub_s32 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[COPY1]] - ; GFX8-NEXT: $vgpr0 = COPY [[SUB]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[COPY]], [[COPY1]] + ; GFX8-NEXT: $vgpr0 = COPY [[SUB]](i32) + ; ; GFX9-LABEL: name: test_sub_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[COPY1]] - ; GFX9-NEXT: $vgpr0 = COPY [[SUB]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_SUB %0, %1 - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[SUB]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_SUB %0, %1 + $vgpr0 = COPY %2(i32) ... 
--- @@ -47,40 +49,42 @@ body: | ; GFX6-LABEL: name: test_sub_v2s32 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[UV2]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[UV3]] - ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB]](s32), [[SUB1]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV]], [[UV2]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[UV1]], [[UV3]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SUB]](i32), [[SUB1]](i32) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + ; ; GFX8-LABEL: name: test_sub_v2s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[UV2]] - ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[UV3]] - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB]](s32), [[SUB1]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV]], [[UV2]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[UV1]], [[UV3]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SUB]](i32), [[SUB1]](i32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + ; ; GFX9-LABEL: name: test_sub_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[UV2]] - ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[UV3]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB]](s32), [[SUB1]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = G_SUB %0, %1 - 
$vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV]], [[UV2]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[UV1]], [[UV3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SUB]](i32), [[SUB1]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i32>) = G_SUB %0, %1 + $vgpr0_vgpr1 = COPY %2(<2 x i32>) ... --- @@ -92,39 +96,41 @@ body: | ; GFX6-LABEL: name: test_sub_s16 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[COPY1]] - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C]] - ; GFX6-NEXT: $vgpr0 = COPY [[AND]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[COPY]], [[COPY1]] + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[SUB]], [[C]] + ; GFX6-NEXT: $vgpr0 = COPY [[AND]](i32) + ; ; GFX8-LABEL: name: test_sub_s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[TRUNC]], [[TRUNC1]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SUB]](s16) - ; GFX8-NEXT: $vgpr0 = COPY [[ZEXT]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i16) = G_SUB [[TRUNC]], [[TRUNC1]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[SUB]](i16) + ; GFX8-NEXT: $vgpr0 = COPY [[ZEXT]](i32) + ; ; GFX9-LABEL: name: test_sub_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[TRUNC]], [[TRUNC1]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SUB]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ZEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s16) = G_SUB %2, %3 - %5:_(s32) = G_ZEXT %4 - $vgpr0 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(i16) = G_SUB [[TRUNC]], [[TRUNC1]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[SUB]](i16) + ; GFX9-NEXT: $vgpr0 = COPY 
[[ZEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i16) = G_SUB %2, %3 + %5:_(i32) = G_ZEXT %4(i16) + $vgpr0 = COPY %5(i32) ... --- @@ -136,55 +142,57 @@ body: | ; GFX6-LABEL: name: test_sub_v2s16 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[BITCAST]], [[BITCAST1]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[LSHR]], [[LSHR1]] - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C1]] - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SUB1]], [[C1]] - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[BITCAST]], [[BITCAST1]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[LSHR]], [[LSHR1]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[SUB]], [[C1]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[SUB1]], [[C1]] + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) + ; ; GFX8-LABEL: name: test_sub_v2s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[TRUNC]], [[TRUNC2]] - ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[TRUNC1]], [[TRUNC3]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SUB]](s16) - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SUB1]](s16) - ; GFX8-NEXT: 
[[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i16) = G_SUB [[TRUNC]], [[TRUNC2]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(i16) = G_SUB [[TRUNC1]], [[TRUNC3]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[SUB]](i16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[SUB1]](i16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) + ; ; GFX9-LABEL: name: test_sub_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(<2 x s16>) = G_SUB [[COPY]], [[COPY1]] - ; GFX9-NEXT: $vgpr0 = COPY [[SUB]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_SUB %0, %1 - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(<2 x i16>) = G_SUB [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[SUB]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = G_SUB %0, %1 + $vgpr0 = COPY %2(<2 x i16>) ... 
--- @@ -195,85 +203,87 @@ body: | ; GFX6-LABEL: name: test_sub_v3s16 ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[COPY3]] - ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SUB]](s32) - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[COPY4]] - ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SUB1]](s32) - ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY2]], [[COPY5]] - ; GFX6-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SUB2]](s32) - ; GFX6-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s16), implicit [[TRUNC1]](s16), implicit [[TRUNC2]](s16) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[COPY]], [[COPY3]] + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[SUB]](i32) + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[COPY1]], [[COPY4]] + ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[SUB1]](i32) + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[COPY2]], [[COPY5]] + ; GFX6-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[SUB2]](i32) + ; GFX6-NEXT: S_ENDPGM 0, implicit [[TRUNC]](i16), implicit [[TRUNC1]](i16), implicit [[TRUNC2]](i16) + ; ; GFX8-LABEL: name: test_sub_v3s16 ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) - ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[TRUNC]], [[TRUNC3]] - ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[TRUNC1]], [[TRUNC4]] - ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[TRUNC2]], [[TRUNC5]] - ; GFX8-NEXT: S_ENDPGM 0, implicit [[SUB]](s16), implicit [[SUB1]](s16), implicit [[SUB2]](s16) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY3]](i32) + ; GFX8-NEXT: 
[[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY4]](i32) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY5]](i32) + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i16) = G_SUB [[TRUNC]], [[TRUNC3]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(i16) = G_SUB [[TRUNC1]], [[TRUNC4]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(i16) = G_SUB [[TRUNC2]], [[TRUNC5]] + ; GFX8-NEXT: S_ENDPGM 0, implicit [[SUB]](i16), implicit [[SUB1]](i16), implicit [[SUB2]](i16) + ; ; GFX9-LABEL: name: test_sub_v3s16 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(<2 x s16>) = G_SUB [[BUILD_VECTOR]], [[BUILD_VECTOR2]] - ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(<2 x s16>) = G_SUB [[BUILD_VECTOR1]], [[BUILD_VECTOR3]] - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[SUB]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[SUB1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: S_ENDPGM 0, implicit [[TRUNC6]](s16), implicit [[TRUNC7]](s16), implicit [[TRUNC8]](s16) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = COPY $vgpr3 - %4:_(s32) = COPY $vgpr4 - %5:_(s32) = COPY $vgpr5 - %6:_(s16) = G_TRUNC %0 - %7:_(s16) = G_TRUNC %1 - %8:_(s16) = G_TRUNC %2 - %9:_(s16) = G_TRUNC %3 - %10:_(s16) = G_TRUNC %4 - %11:_(s16) = G_TRUNC %5 - %12:_(<3 x s16>) = G_BUILD_VECTOR %6, %7, %8 - %13:_(<3 x s16>) = G_BUILD_VECTOR %9, %10, %11 - %14:_(<3 x s16>) = G_SUB %12, %13 - %15:_(s16), %16:_(s16), %17:_(s16) = G_UNMERGE_VALUES %14 - S_ENDPGM 0, implicit %15, implicit %16, implicit %17 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC 
[[COPY2]](i32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY3]](i32) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY4]](i32) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY5]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC3]](i16), [[TRUNC4]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC5]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(<2 x i16>) = G_SUB [[BUILD_VECTOR]], [[BUILD_VECTOR2]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(<2 x i16>) = G_SUB [[BUILD_VECTOR1]], [[BUILD_VECTOR3]] + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[SUB]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[SUB1]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9-NEXT: S_ENDPGM 0, implicit [[TRUNC6]](i16), implicit [[TRUNC7]](i16), implicit [[TRUNC8]](i16) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 + %4:_(i32) = COPY $vgpr4 + %5:_(i32) = COPY $vgpr5 + %6:_(i16) = G_TRUNC %0(i32) + %7:_(i16) = G_TRUNC %1(i32) + %8:_(i16) = G_TRUNC %2(i32) + %9:_(i16) = G_TRUNC %3(i32) + %10:_(i16) = G_TRUNC %4(i32) + %11:_(i16) = G_TRUNC %5(i32) + %12:_(<3 x i16>) = G_BUILD_VECTOR %6(i16), %7(i16), %8(i16) + %13:_(<3 x i16>) = G_BUILD_VECTOR %9(i16), %10(i16), %11(i16) + %14:_(<3 x i16>) = G_SUB %12, %13 + %15:_(i16), %16:_(i16), %17:_(i16) = G_UNMERGE_VALUES %14(<3 x i16>) + S_ENDPGM 0, implicit %15(i16), implicit %16(i16), implicit %17(i16) ... 
--- @@ -285,91 +295,93 @@ body: | ; GFX6-LABEL: name: test_sub_v4s16 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[BITCAST]], [[BITCAST2]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[LSHR]], [[LSHR2]] - ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[BITCAST1]], [[BITCAST3]] - ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[LSHR1]], [[LSHR3]] - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C1]] - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SUB1]], [[C1]] - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SUB2]], [[C1]] - ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB3]], [[C1]] - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[BITCAST]], [[BITCAST2]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[LSHR]], [[LSHR2]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[BITCAST1]], [[BITCAST3]] + ; GFX6-NEXT: 
[[SUB3:%[0-9]+]]:_(i32) = G_SUB [[LSHR1]], [[LSHR3]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[SUB]], [[C1]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[SUB1]], [[C1]] + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[SUB2]], [[C1]] + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[SUB3]], [[C1]] + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C]](i32) + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x i16>), [[BITCAST5]](<2 x i16>) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) + ; ; GFX8-LABEL: name: test_sub_v4s16 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[TRUNC]], [[TRUNC4]] - ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[TRUNC1]], [[TRUNC5]] - ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[TRUNC2]], [[TRUNC6]] - ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s16) = G_SUB [[TRUNC3]], [[TRUNC7]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SUB]](s16) - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SUB1]](s16) - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SUB2]](s16) - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[SUB3]](s16) - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; 
GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST3]](i32) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i16) = G_SUB [[TRUNC]], [[TRUNC4]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(i16) = G_SUB [[TRUNC1]], [[TRUNC5]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(i16) = G_SUB [[TRUNC2]], [[TRUNC6]] + ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(i16) = G_SUB [[TRUNC3]], [[TRUNC7]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[SUB]](i16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[SUB1]](i16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[SUB2]](i16) + ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[SUB3]](i16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x i16>), [[BITCAST5]](<2 x i16>) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) + ; ; GFX9-LABEL: name: test_sub_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(<2 x s16>) = G_SUB [[UV]], [[UV2]] - ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(<2 x s16>) = G_SUB [[UV1]], [[UV3]] - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[SUB]](<2 x s16>), [[SUB1]](<2 x 
s16>) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 - %2:_(<4 x s16>) = G_SUB %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(<2 x i16>) = G_SUB [[UV]], [[UV2]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(<2 x i16>) = G_SUB [[UV1]], [[UV3]] + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[SUB]](<2 x i16>), [[SUB1]](<2 x i16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = COPY $vgpr2_vgpr3 + %2:_(<4 x i16>) = G_SUB %0, %1 + $vgpr0_vgpr1 = COPY %2(<4 x i16>) ... --- @@ -381,40 +393,42 @@ body: | ; GFX6-LABEL: name: test_sub_s64 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] - ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] - ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV]], [[UV2]] + ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO]](i32), [[USUBE]](i32) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + ; ; GFX8-LABEL: name: test_sub_s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX8-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] - ; GFX8-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] - ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX8-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = 
G_USUBO [[UV]], [[UV2]] + ; GFX8-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO]](i32), [[USUBE]](i32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + ; ; GFX9-LABEL: name: test_sub_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX9-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] - ; GFX9-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = G_SUB %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX9-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV]], [[UV2]] + ; GFX9-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO]](i32), [[USUBE]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64) = G_SUB %0, %1 + $vgpr0_vgpr1 = COPY %2(i64) ... 
--- @@ -426,37 +440,39 @@ body: | ; GFX6-LABEL: name: test_sub_s7 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[COPY1]] - ; GFX6-NEXT: $vgpr0 = COPY [[SUB]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[COPY]], [[COPY1]] + ; GFX6-NEXT: $vgpr0 = COPY [[SUB]](i32) + ; ; GFX8-LABEL: name: test_sub_s7 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[TRUNC]], [[TRUNC1]] - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SUB]](s16) - ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i16) = G_SUB [[TRUNC]], [[TRUNC1]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[SUB]](i16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + ; ; GFX9-LABEL: name: test_sub_s7 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[TRUNC]], [[TRUNC1]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SUB]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s7) = G_TRUNC %0 - %3:_(s7) = G_TRUNC %1 - %4:_(s7) = G_SUB %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(i16) = G_SUB [[TRUNC]], [[TRUNC1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[SUB]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i7) = G_TRUNC %0(i32) + %3:_(i7) = G_TRUNC %1(i32) + %4:_(i7) = G_SUB %2, %3 + %5:_(i32) = G_ANYEXT %4(i7) + $vgpr0 = COPY %5(i32) ... 
--- @@ -468,31 +484,33 @@ body: | ; GFX6-LABEL: name: test_sub_s24 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[COPY1]] - ; GFX6-NEXT: $vgpr0 = COPY [[SUB]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[COPY]], [[COPY1]] + ; GFX6-NEXT: $vgpr0 = COPY [[SUB]](i32) + ; ; GFX8-LABEL: name: test_sub_s24 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[COPY1]] - ; GFX8-NEXT: $vgpr0 = COPY [[SUB]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[COPY]], [[COPY1]] + ; GFX8-NEXT: $vgpr0 = COPY [[SUB]](i32) + ; ; GFX9-LABEL: name: test_sub_s24 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[COPY1]] - ; GFX9-NEXT: $vgpr0 = COPY [[SUB]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s24) = G_TRUNC %0 - %3:_(s24) = G_TRUNC %1 - %4:_(s24) = G_SUB %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[SUB]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i24) = G_TRUNC %0(i32) + %3:_(i24) = G_TRUNC %1(i32) + %4:_(i24) = G_SUB %2, %3 + %5:_(i32) = G_ANYEXT %4(i24) + $vgpr0 = COPY %5(i32) ... 
--- @@ -503,43 +521,45 @@ body: | ; GFX6-LABEL: name: test_sub_s33 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] - ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] - ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV]], [[UV2]] + ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO]](i32), [[USUBE]](i32) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + ; ; GFX8-LABEL: name: test_sub_s33 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX8-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] - ; GFX8-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] - ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX8-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV]], [[UV2]] + ; GFX8-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO]](i32), [[USUBE]](i32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + ; ; GFX9-LABEL: name: test_sub_s33 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX9-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] - ; GFX9-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 
- %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s33) = G_TRUNC %0 - %3:_(s33) = G_TRUNC %1 - %4:_(s33) = G_SUB %2, %3 - %5:_(s64) = G_ANYEXT %4 - $vgpr0_vgpr1 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX9-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV]], [[UV2]] + ; GFX9-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO]](i32), [[USUBE]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i33) = G_TRUNC %0(i64) + %3:_(i33) = G_TRUNC %1(i64) + %4:_(i33) = G_SUB %2, %3 + %5:_(i64) = G_ANYEXT %4(i33) + $vgpr0_vgpr1 = COPY %5(i64) ... --- @@ -551,41 +571,43 @@ body: | ; GFX6-LABEL: name: test_sub_s96 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) - ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s96) - ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV3]] - ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV4]], [[USUBO1]] - ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV2]], [[UV5]], [[USUBE1]] - ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32), [[USUBE2]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i96) + ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i96) + ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV]], [[UV3]] + ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV1]], [[UV4]], [[USUBO1]] + ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV2]], [[UV5]], [[USUBE1]] + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(i96) = G_MERGE_VALUES [[USUBO]](i32), [[USUBE]](i32), [[USUBE2]](i32) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](i96) + ; ; GFX8-LABEL: name: test_sub_s96 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) - ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s96) - ; GFX8-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV3]] - ; GFX8-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV4]], [[USUBO1]] - ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), 
[[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV2]], [[UV5]], [[USUBE1]] - ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32), [[USUBE2]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i96) + ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i96) + ; GFX8-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV]], [[UV3]] + ; GFX8-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV1]], [[UV4]], [[USUBO1]] + ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV2]], [[UV5]], [[USUBE1]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(i96) = G_MERGE_VALUES [[USUBO]](i32), [[USUBE]](i32), [[USUBE2]](i32) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](i96) + ; ; GFX9-LABEL: name: test_sub_s96 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s96) - ; GFX9-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV3]] - ; GFX9-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV4]], [[USUBO1]] - ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV2]], [[UV5]], [[USUBE1]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32), [[USUBE2]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) - %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(s96) = COPY $vgpr3_vgpr4_vgpr5 - %2:_(s96) = G_SUB %0, %1 - $vgpr0_vgpr1_vgpr2 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i96) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i96) + ; GFX9-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV]], [[UV3]] + ; GFX9-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV1]], [[UV4]], [[USUBO1]] + ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV2]], [[UV5]], [[USUBE1]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i96) = G_MERGE_VALUES [[USUBO]](i32), [[USUBE]](i32), [[USUBE2]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](i96) + %0:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(i96) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(i96) = G_SUB %0, %1 + $vgpr0_vgpr1_vgpr2 = COPY %2(i96) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trap-gfx11.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trap-gfx11.mir index e3d31c702482f..515049d9aaff2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trap-gfx11.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trap-gfx11.mir @@ -10,15 +10,15 @@ body: | ; GFX1100-LABEL: name: test_trap ; GFX1100: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GFX1100-NEXT: {{ $}} - ; GFX1100-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX1100-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 ; GFX1100-NEXT: [[C1:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; GFX1100-NEXT: G_STORE [[C]](s32), [[C1]](p1) :: (store (s8), addrspace 1) + ; GFX1100-NEXT: G_STORE [[C]](i32), [[C1]](p1) :: (store (i8), addrspace 1) ; GFX1100-NEXT: S_CBRANCH_EXECNZ %bb.2, implicit $exec ; GFX1100-NEXT: {{ $}} ; GFX1100-NEXT: .1: ; GFX1100-NEXT: successors: ; GFX1100-NEXT: {{ $}} - ; GFX1100-NEXT: G_STORE [[C]](s32), [[C1]](p1) :: (store (s8), addrspace 1) + ; GFX1100-NEXT: G_STORE [[C]](i32), [[C1]](p1) :: (store (i8), addrspace 1) ; GFX1100-NEXT: {{ $}} ; GFX1100-NEXT: .2: ; GFX1100-NEXT: successors: %bb.3(0x80000000) @@ -40,16 +40,16 @@ body: | ; GFX1100-NEXT: S_BRANCH %bb.3 ; ; GFX1150-LABEL: name: test_trap - ; GFX1150: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX1150: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 ; GFX1150-NEXT: [[C1:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; GFX1150-NEXT: G_STORE [[C]](s32), [[C1]](p1) :: (store (s8), addrspace 1) + ; GFX1150-NEXT: G_STORE [[C]](i32), [[C1]](p1) :: (store (i8), addrspace 1) ; GFX1150-NEXT: S_TRAP 2 - ; GFX1150-NEXT: G_STORE [[C]](s32), [[C1]](p1) :: (store (s8), addrspace 1) - %0:_(s8) = G_CONSTANT i8 0 + ; GFX1150-NEXT: G_STORE [[C]](i32), [[C1]](p1) :: (store (i8), addrspace 1) + %0:_(i8) = G_CONSTANT i8 0 %1:_(p1) = G_CONSTANT i64 0 - G_STORE %0, %1 :: (store 1, addrspace 1) + G_STORE %0(i8), %1(p1) :: (store (i8), addrspace 1) G_TRAP - G_STORE %0, %1 :: (store 1, addrspace 1) + G_STORE %0(i8), %1(p1) :: (store (i8), addrspace 1) ... 
--- @@ -59,15 +59,15 @@ body: | ; GFX1100: bb.0: ; GFX1100-NEXT: successors: %bb.1(0x80000000), %bb.2(0x00000000) ; GFX1100-NEXT: {{ $}} - ; GFX1100-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX1100-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 ; GFX1100-NEXT: [[C1:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; GFX1100-NEXT: G_STORE [[C]](s32), [[C1]](p1) :: (store (s8), addrspace 1) + ; GFX1100-NEXT: G_STORE [[C]](i32), [[C1]](p1) :: (store (i8), addrspace 1) ; GFX1100-NEXT: S_CBRANCH_EXECNZ %bb.2, implicit $exec ; GFX1100-NEXT: {{ $}} ; GFX1100-NEXT: bb.1: ; GFX1100-NEXT: successors: ; GFX1100-NEXT: {{ $}} - ; GFX1100-NEXT: G_STORE [[C]](s32), [[C1]](p1) :: (store (s8), addrspace 1) + ; GFX1100-NEXT: G_STORE [[C]](i32), [[C1]](p1) :: (store (i8), addrspace 1) ; GFX1100-NEXT: {{ $}} ; GFX1100-NEXT: bb.2: ; GFX1100-NEXT: successors: %bb.3(0x80000000) @@ -92,21 +92,23 @@ body: | ; GFX1150: bb.0: ; GFX1150-NEXT: successors: %bb.1(0x80000000) ; GFX1150-NEXT: {{ $}} - ; GFX1150-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX1150-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 ; GFX1150-NEXT: [[C1:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; GFX1150-NEXT: G_STORE [[C]](s32), [[C1]](p1) :: (store (s8), addrspace 1) + ; GFX1150-NEXT: G_STORE [[C]](i32), [[C1]](p1) :: (store (i8), addrspace 1) ; GFX1150-NEXT: S_TRAP 2 ; GFX1150-NEXT: {{ $}} ; GFX1150-NEXT: bb.1: - ; GFX1150-NEXT: G_STORE [[C]](s32), [[C1]](p1) :: (store (s8), addrspace 1) + ; GFX1150-NEXT: G_STORE [[C]](i32), [[C1]](p1) :: (store (i8), addrspace 1) bb.0: - successors: %bb.1 + successors: %bb.1(0x80000000) - %0:_(s8) = G_CONSTANT i8 0 + %0:_(i8) = G_CONSTANT i8 0 %1:_(p1) = G_CONSTANT i64 0 - G_STORE %0, %1 :: (store 1, addrspace 1) + G_STORE %0(i8), %1(p1) :: (store (i8), addrspace 1) G_TRAP bb.1: - G_STORE %0, %1 :: (store 1, addrspace 1) + G_STORE %0(i8), %1(p1) :: (store (i8), addrspace 1) + + ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trap.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trap.mir index 305eca792cfbc..274752bf1d804 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trap.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trap.mir @@ -10,24 +10,27 @@ body: | ; GCN: bb.0: ; GCN-NEXT: successors: %bb.1(0x80000000), %bb.2(0x00000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GCN-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 ; GCN-NEXT: [[C1:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 ; GCN-NEXT: S_CBRANCH_EXECNZ %bb.2, implicit $exec ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: ; GCN-NEXT: successors: ; GCN-NEXT: {{ $}} - ; GCN-NEXT: G_STORE [[C]](s32), [[C1]](p1) :: (store (s8), addrspace 1) + ; GCN-NEXT: G_STORE [[C]](i32), [[C1]](p1) :: (store (i8), addrspace 1) ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.2: ; GCN-NEXT: S_ENDPGM 0 bb.0: - %0:_(s8) = G_CONSTANT i8 0 + successors: %bb.1(0x80000000) + + %0:_(i8) = G_CONSTANT i8 0 %1:_(p1) = G_CONSTANT i64 0 G_TRAP bb.1: - G_STORE %0, %1 :: (store 1, addrspace 1) + G_STORE %0(i8), %1(p1) :: (store (i8), addrspace 1) + ... 
@@ -38,7 +41,7 @@ body: | ; GCN: bb.0: ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GCN-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 ; GCN-NEXT: S_CBRANCH_EXECNZ %bb.3, implicit $exec ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.2: @@ -49,16 +52,19 @@ body: | ; GCN-NEXT: bb.1: ; GCN-NEXT: successors: ; GCN-NEXT: {{ $}} - ; GCN-NEXT: G_STORE [[C]](s32), [[C1]](p1) :: (store (s8), addrspace 1) + ; GCN-NEXT: G_STORE [[C]](i32), [[C1]](p1) :: (store (i8), addrspace 1) ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.3: ; GCN-NEXT: S_ENDPGM 0 bb.0: - %0:_(s8) = G_CONSTANT i8 0 + successors: %bb.1(0x80000000) + + %0:_(i8) = G_CONSTANT i8 0 G_TRAP %1:_(p1) = G_CONSTANT i64 0 bb.1: - G_STORE %0, %1 :: (store 1, addrspace 1) + G_STORE %0(i8), %1(p1) :: (store (i8), addrspace 1) + ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trunc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trunc.mir index 282550830442c..7513d8e51f993 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trunc.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trunc.mir @@ -10,12 +10,12 @@ body: | ; CHECK-LABEL: name: test_trunc_s64_to_s32 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[TRUNC]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_TRUNC %0 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY]](i64) + ; CHECK-NEXT: $vgpr0 = COPY [[TRUNC]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = G_TRUNC %0(i64) + $vgpr0 = COPY %1(i32) ... --- @@ -27,12 +27,12 @@ body: | ; CHECK-LABEL: name: test_trunc_s64_to_s16 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s16) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s16) = G_TRUNC %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](i16) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i16) = G_TRUNC %0(i64) + S_ENDPGM 0, implicit %1(i16) ... --- @@ -44,12 +44,12 @@ body: | ; CHECK-LABEL: name: test_trunc_v2s32_to_v2s16 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[COPY]](<2 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[TRUNC]](<2 x s16>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s16>) = G_TRUNC %0 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x i16>) = G_TRUNC [[COPY]](<2 x i32>) + ; CHECK-NEXT: $vgpr0 = COPY [[TRUNC]](<2 x i16>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i16>) = G_TRUNC %0(<2 x i32>) + $vgpr0 = COPY %1(<2 x i16>) ... 
--- @@ -61,16 +61,16 @@ body: | ; CHECK-LABEL: name: test_trunc_v3s32_to_v3s16 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s16), implicit [[TRUNC1]](s16), implicit [[TRUNC2]](s16) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s16>) = G_TRUNC %0 - %2:_(s16), %3:_(s16), %4:_(s16) = G_UNMERGE_VALUES %1 - S_ENDPGM 0, implicit %2, implicit %3, implicit %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[UV]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[UV1]](i32) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[UV2]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](i16), implicit [[TRUNC1]](i16), implicit [[TRUNC2]](i16) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x i16>) = G_TRUNC %0(<3 x i32>) + %2:_(i16), %3:_(i16), %4:_(i16) = G_UNMERGE_VALUES %1(<3 x i16>) + S_ENDPGM 0, implicit %2(i16), implicit %3(i16), implicit %4(i16) ... --- @@ -82,15 +82,15 @@ body: | ; CHECK-LABEL: name: test_trunc_v4s32_to_v4s16 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<4 x s16>) = G_TRUNC %0 - $vgpr0_vgpr1 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i32>), [[UV1:%[0-9]+]]:_(<2 x i32>) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x i16>) = G_TRUNC [[UV]](<2 x i32>) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x i16>) = G_TRUNC [[UV1]](<2 x i32>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x i16>), [[TRUNC1]](<2 x i16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<4 x i16>) = G_TRUNC %0(<4 x i32>) + $vgpr0_vgpr1 = COPY %1(<4 x i16>) ... 
--- @@ -102,17 +102,17 @@ body: | ; CHECK-LABEL: name: test_trunc_v8s32_to_v8s16 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>), [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV2]](<2 x s32>) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV3]](<2 x s32>) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>), [[TRUNC2]](<2 x s16>), [[TRUNC3]](<2 x s16>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>) - %0:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - %1:_(<8 x s16>) = G_TRUNC %0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i32>), [[UV1:%[0-9]+]]:_(<2 x i32>), [[UV2:%[0-9]+]]:_(<2 x i32>), [[UV3:%[0-9]+]]:_(<2 x i32>) = G_UNMERGE_VALUES [[COPY]](<8 x i32>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x i16>) = G_TRUNC [[UV]](<2 x i32>) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x i16>) = G_TRUNC [[UV1]](<2 x i32>) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<2 x i16>) = G_TRUNC [[UV2]](<2 x i32>) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(<2 x i16>) = G_TRUNC [[UV3]](<2 x i32>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x i16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x i16>), [[TRUNC1]](<2 x i16>), [[TRUNC2]](<2 x i16>), [[TRUNC3]](<2 x i16>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x i16>) + %0:_(<8 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(<8 x i16>) = G_TRUNC %0(<8 x i32>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<8 x i16>) ... 
--- @@ -124,21 +124,21 @@ body: | ; CHECK-LABEL: name: test_trunc_v2s64_to_v2s16 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV1]](s64) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s16>) = G_TRUNC %0 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[UV]](i64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[TRUNC]], [[C]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i32) = G_TRUNC [[UV1]](i64) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[TRUNC1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x i16>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x i16>) = G_TRUNC %0(<2 x i64>) + $vgpr0 = COPY %1(<2 x i16>) ... 
--- @@ -150,29 +150,29 @@ body: | ; CHECK-LABEL: name: test_trunc_v4s64_to_v4s16 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV1]](s64) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[TRUNC2]], [[C]] - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[TRUNC3]], [[C]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - %1:_(<4 x s16>) = G_TRUNC %0 - $vgpr0_vgpr1 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64), [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<4 x i64>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[UV]](i64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[TRUNC]], [[C]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i32) = G_TRUNC [[UV1]](i64) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[TRUNC1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i32) = G_TRUNC [[UV2]](i64) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[TRUNC2]], [[C]] + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(i32) = G_TRUNC [[UV3]](i64) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[TRUNC3]], [[C]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C1]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x i16>), [[BITCAST1]](<2 x i16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) + %0:_(<4 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(<4 x i16>) = G_TRUNC %0(<4 x i64>) + $vgpr0_vgpr1 = COPY %1(<4 x i16>) ... 
--- @@ -184,12 +184,12 @@ body: | ; CHECK-LABEL: name: test_trunc_s64_to_s1 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s1) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s1) = G_TRUNC %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i1) = G_TRUNC [[COPY]](i64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](i1) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i1) = G_TRUNC %0(i64) + S_ENDPGM 0, implicit %1(i1) ... --- @@ -201,12 +201,12 @@ body: | ; CHECK-LABEL: name: test_trunc_s32_to_s1 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s1) - %0:_(s32) = COPY $vgpr0 - %1:_(s1) = G_TRUNC %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i1) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](i1) + %0:_(i32) = COPY $vgpr0 + %1:_(i1) = G_TRUNC %0(i32) + S_ENDPGM 0, implicit %1(i1) ... --- @@ -218,13 +218,13 @@ body: | ; CHECK-LABEL: name: test_trunc_s16_to_s1 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s1) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s1) = G_TRUNC %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i1) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](i1) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(i1) = G_TRUNC %1(i16) + S_ENDPGM 0, implicit %2(i1) ... 
--- @@ -236,28 +236,28 @@ body: | ; CHECK-LABEL: name: test_trunc_v4s32_to_v4s1 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[UV]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[UV1]](s32) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s1) = G_TRUNC [[UV2]](s32) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s1) = G_TRUNC [[UV3]](s32) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>) - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC]](s1), [[UV4]], [[UV8]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC1]](s1), [[UV5]], [[UV9]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC2]](s1), [[UV6]], [[UV10]] - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC3]](s1), [[UV7]], [[UV11]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32), [[SELECT2]](s32), [[SELECT3]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<4 x s32>) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - %2:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - %3:_(<4 x s1>) = G_TRUNC %0 - %4:_(<4 x s32>) = G_SELECT %3, %1, %2 - S_ENDPGM 0, implicit %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i1) = G_TRUNC [[UV]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i1) = G_TRUNC [[UV1]](i32) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i1) = G_TRUNC [[UV2]](i32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(i1) = G_TRUNC [[UV3]](i32) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<4 x i32>) + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32), [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY2]](<4 x i32>) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[TRUNC]](i1), [[UV4]], [[UV8]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[TRUNC1]](i1), [[UV5]], [[UV9]] + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[TRUNC2]](i1), [[UV6]], [[UV10]] + ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(i32) = G_SELECT [[TRUNC3]](i1), [[UV7]], [[UV11]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[SELECT]](i32), [[SELECT1]](i32), [[SELECT2]](i32), [[SELECT3]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<4 x i32>) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<4 x i32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(<4 
x i32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + %3:_(<4 x i1>) = G_TRUNC %0(<4 x i32>) + %4:_(<4 x i32>) = G_SELECT %3(<4 x i1>), %1, %2 + S_ENDPGM 0, implicit %4(<4 x i32>) ... --- @@ -269,15 +269,15 @@ body: | ; CHECK-LABEL: name: test_trunc_v2s64_to_v2s32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV]](s64) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV1]](s64) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[TRUNC]](s32), [[TRUNC1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s32>) = G_TRUNC %0 - $vgpr0_vgpr1 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[UV]](i64) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i32) = G_TRUNC [[UV1]](i64) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[TRUNC]](i32), [[TRUNC1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x i32>) = G_TRUNC %0(<2 x i64>) + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... --- @@ -289,17 +289,17 @@ body: | ; CHECK-LABEL: name: test_trunc_v4s64_to_v4s32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV]](s64) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV1]](s64) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[TRUNC]](s32), [[TRUNC1]](s32), [[TRUNC2]](s32), [[TRUNC3]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) - %0:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - %1:_(<4 x s32>) = G_TRUNC %0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64), [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<4 x i64>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[UV]](i64) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i32) = G_TRUNC [[UV1]](i64) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i32) = G_TRUNC [[UV2]](i64) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(i32) = G_TRUNC [[UV3]](i64) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[TRUNC]](i32), [[TRUNC1]](i32), [[TRUNC2]](i32), [[TRUNC3]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) + %0:_(<4 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(<4 x i32>) = G_TRUNC %0(<4 x i64>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<4 x i32>) ... 
--- @@ -311,15 +311,15 @@ body: | ; CHECK-LABEL: name: test_trunc_v2s128_to_v2s64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](<2 x s128>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[UV]](s128) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s64) = G_TRUNC [[UV1]](s128) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[TRUNC]](s64), [[TRUNC1]](s64) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - %0:_(<2 x s128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - %1:_(<2 x s64>) = G_TRUNC %0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i128), [[UV1:%[0-9]+]]:_(i128) = G_UNMERGE_VALUES [[COPY]](<2 x i128>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i64) = G_TRUNC [[UV]](i128) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i64) = G_TRUNC [[UV1]](i128) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[TRUNC]](i64), [[TRUNC1]](i64) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) + %0:_(<2 x i128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(<2 x i64>) = G_TRUNC %0(<2 x i128>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x i64>) ... --- @@ -331,15 +331,15 @@ body: | ; CHECK-LABEL: name: test_trunc_v2s128_to_v2s32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](<2 x s128>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV]](s128) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV1]](s128) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[TRUNC]](s32), [[TRUNC1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - %1:_(<2 x s32>) = G_TRUNC %0 - $vgpr0_vgpr1 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i128), [[UV1:%[0-9]+]]:_(i128) = G_UNMERGE_VALUES [[COPY]](<2 x i128>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[UV]](i128) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i32) = G_TRUNC [[UV1]](i128) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[TRUNC]](i32), [[TRUNC1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(<2 x i32>) = G_TRUNC %0(<2 x i128>) + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... 
--- @@ -351,21 +351,21 @@ body: | ; CHECK-LABEL: name: test_trunc_v2s128_to_v2s16 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](<2 x s128>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV]](s128) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV1]](s128) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - %0:_(<2 x s128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - %1:_(<2 x s16>) = G_TRUNC %0 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i128), [[UV1:%[0-9]+]]:_(i128) = G_UNMERGE_VALUES [[COPY]](<2 x i128>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[UV]](i128) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[TRUNC]], [[C]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i32) = G_TRUNC [[UV1]](i128) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[TRUNC1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x i16>) + %0:_(<2 x i128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(<2 x i16>) = G_TRUNC %0(<2 x i128>) + $vgpr0 = COPY %1(<2 x i16>) ... --- @@ -377,17 +377,17 @@ body: | ; CHECK-LABEL: name: test_trunc_v2s96_to_v2s32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s96) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s96) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[TRUNC]](s32), [[TRUNC1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(s96) = COPY $vgpr3_vgpr4_vgpr5 - %2:_(<2 x s96>) = G_BUILD_VECTOR %0, %1 - %3:_(<2 x s32>) = G_TRUNC %2 - $vgpr0_vgpr1 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY $vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY]](i96) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i32) = G_TRUNC [[COPY1]](i96) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[TRUNC]](i32), [[TRUNC1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(i96) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(<2 x i96>) = G_BUILD_VECTOR %0(i96), %1(i96) + %3:_(<2 x i32>) = G_TRUNC %2(<2 x i96>) + $vgpr0_vgpr1 = COPY %3(<2 x i32>) ... 
--- @@ -399,23 +399,23 @@ body: | ; CHECK-LABEL: name: test_trunc_v2s96_to_v2s16 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s96) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s96) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(s96) = COPY $vgpr3_vgpr4_vgpr5 - %2:_(<2 x s96>) = G_BUILD_VECTOR %0, %1 - %3:_(<2 x s16>) = G_TRUNC %2 - $vgpr0 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY $vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY]](i96) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[TRUNC]], [[C]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i32) = G_TRUNC [[COPY1]](i96) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[TRUNC1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x i16>) + %0:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(i96) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(<2 x i96>) = G_BUILD_VECTOR %0(i96), %1(i96) + %3:_(<2 x i16>) = G_TRUNC %2(<2 x i96>) + $vgpr0 = COPY %3(<2 x i16>) ... --- @@ -427,16 +427,16 @@ body: | ; CHECK-LABEL: name: test_trunc_v2s96_to_v2s8 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s96) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s96) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[TRUNC]](s32), [[TRUNC1]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<2 x s32>) - %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(s96) = COPY $vgpr3_vgpr4_vgpr5 - %2:_(<2 x s96>) = G_BUILD_VECTOR %0, %1 - %3:_(<2 x s8>) = G_TRUNC %2 - %4:_(<2 x s32>) = G_ANYEXT %3 - S_ENDPGM 0, implicit %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY $vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY]](i96) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i32) = G_TRUNC [[COPY1]](i96) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[TRUNC]](i32), [[TRUNC1]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<2 x i32>) + %0:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(i96) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(<2 x i96>) = G_BUILD_VECTOR %0(i96), %1(i96) + %3:_(<2 x i8>) = G_TRUNC %2(<2 x i96>) + %4:_(<2 x i32>) = G_ANYEXT %3(<2 x i8>) + S_ENDPGM 0, implicit %4(<2 x i32>) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uadde.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uadde.mir index 9f43d664d1edd..affeeae404c6f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uadde.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uadde.mir @@ -10,24 +10,24 @@ body: | ; CHECK-LABEL: name: test_uadde_s32 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[COPY]], [[COPY1]], [[ICMP]] - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDE1]](s1) - ; CHECK-NEXT: $vgpr0 = COPY [[UADDE]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(eq), %2, %3 - %5:_(s32), %6:_(s1) = G_UADDE %0, %1, %4 - %7:_(s32) = G_ZEXT %6 - $vgpr0 = COPY %5 - $vgpr1 = COPY %7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[COPY]], [[COPY1]], [[ICMP]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDE1]](i1) + ; CHECK-NEXT: $vgpr0 = COPY [[UADDE]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + %5:_(i32), %6:_(i1) = G_UADDE %0, %1, %4 + %7:_(i32) = G_ZEXT %6(i1) + $vgpr0 = COPY %5(i32) + $vgpr1 = COPY %7(i32) ... 
--- @@ -39,36 +39,36 @@ body: | ; CHECK-LABEL: name: test_uadde_v2s32 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV]](s32), [[C]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV1]](s32), [[C]] - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV2]], [[UV4]], [[ICMP]] - ; CHECK-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV3]], [[UV5]], [[ICMP1]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UADDE]](s32), [[UADDE2]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDE1]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDE3]](s1) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - ; CHECK-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = COPY $vgpr4_vgpr5 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(<2 x s32>) = G_BUILD_VECTOR %3, %3 - %5:_(<2 x s1>) = G_ICMP intpred(eq), %2, %4 - %6:_(<2 x s32>), %7:_(<2 x s1>) = G_UADDE %0, %1, %5 - %8:_(<2 x s32>) = G_ZEXT %7 - $vgpr0_vgpr1 = COPY %6 - $vgpr2_vgpr3 = COPY %8 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr4_vgpr5 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY2]](<2 x i32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[UV]](i32), [[C]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[UV1]](i32), [[C]] + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV2]], [[UV4]], [[ICMP]] + ; CHECK-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UV3]], [[UV5]], [[ICMP1]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[UADDE]](i32), [[UADDE2]](i32) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[UADDE1]](i1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[UADDE3]](i1) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[ANYEXT]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[ANYEXT1]], [[C1]] + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR 
[[AND]](i32), [[AND1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + ; CHECK-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i32>) = COPY $vgpr4_vgpr5 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(<2 x i32>) = G_BUILD_VECTOR %3(i32), %3(i32) + %5:_(<2 x i1>) = G_ICMP intpred(eq), %2(<2 x i32>), %4 + %6:_(<2 x i32>), %7:_(<2 x i1>) = G_UADDE %0, %1, %5 + %8:_(<2 x i32>) = G_ZEXT %7(<2 x i1>) + $vgpr0_vgpr1 = COPY %6(<2 x i32>) + $vgpr2_vgpr3 = COPY %8(<2 x i32>) ... --- @@ -80,31 +80,31 @@ body: | ; CHECK-LABEL: name: test_uadde_s16 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND %13, [[C1]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[AND1]](s32) - ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[AND]], [[COPY2]], [[ICMP]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UADDE]](s32), [[AND1]] - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP1]](s1) - ; CHECK-NEXT: $vgpr0 = COPY [[UADDE]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(eq), %2, %3 - %5:_(s16) = G_TRUNC %0 - %6:_(s16) = G_TRUNC %1 - %7:_(s16), %8:_(s1) = G_UADDE %6, %7, %4 - %9:_(s32) = G_ANYEXT %7 - %10:_(s32) = G_ZEXT %8 - $vgpr0 = COPY %9 - $vgpr1 = COPY %10 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND %13, [[C1]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[AND1]](i32) + ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[AND]], [[COPY2]], [[ICMP]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UADDE]](i32), [[AND1]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[ICMP1]](i1) + ; CHECK-NEXT: $vgpr0 = COPY [[UADDE]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + %5:_(i16) = G_TRUNC %0(i32) + %6:_(i16) = G_TRUNC %1(i32) + %7:_(i16), %8:_(i1) = G_UADDE %6, %7, %4 + %9:_(i32) = G_ANYEXT %7(i16) + %10:_(i32) = G_ZEXT %8(i1) + $vgpr0 = COPY %9(i32) + $vgpr1 = COPY %10(i32) ... 
--- @@ -116,26 +116,26 @@ body: | ; CHECK-LABEL: name: test_uadde_s64 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV]], [[UV2]], [[ICMP]] - ; CHECK-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDE1]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE]](s32), [[UADDE2]](s32) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDE3]](s1) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) - ; CHECK-NEXT: $vgpr2 = COPY [[ZEXT]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s32) = COPY $vgpr4 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(eq), %2, %3 - %5:_(s64), %6:_(s1) = G_UADDE %0, %1, %4 - %7:_(s32) = G_ZEXT %6 - $vgpr0_vgpr1 = COPY %5 - $vgpr2 = COPY %7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV]], [[UV2]], [[ICMP]] + ; CHECK-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UV1]], [[UV3]], [[UADDE1]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDE]](i32), [[UADDE2]](i32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDE3]](i1) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + ; CHECK-NEXT: $vgpr2 = COPY [[ZEXT]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i32) = COPY $vgpr4 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + %5:_(i64), %6:_(i1) = G_UADDE %0, %1, %4 + %7:_(i32) = G_ZEXT %6(i1) + $vgpr0_vgpr1 = COPY %5(i64) + $vgpr2 = COPY %7(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddo.mir index b2fe9b8ddc903..7c43eb7ffb4d7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddo.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddo.mir @@ -10,18 +10,18 @@ body: | ; CHECK-LABEL: name: test_uaddo_s32 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; CHECK-NEXT: $vgpr0 = COPY [[UADDO]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32), %3:_(s1) = G_UADDO %0, %1 - %4:_(s32) = G_ZEXT %3 - $vgpr0 = COPY %2 - $vgpr1 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO1]](i1) + ; CHECK-NEXT: $vgpr0 = COPY [[UADDO]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32), %3:_(i1) = G_UADDO %0, %1 + %4:_(i32) = G_ZEXT %3(i1) + $vgpr0 = COPY %2(i32) + $vgpr1 = COPY %4(i32) ... --- @@ -33,28 +33,28 @@ body: | ; CHECK-LABEL: name: test_uaddo_s7 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[AND]], [[AND1]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C]] - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ADD]](s32), [[AND2]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[AND2]](s32) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s7) = G_TRUNC %0 - %3:_(s7) = G_TRUNC %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 127 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[AND]], [[AND1]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[ADD]], [[C]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[ADD]](i32), [[AND2]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[AND2]](i32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[ICMP]](i1) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i7) = G_TRUNC %0(i32) + %3:_(i7) = G_TRUNC %1(i32) + %4:_(i7), %5:_(i1) = G_UADDO %2, %3 + %6:_(i32) = G_ZEXT %4(i7) + %7:_(i32) = G_ZEXT %5(i1) + $vgpr0 = COPY %6(i32) + $vgpr1 = COPY %7(i32) - %4:_(s7), %5:_(s1) = G_UADDO %2, %3 - %6:_(s32) = G_ZEXT %4 - %7:_(s32) = G_ZEXT %5 - $vgpr0 = COPY %6 - $vgpr1 = COPY %7 ... 
--- @@ -66,28 +66,28 @@ body: | ; CHECK-LABEL: name: test_uaddo_s16 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[AND]], [[AND1]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C]] - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ADD]](s32), [[AND2]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[AND2]](s32) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[AND]], [[AND1]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[ADD]], [[C]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[ADD]](i32), [[AND2]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[AND2]](i32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[ICMP]](i1) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i16), %5:_(i1) = G_UADDO %2, %3 + %6:_(i32) = G_ZEXT %4(i16) + %7:_(i32) = G_ZEXT %5(i1) + $vgpr0 = COPY %6(i32) + $vgpr1 = COPY %7(i32) - %4:_(s16), %5:_(s1) = G_UADDO %2, %3 - %6:_(s32) = G_ZEXT %4 - %7:_(s32) = G_ZEXT %5 - $vgpr0 = COPY %6 - $vgpr1 = COPY %7 ... 
--- @@ -99,22 +99,22 @@ body: | ; CHECK-LABEL: name: test_uaddo_s64 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDE1]](s1) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) - ; CHECK-NEXT: $vgpr2 = COPY [[ZEXT]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64), %3:_(s1) = G_UADDO %0, %1 - %4:_(s32) = G_ZEXT %3 - $vgpr0_vgpr1 = COPY %2 - $vgpr2 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV]], [[UV2]] + ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDE1]](i1) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + ; CHECK-NEXT: $vgpr2 = COPY [[ZEXT]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64), %3:_(i1) = G_UADDO %0, %1 + %4:_(i32) = G_ZEXT %3(i1) + $vgpr0_vgpr1 = COPY %2(i64) + $vgpr2 = COPY %4(i32) ... 
--- @@ -126,41 +126,41 @@ body: | ; CHECK-LABEL: name: test_uaddo_v2s16 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[AND]], [[AND1]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C1]] - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ADD]](s32), [[AND2]] - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[LSHR1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C1]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ADD1]](s32), [[AND3]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[AND2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[AND3]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[SHL]] - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND4]](s32), [[AND5]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) - ; CHECK-NEXT: $vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>), %3:_(<2 x s1>) = G_UADDO %0, %1 - %4:_(<2 x s32>) = G_ZEXT %3 - $vgpr0 = COPY %2 - $vgpr1_vgpr2 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[AND]], [[AND1]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[ADD]], [[C1]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[ADD]](i32), [[AND2]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[LSHR]], [[LSHR1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[ADD1]], [[C1]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[ADD1]](i32), [[AND3]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[AND2]](i32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[AND3]](i32) + ; CHECK-NEXT: 
[[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY3]], [[C]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY2]], [[SHL]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP]](i1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP1]](i1) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[ANYEXT]], [[C2]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[ANYEXT1]], [[C2]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[AND4]](i32), [[AND5]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) + ; CHECK-NEXT: $vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>), %3:_(<2 x i1>) = G_UADDO %0, %1 + %4:_(<2 x i32>) = G_ZEXT %3(<2 x i1>) + $vgpr0 = COPY %2(<2 x i16>) + $vgpr1_vgpr2 = COPY %4(<2 x i32>) ... --- @@ -171,71 +171,71 @@ body: | ; CHECK-LABEL: name: test_uaddo_v3s16 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[AND]], [[AND1]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C1]] - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ADD]](s32), [[AND2]] - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[LSHR1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C1]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ADD1]](s32), [[AND3]] - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[AND4]], [[AND5]] - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C1]] - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ADD2]](s32), [[AND6]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP2]](s1) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], 
[[C]](s32) - ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[AND2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[AND3]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[SHL]] - ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[AND6]](s32) - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[COPY4]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]] - ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] - ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] - ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C2]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND9]](s32), [[AND10]](s32), [[AND11]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - %2:_(<3 x s16>), %3:_(<3 x s16>) = G_UNMERGE_VALUES %0 - %4:_(<3 x s16>), %5:_(<3 x s16>) = G_UNMERGE_VALUES %1 - %6:_(<3 x s16>), %7:_(<3 x s1>) = G_UADDO %2, %4 - %8:_(<3 x s16>) = G_IMPLICIT_DEF - %9:_(<6 x s16>) = G_CONCAT_VECTORS %6, %8 - %10:_(<3 x s32>) = G_ZEXT %7 - $vgpr0_vgpr1_vgpr2 = COPY %9 - $vgpr0_vgpr1_vgpr2 = COPY %10 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<6 x i16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C1]] + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[AND]], [[AND1]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[ADD]], [[C1]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[ADD]](i32), [[AND2]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[LSHR]], [[LSHR1]] + ; 
CHECK-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[ADD1]], [[C1]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[ADD1]](i32), [[AND3]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[BITCAST3]], [[C1]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[AND4]], [[AND5]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(i32) = G_AND [[ADD2]], [[C1]] + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[ADD2]](i32), [[AND6]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP]](i1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP1]](i1) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP2]](i1) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[UV6]](<2 x i16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[UV7]](<2 x i16>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[AND2]](i32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[AND3]](i32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY3]], [[C]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY2]], [[SHL]] + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[AND6]](i32) + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(i32) = G_AND [[BITCAST4]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND7]], [[C]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[COPY4]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(i32) = G_AND [[BITCAST5]], [[C1]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND8]], [[C]](i32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR2]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x i16>), [[BITCAST7]](<2 x i16>), [[BITCAST8]](<2 x i16>) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(i32) = G_AND [[ANYEXT]], [[C2]] + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(i32) = G_AND [[ANYEXT1]], [[C2]] + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(i32) = G_AND [[ANYEXT2]], [[C2]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[AND9]](i32), [[AND10]](i32), [[AND11]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) + %0:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<6 x i16>) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(<3 x i16>), %3:_(<3 x i16>) = G_UNMERGE_VALUES %0(<6 x i16>) + %4:_(<3 x i16>), %5:_(<3 x i16>) = G_UNMERGE_VALUES %1(<6 x i16>) + %6:_(<3 x i16>), %7:_(<3 x i1>) = G_UADDO %2, %4 + %8:_(<3 x i16>) = G_IMPLICIT_DEF + %9:_(<6 x i16>) = G_CONCAT_VECTORS %6(<3 x i16>), %8(<3 x i16>) + %10:_(<3 x i32>) = G_ZEXT %7(<3 x i1>) + $vgpr0_vgpr1_vgpr2 = COPY %9(<6 x i16>) + $vgpr0_vgpr1_vgpr2 = COPY %10(<3 x i32>) ... 
--- @@ -247,65 +247,65 @@ body: | ; CHECK-LABEL: name: test_uaddo_v4s16 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr1_vgpr2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[AND]], [[AND1]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C1]] - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ADD]](s32), [[AND2]] - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[LSHR2]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C1]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ADD1]](s32), [[AND3]] - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[AND4]], [[AND5]] - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C1]] - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ADD2]](s32), [[AND6]] - ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[LSHR1]], [[LSHR3]] - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ADD3]], [[C1]] - ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ADD3]](s32), [[AND7]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[AND2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[AND3]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[SHL]] - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[AND6]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[AND7]](s32) - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[COPY4]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP2]](s1) - ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP3]](s1) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] - ; 
CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] - ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C2]] - ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C2]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND8]](s32), [[AND9]](s32), [[AND10]](s32), [[AND11]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - ; CHECK-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5 = COPY [[BUILD_VECTOR]](<4 x s32>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $vgpr1_vgpr2 - %2:_(<4 x s16>), %3:_(<4 x s1>) = G_UADDO %0, %1 - %4:_(<4 x s32>) = G_ZEXT %3 - $vgpr0_vgpr1 = COPY %2 - $vgpr2_vgpr3_vgpr4_vgpr5 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr1_vgpr2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C1]] + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[AND]], [[AND1]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[ADD]], [[C1]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[ADD]](i32), [[AND2]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[LSHR]], [[LSHR2]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[ADD1]], [[C1]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[ADD1]](i32), [[AND3]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[BITCAST3]], [[C1]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[AND4]], [[AND5]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(i32) = G_AND [[ADD2]], [[C1]] + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[ADD2]](i32), [[AND6]] + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[LSHR1]], [[LSHR3]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(i32) = G_AND [[ADD3]], [[C1]] + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[ADD3]](i32), [[AND7]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[AND2]](i32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[AND3]](i32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY3]], [[C]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY2]], [[SHL]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[AND6]](i32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[AND7]](i32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY5]], [[C]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[COPY4]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) 
= G_BITCAST [[OR1]](i32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x i16>), [[BITCAST5]](<2 x i16>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP]](i1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP1]](i1) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP2]](i1) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP3]](i1) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(i32) = G_AND [[ANYEXT]], [[C2]] + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(i32) = G_AND [[ANYEXT1]], [[C2]] + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(i32) = G_AND [[ANYEXT2]], [[C2]] + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(i32) = G_AND [[ANYEXT3]], [[C2]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[AND8]](i32), [[AND9]](i32), [[AND10]](i32), [[AND11]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) + ; CHECK-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5 = COPY [[BUILD_VECTOR]](<4 x i32>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = COPY $vgpr1_vgpr2 + %2:_(<4 x i16>), %3:_(<4 x i1>) = G_UADDO %0, %1 + %4:_(<4 x i32>) = G_ZEXT %3(<4 x i1>) + $vgpr0_vgpr1 = COPY %2(<4 x i16>) + $vgpr2_vgpr3_vgpr4_vgpr5 = COPY %4(<4 x i32>) ... --- @@ -317,25 +317,25 @@ body: | ; CHECK-LABEL: name: test_uaddo_v2s32 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; CHECK-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV1]], [[UV3]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UADDO]](s32), [[UADDO2]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDO1]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDO3]](s1) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C]] - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - ; CHECK-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>), %3:_(<2 x s1>) = G_UADDO %0, %1 - %4:_(<2 x s32>) = G_ZEXT %3 - $vgpr0_vgpr1 = COPY %2 - $vgpr2_vgpr3 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV]], [[UV2]] + ; CHECK-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UV1]], [[UV3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[UADDO]](i32), [[UADDO2]](i32) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[UADDO1]](i1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT 
[[UADDO3]](i1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[ANYEXT]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[ANYEXT1]], [[C]] + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[AND]](i32), [[AND1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + ; CHECK-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i32>), %3:_(<2 x i1>) = G_UADDO %0, %1 + %4:_(<2 x i32>) = G_ZEXT %3(<2 x i1>) + $vgpr0_vgpr1 = COPY %2(<2 x i32>) + $vgpr2_vgpr3 = COPY %4(<2 x i32>) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir index cc25924eade62..35cb6c7978910 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir @@ -14,54 +14,54 @@ body: | ; GFX6-LABEL: name: uaddsat_s7 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[SHL]], [[C1]] - ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[XOR]], [[SHL1]] - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[UMIN]] - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[ADD]], [[C]](s32) - ; GFX6-NEXT: $vgpr0 = COPY [[LSHR]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 25 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[C]](i32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY1]], [[C]](i32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[SHL]], [[C1]] + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[XOR]], [[SHL1]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[SHL]], [[UMIN]] + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[ADD]], [[C]](i32) + ; GFX6-NEXT: $vgpr0 = COPY [[LSHR]](i32) ; ; GFX8-LABEL: name: uaddsat_s7 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) - ; GFX8-NEXT: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[SHL]], [[SHL1]] - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[UADDSAT]], [[C]](s16) - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) - ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 9 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[C]](i16) + ; GFX8-NEXT: 
[[SHL1:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[C]](i16) + ; GFX8-NEXT: [[UADDSAT:%[0-9]+]]:_(i16) = G_UADDSAT [[SHL]], [[SHL1]] + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i16) = G_LSHR [[UADDSAT]], [[C]](i16) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR]](i16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: uaddsat_s7 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) - ; GFX9-NEXT: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[SHL]], [[SHL1]] - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[UADDSAT]], [[C]](s16) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s7) = G_TRUNC %0 - %3:_(s7) = G_TRUNC %1 - %4:_(s7) = G_UADDSAT %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 9 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[C]](i16) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[C]](i16) + ; GFX9-NEXT: [[UADDSAT:%[0-9]+]]:_(i16) = G_UADDSAT [[SHL]], [[SHL1]] + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i16) = G_LSHR [[UADDSAT]], [[C]](i16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i7) = G_TRUNC %0(i32) + %3:_(i7) = G_TRUNC %1(i32) + %4:_(i7) = G_UADDSAT %2, %3 + %5:_(i32) = G_ANYEXT %4(i7) + $vgpr0 = COPY %5(i32) ... 
--- @@ -73,54 +73,54 @@ body: | ; GFX6-LABEL: name: uaddsat_s8 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[SHL]], [[C1]] - ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[XOR]], [[SHL1]] - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[UMIN]] - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[ADD]], [[C]](s32) - ; GFX6-NEXT: $vgpr0 = COPY [[LSHR]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[C]](i32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY1]], [[C]](i32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[SHL]], [[C1]] + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[XOR]], [[SHL1]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[SHL]], [[UMIN]] + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[ADD]], [[C]](i32) + ; GFX6-NEXT: $vgpr0 = COPY [[LSHR]](i32) ; ; GFX8-LABEL: name: uaddsat_s8 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) - ; GFX8-NEXT: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[SHL]], [[SHL1]] - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[UADDSAT]], [[C]](s16) - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) - ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[C]](i16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[C]](i16) + ; GFX8-NEXT: [[UADDSAT:%[0-9]+]]:_(i16) = G_UADDSAT [[SHL]], [[SHL1]] + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i16) = G_LSHR [[UADDSAT]], [[C]](i16) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR]](i16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: uaddsat_s8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) - ; GFX9-NEXT: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[SHL]], [[SHL1]] - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[UADDSAT]], [[C]](s16) - ; 
GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s8) = G_TRUNC %0 - %3:_(s8) = G_TRUNC %1 - %4:_(s8) = G_UADDSAT %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[C]](i16) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[C]](i16) + ; GFX9-NEXT: [[UADDSAT:%[0-9]+]]:_(i16) = G_UADDSAT [[SHL]], [[SHL1]] + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i16) = G_LSHR [[UADDSAT]], [[C]](i16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i8) = G_TRUNC %0(i32) + %3:_(i8) = G_TRUNC %1(i32) + %4:_(i8) = G_UADDSAT %2, %3 + %5:_(i32) = G_ANYEXT %4(i8) + $vgpr0 = COPY %5(i32) ... --- @@ -132,101 +132,101 @@ body: | ; GFX6-LABEL: name: uaddsat_v2s8 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C1]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[SHL]], [[C2]] - ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[XOR]], [[SHL1]] - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[UMIN]] - ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[ADD]], [[C1]](s32) - ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) - ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C1]](s32) - ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[SHL2]], [[C2]] - ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[XOR1]], [[SHL3]] - ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SHL2]], [[UMIN1]] - ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[ADD1]], [[C1]](s32) - ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LSHR3]], [[COPY2]](s32) - ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC]], [[TRUNC1]] - ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY1]], [[C]](i32) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[C1]](i32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY1]], [[C1]](i32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR 
[[SHL]], [[C2]] + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[XOR]], [[SHL1]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[SHL]], [[UMIN]] + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[ADD]], [[C1]](i32) + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C1]](i32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LSHR1]], [[C1]](i32) + ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[SHL2]], [[C2]] + ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(i32) = G_UMIN [[XOR1]], [[SHL3]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[SHL2]], [[UMIN1]] + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[ADD1]], [[C1]](i32) + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C]](i32) + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LSHR3]], [[COPY2]](i32) + ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[SHL4]](i32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[TRUNC]], [[TRUNC1]] + ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[OR]](i16) + ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX8-LABEL: name: uaddsat_v2s8 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C1]](s16) - ; GFX8-NEXT: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[SHL]], [[SHL1]] - ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[UADDSAT]], [[C1]](s16) - ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[C1]](s16) - ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[C1]](s16) - ; GFX8-NEXT: [[UADDSAT1:%[0-9]+]]:_(s16) = G_UADDSAT [[SHL2]], [[SHL3]] - ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[UADDSAT1]], [[C1]](s16) - ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[LSHR3]], [[C1]](s16) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[LSHR2]], [[SHL4]] - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY1]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[C1]](i16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[C1]](i16) + ; GFX8-NEXT: [[UADDSAT:%[0-9]+]]:_(i16) = G_UADDSAT [[SHL]], [[SHL1]] + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(i16) = G_LSHR [[UADDSAT]], [[C1]](i16) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[TRUNC2]], 
[[C1]](i16) + ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(i16) = G_SHL [[TRUNC3]], [[C1]](i16) + ; GFX8-NEXT: [[UADDSAT1:%[0-9]+]]:_(i16) = G_UADDSAT [[SHL2]], [[SHL3]] + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(i16) = G_LSHR [[UADDSAT1]], [[C1]](i16) + ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(i16) = G_SHL [[LSHR3]], [[C1]](i16) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[LSHR2]], [[SHL4]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[OR]](i16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: uaddsat_v2s8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C2]](s16), [[C2]](s16) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR]], [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR1]], [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[UADDSAT:%[0-9]+]]:_(<2 x s16>) = G_UADDSAT [[SHL]], [[SHL1]] - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[UADDSAT]], [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR2]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL2]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(<2 x s8>) = G_BITCAST %2 - %5:_(<2 x s8>) = G_BITCAST %3 - %6:_(<2 x s8>) = G_UADDSAT %4, %5 - %7:_(s16) = G_BITCAST %6 - %8:_(s32) = G_ANYEXT %7 - $vgpr0 = COPY %8 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY1]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[C2]](i16), [[C2]](i16) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x i16>) = G_SHL [[BUILD_VECTOR]], [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x i16>) = G_SHL [[BUILD_VECTOR1]], [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: [[UADDSAT:%[0-9]+]]:_(<2 x i16>) = G_UADDSAT [[SHL]], [[SHL1]] + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(<2 x i16>) = G_LSHR [[UADDSAT]], [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[LSHR2]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C1]](i32) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC4]], [[C3]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC5]], [[C3]] + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C2]](i16) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL2]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[OR]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(<2 x i8>) = G_BITCAST %2(i16) + %5:_(<2 x i8>) = G_BITCAST %3(i16) + %6:_(<2 x i8>) = G_UADDSAT %4, %5 + %7:_(i16) = G_BITCAST %6(<2 x i8>) + %8:_(i32) = G_ANYEXT %7(i16) + $vgpr0 = COPY %8(i32) ... --- @@ -238,46 +238,46 @@ body: | ; GFX6-LABEL: name: uaddsat_s16 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[SHL]], [[C1]] - ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[XOR]], [[SHL1]] - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[UMIN]] - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[ADD]], [[C]](s32) - ; GFX6-NEXT: $vgpr0 = COPY [[LSHR]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[C]](i32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY1]], [[C]](i32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[SHL]], [[C1]] + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[XOR]], [[SHL1]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[SHL]], [[UMIN]] + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[ADD]], [[C]](i32) + ; GFX6-NEXT: $vgpr0 = COPY [[LSHR]](i32) ; ; GFX8-LABEL: name: uaddsat_s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8-NEXT: [[UADDSAT:%[0-9]+]]:_(s16) = 
G_UADDSAT [[TRUNC]], [[TRUNC1]] - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDSAT]](s16) - ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX8-NEXT: [[UADDSAT:%[0-9]+]]:_(i16) = G_UADDSAT [[TRUNC]], [[TRUNC1]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[UADDSAT]](i16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: uaddsat_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC]], [[TRUNC1]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDSAT]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s16) = G_UADDSAT %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[UADDSAT:%[0-9]+]]:_(i16) = G_UADDSAT [[TRUNC]], [[TRUNC1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[UADDSAT]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i16) = G_UADDSAT %2, %3 + %5:_(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... 
--- @@ -289,65 +289,65 @@ body: | ; GFX6-LABEL: name: uaddsat_v2s16 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[SHL]], [[C1]] - ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[XOR]], [[SHL1]] - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[UMIN]] - ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[ADD]], [[C]](s32) - ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[SHL2]], [[C1]] - ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[XOR1]], [[SHL3]] - ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SHL2]], [[UMIN1]] - ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[ADD1]], [[C]](s32) - ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LSHR3]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL4]] - ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[BITCAST]], [[C]](i32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[BITCAST1]], [[C]](i32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[SHL]], [[C1]] + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[XOR]], [[SHL1]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[SHL]], [[UMIN]] + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[ADD]], [[C]](i32) + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C]](i32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LSHR1]], [[C]](i32) + ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[SHL2]], [[C1]] + ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(i32) = G_UMIN [[XOR1]], [[SHL3]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[SHL2]], [[UMIN1]] + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[ADD1]], [[C]](i32) + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LSHR3]], [[C]](i32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[LSHR2]], [[SHL4]] + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) ; ; GFX8-LABEL: name: uaddsat_v2s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8-NEXT: 
[[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC]], [[TRUNC2]] - ; GFX8-NEXT: [[UADDSAT1:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC1]], [[TRUNC3]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT]](s16) - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT1]](s16) - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX8-NEXT: [[UADDSAT:%[0-9]+]]:_(i16) = G_UADDSAT [[TRUNC]], [[TRUNC2]] + ; GFX8-NEXT: [[UADDSAT1:%[0-9]+]]:_(i16) = G_UADDSAT [[TRUNC1]], [[TRUNC3]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDSAT]](i16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UADDSAT1]](i16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) ; ; GFX9-LABEL: name: uaddsat_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[UADDSAT:%[0-9]+]]:_(<2 x s16>) = G_UADDSAT [[COPY]], [[COPY1]] - ; GFX9-NEXT: $vgpr0 = COPY [[UADDSAT]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_UADDSAT %0, %1 - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[UADDSAT:%[0-9]+]]:_(<2 x i16>) = G_UADDSAT [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[UADDSAT]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = G_UADDSAT %0, %1 + $vgpr0 = COPY %2(<2 x i16>) ... 
--- @@ -359,143 +359,143 @@ body: | ; GFX6-LABEL: name: uaddsat_v3s16 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[SHL]], [[C1]] - ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[XOR]], [[SHL1]] - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[UMIN]] - ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[ADD]], [[C]](s32) - ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[C]](s32) - ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[SHL2]], [[C1]] - ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[XOR1]], [[SHL3]] - ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SHL2]], [[UMIN1]] - ; GFX6-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[ADD1]], [[C]](s32) - ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) - ; GFX6-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SHL4]], [[C1]] - ; GFX6-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[XOR2]], [[SHL5]] - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SHL4]], [[UMIN2]] - ; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[ADD2]], [[C]](s32) - ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX6-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR4]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL6]] - ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C2]] - ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR5]], [[SHL7]] - ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C2]] - ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR6]], [[SHL8]] - ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX6-NEXT: 
[[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[BITCAST]], [[C]](i32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LSHR1]], [[C]](i32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[SHL]], [[C1]] + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[XOR]], [[SHL1]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[SHL]], [[UMIN]] + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[ADD]], [[C]](i32) + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C]](i32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[BITCAST2]], [[C]](i32) + ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[SHL2]], [[C1]] + ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(i32) = G_UMIN [[XOR1]], [[SHL3]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[SHL2]], [[UMIN1]] + ; GFX6-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[ADD1]], [[C]](i32) + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[BITCAST1]], [[C]](i32) + ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[LSHR2]], [[C]](i32) + ; GFX6-NEXT: [[XOR2:%[0-9]+]]:_(i32) = G_XOR [[SHL4]], [[C1]] + ; GFX6-NEXT: [[UMIN2:%[0-9]+]]:_(i32) = G_UMIN [[XOR2]], [[SHL5]] + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[SHL4]], [[UMIN2]] + ; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[ADD2]], [[C]](i32) + ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX6-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[LSHR4]], [[C]](i32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[LSHR3]], [[SHL6]] + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST3]], [[C2]] + ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[AND]], [[C]](i32) + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[LSHR5]], [[SHL7]] + ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST4]], [[C2]] + ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR6]], [[SHL8]] + ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x i16>), [[BITCAST6]](<2 x i16>), [[BITCAST7]](<2 x i16>) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX8-LABEL: name: uaddsat_v3s16 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY 
$vgpr0_vgpr1_vgpr2 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX8-NEXT: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC]], [[TRUNC3]] - ; GFX8-NEXT: [[UADDSAT1:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC1]], [[TRUNC4]] - ; GFX8-NEXT: [[UADDSAT2:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC2]], [[TRUNC5]] - ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT]](s16) - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT1]](s16) - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT2]](s16) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL2]] - ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX8-NEXT: 
[[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX8-NEXT: [[UADDSAT:%[0-9]+]]:_(i16) = G_UADDSAT [[TRUNC]], [[TRUNC3]] + ; GFX8-NEXT: [[UADDSAT1:%[0-9]+]]:_(i16) = G_UADDSAT [[TRUNC1]], [[TRUNC4]] + ; GFX8-NEXT: [[UADDSAT2:%[0-9]+]]:_(i16) = G_UADDSAT [[TRUNC2]], [[TRUNC5]] + ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDSAT]](i16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UADDSAT1]](i16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[UADDSAT2]](i16) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST3]], [[C1]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND]], [[C]](i32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST4]], [[C1]] + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR3]], [[SHL2]] + ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x i16>), [[BITCAST6]](<2 x i16>), [[BITCAST7]](<2 x i16>) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX9-LABEL: name: uaddsat_v3s16 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) - ; GFX9-NEXT: 
[[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[UADDSAT:%[0-9]+]]:_(<2 x s16>) = G_UADDSAT [[UV]], [[BUILD_VECTOR1]] - ; GFX9-NEXT: [[UADDSAT1:%[0-9]+]]:_(<2 x s16>) = G_UADDSAT [[BUILD_VECTOR]], [[BUILD_VECTOR2]] - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UADDSAT]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UADDSAT1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC8]](s16), [[TRUNC9]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) - %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0 - %3:_(<3 x s16>) = G_UADDSAT %1, %2 - %4:_(<3 x s16>) = G_IMPLICIT_DEF - %5:_(<6 x s16>) = G_CONCAT_VECTORS %3, %4 - $vgpr0_vgpr1_vgpr2 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC1]](i16), [[TRUNC2]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC3]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[UADDSAT:%[0-9]+]]:_(<2 x i16>) = G_UADDSAT [[UV]], [[BUILD_VECTOR1]] + ; GFX9-NEXT: [[UADDSAT1:%[0-9]+]]:_(<2 x i16>) = G_UADDSAT [[BUILD_VECTOR]], [[BUILD_VECTOR2]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UADDSAT]](<2 x i16>) + ; 
GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UADDSAT1]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST3]](i32) + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF1]](<4 x i16>) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC9:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST5]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC6]](i16), [[TRUNC7]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC8]](i16), [[TRUNC9]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR5]](<2 x i16>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) + %0:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x i16>), %2:_(<3 x i16>) = G_UNMERGE_VALUES %0(<6 x i16>) + %3:_(<3 x i16>) = G_UADDSAT %1, %2 + %4:_(<3 x i16>) = G_IMPLICIT_DEF + %5:_(<6 x i16>) = G_CONCAT_VECTORS %3(<3 x i16>), %4(<3 x i16>) + $vgpr0_vgpr1_vgpr2 = COPY %5(<6 x i16>) ... 
--- @@ -507,109 +507,109 @@ body: | ; GFX6-LABEL: name: uaddsat_v4s16 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[C]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[SHL]], [[C1]] - ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[XOR]], [[SHL1]] - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[UMIN]] - ; GFX6-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[ADD]], [[C]](s32) - ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) - ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[SHL2]], [[C1]] - ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[XOR1]], [[SHL3]] - ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SHL2]], [[UMIN1]] - ; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[ADD1]], [[C]](s32) - ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[C]](s32) - ; GFX6-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SHL4]], [[C1]] - ; GFX6-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[XOR2]], [[SHL5]] - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SHL4]], [[UMIN2]] - ; GFX6-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[ADD2]], [[C]](s32) - ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LSHR3]], [[C]](s32) - ; GFX6-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SHL6]], [[C1]] - ; GFX6-NEXT: [[UMIN3:%[0-9]+]]:_(s32) = G_UMIN [[XOR3]], [[SHL7]] - ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SHL6]], [[UMIN3]] - ; GFX6-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[ADD3]], [[C]](s32) - ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LSHR5]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR4]], [[SHL8]] - ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[LSHR7]], [[C]](s32) - ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR6]], [[SHL9]] - ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 
+ ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[BITCAST]], [[C]](i32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[BITCAST2]], [[C]](i32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[SHL]], [[C1]] + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[XOR]], [[SHL1]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[SHL]], [[UMIN]] + ; GFX6-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[ADD]], [[C]](i32) + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C]](i32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LSHR2]], [[C]](i32) + ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[SHL2]], [[C1]] + ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(i32) = G_UMIN [[XOR1]], [[SHL3]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[SHL2]], [[UMIN1]] + ; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[ADD1]], [[C]](i32) + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[BITCAST1]], [[C]](i32) + ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[BITCAST3]], [[C]](i32) + ; GFX6-NEXT: [[XOR2:%[0-9]+]]:_(i32) = G_XOR [[SHL4]], [[C1]] + ; GFX6-NEXT: [[UMIN2:%[0-9]+]]:_(i32) = G_UMIN [[XOR2]], [[SHL5]] + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[SHL4]], [[UMIN2]] + ; GFX6-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[ADD2]], [[C]](i32) + ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[LSHR1]], [[C]](i32) + ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LSHR3]], [[C]](i32) + ; GFX6-NEXT: [[XOR3:%[0-9]+]]:_(i32) = G_XOR [[SHL6]], [[C1]] + ; GFX6-NEXT: [[UMIN3:%[0-9]+]]:_(i32) = G_UMIN [[XOR3]], [[SHL7]] + ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[SHL6]], [[UMIN3]] + ; GFX6-NEXT: [[LSHR7:%[0-9]+]]:_(i32) = G_LSHR [[ADD3]], [[C]](i32) + ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[LSHR5]], [[C]](i32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[LSHR4]], [[SHL8]] + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX6-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[LSHR7]], [[C]](i32) + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[LSHR6]], [[SHL9]] + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x i16>), [[BITCAST5]](<2 x i16>) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX8-LABEL: name: uaddsat_v4s16 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST 
[[UV]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX8-NEXT: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC]], [[TRUNC4]] - ; GFX8-NEXT: [[UADDSAT1:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC1]], [[TRUNC5]] - ; GFX8-NEXT: [[UADDSAT2:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC2]], [[TRUNC6]] - ; GFX8-NEXT: [[UADDSAT3:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC3]], [[TRUNC7]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT]](s16) - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT1]](s16) - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT2]](s16) - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT3]](s16) - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX8-NEXT: 
[[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST3]](i32) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX8-NEXT: [[UADDSAT:%[0-9]+]]:_(i16) = G_UADDSAT [[TRUNC]], [[TRUNC4]] + ; GFX8-NEXT: [[UADDSAT1:%[0-9]+]]:_(i16) = G_UADDSAT [[TRUNC1]], [[TRUNC5]] + ; GFX8-NEXT: [[UADDSAT2:%[0-9]+]]:_(i16) = G_UADDSAT [[TRUNC2]], [[TRUNC6]] + ; GFX8-NEXT: [[UADDSAT3:%[0-9]+]]:_(i16) = G_UADDSAT [[TRUNC3]], [[TRUNC7]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDSAT]](i16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UADDSAT1]](i16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[UADDSAT2]](i16) + ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[UADDSAT3]](i16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x i16>), [[BITCAST5]](<2 x i16>) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX9-LABEL: name: uaddsat_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9-NEXT: [[UADDSAT:%[0-9]+]]:_(<2 x s16>) = G_UADDSAT [[UV]], [[UV2]] - ; GFX9-NEXT: [[UADDSAT1:%[0-9]+]]:_(<2 x s16>) = G_UADDSAT [[UV1]], [[UV3]] - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UADDSAT]](<2 x s16>), [[UADDSAT1]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 - %2:_(<4 x s16>) = G_UADDSAT %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; GFX9-NEXT: [[UADDSAT:%[0-9]+]]:_(<2 x i16>) = G_UADDSAT [[UV]], [[UV2]] + ; GFX9-NEXT: [[UADDSAT1:%[0-9]+]]:_(<2 x i16>) = G_UADDSAT [[UV1]], [[UV3]] + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[UADDSAT]](<2 x i16>), [[UADDSAT1]](<2 x i16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = COPY $vgpr2_vgpr3 + %2:_(<4 x i16>) = G_UADDSAT %0, %1 + $vgpr0_vgpr1 = COPY %2(<4 x i16>) ... 
--- @@ -621,33 +621,33 @@ body: | ; GFX6-LABEL: name: uaddsat_s32 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[C]] - ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[XOR]], [[COPY1]] - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[UMIN]] - ; GFX6-NEXT: $vgpr0 = COPY [[ADD]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[COPY]], [[C]] + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[XOR]], [[COPY1]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[COPY]], [[UMIN]] + ; GFX6-NEXT: $vgpr0 = COPY [[ADD]](i32) ; ; GFX8-LABEL: name: uaddsat_s32 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[UADDSAT:%[0-9]+]]:_(s32) = G_UADDSAT [[COPY]], [[COPY1]] - ; GFX8-NEXT: $vgpr0 = COPY [[UADDSAT]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[UADDSAT:%[0-9]+]]:_(i32) = G_UADDSAT [[COPY]], [[COPY1]] + ; GFX8-NEXT: $vgpr0 = COPY [[UADDSAT]](i32) ; ; GFX9-LABEL: name: uaddsat_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[UADDSAT:%[0-9]+]]:_(s32) = G_UADDSAT [[COPY]], [[COPY1]] - ; GFX9-NEXT: $vgpr0 = COPY [[UADDSAT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_UADDSAT %0, %1 - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[UADDSAT:%[0-9]+]]:_(i32) = G_UADDSAT [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[UADDSAT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_UADDSAT %0, %1 + $vgpr0 = COPY %2(i32) ... 
--- @@ -659,47 +659,47 @@ body: | ; GFX6-LABEL: name: uaddsat_v2s32 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV]], [[C]] - ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[XOR]], [[UV2]] - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UMIN]] - ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[UV1]], [[C]] - ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[XOR1]], [[UV3]] - ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UMIN1]] - ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[UV]], [[C]] + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[XOR]], [[UV2]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[UV]], [[UMIN]] + ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[UV1]], [[C]] + ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(i32) = G_UMIN [[XOR1]], [[UV3]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[UV1]], [[UMIN1]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[ADD]](i32), [[ADD1]](i32) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX8-LABEL: name: uaddsat_v2s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX8-NEXT: [[UADDSAT:%[0-9]+]]:_(s32) = G_UADDSAT [[UV]], [[UV2]] - ; GFX8-NEXT: [[UADDSAT1:%[0-9]+]]:_(s32) = G_UADDSAT [[UV1]], [[UV3]] - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UADDSAT]](s32), [[UADDSAT1]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX8-NEXT: [[UADDSAT:%[0-9]+]]:_(i32) = G_UADDSAT [[UV]], [[UV2]] + ; GFX8-NEXT: [[UADDSAT1:%[0-9]+]]:_(i32) = G_UADDSAT [[UV1]], [[UV3]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[UADDSAT]](i32), [[UADDSAT1]](i32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX9-LABEL: name: uaddsat_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; 
GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9-NEXT: [[UADDSAT:%[0-9]+]]:_(s32) = G_UADDSAT [[UV]], [[UV2]] - ; GFX9-NEXT: [[UADDSAT1:%[0-9]+]]:_(s32) = G_UADDSAT [[UV1]], [[UV3]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UADDSAT]](s32), [[UADDSAT1]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = G_UADDSAT %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX9-NEXT: [[UADDSAT:%[0-9]+]]:_(i32) = G_UADDSAT [[UV]], [[UV2]] + ; GFX9-NEXT: [[UADDSAT1:%[0-9]+]]:_(i32) = G_UADDSAT [[UV1]], [[UV3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[UADDSAT]](i32), [[UADDSAT1]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i32>) = G_UADDSAT %0, %1 + $vgpr0_vgpr1 = COPY %2(<2 x i32>) ... --- @@ -711,48 +711,48 @@ body: | ; GFX6-LABEL: name: uaddsat_s64 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] - ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[UADDE1]](s1), [[C]], [[MV]] - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV]], [[UV2]] + ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 -1 + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[UADDE1]](i1), [[C]], [[MV]] + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](i64) ; ; GFX8-LABEL: name: uaddsat_s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), 
[[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] - ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[UADDE1]](s1), [[C]], [[MV]] - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV]], [[UV2]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 -1 + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[UADDE1]](i1), [[C]], [[MV]] + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](i64) ; ; GFX9-LABEL: name: uaddsat_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[UADDE1]](s1), [[C]], [[MV]] - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = G_UADDSAT %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV]], [[UV2]] + ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 -1 + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[UADDE1]](i1), [[C]], [[MV]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64) = G_UADDSAT %0, %1 + $vgpr0_vgpr1 = COPY %2(i64) ... 
--- @@ -764,73 +764,73 @@ body: | ; GFX6-LABEL: name: uaddsat_v2s64 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] - ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[UADDE1]](s1), [[C]], [[MV]] - ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX6-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] - ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] - ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[UADDE3]](s1), [[C]], [[MV1]] - ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY1]](<2 x i64>) + ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV]](i64) + ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV4]], [[UV6]] + ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 -1 + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[UADDE1]](i1), [[C]], [[MV]] + ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV1]](i64) + ; GFX6-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV3]](i64) + ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UV8]], [[UV10]] + ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] + ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO2]](i32), [[UADDE2]](i32) + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[UADDE3]](i1), [[C]], [[MV1]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[SELECT]](i64), [[SELECT1]](i64) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY 
[[BUILD_VECTOR]](<2 x i64>) ; ; GFX8-LABEL: name: uaddsat_v2s64 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] - ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[UADDE1]](s1), [[C]], [[MV]] - ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] - ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] - ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[UADDE3]](s1), [[C]], [[MV1]] - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY1]](<2 x i64>) + ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV]](i64) + ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV4]], [[UV6]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 -1 + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[UADDE1]](i1), [[C]], [[MV]] + ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV1]](i64) + ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV3]](i64) + ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UV8]], [[UV10]] + ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] + ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO2]](i32), [[UADDE2]](i32) + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[UADDE3]](i1), [[C]], [[MV1]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[SELECT]](i64), [[SELECT1]](i64) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY 
[[BUILD_VECTOR]](<2 x i64>) ; ; GFX9-LABEL: name: uaddsat_v2s64 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[UADDE1]](s1), [[C]], [[MV]] - ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] - ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] - ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[UADDE3]](s1), [[C]], [[MV1]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - %2:_(<2 x s64>) = G_UADDSAT %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY1]](<2 x i64>) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV]](i64) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[UV4]], [[UV6]] + ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 -1 + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[UADDE1]](i1), [[C]], [[MV]] + ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV1]](i64) + ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV3]](i64) + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UV8]], [[UV10]] + ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO2]](i32), [[UADDE2]](i32) + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[UADDE3]](i1), [[C]], 
[[MV1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[SELECT]](i64), [[SELECT1]](i64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(<2 x i64>) = G_UADDSAT %0, %1 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<2 x i64>) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ubfx.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ubfx.mir index 65abd75f478e6..f55f0d898648f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ubfx.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ubfx.mir @@ -12,16 +12,16 @@ body: | ; GCN-LABEL: name: test_ubfx_s32 ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: %copy:_(s32) = COPY $vgpr0 - ; GCN-NEXT: %offset:_(s32) = COPY $vgpr1 - ; GCN-NEXT: %width:_(s32) = COPY $vgpr2 - ; GCN-NEXT: %ubfx:_(s32) = G_UBFX %copy, %offset(s32), %width - ; GCN-NEXT: $vgpr0 = COPY %ubfx(s32) - %copy:_(s32) = COPY $vgpr0 - %offset:_(s32) = COPY $vgpr1 - %width:_(s32) = COPY $vgpr2 - %ubfx:_(s32) = G_UBFX %copy, %offset(s32), %width - $vgpr0 = COPY %ubfx(s32) + ; GCN-NEXT: %copy:_(i32) = COPY $vgpr0 + ; GCN-NEXT: %offset:_(i32) = COPY $vgpr1 + ; GCN-NEXT: %width:_(i32) = COPY $vgpr2 + ; GCN-NEXT: %ubfx:_(i32) = G_UBFX %copy, %offset(i32), %width + ; GCN-NEXT: $vgpr0 = COPY %ubfx(i32) + %copy:_(i32) = COPY $vgpr0 + %offset:_(i32) = COPY $vgpr1 + %width:_(i32) = COPY $vgpr2 + %ubfx:_(i32) = G_UBFX %copy, %offset(i32), %width + $vgpr0 = COPY %ubfx(i32) ... --- @@ -33,16 +33,16 @@ body: | ; GCN-LABEL: name: test_ubfx_s64 ; GCN: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: %copy:_(s64) = COPY $vgpr0_vgpr1 - ; GCN-NEXT: %offset:_(s32) = COPY $vgpr2 - ; GCN-NEXT: %width:_(s32) = COPY $vgpr3 - ; GCN-NEXT: %ubfx:_(s64) = G_UBFX %copy, %offset(s32), %width - ; GCN-NEXT: $vgpr0_vgpr1 = COPY %ubfx(s64) - %copy:_(s64) = COPY $vgpr0_vgpr1 - %offset:_(s32) = COPY $vgpr2 - %width:_(s32) = COPY $vgpr3 - %ubfx:_(s64) = G_UBFX %copy, %offset(s32), %width - $vgpr0_vgpr1 = COPY %ubfx(s64) + ; GCN-NEXT: %copy:_(i64) = COPY $vgpr0_vgpr1 + ; GCN-NEXT: %offset:_(i32) = COPY $vgpr2 + ; GCN-NEXT: %width:_(i32) = COPY $vgpr3 + ; GCN-NEXT: %ubfx:_(i64) = G_UBFX %copy, %offset(i32), %width + ; GCN-NEXT: $vgpr0_vgpr1 = COPY %ubfx(i64) + %copy:_(i64) = COPY $vgpr0_vgpr1 + %offset:_(i32) = COPY $vgpr2 + %width:_(i32) = COPY $vgpr3 + %ubfx:_(i64) = G_UBFX %copy, %offset(i32), %width + $vgpr0_vgpr1 = COPY %ubfx(i64) ... 
--- @@ -54,24 +54,24 @@ body: | ; GCN-LABEL: name: test_ubfx_s8 ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GCN-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GCN-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; GCN-NEXT: [[UBFX:%[0-9]+]]:_(s32) = G_UBFX [[COPY]], [[AND]](s32), [[AND1]] - ; GCN-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UBFX]], [[C]] - ; GCN-NEXT: $vgpr0 = COPY [[AND2]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %copy:_(s8) = G_TRUNC %0 - %offset:_(s8) = G_TRUNC %1 - %width:_(s8) = G_TRUNC %2 - %ubfx:_(s8) = G_UBFX %copy, %offset, %width - %4:_(s32) = G_ZEXT %ubfx - $vgpr0 = COPY %4 + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GCN-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; GCN-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GCN-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY2]], [[C]] + ; GCN-NEXT: [[UBFX:%[0-9]+]]:_(i32) = G_UBFX [[COPY]], [[AND]](i32), [[AND1]] + ; GCN-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[UBFX]], [[C]] + ; GCN-NEXT: $vgpr0 = COPY [[AND2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %copy:_(i8) = G_TRUNC %0(i32) + %offset:_(i8) = G_TRUNC %1(i32) + %width:_(i8) = G_TRUNC %2(i32) + %ubfx:_(i8) = G_UBFX %copy, %offset(i8), %width + %7:_(i32) = G_ZEXT %ubfx(i8) + $vgpr0 = COPY %7(i32) ... --- @@ -83,23 +83,22 @@ body: | ; GCN-LABEL: name: test_ubfx_s16 ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GCN-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GCN-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; GCN-NEXT: [[UBFX:%[0-9]+]]:_(s32) = G_UBFX [[COPY]], [[AND]](s32), [[AND1]] - ; GCN-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UBFX]], [[C]] - ; GCN-NEXT: $vgpr0 = COPY [[AND2]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %copy:_(s16) = G_TRUNC %0 - %offset:_(s16) = G_TRUNC %1 - %width:_(s16) = G_TRUNC %2 - %sbfx:_(s16) = G_UBFX %copy, %offset, %width - %4:_(s32) = G_ZEXT %sbfx - $vgpr0 = COPY %4 + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GCN-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GCN-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GCN-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY2]], [[C]] + ; GCN-NEXT: [[UBFX:%[0-9]+]]:_(i32) = G_UBFX [[COPY]], [[AND]](i32), [[AND1]] + ; GCN-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[UBFX]], [[C]] + ; GCN-NEXT: $vgpr0 = COPY [[AND2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %copy:_(i16) = G_TRUNC %0(i32) + %offset:_(i16) = G_TRUNC %1(i32) + %width:_(i16) = G_TRUNC %2(i32) + %sbfx:_(i16) = G_UBFX %copy, %offset(i16), %width + %7:_(i32) = G_ZEXT %sbfx(i16) + $vgpr0 = COPY %7(i32) ... 
- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir index bfba201e264b1..2004c1d07e001 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir @@ -14,122 +14,122 @@ body: | ; GFX6-LABEL: name: test_udiv_s32 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY1]](s32) - ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] - ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[COPY1]] - ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[ADD]] - ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY1]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[MUL1]] - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY1]] - ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C2]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]] - ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY1]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[COPY1]] - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C2]] - ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] - ; GFX6-NEXT: $vgpr0 = COPY [[SELECT2]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[COPY1]](i32) + ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] + ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C1]], [[COPY1]] + ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[COPY]], [[ADD]] + ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[COPY1]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[COPY]], [[MUL1]] + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[COPY1]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[UMULH1]], [[C2]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[ADD1]], [[UMULH1]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[COPY1]] + ; GFX6-NEXT: 
[[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT1]](i32), [[COPY1]] + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[SELECT]], [[C2]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[ADD2]], [[SELECT]] + ; GFX6-NEXT: $vgpr0 = COPY [[SELECT2]](i32) ; ; GFX8-LABEL: name: test_udiv_s32 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY1]](s32) - ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] - ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[COPY1]] - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[ADD]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY1]] - ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[MUL1]] - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY1]] - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C2]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]] - ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY1]] - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[COPY1]] - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C2]] - ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] - ; GFX8-NEXT: $vgpr0 = COPY [[SELECT2]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[COPY1]](i32) + ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] + ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C1]], [[COPY1]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[COPY]], [[ADD]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[COPY1]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[COPY]], [[MUL1]] + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[COPY1]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[UMULH1]], [[C2]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[ADD1]], [[UMULH1]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], 
[[COPY1]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT1]](i32), [[COPY1]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[SELECT]], [[C2]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[ADD2]], [[SELECT]] + ; GFX8-NEXT: $vgpr0 = COPY [[SELECT2]](i32) ; ; GFX9-LABEL: name: test_udiv_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY1]](s32) - ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] - ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[COPY1]] - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[ADD]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY1]] - ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[MUL1]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY1]] - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C2]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]] - ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY1]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[COPY1]] - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C2]] - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] - ; GFX9-NEXT: $vgpr0 = COPY [[SELECT2]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[COPY1]](i32) + ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] + ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C1]], [[COPY1]] + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[COPY]], [[ADD]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[COPY1]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[COPY]], [[MUL1]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[COPY1]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[UMULH1]], [[C2]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[ADD1]], [[UMULH1]] + ; GFX9-NEXT: 
[[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[COPY1]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT1]](i32), [[COPY1]] + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[SELECT]], [[C2]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[ADD2]], [[SELECT]] + ; GFX9-NEXT: $vgpr0 = COPY [[SELECT2]](i32) ; ; GFX10-LABEL: name: test_udiv_s32 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY1]](s32) - ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] - ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[COPY1]] - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[ADD]] - ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY1]] - ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[MUL1]] - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY1]] - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C2]] - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]] - ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY1]] - ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[COPY1]] - ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C2]] - ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] - ; GFX10-NEXT: $vgpr0 = COPY [[SELECT2]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_UDIV %0, %1 - $vgpr0 = COPY %2 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[COPY1]](i32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C1]], [[COPY1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[COPY]], [[ADD]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[COPY1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[COPY]], [[MUL1]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), 
[[COPY1]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[UMULH1]], [[C2]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[ADD1]], [[UMULH1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[COPY1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT1]](i32), [[COPY1]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[SELECT]], [[C2]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[ADD2]], [[SELECT]] + ; GFX10-NEXT: $vgpr0 = COPY [[SELECT2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_UDIV %0, %1 + $vgpr0 = COPY %2(i32) ... --- @@ -141,210 +141,210 @@ body: | ; GFX6-LABEL: name: test_udiv_v2s32 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV2]](s32) - ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] - ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UV2]] - ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[ADD]] - ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[UV2]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[MUL1]] - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[UV2]] - ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C2]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]] - ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[UV2]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[UV2]] - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C2]] - ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] - ; GFX6-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV3]](s32) - ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) - ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C]] - ; GFX6-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UV3]] - ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB3]], [[FPTOUI1]] - ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] - ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] - ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[ADD3]] - ; GFX6-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[UV3]] - ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[MUL3]] - ; 
GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB4]](s32), [[UV3]] - ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[C2]] - ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[ADD4]], [[UMULH3]] - ; GFX6-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SUB4]], [[UV3]] - ; GFX6-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB5]], [[SUB4]] - ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[UV3]] - ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SELECT3]], [[C2]] - ; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD5]], [[SELECT3]] - ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT2]](s32), [[SELECT5]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[UV2]](i32) + ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] + ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C1]], [[UV2]] + ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[UV]], [[ADD]] + ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[UV2]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[UV]], [[MUL1]] + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[UV2]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[UMULH1]], [[C2]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[ADD1]], [[UMULH1]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[UV2]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT1]](i32), [[UV2]] + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[SELECT]], [[C2]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[ADD2]], [[SELECT]] + ; GFX6-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[UV3]](i32) + ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](f32) + ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C]] + ; GFX6-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL1]](f32) + ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[C1]], [[UV3]] + ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[SUB3]], [[FPTOUI1]] + ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[UV1]], [[ADD3]] + ; GFX6-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UMULH3]], [[UV3]] + ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[UV1]], [[MUL3]] + ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), 
[[SUB4]](i32), [[UV3]] + ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[UMULH3]], [[C2]] + ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[ADD4]], [[UMULH3]] + ; GFX6-NEXT: [[SUB5:%[0-9]+]]:_(i32) = G_SUB [[SUB4]], [[UV3]] + ; GFX6-NEXT: [[SELECT4:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SUB5]], [[SUB4]] + ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT4]](i32), [[UV3]] + ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[SELECT3]], [[C2]] + ; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(i32) = G_SELECT [[ICMP3]](i1), [[ADD5]], [[SELECT3]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SELECT2]](i32), [[SELECT5]](i32) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX8-LABEL: name: test_udiv_v2s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV2]](s32) - ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] - ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UV2]] - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[ADD]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[UV2]] - ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[MUL1]] - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[UV2]] - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C2]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]] - ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[UV2]] - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[UV2]] - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C2]] - ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] - ; GFX8-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV3]](s32) - ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) - ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C]] - ; GFX8-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UV3]] - ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB3]], [[FPTOUI1]] - ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] - ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[ADD3]] - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[UV3]] - ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[MUL3]] - ; GFX8-NEXT: 
[[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB4]](s32), [[UV3]] - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[C2]] - ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[ADD4]], [[UMULH3]] - ; GFX8-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SUB4]], [[UV3]] - ; GFX8-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB5]], [[SUB4]] - ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[UV3]] - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SELECT3]], [[C2]] - ; GFX8-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD5]], [[SELECT3]] - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT2]](s32), [[SELECT5]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[UV2]](i32) + ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] + ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C1]], [[UV2]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[UV]], [[ADD]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[UV2]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[UV]], [[MUL1]] + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[UV2]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[UMULH1]], [[C2]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[ADD1]], [[UMULH1]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[UV2]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT1]](i32), [[UV2]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[SELECT]], [[C2]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[ADD2]], [[SELECT]] + ; GFX8-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[UV3]](i32) + ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](f32) + ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C]] + ; GFX8-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL1]](f32) + ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[C1]], [[UV3]] + ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[SUB3]], [[FPTOUI1]] + ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[UV1]], [[ADD3]] + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UMULH3]], [[UV3]] + ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[UV1]], [[MUL3]] + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), 
[[SUB4]](i32), [[UV3]] + ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[UMULH3]], [[C2]] + ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[ADD4]], [[UMULH3]] + ; GFX8-NEXT: [[SUB5:%[0-9]+]]:_(i32) = G_SUB [[SUB4]], [[UV3]] + ; GFX8-NEXT: [[SELECT4:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SUB5]], [[SUB4]] + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT4]](i32), [[UV3]] + ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[SELECT3]], [[C2]] + ; GFX8-NEXT: [[SELECT5:%[0-9]+]]:_(i32) = G_SELECT [[ICMP3]](i1), [[ADD5]], [[SELECT3]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SELECT2]](i32), [[SELECT5]](i32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX9-LABEL: name: test_udiv_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV2]](s32) - ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] - ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UV2]] - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[ADD]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[UV2]] - ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[MUL1]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[UV2]] - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C2]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]] - ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[UV2]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[UV2]] - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C2]] - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] - ; GFX9-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV3]](s32) - ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) - ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C]] - ; GFX9-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UV3]] - ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB3]], [[FPTOUI1]] - ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] - ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[ADD3]] - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[UV3]] - ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[MUL3]] - ; GFX9-NEXT: 
[[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB4]](s32), [[UV3]] - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[C2]] - ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[ADD4]], [[UMULH3]] - ; GFX9-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SUB4]], [[UV3]] - ; GFX9-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB5]], [[SUB4]] - ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[UV3]] - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SELECT3]], [[C2]] - ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD5]], [[SELECT3]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT2]](s32), [[SELECT5]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[UV2]](i32) + ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] + ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C1]], [[UV2]] + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[UV]], [[ADD]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[UV2]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[UV]], [[MUL1]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[UV2]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[UMULH1]], [[C2]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[ADD1]], [[UMULH1]] + ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[UV2]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT1]](i32), [[UV2]] + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[SELECT]], [[C2]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[ADD2]], [[SELECT]] + ; GFX9-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[UV3]](i32) + ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](f32) + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C]] + ; GFX9-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL1]](f32) + ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[C1]], [[UV3]] + ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[SUB3]], [[FPTOUI1]] + ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[UV1]], [[ADD3]] + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UMULH3]], [[UV3]] + ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[UV1]], [[MUL3]] + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), 
[[SUB4]](i32), [[UV3]] + ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[UMULH3]], [[C2]] + ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[ADD4]], [[UMULH3]] + ; GFX9-NEXT: [[SUB5:%[0-9]+]]:_(i32) = G_SUB [[SUB4]], [[UV3]] + ; GFX9-NEXT: [[SELECT4:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SUB5]], [[SUB4]] + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT4]](i32), [[UV3]] + ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[SELECT3]], [[C2]] + ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(i32) = G_SELECT [[ICMP3]](i1), [[ADD5]], [[SELECT3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SELECT2]](i32), [[SELECT5]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX10-LABEL: name: test_udiv_v2s32 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV2]](s32) - ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] - ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UV2]] - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[ADD]] - ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[UV2]] - ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[MUL1]] - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[UV2]] - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C2]] - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]] - ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[UV2]] - ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[UV2]] - ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C2]] - ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] - ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV3]](s32) - ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) - ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C]] - ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UV3]] - ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB3]], [[FPTOUI1]] - ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] - ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] - ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[ADD3]] - ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[UV3]] - ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(s32) = 
G_SUB [[UV1]], [[MUL3]] - ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB4]](s32), [[UV3]] - ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[C2]] - ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[ADD4]], [[UMULH3]] - ; GFX10-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SUB4]], [[UV3]] - ; GFX10-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB5]], [[SUB4]] - ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[UV3]] - ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SELECT3]], [[C2]] - ; GFX10-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD5]], [[SELECT3]] - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT2]](s32), [[SELECT5]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = G_UDIV %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[UV2]](i32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C1]], [[UV2]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[UV]], [[ADD]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[UV2]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[UV]], [[MUL1]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[UV2]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[UMULH1]], [[C2]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[ADD1]], [[UMULH1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[UV2]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT1]](i32), [[UV2]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[SELECT]], [[C2]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[ADD2]], [[SELECT]] + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[UV3]](i32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](f32) + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C]] + ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL1]](f32) + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[C1]], [[UV3]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[SUB3]], [[FPTOUI1]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH 
[[UV1]], [[ADD3]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UMULH3]], [[UV3]] + ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[UV1]], [[MUL3]] + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB4]](i32), [[UV3]] + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[UMULH3]], [[C2]] + ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[ADD4]], [[UMULH3]] + ; GFX10-NEXT: [[SUB5:%[0-9]+]]:_(i32) = G_SUB [[SUB4]], [[UV3]] + ; GFX10-NEXT: [[SELECT4:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SUB5]], [[SUB4]] + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT4]](i32), [[UV3]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[SELECT3]], [[C2]] + ; GFX10-NEXT: [[SELECT5:%[0-9]+]]:_(i32) = G_SELECT [[ICMP3]](i1), [[ADD5]], [[SELECT3]] + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SELECT2]](i32), [[SELECT5]](i32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i32>) = G_UDIV %0, %1 + $vgpr0_vgpr1 = COPY %2(<2 x i32>) ... --- @@ -356,610 +356,610 @@ body: | ; GFX6-LABEL: name: test_udiv_s64 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32) - ; GFX6-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]] - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] - ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 - ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; GFX6-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C2]] - ; GFX6-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; GFX6-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] - ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] - ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) - ; GFX6-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) - ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]] - ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] - ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] - ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] - ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] - ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; GFX6-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], 
[[MUL]] - ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] - ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] - ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] - ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] - ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] - ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] - ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] - ; GFX6-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] - ; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] - ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] - ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] - ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] - ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] - ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] - ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] - ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] - ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] - ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] - ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] - ; GFX6-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] - ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] - ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] - ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] - ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] - ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] - ; GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX6-NEXT: 
[[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] - ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] - ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] - ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]] - ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE2]] - ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]] - ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] - ; GFX6-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] - ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE2]] - ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]] - ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE2]] - ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] - ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] - ; GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] - ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE2]] - ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] - ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD15]](s32) - ; GFX6-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]] - ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]] - ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD15]] - ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]] - ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] - ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]] - ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD17]], [[USUBO3]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD17]] - ; GFX6-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]] - ; GFX6-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV12]] - ; GFX6-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; 
GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV13]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX6-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV12]] - ; GFX6-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV13]], [[USUBO3]] - ; GFX6-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] - ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX6-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX6-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV14]] - ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV15]], [[UADDO35]] - ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32) - ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV13]] - ; GFX6-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX6-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV12]] - ; GFX6-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX6-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV13]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX6-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX6-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV16]] - ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV17]], [[UADDO37]] - ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32) - ; GFX6-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] - ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] - ; GFX6-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] - ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[UV]](i32) + ; GFX6-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[UV1]](i32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP1]], [[C]] + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD]](f32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX6-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FMUL1]], [[C2]] + ; GFX6-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX6-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] + ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD1]](f32) + ; GFX6-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) 
= G_FPTOUI [[INTRINSIC_TRUNC]](f32) + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C4]](i64) + ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV2]], [[UV4]] + ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] + ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ADD]], [[UMULH]] + ; GFX6-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO1]](i1) + ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UADDO]], [[UMULH1]] + ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO3]](i1) + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(i32), [[UADDO5:%[0-9]+]]:_(i1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO5]](i1) + ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(i32), [[UADDO7:%[0-9]+]]:_(i1) = G_UADDO [[UADDO4]], [[UMULH3]] + ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO7]](i1) + ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(i32), [[UADDO9:%[0-9]+]]:_(i1) = G_UADDO [[UADDO6]], [[ADD2]] + ; GFX6-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO9]](i1) + ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(i32), [[UADDO11:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI]], [[UADDO8]] + ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] + ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[UADDO10]] + ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(i32) = G_MUL [[USUBE]], [[UADDO10]] + ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[UADDE]] + ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(i32) = G_UMULH [[USUBO]], [[UADDO10]] + ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[ADD6]], [[UMULH5]] + ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(i32) = G_MUL [[UADDE]], [[MUL6]] + ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(i32) = G_MUL [[UADDO10]], [[ADD7]] + ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(i32) = G_UMULH [[UADDO10]], [[MUL6]] + ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(i32), [[UADDO13:%[0-9]+]]:_(i1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO13]](i1) + ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(i32), [[UADDO15:%[0-9]+]]:_(i1) = G_UADDO 
[[UADDO12]], [[UMULH6]] + ; GFX6-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO15]](i1) + ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(i32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(i32) = G_MUL [[UADDE]], [[ADD7]] + ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(i32) = G_UMULH [[UADDE]], [[MUL6]] + ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(i32) = G_UMULH [[UADDO10]], [[ADD7]] + ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(i32), [[UADDO17:%[0-9]+]]:_(i1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO17]](i1) + ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(i32), [[UADDO19:%[0-9]+]]:_(i1) = G_UADDO [[UADDO16]], [[UMULH8]] + ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO19]](i1) + ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(i32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(i32), [[UADDO21:%[0-9]+]]:_(i1) = G_UADDO [[UADDO18]], [[ADD8]] + ; GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO21]](i1) + ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(i32) = G_ADD [[ADD9]], [[ZEXT9]] + ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(i32) = G_UMULH [[UADDE]], [[ADD7]] + ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(i32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX6-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(i32), [[UADDO23:%[0-9]+]]:_(i1) = G_UADDO [[UADDO10]], [[UADDO20]] + ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] + ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(i32) = G_MUL [[UV9]], [[UADDO22]] + ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(i32) = G_MUL [[UV8]], [[UADDE2]] + ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(i32) = G_UMULH [[UV8]], [[UADDO22]] + ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(i32), [[UADDO25:%[0-9]+]]:_(i1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX6-NEXT: [[ZEXT10:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO25]](i1) + ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(i32), [[UADDO27:%[0-9]+]]:_(i1) = G_UADDO [[UADDO24]], [[UMULH10]] + ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO27]](i1) + ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(i32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(i32) = G_MUL [[UV9]], [[UADDE2]] + ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(i32) = G_UMULH [[UV9]], [[UADDO22]] + ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(i32) = G_UMULH [[UV8]], [[UADDE2]] + ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(i32), [[UADDO29:%[0-9]+]]:_(i1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO29]](i1) + ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(i32), [[UADDO31:%[0-9]+]]:_(i1) = G_UADDO [[UADDO28]], [[UMULH12]] + ; GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO31]](i1) + ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(i32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(i32), [[UADDO33:%[0-9]+]]:_(i1) = G_UADDO [[UADDO30]], [[ADD12]] + ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO33]](i1) + ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(i32) = G_ADD [[ADD13]], [[ZEXT14]] + ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(i32) = G_UMULH [[UV9]], [[UADDE2]] + ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(i32) = G_ADD [[UMULH13]], [[ADD14]] + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO32]](i32), [[ADD15]](i32) + ; GFX6-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(i32) = G_MUL [[UV10]], [[UADDO32]] + ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(i32) = G_MUL [[UV11]], [[UADDO32]] + 
; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(i32) = G_MUL [[UV10]], [[ADD15]] + ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(i32) = G_UMULH [[UV10]], [[UADDO32]] + ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(i32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(i32) = G_ADD [[ADD16]], [[UMULH14]] + ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(i32), [[USUBO3:%[0-9]+]]:_(i1) = G_USUBO [[UV6]], [[MUL15]] + ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV7]], [[ADD17]], [[USUBO3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV7]], [[ADD17]] + ; GFX6-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE2]](i32), [[UV13]] + ; GFX6-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO2]](i32), [[UV12]] + ; GFX6-NEXT: [[SEXT1:%[0-9]+]]:_(i32) = G_SEXT [[ICMP1]](i1) + ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE2]](i32), [[UV13]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SEXT1]], [[SEXT]] + ; GFX6-NEXT: [[USUBO4:%[0-9]+]]:_(i32), [[USUBO5:%[0-9]+]]:_(i1) = G_USUBO [[USUBO2]], [[UV12]] + ; GFX6-NEXT: [[USUBE4:%[0-9]+]]:_(i32), [[USUBE5:%[0-9]+]]:_(i1) = G_USUBE [[SUB]], [[UV13]], [[USUBO3]] + ; GFX6-NEXT: [[USUBE6:%[0-9]+]]:_(i32), [[USUBE7:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] + ; GFX6-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX6-NEXT: [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C6]](i64) + ; GFX6-NEXT: [[UADDO34:%[0-9]+]]:_(i32), [[UADDO35:%[0-9]+]]:_(i1) = G_UADDO [[UADDO32]], [[UV14]] + ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(i32), [[UADDE5:%[0-9]+]]:_(i1) = G_UADDE [[ADD15]], [[UV15]], [[UADDO35]] + ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO34]](i32), [[UADDE4]](i32) + ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE6]](i32), [[UV13]] + ; GFX6-NEXT: [[SEXT2:%[0-9]+]]:_(i32) = G_SEXT [[ICMP3]](i1) + ; GFX6-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO4]](i32), [[UV12]] + ; GFX6-NEXT: [[SEXT3:%[0-9]+]]:_(i32) = G_SEXT [[ICMP4]](i1) + ; GFX6-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE6]](i32), [[UV13]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP5]](i1), [[SEXT3]], [[SEXT2]] + ; GFX6-NEXT: [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C6]](i64) + ; GFX6-NEXT: [[UADDO36:%[0-9]+]]:_(i32), [[UADDO37:%[0-9]+]]:_(i1) = G_UADDO [[UADDO34]], [[UV16]] + ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(i32), [[UADDE7:%[0-9]+]]:_(i1) = G_UADDE [[UADDE4]], [[UV17]], [[UADDO37]] + ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO36]](i32), [[UADDE6]](i32) + ; GFX6-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT1]](i32), [[C5]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[MV2]], [[MV1]] + ; GFX6-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT]](i32), [[C5]] + ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP7]](i1), [[SELECT2]], [[MV]] + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](i64) ; ; GFX8-LABEL: name: test_udiv_s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32) - ; GFX8-NEXT: 
[[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]] - ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] - ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 - ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; GFX8-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C2]] - ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; GFX8-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] - ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] - ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) - ; GFX8-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) - ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) - ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX8-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]] - ; GFX8-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C4]] - ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV7]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] - ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV6]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV8]] - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV6]] - ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV8]] - ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV6]] - ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV8]] - ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), 
[[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] - ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV8]] - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C4]] - ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) - ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV11]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE]], [[ANYEXT1]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]] - ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV10]] - ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[UV12]] - ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV10]] - ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] - ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV12]] - ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV10]] - ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV12]] - ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD4]] - ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV12]] - ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]] - ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO22]] - ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE2]] - ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = 
G_UMULH [[UV16]], [[UADDO22]] - ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] - ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE2]] - ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO22]] - ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE2]] - ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD8]] - ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE2]] - ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] - ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD11]](s32) - ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDO32]], [[C4]] - ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) - ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV21]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]] - ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) - ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV20]] - ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV22]], [[USUBO3]] - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[UV22]] - ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV25]] - ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV24]] - ; GFX8-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV25]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX8-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV24]] - ; GFX8-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV25]], [[USUBO3]] - ; GFX8-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] - 
; GFX8-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV26]] - ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD11]], [[UV27]], [[UADDO35]] - ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32) - ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV25]] - ; GFX8-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV24]] - ; GFX8-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV25]] - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX8-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV28]] - ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV29]], [[UADDO37]] - ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32) - ; GFX8-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] - ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] - ; GFX8-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] - ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[UV]](i32) + ; GFX8-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[UV1]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP1]], [[C]] + ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD]](f32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX8-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FMUL1]], [[C2]] + ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX8-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] + ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD1]](f32) + ; GFX8-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC]](f32) + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C4]](i64) + ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX8-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV2]], [[UV4]] + ; GFX8-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] + ; 
GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[FPTOUI]], [[C4]] + ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](i64) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[UV7]](i32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[FPTOUI1]], [[ANYEXT]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](i32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](i64) + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[UV6]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI]], [[UV8]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[UV6]] + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[MUL]], [[MUL1]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO1]](i1) + ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UADDO]], [[UMULH]] + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO3]](i1) + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[UV8]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[UV6]] + ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[UV8]] + ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(i32), [[UADDO5:%[0-9]+]]:_(i1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO5]](i1) + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(i32), [[UADDO7:%[0-9]+]]:_(i1) = G_UADDO [[UADDO4]], [[UMULH2]] + ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO7]](i1) + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(i32), [[UADDO9:%[0-9]+]]:_(i1) = G_UADDO [[UADDO6]], [[ADD]] + ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO9]](i1) + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[UV8]] + ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UMULH3]], [[ADD2]] + ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(i32), [[UADDO11:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI]], [[UADDO8]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[UADDO10]], [[C4]] + ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](i64) + ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[UV11]](i32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[UADDE]], [[ANYEXT1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](i32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](i64) + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UADDE]], [[UV10]] + ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(i32) = G_MUL [[UADDO10]], [[UV12]] + ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(i32) = G_UMULH [[UADDO10]], [[UV10]] + ; GFX8-NEXT: 
[[UADDO12:%[0-9]+]]:_(i32), [[UADDO13:%[0-9]+]]:_(i1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO13]](i1) + ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(i32), [[UADDO15:%[0-9]+]]:_(i1) = G_UADDO [[UADDO12]], [[UMULH4]] + ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO15]](i1) + ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(i32) = G_MUL [[UADDE]], [[UV12]] + ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(i32) = G_UMULH [[UADDE]], [[UV10]] + ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(i32) = G_UMULH [[UADDO10]], [[UV12]] + ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(i32), [[UADDO17:%[0-9]+]]:_(i1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO17]](i1) + ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(i32), [[UADDO19:%[0-9]+]]:_(i1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO19]](i1) + ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(i32), [[UADDO21:%[0-9]+]]:_(i1) = G_UADDO [[UADDO18]], [[ADD4]] + ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO21]](i1) + ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(i32) = G_UMULH [[UADDE]], [[UV12]] + ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[UMULH7]], [[ADD6]] + ; GFX8-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(i32), [[UADDO23:%[0-9]+]]:_(i1) = G_UADDO [[UADDO10]], [[UADDO20]] + ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]] + ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(i32) = G_MUL [[UV17]], [[UADDO22]] + ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(i32) = G_MUL [[UV16]], [[UADDE2]] + ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(i32) = G_UMULH [[UV16]], [[UADDO22]] + ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(i32), [[UADDO25:%[0-9]+]]:_(i1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO25]](i1) + ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(i32), [[UADDO27:%[0-9]+]]:_(i1) = G_UADDO [[UADDO24]], [[UMULH8]] + ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO27]](i1) + ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(i32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(i32) = G_MUL [[UV17]], [[UADDE2]] + ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(i32) = G_UMULH [[UV17]], [[UADDO22]] + ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(i32) = G_UMULH [[UV16]], [[UADDE2]] + ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(i32), [[UADDO29:%[0-9]+]]:_(i1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO29]](i1) + ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(i32), [[UADDO31:%[0-9]+]]:_(i1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO31]](i1) + ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(i32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(i32), [[UADDO33:%[0-9]+]]:_(i1) = G_UADDO [[UADDO30]], [[ADD8]] + ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO33]](i1) + ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(i32) = G_ADD [[ADD9]], [[ZEXT14]] + ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(i32) = G_UMULH [[UV17]], [[UADDE2]] + ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(i32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO32]](i32), [[ADD11]](i32) + ; GFX8-NEXT: 
[[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV18]](i32), [[UADDO32]], [[C4]] + ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](i64) + ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(i64) = G_ANYEXT [[UV21]](i32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV18]](i32), [[ADD11]], [[ANYEXT2]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV19]](i32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](i64) + ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(i32), [[USUBO3:%[0-9]+]]:_(i1) = G_USUBO [[UV14]], [[UV20]] + ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV15]], [[UV22]], [[USUBO3]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV15]], [[UV22]] + ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE2]](i32), [[UV25]] + ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO2]](i32), [[UV24]] + ; GFX8-NEXT: [[SEXT1:%[0-9]+]]:_(i32) = G_SEXT [[ICMP1]](i1) + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE2]](i32), [[UV25]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SEXT1]], [[SEXT]] + ; GFX8-NEXT: [[USUBO4:%[0-9]+]]:_(i32), [[USUBO5:%[0-9]+]]:_(i1) = G_USUBO [[USUBO2]], [[UV24]] + ; GFX8-NEXT: [[USUBE4:%[0-9]+]]:_(i32), [[USUBE5:%[0-9]+]]:_(i1) = G_USUBE [[SUB]], [[UV25]], [[USUBO3]] + ; GFX8-NEXT: [[USUBE6:%[0-9]+]]:_(i32), [[USUBE7:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] + ; GFX8-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C6]](i64) + ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(i32), [[UADDO35:%[0-9]+]]:_(i1) = G_UADDO [[UADDO32]], [[UV26]] + ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(i32), [[UADDE5:%[0-9]+]]:_(i1) = G_UADDE [[ADD11]], [[UV27]], [[UADDO35]] + ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO34]](i32), [[UADDE4]](i32) + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE6]](i32), [[UV25]] + ; GFX8-NEXT: [[SEXT2:%[0-9]+]]:_(i32) = G_SEXT [[ICMP3]](i1) + ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO4]](i32), [[UV24]] + ; GFX8-NEXT: [[SEXT3:%[0-9]+]]:_(i32) = G_SEXT [[ICMP4]](i1) + ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE6]](i32), [[UV25]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP5]](i1), [[SEXT3]], [[SEXT2]] + ; GFX8-NEXT: [[UV28:%[0-9]+]]:_(i32), [[UV29:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C6]](i64) + ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(i32), [[UADDO37:%[0-9]+]]:_(i1) = G_UADDO [[UADDO34]], [[UV28]] + ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(i32), [[UADDE7:%[0-9]+]]:_(i1) = G_UADDE [[UADDE4]], [[UV29]], [[UADDO37]] + ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO36]](i32), [[UADDE6]](i32) + ; GFX8-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT1]](i32), [[C5]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[MV2]], [[MV1]] + ; GFX8-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = 
G_ICMP intpred(ne), [[SELECT]](i32), [[C5]] + ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP7]](i1), [[SELECT2]], [[MV]] + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](i64) ; ; GFX9-LABEL: name: test_udiv_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32) - ; GFX9-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] - ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 - ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C2]] - ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; GFX9-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] - ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] - ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) - ; GFX9-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX9-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]] - ; GFX9-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C4]] - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV7]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] - ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV6]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV8]] - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV6]] - ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV8]] - ; GFX9-NEXT: 
[[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV6]] - ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV8]] - ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] - ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV8]] - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C4]] - ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) - ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV11]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE]], [[ANYEXT1]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]] - ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV10]] - ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[UV12]] - ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV10]] - ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] - ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV12]] - ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV10]] - ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV12]] - ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD4]] - ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV12]] - ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: 
[[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]] - ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO22]] - ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE2]] - ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO22]] - ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] - ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE2]] - ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO22]] - ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE2]] - ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD8]] - ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE2]] - ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD11]](s32) - ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDO32]], [[C4]] - ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) - ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV21]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]] - ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) - ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV20]] - ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV22]], [[USUBO3]] - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[UV22]] - ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV25]] - ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), 
[[USUBO2]](s32), [[UV24]] - ; GFX9-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV25]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX9-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV24]] - ; GFX9-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV25]], [[USUBO3]] - ; GFX9-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] - ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV26]] - ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD11]], [[UV27]], [[UADDO35]] - ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32) - ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV25]] - ; GFX9-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV24]] - ; GFX9-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV25]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV28]] - ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV29]], [[UADDO37]] - ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32) - ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] - ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] - ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[UV]](i32) + ; GFX9-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[UV1]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP1]], [[C]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD]](f32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FMUL1]], [[C2]] + ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX9-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] + ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX9-NEXT: 
[[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD1]](f32) + ; GFX9-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC]](f32) + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C4]](i64) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX9-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV2]], [[UV4]] + ; GFX9-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[FPTOUI]], [[C4]] + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](i64) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[UV7]](i32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[FPTOUI1]], [[ANYEXT]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](i32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](i64) + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[UV6]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI]], [[UV8]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[UV6]] + ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[MUL]], [[MUL1]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO1]](i1) + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UADDO]], [[UMULH]] + ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO3]](i1) + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[UV8]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[UV6]] + ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[UV8]] + ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(i32), [[UADDO5:%[0-9]+]]:_(i1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO5]](i1) + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(i32), [[UADDO7:%[0-9]+]]:_(i1) = G_UADDO [[UADDO4]], [[UMULH2]] + ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO7]](i1) + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(i32), [[UADDO9:%[0-9]+]]:_(i1) = G_UADDO [[UADDO6]], [[ADD]] + ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO9]](i1) + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[UV8]] + ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UMULH3]], [[ADD2]] + ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(i32), [[UADDO11:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI]], [[UADDO8]] + ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[UADDO10]], [[C4]] + ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](i64) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[UV11]](i32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(i1) = 
G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[UADDE]], [[ANYEXT1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](i32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](i64) + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UADDE]], [[UV10]] + ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(i32) = G_MUL [[UADDO10]], [[UV12]] + ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(i32) = G_UMULH [[UADDO10]], [[UV10]] + ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(i32), [[UADDO13:%[0-9]+]]:_(i1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO13]](i1) + ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(i32), [[UADDO15:%[0-9]+]]:_(i1) = G_UADDO [[UADDO12]], [[UMULH4]] + ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO15]](i1) + ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(i32) = G_MUL [[UADDE]], [[UV12]] + ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(i32) = G_UMULH [[UADDE]], [[UV10]] + ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(i32) = G_UMULH [[UADDO10]], [[UV12]] + ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(i32), [[UADDO17:%[0-9]+]]:_(i1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO17]](i1) + ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(i32), [[UADDO19:%[0-9]+]]:_(i1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO19]](i1) + ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(i32), [[UADDO21:%[0-9]+]]:_(i1) = G_UADDO [[UADDO18]], [[ADD4]] + ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO21]](i1) + ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(i32) = G_UMULH [[UADDE]], [[UV12]] + ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[UMULH7]], [[ADD6]] + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(i32), [[UADDO23:%[0-9]+]]:_(i1) = G_UADDO [[UADDO10]], [[UADDO20]] + ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]] + ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(i32) = G_MUL [[UV17]], [[UADDO22]] + ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(i32) = G_MUL [[UV16]], [[UADDE2]] + ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(i32) = G_UMULH [[UV16]], [[UADDO22]] + ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(i32), [[UADDO25:%[0-9]+]]:_(i1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO25]](i1) + ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(i32), [[UADDO27:%[0-9]+]]:_(i1) = G_UADDO [[UADDO24]], [[UMULH8]] + ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO27]](i1) + ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(i32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(i32) = G_MUL [[UV17]], [[UADDE2]] + ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(i32) = G_UMULH [[UV17]], [[UADDO22]] + ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(i32) = G_UMULH [[UV16]], [[UADDE2]] + ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(i32), [[UADDO29:%[0-9]+]]:_(i1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO29]](i1) + ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(i32), [[UADDO31:%[0-9]+]]:_(i1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(i32) = G_ZEXT 
[[UADDO31]](i1) + ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(i32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(i32), [[UADDO33:%[0-9]+]]:_(i1) = G_UADDO [[UADDO30]], [[ADD8]] + ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO33]](i1) + ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(i32) = G_ADD [[ADD9]], [[ZEXT14]] + ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(i32) = G_UMULH [[UV17]], [[UADDE2]] + ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(i32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO32]](i32), [[ADD11]](i32) + ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV18]](i32), [[UADDO32]], [[C4]] + ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](i64) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(i64) = G_ANYEXT [[UV21]](i32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV18]](i32), [[ADD11]], [[ANYEXT2]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV19]](i32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](i64) + ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(i32), [[USUBO3:%[0-9]+]]:_(i1) = G_USUBO [[UV14]], [[UV20]] + ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV15]], [[UV22]], [[USUBO3]] + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV15]], [[UV22]] + ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE2]](i32), [[UV25]] + ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO2]](i32), [[UV24]] + ; GFX9-NEXT: [[SEXT1:%[0-9]+]]:_(i32) = G_SEXT [[ICMP1]](i1) + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE2]](i32), [[UV25]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SEXT1]], [[SEXT]] + ; GFX9-NEXT: [[USUBO4:%[0-9]+]]:_(i32), [[USUBO5:%[0-9]+]]:_(i1) = G_USUBO [[USUBO2]], [[UV24]] + ; GFX9-NEXT: [[USUBE4:%[0-9]+]]:_(i32), [[USUBE5:%[0-9]+]]:_(i1) = G_USUBE [[SUB]], [[UV25]], [[USUBO3]] + ; GFX9-NEXT: [[USUBE6:%[0-9]+]]:_(i32), [[USUBE7:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] + ; GFX9-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C6]](i64) + ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(i32), [[UADDO35:%[0-9]+]]:_(i1) = G_UADDO [[UADDO32]], [[UV26]] + ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(i32), [[UADDE5:%[0-9]+]]:_(i1) = G_UADDE [[ADD11]], [[UV27]], [[UADDO35]] + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO34]](i32), [[UADDE4]](i32) + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE6]](i32), [[UV25]] + ; GFX9-NEXT: [[SEXT2:%[0-9]+]]:_(i32) = G_SEXT [[ICMP3]](i1) + ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO4]](i32), [[UV24]] + ; GFX9-NEXT: [[SEXT3:%[0-9]+]]:_(i32) = G_SEXT [[ICMP4]](i1) + ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE6]](i32), [[UV25]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP5]](i1), [[SEXT3]], [[SEXT2]] + ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(i32), 
[[UV29:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C6]](i64) + ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(i32), [[UADDO37:%[0-9]+]]:_(i1) = G_UADDO [[UADDO34]], [[UV28]] + ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(i32), [[UADDE7:%[0-9]+]]:_(i1) = G_UADDE [[UADDE4]], [[UV29]], [[UADDO37]] + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO36]](i32), [[UADDE6]](i32) + ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT1]](i32), [[C5]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[MV2]], [[MV1]] + ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT]](i32), [[C5]] + ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP7]](i1), [[SELECT2]], [[MV]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](i64) ; ; GFX10-LABEL: name: test_udiv_s64 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32) - ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]] - ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] - ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 - ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C2]] - ; GFX10-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] - ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] - ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) - ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) - ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]] - ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C4]] - ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV7]], [[MUL]] - ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]] - ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV6]] - ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV6]] - ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[MUL3]] - ; 
GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV6]] - ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] - ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] - ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] - ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD4]] - ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C4]] - ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64) - ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] - ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[MUL5]] - ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] - ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[MUL6]] - ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV8]] - ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] - ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV8]] - ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] - ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] - ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] - ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV8]] - ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] - ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] - ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] - ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = 
G_UMULH [[UADDE]], [[ADD7]] - ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD10]] - ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] - ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO22]] - ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE2]] - ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO22]] - ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] - ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] - ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE2]] - ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO22]] - ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE2]] - ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] - ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] - ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE2]] - ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD14]] - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD15]](s32) - ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV14]](s32), [[UADDO32]], [[C4]] - ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD15]] - ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL13]] - ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDO32]] - ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL14]] - ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV16]] - ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD17]], [[USUBO3]] - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD17]] - ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV19]] - ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), 
[[UV18]] - ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV19]] - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV18]] - ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV19]], [[USUBO3]] - ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] - ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV20]] - ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV21]], [[UADDO35]] - ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32) - ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV19]] - ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV18]] - ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV19]] - ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV22]] - ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV23]], [[UADDO37]] - ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32) - ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] - ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] - ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] - ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = G_UDIV %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[UV]](i32) + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[UV1]](i32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP1]], [[C]] + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD]](f32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FMUL1]], [[C2]] + ; GFX10-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX10-NEXT: 
[[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] + ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD1]](f32) + ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC]](f32) + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C4]](i64) + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV2]], [[UV4]] + ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[FPTOUI]], [[C4]] + ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](i64) + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[UV7]], [[MUL]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ADD]], [[MUL1]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[UV6]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[UV6]] + ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[MUL2]], [[MUL3]] + ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO1]](i1) + ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UADDO]], [[UMULH]] + ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO3]](i1) + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[UV6]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(i32), [[UADDO5:%[0-9]+]]:_(i1) = G_UADDO [[MUL4]], [[UMULH1]] + ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO5]](i1) + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(i32), [[UADDO7:%[0-9]+]]:_(i1) = G_UADDO [[UADDO4]], [[UMULH2]] + ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO7]](i1) + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(i32), [[UADDO9:%[0-9]+]]:_(i1) = G_UADDO [[UADDO6]], [[ADD2]] + ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO9]](i1) + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[UMULH3]], [[ADD4]] + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(i32), [[UADDO11:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI]], [[UADDO8]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[UADDO10]], [[C4]] + ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](i64) + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[UADDE]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[UV9]], [[MUL5]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(i32) = G_MUL [[USUBE]], [[UADDO10]] + ; 
GFX10-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[ADD6]], [[MUL6]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(i32) = G_MUL [[UADDE]], [[UV8]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(i32) = G_MUL [[UADDO10]], [[ADD7]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(i32) = G_UMULH [[UADDO10]], [[UV8]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(i32), [[UADDO13:%[0-9]+]]:_(i1) = G_UADDO [[MUL7]], [[MUL8]] + ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO13]](i1) + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(i32), [[UADDO15:%[0-9]+]]:_(i1) = G_UADDO [[UADDO12]], [[UMULH4]] + ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO15]](i1) + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(i32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(i32) = G_MUL [[UADDE]], [[ADD7]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(i32) = G_UMULH [[UADDE]], [[UV8]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(i32) = G_UMULH [[UADDO10]], [[ADD7]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(i32), [[UADDO17:%[0-9]+]]:_(i1) = G_UADDO [[MUL9]], [[UMULH5]] + ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO17]](i1) + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(i32), [[UADDO19:%[0-9]+]]:_(i1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO19]](i1) + ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(i32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(i32), [[UADDO21:%[0-9]+]]:_(i1) = G_UADDO [[UADDO18]], [[ADD8]] + ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO21]](i1) + ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(i32) = G_ADD [[ADD9]], [[ZEXT9]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(i32) = G_UMULH [[UADDE]], [[ADD7]] + ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(i32) = G_ADD [[UMULH7]], [[ADD10]] + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(i32), [[UADDO23:%[0-9]+]]:_(i1) = G_UADDO [[UADDO10]], [[UADDO20]] + ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] + ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(i32) = G_MUL [[UV13]], [[UADDO22]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(i32) = G_MUL [[UV12]], [[UADDE2]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(i32) = G_UMULH [[UV12]], [[UADDO22]] + ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(i32), [[UADDO25:%[0-9]+]]:_(i1) = G_UADDO [[MUL10]], [[MUL11]] + ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO25]](i1) + ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(i32), [[UADDO27:%[0-9]+]]:_(i1) = G_UADDO [[UADDO24]], [[UMULH8]] + ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO27]](i1) + ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(i32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(i32) = G_MUL [[UV13]], [[UADDE2]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(i32) = G_UMULH [[UV13]], [[UADDO22]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(i32) = G_UMULH [[UV12]], [[UADDE2]] + ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(i32), [[UADDO29:%[0-9]+]]:_(i1) = G_UADDO [[MUL12]], [[UMULH9]] + ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO29]](i1) + ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(i32), [[UADDO31:%[0-9]+]]:_(i1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO31]](i1) + ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(i32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(i32), [[UADDO33:%[0-9]+]]:_(i1) = G_UADDO [[UADDO30]], [[ADD12]] + ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(i32) = 
G_ZEXT [[UADDO33]](i1) + ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(i32) = G_ADD [[ADD13]], [[ZEXT14]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(i32) = G_UMULH [[UV13]], [[UADDE2]] + ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(i32) = G_ADD [[UMULH11]], [[ADD14]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO32]](i32), [[ADD15]](i32) + ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV14]](i32), [[UADDO32]], [[C4]] + ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](i64) + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(i32) = G_MUL [[UV14]], [[ADD15]] + ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(i32) = G_ADD [[UV17]], [[MUL13]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(i32) = G_MUL [[UV15]], [[UADDO32]] + ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(i32) = G_ADD [[ADD16]], [[MUL14]] + ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(i32), [[USUBO3:%[0-9]+]]:_(i1) = G_USUBO [[UV10]], [[UV16]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV11]], [[ADD17]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV11]], [[ADD17]] + ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE2]](i32), [[UV19]] + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO2]](i32), [[UV18]] + ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(i32) = G_SEXT [[ICMP1]](i1) + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE2]](i32), [[UV19]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SEXT1]], [[SEXT]] + ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(i32), [[USUBO5:%[0-9]+]]:_(i1) = G_USUBO [[USUBO2]], [[UV18]] + ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(i32), [[USUBE5:%[0-9]+]]:_(i1) = G_USUBE [[SUB]], [[UV19]], [[USUBO3]] + ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(i32), [[USUBE7:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C6]](i64) + ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(i32), [[UADDO35:%[0-9]+]]:_(i1) = G_UADDO [[UADDO32]], [[UV20]] + ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(i32), [[UADDE5:%[0-9]+]]:_(i1) = G_UADDE [[ADD15]], [[UV21]], [[UADDO35]] + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO34]](i32), [[UADDE4]](i32) + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE6]](i32), [[UV19]] + ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(i32) = G_SEXT [[ICMP3]](i1) + ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO4]](i32), [[UV18]] + ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(i32) = G_SEXT [[ICMP4]](i1) + ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE6]](i32), [[UV19]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP5]](i1), [[SEXT3]], [[SEXT2]] + ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C6]](i64) + ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(i32), [[UADDO37:%[0-9]+]]:_(i1) = G_UADDO [[UADDO34]], [[UV22]] + ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(i32), [[UADDE7:%[0-9]+]]:_(i1) = G_UADDE [[UADDE4]], [[UV23]], [[UADDO37]] + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO36]](i32), [[UADDE6]](i32) + ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), 
[[SELECT1]](i32), [[C5]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[MV2]], [[MV1]] + ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT]](i32), [[C5]] + ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP7]](i1), [[SELECT2]], [[MV]] + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64) = G_UDIV %0, %1 + $vgpr0_vgpr1 = COPY %2(i64) ... --- @@ -971,1170 +971,1170 @@ body: | ; GFX6-LABEL: name: test_udiv_v2s64 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV4]](s32) - ; GFX6-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV5]](s32) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]] - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] - ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 - ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; GFX6-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C2]] - ; GFX6-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; GFX6-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] - ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] - ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) - ; GFX6-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) - ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[UV8]] - ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[UV9]], [[USUBO1]] - ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] - ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] - ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] - ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; GFX6-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] - ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] - ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], 
[[UMULH1]] - ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] - ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] - ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] - ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] - ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] - ; GFX6-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] - ; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] - ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] - ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] - ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] - ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] - ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] - ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] - ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] - ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] - ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] - ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] - ; GFX6-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] - ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] - ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] - ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] - ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] - ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] - ; GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] - ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] - ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), 
[[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] - ; GFX6-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX6-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO22]] - ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE2]] - ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO22]] - ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] - ; GFX6-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] - ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE2]] - ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO22]] - ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE2]] - ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] - ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] - ; GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] - ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE2]] - ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] - ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD15]](s32) - ; GFX6-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[UADDO32]] - ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDO32]] - ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD15]] - ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV14]], [[UADDO32]] - ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] - ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[MUL15]] - ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD17]], [[USUBO3]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD17]] - ; GFX6-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV17]] - ; GFX6-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV16]] - ; GFX6-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV17]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX6-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV16]] - ; GFX6-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV17]], [[USUBO3]] - ; GFX6-NEXT: 
[[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] - ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX6-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX6-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV18]] - ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV19]], [[UADDO35]] - ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32) - ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV17]] - ; GFX6-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX6-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV16]] - ; GFX6-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX6-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV17]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX6-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX6-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV20]] - ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV21]], [[UADDO37]] - ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32) - ; GFX6-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] - ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] - ; GFX6-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] - ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] - ; GFX6-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX6-NEXT: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV22]](s32) - ; GFX6-NEXT: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV23]](s32) - ; GFX6-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C]] - ; GFX6-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[UITOFP2]] - ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD2]](s32) - ; GFX6-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C1]] - ; GFX6-NEXT: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[FMUL5]], [[C2]] - ; GFX6-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL6]] - ; GFX6-NEXT: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C3]] - ; GFX6-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]] - ; GFX6-NEXT: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD3]](s32) - ; GFX6-NEXT: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32) - ; GFX6-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) - ; GFX6-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX6-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[UV24]], [[UV26]] - ; GFX6-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV25]], [[UV27]], [[USUBO7]] - ; GFX6-NEXT: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[USUBO6]], [[FPTOUI2]] - ; GFX6-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE8]], [[FPTOUI2]] - ; GFX6-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO6]], [[FPTOUI3]] - ; GFX6-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO6]], [[FPTOUI2]] - ; GFX6-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]] - ; GFX6-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD 
[[ADD18]], [[UMULH15]] - ; GFX6-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]] - ; GFX6-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD19]] - ; GFX6-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]] - ; GFX6-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]] - ; GFX6-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO39]](s1) - ; GFX6-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH16]] - ; GFX6-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO41]](s1) - ; GFX6-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] - ; GFX6-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD19]] - ; GFX6-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]] - ; GFX6-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD19]] - ; GFX6-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]] - ; GFX6-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1) - ; GFX6-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH18]] - ; GFX6-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO45]](s1) - ; GFX6-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] - ; GFX6-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[ADD20]] - ; GFX6-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) - ; GFX6-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT19]] - ; GFX6-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD19]] - ; GFX6-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD22]] - ; GFX6-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO46]] - ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO49]] - ; GFX6-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO6]], [[UADDO48]] - ; GFX6-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE8]], [[UADDO48]] - ; GFX6-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO6]], [[UADDE8]] - ; GFX6-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO6]], [[UADDO48]] - ; GFX6-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]] - ; GFX6-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[UMULH20]] - ; GFX6-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE8]], [[MUL24]] - ; GFX6-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO48]], [[ADD25]] - ; GFX6-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[MUL24]] - ; GFX6-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]] - ; GFX6-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) - ; GFX6-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH21]] - ; GFX6-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1) - ; GFX6-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] - ; GFX6-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE8]], [[ADD25]] - ; GFX6-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE8]], [[MUL24]] - ; GFX6-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[ADD25]] - ; GFX6-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]] - ; GFX6-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) - ; GFX6-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH23]] - ; GFX6-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO57]](s1) - ; GFX6-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], 
[[ZEXT23]] - ; GFX6-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[UADDO56]], [[ADD26]] - ; GFX6-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) - ; GFX6-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ADD27]], [[ZEXT24]] - ; GFX6-NEXT: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE8]], [[ADD25]] - ; GFX6-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD28]] - ; GFX6-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO48]], [[UADDO58]] - ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[ADD29]], [[UADDO61]] - ; GFX6-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX6-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX6-NEXT: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV31]], [[UADDO60]] - ; GFX6-NEXT: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV30]], [[UADDE10]] - ; GFX6-NEXT: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV30]], [[UADDO60]] - ; GFX6-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]] - ; GFX6-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) - ; GFX6-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH25]] - ; GFX6-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1) - ; GFX6-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] - ; GFX6-NEXT: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV31]], [[UADDE10]] - ; GFX6-NEXT: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV31]], [[UADDO60]] - ; GFX6-NEXT: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV30]], [[UADDE10]] - ; GFX6-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]] - ; GFX6-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) - ; GFX6-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH27]] - ; GFX6-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO69]](s1) - ; GFX6-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] - ; GFX6-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[UADDO68]], [[ADD30]] - ; GFX6-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1) - ; GFX6-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ADD31]], [[ZEXT29]] - ; GFX6-NEXT: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV31]], [[UADDE10]] - ; GFX6-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD32]] - ; GFX6-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO70]](s32), [[ADD33]](s32) - ; GFX6-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX6-NEXT: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV32]], [[UADDO70]] - ; GFX6-NEXT: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV33]], [[UADDO70]] - ; GFX6-NEXT: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV32]], [[ADD33]] - ; GFX6-NEXT: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV32]], [[UADDO70]] - ; GFX6-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]] - ; GFX6-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[ADD34]], [[UMULH29]] - ; GFX6-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV28]], [[MUL33]] - ; GFX6-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[UV29]], [[ADD35]], [[USUBO9]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV29]], [[ADD35]] - ; GFX6-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX6-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE10]](s32), [[UV35]] - ; GFX6-NEXT: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT 
[[ICMP8]](s1) - ; GFX6-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO8]](s32), [[UV34]] - ; GFX6-NEXT: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1) - ; GFX6-NEXT: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE10]](s32), [[UV35]] - ; GFX6-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]] - ; GFX6-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[USUBO8]], [[UV34]] - ; GFX6-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV35]], [[USUBO9]] - ; GFX6-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[USUBE12]], [[C5]], [[USUBO11]] - ; GFX6-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX6-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UV36]] - ; GFX6-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[ADD33]], [[UV37]], [[UADDO73]] - ; GFX6-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO72]](s32), [[UADDE12]](s32) - ; GFX6-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE14]](s32), [[UV35]] - ; GFX6-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) - ; GFX6-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV34]] - ; GFX6-NEXT: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1) - ; GFX6-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE14]](s32), [[UV35]] - ; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] - ; GFX6-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX6-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[UV38]] - ; GFX6-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[UV39]], [[UADDO75]] - ; GFX6-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO74]](s32), [[UADDE14]](s32) - ; GFX6-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C5]] - ; GFX6-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV5]], [[MV4]] - ; GFX6-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C5]] - ; GFX6-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV3]] - ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT3]](s64), [[SELECT7]](s64) - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY1]](<2 x i64>) + ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[UV4]](i32) + ; GFX6-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[UV5]](i32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP1]], [[C]] + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD]](f32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(f32) = 
G_FCONSTANT float 0x3DF0000000000000 + ; GFX6-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FMUL1]], [[C2]] + ; GFX6-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX6-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] + ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD1]](f32) + ; GFX6-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC]](f32) + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C4]](i64) + ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV6]], [[UV8]] + ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV7]], [[UV9]], [[USUBO1]] + ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ADD]], [[UMULH]] + ; GFX6-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO1]](i1) + ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UADDO]], [[UMULH1]] + ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO3]](i1) + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(i32), [[UADDO5:%[0-9]+]]:_(i1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO5]](i1) + ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(i32), [[UADDO7:%[0-9]+]]:_(i1) = G_UADDO [[UADDO4]], [[UMULH3]] + ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO7]](i1) + ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(i32), [[UADDO9:%[0-9]+]]:_(i1) = G_UADDO [[UADDO6]], [[ADD2]] + ; GFX6-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO9]](i1) + ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(i32), [[UADDO11:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI]], [[UADDO8]] + ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] + ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[UADDO10]] + ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(i32) = G_MUL [[USUBE]], [[UADDO10]] + ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[UADDE]] + ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(i32) = G_UMULH [[USUBO]], [[UADDO10]] + ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX6-NEXT: 
[[ADD7:%[0-9]+]]:_(i32) = G_ADD [[ADD6]], [[UMULH5]] + ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(i32) = G_MUL [[UADDE]], [[MUL6]] + ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(i32) = G_MUL [[UADDO10]], [[ADD7]] + ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(i32) = G_UMULH [[UADDO10]], [[MUL6]] + ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(i32), [[UADDO13:%[0-9]+]]:_(i1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO13]](i1) + ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(i32), [[UADDO15:%[0-9]+]]:_(i1) = G_UADDO [[UADDO12]], [[UMULH6]] + ; GFX6-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO15]](i1) + ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(i32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(i32) = G_MUL [[UADDE]], [[ADD7]] + ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(i32) = G_UMULH [[UADDE]], [[MUL6]] + ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(i32) = G_UMULH [[UADDO10]], [[ADD7]] + ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(i32), [[UADDO17:%[0-9]+]]:_(i1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO17]](i1) + ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(i32), [[UADDO19:%[0-9]+]]:_(i1) = G_UADDO [[UADDO16]], [[UMULH8]] + ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO19]](i1) + ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(i32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(i32), [[UADDO21:%[0-9]+]]:_(i1) = G_UADDO [[UADDO18]], [[ADD8]] + ; GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO21]](i1) + ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(i32) = G_ADD [[ADD9]], [[ZEXT9]] + ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(i32) = G_UMULH [[UADDE]], [[ADD7]] + ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(i32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX6-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(i32), [[UADDO23:%[0-9]+]]:_(i1) = G_UADDO [[UADDO10]], [[UADDO20]] + ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] + ; GFX6-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV]](i64) + ; GFX6-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV]](i64) + ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(i32) = G_MUL [[UV13]], [[UADDO22]] + ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(i32) = G_MUL [[UV12]], [[UADDE2]] + ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(i32) = G_UMULH [[UV12]], [[UADDO22]] + ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(i32), [[UADDO25:%[0-9]+]]:_(i1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX6-NEXT: [[ZEXT10:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO25]](i1) + ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(i32), [[UADDO27:%[0-9]+]]:_(i1) = G_UADDO [[UADDO24]], [[UMULH10]] + ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO27]](i1) + ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(i32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(i32) = G_MUL [[UV13]], [[UADDE2]] + ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(i32) = G_UMULH [[UV13]], [[UADDO22]] + ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(i32) = G_UMULH [[UV12]], [[UADDE2]] + ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(i32), [[UADDO29:%[0-9]+]]:_(i1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO29]](i1) + ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(i32), [[UADDO31:%[0-9]+]]:_(i1) = G_UADDO [[UADDO28]], [[UMULH12]] + ; GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO31]](i1) + ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(i32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(i32), [[UADDO33:%[0-9]+]]:_(i1) = G_UADDO [[UADDO30]], [[ADD12]] + ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO33]](i1) + ; GFX6-NEXT: 
[[ADD14:%[0-9]+]]:_(i32) = G_ADD [[ADD13]], [[ZEXT14]] + ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(i32) = G_UMULH [[UV13]], [[UADDE2]] + ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(i32) = G_ADD [[UMULH13]], [[ADD14]] + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO32]](i32), [[ADD15]](i32) + ; GFX6-NEXT: [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(i32) = G_MUL [[UV14]], [[UADDO32]] + ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(i32) = G_MUL [[UV15]], [[UADDO32]] + ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(i32) = G_MUL [[UV14]], [[ADD15]] + ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(i32) = G_UMULH [[UV14]], [[UADDO32]] + ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(i32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(i32) = G_ADD [[ADD16]], [[UMULH14]] + ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(i32), [[USUBO3:%[0-9]+]]:_(i1) = G_USUBO [[UV10]], [[MUL15]] + ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV11]], [[ADD17]], [[USUBO3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV11]], [[ADD17]] + ; GFX6-NEXT: [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE2]](i32), [[UV17]] + ; GFX6-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO2]](i32), [[UV16]] + ; GFX6-NEXT: [[SEXT1:%[0-9]+]]:_(i32) = G_SEXT [[ICMP1]](i1) + ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE2]](i32), [[UV17]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SEXT1]], [[SEXT]] + ; GFX6-NEXT: [[USUBO4:%[0-9]+]]:_(i32), [[USUBO5:%[0-9]+]]:_(i1) = G_USUBO [[USUBO2]], [[UV16]] + ; GFX6-NEXT: [[USUBE4:%[0-9]+]]:_(i32), [[USUBE5:%[0-9]+]]:_(i1) = G_USUBE [[SUB]], [[UV17]], [[USUBO3]] + ; GFX6-NEXT: [[USUBE6:%[0-9]+]]:_(i32), [[USUBE7:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] + ; GFX6-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX6-NEXT: [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C6]](i64) + ; GFX6-NEXT: [[UADDO34:%[0-9]+]]:_(i32), [[UADDO35:%[0-9]+]]:_(i1) = G_UADDO [[UADDO32]], [[UV18]] + ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(i32), [[UADDE5:%[0-9]+]]:_(i1) = G_UADDE [[ADD15]], [[UV19]], [[UADDO35]] + ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO34]](i32), [[UADDE4]](i32) + ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE6]](i32), [[UV17]] + ; GFX6-NEXT: [[SEXT2:%[0-9]+]]:_(i32) = G_SEXT [[ICMP3]](i1) + ; GFX6-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO4]](i32), [[UV16]] + ; GFX6-NEXT: [[SEXT3:%[0-9]+]]:_(i32) = G_SEXT [[ICMP4]](i1) + ; GFX6-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE6]](i32), [[UV17]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP5]](i1), [[SEXT3]], [[SEXT2]] + ; GFX6-NEXT: [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C6]](i64) + ; GFX6-NEXT: [[UADDO36:%[0-9]+]]:_(i32), [[UADDO37:%[0-9]+]]:_(i1) = G_UADDO [[UADDO34]], [[UV20]] + ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(i32), [[UADDE7:%[0-9]+]]:_(i1) = G_UADDE [[UADDE4]], [[UV21]], [[UADDO37]] + ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO36]](i32), [[UADDE6]](i32) + ; GFX6-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT1]](i32), [[C5]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[MV2]], [[MV1]] + ; GFX6-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT]](i32), [[C5]] + ; GFX6-NEXT: 
[[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP7]](i1), [[SELECT2]], [[MV]] + ; GFX6-NEXT: [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV3]](i64) + ; GFX6-NEXT: [[UITOFP2:%[0-9]+]]:_(f32) = G_UITOFP [[UV22]](i32) + ; GFX6-NEXT: [[UITOFP3:%[0-9]+]]:_(f32) = G_UITOFP [[UV23]](i32) + ; GFX6-NEXT: [[FMUL4:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP3]], [[C]] + ; GFX6-NEXT: [[FADD2:%[0-9]+]]:_(f32) = G_FADD [[FMUL4]], [[UITOFP2]] + ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD2]](f32) + ; GFX6-NEXT: [[FMUL5:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C1]] + ; GFX6-NEXT: [[FMUL6:%[0-9]+]]:_(f32) = G_FMUL [[FMUL5]], [[C2]] + ; GFX6-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL6]] + ; GFX6-NEXT: [[FMUL7:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C3]] + ; GFX6-NEXT: [[FADD3:%[0-9]+]]:_(f32) = G_FADD [[FMUL7]], [[FMUL5]] + ; GFX6-NEXT: [[FPTOUI2:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD3]](f32) + ; GFX6-NEXT: [[FPTOUI3:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC1]](f32) + ; GFX6-NEXT: [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C4]](i64) + ; GFX6-NEXT: [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV3]](i64) + ; GFX6-NEXT: [[USUBO6:%[0-9]+]]:_(i32), [[USUBO7:%[0-9]+]]:_(i1) = G_USUBO [[UV24]], [[UV26]] + ; GFX6-NEXT: [[USUBE8:%[0-9]+]]:_(i32), [[USUBE9:%[0-9]+]]:_(i1) = G_USUBE [[UV25]], [[UV27]], [[USUBO7]] + ; GFX6-NEXT: [[MUL18:%[0-9]+]]:_(i32) = G_MUL [[USUBO6]], [[FPTOUI2]] + ; GFX6-NEXT: [[MUL19:%[0-9]+]]:_(i32) = G_MUL [[USUBE8]], [[FPTOUI2]] + ; GFX6-NEXT: [[MUL20:%[0-9]+]]:_(i32) = G_MUL [[USUBO6]], [[FPTOUI3]] + ; GFX6-NEXT: [[UMULH15:%[0-9]+]]:_(i32) = G_UMULH [[USUBO6]], [[FPTOUI2]] + ; GFX6-NEXT: [[ADD18:%[0-9]+]]:_(i32) = G_ADD [[MUL19]], [[MUL20]] + ; GFX6-NEXT: [[ADD19:%[0-9]+]]:_(i32) = G_ADD [[ADD18]], [[UMULH15]] + ; GFX6-NEXT: [[MUL21:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI3]], [[MUL18]] + ; GFX6-NEXT: [[MUL22:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI2]], [[ADD19]] + ; GFX6-NEXT: [[UMULH16:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI2]], [[MUL18]] + ; GFX6-NEXT: [[UADDO38:%[0-9]+]]:_(i32), [[UADDO39:%[0-9]+]]:_(i1) = G_UADDO [[MUL21]], [[MUL22]] + ; GFX6-NEXT: [[ZEXT15:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO39]](i1) + ; GFX6-NEXT: [[UADDO40:%[0-9]+]]:_(i32), [[UADDO41:%[0-9]+]]:_(i1) = G_UADDO [[UADDO38]], [[UMULH16]] + ; GFX6-NEXT: [[ZEXT16:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO41]](i1) + ; GFX6-NEXT: [[ADD20:%[0-9]+]]:_(i32) = G_ADD [[ZEXT15]], [[ZEXT16]] + ; GFX6-NEXT: [[MUL23:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI3]], [[ADD19]] + ; GFX6-NEXT: [[UMULH17:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI3]], [[MUL18]] + ; GFX6-NEXT: [[UMULH18:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI2]], [[ADD19]] + ; GFX6-NEXT: [[UADDO42:%[0-9]+]]:_(i32), [[UADDO43:%[0-9]+]]:_(i1) = G_UADDO [[MUL23]], [[UMULH17]] + ; GFX6-NEXT: [[ZEXT17:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO43]](i1) + ; GFX6-NEXT: [[UADDO44:%[0-9]+]]:_(i32), [[UADDO45:%[0-9]+]]:_(i1) = G_UADDO [[UADDO42]], [[UMULH18]] + ; GFX6-NEXT: [[ZEXT18:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO45]](i1) + ; GFX6-NEXT: [[ADD21:%[0-9]+]]:_(i32) = G_ADD [[ZEXT17]], [[ZEXT18]] + ; GFX6-NEXT: [[UADDO46:%[0-9]+]]:_(i32), [[UADDO47:%[0-9]+]]:_(i1) = G_UADDO [[UADDO44]], [[ADD20]] + ; GFX6-NEXT: [[ZEXT19:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO47]](i1) + ; GFX6-NEXT: [[ADD22:%[0-9]+]]:_(i32) = G_ADD [[ADD21]], [[ZEXT19]] + ; GFX6-NEXT: [[UMULH19:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI3]], [[ADD19]] + ; GFX6-NEXT: [[ADD23:%[0-9]+]]:_(i32) = G_ADD [[UMULH19]], [[ADD22]] + ; GFX6-NEXT: 
[[UADDO48:%[0-9]+]]:_(i32), [[UADDO49:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI2]], [[UADDO46]] + ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(i32), [[UADDE9:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO49]] + ; GFX6-NEXT: [[MUL24:%[0-9]+]]:_(i32) = G_MUL [[USUBO6]], [[UADDO48]] + ; GFX6-NEXT: [[MUL25:%[0-9]+]]:_(i32) = G_MUL [[USUBE8]], [[UADDO48]] + ; GFX6-NEXT: [[MUL26:%[0-9]+]]:_(i32) = G_MUL [[USUBO6]], [[UADDE8]] + ; GFX6-NEXT: [[UMULH20:%[0-9]+]]:_(i32) = G_UMULH [[USUBO6]], [[UADDO48]] + ; GFX6-NEXT: [[ADD24:%[0-9]+]]:_(i32) = G_ADD [[MUL25]], [[MUL26]] + ; GFX6-NEXT: [[ADD25:%[0-9]+]]:_(i32) = G_ADD [[ADD24]], [[UMULH20]] + ; GFX6-NEXT: [[MUL27:%[0-9]+]]:_(i32) = G_MUL [[UADDE8]], [[MUL24]] + ; GFX6-NEXT: [[MUL28:%[0-9]+]]:_(i32) = G_MUL [[UADDO48]], [[ADD25]] + ; GFX6-NEXT: [[UMULH21:%[0-9]+]]:_(i32) = G_UMULH [[UADDO48]], [[MUL24]] + ; GFX6-NEXT: [[UADDO50:%[0-9]+]]:_(i32), [[UADDO51:%[0-9]+]]:_(i1) = G_UADDO [[MUL27]], [[MUL28]] + ; GFX6-NEXT: [[ZEXT20:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO51]](i1) + ; GFX6-NEXT: [[UADDO52:%[0-9]+]]:_(i32), [[UADDO53:%[0-9]+]]:_(i1) = G_UADDO [[UADDO50]], [[UMULH21]] + ; GFX6-NEXT: [[ZEXT21:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO53]](i1) + ; GFX6-NEXT: [[ADD26:%[0-9]+]]:_(i32) = G_ADD [[ZEXT20]], [[ZEXT21]] + ; GFX6-NEXT: [[MUL29:%[0-9]+]]:_(i32) = G_MUL [[UADDE8]], [[ADD25]] + ; GFX6-NEXT: [[UMULH22:%[0-9]+]]:_(i32) = G_UMULH [[UADDE8]], [[MUL24]] + ; GFX6-NEXT: [[UMULH23:%[0-9]+]]:_(i32) = G_UMULH [[UADDO48]], [[ADD25]] + ; GFX6-NEXT: [[UADDO54:%[0-9]+]]:_(i32), [[UADDO55:%[0-9]+]]:_(i1) = G_UADDO [[MUL29]], [[UMULH22]] + ; GFX6-NEXT: [[ZEXT22:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO55]](i1) + ; GFX6-NEXT: [[UADDO56:%[0-9]+]]:_(i32), [[UADDO57:%[0-9]+]]:_(i1) = G_UADDO [[UADDO54]], [[UMULH23]] + ; GFX6-NEXT: [[ZEXT23:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO57]](i1) + ; GFX6-NEXT: [[ADD27:%[0-9]+]]:_(i32) = G_ADD [[ZEXT22]], [[ZEXT23]] + ; GFX6-NEXT: [[UADDO58:%[0-9]+]]:_(i32), [[UADDO59:%[0-9]+]]:_(i1) = G_UADDO [[UADDO56]], [[ADD26]] + ; GFX6-NEXT: [[ZEXT24:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO59]](i1) + ; GFX6-NEXT: [[ADD28:%[0-9]+]]:_(i32) = G_ADD [[ADD27]], [[ZEXT24]] + ; GFX6-NEXT: [[UMULH24:%[0-9]+]]:_(i32) = G_UMULH [[UADDE8]], [[ADD25]] + ; GFX6-NEXT: [[ADD29:%[0-9]+]]:_(i32) = G_ADD [[UMULH24]], [[ADD28]] + ; GFX6-NEXT: [[UADDO60:%[0-9]+]]:_(i32), [[UADDO61:%[0-9]+]]:_(i1) = G_UADDO [[UADDO48]], [[UADDO58]] + ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(i32), [[UADDE11:%[0-9]+]]:_(i1) = G_UADDE [[UADDE8]], [[ADD29]], [[UADDO61]] + ; GFX6-NEXT: [[UV28:%[0-9]+]]:_(i32), [[UV29:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV1]](i64) + ; GFX6-NEXT: [[UV30:%[0-9]+]]:_(i32), [[UV31:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV1]](i64) + ; GFX6-NEXT: [[MUL30:%[0-9]+]]:_(i32) = G_MUL [[UV31]], [[UADDO60]] + ; GFX6-NEXT: [[MUL31:%[0-9]+]]:_(i32) = G_MUL [[UV30]], [[UADDE10]] + ; GFX6-NEXT: [[UMULH25:%[0-9]+]]:_(i32) = G_UMULH [[UV30]], [[UADDO60]] + ; GFX6-NEXT: [[UADDO62:%[0-9]+]]:_(i32), [[UADDO63:%[0-9]+]]:_(i1) = G_UADDO [[MUL30]], [[MUL31]] + ; GFX6-NEXT: [[ZEXT25:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO63]](i1) + ; GFX6-NEXT: [[UADDO64:%[0-9]+]]:_(i32), [[UADDO65:%[0-9]+]]:_(i1) = G_UADDO [[UADDO62]], [[UMULH25]] + ; GFX6-NEXT: [[ZEXT26:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO65]](i1) + ; GFX6-NEXT: [[ADD30:%[0-9]+]]:_(i32) = G_ADD [[ZEXT25]], [[ZEXT26]] + ; GFX6-NEXT: [[MUL32:%[0-9]+]]:_(i32) = G_MUL [[UV31]], [[UADDE10]] + ; GFX6-NEXT: [[UMULH26:%[0-9]+]]:_(i32) = G_UMULH [[UV31]], [[UADDO60]] + ; GFX6-NEXT: [[UMULH27:%[0-9]+]]:_(i32) = G_UMULH [[UV30]], [[UADDE10]] + ; GFX6-NEXT: 
[[UADDO66:%[0-9]+]]:_(i32), [[UADDO67:%[0-9]+]]:_(i1) = G_UADDO [[MUL32]], [[UMULH26]] + ; GFX6-NEXT: [[ZEXT27:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO67]](i1) + ; GFX6-NEXT: [[UADDO68:%[0-9]+]]:_(i32), [[UADDO69:%[0-9]+]]:_(i1) = G_UADDO [[UADDO66]], [[UMULH27]] + ; GFX6-NEXT: [[ZEXT28:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO69]](i1) + ; GFX6-NEXT: [[ADD31:%[0-9]+]]:_(i32) = G_ADD [[ZEXT27]], [[ZEXT28]] + ; GFX6-NEXT: [[UADDO70:%[0-9]+]]:_(i32), [[UADDO71:%[0-9]+]]:_(i1) = G_UADDO [[UADDO68]], [[ADD30]] + ; GFX6-NEXT: [[ZEXT29:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO71]](i1) + ; GFX6-NEXT: [[ADD32:%[0-9]+]]:_(i32) = G_ADD [[ADD31]], [[ZEXT29]] + ; GFX6-NEXT: [[UMULH28:%[0-9]+]]:_(i32) = G_UMULH [[UV31]], [[UADDE10]] + ; GFX6-NEXT: [[ADD33:%[0-9]+]]:_(i32) = G_ADD [[UMULH28]], [[ADD32]] + ; GFX6-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO70]](i32), [[ADD33]](i32) + ; GFX6-NEXT: [[UV32:%[0-9]+]]:_(i32), [[UV33:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV3]](i64) + ; GFX6-NEXT: [[MUL33:%[0-9]+]]:_(i32) = G_MUL [[UV32]], [[UADDO70]] + ; GFX6-NEXT: [[MUL34:%[0-9]+]]:_(i32) = G_MUL [[UV33]], [[UADDO70]] + ; GFX6-NEXT: [[MUL35:%[0-9]+]]:_(i32) = G_MUL [[UV32]], [[ADD33]] + ; GFX6-NEXT: [[UMULH29:%[0-9]+]]:_(i32) = G_UMULH [[UV32]], [[UADDO70]] + ; GFX6-NEXT: [[ADD34:%[0-9]+]]:_(i32) = G_ADD [[MUL34]], [[MUL35]] + ; GFX6-NEXT: [[ADD35:%[0-9]+]]:_(i32) = G_ADD [[ADD34]], [[UMULH29]] + ; GFX6-NEXT: [[USUBO8:%[0-9]+]]:_(i32), [[USUBO9:%[0-9]+]]:_(i1) = G_USUBO [[UV28]], [[MUL33]] + ; GFX6-NEXT: [[USUBE10:%[0-9]+]]:_(i32), [[USUBE11:%[0-9]+]]:_(i1) = G_USUBE [[UV29]], [[ADD35]], [[USUBO9]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[UV29]], [[ADD35]] + ; GFX6-NEXT: [[UV34:%[0-9]+]]:_(i32), [[UV35:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV3]](i64) + ; GFX6-NEXT: [[ICMP8:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE10]](i32), [[UV35]] + ; GFX6-NEXT: [[SEXT4:%[0-9]+]]:_(i32) = G_SEXT [[ICMP8]](i1) + ; GFX6-NEXT: [[ICMP9:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO8]](i32), [[UV34]] + ; GFX6-NEXT: [[SEXT5:%[0-9]+]]:_(i32) = G_SEXT [[ICMP9]](i1) + ; GFX6-NEXT: [[ICMP10:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE10]](i32), [[UV35]] + ; GFX6-NEXT: [[SELECT4:%[0-9]+]]:_(i32) = G_SELECT [[ICMP10]](i1), [[SEXT5]], [[SEXT4]] + ; GFX6-NEXT: [[USUBO10:%[0-9]+]]:_(i32), [[USUBO11:%[0-9]+]]:_(i1) = G_USUBO [[USUBO8]], [[UV34]] + ; GFX6-NEXT: [[USUBE12:%[0-9]+]]:_(i32), [[USUBE13:%[0-9]+]]:_(i1) = G_USUBE [[SUB1]], [[UV35]], [[USUBO9]] + ; GFX6-NEXT: [[USUBE14:%[0-9]+]]:_(i32), [[USUBE15:%[0-9]+]]:_(i1) = G_USUBE [[USUBE12]], [[C5]], [[USUBO11]] + ; GFX6-NEXT: [[UV36:%[0-9]+]]:_(i32), [[UV37:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C6]](i64) + ; GFX6-NEXT: [[UADDO72:%[0-9]+]]:_(i32), [[UADDO73:%[0-9]+]]:_(i1) = G_UADDO [[UADDO70]], [[UV36]] + ; GFX6-NEXT: [[UADDE12:%[0-9]+]]:_(i32), [[UADDE13:%[0-9]+]]:_(i1) = G_UADDE [[ADD33]], [[UV37]], [[UADDO73]] + ; GFX6-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO72]](i32), [[UADDE12]](i32) + ; GFX6-NEXT: [[ICMP11:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE14]](i32), [[UV35]] + ; GFX6-NEXT: [[SEXT6:%[0-9]+]]:_(i32) = G_SEXT [[ICMP11]](i1) + ; GFX6-NEXT: [[ICMP12:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO10]](i32), [[UV34]] + ; GFX6-NEXT: [[SEXT7:%[0-9]+]]:_(i32) = G_SEXT [[ICMP12]](i1) + ; GFX6-NEXT: [[ICMP13:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE14]](i32), [[UV35]] + ; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(i32) = G_SELECT [[ICMP13]](i1), [[SEXT7]], [[SEXT6]] + ; GFX6-NEXT: [[UV38:%[0-9]+]]:_(i32), [[UV39:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C6]](i64) + ; GFX6-NEXT: 
[[UADDO74:%[0-9]+]]:_(i32), [[UADDO75:%[0-9]+]]:_(i1) = G_UADDO [[UADDO72]], [[UV38]] + ; GFX6-NEXT: [[UADDE14:%[0-9]+]]:_(i32), [[UADDE15:%[0-9]+]]:_(i1) = G_UADDE [[UADDE12]], [[UV39]], [[UADDO75]] + ; GFX6-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO74]](i32), [[UADDE14]](i32) + ; GFX6-NEXT: [[ICMP14:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT5]](i32), [[C5]] + ; GFX6-NEXT: [[SELECT6:%[0-9]+]]:_(i64) = G_SELECT [[ICMP14]](i1), [[MV5]], [[MV4]] + ; GFX6-NEXT: [[ICMP15:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT4]](i32), [[C5]] + ; GFX6-NEXT: [[SELECT7:%[0-9]+]]:_(i64) = G_SELECT [[ICMP15]](i1), [[SELECT6]], [[MV3]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[SELECT3]](i64), [[SELECT7]](i64) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; GFX8-LABEL: name: test_udiv_v2s64 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV4]](s32) - ; GFX8-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV5]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]] - ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] - ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 - ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; GFX8-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C2]] - ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; GFX8-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] - ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] - ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) - ; GFX8-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) - ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) - ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX8-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[UV8]] - ; GFX8-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[UV9]], [[USUBO1]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C4]] - ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV11]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), 
[[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] - ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV10]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV12]] - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV10]] - ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV12]] - ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV10]] - ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV12]] - ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] - ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV12]] - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C4]] - ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) - ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV15]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE]], [[ANYEXT1]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]] - ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV14]] - ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[UV16]] - ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV14]] - ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] - ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV16]] - ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV14]] - ; GFX8-NEXT: 
[[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV16]] - ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD4]] - ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV16]] - ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]] - ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO22]] - ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE2]] - ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO22]] - ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] - ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE2]] - ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO22]] - ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE2]] - ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD8]] - ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE2]] - ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] - ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD11]](s32) - ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDO32]], [[C4]] - ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) - ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV25]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 
[[UV22]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV23]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]] - ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) - ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[UV24]] - ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[UV26]], [[USUBO3]] - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[UV26]] - ; GFX8-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV29]] - ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV28]] - ; GFX8-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV29]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX8-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV28]] - ; GFX8-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV29]], [[USUBO3]] - ; GFX8-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] - ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX8-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV30]] - ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD11]], [[UV31]], [[UADDO35]] - ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32) - ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV29]] - ; GFX8-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV28]] - ; GFX8-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV29]] - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX8-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV32]] - ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV33]], [[UADDO37]] - ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32) - ; GFX8-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] - ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] - ; GFX8-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] - ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] - ; GFX8-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX8-NEXT: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV34]](s32) - ; GFX8-NEXT: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV35]](s32) - ; GFX8-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C]] - ; GFX8-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[UITOFP2]] - ; GFX8-NEXT: 
[[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD2]](s32) - ; GFX8-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C1]] - ; GFX8-NEXT: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[FMUL5]], [[C2]] - ; GFX8-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL6]] - ; GFX8-NEXT: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C3]] - ; GFX8-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]] - ; GFX8-NEXT: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD3]](s32) - ; GFX8-NEXT: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32) - ; GFX8-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) - ; GFX8-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX8-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[UV36]], [[UV38]] - ; GFX8-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV37]], [[UV39]], [[USUBO7]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_18:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_19:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](s32), [[FPTOUI2]], [[C4]] - ; GFX8-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_18]](s64) - ; GFX8-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[UV41]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_20:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_21:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](s32), [[FPTOUI3]], [[ANYEXT3]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_22:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_23:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE8]](s32), [[FPTOUI2]], [[AMDGPU_MAD_U64_U32_20]] - ; GFX8-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_22]](s64) - ; GFX8-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV40]] - ; GFX8-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[UV42]] - ; GFX8-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV40]] - ; GFX8-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] - ; GFX8-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO39]](s1) - ; GFX8-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH12]] - ; GFX8-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO41]](s1) - ; GFX8-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] - ; GFX8-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV42]] - ; GFX8-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV40]] - ; GFX8-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV42]] - ; GFX8-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]] - ; GFX8-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1) - ; GFX8-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH14]] - ; GFX8-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO45]](s1) - ; GFX8-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] - ; GFX8-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[ADD12]] - ; GFX8-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) - ; GFX8-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT19]] - ; GFX8-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV42]] - ; GFX8-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[ADD14]] - ; GFX8-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO46]] - ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), 
[[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD15]], [[UADDO49]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](s32), [[UADDO48]], [[C4]] - ; GFX8-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_24]](s64) - ; GFX8-NEXT: [[ANYEXT4:%[0-9]+]]:_(s64) = G_ANYEXT [[UV45]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](s32), [[UADDE8]], [[ANYEXT4]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE8]](s32), [[UADDO48]], [[AMDGPU_MAD_U64_U32_26]] - ; GFX8-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_28]](s64) - ; GFX8-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE8]], [[UV44]] - ; GFX8-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO48]], [[UV46]] - ; GFX8-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[UV44]] - ; GFX8-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] - ; GFX8-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) - ; GFX8-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH16]] - ; GFX8-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1) - ; GFX8-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] - ; GFX8-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE8]], [[UV46]] - ; GFX8-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE8]], [[UV44]] - ; GFX8-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[UV46]] - ; GFX8-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]] - ; GFX8-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) - ; GFX8-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH18]] - ; GFX8-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO57]](s1) - ; GFX8-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] - ; GFX8-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[UADDO56]], [[ADD16]] - ; GFX8-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) - ; GFX8-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[ZEXT24]] - ; GFX8-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE8]], [[UV46]] - ; GFX8-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD18]] - ; GFX8-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO48]], [[UADDO58]] - ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[ADD19]], [[UADDO61]] - ; GFX8-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX8-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX8-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDO60]] - ; GFX8-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV50]], [[UADDE10]] - ; GFX8-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDO60]] - ; GFX8-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]] - ; GFX8-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) - ; GFX8-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH20]] - ; GFX8-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1) - ; GFX8-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] - ; GFX8-NEXT: 
[[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDE10]] - ; GFX8-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDO60]] - ; GFX8-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDE10]] - ; GFX8-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]] - ; GFX8-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) - ; GFX8-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH22]] - ; GFX8-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO69]](s1) - ; GFX8-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] - ; GFX8-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[UADDO68]], [[ADD20]] - ; GFX8-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1) - ; GFX8-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT29]] - ; GFX8-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDE10]] - ; GFX8-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[ADD22]] - ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO70]](s32), [[ADD23]](s32) - ; GFX8-NEXT: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV52]](s32), [[UADDO70]], [[C4]] - ; GFX8-NEXT: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_30]](s64) - ; GFX8-NEXT: [[ANYEXT5:%[0-9]+]]:_(s64) = G_ANYEXT [[UV55]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV52]](s32), [[ADD23]], [[ANYEXT5]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV53]](s32), [[UADDO70]], [[AMDGPU_MAD_U64_U32_32]] - ; GFX8-NEXT: [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_34]](s64) - ; GFX8-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV48]], [[UV54]] - ; GFX8-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[UV49]], [[UV56]], [[USUBO9]] - ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV49]], [[UV56]] - ; GFX8-NEXT: [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX8-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE10]](s32), [[UV59]] - ; GFX8-NEXT: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1) - ; GFX8-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO8]](s32), [[UV58]] - ; GFX8-NEXT: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1) - ; GFX8-NEXT: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE10]](s32), [[UV59]] - ; GFX8-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]] - ; GFX8-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[USUBO8]], [[UV58]] - ; GFX8-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV59]], [[USUBO9]] - ; GFX8-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[USUBE12]], [[C5]], [[USUBO11]] - ; GFX8-NEXT: [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX8-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UV60]] - ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[ADD23]], [[UV61]], [[UADDO73]] - ; GFX8-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO72]](s32), [[UADDE12]](s32) - ; GFX8-NEXT: 
[[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE14]](s32), [[UV59]] - ; GFX8-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) - ; GFX8-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV58]] - ; GFX8-NEXT: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1) - ; GFX8-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE14]](s32), [[UV59]] - ; GFX8-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] - ; GFX8-NEXT: [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX8-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[UV62]] - ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[UV63]], [[UADDO75]] - ; GFX8-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO74]](s32), [[UADDE14]](s32) - ; GFX8-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C5]] - ; GFX8-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV5]], [[MV4]] - ; GFX8-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C5]] - ; GFX8-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV3]] - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT3]](s64), [[SELECT7]](s64) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY1]](<2 x i64>) + ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[UV4]](i32) + ; GFX8-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[UV5]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP1]], [[C]] + ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD]](f32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX8-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FMUL1]], [[C2]] + ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX8-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] + ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD1]](f32) + ; GFX8-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC]](f32) + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C4]](i64) + ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX8-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV6]], [[UV8]] + ; GFX8-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV7]], [[UV9]], [[USUBO1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(i64), 
[[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[FPTOUI]], [[C4]] + ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](i64) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[UV11]](i32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[FPTOUI1]], [[ANYEXT]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](i32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](i64) + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[UV10]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI]], [[UV12]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[UV10]] + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[MUL]], [[MUL1]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO1]](i1) + ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UADDO]], [[UMULH]] + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO3]](i1) + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[UV12]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[UV10]] + ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[UV12]] + ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(i32), [[UADDO5:%[0-9]+]]:_(i1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO5]](i1) + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(i32), [[UADDO7:%[0-9]+]]:_(i1) = G_UADDO [[UADDO4]], [[UMULH2]] + ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO7]](i1) + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(i32), [[UADDO9:%[0-9]+]]:_(i1) = G_UADDO [[UADDO6]], [[ADD]] + ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO9]](i1) + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[UV12]] + ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UMULH3]], [[ADD2]] + ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(i32), [[UADDO11:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI]], [[UADDO8]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[UADDO10]], [[C4]] + ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](i64) + ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[UV15]](i32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[UADDE]], [[ANYEXT1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](i32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](i64) + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UADDE]], [[UV14]] + ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(i32) = G_MUL [[UADDO10]], [[UV16]] + ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(i32) = G_UMULH [[UADDO10]], [[UV14]] + ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(i32), 
[[UADDO13:%[0-9]+]]:_(i1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO13]](i1) + ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(i32), [[UADDO15:%[0-9]+]]:_(i1) = G_UADDO [[UADDO12]], [[UMULH4]] + ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO15]](i1) + ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(i32) = G_MUL [[UADDE]], [[UV16]] + ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(i32) = G_UMULH [[UADDE]], [[UV14]] + ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(i32) = G_UMULH [[UADDO10]], [[UV16]] + ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(i32), [[UADDO17:%[0-9]+]]:_(i1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO17]](i1) + ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(i32), [[UADDO19:%[0-9]+]]:_(i1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO19]](i1) + ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(i32), [[UADDO21:%[0-9]+]]:_(i1) = G_UADDO [[UADDO18]], [[ADD4]] + ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO21]](i1) + ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(i32) = G_UMULH [[UADDE]], [[UV16]] + ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[UMULH7]], [[ADD6]] + ; GFX8-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(i32), [[UADDO23:%[0-9]+]]:_(i1) = G_UADDO [[UADDO10]], [[UADDO20]] + ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]] + ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV]](i64) + ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV]](i64) + ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(i32) = G_MUL [[UV21]], [[UADDO22]] + ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(i32) = G_MUL [[UV20]], [[UADDE2]] + ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(i32) = G_UMULH [[UV20]], [[UADDO22]] + ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(i32), [[UADDO25:%[0-9]+]]:_(i1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO25]](i1) + ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(i32), [[UADDO27:%[0-9]+]]:_(i1) = G_UADDO [[UADDO24]], [[UMULH8]] + ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO27]](i1) + ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(i32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(i32) = G_MUL [[UV21]], [[UADDE2]] + ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(i32) = G_UMULH [[UV21]], [[UADDO22]] + ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(i32) = G_UMULH [[UV20]], [[UADDE2]] + ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(i32), [[UADDO29:%[0-9]+]]:_(i1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO29]](i1) + ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(i32), [[UADDO31:%[0-9]+]]:_(i1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO31]](i1) + ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(i32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(i32), [[UADDO33:%[0-9]+]]:_(i1) = G_UADDO [[UADDO30]], [[ADD8]] + ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO33]](i1) + ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(i32) = G_ADD [[ADD9]], [[ZEXT14]] + ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(i32) = G_UMULH [[UV21]], [[UADDE2]] + ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(i32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO32]](i32), [[ADD11]](i32) + ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(i32), 
[[UV23:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV22]](i32), [[UADDO32]], [[C4]] + ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](i64) + ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(i64) = G_ANYEXT [[UV25]](i32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV22]](i32), [[ADD11]], [[ANYEXT2]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV23]](i32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](i64) + ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(i32), [[USUBO3:%[0-9]+]]:_(i1) = G_USUBO [[UV18]], [[UV24]] + ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV19]], [[UV26]], [[USUBO3]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV19]], [[UV26]] + ; GFX8-NEXT: [[UV28:%[0-9]+]]:_(i32), [[UV29:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE2]](i32), [[UV29]] + ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO2]](i32), [[UV28]] + ; GFX8-NEXT: [[SEXT1:%[0-9]+]]:_(i32) = G_SEXT [[ICMP1]](i1) + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE2]](i32), [[UV29]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SEXT1]], [[SEXT]] + ; GFX8-NEXT: [[USUBO4:%[0-9]+]]:_(i32), [[USUBO5:%[0-9]+]]:_(i1) = G_USUBO [[USUBO2]], [[UV28]] + ; GFX8-NEXT: [[USUBE4:%[0-9]+]]:_(i32), [[USUBE5:%[0-9]+]]:_(i1) = G_USUBE [[SUB]], [[UV29]], [[USUBO3]] + ; GFX8-NEXT: [[USUBE6:%[0-9]+]]:_(i32), [[USUBE7:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] + ; GFX8-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX8-NEXT: [[UV30:%[0-9]+]]:_(i32), [[UV31:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C6]](i64) + ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(i32), [[UADDO35:%[0-9]+]]:_(i1) = G_UADDO [[UADDO32]], [[UV30]] + ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(i32), [[UADDE5:%[0-9]+]]:_(i1) = G_UADDE [[ADD11]], [[UV31]], [[UADDO35]] + ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO34]](i32), [[UADDE4]](i32) + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE6]](i32), [[UV29]] + ; GFX8-NEXT: [[SEXT2:%[0-9]+]]:_(i32) = G_SEXT [[ICMP3]](i1) + ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO4]](i32), [[UV28]] + ; GFX8-NEXT: [[SEXT3:%[0-9]+]]:_(i32) = G_SEXT [[ICMP4]](i1) + ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE6]](i32), [[UV29]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP5]](i1), [[SEXT3]], [[SEXT2]] + ; GFX8-NEXT: [[UV32:%[0-9]+]]:_(i32), [[UV33:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C6]](i64) + ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(i32), [[UADDO37:%[0-9]+]]:_(i1) = G_UADDO [[UADDO34]], [[UV32]] + ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(i32), [[UADDE7:%[0-9]+]]:_(i1) = G_UADDE [[UADDE4]], [[UV33]], [[UADDO37]] + ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO36]](i32), [[UADDE6]](i32) + ; GFX8-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT1]](i32), [[C5]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[MV2]], [[MV1]] + ; GFX8-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), 
[[SELECT]](i32), [[C5]] + ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP7]](i1), [[SELECT2]], [[MV]] + ; GFX8-NEXT: [[UV34:%[0-9]+]]:_(i32), [[UV35:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV3]](i64) + ; GFX8-NEXT: [[UITOFP2:%[0-9]+]]:_(f32) = G_UITOFP [[UV34]](i32) + ; GFX8-NEXT: [[UITOFP3:%[0-9]+]]:_(f32) = G_UITOFP [[UV35]](i32) + ; GFX8-NEXT: [[FMUL4:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP3]], [[C]] + ; GFX8-NEXT: [[FADD2:%[0-9]+]]:_(f32) = G_FADD [[FMUL4]], [[UITOFP2]] + ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD2]](f32) + ; GFX8-NEXT: [[FMUL5:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C1]] + ; GFX8-NEXT: [[FMUL6:%[0-9]+]]:_(f32) = G_FMUL [[FMUL5]], [[C2]] + ; GFX8-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL6]] + ; GFX8-NEXT: [[FMUL7:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C3]] + ; GFX8-NEXT: [[FADD3:%[0-9]+]]:_(f32) = G_FADD [[FMUL7]], [[FMUL5]] + ; GFX8-NEXT: [[FPTOUI2:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD3]](f32) + ; GFX8-NEXT: [[FPTOUI3:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC1]](f32) + ; GFX8-NEXT: [[UV36:%[0-9]+]]:_(i32), [[UV37:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C4]](i64) + ; GFX8-NEXT: [[UV38:%[0-9]+]]:_(i32), [[UV39:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV3]](i64) + ; GFX8-NEXT: [[USUBO6:%[0-9]+]]:_(i32), [[USUBO7:%[0-9]+]]:_(i1) = G_USUBO [[UV36]], [[UV38]] + ; GFX8-NEXT: [[USUBE8:%[0-9]+]]:_(i32), [[USUBE9:%[0-9]+]]:_(i1) = G_USUBE [[UV37]], [[UV39]], [[USUBO7]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_18:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_19:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](i32), [[FPTOUI2]], [[C4]] + ; GFX8-NEXT: [[UV40:%[0-9]+]]:_(i32), [[UV41:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_18]](i64) + ; GFX8-NEXT: [[ANYEXT3:%[0-9]+]]:_(i64) = G_ANYEXT [[UV41]](i32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_20:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_21:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](i32), [[FPTOUI3]], [[ANYEXT3]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_22:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_23:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE8]](i32), [[FPTOUI2]], [[AMDGPU_MAD_U64_U32_20]] + ; GFX8-NEXT: [[UV42:%[0-9]+]]:_(i32), [[UV43:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_22]](i64) + ; GFX8-NEXT: [[MUL9:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI3]], [[UV40]] + ; GFX8-NEXT: [[MUL10:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI2]], [[UV42]] + ; GFX8-NEXT: [[UMULH12:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI2]], [[UV40]] + ; GFX8-NEXT: [[UADDO38:%[0-9]+]]:_(i32), [[UADDO39:%[0-9]+]]:_(i1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX8-NEXT: [[ZEXT15:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO39]](i1) + ; GFX8-NEXT: [[UADDO40:%[0-9]+]]:_(i32), [[UADDO41:%[0-9]+]]:_(i1) = G_UADDO [[UADDO38]], [[UMULH12]] + ; GFX8-NEXT: [[ZEXT16:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO41]](i1) + ; GFX8-NEXT: [[ADD12:%[0-9]+]]:_(i32) = G_ADD [[ZEXT15]], [[ZEXT16]] + ; GFX8-NEXT: [[MUL11:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI3]], [[UV42]] + ; GFX8-NEXT: [[UMULH13:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI3]], [[UV40]] + ; GFX8-NEXT: [[UMULH14:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI2]], [[UV42]] + ; GFX8-NEXT: [[UADDO42:%[0-9]+]]:_(i32), [[UADDO43:%[0-9]+]]:_(i1) = G_UADDO [[MUL11]], [[UMULH13]] + ; GFX8-NEXT: [[ZEXT17:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO43]](i1) + ; GFX8-NEXT: [[UADDO44:%[0-9]+]]:_(i32), [[UADDO45:%[0-9]+]]:_(i1) = G_UADDO [[UADDO42]], [[UMULH14]] + ; GFX8-NEXT: [[ZEXT18:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO45]](i1) + ; GFX8-NEXT: [[ADD13:%[0-9]+]]:_(i32) = G_ADD [[ZEXT17]], [[ZEXT18]] + ; GFX8-NEXT: 
[[UADDO46:%[0-9]+]]:_(i32), [[UADDO47:%[0-9]+]]:_(i1) = G_UADDO [[UADDO44]], [[ADD12]] + ; GFX8-NEXT: [[ZEXT19:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO47]](i1) + ; GFX8-NEXT: [[ADD14:%[0-9]+]]:_(i32) = G_ADD [[ADD13]], [[ZEXT19]] + ; GFX8-NEXT: [[UMULH15:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI3]], [[UV42]] + ; GFX8-NEXT: [[ADD15:%[0-9]+]]:_(i32) = G_ADD [[UMULH15]], [[ADD14]] + ; GFX8-NEXT: [[UADDO48:%[0-9]+]]:_(i32), [[UADDO49:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI2]], [[UADDO46]] + ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(i32), [[UADDE9:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI3]], [[ADD15]], [[UADDO49]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](i32), [[UADDO48]], [[C4]] + ; GFX8-NEXT: [[UV44:%[0-9]+]]:_(i32), [[UV45:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_24]](i64) + ; GFX8-NEXT: [[ANYEXT4:%[0-9]+]]:_(i64) = G_ANYEXT [[UV45]](i32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](i32), [[UADDE8]], [[ANYEXT4]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE8]](i32), [[UADDO48]], [[AMDGPU_MAD_U64_U32_26]] + ; GFX8-NEXT: [[UV46:%[0-9]+]]:_(i32), [[UV47:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_28]](i64) + ; GFX8-NEXT: [[MUL12:%[0-9]+]]:_(i32) = G_MUL [[UADDE8]], [[UV44]] + ; GFX8-NEXT: [[MUL13:%[0-9]+]]:_(i32) = G_MUL [[UADDO48]], [[UV46]] + ; GFX8-NEXT: [[UMULH16:%[0-9]+]]:_(i32) = G_UMULH [[UADDO48]], [[UV44]] + ; GFX8-NEXT: [[UADDO50:%[0-9]+]]:_(i32), [[UADDO51:%[0-9]+]]:_(i1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX8-NEXT: [[ZEXT20:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO51]](i1) + ; GFX8-NEXT: [[UADDO52:%[0-9]+]]:_(i32), [[UADDO53:%[0-9]+]]:_(i1) = G_UADDO [[UADDO50]], [[UMULH16]] + ; GFX8-NEXT: [[ZEXT21:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO53]](i1) + ; GFX8-NEXT: [[ADD16:%[0-9]+]]:_(i32) = G_ADD [[ZEXT20]], [[ZEXT21]] + ; GFX8-NEXT: [[MUL14:%[0-9]+]]:_(i32) = G_MUL [[UADDE8]], [[UV46]] + ; GFX8-NEXT: [[UMULH17:%[0-9]+]]:_(i32) = G_UMULH [[UADDE8]], [[UV44]] + ; GFX8-NEXT: [[UMULH18:%[0-9]+]]:_(i32) = G_UMULH [[UADDO48]], [[UV46]] + ; GFX8-NEXT: [[UADDO54:%[0-9]+]]:_(i32), [[UADDO55:%[0-9]+]]:_(i1) = G_UADDO [[MUL14]], [[UMULH17]] + ; GFX8-NEXT: [[ZEXT22:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO55]](i1) + ; GFX8-NEXT: [[UADDO56:%[0-9]+]]:_(i32), [[UADDO57:%[0-9]+]]:_(i1) = G_UADDO [[UADDO54]], [[UMULH18]] + ; GFX8-NEXT: [[ZEXT23:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO57]](i1) + ; GFX8-NEXT: [[ADD17:%[0-9]+]]:_(i32) = G_ADD [[ZEXT22]], [[ZEXT23]] + ; GFX8-NEXT: [[UADDO58:%[0-9]+]]:_(i32), [[UADDO59:%[0-9]+]]:_(i1) = G_UADDO [[UADDO56]], [[ADD16]] + ; GFX8-NEXT: [[ZEXT24:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO59]](i1) + ; GFX8-NEXT: [[ADD18:%[0-9]+]]:_(i32) = G_ADD [[ADD17]], [[ZEXT24]] + ; GFX8-NEXT: [[UMULH19:%[0-9]+]]:_(i32) = G_UMULH [[UADDE8]], [[UV46]] + ; GFX8-NEXT: [[ADD19:%[0-9]+]]:_(i32) = G_ADD [[UMULH19]], [[ADD18]] + ; GFX8-NEXT: [[UADDO60:%[0-9]+]]:_(i32), [[UADDO61:%[0-9]+]]:_(i1) = G_UADDO [[UADDO48]], [[UADDO58]] + ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(i32), [[UADDE11:%[0-9]+]]:_(i1) = G_UADDE [[UADDE8]], [[ADD19]], [[UADDO61]] + ; GFX8-NEXT: [[UV48:%[0-9]+]]:_(i32), [[UV49:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV1]](i64) + ; GFX8-NEXT: [[UV50:%[0-9]+]]:_(i32), [[UV51:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV1]](i64) + ; GFX8-NEXT: [[MUL15:%[0-9]+]]:_(i32) = G_MUL [[UV51]], [[UADDO60]] + ; GFX8-NEXT: [[MUL16:%[0-9]+]]:_(i32) = G_MUL [[UV50]], [[UADDE10]] + ; 
GFX8-NEXT: [[UMULH20:%[0-9]+]]:_(i32) = G_UMULH [[UV50]], [[UADDO60]] + ; GFX8-NEXT: [[UADDO62:%[0-9]+]]:_(i32), [[UADDO63:%[0-9]+]]:_(i1) = G_UADDO [[MUL15]], [[MUL16]] + ; GFX8-NEXT: [[ZEXT25:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO63]](i1) + ; GFX8-NEXT: [[UADDO64:%[0-9]+]]:_(i32), [[UADDO65:%[0-9]+]]:_(i1) = G_UADDO [[UADDO62]], [[UMULH20]] + ; GFX8-NEXT: [[ZEXT26:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO65]](i1) + ; GFX8-NEXT: [[ADD20:%[0-9]+]]:_(i32) = G_ADD [[ZEXT25]], [[ZEXT26]] + ; GFX8-NEXT: [[MUL17:%[0-9]+]]:_(i32) = G_MUL [[UV51]], [[UADDE10]] + ; GFX8-NEXT: [[UMULH21:%[0-9]+]]:_(i32) = G_UMULH [[UV51]], [[UADDO60]] + ; GFX8-NEXT: [[UMULH22:%[0-9]+]]:_(i32) = G_UMULH [[UV50]], [[UADDE10]] + ; GFX8-NEXT: [[UADDO66:%[0-9]+]]:_(i32), [[UADDO67:%[0-9]+]]:_(i1) = G_UADDO [[MUL17]], [[UMULH21]] + ; GFX8-NEXT: [[ZEXT27:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO67]](i1) + ; GFX8-NEXT: [[UADDO68:%[0-9]+]]:_(i32), [[UADDO69:%[0-9]+]]:_(i1) = G_UADDO [[UADDO66]], [[UMULH22]] + ; GFX8-NEXT: [[ZEXT28:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO69]](i1) + ; GFX8-NEXT: [[ADD21:%[0-9]+]]:_(i32) = G_ADD [[ZEXT27]], [[ZEXT28]] + ; GFX8-NEXT: [[UADDO70:%[0-9]+]]:_(i32), [[UADDO71:%[0-9]+]]:_(i1) = G_UADDO [[UADDO68]], [[ADD20]] + ; GFX8-NEXT: [[ZEXT29:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO71]](i1) + ; GFX8-NEXT: [[ADD22:%[0-9]+]]:_(i32) = G_ADD [[ADD21]], [[ZEXT29]] + ; GFX8-NEXT: [[UMULH23:%[0-9]+]]:_(i32) = G_UMULH [[UV51]], [[UADDE10]] + ; GFX8-NEXT: [[ADD23:%[0-9]+]]:_(i32) = G_ADD [[UMULH23]], [[ADD22]] + ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO70]](i32), [[ADD23]](i32) + ; GFX8-NEXT: [[UV52:%[0-9]+]]:_(i32), [[UV53:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV3]](i64) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV52]](i32), [[UADDO70]], [[C4]] + ; GFX8-NEXT: [[UV54:%[0-9]+]]:_(i32), [[UV55:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_30]](i64) + ; GFX8-NEXT: [[ANYEXT5:%[0-9]+]]:_(i64) = G_ANYEXT [[UV55]](i32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV52]](i32), [[ADD23]], [[ANYEXT5]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV53]](i32), [[UADDO70]], [[AMDGPU_MAD_U64_U32_32]] + ; GFX8-NEXT: [[UV56:%[0-9]+]]:_(i32), [[UV57:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_34]](i64) + ; GFX8-NEXT: [[USUBO8:%[0-9]+]]:_(i32), [[USUBO9:%[0-9]+]]:_(i1) = G_USUBO [[UV48]], [[UV54]] + ; GFX8-NEXT: [[USUBE10:%[0-9]+]]:_(i32), [[USUBE11:%[0-9]+]]:_(i1) = G_USUBE [[UV49]], [[UV56]], [[USUBO9]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[UV49]], [[UV56]] + ; GFX8-NEXT: [[UV58:%[0-9]+]]:_(i32), [[UV59:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV3]](i64) + ; GFX8-NEXT: [[ICMP8:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE10]](i32), [[UV59]] + ; GFX8-NEXT: [[SEXT4:%[0-9]+]]:_(i32) = G_SEXT [[ICMP8]](i1) + ; GFX8-NEXT: [[ICMP9:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO8]](i32), [[UV58]] + ; GFX8-NEXT: [[SEXT5:%[0-9]+]]:_(i32) = G_SEXT [[ICMP9]](i1) + ; GFX8-NEXT: [[ICMP10:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE10]](i32), [[UV59]] + ; GFX8-NEXT: [[SELECT4:%[0-9]+]]:_(i32) = G_SELECT [[ICMP10]](i1), [[SEXT5]], [[SEXT4]] + ; GFX8-NEXT: [[USUBO10:%[0-9]+]]:_(i32), [[USUBO11:%[0-9]+]]:_(i1) = G_USUBO [[USUBO8]], [[UV58]] + ; GFX8-NEXT: [[USUBE12:%[0-9]+]]:_(i32), [[USUBE13:%[0-9]+]]:_(i1) = G_USUBE [[SUB1]], [[UV59]], [[USUBO9]] + ; GFX8-NEXT: 
[[USUBE14:%[0-9]+]]:_(i32), [[USUBE15:%[0-9]+]]:_(i1) = G_USUBE [[USUBE12]], [[C5]], [[USUBO11]] + ; GFX8-NEXT: [[UV60:%[0-9]+]]:_(i32), [[UV61:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C6]](i64) + ; GFX8-NEXT: [[UADDO72:%[0-9]+]]:_(i32), [[UADDO73:%[0-9]+]]:_(i1) = G_UADDO [[UADDO70]], [[UV60]] + ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(i32), [[UADDE13:%[0-9]+]]:_(i1) = G_UADDE [[ADD23]], [[UV61]], [[UADDO73]] + ; GFX8-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO72]](i32), [[UADDE12]](i32) + ; GFX8-NEXT: [[ICMP11:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE14]](i32), [[UV59]] + ; GFX8-NEXT: [[SEXT6:%[0-9]+]]:_(i32) = G_SEXT [[ICMP11]](i1) + ; GFX8-NEXT: [[ICMP12:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO10]](i32), [[UV58]] + ; GFX8-NEXT: [[SEXT7:%[0-9]+]]:_(i32) = G_SEXT [[ICMP12]](i1) + ; GFX8-NEXT: [[ICMP13:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE14]](i32), [[UV59]] + ; GFX8-NEXT: [[SELECT5:%[0-9]+]]:_(i32) = G_SELECT [[ICMP13]](i1), [[SEXT7]], [[SEXT6]] + ; GFX8-NEXT: [[UV62:%[0-9]+]]:_(i32), [[UV63:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C6]](i64) + ; GFX8-NEXT: [[UADDO74:%[0-9]+]]:_(i32), [[UADDO75:%[0-9]+]]:_(i1) = G_UADDO [[UADDO72]], [[UV62]] + ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(i32), [[UADDE15:%[0-9]+]]:_(i1) = G_UADDE [[UADDE12]], [[UV63]], [[UADDO75]] + ; GFX8-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO74]](i32), [[UADDE14]](i32) + ; GFX8-NEXT: [[ICMP14:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT5]](i32), [[C5]] + ; GFX8-NEXT: [[SELECT6:%[0-9]+]]:_(i64) = G_SELECT [[ICMP14]](i1), [[MV5]], [[MV4]] + ; GFX8-NEXT: [[ICMP15:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT4]](i32), [[C5]] + ; GFX8-NEXT: [[SELECT7:%[0-9]+]]:_(i64) = G_SELECT [[ICMP15]](i1), [[SELECT6]], [[MV3]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[SELECT3]](i64), [[SELECT7]](i64) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; GFX9-LABEL: name: test_udiv_v2s64 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV4]](s32) - ; GFX9-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV5]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] - ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 - ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C2]] - ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; GFX9-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] - ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] - ; GFX9-NEXT: 
[[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) - ; GFX9-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) - ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX9-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[UV8]] - ; GFX9-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[UV9]], [[USUBO1]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C4]] - ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV11]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] - ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV10]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV12]] - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV10]] - ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV12]] - ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV10]] - ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV12]] - ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] - ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV12]] - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C4]] - ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) - ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV15]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) 
= G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE]], [[ANYEXT1]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]] - ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV14]] - ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[UV16]] - ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV14]] - ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] - ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV16]] - ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV14]] - ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV16]] - ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD4]] - ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV16]] - ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]] - ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO22]] - ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE2]] - ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO22]] - ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] - ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE2]] - ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO22]] - ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE2]] - ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT 
[[UADDO31]](s1) - ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD8]] - ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE2]] - ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD11]](s32) - ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDO32]], [[C4]] - ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) - ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV25]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV23]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]] - ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) - ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[UV24]] - ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[UV26]], [[USUBO3]] - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[UV26]] - ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV29]] - ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV28]] - ; GFX9-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV29]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX9-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV28]] - ; GFX9-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV29]], [[USUBO3]] - ; GFX9-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] - ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV30]] - ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD11]], [[UV31]], [[UADDO35]] - ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32) - ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV29]] - ; GFX9-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV28]] - ; GFX9-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV29]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX9-NEXT: [[UV32:%[0-9]+]]:_(s32), 
[[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV32]] - ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV33]], [[UADDO37]] - ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32) - ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] - ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] - ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] - ; GFX9-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX9-NEXT: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV34]](s32) - ; GFX9-NEXT: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV35]](s32) - ; GFX9-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C]] - ; GFX9-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[UITOFP2]] - ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD2]](s32) - ; GFX9-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C1]] - ; GFX9-NEXT: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[FMUL5]], [[C2]] - ; GFX9-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL6]] - ; GFX9-NEXT: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C3]] - ; GFX9-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]] - ; GFX9-NEXT: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD3]](s32) - ; GFX9-NEXT: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32) - ; GFX9-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) - ; GFX9-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX9-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[UV36]], [[UV38]] - ; GFX9-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV37]], [[UV39]], [[USUBO7]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_18:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_19:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](s32), [[FPTOUI2]], [[C4]] - ; GFX9-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_18]](s64) - ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[UV41]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_20:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_21:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](s32), [[FPTOUI3]], [[ANYEXT3]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_22:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_23:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE8]](s32), [[FPTOUI2]], [[AMDGPU_MAD_U64_U32_20]] - ; GFX9-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_22]](s64) - ; GFX9-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV40]] - ; GFX9-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[UV42]] - ; GFX9-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV40]] - ; GFX9-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] - ; GFX9-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO39]](s1) - ; GFX9-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH12]] - ; GFX9-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO41]](s1) - ; GFX9-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] - ; GFX9-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV42]] 
- ; GFX9-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV40]] - ; GFX9-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV42]] - ; GFX9-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]] - ; GFX9-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1) - ; GFX9-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH14]] - ; GFX9-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO45]](s1) - ; GFX9-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] - ; GFX9-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[ADD12]] - ; GFX9-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) - ; GFX9-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT19]] - ; GFX9-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV42]] - ; GFX9-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[ADD14]] - ; GFX9-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO46]] - ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD15]], [[UADDO49]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](s32), [[UADDO48]], [[C4]] - ; GFX9-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_24]](s64) - ; GFX9-NEXT: [[ANYEXT4:%[0-9]+]]:_(s64) = G_ANYEXT [[UV45]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](s32), [[UADDE8]], [[ANYEXT4]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE8]](s32), [[UADDO48]], [[AMDGPU_MAD_U64_U32_26]] - ; GFX9-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_28]](s64) - ; GFX9-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE8]], [[UV44]] - ; GFX9-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO48]], [[UV46]] - ; GFX9-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[UV44]] - ; GFX9-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] - ; GFX9-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) - ; GFX9-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH16]] - ; GFX9-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1) - ; GFX9-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] - ; GFX9-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE8]], [[UV46]] - ; GFX9-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE8]], [[UV44]] - ; GFX9-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[UV46]] - ; GFX9-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]] - ; GFX9-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) - ; GFX9-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH18]] - ; GFX9-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO57]](s1) - ; GFX9-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] - ; GFX9-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[UADDO56]], [[ADD16]] - ; GFX9-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) - ; GFX9-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[ZEXT24]] - ; GFX9-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE8]], [[UV46]] - ; GFX9-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD 
[[UMULH19]], [[ADD18]] - ; GFX9-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO48]], [[UADDO58]] - ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[ADD19]], [[UADDO61]] - ; GFX9-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX9-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX9-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDO60]] - ; GFX9-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV50]], [[UADDE10]] - ; GFX9-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDO60]] - ; GFX9-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]] - ; GFX9-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) - ; GFX9-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH20]] - ; GFX9-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1) - ; GFX9-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] - ; GFX9-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDE10]] - ; GFX9-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDO60]] - ; GFX9-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDE10]] - ; GFX9-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]] - ; GFX9-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) - ; GFX9-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH22]] - ; GFX9-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO69]](s1) - ; GFX9-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] - ; GFX9-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[UADDO68]], [[ADD20]] - ; GFX9-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1) - ; GFX9-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT29]] - ; GFX9-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDE10]] - ; GFX9-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[ADD22]] - ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO70]](s32), [[ADD23]](s32) - ; GFX9-NEXT: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV52]](s32), [[UADDO70]], [[C4]] - ; GFX9-NEXT: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_30]](s64) - ; GFX9-NEXT: [[ANYEXT5:%[0-9]+]]:_(s64) = G_ANYEXT [[UV55]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV52]](s32), [[ADD23]], [[ANYEXT5]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV53]](s32), [[UADDO70]], [[AMDGPU_MAD_U64_U32_32]] - ; GFX9-NEXT: [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_34]](s64) - ; GFX9-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV48]], [[UV54]] - ; GFX9-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[UV49]], [[UV56]], [[USUBO9]] - ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV49]], [[UV56]] - ; GFX9-NEXT: [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX9-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE10]](s32), [[UV59]] - ; GFX9-NEXT: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1) - ; 
GFX9-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO8]](s32), [[UV58]] - ; GFX9-NEXT: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1) - ; GFX9-NEXT: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE10]](s32), [[UV59]] - ; GFX9-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]] - ; GFX9-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[USUBO8]], [[UV58]] - ; GFX9-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV59]], [[USUBO9]] - ; GFX9-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[USUBE12]], [[C5]], [[USUBO11]] - ; GFX9-NEXT: [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX9-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UV60]] - ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[ADD23]], [[UV61]], [[UADDO73]] - ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO72]](s32), [[UADDE12]](s32) - ; GFX9-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE14]](s32), [[UV59]] - ; GFX9-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) - ; GFX9-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV58]] - ; GFX9-NEXT: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1) - ; GFX9-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE14]](s32), [[UV59]] - ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] - ; GFX9-NEXT: [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX9-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[UV62]] - ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[UV63]], [[UADDO75]] - ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO74]](s32), [[UADDE14]](s32) - ; GFX9-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C5]] - ; GFX9-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV5]], [[MV4]] - ; GFX9-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C5]] - ; GFX9-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV3]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT3]](s64), [[SELECT7]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY1]](<2 x i64>) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[UV4]](i32) + ; GFX9-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[UV5]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP1]], [[C]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD]](f32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 
0x3DF0000000000000 + ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FMUL1]], [[C2]] + ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX9-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] + ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD1]](f32) + ; GFX9-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC]](f32) + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C4]](i64) + ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX9-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV6]], [[UV8]] + ; GFX9-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV7]], [[UV9]], [[USUBO1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[FPTOUI]], [[C4]] + ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](i64) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[UV11]](i32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[FPTOUI1]], [[ANYEXT]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](i32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](i64) + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[UV10]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI]], [[UV12]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[UV10]] + ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[MUL]], [[MUL1]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO1]](i1) + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UADDO]], [[UMULH]] + ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO3]](i1) + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[UV12]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[UV10]] + ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[UV12]] + ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(i32), [[UADDO5:%[0-9]+]]:_(i1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO5]](i1) + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(i32), [[UADDO7:%[0-9]+]]:_(i1) = G_UADDO [[UADDO4]], [[UMULH2]] + ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO7]](i1) + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(i32), [[UADDO9:%[0-9]+]]:_(i1) = G_UADDO [[UADDO6]], [[ADD]] + ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO9]](i1) + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[UV12]] + ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UMULH3]], [[ADD2]] + ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(i32), [[UADDO11:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI]], [[UADDO8]] + ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]] + ; GFX9-NEXT: 
[[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[UADDO10]], [[C4]] + ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](i64) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[UV15]](i32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[UADDE]], [[ANYEXT1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](i32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](i64) + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UADDE]], [[UV14]] + ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(i32) = G_MUL [[UADDO10]], [[UV16]] + ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(i32) = G_UMULH [[UADDO10]], [[UV14]] + ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(i32), [[UADDO13:%[0-9]+]]:_(i1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO13]](i1) + ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(i32), [[UADDO15:%[0-9]+]]:_(i1) = G_UADDO [[UADDO12]], [[UMULH4]] + ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO15]](i1) + ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(i32) = G_MUL [[UADDE]], [[UV16]] + ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(i32) = G_UMULH [[UADDE]], [[UV14]] + ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(i32) = G_UMULH [[UADDO10]], [[UV16]] + ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(i32), [[UADDO17:%[0-9]+]]:_(i1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO17]](i1) + ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(i32), [[UADDO19:%[0-9]+]]:_(i1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO19]](i1) + ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(i32), [[UADDO21:%[0-9]+]]:_(i1) = G_UADDO [[UADDO18]], [[ADD4]] + ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO21]](i1) + ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(i32) = G_UMULH [[UADDE]], [[UV16]] + ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[UMULH7]], [[ADD6]] + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(i32), [[UADDO23:%[0-9]+]]:_(i1) = G_UADDO [[UADDO10]], [[UADDO20]] + ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]] + ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV]](i64) + ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV]](i64) + ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(i32) = G_MUL [[UV21]], [[UADDO22]] + ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(i32) = G_MUL [[UV20]], [[UADDE2]] + ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(i32) = G_UMULH [[UV20]], [[UADDO22]] + ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(i32), [[UADDO25:%[0-9]+]]:_(i1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO25]](i1) + ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(i32), [[UADDO27:%[0-9]+]]:_(i1) = G_UADDO [[UADDO24]], [[UMULH8]] + ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO27]](i1) + ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(i32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(i32) = G_MUL [[UV21]], [[UADDE2]] + ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(i32) = G_UMULH 
[[UV21]], [[UADDO22]] + ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(i32) = G_UMULH [[UV20]], [[UADDE2]] + ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(i32), [[UADDO29:%[0-9]+]]:_(i1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO29]](i1) + ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(i32), [[UADDO31:%[0-9]+]]:_(i1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO31]](i1) + ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(i32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(i32), [[UADDO33:%[0-9]+]]:_(i1) = G_UADDO [[UADDO30]], [[ADD8]] + ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO33]](i1) + ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(i32) = G_ADD [[ADD9]], [[ZEXT14]] + ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(i32) = G_UMULH [[UV21]], [[UADDE2]] + ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(i32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO32]](i32), [[ADD11]](i32) + ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV22]](i32), [[UADDO32]], [[C4]] + ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](i64) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(i64) = G_ANYEXT [[UV25]](i32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV22]](i32), [[ADD11]], [[ANYEXT2]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV23]](i32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](i64) + ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(i32), [[USUBO3:%[0-9]+]]:_(i1) = G_USUBO [[UV18]], [[UV24]] + ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV19]], [[UV26]], [[USUBO3]] + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV19]], [[UV26]] + ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(i32), [[UV29:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE2]](i32), [[UV29]] + ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO2]](i32), [[UV28]] + ; GFX9-NEXT: [[SEXT1:%[0-9]+]]:_(i32) = G_SEXT [[ICMP1]](i1) + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE2]](i32), [[UV29]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SEXT1]], [[SEXT]] + ; GFX9-NEXT: [[USUBO4:%[0-9]+]]:_(i32), [[USUBO5:%[0-9]+]]:_(i1) = G_USUBO [[USUBO2]], [[UV28]] + ; GFX9-NEXT: [[USUBE4:%[0-9]+]]:_(i32), [[USUBE5:%[0-9]+]]:_(i1) = G_USUBE [[SUB]], [[UV29]], [[USUBO3]] + ; GFX9-NEXT: [[USUBE6:%[0-9]+]]:_(i32), [[USUBE7:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] + ; GFX9-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX9-NEXT: [[UV30:%[0-9]+]]:_(i32), [[UV31:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C6]](i64) + ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(i32), [[UADDO35:%[0-9]+]]:_(i1) = G_UADDO [[UADDO32]], [[UV30]] + ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(i32), [[UADDE5:%[0-9]+]]:_(i1) = G_UADDE [[ADD11]], [[UV31]], [[UADDO35]] + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO34]](i32), [[UADDE4]](i32) + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE6]](i32), [[UV29]] + ; GFX9-NEXT: 
[[SEXT2:%[0-9]+]]:_(i32) = G_SEXT [[ICMP3]](i1) + ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO4]](i32), [[UV28]] + ; GFX9-NEXT: [[SEXT3:%[0-9]+]]:_(i32) = G_SEXT [[ICMP4]](i1) + ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE6]](i32), [[UV29]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP5]](i1), [[SEXT3]], [[SEXT2]] + ; GFX9-NEXT: [[UV32:%[0-9]+]]:_(i32), [[UV33:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C6]](i64) + ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(i32), [[UADDO37:%[0-9]+]]:_(i1) = G_UADDO [[UADDO34]], [[UV32]] + ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(i32), [[UADDE7:%[0-9]+]]:_(i1) = G_UADDE [[UADDE4]], [[UV33]], [[UADDO37]] + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO36]](i32), [[UADDE6]](i32) + ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT1]](i32), [[C5]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[MV2]], [[MV1]] + ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT]](i32), [[C5]] + ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP7]](i1), [[SELECT2]], [[MV]] + ; GFX9-NEXT: [[UV34:%[0-9]+]]:_(i32), [[UV35:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV3]](i64) + ; GFX9-NEXT: [[UITOFP2:%[0-9]+]]:_(f32) = G_UITOFP [[UV34]](i32) + ; GFX9-NEXT: [[UITOFP3:%[0-9]+]]:_(f32) = G_UITOFP [[UV35]](i32) + ; GFX9-NEXT: [[FMUL4:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP3]], [[C]] + ; GFX9-NEXT: [[FADD2:%[0-9]+]]:_(f32) = G_FADD [[FMUL4]], [[UITOFP2]] + ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD2]](f32) + ; GFX9-NEXT: [[FMUL5:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C1]] + ; GFX9-NEXT: [[FMUL6:%[0-9]+]]:_(f32) = G_FMUL [[FMUL5]], [[C2]] + ; GFX9-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL6]] + ; GFX9-NEXT: [[FMUL7:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C3]] + ; GFX9-NEXT: [[FADD3:%[0-9]+]]:_(f32) = G_FADD [[FMUL7]], [[FMUL5]] + ; GFX9-NEXT: [[FPTOUI2:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD3]](f32) + ; GFX9-NEXT: [[FPTOUI3:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC1]](f32) + ; GFX9-NEXT: [[UV36:%[0-9]+]]:_(i32), [[UV37:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C4]](i64) + ; GFX9-NEXT: [[UV38:%[0-9]+]]:_(i32), [[UV39:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV3]](i64) + ; GFX9-NEXT: [[USUBO6:%[0-9]+]]:_(i32), [[USUBO7:%[0-9]+]]:_(i1) = G_USUBO [[UV36]], [[UV38]] + ; GFX9-NEXT: [[USUBE8:%[0-9]+]]:_(i32), [[USUBE9:%[0-9]+]]:_(i1) = G_USUBE [[UV37]], [[UV39]], [[USUBO7]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_18:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_19:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](i32), [[FPTOUI2]], [[C4]] + ; GFX9-NEXT: [[UV40:%[0-9]+]]:_(i32), [[UV41:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_18]](i64) + ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(i64) = G_ANYEXT [[UV41]](i32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_20:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_21:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](i32), [[FPTOUI3]], [[ANYEXT3]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_22:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_23:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE8]](i32), [[FPTOUI2]], [[AMDGPU_MAD_U64_U32_20]] + ; GFX9-NEXT: [[UV42:%[0-9]+]]:_(i32), [[UV43:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_22]](i64) + ; GFX9-NEXT: [[MUL9:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI3]], [[UV40]] + ; GFX9-NEXT: [[MUL10:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI2]], [[UV42]] + ; GFX9-NEXT: [[UMULH12:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI2]], [[UV40]] + ; GFX9-NEXT: [[UADDO38:%[0-9]+]]:_(i32), 
[[UADDO39:%[0-9]+]]:_(i1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX9-NEXT: [[ZEXT15:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO39]](i1) + ; GFX9-NEXT: [[UADDO40:%[0-9]+]]:_(i32), [[UADDO41:%[0-9]+]]:_(i1) = G_UADDO [[UADDO38]], [[UMULH12]] + ; GFX9-NEXT: [[ZEXT16:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO41]](i1) + ; GFX9-NEXT: [[ADD12:%[0-9]+]]:_(i32) = G_ADD [[ZEXT15]], [[ZEXT16]] + ; GFX9-NEXT: [[MUL11:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI3]], [[UV42]] + ; GFX9-NEXT: [[UMULH13:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI3]], [[UV40]] + ; GFX9-NEXT: [[UMULH14:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI2]], [[UV42]] + ; GFX9-NEXT: [[UADDO42:%[0-9]+]]:_(i32), [[UADDO43:%[0-9]+]]:_(i1) = G_UADDO [[MUL11]], [[UMULH13]] + ; GFX9-NEXT: [[ZEXT17:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO43]](i1) + ; GFX9-NEXT: [[UADDO44:%[0-9]+]]:_(i32), [[UADDO45:%[0-9]+]]:_(i1) = G_UADDO [[UADDO42]], [[UMULH14]] + ; GFX9-NEXT: [[ZEXT18:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO45]](i1) + ; GFX9-NEXT: [[ADD13:%[0-9]+]]:_(i32) = G_ADD [[ZEXT17]], [[ZEXT18]] + ; GFX9-NEXT: [[UADDO46:%[0-9]+]]:_(i32), [[UADDO47:%[0-9]+]]:_(i1) = G_UADDO [[UADDO44]], [[ADD12]] + ; GFX9-NEXT: [[ZEXT19:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO47]](i1) + ; GFX9-NEXT: [[ADD14:%[0-9]+]]:_(i32) = G_ADD [[ADD13]], [[ZEXT19]] + ; GFX9-NEXT: [[UMULH15:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI3]], [[UV42]] + ; GFX9-NEXT: [[ADD15:%[0-9]+]]:_(i32) = G_ADD [[UMULH15]], [[ADD14]] + ; GFX9-NEXT: [[UADDO48:%[0-9]+]]:_(i32), [[UADDO49:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI2]], [[UADDO46]] + ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(i32), [[UADDE9:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI3]], [[ADD15]], [[UADDO49]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](i32), [[UADDO48]], [[C4]] + ; GFX9-NEXT: [[UV44:%[0-9]+]]:_(i32), [[UV45:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_24]](i64) + ; GFX9-NEXT: [[ANYEXT4:%[0-9]+]]:_(i64) = G_ANYEXT [[UV45]](i32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](i32), [[UADDE8]], [[ANYEXT4]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE8]](i32), [[UADDO48]], [[AMDGPU_MAD_U64_U32_26]] + ; GFX9-NEXT: [[UV46:%[0-9]+]]:_(i32), [[UV47:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_28]](i64) + ; GFX9-NEXT: [[MUL12:%[0-9]+]]:_(i32) = G_MUL [[UADDE8]], [[UV44]] + ; GFX9-NEXT: [[MUL13:%[0-9]+]]:_(i32) = G_MUL [[UADDO48]], [[UV46]] + ; GFX9-NEXT: [[UMULH16:%[0-9]+]]:_(i32) = G_UMULH [[UADDO48]], [[UV44]] + ; GFX9-NEXT: [[UADDO50:%[0-9]+]]:_(i32), [[UADDO51:%[0-9]+]]:_(i1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX9-NEXT: [[ZEXT20:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO51]](i1) + ; GFX9-NEXT: [[UADDO52:%[0-9]+]]:_(i32), [[UADDO53:%[0-9]+]]:_(i1) = G_UADDO [[UADDO50]], [[UMULH16]] + ; GFX9-NEXT: [[ZEXT21:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO53]](i1) + ; GFX9-NEXT: [[ADD16:%[0-9]+]]:_(i32) = G_ADD [[ZEXT20]], [[ZEXT21]] + ; GFX9-NEXT: [[MUL14:%[0-9]+]]:_(i32) = G_MUL [[UADDE8]], [[UV46]] + ; GFX9-NEXT: [[UMULH17:%[0-9]+]]:_(i32) = G_UMULH [[UADDE8]], [[UV44]] + ; GFX9-NEXT: [[UMULH18:%[0-9]+]]:_(i32) = G_UMULH [[UADDO48]], [[UV46]] + ; GFX9-NEXT: [[UADDO54:%[0-9]+]]:_(i32), [[UADDO55:%[0-9]+]]:_(i1) = G_UADDO [[MUL14]], [[UMULH17]] + ; GFX9-NEXT: [[ZEXT22:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO55]](i1) + ; GFX9-NEXT: [[UADDO56:%[0-9]+]]:_(i32), [[UADDO57:%[0-9]+]]:_(i1) = G_UADDO [[UADDO54]], [[UMULH18]] + ; GFX9-NEXT: [[ZEXT23:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO57]](i1) + 
; GFX9-NEXT: [[ADD17:%[0-9]+]]:_(i32) = G_ADD [[ZEXT22]], [[ZEXT23]] + ; GFX9-NEXT: [[UADDO58:%[0-9]+]]:_(i32), [[UADDO59:%[0-9]+]]:_(i1) = G_UADDO [[UADDO56]], [[ADD16]] + ; GFX9-NEXT: [[ZEXT24:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO59]](i1) + ; GFX9-NEXT: [[ADD18:%[0-9]+]]:_(i32) = G_ADD [[ADD17]], [[ZEXT24]] + ; GFX9-NEXT: [[UMULH19:%[0-9]+]]:_(i32) = G_UMULH [[UADDE8]], [[UV46]] + ; GFX9-NEXT: [[ADD19:%[0-9]+]]:_(i32) = G_ADD [[UMULH19]], [[ADD18]] + ; GFX9-NEXT: [[UADDO60:%[0-9]+]]:_(i32), [[UADDO61:%[0-9]+]]:_(i1) = G_UADDO [[UADDO48]], [[UADDO58]] + ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(i32), [[UADDE11:%[0-9]+]]:_(i1) = G_UADDE [[UADDE8]], [[ADD19]], [[UADDO61]] + ; GFX9-NEXT: [[UV48:%[0-9]+]]:_(i32), [[UV49:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV1]](i64) + ; GFX9-NEXT: [[UV50:%[0-9]+]]:_(i32), [[UV51:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV1]](i64) + ; GFX9-NEXT: [[MUL15:%[0-9]+]]:_(i32) = G_MUL [[UV51]], [[UADDO60]] + ; GFX9-NEXT: [[MUL16:%[0-9]+]]:_(i32) = G_MUL [[UV50]], [[UADDE10]] + ; GFX9-NEXT: [[UMULH20:%[0-9]+]]:_(i32) = G_UMULH [[UV50]], [[UADDO60]] + ; GFX9-NEXT: [[UADDO62:%[0-9]+]]:_(i32), [[UADDO63:%[0-9]+]]:_(i1) = G_UADDO [[MUL15]], [[MUL16]] + ; GFX9-NEXT: [[ZEXT25:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO63]](i1) + ; GFX9-NEXT: [[UADDO64:%[0-9]+]]:_(i32), [[UADDO65:%[0-9]+]]:_(i1) = G_UADDO [[UADDO62]], [[UMULH20]] + ; GFX9-NEXT: [[ZEXT26:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO65]](i1) + ; GFX9-NEXT: [[ADD20:%[0-9]+]]:_(i32) = G_ADD [[ZEXT25]], [[ZEXT26]] + ; GFX9-NEXT: [[MUL17:%[0-9]+]]:_(i32) = G_MUL [[UV51]], [[UADDE10]] + ; GFX9-NEXT: [[UMULH21:%[0-9]+]]:_(i32) = G_UMULH [[UV51]], [[UADDO60]] + ; GFX9-NEXT: [[UMULH22:%[0-9]+]]:_(i32) = G_UMULH [[UV50]], [[UADDE10]] + ; GFX9-NEXT: [[UADDO66:%[0-9]+]]:_(i32), [[UADDO67:%[0-9]+]]:_(i1) = G_UADDO [[MUL17]], [[UMULH21]] + ; GFX9-NEXT: [[ZEXT27:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO67]](i1) + ; GFX9-NEXT: [[UADDO68:%[0-9]+]]:_(i32), [[UADDO69:%[0-9]+]]:_(i1) = G_UADDO [[UADDO66]], [[UMULH22]] + ; GFX9-NEXT: [[ZEXT28:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO69]](i1) + ; GFX9-NEXT: [[ADD21:%[0-9]+]]:_(i32) = G_ADD [[ZEXT27]], [[ZEXT28]] + ; GFX9-NEXT: [[UADDO70:%[0-9]+]]:_(i32), [[UADDO71:%[0-9]+]]:_(i1) = G_UADDO [[UADDO68]], [[ADD20]] + ; GFX9-NEXT: [[ZEXT29:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO71]](i1) + ; GFX9-NEXT: [[ADD22:%[0-9]+]]:_(i32) = G_ADD [[ADD21]], [[ZEXT29]] + ; GFX9-NEXT: [[UMULH23:%[0-9]+]]:_(i32) = G_UMULH [[UV51]], [[UADDE10]] + ; GFX9-NEXT: [[ADD23:%[0-9]+]]:_(i32) = G_ADD [[UMULH23]], [[ADD22]] + ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO70]](i32), [[ADD23]](i32) + ; GFX9-NEXT: [[UV52:%[0-9]+]]:_(i32), [[UV53:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV3]](i64) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV52]](i32), [[UADDO70]], [[C4]] + ; GFX9-NEXT: [[UV54:%[0-9]+]]:_(i32), [[UV55:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_30]](i64) + ; GFX9-NEXT: [[ANYEXT5:%[0-9]+]]:_(i64) = G_ANYEXT [[UV55]](i32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV52]](i32), [[ADD23]], [[ANYEXT5]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV53]](i32), [[UADDO70]], [[AMDGPU_MAD_U64_U32_32]] + ; GFX9-NEXT: [[UV56:%[0-9]+]]:_(i32), [[UV57:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_34]](i64) + ; GFX9-NEXT: [[USUBO8:%[0-9]+]]:_(i32), [[USUBO9:%[0-9]+]]:_(i1) = G_USUBO [[UV48]], [[UV54]] 
+ ; GFX9-NEXT: [[USUBE10:%[0-9]+]]:_(i32), [[USUBE11:%[0-9]+]]:_(i1) = G_USUBE [[UV49]], [[UV56]], [[USUBO9]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[UV49]], [[UV56]] + ; GFX9-NEXT: [[UV58:%[0-9]+]]:_(i32), [[UV59:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV3]](i64) + ; GFX9-NEXT: [[ICMP8:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE10]](i32), [[UV59]] + ; GFX9-NEXT: [[SEXT4:%[0-9]+]]:_(i32) = G_SEXT [[ICMP8]](i1) + ; GFX9-NEXT: [[ICMP9:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO8]](i32), [[UV58]] + ; GFX9-NEXT: [[SEXT5:%[0-9]+]]:_(i32) = G_SEXT [[ICMP9]](i1) + ; GFX9-NEXT: [[ICMP10:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE10]](i32), [[UV59]] + ; GFX9-NEXT: [[SELECT4:%[0-9]+]]:_(i32) = G_SELECT [[ICMP10]](i1), [[SEXT5]], [[SEXT4]] + ; GFX9-NEXT: [[USUBO10:%[0-9]+]]:_(i32), [[USUBO11:%[0-9]+]]:_(i1) = G_USUBO [[USUBO8]], [[UV58]] + ; GFX9-NEXT: [[USUBE12:%[0-9]+]]:_(i32), [[USUBE13:%[0-9]+]]:_(i1) = G_USUBE [[SUB1]], [[UV59]], [[USUBO9]] + ; GFX9-NEXT: [[USUBE14:%[0-9]+]]:_(i32), [[USUBE15:%[0-9]+]]:_(i1) = G_USUBE [[USUBE12]], [[C5]], [[USUBO11]] + ; GFX9-NEXT: [[UV60:%[0-9]+]]:_(i32), [[UV61:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C6]](i64) + ; GFX9-NEXT: [[UADDO72:%[0-9]+]]:_(i32), [[UADDO73:%[0-9]+]]:_(i1) = G_UADDO [[UADDO70]], [[UV60]] + ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(i32), [[UADDE13:%[0-9]+]]:_(i1) = G_UADDE [[ADD23]], [[UV61]], [[UADDO73]] + ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO72]](i32), [[UADDE12]](i32) + ; GFX9-NEXT: [[ICMP11:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE14]](i32), [[UV59]] + ; GFX9-NEXT: [[SEXT6:%[0-9]+]]:_(i32) = G_SEXT [[ICMP11]](i1) + ; GFX9-NEXT: [[ICMP12:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO10]](i32), [[UV58]] + ; GFX9-NEXT: [[SEXT7:%[0-9]+]]:_(i32) = G_SEXT [[ICMP12]](i1) + ; GFX9-NEXT: [[ICMP13:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE14]](i32), [[UV59]] + ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(i32) = G_SELECT [[ICMP13]](i1), [[SEXT7]], [[SEXT6]] + ; GFX9-NEXT: [[UV62:%[0-9]+]]:_(i32), [[UV63:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C6]](i64) + ; GFX9-NEXT: [[UADDO74:%[0-9]+]]:_(i32), [[UADDO75:%[0-9]+]]:_(i1) = G_UADDO [[UADDO72]], [[UV62]] + ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(i32), [[UADDE15:%[0-9]+]]:_(i1) = G_UADDE [[UADDE12]], [[UV63]], [[UADDO75]] + ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO74]](i32), [[UADDE14]](i32) + ; GFX9-NEXT: [[ICMP14:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT5]](i32), [[C5]] + ; GFX9-NEXT: [[SELECT6:%[0-9]+]]:_(i64) = G_SELECT [[ICMP14]](i1), [[MV5]], [[MV4]] + ; GFX9-NEXT: [[ICMP15:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT4]](i32), [[C5]] + ; GFX9-NEXT: [[SELECT7:%[0-9]+]]:_(i64) = G_SELECT [[ICMP15]](i1), [[SELECT6]], [[MV3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[SELECT3]](i64), [[SELECT7]](i64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; GFX10-LABEL: name: test_udiv_v2s64 ; GFX10: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV4]](s32) - ; 
GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV5]](s32) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]] - ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] - ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 - ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C2]] - ; GFX10-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] - ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] - ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) - ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) - ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[UV8]] - ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[UV9]], [[USUBO1]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C4]] - ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV11]], [[MUL]] - ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]] - ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV10]] - ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV10]] - ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[MUL3]] - ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV10]] - ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] - ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] - ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] - 
; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD4]] - ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C4]] - ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64) - ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] - ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV13]], [[MUL5]] - ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] - ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[MUL6]] - ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV12]] - ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] - ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV12]] - ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] - ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] - ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] - ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV12]] - ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] - ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] - ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] - ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] - ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD10]] - ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] - ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO22]] - ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE2]] - ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO22]] - ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] - ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] - ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX10-NEXT: 
[[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE2]] - ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO22]] - ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE2]] - ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] - ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] - ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE2]] - ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD14]] - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD15]](s32) - ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDO32]], [[C4]] - ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD15]] - ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV21]], [[MUL13]] - ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO32]] - ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL14]] - ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV20]] - ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD17]], [[USUBO3]] - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD17]] - ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV23]] - ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV22]] - ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV23]] - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV22]] - ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV23]], [[USUBO3]] - ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] - ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV24]] - ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV25]], [[UADDO35]] - ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32) - ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV23]] - ; GFX10-NEXT: 
[[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV22]] - ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV23]] - ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV26]] - ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV27]], [[UADDO37]] - ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32) - ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] - ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] - ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] - ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] - ; GFX10-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX10-NEXT: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV28]](s32) - ; GFX10-NEXT: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV29]](s32) - ; GFX10-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C]] - ; GFX10-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[UITOFP2]] - ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD2]](s32) - ; GFX10-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C1]] - ; GFX10-NEXT: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[FMUL5]], [[C2]] - ; GFX10-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL6]] - ; GFX10-NEXT: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C3]] - ; GFX10-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]] - ; GFX10-NEXT: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD3]](s32) - ; GFX10-NEXT: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32) - ; GFX10-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) - ; GFX10-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[UV30]], [[UV32]] - ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV31]], [[UV33]], [[USUBO7]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](s32), [[FPTOUI2]], [[C4]] - ; GFX10-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) - ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[USUBO6]], [[FPTOUI3]] - ; GFX10-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[UV35]], [[MUL15]] - ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[USUBE8]], [[FPTOUI2]] - ; GFX10-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[MUL16]] - ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV34]] - ; GFX10-NEXT: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD19]] - ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV34]] - ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[MUL18]] - ; GFX10-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO39]](s1) - ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], 
[[UMULH12]] - ; GFX10-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO41]](s1) - ; GFX10-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] - ; GFX10-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD19]] - ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV34]] - ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD19]] - ; GFX10-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL19]], [[UMULH13]] - ; GFX10-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1) - ; GFX10-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH14]] - ; GFX10-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO45]](s1) - ; GFX10-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] - ; GFX10-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[ADD20]] - ; GFX10-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) - ; GFX10-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT19]] - ; GFX10-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD19]] - ; GFX10-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[ADD22]] - ; GFX10-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO46]] - ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO49]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](s32), [[UADDO48]], [[C4]] - ; GFX10-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_8]](s64) - ; GFX10-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO6]], [[UADDE8]] - ; GFX10-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UV37]], [[MUL20]] - ; GFX10-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[USUBE8]], [[UADDO48]] - ; GFX10-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[MUL21]] - ; GFX10-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[UADDE8]], [[UV36]] - ; GFX10-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[UADDO48]], [[ADD25]] - ; GFX10-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[UV36]] - ; GFX10-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL22]], [[MUL23]] - ; GFX10-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) - ; GFX10-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH16]] - ; GFX10-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1) - ; GFX10-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] - ; GFX10-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[UADDE8]], [[ADD25]] - ; GFX10-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE8]], [[UV36]] - ; GFX10-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[ADD25]] - ; GFX10-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL24]], [[UMULH17]] - ; GFX10-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) - ; GFX10-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH18]] - ; GFX10-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO57]](s1) - ; GFX10-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] - ; GFX10-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[UADDO56]], [[ADD26]] - ; GFX10-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) - ; GFX10-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ADD27]], [[ZEXT24]] - ; GFX10-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE8]], [[ADD25]] - ; GFX10-NEXT: 
[[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD28]] - ; GFX10-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO48]], [[UADDO58]] - ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[ADD29]], [[UADDO61]] - ; GFX10-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX10-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX10-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[UV41]], [[UADDO60]] - ; GFX10-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[UV40]], [[UADDE10]] - ; GFX10-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV40]], [[UADDO60]] - ; GFX10-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL25]], [[MUL26]] - ; GFX10-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) - ; GFX10-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH20]] - ; GFX10-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1) - ; GFX10-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] - ; GFX10-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UV41]], [[UADDE10]] - ; GFX10-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV41]], [[UADDO60]] - ; GFX10-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV40]], [[UADDE10]] - ; GFX10-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[UMULH21]] - ; GFX10-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) - ; GFX10-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH22]] - ; GFX10-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO69]](s1) - ; GFX10-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] - ; GFX10-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[UADDO68]], [[ADD30]] - ; GFX10-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1) - ; GFX10-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ADD31]], [[ZEXT29]] - ; GFX10-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV41]], [[UADDE10]] - ; GFX10-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[ADD32]] - ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO70]](s32), [[ADD33]](s32) - ; GFX10-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV42]](s32), [[UADDO70]], [[C4]] - ; GFX10-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX10-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UV42]], [[ADD33]] - ; GFX10-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[UV45]], [[MUL28]] - ; GFX10-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UV43]], [[UADDO70]] - ; GFX10-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[ADD34]], [[MUL29]] - ; GFX10-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV38]], [[UV44]] - ; GFX10-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[UV39]], [[ADD35]], [[USUBO9]] - ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV39]], [[ADD35]] - ; GFX10-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX10-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE10]](s32), [[UV47]] - ; GFX10-NEXT: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1) - ; GFX10-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO8]](s32), [[UV46]] - ; GFX10-NEXT: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1) - ; GFX10-NEXT: 
[[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE10]](s32), [[UV47]] - ; GFX10-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]] - ; GFX10-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[USUBO8]], [[UV46]] - ; GFX10-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV47]], [[USUBO9]] - ; GFX10-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[USUBE12]], [[C5]], [[USUBO11]] - ; GFX10-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX10-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UV48]] - ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[ADD33]], [[UV49]], [[UADDO73]] - ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO72]](s32), [[UADDE12]](s32) - ; GFX10-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE14]](s32), [[UV47]] - ; GFX10-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) - ; GFX10-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV46]] - ; GFX10-NEXT: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1) - ; GFX10-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE14]](s32), [[UV47]] - ; GFX10-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] - ; GFX10-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX10-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[UV50]] - ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[UV51]], [[UADDO75]] - ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO74]](s32), [[UADDE14]](s32) - ; GFX10-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C5]] - ; GFX10-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV5]], [[MV4]] - ; GFX10-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C5]] - ; GFX10-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV3]] - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT3]](s64), [[SELECT7]](s64) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - %2:_(<2 x s64>) = G_UDIV %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY1]](<2 x i64>) + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[UV4]](i32) + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[UV5]](i32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP1]], [[C]] + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD]](f32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; 
GFX10-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FMUL1]], [[C2]] + ; GFX10-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] + ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD1]](f32) + ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC]](f32) + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C4]](i64) + ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV6]], [[UV8]] + ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV7]], [[UV9]], [[USUBO1]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[FPTOUI]], [[C4]] + ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](i64) + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[UV11]], [[MUL]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ADD]], [[MUL1]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[UV10]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[UV10]] + ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[MUL2]], [[MUL3]] + ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO1]](i1) + ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UADDO]], [[UMULH]] + ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO3]](i1) + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[UV10]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(i32), [[UADDO5:%[0-9]+]]:_(i1) = G_UADDO [[MUL4]], [[UMULH1]] + ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO5]](i1) + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(i32), [[UADDO7:%[0-9]+]]:_(i1) = G_UADDO [[UADDO4]], [[UMULH2]] + ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO7]](i1) + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(i32), [[UADDO9:%[0-9]+]]:_(i1) = G_UADDO [[UADDO6]], [[ADD2]] + ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO9]](i1) + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[UMULH3]], [[ADD4]] + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(i32), [[UADDO11:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI]], [[UADDO8]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[UADDO10]], [[C4]] + ; 
GFX10-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](i64) + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[UADDE]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[UV13]], [[MUL5]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(i32) = G_MUL [[USUBE]], [[UADDO10]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[ADD6]], [[MUL6]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(i32) = G_MUL [[UADDE]], [[UV12]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(i32) = G_MUL [[UADDO10]], [[ADD7]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(i32) = G_UMULH [[UADDO10]], [[UV12]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(i32), [[UADDO13:%[0-9]+]]:_(i1) = G_UADDO [[MUL7]], [[MUL8]] + ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO13]](i1) + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(i32), [[UADDO15:%[0-9]+]]:_(i1) = G_UADDO [[UADDO12]], [[UMULH4]] + ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO15]](i1) + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(i32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(i32) = G_MUL [[UADDE]], [[ADD7]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(i32) = G_UMULH [[UADDE]], [[UV12]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(i32) = G_UMULH [[UADDO10]], [[ADD7]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(i32), [[UADDO17:%[0-9]+]]:_(i1) = G_UADDO [[MUL9]], [[UMULH5]] + ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO17]](i1) + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(i32), [[UADDO19:%[0-9]+]]:_(i1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO19]](i1) + ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(i32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(i32), [[UADDO21:%[0-9]+]]:_(i1) = G_UADDO [[UADDO18]], [[ADD8]] + ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO21]](i1) + ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(i32) = G_ADD [[ADD9]], [[ZEXT9]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(i32) = G_UMULH [[UADDE]], [[ADD7]] + ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(i32) = G_ADD [[UMULH7]], [[ADD10]] + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(i32), [[UADDO23:%[0-9]+]]:_(i1) = G_UADDO [[UADDO10]], [[UADDO20]] + ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] + ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV]](i64) + ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV]](i64) + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(i32) = G_MUL [[UV17]], [[UADDO22]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(i32) = G_MUL [[UV16]], [[UADDE2]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(i32) = G_UMULH [[UV16]], [[UADDO22]] + ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(i32), [[UADDO25:%[0-9]+]]:_(i1) = G_UADDO [[MUL10]], [[MUL11]] + ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO25]](i1) + ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(i32), [[UADDO27:%[0-9]+]]:_(i1) = G_UADDO [[UADDO24]], [[UMULH8]] + ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO27]](i1) + ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(i32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(i32) = G_MUL [[UV17]], [[UADDE2]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(i32) = G_UMULH [[UV17]], [[UADDO22]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(i32) = G_UMULH [[UV16]], [[UADDE2]] + ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(i32), [[UADDO29:%[0-9]+]]:_(i1) = G_UADDO [[MUL12]], [[UMULH9]] + ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO29]](i1) + ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(i32), [[UADDO31:%[0-9]+]]:_(i1) = 
G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO31]](i1) + ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(i32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(i32), [[UADDO33:%[0-9]+]]:_(i1) = G_UADDO [[UADDO30]], [[ADD12]] + ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO33]](i1) + ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(i32) = G_ADD [[ADD13]], [[ZEXT14]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(i32) = G_UMULH [[UV17]], [[UADDE2]] + ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(i32) = G_ADD [[UMULH11]], [[ADD14]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO32]](i32), [[ADD15]](i32) + ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV18]](i32), [[UADDO32]], [[C4]] + ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](i64) + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(i32) = G_MUL [[UV18]], [[ADD15]] + ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(i32) = G_ADD [[UV21]], [[MUL13]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(i32) = G_MUL [[UV19]], [[UADDO32]] + ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(i32) = G_ADD [[ADD16]], [[MUL14]] + ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(i32), [[USUBO3:%[0-9]+]]:_(i1) = G_USUBO [[UV14]], [[UV20]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV15]], [[ADD17]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV15]], [[ADD17]] + ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE2]](i32), [[UV23]] + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO2]](i32), [[UV22]] + ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(i32) = G_SEXT [[ICMP1]](i1) + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE2]](i32), [[UV23]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SEXT1]], [[SEXT]] + ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(i32), [[USUBO5:%[0-9]+]]:_(i1) = G_USUBO [[USUBO2]], [[UV22]] + ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(i32), [[USUBE5:%[0-9]+]]:_(i1) = G_USUBE [[SUB]], [[UV23]], [[USUBO3]] + ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(i32), [[USUBE7:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C6]](i64) + ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(i32), [[UADDO35:%[0-9]+]]:_(i1) = G_UADDO [[UADDO32]], [[UV24]] + ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(i32), [[UADDE5:%[0-9]+]]:_(i1) = G_UADDE [[ADD15]], [[UV25]], [[UADDO35]] + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO34]](i32), [[UADDE4]](i32) + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE6]](i32), [[UV23]] + ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(i32) = G_SEXT [[ICMP3]](i1) + ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO4]](i32), [[UV22]] + ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(i32) = G_SEXT [[ICMP4]](i1) + ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE6]](i32), [[UV23]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP5]](i1), [[SEXT3]], [[SEXT2]] + ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C6]](i64) + ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(i32), 
[[UADDO37:%[0-9]+]]:_(i1) = G_UADDO [[UADDO34]], [[UV26]] + ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(i32), [[UADDE7:%[0-9]+]]:_(i1) = G_UADDE [[UADDE4]], [[UV27]], [[UADDO37]] + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO36]](i32), [[UADDE6]](i32) + ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT1]](i32), [[C5]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[MV2]], [[MV1]] + ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT]](i32), [[C5]] + ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP7]](i1), [[SELECT2]], [[MV]] + ; GFX10-NEXT: [[UV28:%[0-9]+]]:_(i32), [[UV29:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV3]](i64) + ; GFX10-NEXT: [[UITOFP2:%[0-9]+]]:_(f32) = G_UITOFP [[UV28]](i32) + ; GFX10-NEXT: [[UITOFP3:%[0-9]+]]:_(f32) = G_UITOFP [[UV29]](i32) + ; GFX10-NEXT: [[FMUL4:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP3]], [[C]] + ; GFX10-NEXT: [[FADD2:%[0-9]+]]:_(f32) = G_FADD [[FMUL4]], [[UITOFP2]] + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD2]](f32) + ; GFX10-NEXT: [[FMUL5:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C1]] + ; GFX10-NEXT: [[FMUL6:%[0-9]+]]:_(f32) = G_FMUL [[FMUL5]], [[C2]] + ; GFX10-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL6]] + ; GFX10-NEXT: [[FMUL7:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C3]] + ; GFX10-NEXT: [[FADD3:%[0-9]+]]:_(f32) = G_FADD [[FMUL7]], [[FMUL5]] + ; GFX10-NEXT: [[FPTOUI2:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD3]](f32) + ; GFX10-NEXT: [[FPTOUI3:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC1]](f32) + ; GFX10-NEXT: [[UV30:%[0-9]+]]:_(i32), [[UV31:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C4]](i64) + ; GFX10-NEXT: [[UV32:%[0-9]+]]:_(i32), [[UV33:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV3]](i64) + ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(i32), [[USUBO7:%[0-9]+]]:_(i1) = G_USUBO [[UV30]], [[UV32]] + ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(i32), [[USUBE9:%[0-9]+]]:_(i1) = G_USUBE [[UV31]], [[UV33]], [[USUBO7]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](i32), [[FPTOUI2]], [[C4]] + ; GFX10-NEXT: [[UV34:%[0-9]+]]:_(i32), [[UV35:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](i64) + ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(i32) = G_MUL [[USUBO6]], [[FPTOUI3]] + ; GFX10-NEXT: [[ADD18:%[0-9]+]]:_(i32) = G_ADD [[UV35]], [[MUL15]] + ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(i32) = G_MUL [[USUBE8]], [[FPTOUI2]] + ; GFX10-NEXT: [[ADD19:%[0-9]+]]:_(i32) = G_ADD [[ADD18]], [[MUL16]] + ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI3]], [[UV34]] + ; GFX10-NEXT: [[MUL18:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI2]], [[ADD19]] + ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI2]], [[UV34]] + ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(i32), [[UADDO39:%[0-9]+]]:_(i1) = G_UADDO [[MUL17]], [[MUL18]] + ; GFX10-NEXT: [[ZEXT15:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO39]](i1) + ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(i32), [[UADDO41:%[0-9]+]]:_(i1) = G_UADDO [[UADDO38]], [[UMULH12]] + ; GFX10-NEXT: [[ZEXT16:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO41]](i1) + ; GFX10-NEXT: [[ADD20:%[0-9]+]]:_(i32) = G_ADD [[ZEXT15]], [[ZEXT16]] + ; GFX10-NEXT: [[MUL19:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI3]], [[ADD19]] + ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI3]], [[UV34]] + ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI2]], [[ADD19]] + ; GFX10-NEXT: [[UADDO42:%[0-9]+]]:_(i32), [[UADDO43:%[0-9]+]]:_(i1) = G_UADDO [[MUL19]], [[UMULH13]] + ; GFX10-NEXT: [[ZEXT17:%[0-9]+]]:_(i32) = 
G_ZEXT [[UADDO43]](i1) + ; GFX10-NEXT: [[UADDO44:%[0-9]+]]:_(i32), [[UADDO45:%[0-9]+]]:_(i1) = G_UADDO [[UADDO42]], [[UMULH14]] + ; GFX10-NEXT: [[ZEXT18:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO45]](i1) + ; GFX10-NEXT: [[ADD21:%[0-9]+]]:_(i32) = G_ADD [[ZEXT17]], [[ZEXT18]] + ; GFX10-NEXT: [[UADDO46:%[0-9]+]]:_(i32), [[UADDO47:%[0-9]+]]:_(i1) = G_UADDO [[UADDO44]], [[ADD20]] + ; GFX10-NEXT: [[ZEXT19:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO47]](i1) + ; GFX10-NEXT: [[ADD22:%[0-9]+]]:_(i32) = G_ADD [[ADD21]], [[ZEXT19]] + ; GFX10-NEXT: [[UMULH15:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI3]], [[ADD19]] + ; GFX10-NEXT: [[ADD23:%[0-9]+]]:_(i32) = G_ADD [[UMULH15]], [[ADD22]] + ; GFX10-NEXT: [[UADDO48:%[0-9]+]]:_(i32), [[UADDO49:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI2]], [[UADDO46]] + ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(i32), [[UADDE9:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO49]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](i32), [[UADDO48]], [[C4]] + ; GFX10-NEXT: [[UV36:%[0-9]+]]:_(i32), [[UV37:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_8]](i64) + ; GFX10-NEXT: [[MUL20:%[0-9]+]]:_(i32) = G_MUL [[USUBO6]], [[UADDE8]] + ; GFX10-NEXT: [[ADD24:%[0-9]+]]:_(i32) = G_ADD [[UV37]], [[MUL20]] + ; GFX10-NEXT: [[MUL21:%[0-9]+]]:_(i32) = G_MUL [[USUBE8]], [[UADDO48]] + ; GFX10-NEXT: [[ADD25:%[0-9]+]]:_(i32) = G_ADD [[ADD24]], [[MUL21]] + ; GFX10-NEXT: [[MUL22:%[0-9]+]]:_(i32) = G_MUL [[UADDE8]], [[UV36]] + ; GFX10-NEXT: [[MUL23:%[0-9]+]]:_(i32) = G_MUL [[UADDO48]], [[ADD25]] + ; GFX10-NEXT: [[UMULH16:%[0-9]+]]:_(i32) = G_UMULH [[UADDO48]], [[UV36]] + ; GFX10-NEXT: [[UADDO50:%[0-9]+]]:_(i32), [[UADDO51:%[0-9]+]]:_(i1) = G_UADDO [[MUL22]], [[MUL23]] + ; GFX10-NEXT: [[ZEXT20:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO51]](i1) + ; GFX10-NEXT: [[UADDO52:%[0-9]+]]:_(i32), [[UADDO53:%[0-9]+]]:_(i1) = G_UADDO [[UADDO50]], [[UMULH16]] + ; GFX10-NEXT: [[ZEXT21:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO53]](i1) + ; GFX10-NEXT: [[ADD26:%[0-9]+]]:_(i32) = G_ADD [[ZEXT20]], [[ZEXT21]] + ; GFX10-NEXT: [[MUL24:%[0-9]+]]:_(i32) = G_MUL [[UADDE8]], [[ADD25]] + ; GFX10-NEXT: [[UMULH17:%[0-9]+]]:_(i32) = G_UMULH [[UADDE8]], [[UV36]] + ; GFX10-NEXT: [[UMULH18:%[0-9]+]]:_(i32) = G_UMULH [[UADDO48]], [[ADD25]] + ; GFX10-NEXT: [[UADDO54:%[0-9]+]]:_(i32), [[UADDO55:%[0-9]+]]:_(i1) = G_UADDO [[MUL24]], [[UMULH17]] + ; GFX10-NEXT: [[ZEXT22:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO55]](i1) + ; GFX10-NEXT: [[UADDO56:%[0-9]+]]:_(i32), [[UADDO57:%[0-9]+]]:_(i1) = G_UADDO [[UADDO54]], [[UMULH18]] + ; GFX10-NEXT: [[ZEXT23:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO57]](i1) + ; GFX10-NEXT: [[ADD27:%[0-9]+]]:_(i32) = G_ADD [[ZEXT22]], [[ZEXT23]] + ; GFX10-NEXT: [[UADDO58:%[0-9]+]]:_(i32), [[UADDO59:%[0-9]+]]:_(i1) = G_UADDO [[UADDO56]], [[ADD26]] + ; GFX10-NEXT: [[ZEXT24:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO59]](i1) + ; GFX10-NEXT: [[ADD28:%[0-9]+]]:_(i32) = G_ADD [[ADD27]], [[ZEXT24]] + ; GFX10-NEXT: [[UMULH19:%[0-9]+]]:_(i32) = G_UMULH [[UADDE8]], [[ADD25]] + ; GFX10-NEXT: [[ADD29:%[0-9]+]]:_(i32) = G_ADD [[UMULH19]], [[ADD28]] + ; GFX10-NEXT: [[UADDO60:%[0-9]+]]:_(i32), [[UADDO61:%[0-9]+]]:_(i1) = G_UADDO [[UADDO48]], [[UADDO58]] + ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(i32), [[UADDE11:%[0-9]+]]:_(i1) = G_UADDE [[UADDE8]], [[ADD29]], [[UADDO61]] + ; GFX10-NEXT: [[UV38:%[0-9]+]]:_(i32), [[UV39:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV1]](i64) + ; GFX10-NEXT: [[UV40:%[0-9]+]]:_(i32), [[UV41:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV1]](i64) + ; GFX10-NEXT: [[MUL25:%[0-9]+]]:_(i32) = G_MUL [[UV41]], 
[[UADDO60]] + ; GFX10-NEXT: [[MUL26:%[0-9]+]]:_(i32) = G_MUL [[UV40]], [[UADDE10]] + ; GFX10-NEXT: [[UMULH20:%[0-9]+]]:_(i32) = G_UMULH [[UV40]], [[UADDO60]] + ; GFX10-NEXT: [[UADDO62:%[0-9]+]]:_(i32), [[UADDO63:%[0-9]+]]:_(i1) = G_UADDO [[MUL25]], [[MUL26]] + ; GFX10-NEXT: [[ZEXT25:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO63]](i1) + ; GFX10-NEXT: [[UADDO64:%[0-9]+]]:_(i32), [[UADDO65:%[0-9]+]]:_(i1) = G_UADDO [[UADDO62]], [[UMULH20]] + ; GFX10-NEXT: [[ZEXT26:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO65]](i1) + ; GFX10-NEXT: [[ADD30:%[0-9]+]]:_(i32) = G_ADD [[ZEXT25]], [[ZEXT26]] + ; GFX10-NEXT: [[MUL27:%[0-9]+]]:_(i32) = G_MUL [[UV41]], [[UADDE10]] + ; GFX10-NEXT: [[UMULH21:%[0-9]+]]:_(i32) = G_UMULH [[UV41]], [[UADDO60]] + ; GFX10-NEXT: [[UMULH22:%[0-9]+]]:_(i32) = G_UMULH [[UV40]], [[UADDE10]] + ; GFX10-NEXT: [[UADDO66:%[0-9]+]]:_(i32), [[UADDO67:%[0-9]+]]:_(i1) = G_UADDO [[MUL27]], [[UMULH21]] + ; GFX10-NEXT: [[ZEXT27:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO67]](i1) + ; GFX10-NEXT: [[UADDO68:%[0-9]+]]:_(i32), [[UADDO69:%[0-9]+]]:_(i1) = G_UADDO [[UADDO66]], [[UMULH22]] + ; GFX10-NEXT: [[ZEXT28:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO69]](i1) + ; GFX10-NEXT: [[ADD31:%[0-9]+]]:_(i32) = G_ADD [[ZEXT27]], [[ZEXT28]] + ; GFX10-NEXT: [[UADDO70:%[0-9]+]]:_(i32), [[UADDO71:%[0-9]+]]:_(i1) = G_UADDO [[UADDO68]], [[ADD30]] + ; GFX10-NEXT: [[ZEXT29:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO71]](i1) + ; GFX10-NEXT: [[ADD32:%[0-9]+]]:_(i32) = G_ADD [[ADD31]], [[ZEXT29]] + ; GFX10-NEXT: [[UMULH23:%[0-9]+]]:_(i32) = G_UMULH [[UV41]], [[UADDE10]] + ; GFX10-NEXT: [[ADD33:%[0-9]+]]:_(i32) = G_ADD [[UMULH23]], [[ADD32]] + ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO70]](i32), [[ADD33]](i32) + ; GFX10-NEXT: [[UV42:%[0-9]+]]:_(i32), [[UV43:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV3]](i64) + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV42]](i32), [[UADDO70]], [[C4]] + ; GFX10-NEXT: [[UV44:%[0-9]+]]:_(i32), [[UV45:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](i64) + ; GFX10-NEXT: [[MUL28:%[0-9]+]]:_(i32) = G_MUL [[UV42]], [[ADD33]] + ; GFX10-NEXT: [[ADD34:%[0-9]+]]:_(i32) = G_ADD [[UV45]], [[MUL28]] + ; GFX10-NEXT: [[MUL29:%[0-9]+]]:_(i32) = G_MUL [[UV43]], [[UADDO70]] + ; GFX10-NEXT: [[ADD35:%[0-9]+]]:_(i32) = G_ADD [[ADD34]], [[MUL29]] + ; GFX10-NEXT: [[USUBO8:%[0-9]+]]:_(i32), [[USUBO9:%[0-9]+]]:_(i1) = G_USUBO [[UV38]], [[UV44]] + ; GFX10-NEXT: [[USUBE10:%[0-9]+]]:_(i32), [[USUBE11:%[0-9]+]]:_(i1) = G_USUBE [[UV39]], [[ADD35]], [[USUBO9]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[UV39]], [[ADD35]] + ; GFX10-NEXT: [[UV46:%[0-9]+]]:_(i32), [[UV47:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV3]](i64) + ; GFX10-NEXT: [[ICMP8:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE10]](i32), [[UV47]] + ; GFX10-NEXT: [[SEXT4:%[0-9]+]]:_(i32) = G_SEXT [[ICMP8]](i1) + ; GFX10-NEXT: [[ICMP9:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO8]](i32), [[UV46]] + ; GFX10-NEXT: [[SEXT5:%[0-9]+]]:_(i32) = G_SEXT [[ICMP9]](i1) + ; GFX10-NEXT: [[ICMP10:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE10]](i32), [[UV47]] + ; GFX10-NEXT: [[SELECT4:%[0-9]+]]:_(i32) = G_SELECT [[ICMP10]](i1), [[SEXT5]], [[SEXT4]] + ; GFX10-NEXT: [[USUBO10:%[0-9]+]]:_(i32), [[USUBO11:%[0-9]+]]:_(i1) = G_USUBO [[USUBO8]], [[UV46]] + ; GFX10-NEXT: [[USUBE12:%[0-9]+]]:_(i32), [[USUBE13:%[0-9]+]]:_(i1) = G_USUBE [[SUB1]], [[UV47]], [[USUBO9]] + ; GFX10-NEXT: [[USUBE14:%[0-9]+]]:_(i32), [[USUBE15:%[0-9]+]]:_(i1) = G_USUBE [[USUBE12]], [[C5]], [[USUBO11]] + ; GFX10-NEXT: 
[[UV48:%[0-9]+]]:_(i32), [[UV49:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C6]](i64) + ; GFX10-NEXT: [[UADDO72:%[0-9]+]]:_(i32), [[UADDO73:%[0-9]+]]:_(i1) = G_UADDO [[UADDO70]], [[UV48]] + ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(i32), [[UADDE13:%[0-9]+]]:_(i1) = G_UADDE [[ADD33]], [[UV49]], [[UADDO73]] + ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO72]](i32), [[UADDE12]](i32) + ; GFX10-NEXT: [[ICMP11:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE14]](i32), [[UV47]] + ; GFX10-NEXT: [[SEXT6:%[0-9]+]]:_(i32) = G_SEXT [[ICMP11]](i1) + ; GFX10-NEXT: [[ICMP12:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO10]](i32), [[UV46]] + ; GFX10-NEXT: [[SEXT7:%[0-9]+]]:_(i32) = G_SEXT [[ICMP12]](i1) + ; GFX10-NEXT: [[ICMP13:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE14]](i32), [[UV47]] + ; GFX10-NEXT: [[SELECT5:%[0-9]+]]:_(i32) = G_SELECT [[ICMP13]](i1), [[SEXT7]], [[SEXT6]] + ; GFX10-NEXT: [[UV50:%[0-9]+]]:_(i32), [[UV51:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C6]](i64) + ; GFX10-NEXT: [[UADDO74:%[0-9]+]]:_(i32), [[UADDO75:%[0-9]+]]:_(i1) = G_UADDO [[UADDO72]], [[UV50]] + ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(i32), [[UADDE15:%[0-9]+]]:_(i1) = G_UADDE [[UADDE12]], [[UV51]], [[UADDO75]] + ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO74]](i32), [[UADDE14]](i32) + ; GFX10-NEXT: [[ICMP14:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT5]](i32), [[C5]] + ; GFX10-NEXT: [[SELECT6:%[0-9]+]]:_(i64) = G_SELECT [[ICMP14]](i1), [[MV5]], [[MV4]] + ; GFX10-NEXT: [[ICMP15:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT4]](i32), [[C5]] + ; GFX10-NEXT: [[SELECT7:%[0-9]+]]:_(i64) = G_SELECT [[ICMP15]](i1), [[SELECT6]], [[MV3]] + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[SELECT3]](i64), [[SELECT7]](i64) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(<2 x i64>) = G_UDIV %0, %1 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<2 x i64>) ... 
--- @@ -2146,141 +2146,141 @@ body: | ; GFX6-LABEL: name: test_udiv_s16 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) - ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]] - ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] - ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]] - ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[AND1]] - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] - ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] - ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT2]], [[C]] - ; GFX6-NEXT: $vgpr0 = COPY [[AND2]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[AND1]](i32) + ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[AND1]] + ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[AND]], [[ADD]] + ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[AND]], [[MUL1]] + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[AND1]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[UMULH1]], [[C3]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i32) = 
G_SELECT [[ICMP]](i1), [[ADD1]], [[UMULH1]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[AND1]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT1]](i32), [[AND1]] + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[SELECT]], [[C3]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[ADD2]], [[SELECT]] + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[SELECT2]], [[C]] + ; GFX6-NEXT: $vgpr0 = COPY [[AND2]](i32) ; ; GFX8-LABEL: name: test_udiv_s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) - ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]] - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] - ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]] - ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[AND1]] - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] - ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT2]], [[C]] - ; GFX8-NEXT: $vgpr0 = COPY [[AND2]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[AND1]](i32) + ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[AND1]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; 
GFX8-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[AND]], [[ADD]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[AND]], [[MUL1]] + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[AND1]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[UMULH1]], [[C3]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[ADD1]], [[UMULH1]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[AND1]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT1]](i32), [[AND1]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[SELECT]], [[C3]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[ADD2]], [[SELECT]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[SELECT2]], [[C]] + ; GFX8-NEXT: $vgpr0 = COPY [[AND2]](i32) ; ; GFX9-LABEL: name: test_udiv_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) - ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]] - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] - ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]] - ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[AND1]] - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT2]], [[C]] - ; GFX9-NEXT: $vgpr0 = COPY [[AND2]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[AND1]](i32) + ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = 
G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[AND1]] + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[AND]], [[ADD]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[AND]], [[MUL1]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[AND1]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[UMULH1]], [[C3]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[ADD1]], [[UMULH1]] + ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[AND1]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT1]](i32), [[AND1]] + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[SELECT]], [[C3]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[ADD2]], [[SELECT]] + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[SELECT2]], [[C]] + ; GFX9-NEXT: $vgpr0 = COPY [[AND2]](i32) ; ; GFX10-LABEL: name: test_udiv_s16 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) - ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]] - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] - ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]] - ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] - ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[AND1]] - ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] - ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), 
[[ADD2]], [[SELECT]] - ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT2]], [[C]] - ; GFX10-NEXT: $vgpr0 = COPY [[AND2]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s16) = G_UDIV %2, %3 - %5:_(s32) = G_ZEXT %4 - $vgpr0 = COPY %5 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX10-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[AND1]](i32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[AND1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[AND]], [[ADD]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[AND]], [[MUL1]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[AND1]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[UMULH1]], [[C3]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[ADD1]], [[UMULH1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[AND1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT1]](i32), [[AND1]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[SELECT]], [[C3]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[ADD2]], [[SELECT]] + ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[SELECT2]], [[C]] + ; GFX10-NEXT: $vgpr0 = COPY [[AND2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i16) = G_UDIV %2, %3 + %5:_(i32) = G_ZEXT %4(i16) + $vgpr0 = COPY %5(i32) ... 
--- @@ -2292,246 +2292,246 @@ body: | ; GFX6-LABEL: name: test_udiv_v2s16 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) - ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] - ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND1]] - ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] - ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C4]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]] - ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[AND1]] - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C4]] - ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] - ; GFX6-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[LSHR1]](s32) - ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) - ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] - ; GFX6-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[LSHR1]] - ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB3]], [[FPTOUI1]] - ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] - ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] - ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[LSHR]], [[ADD3]] - ; GFX6-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[LSHR1]] - ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[LSHR]], [[MUL3]] - ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB4]](s32), [[LSHR1]] - ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[C4]] - ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[ADD4]], [[UMULH3]] - ; GFX6-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SUB4]], [[LSHR1]] - ; GFX6-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB5]], 
[[SUB4]] - ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[LSHR1]] - ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SELECT3]], [[C4]] - ; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD5]], [[SELECT3]] - ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT2]], [[C1]] - ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SELECT5]], [[C1]] - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]] - ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C1]] + ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[AND1]](i32) + ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C3]], [[AND1]] + ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[AND]], [[ADD]] + ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[AND]], [[MUL1]] + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[AND1]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[UMULH1]], [[C4]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[ADD1]], [[UMULH1]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[AND1]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT1]](i32), [[AND1]] + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[SELECT]], [[C4]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[ADD2]], [[SELECT]] + ; GFX6-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[LSHR1]](i32) + ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](f32) + ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] + ; GFX6-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL1]](f32) + ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[C3]], [[LSHR1]] + ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[SUB3]], [[FPTOUI1]] + ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[LSHR]], [[ADD3]] + ; 
GFX6-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UMULH3]], [[LSHR1]] + ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[LSHR]], [[MUL3]] + ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB4]](i32), [[LSHR1]] + ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[UMULH3]], [[C4]] + ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[ADD4]], [[UMULH3]] + ; GFX6-NEXT: [[SUB5:%[0-9]+]]:_(i32) = G_SUB [[SUB4]], [[LSHR1]] + ; GFX6-NEXT: [[SELECT4:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SUB5]], [[SUB4]] + ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT4]](i32), [[LSHR1]] + ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[SELECT3]], [[C4]] + ; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(i32) = G_SELECT [[ICMP3]](i1), [[ADD5]], [[SELECT3]] + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[SELECT2]], [[C1]] + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[SELECT5]], [[C1]] + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C]](i32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL]] + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) ; ; GFX8-LABEL: name: test_udiv_v2s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) - ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] - ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND1]] - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] - ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C4]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]] - ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[AND1]] - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C4]] - ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] - ; GFX8-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = 
G_UITOFP [[LSHR1]](s32) - ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) - ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] - ; GFX8-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[LSHR1]] - ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB3]], [[FPTOUI1]] - ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] - ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[LSHR]], [[ADD3]] - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[LSHR1]] - ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[LSHR]], [[MUL3]] - ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB4]](s32), [[LSHR1]] - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[C4]] - ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[ADD4]], [[UMULH3]] - ; GFX8-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SUB4]], [[LSHR1]] - ; GFX8-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB5]], [[SUB4]] - ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[LSHR1]] - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SELECT3]], [[C4]] - ; GFX8-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD5]], [[SELECT3]] - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT2]], [[C1]] - ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SELECT5]], [[C1]] - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]] - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C1]] + ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[AND1]](i32) + ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C3]], [[AND1]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[AND]], [[ADD]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[AND]], [[MUL1]] + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[AND1]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[UMULH1]], [[C4]] + ; 
GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[ADD1]], [[UMULH1]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[AND1]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT1]](i32), [[AND1]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[SELECT]], [[C4]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[ADD2]], [[SELECT]] + ; GFX8-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[LSHR1]](i32) + ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](f32) + ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] + ; GFX8-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL1]](f32) + ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[C3]], [[LSHR1]] + ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[SUB3]], [[FPTOUI1]] + ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[LSHR]], [[ADD3]] + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UMULH3]], [[LSHR1]] + ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[LSHR]], [[MUL3]] + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB4]](i32), [[LSHR1]] + ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[UMULH3]], [[C4]] + ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[ADD4]], [[UMULH3]] + ; GFX8-NEXT: [[SUB5:%[0-9]+]]:_(i32) = G_SUB [[SUB4]], [[LSHR1]] + ; GFX8-NEXT: [[SELECT4:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SUB5]], [[SUB4]] + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT4]](i32), [[LSHR1]] + ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[SELECT3]], [[C4]] + ; GFX8-NEXT: [[SELECT5:%[0-9]+]]:_(i32) = G_SELECT [[ICMP3]](i1), [[ADD5]], [[SELECT3]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[SELECT2]], [[C1]] + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[SELECT5]], [[C1]] + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL]] + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) ; ; GFX9-LABEL: name: test_udiv_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) - ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] - ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND1]] - ; 
GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] - ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C4]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]] - ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[AND1]] - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C4]] - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SELECT2]](s32) - ; GFX9-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[LSHR1]](s32) - ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) - ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] - ; GFX9-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[LSHR1]] - ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB3]], [[FPTOUI1]] - ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] - ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[LSHR]], [[ADD3]] - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[LSHR1]] - ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[LSHR]], [[MUL3]] - ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB4]](s32), [[LSHR1]] - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[C4]] - ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[ADD4]], [[UMULH3]] - ; GFX9-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SUB4]], [[LSHR1]] - ; GFX9-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB5]], [[SUB4]] - ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[LSHR1]] - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SELECT3]], [[C4]] - ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD5]], [[SELECT3]] - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SELECT5]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C1]] + ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[AND1]](i32) + ; 
GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C3]], [[AND1]] + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[AND]], [[ADD]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[AND]], [[MUL1]] + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[AND1]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[UMULH1]], [[C4]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[ADD1]], [[UMULH1]] + ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[AND1]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT1]](i32), [[AND1]] + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[SELECT]], [[C4]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[ADD2]], [[SELECT]] + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[SELECT2]](i32) + ; GFX9-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[LSHR1]](i32) + ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](f32) + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] + ; GFX9-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL1]](f32) + ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[C3]], [[LSHR1]] + ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[SUB3]], [[FPTOUI1]] + ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[LSHR]], [[ADD3]] + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UMULH3]], [[LSHR1]] + ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[LSHR]], [[MUL3]] + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB4]](i32), [[LSHR1]] + ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[UMULH3]], [[C4]] + ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[ADD4]], [[UMULH3]] + ; GFX9-NEXT: [[SUB5:%[0-9]+]]:_(i32) = G_SUB [[SUB4]], [[LSHR1]] + ; GFX9-NEXT: [[SELECT4:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SUB5]], [[SUB4]] + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT4]](i32), [[LSHR1]] + ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[SELECT3]], [[C4]] + ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(i32) = G_SELECT [[ICMP3]](i1), [[ADD5]], [[SELECT3]] + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[SELECT5]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) ; ; GFX10-LABEL: name: test_udiv_v2s16 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX10-NEXT: 
[[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) - ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] - ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND1]] - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] - ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C4]] - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]] - ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] - ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[AND1]] - ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C4]] - ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SELECT2]](s32) - ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[LSHR1]](s32) - ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) - ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] - ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[LSHR1]] - ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB3]], [[FPTOUI1]] - ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] - ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] - ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[LSHR]], [[ADD3]] - ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[LSHR1]] - ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[LSHR]], [[MUL3]] - ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB4]](s32), [[LSHR1]] - ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[C4]] - ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[ADD4]], [[UMULH3]] - ; GFX10-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SUB4]], [[LSHR1]] - ; GFX10-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB5]], [[SUB4]] - ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[LSHR1]] - ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SELECT3]], [[C4]] - ; GFX10-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT 
[[ICMP3]](s1), [[ADD5]], [[SELECT3]] - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SELECT5]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_UDIV %0, %1 - $vgpr0 = COPY %2 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX10-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C1]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[AND1]](i32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C3]], [[AND1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[AND]], [[ADD]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[AND]], [[MUL1]] + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[AND1]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[UMULH1]], [[C4]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[ADD1]], [[UMULH1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[AND1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT1]](i32), [[AND1]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[SELECT]], [[C4]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[ADD2]], [[SELECT]] + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[SELECT2]](i32) + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[LSHR1]](i32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](f32) + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] + ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL1]](f32) + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[C3]], [[LSHR1]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[SUB3]], [[FPTOUI1]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[LSHR]], [[ADD3]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UMULH3]], [[LSHR1]] + ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[LSHR]], [[MUL3]] + ; GFX10-NEXT: 
[[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB4]](i32), [[LSHR1]] + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[UMULH3]], [[C4]] + ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[ADD4]], [[UMULH3]] + ; GFX10-NEXT: [[SUB5:%[0-9]+]]:_(i32) = G_SUB [[SUB4]], [[LSHR1]] + ; GFX10-NEXT: [[SELECT4:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SUB5]], [[SUB4]] + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT4]](i32), [[LSHR1]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[SELECT3]], [[C4]] + ; GFX10-NEXT: [[SELECT5:%[0-9]+]]:_(i32) = G_SELECT [[ICMP3]](i1), [[ADD5]], [[SELECT3]] + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[SELECT5]](i32) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX10-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = G_UDIV %0, %1 + $vgpr0 = COPY %2(<2 x i16>) ... --- @@ -2543,137 +2543,137 @@ body: | ; GFX6-LABEL: name: test_udiv_s7 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) - ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]] - ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] - ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]] - ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[AND1]] - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] - ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] - ; GFX6-NEXT: $vgpr0 = COPY [[SELECT2]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 127 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[AND1]](i32) + ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 
0x41EFFFFFC0000000 + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[AND1]] + ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[AND]], [[ADD]] + ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[AND]], [[MUL1]] + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[AND1]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[UMULH1]], [[C3]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[ADD1]], [[UMULH1]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[AND1]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT1]](i32), [[AND1]] + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[SELECT]], [[C3]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[ADD2]], [[SELECT]] + ; GFX6-NEXT: $vgpr0 = COPY [[SELECT2]](i32) ; ; GFX8-LABEL: name: test_udiv_s7 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) - ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]] - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] - ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]] - ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[AND1]] - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] - ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] - ; GFX8-NEXT: $vgpr0 = COPY [[SELECT2]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: 
[[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 127 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[AND1]](i32) + ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[AND1]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[AND]], [[ADD]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[AND]], [[MUL1]] + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[AND1]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[UMULH1]], [[C3]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[ADD1]], [[UMULH1]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[AND1]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT1]](i32), [[AND1]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[SELECT]], [[C3]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[ADD2]], [[SELECT]] + ; GFX8-NEXT: $vgpr0 = COPY [[SELECT2]](i32) ; ; GFX9-LABEL: name: test_udiv_s7 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) - ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]] - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] - ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]] - ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX9-NEXT: 
[[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[AND1]] - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] - ; GFX9-NEXT: $vgpr0 = COPY [[SELECT2]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 127 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[AND1]](i32) + ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[AND1]] + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[AND]], [[ADD]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[AND]], [[MUL1]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[AND1]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[UMULH1]], [[C3]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[ADD1]], [[UMULH1]] + ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[AND1]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT1]](i32), [[AND1]] + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[SELECT]], [[C3]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[ADD2]], [[SELECT]] + ; GFX9-NEXT: $vgpr0 = COPY [[SELECT2]](i32) ; ; GFX10-LABEL: name: test_udiv_s7 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) - ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]] - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] - ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; 
GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]] - ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] - ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[AND1]] - ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] - ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] - ; GFX10-NEXT: $vgpr0 = COPY [[SELECT2]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s7) = G_TRUNC %0 - %3:_(s7) = G_TRUNC %1 - %4:_(s7) = G_UDIV %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 127 + ; GFX10-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[AND1]](i32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[AND1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[AND]], [[ADD]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[AND]], [[MUL1]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[AND1]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[UMULH1]], [[C3]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[ADD1]], [[UMULH1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[AND1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT1]](i32), [[AND1]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[SELECT]], [[C3]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[ADD2]], [[SELECT]] + ; GFX10-NEXT: $vgpr0 = COPY [[SELECT2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i7) = G_TRUNC %0(i32) + %3:_(i7) = G_TRUNC %1(i32) + %4:_(i7) = G_UDIV %2, %3 + %5:_(i32) = G_ANYEXT %4(i7) + $vgpr0 = COPY %5(i32) ... 
--- @@ -2685,137 +2685,137 @@ body: | ; GFX6-LABEL: name: test_udiv_s17 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 131071 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) - ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]] - ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] - ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]] - ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[AND1]] - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] - ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] - ; GFX6-NEXT: $vgpr0 = COPY [[SELECT2]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 131071 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[AND1]](i32) + ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[AND1]] + ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[AND]], [[ADD]] + ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[AND]], [[MUL1]] + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[AND1]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[UMULH1]], [[C3]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[ADD1]], [[UMULH1]] + ; GFX6-NEXT: 
[[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[AND1]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT1]](i32), [[AND1]] + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[SELECT]], [[C3]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[ADD2]], [[SELECT]] + ; GFX6-NEXT: $vgpr0 = COPY [[SELECT2]](i32) ; ; GFX8-LABEL: name: test_udiv_s17 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 131071 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) - ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]] - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] - ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]] - ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[AND1]] - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] - ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] - ; GFX8-NEXT: $vgpr0 = COPY [[SELECT2]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 131071 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[AND1]](i32) + ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[AND1]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[AND]], [[ADD]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL 
[[UMULH1]], [[AND1]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[AND]], [[MUL1]] + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[AND1]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[UMULH1]], [[C3]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[ADD1]], [[UMULH1]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[AND1]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT1]](i32), [[AND1]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[SELECT]], [[C3]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[ADD2]], [[SELECT]] + ; GFX8-NEXT: $vgpr0 = COPY [[SELECT2]](i32) ; ; GFX9-LABEL: name: test_udiv_s17 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 131071 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) - ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]] - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] - ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]] - ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[AND1]] - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] - ; GFX9-NEXT: $vgpr0 = COPY [[SELECT2]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 131071 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[AND1]](i32) + ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT 
i32 0 + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[AND1]] + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[AND]], [[ADD]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[AND]], [[MUL1]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[AND1]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[UMULH1]], [[C3]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[ADD1]], [[UMULH1]] + ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[AND1]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT1]](i32), [[AND1]] + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[SELECT]], [[C3]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[ADD2]], [[SELECT]] + ; GFX9-NEXT: $vgpr0 = COPY [[SELECT2]](i32) ; ; GFX10-LABEL: name: test_udiv_s17 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 131071 - ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) - ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]] - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] - ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]] - ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] - ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[AND1]] - ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] - ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] - ; GFX10-NEXT: $vgpr0 = COPY [[SELECT2]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s17) = G_TRUNC %0 - %3:_(s17) = G_TRUNC %1 - %4:_(s17) = G_UDIV %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: 
[[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 131071 + ; GFX10-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[AND1]](i32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[AND1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[AND]], [[ADD]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[AND]], [[MUL1]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[AND1]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[UMULH1]], [[C3]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[ADD1]], [[UMULH1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[AND1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT1]](i32), [[AND1]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[SELECT]], [[C3]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[ADD2]], [[SELECT]] + ; GFX10-NEXT: $vgpr0 = COPY [[SELECT2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i17) = G_TRUNC %0(i32) + %3:_(i17) = G_TRUNC %1(i32) + %4:_(i17) = G_UDIV %2, %3 + %5:_(i32) = G_ANYEXT %4(i17) + $vgpr0 = COPY %5(i32) ... 
--- @@ -2827,623 +2827,623 @@ body: | ; GFX6-LABEL: name: test_udiv_s33 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32) - ; GFX6-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] - ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 - ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; GFX6-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] - ; GFX6-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; GFX6-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] - ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] - ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) - ; GFX6-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) - ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) - ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]] - ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] - ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] - ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] - ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] - ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; GFX6-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] - ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] - ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] - ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] - ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] - ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] - ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX6-NEXT: 
[[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] - ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] - ; GFX6-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] - ; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] - ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] - ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] - ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] - ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] - ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] - ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] - ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] - ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] - ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] - ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] - ; GFX6-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] - ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] - ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] - ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] - ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] - ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] - ; GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] - ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] - ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] - ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) - ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) - ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]] - ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE2]] - ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]] - ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = 
G_UADDO [[MUL12]], [[MUL13]] - ; GFX6-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] - ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE2]] - ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]] - ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE2]] - ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] - ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] - ; GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] - ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE2]] - ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] - ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD15]](s32) - ; GFX6-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]] - ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]] - ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD15]] - ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]] - ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] - ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]] - ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD17]], [[USUBO3]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD17]] - ; GFX6-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]] - ; GFX6-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV12]] - ; GFX6-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV13]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX6-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV12]] - ; GFX6-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV13]], [[USUBO3]] - ; GFX6-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] - ; GFX6-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX6-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX6-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV14]] - ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV15]], [[UADDO35]] - ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32) - ; 
GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV13]] - ; GFX6-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX6-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV12]] - ; GFX6-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX6-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV13]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX6-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX6-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV16]] - ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV17]], [[UADDO37]] - ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32) - ; GFX6-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] - ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] - ; GFX6-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] - ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 8589934591 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[COPY]], [[C]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[COPY1]], [[C]] + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND1]](i64) + ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[UV]](i32) + ; GFX6-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[UV1]](i32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD]](f32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX6-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX6-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX6-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD1]](f32) + ; GFX6-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC]](f32) + ; GFX6-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C5]](i64) + ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND1]](i64) + ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV2]], [[UV4]] + ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] + ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[USUBO]], 
[[FPTOUI]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ADD]], [[UMULH]] + ; GFX6-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO1]](i1) + ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UADDO]], [[UMULH1]] + ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO3]](i1) + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(i32), [[UADDO5:%[0-9]+]]:_(i1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO5]](i1) + ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(i32), [[UADDO7:%[0-9]+]]:_(i1) = G_UADDO [[UADDO4]], [[UMULH3]] + ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO7]](i1) + ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(i32), [[UADDO9:%[0-9]+]]:_(i1) = G_UADDO [[UADDO6]], [[ADD2]] + ; GFX6-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO9]](i1) + ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(i32), [[UADDO11:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI]], [[UADDO8]] + ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] + ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[UADDO10]] + ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(i32) = G_MUL [[USUBE]], [[UADDO10]] + ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[UADDE]] + ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(i32) = G_UMULH [[USUBO]], [[UADDO10]] + ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[ADD6]], [[UMULH5]] + ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(i32) = G_MUL [[UADDE]], [[MUL6]] + ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(i32) = G_MUL [[UADDO10]], [[ADD7]] + ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(i32) = G_UMULH [[UADDO10]], [[MUL6]] + ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(i32), [[UADDO13:%[0-9]+]]:_(i1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO13]](i1) + ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(i32), [[UADDO15:%[0-9]+]]:_(i1) = G_UADDO [[UADDO12]], [[UMULH6]] + ; GFX6-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO15]](i1) + ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(i32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(i32) = G_MUL [[UADDE]], [[ADD7]] + ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(i32) = G_UMULH [[UADDE]], [[MUL6]] + ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(i32) = G_UMULH [[UADDO10]], [[ADD7]] + ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(i32), [[UADDO17:%[0-9]+]]:_(i1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO17]](i1) + ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(i32), [[UADDO19:%[0-9]+]]:_(i1) = G_UADDO [[UADDO16]], [[UMULH8]] + ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO19]](i1) + ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(i32) = G_ADD 
[[ZEXT7]], [[ZEXT8]] + ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(i32), [[UADDO21:%[0-9]+]]:_(i1) = G_UADDO [[UADDO18]], [[ADD8]] + ; GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO21]](i1) + ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(i32) = G_ADD [[ADD9]], [[ZEXT9]] + ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(i32) = G_UMULH [[UADDE]], [[ADD7]] + ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(i32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX6-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(i32), [[UADDO23:%[0-9]+]]:_(i1) = G_UADDO [[UADDO10]], [[UADDO20]] + ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] + ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND]](i64) + ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND]](i64) + ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(i32) = G_MUL [[UV9]], [[UADDO22]] + ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(i32) = G_MUL [[UV8]], [[UADDE2]] + ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(i32) = G_UMULH [[UV8]], [[UADDO22]] + ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(i32), [[UADDO25:%[0-9]+]]:_(i1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX6-NEXT: [[ZEXT10:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO25]](i1) + ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(i32), [[UADDO27:%[0-9]+]]:_(i1) = G_UADDO [[UADDO24]], [[UMULH10]] + ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO27]](i1) + ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(i32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(i32) = G_MUL [[UV9]], [[UADDE2]] + ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(i32) = G_UMULH [[UV9]], [[UADDO22]] + ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(i32) = G_UMULH [[UV8]], [[UADDE2]] + ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(i32), [[UADDO29:%[0-9]+]]:_(i1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO29]](i1) + ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(i32), [[UADDO31:%[0-9]+]]:_(i1) = G_UADDO [[UADDO28]], [[UMULH12]] + ; GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO31]](i1) + ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(i32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(i32), [[UADDO33:%[0-9]+]]:_(i1) = G_UADDO [[UADDO30]], [[ADD12]] + ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO33]](i1) + ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(i32) = G_ADD [[ADD13]], [[ZEXT14]] + ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(i32) = G_UMULH [[UV9]], [[UADDE2]] + ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(i32) = G_ADD [[UMULH13]], [[ADD14]] + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO32]](i32), [[ADD15]](i32) + ; GFX6-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND1]](i64) + ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(i32) = G_MUL [[UV10]], [[UADDO32]] + ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(i32) = G_MUL [[UV11]], [[UADDO32]] + ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(i32) = G_MUL [[UV10]], [[ADD15]] + ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(i32) = G_UMULH [[UV10]], [[UADDO32]] + ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(i32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(i32) = G_ADD [[ADD16]], [[UMULH14]] + ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(i32), [[USUBO3:%[0-9]+]]:_(i1) = G_USUBO [[UV6]], [[MUL15]] + ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV7]], [[ADD17]], [[USUBO3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV7]], [[ADD17]] + ; GFX6-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND1]](i64) + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE2]](i32), [[UV13]] + ; GFX6-NEXT: 
[[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO2]](i32), [[UV12]] + ; GFX6-NEXT: [[SEXT1:%[0-9]+]]:_(i32) = G_SEXT [[ICMP1]](i1) + ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE2]](i32), [[UV13]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SEXT1]], [[SEXT]] + ; GFX6-NEXT: [[USUBO4:%[0-9]+]]:_(i32), [[USUBO5:%[0-9]+]]:_(i1) = G_USUBO [[USUBO2]], [[UV12]] + ; GFX6-NEXT: [[USUBE4:%[0-9]+]]:_(i32), [[USUBE5:%[0-9]+]]:_(i1) = G_USUBE [[SUB]], [[UV13]], [[USUBO3]] + ; GFX6-NEXT: [[USUBE6:%[0-9]+]]:_(i32), [[USUBE7:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX6-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX6-NEXT: [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C7]](i64) + ; GFX6-NEXT: [[UADDO34:%[0-9]+]]:_(i32), [[UADDO35:%[0-9]+]]:_(i1) = G_UADDO [[UADDO32]], [[UV14]] + ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(i32), [[UADDE5:%[0-9]+]]:_(i1) = G_UADDE [[ADD15]], [[UV15]], [[UADDO35]] + ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO34]](i32), [[UADDE4]](i32) + ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE6]](i32), [[UV13]] + ; GFX6-NEXT: [[SEXT2:%[0-9]+]]:_(i32) = G_SEXT [[ICMP3]](i1) + ; GFX6-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO4]](i32), [[UV12]] + ; GFX6-NEXT: [[SEXT3:%[0-9]+]]:_(i32) = G_SEXT [[ICMP4]](i1) + ; GFX6-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE6]](i32), [[UV13]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP5]](i1), [[SEXT3]], [[SEXT2]] + ; GFX6-NEXT: [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C7]](i64) + ; GFX6-NEXT: [[UADDO36:%[0-9]+]]:_(i32), [[UADDO37:%[0-9]+]]:_(i1) = G_UADDO [[UADDO34]], [[UV16]] + ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(i32), [[UADDE7:%[0-9]+]]:_(i1) = G_UADDE [[UADDE4]], [[UV17]], [[UADDO37]] + ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO36]](i32), [[UADDE6]](i32) + ; GFX6-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT1]](i32), [[C6]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[MV2]], [[MV1]] + ; GFX6-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT]](i32), [[C6]] + ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP7]](i1), [[SELECT2]], [[MV]] + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](i64) ; ; GFX8-LABEL: name: test_udiv_s33 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32) - ; GFX8-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] - ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] - ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 - ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 
0x3DF0000000000000 - ; GFX8-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] - ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] - ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; GFX8-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] - ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] - ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) - ; GFX8-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) - ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) - ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; GFX8-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]] - ; GFX8-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C5]] - ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV7]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] - ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV6]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV8]] - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV6]] - ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV8]] - ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV6]] - ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV8]] - ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] - ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV8]] - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]] - ; GFX8-NEXT: 
[[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C5]] - ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) - ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV11]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE]], [[ANYEXT1]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]] - ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV10]] - ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[UV12]] - ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV10]] - ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] - ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV12]] - ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV10]] - ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV12]] - ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD4]] - ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV12]] - ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]] - ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) - ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) - ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO22]] - ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE2]] - ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO22]] - ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] - ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE2]] - ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH 
[[UV17]], [[UADDO22]] - ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE2]] - ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD8]] - ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE2]] - ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] - ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD11]](s32) - ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDO32]], [[C5]] - ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) - ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV21]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]] - ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) - ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV20]] - ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV22]], [[USUBO3]] - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[UV22]] - ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV25]] - ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV24]] - ; GFX8-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV25]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX8-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV24]] - ; GFX8-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV25]], [[USUBO3]] - ; GFX8-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] - ; GFX8-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV26]] - ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD11]], [[UV27]], [[UADDO35]] - ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32) - ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV25]] - ; GFX8-NEXT: 
[[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV24]] - ; GFX8-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV25]] - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX8-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV28]] - ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV29]], [[UADDO37]] - ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32) - ; GFX8-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] - ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] - ; GFX8-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] - ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 8589934591 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[COPY]], [[C]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[COPY1]], [[C]] + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND1]](i64) + ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[UV]](i32) + ; GFX8-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[UV1]](i32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD]](f32) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX8-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX8-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD1]](f32) + ; GFX8-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC]](f32) + ; GFX8-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C5]](i64) + ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND1]](i64) + ; GFX8-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV2]], [[UV4]] + ; GFX8-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[FPTOUI]], [[C5]] + ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](i64) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[UV7]](i32) + ; GFX8-NEXT: 
[[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[FPTOUI1]], [[ANYEXT]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](i32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](i64) + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[UV6]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI]], [[UV8]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[UV6]] + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[MUL]], [[MUL1]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO1]](i1) + ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UADDO]], [[UMULH]] + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO3]](i1) + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[UV8]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[UV6]] + ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[UV8]] + ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(i32), [[UADDO5:%[0-9]+]]:_(i1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO5]](i1) + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(i32), [[UADDO7:%[0-9]+]]:_(i1) = G_UADDO [[UADDO4]], [[UMULH2]] + ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO7]](i1) + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(i32), [[UADDO9:%[0-9]+]]:_(i1) = G_UADDO [[UADDO6]], [[ADD]] + ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO9]](i1) + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[UV8]] + ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UMULH3]], [[ADD2]] + ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(i32), [[UADDO11:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI]], [[UADDO8]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[UADDO10]], [[C5]] + ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](i64) + ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[UV11]](i32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[UADDE]], [[ANYEXT1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](i32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](i64) + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UADDE]], [[UV10]] + ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(i32) = G_MUL [[UADDO10]], [[UV12]] + ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(i32) = G_UMULH [[UADDO10]], [[UV10]] + ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(i32), [[UADDO13:%[0-9]+]]:_(i1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO13]](i1) + ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(i32), [[UADDO15:%[0-9]+]]:_(i1) = G_UADDO [[UADDO12]], [[UMULH4]] + ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO15]](i1) + ; GFX8-NEXT: 
[[ADD4:%[0-9]+]]:_(i32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(i32) = G_MUL [[UADDE]], [[UV12]] + ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(i32) = G_UMULH [[UADDE]], [[UV10]] + ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(i32) = G_UMULH [[UADDO10]], [[UV12]] + ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(i32), [[UADDO17:%[0-9]+]]:_(i1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO17]](i1) + ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(i32), [[UADDO19:%[0-9]+]]:_(i1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO19]](i1) + ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(i32), [[UADDO21:%[0-9]+]]:_(i1) = G_UADDO [[UADDO18]], [[ADD4]] + ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO21]](i1) + ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(i32) = G_UMULH [[UADDE]], [[UV12]] + ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[UMULH7]], [[ADD6]] + ; GFX8-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(i32), [[UADDO23:%[0-9]+]]:_(i1) = G_UADDO [[UADDO10]], [[UADDO20]] + ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]] + ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND]](i64) + ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND]](i64) + ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(i32) = G_MUL [[UV17]], [[UADDO22]] + ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(i32) = G_MUL [[UV16]], [[UADDE2]] + ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(i32) = G_UMULH [[UV16]], [[UADDO22]] + ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(i32), [[UADDO25:%[0-9]+]]:_(i1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO25]](i1) + ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(i32), [[UADDO27:%[0-9]+]]:_(i1) = G_UADDO [[UADDO24]], [[UMULH8]] + ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO27]](i1) + ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(i32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(i32) = G_MUL [[UV17]], [[UADDE2]] + ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(i32) = G_UMULH [[UV17]], [[UADDO22]] + ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(i32) = G_UMULH [[UV16]], [[UADDE2]] + ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(i32), [[UADDO29:%[0-9]+]]:_(i1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO29]](i1) + ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(i32), [[UADDO31:%[0-9]+]]:_(i1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO31]](i1) + ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(i32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(i32), [[UADDO33:%[0-9]+]]:_(i1) = G_UADDO [[UADDO30]], [[ADD8]] + ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO33]](i1) + ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(i32) = G_ADD [[ADD9]], [[ZEXT14]] + ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(i32) = G_UMULH [[UV17]], [[UADDE2]] + ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(i32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO32]](i32), [[ADD11]](i32) + ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND1]](i64) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV18]](i32), [[UADDO32]], [[C5]] + ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES 
[[AMDGPU_MAD_U64_U32_12]](i64) + ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(i64) = G_ANYEXT [[UV21]](i32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV18]](i32), [[ADD11]], [[ANYEXT2]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV19]](i32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](i64) + ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(i32), [[USUBO3:%[0-9]+]]:_(i1) = G_USUBO [[UV14]], [[UV20]] + ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV15]], [[UV22]], [[USUBO3]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV15]], [[UV22]] + ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND1]](i64) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE2]](i32), [[UV25]] + ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO2]](i32), [[UV24]] + ; GFX8-NEXT: [[SEXT1:%[0-9]+]]:_(i32) = G_SEXT [[ICMP1]](i1) + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE2]](i32), [[UV25]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SEXT1]], [[SEXT]] + ; GFX8-NEXT: [[USUBO4:%[0-9]+]]:_(i32), [[USUBO5:%[0-9]+]]:_(i1) = G_USUBO [[USUBO2]], [[UV24]] + ; GFX8-NEXT: [[USUBE4:%[0-9]+]]:_(i32), [[USUBE5:%[0-9]+]]:_(i1) = G_USUBE [[SUB]], [[UV25]], [[USUBO3]] + ; GFX8-NEXT: [[USUBE6:%[0-9]+]]:_(i32), [[USUBE7:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX8-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C7]](i64) + ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(i32), [[UADDO35:%[0-9]+]]:_(i1) = G_UADDO [[UADDO32]], [[UV26]] + ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(i32), [[UADDE5:%[0-9]+]]:_(i1) = G_UADDE [[ADD11]], [[UV27]], [[UADDO35]] + ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO34]](i32), [[UADDE4]](i32) + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE6]](i32), [[UV25]] + ; GFX8-NEXT: [[SEXT2:%[0-9]+]]:_(i32) = G_SEXT [[ICMP3]](i1) + ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO4]](i32), [[UV24]] + ; GFX8-NEXT: [[SEXT3:%[0-9]+]]:_(i32) = G_SEXT [[ICMP4]](i1) + ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE6]](i32), [[UV25]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP5]](i1), [[SEXT3]], [[SEXT2]] + ; GFX8-NEXT: [[UV28:%[0-9]+]]:_(i32), [[UV29:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C7]](i64) + ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(i32), [[UADDO37:%[0-9]+]]:_(i1) = G_UADDO [[UADDO34]], [[UV28]] + ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(i32), [[UADDE7:%[0-9]+]]:_(i1) = G_UADDE [[UADDE4]], [[UV29]], [[UADDO37]] + ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO36]](i32), [[UADDE6]](i32) + ; GFX8-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT1]](i32), [[C6]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[MV2]], [[MV1]] + ; GFX8-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT]](i32), [[C6]] + ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP7]](i1), [[SELECT2]], [[MV]] + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](i64) ; ; GFX9-LABEL: name: test_udiv_s33 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: 
[[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32) - ; GFX9-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] - ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 - ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] - ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; GFX9-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] - ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] - ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) - ; GFX9-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; GFX9-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]] - ; GFX9-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C5]] - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV7]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] - ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV6]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV8]] - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV6]] - ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV8]] - ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV6]] - ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH 
[[FPTOUI]], [[UV8]] - ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] - ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV8]] - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C5]] - ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) - ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV11]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE]], [[ANYEXT1]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]] - ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV10]] - ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[UV12]] - ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV10]] - ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] - ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV12]] - ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV10]] - ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV12]] - ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD4]] - ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV12]] - ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX9-NEXT: 
[[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]] - ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) - ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) - ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO22]] - ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE2]] - ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO22]] - ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] - ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE2]] - ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO22]] - ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE2]] - ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD8]] - ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE2]] - ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD11]](s32) - ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDO32]], [[C5]] - ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) - ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV21]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]] - ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) - ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV20]] - ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV22]], [[USUBO3]] - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[UV22]] - ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV25]] - ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV24]] - ; GFX9-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX9-NEXT: 
[[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV25]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX9-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV24]] - ; GFX9-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV25]], [[USUBO3]] - ; GFX9-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] - ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV26]] - ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD11]], [[UV27]], [[UADDO35]] - ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32) - ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV25]] - ; GFX9-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV24]] - ; GFX9-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV25]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV28]] - ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV29]], [[UADDO37]] - ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32) - ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] - ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] - ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 8589934591 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[COPY]], [[C]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[COPY1]], [[C]] + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND1]](i64) + ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[UV]](i32) + ; GFX9-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[UV1]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD]](f32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX9-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] 
+ ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD1]](f32) + ; GFX9-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC]](f32) + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C5]](i64) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND1]](i64) + ; GFX9-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV2]], [[UV4]] + ; GFX9-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[FPTOUI]], [[C5]] + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](i64) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[UV7]](i32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[FPTOUI1]], [[ANYEXT]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](i32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](i64) + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[UV6]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI]], [[UV8]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[UV6]] + ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[MUL]], [[MUL1]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO1]](i1) + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UADDO]], [[UMULH]] + ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO3]](i1) + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[UV8]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[UV6]] + ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[UV8]] + ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(i32), [[UADDO5:%[0-9]+]]:_(i1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO5]](i1) + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(i32), [[UADDO7:%[0-9]+]]:_(i1) = G_UADDO [[UADDO4]], [[UMULH2]] + ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO7]](i1) + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(i32), [[UADDO9:%[0-9]+]]:_(i1) = G_UADDO [[UADDO6]], [[ADD]] + ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO9]](i1) + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[UV8]] + ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UMULH3]], [[ADD2]] + ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(i32), [[UADDO11:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI]], [[UADDO8]] + ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[UADDO10]], [[C5]] + ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](i64) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[UV11]](i32) + ; GFX9-NEXT: 
[[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[UADDE]], [[ANYEXT1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](i32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](i64) + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UADDE]], [[UV10]] + ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(i32) = G_MUL [[UADDO10]], [[UV12]] + ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(i32) = G_UMULH [[UADDO10]], [[UV10]] + ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(i32), [[UADDO13:%[0-9]+]]:_(i1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO13]](i1) + ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(i32), [[UADDO15:%[0-9]+]]:_(i1) = G_UADDO [[UADDO12]], [[UMULH4]] + ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO15]](i1) + ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(i32) = G_MUL [[UADDE]], [[UV12]] + ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(i32) = G_UMULH [[UADDE]], [[UV10]] + ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(i32) = G_UMULH [[UADDO10]], [[UV12]] + ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(i32), [[UADDO17:%[0-9]+]]:_(i1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO17]](i1) + ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(i32), [[UADDO19:%[0-9]+]]:_(i1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO19]](i1) + ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(i32), [[UADDO21:%[0-9]+]]:_(i1) = G_UADDO [[UADDO18]], [[ADD4]] + ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO21]](i1) + ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(i32) = G_UMULH [[UADDE]], [[UV12]] + ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[UMULH7]], [[ADD6]] + ; GFX9-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(i32), [[UADDO23:%[0-9]+]]:_(i1) = G_UADDO [[UADDO10]], [[UADDO20]] + ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]] + ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND]](i64) + ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND]](i64) + ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(i32) = G_MUL [[UV17]], [[UADDO22]] + ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(i32) = G_MUL [[UV16]], [[UADDE2]] + ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(i32) = G_UMULH [[UV16]], [[UADDO22]] + ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(i32), [[UADDO25:%[0-9]+]]:_(i1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO25]](i1) + ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(i32), [[UADDO27:%[0-9]+]]:_(i1) = G_UADDO [[UADDO24]], [[UMULH8]] + ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO27]](i1) + ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(i32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(i32) = G_MUL [[UV17]], [[UADDE2]] + ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(i32) = G_UMULH [[UV17]], [[UADDO22]] + ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(i32) = G_UMULH [[UV16]], [[UADDE2]] + ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(i32), [[UADDO29:%[0-9]+]]:_(i1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO29]](i1) + ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(i32), [[UADDO31:%[0-9]+]]:_(i1) = 
G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO31]](i1) + ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(i32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(i32), [[UADDO33:%[0-9]+]]:_(i1) = G_UADDO [[UADDO30]], [[ADD8]] + ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO33]](i1) + ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(i32) = G_ADD [[ADD9]], [[ZEXT14]] + ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(i32) = G_UMULH [[UV17]], [[UADDE2]] + ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(i32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO32]](i32), [[ADD11]](i32) + ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND1]](i64) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV18]](i32), [[UADDO32]], [[C5]] + ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](i64) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(i64) = G_ANYEXT [[UV21]](i32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV18]](i32), [[ADD11]], [[ANYEXT2]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV19]](i32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](i64) + ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(i32), [[USUBO3:%[0-9]+]]:_(i1) = G_USUBO [[UV14]], [[UV20]] + ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV15]], [[UV22]], [[USUBO3]] + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV15]], [[UV22]] + ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND1]](i64) + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE2]](i32), [[UV25]] + ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO2]](i32), [[UV24]] + ; GFX9-NEXT: [[SEXT1:%[0-9]+]]:_(i32) = G_SEXT [[ICMP1]](i1) + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE2]](i32), [[UV25]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SEXT1]], [[SEXT]] + ; GFX9-NEXT: [[USUBO4:%[0-9]+]]:_(i32), [[USUBO5:%[0-9]+]]:_(i1) = G_USUBO [[USUBO2]], [[UV24]] + ; GFX9-NEXT: [[USUBE4:%[0-9]+]]:_(i32), [[USUBE5:%[0-9]+]]:_(i1) = G_USUBE [[SUB]], [[UV25]], [[USUBO3]] + ; GFX9-NEXT: [[USUBE6:%[0-9]+]]:_(i32), [[USUBE7:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX9-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C7]](i64) + ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(i32), [[UADDO35:%[0-9]+]]:_(i1) = G_UADDO [[UADDO32]], [[UV26]] + ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(i32), [[UADDE5:%[0-9]+]]:_(i1) = G_UADDE [[ADD11]], [[UV27]], [[UADDO35]] + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO34]](i32), [[UADDE4]](i32) + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE6]](i32), [[UV25]] + ; GFX9-NEXT: [[SEXT2:%[0-9]+]]:_(i32) = G_SEXT [[ICMP3]](i1) + ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO4]](i32), [[UV24]] + ; GFX9-NEXT: [[SEXT3:%[0-9]+]]:_(i32) = G_SEXT [[ICMP4]](i1) + ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE6]](i32), [[UV25]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = 
G_SELECT [[ICMP5]](i1), [[SEXT3]], [[SEXT2]] + ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(i32), [[UV29:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C7]](i64) + ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(i32), [[UADDO37:%[0-9]+]]:_(i1) = G_UADDO [[UADDO34]], [[UV28]] + ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(i32), [[UADDE7:%[0-9]+]]:_(i1) = G_UADDE [[UADDE4]], [[UV29]], [[UADDO37]] + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO36]](i32), [[UADDE6]](i32) + ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT1]](i32), [[C6]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[MV2]], [[MV1]] + ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT]](i32), [[C6]] + ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP7]](i1), [[SELECT2]], [[MV]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](i64) ; ; GFX10-LABEL: name: test_udiv_s33 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591 - ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] - ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32) - ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] - ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] - ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 - ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] - ; GFX10-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] - ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] - ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) - ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) - ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) - ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]] - ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C5]] - ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV7]], [[MUL]] - ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]] - ; GFX10-NEXT: 
[[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV6]] - ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV6]] - ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[MUL3]] - ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV6]] - ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] - ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] - ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] - ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD4]] - ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C5]] - ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64) - ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] - ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[MUL5]] - ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] - ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[MUL6]] - ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV8]] - ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] - ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV8]] - ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] - ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] - ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] - ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV8]] - ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] - ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] - ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], 
[[ZEXT8]] - ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] - ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] - ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD10]] - ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] - ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) - ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) - ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO22]] - ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE2]] - ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO22]] - ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] - ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] - ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE2]] - ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO22]] - ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE2]] - ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] - ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] - ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE2]] - ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD14]] - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD15]](s32) - ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV14]](s32), [[UADDO32]], [[C5]] - ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD15]] - ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL13]] - ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDO32]] - ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL14]] - ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV16]] - ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD17]], [[USUBO3]] - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD17]] - ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), 
[[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV19]] - ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV18]] - ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV19]] - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV18]] - ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV19]], [[USUBO3]] - ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] - ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV20]] - ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV21]], [[UADDO35]] - ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32) - ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV19]] - ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV18]] - ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV19]] - ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV22]] - ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV23]], [[UADDO37]] - ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32) - ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] - ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] - ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] - ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s33) = G_TRUNC %0 - %3:_(s33) = G_TRUNC %1 - %4:_(s33) = G_UDIV %2, %3 - %5:_(s64) = G_ANYEXT %4 - $vgpr0_vgpr1 = COPY %5 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 8589934591 + ; GFX10-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[COPY]], [[C]] + ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[COPY1]], [[C]] + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND1]](i64) + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[UV]](i32) + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[UV1]](i32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UITOFP]] + ; 
GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD]](f32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX10-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD1]](f32) + ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC]](f32) + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C5]](i64) + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND1]](i64) + ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV2]], [[UV4]] + ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[FPTOUI]], [[C5]] + ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](i64) + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[UV7]], [[MUL]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ADD]], [[MUL1]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[UV6]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[UV6]] + ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[MUL2]], [[MUL3]] + ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO1]](i1) + ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UADDO]], [[UMULH]] + ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO3]](i1) + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[UV6]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(i32), [[UADDO5:%[0-9]+]]:_(i1) = G_UADDO [[MUL4]], [[UMULH1]] + ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO5]](i1) + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(i32), [[UADDO7:%[0-9]+]]:_(i1) = G_UADDO [[UADDO4]], [[UMULH2]] + ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO7]](i1) + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(i32), [[UADDO9:%[0-9]+]]:_(i1) = G_UADDO [[UADDO6]], [[ADD2]] + ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO9]](i1) + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[UMULH3]], [[ADD4]] + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(i32), [[UADDO11:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI]], [[UADDO8]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(i32), 
[[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[UADDO10]], [[C5]] + ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](i64) + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[UADDE]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[UV9]], [[MUL5]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(i32) = G_MUL [[USUBE]], [[UADDO10]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[ADD6]], [[MUL6]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(i32) = G_MUL [[UADDE]], [[UV8]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(i32) = G_MUL [[UADDO10]], [[ADD7]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(i32) = G_UMULH [[UADDO10]], [[UV8]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(i32), [[UADDO13:%[0-9]+]]:_(i1) = G_UADDO [[MUL7]], [[MUL8]] + ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO13]](i1) + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(i32), [[UADDO15:%[0-9]+]]:_(i1) = G_UADDO [[UADDO12]], [[UMULH4]] + ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO15]](i1) + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(i32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(i32) = G_MUL [[UADDE]], [[ADD7]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(i32) = G_UMULH [[UADDE]], [[UV8]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(i32) = G_UMULH [[UADDO10]], [[ADD7]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(i32), [[UADDO17:%[0-9]+]]:_(i1) = G_UADDO [[MUL9]], [[UMULH5]] + ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO17]](i1) + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(i32), [[UADDO19:%[0-9]+]]:_(i1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO19]](i1) + ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(i32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(i32), [[UADDO21:%[0-9]+]]:_(i1) = G_UADDO [[UADDO18]], [[ADD8]] + ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO21]](i1) + ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(i32) = G_ADD [[ADD9]], [[ZEXT9]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(i32) = G_UMULH [[UADDE]], [[ADD7]] + ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(i32) = G_ADD [[UMULH7]], [[ADD10]] + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(i32), [[UADDO23:%[0-9]+]]:_(i1) = G_UADDO [[UADDO10]], [[UADDO20]] + ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] + ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND]](i64) + ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND]](i64) + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(i32) = G_MUL [[UV13]], [[UADDO22]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(i32) = G_MUL [[UV12]], [[UADDE2]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(i32) = G_UMULH [[UV12]], [[UADDO22]] + ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(i32), [[UADDO25:%[0-9]+]]:_(i1) = G_UADDO [[MUL10]], [[MUL11]] + ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO25]](i1) + ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(i32), [[UADDO27:%[0-9]+]]:_(i1) = G_UADDO [[UADDO24]], [[UMULH8]] + ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO27]](i1) + ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(i32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(i32) = G_MUL [[UV13]], [[UADDE2]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(i32) = G_UMULH [[UV13]], [[UADDO22]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(i32) = G_UMULH [[UV12]], [[UADDE2]] + ; GFX10-NEXT: 
[[UADDO28:%[0-9]+]]:_(i32), [[UADDO29:%[0-9]+]]:_(i1) = G_UADDO [[MUL12]], [[UMULH9]] + ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO29]](i1) + ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(i32), [[UADDO31:%[0-9]+]]:_(i1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO31]](i1) + ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(i32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(i32), [[UADDO33:%[0-9]+]]:_(i1) = G_UADDO [[UADDO30]], [[ADD12]] + ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO33]](i1) + ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(i32) = G_ADD [[ADD13]], [[ZEXT14]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(i32) = G_UMULH [[UV13]], [[UADDE2]] + ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(i32) = G_ADD [[UMULH11]], [[ADD14]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO32]](i32), [[ADD15]](i32) + ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND1]](i64) + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV14]](i32), [[UADDO32]], [[C5]] + ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](i64) + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(i32) = G_MUL [[UV14]], [[ADD15]] + ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(i32) = G_ADD [[UV17]], [[MUL13]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(i32) = G_MUL [[UV15]], [[UADDO32]] + ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(i32) = G_ADD [[ADD16]], [[MUL14]] + ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(i32), [[USUBO3:%[0-9]+]]:_(i1) = G_USUBO [[UV10]], [[UV16]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV11]], [[ADD17]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV11]], [[ADD17]] + ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND1]](i64) + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE2]](i32), [[UV19]] + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO2]](i32), [[UV18]] + ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(i32) = G_SEXT [[ICMP1]](i1) + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE2]](i32), [[UV19]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SEXT1]], [[SEXT]] + ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(i32), [[USUBO5:%[0-9]+]]:_(i1) = G_USUBO [[USUBO2]], [[UV18]] + ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(i32), [[USUBE5:%[0-9]+]]:_(i1) = G_USUBE [[SUB]], [[UV19]], [[USUBO3]] + ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(i32), [[USUBE7:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX10-NEXT: [[C7:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C7]](i64) + ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(i32), [[UADDO35:%[0-9]+]]:_(i1) = G_UADDO [[UADDO32]], [[UV20]] + ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(i32), [[UADDE5:%[0-9]+]]:_(i1) = G_UADDE [[ADD15]], [[UV21]], [[UADDO35]] + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO34]](i32), [[UADDE4]](i32) + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE6]](i32), [[UV19]] + ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(i32) = G_SEXT [[ICMP3]](i1) + ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO4]](i32), [[UV18]] + ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(i32) = G_SEXT [[ICMP4]](i1) + ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE6]](i32), [[UV19]] + ; GFX10-NEXT: 
[[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP5]](i1), [[SEXT3]], [[SEXT2]] + ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C7]](i64) + ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(i32), [[UADDO37:%[0-9]+]]:_(i1) = G_UADDO [[UADDO34]], [[UV22]] + ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(i32), [[UADDE7:%[0-9]+]]:_(i1) = G_UADDE [[UADDE4]], [[UV23]], [[UADDO37]] + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO36]](i32), [[UADDE6]](i32) + ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT1]](i32), [[C6]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[MV2]], [[MV1]] + ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT]](i32), [[C6]] + ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP7]](i1), [[SELECT2]], [[MV]] + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i33) = G_TRUNC %0(i64) + %3:_(i33) = G_TRUNC %1(i64) + %4:_(i33) = G_UDIV %2, %3 + %5:_(i64) = G_ANYEXT %4(i33) + $vgpr0_vgpr1 = COPY %5(i64) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uitofp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uitofp.mir index 65826d7658f2c..43bf4ff494936 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uitofp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uitofp.mir @@ -11,18 +11,22 @@ body: | ; GFX6-LABEL: name: test_uitofp_s32_to_s32 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY]](s32) - ; GFX6-NEXT: $vgpr0 = COPY [[UITOFP]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[COPY]](i32) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UITOFP]](f32) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST]](i32) + ; ; GFX8-LABEL: name: test_uitofp_s32_to_s32 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[UITOFP]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_UITOFP %0 - $vgpr0 = COPY %1 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[COPY]](i32) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UITOFP]](f32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_UITOFP %0(i32) + %2:_(i32) = G_BITCAST %1(f32) + $vgpr0 = COPY %2(i32) ... 
--- @@ -34,18 +38,22 @@ body: | ; GFX6-LABEL: name: test_uitofp_s32_to_s64 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s64) = G_UITOFP [[COPY]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[UITOFP]](s64) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(f64) = G_UITOFP [[COPY]](i32) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i64) = G_BITCAST [[UITOFP]](f64) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](i64) + ; ; GFX8-LABEL: name: test_uitofp_s32_to_s64 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s64) = G_UITOFP [[COPY]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[UITOFP]](s64) - %0:_(s32) = COPY $vgpr0 - %1:_(s64) = G_UITOFP %0 - $vgpr0_vgpr1 = COPY %1 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(f64) = G_UITOFP [[COPY]](i32) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i64) = G_BITCAST [[UITOFP]](f64) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](i64) + %0:_(i32) = COPY $vgpr0 + %1:_(f64) = G_UITOFP %0(i32) + %2:_(i64) = G_BITCAST %1(f64) + $vgpr0_vgpr1 = COPY %2(i64) ... --- @@ -57,24 +65,28 @@ body: | ; GFX6-LABEL: name: test_uitofp_v2s32_to_v2s32 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32) - ; GFX6-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32) - ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UITOFP]](s32), [[UITOFP1]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[UV]](i32) + ; GFX6-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[UV1]](i32) + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[UITOFP]](f32), [[UITOFP1]](f32) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x i32>) + ; ; GFX8-LABEL: name: test_uitofp_v2s32_to_v2s32 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32) - ; GFX8-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32) - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UITOFP]](s32), [[UITOFP1]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_UITOFP %0 - $vgpr0_vgpr1 = COPY %1 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[UV]](i32) + ; GFX8-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[UV1]](i32) + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f32>) = G_BUILD_VECTOR [[UITOFP]](f32), [[UITOFP1]](f32) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x i32>) = G_BITCAST [[BUILD_VECTOR]](<2 x f32>) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY 
[[BITCAST]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x f32>) = G_UITOFP %0(<2 x i32>) + %2:_(<2 x i32>) = G_BITCAST %1(<2 x f32>) + $vgpr0_vgpr1 = COPY %2(<2 x i32>) ... --- @@ -86,40 +98,44 @@ body: | ; GFX6-LABEL: name: test_uitofp_s64_to_s32 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX6-NEXT: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[UV1]](s32) - ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C]] - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[UMIN]](s32) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64) - ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[C1]], [[UV2]] - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UMIN1]] - ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[OR]](s32) - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]] - ; GFX6-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[UITOFP]], [[SUB]](s32) - ; GFX6-NEXT: $vgpr0 = COPY [[FLDEXP]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX6-NEXT: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(i32) = G_AMDGPU_FFBH_U32 [[UV1]](i32) + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C]] + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[COPY]], [[UMIN]](i32) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SHL]](i64) + ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(i32) = G_UMIN [[C1]], [[UV2]] + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[UV3]], [[UMIN1]] + ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[OR]](i32) + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C]], [[UMIN]] + ; GFX6-NEXT: [[FLDEXP:%[0-9]+]]:_(f32) = G_FLDEXP [[UITOFP]], [[SUB]](i32) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[FLDEXP]](f32) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST]](i32) + ; ; GFX8-LABEL: name: test_uitofp_s64_to_s32 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX8-NEXT: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[UV1]](s32) - ; GFX8-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C]] - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[UMIN]](s32) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64) - ; GFX8-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[C1]], [[UV2]] - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UMIN1]] - ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[OR]](s32) - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]] - ; GFX8-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[UITOFP]], [[SUB]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[FLDEXP]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_UITOFP %0 - $vgpr0 = COPY %1 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), 
[[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX8-NEXT: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(i32) = G_AMDGPU_FFBH_U32 [[UV1]](i32) + ; GFX8-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C]] + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[COPY]], [[UMIN]](i32) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SHL]](i64) + ; GFX8-NEXT: [[UMIN1:%[0-9]+]]:_(i32) = G_UMIN [[C1]], [[UV2]] + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[UV3]], [[UMIN1]] + ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[OR]](i32) + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C]], [[UMIN]] + ; GFX8-NEXT: [[FLDEXP:%[0-9]+]]:_(f32) = G_FLDEXP [[UITOFP]], [[SUB]](i32) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[FLDEXP]](f32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(f32) = G_UITOFP %0(i64) + %2:_(i32) = G_BITCAST %1(f32) + $vgpr0 = COPY %2(i32) ... --- @@ -131,28 +147,32 @@ body: | ; GFX6-LABEL: name: test_uitofp_s64_to_s64 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s64) = G_UITOFP [[UV1]](s32) - ; GFX6-NEXT: [[UITOFP1:%[0-9]+]]:_(s64) = G_UITOFP [[UV]](s32) - ; GFX6-NEXT: [[FLDEXP:%[0-9]+]]:_(s64) = G_FLDEXP [[UITOFP]], [[C]](s32) - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[FLDEXP]], [[UITOFP1]] - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[FADD]](s64) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(f64) = G_UITOFP [[UV1]](i32) + ; GFX6-NEXT: [[UITOFP1:%[0-9]+]]:_(f64) = G_UITOFP [[UV]](i32) + ; GFX6-NEXT: [[FLDEXP:%[0-9]+]]:_(f64) = G_FLDEXP [[UITOFP]], [[C]](i32) + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(f64) = G_FADD [[FLDEXP]], [[UITOFP1]] + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i64) = G_BITCAST [[FADD]](f64) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](i64) + ; ; GFX8-LABEL: name: test_uitofp_s64_to_s64 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s64) = G_UITOFP [[UV1]](s32) - ; GFX8-NEXT: [[UITOFP1:%[0-9]+]]:_(s64) = G_UITOFP [[UV]](s32) - ; GFX8-NEXT: [[FLDEXP:%[0-9]+]]:_(s64) = G_FLDEXP [[UITOFP]], [[C]](s32) - ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[FLDEXP]], [[UITOFP1]] - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[FADD]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_UITOFP %0 - $vgpr0_vgpr1 = COPY %1 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(f64) = G_UITOFP [[UV1]](i32) + ; GFX8-NEXT: [[UITOFP1:%[0-9]+]]:_(f64) = G_UITOFP [[UV]](i32) + ; GFX8-NEXT: [[FLDEXP:%[0-9]+]]:_(f64) = G_FLDEXP [[UITOFP]], [[C]](i32) + ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(f64) = G_FADD [[FLDEXP]], [[UITOFP1]] + ; GFX8-NEXT: 
[[BITCAST:%[0-9]+]]:_(i64) = G_BITCAST [[FADD]](f64) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(f64) = G_UITOFP %0(i64) + %2:_(i64) = G_BITCAST %1(f64) + $vgpr0_vgpr1 = COPY %2(i64) ... --- @@ -164,25 +184,29 @@ body: | ; GFX6-LABEL: name: test_uitofp_s16_to_s16 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s16) = G_UITOFP [[AND]](s32) - ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UITOFP]](s16) - ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(f16) = G_UITOFP [[AND]](i32) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i16) = G_BITCAST [[UITOFP]](f16) + ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST]](i16) + ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + ; ; GFX8-LABEL: name: test_uitofp_s16_to_s16 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s16) = G_UITOFP [[TRUNC]](s16) - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UITOFP]](s16) - ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s16) = G_UITOFP %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(f16) = G_UITOFP [[TRUNC]](i16) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i16) = G_BITCAST [[UITOFP]](f16) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST]](i16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(f16) = G_UITOFP %1(i16) + %3:_(i16) = G_BITCAST %2(f16) + %4:_(i32) = G_ANYEXT %3(i16) + $vgpr0 = COPY %4(i32) ... 
--- @@ -194,23 +218,27 @@ body: | ; GFX6-LABEL: name: test_uitofp_s16_to_s32 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND]](s32) - ; GFX6-NEXT: $vgpr0 = COPY [[UITOFP]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[AND]](i32) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UITOFP]](f32) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST]](i32) + ; ; GFX8-LABEL: name: test_uitofp_s16_to_s32 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[UITOFP]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s32) = G_UITOFP %1 - $vgpr0 = COPY %2 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[AND]](i32) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UITOFP]](f32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(f32) = G_UITOFP %1(i16) + %3:_(i32) = G_BITCAST %2(f32) + $vgpr0 = COPY %3(i32) ... --- @@ -222,23 +250,27 @@ body: | ; GFX6-LABEL: name: test_uitofp_s16_to_s64 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s64) = G_UITOFP [[AND]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[UITOFP]](s64) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(f64) = G_UITOFP [[AND]](i32) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i64) = G_BITCAST [[UITOFP]](f64) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](i64) + ; ; GFX8-LABEL: name: test_uitofp_s16_to_s64 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s64) = G_UITOFP [[AND]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[UITOFP]](s64) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s64) = G_UITOFP %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(f64) = G_UITOFP [[AND]](i32) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i64) = G_BITCAST [[UITOFP]](f64) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](i64) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(f64) = G_UITOFP %1(i16) + %3:_(i64) = G_BITCAST %2(f64) + $vgpr0_vgpr1 = COPY %3(i64) ... 
--- @@ -250,26 +282,30 @@ body: | ; GFX6-LABEL: name: test_uitofp_s8_to_s16 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s16) = G_UITOFP [[AND]](s32) - ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UITOFP]](s16) - ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(f16) = G_UITOFP [[AND]](i32) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i16) = G_BITCAST [[UITOFP]](f16) + ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST]](i16) + ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + ; ; GFX8-LABEL: name: test_uitofp_s8_to_s16 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s16) = G_UITOFP [[AND]](s32) - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UITOFP]](s16) - ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s8) = G_TRUNC %0 - %2:_(s16) = G_UITOFP %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(f16) = G_UITOFP [[AND]](i32) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i16) = G_BITCAST [[UITOFP]](f16) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST]](i16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i8) = G_TRUNC %0(i32) + %2:_(f16) = G_UITOFP %1(i8) + %3:_(i16) = G_BITCAST %2(f16) + %4:_(i32) = G_ANYEXT %3(i16) + $vgpr0 = COPY %4(i32) ... 
--- @@ -281,23 +317,27 @@ body: | ; GFX6-LABEL: name: test_uitofp_s8_to_s32 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND]](s32) - ; GFX6-NEXT: $vgpr0 = COPY [[UITOFP]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[AND]](i32) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UITOFP]](f32) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST]](i32) + ; ; GFX8-LABEL: name: test_uitofp_s8_to_s32 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[UITOFP]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s8) = G_TRUNC %0 - %2:_(s32) = G_UITOFP %1 - $vgpr0 = COPY %2 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[AND]](i32) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UITOFP]](f32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i8) = G_TRUNC %0(i32) + %2:_(f32) = G_UITOFP %1(i8) + %3:_(i32) = G_BITCAST %2(f32) + $vgpr0 = COPY %3(i32) ... --- @@ -309,23 +349,27 @@ body: | ; GFX6-LABEL: name: test_uitofp_s8_to_s64 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s64) = G_UITOFP [[AND]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[UITOFP]](s64) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(f64) = G_UITOFP [[AND]](i32) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i64) = G_BITCAST [[UITOFP]](f64) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](i64) + ; ; GFX8-LABEL: name: test_uitofp_s8_to_s64 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s64) = G_UITOFP [[AND]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[UITOFP]](s64) - %0:_(s32) = COPY $vgpr0 - %1:_(s8) = G_TRUNC %0 - %2:_(s64) = G_UITOFP %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(f64) = G_UITOFP [[AND]](i32) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i64) = G_BITCAST [[UITOFP]](f64) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](i64) + %0:_(i32) = COPY $vgpr0 + %1:_(i8) = G_TRUNC %0(i32) + %2:_(f64) = G_UITOFP %1(i8) + %3:_(i64) = G_BITCAST %2(f64) + $vgpr0_vgpr1 = COPY %3(i64) ... 
--- @@ -337,28 +381,32 @@ body: | ; GFX6-LABEL: name: test_uitofp_s1_to_s16 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3C00 - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[TRUNC]](s1), [[C]], [[C1]] - ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) - ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(i1) = G_TRUNC [[COPY]](i32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH3C00 + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH0000 + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(f16) = G_SELECT [[TRUNC]](i1), [[C]], [[C1]] + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i16) = G_BITCAST [[SELECT]](f16) + ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST]](i16) + ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + ; ; GFX8-LABEL: name: test_uitofp_s1_to_s16 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3C00 - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[TRUNC]](s1), [[C]], [[C1]] - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) - ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s1) = G_TRUNC %0 - %2:_(s16) = G_UITOFP %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i1) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH3C00 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(f16) = G_FCONSTANT half 0xH0000 + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(f16) = G_SELECT [[TRUNC]](i1), [[C]], [[C1]] + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i16) = G_BITCAST [[SELECT]](f16) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST]](i16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i1) = G_TRUNC %0(i32) + %2:_(f16) = G_UITOFP %1(i1) + %3:_(i16) = G_BITCAST %2(f16) + %4:_(i32) = G_ANYEXT %3(i16) + $vgpr0 = COPY %4(i32) ... 
--- @@ -370,25 +418,29 @@ body: | ; GFX6-LABEL: name: test_uitofp_s1_to_s32 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC]](s1), [[C]], [[C1]] - ; GFX6-NEXT: $vgpr0 = COPY [[SELECT]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(i1) = G_TRUNC [[COPY]](i32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(f32) = G_SELECT [[TRUNC]](i1), [[C]], [[C1]] + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[SELECT]](f32) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST]](i32) + ; ; GFX8-LABEL: name: test_uitofp_s1_to_s32 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC]](s1), [[C]], [[C1]] - ; GFX8-NEXT: $vgpr0 = COPY [[SELECT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s1) = G_TRUNC %0 - %2:_(s32) = G_UITOFP %1 - $vgpr0 = COPY %2 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i1) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(f32) = G_SELECT [[TRUNC]](i1), [[C]], [[C1]] + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[SELECT]](f32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i1) = G_TRUNC %0(i32) + %2:_(f32) = G_UITOFP %1(i1) + %3:_(i32) = G_BITCAST %2(f32) + $vgpr0 = COPY %3(i32) ... 
--- @@ -400,25 +452,29 @@ body: | ; GFX6-LABEL: name: test_uitofp_s1_to_s64 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00 - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[TRUNC]](s1), [[C]], [[C1]] - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(i1) = G_TRUNC [[COPY]](i32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(f64) = G_FCONSTANT double 1.000000e+00 + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(f64) = G_FCONSTANT double 0.000000e+00 + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(f64) = G_SELECT [[TRUNC]](i1), [[C]], [[C1]] + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i64) = G_BITCAST [[SELECT]](f64) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](i64) + ; ; GFX8-LABEL: name: test_uitofp_s1_to_s64 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00 - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[TRUNC]](s1), [[C]], [[C1]] - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) - %0:_(s32) = COPY $vgpr0 - %1:_(s1) = G_TRUNC %0 - %2:_(s64) = G_UITOFP %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i1) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(f64) = G_FCONSTANT double 1.000000e+00 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(f64) = G_FCONSTANT double 0.000000e+00 + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(f64) = G_SELECT [[TRUNC]](i1), [[C]], [[C1]] + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i64) = G_BITCAST [[SELECT]](f64) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](i64) + %0:_(i32) = COPY $vgpr0 + %1:_(i1) = G_TRUNC %0(i32) + %2:_(f64) = G_UITOFP %1(i1) + %3:_(i64) = G_BITCAST %2(f64) + $vgpr0_vgpr1 = COPY %3(i64) ... 
--- @@ -430,45 +486,49 @@ body: | ; GFX6-LABEL: name: test_uitofp_s33_to_s32 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX6-NEXT: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[UV1]](s32) - ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C1]] - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[AND]], [[UMIN]](s32) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64) - ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[C2]], [[UV2]] - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UMIN1]] - ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[OR]](s32) - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UMIN]] - ; GFX6-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[UITOFP]], [[SUB]](s32) - ; GFX6-NEXT: $vgpr0 = COPY [[FLDEXP]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 8589934591 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[COPY]], [[C]] + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND]](i64) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX6-NEXT: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(i32) = G_AMDGPU_FFBH_U32 [[UV1]](i32) + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C1]] + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[AND]], [[UMIN]](i32) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SHL]](i64) + ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(i32) = G_UMIN [[C2]], [[UV2]] + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[UV3]], [[UMIN1]] + ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[OR]](i32) + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C1]], [[UMIN]] + ; GFX6-NEXT: [[FLDEXP:%[0-9]+]]:_(f32) = G_FLDEXP [[UITOFP]], [[SUB]](i32) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[FLDEXP]](f32) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST]](i32) + ; ; GFX8-LABEL: name: test_uitofp_s33_to_s32 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX8-NEXT: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[UV1]](s32) - ; GFX8-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C1]] - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[AND]], [[UMIN]](s32) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64) - ; GFX8-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[C2]], [[UV2]] - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UMIN1]] - ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[OR]](s32) - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UMIN]] - ; GFX8-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[UITOFP]], [[SUB]](s32) - ; GFX8-NEXT: $vgpr0 = COPY 
[[FLDEXP]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s33) = G_TRUNC %0 - %2:_(s32) = G_UITOFP %1 - $vgpr0 = COPY %2 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 8589934591 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[COPY]], [[C]] + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND]](i64) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX8-NEXT: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(i32) = G_AMDGPU_FFBH_U32 [[UV1]](i32) + ; GFX8-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C1]] + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[AND]], [[UMIN]](i32) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SHL]](i64) + ; GFX8-NEXT: [[UMIN1:%[0-9]+]]:_(i32) = G_UMIN [[C2]], [[UV2]] + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[UV3]], [[UMIN1]] + ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[OR]](i32) + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C1]], [[UMIN]] + ; GFX8-NEXT: [[FLDEXP:%[0-9]+]]:_(f32) = G_FLDEXP [[UITOFP]], [[SUB]](i32) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[FLDEXP]](f32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i33) = G_TRUNC %0(i64) + %2:_(f32) = G_UITOFP %1(i33) + %3:_(i32) = G_BITCAST %2(f32) + $vgpr0 = COPY %3(i32) ... --- @@ -480,45 +540,49 @@ body: | ; GFX6-LABEL: name: test_uitofp_s64_to_s16 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX6-NEXT: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[UV1]](s32) - ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C]] - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[UMIN]](s32) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64) - ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[C1]], [[UV2]] - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UMIN1]] - ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[OR]](s32) - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]] - ; GFX6-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[UITOFP]], [[SUB]](s32) - ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FLDEXP]](s32) - ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX6-NEXT: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(i32) = G_AMDGPU_FFBH_U32 [[UV1]](i32) + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C]] + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[COPY]], [[UMIN]](i32) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SHL]](i64) + ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(i32) = G_UMIN [[C1]], [[UV2]] + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[UV3]], [[UMIN1]] + ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[OR]](i32) + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C]], [[UMIN]] + ; GFX6-NEXT: [[FLDEXP:%[0-9]+]]:_(f32) = 
G_FLDEXP [[UITOFP]], [[SUB]](i32) + ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FLDEXP]](f32) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST]](i16) + ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + ; ; GFX8-LABEL: name: test_uitofp_s64_to_s16 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX8-NEXT: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[UV1]](s32) - ; GFX8-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C]] - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[UMIN]](s32) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64) - ; GFX8-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[C1]], [[UV2]] - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UMIN1]] - ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[OR]](s32) - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]] - ; GFX8-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[UITOFP]], [[SUB]](s32) - ; GFX8-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FLDEXP]](s32) - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s16) = G_UITOFP %0 - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX8-NEXT: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(i32) = G_AMDGPU_FFBH_U32 [[UV1]](i32) + ; GFX8-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C]] + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[COPY]], [[UMIN]](i32) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SHL]](i64) + ; GFX8-NEXT: [[UMIN1:%[0-9]+]]:_(i32) = G_UMIN [[C1]], [[UV2]] + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[UV3]], [[UMIN1]] + ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[OR]](i32) + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C]], [[UMIN]] + ; GFX8-NEXT: [[FLDEXP:%[0-9]+]]:_(f32) = G_FLDEXP [[UITOFP]], [[SUB]](i32) + ; GFX8-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FLDEXP]](f32) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST]](i16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(f16) = G_UITOFP %0(i64) + %2:_(i16) = G_BITCAST %1(f16) + %3:_(i32) = G_ANYEXT %2(i16) + $vgpr0 = COPY %3(i32) ... 
--- @@ -530,76 +594,84 @@ body: | ; GFX6-LABEL: name: test_sitofp_v2s64_to_v2s16 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX6-NEXT: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[UV3]](s32) - ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C]] - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[UMIN]](s32) - ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64) - ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[C1]], [[UV4]] - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV5]], [[UMIN1]] - ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[OR]](s32) - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]] - ; GFX6-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[UITOFP]], [[SUB]](s32) - ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FLDEXP]](s32) - ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX6-NEXT: [[AMDGPU_FFBH_U32_1:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[UV7]](s32) - ; GFX6-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_1]], [[C]] - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UMIN2]](s32) - ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL1]](s64) - ; GFX6-NEXT: [[UMIN3:%[0-9]+]]:_(s32) = G_UMIN [[C1]], [[UV8]] - ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[UV9]], [[UMIN3]] - ; GFX6-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[OR1]](s32) - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN2]] - ; GFX6-NEXT: [[FLDEXP1:%[0-9]+]]:_(s32) = G_FLDEXP [[UITOFP1]], [[SUB1]](s32) - ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FLDEXP1]](s32) - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) - ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV]](i64) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX6-NEXT: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(i32) = G_AMDGPU_FFBH_U32 [[UV3]](i32) + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C]] + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV]], [[UMIN]](i32) + ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SHL]](i64) + ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(i32) = G_UMIN [[C1]], [[UV4]] + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[UV5]], [[UMIN1]] + ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[OR]](i32) + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C]], [[UMIN]] + ; GFX6-NEXT: [[FLDEXP:%[0-9]+]]:_(f32) = G_FLDEXP [[UITOFP]], 
[[SUB]](i32) + ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FLDEXP]](f32) + ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV1]](i64) + ; GFX6-NEXT: [[AMDGPU_FFBH_U32_1:%[0-9]+]]:_(i32) = G_AMDGPU_FFBH_U32 [[UV7]](i32) + ; GFX6-NEXT: [[UMIN2:%[0-9]+]]:_(i32) = G_UMIN [[AMDGPU_FFBH_U32_1]], [[C]] + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[UMIN2]](i32) + ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SHL1]](i64) + ; GFX6-NEXT: [[UMIN3:%[0-9]+]]:_(i32) = G_UMIN [[C1]], [[UV8]] + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[UV9]], [[UMIN3]] + ; GFX6-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[OR1]](i32) + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C]], [[UMIN2]] + ; GFX6-NEXT: [[FLDEXP1:%[0-9]+]]:_(f32) = G_FLDEXP [[UITOFP1]], [[SUB1]](i32) + ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[FLDEXP1]](f32) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST]](i16) + ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST1]](i16) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C2]](i32) + ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR2]](i32) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST2]](<2 x f16>) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x i16>) + ; ; GFX8-LABEL: name: test_sitofp_v2s64_to_v2s16 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX8-NEXT: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[UV3]](s32) - ; GFX8-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C]] - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[UMIN]](s32) - ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64) - ; GFX8-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[C1]], [[UV4]] - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV5]], [[UMIN1]] - ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[OR]](s32) - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]] - ; GFX8-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[UITOFP]], [[SUB]](s32) - ; GFX8-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FLDEXP]](s32) - ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX8-NEXT: [[AMDGPU_FFBH_U32_1:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[UV7]](s32) - ; GFX8-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_1]], [[C]] - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UMIN2]](s32) - ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL1]](s64) - ; GFX8-NEXT: [[UMIN3:%[0-9]+]]:_(s32) = G_UMIN [[C1]], [[UV8]] - ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[UV9]], [[UMIN3]] - ; GFX8-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[OR1]](s32) - ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN2]] - ; GFX8-NEXT: [[FLDEXP1:%[0-9]+]]:_(s32) = G_FLDEXP 
[[UITOFP1]], [[SUB1]](s32) - ; GFX8-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FLDEXP1]](s32) - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) - ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s16>) = G_UITOFP %0 - $vgpr0 = COPY %1 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV]](i64) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 32 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX8-NEXT: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(i32) = G_AMDGPU_FFBH_U32 [[UV3]](i32) + ; GFX8-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C]] + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV]], [[UMIN]](i32) + ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SHL]](i64) + ; GFX8-NEXT: [[UMIN1:%[0-9]+]]:_(i32) = G_UMIN [[C1]], [[UV4]] + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[UV5]], [[UMIN1]] + ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[OR]](i32) + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C]], [[UMIN]] + ; GFX8-NEXT: [[FLDEXP:%[0-9]+]]:_(f32) = G_FLDEXP [[UITOFP]], [[SUB]](i32) + ; GFX8-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FLDEXP]](f32) + ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV1]](i64) + ; GFX8-NEXT: [[AMDGPU_FFBH_U32_1:%[0-9]+]]:_(i32) = G_AMDGPU_FFBH_U32 [[UV7]](i32) + ; GFX8-NEXT: [[UMIN2:%[0-9]+]]:_(i32) = G_UMIN [[AMDGPU_FFBH_U32_1]], [[C]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[UMIN2]](i32) + ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[SHL1]](i64) + ; GFX8-NEXT: [[UMIN3:%[0-9]+]]:_(i32) = G_UMIN [[C1]], [[UV8]] + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[UV9]], [[UMIN3]] + ; GFX8-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[OR1]](i32) + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[C]], [[UMIN2]] + ; GFX8-NEXT: [[FLDEXP1:%[0-9]+]]:_(f32) = G_FLDEXP [[UITOFP1]], [[SUB1]](i32) + ; GFX8-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[FLDEXP1]](f32) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST]](i16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST1]](i16) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C2]](i32) + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR2]](i32) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST2]](<2 x f16>) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x i16>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x f16>) = G_UITOFP %0(<2 x i64>) + %2:_(<2 x i16>) = G_BITCAST %1(<2 x f16>) + $vgpr0 = COPY %2(<2 x i16>) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir index 3f8bbbce2d31d..21520d84347eb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir @@ -14,30 +14,30 @@ body: | ; SI-LABEL: name: test_umax_s32 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[COPY]], [[COPY1]] - ; SI-NEXT: $vgpr0 = COPY [[UMAX]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[UMAX:%[0-9]+]]:_(i32) = G_UMAX [[COPY]], [[COPY1]] + ; SI-NEXT: $vgpr0 = COPY [[UMAX]](i32) ; ; VI-LABEL: name: test_umax_s32 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[COPY]], [[COPY1]] - ; VI-NEXT: $vgpr0 = COPY [[UMAX]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[UMAX:%[0-9]+]]:_(i32) = G_UMAX [[COPY]], [[COPY1]] + ; VI-NEXT: $vgpr0 = COPY [[UMAX]](i32) ; ; GFX9-LABEL: name: test_umax_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[COPY]], [[COPY1]] - ; GFX9-NEXT: $vgpr0 = COPY [[UMAX]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_UMAX %0, %1 - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[UMAX:%[0-9]+]]:_(i32) = G_UMAX [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[UMAX]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_UMAX %0, %1 + $vgpr0 = COPY %2(i32) ... 
--- @@ -49,33 +49,33 @@ body: | ; SI-LABEL: name: test_umax_s64 ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ugt), [[COPY]](s64), [[COPY1]] - ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ugt), [[COPY]](i64), [[COPY1]] + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[COPY]], [[COPY1]] + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](i64) ; ; VI-LABEL: name: test_umax_s64 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ugt), [[COPY]](s64), [[COPY1]] - ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ugt), [[COPY]](i64), [[COPY1]] + ; VI-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[COPY]], [[COPY1]] + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](i64) ; ; GFX9-LABEL: name: test_umax_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ugt), [[COPY]](s64), [[COPY1]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = G_UMAX %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ugt), [[COPY]](i64), [[COPY1]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64) = G_UMAX %0, %1 + $vgpr0_vgpr1 = COPY %2(i64) ... 
--- @@ -87,42 +87,42 @@ body: | ; SI-LABEL: name: test_umax_s16 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; SI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]] - ; SI-NEXT: $vgpr0 = COPY [[UMAX]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; SI-NEXT: [[UMAX:%[0-9]+]]:_(i32) = G_UMAX [[AND]], [[AND1]] + ; SI-NEXT: $vgpr0 = COPY [[UMAX]](i32) ; ; VI-LABEL: name: test_umax_s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI-NEXT: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC]], [[TRUNC1]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX]](s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; VI-NEXT: [[UMAX:%[0-9]+]]:_(i16) = G_UMAX [[TRUNC]], [[TRUNC1]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[UMAX]](i16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: test_umax_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC]], [[TRUNC1]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s16) = G_UMAX %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[UMAX:%[0-9]+]]:_(i16) = G_UMAX [[TRUNC]], [[TRUNC1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[UMAX]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i16) = G_UMAX %2, %3 + %5:_(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... 
--- @@ -134,48 +134,48 @@ body: | ; SI-LABEL: name: test_umax_s8 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; SI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]] - ; SI-NEXT: $vgpr0 = COPY [[UMAX]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; SI-NEXT: [[UMAX:%[0-9]+]]:_(i32) = G_UMAX [[AND]], [[AND1]] + ; SI-NEXT: $vgpr0 = COPY [[UMAX]](i32) ; ; VI-LABEL: name: test_umax_s8 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; VI-NEXT: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[AND]], [[AND1]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX]](s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C]] + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C]] + ; VI-NEXT: [[UMAX:%[0-9]+]]:_(i16) = G_UMAX [[AND]], [[AND1]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[UMAX]](i16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: test_umax_s8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; GFX9-NEXT: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[AND]], [[AND1]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s8) = G_TRUNC %0 - %3:_(s8) = G_TRUNC %1 - %4:_(s8) = G_UMAX %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C]] + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C]] + ; GFX9-NEXT: [[UMAX:%[0-9]+]]:_(i16) = G_UMAX [[AND]], [[AND1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[UMAX]](i16) + ; GFX9-NEXT: 
$vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i8) = G_TRUNC %0(i32) + %3:_(i8) = G_TRUNC %1(i32) + %4:_(i8) = G_UMAX %2, %3 + %5:_(i32) = G_ANYEXT %4(i8) + $vgpr0 = COPY %5(i32) ... --- @@ -187,42 +187,42 @@ body: | ; SI-LABEL: name: test_umax_s17 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 131071 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; SI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]] - ; SI-NEXT: $vgpr0 = COPY [[UMAX]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 131071 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; SI-NEXT: [[UMAX:%[0-9]+]]:_(i32) = G_UMAX [[AND]], [[AND1]] + ; SI-NEXT: $vgpr0 = COPY [[UMAX]](i32) ; ; VI-LABEL: name: test_umax_s17 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 131071 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; VI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]] - ; VI-NEXT: $vgpr0 = COPY [[UMAX]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 131071 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; VI-NEXT: [[UMAX:%[0-9]+]]:_(i32) = G_UMAX [[AND]], [[AND1]] + ; VI-NEXT: $vgpr0 = COPY [[UMAX]](i32) ; ; GFX9-LABEL: name: test_umax_s17 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 131071 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX9-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]] - ; GFX9-NEXT: $vgpr0 = COPY [[UMAX]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s17) = G_TRUNC %0 - %3:_(s17) = G_TRUNC %1 - %4:_(s17) = G_UMAX %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 131071 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GFX9-NEXT: [[UMAX:%[0-9]+]]:_(i32) = G_UMAX [[AND]], [[AND1]] + ; GFX9-NEXT: $vgpr0 = COPY [[UMAX]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i17) = G_TRUNC %0(i32) + %3:_(i17) = G_TRUNC %1(i32) + %4:_(i17) = G_UMAX %2, %3 + %5:_(i32) = G_ANYEXT %4(i17) + $vgpr0 = COPY %5(i32) ... 
--- @@ -234,42 +234,42 @@ body: | ; SI-LABEL: name: test_umax_v2s32 ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[UV]], [[UV2]] - ; SI-NEXT: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[UV1]], [[UV3]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMAX]](s32), [[UMAX1]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; SI-NEXT: [[UMAX:%[0-9]+]]:_(i32) = G_UMAX [[UV]], [[UV2]] + ; SI-NEXT: [[UMAX1:%[0-9]+]]:_(i32) = G_UMAX [[UV1]], [[UV3]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[UMAX]](i32), [[UMAX1]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; VI-LABEL: name: test_umax_v2s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[UV]], [[UV2]] - ; VI-NEXT: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[UV1]], [[UV3]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMAX]](s32), [[UMAX1]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; VI-NEXT: [[UMAX:%[0-9]+]]:_(i32) = G_UMAX [[UV]], [[UV2]] + ; VI-NEXT: [[UMAX1:%[0-9]+]]:_(i32) = G_UMAX [[UV1]], [[UV3]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[UMAX]](i32), [[UMAX1]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX9-LABEL: name: test_umax_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[UV]], [[UV2]] - ; GFX9-NEXT: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[UV1]], [[UV3]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMAX]](s32), [[UMAX1]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = G_UMAX %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: 
[[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX9-NEXT: [[UMAX:%[0-9]+]]:_(i32) = G_UMAX [[UV]], [[UV2]] + ; GFX9-NEXT: [[UMAX1:%[0-9]+]]:_(i32) = G_UMAX [[UV1]], [[UV3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[UMAX]](i32), [[UMAX1]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i32>) = G_UMAX %0, %1 + $vgpr0_vgpr1 = COPY %2(<2 x i32>) ... --- @@ -281,45 +281,45 @@ body: | ; SI-LABEL: name: test_umax_v3s32 ; SI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; SI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[UV]], [[UV3]] - ; SI-NEXT: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[UV1]], [[UV4]] - ; SI-NEXT: [[UMAX2:%[0-9]+]]:_(s32) = G_UMAX [[UV2]], [[UV5]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMAX]](s32), [[UMAX1]](s32), [[UMAX2]](s32) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; SI-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; SI-NEXT: [[UMAX:%[0-9]+]]:_(i32) = G_UMAX [[UV]], [[UV3]] + ; SI-NEXT: [[UMAX1:%[0-9]+]]:_(i32) = G_UMAX [[UV1]], [[UV4]] + ; SI-NEXT: [[UMAX2:%[0-9]+]]:_(i32) = G_UMAX [[UV2]], [[UV5]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[UMAX]](i32), [[UMAX1]](i32), [[UMAX2]](i32) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) ; ; VI-LABEL: name: test_umax_v3s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; VI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[UV]], [[UV3]] - ; VI-NEXT: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[UV1]], [[UV4]] - ; VI-NEXT: [[UMAX2:%[0-9]+]]:_(s32) = G_UMAX [[UV2]], [[UV5]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMAX]](s32), [[UMAX1]](s32), [[UMAX2]](s32) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; VI-NEXT: 
[[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; VI-NEXT: [[UMAX:%[0-9]+]]:_(i32) = G_UMAX [[UV]], [[UV3]] + ; VI-NEXT: [[UMAX1:%[0-9]+]]:_(i32) = G_UMAX [[UV1]], [[UV4]] + ; VI-NEXT: [[UMAX2:%[0-9]+]]:_(i32) = G_UMAX [[UV2]], [[UV5]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[UMAX]](i32), [[UMAX1]](i32), [[UMAX2]](i32) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) ; ; GFX9-LABEL: name: test_umax_v3s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; GFX9-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[UV]], [[UV3]] - ; GFX9-NEXT: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[UV1]], [[UV4]] - ; GFX9-NEXT: [[UMAX2:%[0-9]+]]:_(s32) = G_UMAX [[UV2]], [[UV5]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMAX]](s32), [[UMAX1]](s32), [[UMAX2]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - %2:_(<3 x s32>) = G_UMAX %0, %1 - $vgpr0_vgpr1_vgpr2 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; GFX9-NEXT: [[UMAX:%[0-9]+]]:_(i32) = G_UMAX [[UV]], [[UV3]] + ; GFX9-NEXT: [[UMAX1:%[0-9]+]]:_(i32) = G_UMAX [[UV1]], [[UV4]] + ; GFX9-NEXT: [[UMAX2:%[0-9]+]]:_(i32) = G_UMAX [[UV2]], [[UV5]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[UMAX]](i32), [[UMAX1]](i32), [[UMAX2]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(<3 x i32>) = G_UMAX %0, %1 + $vgpr0_vgpr1_vgpr2 = COPY %2(<3 x i32>) ... 
--- @@ -331,57 +331,57 @@ body: | ; SI-LABEL: name: test_umax_v2s16 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; SI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]] - ; SI-NEXT: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[LSHR]], [[LSHR1]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UMAX1]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UMAX]], [[SHL]] - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C1]] + ; SI-NEXT: [[UMAX:%[0-9]+]]:_(i32) = G_UMAX [[AND]], [[AND1]] + ; SI-NEXT: [[UMAX1:%[0-9]+]]:_(i32) = G_UMAX [[LSHR]], [[LSHR1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[UMAX1]], [[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[UMAX]], [[SHL]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) ; ; VI-LABEL: name: test_umax_v2s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC]], [[TRUNC2]] - ; VI-NEXT: [[UMAX1:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC1]], [[TRUNC3]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UMAX]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UMAX1]](s16) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; VI-NEXT: 
[[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[UMAX:%[0-9]+]]:_(i16) = G_UMAX [[TRUNC]], [[TRUNC2]] + ; VI-NEXT: [[UMAX1:%[0-9]+]]:_(i16) = G_UMAX [[TRUNC1]], [[TRUNC3]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UMAX]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UMAX1]](i16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) ; ; GFX9-LABEL: name: test_umax_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[UMAX:%[0-9]+]]:_(<2 x s16>) = G_UMAX [[COPY]], [[COPY1]] - ; GFX9-NEXT: $vgpr0 = COPY [[UMAX]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_UMAX %0, %1 - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[UMAX:%[0-9]+]]:_(<2 x i16>) = G_UMAX [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[UMAX]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = G_UMAX %0, %1 + $vgpr0 = COPY %2(<2 x i16>) ... 
--- @@ -393,81 +393,81 @@ body: | ; SI-LABEL: name: test_umax_v3s16 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; SI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]] - ; SI-NEXT: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[LSHR]], [[LSHR1]] - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; SI-NEXT: [[UMAX2:%[0-9]+]]:_(s32) = G_UMAX [[AND2]], [[AND3]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMAX]](s32), [[UMAX1]](s32), [[UMAX2]](s32) - ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF1]](<4 x i16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C1]] + ; SI-NEXT: [[UMAX:%[0-9]+]]:_(i32) = G_UMAX [[AND]], [[AND1]] + ; SI-NEXT: [[UMAX1:%[0-9]+]]:_(i32) = G_UMAX [[LSHR]], [[LSHR1]] + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST3]], [[C1]] + ; SI-NEXT: [[UMAX2:%[0-9]+]]:_(i32) = G_UMAX [[AND2]], [[AND3]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[UMAX]](i32), [[UMAX1]](i32), [[UMAX2]](i32) + ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x i32>) ; ; VI-LABEL: name: test_umax_v3s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: 
[[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI-NEXT: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC]], [[TRUNC3]] - ; VI-NEXT: [[UMAX1:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC1]], [[TRUNC4]] - ; VI-NEXT: [[UMAX2:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC2]], [[TRUNC5]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX]](s16) - ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX1]](s16) - ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX2]](s16) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF1]](<4 x i16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST3]](i32) + ; VI-NEXT: [[UMAX:%[0-9]+]]:_(i16) = G_UMAX [[TRUNC]], [[TRUNC3]] + ; VI-NEXT: [[UMAX1:%[0-9]+]]:_(i16) = G_UMAX [[TRUNC1]], [[TRUNC4]] + ; VI-NEXT: [[UMAX2:%[0-9]+]]:_(i16) = G_UMAX [[TRUNC2]], [[TRUNC5]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[UMAX]](i16) + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[UMAX1]](i16) + ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[UMAX2]](i16) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32), [[ANYEXT2]](i32) + ; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x i32>) ; ; GFX9-LABEL: name: test_umax_v3s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: 
[[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[UMAX:%[0-9]+]]:_(<2 x s16>) = G_UMAX [[UV]], [[UV2]] - ; GFX9-NEXT: [[UMAX1:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC]], [[TRUNC1]] - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UMAX]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST2]](s32), [[LSHR]](s32), [[ANYEXT]](s32) - ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) - %0:_(<3 x s16>) = G_IMPLICIT_DEF - %1:_(<3 x s16>) = G_IMPLICIT_DEF - %2:_(<3 x s16>) = G_UMAX %0, %1 - %3:_(<3 x s32>) = G_ANYEXT %2 - S_NOP 0, implicit %3 + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF1]](<4 x i16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9-NEXT: [[UMAX:%[0-9]+]]:_(<2 x i16>) = G_UMAX [[UV]], [[UV2]] + ; GFX9-NEXT: [[UMAX1:%[0-9]+]]:_(i16) = G_UMAX [[TRUNC]], [[TRUNC1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UMAX]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[UMAX1]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[BITCAST2]](i32), [[LSHR]](i32), [[ANYEXT]](i32) + ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x i32>) + %0:_(<3 x i16>) = G_IMPLICIT_DEF + %1:_(<3 x i16>) = G_IMPLICIT_DEF + %2:_(<3 x i16>) = G_UMAX %0, %1 + %3:_(<3 x i32>) = G_ANYEXT %2(<3 x i16>) + S_NOP 0, implicit %3(<3 x i32>) ... 
--- @@ -479,91 +479,91 @@ body: | ; SI-LABEL: name: test_umax_v4s16 ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; SI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]] - ; SI-NEXT: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[LSHR]], [[LSHR2]] - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; SI-NEXT: [[UMAX2:%[0-9]+]]:_(s32) = G_UMAX [[AND2]], [[AND3]] - ; SI-NEXT: [[UMAX3:%[0-9]+]]:_(s32) = G_UMAX [[LSHR1]], [[LSHR3]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UMAX1]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UMAX]], [[SHL]] - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UMAX3]], [[C]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[UMAX2]], [[SHL1]] - ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C1]] + ; SI-NEXT: [[UMAX:%[0-9]+]]:_(i32) = G_UMAX [[AND]], [[AND1]] + ; SI-NEXT: [[UMAX1:%[0-9]+]]:_(i32) = G_UMAX [[LSHR]], 
[[LSHR2]] + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST3]], [[C1]] + ; SI-NEXT: [[UMAX2:%[0-9]+]]:_(i32) = G_UMAX [[AND2]], [[AND3]] + ; SI-NEXT: [[UMAX3:%[0-9]+]]:_(i32) = G_UMAX [[LSHR1]], [[LSHR3]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[UMAX1]], [[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[UMAX]], [[SHL]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[UMAX3]], [[C]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[UMAX2]], [[SHL1]] + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x i16>), [[BITCAST5]](<2 x i16>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; VI-LABEL: name: test_umax_v4s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; VI-NEXT: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC]], [[TRUNC4]] - ; VI-NEXT: [[UMAX1:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC1]], [[TRUNC5]] - ; VI-NEXT: [[UMAX2:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC2]], [[TRUNC6]] - ; VI-NEXT: [[UMAX3:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC3]], [[TRUNC7]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UMAX]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UMAX1]](s16) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UMAX2]](s16) - ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UMAX3]](s16) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x 
s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST3]](i32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; VI-NEXT: [[UMAX:%[0-9]+]]:_(i16) = G_UMAX [[TRUNC]], [[TRUNC4]] + ; VI-NEXT: [[UMAX1:%[0-9]+]]:_(i16) = G_UMAX [[TRUNC1]], [[TRUNC5]] + ; VI-NEXT: [[UMAX2:%[0-9]+]]:_(i16) = G_UMAX [[TRUNC2]], [[TRUNC6]] + ; VI-NEXT: [[UMAX3:%[0-9]+]]:_(i16) = G_UMAX [[TRUNC3]], [[TRUNC7]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UMAX]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UMAX1]](i16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[UMAX2]](i16) + ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[UMAX3]](i16) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x i16>), [[BITCAST5]](<2 x i16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX9-LABEL: name: test_umax_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9-NEXT: [[UMAX:%[0-9]+]]:_(<2 x s16>) = G_UMAX [[UV]], [[UV2]] - ; GFX9-NEXT: [[UMAX1:%[0-9]+]]:_(<2 x s16>) = G_UMAX [[UV1]], [[UV3]] - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UMAX]](<2 x s16>), [[UMAX1]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 - %2:_(<4 x s16>) = G_UMAX %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: 
[[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; GFX9-NEXT: [[UMAX:%[0-9]+]]:_(<2 x i16>) = G_UMAX [[UV]], [[UV2]] + ; GFX9-NEXT: [[UMAX1:%[0-9]+]]:_(<2 x i16>) = G_UMAX [[UV1]], [[UV3]] + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[UMAX]](<2 x i16>), [[UMAX1]](<2 x i16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = COPY $vgpr2_vgpr3 + %2:_(<4 x i16>) = G_UMAX %0, %1 + $vgpr0_vgpr1 = COPY %2(<4 x i16>) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir index f01143f0e0a9b..04d8dbe2481b3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir @@ -14,30 +14,30 @@ body: | ; SI-LABEL: name: test_umin_s32 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[COPY]], [[COPY1]] - ; SI-NEXT: $vgpr0 = COPY [[UMIN]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[COPY]], [[COPY1]] + ; SI-NEXT: $vgpr0 = COPY [[UMIN]](i32) ; ; VI-LABEL: name: test_umin_s32 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[COPY]], [[COPY1]] - ; VI-NEXT: $vgpr0 = COPY [[UMIN]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[COPY]], [[COPY1]] + ; VI-NEXT: $vgpr0 = COPY [[UMIN]](i32) ; ; GFX9-LABEL: name: test_umin_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[COPY]], [[COPY1]] - ; GFX9-NEXT: $vgpr0 = COPY [[UMIN]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_UMIN %0, %1 - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[UMIN]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_UMIN %0, %1 + $vgpr0 = COPY %2(i32) ... 
--- @@ -49,33 +49,33 @@ body: | ; SI-LABEL: name: test_umin_s64 ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]] - ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[COPY]](i64), [[COPY1]] + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[COPY]], [[COPY1]] + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](i64) ; ; VI-LABEL: name: test_umin_s64 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]] - ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[COPY]](i64), [[COPY1]] + ; VI-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[COPY]], [[COPY1]] + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](i64) ; ; GFX9-LABEL: name: test_umin_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = G_UMIN %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ult), [[COPY]](i64), [[COPY1]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64) = G_UMIN %0, %1 + $vgpr0_vgpr1 = COPY %2(i64) ... 
--- @@ -87,42 +87,42 @@ body: | ; SI-LABEL: name: test_umin_s16 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; SI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]] - ; SI-NEXT: $vgpr0 = COPY [[UMIN]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; SI-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[AND]], [[AND1]] + ; SI-NEXT: $vgpr0 = COPY [[UMIN]](i32) ; ; VI-LABEL: name: test_umin_s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI-NEXT: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC]], [[TRUNC1]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN]](s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; VI-NEXT: [[UMIN:%[0-9]+]]:_(i16) = G_UMIN [[TRUNC]], [[TRUNC1]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[UMIN]](i16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: test_umin_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC]], [[TRUNC1]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s16) = G_UMIN %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[UMIN:%[0-9]+]]:_(i16) = G_UMIN [[TRUNC]], [[TRUNC1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[UMIN]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i16) = G_UMIN %2, %3 + %5:_(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... 
--- @@ -134,48 +134,48 @@ body: | ; SI-LABEL: name: test_umin_s8 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; SI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]] - ; SI-NEXT: $vgpr0 = COPY [[UMIN]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; SI-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[AND]], [[AND1]] + ; SI-NEXT: $vgpr0 = COPY [[UMIN]](i32) ; ; VI-LABEL: name: test_umin_s8 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; VI-NEXT: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[AND]], [[AND1]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN]](s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C]] + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C]] + ; VI-NEXT: [[UMIN:%[0-9]+]]:_(i16) = G_UMIN [[AND]], [[AND1]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[UMIN]](i16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: test_umin_s8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; GFX9-NEXT: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[AND]], [[AND1]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s8) = G_TRUNC %0 - %3:_(s8) = G_TRUNC %1 - %4:_(s8) = G_UMIN %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C]] + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C]] + ; GFX9-NEXT: [[UMIN:%[0-9]+]]:_(i16) = G_UMIN [[AND]], [[AND1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[UMIN]](i16) + ; GFX9-NEXT: 
$vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i8) = G_TRUNC %0(i32) + %3:_(i8) = G_TRUNC %1(i32) + %4:_(i8) = G_UMIN %2, %3 + %5:_(i32) = G_ANYEXT %4(i8) + $vgpr0 = COPY %5(i32) ... --- @@ -187,42 +187,42 @@ body: | ; SI-LABEL: name: test_umin_s17 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 131071 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; SI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]] - ; SI-NEXT: $vgpr0 = COPY [[UMIN]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 131071 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; SI-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[AND]], [[AND1]] + ; SI-NEXT: $vgpr0 = COPY [[UMIN]](i32) ; ; VI-LABEL: name: test_umin_s17 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 131071 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; VI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]] - ; VI-NEXT: $vgpr0 = COPY [[UMIN]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 131071 + ; VI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; VI-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[AND]], [[AND1]] + ; VI-NEXT: $vgpr0 = COPY [[UMIN]](i32) ; ; GFX9-LABEL: name: test_umin_s17 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 131071 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX9-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]] - ; GFX9-NEXT: $vgpr0 = COPY [[UMIN]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s17) = G_TRUNC %0 - %3:_(s17) = G_TRUNC %1 - %4:_(s17) = G_UMIN %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 131071 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GFX9-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[AND]], [[AND1]] + ; GFX9-NEXT: $vgpr0 = COPY [[UMIN]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i17) = G_TRUNC %0(i32) + %3:_(i17) = G_TRUNC %1(i32) + %4:_(i17) = G_UMIN %2, %3 + %5:_(i32) = G_ANYEXT %4(i17) + $vgpr0 = COPY %5(i32) ... 
--- @@ -234,42 +234,42 @@ body: | ; SI-LABEL: name: test_umin_v2s32 ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[UV]], [[UV2]] - ; SI-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[UV1]], [[UV3]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; SI-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[UV]], [[UV2]] + ; SI-NEXT: [[UMIN1:%[0-9]+]]:_(i32) = G_UMIN [[UV1]], [[UV3]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[UMIN]](i32), [[UMIN1]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; VI-LABEL: name: test_umin_v2s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[UV]], [[UV2]] - ; VI-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[UV1]], [[UV3]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; VI-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[UV]], [[UV2]] + ; VI-NEXT: [[UMIN1:%[0-9]+]]:_(i32) = G_UMIN [[UV1]], [[UV3]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[UMIN]](i32), [[UMIN1]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX9-LABEL: name: test_umin_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[UV]], [[UV2]] - ; GFX9-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[UV1]], [[UV3]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = G_UMIN %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: 
[[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX9-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[UV]], [[UV2]] + ; GFX9-NEXT: [[UMIN1:%[0-9]+]]:_(i32) = G_UMIN [[UV1]], [[UV3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[UMIN]](i32), [[UMIN1]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i32>) = G_UMIN %0, %1 + $vgpr0_vgpr1 = COPY %2(<2 x i32>) ... --- @@ -281,45 +281,45 @@ body: | ; SI-LABEL: name: test_umin_v3s32 ; SI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; SI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[UV]], [[UV3]] - ; SI-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[UV1]], [[UV4]] - ; SI-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[UV2]], [[UV5]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32), [[UMIN2]](s32) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + ; SI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; SI-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; SI-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[UV]], [[UV3]] + ; SI-NEXT: [[UMIN1:%[0-9]+]]:_(i32) = G_UMIN [[UV1]], [[UV4]] + ; SI-NEXT: [[UMIN2:%[0-9]+]]:_(i32) = G_UMIN [[UV2]], [[UV5]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[UMIN]](i32), [[UMIN1]](i32), [[UMIN2]](i32) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) ; ; VI-LABEL: name: test_umin_v3s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; VI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[UV]], [[UV3]] - ; VI-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[UV1]], [[UV4]] - ; VI-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[UV2]], [[UV5]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32), [[UMIN2]](s32) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + ; VI-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; VI-NEXT: 
[[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; VI-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[UV]], [[UV3]] + ; VI-NEXT: [[UMIN1:%[0-9]+]]:_(i32) = G_UMIN [[UV1]], [[UV4]] + ; VI-NEXT: [[UMIN2:%[0-9]+]]:_(i32) = G_UMIN [[UV2]], [[UV5]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[UMIN]](i32), [[UMIN1]](i32), [[UMIN2]](i32) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) ; ; GFX9-LABEL: name: test_umin_v3s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; GFX9-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[UV]], [[UV3]] - ; GFX9-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[UV1]], [[UV4]] - ; GFX9-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[UV2]], [[UV5]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32), [[UMIN2]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - %2:_(<3 x s32>) = G_UMIN %0, %1 - $vgpr0_vgpr1_vgpr2 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; GFX9-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[UV]], [[UV3]] + ; GFX9-NEXT: [[UMIN1:%[0-9]+]]:_(i32) = G_UMIN [[UV1]], [[UV4]] + ; GFX9-NEXT: [[UMIN2:%[0-9]+]]:_(i32) = G_UMIN [[UV2]], [[UV5]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[UMIN]](i32), [[UMIN1]](i32), [[UMIN2]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(<3 x i32>) = G_UMIN %0, %1 + $vgpr0_vgpr1_vgpr2 = COPY %2(<3 x i32>) ... 
--- @@ -331,57 +331,57 @@ body: | ; SI-LABEL: name: test_umin_v2s16 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; SI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]] - ; SI-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[LSHR]], [[LSHR1]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UMIN1]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UMIN]], [[SHL]] - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C1]] + ; SI-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[AND]], [[AND1]] + ; SI-NEXT: [[UMIN1:%[0-9]+]]:_(i32) = G_UMIN [[LSHR]], [[LSHR1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[UMIN1]], [[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[UMIN]], [[SHL]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) ; ; VI-LABEL: name: test_umin_v2s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC]], [[TRUNC2]] - ; VI-NEXT: [[UMIN1:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC1]], [[TRUNC3]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UMIN]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UMIN1]](s16) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; VI-NEXT: 
[[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[UMIN:%[0-9]+]]:_(i16) = G_UMIN [[TRUNC]], [[TRUNC2]] + ; VI-NEXT: [[UMIN1:%[0-9]+]]:_(i16) = G_UMIN [[TRUNC1]], [[TRUNC3]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UMIN]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UMIN1]](i16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) ; ; GFX9-LABEL: name: test_umin_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[UMIN:%[0-9]+]]:_(<2 x s16>) = G_UMIN [[COPY]], [[COPY1]] - ; GFX9-NEXT: $vgpr0 = COPY [[UMIN]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_UMIN %0, %1 - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[UMIN:%[0-9]+]]:_(<2 x i16>) = G_UMIN [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[UMIN]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = G_UMIN %0, %1 + $vgpr0 = COPY %2(<2 x i16>) ... 
--- @@ -393,81 +393,81 @@ body: | ; SI-LABEL: name: test_umin_v3s16 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; SI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]] - ; SI-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[LSHR]], [[LSHR1]] - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; SI-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[AND2]], [[AND3]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32), [[UMIN2]](s32) - ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF1]](<4 x i16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C1]] + ; SI-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[AND]], [[AND1]] + ; SI-NEXT: [[UMIN1:%[0-9]+]]:_(i32) = G_UMIN [[LSHR]], [[LSHR1]] + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST3]], [[C1]] + ; SI-NEXT: [[UMIN2:%[0-9]+]]:_(i32) = G_UMIN [[AND2]], [[AND3]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[UMIN]](i32), [[UMIN1]](i32), [[UMIN2]](i32) + ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x i32>) ; ; VI-LABEL: name: test_umin_v3s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: 
[[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI-NEXT: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC]], [[TRUNC3]] - ; VI-NEXT: [[UMIN1:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC1]], [[TRUNC4]] - ; VI-NEXT: [[UMIN2:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC2]], [[TRUNC5]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN]](s16) - ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN1]](s16) - ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN2]](s16) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF1]](<4 x i16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST3]](i32) + ; VI-NEXT: [[UMIN:%[0-9]+]]:_(i16) = G_UMIN [[TRUNC]], [[TRUNC3]] + ; VI-NEXT: [[UMIN1:%[0-9]+]]:_(i16) = G_UMIN [[TRUNC1]], [[TRUNC4]] + ; VI-NEXT: [[UMIN2:%[0-9]+]]:_(i16) = G_UMIN [[TRUNC2]], [[TRUNC5]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[UMIN]](i16) + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[UMIN1]](i16) + ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[UMIN2]](i16) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32), [[ANYEXT2]](i32) + ; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x i32>) ; ; GFX9-LABEL: name: test_umin_v3s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: 
[[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[UMIN:%[0-9]+]]:_(<2 x s16>) = G_UMIN [[UV]], [[UV2]] - ; GFX9-NEXT: [[UMIN1:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC]], [[TRUNC1]] - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UMIN]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST2]](s32), [[LSHR]](s32), [[ANYEXT]](s32) - ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) - %0:_(<3 x s16>) = G_IMPLICIT_DEF - %1:_(<3 x s16>) = G_IMPLICIT_DEF - %2:_(<3 x s16>) = G_UMIN %0, %1 - %3:_(<3 x s32>) = G_ANYEXT %2 - S_NOP 0, implicit %3 + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF1]](<4 x i16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9-NEXT: [[UMIN:%[0-9]+]]:_(<2 x i16>) = G_UMIN [[UV]], [[UV2]] + ; GFX9-NEXT: [[UMIN1:%[0-9]+]]:_(i16) = G_UMIN [[TRUNC]], [[TRUNC1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UMIN]](<2 x i16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[UMIN1]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[BITCAST2]](i32), [[LSHR]](i32), [[ANYEXT]](i32) + ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x i32>) + %0:_(<3 x i16>) = G_IMPLICIT_DEF + %1:_(<3 x i16>) = G_IMPLICIT_DEF + %2:_(<3 x i16>) = G_UMIN %0, %1 + %3:_(<3 x i32>) = G_ANYEXT %2(<3 x i16>) + S_NOP 0, implicit %3(<3 x i32>) ... 
--- @@ -479,91 +479,91 @@ body: | ; SI-LABEL: name: test_umin_v4s16 ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; SI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]] - ; SI-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[LSHR]], [[LSHR2]] - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; SI-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[AND2]], [[AND3]] - ; SI-NEXT: [[UMIN3:%[0-9]+]]:_(s32) = G_UMIN [[LSHR1]], [[LSHR3]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UMIN1]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UMIN]], [[SHL]] - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UMIN3]], [[C]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[UMIN2]], [[SHL1]] - ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C1]] + ; SI-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[AND]], [[AND1]] + ; SI-NEXT: [[UMIN1:%[0-9]+]]:_(i32) = G_UMIN [[LSHR]], 
[[LSHR2]] + ; SI-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST3]], [[C1]] + ; SI-NEXT: [[UMIN2:%[0-9]+]]:_(i32) = G_UMIN [[AND2]], [[AND3]] + ; SI-NEXT: [[UMIN3:%[0-9]+]]:_(i32) = G_UMIN [[LSHR1]], [[LSHR3]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[UMIN1]], [[C]](i32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[UMIN]], [[SHL]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[UMIN3]], [[C]](i32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[UMIN2]], [[SHL1]] + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x i16>), [[BITCAST5]](<2 x i16>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; VI-LABEL: name: test_umin_v4s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; VI-NEXT: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC]], [[TRUNC4]] - ; VI-NEXT: [[UMIN1:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC1]], [[TRUNC5]] - ; VI-NEXT: [[UMIN2:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC2]], [[TRUNC6]] - ; VI-NEXT: [[UMIN3:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC3]], [[TRUNC7]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UMIN]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UMIN1]](s16) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UMIN2]](s16) - ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UMIN3]](s16) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x 
s16>) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; VI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST3]](i32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; VI-NEXT: [[UMIN:%[0-9]+]]:_(i16) = G_UMIN [[TRUNC]], [[TRUNC4]] + ; VI-NEXT: [[UMIN1:%[0-9]+]]:_(i16) = G_UMIN [[TRUNC1]], [[TRUNC5]] + ; VI-NEXT: [[UMIN2:%[0-9]+]]:_(i16) = G_UMIN [[TRUNC2]], [[TRUNC6]] + ; VI-NEXT: [[UMIN3:%[0-9]+]]:_(i16) = G_UMIN [[TRUNC3]], [[TRUNC7]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UMIN]](i16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UMIN1]](i16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[UMIN2]](i16) + ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[UMIN3]](i16) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x i16>), [[BITCAST5]](<2 x i16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX9-LABEL: name: test_umin_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9-NEXT: [[UMIN:%[0-9]+]]:_(<2 x s16>) = G_UMIN [[UV]], [[UV2]] - ; GFX9-NEXT: [[UMIN1:%[0-9]+]]:_(<2 x s16>) = G_UMIN [[UV1]], [[UV3]] - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UMIN]](<2 x s16>), [[UMIN1]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 - %2:_(<4 x s16>) = G_UMIN %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: 
[[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; GFX9-NEXT: [[UMIN:%[0-9]+]]:_(<2 x i16>) = G_UMIN [[UV]], [[UV2]] + ; GFX9-NEXT: [[UMIN1:%[0-9]+]]:_(<2 x i16>) = G_UMIN [[UV1]], [[UV3]] + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[UMIN]](<2 x i16>), [[UMIN1]](<2 x i16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = COPY $vgpr2_vgpr3 + %2:_(<4 x i16>) = G_UMIN %0, %1 + $vgpr0_vgpr1 = COPY %2(<4 x i16>) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulh.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulh.mir index 3a919f004964b..821fb3f42e44f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulh.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulh.mir @@ -11,22 +11,22 @@ body: | ; GFX8-LABEL: name: test_umulh_s32 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[COPY1]] - ; GFX8-NEXT: $vgpr0 = COPY [[UMULH]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[COPY]], [[COPY1]] + ; GFX8-NEXT: $vgpr0 = COPY [[UMULH]](i32) ; ; GFX9-LABEL: name: test_umulh_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[COPY1]] - ; GFX9-NEXT: $vgpr0 = COPY [[UMULH]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_UMULH %0, %1 - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[UMULH]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_UMULH %0, %1 + $vgpr0 = COPY %2(i32) ... 
--- @@ -38,30 +38,30 @@ body: | ; GFX8-LABEL: name: test_umulh_v2s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] - ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMULH]](s32), [[UMULH1]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[UV]], [[UV2]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[UV1]], [[UV3]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[UMULH]](i32), [[UMULH1]](i32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX9-LABEL: name: test_umulh_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] - ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMULH]](s32), [[UMULH1]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = G_UMULH %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[UV]], [[UV2]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[UV1]], [[UV3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[UMULH]](i32), [[UMULH1]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i32>) = G_UMULH %0, %1 + $vgpr0_vgpr1 = COPY %2(<2 x i32>) ... 
--- @@ -73,68 +73,68 @@ body: | ; GFX8-LABEL: name: test_umulh_s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] - ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] - ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV2]] - ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV3]] - ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] - ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[UV1]], [[UV2]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UV]], [[UV3]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[UV]], [[UV2]] + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[MUL]], [[MUL1]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO1]](i1) + ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UADDO]], [[UMULH]] + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO3]](i1) + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[UV1]], [[UV3]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[UV1]], [[UV2]] + ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[UV]], [[UV3]] + ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(i32), [[UADDO5:%[0-9]+]]:_(i1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO5]](i1) + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(i32), [[UADDO7:%[0-9]+]]:_(i1) = G_UADDO [[UADDO4]], [[UMULH2]] + ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO7]](i1) + ; GFX8-NEXT: 
[[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(i32), [[UADDO9:%[0-9]+]]:_(i1) = G_UADDO [[UADDO6]], [[ADD]] + ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO9]](i1) + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[UV1]], [[UV3]] + ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UMULH3]], [[ADD2]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO8]](i32), [[ADD3]](i32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) ; ; GFX9-LABEL: name: test_umulh_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] - ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] - ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV2]] - ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV3]] - ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] - ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = G_UMULH %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[UV1]], [[UV2]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UV]], [[UV3]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[UV]], [[UV2]] + ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[MUL]], [[MUL1]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO1]](i1) + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UADDO]], [[UMULH]] + ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT 
[[UADDO3]](i1) + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[UV1]], [[UV3]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[UV1]], [[UV2]] + ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[UV]], [[UV3]] + ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(i32), [[UADDO5:%[0-9]+]]:_(i1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO5]](i1) + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(i32), [[UADDO7:%[0-9]+]]:_(i1) = G_UADDO [[UADDO4]], [[UMULH2]] + ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO7]](i1) + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(i32), [[UADDO9:%[0-9]+]]:_(i1) = G_UADDO [[UADDO6]], [[ADD]] + ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO9]](i1) + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[UV1]], [[UV3]] + ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UMULH3]], [[ADD2]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO8]](i32), [[ADD3]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64) = G_UMULH %0, %1 + $vgpr0_vgpr1 = COPY %2(i64) ... --- @@ -146,122 +146,122 @@ body: | ; GFX8-LABEL: name: test_umulh_v2s64 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]] - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]] - ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV7]] - ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV6]] - ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV7]] - ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] - ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV7]] - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX8-NEXT: 
[[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32) - ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UV10]] - ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV11]] - ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UV10]] - ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UMULH4]] - ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UV11]] - ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UV10]] - ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UV11]] - ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UMULH6]] - ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[ADD4]] - ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UV11]] - ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[ADD7]](s32) - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY1]](<2 x i64>) + ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV]](i64) + ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[UV5]], [[UV6]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UV4]], [[UV7]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[UV4]], [[UV6]] + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[MUL]], [[MUL1]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO1]](i1) + ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UADDO]], [[UMULH]] + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO3]](i1) + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[UV5]], [[UV7]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[UV5]], [[UV6]] + ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[UV4]], [[UV7]] + ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(i32), [[UADDO5:%[0-9]+]]:_(i1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = 
G_ZEXT [[UADDO5]](i1) + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(i32), [[UADDO7:%[0-9]+]]:_(i1) = G_UADDO [[UADDO4]], [[UMULH2]] + ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO7]](i1) + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(i32), [[UADDO9:%[0-9]+]]:_(i1) = G_UADDO [[UADDO6]], [[ADD]] + ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO9]](i1) + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[UV5]], [[UV7]] + ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UMULH3]], [[ADD2]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO8]](i32), [[ADD3]](i32) + ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV1]](i64) + ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV3]](i64) + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UV9]], [[UV10]] + ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(i32) = G_MUL [[UV8]], [[UV11]] + ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(i32) = G_UMULH [[UV8]], [[UV10]] + ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(i32), [[UADDO11:%[0-9]+]]:_(i1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO11]](i1) + ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(i32), [[UADDO13:%[0-9]+]]:_(i1) = G_UADDO [[UADDO10]], [[UMULH4]] + ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO13]](i1) + ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(i32) = G_MUL [[UV9]], [[UV11]] + ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(i32) = G_UMULH [[UV9]], [[UV10]] + ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(i32) = G_UMULH [[UV8]], [[UV11]] + ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(i32), [[UADDO15:%[0-9]+]]:_(i1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO15]](i1) + ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(i32), [[UADDO17:%[0-9]+]]:_(i1) = G_UADDO [[UADDO14]], [[UMULH6]] + ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO17]](i1) + ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(i32), [[UADDO19:%[0-9]+]]:_(i1) = G_UADDO [[UADDO16]], [[ADD4]] + ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO19]](i1) + ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(i32) = G_UMULH [[UV9]], [[UV11]] + ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[UMULH7]], [[ADD6]] + ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO18]](i32), [[ADD7]](i32) + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; GFX9-LABEL: name: test_umulh_v2s64 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]] - ; GFX9-NEXT: 
[[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]] - ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV7]] - ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV6]] - ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV7]] - ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] - ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV7]] - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32) - ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UV10]] - ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV11]] - ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UV10]] - ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UMULH4]] - ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UV11]] - ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UV10]] - ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UV11]] - ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UMULH6]] - ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[ADD4]] - ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UV11]] - ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[ADD7]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - 
%1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - %2:_(<2 x s64>) = G_UMULH %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY1]](<2 x i64>) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV]](i64) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[UV5]], [[UV6]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UV4]], [[UV7]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[UV4]], [[UV6]] + ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[MUL]], [[MUL1]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO1]](i1) + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UADDO]], [[UMULH]] + ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO3]](i1) + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[UV5]], [[UV7]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[UV5]], [[UV6]] + ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[UV4]], [[UV7]] + ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(i32), [[UADDO5:%[0-9]+]]:_(i1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO5]](i1) + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(i32), [[UADDO7:%[0-9]+]]:_(i1) = G_UADDO [[UADDO4]], [[UMULH2]] + ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO7]](i1) + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(i32), [[UADDO9:%[0-9]+]]:_(i1) = G_UADDO [[UADDO6]], [[ADD]] + ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO9]](i1) + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[UV5]], [[UV7]] + ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UMULH3]], [[ADD2]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO8]](i32), [[ADD3]](i32) + ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV1]](i64) + ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV3]](i64) + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UV9]], [[UV10]] + ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(i32) = G_MUL [[UV8]], [[UV11]] + ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(i32) = G_UMULH [[UV8]], [[UV10]] + ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(i32), [[UADDO11:%[0-9]+]]:_(i1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO11]](i1) + ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(i32), [[UADDO13:%[0-9]+]]:_(i1) = G_UADDO [[UADDO10]], [[UMULH4]] + ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO13]](i1) + ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(i32) = G_MUL [[UV9]], [[UV11]] + ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(i32) = G_UMULH [[UV9]], [[UV10]] + ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(i32) = G_UMULH [[UV8]], [[UV11]] + ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(i32), [[UADDO15:%[0-9]+]]:_(i1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO15]](i1) + ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(i32), [[UADDO17:%[0-9]+]]:_(i1) = G_UADDO [[UADDO14]], [[UMULH6]] + ; 
GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO17]](i1) + ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(i32), [[UADDO19:%[0-9]+]]:_(i1) = G_UADDO [[UADDO16]], [[ADD4]] + ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO19]](i1) + ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(i32) = G_UMULH [[UV9]], [[UV11]] + ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[UMULH7]], [[ADD6]] + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO18]](i32), [[ADD7]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[MV]](i64), [[MV1]](i64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(<2 x i64>) = G_UMULH %0, %1 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<2 x i64>) ... --- @@ -273,35 +273,35 @@ body: | ; GFX8-LABEL: name: test_umulh_s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C1]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[LSHR]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[AND]], [[AND1]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[MUL]], [[C1]](i32) + ; GFX8-NEXT: $vgpr0 = COPY [[LSHR]](i32) ; ; GFX9-LABEL: name: test_umulh_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C1]](s32) - ; GFX9-NEXT: $vgpr0 = COPY [[LSHR]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s16) = G_UMULH %2, %3 - %5:_(s32) = G_ZEXT %4 - $vgpr0 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[AND]], [[AND1]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[MUL]], [[C1]](i32) + ; GFX9-NEXT: $vgpr0 = COPY [[LSHR]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i16) = 
G_UMULH %2, %3 + %5:_(i32) = G_ZEXT %4(i16) + $vgpr0 = COPY %5(i32) ... --- @@ -313,45 +313,45 @@ body: | ; GFX8-LABEL: name: test_umulh_s8 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s16) = G_MUL [[AND]], [[AND1]] - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[MUL]], [[C1]](s16) - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] - ; GFX8-NEXT: $vgpr0 = COPY [[AND2]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C]] + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i16) = G_MUL [[AND]], [[AND1]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i16) = G_LSHR [[MUL]], [[C1]](i16) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR]](i16) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[ANYEXT]], [[C2]] + ; GFX8-NEXT: $vgpr0 = COPY [[AND2]](i32) ; ; GFX9-LABEL: name: test_umulh_s8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s16) = G_MUL [[AND]], [[AND1]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[MUL]], [[C1]](s16) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] - ; GFX9-NEXT: $vgpr0 = COPY [[AND2]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s8) = G_TRUNC %0 - %3:_(s8) = G_TRUNC %1 - %4:_(s8) = G_UMULH %2, %3 - %5:_(s32) = G_ZEXT %4 - $vgpr0 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C]] + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C]] + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i16) = G_MUL [[AND]], [[AND1]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX9-NEXT: 
[[LSHR:%[0-9]+]]:_(i16) = G_LSHR [[MUL]], [[C1]](i16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR]](i16) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[ANYEXT]], [[C2]] + ; GFX9-NEXT: $vgpr0 = COPY [[AND2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i8) = G_TRUNC %0(i32) + %3:_(i8) = G_TRUNC %1(i32) + %4:_(i8) = G_UMULH %2, %3 + %5:_(i32) = G_ZEXT %4(i8) + $vgpr0 = COPY %5(i32) ... --- @@ -362,55 +362,49 @@ body: | ; GFX8-LABEL: name: test_umulh_v2s16 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C1]](s32) - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND2]], [[AND3]] - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[MUL1]], [[C1]](s32) - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C1]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]] - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>) - ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX8-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C]] - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND4]](s32), [[LSHR2]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[UV]], [[C]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[UV2]], [[C]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[AND]], [[AND1]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[MUL]], [[C1]](i32) + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[UV1]], [[C]] + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[UV3]], [[C]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[AND2]], [[AND3]] + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[MUL1]], [[C1]](i32) + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[LSHR]](i32), [[LSHR1]](i32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX9-LABEL: name: test_umulh_v2s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: 
[[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C1]](s32) - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND2]], [[AND3]] - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[MUL1]], [[C1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LSHR]](s32), [[LSHR1]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s16>) = G_TRUNC %0 - %3:_(<2 x s16>) = G_TRUNC %1 - %4:_(<2 x s16>) = G_UMULH %2, %3 - %5:_(<2 x s32>) = G_ZEXT %4 - $vgpr0_vgpr1 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[UV]], [[C]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[UV2]], [[C]] + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[AND]], [[AND1]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[MUL]], [[C1]](i32) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[UV1]], [[C]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[UV3]], [[C]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[AND2]], [[AND3]] + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[MUL1]], [[C1]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[LSHR]](i32), [[LSHR1]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i16>) = G_TRUNC %0(<2 x i32>) + %3:_(<2 x i16>) = G_TRUNC %1(<2 x i32>) + %4:_(<2 x i16>) = G_UMULH %2, %3 + %5:_(<2 x i32>) = G_ZEXT %4(<2 x i16>) + $vgpr0_vgpr1 = COPY %5(<2 x i32>) ... 
--- @@ -421,114 +415,114 @@ body: | ; GFX8-LABEL: name: test_umulh_v3s8 ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s16) = G_MUL [[AND]], [[AND1]] - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[MUL]], [[C1]](s16) - ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] - ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) - ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s16) = G_MUL [[AND2]], [[AND3]] - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[MUL1]], [[C1]](s16) - ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX8-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C]] - ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) - ; GFX8-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C]] - ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s16) = G_MUL [[AND4]], [[AND5]] - ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[MUL2]], [[C1]](s16) - ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[LSHR1]], [[C1]](s16) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[LSHR]], [[SHL]] - ; GFX8-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; GFX8-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C]] - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND6]], [[C1]](s16) - ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[LSHR2]], [[SHL1]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) - ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; GFX8-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C]] + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY3]](i32) + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i16) = G_MUL [[AND]], [[AND1]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i16) = G_LSHR [[MUL]], [[C1]](i16) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C]] + ; 
GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY4]](i32) + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(i16) = G_MUL [[AND2]], [[AND3]] + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i16) = G_LSHR [[MUL1]], [[C1]](i16) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX8-NEXT: [[AND4:%[0-9]+]]:_(i16) = G_AND [[TRUNC4]], [[C]] + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY5]](i32) + ; GFX8-NEXT: [[AND5:%[0-9]+]]:_(i16) = G_AND [[TRUNC5]], [[C]] + ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(i16) = G_MUL [[AND4]], [[AND5]] + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(i16) = G_LSHR [[MUL2]], [[C1]](i16) + ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[LSHR1]], [[C1]](i16) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[LSHR]], [[SHL]] + ; GFX8-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; GFX8-NEXT: [[AND6:%[0-9]+]]:_(i16) = G_AND [[TRUNC6]], [[C]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[AND6]], [[C1]](i16) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[LSHR2]], [[SHL1]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C2]](i32) + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX8-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; GFX9-LABEL: name: test_umulh_v3s8 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[AND]](s16), [[AND1]](s16) - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] - ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[AND2]](s16), [[AND3]](s16) - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(<2 x s16>) = G_MUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C1]](s16), [[C1]](s16) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[MUL]], [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C]] - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) - ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s16) = G_MUL [[AND4]], [[AND5]] - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[MUL1]], [[C1]](s16) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR]](<2 x s16>) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; 
GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C]] - ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C]] - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C1]](s16) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL]] - ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C]] - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND8]], [[C1]](s16) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[LSHR1]], [[SHL1]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) - ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = COPY $vgpr3 - %4:_(s32) = COPY $vgpr4 - %20:_(s32) = COPY $vgpr5 - %5:_(s8) = G_TRUNC %0 - %6:_(s8) = G_TRUNC %1 - %7:_(s8) = G_TRUNC %2 - %8:_(s8) = G_TRUNC %3 - %9:_(s8) = G_TRUNC %4 - %10:_(s8) = G_TRUNC %20 - %11:_(<3 x s8>) = G_BUILD_VECTOR %5, %6, %7 - %12:_(<3 x s8>) = G_BUILD_VECTOR %8, %9, %10 - %13:_(<3 x s8>) = G_UMULH %11, %12 - %14:_(s8), %15:_(s8), %16:_(s8) = G_UNMERGE_VALUES %13 - %17:_(s24) = G_MERGE_VALUES %14, %15, %16 - %18:_(s32) = G_ANYEXT %17 - $vgpr0 = COPY %18 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY3]](i32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY4]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[AND]](i16), [[AND1]](i16) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C]] + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[AND2]](i16), [[AND3]](i16) + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(<2 x i16>) = G_MUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[C1]](i16), [[C1]](i16) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(<2 x i16>) = G_LSHR [[MUL]], [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(i16) = G_AND [[TRUNC4]], [[C]] + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY5]](i32) + ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(i16) = G_AND [[TRUNC5]], [[C]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(i16) = G_MUL [[AND4]], [[AND5]] + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i16) = G_LSHR [[MUL1]], [[C1]](i16) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST 
[[LSHR]](<2 x i16>) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(i16) = G_AND [[TRUNC6]], [[C]] + ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(i16) = G_AND [[TRUNC7]], [[C]] + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[AND7]], [[C1]](i16) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND6]], [[SHL]] + ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[DEF]](i32) + ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(i16) = G_AND [[TRUNC8]], [[C]] + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[AND8]], [[C1]](i16) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[LSHR1]], [[SHL1]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C2]](i32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 + %4:_(i32) = COPY $vgpr4 + %5:_(i32) = COPY $vgpr5 + %6:_(i8) = G_TRUNC %0(i32) + %7:_(i8) = G_TRUNC %1(i32) + %8:_(i8) = G_TRUNC %2(i32) + %9:_(i8) = G_TRUNC %3(i32) + %10:_(i8) = G_TRUNC %4(i32) + %11:_(i8) = G_TRUNC %5(i32) + %12:_(<3 x i8>) = G_BUILD_VECTOR %6(i8), %7(i8), %8(i8) + %13:_(<3 x i8>) = G_BUILD_VECTOR %9(i8), %10(i8), %11(i8) + %14:_(<3 x i8>) = G_UMULH %12, %13 + %15:_(i8), %16:_(i8), %17:_(i8) = G_UNMERGE_VALUES %14(<3 x i8>) + %18:_(i24) = G_MERGE_VALUES %15(i8), %16(i8), %17(i8) + %19:_(i32) = G_ANYEXT %18(i24) + $vgpr0 = COPY %19(i32) ... 
--- @@ -539,77 +533,77 @@ body: | ; GFX8-LABEL: name: test_umulh_v2s8 ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s16) = G_MUL [[AND]], [[AND1]] - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[MUL]], [[C1]](s16) - ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] - ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s16) = G_MUL [[AND2]], [[AND3]] - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[MUL1]], [[C1]](s16) - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[LSHR1]], [[C1]](s16) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[LSHR]], [[SHL]] - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C]] + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i16) = G_MUL [[AND]], [[AND1]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i16) = G_LSHR [[MUL]], [[C1]](i16) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C]] + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY3]](i32) + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(i16) = G_MUL [[AND2]], [[AND3]] + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i16) = G_LSHR [[MUL1]], [[C1]](i16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[LSHR1]], [[C1]](i16) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[LSHR]], [[SHL]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[OR]](i16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: test_umulh_v2s8 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], 
[[C]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[AND]](s16), [[AND1]](s16) - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] - ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[AND2]](s16), [[AND3]](s16) - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(<2 x s16>) = G_MUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C1]](s16), [[C1]](s16) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[MUL]], [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C]] - ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C]] - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C1]](s16) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = COPY $vgpr3 - %5:_(s8) = G_TRUNC %0 - %6:_(s8) = G_TRUNC %1 - %7:_(s8) = G_TRUNC %2 - %8:_(s8) = G_TRUNC %3 - %11:_(<2 x s8>) = G_BUILD_VECTOR %5, %6 - %12:_(<2 x s8>) = G_BUILD_VECTOR %7, %8 - %13:_(<2 x s8>) = G_UMULH %11, %12 - %14:_(s8), %15:_(s8) = G_UNMERGE_VALUES %13 - %17:_(s16) = G_MERGE_VALUES %14, %15 - %18:_(s32) = G_ANYEXT %17 - $vgpr0 = COPY %18 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY3]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[AND]](i16), [[AND1]](i16) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C]] + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[AND2]](i16), [[AND3]](i16) + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(<2 x i16>) = G_MUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[C1]](i16), [[C1]](i16) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(<2 x i16>) = G_LSHR [[MUL]], [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[LSHR]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C2]](i32) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) 
+ ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(i16) = G_AND [[TRUNC4]], [[C]] + ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(i16) = G_AND [[TRUNC5]], [[C]] + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[AND5]], [[C1]](i16) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND4]], [[SHL]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[OR]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 + %4:_(i8) = G_TRUNC %0(i32) + %5:_(i8) = G_TRUNC %1(i32) + %6:_(i8) = G_TRUNC %2(i32) + %7:_(i8) = G_TRUNC %3(i32) + %8:_(<2 x i8>) = G_BUILD_VECTOR %4(i8), %5(i8) + %9:_(<2 x i8>) = G_BUILD_VECTOR %6(i8), %7(i8) + %10:_(<2 x i8>) = G_UMULH %8, %9 + %11:_(i8), %12:_(i8) = G_UNMERGE_VALUES %10(<2 x i8>) + %13:_(i16) = G_MERGE_VALUES %11(i8), %12(i8) + %14:_(i32) = G_ANYEXT %13(i16) + $vgpr0 = COPY %14(i32) ... --- @@ -620,126 +614,126 @@ body: | ; GFX8-LABEL: name: test_umulh_v4s8 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32) - ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX8-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C1]](s32) - ; GFX8-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s16) = G_MUL [[AND]], [[AND1]] - ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX8-NEXT: [[LSHR6:%[0-9]+]]:_(s16) = G_LSHR [[MUL]], [[C4]](s16) - ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] - ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s16) = G_MUL [[AND2]], [[AND3]] - ; GFX8-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[MUL1]], [[C4]](s16) - ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] - ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; GFX8-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] - ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s16) = G_MUL [[AND4]], [[AND5]] - ; GFX8-NEXT: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[MUL2]], [[C4]](s16) - ; GFX8-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX8-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] - ; GFX8-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; GFX8-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C3]] - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s16) = G_MUL [[AND6]], [[AND7]] - ; GFX8-NEXT: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[MUL3]], [[C4]](s16) - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR6]](s16) - ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; 
GFX8-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C5]] - ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR7]](s16) - ; GFX8-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C5]] - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL]] - ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR8]](s16) - ; GFX8-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C5]] - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32) - ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GFX8-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR9]](s16) - ; GFX8-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C5]] - ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32) - ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; GFX8-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C1]](i32) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C2]](i32) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[COPY1]], [[C]](i32) + ; GFX8-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[COPY1]], [[C1]](i32) + ; GFX8-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[COPY1]], [[C2]](i32) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C3]] + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C3]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i16) = G_MUL [[AND]], [[AND1]] + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX8-NEXT: [[LSHR6:%[0-9]+]]:_(i16) = G_LSHR [[MUL]], [[C4]](i16) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C3]] + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C3]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(i16) = G_MUL [[AND2]], [[AND3]] + ; GFX8-NEXT: [[LSHR7:%[0-9]+]]:_(i16) = G_LSHR [[MUL1]], [[C4]](i16) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX8-NEXT: [[AND4:%[0-9]+]]:_(i16) = G_AND [[TRUNC4]], [[C3]] + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR4]](i32) + ; GFX8-NEXT: [[AND5:%[0-9]+]]:_(i16) = G_AND [[TRUNC5]], [[C3]] + ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(i16) = G_MUL [[AND4]], [[AND5]] + ; GFX8-NEXT: [[LSHR8:%[0-9]+]]:_(i16) = G_LSHR [[MUL2]], [[C4]](i16) + ; GFX8-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX8-NEXT: [[AND6:%[0-9]+]]:_(i16) = G_AND [[TRUNC6]], [[C3]] + ; GFX8-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR5]](i32) + ; GFX8-NEXT: [[AND7:%[0-9]+]]:_(i16) = G_AND [[TRUNC7]], [[C3]] + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(i16) = G_MUL [[AND6]], [[AND7]] + ; GFX8-NEXT: [[LSHR9:%[0-9]+]]:_(i16) = G_LSHR [[MUL3]], [[C4]](i16) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR6]](i16) + ; GFX8-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; GFX8-NEXT: [[AND8:%[0-9]+]]:_(i32) = G_AND [[ANYEXT]], [[C5]] + ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = 
G_ANYEXT [[LSHR7]](i16) + ; GFX8-NEXT: [[AND9:%[0-9]+]]:_(i32) = G_AND [[ANYEXT1]], [[C5]] + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND9]], [[C]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND8]], [[SHL]] + ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR8]](i16) + ; GFX8-NEXT: [[AND10:%[0-9]+]]:_(i32) = G_AND [[ANYEXT2]], [[C5]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND10]], [[C1]](i32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; GFX8-NEXT: [[ANYEXT3:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR9]](i16) + ; GFX8-NEXT: [[AND11:%[0-9]+]]:_(i32) = G_AND [[ANYEXT3]], [[C5]] + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND11]], [[C2]](i32) + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[OR1]], [[SHL2]] + ; GFX8-NEXT: $vgpr0 = COPY [[OR2]](i32) ; ; GFX9-LABEL: name: test_umulh_v4s8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C1]](s32) - ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[AND]](s16), [[AND1]](s16) - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] - ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[AND2]](s16), [[AND3]](s16) - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(<2 x s16>) = G_MUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C4]](s16), [[C4]](s16) - ; GFX9-NEXT: [[LSHR6:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[MUL]], [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] - ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[AND4]](s16), [[AND5]](s16) - ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] - ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C3]] - ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[AND6]](s16), [[AND7]](s16) - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(<2 x s16>) = G_MUL [[BUILD_VECTOR3]], [[BUILD_VECTOR4]] - ; GFX9-NEXT: 
[[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C4]](s16), [[C4]](s16) - ; GFX9-NEXT: [[LSHR7:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[MUL1]], [[BUILD_VECTOR5]](<2 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR6]](<2 x s16>) - ; GFX9-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR7]](<2 x s16>) - ; GFX9-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]] - ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C5]] - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL]] - ; GFX9-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GFX9-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C5]] - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32) - ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8) = G_UNMERGE_VALUES %0 - %6:_(s8), %7:_(s8), %8:_(s8), %9:_(s8) = G_UNMERGE_VALUES %1 - %10:_(<4 x s8>) = G_BUILD_VECTOR %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8) - %11:_(<4 x s8>) = G_BUILD_VECTOR %6:_(s8), %7:_(s8), %8:_(s8), %9:_(s8) - %12:_(<4 x s8>) = G_UMULH %10:_, %11:_ - %13:_(s8), %14:_(s8), %15:_(s8), %16:_(s8) = G_UNMERGE_VALUES %12:_(<4 x s8>) - %17:_(s32) = G_MERGE_VALUES %13, %14, %15, %16 - $vgpr0 = COPY %17 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C1]](i32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C2]](i32) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[COPY1]], [[C]](i32) + ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[COPY1]], [[C1]](i32) + ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[COPY1]], [[C2]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR4]](i32) + ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR5]](i32) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C3]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[AND]](i16), [[AND1]](i16) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC4]], [[C3]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(i16) = G_AND [[TRUNC5]], [[C3]] + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[AND2]](i16), [[AND3]](i16) + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(<2 x i16>) = G_MUL 
[[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[C4]](i16), [[C4]](i16) + ; GFX9-NEXT: [[LSHR6:%[0-9]+]]:_(<2 x i16>) = G_LSHR [[MUL]], [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C3]] + ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C3]] + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[AND4]](i16), [[AND5]](i16) + ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(i16) = G_AND [[TRUNC6]], [[C3]] + ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(i16) = G_AND [[TRUNC7]], [[C3]] + ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[AND6]](i16), [[AND7]](i16) + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(<2 x i16>) = G_MUL [[BUILD_VECTOR3]], [[BUILD_VECTOR4]] + ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[C4]](i16), [[C4]](i16) + ; GFX9-NEXT: [[LSHR7:%[0-9]+]]:_(<2 x i16>) = G_LSHR [[MUL1]], [[BUILD_VECTOR5]](<2 x i16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[LSHR6]](<2 x i16>) + ; GFX9-NEXT: [[LSHR8:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C1]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[LSHR7]](<2 x i16>) + ; GFX9-NEXT: [[LSHR9:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C5]] + ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(i32) = G_AND [[LSHR8]], [[C5]] + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND9]], [[C]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND8]], [[SHL]] + ; GFX9-NEXT: [[AND10:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C5]] + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND10]], [[C1]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; GFX9-NEXT: [[AND11:%[0-9]+]]:_(i32) = G_AND [[LSHR9]], [[C5]] + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND11]], [[C2]](i32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[OR1]], [[SHL2]] + ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i8), %3:_(i8), %4:_(i8), %5:_(i8) = G_UNMERGE_VALUES %0(i32) + %6:_(i8), %7:_(i8), %8:_(i8), %9:_(i8) = G_UNMERGE_VALUES %1(i32) + %10:_(<4 x i8>) = G_BUILD_VECTOR %2(i8), %3(i8), %4(i8), %5(i8) + %11:_(<4 x i8>) = G_BUILD_VECTOR %6(i8), %7(i8), %8(i8), %9(i8) + %12:_(<4 x i8>) = G_UMULH %10, %11 + %13:_(i8), %14:_(i8), %15:_(i8), %16:_(i8) = G_UNMERGE_VALUES %12(<4 x i8>) + %17:_(i32) = G_MERGE_VALUES %13(i8), %14(i8), %15(i8), %16(i8) + $vgpr0 = COPY %17(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulo.mir index 13c52d08b4941..67cea695ceb0e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulo.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulo.mir @@ -11,34 +11,34 @@ body: | ; GFX8-LABEL: name: test_umulo_s32 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[COPY1]] - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[COPY1]] - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UMULH]](s32), [[C]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) - ; GFX8-NEXT: $vgpr0 = COPY [[MUL]](s32) - ; GFX8-NEXT: $vgpr1 = COPY [[ZEXT]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[COPY]], [[COPY1]] + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[COPY]], [[COPY1]] + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UMULH]](i32), [[C]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[ICMP]](i1) + ; GFX8-NEXT: $vgpr0 = COPY [[MUL]](i32) + ; GFX8-NEXT: $vgpr1 = COPY [[ZEXT]](i32) ; ; GFX9-LABEL: name: test_umulo_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[COPY1]] - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[COPY1]] - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UMULH]](s32), [[C]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) - ; GFX9-NEXT: $vgpr0 = COPY [[MUL]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[ZEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32), %3:_(s1) = G_UMULO %0, %1 - %4:_(s32) = G_ZEXT %3 - $vgpr0 = COPY %2 - $vgpr1 = COPY %4 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[COPY]], [[COPY1]] + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[COPY]], [[COPY1]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UMULH]](i32), [[C]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[ICMP]](i1) + ; GFX9-NEXT: $vgpr0 = COPY [[MUL]](i32) + ; GFX9-NEXT: $vgpr1 = COPY [[ZEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32), %3:_(i1) = G_UMULO %0, %1 + %4:_(i32) = G_ZEXT %3(i1) + $vgpr0 = COPY %2(i32) + $vgpr1 = COPY %4(i32) ... 
--- @@ -50,56 +50,56 @@ body: | ; GFX8-LABEL: name: test_umulo_v2s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV2]] - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UMULH]](s32), [[C]] - ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UMULH1]](s32), [[C]] - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MUL]](s32), [[MUL1]](s32) - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] - ; GFX8-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - ; GFX8-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[UV]], [[UV2]] + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[UV]], [[UV2]] + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UMULH]](i32), [[C]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[UV1]], [[UV3]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UV1]], [[UV3]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UMULH1]](i32), [[C]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[MUL]](i32), [[MUL1]](i32) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP]](i1) + ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP1]](i1) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[ANYEXT]], [[C1]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[ANYEXT1]], [[C1]] + ; GFX8-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[AND]](i32), [[AND1]](i32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + ; GFX8-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x i32>) ; ; GFX9-LABEL: name: test_umulo_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] - ; 
GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV2]] - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UMULH]](s32), [[C]] - ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UMULH1]](s32), [[C]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MUL]](s32), [[MUL1]](s32) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] - ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - ; GFX9-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>), %3:_(<2 x s1>) = G_UMULO %0, %1 - %4:_(<2 x s32>) = G_ZEXT %3 - $vgpr0_vgpr1 = COPY %2 - $vgpr2_vgpr3 = COPY %4 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[UV]], [[UV2]] + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[UV]], [[UV2]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UMULH]](i32), [[C]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[UV1]], [[UV3]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UV1]], [[UV3]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UMULH1]](i32), [[C]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[MUL]](i32), [[MUL1]](i32) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP]](i1) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[ANYEXT]], [[C1]] + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP1]](i1) + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[ANYEXT1]], [[C1]] + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[AND]](i32), [[AND1]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + ; GFX9-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i32>), %3:_(<2 x i1>) = G_UMULO %0, %1 + %4:_(<2 x i32>) = G_ZEXT %3(<2 x i1>) + $vgpr0_vgpr1 = COPY %2(<2 x i32>) + $vgpr2_vgpr3 = COPY %4(<2 x i32>) ... 
--- @@ -111,96 +111,96 @@ body: | ; GFX8-LABEL: name: test_umulo_s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] - ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] - ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV2]] - ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV3]] - ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] - ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV4]](s32), [[UV6]], [[C]] - ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV9]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV4]](s32), [[UV7]], [[ANYEXT]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV5]](s32), [[UV6]], [[AMDGPU_MAD_U64_U32_2]] - ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV8]](s32), [[UV10]](s32) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV]](s64), [[C]] - ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s64) = G_ZEXT [[ICMP]](s1) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV1]](s64) - ; GFX8-NEXT: $vgpr2_vgpr3 = COPY [[ZEXT5]](s64) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), 
[[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[UV1]], [[UV2]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UV]], [[UV3]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[UV]], [[UV2]] + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[MUL]], [[MUL1]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO1]](i1) + ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UADDO]], [[UMULH]] + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO3]](i1) + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[UV1]], [[UV3]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[UV1]], [[UV2]] + ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[UV]], [[UV3]] + ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(i32), [[UADDO5:%[0-9]+]]:_(i1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO5]](i1) + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(i32), [[UADDO7:%[0-9]+]]:_(i1) = G_UADDO [[UADDO4]], [[UMULH2]] + ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO7]](i1) + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(i32), [[UADDO9:%[0-9]+]]:_(i1) = G_UADDO [[UADDO6]], [[ADD]] + ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO9]](i1) + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[UV1]], [[UV3]] + ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UMULH3]], [[ADD2]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO8]](i32), [[ADD3]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV4]](i32), [[UV6]], [[C]] + ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](i64) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[UV9]](i32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV4]](i32), [[UV7]], [[ANYEXT]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV5]](i32), [[UV6]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](i64) + ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV8]](i32), [[UV10]](i32) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[MV]](i64), [[C]] + ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(i64) = G_ZEXT [[ICMP]](i1) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV1]](i64) + ; GFX8-NEXT: $vgpr2_vgpr3 = COPY [[ZEXT5]](i64) ; ; GFX9-LABEL: name: test_umulo_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL 
[[UV1]], [[UV2]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] - ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] - ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV2]] - ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV3]] - ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] - ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV4]](s32), [[UV6]], [[C]] - ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV9]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV4]](s32), [[UV7]], [[ANYEXT]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV5]](s32), [[UV6]], [[AMDGPU_MAD_U64_U32_2]] - ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV8]](s32), [[UV10]](s32) - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV]](s64), [[C]] - ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s64) = G_ZEXT [[ICMP]](s1) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV1]](s64) - ; GFX9-NEXT: $vgpr2_vgpr3 = COPY [[ZEXT5]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64), %3:_(s1) = G_UMULO %0, %1 - %4:_(s64) = G_ZEXT %3 - $vgpr0_vgpr1 = COPY %2 - $vgpr2_vgpr3 = COPY %4 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[UV1]], [[UV2]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UV]], [[UV3]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH 
[[UV]], [[UV2]] + ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[MUL]], [[MUL1]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO1]](i1) + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UADDO]], [[UMULH]] + ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO3]](i1) + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[UV1]], [[UV3]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[UV1]], [[UV2]] + ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[UV]], [[UV3]] + ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(i32), [[UADDO5:%[0-9]+]]:_(i1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO5]](i1) + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(i32), [[UADDO7:%[0-9]+]]:_(i1) = G_UADDO [[UADDO4]], [[UMULH2]] + ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO7]](i1) + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(i32), [[UADDO9:%[0-9]+]]:_(i1) = G_UADDO [[UADDO6]], [[ADD]] + ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO9]](i1) + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[UV1]], [[UV3]] + ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UMULH3]], [[ADD2]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO8]](i32), [[ADD3]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV4]](i32), [[UV6]], [[C]] + ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](i64) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[UV9]](i32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV4]](i32), [[UV7]], [[ANYEXT]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV5]](i32), [[UV6]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](i64) + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV8]](i32), [[UV10]](i32) + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[MV]](i64), [[C]] + ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(i64) = G_ZEXT [[ICMP]](i1) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV1]](i64) + ; GFX9-NEXT: $vgpr2_vgpr3 = COPY [[ZEXT5]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64), %3:_(i1) = G_UMULO %0, %1 + %4:_(i64) = G_ZEXT %3(i1) + $vgpr0_vgpr1 = COPY %2(i64) + $vgpr2_vgpr3 = COPY %4(i64) ... 
--- @@ -212,180 +212,180 @@ body: | ; GFX8-LABEL: name: test_umulo_v2s64 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]] - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]] - ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV7]] - ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV6]] - ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV7]] - ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] - ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV7]] - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV8]](s32), [[UV10]], [[C]] - ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV13]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV8]](s32), [[UV11]], [[ANYEXT]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV9]](s32), [[UV10]], [[AMDGPU_MAD_U64_U32_2]] - ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV12]](s32), [[UV14]](s32) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV]](s64), [[C]] - ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), 
[[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UV18]] - ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UV19]] - ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UV18]] - ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UMULH4]] - ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UV19]] - ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UV18]] - ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UV19]] - ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UMULH6]] - ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[ADD4]] - ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UV19]] - ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[ADD7]](s32) - ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV20]](s32), [[UV22]], [[C]] - ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) - ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV25]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV20]](s32), [[UV23]], [[ANYEXT1]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV21]](s32), [[UV22]], [[AMDGPU_MAD_U64_U32_8]] - ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV24]](s32), [[UV26]](s32) - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV2]](s64), [[C]] - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV1]](s64), [[MV3]](s64) - ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP]](s1) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT2]], [[C1]] - ; GFX8-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP1]](s1) - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT3]], [[C1]] - ; GFX8-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[AND]](s64), [[AND1]](s64) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - ; GFX8-NEXT: 
$vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR1]](<2 x s64>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY1]](<2 x i64>) + ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV]](i64) + ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[UV5]], [[UV6]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UV4]], [[UV7]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[UV4]], [[UV6]] + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[MUL]], [[MUL1]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO1]](i1) + ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UADDO]], [[UMULH]] + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO3]](i1) + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[UV5]], [[UV7]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[UV5]], [[UV6]] + ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[UV4]], [[UV7]] + ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(i32), [[UADDO5:%[0-9]+]]:_(i1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO5]](i1) + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(i32), [[UADDO7:%[0-9]+]]:_(i1) = G_UADDO [[UADDO4]], [[UMULH2]] + ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO7]](i1) + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(i32), [[UADDO9:%[0-9]+]]:_(i1) = G_UADDO [[UADDO6]], [[ADD]] + ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO9]](i1) + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[UV5]], [[UV7]] + ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UMULH3]], [[ADD2]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO8]](i32), [[ADD3]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV]](i64) + ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV8]](i32), [[UV10]], [[C]] + ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](i64) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[UV13]](i32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV8]](i32), [[UV11]], [[ANYEXT]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV9]](i32), [[UV10]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](i64) + ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV12]](i32), [[UV14]](i32) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[MV]](i64), [[C]] + ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV1]](i64) + ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32) = 
G_UNMERGE_VALUES [[UV3]](i64) + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UV17]], [[UV18]] + ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(i32) = G_MUL [[UV16]], [[UV19]] + ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(i32) = G_UMULH [[UV16]], [[UV18]] + ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(i32), [[UADDO11:%[0-9]+]]:_(i1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO11]](i1) + ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(i32), [[UADDO13:%[0-9]+]]:_(i1) = G_UADDO [[UADDO10]], [[UMULH4]] + ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO13]](i1) + ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(i32) = G_MUL [[UV17]], [[UV19]] + ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(i32) = G_UMULH [[UV17]], [[UV18]] + ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(i32) = G_UMULH [[UV16]], [[UV19]] + ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(i32), [[UADDO15:%[0-9]+]]:_(i1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO15]](i1) + ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(i32), [[UADDO17:%[0-9]+]]:_(i1) = G_UADDO [[UADDO14]], [[UMULH6]] + ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO17]](i1) + ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(i32), [[UADDO19:%[0-9]+]]:_(i1) = G_UADDO [[UADDO16]], [[ADD4]] + ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO19]](i1) + ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(i32) = G_UMULH [[UV17]], [[UV19]] + ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[UMULH7]], [[ADD6]] + ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO18]](i32), [[ADD7]](i32) + ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV1]](i64) + ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV3]](i64) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV20]](i32), [[UV22]], [[C]] + ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](i64) + ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[UV25]](i32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV20]](i32), [[UV23]], [[ANYEXT1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV21]](i32), [[UV22]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](i64) + ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV24]](i32), [[UV26]](i32) + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[MV2]](i64), [[C]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[MV1]](i64), [[MV3]](i64) + ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(i64) = G_ANYEXT [[ICMP]](i1) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[ANYEXT2]], [[C1]] + ; GFX8-NEXT: [[ANYEXT3:%[0-9]+]]:_(i64) = G_ANYEXT [[ICMP1]](i1) + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[ANYEXT3]], [[C1]] + ; GFX8-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[AND]](i64), [[AND1]](i64) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) + ; GFX8-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR1]](<2 x i64>) ; ; GFX9-LABEL: name: test_umulo_v2s64 ; GFX9: liveins: 
$vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]] - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]] - ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV7]] - ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV6]] - ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV7]] - ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] - ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV7]] - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV8]](s32), [[UV10]], [[C]] - ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV13]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV8]](s32), [[UV11]], [[ANYEXT]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV9]](s32), [[UV10]], [[AMDGPU_MAD_U64_U32_2]] - ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV12]](s32), [[UV14]](s32) - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV]](s64), [[C]] - ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), 
[[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UV18]] - ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UV19]] - ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UV18]] - ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UMULH4]] - ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UV19]] - ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UV18]] - ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UV19]] - ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UMULH6]] - ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[ADD4]] - ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UV19]] - ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[ADD7]](s32) - ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV20]](s32), [[UV22]], [[C]] - ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) - ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV25]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV20]](s32), [[UV23]], [[ANYEXT1]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV21]](s32), [[UV22]], [[AMDGPU_MAD_U64_U32_8]] - ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV24]](s32), [[UV26]](s32) - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV2]](s64), [[C]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV1]](s64), [[MV3]](s64) - ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP]](s1) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT2]], [[C1]] - ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP1]](s1) - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT3]], [[C1]] - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[AND]](s64), [[AND1]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - ; GFX9-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR1]](<2 x s64>) - %0:_(<2 x s64>) = COPY 
$vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - %2:_(<2 x s64>), %3:_(<2 x s1>) = G_UMULO %0, %1 - %4:_(<2 x s64>) = G_ZEXT %3 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 - $vgpr4_vgpr5_vgpr6_vgpr7 = COPY %4 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY1]](<2 x i64>) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV]](i64) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[UV5]], [[UV6]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UV4]], [[UV7]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[UV4]], [[UV6]] + ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[MUL]], [[MUL1]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO1]](i1) + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UADDO]], [[UMULH]] + ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO3]](i1) + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[UV5]], [[UV7]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[UV5]], [[UV6]] + ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[UV4]], [[UV7]] + ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(i32), [[UADDO5:%[0-9]+]]:_(i1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO5]](i1) + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(i32), [[UADDO7:%[0-9]+]]:_(i1) = G_UADDO [[UADDO4]], [[UMULH2]] + ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO7]](i1) + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(i32), [[UADDO9:%[0-9]+]]:_(i1) = G_UADDO [[UADDO6]], [[ADD]] + ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO9]](i1) + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[UV5]], [[UV7]] + ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UMULH3]], [[ADD2]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO8]](i32), [[ADD3]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV]](i64) + ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV8]](i32), [[UV10]], [[C]] + ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](i64) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[UV13]](i32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV8]](i32), [[UV11]], [[ANYEXT]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV9]](i32), [[UV10]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](i64) + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV12]](i32), [[UV14]](i32) + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[MV]](i64), 
[[C]] + ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV1]](i64) + ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV3]](i64) + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UV17]], [[UV18]] + ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(i32) = G_MUL [[UV16]], [[UV19]] + ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(i32) = G_UMULH [[UV16]], [[UV18]] + ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(i32), [[UADDO11:%[0-9]+]]:_(i1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO11]](i1) + ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(i32), [[UADDO13:%[0-9]+]]:_(i1) = G_UADDO [[UADDO10]], [[UMULH4]] + ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO13]](i1) + ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(i32) = G_MUL [[UV17]], [[UV19]] + ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(i32) = G_UMULH [[UV17]], [[UV18]] + ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(i32) = G_UMULH [[UV16]], [[UV19]] + ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(i32), [[UADDO15:%[0-9]+]]:_(i1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO15]](i1) + ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(i32), [[UADDO17:%[0-9]+]]:_(i1) = G_UADDO [[UADDO14]], [[UMULH6]] + ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO17]](i1) + ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(i32), [[UADDO19:%[0-9]+]]:_(i1) = G_UADDO [[UADDO16]], [[ADD4]] + ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO19]](i1) + ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(i32) = G_UMULH [[UV17]], [[UV19]] + ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[UMULH7]], [[ADD6]] + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UADDO18]](i32), [[ADD7]](i32) + ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV1]](i64) + ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV3]](i64) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV20]](i32), [[UV22]], [[C]] + ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](i64) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[UV25]](i32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV20]](i32), [[UV23]], [[ANYEXT1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV21]](i32), [[UV22]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](i64) + ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV24]](i32), [[UV26]](i32) + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[MV2]](i64), [[C]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[MV1]](i64), [[MV3]](i64) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(i64) = G_ANYEXT [[ICMP]](i1) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[ANYEXT2]], [[C1]] + ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(i64) = G_ANYEXT [[ICMP1]](i1) + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[ANYEXT3]], [[C1]] + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[AND]](i64), [[AND1]](i64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY 
[[BUILD_VECTOR]](<2 x i64>) + ; GFX9-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR1]](<2 x i64>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(<2 x i64>), %3:_(<2 x i1>) = G_UMULO %0, %1 + %4:_(<2 x i64>) = G_ZEXT %3(<2 x i1>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<2 x i64>) + $vgpr4_vgpr5_vgpr6_vgpr7 = COPY %4(<2 x i64>) ... --- @@ -397,51 +397,51 @@ body: | ; GFX8-LABEL: name: test_umulo_s24 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[AND1]] - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UMULH]](s32), [[C1]] - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP]], [[ICMP1]] - ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s1) - ; GFX8-NEXT: $vgpr0 = COPY [[AND3]](s32) - ; GFX8-NEXT: $vgpr1 = COPY [[ZEXT]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16777215 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[AND]], [[AND1]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[AND]], [[AND1]] + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UMULH]](i32), [[C1]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[MUL]], [[C]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[MUL]](i32), [[AND2]] + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i1) = G_OR [[ICMP]], [[ICMP1]] + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[MUL]], [[C]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i1) + ; GFX8-NEXT: $vgpr0 = COPY [[AND3]](i32) + ; GFX8-NEXT: $vgpr1 = COPY [[ZEXT]](i32) ; ; GFX9-LABEL: name: test_umulo_s24 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[AND1]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UMULH]](s32), [[C1]] - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP]], [[ICMP1]] - ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s1) - ; GFX9-NEXT: $vgpr0 = COPY [[AND3]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[ZEXT]](s32) - 
%0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s24) = G_TRUNC %0 - %3:_(s24) = G_TRUNC %1 - %4:_(s24), %6:_(s1) = G_UMULO %2, %3 - %5:_(s32) = G_ZEXT %4 - %7:_(s32) = G_ZEXT %6 - $vgpr0 = COPY %5 - $vgpr1 = COPY %7 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16777215 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[AND]], [[AND1]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[AND]], [[AND1]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UMULH]](i32), [[C1]] + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[MUL]], [[C]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[MUL]](i32), [[AND2]] + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i1) = G_OR [[ICMP]], [[ICMP1]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[MUL]], [[C]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i1) + ; GFX9-NEXT: $vgpr0 = COPY [[AND3]](i32) + ; GFX9-NEXT: $vgpr1 = COPY [[ZEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i24) = G_TRUNC %0(i32) + %3:_(i24) = G_TRUNC %1(i32) + %4:_(i24), %5:_(i1) = G_UMULO %2, %3 + %6:_(i32) = G_ZEXT %4(i24) + %7:_(i32) = G_ZEXT %5(i1) + $vgpr0 = COPY %6(i32) + $vgpr1 = COPY %7(i32) ... @@ -454,43 +454,43 @@ body: | ; GFX8-LABEL: name: test_umulo_s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] - ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) - ; GFX8-NEXT: $vgpr0 = COPY [[AND3]](s32) - ; GFX8-NEXT: $vgpr1 = COPY [[ZEXT]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[AND]], [[AND1]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[MUL]], [[C]] + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[MUL]](i32), [[AND2]] + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[MUL]], [[C]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[ICMP]](i1) + ; GFX8-NEXT: $vgpr0 = COPY [[AND3]](i32) + ; GFX8-NEXT: $vgpr1 = COPY [[ZEXT]](i32) ; ; GFX9-LABEL: name: test_umulo_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = 
G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] - ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) - ; GFX9-NEXT: $vgpr0 = COPY [[AND3]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[ZEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s16), %6:_(s1) = G_UMULO %2, %3 - %5:_(s32) = G_ZEXT %4 - %7:_(s32) = G_ZEXT %6 - $vgpr0 = COPY %5 - $vgpr1 = COPY %7 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[AND]], [[AND1]] + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[MUL]], [[C]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[MUL]](i32), [[AND2]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[MUL]], [[C]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[ICMP]](i1) + ; GFX9-NEXT: $vgpr0 = COPY [[AND3]](i32) + ; GFX9-NEXT: $vgpr1 = COPY [[ZEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i16), %5:_(i1) = G_UMULO %2, %3 + %6:_(i32) = G_ZEXT %4(i16) + %7:_(i32) = G_ZEXT %5(i1) + $vgpr0 = COPY %6(i32) + $vgpr1 = COPY %7(i32) ... --- @@ -502,43 +502,43 @@ body: | ; GFX8-LABEL: name: test_umulo_s8 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] - ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) - ; GFX8-NEXT: $vgpr0 = COPY [[AND3]](s32) - ; GFX8-NEXT: $vgpr1 = COPY [[ZEXT]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[AND]], [[AND1]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[MUL]], [[C]] + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[MUL]](i32), [[AND2]] + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[MUL]], [[C]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[ICMP]](i1) + ; GFX8-NEXT: $vgpr0 = COPY [[AND3]](i32) + ; GFX8-NEXT: $vgpr1 = COPY [[ZEXT]](i32) ; ; GFX9-LABEL: name: test_umulo_s8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), 
[[MUL]](s32), [[AND2]] - ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) - ; GFX9-NEXT: $vgpr0 = COPY [[AND3]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[ZEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s8) = G_TRUNC %0 - %3:_(s8) = G_TRUNC %1 - %4:_(s8), %6:_(s1) = G_UMULO %2, %3 - %5:_(s32) = G_ZEXT %4 - %7:_(s32) = G_ZEXT %6 - $vgpr0 = COPY %5 - $vgpr1 = COPY %7 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[AND]], [[AND1]] + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[MUL]], [[C]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[MUL]](i32), [[AND2]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[MUL]], [[C]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[ICMP]](i1) + ; GFX9-NEXT: $vgpr0 = COPY [[AND3]](i32) + ; GFX9-NEXT: $vgpr1 = COPY [[ZEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i8) = G_TRUNC %0(i32) + %3:_(i8) = G_TRUNC %1(i32) + %4:_(i8), %5:_(i1) = G_UMULO %2, %3 + %6:_(i32) = G_ZEXT %4(i8) + %7:_(i32) = G_ZEXT %5(i1) + $vgpr0 = COPY %6(i32) + $vgpr1 = COPY %7(i32) ... --- @@ -549,78 +549,71 @@ body: | ; GFX8-LABEL: name: test_umulo_v2s16 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] - ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; GFX8-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[AND4]] - ; GFX8-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C]] - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL1]](s32), [[AND5]] - ; GFX8-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] - ; GFX8-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C]] - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C1]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL]] - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX8-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] - ; GFX8-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND8]](s32), [[AND9]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>) - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR 
[[BITCAST1]], [[C1]](s32) - ; GFX8-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C]] - ; GFX8-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND10]](s32), [[LSHR]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s32>) - ; GFX8-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[UV]], [[C]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[UV2]], [[C]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[AND]], [[AND1]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[MUL]], [[C]] + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[MUL]](i32), [[AND2]] + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[UV1]], [[C]] + ; GFX8-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[UV3]], [[C]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[AND3]], [[AND4]] + ; GFX8-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[MUL1]], [[C]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[MUL1]](i32), [[AND5]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP]](i1) + ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP1]](i1) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX8-NEXT: [[AND6:%[0-9]+]]:_(i32) = G_AND [[ANYEXT]], [[C1]] + ; GFX8-NEXT: [[AND7:%[0-9]+]]:_(i32) = G_AND [[ANYEXT1]], [[C1]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[AND6]](i32), [[AND7]](i32) + ; GFX8-NEXT: [[AND8:%[0-9]+]]:_(i32) = G_AND [[MUL]], [[C]] + ; GFX8-NEXT: [[AND9:%[0-9]+]]:_(i32) = G_AND [[MUL1]], [[C]] + ; GFX8-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[AND8]](i32), [[AND9]](i32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR1]](<2 x i32>) + ; GFX8-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX9-LABEL: name: test_umulo_v2s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] - ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[AND4]] - ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL1]](s32), [[AND5]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] - ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = 
G_ANYEXT [[ICMP1]](s1) - ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND6]](s32), [[AND7]](s32) - ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] - ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C]] - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND8]](s32), [[AND9]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s32>) - ; GFX9-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s16>) = G_TRUNC %0 - %3:_(<2 x s16>) = G_TRUNC %1 - %4:_(<2 x s16>), %6:_(<2 x s1>) = G_UMULO %2, %3 - %7:_(<2 x s32>) = G_ZEXT %6 - %5:_(<2 x s32>) = G_ZEXT %4 - $vgpr0_vgpr1 = COPY %5 - $vgpr2_vgpr3 = COPY %7 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[UV]], [[C]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[UV2]], [[C]] + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[AND]], [[AND1]] + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[MUL]], [[C]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[MUL]](i32), [[AND2]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[UV1]], [[C]] + ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[UV3]], [[C]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[AND3]], [[AND4]] + ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[MUL1]], [[C]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[MUL1]](i32), [[AND5]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP]](i1) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(i32) = G_AND [[ANYEXT]], [[C1]] + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP1]](i1) + ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(i32) = G_AND [[ANYEXT1]], [[C1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[AND6]](i32), [[AND7]](i32) + ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(i32) = G_AND [[MUL]], [[C]] + ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(i32) = G_AND [[MUL1]], [[C]] + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[AND8]](i32), [[AND9]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR1]](<2 x i32>) + ; GFX9-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i16>) = G_TRUNC %0(<2 x i32>) + %3:_(<2 x i16>) = G_TRUNC %1(<2 x i32>) + %4:_(<2 x i16>), %5:_(<2 x i1>) = G_UMULO %2, %3 + %6:_(<2 x i32>) = G_ZEXT %5(<2 x i1>) + %7:_(<2 x i32>) = G_ZEXT %4(<2 x i16>) + $vgpr0_vgpr1 = COPY %7(<2 x i32>) + $vgpr2_vgpr3 = COPY %6(<2 x i32>) ... 
@@ -632,89 +625,89 @@ body: | ; GFX8-LABEL: name: test_umulo_v2s8 ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] - ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX8-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[AND4]] - ; GFX8-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C]] - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL1]](s32), [[AND5]] - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[MUL]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX8-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[MUL1]](s32) - ; GFX8-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL]] - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX8-NEXT: $vgpr1 = COPY [[ANYEXT1]](s32) - ; GFX8-NEXT: $vgpr2 = COPY [[ANYEXT2]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY2]], [[C]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[AND]], [[AND1]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[MUL]], [[C]] + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[MUL]](i32), [[AND2]] + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GFX8-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[COPY3]], [[C]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[AND3]], [[AND4]] + ; GFX8-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[MUL1]], [[C]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[MUL1]](i32), [[AND5]] + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[MUL]](i32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX8-NEXT: [[AND6:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C1]] + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[MUL1]](i32) + ; GFX8-NEXT: [[AND7:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C1]] + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[AND7]], [[C2]](i16) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND6]], [[SHL]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[OR]](i16) + ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP]](i1) + ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT 
[[ICMP1]](i1) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + ; GFX8-NEXT: $vgpr1 = COPY [[ANYEXT1]](i32) + ; GFX8-NEXT: $vgpr2 = COPY [[ANYEXT2]](i32) ; ; GFX9-LABEL: name: test_umulo_v2s8 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] - ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[AND4]] - ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL1]](s32), [[AND5]] - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[MUL]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[MUL1]](s32) - ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[ANYEXT1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[ANYEXT2]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = COPY $vgpr3 - %5:_(s8) = G_TRUNC %0 - %6:_(s8) = G_TRUNC %1 - %7:_(s8) = G_TRUNC %2 - %8:_(s8) = G_TRUNC %3 - %11:_(<2 x s8>) = G_BUILD_VECTOR %5, %6 - %12:_(<2 x s8>) = G_BUILD_VECTOR %7, %8 - %13:_(<2 x s8>), %19:_(<2 x s1>) = G_UMULO %11, %12 - %20:_(<2 x s32>) = G_ZEXT %19 - %14:_(s8), %15:_(s8) = G_UNMERGE_VALUES %13 - %21:_(s1), %22:_(s1) = G_UNMERGE_VALUES %19 - %17:_(s16) = G_MERGE_VALUES %14, %15 - %18:_(s32) = G_ANYEXT %17 - %23:_(s32) = G_ANYEXT %21 - %24:_(s32) = G_ANYEXT %22 - $vgpr0 = COPY %18 - $vgpr1 = COPY %23 - $vgpr2 = COPY %24 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY2]], [[C]] + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[AND]], [[AND1]] + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[MUL]], [[C]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[MUL]](i32), [[AND2]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[COPY3]], [[C]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[AND3]], [[AND4]] + ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[MUL1]], [[C]] + 
; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[MUL1]](i32), [[AND5]] + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[MUL]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C1]] + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[MUL1]](i32) + ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C1]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[AND7]], [[C2]](i16) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND6]], [[SHL]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[OR]](i16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP]](i1) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP1]](i1) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + ; GFX9-NEXT: $vgpr1 = COPY [[ANYEXT1]](i32) + ; GFX9-NEXT: $vgpr2 = COPY [[ANYEXT2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 + %4:_(i8) = G_TRUNC %0(i32) + %5:_(i8) = G_TRUNC %1(i32) + %6:_(i8) = G_TRUNC %2(i32) + %7:_(i8) = G_TRUNC %3(i32) + %8:_(<2 x i8>) = G_BUILD_VECTOR %4(i8), %5(i8) + %9:_(<2 x i8>) = G_BUILD_VECTOR %6(i8), %7(i8) + %10:_(<2 x i8>), %11:_(<2 x i1>) = G_UMULO %8, %9 + %12:_(<2 x i32>) = G_ZEXT %11(<2 x i1>) + %13:_(i8), %14:_(i8) = G_UNMERGE_VALUES %10(<2 x i8>) + %15:_(i1), %16:_(i1) = G_UNMERGE_VALUES %11(<2 x i1>) + %17:_(i16) = G_MERGE_VALUES %13(i8), %14(i8) + %18:_(i32) = G_ANYEXT %17(i16) + %19:_(i32) = G_ANYEXT %15(i1) + %20:_(i32) = G_ANYEXT %16(i1) + $vgpr0 = COPY %18(i32) + $vgpr1 = COPY %19(i32) + $vgpr2 = COPY %20(i32) ... --- @@ -725,95 +718,95 @@ body: | ; GFX8-LABEL: name: test_umulo_v4s8 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32) - ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX8-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C1]](s32) - ; GFX8-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32) - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C3]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C3]] - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] - ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; GFX8-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[AND4]] - ; GFX8-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]] - ; GFX8-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]] - ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[AND5]], [[AND6]] - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[LSHR2]], [[LSHR5]] - ; GFX8-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C3]] - ; GFX8-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C3]] - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) - ; GFX8-NEXT: 
[[OR:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL]] - ; GFX8-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[MUL2]], [[C3]] - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C1]](s32) - ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GFX8-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[MUL3]], [[C3]] - ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C2]](s32) - ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX8-NEXT: $vgpr0 = COPY [[OR2]](s32) - ; GFX8-NEXT: $vgpr1 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C1]](i32) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C2]](i32) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[COPY1]], [[C]](i32) + ; GFX8-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[COPY1]], [[C1]](i32) + ; GFX8-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[COPY1]], [[C2]](i32) + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C3]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C3]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[AND]], [[AND1]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[MUL]], [[C3]] + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[MUL]](i32), [[AND2]] + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[LSHR]], [[C3]] + ; GFX8-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[LSHR3]], [[C3]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[AND3]], [[AND4]] + ; GFX8-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[LSHR1]], [[C3]] + ; GFX8-NEXT: [[AND6:%[0-9]+]]:_(i32) = G_AND [[LSHR4]], [[C3]] + ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[AND5]], [[AND6]] + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[LSHR2]], [[LSHR5]] + ; GFX8-NEXT: [[AND7:%[0-9]+]]:_(i32) = G_AND [[MUL]], [[C3]] + ; GFX8-NEXT: [[AND8:%[0-9]+]]:_(i32) = G_AND [[MUL1]], [[C3]] + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND8]], [[C]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND7]], [[SHL]] + ; GFX8-NEXT: [[AND9:%[0-9]+]]:_(i32) = G_AND [[MUL2]], [[C3]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND9]], [[C1]](i32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; GFX8-NEXT: [[AND10:%[0-9]+]]:_(i32) = G_AND [[MUL3]], [[C3]] + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND10]], [[C2]](i32) + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[OR1]], [[SHL2]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP]](i1) + ; GFX8-NEXT: $vgpr0 = COPY [[OR2]](i32) + ; GFX8-NEXT: $vgpr1 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: test_umulo_v4s8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32) - ; GFX9-NEXT: 
[[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C1]](s32) - ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32) - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C3]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C3]] - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] - ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[AND4]] - ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]] - ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]] - ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[AND5]], [[AND6]] - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[LSHR2]], [[LSHR5]] - ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C3]] - ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C3]] - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL]] - ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[MUL2]], [[C3]] - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C1]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GFX9-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[MUL3]], [[C3]] - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C2]](s32) - ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8) = G_UNMERGE_VALUES %0 - %6:_(s8), %7:_(s8), %8:_(s8), %9:_(s8) = G_UNMERGE_VALUES %1 - %10:_(<4 x s8>) = G_BUILD_VECTOR %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8) - %11:_(<4 x s8>) = G_BUILD_VECTOR %6:_(s8), %7:_(s8), %8:_(s8), %9:_(s8) - %12:_(<4 x s8>), %18:_(<4 x s1>) = G_UMULO %10:_, %11:_ - %13:_(s8), %14:_(s8), %15:_(s8), %16:_(s8) = G_UNMERGE_VALUES %12:_(<4 x s8>) - %19:_(s1), %20:_(s1), %21:_(s1), %22:_(s1) = G_UNMERGE_VALUES %18:_(<4 x s1>) - %17:_(s32) = G_MERGE_VALUES %13, %14, %15, %16 - %23:_(s32) = G_ANYEXT %19 - $vgpr0 = COPY %17 - $vgpr1 = COPY %23 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C1]](i32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C2]](i32) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[COPY1]], [[C]](i32) + ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[COPY1]], [[C1]](i32) + ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[COPY1]], [[C2]](i32) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C3]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C3]] + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[AND]], [[AND1]] + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[MUL]], [[C3]] + ; GFX9-NEXT: 
[[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[MUL]](i32), [[AND2]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[LSHR]], [[C3]] + ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[LSHR3]], [[C3]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[AND3]], [[AND4]] + ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[LSHR1]], [[C3]] + ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(i32) = G_AND [[LSHR4]], [[C3]] + ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[AND5]], [[AND6]] + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[LSHR2]], [[LSHR5]] + ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(i32) = G_AND [[MUL]], [[C3]] + ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(i32) = G_AND [[MUL1]], [[C3]] + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND8]], [[C]](i32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND7]], [[SHL]] + ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(i32) = G_AND [[MUL2]], [[C3]] + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND9]], [[C1]](i32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[OR]], [[SHL1]] + ; GFX9-NEXT: [[AND10:%[0-9]+]]:_(i32) = G_AND [[MUL3]], [[C3]] + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND10]], [[C2]](i32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[OR1]], [[SHL2]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP]](i1) + ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](i32) + ; GFX9-NEXT: $vgpr1 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i8), %3:_(i8), %4:_(i8), %5:_(i8) = G_UNMERGE_VALUES %0(i32) + %6:_(i8), %7:_(i8), %8:_(i8), %9:_(i8) = G_UNMERGE_VALUES %1(i32) + %10:_(<4 x i8>) = G_BUILD_VECTOR %2(i8), %3(i8), %4(i8), %5(i8) + %11:_(<4 x i8>) = G_BUILD_VECTOR %6(i8), %7(i8), %8(i8), %9(i8) + %12:_(<4 x i8>), %13:_(<4 x i1>) = G_UMULO %10, %11 + %14:_(i8), %15:_(i8), %16:_(i8), %17:_(i8) = G_UNMERGE_VALUES %12(<4 x i8>) + %18:_(i1), %19:_(i1), %20:_(i1), %21:_(i1) = G_UNMERGE_VALUES %13(<4 x i1>) + %22:_(i32) = G_MERGE_VALUES %14(i8), %15(i8), %16(i8), %17(i8) + %23:_(i32) = G_ANYEXT %18(i1) + $vgpr0 = COPY %22(i32) + $vgpr1 = COPY %23(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir index c231aa8334d45..bda591289d07d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir @@ -6,14 +6,14 @@ name: test_unmerge_s32_s64 body: | bb.0: ; CHECK-LABEL: name: test_unmerge_s32_s64 - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - %0:_(s64) = G_CONSTANT i64 0 - %1:_(s32), %2:_(s32) = G_UNMERGE_VALUES %0:_(s64) - $vgpr0 = COPY %1(s32) - $vgpr1 = COPY %2(s32) + ; CHECK: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C]](i64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + %0:_(i64) = G_CONSTANT i64 0 + %1:_(i32), %2:_(i32) = G_UNMERGE_VALUES %0(i64) + $vgpr0 = COPY %1(i32) + $vgpr1 = COPY %2(i32) ... 
--- @@ -24,14 +24,14 @@ body: | ; CHECK-LABEL: name: test_unmerge_s32_v2s32 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(s32), %2:_(s32) = G_UNMERGE_VALUES %0 - $vgpr0 = COPY %1 - $vgpr1 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(i32), %2:_(i32) = G_UNMERGE_VALUES %0(<2 x i32>) + $vgpr0 = COPY %1(i32) + $vgpr1 = COPY %2(i32) ... --- @@ -40,38 +40,38 @@ body: | bb.0: liveins: ; CHECK-LABEL: name: test_unmerge_s32_v7s64 - ; CHECK: [[DEF:%[0-9]+]]:_(<7 x s64>) = IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<7 x s64>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - %0:_(<7 x s64>) = IMPLICIT_DEF - %1:_(s32), %2:_(s32), %3:_(s32), %4:_(s32), %5:_(s32), %6:_(s32), %7:_(s32), %8:_(s32), %9:_(s32), %10:_(s32), %11:_(s32), %12:_(s32), %13:_(s32), %14:_(s32) = G_UNMERGE_VALUES %0 - $vgpr0 = COPY %1 - $vgpr1 = COPY %2 - $vgpr2 = COPY %3 - $vgpr3 = COPY %4 - $vgpr4 = COPY %5 - $vgpr5 = COPY %6 - $vgpr6 = COPY %7 - $vgpr7 = COPY %8 - $vgpr8 = COPY %9 - $vgpr9 = COPY %10 - $vgpr10 = COPY %11 - $vgpr11 = COPY %12 - $vgpr12 = COPY %13 - $vgpr13 = COPY %14 + ; CHECK: [[DEF:%[0-9]+]]:_(<7 x i64>) = IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32), [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32), [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF]](<7 x i64>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](i32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](i32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](i32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](i32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](i32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](i32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](i32) + ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](i32) + ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](i32) + ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](i32) + ; 
CHECK-NEXT: $vgpr13 = COPY [[UV13]](i32) + %0:_(<7 x i64>) = IMPLICIT_DEF + %1:_(i32), %2:_(i32), %3:_(i32), %4:_(i32), %5:_(i32), %6:_(i32), %7:_(i32), %8:_(i32), %9:_(i32), %10:_(i32), %11:_(i32), %12:_(i32), %13:_(i32), %14:_(i32) = G_UNMERGE_VALUES %0(<7 x i64>) + $vgpr0 = COPY %1(i32) + $vgpr1 = COPY %2(i32) + $vgpr2 = COPY %3(i32) + $vgpr3 = COPY %4(i32) + $vgpr4 = COPY %5(i32) + $vgpr5 = COPY %6(i32) + $vgpr6 = COPY %7(i32) + $vgpr7 = COPY %8(i32) + $vgpr8 = COPY %9(i32) + $vgpr9 = COPY %10(i32) + $vgpr10 = COPY %11(i32) + $vgpr11 = COPY %12(i32) + $vgpr12 = COPY %13(i32) + $vgpr13 = COPY %14(i32) ... --- @@ -82,18 +82,18 @@ body: | ; CHECK-LABEL: name: test_unmerge_s16_v2s16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](s32) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(s16), %2:_(s16) = G_UNMERGE_VALUES %0 - %3:_(s32) = G_ANYEXT %1 - %4:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 - $vgpr1 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](i32) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(i16), %2:_(i16) = G_UNMERGE_VALUES %0(<2 x i16>) + %3:_(i32) = G_ANYEXT %1(i16) + %4:_(i32) = G_ANYEXT %2(i16) + $vgpr0 = COPY %3(i32) + $vgpr1 = COPY %4(i32) ... 
--- @@ -101,23 +101,23 @@ name: test_unmerge_s16_v3s16 body: | bb.0: ; CHECK-LABEL: name: test_unmerge_s16_v3s16 - ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[BITCAST1]](s32) - %0:_(<3 x s16>) = G_IMPLICIT_DEF - %1:_(s16), %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0 - %4:_(s32) = G_ANYEXT %1 - %5:_(s32) = G_ANYEXT %2 - %6:_(s32) = G_ANYEXT %3 - $vgpr0 = COPY %4 - $vgpr1 = COPY %5 - $vgpr2 = COPY %6 + ; CHECK: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[BITCAST1]](i32) + %0:_(<3 x i16>) = G_IMPLICIT_DEF + %1:_(i16), %2:_(i16), %3:_(i16) = G_UNMERGE_VALUES %0(<3 x i16>) + %4:_(i32) = G_ANYEXT %1(i16) + %5:_(i32) = G_ANYEXT %2(i16) + %6:_(i32) = G_ANYEXT %3(i16) + $vgpr0 = COPY %4(i32) + $vgpr1 = COPY %5(i32) + $vgpr2 = COPY %6(i32) ... 
--- @@ -129,27 +129,27 @@ body: | ; CHECK-LABEL: name: test_unmerge_s16_v4s16 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[BITCAST1]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[LSHR1]](s32) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(s16), %2:_(s16), %3:_(s16), %4:_(s16) = G_UNMERGE_VALUES %0 - %5:_(s32) = G_ANYEXT %1 - %6:_(s32) = G_ANYEXT %2 - %7:_(s32) = G_ANYEXT %3 - %8:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 - $vgpr1 = COPY %6 - $vgpr2 = COPY %7 - $vgpr3 = COPY %8 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[BITCAST1]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[LSHR1]](i32) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(i16), %2:_(i16), %3:_(i16), %4:_(i16) = G_UNMERGE_VALUES %0(<4 x i16>) + %5:_(i32) = G_ANYEXT %1(i16) + %6:_(i32) = G_ANYEXT %2(i16) + %7:_(i32) = G_ANYEXT %3(i16) + %8:_(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) + $vgpr1 = COPY %6(i32) + $vgpr2 = COPY %7(i32) + $vgpr3 = COPY %8(i32) ... 
--- @@ -160,35 +160,35 @@ body: | ; CHECK-LABEL: name: test_unmerge_s16_v6s16 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[BITCAST1]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[LSHR1]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[BITCAST2]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[LSHR2]](s32) - %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(s16), %2:_(s16), %3:_(s16), %4:_(s16), %5:_(s16), %6:_(s16) = G_UNMERGE_VALUES %0 - %7:_(s32) = G_ANYEXT %1 - %8:_(s32) = G_ANYEXT %2 - %9:_(s32) = G_ANYEXT %3 - %10:_(s32) = G_ANYEXT %4 - %11:_(s32) = G_ANYEXT %5 - %12:_(s32) = G_ANYEXT %6 - $vgpr0 = COPY %7 - $vgpr1 = COPY %8 - $vgpr2 = COPY %9 - $vgpr3 = COPY %10 - $vgpr4 = COPY %11 - $vgpr5 = COPY %12 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[BITCAST1]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[LSHR1]](i32) + ; CHECK-NEXT: $vgpr4 = COPY [[BITCAST2]](i32) + ; CHECK-NEXT: $vgpr5 = COPY [[LSHR2]](i32) + %0:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(i16), %2:_(i16), %3:_(i16), %4:_(i16), %5:_(i16), %6:_(i16) = G_UNMERGE_VALUES %0(<6 x i16>) + %7:_(i32) = G_ANYEXT %1(i16) + %8:_(i32) = G_ANYEXT %2(i16) + %9:_(i32) = G_ANYEXT %3(i16) + %10:_(i32) = G_ANYEXT %4(i16) + %11:_(i32) = G_ANYEXT %5(i16) + %12:_(i32) = G_ANYEXT %6(i16) + $vgpr0 = COPY %7(i32) + $vgpr1 = COPY %8(i32) + $vgpr2 = COPY %9(i32) + $vgpr3 = COPY %10(i32) + $vgpr4 = COPY %11(i32) + $vgpr5 = COPY %12(i32) ... 
--- @@ -200,18 +200,18 @@ body: | ; CHECK-LABEL: name: test_unmerge_s8_s16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s8), %3:_(s8) = G_UNMERGE_VALUES %1 - %4:_(s32) = G_ANYEXT %2 - %5:_(s32) = G_ANYEXT %3 - $vgpr0 = COPY %4 - $vgpr1 = COPY %5 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(i8), %3:_(i8) = G_UNMERGE_VALUES %1(i16) + %4:_(i32) = G_ANYEXT %2(i8) + %5:_(i32) = G_ANYEXT %3(i8) + $vgpr0 = COPY %4(i32) + $vgpr1 = COPY %5(i32) ... --- @@ -222,27 +222,27 @@ body: | ; CHECK-LABEL: name: test_unmerge_s8_s32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[LSHR1]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[LSHR2]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s8), %2:_(s8), %3:_(s8), %4:_(s8) = G_UNMERGE_VALUES %0 - %5:_(s32) = G_ANYEXT %1 - %6:_(s32) = G_ANYEXT %2 - %7:_(s32) = G_ANYEXT %3 - %8:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 - $vgpr1 = COPY %6 - $vgpr2 = COPY %7 - $vgpr3 = COPY %8 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C1]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C2]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[LSHR1]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[LSHR2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i8), %2:_(i8), %3:_(i8), %4:_(i8) = G_UNMERGE_VALUES %0(i32) + %5:_(i32) = G_ANYEXT %1(i8) + %6:_(i32) = G_ANYEXT %2(i8) + %7:_(i32) = G_ANYEXT %3(i8) + %8:_(i32) = G_ANYEXT %4(i8) + $vgpr0 = COPY %5(i32) + $vgpr1 = COPY %6(i32) + $vgpr2 = COPY %7(i32) + $vgpr3 = COPY %8(i32) ... 
--- @@ -253,40 +253,40 @@ body: | ; CHECK-LABEL: name: test_unmerge_s8_s48 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]] - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY2]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[C1]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[LSHR1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[LSHR]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[LSHR2]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[LSHR3]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s48) = G_TRUNC %0 - %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8), %6:_(s8), %7:_(s8) = G_UNMERGE_VALUES %1 - %8:_(s32) = G_ANYEXT %2 - %9:_(s32) = G_ANYEXT %3 - %10:_(s32) = G_ANYEXT %4 - %11:_(s32) = G_ANYEXT %5 - %12:_(s32) = G_ANYEXT %6 - %13:_(s32) = G_ANYEXT %7 - $vgpr0 = COPY %8 - $vgpr1 = COPY %9 - $vgpr2 = COPY %10 - $vgpr3 = COPY %11 - $vgpr4 = COPY %12 - $vgpr5 = COPY %13 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C1]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[UV]], [[C2]] + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[AND]], [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C1]](i32) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[LSHR]], [[COPY2]](i32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[UV1]], [[C2]] + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[AND1]], [[C1]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[LSHR1]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[LSHR]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[LSHR2]](i32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr5 = COPY [[LSHR3]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i48) = G_TRUNC %0(i64) + %2:_(i8), %3:_(i8), %4:_(i8), %5:_(i8), %6:_(i8), %7:_(i8) = G_UNMERGE_VALUES %1(i48) + %8:_(i32) = G_ANYEXT %2(i8) + %9:_(i32) = G_ANYEXT %3(i8) + %10:_(i32) = G_ANYEXT %4(i8) + %11:_(i32) = G_ANYEXT %5(i8) + %12:_(i32) = G_ANYEXT %6(i8) + %13:_(i32) = G_ANYEXT %7(i8) + $vgpr0 = COPY %8(i32) + $vgpr1 = COPY %9(i32) + $vgpr2 = COPY %10(i32) + $vgpr3 = COPY %11(i32) + $vgpr4 = COPY %12(i32) + $vgpr5 = COPY %13(i32) ... 
--- @@ -297,22 +297,22 @@ body: | ; CHECK-LABEL: name: test_unmerge_s16_s48 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV1]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s48) = G_TRUNC %0 - %2:_(s16), %3:_(s16), %4:_(s16) = G_UNMERGE_VALUES %1 - %5:_(s32) = G_ANYEXT %2 - %6:_(s32) = G_ANYEXT %3 - %7:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 - $vgpr1 = COPY %6 - $vgpr2 = COPY %7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[C]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV1]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i48) = G_TRUNC %0(i64) + %2:_(i16), %3:_(i16), %4:_(i16) = G_UNMERGE_VALUES %1(i48) + %5:_(i32) = G_ANYEXT %2(i16) + %6:_(i32) = G_ANYEXT %3(i16) + %7:_(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) + $vgpr1 = COPY %6(i32) + $vgpr2 = COPY %7(i32) ... --- @@ -323,48 +323,48 @@ body: | ; CHECK-LABEL: name: test_unmerge_s8_s64 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]] - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] - ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) - ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[C1]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[LSHR2]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[LSHR]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[LSHR3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[LSHR4]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[LSHR1]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[LSHR5]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s8), %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8), %6:_(s8), %7:_(s8), %8:_(s8) = G_UNMERGE_VALUES %0 - %10:_(s32) = G_ANYEXT %1 - %11:_(s32) = G_ANYEXT %2 - %12:_(s32) = G_ANYEXT %3 - %13:_(s32) = G_ANYEXT %4 - %14:_(s32) = G_ANYEXT %5 - %15:_(s32) = G_ANYEXT %6 - %16:_(s32) = G_ANYEXT %7 - %17:_(s32) = G_ANYEXT %8 - $vgpr0 = COPY %10 - $vgpr1 = COPY %11 - $vgpr2 = COPY %12 - $vgpr3 = COPY %13 - $vgpr4 = COPY %14 - $vgpr5 = COPY %15 - $vgpr6 = COPY %16 - $vgpr7 = COPY %17 + ; 
CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[C]](i32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C1]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[UV]], [[C2]] + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[AND]], [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C1]](i32) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[LSHR]], [[COPY2]](i32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[C1]](i32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[UV1]], [[C2]] + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[AND1]], [[COPY3]](i32) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[LSHR1]], [[C1]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[LSHR2]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[LSHR]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[LSHR3]](i32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr5 = COPY [[LSHR4]](i32) + ; CHECK-NEXT: $vgpr6 = COPY [[LSHR1]](i32) + ; CHECK-NEXT: $vgpr7 = COPY [[LSHR5]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i8), %2:_(i8), %3:_(i8), %4:_(i8), %5:_(i8), %6:_(i8), %7:_(i8), %8:_(i8) = G_UNMERGE_VALUES %0(i64) + %9:_(i32) = G_ANYEXT %1(i8) + %10:_(i32) = G_ANYEXT %2(i8) + %11:_(i32) = G_ANYEXT %3(i8) + %12:_(i32) = G_ANYEXT %4(i8) + %13:_(i32) = G_ANYEXT %5(i8) + %14:_(i32) = G_ANYEXT %6(i8) + %15:_(i32) = G_ANYEXT %7(i8) + %16:_(i32) = G_ANYEXT %8(i8) + $vgpr0 = COPY %9(i32) + $vgpr1 = COPY %10(i32) + $vgpr2 = COPY %11(i32) + $vgpr3 = COPY %12(i32) + $vgpr4 = COPY %13(i32) + $vgpr5 = COPY %14(i32) + $vgpr6 = COPY %15(i32) + $vgpr7 = COPY %16(i32) ... 
--- @@ -376,47 +376,47 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](p1) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]] - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] - ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) - ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[C1]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[LSHR2]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[LSHR]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[LSHR3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[LSHR4]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[LSHR1]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[LSHR5]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](p1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[C]](i32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C1]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[UV]], [[C2]] + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[AND]], [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C1]](i32) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[LSHR]], [[COPY2]](i32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[C1]](i32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[UV1]], [[C2]] + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[AND1]], [[COPY3]](i32) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[LSHR1]], [[C1]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[LSHR2]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[LSHR]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[LSHR3]](i32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr5 = COPY [[LSHR4]](i32) + ; CHECK-NEXT: $vgpr6 = COPY [[LSHR1]](i32) + ; CHECK-NEXT: $vgpr7 = COPY [[LSHR5]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s8), %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8), %6:_(s8), %7:_(s8), %8:_(s8) = G_UNMERGE_VALUES %0 - %10:_(s32) = G_ANYEXT %1 - %11:_(s32) = G_ANYEXT %2 - %12:_(s32) = G_ANYEXT %3 - %13:_(s32) = G_ANYEXT %4 - %14:_(s32) = G_ANYEXT %5 - %15:_(s32) = G_ANYEXT %6 - %16:_(s32) = G_ANYEXT %7 - %17:_(s32) = G_ANYEXT %8 - $vgpr0 = COPY %10 - $vgpr1 = COPY %11 - $vgpr2 = COPY %12 - $vgpr3 = COPY %13 - $vgpr4 = COPY %14 - $vgpr5 = COPY %15 - $vgpr6 = COPY %16 - $vgpr7 = COPY %17 + %1:_(i8), %2:_(i8), %3:_(i8), %4:_(i8), %5:_(i8), %6:_(i8), %7:_(i8), %8:_(i8) = G_UNMERGE_VALUES %0(p1) + %9:_(i32) = G_ANYEXT %1(i8) + %10:_(i32) = G_ANYEXT %2(i8) + 
%11:_(i32) = G_ANYEXT %3(i8) + %12:_(i32) = G_ANYEXT %4(i8) + %13:_(i32) = G_ANYEXT %5(i8) + %14:_(i32) = G_ANYEXT %6(i8) + %15:_(i32) = G_ANYEXT %7(i8) + %16:_(i32) = G_ANYEXT %8(i8) + $vgpr0 = COPY %9(i32) + $vgpr1 = COPY %10(i32) + $vgpr2 = COPY %11(i32) + $vgpr3 = COPY %12(i32) + $vgpr4 = COPY %13(i32) + $vgpr5 = COPY %14(i32) + $vgpr6 = COPY %15(i32) + $vgpr7 = COPY %16(i32) ... --- @@ -428,91 +428,91 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](p1) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]] - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY1]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]] - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY2]](s32) - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]] - ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY4]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY5]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY6]](s32) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] - ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY7]](s32) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] - ; CHECK-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY8]](s32) - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] - ; CHECK-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY9]](s32) - ; CHECK-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[C1]](s32) - ; CHECK-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[C3]](s32) - ; CHECK-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[C4]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[LSHR2]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[LSHR3]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[LSHR4]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[LSHR]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[LSHR5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[LSHR6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[LSHR7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[LSHR8]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[LSHR9]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[LSHR10]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[LSHR1]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[LSHR11]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[LSHR12]](s32) - ; 
CHECK-NEXT: $vgpr15 = COPY [[LSHR13]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](p1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[C]](i32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY [[C1]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[UV]], [[C2]] + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[AND]], [[COPY1]](i32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C3]](i32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[UV]], [[C2]] + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[AND1]], [[COPY2]](i32) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 12 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[UV]], [[C2]] + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[AND2]], [[COPY3]](i32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[C1]](i32) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[LSHR]], [[COPY4]](i32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[C3]](i32) + ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[LSHR]], [[COPY5]](i32) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(i32) = G_LSHR [[LSHR]], [[COPY6]](i32) + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY [[C1]](i32) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[UV1]], [[C2]] + ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(i32) = G_LSHR [[AND3]], [[COPY7]](i32) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY [[C3]](i32) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[UV1]], [[C2]] + ; CHECK-NEXT: [[LSHR9:%[0-9]+]]:_(i32) = G_LSHR [[AND4]], [[COPY8]](i32) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY [[C4]](i32) + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[UV1]], [[C2]] + ; CHECK-NEXT: [[LSHR10:%[0-9]+]]:_(i32) = G_LSHR [[AND5]], [[COPY9]](i32) + ; CHECK-NEXT: [[LSHR11:%[0-9]+]]:_(i32) = G_LSHR [[LSHR1]], [[C1]](i32) + ; CHECK-NEXT: [[LSHR12:%[0-9]+]]:_(i32) = G_LSHR [[LSHR1]], [[C3]](i32) + ; CHECK-NEXT: [[LSHR13:%[0-9]+]]:_(i32) = G_LSHR [[LSHR1]], [[C4]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[LSHR2]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[LSHR3]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[LSHR4]](i32) + ; CHECK-NEXT: $vgpr4 = COPY [[LSHR]](i32) + ; CHECK-NEXT: $vgpr5 = COPY [[LSHR5]](i32) + ; CHECK-NEXT: $vgpr6 = COPY [[LSHR6]](i32) + ; CHECK-NEXT: $vgpr7 = COPY [[LSHR7]](i32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr9 = COPY [[LSHR8]](i32) + ; CHECK-NEXT: $vgpr10 = COPY [[LSHR9]](i32) + ; CHECK-NEXT: $vgpr11 = COPY [[LSHR10]](i32) + ; CHECK-NEXT: $vgpr12 = COPY [[LSHR1]](i32) + ; CHECK-NEXT: $vgpr13 = COPY [[LSHR11]](i32) + ; CHECK-NEXT: $vgpr14 = COPY [[LSHR12]](i32) + ; CHECK-NEXT: $vgpr15 = COPY [[LSHR13]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s4), %2:_(s4), %3:_(s4), %4:_(s4), %5:_(s4), %6:_(s4), %7:_(s4), %8:_(s4), %9:_(s4), %10:_(s4), %11:_(s4), %12:_(s4), %13:_(s4), %14:_(s4), %15:_(s4), %16:_(s4) = G_UNMERGE_VALUES %0 - %17:_(s32) = G_ANYEXT %1 - %18:_(s32) = G_ANYEXT %2 - %19:_(s32) = G_ANYEXT %3 - %20:_(s32) = G_ANYEXT %4 - %21:_(s32) = G_ANYEXT %5 - %22:_(s32) = G_ANYEXT %6 - %23:_(s32) = G_ANYEXT %7 - %24:_(s32) = G_ANYEXT %8 - %25:_(s32) 
= G_ANYEXT %9 - %26:_(s32) = G_ANYEXT %10 - %27:_(s32) = G_ANYEXT %11 - %28:_(s32) = G_ANYEXT %12 - %29:_(s32) = G_ANYEXT %13 - %30:_(s32) = G_ANYEXT %14 - %31:_(s32) = G_ANYEXT %15 - %32:_(s32) = G_ANYEXT %16 - $vgpr0 = COPY %17 - $vgpr1 = COPY %18 - $vgpr2 = COPY %19 - $vgpr3 = COPY %20 - $vgpr4 = COPY %21 - $vgpr5 = COPY %22 - $vgpr6 = COPY %23 - $vgpr7 = COPY %24 - $vgpr8 = COPY %25 - $vgpr9 = COPY %26 - $vgpr10 = COPY %27 - $vgpr11 = COPY %28 - $vgpr12 = COPY %29 - $vgpr13 = COPY %30 - $vgpr14 = COPY %31 - $vgpr15 = COPY %32 + %1:_(i4), %2:_(i4), %3:_(i4), %4:_(i4), %5:_(i4), %6:_(i4), %7:_(i4), %8:_(i4), %9:_(i4), %10:_(i4), %11:_(i4), %12:_(i4), %13:_(i4), %14:_(i4), %15:_(i4), %16:_(i4) = G_UNMERGE_VALUES %0(p1) + %17:_(i32) = G_ANYEXT %1(i4) + %18:_(i32) = G_ANYEXT %2(i4) + %19:_(i32) = G_ANYEXT %3(i4) + %20:_(i32) = G_ANYEXT %4(i4) + %21:_(i32) = G_ANYEXT %5(i4) + %22:_(i32) = G_ANYEXT %6(i4) + %23:_(i32) = G_ANYEXT %7(i4) + %24:_(i32) = G_ANYEXT %8(i4) + %25:_(i32) = G_ANYEXT %9(i4) + %26:_(i32) = G_ANYEXT %10(i4) + %27:_(i32) = G_ANYEXT %11(i4) + %28:_(i32) = G_ANYEXT %12(i4) + %29:_(i32) = G_ANYEXT %13(i4) + %30:_(i32) = G_ANYEXT %14(i4) + %31:_(i32) = G_ANYEXT %15(i4) + %32:_(i32) = G_ANYEXT %16(i4) + $vgpr0 = COPY %17(i32) + $vgpr1 = COPY %18(i32) + $vgpr2 = COPY %19(i32) + $vgpr3 = COPY %20(i32) + $vgpr4 = COPY %21(i32) + $vgpr5 = COPY %22(i32) + $vgpr6 = COPY %23(i32) + $vgpr7 = COPY %24(i32) + $vgpr8 = COPY %25(i32) + $vgpr9 = COPY %26(i32) + $vgpr10 = COPY %27(i32) + $vgpr11 = COPY %28(i32) + $vgpr12 = COPY %29(i32) + $vgpr13 = COPY %30(i32) + $vgpr14 = COPY %31(i32) + $vgpr15 = COPY %32(i32) ... --- @@ -524,24 +524,24 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](p1) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[LSHR1]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](p1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[C]](i32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[LSHR1]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s16), %2:_(s16), %3:_(s16), %4:_(s16) = G_UNMERGE_VALUES %0 - %5:_(s32) = G_ANYEXT %1 - %6:_(s32) = G_ANYEXT %2 - %7:_(s32) = G_ANYEXT %3 - %8:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 - $vgpr1 = COPY %6 - $vgpr2 = COPY %7 - $vgpr3 = COPY %8 + %1:_(i16), %2:_(i16), %3:_(i16), %4:_(i16) = G_UNMERGE_VALUES %0(p1) + %5:_(i32) = G_ANYEXT %1(i16) + %6:_(i32) = G_ANYEXT %2(i16) + %7:_(i32) = G_ANYEXT %3(i16) + %8:_(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) + $vgpr1 = COPY %6(i32) + $vgpr2 = COPY %7(i32) + $vgpr3 = COPY %8(i32) ... 
--- @@ -553,13 +553,13 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](p1) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](p1) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32), %2:_(s32) = G_UNMERGE_VALUES %0 - $vgpr0 = COPY %1 - $vgpr1 = COPY %2 + %1:_(i32), %2:_(i32) = G_UNMERGE_VALUES %0(p1) + $vgpr0 = COPY %1(i32) + $vgpr1 = COPY %2(i32) ... --- @@ -570,17 +570,17 @@ body: | ; CHECK-LABEL: name: test_unmerge_s16_s32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16), %2:_(s16) = G_UNMERGE_VALUES %0 - %3:_(s32) = G_ANYEXT %1 - %4:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 - $vgpr1 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16), %2:_(i16) = G_UNMERGE_VALUES %0(i32) + %3:_(i32) = G_ANYEXT %1(i16) + %4:_(i32) = G_ANYEXT %2(i16) + $vgpr0 = COPY %3(i32) + $vgpr1 = COPY %4(i32) ... --- @@ -592,17 +592,17 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p3) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[PTRTOINT]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](s32) + ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(i32) = G_PTRTOINT [[COPY]](p3) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[PTRTOINT]], [[C]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[PTRTOINT]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](i32) %0:_(p3) = COPY $vgpr0 - %1:_(s16), %2:_(s16) = G_UNMERGE_VALUES %0 - %3:_(s32) = G_ANYEXT %1 - %4:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 - $vgpr1 = COPY %4 + %1:_(i16), %2:_(i16) = G_UNMERGE_VALUES %0(p3) + %3:_(i32) = G_ANYEXT %1(i16) + %4:_(i32) = G_ANYEXT %2(i16) + $vgpr0 = COPY %3(i32) + $vgpr1 = COPY %4(i32) ... 
--- @@ -614,27 +614,27 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p3) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C1]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C2]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[PTRTOINT]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[LSHR1]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[LSHR2]](s32) + ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(i32) = G_PTRTOINT [[COPY]](p3) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[PTRTOINT]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[PTRTOINT]], [[C1]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[PTRTOINT]], [[C2]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[PTRTOINT]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[LSHR1]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[LSHR2]](i32) %0:_(p3) = COPY $vgpr0 - %1:_(s8), %2:_(s8), %3:_(s8), %4:_(s8) = G_UNMERGE_VALUES %0 - %5:_(s32) = G_ANYEXT %1 - %6:_(s32) = G_ANYEXT %2 - %7:_(s32) = G_ANYEXT %3 - %8:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 - $vgpr1 = COPY %6 - $vgpr2 = COPY %7 - $vgpr3 = COPY %8 + %1:_(i8), %2:_(i8), %3:_(i8), %4:_(i8) = G_UNMERGE_VALUES %0(p3) + %5:_(i32) = G_ANYEXT %1(i8) + %6:_(i32) = G_ANYEXT %2(i8) + %7:_(i32) = G_ANYEXT %3(i8) + %8:_(i32) = G_ANYEXT %4(i8) + $vgpr0 = COPY %5(i32) + $vgpr1 = COPY %6(i32) + $vgpr2 = COPY %7(i32) + $vgpr3 = COPY %8(i32) ... 
@@ -646,25 +646,25 @@ body: | ; CHECK-LABEL: name: test_unmerge_s16_s64 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[LSHR1]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s16), %2:_(s16), %3:_(s16), %4:_(s16) = G_UNMERGE_VALUES %0 - %5:_(s32) = G_ANYEXT %1 - %6:_(s32) = G_ANYEXT %2 - %7:_(s32) = G_ANYEXT %3 - %8:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 - $vgpr1 = COPY %6 - $vgpr2 = COPY %7 - $vgpr3 = COPY %8 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[C]](i32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV1]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[LSHR1]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i16), %2:_(i16), %3:_(i16), %4:_(i16) = G_UNMERGE_VALUES %0(i64) + %5:_(i32) = G_ANYEXT %1(i16) + %6:_(i32) = G_ANYEXT %2(i16) + %7:_(i32) = G_ANYEXT %3(i16) + %8:_(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) + $vgpr1 = COPY %6(i32) + $vgpr2 = COPY %7(i32) + $vgpr3 = COPY %8(i32) ... --- @@ -675,23 +675,23 @@ body: | ; CHECK-LABEL: name: test_unmerge_s1_s3 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[LSHR1]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s3) = G_TRUNC %0 - %2:_(s1), %3:_(s1), %4:_(s1) = G_UNMERGE_VALUES %1 - %5:_(s32) = G_ANYEXT %2 - %6:_(s32) = G_ANYEXT %3 - %7:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 - $vgpr1 = COPY %6 - $vgpr2 = COPY %7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C1]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[LSHR1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i3) = G_TRUNC %0(i32) + %2:_(i1), %3:_(i1), %4:_(i1) = G_UNMERGE_VALUES %1(i3) + %5:_(i32) = G_ANYEXT %2(i1) + %6:_(i32) = G_ANYEXT %3(i1) + %7:_(i32) = G_ANYEXT %4(i1) + $vgpr0 = COPY %5(i32) + $vgpr1 = COPY %6(i32) + $vgpr2 = COPY %7(i32) ... 
--- @@ -702,48 +702,48 @@ body: | ; CHECK-LABEL: name: test_unmerge_s1_s8 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C3]](s32) - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C4]](s32) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 - ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C5]](s32) - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C6]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[LSHR1]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[LSHR2]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[LSHR3]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[LSHR4]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[LSHR5]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[LSHR6]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s8) = G_TRUNC %0 - %2:_(s1), %3:_(s1), %4:_(s1), %5:_(s1), %6:_(s1), %7:_(s1), %8:_(s1), %9:_(s1) = G_UNMERGE_VALUES %1 - %10:_(s32) = G_ANYEXT %2 - %11:_(s32) = G_ANYEXT %3 - %12:_(s32) = G_ANYEXT %4 - %13:_(s32) = G_ANYEXT %5 - %14:_(s32) = G_ANYEXT %6 - %15:_(s32) = G_ANYEXT %7 - %16:_(s32) = G_ANYEXT %8 - %17:_(s32) = G_ANYEXT %9 - $vgpr0 = COPY %10 - $vgpr1 = COPY %11 - $vgpr2 = COPY %12 - $vgpr3 = COPY %13 - $vgpr4 = COPY %14 - $vgpr5 = COPY %15 - $vgpr6 = COPY %16 - $vgpr7 = COPY %17 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C1]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C2]](i32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C3]](i32) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(i32) = G_CONSTANT i32 5 + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C4]](i32) + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 6 + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C5]](i32) + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C6]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[LSHR1]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[LSHR2]](i32) + ; CHECK-NEXT: $vgpr4 = COPY [[LSHR3]](i32) + ; CHECK-NEXT: $vgpr5 = COPY [[LSHR4]](i32) + ; CHECK-NEXT: $vgpr6 = COPY [[LSHR5]](i32) + ; CHECK-NEXT: $vgpr7 = COPY [[LSHR6]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i8) = G_TRUNC %0(i32) + %2:_(i1), %3:_(i1), %4:_(i1), %5:_(i1), %6:_(i1), %7:_(i1), %8:_(i1), %9:_(i1) = G_UNMERGE_VALUES %1(i8) + %10:_(i32) = G_ANYEXT %2(i1) + %11:_(i32) = G_ANYEXT %3(i1) + %12:_(i32) = G_ANYEXT %4(i1) + %13:_(i32) = G_ANYEXT 
%5(i1) + %14:_(i32) = G_ANYEXT %6(i1) + %15:_(i32) = G_ANYEXT %7(i1) + %16:_(i32) = G_ANYEXT %8(i1) + %17:_(i32) = G_ANYEXT %9(i1) + $vgpr0 = COPY %10(i32) + $vgpr1 = COPY %11(i32) + $vgpr2 = COPY %12(i32) + $vgpr3 = COPY %13(i32) + $vgpr4 = COPY %14(i32) + $vgpr5 = COPY %15(i32) + $vgpr6 = COPY %16(i32) + $vgpr7 = COPY %17(i32) ... --- @@ -755,14 +755,14 @@ body: | ; CHECK-LABEL: name: test_unmerge_s128_v2s128 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](<2 x s128>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[UV]](s128) - ; CHECK-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[UV1]](s128) - %0:_(<2 x s128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - %1:_(s128), %2:_(s128) = G_UNMERGE_VALUES %0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 - $vgpr4_vgpr5_vgpr6_vgpr7 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i128), [[UV1:%[0-9]+]]:_(i128) = G_UNMERGE_VALUES [[COPY]](<2 x i128>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[UV]](i128) + ; CHECK-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[UV1]](i128) + %0:_(<2 x i128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(i128), %2:_(i128) = G_UNMERGE_VALUES %0(<2 x i128>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(i128) + $vgpr4_vgpr5_vgpr6_vgpr7 = COPY %2(i128) ... --- @@ -774,14 +774,14 @@ body: | ; CHECK-LABEL: name: test_unmerge_s128_s256 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](s256) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[UV]](s128) - ; CHECK-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[UV1]](s128) - %0:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - %1:_(s128), %2:_(s128) = G_UNMERGE_VALUES %0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 - $vgpr4_vgpr5_vgpr6_vgpr7 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i128), [[UV1:%[0-9]+]]:_(i128) = G_UNMERGE_VALUES [[COPY]](i256) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[UV]](i128) + ; CHECK-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[UV1]](i128) + %0:_(i256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(i128), %2:_(i128) = G_UNMERGE_VALUES %0(i256) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(i128) + $vgpr4_vgpr5_vgpr6_vgpr7 = COPY %2(i128) ... 
--- @@ -794,14 +794,14 @@ body: | ; CHECK-LABEL: name: test_unmerge_s256_s512 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s256), [[UV1:%[0-9]+]]:_(s256) = G_UNMERGE_VALUES [[COPY]](s512) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[UV]](s256) - ; CHECK-NEXT: $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[UV1]](s256) - %0:_(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - %1:_(s256), %2:_(s256) = G_UNMERGE_VALUES %0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 - $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i256), [[UV1:%[0-9]+]]:_(i256) = G_UNMERGE_VALUES [[COPY]](i512) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[UV]](i256) + ; CHECK-NEXT: $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[UV1]](i256) + %0:_(i512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:_(i256), %2:_(i256) = G_UNMERGE_VALUES %0(i512) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1(i256) + $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %2(i256) ... --- @@ -814,14 +814,14 @@ body: | ; CHECK-LABEL: name: test_unmerge_s256_v2s256 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s256>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s256), [[UV1:%[0-9]+]]:_(s256) = G_UNMERGE_VALUES [[COPY]](<2 x s256>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[UV]](s256) - ; CHECK-NEXT: $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[UV1]](s256) - %0:_(<2 x s256>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - %1:_(s256), %2:_(s256) = G_UNMERGE_VALUES %0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 - $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i256>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i256), [[UV1:%[0-9]+]]:_(i256) = G_UNMERGE_VALUES [[COPY]](<2 x i256>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[UV]](i256) + ; CHECK-NEXT: $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[UV1]](i256) + %0:_(<2 x i256>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:_(i256), %2:_(i256) = G_UNMERGE_VALUES %0(<2 x i256>) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1(i256) + $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %2(i256) ... 
--- @@ -834,14 +834,14 @@ body: | ; CHECK-LABEL: name: test_unmerge_s512_s1024 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s1024) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s512), [[UV1:%[0-9]+]]:_(s512) = G_UNMERGE_VALUES [[COPY]](s1024) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[UV]](s512) - ; CHECK-NEXT: $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[UV1]](s512) - %0:_(s1024) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - %1:_(s512), %2:_(s512) = G_UNMERGE_VALUES %0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1 - $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i1024) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i512), [[UV1:%[0-9]+]]:_(i512) = G_UNMERGE_VALUES [[COPY]](i1024) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[UV]](i512) + ; CHECK-NEXT: $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[UV1]](i512) + %0:_(i1024) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + %1:_(i512), %2:_(i512) = G_UNMERGE_VALUES %0(i1024) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1(i512) + $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %2(i512) ... 
--- @@ -854,14 +854,14 @@ body: | ; CHECK-LABEL: name: test_unmerge_s512_v2s512 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s512>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s512), [[UV1:%[0-9]+]]:_(s512) = G_UNMERGE_VALUES [[COPY]](<2 x s512>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[UV]](s512) - ; CHECK-NEXT: $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[UV1]](s512) - %0:_(<2 x s512>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - %1:_(s512), %2:_(s512) = G_UNMERGE_VALUES %0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1 - $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i512>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i512), [[UV1:%[0-9]+]]:_(i512) = G_UNMERGE_VALUES [[COPY]](<2 x i512>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[UV]](i512) + ; CHECK-NEXT: $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[UV1]](i512) + %0:_(<2 x i512>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + %1:_(i512), %2:_(i512) = G_UNMERGE_VALUES %0(<2 x i512>) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1(i512) + $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %2(i512) ... 
--- @@ -869,16 +869,16 @@ name: test_unmerge_v2s1 body: | bb.0: ; CHECK-LABEL: name: test_unmerge_v2s1 - ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[UV]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[UV1]](s32) - ; CHECK-NEXT: S_NOP 0, implicit [[TRUNC]](s1) - ; CHECK-NEXT: S_NOP 0, implicit [[TRUNC1]](s1) - %0:_(<2 x s1>) = G_IMPLICIT_DEF - %1:_(s1), %2:_(s1) = G_UNMERGE_VALUES %0 - S_NOP 0, implicit %1 - S_NOP 0, implicit %2 + ; CHECK: [[DEF:%[0-9]+]]:_(<2 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF]](<2 x i32>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i1) = G_TRUNC [[UV]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i1) = G_TRUNC [[UV1]](i32) + ; CHECK-NEXT: S_NOP 0, implicit [[TRUNC]](i1) + ; CHECK-NEXT: S_NOP 0, implicit [[TRUNC1]](i1) + %0:_(<2 x i1>) = G_IMPLICIT_DEF + %1:_(i1), %2:_(i1) = G_UNMERGE_VALUES %0(<2 x i1>) + S_NOP 0, implicit %1(i1) + S_NOP 0, implicit %2(i1) ... --- @@ -890,28 +890,28 @@ body: | ; CHECK-LABEL: name: test_unmerge_s8_v4s8 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[LSHR1]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[LSHR2]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(<4 x s8>) = G_BITCAST %0 - %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8) = G_UNMERGE_VALUES %1 - %6:_(s32) = G_ANYEXT %2 - %7:_(s32) = G_ANYEXT %3 - %8:_(s32) = G_ANYEXT %4 - %9:_(s32) = G_ANYEXT %5 - $vgpr0 = COPY %6 - $vgpr1 = COPY %7 - $vgpr2 = COPY %8 - $vgpr3 = COPY %9 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C1]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C2]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[LSHR1]](i32) + ; CHECK-NEXT: $vgpr3 = COPY [[LSHR2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(<4 x i8>) = G_BITCAST %0(i32) + %2:_(i8), %3:_(i8), %4:_(i8), %5:_(i8) = G_UNMERGE_VALUES %1(<4 x i8>) + %6:_(i32) = G_ANYEXT %2(i8) + %7:_(i32) = G_ANYEXT %3(i8) + %8:_(i32) = G_ANYEXT %4(i8) + %9:_(i32) = G_ANYEXT %5(i8) + $vgpr0 = COPY %6(i32) + $vgpr1 = COPY %7(i32) + $vgpr2 = COPY %8(i32) + $vgpr3 = COPY %9(i32) ... 
--- @@ -923,24 +923,24 @@ body: | ; CHECK-LABEL: name: test_unmerge_s8_v3s8 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[LSHR1]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s24) = G_TRUNC %0 - %2:_(<3 x s8>) = G_BITCAST %1 - %3:_(s8), %4:_(s8), %5:_(s8) = G_UNMERGE_VALUES %2 - %6:_(s32) = G_ANYEXT %3 - %7:_(s32) = G_ANYEXT %4 - %8:_(s32) = G_ANYEXT %5 - $vgpr0 = COPY %6 - $vgpr1 = COPY %7 - $vgpr2 = COPY %8 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C1]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[LSHR1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i24) = G_TRUNC %0(i32) + %2:_(<3 x i8>) = G_BITCAST %1(i24) + %3:_(i8), %4:_(i8), %5:_(i8) = G_UNMERGE_VALUES %2(<3 x i8>) + %6:_(i32) = G_ANYEXT %3(i8) + %7:_(i32) = G_ANYEXT %4(i8) + %8:_(i32) = G_ANYEXT %5(i8) + $vgpr0 = COPY %6(i32) + $vgpr1 = COPY %7(i32) + $vgpr2 = COPY %8(i32) ... --- @@ -952,19 +952,19 @@ body: | ; CHECK-LABEL: name: test_unmerge_s8_v2s8 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(<2 x s8>) = G_BITCAST %1 - %3:_(s8), %4:_(s8) = G_UNMERGE_VALUES %2 - %5:_(s32) = G_ANYEXT %3 - %6:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 - $vgpr1 = COPY %6 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(<2 x i8>) = G_BITCAST %1(i16) + %3:_(i8), %4:_(i8) = G_UNMERGE_VALUES %2(<2 x i8>) + %5:_(i32) = G_ANYEXT %3(i8) + %6:_(i32) = G_ANYEXT %4(i8) + $vgpr0 = COPY %5(i32) + $vgpr1 = COPY %6(i32) ... 
@@ -977,22 +977,22 @@ body: | ; CHECK-LABEL: name: test_unmerge_v3s32_v12s32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<6 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s32>) = COPY $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[COPY]](<6 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[COPY1]](<6 x s32>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[UV]](<3 x s32>) - ; CHECK-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[UV1]](<3 x s32>) - ; CHECK-NEXT: $vgpr6_vgpr7_vgpr8 = COPY [[UV2]](<3 x s32>) - ; CHECK-NEXT: $vgpr9_vgpr10_vgpr11 = COPY [[UV3]](<3 x s32>) - %0:_(<6 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - %1:_(<6 x s32>) = COPY $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 - %2:_(<12 x s32>) = G_CONCAT_VECTORS %0, %1 - %3:_(<3 x s32>), %4:_(<3 x s32>), %5:_(<3 x s32>), %6:_(<3 x s32>) = G_UNMERGE_VALUES %2 - $vgpr0_vgpr1_vgpr2 = COPY %3 - $vgpr3_vgpr4_vgpr5 = COPY %4 - $vgpr6_vgpr7_vgpr8 = COPY %5 - $vgpr9_vgpr10_vgpr11 = COPY %6 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<6 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<6 x i32>) = COPY $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x i32>), [[UV1:%[0-9]+]]:_(<3 x i32>) = G_UNMERGE_VALUES [[COPY]](<6 x i32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<3 x i32>), [[UV3:%[0-9]+]]:_(<3 x i32>) = G_UNMERGE_VALUES [[COPY1]](<6 x i32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[UV]](<3 x i32>) + ; CHECK-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[UV1]](<3 x i32>) + ; CHECK-NEXT: $vgpr6_vgpr7_vgpr8 = COPY [[UV2]](<3 x i32>) + ; CHECK-NEXT: $vgpr9_vgpr10_vgpr11 = COPY [[UV3]](<3 x i32>) + %0:_(<6 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + %1:_(<6 x i32>) = COPY $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 + %2:_(<12 x i32>) = G_CONCAT_VECTORS %0(<6 x i32>), %1(<6 x i32>) + %3:_(<3 x i32>), %4:_(<3 x i32>), %5:_(<3 x i32>), %6:_(<3 x i32>) = G_UNMERGE_VALUES %2(<12 x i32>) + $vgpr0_vgpr1_vgpr2 = COPY %3(<3 x i32>) + $vgpr3_vgpr4_vgpr5 = COPY %4(<3 x i32>) + $vgpr6_vgpr7_vgpr8 = COPY %5(<3 x i32>) + $vgpr9_vgpr10_vgpr11 = COPY %6(<3 x i32>) ... 
@@ -1005,39 +1005,39 @@ body: | ; CHECK-LABEL: name: test_unmerge_v3s8_v12s8 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) - ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) - ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) - ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) - ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32) - ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[LSHR]](s32), [[LSHR1]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LSHR2]](s32), [[UV1]](s32), [[LSHR3]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LSHR4]](s32), [[LSHR5]](s32), [[UV2]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LSHR6]](s32), [[LSHR7]](s32), [[LSHR8]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - ; CHECK-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[BUILD_VECTOR1]](<3 x s32>) - ; CHECK-NEXT: $vgpr6_vgpr7_vgpr8 = COPY [[BUILD_VECTOR2]](<3 x s32>) - ; CHECK-NEXT: $vgpr9_vgpr10_vgpr11 = COPY [[BUILD_VECTOR3]](<3 x s32>) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<12 x s8>) = G_BITCAST %0 - %2:_(<3 x s8>), %3:_(<3 x s8>), %4:_(<3 x s8>), %5:_(<3 x s8>) = G_UNMERGE_VALUES %1 - %6:_(<3 x s32>) = G_ANYEXT %2 - %7:_(<3 x s32>) = G_ANYEXT %3 - %8:_(<3 x s32>) = G_ANYEXT %4 - %9:_(<3 x s32>) = G_ANYEXT %5 - $vgpr0_vgpr1_vgpr2 = COPY %6 - $vgpr3_vgpr4_vgpr5 = COPY %7 - $vgpr6_vgpr7_vgpr8 = COPY %8 - $vgpr9_vgpr10_vgpr11 = COPY %9 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[C1]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[C2]](i32) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C]](i32) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C1]](i32) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[UV1]], [[C2]](i32) + ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[UV2]], [[C]](i32) + ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(i32) = G_LSHR [[UV2]], [[C1]](i32) + ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(i32) = G_LSHR [[UV2]], [[C2]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[LSHR]](i32), [[LSHR1]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LSHR2]](i32), [[UV1]](i32), [[LSHR3]](i32) + ; CHECK-NEXT: 
[[BUILD_VECTOR2:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LSHR4]](i32), [[LSHR5]](i32), [[UV2]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LSHR6]](i32), [[LSHR7]](i32), [[LSHR8]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) + ; CHECK-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[BUILD_VECTOR1]](<3 x i32>) + ; CHECK-NEXT: $vgpr6_vgpr7_vgpr8 = COPY [[BUILD_VECTOR2]](<3 x i32>) + ; CHECK-NEXT: $vgpr9_vgpr10_vgpr11 = COPY [[BUILD_VECTOR3]](<3 x i32>) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<12 x i8>) = G_BITCAST %0(<3 x i32>) + %2:_(<3 x i8>), %3:_(<3 x i8>), %4:_(<3 x i8>), %5:_(<3 x i8>) = G_UNMERGE_VALUES %1(<12 x i8>) + %6:_(<3 x i32>) = G_ANYEXT %2(<3 x i8>) + %7:_(<3 x i32>) = G_ANYEXT %3(<3 x i8>) + %8:_(<3 x i32>) = G_ANYEXT %4(<3 x i8>) + %9:_(<3 x i32>) = G_ANYEXT %5(<3 x i8>) + $vgpr0_vgpr1_vgpr2 = COPY %6(<3 x i32>) + $vgpr3_vgpr4_vgpr5 = COPY %7(<3 x i32>) + $vgpr6_vgpr7_vgpr8 = COPY %8(<3 x i32>) + $vgpr9_vgpr10_vgpr11 = COPY %9(<3 x i32>) ... @@ -1050,43 +1050,43 @@ body: | ; CHECK-LABEL: name: test_unmerge_v3s16_v12s16 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<12 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<12 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<12 x s16>) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LSHR1]](s32), [[BITCAST3]](s32), [[LSHR2]](s32) - ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<12 x s16>) - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV15]](<2 x s16>) - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV16]](<2 x s16>) - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST4]](s32), [[LSHR3]](s32), [[BITCAST5]](s32) - ; CHECK-NEXT: [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<12 x s16>) - ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV22]](<2 x s16>) - ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], 
[[C]](s32) - ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV23]](<2 x s16>) - ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LSHR4]](s32), [[BITCAST7]](s32), [[LSHR5]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - ; CHECK-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[BUILD_VECTOR1]](<3 x s32>) - ; CHECK-NEXT: $vgpr6_vgpr7_vgpr8 = COPY [[BUILD_VECTOR2]](<3 x s32>) - ; CHECK-NEXT: $vgpr9_vgpr10_vgpr11 = COPY [[BUILD_VECTOR3]](<3 x s32>) - %0:_(<12 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - %1:_(<3 x s16>), %2:_(<3 x s16>), %3:_(<3 x s16>), %4:_(<3 x s16>) = G_UNMERGE_VALUES %0 - %5:_(<3 x s32>) = G_ANYEXT %1 - %6:_(<3 x s32>) = G_ANYEXT %2 - %7:_(<3 x s32>) = G_ANYEXT %3 - %8:_(<3 x s32>) = G_ANYEXT %4 - $vgpr0_vgpr1_vgpr2 = COPY %5 - $vgpr3_vgpr4_vgpr5 = COPY %6 - $vgpr6_vgpr7_vgpr8 = COPY %7 - $vgpr9_vgpr10_vgpr11 = COPY %8 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<12 x i16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<12 x i16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[BITCAST]](i32), [[LSHR]](i32), [[BITCAST1]](i32) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>), [[UV8:%[0-9]+]]:_(<2 x i16>), [[UV9:%[0-9]+]]:_(<2 x i16>), [[UV10:%[0-9]+]]:_(<2 x i16>), [[UV11:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<12 x i16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV7]](<2 x i16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV8]](<2 x i16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[LSHR1]](i32), [[BITCAST3]](i32), [[LSHR2]](i32) + ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(<2 x i16>), [[UV13:%[0-9]+]]:_(<2 x i16>), [[UV14:%[0-9]+]]:_(<2 x i16>), [[UV15:%[0-9]+]]:_(<2 x i16>), [[UV16:%[0-9]+]]:_(<2 x i16>), [[UV17:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<12 x i16>) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[UV15]](<2 x i16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[UV16]](<2 x i16>) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[BITCAST4]](i32), [[LSHR3]](i32), [[BITCAST5]](i32) + ; CHECK-NEXT: [[UV18:%[0-9]+]]:_(<2 x i16>), [[UV19:%[0-9]+]]:_(<2 x i16>), [[UV20:%[0-9]+]]:_(<2 x i16>), [[UV21:%[0-9]+]]:_(<2 x i16>), [[UV22:%[0-9]+]]:_(<2 x i16>), [[UV23:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<12 x i16>) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(i32) = G_BITCAST [[UV22]](<2 x i16>) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST6]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[UV23]](<2 x i16>) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST7]], [[C]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<3 x i32>) = 
G_BUILD_VECTOR [[LSHR4]](i32), [[BITCAST7]](i32), [[LSHR5]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) + ; CHECK-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[BUILD_VECTOR1]](<3 x i32>) + ; CHECK-NEXT: $vgpr6_vgpr7_vgpr8 = COPY [[BUILD_VECTOR2]](<3 x i32>) + ; CHECK-NEXT: $vgpr9_vgpr10_vgpr11 = COPY [[BUILD_VECTOR3]](<3 x i32>) + %0:_(<12 x i16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + %1:_(<3 x i16>), %2:_(<3 x i16>), %3:_(<3 x i16>), %4:_(<3 x i16>) = G_UNMERGE_VALUES %0(<12 x i16>) + %5:_(<3 x i32>) = G_ANYEXT %1(<3 x i16>) + %6:_(<3 x i32>) = G_ANYEXT %2(<3 x i16>) + %7:_(<3 x i32>) = G_ANYEXT %3(<3 x i16>) + %8:_(<3 x i32>) = G_ANYEXT %4(<3 x i16>) + $vgpr0_vgpr1_vgpr2 = COPY %5(<3 x i32>) + $vgpr3_vgpr4_vgpr5 = COPY %6(<3 x i32>) + $vgpr6_vgpr7_vgpr8 = COPY %7(<3 x i32>) + $vgpr9_vgpr10_vgpr11 = COPY %8(<3 x i32>) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir index 44f44123bb736..69631c0435d0c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir @@ -14,110 +14,110 @@ body: | ; GFX6-LABEL: name: test_urem_s32 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY1]](s32) - ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] - ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[COPY1]] - ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[ADD]] - ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY1]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[MUL1]] - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY1]] - ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY1]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY1]] - ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY1]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX6-NEXT: $vgpr0 = COPY [[SELECT1]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[COPY1]](i32) + ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] + ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C1]], [[COPY1]] + ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6-NEXT: 
[[ADD:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[COPY]], [[ADD]] + ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[COPY1]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[COPY]], [[MUL1]] + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[COPY1]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[COPY1]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT]](i32), [[COPY1]] + ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SELECT]], [[COPY1]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SUB3]], [[SELECT]] + ; GFX6-NEXT: $vgpr0 = COPY [[SELECT1]](i32) ; ; GFX8-LABEL: name: test_urem_s32 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY1]](s32) - ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] - ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[COPY1]] - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[ADD]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY1]] - ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[MUL1]] - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY1]] - ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY1]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY1]] - ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY1]] - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX8-NEXT: $vgpr0 = COPY [[SELECT1]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[COPY1]](i32) + ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] + ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C1]], [[COPY1]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[COPY]], [[ADD]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[COPY1]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[COPY]], [[MUL1]] + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[COPY1]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB 
[[SUB1]], [[COPY1]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT]](i32), [[COPY1]] + ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SELECT]], [[COPY1]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SUB3]], [[SELECT]] + ; GFX8-NEXT: $vgpr0 = COPY [[SELECT1]](i32) ; ; GFX9-LABEL: name: test_urem_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY1]](s32) - ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] - ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[COPY1]] - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[ADD]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY1]] - ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[MUL1]] - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY1]] - ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY1]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY1]] - ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY1]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX9-NEXT: $vgpr0 = COPY [[SELECT1]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[COPY1]](i32) + ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] + ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C1]], [[COPY1]] + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[COPY]], [[ADD]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[COPY1]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[COPY]], [[MUL1]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[COPY1]] + ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[COPY1]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT]](i32), [[COPY1]] + ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SELECT]], [[COPY1]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SUB3]], [[SELECT]] + ; GFX9-NEXT: $vgpr0 = COPY 
[[SELECT1]](i32) ; ; GFX10-LABEL: name: test_urem_s32 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY1]](s32) - ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] - ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[COPY1]] - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[ADD]] - ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY1]] - ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[MUL1]] - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY1]] - ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY1]] - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY1]] - ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY1]] - ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX10-NEXT: $vgpr0 = COPY [[SELECT1]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_UREM %0, %1 - $vgpr0 = COPY %2 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[COPY1]](i32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C1]], [[COPY1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[COPY]], [[ADD]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[COPY1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[COPY]], [[MUL1]] + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[COPY1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[COPY1]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT]](i32), [[COPY1]] + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SELECT]], [[COPY1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SUB3]], [[SELECT]] + ; GFX10-NEXT: $vgpr0 = COPY [[SELECT1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_UREM %0, %1 + $vgpr0 = COPY %2(i32) ... 
--- @@ -129,190 +129,190 @@ body: | ; GFX6-LABEL: name: test_urem_v2s32 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV2]](s32) - ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] - ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UV2]] - ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[ADD]] - ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[UV2]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[MUL1]] - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[UV2]] - ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[UV2]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[UV2]] - ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[UV2]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX6-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV3]](s32) - ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) - ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C]] - ; GFX6-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UV3]] - ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB4]], [[FPTOUI1]] - ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] - ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] - ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[ADD1]] - ; GFX6-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[UV3]] - ; GFX6-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[MUL3]] - ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[UV3]] - ; GFX6-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[UV3]] - ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB6]], [[SUB5]] - ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[UV3]] - ; GFX6-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[UV3]] - ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB7]], [[SELECT2]] - ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT1]](s32), [[SELECT3]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(i32), 
[[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[UV2]](i32) + ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] + ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C1]], [[UV2]] + ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[UV]], [[ADD]] + ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[UV2]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[UV]], [[MUL1]] + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[UV2]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[UV2]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT]](i32), [[UV2]] + ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SELECT]], [[UV2]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SUB3]], [[SELECT]] + ; GFX6-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[UV3]](i32) + ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](f32) + ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C]] + ; GFX6-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL1]](f32) + ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[C1]], [[UV3]] + ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[SUB4]], [[FPTOUI1]] + ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[UV1]], [[ADD1]] + ; GFX6-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UMULH3]], [[UV3]] + ; GFX6-NEXT: [[SUB5:%[0-9]+]]:_(i32) = G_SUB [[UV1]], [[MUL3]] + ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB5]](i32), [[UV3]] + ; GFX6-NEXT: [[SUB6:%[0-9]+]]:_(i32) = G_SUB [[SUB5]], [[UV3]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SUB6]], [[SUB5]] + ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT2]](i32), [[UV3]] + ; GFX6-NEXT: [[SUB7:%[0-9]+]]:_(i32) = G_SUB [[SELECT2]], [[UV3]] + ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(i32) = G_SELECT [[ICMP3]](i1), [[SUB7]], [[SELECT2]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SELECT1]](i32), [[SELECT3]](i32) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX8-LABEL: name: test_urem_v2s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV2]](s32) - ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX8-NEXT: 
[[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] - ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UV2]] - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[ADD]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[UV2]] - ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[MUL1]] - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[UV2]] - ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[UV2]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[UV2]] - ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[UV2]] - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX8-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV3]](s32) - ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) - ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C]] - ; GFX8-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UV3]] - ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB4]], [[FPTOUI1]] - ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] - ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[ADD1]] - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[UV3]] - ; GFX8-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[MUL3]] - ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[UV3]] - ; GFX8-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[UV3]] - ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB6]], [[SUB5]] - ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[UV3]] - ; GFX8-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[UV3]] - ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB7]], [[SELECT2]] - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT1]](s32), [[SELECT3]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[UV2]](i32) + ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] + ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C1]], [[UV2]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i32) = 
G_ADD [[FPTOUI]], [[UMULH]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[UV]], [[ADD]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[UV2]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[UV]], [[MUL1]] + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[UV2]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[UV2]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT]](i32), [[UV2]] + ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SELECT]], [[UV2]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SUB3]], [[SELECT]] + ; GFX8-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[UV3]](i32) + ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](f32) + ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C]] + ; GFX8-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL1]](f32) + ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[C1]], [[UV3]] + ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[SUB4]], [[FPTOUI1]] + ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[UV1]], [[ADD1]] + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UMULH3]], [[UV3]] + ; GFX8-NEXT: [[SUB5:%[0-9]+]]:_(i32) = G_SUB [[UV1]], [[MUL3]] + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB5]](i32), [[UV3]] + ; GFX8-NEXT: [[SUB6:%[0-9]+]]:_(i32) = G_SUB [[SUB5]], [[UV3]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SUB6]], [[SUB5]] + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT2]](i32), [[UV3]] + ; GFX8-NEXT: [[SUB7:%[0-9]+]]:_(i32) = G_SUB [[SELECT2]], [[UV3]] + ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(i32) = G_SELECT [[ICMP3]](i1), [[SUB7]], [[SELECT2]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SELECT1]](i32), [[SELECT3]](i32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX9-LABEL: name: test_urem_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV2]](s32) - ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] - ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UV2]] - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[ADD]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[UV2]] - ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[MUL1]] - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[UV2]] - ; 
GFX9-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[UV2]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[UV2]] - ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[UV2]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX9-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV3]](s32) - ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) - ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C]] - ; GFX9-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UV3]] - ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB4]], [[FPTOUI1]] - ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] - ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[ADD1]] - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[UV3]] - ; GFX9-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[MUL3]] - ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[UV3]] - ; GFX9-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[UV3]] - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB6]], [[SUB5]] - ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[UV3]] - ; GFX9-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[UV3]] - ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB7]], [[SELECT2]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT1]](s32), [[SELECT3]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[UV2]](i32) + ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] + ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C1]], [[UV2]] + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[UV]], [[ADD]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[UV2]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[UV]], [[MUL1]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[UV2]] + ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[UV2]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT]](i32), [[UV2]] + ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SELECT]], [[UV2]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SUB3]], [[SELECT]] + ; GFX9-NEXT: 
[[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[UV3]](i32) + ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](f32) + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C]] + ; GFX9-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL1]](f32) + ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[C1]], [[UV3]] + ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[SUB4]], [[FPTOUI1]] + ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[UV1]], [[ADD1]] + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UMULH3]], [[UV3]] + ; GFX9-NEXT: [[SUB5:%[0-9]+]]:_(i32) = G_SUB [[UV1]], [[MUL3]] + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB5]](i32), [[UV3]] + ; GFX9-NEXT: [[SUB6:%[0-9]+]]:_(i32) = G_SUB [[SUB5]], [[UV3]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SUB6]], [[SUB5]] + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT2]](i32), [[UV3]] + ; GFX9-NEXT: [[SUB7:%[0-9]+]]:_(i32) = G_SUB [[SELECT2]], [[UV3]] + ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(i32) = G_SELECT [[ICMP3]](i1), [[SUB7]], [[SELECT2]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SELECT1]](i32), [[SELECT3]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX10-LABEL: name: test_urem_v2s32 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV2]](s32) - ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] - ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UV2]] - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[ADD]] - ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[UV2]] - ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[MUL1]] - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[UV2]] - ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[UV2]] - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[UV2]] - ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[UV2]] - ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV3]](s32) - ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) - ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C]] - ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; 
GFX10-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UV3]] - ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB4]], [[FPTOUI1]] - ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] - ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[ADD1]] - ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[UV3]] - ; GFX10-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[MUL3]] - ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[UV3]] - ; GFX10-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[UV3]] - ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB6]], [[SUB5]] - ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[UV3]] - ; GFX10-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[UV3]] - ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB7]], [[SELECT2]] - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT1]](s32), [[SELECT3]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = G_UREM %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[UV2]](i32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C1]], [[UV2]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[UV]], [[ADD]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[UV2]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[UV]], [[MUL1]] + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[UV2]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[UV2]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT]](i32), [[UV2]] + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SELECT]], [[UV2]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SUB3]], [[SELECT]] + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[UV3]](i32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](f32) + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C]] + ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL1]](f32) + ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[C1]], [[UV3]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[SUB4]], [[FPTOUI1]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(i32) = 
G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[UV1]], [[ADD1]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UMULH3]], [[UV3]] + ; GFX10-NEXT: [[SUB5:%[0-9]+]]:_(i32) = G_SUB [[UV1]], [[MUL3]] + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB5]](i32), [[UV3]] + ; GFX10-NEXT: [[SUB6:%[0-9]+]]:_(i32) = G_SUB [[SUB5]], [[UV3]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SUB6]], [[SUB5]] + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT2]](i32), [[UV3]] + ; GFX10-NEXT: [[SUB7:%[0-9]+]]:_(i32) = G_SUB [[SELECT2]], [[UV3]] + ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(i32) = G_SELECT [[ICMP3]](i1), [[SUB7]], [[SELECT2]] + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SELECT1]](i32), [[SELECT3]](i32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i32>) = G_UREM %0, %1 + $vgpr0_vgpr1 = COPY %2(<2 x i32>) ... --- @@ -324,594 +324,594 @@ body: | ; GFX6-LABEL: name: test_urem_s64 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32) - ; GFX6-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]] - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] - ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 - ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; GFX6-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C2]] - ; GFX6-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; GFX6-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] - ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] - ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) - ; GFX6-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) - ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]] - ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] - ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] - ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] - ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] - ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; GFX6-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] - ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] - 
; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] - ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] - ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] - ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] - ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] - ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] - ; GFX6-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] - ; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] - ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] - ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] - ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] - ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] - ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] - ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] - ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] - ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] - ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] - ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] - ; GFX6-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] - ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] - ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] - ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] - ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] - ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] - ; GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX6-NEXT: 
[[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] - ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] - ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] - ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]] - ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE2]] - ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]] - ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] - ; GFX6-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] - ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE2]] - ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]] - ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE2]] - ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] - ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] - ; GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] - ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE2]] - ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] - ; GFX6-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]] - ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]] - ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD15]] - ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]] - ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] - ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]] - ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD17]], [[USUBO3]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD17]] - ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; GFX6-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]] - ; GFX6-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV12]] - ; GFX6-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), 
[[USUBE2]](s32), [[UV13]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX6-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV12]] - ; GFX6-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV13]], [[USUBO3]] - ; GFX6-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] - ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) - ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV13]] - ; GFX6-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX6-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV12]] - ; GFX6-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX6-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV13]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX6-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV12]] - ; GFX6-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV13]], [[USUBO5]] - ; GFX6-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C5]], [[USUBO7]] - ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) - ; GFX6-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] - ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] - ; GFX6-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] - ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[UV]](i32) + ; GFX6-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[UV1]](i32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP1]], [[C]] + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD]](f32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX6-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FMUL1]], [[C2]] + ; GFX6-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX6-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] + ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD1]](f32) + ; GFX6-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC]](f32) + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C4]](i64) + ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV2]], [[UV4]] + ; GFX6-NEXT: 
[[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] + ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ADD]], [[UMULH]] + ; GFX6-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO1]](i1) + ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UADDO]], [[UMULH1]] + ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO3]](i1) + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(i32), [[UADDO5:%[0-9]+]]:_(i1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO5]](i1) + ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(i32), [[UADDO7:%[0-9]+]]:_(i1) = G_UADDO [[UADDO4]], [[UMULH3]] + ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO7]](i1) + ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(i32), [[UADDO9:%[0-9]+]]:_(i1) = G_UADDO [[UADDO6]], [[ADD2]] + ; GFX6-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO9]](i1) + ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(i32), [[UADDO11:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI]], [[UADDO8]] + ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] + ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[UADDO10]] + ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(i32) = G_MUL [[USUBE]], [[UADDO10]] + ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[UADDE]] + ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(i32) = G_UMULH [[USUBO]], [[UADDO10]] + ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[ADD6]], [[UMULH5]] + ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(i32) = G_MUL [[UADDE]], [[MUL6]] + ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(i32) = G_MUL [[UADDO10]], [[ADD7]] + ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(i32) = G_UMULH [[UADDO10]], [[MUL6]] + ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(i32), [[UADDO13:%[0-9]+]]:_(i1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO13]](i1) + ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(i32), [[UADDO15:%[0-9]+]]:_(i1) = G_UADDO [[UADDO12]], [[UMULH6]] + ; GFX6-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO15]](i1) + ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(i32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(i32) = G_MUL [[UADDE]], [[ADD7]] + ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(i32) = G_UMULH [[UADDE]], [[MUL6]] + ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(i32) = G_UMULH [[UADDO10]], [[ADD7]] + ; GFX6-NEXT: 
[[UADDO16:%[0-9]+]]:_(i32), [[UADDO17:%[0-9]+]]:_(i1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO17]](i1) + ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(i32), [[UADDO19:%[0-9]+]]:_(i1) = G_UADDO [[UADDO16]], [[UMULH8]] + ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO19]](i1) + ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(i32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(i32), [[UADDO21:%[0-9]+]]:_(i1) = G_UADDO [[UADDO18]], [[ADD8]] + ; GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO21]](i1) + ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(i32) = G_ADD [[ADD9]], [[ZEXT9]] + ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(i32) = G_UMULH [[UADDE]], [[ADD7]] + ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(i32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX6-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(i32), [[UADDO23:%[0-9]+]]:_(i1) = G_UADDO [[UADDO10]], [[UADDO20]] + ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] + ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(i32) = G_MUL [[UV9]], [[UADDO22]] + ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(i32) = G_MUL [[UV8]], [[UADDE2]] + ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(i32) = G_UMULH [[UV8]], [[UADDO22]] + ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(i32), [[UADDO25:%[0-9]+]]:_(i1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX6-NEXT: [[ZEXT10:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO25]](i1) + ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(i32), [[UADDO27:%[0-9]+]]:_(i1) = G_UADDO [[UADDO24]], [[UMULH10]] + ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO27]](i1) + ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(i32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(i32) = G_MUL [[UV9]], [[UADDE2]] + ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(i32) = G_UMULH [[UV9]], [[UADDO22]] + ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(i32) = G_UMULH [[UV8]], [[UADDE2]] + ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(i32), [[UADDO29:%[0-9]+]]:_(i1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO29]](i1) + ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(i32), [[UADDO31:%[0-9]+]]:_(i1) = G_UADDO [[UADDO28]], [[UMULH12]] + ; GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO31]](i1) + ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(i32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(i32), [[UADDO33:%[0-9]+]]:_(i1) = G_UADDO [[UADDO30]], [[ADD12]] + ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO33]](i1) + ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(i32) = G_ADD [[ADD13]], [[ZEXT14]] + ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(i32) = G_UMULH [[UV9]], [[UADDE2]] + ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(i32) = G_ADD [[UMULH13]], [[ADD14]] + ; GFX6-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(i32) = G_MUL [[UV10]], [[UADDO32]] + ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(i32) = G_MUL [[UV11]], [[UADDO32]] + ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(i32) = G_MUL [[UV10]], [[ADD15]] + ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(i32) = G_UMULH [[UV10]], [[UADDO32]] + ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(i32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(i32) = G_ADD [[ADD16]], [[UMULH14]] + ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(i32), [[USUBO3:%[0-9]+]]:_(i1) = G_USUBO [[UV6]], [[MUL15]] + ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV7]], 
[[ADD17]], [[USUBO3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV7]], [[ADD17]] + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO2]](i32), [[USUBE2]](i32) + ; GFX6-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE2]](i32), [[UV13]] + ; GFX6-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO2]](i32), [[UV12]] + ; GFX6-NEXT: [[SEXT1:%[0-9]+]]:_(i32) = G_SEXT [[ICMP1]](i1) + ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE2]](i32), [[UV13]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SEXT1]], [[SEXT]] + ; GFX6-NEXT: [[USUBO4:%[0-9]+]]:_(i32), [[USUBO5:%[0-9]+]]:_(i1) = G_USUBO [[USUBO2]], [[UV12]] + ; GFX6-NEXT: [[USUBE4:%[0-9]+]]:_(i32), [[USUBE5:%[0-9]+]]:_(i1) = G_USUBE [[SUB]], [[UV13]], [[USUBO3]] + ; GFX6-NEXT: [[USUBE6:%[0-9]+]]:_(i32), [[USUBE7:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] + ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO4]](i32), [[USUBE6]](i32) + ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE6]](i32), [[UV13]] + ; GFX6-NEXT: [[SEXT2:%[0-9]+]]:_(i32) = G_SEXT [[ICMP3]](i1) + ; GFX6-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO4]](i32), [[UV12]] + ; GFX6-NEXT: [[SEXT3:%[0-9]+]]:_(i32) = G_SEXT [[ICMP4]](i1) + ; GFX6-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE6]](i32), [[UV13]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP5]](i1), [[SEXT3]], [[SEXT2]] + ; GFX6-NEXT: [[USUBO6:%[0-9]+]]:_(i32), [[USUBO7:%[0-9]+]]:_(i1) = G_USUBO [[USUBO4]], [[UV12]] + ; GFX6-NEXT: [[USUBE8:%[0-9]+]]:_(i32), [[USUBE9:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[UV13]], [[USUBO5]] + ; GFX6-NEXT: [[USUBE10:%[0-9]+]]:_(i32), [[USUBE11:%[0-9]+]]:_(i1) = G_USUBE [[USUBE8]], [[C5]], [[USUBO7]] + ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO6]](i32), [[USUBE10]](i32) + ; GFX6-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT1]](i32), [[C5]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[MV2]], [[MV1]] + ; GFX6-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT]](i32), [[C5]] + ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP7]](i1), [[SELECT2]], [[MV]] + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](i64) ; ; GFX8-LABEL: name: test_urem_s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32) - ; GFX8-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]] - ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] - ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 - ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; GFX8-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C2]] - ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] - ; 
GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; GFX8-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] - ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] - ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) - ; GFX8-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) - ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) - ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX8-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]] - ; GFX8-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C4]] - ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV7]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] - ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV6]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV8]] - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV6]] - ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV8]] - ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV6]] - ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV8]] - ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] - ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV8]] - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C4]] - ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), 
[[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) - ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV11]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE]], [[ANYEXT1]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]] - ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV10]] - ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[UV12]] - ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV10]] - ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] - ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV12]] - ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV10]] - ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV12]] - ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD4]] - ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV12]] - ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]] - ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO22]] - ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE2]] - ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO22]] - ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] - ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE2]] - ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO22]] - ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE2]] - ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO 
[[MUL8]], [[UMULH9]] - ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD8]] - ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE2]] - ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] - ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDO32]], [[C4]] - ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) - ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV21]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]] - ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) - ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV20]] - ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV22]], [[USUBO3]] - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[UV22]] - ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV25]] - ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV24]] - ; GFX8-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV25]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX8-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV24]] - ; GFX8-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV25]], [[USUBO3]] - ; GFX8-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] - ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) - ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV25]] - ; GFX8-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV24]] - ; GFX8-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV25]] - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX8-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV24]] - ; GFX8-NEXT: [[USUBE8:%[0-9]+]]:_(s32), 
[[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV25]], [[USUBO5]] - ; GFX8-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C5]], [[USUBO7]] - ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) - ; GFX8-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] - ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] - ; GFX8-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] - ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[UV]](i32) + ; GFX8-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[UV1]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP1]], [[C]] + ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD]](f32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX8-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FMUL1]], [[C2]] + ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX8-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] + ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD1]](f32) + ; GFX8-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC]](f32) + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C4]](i64) + ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX8-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV2]], [[UV4]] + ; GFX8-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[FPTOUI]], [[C4]] + ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](i64) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[UV7]](i32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[FPTOUI1]], [[ANYEXT]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](i32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](i64) + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[UV6]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI]], [[UV8]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[UV6]] + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[MUL]], 
[[MUL1]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO1]](i1) + ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UADDO]], [[UMULH]] + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO3]](i1) + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[UV8]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[UV6]] + ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[UV8]] + ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(i32), [[UADDO5:%[0-9]+]]:_(i1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO5]](i1) + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(i32), [[UADDO7:%[0-9]+]]:_(i1) = G_UADDO [[UADDO4]], [[UMULH2]] + ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO7]](i1) + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(i32), [[UADDO9:%[0-9]+]]:_(i1) = G_UADDO [[UADDO6]], [[ADD]] + ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO9]](i1) + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[UV8]] + ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UMULH3]], [[ADD2]] + ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(i32), [[UADDO11:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI]], [[UADDO8]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[UADDO10]], [[C4]] + ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](i64) + ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[UV11]](i32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[UADDE]], [[ANYEXT1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](i32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](i64) + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UADDE]], [[UV10]] + ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(i32) = G_MUL [[UADDO10]], [[UV12]] + ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(i32) = G_UMULH [[UADDO10]], [[UV10]] + ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(i32), [[UADDO13:%[0-9]+]]:_(i1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO13]](i1) + ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(i32), [[UADDO15:%[0-9]+]]:_(i1) = G_UADDO [[UADDO12]], [[UMULH4]] + ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO15]](i1) + ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(i32) = G_MUL [[UADDE]], [[UV12]] + ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(i32) = G_UMULH [[UADDE]], [[UV10]] + ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(i32) = G_UMULH [[UADDO10]], [[UV12]] + ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(i32), [[UADDO17:%[0-9]+]]:_(i1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO17]](i1) + ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(i32), [[UADDO19:%[0-9]+]]:_(i1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO19]](i1) + ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(i32), [[UADDO21:%[0-9]+]]:_(i1) 
= G_UADDO [[UADDO18]], [[ADD4]] + ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO21]](i1) + ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(i32) = G_UMULH [[UADDE]], [[UV12]] + ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[UMULH7]], [[ADD6]] + ; GFX8-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(i32), [[UADDO23:%[0-9]+]]:_(i1) = G_UADDO [[UADDO10]], [[UADDO20]] + ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]] + ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(i32) = G_MUL [[UV17]], [[UADDO22]] + ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(i32) = G_MUL [[UV16]], [[UADDE2]] + ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(i32) = G_UMULH [[UV16]], [[UADDO22]] + ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(i32), [[UADDO25:%[0-9]+]]:_(i1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO25]](i1) + ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(i32), [[UADDO27:%[0-9]+]]:_(i1) = G_UADDO [[UADDO24]], [[UMULH8]] + ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO27]](i1) + ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(i32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(i32) = G_MUL [[UV17]], [[UADDE2]] + ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(i32) = G_UMULH [[UV17]], [[UADDO22]] + ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(i32) = G_UMULH [[UV16]], [[UADDE2]] + ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(i32), [[UADDO29:%[0-9]+]]:_(i1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO29]](i1) + ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(i32), [[UADDO31:%[0-9]+]]:_(i1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO31]](i1) + ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(i32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(i32), [[UADDO33:%[0-9]+]]:_(i1) = G_UADDO [[UADDO30]], [[ADD8]] + ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO33]](i1) + ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(i32) = G_ADD [[ADD9]], [[ZEXT14]] + ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(i32) = G_UMULH [[UV17]], [[UADDE2]] + ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(i32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV18]](i32), [[UADDO32]], [[C4]] + ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](i64) + ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(i64) = G_ANYEXT [[UV21]](i32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV18]](i32), [[ADD11]], [[ANYEXT2]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV19]](i32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](i64) + ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(i32), [[USUBO3:%[0-9]+]]:_(i1) = G_USUBO [[UV14]], [[UV20]] + ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV15]], [[UV22]], [[USUBO3]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV15]], [[UV22]] + ; GFX8-NEXT: 
[[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO2]](i32), [[USUBE2]](i32) + ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE2]](i32), [[UV25]] + ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO2]](i32), [[UV24]] + ; GFX8-NEXT: [[SEXT1:%[0-9]+]]:_(i32) = G_SEXT [[ICMP1]](i1) + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE2]](i32), [[UV25]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SEXT1]], [[SEXT]] + ; GFX8-NEXT: [[USUBO4:%[0-9]+]]:_(i32), [[USUBO5:%[0-9]+]]:_(i1) = G_USUBO [[USUBO2]], [[UV24]] + ; GFX8-NEXT: [[USUBE4:%[0-9]+]]:_(i32), [[USUBE5:%[0-9]+]]:_(i1) = G_USUBE [[SUB]], [[UV25]], [[USUBO3]] + ; GFX8-NEXT: [[USUBE6:%[0-9]+]]:_(i32), [[USUBE7:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] + ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO4]](i32), [[USUBE6]](i32) + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE6]](i32), [[UV25]] + ; GFX8-NEXT: [[SEXT2:%[0-9]+]]:_(i32) = G_SEXT [[ICMP3]](i1) + ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO4]](i32), [[UV24]] + ; GFX8-NEXT: [[SEXT3:%[0-9]+]]:_(i32) = G_SEXT [[ICMP4]](i1) + ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE6]](i32), [[UV25]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP5]](i1), [[SEXT3]], [[SEXT2]] + ; GFX8-NEXT: [[USUBO6:%[0-9]+]]:_(i32), [[USUBO7:%[0-9]+]]:_(i1) = G_USUBO [[USUBO4]], [[UV24]] + ; GFX8-NEXT: [[USUBE8:%[0-9]+]]:_(i32), [[USUBE9:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[UV25]], [[USUBO5]] + ; GFX8-NEXT: [[USUBE10:%[0-9]+]]:_(i32), [[USUBE11:%[0-9]+]]:_(i1) = G_USUBE [[USUBE8]], [[C5]], [[USUBO7]] + ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO6]](i32), [[USUBE10]](i32) + ; GFX8-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT1]](i32), [[C5]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[MV2]], [[MV1]] + ; GFX8-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT]](i32), [[C5]] + ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP7]](i1), [[SELECT2]], [[MV]] + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](i64) ; ; GFX9-LABEL: name: test_urem_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32) - ; GFX9-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] - ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 - ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C2]] - ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; GFX9-NEXT: 
[[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] - ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] - ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) - ; GFX9-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX9-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]] - ; GFX9-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C4]] - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV7]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] - ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV6]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV8]] - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV6]] - ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV8]] - ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV6]] - ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV8]] - ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] - ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV8]] - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C4]] - ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) - ; GFX9-NEXT: 
[[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV11]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE]], [[ANYEXT1]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]] - ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV10]] - ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[UV12]] - ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV10]] - ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] - ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV12]] - ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV10]] - ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV12]] - ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD4]] - ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV12]] - ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]] - ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO22]] - ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE2]] - ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO22]] - ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] - ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE2]] - ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO22]] - ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE2]] - ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; 
GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD8]] - ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE2]] - ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] - ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDO32]], [[C4]] - ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) - ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV21]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]] - ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) - ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV20]] - ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV22]], [[USUBO3]] - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[UV22]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV25]] - ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV24]] - ; GFX9-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV25]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX9-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV24]] - ; GFX9-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV25]], [[USUBO3]] - ; GFX9-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] - ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) - ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV25]] - ; GFX9-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV24]] - ; GFX9-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV25]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX9-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV24]] - ; GFX9-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV25]], [[USUBO5]] - ; GFX9-NEXT: [[USUBE10:%[0-9]+]]:_(s32), 
[[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C5]], [[USUBO7]] - ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) - ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] - ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] - ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[UV]](i32) + ; GFX9-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[UV1]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP1]], [[C]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD]](f32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FMUL1]], [[C2]] + ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX9-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] + ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD1]](f32) + ; GFX9-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC]](f32) + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C4]](i64) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX9-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV2]], [[UV4]] + ; GFX9-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[FPTOUI]], [[C4]] + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](i64) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[UV7]](i32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[FPTOUI1]], [[ANYEXT]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](i32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](i64) + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[UV6]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI]], [[UV8]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[UV6]] + ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[MUL]], [[MUL1]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO1]](i1) + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(i32), 
[[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UADDO]], [[UMULH]] + ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO3]](i1) + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[UV8]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[UV6]] + ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[UV8]] + ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(i32), [[UADDO5:%[0-9]+]]:_(i1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO5]](i1) + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(i32), [[UADDO7:%[0-9]+]]:_(i1) = G_UADDO [[UADDO4]], [[UMULH2]] + ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO7]](i1) + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(i32), [[UADDO9:%[0-9]+]]:_(i1) = G_UADDO [[UADDO6]], [[ADD]] + ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO9]](i1) + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[UV8]] + ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UMULH3]], [[ADD2]] + ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(i32), [[UADDO11:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI]], [[UADDO8]] + ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[UADDO10]], [[C4]] + ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](i64) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[UV11]](i32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[UADDE]], [[ANYEXT1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](i32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](i64) + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UADDE]], [[UV10]] + ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(i32) = G_MUL [[UADDO10]], [[UV12]] + ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(i32) = G_UMULH [[UADDO10]], [[UV10]] + ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(i32), [[UADDO13:%[0-9]+]]:_(i1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO13]](i1) + ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(i32), [[UADDO15:%[0-9]+]]:_(i1) = G_UADDO [[UADDO12]], [[UMULH4]] + ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO15]](i1) + ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(i32) = G_MUL [[UADDE]], [[UV12]] + ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(i32) = G_UMULH [[UADDE]], [[UV10]] + ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(i32) = G_UMULH [[UADDO10]], [[UV12]] + ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(i32), [[UADDO17:%[0-9]+]]:_(i1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO17]](i1) + ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(i32), [[UADDO19:%[0-9]+]]:_(i1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO19]](i1) + ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(i32), [[UADDO21:%[0-9]+]]:_(i1) = G_UADDO [[UADDO18]], [[ADD4]] + ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO21]](i1) + ; GFX9-NEXT: 
[[ADD6:%[0-9]+]]:_(i32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(i32) = G_UMULH [[UADDE]], [[UV12]] + ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[UMULH7]], [[ADD6]] + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(i32), [[UADDO23:%[0-9]+]]:_(i1) = G_UADDO [[UADDO10]], [[UADDO20]] + ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]] + ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(i32) = G_MUL [[UV17]], [[UADDO22]] + ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(i32) = G_MUL [[UV16]], [[UADDE2]] + ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(i32) = G_UMULH [[UV16]], [[UADDO22]] + ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(i32), [[UADDO25:%[0-9]+]]:_(i1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO25]](i1) + ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(i32), [[UADDO27:%[0-9]+]]:_(i1) = G_UADDO [[UADDO24]], [[UMULH8]] + ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO27]](i1) + ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(i32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(i32) = G_MUL [[UV17]], [[UADDE2]] + ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(i32) = G_UMULH [[UV17]], [[UADDO22]] + ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(i32) = G_UMULH [[UV16]], [[UADDE2]] + ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(i32), [[UADDO29:%[0-9]+]]:_(i1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO29]](i1) + ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(i32), [[UADDO31:%[0-9]+]]:_(i1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO31]](i1) + ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(i32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(i32), [[UADDO33:%[0-9]+]]:_(i1) = G_UADDO [[UADDO30]], [[ADD8]] + ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO33]](i1) + ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(i32) = G_ADD [[ADD9]], [[ZEXT14]] + ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(i32) = G_UMULH [[UV17]], [[UADDE2]] + ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(i32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV18]](i32), [[UADDO32]], [[C4]] + ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](i64) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(i64) = G_ANYEXT [[UV21]](i32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV18]](i32), [[ADD11]], [[ANYEXT2]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV19]](i32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](i64) + ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(i32), [[USUBO3:%[0-9]+]]:_(i1) = G_USUBO [[UV14]], [[UV20]] + ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV15]], [[UV22]], [[USUBO3]] + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV15]], [[UV22]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO2]](i32), [[USUBE2]](i32) + ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(i32), 
[[UV25:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE2]](i32), [[UV25]] + ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO2]](i32), [[UV24]] + ; GFX9-NEXT: [[SEXT1:%[0-9]+]]:_(i32) = G_SEXT [[ICMP1]](i1) + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE2]](i32), [[UV25]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SEXT1]], [[SEXT]] + ; GFX9-NEXT: [[USUBO4:%[0-9]+]]:_(i32), [[USUBO5:%[0-9]+]]:_(i1) = G_USUBO [[USUBO2]], [[UV24]] + ; GFX9-NEXT: [[USUBE4:%[0-9]+]]:_(i32), [[USUBE5:%[0-9]+]]:_(i1) = G_USUBE [[SUB]], [[UV25]], [[USUBO3]] + ; GFX9-NEXT: [[USUBE6:%[0-9]+]]:_(i32), [[USUBE7:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO4]](i32), [[USUBE6]](i32) + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE6]](i32), [[UV25]] + ; GFX9-NEXT: [[SEXT2:%[0-9]+]]:_(i32) = G_SEXT [[ICMP3]](i1) + ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO4]](i32), [[UV24]] + ; GFX9-NEXT: [[SEXT3:%[0-9]+]]:_(i32) = G_SEXT [[ICMP4]](i1) + ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE6]](i32), [[UV25]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP5]](i1), [[SEXT3]], [[SEXT2]] + ; GFX9-NEXT: [[USUBO6:%[0-9]+]]:_(i32), [[USUBO7:%[0-9]+]]:_(i1) = G_USUBO [[USUBO4]], [[UV24]] + ; GFX9-NEXT: [[USUBE8:%[0-9]+]]:_(i32), [[USUBE9:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[UV25]], [[USUBO5]] + ; GFX9-NEXT: [[USUBE10:%[0-9]+]]:_(i32), [[USUBE11:%[0-9]+]]:_(i1) = G_USUBE [[USUBE8]], [[C5]], [[USUBO7]] + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO6]](i32), [[USUBE10]](i32) + ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT1]](i32), [[C5]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[MV2]], [[MV1]] + ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT]](i32), [[C5]] + ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP7]](i1), [[SELECT2]], [[MV]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](i64) ; ; GFX10-LABEL: name: test_urem_s64 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32) - ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]] - ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] - ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 - ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C2]] - ; GFX10-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] - ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD 
[[FMUL3]], [[FMUL1]] - ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) - ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) - ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]] - ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C4]] - ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV7]], [[MUL]] - ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]] - ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV6]] - ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV6]] - ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[MUL3]] - ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV6]] - ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] - ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] - ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] - ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD4]] - ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C4]] - ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64) - ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] - ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[MUL5]] - ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] - ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[MUL6]] - ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], 
[[UV8]] - ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] - ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV8]] - ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] - ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] - ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] - ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV8]] - ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] - ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] - ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] - ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] - ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD10]] - ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] - ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO22]] - ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE2]] - ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO22]] - ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] - ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] - ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE2]] - ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO22]] - ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE2]] - ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] - ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] - ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX10-NEXT: 
[[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE2]] - ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD14]] - ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV14]](s32), [[UADDO32]], [[C4]] - ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD15]] - ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL13]] - ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDO32]] - ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL14]] - ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV16]] - ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD17]], [[USUBO3]] - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD17]] - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV19]] - ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV18]] - ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV19]] - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV18]] - ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV19]], [[USUBO3]] - ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] - ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) - ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV19]] - ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV18]] - ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV19]] - ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV18]] - ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV19]], [[USUBO5]] - ; GFX10-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C5]], [[USUBO7]] - ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) - ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] - ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] - ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] - ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = G_UREM %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX10-NEXT: 
[[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[UV]](i32) + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[UV1]](i32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP1]], [[C]] + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD]](f32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FMUL1]], [[C2]] + ; GFX10-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] + ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD1]](f32) + ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC]](f32) + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C4]](i64) + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV2]], [[UV4]] + ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[FPTOUI]], [[C4]] + ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](i64) + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[UV7]], [[MUL]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ADD]], [[MUL1]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[UV6]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[UV6]] + ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[MUL2]], [[MUL3]] + ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO1]](i1) + ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UADDO]], [[UMULH]] + ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO3]](i1) + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[UV6]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(i32), [[UADDO5:%[0-9]+]]:_(i1) = G_UADDO [[MUL4]], [[UMULH1]] + ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO5]](i1) + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(i32), [[UADDO7:%[0-9]+]]:_(i1) = G_UADDO [[UADDO4]], [[UMULH2]] + ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO7]](i1) + ; GFX10-NEXT: 
[[ADD3:%[0-9]+]]:_(i32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(i32), [[UADDO9:%[0-9]+]]:_(i1) = G_UADDO [[UADDO6]], [[ADD2]] + ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO9]](i1) + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[UMULH3]], [[ADD4]] + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(i32), [[UADDO11:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI]], [[UADDO8]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[UADDO10]], [[C4]] + ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](i64) + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[UADDE]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[UV9]], [[MUL5]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(i32) = G_MUL [[USUBE]], [[UADDO10]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[ADD6]], [[MUL6]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(i32) = G_MUL [[UADDE]], [[UV8]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(i32) = G_MUL [[UADDO10]], [[ADD7]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(i32) = G_UMULH [[UADDO10]], [[UV8]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(i32), [[UADDO13:%[0-9]+]]:_(i1) = G_UADDO [[MUL7]], [[MUL8]] + ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO13]](i1) + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(i32), [[UADDO15:%[0-9]+]]:_(i1) = G_UADDO [[UADDO12]], [[UMULH4]] + ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO15]](i1) + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(i32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(i32) = G_MUL [[UADDE]], [[ADD7]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(i32) = G_UMULH [[UADDE]], [[UV8]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(i32) = G_UMULH [[UADDO10]], [[ADD7]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(i32), [[UADDO17:%[0-9]+]]:_(i1) = G_UADDO [[MUL9]], [[UMULH5]] + ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO17]](i1) + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(i32), [[UADDO19:%[0-9]+]]:_(i1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO19]](i1) + ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(i32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(i32), [[UADDO21:%[0-9]+]]:_(i1) = G_UADDO [[UADDO18]], [[ADD8]] + ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO21]](i1) + ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(i32) = G_ADD [[ADD9]], [[ZEXT9]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(i32) = G_UMULH [[UADDE]], [[ADD7]] + ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(i32) = G_ADD [[UMULH7]], [[ADD10]] + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(i32), [[UADDO23:%[0-9]+]]:_(i1) = G_UADDO [[UADDO10]], [[UADDO20]] + ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] + ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(i32) = G_MUL [[UV13]], [[UADDO22]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(i32) = G_MUL [[UV12]], [[UADDE2]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(i32) = G_UMULH [[UV12]], [[UADDO22]] + ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(i32), [[UADDO25:%[0-9]+]]:_(i1) = 
G_UADDO [[MUL10]], [[MUL11]] + ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO25]](i1) + ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(i32), [[UADDO27:%[0-9]+]]:_(i1) = G_UADDO [[UADDO24]], [[UMULH8]] + ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO27]](i1) + ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(i32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(i32) = G_MUL [[UV13]], [[UADDE2]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(i32) = G_UMULH [[UV13]], [[UADDO22]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(i32) = G_UMULH [[UV12]], [[UADDE2]] + ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(i32), [[UADDO29:%[0-9]+]]:_(i1) = G_UADDO [[MUL12]], [[UMULH9]] + ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO29]](i1) + ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(i32), [[UADDO31:%[0-9]+]]:_(i1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO31]](i1) + ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(i32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(i32), [[UADDO33:%[0-9]+]]:_(i1) = G_UADDO [[UADDO30]], [[ADD12]] + ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO33]](i1) + ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(i32) = G_ADD [[ADD13]], [[ZEXT14]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(i32) = G_UMULH [[UV13]], [[UADDE2]] + ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(i32) = G_ADD [[UMULH11]], [[ADD14]] + ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV14]](i32), [[UADDO32]], [[C4]] + ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](i64) + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(i32) = G_MUL [[UV14]], [[ADD15]] + ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(i32) = G_ADD [[UV17]], [[MUL13]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(i32) = G_MUL [[UV15]], [[UADDO32]] + ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(i32) = G_ADD [[ADD16]], [[MUL14]] + ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(i32), [[USUBO3:%[0-9]+]]:_(i1) = G_USUBO [[UV10]], [[UV16]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV11]], [[ADD17]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV11]], [[ADD17]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO2]](i32), [[USUBE2]](i32) + ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE2]](i32), [[UV19]] + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO2]](i32), [[UV18]] + ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(i32) = G_SEXT [[ICMP1]](i1) + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE2]](i32), [[UV19]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SEXT1]], [[SEXT]] + ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(i32), [[USUBO5:%[0-9]+]]:_(i1) = G_USUBO [[USUBO2]], [[UV18]] + ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(i32), [[USUBE5:%[0-9]+]]:_(i1) = G_USUBE [[SUB]], [[UV19]], [[USUBO3]] + ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(i32), [[USUBE7:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO4]](i32), [[USUBE6]](i32) + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE6]](i32), [[UV19]] + ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(i32) = G_SEXT [[ICMP3]](i1) + ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP 
intpred(uge), [[USUBO4]](i32), [[UV18]] + ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(i32) = G_SEXT [[ICMP4]](i1) + ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE6]](i32), [[UV19]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP5]](i1), [[SEXT3]], [[SEXT2]] + ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(i32), [[USUBO7:%[0-9]+]]:_(i1) = G_USUBO [[USUBO4]], [[UV18]] + ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(i32), [[USUBE9:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[UV19]], [[USUBO5]] + ; GFX10-NEXT: [[USUBE10:%[0-9]+]]:_(i32), [[USUBE11:%[0-9]+]]:_(i1) = G_USUBE [[USUBE8]], [[C5]], [[USUBO7]] + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO6]](i32), [[USUBE10]](i32) + ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT1]](i32), [[C5]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[MV2]], [[MV1]] + ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT]](i32), [[C5]] + ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP7]](i1), [[SELECT2]], [[MV]] + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64) = G_UREM %0, %1 + $vgpr0_vgpr1 = COPY %2(i64) ... --- @@ -923,1142 +923,1142 @@ body: | ; GFX6-LABEL: name: test_urem_v2s64 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV4]](s32) - ; GFX6-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV5]](s32) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]] - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] - ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 - ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; GFX6-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C2]] - ; GFX6-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; GFX6-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] - ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] - ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) - ; GFX6-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) - ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[UV8]] - ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[UV9]], [[USUBO1]] - ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] - ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) 
= G_MUL [[USUBE]], [[FPTOUI]] - ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] - ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; GFX6-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] - ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] - ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] - ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] - ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] - ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] - ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] - ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] - ; GFX6-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] - ; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] - ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] - ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] - ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] - ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] - ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] - ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] - ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] - ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] - ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] - ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] - ; GFX6-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] - ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] - ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] - ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] - ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), 
[[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] - ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] - ; GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] - ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] - ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] - ; GFX6-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX6-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO22]] - ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE2]] - ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO22]] - ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] - ; GFX6-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] - ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE2]] - ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO22]] - ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE2]] - ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] - ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] - ; GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] - ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE2]] - ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] - ; GFX6-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[UADDO32]] - ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDO32]] - ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD15]] - ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV14]], [[UADDO32]] - ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] - ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[MUL15]] - ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD17]], [[USUBO3]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD17]] - ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; GFX6-NEXT: 
[[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV17]] - ; GFX6-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV16]] - ; GFX6-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV17]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX6-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV16]] - ; GFX6-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV17]], [[USUBO3]] - ; GFX6-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] - ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) - ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV17]] - ; GFX6-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX6-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV16]] - ; GFX6-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX6-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV17]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX6-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV16]] - ; GFX6-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV17]], [[USUBO5]] - ; GFX6-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C5]], [[USUBO7]] - ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) - ; GFX6-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] - ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] - ; GFX6-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] - ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] - ; GFX6-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX6-NEXT: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV18]](s32) - ; GFX6-NEXT: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV19]](s32) - ; GFX6-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C]] - ; GFX6-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[UITOFP2]] - ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD2]](s32) - ; GFX6-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C1]] - ; GFX6-NEXT: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[FMUL5]], [[C2]] - ; GFX6-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL6]] - ; GFX6-NEXT: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C3]] - ; GFX6-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]] - ; GFX6-NEXT: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD3]](s32) - ; GFX6-NEXT: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32) - ; GFX6-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) - ; GFX6-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX6-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV20]], [[UV22]] - ; GFX6-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV21]], [[UV23]], 
[[USUBO9]] - ; GFX6-NEXT: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI2]] - ; GFX6-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE12]], [[FPTOUI2]] - ; GFX6-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI3]] - ; GFX6-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[FPTOUI2]] - ; GFX6-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]] - ; GFX6-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[UMULH15]] - ; GFX6-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]] - ; GFX6-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD19]] - ; GFX6-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]] - ; GFX6-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]] - ; GFX6-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX6-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UMULH16]] - ; GFX6-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX6-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] - ; GFX6-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD19]] - ; GFX6-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]] - ; GFX6-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD19]] - ; GFX6-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]] - ; GFX6-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO39]](s1) - ; GFX6-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH18]] - ; GFX6-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO41]](s1) - ; GFX6-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] - ; GFX6-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UADDO40]], [[ADD20]] - ; GFX6-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1) - ; GFX6-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT19]] - ; GFX6-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD19]] - ; GFX6-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD22]] - ; GFX6-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO42]] - ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO45]] - ; GFX6-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDO44]] - ; GFX6-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE12]], [[UADDO44]] - ; GFX6-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDE4]] - ; GFX6-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[UADDO44]] - ; GFX6-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]] - ; GFX6-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[UMULH20]] - ; GFX6-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL24]] - ; GFX6-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO44]], [[ADD25]] - ; GFX6-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[MUL24]] - ; GFX6-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]] - ; GFX6-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) - ; GFX6-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH21]] - ; GFX6-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1) - ; GFX6-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] - ; GFX6-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD25]] - ; GFX6-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL24]] - ; GFX6-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH 
[[UADDO44]], [[ADD25]] - ; GFX6-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]] - ; GFX6-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) - ; GFX6-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH23]] - ; GFX6-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1) - ; GFX6-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] - ; GFX6-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[ADD26]] - ; GFX6-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) - ; GFX6-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ADD27]], [[ZEXT24]] - ; GFX6-NEXT: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD25]] - ; GFX6-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD28]] - ; GFX6-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[UADDO54]] - ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD29]], [[UADDO57]] - ; GFX6-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX6-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX6-NEXT: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV27]], [[UADDO56]] - ; GFX6-NEXT: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV26]], [[UADDE6]] - ; GFX6-NEXT: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV26]], [[UADDO56]] - ; GFX6-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]] - ; GFX6-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) - ; GFX6-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH25]] - ; GFX6-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1) - ; GFX6-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] - ; GFX6-NEXT: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV27]], [[UADDE6]] - ; GFX6-NEXT: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV27]], [[UADDO56]] - ; GFX6-NEXT: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV26]], [[UADDE6]] - ; GFX6-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]] - ; GFX6-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) - ; GFX6-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH27]] - ; GFX6-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1) - ; GFX6-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] - ; GFX6-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[UADDO64]], [[ADD30]] - ; GFX6-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) - ; GFX6-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ADD31]], [[ZEXT29]] - ; GFX6-NEXT: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV27]], [[UADDE6]] - ; GFX6-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD32]] - ; GFX6-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX6-NEXT: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[UADDO66]] - ; GFX6-NEXT: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDO66]] - ; GFX6-NEXT: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[ADD33]] - ; GFX6-NEXT: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDO66]] - ; GFX6-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]] - ; GFX6-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[ADD34]], [[UMULH29]] - ; GFX6-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV24]], [[MUL33]] - ; GFX6-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE 
[[UV25]], [[ADD35]], [[USUBO11]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV25]], [[ADD35]] - ; GFX6-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO10]](s32), [[USUBE14]](s32) - ; GFX6-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX6-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE14]](s32), [[UV31]] - ; GFX6-NEXT: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1) - ; GFX6-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV30]] - ; GFX6-NEXT: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1) - ; GFX6-NEXT: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE14]](s32), [[UV31]] - ; GFX6-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]] - ; GFX6-NEXT: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[USUBO10]], [[UV30]] - ; GFX6-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV31]], [[USUBO11]] - ; GFX6-NEXT: [[USUBE18:%[0-9]+]]:_(s32), [[USUBE19:%[0-9]+]]:_(s1) = G_USUBE [[USUBE16]], [[C5]], [[USUBO13]] - ; GFX6-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO12]](s32), [[USUBE18]](s32) - ; GFX6-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE18]](s32), [[UV31]] - ; GFX6-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) - ; GFX6-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV30]] - ; GFX6-NEXT: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1) - ; GFX6-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE18]](s32), [[UV31]] - ; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] - ; GFX6-NEXT: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[USUBO12]], [[UV30]] - ; GFX6-NEXT: [[USUBE20:%[0-9]+]]:_(s32), [[USUBE21:%[0-9]+]]:_(s1) = G_USUBE [[USUBE16]], [[UV31]], [[USUBO13]] - ; GFX6-NEXT: [[USUBE22:%[0-9]+]]:_(s32), [[USUBE23:%[0-9]+]]:_(s1) = G_USUBE [[USUBE20]], [[C5]], [[USUBO15]] - ; GFX6-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO14]](s32), [[USUBE22]](s32) - ; GFX6-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C5]] - ; GFX6-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV5]], [[MV4]] - ; GFX6-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C5]] - ; GFX6-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV3]] - ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT3]](s64), [[SELECT7]](s64) - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY1]](<2 x i64>) + ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[UV4]](i32) + ; GFX6-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[UV5]](i32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP1]], [[C]] + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD]](f32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 
0x43EFFFFF80000000 + ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX6-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FMUL1]], [[C2]] + ; GFX6-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX6-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] + ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD1]](f32) + ; GFX6-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC]](f32) + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C4]](i64) + ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV6]], [[UV8]] + ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV7]], [[UV9]], [[USUBO1]] + ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ADD]], [[UMULH]] + ; GFX6-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO1]](i1) + ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UADDO]], [[UMULH1]] + ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO3]](i1) + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(i32), [[UADDO5:%[0-9]+]]:_(i1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO5]](i1) + ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(i32), [[UADDO7:%[0-9]+]]:_(i1) = G_UADDO [[UADDO4]], [[UMULH3]] + ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO7]](i1) + ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(i32), [[UADDO9:%[0-9]+]]:_(i1) = G_UADDO [[UADDO6]], [[ADD2]] + ; GFX6-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO9]](i1) + ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(i32), [[UADDO11:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI]], [[UADDO8]] + ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] + ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[UADDO10]] + ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(i32) = G_MUL [[USUBE]], [[UADDO10]] + ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[UADDE]] + ; GFX6-NEXT: 
[[UMULH5:%[0-9]+]]:_(i32) = G_UMULH [[USUBO]], [[UADDO10]] + ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[ADD6]], [[UMULH5]] + ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(i32) = G_MUL [[UADDE]], [[MUL6]] + ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(i32) = G_MUL [[UADDO10]], [[ADD7]] + ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(i32) = G_UMULH [[UADDO10]], [[MUL6]] + ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(i32), [[UADDO13:%[0-9]+]]:_(i1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO13]](i1) + ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(i32), [[UADDO15:%[0-9]+]]:_(i1) = G_UADDO [[UADDO12]], [[UMULH6]] + ; GFX6-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO15]](i1) + ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(i32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(i32) = G_MUL [[UADDE]], [[ADD7]] + ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(i32) = G_UMULH [[UADDE]], [[MUL6]] + ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(i32) = G_UMULH [[UADDO10]], [[ADD7]] + ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(i32), [[UADDO17:%[0-9]+]]:_(i1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO17]](i1) + ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(i32), [[UADDO19:%[0-9]+]]:_(i1) = G_UADDO [[UADDO16]], [[UMULH8]] + ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO19]](i1) + ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(i32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(i32), [[UADDO21:%[0-9]+]]:_(i1) = G_UADDO [[UADDO18]], [[ADD8]] + ; GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO21]](i1) + ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(i32) = G_ADD [[ADD9]], [[ZEXT9]] + ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(i32) = G_UMULH [[UADDE]], [[ADD7]] + ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(i32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX6-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(i32), [[UADDO23:%[0-9]+]]:_(i1) = G_UADDO [[UADDO10]], [[UADDO20]] + ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] + ; GFX6-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV]](i64) + ; GFX6-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV]](i64) + ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(i32) = G_MUL [[UV13]], [[UADDO22]] + ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(i32) = G_MUL [[UV12]], [[UADDE2]] + ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(i32) = G_UMULH [[UV12]], [[UADDO22]] + ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(i32), [[UADDO25:%[0-9]+]]:_(i1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX6-NEXT: [[ZEXT10:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO25]](i1) + ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(i32), [[UADDO27:%[0-9]+]]:_(i1) = G_UADDO [[UADDO24]], [[UMULH10]] + ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO27]](i1) + ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(i32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(i32) = G_MUL [[UV13]], [[UADDE2]] + ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(i32) = G_UMULH [[UV13]], [[UADDO22]] + ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(i32) = G_UMULH [[UV12]], [[UADDE2]] + ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(i32), [[UADDO29:%[0-9]+]]:_(i1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO29]](i1) + ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(i32), [[UADDO31:%[0-9]+]]:_(i1) = G_UADDO [[UADDO28]], [[UMULH12]] + ; GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO31]](i1) + ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(i32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(i32), 
[[UADDO33:%[0-9]+]]:_(i1) = G_UADDO [[UADDO30]], [[ADD12]] + ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO33]](i1) + ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(i32) = G_ADD [[ADD13]], [[ZEXT14]] + ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(i32) = G_UMULH [[UV13]], [[UADDE2]] + ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(i32) = G_ADD [[UMULH13]], [[ADD14]] + ; GFX6-NEXT: [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(i32) = G_MUL [[UV14]], [[UADDO32]] + ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(i32) = G_MUL [[UV15]], [[UADDO32]] + ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(i32) = G_MUL [[UV14]], [[ADD15]] + ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(i32) = G_UMULH [[UV14]], [[UADDO32]] + ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(i32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(i32) = G_ADD [[ADD16]], [[UMULH14]] + ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(i32), [[USUBO3:%[0-9]+]]:_(i1) = G_USUBO [[UV10]], [[MUL15]] + ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV11]], [[ADD17]], [[USUBO3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV11]], [[ADD17]] + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO2]](i32), [[USUBE2]](i32) + ; GFX6-NEXT: [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE2]](i32), [[UV17]] + ; GFX6-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO2]](i32), [[UV16]] + ; GFX6-NEXT: [[SEXT1:%[0-9]+]]:_(i32) = G_SEXT [[ICMP1]](i1) + ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE2]](i32), [[UV17]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SEXT1]], [[SEXT]] + ; GFX6-NEXT: [[USUBO4:%[0-9]+]]:_(i32), [[USUBO5:%[0-9]+]]:_(i1) = G_USUBO [[USUBO2]], [[UV16]] + ; GFX6-NEXT: [[USUBE4:%[0-9]+]]:_(i32), [[USUBE5:%[0-9]+]]:_(i1) = G_USUBE [[SUB]], [[UV17]], [[USUBO3]] + ; GFX6-NEXT: [[USUBE6:%[0-9]+]]:_(i32), [[USUBE7:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] + ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO4]](i32), [[USUBE6]](i32) + ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE6]](i32), [[UV17]] + ; GFX6-NEXT: [[SEXT2:%[0-9]+]]:_(i32) = G_SEXT [[ICMP3]](i1) + ; GFX6-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO4]](i32), [[UV16]] + ; GFX6-NEXT: [[SEXT3:%[0-9]+]]:_(i32) = G_SEXT [[ICMP4]](i1) + ; GFX6-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE6]](i32), [[UV17]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP5]](i1), [[SEXT3]], [[SEXT2]] + ; GFX6-NEXT: [[USUBO6:%[0-9]+]]:_(i32), [[USUBO7:%[0-9]+]]:_(i1) = G_USUBO [[USUBO4]], [[UV16]] + ; GFX6-NEXT: [[USUBE8:%[0-9]+]]:_(i32), [[USUBE9:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[UV17]], [[USUBO5]] + ; GFX6-NEXT: [[USUBE10:%[0-9]+]]:_(i32), [[USUBE11:%[0-9]+]]:_(i1) = G_USUBE [[USUBE8]], [[C5]], [[USUBO7]] + ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO6]](i32), [[USUBE10]](i32) + ; GFX6-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT1]](i32), [[C5]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[MV2]], [[MV1]] + ; GFX6-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT]](i32), [[C5]] + ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP7]](i1), [[SELECT2]], [[MV]] + ; GFX6-NEXT: [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV3]](i64) + ; GFX6-NEXT: [[UITOFP2:%[0-9]+]]:_(f32) = 
G_UITOFP [[UV18]](i32) + ; GFX6-NEXT: [[UITOFP3:%[0-9]+]]:_(f32) = G_UITOFP [[UV19]](i32) + ; GFX6-NEXT: [[FMUL4:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP3]], [[C]] + ; GFX6-NEXT: [[FADD2:%[0-9]+]]:_(f32) = G_FADD [[FMUL4]], [[UITOFP2]] + ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD2]](f32) + ; GFX6-NEXT: [[FMUL5:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C1]] + ; GFX6-NEXT: [[FMUL6:%[0-9]+]]:_(f32) = G_FMUL [[FMUL5]], [[C2]] + ; GFX6-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL6]] + ; GFX6-NEXT: [[FMUL7:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C3]] + ; GFX6-NEXT: [[FADD3:%[0-9]+]]:_(f32) = G_FADD [[FMUL7]], [[FMUL5]] + ; GFX6-NEXT: [[FPTOUI2:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD3]](f32) + ; GFX6-NEXT: [[FPTOUI3:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC1]](f32) + ; GFX6-NEXT: [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C4]](i64) + ; GFX6-NEXT: [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV3]](i64) + ; GFX6-NEXT: [[USUBO8:%[0-9]+]]:_(i32), [[USUBO9:%[0-9]+]]:_(i1) = G_USUBO [[UV20]], [[UV22]] + ; GFX6-NEXT: [[USUBE12:%[0-9]+]]:_(i32), [[USUBE13:%[0-9]+]]:_(i1) = G_USUBE [[UV21]], [[UV23]], [[USUBO9]] + ; GFX6-NEXT: [[MUL18:%[0-9]+]]:_(i32) = G_MUL [[USUBO8]], [[FPTOUI2]] + ; GFX6-NEXT: [[MUL19:%[0-9]+]]:_(i32) = G_MUL [[USUBE12]], [[FPTOUI2]] + ; GFX6-NEXT: [[MUL20:%[0-9]+]]:_(i32) = G_MUL [[USUBO8]], [[FPTOUI3]] + ; GFX6-NEXT: [[UMULH15:%[0-9]+]]:_(i32) = G_UMULH [[USUBO8]], [[FPTOUI2]] + ; GFX6-NEXT: [[ADD18:%[0-9]+]]:_(i32) = G_ADD [[MUL19]], [[MUL20]] + ; GFX6-NEXT: [[ADD19:%[0-9]+]]:_(i32) = G_ADD [[ADD18]], [[UMULH15]] + ; GFX6-NEXT: [[MUL21:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI3]], [[MUL18]] + ; GFX6-NEXT: [[MUL22:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI2]], [[ADD19]] + ; GFX6-NEXT: [[UMULH16:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI2]], [[MUL18]] + ; GFX6-NEXT: [[UADDO34:%[0-9]+]]:_(i32), [[UADDO35:%[0-9]+]]:_(i1) = G_UADDO [[MUL21]], [[MUL22]] + ; GFX6-NEXT: [[ZEXT15:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO35]](i1) + ; GFX6-NEXT: [[UADDO36:%[0-9]+]]:_(i32), [[UADDO37:%[0-9]+]]:_(i1) = G_UADDO [[UADDO34]], [[UMULH16]] + ; GFX6-NEXT: [[ZEXT16:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO37]](i1) + ; GFX6-NEXT: [[ADD20:%[0-9]+]]:_(i32) = G_ADD [[ZEXT15]], [[ZEXT16]] + ; GFX6-NEXT: [[MUL23:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI3]], [[ADD19]] + ; GFX6-NEXT: [[UMULH17:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI3]], [[MUL18]] + ; GFX6-NEXT: [[UMULH18:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI2]], [[ADD19]] + ; GFX6-NEXT: [[UADDO38:%[0-9]+]]:_(i32), [[UADDO39:%[0-9]+]]:_(i1) = G_UADDO [[MUL23]], [[UMULH17]] + ; GFX6-NEXT: [[ZEXT17:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO39]](i1) + ; GFX6-NEXT: [[UADDO40:%[0-9]+]]:_(i32), [[UADDO41:%[0-9]+]]:_(i1) = G_UADDO [[UADDO38]], [[UMULH18]] + ; GFX6-NEXT: [[ZEXT18:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO41]](i1) + ; GFX6-NEXT: [[ADD21:%[0-9]+]]:_(i32) = G_ADD [[ZEXT17]], [[ZEXT18]] + ; GFX6-NEXT: [[UADDO42:%[0-9]+]]:_(i32), [[UADDO43:%[0-9]+]]:_(i1) = G_UADDO [[UADDO40]], [[ADD20]] + ; GFX6-NEXT: [[ZEXT19:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO43]](i1) + ; GFX6-NEXT: [[ADD22:%[0-9]+]]:_(i32) = G_ADD [[ADD21]], [[ZEXT19]] + ; GFX6-NEXT: [[UMULH19:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI3]], [[ADD19]] + ; GFX6-NEXT: [[ADD23:%[0-9]+]]:_(i32) = G_ADD [[UMULH19]], [[ADD22]] + ; GFX6-NEXT: [[UADDO44:%[0-9]+]]:_(i32), [[UADDO45:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI2]], [[UADDO42]] + ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(i32), [[UADDE5:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO45]] + ; GFX6-NEXT: 
[[MUL24:%[0-9]+]]:_(i32) = G_MUL [[USUBO8]], [[UADDO44]] + ; GFX6-NEXT: [[MUL25:%[0-9]+]]:_(i32) = G_MUL [[USUBE12]], [[UADDO44]] + ; GFX6-NEXT: [[MUL26:%[0-9]+]]:_(i32) = G_MUL [[USUBO8]], [[UADDE4]] + ; GFX6-NEXT: [[UMULH20:%[0-9]+]]:_(i32) = G_UMULH [[USUBO8]], [[UADDO44]] + ; GFX6-NEXT: [[ADD24:%[0-9]+]]:_(i32) = G_ADD [[MUL25]], [[MUL26]] + ; GFX6-NEXT: [[ADD25:%[0-9]+]]:_(i32) = G_ADD [[ADD24]], [[UMULH20]] + ; GFX6-NEXT: [[MUL27:%[0-9]+]]:_(i32) = G_MUL [[UADDE4]], [[MUL24]] + ; GFX6-NEXT: [[MUL28:%[0-9]+]]:_(i32) = G_MUL [[UADDO44]], [[ADD25]] + ; GFX6-NEXT: [[UMULH21:%[0-9]+]]:_(i32) = G_UMULH [[UADDO44]], [[MUL24]] + ; GFX6-NEXT: [[UADDO46:%[0-9]+]]:_(i32), [[UADDO47:%[0-9]+]]:_(i1) = G_UADDO [[MUL27]], [[MUL28]] + ; GFX6-NEXT: [[ZEXT20:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO47]](i1) + ; GFX6-NEXT: [[UADDO48:%[0-9]+]]:_(i32), [[UADDO49:%[0-9]+]]:_(i1) = G_UADDO [[UADDO46]], [[UMULH21]] + ; GFX6-NEXT: [[ZEXT21:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO49]](i1) + ; GFX6-NEXT: [[ADD26:%[0-9]+]]:_(i32) = G_ADD [[ZEXT20]], [[ZEXT21]] + ; GFX6-NEXT: [[MUL29:%[0-9]+]]:_(i32) = G_MUL [[UADDE4]], [[ADD25]] + ; GFX6-NEXT: [[UMULH22:%[0-9]+]]:_(i32) = G_UMULH [[UADDE4]], [[MUL24]] + ; GFX6-NEXT: [[UMULH23:%[0-9]+]]:_(i32) = G_UMULH [[UADDO44]], [[ADD25]] + ; GFX6-NEXT: [[UADDO50:%[0-9]+]]:_(i32), [[UADDO51:%[0-9]+]]:_(i1) = G_UADDO [[MUL29]], [[UMULH22]] + ; GFX6-NEXT: [[ZEXT22:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO51]](i1) + ; GFX6-NEXT: [[UADDO52:%[0-9]+]]:_(i32), [[UADDO53:%[0-9]+]]:_(i1) = G_UADDO [[UADDO50]], [[UMULH23]] + ; GFX6-NEXT: [[ZEXT23:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO53]](i1) + ; GFX6-NEXT: [[ADD27:%[0-9]+]]:_(i32) = G_ADD [[ZEXT22]], [[ZEXT23]] + ; GFX6-NEXT: [[UADDO54:%[0-9]+]]:_(i32), [[UADDO55:%[0-9]+]]:_(i1) = G_UADDO [[UADDO52]], [[ADD26]] + ; GFX6-NEXT: [[ZEXT24:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO55]](i1) + ; GFX6-NEXT: [[ADD28:%[0-9]+]]:_(i32) = G_ADD [[ADD27]], [[ZEXT24]] + ; GFX6-NEXT: [[UMULH24:%[0-9]+]]:_(i32) = G_UMULH [[UADDE4]], [[ADD25]] + ; GFX6-NEXT: [[ADD29:%[0-9]+]]:_(i32) = G_ADD [[UMULH24]], [[ADD28]] + ; GFX6-NEXT: [[UADDO56:%[0-9]+]]:_(i32), [[UADDO57:%[0-9]+]]:_(i1) = G_UADDO [[UADDO44]], [[UADDO54]] + ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(i32), [[UADDE7:%[0-9]+]]:_(i1) = G_UADDE [[UADDE4]], [[ADD29]], [[UADDO57]] + ; GFX6-NEXT: [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV1]](i64) + ; GFX6-NEXT: [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV1]](i64) + ; GFX6-NEXT: [[MUL30:%[0-9]+]]:_(i32) = G_MUL [[UV27]], [[UADDO56]] + ; GFX6-NEXT: [[MUL31:%[0-9]+]]:_(i32) = G_MUL [[UV26]], [[UADDE6]] + ; GFX6-NEXT: [[UMULH25:%[0-9]+]]:_(i32) = G_UMULH [[UV26]], [[UADDO56]] + ; GFX6-NEXT: [[UADDO58:%[0-9]+]]:_(i32), [[UADDO59:%[0-9]+]]:_(i1) = G_UADDO [[MUL30]], [[MUL31]] + ; GFX6-NEXT: [[ZEXT25:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO59]](i1) + ; GFX6-NEXT: [[UADDO60:%[0-9]+]]:_(i32), [[UADDO61:%[0-9]+]]:_(i1) = G_UADDO [[UADDO58]], [[UMULH25]] + ; GFX6-NEXT: [[ZEXT26:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO61]](i1) + ; GFX6-NEXT: [[ADD30:%[0-9]+]]:_(i32) = G_ADD [[ZEXT25]], [[ZEXT26]] + ; GFX6-NEXT: [[MUL32:%[0-9]+]]:_(i32) = G_MUL [[UV27]], [[UADDE6]] + ; GFX6-NEXT: [[UMULH26:%[0-9]+]]:_(i32) = G_UMULH [[UV27]], [[UADDO56]] + ; GFX6-NEXT: [[UMULH27:%[0-9]+]]:_(i32) = G_UMULH [[UV26]], [[UADDE6]] + ; GFX6-NEXT: [[UADDO62:%[0-9]+]]:_(i32), [[UADDO63:%[0-9]+]]:_(i1) = G_UADDO [[MUL32]], [[UMULH26]] + ; GFX6-NEXT: [[ZEXT27:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO63]](i1) + ; GFX6-NEXT: [[UADDO64:%[0-9]+]]:_(i32), [[UADDO65:%[0-9]+]]:_(i1) = G_UADDO [[UADDO62]], 
[[UMULH27]] + ; GFX6-NEXT: [[ZEXT28:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO65]](i1) + ; GFX6-NEXT: [[ADD31:%[0-9]+]]:_(i32) = G_ADD [[ZEXT27]], [[ZEXT28]] + ; GFX6-NEXT: [[UADDO66:%[0-9]+]]:_(i32), [[UADDO67:%[0-9]+]]:_(i1) = G_UADDO [[UADDO64]], [[ADD30]] + ; GFX6-NEXT: [[ZEXT29:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO67]](i1) + ; GFX6-NEXT: [[ADD32:%[0-9]+]]:_(i32) = G_ADD [[ADD31]], [[ZEXT29]] + ; GFX6-NEXT: [[UMULH28:%[0-9]+]]:_(i32) = G_UMULH [[UV27]], [[UADDE6]] + ; GFX6-NEXT: [[ADD33:%[0-9]+]]:_(i32) = G_ADD [[UMULH28]], [[ADD32]] + ; GFX6-NEXT: [[UV28:%[0-9]+]]:_(i32), [[UV29:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV3]](i64) + ; GFX6-NEXT: [[MUL33:%[0-9]+]]:_(i32) = G_MUL [[UV28]], [[UADDO66]] + ; GFX6-NEXT: [[MUL34:%[0-9]+]]:_(i32) = G_MUL [[UV29]], [[UADDO66]] + ; GFX6-NEXT: [[MUL35:%[0-9]+]]:_(i32) = G_MUL [[UV28]], [[ADD33]] + ; GFX6-NEXT: [[UMULH29:%[0-9]+]]:_(i32) = G_UMULH [[UV28]], [[UADDO66]] + ; GFX6-NEXT: [[ADD34:%[0-9]+]]:_(i32) = G_ADD [[MUL34]], [[MUL35]] + ; GFX6-NEXT: [[ADD35:%[0-9]+]]:_(i32) = G_ADD [[ADD34]], [[UMULH29]] + ; GFX6-NEXT: [[USUBO10:%[0-9]+]]:_(i32), [[USUBO11:%[0-9]+]]:_(i1) = G_USUBO [[UV24]], [[MUL33]] + ; GFX6-NEXT: [[USUBE14:%[0-9]+]]:_(i32), [[USUBE15:%[0-9]+]]:_(i1) = G_USUBE [[UV25]], [[ADD35]], [[USUBO11]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[UV25]], [[ADD35]] + ; GFX6-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO10]](i32), [[USUBE14]](i32) + ; GFX6-NEXT: [[UV30:%[0-9]+]]:_(i32), [[UV31:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV3]](i64) + ; GFX6-NEXT: [[ICMP8:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE14]](i32), [[UV31]] + ; GFX6-NEXT: [[SEXT4:%[0-9]+]]:_(i32) = G_SEXT [[ICMP8]](i1) + ; GFX6-NEXT: [[ICMP9:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO10]](i32), [[UV30]] + ; GFX6-NEXT: [[SEXT5:%[0-9]+]]:_(i32) = G_SEXT [[ICMP9]](i1) + ; GFX6-NEXT: [[ICMP10:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE14]](i32), [[UV31]] + ; GFX6-NEXT: [[SELECT4:%[0-9]+]]:_(i32) = G_SELECT [[ICMP10]](i1), [[SEXT5]], [[SEXT4]] + ; GFX6-NEXT: [[USUBO12:%[0-9]+]]:_(i32), [[USUBO13:%[0-9]+]]:_(i1) = G_USUBO [[USUBO10]], [[UV30]] + ; GFX6-NEXT: [[USUBE16:%[0-9]+]]:_(i32), [[USUBE17:%[0-9]+]]:_(i1) = G_USUBE [[SUB1]], [[UV31]], [[USUBO11]] + ; GFX6-NEXT: [[USUBE18:%[0-9]+]]:_(i32), [[USUBE19:%[0-9]+]]:_(i1) = G_USUBE [[USUBE16]], [[C5]], [[USUBO13]] + ; GFX6-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO12]](i32), [[USUBE18]](i32) + ; GFX6-NEXT: [[ICMP11:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE18]](i32), [[UV31]] + ; GFX6-NEXT: [[SEXT6:%[0-9]+]]:_(i32) = G_SEXT [[ICMP11]](i1) + ; GFX6-NEXT: [[ICMP12:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO12]](i32), [[UV30]] + ; GFX6-NEXT: [[SEXT7:%[0-9]+]]:_(i32) = G_SEXT [[ICMP12]](i1) + ; GFX6-NEXT: [[ICMP13:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE18]](i32), [[UV31]] + ; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(i32) = G_SELECT [[ICMP13]](i1), [[SEXT7]], [[SEXT6]] + ; GFX6-NEXT: [[USUBO14:%[0-9]+]]:_(i32), [[USUBO15:%[0-9]+]]:_(i1) = G_USUBO [[USUBO12]], [[UV30]] + ; GFX6-NEXT: [[USUBE20:%[0-9]+]]:_(i32), [[USUBE21:%[0-9]+]]:_(i1) = G_USUBE [[USUBE16]], [[UV31]], [[USUBO13]] + ; GFX6-NEXT: [[USUBE22:%[0-9]+]]:_(i32), [[USUBE23:%[0-9]+]]:_(i1) = G_USUBE [[USUBE20]], [[C5]], [[USUBO15]] + ; GFX6-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO14]](i32), [[USUBE22]](i32) + ; GFX6-NEXT: [[ICMP14:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT5]](i32), [[C5]] + ; GFX6-NEXT: [[SELECT6:%[0-9]+]]:_(i64) = G_SELECT [[ICMP14]](i1), [[MV5]], [[MV4]] + ; GFX6-NEXT: [[ICMP15:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), 
[[SELECT4]](i32), [[C5]] + ; GFX6-NEXT: [[SELECT7:%[0-9]+]]:_(i64) = G_SELECT [[ICMP15]](i1), [[SELECT6]], [[MV3]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[SELECT3]](i64), [[SELECT7]](i64) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; GFX8-LABEL: name: test_urem_v2s64 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV4]](s32) - ; GFX8-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV5]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]] - ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] - ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 - ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; GFX8-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C2]] - ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; GFX8-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] - ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] - ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) - ; GFX8-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) - ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) - ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX8-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[UV8]] - ; GFX8-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[UV9]], [[USUBO1]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C4]] - ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV11]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] - ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV10]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV12]] - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV10]] - ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], 
[[MUL1]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV12]] - ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV10]] - ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV12]] - ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] - ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV12]] - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C4]] - ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) - ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV15]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE]], [[ANYEXT1]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]] - ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV14]] - ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[UV16]] - ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV14]] - ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] - ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV16]] - ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV14]] - ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV16]] - ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), 
[[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD4]] - ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV16]] - ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]] - ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO22]] - ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE2]] - ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO22]] - ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] - ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE2]] - ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO22]] - ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE2]] - ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD8]] - ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE2]] - ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] - ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDO32]], [[C4]] - ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) - ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV25]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV23]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]] - ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) - ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[UV24]] - ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[UV26]], [[USUBO3]] - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[UV26]] - ; 
GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; GFX8-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV29]] - ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV28]] - ; GFX8-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV29]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX8-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV28]] - ; GFX8-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV29]], [[USUBO3]] - ; GFX8-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] - ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) - ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV29]] - ; GFX8-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV28]] - ; GFX8-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV29]] - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX8-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV28]] - ; GFX8-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV29]], [[USUBO5]] - ; GFX8-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C5]], [[USUBO7]] - ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) - ; GFX8-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] - ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] - ; GFX8-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] - ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] - ; GFX8-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX8-NEXT: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV30]](s32) - ; GFX8-NEXT: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV31]](s32) - ; GFX8-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C]] - ; GFX8-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[UITOFP2]] - ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD2]](s32) - ; GFX8-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C1]] - ; GFX8-NEXT: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[FMUL5]], [[C2]] - ; GFX8-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL6]] - ; GFX8-NEXT: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C3]] - ; GFX8-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]] - ; GFX8-NEXT: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD3]](s32) - ; GFX8-NEXT: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32) - ; GFX8-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) - ; GFX8-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX8-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV32]], [[UV34]] - ; 
GFX8-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV33]], [[UV35]], [[USUBO9]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_18:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_19:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[FPTOUI2]], [[C4]] - ; GFX8-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_18]](s64) - ; GFX8-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[UV37]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_20:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_21:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[FPTOUI3]], [[ANYEXT3]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_22:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_23:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE12]](s32), [[FPTOUI2]], [[AMDGPU_MAD_U64_U32_20]] - ; GFX8-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_22]](s64) - ; GFX8-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV36]] - ; GFX8-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[UV38]] - ; GFX8-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV36]] - ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] - ; GFX8-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UMULH12]] - ; GFX8-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX8-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] - ; GFX8-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV38]] - ; GFX8-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV36]] - ; GFX8-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV38]] - ; GFX8-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]] - ; GFX8-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO39]](s1) - ; GFX8-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH14]] - ; GFX8-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO41]](s1) - ; GFX8-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] - ; GFX8-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UADDO40]], [[ADD12]] - ; GFX8-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1) - ; GFX8-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT19]] - ; GFX8-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV38]] - ; GFX8-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[ADD14]] - ; GFX8-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO42]] - ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD15]], [[UADDO45]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDO44]], [[C4]] - ; GFX8-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_24]](s64) - ; GFX8-NEXT: [[ANYEXT4:%[0-9]+]]:_(s64) = G_ANYEXT [[UV41]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDE4]], [[ANYEXT4]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE12]](s32), [[UADDO44]], [[AMDGPU_MAD_U64_U32_26]] - ; GFX8-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_28]](s64) - ; GFX8-NEXT: 
[[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV40]] - ; GFX8-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO44]], [[UV42]] - ; GFX8-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[UV40]] - ; GFX8-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] - ; GFX8-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) - ; GFX8-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH16]] - ; GFX8-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1) - ; GFX8-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] - ; GFX8-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV42]] - ; GFX8-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV40]] - ; GFX8-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[UV42]] - ; GFX8-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]] - ; GFX8-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) - ; GFX8-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH18]] - ; GFX8-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1) - ; GFX8-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] - ; GFX8-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[ADD16]] - ; GFX8-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) - ; GFX8-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[ZEXT24]] - ; GFX8-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV42]] - ; GFX8-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD18]] - ; GFX8-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[UADDO54]] - ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD19]], [[UADDO57]] - ; GFX8-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX8-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX8-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDO56]] - ; GFX8-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV46]], [[UADDE6]] - ; GFX8-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDO56]] - ; GFX8-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]] - ; GFX8-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) - ; GFX8-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH20]] - ; GFX8-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1) - ; GFX8-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] - ; GFX8-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDE6]] - ; GFX8-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDO56]] - ; GFX8-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDE6]] - ; GFX8-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]] - ; GFX8-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) - ; GFX8-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH22]] - ; GFX8-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1) - ; GFX8-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] - ; GFX8-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[UADDO64]], [[ADD20]] - ; GFX8-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) - ; GFX8-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT29]] - ; GFX8-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH 
[[UV47]], [[UADDE6]] - ; GFX8-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[ADD22]] - ; GFX8-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV48]](s32), [[UADDO66]], [[C4]] - ; GFX8-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_30]](s64) - ; GFX8-NEXT: [[ANYEXT5:%[0-9]+]]:_(s64) = G_ANYEXT [[UV51]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV48]](s32), [[ADD23]], [[ANYEXT5]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV49]](s32), [[UADDO66]], [[AMDGPU_MAD_U64_U32_32]] - ; GFX8-NEXT: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_34]](s64) - ; GFX8-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV44]], [[UV50]] - ; GFX8-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV45]], [[UV52]], [[USUBO11]] - ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV45]], [[UV52]] - ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO10]](s32), [[USUBE14]](s32) - ; GFX8-NEXT: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX8-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE14]](s32), [[UV55]] - ; GFX8-NEXT: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1) - ; GFX8-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV54]] - ; GFX8-NEXT: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1) - ; GFX8-NEXT: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE14]](s32), [[UV55]] - ; GFX8-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]] - ; GFX8-NEXT: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[USUBO10]], [[UV54]] - ; GFX8-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV55]], [[USUBO11]] - ; GFX8-NEXT: [[USUBE18:%[0-9]+]]:_(s32), [[USUBE19:%[0-9]+]]:_(s1) = G_USUBE [[USUBE16]], [[C5]], [[USUBO13]] - ; GFX8-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO12]](s32), [[USUBE18]](s32) - ; GFX8-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE18]](s32), [[UV55]] - ; GFX8-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) - ; GFX8-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV54]] - ; GFX8-NEXT: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1) - ; GFX8-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE18]](s32), [[UV55]] - ; GFX8-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] - ; GFX8-NEXT: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[USUBO12]], [[UV54]] - ; GFX8-NEXT: [[USUBE20:%[0-9]+]]:_(s32), [[USUBE21:%[0-9]+]]:_(s1) = G_USUBE [[USUBE16]], [[UV55]], [[USUBO13]] - ; GFX8-NEXT: [[USUBE22:%[0-9]+]]:_(s32), [[USUBE23:%[0-9]+]]:_(s1) = G_USUBE [[USUBE20]], [[C5]], [[USUBO15]] - ; GFX8-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO14]](s32), [[USUBE22]](s32) - ; GFX8-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C5]] - ; GFX8-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV5]], [[MV4]] - ; GFX8-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C5]] - ; GFX8-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT 
[[ICMP15]](s1), [[SELECT6]], [[MV3]] - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT3]](s64), [[SELECT7]](s64) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY1]](<2 x i64>) + ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[UV4]](i32) + ; GFX8-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[UV5]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP1]], [[C]] + ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD]](f32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX8-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FMUL1]], [[C2]] + ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX8-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] + ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD1]](f32) + ; GFX8-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC]](f32) + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C4]](i64) + ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX8-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV6]], [[UV8]] + ; GFX8-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV7]], [[UV9]], [[USUBO1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[FPTOUI]], [[C4]] + ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](i64) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[UV11]](i32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[FPTOUI1]], [[ANYEXT]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](i32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](i64) + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[UV10]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI]], [[UV12]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[UV10]] + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[MUL]], [[MUL1]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO1]](i1) + ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UADDO]], [[UMULH]] + ; GFX8-NEXT: 
[[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO3]](i1) + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[UV12]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[UV10]] + ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[UV12]] + ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(i32), [[UADDO5:%[0-9]+]]:_(i1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO5]](i1) + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(i32), [[UADDO7:%[0-9]+]]:_(i1) = G_UADDO [[UADDO4]], [[UMULH2]] + ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO7]](i1) + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(i32), [[UADDO9:%[0-9]+]]:_(i1) = G_UADDO [[UADDO6]], [[ADD]] + ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO9]](i1) + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[UV12]] + ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UMULH3]], [[ADD2]] + ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(i32), [[UADDO11:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI]], [[UADDO8]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[UADDO10]], [[C4]] + ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](i64) + ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[UV15]](i32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[UADDE]], [[ANYEXT1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](i32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](i64) + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UADDE]], [[UV14]] + ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(i32) = G_MUL [[UADDO10]], [[UV16]] + ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(i32) = G_UMULH [[UADDO10]], [[UV14]] + ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(i32), [[UADDO13:%[0-9]+]]:_(i1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO13]](i1) + ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(i32), [[UADDO15:%[0-9]+]]:_(i1) = G_UADDO [[UADDO12]], [[UMULH4]] + ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO15]](i1) + ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(i32) = G_MUL [[UADDE]], [[UV16]] + ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(i32) = G_UMULH [[UADDE]], [[UV14]] + ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(i32) = G_UMULH [[UADDO10]], [[UV16]] + ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(i32), [[UADDO17:%[0-9]+]]:_(i1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO17]](i1) + ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(i32), [[UADDO19:%[0-9]+]]:_(i1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO19]](i1) + ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(i32), [[UADDO21:%[0-9]+]]:_(i1) = G_UADDO [[UADDO18]], [[ADD4]] + ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO21]](i1) + ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX8-NEXT: 
[[UMULH7:%[0-9]+]]:_(i32) = G_UMULH [[UADDE]], [[UV16]] + ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[UMULH7]], [[ADD6]] + ; GFX8-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(i32), [[UADDO23:%[0-9]+]]:_(i1) = G_UADDO [[UADDO10]], [[UADDO20]] + ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]] + ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV]](i64) + ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV]](i64) + ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(i32) = G_MUL [[UV21]], [[UADDO22]] + ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(i32) = G_MUL [[UV20]], [[UADDE2]] + ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(i32) = G_UMULH [[UV20]], [[UADDO22]] + ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(i32), [[UADDO25:%[0-9]+]]:_(i1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO25]](i1) + ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(i32), [[UADDO27:%[0-9]+]]:_(i1) = G_UADDO [[UADDO24]], [[UMULH8]] + ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO27]](i1) + ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(i32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(i32) = G_MUL [[UV21]], [[UADDE2]] + ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(i32) = G_UMULH [[UV21]], [[UADDO22]] + ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(i32) = G_UMULH [[UV20]], [[UADDE2]] + ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(i32), [[UADDO29:%[0-9]+]]:_(i1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO29]](i1) + ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(i32), [[UADDO31:%[0-9]+]]:_(i1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO31]](i1) + ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(i32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(i32), [[UADDO33:%[0-9]+]]:_(i1) = G_UADDO [[UADDO30]], [[ADD8]] + ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO33]](i1) + ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(i32) = G_ADD [[ADD9]], [[ZEXT14]] + ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(i32) = G_UMULH [[UV21]], [[UADDE2]] + ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(i32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV22]](i32), [[UADDO32]], [[C4]] + ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](i64) + ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(i64) = G_ANYEXT [[UV25]](i32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV22]](i32), [[ADD11]], [[ANYEXT2]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV23]](i32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](i64) + ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(i32), [[USUBO3:%[0-9]+]]:_(i1) = G_USUBO [[UV18]], [[UV24]] + ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV19]], [[UV26]], [[USUBO3]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV19]], [[UV26]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO2]](i32), [[USUBE2]](i32) + ; GFX8-NEXT: [[UV28:%[0-9]+]]:_(i32), [[UV29:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX8-NEXT: 
[[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE2]](i32), [[UV29]] + ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO2]](i32), [[UV28]] + ; GFX8-NEXT: [[SEXT1:%[0-9]+]]:_(i32) = G_SEXT [[ICMP1]](i1) + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE2]](i32), [[UV29]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SEXT1]], [[SEXT]] + ; GFX8-NEXT: [[USUBO4:%[0-9]+]]:_(i32), [[USUBO5:%[0-9]+]]:_(i1) = G_USUBO [[USUBO2]], [[UV28]] + ; GFX8-NEXT: [[USUBE4:%[0-9]+]]:_(i32), [[USUBE5:%[0-9]+]]:_(i1) = G_USUBE [[SUB]], [[UV29]], [[USUBO3]] + ; GFX8-NEXT: [[USUBE6:%[0-9]+]]:_(i32), [[USUBE7:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] + ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO4]](i32), [[USUBE6]](i32) + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE6]](i32), [[UV29]] + ; GFX8-NEXT: [[SEXT2:%[0-9]+]]:_(i32) = G_SEXT [[ICMP3]](i1) + ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO4]](i32), [[UV28]] + ; GFX8-NEXT: [[SEXT3:%[0-9]+]]:_(i32) = G_SEXT [[ICMP4]](i1) + ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE6]](i32), [[UV29]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP5]](i1), [[SEXT3]], [[SEXT2]] + ; GFX8-NEXT: [[USUBO6:%[0-9]+]]:_(i32), [[USUBO7:%[0-9]+]]:_(i1) = G_USUBO [[USUBO4]], [[UV28]] + ; GFX8-NEXT: [[USUBE8:%[0-9]+]]:_(i32), [[USUBE9:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[UV29]], [[USUBO5]] + ; GFX8-NEXT: [[USUBE10:%[0-9]+]]:_(i32), [[USUBE11:%[0-9]+]]:_(i1) = G_USUBE [[USUBE8]], [[C5]], [[USUBO7]] + ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO6]](i32), [[USUBE10]](i32) + ; GFX8-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT1]](i32), [[C5]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[MV2]], [[MV1]] + ; GFX8-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT]](i32), [[C5]] + ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP7]](i1), [[SELECT2]], [[MV]] + ; GFX8-NEXT: [[UV30:%[0-9]+]]:_(i32), [[UV31:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV3]](i64) + ; GFX8-NEXT: [[UITOFP2:%[0-9]+]]:_(f32) = G_UITOFP [[UV30]](i32) + ; GFX8-NEXT: [[UITOFP3:%[0-9]+]]:_(f32) = G_UITOFP [[UV31]](i32) + ; GFX8-NEXT: [[FMUL4:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP3]], [[C]] + ; GFX8-NEXT: [[FADD2:%[0-9]+]]:_(f32) = G_FADD [[FMUL4]], [[UITOFP2]] + ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD2]](f32) + ; GFX8-NEXT: [[FMUL5:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C1]] + ; GFX8-NEXT: [[FMUL6:%[0-9]+]]:_(f32) = G_FMUL [[FMUL5]], [[C2]] + ; GFX8-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL6]] + ; GFX8-NEXT: [[FMUL7:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C3]] + ; GFX8-NEXT: [[FADD3:%[0-9]+]]:_(f32) = G_FADD [[FMUL7]], [[FMUL5]] + ; GFX8-NEXT: [[FPTOUI2:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD3]](f32) + ; GFX8-NEXT: [[FPTOUI3:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC1]](f32) + ; GFX8-NEXT: [[UV32:%[0-9]+]]:_(i32), [[UV33:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C4]](i64) + ; GFX8-NEXT: [[UV34:%[0-9]+]]:_(i32), [[UV35:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV3]](i64) + ; GFX8-NEXT: [[USUBO8:%[0-9]+]]:_(i32), [[USUBO9:%[0-9]+]]:_(i1) = G_USUBO [[UV32]], [[UV34]] + ; GFX8-NEXT: [[USUBE12:%[0-9]+]]:_(i32), [[USUBE13:%[0-9]+]]:_(i1) = G_USUBE [[UV33]], [[UV35]], [[USUBO9]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_18:%[0-9]+]]:_(i64), 
[[AMDGPU_MAD_U64_U32_19:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](i32), [[FPTOUI2]], [[C4]] + ; GFX8-NEXT: [[UV36:%[0-9]+]]:_(i32), [[UV37:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_18]](i64) + ; GFX8-NEXT: [[ANYEXT3:%[0-9]+]]:_(i64) = G_ANYEXT [[UV37]](i32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_20:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_21:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](i32), [[FPTOUI3]], [[ANYEXT3]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_22:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_23:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE12]](i32), [[FPTOUI2]], [[AMDGPU_MAD_U64_U32_20]] + ; GFX8-NEXT: [[UV38:%[0-9]+]]:_(i32), [[UV39:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_22]](i64) + ; GFX8-NEXT: [[MUL9:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI3]], [[UV36]] + ; GFX8-NEXT: [[MUL10:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI2]], [[UV38]] + ; GFX8-NEXT: [[UMULH12:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI2]], [[UV36]] + ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(i32), [[UADDO35:%[0-9]+]]:_(i1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX8-NEXT: [[ZEXT15:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO35]](i1) + ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(i32), [[UADDO37:%[0-9]+]]:_(i1) = G_UADDO [[UADDO34]], [[UMULH12]] + ; GFX8-NEXT: [[ZEXT16:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO37]](i1) + ; GFX8-NEXT: [[ADD12:%[0-9]+]]:_(i32) = G_ADD [[ZEXT15]], [[ZEXT16]] + ; GFX8-NEXT: [[MUL11:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI3]], [[UV38]] + ; GFX8-NEXT: [[UMULH13:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI3]], [[UV36]] + ; GFX8-NEXT: [[UMULH14:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI2]], [[UV38]] + ; GFX8-NEXT: [[UADDO38:%[0-9]+]]:_(i32), [[UADDO39:%[0-9]+]]:_(i1) = G_UADDO [[MUL11]], [[UMULH13]] + ; GFX8-NEXT: [[ZEXT17:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO39]](i1) + ; GFX8-NEXT: [[UADDO40:%[0-9]+]]:_(i32), [[UADDO41:%[0-9]+]]:_(i1) = G_UADDO [[UADDO38]], [[UMULH14]] + ; GFX8-NEXT: [[ZEXT18:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO41]](i1) + ; GFX8-NEXT: [[ADD13:%[0-9]+]]:_(i32) = G_ADD [[ZEXT17]], [[ZEXT18]] + ; GFX8-NEXT: [[UADDO42:%[0-9]+]]:_(i32), [[UADDO43:%[0-9]+]]:_(i1) = G_UADDO [[UADDO40]], [[ADD12]] + ; GFX8-NEXT: [[ZEXT19:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO43]](i1) + ; GFX8-NEXT: [[ADD14:%[0-9]+]]:_(i32) = G_ADD [[ADD13]], [[ZEXT19]] + ; GFX8-NEXT: [[UMULH15:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI3]], [[UV38]] + ; GFX8-NEXT: [[ADD15:%[0-9]+]]:_(i32) = G_ADD [[UMULH15]], [[ADD14]] + ; GFX8-NEXT: [[UADDO44:%[0-9]+]]:_(i32), [[UADDO45:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI2]], [[UADDO42]] + ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(i32), [[UADDE5:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI3]], [[ADD15]], [[UADDO45]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](i32), [[UADDO44]], [[C4]] + ; GFX8-NEXT: [[UV40:%[0-9]+]]:_(i32), [[UV41:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_24]](i64) + ; GFX8-NEXT: [[ANYEXT4:%[0-9]+]]:_(i64) = G_ANYEXT [[UV41]](i32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](i32), [[UADDE4]], [[ANYEXT4]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE12]](i32), [[UADDO44]], [[AMDGPU_MAD_U64_U32_26]] + ; GFX8-NEXT: [[UV42:%[0-9]+]]:_(i32), [[UV43:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_28]](i64) + ; GFX8-NEXT: [[MUL12:%[0-9]+]]:_(i32) = G_MUL [[UADDE4]], [[UV40]] + ; GFX8-NEXT: [[MUL13:%[0-9]+]]:_(i32) = G_MUL [[UADDO44]], [[UV42]] + ; GFX8-NEXT: [[UMULH16:%[0-9]+]]:_(i32) = 
G_UMULH [[UADDO44]], [[UV40]] + ; GFX8-NEXT: [[UADDO46:%[0-9]+]]:_(i32), [[UADDO47:%[0-9]+]]:_(i1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX8-NEXT: [[ZEXT20:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO47]](i1) + ; GFX8-NEXT: [[UADDO48:%[0-9]+]]:_(i32), [[UADDO49:%[0-9]+]]:_(i1) = G_UADDO [[UADDO46]], [[UMULH16]] + ; GFX8-NEXT: [[ZEXT21:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO49]](i1) + ; GFX8-NEXT: [[ADD16:%[0-9]+]]:_(i32) = G_ADD [[ZEXT20]], [[ZEXT21]] + ; GFX8-NEXT: [[MUL14:%[0-9]+]]:_(i32) = G_MUL [[UADDE4]], [[UV42]] + ; GFX8-NEXT: [[UMULH17:%[0-9]+]]:_(i32) = G_UMULH [[UADDE4]], [[UV40]] + ; GFX8-NEXT: [[UMULH18:%[0-9]+]]:_(i32) = G_UMULH [[UADDO44]], [[UV42]] + ; GFX8-NEXT: [[UADDO50:%[0-9]+]]:_(i32), [[UADDO51:%[0-9]+]]:_(i1) = G_UADDO [[MUL14]], [[UMULH17]] + ; GFX8-NEXT: [[ZEXT22:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO51]](i1) + ; GFX8-NEXT: [[UADDO52:%[0-9]+]]:_(i32), [[UADDO53:%[0-9]+]]:_(i1) = G_UADDO [[UADDO50]], [[UMULH18]] + ; GFX8-NEXT: [[ZEXT23:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO53]](i1) + ; GFX8-NEXT: [[ADD17:%[0-9]+]]:_(i32) = G_ADD [[ZEXT22]], [[ZEXT23]] + ; GFX8-NEXT: [[UADDO54:%[0-9]+]]:_(i32), [[UADDO55:%[0-9]+]]:_(i1) = G_UADDO [[UADDO52]], [[ADD16]] + ; GFX8-NEXT: [[ZEXT24:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO55]](i1) + ; GFX8-NEXT: [[ADD18:%[0-9]+]]:_(i32) = G_ADD [[ADD17]], [[ZEXT24]] + ; GFX8-NEXT: [[UMULH19:%[0-9]+]]:_(i32) = G_UMULH [[UADDE4]], [[UV42]] + ; GFX8-NEXT: [[ADD19:%[0-9]+]]:_(i32) = G_ADD [[UMULH19]], [[ADD18]] + ; GFX8-NEXT: [[UADDO56:%[0-9]+]]:_(i32), [[UADDO57:%[0-9]+]]:_(i1) = G_UADDO [[UADDO44]], [[UADDO54]] + ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(i32), [[UADDE7:%[0-9]+]]:_(i1) = G_UADDE [[UADDE4]], [[ADD19]], [[UADDO57]] + ; GFX8-NEXT: [[UV44:%[0-9]+]]:_(i32), [[UV45:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV1]](i64) + ; GFX8-NEXT: [[UV46:%[0-9]+]]:_(i32), [[UV47:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV1]](i64) + ; GFX8-NEXT: [[MUL15:%[0-9]+]]:_(i32) = G_MUL [[UV47]], [[UADDO56]] + ; GFX8-NEXT: [[MUL16:%[0-9]+]]:_(i32) = G_MUL [[UV46]], [[UADDE6]] + ; GFX8-NEXT: [[UMULH20:%[0-9]+]]:_(i32) = G_UMULH [[UV46]], [[UADDO56]] + ; GFX8-NEXT: [[UADDO58:%[0-9]+]]:_(i32), [[UADDO59:%[0-9]+]]:_(i1) = G_UADDO [[MUL15]], [[MUL16]] + ; GFX8-NEXT: [[ZEXT25:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO59]](i1) + ; GFX8-NEXT: [[UADDO60:%[0-9]+]]:_(i32), [[UADDO61:%[0-9]+]]:_(i1) = G_UADDO [[UADDO58]], [[UMULH20]] + ; GFX8-NEXT: [[ZEXT26:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO61]](i1) + ; GFX8-NEXT: [[ADD20:%[0-9]+]]:_(i32) = G_ADD [[ZEXT25]], [[ZEXT26]] + ; GFX8-NEXT: [[MUL17:%[0-9]+]]:_(i32) = G_MUL [[UV47]], [[UADDE6]] + ; GFX8-NEXT: [[UMULH21:%[0-9]+]]:_(i32) = G_UMULH [[UV47]], [[UADDO56]] + ; GFX8-NEXT: [[UMULH22:%[0-9]+]]:_(i32) = G_UMULH [[UV46]], [[UADDE6]] + ; GFX8-NEXT: [[UADDO62:%[0-9]+]]:_(i32), [[UADDO63:%[0-9]+]]:_(i1) = G_UADDO [[MUL17]], [[UMULH21]] + ; GFX8-NEXT: [[ZEXT27:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO63]](i1) + ; GFX8-NEXT: [[UADDO64:%[0-9]+]]:_(i32), [[UADDO65:%[0-9]+]]:_(i1) = G_UADDO [[UADDO62]], [[UMULH22]] + ; GFX8-NEXT: [[ZEXT28:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO65]](i1) + ; GFX8-NEXT: [[ADD21:%[0-9]+]]:_(i32) = G_ADD [[ZEXT27]], [[ZEXT28]] + ; GFX8-NEXT: [[UADDO66:%[0-9]+]]:_(i32), [[UADDO67:%[0-9]+]]:_(i1) = G_UADDO [[UADDO64]], [[ADD20]] + ; GFX8-NEXT: [[ZEXT29:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO67]](i1) + ; GFX8-NEXT: [[ADD22:%[0-9]+]]:_(i32) = G_ADD [[ADD21]], [[ZEXT29]] + ; GFX8-NEXT: [[UMULH23:%[0-9]+]]:_(i32) = G_UMULH [[UV47]], [[UADDE6]] + ; GFX8-NEXT: [[ADD23:%[0-9]+]]:_(i32) = G_ADD [[UMULH23]], [[ADD22]] + ; GFX8-NEXT: [[UV48:%[0-9]+]]:_(i32), [[UV49:%[0-9]+]]:_(i32) = 
G_UNMERGE_VALUES [[UV3]](i64) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV48]](i32), [[UADDO66]], [[C4]] + ; GFX8-NEXT: [[UV50:%[0-9]+]]:_(i32), [[UV51:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_30]](i64) + ; GFX8-NEXT: [[ANYEXT5:%[0-9]+]]:_(i64) = G_ANYEXT [[UV51]](i32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV48]](i32), [[ADD23]], [[ANYEXT5]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV49]](i32), [[UADDO66]], [[AMDGPU_MAD_U64_U32_32]] + ; GFX8-NEXT: [[UV52:%[0-9]+]]:_(i32), [[UV53:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_34]](i64) + ; GFX8-NEXT: [[USUBO10:%[0-9]+]]:_(i32), [[USUBO11:%[0-9]+]]:_(i1) = G_USUBO [[UV44]], [[UV50]] + ; GFX8-NEXT: [[USUBE14:%[0-9]+]]:_(i32), [[USUBE15:%[0-9]+]]:_(i1) = G_USUBE [[UV45]], [[UV52]], [[USUBO11]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[UV45]], [[UV52]] + ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO10]](i32), [[USUBE14]](i32) + ; GFX8-NEXT: [[UV54:%[0-9]+]]:_(i32), [[UV55:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV3]](i64) + ; GFX8-NEXT: [[ICMP8:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE14]](i32), [[UV55]] + ; GFX8-NEXT: [[SEXT4:%[0-9]+]]:_(i32) = G_SEXT [[ICMP8]](i1) + ; GFX8-NEXT: [[ICMP9:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO10]](i32), [[UV54]] + ; GFX8-NEXT: [[SEXT5:%[0-9]+]]:_(i32) = G_SEXT [[ICMP9]](i1) + ; GFX8-NEXT: [[ICMP10:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE14]](i32), [[UV55]] + ; GFX8-NEXT: [[SELECT4:%[0-9]+]]:_(i32) = G_SELECT [[ICMP10]](i1), [[SEXT5]], [[SEXT4]] + ; GFX8-NEXT: [[USUBO12:%[0-9]+]]:_(i32), [[USUBO13:%[0-9]+]]:_(i1) = G_USUBO [[USUBO10]], [[UV54]] + ; GFX8-NEXT: [[USUBE16:%[0-9]+]]:_(i32), [[USUBE17:%[0-9]+]]:_(i1) = G_USUBE [[SUB1]], [[UV55]], [[USUBO11]] + ; GFX8-NEXT: [[USUBE18:%[0-9]+]]:_(i32), [[USUBE19:%[0-9]+]]:_(i1) = G_USUBE [[USUBE16]], [[C5]], [[USUBO13]] + ; GFX8-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO12]](i32), [[USUBE18]](i32) + ; GFX8-NEXT: [[ICMP11:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE18]](i32), [[UV55]] + ; GFX8-NEXT: [[SEXT6:%[0-9]+]]:_(i32) = G_SEXT [[ICMP11]](i1) + ; GFX8-NEXT: [[ICMP12:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO12]](i32), [[UV54]] + ; GFX8-NEXT: [[SEXT7:%[0-9]+]]:_(i32) = G_SEXT [[ICMP12]](i1) + ; GFX8-NEXT: [[ICMP13:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE18]](i32), [[UV55]] + ; GFX8-NEXT: [[SELECT5:%[0-9]+]]:_(i32) = G_SELECT [[ICMP13]](i1), [[SEXT7]], [[SEXT6]] + ; GFX8-NEXT: [[USUBO14:%[0-9]+]]:_(i32), [[USUBO15:%[0-9]+]]:_(i1) = G_USUBO [[USUBO12]], [[UV54]] + ; GFX8-NEXT: [[USUBE20:%[0-9]+]]:_(i32), [[USUBE21:%[0-9]+]]:_(i1) = G_USUBE [[USUBE16]], [[UV55]], [[USUBO13]] + ; GFX8-NEXT: [[USUBE22:%[0-9]+]]:_(i32), [[USUBE23:%[0-9]+]]:_(i1) = G_USUBE [[USUBE20]], [[C5]], [[USUBO15]] + ; GFX8-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO14]](i32), [[USUBE22]](i32) + ; GFX8-NEXT: [[ICMP14:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT5]](i32), [[C5]] + ; GFX8-NEXT: [[SELECT6:%[0-9]+]]:_(i64) = G_SELECT [[ICMP14]](i1), [[MV5]], [[MV4]] + ; GFX8-NEXT: [[ICMP15:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT4]](i32), [[C5]] + ; GFX8-NEXT: [[SELECT7:%[0-9]+]]:_(i64) = G_SELECT [[ICMP15]](i1), [[SELECT6]], [[MV3]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[SELECT3]](i64), [[SELECT7]](i64) + ; GFX8-NEXT: 
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; GFX9-LABEL: name: test_urem_v2s64 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV4]](s32) - ; GFX9-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV5]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] - ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 - ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C2]] - ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; GFX9-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] - ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] - ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) - ; GFX9-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) - ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX9-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[UV8]] - ; GFX9-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[UV9]], [[USUBO1]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C4]] - ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV11]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] - ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV10]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV12]] - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV10]] - ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; 
GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV12]] - ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV10]] - ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV12]] - ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] - ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV12]] - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C4]] - ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) - ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV15]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE]], [[ANYEXT1]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]] - ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV14]] - ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[UV16]] - ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV14]] - ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] - ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV16]] - ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV14]] - ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV16]] - ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD4]] - ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV16]] 
- ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]] - ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO22]] - ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE2]] - ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO22]] - ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] - ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE2]] - ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO22]] - ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE2]] - ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD8]] - ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE2]] - ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] - ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDO32]], [[C4]] - ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) - ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV25]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV23]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]] - ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) - ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[UV24]] - ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[UV26]], [[USUBO3]] - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[UV26]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), 
[[UV29]] - ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV28]] - ; GFX9-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV29]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX9-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV28]] - ; GFX9-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV29]], [[USUBO3]] - ; GFX9-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] - ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) - ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV29]] - ; GFX9-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV28]] - ; GFX9-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV29]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX9-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV28]] - ; GFX9-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV29]], [[USUBO5]] - ; GFX9-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C5]], [[USUBO7]] - ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) - ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] - ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] - ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] - ; GFX9-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX9-NEXT: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV30]](s32) - ; GFX9-NEXT: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV31]](s32) - ; GFX9-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C]] - ; GFX9-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[UITOFP2]] - ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD2]](s32) - ; GFX9-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C1]] - ; GFX9-NEXT: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[FMUL5]], [[C2]] - ; GFX9-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL6]] - ; GFX9-NEXT: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C3]] - ; GFX9-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]] - ; GFX9-NEXT: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD3]](s32) - ; GFX9-NEXT: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32) - ; GFX9-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) - ; GFX9-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX9-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV32]], [[UV34]] - ; GFX9-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV33]], [[UV35]], [[USUBO9]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_18:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_19:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[FPTOUI2]], 
[[C4]] - ; GFX9-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_18]](s64) - ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[UV37]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_20:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_21:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[FPTOUI3]], [[ANYEXT3]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_22:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_23:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE12]](s32), [[FPTOUI2]], [[AMDGPU_MAD_U64_U32_20]] - ; GFX9-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_22]](s64) - ; GFX9-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV36]] - ; GFX9-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[UV38]] - ; GFX9-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV36]] - ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] - ; GFX9-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UMULH12]] - ; GFX9-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX9-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] - ; GFX9-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV38]] - ; GFX9-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV36]] - ; GFX9-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV38]] - ; GFX9-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]] - ; GFX9-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO39]](s1) - ; GFX9-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH14]] - ; GFX9-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO41]](s1) - ; GFX9-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] - ; GFX9-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UADDO40]], [[ADD12]] - ; GFX9-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1) - ; GFX9-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT19]] - ; GFX9-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV38]] - ; GFX9-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[ADD14]] - ; GFX9-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO42]] - ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD15]], [[UADDO45]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDO44]], [[C4]] - ; GFX9-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_24]](s64) - ; GFX9-NEXT: [[ANYEXT4:%[0-9]+]]:_(s64) = G_ANYEXT [[UV41]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDE4]], [[ANYEXT4]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE12]](s32), [[UADDO44]], [[AMDGPU_MAD_U64_U32_26]] - ; GFX9-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_28]](s64) - ; GFX9-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV40]] - ; GFX9-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO44]], [[UV42]] - ; GFX9-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[UV40]] - ; GFX9-NEXT: [[UADDO46:%[0-9]+]]:_(s32), 
[[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] - ; GFX9-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) - ; GFX9-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH16]] - ; GFX9-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1) - ; GFX9-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] - ; GFX9-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV42]] - ; GFX9-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV40]] - ; GFX9-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[UV42]] - ; GFX9-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]] - ; GFX9-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) - ; GFX9-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH18]] - ; GFX9-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1) - ; GFX9-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] - ; GFX9-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[ADD16]] - ; GFX9-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) - ; GFX9-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[ZEXT24]] - ; GFX9-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV42]] - ; GFX9-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD18]] - ; GFX9-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[UADDO54]] - ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD19]], [[UADDO57]] - ; GFX9-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX9-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX9-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDO56]] - ; GFX9-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV46]], [[UADDE6]] - ; GFX9-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDO56]] - ; GFX9-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]] - ; GFX9-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) - ; GFX9-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH20]] - ; GFX9-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1) - ; GFX9-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] - ; GFX9-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDE6]] - ; GFX9-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDO56]] - ; GFX9-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDE6]] - ; GFX9-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]] - ; GFX9-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) - ; GFX9-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH22]] - ; GFX9-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1) - ; GFX9-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] - ; GFX9-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[UADDO64]], [[ADD20]] - ; GFX9-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) - ; GFX9-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT29]] - ; GFX9-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDE6]] - ; GFX9-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[ADD22]] - ; GFX9-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX9-NEXT: 
[[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV48]](s32), [[UADDO66]], [[C4]] - ; GFX9-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_30]](s64) - ; GFX9-NEXT: [[ANYEXT5:%[0-9]+]]:_(s64) = G_ANYEXT [[UV51]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV48]](s32), [[ADD23]], [[ANYEXT5]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV49]](s32), [[UADDO66]], [[AMDGPU_MAD_U64_U32_32]] - ; GFX9-NEXT: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_34]](s64) - ; GFX9-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV44]], [[UV50]] - ; GFX9-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV45]], [[UV52]], [[USUBO11]] - ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV45]], [[UV52]] - ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO10]](s32), [[USUBE14]](s32) - ; GFX9-NEXT: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX9-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE14]](s32), [[UV55]] - ; GFX9-NEXT: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1) - ; GFX9-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV54]] - ; GFX9-NEXT: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1) - ; GFX9-NEXT: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE14]](s32), [[UV55]] - ; GFX9-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]] - ; GFX9-NEXT: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[USUBO10]], [[UV54]] - ; GFX9-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV55]], [[USUBO11]] - ; GFX9-NEXT: [[USUBE18:%[0-9]+]]:_(s32), [[USUBE19:%[0-9]+]]:_(s1) = G_USUBE [[USUBE16]], [[C5]], [[USUBO13]] - ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO12]](s32), [[USUBE18]](s32) - ; GFX9-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE18]](s32), [[UV55]] - ; GFX9-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) - ; GFX9-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV54]] - ; GFX9-NEXT: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1) - ; GFX9-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE18]](s32), [[UV55]] - ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] - ; GFX9-NEXT: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[USUBO12]], [[UV54]] - ; GFX9-NEXT: [[USUBE20:%[0-9]+]]:_(s32), [[USUBE21:%[0-9]+]]:_(s1) = G_USUBE [[USUBE16]], [[UV55]], [[USUBO13]] - ; GFX9-NEXT: [[USUBE22:%[0-9]+]]:_(s32), [[USUBE23:%[0-9]+]]:_(s1) = G_USUBE [[USUBE20]], [[C5]], [[USUBO15]] - ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO14]](s32), [[USUBE22]](s32) - ; GFX9-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C5]] - ; GFX9-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV5]], [[MV4]] - ; GFX9-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C5]] - ; GFX9-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV3]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT3]](s64), [[SELECT7]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x 
s64>) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY1]](<2 x i64>) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[UV4]](i32) + ; GFX9-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[UV5]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP1]], [[C]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD]](f32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FMUL1]], [[C2]] + ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX9-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] + ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD1]](f32) + ; GFX9-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC]](f32) + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C4]](i64) + ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX9-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV6]], [[UV8]] + ; GFX9-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV7]], [[UV9]], [[USUBO1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[FPTOUI]], [[C4]] + ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](i64) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[UV11]](i32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[FPTOUI1]], [[ANYEXT]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](i32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](i64) + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[UV10]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI]], [[UV12]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[UV10]] + ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[MUL]], [[MUL1]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO1]](i1) + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UADDO]], [[UMULH]] + ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO3]](i1) + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[UV12]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH 
[[FPTOUI1]], [[UV10]] + ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[UV12]] + ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(i32), [[UADDO5:%[0-9]+]]:_(i1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO5]](i1) + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(i32), [[UADDO7:%[0-9]+]]:_(i1) = G_UADDO [[UADDO4]], [[UMULH2]] + ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO7]](i1) + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(i32), [[UADDO9:%[0-9]+]]:_(i1) = G_UADDO [[UADDO6]], [[ADD]] + ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO9]](i1) + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[UV12]] + ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UMULH3]], [[ADD2]] + ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(i32), [[UADDO11:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI]], [[UADDO8]] + ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[UADDO10]], [[C4]] + ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](i64) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[UV15]](i32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[UADDE]], [[ANYEXT1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](i32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](i64) + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UADDE]], [[UV14]] + ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(i32) = G_MUL [[UADDO10]], [[UV16]] + ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(i32) = G_UMULH [[UADDO10]], [[UV14]] + ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(i32), [[UADDO13:%[0-9]+]]:_(i1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO13]](i1) + ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(i32), [[UADDO15:%[0-9]+]]:_(i1) = G_UADDO [[UADDO12]], [[UMULH4]] + ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO15]](i1) + ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(i32) = G_MUL [[UADDE]], [[UV16]] + ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(i32) = G_UMULH [[UADDE]], [[UV14]] + ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(i32) = G_UMULH [[UADDO10]], [[UV16]] + ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(i32), [[UADDO17:%[0-9]+]]:_(i1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO17]](i1) + ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(i32), [[UADDO19:%[0-9]+]]:_(i1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO19]](i1) + ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(i32), [[UADDO21:%[0-9]+]]:_(i1) = G_UADDO [[UADDO18]], [[ADD4]] + ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO21]](i1) + ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(i32) = G_UMULH [[UADDE]], [[UV16]] + ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[UMULH7]], [[ADD6]] + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(i32), 
[[UADDO23:%[0-9]+]]:_(i1) = G_UADDO [[UADDO10]], [[UADDO20]] + ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]] + ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV]](i64) + ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV]](i64) + ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(i32) = G_MUL [[UV21]], [[UADDO22]] + ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(i32) = G_MUL [[UV20]], [[UADDE2]] + ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(i32) = G_UMULH [[UV20]], [[UADDO22]] + ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(i32), [[UADDO25:%[0-9]+]]:_(i1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO25]](i1) + ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(i32), [[UADDO27:%[0-9]+]]:_(i1) = G_UADDO [[UADDO24]], [[UMULH8]] + ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO27]](i1) + ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(i32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(i32) = G_MUL [[UV21]], [[UADDE2]] + ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(i32) = G_UMULH [[UV21]], [[UADDO22]] + ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(i32) = G_UMULH [[UV20]], [[UADDE2]] + ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(i32), [[UADDO29:%[0-9]+]]:_(i1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO29]](i1) + ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(i32), [[UADDO31:%[0-9]+]]:_(i1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO31]](i1) + ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(i32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(i32), [[UADDO33:%[0-9]+]]:_(i1) = G_UADDO [[UADDO30]], [[ADD8]] + ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO33]](i1) + ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(i32) = G_ADD [[ADD9]], [[ZEXT14]] + ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(i32) = G_UMULH [[UV21]], [[UADDE2]] + ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(i32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV22]](i32), [[UADDO32]], [[C4]] + ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](i64) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(i64) = G_ANYEXT [[UV25]](i32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV22]](i32), [[ADD11]], [[ANYEXT2]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV23]](i32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](i64) + ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(i32), [[USUBO3:%[0-9]+]]:_(i1) = G_USUBO [[UV18]], [[UV24]] + ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV19]], [[UV26]], [[USUBO3]] + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV19]], [[UV26]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO2]](i32), [[USUBE2]](i32) + ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(i32), [[UV29:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE2]](i32), [[UV29]] + ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO2]](i32), [[UV28]] + ; GFX9-NEXT: 
[[SEXT1:%[0-9]+]]:_(i32) = G_SEXT [[ICMP1]](i1) + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE2]](i32), [[UV29]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SEXT1]], [[SEXT]] + ; GFX9-NEXT: [[USUBO4:%[0-9]+]]:_(i32), [[USUBO5:%[0-9]+]]:_(i1) = G_USUBO [[USUBO2]], [[UV28]] + ; GFX9-NEXT: [[USUBE4:%[0-9]+]]:_(i32), [[USUBE5:%[0-9]+]]:_(i1) = G_USUBE [[SUB]], [[UV29]], [[USUBO3]] + ; GFX9-NEXT: [[USUBE6:%[0-9]+]]:_(i32), [[USUBE7:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO4]](i32), [[USUBE6]](i32) + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE6]](i32), [[UV29]] + ; GFX9-NEXT: [[SEXT2:%[0-9]+]]:_(i32) = G_SEXT [[ICMP3]](i1) + ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO4]](i32), [[UV28]] + ; GFX9-NEXT: [[SEXT3:%[0-9]+]]:_(i32) = G_SEXT [[ICMP4]](i1) + ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE6]](i32), [[UV29]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP5]](i1), [[SEXT3]], [[SEXT2]] + ; GFX9-NEXT: [[USUBO6:%[0-9]+]]:_(i32), [[USUBO7:%[0-9]+]]:_(i1) = G_USUBO [[USUBO4]], [[UV28]] + ; GFX9-NEXT: [[USUBE8:%[0-9]+]]:_(i32), [[USUBE9:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[UV29]], [[USUBO5]] + ; GFX9-NEXT: [[USUBE10:%[0-9]+]]:_(i32), [[USUBE11:%[0-9]+]]:_(i1) = G_USUBE [[USUBE8]], [[C5]], [[USUBO7]] + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO6]](i32), [[USUBE10]](i32) + ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT1]](i32), [[C5]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[MV2]], [[MV1]] + ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT]](i32), [[C5]] + ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP7]](i1), [[SELECT2]], [[MV]] + ; GFX9-NEXT: [[UV30:%[0-9]+]]:_(i32), [[UV31:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV3]](i64) + ; GFX9-NEXT: [[UITOFP2:%[0-9]+]]:_(f32) = G_UITOFP [[UV30]](i32) + ; GFX9-NEXT: [[UITOFP3:%[0-9]+]]:_(f32) = G_UITOFP [[UV31]](i32) + ; GFX9-NEXT: [[FMUL4:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP3]], [[C]] + ; GFX9-NEXT: [[FADD2:%[0-9]+]]:_(f32) = G_FADD [[FMUL4]], [[UITOFP2]] + ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD2]](f32) + ; GFX9-NEXT: [[FMUL5:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C1]] + ; GFX9-NEXT: [[FMUL6:%[0-9]+]]:_(f32) = G_FMUL [[FMUL5]], [[C2]] + ; GFX9-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL6]] + ; GFX9-NEXT: [[FMUL7:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C3]] + ; GFX9-NEXT: [[FADD3:%[0-9]+]]:_(f32) = G_FADD [[FMUL7]], [[FMUL5]] + ; GFX9-NEXT: [[FPTOUI2:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD3]](f32) + ; GFX9-NEXT: [[FPTOUI3:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC1]](f32) + ; GFX9-NEXT: [[UV32:%[0-9]+]]:_(i32), [[UV33:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C4]](i64) + ; GFX9-NEXT: [[UV34:%[0-9]+]]:_(i32), [[UV35:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV3]](i64) + ; GFX9-NEXT: [[USUBO8:%[0-9]+]]:_(i32), [[USUBO9:%[0-9]+]]:_(i1) = G_USUBO [[UV32]], [[UV34]] + ; GFX9-NEXT: [[USUBE12:%[0-9]+]]:_(i32), [[USUBE13:%[0-9]+]]:_(i1) = G_USUBE [[UV33]], [[UV35]], [[USUBO9]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_18:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_19:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](i32), [[FPTOUI2]], [[C4]] + ; GFX9-NEXT: [[UV36:%[0-9]+]]:_(i32), [[UV37:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_18]](i64) + ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(i64) = G_ANYEXT 
[[UV37]](i32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_20:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_21:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](i32), [[FPTOUI3]], [[ANYEXT3]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_22:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_23:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE12]](i32), [[FPTOUI2]], [[AMDGPU_MAD_U64_U32_20]] + ; GFX9-NEXT: [[UV38:%[0-9]+]]:_(i32), [[UV39:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_22]](i64) + ; GFX9-NEXT: [[MUL9:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI3]], [[UV36]] + ; GFX9-NEXT: [[MUL10:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI2]], [[UV38]] + ; GFX9-NEXT: [[UMULH12:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI2]], [[UV36]] + ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(i32), [[UADDO35:%[0-9]+]]:_(i1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX9-NEXT: [[ZEXT15:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO35]](i1) + ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(i32), [[UADDO37:%[0-9]+]]:_(i1) = G_UADDO [[UADDO34]], [[UMULH12]] + ; GFX9-NEXT: [[ZEXT16:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO37]](i1) + ; GFX9-NEXT: [[ADD12:%[0-9]+]]:_(i32) = G_ADD [[ZEXT15]], [[ZEXT16]] + ; GFX9-NEXT: [[MUL11:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI3]], [[UV38]] + ; GFX9-NEXT: [[UMULH13:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI3]], [[UV36]] + ; GFX9-NEXT: [[UMULH14:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI2]], [[UV38]] + ; GFX9-NEXT: [[UADDO38:%[0-9]+]]:_(i32), [[UADDO39:%[0-9]+]]:_(i1) = G_UADDO [[MUL11]], [[UMULH13]] + ; GFX9-NEXT: [[ZEXT17:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO39]](i1) + ; GFX9-NEXT: [[UADDO40:%[0-9]+]]:_(i32), [[UADDO41:%[0-9]+]]:_(i1) = G_UADDO [[UADDO38]], [[UMULH14]] + ; GFX9-NEXT: [[ZEXT18:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO41]](i1) + ; GFX9-NEXT: [[ADD13:%[0-9]+]]:_(i32) = G_ADD [[ZEXT17]], [[ZEXT18]] + ; GFX9-NEXT: [[UADDO42:%[0-9]+]]:_(i32), [[UADDO43:%[0-9]+]]:_(i1) = G_UADDO [[UADDO40]], [[ADD12]] + ; GFX9-NEXT: [[ZEXT19:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO43]](i1) + ; GFX9-NEXT: [[ADD14:%[0-9]+]]:_(i32) = G_ADD [[ADD13]], [[ZEXT19]] + ; GFX9-NEXT: [[UMULH15:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI3]], [[UV38]] + ; GFX9-NEXT: [[ADD15:%[0-9]+]]:_(i32) = G_ADD [[UMULH15]], [[ADD14]] + ; GFX9-NEXT: [[UADDO44:%[0-9]+]]:_(i32), [[UADDO45:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI2]], [[UADDO42]] + ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(i32), [[UADDE5:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI3]], [[ADD15]], [[UADDO45]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](i32), [[UADDO44]], [[C4]] + ; GFX9-NEXT: [[UV40:%[0-9]+]]:_(i32), [[UV41:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_24]](i64) + ; GFX9-NEXT: [[ANYEXT4:%[0-9]+]]:_(i64) = G_ANYEXT [[UV41]](i32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](i32), [[UADDE4]], [[ANYEXT4]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE12]](i32), [[UADDO44]], [[AMDGPU_MAD_U64_U32_26]] + ; GFX9-NEXT: [[UV42:%[0-9]+]]:_(i32), [[UV43:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_28]](i64) + ; GFX9-NEXT: [[MUL12:%[0-9]+]]:_(i32) = G_MUL [[UADDE4]], [[UV40]] + ; GFX9-NEXT: [[MUL13:%[0-9]+]]:_(i32) = G_MUL [[UADDO44]], [[UV42]] + ; GFX9-NEXT: [[UMULH16:%[0-9]+]]:_(i32) = G_UMULH [[UADDO44]], [[UV40]] + ; GFX9-NEXT: [[UADDO46:%[0-9]+]]:_(i32), [[UADDO47:%[0-9]+]]:_(i1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX9-NEXT: [[ZEXT20:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO47]](i1) + ; GFX9-NEXT: [[UADDO48:%[0-9]+]]:_(i32), [[UADDO49:%[0-9]+]]:_(i1) = 
G_UADDO [[UADDO46]], [[UMULH16]] + ; GFX9-NEXT: [[ZEXT21:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO49]](i1) + ; GFX9-NEXT: [[ADD16:%[0-9]+]]:_(i32) = G_ADD [[ZEXT20]], [[ZEXT21]] + ; GFX9-NEXT: [[MUL14:%[0-9]+]]:_(i32) = G_MUL [[UADDE4]], [[UV42]] + ; GFX9-NEXT: [[UMULH17:%[0-9]+]]:_(i32) = G_UMULH [[UADDE4]], [[UV40]] + ; GFX9-NEXT: [[UMULH18:%[0-9]+]]:_(i32) = G_UMULH [[UADDO44]], [[UV42]] + ; GFX9-NEXT: [[UADDO50:%[0-9]+]]:_(i32), [[UADDO51:%[0-9]+]]:_(i1) = G_UADDO [[MUL14]], [[UMULH17]] + ; GFX9-NEXT: [[ZEXT22:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO51]](i1) + ; GFX9-NEXT: [[UADDO52:%[0-9]+]]:_(i32), [[UADDO53:%[0-9]+]]:_(i1) = G_UADDO [[UADDO50]], [[UMULH18]] + ; GFX9-NEXT: [[ZEXT23:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO53]](i1) + ; GFX9-NEXT: [[ADD17:%[0-9]+]]:_(i32) = G_ADD [[ZEXT22]], [[ZEXT23]] + ; GFX9-NEXT: [[UADDO54:%[0-9]+]]:_(i32), [[UADDO55:%[0-9]+]]:_(i1) = G_UADDO [[UADDO52]], [[ADD16]] + ; GFX9-NEXT: [[ZEXT24:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO55]](i1) + ; GFX9-NEXT: [[ADD18:%[0-9]+]]:_(i32) = G_ADD [[ADD17]], [[ZEXT24]] + ; GFX9-NEXT: [[UMULH19:%[0-9]+]]:_(i32) = G_UMULH [[UADDE4]], [[UV42]] + ; GFX9-NEXT: [[ADD19:%[0-9]+]]:_(i32) = G_ADD [[UMULH19]], [[ADD18]] + ; GFX9-NEXT: [[UADDO56:%[0-9]+]]:_(i32), [[UADDO57:%[0-9]+]]:_(i1) = G_UADDO [[UADDO44]], [[UADDO54]] + ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(i32), [[UADDE7:%[0-9]+]]:_(i1) = G_UADDE [[UADDE4]], [[ADD19]], [[UADDO57]] + ; GFX9-NEXT: [[UV44:%[0-9]+]]:_(i32), [[UV45:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV1]](i64) + ; GFX9-NEXT: [[UV46:%[0-9]+]]:_(i32), [[UV47:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV1]](i64) + ; GFX9-NEXT: [[MUL15:%[0-9]+]]:_(i32) = G_MUL [[UV47]], [[UADDO56]] + ; GFX9-NEXT: [[MUL16:%[0-9]+]]:_(i32) = G_MUL [[UV46]], [[UADDE6]] + ; GFX9-NEXT: [[UMULH20:%[0-9]+]]:_(i32) = G_UMULH [[UV46]], [[UADDO56]] + ; GFX9-NEXT: [[UADDO58:%[0-9]+]]:_(i32), [[UADDO59:%[0-9]+]]:_(i1) = G_UADDO [[MUL15]], [[MUL16]] + ; GFX9-NEXT: [[ZEXT25:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO59]](i1) + ; GFX9-NEXT: [[UADDO60:%[0-9]+]]:_(i32), [[UADDO61:%[0-9]+]]:_(i1) = G_UADDO [[UADDO58]], [[UMULH20]] + ; GFX9-NEXT: [[ZEXT26:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO61]](i1) + ; GFX9-NEXT: [[ADD20:%[0-9]+]]:_(i32) = G_ADD [[ZEXT25]], [[ZEXT26]] + ; GFX9-NEXT: [[MUL17:%[0-9]+]]:_(i32) = G_MUL [[UV47]], [[UADDE6]] + ; GFX9-NEXT: [[UMULH21:%[0-9]+]]:_(i32) = G_UMULH [[UV47]], [[UADDO56]] + ; GFX9-NEXT: [[UMULH22:%[0-9]+]]:_(i32) = G_UMULH [[UV46]], [[UADDE6]] + ; GFX9-NEXT: [[UADDO62:%[0-9]+]]:_(i32), [[UADDO63:%[0-9]+]]:_(i1) = G_UADDO [[MUL17]], [[UMULH21]] + ; GFX9-NEXT: [[ZEXT27:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO63]](i1) + ; GFX9-NEXT: [[UADDO64:%[0-9]+]]:_(i32), [[UADDO65:%[0-9]+]]:_(i1) = G_UADDO [[UADDO62]], [[UMULH22]] + ; GFX9-NEXT: [[ZEXT28:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO65]](i1) + ; GFX9-NEXT: [[ADD21:%[0-9]+]]:_(i32) = G_ADD [[ZEXT27]], [[ZEXT28]] + ; GFX9-NEXT: [[UADDO66:%[0-9]+]]:_(i32), [[UADDO67:%[0-9]+]]:_(i1) = G_UADDO [[UADDO64]], [[ADD20]] + ; GFX9-NEXT: [[ZEXT29:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO67]](i1) + ; GFX9-NEXT: [[ADD22:%[0-9]+]]:_(i32) = G_ADD [[ADD21]], [[ZEXT29]] + ; GFX9-NEXT: [[UMULH23:%[0-9]+]]:_(i32) = G_UMULH [[UV47]], [[UADDE6]] + ; GFX9-NEXT: [[ADD23:%[0-9]+]]:_(i32) = G_ADD [[UMULH23]], [[ADD22]] + ; GFX9-NEXT: [[UV48:%[0-9]+]]:_(i32), [[UV49:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV3]](i64) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV48]](i32), [[UADDO66]], [[C4]] + ; GFX9-NEXT: [[UV50:%[0-9]+]]:_(i32), [[UV51:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES 
[[AMDGPU_MAD_U64_U32_30]](i64) + ; GFX9-NEXT: [[ANYEXT5:%[0-9]+]]:_(i64) = G_ANYEXT [[UV51]](i32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV48]](i32), [[ADD23]], [[ANYEXT5]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV49]](i32), [[UADDO66]], [[AMDGPU_MAD_U64_U32_32]] + ; GFX9-NEXT: [[UV52:%[0-9]+]]:_(i32), [[UV53:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_34]](i64) + ; GFX9-NEXT: [[USUBO10:%[0-9]+]]:_(i32), [[USUBO11:%[0-9]+]]:_(i1) = G_USUBO [[UV44]], [[UV50]] + ; GFX9-NEXT: [[USUBE14:%[0-9]+]]:_(i32), [[USUBE15:%[0-9]+]]:_(i1) = G_USUBE [[UV45]], [[UV52]], [[USUBO11]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[UV45]], [[UV52]] + ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO10]](i32), [[USUBE14]](i32) + ; GFX9-NEXT: [[UV54:%[0-9]+]]:_(i32), [[UV55:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV3]](i64) + ; GFX9-NEXT: [[ICMP8:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE14]](i32), [[UV55]] + ; GFX9-NEXT: [[SEXT4:%[0-9]+]]:_(i32) = G_SEXT [[ICMP8]](i1) + ; GFX9-NEXT: [[ICMP9:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO10]](i32), [[UV54]] + ; GFX9-NEXT: [[SEXT5:%[0-9]+]]:_(i32) = G_SEXT [[ICMP9]](i1) + ; GFX9-NEXT: [[ICMP10:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE14]](i32), [[UV55]] + ; GFX9-NEXT: [[SELECT4:%[0-9]+]]:_(i32) = G_SELECT [[ICMP10]](i1), [[SEXT5]], [[SEXT4]] + ; GFX9-NEXT: [[USUBO12:%[0-9]+]]:_(i32), [[USUBO13:%[0-9]+]]:_(i1) = G_USUBO [[USUBO10]], [[UV54]] + ; GFX9-NEXT: [[USUBE16:%[0-9]+]]:_(i32), [[USUBE17:%[0-9]+]]:_(i1) = G_USUBE [[SUB1]], [[UV55]], [[USUBO11]] + ; GFX9-NEXT: [[USUBE18:%[0-9]+]]:_(i32), [[USUBE19:%[0-9]+]]:_(i1) = G_USUBE [[USUBE16]], [[C5]], [[USUBO13]] + ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO12]](i32), [[USUBE18]](i32) + ; GFX9-NEXT: [[ICMP11:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE18]](i32), [[UV55]] + ; GFX9-NEXT: [[SEXT6:%[0-9]+]]:_(i32) = G_SEXT [[ICMP11]](i1) + ; GFX9-NEXT: [[ICMP12:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO12]](i32), [[UV54]] + ; GFX9-NEXT: [[SEXT7:%[0-9]+]]:_(i32) = G_SEXT [[ICMP12]](i1) + ; GFX9-NEXT: [[ICMP13:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE18]](i32), [[UV55]] + ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(i32) = G_SELECT [[ICMP13]](i1), [[SEXT7]], [[SEXT6]] + ; GFX9-NEXT: [[USUBO14:%[0-9]+]]:_(i32), [[USUBO15:%[0-9]+]]:_(i1) = G_USUBO [[USUBO12]], [[UV54]] + ; GFX9-NEXT: [[USUBE20:%[0-9]+]]:_(i32), [[USUBE21:%[0-9]+]]:_(i1) = G_USUBE [[USUBE16]], [[UV55]], [[USUBO13]] + ; GFX9-NEXT: [[USUBE22:%[0-9]+]]:_(i32), [[USUBE23:%[0-9]+]]:_(i1) = G_USUBE [[USUBE20]], [[C5]], [[USUBO15]] + ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO14]](i32), [[USUBE22]](i32) + ; GFX9-NEXT: [[ICMP14:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT5]](i32), [[C5]] + ; GFX9-NEXT: [[SELECT6:%[0-9]+]]:_(i64) = G_SELECT [[ICMP14]](i1), [[MV5]], [[MV4]] + ; GFX9-NEXT: [[ICMP15:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT4]](i32), [[C5]] + ; GFX9-NEXT: [[SELECT7:%[0-9]+]]:_(i64) = G_SELECT [[ICMP15]](i1), [[SELECT6]], [[MV3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[SELECT3]](i64), [[SELECT7]](i64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; GFX10-LABEL: name: test_urem_v2s64 ; GFX10: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; 
GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV4]](s32) - ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV5]](s32) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]] - ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] - ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 - ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C2]] - ; GFX10-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] - ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] - ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) - ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) - ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[UV8]] - ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[UV9]], [[USUBO1]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C4]] - ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV11]], [[MUL]] - ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]] - ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV10]] - ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV10]] - ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[MUL3]] - ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV10]] - ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] - ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX10-NEXT: 
[[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] - ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] - ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD4]] - ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C4]] - ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64) - ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] - ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV13]], [[MUL5]] - ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] - ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[MUL6]] - ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV12]] - ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] - ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV12]] - ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] - ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] - ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] - ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV12]] - ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] - ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] - ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] - ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] - ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD10]] - ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] - ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO22]] - ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL 
[[UV16]], [[UADDE2]] - ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO22]] - ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] - ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] - ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE2]] - ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO22]] - ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE2]] - ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] - ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] - ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE2]] - ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD14]] - ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDO32]], [[C4]] - ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD15]] - ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV21]], [[MUL13]] - ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO32]] - ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL14]] - ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV20]] - ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD17]], [[USUBO3]] - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD17]] - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV23]] - ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV22]] - ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV23]] - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV22]] - ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV23]], [[USUBO3]] - ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] - ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) - ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = 
G_ICMP intpred(uge), [[USUBE6]](s32), [[UV23]] - ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV22]] - ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV23]] - ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV22]] - ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV23]], [[USUBO5]] - ; GFX10-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C5]], [[USUBO7]] - ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) - ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] - ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] - ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] - ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] - ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX10-NEXT: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV24]](s32) - ; GFX10-NEXT: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV25]](s32) - ; GFX10-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C]] - ; GFX10-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[UITOFP2]] - ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD2]](s32) - ; GFX10-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C1]] - ; GFX10-NEXT: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[FMUL5]], [[C2]] - ; GFX10-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL6]] - ; GFX10-NEXT: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C3]] - ; GFX10-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]] - ; GFX10-NEXT: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD3]](s32) - ; GFX10-NEXT: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32) - ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) - ; GFX10-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX10-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV28]] - ; GFX10-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV29]], [[USUBO9]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[FPTOUI2]], [[C4]] - ; GFX10-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) - ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI3]] - ; GFX10-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[UV31]], [[MUL15]] - ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[USUBE12]], [[FPTOUI2]] - ; GFX10-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[MUL16]] - ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV30]] - ; GFX10-NEXT: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD19]] - ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV30]] - ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[MUL18]] - ; GFX10-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX10-NEXT: 
[[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UMULH12]] - ; GFX10-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX10-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] - ; GFX10-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD19]] - ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV30]] - ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD19]] - ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL19]], [[UMULH13]] - ; GFX10-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO39]](s1) - ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH14]] - ; GFX10-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO41]](s1) - ; GFX10-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] - ; GFX10-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UADDO40]], [[ADD20]] - ; GFX10-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1) - ; GFX10-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT19]] - ; GFX10-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD19]] - ; GFX10-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[ADD22]] - ; GFX10-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO42]] - ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO45]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDO44]], [[C4]] - ; GFX10-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_8]](s64) - ; GFX10-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDE4]] - ; GFX10-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UV33]], [[MUL20]] - ; GFX10-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[USUBE12]], [[UADDO44]] - ; GFX10-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[MUL21]] - ; GFX10-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV32]] - ; GFX10-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[UADDO44]], [[ADD25]] - ; GFX10-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[UV32]] - ; GFX10-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL22]], [[MUL23]] - ; GFX10-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) - ; GFX10-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH16]] - ; GFX10-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1) - ; GFX10-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] - ; GFX10-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD25]] - ; GFX10-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV32]] - ; GFX10-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[ADD25]] - ; GFX10-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL24]], [[UMULH17]] - ; GFX10-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) - ; GFX10-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH18]] - ; GFX10-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1) - ; GFX10-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] - ; GFX10-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[ADD26]] - ; GFX10-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) - ; GFX10-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ADD27]], [[ZEXT24]] - ; GFX10-NEXT: 
[[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD25]] - ; GFX10-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD28]] - ; GFX10-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[UADDO54]] - ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD29]], [[UADDO57]] - ; GFX10-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX10-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX10-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[UV37]], [[UADDO56]] - ; GFX10-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[UV36]], [[UADDE6]] - ; GFX10-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV36]], [[UADDO56]] - ; GFX10-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL25]], [[MUL26]] - ; GFX10-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) - ; GFX10-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH20]] - ; GFX10-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1) - ; GFX10-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] - ; GFX10-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UV37]], [[UADDE6]] - ; GFX10-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV37]], [[UADDO56]] - ; GFX10-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV36]], [[UADDE6]] - ; GFX10-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[UMULH21]] - ; GFX10-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) - ; GFX10-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH22]] - ; GFX10-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1) - ; GFX10-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] - ; GFX10-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[UADDO64]], [[ADD30]] - ; GFX10-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) - ; GFX10-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ADD31]], [[ZEXT29]] - ; GFX10-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV37]], [[UADDE6]] - ; GFX10-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[ADD32]] - ; GFX10-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV38]](s32), [[UADDO66]], [[C4]] - ; GFX10-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX10-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UV38]], [[ADD33]] - ; GFX10-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[UV41]], [[MUL28]] - ; GFX10-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UV39]], [[UADDO66]] - ; GFX10-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[ADD34]], [[MUL29]] - ; GFX10-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV34]], [[UV40]] - ; GFX10-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV35]], [[ADD35]], [[USUBO11]] - ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV35]], [[ADD35]] - ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO10]](s32), [[USUBE14]](s32) - ; GFX10-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX10-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE14]](s32), [[UV43]] - ; GFX10-NEXT: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1) - ; GFX10-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV42]] - ; 
GFX10-NEXT: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1) - ; GFX10-NEXT: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE14]](s32), [[UV43]] - ; GFX10-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]] - ; GFX10-NEXT: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[USUBO10]], [[UV42]] - ; GFX10-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV43]], [[USUBO11]] - ; GFX10-NEXT: [[USUBE18:%[0-9]+]]:_(s32), [[USUBE19:%[0-9]+]]:_(s1) = G_USUBE [[USUBE16]], [[C5]], [[USUBO13]] - ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO12]](s32), [[USUBE18]](s32) - ; GFX10-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE18]](s32), [[UV43]] - ; GFX10-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) - ; GFX10-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV42]] - ; GFX10-NEXT: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1) - ; GFX10-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE18]](s32), [[UV43]] - ; GFX10-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] - ; GFX10-NEXT: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[USUBO12]], [[UV42]] - ; GFX10-NEXT: [[USUBE20:%[0-9]+]]:_(s32), [[USUBE21:%[0-9]+]]:_(s1) = G_USUBE [[USUBE16]], [[UV43]], [[USUBO13]] - ; GFX10-NEXT: [[USUBE22:%[0-9]+]]:_(s32), [[USUBE23:%[0-9]+]]:_(s1) = G_USUBE [[USUBE20]], [[C5]], [[USUBO15]] - ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO14]](s32), [[USUBE22]](s32) - ; GFX10-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C5]] - ; GFX10-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV5]], [[MV4]] - ; GFX10-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C5]] - ; GFX10-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV3]] - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT3]](s64), [[SELECT7]](s64) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - %2:_(<2 x s64>) = G_UREM %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY1]](<2 x i64>) + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[UV4]](i32) + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[UV5]](i32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP1]], [[C]] + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD]](f32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FMUL1]], [[C2]] + ; GFX10-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL2]] + 
; GFX10-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] + ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD1]](f32) + ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC]](f32) + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C4]](i64) + ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV6]], [[UV8]] + ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV7]], [[UV9]], [[USUBO1]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[FPTOUI]], [[C4]] + ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](i64) + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[UV11]], [[MUL]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ADD]], [[MUL1]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[UV10]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[UV10]] + ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[MUL2]], [[MUL3]] + ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO1]](i1) + ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UADDO]], [[UMULH]] + ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO3]](i1) + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[UV10]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(i32), [[UADDO5:%[0-9]+]]:_(i1) = G_UADDO [[MUL4]], [[UMULH1]] + ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO5]](i1) + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(i32), [[UADDO7:%[0-9]+]]:_(i1) = G_UADDO [[UADDO4]], [[UMULH2]] + ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO7]](i1) + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(i32), [[UADDO9:%[0-9]+]]:_(i1) = G_UADDO [[UADDO6]], [[ADD2]] + ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO9]](i1) + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[UMULH3]], [[ADD4]] + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(i32), [[UADDO11:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI]], [[UADDO8]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[UADDO10]], [[C4]] + ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](i64) + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[UADDE]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD 
[[UV13]], [[MUL5]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(i32) = G_MUL [[USUBE]], [[UADDO10]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[ADD6]], [[MUL6]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(i32) = G_MUL [[UADDE]], [[UV12]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(i32) = G_MUL [[UADDO10]], [[ADD7]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(i32) = G_UMULH [[UADDO10]], [[UV12]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(i32), [[UADDO13:%[0-9]+]]:_(i1) = G_UADDO [[MUL7]], [[MUL8]] + ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO13]](i1) + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(i32), [[UADDO15:%[0-9]+]]:_(i1) = G_UADDO [[UADDO12]], [[UMULH4]] + ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO15]](i1) + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(i32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(i32) = G_MUL [[UADDE]], [[ADD7]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(i32) = G_UMULH [[UADDE]], [[UV12]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(i32) = G_UMULH [[UADDO10]], [[ADD7]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(i32), [[UADDO17:%[0-9]+]]:_(i1) = G_UADDO [[MUL9]], [[UMULH5]] + ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO17]](i1) + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(i32), [[UADDO19:%[0-9]+]]:_(i1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO19]](i1) + ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(i32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(i32), [[UADDO21:%[0-9]+]]:_(i1) = G_UADDO [[UADDO18]], [[ADD8]] + ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO21]](i1) + ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(i32) = G_ADD [[ADD9]], [[ZEXT9]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(i32) = G_UMULH [[UADDE]], [[ADD7]] + ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(i32) = G_ADD [[UMULH7]], [[ADD10]] + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(i32), [[UADDO23:%[0-9]+]]:_(i1) = G_UADDO [[UADDO10]], [[UADDO20]] + ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] + ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV]](i64) + ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV]](i64) + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(i32) = G_MUL [[UV17]], [[UADDO22]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(i32) = G_MUL [[UV16]], [[UADDE2]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(i32) = G_UMULH [[UV16]], [[UADDO22]] + ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(i32), [[UADDO25:%[0-9]+]]:_(i1) = G_UADDO [[MUL10]], [[MUL11]] + ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO25]](i1) + ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(i32), [[UADDO27:%[0-9]+]]:_(i1) = G_UADDO [[UADDO24]], [[UMULH8]] + ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO27]](i1) + ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(i32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(i32) = G_MUL [[UV17]], [[UADDE2]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(i32) = G_UMULH [[UV17]], [[UADDO22]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(i32) = G_UMULH [[UV16]], [[UADDE2]] + ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(i32), [[UADDO29:%[0-9]+]]:_(i1) = G_UADDO [[MUL12]], [[UMULH9]] + ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO29]](i1) + ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(i32), [[UADDO31:%[0-9]+]]:_(i1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO31]](i1) + ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(i32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(i32), 
[[UADDO33:%[0-9]+]]:_(i1) = G_UADDO [[UADDO30]], [[ADD12]] + ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO33]](i1) + ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(i32) = G_ADD [[ADD13]], [[ZEXT14]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(i32) = G_UMULH [[UV17]], [[UADDE2]] + ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(i32) = G_ADD [[UMULH11]], [[ADD14]] + ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV18]](i32), [[UADDO32]], [[C4]] + ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](i64) + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(i32) = G_MUL [[UV18]], [[ADD15]] + ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(i32) = G_ADD [[UV21]], [[MUL13]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(i32) = G_MUL [[UV19]], [[UADDO32]] + ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(i32) = G_ADD [[ADD16]], [[MUL14]] + ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(i32), [[USUBO3:%[0-9]+]]:_(i1) = G_USUBO [[UV14]], [[UV20]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV15]], [[ADD17]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV15]], [[ADD17]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO2]](i32), [[USUBE2]](i32) + ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE2]](i32), [[UV23]] + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO2]](i32), [[UV22]] + ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(i32) = G_SEXT [[ICMP1]](i1) + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE2]](i32), [[UV23]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SEXT1]], [[SEXT]] + ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(i32), [[USUBO5:%[0-9]+]]:_(i1) = G_USUBO [[USUBO2]], [[UV22]] + ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(i32), [[USUBE5:%[0-9]+]]:_(i1) = G_USUBE [[SUB]], [[UV23]], [[USUBO3]] + ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(i32), [[USUBE7:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO4]](i32), [[USUBE6]](i32) + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE6]](i32), [[UV23]] + ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(i32) = G_SEXT [[ICMP3]](i1) + ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO4]](i32), [[UV22]] + ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(i32) = G_SEXT [[ICMP4]](i1) + ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE6]](i32), [[UV23]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP5]](i1), [[SEXT3]], [[SEXT2]] + ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(i32), [[USUBO7:%[0-9]+]]:_(i1) = G_USUBO [[USUBO4]], [[UV22]] + ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(i32), [[USUBE9:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[UV23]], [[USUBO5]] + ; GFX10-NEXT: [[USUBE10:%[0-9]+]]:_(i32), [[USUBE11:%[0-9]+]]:_(i1) = G_USUBE [[USUBE8]], [[C5]], [[USUBO7]] + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO6]](i32), [[USUBE10]](i32) + ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT1]](i32), [[C5]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[MV2]], [[MV1]] + ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT]](i32), [[C5]] + ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP7]](i1), 
[[SELECT2]], [[MV]] + ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV3]](i64) + ; GFX10-NEXT: [[UITOFP2:%[0-9]+]]:_(f32) = G_UITOFP [[UV24]](i32) + ; GFX10-NEXT: [[UITOFP3:%[0-9]+]]:_(f32) = G_UITOFP [[UV25]](i32) + ; GFX10-NEXT: [[FMUL4:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP3]], [[C]] + ; GFX10-NEXT: [[FADD2:%[0-9]+]]:_(f32) = G_FADD [[FMUL4]], [[UITOFP2]] + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD2]](f32) + ; GFX10-NEXT: [[FMUL5:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C1]] + ; GFX10-NEXT: [[FMUL6:%[0-9]+]]:_(f32) = G_FMUL [[FMUL5]], [[C2]] + ; GFX10-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL6]] + ; GFX10-NEXT: [[FMUL7:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C3]] + ; GFX10-NEXT: [[FADD3:%[0-9]+]]:_(f32) = G_FADD [[FMUL7]], [[FMUL5]] + ; GFX10-NEXT: [[FPTOUI2:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD3]](f32) + ; GFX10-NEXT: [[FPTOUI3:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC1]](f32) + ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(i32), [[UV27:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C4]](i64) + ; GFX10-NEXT: [[UV28:%[0-9]+]]:_(i32), [[UV29:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV3]](i64) + ; GFX10-NEXT: [[USUBO8:%[0-9]+]]:_(i32), [[USUBO9:%[0-9]+]]:_(i1) = G_USUBO [[UV26]], [[UV28]] + ; GFX10-NEXT: [[USUBE12:%[0-9]+]]:_(i32), [[USUBE13:%[0-9]+]]:_(i1) = G_USUBE [[UV27]], [[UV29]], [[USUBO9]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](i32), [[FPTOUI2]], [[C4]] + ; GFX10-NEXT: [[UV30:%[0-9]+]]:_(i32), [[UV31:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](i64) + ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(i32) = G_MUL [[USUBO8]], [[FPTOUI3]] + ; GFX10-NEXT: [[ADD18:%[0-9]+]]:_(i32) = G_ADD [[UV31]], [[MUL15]] + ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(i32) = G_MUL [[USUBE12]], [[FPTOUI2]] + ; GFX10-NEXT: [[ADD19:%[0-9]+]]:_(i32) = G_ADD [[ADD18]], [[MUL16]] + ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI3]], [[UV30]] + ; GFX10-NEXT: [[MUL18:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI2]], [[ADD19]] + ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI2]], [[UV30]] + ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(i32), [[UADDO35:%[0-9]+]]:_(i1) = G_UADDO [[MUL17]], [[MUL18]] + ; GFX10-NEXT: [[ZEXT15:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO35]](i1) + ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(i32), [[UADDO37:%[0-9]+]]:_(i1) = G_UADDO [[UADDO34]], [[UMULH12]] + ; GFX10-NEXT: [[ZEXT16:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO37]](i1) + ; GFX10-NEXT: [[ADD20:%[0-9]+]]:_(i32) = G_ADD [[ZEXT15]], [[ZEXT16]] + ; GFX10-NEXT: [[MUL19:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI3]], [[ADD19]] + ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI3]], [[UV30]] + ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI2]], [[ADD19]] + ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(i32), [[UADDO39:%[0-9]+]]:_(i1) = G_UADDO [[MUL19]], [[UMULH13]] + ; GFX10-NEXT: [[ZEXT17:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO39]](i1) + ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(i32), [[UADDO41:%[0-9]+]]:_(i1) = G_UADDO [[UADDO38]], [[UMULH14]] + ; GFX10-NEXT: [[ZEXT18:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO41]](i1) + ; GFX10-NEXT: [[ADD21:%[0-9]+]]:_(i32) = G_ADD [[ZEXT17]], [[ZEXT18]] + ; GFX10-NEXT: [[UADDO42:%[0-9]+]]:_(i32), [[UADDO43:%[0-9]+]]:_(i1) = G_UADDO [[UADDO40]], [[ADD20]] + ; GFX10-NEXT: [[ZEXT19:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO43]](i1) + ; GFX10-NEXT: [[ADD22:%[0-9]+]]:_(i32) = G_ADD [[ADD21]], [[ZEXT19]] + ; GFX10-NEXT: [[UMULH15:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI3]], [[ADD19]] 
+ ; GFX10-NEXT: [[ADD23:%[0-9]+]]:_(i32) = G_ADD [[UMULH15]], [[ADD22]] + ; GFX10-NEXT: [[UADDO44:%[0-9]+]]:_(i32), [[UADDO45:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI2]], [[UADDO42]] + ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(i32), [[UADDE5:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO45]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](i32), [[UADDO44]], [[C4]] + ; GFX10-NEXT: [[UV32:%[0-9]+]]:_(i32), [[UV33:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_8]](i64) + ; GFX10-NEXT: [[MUL20:%[0-9]+]]:_(i32) = G_MUL [[USUBO8]], [[UADDE4]] + ; GFX10-NEXT: [[ADD24:%[0-9]+]]:_(i32) = G_ADD [[UV33]], [[MUL20]] + ; GFX10-NEXT: [[MUL21:%[0-9]+]]:_(i32) = G_MUL [[USUBE12]], [[UADDO44]] + ; GFX10-NEXT: [[ADD25:%[0-9]+]]:_(i32) = G_ADD [[ADD24]], [[MUL21]] + ; GFX10-NEXT: [[MUL22:%[0-9]+]]:_(i32) = G_MUL [[UADDE4]], [[UV32]] + ; GFX10-NEXT: [[MUL23:%[0-9]+]]:_(i32) = G_MUL [[UADDO44]], [[ADD25]] + ; GFX10-NEXT: [[UMULH16:%[0-9]+]]:_(i32) = G_UMULH [[UADDO44]], [[UV32]] + ; GFX10-NEXT: [[UADDO46:%[0-9]+]]:_(i32), [[UADDO47:%[0-9]+]]:_(i1) = G_UADDO [[MUL22]], [[MUL23]] + ; GFX10-NEXT: [[ZEXT20:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO47]](i1) + ; GFX10-NEXT: [[UADDO48:%[0-9]+]]:_(i32), [[UADDO49:%[0-9]+]]:_(i1) = G_UADDO [[UADDO46]], [[UMULH16]] + ; GFX10-NEXT: [[ZEXT21:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO49]](i1) + ; GFX10-NEXT: [[ADD26:%[0-9]+]]:_(i32) = G_ADD [[ZEXT20]], [[ZEXT21]] + ; GFX10-NEXT: [[MUL24:%[0-9]+]]:_(i32) = G_MUL [[UADDE4]], [[ADD25]] + ; GFX10-NEXT: [[UMULH17:%[0-9]+]]:_(i32) = G_UMULH [[UADDE4]], [[UV32]] + ; GFX10-NEXT: [[UMULH18:%[0-9]+]]:_(i32) = G_UMULH [[UADDO44]], [[ADD25]] + ; GFX10-NEXT: [[UADDO50:%[0-9]+]]:_(i32), [[UADDO51:%[0-9]+]]:_(i1) = G_UADDO [[MUL24]], [[UMULH17]] + ; GFX10-NEXT: [[ZEXT22:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO51]](i1) + ; GFX10-NEXT: [[UADDO52:%[0-9]+]]:_(i32), [[UADDO53:%[0-9]+]]:_(i1) = G_UADDO [[UADDO50]], [[UMULH18]] + ; GFX10-NEXT: [[ZEXT23:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO53]](i1) + ; GFX10-NEXT: [[ADD27:%[0-9]+]]:_(i32) = G_ADD [[ZEXT22]], [[ZEXT23]] + ; GFX10-NEXT: [[UADDO54:%[0-9]+]]:_(i32), [[UADDO55:%[0-9]+]]:_(i1) = G_UADDO [[UADDO52]], [[ADD26]] + ; GFX10-NEXT: [[ZEXT24:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO55]](i1) + ; GFX10-NEXT: [[ADD28:%[0-9]+]]:_(i32) = G_ADD [[ADD27]], [[ZEXT24]] + ; GFX10-NEXT: [[UMULH19:%[0-9]+]]:_(i32) = G_UMULH [[UADDE4]], [[ADD25]] + ; GFX10-NEXT: [[ADD29:%[0-9]+]]:_(i32) = G_ADD [[UMULH19]], [[ADD28]] + ; GFX10-NEXT: [[UADDO56:%[0-9]+]]:_(i32), [[UADDO57:%[0-9]+]]:_(i1) = G_UADDO [[UADDO44]], [[UADDO54]] + ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(i32), [[UADDE7:%[0-9]+]]:_(i1) = G_UADDE [[UADDE4]], [[ADD29]], [[UADDO57]] + ; GFX10-NEXT: [[UV34:%[0-9]+]]:_(i32), [[UV35:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV1]](i64) + ; GFX10-NEXT: [[UV36:%[0-9]+]]:_(i32), [[UV37:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV1]](i64) + ; GFX10-NEXT: [[MUL25:%[0-9]+]]:_(i32) = G_MUL [[UV37]], [[UADDO56]] + ; GFX10-NEXT: [[MUL26:%[0-9]+]]:_(i32) = G_MUL [[UV36]], [[UADDE6]] + ; GFX10-NEXT: [[UMULH20:%[0-9]+]]:_(i32) = G_UMULH [[UV36]], [[UADDO56]] + ; GFX10-NEXT: [[UADDO58:%[0-9]+]]:_(i32), [[UADDO59:%[0-9]+]]:_(i1) = G_UADDO [[MUL25]], [[MUL26]] + ; GFX10-NEXT: [[ZEXT25:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO59]](i1) + ; GFX10-NEXT: [[UADDO60:%[0-9]+]]:_(i32), [[UADDO61:%[0-9]+]]:_(i1) = G_UADDO [[UADDO58]], [[UMULH20]] + ; GFX10-NEXT: [[ZEXT26:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO61]](i1) + ; GFX10-NEXT: [[ADD30:%[0-9]+]]:_(i32) = G_ADD [[ZEXT25]], [[ZEXT26]] + ; GFX10-NEXT: 
[[MUL27:%[0-9]+]]:_(i32) = G_MUL [[UV37]], [[UADDE6]] + ; GFX10-NEXT: [[UMULH21:%[0-9]+]]:_(i32) = G_UMULH [[UV37]], [[UADDO56]] + ; GFX10-NEXT: [[UMULH22:%[0-9]+]]:_(i32) = G_UMULH [[UV36]], [[UADDE6]] + ; GFX10-NEXT: [[UADDO62:%[0-9]+]]:_(i32), [[UADDO63:%[0-9]+]]:_(i1) = G_UADDO [[MUL27]], [[UMULH21]] + ; GFX10-NEXT: [[ZEXT27:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO63]](i1) + ; GFX10-NEXT: [[UADDO64:%[0-9]+]]:_(i32), [[UADDO65:%[0-9]+]]:_(i1) = G_UADDO [[UADDO62]], [[UMULH22]] + ; GFX10-NEXT: [[ZEXT28:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO65]](i1) + ; GFX10-NEXT: [[ADD31:%[0-9]+]]:_(i32) = G_ADD [[ZEXT27]], [[ZEXT28]] + ; GFX10-NEXT: [[UADDO66:%[0-9]+]]:_(i32), [[UADDO67:%[0-9]+]]:_(i1) = G_UADDO [[UADDO64]], [[ADD30]] + ; GFX10-NEXT: [[ZEXT29:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO67]](i1) + ; GFX10-NEXT: [[ADD32:%[0-9]+]]:_(i32) = G_ADD [[ADD31]], [[ZEXT29]] + ; GFX10-NEXT: [[UMULH23:%[0-9]+]]:_(i32) = G_UMULH [[UV37]], [[UADDE6]] + ; GFX10-NEXT: [[ADD33:%[0-9]+]]:_(i32) = G_ADD [[UMULH23]], [[ADD32]] + ; GFX10-NEXT: [[UV38:%[0-9]+]]:_(i32), [[UV39:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV3]](i64) + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV38]](i32), [[UADDO66]], [[C4]] + ; GFX10-NEXT: [[UV40:%[0-9]+]]:_(i32), [[UV41:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](i64) + ; GFX10-NEXT: [[MUL28:%[0-9]+]]:_(i32) = G_MUL [[UV38]], [[ADD33]] + ; GFX10-NEXT: [[ADD34:%[0-9]+]]:_(i32) = G_ADD [[UV41]], [[MUL28]] + ; GFX10-NEXT: [[MUL29:%[0-9]+]]:_(i32) = G_MUL [[UV39]], [[UADDO66]] + ; GFX10-NEXT: [[ADD35:%[0-9]+]]:_(i32) = G_ADD [[ADD34]], [[MUL29]] + ; GFX10-NEXT: [[USUBO10:%[0-9]+]]:_(i32), [[USUBO11:%[0-9]+]]:_(i1) = G_USUBO [[UV34]], [[UV40]] + ; GFX10-NEXT: [[USUBE14:%[0-9]+]]:_(i32), [[USUBE15:%[0-9]+]]:_(i1) = G_USUBE [[UV35]], [[ADD35]], [[USUBO11]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[UV35]], [[ADD35]] + ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO10]](i32), [[USUBE14]](i32) + ; GFX10-NEXT: [[UV42:%[0-9]+]]:_(i32), [[UV43:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV3]](i64) + ; GFX10-NEXT: [[ICMP8:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE14]](i32), [[UV43]] + ; GFX10-NEXT: [[SEXT4:%[0-9]+]]:_(i32) = G_SEXT [[ICMP8]](i1) + ; GFX10-NEXT: [[ICMP9:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO10]](i32), [[UV42]] + ; GFX10-NEXT: [[SEXT5:%[0-9]+]]:_(i32) = G_SEXT [[ICMP9]](i1) + ; GFX10-NEXT: [[ICMP10:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE14]](i32), [[UV43]] + ; GFX10-NEXT: [[SELECT4:%[0-9]+]]:_(i32) = G_SELECT [[ICMP10]](i1), [[SEXT5]], [[SEXT4]] + ; GFX10-NEXT: [[USUBO12:%[0-9]+]]:_(i32), [[USUBO13:%[0-9]+]]:_(i1) = G_USUBO [[USUBO10]], [[UV42]] + ; GFX10-NEXT: [[USUBE16:%[0-9]+]]:_(i32), [[USUBE17:%[0-9]+]]:_(i1) = G_USUBE [[SUB1]], [[UV43]], [[USUBO11]] + ; GFX10-NEXT: [[USUBE18:%[0-9]+]]:_(i32), [[USUBE19:%[0-9]+]]:_(i1) = G_USUBE [[USUBE16]], [[C5]], [[USUBO13]] + ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO12]](i32), [[USUBE18]](i32) + ; GFX10-NEXT: [[ICMP11:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE18]](i32), [[UV43]] + ; GFX10-NEXT: [[SEXT6:%[0-9]+]]:_(i32) = G_SEXT [[ICMP11]](i1) + ; GFX10-NEXT: [[ICMP12:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO12]](i32), [[UV42]] + ; GFX10-NEXT: [[SEXT7:%[0-9]+]]:_(i32) = G_SEXT [[ICMP12]](i1) + ; GFX10-NEXT: [[ICMP13:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE18]](i32), [[UV43]] + ; GFX10-NEXT: [[SELECT5:%[0-9]+]]:_(i32) = G_SELECT [[ICMP13]](i1), [[SEXT7]], [[SEXT6]] + ; GFX10-NEXT: 
[[USUBO14:%[0-9]+]]:_(i32), [[USUBO15:%[0-9]+]]:_(i1) = G_USUBO [[USUBO12]], [[UV42]] + ; GFX10-NEXT: [[USUBE20:%[0-9]+]]:_(i32), [[USUBE21:%[0-9]+]]:_(i1) = G_USUBE [[USUBE16]], [[UV43]], [[USUBO13]] + ; GFX10-NEXT: [[USUBE22:%[0-9]+]]:_(i32), [[USUBE23:%[0-9]+]]:_(i1) = G_USUBE [[USUBE20]], [[C5]], [[USUBO15]] + ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO14]](i32), [[USUBE22]](i32) + ; GFX10-NEXT: [[ICMP14:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT5]](i32), [[C5]] + ; GFX10-NEXT: [[SELECT6:%[0-9]+]]:_(i64) = G_SELECT [[ICMP14]](i1), [[MV5]], [[MV4]] + ; GFX10-NEXT: [[ICMP15:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT4]](i32), [[C5]] + ; GFX10-NEXT: [[SELECT7:%[0-9]+]]:_(i64) = G_SELECT [[ICMP15]](i1), [[SELECT6]], [[MV3]] + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[SELECT3]](i64), [[SELECT7]](i64) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(<2 x i64>) = G_UREM %0, %1 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<2 x i64>) ... --- @@ -2070,129 +2070,129 @@ body: | ; GFX6-LABEL: name: test_urem_s16 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) - ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]] - ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] - ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[AND1]] - ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C]] - ; GFX6-NEXT: $vgpr0 = COPY [[AND2]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[AND1]](i32) + ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + 
; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[AND1]] + ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[AND]], [[ADD]] + ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[AND]], [[MUL1]] + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[AND1]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[AND1]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT]](i32), [[AND1]] + ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SELECT]], [[AND1]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SUB3]], [[SELECT]] + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[SELECT1]], [[C]] + ; GFX6-NEXT: $vgpr0 = COPY [[AND2]](i32) ; ; GFX8-LABEL: name: test_urem_s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) - ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]] - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] - ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] - ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[AND1]] - ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C]] - ; GFX8-NEXT: $vgpr0 = COPY [[AND2]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[AND1]](i32) + ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG 
[[UITOFP]](f32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[AND1]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[AND]], [[ADD]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[AND]], [[MUL1]] + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[AND1]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[AND1]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT]](i32), [[AND1]] + ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SELECT]], [[AND1]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SUB3]], [[SELECT]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[SELECT1]], [[C]] + ; GFX8-NEXT: $vgpr0 = COPY [[AND2]](i32) ; ; GFX9-LABEL: name: test_urem_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) - ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]] - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] - ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] - ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[AND1]] - ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C]] - ; GFX9-NEXT: $vgpr0 = COPY [[AND2]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = 
G_UITOFP [[AND1]](i32) + ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[AND1]] + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[AND]], [[ADD]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[AND]], [[MUL1]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[AND1]] + ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[AND1]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT]](i32), [[AND1]] + ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SELECT]], [[AND1]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SUB3]], [[SELECT]] + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[SELECT1]], [[C]] + ; GFX9-NEXT: $vgpr0 = COPY [[AND2]](i32) ; ; GFX10-LABEL: name: test_urem_s16 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) - ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]] - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] - ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] - ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[AND1]] - ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] - ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C]] - ; GFX10-NEXT: $vgpr0 = COPY [[AND2]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s16) = G_UREM %2, %3 - %5:_(s32) = G_ZEXT %4 - $vgpr0 = COPY %5 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = 
COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX10-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[AND1]](i32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[AND1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[AND]], [[ADD]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[AND]], [[MUL1]] + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[AND1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[AND1]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT]](i32), [[AND1]] + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SELECT]], [[AND1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SUB3]], [[SELECT]] + ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[SELECT1]], [[C]] + ; GFX10-NEXT: $vgpr0 = COPY [[AND2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i16) = G_UREM %2, %3 + %5:_(i32) = G_ZEXT %4(i16) + $vgpr0 = COPY %5(i32) ... 
--- @@ -2204,226 +2204,226 @@ body: | ; GFX6-LABEL: name: test_urem_v2s16 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) - ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] - ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND1]] - ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] - ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[AND1]] - ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX6-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[LSHR1]](s32) - ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) - ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] - ; GFX6-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[LSHR1]] - ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB4]], [[FPTOUI1]] - ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] - ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] - ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[LSHR]], [[ADD1]] - ; GFX6-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[LSHR1]] - ; GFX6-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[LSHR]], [[MUL3]] - ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[LSHR1]] - ; GFX6-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[LSHR1]] - ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB6]], [[SUB5]] - ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[LSHR1]] - ; GFX6-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[LSHR1]] - ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB7]], [[SELECT2]] - ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C1]] - ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) 
= G_AND [[SELECT3]], [[C1]] - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]] - ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C1]] + ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[AND1]](i32) + ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C3]], [[AND1]] + ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[AND]], [[ADD]] + ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[AND]], [[MUL1]] + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[AND1]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[AND1]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT]](i32), [[AND1]] + ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SELECT]], [[AND1]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SUB3]], [[SELECT]] + ; GFX6-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[LSHR1]](i32) + ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](f32) + ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] + ; GFX6-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL1]](f32) + ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[C3]], [[LSHR1]] + ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[SUB4]], [[FPTOUI1]] + ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[LSHR]], [[ADD1]] + ; GFX6-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UMULH3]], [[LSHR1]] + ; GFX6-NEXT: [[SUB5:%[0-9]+]]:_(i32) = G_SUB [[LSHR]], [[MUL3]] + ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB5]](i32), [[LSHR1]] + ; GFX6-NEXT: [[SUB6:%[0-9]+]]:_(i32) = G_SUB [[SUB5]], [[LSHR1]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SUB6]], [[SUB5]] + ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT2]](i32), [[LSHR1]] + ; GFX6-NEXT: [[SUB7:%[0-9]+]]:_(i32) = G_SUB [[SELECT2]], [[LSHR1]] + ; GFX6-NEXT: 
[[SELECT3:%[0-9]+]]:_(i32) = G_SELECT [[ICMP3]](i1), [[SUB7]], [[SELECT2]] + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[SELECT1]], [[C1]] + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[SELECT3]], [[C1]] + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C]](i32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL]] + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) ; ; GFX8-LABEL: name: test_urem_v2s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) - ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] - ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND1]] - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] - ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] - ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[AND1]] - ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX8-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[LSHR1]](s32) - ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) - ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] - ; GFX8-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[LSHR1]] - ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB4]], [[FPTOUI1]] - ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] - ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[LSHR]], [[ADD1]] - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[LSHR1]] - ; GFX8-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[LSHR]], [[MUL3]] - ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[LSHR1]] - ; GFX8-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[LSHR1]] - ; GFX8-NEXT: 
[[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB6]], [[SUB5]] - ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[LSHR1]] - ; GFX8-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[LSHR1]] - ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB7]], [[SELECT2]] - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C1]] - ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SELECT3]], [[C1]] - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]] - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C1]] + ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[AND1]](i32) + ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C3]], [[AND1]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[AND]], [[ADD]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[AND]], [[MUL1]] + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[AND1]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[AND1]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT]](i32), [[AND1]] + ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SELECT]], [[AND1]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SUB3]], [[SELECT]] + ; GFX8-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[LSHR1]](i32) + ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](f32) + ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] + ; GFX8-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL1]](f32) + ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[C3]], [[LSHR1]] + ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[SUB4]], [[FPTOUI1]] + ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[LSHR]], [[ADD1]] + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UMULH3]], [[LSHR1]] + ; GFX8-NEXT: [[SUB5:%[0-9]+]]:_(i32) = G_SUB [[LSHR]], [[MUL3]] + ; 
GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB5]](i32), [[LSHR1]] + ; GFX8-NEXT: [[SUB6:%[0-9]+]]:_(i32) = G_SUB [[SUB5]], [[LSHR1]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SUB6]], [[SUB5]] + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT2]](i32), [[LSHR1]] + ; GFX8-NEXT: [[SUB7:%[0-9]+]]:_(i32) = G_SUB [[SELECT2]], [[LSHR1]] + ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(i32) = G_SELECT [[ICMP3]](i1), [[SUB7]], [[SELECT2]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[SELECT1]], [[C1]] + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[SELECT3]], [[C1]] + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND3]], [[C]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL]] + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) ; ; GFX9-LABEL: name: test_urem_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) - ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] - ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND1]] - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] - ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] - ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[AND1]] - ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SELECT1]](s32) - ; GFX9-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[LSHR1]](s32) - ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) - ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] - ; GFX9-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[LSHR1]] - ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB4]], [[FPTOUI1]] - ; GFX9-NEXT: 
[[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] - ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[LSHR]], [[ADD1]] - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[LSHR1]] - ; GFX9-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[LSHR]], [[MUL3]] - ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[LSHR1]] - ; GFX9-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[LSHR1]] - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB6]], [[SUB5]] - ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[LSHR1]] - ; GFX9-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[LSHR1]] - ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB7]], [[SELECT2]] - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SELECT3]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C1]] + ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[AND1]](i32) + ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C3]], [[AND1]] + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[AND]], [[ADD]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[AND]], [[MUL1]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[AND1]] + ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[AND1]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT]](i32), [[AND1]] + ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SELECT]], [[AND1]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SUB3]], [[SELECT]] + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[SELECT1]](i32) + ; GFX9-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[LSHR1]](i32) + ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](f32) + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] + ; GFX9-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL1]](f32) + ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[C3]], [[LSHR1]] + ; GFX9-NEXT: 
[[MUL2:%[0-9]+]]:_(i32) = G_MUL [[SUB4]], [[FPTOUI1]] + ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[LSHR]], [[ADD1]] + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UMULH3]], [[LSHR1]] + ; GFX9-NEXT: [[SUB5:%[0-9]+]]:_(i32) = G_SUB [[LSHR]], [[MUL3]] + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB5]](i32), [[LSHR1]] + ; GFX9-NEXT: [[SUB6:%[0-9]+]]:_(i32) = G_SUB [[SUB5]], [[LSHR1]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SUB6]], [[SUB5]] + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT2]](i32), [[LSHR1]] + ; GFX9-NEXT: [[SUB7:%[0-9]+]]:_(i32) = G_SUB [[SELECT2]], [[LSHR1]] + ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(i32) = G_SELECT [[ICMP3]](i1), [[SUB7]], [[SELECT2]] + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[SELECT3]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) ; ; GFX10-LABEL: name: test_urem_v2s16 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) - ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] - ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND1]] - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] - ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] - ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[AND1]] - ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] - ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SELECT1]](s32) - ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[LSHR1]](s32) - ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) - ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) 
= G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] - ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[LSHR1]] - ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB4]], [[FPTOUI1]] - ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] - ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[LSHR]], [[ADD1]] - ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[LSHR1]] - ; GFX10-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[LSHR]], [[MUL3]] - ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[LSHR1]] - ; GFX10-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[LSHR1]] - ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB6]], [[SUB5]] - ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[LSHR1]] - ; GFX10-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[LSHR1]] - ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB7]], [[SELECT2]] - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SELECT3]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_UREM %0, %1 - $vgpr0 = COPY %2 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX10-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C1]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[AND1]](i32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C3]], [[AND1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[AND]], [[ADD]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[AND]], [[MUL1]] + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[AND1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[AND1]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT]](i32), [[AND1]] + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SELECT]], [[AND1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SUB3]], [[SELECT]] + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(i16) 
= G_TRUNC [[SELECT1]](i32) + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[LSHR1]](i32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](f32) + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] + ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL1]](f32) + ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(i32) = G_SUB [[C3]], [[LSHR1]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[SUB4]], [[FPTOUI1]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[LSHR]], [[ADD1]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UMULH3]], [[LSHR1]] + ; GFX10-NEXT: [[SUB5:%[0-9]+]]:_(i32) = G_SUB [[LSHR]], [[MUL3]] + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB5]](i32), [[LSHR1]] + ; GFX10-NEXT: [[SUB6:%[0-9]+]]:_(i32) = G_SUB [[SUB5]], [[LSHR1]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SUB6]], [[SUB5]] + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT2]](i32), [[LSHR1]] + ; GFX10-NEXT: [[SUB7:%[0-9]+]]:_(i32) = G_SUB [[SELECT2]], [[LSHR1]] + ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(i32) = G_SELECT [[ICMP3]](i1), [[SUB7]], [[SELECT2]] + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[SELECT3]](i32) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX10-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = G_UREM %0, %1 + $vgpr0 = COPY %2(<2 x i16>) ... --- @@ -2435,125 +2435,125 @@ body: | ; GFX6-LABEL: name: test_urem_s7 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) - ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]] - ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] - ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[AND1]] - ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX6-NEXT: $vgpr0 = COPY [[SELECT1]](s32) + ; GFX6-NEXT: 
[[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 127 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[AND1]](i32) + ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[AND1]] + ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[AND]], [[ADD]] + ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[AND]], [[MUL1]] + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[AND1]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[AND1]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT]](i32), [[AND1]] + ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SELECT]], [[AND1]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SUB3]], [[SELECT]] + ; GFX6-NEXT: $vgpr0 = COPY [[SELECT1]](i32) ; ; GFX8-LABEL: name: test_urem_s7 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) - ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]] - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] - ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] - ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[AND1]] - ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX8-NEXT: $vgpr0 = COPY [[SELECT1]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + 
; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 127 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[AND1]](i32) + ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[AND1]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[AND]], [[ADD]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[AND]], [[MUL1]] + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[AND1]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[AND1]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT]](i32), [[AND1]] + ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SELECT]], [[AND1]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SUB3]], [[SELECT]] + ; GFX8-NEXT: $vgpr0 = COPY [[SELECT1]](i32) ; ; GFX9-LABEL: name: test_urem_s7 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) - ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]] - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] - ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] - ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[AND1]] - ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX9-NEXT: $vgpr0 = COPY [[SELECT1]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = 
COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 127 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[AND1]](i32) + ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[AND1]] + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[AND]], [[ADD]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[AND]], [[MUL1]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[AND1]] + ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[AND1]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT]](i32), [[AND1]] + ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SELECT]], [[AND1]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SUB3]], [[SELECT]] + ; GFX9-NEXT: $vgpr0 = COPY [[SELECT1]](i32) ; ; GFX10-LABEL: name: test_urem_s7 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) - ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]] - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] - ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] - ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[AND1]] - ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] - ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX10-NEXT: $vgpr0 = COPY [[SELECT1]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s7) = G_TRUNC %0 - %3:_(s7) = G_TRUNC %1 - 
%4:_(s7) = G_UREM %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 127 + ; GFX10-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[AND1]](i32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[AND1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[AND]], [[ADD]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[AND]], [[MUL1]] + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[AND1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[AND1]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT]](i32), [[AND1]] + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SELECT]], [[AND1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SUB3]], [[SELECT]] + ; GFX10-NEXT: $vgpr0 = COPY [[SELECT1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i7) = G_TRUNC %0(i32) + %3:_(i7) = G_TRUNC %1(i32) + %4:_(i7) = G_UREM %2, %3 + %5:_(i32) = G_ANYEXT %4(i7) + $vgpr0 = COPY %5(i32) ... 
--- @@ -2565,125 +2565,125 @@ body: | ; GFX6-LABEL: name: test_urem_s17 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 131071 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) - ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]] - ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] - ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[AND1]] - ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX6-NEXT: $vgpr0 = COPY [[SELECT1]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 131071 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[AND1]](i32) + ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[AND1]] + ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[AND]], [[ADD]] + ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[AND]], [[MUL1]] + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[AND1]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[AND1]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT]](i32), [[AND1]] + ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SELECT]], [[AND1]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SUB3]], [[SELECT]] + ; GFX6-NEXT: $vgpr0 = COPY 
[[SELECT1]](i32) ; ; GFX8-LABEL: name: test_urem_s17 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 131071 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) - ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]] - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] - ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] - ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[AND1]] - ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX8-NEXT: $vgpr0 = COPY [[SELECT1]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 131071 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[AND1]](i32) + ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[AND1]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[AND]], [[ADD]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[AND]], [[MUL1]] + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[AND1]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[AND1]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT]](i32), [[AND1]] + ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SELECT]], [[AND1]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SUB3]], [[SELECT]] + ; GFX8-NEXT: $vgpr0 = COPY [[SELECT1]](i32) ; ; GFX9-LABEL: 
name: test_urem_s17 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 131071 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) - ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]] - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] - ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] - ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[AND1]] - ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX9-NEXT: $vgpr0 = COPY [[SELECT1]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 131071 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[AND1]](i32) + ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[AND1]] + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[AND]], [[ADD]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[AND]], [[MUL1]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[AND1]] + ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[AND1]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT]](i32), [[AND1]] + ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SELECT]], [[AND1]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SUB3]], [[SELECT]] + ; GFX9-NEXT: $vgpr0 = COPY [[SELECT1]](i32) ; ; GFX10-LABEL: name: test_urem_s17 ; GFX10: 
liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 131071 - ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) - ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] - ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]] - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] - ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] - ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[AND1]] - ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] - ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX10-NEXT: $vgpr0 = COPY [[SELECT1]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s17) = G_TRUNC %0 - %3:_(s17) = G_TRUNC %1 - %4:_(s17) = G_UREM %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 131071 + ; GFX10-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[AND1]](i32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](f32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FMUL]](f32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[C2]], [[AND1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[AND]], [[ADD]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[AND]], [[MUL1]] + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SUB1]](i32), [[AND1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SUB1]], [[AND1]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[SELECT]](i32), [[AND1]] + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB 
[[SELECT]], [[AND1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[SUB3]], [[SELECT]] + ; GFX10-NEXT: $vgpr0 = COPY [[SELECT1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i17) = G_TRUNC %0(i32) + %3:_(i17) = G_TRUNC %1(i32) + %4:_(i17) = G_UREM %2, %3 + %5:_(i32) = G_ANYEXT %4(i17) + $vgpr0 = COPY %5(i32) ... --- @@ -2695,607 +2695,607 @@ body: | ; GFX6-LABEL: name: test_urem_s33 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32) - ; GFX6-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] - ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 - ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; GFX6-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] - ; GFX6-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; GFX6-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] - ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] - ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) - ; GFX6-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) - ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) - ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]] - ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] - ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] - ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] - ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] - ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; GFX6-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] - ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] - ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] - ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX6-NEXT: 
[[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] - ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] - ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] - ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] - ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] - ; GFX6-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] - ; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] - ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] - ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] - ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] - ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] - ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] - ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] - ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] - ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] - ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] - ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] - ; GFX6-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] - ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] - ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] - ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] - ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] - ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] - ; GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] - ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] - ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] - ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) 
- ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) - ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]] - ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE2]] - ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]] - ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] - ; GFX6-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] - ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE2]] - ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]] - ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE2]] - ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] - ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] - ; GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] - ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE2]] - ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] - ; GFX6-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]] - ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]] - ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD15]] - ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]] - ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] - ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]] - ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD17]], [[USUBO3]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD17]] - ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; GFX6-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]] - ; GFX6-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV12]] - ; GFX6-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV13]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX6-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV12]] - ; GFX6-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV13]], [[USUBO3]] - ; GFX6-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] - ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), 
[[USUBE6]](s32) - ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV13]] - ; GFX6-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX6-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV12]] - ; GFX6-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX6-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV13]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX6-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV12]] - ; GFX6-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV13]], [[USUBO5]] - ; GFX6-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] - ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) - ; GFX6-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] - ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] - ; GFX6-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] - ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 8589934591 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[COPY]], [[C]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[COPY1]], [[C]] + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND1]](i64) + ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[UV]](i32) + ; GFX6-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[UV1]](i32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD]](f32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX6-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX6-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX6-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD1]](f32) + ; GFX6-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC]](f32) + ; GFX6-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C5]](i64) + ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND1]](i64) + ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV2]], [[UV4]] + ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] + ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX6-NEXT: 
[[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ADD]], [[UMULH]] + ; GFX6-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO1]](i1) + ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UADDO]], [[UMULH1]] + ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO3]](i1) + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(i32), [[UADDO5:%[0-9]+]]:_(i1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO5]](i1) + ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(i32), [[UADDO7:%[0-9]+]]:_(i1) = G_UADDO [[UADDO4]], [[UMULH3]] + ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO7]](i1) + ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(i32), [[UADDO9:%[0-9]+]]:_(i1) = G_UADDO [[UADDO6]], [[ADD2]] + ; GFX6-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO9]](i1) + ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(i32), [[UADDO11:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI]], [[UADDO8]] + ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] + ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[UADDO10]] + ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(i32) = G_MUL [[USUBE]], [[UADDO10]] + ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[UADDE]] + ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(i32) = G_UMULH [[USUBO]], [[UADDO10]] + ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[ADD6]], [[UMULH5]] + ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(i32) = G_MUL [[UADDE]], [[MUL6]] + ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(i32) = G_MUL [[UADDO10]], [[ADD7]] + ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(i32) = G_UMULH [[UADDO10]], [[MUL6]] + ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(i32), [[UADDO13:%[0-9]+]]:_(i1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO13]](i1) + ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(i32), [[UADDO15:%[0-9]+]]:_(i1) = G_UADDO [[UADDO12]], [[UMULH6]] + ; GFX6-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO15]](i1) + ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(i32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(i32) = G_MUL [[UADDE]], [[ADD7]] + ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(i32) = G_UMULH [[UADDE]], [[MUL6]] + ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(i32) = G_UMULH [[UADDO10]], [[ADD7]] + ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(i32), [[UADDO17:%[0-9]+]]:_(i1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO17]](i1) + ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(i32), [[UADDO19:%[0-9]+]]:_(i1) = G_UADDO [[UADDO16]], [[UMULH8]] + ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO19]](i1) + ; 
GFX6-NEXT: [[ADD9:%[0-9]+]]:_(i32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(i32), [[UADDO21:%[0-9]+]]:_(i1) = G_UADDO [[UADDO18]], [[ADD8]] + ; GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO21]](i1) + ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(i32) = G_ADD [[ADD9]], [[ZEXT9]] + ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(i32) = G_UMULH [[UADDE]], [[ADD7]] + ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(i32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX6-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(i32), [[UADDO23:%[0-9]+]]:_(i1) = G_UADDO [[UADDO10]], [[UADDO20]] + ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] + ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND]](i64) + ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND]](i64) + ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(i32) = G_MUL [[UV9]], [[UADDO22]] + ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(i32) = G_MUL [[UV8]], [[UADDE2]] + ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(i32) = G_UMULH [[UV8]], [[UADDO22]] + ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(i32), [[UADDO25:%[0-9]+]]:_(i1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX6-NEXT: [[ZEXT10:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO25]](i1) + ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(i32), [[UADDO27:%[0-9]+]]:_(i1) = G_UADDO [[UADDO24]], [[UMULH10]] + ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO27]](i1) + ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(i32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(i32) = G_MUL [[UV9]], [[UADDE2]] + ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(i32) = G_UMULH [[UV9]], [[UADDO22]] + ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(i32) = G_UMULH [[UV8]], [[UADDE2]] + ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(i32), [[UADDO29:%[0-9]+]]:_(i1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO29]](i1) + ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(i32), [[UADDO31:%[0-9]+]]:_(i1) = G_UADDO [[UADDO28]], [[UMULH12]] + ; GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO31]](i1) + ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(i32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(i32), [[UADDO33:%[0-9]+]]:_(i1) = G_UADDO [[UADDO30]], [[ADD12]] + ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO33]](i1) + ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(i32) = G_ADD [[ADD13]], [[ZEXT14]] + ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(i32) = G_UMULH [[UV9]], [[UADDE2]] + ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(i32) = G_ADD [[UMULH13]], [[ADD14]] + ; GFX6-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND1]](i64) + ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(i32) = G_MUL [[UV10]], [[UADDO32]] + ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(i32) = G_MUL [[UV11]], [[UADDO32]] + ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(i32) = G_MUL [[UV10]], [[ADD15]] + ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(i32) = G_UMULH [[UV10]], [[UADDO32]] + ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(i32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(i32) = G_ADD [[ADD16]], [[UMULH14]] + ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(i32), [[USUBO3:%[0-9]+]]:_(i1) = G_USUBO [[UV6]], [[MUL15]] + ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV7]], [[ADD17]], [[USUBO3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV7]], [[ADD17]] + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO2]](i32), [[USUBE2]](i32) + ; GFX6-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND1]](i64) + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), 
[[USUBE2]](i32), [[UV13]] + ; GFX6-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO2]](i32), [[UV12]] + ; GFX6-NEXT: [[SEXT1:%[0-9]+]]:_(i32) = G_SEXT [[ICMP1]](i1) + ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE2]](i32), [[UV13]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SEXT1]], [[SEXT]] + ; GFX6-NEXT: [[USUBO4:%[0-9]+]]:_(i32), [[USUBO5:%[0-9]+]]:_(i1) = G_USUBO [[USUBO2]], [[UV12]] + ; GFX6-NEXT: [[USUBE4:%[0-9]+]]:_(i32), [[USUBE5:%[0-9]+]]:_(i1) = G_USUBE [[SUB]], [[UV13]], [[USUBO3]] + ; GFX6-NEXT: [[USUBE6:%[0-9]+]]:_(i32), [[USUBE7:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO4]](i32), [[USUBE6]](i32) + ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE6]](i32), [[UV13]] + ; GFX6-NEXT: [[SEXT2:%[0-9]+]]:_(i32) = G_SEXT [[ICMP3]](i1) + ; GFX6-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO4]](i32), [[UV12]] + ; GFX6-NEXT: [[SEXT3:%[0-9]+]]:_(i32) = G_SEXT [[ICMP4]](i1) + ; GFX6-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE6]](i32), [[UV13]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP5]](i1), [[SEXT3]], [[SEXT2]] + ; GFX6-NEXT: [[USUBO6:%[0-9]+]]:_(i32), [[USUBO7:%[0-9]+]]:_(i1) = G_USUBO [[USUBO4]], [[UV12]] + ; GFX6-NEXT: [[USUBE8:%[0-9]+]]:_(i32), [[USUBE9:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[UV13]], [[USUBO5]] + ; GFX6-NEXT: [[USUBE10:%[0-9]+]]:_(i32), [[USUBE11:%[0-9]+]]:_(i1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] + ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO6]](i32), [[USUBE10]](i32) + ; GFX6-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT1]](i32), [[C6]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[MV2]], [[MV1]] + ; GFX6-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT]](i32), [[C6]] + ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP7]](i1), [[SELECT2]], [[MV]] + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](i64) ; ; GFX8-LABEL: name: test_urem_s33 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32) - ; GFX8-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] - ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] - ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 - ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; GFX8-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] - ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] - ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; GFX8-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], 
[[C4]] - ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] - ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) - ; GFX8-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) - ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) - ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; GFX8-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]] - ; GFX8-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C5]] - ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV7]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] - ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV6]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV8]] - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV6]] - ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV8]] - ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV6]] - ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV8]] - ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] - ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV8]] - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C5]] - ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) - ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV11]](s32) - ; GFX8-NEXT: 
[[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE]], [[ANYEXT1]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]] - ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV10]] - ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[UV12]] - ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV10]] - ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] - ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV12]] - ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV10]] - ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV12]] - ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD4]] - ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV12]] - ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]] - ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) - ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) - ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO22]] - ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE2]] - ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO22]] - ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] - ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE2]] - ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO22]] - ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE2]] - ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = 
G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD8]] - ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE2]] - ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] - ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDO32]], [[C5]] - ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) - ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV21]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]] - ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) - ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV20]] - ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV22]], [[USUBO3]] - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[UV22]] - ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV25]] - ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV24]] - ; GFX8-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV25]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX8-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV24]] - ; GFX8-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV25]], [[USUBO3]] - ; GFX8-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] - ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) - ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV25]] - ; GFX8-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV24]] - ; GFX8-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV25]] - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX8-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV24]] - ; GFX8-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV25]], [[USUBO5]] - ; GFX8-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] - ; 
GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) - ; GFX8-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] - ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] - ; GFX8-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] - ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 8589934591 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[COPY]], [[C]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[COPY1]], [[C]] + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND1]](i64) + ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[UV]](i32) + ; GFX8-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[UV1]](i32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD]](f32) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX8-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX8-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD1]](f32) + ; GFX8-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC]](f32) + ; GFX8-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C5]](i64) + ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND1]](i64) + ; GFX8-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV2]], [[UV4]] + ; GFX8-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[FPTOUI]], [[C5]] + ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](i64) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[UV7]](i32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[FPTOUI1]], [[ANYEXT]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](i32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](i64) + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[UV6]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI]], [[UV8]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[UV6]] + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO 
[[MUL]], [[MUL1]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO1]](i1) + ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UADDO]], [[UMULH]] + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO3]](i1) + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[UV8]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[UV6]] + ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[UV8]] + ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(i32), [[UADDO5:%[0-9]+]]:_(i1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO5]](i1) + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(i32), [[UADDO7:%[0-9]+]]:_(i1) = G_UADDO [[UADDO4]], [[UMULH2]] + ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO7]](i1) + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(i32), [[UADDO9:%[0-9]+]]:_(i1) = G_UADDO [[UADDO6]], [[ADD]] + ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO9]](i1) + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[UV8]] + ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UMULH3]], [[ADD2]] + ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(i32), [[UADDO11:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI]], [[UADDO8]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[UADDO10]], [[C5]] + ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](i64) + ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[UV11]](i32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[UADDE]], [[ANYEXT1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](i32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](i64) + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UADDE]], [[UV10]] + ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(i32) = G_MUL [[UADDO10]], [[UV12]] + ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(i32) = G_UMULH [[UADDO10]], [[UV10]] + ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(i32), [[UADDO13:%[0-9]+]]:_(i1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO13]](i1) + ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(i32), [[UADDO15:%[0-9]+]]:_(i1) = G_UADDO [[UADDO12]], [[UMULH4]] + ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO15]](i1) + ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(i32) = G_MUL [[UADDE]], [[UV12]] + ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(i32) = G_UMULH [[UADDE]], [[UV10]] + ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(i32) = G_UMULH [[UADDO10]], [[UV12]] + ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(i32), [[UADDO17:%[0-9]+]]:_(i1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO17]](i1) + ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(i32), [[UADDO19:%[0-9]+]]:_(i1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO19]](i1) + ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(i32), 
[[UADDO21:%[0-9]+]]:_(i1) = G_UADDO [[UADDO18]], [[ADD4]] + ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO21]](i1) + ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(i32) = G_UMULH [[UADDE]], [[UV12]] + ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[UMULH7]], [[ADD6]] + ; GFX8-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(i32), [[UADDO23:%[0-9]+]]:_(i1) = G_UADDO [[UADDO10]], [[UADDO20]] + ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]] + ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND]](i64) + ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND]](i64) + ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(i32) = G_MUL [[UV17]], [[UADDO22]] + ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(i32) = G_MUL [[UV16]], [[UADDE2]] + ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(i32) = G_UMULH [[UV16]], [[UADDO22]] + ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(i32), [[UADDO25:%[0-9]+]]:_(i1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO25]](i1) + ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(i32), [[UADDO27:%[0-9]+]]:_(i1) = G_UADDO [[UADDO24]], [[UMULH8]] + ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO27]](i1) + ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(i32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(i32) = G_MUL [[UV17]], [[UADDE2]] + ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(i32) = G_UMULH [[UV17]], [[UADDO22]] + ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(i32) = G_UMULH [[UV16]], [[UADDE2]] + ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(i32), [[UADDO29:%[0-9]+]]:_(i1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO29]](i1) + ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(i32), [[UADDO31:%[0-9]+]]:_(i1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO31]](i1) + ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(i32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(i32), [[UADDO33:%[0-9]+]]:_(i1) = G_UADDO [[UADDO30]], [[ADD8]] + ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO33]](i1) + ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(i32) = G_ADD [[ADD9]], [[ZEXT14]] + ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(i32) = G_UMULH [[UV17]], [[UADDE2]] + ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(i32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND1]](i64) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV18]](i32), [[UADDO32]], [[C5]] + ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](i64) + ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(i64) = G_ANYEXT [[UV21]](i32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV18]](i32), [[ADD11]], [[ANYEXT2]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV19]](i32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](i64) + ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(i32), [[USUBO3:%[0-9]+]]:_(i1) = G_USUBO [[UV14]], [[UV20]] + ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV15]], [[UV22]], [[USUBO3]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV15]], [[UV22]] + 
; GFX8-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO2]](i32), [[USUBE2]](i32) + ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND1]](i64) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE2]](i32), [[UV25]] + ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO2]](i32), [[UV24]] + ; GFX8-NEXT: [[SEXT1:%[0-9]+]]:_(i32) = G_SEXT [[ICMP1]](i1) + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE2]](i32), [[UV25]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SEXT1]], [[SEXT]] + ; GFX8-NEXT: [[USUBO4:%[0-9]+]]:_(i32), [[USUBO5:%[0-9]+]]:_(i1) = G_USUBO [[USUBO2]], [[UV24]] + ; GFX8-NEXT: [[USUBE4:%[0-9]+]]:_(i32), [[USUBE5:%[0-9]+]]:_(i1) = G_USUBE [[SUB]], [[UV25]], [[USUBO3]] + ; GFX8-NEXT: [[USUBE6:%[0-9]+]]:_(i32), [[USUBE7:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO4]](i32), [[USUBE6]](i32) + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE6]](i32), [[UV25]] + ; GFX8-NEXT: [[SEXT2:%[0-9]+]]:_(i32) = G_SEXT [[ICMP3]](i1) + ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO4]](i32), [[UV24]] + ; GFX8-NEXT: [[SEXT3:%[0-9]+]]:_(i32) = G_SEXT [[ICMP4]](i1) + ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE6]](i32), [[UV25]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP5]](i1), [[SEXT3]], [[SEXT2]] + ; GFX8-NEXT: [[USUBO6:%[0-9]+]]:_(i32), [[USUBO7:%[0-9]+]]:_(i1) = G_USUBO [[USUBO4]], [[UV24]] + ; GFX8-NEXT: [[USUBE8:%[0-9]+]]:_(i32), [[USUBE9:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[UV25]], [[USUBO5]] + ; GFX8-NEXT: [[USUBE10:%[0-9]+]]:_(i32), [[USUBE11:%[0-9]+]]:_(i1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] + ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO6]](i32), [[USUBE10]](i32) + ; GFX8-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT1]](i32), [[C6]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[MV2]], [[MV1]] + ; GFX8-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT]](i32), [[C6]] + ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP7]](i1), [[SELECT2]], [[MV]] + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](i64) ; ; GFX9-LABEL: name: test_urem_s33 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32) - ; GFX9-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] - ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 - ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL 
[[FMUL1]], [[C3]] - ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; GFX9-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] - ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] - ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) - ; GFX9-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; GFX9-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]] - ; GFX9-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C5]] - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV7]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] - ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV6]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV8]] - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV6]] - ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV8]] - ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV6]] - ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV8]] - ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] - ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV8]] - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = 
G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C5]] - ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) - ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV11]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE]], [[ANYEXT1]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]] - ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV10]] - ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[UV12]] - ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV10]] - ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] - ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV12]] - ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV10]] - ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV12]] - ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD4]] - ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV12]] - ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]] - ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) - ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) - ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO22]] - ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE2]] - ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO22]] - ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] - ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE2]] - ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO22]] - ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], 
[[UADDE2]] - ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD8]] - ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE2]] - ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] - ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDO32]], [[C5]] - ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) - ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV21]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]] - ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) - ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV20]] - ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV22]], [[USUBO3]] - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[UV22]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV25]] - ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV24]] - ; GFX9-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV25]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX9-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV24]] - ; GFX9-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV25]], [[USUBO3]] - ; GFX9-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] - ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) - ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV25]] - ; GFX9-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV24]] - ; GFX9-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV25]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX9-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = 
G_USUBO [[USUBO4]], [[UV24]] - ; GFX9-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV25]], [[USUBO5]] - ; GFX9-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] - ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) - ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] - ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] - ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 8589934591 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[COPY]], [[C]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[COPY1]], [[C]] + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND1]](i64) + ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[UV]](i32) + ; GFX9-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[UV1]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD]](f32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX9-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD1]](f32) + ; GFX9-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC]](f32) + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C5]](i64) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND1]](i64) + ; GFX9-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV2]], [[UV4]] + ; GFX9-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[FPTOUI]], [[C5]] + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](i64) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[UV7]](i32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[FPTOUI1]], [[ANYEXT]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](i32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](i64) + ; GFX9-NEXT: 
[[MUL:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[UV6]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI]], [[UV8]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[UV6]] + ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[MUL]], [[MUL1]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO1]](i1) + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UADDO]], [[UMULH]] + ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO3]](i1) + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[UV8]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[UV6]] + ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[UV8]] + ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(i32), [[UADDO5:%[0-9]+]]:_(i1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO5]](i1) + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(i32), [[UADDO7:%[0-9]+]]:_(i1) = G_UADDO [[UADDO4]], [[UMULH2]] + ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO7]](i1) + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(i32), [[UADDO9:%[0-9]+]]:_(i1) = G_UADDO [[UADDO6]], [[ADD]] + ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO9]](i1) + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[UV8]] + ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[UMULH3]], [[ADD2]] + ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(i32), [[UADDO11:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI]], [[UADDO8]] + ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[UADDO10]], [[C5]] + ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](i64) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[UV11]](i32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[UADDE]], [[ANYEXT1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](i32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](i64) + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[UADDE]], [[UV10]] + ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(i32) = G_MUL [[UADDO10]], [[UV12]] + ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(i32) = G_UMULH [[UADDO10]], [[UV10]] + ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(i32), [[UADDO13:%[0-9]+]]:_(i1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO13]](i1) + ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(i32), [[UADDO15:%[0-9]+]]:_(i1) = G_UADDO [[UADDO12]], [[UMULH4]] + ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO15]](i1) + ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(i32) = G_MUL [[UADDE]], [[UV12]] + ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(i32) = G_UMULH [[UADDE]], [[UV10]] + ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(i32) = G_UMULH [[UADDO10]], [[UV12]] + ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(i32), [[UADDO17:%[0-9]+]]:_(i1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO17]](i1) + ; GFX9-NEXT: 
[[UADDO18:%[0-9]+]]:_(i32), [[UADDO19:%[0-9]+]]:_(i1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO19]](i1) + ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(i32), [[UADDO21:%[0-9]+]]:_(i1) = G_UADDO [[UADDO18]], [[ADD4]] + ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO21]](i1) + ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(i32) = G_UMULH [[UADDE]], [[UV12]] + ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[UMULH7]], [[ADD6]] + ; GFX9-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(i32), [[UADDO23:%[0-9]+]]:_(i1) = G_UADDO [[UADDO10]], [[UADDO20]] + ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]] + ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND]](i64) + ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND]](i64) + ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(i32) = G_MUL [[UV17]], [[UADDO22]] + ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(i32) = G_MUL [[UV16]], [[UADDE2]] + ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(i32) = G_UMULH [[UV16]], [[UADDO22]] + ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(i32), [[UADDO25:%[0-9]+]]:_(i1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO25]](i1) + ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(i32), [[UADDO27:%[0-9]+]]:_(i1) = G_UADDO [[UADDO24]], [[UMULH8]] + ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO27]](i1) + ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(i32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(i32) = G_MUL [[UV17]], [[UADDE2]] + ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(i32) = G_UMULH [[UV17]], [[UADDO22]] + ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(i32) = G_UMULH [[UV16]], [[UADDE2]] + ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(i32), [[UADDO29:%[0-9]+]]:_(i1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO29]](i1) + ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(i32), [[UADDO31:%[0-9]+]]:_(i1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO31]](i1) + ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(i32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(i32), [[UADDO33:%[0-9]+]]:_(i1) = G_UADDO [[UADDO30]], [[ADD8]] + ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO33]](i1) + ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(i32) = G_ADD [[ADD9]], [[ZEXT14]] + ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(i32) = G_UMULH [[UV17]], [[UADDE2]] + ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(i32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND1]](i64) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV18]](i32), [[UADDO32]], [[C5]] + ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](i64) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(i64) = G_ANYEXT [[UV21]](i32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV18]](i32), [[ADD11]], [[ANYEXT2]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV19]](i32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(i32), [[UV23:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](i64) + ; 
GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(i32), [[USUBO3:%[0-9]+]]:_(i1) = G_USUBO [[UV14]], [[UV20]] + ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV15]], [[UV22]], [[USUBO3]] + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV15]], [[UV22]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO2]](i32), [[USUBE2]](i32) + ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(i32), [[UV25:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND1]](i64) + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE2]](i32), [[UV25]] + ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO2]](i32), [[UV24]] + ; GFX9-NEXT: [[SEXT1:%[0-9]+]]:_(i32) = G_SEXT [[ICMP1]](i1) + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE2]](i32), [[UV25]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SEXT1]], [[SEXT]] + ; GFX9-NEXT: [[USUBO4:%[0-9]+]]:_(i32), [[USUBO5:%[0-9]+]]:_(i1) = G_USUBO [[USUBO2]], [[UV24]] + ; GFX9-NEXT: [[USUBE4:%[0-9]+]]:_(i32), [[USUBE5:%[0-9]+]]:_(i1) = G_USUBE [[SUB]], [[UV25]], [[USUBO3]] + ; GFX9-NEXT: [[USUBE6:%[0-9]+]]:_(i32), [[USUBE7:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO4]](i32), [[USUBE6]](i32) + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE6]](i32), [[UV25]] + ; GFX9-NEXT: [[SEXT2:%[0-9]+]]:_(i32) = G_SEXT [[ICMP3]](i1) + ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO4]](i32), [[UV24]] + ; GFX9-NEXT: [[SEXT3:%[0-9]+]]:_(i32) = G_SEXT [[ICMP4]](i1) + ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE6]](i32), [[UV25]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP5]](i1), [[SEXT3]], [[SEXT2]] + ; GFX9-NEXT: [[USUBO6:%[0-9]+]]:_(i32), [[USUBO7:%[0-9]+]]:_(i1) = G_USUBO [[USUBO4]], [[UV24]] + ; GFX9-NEXT: [[USUBE8:%[0-9]+]]:_(i32), [[USUBE9:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[UV25]], [[USUBO5]] + ; GFX9-NEXT: [[USUBE10:%[0-9]+]]:_(i32), [[USUBE11:%[0-9]+]]:_(i1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO6]](i32), [[USUBE10]](i32) + ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT1]](i32), [[C6]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[MV2]], [[MV1]] + ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT]](i32), [[C6]] + ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP7]](i1), [[SELECT2]], [[MV]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](i64) ; ; GFX10-LABEL: name: test_urem_s33 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591 - ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] - ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32) - ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] - ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] - ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) - ; 
GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 - ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] - ; GFX10-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] - ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] - ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) - ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) - ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) - ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]] - ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C5]] - ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV7]], [[MUL]] - ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]] - ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV6]] - ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV6]] - ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[MUL3]] - ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV6]] - ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] - ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] - ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] - ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD4]] - ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] - ; GFX10-NEXT: 
[[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C5]] - ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64) - ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] - ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[MUL5]] - ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] - ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[MUL6]] - ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV8]] - ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] - ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV8]] - ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] - ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] - ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] - ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV8]] - ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] - ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] - ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] - ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] - ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD10]] - ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] - ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) - ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) - ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO22]] - ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE2]] - ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO22]] - ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] - ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] - ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE2]] - ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO22]] - ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE2]] - ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] - ; 
GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] - ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE2]] - ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD14]] - ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV14]](s32), [[UADDO32]], [[C5]] - ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD15]] - ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL13]] - ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDO32]] - ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL14]] - ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV16]] - ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD17]], [[USUBO3]] - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD17]] - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV19]] - ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV18]] - ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV19]] - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV18]] - ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV19]], [[USUBO3]] - ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] - ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) - ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV19]] - ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV18]] - ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV19]] - ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV18]] - ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV19]], [[USUBO5]] - ; GFX10-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] - ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES 
[[USUBO6]](s32), [[USUBE10]](s32) - ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] - ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] - ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] - ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s33) = G_TRUNC %0 - %3:_(s33) = G_TRUNC %1 - %4:_(s33) = G_UREM %2, %3 - %5:_(s64) = G_ANYEXT %4 - $vgpr0_vgpr1 = COPY %5 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 8589934591 + ; GFX10-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[COPY]], [[C]] + ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[COPY1]], [[C]] + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND1]](i64) + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(f32) = G_UITOFP [[UV]](i32) + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(f32) = G_UITOFP [[UV1]](i32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(f32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(f32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(f32) = G_AMDGPU_RCP_IFLAG [[FADD]](f32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(f32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(f32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(f32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX10-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(f32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(f32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(f32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(f32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(i32) = G_FPTOUI [[FADD1]](f32) + ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(i32) = G_FPTOUI [[INTRINSIC_TRUNC]](f32) + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C5]](i64) + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND1]](i64) + ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV2]], [[UV4]] + ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[FPTOUI]], [[C5]] + ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](i64) + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[UV7]], [[MUL]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ADD]], [[MUL1]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[UV6]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[UV6]] + ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(i32), [[UADDO1:%[0-9]+]]:_(i1) = G_UADDO [[MUL2]], [[MUL3]] + ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = 
G_ZEXT [[UADDO1]](i1) + ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(i32), [[UADDO3:%[0-9]+]]:_(i1) = G_UADDO [[UADDO]], [[UMULH]] + ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO3]](i1) + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(i32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[UV6]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(i32), [[UADDO5:%[0-9]+]]:_(i1) = G_UADDO [[MUL4]], [[UMULH1]] + ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO5]](i1) + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(i32), [[UADDO7:%[0-9]+]]:_(i1) = G_UADDO [[UADDO4]], [[UMULH2]] + ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO7]](i1) + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(i32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(i32), [[UADDO9:%[0-9]+]]:_(i1) = G_UADDO [[UADDO6]], [[ADD2]] + ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO9]](i1) + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(i32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(i32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(i32) = G_ADD [[UMULH3]], [[ADD4]] + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(i32), [[UADDO11:%[0-9]+]]:_(i1) = G_UADDO [[FPTOUI]], [[UADDO8]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(i32), [[UADDE1:%[0-9]+]]:_(i1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](i32), [[UADDO10]], [[C5]] + ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](i64) + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(i32) = G_MUL [[USUBO]], [[UADDE]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(i32) = G_ADD [[UV9]], [[MUL5]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(i32) = G_MUL [[USUBE]], [[UADDO10]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(i32) = G_ADD [[ADD6]], [[MUL6]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(i32) = G_MUL [[UADDE]], [[UV8]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(i32) = G_MUL [[UADDO10]], [[ADD7]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(i32) = G_UMULH [[UADDO10]], [[UV8]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(i32), [[UADDO13:%[0-9]+]]:_(i1) = G_UADDO [[MUL7]], [[MUL8]] + ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO13]](i1) + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(i32), [[UADDO15:%[0-9]+]]:_(i1) = G_UADDO [[UADDO12]], [[UMULH4]] + ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO15]](i1) + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(i32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(i32) = G_MUL [[UADDE]], [[ADD7]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(i32) = G_UMULH [[UADDE]], [[UV8]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(i32) = G_UMULH [[UADDO10]], [[ADD7]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(i32), [[UADDO17:%[0-9]+]]:_(i1) = G_UADDO [[MUL9]], [[UMULH5]] + ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO17]](i1) + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(i32), [[UADDO19:%[0-9]+]]:_(i1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO19]](i1) + ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(i32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(i32), [[UADDO21:%[0-9]+]]:_(i1) = G_UADDO [[UADDO18]], [[ADD8]] + ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO21]](i1) + ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(i32) = G_ADD [[ADD9]], [[ZEXT9]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(i32) = G_UMULH [[UADDE]], [[ADD7]] + ; 
GFX10-NEXT: [[ADD11:%[0-9]+]]:_(i32) = G_ADD [[UMULH7]], [[ADD10]] + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(i32), [[UADDO23:%[0-9]+]]:_(i1) = G_UADDO [[UADDO10]], [[UADDO20]] + ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(i32), [[UADDE3:%[0-9]+]]:_(i1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] + ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND]](i64) + ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND]](i64) + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(i32) = G_MUL [[UV13]], [[UADDO22]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(i32) = G_MUL [[UV12]], [[UADDE2]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(i32) = G_UMULH [[UV12]], [[UADDO22]] + ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(i32), [[UADDO25:%[0-9]+]]:_(i1) = G_UADDO [[MUL10]], [[MUL11]] + ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO25]](i1) + ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(i32), [[UADDO27:%[0-9]+]]:_(i1) = G_UADDO [[UADDO24]], [[UMULH8]] + ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO27]](i1) + ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(i32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(i32) = G_MUL [[UV13]], [[UADDE2]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(i32) = G_UMULH [[UV13]], [[UADDO22]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(i32) = G_UMULH [[UV12]], [[UADDE2]] + ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(i32), [[UADDO29:%[0-9]+]]:_(i1) = G_UADDO [[MUL12]], [[UMULH9]] + ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO29]](i1) + ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(i32), [[UADDO31:%[0-9]+]]:_(i1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO31]](i1) + ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(i32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(i32), [[UADDO33:%[0-9]+]]:_(i1) = G_UADDO [[UADDO30]], [[ADD12]] + ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(i32) = G_ZEXT [[UADDO33]](i1) + ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(i32) = G_ADD [[ADD13]], [[ZEXT14]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(i32) = G_UMULH [[UV13]], [[UADDE2]] + ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(i32) = G_ADD [[UMULH11]], [[ADD14]] + ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND1]](i64) + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV14]](i32), [[UADDO32]], [[C5]] + ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](i64) + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(i32) = G_MUL [[UV14]], [[ADD15]] + ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(i32) = G_ADD [[UV17]], [[MUL13]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(i32) = G_MUL [[UV15]], [[UADDO32]] + ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(i32) = G_ADD [[ADD16]], [[MUL14]] + ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(i32), [[USUBO3:%[0-9]+]]:_(i1) = G_USUBO [[UV10]], [[UV16]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV11]], [[ADD17]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV11]], [[ADD17]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO2]](i32), [[USUBE2]](i32) + ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND1]](i64) + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE2]](i32), [[UV19]] + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(i32) = G_SEXT [[ICMP]](i1) + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO2]](i32), [[UV18]] + ; GFX10-NEXT: 
[[SEXT1:%[0-9]+]]:_(i32) = G_SEXT [[ICMP1]](i1) + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE2]](i32), [[UV19]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[SEXT1]], [[SEXT]] + ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(i32), [[USUBO5:%[0-9]+]]:_(i1) = G_USUBO [[USUBO2]], [[UV18]] + ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(i32), [[USUBE5:%[0-9]+]]:_(i1) = G_USUBE [[SUB]], [[UV19]], [[USUBO3]] + ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(i32), [[USUBE7:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO4]](i32), [[USUBE6]](i32) + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBE6]](i32), [[UV19]] + ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(i32) = G_SEXT [[ICMP3]](i1) + ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(uge), [[USUBO4]](i32), [[UV18]] + ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(i32) = G_SEXT [[ICMP4]](i1) + ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[USUBE6]](i32), [[UV19]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP5]](i1), [[SEXT3]], [[SEXT2]] + ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(i32), [[USUBO7:%[0-9]+]]:_(i1) = G_USUBO [[USUBO4]], [[UV18]] + ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(i32), [[USUBE9:%[0-9]+]]:_(i1) = G_USUBE [[USUBE4]], [[UV19]], [[USUBO5]] + ; GFX10-NEXT: [[USUBE10:%[0-9]+]]:_(i32), [[USUBE11:%[0-9]+]]:_(i1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO6]](i32), [[USUBE10]](i32) + ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT1]](i32), [[C6]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(i64) = G_SELECT [[ICMP6]](i1), [[MV2]], [[MV1]] + ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SELECT]](i32), [[C6]] + ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(i64) = G_SELECT [[ICMP7]](i1), [[SELECT2]], [[MV]] + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i33) = G_TRUNC %0(i64) + %3:_(i33) = G_TRUNC %1(i64) + %4:_(i33) = G_UREM %2, %3 + %5:_(i64) = G_ANYEXT %4(i33) + $vgpr0_vgpr1 = COPY %5(i64) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir index 68564e1a9821d..88105a0745b75 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir @@ -14,66 +14,66 @@ body: | ; GFX6-LABEL: name: ushlsat_s7 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[AND]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[LSHR]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[SELECT]], [[C1]](s32) - ; GFX6-NEXT: $vgpr0 = COPY [[LSHR1]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 127 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 25 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[C1]](i32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[SHL]], [[AND]](i32) + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[SHL1]], [[AND]](i32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SHL]](i32), [[LSHR]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[C2]], [[SHL1]] + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[SELECT]], [[C1]](i32) + ; GFX6-NEXT: $vgpr0 = COPY [[LSHR1]](i32) ; ; GFX8-LABEL: name: ushlsat_s7 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND]](s16) - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[AND]](s16) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s16), [[LSHR]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[SELECT]], [[C1]](s16) - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) - ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 127 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C]] + ; 
GFX8-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 9 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[C1]](i16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[SHL]], [[AND]](i16) + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i16) = G_LSHR [[SHL1]], [[AND]](i16) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 -1 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SHL]](i16), [[LSHR]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i16) = G_SELECT [[ICMP]](i1), [[C2]], [[SHL1]] + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i16) = G_LSHR [[SELECT]], [[C1]](i16) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR1]](i16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: ushlsat_s7 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND]](s16) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[AND]](s16) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s16), [[LSHR]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[SELECT]], [[C1]](s16) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s7) = G_TRUNC %0 - %3:_(s7) = G_TRUNC %1 - %4:_(s7) = G_USHLSAT %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 127 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 9 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[C1]](i16) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[SHL]], [[AND]](i16) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i16) = G_LSHR [[SHL1]], [[AND]](i16) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 -1 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SHL]](i16), [[LSHR]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i16) = G_SELECT [[ICMP]](i1), [[C2]], [[SHL1]] + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i16) = G_LSHR [[SELECT]], [[C1]](i16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR1]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i7) = G_TRUNC %0(i32) + %3:_(i7) = G_TRUNC %1(i32) + %4:_(i7) = G_USHLSAT %2, %3(i7) + %5:_(i32) = G_ANYEXT %4(i7) + $vgpr0 = COPY %5(i32) ... 
--- @@ -85,66 +85,66 @@ body: | ; GFX6-LABEL: name: ushlsat_s8 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[AND]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[LSHR]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[SELECT]], [[C1]](s32) - ; GFX6-NEXT: $vgpr0 = COPY [[LSHR1]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[C1]](i32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[SHL]], [[AND]](i32) + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[SHL1]], [[AND]](i32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SHL]](i32), [[LSHR]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[C2]], [[SHL1]] + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[SELECT]], [[C1]](i32) + ; GFX6-NEXT: $vgpr0 = COPY [[LSHR1]](i32) ; ; GFX8-LABEL: name: ushlsat_s8 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND]](s16) - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[AND]](s16) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s16), [[LSHR]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[SELECT]], [[C1]](s16) - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) - ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[C1]](i16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[SHL]], [[AND]](i16) + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i16) = G_LSHR [[SHL1]], [[AND]](i16) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i16) 
= G_CONSTANT i16 -1 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SHL]](i16), [[LSHR]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i16) = G_SELECT [[ICMP]](i1), [[C2]], [[SHL1]] + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i16) = G_LSHR [[SELECT]], [[C1]](i16) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR1]](i16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: ushlsat_s8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND]](s16) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[AND]](s16) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s16), [[LSHR]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[SELECT]], [[C1]](s16) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s8) = G_TRUNC %0 - %3:_(s8) = G_TRUNC %1 - %4:_(s8) = G_USHLSAT %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[C1]](i16) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[SHL]], [[AND]](i16) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i16) = G_LSHR [[SHL1]], [[AND]](i16) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 -1 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SHL]](i16), [[LSHR]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i16) = G_SELECT [[ICMP]](i1), [[C2]], [[SHL1]] + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i16) = G_LSHR [[SELECT]], [[C1]](i16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR1]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i8) = G_TRUNC %0(i32) + %3:_(i8) = G_TRUNC %1(i32) + %4:_(i8) = G_USHLSAT %2, %3(i8) + %5:_(i32) = G_ANYEXT %4(i8) + $vgpr0 = COPY %5(i32) ... 
--- @@ -156,113 +156,113 @@ body: | ; GFX6-LABEL: name: ushlsat_v2s8 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) - ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[AND]](s32) - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[LSHR2]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C3]], [[SHL1]] - ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[SELECT]], [[C1]](s32) - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) - ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND1]](s32) - ; GFX6-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[SHL3]], [[AND1]](s32) - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s32), [[LSHR4]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C3]], [[SHL3]] - ; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[SELECT1]], [[C1]](s32) - ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LSHR5]], [[COPY2]](s32) - ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC]], [[TRUNC1]] - ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY1]], [[C]](i32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C2]] + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[C1]](i32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[SHL]], [[AND]](i32) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[SHL1]], [[AND]](i32) + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SHL]](i32), [[LSHR2]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[C3]], [[SHL1]] + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[SELECT]], [[C1]](i32) + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[LSHR1]], [[C2]] + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C1]](i32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[SHL2]], [[AND1]](i32) + ; GFX6-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[SHL3]], [[AND1]](i32) + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SHL2]](i32), [[LSHR4]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[C3]], [[SHL3]] + ; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR 
[[SELECT1]], [[C1]](i32) + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C]](i32) + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LSHR5]], [[COPY2]](i32) + ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[SHL4]](i32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[TRUNC]], [[TRUNC1]] + ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[OR]](i16) + ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX8-LABEL: name: ushlsat_v2s8 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C2]](s16) - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND]](s16) - ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[AND]](s16) - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s16), [[LSHR2]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C3]], [[SHL1]] - ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[SELECT]], [[C2]](s16) - ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] - ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[C2]](s16) - ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[SHL2]], [[AND1]](s16) - ; GFX8-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[SHL3]], [[AND1]](s16) - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s16), [[LSHR4]] - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[C3]], [[SHL3]] - ; GFX8-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[SELECT1]], [[C2]](s16) - ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[LSHR5]], [[C2]](s16) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[LSHR3]], [[SHL4]] - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY1]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C1]] + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[C2]](i16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[SHL]], [[AND]](i16) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(i16) = G_LSHR [[SHL1]], [[AND]](i16) + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 -1 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SHL]](i16), [[LSHR2]] + ; GFX8-NEXT: 
[[SELECT:%[0-9]+]]:_(i16) = G_SELECT [[ICMP]](i1), [[C3]], [[SHL1]] + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(i16) = G_LSHR [[SELECT]], [[C2]](i16) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C1]] + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[TRUNC2]], [[C2]](i16) + ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(i16) = G_SHL [[SHL2]], [[AND1]](i16) + ; GFX8-NEXT: [[LSHR4:%[0-9]+]]:_(i16) = G_LSHR [[SHL3]], [[AND1]](i16) + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SHL2]](i16), [[LSHR4]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(i16) = G_SELECT [[ICMP1]](i1), [[C3]], [[SHL3]] + ; GFX8-NEXT: [[LSHR5:%[0-9]+]]:_(i16) = G_LSHR [[SELECT1]], [[C2]](i16) + ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(i16) = G_SHL [[LSHR5]], [[C2]](i16) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[LSHR3]], [[SHL4]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[OR]](i16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: ushlsat_v2s8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C2]](s16) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND]](s16) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[AND]](s16) - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s16), [[LSHR2]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C3]], [[SHL1]] - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[SELECT]], [[C2]](s16) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[C2]](s16) - ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[SHL2]], [[AND1]](s16) - ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[SHL3]], [[AND1]](s16) - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s16), [[LSHR4]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[C3]], [[SHL3]] - ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[SELECT1]], [[C2]](s16) - ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[LSHR5]], [[C2]](s16) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[LSHR3]], [[SHL4]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(<2 x s8>) = G_BITCAST %2 - %5:_(<2 x s8>) = G_BITCAST %3 - %6:_(<2 x s8>) = G_USHLSAT %4, %5 - %7:_(s16) = G_BITCAST %6 - %8:_(s32) = G_ANYEXT %7 - $vgpr0 = COPY %8 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: 
[[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY1]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C1]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[C2]](i16) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[SHL]], [[AND]](i16) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i16) = G_LSHR [[SHL1]], [[AND]](i16) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 -1 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SHL]](i16), [[LSHR2]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i16) = G_SELECT [[ICMP]](i1), [[C3]], [[SHL1]] + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(i16) = G_LSHR [[SELECT]], [[C2]](i16) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC3]], [[C1]] + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[TRUNC2]], [[C2]](i16) + ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(i16) = G_SHL [[SHL2]], [[AND1]](i16) + ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(i16) = G_LSHR [[SHL3]], [[AND1]](i16) + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SHL2]](i16), [[LSHR4]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i16) = G_SELECT [[ICMP1]](i1), [[C3]], [[SHL3]] + ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(i16) = G_LSHR [[SELECT1]], [[C2]](i16) + ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(i16) = G_SHL [[LSHR5]], [[C2]](i16) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[LSHR3]], [[SHL4]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[OR]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(<2 x i8>) = G_BITCAST %2(i16) + %5:_(<2 x i8>) = G_BITCAST %3(i16) + %6:_(<2 x i8>) = G_USHLSAT %4, %5(<2 x i8>) + %7:_(i16) = G_BITCAST %6(<2 x i8>) + %8:_(i32) = G_ANYEXT %7(i16) + $vgpr0 = COPY %8(i32) ... 
--- @@ -274,56 +274,56 @@ body: | ; GFX6-LABEL: name: ushlsat_s16 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[AND]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[LSHR]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[SELECT]], [[C1]](s32) - ; GFX6-NEXT: $vgpr0 = COPY [[LSHR1]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[C1]](i32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[SHL]], [[AND]](i32) + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[SHL1]], [[AND]](i32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SHL]](i32), [[LSHR]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[C2]], [[SHL1]] + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[SELECT]], [[C1]](i32) + ; GFX6-NEXT: $vgpr0 = COPY [[LSHR1]](i32) ; ; GFX8-LABEL: name: ushlsat_s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[SHL]], [[TRUNC1]](s16) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[LSHR]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) - ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[TRUNC1]](i16) + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i16) = G_LSHR [[SHL]], [[TRUNC1]](i16) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 -1 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[TRUNC]](i16), [[LSHR]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i16) = G_SELECT [[ICMP]](i1), [[C]], [[SHL]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[SELECT]](i16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: ushlsat_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC 
[[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[SHL]], [[TRUNC1]](s16) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[LSHR]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s16) = G_USHLSAT %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[TRUNC1]](i16) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i16) = G_LSHR [[SHL]], [[TRUNC1]](i16) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 -1 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[TRUNC]](i16), [[LSHR]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i16) = G_SELECT [[ICMP]](i1), [[C]], [[SHL]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[SELECT]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i16) = G_USHLSAT %2, %3(i16) + %5:_(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... --- @@ -335,92 +335,92 @@ body: | ; GFX6-LABEL: name: ushlsat_v2s16 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) - ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[AND]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[LSHR2]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] - ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[SELECT]], [[C]](s32) - ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[LSHR1]](s32) - ; GFX6-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[SHL3]], [[LSHR1]](s32) - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s32), [[LSHR4]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C2]], [[SHL3]] - ; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[SELECT1]], [[C]](s32) - ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LSHR5]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL4]] - ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - 
; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C1]] + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[BITCAST]], [[C]](i32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[SHL]], [[AND]](i32) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[SHL1]], [[AND]](i32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SHL]](i32), [[LSHR2]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[C2]], [[SHL1]] + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[SELECT]], [[C]](i32) + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C]](i32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[SHL2]], [[LSHR1]](i32) + ; GFX6-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[SHL3]], [[LSHR1]](i32) + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SHL2]](i32), [[LSHR4]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[C2]], [[SHL3]] + ; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[SELECT1]], [[C]](i32) + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LSHR5]], [[C]](i32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[LSHR3]], [[SHL4]] + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) ; ; GFX8-LABEL: name: ushlsat_v2s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC2]](s16) - ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[SHL]], [[TRUNC2]](s16) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[LSHR2]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[SHL]] - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC3]](s16) - ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[TRUNC3]](s16) - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[LSHR3]] - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[C1]], [[SHL1]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT]](s16) - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT1]](s16) - ; GFX8-NEXT: 
[[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[TRUNC2]](i16) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(i16) = G_LSHR [[SHL]], [[TRUNC2]](i16) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 -1 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[TRUNC]](i16), [[LSHR2]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i16) = G_SELECT [[ICMP]](i1), [[C1]], [[SHL]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[TRUNC3]](i16) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(i16) = G_LSHR [[SHL1]], [[TRUNC3]](i16) + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[TRUNC1]](i16), [[LSHR3]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(i16) = G_SELECT [[ICMP1]](i1), [[C1]], [[SHL1]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[SELECT]](i16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[SELECT1]](i16) + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) ; ; GFX9-LABEL: name: ushlsat_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC2]](s16) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[SHL]], [[TRUNC2]](s16) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[LSHR2]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[SHL]] - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC3]](s16) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[TRUNC3]](s16) - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[LSHR3]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT 
[[ICMP1]](s1), [[C1]], [[SHL1]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SELECT]](s16), [[SELECT1]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_USHLSAT %0, %1 - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[TRUNC2]](i16) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i16) = G_LSHR [[SHL]], [[TRUNC2]](i16) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 -1 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[TRUNC]](i16), [[LSHR2]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i16) = G_SELECT [[ICMP]](i1), [[C1]], [[SHL]] + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[TRUNC3]](i16) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(i16) = G_LSHR [[SHL1]], [[TRUNC3]](i16) + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[TRUNC1]](i16), [[LSHR3]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i16) = G_SELECT [[ICMP1]](i1), [[C1]], [[SHL1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[SELECT]](i16), [[SELECT1]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = G_USHLSAT %0, %1(<2 x i16>) + $vgpr0 = COPY %2(<2 x i16>) ... 
--- @@ -432,159 +432,159 @@ body: | ; GFX6-LABEL: name: ushlsat_v3s16 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[LSHR1]](s32) - ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[LSHR1]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[LSHR3]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C1]], [[SHL1]] - ; GFX6-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[SELECT]], [[C]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]] - ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND]](s32) - ; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[SHL3]], [[AND]](s32) - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s32), [[LSHR5]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C1]], [[SHL3]] - ; GFX6-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[SELECT1]], [[C]](s32) - ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[SHL4]], [[LSHR2]](s32) - ; GFX6-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[SHL5]], [[LSHR2]](s32) - ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL4]](s32), [[LSHR7]] - ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C1]], [[SHL5]] - ; GFX6-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[SELECT2]], [[C]](s32) - ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX6-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR6]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR4]], [[SHL6]] - ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C2]] - ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR8]], [[SHL7]] - ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C2]] - ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR9]], [[SHL8]] - ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX6-NEXT: 
[[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[BITCAST]], [[C]](i32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[SHL]], [[LSHR1]](i32) + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[SHL1]], [[LSHR1]](i32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SHL]](i32), [[LSHR3]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[C1]], [[SHL1]] + ; GFX6-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[SELECT]], [[C]](i32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C2]] + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C]](i32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[SHL2]], [[AND]](i32) + ; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[SHL3]], [[AND]](i32) + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SHL2]](i32), [[LSHR5]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[C1]], [[SHL3]] + ; GFX6-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[SELECT1]], [[C]](i32) + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[BITCAST1]], [[C]](i32) + ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[SHL4]], [[LSHR2]](i32) + ; GFX6-NEXT: [[LSHR7:%[0-9]+]]:_(i32) = G_LSHR [[SHL5]], [[LSHR2]](i32) + ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SHL4]](i32), [[LSHR7]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[C1]], [[SHL5]] + ; GFX6-NEXT: [[LSHR8:%[0-9]+]]:_(i32) = G_LSHR [[SELECT2]], [[C]](i32) + ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX6-NEXT: [[LSHR9:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[LSHR6]], [[C]](i32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[LSHR4]], [[SHL6]] + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST3]], [[C2]] + ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[LSHR8]], [[SHL7]] + ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[BITCAST4]], [[C2]] + ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[AND2]], [[C]](i32) + ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR9]], [[SHL8]] + ; 
GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x i16>), [[BITCAST6]](<2 x i16>), [[BITCAST7]](<2 x i16>) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX8-LABEL: name: ushlsat_v3s16 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC3]](s16) - ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[SHL]], [[TRUNC3]](s16) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[LSHR3]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[SHL]] - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC4]](s16) - ; GFX8-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[TRUNC4]](s16) - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[LSHR4]] - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[C1]], [[SHL1]] - ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC5]](s16) - ; GFX8-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[SHL2]], [[TRUNC5]](s16) - ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[LSHR5]] - ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[SHL2]] - ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX8-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT]](s16) - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT1]](s16) - ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] - ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT2]](s16) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C2]] - ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR 
[[ZEXT2]], [[SHL4]] - ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C2]] - ; GFX8-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR6]], [[SHL5]] - ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[TRUNC3]](i16) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(i16) = G_LSHR [[SHL]], [[TRUNC3]](i16) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 -1 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[TRUNC]](i16), [[LSHR3]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i16) = G_SELECT [[ICMP]](i1), [[C1]], [[SHL]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[TRUNC4]](i16) + ; GFX8-NEXT: [[LSHR4:%[0-9]+]]:_(i16) = G_LSHR [[SHL1]], [[TRUNC4]](i16) + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[TRUNC1]](i16), [[LSHR4]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(i16) = G_SELECT [[ICMP1]](i1), [[C1]], [[SHL1]] + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[TRUNC2]], [[TRUNC5]](i16) + ; GFX8-NEXT: [[LSHR5:%[0-9]+]]:_(i16) = G_LSHR [[SHL2]], [[TRUNC5]](i16) + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[TRUNC2]](i16), [[LSHR5]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(i16) = G_SELECT [[ICMP2]](i1), [[C1]], [[SHL2]] + ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX8-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[SELECT]](i16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[SELECT1]](i16) + ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL3]] + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[SELECT2]](i16) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(i32) = 
G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST3]], [[C2]] + ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[AND]], [[C]](i32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL4]] + ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST4]], [[C2]] + ; GFX8-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR6]], [[SHL5]] + ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x i16>), [[BITCAST6]](<2 x i16>), [[BITCAST7]](<2 x i16>) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX9-LABEL: name: ushlsat_v3s16 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC3]](s16) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[SHL]], [[TRUNC3]](s16) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[LSHR3]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[SHL]] - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC4]](s16) - ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[TRUNC4]](s16) - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[LSHR4]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[C1]], [[SHL1]] - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC5]](s16) - ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[SHL2]], [[TRUNC5]](s16) - ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[LSHR5]] - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[SHL2]] - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9-NEXT: 
[[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SELECT]](s16), [[SELECT1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SELECT2]](s16), [[TRUNC6]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC7]](s16), [[TRUNC8]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) - %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0 - %3:_(<3 x s16>) = G_USHLSAT %1, %2 - %4:_(<3 x s16>) = G_IMPLICIT_DEF - %5:_(<6 x s16>) = G_CONCAT_VECTORS %3, %4 - $vgpr0_vgpr1_vgpr2 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[TRUNC3]](i16) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(i16) = G_LSHR [[SHL]], [[TRUNC3]](i16) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 -1 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[TRUNC]](i16), [[LSHR3]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i16) = G_SELECT [[ICMP]](i1), [[C1]], [[SHL]] + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[TRUNC4]](i16) + ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(i16) = G_LSHR [[SHL1]], [[TRUNC4]](i16) + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[TRUNC1]](i16), [[LSHR4]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i16) = G_SELECT [[ICMP1]](i1), [[C1]], [[SHL1]] + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[TRUNC2]], [[TRUNC5]](i16) + ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(i16) = G_LSHR [[SHL2]], [[TRUNC5]](i16) + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[TRUNC2]](i16), [[LSHR5]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(i16) = G_SELECT [[ICMP2]](i1), [[C1]], [[SHL2]] + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST3]](i32) + ; GFX9-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR6]](i32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = 
G_TRUNC [[BITCAST4]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[SELECT]](i16), [[SELECT1]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[SELECT2]](i16), [[TRUNC6]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC7]](i16), [[TRUNC8]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>), [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) + %0:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x i16>), %2:_(<3 x i16>) = G_UNMERGE_VALUES %0(<6 x i16>) + %3:_(<3 x i16>) = G_USHLSAT %1, %2(<3 x i16>) + %4:_(<3 x i16>) = G_IMPLICIT_DEF + %5:_(<6 x i16>) = G_CONCAT_VECTORS %3(<3 x i16>), %4(<3 x i16>) + $vgpr0_vgpr1_vgpr2 = COPY %5(<6 x i16>) ... --- @@ -596,159 +596,159 @@ body: | ; GFX6-LABEL: name: ushlsat_v4s16 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) - ; GFX6-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[AND]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[LSHR4]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] - ; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[SELECT]], [[C]](s32) - ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[LSHR2]](s32) - ; GFX6-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[SHL3]], [[LSHR2]](s32) - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s32), [[LSHR6]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C2]], [[SHL3]] - ; GFX6-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[SELECT1]], [[C]](s32) - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[SHL4]], [[AND1]](s32) - ; GFX6-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[SHL5]], [[AND1]](s32) - ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL4]](s32), [[LSHR8]] - ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C2]], 
[[SHL5]] - ; GFX6-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[SELECT2]], [[C]](s32) - ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[SHL6]], [[LSHR3]](s32) - ; GFX6-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[SHL7]], [[LSHR3]](s32) - ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL6]](s32), [[LSHR10]] - ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[C2]], [[SHL7]] - ; GFX6-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[SELECT3]], [[C]](s32) - ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LSHR7]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR5]], [[SHL8]] - ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[LSHR11]], [[C]](s32) - ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR9]], [[SHL9]] - ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C1]] + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[BITCAST]], [[C]](i32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[SHL]], [[AND]](i32) + ; GFX6-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[SHL1]], [[AND]](i32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SHL]](i32), [[LSHR4]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[C2]], [[SHL1]] + ; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[SELECT]], [[C]](i32) + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C]](i32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[SHL2]], [[LSHR2]](i32) + ; GFX6-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[SHL3]], [[LSHR2]](i32) + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SHL2]](i32), [[LSHR6]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[C2]], [[SHL3]] + ; GFX6-NEXT: [[LSHR7:%[0-9]+]]:_(i32) = G_LSHR [[SELECT1]], [[C]](i32) + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST3]], [[C1]] + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[BITCAST1]], [[C]](i32) + ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[SHL4]], [[AND1]](i32) + ; GFX6-NEXT: [[LSHR8:%[0-9]+]]:_(i32) = G_LSHR [[SHL5]], [[AND1]](i32) + ; GFX6-NEXT: 
[[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SHL4]](i32), [[LSHR8]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(i32) = G_SELECT [[ICMP2]](i1), [[C2]], [[SHL5]] + ; GFX6-NEXT: [[LSHR9:%[0-9]+]]:_(i32) = G_LSHR [[SELECT2]], [[C]](i32) + ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[LSHR1]], [[C]](i32) + ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[SHL6]], [[LSHR3]](i32) + ; GFX6-NEXT: [[LSHR10:%[0-9]+]]:_(i32) = G_LSHR [[SHL7]], [[LSHR3]](i32) + ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SHL6]](i32), [[LSHR10]] + ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(i32) = G_SELECT [[ICMP3]](i1), [[C2]], [[SHL7]] + ; GFX6-NEXT: [[LSHR11:%[0-9]+]]:_(i32) = G_LSHR [[SELECT3]], [[C]](i32) + ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[LSHR7]], [[C]](i32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[LSHR5]], [[SHL8]] + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX6-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[LSHR11]], [[C]](i32) + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[LSHR9]], [[SHL9]] + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x i16>), [[BITCAST5]](<2 x i16>) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX8-LABEL: name: ushlsat_v4s16 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC4]](s16) - ; GFX8-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[SHL]], [[TRUNC4]](s16) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[LSHR4]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[SHL]] - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC5]](s16) - ; GFX8-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[TRUNC5]](s16) - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[LSHR5]] - ; 
GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[C1]], [[SHL1]] - ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC6]](s16) - ; GFX8-NEXT: [[LSHR6:%[0-9]+]]:_(s16) = G_LSHR [[SHL2]], [[TRUNC6]](s16) - ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[LSHR6]] - ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[SHL2]] - ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[TRUNC7]](s16) - ; GFX8-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[SHL3]], [[TRUNC7]](s16) - ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC3]](s16), [[LSHR7]] - ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP3]](s1), [[C1]], [[SHL3]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT]](s16) - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT1]](s16) - ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT2]](s16) - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT3]](s16) - ; GFX8-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] - ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST3]](i32) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[TRUNC4]](i16) + ; GFX8-NEXT: [[LSHR4:%[0-9]+]]:_(i16) = G_LSHR [[SHL]], [[TRUNC4]](i16) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 -1 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[TRUNC]](i16), [[LSHR4]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i16) = G_SELECT [[ICMP]](i1), [[C1]], [[SHL]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL 
[[TRUNC1]], [[TRUNC5]](i16) + ; GFX8-NEXT: [[LSHR5:%[0-9]+]]:_(i16) = G_LSHR [[SHL1]], [[TRUNC5]](i16) + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[TRUNC1]](i16), [[LSHR5]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(i16) = G_SELECT [[ICMP1]](i1), [[C1]], [[SHL1]] + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[TRUNC2]], [[TRUNC6]](i16) + ; GFX8-NEXT: [[LSHR6:%[0-9]+]]:_(i16) = G_LSHR [[SHL2]], [[TRUNC6]](i16) + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[TRUNC2]](i16), [[LSHR6]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(i16) = G_SELECT [[ICMP2]](i1), [[C1]], [[SHL2]] + ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(i16) = G_SHL [[TRUNC3]], [[TRUNC7]](i16) + ; GFX8-NEXT: [[LSHR7:%[0-9]+]]:_(i16) = G_LSHR [[SHL3]], [[TRUNC7]](i16) + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[TRUNC3]](i16), [[LSHR7]] + ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(i16) = G_SELECT [[ICMP3]](i1), [[C1]], [[SHL3]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[SELECT]](i16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[SELECT1]](i16) + ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[SELECT2]](i16) + ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[SELECT3]](i16) + ; GFX8-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x i16>), [[BITCAST5]](<2 x i16>) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX9-LABEL: name: ushlsat_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC4]](s16) - ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[SHL]], 
[[TRUNC4]](s16) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[LSHR4]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[SHL]] - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC5]](s16) - ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[TRUNC5]](s16) - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[LSHR5]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[C1]], [[SHL1]] - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC6]](s16) - ; GFX9-NEXT: [[LSHR6:%[0-9]+]]:_(s16) = G_LSHR [[SHL2]], [[TRUNC6]](s16) - ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[LSHR6]] - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[SHL2]] - ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[TRUNC7]](s16) - ; GFX9-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[SHL3]], [[TRUNC7]](s16) - ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC3]](s16), [[LSHR7]] - ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP3]](s1), [[C1]], [[SHL3]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SELECT]](s16), [[SELECT1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SELECT2]](s16), [[SELECT3]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 - %2:_(<4 x s16>) = G_USHLSAT %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST3]](i32) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[TRUNC4]](i16) + ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(i16) = G_LSHR [[SHL]], [[TRUNC4]](i16) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 -1 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP 
intpred(ne), [[TRUNC]](i16), [[LSHR4]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i16) = G_SELECT [[ICMP]](i1), [[C1]], [[SHL]] + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[TRUNC5]](i16) + ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(i16) = G_LSHR [[SHL1]], [[TRUNC5]](i16) + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[TRUNC1]](i16), [[LSHR5]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i16) = G_SELECT [[ICMP1]](i1), [[C1]], [[SHL1]] + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[TRUNC2]], [[TRUNC6]](i16) + ; GFX9-NEXT: [[LSHR6:%[0-9]+]]:_(i16) = G_LSHR [[SHL2]], [[TRUNC6]](i16) + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[TRUNC2]](i16), [[LSHR6]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(i16) = G_SELECT [[ICMP2]](i1), [[C1]], [[SHL2]] + ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(i16) = G_SHL [[TRUNC3]], [[TRUNC7]](i16) + ; GFX9-NEXT: [[LSHR7:%[0-9]+]]:_(i16) = G_LSHR [[SHL3]], [[TRUNC7]](i16) + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[TRUNC3]](i16), [[LSHR7]] + ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(i16) = G_SELECT [[ICMP3]](i1), [[C1]], [[SHL3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[SELECT]](i16), [[SELECT1]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[SELECT2]](i16), [[SELECT3]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x i16>), [[BUILD_VECTOR1]](<2 x i16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = COPY $vgpr2_vgpr3 + %2:_(<4 x i16>) = G_USHLSAT %0, %1(<4 x i16>) + $vgpr0_vgpr1 = COPY %2(<4 x i16>) ... --- @@ -760,42 +760,42 @@ body: | ; GFX6-LABEL: name: ushlsat_s32 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SHL]], [[COPY1]](s32) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[LSHR]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] - ; GFX6-NEXT: $vgpr0 = COPY [[SELECT]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[COPY1]](i32) + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[SHL]], [[COPY1]](i32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[COPY]](i32), [[LSHR]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[C]], [[SHL]] + ; GFX6-NEXT: $vgpr0 = COPY [[SELECT]](i32) ; ; GFX8-LABEL: name: ushlsat_s32 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SHL]], [[COPY1]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[LSHR]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] - ; GFX8-NEXT: $vgpr0 = COPY [[SELECT]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: 
[[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[COPY1]](i32) + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[SHL]], [[COPY1]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[COPY]](i32), [[LSHR]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[C]], [[SHL]] + ; GFX8-NEXT: $vgpr0 = COPY [[SELECT]](i32) ; ; GFX9-LABEL: name: ushlsat_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SHL]], [[COPY1]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[LSHR]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] - ; GFX9-NEXT: $vgpr0 = COPY [[SELECT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_USHLSAT %0, %1 - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[COPY1]](i32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[SHL]], [[COPY1]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[COPY]](i32), [[LSHR]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[C]], [[SHL]] + ; GFX9-NEXT: $vgpr0 = COPY [[SELECT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_USHLSAT %0, %1(i32) + $vgpr0 = COPY %2(i32) ... --- @@ -807,63 +807,63 @@ body: | ; GFX6-LABEL: name: ushlsat_v2s32 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[UV2]](s32) - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SHL]], [[UV2]](s32) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[LSHR]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[UV3]](s32) - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[UV3]](s32) - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[LSHR1]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C]], [[SHL1]] - ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[UV]], [[UV2]](i32) + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[SHL]], [[UV2]](i32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 
+ ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV]](i32), [[LSHR]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[C]], [[SHL]] + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[UV1]], [[UV3]](i32) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[SHL1]], [[UV3]](i32) + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV1]](i32), [[LSHR1]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[C]], [[SHL1]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SELECT]](i32), [[SELECT1]](i32) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX8-LABEL: name: ushlsat_v2s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[UV2]](s32) - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SHL]], [[UV2]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[LSHR]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[UV3]](s32) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[UV3]](s32) - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[LSHR1]] - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C]], [[SHL1]] - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[UV]], [[UV2]](i32) + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[SHL]], [[UV2]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV]](i32), [[LSHR]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[C]], [[SHL]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[UV1]], [[UV3]](i32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[SHL1]], [[UV3]](i32) + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV1]](i32), [[LSHR1]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[C]], [[SHL1]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SELECT]](i32), [[SELECT1]](i32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX9-LABEL: name: ushlsat_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL 
[[UV]], [[UV2]](s32) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SHL]], [[UV2]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[LSHR]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[UV3]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[UV3]](s32) - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[LSHR1]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C]], [[SHL1]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = G_USHLSAT %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[UV]], [[UV2]](i32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[SHL]], [[UV2]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 -1 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV]](i32), [[LSHR]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[ICMP]](i1), [[C]], [[SHL]] + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[UV1]], [[UV3]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[SHL1]], [[UV3]](i32) + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV1]](i32), [[LSHR1]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i32) = G_SELECT [[ICMP1]](i1), [[C]], [[SHL1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SELECT]](i32), [[SELECT1]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i32>) = G_USHLSAT %0, %1(<2 x i32>) + $vgpr0_vgpr1 = COPY %2(<2 x i32>) ... 
--- @@ -875,45 +875,45 @@ body: | ; GFX6-LABEL: name: ushlsat_s64 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC]](s32) - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[SHL]], [[TRUNC]](s32) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), [[LSHR]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY1]](i64) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[COPY]], [[TRUNC]](i32) + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[SHL]], [[TRUNC]](i32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 -1 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[COPY]](i64), [[LSHR]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[C]], [[SHL]] + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](i64) ; ; GFX8-LABEL: name: ushlsat_s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC]](s32) - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[SHL]], [[TRUNC]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), [[LSHR]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY1]](i64) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[COPY]], [[TRUNC]](i32) + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[SHL]], [[TRUNC]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 -1 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[COPY]](i64), [[LSHR]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[C]], [[SHL]] + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](i64) ; ; GFX9-LABEL: name: ushlsat_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC]](s32) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[SHL]], [[TRUNC]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), [[LSHR]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = G_USHLSAT %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY 
$vgpr2_vgpr3 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY1]](i64) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[COPY]], [[TRUNC]](i32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[SHL]], [[TRUNC]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 -1 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[COPY]](i64), [[LSHR]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[C]], [[SHL]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64) = G_USHLSAT %0, %1(i64) + $vgpr0_vgpr1 = COPY %2(i64) ... --- @@ -925,67 +925,67 @@ body: | ; GFX6-LABEL: name: ushlsat_v2s64 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64) - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[TRUNC]](s32) - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[SHL]], [[TRUNC]](s32) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s64), [[LSHR]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] - ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[TRUNC1]](s32) - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[SHL1]], [[TRUNC1]](s32) - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s64), [[LSHR1]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[C]], [[SHL1]] - ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY1]](<2 x i64>) + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[UV2]](i64) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV]], [[TRUNC]](i32) + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[SHL]], [[TRUNC]](i32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 -1 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV]](i64), [[LSHR]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[C]], [[SHL]] + ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(i32) = G_TRUNC [[UV3]](i64) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[TRUNC1]](i32) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(i64) = G_LSHR [[SHL1]], [[TRUNC1]](i32) + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV1]](i64), [[LSHR1]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[C]], [[SHL1]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[SELECT]](i64), [[SELECT1]](i64) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; GFX8-LABEL: name: ushlsat_v2s64 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, 
$vgpr4_vgpr5_vgpr6_vgpr7 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64) - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[TRUNC]](s32) - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[SHL]], [[TRUNC]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s64), [[LSHR]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[TRUNC1]](s32) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[SHL1]], [[TRUNC1]](s32) - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s64), [[LSHR1]] - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[C]], [[SHL1]] - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY1]](<2 x i64>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[UV2]](i64) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV]], [[TRUNC]](i32) + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[SHL]], [[TRUNC]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 -1 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV]](i64), [[LSHR]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[C]], [[SHL]] + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i32) = G_TRUNC [[UV3]](i64) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[TRUNC1]](i32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i64) = G_LSHR [[SHL1]], [[TRUNC1]](i32) + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV1]](i64), [[LSHR1]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[C]], [[SHL1]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[SELECT]](i64), [[SELECT1]](i64) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) ; ; GFX9-LABEL: name: ushlsat_v2s64 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[TRUNC]](s32) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[SHL]], [[TRUNC]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s64), [[LSHR]] - ; 
GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[TRUNC1]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[SHL1]], [[TRUNC1]](s32) - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s64), [[LSHR1]] - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[C]], [[SHL1]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - %2:_(<2 x s64>) = G_USHLSAT %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY1]](<2 x i64>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[UV2]](i64) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[UV]], [[TRUNC]](i32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[SHL]], [[TRUNC]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 -1 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV]](i64), [[LSHR]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[ICMP]](i1), [[C]], [[SHL]] + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i32) = G_TRUNC [[UV3]](i64) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i64) = G_SHL [[UV1]], [[TRUNC1]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i64) = G_LSHR [[SHL1]], [[TRUNC1]](i32) + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV1]](i64), [[LSHR1]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[ICMP1]](i1), [[C]], [[SHL1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[SELECT]](i64), [[SELECT1]](i64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(<2 x i64>) = G_USHLSAT %0, %1(<2 x i64>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<2 x i64>) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usube.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usube.mir index e630eaefced33..9b5c9b1981ecf 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usube.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usube.mir @@ -10,24 +10,24 @@ body: | ; CHECK-LABEL: name: test_usube_s32 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[COPY]], [[COPY1]], [[ICMP]] - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[USUBE1]](s1) - ; CHECK-NEXT: $vgpr0 = COPY [[USUBE]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(eq), %2, %3 - %5:_(s32), %6:_(s1) = G_USUBE %0, %1, %4 - %7:_(s32) = G_ZEXT %6 - $vgpr0 = COPY %5 - $vgpr1 = COPY %7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[COPY]], [[COPY1]], [[ICMP]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[USUBE1]](i1) + ; CHECK-NEXT: $vgpr0 = COPY [[USUBE]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + %5:_(i32), %6:_(i1) = G_USUBE %0, %1, %4 + %7:_(i32) = G_ZEXT %6(i1) + $vgpr0 = COPY %5(i32) + $vgpr1 = COPY %7(i32) ... 
--- @@ -39,36 +39,36 @@ body: | ; CHECK-LABEL: name: test_usube_v2s32 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV]](s32), [[C]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV1]](s32), [[C]] - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV2]], [[UV4]], [[ICMP]] - ; CHECK-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[ICMP1]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[USUBE]](s32), [[USUBE2]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[USUBE1]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[USUBE3]](s1) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - ; CHECK-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = COPY $vgpr4_vgpr5 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(<2 x s32>) = G_BUILD_VECTOR %3, %3 - %5:_(<2 x s1>) = G_ICMP intpred(eq), %2, %4 - %6:_(<2 x s32>), %7:_(<2 x s1>) = G_USUBE %0, %1, %5 - %8:_(<2 x s32>) = G_ZEXT %7 - $vgpr0_vgpr1 = COPY %6 - $vgpr2_vgpr3 = COPY %8 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr4_vgpr5 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY2]](<2 x i32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[UV]](i32), [[C]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[UV1]](i32), [[C]] + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV2]], [[UV4]], [[ICMP]] + ; CHECK-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV3]], [[UV5]], [[ICMP1]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[USUBE]](i32), [[USUBE2]](i32) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[USUBE1]](i1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[USUBE3]](i1) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[ANYEXT]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[ANYEXT1]], [[C1]] + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR 
[[AND]](i32), [[AND1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + ; CHECK-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i32>) = COPY $vgpr4_vgpr5 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(<2 x i32>) = G_BUILD_VECTOR %3(i32), %3(i32) + %5:_(<2 x i1>) = G_ICMP intpred(eq), %2(<2 x i32>), %4 + %6:_(<2 x i32>), %7:_(<2 x i1>) = G_USUBE %0, %1, %5 + %8:_(<2 x i32>) = G_ZEXT %7(<2 x i1>) + $vgpr0_vgpr1 = COPY %6(<2 x i32>) + $vgpr2_vgpr3 = COPY %8(<2 x i32>) ... --- @@ -80,31 +80,31 @@ body: | ; CHECK-LABEL: name: test_usube_s16 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND %13, [[C1]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[AND1]](s32) - ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[AND]], [[COPY2]], [[ICMP]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[USUBE]](s32), [[AND1]] - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP1]](s1) - ; CHECK-NEXT: $vgpr0 = COPY [[USUBE]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(eq), %2, %3 - %5:_(s16) = G_TRUNC %0 - %6:_(s16) = G_TRUNC %1 - %7:_(s16), %8:_(s1) = G_USUBE %6, %7, %4 - %9:_(s32) = G_ANYEXT %7 - %10:_(s32) = G_ZEXT %8 - $vgpr0 = COPY %9 - $vgpr1 = COPY %10 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND %13, [[C1]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[AND1]](i32) + ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[AND]], [[COPY2]], [[ICMP]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[USUBE]](i32), [[AND1]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[ICMP1]](i1) + ; CHECK-NEXT: $vgpr0 = COPY [[USUBE]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + %5:_(i16) = G_TRUNC %0(i32) + %6:_(i16) = G_TRUNC %1(i32) + %7:_(i16), %8:_(i1) = G_USUBE %6, %7, %4 + %9:_(i32) = G_ANYEXT %7(i16) + %10:_(i32) = G_ZEXT %8(i1) + $vgpr0 = COPY %9(i32) + $vgpr1 = COPY %10(i32) ... 
--- @@ -116,26 +116,26 @@ body: | ; CHECK-LABEL: name: test_usube_s64 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV]], [[UV2]], [[ICMP]] - ; CHECK-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBE1]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBE]](s32), [[USUBE2]](s32) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[USUBE3]](s1) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) - ; CHECK-NEXT: $vgpr2 = COPY [[ZEXT]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s32) = COPY $vgpr4 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(eq), %2, %3 - %5:_(s64), %6:_(s1) = G_USUBE %0, %1, %4 - %7:_(s32) = G_ZEXT %6 - $vgpr0_vgpr1 = COPY %5 - $vgpr2 = COPY %7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV]], [[UV2]], [[ICMP]] + ; CHECK-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV1]], [[UV3]], [[USUBE1]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBE]](i32), [[USUBE2]](i32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[USUBE3]](i1) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + ; CHECK-NEXT: $vgpr2 = COPY [[ZEXT]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i32) = COPY $vgpr4 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + %5:_(i64), %6:_(i1) = G_USUBE %0, %1, %4 + %7:_(i32) = G_ZEXT %6(i1) + $vgpr0_vgpr1 = COPY %5(i64) + $vgpr2 = COPY %7(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubo.mir index 6aff25387fc3e..2b7bc0752ce64 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubo.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubo.mir @@ -10,18 +10,18 @@ body: | ; CHECK-LABEL: name: test_usubo_s32 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[USUBO1]](s1) - ; CHECK-NEXT: $vgpr0 = COPY [[USUBO]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32), %3:_(s1) = G_USUBO %0, %1 - %4:_(s32) = G_ZEXT %3 - $vgpr0 = COPY %2 - $vgpr1 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[USUBO1]](i1) + ; CHECK-NEXT: $vgpr0 = COPY [[USUBO]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32), %3:_(i1) = G_USUBO %0, %1 + %4:_(i32) = G_ZEXT %3(i1) + $vgpr0 = COPY %2(i32) + $vgpr1 = COPY %4(i32) ... --- @@ -33,28 +33,28 @@ body: | ; CHECK-LABEL: name: test_usubo_s7 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[AND1]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C]] - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SUB]](s32), [[AND2]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[AND2]](s32) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s7) = G_TRUNC %0 - %3:_(s7) = G_TRUNC %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 127 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[AND]], [[AND1]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[SUB]], [[C]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SUB]](i32), [[AND2]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[AND2]](i32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[ICMP]](i1) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i7) = G_TRUNC %0(i32) + %3:_(i7) = G_TRUNC %1(i32) + %4:_(i7), %5:_(i1) = G_USUBO %2, %3 + %6:_(i32) = G_ZEXT %4(i7) + %7:_(i32) = G_ZEXT %5(i1) + $vgpr0 = COPY %6(i32) + $vgpr1 = COPY %7(i32) - %4:_(s7), %5:_(s1) = G_USUBO %2, %3 - %6:_(s32) = G_ZEXT %4 - %7:_(s32) = G_ZEXT %5 - $vgpr0 = COPY %6 - $vgpr1 = COPY %7 ... 
--- @@ -66,28 +66,28 @@ body: | ; CHECK-LABEL: name: test_usubo_s16 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[AND1]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C]] - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SUB]](s32), [[AND2]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[AND2]](s32) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[AND]], [[AND1]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[SUB]], [[C]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SUB]](i32), [[AND2]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[AND2]](i32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[ICMP]](i1) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i16), %5:_(i1) = G_USUBO %2, %3 + %6:_(i32) = G_ZEXT %4(i16) + %7:_(i32) = G_ZEXT %5(i1) + $vgpr0 = COPY %6(i32) + $vgpr1 = COPY %7(i32) - %4:_(s16), %5:_(s1) = G_USUBO %2, %3 - %6:_(s32) = G_ZEXT %4 - %7:_(s32) = G_ZEXT %5 - $vgpr0 = COPY %6 - $vgpr1 = COPY %7 ... 
--- @@ -99,22 +99,22 @@ body: | ; CHECK-LABEL: name: test_usubo_s64 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; CHECK-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] - ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[USUBE1]](s1) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) - ; CHECK-NEXT: $vgpr2 = COPY [[ZEXT]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64), %3:_(s1) = G_USUBO %0, %1 - %4:_(s32) = G_ZEXT %3 - $vgpr0_vgpr1 = COPY %2 - $vgpr2 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; CHECK-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV]], [[UV2]] + ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO]](i32), [[USUBE]](i32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[USUBE1]](i1) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + ; CHECK-NEXT: $vgpr2 = COPY [[ZEXT]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64), %3:_(i1) = G_USUBO %0, %1 + %4:_(i32) = G_ZEXT %3(i1) + $vgpr0_vgpr1 = COPY %2(i64) + $vgpr2 = COPY %4(i32) ... 
--- @@ -126,41 +126,41 @@ body: | ; CHECK-LABEL: name: test_usubo_v2s16 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[AND1]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C1]] - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SUB]](s32), [[AND2]] - ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[LSHR]], [[LSHR1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB1]], [[C1]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SUB1]](s32), [[AND3]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[AND2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[AND3]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[SHL]] - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND4]](s32), [[AND5]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) - ; CHECK-NEXT: $vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>), %3:_(<2 x s1>) = G_USUBO %0, %1 - %4:_(<2 x s32>) = G_ZEXT %3 - $vgpr0 = COPY %2 - $vgpr1_vgpr2 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[AND]], [[AND1]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[SUB]], [[C1]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SUB]](i32), [[AND2]] + ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[LSHR]], [[LSHR1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[SUB1]], [[C1]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SUB1]](i32), [[AND3]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[AND2]](i32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[AND3]](i32) + ; CHECK-NEXT: 
[[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY3]], [[C]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY2]], [[SHL]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP]](i1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP1]](i1) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[ANYEXT]], [[C2]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[ANYEXT1]], [[C2]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[AND4]](i32), [[AND5]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) + ; CHECK-NEXT: $vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>), %3:_(<2 x i1>) = G_USUBO %0, %1 + %4:_(<2 x i32>) = G_ZEXT %3(<2 x i1>) + $vgpr0 = COPY %2(<2 x i16>) + $vgpr1_vgpr2 = COPY %4(<2 x i32>) ... --- @@ -171,71 +171,71 @@ body: | ; CHECK-LABEL: name: test_usubo_v3s16 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[AND1]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C1]] - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SUB]](s32), [[AND2]] - ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[LSHR]], [[LSHR1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB1]], [[C1]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SUB1]](s32), [[AND3]] - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; CHECK-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND4]], [[AND5]] - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[SUB2]], [[C1]] - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SUB2]](s32), [[AND6]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP2]](s1) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], 
[[C]](s32) - ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[AND2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[AND3]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[SHL]] - ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[AND6]](s32) - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[COPY4]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]] - ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] - ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] - ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C2]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND9]](s32), [[AND10]](s32), [[AND11]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - %2:_(<3 x s16>), %3:_(<3 x s16>) = G_UNMERGE_VALUES %0 - %4:_(<3 x s16>), %5:_(<3 x s16>) = G_UNMERGE_VALUES %1 - %6:_(<3 x s16>), %7:_(<3 x s1>) = G_USUBO %2, %4 - %8:_(<3 x s16>) = G_IMPLICIT_DEF - %9:_(<6 x s16>) = G_CONCAT_VECTORS %6, %8 - %10:_(<3 x s32>) = G_ZEXT %7 - $vgpr0_vgpr1_vgpr2 = COPY %9 - $vgpr0_vgpr1_vgpr2 = COPY %10 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<6 x i16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C1]] + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[AND]], [[AND1]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[SUB]], [[C1]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SUB]](i32), [[AND2]] + ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[LSHR]], [[LSHR1]] + ; 
CHECK-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[SUB1]], [[C1]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SUB1]](i32), [[AND3]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[BITCAST3]], [[C1]] + ; CHECK-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[AND4]], [[AND5]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(i32) = G_AND [[SUB2]], [[C1]] + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SUB2]](i32), [[AND6]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP]](i1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP1]](i1) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP2]](i1) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[UV6]](<2 x i16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[UV7]](<2 x i16>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[AND2]](i32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[AND3]](i32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY3]], [[C]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY2]], [[SHL]] + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[AND6]](i32) + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(i32) = G_AND [[BITCAST4]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND7]], [[C]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[COPY4]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(i32) = G_AND [[BITCAST5]], [[C1]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND8]], [[C]](i32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR2]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x i16>), [[BITCAST7]](<2 x i16>), [[BITCAST8]](<2 x i16>) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(i32) = G_AND [[ANYEXT]], [[C2]] + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(i32) = G_AND [[ANYEXT1]], [[C2]] + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(i32) = G_AND [[ANYEXT2]], [[C2]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[AND9]](i32), [[AND10]](i32), [[AND11]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) + %0:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<6 x i16>) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(<3 x i16>), %3:_(<3 x i16>) = G_UNMERGE_VALUES %0(<6 x i16>) + %4:_(<3 x i16>), %5:_(<3 x i16>) = G_UNMERGE_VALUES %1(<6 x i16>) + %6:_(<3 x i16>), %7:_(<3 x i1>) = G_USUBO %2, %4 + %8:_(<3 x i16>) = G_IMPLICIT_DEF + %9:_(<6 x i16>) = G_CONCAT_VECTORS %6(<3 x i16>), %8(<3 x i16>) + %10:_(<3 x i32>) = G_ZEXT %7(<3 x i1>) + $vgpr0_vgpr1_vgpr2 = COPY %9(<6 x i16>) + $vgpr0_vgpr1_vgpr2 = COPY %10(<3 x i32>) ... 
--- @@ -247,65 +247,65 @@ body: | ; CHECK-LABEL: name: test_usubo_v4s16 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr1_vgpr2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[AND1]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C1]] - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SUB]](s32), [[AND2]] - ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[LSHR]], [[LSHR2]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB1]], [[C1]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SUB1]](s32), [[AND3]] - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; CHECK-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND4]], [[AND5]] - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[SUB2]], [[C1]] - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SUB2]](s32), [[AND6]] - ; CHECK-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[LSHR1]], [[LSHR3]] - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[SUB3]], [[C1]] - ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SUB3]](s32), [[AND7]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[AND2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[AND3]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[SHL]] - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[AND6]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[AND7]](s32) - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[COPY4]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP2]](s1) - ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP3]](s1) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] - ; 
CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] - ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C2]] - ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C2]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND8]](s32), [[AND9]](s32), [[AND10]](s32), [[AND11]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - ; CHECK-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5 = COPY [[BUILD_VECTOR]](<4 x s32>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $vgpr1_vgpr2 - %2:_(<4 x s16>), %3:_(<4 x s1>) = G_USUBO %0, %1 - %4:_(<4 x s32>) = G_ZEXT %3 - $vgpr0_vgpr1 = COPY %2 - $vgpr2_vgpr3_vgpr4_vgpr5 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr1_vgpr2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C1]] + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[AND]], [[AND1]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[SUB]], [[C1]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SUB]](i32), [[AND2]] + ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[LSHR]], [[LSHR2]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[SUB1]], [[C1]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SUB1]](i32), [[AND3]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[BITCAST3]], [[C1]] + ; CHECK-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[AND4]], [[AND5]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(i32) = G_AND [[SUB2]], [[C1]] + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SUB2]](i32), [[AND6]] + ; CHECK-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[LSHR1]], [[LSHR3]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(i32) = G_AND [[SUB3]], [[C1]] + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[SUB3]](i32), [[AND7]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[AND2]](i32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[AND3]](i32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY3]], [[C]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[COPY2]], [[SHL]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY [[AND6]](i32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY [[AND7]](i32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY5]], [[C]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[COPY4]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) 
= G_BITCAST [[OR1]](i32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x i16>), [[BITCAST5]](<2 x i16>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP]](i1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP1]](i1) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP2]](i1) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(i32) = G_ANYEXT [[ICMP3]](i1) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(i32) = G_AND [[ANYEXT]], [[C2]] + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(i32) = G_AND [[ANYEXT1]], [[C2]] + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(i32) = G_AND [[ANYEXT2]], [[C2]] + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(i32) = G_AND [[ANYEXT3]], [[C2]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[AND8]](i32), [[AND9]](i32), [[AND10]](i32), [[AND11]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) + ; CHECK-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5 = COPY [[BUILD_VECTOR]](<4 x i32>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = COPY $vgpr1_vgpr2 + %2:_(<4 x i16>), %3:_(<4 x i1>) = G_USUBO %0, %1 + %4:_(<4 x i32>) = G_ZEXT %3(<4 x i1>) + $vgpr0_vgpr1 = COPY %2(<4 x i16>) + $vgpr2_vgpr3_vgpr4_vgpr5 = COPY %4(<4 x i32>) ... --- @@ -317,25 +317,25 @@ body: | ; CHECK-LABEL: name: test_usubo_v2s32 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] - ; CHECK-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV1]], [[UV3]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[USUBO]](s32), [[USUBO2]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[USUBO1]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[USUBO3]](s1) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C]] - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - ; CHECK-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>), %3:_(<2 x s1>) = G_USUBO %0, %1 - %4:_(<2 x s32>) = G_ZEXT %3 - $vgpr0_vgpr1 = COPY %2 - $vgpr2_vgpr3 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; CHECK-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV]], [[UV2]] + ; CHECK-NEXT: [[USUBO2:%[0-9]+]]:_(i32), [[USUBO3:%[0-9]+]]:_(i1) = G_USUBO [[UV1]], [[UV3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[USUBO]](i32), [[USUBO2]](i32) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[USUBO1]](i1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT 
[[USUBO3]](i1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[ANYEXT]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[ANYEXT1]], [[C]] + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[AND]](i32), [[AND1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + ; CHECK-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i32>), %3:_(<2 x i1>) = G_USUBO %0, %1 + %4:_(<2 x i32>) = G_ZEXT %3(<2 x i1>) + $vgpr0_vgpr1 = COPY %2(<2 x i32>) + $vgpr2_vgpr3 = COPY %4(<2 x i32>) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir index 349caeb4fbb97..4cf0e11cec752 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir @@ -14,52 +14,52 @@ body: | ; GFX6-LABEL: name: usubsat_s7 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) - ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SHL]], [[SHL1]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[UMIN]] - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SUB]], [[C]](s32) - ; GFX6-NEXT: $vgpr0 = COPY [[LSHR]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 25 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[C]](i32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY1]], [[C]](i32) + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[SHL]], [[SHL1]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[SHL]], [[UMIN]] + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[SUB]], [[C]](i32) + ; GFX6-NEXT: $vgpr0 = COPY [[LSHR]](i32) ; ; GFX8-LABEL: name: usubsat_s7 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) - ; GFX8-NEXT: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[SHL]], [[SHL1]] - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[USUBSAT]], [[C]](s16) - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) - ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 9 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[C]](i16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[C]](i16) + ; GFX8-NEXT: [[USUBSAT:%[0-9]+]]:_(i16) = G_USUBSAT [[SHL]], [[SHL1]] + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i16) = G_LSHR [[USUBSAT]], [[C]](i16) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = 
G_ANYEXT [[LSHR]](i16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: usubsat_s7 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) - ; GFX9-NEXT: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[SHL]], [[SHL1]] - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[USUBSAT]], [[C]](s16) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s7) = G_TRUNC %0 - %3:_(s7) = G_TRUNC %1 - %4:_(s7) = G_USUBSAT %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 9 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[C]](i16) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[C]](i16) + ; GFX9-NEXT: [[USUBSAT:%[0-9]+]]:_(i16) = G_USUBSAT [[SHL]], [[SHL1]] + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i16) = G_LSHR [[USUBSAT]], [[C]](i16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i7) = G_TRUNC %0(i32) + %3:_(i7) = G_TRUNC %1(i32) + %4:_(i7) = G_USUBSAT %2, %3 + %5:_(i32) = G_ANYEXT %4(i7) + $vgpr0 = COPY %5(i32) ... 
--- @@ -71,52 +71,52 @@ body: | ; GFX6-LABEL: name: usubsat_s8 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) - ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SHL]], [[SHL1]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[UMIN]] - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SUB]], [[C]](s32) - ; GFX6-NEXT: $vgpr0 = COPY [[LSHR]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[C]](i32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY1]], [[C]](i32) + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[SHL]], [[SHL1]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[SHL]], [[UMIN]] + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[SUB]], [[C]](i32) + ; GFX6-NEXT: $vgpr0 = COPY [[LSHR]](i32) ; ; GFX8-LABEL: name: usubsat_s8 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) - ; GFX8-NEXT: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[SHL]], [[SHL1]] - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[USUBSAT]], [[C]](s16) - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) - ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[C]](i16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[C]](i16) + ; GFX8-NEXT: [[USUBSAT:%[0-9]+]]:_(i16) = G_USUBSAT [[SHL]], [[SHL1]] + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i16) = G_LSHR [[USUBSAT]], [[C]](i16) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR]](i16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: usubsat_s8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) - ; GFX9-NEXT: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[SHL]], [[SHL1]] - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[USUBSAT]], [[C]](s16) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s8) = G_TRUNC %0 - %3:_(s8) = G_TRUNC %1 - %4:_(s8) = G_USUBSAT %2, %3 - 
%5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[C]](i16) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[C]](i16) + ; GFX9-NEXT: [[USUBSAT:%[0-9]+]]:_(i16) = G_USUBSAT [[SHL]], [[SHL1]] + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i16) = G_LSHR [[USUBSAT]], [[C]](i16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[LSHR]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i8) = G_TRUNC %0(i32) + %3:_(i8) = G_TRUNC %1(i32) + %4:_(i8) = G_USUBSAT %2, %3 + %5:_(i32) = G_ANYEXT %4(i8) + $vgpr0 = COPY %5(i32) ... --- @@ -128,98 +128,98 @@ body: | ; GFX6-LABEL: name: usubsat_v2s8 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C1]](s32) - ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SHL]], [[SHL1]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[UMIN]] - ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[SUB]], [[C1]](s32) - ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) - ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C1]](s32) - ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[SHL2]], [[SHL3]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SHL2]], [[UMIN1]] - ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[SUB1]], [[C1]](s32) - ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LSHR3]], [[COPY2]](s32) - ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC]], [[TRUNC1]] - ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY1]], [[C]](i32) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[C1]](i32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY1]], [[C1]](i32) + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[SHL]], [[SHL1]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[SHL]], [[UMIN]] + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[SUB]], [[C1]](i32) + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C1]](i32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LSHR1]], [[C1]](i32) + ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(i32) = G_UMIN [[SHL2]], [[SHL3]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[SHL2]], [[UMIN1]] + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[SUB1]], 
[[C1]](i32) + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY [[C]](i32) + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LSHR3]], [[COPY2]](i32) + ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[SHL4]](i32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[TRUNC]], [[TRUNC1]] + ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[OR]](i16) + ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX8-LABEL: name: usubsat_v2s8 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C1]](s16) - ; GFX8-NEXT: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[SHL]], [[SHL1]] - ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[USUBSAT]], [[C1]](s16) - ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[C1]](s16) - ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[C1]](s16) - ; GFX8-NEXT: [[USUBSAT1:%[0-9]+]]:_(s16) = G_USUBSAT [[SHL2]], [[SHL3]] - ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[USUBSAT1]], [[C1]](s16) - ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[LSHR3]], [[C1]](s16) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[LSHR2]], [[SHL4]] - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY1]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[C1]](i16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[C1]](i16) + ; GFX8-NEXT: [[USUBSAT:%[0-9]+]]:_(i16) = G_USUBSAT [[SHL]], [[SHL1]] + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(i16) = G_LSHR [[USUBSAT]], [[C1]](i16) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[TRUNC2]], [[C1]](i16) + ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(i16) = G_SHL [[TRUNC3]], [[C1]](i16) + ; GFX8-NEXT: [[USUBSAT1:%[0-9]+]]:_(i16) = G_USUBSAT [[SHL2]], [[SHL3]] + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(i16) = G_LSHR [[USUBSAT1]], [[C1]](i16) + ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(i16) = G_SHL [[LSHR3]], [[C1]](i16) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[LSHR2]], [[SHL4]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[OR]](i16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: usubsat_v2s8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - 
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C2]](s16), [[C2]](s16) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR]], [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR1]], [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[USUBSAT:%[0-9]+]]:_(<2 x s16>) = G_USUBSAT [[SHL]], [[SHL1]] - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[USUBSAT]], [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR2]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL2]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(<2 x s8>) = G_BITCAST %2 - %5:_(<2 x s8>) = G_BITCAST %3 - %6:_(<2 x s8>) = G_USUBSAT %4, %5 - %7:_(s16) = G_BITCAST %6 - %8:_(s32) = G_ANYEXT %7 - $vgpr0 = COPY %8 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY1]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC2]](i16), [[TRUNC3]](i16) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[C2]](i16), [[C2]](i16) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x i16>) = G_SHL [[BUILD_VECTOR]], [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x i16>) = G_SHL [[BUILD_VECTOR1]], [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: [[USUBSAT:%[0-9]+]]:_(<2 x i16>) = G_USUBSAT [[SHL]], [[SHL1]] + ; GFX9-NEXT: 
[[LSHR2:%[0-9]+]]:_(<2 x i16>) = G_LSHR [[USUBSAT]], [[BUILD_VECTOR2]](<2 x i16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[LSHR2]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C1]](i32) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC4]], [[C3]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC5]], [[C3]] + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C2]](i16) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL2]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[OR]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(<2 x i8>) = G_BITCAST %2(i16) + %5:_(<2 x i8>) = G_BITCAST %3(i16) + %6:_(<2 x i8>) = G_USUBSAT %4, %5 + %7:_(i16) = G_BITCAST %6(<2 x i8>) + %8:_(i32) = G_ANYEXT %7(i16) + $vgpr0 = COPY %8(i32) ... --- @@ -231,44 +231,44 @@ body: | ; GFX6-LABEL: name: usubsat_s16 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) - ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SHL]], [[SHL1]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[UMIN]] - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SUB]], [[C]](s32) - ; GFX6-NEXT: $vgpr0 = COPY [[LSHR]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[C]](i32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY1]], [[C]](i32) + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[SHL]], [[SHL1]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[SHL]], [[UMIN]] + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[SUB]], [[C]](i32) + ; GFX6-NEXT: $vgpr0 = COPY [[LSHR]](i32) ; ; GFX8-LABEL: name: usubsat_s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8-NEXT: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC]], [[TRUNC1]] - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[USUBSAT]](s16) - ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX8-NEXT: [[USUBSAT:%[0-9]+]]:_(i16) = G_USUBSAT [[TRUNC]], [[TRUNC1]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[USUBSAT]](i16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX9-LABEL: name: usubsat_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = 
G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC]], [[TRUNC1]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[USUBSAT]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s16) = G_USUBSAT %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX9-NEXT: [[USUBSAT:%[0-9]+]]:_(i16) = G_USUBSAT [[TRUNC]], [[TRUNC1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[USUBSAT]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i16) = G_USUBSAT %2, %3 + %5:_(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... --- @@ -280,62 +280,62 @@ body: | ; GFX6-LABEL: name: usubsat_v2s16 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SHL]], [[SHL1]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[UMIN]] - ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[SUB]], [[C]](s32) - ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[SHL2]], [[SHL3]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SHL2]], [[UMIN1]] - ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[SUB1]], [[C]](s32) - ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LSHR3]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL4]] - ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[BITCAST]], [[C]](i32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[BITCAST1]], [[C]](i32) + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[SHL]], [[SHL1]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[SHL]], [[UMIN]] + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[SUB]], [[C]](i32) + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C]](i32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LSHR1]], [[C]](i32) + ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(i32) = G_UMIN 
[[SHL2]], [[SHL3]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[SHL2]], [[UMIN1]] + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[SUB1]], [[C]](i32) + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LSHR3]], [[C]](i32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[LSHR2]], [[SHL4]] + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) ; ; GFX8-LABEL: name: usubsat_v2s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC]], [[TRUNC2]] - ; GFX8-NEXT: [[USUBSAT1:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC1]], [[TRUNC3]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT]](s16) - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT1]](s16) - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX8-NEXT: [[USUBSAT:%[0-9]+]]:_(i16) = G_USUBSAT [[TRUNC]], [[TRUNC2]] + ; GFX8-NEXT: [[USUBSAT1:%[0-9]+]]:_(i16) = G_USUBSAT [[TRUNC1]], [[TRUNC3]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[USUBSAT]](i16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[USUBSAT1]](i16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x i16>) ; ; GFX9-LABEL: name: usubsat_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[USUBSAT:%[0-9]+]]:_(<2 x s16>) = G_USUBSAT [[COPY]], [[COPY1]] - ; GFX9-NEXT: $vgpr0 = COPY [[USUBSAT]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_USUBSAT %0, %1 - $vgpr0 = COPY %2 + ; GFX9-NEXT: 
[[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[USUBSAT:%[0-9]+]]:_(<2 x i16>) = G_USUBSAT [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[USUBSAT]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = G_USUBSAT %0, %1 + $vgpr0 = COPY %2(<2 x i16>) ... --- @@ -347,139 +347,139 @@ body: | ; GFX6-LABEL: name: usubsat_v3s16 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SHL]], [[SHL1]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[UMIN]] - ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[SUB]], [[C]](s32) - ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[C]](s32) - ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[SHL2]], [[SHL3]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SHL2]], [[UMIN1]] - ; GFX6-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[SUB1]], [[C]](s32) - ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) - ; GFX6-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[SHL4]], [[SHL5]] - ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SHL4]], [[UMIN2]] - ; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[SUB2]], [[C]](s32) - ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX6-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR4]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL6]] - ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR5]], [[SHL7]] - ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR6]], [[SHL8]] - ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), 
[[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[BITCAST]], [[C]](i32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[LSHR1]], [[C]](i32) + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[SHL]], [[SHL1]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[SHL]], [[UMIN]] + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[SUB]], [[C]](i32) + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C]](i32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[BITCAST2]], [[C]](i32) + ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(i32) = G_UMIN [[SHL2]], [[SHL3]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[SHL2]], [[UMIN1]] + ; GFX6-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[SUB1]], [[C]](i32) + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[BITCAST1]], [[C]](i32) + ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[LSHR2]], [[C]](i32) + ; GFX6-NEXT: [[UMIN2:%[0-9]+]]:_(i32) = G_UMIN [[SHL4]], [[SHL5]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SHL4]], [[UMIN2]] + ; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[SUB2]], [[C]](i32) + ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX6-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[LSHR4]], [[C]](i32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[LSHR3]], [[SHL6]] + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST3]], [[C1]] + ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[AND]], [[C]](i32) + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[LSHR5]], [[SHL7]] + ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST4]], [[C1]] + ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR6]], [[SHL8]] + ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x i16>), [[BITCAST6]](<2 x i16>), [[BITCAST7]](<2 x i16>) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX8-LABEL: name: usubsat_v3s16 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = 
G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX8-NEXT: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC]], [[TRUNC3]] - ; GFX8-NEXT: [[USUBSAT1:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC1]], [[TRUNC4]] - ; GFX8-NEXT: [[USUBSAT2:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC2]], [[TRUNC5]] - ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT]](s16) - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT1]](s16) - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT2]](s16) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL2]] - ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + 
; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX8-NEXT: [[USUBSAT:%[0-9]+]]:_(i16) = G_USUBSAT [[TRUNC]], [[TRUNC3]] + ; GFX8-NEXT: [[USUBSAT1:%[0-9]+]]:_(i16) = G_USUBSAT [[TRUNC1]], [[TRUNC4]] + ; GFX8-NEXT: [[USUBSAT2:%[0-9]+]]:_(i16) = G_USUBSAT [[TRUNC2]], [[TRUNC5]] + ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[USUBSAT]](i16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[USUBSAT1]](i16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[USUBSAT2]](i16) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST3]], [[C1]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND]], [[C]](i32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST4]], [[C1]] + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR3]], [[SHL2]] + ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x i16>), [[BITCAST6]](<2 x i16>), [[BITCAST7]](<2 x i16>) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX9-LABEL: name: usubsat_v3s16 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[USUBSAT:%[0-9]+]]:_(<2 x s16>) = G_USUBSAT [[UV]], 
[[BUILD_VECTOR1]] - ; GFX9-NEXT: [[USUBSAT1:%[0-9]+]]:_(<2 x s16>) = G_USUBSAT [[BUILD_VECTOR]], [[BUILD_VECTOR2]] - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[USUBSAT]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[USUBSAT1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC8]](s16), [[TRUNC9]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) - %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0 - %3:_(<3 x s16>) = G_USUBSAT %1, %2 - %4:_(<3 x s16>) = G_IMPLICIT_DEF - %5:_(<6 x s16>) = G_CONCAT_VECTORS %3, %4 - $vgpr0_vgpr1_vgpr2 = COPY %5 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC1]](i16), [[TRUNC2]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC3]](i16), [[DEF]](i16) + ; GFX9-NEXT: [[USUBSAT:%[0-9]+]]:_(<2 x i16>) = G_USUBSAT [[UV]], [[BUILD_VECTOR1]] + ; GFX9-NEXT: [[USUBSAT1:%[0-9]+]]:_(<2 x i16>) = G_USUBSAT [[BUILD_VECTOR]], [[BUILD_VECTOR2]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[USUBSAT]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX9-NEXT: 
[[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[USUBSAT1]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST3]](i32) + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF1]](<4 x i16>) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC9:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST5]](i32) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC4]](i16), [[TRUNC5]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC6]](i16), [[TRUNC7]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC8]](i16), [[TRUNC9]](i16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[BUILD_VECTOR3]](<2 x i16>), [[BUILD_VECTOR4]](<2 x i16>), [[BUILD_VECTOR5]](<2 x i16>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x i16>) + %0:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x i16>), %2:_(<3 x i16>) = G_UNMERGE_VALUES %0(<6 x i16>) + %3:_(<3 x i16>) = G_USUBSAT %1, %2 + %4:_(<3 x i16>) = G_IMPLICIT_DEF + %5:_(<6 x i16>) = G_CONCAT_VECTORS %3(<3 x i16>), %4(<3 x i16>) + $vgpr0_vgpr1_vgpr2 = COPY %5(<6 x i16>) ... --- @@ -491,104 +491,104 @@ body: | ; GFX6-LABEL: name: usubsat_v4s16 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[C]](s32) - ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SHL]], [[SHL1]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[UMIN]] - ; GFX6-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[SUB]], [[C]](s32) - ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) - ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[SHL2]], [[SHL3]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SHL2]], [[UMIN1]] - ; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[SUB1]], [[C]](s32) - ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: 
[[SHL5:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[C]](s32) - ; GFX6-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[SHL4]], [[SHL5]] - ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SHL4]], [[UMIN2]] - ; GFX6-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[SUB2]], [[C]](s32) - ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LSHR3]], [[C]](s32) - ; GFX6-NEXT: [[UMIN3:%[0-9]+]]:_(s32) = G_UMIN [[SHL6]], [[SHL7]] - ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SHL6]], [[UMIN3]] - ; GFX6-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[SUB3]], [[C]](s32) - ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LSHR5]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR4]], [[SHL8]] - ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[LSHR7]], [[C]](s32) - ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR6]], [[SHL9]] - ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[BITCAST]], [[C]](i32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[BITCAST2]], [[C]](i32) + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[SHL]], [[SHL1]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[SHL]], [[UMIN]] + ; GFX6-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[SUB]], [[C]](i32) + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C]](i32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[LSHR2]], [[C]](i32) + ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(i32) = G_UMIN [[SHL2]], [[SHL3]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[SHL2]], [[UMIN1]] + ; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[SUB1]], [[C]](i32) + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[BITCAST1]], [[C]](i32) + ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[BITCAST3]], [[C]](i32) + ; GFX6-NEXT: [[UMIN2:%[0-9]+]]:_(i32) = G_UMIN [[SHL4]], [[SHL5]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(i32) = G_SUB [[SHL4]], [[UMIN2]] + ; GFX6-NEXT: [[LSHR6:%[0-9]+]]:_(i32) = G_LSHR [[SUB2]], [[C]](i32) + ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[LSHR1]], [[C]](i32) + ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(i32) = G_SHL [[LSHR3]], [[C]](i32) + ; GFX6-NEXT: [[UMIN3:%[0-9]+]]:_(i32) = G_UMIN [[SHL6]], [[SHL7]] + ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(i32) = G_SUB [[SHL6]], [[UMIN3]] + ; GFX6-NEXT: [[LSHR7:%[0-9]+]]:_(i32) = 
G_LSHR [[SUB3]], [[C]](i32) + ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(i32) = G_SHL [[LSHR5]], [[C]](i32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[LSHR4]], [[SHL8]] + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX6-NEXT: [[SHL9:%[0-9]+]]:_(i32) = G_SHL [[LSHR7]], [[C]](i32) + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[LSHR6]], [[SHL9]] + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x i16>), [[BITCAST5]](<2 x i16>) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX8-LABEL: name: usubsat_v4s16 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX8-NEXT: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC]], [[TRUNC4]] - ; GFX8-NEXT: [[USUBSAT1:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC1]], [[TRUNC5]] - ; GFX8-NEXT: [[USUBSAT2:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC2]], [[TRUNC6]] - ; GFX8-NEXT: [[USUBSAT3:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC3]], [[TRUNC7]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT]](s16) - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT1]](s16) - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT2]](s16) - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT3]](s16) - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) 
= COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV3]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST3]](i32) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX8-NEXT: [[USUBSAT:%[0-9]+]]:_(i16) = G_USUBSAT [[TRUNC]], [[TRUNC4]] + ; GFX8-NEXT: [[USUBSAT1:%[0-9]+]]:_(i16) = G_USUBSAT [[TRUNC1]], [[TRUNC5]] + ; GFX8-NEXT: [[USUBSAT2:%[0-9]+]]:_(i16) = G_USUBSAT [[TRUNC2]], [[TRUNC6]] + ; GFX8-NEXT: [[USUBSAT3:%[0-9]+]]:_(i16) = G_USUBSAT [[TRUNC3]], [[TRUNC7]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[USUBSAT]](i16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[USUBSAT1]](i16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[USUBSAT2]](i16) + ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[USUBSAT3]](i16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x i16>), [[BITCAST5]](<2 x i16>) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) ; ; GFX9-LABEL: name: usubsat_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9-NEXT: [[USUBSAT:%[0-9]+]]:_(<2 x s16>) = G_USUBSAT [[UV]], [[UV2]] - ; GFX9-NEXT: [[USUBSAT1:%[0-9]+]]:_(<2 x s16>) = G_USUBSAT [[UV1]], [[UV3]] - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[USUBSAT]](<2 x s16>), [[USUBSAT1]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 - %2:_(<4 x s16>) = G_USUBSAT %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: 
[[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x i16>), [[UV3:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; GFX9-NEXT: [[USUBSAT:%[0-9]+]]:_(<2 x i16>) = G_USUBSAT [[UV]], [[UV2]] + ; GFX9-NEXT: [[USUBSAT1:%[0-9]+]]:_(<2 x i16>) = G_USUBSAT [[UV1]], [[UV3]] + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[USUBSAT]](<2 x i16>), [[USUBSAT1]](<2 x i16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = COPY $vgpr2_vgpr3 + %2:_(<4 x i16>) = G_USUBSAT %0, %1 + $vgpr0_vgpr1 = COPY %2(<4 x i16>) ... --- @@ -600,31 +600,31 @@ body: | ; GFX6-LABEL: name: usubsat_s32 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[COPY]], [[COPY1]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[UMIN]] - ; GFX6-NEXT: $vgpr0 = COPY [[SUB]](s32) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[COPY]], [[COPY1]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[COPY]], [[UMIN]] + ; GFX6-NEXT: $vgpr0 = COPY [[SUB]](i32) ; ; GFX8-LABEL: name: usubsat_s32 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[USUBSAT:%[0-9]+]]:_(s32) = G_USUBSAT [[COPY]], [[COPY1]] - ; GFX8-NEXT: $vgpr0 = COPY [[USUBSAT]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[USUBSAT:%[0-9]+]]:_(i32) = G_USUBSAT [[COPY]], [[COPY1]] + ; GFX8-NEXT: $vgpr0 = COPY [[USUBSAT]](i32) ; ; GFX9-LABEL: name: usubsat_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[USUBSAT:%[0-9]+]]:_(s32) = G_USUBSAT [[COPY]], [[COPY1]] - ; GFX9-NEXT: $vgpr0 = COPY [[USUBSAT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_USUBSAT %0, %1 - $vgpr0 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[USUBSAT:%[0-9]+]]:_(i32) = G_USUBSAT [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[USUBSAT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_USUBSAT %0, %1 + $vgpr0 = COPY %2(i32) ... 
--- @@ -636,44 +636,44 @@ body: | ; GFX6-LABEL: name: usubsat_v2s32 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[UV]], [[UV2]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[UMIN]] - ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[UV1]], [[UV3]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[UMIN1]] - ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB]](s32), [[SUB1]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(i32) = G_UMIN [[UV]], [[UV2]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(i32) = G_SUB [[UV]], [[UMIN]] + ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(i32) = G_UMIN [[UV1]], [[UV3]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(i32) = G_SUB [[UV1]], [[UMIN1]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[SUB]](i32), [[SUB1]](i32) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX8-LABEL: name: usubsat_v2s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX8-NEXT: [[USUBSAT:%[0-9]+]]:_(s32) = G_USUBSAT [[UV]], [[UV2]] - ; GFX8-NEXT: [[USUBSAT1:%[0-9]+]]:_(s32) = G_USUBSAT [[UV1]], [[UV3]] - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[USUBSAT]](s32), [[USUBSAT1]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX8-NEXT: [[USUBSAT:%[0-9]+]]:_(i32) = G_USUBSAT [[UV]], [[UV2]] + ; GFX8-NEXT: [[USUBSAT1:%[0-9]+]]:_(i32) = G_USUBSAT [[UV1]], [[UV3]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[USUBSAT]](i32), [[USUBSAT1]](i32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) ; ; GFX9-LABEL: name: usubsat_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9-NEXT: [[USUBSAT:%[0-9]+]]:_(s32) = G_USUBSAT [[UV]], [[UV2]] - ; GFX9-NEXT: 
[[USUBSAT1:%[0-9]+]]:_(s32) = G_USUBSAT [[UV1]], [[UV3]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[USUBSAT]](s32), [[USUBSAT1]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = G_USUBSAT %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; GFX9-NEXT: [[USUBSAT:%[0-9]+]]:_(i32) = G_USUBSAT [[UV]], [[UV2]] + ; GFX9-NEXT: [[USUBSAT1:%[0-9]+]]:_(i32) = G_USUBSAT [[UV1]], [[UV3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[USUBSAT]](i32), [[USUBSAT1]](i32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i32>) = G_USUBSAT %0, %1 + $vgpr0_vgpr1 = COPY %2(<2 x i32>) ... --- @@ -685,48 +685,48 @@ body: | ; GFX6-LABEL: name: usubsat_s64 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] - ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] - ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[USUBE1]](s1), [[C]], [[MV]] - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV]], [[UV2]] + ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO]](i32), [[USUBE]](i32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[USUBE1]](i1), [[C]], [[MV]] + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](i64) ; ; GFX8-LABEL: name: usubsat_s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX8-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] - ; GFX8-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] - ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) - ; GFX8-NEXT: 
[[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[USUBE1]](s1), [[C]], [[MV]] - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX8-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV]], [[UV2]] + ; GFX8-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO]](i32), [[USUBE]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[USUBE1]](i1), [[C]], [[MV]] + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](i64) ; ; GFX9-LABEL: name: usubsat_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX9-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] - ; GFX9-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[USUBE1]](s1), [[C]], [[MV]] - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = G_USUBSAT %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX9-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV]], [[UV2]] + ; GFX9-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO]](i32), [[USUBE]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[USUBE1]](i1), [[C]], [[MV]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64) = G_USUBSAT %0, %1 + $vgpr0_vgpr1 = COPY %2(i64) ... 
--- @@ -738,73 +738,73 @@ body: | ; GFX6-LABEL: name: usubsat_v2s64 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV4]], [[UV6]] - ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV5]], [[UV7]], [[USUBO1]] - ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[USUBE1]](s1), [[C]], [[MV]] - ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX6-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV8]], [[UV10]] - ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV9]], [[UV11]], [[USUBO3]] - ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[USUBE3]](s1), [[C]], [[MV1]] - ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY1]](<2 x i64>) + ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV]](i64) + ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV4]], [[UV6]] + ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV5]], [[UV7]], [[USUBO1]] + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO]](i32), [[USUBE]](i32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[USUBE1]](i1), [[C]], [[MV]] + ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV1]](i64) + ; GFX6-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV3]](i64) + ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(i32), [[USUBO3:%[0-9]+]]:_(i1) = G_USUBO [[UV8]], [[UV10]] + ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV9]], [[UV11]], [[USUBO3]] + ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO2]](i32), [[USUBE2]](i32) + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[USUBE3]](i1), [[C]], [[MV1]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[SELECT]](i64), [[SELECT1]](i64) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY 
[[BUILD_VECTOR]](<2 x i64>) ; ; GFX8-LABEL: name: usubsat_v2s64 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX8-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV4]], [[UV6]] - ; GFX8-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV5]], [[UV7]], [[USUBO1]] - ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[USUBE1]](s1), [[C]], [[MV]] - ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV8]], [[UV10]] - ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV9]], [[UV11]], [[USUBO3]] - ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[USUBE3]](s1), [[C]], [[MV1]] - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY1]](<2 x i64>) + ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV]](i64) + ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX8-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV4]], [[UV6]] + ; GFX8-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV5]], [[UV7]], [[USUBO1]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO]](i32), [[USUBE]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[USUBE1]](i1), [[C]], [[MV]] + ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV1]](i64) + ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV3]](i64) + ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(i32), [[USUBO3:%[0-9]+]]:_(i1) = G_USUBO [[UV8]], [[UV10]] + ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV9]], [[UV11]], [[USUBO3]] + ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO2]](i32), [[USUBE2]](i32) + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[USUBE3]](i1), [[C]], [[MV1]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[SELECT]](i64), [[SELECT1]](i64) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY 
[[BUILD_VECTOR]](<2 x i64>) ; ; GFX9-LABEL: name: usubsat_v2s64 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX9-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV4]], [[UV6]] - ; GFX9-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV5]], [[UV7]], [[USUBO1]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[USUBE1]](s1), [[C]], [[MV]] - ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV8]], [[UV10]] - ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV9]], [[UV11]], [[USUBO3]] - ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[USUBE3]](s1), [[C]], [[MV1]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - %2:_(<2 x s64>) = G_USUBSAT %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY1]](<2 x i64>) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV]](i64) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV2]](i64) + ; GFX9-NEXT: [[USUBO:%[0-9]+]]:_(i32), [[USUBO1:%[0-9]+]]:_(i1) = G_USUBO [[UV4]], [[UV6]] + ; GFX9-NEXT: [[USUBE:%[0-9]+]]:_(i32), [[USUBE1:%[0-9]+]]:_(i1) = G_USUBE [[UV5]], [[UV7]], [[USUBO1]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO]](i32), [[USUBE]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(i64) = G_SELECT [[USUBE1]](i1), [[C]], [[MV]] + ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV1]](i64) + ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[UV3]](i64) + ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(i32), [[USUBO3:%[0-9]+]]:_(i1) = G_USUBO [[UV8]], [[UV10]] + ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(i32), [[USUBE3:%[0-9]+]]:_(i1) = G_USUBE [[UV9]], [[UV11]], [[USUBO3]] + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[USUBO2]](i32), [[USUBE2]](i32) + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(i64) = G_SELECT [[USUBE3]](i1), [[C]], 
[[MV1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[SELECT]](i64), [[SELECT1]](i64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(<2 x i64>) = G_USUBSAT %0, %1 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<2 x i64>) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-vector-args-gfx7.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-vector-args-gfx7.mir index 4328d47969a1e..b53028c0ec98a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-vector-args-gfx7.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-vector-args-gfx7.mir @@ -41,35 +41,35 @@ body: | ; GFX7-LABEL: name: and_v2i16 ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) - ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>) - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX7-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) - ; GFX7-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR1]](<2 x s32>) - ; GFX7-NEXT: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[TRUNC]], [[TRUNC1]] - ; GFX7-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[AND]](<2 x s16>) - ; GFX7-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX7-NEXT: $vgpr0 = COPY [[BITCAST]](s32) - ; GFX7-NEXT: $vgpr1 = COPY [[LSHR]](s32) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32) + ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x i16>) = G_TRUNC [[BUILD_VECTOR]](<2 x i32>) + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX7-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY2]](i32), [[COPY3]](i32) + ; GFX7-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x i16>) = G_TRUNC [[BUILD_VECTOR1]](<2 x i32>) + ; GFX7-NEXT: [[AND:%[0-9]+]]:_(<2 x i16>) = G_AND [[TRUNC]], [[TRUNC1]] + ; GFX7-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[AND]](<2 x i16>) + ; GFX7-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX7-NEXT: $vgpr0 = COPY [[BITCAST]](i32) + ; GFX7-NEXT: $vgpr1 = COPY [[LSHR]](i32) ; GFX7-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 - %3:_(s32) = COPY $vgpr0 - %4:_(s32) = COPY $vgpr1 - %5:_(<2 x s32>) = G_BUILD_VECTOR %3(s32), %4(s32) - %0:_(<2 x s16>) = G_TRUNC %5(<2 x s32>) - %6:_(s32) = COPY $vgpr2 - %7:_(s32) = COPY $vgpr3 - %8:_(<2 x s32>) = G_BUILD_VECTOR %6(s32), %7(s32) - %1:_(<2 x s16>) = G_TRUNC %8(<2 x s32>) - %9:_(<2 x s16>) = G_AND %0, %1 - %13:_(s16), %14:_(s16) = G_UNMERGE_VALUES %9(<2 x s16>) - %11:_(s32) = G_ANYEXT %13(s16) - %12:_(s32) = G_ANYEXT %14(s16) - $vgpr0 = COPY %11(s32) - $vgpr1 = COPY %12(s32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(<2 x i32>) = G_BUILD_VECTOR %0(i32), %1(i32) + %3:_(<2 x i16>) = G_TRUNC %2(<2 x i32>) + %4:_(i32) = COPY $vgpr2 + %5:_(i32) = COPY $vgpr3 + %6:_(<2 x i32>) = G_BUILD_VECTOR %4(i32), %5(i32) + %7:_(<2 x 
i16>) = G_TRUNC %6(<2 x i32>) + %8:_(<2 x i16>) = G_AND %3, %7 + %9:_(i16), %10:_(i16) = G_UNMERGE_VALUES %8(<2 x i16>) + %11:_(i32) = G_ANYEXT %9(i16) + %12:_(i32) = G_ANYEXT %10(i16) + $vgpr0 = COPY %11(i32) + $vgpr1 = COPY %12(i32) SI_RETURN implicit $vgpr0, implicit $vgpr1 ... @@ -83,37 +83,37 @@ body: | ; GFX7-LABEL: name: add_v3i16 ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX7-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY3]] - ; GFX7-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[COPY4]] - ; GFX7-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[COPY2]], [[COPY5]] - ; GFX7-NEXT: $vgpr0 = COPY [[ADD]](s32) - ; GFX7-NEXT: $vgpr1 = COPY [[ADD1]](s32) - ; GFX7-NEXT: $vgpr2 = COPY [[ADD2]](s32) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX7-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[COPY]], [[COPY3]] + ; GFX7-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[COPY1]], [[COPY4]] + ; GFX7-NEXT: [[ADD2:%[0-9]+]]:_(i32) = G_ADD [[COPY2]], [[COPY5]] + ; GFX7-NEXT: $vgpr0 = COPY [[ADD]](i32) + ; GFX7-NEXT: $vgpr1 = COPY [[ADD1]](i32) + ; GFX7-NEXT: $vgpr2 = COPY [[ADD2]](i32) ; GFX7-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 - %3:_(s32) = COPY $vgpr0 - %4:_(s32) = COPY $vgpr1 - %5:_(s32) = COPY $vgpr2 - %6:_(<3 x s32>) = G_BUILD_VECTOR %3(s32), %4(s32), %5(s32) - %0:_(<3 x s16>) = G_TRUNC %6(<3 x s32>) - %7:_(s32) = COPY $vgpr3 - %8:_(s32) = COPY $vgpr4 - %9:_(s32) = COPY $vgpr5 - %10:_(<3 x s32>) = G_BUILD_VECTOR %7(s32), %8(s32), %9(s32) - %1:_(<3 x s16>) = G_TRUNC %10(<3 x s32>) - %11:_(<3 x s16>) = G_ADD %0, %1 - %16:_(s16), %17:_(s16), %18:_(s16) = G_UNMERGE_VALUES %11(<3 x s16>) - %13:_(s32) = G_ANYEXT %16(s16) - %14:_(s32) = G_ANYEXT %17(s16) - %15:_(s32) = G_ANYEXT %18(s16) - $vgpr0 = COPY %13(s32) - $vgpr1 = COPY %14(s32) - $vgpr2 = COPY %15(s32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(<3 x i32>) = G_BUILD_VECTOR %0(i32), %1(i32), %2(i32) + %4:_(<3 x i16>) = G_TRUNC %3(<3 x i32>) + %5:_(i32) = COPY $vgpr3 + %6:_(i32) = COPY $vgpr4 + %7:_(i32) = COPY $vgpr5 + %8:_(<3 x i32>) = G_BUILD_VECTOR %5(i32), %6(i32), %7(i32) + %9:_(<3 x i16>) = G_TRUNC %8(<3 x i32>) + %10:_(<3 x i16>) = G_ADD %4, %9 + %11:_(i16), %12:_(i16), %13:_(i16) = G_UNMERGE_VALUES %10(<3 x i16>) + %14:_(i32) = G_ANYEXT %11(i16) + %15:_(i32) = G_ANYEXT %12(i16) + %16:_(i32) = G_ANYEXT %13(i16) + $vgpr0 = COPY %14(i32) + $vgpr1 = COPY %15(i32) + $vgpr2 = COPY %16(i32) SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ... 
@@ -127,41 +127,41 @@ body: | ; GFX7-LABEL: name: shl_v3i16 ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX7-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX7-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] - ; GFX7-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND]](s32) - ; GFX7-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]] - ; GFX7-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[AND1]](s32) - ; GFX7-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C]] - ; GFX7-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[AND2]](s32) - ; GFX7-NEXT: $vgpr0 = COPY [[SHL]](s32) - ; GFX7-NEXT: $vgpr1 = COPY [[SHL1]](s32) - ; GFX7-NEXT: $vgpr2 = COPY [[SHL2]](s32) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX7-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; GFX7-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY3]], [[C]] + ; GFX7-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[COPY]], [[AND]](i32) + ; GFX7-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[COPY4]], [[C]] + ; GFX7-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[COPY1]], [[AND1]](i32) + ; GFX7-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[COPY5]], [[C]] + ; GFX7-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[COPY2]], [[AND2]](i32) + ; GFX7-NEXT: $vgpr0 = COPY [[SHL]](i32) + ; GFX7-NEXT: $vgpr1 = COPY [[SHL1]](i32) + ; GFX7-NEXT: $vgpr2 = COPY [[SHL2]](i32) ; GFX7-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 - %3:_(s32) = COPY $vgpr0 - %4:_(s32) = COPY $vgpr1 - %5:_(s32) = COPY $vgpr2 - %6:_(<3 x s32>) = G_BUILD_VECTOR %3(s32), %4(s32), %5(s32) - %0:_(<3 x s16>) = G_TRUNC %6(<3 x s32>) - %7:_(s32) = COPY $vgpr3 - %8:_(s32) = COPY $vgpr4 - %9:_(s32) = COPY $vgpr5 - %10:_(<3 x s32>) = G_BUILD_VECTOR %7(s32), %8(s32), %9(s32) - %1:_(<3 x s16>) = G_TRUNC %10(<3 x s32>) - %11:_(<3 x s16>) = G_SHL %0, %1(<3 x s16>) - %16:_(s16), %17:_(s16), %18:_(s16) = G_UNMERGE_VALUES %11(<3 x s16>) - %13:_(s32) = G_ANYEXT %16(s16) - %14:_(s32) = G_ANYEXT %17(s16) - %15:_(s32) = G_ANYEXT %18(s16) - $vgpr0 = COPY %13(s32) - $vgpr1 = COPY %14(s32) - $vgpr2 = COPY %15(s32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(<3 x i32>) = G_BUILD_VECTOR %0(i32), %1(i32), %2(i32) + %4:_(<3 x i16>) = G_TRUNC %3(<3 x i32>) + %5:_(i32) = COPY $vgpr3 + %6:_(i32) = COPY $vgpr4 + %7:_(i32) = COPY $vgpr5 + %8:_(<3 x i32>) = G_BUILD_VECTOR %5(i32), %6(i32), %7(i32) + %9:_(<3 x i16>) = G_TRUNC %8(<3 x i32>) + %10:_(<3 x i16>) = G_SHL %4, %9(<3 x i16>) + %11:_(i16), %12:_(i16), %13:_(i16) = G_UNMERGE_VALUES %10(<3 x i16>) + %14:_(i32) = G_ANYEXT %11(i16) + %15:_(i32) = G_ANYEXT %12(i16) + %16:_(i32) = G_ANYEXT %13(i16) + $vgpr0 = COPY %14(i32) + $vgpr1 = COPY %15(i32) + $vgpr2 = COPY %16(i32) SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ... 
@@ -175,87 +175,144 @@ body: | ; GFX7-LABEL: name: fma_v4f16 ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX7-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX7-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX7-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; GFX7-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) - ; GFX7-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) - ; GFX7-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) - ; GFX7-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[COPY7]](s32) - ; GFX7-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32) - ; GFX7-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX7-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX7-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) - ; GFX7-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX7-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; GFX7-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC8]](s16) - ; GFX7-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], [[FPEXT2]] - ; GFX7-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA]](s32) - ; GFX7-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; GFX7-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; GFX7-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC9]](s16) - ; GFX7-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FPEXT3]], [[FPEXT4]], [[FPEXT5]] - ; GFX7-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA1]](s32) - ; GFX7-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; GFX7-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) - ; GFX7-NEXT: [[FPEXT8:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC10]](s16) - ; GFX7-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FPEXT6]], [[FPEXT7]], [[FPEXT8]] - ; GFX7-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA2]](s32) - ; GFX7-NEXT: [[FPEXT9:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; GFX7-NEXT: [[FPEXT10:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) - ; GFX7-NEXT: [[FPEXT11:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC11]](s16) - ; GFX7-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FPEXT9]], [[FPEXT10]], [[FPEXT11]] - ; GFX7-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA3]](s32) - ; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; GFX7-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) - ; GFX7-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16) - ; GFX7-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC3]](s16) - ; GFX7-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX7-NEXT: $vgpr1 = COPY [[ANYEXT1]](s32) - ; GFX7-NEXT: $vgpr2 = COPY [[ANYEXT2]](s32) - ; GFX7-NEXT: $vgpr3 = COPY [[ANYEXT3]](s32) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(i32) = 
COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32) + ; GFX7-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY2]](i32), [[COPY3]](i32) + ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x i16>) = G_TRUNC [[BUILD_VECTOR]](<2 x i32>) + ; GFX7-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x i16>) = G_TRUNC [[BUILD_VECTOR1]](<2 x i32>) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX7-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY4]](i32), [[COPY5]](i32) + ; GFX7-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY6]](i32), [[COPY7]](i32) + ; GFX7-NEXT: [[TRUNC2:%[0-9]+]]:_(<2 x i16>) = G_TRUNC [[BUILD_VECTOR2]](<2 x i32>) + ; GFX7-NEXT: [[TRUNC3:%[0-9]+]]:_(<2 x i16>) = G_TRUNC [[BUILD_VECTOR3]](<2 x i32>) + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:_(i32) = COPY $vgpr10 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:_(i32) = COPY $vgpr11 + ; GFX7-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY8]](i32), [[COPY9]](i32) + ; GFX7-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY10]](i32), [[COPY11]](i32) + ; GFX7-NEXT: [[TRUNC4:%[0-9]+]]:_(<2 x i16>) = G_TRUNC [[BUILD_VECTOR4]](<2 x i32>) + ; GFX7-NEXT: [[TRUNC5:%[0-9]+]]:_(<2 x i16>) = G_TRUNC [[BUILD_VECTOR5]](<2 x i32>) + ; GFX7-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[TRUNC]](<2 x i16>) + ; GFX7-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[TRUNC1]](<2 x i16>) + ; GFX7-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %87(i16) + ; GFX7-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %93(i16) + ; GFX7-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %88(i16) + ; GFX7-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %94(i16) + ; GFX7-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST1]](<2 x f16>) + ; GFX7-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST6]](<2 x i16>) + ; GFX7-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST7]](i32) + ; GFX7-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST7]], [[C]](i32) + ; GFX7-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX7-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; GFX7-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST8]](<2 x i16>) + ; GFX7-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST9]](i32) + ; GFX7-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST9]], [[C]](i32) + ; GFX7-NEXT: [[TRUNC9:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX7-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[TRUNC2]](<2 x i16>) + ; GFX7-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[TRUNC3]](<2 x i16>) + ; GFX7-NEXT: [[BITCAST12:%[0-9]+]]:_(f16) = G_BITCAST %98(i16) + ; GFX7-NEXT: [[BITCAST13:%[0-9]+]]:_(f16) = G_BITCAST %103(i16) + ; GFX7-NEXT: [[BITCAST14:%[0-9]+]]:_(f16) = G_BITCAST %99(i16) + ; GFX7-NEXT: [[BITCAST15:%[0-9]+]]:_(f16) = G_BITCAST %104(i16) + ; GFX7-NEXT: [[BITCAST16:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST11]](<2 x f16>) + ; GFX7-NEXT: [[BITCAST17:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST16]](<2 x i16>) + 
; GFX7-NEXT: [[TRUNC10:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST17]](i32) + ; GFX7-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST17]], [[C]](i32) + ; GFX7-NEXT: [[TRUNC11:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX7-NEXT: [[BITCAST18:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST10]](<2 x f16>) + ; GFX7-NEXT: [[BITCAST19:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST18]](<2 x i16>) + ; GFX7-NEXT: [[TRUNC12:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST19]](i32) + ; GFX7-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST19]], [[C]](i32) + ; GFX7-NEXT: [[TRUNC13:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX7-NEXT: [[BITCAST20:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[TRUNC4]](<2 x i16>) + ; GFX7-NEXT: [[BITCAST21:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[TRUNC5]](<2 x i16>) + ; GFX7-NEXT: [[BITCAST22:%[0-9]+]]:_(f16) = G_BITCAST %108(i16) + ; GFX7-NEXT: [[BITCAST23:%[0-9]+]]:_(f16) = G_BITCAST %113(i16) + ; GFX7-NEXT: [[BITCAST24:%[0-9]+]]:_(f16) = G_BITCAST %109(i16) + ; GFX7-NEXT: [[BITCAST25:%[0-9]+]]:_(f16) = G_BITCAST %114(i16) + ; GFX7-NEXT: [[BITCAST26:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST21]](<2 x f16>) + ; GFX7-NEXT: [[BITCAST27:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST26]](<2 x i16>) + ; GFX7-NEXT: [[TRUNC14:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST27]](i32) + ; GFX7-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST27]], [[C]](i32) + ; GFX7-NEXT: [[TRUNC15:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR4]](i32) + ; GFX7-NEXT: [[BITCAST28:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST20]](<2 x f16>) + ; GFX7-NEXT: [[BITCAST29:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST28]](<2 x i16>) + ; GFX7-NEXT: [[TRUNC16:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST29]](i32) + ; GFX7-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST29]], [[C]](i32) + ; GFX7-NEXT: [[TRUNC17:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR5]](i32) + ; GFX7-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; GFX7-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST12]](f16) + ; GFX7-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST22]](f16) + ; GFX7-NEXT: [[FMA:%[0-9]+]]:_(f32) = G_FMA [[FPEXT]], [[FPEXT1]], [[FPEXT2]] + ; GFX7-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMA]](f32) + ; GFX7-NEXT: [[FPEXT3:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST4]](f16) + ; GFX7-NEXT: [[FPEXT4:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST14]](f16) + ; GFX7-NEXT: [[FPEXT5:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST24]](f16) + ; GFX7-NEXT: [[FMA1:%[0-9]+]]:_(f32) = G_FMA [[FPEXT3]], [[FPEXT4]], [[FPEXT5]] + ; GFX7-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMA1]](f32) + ; GFX7-NEXT: [[FPEXT6:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST3]](f16) + ; GFX7-NEXT: [[FPEXT7:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST13]](f16) + ; GFX7-NEXT: [[FPEXT8:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST23]](f16) + ; GFX7-NEXT: [[FMA2:%[0-9]+]]:_(f32) = G_FMA [[FPEXT6]], [[FPEXT7]], [[FPEXT8]] + ; GFX7-NEXT: [[FPTRUNC2:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMA2]](f32) + ; GFX7-NEXT: [[FPEXT9:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST5]](f16) + ; GFX7-NEXT: [[FPEXT10:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST15]](f16) + ; GFX7-NEXT: [[FPEXT11:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST25]](f16) + ; GFX7-NEXT: [[FMA3:%[0-9]+]]:_(f32) = G_FMA [[FPEXT9]], [[FPEXT10]], [[FPEXT11]] + ; GFX7-NEXT: [[FPTRUNC3:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMA3]](f32) + ; GFX7-NEXT: [[BITCAST30:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; GFX7-NEXT: [[BITCAST31:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; GFX7-NEXT: [[BITCAST32:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC2]](f16) + ; GFX7-NEXT: [[BITCAST33:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC3]](f16) + ; GFX7-NEXT: 
[[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST30]](i16) + ; GFX7-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST31]](i16) + ; GFX7-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST32]](i16) + ; GFX7-NEXT: [[ANYEXT3:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST33]](i16) + ; GFX7-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + ; GFX7-NEXT: $vgpr1 = COPY [[ANYEXT1]](i32) + ; GFX7-NEXT: $vgpr2 = COPY [[ANYEXT2]](i32) + ; GFX7-NEXT: $vgpr3 = COPY [[ANYEXT3]](i32) ; GFX7-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 - %4:_(s32) = COPY $vgpr0 - %5:_(s32) = COPY $vgpr1 - %6:_(s32) = COPY $vgpr2 - %7:_(s32) = COPY $vgpr3 - %8:_(<4 x s32>) = G_BUILD_VECTOR %4(s32), %5(s32), %6(s32), %7(s32) - %0:_(<4 x s16>) = G_TRUNC %8(<4 x s32>) - %9:_(s32) = COPY $vgpr4 - %10:_(s32) = COPY $vgpr5 - %11:_(s32) = COPY $vgpr6 - %12:_(s32) = COPY $vgpr7 - %13:_(<4 x s32>) = G_BUILD_VECTOR %9(s32), %10(s32), %11(s32), %12(s32) - %1:_(<4 x s16>) = G_TRUNC %13(<4 x s32>) - %14:_(s32) = COPY $vgpr8 - %15:_(s32) = COPY $vgpr9 - %16:_(s32) = COPY $vgpr10 - %17:_(s32) = COPY $vgpr11 - %18:_(<4 x s32>) = G_BUILD_VECTOR %14(s32), %15(s32), %16(s32), %17(s32) - %2:_(<4 x s16>) = G_TRUNC %18(<4 x s32>) - %19:_(<4 x s16>) = G_FMA %0, %1, %2 - %25:_(s16), %26:_(s16), %27:_(s16), %28:_(s16) = G_UNMERGE_VALUES %19(<4 x s16>) - %21:_(s32) = G_ANYEXT %25(s16) - %22:_(s32) = G_ANYEXT %26(s16) - %23:_(s32) = G_ANYEXT %27(s16) - %24:_(s32) = G_ANYEXT %28(s16) - $vgpr0 = COPY %21(s32) - $vgpr1 = COPY %22(s32) - $vgpr2 = COPY %23(s32) - $vgpr3 = COPY %24(s32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 + %4:_(<4 x i32>) = G_BUILD_VECTOR %0(i32), %1(i32), %2(i32), %3(i32) + %5:_(<4 x i16>) = G_TRUNC %4(<4 x i32>) + %6:_(i32) = COPY $vgpr4 + %7:_(i32) = COPY $vgpr5 + %8:_(i32) = COPY $vgpr6 + %9:_(i32) = COPY $vgpr7 + %10:_(<4 x i32>) = G_BUILD_VECTOR %6(i32), %7(i32), %8(i32), %9(i32) + %11:_(<4 x i16>) = G_TRUNC %10(<4 x i32>) + %12:_(i32) = COPY $vgpr8 + %13:_(i32) = COPY $vgpr9 + %14:_(i32) = COPY $vgpr10 + %15:_(i32) = COPY $vgpr11 + %16:_(<4 x i32>) = G_BUILD_VECTOR %12(i32), %13(i32), %14(i32), %15(i32) + %17:_(<4 x i16>) = G_TRUNC %16(<4 x i32>) + %18:_(<4 x f16>) = G_BITCAST %5(<4 x i16>) + %19:_(<4 x f16>) = G_BITCAST %11(<4 x i16>) + %20:_(<4 x f16>) = G_BITCAST %17(<4 x i16>) + %21:_(<4 x f16>) = G_FMA %18, %19, %20 + %22:_(<4 x i16>) = G_BITCAST %21(<4 x f16>) + %23:_(i16), %24:_(i16), %25:_(i16), %26:_(i16) = G_UNMERGE_VALUES %22(<4 x i16>) + %27:_(i32) = G_ANYEXT %23(i16) + %28:_(i32) = G_ANYEXT %24(i16) + %29:_(i32) = G_ANYEXT %25(i16) + %30:_(i32) = G_ANYEXT %26(i16) + $vgpr0 = COPY %27(i32) + $vgpr1 = COPY %28(i32) + $vgpr2 = COPY %29(i32) + $vgpr3 = COPY %30(i32) SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ... 
@@ -268,82 +325,100 @@ body: | ; GFX7-LABEL: name: maxnum_v5i16 ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX7-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX7-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX7-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; GFX7-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) - ; GFX7-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) - ; GFX7-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) - ; GFX7-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[COPY7]](s32) - ; GFX7-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32) - ; GFX7-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX7-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX7-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; GFX7-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT]], [[FPEXT1]] - ; GFX7-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE]](s32) - ; GFX7-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; GFX7-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) - ; GFX7-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT2]], [[FPEXT3]] - ; GFX7-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE1]](s32) - ; GFX7-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; GFX7-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) - ; GFX7-NEXT: [[FMAXNUM_IEEE2:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT4]], [[FPEXT5]] - ; GFX7-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE2]](s32) - ; GFX7-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; GFX7-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC8]](s16) - ; GFX7-NEXT: [[FMAXNUM_IEEE3:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT6]], [[FPEXT7]] - ; GFX7-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE3]](s32) - ; GFX7-NEXT: [[FPEXT8:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; GFX7-NEXT: [[FPEXT9:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC9]](s16) - ; GFX7-NEXT: [[FMAXNUM_IEEE4:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT8]], [[FPEXT9]] - ; GFX7-NEXT: [[FPTRUNC4:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE4]](s32) - ; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; GFX7-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) - ; GFX7-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16) - ; GFX7-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC3]](s16) - ; GFX7-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC4]](s16) - ; GFX7-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX7-NEXT: $vgpr1 = COPY [[ANYEXT1]](s32) - ; GFX7-NEXT: $vgpr2 = COPY [[ANYEXT2]](s32) - ; GFX7-NEXT: $vgpr3 = COPY [[ANYEXT3]](s32) - ; GFX7-NEXT: $vgpr4 = COPY [[ANYEXT4]](s32) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY 
$vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY $vgpr3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:_(i32) = COPY $vgpr5 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:_(i32) = COPY $vgpr6 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:_(i32) = COPY $vgpr7 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:_(i32) = COPY $vgpr8 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:_(i32) = COPY $vgpr9 + ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; GFX7-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; GFX7-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[COPY2]](i32) + ; GFX7-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[COPY3]](i32) + ; GFX7-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[COPY4]](i32) + ; GFX7-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX7-NEXT: [[BITCAST1:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX7-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX7-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX7-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX7-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[COPY5]](i32) + ; GFX7-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[COPY6]](i32) + ; GFX7-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[COPY7]](i32) + ; GFX7-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[COPY8]](i32) + ; GFX7-NEXT: [[TRUNC9:%[0-9]+]]:_(i16) = G_TRUNC [[COPY9]](i32) + ; GFX7-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX7-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC6]](i16) + ; GFX7-NEXT: [[BITCAST7:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC7]](i16) + ; GFX7-NEXT: [[BITCAST8:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC8]](i16) + ; GFX7-NEXT: [[BITCAST9:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC9]](i16) + ; GFX7-NEXT: [[FPEXT:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST]](f16) + ; GFX7-NEXT: [[FPEXT1:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST5]](f16) + ; GFX7-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FPEXT]], [[FPEXT1]] + ; GFX7-NEXT: [[FPTRUNC:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMAXNUM_IEEE]](f32) + ; GFX7-NEXT: [[FPEXT2:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST1]](f16) + ; GFX7-NEXT: [[FPEXT3:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST6]](f16) + ; GFX7-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FPEXT2]], [[FPEXT3]] + ; GFX7-NEXT: [[FPTRUNC1:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMAXNUM_IEEE1]](f32) + ; GFX7-NEXT: [[FPEXT4:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST2]](f16) + ; GFX7-NEXT: [[FPEXT5:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST7]](f16) + ; GFX7-NEXT: [[FMAXNUM_IEEE2:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FPEXT4]], [[FPEXT5]] + ; GFX7-NEXT: [[FPTRUNC2:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMAXNUM_IEEE2]](f32) + ; GFX7-NEXT: [[FPEXT6:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST3]](f16) + ; GFX7-NEXT: [[FPEXT7:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST8]](f16) + ; GFX7-NEXT: [[FMAXNUM_IEEE3:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FPEXT6]], [[FPEXT7]] + ; GFX7-NEXT: [[FPTRUNC3:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMAXNUM_IEEE3]](f32) + ; GFX7-NEXT: [[FPEXT8:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST4]](f16) + ; GFX7-NEXT: [[FPEXT9:%[0-9]+]]:_(f32) = G_FPEXT [[BITCAST9]](f16) + ; GFX7-NEXT: [[FMAXNUM_IEEE4:%[0-9]+]]:_(f32) = G_FMAXNUM_IEEE [[FPEXT8]], [[FPEXT9]] + ; GFX7-NEXT: [[FPTRUNC4:%[0-9]+]]:_(f16) = G_FPTRUNC [[FMAXNUM_IEEE4]](f32) + ; GFX7-NEXT: [[BITCAST10:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC]](f16) + ; GFX7-NEXT: [[BITCAST11:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC1]](f16) + ; GFX7-NEXT: [[BITCAST12:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC2]](f16) + ; GFX7-NEXT: 
[[BITCAST13:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC3]](f16) + ; GFX7-NEXT: [[BITCAST14:%[0-9]+]]:_(i16) = G_BITCAST [[FPTRUNC4]](f16) + ; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST10]](i16) + ; GFX7-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST11]](i16) + ; GFX7-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST12]](i16) + ; GFX7-NEXT: [[ANYEXT3:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST13]](i16) + ; GFX7-NEXT: [[ANYEXT4:%[0-9]+]]:_(i32) = G_ANYEXT [[BITCAST14]](i16) + ; GFX7-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + ; GFX7-NEXT: $vgpr1 = COPY [[ANYEXT1]](i32) + ; GFX7-NEXT: $vgpr2 = COPY [[ANYEXT2]](i32) + ; GFX7-NEXT: $vgpr3 = COPY [[ANYEXT3]](i32) + ; GFX7-NEXT: $vgpr4 = COPY [[ANYEXT4]](i32) ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4 - %2:_(s32) = COPY $vgpr0 - %3:_(s32) = COPY $vgpr1 - %4:_(s32) = COPY $vgpr2 - %5:_(s32) = COPY $vgpr3 - %6:_(s32) = COPY $vgpr4 - %7:_(<5 x s32>) = G_BUILD_VECTOR %2(s32), %3(s32), %4(s32), %5(s32), %6(s32) - %0:_(<5 x s16>) = G_TRUNC %7(<5 x s32>) - %8:_(s32) = COPY $vgpr5 - %9:_(s32) = COPY $vgpr6 - %10:_(s32) = COPY $vgpr7 - %11:_(s32) = COPY $vgpr8 - %12:_(s32) = COPY $vgpr9 - %13:_(<5 x s32>) = G_BUILD_VECTOR %8(s32), %9(s32), %10(s32), %11(s32), %12(s32) - %1:_(<5 x s16>) = G_TRUNC %13(<5 x s32>) - %15:_(<5 x s16>) = G_FMAXNUM %0, %1 - %21:_(s16), %22:_(s16), %23:_(s16), %24:_(s16), %25:_(s16) = G_UNMERGE_VALUES %15(<5 x s16>) - %16:_(s32) = G_ANYEXT %21(s16) - %17:_(s32) = G_ANYEXT %22(s16) - %18:_(s32) = G_ANYEXT %23(s16) - %19:_(s32) = G_ANYEXT %24(s16) - %20:_(s32) = G_ANYEXT %25(s16) - $vgpr0 = COPY %16(s32) - $vgpr1 = COPY %17(s32) - $vgpr2 = COPY %18(s32) - $vgpr3 = COPY %19(s32) - $vgpr4 = COPY %20(s32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 + %4:_(i32) = COPY $vgpr4 + %5:_(<5 x i32>) = G_BUILD_VECTOR %0(i32), %1(i32), %2(i32), %3(i32), %4(i32) + %6:_(<5 x i16>) = G_TRUNC %5(<5 x i32>) + %7:_(i32) = COPY $vgpr5 + %8:_(i32) = COPY $vgpr6 + %9:_(i32) = COPY $vgpr7 + %10:_(i32) = COPY $vgpr8 + %11:_(i32) = COPY $vgpr9 + %12:_(<5 x i32>) = G_BUILD_VECTOR %7(i32), %8(i32), %9(i32), %10(i32), %11(i32) + %13:_(<5 x i16>) = G_TRUNC %12(<5 x i32>) + %14:_(<5 x f16>) = G_BITCAST %6(<5 x i16>) + %15:_(<5 x f16>) = G_BITCAST %13(<5 x i16>) + %16:_(<5 x f16>) = G_FMAXNUM %14, %15 + %17:_(<5 x i16>) = G_BITCAST %16(<5 x f16>) + %18:_(i16), %19:_(i16), %20:_(i16), %21:_(i16), %22:_(i16) = G_UNMERGE_VALUES %17(<5 x i16>) + %23:_(i32) = G_ANYEXT %18(i16) + %24:_(i32) = G_ANYEXT %19(i16) + %25:_(i32) = G_ANYEXT %20(i16) + %26:_(i32) = G_ANYEXT %21(i16) + %27:_(i32) = G_ANYEXT %22(i16) + $vgpr0 = COPY %23(i32) + $vgpr1 = COPY %24(i32) + $vgpr2 = COPY %25(i32) + $vgpr3 = COPY %26(i32) + $vgpr4 = COPY %27(i32) SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-vector-args-gfx8-plus.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-vector-args-gfx8-plus.mir index 61af5e01ed4c6..1cc75a7d7a90d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-vector-args-gfx8-plus.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-vector-args-gfx8-plus.mir @@ -42,24 +42,24 @@ body: | ; GFX8-LABEL: name: and_v2i16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY]], [[COPY1]] - ; GFX8-NEXT: $vgpr0 = COPY [[AND]](<2 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(<2 x i16>) = G_AND [[COPY]], [[COPY1]] + ; GFX8-NEXT: $vgpr0 = COPY [[AND]](<2 x i16>) ; GFX8-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX9-LABEL: name: and_v2i16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY]], [[COPY1]] - ; GFX9-NEXT: $vgpr0 = COPY [[AND]](<2 x s16>) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(<2 x i16>) = G_AND [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[AND]](<2 x i16>) ; GFX9-NEXT: SI_RETURN implicit $vgpr0 - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %3:_(<2 x s16>) = G_AND %0, %1 - $vgpr0 = COPY %3(<2 x s16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = G_AND %0, %1 + $vgpr0 = COPY %2(<2 x i16>) SI_RETURN implicit $vgpr0 ... 
@@ -72,69 +72,69 @@ body: | ; GFX8-LABEL: name: add_v3i16 ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC3]] - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC4]] - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC5]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ADD2]](s16) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) - ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST4]](<2 x s16>) - ; GFX8-NEXT: $vgpr1 = COPY [[BITCAST5]](<2 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr3 + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY2]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY3]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST3]](i32) + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(i16) = G_ADD [[TRUNC]], [[TRUNC3]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(i16) = G_ADD [[TRUNC1]], [[TRUNC4]] + ; GFX8-NEXT: 
[[ADD2:%[0-9]+]]:_(i16) = G_ADD [[TRUNC2]], [[TRUNC5]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[ADD]](i16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[ADD1]](i16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[ADD2]](i16) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[C1]], [[C]](i32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST4]](<2 x i16>) + ; GFX8-NEXT: $vgpr1 = COPY [[BITCAST5]](<2 x i16>) ; GFX8-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 ; ; GFX9-LABEL: name: add_v3i16 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(<2 x s16>) = G_ADD [[COPY]], [[COPY2]] - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(<2 x s16>) = G_ADD [[COPY1]], [[COPY3]] - ; GFX9-NEXT: $vgpr0 = COPY [[ADD]](<2 x s16>) - ; GFX9-NEXT: $vgpr1 = COPY [[ADD1]](<2 x s16>) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr3 + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(<2 x i16>) = G_ADD [[COPY]], [[COPY2]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(<2 x i16>) = G_ADD [[COPY1]], [[COPY3]] + ; GFX9-NEXT: $vgpr0 = COPY [[ADD]](<2 x i16>) + ; GFX9-NEXT: $vgpr1 = COPY [[ADD1]](<2 x i16>) ; GFX9-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 - %3:_(<2 x s16>) = COPY $vgpr0 - %4:_(<2 x s16>) = COPY $vgpr1 - %5:_(<4 x s16>) = G_CONCAT_VECTORS %3(<2 x s16>), %4(<2 x s16>) - %6:_(s16), %7:_(s16), %8:_(s16), %9:_(s16) = G_UNMERGE_VALUES %5(<4 x s16>) - %0:_(<3 x s16>) = G_BUILD_VECTOR %6(s16), %7(s16), %8(s16) - %10:_(<2 x s16>) = COPY $vgpr2 - %11:_(<2 x s16>) = COPY $vgpr3 - %12:_(<4 x s16>) = G_CONCAT_VECTORS %10(<2 x s16>), %11(<2 x s16>) - %13:_(s16), %14:_(s16), %15:_(s16), %16:_(s16) = G_UNMERGE_VALUES %12(<4 x s16>) - %1:_(<3 x s16>) = G_BUILD_VECTOR %13(s16), %14(s16), %15(s16) - %17:_(<3 x s16>) = G_ADD %0, %1 - %21:_(s16), %22:_(s16), %23:_(s16) = G_UNMERGE_VALUES %17(<3 x s16>) - %24:_(s16) = G_IMPLICIT_DEF - %25:_(<4 x s16>) = G_BUILD_VECTOR %21(s16), %22(s16), %23(s16), %24(s16) - %19:_(<2 x s16>), %20:_(<2 x s16>) = G_UNMERGE_VALUES %25(<4 x s16>) - $vgpr0 = COPY %19(<2 x s16>) - $vgpr1 = COPY %20(<2 x s16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<4 x i16>) = G_CONCAT_VECTORS %0(<2 x i16>), %1(<2 x i16>) + %3:_(i16), %4:_(i16), %5:_(i16), %6:_(i16) = G_UNMERGE_VALUES %2(<4 x i16>) + %7:_(<3 x i16>) = G_BUILD_VECTOR %3(i16), %4(i16), %5(i16) + %8:_(<2 x i16>) = COPY $vgpr2 + %9:_(<2 x i16>) = COPY $vgpr3 + %10:_(<4 x i16>) = G_CONCAT_VECTORS %8(<2 x i16>), %9(<2 x i16>) + %11:_(i16), %12:_(i16), %13:_(i16), %14:_(i16) = G_UNMERGE_VALUES %10(<4 x i16>) + %15:_(<3 x i16>) = G_BUILD_VECTOR %11(i16), %12(i16), %13(i16) + %16:_(<3 x i16>) = G_ADD %7, %15 + %17:_(i16), %18:_(i16), %19:_(i16) = G_UNMERGE_VALUES %16(<3 x i16>) + %20:_(i16) = 
G_IMPLICIT_DEF + %21:_(<4 x i16>) = G_BUILD_VECTOR %17(i16), %18(i16), %19(i16), %20(i16) + %22:_(<2 x i16>), %23:_(<2 x i16>) = G_UNMERGE_VALUES %21(<4 x i16>) + $vgpr0 = COPY %22(<2 x i16>) + $vgpr1 = COPY %23(<2 x i16>) SI_RETURN implicit $vgpr0, implicit $vgpr1 ... @@ -147,75 +147,75 @@ body: | ; GFX8-LABEL: name: shl_v3i16 ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC3]](s16) - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC4]](s16) - ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC5]](s16) - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SHL]](s16) - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SHL1]](s16) - ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] - ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SHL2]](s16) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) - ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] - ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST4]](<2 x s16>) - ; GFX8-NEXT: $vgpr1 = COPY [[BITCAST5]](<2 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr3 + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY2]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX8-NEXT: 
[[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY3]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST3]](i32) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[TRUNC3]](i16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[TRUNC1]], [[TRUNC4]](i16) + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[TRUNC2]], [[TRUNC5]](i16) + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[SHL]](i16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[SHL1]](i16) + ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL3]] + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[SHL2]](i16) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[C1]], [[C]](i32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL4]] + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST4]](<2 x i16>) + ; GFX8-NEXT: $vgpr1 = COPY [[BITCAST5]](<2 x i16>) ; GFX8-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 ; ; GFX9-LABEL: name: shl_v3i16 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[COPY]], [[COPY2]](<2 x s16>) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SHL1]](s16), [[DEF]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[SHL]](<2 x s16>) - ; GFX9-NEXT: $vgpr1 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr3 + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY3]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x i16>) = G_SHL [[COPY]], [[COPY2]](<2 x i16>) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[TRUNC]], [[TRUNC1]](i16) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[SHL1]](i16), [[DEF]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[SHL]](<2 x i16>) + ; GFX9-NEXT: $vgpr1 = COPY [[BUILD_VECTOR]](<2 x i16>) ; GFX9-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 - %3:_(<2 x s16>) = COPY $vgpr0 - %4:_(<2 x s16>) = COPY $vgpr1 - %5:_(<4 x s16>) = G_CONCAT_VECTORS %3(<2 x s16>), %4(<2 x s16>) - %6:_(s16), %7:_(s16), %8:_(s16), %9:_(s16) = G_UNMERGE_VALUES %5(<4 x s16>) - %0:_(<3 x s16>) = G_BUILD_VECTOR %6(s16), %7(s16), %8(s16) - %10:_(<2 x s16>) = COPY $vgpr2 - %11:_(<2 x s16>) = COPY $vgpr3 - %12:_(<4 x 
s16>) = G_CONCAT_VECTORS %10(<2 x s16>), %11(<2 x s16>) - %13:_(s16), %14:_(s16), %15:_(s16), %16:_(s16) = G_UNMERGE_VALUES %12(<4 x s16>) - %1:_(<3 x s16>) = G_BUILD_VECTOR %13(s16), %14(s16), %15(s16) - %17:_(<3 x s16>) = G_SHL %0, %1(<3 x s16>) - %21:_(s16), %22:_(s16), %23:_(s16) = G_UNMERGE_VALUES %17(<3 x s16>) - %24:_(s16) = G_IMPLICIT_DEF - %25:_(<4 x s16>) = G_BUILD_VECTOR %21(s16), %22(s16), %23(s16), %24(s16) - %19:_(<2 x s16>), %20:_(<2 x s16>) = G_UNMERGE_VALUES %25(<4 x s16>) - $vgpr0 = COPY %19(<2 x s16>) - $vgpr1 = COPY %20(<2 x s16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<4 x i16>) = G_CONCAT_VECTORS %0(<2 x i16>), %1(<2 x i16>) + %3:_(i16), %4:_(i16), %5:_(i16), %6:_(i16) = G_UNMERGE_VALUES %2(<4 x i16>) + %7:_(<3 x i16>) = G_BUILD_VECTOR %3(i16), %4(i16), %5(i16) + %8:_(<2 x i16>) = COPY $vgpr2 + %9:_(<2 x i16>) = COPY $vgpr3 + %10:_(<4 x i16>) = G_CONCAT_VECTORS %8(<2 x i16>), %9(<2 x i16>) + %11:_(i16), %12:_(i16), %13:_(i16), %14:_(i16) = G_UNMERGE_VALUES %10(<4 x i16>) + %15:_(<3 x i16>) = G_BUILD_VECTOR %11(i16), %12(i16), %13(i16) + %16:_(<3 x i16>) = G_SHL %7, %15(<3 x i16>) + %17:_(i16), %18:_(i16), %19:_(i16) = G_UNMERGE_VALUES %16(<3 x i16>) + %20:_(i16) = G_IMPLICIT_DEF + %21:_(<4 x i16>) = G_BUILD_VECTOR %17(i16), %18(i16), %19(i16), %20(i16) + %22:_(<2 x i16>), %23:_(<2 x i16>) = G_UNMERGE_VALUES %21(<4 x i16>) + $vgpr0 = COPY %22(<2 x i16>) + $vgpr1 = COPY %23(<2 x i16>) SI_RETURN implicit $vgpr0, implicit $vgpr1 ... @@ -228,82 +228,125 @@ body: | ; GFX8-LABEL: name: fma_v4f16 ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY4]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; GFX8-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x 
s16>) - ; GFX8-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; GFX8-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; GFX8-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC4]], [[TRUNC8]] - ; GFX8-NEXT: [[FMA1:%[0-9]+]]:_(s16) = G_FMA [[TRUNC1]], [[TRUNC5]], [[TRUNC9]] - ; GFX8-NEXT: [[FMA2:%[0-9]+]]:_(s16) = G_FMA [[TRUNC2]], [[TRUNC6]], [[TRUNC10]] - ; GFX8-NEXT: [[FMA3:%[0-9]+]]:_(s16) = G_FMA [[TRUNC3]], [[TRUNC7]], [[TRUNC11]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMA]](s16) - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMA1]](s16) - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMA2]](s16) - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FMA3]](s16) - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST6]](<2 x s16>) - ; GFX8-NEXT: $vgpr1 = COPY [[BITCAST7]](<2 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(f16) = G_BITCAST %40(i16) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(f16) = G_BITCAST %46(i16) + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(f16) = G_BITCAST %41(i16) + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(f16) = G_BITCAST %47(i16) + ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST1]](<2 x f16>) + ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST6]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST7]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST7]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX8-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST]](<2 x f16>) + ; GFX8-NEXT: [[BITCAST9:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST8]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST9]](i32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST9]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX8-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY2]](<2 x i16>) + ; GFX8-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY3]](<2 x i16>) + ; GFX8-NEXT: [[BITCAST12:%[0-9]+]]:_(f16) = G_BITCAST %51(i16) + ; GFX8-NEXT: [[BITCAST13:%[0-9]+]]:_(f16) = G_BITCAST %56(i16) + ; GFX8-NEXT: [[BITCAST14:%[0-9]+]]:_(f16) = G_BITCAST %52(i16) + ; GFX8-NEXT: [[BITCAST15:%[0-9]+]]:_(f16) = G_BITCAST %57(i16) + ; GFX8-NEXT: [[BITCAST16:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST11]](<2 x f16>) + ; GFX8-NEXT: [[BITCAST17:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST16]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST17]](i32) + ; 
GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST17]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX8-NEXT: [[BITCAST18:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST10]](<2 x f16>) + ; GFX8-NEXT: [[BITCAST19:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST18]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST19]](i32) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST19]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX8-NEXT: [[BITCAST20:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY4]](<2 x i16>) + ; GFX8-NEXT: [[BITCAST21:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY5]](<2 x i16>) + ; GFX8-NEXT: [[BITCAST22:%[0-9]+]]:_(f16) = G_BITCAST %61(i16) + ; GFX8-NEXT: [[BITCAST23:%[0-9]+]]:_(f16) = G_BITCAST %66(i16) + ; GFX8-NEXT: [[BITCAST24:%[0-9]+]]:_(f16) = G_BITCAST %62(i16) + ; GFX8-NEXT: [[BITCAST25:%[0-9]+]]:_(f16) = G_BITCAST %67(i16) + ; GFX8-NEXT: [[BITCAST26:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST21]](<2 x f16>) + ; GFX8-NEXT: [[BITCAST27:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST26]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST27]](i32) + ; GFX8-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST27]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC9:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR4]](i32) + ; GFX8-NEXT: [[BITCAST28:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[BITCAST20]](<2 x f16>) + ; GFX8-NEXT: [[BITCAST29:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST28]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC10:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST29]](i32) + ; GFX8-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST29]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC11:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR5]](i32) + ; GFX8-NEXT: [[FMA:%[0-9]+]]:_(f16) = G_FMA [[BITCAST2]], [[BITCAST12]], [[BITCAST22]] + ; GFX8-NEXT: [[FMA1:%[0-9]+]]:_(f16) = G_FMA [[BITCAST4]], [[BITCAST14]], [[BITCAST24]] + ; GFX8-NEXT: [[FMA2:%[0-9]+]]:_(f16) = G_FMA [[BITCAST3]], [[BITCAST13]], [[BITCAST23]] + ; GFX8-NEXT: [[FMA3:%[0-9]+]]:_(f16) = G_FMA [[BITCAST5]], [[BITCAST15]], [[BITCAST25]] + ; GFX8-NEXT: [[BITCAST30:%[0-9]+]]:_(i16) = G_BITCAST [[FMA]](f16) + ; GFX8-NEXT: [[BITCAST31:%[0-9]+]]:_(i16) = G_BITCAST [[FMA1]](f16) + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST30]](i16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST31]](i16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST32:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: [[BITCAST33:%[0-9]+]]:_(i16) = G_BITCAST [[FMA2]](f16) + ; GFX8-NEXT: [[BITCAST34:%[0-9]+]]:_(i16) = G_BITCAST [[FMA3]](f16) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST33]](i16) + ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST34]](i16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX8-NEXT: [[BITCAST35:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[OR1]](i32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x f16>) = G_CONCAT_VECTORS [[BITCAST32]](<2 x f16>), [[BITCAST35]](<2 x f16>) + ; GFX8-NEXT: [[BITCAST36:%[0-9]+]]:_(<4 x i16>) = G_BITCAST [[CONCAT_VECTORS]](<4 x f16>) + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[BITCAST36]](<4 x i16>) + ; GFX8-NEXT: $vgpr0 = COPY [[UV]](<2 x i16>) + ; GFX8-NEXT: $vgpr1 = COPY [[UV1]](<2 x i16>) ; GFX8-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 ; ; GFX9-LABEL: name: fma_v4f16 ; GFX9: 
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(<2 x s16>) = G_FMA [[COPY]], [[COPY2]], [[COPY4]] - ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(<2 x s16>) = G_FMA [[COPY1]], [[COPY3]], [[COPY5]] - ; GFX9-NEXT: $vgpr0 = COPY [[FMA]](<2 x s16>) - ; GFX9-NEXT: $vgpr1 = COPY [[FMA1]](<2 x s16>) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr3 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY2]](<2 x i16>) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY3]](<2 x i16>) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY4]](<2 x i16>) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x f16>) = G_BITCAST [[COPY5]](<2 x i16>) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(<2 x f16>) = G_FMA [[BITCAST]], [[BITCAST2]], [[BITCAST4]] + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(<2 x f16>) = G_FMA [[BITCAST1]], [[BITCAST3]], [[BITCAST5]] + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[FMA]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[FMA1]](<2 x f16>) + ; GFX9-NEXT: $vgpr0 = COPY [[BITCAST6]](<2 x i16>) + ; GFX9-NEXT: $vgpr1 = COPY [[BITCAST7]](<2 x i16>) ; GFX9-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 - %4:_(<2 x s16>) = COPY $vgpr0 - %5:_(<2 x s16>) = COPY $vgpr1 - %0:_(<4 x s16>) = G_CONCAT_VECTORS %4(<2 x s16>), %5(<2 x s16>) - %6:_(<2 x s16>) = COPY $vgpr2 - %7:_(<2 x s16>) = COPY $vgpr3 - %1:_(<4 x s16>) = G_CONCAT_VECTORS %6(<2 x s16>), %7(<2 x s16>) - %8:_(<2 x s16>) = COPY $vgpr4 - %9:_(<2 x s16>) = COPY $vgpr5 - %2:_(<4 x s16>) = G_CONCAT_VECTORS %8(<2 x s16>), %9(<2 x s16>) - %10:_(<4 x s16>) = G_FMA %0, %1, %2 - %12:_(<2 x s16>), %13:_(<2 x s16>) = G_UNMERGE_VALUES %10(<4 x s16>) - $vgpr0 = COPY %12(<2 x s16>) - $vgpr1 = COPY %13(<2 x s16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<4 x i16>) = G_CONCAT_VECTORS %0(<2 x i16>), %1(<2 x i16>) + %3:_(<2 x i16>) = COPY $vgpr2 + %4:_(<2 x i16>) = COPY $vgpr3 + %5:_(<4 x i16>) = G_CONCAT_VECTORS %3(<2 x i16>), %4(<2 x i16>) + %6:_(<2 x i16>) = COPY $vgpr4 + %7:_(<2 x i16>) = COPY $vgpr5 + %8:_(<4 x i16>) = G_CONCAT_VECTORS %6(<2 x i16>), %7(<2 x i16>) + %9:_(<4 x f16>) = G_BITCAST %2(<4 x i16>) + %10:_(<4 x f16>) = G_BITCAST %5(<4 x i16>) + %11:_(<4 x f16>) = G_BITCAST %8(<4 x i16>) + %12:_(<4 x f16>) = G_FMA %9, %10, %11 + %13:_(<4 x i16>) = G_BITCAST %12(<4 x f16>) + %14:_(<2 x i16>), %15:_(<2 x i16>) = G_UNMERGE_VALUES %13(<4 x i16>) + $vgpr0 = COPY %14(<2 x i16>) + $vgpr1 = COPY %15(<2 x i16>) SI_RETURN implicit $vgpr0, implicit $vgpr1 ... 
@@ -316,109 +359,192 @@ body: | ; GFX8-LABEL: name: maxnum_v5i16 ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY4]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; GFX8-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] - ; GFX8-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC5]] - ; GFX8-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX8-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] - ; GFX8-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC6]] - ; GFX8-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; GFX8-NEXT: [[FCANONICALIZE4:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]] - ; GFX8-NEXT: [[FCANONICALIZE5:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC7]] - ; GFX8-NEXT: [[FMAXNUM_IEEE2:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]] - ; GFX8-NEXT: [[FCANONICALIZE6:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC3]] - ; GFX8-NEXT: [[FCANONICALIZE7:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC8]] - ; GFX8-NEXT: [[FMAXNUM_IEEE3:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE6]], [[FCANONICALIZE7]] - ; GFX8-NEXT: [[FCANONICALIZE8:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC4]] - ; GFX8-NEXT: [[FCANONICALIZE9:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC9]] - ; GFX8-NEXT: [[FMAXNUM_IEEE4:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE8]], [[FCANONICALIZE9]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE]](s16) - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE1]](s16) - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] 
- ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE2]](s16) - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE3]](s16) - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE4]](s16) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) - ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL2]] - ; GFX8-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST6]](<2 x s16>) - ; GFX8-NEXT: $vgpr1 = COPY [[BITCAST7]](<2 x s16>) - ; GFX8-NEXT: $vgpr2 = COPY [[BITCAST8]](<2 x s16>) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY2]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY3]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST3]](i32) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY4]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; GFX8-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[COPY5]](<2 x i16>) + ; GFX8-NEXT: [[TRUNC9:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST5]](i32) + ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX8-NEXT: [[BITCAST8:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX8-NEXT: [[BITCAST9:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX8-NEXT: [[BITCAST10:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX8-NEXT: [[BITCAST11:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX8-NEXT: [[BITCAST12:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC6]](i16) + ; GFX8-NEXT: [[BITCAST13:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC7]](i16) + ; GFX8-NEXT: [[BITCAST14:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC8]](i16) + ; GFX8-NEXT: [[BITCAST15:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC9]](i16) + ; GFX8-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST6]] + ; 
GFX8-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST11]] + ; GFX8-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(f16) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX8-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST7]] + ; GFX8-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST12]] + ; GFX8-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(f16) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; GFX8-NEXT: [[FCANONICALIZE4:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST8]] + ; GFX8-NEXT: [[FCANONICALIZE5:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST13]] + ; GFX8-NEXT: [[FMAXNUM_IEEE2:%[0-9]+]]:_(f16) = G_FMAXNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]] + ; GFX8-NEXT: [[FCANONICALIZE6:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST9]] + ; GFX8-NEXT: [[FCANONICALIZE7:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST14]] + ; GFX8-NEXT: [[FMAXNUM_IEEE3:%[0-9]+]]:_(f16) = G_FMAXNUM_IEEE [[FCANONICALIZE6]], [[FCANONICALIZE7]] + ; GFX8-NEXT: [[FCANONICALIZE8:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST10]] + ; GFX8-NEXT: [[FCANONICALIZE9:%[0-9]+]]:_(f16) = G_FCANONICALIZE [[BITCAST15]] + ; GFX8-NEXT: [[FMAXNUM_IEEE4:%[0-9]+]]:_(f16) = G_FMAXNUM_IEEE [[FCANONICALIZE8]], [[FCANONICALIZE9]] + ; GFX8-NEXT: [[BITCAST16:%[0-9]+]]:_(i16) = G_BITCAST [[FMAXNUM_IEEE]](f16) + ; GFX8-NEXT: [[BITCAST17:%[0-9]+]]:_(i16) = G_BITCAST [[FMAXNUM_IEEE1]](f16) + ; GFX8-NEXT: [[BITCAST18:%[0-9]+]]:_(i16) = G_BITCAST [[FMAXNUM_IEEE2]](f16) + ; GFX8-NEXT: [[BITCAST19:%[0-9]+]]:_(i16) = G_BITCAST [[FMAXNUM_IEEE3]](f16) + ; GFX8-NEXT: [[BITCAST20:%[0-9]+]]:_(i16) = G_BITCAST [[FMAXNUM_IEEE4]](f16) + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST16]](i16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST17]](i16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST21:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST18]](i16) + ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST19]](i16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C]](i32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX8-NEXT: [[BITCAST22:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(i32) = G_ZEXT [[BITCAST20]](i16) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[C1]], [[C]](i32) + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[ZEXT4]], [[SHL2]] + ; GFX8-NEXT: [[BITCAST23:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST21]](<2 x i16>) + ; GFX8-NEXT: $vgpr1 = COPY [[BITCAST22]](<2 x i16>) + ; GFX8-NEXT: $vgpr2 = COPY [[BITCAST23]](<2 x i16>) ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; ; GFX9-LABEL: name: maxnum_v5i16 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[COPY]] - ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[COPY3]] - ; GFX9-NEXT: 
[[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[COPY1]] - ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[COPY4]] - ; GFX9-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; GFX9-NEXT: [[FCANONICALIZE4:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[COPY2]] - ; GFX9-NEXT: [[FCANONICALIZE5:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[COPY5]] - ; GFX9-NEXT: [[FMAXNUM_IEEE2:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]] - ; GFX9-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](<2 x s16>) - ; GFX9-NEXT: $vgpr1 = COPY [[FMAXNUM_IEEE1]](<2 x s16>) - ; GFX9-NEXT: $vgpr2 = COPY [[FMAXNUM_IEEE2]](<2 x s16>) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr2 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST1]](i32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[COPY2]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST2]](i32) + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr3 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr5 + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[COPY3]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST3]](i32) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST3]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[COPY4]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST4]](i32) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR3]](i32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[COPY5]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC9:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST5]](i32) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC1]](i16) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC2]](i16) + ; GFX9-NEXT: [[BITCAST9:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC3]](i16) + ; GFX9-NEXT: [[BITCAST10:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC4]](i16) + ; GFX9-NEXT: [[BITCAST11:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC5]](i16) + ; GFX9-NEXT: [[BITCAST12:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC6]](i16) + ; GFX9-NEXT: [[BITCAST13:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC7]](i16) + ; GFX9-NEXT: [[BITCAST14:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC8]](i16) + ; GFX9-NEXT: [[BITCAST15:%[0-9]+]]:_(f16) = G_BITCAST [[TRUNC9]](i16) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(f16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[BITCAST6]](f16), [[BITCAST7]](f16) + ; GFX9-NEXT: 
[[BUILD_VECTOR1:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[BITCAST8]](f16), [[BITCAST9]](f16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[BITCAST10]](f16), [[DEF]](f16) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[BITCAST11]](f16), [[BITCAST12]](f16) + ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[BITCAST13]](f16), [[BITCAST14]](f16) + ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x f16>) = G_BUILD_VECTOR [[BITCAST15]](f16), [[DEF]](f16) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x f16>) = G_FCANONICALIZE [[BUILD_VECTOR]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x f16>) = G_FCANONICALIZE [[BUILD_VECTOR3]] + ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x f16>) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x f16>) = G_FCANONICALIZE [[BUILD_VECTOR1]] + ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x f16>) = G_FCANONICALIZE [[BUILD_VECTOR4]] + ; GFX9-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(<2 x f16>) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; GFX9-NEXT: [[FCANONICALIZE4:%[0-9]+]]:_(<2 x f16>) = G_FCANONICALIZE [[BUILD_VECTOR2]] + ; GFX9-NEXT: [[FCANONICALIZE5:%[0-9]+]]:_(<2 x f16>) = G_FCANONICALIZE [[BUILD_VECTOR5]] + ; GFX9-NEXT: [[FMAXNUM_IEEE2:%[0-9]+]]:_(<2 x f16>) = G_FMAXNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]] + ; GFX9-NEXT: [[BITCAST16:%[0-9]+]]:_(f16) = G_BITCAST %123(i16) + ; GFX9-NEXT: [[BITCAST17:%[0-9]+]]:_(f16) = G_BITCAST %128(i16) + ; GFX9-NEXT: [[BITCAST18:%[0-9]+]]:_(f16) = G_BITCAST %124(i16) + ; GFX9-NEXT: [[BITCAST19:%[0-9]+]]:_(f16) = G_BITCAST %129(i16) + ; GFX9-NEXT: [[BITCAST20:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[FMAXNUM_IEEE1]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST21:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST20]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC10:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST21]](i32) + ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST21]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC11:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR4]](i32) + ; GFX9-NEXT: [[BITCAST22:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[FMAXNUM_IEEE]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST23:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST22]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC12:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST23]](i32) + ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST23]], [[C]](i32) + ; GFX9-NEXT: [[TRUNC13:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR5]](i32) + ; GFX9-NEXT: [[BITCAST24:%[0-9]+]]:_(f16) = G_BITCAST %133(i16) + ; GFX9-NEXT: [[BITCAST25:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST16]](f16) + ; GFX9-NEXT: [[BITCAST26:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[FMAXNUM_IEEE2]](<2 x f16>) + ; GFX9-NEXT: [[BITCAST27:%[0-9]+]]:_(i32) = G_BITCAST [[BITCAST26]](<2 x i16>) + ; GFX9-NEXT: [[TRUNC14:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST27]](i32) + ; GFX9-NEXT: [[BITCAST28:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST18]](f16) + ; GFX9-NEXT: [[BITCAST29:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST17]](f16) + ; GFX9-NEXT: [[BITCAST30:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST19]](f16) + ; GFX9-NEXT: [[BITCAST31:%[0-9]+]]:_(i16) = G_BITCAST [[BITCAST24]](f16) + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST25]](i16), [[BITCAST28]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST29]](i16), [[BITCAST30]](i16) + ; GFX9-NEXT: [[BUILD_VECTOR8:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[BITCAST31]](i16), [[DEF1]](i16) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR6]](<2 x i16>) + ; 
GFX9-NEXT: $vgpr1 = COPY [[BUILD_VECTOR7]](<2 x i16>) + ; GFX9-NEXT: $vgpr2 = COPY [[BUILD_VECTOR8]](<2 x i16>) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 - %2:_(<2 x s16>) = COPY $vgpr0 - %3:_(<2 x s16>) = COPY $vgpr1 - %4:_(<2 x s16>) = COPY $vgpr2 - %5:_(<6 x s16>) = G_CONCAT_VECTORS %2(<2 x s16>), %3(<2 x s16>), %4(<2 x s16>) - %6:_(s16), %7:_(s16), %8:_(s16), %9:_(s16), %10:_(s16), %11:_(s16) = G_UNMERGE_VALUES %5(<6 x s16>) - %0:_(<5 x s16>) = G_BUILD_VECTOR %6(s16), %7(s16), %8(s16), %9(s16), %10(s16) - %12:_(<2 x s16>) = COPY $vgpr3 - %13:_(<2 x s16>) = COPY $vgpr4 - %14:_(<2 x s16>) = COPY $vgpr5 - %15:_(<6 x s16>) = G_CONCAT_VECTORS %12(<2 x s16>), %13(<2 x s16>), %14(<2 x s16>) - %16:_(s16), %17:_(s16), %18:_(s16), %19:_(s16), %20:_(s16), %21:_(s16) = G_UNMERGE_VALUES %15(<6 x s16>) - %1:_(<5 x s16>) = G_BUILD_VECTOR %16(s16), %17(s16), %18(s16), %19(s16), %20(s16) - %23:_(<5 x s16>) = G_FMAXNUM %0, %1 - %27:_(s16), %28:_(s16), %29:_(s16), %30:_(s16), %31:_(s16) = G_UNMERGE_VALUES %23(<5 x s16>) - %32:_(s16) = G_IMPLICIT_DEF - %33:_(<6 x s16>) = G_BUILD_VECTOR %27(s16), %28(s16), %29(s16), %30(s16), %31(s16), %32(s16) - %24:_(<2 x s16>), %25:_(<2 x s16>), %26:_(<2 x s16>) = G_UNMERGE_VALUES %33(<6 x s16>) - $vgpr0 = COPY %24(<2 x s16>) - $vgpr1 = COPY %25(<2 x s16>) - $vgpr2 = COPY %26(<2 x s16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = COPY $vgpr2 + %3:_(<6 x i16>) = G_CONCAT_VECTORS %0(<2 x i16>), %1(<2 x i16>), %2(<2 x i16>) + %4:_(i16), %5:_(i16), %6:_(i16), %7:_(i16), %8:_(i16), %9:_(i16) = G_UNMERGE_VALUES %3(<6 x i16>) + %10:_(<5 x i16>) = G_BUILD_VECTOR %4(i16), %5(i16), %6(i16), %7(i16), %8(i16) + %11:_(<2 x i16>) = COPY $vgpr3 + %12:_(<2 x i16>) = COPY $vgpr4 + %13:_(<2 x i16>) = COPY $vgpr5 + %14:_(<6 x i16>) = G_CONCAT_VECTORS %11(<2 x i16>), %12(<2 x i16>), %13(<2 x i16>) + %15:_(i16), %16:_(i16), %17:_(i16), %18:_(i16), %19:_(i16), %20:_(i16) = G_UNMERGE_VALUES %14(<6 x i16>) + %21:_(<5 x i16>) = G_BUILD_VECTOR %15(i16), %16(i16), %17(i16), %18(i16), %19(i16) + %22:_(<5 x f16>) = G_BITCAST %10(<5 x i16>) + %23:_(<5 x f16>) = G_BITCAST %21(<5 x i16>) + %24:_(<5 x f16>) = G_FMAXNUM %22, %23 + %25:_(<5 x i16>) = G_BITCAST %24(<5 x f16>) + %26:_(i16), %27:_(i16), %28:_(i16), %29:_(i16), %30:_(i16) = G_UNMERGE_VALUES %25(<5 x i16>) + %31:_(i16) = G_IMPLICIT_DEF + %32:_(<6 x i16>) = G_BUILD_VECTOR %26(i16), %27(i16), %28(i16), %29(i16), %30(i16), %31(i16) + %33:_(<2 x i16>), %34:_(<2 x i16>), %35:_(<2 x i16>) = G_UNMERGE_VALUES %32(<6 x i16>) + $vgpr0 = COPY %33(<2 x i16>) + $vgpr1 = COPY %34(<2 x i16>) + $vgpr2 = COPY %35(<2 x i16>) SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir index a993afc22b0c2..86c3d68544add 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir @@ -10,14 +10,14 @@ body: | ; CHECK-LABEL: name: test_xor_s32 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[XOR]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_XOR %0, %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[XOR]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_XOR %0, %1 + $vgpr0 = COPY %2(i32) ... --- @@ -29,17 +29,17 @@ body: | ; CHECK-LABEL: name: test_xor_s1 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[COPY1]] - ; CHECK-NEXT: S_NOP 0, implicit [[XOR]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_CONSTANT i32 0 - %3:_(s1) = G_ICMP intpred(ne), %0, %2 - %4:_(s1) = G_ICMP intpred(ne), %1, %2 - %5:_(s32) = G_XOR %0, %1 - S_NOP 0, implicit %5 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[COPY]], [[COPY1]] + ; CHECK-NEXT: S_NOP 0, implicit [[XOR]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_CONSTANT i32 0 + %3:_(i1) = G_ICMP intpred(ne), %0(i32), %2 + %4:_(i1) = G_ICMP intpred(ne), %1(i32), %2 + %5:_(i32) = G_XOR %0, %1 + S_NOP 0, implicit %5(i32) ... 
--- @@ -51,31 +51,31 @@ body: | ; CHECK-LABEL: name: test_xor_v2s1 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV2]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV3]] - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV4]](s32), [[UV6]] - ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV5]](s32), [[UV7]] - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP]], [[ICMP2]] - ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP3]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = COPY $vgpr4_vgpr5 - %3:_(<2 x s1>) = G_ICMP intpred(ne), %0, %1 - %4:_(<2 x s1>) = G_ICMP intpred(ne), %0, %2 - %5:_(<2 x s1>) = G_XOR %3, %4 - %6:_(<2 x s32>) = G_ANYEXT %5 - $vgpr0_vgpr1 = COPY %6 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr4_vgpr5 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV]](i32), [[UV2]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV1]](i32), [[UV3]] + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY2]](<2 x i32>) + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV4]](i32), [[UV6]] + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV5]](i32), [[UV7]] + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(i1) = G_XOR [[ICMP]], [[ICMP2]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(i1) = G_XOR [[ICMP1]], [[ICMP3]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[XOR]](i1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[XOR1]](i1) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i32>) = COPY $vgpr4_vgpr5 + %3:_(<2 x i1>) = G_ICMP intpred(ne), %0(<2 x i32>), %1 + %4:_(<2 x i1>) = G_ICMP intpred(ne), %0(<2 x i32>), %2 + %5:_(<2 x i1>) = G_XOR %3, %4 + %6:_(<2 x i32>) = G_ANYEXT %5(<2 x i1>) + $vgpr0_vgpr1 = COPY %6(<2 x i32>) ... 
--- @@ -87,35 +87,35 @@ body: | ; CHECK-LABEL: name: test_xor_v3s1 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5, $vgpr6_vgpr7_vgpr8 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV3]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV4]] - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[UV5]] - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) - ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV6]](s32), [[UV9]] - ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV7]](s32), [[UV10]] - ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV8]](s32), [[UV11]] - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP]], [[ICMP3]] - ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP4]] - ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(s1) = G_XOR [[ICMP2]], [[ICMP5]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR2]](s1) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - %2:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 - %3:_(<3 x s1>) = G_ICMP intpred(ne), %0, %1 - %4:_(<3 x s1>) = G_ICMP intpred(ne), %0, %2 - %5:_(<3 x s1>) = G_XOR %3, %4 - %6:_(<3 x s32>) = G_ANYEXT %5 - $vgpr0_vgpr1_vgpr2 = COPY %6 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr6_vgpr7_vgpr8 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV]](i32), [[UV3]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV1]](i32), [[UV4]] + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV2]](i32), [[UV5]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; CHECK-NEXT: [[UV9:%[0-9]+]]:_(i32), [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY2]](<3 x i32>) + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV6]](i32), [[UV9]] + ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV7]](i32), [[UV10]] + ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(i1) = G_ICMP intpred(ne), [[UV8]](i32), [[UV11]] + ; CHECK-NEXT: 
[[XOR:%[0-9]+]]:_(i1) = G_XOR [[ICMP]], [[ICMP3]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(i1) = G_XOR [[ICMP1]], [[ICMP4]] + ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(i1) = G_XOR [[ICMP2]], [[ICMP5]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[XOR]](i1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i32) = G_ANYEXT [[XOR1]](i1) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(i32) = G_ANYEXT [[XOR2]](i1) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[ANYEXT]](i32), [[ANYEXT1]](i32), [[ANYEXT2]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(<3 x i32>) = COPY $vgpr6_vgpr7_vgpr8 + %3:_(<3 x i1>) = G_ICMP intpred(ne), %0(<3 x i32>), %1 + %4:_(<3 x i1>) = G_ICMP intpred(ne), %0(<3 x i32>), %2 + %5:_(<3 x i1>) = G_XOR %3, %4 + %6:_(<3 x i32>) = G_ANYEXT %5(<3 x i1>) + $vgpr0_vgpr1_vgpr2 = COPY %6(<3 x i32>) ... --- @@ -127,14 +127,14 @@ body: | ; CHECK-LABEL: name: test_xor_s64 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[XOR]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = G_XOR %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(i64) = G_XOR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[XOR]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64) = G_XOR %0, %1 + $vgpr0_vgpr1 = COPY %2(i64) ... 
--- @@ -146,21 +146,21 @@ body: | ; CHECK-LABEL: name: test_xor_s96 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY]](s96), 0 - ; CHECK-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s96), 64 - ; CHECK-NEXT: [[EXTRACT2:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY1]](s96), 0 - ; CHECK-NEXT: [[EXTRACT3:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](s96), 64 - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[EXTRACT]], [[EXTRACT2]] - ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[EXTRACT1]], [[EXTRACT3]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32), [[XOR1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) - %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(s96) = COPY $vgpr3_vgpr4_vgpr5 - %2:_(s96) = G_XOR %0, %1 - $vgpr0_vgpr1_vgpr2 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i96) = COPY $vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(i64) = G_EXTRACT [[COPY]](i96), 0 + ; CHECK-NEXT: [[EXTRACT1:%[0-9]+]]:_(i32) = G_EXTRACT [[COPY]](i96), 64 + ; CHECK-NEXT: [[EXTRACT2:%[0-9]+]]:_(i64) = G_EXTRACT [[COPY1]](i96), 0 + ; CHECK-NEXT: [[EXTRACT3:%[0-9]+]]:_(i32) = G_EXTRACT [[COPY1]](i96), 64 + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(i64) = G_XOR [[EXTRACT]], [[EXTRACT2]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[EXTRACT1]], [[EXTRACT3]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR]](i64) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i96) = G_MERGE_VALUES [[UV]](i32), [[UV1]](i32), [[XOR1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](i96) + %0:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(i96) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(i96) = G_XOR %0, %1 + $vgpr0_vgpr1_vgpr2 = COPY %2(i96) ... 
--- @@ -172,18 +172,18 @@ body: | ; CHECK-LABEL: name: test_xor_128 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](s128) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[UV]], [[UV2]] - ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[UV1]], [[UV3]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[XOR]](s64), [[XOR1]](s64) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - %2:_(s128) = G_XOR %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY1]](i128) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(i64) = G_XOR [[UV]], [[UV2]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(i64) = G_XOR [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[XOR]](i64), [[XOR1]](i64) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](i128) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(i128) = G_XOR %0, %1 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(i128) ... --- @@ -195,17 +195,17 @@ body: | ; CHECK-LABEL: name: test_xor_s7 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[XOR]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s7) = G_TRUNC %0 - %3:_(s7) = G_TRUNC %1 - %4:_(s7) = G_XOR %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[XOR]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i7) = G_TRUNC %0(i32) + %3:_(i7) = G_TRUNC %1(i32) + %4:_(i7) = G_XOR %2, %3 + %5:_(i32) = G_ANYEXT %4(i7) + $vgpr0 = COPY %5(i32) ... --- @@ -217,17 +217,17 @@ body: | ; CHECK-LABEL: name: test_xor_s8 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[XOR]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s8) = G_TRUNC %0 - %3:_(s8) = G_TRUNC %1 - %4:_(s8) = G_XOR %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[XOR]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i8) = G_TRUNC %0(i32) + %3:_(i8) = G_TRUNC %1(i32) + %4:_(i8) = G_XOR %2, %3 + %5:_(i32) = G_ANYEXT %4(i8) + $vgpr0 = COPY %5(i32) ... 
--- @@ -239,20 +239,20 @@ body: | ; CHECK-LABEL: name: test_xor_s16 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC]], [[TRUNC1]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s16) = G_XOR %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(i16) = G_XOR [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[XOR]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i16) = G_XOR %2, %3 + %5:_(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... --- @@ -264,20 +264,20 @@ body: | ; CHECK-LABEL: name: test_xor_s24 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC]], [[TRUNC1]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s16) = G_XOR %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(i16) = G_XOR [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i32) = G_ANYEXT [[XOR]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i16) = G_XOR %2, %3 + %5:_(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... 
--- @@ -289,17 +289,17 @@ body: | ; CHECK-LABEL: name: test_xor_s48 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[XOR]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s48) = G_TRUNC %0 - %3:_(s48) = G_TRUNC %1 - %4:_(s48) = G_XOR %2, %3 - %5:_(s64) = G_ANYEXT %4 - $vgpr0_vgpr1 = COPY %5 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(i64) = G_XOR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[XOR]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i48) = G_TRUNC %0(i64) + %3:_(i48) = G_TRUNC %1(i64) + %4:_(i48) = G_XOR %2, %3 + %5:_(i64) = G_ANYEXT %4(i48) + $vgpr0_vgpr1 = COPY %5(i64) ... --- @@ -311,14 +311,14 @@ body: | ; CHECK-LABEL: name: test_xor_v2s32 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<2 x s32>) = G_XOR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[XOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = G_XOR %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<2 x i32>) = G_XOR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[XOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i32>) = G_XOR %0, %1 + $vgpr0_vgpr1 = COPY %2(<2 x i32>) ... 
--- @@ -330,21 +330,21 @@ body: | ; CHECK-LABEL: name: test_xor_v3s32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) - ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<2 x s32>) = G_XOR [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[UV2]], [[UV5]] - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](<2 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[UV7]](s32), [[XOR1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR2]](<3 x s32>) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - %2:_(<3 x s32>) = G_XOR %0, %1 - $vgpr0_vgpr1_vgpr2 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](<3 x i32>) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[UV3]](i32), [[UV4]](i32) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<2 x i32>) = G_XOR [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[UV2]], [[UV5]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR]](<2 x i32>) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[UV6]](i32), [[UV7]](i32), [[XOR1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR2]](<3 x i32>) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x i32>) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(<3 x i32>) = G_XOR %0, %1 + $vgpr0_vgpr1_vgpr2 = COPY %2(<3 x i32>) ... 
--- @@ -356,18 +356,18 @@ body: | ; CHECK-LABEL: name: test_xor_v4s32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<2 x s32>) = G_XOR [[UV]], [[UV2]] - ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(<2 x s32>) = G_XOR [[UV1]], [[UV3]] - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[XOR]](<2 x s32>), [[XOR1]](<2 x s32>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - %2:_(<4 x s32>) = G_XOR %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i32>), [[UV1:%[0-9]+]]:_(<2 x i32>) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x i32>), [[UV3:%[0-9]+]]:_(<2 x i32>) = G_UNMERGE_VALUES [[COPY1]](<4 x i32>) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<2 x i32>) = G_XOR [[UV]], [[UV2]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(<2 x i32>) = G_XOR [[UV1]], [[UV3]] + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i32>) = G_CONCAT_VECTORS [[XOR]](<2 x i32>), [[XOR1]](<2 x i32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x i32>) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<4 x i32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(<4 x i32>) = G_XOR %0, %1 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<4 x i32>) ... 
--- @@ -376,29 +376,29 @@ body: | bb.0: ; CHECK-LABEL: name: test_xor_v5s32 - ; CHECK: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<5 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32) - ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<5 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[UV6]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV7]](s32), [[UV8]](s32) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<2 x s32>) = G_XOR [[BUILD_VECTOR]], [[BUILD_VECTOR2]] - ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(<2 x s32>) = G_XOR [[BUILD_VECTOR1]], [[BUILD_VECTOR3]] - ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[UV4]], [[UV9]] - ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](<2 x s32>) - ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](<2 x s32>) - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF2]](<8 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[UV10]](s32), [[UV11]](s32), [[UV12]](s32), [[UV13]](s32), [[XOR2]](s32), [[UV19]](s32), [[UV20]](s32), [[UV21]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR4]](<8 x s32>) - %0:_(<5 x s32>) = G_IMPLICIT_DEF - %1:_(<5 x s32>) = G_IMPLICIT_DEF - %2:_(<5 x s32>) = G_XOR %0, %1 - %3:_(<8 x s32>) = G_IMPLICIT_DEF - %4:_(<8 x s32>) = G_INSERT %3, %2, 0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %4 + ; CHECK: [[DEF:%[0-9]+]]:_(<5 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<5 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32), [[UV4:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF]](<5 x i32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[UV2]](i32), [[UV3]](i32) + ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32), [[UV8:%[0-9]+]]:_(i32), [[UV9:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF1]](<5 x i32>) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[UV5]](i32), [[UV6]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[UV7]](i32), [[UV8]](i32) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<2 x i32>) = G_XOR [[BUILD_VECTOR]], [[BUILD_VECTOR2]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(<2 x i32>) = G_XOR [[BUILD_VECTOR1]], [[BUILD_VECTOR3]] + ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(i32) = G_XOR [[UV4]], [[UV9]] + ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(i32), [[UV11:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR]](<2 x i32>) + ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(i32), [[UV13:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[XOR1]](<2 x i32>) + ; 
CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<8 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV14:%[0-9]+]]:_(i32), [[UV15:%[0-9]+]]:_(i32), [[UV16:%[0-9]+]]:_(i32), [[UV17:%[0-9]+]]:_(i32), [[UV18:%[0-9]+]]:_(i32), [[UV19:%[0-9]+]]:_(i32), [[UV20:%[0-9]+]]:_(i32), [[UV21:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF2]](<8 x i32>) + ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x i32>) = G_BUILD_VECTOR [[UV10]](i32), [[UV11]](i32), [[UV12]](i32), [[UV13]](i32), [[XOR2]](i32), [[UV19]](i32), [[UV20]](i32), [[UV21]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR4]](<8 x i32>) + %0:_(<5 x i32>) = G_IMPLICIT_DEF + %1:_(<5 x i32>) = G_IMPLICIT_DEF + %2:_(<5 x i32>) = G_XOR %0, %1 + %3:_(<8 x i32>) = G_IMPLICIT_DEF + %4:_(<8 x i32>) = G_INSERT %3, %2(<5 x i32>), 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %4(<8 x i32>) ... --- @@ -410,18 +410,18 @@ body: | ; CHECK-LABEL: name: test_xor_v2s64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[UV]], [[UV2]] - ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[UV1]], [[UV3]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[XOR]](s64), [[XOR1]](s64) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - %2:_(<2 x s64>) = G_XOR %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](<2 x i64>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i64), [[UV3:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY1]](<2 x i64>) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(i64) = G_XOR [[UV]], [[UV2]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(i64) = G_XOR [[UV1]], [[UV3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[XOR]](i64), [[XOR1]](i64) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x i64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(<2 x i64>) = G_XOR %0, %1 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<2 x i64>) ... 
--- @@ -433,14 +433,14 @@ body: | ; CHECK-LABEL: name: test_xor_v2s16 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[XOR]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_XOR %0, %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr1 + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<2 x i16>) = G_XOR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[XOR]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = G_XOR %0, %1 + $vgpr0 = COPY %2(<2 x i16>) ... --- @@ -451,41 +451,41 @@ body: | ; CHECK-LABEL: name: test_xor_v3s16 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV3]](<2 x s16>), [[UV4]](<2 x s16>) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[XOR]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV6]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>) - %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - %2:_(<3 x s16>), %3:_(<3 x s16>) = G_UNMERGE_VALUES %0 - %4:_(<3 x s16>), %5:_(<3 x s16>) = G_UNMERGE_VALUES %1 - %6:_(<3 x s16>) = G_XOR %2, %4 - %7:_(<3 x s16>) = 
G_IMPLICIT_DEF - %8:_(<6 x s16>) = G_CONCAT_VECTORS %6, %7 - $vgpr0_vgpr1_vgpr2 = COPY %8 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<6 x i16>) = COPY $vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<6 x i16>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[UV]](<2 x i16>), [[UV1]](<2 x i16>) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<6 x i16>) + ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[UV3]](<2 x i16>), [[UV4]](<2 x i16>) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x i16>) = G_XOR [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[XOR]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV7]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x i16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x i16>), [[UV9:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV8]](<2 x i16>) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV9]](<2 x i16>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[BITCAST2]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[AND2]], [[C]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x i16>) = G_CONCAT_VECTORS [[UV6]](<2 x i16>), [[BITCAST3]](<2 x i16>), [[BITCAST4]](<2 x i16>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x i16>) + %0:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<6 x i16>) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(<3 x i16>), %3:_(<3 x i16>) = G_UNMERGE_VALUES %0(<6 x i16>) + %4:_(<3 x i16>), %5:_(<3 x i16>) = G_UNMERGE_VALUES %1(<6 x i16>) + %6:_(<3 x i16>) = G_XOR %2, %4 + %7:_(<3 x i16>) = G_IMPLICIT_DEF + %8:_(<6 x i16>) = G_CONCAT_VECTORS %6(<3 x i16>), %7(<3 x i16>) + $vgpr0_vgpr1_vgpr2 = COPY %8(<6 x i16>) ... 
--- @@ -497,14 +497,14 @@ body: | ; CHECK-LABEL: name: test_xor_v4s16 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[XOR]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 - %2:_(<4 x s16>) = G_XOR %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x i16>) = G_XOR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[XOR]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = COPY $vgpr2_vgpr3 + %2:_(<4 x i16>) = G_XOR %0, %1 + $vgpr0_vgpr1 = COPY %2(<4 x i16>) ... --- @@ -513,70 +513,70 @@ body: | bb.0: ; CHECK-LABEL: name: test_xor_v5s16 - ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<6 x s16>) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV3]](<2 x s16>), [[UV4]](<2 x s16>) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[XOR]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]] - ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]] - ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x 
s16>) = G_BITCAST [[OR2]](s32) - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL3]] - ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>) - ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(<4 x s16>) = G_XOR [[CONCAT_VECTORS2]], [[CONCAT_VECTORS3]] - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[XOR1]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C]](s32) - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>) - ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) - ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST11]], [[C]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) - ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] - ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C1]] - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) - ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL5]] - ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR4]], [[C]](s32) - ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL6]] - ; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS4:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>), [[BITCAST14]](<2 x s16>), [[UV13]](<2 x s16>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<8 x s16>) - %0:_(<5 x s16>) = G_IMPLICIT_DEF - %1:_(<5 x s16>) = G_IMPLICIT_DEF - %2:_(<5 x s16>) = G_XOR %0, %1 - %4:_(<8 x s16>) = G_IMPLICIT_DEF - %5:_(<8 x s16>) = G_INSERT %4, %2, 0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %5 + ; CHECK: [[DEF:%[0-9]+]]:_(<6 x i16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>), [[UV2:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF]](<6 x i16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV2]](<2 x i16>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<6 x i16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x i16>), [[UV4:%[0-9]+]]:_(<2 x i16>), [[UV5:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF1]](<6 x i16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(i32) = G_BITCAST [[UV4]](<2 x i16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST2]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(i32) = G_BITCAST [[UV5]](<2 x i16>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[UV]](<2 x i16>), [[UV1]](<2 x i16>) + ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[UV3]](<2 x i16>), [[UV4]](<2 
x i16>) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x i16>) = G_XOR [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x i16>), [[UV7:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[XOR]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(i32) = G_BITCAST [[UV6]](<2 x i16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST4]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(i32) = G_BITCAST [[UV7]](<2 x i16>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[AND]], [[C]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[LSHR]], [[SHL]] + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[C2]], [[C]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[C2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR1]](i32) + ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x i16>), [[BITCAST7]](<2 x i16>) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST3]], [[C1]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[AND1]], [[C]](i32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[LSHR1]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR2]](i32) + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[C2]], [[C]](i32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[C2]], [[SHL3]] + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR3]](i32) + ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x i16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x i16>), [[BITCAST9]](<2 x i16>) + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(<4 x i16>) = G_XOR [[CONCAT_VECTORS2]], [[CONCAT_VECTORS3]] + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x i16>), [[UV9:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[XOR1]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(i32) = G_BITCAST [[UV8]](<2 x i16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST10]], [[C]](i32) + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<8 x i16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<2 x i16>), [[UV11:%[0-9]+]]:_(<2 x i16>), [[UV12:%[0-9]+]]:_(<2 x i16>), [[UV13:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[DEF2]](<8 x i16>) + ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(i32) = G_BITCAST [[UV12]](<2 x i16>) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST11]], [[C]](i32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i32) = G_AND [[BITCAST4]], [[C1]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LSHR2]], [[C]](i32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[AND2]], [[SHL4]] + ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR4]](i32) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[BITCAST5]], [[C1]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[BITCAST10]], [[C1]] + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[AND4]], [[C]](i32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[AND3]], [[SHL5]] + ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR5]](i32) + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[LSHR4]], [[C]](i32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR [[LSHR3]], [[SHL6]] + ; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x i16>) = G_BITCAST [[OR6]](i32) + ; CHECK-NEXT: [[CONCAT_VECTORS4:%[0-9]+]]:_(<8 x i16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x i16>), [[BITCAST13]](<2 x i16>), [[BITCAST14]](<2 x i16>), [[UV13]](<2 x i16>) + ; CHECK-NEXT: 
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<8 x i16>) + %0:_(<5 x i16>) = G_IMPLICIT_DEF + %1:_(<5 x i16>) = G_IMPLICIT_DEF + %2:_(<5 x i16>) = G_XOR %0, %1 + %3:_(<8 x i16>) = G_IMPLICIT_DEF + %4:_(<8 x i16>) = G_INSERT %3, %2(<5 x i16>), 0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %4(<8 x i16>) ... --- @@ -585,20 +585,20 @@ body: | bb.0: ; CHECK-LABEL: name: test_xor_v3s8 - ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<4 x s32>) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV]], [[UV4]] - ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[UV1]], [[UV5]] - ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[UV2]], [[UV6]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[XOR]](s32), [[XOR1]](s32), [[XOR2]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - %0:_(<3 x s8>) = G_IMPLICIT_DEF - %1:_(<3 x s8>) = G_IMPLICIT_DEF - %2:_(<3 x s8>) = G_XOR %0, %1 - %3:_(<3 x s32>) = G_ANYEXT %2 - $vgpr0_vgpr1_vgpr2 = COPY %3 + ; CHECK: [[DEF:%[0-9]+]]:_(<4 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF]](<4 x i32>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF1]](<4 x i32>) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[UV]], [[UV4]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[UV1]], [[UV5]] + ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(i32) = G_XOR [[UV2]], [[UV6]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[XOR]](i32), [[XOR1]](i32), [[XOR2]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) + %0:_(<3 x i8>) = G_IMPLICIT_DEF + %1:_(<3 x i8>) = G_IMPLICIT_DEF + %2:_(<3 x i8>) = G_XOR %0, %1 + %3:_(<3 x i32>) = G_ANYEXT %2(<3 x i8>) + $vgpr0_vgpr1_vgpr2 = COPY %3(<3 x i32>) ... 
--- @@ -607,19 +607,19 @@ body: | bb.0: ; CHECK-LABEL: name: test_xor_v4s8 - ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<4 x s32>) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV]], [[UV4]] - ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[UV1]], [[UV5]] - ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[UV2]], [[UV6]] - ; CHECK-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[UV3]], [[UV7]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[XOR]](s32), [[XOR1]](s32), [[XOR2]](s32), [[XOR3]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) - %0:_(<4 x s8>) = G_IMPLICIT_DEF - %1:_(<4 x s8>) = G_IMPLICIT_DEF - %2:_(<4 x s8>) = G_XOR %0, %1 - %3:_(<4 x s32>) = G_ANYEXT %2 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; CHECK: [[DEF:%[0-9]+]]:_(<4 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF]](<4 x i32>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF1]](<4 x i32>) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[UV]], [[UV4]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(i32) = G_XOR [[UV1]], [[UV5]] + ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(i32) = G_XOR [[UV2]], [[UV6]] + ; CHECK-NEXT: [[XOR3:%[0-9]+]]:_(i32) = G_XOR [[UV3]], [[UV7]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[XOR]](i32), [[XOR1]](i32), [[XOR2]](i32), [[XOR3]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) + %0:_(<4 x i8>) = G_IMPLICIT_DEF + %1:_(<4 x i8>) = G_IMPLICIT_DEF + %2:_(<4 x i8>) = G_XOR %0, %1 + %3:_(<4 x i32>) = G_ANYEXT %2(<4 x i8>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3(<4 x i32>) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir index 0b34dffc5004f..0516aa3e54c54 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir @@ -10,12 +10,12 @@ body: | ; CHECK-LABEL: name: test_zext_s32_to_s64 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) - %0:_(s32) = COPY $vgpr0 - %1:_(s64) = G_ZEXT %0 - $vgpr0_vgpr1 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[COPY]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](i64) + %0:_(i32) = COPY $vgpr0 + %1:_(i64) = G_ZEXT %0(i32) + $vgpr0_vgpr1 = COPY %1(i64) ... 
--- @@ -27,15 +27,15 @@ body: | ; CHECK-LABEL: name: test_zext_s16_to_s64 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]] - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AND]](s64) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s64) = G_ZEXT %1 - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[COPY]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[ANYEXT]], [[C]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AND]](i64) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(i64) = G_ZEXT %1(i16) + $vgpr0_vgpr1 = COPY %2(i64) ... --- @@ -47,14 +47,14 @@ body: | ; CHECK-LABEL: name: test_zext_s16_to_s32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s32) = G_ZEXT %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: $vgpr0 = COPY [[AND]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(i32) = G_ZEXT %1(i16) + $vgpr0 = COPY %2(i32) ... --- @@ -66,14 +66,14 @@ body: | ; CHECK-LABEL: name: test_zext_s24_to_s32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s24) = G_TRUNC %0 - %2:_(s32) = G_ZEXT %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16777215 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: $vgpr0 = COPY [[AND]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i24) = G_TRUNC %0(i32) + %2:_(i32) = G_ZEXT %1(i24) + $vgpr0 = COPY %2(i32) ... 
--- @@ -85,16 +85,16 @@ body: | ; CHECK-LABEL: name: test_zext_s32_to_s96 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[MV]](s64), [[C1]](s64), [[C1]](s64) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s96) = G_TRUNC [[MV1]](s192) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC]](s96) - %0:_(s32) = COPY $vgpr0 - %1:_(s96) = G_ZEXT %0 - $vgpr0_vgpr1_vgpr2 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i192) = G_MERGE_VALUES [[MV]](i64), [[C1]](i64), [[C1]](i64) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i96) = G_TRUNC [[MV1]](i192) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC]](i96) + %0:_(i32) = COPY $vgpr0 + %1:_(i96) = G_ZEXT %0(i32) + $vgpr0_vgpr1_vgpr2 = COPY %1(i96) ... --- @@ -103,11 +103,11 @@ body: | bb.0: ; CHECK-LABEL: name: test_zext_i1_to_s32 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32) - %0:_(s1) = G_CONSTANT i1 0 - %1:_(s32) = G_ZEXT %0 - $vgpr0 = COPY %1 + ; CHECK: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: $vgpr0 = COPY [[C]](i32) + %0:_(i1) = G_CONSTANT i1 false + %1:_(i32) = G_ZEXT %0(i1) + $vgpr0 = COPY %1(i32) ... --- @@ -116,11 +116,11 @@ body: | bb.0: ; CHECK-LABEL: name: test_zext_i1_to_i64 - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[C]](s64) - %0:_(s1) = G_CONSTANT i1 0 - %1:_(s64) = G_ZEXT %0 - $vgpr0_vgpr1 = COPY %1 + ; CHECK: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[C]](i64) + %0:_(i1) = G_CONSTANT i1 false + %1:_(i64) = G_ZEXT %0(i1) + $vgpr0_vgpr1 = COPY %1(i64) ... --- @@ -132,17 +132,17 @@ body: | ; CHECK-LABEL: name: test_zext_v2s16_to_v2s32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[LSHR]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s32>) = G_ZEXT %0 - $vgpr0_vgpr1 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[AND]](i32), [[LSHR]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i32>) = G_ZEXT %0(<2 x i16>) + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... 
--- @@ -154,21 +154,21 @@ body: | ; CHECK-LABEL: name: test_zext_v3s16_to_v3s32 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[LSHR]](s32), [[AND1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<3 x s16>) = G_EXTRACT %0, 0 - %2:_(<3 x s32>) = G_ZEXT %1 - $vgpr0_vgpr1_vgpr2 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i32>) = G_BUILD_VECTOR [[AND]](i32), [[LSHR]](i32), [[AND1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<3 x i16>) = G_EXTRACT %0(<4 x i16>), 0 + %2:_(<3 x i32>) = G_ZEXT %1(<3 x i16>) + $vgpr0_vgpr1_vgpr2 = COPY %2(<3 x i32>) ... 
--- @@ -180,21 +180,21 @@ body: | ; CHECK-LABEL: name: test_zext_v4s16_to_v4s32 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[LSHR]](s32), [[AND1]](s32), [[LSHR1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s32>) = G_ZEXT %0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i16>), [[UV1:%[0-9]+]]:_(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[UV]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST [[UV1]](<2 x i16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST1]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i32>) = G_BUILD_VECTOR [[AND]](i32), [[LSHR]](i32), [[AND1]](i32), [[LSHR1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i32>) = G_ZEXT %0(<4 x i16>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<4 x i32>) ... --- @@ -206,15 +206,15 @@ body: | ; CHECK-LABEL: name: test_zext_v2s32_to_v2s64 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[UV]](s32) - ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[UV1]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[ZEXT]](s64), [[ZEXT1]](s64) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s64>) = G_ZEXT %0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[UV]](i32) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[UV1]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[ZEXT]](i64), [[ZEXT1]](i64) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x i64>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i64>) = G_ZEXT %0(<2 x i32>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x i64>) ... 
--- @@ -226,16 +226,16 @@ body: | ; CHECK-LABEL: name: test_zext_v3s32_to_v3s64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[UV]](s32) - ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[UV1]](s32) - ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s64) = G_ZEXT [[UV2]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[ZEXT]](s64), [[ZEXT1]](s64), [[ZEXT2]](s64) - ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s64>) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s64>) = G_ZEXT %0 - S_NOP 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<3 x i32>) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[UV]](i32) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[UV1]](i32) + ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(i64) = G_ZEXT [[UV2]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x i64>) = G_BUILD_VECTOR [[ZEXT]](i64), [[ZEXT1]](i64), [[ZEXT2]](i64) + ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x i64>) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x i64>) = G_ZEXT %0(<3 x i32>) + S_NOP 0, implicit %1(<3 x i64>) ... @@ -248,17 +248,17 @@ body: | ; CHECK-LABEL: name: test_zext_v4s32_to_v4s64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[UV]](s32) - ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[UV1]](s32) - ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s64) = G_ZEXT [[UV2]](s32) - ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s64) = G_ZEXT [[UV3]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[ZEXT]](s64), [[ZEXT1]](s64), [[ZEXT2]](s64), [[ZEXT3]](s64) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<4 x s64>) = G_ZEXT %0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[UV]](i32) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(i64) = G_ZEXT [[UV1]](i32) + ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(i64) = G_ZEXT [[UV2]](i32) + ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(i64) = G_ZEXT [[UV3]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i64>) = G_BUILD_VECTOR [[ZEXT]](i64), [[ZEXT1]](i64), [[ZEXT2]](i64), [[ZEXT3]](i64) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x i64>) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<4 x i64>) = G_ZEXT %0(<4 x i32>) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1(<4 x i64>) ... 
--- @@ -270,15 +270,15 @@ body: | ; CHECK-LABEL: name: test_zext_s8_to_s16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; CHECK-NEXT: S_ENDPGM 0, implicit [[AND]](s16) - %0:_(s32) = COPY $vgpr0 - %1:_(s8) = G_TRUNC %0 - %2:_(s16) = G_ZEXT %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C]] + ; CHECK-NEXT: S_ENDPGM 0, implicit [[AND]](i16) + %0:_(i32) = COPY $vgpr0 + %1:_(i8) = G_TRUNC %0(i32) + %2:_(i16) = G_ZEXT %1(i8) + S_ENDPGM 0, implicit %2(i16) ... --- @@ -290,15 +290,15 @@ body: | ; CHECK-LABEL: name: test_zext_s8_to_s24 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[AND]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s24) - %0:_(s32) = COPY $vgpr0 - %1:_(s8) = G_TRUNC %0 - %2:_(s24) = G_ZEXT %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i24) = G_TRUNC [[AND]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](i24) + %0:_(i32) = COPY $vgpr0 + %1:_(i8) = G_TRUNC %0(i32) + %2:_(i24) = G_ZEXT %1(i8) + S_ENDPGM 0, implicit %2(i24) ... @@ -311,14 +311,14 @@ body: | ; CHECK-LABEL: name: test_zext_s7_to_s32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; CHECK-NEXT: S_ENDPGM 0, implicit [[AND]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s7) = G_TRUNC %0 - %2:_(s32) = G_ZEXT %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 127 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: S_ENDPGM 0, implicit [[AND]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i7) = G_TRUNC %0(i32) + %2:_(i32) = G_ZEXT %1(i7) + S_ENDPGM 0, implicit %2(i32) ... --- @@ -330,14 +330,14 @@ body: | ; CHECK-LABEL: name: test_zext_s8_to_s32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; CHECK-NEXT: S_ENDPGM 0, implicit [[AND]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s8) = G_TRUNC %0 - %2:_(s32) = G_ZEXT %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: S_ENDPGM 0, implicit [[AND]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i8) = G_TRUNC %0(i32) + %2:_(i32) = G_ZEXT %1(i8) + S_ENDPGM 0, implicit %2(i32) ... 
--- @@ -349,15 +349,15 @@ body: | ; CHECK-LABEL: name: test_zext_s32_to_s128 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[C1]](s64) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV1]](s128) - %0:_(s32) = COPY $vgpr0 - %1:_(s128) = G_ZEXT %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[MV]](i64), [[C1]](i64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV1]](i128) + %0:_(i32) = COPY $vgpr0 + %1:_(i128) = G_ZEXT %0(i32) + S_ENDPGM 0, implicit %1(i128) ... --- @@ -369,16 +369,16 @@ body: | ; CHECK-LABEL: name: test_zext_s32_to_s160 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s320) = G_MERGE_VALUES [[MV]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s160) = G_TRUNC [[MV1]](s320) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s160) - %0:_(s32) = COPY $vgpr0 - %1:_(s160) = G_ZEXT %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i320) = G_MERGE_VALUES [[MV]](i64), [[C1]](i64), [[C1]](i64), [[C1]](i64), [[C1]](i64) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i160) = G_TRUNC [[MV1]](i320) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](i160) + %0:_(i32) = COPY $vgpr0 + %1:_(i160) = G_ZEXT %0(i32) + S_ENDPGM 0, implicit %1(i160) ... @@ -391,15 +391,15 @@ body: | ; CHECK-LABEL: name: test_zext_s32_to_s192 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[MV]](s64), [[C1]](s64), [[C1]](s64) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV1]](s192) - %0:_(s32) = COPY $vgpr0 - %1:_(s192) = G_ZEXT %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i192) = G_MERGE_VALUES [[MV]](i64), [[C1]](i64), [[C1]](i64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV1]](i192) + %0:_(i32) = COPY $vgpr0 + %1:_(i192) = G_ZEXT %0(i32) + S_ENDPGM 0, implicit %1(i192) ... 
--- @@ -411,16 +411,16 @@ body: | ; CHECK-LABEL: name: test_zext_s32_to_s224 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s448) = G_MERGE_VALUES [[MV]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s224) = G_TRUNC [[MV1]](s448) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s224) - %0:_(s32) = COPY $vgpr0 - %1:_(s224) = G_ZEXT %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i448) = G_MERGE_VALUES [[MV]](i64), [[C1]](i64), [[C1]](i64), [[C1]](i64), [[C1]](i64), [[C1]](i64), [[C1]](i64) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i224) = G_TRUNC [[MV1]](i448) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](i224) + %0:_(i32) = COPY $vgpr0 + %1:_(i224) = G_ZEXT %0(i32) + S_ENDPGM 0, implicit %1(i224) ... --- @@ -432,15 +432,15 @@ body: | ; CHECK-LABEL: name: test_zext_s32_to_s256 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV1]](s256) - %0:_(s32) = COPY $vgpr0 - %1:_(s256) = G_ZEXT %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i256) = G_MERGE_VALUES [[MV]](i64), [[C1]](i64), [[C1]](i64), [[C1]](i64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV1]](i256) + %0:_(i32) = COPY $vgpr0 + %1:_(i256) = G_ZEXT %0(i32) + S_ENDPGM 0, implicit %1(i256) ... 
--- @@ -452,15 +452,15 @@ body: | ; CHECK-LABEL: name: test_zext_s32_to_s512 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s512) = G_MERGE_VALUES [[MV]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV1]](s512) - %0:_(s32) = COPY $vgpr0 - %1:_(s512) = G_ZEXT %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i512) = G_MERGE_VALUES [[MV]](i64), [[C1]](i64), [[C1]](i64), [[C1]](i64), [[C1]](i64), [[C1]](i64), [[C1]](i64), [[C1]](i64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV1]](i512) + %0:_(i32) = COPY $vgpr0 + %1:_(i512) = G_ZEXT %0(i32) + S_ENDPGM 0, implicit %1(i512) ... --- @@ -472,16 +472,16 @@ body: | ; CHECK-LABEL: name: test_zext_s32_to_s992 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s448) = G_MERGE_VALUES [[MV]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s224) = G_TRUNC [[MV1]](s448) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s224) - %0:_(s32) = COPY $vgpr0 - %1:_(s224) = G_ZEXT %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i448) = G_MERGE_VALUES [[MV]](i64), [[C1]](i64), [[C1]](i64), [[C1]](i64), [[C1]](i64), [[C1]](i64), [[C1]](i64) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i224) = G_TRUNC [[MV1]](i448) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](i224) + %0:_(i32) = COPY $vgpr0 + %1:_(i224) = G_ZEXT %0(i32) + S_ENDPGM 0, implicit %1(i224) ... 
--- @@ -493,15 +493,15 @@ body: | ; CHECK-LABEL: name: test_zext_s32_to_s1024 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[MV]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV1]](s1024) - %0:_(s32) = COPY $vgpr0 - %1:_(s1024) = G_ZEXT %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY]](i32), [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i1024) = G_MERGE_VALUES [[MV]](i64), [[C1]](i64), [[C1]](i64), [[C1]](i64), [[C1]](i64), [[C1]](i64), [[C1]](i64), [[C1]](i64), [[C1]](i64), [[C1]](i64), [[C1]](i64), [[C1]](i64), [[C1]](i64), [[C1]](i64), [[C1]](i64), [[C1]](i64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV1]](i1024) + %0:_(i32) = COPY $vgpr0 + %1:_(i1024) = G_ZEXT %0(i32) + S_ENDPGM 0, implicit %1(i1024) ... --- @@ -513,13 +513,13 @@ body: | ; CHECK-LABEL: name: test_zext_s64_to_s128 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY]](s64), [[C]](s64) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s128) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s128) = G_ZEXT %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[COPY]](i64), [[C]](i64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](i128) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i128) = G_ZEXT %0(i64) + S_ENDPGM 0, implicit %1(i128) ... --- @@ -531,13 +531,13 @@ body: | ; CHECK-LABEL: name: test_zext_s64_to_s192 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[COPY]](s64), [[C]](s64), [[C]](s64) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s192) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s192) = G_ZEXT %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i192) = G_MERGE_VALUES [[COPY]](i64), [[C]](i64), [[C]](i64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](i192) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i192) = G_ZEXT %0(i64) + S_ENDPGM 0, implicit %1(i192) ... 
--- @@ -549,13 +549,13 @@ body: | ; CHECK-LABEL: name: test_zext_s64_to_s256 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[COPY]](s64), [[C]](s64), [[C]](s64), [[C]](s64) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s256) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s256) = G_ZEXT %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i256) = G_MERGE_VALUES [[COPY]](i64), [[C]](i64), [[C]](i64), [[C]](i64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](i256) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i256) = G_ZEXT %0(i64) + S_ENDPGM 0, implicit %1(i256) ... --- @@ -567,13 +567,13 @@ body: | ; CHECK-LABEL: name: test_zext_s64_to_s512 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s512) = G_MERGE_VALUES [[COPY]](s64), [[C]](s64), [[C]](s64), [[C]](s64), [[C]](s64), [[C]](s64), [[C]](s64), [[C]](s64) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s512) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s512) = G_ZEXT %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i512) = G_MERGE_VALUES [[COPY]](i64), [[C]](i64), [[C]](i64), [[C]](i64), [[C]](i64), [[C]](i64), [[C]](i64), [[C]](i64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](i512) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i512) = G_ZEXT %0(i64) + S_ENDPGM 0, implicit %1(i512) ... --- @@ -585,13 +585,13 @@ body: | ; CHECK-LABEL: name: test_zext_s64_to_s1024 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[COPY]](s64), [[C]](s64), [[C]](s64), [[C]](s64), [[C]](s64), [[C]](s64), [[C]](s64), [[C]](s64), [[C]](s64), [[C]](s64), [[C]](s64), [[C]](s64), [[C]](s64), [[C]](s64), [[C]](s64), [[C]](s64) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s1024) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s1024) = G_ZEXT %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i1024) = G_MERGE_VALUES [[COPY]](i64), [[C]](i64), [[C]](i64), [[C]](i64), [[C]](i64), [[C]](i64), [[C]](i64), [[C]](i64), [[C]](i64), [[C]](i64), [[C]](i64), [[C]](i64), [[C]](i64), [[C]](i64), [[C]](i64), [[C]](i64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](i1024) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i1024) = G_ZEXT %0(i64) + S_ENDPGM 0, implicit %1(i1024) ... 
--- @@ -603,16 +603,16 @@ body: | ; CHECK-LABEL: name: test_zext_s96_to_s128 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[C]](s32) - ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV2]](s128) - %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(s128) = G_ZEXT %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i96) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV]](i32), [[UV1]](i32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV2]](i32), [[C]](i32) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[MV]](i64), [[MV1]](i64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV2]](i128) + %0:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(i128) = G_ZEXT %0(i96) + S_ENDPGM 0, implicit %1(i128) ... --- @@ -624,14 +624,14 @@ body: | ; CHECK-LABEL: name: test_zext_s128_to_s256 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[UV]](s64), [[UV1]](s64), [[C]](s64), [[C]](s64) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s256) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s256) = G_ZEXT %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i256) = G_MERGE_VALUES [[UV]](i64), [[UV1]](i64), [[C]](i64), [[C]](i64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](i256) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i256) = G_ZEXT %0(i128) + S_ENDPGM 0, implicit %1(i256) ... 
--- @@ -643,46 +643,46 @@ body: | ; CHECK-LABEL: name: test_zext_s32_to_s88 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C4]](s16) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[C4]](s16) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[C5]], [[C4]](s16) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[C5]], [[SHL2]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[OR2]](s16) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] - ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[COPY1]](s16) - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C1]](s32) - ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR3]](s32), [[OR4]](s32) - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s704) = G_MERGE_VALUES [[MV]](s64), [[C6]](s64), [[C6]](s64), [[C6]](s64), [[C6]](s64), [[C6]](s64), [[C6]](s64), [[C6]](s64), [[C6]](s64), [[C6]](s64), [[C6]](s64) - ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s88) = G_TRUNC [[MV1]](s704) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC4]](s88) - %0:_(s32) = COPY $vgpr0 - %1:_(s88) = G_ZEXT %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C1]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C2]](i32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i16) = G_CONSTANT i16 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i16) = G_AND [[TRUNC]], [[C3]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i16) = G_AND [[TRUNC1]], [[C3]] + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(i16) = G_CONSTANT i16 8 + ; CHECK-NEXT: 
[[SHL:%[0-9]+]]:_(i16) = G_SHL [[AND1]], [[C4]](i16) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR1]](i32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i16) = G_AND [[TRUNC2]], [[C3]] + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR2]](i32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i16) = G_SHL [[TRUNC3]], [[C4]](i16) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i16) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(i16) = G_CONSTANT i16 0 + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(i16) = G_SHL [[C5]], [[C4]](i16) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(i16) = G_OR [[C5]], [[SHL2]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i16) = COPY [[OR2]](i16) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(i32) = G_ZEXT [[OR1]](i16) + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[ZEXT1]], [[C1]](i32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[ZEXT]], [[SHL3]] + ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(i32) = G_ZEXT [[OR2]](i16) + ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(i32) = G_ZEXT [[COPY1]](i16) + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[ZEXT3]], [[C1]](i32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[ZEXT2]], [[SHL4]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR3]](i32), [[OR4]](i32) + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i704) = G_MERGE_VALUES [[MV]](i64), [[C6]](i64), [[C6]](i64), [[C6]](i64), [[C6]](i64), [[C6]](i64), [[C6]](i64), [[C6]](i64), [[C6]](i64), [[C6]](i64), [[C6]](i64) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(i88) = G_TRUNC [[MV1]](i704) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC4]](i88) + %0:_(i32) = COPY $vgpr0 + %1:_(i88) = G_ZEXT %0(i32) + S_ENDPGM 0, implicit %1(i88) ... # The instruction count blows up for this and takes too long to @@ -694,8 +694,8 @@ body: | # bb.0: # liveins: $vgpr0 -# %0:_(s32) = COPY $vgpr0 -# %1:_(s65) = G_ZEXT %0 +# %0:_(i32) = COPY $vgpr0 +# %1:_(i65) = G_ZEXT %0 # S_ENDPGM 0, implicit %1 # ... 
@@ -708,67 +708,67 @@ body: | ; CHECK-LABEL: name: test_zext_s2_to_s112 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[UV]], [[SHL2]] - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[UV1]], [[SHL3]] - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[MV]](s64) - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s48) = G_EXTRACT [[DEF]](s64), 0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[C3]](s64) - ; CHECK-NEXT: [[EXTRACT1:%[0-9]+]]:_(s48) = G_EXTRACT [[MV1]](s64), 0 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[COPY2]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[EXTRACT]](s48) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[EXTRACT1]](s48) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[ANYEXT1]] - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND2]](s64) - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64) - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C1]] - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) - ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL4]] - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C1]] - ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] - ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C1]] - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL5]] - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C1]] - ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) - ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL6]] - ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR6]](s32), [[OR7]](s32) - ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL5]] - ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV4]](s32), [[OR8]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[OR7]](s32) - ; CHECK-NEXT: 
[[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY3]](s32), [[UV4]](s32) - ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] - ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL3]] - ; CHECK-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR9]](s32), [[OR10]](s32) - ; CHECK-NEXT: [[MV7:%[0-9]+]]:_(s384) = G_MERGE_VALUES [[AND1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s112) = G_TRUNC [[MV7]](s384) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s112) - %0:_(s32) = COPY $vgpr0 - %1:_(s2) = G_TRUNC %0 - %2:_(s112) = G_ZEXT %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[COPY]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[LSHR]], [[C]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(i32) = G_SHL [[C2]], [[C]](i32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i32) = G_OR [[C2]], [[SHL1]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR]](i32), [[OR1]](i32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 3 + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C4]](i64) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(i32) = G_LSHR [[UV]], [[C]](i32) + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(i32) = G_SHL [[LSHR1]], [[C]](i32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(i32) = G_OR [[UV]], [[SHL2]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(i32) = G_SHL [[C2]], [[C]](i32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(i32) = G_OR [[UV1]], [[SHL3]] + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR2]](i32), [[OR3]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY [[MV]](i64) + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(i48) = G_EXTRACT [[DEF]](i64), 0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i64) = COPY [[C3]](i64) + ; CHECK-NEXT: [[EXTRACT1:%[0-9]+]]:_(i48) = G_EXTRACT [[MV1]](i64), 0 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[COPY1]], [[COPY2]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[EXTRACT]](i48) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i64) = G_ANYEXT [[EXTRACT1]](i48) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i64) = G_AND [[ANYEXT]], [[ANYEXT1]] + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND2]](i64) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(i32) = G_LSHR [[UV2]], [[C]](i32) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[DEF1]](i64) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(i32) = G_LSHR [[UV4]], [[C]](i32) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(i32) = G_AND [[UV2]], [[C1]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(i32) = G_SHL [[LSHR2]], [[C]](i32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(i32) = G_OR [[AND3]], [[SHL4]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(i32) = G_AND [[UV3]], [[C1]] + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(i32) = G_OR [[AND4]], [[SHL3]] + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR4]](i32), [[OR5]](i32) + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(i32) = G_AND [[UV4]], [[C1]] + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(i32) = G_SHL [[AND5]], [[C]](i32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(i32) = G_OR 
[[C2]], [[SHL5]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(i32) = G_AND [[UV5]], [[C1]] + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(i32) = G_SHL [[AND6]], [[C]](i32) + ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(i32) = G_OR [[LSHR3]], [[SHL6]] + ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR6]](i32), [[OR7]](i32) + ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(i32) = G_OR [[AND6]], [[SHL5]] + ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV4]](i32), [[OR8]](i32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i32) = COPY [[OR7]](i32) + ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[COPY3]](i32), [[UV4]](i32) + ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(i32) = G_OR [[AND6]], [[SHL3]] + ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(i32) = G_OR [[C2]], [[SHL3]] + ; CHECK-NEXT: [[MV6:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[OR9]](i32), [[OR10]](i32) + ; CHECK-NEXT: [[MV7:%[0-9]+]]:_(i384) = G_MERGE_VALUES [[AND1]](i64), [[MV2]](i64), [[MV3]](i64), [[MV4]](i64), [[MV5]](i64), [[MV6]](i64) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i112) = G_TRUNC [[MV7]](i384) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](i112) + %0:_(i32) = COPY $vgpr0 + %1:_(i2) = G_TRUNC %0(i32) + %2:_(i112) = G_ZEXT %1(i2) + S_ENDPGM 0, implicit %2(i112) ... --- @@ -779,16 +779,16 @@ body: | ; CHECK-LABEL: name: test_zext_s112_to_s128 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[UV]], [[C]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[UV1]], [[C1]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[AND]](s64), [[AND1]](s64) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s128) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s112) = G_TRUNC %0 - %2:_(s128) = G_ZEXT %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 -1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 281474976710655 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i64), [[UV1:%[0-9]+]]:_(i64) = G_UNMERGE_VALUES [[COPY]](i128) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[UV]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[UV1]], [[C1]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(i128) = G_MERGE_VALUES [[AND]](i64), [[AND1]](i64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](i128) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i112) = G_TRUNC %0(i128) + %2:_(i128) = G_ZEXT %1(i112) + S_ENDPGM 0, implicit %2(i128) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-constant-32bit.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-constant-32bit.mir index a4971e94e75f6..6b3fb4cf9e1df 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-constant-32bit.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-constant-32bit.mir @@ -13,15 +13,15 @@ body: | ; CI: liveins: $sgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0 - ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load (s32), addrspace 6) - ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(i32) = G_PTRTOINT [[COPY]](p6) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](i32), [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[MV]](p4) :: (load (i32), addrspace 6) + ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[LOAD]](i32) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](i64) %0:_(p6) = COPY $sgpr0 - %1:_(s64) = G_ZEXTLOAD %0 :: (load (s32), align 4, addrspace 6) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_ZEXTLOAD %0(p6) :: (load (i32), addrspace 6) + $vgpr0_vgpr1 = COPY %1(i64) ... --- @@ -34,15 +34,15 @@ body: | ; CI: liveins: $sgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0 - ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load (s32), align 2, addrspace 6) - ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(i32) = G_PTRTOINT [[COPY]](p6) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](i32), [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[MV]](p4) :: (load (i32), align 2, addrspace 6) + ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[LOAD]](i32) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](i64) %0:_(p6) = COPY $sgpr0 - %1:_(s64) = G_ZEXTLOAD %0 :: (load (s32), align 2, addrspace 6) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_ZEXTLOAD %0(p6) :: (load (i32), align 2, addrspace 6) + $vgpr0_vgpr1 = COPY %1(i64) ... 
--- @@ -55,15 +55,15 @@ body: | ; CI: liveins: $sgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0 - ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load (s32), align 1, addrspace 6) - ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(i32) = G_PTRTOINT [[COPY]](p6) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](i32), [[C]](i32) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[MV]](p4) :: (load (i32), align 1, addrspace 6) + ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[LOAD]](i32) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](i64) %0:_(p6) = COPY $sgpr0 - %1:_(s64) = G_ZEXTLOAD %0 :: (load (s32), align 1, addrspace 6) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_ZEXTLOAD %0(p6) :: (load (i32), align 1, addrspace 6) + $vgpr0_vgpr1 = COPY %1(i64) ... --- @@ -76,14 +76,14 @@ body: | ; CI: liveins: $sgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0 - ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[MV]](p4) :: (load (s8), addrspace 6) - ; CI-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32) + ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(i32) = G_PTRTOINT [[COPY]](p6) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](i32), [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[MV]](p4) :: (load (i8), addrspace 6) + ; CI-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](i32) %0:_(p6) = COPY $sgpr0 - %1:_(s32) = G_ZEXTLOAD %0 :: (load (s8), align 1, addrspace 6) - $vgpr0 = COPY %1 + %1:_(i32) = G_ZEXTLOAD %0(p6) :: (load (i8), addrspace 6) + $vgpr0 = COPY %1(i32) ... --- @@ -96,14 +96,14 @@ body: | ; CI: liveins: $sgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0 - ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[MV]](p4) :: (load (s16), addrspace 6) - ; CI-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32) + ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(i32) = G_PTRTOINT [[COPY]](p6) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](i32), [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[MV]](p4) :: (load (i16), addrspace 6) + ; CI-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](i32) %0:_(p6) = COPY $sgpr0 - %1:_(s32) = G_ZEXTLOAD %0 :: (load (s16), align 2, addrspace 6) - $vgpr0 = COPY %1 + %1:_(i32) = G_ZEXTLOAD %0(p6) :: (load (i16), addrspace 6) + $vgpr0 = COPY %1(i32) ... 
--- @@ -116,12 +116,12 @@ body: | ; CI: liveins: $sgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0 - ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[MV]](p4) :: (load (s16), align 1, addrspace 6) - ; CI-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32) + ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(i32) = G_PTRTOINT [[COPY]](p6) + ; CI-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 0 + ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](i32), [[C]](i32) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[MV]](p4) :: (load (i16), align 1, addrspace 6) + ; CI-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](i32) %0:_(p6) = COPY $sgpr0 - %1:_(s32) = G_ZEXTLOAD %0 :: (load (s16), align 1, addrspace 6) - $vgpr0 = COPY %1 + %1:_(i32) = G_ZEXTLOAD %0(p6) :: (load (i16), align 1, addrspace 6) + $vgpr0 = COPY %1(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-flat.mir index 9eade36f055de..ce895ea7f34da 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-flat.mir @@ -11,17 +11,18 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; SI-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; SI-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](i32) + ; ; VI-LABEL: name: test_zextload_flat_i32_i8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; VI-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; VI-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](i32) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_ZEXTLOAD %0 :: (load (s8), addrspace 0) - $vgpr0 = COPY %1 + %1:_(i32) = G_ZEXTLOAD %0(p0) :: (load (i8)) + $vgpr0 = COPY %1(i32) ... --- name: test_zextload_flat_i32_i16 @@ -33,17 +34,18 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) - ; SI-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i16)) + ; SI-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](i32) + ; ; VI-LABEL: name: test_zextload_flat_i32_i16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) - ; VI-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32) - %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_ZEXTLOAD %0 :: (load (s16), addrspace 0) - $vgpr0 = COPY %1 + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i16)) + ; VI-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](i32) + %0:_(p0) = COPY $vgpr0_vgpr1 + %1:_(i32) = G_ZEXTLOAD %0(p0) :: (load (i16)) + $vgpr0 = COPY %1(i32) ... 
--- name: test_zextload_flat_i31_i8 @@ -55,18 +57,19 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; SI-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; SI-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](i32) + ; ; VI-LABEL: name: test_zextload_flat_i31_i8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; VI-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; VI-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](i32) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s31) = G_ZEXTLOAD %0 :: (load (s8), addrspace 0) - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + %1:_(i31) = G_ZEXTLOAD %0(p0) :: (load (i8)) + %2:_(i32) = G_ANYEXT %1(i31) + $vgpr0 = COPY %2(i32) ... --- name: test_zextload_flat_i64_i8 @@ -78,19 +81,20 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ZEXTLOAD]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[ZEXTLOAD]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](i64) + ; ; VI-LABEL: name: test_zextload_flat_i64_i8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ZEXTLOAD]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i8)) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[ZEXTLOAD]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](i64) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_ZEXTLOAD %0 :: (load (s8), addrspace 0) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_ZEXTLOAD %0(p0) :: (load (i8)) + $vgpr0_vgpr1 = COPY %1(i64) ... 
--- name: test_zextload_flat_i64_i16 @@ -102,19 +106,20 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ZEXTLOAD]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i16)) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[ZEXTLOAD]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](i64) + ; ; VI-LABEL: name: test_zextload_flat_i64_i16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ZEXTLOAD]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p0) :: (load (i16)) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[ZEXTLOAD]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](i64) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_ZEXTLOAD %0 :: (load (s16), addrspace 0) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_ZEXTLOAD %0(p0) :: (load (i16)) + $vgpr0_vgpr1 = COPY %1(i64) ... --- name: test_zextload_flat_i64_i32 @@ -126,17 +131,18 @@ body: | ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + ; SI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[LOAD]](i32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](i64) + ; ; VI-LABEL: name: test_zextload_flat_i64_i32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p0) :: (load (i32)) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[LOAD]](i32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](i64) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_ZEXTLOAD %0 :: (load (s32), addrspace 0) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_ZEXTLOAD %0(p0) :: (load (i32)) + $vgpr0_vgpr1 = COPY %1(i64) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-global.mir index 84608f61b8a4a..842a920f2c089 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-global.mir @@ -6,12 +6,12 @@ # FIXME: Run with and without unaligned access turned on # ERR-NOT: remark -# ERR: remark: :0:0: unable to legalize instruction: %1:_(<2 x s16>) = G_ZEXTLOAD %0:_(p1) :: (load (<2 x s8>), addrspace 1) (in function: test_zextload_global_v2i16_from_2) -# ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(<2 x s32>) = G_ZEXTLOAD %0:_(p1) :: (load (<2 x s8>), addrspace 1) (in function: test_zextload_global_v2i32_from_2) -# ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(<2 x s32>) = G_ZEXTLOAD %0:_(p1) :: (load (<2 x s16>), addrspace 1) (in function: test_zextload_global_v2i32_from_4) -# ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(<2 x s64>) = G_ZEXTLOAD %0:_(p1) :: (load (<2 x s16>), addrspace 1) (in function: test_zextload_global_v2i64_from_4) -# ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(<2 x s64>) = G_ZEXTLOAD %0:_(p1) :: (load (<2 x s32>), addrspace 1) (in function: test_zextload_global_v2i64_from_8) -# ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(s128) = G_ZEXTLOAD %0:_(p1) :: (load (s64), addrspace 1) (in function: test_zextload_global_s128_8) +# ERR: remark: :0:0: unable to legalize instruction: %1:_(<2 x i16>) = G_ZEXTLOAD %0:_(p1) :: (load (<2 x i8>), addrspace 1) (in function: test_zextload_global_v2i16_from_2) +# ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(<2 x i32>) = G_ZEXTLOAD %0:_(p1) :: (load (<2 x i8>), addrspace 1) (in function: test_zextload_global_v2i32_from_2) +# ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(<2 x i32>) = G_ZEXTLOAD %0:_(p1) :: (load (<2 x i16>), addrspace 1) (in function: test_zextload_global_v2i32_from_4) +# ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(<2 x i64>) = G_ZEXTLOAD %0:_(p1) :: (load (<2 x i16>), addrspace 1) (in function: test_zextload_global_v2i64_from_4) +# ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(<2 x i64>) = G_ZEXTLOAD %0:_(p1) :: (load (<2 x i32>), addrspace 1) (in function: test_zextload_global_v2i64_from_8) +# ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(i128) = G_ZEXTLOAD %0:_(p1) :: (load (i64), addrspace 1) (in function: test_zextload_global_s128_8) # ERR-NOT: remark --- @@ -24,19 +24,20 @@ body: | ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GFX8-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[LOAD]], 1 - ; GFX8-NEXT: $vgpr0 = COPY [[ASSERT_ZEXT]](s32) + ; GFX8-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GFX8-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(i32) = G_ASSERT_ZEXT [[LOAD]], 1 + ; GFX8-NEXT: $vgpr0 = COPY [[ASSERT_ZEXT]](i32) + ; ; GFX6-LABEL: name: test_zextload_global_i32_i1 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GFX6-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[LOAD]], 1 - ; GFX6-NEXT: $vgpr0 = COPY [[ASSERT_ZEXT]](s32) + ; GFX6-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD 
[[COPY]](p1) :: (load (i8), addrspace 1) + ; GFX6-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(i32) = G_ASSERT_ZEXT [[LOAD]], 1 + ; GFX6-NEXT: $vgpr0 = COPY [[ASSERT_ZEXT]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_ZEXTLOAD %0 :: (load (s1), addrspace 1) - $vgpr0 = COPY %1 + %1:_(i32) = G_ZEXTLOAD %0(p1) :: (load (i1), addrspace 1) + $vgpr0 = COPY %1(i32) ... --- @@ -49,19 +50,20 @@ body: | ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GFX8-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[LOAD]], 7 - ; GFX8-NEXT: $vgpr0 = COPY [[ASSERT_ZEXT]](s32) + ; GFX8-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GFX8-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(i32) = G_ASSERT_ZEXT [[LOAD]], 7 + ; GFX8-NEXT: $vgpr0 = COPY [[ASSERT_ZEXT]](i32) + ; ; GFX6-LABEL: name: test_zextload_global_i32_i7 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GFX6-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[LOAD]], 7 - ; GFX6-NEXT: $vgpr0 = COPY [[ASSERT_ZEXT]](s32) + ; GFX6-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GFX6-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(i32) = G_ASSERT_ZEXT [[LOAD]], 7 + ; GFX6-NEXT: $vgpr0 = COPY [[ASSERT_ZEXT]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_ZEXTLOAD %0 :: (load (s7), addrspace 1) - $vgpr0 = COPY %1 + %1:_(i32) = G_ZEXTLOAD %0(p1) :: (load (i7), addrspace 1) + $vgpr0 = COPY %1(i32) ... --- @@ -74,29 +76,30 @@ body: | ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX8-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX8-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX8-NEXT: $vgpr0 = COPY [[OR]](s32) + ; GFX8-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), align 4, addrspace 1) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX8-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX8-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 2, align 2, addrspace 1) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX8-NEXT: $vgpr0 = COPY [[OR]](i32) + ; ; GFX6-LABEL: name: test_zextload_global_i32_i24 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX6-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX6-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address 
+ 2, align 2, addrspace 1) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX6-NEXT: $vgpr0 = COPY [[OR]](s32) + ; GFX6-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), align 4, addrspace 1) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2 + ; GFX6-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX6-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 2, align 2, addrspace 1) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX6-NEXT: $vgpr0 = COPY [[OR]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_ZEXTLOAD %0 :: (load (s24), addrspace 1) - $vgpr0 = COPY %1 + %1:_(i32) = G_ZEXTLOAD %0(p1) :: (load (i24), align 4, addrspace 1) + $vgpr0 = COPY %1(i32) ... --- @@ -109,19 +112,20 @@ body: | ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; GFX8-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[LOAD]], 30 - ; GFX8-NEXT: $vgpr0 = COPY [[ASSERT_ZEXT]](s32) + ; GFX8-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; GFX8-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(i32) = G_ASSERT_ZEXT [[LOAD]], 30 + ; GFX8-NEXT: $vgpr0 = COPY [[ASSERT_ZEXT]](i32) + ; ; GFX6-LABEL: name: test_zextload_global_i32_i30 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; GFX6-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[LOAD]], 30 - ; GFX6-NEXT: $vgpr0 = COPY [[ASSERT_ZEXT]](s32) + ; GFX6-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; GFX6-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(i32) = G_ASSERT_ZEXT [[LOAD]], 30 + ; GFX6-NEXT: $vgpr0 = COPY [[ASSERT_ZEXT]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_ZEXTLOAD %0 :: (load (s30), addrspace 1) - $vgpr0 = COPY %1 + %1:_(i32) = G_ZEXTLOAD %0(p1) :: (load (i30), addrspace 1) + $vgpr0 = COPY %1(i32) ... 
--- @@ -134,19 +138,20 @@ body: | ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; GFX8-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[LOAD]], 31 - ; GFX8-NEXT: $vgpr0 = COPY [[ASSERT_ZEXT]](s32) + ; GFX8-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; GFX8-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(i32) = G_ASSERT_ZEXT [[LOAD]], 31 + ; GFX8-NEXT: $vgpr0 = COPY [[ASSERT_ZEXT]](i32) + ; ; GFX6-LABEL: name: test_zextload_global_i32_i31 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; GFX6-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[LOAD]], 31 - ; GFX6-NEXT: $vgpr0 = COPY [[ASSERT_ZEXT]](s32) + ; GFX6-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; GFX6-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(i32) = G_ASSERT_ZEXT [[LOAD]], 31 + ; GFX6-NEXT: $vgpr0 = COPY [[ASSERT_ZEXT]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_ZEXTLOAD %0 :: (load (s31), addrspace 1) - $vgpr0 = COPY %1 + %1:_(i32) = G_ZEXTLOAD %0(p1) :: (load (i31), addrspace 1) + $vgpr0 = COPY %1(i32) ... --- @@ -159,17 +164,18 @@ body: | ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GFX8-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32) + ; GFX8-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GFX8-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](i32) + ; ; GFX6-LABEL: name: test_zextload_global_i32_i8 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32) + ; GFX6-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GFX6-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_ZEXTLOAD %0 :: (load (s8), addrspace 1) - $vgpr0 = COPY %1 + %1:_(i32) = G_ZEXTLOAD %0(p1) :: (load (i8), addrspace 1) + $vgpr0 = COPY %1(i32) ... --- name: test_zextload_global_i32_i16 @@ -181,17 +187,18 @@ body: | ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; GFX8-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32) + ; GFX8-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; GFX8-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](i32) + ; ; GFX6-LABEL: name: test_zextload_global_i32_i16 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32) + ; GFX6-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; GFX6-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_ZEXTLOAD %0 :: (load (s16), addrspace 1) - $vgpr0 = COPY %1 + %1:_(i32) = G_ZEXTLOAD %0(p1) :: (load (i16), addrspace 1) + $vgpr0 = COPY %1(i32) ... 
--- name: test_zextload_global_i31_i8 @@ -203,18 +210,19 @@ body: | ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GFX8-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32) + ; GFX8-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GFX8-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](i32) + ; ; GFX6-LABEL: name: test_zextload_global_i31_i8 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32) + ; GFX6-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GFX6-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s31) = G_ZEXTLOAD %0 :: (load (s8), addrspace 1) - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + %1:_(i31) = G_ZEXTLOAD %0(p1) :: (load (i8), addrspace 1) + %2:_(i32) = G_ANYEXT %1(i31) + $vgpr0 = COPY %2(i32) ... --- name: test_zextload_global_i64_i8 @@ -226,19 +234,20 @@ body: | ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ZEXTLOAD]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + ; GFX8-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[ZEXTLOAD]](i32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](i64) + ; ; GFX6-LABEL: name: test_zextload_global_i64_i8 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ZEXTLOAD]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + ; GFX6-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[ZEXTLOAD]](i32) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](i64) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_ZEXTLOAD %0 :: (load (s8), addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_ZEXTLOAD %0(p1) :: (load (i8), addrspace 1) + $vgpr0_vgpr1 = COPY %1(i64) ... 
--- name: test_zextload_global_i64_i16 @@ -250,19 +259,20 @@ body: | ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ZEXTLOAD]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + ; GFX8-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[ZEXTLOAD]](i32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](i64) + ; ; GFX6-LABEL: name: test_zextload_global_i64_i16 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ZEXTLOAD]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + ; GFX6-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[ZEXTLOAD]](i32) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](i64) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_ZEXTLOAD %0 :: (load (s16), addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_ZEXTLOAD %0(p1) :: (load (i16), addrspace 1) + $vgpr0_vgpr1 = COPY %1(i64) ... --- name: test_zextload_global_i64_i32 @@ -274,19 +284,20 @@ body: | ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + ; GFX8-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[LOAD]](i32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](i64) + ; ; GFX6-LABEL: name: test_zextload_global_i64_i32 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + ; GFX6-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[LOAD]](i32) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](i64) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_ZEXTLOAD %0 :: (load (s32), addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_ZEXTLOAD %0(p1) :: (load (i32), addrspace 1) + $vgpr0_vgpr1 = COPY %1(i64) ... 
--- @@ -299,23 +310,24 @@ body: | ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1) - ; GFX8-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32) + ; GFX8-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), align 1, addrspace 1) + ; GFX8-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](i32) + ; ; GFX6-LABEL: name: test_zextload_global_s32_from_2_align1 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX6-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX6-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX6-NEXT: $vgpr0 = COPY [[OR]](s32) + ; GFX6-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX6-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX6-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX6-NEXT: $vgpr0 = COPY [[OR]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_ZEXTLOAD %0 :: (load (s16), align 1, addrspace 1) - $vgpr0 = COPY %1 + %1:_(i32) = G_ZEXTLOAD %0(p1) :: (load (i16), align 1, addrspace 1) + $vgpr0 = COPY %1(i32) ... 
--- @@ -328,25 +340,26 @@ body: | ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1) - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ZEXTLOAD]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + ; GFX8-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), align 1, addrspace 1) + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[ZEXTLOAD]](i32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](i64) + ; ; GFX6-LABEL: name: test_zextload_global_s64_from_2_align1 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX6-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX6-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + ; GFX6-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 1 + ; GFX6-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX6-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (i8) from unknown-address + 1, addrspace 1) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(i32) = G_SHL [[ZEXTLOAD1]], [[C1]](i32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[OR]](i32) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](i64) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_ZEXTLOAD %0 :: (load (s16), align 1, addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_ZEXTLOAD %0(p1) :: (load (i16), align 1, addrspace 1) + $vgpr0_vgpr1 = COPY %1(i64) ... --- @@ -359,17 +372,18 @@ body: | ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s16>) = G_ZEXTLOAD [[COPY]](p1) :: (load (<2 x s8>), addrspace 1) - ; GFX8-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](<2 x s16>) + ; GFX8-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(<2 x i16>) = G_ZEXTLOAD [[COPY]](p1) :: (load (<2 x i8>), addrspace 1) + ; GFX8-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](<2 x i16>) + ; ; GFX6-LABEL: name: test_zextload_global_v2i16_from_2 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s16>) = G_ZEXTLOAD [[COPY]](p1) :: (load (<2 x s8>), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](<2 x s16>) + ; GFX6-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(<2 x i16>) = G_ZEXTLOAD [[COPY]](p1) :: (load (<2 x i8>), addrspace 1) + ; GFX6-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](<2 x i16>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s16>) = G_ZEXTLOAD %0 :: (load (<2 x s8>), addrspace 1) - $vgpr0 = COPY %1 + %1:_(<2 x i16>) = G_ZEXTLOAD %0(p1) :: (load (<2 x i8>), addrspace 1) + $vgpr0 = COPY %1(<2 x i16>) ... 
--- @@ -382,17 +396,18 @@ body: | ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s32>) = G_ZEXTLOAD [[COPY]](p1) :: (load (<2 x s8>), addrspace 1) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[ZEXTLOAD]](<2 x s32>) + ; GFX8-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(<2 x i32>) = G_ZEXTLOAD [[COPY]](p1) :: (load (<2 x i8>), addrspace 1) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[ZEXTLOAD]](<2 x i32>) + ; ; GFX6-LABEL: name: test_zextload_global_v2i32_from_2 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s32>) = G_ZEXTLOAD [[COPY]](p1) :: (load (<2 x s8>), addrspace 1) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[ZEXTLOAD]](<2 x s32>) + ; GFX6-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(<2 x i32>) = G_ZEXTLOAD [[COPY]](p1) :: (load (<2 x i8>), addrspace 1) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[ZEXTLOAD]](<2 x i32>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_ZEXTLOAD %0 :: (load (<2 x s8>), addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:_(<2 x i32>) = G_ZEXTLOAD %0(p1) :: (load (<2 x i8>), addrspace 1) + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... --- @@ -405,17 +420,18 @@ body: | ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s32>) = G_ZEXTLOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[ZEXTLOAD]](<2 x s32>) + ; GFX8-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(<2 x i32>) = G_ZEXTLOAD [[COPY]](p1) :: (load (<2 x i16>), addrspace 1) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[ZEXTLOAD]](<2 x i32>) + ; ; GFX6-LABEL: name: test_zextload_global_v2i32_from_4 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s32>) = G_ZEXTLOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[ZEXTLOAD]](<2 x s32>) + ; GFX6-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(<2 x i32>) = G_ZEXTLOAD [[COPY]](p1) :: (load (<2 x i16>), addrspace 1) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[ZEXTLOAD]](<2 x i32>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_ZEXTLOAD %0 :: (load (<2 x s16>), addrspace 1) - $vgpr0_vgpr1 = COPY %1 + %1:_(<2 x i32>) = G_ZEXTLOAD %0(p1) :: (load (<2 x i16>), addrspace 1) + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... 
--- @@ -428,17 +444,18 @@ body: | ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s64>) = G_ZEXTLOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[ZEXTLOAD]](<2 x s64>) + ; GFX8-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(<2 x i64>) = G_ZEXTLOAD [[COPY]](p1) :: (load (<2 x i16>), addrspace 1) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[ZEXTLOAD]](<2 x i64>) + ; ; GFX6-LABEL: name: test_zextload_global_v2i64_from_4 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s64>) = G_ZEXTLOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1) - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[ZEXTLOAD]](<2 x s64>) + ; GFX6-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(<2 x i64>) = G_ZEXTLOAD [[COPY]](p1) :: (load (<2 x i16>), addrspace 1) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[ZEXTLOAD]](<2 x i64>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s64>) = G_ZEXTLOAD %0 :: (load (<2 x s16>), addrspace 1) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<2 x i64>) = G_ZEXTLOAD %0(p1) :: (load (<2 x i16>), addrspace 1) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x i64>) ... --- @@ -451,17 +468,18 @@ body: | ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s64>) = G_ZEXTLOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[ZEXTLOAD]](<2 x s64>) + ; GFX8-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(<2 x i64>) = G_ZEXTLOAD [[COPY]](p1) :: (load (<2 x i32>), addrspace 1) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[ZEXTLOAD]](<2 x i64>) + ; ; GFX6-LABEL: name: test_zextload_global_v2i64_from_8 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s64>) = G_ZEXTLOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1) - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[ZEXTLOAD]](<2 x s64>) + ; GFX6-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(<2 x i64>) = G_ZEXTLOAD [[COPY]](p1) :: (load (<2 x i32>), addrspace 1) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[ZEXTLOAD]](<2 x i64>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s64>) = G_ZEXTLOAD %0 :: (load (<2 x s32>), addrspace 1) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(<2 x i64>) = G_ZEXTLOAD %0(p1) :: (load (<2 x i32>), addrspace 1) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x i64>) ... 
--- @@ -474,15 +492,16 @@ body: | ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s128) = G_ZEXTLOAD [[COPY]](p1) :: (load (s64), addrspace 1) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[ZEXTLOAD]](s128) + ; GFX8-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i128) = G_ZEXTLOAD [[COPY]](p1) :: (load (i64), addrspace 1) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[ZEXTLOAD]](i128) + ; ; GFX6-LABEL: name: test_zextload_global_s128_8 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s128) = G_ZEXTLOAD [[COPY]](p1) :: (load (s64), addrspace 1) - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[ZEXTLOAD]](s128) + ; GFX6-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i128) = G_ZEXTLOAD [[COPY]](p1) :: (load (i64), addrspace 1) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[ZEXTLOAD]](i128) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s128) = G_ZEXTLOAD %0 :: (load (s64), addrspace 1) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + %1:_(i128) = G_ZEXTLOAD %0(p1) :: (load (i64), addrspace 1) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(i128) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-local.mir index 5b02cac394248..27e76bbb35430 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-local.mir @@ -11,11 +11,11 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; CHECK-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32) + ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](i32) %0:_(p3) = COPY $vgpr0 - %1:_(s32) = G_ZEXTLOAD %0 :: (load (s8), addrspace 3) - $vgpr0 = COPY %1 + %1:_(i32) = G_ZEXTLOAD %0(p3) :: (load (i8), addrspace 3) + $vgpr0 = COPY %1(i32) ... --- name: test_zextload_local_i32_i16 @@ -27,11 +27,11 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; CHECK-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32) + ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](i32) %0:_(p3) = COPY $vgpr0 - %1:_(s32) = G_ZEXTLOAD %0 :: (load (s16), addrspace 3) - $vgpr0 = COPY %1 + %1:_(i32) = G_ZEXTLOAD %0(p3) :: (load (i16), addrspace 3) + $vgpr0 = COPY %1(i32) ... --- name: test_zextload_local_i31_i8 @@ -43,12 +43,12 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; CHECK-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32) + ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](i32) %0:_(p3) = COPY $vgpr0 - %1:_(s31) = G_ZEXTLOAD %0 :: (load (s8), addrspace 3) - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + %1:_(i31) = G_ZEXTLOAD %0(p3) :: (load (i8), addrspace 3) + %2:_(i32) = G_ANYEXT %1(i31) + $vgpr0 = COPY %2(i32) ... 
--- name: test_zextload_local_i64_i8 @@ -60,12 +60,12 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ZEXTLOAD]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i8), addrspace 3) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[ZEXTLOAD]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](i64) %0:_(p3) = COPY $vgpr0 - %1:_(s64) = G_ZEXTLOAD %0 :: (load (s8), addrspace 3) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_ZEXTLOAD %0(p3) :: (load (i8), addrspace 3) + $vgpr0_vgpr1 = COPY %1(i64) ... --- name: test_zextload_local_i64_i16 @@ -77,12 +77,12 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ZEXTLOAD]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p3) :: (load (i16), addrspace 3) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[ZEXTLOAD]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](i64) %0:_(p3) = COPY $vgpr0 - %1:_(s64) = G_ZEXTLOAD %0 :: (load (s16), addrspace 3) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_ZEXTLOAD %0(p3) :: (load (i16), addrspace 3) + $vgpr0_vgpr1 = COPY %1(i64) ... --- name: test_zextload_local_i64_i32 @@ -94,10 +94,10 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p3) :: (load (i32), addrspace 3) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[LOAD]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](i64) %0:_(p3) = COPY $vgpr0 - %1:_(s64) = G_ZEXTLOAD %0 :: (load (s32), addrspace 3) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_ZEXTLOAD %0(p3) :: (load (i32), addrspace 3) + $vgpr0_vgpr1 = COPY %1(i64) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-private.mir index 75e3c630e4a66..f5ee0dd5c5b1b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-private.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-private.mir @@ -12,11 +12,11 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; CHECK-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32) + ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](i32) %0:_(p5) = COPY $vgpr0 - %1:_(s32) = G_ZEXTLOAD %0 :: (load (s8), addrspace 5) - $vgpr0 = COPY %1 + %1:_(i32) = G_ZEXTLOAD %0(p5) :: (load (i8), addrspace 5) + $vgpr0 = COPY %1(i32) ... 
--- name: test_zextload_private_i32_i16 @@ -28,11 +28,11 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; CHECK-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32) + ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](i32) %0:_(p5) = COPY $vgpr0 - %1:_(s32) = G_ZEXTLOAD %0 :: (load (s16), addrspace 5) - $vgpr0 = COPY %1 + %1:_(i32) = G_ZEXTLOAD %0(p5) :: (load (i16), addrspace 5) + $vgpr0 = COPY %1(i32) ... --- name: test_zextload_private_i31_i8 @@ -44,12 +44,12 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; CHECK-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32) + ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](i32) %0:_(p5) = COPY $vgpr0 - %1:_(s31) = G_ZEXTLOAD %0 :: (load (s8), addrspace 5) - %2:_(s32) = G_ANYEXT %1 - $vgpr0 = COPY %2 + %1:_(i31) = G_ZEXTLOAD %0(p5) :: (load (i8), addrspace 5) + %2:_(i32) = G_ANYEXT %1(i31) + $vgpr0 = COPY %2(i32) ... --- name: test_zextload_private_i64_i8 @@ -61,12 +61,12 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ZEXTLOAD]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i8), addrspace 5) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[ZEXTLOAD]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](i64) %0:_(p5) = COPY $vgpr0 - %1:_(s64) = G_ZEXTLOAD %0 :: (load (s8), addrspace 5) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_ZEXTLOAD %0(p5) :: (load (i8), addrspace 5) + $vgpr0_vgpr1 = COPY %1(i64) ... --- name: test_zextload_private_i64_i16 @@ -78,12 +78,12 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ZEXTLOAD]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p5) :: (load (i16), addrspace 5) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[ZEXTLOAD]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](i64) %0:_(p5) = COPY $vgpr0 - %1:_(s64) = G_ZEXTLOAD %0 :: (load (s16), addrspace 5) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_ZEXTLOAD %0(p5) :: (load (i16), addrspace 5) + $vgpr0_vgpr1 = COPY %1(i64) ... 
--- name: test_zextload_private_i64_i32 @@ -95,10 +95,10 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p5) :: (load (i32), addrspace 5) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[LOAD]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](i64) %0:_(p5) = COPY $vgpr0 - %1:_(s64) = G_ZEXTLOAD %0 :: (load (s32), addrspace 5) - $vgpr0_vgpr1 = COPY %1 + %1:_(i64) = G_ZEXTLOAD %0(p5) :: (load (i32), addrspace 5) + $vgpr0_vgpr1 = COPY %1(i64) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.mir index 292fa4be1ca1d..4147a2aaa1331 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.mir @@ -21,55 +21,59 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_si [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8) + ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_si [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (i32), addrspace 8) ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si]].sub0 ; GFX6-NEXT: $vgpr0 = COPY [[COPY3]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; ; GFX8-LABEL: name: atomic_cmpswap_i32_1d ; GFX8: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_vi:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8) + ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_vi:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (i32), addrspace 8) ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_vi]].sub0 ; GFX8-NEXT: $vgpr0 = COPY [[COPY3]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; ; GFX10-LABEL: name: atomic_cmpswap_i32_1d ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load 
store (s32), addrspace 8) + ; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (i32), addrspace 8) ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_]].sub0 ; GFX10-NEXT: $vgpr0 = COPY [[COPY3]] ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; ; GFX11-LABEL: name: atomic_cmpswap_i32_1d ; GFX11: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8) + ; GFX11-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (i32), addrspace 8) ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11_]].sub0 ; GFX11-NEXT: $vgpr0 = COPY [[COPY3]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; ; GFX12-LABEL: name: atomic_cmpswap_i32_1d ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX12-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx12_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx12 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8) + ; GFX12-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx12_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx12 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (i32), addrspace 8) ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx12_]].sub0 ; GFX12-NEXT: $vgpr0 = COPY [[COPY3]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 - %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - %1:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), %1(<2 x s32>), $noreg, %2(s32), %0(<8 x s32>), 0, 0, 0 :: (volatile dereferenceable load store (s32), addrspace 8) - $vgpr0 = COPY %3(s32) + %0:sgpr(<8 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + %1:vgpr(<2 x i32>) = COPY $vgpr0_vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), %1(<2 x i32>), $noreg, %2(i32), %0(<8 x i32>), 0, 0, 0 :: (volatile dereferenceable load store (i32), addrspace 8) + $vgpr0 = COPY %3(i32) SI_RETURN_TO_EPILOG implicit $vgpr0 ... 
@@ -89,44 +93,48 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_si [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8) + ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_si [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (i32), addrspace 8) ; GFX6-NEXT: S_ENDPGM 0 + ; ; GFX8-LABEL: name: atomic_cmpswap_i32_1d_no_return ; GFX8: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_vi:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8) + ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_vi:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (i32), addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 + ; ; GFX10-LABEL: name: atomic_cmpswap_i32_1d_no_return ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8) + ; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (i32), addrspace 8) ; GFX10-NEXT: S_ENDPGM 0 + ; ; GFX11-LABEL: name: atomic_cmpswap_i32_1d_no_return ; GFX11: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8) + ; GFX11-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (i32), addrspace 8) ; GFX11-NEXT: S_ENDPGM 0 + ; ; GFX12-LABEL: name: atomic_cmpswap_i32_1d_no_return ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY 
$sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX12-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx12_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx12 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8) + ; GFX12-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx12_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx12 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (i32), addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 - %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - %1:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), %1(<2 x s32>), $noreg, %2(s32), %0(<8 x s32>), 0, 0, 0 :: (volatile dereferenceable load store (s32), addrspace 8) + %0:sgpr(<8 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + %1:vgpr(<2 x i32>) = COPY $vgpr0_vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(i32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), %1(<2 x i32>), $noreg, %2(i32), %0(<8 x i32>), 0, 0, 0 :: (volatile dereferenceable load store (i32), addrspace 8) S_ENDPGM 0 ... @@ -146,55 +154,59 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_si:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_si [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8) + ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_si:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_si [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (i64), addrspace 8) ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V4_V1_si]].sub0_sub1 ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1 + ; ; GFX8-LABEL: name: atomic_cmpswap_i64_1d ; GFX8: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_vi:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8) + ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_vi:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (i64), addrspace 8) ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V4_V1_vi]].sub0_sub1 ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1 + ; ; GFX10-LABEL: name: atomic_cmpswap_i64_1d ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY 
$sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx10_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8) + ; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx10_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (i64), addrspace 8) ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx10_]].sub0_sub1 ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]] ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1 + ; ; GFX11-LABEL: name: atomic_cmpswap_i64_1d ; GFX11: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX11-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx11_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx11 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8) + ; GFX11-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx11_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx11 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (i64), addrspace 8) ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx11_]].sub0_sub1 ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1 + ; ; GFX12-LABEL: name: atomic_cmpswap_i64_1d ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX12-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx12_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx12 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8) + ; GFX12-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx12_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx12 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (i64), addrspace 8) ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx12_]].sub0_sub1 ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1 - %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - %1:vgpr(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %2:vgpr(s32) = COPY $vgpr4 - %3:vgpr(s64) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), %1(<2 x s64>), $noreg, %2(s32), %0(<8 x s32>), 0, 0, 0 :: (volatile dereferenceable load store (s64), addrspace 8) - $vgpr0_vgpr1 = COPY %3(s64) + %0:sgpr(<8 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + %1:vgpr(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %2:vgpr(i32) = COPY $vgpr4 + %3:vgpr(i64) = 
G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), %1(<2 x i64>), $noreg, %2(i32), %0(<8 x i32>), 0, 0, 0 :: (volatile dereferenceable load store (i64), addrspace 8) + $vgpr0_vgpr1 = COPY %3(i64) SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1 ... @@ -214,43 +226,47 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_si:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_si [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8) + ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_si:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_si [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (i64), addrspace 8) ; GFX6-NEXT: S_ENDPGM 0 + ; ; GFX8-LABEL: name: atomic_cmpswap_i64_1d_no_return ; GFX8: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_vi:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8) + ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_vi:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (i64), addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 + ; ; GFX10-LABEL: name: atomic_cmpswap_i64_1d_no_return ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx10_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8) + ; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx10_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (i64), addrspace 8) ; GFX10-NEXT: S_ENDPGM 0 + ; ; GFX11-LABEL: name: atomic_cmpswap_i64_1d_no_return ; GFX11: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX11-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx11_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx11 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8) + ; GFX11-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx11_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx11 [[COPY1]], [[COPY2]], [[COPY]], 15, 
0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (i64), addrspace 8) ; GFX11-NEXT: S_ENDPGM 0 + ; ; GFX12-LABEL: name: atomic_cmpswap_i64_1d_no_return ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX12-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx12_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx12 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8) + ; GFX12-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx12_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx12 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (i64), addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 - %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - %1:vgpr(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %2:vgpr(s32) = COPY $vgpr4 - %3:vgpr(s64) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), %1(<2 x s64>), $noreg, %2(s32), %0(<8 x s32>), 0, 0, 0 :: (volatile dereferenceable load store (s64), addrspace 8) + %0:sgpr(<8 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + %1:vgpr(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %2:vgpr(i32) = COPY $vgpr4 + %3:vgpr(i64) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), %1(<2 x i64>), $noreg, %2(i32), %0(<8 x i32>), 0, 0, 0 :: (volatile dereferenceable load store (i64), addrspace 8) S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer-wrong-insert-point.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer-wrong-insert-point.mir index cbfa1c7c741b8..039252039fe07 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer-wrong-insert-point.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer-wrong-insert-point.mir @@ -20,13 +20,16 @@ body: | ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31 bb.0: + successors: %bb.1(0x80000000) + %0:_(p0) = G_CONSTANT i64 0 G_BR %bb.1 bb.1: ADJCALLSTACKUP 0, 0, implicit-def $scc - $sgpr30_sgpr31 = G_SI_CALL %0, 0, csr_amdgpu + $sgpr30_sgpr31 = G_SI_CALL %0(p0), 0, csr_amdgpu ADJCALLSTACKDOWN 0, 0, implicit-def $scc S_SETPC_B64_return undef $sgpr30_sgpr31 + ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/madmix-constant-bus-violation.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/madmix-constant-bus-violation.mir index 72f7ac3f0bf38..8c87b2ec1f194 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/madmix-constant-bus-violation.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/madmix-constant-bus-violation.mir @@ -23,20 +23,23 @@ body: | ; GFX9-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[V_MAD_MIX_F32_]], implicit $exec ; GFX9-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = G_CONSTANT i32 16 - %3:sgpr(s32) = G_LSHR %0:sgpr, %2:sgpr(s32) - %4:sgpr(s16) = G_TRUNC %3:sgpr(s32) - %5:sgpr(s32) = G_LSHR %1:sgpr, %2:sgpr(s32) - %6:sgpr(s16) = G_TRUNC %5:sgpr(s32) - %7:vgpr(s16) = COPY %4:sgpr(s16) - %8:vgpr(s32) = G_FPEXT %7:vgpr(s16) - %9:vgpr(s16) = COPY %6:sgpr(s16) - %10:vgpr(s32) = G_FPEXT %9:vgpr(s16) - %11:vgpr(s32) = G_FNEG %10:vgpr - %12:vgpr(s32) = G_FMAD %11:vgpr, %10:vgpr, %8:vgpr - %13:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %12:vgpr(s32) - $sgpr0 = COPY %13:sgpr(s32) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i32) = G_CONSTANT i32 16 + %3:sgpr(i32) = G_LSHR %0, %2(i32) + %4:sgpr(i16) = G_TRUNC %3(i32) + %5:sgpr(i32) = G_LSHR %1, %2(i32) + %6:sgpr(i16) = G_TRUNC %5(i32) + %7:vgpr(i16) = COPY %4(i16) + %8:vgpr(f16) = G_BITCAST %7(i16) + %9:vgpr(f32) = G_FPEXT %8(f16) + %10:vgpr(i16) = COPY %6(i16) + %11:vgpr(f16) = G_BITCAST %10(i16) + %12:vgpr(f32) = G_FPEXT %11(f16) + %13:vgpr(f32) = G_FNEG %12 + %14:vgpr(f32) = G_FMAD %13, %12, %9 + %15:vgpr(i32) = G_BITCAST %14(f32) + %16:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %15(i32) + $sgpr0 = COPY %16(i32) SI_RETURN_TO_EPILOG implicit $sgpr0 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/mul-known-bits.i64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/mul-known-bits.i64.mir index 279b65b5baabf..840924bb63f53 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/mul-known-bits.i64.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/mul-known-bits.i64.mir @@ -10,23 +10,23 @@ body: | ; GFX10-LABEL: name: v_mul_i64_no_zext ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV]](s32), [[UV2]], [[C]] - ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV5]], [[MUL]] - ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]] - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]] - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV4]](s32), [[ADD1]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = G_MUL %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV]](i32), [[UV2]], [[C]] + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](i64) + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[UV]], [[UV3]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[UV5]], [[MUL]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UV1]], [[UV2]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ADD]], [[MUL1]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV4]](i32), [[ADD1]](i32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64) = G_MUL %0, %1 + $vgpr0_vgpr1 = COPY %2(i64) ... 
--- @@ -38,21 +38,21 @@ body: | ; GFX10-LABEL: name: v_mul_i64_zext_src1 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV]](s32), [[COPY1]], [[C]] - ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[COPY1]] - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV5]], [[MUL]] - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV4]](s32), [[ADD]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s64) = G_ZEXT %1(s32) - %3:_(s64) = G_MUL %0, %2 - $vgpr0_vgpr1 = COPY %3 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr2 + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV]](i32), [[COPY1]], [[C]] + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](i64) + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[UV1]], [[COPY1]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[UV3]], [[MUL]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV2]](i32), [[ADD]](i32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = COPY $vgpr2 + %2:_(i64) = G_ZEXT %1(i32) + %3:_(i64) = G_MUL %0, %2 + $vgpr0_vgpr1 = COPY %3(i64) ... 
--- @@ -64,21 +64,21 @@ body: | ; GFX10-LABEL: name: v_mul_i64_zext_src0 ; GFX10: liveins: $vgpr0, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[UV2]], [[C]] - ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[UV3]] - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV5]], [[MUL]] - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV4]](s32), [[ADD]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) - %0:_(s32) = COPY $vgpr0 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = G_ZEXT %0(s32) - %3:_(s64) = G_MUL %2, %1 - $vgpr0_vgpr1 = COPY %3 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[COPY]](i32), [[UV]], [[C]] + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](i64) + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[COPY]], [[UV1]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[UV3]], [[MUL]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV2]](i32), [[ADD]](i32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + %0:_(i32) = COPY $vgpr0 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64) = G_ZEXT %0(i32) + %3:_(i64) = G_MUL %2, %1 + $vgpr0_vgpr1 = COPY %3(i64) ... --- @@ -90,15 +90,15 @@ body: | ; GFX10-LABEL: name: v_mul_i64_zext_src0_src1 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s64) = G_ZEXT %0(s32) - %3:_(s64) = G_ZEXT %1(s32) - %4:_(s64) = G_MUL %2, %3 - $vgpr0_vgpr1 = COPY %3 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[COPY]](i32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](i64) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i64) = G_ZEXT %0(i32) + %3:_(i64) = G_ZEXT %1(i32) + %4:_(i64) = G_MUL %2, %3 + $vgpr0_vgpr1 = COPY %3(i64) ... 
--- @@ -110,25 +110,25 @@ body: | ; GFX10-LABEL: name: v_mul_i64_masked_src0_hi ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295 - ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) - ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV]](s32), [[UV2]], [[C1]] - ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV5]], [[MUL]] - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV4]](s32), [[ADD]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = G_CONSTANT i64 4294967295 - %3:_(s64) = G_AND %0, %2 - %4:_(s64) = G_MUL %3, %1 - $vgpr0_vgpr1 = COPY %4 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4294967295 + ; GFX10-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[COPY]], [[C]] + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND]](i64) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV]](i32), [[UV2]], [[C1]] + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](i64) + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[UV]], [[UV3]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[UV5]], [[MUL]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV4]](i32), [[ADD]](i32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64) = G_CONSTANT i64 4294967295 + %3:_(i64) = G_AND %0, %2 + %4:_(i64) = G_MUL %3, %1 + $vgpr0_vgpr1 = COPY %4(i64) ... 
--- name: v_mul_i64_masked_src0_lo @@ -139,23 +139,23 @@ body: | ; GFX10-LABEL: name: v_mul_i64_masked_src0_lo ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -4294967296 - ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) - ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C1]](s64) - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]] - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV4]](s32), [[MUL]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = G_CONSTANT i64 -4294967296 - %3:_(s64) = G_AND %0, %2 - %4:_(s64) = G_MUL %3, %1 - $vgpr0_vgpr1 = COPY %4 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 -4294967296 + ; GFX10-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[COPY]], [[C]] + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND]](i64) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C1]](i64) + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[UV1]], [[UV2]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV4]](i32), [[MUL]](i32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64) = G_CONSTANT i64 -4294967296 + %3:_(i64) = G_AND %0, %2 + %4:_(i64) = G_MUL %3, %1 + $vgpr0_vgpr1 = COPY %4(i64) ... 
--- @@ -167,23 +167,23 @@ body: | ; GFX10-LABEL: name: v_mul_i64_masked_src1_lo ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -4294967296 - ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C1]](s64) - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV4]](s32), [[MUL]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = G_CONSTANT i64 -4294967296 - %3:_(s64) = G_AND %1, %2 - %4:_(s64) = G_MUL %0, %3 - $vgpr0_vgpr1 = COPY %4 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 -4294967296 + ; GFX10-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[COPY1]], [[C]] + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND]](i64) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C1]](i64) + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[UV]], [[UV3]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV4]](i32), [[MUL]](i32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64) = G_CONSTANT i64 -4294967296 + %3:_(i64) = G_AND %1, %2 + %4:_(i64) = G_MUL %0, %3 + $vgpr0_vgpr1 = COPY %4(i64) ... --- @@ -195,14 +195,14 @@ body: | ; GFX10-LABEL: name: v_mul_i64_masked_src0 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[C]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = G_CONSTANT i64 0 - %3:_(s64) = G_AND %0, %2 - %4:_(s64) = G_MUL %3, %1 - $vgpr0_vgpr1 = COPY %4 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[C]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64) = G_CONSTANT i64 0 + %3:_(i64) = G_AND %0, %2 + %4:_(i64) = G_MUL %3, %1 + $vgpr0_vgpr1 = COPY %4(i64) ... 
--- name: v_mul_i64_partially_masked_src0 @@ -213,27 +213,27 @@ body: | ; GFX10-LABEL: name: v_mul_i64_partially_masked_src0 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 263951509094400 - ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) - ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV]](s32), [[UV2]], [[C1]] - ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV5]], [[MUL]] - ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]] - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]] - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV4]](s32), [[ADD1]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = G_CONSTANT i64 263951509094400 - %3:_(s64) = G_AND %0, %2 - %4:_(s64) = G_MUL %3, %1 - $vgpr0_vgpr1 = COPY %4 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 263951509094400 + ; GFX10-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[COPY]], [[C]] + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AND]](i64) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV]](i32), [[UV2]], [[C1]] + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](i64) + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[UV]], [[UV3]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[UV5]], [[MUL]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(i32) = G_MUL [[UV1]], [[UV2]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(i32) = G_ADD [[ADD]], [[MUL1]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV4]](i32), [[ADD1]](i32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64) = G_CONSTANT i64 263951509094400 + %3:_(i64) = G_AND %0, %2 + %4:_(i64) = G_MUL %3, %1 + $vgpr0_vgpr1 = COPY %4(i64) ... 
--- name: v_mul_i64_constant_hi @@ -244,19 +244,19 @@ body: | ; GFX10-LABEL: name: v_mul_i64_constant_hi ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -4294967296 - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C1]](s64) - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV4]](s32), [[MUL]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_CONSTANT i64 -4294967296 - %2:_(s64) = G_MUL %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 -4294967296 + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C]](i64) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C1]](i64) + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[UV]], [[UV3]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV4]](i32), [[MUL]](i32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = G_CONSTANT i64 -4294967296 + %2:_(i64) = G_MUL %0, %1 + $vgpr0_vgpr1 = COPY %2(i64) ... --- name: v_mul_i64_constant_lo @@ -267,19 +267,19 @@ body: | ; GFX10-LABEL: name: v_mul_i64_constant_lo ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295 - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV]](s32), [[UV2]], [[C1]] - ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]] - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV5]], [[MUL]] - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV4]](s32), [[ADD]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_CONSTANT i64 4294967295 - %2:_(s64) = G_MUL %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 4294967295 + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[C]](i64) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 0 + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(i1) = G_AMDGPU_MAD_U64_U32 [[UV]](i32), [[UV2]], [[C1]] + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](i64) + ; GFX10-NEXT: 
[[MUL:%[0-9]+]]:_(i32) = G_MUL [[UV1]], [[UV2]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[UV5]], [[MUL]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[UV4]](i32), [[ADD]](i32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = G_CONSTANT i64 4294967295 + %2:_(i64) = G_MUL %0, %1 + $vgpr0_vgpr1 = COPY %2(i64) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/no-ctlz-from-umul-to-lshr-in-postlegalizer.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/no-ctlz-from-umul-to-lshr-in-postlegalizer.mir index 00ead74cb37bb..da75bf16efea5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/no-ctlz-from-umul-to-lshr-in-postlegalizer.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/no-ctlz-from-umul-to-lshr-in-postlegalizer.mir @@ -11,13 +11,13 @@ body: | ; CHECK-LABEL: name: test ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[C]] - ; CHECK-NEXT: $vgpr0 = COPY [[UMULH]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[UMULH:%[0-9]+]]:_(i32) = G_UMULH [[COPY]], [[C]] + ; CHECK-NEXT: $vgpr0 = COPY [[UMULH]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_CONSTANT i32 4 - %2:_(s32) = G_UMULH %0:_, %1:_ - $vgpr0 = COPY %2:_(s32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_CONSTANT i32 4 + %2:_(i32) = G_UMULH %0, %1 + $vgpr0 = COPY %2(i32) SI_RETURN implicit $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/no-legalize-atomic.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/no-legalize-atomic.mir index ddbd69cc81047..2a127fae635a4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/no-legalize-atomic.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/no-legalize-atomic.mir @@ -1,9 +1,9 @@ # RUN: llc -amdgpu-global-isel-new-legality -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck %s -# CHECK: %1:_(<8 x s32>) = G_LOAD %0(p1) :: (load monotonic (<8 x s32>), addrspace 1) -# CHECK: G_STORE %1(<8 x s32>), %0(p1) :: (store monotonic (<8 x s32>), addrspace 1) -# CHECK: %1:_(s256) = G_LOAD %0(p1) :: (load monotonic (s256), addrspace 1) -# CHECK: G_STORE %1(s256), %0(p1) :: (store monotonic (s256), addrspace 1) +# CHECK: %1:_(<8 x i32>) = G_LOAD %0(p1) :: (load monotonic (<8 x i32>), addrspace 1) +# CHECK: G_STORE %1(<8 x i32>), %0(p1) :: (store monotonic (<8 x i32>), addrspace 1) +# CHECK: %1:_(i256) = G_LOAD %0(p1) :: (load monotonic (i256), addrspace 1) +# CHECK: G_STORE %1(i256), %0(p1) :: (store monotonic (i256), addrspace 1) --- name: test_atomic_load_global_v8s32 @@ -11,7 +11,7 @@ body: | bb.0: liveins: $vgpr0_vgpr1 %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<8 x s32>) = G_LOAD %0 :: (load monotonic (<8 x s32>), addrspace 1, align 32) + %1:_(<8 x i32>) = G_LOAD %0 :: (load monotonic (<8 x i32>), addrspace 1, align 32) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 ... @@ -21,8 +21,8 @@ body: | bb.0: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - G_STORE %1, %0 :: (store monotonic (<8 x s32>), addrspace 1, align 32) + %1:_(<8 x i32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + G_STORE %1, %0 :: (store monotonic (<8 x i32>), addrspace 1, align 32) ... 
--- @@ -31,7 +31,7 @@ body: | bb.0: liveins: $vgpr0_vgpr1 %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s256) = G_LOAD %0 :: (load monotonic (s256), addrspace 1, align 32) + %1:_(i256) = G_LOAD %0 :: (load monotonic (i256), addrspace 1, align 32) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 ... @@ -41,6 +41,6 @@ body: | bb.0: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - G_STORE %1, %0 :: (store monotonic (s256), addrspace 1, align 32) + %1:_(i256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 + G_STORE %1, %0 :: (store monotonic (i256), addrspace 1, align 32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-buildvector-identities.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-buildvector-identities.mir index 33e484682eb31..85bba60fbe926 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-buildvector-identities.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-buildvector-identities.mir @@ -11,14 +11,14 @@ body: | ; CHECK-LABEL: name: v2s16_trunc_same_bitcast_lshr16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %src:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: $vgpr0 = COPY %src(<2 x s16>) - %src:_(<2 x s16>) = COPY $vgpr0 - %bitcast:_(s32) = G_BITCAST %src - %lshr_amount:_(s32) = G_CONSTANT i32 16 - %lshr:_(s32) = G_LSHR %bitcast, %lshr_amount - %root:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %bitcast, %lshr - $vgpr0 = COPY %root + ; CHECK-NEXT: %src:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: $vgpr0 = COPY %src(<2 x i16>) + %src:_(<2 x i16>) = COPY $vgpr0 + %bitcast:_(i32) = G_BITCAST %src(<2 x i16>) + %lshr_amount:_(i32) = G_CONSTANT i32 16 + %lshr:_(i32) = G_LSHR %bitcast, %lshr_amount(i32) + %root:_(<2 x i16>) = G_BUILD_VECTOR_TRUNC %bitcast(i32), %lshr(i32) + $vgpr0 = COPY %root(<2 x i16>) ... --- @@ -31,15 +31,15 @@ body: | ; CHECK-LABEL: name: v2s16_trunc_different_bitcast_lshr16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %src:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: $vgpr0 = COPY %src(<2 x s16>) - %src:_(<2 x s16>) = COPY $vgpr0 - %bitcast0:_(s32) = G_BITCAST %src - %bitcast1:_(s32) = G_BITCAST %src - %lshr_amount:_(s32) = G_CONSTANT i32 16 - %lshr:_(s32) = G_LSHR %bitcast1, %lshr_amount - %root:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %bitcast0, %lshr - $vgpr0 = COPY %root + ; CHECK-NEXT: %src:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: $vgpr0 = COPY %src(<2 x i16>) + %src:_(<2 x i16>) = COPY $vgpr0 + %bitcast0:_(i32) = G_BITCAST %src(<2 x i16>) + %bitcast1:_(i32) = G_BITCAST %src(<2 x i16>) + %lshr_amount:_(i32) = G_CONSTANT i32 16 + %lshr:_(i32) = G_LSHR %bitcast1, %lshr_amount(i32) + %root:_(<2 x i16>) = G_BUILD_VECTOR_TRUNC %bitcast0(i32), %lshr(i32) + $vgpr0 = COPY %root(<2 x i16>) ... 
--- @@ -52,18 +52,18 @@ body: | ; CHECK-LABEL: name: v2s16_trunc_same_bitcast_lshr8_nocombine ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %src:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: %bitcast:_(s32) = G_BITCAST %src(<2 x s16>) - ; CHECK-NEXT: %lshr_amount:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: %lshr:_(s32) = G_LSHR %bitcast, %lshr_amount(s32) - ; CHECK-NEXT: %root:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %bitcast(s32), %lshr(s32) - ; CHECK-NEXT: $vgpr0 = COPY %root(<2 x s16>) - %src:_(<2 x s16>) = COPY $vgpr0 - %bitcast:_(s32) = G_BITCAST %src - %lshr_amount:_(s32) = G_CONSTANT i32 8 - %lshr:_(s32) = G_LSHR %bitcast, %lshr_amount - %root:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %bitcast, %lshr - $vgpr0 = COPY %root + ; CHECK-NEXT: %src:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: %bitcast:_(i32) = G_BITCAST %src(<2 x i16>) + ; CHECK-NEXT: %lshr_amount:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: %lshr:_(i32) = G_LSHR %bitcast, %lshr_amount(i32) + ; CHECK-NEXT: %root:_(<2 x i16>) = G_BUILD_VECTOR_TRUNC %bitcast(i32), %lshr(i32) + ; CHECK-NEXT: $vgpr0 = COPY %root(<2 x i16>) + %src:_(<2 x i16>) = COPY $vgpr0 + %bitcast:_(i32) = G_BITCAST %src(<2 x i16>) + %lshr_amount:_(i32) = G_CONSTANT i32 8 + %lshr:_(i32) = G_LSHR %bitcast, %lshr_amount(i32) + %root:_(<2 x i16>) = G_BUILD_VECTOR_TRUNC %bitcast(i32), %lshr(i32) + $vgpr0 = COPY %root(<2 x i16>) ... --- @@ -76,16 +76,16 @@ body: | ; CHECK-LABEL: name: v2s16_same_bitcast_lshr16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %src:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: $vgpr0 = COPY %src(<2 x s16>) - %src:_(<2 x s16>) = COPY $vgpr0 - %bitcast:_(s32) = G_BITCAST %src - %lshr_amount:_(s32) = G_CONSTANT i32 16 - %lshr:_(s32) = G_LSHR %bitcast, %lshr_amount - %trunclo:_(s16) = G_TRUNC %bitcast - %trunchi:_(s16) = G_TRUNC %lshr - %root:_(<2 x s16>) = G_BUILD_VECTOR %trunclo, %trunchi - $vgpr0 = COPY %root + ; CHECK-NEXT: %src:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: $vgpr0 = COPY %src(<2 x i16>) + %src:_(<2 x i16>) = COPY $vgpr0 + %bitcast:_(i32) = G_BITCAST %src(<2 x i16>) + %lshr_amount:_(i32) = G_CONSTANT i32 16 + %lshr:_(i32) = G_LSHR %bitcast, %lshr_amount(i32) + %trunclo:_(i16) = G_TRUNC %bitcast(i32) + %trunchi:_(i16) = G_TRUNC %lshr(i32) + %root:_(<2 x i16>) = G_BUILD_VECTOR %trunclo(i16), %trunchi(i16) + $vgpr0 = COPY %root(<2 x i16>) ... 
--- @@ -98,22 +98,22 @@ body: | ; CHECK-LABEL: name: v2s16_same_bitcast_lshr8_nocombine ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %src:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: %bitcast:_(s32) = G_BITCAST %src(<2 x s16>) - ; CHECK-NEXT: %lshr_amount:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: %lshr:_(s32) = G_LSHR %bitcast, %lshr_amount(s32) - ; CHECK-NEXT: %trunclo:_(s16) = G_TRUNC %bitcast(s32) - ; CHECK-NEXT: %trunchi:_(s16) = G_TRUNC %lshr(s32) - ; CHECK-NEXT: %root:_(<2 x s16>) = G_BUILD_VECTOR %trunclo(s16), %trunchi(s16) - ; CHECK-NEXT: $vgpr0 = COPY %root(<2 x s16>) - %src:_(<2 x s16>) = COPY $vgpr0 - %bitcast:_(s32) = G_BITCAST %src - %lshr_amount:_(s32) = G_CONSTANT i32 8 - %lshr:_(s32) = G_LSHR %bitcast, %lshr_amount - %trunclo:_(s16) = G_TRUNC %bitcast - %trunchi:_(s16) = G_TRUNC %lshr - %root:_(<2 x s16>) = G_BUILD_VECTOR %trunclo, %trunchi - $vgpr0 = COPY %root + ; CHECK-NEXT: %src:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: %bitcast:_(i32) = G_BITCAST %src(<2 x i16>) + ; CHECK-NEXT: %lshr_amount:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: %lshr:_(i32) = G_LSHR %bitcast, %lshr_amount(i32) + ; CHECK-NEXT: %trunclo:_(i16) = G_TRUNC %bitcast(i32) + ; CHECK-NEXT: %trunchi:_(i16) = G_TRUNC %lshr(i32) + ; CHECK-NEXT: %root:_(<2 x i16>) = G_BUILD_VECTOR %trunclo(i16), %trunchi(i16) + ; CHECK-NEXT: $vgpr0 = COPY %root(<2 x i16>) + %src:_(<2 x i16>) = COPY $vgpr0 + %bitcast:_(i32) = G_BITCAST %src(<2 x i16>) + %lshr_amount:_(i32) = G_CONSTANT i32 8 + %lshr:_(i32) = G_LSHR %bitcast, %lshr_amount(i32) + %trunclo:_(i16) = G_TRUNC %bitcast(i32) + %trunchi:_(i16) = G_TRUNC %lshr(i32) + %root:_(<2 x i16>) = G_BUILD_VECTOR %trunclo(i16), %trunchi(i16) + $vgpr0 = COPY %root(<2 x i16>) ... --- @@ -126,14 +126,14 @@ body: | ; CHECK-LABEL: name: v2s16_undefhi ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %src:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: $vgpr0 = COPY %src(<2 x s16>) - %src:_(<2 x s16>) = COPY $vgpr0 - %bitcast:_(s32) = G_BITCAST %src - %trunc:_(s16) = G_TRUNC %bitcast - %undef:_(s16) = G_IMPLICIT_DEF - %root:_(<2 x s16>) = G_BUILD_VECTOR %trunc, %undef - $vgpr0 = COPY %root + ; CHECK-NEXT: %src:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: $vgpr0 = COPY %src(<2 x i16>) + %src:_(<2 x i16>) = COPY $vgpr0 + %bitcast:_(i32) = G_BITCAST %src(<2 x i16>) + %trunc:_(i16) = G_TRUNC %bitcast(i32) + %undef:_(i16) = G_IMPLICIT_DEF + %root:_(<2 x i16>) = G_BUILD_VECTOR %trunc(i16), %undef(i16) + $vgpr0 = COPY %root(<2 x i16>) ... --- @@ -146,12 +146,12 @@ body: | ; CHECK-LABEL: name: v2s32_undefhi ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %src:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %src(<2 x s32>) - %src:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %bitcast:_(s64) = G_BITCAST %src - %trunc:_(s32) = G_TRUNC %bitcast - %undef:_(s32) = G_IMPLICIT_DEF - %root:_(<2 x s32>) = G_BUILD_VECTOR %trunc, %undef - $vgpr0_vgpr1 = COPY %root + ; CHECK-NEXT: %src:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %src(<2 x i32>) + %src:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %bitcast:_(i64) = G_BITCAST %src(<2 x i32>) + %trunc:_(i32) = G_TRUNC %bitcast(i64) + %undef:_(i32) = G_IMPLICIT_DEF + %root:_(<2 x i32>) = G_BUILD_VECTOR %trunc(i32), %undef(i32) + $vgpr0_vgpr1 = COPY %root(<2 x i32>) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-divrem.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-divrem.mir index f4268716e24be..4f74936a33765 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-divrem.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-divrem.mir @@ -12,22 +12,22 @@ body: | ; CHECK-LABEL: name: test_sdiv_srem ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %src1:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %src2:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: %src1:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %src2:_(i32) = COPY $vgpr1 ; CHECK-NEXT: %ptr1:_(p1) = COPY $vgpr2_vgpr3 ; CHECK-NEXT: %ptr2:_(p1) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: %div:_(s32) = G_SDIV %src1, %src2 - ; CHECK-NEXT: G_STORE %div(s32), %ptr1(p1) :: (store (s32), addrspace 1) - ; CHECK-NEXT: %rem:_(s32) = G_SREM %src1, %src2 - ; CHECK-NEXT: G_STORE %rem(s32), %ptr2(p1) :: (store (s32), addrspace 1) - %src1:_(s32) = COPY $vgpr0 - %src2:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: %div:_(i32) = G_SDIV %src1, %src2 + ; CHECK-NEXT: G_STORE %div(i32), %ptr1(p1) :: (store (i32), addrspace 1) + ; CHECK-NEXT: %rem:_(i32) = G_SREM %src1, %src2 + ; CHECK-NEXT: G_STORE %rem(i32), %ptr2(p1) :: (store (i32), addrspace 1) + %src1:_(i32) = COPY $vgpr0 + %src2:_(i32) = COPY $vgpr1 %ptr1:_(p1) = COPY $vgpr2_vgpr3 %ptr2:_(p1) = COPY $vgpr4_vgpr5 - %div:_(s32) = G_SDIV %src1:_(s32), %src2:_(s32) - G_STORE %div:_(s32), %ptr1:_(p1) :: (store (s32), addrspace 1, align 4) - %rem:_(s32) = G_SREM %src1:_(s32), %src2:_(s32) - G_STORE %rem:_(s32), %ptr2:_(p1) :: (store (s32), addrspace 1, align 4) + %div:_(i32) = G_SDIV %src1, %src2 + G_STORE %div(i32), %ptr1(p1) :: (store (i32), addrspace 1) + %rem:_(i32) = G_SREM %src1, %src2 + G_STORE %rem(i32), %ptr2(p1) :: (store (i32), addrspace 1) ... --- name: test_srem_sdiv @@ -39,22 +39,22 @@ body: | ; CHECK-LABEL: name: test_srem_sdiv ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %src1:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %src2:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: %src1:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %src2:_(i32) = COPY $vgpr1 ; CHECK-NEXT: %ptr1:_(p1) = COPY $vgpr2_vgpr3 ; CHECK-NEXT: %ptr2:_(p1) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: %rem:_(s32) = G_SREM %src1, %src2 - ; CHECK-NEXT: G_STORE %rem(s32), %ptr1(p1) :: (store (s32), addrspace 1) - ; CHECK-NEXT: %div:_(s32) = G_SDIV %src1, %src2 - ; CHECK-NEXT: G_STORE %div(s32), %ptr2(p1) :: (store (s32), addrspace 1) - %src1:_(s32) = COPY $vgpr0 - %src2:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: %rem:_(i32) = G_SREM %src1, %src2 + ; CHECK-NEXT: G_STORE %rem(i32), %ptr1(p1) :: (store (i32), addrspace 1) + ; CHECK-NEXT: %div:_(i32) = G_SDIV %src1, %src2 + ; CHECK-NEXT: G_STORE %div(i32), %ptr2(p1) :: (store (i32), addrspace 1) + %src1:_(i32) = COPY $vgpr0 + %src2:_(i32) = COPY $vgpr1 %ptr1:_(p1) = COPY $vgpr2_vgpr3 %ptr2:_(p1) = COPY $vgpr4_vgpr5 - %rem:_(s32) = G_SREM %src1:_(s32), %src2:_(s32) - G_STORE %rem:_(s32), %ptr1:_(p1) :: (store (s32), addrspace 1, align 4) - %div:_(s32) = G_SDIV %src1:_(s32), %src2:_(s32) - G_STORE %div:_(s32), %ptr2:_(p1) :: (store (s32), addrspace 1, align 4) + %rem:_(i32) = G_SREM %src1, %src2 + G_STORE %rem(i32), %ptr1(p1) :: (store (i32), addrspace 1) + %div:_(i32) = G_SDIV %src1, %src2 + G_STORE %div(i32), %ptr2(p1) :: (store (i32), addrspace 1) ... 
--- name: test_udiv_urem @@ -66,22 +66,22 @@ body: | ; CHECK-LABEL: name: test_udiv_urem ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %src1:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %src2:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: %src1:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %src2:_(i32) = COPY $vgpr1 ; CHECK-NEXT: %ptr1:_(p1) = COPY $vgpr2_vgpr3 ; CHECK-NEXT: %ptr2:_(p1) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: %div:_(s32) = G_UDIV %src1, %src2 - ; CHECK-NEXT: G_STORE %div(s32), %ptr1(p1) :: (store (s32), addrspace 1) - ; CHECK-NEXT: %rem:_(s32) = G_UREM %src1, %src2 - ; CHECK-NEXT: G_STORE %rem(s32), %ptr2(p1) :: (store (s32), addrspace 1) - %src1:_(s32) = COPY $vgpr0 - %src2:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: %div:_(i32) = G_UDIV %src1, %src2 + ; CHECK-NEXT: G_STORE %div(i32), %ptr1(p1) :: (store (i32), addrspace 1) + ; CHECK-NEXT: %rem:_(i32) = G_UREM %src1, %src2 + ; CHECK-NEXT: G_STORE %rem(i32), %ptr2(p1) :: (store (i32), addrspace 1) + %src1:_(i32) = COPY $vgpr0 + %src2:_(i32) = COPY $vgpr1 %ptr1:_(p1) = COPY $vgpr2_vgpr3 %ptr2:_(p1) = COPY $vgpr4_vgpr5 - %div:_(s32) = G_UDIV %src1:_(s32), %src2:_(s32) - G_STORE %div:_(s32), %ptr1:_(p1) :: (store (s32), addrspace 1, align 4) - %rem:_(s32) = G_UREM %src1:_(s32), %src2:_(s32) - G_STORE %rem:_(s32), %ptr2:_(p1) :: (store (s32), addrspace 1, align 4) + %div:_(i32) = G_UDIV %src1, %src2 + G_STORE %div(i32), %ptr1(p1) :: (store (i32), addrspace 1) + %rem:_(i32) = G_UREM %src1, %src2 + G_STORE %rem(i32), %ptr2(p1) :: (store (i32), addrspace 1) ... --- name: test_urem_udiv @@ -93,22 +93,22 @@ body: | ; CHECK-LABEL: name: test_urem_udiv ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %src1:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %src2:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: %src1:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %src2:_(i32) = COPY $vgpr1 ; CHECK-NEXT: %ptr1:_(p1) = COPY $vgpr2_vgpr3 ; CHECK-NEXT: %ptr2:_(p1) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: %rem:_(s32) = G_UREM %src1, %src2 - ; CHECK-NEXT: G_STORE %rem(s32), %ptr1(p1) :: (store (s32), addrspace 1) - ; CHECK-NEXT: %div:_(s32) = G_UDIV %src1, %src2 - ; CHECK-NEXT: G_STORE %div(s32), %ptr2(p1) :: (store (s32), addrspace 1) - %src1:_(s32) = COPY $vgpr0 - %src2:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: %rem:_(i32) = G_UREM %src1, %src2 + ; CHECK-NEXT: G_STORE %rem(i32), %ptr1(p1) :: (store (i32), addrspace 1) + ; CHECK-NEXT: %div:_(i32) = G_UDIV %src1, %src2 + ; CHECK-NEXT: G_STORE %div(i32), %ptr2(p1) :: (store (i32), addrspace 1) + %src1:_(i32) = COPY $vgpr0 + %src2:_(i32) = COPY $vgpr1 %ptr1:_(p1) = COPY $vgpr2_vgpr3 %ptr2:_(p1) = COPY $vgpr4_vgpr5 - %rem:_(s32) = G_UREM %src1:_(s32), %src2:_(s32) - G_STORE %rem:_(s32), %ptr1:_(p1) :: (store (s32), addrspace 1, align 4) - %div:_(s32) = G_UDIV %src1:_(s32), %src2:_(s32) - G_STORE %div:_(s32), %ptr2:_(p1) :: (store (s32), addrspace 1, align 4) + %rem:_(i32) = G_UREM %src1, %src2 + G_STORE %rem(i32), %ptr1(p1) :: (store (i32), addrspace 1) + %div:_(i32) = G_UDIV %src1, %src2 + G_STORE %div(i32), %ptr2(p1) :: (store (i32), addrspace 1) ... 
--- name: test_sdiv_srem_v2 @@ -120,22 +120,22 @@ body: | ; CHECK-LABEL: name: test_sdiv_srem_v2 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %src1:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: %src2:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: %src1:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: %src2:_(<2 x i32>) = COPY $vgpr2_vgpr3 ; CHECK-NEXT: %ptr1:_(p1) = COPY $vgpr4_vgpr5 ; CHECK-NEXT: %ptr2:_(p1) = COPY $vgpr6_vgpr7 - ; CHECK-NEXT: %div:_(<2 x s32>) = G_SDIV %src1, %src2 - ; CHECK-NEXT: G_STORE %div(<2 x s32>), %ptr1(p1) :: (store (<2 x s32>), align 4, addrspace 1) - ; CHECK-NEXT: %rem:_(<2 x s32>) = G_SREM %src1, %src2 - ; CHECK-NEXT: G_STORE %rem(<2 x s32>), %ptr2(p1) :: (store (<2 x s32>), align 4, addrspace 1) - %src1:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %src2:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: %div:_(<2 x i32>) = G_SDIV %src1, %src2 + ; CHECK-NEXT: G_STORE %div(<2 x i32>), %ptr1(p1) :: (store (<2 x i32>), align 4, addrspace 1) + ; CHECK-NEXT: %rem:_(<2 x i32>) = G_SREM %src1, %src2 + ; CHECK-NEXT: G_STORE %rem(<2 x i32>), %ptr2(p1) :: (store (<2 x i32>), align 4, addrspace 1) + %src1:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %src2:_(<2 x i32>) = COPY $vgpr2_vgpr3 %ptr1:_(p1) = COPY $vgpr4_vgpr5 %ptr2:_(p1) = COPY $vgpr6_vgpr7 - %div:_(<2 x s32>) = G_SDIV %src1:_(<2 x s32>), %src2:_(<2 x s32>) - G_STORE %div:_(<2 x s32>), %ptr1:_(p1) :: (store (<2 x s32>), addrspace 1, align 4) - %rem:_(<2 x s32>) = G_SREM %src1:_(<2 x s32>), %src2:_(<2 x s32>) - G_STORE %rem:_(<2 x s32>), %ptr2:_(p1) :: (store (<2 x s32>), addrspace 1, align 4) + %div:_(<2 x i32>) = G_SDIV %src1, %src2 + G_STORE %div(<2 x i32>), %ptr1(p1) :: (store (<2 x i32>), align 4, addrspace 1) + %rem:_(<2 x i32>) = G_SREM %src1, %src2 + G_STORE %rem(<2 x i32>), %ptr2(p1) :: (store (<2 x i32>), align 4, addrspace 1) ... 
--- name: test_udiv_urem_v2 @@ -147,22 +147,22 @@ body: | ; CHECK-LABEL: name: test_udiv_urem_v2 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %src1:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: %src2:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: %src1:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: %src2:_(<2 x i32>) = COPY $vgpr2_vgpr3 ; CHECK-NEXT: %ptr1:_(p1) = COPY $vgpr4_vgpr5 ; CHECK-NEXT: %ptr2:_(p1) = COPY $vgpr6_vgpr7 - ; CHECK-NEXT: %div:_(<2 x s32>) = G_UDIV %src1, %src2 - ; CHECK-NEXT: G_STORE %div(<2 x s32>), %ptr1(p1) :: (store (<2 x s32>), align 4, addrspace 1) - ; CHECK-NEXT: %rem:_(<2 x s32>) = G_UREM %src1, %src2 - ; CHECK-NEXT: G_STORE %rem(<2 x s32>), %ptr2(p1) :: (store (<2 x s32>), align 4, addrspace 1) - %src1:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %src2:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: %div:_(<2 x i32>) = G_UDIV %src1, %src2 + ; CHECK-NEXT: G_STORE %div(<2 x i32>), %ptr1(p1) :: (store (<2 x i32>), align 4, addrspace 1) + ; CHECK-NEXT: %rem:_(<2 x i32>) = G_UREM %src1, %src2 + ; CHECK-NEXT: G_STORE %rem(<2 x i32>), %ptr2(p1) :: (store (<2 x i32>), align 4, addrspace 1) + %src1:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %src2:_(<2 x i32>) = COPY $vgpr2_vgpr3 %ptr1:_(p1) = COPY $vgpr4_vgpr5 %ptr2:_(p1) = COPY $vgpr6_vgpr7 - %div:_(<2 x s32>) = G_UDIV %src1:_(<2 x s32>), %src2:_(<2 x s32>) - G_STORE %div:_(<2 x s32>), %ptr1:_(p1) :: (store (<2 x s32>), addrspace 1, align 4) - %rem:_(<2 x s32>) = G_UREM %src1:_(<2 x s32>), %src2:_(<2 x s32>) - G_STORE %rem:_(<2 x s32>), %ptr2:_(p1) :: (store (<2 x s32>), addrspace 1, align 4) + %div:_(<2 x i32>) = G_UDIV %src1, %src2 + G_STORE %div(<2 x i32>), %ptr1(p1) :: (store (<2 x i32>), align 4, addrspace 1) + %rem:_(<2 x i32>) = G_UREM %src1, %src2 + G_STORE %rem(<2 x i32>), %ptr2(p1) :: (store (<2 x i32>), align 4, addrspace 1) ... 
--- name: test_sdiv_srem_extra_sdiv @@ -174,28 +174,28 @@ body: | ; CHECK-LABEL: name: test_sdiv_srem_extra_sdiv ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %src1:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %src2:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: %src1:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %src2:_(i32) = COPY $vgpr1 ; CHECK-NEXT: %ptr1:_(p1) = COPY $vgpr2_vgpr3 ; CHECK-NEXT: %ptr2:_(p1) = COPY $vgpr4_vgpr5 ; CHECK-NEXT: %ptr3:_(p1) = COPY $vgpr6_vgpr7 - ; CHECK-NEXT: %div:_(s32) = G_SDIV %src1, %src2 - ; CHECK-NEXT: G_STORE %div(s32), %ptr1(p1) :: (store (s32), addrspace 1) - ; CHECK-NEXT: %rem:_(s32) = G_SREM %src1, %src2 - ; CHECK-NEXT: G_STORE %rem(s32), %ptr2(p1) :: (store (s32), addrspace 1) - ; CHECK-NEXT: %div2:_(s32) = G_SDIV %src1, %src2 - ; CHECK-NEXT: G_STORE %div2(s32), %ptr3(p1) :: (store (s32), addrspace 1) - %src1:_(s32) = COPY $vgpr0 - %src2:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: %div:_(i32) = G_SDIV %src1, %src2 + ; CHECK-NEXT: G_STORE %div(i32), %ptr1(p1) :: (store (i32), addrspace 1) + ; CHECK-NEXT: %rem:_(i32) = G_SREM %src1, %src2 + ; CHECK-NEXT: G_STORE %rem(i32), %ptr2(p1) :: (store (i32), addrspace 1) + ; CHECK-NEXT: %div2:_(i32) = G_SDIV %src1, %src2 + ; CHECK-NEXT: G_STORE %div2(i32), %ptr3(p1) :: (store (i32), addrspace 1) + %src1:_(i32) = COPY $vgpr0 + %src2:_(i32) = COPY $vgpr1 %ptr1:_(p1) = COPY $vgpr2_vgpr3 %ptr2:_(p1) = COPY $vgpr4_vgpr5 %ptr3:_(p1) = COPY $vgpr6_vgpr7 - %div:_(s32) = G_SDIV %src1:_(s32), %src2:_(s32) - G_STORE %div:_(s32), %ptr1:_(p1) :: (store (s32), addrspace 1, align 4) - %rem:_(s32) = G_SREM %src1:_(s32), %src2:_(s32) - G_STORE %rem:_(s32), %ptr2:_(p1) :: (store (s32), addrspace 1, align 4) - %div2:_(s32) = G_SDIV %src1:_(s32), %src2:_(s32) - G_STORE %div2:_(s32), %ptr3:_(p1) :: (store (s32), addrspace 1, align 4) + %div:_(i32) = G_SDIV %src1, %src2 + G_STORE %div(i32), %ptr1(p1) :: (store (i32), addrspace 1) + %rem:_(i32) = G_SREM %src1, %src2 + G_STORE %rem(i32), %ptr2(p1) :: (store (i32), addrspace 1) + %div2:_(i32) = G_SDIV %src1, %src2 + G_STORE %div2(i32), %ptr3(p1) :: (store (i32), addrspace 1) ... 
--- name: test_sdiv_srem_extra_srem @@ -207,26 +207,26 @@ body: | ; CHECK-LABEL: name: test_sdiv_srem_extra_srem ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %src1:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %src2:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: %src1:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %src2:_(i32) = COPY $vgpr1 ; CHECK-NEXT: %ptr1:_(p1) = COPY $vgpr2_vgpr3 ; CHECK-NEXT: %ptr2:_(p1) = COPY $vgpr4_vgpr5 ; CHECK-NEXT: %ptr3:_(p1) = COPY $vgpr6_vgpr7 - ; CHECK-NEXT: %div:_(s32) = G_SDIV %src1, %src2 - ; CHECK-NEXT: G_STORE %div(s32), %ptr1(p1) :: (store (s32), addrspace 1) - ; CHECK-NEXT: %rem:_(s32) = G_SREM %src1, %src2 - ; CHECK-NEXT: G_STORE %rem(s32), %ptr2(p1) :: (store (s32), addrspace 1) - ; CHECK-NEXT: %rem2:_(s32) = G_SREM %src1, %src2 - ; CHECK-NEXT: G_STORE %rem2(s32), %ptr3(p1) :: (store (s32), addrspace 1) - %src1:_(s32) = COPY $vgpr0 - %src2:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: %div:_(i32) = G_SDIV %src1, %src2 + ; CHECK-NEXT: G_STORE %div(i32), %ptr1(p1) :: (store (i32), addrspace 1) + ; CHECK-NEXT: %rem:_(i32) = G_SREM %src1, %src2 + ; CHECK-NEXT: G_STORE %rem(i32), %ptr2(p1) :: (store (i32), addrspace 1) + ; CHECK-NEXT: %rem2:_(i32) = G_SREM %src1, %src2 + ; CHECK-NEXT: G_STORE %rem2(i32), %ptr3(p1) :: (store (i32), addrspace 1) + %src1:_(i32) = COPY $vgpr0 + %src2:_(i32) = COPY $vgpr1 %ptr1:_(p1) = COPY $vgpr2_vgpr3 %ptr2:_(p1) = COPY $vgpr4_vgpr5 %ptr3:_(p1) = COPY $vgpr6_vgpr7 - %div:_(s32) = G_SDIV %src1:_(s32), %src2:_(s32) - G_STORE %div:_(s32), %ptr1:_(p1) :: (store (s32), addrspace 1, align 4) - %rem:_(s32) = G_SREM %src1:_(s32), %src2:_(s32) - G_STORE %rem:_(s32), %ptr2:_(p1) :: (store (s32), addrspace 1, align 4) - %rem2:_(s32) = G_SREM %src1:_(s32), %src2:_(s32) - G_STORE %rem2:_(s32), %ptr3:_(p1) :: (store (s32), addrspace 1, align 4) + %div:_(i32) = G_SDIV %src1, %src2 + G_STORE %div(i32), %ptr1(p1) :: (store (i32), addrspace 1) + %rem:_(i32) = G_SREM %src1, %src2 + G_STORE %rem(i32), %ptr2(p1) :: (store (i32), addrspace 1) + %rem2:_(i32) = G_SREM %src1, %src2 + G_STORE %rem2(i32), %ptr3(p1) :: (store (i32), addrspace 1) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-fcanonicalize.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-fcanonicalize.mir index cd3c0053d3784..5719853d3cba6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-fcanonicalize.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-fcanonicalize.mir @@ -11,15 +11,17 @@ body: | ; CHECK-LABEL: name: test_fcanonicalize_fcanonicalize_s32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %src0:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %fcanonicalize0:_(s32) = G_FCANONICALIZE %src0 + ; CHECK-NEXT: %src0:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST %src0(i32) + ; CHECK-NEXT: %fcanonicalize0:_(f32) = G_FCANONICALIZE [[BITCAST]] ; CHECK-NEXT: %ptr:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE %fcanonicalize0(s32), %ptr(p1) :: (store (s32), addrspace 1) - %src0:_(s32) = COPY $vgpr0 - %fcanonicalize0:_(s32) = G_FCANONICALIZE %src0 - %fcanonicalize1:_(s32) = G_FCANONICALIZE %fcanonicalize0 + ; CHECK-NEXT: G_STORE %fcanonicalize0(f32), %ptr(p1) :: (store (f32), addrspace 1) + %src0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %src0(i32) + %fcanonicalize0:_(f32) = G_FCANONICALIZE %1 + %fcanonicalize1:_(f32) = G_FCANONICALIZE %fcanonicalize0 %ptr:_(p1) = G_IMPLICIT_DEF - G_STORE %fcanonicalize1, %ptr :: (store (s32), addrspace 1, align 4) + G_STORE %fcanonicalize1(f32), %ptr(p1) :: (store (f32), addrspace 1) ... --- @@ -32,16 +34,18 @@ body: | ; CHECK-LABEL: name: test_fcanonicalize_fcanonicalize_fcanonicalize_s32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %src0:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %fcanonicalize0:_(s32) = G_FCANONICALIZE %src0 + ; CHECK-NEXT: %src0:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST %src0(i32) + ; CHECK-NEXT: %fcanonicalize0:_(f32) = G_FCANONICALIZE [[BITCAST]] ; CHECK-NEXT: %ptr:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE %fcanonicalize0(s32), %ptr(p1) :: (store (s32), addrspace 1) - %src0:_(s32) = COPY $vgpr0 - %fcanonicalize0:_(s32) = G_FCANONICALIZE %src0 - %fcanonicalize1:_(s32) = G_FCANONICALIZE %fcanonicalize0 - %fcanonicalize2:_(s32) = G_FCANONICALIZE %fcanonicalize1 + ; CHECK-NEXT: G_STORE %fcanonicalize0(f32), %ptr(p1) :: (store (f32), addrspace 1) + %src0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %src0(i32) + %fcanonicalize0:_(f32) = G_FCANONICALIZE %1 + %fcanonicalize1:_(f32) = G_FCANONICALIZE %fcanonicalize0 + %fcanonicalize2:_(f32) = G_FCANONICALIZE %fcanonicalize1 %ptr:_(p1) = G_IMPLICIT_DEF - G_STORE %fcanonicalize2, %ptr :: (store (s32), addrspace 1, align 4) + G_STORE %fcanonicalize2(f32), %ptr(p1) :: (store (f32), addrspace 1) ... 
--- @@ -55,22 +59,24 @@ body: | ; CHECK-LABEL: name: test_fcanonicalize_fcanonicalize_s32_multi_use ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %src0:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: %src0:_(i32) = COPY $vgpr0 ; CHECK-NEXT: %ptr:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: %fcanonicalize0:_(s32) = G_FCANONICALIZE %src0 - ; CHECK-NEXT: G_STORE %fcanonicalize0(s32), %ptr(p1) :: (volatile store (s32), addrspace 1) - ; CHECK-NEXT: G_STORE %fcanonicalize0(s32), %ptr(p1) :: (volatile store (s32), addrspace 1) - ; CHECK-NEXT: G_STORE %fcanonicalize0(s32), %ptr(p1) :: (volatile store (s32), addrspace 1) - %src0:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST %src0(i32) + ; CHECK-NEXT: %fcanonicalize0:_(f32) = G_FCANONICALIZE [[BITCAST]] + ; CHECK-NEXT: G_STORE %fcanonicalize0(f32), %ptr(p1) :: (volatile store (f32), addrspace 1) + ; CHECK-NEXT: G_STORE %fcanonicalize0(f32), %ptr(p1) :: (volatile store (f32), addrspace 1) + ; CHECK-NEXT: G_STORE %fcanonicalize0(f32), %ptr(p1) :: (volatile store (f32), addrspace 1) + %src0:_(i32) = COPY $vgpr0 %ptr:_(p1) = G_IMPLICIT_DEF + %2:_(f32) = G_BITCAST %src0(i32) + %fcanonicalize0:_(f32) = G_FCANONICALIZE %2 + G_STORE %fcanonicalize0(f32), %ptr(p1) :: (volatile store (f32), addrspace 1) + %fcanonicalize1:_(f32) = G_FCANONICALIZE %fcanonicalize0 + G_STORE %fcanonicalize1(f32), %ptr(p1) :: (volatile store (f32), addrspace 1) + G_STORE %fcanonicalize1(f32), %ptr(p1) :: (volatile store (f32), addrspace 1) - %fcanonicalize0:_(s32) = G_FCANONICALIZE %src0 - G_STORE %fcanonicalize0, %ptr :: (volatile store (s32), addrspace 1, align 4) - %fcanonicalize1:_(s32) = G_FCANONICALIZE %fcanonicalize0 - G_STORE %fcanonicalize1, %ptr :: (volatile store (s32), addrspace 1, align 4) - G_STORE %fcanonicalize1, %ptr :: (volatile store (s32), addrspace 1, align 4) ... --- @@ -83,14 +89,18 @@ body: | ; CHECK-LABEL: name: test_fcanonicalize_copy_fcanonicalize_s32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %src0:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %fcanonicalize0:_(s32) = G_FCANONICALIZE %src0 + ; CHECK-NEXT: %src0:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f32) = G_BITCAST %src0(i32) + ; CHECK-NEXT: %fcanonicalize0:_(f32) = G_FCANONICALIZE [[BITCAST]] ; CHECK-NEXT: %ptr:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE %fcanonicalize0(s32), %ptr(p1) :: (store (s32), addrspace 1) - %src0:_(s32) = COPY $vgpr0 - %fcanonicalize0:_(s32) = G_FCANONICALIZE %src0 - %copy:_(s32) = COPY %fcanonicalize0 - %fcanonicalize1:_(s32) = G_FCANONICALIZE %copy + ; CHECK-NEXT: G_STORE %fcanonicalize0(f32), %ptr(p1) :: (store (f32), addrspace 1) + %src0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %src0(i32) + %fcanonicalize0:_(f32) = G_FCANONICALIZE %1 + %3:_(i32) = G_BITCAST %fcanonicalize0(f32) + %copy:_(i32) = COPY %3(i32) + %5:_(f32) = G_BITCAST %copy(i32) + %fcanonicalize1:_(f32) = G_FCANONICALIZE %5 %ptr:_(p1) = G_IMPLICIT_DEF - G_STORE %fcanonicalize1, %ptr :: (store (s32), addrspace 1, align 4) + G_STORE %fcanonicalize1(f32), %ptr(p1) :: (store (f32), addrspace 1) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-freeze.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-freeze.mir index 9f6d85baddc32..ccdf7ddb59963 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-freeze.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-freeze.mir @@ -11,15 +11,15 @@ body: | ; CHECK-LABEL: name: test_freeze_freeze_s32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %src0:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %freeze0:_(s32) = G_FREEZE %src0 + ; CHECK-NEXT: %src0:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %freeze0:_(i32) = G_FREEZE %src0 ; CHECK-NEXT: %ptr:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE %freeze0(s32), %ptr(p1) :: (store (s32), addrspace 1) - %src0:_(s32) = COPY $vgpr0 - %freeze0:_(s32) = G_FREEZE %src0 - %freeze1:_(s32) = G_FREEZE %freeze0 + ; CHECK-NEXT: G_STORE %freeze0(i32), %ptr(p1) :: (store (i32), addrspace 1) + %src0:_(i32) = COPY $vgpr0 + %freeze0:_(i32) = G_FREEZE %src0 + %freeze1:_(i32) = G_FREEZE %freeze0 %ptr:_(p1) = G_IMPLICIT_DEF - G_STORE %freeze1, %ptr :: (store (s32), addrspace 1, align 4) + G_STORE %freeze1(i32), %ptr(p1) :: (store (i32), addrspace 1) ... --- @@ -32,16 +32,16 @@ body: | ; CHECK-LABEL: name: test_freeze_freeze_freeze_s32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %src0:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %freeze0:_(s32) = G_FREEZE %src0 + ; CHECK-NEXT: %src0:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %freeze0:_(i32) = G_FREEZE %src0 ; CHECK-NEXT: %ptr:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE %freeze0(s32), %ptr(p1) :: (store (s32), addrspace 1) - %src0:_(s32) = COPY $vgpr0 - %freeze0:_(s32) = G_FREEZE %src0 - %freeze1:_(s32) = G_FREEZE %freeze0 - %freeze2:_(s32) = G_FREEZE %freeze1 + ; CHECK-NEXT: G_STORE %freeze0(i32), %ptr(p1) :: (store (i32), addrspace 1) + %src0:_(i32) = COPY $vgpr0 + %freeze0:_(i32) = G_FREEZE %src0 + %freeze1:_(i32) = G_FREEZE %freeze0 + %freeze2:_(i32) = G_FREEZE %freeze1 %ptr:_(p1) = G_IMPLICIT_DEF - G_STORE %freeze2, %ptr :: (store (s32), addrspace 1, align 4) + G_STORE %freeze2(i32), %ptr(p1) :: (store (i32), addrspace 1) ... 
--- @@ -55,22 +55,22 @@ body: | ; CHECK-LABEL: name: test_freeze_freeze_s32_multi_use ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %src0:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: %src0:_(i32) = COPY $vgpr0 ; CHECK-NEXT: %ptr:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: %freeze0:_(s32) = G_FREEZE %src0 - ; CHECK-NEXT: G_STORE %freeze0(s32), %ptr(p1) :: (volatile store (s32), addrspace 1) - ; CHECK-NEXT: G_STORE %freeze0(s32), %ptr(p1) :: (volatile store (s32), addrspace 1) - ; CHECK-NEXT: G_STORE %freeze0(s32), %ptr(p1) :: (volatile store (s32), addrspace 1) - %src0:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: %freeze0:_(i32) = G_FREEZE %src0 + ; CHECK-NEXT: G_STORE %freeze0(i32), %ptr(p1) :: (volatile store (i32), addrspace 1) + ; CHECK-NEXT: G_STORE %freeze0(i32), %ptr(p1) :: (volatile store (i32), addrspace 1) + ; CHECK-NEXT: G_STORE %freeze0(i32), %ptr(p1) :: (volatile store (i32), addrspace 1) + %src0:_(i32) = COPY $vgpr0 %ptr:_(p1) = G_IMPLICIT_DEF + %freeze0:_(i32) = G_FREEZE %src0 + G_STORE %freeze0(i32), %ptr(p1) :: (volatile store (i32), addrspace 1) + %freeze1:_(i32) = G_FREEZE %freeze0 + G_STORE %freeze1(i32), %ptr(p1) :: (volatile store (i32), addrspace 1) + G_STORE %freeze1(i32), %ptr(p1) :: (volatile store (i32), addrspace 1) - %freeze0:_(s32) = G_FREEZE %src0 - G_STORE %freeze0, %ptr :: (volatile store (s32), addrspace 1, align 4) - %freeze1:_(s32) = G_FREEZE %freeze0 - G_STORE %freeze1, %ptr :: (volatile store (s32), addrspace 1, align 4) - G_STORE %freeze1, %ptr :: (volatile store (s32), addrspace 1, align 4) ... --- @@ -83,14 +83,14 @@ body: | ; CHECK-LABEL: name: test_freeze_copy_freeze_s32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %src0:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %freeze0:_(s32) = G_FREEZE %src0 + ; CHECK-NEXT: %src0:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %freeze0:_(i32) = G_FREEZE %src0 ; CHECK-NEXT: %ptr:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE %freeze0(s32), %ptr(p1) :: (store (s32), addrspace 1) - %src0:_(s32) = COPY $vgpr0 - %freeze0:_(s32) = G_FREEZE %src0 - %copy:_(s32) = COPY %freeze0 - %freeze1:_(s32) = G_FREEZE %copy + ; CHECK-NEXT: G_STORE %freeze0(i32), %ptr(p1) :: (store (i32), addrspace 1) + %src0:_(i32) = COPY $vgpr0 + %freeze0:_(i32) = G_FREEZE %src0 + %copy:_(i32) = COPY %freeze0(i32) + %freeze1:_(i32) = G_FREEZE %copy %ptr:_(p1) = G_IMPLICIT_DEF - G_STORE %freeze1, %ptr :: (store (s32), addrspace 1, align 4) + G_STORE %freeze1(i32), %ptr(p1) :: (store (i32), addrspace 1) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-reassoc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-reassoc.mir index 25aa329eabc47..5419bc6287b83 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-reassoc.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-reassoc.mir @@ -12,19 +12,19 @@ body: | ; CHECK-LABEL: name: test_reassoc_infinite_loop ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[C]] - ; CHECK-NEXT: $vgpr0 = COPY [[ADD]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i32) = G_ADD [[COPY]], [[C]] + ; CHECK-NEXT: $vgpr0 = COPY [[ADD]](i32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_CONSTANT i32 0 - %2:_(s32) = G_CONSTANT i32 1 - %3:_(s1) = G_ICMP intpred(eq), %1(s32), %1 - %4:_(s32) = G_SELECT %3(s1), %2, %1 - %5:_(s32) = COPY %4(s32) - %6:_(s32) = G_ADD %0, %5 - %7:_(s32) = G_ADD %6, %2 - $vgpr0 = COPY %7(s32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_CONSTANT i32 0 + %2:_(i32) = G_CONSTANT i32 1 + %3:_(i1) = G_ICMP intpred(eq), %1(i32), %1 + %4:_(i32) = G_SELECT %3(i1), %2, %1 + %5:_(i32) = COPY %4(i32) + %6:_(i32) = G_ADD %0, %5 + %7:_(i32) = G_ADD %6, %2 + $vgpr0 = COPY %7(i32) SI_RETURN implicit $vgpr0 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-sextload-from-sextinreg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-sextload-from-sextinreg.mir index 23b80528c80a9..4bc7d59b5b460 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-sextload-from-sextinreg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-sextload-from-sextinreg.mir @@ -13,13 +13,13 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1) - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[LOAD]], 8 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p1) :: (load (i64), addrspace 1) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[LOAD]], 8 + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](i64) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_LOAD %0 :: (load (s64), align 8, addrspace 1) - %2:_(s64) = G_SEXT_INREG %1, 8 - $vgpr0_vgpr1 = COPY %2 + %1:_(i64) = G_LOAD %0(p1) :: (load (i64), addrspace 1) + %2:_(i64) = G_SEXT_INREG %1, 8 + $vgpr0_vgpr1 = COPY %2(i64) ... 
# Legal to fold into sextload @@ -34,12 +34,12 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) - ; CHECK-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32) + ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(i32) = G_SEXTLOAD [[COPY]](p1) :: (load (i8), align 4, addrspace 1) + ; CHECK-NEXT: $vgpr0 = COPY [[SEXTLOAD]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 1) - %2:_(s32) = G_SEXT_INREG %1, 8 - $vgpr0 = COPY %2 + %1:_(i32) = G_LOAD %0(p1) :: (load (i32), addrspace 1) + %2:_(i32) = G_SEXT_INREG %1, 8 + $vgpr0 = COPY %2(i32) ... --- @@ -53,13 +53,13 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 7 - ; CHECK-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LOAD]], 7 + ; CHECK-NEXT: $vgpr0 = COPY [[SEXT_INREG]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 1) - %2:_(s32) = G_SEXT_INREG %1, 7 - $vgpr0 = COPY %2 + %1:_(i32) = G_LOAD %0(p1) :: (load (i32), addrspace 1) + %2:_(i32) = G_SEXT_INREG %1, 7 + $vgpr0 = COPY %2(i32) ... --- @@ -73,13 +73,13 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 9 - ; CHECK-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[LOAD]], 9 + ; CHECK-NEXT: $vgpr0 = COPY [[SEXT_INREG]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 1) - %2:_(s32) = G_SEXT_INREG %1, 9 - $vgpr0 = COPY %2 + %1:_(i32) = G_LOAD %0(p1) :: (load (i32), addrspace 1) + %2:_(i32) = G_SEXT_INREG %1, 9 + $vgpr0 = COPY %2(i32) ... # Legal to fold into sextload @@ -94,12 +94,12 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) - ; CHECK-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32) + ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(i32) = G_SEXTLOAD [[COPY]](p1) :: (load (i16), align 4, addrspace 1) + ; CHECK-NEXT: $vgpr0 = COPY [[SEXTLOAD]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 1) - %2:_(s32) = G_SEXT_INREG %1, 16 - $vgpr0 = COPY %2 + %1:_(i32) = G_LOAD %0(p1) :: (load (i32), addrspace 1) + %2:_(i32) = G_SEXT_INREG %1, 16 + $vgpr0 = COPY %2(i32) ... 
--- @@ -113,12 +113,12 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; CHECK-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32) + ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(i32) = G_SEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; CHECK-NEXT: $vgpr0 = COPY [[SEXTLOAD]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_LOAD %0 :: (load (s8), align 1, addrspace 1) - %2:_(s32) = G_SEXT_INREG %1, 8 - $vgpr0 = COPY %2 + %1:_(i32) = G_LOAD %0(p1) :: (load (i8), addrspace 1) + %2:_(i32) = G_SEXT_INREG %1, 8 + $vgpr0 = COPY %2(i32) ... --- @@ -132,12 +132,12 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (volatile load (s8), addrspace 1) - ; CHECK-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32) + ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(i32) = G_SEXTLOAD [[COPY]](p1) :: (volatile load (i8), addrspace 1) + ; CHECK-NEXT: $vgpr0 = COPY [[SEXTLOAD]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_LOAD %0 :: (volatile load (s8), align 1, addrspace 1) - %2:_(s32) = G_SEXT_INREG %1, 8 - $vgpr0 = COPY %2 + %1:_(i32) = G_LOAD %0(p1) :: (volatile load (i8), addrspace 1) + %2:_(i32) = G_SEXT_INREG %1, 8 + $vgpr0 = COPY %2(i32) ... --- @@ -151,12 +151,12 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; CHECK-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32) + ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(i32) = G_SEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; CHECK-NEXT: $vgpr0 = COPY [[SEXTLOAD]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_LOAD %0 :: (load (s16), align 2, addrspace 1) - %2:_(s32) = G_SEXT_INREG %1, 16 - $vgpr0 = COPY %2 + %1:_(i32) = G_LOAD %0(p1) :: (load (i16), addrspace 1) + %2:_(i32) = G_SEXT_INREG %1, 16 + $vgpr0 = COPY %2(i32) ... --- @@ -170,10 +170,10 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (volatile load (s16), addrspace 1) - ; CHECK-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32) + ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(i32) = G_SEXTLOAD [[COPY]](p1) :: (volatile load (i16), addrspace 1) + ; CHECK-NEXT: $vgpr0 = COPY [[SEXTLOAD]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_LOAD %0 :: (volatile load (s16), align 2, addrspace 1) - %2:_(s32) = G_SEXT_INREG %1, 16 - $vgpr0 = COPY %2 + %1:_(i32) = G_LOAD %0(p1) :: (volatile load (i16), addrspace 1) + %2:_(i32) = G_SEXT_INREG %1, 16 + $vgpr0 = COPY %2(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-trunc-bitcast-buildvector.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-trunc-bitcast-buildvector.mir index 600cefbcc161a..5c33a319fb86f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-trunc-bitcast-buildvector.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-trunc-bitcast-buildvector.mir @@ -10,23 +10,23 @@ body: | ; CHECK-LABEL: name: s16_trunc_v2s16_buildvector ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 42 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC]], [[C]] - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(<2 x s16>) = G_BUILD_VECTOR %2, %3 - %5:_(s32) = G_BITCAST %4 - %6:_(s16) = G_TRUNC %5 - %7:_(s16) = G_CONSTANT i16 42 - %8:_(s16) = G_OR %7, %6 - %9:_(s32) = G_ZEXT %8 - $vgpr0 = COPY %9 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 42 + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[TRUNC]], [[C]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(<2 x i16>) = G_BUILD_VECTOR %2(i16), %3(i16) + %5:_(i32) = G_BITCAST %4(<2 x i16>) + %6:_(i16) = G_TRUNC %5(i32) + %7:_(i16) = G_CONSTANT i16 42 + %8:_(i16) = G_OR %7, %6 + %9:_(i32) = G_ZEXT %8(i16) + $vgpr0 = COPY %9(i32) ... 
--- @@ -38,32 +38,32 @@ body: | ; CHECK-LABEL: name: s16_trunc_v2s16_buildvector_shift8_nofold ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[BUILD_VECTOR]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 42 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC2]], [[C1]] - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(<2 x s16>) = G_BUILD_VECTOR %2, %3 - %5:_(s32) = G_BITCAST %4 - %6:_(s32) = G_CONSTANT i32 8 - %7:_(s32) = G_LSHR %5, %6 - %8:_(s16) = G_TRUNC %7 - %9:_(s16) = G_CONSTANT i16 42 - %10:_(s16) = G_OR %9, %8 - %11:_(s32) = G_ZEXT %10 - $vgpr0 = COPY %11 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i16) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i16>) = G_BUILD_VECTOR [[TRUNC]](i16), [[TRUNC1]](i16) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i32) = G_BITCAST [[BUILD_VECTOR]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(i16) = G_TRUNC [[LSHR]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i16) = G_CONSTANT i16 42 + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[TRUNC2]], [[C1]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(<2 x i16>) = G_BUILD_VECTOR %2(i16), %3(i16) + %5:_(i32) = G_BITCAST %4(<2 x i16>) + %6:_(i32) = G_CONSTANT i32 8 + %7:_(i32) = G_LSHR %5, %6(i32) + %8:_(i16) = G_TRUNC %7(i32) + %9:_(i16) = G_CONSTANT i16 42 + %10:_(i16) = G_OR %9, %8 + %11:_(i32) = G_ZEXT %10(i16) + $vgpr0 = COPY %11(i32) ... 
--- @@ -75,25 +75,25 @@ body: | ; CHECK-LABEL: name: s16_trunc_v2s16_buildvector_shift16 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 42 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC]], [[C]] - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(<2 x s16>) = G_BUILD_VECTOR %2, %3 - %5:_(s32) = G_BITCAST %4 - %6:_(s32) = G_CONSTANT i32 16 - %7:_(s32) = G_LSHR %5, %6 - %8:_(s16) = G_TRUNC %7 - %9:_(s16) = G_CONSTANT i16 42 - %10:_(s16) = G_OR %9, %8 - %11:_(s32) = G_ZEXT %10 - $vgpr0 = COPY %11 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 42 + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[TRUNC]], [[C]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(<2 x i16>) = G_BUILD_VECTOR %2(i16), %3(i16) + %5:_(i32) = G_BITCAST %4(<2 x i16>) + %6:_(i32) = G_CONSTANT i32 16 + %7:_(i32) = G_LSHR %5, %6(i32) + %8:_(i16) = G_TRUNC %7(i32) + %9:_(i16) = G_CONSTANT i16 42 + %10:_(i16) = G_OR %9, %8 + %11:_(i32) = G_ZEXT %10(i16) + $vgpr0 = COPY %11(i32) ... --- @@ -105,24 +105,24 @@ body: | ; CHECK-LABEL: name: s16_trunc_v2s32_buildvector_nofold ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 42 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC]], [[C]] - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(<2 x s32>) = G_BUILD_VECTOR %0, %1 - %3:_(s64) = G_BITCAST %2 - %4:_(s16) = G_TRUNC %3 - %5:_(s16) = G_CONSTANT i16 42 - %6:_(s16) = G_OR %5, %4 - %7:_(s32) = G_ZEXT %6 - $vgpr0 = COPY %7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i64) = G_BITCAST [[BUILD_VECTOR]](<2 x i32>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i16) = G_TRUNC [[BITCAST]](i64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i16) = G_CONSTANT i16 42 + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i16) = G_OR [[TRUNC]], [[C]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i32) = G_ZEXT [[OR]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(<2 x i32>) = G_BUILD_VECTOR %0(i32), %1(i32) + %3:_(i64) = G_BITCAST %2(<2 x i32>) + %4:_(i16) = G_TRUNC %3(i64) + %5:_(i16) = G_CONSTANT i16 42 + %6:_(i16) = G_OR %5, %4 + %7:_(i32) = G_ZEXT %6(i16) + $vgpr0 = COPY %7(i32) ... 
--- @@ -134,14 +134,14 @@ body: | ; CHECK-LABEL: name: s32_trunc_v2s32_buildvector ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(<2 x s32>) = G_BUILD_VECTOR %0, %1 - %3:_(s64) = G_BITCAST %2 - %4:_(s32) = G_TRUNC %3 - $vgpr0 = COPY %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(<2 x i32>) = G_BUILD_VECTOR %0(i32), %1(i32) + %3:_(i64) = G_BITCAST %2(<2 x i32>) + %4:_(i32) = G_TRUNC %3(i64) + $vgpr0 = COPY %4(i32) ... --- @@ -153,21 +153,21 @@ body: | ; CHECK-LABEL: name: s32_trunc_v2s32_buildvector_multiple_users ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>) - ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<2 x s32>), [[COPY1]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[EVEC]](s32) - ; CHECK-NEXT: $vgpr2_vgpr3 = COPY [[BITCAST]](s64) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(<2 x s32>) = G_BUILD_VECTOR %0, %1 - %3:_(s64) = G_BITCAST %2 - %4:_(s32) = G_TRUNC %3 - %5:_(s32) = G_EXTRACT_VECTOR_ELT %2, %1 - $vgpr0 = COPY %4 - $vgpr1 = COPY %5 - $vgpr2_vgpr3 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(i64) = G_BITCAST [[BUILD_VECTOR]](<2 x i32>) + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(i32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<2 x i32>), [[COPY1]](i32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](i32) + ; CHECK-NEXT: $vgpr1 = COPY [[EVEC]](i32) + ; CHECK-NEXT: $vgpr2_vgpr3 = COPY [[BITCAST]](i64) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(<2 x i32>) = G_BUILD_VECTOR %0(i32), %1(i32) + %3:_(i64) = G_BITCAST %2(<2 x i32>) + %4:_(i32) = G_TRUNC %3(i64) + %5:_(i32) = G_EXTRACT_VECTOR_ELT %2(<2 x i32>), %1(i32) + $vgpr0 = COPY %4(i32) + $vgpr1 = COPY %5(i32) + $vgpr2_vgpr3 = COPY %3(i64) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-unmerge-undef.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-unmerge-undef.mir index 4d8f8b0ec8821..0e70fba5030a2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-unmerge-undef.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-unmerge-undef.mir @@ -13,9 +13,9 @@ body: | %ptr1:_(p1) = COPY $vgpr0_vgpr1 %ptr2:_(p1) = COPY $vgpr2_vgpr3 %ptr3:_(p1) = COPY $vgpr4_vgpr5 - %vec:_(<3 x s32>) = G_IMPLICIT_DEF - %p1:_(s32), %p2:_(s32), %p3:_(s32) = G_UNMERGE_VALUES %vec - G_STORE %p1:_(s32), %ptr1:_(p1) :: (store (s32), addrspace 1, align 4) - G_STORE %p2:_(s32), %ptr2:_(p1) :: (store (s32), addrspace 1, align 4) - G_STORE %p3:_(s32), %ptr3:_(p1) :: (store (s32), addrspace 1, align 4) + %vec:_(<3 x i32>) = G_IMPLICIT_DEF + %p1:_(i32), %p2:_(i32), %p3:_(i32) = G_UNMERGE_VALUES %vec(<3 x i32>) + G_STORE %p1(i32), %ptr1(p1) :: (store (i32), addrspace 1) + G_STORE %p2(i32), %ptr2(p1) :: (store (i32), addrspace 1) + G_STORE %p3(i32), %ptr3(p1) :: (store (i32), addrspace 1) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-zextload-from-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-zextload-from-and.mir index 73e06de1923df..7ee5074b75924 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-zextload-from-and.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-zextload-from-and.mir @@ -13,15 +13,15 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1) - ; CHECK-NEXT: %k:_(s64) = G_CONSTANT i64 4294967295 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[LOAD]], %k - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AND]](s64) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p1) :: (load (i64), addrspace 1) + ; CHECK-NEXT: %k:_(i64) = G_CONSTANT i64 4294967295 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[LOAD]], %k + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AND]](i64) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_LOAD %0 :: (load (s64), align 8, addrspace 1) - %k:_(s64) = G_CONSTANT i64 4294967295 - %2:_(s64) = G_AND %1, %k - $vgpr0_vgpr1 = COPY %2 + %1:_(i64) = G_LOAD %0(p1) :: (load (i64), addrspace 1) + %k:_(i64) = G_CONSTANT i64 4294967295 + %3:_(i64) = G_AND %1, %k + $vgpr0_vgpr1 = COPY %3(i64) ... # Legal to fold into zextload @@ -36,13 +36,13 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) - ; CHECK-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32) + ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), align 4, addrspace 1) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 1) - %k:_(s32) = G_CONSTANT i32 255 - %2:_(s32) = G_AND %1, %k - $vgpr0 = COPY %2 + %1:_(i32) = G_LOAD %0(p1) :: (load (i32), addrspace 1) + %k:_(i32) = G_CONSTANT i32 255 + %3:_(i32) = G_AND %1, %k + $vgpr0 = COPY %3(i32) ... 
--- @@ -56,15 +56,15 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; CHECK-NEXT: %k:_(s32) = G_CONSTANT i32 127 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], %k - ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; CHECK-NEXT: %k:_(i32) = G_CONSTANT i32 127 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], %k + ; CHECK-NEXT: $vgpr0 = COPY [[AND]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 1) - %k:_(s32) = G_CONSTANT i32 127 - %2:_(s32) = G_AND %1, %k - $vgpr0 = COPY %2 + %1:_(i32) = G_LOAD %0(p1) :: (load (i32), addrspace 1) + %k:_(i32) = G_CONSTANT i32 127 + %3:_(i32) = G_AND %1, %k + $vgpr0 = COPY %3(i32) ... --- @@ -78,15 +78,15 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) - ; CHECK-NEXT: %k:_(s32) = G_CONSTANT i32 511 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], %k - ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i32) = G_LOAD [[COPY]](p1) :: (load (i32), addrspace 1) + ; CHECK-NEXT: %k:_(i32) = G_CONSTANT i32 511 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[LOAD]], %k + ; CHECK-NEXT: $vgpr0 = COPY [[AND]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 1) - %k:_(s32) = G_CONSTANT i32 511 - %2:_(s32) = G_AND %1, %k - $vgpr0 = COPY %2 + %1:_(i32) = G_LOAD %0(p1) :: (load (i32), addrspace 1) + %k:_(i32) = G_CONSTANT i32 511 + %3:_(i32) = G_AND %1, %k + $vgpr0 = COPY %3(i32) ... # Legal to fold into zextload @@ -101,13 +101,13 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) - ; CHECK-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32) + ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), align 4, addrspace 1) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 1) - %k:_(s32) = G_CONSTANT i32 65535 - %2:_(s32) = G_AND %1, %k - $vgpr0 = COPY %2 + %1:_(i32) = G_LOAD %0(p1) :: (load (i32), addrspace 1) + %k:_(i32) = G_CONSTANT i32 65535 + %3:_(i32) = G_AND %1, %k + $vgpr0 = COPY %3(i32) ... --- @@ -121,13 +121,13 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; CHECK-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32) + ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i8), addrspace 1) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_LOAD %0 :: (load (s8), align 1, addrspace 1) - %k:_(s32) = G_CONSTANT i32 255 - %2:_(s32) = G_AND %1, %k - $vgpr0 = COPY %2 + %1:_(i32) = G_LOAD %0(p1) :: (load (i8), addrspace 1) + %k:_(i32) = G_CONSTANT i32 255 + %3:_(i32) = G_AND %1, %k + $vgpr0 = COPY %3(i32) ... 
--- @@ -141,13 +141,13 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (volatile load (s8), addrspace 1) - ; CHECK-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32) + ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (volatile load (i8), addrspace 1) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_LOAD %0 :: (volatile load (s8), align 1, addrspace 1) - %k:_(s32) = G_CONSTANT i32 255 - %2:_(s32) = G_AND %1, %k - $vgpr0 = COPY %2 + %1:_(i32) = G_LOAD %0(p1) :: (volatile load (i8), addrspace 1) + %k:_(i32) = G_CONSTANT i32 255 + %3:_(i32) = G_AND %1, %k + $vgpr0 = COPY %3(i32) ... --- @@ -161,13 +161,13 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; CHECK-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32) + ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (load (i16), addrspace 1) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_LOAD %0 :: (load (s16), align 2, addrspace 1) - %k:_(s32) = G_CONSTANT i32 65535 - %2:_(s32) = G_AND %1, %k - $vgpr0 = COPY %2 + %1:_(i32) = G_LOAD %0(p1) :: (load (i16), addrspace 1) + %k:_(i32) = G_CONSTANT i32 65535 + %3:_(i32) = G_AND %1, %k + $vgpr0 = COPY %3(i32) ... --- @@ -181,11 +181,11 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (volatile load (s16), addrspace 1) - ; CHECK-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32) + ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(i32) = G_ZEXTLOAD [[COPY]](p1) :: (volatile load (i16), addrspace 1) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](i32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_LOAD %0 :: (volatile load (s16), align 2, addrspace 1) - %k:_(s32) = G_CONSTANT i32 65535 - %2:_(s32) = G_AND %1, %k - $vgpr0 = COPY %2 + %1:_(i32) = G_LOAD %0(p1) :: (volatile load (i16), addrspace 1) + %k:_(i32) = G_CONSTANT i32 65535 + %3:_(i32) = G_AND %1, %k + $vgpr0 = COPY %3(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-and.mir index fdc22a23f7416..a4599434996c2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-and.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-and.mir @@ -14,13 +14,13 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: %and:_(s32) = G_ZEXTLOAD %ptr(p1) :: (load (s8), addrspace 1) - ; CHECK-NEXT: $vgpr0 = COPY %and(s32) + ; CHECK-NEXT: %and:_(i32) = G_ZEXTLOAD %ptr(p1) :: (load (i8), addrspace 1) + ; CHECK-NEXT: $vgpr0 = COPY %and(i32) %ptr:_(p1) = COPY $vgpr0_vgpr1 - %load:_(s32) = G_ZEXTLOAD %ptr :: (load (s8), addrspace 1, align 1) - %mask:_(s32) = G_CONSTANT i32 255 - %and:_(s32) = G_AND %load, %mask - $vgpr0 = COPY %and + %load:_(i32) = G_ZEXTLOAD %ptr(p1) :: (load (i8), addrspace 1) + %mask:_(i32) = G_CONSTANT i32 255 + %and:_(i32) = G_AND %load, %mask + $vgpr0 = COPY %and(i32) ... 
@@ -36,18 +36,18 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %ptr0:_(p1) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: %ptr1:_(p1) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: %load0:_(s32) = G_ZEXTLOAD %ptr0(p1) :: (load (s8), addrspace 1) - ; CHECK-NEXT: %load1:_(s32) = G_ZEXTLOAD %ptr1(p1) :: (load (s8), addrspace 1) - ; CHECK-NEXT: %smin:_(s32) = G_SMIN %load0, %load1 - ; CHECK-NEXT: $vgpr0 = COPY %smin(s32) + ; CHECK-NEXT: %load0:_(i32) = G_ZEXTLOAD %ptr0(p1) :: (load (i8), addrspace 1) + ; CHECK-NEXT: %load1:_(i32) = G_ZEXTLOAD %ptr1(p1) :: (load (i8), addrspace 1) + ; CHECK-NEXT: %smin:_(i32) = G_SMIN %load0, %load1 + ; CHECK-NEXT: $vgpr0 = COPY %smin(i32) %ptr0:_(p1) = COPY $vgpr0_vgpr1 %ptr1:_(p1) = COPY $vgpr2_vgpr3 - %load0:_(s32) = G_ZEXTLOAD %ptr0 :: (load (s8), addrspace 1, align 1) - %load1:_(s32) = G_ZEXTLOAD %ptr1 :: (load (s8), addrspace 1, align 1) - %smin:_(s32) = G_SMIN %load0, %load1 - %mask:_(s32) = G_CONSTANT i32 255 - %and:_(s32) = G_AND %smin, %mask - $vgpr0 = COPY %and + %load0:_(i32) = G_ZEXTLOAD %ptr0(p1) :: (load (i8), addrspace 1) + %load1:_(i32) = G_ZEXTLOAD %ptr1(p1) :: (load (i8), addrspace 1) + %smin:_(i32) = G_SMIN %load0, %load1 + %mask:_(i32) = G_CONSTANT i32 255 + %and:_(i32) = G_AND %smin, %mask + $vgpr0 = COPY %and(i32) ... @@ -63,18 +63,18 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %ptr0:_(p1) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: %ptr1:_(p1) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: %load0:_(s32) = G_ZEXTLOAD %ptr0(p1) :: (load (s8), addrspace 1) - ; CHECK-NEXT: %load1:_(s32) = G_ZEXTLOAD %ptr1(p1) :: (load (s8), addrspace 1) - ; CHECK-NEXT: %smax:_(s32) = G_SMAX %load0, %load1 - ; CHECK-NEXT: $vgpr0 = COPY %smax(s32) + ; CHECK-NEXT: %load0:_(i32) = G_ZEXTLOAD %ptr0(p1) :: (load (i8), addrspace 1) + ; CHECK-NEXT: %load1:_(i32) = G_ZEXTLOAD %ptr1(p1) :: (load (i8), addrspace 1) + ; CHECK-NEXT: %smax:_(i32) = G_SMAX %load0, %load1 + ; CHECK-NEXT: $vgpr0 = COPY %smax(i32) %ptr0:_(p1) = COPY $vgpr0_vgpr1 %ptr1:_(p1) = COPY $vgpr2_vgpr3 - %load0:_(s32) = G_ZEXTLOAD %ptr0 :: (load (s8), addrspace 1, align 1) - %load1:_(s32) = G_ZEXTLOAD %ptr1 :: (load (s8), addrspace 1, align 1) - %smax:_(s32) = G_SMAX %load0, %load1 - %mask:_(s32) = G_CONSTANT i32 255 - %and:_(s32) = G_AND %smax, %mask - $vgpr0 = COPY %and + %load0:_(i32) = G_ZEXTLOAD %ptr0(p1) :: (load (i8), addrspace 1) + %load1:_(i32) = G_ZEXTLOAD %ptr1(p1) :: (load (i8), addrspace 1) + %smax:_(i32) = G_SMAX %load0, %load1 + %mask:_(i32) = G_CONSTANT i32 255 + %and:_(i32) = G_AND %smax, %mask + $vgpr0 = COPY %and(i32) ... 
@@ -90,18 +90,18 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %ptr0:_(p1) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: %ptr1:_(p1) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: %load0:_(s32) = G_ZEXTLOAD %ptr0(p1) :: (load (s8), addrspace 1) - ; CHECK-NEXT: %load1:_(s32) = G_ZEXTLOAD %ptr1(p1) :: (load (s8), addrspace 1) - ; CHECK-NEXT: %umin:_(s32) = G_UMIN %load0, %load1 - ; CHECK-NEXT: $vgpr0 = COPY %umin(s32) + ; CHECK-NEXT: %load0:_(i32) = G_ZEXTLOAD %ptr0(p1) :: (load (i8), addrspace 1) + ; CHECK-NEXT: %load1:_(i32) = G_ZEXTLOAD %ptr1(p1) :: (load (i8), addrspace 1) + ; CHECK-NEXT: %umin:_(i32) = G_UMIN %load0, %load1 + ; CHECK-NEXT: $vgpr0 = COPY %umin(i32) %ptr0:_(p1) = COPY $vgpr0_vgpr1 %ptr1:_(p1) = COPY $vgpr2_vgpr3 - %load0:_(s32) = G_ZEXTLOAD %ptr0 :: (load (s8), addrspace 1, align 1) - %load1:_(s32) = G_ZEXTLOAD %ptr1 :: (load (s8), addrspace 1, align 1) - %umin:_(s32) = G_UMIN %load0, %load1 - %mask:_(s32) = G_CONSTANT i32 255 - %and:_(s32) = G_AND %umin, %mask - $vgpr0 = COPY %and + %load0:_(i32) = G_ZEXTLOAD %ptr0(p1) :: (load (i8), addrspace 1) + %load1:_(i32) = G_ZEXTLOAD %ptr1(p1) :: (load (i8), addrspace 1) + %umin:_(i32) = G_UMIN %load0, %load1 + %mask:_(i32) = G_CONSTANT i32 255 + %and:_(i32) = G_AND %umin, %mask + $vgpr0 = COPY %and(i32) ... @@ -117,18 +117,18 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %ptr0:_(p1) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: %ptr1:_(p1) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: %load0:_(s32) = G_ZEXTLOAD %ptr0(p1) :: (load (s8), addrspace 1) - ; CHECK-NEXT: %load1:_(s32) = G_ZEXTLOAD %ptr1(p1) :: (load (s8), addrspace 1) - ; CHECK-NEXT: %umax:_(s32) = G_UMAX %load0, %load1 - ; CHECK-NEXT: $vgpr0 = COPY %umax(s32) + ; CHECK-NEXT: %load0:_(i32) = G_ZEXTLOAD %ptr0(p1) :: (load (i8), addrspace 1) + ; CHECK-NEXT: %load1:_(i32) = G_ZEXTLOAD %ptr1(p1) :: (load (i8), addrspace 1) + ; CHECK-NEXT: %umax:_(i32) = G_UMAX %load0, %load1 + ; CHECK-NEXT: $vgpr0 = COPY %umax(i32) %ptr0:_(p1) = COPY $vgpr0_vgpr1 %ptr1:_(p1) = COPY $vgpr2_vgpr3 - %load0:_(s32) = G_ZEXTLOAD %ptr0 :: (load (s8), addrspace 1, align 1) - %load1:_(s32) = G_ZEXTLOAD %ptr1 :: (load (s8), addrspace 1, align 1) - %umax:_(s32) = G_UMAX %load0, %load1 - %mask:_(s32) = G_CONSTANT i32 255 - %and:_(s32) = G_AND %umax, %mask - $vgpr0 = COPY %and + %load0:_(i32) = G_ZEXTLOAD %ptr0(p1) :: (load (i8), addrspace 1) + %load1:_(i32) = G_ZEXTLOAD %ptr1(p1) :: (load (i8), addrspace 1) + %umax:_(i32) = G_UMAX %load0, %load1 + %mask:_(i32) = G_CONSTANT i32 255 + %and:_(i32) = G_AND %umax, %mask + $vgpr0 = COPY %and(i32) ... 
@@ -145,20 +145,20 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %ptr0:_(p1) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: %ptr1:_(p1) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: %load0:_(s32) = G_LOAD %ptr0(p1) :: (load (s32), addrspace 1) - ; CHECK-NEXT: %load1:_(s32) = G_ZEXTLOAD %ptr1(p1) :: (load (s8), addrspace 1) - ; CHECK-NEXT: %smin:_(s32) = G_SMIN %load0, %load1 - ; CHECK-NEXT: %mask:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: %and:_(s32) = G_AND %smin, %mask - ; CHECK-NEXT: $vgpr0 = COPY %and(s32) + ; CHECK-NEXT: %load0:_(i32) = G_LOAD %ptr0(p1) :: (load (i32), addrspace 1) + ; CHECK-NEXT: %load1:_(i32) = G_ZEXTLOAD %ptr1(p1) :: (load (i8), addrspace 1) + ; CHECK-NEXT: %smin:_(i32) = G_SMIN %load0, %load1 + ; CHECK-NEXT: %mask:_(i32) = G_CONSTANT i32 255 + ; CHECK-NEXT: %and:_(i32) = G_AND %smin, %mask + ; CHECK-NEXT: $vgpr0 = COPY %and(i32) %ptr0:_(p1) = COPY $vgpr0_vgpr1 %ptr1:_(p1) = COPY $vgpr2_vgpr3 - %load0:_(s32) = G_LOAD %ptr0 :: (load (s32), addrspace 1, align 4) - %load1:_(s32) = G_ZEXTLOAD %ptr1 :: (load (s8), addrspace 1, align 1) - %smin:_(s32) = G_SMIN %load0, %load1 - %mask:_(s32) = G_CONSTANT i32 255 - %and:_(s32) = G_AND %smin, %mask - $vgpr0 = COPY %and + %load0:_(i32) = G_LOAD %ptr0(p1) :: (load (i32), addrspace 1) + %load1:_(i32) = G_ZEXTLOAD %ptr1(p1) :: (load (i8), addrspace 1) + %smin:_(i32) = G_SMIN %load0, %load1 + %mask:_(i32) = G_CONSTANT i32 255 + %and:_(i32) = G_AND %smin, %mask + $vgpr0 = COPY %and(i32) ... @@ -175,20 +175,20 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %ptr0:_(p1) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: %ptr1:_(p1) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: %load0:_(s32) = G_ZEXTLOAD %ptr0(p1) :: (load (s8), addrspace 1) - ; CHECK-NEXT: %load1:_(s32) = G_LOAD %ptr1(p1) :: (load (s32), addrspace 1) - ; CHECK-NEXT: %smin:_(s32) = G_SMIN %load0, %load1 - ; CHECK-NEXT: %mask:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: %and:_(s32) = G_AND %smin, %mask - ; CHECK-NEXT: $vgpr0 = COPY %and(s32) + ; CHECK-NEXT: %load0:_(i32) = G_ZEXTLOAD %ptr0(p1) :: (load (i8), addrspace 1) + ; CHECK-NEXT: %load1:_(i32) = G_LOAD %ptr1(p1) :: (load (i32), addrspace 1) + ; CHECK-NEXT: %smin:_(i32) = G_SMIN %load0, %load1 + ; CHECK-NEXT: %mask:_(i32) = G_CONSTANT i32 255 + ; CHECK-NEXT: %and:_(i32) = G_AND %smin, %mask + ; CHECK-NEXT: $vgpr0 = COPY %and(i32) %ptr0:_(p1) = COPY $vgpr0_vgpr1 %ptr1:_(p1) = COPY $vgpr2_vgpr3 - %load0:_(s32) = G_ZEXTLOAD %ptr0 :: (load (s8), addrspace 1, align 1) - %load1:_(s32) = G_LOAD %ptr1 :: (load (s32), addrspace 1, align 4) - %smin:_(s32) = G_SMIN %load0, %load1 - %mask:_(s32) = G_CONSTANT i32 255 - %and:_(s32) = G_AND %smin, %mask - $vgpr0 = COPY %and + %load0:_(i32) = G_ZEXTLOAD %ptr0(p1) :: (load (i8), addrspace 1) + %load1:_(i32) = G_LOAD %ptr1(p1) :: (load (i32), addrspace 1) + %smin:_(i32) = G_SMIN %load0, %load1 + %mask:_(i32) = G_CONSTANT i32 255 + %and:_(i32) = G_AND %smin, %mask + $vgpr0 = COPY %and(i32) ... 
@@ -204,15 +204,15 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: %lds_size:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.groupstaticsize) - ; CHECK-NEXT: %mask:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: %and:_(s32) = G_AND %lds_size, %mask - ; CHECK-NEXT: $vgpr0 = COPY %and(s32) + ; CHECK-NEXT: %lds_size:_(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.groupstaticsize) + ; CHECK-NEXT: %mask:_(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: %and:_(i32) = G_AND %lds_size, %mask + ; CHECK-NEXT: $vgpr0 = COPY %and(i32) %ptr:_(p1) = COPY $vgpr0_vgpr1 - %lds_size:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.groupstaticsize) - %mask:_(s32) = G_CONSTANT i32 65535 - %and:_(s32) = G_AND %lds_size, %mask - $vgpr0 = COPY %and + %lds_size:_(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.groupstaticsize) + %mask:_(i32) = G_CONSTANT i32 65535 + %and:_(i32) = G_AND %lds_size, %mask + $vgpr0 = COPY %and(i32) ... @@ -227,13 +227,13 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: %lds_size:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.groupstaticsize) - ; CHECK-NEXT: $vgpr0 = COPY %lds_size(s32) + ; CHECK-NEXT: %lds_size:_(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.groupstaticsize) + ; CHECK-NEXT: $vgpr0 = COPY %lds_size(i32) %ptr:_(p1) = COPY $vgpr0_vgpr1 - %lds_size:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.groupstaticsize) - %mask:_(s32) = G_CONSTANT i32 131071 - %and:_(s32) = G_AND %lds_size, %mask - $vgpr0 = COPY %and + %lds_size:_(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.groupstaticsize) + %mask:_(i32) = G_CONSTANT i32 131071 + %and:_(i32) = G_AND %lds_size, %mask + $vgpr0 = COPY %and(i32) ... @@ -248,15 +248,15 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: %lds_size:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.groupstaticsize) - ; CHECK-NEXT: %mask:_(s32) = G_CONSTANT i32 65536 - ; CHECK-NEXT: %and:_(s32) = G_AND %lds_size, %mask - ; CHECK-NEXT: $vgpr0 = COPY %and(s32) + ; CHECK-NEXT: %lds_size:_(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.groupstaticsize) + ; CHECK-NEXT: %mask:_(i32) = G_CONSTANT i32 65536 + ; CHECK-NEXT: %and:_(i32) = G_AND %lds_size, %mask + ; CHECK-NEXT: $vgpr0 = COPY %and(i32) %ptr:_(p1) = COPY $vgpr0_vgpr1 - %lds_size:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.groupstaticsize) - %mask:_(s32) = G_CONSTANT i32 65536 - %and:_(s32) = G_AND %lds_size, %mask - $vgpr0 = COPY %and + %lds_size:_(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.groupstaticsize) + %mask:_(i32) = G_CONSTANT i32 65536 + %and:_(i32) = G_AND %lds_size, %mask + $vgpr0 = COPY %and(i32) ... 
@@ -271,15 +271,15 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: %lds_size:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.groupstaticsize) - ; CHECK-NEXT: %mask:_(s32) = G_CONSTANT i32 32767 - ; CHECK-NEXT: %and:_(s32) = G_AND %lds_size, %mask - ; CHECK-NEXT: $vgpr0 = COPY %and(s32) + ; CHECK-NEXT: %lds_size:_(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.groupstaticsize) + ; CHECK-NEXT: %mask:_(i32) = G_CONSTANT i32 32767 + ; CHECK-NEXT: %and:_(i32) = G_AND %lds_size, %mask + ; CHECK-NEXT: $vgpr0 = COPY %and(i32) %ptr:_(p1) = COPY $vgpr0_vgpr1 - %lds_size:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.groupstaticsize) - %mask:_(s32) = G_CONSTANT i32 32767 - %and:_(s32) = G_AND %lds_size, %mask - $vgpr0 = COPY %and + %lds_size:_(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.groupstaticsize) + %mask:_(i32) = G_CONSTANT i32 32767 + %and:_(i32) = G_AND %lds_size, %mask + $vgpr0 = COPY %and(i32) ... @@ -297,17 +297,17 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %ptr0:_(p1) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: %ptr1:_(p1) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: %val:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: %k255:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: %umin0:_(s32) = G_UMIN %val, %k255 - ; CHECK-NEXT: $vgpr0 = COPY %umin0(s32) + ; CHECK-NEXT: %val:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: %k255:_(i32) = G_CONSTANT i32 255 + ; CHECK-NEXT: %umin0:_(i32) = G_UMIN %val, %k255 + ; CHECK-NEXT: $vgpr0 = COPY %umin0(i32) %ptr0:_(p1) = COPY $vgpr0_vgpr1 %ptr1:_(p1) = COPY $vgpr2_vgpr3 - %val:_(s32) = COPY $vgpr4 - %k255:_(s32) = G_CONSTANT i32 255 - %umin0:_(s32) = G_UMIN %val, %k255 - %and:_(s32) = G_AND %umin0, %k255 - $vgpr0 = COPY %and + %val:_(i32) = COPY $vgpr4 + %k255:_(i32) = G_CONSTANT i32 255 + %umin0:_(i32) = G_UMIN %val, %k255 + %and:_(i32) = G_AND %umin0, %k255 + $vgpr0 = COPY %and(i32) ... @@ -324,16 +324,16 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %ptr0:_(p1) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: %ptr1:_(p1) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: %val:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: %k255:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: %umin0:_(s32) = G_UMIN %val, %k255 - ; CHECK-NEXT: $vgpr0 = COPY %umin0(s32) + ; CHECK-NEXT: %val:_(i32) = COPY $vgpr4 + ; CHECK-NEXT: %k255:_(i32) = G_CONSTANT i32 255 + ; CHECK-NEXT: %umin0:_(i32) = G_UMIN %val, %k255 + ; CHECK-NEXT: $vgpr0 = COPY %umin0(i32) %ptr0:_(p1) = COPY $vgpr0_vgpr1 %ptr1:_(p1) = COPY $vgpr2_vgpr3 - %val:_(s32) = COPY $vgpr4 - %k255:_(s32) = G_CONSTANT i32 255 - %umin0:_(s32) = G_UMIN %k255, %val - %and:_(s32) = G_AND %umin0, %k255 - $vgpr0 = COPY %and + %val:_(i32) = COPY $vgpr4 + %k255:_(i32) = G_CONSTANT i32 255 + %umin0:_(i32) = G_UMIN %k255, %val + %and:_(i32) = G_AND %umin0, %k255 + $vgpr0 = COPY %and(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-ashr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-ashr.mir index 61162db8e3622..915ce21d6bfd9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-ashr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-ashr.mir @@ -10,12 +10,12 @@ tracksRegLiveness: true body: | bb.0: ; CHECK-LABEL: name: constant_fold_ashr_s32_s32 - ; CHECK: %shift:_(s32) = G_CONSTANT i32 -482254 - ; CHECK-NEXT: $vgpr0 = COPY %shift(s32) - %val:_(s32) = G_CONSTANT i32 -123456789 - %shift_amt:_(s32) = G_CONSTANT i32 8 - %shift:_(s32) = G_ASHR %val, %shift_amt - $vgpr0 = COPY %shift + ; CHECK: %shift:_(i32) = G_CONSTANT i32 -482254 + ; CHECK-NEXT: $vgpr0 = COPY %shift(i32) + %val:_(i32) = G_CONSTANT i32 -123456789 + %shift_amt:_(i32) = G_CONSTANT i32 8 + %shift:_(i32) = G_ASHR %val, %shift_amt(i32) + $vgpr0 = COPY %shift(i32) ... @@ -26,13 +26,13 @@ tracksRegLiveness: true body: | bb.0: ; CHECK-LABEL: name: constant_fold_ashr_s16_s16 - ; CHECK: %ext:_(s32) = G_CONSTANT i32 64764 - ; CHECK-NEXT: $vgpr0 = COPY %ext(s32) - %val:_(s16) = G_CONSTANT i16 -12345 - %shift_amt:_(s16) = G_CONSTANT i16 4 - %shift:_(s16) = G_ASHR %val, %shift_amt - %ext:_(s32) = G_ANYEXT %shift - $vgpr0 = COPY %ext + ; CHECK: %ext:_(i32) = G_CONSTANT i32 64764 + ; CHECK-NEXT: $vgpr0 = COPY %ext(i32) + %val:_(i16) = G_CONSTANT i16 -12345 + %shift_amt:_(i16) = G_CONSTANT i16 4 + %shift:_(i16) = G_ASHR %val, %shift_amt(i16) + %ext:_(i32) = G_ANYEXT %shift(i16) + $vgpr0 = COPY %ext(i32) ... @@ -43,12 +43,12 @@ tracksRegLiveness: true body: | bb.0: ; CHECK-LABEL: name: constant_fold_ashr_s64_s32 - ; CHECK: %shift:_(s64) = G_CONSTANT i64 -482254 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %shift(s64) - %val:_(s64) = G_CONSTANT i64 -123456789 - %shift_amt:_(s32) = G_CONSTANT i32 8 - %shift:_(s64) = G_ASHR %val, %shift_amt - $vgpr0_vgpr1 = COPY %shift + ; CHECK: %shift:_(i64) = G_CONSTANT i64 -482254 + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %shift(i64) + %val:_(i64) = G_CONSTANT i64 -123456789 + %shift_amt:_(i32) = G_CONSTANT i32 8 + %shift:_(i64) = G_ASHR %val, %shift_amt(i32) + $vgpr0_vgpr1 = COPY %shift(i64) ... 
@@ -59,20 +59,20 @@ tracksRegLiveness: true body: | bb.0: ; CHECK-LABEL: name: constant_fold_ashr_v2s16_v2s16 - ; CHECK: %val0:_(s16) = G_CONSTANT i16 1234 - ; CHECK-NEXT: %val1:_(s16) = G_CONSTANT i16 -5678 - ; CHECK-NEXT: %val:_(<2 x s16>) = G_BUILD_VECTOR %val0(s16), %val1(s16) - ; CHECK-NEXT: %shift_amt_elt:_(s16) = G_CONSTANT i16 8 - ; CHECK-NEXT: %shift_amt:_(<2 x s16>) = G_BUILD_VECTOR %shift_amt_elt(s16), %shift_amt_elt(s16) - ; CHECK-NEXT: %shift:_(<2 x s16>) = G_ASHR %val, %shift_amt(<2 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY %shift(<2 x s16>) - %val0:_(s16) = G_CONSTANT i16 1234 - %val1:_(s16) = G_CONSTANT i16 -5678 - %val:_(<2 x s16>) = G_BUILD_VECTOR %val0, %val1 - %shift_amt_elt:_(s16) = G_CONSTANT i16 8 - %shift_amt:_(<2 x s16>) = G_BUILD_VECTOR %shift_amt_elt, %shift_amt_elt - %shift:_(<2 x s16>) = G_ASHR %val, %shift_amt - $vgpr0 = COPY %shift + ; CHECK: %val0:_(i16) = G_CONSTANT i16 1234 + ; CHECK-NEXT: %val1:_(i16) = G_CONSTANT i16 -5678 + ; CHECK-NEXT: %val:_(<2 x i16>) = G_BUILD_VECTOR %val0(i16), %val1(i16) + ; CHECK-NEXT: %shift_amt_elt:_(i16) = G_CONSTANT i16 8 + ; CHECK-NEXT: %shift_amt:_(<2 x i16>) = G_BUILD_VECTOR %shift_amt_elt(i16), %shift_amt_elt(i16) + ; CHECK-NEXT: %shift:_(<2 x i16>) = G_ASHR %val, %shift_amt(<2 x i16>) + ; CHECK-NEXT: $vgpr0 = COPY %shift(<2 x i16>) + %val0:_(i16) = G_CONSTANT i16 1234 + %val1:_(i16) = G_CONSTANT i16 -5678 + %val:_(<2 x i16>) = G_BUILD_VECTOR %val0(i16), %val1(i16) + %shift_amt_elt:_(i16) = G_CONSTANT i16 8 + %shift_amt:_(<2 x i16>) = G_BUILD_VECTOR %shift_amt_elt(i16), %shift_amt_elt(i16) + %shift:_(<2 x i16>) = G_ASHR %val, %shift_amt(<2 x i16>) + $vgpr0 = COPY %shift(<2 x i16>) ... @@ -83,22 +83,22 @@ tracksRegLiveness: true body: | bb.0: ; CHECK-LABEL: name: constant_fold_ashr_v2s16_v2s16_undef_amt_elt - ; CHECK: %val0:_(s16) = G_CONSTANT i16 1234 - ; CHECK-NEXT: %val1:_(s16) = G_CONSTANT i16 -5678 - ; CHECK-NEXT: %val:_(<2 x s16>) = G_BUILD_VECTOR %val0(s16), %val1(s16) - ; CHECK-NEXT: %shift_amt_elt:_(s16) = G_CONSTANT i16 8 - ; CHECK-NEXT: %undef:_(s16) = G_IMPLICIT_DEF - ; CHECK-NEXT: %shift_amt:_(<2 x s16>) = G_BUILD_VECTOR %shift_amt_elt(s16), %undef(s16) - ; CHECK-NEXT: %shift:_(<2 x s16>) = G_ASHR %val, %shift_amt(<2 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY %shift(<2 x s16>) - %val0:_(s16) = G_CONSTANT i16 1234 - %val1:_(s16) = G_CONSTANT i16 -5678 - %val:_(<2 x s16>) = G_BUILD_VECTOR %val0, %val1 - %shift_amt_elt:_(s16) = G_CONSTANT i16 8 - %undef:_(s16) = G_IMPLICIT_DEF - %shift_amt:_(<2 x s16>) = G_BUILD_VECTOR %shift_amt_elt, %undef - %shift:_(<2 x s16>) = G_ASHR %val, %shift_amt - $vgpr0 = COPY %shift + ; CHECK: %val0:_(i16) = G_CONSTANT i16 1234 + ; CHECK-NEXT: %val1:_(i16) = G_CONSTANT i16 -5678 + ; CHECK-NEXT: %val:_(<2 x i16>) = G_BUILD_VECTOR %val0(i16), %val1(i16) + ; CHECK-NEXT: %shift_amt_elt:_(i16) = G_CONSTANT i16 8 + ; CHECK-NEXT: %undef:_(i16) = G_IMPLICIT_DEF + ; CHECK-NEXT: %shift_amt:_(<2 x i16>) = G_BUILD_VECTOR %shift_amt_elt(i16), %undef(i16) + ; CHECK-NEXT: %shift:_(<2 x i16>) = G_ASHR %val, %shift_amt(<2 x i16>) + ; CHECK-NEXT: $vgpr0 = COPY %shift(<2 x i16>) + %val0:_(i16) = G_CONSTANT i16 1234 + %val1:_(i16) = G_CONSTANT i16 -5678 + %val:_(<2 x i16>) = G_BUILD_VECTOR %val0(i16), %val1(i16) + %shift_amt_elt:_(i16) = G_CONSTANT i16 8 + %undef:_(i16) = G_IMPLICIT_DEF + %shift_amt:_(<2 x i16>) = G_BUILD_VECTOR %shift_amt_elt(i16), %undef(i16) + %shift:_(<2 x i16>) = G_ASHR %val, %shift_amt(<2 x i16>) + $vgpr0 = COPY %shift(<2 x i16>) ... 
@@ -109,19 +109,19 @@ tracksRegLiveness: true body: | bb.0: ; CHECK-LABEL: name: constant_fold_ashr_v2s16_v2s16_undef_val_elt - ; CHECK: %val0:_(s16) = G_CONSTANT i16 -1234 - ; CHECK-NEXT: %undef:_(s16) = G_IMPLICIT_DEF - ; CHECK-NEXT: %val:_(<2 x s16>) = G_BUILD_VECTOR %val0(s16), %undef(s16) - ; CHECK-NEXT: %shift_amt_elt:_(s16) = G_CONSTANT i16 8 - ; CHECK-NEXT: %shift_amt:_(<2 x s16>) = G_BUILD_VECTOR %shift_amt_elt(s16), %shift_amt_elt(s16) - ; CHECK-NEXT: %shift:_(<2 x s16>) = G_ASHR %val, %shift_amt(<2 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY %shift(<2 x s16>) - %val0:_(s16) = G_CONSTANT i16 -1234 - %undef:_(s16) = G_IMPLICIT_DEF - %val:_(<2 x s16>) = G_BUILD_VECTOR %val0, %undef - %shift_amt_elt:_(s16) = G_CONSTANT i16 8 - %shift_amt:_(<2 x s16>) = G_BUILD_VECTOR %shift_amt_elt, %shift_amt_elt - %shift:_(<2 x s16>) = G_ASHR %val, %shift_amt - $vgpr0 = COPY %shift + ; CHECK: %val0:_(i16) = G_CONSTANT i16 -1234 + ; CHECK-NEXT: %undef:_(i16) = G_IMPLICIT_DEF + ; CHECK-NEXT: %val:_(<2 x i16>) = G_BUILD_VECTOR %val0(i16), %undef(i16) + ; CHECK-NEXT: %shift_amt_elt:_(i16) = G_CONSTANT i16 8 + ; CHECK-NEXT: %shift_amt:_(<2 x i16>) = G_BUILD_VECTOR %shift_amt_elt(i16), %shift_amt_elt(i16) + ; CHECK-NEXT: %shift:_(<2 x i16>) = G_ASHR %val, %shift_amt(<2 x i16>) + ; CHECK-NEXT: $vgpr0 = COPY %shift(<2 x i16>) + %val0:_(i16) = G_CONSTANT i16 -1234 + %undef:_(i16) = G_IMPLICIT_DEF + %val:_(<2 x i16>) = G_BUILD_VECTOR %val0(i16), %undef(i16) + %shift_amt_elt:_(i16) = G_CONSTANT i16 8 + %shift_amt:_(<2 x i16>) = G_BUILD_VECTOR %shift_amt_elt(i16), %shift_amt_elt(i16) + %shift:_(<2 x i16>) = G_ASHR %val, %shift_amt(<2 x i16>) + $vgpr0 = COPY %shift(<2 x i16>) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-load-and-mask.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-load-and-mask.mir index 63e9bbb7fcbb2..fde3151e6980c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-load-and-mask.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-load-and-mask.mir @@ -13,13 +13,13 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[LOAD]], [[C]] - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AND]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 255 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(i64) = G_LOAD [[COPY]](p1) :: (load (i64), addrspace 1) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[LOAD]], [[C]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AND]](i64) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_CONSTANT i64 255 - %2:_(s64) = G_LOAD %0 :: (load (s64), align 8, addrspace 1) - %3:_(s64) = G_AND %2, %1 - $vgpr0_vgpr1 = COPY %3 + %1:_(i64) = G_CONSTANT i64 255 + %2:_(i64) = G_LOAD %0(p1) :: (load (i64), addrspace 1) + %3:_(i64) = G_AND %2, %1 + $vgpr0_vgpr1 = COPY %3(i64) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-lshr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-lshr.mir index 8830c08cb6150..55ecdc7616576 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-lshr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-lshr.mir @@ -10,12 +10,12 @@ tracksRegLiveness: true body: | bb.0: ; CHECK-LABEL: name: constant_fold_lshr_s32_s32 - ; CHECK: %shift:_(s32) = G_CONSTANT i32 16294962 - ; CHECK-NEXT: $vgpr0 = COPY %shift(s32) - %val:_(s32) = G_CONSTANT i32 -123456789 - %shift_amt:_(s32) = G_CONSTANT i32 8 - %shift:_(s32) = G_LSHR %val, %shift_amt - $vgpr0 = COPY %shift + ; CHECK: %shift:_(i32) = G_CONSTANT i32 16294962 + ; CHECK-NEXT: $vgpr0 = COPY %shift(i32) + %val:_(i32) = G_CONSTANT i32 -123456789 + %shift_amt:_(i32) = G_CONSTANT i32 8 + %shift:_(i32) = G_LSHR %val, %shift_amt(i32) + $vgpr0 = COPY %shift(i32) ... @@ -26,13 +26,13 @@ tracksRegLiveness: true body: | bb.0: ; CHECK-LABEL: name: constant_fold_lshr_s16_s16 - ; CHECK: %ext:_(s32) = G_CONSTANT i32 3324 - ; CHECK-NEXT: $vgpr0 = COPY %ext(s32) - %val:_(s16) = G_CONSTANT i16 -12345 - %shift_amt:_(s16) = G_CONSTANT i16 4 - %shift:_(s16) = G_LSHR %val, %shift_amt - %ext:_(s32) = G_ANYEXT %shift - $vgpr0 = COPY %ext + ; CHECK: %ext:_(i32) = G_CONSTANT i32 3324 + ; CHECK-NEXT: $vgpr0 = COPY %ext(i32) + %val:_(i16) = G_CONSTANT i16 -12345 + %shift_amt:_(i16) = G_CONSTANT i16 4 + %shift:_(i16) = G_LSHR %val, %shift_amt(i16) + %ext:_(i32) = G_ANYEXT %shift(i16) + $vgpr0 = COPY %ext(i32) ... @@ -43,12 +43,12 @@ tracksRegLiveness: true body: | bb.0: ; CHECK-LABEL: name: constant_fold_lshr_s64_s32 - ; CHECK: %shift:_(s64) = G_CONSTANT i64 72057594037445682 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %shift(s64) - %val:_(s64) = G_CONSTANT i64 -123456789 - %shift_amt:_(s32) = G_CONSTANT i32 8 - %shift:_(s64) = G_LSHR %val, %shift_amt - $vgpr0_vgpr1 = COPY %shift + ; CHECK: %shift:_(i64) = G_CONSTANT i64 72057594037445682 + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %shift(i64) + %val:_(i64) = G_CONSTANT i64 -123456789 + %shift_amt:_(i32) = G_CONSTANT i32 8 + %shift:_(i64) = G_LSHR %val, %shift_amt(i32) + $vgpr0_vgpr1 = COPY %shift(i64) ... 
@@ -59,20 +59,20 @@ tracksRegLiveness: true body: | bb.0: ; CHECK-LABEL: name: constant_fold_lshr_v2s16_v2s16 - ; CHECK: %val0:_(s16) = G_CONSTANT i16 1234 - ; CHECK-NEXT: %val1:_(s16) = G_CONSTANT i16 -5678 - ; CHECK-NEXT: %val:_(<2 x s16>) = G_BUILD_VECTOR %val0(s16), %val1(s16) - ; CHECK-NEXT: %shift_amt_elt:_(s16) = G_CONSTANT i16 8 - ; CHECK-NEXT: %shift_amt:_(<2 x s16>) = G_BUILD_VECTOR %shift_amt_elt(s16), %shift_amt_elt(s16) - ; CHECK-NEXT: %shift:_(<2 x s16>) = G_LSHR %val, %shift_amt(<2 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY %shift(<2 x s16>) - %val0:_(s16) = G_CONSTANT i16 1234 - %val1:_(s16) = G_CONSTANT i16 -5678 - %val:_(<2 x s16>) = G_BUILD_VECTOR %val0, %val1 - %shift_amt_elt:_(s16) = G_CONSTANT i16 8 - %shift_amt:_(<2 x s16>) = G_BUILD_VECTOR %shift_amt_elt, %shift_amt_elt - %shift:_(<2 x s16>) = G_LSHR %val, %shift_amt - $vgpr0 = COPY %shift + ; CHECK: %val0:_(i16) = G_CONSTANT i16 1234 + ; CHECK-NEXT: %val1:_(i16) = G_CONSTANT i16 -5678 + ; CHECK-NEXT: %val:_(<2 x i16>) = G_BUILD_VECTOR %val0(i16), %val1(i16) + ; CHECK-NEXT: %shift_amt_elt:_(i16) = G_CONSTANT i16 8 + ; CHECK-NEXT: %shift_amt:_(<2 x i16>) = G_BUILD_VECTOR %shift_amt_elt(i16), %shift_amt_elt(i16) + ; CHECK-NEXT: %shift:_(<2 x i16>) = G_LSHR %val, %shift_amt(<2 x i16>) + ; CHECK-NEXT: $vgpr0 = COPY %shift(<2 x i16>) + %val0:_(i16) = G_CONSTANT i16 1234 + %val1:_(i16) = G_CONSTANT i16 -5678 + %val:_(<2 x i16>) = G_BUILD_VECTOR %val0(i16), %val1(i16) + %shift_amt_elt:_(i16) = G_CONSTANT i16 8 + %shift_amt:_(<2 x i16>) = G_BUILD_VECTOR %shift_amt_elt(i16), %shift_amt_elt(i16) + %shift:_(<2 x i16>) = G_LSHR %val, %shift_amt(<2 x i16>) + $vgpr0 = COPY %shift(<2 x i16>) ... @@ -83,22 +83,22 @@ tracksRegLiveness: true body: | bb.0: ; CHECK-LABEL: name: constant_fold_lshr_v2s16_v2s16_undef_amt_elt - ; CHECK: %val0:_(s16) = G_CONSTANT i16 1234 - ; CHECK-NEXT: %val1:_(s16) = G_CONSTANT i16 -5678 - ; CHECK-NEXT: %val:_(<2 x s16>) = G_BUILD_VECTOR %val0(s16), %val1(s16) - ; CHECK-NEXT: %shift_amt_elt:_(s16) = G_CONSTANT i16 8 - ; CHECK-NEXT: %undef:_(s16) = G_IMPLICIT_DEF - ; CHECK-NEXT: %shift_amt:_(<2 x s16>) = G_BUILD_VECTOR %shift_amt_elt(s16), %undef(s16) - ; CHECK-NEXT: %shift:_(<2 x s16>) = G_LSHR %val, %shift_amt(<2 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY %shift(<2 x s16>) - %val0:_(s16) = G_CONSTANT i16 1234 - %val1:_(s16) = G_CONSTANT i16 -5678 - %val:_(<2 x s16>) = G_BUILD_VECTOR %val0, %val1 - %shift_amt_elt:_(s16) = G_CONSTANT i16 8 - %undef:_(s16) = G_IMPLICIT_DEF - %shift_amt:_(<2 x s16>) = G_BUILD_VECTOR %shift_amt_elt, %undef - %shift:_(<2 x s16>) = G_LSHR %val, %shift_amt - $vgpr0 = COPY %shift + ; CHECK: %val0:_(i16) = G_CONSTANT i16 1234 + ; CHECK-NEXT: %val1:_(i16) = G_CONSTANT i16 -5678 + ; CHECK-NEXT: %val:_(<2 x i16>) = G_BUILD_VECTOR %val0(i16), %val1(i16) + ; CHECK-NEXT: %shift_amt_elt:_(i16) = G_CONSTANT i16 8 + ; CHECK-NEXT: %undef:_(i16) = G_IMPLICIT_DEF + ; CHECK-NEXT: %shift_amt:_(<2 x i16>) = G_BUILD_VECTOR %shift_amt_elt(i16), %undef(i16) + ; CHECK-NEXT: %shift:_(<2 x i16>) = G_LSHR %val, %shift_amt(<2 x i16>) + ; CHECK-NEXT: $vgpr0 = COPY %shift(<2 x i16>) + %val0:_(i16) = G_CONSTANT i16 1234 + %val1:_(i16) = G_CONSTANT i16 -5678 + %val:_(<2 x i16>) = G_BUILD_VECTOR %val0(i16), %val1(i16) + %shift_amt_elt:_(i16) = G_CONSTANT i16 8 + %undef:_(i16) = G_IMPLICIT_DEF + %shift_amt:_(<2 x i16>) = G_BUILD_VECTOR %shift_amt_elt(i16), %undef(i16) + %shift:_(<2 x i16>) = G_LSHR %val, %shift_amt(<2 x i16>) + $vgpr0 = COPY %shift(<2 x i16>) ... 
@@ -109,19 +109,19 @@ tracksRegLiveness: true body: | bb.0: ; CHECK-LABEL: name: constant_fold_lshr_v2s16_v2s16_undef_val_elt - ; CHECK: %val0:_(s16) = G_CONSTANT i16 -1234 - ; CHECK-NEXT: %undef:_(s16) = G_IMPLICIT_DEF - ; CHECK-NEXT: %val:_(<2 x s16>) = G_BUILD_VECTOR %val0(s16), %undef(s16) - ; CHECK-NEXT: %shift_amt_elt:_(s16) = G_CONSTANT i16 8 - ; CHECK-NEXT: %shift_amt:_(<2 x s16>) = G_BUILD_VECTOR %shift_amt_elt(s16), %shift_amt_elt(s16) - ; CHECK-NEXT: %shift:_(<2 x s16>) = G_LSHR %val, %shift_amt(<2 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY %shift(<2 x s16>) - %val0:_(s16) = G_CONSTANT i16 -1234 - %undef:_(s16) = G_IMPLICIT_DEF - %val:_(<2 x s16>) = G_BUILD_VECTOR %val0, %undef - %shift_amt_elt:_(s16) = G_CONSTANT i16 8 - %shift_amt:_(<2 x s16>) = G_BUILD_VECTOR %shift_amt_elt, %shift_amt_elt - %shift:_(<2 x s16>) = G_LSHR %val, %shift_amt - $vgpr0 = COPY %shift + ; CHECK: %val0:_(i16) = G_CONSTANT i16 -1234 + ; CHECK-NEXT: %undef:_(i16) = G_IMPLICIT_DEF + ; CHECK-NEXT: %val:_(<2 x i16>) = G_BUILD_VECTOR %val0(i16), %undef(i16) + ; CHECK-NEXT: %shift_amt_elt:_(i16) = G_CONSTANT i16 8 + ; CHECK-NEXT: %shift_amt:_(<2 x i16>) = G_BUILD_VECTOR %shift_amt_elt(i16), %shift_amt_elt(i16) + ; CHECK-NEXT: %shift:_(<2 x i16>) = G_LSHR %val, %shift_amt(<2 x i16>) + ; CHECK-NEXT: $vgpr0 = COPY %shift(<2 x i16>) + %val0:_(i16) = G_CONSTANT i16 -1234 + %undef:_(i16) = G_IMPLICIT_DEF + %val:_(<2 x i16>) = G_BUILD_VECTOR %val0(i16), %undef(i16) + %shift_amt_elt:_(i16) = G_CONSTANT i16 8 + %shift_amt:_(<2 x i16>) = G_BUILD_VECTOR %shift_amt_elt(i16), %shift_amt_elt(i16) + %shift:_(<2 x i16>) = G_LSHR %val, %shift_amt(<2 x i16>) + $vgpr0 = COPY %shift(<2 x i16>) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-mul.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-mul.mir index 06b600d3f352a..f92030a294d42 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-mul.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-mul.mir @@ -9,14 +9,14 @@ body: | ; CHECK-LABEL: name: mul_s64 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12345 - ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[COPY]], [[C]] - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MUL]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_CONSTANT i64 12345 - %2:_(s64) = G_MUL %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 12345 + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(i64) = G_MUL [[COPY]], [[C]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MUL]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = G_CONSTANT i64 12345 + %2:_(i64) = G_MUL %0, %1 + $vgpr0_vgpr1 = COPY %2(i64) ... 
--- @@ -27,16 +27,16 @@ body: | ; CHECK-LABEL: name: mul_s64_zext ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12345 - ; CHECK-NEXT: [[AMDGPU_:%[0-9]+]]:_(s64) = G_AMDGPU_S_MUL_U64_U32 [[ZEXT]], [[C]] - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AMDGPU_]](s64) - %0:_(s32) = COPY $vgpr0 - %1:_(s64) = G_ZEXT %0 - %2:_(s64) = G_CONSTANT i64 12345 - %3:_(s64) = G_MUL %1, %2 - $vgpr0_vgpr1 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(i64) = G_ZEXT [[COPY]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 12345 + ; CHECK-NEXT: [[AMDGPU_S_MUL_U64_U32_:%[0-9]+]]:_(i64) = G_AMDGPU_S_MUL_U64_U32 [[ZEXT]], [[C]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AMDGPU_S_MUL_U64_U32_]](i64) + %0:_(i32) = COPY $vgpr0 + %1:_(i64) = G_ZEXT %0(i32) + %2:_(i64) = G_CONSTANT i64 12345 + %3:_(i64) = G_MUL %1, %2 + $vgpr0_vgpr1 = COPY %3(i64) ... --- @@ -47,14 +47,14 @@ body: | ; CHECK-LABEL: name: mul_s64_sext ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12345 - ; CHECK-NEXT: [[AMDGPU_:%[0-9]+]]:_(s64) = G_AMDGPU_S_MUL_I64_I32 [[SEXT]], [[C]] - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AMDGPU_]](s64) - %0:_(s32) = COPY $vgpr0 - %1:_(s64) = G_SEXT %0 - %2:_(s64) = G_CONSTANT i64 12345 - %3:_(s64) = G_MUL %1, %2 - $vgpr0_vgpr1 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(i64) = G_SEXT [[COPY]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 12345 + ; CHECK-NEXT: [[AMDGPU_S_MUL_I64_I32_:%[0-9]+]]:_(i64) = G_AMDGPU_S_MUL_I64_I32 [[SEXT]], [[C]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AMDGPU_S_MUL_I64_I32_]](i64) + %0:_(i32) = COPY $vgpr0 + %1:_(i64) = G_SEXT %0(i32) + %2:_(i64) = G_CONSTANT i64 12345 + %3:_(i64) = G_MUL %1, %2 + $vgpr0_vgpr1 = COPY %3(i64) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-sbfx.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-sbfx.mir index 2c545c89da218..dea162f8816fb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-sbfx.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-sbfx.mir @@ -14,17 +14,17 @@ body: | ; GCN-LABEL: name: bfe_sext_inreg_ashr_s32 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GCN-NEXT: [[SBFX:%[0-9]+]]:_(s32) = G_SBFX [[COPY]], [[C]](s32), [[C1]] - ; GCN-NEXT: $vgpr0 = COPY [[SBFX]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_CONSTANT i32 4 - %2:_(s32) = G_ASHR %0, %1(s32) - %3:_(s32) = COPY %2(s32) - %4:_(s32) = G_SEXT_INREG %3, 16 - $vgpr0 = COPY %4(s32) + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GCN-NEXT: [[SBFX:%[0-9]+]]:_(i32) = G_SBFX [[COPY]], [[C]](i32), [[C1]] + ; GCN-NEXT: $vgpr0 = COPY [[SBFX]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_CONSTANT i32 4 + %2:_(i32) = G_ASHR %0, %1(i32) + %3:_(i32) = COPY %2(i32) + %4:_(i32) = G_SEXT_INREG %3, 16 + $vgpr0 = COPY %4(i32) ... 
--- @@ -39,17 +39,17 @@ body: | ; GCN-LABEL: name: bfe_sext_inreg_lshr_s32 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GCN-NEXT: [[SBFX:%[0-9]+]]:_(s32) = G_SBFX [[COPY]], [[C]](s32), [[C1]] - ; GCN-NEXT: $vgpr0 = COPY [[SBFX]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_CONSTANT i32 4 - %2:_(s32) = G_LSHR %0, %1(s32) - %3:_(s32) = COPY %2(s32) - %4:_(s32) = G_SEXT_INREG %3, 16 - $vgpr0 = COPY %4(s32) + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GCN-NEXT: [[SBFX:%[0-9]+]]:_(i32) = G_SBFX [[COPY]], [[C]](i32), [[C1]] + ; GCN-NEXT: $vgpr0 = COPY [[SBFX]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_CONSTANT i32 4 + %2:_(i32) = G_LSHR %0, %1(i32) + %3:_(i32) = COPY %2(i32) + %4:_(i32) = G_SEXT_INREG %3, 16 + $vgpr0 = COPY %4(i32) ... --- @@ -64,17 +64,17 @@ body: | ; GCN-LABEL: name: bfe_sext_inreg_ashr_s64 ; GCN: liveins: $vgpr0_vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GCN-NEXT: [[SBFX:%[0-9]+]]:_(s64) = G_SBFX [[COPY]], [[C]](s32), [[C1]] - ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[SBFX]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_CONSTANT i32 4 - %2:_(s64) = G_ASHR %0, %1(s32) - %3:_(s64) = COPY %2(s64) - %4:_(s64) = G_SEXT_INREG %3, 16 - $vgpr0_vgpr1 = COPY %4(s64) + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 4 + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GCN-NEXT: [[SBFX:%[0-9]+]]:_(i64) = G_SBFX [[COPY]], [[C]](i32), [[C1]] + ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[SBFX]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = G_CONSTANT i32 4 + %2:_(i64) = G_ASHR %0, %1(i32) + %3:_(i64) = COPY %2(i64) + %4:_(i64) = G_SEXT_INREG %3, 16 + $vgpr0_vgpr1 = COPY %4(i64) ... --- @@ -89,17 +89,17 @@ body: | ; GCN-LABEL: name: toobig_sext_inreg_ashr_s32 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GCN-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) - ; GCN-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ASHR]], 20 - ; GCN-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_CONSTANT i32 16 - %2:_(s32) = G_ASHR %0, %1(s32) - %3:_(s32) = COPY %2(s32) - %4:_(s32) = G_SEXT_INREG %3, 20 - $vgpr0 = COPY %4(s32) + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 16 + ; GCN-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[COPY]], [[C]](i32) + ; GCN-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[ASHR]], 20 + ; GCN-NEXT: $vgpr0 = COPY [[SEXT_INREG]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_CONSTANT i32 16 + %2:_(i32) = G_ASHR %0, %1(i32) + %3:_(i32) = COPY %2(i32) + %4:_(i32) = G_SEXT_INREG %3, 20 + $vgpr0 = COPY %4(i32) ... 
--- @@ -114,21 +114,21 @@ body: | ; GCN-LABEL: name: toobig_sext_inreg_ashr_s64 ; GCN: liveins: $vgpr0_vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GCN-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32) - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GCN-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C1]](s32) - ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR1]](s32), [[ASHR]](s32) - ; GCN-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[MV]], 32 - ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_CONSTANT i32 40 - %2:_(s64) = G_ASHR %0, %1(s32) - %3:_(s64) = COPY %2(s64) - %4:_(s64) = G_SEXT_INREG %3, 32 - $vgpr0_vgpr1 = COPY %4(s64) + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GCN-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GCN-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[UV1]], [[C]](i32) + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GCN-NEXT: [[ASHR1:%[0-9]+]]:_(i32) = G_ASHR [[UV1]], [[C1]](i32) + ; GCN-NEXT: [[MV:%[0-9]+]]:_(i64) = G_MERGE_VALUES [[ASHR1]](i32), [[ASHR]](i32) + ; GCN-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i64) = G_SEXT_INREG [[MV]], 32 + ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = G_CONSTANT i32 40 + %2:_(i64) = G_ASHR %0, %1(i32) + %3:_(i64) = COPY %2(i64) + %4:_(i64) = G_SEXT_INREG %3, 32 + $vgpr0_vgpr1 = COPY %4(i64) ... --- @@ -143,15 +143,15 @@ body: | ; GCN-LABEL: name: var_sext_inreg_ashr_s32 ; GCN: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[COPY1]](s32) - ; GCN-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ASHR]], 10 - ; GCN-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_ASHR %0, %1(s32) - %3:_(s32) = COPY %2(s32) - %4:_(s32) = G_SEXT_INREG %3, 10 - $vgpr0 = COPY %4(s32) + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr1 + ; GCN-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[COPY]], [[COPY1]](i32) + ; GCN-NEXT: [[SEXT_INREG:%[0-9]+]]:_(i32) = G_SEXT_INREG [[ASHR]], 10 + ; GCN-NEXT: $vgpr0 = COPY [[SEXT_INREG]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_ASHR %0, %1(i32) + %3:_(i32) = COPY %2(i32) + %4:_(i32) = G_SEXT_INREG %3, 10 + $vgpr0 = COPY %4(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-select.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-select.mir index 047452fa97140..b49d10f345f45 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-select.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-select.mir @@ -11,15 +11,15 @@ body: | ; GCN-LABEL: name: select_from_different_results_of_unmerge_values ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: $vgpr0 = COPY [[DEF]](s32) + ; GCN-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GCN-NEXT: $vgpr0 = COPY [[DEF]](i32) ; GCN-NEXT: SI_RETURN_TO_EPILOG $vgpr0 - %0:_(<2 x s32>) = G_IMPLICIT_DEF - %1:_(s32) = COPY $vgpr0 - %2:_(s1) = G_TRUNC %1:_(s32) - %3:_(s32), %4:_(s32) = G_UNMERGE_VALUES %0:_(<2 x s32>) - %5:_(s32) = G_SELECT %2:_(s1), %3:_, %4:_ - $vgpr0 = COPY %5 + %0:_(<2 x i32>) = G_IMPLICIT_DEF + %1:_(i32) = COPY $vgpr0 + %2:_(i1) = G_TRUNC %1(i32) + %3:_(i32), %4:_(i32) = G_UNMERGE_VALUES %0(<2 x i32>) + %5:_(i32) = G_SELECT %2(i1), %3, %4 + $vgpr0 = COPY %5(i32) SI_RETURN_TO_EPILOG $vgpr0 ... @@ -34,15 +34,15 @@ body: | ; GCN-LABEL: name: select_from_same_results_of_unmerge_values ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: $vgpr0 = COPY [[DEF]](s32) + ; GCN-NEXT: [[DEF:%[0-9]+]]:_(i32) = G_IMPLICIT_DEF + ; GCN-NEXT: $vgpr0 = COPY [[DEF]](i32) ; GCN-NEXT: SI_RETURN_TO_EPILOG $vgpr0 - %0:_(<2 x s32>) = G_IMPLICIT_DEF - %1:_(s32) = COPY $vgpr0 - %2:_(s1) = G_TRUNC %1:_(s32) - %3:_(s32), %4:_(s32) = G_UNMERGE_VALUES %0:_(<2 x s32>) - %5:_(s32) = G_SELECT %2:_(s1), %3:_, %3:_ - $vgpr0 = COPY %5 + %0:_(<2 x i32>) = G_IMPLICIT_DEF + %1:_(i32) = COPY $vgpr0 + %2:_(i1) = G_TRUNC %1(i32) + %3:_(i32), %4:_(i32) = G_UNMERGE_VALUES %0(<2 x i32>) + %5:_(i32) = G_SELECT %2(i1), %3, %3 + $vgpr0 = COPY %5(i32) SI_RETURN_TO_EPILOG $vgpr0 ... 
@@ -57,20 +57,20 @@ body: | ; GCN-LABEL: name: select_different_result_from_different_unmerge_values_with_the_same_source ; GCN: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY1]](s32) - ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; GCN-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; GCN-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC]](s1), [[UV1]], [[UV7]] - ; GCN-NEXT: $vgpr0 = COPY [[SELECT]](s32) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = COPY $vgpr4 - %2:_(s1) = G_TRUNC %1:_(s32) - %3:_(s32), %4:_(s32), %5:_(s32), %6:_(s32) = G_UNMERGE_VALUES %0:_(<4 x s32>) - %7:_(s32), %8:_(s32), %9:_(s32), %10:_(s32) = G_UNMERGE_VALUES %0:_(<4 x s32>) - %11:_(s32) = G_SELECT %2:_(s1), %4:_, %10:_ - $vgpr0 = COPY %11 + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $vgpr4 + ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(i1) = G_TRUNC [[COPY1]](i32) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; GCN-NEXT: [[UV4:%[0-9]+]]:_(i32), [[UV5:%[0-9]+]]:_(i32), [[UV6:%[0-9]+]]:_(i32), [[UV7:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; GCN-NEXT: [[SELECT:%[0-9]+]]:_(i32) = G_SELECT [[TRUNC]](i1), [[UV1]], [[UV7]] + ; GCN-NEXT: $vgpr0 = COPY [[SELECT]](i32) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i32) = COPY $vgpr4 + %2:_(i1) = G_TRUNC %1(i32) + %3:_(i32), %4:_(i32), %5:_(i32), %6:_(i32) = G_UNMERGE_VALUES %0(<4 x i32>) + %7:_(i32), %8:_(i32), %9:_(i32), %10:_(i32) = G_UNMERGE_VALUES %0(<4 x i32>) + %11:_(i32) = G_SELECT %2(i1), %4, %10 + $vgpr0 = COPY %11(i32) ... --- @@ -83,14 +83,14 @@ body: | ; GCN-LABEL: name: select_same_result_from_different_unmerge_values_with_the_same_source ; GCN: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; GCN-NEXT: $vgpr0 = COPY [[UV1]](s32) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = COPY $vgpr4 - %2:_(s1) = G_TRUNC %1:_(s32) - %3:_(s32), %4:_(s32), %5:_(s32), %6:_(s32) = G_UNMERGE_VALUES %0:_(<4 x s32>) - %7:_(s32), %8:_(s32), %9:_(s32), %10:_(s32) = G_UNMERGE_VALUES %0:_(<4 x s32>) - %11:_(s32) = G_SELECT %2:_(s1), %4:_, %8:_ - $vgpr0 = COPY %11 + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GCN-NEXT: [[UV:%[0-9]+]]:_(i32), [[UV1:%[0-9]+]]:_(i32), [[UV2:%[0-9]+]]:_(i32), [[UV3:%[0-9]+]]:_(i32) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; GCN-NEXT: $vgpr0 = COPY [[UV1]](i32) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i32) = COPY $vgpr4 + %2:_(i1) = G_TRUNC %1(i32) + %3:_(i32), %4:_(i32), %5:_(i32), %6:_(i32) = G_UNMERGE_VALUES %0(<4 x i32>) + %7:_(i32), %8:_(i32), %9:_(i32), %10:_(i32) = G_UNMERGE_VALUES %0(<4 x i32>) + %11:_(i32) = G_SELECT %2(i1), %4, %8 + $vgpr0 = COPY %11(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-shl.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-shl.mir index a3389097b0b32..cb47ca7146ab6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-shl.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-shl.mir @@ -10,12 +10,12 @@ tracksRegLiveness: true body: | bb.0: ; CHECK-LABEL: name: constant_fold_shl_s32_s32 - ; CHECK: %shift:_(s32) = G_CONSTANT i32 1540166912 - ; CHECK-NEXT: $vgpr0 = COPY %shift(s32) - %val:_(s32) = G_CONSTANT i32 123456789 - %shift_amt:_(s32) = G_CONSTANT i32 8 - %shift:_(s32) = G_SHL %val, %shift_amt - $vgpr0 = COPY %shift + ; CHECK: %shift:_(i32) = G_CONSTANT i32 1540166912 + ; CHECK-NEXT: $vgpr0 = COPY %shift(i32) + %val:_(i32) = G_CONSTANT i32 123456789 + %shift_amt:_(i32) = G_CONSTANT i32 8 + %shift:_(i32) = G_SHL %val, %shift_amt(i32) + $vgpr0 = COPY %shift(i32) ... @@ -26,13 +26,13 @@ tracksRegLiveness: true body: | bb.0: ; CHECK-LABEL: name: constant_fold_shl_s16_s16 - ; CHECK: %ext:_(s32) = G_CONSTANT i32 912 - ; CHECK-NEXT: $vgpr0 = COPY %ext(s32) - %val:_(s16) = G_CONSTANT i16 12345 - %shift_amt:_(s16) = G_CONSTANT i16 4 - %shift:_(s16) = G_SHL %val, %shift_amt - %ext:_(s32) = G_ANYEXT %shift - $vgpr0 = COPY %ext + ; CHECK: %ext:_(i32) = G_CONSTANT i32 912 + ; CHECK-NEXT: $vgpr0 = COPY %ext(i32) + %val:_(i16) = G_CONSTANT i16 12345 + %shift_amt:_(i16) = G_CONSTANT i16 4 + %shift:_(i16) = G_SHL %val, %shift_amt(i16) + %ext:_(i32) = G_ANYEXT %shift(i16) + $vgpr0 = COPY %ext(i32) ... @@ -43,12 +43,12 @@ tracksRegLiveness: true body: | bb.0: ; CHECK-LABEL: name: constant_fold_shl_s64_s32 - ; CHECK: %shift:_(s64) = G_CONSTANT i64 31604937984 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %shift(s64) - %val:_(s64) = G_CONSTANT i64 123456789 - %shift_amt:_(s32) = G_CONSTANT i32 8 - %shift:_(s64) = G_SHL %val, %shift_amt - $vgpr0_vgpr1 = COPY %shift + ; CHECK: %shift:_(i64) = G_CONSTANT i64 31604937984 + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %shift(i64) + %val:_(i64) = G_CONSTANT i64 123456789 + %shift_amt:_(i32) = G_CONSTANT i32 8 + %shift:_(i64) = G_SHL %val, %shift_amt(i32) + $vgpr0_vgpr1 = COPY %shift(i64) ... 
@@ -59,20 +59,20 @@ tracksRegLiveness: true body: | bb.0: ; CHECK-LABEL: name: constant_fold_shl_v2s16_v2s16 - ; CHECK: %val0:_(s16) = G_CONSTANT i16 1234 - ; CHECK-NEXT: %val1:_(s16) = G_CONSTANT i16 5678 - ; CHECK-NEXT: %val:_(<2 x s16>) = G_BUILD_VECTOR %val0(s16), %val1(s16) - ; CHECK-NEXT: %shift_amt_elt:_(s16) = G_CONSTANT i16 8 - ; CHECK-NEXT: %shift_amt:_(<2 x s16>) = G_BUILD_VECTOR %shift_amt_elt(s16), %shift_amt_elt(s16) - ; CHECK-NEXT: %shift:_(<2 x s16>) = G_SHL %val, %shift_amt(<2 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY %shift(<2 x s16>) - %val0:_(s16) = G_CONSTANT i16 1234 - %val1:_(s16) = G_CONSTANT i16 5678 - %val:_(<2 x s16>) = G_BUILD_VECTOR %val0, %val1 - %shift_amt_elt:_(s16) = G_CONSTANT i16 8 - %shift_amt:_(<2 x s16>) = G_BUILD_VECTOR %shift_amt_elt, %shift_amt_elt - %shift:_(<2 x s16>) = G_SHL %val, %shift_amt - $vgpr0 = COPY %shift + ; CHECK: %val0:_(i16) = G_CONSTANT i16 1234 + ; CHECK-NEXT: %val1:_(i16) = G_CONSTANT i16 5678 + ; CHECK-NEXT: %val:_(<2 x i16>) = G_BUILD_VECTOR %val0(i16), %val1(i16) + ; CHECK-NEXT: %shift_amt_elt:_(i16) = G_CONSTANT i16 8 + ; CHECK-NEXT: %shift_amt:_(<2 x i16>) = G_BUILD_VECTOR %shift_amt_elt(i16), %shift_amt_elt(i16) + ; CHECK-NEXT: %shift:_(<2 x i16>) = G_SHL %val, %shift_amt(<2 x i16>) + ; CHECK-NEXT: $vgpr0 = COPY %shift(<2 x i16>) + %val0:_(i16) = G_CONSTANT i16 1234 + %val1:_(i16) = G_CONSTANT i16 5678 + %val:_(<2 x i16>) = G_BUILD_VECTOR %val0(i16), %val1(i16) + %shift_amt_elt:_(i16) = G_CONSTANT i16 8 + %shift_amt:_(<2 x i16>) = G_BUILD_VECTOR %shift_amt_elt(i16), %shift_amt_elt(i16) + %shift:_(<2 x i16>) = G_SHL %val, %shift_amt(<2 x i16>) + $vgpr0 = COPY %shift(<2 x i16>) ... @@ -83,22 +83,22 @@ tracksRegLiveness: true body: | bb.0: ; CHECK-LABEL: name: constant_fold_shl_v2s16_v2s16_undef_amt_elt - ; CHECK: %val0:_(s16) = G_CONSTANT i16 1234 - ; CHECK-NEXT: %val1:_(s16) = G_CONSTANT i16 5678 - ; CHECK-NEXT: %val:_(<2 x s16>) = G_BUILD_VECTOR %val0(s16), %val1(s16) - ; CHECK-NEXT: %shift_amt_elt:_(s16) = G_CONSTANT i16 8 - ; CHECK-NEXT: %undef:_(s16) = G_IMPLICIT_DEF - ; CHECK-NEXT: %shift_amt:_(<2 x s16>) = G_BUILD_VECTOR %shift_amt_elt(s16), %undef(s16) - ; CHECK-NEXT: %shift:_(<2 x s16>) = G_SHL %val, %shift_amt(<2 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY %shift(<2 x s16>) - %val0:_(s16) = G_CONSTANT i16 1234 - %val1:_(s16) = G_CONSTANT i16 5678 - %val:_(<2 x s16>) = G_BUILD_VECTOR %val0, %val1 - %shift_amt_elt:_(s16) = G_CONSTANT i16 8 - %undef:_(s16) = G_IMPLICIT_DEF - %shift_amt:_(<2 x s16>) = G_BUILD_VECTOR %shift_amt_elt, %undef - %shift:_(<2 x s16>) = G_SHL %val, %shift_amt - $vgpr0 = COPY %shift + ; CHECK: %val0:_(i16) = G_CONSTANT i16 1234 + ; CHECK-NEXT: %val1:_(i16) = G_CONSTANT i16 5678 + ; CHECK-NEXT: %val:_(<2 x i16>) = G_BUILD_VECTOR %val0(i16), %val1(i16) + ; CHECK-NEXT: %shift_amt_elt:_(i16) = G_CONSTANT i16 8 + ; CHECK-NEXT: %undef:_(i16) = G_IMPLICIT_DEF + ; CHECK-NEXT: %shift_amt:_(<2 x i16>) = G_BUILD_VECTOR %shift_amt_elt(i16), %undef(i16) + ; CHECK-NEXT: %shift:_(<2 x i16>) = G_SHL %val, %shift_amt(<2 x i16>) + ; CHECK-NEXT: $vgpr0 = COPY %shift(<2 x i16>) + %val0:_(i16) = G_CONSTANT i16 1234 + %val1:_(i16) = G_CONSTANT i16 5678 + %val:_(<2 x i16>) = G_BUILD_VECTOR %val0(i16), %val1(i16) + %shift_amt_elt:_(i16) = G_CONSTANT i16 8 + %undef:_(i16) = G_IMPLICIT_DEF + %shift_amt:_(<2 x i16>) = G_BUILD_VECTOR %shift_amt_elt(i16), %undef(i16) + %shift:_(<2 x i16>) = G_SHL %val, %shift_amt(<2 x i16>) + $vgpr0 = COPY %shift(<2 x i16>) ... 
@@ -109,19 +109,19 @@ tracksRegLiveness: true body: | bb.0: ; CHECK-LABEL: name: constant_fold_shl_v2s16_v2s16_undef_val_elt - ; CHECK: %val0:_(s16) = G_CONSTANT i16 1234 - ; CHECK-NEXT: %undef:_(s16) = G_IMPLICIT_DEF - ; CHECK-NEXT: %val:_(<2 x s16>) = G_BUILD_VECTOR %val0(s16), %undef(s16) - ; CHECK-NEXT: %shift_amt_elt:_(s16) = G_CONSTANT i16 8 - ; CHECK-NEXT: %shift_amt:_(<2 x s16>) = G_BUILD_VECTOR %shift_amt_elt(s16), %shift_amt_elt(s16) - ; CHECK-NEXT: %shift:_(<2 x s16>) = G_SHL %val, %shift_amt(<2 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY %shift(<2 x s16>) - %val0:_(s16) = G_CONSTANT i16 1234 - %undef:_(s16) = G_IMPLICIT_DEF - %val:_(<2 x s16>) = G_BUILD_VECTOR %val0, %undef - %shift_amt_elt:_(s16) = G_CONSTANT i16 8 - %shift_amt:_(<2 x s16>) = G_BUILD_VECTOR %shift_amt_elt, %shift_amt_elt - %shift:_(<2 x s16>) = G_SHL %val, %shift_amt - $vgpr0 = COPY %shift + ; CHECK: %val0:_(i16) = G_CONSTANT i16 1234 + ; CHECK-NEXT: %undef:_(i16) = G_IMPLICIT_DEF + ; CHECK-NEXT: %val:_(<2 x i16>) = G_BUILD_VECTOR %val0(i16), %undef(i16) + ; CHECK-NEXT: %shift_amt_elt:_(i16) = G_CONSTANT i16 8 + ; CHECK-NEXT: %shift_amt:_(<2 x i16>) = G_BUILD_VECTOR %shift_amt_elt(i16), %shift_amt_elt(i16) + ; CHECK-NEXT: %shift:_(<2 x i16>) = G_SHL %val, %shift_amt(<2 x i16>) + ; CHECK-NEXT: $vgpr0 = COPY %shift(<2 x i16>) + %val0:_(i16) = G_CONSTANT i16 1234 + %undef:_(i16) = G_IMPLICIT_DEF + %val:_(<2 x i16>) = G_BUILD_VECTOR %val0(i16), %undef(i16) + %shift_amt_elt:_(i16) = G_CONSTANT i16 8 + %shift_amt:_(<2 x i16>) = G_BUILD_VECTOR %shift_amt_elt(i16), %shift_amt_elt(i16) + %shift:_(<2 x i16>) = G_SHL %val, %shift_amt(<2 x i16>) + $vgpr0 = COPY %shift(<2 x i16>) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-ubfx.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-ubfx.mir index c9959bfec4419..4d5a739d11bb1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-ubfx.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-ubfx.mir @@ -14,17 +14,17 @@ body: | ; GCN-LABEL: name: bfe_and_lshr_s32 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GCN-NEXT: [[UBFX:%[0-9]+]]:_(s32) = G_UBFX [[COPY]], [[C1]](s32), [[C]] - ; GCN-NEXT: $vgpr0 = COPY [[UBFX]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_CONSTANT i32 8 - %2:_(s32) = G_LSHR %0, %1(s32) - %3:_(s32) = G_CONSTANT i32 31 - %4:_(s32) = G_AND %2, %3 - $vgpr0 = COPY %4(s32) + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 5 + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GCN-NEXT: [[UBFX:%[0-9]+]]:_(i32) = G_UBFX [[COPY]], [[C1]](i32), [[C]] + ; GCN-NEXT: $vgpr0 = COPY [[UBFX]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_CONSTANT i32 8 + %2:_(i32) = G_LSHR %0, %1(i32) + %3:_(i32) = G_CONSTANT i32 31 + %4:_(i32) = G_AND %2, %3 + $vgpr0 = COPY %4(i32) ... 
@@ -40,17 +40,17 @@ body: | ; GCN-LABEL: name: bfe_and_lshr_s64 ; GCN: liveins: $vgpr0_vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GCN-NEXT: [[UBFX:%[0-9]+]]:_(s64) = G_UBFX [[COPY]], [[C1]](s32), [[C]] - ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[UBFX]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_CONSTANT i32 8 - %2:_(s64) = G_LSHR %0, %1(s32) - %3:_(s64) = G_CONSTANT i64 1023 - %4:_(s64) = G_AND %2, %3 - $vgpr0_vgpr1 = COPY %4(s64) + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GCN-NEXT: [[UBFX:%[0-9]+]]:_(i64) = G_UBFX [[COPY]], [[C1]](i32), [[C]] + ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[UBFX]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = G_CONSTANT i32 8 + %2:_(i64) = G_LSHR %0, %1(i32) + %3:_(i64) = G_CONSTANT i64 1023 + %4:_(i64) = G_AND %2, %3 + $vgpr0_vgpr1 = COPY %4(i64) ... @@ -66,16 +66,16 @@ body: | ; GCN-LABEL: name: toobig_and_lshr_s32 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; GCN-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GCN-NEXT: $vgpr0 = COPY [[LSHR]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_CONSTANT i32 28 - %2:_(s32) = G_LSHR %0, %1(s32) - %3:_(s32) = G_CONSTANT i32 511 - %4:_(s32) = G_AND %2, %3 - $vgpr0 = COPY %4(s32) + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 28 + ; GCN-NEXT: [[LSHR:%[0-9]+]]:_(i32) = G_LSHR [[COPY]], [[C]](i32) + ; GCN-NEXT: $vgpr0 = COPY [[LSHR]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_CONSTANT i32 28 + %2:_(i32) = G_LSHR %0, %1(i32) + %3:_(i32) = G_CONSTANT i32 511 + %4:_(i32) = G_AND %2, %3 + $vgpr0 = COPY %4(i32) ... @@ -91,18 +91,18 @@ body: | ; GCN-LABEL: name: bfe_and_ashr_s32 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GCN-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GCN-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C1]] - ; GCN-NEXT: $vgpr0 = COPY [[AND]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_CONSTANT i32 8 - %2:_(s32) = G_ASHR %0, %1(s32) - %3:_(s32) = G_CONSTANT i32 31 - %4:_(s32) = G_AND %2, %3 - $vgpr0 = COPY %4(s32) + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GCN-NEXT: [[ASHR:%[0-9]+]]:_(i32) = G_ASHR [[COPY]], [[C]](i32) + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 31 + ; GCN-NEXT: [[AND:%[0-9]+]]:_(i32) = G_AND [[ASHR]], [[C1]] + ; GCN-NEXT: $vgpr0 = COPY [[AND]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_CONSTANT i32 8 + %2:_(i32) = G_ASHR %0, %1(i32) + %3:_(i32) = G_CONSTANT i32 31 + %4:_(i32) = G_AND %2, %3 + $vgpr0 = COPY %4(i32) ... 
@@ -118,17 +118,17 @@ body: | ; GCN-LABEL: name: bfe_lshr_and_s32 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GCN-NEXT: [[UBFX:%[0-9]+]]:_(s32) = G_UBFX [[COPY]], [[C1]](s32), [[C]] - ; GCN-NEXT: $vgpr0 = COPY [[UBFX]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_CONSTANT i32 7936 ; 31 << 8 - %2:_(s32) = G_AND %0, %1 - %3:_(s32) = G_CONSTANT i32 8 - %4:_(s32) = G_LSHR %2, %3(s32) - $vgpr0 = COPY %4(s32) + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $vgpr0 + ; GCN-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 5 + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GCN-NEXT: [[UBFX:%[0-9]+]]:_(i32) = G_UBFX [[COPY]], [[C1]](i32), [[C]] + ; GCN-NEXT: $vgpr0 = COPY [[UBFX]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_CONSTANT i32 7936 + %2:_(i32) = G_AND %0, %1 + %3:_(i32) = G_CONSTANT i32 8 + %4:_(i32) = G_LSHR %2, %3(i32) + $vgpr0 = COPY %4(i32) ... @@ -144,16 +144,16 @@ body: | ; GCN-LABEL: name: bfe_lshr_and_s64 ; GCN: liveins: $vgpr0_vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GCN-NEXT: [[UBFX:%[0-9]+]]:_(s64) = G_UBFX [[COPY]], [[C1]](s32), [[C]] - ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[UBFX]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_CONSTANT i64 261888 ; 1023 << 8 - %2:_(s64) = G_AND %0, %1 - %3:_(s32) = G_CONSTANT i32 8 - %4:_(s64) = G_LSHR %2, %3(s32) - $vgpr0_vgpr1 = COPY %4(s64) + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 10 + ; GCN-NEXT: [[C1:%[0-9]+]]:_(i32) = G_CONSTANT i32 8 + ; GCN-NEXT: [[UBFX:%[0-9]+]]:_(i64) = G_UBFX [[COPY]], [[C1]](i32), [[C]] + ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[UBFX]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = G_CONSTANT i64 261888 + %2:_(i64) = G_AND %0, %1 + %3:_(i32) = G_CONSTANT i32 8 + %4:_(i64) = G_LSHR %2, %3(i32) + $vgpr0_vgpr1 = COPY %4(i64) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-divrem.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-divrem.mir index 245e740ed8100..d8983d01c5d6d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-divrem.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-divrem.mir @@ -10,21 +10,21 @@ body: | ; CHECK-LABEL: name: test_sdiv_srem ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %src1:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %src2:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: %src1:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %src2:_(i32) = COPY $vgpr1 ; CHECK-NEXT: %ptr1:_(p1) = COPY $vgpr2_vgpr3 ; CHECK-NEXT: %ptr2:_(p1) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: %div:_(s32), %rem:_ = G_SDIVREM %src1, %src2 - ; CHECK-NEXT: G_STORE %div(s32), %ptr1(p1) :: (store (s32), addrspace 1) - ; CHECK-NEXT: G_STORE %rem(s32), %ptr2(p1) :: (store (s32), addrspace 1) - %src1:_(s32) = COPY $vgpr0 - %src2:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: %div:_(i32), %rem:_ = G_SDIVREM %src1, %src2 + ; CHECK-NEXT: G_STORE %div(i32), %ptr1(p1) :: (store (i32), addrspace 1) + ; CHECK-NEXT: G_STORE %rem(i32), %ptr2(p1) :: (store (i32), addrspace 1) + %src1:_(i32) = COPY $vgpr0 + %src2:_(i32) = COPY $vgpr1 %ptr1:_(p1) = COPY $vgpr2_vgpr3 %ptr2:_(p1) = COPY $vgpr4_vgpr5 - %div:_(s32) = G_SDIV %src1:_(s32), %src2:_(s32) - G_STORE %div:_(s32), %ptr1:_(p1) :: (store (s32), addrspace 1, align 4) - %rem:_(s32) = G_SREM %src1:_(s32), %src2:_(s32) - G_STORE %rem:_(s32), %ptr2:_(p1) :: (store (s32), addrspace 1, align 4) + %div:_(i32) = G_SDIV %src1, %src2 + G_STORE %div(i32), %ptr1(p1) :: (store (i32), addrspace 1) + %rem:_(i32) = G_SREM %src1, %src2 + G_STORE %rem(i32), %ptr2(p1) :: (store (i32), addrspace 1) ... 
--- name: test_sdiv_srem_v2 @@ -35,21 +35,21 @@ body: | ; CHECK-LABEL: name: test_sdiv_srem_v2 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %src1:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: %src2:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: %src1:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: %src2:_(<2 x i32>) = COPY $vgpr2_vgpr3 ; CHECK-NEXT: %ptr1:_(p1) = COPY $vgpr4_vgpr5 ; CHECK-NEXT: %ptr2:_(p1) = COPY $vgpr6_vgpr7 - ; CHECK-NEXT: %div:_(<2 x s32>), %rem:_ = G_SDIVREM %src1, %src2 - ; CHECK-NEXT: G_STORE %div(<2 x s32>), %ptr1(p1) :: (store (<2 x s32>), align 4, addrspace 1) - ; CHECK-NEXT: G_STORE %rem(<2 x s32>), %ptr2(p1) :: (store (<2 x s32>), align 4, addrspace 1) - %src1:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %src2:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: %div:_(<2 x i32>), %rem:_ = G_SDIVREM %src1, %src2 + ; CHECK-NEXT: G_STORE %div(<2 x i32>), %ptr1(p1) :: (store (<2 x i32>), align 4, addrspace 1) + ; CHECK-NEXT: G_STORE %rem(<2 x i32>), %ptr2(p1) :: (store (<2 x i32>), align 4, addrspace 1) + %src1:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %src2:_(<2 x i32>) = COPY $vgpr2_vgpr3 %ptr1:_(p1) = COPY $vgpr4_vgpr5 %ptr2:_(p1) = COPY $vgpr6_vgpr7 - %div:_(<2 x s32>) = G_SDIV %src1:_(<2 x s32>), %src2:_(<2 x s32>) - G_STORE %div:_(<2 x s32>), %ptr1:_(p1) :: (store (<2 x s32>), addrspace 1, align 4) - %rem:_(<2 x s32>) = G_SREM %src1:_(<2 x s32>), %src2:_(<2 x s32>) - G_STORE %rem:_(<2 x s32>), %ptr2:_(p1) :: (store (<2 x s32>), addrspace 1, align 4) + %div:_(<2 x i32>) = G_SDIV %src1, %src2 + G_STORE %div(<2 x i32>), %ptr1(p1) :: (store (<2 x i32>), align 4, addrspace 1) + %rem:_(<2 x i32>) = G_SREM %src1, %src2 + G_STORE %rem(<2 x i32>), %ptr2(p1) :: (store (<2 x i32>), align 4, addrspace 1) ... 
--- name: test_sdiv_srem_v4 @@ -60,21 +60,21 @@ body: | ; CHECK-LABEL: name: test_sdiv_srem_v4 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9, $vgpr10_vgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %src1:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: %src2:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: %src1:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: %src2:_(<4 x i32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: %ptr1:_(p1) = COPY $vgpr8_vgpr9 ; CHECK-NEXT: %ptr2:_(p1) = COPY $vgpr10_vgpr11 - ; CHECK-NEXT: %div:_(<4 x s32>), %rem:_ = G_SDIVREM %src1, %src2 - ; CHECK-NEXT: G_STORE %div(<4 x s32>), %ptr1(p1) :: (store (<4 x s32>), align 4, addrspace 1) - ; CHECK-NEXT: G_STORE %rem(<4 x s32>), %ptr2(p1) :: (store (<4 x s32>), align 4, addrspace 1) - %src1:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %src2:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: %div:_(<4 x i32>), %rem:_ = G_SDIVREM %src1, %src2 + ; CHECK-NEXT: G_STORE %div(<4 x i32>), %ptr1(p1) :: (store (<4 x i32>), align 4, addrspace 1) + ; CHECK-NEXT: G_STORE %rem(<4 x i32>), %ptr2(p1) :: (store (<4 x i32>), align 4, addrspace 1) + %src1:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %src2:_(<4 x i32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %ptr1:_(p1) = COPY $vgpr8_vgpr9 %ptr2:_(p1) = COPY $vgpr10_vgpr11 - %div:_(<4 x s32>) = G_SDIV %src1:_(<4 x s32>), %src2:_(<4 x s32>) - G_STORE %div:_(<4 x s32>), %ptr1:_(p1) :: (store (<4 x s32>), addrspace 1, align 4) - %rem:_(<4 x s32>) = G_SREM %src1:_(<4 x s32>), %src2:_(<4 x s32>) - G_STORE %rem:_(<4 x s32>), %ptr2:_(p1) :: (store (<4 x s32>), addrspace 1, align 4) + %div:_(<4 x i32>) = G_SDIV %src1, %src2 + G_STORE %div(<4 x i32>), %ptr1(p1) :: (store (<4 x i32>), align 4, addrspace 1) + %rem:_(<4 x i32>) = G_SREM %src1, %src2 + G_STORE %rem(<4 x i32>), %ptr2(p1) :: (store (<4 x i32>), align 4, addrspace 1) ... --- name: test_srem_sdiv @@ -85,21 +85,21 @@ body: | ; CHECK-LABEL: name: test_srem_sdiv ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %src1:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %src2:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: %src1:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %src2:_(i32) = COPY $vgpr1 ; CHECK-NEXT: %ptr1:_(p1) = COPY $vgpr2_vgpr3 ; CHECK-NEXT: %ptr2:_(p1) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: %div:_(s32), %rem:_ = G_SDIVREM %src1, %src2 - ; CHECK-NEXT: G_STORE %rem(s32), %ptr1(p1) :: (store (s32), addrspace 1) - ; CHECK-NEXT: G_STORE %div(s32), %ptr2(p1) :: (store (s32), addrspace 1) - %src1:_(s32) = COPY $vgpr0 - %src2:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: %div:_(i32), %rem:_ = G_SDIVREM %src1, %src2 + ; CHECK-NEXT: G_STORE %rem(i32), %ptr1(p1) :: (store (i32), addrspace 1) + ; CHECK-NEXT: G_STORE %div(i32), %ptr2(p1) :: (store (i32), addrspace 1) + %src1:_(i32) = COPY $vgpr0 + %src2:_(i32) = COPY $vgpr1 %ptr1:_(p1) = COPY $vgpr2_vgpr3 %ptr2:_(p1) = COPY $vgpr4_vgpr5 - %rem:_(s32) = G_SREM %src1:_(s32), %src2:_(s32) - G_STORE %rem:_(s32), %ptr1:_(p1) :: (store (s32), addrspace 1, align 4) - %div:_(s32) = G_SDIV %src1:_(s32), %src2:_(s32) - G_STORE %div:_(s32), %ptr2:_(p1) :: (store (s32), addrspace 1, align 4) + %rem:_(i32) = G_SREM %src1, %src2 + G_STORE %rem(i32), %ptr1(p1) :: (store (i32), addrspace 1) + %div:_(i32) = G_SDIV %src1, %src2 + G_STORE %div(i32), %ptr2(p1) :: (store (i32), addrspace 1) ... 
--- name: test_srem_sdiv_v2 @@ -110,21 +110,21 @@ body: | ; CHECK-LABEL: name: test_srem_sdiv_v2 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %src1:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: %src2:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: %src1:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: %src2:_(<2 x i32>) = COPY $vgpr2_vgpr3 ; CHECK-NEXT: %ptr1:_(p1) = COPY $vgpr4_vgpr5 ; CHECK-NEXT: %ptr2:_(p1) = COPY $vgpr6_vgpr7 - ; CHECK-NEXT: %div:_(<2 x s32>), %rem:_ = G_SDIVREM %src1, %src2 - ; CHECK-NEXT: G_STORE %rem(<2 x s32>), %ptr1(p1) :: (store (<2 x s32>), align 4, addrspace 1) - ; CHECK-NEXT: G_STORE %div(<2 x s32>), %ptr2(p1) :: (store (<2 x s32>), align 4, addrspace 1) - %src1:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %src2:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: %div:_(<2 x i32>), %rem:_ = G_SDIVREM %src1, %src2 + ; CHECK-NEXT: G_STORE %rem(<2 x i32>), %ptr1(p1) :: (store (<2 x i32>), align 4, addrspace 1) + ; CHECK-NEXT: G_STORE %div(<2 x i32>), %ptr2(p1) :: (store (<2 x i32>), align 4, addrspace 1) + %src1:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %src2:_(<2 x i32>) = COPY $vgpr2_vgpr3 %ptr1:_(p1) = COPY $vgpr4_vgpr5 %ptr2:_(p1) = COPY $vgpr6_vgpr7 - %rem:_(<2 x s32>) = G_SREM %src1:_(<2 x s32>), %src2:_(<2 x s32>) - G_STORE %rem:_(<2 x s32>), %ptr1:_(p1) :: (store (<2 x s32>), addrspace 1, align 4) - %div:_(<2 x s32>) = G_SDIV %src1:_(<2 x s32>), %src2:_(<2 x s32>) - G_STORE %div:_(<2 x s32>), %ptr2:_(p1) :: (store (<2 x s32>), addrspace 1, align 4) + %rem:_(<2 x i32>) = G_SREM %src1, %src2 + G_STORE %rem(<2 x i32>), %ptr1(p1) :: (store (<2 x i32>), align 4, addrspace 1) + %div:_(<2 x i32>) = G_SDIV %src1, %src2 + G_STORE %div(<2 x i32>), %ptr2(p1) :: (store (<2 x i32>), align 4, addrspace 1) ... 
--- name: test_srem_sdiv_v4 @@ -135,21 +135,21 @@ body: | ; CHECK-LABEL: name: test_srem_sdiv_v4 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9, $vgpr10_vgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %src1:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: %src2:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: %src1:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: %src2:_(<4 x i32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: %ptr1:_(p1) = COPY $vgpr8_vgpr9 ; CHECK-NEXT: %ptr2:_(p1) = COPY $vgpr10_vgpr11 - ; CHECK-NEXT: %div:_(<4 x s32>), %rem:_ = G_SDIVREM %src1, %src2 - ; CHECK-NEXT: G_STORE %rem(<4 x s32>), %ptr1(p1) :: (store (<4 x s32>), align 4, addrspace 1) - ; CHECK-NEXT: G_STORE %div(<4 x s32>), %ptr2(p1) :: (store (<4 x s32>), align 4, addrspace 1) - %src1:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %src2:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: %div:_(<4 x i32>), %rem:_ = G_SDIVREM %src1, %src2 + ; CHECK-NEXT: G_STORE %rem(<4 x i32>), %ptr1(p1) :: (store (<4 x i32>), align 4, addrspace 1) + ; CHECK-NEXT: G_STORE %div(<4 x i32>), %ptr2(p1) :: (store (<4 x i32>), align 4, addrspace 1) + %src1:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %src2:_(<4 x i32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %ptr1:_(p1) = COPY $vgpr8_vgpr9 %ptr2:_(p1) = COPY $vgpr10_vgpr11 - %rem:_(<4 x s32>) = G_SREM %src1:_(<4 x s32>), %src2:_(<4 x s32>) - G_STORE %rem:_(<4 x s32>), %ptr1:_(p1) :: (store (<4 x s32>), addrspace 1, align 4) - %div:_(<4 x s32>) = G_SDIV %src1:_(<4 x s32>), %src2:_(<4 x s32>) - G_STORE %div:_(<4 x s32>), %ptr2:_(p1) :: (store (<4 x s32>), addrspace 1, align 4) + %rem:_(<4 x i32>) = G_SREM %src1, %src2 + G_STORE %rem(<4 x i32>), %ptr1(p1) :: (store (<4 x i32>), align 4, addrspace 1) + %div:_(<4 x i32>) = G_SDIV %src1, %src2 + G_STORE %div(<4 x i32>), %ptr2(p1) :: (store (<4 x i32>), align 4, addrspace 1) ... --- name: test_udiv_urem @@ -160,21 +160,21 @@ body: | ; CHECK-LABEL: name: test_udiv_urem ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %src1:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %src2:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: %src1:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %src2:_(i32) = COPY $vgpr1 ; CHECK-NEXT: %ptr1:_(p1) = COPY $vgpr2_vgpr3 ; CHECK-NEXT: %ptr2:_(p1) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: %div:_(s32), %rem:_ = G_UDIVREM %src1, %src2 - ; CHECK-NEXT: G_STORE %div(s32), %ptr1(p1) :: (store (s32), addrspace 1) - ; CHECK-NEXT: G_STORE %rem(s32), %ptr2(p1) :: (store (s32), addrspace 1) - %src1:_(s32) = COPY $vgpr0 - %src2:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: %div:_(i32), %rem:_ = G_UDIVREM %src1, %src2 + ; CHECK-NEXT: G_STORE %div(i32), %ptr1(p1) :: (store (i32), addrspace 1) + ; CHECK-NEXT: G_STORE %rem(i32), %ptr2(p1) :: (store (i32), addrspace 1) + %src1:_(i32) = COPY $vgpr0 + %src2:_(i32) = COPY $vgpr1 %ptr1:_(p1) = COPY $vgpr2_vgpr3 %ptr2:_(p1) = COPY $vgpr4_vgpr5 - %div:_(s32) = G_UDIV %src1:_(s32), %src2:_(s32) - G_STORE %div:_(s32), %ptr1:_(p1) :: (store (s32), addrspace 1, align 4) - %rem:_(s32) = G_UREM %src1:_(s32), %src2:_(s32) - G_STORE %rem:_(s32), %ptr2:_(p1) :: (store (s32), addrspace 1, align 4) + %div:_(i32) = G_UDIV %src1, %src2 + G_STORE %div(i32), %ptr1(p1) :: (store (i32), addrspace 1) + %rem:_(i32) = G_UREM %src1, %src2 + G_STORE %rem(i32), %ptr2(p1) :: (store (i32), addrspace 1) ... 
--- name: test_udiv_urem_v2 @@ -185,21 +185,21 @@ body: | ; CHECK-LABEL: name: test_udiv_urem_v2 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %src1:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: %src2:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: %src1:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: %src2:_(<2 x i32>) = COPY $vgpr2_vgpr3 ; CHECK-NEXT: %ptr1:_(p1) = COPY $vgpr4_vgpr5 ; CHECK-NEXT: %ptr2:_(p1) = COPY $vgpr6_vgpr7 - ; CHECK-NEXT: %div:_(<2 x s32>), %rem:_ = G_UDIVREM %src1, %src2 - ; CHECK-NEXT: G_STORE %div(<2 x s32>), %ptr1(p1) :: (store (<2 x s32>), align 4, addrspace 1) - ; CHECK-NEXT: G_STORE %rem(<2 x s32>), %ptr2(p1) :: (store (<2 x s32>), align 4, addrspace 1) - %src1:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %src2:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: %div:_(<2 x i32>), %rem:_ = G_UDIVREM %src1, %src2 + ; CHECK-NEXT: G_STORE %div(<2 x i32>), %ptr1(p1) :: (store (<2 x i32>), align 4, addrspace 1) + ; CHECK-NEXT: G_STORE %rem(<2 x i32>), %ptr2(p1) :: (store (<2 x i32>), align 4, addrspace 1) + %src1:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %src2:_(<2 x i32>) = COPY $vgpr2_vgpr3 %ptr1:_(p1) = COPY $vgpr4_vgpr5 %ptr2:_(p1) = COPY $vgpr6_vgpr7 - %div:_(<2 x s32>) = G_UDIV %src1:_(<2 x s32>), %src2:_(<2 x s32>) - G_STORE %div:_(<2 x s32>), %ptr1:_(p1) :: (store (<2 x s32>), addrspace 1, align 4) - %rem:_(<2 x s32>) = G_UREM %src1:_(<2 x s32>), %src2:_(<2 x s32>) - G_STORE %rem:_(<2 x s32>), %ptr2:_(p1) :: (store (<2 x s32>), addrspace 1, align 4) + %div:_(<2 x i32>) = G_UDIV %src1, %src2 + G_STORE %div(<2 x i32>), %ptr1(p1) :: (store (<2 x i32>), align 4, addrspace 1) + %rem:_(<2 x i32>) = G_UREM %src1, %src2 + G_STORE %rem(<2 x i32>), %ptr2(p1) :: (store (<2 x i32>), align 4, addrspace 1) ... 
--- name: test_udiv_urem_v4 @@ -210,21 +210,21 @@ body: | ; CHECK-LABEL: name: test_udiv_urem_v4 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9, $vgpr10_vgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %src1:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: %src2:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: %src1:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: %src2:_(<4 x i32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: %ptr1:_(p1) = COPY $vgpr8_vgpr9 ; CHECK-NEXT: %ptr2:_(p1) = COPY $vgpr10_vgpr11 - ; CHECK-NEXT: %div:_(<4 x s32>), %rem:_ = G_UDIVREM %src1, %src2 - ; CHECK-NEXT: G_STORE %div(<4 x s32>), %ptr1(p1) :: (store (<4 x s32>), align 4, addrspace 1) - ; CHECK-NEXT: G_STORE %rem(<4 x s32>), %ptr2(p1) :: (store (<4 x s32>), align 4, addrspace 1) - %src1:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %src2:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: %div:_(<4 x i32>), %rem:_ = G_UDIVREM %src1, %src2 + ; CHECK-NEXT: G_STORE %div(<4 x i32>), %ptr1(p1) :: (store (<4 x i32>), align 4, addrspace 1) + ; CHECK-NEXT: G_STORE %rem(<4 x i32>), %ptr2(p1) :: (store (<4 x i32>), align 4, addrspace 1) + %src1:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %src2:_(<4 x i32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %ptr1:_(p1) = COPY $vgpr8_vgpr9 %ptr2:_(p1) = COPY $vgpr10_vgpr11 - %div:_(<4 x s32>) = G_UDIV %src1:_(<4 x s32>), %src2:_(<4 x s32>) - G_STORE %div:_(<4 x s32>), %ptr1:_(p1) :: (store (<4 x s32>), addrspace 1, align 4) - %rem:_(<4 x s32>) = G_UREM %src1:_(<4 x s32>), %src2:_(<4 x s32>) - G_STORE %rem:_(<4 x s32>), %ptr2:_(p1) :: (store (<4 x s32>), addrspace 1, align 4) + %div:_(<4 x i32>) = G_UDIV %src1, %src2 + G_STORE %div(<4 x i32>), %ptr1(p1) :: (store (<4 x i32>), align 4, addrspace 1) + %rem:_(<4 x i32>) = G_UREM %src1, %src2 + G_STORE %rem(<4 x i32>), %ptr2(p1) :: (store (<4 x i32>), align 4, addrspace 1) ... --- name: test_urem_udiv @@ -235,21 +235,21 @@ body: | ; CHECK-LABEL: name: test_urem_udiv ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %src1:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %src2:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: %src1:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %src2:_(i32) = COPY $vgpr1 ; CHECK-NEXT: %ptr1:_(p1) = COPY $vgpr2_vgpr3 ; CHECK-NEXT: %ptr2:_(p1) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: %div:_(s32), %rem:_ = G_UDIVREM %src1, %src2 - ; CHECK-NEXT: G_STORE %rem(s32), %ptr1(p1) :: (store (s32), addrspace 1) - ; CHECK-NEXT: G_STORE %div(s32), %ptr2(p1) :: (store (s32), addrspace 1) - %src1:_(s32) = COPY $vgpr0 - %src2:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: %div:_(i32), %rem:_ = G_UDIVREM %src1, %src2 + ; CHECK-NEXT: G_STORE %rem(i32), %ptr1(p1) :: (store (i32), addrspace 1) + ; CHECK-NEXT: G_STORE %div(i32), %ptr2(p1) :: (store (i32), addrspace 1) + %src1:_(i32) = COPY $vgpr0 + %src2:_(i32) = COPY $vgpr1 %ptr1:_(p1) = COPY $vgpr2_vgpr3 %ptr2:_(p1) = COPY $vgpr4_vgpr5 - %rem:_(s32) = G_UREM %src1:_(s32), %src2:_(s32) - G_STORE %rem:_(s32), %ptr1:_(p1) :: (store (s32), addrspace 1, align 4) - %div:_(s32) = G_UDIV %src1:_(s32), %src2:_(s32) - G_STORE %div:_(s32), %ptr2:_(p1) :: (store (s32), addrspace 1, align 4) + %rem:_(i32) = G_UREM %src1, %src2 + G_STORE %rem(i32), %ptr1(p1) :: (store (i32), addrspace 1) + %div:_(i32) = G_UDIV %src1, %src2 + G_STORE %div(i32), %ptr2(p1) :: (store (i32), addrspace 1) ... 
--- name: test_urem_udiv_v2 @@ -260,21 +260,21 @@ body: | ; CHECK-LABEL: name: test_urem_udiv_v2 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %src1:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: %src2:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: %src1:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: %src2:_(<2 x i32>) = COPY $vgpr2_vgpr3 ; CHECK-NEXT: %ptr1:_(p1) = COPY $vgpr4_vgpr5 ; CHECK-NEXT: %ptr2:_(p1) = COPY $vgpr6_vgpr7 - ; CHECK-NEXT: %div:_(<2 x s32>), %rem:_ = G_UDIVREM %src1, %src2 - ; CHECK-NEXT: G_STORE %rem(<2 x s32>), %ptr1(p1) :: (store (<2 x s32>), align 4, addrspace 1) - ; CHECK-NEXT: G_STORE %div(<2 x s32>), %ptr2(p1) :: (store (<2 x s32>), align 4, addrspace 1) - %src1:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %src2:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: %div:_(<2 x i32>), %rem:_ = G_UDIVREM %src1, %src2 + ; CHECK-NEXT: G_STORE %rem(<2 x i32>), %ptr1(p1) :: (store (<2 x i32>), align 4, addrspace 1) + ; CHECK-NEXT: G_STORE %div(<2 x i32>), %ptr2(p1) :: (store (<2 x i32>), align 4, addrspace 1) + %src1:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %src2:_(<2 x i32>) = COPY $vgpr2_vgpr3 %ptr1:_(p1) = COPY $vgpr4_vgpr5 %ptr2:_(p1) = COPY $vgpr6_vgpr7 - %rem:_(<2 x s32>) = G_UREM %src1:_(<2 x s32>), %src2:_(<2 x s32>) - G_STORE %rem:_(<2 x s32>), %ptr1:_(p1) :: (store (<2 x s32>), addrspace 1, align 4) - %div:_(<2 x s32>) = G_UDIV %src1:_(<2 x s32>), %src2:_(<2 x s32>) - G_STORE %div:_(<2 x s32>), %ptr2:_(p1) :: (store (<2 x s32>), addrspace 1, align 4) + %rem:_(<2 x i32>) = G_UREM %src1, %src2 + G_STORE %rem(<2 x i32>), %ptr1(p1) :: (store (<2 x i32>), align 4, addrspace 1) + %div:_(<2 x i32>) = G_UDIV %src1, %src2 + G_STORE %div(<2 x i32>), %ptr2(p1) :: (store (<2 x i32>), align 4, addrspace 1) ... 
--- name: test_urem_udiv_v4 @@ -285,21 +285,21 @@ body: | ; CHECK-LABEL: name: test_urem_udiv_v4 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9, $vgpr10_vgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %src1:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: %src2:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: %src1:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: %src2:_(<4 x i32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: %ptr1:_(p1) = COPY $vgpr8_vgpr9 ; CHECK-NEXT: %ptr2:_(p1) = COPY $vgpr10_vgpr11 - ; CHECK-NEXT: %div:_(<4 x s32>), %rem:_ = G_UDIVREM %src1, %src2 - ; CHECK-NEXT: G_STORE %rem(<4 x s32>), %ptr1(p1) :: (store (<4 x s32>), align 4, addrspace 1) - ; CHECK-NEXT: G_STORE %div(<4 x s32>), %ptr2(p1) :: (store (<4 x s32>), align 4, addrspace 1) - %src1:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %src2:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: %div:_(<4 x i32>), %rem:_ = G_UDIVREM %src1, %src2 + ; CHECK-NEXT: G_STORE %rem(<4 x i32>), %ptr1(p1) :: (store (<4 x i32>), align 4, addrspace 1) + ; CHECK-NEXT: G_STORE %div(<4 x i32>), %ptr2(p1) :: (store (<4 x i32>), align 4, addrspace 1) + %src1:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %src2:_(<4 x i32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %ptr1:_(p1) = COPY $vgpr8_vgpr9 %ptr2:_(p1) = COPY $vgpr10_vgpr11 - %rem:_(<4 x s32>) = G_UREM %src1:_(<4 x s32>), %src2:_(<4 x s32>) - G_STORE %rem:_(<4 x s32>), %ptr1:_(p1) :: (store (<4 x s32>), addrspace 1, align 4) - %div:_(<4 x s32>) = G_UDIV %src1:_(<4 x s32>), %src2:_(<4 x s32>) - G_STORE %div:_(<4 x s32>), %ptr2:_(p1) :: (store (<4 x s32>), addrspace 1, align 4) + %rem:_(<4 x i32>) = G_UREM %src1, %src2 + G_STORE %rem(<4 x i32>), %ptr1(p1) :: (store (<4 x i32>), align 4, addrspace 1) + %div:_(<4 x i32>) = G_UDIV %src1, %src2 + G_STORE %div(<4 x i32>), %ptr2(p1) :: (store (<4 x i32>), align 4, addrspace 1) ... 
--- name: test_sdiv_srem_extra_use @@ -310,29 +310,29 @@ body: | ; CHECK-LABEL: name: test_sdiv_srem_extra_use ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %src1:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %src2:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: %src1:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %src2:_(i32) = COPY $vgpr1 ; CHECK-NEXT: %ptr1:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: %ptr2:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: %ptr3:_(p1) = COPY $vgpr2_vgpr3 ; CHECK-NEXT: %ptr4:_(p1) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: G_STORE %src1(s32), %ptr1(p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: G_STORE %src2(s32), %ptr2(p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: %div:_(s32), %rem:_ = G_SDIVREM %src1, %src2 - ; CHECK-NEXT: G_STORE %div(s32), %ptr3(p1) :: (store (s32), addrspace 1) - ; CHECK-NEXT: G_STORE %rem(s32), %ptr4(p1) :: (store (s32), addrspace 1) - %src1:_(s32) = COPY $vgpr0 - %src2:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: G_STORE %src1(i32), %ptr1(p1) :: (volatile store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE %src2(i32), %ptr2(p1) :: (volatile store (i32) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: %div:_(i32), %rem:_ = G_SDIVREM %src1, %src2 + ; CHECK-NEXT: G_STORE %div(i32), %ptr3(p1) :: (store (i32), addrspace 1) + ; CHECK-NEXT: G_STORE %rem(i32), %ptr4(p1) :: (store (i32), addrspace 1) + %src1:_(i32) = COPY $vgpr0 + %src2:_(i32) = COPY $vgpr1 %ptr1:_(p1) = G_IMPLICIT_DEF %ptr2:_(p1) = G_IMPLICIT_DEF %ptr3:_(p1) = COPY $vgpr2_vgpr3 %ptr4:_(p1) = COPY $vgpr4_vgpr5 - G_STORE %src1:_(s32), %ptr1:_(p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) - G_STORE %src2:_(s32), %ptr2:_(p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) - %div:_(s32) = G_SDIV %src1:_(s32), %src2:_(s32) - G_STORE %div:_(s32), %ptr3:_(p1) :: (store (s32), addrspace 1, align 4) - %rem:_(s32) = G_SREM %src1:_(s32), %src2:_(s32) - G_STORE %rem:_(s32), %ptr4:_(p1) :: (store (s32), addrspace 1, align 4) + G_STORE %src1(i32), %ptr1(p1) :: (volatile store (i32) into `ptr addrspace(1) undef`, addrspace 1) + G_STORE %src2(i32), %ptr2(p1) :: (volatile store (i32) into `ptr addrspace(1) undef`, addrspace 1) + %div:_(i32) = G_SDIV %src1, %src2 + G_STORE %div(i32), %ptr3(p1) :: (store (i32), addrspace 1) + %rem:_(i32) = G_SREM %src1, %src2 + G_STORE %rem(i32), %ptr4(p1) :: (store (i32), addrspace 1) ... --- name: test_sdiv_srem_extra_sdiv @@ -340,32 +340,30 @@ tracksRegLiveness: true body: | bb.0: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7 - ; Combine the first sdiv/srem pair into sdivrem and retain the extra - ; sdiv instruction. 
; CHECK-LABEL: name: test_sdiv_srem_extra_sdiv ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %src1:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %src2:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: %src1:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %src2:_(i32) = COPY $vgpr1 ; CHECK-NEXT: %ptr1:_(p1) = COPY $vgpr2_vgpr3 ; CHECK-NEXT: %ptr2:_(p1) = COPY $vgpr4_vgpr5 ; CHECK-NEXT: %ptr3:_(p1) = COPY $vgpr6_vgpr7 - ; CHECK-NEXT: %div:_(s32), %rem:_ = G_SDIVREM %src1, %src2 - ; CHECK-NEXT: G_STORE %div(s32), %ptr1(p1) :: (store (s32), addrspace 1) - ; CHECK-NEXT: G_STORE %rem(s32), %ptr2(p1) :: (store (s32), addrspace 1) - ; CHECK-NEXT: %div2:_(s32) = G_SDIV %src1, %src2 - ; CHECK-NEXT: G_STORE %div2(s32), %ptr3(p1) :: (store (s32), addrspace 1) - %src1:_(s32) = COPY $vgpr0 - %src2:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: %div:_(i32), %rem:_ = G_SDIVREM %src1, %src2 + ; CHECK-NEXT: G_STORE %div(i32), %ptr1(p1) :: (store (i32), addrspace 1) + ; CHECK-NEXT: G_STORE %rem(i32), %ptr2(p1) :: (store (i32), addrspace 1) + ; CHECK-NEXT: %div2:_(i32) = G_SDIV %src1, %src2 + ; CHECK-NEXT: G_STORE %div2(i32), %ptr3(p1) :: (store (i32), addrspace 1) + %src1:_(i32) = COPY $vgpr0 + %src2:_(i32) = COPY $vgpr1 %ptr1:_(p1) = COPY $vgpr2_vgpr3 %ptr2:_(p1) = COPY $vgpr4_vgpr5 %ptr3:_(p1) = COPY $vgpr6_vgpr7 - %div:_(s32) = G_SDIV %src1:_(s32), %src2:_(s32) - G_STORE %div:_(s32), %ptr1:_(p1) :: (store (s32), addrspace 1, align 4) - %rem:_(s32) = G_SREM %src1:_(s32), %src2:_(s32) - G_STORE %rem:_(s32), %ptr2:_(p1) :: (store (s32), addrspace 1, align 4) - %div2:_(s32) = G_SDIV %src1:_(s32), %src2:_(s32) - G_STORE %div2:_(s32), %ptr3:_(p1) :: (store (s32), addrspace 1, align 4) + %div:_(i32) = G_SDIV %src1, %src2 + G_STORE %div(i32), %ptr1(p1) :: (store (i32), addrspace 1) + %rem:_(i32) = G_SREM %src1, %src2 + G_STORE %rem(i32), %ptr2(p1) :: (store (i32), addrspace 1) + %div2:_(i32) = G_SDIV %src1, %src2 + G_STORE %div2(i32), %ptr3(p1) :: (store (i32), addrspace 1) ... --- name: test_sdiv_srem_extra_srem @@ -373,32 +371,30 @@ tracksRegLiveness: true body: | bb.0: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7 - ; Combine the first sdiv/srem pair into sdivrem and retain the extra - ; srem instruction. 
; CHECK-LABEL: name: test_sdiv_srem_extra_srem ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %src1:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %src2:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: %src1:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %src2:_(i32) = COPY $vgpr1 ; CHECK-NEXT: %ptr1:_(p1) = COPY $vgpr2_vgpr3 ; CHECK-NEXT: %ptr2:_(p1) = COPY $vgpr4_vgpr5 ; CHECK-NEXT: %ptr3:_(p1) = COPY $vgpr6_vgpr7 - ; CHECK-NEXT: %div:_(s32), %rem:_ = G_SDIVREM %src1, %src2 - ; CHECK-NEXT: G_STORE %div(s32), %ptr1(p1) :: (store (s32), addrspace 1) - ; CHECK-NEXT: G_STORE %rem(s32), %ptr2(p1) :: (store (s32), addrspace 1) - ; CHECK-NEXT: %rem2:_(s32) = G_SREM %src1, %src2 - ; CHECK-NEXT: G_STORE %rem2(s32), %ptr3(p1) :: (store (s32), addrspace 1) - %src1:_(s32) = COPY $vgpr0 - %src2:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: %div:_(i32), %rem:_ = G_SDIVREM %src1, %src2 + ; CHECK-NEXT: G_STORE %div(i32), %ptr1(p1) :: (store (i32), addrspace 1) + ; CHECK-NEXT: G_STORE %rem(i32), %ptr2(p1) :: (store (i32), addrspace 1) + ; CHECK-NEXT: %rem2:_(i32) = G_SREM %src1, %src2 + ; CHECK-NEXT: G_STORE %rem2(i32), %ptr3(p1) :: (store (i32), addrspace 1) + %src1:_(i32) = COPY $vgpr0 + %src2:_(i32) = COPY $vgpr1 %ptr1:_(p1) = COPY $vgpr2_vgpr3 %ptr2:_(p1) = COPY $vgpr4_vgpr5 %ptr3:_(p1) = COPY $vgpr6_vgpr7 - %div:_(s32) = G_SDIV %src1:_(s32), %src2:_(s32) - G_STORE %div:_(s32), %ptr1:_(p1) :: (store (s32), addrspace 1, align 4) - %rem:_(s32) = G_SREM %src1:_(s32), %src2:_(s32) - G_STORE %rem:_(s32), %ptr2:_(p1) :: (store (s32), addrspace 1, align 4) - %rem2:_(s32) = G_SREM %src1:_(s32), %src2:_(s32) - G_STORE %rem2:_(s32), %ptr3:_(p1) :: (store (s32), addrspace 1, align 4) + %div:_(i32) = G_SDIV %src1, %src2 + G_STORE %div(i32), %ptr1(p1) :: (store (i32), addrspace 1) + %rem:_(i32) = G_SREM %src1, %src2 + G_STORE %rem(i32), %ptr2(p1) :: (store (i32), addrspace 1) + %rem2:_(i32) = G_SREM %src1, %src2 + G_STORE %rem2(i32), %ptr3(p1) :: (store (i32), addrspace 1) ... # Some negative tests. 
--- @@ -410,24 +406,24 @@ body: | ; CHECK-LABEL: name: test_sdiv_srem_different_src_opnd2 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3_vgpr4, $vgpr5_vgpr6 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %src1:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %src2:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: %src3:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: %src1:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %src2:_(i32) = COPY $vgpr1 + ; CHECK-NEXT: %src3:_(i32) = COPY $vgpr2 ; CHECK-NEXT: %ptr1:_(p1) = COPY $vgpr3_vgpr4 ; CHECK-NEXT: %ptr2:_(p1) = COPY $vgpr5_vgpr6 - ; CHECK-NEXT: %div:_(s32) = G_SDIV %src1, %src2 - ; CHECK-NEXT: G_STORE %div(s32), %ptr1(p1) :: (store (s32), addrspace 1) - ; CHECK-NEXT: %rem:_(s32) = G_SREM %src1, %src3 - ; CHECK-NEXT: G_STORE %rem(s32), %ptr2(p1) :: (store (s32), addrspace 1) - %src1:_(s32) = COPY $vgpr0 - %src2:_(s32) = COPY $vgpr1 - %src3:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: %div:_(i32) = G_SDIV %src1, %src2 + ; CHECK-NEXT: G_STORE %div(i32), %ptr1(p1) :: (store (i32), addrspace 1) + ; CHECK-NEXT: %rem:_(i32) = G_SREM %src1, %src3 + ; CHECK-NEXT: G_STORE %rem(i32), %ptr2(p1) :: (store (i32), addrspace 1) + %src1:_(i32) = COPY $vgpr0 + %src2:_(i32) = COPY $vgpr1 + %src3:_(i32) = COPY $vgpr2 %ptr1:_(p1) = COPY $vgpr3_vgpr4 %ptr2:_(p1) = COPY $vgpr5_vgpr6 - %div:_(s32) = G_SDIV %src1:_(s32), %src2:_(s32) - G_STORE %div:_(s32), %ptr1:_(p1) :: (store (s32), addrspace 1, align 4) - %rem:_(s32) = G_SREM %src1:_(s32), %src3:_(s32) - G_STORE %rem:_(s32), %ptr2:_(p1) :: (store (s32), addrspace 1, align 4) + %div:_(i32) = G_SDIV %src1, %src2 + G_STORE %div(i32), %ptr1(p1) :: (store (i32), addrspace 1) + %rem:_(i32) = G_SREM %src1, %src3 + G_STORE %rem(i32), %ptr2(p1) :: (store (i32), addrspace 1) ... --- name: test_sdiv_srem_src_opnds_swapped @@ -438,22 +434,22 @@ body: | ; CHECK-LABEL: name: test_sdiv_srem_src_opnds_swapped ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %src1:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %src2:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: %src1:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %src2:_(i32) = COPY $vgpr1 ; CHECK-NEXT: %ptr1:_(p1) = COPY $vgpr2_vgpr3 ; CHECK-NEXT: %ptr2:_(p1) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: %div:_(s32) = G_SDIV %src1, %src2 - ; CHECK-NEXT: G_STORE %div(s32), %ptr1(p1) :: (store (s32), addrspace 1) - ; CHECK-NEXT: %rem:_(s32) = G_SREM %src2, %src1 - ; CHECK-NEXT: G_STORE %rem(s32), %ptr2(p1) :: (store (s32), addrspace 1) - %src1:_(s32) = COPY $vgpr0 - %src2:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: %div:_(i32) = G_SDIV %src1, %src2 + ; CHECK-NEXT: G_STORE %div(i32), %ptr1(p1) :: (store (i32), addrspace 1) + ; CHECK-NEXT: %rem:_(i32) = G_SREM %src2, %src1 + ; CHECK-NEXT: G_STORE %rem(i32), %ptr2(p1) :: (store (i32), addrspace 1) + %src1:_(i32) = COPY $vgpr0 + %src2:_(i32) = COPY $vgpr1 %ptr1:_(p1) = COPY $vgpr2_vgpr3 %ptr2:_(p1) = COPY $vgpr4_vgpr5 - %div:_(s32) = G_SDIV %src1:_(s32), %src2:_(s32) - G_STORE %div:_(s32), %ptr1:_(p1) :: (store (s32), addrspace 1, align 4) - %rem:_(s32) = G_SREM %src2:_(s32), %src1:_(s32) - G_STORE %rem:_(s32), %ptr2:_(p1) :: (store (s32), addrspace 1, align 4) + %div:_(i32) = G_SDIV %src1, %src2 + G_STORE %div(i32), %ptr1(p1) :: (store (i32), addrspace 1) + %rem:_(i32) = G_SREM %src2, %src1 + G_STORE %rem(i32), %ptr2(p1) :: (store (i32), addrspace 1) ... 
--- name: test_sdiv_urem @@ -464,22 +460,22 @@ body: | ; CHECK-LABEL: name: test_sdiv_urem ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %src1:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %src2:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: %src1:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %src2:_(i32) = COPY $vgpr1 ; CHECK-NEXT: %ptr1:_(p1) = COPY $vgpr2_vgpr3 ; CHECK-NEXT: %ptr2:_(p1) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: %div:_(s32) = G_SDIV %src1, %src2 - ; CHECK-NEXT: G_STORE %div(s32), %ptr1(p1) :: (store (s32), addrspace 1) - ; CHECK-NEXT: %rem:_(s32) = G_UREM %src1, %src2 - ; CHECK-NEXT: G_STORE %rem(s32), %ptr2(p1) :: (store (s32), addrspace 1) - %src1:_(s32) = COPY $vgpr0 - %src2:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: %div:_(i32) = G_SDIV %src1, %src2 + ; CHECK-NEXT: G_STORE %div(i32), %ptr1(p1) :: (store (i32), addrspace 1) + ; CHECK-NEXT: %rem:_(i32) = G_UREM %src1, %src2 + ; CHECK-NEXT: G_STORE %rem(i32), %ptr2(p1) :: (store (i32), addrspace 1) + %src1:_(i32) = COPY $vgpr0 + %src2:_(i32) = COPY $vgpr1 %ptr1:_(p1) = COPY $vgpr2_vgpr3 %ptr2:_(p1) = COPY $vgpr4_vgpr5 - %div:_(s32) = G_SDIV %src1:_(s32), %src2:_(s32) - G_STORE %div:_(s32), %ptr1:_(p1) :: (store (s32), addrspace 1, align 4) - %rem:_(s32) = G_UREM %src1:_(s32), %src2:_(s32) - G_STORE %rem:_(s32), %ptr2:_(p1) :: (store (s32), addrspace 1, align 4) + %div:_(i32) = G_SDIV %src1, %src2 + G_STORE %div(i32), %ptr1(p1) :: (store (i32), addrspace 1) + %rem:_(i32) = G_UREM %src1, %src2 + G_STORE %rem(i32), %ptr2(p1) :: (store (i32), addrspace 1) ... --- name: test_udiv_srem @@ -490,22 +486,22 @@ body: | ; CHECK-LABEL: name: test_udiv_srem ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %src1:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %src2:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: %src1:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %src2:_(i32) = COPY $vgpr1 ; CHECK-NEXT: %ptr1:_(p1) = COPY $vgpr2_vgpr3 ; CHECK-NEXT: %ptr2:_(p1) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: %div:_(s32) = G_UDIV %src1, %src2 - ; CHECK-NEXT: G_STORE %div(s32), %ptr1(p1) :: (store (s32), addrspace 1) - ; CHECK-NEXT: %rem:_(s32) = G_SREM %src1, %src2 - ; CHECK-NEXT: G_STORE %rem(s32), %ptr2(p1) :: (store (s32), addrspace 1) - %src1:_(s32) = COPY $vgpr0 - %src2:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: %div:_(i32) = G_UDIV %src1, %src2 + ; CHECK-NEXT: G_STORE %div(i32), %ptr1(p1) :: (store (i32), addrspace 1) + ; CHECK-NEXT: %rem:_(i32) = G_SREM %src1, %src2 + ; CHECK-NEXT: G_STORE %rem(i32), %ptr2(p1) :: (store (i32), addrspace 1) + %src1:_(i32) = COPY $vgpr0 + %src2:_(i32) = COPY $vgpr1 %ptr1:_(p1) = COPY $vgpr2_vgpr3 %ptr2:_(p1) = COPY $vgpr4_vgpr5 - %div:_(s32) = G_UDIV %src1:_(s32), %src2:_(s32) - G_STORE %div:_(s32), %ptr1:_(p1) :: (store (s32), addrspace 1, align 4) - %rem:_(s32) = G_SREM %src1:_(s32), %src2:_(s32) - G_STORE %rem:_(s32), %ptr2:_(p1) :: (store (s32), addrspace 1, align 4) + %div:_(i32) = G_UDIV %src1, %src2 + G_STORE %div(i32), %ptr1(p1) :: (store (i32), addrspace 1) + %rem:_(i32) = G_SREM %src1, %src2 + G_STORE %rem(i32), %ptr2(p1) :: (store (i32), addrspace 1) ... 
--- name: test_sdiv_srem_different_blocks @@ -516,30 +512,34 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %src1:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %src2:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: %src1:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %src2:_(i32) = COPY $vgpr1 ; CHECK-NEXT: %ptr1:_(p1) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: %div:_(s32) = G_SDIV %src1, %src2 - ; CHECK-NEXT: G_STORE %div(s32), %ptr1(p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: %div:_(i32) = G_SDIV %src1, %src2 + ; CHECK-NEXT: G_STORE %div(i32), %ptr1(p1) :: (store (i32), addrspace 1) ; CHECK-NEXT: S_BRANCH %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: liveins: $vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %ptr2:_(p1) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: %rem:_(s32) = G_SREM %src1, %src2 - ; CHECK-NEXT: G_STORE %rem(s32), %ptr2(p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: %rem:_(i32) = G_SREM %src1, %src2 + ; CHECK-NEXT: G_STORE %rem(i32), %ptr2(p1) :: (store (i32), addrspace 1) bb.0: + successors: %bb.1(0x80000000) liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 - %src1:_(s32) = COPY $vgpr0 - %src2:_(s32) = COPY $vgpr1 + + %src1:_(i32) = COPY $vgpr0 + %src2:_(i32) = COPY $vgpr1 %ptr1:_(p1) = COPY $vgpr2_vgpr3 - %div:_(s32) = G_SDIV %src1:_(s32), %src2:_(s32) - G_STORE %div:_(s32), %ptr1:_(p1) :: (store (s32), addrspace 1, align 4) + %div:_(i32) = G_SDIV %src1, %src2 + G_STORE %div(i32), %ptr1(p1) :: (store (i32), addrspace 1) S_BRANCH %bb.1 + bb.1: liveins: $vgpr4_vgpr5 + %ptr2:_(p1) = COPY $vgpr4_vgpr5 - %rem:_(s32) = G_SREM %src1:_(s32), %src2:_(s32) - G_STORE %rem:_(s32), %ptr2:_(p1) :: (store (s32), addrspace 1, align 4) + %rem:_(i32) = G_SREM %src1, %src2 + G_STORE %rem(i32), %ptr2(p1) :: (store (i32), addrspace 1) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-fptrunc_fpext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-fptrunc_fpext.mir index b6434c60e3f29..90b2763b8e2ee 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-fptrunc_fpext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-fptrunc_fpext.mir @@ -10,14 +10,16 @@ body: | ; CHECK-LABEL: name: fptrunc_fpext_s16_to_s32_to_s16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: $vgpr0 = COPY %reg(s32) - %reg:_(s32) = COPY $vgpr0 - %src:_(s16) = G_TRUNC %reg - %fpext:_(s32) = G_FPEXT %src - %fptrunc:_(s16) = G_FPTRUNC %fpext - %anyext:_(s32) = G_ANYEXT %fptrunc - $vgpr0 = COPY %anyext + ; CHECK-NEXT: %reg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: $vgpr0 = COPY %reg(i32) + %reg:_(i32) = COPY $vgpr0 + %src:_(i16) = G_TRUNC %reg(i32) + %2:_(f16) = G_BITCAST %src(i16) + %fpext:_(f32) = G_FPEXT %2(f16) + %fptrunc:_(f16) = G_FPTRUNC %fpext(f32) + %5:_(i16) = G_BITCAST %fptrunc(f16) + %anyext:_(i32) = G_ANYEXT %5(i16) + $vgpr0 = COPY %anyext(i32) ... 
--- @@ -29,16 +31,20 @@ body: | ; CHECK-LABEL: name: fptrunc_fpext_s16_to_s64_to_s32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %src:_(s16) = G_TRUNC %reg(s32) - ; CHECK-NEXT: %fpext:_(s64) = G_FPEXT %src(s16) - ; CHECK-NEXT: %fptrunc:_(s32) = G_FPTRUNC %fpext(s64) - ; CHECK-NEXT: $vgpr0 = COPY %fptrunc(s32) - %reg:_(s32) = COPY $vgpr0 - %src:_(s16) = G_TRUNC %reg - %fpext:_(s64) = G_FPEXT %src - %fptrunc:_(s32) = G_FPTRUNC %fpext - $vgpr0 = COPY %fptrunc + ; CHECK-NEXT: %reg:_(i32) = COPY $vgpr0 + ; CHECK-NEXT: %src:_(i16) = G_TRUNC %reg(i32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(f16) = G_BITCAST %src(i16) + ; CHECK-NEXT: %fpext:_(f64) = G_FPEXT [[BITCAST]](f16) + ; CHECK-NEXT: %fptrunc:_(f32) = G_FPTRUNC %fpext(f64) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(i32) = G_BITCAST %fptrunc(f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %reg:_(i32) = COPY $vgpr0 + %src:_(i16) = G_TRUNC %reg(i32) + %2:_(f16) = G_BITCAST %src(i16) + %fpext:_(f64) = G_FPEXT %2(f16) + %fptrunc:_(f32) = G_FPTRUNC %fpext(f64) + %5:_(i32) = G_BITCAST %fptrunc(f32) + $vgpr0 = COPY %5(i32) ... --- @@ -50,12 +56,14 @@ body: | ; CHECK-LABEL: name: fptrunc_fpext_v2s16_to_v2s32_to_v2s16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %src:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: $vgpr0 = COPY %src(<2 x s16>) - %src:_(<2 x s16>) = COPY $vgpr0 - %fpext:_(<2 x s32>) = G_FPEXT %src - %fptrunc:_(<2 x s16>) = G_FPTRUNC %fpext - $vgpr0 = COPY %fptrunc + ; CHECK-NEXT: %src:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: $vgpr0 = COPY %src(<2 x i16>) + %src:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x f16>) = G_BITCAST %src(<2 x i16>) + %fpext:_(<2 x f32>) = G_FPEXT %1(<2 x f16>) + %fptrunc:_(<2 x f16>) = G_FPTRUNC %fpext(<2 x f32>) + %4:_(<2 x i16>) = G_BITCAST %fptrunc(<2 x f16>) + $vgpr0 = COPY %4(<2 x i16>) ... --- @@ -67,12 +75,16 @@ body: | ; CHECK-LABEL: name: fptrunc_fpext_v2s16_to_v2s64_to_v2s32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %src:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: %fpext:_(<2 x s64>) = G_FPEXT %src(<2 x s16>) - ; CHECK-NEXT: %fptrunc:_(<2 x s32>) = G_FPTRUNC %fpext(<2 x s64>) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %fptrunc(<2 x s32>) - %src:_(<2 x s16>) = COPY $vgpr0 - %fpext:_(<2 x s64>) = G_FPEXT %src - %fptrunc:_(<2 x s32>) = G_FPTRUNC %fpext - $vgpr0_vgpr1 = COPY %fptrunc + ; CHECK-NEXT: %src:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x f16>) = G_BITCAST %src(<2 x i16>) + ; CHECK-NEXT: %fpext:_(<2 x f64>) = G_FPEXT [[BITCAST]](<2 x f16>) + ; CHECK-NEXT: %fptrunc:_(<2 x f32>) = G_FPTRUNC %fpext(<2 x f64>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x i32>) = G_BITCAST %fptrunc(<2 x f32>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST1]](<2 x i32>) + %src:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x f16>) = G_BITCAST %src(<2 x i16>) + %fpext:_(<2 x f64>) = G_FPEXT %1(<2 x f16>) + %fptrunc:_(<2 x f32>) = G_FPTRUNC %fpext(<2 x f64>) + %4:_(<2 x i32>) = G_BITCAST %fptrunc(<2 x f32>) + $vgpr0_vgpr1 = COPY %4(<2 x i32>) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-redundant-bitcast.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-redundant-bitcast.mir index 6371001c40764..30454e8a6daf5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-redundant-bitcast.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-redundant-bitcast.mir @@ -10,12 +10,12 @@ body: | ; CHECK-LABEL: name: s32_bitcast ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %src:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: $vgpr0 = COPY %src(<2 x s16>) - %src:_(<2 x s16>) = COPY $vgpr0 - %b1:_(s32) = G_BITCAST %src - %b2:_(<2 x s16>) = G_BITCAST %b1 - $vgpr0 = COPY %b2 + ; CHECK-NEXT: %src:_(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: $vgpr0 = COPY %src(<2 x i16>) + %src:_(<2 x i16>) = COPY $vgpr0 + %b1:_(i32) = G_BITCAST %src(<2 x i16>) + %b2:_(<2 x i16>) = G_BITCAST %b1(i32) + $vgpr0 = COPY %b2(<2 x i16>) ... --- @@ -27,12 +27,12 @@ body: | ; CHECK-LABEL: name: s64_bitcast ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %src:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %src(<2 x s32>) - %src:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %b1:_(s64) = G_BITCAST %src - %b2:_(<2 x s32>) = G_BITCAST %b1 - $vgpr0_vgpr1 = COPY %b2 + ; CHECK-NEXT: %src:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %src(<2 x i32>) + %src:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %b1:_(i64) = G_BITCAST %src(<2 x i32>) + %b2:_(<2 x i32>) = G_BITCAST %b1(i64) + $vgpr0_vgpr1 = COPY %b2(<2 x i32>) ... --- @@ -44,12 +44,12 @@ body: | ; CHECK-LABEL: name: s64_bitcast_differentypes_nofold ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %src:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: %b1:_(s64) = G_BITCAST %src(<2 x s32>) - ; CHECK-NEXT: %b2:_(<4 x s16>) = G_BITCAST %b1(s64) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %b2(<4 x s16>) - %src:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %b1:_(s64) = G_BITCAST %src - %b2:_(<4 x s16>) = G_BITCAST %b1 - $vgpr0_vgpr1 = COPY %b2 + ; CHECK-NEXT: %src:_(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: %b1:_(i64) = G_BITCAST %src(<2 x i32>) + ; CHECK-NEXT: %b2:_(<4 x i16>) = G_BITCAST %b1(i64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %b2(<4 x i16>) + %src:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %b1:_(i64) = G_BITCAST %src(<2 x i32>) + %b2:_(<4 x i16>) = G_BITCAST %b1(i64) + $vgpr0_vgpr1 = COPY %b2(<4 x i16>) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-sext_inreg-to-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-sext_inreg-to-and.mir index c6e7853992d96..203659a2ea877 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-sext_inreg-to-and.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-sext_inreg-to-and.mir @@ -13,16 +13,16 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: %load:_(s32) = G_LOAD %ptr(p1) :: (volatile load (s32), addrspace 1) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: %inreg:_(s32) = G_AND %load, [[C]] - ; CHECK-NEXT: $vgpr0 = COPY %inreg(s32) + ; CHECK-NEXT: %load:_(i32) = G_LOAD %ptr(p1) :: (volatile load (i32), addrspace 1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: %inreg:_(i32) = G_AND %load, [[C]] + ; CHECK-NEXT: $vgpr0 = COPY %inreg(i32) %ptr:_(p1) = COPY $vgpr0_vgpr1 - %load:_(s32) = G_LOAD %ptr :: (volatile load (s32), addrspace 1, align 4) - %mask:_(s32) = G_CONSTANT i32 -255 - %and:_(s32) = G_AND %load, %mask - %inreg:_(s32) = G_SEXT_INREG %and, 8 - $vgpr0 = COPY %inreg + %load:_(i32) = G_LOAD %ptr(p1) :: (volatile load (i32), addrspace 1) + %mask:_(i32) = G_CONSTANT i32 -255 + %and:_(i32) = G_AND %load, %mask + %inreg:_(i32) = G_SEXT_INREG %and, 8 + $vgpr0 = COPY %inreg(i32) ... @@ -38,17 +38,17 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: %load:_(s32) = G_LOAD %ptr(p1) :: (volatile load (s32), addrspace 1) - ; CHECK-NEXT: %mask:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: %and:_(s32) = G_AND %load, %mask - ; CHECK-NEXT: %inreg:_(s32) = G_SEXT_INREG %and, 8 - ; CHECK-NEXT: $vgpr0 = COPY %inreg(s32) + ; CHECK-NEXT: %load:_(i32) = G_LOAD %ptr(p1) :: (volatile load (i32), addrspace 1) + ; CHECK-NEXT: %mask:_(i32) = G_CONSTANT i32 255 + ; CHECK-NEXT: %and:_(i32) = G_AND %load, %mask + ; CHECK-NEXT: %inreg:_(i32) = G_SEXT_INREG %and, 8 + ; CHECK-NEXT: $vgpr0 = COPY %inreg(i32) %ptr:_(p1) = COPY $vgpr0_vgpr1 - %load:_(s32) = G_LOAD %ptr :: (volatile load (s32), addrspace 1, align 4) - %mask:_(s32) = G_CONSTANT i32 255 - %and:_(s32) = G_AND %load, %mask - %inreg:_(s32) = G_SEXT_INREG %and, 8 - $vgpr0 = COPY %inreg + %load:_(i32) = G_LOAD %ptr(p1) :: (volatile load (i32), addrspace 1) + %mask:_(i32) = G_CONSTANT i32 255 + %and:_(i32) = G_AND %load, %mask + %inreg:_(i32) = G_SEXT_INREG %and, 8 + $vgpr0 = COPY %inreg(i32) ... 
@@ -64,21 +64,21 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: %load:_(<2 x s32>) = G_LOAD %ptr(p1) :: (volatile load (<2 x s32>), addrspace 1) - ; CHECK-NEXT: %mask_elt:_(s32) = G_CONSTANT i32 -255 - ; CHECK-NEXT: %mask:_(<2 x s32>) = G_BUILD_VECTOR %mask_elt(s32), %mask_elt(s32) - ; CHECK-NEXT: %and:_(<2 x s32>) = G_AND %load, %mask - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32) - ; CHECK-NEXT: %inreg:_(<2 x s32>) = G_AND %and, [[BUILD_VECTOR]] - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %inreg(<2 x s32>) + ; CHECK-NEXT: %load:_(<2 x i32>) = G_LOAD %ptr(p1) :: (volatile load (<2 x i32>), addrspace 1) + ; CHECK-NEXT: %mask_elt:_(i32) = G_CONSTANT i32 -255 + ; CHECK-NEXT: %mask:_(<2 x i32>) = G_BUILD_VECTOR %mask_elt(i32), %mask_elt(i32) + ; CHECK-NEXT: %and:_(<2 x i32>) = G_AND %load, %mask + ; CHECK-NEXT: [[C:%[0-9]+]]:_(i32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i32>) = G_BUILD_VECTOR [[C]](i32), [[C]](i32) + ; CHECK-NEXT: %inreg:_(<2 x i32>) = G_AND %and, [[BUILD_VECTOR]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %inreg(<2 x i32>) %ptr:_(p1) = COPY $vgpr0_vgpr1 - %load:_(<2 x s32>) = G_LOAD %ptr :: (volatile load (<2 x s32>), addrspace 1, align 8) - %mask_elt:_(s32) = G_CONSTANT i32 -255 - %mask:_(<2 x s32>) = G_BUILD_VECTOR %mask_elt, %mask_elt - %and:_(<2 x s32>) = G_AND %load, %mask - %inreg:_(<2 x s32>) = G_SEXT_INREG %and, 8 - $vgpr0_vgpr1 = COPY %inreg + %load:_(<2 x i32>) = G_LOAD %ptr(p1) :: (volatile load (<2 x i32>), addrspace 1) + %mask_elt:_(i32) = G_CONSTANT i32 -255 + %mask:_(<2 x i32>) = G_BUILD_VECTOR %mask_elt(i32), %mask_elt(i32) + %and:_(<2 x i32>) = G_AND %load, %mask + %inreg:_(<2 x i32>) = G_SEXT_INREG %and, 8 + $vgpr0_vgpr1 = COPY %inreg(<2 x i32>) ... @@ -94,18 +94,18 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: %load:_(<2 x s32>) = G_LOAD %ptr(p1) :: (volatile load (<2 x s32>), addrspace 1) - ; CHECK-NEXT: %mask_elt:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: %mask:_(<2 x s32>) = G_BUILD_VECTOR %mask_elt(s32), %mask_elt(s32) - ; CHECK-NEXT: %and:_(<2 x s32>) = G_AND %load, %mask - ; CHECK-NEXT: %inreg:_(<2 x s32>) = G_SEXT_INREG %and, 8 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %inreg(<2 x s32>) + ; CHECK-NEXT: %load:_(<2 x i32>) = G_LOAD %ptr(p1) :: (volatile load (<2 x i32>), addrspace 1) + ; CHECK-NEXT: %mask_elt:_(i32) = G_CONSTANT i32 255 + ; CHECK-NEXT: %mask:_(<2 x i32>) = G_BUILD_VECTOR %mask_elt(i32), %mask_elt(i32) + ; CHECK-NEXT: %and:_(<2 x i32>) = G_AND %load, %mask + ; CHECK-NEXT: %inreg:_(<2 x i32>) = G_SEXT_INREG %and, 8 + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %inreg(<2 x i32>) %ptr:_(p1) = COPY $vgpr0_vgpr1 - %load:_(<2 x s32>) = G_LOAD %ptr :: (volatile load (<2 x s32>), addrspace 1, align 8) - %mask_elt:_(s32) = G_CONSTANT i32 255 - %mask:_(<2 x s32>) = G_BUILD_VECTOR %mask_elt, %mask_elt - %and:_(<2 x s32>) = G_AND %load, %mask - %inreg:_(<2 x s32>) = G_SEXT_INREG %and, 8 - $vgpr0_vgpr1 = COPY %inreg + %load:_(<2 x i32>) = G_LOAD %ptr(p1) :: (volatile load (<2 x i32>), addrspace 1) + %mask_elt:_(i32) = G_CONSTANT i32 255 + %mask:_(<2 x i32>) = G_BUILD_VECTOR %mask_elt(i32), %mask_elt(i32) + %and:_(<2 x i32>) = G_AND %load, %mask + %inreg:_(<2 x i32>) = G_SEXT_INREG %and, 8 + $vgpr0_vgpr1 = COPY %inreg(<2 x i32>) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-clamp-fmed3-const.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-clamp-fmed3-const.mir index a97d905f2a978..ce5229c146ede 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-clamp-fmed3-const.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-clamp-fmed3-const.mir @@ -17,32 +17,58 @@ body: | ; CHECK-LABEL: name: test_fmed3_f32_known_nnan_ieee_true ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = nnan G_AMDGPU_CLAMP [[FMUL]] - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 2.000000e+00 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C]](f32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(f32) = G_FMUL [[BITCAST1]], [[BITCAST2]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 1.000000e+00 + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 0.000000e+00 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C2]](f32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST3]](i32) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C1]](f32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST4]](i32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:vgpr(i32) = G_BITCAST [[FMUL]](f32) + ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(i32) = nnan G_AMDGPU_FMED3 [[BITCAST5]], [[COPY2]], [[COPY3]] + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](i32) ; ; GFX12-LABEL: name: test_fmed3_f32_known_nnan_ieee_true ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GFX12-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]] - ; GFX12-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = nnan G_AMDGPU_CLAMP [[FMUL]] - ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32) - %0:vgpr(s32) = COPY $vgpr0 - %2:sgpr(s32) = G_FCONSTANT float 2.000000e+00 - %8:vgpr(s32) = COPY %2(s32) - %3:vgpr(s32) = G_FMUL %0, %8 - %6:sgpr(s32) = G_FCONSTANT float 1.000000e+00 - %5:sgpr(s32) = G_FCONSTANT float 0.000000e+00 - %9:vgpr(s32) = COPY %5(s32) - %10:vgpr(s32) = COPY %6(s32) - %4:vgpr(s32) = nnan G_AMDGPU_FMED3 %3(s32), %9(s32), %10(s32) - $vgpr0 = COPY %4(s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 2.000000e+00 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C]](f32) + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; GFX12-NEXT: [[FMUL:%[0-9]+]]:vgpr(f32) = G_FMUL [[BITCAST1]], [[BITCAST2]] + ; GFX12-NEXT: [[C1:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX12-NEXT: [[C2:%[0-9]+]]:sgpr(f32) = 
G_FCONSTANT float 0.000000e+00 + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C2]](f32) + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST3]](i32) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C1]](f32) + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST4]](i32) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:vgpr(i32) = G_BITCAST [[FMUL]](f32) + ; GFX12-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(i32) = nnan G_AMDGPU_FMED3 [[BITCAST5]], [[COPY2]], [[COPY3]] + ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](i32) + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(f32) = G_FCONSTANT float 2.000000e+00 + %2:sgpr(i32) = G_BITCAST %1(f32) + %3:vgpr(i32) = COPY %2(i32) + %4:vgpr(f32) = G_BITCAST %0(i32) + %5:vgpr(f32) = G_BITCAST %3(i32) + %6:vgpr(f32) = G_FMUL %4, %5 + %7:sgpr(f32) = G_FCONSTANT float 1.000000e+00 + %8:sgpr(f32) = G_FCONSTANT float 0.000000e+00 + %9:sgpr(i32) = G_BITCAST %8(f32) + %10:vgpr(i32) = COPY %9(i32) + %11:sgpr(i32) = G_BITCAST %7(f32) + %12:vgpr(i32) = COPY %11(i32) + %13:vgpr(i32) = G_BITCAST %6(f32) + %14:vgpr(i32) = nnan G_AMDGPU_FMED3 %13, %10, %12 + $vgpr0 = COPY %14(i32) ... --- @@ -61,38 +87,64 @@ body: | ; CHECK-LABEL: name: test_fmed3_f16_known_nnan_ieee_false ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s16) = G_FCONSTANT half 0xH4000 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s16) = COPY [[C]](s16) - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s16) = G_FMUL [[TRUNC]], [[COPY1]] - ; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s16) = nnan G_AMDGPU_CLAMP [[FMUL]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[AMDGPU_CLAMP]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(f16) = G_FCONSTANT half 0xH4000 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(i16) = G_BITCAST [[C]](f16) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i16) = COPY [[BITCAST]](i16) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f16) = G_BITCAST [[TRUNC]](i16) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:vgpr(f16) = G_BITCAST [[COPY1]](i16) + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(f16) = G_FMUL [[BITCAST1]], [[BITCAST2]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(f16) = G_FCONSTANT half 0xH3C00 + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(f16) = G_FCONSTANT half 0xH0000 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:sgpr(i16) = G_BITCAST [[C2]](f16) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i16) = COPY [[BITCAST3]](i16) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:sgpr(i16) = G_BITCAST [[C1]](f16) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i16) = COPY [[BITCAST4]](i16) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:vgpr(i16) = G_BITCAST [[FMUL]](f16) + ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(i16) = nnan G_AMDGPU_FMED3 [[BITCAST5]], [[COPY2]], [[COPY3]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[AMDGPU_FMED3_]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) ; ; GFX12-LABEL: name: test_fmed3_f16_known_nnan_ieee_false ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s16) = G_FCONSTANT half 0xH4000 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(s16) = COPY [[C]](s16) - ; GFX12-NEXT: [[FMUL:%[0-9]+]]:vgpr(s16) = G_FMUL [[TRUNC]], [[COPY1]] - ; GFX12-NEXT: 
[[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s16) = nnan G_AMDGPU_CLAMP [[FMUL]] - ; GFX12-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[AMDGPU_CLAMP]](s16) - ; GFX12-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %2:vgpr(s32) = COPY $vgpr0 - %0:vgpr(s16) = G_TRUNC %2(s32) - %3:sgpr(s16) = G_FCONSTANT half 0xH4000 - %10:vgpr(s16) = COPY %3(s16) - %4:vgpr(s16) = G_FMUL %0, %10 - %7:sgpr(s16) = G_FCONSTANT half 0xH3C00 - %6:sgpr(s16) = G_FCONSTANT half 0xH0000 - %11:vgpr(s16) = COPY %6(s16) - %12:vgpr(s16) = COPY %7(s16) - %5:vgpr(s16) = nnan G_AMDGPU_FMED3 %4(s16), %11(s16), %12(s16) - %9:vgpr(s32) = G_ANYEXT %5(s16) - $vgpr0 = COPY %9(s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(f16) = G_FCONSTANT half 0xH4000 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:sgpr(i16) = G_BITCAST [[C]](f16) + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(i16) = COPY [[BITCAST]](i16) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:vgpr(f16) = G_BITCAST [[COPY1]](i16) + ; GFX12-NEXT: [[FMUL:%[0-9]+]]:vgpr(f16) = G_FMUL [[BITCAST1]], [[BITCAST2]] + ; GFX12-NEXT: [[C1:%[0-9]+]]:sgpr(f16) = G_FCONSTANT half 0xH3C00 + ; GFX12-NEXT: [[C2:%[0-9]+]]:sgpr(f16) = G_FCONSTANT half 0xH0000 + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:sgpr(i16) = G_BITCAST [[C2]](f16) + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr(i16) = COPY [[BITCAST3]](i16) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:sgpr(i16) = G_BITCAST [[C1]](f16) + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(i16) = COPY [[BITCAST4]](i16) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:vgpr(i16) = G_BITCAST [[FMUL]](f16) + ; GFX12-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(i16) = nnan G_AMDGPU_FMED3 [[BITCAST5]], [[COPY2]], [[COPY3]] + ; GFX12-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[AMDGPU_FMED3_]](i16) + ; GFX12-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(f16) = G_FCONSTANT half 0xH4000 + %3:sgpr(i16) = G_BITCAST %2(f16) + %4:vgpr(i16) = COPY %3(i16) + %5:vgpr(f16) = G_BITCAST %1(i16) + %6:vgpr(f16) = G_BITCAST %4(i16) + %7:vgpr(f16) = G_FMUL %5, %6 + %8:sgpr(f16) = G_FCONSTANT half 0xH3C00 + %9:sgpr(f16) = G_FCONSTANT half 0xH0000 + %10:sgpr(i16) = G_BITCAST %9(f16) + %11:vgpr(i16) = COPY %10(i16) + %12:sgpr(i16) = G_BITCAST %8(f16) + %13:vgpr(i16) = COPY %12(i16) + %14:vgpr(i16) = G_BITCAST %7(f16) + %15:vgpr(i16) = nnan G_AMDGPU_FMED3 %14, %11, %13 + %16:vgpr(i32) = G_ANYEXT %15(i16) + $vgpr0 = COPY %16(i32) ... 
--- @@ -111,35 +163,61 @@ body: | ; CHECK-LABEL: name: test_fmed3_non_SNaN_input_ieee_true_dx10clamp_true ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+01 - ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:vgpr(s32) = G_FCANONICALIZE [[COPY]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[COPY1]] - ; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMINNUM_IEEE]] - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 1.000000e+01 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:vgpr(f32) = G_FCANONICALIZE [[BITCAST]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C]](f32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST1]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(f32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[BITCAST2]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 1.000000e+00 + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 0.000000e+00 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C2]](f32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST3]](i32) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C1]](f32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST4]](i32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:vgpr(i32) = G_BITCAST [[FMINNUM_IEEE]](f32) + ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(i32) = G_AMDGPU_FMED3 [[BITCAST5]], [[COPY2]], [[COPY3]] + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](i32) ; ; GFX12-LABEL: name: test_fmed3_non_SNaN_input_ieee_true_dx10clamp_true ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+01 - ; GFX12-NEXT: [[FCANONICALIZE:%[0-9]+]]:vgpr(s32) = G_FCANONICALIZE [[COPY]] - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GFX12-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[COPY1]] - ; GFX12-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMINNUM_IEEE]] - ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32) - %0:vgpr(s32) = COPY $vgpr0 - %2:sgpr(s32) = G_FCONSTANT float 1.000000e+01 - %8:vgpr(s32) = G_FCANONICALIZE %0 - %9:vgpr(s32) = COPY %2(s32) - %3:vgpr(s32) = G_FMINNUM_IEEE %8, %9 - %6:sgpr(s32) = G_FCONSTANT float 1.000000e+00 - %5:sgpr(s32) = G_FCONSTANT float 0.000000e+00 - %10:vgpr(s32) = COPY %5(s32) - %11:vgpr(s32) = COPY %6(s32) - %4:vgpr(s32) = G_AMDGPU_FMED3 %3(s32), %10(s32), %11(s32) - $vgpr0 = COPY %4(s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 1.000000e+01 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; GFX12-NEXT: [[FCANONICALIZE:%[0-9]+]]:vgpr(f32) = G_FCANONICALIZE [[BITCAST]] + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C]](f32) + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST1]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; GFX12-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(f32) = G_FMINNUM_IEEE [[FCANONICALIZE]], 
[[BITCAST2]] + ; GFX12-NEXT: [[C1:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX12-NEXT: [[C2:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C2]](f32) + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST3]](i32) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C1]](f32) + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST4]](i32) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:vgpr(i32) = G_BITCAST [[FMINNUM_IEEE]](f32) + ; GFX12-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(i32) = G_AMDGPU_FMED3 [[BITCAST5]], [[COPY2]], [[COPY3]] + ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](i32) + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(f32) = G_FCONSTANT float 1.000000e+01 + %2:vgpr(f32) = G_BITCAST %0(i32) + %3:vgpr(f32) = G_FCANONICALIZE %2 + %4:sgpr(i32) = G_BITCAST %1(f32) + %5:vgpr(i32) = COPY %4(i32) + %6:vgpr(f32) = G_BITCAST %5(i32) + %7:vgpr(f32) = G_FMINNUM_IEEE %3, %6 + %8:sgpr(f32) = G_FCONSTANT float 1.000000e+00 + %9:sgpr(f32) = G_FCONSTANT float 0.000000e+00 + %10:sgpr(i32) = G_BITCAST %9(f32) + %11:vgpr(i32) = COPY %10(i32) + %12:sgpr(i32) = G_BITCAST %8(f32) + %13:vgpr(i32) = COPY %12(i32) + %14:vgpr(i32) = G_BITCAST %7(f32) + %15:vgpr(i32) = G_AMDGPU_FMED3 %14, %11, %13 + $vgpr0 = COPY %15(i32) ... --- @@ -158,32 +236,58 @@ body: | ; CHECK-LABEL: name: test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp_true ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMUL]] - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 2.000000e+00 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C]](f32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(f32) = G_FMUL [[BITCAST1]], [[BITCAST2]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 0.000000e+00 + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 1.000000e+00 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C2]](f32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST3]](i32) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C1]](f32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST4]](i32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:vgpr(i32) = G_BITCAST [[FMUL]](f32) + ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(i32) = G_AMDGPU_FMED3 [[BITCAST5]], [[COPY2]], [[COPY3]] + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](i32) ; ; GFX12-LABEL: name: test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp_true ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GFX12-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]] - ; GFX12-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMUL]] - ; GFX12-NEXT: $vgpr0 = COPY 
[[AMDGPU_CLAMP]](s32) - %0:vgpr(s32) = COPY $vgpr0 - %2:sgpr(s32) = G_FCONSTANT float 2.000000e+00 - %8:vgpr(s32) = COPY %2(s32) - %3:vgpr(s32) = G_FMUL %0, %8 - %6:sgpr(s32) = G_FCONSTANT float 0.000000e+00 - %5:sgpr(s32) = G_FCONSTANT float 1.000000e+00 - %9:vgpr(s32) = COPY %5(s32) - %10:vgpr(s32) = COPY %6(s32) - %4:vgpr(s32) = G_AMDGPU_FMED3 %3(s32), %9(s32), %10(s32) - $vgpr0 = COPY %4(s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 2.000000e+00 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C]](f32) + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; GFX12-NEXT: [[FMUL:%[0-9]+]]:vgpr(f32) = G_FMUL [[BITCAST1]], [[BITCAST2]] + ; GFX12-NEXT: [[C1:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX12-NEXT: [[C2:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C2]](f32) + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST3]](i32) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C1]](f32) + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST4]](i32) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:vgpr(i32) = G_BITCAST [[FMUL]](f32) + ; GFX12-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(i32) = G_AMDGPU_FMED3 [[BITCAST5]], [[COPY2]], [[COPY3]] + ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](i32) + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(f32) = G_FCONSTANT float 2.000000e+00 + %2:sgpr(i32) = G_BITCAST %1(f32) + %3:vgpr(i32) = COPY %2(i32) + %4:vgpr(f32) = G_BITCAST %0(i32) + %5:vgpr(f32) = G_BITCAST %3(i32) + %6:vgpr(f32) = G_FMUL %4, %5 + %7:sgpr(f32) = G_FCONSTANT float 0.000000e+00 + %8:sgpr(f32) = G_FCONSTANT float 1.000000e+00 + %9:sgpr(i32) = G_BITCAST %8(f32) + %10:vgpr(i32) = COPY %9(i32) + %11:sgpr(i32) = G_BITCAST %7(f32) + %12:vgpr(i32) = COPY %11(i32) + %13:vgpr(i32) = G_BITCAST %6(f32) + %14:vgpr(i32) = G_AMDGPU_FMED3 %13, %10, %12 + $vgpr0 = COPY %14(i32) ... 
# FixMe: add tests with attributes #3 = {"no-nans-fp-math"="true"} @@ -204,36 +308,58 @@ body: | ; CHECK-LABEL: name: test_fmed3_f32_maybe_NaN_ieee_false ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00 - ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[FMUL]], [[COPY2]], [[COPY3]] - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 2.000000e+00 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C]](f32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(f32) = G_FMUL [[BITCAST1]], [[BITCAST2]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 0.000000e+00 + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 1.000000e+00 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C2]](f32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST3]](i32) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C1]](f32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST4]](i32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:vgpr(i32) = G_BITCAST [[FMUL]](f32) + ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(i32) = G_AMDGPU_FMED3 [[BITCAST5]], [[COPY2]], [[COPY3]] + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](i32) ; ; GFX12-LABEL: name: test_fmed3_f32_maybe_NaN_ieee_false ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GFX12-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]] - ; GFX12-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMUL]] - ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32) - %0:vgpr(s32) = COPY $vgpr0 - %2:sgpr(s32) = G_FCONSTANT float 2.000000e+00 - %8:vgpr(s32) = COPY %2(s32) - %3:vgpr(s32) = G_FMUL %0, %8 - %6:sgpr(s32) = G_FCONSTANT float 0.000000e+00 - %5:sgpr(s32) = G_FCONSTANT float 1.000000e+00 - %9:vgpr(s32) = COPY %5(s32) - %10:vgpr(s32) = COPY %6(s32) - %4:vgpr(s32) = G_AMDGPU_FMED3 %3(s32), %9(s32), %10(s32) - $vgpr0 = COPY %4(s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 2.000000e+00 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C]](f32) + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; GFX12-NEXT: [[FMUL:%[0-9]+]]:vgpr(f32) = G_FMUL [[BITCAST1]], [[BITCAST2]] + ; GFX12-NEXT: [[C1:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX12-NEXT: [[C2:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 
1.000000e+00 + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C2]](f32) + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST3]](i32) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C1]](f32) + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST4]](i32) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:vgpr(i32) = G_BITCAST [[FMUL]](f32) + ; GFX12-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(i32) = G_AMDGPU_FMED3 [[BITCAST5]], [[COPY2]], [[COPY3]] + ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](i32) + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(f32) = G_FCONSTANT float 2.000000e+00 + %2:sgpr(i32) = G_BITCAST %1(f32) + %3:vgpr(i32) = COPY %2(i32) + %4:vgpr(f32) = G_BITCAST %0(i32) + %5:vgpr(f32) = G_BITCAST %3(i32) + %6:vgpr(f32) = G_FMUL %4, %5 + %7:sgpr(f32) = G_FCONSTANT float 0.000000e+00 + %8:sgpr(f32) = G_FCONSTANT float 1.000000e+00 + %9:sgpr(i32) = G_BITCAST %8(f32) + %10:vgpr(i32) = COPY %9(i32) + %11:sgpr(i32) = G_BITCAST %7(f32) + %12:vgpr(i32) = COPY %11(i32) + %13:vgpr(i32) = G_BITCAST %6(f32) + %14:vgpr(i32) = G_AMDGPU_FMED3 %13, %10, %12 + $vgpr0 = COPY %14(i32) ... --- @@ -252,39 +378,61 @@ body: | ; CHECK-LABEL: name: test_fmed3_non_SNaN_input_ieee_true_dx10clamp_false ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+01 - ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:vgpr(s32) = G_FCANONICALIZE [[COPY]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[COPY1]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00 - ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[FMINNUM_IEEE]], [[COPY2]], [[COPY3]] - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 1.000000e+01 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:vgpr(f32) = G_FCANONICALIZE [[BITCAST]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C]](f32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST1]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(f32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[BITCAST2]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 1.000000e+00 + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 0.000000e+00 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C2]](f32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST3]](i32) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C1]](f32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST4]](i32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:vgpr(i32) = G_BITCAST [[FMINNUM_IEEE]](f32) + ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(i32) = G_AMDGPU_FMED3 [[BITCAST5]], [[COPY2]], [[COPY3]] + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](i32) ; ; GFX12-LABEL: name: test_fmed3_non_SNaN_input_ieee_true_dx10clamp_false ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = 
G_FCONSTANT float 1.000000e+01 - ; GFX12-NEXT: [[FCANONICALIZE:%[0-9]+]]:vgpr(s32) = G_FCANONICALIZE [[COPY]] - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GFX12-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[COPY1]] - ; GFX12-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMINNUM_IEEE]] - ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32) - %0:vgpr(s32) = COPY $vgpr0 - %2:sgpr(s32) = G_FCONSTANT float 1.000000e+01 - %8:vgpr(s32) = G_FCANONICALIZE %0 - %9:vgpr(s32) = COPY %2(s32) - %3:vgpr(s32) = G_FMINNUM_IEEE %8, %9 - %6:sgpr(s32) = G_FCONSTANT float 1.000000e+00 - %5:sgpr(s32) = G_FCONSTANT float 0.000000e+00 - %10:vgpr(s32) = COPY %5(s32) - %11:vgpr(s32) = COPY %6(s32) - %4:vgpr(s32) = G_AMDGPU_FMED3 %3(s32), %10(s32), %11(s32) - $vgpr0 = COPY %4(s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 1.000000e+01 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; GFX12-NEXT: [[FCANONICALIZE:%[0-9]+]]:vgpr(f32) = G_FCANONICALIZE [[BITCAST]] + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C]](f32) + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST1]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; GFX12-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(f32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[BITCAST2]] + ; GFX12-NEXT: [[C1:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX12-NEXT: [[C2:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C2]](f32) + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST3]](i32) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C1]](f32) + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST4]](i32) + ; GFX12-NEXT: [[BITCAST5:%[0-9]+]]:vgpr(i32) = G_BITCAST [[FMINNUM_IEEE]](f32) + ; GFX12-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(i32) = G_AMDGPU_FMED3 [[BITCAST5]], [[COPY2]], [[COPY3]] + ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](i32) + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(f32) = G_FCONSTANT float 1.000000e+01 + %2:vgpr(f32) = G_BITCAST %0(i32) + %3:vgpr(f32) = G_FCANONICALIZE %2 + %4:sgpr(i32) = G_BITCAST %1(f32) + %5:vgpr(i32) = COPY %4(i32) + %6:vgpr(f32) = G_BITCAST %5(i32) + %7:vgpr(f32) = G_FMINNUM_IEEE %3, %6 + %8:sgpr(f32) = G_FCONSTANT float 1.000000e+00 + %9:sgpr(f32) = G_FCONSTANT float 0.000000e+00 + %10:sgpr(i32) = G_BITCAST %9(f32) + %11:vgpr(i32) = COPY %10(i32) + %12:sgpr(i32) = G_BITCAST %8(f32) + %13:vgpr(i32) = COPY %12(i32) + %14:vgpr(i32) = G_BITCAST %7(f32) + %15:vgpr(i32) = G_AMDGPU_FMED3 %14, %11, %13 + $vgpr0 = COPY %15(i32) ... 
--- @@ -303,30 +451,56 @@ body: | ; CHECK-LABEL: name: test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMUL]] - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 2.000000e+00 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C]](f32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(f32) = G_FMUL [[BITCAST1]], [[BITCAST2]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 1.000000e+00 + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 0.000000e+00 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C2]](f32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST3]](i32) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C1]](f32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST4]](i32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:vgpr(i32) = G_BITCAST [[FMUL]](f32) + ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(i32) = G_AMDGPU_FMED3 [[BITCAST5]], [[COPY2]], [[COPY3]] + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](i32) ; ; GFX12-LABEL: name: test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00 - ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GFX12-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]] - ; GFX12-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMUL]] - ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32) - %0:vgpr(s32) = COPY $vgpr0 - %2:sgpr(s32) = G_FCONSTANT float 2.000000e+00 - %8:vgpr(s32) = COPY %2(s32) - %3:vgpr(s32) = G_FMUL %0, %8 - %6:sgpr(s32) = G_FCONSTANT float 1.000000e+00 - %5:sgpr(s32) = G_FCONSTANT float 0.000000e+00 - %9:vgpr(s32) = COPY %5(s32) - %10:vgpr(s32) = COPY %6(s32) - %4:vgpr(s32) = G_AMDGPU_FMED3 %3(s32), %9(s32), %10(s32) - $vgpr0 = COPY %4(s32) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 2.000000e+00 + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C]](f32) + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST]](i32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; GFX12-NEXT: [[FMUL:%[0-9]+]]:vgpr(f32) = G_FMUL [[BITCAST1]], [[BITCAST2]] + ; GFX12-NEXT: [[C1:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 1.000000e+00 + ; GFX12-NEXT: [[C2:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 0.000000e+00 + ; GFX12-NEXT: [[BITCAST3:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C2]](f32) + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST3]](i32) + ; GFX12-NEXT: [[BITCAST4:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C1]](f32) + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST4]](i32) + ; GFX12-NEXT: 
[[BITCAST5:%[0-9]+]]:vgpr(i32) = G_BITCAST [[FMUL]](f32) + ; GFX12-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(i32) = G_AMDGPU_FMED3 [[BITCAST5]], [[COPY2]], [[COPY3]] + ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](i32) + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(f32) = G_FCONSTANT float 2.000000e+00 + %2:sgpr(i32) = G_BITCAST %1(f32) + %3:vgpr(i32) = COPY %2(i32) + %4:vgpr(f32) = G_BITCAST %0(i32) + %5:vgpr(f32) = G_BITCAST %3(i32) + %6:vgpr(f32) = G_FMUL %4, %5 + %7:sgpr(f32) = G_FCONSTANT float 1.000000e+00 + %8:sgpr(f32) = G_FCONSTANT float 0.000000e+00 + %9:sgpr(i32) = G_BITCAST %8(f32) + %10:vgpr(i32) = COPY %9(i32) + %11:sgpr(i32) = G_BITCAST %7(f32) + %12:vgpr(i32) = COPY %11(i32) + %13:vgpr(i32) = G_BITCAST %6(f32) + %14:vgpr(i32) = G_AMDGPU_FMED3 %13, %10, %12 + $vgpr0 = COPY %14(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-clamp-minmax-const.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-clamp-minmax-const.mir index 70fd67363648d..c21b7f5c3e9e5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-clamp-minmax-const.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-clamp-minmax-const.mir @@ -17,23 +17,44 @@ body: | ; CHECK-LABEL: name: test_min_max_ValK0_K1_f32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = nnan G_AMDGPU_CLAMP [[FMUL]] - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32) - %0:vgpr(s32) = COPY $vgpr0 - %2:sgpr(s32) = G_FCONSTANT float 2.000000e+00 - %9:vgpr(s32) = COPY %2(s32) - %3:vgpr(s32) = G_FMUL %0, %9 - %4:sgpr(s32) = G_FCONSTANT float 0.000000e+00 - %10:vgpr(s32) = COPY %4(s32) - %5:vgpr(s32) = nnan G_FMAXNUM_IEEE %3, %10 - %6:sgpr(s32) = G_FCONSTANT float 1.000000e+00 - %11:vgpr(s32) = COPY %6(s32) - %7:vgpr(s32) = nnan G_FMINNUM_IEEE %5, %11 - $vgpr0 = COPY %7(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 2.000000e+00 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C]](f32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(f32) = G_FMUL [[BITCAST1]], [[BITCAST2]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 0.000000e+00 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C1]](f32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST3]](i32) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY2]](i32) + ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:vgpr(f32) = nnan G_FMAXNUM_IEEE [[FMUL]], [[BITCAST4]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 1.000000e+00 + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C2]](f32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST5]](i32) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY3]](i32) + ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(f32) = nnan G_FMINNUM_IEEE [[FMAXNUM_IEEE]], [[BITCAST6]] + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:vgpr(i32) = G_BITCAST [[FMINNUM_IEEE]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST7]](i32) + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(f32) = G_FCONSTANT 
float 2.000000e+00 + %2:sgpr(i32) = G_BITCAST %1(f32) + %3:vgpr(i32) = COPY %2(i32) + %4:vgpr(f32) = G_BITCAST %0(i32) + %5:vgpr(f32) = G_BITCAST %3(i32) + %6:vgpr(f32) = G_FMUL %4, %5 + %7:sgpr(f32) = G_FCONSTANT float 0.000000e+00 + %8:sgpr(i32) = G_BITCAST %7(f32) + %9:vgpr(i32) = COPY %8(i32) + %10:vgpr(f32) = G_BITCAST %9(i32) + %11:vgpr(f32) = nnan G_FMAXNUM_IEEE %6, %10 + %12:sgpr(f32) = G_FCONSTANT float 1.000000e+00 + %13:sgpr(i32) = G_BITCAST %12(f32) + %14:vgpr(i32) = COPY %13(i32) + %15:vgpr(f32) = G_BITCAST %14(i32) + %16:vgpr(f32) = nnan G_FMINNUM_IEEE %11, %15 + %17:vgpr(i32) = G_BITCAST %16(f32) + $vgpr0 = COPY %17(i32) ... --- @@ -52,23 +73,44 @@ body: | ; CHECK-LABEL: name: test_min_max_K0Val_K1_f64 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_FCONSTANT double 2.000000e+00 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY [[C]](s64) - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s64) = G_FMUL [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s64) = nnan G_AMDGPU_CLAMP [[FMUL]] - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AMDGPU_CLAMP]](s64) - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %4:sgpr(s64) = G_FCONSTANT double 2.000000e+00 - %13:vgpr(s64) = COPY %4(s64) - %5:vgpr(s64) = G_FMUL %0, %13 - %6:sgpr(s64) = G_FCONSTANT double 0.000000e+00 - %14:vgpr(s64) = COPY %6(s64) - %7:vgpr(s64) = nnan G_FMAXNUM %14, %5 - %8:sgpr(s64) = G_FCONSTANT double 1.000000e+00 - %15:vgpr(s64) = COPY %8(s64) - %9:vgpr(s64) = nnan G_FMINNUM %7, %15 - $vgpr0_vgpr1 = COPY %9(s64) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(f64) = G_FCONSTANT double 2.000000e+00 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(i64) = G_BITCAST [[C]](f64) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY [[BITCAST]](i64) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f64) = G_BITCAST [[COPY]](i64) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:vgpr(f64) = G_BITCAST [[COPY1]](i64) + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(f64) = G_FMUL [[BITCAST1]], [[BITCAST2]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(f64) = G_FCONSTANT double 0.000000e+00 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:sgpr(i64) = G_BITCAST [[C1]](f64) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i64) = COPY [[BITCAST3]](i64) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:vgpr(f64) = G_BITCAST [[COPY2]](i64) + ; CHECK-NEXT: [[FMAXNUM:%[0-9]+]]:vgpr(f64) = nnan G_FMAXNUM [[BITCAST4]], [[FMUL]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(f64) = G_FCONSTANT double 1.000000e+00 + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:sgpr(i64) = G_BITCAST [[C2]](f64) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i64) = COPY [[BITCAST5]](i64) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:vgpr(f64) = G_BITCAST [[COPY3]](i64) + ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:vgpr(f64) = nnan G_FMINNUM [[FMAXNUM]], [[BITCAST6]] + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:vgpr(i64) = G_BITCAST [[FMINNUM]](f64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST7]](i64) + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:sgpr(f64) = G_FCONSTANT double 2.000000e+00 + %2:sgpr(i64) = G_BITCAST %1(f64) + %3:vgpr(i64) = COPY %2(i64) + %4:vgpr(f64) = G_BITCAST %0(i64) + %5:vgpr(f64) = G_BITCAST %3(i64) + %6:vgpr(f64) = G_FMUL %4, %5 + %7:sgpr(f64) = G_FCONSTANT double 0.000000e+00 + %8:sgpr(i64) = G_BITCAST %7(f64) + %9:vgpr(i64) = COPY %8(i64) + %10:vgpr(f64) = G_BITCAST %9(i64) + %11:vgpr(f64) = nnan G_FMAXNUM %10, %6 + %12:sgpr(f64) = G_FCONSTANT double 1.000000e+00 + %13:sgpr(i64) = G_BITCAST %12(f64) + %14:vgpr(i64) = COPY %13(i64) + 
%15:vgpr(f64) = G_BITCAST %14(i64) + %16:vgpr(f64) = nnan G_FMINNUM %11, %15 + %17:vgpr(i64) = G_BITCAST %16(f64) + $vgpr0_vgpr1 = COPY %17(i64) ... --- @@ -87,29 +129,50 @@ body: | ; CHECK-LABEL: name: test_min_K1max_ValK0_f16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s16) = G_FCONSTANT half 0xH4000 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s16) = COPY [[C]](s16) - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s16) = G_FMUL [[TRUNC]], [[COPY1]] - ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:vgpr(s16) = G_FCANONICALIZE [[FMUL]] - ; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s16) = G_AMDGPU_CLAMP [[FCANONICALIZE]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[AMDGPU_CLAMP]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %2:vgpr(s32) = COPY $vgpr0 - %0:vgpr(s16) = G_TRUNC %2(s32) - %3:sgpr(s16) = G_FCONSTANT half 0xH4000 - %12:vgpr(s16) = COPY %3(s16) - %4:vgpr(s16) = G_FMUL %0, %12 - %5:sgpr(s16) = G_FCONSTANT half 0xH0000 - %11:vgpr(s16) = G_FCANONICALIZE %4 - %13:vgpr(s16) = COPY %5(s16) - %6:vgpr(s16) = G_FMAXNUM_IEEE %11, %13 - %7:sgpr(s16) = G_FCONSTANT half 0xH3C00 - %14:vgpr(s16) = COPY %7(s16) - %8:vgpr(s16) = G_FMINNUM_IEEE %14, %6 - %10:vgpr(s32) = G_ANYEXT %8(s16) - $vgpr0 = COPY %10(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(f16) = G_FCONSTANT half 0xH4000 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(i16) = G_BITCAST [[C]](f16) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i16) = COPY [[BITCAST]](i16) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f16) = G_BITCAST [[TRUNC]](i16) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:vgpr(f16) = G_BITCAST [[COPY1]](i16) + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(f16) = G_FMUL [[BITCAST1]], [[BITCAST2]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(f16) = G_FCONSTANT half 0xH0000 + ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:vgpr(f16) = G_FCANONICALIZE [[FMUL]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:sgpr(i16) = G_BITCAST [[C1]](f16) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i16) = COPY [[BITCAST3]](i16) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:vgpr(f16) = G_BITCAST [[COPY2]](i16) + ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:vgpr(f16) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[BITCAST4]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(f16) = G_FCONSTANT half 0xH3C00 + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:sgpr(i16) = G_BITCAST [[C2]](f16) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i16) = COPY [[BITCAST5]](i16) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:vgpr(f16) = G_BITCAST [[COPY3]](i16) + ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(f16) = G_FMINNUM_IEEE [[BITCAST6]], [[FMAXNUM_IEEE]] + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:vgpr(i16) = G_BITCAST [[FMINNUM_IEEE]](f16) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[BITCAST7]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(f16) = G_FCONSTANT half 0xH4000 + %3:sgpr(i16) = G_BITCAST %2(f16) + %4:vgpr(i16) = COPY %3(i16) + %5:vgpr(f16) = G_BITCAST %1(i16) + %6:vgpr(f16) = G_BITCAST %4(i16) + %7:vgpr(f16) = G_FMUL %5, %6 + %8:sgpr(f16) = G_FCONSTANT half 0xH0000 + %9:vgpr(f16) = G_FCANONICALIZE %7 + %10:sgpr(i16) = G_BITCAST %8(f16) + %11:vgpr(i16) = COPY %10(i16) + %12:vgpr(f16) = G_BITCAST %11(i16) + %13:vgpr(f16) = G_FMAXNUM_IEEE %9, %12 + %14:sgpr(f16) = G_FCONSTANT half 0xH3C00 + %15:sgpr(i16) = G_BITCAST %14(f16) 
+ %16:vgpr(i16) = COPY %15(i16) + %17:vgpr(f16) = G_BITCAST %16(i16) + %18:vgpr(f16) = G_FMINNUM_IEEE %17, %13 + %19:vgpr(i16) = G_BITCAST %18(f16) + %20:vgpr(i32) = G_ANYEXT %19(i16) + $vgpr0 = COPY %20(i32) ... --- @@ -128,31 +191,56 @@ body: | ; CHECK-LABEL: name: test_min_K1max_K0Val_f16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s16) = G_FCONSTANT half 0xH4000 - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[C]](s16) - ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(<2 x s16>) = G_FMUL [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(<2 x s16>) = nnan G_AMDGPU_CLAMP [[FMUL]] - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](<2 x s16>) - %0:vgpr(<2 x s16>) = COPY $vgpr0 - %3:sgpr(s16) = G_FCONSTANT half 0xH4000 - %12:sgpr(s32) = G_ANYEXT %3(s16) - %2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %12(s32), %12(s32) - %6:sgpr(s16) = G_FCONSTANT half 0xH0000 - %13:sgpr(s32) = G_ANYEXT %6(s16) - %5:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %13(s32), %13(s32) - %9:sgpr(s16) = G_FCONSTANT half 0xH3C00 - %14:sgpr(s32) = G_ANYEXT %9(s16) - %8:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %14(s32), %14(s32) - %15:vgpr(<2 x s16>) = COPY %2(<2 x s16>) - %4:vgpr(<2 x s16>) = G_FMUL %0, %15 - %16:vgpr(<2 x s16>) = COPY %5(<2 x s16>) - %7:vgpr(<2 x s16>) = nnan G_FMAXNUM %16, %4 - %17:vgpr(<2 x s16>) = COPY %8(<2 x s16>) - %10:vgpr(<2 x s16>) = nnan G_FMINNUM %17, %7 - $vgpr0 = COPY %10(<2 x s16>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(f16) = G_FCONSTANT half 0xH4000 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(i16) = G_BITCAST [[C]](f16) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[BITCAST]](i16) + ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](i32), [[ANYEXT]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(f16) = G_FCONSTANT half 0xH0000 + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(i16) = G_BITCAST [[C1]](f16) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[BITCAST1]](i16) + ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT1]](i32), [[ANYEXT1]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(f16) = G_FCONSTANT half 0xH3C00 + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:sgpr(i16) = G_BITCAST [[C2]](f16) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[BITCAST2]](i16) + ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](i32), [[ANYEXT2]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x i16>) = COPY [[BUILD_VECTOR_TRUNC]](<2 x i16>) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:vgpr(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:vgpr(<2 x f16>) = G_BITCAST [[COPY1]](<2 x i16>) + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(<2 x f16>) = G_FMUL [[BITCAST3]], [[BITCAST4]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x i16>) = COPY [[BUILD_VECTOR_TRUNC1]](<2 x i16>) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:vgpr(<2 x f16>) = G_BITCAST [[COPY2]](<2 x i16>) + ; CHECK-NEXT: [[FMAXNUM:%[0-9]+]]:vgpr(<2 x f16>) = nnan G_FMAXNUM [[BITCAST5]], [[FMUL]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x i16>) = COPY [[BUILD_VECTOR_TRUNC2]](<2 x i16>) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:vgpr(<2 x f16>) = G_BITCAST 
[[COPY3]](<2 x i16>) + ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:vgpr(<2 x f16>) = nnan G_FMINNUM [[BITCAST6]], [[FMAXNUM]] + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:vgpr(<2 x i16>) = G_BITCAST [[FMINNUM]](<2 x f16>) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST7]](<2 x i16>) + %0:vgpr(<2 x i16>) = COPY $vgpr0 + %1:sgpr(f16) = G_FCONSTANT half 0xH4000 + %2:sgpr(i16) = G_BITCAST %1(f16) + %3:sgpr(i32) = G_ANYEXT %2(i16) + %4:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC %3(i32), %3(i32) + %5:sgpr(f16) = G_FCONSTANT half 0xH0000 + %6:sgpr(i16) = G_BITCAST %5(f16) + %7:sgpr(i32) = G_ANYEXT %6(i16) + %8:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC %7(i32), %7(i32) + %9:sgpr(f16) = G_FCONSTANT half 0xH3C00 + %10:sgpr(i16) = G_BITCAST %9(f16) + %11:sgpr(i32) = G_ANYEXT %10(i16) + %12:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC %11(i32), %11(i32) + %13:vgpr(<2 x i16>) = COPY %4(<2 x i16>) + %14:vgpr(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %15:vgpr(<2 x f16>) = G_BITCAST %13(<2 x i16>) + %16:vgpr(<2 x f16>) = G_FMUL %14, %15 + %17:vgpr(<2 x i16>) = COPY %8(<2 x i16>) + %18:vgpr(<2 x f16>) = G_BITCAST %17(<2 x i16>) + %19:vgpr(<2 x f16>) = nnan G_FMAXNUM %18, %16 + %20:vgpr(<2 x i16>) = COPY %12(<2 x i16>) + %21:vgpr(<2 x f16>) = G_BITCAST %20(<2 x i16>) + %22:vgpr(<2 x f16>) = nnan G_FMINNUM %21, %19 + %23:vgpr(<2 x i16>) = G_BITCAST %22(<2 x f16>) + $vgpr0 = COPY %23(<2 x i16>) ... --- @@ -171,34 +259,60 @@ body: | ; CHECK-LABEL: name: test_min_max_splat_padded_with_undef ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s16) = G_FCONSTANT half 0xH4000 - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[C]](s16) - ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(<2 x s16>) = G_FMUL [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:vgpr(<2 x s16>) = G_FCANONICALIZE [[FMUL]] - ; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(<2 x s16>) = G_AMDGPU_CLAMP [[FCANONICALIZE]] - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](<2 x s16>) - %0:vgpr(<2 x s16>) = COPY $vgpr0 - %3:sgpr(s16) = G_FCONSTANT half 0xH4000 - %17:sgpr(s32) = G_ANYEXT %3(s16) - %2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %17(s32), %17(s32) - %6:sgpr(s16) = G_FCONSTANT half 0xH0000 - %18:sgpr(s32) = G_ANYEXT %6(s16) - %19:sgpr(s32) = G_IMPLICIT_DEF - %5:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %18(s32), %19(s32) - %10:sgpr(s16) = G_FCONSTANT half 0xH3C00 - %20:sgpr(s32) = G_ANYEXT %10(s16) - %9:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %20(s32), %19(s32) - %21:vgpr(<2 x s16>) = COPY %2(<2 x s16>) - %4:vgpr(<2 x s16>) = G_FMUL %0, %21 - %16:vgpr(<2 x s16>) = G_FCANONICALIZE %4 - %22:vgpr(<2 x s16>) = COPY %5(<2 x s16>) - %8:vgpr(<2 x s16>) = G_FMAXNUM_IEEE %22, %16 - %23:vgpr(<2 x s16>) = COPY %9(<2 x s16>) - %11:vgpr(<2 x s16>) = G_FMINNUM_IEEE %23, %8 - $vgpr0 = COPY %11(<2 x s16>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(f16) = G_FCONSTANT half 0xH4000 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(i16) = G_BITCAST [[C]](f16) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[BITCAST]](i16) + ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](i32), [[ANYEXT]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(f16) = G_FCONSTANT half 0xH0000 + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(i16) = 
G_BITCAST [[C1]](f16) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[BITCAST1]](i16) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT1]](i32), [[DEF]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(f16) = G_FCONSTANT half 0xH3C00 + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:sgpr(i16) = G_BITCAST [[C2]](f16) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[BITCAST2]](i16) + ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](i32), [[DEF]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x i16>) = COPY [[BUILD_VECTOR_TRUNC]](<2 x i16>) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:vgpr(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:vgpr(<2 x f16>) = G_BITCAST [[COPY1]](<2 x i16>) + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(<2 x f16>) = G_FMUL [[BITCAST3]], [[BITCAST4]] + ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:vgpr(<2 x f16>) = G_FCANONICALIZE [[FMUL]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x i16>) = COPY [[BUILD_VECTOR_TRUNC1]](<2 x i16>) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:vgpr(<2 x f16>) = G_BITCAST [[COPY2]](<2 x i16>) + ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:vgpr(<2 x f16>) = G_FMAXNUM_IEEE [[BITCAST5]], [[FCANONICALIZE]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x i16>) = COPY [[BUILD_VECTOR_TRUNC2]](<2 x i16>) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:vgpr(<2 x f16>) = G_BITCAST [[COPY3]](<2 x i16>) + ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(<2 x f16>) = G_FMINNUM_IEEE [[BITCAST6]], [[FMAXNUM_IEEE]] + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:vgpr(<2 x i16>) = G_BITCAST [[FMINNUM_IEEE]](<2 x f16>) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST7]](<2 x i16>) + %0:vgpr(<2 x i16>) = COPY $vgpr0 + %1:sgpr(f16) = G_FCONSTANT half 0xH4000 + %2:sgpr(i16) = G_BITCAST %1(f16) + %3:sgpr(i32) = G_ANYEXT %2(i16) + %4:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC %3(i32), %3(i32) + %5:sgpr(f16) = G_FCONSTANT half 0xH0000 + %6:sgpr(i16) = G_BITCAST %5(f16) + %7:sgpr(i32) = G_ANYEXT %6(i16) + %8:sgpr(i32) = G_IMPLICIT_DEF + %9:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC %7(i32), %8(i32) + %10:sgpr(f16) = G_FCONSTANT half 0xH3C00 + %11:sgpr(i16) = G_BITCAST %10(f16) + %12:sgpr(i32) = G_ANYEXT %11(i16) + %13:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC %12(i32), %8(i32) + %14:vgpr(<2 x i16>) = COPY %4(<2 x i16>) + %15:vgpr(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %16:vgpr(<2 x f16>) = G_BITCAST %14(<2 x i16>) + %17:vgpr(<2 x f16>) = G_FMUL %15, %16 + %18:vgpr(<2 x f16>) = G_FCANONICALIZE %17 + %19:vgpr(<2 x i16>) = COPY %9(<2 x i16>) + %20:vgpr(<2 x f16>) = G_BITCAST %19(<2 x i16>) + %21:vgpr(<2 x f16>) = G_FMAXNUM_IEEE %20, %18 + %22:vgpr(<2 x i16>) = COPY %13(<2 x i16>) + %23:vgpr(<2 x f16>) = G_BITCAST %22(<2 x i16>) + %24:vgpr(<2 x f16>) = G_FMINNUM_IEEE %23, %21 + %25:vgpr(<2 x i16>) = G_BITCAST %24(<2 x f16>) + $vgpr0 = COPY %25(<2 x i16>) ... 
--- @@ -214,23 +328,44 @@ body: | ; CHECK-LABEL: name: test_max_min_ValK1_K0_f32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = nnan G_AMDGPU_CLAMP [[FMUL]] - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32) - %0:vgpr(s32) = COPY $vgpr0 - %2:sgpr(s32) = G_FCONSTANT float 2.000000e+00 - %9:vgpr(s32) = COPY %2(s32) - %3:vgpr(s32) = G_FMUL %0, %9 - %4:sgpr(s32) = G_FCONSTANT float 1.000000e+00 - %10:vgpr(s32) = COPY %4(s32) - %5:vgpr(s32) = nnan G_FMINNUM_IEEE %3, %10 - %6:sgpr(s32) = G_FCONSTANT float 0.000000e+00 - %11:vgpr(s32) = COPY %6(s32) - %7:vgpr(s32) = nnan G_FMAXNUM_IEEE %5, %11 - $vgpr0 = COPY %7(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 2.000000e+00 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C]](f32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(f32) = G_FMUL [[BITCAST1]], [[BITCAST2]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 1.000000e+00 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C1]](f32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST3]](i32) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY2]](i32) + ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(f32) = nnan G_FMINNUM_IEEE [[FMUL]], [[BITCAST4]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 0.000000e+00 + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C2]](f32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST5]](i32) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY3]](i32) + ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:vgpr(f32) = nnan G_FMAXNUM_IEEE [[FMINNUM_IEEE]], [[BITCAST6]] + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:vgpr(i32) = G_BITCAST [[FMAXNUM_IEEE]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST7]](i32) + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(f32) = G_FCONSTANT float 2.000000e+00 + %2:sgpr(i32) = G_BITCAST %1(f32) + %3:vgpr(i32) = COPY %2(i32) + %4:vgpr(f32) = G_BITCAST %0(i32) + %5:vgpr(f32) = G_BITCAST %3(i32) + %6:vgpr(f32) = G_FMUL %4, %5 + %7:sgpr(f32) = G_FCONSTANT float 1.000000e+00 + %8:sgpr(i32) = G_BITCAST %7(f32) + %9:vgpr(i32) = COPY %8(i32) + %10:vgpr(f32) = G_BITCAST %9(i32) + %11:vgpr(f32) = nnan G_FMINNUM_IEEE %6, %10 + %12:sgpr(f32) = G_FCONSTANT float 0.000000e+00 + %13:sgpr(i32) = G_BITCAST %12(f32) + %14:vgpr(i32) = COPY %13(i32) + %15:vgpr(f32) = G_BITCAST %14(i32) + %16:vgpr(f32) = nnan G_FMAXNUM_IEEE %11, %15 + %17:vgpr(i32) = G_BITCAST %16(f32) + $vgpr0 = COPY %17(i32) ... 
--- @@ -249,23 +384,44 @@ body: | ; CHECK-LABEL: name: test_max_min_K1Val_K0_f64 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_FCONSTANT double 2.000000e+00 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY [[C]](s64) - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s64) = G_FMUL [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s64) = nnan G_AMDGPU_CLAMP [[FMUL]] - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AMDGPU_CLAMP]](s64) - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %4:sgpr(s64) = G_FCONSTANT double 2.000000e+00 - %13:vgpr(s64) = COPY %4(s64) - %5:vgpr(s64) = G_FMUL %0, %13 - %6:sgpr(s64) = G_FCONSTANT double 1.000000e+00 - %14:vgpr(s64) = COPY %6(s64) - %7:vgpr(s64) = nnan G_FMINNUM %14, %5 - %8:sgpr(s64) = G_FCONSTANT double 0.000000e+00 - %15:vgpr(s64) = COPY %8(s64) - %9:vgpr(s64) = nnan G_FMAXNUM %7, %15 - $vgpr0_vgpr1 = COPY %9(s64) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(f64) = G_FCONSTANT double 2.000000e+00 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(i64) = G_BITCAST [[C]](f64) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY [[BITCAST]](i64) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f64) = G_BITCAST [[COPY]](i64) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:vgpr(f64) = G_BITCAST [[COPY1]](i64) + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(f64) = G_FMUL [[BITCAST1]], [[BITCAST2]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(f64) = G_FCONSTANT double 1.000000e+00 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:sgpr(i64) = G_BITCAST [[C1]](f64) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i64) = COPY [[BITCAST3]](i64) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:vgpr(f64) = G_BITCAST [[COPY2]](i64) + ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:vgpr(f64) = nnan G_FMINNUM [[BITCAST4]], [[FMUL]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(f64) = G_FCONSTANT double 0.000000e+00 + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:sgpr(i64) = G_BITCAST [[C2]](f64) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i64) = COPY [[BITCAST5]](i64) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:vgpr(f64) = G_BITCAST [[COPY3]](i64) + ; CHECK-NEXT: [[FMAXNUM:%[0-9]+]]:vgpr(f64) = nnan G_FMAXNUM [[FMINNUM]], [[BITCAST6]] + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:vgpr(i64) = G_BITCAST [[FMAXNUM]](f64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST7]](i64) + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:sgpr(f64) = G_FCONSTANT double 2.000000e+00 + %2:sgpr(i64) = G_BITCAST %1(f64) + %3:vgpr(i64) = COPY %2(i64) + %4:vgpr(f64) = G_BITCAST %0(i64) + %5:vgpr(f64) = G_BITCAST %3(i64) + %6:vgpr(f64) = G_FMUL %4, %5 + %7:sgpr(f64) = G_FCONSTANT double 1.000000e+00 + %8:sgpr(i64) = G_BITCAST %7(f64) + %9:vgpr(i64) = COPY %8(i64) + %10:vgpr(f64) = G_BITCAST %9(i64) + %11:vgpr(f64) = nnan G_FMINNUM %10, %6 + %12:sgpr(f64) = G_FCONSTANT double 0.000000e+00 + %13:sgpr(i64) = G_BITCAST %12(f64) + %14:vgpr(i64) = COPY %13(i64) + %15:vgpr(f64) = G_BITCAST %14(i64) + %16:vgpr(f64) = nnan G_FMAXNUM %11, %15 + %17:vgpr(i64) = G_BITCAST %16(f64) + $vgpr0_vgpr1 = COPY %17(i64) ... 
--- @@ -284,27 +440,48 @@ body: | ; CHECK-LABEL: name: test_max_K0min_ValK1_f16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s16) = G_FCONSTANT half 0xH4000 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s16) = COPY [[C]](s16) - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s16) = G_FMUL [[TRUNC]], [[COPY1]] - ; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s16) = nnan G_AMDGPU_CLAMP [[FMUL]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[AMDGPU_CLAMP]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %2:vgpr(s32) = COPY $vgpr0 - %0:vgpr(s16) = G_TRUNC %2(s32) - %3:sgpr(s16) = G_FCONSTANT half 0xH4000 - %11:vgpr(s16) = COPY %3(s16) - %4:vgpr(s16) = G_FMUL %0, %11 - %5:sgpr(s16) = G_FCONSTANT half 0xH3C00 - %12:vgpr(s16) = COPY %5(s16) - %6:vgpr(s16) = nnan G_FMINNUM_IEEE %4, %12 - %7:sgpr(s16) = G_FCONSTANT half 0xH0000 - %13:vgpr(s16) = COPY %7(s16) - %8:vgpr(s16) = nnan G_FMAXNUM_IEEE %13, %6 - %10:vgpr(s32) = G_ANYEXT %8(s16) - $vgpr0 = COPY %10(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(f16) = G_FCONSTANT half 0xH4000 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(i16) = G_BITCAST [[C]](f16) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i16) = COPY [[BITCAST]](i16) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f16) = G_BITCAST [[TRUNC]](i16) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:vgpr(f16) = G_BITCAST [[COPY1]](i16) + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(f16) = G_FMUL [[BITCAST1]], [[BITCAST2]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(f16) = G_FCONSTANT half 0xH3C00 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:sgpr(i16) = G_BITCAST [[C1]](f16) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i16) = COPY [[BITCAST3]](i16) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:vgpr(f16) = G_BITCAST [[COPY2]](i16) + ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(f16) = nnan G_FMINNUM_IEEE [[FMUL]], [[BITCAST4]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(f16) = G_FCONSTANT half 0xH0000 + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:sgpr(i16) = G_BITCAST [[C2]](f16) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i16) = COPY [[BITCAST5]](i16) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:vgpr(f16) = G_BITCAST [[COPY3]](i16) + ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:vgpr(f16) = nnan G_FMAXNUM_IEEE [[BITCAST6]], [[FMINNUM_IEEE]] + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:vgpr(i16) = G_BITCAST [[FMAXNUM_IEEE]](f16) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[BITCAST7]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(f16) = G_FCONSTANT half 0xH4000 + %3:sgpr(i16) = G_BITCAST %2(f16) + %4:vgpr(i16) = COPY %3(i16) + %5:vgpr(f16) = G_BITCAST %1(i16) + %6:vgpr(f16) = G_BITCAST %4(i16) + %7:vgpr(f16) = G_FMUL %5, %6 + %8:sgpr(f16) = G_FCONSTANT half 0xH3C00 + %9:sgpr(i16) = G_BITCAST %8(f16) + %10:vgpr(i16) = COPY %9(i16) + %11:vgpr(f16) = G_BITCAST %10(i16) + %12:vgpr(f16) = nnan G_FMINNUM_IEEE %7, %11 + %13:sgpr(f16) = G_FCONSTANT half 0xH0000 + %14:sgpr(i16) = G_BITCAST %13(f16) + %15:vgpr(i16) = COPY %14(i16) + %16:vgpr(f16) = G_BITCAST %15(i16) + %17:vgpr(f16) = nnan G_FMAXNUM_IEEE %16, %12 + %18:vgpr(i16) = G_BITCAST %17(f16) + %19:vgpr(i32) = G_ANYEXT %18(i16) + $vgpr0 = COPY %19(i32) ... 
--- @@ -323,32 +500,58 @@ body: | ; CHECK-LABEL: name: test_max_K0min_K1Val_v2f16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s16) = G_FCONSTANT half 0xH4000 - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[C]](s16) - ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(<2 x s16>) = G_FMUL [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(<2 x s16>) = nnan G_AMDGPU_CLAMP [[FMUL]] - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](<2 x s16>) - %0:vgpr(<2 x s16>) = COPY $vgpr0 - %3:sgpr(s16) = G_FCONSTANT half 0xH4000 - %13:sgpr(s32) = G_ANYEXT %3(s16) - %2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %13(s32), %13(s32) - %6:sgpr(s16) = G_FCONSTANT half 0xH3C00 - %14:sgpr(s32) = G_ANYEXT %6(s16) - %15:sgpr(s32) = G_IMPLICIT_DEF - %5:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %14(s32), %15(s32) - %10:sgpr(s16) = G_FCONSTANT half 0xH0000 - %16:sgpr(s32) = G_ANYEXT %10(s16) - %9:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %15(s32), %16(s32) - %17:vgpr(<2 x s16>) = COPY %2(<2 x s16>) - %4:vgpr(<2 x s16>) = G_FMUL %0, %17 - %18:vgpr(<2 x s16>) = COPY %5(<2 x s16>) - %8:vgpr(<2 x s16>) = nnan G_FMINNUM %18, %4 - %19:vgpr(<2 x s16>) = COPY %9(<2 x s16>) - %11:vgpr(<2 x s16>) = nnan G_FMAXNUM %19, %8 - $vgpr0 = COPY %11(<2 x s16>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(f16) = G_FCONSTANT half 0xH4000 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(i16) = G_BITCAST [[C]](f16) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[BITCAST]](i16) + ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](i32), [[ANYEXT]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(f16) = G_FCONSTANT half 0xH3C00 + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(i16) = G_BITCAST [[C1]](f16) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[BITCAST1]](i16) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT1]](i32), [[DEF]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(f16) = G_FCONSTANT half 0xH0000 + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:sgpr(i16) = G_BITCAST [[C2]](f16) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[BITCAST2]](i16) + ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC [[DEF]](i32), [[ANYEXT2]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x i16>) = COPY [[BUILD_VECTOR_TRUNC]](<2 x i16>) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:vgpr(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:vgpr(<2 x f16>) = G_BITCAST [[COPY1]](<2 x i16>) + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(<2 x f16>) = G_FMUL [[BITCAST3]], [[BITCAST4]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x i16>) = COPY [[BUILD_VECTOR_TRUNC1]](<2 x i16>) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:vgpr(<2 x f16>) = G_BITCAST [[COPY2]](<2 x i16>) + ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:vgpr(<2 x f16>) = nnan G_FMINNUM [[BITCAST5]], [[FMUL]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x i16>) = COPY [[BUILD_VECTOR_TRUNC2]](<2 x i16>) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:vgpr(<2 x f16>) = G_BITCAST [[COPY3]](<2 x i16>) + ; CHECK-NEXT: [[FMAXNUM:%[0-9]+]]:vgpr(<2 x f16>) = nnan G_FMAXNUM [[BITCAST6]], [[FMINNUM]] + ; 
CHECK-NEXT: [[BITCAST7:%[0-9]+]]:vgpr(<2 x i16>) = G_BITCAST [[FMAXNUM]](<2 x f16>) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST7]](<2 x i16>) + %0:vgpr(<2 x i16>) = COPY $vgpr0 + %1:sgpr(f16) = G_FCONSTANT half 0xH4000 + %2:sgpr(i16) = G_BITCAST %1(f16) + %3:sgpr(i32) = G_ANYEXT %2(i16) + %4:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC %3(i32), %3(i32) + %5:sgpr(f16) = G_FCONSTANT half 0xH3C00 + %6:sgpr(i16) = G_BITCAST %5(f16) + %7:sgpr(i32) = G_ANYEXT %6(i16) + %8:sgpr(i32) = G_IMPLICIT_DEF + %9:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC %7(i32), %8(i32) + %10:sgpr(f16) = G_FCONSTANT half 0xH0000 + %11:sgpr(i16) = G_BITCAST %10(f16) + %12:sgpr(i32) = G_ANYEXT %11(i16) + %13:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC %8(i32), %12(i32) + %14:vgpr(<2 x i16>) = COPY %4(<2 x i16>) + %15:vgpr(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %16:vgpr(<2 x f16>) = G_BITCAST %14(<2 x i16>) + %17:vgpr(<2 x f16>) = G_FMUL %15, %16 + %18:vgpr(<2 x i16>) = COPY %9(<2 x i16>) + %19:vgpr(<2 x f16>) = G_BITCAST %18(<2 x i16>) + %20:vgpr(<2 x f16>) = nnan G_FMINNUM %19, %17 + %21:vgpr(<2 x i16>) = COPY %13(<2 x i16>) + %22:vgpr(<2 x f16>) = G_BITCAST %21(<2 x i16>) + %23:vgpr(<2 x f16>) = nnan G_FMAXNUM %22, %20 + %24:vgpr(<2 x i16>) = G_BITCAST %23(<2 x f16>) + $vgpr0 = COPY %24(<2 x i16>) ... # FixMe: add tests with attributes #3 = {"no-nans-fp-math"="true"} @@ -369,22 +572,34 @@ body: | ; CHECK-LABEL: name: test_min_max_K0_gt_K1 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:vgpr(s32) = nnan G_FMAXNUM_IEEE [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(s32) = nnan G_FMINNUM_IEEE [[FMAXNUM_IEEE]], [[COPY2]] - ; CHECK-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) - %0:vgpr(s32) = COPY $vgpr0 - %2:sgpr(s32) = G_FCONSTANT float 1.000000e+00 - %7:vgpr(s32) = COPY %2(s32) - %3:vgpr(s32) = nnan G_FMAXNUM_IEEE %0, %7 - %4:sgpr(s32) = G_FCONSTANT float 0.000000e+00 - %8:vgpr(s32) = COPY %4(s32) - %5:vgpr(s32) = nnan G_FMINNUM_IEEE %3, %8 - $vgpr0 = COPY %5(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 1.000000e+00 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C]](f32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:vgpr(f32) = nnan G_FMAXNUM_IEEE [[BITCAST1]], [[BITCAST2]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 0.000000e+00 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C1]](f32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST3]](i32) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY2]](i32) + ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(f32) = nnan G_FMINNUM_IEEE [[FMAXNUM_IEEE]], [[BITCAST4]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:vgpr(i32) = G_BITCAST [[FMINNUM_IEEE]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST5]](i32) + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(f32) = G_FCONSTANT float 1.000000e+00 + %2:sgpr(i32) = G_BITCAST %1(f32) + %3:vgpr(i32) = COPY %2(i32) + %4:vgpr(f32) = G_BITCAST %0(i32) + 
%5:vgpr(f32) = G_BITCAST %3(i32) + %6:vgpr(f32) = nnan G_FMAXNUM_IEEE %4, %5 + %7:sgpr(f32) = G_FCONSTANT float 0.000000e+00 + %8:sgpr(i32) = G_BITCAST %7(f32) + %9:vgpr(i32) = COPY %8(i32) + %10:vgpr(f32) = G_BITCAST %9(i32) + %11:vgpr(f32) = nnan G_FMINNUM_IEEE %6, %10 + %12:vgpr(i32) = G_BITCAST %11(f32) + $vgpr0 = COPY %12(i32) ... --- @@ -403,22 +618,34 @@ body: | ; CHECK-LABEL: name: test_max_min_K0_gt_K1 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(s32) = nnan G_FMINNUM_IEEE [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:vgpr(s32) = nnan G_FMAXNUM_IEEE [[FMINNUM_IEEE]], [[COPY2]] - ; CHECK-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) - %0:vgpr(s32) = COPY $vgpr0 - %2:sgpr(s32) = G_FCONSTANT float 0.000000e+00 - %7:vgpr(s32) = COPY %2(s32) - %3:vgpr(s32) = nnan G_FMINNUM_IEEE %0, %7 - %4:sgpr(s32) = G_FCONSTANT float 1.000000e+00 - %8:vgpr(s32) = COPY %4(s32) - %5:vgpr(s32) = nnan G_FMAXNUM_IEEE %3, %8 - $vgpr0 = COPY %5(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 0.000000e+00 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C]](f32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(f32) = nnan G_FMINNUM_IEEE [[BITCAST1]], [[BITCAST2]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 1.000000e+00 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C1]](f32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST3]](i32) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY2]](i32) + ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:vgpr(f32) = nnan G_FMAXNUM_IEEE [[FMINNUM_IEEE]], [[BITCAST4]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:vgpr(i32) = G_BITCAST [[FMAXNUM_IEEE]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST5]](i32) + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(f32) = G_FCONSTANT float 0.000000e+00 + %2:sgpr(i32) = G_BITCAST %1(f32) + %3:vgpr(i32) = COPY %2(i32) + %4:vgpr(f32) = G_BITCAST %0(i32) + %5:vgpr(f32) = G_BITCAST %3(i32) + %6:vgpr(f32) = nnan G_FMINNUM_IEEE %4, %5 + %7:sgpr(f32) = G_FCONSTANT float 1.000000e+00 + %8:sgpr(i32) = G_BITCAST %7(f32) + %9:vgpr(i32) = COPY %8(i32) + %10:vgpr(f32) = G_BITCAST %9(i32) + %11:vgpr(f32) = nnan G_FMAXNUM_IEEE %6, %10 + %12:vgpr(i32) = G_BITCAST %11(f32) + $vgpr0 = COPY %12(i32) ... 
--- @@ -437,28 +664,44 @@ body: | ; CHECK-LABEL: name: test_min_max_maybe_NaN_input_ieee_false ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[FMAXNUM:%[0-9]+]]:vgpr(s32) = G_FMAXNUM [[FMUL]], [[COPY2]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:vgpr(s32) = G_FMINNUM [[FMAXNUM]], [[COPY3]] - ; CHECK-NEXT: $vgpr0 = COPY [[FMINNUM]](s32) - %0:vgpr(s32) = COPY $vgpr0 - %2:sgpr(s32) = G_FCONSTANT float 2.000000e+00 - %9:vgpr(s32) = COPY %2(s32) - %3:vgpr(s32) = G_FMUL %0, %9 - %4:sgpr(s32) = G_FCONSTANT float 0.000000e+00 - %10:vgpr(s32) = COPY %4(s32) - %5:vgpr(s32) = G_FMAXNUM %3, %10 - %6:sgpr(s32) = G_FCONSTANT float 1.000000e+00 - %11:vgpr(s32) = COPY %6(s32) - %7:vgpr(s32) = G_FMINNUM %5, %11 - $vgpr0 = COPY %7(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 2.000000e+00 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C]](f32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(f32) = G_FMUL [[BITCAST1]], [[BITCAST2]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 0.000000e+00 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C1]](f32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST3]](i32) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY2]](i32) + ; CHECK-NEXT: [[FMAXNUM:%[0-9]+]]:vgpr(f32) = G_FMAXNUM [[FMUL]], [[BITCAST4]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 1.000000e+00 + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C2]](f32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST5]](i32) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY3]](i32) + ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:vgpr(f32) = G_FMINNUM [[FMAXNUM]], [[BITCAST6]] + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:vgpr(i32) = G_BITCAST [[FMINNUM]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST7]](i32) + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(f32) = G_FCONSTANT float 2.000000e+00 + %2:sgpr(i32) = G_BITCAST %1(f32) + %3:vgpr(i32) = COPY %2(i32) + %4:vgpr(f32) = G_BITCAST %0(i32) + %5:vgpr(f32) = G_BITCAST %3(i32) + %6:vgpr(f32) = G_FMUL %4, %5 + %7:sgpr(f32) = G_FCONSTANT float 0.000000e+00 + %8:sgpr(i32) = G_BITCAST %7(f32) + %9:vgpr(i32) = COPY %8(i32) + %10:vgpr(f32) = G_BITCAST %9(i32) + %11:vgpr(f32) = G_FMAXNUM %6, %10 + %12:sgpr(f32) = G_FCONSTANT float 1.000000e+00 + %13:sgpr(i32) = G_BITCAST %12(f32) + %14:vgpr(i32) = COPY %13(i32) + %15:vgpr(f32) = G_BITCAST %14(i32) + %16:vgpr(f32) = G_FMINNUM %11, %15 + %17:vgpr(i32) = G_BITCAST %16(f32) + $vgpr0 = COPY %17(i32) ... 
--- @@ -477,29 +720,46 @@ body: | ; CHECK-LABEL: name: test_min_max_maybe_NaN_input_ieee_true_dx10clamp_false ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00 - ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:vgpr(s32) = G_FCANONICALIZE [[FMUL]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[FCANONICALIZE]], [[COPY2]], [[COPY3]] - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32) - %0:vgpr(s32) = COPY $vgpr0 - %2:sgpr(s32) = G_FCONSTANT float 2.000000e+00 - %10:vgpr(s32) = COPY %2(s32) - %3:vgpr(s32) = G_FMUL %0, %10 - %4:sgpr(s32) = G_FCONSTANT float 0.000000e+00 - %9:vgpr(s32) = G_FCANONICALIZE %3 - %11:vgpr(s32) = COPY %4(s32) - %5:vgpr(s32) = G_FMAXNUM_IEEE %9, %11 - %6:sgpr(s32) = G_FCONSTANT float 1.000000e+00 - %12:vgpr(s32) = COPY %6(s32) - %7:vgpr(s32) = G_FMINNUM_IEEE %5, %12 - $vgpr0 = COPY %7(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 2.000000e+00 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C]](f32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(f32) = G_FMUL [[BITCAST1]], [[BITCAST2]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 0.000000e+00 + ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:vgpr(f32) = G_FCANONICALIZE [[FMUL]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C1]](f32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST3]](i32) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY2]](i32) + ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:vgpr(f32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[BITCAST4]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 1.000000e+00 + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C2]](f32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST5]](i32) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY3]](i32) + ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(f32) = G_FMINNUM_IEEE [[FMAXNUM_IEEE]], [[BITCAST6]] + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:vgpr(i32) = G_BITCAST [[FMINNUM_IEEE]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST7]](i32) + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(f32) = G_FCONSTANT float 2.000000e+00 + %2:sgpr(i32) = G_BITCAST %1(f32) + %3:vgpr(i32) = COPY %2(i32) + %4:vgpr(f32) = G_BITCAST %0(i32) + %5:vgpr(f32) = G_BITCAST %3(i32) + %6:vgpr(f32) = G_FMUL %4, %5 + %7:sgpr(f32) = G_FCONSTANT float 0.000000e+00 + %8:vgpr(f32) = G_FCANONICALIZE %6 + %9:sgpr(i32) = G_BITCAST %7(f32) + %10:vgpr(i32) = COPY %9(i32) + %11:vgpr(f32) = G_BITCAST %10(i32) + %12:vgpr(f32) = G_FMAXNUM_IEEE %8, %11 + %13:sgpr(f32) = G_FCONSTANT float 1.000000e+00 + %14:sgpr(i32) = G_BITCAST %13(f32) + %15:vgpr(i32) = COPY %14(i32) + %16:vgpr(f32) = G_BITCAST %15(i32) + %17:vgpr(f32) = G_FMINNUM_IEEE %12, %16 + %18:vgpr(i32) = G_BITCAST %17(f32) 
+ $vgpr0 = COPY %18(i32) ... --- @@ -518,30 +778,46 @@ body: | ; CHECK-LABEL: name: test_max_min_maybe_NaN_input_ieee_true ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00 - ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:vgpr(s32) = G_FCANONICALIZE [[FMUL]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[COPY2]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:vgpr(s32) = G_FMAXNUM_IEEE [[FMINNUM_IEEE]], [[COPY3]] - ; CHECK-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) - %0:vgpr(s32) = COPY $vgpr0 - %2:sgpr(s32) = G_FCONSTANT float 2.000000e+00 - %10:vgpr(s32) = COPY %2(s32) - %3:vgpr(s32) = G_FMUL %0, %10 - %4:sgpr(s32) = G_FCONSTANT float 1.000000e+00 - %9:vgpr(s32) = G_FCANONICALIZE %3 - %11:vgpr(s32) = COPY %4(s32) - %5:vgpr(s32) = G_FMINNUM_IEEE %9, %11 - %6:sgpr(s32) = G_FCONSTANT float 0.000000e+00 - %12:vgpr(s32) = COPY %6(s32) - %7:vgpr(s32) = G_FMAXNUM_IEEE %5, %12 - $vgpr0 = COPY %7(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 2.000000e+00 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C]](f32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(f32) = G_FMUL [[BITCAST1]], [[BITCAST2]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 1.000000e+00 + ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:vgpr(f32) = G_FCANONICALIZE [[FMUL]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C1]](f32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST3]](i32) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY2]](i32) + ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(f32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[BITCAST4]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 0.000000e+00 + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C2]](f32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST5]](i32) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY3]](i32) + ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:vgpr(f32) = G_FMAXNUM_IEEE [[FMINNUM_IEEE]], [[BITCAST6]] + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:vgpr(i32) = G_BITCAST [[FMAXNUM_IEEE]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST7]](i32) + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(f32) = G_FCONSTANT float 2.000000e+00 + %2:sgpr(i32) = G_BITCAST %1(f32) + %3:vgpr(i32) = COPY %2(i32) + %4:vgpr(f32) = G_BITCAST %0(i32) + %5:vgpr(f32) = G_BITCAST %3(i32) + %6:vgpr(f32) = G_FMUL %4, %5 + %7:sgpr(f32) = G_FCONSTANT float 1.000000e+00 + %8:vgpr(f32) = G_FCANONICALIZE %6 + %9:sgpr(i32) = G_BITCAST %7(f32) + %10:vgpr(i32) = COPY %9(i32) + %11:vgpr(f32) = G_BITCAST %10(i32) + %12:vgpr(f32) = G_FMINNUM_IEEE %8, %11 + %13:sgpr(f32) = G_FCONSTANT float 0.000000e+00 + %14:sgpr(i32) = G_BITCAST %13(f32) + %15:vgpr(i32) = COPY %14(i32) + %16:vgpr(f32) = 
G_BITCAST %15(i32) + %17:vgpr(f32) = G_FMAXNUM_IEEE %12, %16 + %18:vgpr(i32) = G_BITCAST %17(f32) + $vgpr0 = COPY %18(i32) ... --- @@ -560,26 +836,42 @@ body: | ; CHECK-LABEL: name: test_max_min_maybe_NaN_input_ieee_false ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:vgpr(s32) = G_FMINNUM [[FMUL]], [[COPY2]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[FMAXNUM:%[0-9]+]]:vgpr(s32) = G_FMAXNUM [[FMINNUM]], [[COPY3]] - ; CHECK-NEXT: $vgpr0 = COPY [[FMAXNUM]](s32) - %0:vgpr(s32) = COPY $vgpr0 - %2:sgpr(s32) = G_FCONSTANT float 2.000000e+00 - %9:vgpr(s32) = COPY %2(s32) - %3:vgpr(s32) = G_FMUL %0, %9 - %4:sgpr(s32) = G_FCONSTANT float 1.000000e+00 - %10:vgpr(s32) = COPY %4(s32) - %5:vgpr(s32) = G_FMINNUM %3, %10 - %6:sgpr(s32) = G_FCONSTANT float 0.000000e+00 - %11:vgpr(s32) = COPY %6(s32) - %7:vgpr(s32) = G_FMAXNUM %5, %11 - $vgpr0 = COPY %7(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 2.000000e+00 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C]](f32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(f32) = G_FMUL [[BITCAST1]], [[BITCAST2]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 1.000000e+00 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C1]](f32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST3]](i32) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY2]](i32) + ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:vgpr(f32) = G_FMINNUM [[FMUL]], [[BITCAST4]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 0.000000e+00 + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C2]](f32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST5]](i32) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY3]](i32) + ; CHECK-NEXT: [[FMAXNUM:%[0-9]+]]:vgpr(f32) = G_FMAXNUM [[FMINNUM]], [[BITCAST6]] + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:vgpr(i32) = G_BITCAST [[FMAXNUM]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST7]](i32) + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(f32) = G_FCONSTANT float 2.000000e+00 + %2:sgpr(i32) = G_BITCAST %1(f32) + %3:vgpr(i32) = COPY %2(i32) + %4:vgpr(f32) = G_BITCAST %0(i32) + %5:vgpr(f32) = G_BITCAST %3(i32) + %6:vgpr(f32) = G_FMUL %4, %5 + %7:sgpr(f32) = G_FCONSTANT float 1.000000e+00 + %8:sgpr(i32) = G_BITCAST %7(f32) + %9:vgpr(i32) = COPY %8(i32) + %10:vgpr(f32) = G_BITCAST %9(i32) + %11:vgpr(f32) = G_FMINNUM %6, %10 + %12:sgpr(f32) = G_FCONSTANT float 0.000000e+00 + %13:sgpr(i32) = G_BITCAST %12(f32) + %14:vgpr(i32) = COPY %13(i32) + %15:vgpr(f32) = G_BITCAST %14(i32) + %16:vgpr(f32) = G_FMAXNUM %11, %15 + %17:vgpr(i32) = G_BITCAST %16(f32) + $vgpr0 = COPY %17(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-fmed3-minmax-const.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-fmed3-minmax-const.mir index 2f41d86100040..542cec1212497 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-fmed3-minmax-const.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-fmed3-minmax-const.mir @@ -16,21 +16,34 @@ body: | ; CHECK-LABEL: name: test_min_max_ValK0_K1_f32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 4.000000e+00 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = nnan G_AMDGPU_FMED3 [[COPY]], [[COPY1]], [[COPY2]] - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32) - %0:vgpr(s32) = COPY $vgpr0 - %2:sgpr(s32) = G_FCONSTANT float 2.000000e+00 - %7:vgpr(s32) = COPY %2(s32) - %3:vgpr(s32) = nnan G_FMAXNUM_IEEE %0, %7 - %4:sgpr(s32) = G_FCONSTANT float 4.000000e+00 - %8:vgpr(s32) = COPY %4(s32) - %5:vgpr(s32) = nnan G_FMINNUM_IEEE %3, %8 - $vgpr0 = COPY %5(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 2.000000e+00 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C]](f32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:vgpr(f32) = nnan G_FMAXNUM_IEEE [[BITCAST1]], [[BITCAST2]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 4.000000e+00 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C1]](f32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST3]](i32) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY2]](i32) + ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(f32) = nnan G_FMINNUM_IEEE [[FMAXNUM_IEEE]], [[BITCAST4]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:vgpr(i32) = G_BITCAST [[FMINNUM_IEEE]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST5]](i32) + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(f32) = G_FCONSTANT float 2.000000e+00 + %2:sgpr(i32) = G_BITCAST %1(f32) + %3:vgpr(i32) = COPY %2(i32) + %4:vgpr(f32) = G_BITCAST %0(i32) + %5:vgpr(f32) = G_BITCAST %3(i32) + %6:vgpr(f32) = nnan G_FMAXNUM_IEEE %4, %5 + %7:sgpr(f32) = G_FCONSTANT float 4.000000e+00 + %8:sgpr(i32) = G_BITCAST %7(f32) + %9:vgpr(i32) = COPY %8(i32) + %10:vgpr(f32) = G_BITCAST %9(i32) + %11:vgpr(f32) = nnan G_FMINNUM_IEEE %6, %10 + %12:vgpr(i32) = G_BITCAST %11(f32) + $vgpr0 = COPY %12(i32) ... 
--- @@ -48,21 +61,34 @@ body: | ; CHECK-LABEL: name: test_min_max_K0Val_K1_f32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 4.000000e+00 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = nnan G_AMDGPU_FMED3 [[COPY]], [[COPY1]], [[COPY2]] - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32) - %0:vgpr(s32) = COPY $vgpr0 - %2:sgpr(s32) = G_FCONSTANT float 2.000000e+00 - %7:vgpr(s32) = COPY %2(s32) - %3:vgpr(s32) = nnan G_FMAXNUM %7, %0 - %4:sgpr(s32) = G_FCONSTANT float 4.000000e+00 - %8:vgpr(s32) = COPY %4(s32) - %5:vgpr(s32) = nnan G_FMINNUM %3, %8 - $vgpr0 = COPY %5(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 2.000000e+00 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C]](f32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[FMAXNUM:%[0-9]+]]:vgpr(f32) = nnan G_FMAXNUM [[BITCAST1]], [[BITCAST2]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 4.000000e+00 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C1]](f32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST3]](i32) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY2]](i32) + ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:vgpr(f32) = nnan G_FMINNUM [[FMAXNUM]], [[BITCAST4]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:vgpr(i32) = G_BITCAST [[FMINNUM]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST5]](i32) + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(f32) = G_FCONSTANT float 2.000000e+00 + %2:sgpr(i32) = G_BITCAST %1(f32) + %3:vgpr(i32) = COPY %2(i32) + %4:vgpr(f32) = G_BITCAST %3(i32) + %5:vgpr(f32) = G_BITCAST %0(i32) + %6:vgpr(f32) = nnan G_FMAXNUM %4, %5 + %7:sgpr(f32) = G_FCONSTANT float 4.000000e+00 + %8:sgpr(i32) = G_BITCAST %7(f32) + %9:vgpr(i32) = COPY %8(i32) + %10:vgpr(f32) = G_BITCAST %9(i32) + %11:vgpr(f32) = nnan G_FMINNUM %6, %10 + %12:vgpr(i32) = G_BITCAST %11(f32) + $vgpr0 = COPY %12(i32) ... 
--- @@ -80,27 +106,40 @@ body: | ; CHECK-LABEL: name: test_min_K1max_ValK0_f16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s16) = G_FCONSTANT half 0xH4000 - ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:vgpr(s16) = G_FCANONICALIZE [[TRUNC]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s16) = COPY [[C]](s16) - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s16) = G_FCONSTANT half 0xH4400 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[C1]](s16) - ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s16) = G_AMDGPU_FMED3 [[FCANONICALIZE]], [[COPY1]], [[COPY2]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[AMDGPU_FMED3_]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %2:vgpr(s32) = COPY $vgpr0 - %0:vgpr(s16) = G_TRUNC %2(s32) - %3:sgpr(s16) = G_FCONSTANT half 0xH4000 - %9:vgpr(s16) = G_FCANONICALIZE %0 - %10:vgpr(s16) = COPY %3(s16) - %4:vgpr(s16) = G_FMAXNUM_IEEE %9, %10 - %5:sgpr(s16) = G_FCONSTANT half 0xH4400 - %11:vgpr(s16) = COPY %5(s16) - %6:vgpr(s16) = G_FMINNUM_IEEE %11, %4 - %8:vgpr(s32) = G_ANYEXT %6(s16) - $vgpr0 = COPY %8(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(f16) = G_FCONSTANT half 0xH4000 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f16) = G_BITCAST [[TRUNC]](i16) + ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:vgpr(f16) = G_FCANONICALIZE [[BITCAST]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(i16) = G_BITCAST [[C]](f16) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i16) = COPY [[BITCAST1]](i16) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:vgpr(f16) = G_BITCAST [[COPY1]](i16) + ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:vgpr(f16) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[BITCAST2]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(f16) = G_FCONSTANT half 0xH4400 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:sgpr(i16) = G_BITCAST [[C1]](f16) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i16) = COPY [[BITCAST3]](i16) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:vgpr(f16) = G_BITCAST [[COPY2]](i16) + ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(f16) = G_FMINNUM_IEEE [[BITCAST4]], [[FMAXNUM_IEEE]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:vgpr(i16) = G_BITCAST [[FMINNUM_IEEE]](f16) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[BITCAST5]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(f16) = G_FCONSTANT half 0xH4000 + %3:vgpr(f16) = G_BITCAST %1(i16) + %4:vgpr(f16) = G_FCANONICALIZE %3 + %5:sgpr(i16) = G_BITCAST %2(f16) + %6:vgpr(i16) = COPY %5(i16) + %7:vgpr(f16) = G_BITCAST %6(i16) + %8:vgpr(f16) = G_FMAXNUM_IEEE %4, %7 + %9:sgpr(f16) = G_FCONSTANT half 0xH4400 + %10:sgpr(i16) = G_BITCAST %9(f16) + %11:vgpr(i16) = COPY %10(i16) + %12:vgpr(f16) = G_BITCAST %11(i16) + %13:vgpr(f16) = G_FMINNUM_IEEE %12, %8 + %14:vgpr(i16) = G_BITCAST %13(f16) + %15:vgpr(i32) = G_ANYEXT %14(i16) + $vgpr0 = COPY %15(i32) ... 
@@ -119,25 +158,38 @@ body: | ; CHECK-LABEL: name: test_min_K1max_K0Val_f16 ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s16) = G_FCONSTANT half 0xH4000 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s16) = COPY [[C]](s16) - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s16) = G_FCONSTANT half 0xH4400 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[C1]](s16) - ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s16) = nnan G_AMDGPU_FMED3 [[TRUNC]], [[COPY1]], [[COPY2]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[AMDGPU_FMED3_]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %2:vgpr(s32) = COPY $vgpr0 - %0:vgpr(s16) = G_TRUNC %2(s32) - %3:sgpr(s16) = G_FCONSTANT half 0xH4000 - %9:vgpr(s16) = COPY %3(s16) - %4:vgpr(s16) = nnan G_FMAXNUM %9, %0 - %5:sgpr(s16) = G_FCONSTANT half 0xH4400 - %10:vgpr(s16) = COPY %5(s16) - %6:vgpr(s16) = nnan G_FMINNUM %10, %4 - %8:vgpr(s32) = G_ANYEXT %6(s16) - $vgpr0 = COPY %8(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(f16) = G_FCONSTANT half 0xH4000 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(i16) = G_BITCAST [[C]](f16) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i16) = COPY [[BITCAST]](i16) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f16) = G_BITCAST [[COPY1]](i16) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:vgpr(f16) = G_BITCAST [[TRUNC]](i16) + ; CHECK-NEXT: [[FMAXNUM:%[0-9]+]]:vgpr(f16) = nnan G_FMAXNUM [[BITCAST1]], [[BITCAST2]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(f16) = G_FCONSTANT half 0xH4400 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:sgpr(i16) = G_BITCAST [[C1]](f16) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i16) = COPY [[BITCAST3]](i16) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:vgpr(f16) = G_BITCAST [[COPY2]](i16) + ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:vgpr(f16) = nnan G_FMINNUM [[BITCAST4]], [[FMAXNUM]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:vgpr(i16) = G_BITCAST [[FMINNUM]](f16) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[BITCAST5]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(f16) = G_FCONSTANT half 0xH4000 + %3:sgpr(i16) = G_BITCAST %2(f16) + %4:vgpr(i16) = COPY %3(i16) + %5:vgpr(f16) = G_BITCAST %4(i16) + %6:vgpr(f16) = G_BITCAST %1(i16) + %7:vgpr(f16) = nnan G_FMAXNUM %5, %6 + %8:sgpr(f16) = G_FCONSTANT half 0xH4400 + %9:sgpr(i16) = G_BITCAST %8(f16) + %10:vgpr(i16) = COPY %9(i16) + %11:vgpr(f16) = G_BITCAST %10(i16) + %12:vgpr(f16) = nnan G_FMINNUM %11, %7 + %13:vgpr(i16) = G_BITCAST %12(f16) + %14:vgpr(i32) = G_ANYEXT %13(i16) + $vgpr0 = COPY %14(i32) ... 
--- @@ -155,21 +207,34 @@ body: | ; CHECK-LABEL: name: test_max_min_ValK1_K0_f32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 4.000000e+00 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = nnan G_AMDGPU_FMED3 [[COPY]], [[COPY2]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32) - %0:vgpr(s32) = COPY $vgpr0 - %2:sgpr(s32) = G_FCONSTANT float 4.000000e+00 - %7:vgpr(s32) = COPY %2(s32) - %3:vgpr(s32) = nnan G_FMINNUM_IEEE %0, %7 - %4:sgpr(s32) = G_FCONSTANT float 2.000000e+00 - %8:vgpr(s32) = COPY %4(s32) - %5:vgpr(s32) = nnan G_FMAXNUM_IEEE %3, %8 - $vgpr0 = COPY %5(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 4.000000e+00 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C]](f32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(f32) = nnan G_FMINNUM_IEEE [[BITCAST1]], [[BITCAST2]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 2.000000e+00 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C1]](f32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST3]](i32) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY2]](i32) + ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:vgpr(f32) = nnan G_FMAXNUM_IEEE [[FMINNUM_IEEE]], [[BITCAST4]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:vgpr(i32) = G_BITCAST [[FMAXNUM_IEEE]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST5]](i32) + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(f32) = G_FCONSTANT float 4.000000e+00 + %2:sgpr(i32) = G_BITCAST %1(f32) + %3:vgpr(i32) = COPY %2(i32) + %4:vgpr(f32) = G_BITCAST %0(i32) + %5:vgpr(f32) = G_BITCAST %3(i32) + %6:vgpr(f32) = nnan G_FMINNUM_IEEE %4, %5 + %7:sgpr(f32) = G_FCONSTANT float 2.000000e+00 + %8:sgpr(i32) = G_BITCAST %7(f32) + %9:vgpr(i32) = COPY %8(i32) + %10:vgpr(f32) = G_BITCAST %9(i32) + %11:vgpr(f32) = nnan G_FMAXNUM_IEEE %6, %10 + %12:vgpr(i32) = G_BITCAST %11(f32) + $vgpr0 = COPY %12(i32) ... 
--- @@ -187,21 +252,34 @@ body: | ; CHECK-LABEL: name: test_max_min_K1Val_K0_f32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 4.000000e+00 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = nnan G_AMDGPU_FMED3 [[COPY]], [[COPY2]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32) - %0:vgpr(s32) = COPY $vgpr0 - %2:sgpr(s32) = G_FCONSTANT float 4.000000e+00 - %7:vgpr(s32) = COPY %2(s32) - %3:vgpr(s32) = nnan G_FMINNUM %7, %0 - %4:sgpr(s32) = G_FCONSTANT float 2.000000e+00 - %8:vgpr(s32) = COPY %4(s32) - %5:vgpr(s32) = nnan G_FMAXNUM %3, %8 - $vgpr0 = COPY %5(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 4.000000e+00 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C]](f32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:vgpr(f32) = nnan G_FMINNUM [[BITCAST1]], [[BITCAST2]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 2.000000e+00 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C1]](f32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST3]](i32) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY2]](i32) + ; CHECK-NEXT: [[FMAXNUM:%[0-9]+]]:vgpr(f32) = nnan G_FMAXNUM [[FMINNUM]], [[BITCAST4]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:vgpr(i32) = G_BITCAST [[FMAXNUM]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST5]](i32) + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(f32) = G_FCONSTANT float 4.000000e+00 + %2:sgpr(i32) = G_BITCAST %1(f32) + %3:vgpr(i32) = COPY %2(i32) + %4:vgpr(f32) = G_BITCAST %3(i32) + %5:vgpr(f32) = G_BITCAST %0(i32) + %6:vgpr(f32) = nnan G_FMINNUM %4, %5 + %7:sgpr(f32) = G_FCONSTANT float 2.000000e+00 + %8:sgpr(i32) = G_BITCAST %7(f32) + %9:vgpr(i32) = COPY %8(i32) + %10:vgpr(f32) = G_BITCAST %9(i32) + %11:vgpr(f32) = nnan G_FMAXNUM %6, %10 + %12:vgpr(i32) = G_BITCAST %11(f32) + $vgpr0 = COPY %12(i32) ... 
--- @@ -219,25 +297,38 @@ body: | ; CHECK-LABEL: name: test_max_K0min_ValK1_f16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s16) = G_FCONSTANT half 0xH4400 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s16) = COPY [[C]](s16) - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s16) = G_FCONSTANT half 0xH4000 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[C1]](s16) - ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s16) = nnan G_AMDGPU_FMED3 [[TRUNC]], [[COPY2]], [[COPY1]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[AMDGPU_FMED3_]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %2:vgpr(s32) = COPY $vgpr0 - %0:vgpr(s16) = G_TRUNC %2(s32) - %3:sgpr(s16) = G_FCONSTANT half 0xH4400 - %9:vgpr(s16) = COPY %3(s16) - %4:vgpr(s16) = nnan G_FMINNUM_IEEE %0, %9 - %5:sgpr(s16) = G_FCONSTANT half 0xH4000 - %10:vgpr(s16) = COPY %5(s16) - %6:vgpr(s16) = nnan G_FMAXNUM_IEEE %10, %4 - %8:vgpr(s32) = G_ANYEXT %6(s16) - $vgpr0 = COPY %8(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(f16) = G_FCONSTANT half 0xH4400 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(i16) = G_BITCAST [[C]](f16) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i16) = COPY [[BITCAST]](i16) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f16) = G_BITCAST [[TRUNC]](i16) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:vgpr(f16) = G_BITCAST [[COPY1]](i16) + ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(f16) = nnan G_FMINNUM_IEEE [[BITCAST1]], [[BITCAST2]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(f16) = G_FCONSTANT half 0xH4000 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:sgpr(i16) = G_BITCAST [[C1]](f16) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i16) = COPY [[BITCAST3]](i16) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:vgpr(f16) = G_BITCAST [[COPY2]](i16) + ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:vgpr(f16) = nnan G_FMAXNUM_IEEE [[BITCAST4]], [[FMINNUM_IEEE]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:vgpr(i16) = G_BITCAST [[FMAXNUM_IEEE]](f16) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[BITCAST5]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(f16) = G_FCONSTANT half 0xH4400 + %3:sgpr(i16) = G_BITCAST %2(f16) + %4:vgpr(i16) = COPY %3(i16) + %5:vgpr(f16) = G_BITCAST %1(i16) + %6:vgpr(f16) = G_BITCAST %4(i16) + %7:vgpr(f16) = nnan G_FMINNUM_IEEE %5, %6 + %8:sgpr(f16) = G_FCONSTANT half 0xH4000 + %9:sgpr(i16) = G_BITCAST %8(f16) + %10:vgpr(i16) = COPY %9(i16) + %11:vgpr(f16) = G_BITCAST %10(i16) + %12:vgpr(f16) = nnan G_FMAXNUM_IEEE %11, %7 + %13:vgpr(i16) = G_BITCAST %12(f16) + %14:vgpr(i32) = G_ANYEXT %13(i16) + $vgpr0 = COPY %14(i32) ... 
--- @@ -255,25 +346,38 @@ body: | ; CHECK-LABEL: name: test_max_K0min_K1Val_f16 ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s16) = G_FCONSTANT half 0xH4400 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s16) = COPY [[C]](s16) - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s16) = G_FCONSTANT half 0xH4000 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[C1]](s16) - ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s16) = nnan G_AMDGPU_FMED3 [[TRUNC]], [[COPY2]], [[COPY1]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[AMDGPU_FMED3_]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %2:vgpr(s32) = COPY $vgpr0 - %0:vgpr(s16) = G_TRUNC %2(s32) - %3:sgpr(s16) = G_FCONSTANT half 0xH4400 - %9:vgpr(s16) = COPY %3(s16) - %4:vgpr(s16) = nnan G_FMINNUM %9, %0 - %5:sgpr(s16) = G_FCONSTANT half 0xH4000 - %10:vgpr(s16) = COPY %5(s16) - %6:vgpr(s16) = nnan G_FMAXNUM %10, %4 - %8:vgpr(s32) = G_ANYEXT %6(s16) - $vgpr0 = COPY %8(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(f16) = G_FCONSTANT half 0xH4400 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(i16) = G_BITCAST [[C]](f16) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i16) = COPY [[BITCAST]](i16) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f16) = G_BITCAST [[COPY1]](i16) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:vgpr(f16) = G_BITCAST [[TRUNC]](i16) + ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:vgpr(f16) = nnan G_FMINNUM [[BITCAST1]], [[BITCAST2]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(f16) = G_FCONSTANT half 0xH4000 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:sgpr(i16) = G_BITCAST [[C1]](f16) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i16) = COPY [[BITCAST3]](i16) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:vgpr(f16) = G_BITCAST [[COPY2]](i16) + ; CHECK-NEXT: [[FMAXNUM:%[0-9]+]]:vgpr(f16) = nnan G_FMAXNUM [[BITCAST4]], [[FMINNUM]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:vgpr(i16) = G_BITCAST [[FMAXNUM]](f16) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[BITCAST5]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i16) = G_TRUNC %0(i32) + %2:sgpr(f16) = G_FCONSTANT half 0xH4400 + %3:sgpr(i16) = G_BITCAST %2(f16) + %4:vgpr(i16) = COPY %3(i16) + %5:vgpr(f16) = G_BITCAST %4(i16) + %6:vgpr(f16) = G_BITCAST %1(i16) + %7:vgpr(f16) = nnan G_FMINNUM %5, %6 + %8:sgpr(f16) = G_FCONSTANT half 0xH4000 + %9:sgpr(i16) = G_BITCAST %8(f16) + %10:vgpr(i16) = COPY %9(i16) + %11:vgpr(f16) = G_BITCAST %10(i16) + %12:vgpr(f16) = nnan G_FMAXNUM %11, %7 + %13:vgpr(i16) = G_BITCAST %12(f16) + %14:vgpr(i32) = G_ANYEXT %13(i16) + $vgpr0 = COPY %14(i32) ... 
# FixMe: add tests with attributes #2 = {"no-nans-fp-math"="true"} @@ -293,22 +397,34 @@ body: | ; CHECK-LABEL: name: test_min_max_K0_gt_K1 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 4.000000e+00 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:vgpr(s32) = nnan G_FMAXNUM_IEEE [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(s32) = nnan G_FMINNUM_IEEE [[FMAXNUM_IEEE]], [[COPY2]] - ; CHECK-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) - %0:vgpr(s32) = COPY $vgpr0 - %2:sgpr(s32) = G_FCONSTANT float 4.000000e+00 - %7:vgpr(s32) = COPY %2(s32) - %3:vgpr(s32) = nnan G_FMAXNUM_IEEE %0, %7 - %4:sgpr(s32) = G_FCONSTANT float 2.000000e+00 - %8:vgpr(s32) = COPY %4(s32) - %5:vgpr(s32) = nnan G_FMINNUM_IEEE %3, %8 - $vgpr0 = COPY %5(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 4.000000e+00 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C]](f32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:vgpr(f32) = nnan G_FMAXNUM_IEEE [[BITCAST1]], [[BITCAST2]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 2.000000e+00 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C1]](f32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST3]](i32) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY2]](i32) + ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(f32) = nnan G_FMINNUM_IEEE [[FMAXNUM_IEEE]], [[BITCAST4]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:vgpr(i32) = G_BITCAST [[FMINNUM_IEEE]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST5]](i32) + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(f32) = G_FCONSTANT float 4.000000e+00 + %2:sgpr(i32) = G_BITCAST %1(f32) + %3:vgpr(i32) = COPY %2(i32) + %4:vgpr(f32) = G_BITCAST %0(i32) + %5:vgpr(f32) = G_BITCAST %3(i32) + %6:vgpr(f32) = nnan G_FMAXNUM_IEEE %4, %5 + %7:sgpr(f32) = G_FCONSTANT float 2.000000e+00 + %8:sgpr(i32) = G_BITCAST %7(f32) + %9:vgpr(i32) = COPY %8(i32) + %10:vgpr(f32) = G_BITCAST %9(i32) + %11:vgpr(f32) = nnan G_FMINNUM_IEEE %6, %10 + %12:vgpr(i32) = G_BITCAST %11(f32) + $vgpr0 = COPY %12(i32) ... 
--- @@ -326,22 +442,34 @@ body: | ; CHECK-LABEL: name: test_max_min_K0_gt_K1 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(s32) = nnan G_FMINNUM_IEEE [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 4.000000e+00 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:vgpr(s32) = nnan G_FMAXNUM_IEEE [[FMINNUM_IEEE]], [[COPY2]] - ; CHECK-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) - %0:vgpr(s32) = COPY $vgpr0 - %2:sgpr(s32) = G_FCONSTANT float 2.000000e+00 - %7:vgpr(s32) = COPY %2(s32) - %3:vgpr(s32) = nnan G_FMINNUM_IEEE %0, %7 - %4:sgpr(s32) = G_FCONSTANT float 4.000000e+00 - %8:vgpr(s32) = COPY %4(s32) - %5:vgpr(s32) = nnan G_FMAXNUM_IEEE %3, %8 - $vgpr0 = COPY %5(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 2.000000e+00 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C]](f32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(f32) = nnan G_FMINNUM_IEEE [[BITCAST1]], [[BITCAST2]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 4.000000e+00 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C1]](f32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST3]](i32) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY2]](i32) + ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:vgpr(f32) = nnan G_FMAXNUM_IEEE [[FMINNUM_IEEE]], [[BITCAST4]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:vgpr(i32) = G_BITCAST [[FMAXNUM_IEEE]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST5]](i32) + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(f32) = G_FCONSTANT float 2.000000e+00 + %2:sgpr(i32) = G_BITCAST %1(f32) + %3:vgpr(i32) = COPY %2(i32) + %4:vgpr(f32) = G_BITCAST %0(i32) + %5:vgpr(f32) = G_BITCAST %3(i32) + %6:vgpr(f32) = nnan G_FMINNUM_IEEE %4, %5 + %7:sgpr(f32) = G_FCONSTANT float 4.000000e+00 + %8:sgpr(i32) = G_BITCAST %7(f32) + %9:vgpr(i32) = COPY %8(i32) + %10:vgpr(f32) = G_BITCAST %9(i32) + %11:vgpr(f32) = nnan G_FMAXNUM_IEEE %6, %10 + %12:vgpr(i32) = G_BITCAST %11(f32) + $vgpr0 = COPY %12(i32) ... 
--- @@ -359,22 +487,34 @@ body: | ; CHECK-LABEL: name: test_min_max_non_inline_const ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:vgpr(s32) = nnan G_FMAXNUM_IEEE [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 8.000000e+00 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(s32) = nnan G_FMINNUM_IEEE [[FMAXNUM_IEEE]], [[COPY2]] - ; CHECK-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) - %0:vgpr(s32) = COPY $vgpr0 - %2:sgpr(s32) = G_FCONSTANT float 2.000000e+00 - %7:vgpr(s32) = COPY %2(s32) - %3:vgpr(s32) = nnan G_FMAXNUM_IEEE %0, %7 - %4:sgpr(s32) = G_FCONSTANT float 8.000000e+00 - %8:vgpr(s32) = COPY %4(s32) - %5:vgpr(s32) = nnan G_FMINNUM_IEEE %3, %8 - $vgpr0 = COPY %5(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 2.000000e+00 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C]](f32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:vgpr(f32) = nnan G_FMAXNUM_IEEE [[BITCAST1]], [[BITCAST2]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 8.000000e+00 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C1]](f32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST3]](i32) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY2]](i32) + ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(f32) = nnan G_FMINNUM_IEEE [[FMAXNUM_IEEE]], [[BITCAST4]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:vgpr(i32) = G_BITCAST [[FMINNUM_IEEE]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST5]](i32) + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(f32) = G_FCONSTANT float 2.000000e+00 + %2:sgpr(i32) = G_BITCAST %1(f32) + %3:vgpr(i32) = COPY %2(i32) + %4:vgpr(f32) = G_BITCAST %0(i32) + %5:vgpr(f32) = G_BITCAST %3(i32) + %6:vgpr(f32) = nnan G_FMAXNUM_IEEE %4, %5 + %7:sgpr(f32) = G_FCONSTANT float 8.000000e+00 + %8:sgpr(i32) = G_BITCAST %7(f32) + %9:vgpr(i32) = COPY %8(i32) + %10:vgpr(f32) = G_BITCAST %9(i32) + %11:vgpr(f32) = nnan G_FMINNUM_IEEE %6, %10 + %12:vgpr(i32) = G_BITCAST %11(f32) + $vgpr0 = COPY %12(i32) ... 
--- @@ -392,22 +532,34 @@ body: | ; CHECK-LABEL: name: test_min_max_f64 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_FCONSTANT double 2.000000e+00 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY [[C]](s64) - ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:vgpr(s64) = nnan G_FMAXNUM_IEEE [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s64) = G_FCONSTANT double 4.000000e+00 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[C1]](s64) - ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(s64) = nnan G_FMINNUM_IEEE [[FMAXNUM_IEEE]], [[COPY2]] - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[FMINNUM_IEEE]](s64) - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %4:sgpr(s64) = G_FCONSTANT double 2.000000e+00 - %11:vgpr(s64) = COPY %4(s64) - %5:vgpr(s64) = nnan G_FMAXNUM_IEEE %0, %11 - %6:sgpr(s64) = G_FCONSTANT double 4.000000e+00 - %12:vgpr(s64) = COPY %6(s64) - %7:vgpr(s64) = nnan G_FMINNUM_IEEE %5, %12 - $vgpr0_vgpr1 = COPY %7(s64) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(f64) = G_FCONSTANT double 2.000000e+00 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(i64) = G_BITCAST [[C]](f64) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY [[BITCAST]](i64) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f64) = G_BITCAST [[COPY]](i64) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:vgpr(f64) = G_BITCAST [[COPY1]](i64) + ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:vgpr(f64) = nnan G_FMAXNUM_IEEE [[BITCAST1]], [[BITCAST2]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(f64) = G_FCONSTANT double 4.000000e+00 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:sgpr(i64) = G_BITCAST [[C1]](f64) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i64) = COPY [[BITCAST3]](i64) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:vgpr(f64) = G_BITCAST [[COPY2]](i64) + ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(f64) = nnan G_FMINNUM_IEEE [[FMAXNUM_IEEE]], [[BITCAST4]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:vgpr(i64) = G_BITCAST [[FMINNUM_IEEE]](f64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST5]](i64) + %0:vgpr(i64) = COPY $vgpr0_vgpr1 + %1:sgpr(f64) = G_FCONSTANT double 2.000000e+00 + %2:sgpr(i64) = G_BITCAST %1(f64) + %3:vgpr(i64) = COPY %2(i64) + %4:vgpr(f64) = G_BITCAST %0(i64) + %5:vgpr(f64) = G_BITCAST %3(i64) + %6:vgpr(f64) = nnan G_FMAXNUM_IEEE %4, %5 + %7:sgpr(f64) = G_FCONSTANT double 4.000000e+00 + %8:sgpr(i64) = G_BITCAST %7(f64) + %9:vgpr(i64) = COPY %8(i64) + %10:vgpr(f64) = G_BITCAST %9(i64) + %11:vgpr(f64) = nnan G_FMINNUM_IEEE %6, %10 + %12:vgpr(i64) = G_BITCAST %11(f64) + $vgpr0_vgpr1 = COPY %12(i64) ... 
--- @@ -425,30 +577,42 @@ body: | ; CHECK-LABEL: name: test_min_max_v2f16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s16) = G_FCONSTANT half 0xH4000 - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[C]](s16) - ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s16) = G_FCONSTANT half 0xH4400 - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[C1]](s16) - ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT1]](s32), [[ANYEXT1]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) - ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:vgpr(<2 x s16>) = nnan G_FMAXNUM_IEEE [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(<2 x s16>) = nnan G_FMINNUM_IEEE [[FMAXNUM_IEEE]], [[COPY2]] - ; CHECK-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](<2 x s16>) - %0:vgpr(<2 x s16>) = COPY $vgpr0 - %3:sgpr(s16) = G_FCONSTANT half 0xH4000 - %9:sgpr(s32) = G_ANYEXT %3(s16) - %2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %9(s32), %9(s32) - %6:sgpr(s16) = G_FCONSTANT half 0xH4400 - %10:sgpr(s32) = G_ANYEXT %6(s16) - %5:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %10(s32), %10(s32) - %11:vgpr(<2 x s16>) = COPY %2(<2 x s16>) - %4:vgpr(<2 x s16>) = nnan G_FMAXNUM_IEEE %0, %11 - %12:vgpr(<2 x s16>) = COPY %5(<2 x s16>) - %7:vgpr(<2 x s16>) = nnan G_FMINNUM_IEEE %4, %12 - $vgpr0 = COPY %7(<2 x s16>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(f16) = G_FCONSTANT half 0xH4000 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(i16) = G_BITCAST [[C]](f16) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[BITCAST]](i16) + ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](i32), [[ANYEXT]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(f16) = G_FCONSTANT half 0xH4400 + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(i16) = G_BITCAST [[C1]](f16) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[BITCAST1]](i16) + ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT1]](i32), [[ANYEXT1]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x i16>) = COPY [[BUILD_VECTOR_TRUNC]](<2 x i16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:vgpr(<2 x f16>) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:vgpr(<2 x f16>) = G_BITCAST [[COPY1]](<2 x i16>) + ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:vgpr(<2 x f16>) = nnan G_FMAXNUM_IEEE [[BITCAST2]], [[BITCAST3]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x i16>) = COPY [[BUILD_VECTOR_TRUNC1]](<2 x i16>) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:vgpr(<2 x f16>) = G_BITCAST [[COPY2]](<2 x i16>) + ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(<2 x f16>) = nnan G_FMINNUM_IEEE [[FMAXNUM_IEEE]], [[BITCAST4]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:vgpr(<2 x i16>) = G_BITCAST [[FMINNUM_IEEE]](<2 x f16>) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST5]](<2 x i16>) + %0:vgpr(<2 x i16>) = COPY $vgpr0 + %1:sgpr(f16) = G_FCONSTANT half 0xH4000 + %2:sgpr(i16) = G_BITCAST %1(f16) + %3:sgpr(i32) = G_ANYEXT %2(i16) + %4:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC %3(i32), %3(i32) + %5:sgpr(f16) = G_FCONSTANT half 0xH4400 + %6:sgpr(i16) = G_BITCAST %5(f16) + %7:sgpr(i32) = G_ANYEXT %6(i16) + %8:sgpr(<2 x i16>) = 
G_BUILD_VECTOR_TRUNC %7(i32), %7(i32) + %9:vgpr(<2 x i16>) = COPY %4(<2 x i16>) + %10:vgpr(<2 x f16>) = G_BITCAST %0(<2 x i16>) + %11:vgpr(<2 x f16>) = G_BITCAST %9(<2 x i16>) + %12:vgpr(<2 x f16>) = nnan G_FMAXNUM_IEEE %10, %11 + %13:vgpr(<2 x i16>) = COPY %8(<2 x i16>) + %14:vgpr(<2 x f16>) = G_BITCAST %13(<2 x i16>) + %15:vgpr(<2 x f16>) = nnan G_FMINNUM_IEEE %12, %14 + %16:vgpr(<2 x i16>) = G_BITCAST %15(<2 x f16>) + $vgpr0 = COPY %16(<2 x i16>) ... --- @@ -466,22 +630,34 @@ body: | ; CHECK-LABEL: name: test_min_max_maybe_NaN_input_ieee_false ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[FMAXNUM:%[0-9]+]]:vgpr(s32) = G_FMAXNUM [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 4.000000e+00 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:vgpr(s32) = G_FMINNUM [[FMAXNUM]], [[COPY2]] - ; CHECK-NEXT: $vgpr0 = COPY [[FMINNUM]](s32) - %0:vgpr(s32) = COPY $vgpr0 - %2:sgpr(s32) = G_FCONSTANT float 2.000000e+00 - %7:vgpr(s32) = COPY %2(s32) - %3:vgpr(s32) = G_FMAXNUM %0, %7 - %4:sgpr(s32) = G_FCONSTANT float 4.000000e+00 - %8:vgpr(s32) = COPY %4(s32) - %5:vgpr(s32) = G_FMINNUM %3, %8 - $vgpr0 = COPY %5(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 2.000000e+00 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C]](f32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[FMAXNUM:%[0-9]+]]:vgpr(f32) = G_FMAXNUM [[BITCAST1]], [[BITCAST2]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 4.000000e+00 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C1]](f32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST3]](i32) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY2]](i32) + ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:vgpr(f32) = G_FMINNUM [[FMAXNUM]], [[BITCAST4]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:vgpr(i32) = G_BITCAST [[FMINNUM]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST5]](i32) + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(f32) = G_FCONSTANT float 2.000000e+00 + %2:sgpr(i32) = G_BITCAST %1(f32) + %3:vgpr(i32) = COPY %2(i32) + %4:vgpr(f32) = G_BITCAST %0(i32) + %5:vgpr(f32) = G_BITCAST %3(i32) + %6:vgpr(f32) = G_FMAXNUM %4, %5 + %7:sgpr(f32) = G_FCONSTANT float 4.000000e+00 + %8:sgpr(i32) = G_BITCAST %7(f32) + %9:vgpr(i32) = COPY %8(i32) + %10:vgpr(f32) = G_BITCAST %9(i32) + %11:vgpr(f32) = G_FMINNUM %6, %10 + %12:vgpr(i32) = G_BITCAST %11(f32) + $vgpr0 = COPY %12(i32) ... 
--- @@ -499,22 +675,34 @@ body: | ; CHECK-LABEL: name: test_max_min_maybe_NaN_input_ieee_false ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 4.000000e+00 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:vgpr(s32) = G_FMINNUM [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[FMAXNUM:%[0-9]+]]:vgpr(s32) = G_FMAXNUM [[FMINNUM]], [[COPY2]] - ; CHECK-NEXT: $vgpr0 = COPY [[FMAXNUM]](s32) - %0:vgpr(s32) = COPY $vgpr0 - %2:sgpr(s32) = G_FCONSTANT float 4.000000e+00 - %7:vgpr(s32) = COPY %2(s32) - %3:vgpr(s32) = G_FMINNUM %0, %7 - %4:sgpr(s32) = G_FCONSTANT float 2.000000e+00 - %8:vgpr(s32) = COPY %4(s32) - %5:vgpr(s32) = G_FMAXNUM %3, %8 - $vgpr0 = COPY %5(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 4.000000e+00 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C]](f32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:vgpr(f32) = G_FMINNUM [[BITCAST1]], [[BITCAST2]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 2.000000e+00 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C1]](f32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST3]](i32) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY2]](i32) + ; CHECK-NEXT: [[FMAXNUM:%[0-9]+]]:vgpr(f32) = G_FMAXNUM [[FMINNUM]], [[BITCAST4]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:vgpr(i32) = G_BITCAST [[FMAXNUM]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST5]](i32) + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(f32) = G_FCONSTANT float 4.000000e+00 + %2:sgpr(i32) = G_BITCAST %1(f32) + %3:vgpr(i32) = COPY %2(i32) + %4:vgpr(f32) = G_BITCAST %0(i32) + %5:vgpr(f32) = G_BITCAST %3(i32) + %6:vgpr(f32) = G_FMINNUM %4, %5 + %7:sgpr(f32) = G_FCONSTANT float 2.000000e+00 + %8:sgpr(i32) = G_BITCAST %7(f32) + %9:vgpr(i32) = COPY %8(i32) + %10:vgpr(f32) = G_BITCAST %9(i32) + %11:vgpr(f32) = G_FMAXNUM %6, %10 + %12:vgpr(i32) = G_BITCAST %11(f32) + $vgpr0 = COPY %12(i32) ... 
--- @@ -532,22 +720,34 @@ body: | ; CHECK-LABEL: name: test_max_min_maybe_NaN_input_ieee_true ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 4.000000e+00 - ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:vgpr(s32) = G_FCANONICALIZE [[COPY]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[COPY1]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:vgpr(s32) = G_FMAXNUM_IEEE [[FMINNUM_IEEE]], [[COPY2]] - ; CHECK-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) - %0:vgpr(s32) = COPY $vgpr0 - %2:sgpr(s32) = G_FCONSTANT float 4.000000e+00 - %7:vgpr(s32) = G_FCANONICALIZE %0 - %8:vgpr(s32) = COPY %2(s32) - %3:vgpr(s32) = G_FMINNUM_IEEE %7, %8 - %4:sgpr(s32) = G_FCONSTANT float 2.000000e+00 - %9:vgpr(s32) = COPY %4(s32) - %5:vgpr(s32) = G_FMAXNUM_IEEE %3, %9 - $vgpr0 = COPY %5(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 4.000000e+00 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:vgpr(f32) = G_FCANONICALIZE [[BITCAST]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C]](f32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST1]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(f32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[BITCAST2]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 2.000000e+00 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:sgpr(i32) = G_BITCAST [[C1]](f32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[BITCAST3]](i32) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY2]](i32) + ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:vgpr(f32) = G_FMAXNUM_IEEE [[FMINNUM_IEEE]], [[BITCAST4]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:vgpr(i32) = G_BITCAST [[FMAXNUM_IEEE]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST5]](i32) + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(f32) = G_FCONSTANT float 4.000000e+00 + %2:vgpr(f32) = G_BITCAST %0(i32) + %3:vgpr(f32) = G_FCANONICALIZE %2 + %4:sgpr(i32) = G_BITCAST %1(f32) + %5:vgpr(i32) = COPY %4(i32) + %6:vgpr(f32) = G_BITCAST %5(i32) + %7:vgpr(f32) = G_FMINNUM_IEEE %3, %6 + %8:sgpr(f32) = G_FCONSTANT float 2.000000e+00 + %9:sgpr(i32) = G_BITCAST %8(f32) + %10:vgpr(i32) = COPY %9(i32) + %11:vgpr(f32) = G_BITCAST %10(i32) + %12:vgpr(f32) = G_FMAXNUM_IEEE %7, %11 + %13:vgpr(i32) = G_BITCAST %12(f32) + $vgpr0 = COPY %13(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-redundant-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-redundant-and.mir index f87a253dcb433..bb72eae5c5f98 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-redundant-and.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-redundant-and.mir @@ -12,17 +12,17 @@ body: | ; CHECK: liveins: $sgpr0, $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sreg_32(s32) = G_ICMP intpred(ne), [[COPY1]](s32), [[C]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY [[ICMP]](s32) - ; CHECK-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sreg_32(i32) = G_ICMP intpred(ne), [[COPY1]](i32), [[C]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY [[ICMP]](i32) + ; CHECK-NEXT: G_STORE [[COPY2]](i32), [[COPY]](p1) :: (store (i32), addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 %0:sgpr(p1) = COPY $vgpr0_vgpr1 - %1:sgpr(s32) = COPY $sgpr0 - %2:sgpr(s32) = G_CONSTANT i32 1 - %3:sreg_32(s32) = G_ICMP intpred(ne), %1, %2 - %4:sgpr(s32) = G_AND %3, %2 - G_STORE %4(s32), %0(p1) :: (store (s32), addrspace 1) + %1:sgpr(i32) = COPY $sgpr0 + %2:sgpr(i32) = G_CONSTANT i32 1 + %3:sreg_32(i32) = G_ICMP intpred(ne), %1(i32), %2 + %4:sgpr(i32) = G_AND %3, %2 + G_STORE %4(i32), %0(p1) :: (store (i32), addrspace 1) S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-smed3.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-smed3.mir index f18a576b56250..c768ba90f8ab4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-smed3.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-smed3.mir @@ -14,22 +14,22 @@ body: | ; CHECK-LABEL: name: test_min_max_ValK0_K1_i32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY1]], [[COPY2]] - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 -12 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 17 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[C1]](i32) + ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(i32) = G_AMDGPU_SMED3 [[COPY]], [[COPY1]], [[COPY2]] + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](i32) ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 - %0:vgpr(s32) = COPY $vgpr0 - %2:sgpr(s32) = G_CONSTANT i32 -12 - %7:vgpr(s32) = COPY %2(s32) - %3:vgpr(s32) = G_SMAX %0, %7 - %4:sgpr(s32) = G_CONSTANT i32 17 - %8:vgpr(s32) = COPY %4(s32) - %5:vgpr(s32) = G_SMIN %3, %8 - $vgpr0 = COPY %5(s32) + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = G_CONSTANT i32 -12 + %2:vgpr(i32) = COPY %1(i32) + %3:vgpr(i32) = G_SMAX %0, %2 + %4:sgpr(i32) = G_CONSTANT i32 17 + %5:vgpr(i32) = COPY %4(i32) + %6:vgpr(i32) = 
G_SMIN %3, %5 + $vgpr0 = COPY %6(i32) S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 ... @@ -45,22 +45,22 @@ body: | ; CHECK-LABEL: name: min_max_ValK0_K1_i32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY1]], [[COPY2]] - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 -12 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 17 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[C1]](i32) + ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(i32) = G_AMDGPU_SMED3 [[COPY]], [[COPY1]], [[COPY2]] + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](i32) ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 - %0:vgpr(s32) = COPY $vgpr0 - %2:sgpr(s32) = G_CONSTANT i32 -12 - %7:vgpr(s32) = COPY %2(s32) - %3:vgpr(s32) = G_SMAX %7, %0 - %4:sgpr(s32) = G_CONSTANT i32 17 - %8:vgpr(s32) = COPY %4(s32) - %5:vgpr(s32) = G_SMIN %3, %8 - $vgpr0 = COPY %5(s32) + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = G_CONSTANT i32 -12 + %2:vgpr(i32) = COPY %1(i32) + %3:vgpr(i32) = G_SMAX %2, %0 + %4:sgpr(i32) = G_CONSTANT i32 17 + %5:vgpr(i32) = COPY %4(i32) + %6:vgpr(i32) = G_SMIN %3, %5 + $vgpr0 = COPY %6(i32) S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 ... @@ -76,22 +76,22 @@ body: | ; CHECK-LABEL: name: test_min_K1max_ValK0__i32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY1]], [[COPY2]] - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 -12 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 17 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[C1]](i32) + ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(i32) = G_AMDGPU_SMED3 [[COPY]], [[COPY1]], [[COPY2]] + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](i32) ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 - %0:vgpr(s32) = COPY $vgpr0 - %2:sgpr(s32) = G_CONSTANT i32 -12 - %7:vgpr(s32) = COPY %2(s32) - %3:vgpr(s32) = G_SMAX %0, %7 - %4:sgpr(s32) = G_CONSTANT i32 17 - %8:vgpr(s32) = COPY %4(s32) - %5:vgpr(s32) = G_SMIN %8, %3 - $vgpr0 = COPY %5(s32) + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = G_CONSTANT i32 -12 + %2:vgpr(i32) = COPY %1(i32) + %3:vgpr(i32) = G_SMAX %0, %2 + %4:sgpr(i32) = G_CONSTANT i32 17 + %5:vgpr(i32) = COPY %4(i32) + %6:vgpr(i32) = G_SMIN %5, %3 + $vgpr0 = COPY %6(i32) S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 ... 
@@ -107,22 +107,22 @@ body: | ; CHECK-LABEL: name: test_min_K1max_K0Val__i32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY1]], [[COPY2]] - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 -12 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 17 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[C1]](i32) + ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(i32) = G_AMDGPU_SMED3 [[COPY]], [[COPY1]], [[COPY2]] + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](i32) ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 - %0:vgpr(s32) = COPY $vgpr0 - %2:sgpr(s32) = G_CONSTANT i32 -12 - %7:vgpr(s32) = COPY %2(s32) - %3:vgpr(s32) = G_SMAX %7, %0 - %4:sgpr(s32) = G_CONSTANT i32 17 - %8:vgpr(s32) = COPY %4(s32) - %5:vgpr(s32) = G_SMIN %8, %3 - $vgpr0 = COPY %5(s32) + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = G_CONSTANT i32 -12 + %2:vgpr(i32) = COPY %1(i32) + %3:vgpr(i32) = G_SMAX %2, %0 + %4:sgpr(i32) = G_CONSTANT i32 17 + %5:vgpr(i32) = COPY %4(i32) + %6:vgpr(i32) = G_SMIN %5, %3 + $vgpr0 = COPY %6(i32) S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 ... @@ -138,22 +138,22 @@ body: | ; CHECK-LABEL: name: test_max_min_ValK1_K0_i32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY2]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 17 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 -12 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[C1]](i32) + ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(i32) = G_AMDGPU_SMED3 [[COPY]], [[COPY2]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](i32) ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 - %0:vgpr(s32) = COPY $vgpr0 - %2:sgpr(s32) = G_CONSTANT i32 17 - %7:vgpr(s32) = COPY %2(s32) - %3:vgpr(s32) = G_SMIN %0, %7 - %4:sgpr(s32) = G_CONSTANT i32 -12 - %8:vgpr(s32) = COPY %4(s32) - %5:vgpr(s32) = G_SMAX %3, %8 - $vgpr0 = COPY %5(s32) + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = G_CONSTANT i32 17 + %2:vgpr(i32) = COPY %1(i32) + %3:vgpr(i32) = G_SMIN %0, %2 + %4:sgpr(i32) = G_CONSTANT i32 -12 + %5:vgpr(i32) = COPY %4(i32) + %6:vgpr(i32) = G_SMAX %3, %5 + $vgpr0 = COPY %6(i32) S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 ... 
@@ -169,22 +169,22 @@ body: | ; CHECK-LABEL: name: test_max_min_K1Val_K0_i32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY2]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 17 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 -12 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[C1]](i32) + ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(i32) = G_AMDGPU_SMED3 [[COPY]], [[COPY2]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](i32) ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 - %0:vgpr(s32) = COPY $vgpr0 - %2:sgpr(s32) = G_CONSTANT i32 17 - %7:vgpr(s32) = COPY %2(s32) - %3:vgpr(s32) = G_SMIN %7, %0 - %4:sgpr(s32) = G_CONSTANT i32 -12 - %8:vgpr(s32) = COPY %4(s32) - %5:vgpr(s32) = G_SMAX %3, %8 - $vgpr0 = COPY %5(s32) + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = G_CONSTANT i32 17 + %2:vgpr(i32) = COPY %1(i32) + %3:vgpr(i32) = G_SMIN %2, %0 + %4:sgpr(i32) = G_CONSTANT i32 -12 + %5:vgpr(i32) = COPY %4(i32) + %6:vgpr(i32) = G_SMAX %3, %5 + $vgpr0 = COPY %6(i32) S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 ... @@ -200,22 +200,22 @@ body: | ; CHECK-LABEL: name: test_max_K0min_ValK1__i32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY2]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 17 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 -12 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[C1]](i32) + ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(i32) = G_AMDGPU_SMED3 [[COPY]], [[COPY2]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](i32) ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 - %0:vgpr(s32) = COPY $vgpr0 - %2:sgpr(s32) = G_CONSTANT i32 17 - %7:vgpr(s32) = COPY %2(s32) - %3:vgpr(s32) = G_SMIN %0, %7 - %4:sgpr(s32) = G_CONSTANT i32 -12 - %8:vgpr(s32) = COPY %4(s32) - %5:vgpr(s32) = G_SMAX %8, %3 - $vgpr0 = COPY %5(s32) + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = G_CONSTANT i32 17 + %2:vgpr(i32) = COPY %1(i32) + %3:vgpr(i32) = G_SMIN %0, %2 + %4:sgpr(i32) = G_CONSTANT i32 -12 + %5:vgpr(i32) = COPY %4(i32) + %6:vgpr(i32) = G_SMAX %5, %3 + $vgpr0 = COPY %6(i32) S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 ... 
@@ -231,22 +231,22 @@ body: | ; CHECK-LABEL: name: test_max_K0min_K1Val__i32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY2]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 17 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 -12 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[C1]](i32) + ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(i32) = G_AMDGPU_SMED3 [[COPY]], [[COPY2]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](i32) ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 - %0:vgpr(s32) = COPY $vgpr0 - %2:sgpr(s32) = G_CONSTANT i32 17 - %7:vgpr(s32) = COPY %2(s32) - %3:vgpr(s32) = G_SMIN %7, %0 - %4:sgpr(s32) = G_CONSTANT i32 -12 - %8:vgpr(s32) = COPY %4(s32) - %5:vgpr(s32) = G_SMAX %8, %3 - $vgpr0 = COPY %5(s32) + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = G_CONSTANT i32 17 + %2:vgpr(i32) = COPY %1(i32) + %3:vgpr(i32) = G_SMIN %2, %0 + %4:sgpr(i32) = G_CONSTANT i32 -12 + %5:vgpr(i32) = COPY %4(i32) + %6:vgpr(i32) = G_SMAX %5, %3 + $vgpr0 = COPY %6(i32) S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 ... @@ -262,27 +262,27 @@ body: | ; CHECK-LABEL: name: test_max_K0min_K1Val__v2i16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 - ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[C]](s32), [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12 - ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[C1]](s32), [[C1]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) - ; CHECK-NEXT: [[SMIN:%[0-9]+]]:vgpr(<2 x s16>) = G_SMIN [[COPY1]], [[COPY]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; CHECK-NEXT: [[SMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_SMAX [[COPY2]], [[SMIN]] - ; CHECK-NEXT: $vgpr0 = COPY [[SMAX]](<2 x s16>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 17 + ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC [[C]](i32), [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 -12 + ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC [[C1]](i32), [[C1]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x i16>) = COPY [[BUILD_VECTOR_TRUNC]](<2 x i16>) + ; CHECK-NEXT: [[SMIN:%[0-9]+]]:vgpr(<2 x i16>) = G_SMIN [[COPY1]], [[COPY]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x i16>) = COPY [[BUILD_VECTOR_TRUNC1]](<2 x i16>) + ; CHECK-NEXT: [[SMAX:%[0-9]+]]:vgpr(<2 x i16>) = G_SMAX [[COPY2]], [[SMIN]] + ; CHECK-NEXT: $vgpr0 = COPY [[SMAX]](<2 x i16>) ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 - %0:vgpr(<2 x s16>) = COPY $vgpr0 - %9:sgpr(s32) = G_CONSTANT i32 17 - %2:sgpr(<2 x s16>) = 
G_BUILD_VECTOR_TRUNC %9(s32), %9(s32) - %10:sgpr(s32) = G_CONSTANT i32 -12 - %5:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %10(s32), %10(s32) - %11:vgpr(<2 x s16>) = COPY %2(<2 x s16>) - %4:vgpr(<2 x s16>) = G_SMIN %11, %0 - %12:vgpr(<2 x s16>) = COPY %5(<2 x s16>) - %7:vgpr(<2 x s16>) = G_SMAX %12, %4 - $vgpr0 = COPY %7(<2 x s16>) + %0:vgpr(<2 x i16>) = COPY $vgpr0 + %1:sgpr(i32) = G_CONSTANT i32 17 + %2:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC %1(i32), %1(i32) + %3:sgpr(i32) = G_CONSTANT i32 -12 + %4:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC %3(i32), %3(i32) + %5:vgpr(<2 x i16>) = COPY %2(<2 x i16>) + %6:vgpr(<2 x i16>) = G_SMIN %5, %0 + %7:vgpr(<2 x i16>) = COPY %4(<2 x i16>) + %8:vgpr(<2 x i16>) = G_SMAX %7, %6 + $vgpr0 = COPY %8(<2 x i16>) S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 ... @@ -298,23 +298,23 @@ body: | ; CHECK-LABEL: name: test_uniform_min_max ; CHECK: liveins: $sgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12 - ; CHECK-NEXT: [[SMAX:%[0-9]+]]:sgpr(s32) = G_SMAX [[COPY]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 - ; CHECK-NEXT: [[SMIN:%[0-9]+]]:sgpr(s32) = G_SMIN [[SMAX]], [[C1]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[SMIN]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 -12 + ; CHECK-NEXT: [[SMAX:%[0-9]+]]:sgpr(i32) = G_SMAX [[COPY]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 17 + ; CHECK-NEXT: [[SMIN:%[0-9]+]]:sgpr(i32) = G_SMIN [[SMAX]], [[C1]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[SMIN]](i32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](i32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](i32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 - %0:sgpr(s32) = COPY $sgpr2 - %3:sgpr(s32) = G_CONSTANT i32 -12 - %4:sgpr(s32) = G_SMAX %0, %3 - %5:sgpr(s32) = G_CONSTANT i32 17 - %6:sgpr(s32) = G_SMIN %4, %5 - %8:vgpr(s32) = COPY %6(s32) - %7:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %8(s32) - $sgpr0 = COPY %7(s32) + %0:sgpr(i32) = COPY $sgpr2 + %1:sgpr(i32) = G_CONSTANT i32 -12 + %2:sgpr(i32) = G_SMAX %0, %1 + %3:sgpr(i32) = G_CONSTANT i32 17 + %4:sgpr(i32) = G_SMIN %2, %3 + %5:vgpr(i32) = COPY %4(i32) + %6:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %5(i32) + $sgpr0 = COPY %6(i32) SI_RETURN_TO_EPILOG implicit $sgpr0 ... 
@@ -330,21 +330,21 @@ body: | ; CHECK-LABEL: name: test_non_inline_constant_i32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY1]], [[COPY2]] - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 -12 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 65 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[C1]](i32) + ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(i32) = G_AMDGPU_SMED3 [[COPY]], [[COPY1]], [[COPY2]] + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](i32) ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 - %0:vgpr(s32) = COPY $vgpr0 - %2:sgpr(s32) = G_CONSTANT i32 -12 - %7:vgpr(s32) = COPY %2(s32) - %3:vgpr(s32) = G_SMAX %0, %7 - %4:sgpr(s32) = G_CONSTANT i32 65 - %8:vgpr(s32) = COPY %4(s32) - %5:vgpr(s32) = G_SMIN %3, %8 - $vgpr0 = COPY %5(s32) + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = G_CONSTANT i32 -12 + %2:vgpr(i32) = COPY %1(i32) + %3:vgpr(i32) = G_SMAX %0, %2 + %4:sgpr(i32) = G_CONSTANT i32 65 + %5:vgpr(i32) = COPY %4(i32) + %6:vgpr(i32) = G_SMIN %3, %5 + $vgpr0 = COPY %6(i32) S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-umed3.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-umed3.mir index ef05a5274462a..102675969b594 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-umed3.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-umed3.mir @@ -14,22 +14,22 @@ body: | ; CHECK-LABEL: name: test_min_max_ValK0_K1_u32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY1]], [[COPY2]] - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 12 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 17 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[C1]](i32) + ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(i32) = G_AMDGPU_UMED3 [[COPY]], [[COPY1]], [[COPY2]] + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](i32) ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 - %0:vgpr(s32) = COPY $vgpr0 - %2:sgpr(s32) = G_CONSTANT i32 12 - %7:vgpr(s32) = COPY %2(s32) - %3:vgpr(s32) = G_UMAX %0, %7 - %4:sgpr(s32) = G_CONSTANT i32 17 - %8:vgpr(s32) = COPY %4(s32) - %5:vgpr(s32) = G_UMIN %3, %8 - $vgpr0 = COPY %5(s32) + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = G_CONSTANT i32 12 + %2:vgpr(i32) = COPY %1(i32) + %3:vgpr(i32) = G_UMAX %0, %2 + %4:sgpr(i32) = G_CONSTANT i32 17 + %5:vgpr(i32) = COPY %4(i32) + %6:vgpr(i32) = G_UMIN %3, %5 + 
$vgpr0 = COPY %6(i32) S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 ... @@ -45,22 +45,22 @@ body: | ; CHECK-LABEL: name: min_max_ValK0_K1_i32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY1]], [[COPY2]] - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 12 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 17 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[C1]](i32) + ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(i32) = G_AMDGPU_UMED3 [[COPY]], [[COPY1]], [[COPY2]] + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](i32) ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 - %0:vgpr(s32) = COPY $vgpr0 - %2:sgpr(s32) = G_CONSTANT i32 12 - %7:vgpr(s32) = COPY %2(s32) - %3:vgpr(s32) = G_UMAX %7, %0 - %4:sgpr(s32) = G_CONSTANT i32 17 - %8:vgpr(s32) = COPY %4(s32) - %5:vgpr(s32) = G_UMIN %3, %8 - $vgpr0 = COPY %5(s32) + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = G_CONSTANT i32 12 + %2:vgpr(i32) = COPY %1(i32) + %3:vgpr(i32) = G_UMAX %2, %0 + %4:sgpr(i32) = G_CONSTANT i32 17 + %5:vgpr(i32) = COPY %4(i32) + %6:vgpr(i32) = G_UMIN %3, %5 + $vgpr0 = COPY %6(i32) S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 ... @@ -76,22 +76,22 @@ body: | ; CHECK-LABEL: name: test_min_K1max_ValK0__u32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY1]], [[COPY2]] - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 12 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 17 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[C1]](i32) + ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(i32) = G_AMDGPU_UMED3 [[COPY]], [[COPY1]], [[COPY2]] + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](i32) ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 - %0:vgpr(s32) = COPY $vgpr0 - %2:sgpr(s32) = G_CONSTANT i32 12 - %7:vgpr(s32) = COPY %2(s32) - %3:vgpr(s32) = G_UMAX %0, %7 - %4:sgpr(s32) = G_CONSTANT i32 17 - %8:vgpr(s32) = COPY %4(s32) - %5:vgpr(s32) = G_UMIN %8, %3 - $vgpr0 = COPY %5(s32) + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = G_CONSTANT i32 12 + %2:vgpr(i32) = COPY %1(i32) + %3:vgpr(i32) = G_UMAX %0, %2 + %4:sgpr(i32) = G_CONSTANT i32 17 + %5:vgpr(i32) = COPY %4(i32) + %6:vgpr(i32) = G_UMIN %5, %3 + $vgpr0 = COPY %6(i32) S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 ... 
@@ -107,22 +107,22 @@ body: | ; CHECK-LABEL: name: test_min_K1max_K0Val__u32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY1]], [[COPY2]] - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 12 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 17 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[C1]](i32) + ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(i32) = G_AMDGPU_UMED3 [[COPY]], [[COPY1]], [[COPY2]] + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](i32) ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 - %0:vgpr(s32) = COPY $vgpr0 - %2:sgpr(s32) = G_CONSTANT i32 12 - %7:vgpr(s32) = COPY %2(s32) - %3:vgpr(s32) = G_UMAX %7, %0 - %4:sgpr(s32) = G_CONSTANT i32 17 - %8:vgpr(s32) = COPY %4(s32) - %5:vgpr(s32) = G_UMIN %8, %3 - $vgpr0 = COPY %5(s32) + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = G_CONSTANT i32 12 + %2:vgpr(i32) = COPY %1(i32) + %3:vgpr(i32) = G_UMAX %2, %0 + %4:sgpr(i32) = G_CONSTANT i32 17 + %5:vgpr(i32) = COPY %4(i32) + %6:vgpr(i32) = G_UMIN %5, %3 + $vgpr0 = COPY %6(i32) S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 ... @@ -138,22 +138,22 @@ body: | ; CHECK-LABEL: name: test_max_min_ValK1_K0_u32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY2]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 17 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 12 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[C1]](i32) + ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(i32) = G_AMDGPU_UMED3 [[COPY]], [[COPY2]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](i32) ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 - %0:vgpr(s32) = COPY $vgpr0 - %2:sgpr(s32) = G_CONSTANT i32 17 - %7:vgpr(s32) = COPY %2(s32) - %3:vgpr(s32) = G_UMIN %0, %7 - %4:sgpr(s32) = G_CONSTANT i32 12 - %8:vgpr(s32) = COPY %4(s32) - %5:vgpr(s32) = G_UMAX %3, %8 - $vgpr0 = COPY %5(s32) + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = G_CONSTANT i32 17 + %2:vgpr(i32) = COPY %1(i32) + %3:vgpr(i32) = G_UMIN %0, %2 + %4:sgpr(i32) = G_CONSTANT i32 12 + %5:vgpr(i32) = COPY %4(i32) + %6:vgpr(i32) = G_UMAX %3, %5 + $vgpr0 = COPY %6(i32) S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 ... 
@@ -169,22 +169,22 @@ body: | ; CHECK-LABEL: name: test_max_min_K1Val_K0_u32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY2]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 17 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 12 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[C1]](i32) + ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(i32) = G_AMDGPU_UMED3 [[COPY]], [[COPY2]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](i32) ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 - %0:vgpr(s32) = COPY $vgpr0 - %2:sgpr(s32) = G_CONSTANT i32 17 - %7:vgpr(s32) = COPY %2(s32) - %3:vgpr(s32) = G_UMIN %7, %0 - %4:sgpr(s32) = G_CONSTANT i32 12 - %8:vgpr(s32) = COPY %4(s32) - %5:vgpr(s32) = G_UMAX %3, %8 - $vgpr0 = COPY %5(s32) + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = G_CONSTANT i32 17 + %2:vgpr(i32) = COPY %1(i32) + %3:vgpr(i32) = G_UMIN %2, %0 + %4:sgpr(i32) = G_CONSTANT i32 12 + %5:vgpr(i32) = COPY %4(i32) + %6:vgpr(i32) = G_UMAX %3, %5 + $vgpr0 = COPY %6(i32) S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 ... @@ -200,22 +200,22 @@ body: | ; CHECK-LABEL: name: test_max_K0min_ValK1__u32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY2]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 17 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 12 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[C1]](i32) + ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(i32) = G_AMDGPU_UMED3 [[COPY]], [[COPY2]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](i32) ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 - %0:vgpr(s32) = COPY $vgpr0 - %2:sgpr(s32) = G_CONSTANT i32 17 - %7:vgpr(s32) = COPY %2(s32) - %3:vgpr(s32) = G_UMIN %0, %7 - %4:sgpr(s32) = G_CONSTANT i32 12 - %8:vgpr(s32) = COPY %4(s32) - %5:vgpr(s32) = G_UMAX %8, %3 - $vgpr0 = COPY %5(s32) + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = G_CONSTANT i32 17 + %2:vgpr(i32) = COPY %1(i32) + %3:vgpr(i32) = G_UMIN %0, %2 + %4:sgpr(i32) = G_CONSTANT i32 12 + %5:vgpr(i32) = COPY %4(i32) + %6:vgpr(i32) = G_UMAX %5, %3 + $vgpr0 = COPY %6(i32) S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 ... 
@@ -231,22 +231,22 @@ body: | ; CHECK-LABEL: name: test_max_K0min_K1Val__u32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY2]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 17 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 12 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[C1]](i32) + ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(i32) = G_AMDGPU_UMED3 [[COPY]], [[COPY2]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](i32) ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 - %0:vgpr(s32) = COPY $vgpr0 - %2:sgpr(s32) = G_CONSTANT i32 17 - %7:vgpr(s32) = COPY %2(s32) - %3:vgpr(s32) = G_UMIN %7, %0 - %4:sgpr(s32) = G_CONSTANT i32 12 - %8:vgpr(s32) = COPY %4(s32) - %5:vgpr(s32) = G_UMAX %8, %3 - $vgpr0 = COPY %5(s32) + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = G_CONSTANT i32 17 + %2:vgpr(i32) = COPY %1(i32) + %3:vgpr(i32) = G_UMIN %2, %0 + %4:sgpr(i32) = G_CONSTANT i32 12 + %5:vgpr(i32) = COPY %4(i32) + %6:vgpr(i32) = G_UMAX %5, %3 + $vgpr0 = COPY %6(i32) S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 ... @@ -262,27 +262,27 @@ body: | ; CHECK-LABEL: name: test_max_K0min_K1Val__v2u16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 - ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[C]](s32), [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[C1]](s32), [[C1]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) - ; CHECK-NEXT: [[UMIN:%[0-9]+]]:vgpr(<2 x s16>) = G_UMIN [[COPY1]], [[COPY]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; CHECK-NEXT: [[UMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_UMAX [[COPY2]], [[UMIN]] - ; CHECK-NEXT: $vgpr0 = COPY [[UMAX]](<2 x s16>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 17 + ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC [[C]](i32), [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 12 + ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC [[C1]](i32), [[C1]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x i16>) = COPY [[BUILD_VECTOR_TRUNC]](<2 x i16>) + ; CHECK-NEXT: [[UMIN:%[0-9]+]]:vgpr(<2 x i16>) = G_UMIN [[COPY1]], [[COPY]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x i16>) = COPY [[BUILD_VECTOR_TRUNC1]](<2 x i16>) + ; CHECK-NEXT: [[UMAX:%[0-9]+]]:vgpr(<2 x i16>) = G_UMAX [[COPY2]], [[UMIN]] + ; CHECK-NEXT: $vgpr0 = COPY [[UMAX]](<2 x i16>) ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 - %0:vgpr(<2 x s16>) = COPY $vgpr0 - %9:sgpr(s32) = G_CONSTANT i32 17 - %2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC 
%9(s32), %9(s32) - %10:sgpr(s32) = G_CONSTANT i32 12 - %5:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %10(s32), %10(s32) - %11:vgpr(<2 x s16>) = COPY %2(<2 x s16>) - %4:vgpr(<2 x s16>) = G_UMIN %11, %0 - %12:vgpr(<2 x s16>) = COPY %5(<2 x s16>) - %7:vgpr(<2 x s16>) = G_UMAX %12, %4 - $vgpr0 = COPY %7(<2 x s16>) + %0:vgpr(<2 x i16>) = COPY $vgpr0 + %1:sgpr(i32) = G_CONSTANT i32 17 + %2:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC %1(i32), %1(i32) + %3:sgpr(i32) = G_CONSTANT i32 12 + %4:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC %3(i32), %3(i32) + %5:vgpr(<2 x i16>) = COPY %2(<2 x i16>) + %6:vgpr(<2 x i16>) = G_UMIN %5, %0 + %7:vgpr(<2 x i16>) = COPY %4(<2 x i16>) + %8:vgpr(<2 x i16>) = G_UMAX %7, %6 + $vgpr0 = COPY %8(<2 x i16>) S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 ... @@ -298,23 +298,23 @@ body: | ; CHECK-LABEL: name: test_uniform_min_max ; CHECK: liveins: $sgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[UMAX:%[0-9]+]]:sgpr(s32) = G_UMAX [[COPY]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 - ; CHECK-NEXT: [[UMIN:%[0-9]+]]:sgpr(s32) = G_UMIN [[UMAX]], [[C1]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[UMIN]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 12 + ; CHECK-NEXT: [[UMAX:%[0-9]+]]:sgpr(i32) = G_UMAX [[COPY]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 17 + ; CHECK-NEXT: [[UMIN:%[0-9]+]]:sgpr(i32) = G_UMIN [[UMAX]], [[C1]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[UMIN]](i32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](i32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](i32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 - %0:sgpr(s32) = COPY $sgpr2 - %3:sgpr(s32) = G_CONSTANT i32 12 - %4:sgpr(s32) = G_UMAX %0, %3 - %5:sgpr(s32) = G_CONSTANT i32 17 - %6:sgpr(s32) = G_UMIN %4, %5 - %8:vgpr(s32) = COPY %6(s32) - %7:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %8(s32) - $sgpr0 = COPY %7(s32) + %0:sgpr(i32) = COPY $sgpr2 + %1:sgpr(i32) = G_CONSTANT i32 12 + %2:sgpr(i32) = G_UMAX %0, %1 + %3:sgpr(i32) = G_CONSTANT i32 17 + %4:sgpr(i32) = G_UMIN %2, %3 + %5:vgpr(i32) = COPY %4(i32) + %6:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %5(i32) + $sgpr0 = COPY %6(i32) SI_RETURN_TO_EPILOG implicit $sgpr0 ... 
@@ -331,21 +331,21 @@ body: | ; CHECK-LABEL: name: test_non_inline_constant_i32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY1]], [[COPY2]] - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 12 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 65 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[C1]](i32) + ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(i32) = G_AMDGPU_UMED3 [[COPY]], [[COPY1]], [[COPY2]] + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](i32) ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 - %0:vgpr(s32) = COPY $vgpr0 - %2:sgpr(s32) = G_CONSTANT i32 12 - %7:vgpr(s32) = COPY %2(s32) - %3:vgpr(s32) = G_UMAX %0, %7 - %4:sgpr(s32) = G_CONSTANT i32 65 - %8:vgpr(s32) = COPY %4(s32) - %5:vgpr(s32) = G_UMIN %3, %8 - $vgpr0 = COPY %5(s32) + %0:vgpr(i32) = COPY $vgpr0 + %1:sgpr(i32) = G_CONSTANT i32 12 + %2:vgpr(i32) = COPY %1(i32) + %3:vgpr(i32) = G_UMAX %0, %2 + %4:sgpr(i32) = G_CONSTANT i32 65 + %5:vgpr(i32) = COPY %4(i32) + %6:vgpr(i32) = G_UMIN %3, %5 + $vgpr0 = COPY %6(i32) S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-abs.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-abs.mir index 7f7f8b0121567..4c123c3090bac 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-abs.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-abs.mir @@ -12,14 +12,14 @@ body: | ; CHECK-LABEL: name: abs_sgpr_s16 ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC]](s16) - ; CHECK-NEXT: [[ABS:%[0-9]+]]:sgpr(s32) = G_ABS [[SEXT]] - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[ABS]](s32) - %1:_(s32) = COPY $sgpr0 - %2:_(s16) = G_TRUNC %1 - %5:_(s16) = G_ABS %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:sgpr(i32) = G_SEXT [[TRUNC]](i16) + ; CHECK-NEXT: [[ABS:%[0-9]+]]:sgpr(i32) = G_ABS [[SEXT]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i16) = G_TRUNC [[ABS]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(i16) = G_ABS %1 ... 
--- @@ -33,14 +33,14 @@ body: | ; CHECK-LABEL: name: abs_vgpr_s16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s16) = G_CONSTANT i16 0 - ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s16) = G_SUB [[C]], [[TRUNC]] - ; CHECK-NEXT: [[SMAX:%[0-9]+]]:vgpr(s16) = G_SMAX [[TRUNC]], [[SUB]] - %1:_(s32) = COPY $vgpr0 - %2:_(s16) = G_TRUNC %1 - %5:_(s16) = G_ABS %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(i16) = G_CONSTANT i16 0 + ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(i16) = G_SUB [[C]], [[TRUNC]] + ; CHECK-NEXT: [[SMAX:%[0-9]+]]:vgpr(i16) = G_SMAX [[TRUNC]], [[SUB]] + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(i16) = G_ABS %1 ... --- @@ -54,16 +54,16 @@ body: | ; CHECK-LABEL: name: abs_sgpr_v2i16 ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:sgpr(s32) = G_SEXT_INREG [[BITCAST]], 16 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:sgpr(s32) = G_ASHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[ABS:%[0-9]+]]:sgpr(s32) = G_ABS [[SEXT_INREG]] - ; CHECK-NEXT: [[ABS1:%[0-9]+]]:sgpr(s32) = G_ABS [[ASHR]] - ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ABS]](s32), [[ABS1]](s32) - %1:_(<2 x s16>) = COPY $sgpr0 - %5:_(<2 x s16>) = G_ABS %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:sgpr(i32) = G_SEXT_INREG [[BITCAST]], 16 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:sgpr(i32) = G_ASHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[ABS:%[0-9]+]]:sgpr(i32) = G_ABS [[SEXT_INREG]] + ; CHECK-NEXT: [[ABS1:%[0-9]+]]:sgpr(i32) = G_ABS [[ASHR]] + ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC [[ABS]](i32), [[ABS1]](i32) + %0:_(<2 x i16>) = COPY $sgpr0 + %1:_(<2 x i16>) = G_ABS %0 ... --- @@ -77,11 +77,11 @@ body: | ; CHECK-LABEL: name: abs_vgpr_v2i16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s16) = G_CONSTANT i16 0 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) - ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(<2 x s16>) = G_SUB [[BUILD_VECTOR]], [[COPY]] - ; CHECK-NEXT: [[SMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_SMAX [[COPY]], [[SUB]] - %1:_(<2 x s16>) = COPY $vgpr0 - %5:_(<2 x s16>) = G_ABS %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(i16) = G_CONSTANT i16 0 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x i16>) = G_BUILD_VECTOR [[C]](i16), [[C]](i16) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(<2 x i16>) = G_SUB [[BUILD_VECTOR]], [[COPY]] + ; CHECK-NEXT: [[SMAX:%[0-9]+]]:vgpr(<2 x i16>) = G_SMAX [[COPY]], [[SUB]] + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = G_ABS %0 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.s16.mir index 54ee69fcb2204..e21d0362467bf 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.s16.mir @@ -11,21 +11,21 @@ body: | ; CHECK-LABEL: name: add_s16_ss ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s16) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC1]](s16) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[ANYEXT]], [[ANYEXT1]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[ADD]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC2]](s16) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s16) = G_ADD %2, %3 - S_ENDPGM 0, implicit %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC]](i16) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC1]](i16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(i32) = G_ADD [[ANYEXT]], [[ANYEXT1]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(i16) = G_TRUNC [[ADD]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC2]](i16) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i16) = G_ADD %2, %3 + S_ENDPGM 0, implicit %4(i16) ... --- @@ -38,19 +38,19 @@ body: | ; CHECK-LABEL: name: add_s16_sv ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC]](s16) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s16) = G_ADD [[COPY2]], [[TRUNC1]] - ; CHECK-NEXT: S_ENDPGM 0, implicit [[ADD]](s16) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s16) = G_ADD %2, %3 - S_ENDPGM 0, implicit %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i16) = COPY [[TRUNC]](i16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(i16) = G_ADD [[COPY2]], [[TRUNC1]] + ; CHECK-NEXT: S_ENDPGM 0, implicit [[ADD]](i16) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i16) = G_ADD %2, %3 + S_ENDPGM 0, implicit %4(i16) ... 
--- @@ -63,19 +63,19 @@ body: | ; CHECK-LABEL: name: add_s16_vs ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC1]](s16) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s16) = G_ADD [[TRUNC]], [[COPY2]] - ; CHECK-NEXT: S_ENDPGM 0, implicit [[ADD]](s16) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s16) = G_ADD %2, %3 - S_ENDPGM 0, implicit %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i16) = COPY [[TRUNC1]](i16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(i16) = G_ADD [[TRUNC]], [[COPY2]] + ; CHECK-NEXT: S_ENDPGM 0, implicit [[ADD]](i16) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr0 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i16) = G_ADD %2, %3 + S_ENDPGM 0, implicit %4(i16) ... --- @@ -88,16 +88,16 @@ body: | ; CHECK-LABEL: name: add_s16_vv ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s16) = G_ADD [[TRUNC]], [[TRUNC1]] - ; CHECK-NEXT: S_ENDPGM 0, implicit [[ADD]](s16) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s16) = G_ADD %2, %3 - S_ENDPGM 0, implicit %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(i16) = G_ADD [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: S_ENDPGM 0, implicit [[ADD]](i16) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i16) = G_ADD %2, %3 + S_ENDPGM 0, implicit %4(i16) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.s32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.s32.mir index 45332c2870c02..6801557ec6d18 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.s32.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.s32.mir @@ -11,12 +11,12 @@ body: | ; CHECK-LABEL: name: add_s32_ss ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY]], [[COPY1]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ADD %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(i32) = G_ADD [[COPY]], [[COPY1]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_ADD %0, %1 ... 
--- @@ -29,13 +29,13 @@ body: | ; CHECK-LABEL: name: add_s32_sv ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY2]], [[COPY1]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = G_ADD %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[COPY2]], [[COPY1]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = G_ADD %0, %1 ... --- @@ -48,13 +48,13 @@ body: | ; CHECK-LABEL: name: add_s32_vs ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY]], [[COPY2]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s32) = G_ADD %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[COPY]], [[COPY2]] + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr0 + %2:_(i32) = G_ADD %0, %1 ... --- @@ -67,10 +67,10 @@ body: | ; CHECK-LABEL: name: add_s32_vv ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY]], [[COPY1]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_ADD %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[COPY]], [[COPY1]] + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_ADD %0, %1 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.v2s16.mir index 97018fac13a87..8088f88135ee9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.v2s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.v2s16.mir @@ -12,22 +12,22 @@ body: | ; CHECK-LABEL: name: add_v2s16_ss ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[BITCAST]], [[BITCAST1]] - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:sgpr(s32) = G_ADD [[LSHR]], [[LSHR1]] - ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ADD]](s32), [[ADD1]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR_TRUNC]](<2 x s16>) - %0:_(<2 x s16>) = COPY $sgpr0 - %1:_(<2 x s16>) = COPY $sgpr1 - %2:_(<2 x s16>) = G_ADD %0, %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:sgpr(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:sgpr(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(i32) = G_ADD [[BITCAST]], [[BITCAST1]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:sgpr(i32) = G_ADD [[LSHR]], [[LSHR1]] + ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC [[ADD]](i32), [[ADD1]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR_TRUNC]](<2 x i16>) + %0:_(<2 x i16>) = COPY $sgpr0 + %1:_(<2 x i16>) = COPY $sgpr1 + %2:_(<2 x i16>) = G_ADD %0, %1 + S_ENDPGM 0, implicit %2(<2 x i16>) ... --- @@ -40,15 +40,15 @@ body: | ; CHECK-LABEL: name: add_v2s16_sv ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(<2 x s16>) = G_ADD [[COPY2]], [[COPY1]] - ; CHECK-NEXT: S_ENDPGM 0, implicit [[ADD]](<2 x s16>) - %0:_(<2 x s16>) = COPY $sgpr0 - %1:_(<2 x s16>) = COPY $vgpr0 - %2:_(<2 x s16>) = G_ADD %0, %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x i16>) = COPY [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(<2 x i16>) = G_ADD [[COPY2]], [[COPY1]] + ; CHECK-NEXT: S_ENDPGM 0, implicit [[ADD]](<2 x i16>) + %0:_(<2 x i16>) = COPY $sgpr0 + %1:_(<2 x i16>) = COPY $vgpr0 + %2:_(<2 x i16>) = G_ADD %0, %1 + S_ENDPGM 0, implicit %2(<2 x i16>) ... 
--- @@ -61,13 +61,13 @@ body: | ; CHECK-LABEL: name: add_v2s16_vs ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(<2 x s16>) = G_ADD [[COPY]], [[COPY2]] - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $sgpr0 - %2:_(<2 x s16>) = G_ADD %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x i16>) = COPY [[COPY1]](<2 x i16>) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(<2 x i16>) = G_ADD [[COPY]], [[COPY2]] + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $sgpr0 + %2:_(<2 x i16>) = G_ADD %0, %1 ... --- @@ -80,12 +80,12 @@ body: | ; CHECK-LABEL: name: add_v2s16_vv ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(<2 x s16>) = G_ADD [[COPY]], [[COPY1]] - ; CHECK-NEXT: S_ENDPGM 0, implicit [[ADD]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_ADD %0, %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr1 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(<2 x i16>) = G_ADD [[COPY]], [[COPY1]] + ; CHECK-NEXT: S_ENDPGM 0, implicit [[ADD]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = G_ADD %0, %1 + S_ENDPGM 0, implicit %2(<2 x i16>) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-exp-compr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-exp-compr.mir index 41a00533fedf4..a26b32c1e0db5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-exp-compr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-exp-compr.mir @@ -12,14 +12,14 @@ body: | ; CHECK-LABEL: name: exp_compr_v2f16_s ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.compr), 0, 0, [[COPY2]](s32), [[COPY3]](s32), 0, 0 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.compr.v2f16), 0, 0, %0, %1, 0, 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.compr), 0, 0, [[COPY2]](i32), [[COPY3]](i32), 0, 0 + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.compr), 0, 0, %0(i32), %1(i32), 0, 0 ... 
--- name: exp_compr_v2f16_v @@ -31,10 +31,10 @@ body: | ; CHECK-LABEL: name: exp_compr_v2f16_v ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.compr), 0, 0, [[COPY]](s32), [[COPY1]](s32), 0, 0 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.compr.v2f16), 0, 0, %0, %1, 0, 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.compr), 0, 0, [[COPY]](i32), [[COPY1]](i32), 0, 0 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.compr), 0, 0, %0(i32), %1(i32), 0, 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-exp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-exp.mir index 313b0c5b6707c..9bfa3f3a9e0f6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-exp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-exp.mir @@ -25,20 +25,28 @@ body: | ; CHECK-LABEL: name: exp_s ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY3]](s32) - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 0, [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), 0, 0 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $sgpr2 - %3:_(s32) = COPY $sgpr3 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.f32), 0, 0, %0, %1, %2, %3, 0, 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY2]](i32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY3]](i32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST]](f32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST1]](f32) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST2]](f32) + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST3]](f32) + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 0, [[COPY4]](f32), [[COPY5]](f32), [[COPY6]](f32), [[COPY7]](f32), 0, 0 + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $sgpr2 + %3:_(i32) = COPY $sgpr3 + %4:_(f32) = G_BITCAST %0(i32) + %5:_(f32) = G_BITCAST %1(i32) + %6:_(f32) = G_BITCAST %2(i32) + %7:_(f32) = G_BITCAST %3(i32) + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 0, %4(f32), %5(f32), %6(f32), %7(f32), 0, 0 ... 
--- name: exp_v @@ -50,14 +58,22 @@ body: | ; CHECK-LABEL: name: exp_v ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 0, [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), 0, 0 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = COPY $vgpr3 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.f32), 0, 0, %0, %1, %2, %3, 0, 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY2]](i32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY3]](i32) + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 0, [[BITCAST]](f32), [[BITCAST1]](f32), [[BITCAST2]](f32), [[BITCAST3]](f32), 0, 0 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 + %4:_(f32) = G_BITCAST %0(i32) + %5:_(f32) = G_BITCAST %1(i32) + %6:_(f32) = G_BITCAST %2(i32) + %7:_(f32) = G_BITCAST %3(i32) + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 0, %4(f32), %5(f32), %6(f32), %7(f32), 0, 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-s-buffer-load.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-s-buffer-load.mir index bf155eefe6129..c5a68f75ede82 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-s-buffer-load.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-s-buffer-load.mir @@ -13,12 +13,12 @@ body: | ; CHECK-LABEL: name: buffer_load_ss ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[COPY1]](s32), 0 - %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %1:_(s32) = COPY $sgpr4 - %2:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD %0, %1, 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; CHECK-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<4 x i32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x i32>), [[COPY1]](i32), 0 + %0:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %1:_(i32) = COPY $sgpr4 + %2:_(<4 x i32>) = G_AMDGPU_S_BUFFER_LOAD %0(<4 x i32>), %1(i32), 0 ... 
@@ -33,14 +33,14 @@ body: | ; CHECK-LABEL: name: buffer_load_sv ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C1]](s32), [[COPY1]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %1:_(s32) = COPY $vgpr0 - %2:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD %0, %1, 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x i32>) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x i32>), [[C1]](i32), [[COPY1]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (i128), align 4) + %0:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %1:_(i32) = COPY $vgpr0 + %2:_(<4 x i32>) = G_AMDGPU_S_BUFFER_LOAD %0(<4 x i32>), %1(i32), 0 ... @@ -56,11 +56,11 @@ body: | ; CHECK: successors: %bb.1(0x80000000) ; CHECK-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} @@ -68,24 +68,24 @@ body: | ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.0, %9, %bb.2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP 
intpred(eq), [[UV6]](s64), [[UV4]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(i32), [[UV1:%[0-9]+]]:vgpr_32(i32), [[UV2:%[0-9]+]]:vgpr_32(i32), [[UV3:%[0-9]+]]:vgpr_32(i32) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV]](i32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV1]](i32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV2]](i32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV3]](i32), implicit $exec + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](i32), [[V_READFIRSTLANE_B32_1]](i32), [[V_READFIRSTLANE_B32_2]](i32), [[V_READFIRSTLANE_B32_3]](i32) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(i64), [[UV5:%[0-9]+]]:vgpr(i64) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(i64), [[UV7:%[0-9]+]]:sgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV6]](i64), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV7]](i64), [[UV5]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(i1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](i1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](i64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: .2: ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY2]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x i32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY2]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (i128), align 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec ; CHECK-NEXT: {{ $}} @@ -95,9 +95,9 @@ body: | ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: .4: - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = COPY $sgpr0 - %2:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD %0, %1, 0 + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i32) = COPY $sgpr0 + %2:_(<4 x i32>) = G_AMDGPU_S_BUFFER_LOAD %0(<4 x i32>), %1(i32), 0 ... 
@@ -113,10 +113,10 @@ body: | ; CHECK: successors: %bb.1(0x80000000) ; CHECK-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} @@ -124,24 +124,24 @@ body: | ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.0, %8, %bb.2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(i32), [[UV1:%[0-9]+]]:vgpr_32(i32), [[UV2:%[0-9]+]]:vgpr_32(i32), [[UV3:%[0-9]+]]:vgpr_32(i32) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV]](i32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV1]](i32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV2]](i32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV3]](i32), implicit $exec + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](i32), [[V_READFIRSTLANE_B32_1]](i32), [[V_READFIRSTLANE_B32_2]](i32), [[V_READFIRSTLANE_B32_3]](i32) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(i64), [[UV5:%[0-9]+]]:vgpr(i64) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; 
CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(i64), [[UV7:%[0-9]+]]:sgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x i32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV6]](i64), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV7]](i64), [[UV5]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(i1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](i1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](i64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: .2: ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY1]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x i32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C1]](i32), [[COPY1]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (i128), align 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec ; CHECK-NEXT: {{ $}} @@ -151,8 +151,8 @@ body: | ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: .4: - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = COPY $vgpr4 - %2:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD %0, %1, 0 + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i32) = COPY $vgpr4 + %2:_(<4 x i32>) = G_AMDGPU_S_BUFFER_LOAD %0(<4 x i32>), %1(i32), 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ballot.i64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ballot.i64.mir index aa54b425a4db0..4dbc54e21a05b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ballot.i64.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ballot.i64.mir @@ -12,15 +12,15 @@ body: | ; CHECK-LABEL: name: ballot_sgpr_src ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[COPY1]](s1) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[INT]](s64) - %0:_(s32) = COPY $sgpr0 - %1:_(s1) = G_TRUNC %0 - %2:_(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[COPY1]](i1) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[INTRINSIC_CONVERGENT]](i64) + %0:_(i32) = COPY $sgpr0 + %1:_(i1) = G_TRUNC %0(i32) + %2:_(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), %1(i1) + S_ENDPGM 0, implicit %2(i64) ... 
--- @@ -33,15 +33,15 @@ body: | ; CHECK-LABEL: name: ballot_vgpr_src ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[COPY1]](s1) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[INT]](s64) - %0:_(s32) = COPY $vgpr0 - %1:_(s1) = G_TRUNC %0 - %2:_(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[COPY1]](i1) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[INTRINSIC_CONVERGENT]](i64) + %0:_(i32) = COPY $vgpr0 + %1:_(i1) = G_TRUNC %0(i32) + %2:_(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), %1(i1) + S_ENDPGM 0, implicit %2(i64) ... --- @@ -54,14 +54,14 @@ body: | ; CHECK-LABEL: name: ballot_vcc_src ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[INT]](s64) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s1) = G_ICMP intpred(eq), %0, %1 - %3:_(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), %2 - S_ENDPGM 0, implicit %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY1]] + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](i1) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[INTRINSIC_CONVERGENT]](i64) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i1) = G_ICMP intpred(eq), %0(i32), %1 + %3:_(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), %2(i1) + S_ENDPGM 0, implicit %3(i64) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.class.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.class.mir index 95e63c74a5088..deacd3b17985c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.class.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.class.mir @@ -12,14 +12,16 @@ body: | ; CHECK-LABEL: name: class_ss ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), [[COPY2]](s64), [[COPY3]](s32) - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f64) = G_BITCAST [[COPY]](i64) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(f64) = COPY [[BITCAST]](f64) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), [[COPY2]](f64), [[COPY3]](i32) + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(i32) = COPY $sgpr2 + %2:_(f64) = G_BITCAST %0(i64) + %3:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %2(f64), %1(i32) ... --- @@ -33,13 +35,15 @@ body: | ; CHECK-LABEL: name: class_sv ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), [[COPY2]](s64), [[COPY1]](s32) - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $vgpr0 - %2:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f64) = G_BITCAST [[COPY]](i64) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(f64) = COPY [[BITCAST]](f64) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), [[COPY2]](f64), [[COPY1]](i32) + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(i32) = COPY $vgpr0 + %2:_(f64) = G_BITCAST %0(i64) + %3:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %2(f64), %1(i32) ... 
--- @@ -52,13 +56,15 @@ body: | ; CHECK-LABEL: name: class_vs ; CHECK: liveins: $vgpr0_vgpr1, $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), [[COPY]](s64), [[COPY2]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $sgpr0 - %2:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f64) = G_BITCAST [[COPY]](i64) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), [[BITCAST]](f64), [[COPY2]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = COPY $sgpr0 + %2:_(f64) = G_BITCAST %0(i64) + %3:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %2(f64), %1(i32) ... --- @@ -71,10 +77,12 @@ body: | ; CHECK-LABEL: name: class_vv ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), [[COPY]](s64), [[COPY1]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f64) = G_BITCAST [[COPY]](i64) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), [[BITCAST]](f64), [[COPY1]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = COPY $vgpr2 + %2:_(f64) = G_BITCAST %0(i64) + %3:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %2(f64), %1(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.cvt.pkrtz.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.cvt.pkrtz.mir index a39287f1991e9..3ef8eb03ece38 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.cvt.pkrtz.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.cvt.pkrtz.mir @@ -12,14 +12,18 @@ body: | ; CHECK-LABEL: name: cvt_pkrtz_ss ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), [[COPY2]](s32), [[COPY3]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST]](f32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST1]](f32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), [[COPY2]](f32), [[COPY3]](f32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), %2(f32), %3(f32) ... --- name: cvt_pkrtz_sv @@ -31,13 +35,17 @@ body: | ; CHECK-LABEL: name: cvt_pkrtz_sv ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), [[COPY2]](s32), [[COPY1]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST]](f32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), [[COPY2]](f32), [[BITCAST1]](f32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), %2(f32), %3(f32) ... 
--- name: cvt_pkrtz_vs @@ -49,13 +57,17 @@ body: | ; CHECK-LABEL: name: cvt_pkrtz_vs ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), [[COPY1]](s32), [[COPY2]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), %1, %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST1]](f32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), [[BITCAST]](f32), [[COPY2]](f32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(f32) = G_BITCAST %1(i32) + %3:_(f32) = G_BITCAST %0(i32) + %4:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), %2(f32), %3(f32) ... --- name: cvt_pkrtz_vv @@ -67,10 +79,14 @@ body: | ; CHECK-LABEL: name: cvt_pkrtz_vv ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), [[COPY]](s32), [[COPY1]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), [[BITCAST]](f32), [[BITCAST1]](f32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), %2(f32), %3(f32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.div.fmas.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.div.fmas.mir index 3fd01395c7f21..7349c79d568a5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.div.fmas.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.div.fmas.mir @@ -12,25 +12,31 @@ body: | ; CHECK-LABEL: name: div_fmas_sss_scc ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY3]](s32), [[C]] - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s1) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $sgpr2 - %3:_(s32) = COPY $sgpr3 - %4:_(s32) = G_CONSTANT i32 0 - %5:_(s1) = G_ICMP intpred(eq), %3, %4 - %6:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), %0, %1, %2, %5 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY3]](i32), [[C]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY2]](i32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST]](f32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST1]](f32) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST2]](f32) + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[COPY4]](f32), [[COPY5]](f32), [[COPY6]](f32), [[COPY7]](i1) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $sgpr2 + %3:_(i32) = COPY $sgpr3 + %4:_(i32) = G_CONSTANT i32 0 + %5:_(i1) = G_ICMP intpred(eq), %3(i32), %4 + %6:_(f32) = G_BITCAST %0(i32) + %7:_(f32) = G_BITCAST %1(i32) + %8:_(f32) = G_BITCAST %2(i32) + %9:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), %6(f32), %7(f32), %8(f32), %5(i1) ... 
--- @@ -43,24 +49,30 @@ body: | ; CHECK-LABEL: name: div_fmas_sss_vcc ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY3]](s32), [[COPY4]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[ICMP]](s1) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $sgpr2 - %3:_(s32) = COPY $vgpr0 - %4:_(s32) = G_CONSTANT i32 0 - %5:_(s1) = G_ICMP intpred(eq), %3, %4 - %6:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), %0, %1, %2, %5 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY3]](i32), [[COPY4]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY2]](i32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST]](f32) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST1]](f32) + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST2]](f32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[COPY5]](f32), [[COPY6]](f32), [[COPY7]](f32), [[ICMP]](i1) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $sgpr2 + %3:_(i32) = COPY $vgpr0 + %4:_(i32) = G_CONSTANT i32 0 + %5:_(i1) = G_ICMP intpred(eq), %3(i32), %4 + %6:_(f32) = G_BITCAST %0(i32) + %7:_(f32) = G_BITCAST %1(i32) + %8:_(f32) = G_BITCAST %2(i32) + %9:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), %6(f32), %7(f32), %8(f32), %5(i1) ... 
--- @@ -73,23 +85,29 @@ body: | ; CHECK-LABEL: name: div_fmas_vss_vcc ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY3]](s32), [[COPY4]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[COPY]](s32), [[COPY5]](s32), [[COPY6]](s32), [[ICMP]](s1) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s32) = COPY $sgpr1 - %3:_(s32) = COPY $vgpr1 - %4:_(s32) = G_CONSTANT i32 0 - %5:_(s1) = G_ICMP intpred(eq), %3, %4 - %6:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), %0, %1, %2, %5 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY3]](i32), [[COPY4]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY2]](i32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST1]](f32) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST2]](f32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[BITCAST]](f32), [[COPY5]](f32), [[COPY6]](f32), [[ICMP]](i1) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr0 + %2:_(i32) = COPY $sgpr1 + %3:_(i32) = COPY $vgpr1 + %4:_(i32) = G_CONSTANT i32 0 + %5:_(i1) = G_ICMP intpred(eq), %3(i32), %4 + %6:_(f32) = G_BITCAST %0(i32) + %7:_(f32) = G_BITCAST %1(i32) + %8:_(f32) = G_BITCAST %2(i32) + %9:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), %6(f32), %7(f32), %8(f32), %5(i1) ... 
--- @@ -102,19 +120,25 @@ body: | ; CHECK-LABEL: name: div_fmas_vvv_vcc ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY3]](s32), [[COPY4]] - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[ICMP]](s1) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = COPY $vgpr3 - %4:_(s32) = G_CONSTANT i32 0 - %5:_(s1) = G_ICMP intpred(eq), %3, %4 - %6:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), %0, %1, %2, %5 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY3]](i32), [[COPY4]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY2]](i32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[BITCAST]](f32), [[BITCAST1]](f32), [[BITCAST2]](f32), [[ICMP]](i1) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 + %4:_(i32) = G_CONSTANT i32 0 + %5:_(i1) = G_ICMP intpred(eq), %3(i32), %4 + %6:_(f32) = G_BITCAST %0(i32) + %7:_(f32) = G_BITCAST %1(i32) + %8:_(f32) = G_BITCAST %2(i32) + %9:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), %6(f32), %7(f32), %8(f32), %5(i1) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.div.scale.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.div.scale.mir index ea9ec47e594af..368fd1f02e666 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.div.scale.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.div.scale.mir @@ -12,14 +12,18 @@ body: | ; CHECK-LABEL: name: div_scale_ss ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32), [[INT1:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY2]](s32), [[COPY3]](s32), 0 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32), %3:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), %0, %1, 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST]](f32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST1]](f32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(f32), [[INT1:%[0-9]+]]:vcc(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY2]](f32), [[COPY3]](f32), 0 + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32), %5:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), %2(f32), %3(f32), 0 ... --- @@ -32,13 +36,17 @@ body: | ; CHECK-LABEL: name: div_scale_sv ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32), [[INT1:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY2]](s32), [[COPY1]](s32), 0 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32), %3:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), %0, %1, 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST]](f32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(f32), [[INT1:%[0-9]+]]:vcc(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY2]](f32), [[BITCAST1]](f32), 0 + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32), %5:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), %2(f32), %3(f32), 0 ...
--- @@ -51,13 +59,17 @@ body: | ; CHECK-LABEL: name: div_scale_vs ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32), [[INT1:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY2]](s32), 0 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s32), %3:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), %0, %1, 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST1]](f32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(f32), [[INT1:%[0-9]+]]:vcc(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[BITCAST]](f32), [[COPY2]](f32), 0 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr0 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32), %5:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), %2(f32), %3(f32), 0 ... --- @@ -70,10 +82,14 @@ body: | ; CHECK-LABEL: name: div_scale_vv ; CHECK: liveins: $vgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32), [[INT1:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 0 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32), %3:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), %0, %1, 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(f32), [[INT1:%[0-9]+]]:vcc(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[BITCAST]](f32), [[BITCAST1]](f32), 0 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32), %5:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), %2(f32), %3(f32), 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.append.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.append.mir index f050616d4e626..69388731685fc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.append.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.append.mir @@ -13,9 +13,9 @@ body: | ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.append), [[COPY]](p3), 0 + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:%[0-9]+]]:vgpr(i32) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.append), [[COPY]](p3), 0 %0:_(p3) = COPY $sgpr0 - %1:_(s32) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.append), %0, 0 + %1:_(i32) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.append), %0(p3), 0 ...
@@ -31,8 +31,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(p3) = COPY $vgpr0 ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(p3) = V_READFIRSTLANE_B32 [[COPY]](p3), implicit $exec - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.append), [[V_READFIRSTLANE_B32_]](p3), 0 + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:%[0-9]+]]:vgpr(i32) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.append), [[V_READFIRSTLANE_B32_]](p3), 0 %0:_(p3) = COPY $vgpr0 - %1:_(s32) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.append), %0, 0 + %1:_(i32) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.append), %0(p3), 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.bpermute.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.bpermute.mir index 9085d39e93611..0d5ae50ed3b75 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.bpermute.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.bpermute.mir @@ -13,13 +13,13 @@ body: | ; CHECK-LABEL: name: ds_bpermute_ss ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ds.bpermute), [[COPY2]](s32), [[COPY3]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ds.bpermute), %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:vgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ds.bpermute), [[COPY2]](i32), [[COPY3]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ds.bpermute), %0(i32), %1(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.consume.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.consume.mir index 071fdc8897a00..36920d6ed2847 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.consume.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.consume.mir @@ -13,9 +13,9 @@ body: | ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.consume), [[COPY]](p3), 0 + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:%[0-9]+]]:vgpr(i32) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.consume), [[COPY]](p3), 0 %0:_(p3) = COPY $sgpr0 - %1:_(s32) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.consume), %0, 0 + %1:_(i32) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.consume), %0(p3), 0 ... 
@@ -31,8 +31,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(p3) = COPY $vgpr0 ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(p3) = V_READFIRSTLANE_B32 [[COPY]](p3), implicit $exec - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.consume), [[V_READFIRSTLANE_B32_]](p3), 0 + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:%[0-9]+]]:vgpr(i32) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.consume), [[V_READFIRSTLANE_B32_]](p3), 0 %0:_(p3) = COPY $vgpr0 - %1:_(s32) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.consume), %0, 0 + %1:_(i32) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.consume), %0(p3), 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.gws.init.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.gws.init.mir index 388edcb16a7a7..cd064f99526bf 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.gws.init.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.gws.init.mir @@ -13,13 +13,13 @@ body: | ; CHECK-LABEL: name: ds_gws_init_s_s ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), [[COPY2]](s32), [[COPY1]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), [[COPY2]](i32), [[COPY1]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), %0(i32), %1(i32) ... --- @@ -33,14 +33,14 @@ body: | ; CHECK-LABEL: name: ds_gws_init_s_v ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec - ; CHECK-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), [[COPY2]](s32), [[V_READFIRSTLANE_B32_]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[COPY1]](i32), implicit $exec + ; CHECK-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), [[COPY2]](i32), [[V_READFIRSTLANE_B32_]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), %0(i32), %1(i32) ... 
--- @@ -54,12 +54,12 @@ body: | ; CHECK-LABEL: name: ds_gws_init_v_s ; CHECK: liveins: $vgpr0, $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), [[COPY]](s32), [[COPY1]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), [[COPY]](i32), [[COPY1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr0 + G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), %0(i32), %1(i32) ... --- @@ -73,11 +73,11 @@ body: | ; CHECK-LABEL: name: ds_gws_init_v_v ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec - ; CHECK-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), [[COPY]](s32), [[V_READFIRSTLANE_B32_]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[COPY1]](i32), implicit $exec + ; CHECK-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), [[COPY]](i32), [[V_READFIRSTLANE_B32_]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), %0(i32), %1(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.gws.sema.v.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.gws.sema.v.mir index 7fdba6490f3ec..a366f4a1c1800 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.gws.sema.v.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.gws.sema.v.mir @@ -13,10 +13,10 @@ body: | ; CHECK-LABEL: name: ds_gws_init_s ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.sema.v), [[COPY]](s32) - %0:_(s32) = COPY $sgpr0 - G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.sema.v), %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.sema.v), [[COPY]](i32) + %0:_(i32) = COPY $sgpr0 + G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.sema.v), %0(i32) ... 
--- @@ -30,9 +30,9 @@ body: | ; CHECK-LABEL: name: ds_gws_init_v ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY]](s32), implicit $exec - ; CHECK-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.sema.v), [[V_READFIRSTLANE_B32_]](s32) - %0:_(s32) = COPY $vgpr0 - G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.sema.v), %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[COPY]](i32), implicit $exec + ; CHECK-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.sema.v), [[V_READFIRSTLANE_B32_]](i32) + %0:_(i32) = COPY $vgpr0 + G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.sema.v), %0(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.ordered.add.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.ordered.add.mir index 655155dc99087..da0f1ac6a5296 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.ordered.add.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.ordered.add.mir @@ -12,13 +12,13 @@ body: | ; CHECK-LABEL: name: ds_ordered_add_ss ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.add), [[COPY]](s32), [[COPY2]](s32), 0, 0, 0, 0, 0, 0 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.add), %0, %1, 0, 0, 0, 0, 0, 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(i32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.add), [[COPY]](i32), [[COPY2]](i32), 0, 0, 0, 0, 0, 0 + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.add), %0(i32), %1(i32), 0, 0, 0, 0, 0, 0 ... 
--- @@ -31,14 +31,14 @@ body: | ; CHECK-LABEL: name: ds_ordered_add_vs ; CHECK: liveins: $vgpr0, $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY]](s32), implicit $exec - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.add), [[V_READFIRSTLANE_B32_]](s32), [[COPY2]](s32), 0, 0, 0, 0, 0, 0 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.add), %0, %1, 0, 0, 0, 0, 0, 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[COPY]](i32), implicit $exec + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(i32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.add), [[V_READFIRSTLANE_B32_]](i32), [[COPY2]](i32), 0, 0, 0, 0, 0, 0 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr0 + %2:_(i32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.add), %0(i32), %1(i32), 0, 0, 0, 0, 0, 0 ... --- @@ -51,13 +51,13 @@ body: | ; CHECK-LABEL: name: ds_ordered_add_vv ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY]](s32), implicit $exec - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.add), [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), 0, 0, 0, 0, 0, 0 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.add), %0, %1, 0, 0, 0, 0, 0, 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[COPY]](i32), implicit $exec + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(i32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.add), [[V_READFIRSTLANE_B32_]](i32), [[COPY1]](i32), 0, 0, 0, 0, 0, 0 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.add), %0(i32), %1(i32), 0, 0, 0, 0, 0, 0 ... 
--- @@ -70,10 +70,10 @@ body: | ; CHECK-LABEL: name: ds_ordered_add_sv ; CHECK: liveins: $vgpr0, $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.add), [[COPY]](s32), [[COPY1]](s32), 0, 0, 0, 0, 0, 0 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.add), %0, %1, 0, 0, 0, 0, 0, 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(i32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.add), [[COPY]](i32), [[COPY1]](i32), 0, 0, 0, 0, 0, 0 + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.add), %0(i32), %1(i32), 0, 0, 0, 0, 0, 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.ordered.swap.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.ordered.swap.mir index 40a1183098245..662cbaa4fe89b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.ordered.swap.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.ordered.swap.mir @@ -12,13 +12,13 @@ body: | ; CHECK-LABEL: name: ds_ordered_swap_ss ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.swap), [[COPY]](s32), [[COPY2]](s32), 0, 0, 0, 0, 0, 0 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.swap), %0, %1, 0, 0, 0, 0, 0, 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(i32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.swap), [[COPY]](i32), [[COPY2]](i32), 0, 0, 0, 0, 0, 0 + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.swap), %0(i32), %1(i32), 0, 0, 0, 0, 0, 0 ... 
--- @@ -31,14 +31,14 @@ body: | ; CHECK-LABEL: name: ds_ordered_swap_vs ; CHECK: liveins: $vgpr0, $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY]](s32), implicit $exec - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.swap), [[V_READFIRSTLANE_B32_]](s32), [[COPY2]](s32), 0, 0, 0, 0, 0, 0 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.swap), %0, %1, 0, 0, 0, 0, 0, 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[COPY]](i32), implicit $exec + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(i32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.swap), [[V_READFIRSTLANE_B32_]](i32), [[COPY2]](i32), 0, 0, 0, 0, 0, 0 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr0 + %2:_(i32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.swap), %0(i32), %1(i32), 0, 0, 0, 0, 0, 0 ... --- @@ -51,13 +51,13 @@ body: | ; CHECK-LABEL: name: ds_ordered_swap_vv ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY]](s32), implicit $exec - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.swap), [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), 0, 0, 0, 0, 0, 0 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.swap), %0, %1, 0, 0, 0, 0, 0, 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[COPY]](i32), implicit $exec + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(i32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.swap), [[V_READFIRSTLANE_B32_]](i32), [[COPY1]](i32), 0, 0, 0, 0, 0, 0 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.swap), %0(i32), %1(i32), 0, 0, 0, 0, 0, 0 ... 
--- @@ -70,10 +70,10 @@ body: | ; CHECK-LABEL: name: ds_ordered_swap_sv ; CHECK: liveins: $vgpr0, $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.swap), [[COPY]](s32), [[COPY1]](s32), 0, 0, 0, 0, 0, 0 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.swap), %0, %1, 0, 0, 0, 0, 0, 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(i32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.swap), [[COPY]](i32), [[COPY1]](i32), 0, 0, 0, 0, 0, 0 + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.swap), %0(i32), %1(i32), 0, 0, 0, 0, 0, 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.permute.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.permute.mir index b65898e53f810..34285b3c07048 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.permute.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.permute.mir @@ -13,13 +13,13 @@ body: | ; CHECK-LABEL: name: ds_permute_ss ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ds.permute), [[COPY2]](s32), [[COPY3]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ds.permute), %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:vgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ds.permute), [[COPY2]](i32), [[COPY3]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ds.permute), %0(i32), %1(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.swizzle.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.swizzle.mir index 2956a2f1e4f45..bc786488805f7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.swizzle.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.swizzle.mir @@ -13,10 +13,10 @@ body: | ; CHECK-LABEL: name: ds_swizzle_s ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ds.swizzle), [[COPY1]](s32), 0 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ds.swizzle), %0, 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:vgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ds.swizzle), [[COPY1]](i32), 0 + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ds.swizzle), %0(i32), 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.else.32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.else.32.mir index f530d876d4069..7321eee381592 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.else.32.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.else.32.mir @@ -14,9 +14,9 @@ body: | ; CHECK-LABEL: name: else ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(s1), [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.else), [[COPY]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s1), %2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.else), %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(i1), [[INT1:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.else), [[COPY]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i1), %2:_(i32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.else), %0(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.else.64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.else.64.mir index 97fc30233af7a..f69b639987a7d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.else.64.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.else.64.mir @@ -11,9 +11,9 @@ body: | ; CHECK-LABEL: name: else ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(s1), [[INT1:%[0-9]+]]:sgpr(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.else), [[COPY]](s64) - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s1), %2:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.else), %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(i1), [[INT1:%[0-9]+]]:sgpr(i64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.else), [[COPY]](i64) + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(i1), %2:_(i64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.else), %0(i64) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.fcmp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.fcmp.mir index be59d356af05e..fafa304362bc9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.fcmp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.fcmp.mir @@ -12,14 +12,18 @@ body: | ; CHECK-LABEL: name: fcmp_ss ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), [[COPY2]](s32), [[COPY3]](s32), 1 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %0, %1, 1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST]](f32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST1]](f32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), [[COPY2]](f32), [[COPY3]](f32), 1 + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %2(f32), %3(f32), 1 ... --- @@ -32,13 +36,17 @@ body: | ; CHECK-LABEL: name: fcmp_sv ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), [[COPY2]](s32), [[COPY1]](s32), 1 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %0, %1, 1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST]](f32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), [[COPY2]](f32), [[BITCAST1]](f32), 1 + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %2(f32), %3(f32), 1 ... 
--- @@ -51,13 +59,17 @@ body: | ; CHECK-LABEL: name: fcmp_vs ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), [[COPY]](s32), [[COPY2]](s32), 1 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %0, %1, 1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST1]](f32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), [[BITCAST]](f32), [[COPY2]](f32), 1 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr0 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %2(f32), %3(f32), 1 ... --- @@ -70,10 +82,14 @@ body: | ; CHECK-LABEL: name: fcmp_vv ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), [[COPY]](s32), [[COPY1]](s32), 1 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %0, %1, 1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), [[BITCAST]](f32), [[BITCAST1]](f32), 1 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %2(f32), %3(f32), 1 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.fmul.legacy.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.fmul.legacy.mir index 0b83571560267..f1a797aaac2c1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.fmul.legacy.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.fmul.legacy.mir @@ -12,14 +12,18 @@ body: | ; CHECK-LABEL: name: fmul_legacy_ss ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[COPY2]](s32), [[COPY3]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST]](f32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST1]](f32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[COPY2]](f32), [[COPY3]](f32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), %2(f32), %3(f32) ... --- name: fmul_legacy_sv @@ -31,13 +35,17 @@ body: | ; CHECK-LABEL: name: fmul_legacy_sv ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[COPY2]](s32), [[COPY1]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST]](f32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[COPY2]](f32), [[BITCAST1]](f32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), %2(f32), %3(f32) ... 
--- name: fmul_legacy_vs @@ -49,13 +57,17 @@ body: | ; CHECK-LABEL: name: fmul_legacy_vs ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[COPY1]](s32), [[COPY2]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), %1, %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST1]](f32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[BITCAST]](f32), [[COPY2]](f32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(f32) = G_BITCAST %1(i32) + %3:_(f32) = G_BITCAST %0(i32) + %4:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), %2(f32), %3(f32) ... --- name: fmul_legacy_vv @@ -67,10 +79,14 @@ body: | ; CHECK-LABEL: name: fmul_legacy_vv ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[COPY]](s32), [[COPY1]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[BITCAST]](f32), [[BITCAST1]](f32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), %2(f32), %3(f32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.groupstaticsize.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.groupstaticsize.mir index c5f066de1db1c..0c650aa755763 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.groupstaticsize.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.groupstaticsize.mir @@ -9,6 +9,6 @@ legalized: true body: | bb.0: ; CHECK-LABEL: name: groupstaticsize - ; CHECK: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.groupstaticsize) - %0:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.groupstaticsize) + ; CHECK: [[INT:%[0-9]+]]:sgpr(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.groupstaticsize) + %0:_(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.groupstaticsize) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.icmp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.icmp.mir index 434cc138f3704..d0f0f013e4ecc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.icmp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.icmp.mir @@ -12,14 +12,14 @@ body: | ; CHECK-LABEL: name: icmp_ss ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.icmp), [[COPY2]](s32), [[COPY3]](s32), 32 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.icmp), %0, %1, 32 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.icmp), [[COPY2]](i32), [[COPY3]](i32), 32 + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.icmp), %0(i32), %1(i32), 32 ... --- @@ -32,13 +32,13 @@ body: | ; CHECK-LABEL: name: icmp_sv ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.icmp), [[COPY2]](s32), [[COPY1]](s32), 32 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.icmp), %0, %1, 32 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.icmp), [[COPY2]](i32), [[COPY1]](i32), 32 + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.icmp), %0(i32), %1(i32), 32 ... --- @@ -51,13 +51,13 @@ body: | ; CHECK-LABEL: name: icmp_vs ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.icmp), [[COPY]](s32), [[COPY2]](s32), 32 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.icmp), %0, %1, 32 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.icmp), [[COPY]](i32), [[COPY2]](i32), 32 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr0 + %2:_(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.icmp), %0(i32), %1(i32), 32 ... 
--- @@ -70,10 +70,10 @@ body: | ; CHECK-LABEL: name: icmp_vv ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.icmp), [[COPY]](s32), [[COPY1]](s32), 32 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.icmp), %0, %1, 32 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.icmp), [[COPY]](i32), [[COPY1]](i32), 32 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.icmp), %0(i32), %1(i32), 32 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.interp.mov.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.interp.mov.mir index fed0799a6c784..26b8cabd9b224 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.interp.mov.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.interp.mov.mir @@ -13,13 +13,13 @@ body: | ; CHECK-LABEL: name: interp_mov_ss ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.mov), [[COPY2]](s32), 1, 1, [[COPY1]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.mov), %0, 1, 1, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.mov), [[COPY2]](i32), 1, 1, [[COPY1]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.mov), %0(i32), 1, 1, %1(i32) ... --- @@ -33,9 +33,9 @@ body: | ; CHECK-LABEL: name: interp_mov_sv ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY]](s32), implicit $exec - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.mov), 0, 1, 1, [[V_READFIRSTLANE_B32_]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.mov), 0, 1, 1, %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[COPY]](i32), implicit $exec + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.mov), 0, 1, 1, [[V_READFIRSTLANE_B32_]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.mov), 0, 1, 1, %0(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.interp.p1.f16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.interp.p1.f16.mir index 1dac8d2ae3482..2b55089843ac4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.interp.p1.f16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.interp.p1.f16.mir @@ -14,13 +14,15 @@ body: | ; CHECK-LABEL: name: interp_p1_f16_ss ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p1.f16), [[COPY2]](s32), 1, 1, 1, [[COPY1]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p1.f16), %0, 1, 1, 1, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST]](f32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p1.f16), [[COPY2]](f32), 1, 1, 1, [[COPY1]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p1.f16), %2(f32), 1, 1, 1, %1(i32) ... --- @@ -34,12 +36,14 @@ body: | ; CHECK-LABEL: name: interp_p1_f16_sv ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p1.f16), [[COPY2]](s32), 1, 1, 1, [[V_READFIRSTLANE_B32_]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p1.f16), %0, 1, 1, 1, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST]](f32) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[COPY1]](i32), implicit $exec + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p1.f16), [[COPY2]](f32), 1, 1, 1, [[V_READFIRSTLANE_B32_]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p1.f16), %2(f32), 1, 1, 1, %1(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.interp.p1.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.interp.p1.mir index 150fe4e2cd062..0bb563831d7d6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.interp.p1.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.interp.p1.mir @@ -13,13 +13,15 @@ body: | ; CHECK-LABEL: name: interp_p1_ss ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p1), [[COPY2]](s32), 1, 1, [[COPY1]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p1), %0, 1, 1, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST]](f32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p1), [[COPY2]](f32), 1, 1, [[COPY1]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p1), %2(f32), 1, 1, %1(i32) ... --- @@ -33,12 +35,14 @@ body: | ; CHECK-LABEL: name: interp_p1_vs ; CHECK: liveins: $vgpr0, $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p1), [[COPY]](s32), 1, 1, [[COPY1]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p1), %0, 1, 1, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p1), [[BITCAST]](f32), 1, 1, [[COPY1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr0 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p1), %2(f32), 1, 1, %1(i32) ... 
--- @@ -52,14 +56,16 @@ body: | ; CHECK-LABEL: name: interp_p1_sv ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p1), [[COPY2]](s32), 1, 1, [[V_READFIRSTLANE_B32_]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p1), %0, 1, 1, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST]](f32) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[COPY1]](i32), implicit $exec + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p1), [[COPY2]](f32), 1, 1, [[V_READFIRSTLANE_B32_]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p1), %2(f32), 1, 1, %1(i32) ... --- @@ -73,11 +79,13 @@ body: | ; CHECK-LABEL: name: interp_p1_vv ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p1), [[COPY]](s32), 1, 1, [[V_READFIRSTLANE_B32_]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p1), %0, 1, 1, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[COPY1]](i32), implicit $exec + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p1), [[BITCAST]](f32), 1, 1, [[V_READFIRSTLANE_B32_]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p1), %2(f32), 1, 1, %1(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.interp.p2.f16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.interp.p2.f16.mir index 81ee9ea80a0a9..f61f5f5090ac6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.interp.p2.f16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.interp.p2.f16.mir @@ -14,16 +14,20 @@ body: | ; CHECK-LABEL: name: interp_p2_f16_sss ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p2.f16), [[COPY3]](s32), [[COPY4]](s32), 1, 1, 1, [[COPY2]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $sgpr2 - %3:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p2.f16), %0, %1, 1, 1, 1, %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST]](f32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST1]](f32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p2.f16), [[COPY3]](f32), [[COPY4]](f32), 1, 1, 1, [[COPY2]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $sgpr2 + %3:_(f32) = G_BITCAST %0(i32) + %4:_(f32) = G_BITCAST %1(i32) + %5:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p2.f16), %3(f32), %4(f32), 1, 1, 1, %2(i32) ... 
--- @@ -37,15 +41,19 @@ body: | ; CHECK-LABEL: name: interp_p2_f16_ssv ; CHECK: liveins: $sgpr0, $sgpr1, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY2]](s32), implicit $exec - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p2.f16), [[COPY3]](s32), [[COPY4]](s32), 1, 1, 1, [[V_READFIRSTLANE_B32_]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $vgpr0 - %3:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p2.f16), %0, %1, 1, 1, 1, %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST]](f32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST1]](f32) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[COPY2]](i32), implicit $exec + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p2.f16), [[COPY3]](f32), [[COPY4]](f32), 1, 1, 1, [[V_READFIRSTLANE_B32_]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $vgpr0 + %3:_(f32) = G_BITCAST %0(i32) + %4:_(f32) = G_BITCAST %1(i32) + %5:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p2.f16), %3(f32), %4(f32), 1, 1, 1, %2(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.interp.p2.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.interp.p2.mir index 31aa855bae408..8241ec9e9a980 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.interp.p2.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.interp.p2.mir @@ -13,16 +13,20 @@ body: | ; CHECK-LABEL: name: interp_p2_sss ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p2), [[COPY3]](s32), [[COPY4]](s32), 1, 1, [[COPY2]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $sgpr2 - %3:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p2), %0, %1, 1, 1, %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST]](f32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST1]](f32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p2), [[COPY3]](f32), [[COPY4]](f32), 1, 1, [[COPY2]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $sgpr2 + %3:_(f32) = G_BITCAST %0(i32) + %4:_(f32) = G_BITCAST %1(i32) + %5:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p2), %3(f32), %4(f32), 1, 1, %2(i32) ... 
--- @@ -36,15 +40,19 @@ body: | ; CHECK-LABEL: name: interp_p2_ssv ; CHECK: liveins: $sgpr0, $sgpr1, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY2]](s32), implicit $exec - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p2), [[COPY3]](s32), [[COPY4]](s32), 1, 1, [[V_READFIRSTLANE_B32_]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $vgpr0 - %3:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p2), %0, %1, 1, 1, %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST]](f32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST1]](f32) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[COPY2]](i32), implicit $exec + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p2), [[COPY3]](f32), [[COPY4]](f32), 1, 1, [[V_READFIRSTLANE_B32_]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $vgpr0 + %3:_(f32) = G_BITCAST %0(i32) + %4:_(f32) = G_BITCAST %1(i32) + %5:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p2), %3(f32), %4(f32), 1, 1, %2(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.kernarg.segment.ptr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.kernarg.segment.ptr.mir index d1f3a42f80289..005a3dc75e25c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.kernarg.segment.ptr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.kernarg.segment.ptr.mir @@ -10,5 +10,5 @@ body: | bb.0: ; CHECK-LABEL: name: kernarg_segment_ptr ; CHECK: [[INT:%[0-9]+]]:sgpr(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - %2:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) + %0:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.kill.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.kill.mir index ab224f5cd8ee7..3a8c3a555410e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.kill.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.kill.mir @@ -12,16 +12,16 @@ body: | ; CHECK-LABEL: name: kill_scc ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.kill), [[COPY2]](s1) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s1) = G_ICMP intpred(eq), %0, %1 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.kill), %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY1]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.kill), [[COPY2]](i1) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i1) = G_ICMP intpred(eq), %0(i32), %1 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.kill), %2(i1) ... --- @@ -34,14 +34,14 @@ body: | ; CHECK-LABEL: name: kill_vcc ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.kill), [[ICMP]](s1) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s1) = G_ICMP intpred(eq), %0, %1 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.kill), %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY1]] + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.kill), [[ICMP]](i1) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i1) = G_ICMP intpred(eq), %0(i32), %1 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.kill), %2(i1) ... --- @@ -51,12 +51,12 @@ legalized: true body: | bb.0: ; CHECK-LABEL: name: kill_constant_true - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[C]](s32) - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.kill), [[COPY]](s1) - %0:_(s1) = G_CONSTANT i1 true - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.kill), %0 + ; CHECK: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[C]](i32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.kill), [[COPY]](i1) + %0:_(i1) = G_CONSTANT i1 true + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.kill), %0(i1) ... 
--- @@ -66,10 +66,10 @@ legalized: true body: | bb.0: ; CHECK-LABEL: name: kill_constant_false - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[C]](s32) - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.kill), [[COPY]](s1) - %0:_(s1) = G_CONSTANT i1 false - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.kill), %0 + ; CHECK: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[C]](i32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.kill), [[COPY]](i1) + %0:_(i1) = G_CONSTANT i1 false + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.kill), %0(i1) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.lds.direct.load.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.lds.direct.load.mir index 3936abfcead29..351c13b08a609 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.lds.direct.load.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.lds.direct.load.mir @@ -13,10 +13,10 @@ body: | ; CHECK-LABEL: name: lds_direct_load_s ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.lds.direct.load), [[COPY]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.lds.direct.load), %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(i32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.lds.direct.load), [[COPY]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.lds.direct.load), %0(i32) ... --- @@ -30,9 +30,9 @@ body: | ; CHECK-LABEL: name: lds_direct_load_v ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY]](s32), implicit $exec - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.lds.direct.load), [[V_READFIRSTLANE_B32_]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.lds.direct.load), %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[COPY]](i32), implicit $exec + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(i32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.lds.direct.load), [[V_READFIRSTLANE_B32_]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.lds.direct.load), %0(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.lds.param.load.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.lds.param.load.mir index f90f589ed0f58..027e502c6e0ad 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.lds.param.load.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.lds.param.load.mir @@ -13,10 +13,10 @@ body: | ; CHECK-LABEL: name: lds_param_load_s ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.lds.param.load), 1, 1, [[COPY]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.lds.param.load), 1, 1, %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.lds.param.load), 1, 1, [[COPY]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.lds.param.load), 1, 1, %0(i32) ... --- @@ -30,9 +30,9 @@ body: | ; CHECK-LABEL: name: lds_param_load_v ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY]](s32), implicit $exec - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.lds.param.load), 1, 1, [[V_READFIRSTLANE_B32_]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.lds.param.load), 1, 1, %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[COPY]](i32), implicit $exec + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.lds.param.load), 1, 1, [[V_READFIRSTLANE_B32_]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.lds.param.load), 1, 1, %0(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.live.mask.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.live.mask.mir index fba83f8d5a193..7a19b3f81f719 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.live.mask.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.live.mask.mir @@ -9,8 +9,8 @@ legalized: true body: | bb.0: ; CHECK-LABEL: name: live_mask - ; CHECK: [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.live.mask) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[INT]](s1) - %0:_(s1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.live.mask) - S_ENDPGM 0, implicit %0 + ; CHECK: [[INT:%[0-9]+]]:vcc(i1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.live.mask) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[INT]](i1) + %0:_(i1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.live.mask) + S_ENDPGM 0, implicit %0(i1) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.gfx90a.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.gfx90a.mir index 67ed51a396c8b..ae57e08eb7c86 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.gfx90a.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.gfx90a.mir @@ -13,24 +13,37 @@ body: | ; FAST-LABEL: name: mfma_f32_32x32x4bf16_1k_vva ; FAST: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<32 x s32>), 0, 0, 0 - ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>) + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr2_vgpr3 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<32 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; FAST-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<4 x bf16>) = G_BITCAST [[COPY]](i64) + ; FAST-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<4 x bf16>) = G_BITCAST [[COPY1]](i64) + ; FAST-NEXT: [[BITCAST2:%[0-9]+]]:agpr(<32 x f32>) = G_BITCAST [[COPY2]](<32 x i32>) + ; FAST-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<32 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16.1k), [[BITCAST]](<4 x bf16>), [[BITCAST1]](<4 x bf16>), [[BITCAST2]](<32 x f32>), 0, 0, 0 + ; FAST-NEXT: [[BITCAST3:%[0-9]+]]:agpr(<32 x i32>) = G_BITCAST [[INTRINSIC_CONVERGENT]](<32 x f32>) + ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[BITCAST3]](<32 x i32>) + ; ; GREEDY-LABEL: name: mfma_f32_32x32x4bf16_1k_vva ; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GREEDY-NEXT: 
[[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<32 x s32>), 0, 0, 0 - ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - %3:_(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16.1k), %0, %1, %2, 0, 0, 0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr2_vgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<32 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; GREEDY-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<4 x bf16>) = G_BITCAST [[COPY]](i64) + ; GREEDY-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<4 x bf16>) = G_BITCAST [[COPY1]](i64) + ; GREEDY-NEXT: [[BITCAST2:%[0-9]+]]:agpr(<32 x f32>) = G_BITCAST [[COPY2]](<32 x i32>) + ; GREEDY-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<32 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16.1k), [[BITCAST]](<4 x bf16>), [[BITCAST1]](<4 x bf16>), [[BITCAST2]](<32 x f32>), 0, 0, 0 + ; GREEDY-NEXT: [[BITCAST3:%[0-9]+]]:agpr(<32 x i32>) = G_BITCAST [[INTRINSIC_CONVERGENT]](<32 x f32>) + ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[BITCAST3]](<32 x i32>) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(<32 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + %3:_(<4 x bf16>) = G_BITCAST %0(i64) + %4:_(<4 x bf16>) = G_BITCAST %1(i64) + %5:_(<32 x f32>) = G_BITCAST %2(<32 x i32>) + %6:_(<32 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16.1k), %3(<4 x bf16>), %4(<4 x bf16>), %5(<32 x f32>), 0, 0, 0 + %7:_(<32 x i32>) = G_BITCAST %6(<32 x f32>) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %7(<32 x i32>) ... 
--- @@ -44,24 +57,37 @@ body: | ; FAST-LABEL: name: mfma_f32_16x16x4bf16_1k_vva ; FAST: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0 - ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr2_vgpr3 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; FAST-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<4 x bf16>) = G_BITCAST [[COPY]](i64) + ; FAST-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<4 x bf16>) = G_BITCAST [[COPY1]](i64) + ; FAST-NEXT: [[BITCAST2:%[0-9]+]]:agpr(<16 x f32>) = G_BITCAST [[COPY2]](<16 x i32>) + ; FAST-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<16 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x4bf16.1k), [[BITCAST]](<4 x bf16>), [[BITCAST1]](<4 x bf16>), [[BITCAST2]](<16 x f32>), 0, 0, 0 + ; FAST-NEXT: [[BITCAST3:%[0-9]+]]:agpr(<16 x i32>) = G_BITCAST [[INTRINSIC_CONVERGENT]](<16 x f32>) + ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST3]](<16 x i32>) + ; ; GREEDY-LABEL: name: mfma_f32_16x16x4bf16_1k_vva ; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0 - ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - %3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x4bf16.1k), %0, %1, %2, 0, 0, 0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr2_vgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GREEDY-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<4 x bf16>) = G_BITCAST [[COPY]](i64) + ; GREEDY-NEXT: 
[[BITCAST1:%[0-9]+]]:vgpr(<4 x bf16>) = G_BITCAST [[COPY1]](i64) + ; GREEDY-NEXT: [[BITCAST2:%[0-9]+]]:agpr(<16 x f32>) = G_BITCAST [[COPY2]](<16 x i32>) + ; GREEDY-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<16 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x4bf16.1k), [[BITCAST]](<4 x bf16>), [[BITCAST1]](<4 x bf16>), [[BITCAST2]](<16 x f32>), 0, 0, 0 + ; GREEDY-NEXT: [[BITCAST3:%[0-9]+]]:agpr(<16 x i32>) = G_BITCAST [[INTRINSIC_CONVERGENT]](<16 x f32>) + ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST3]](<16 x i32>) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(<16 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + %3:_(<4 x bf16>) = G_BITCAST %0(i64) + %4:_(<4 x bf16>) = G_BITCAST %1(i64) + %5:_(<16 x f32>) = G_BITCAST %2(<16 x i32>) + %6:_(<16 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x4bf16.1k), %3(<4 x bf16>), %4(<4 x bf16>), %5(<16 x f32>), 0, 0, 0 + %7:_(<16 x i32>) = G_BITCAST %6(<16 x f32>) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %7(<16 x i32>) ... --- @@ -75,24 +101,37 @@ body: | ; FAST-LABEL: name: mfma_f32_4x4x4bf16_1k_vva ; FAST: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0 - ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr2_vgpr3 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; FAST-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<4 x bf16>) = G_BITCAST [[COPY]](i64) + ; FAST-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<4 x bf16>) = G_BITCAST [[COPY1]](i64) + ; FAST-NEXT: [[BITCAST2:%[0-9]+]]:agpr(<4 x f32>) = G_BITCAST [[COPY2]](<4 x i32>) + ; FAST-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<4 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x4bf16.1k), [[BITCAST]](<4 x bf16>), [[BITCAST1]](<4 x bf16>), [[BITCAST2]](<4 x f32>), 0, 0, 0 + ; FAST-NEXT: [[BITCAST3:%[0-9]+]]:agpr(<4 x i32>) = G_BITCAST [[INTRINSIC_CONVERGENT]](<4 x f32>) + ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST3]](<4 x i32>) + ; ; GREEDY-LABEL: name: mfma_f32_4x4x4bf16_1k_vva ; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0 - ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - %3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x4bf16.1k), 
%0, %1, %2, 0, 0, 0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr2_vgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; GREEDY-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<4 x bf16>) = G_BITCAST [[COPY]](i64) + ; GREEDY-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<4 x bf16>) = G_BITCAST [[COPY1]](i64) + ; GREEDY-NEXT: [[BITCAST2:%[0-9]+]]:agpr(<4 x f32>) = G_BITCAST [[COPY2]](<4 x i32>) + ; GREEDY-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<4 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x4bf16.1k), [[BITCAST]](<4 x bf16>), [[BITCAST1]](<4 x bf16>), [[BITCAST2]](<4 x f32>), 0, 0, 0 + ; GREEDY-NEXT: [[BITCAST3:%[0-9]+]]:agpr(<4 x i32>) = G_BITCAST [[INTRINSIC_CONVERGENT]](<4 x f32>) + ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST3]](<4 x i32>) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(<4 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3 + %3:_(<4 x bf16>) = G_BITCAST %0(i64) + %4:_(<4 x bf16>) = G_BITCAST %1(i64) + %5:_(<4 x f32>) = G_BITCAST %2(<4 x i32>) + %6:_(<4 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x4bf16.1k), %3(<4 x bf16>), %4(<4 x bf16>), %5(<4 x f32>), 0, 0, 0 + %7:_(<4 x i32>) = G_BITCAST %6(<4 x f32>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %7(<4 x i32>) ... --- @@ -106,24 +145,37 @@ body: | ; FAST-LABEL: name: mfma_f32_32x32x8bf16_1k_vva ; FAST: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x8bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<32 x s32>), 0, 0, 0 - ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>) + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr2_vgpr3 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<32 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; FAST-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<4 x bf16>) = G_BITCAST [[COPY]](i64) + ; FAST-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<4 x bf16>) = G_BITCAST [[COPY1]](i64) + ; FAST-NEXT: [[BITCAST2:%[0-9]+]]:agpr(<32 x f32>) = G_BITCAST [[COPY2]](<32 x i32>) + ; FAST-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<32 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x8bf16.1k), [[BITCAST]](<4 x bf16>), [[BITCAST1]](<4 x bf16>), [[BITCAST2]](<32 x f32>), 0, 0, 0 + ; FAST-NEXT: [[BITCAST3:%[0-9]+]]:agpr(<32 x i32>) = G_BITCAST [[INTRINSIC_CONVERGENT]](<32 x 
f32>) + ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[BITCAST3]](<32 x i32>) + ; ; GREEDY-LABEL: name: mfma_f32_32x32x8bf16_1k_vva ; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x8bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<32 x s32>), 0, 0, 0 - ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - %3:_(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x8bf16.1k), %0, %1, %2, 0, 0, 0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr2_vgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<32 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; GREEDY-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<4 x bf16>) = G_BITCAST [[COPY]](i64) + ; GREEDY-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<4 x bf16>) = G_BITCAST [[COPY1]](i64) + ; GREEDY-NEXT: [[BITCAST2:%[0-9]+]]:agpr(<32 x f32>) = G_BITCAST [[COPY2]](<32 x i32>) + ; GREEDY-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<32 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x8bf16.1k), [[BITCAST]](<4 x bf16>), [[BITCAST1]](<4 x bf16>), [[BITCAST2]](<32 x f32>), 0, 0, 0 + ; GREEDY-NEXT: [[BITCAST3:%[0-9]+]]:agpr(<32 x i32>) = G_BITCAST [[INTRINSIC_CONVERGENT]](<32 x f32>) + ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[BITCAST3]](<32 x i32>) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(<32 x i32>) = COPY 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + %3:_(<4 x bf16>) = G_BITCAST %0(i64) + %4:_(<4 x bf16>) = G_BITCAST %1(i64) + %5:_(<32 x f32>) = G_BITCAST %2(<32 x i32>) + %6:_(<32 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x8bf16.1k), %3(<4 x bf16>), %4(<4 x bf16>), %5(<32 x f32>), 0, 0, 0 + %7:_(<32 x i32>) = G_BITCAST %6(<32 x f32>) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %7(<32 x i32>) ... --- @@ -137,24 +189,37 @@ body: | ; FAST-LABEL: name: mfma_f32_16x16x16bf16_1k_vva ; FAST: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x16bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0 - ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr2_vgpr3 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; FAST-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<4 x bf16>) = G_BITCAST [[COPY]](i64) + ; FAST-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<4 x bf16>) = G_BITCAST [[COPY1]](i64) + ; FAST-NEXT: [[BITCAST2:%[0-9]+]]:agpr(<4 x f32>) = G_BITCAST [[COPY2]](<4 x i32>) + ; FAST-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<4 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x16bf16.1k), [[BITCAST]](<4 x bf16>), [[BITCAST1]](<4 x bf16>), [[BITCAST2]](<4 x f32>), 0, 0, 0 + ; FAST-NEXT: [[BITCAST3:%[0-9]+]]:agpr(<4 x i32>) = G_BITCAST [[INTRINSIC_CONVERGENT]](<4 x f32>) + ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST3]](<4 x i32>) + ; ; GREEDY-LABEL: name: mfma_f32_16x16x16bf16_1k_vva ; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x16bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0 - ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - %3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x16bf16.1k), %0, %1, %2, 0, 0, 0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr2_vgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; GREEDY-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<4 x bf16>) = G_BITCAST [[COPY]](i64) + ; GREEDY-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<4 x bf16>) = G_BITCAST [[COPY1]](i64) + ; GREEDY-NEXT: [[BITCAST2:%[0-9]+]]:agpr(<4 x f32>) 
= G_BITCAST [[COPY2]](<4 x i32>) + ; GREEDY-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<4 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x16bf16.1k), [[BITCAST]](<4 x bf16>), [[BITCAST1]](<4 x bf16>), [[BITCAST2]](<4 x f32>), 0, 0, 0 + ; GREEDY-NEXT: [[BITCAST3:%[0-9]+]]:agpr(<4 x i32>) = G_BITCAST [[INTRINSIC_CONVERGENT]](<4 x f32>) + ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST3]](<4 x i32>) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(<4 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3 + %3:_(<4 x bf16>) = G_BITCAST %0(i64) + %4:_(<4 x bf16>) = G_BITCAST %1(i64) + %5:_(<4 x f32>) = G_BITCAST %2(<4 x i32>) + %6:_(<4 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x16bf16.1k), %3(<4 x bf16>), %4(<4 x bf16>), %5(<4 x f32>), 0, 0, 0 + %7:_(<4 x i32>) = G_BITCAST %6(<4 x f32>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %7(<4 x i32>) ... --- @@ -168,24 +233,37 @@ body: | ; FAST-LABEL: name: mfma_f64_16x16x4f64_vva ; FAST: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<8 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<8 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f64.16x16x4f64), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<8 x s32>), 0, 0, 0 - ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INT]](<8 x s32>) + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr2_vgpr3 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<8 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; FAST-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<4 x bf16>) = G_BITCAST [[COPY]](i64) + ; FAST-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<4 x bf16>) = G_BITCAST [[COPY1]](i64) + ; FAST-NEXT: [[BITCAST2:%[0-9]+]]:agpr(<8 x f32>) = G_BITCAST [[COPY2]](<8 x i32>) + ; FAST-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<8 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f64.16x16x4f64), [[BITCAST]](<4 x bf16>), [[BITCAST1]](<4 x bf16>), [[BITCAST2]](<8 x f32>), 0, 0, 0 + ; FAST-NEXT: [[BITCAST3:%[0-9]+]]:agpr(<8 x i32>) = G_BITCAST [[INTRINSIC_CONVERGENT]](<8 x f32>) + ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST3]](<8 x i32>) + ; ; GREEDY-LABEL: name: mfma_f64_16x16x4f64_vva ; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<8 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<8 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f64.16x16x4f64), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<8 x s32>), 0, 0, 0 - ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INT]](<8 x s32>) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(<8 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - %3:_(<8 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f64.16x16x4f64), %0, %1, %2, 0, 0, 0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; GREEDY-NEXT: 
[[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr2_vgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<8 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; GREEDY-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<4 x bf16>) = G_BITCAST [[COPY]](i64) + ; GREEDY-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<4 x bf16>) = G_BITCAST [[COPY1]](i64) + ; GREEDY-NEXT: [[BITCAST2:%[0-9]+]]:agpr(<8 x f32>) = G_BITCAST [[COPY2]](<8 x i32>) + ; GREEDY-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<8 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f64.16x16x4f64), [[BITCAST]](<4 x bf16>), [[BITCAST1]](<4 x bf16>), [[BITCAST2]](<8 x f32>), 0, 0, 0 + ; GREEDY-NEXT: [[BITCAST3:%[0-9]+]]:agpr(<8 x i32>) = G_BITCAST [[INTRINSIC_CONVERGENT]](<8 x f32>) + ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST3]](<8 x i32>) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(<8 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + %3:_(<4 x bf16>) = G_BITCAST %0(i64) + %4:_(<4 x bf16>) = G_BITCAST %1(i64) + %5:_(<8 x f32>) = G_BITCAST %2(<8 x i32>) + %6:_(<8 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f64.16x16x4f64), %3(<4 x bf16>), %4(<4 x bf16>), %5(<8 x f32>), 0, 0, 0 + %7:_(<8 x i32>) = G_BITCAST %6(<8 x f32>) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %7(<8 x i32>) ... --- @@ -199,22 +277,35 @@ body: | ; FAST-LABEL: name: mfma_f64_4x4x4f64_vva ; FAST: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<2 x s32>) = COPY $agpr0_agpr1 - ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<2 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f64.4x4x4f64), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<2 x s32>), 0, 0, 0 - ; FAST-NEXT: $vgpr0_vgpr1 = COPY [[INT]](<2 x s32>) + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr2_vgpr3 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<2 x i32>) = COPY $agpr0_agpr1 + ; FAST-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<4 x bf16>) = G_BITCAST [[COPY]](i64) + ; FAST-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<4 x bf16>) = G_BITCAST [[COPY1]](i64) + ; FAST-NEXT: [[BITCAST2:%[0-9]+]]:agpr(<2 x f32>) = G_BITCAST [[COPY2]](<2 x i32>) + ; FAST-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<2 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f64.4x4x4f64), [[BITCAST]](<4 x bf16>), [[BITCAST1]](<4 x bf16>), [[BITCAST2]](<2 x f32>), 0, 0, 0 + ; FAST-NEXT: [[BITCAST3:%[0-9]+]]:agpr(<2 x i32>) = G_BITCAST [[INTRINSIC_CONVERGENT]](<2 x f32>) + ; FAST-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST3]](<2 x i32>) + ; ; GREEDY-LABEL: name: mfma_f64_4x4x4f64_vva ; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<2 x s32>) = COPY $agpr0_agpr1 - ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<2 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f64.4x4x4f64), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<2 x s32>), 0, 0, 0 - ; GREEDY-NEXT: $vgpr0_vgpr1 = COPY [[INT]](<2 x s32>) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = COPY $agpr0_agpr1 - %3:_(<2 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f64.4x4x4f64), %0, %1, %2, 0, 0, 0 - $vgpr0_vgpr1 = COPY %3 + ; GREEDY-NEXT: 
[[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr2_vgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<2 x i32>) = COPY $agpr0_agpr1 + ; GREEDY-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<4 x bf16>) = G_BITCAST [[COPY]](i64) + ; GREEDY-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<4 x bf16>) = G_BITCAST [[COPY1]](i64) + ; GREEDY-NEXT: [[BITCAST2:%[0-9]+]]:agpr(<2 x f32>) = G_BITCAST [[COPY2]](<2 x i32>) + ; GREEDY-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<2 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f64.4x4x4f64), [[BITCAST]](<4 x bf16>), [[BITCAST1]](<4 x bf16>), [[BITCAST2]](<2 x f32>), 0, 0, 0 + ; GREEDY-NEXT: [[BITCAST3:%[0-9]+]]:agpr(<2 x i32>) = G_BITCAST [[INTRINSIC_CONVERGENT]](<2 x f32>) + ; GREEDY-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST3]](<2 x i32>) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(<2 x i32>) = COPY $agpr0_agpr1 + %3:_(<4 x bf16>) = G_BITCAST %0(i64) + %4:_(<4 x bf16>) = G_BITCAST %1(i64) + %5:_(<2 x f32>) = G_BITCAST %2(<2 x i32>) + %6:_(<2 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f64.4x4x4f64), %3(<4 x bf16>), %4(<4 x bf16>), %5(<2 x f32>), 0, 0, 0 + %7:_(<2 x i32>) = G_BITCAST %6(<2 x f32>) + $vgpr0_vgpr1 = COPY %7(<2 x i32>) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.gfx942.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.gfx942.mir index e11586e464fb4..0e9ecaa691a75 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.gfx942.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.gfx942.mir @@ -13,24 +13,25 @@ body: | ; FAST-LABEL: name: mfma_i32_16x16x32_i8_vva ; FAST: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.16x16x32.i8), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0 - ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr2_vgpr3 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; FAST-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<4 x i32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.16x16x32.i8), [[COPY]](i64), [[COPY1]](i64), [[COPY2]](<4 x i32>), 0, 0, 0 + ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INTRINSIC_CONVERGENT]](<4 x i32>) + ; ; GREEDY-LABEL: name: mfma_i32_16x16x32_i8_vva ; GREEDY: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.16x16x32.i8), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0 - ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - %3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.16x16x32.i8), %0, %1, %2, 0, 0, 0 - 
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr2_vgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; GREEDY-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<4 x i32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.16x16x32.i8), [[COPY]](i64), [[COPY1]](i64), [[COPY2]](<4 x i32>), 0, 0, 0 + ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INTRINSIC_CONVERGENT]](<4 x i32>) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(<4 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3 + %3:_(<4 x i32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.16x16x32.i8), %0(i64), %1(i64), %2(<4 x i32>), 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3(<4 x i32>) ... --- @@ -44,24 +45,25 @@ body: | ; FAST-LABEL: name: mfma_i32_32x32x16_i8_vva ; FAST: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.32x32x16.i8), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0 - ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr2_vgpr3 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; FAST-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<16 x i32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.32x32x16.i8), [[COPY]](i64), [[COPY1]](i64), [[COPY2]](<16 x i32>), 0, 0, 0 + ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INTRINSIC_CONVERGENT]](<16 x i32>) + ; ; GREEDY-LABEL: name: mfma_i32_32x32x16_i8_vva ; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.32x32x16.i8), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0 - ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - %3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.32x32x16.i8), %0, %1, %2, 0, 0, 0 - 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr2_vgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GREEDY-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<16 x i32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.32x32x16.i8), [[COPY]](i64), [[COPY1]](i64), [[COPY2]](<16 x i32>), 0, 0, 0 + ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INTRINSIC_CONVERGENT]](<16 x i32>) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(<16 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + %3:_(<16 x i32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.32x32x16.i8), %0(i64), %1(i64), %2(<16 x i32>), 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3(<16 x i32>) ... --- @@ -75,24 +77,37 @@ body: | ; FAST-LABEL: name: mfma_f32_16x16x8_xf32_vva ; FAST: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x8.xf32), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0 - ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr2_vgpr3 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; FAST-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f64) = G_BITCAST [[COPY]](i64) + ; FAST-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f64) = G_BITCAST [[COPY1]](i64) + ; FAST-NEXT: [[BITCAST2:%[0-9]+]]:agpr(<4 x f32>) = G_BITCAST [[COPY2]](<4 x i32>) + ; FAST-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<4 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x8.xf32), [[BITCAST]](f64), [[BITCAST1]](f64), [[BITCAST2]](<4 x f32>), 0, 0, 0 + ; FAST-NEXT: [[BITCAST3:%[0-9]+]]:agpr(<4 x i32>) = G_BITCAST [[INTRINSIC_CONVERGENT]](<4 x f32>) + ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST3]](<4 x i32>) + ; ; GREEDY-LABEL: name: mfma_f32_16x16x8_xf32_vva ; GREEDY: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x8.xf32), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0 - ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - %3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x8.xf32), %0, %1, %2, 0, 0, 0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; GREEDY-NEXT: 
[[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr2_vgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; GREEDY-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f64) = G_BITCAST [[COPY]](i64) + ; GREEDY-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f64) = G_BITCAST [[COPY1]](i64) + ; GREEDY-NEXT: [[BITCAST2:%[0-9]+]]:agpr(<4 x f32>) = G_BITCAST [[COPY2]](<4 x i32>) + ; GREEDY-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<4 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x8.xf32), [[BITCAST]](f64), [[BITCAST1]](f64), [[BITCAST2]](<4 x f32>), 0, 0, 0 + ; GREEDY-NEXT: [[BITCAST3:%[0-9]+]]:agpr(<4 x i32>) = G_BITCAST [[INTRINSIC_CONVERGENT]](<4 x f32>) + ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST3]](<4 x i32>) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(<4 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3 + %3:_(f64) = G_BITCAST %0(i64) + %4:_(f64) = G_BITCAST %1(i64) + %5:_(<4 x f32>) = G_BITCAST %2(<4 x i32>) + %6:_(<4 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x8.xf32), %3(f64), %4(f64), %5(<4 x f32>), 0, 0, 0 + %7:_(<4 x i32>) = G_BITCAST %6(<4 x f32>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %7(<4 x i32>) ... --- @@ -106,24 +121,37 @@ body: | ; FAST-LABEL: name: mfma_f32_32x32x4_xf32_vva ; FAST: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4.xf32), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0 - ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr2_vgpr3 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; FAST-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f64) = G_BITCAST [[COPY]](i64) + ; FAST-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f64) = G_BITCAST [[COPY1]](i64) + ; FAST-NEXT: [[BITCAST2:%[0-9]+]]:agpr(<16 x f32>) = G_BITCAST [[COPY2]](<16 x i32>) + ; FAST-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<16 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4.xf32), [[BITCAST]](f64), [[BITCAST1]](f64), [[BITCAST2]](<16 x f32>), 0, 0, 0 + ; FAST-NEXT: [[BITCAST3:%[0-9]+]]:agpr(<16 x i32>) = G_BITCAST [[INTRINSIC_CONVERGENT]](<16 x f32>) + ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST3]](<16 x i32>) + ; ; GREEDY-LABEL: name: mfma_f32_32x32x4_xf32_vva ; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4.xf32), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0 - ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - %3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4.xf32), %0, %1, %2, 0, 0, 0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr2_vgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GREEDY-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f64) = G_BITCAST [[COPY]](i64) + ; GREEDY-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f64) = G_BITCAST [[COPY1]](i64) + ; GREEDY-NEXT: [[BITCAST2:%[0-9]+]]:agpr(<16 x f32>) = G_BITCAST [[COPY2]](<16 x i32>) + ; GREEDY-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<16 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4.xf32), [[BITCAST]](f64), [[BITCAST1]](f64), [[BITCAST2]](<16 x f32>), 0, 0, 0 + ; GREEDY-NEXT: [[BITCAST3:%[0-9]+]]:agpr(<16 x i32>) = G_BITCAST [[INTRINSIC_CONVERGENT]](<16 x f32>) + ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST3]](<16 x i32>) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(<16 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + %3:_(f64) = G_BITCAST %0(i64) + %4:_(f64) = G_BITCAST %1(i64) + %5:_(<16 x f32>) = G_BITCAST %2(<16 x i32>) + %6:_(<16 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4.xf32), %3(f64), %4(f64), %5(<16 x f32>), 0, 0, 0 + %7:_(<16 x i32>) = G_BITCAST %6(<16 x f32>) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %7(<16 x i32>) ... 
--- @@ -137,27 +165,40 @@ body: | ; FAST-LABEL: name: smfmac_f32_16x16x32_f16_vva ; FAST: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3, $vgpr20 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 - ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.f16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0 - ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr20 + ; FAST-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<4 x f16>) = G_BITCAST [[COPY]](i64) + ; FAST-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<8 x f16>) = G_BITCAST [[COPY1]](i128) + ; FAST-NEXT: [[BITCAST2:%[0-9]+]]:agpr(<4 x f32>) = G_BITCAST [[COPY2]](<4 x i32>) + ; FAST-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<4 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.f16), [[BITCAST]](<4 x f16>), [[BITCAST1]](<8 x f16>), [[BITCAST2]](<4 x f32>), [[COPY3]](i32), 0, 0 + ; FAST-NEXT: [[BITCAST3:%[0-9]+]]:agpr(<4 x i32>) = G_BITCAST [[INTRINSIC_CONVERGENT]](<4 x f32>) + ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST3]](<4 x i32>) + ; ; GREEDY-LABEL: name: smfmac_f32_16x16x32_f16_vva ; GREEDY: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3, $vgpr20 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 - ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.f16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0 - ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - %2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - %3:_(s32) = COPY $vgpr20 - %4:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.f16), %0, %1, %2, %3, 0, 0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %4 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr20 + ; GREEDY-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<4 x f16>) = G_BITCAST [[COPY]](i64) + ; GREEDY-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<8 x f16>) = G_BITCAST [[COPY1]](i128) + ; GREEDY-NEXT: [[BITCAST2:%[0-9]+]]:agpr(<4 x f32>) = G_BITCAST [[COPY2]](<4 x i32>) + ; GREEDY-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<4 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.f16), [[BITCAST]](<4 x f16>), [[BITCAST1]](<8 x f16>), [[BITCAST2]](<4 x f32>), [[COPY3]](i32), 0, 0 + ; GREEDY-NEXT: [[BITCAST3:%[0-9]+]]:agpr(<4 x i32>) = G_BITCAST [[INTRINSIC_CONVERGENT]](<4 x f32>) + ; 
GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST3]](<4 x i32>) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + %2:_(<4 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3 + %3:_(i32) = COPY $vgpr20 + %4:_(<4 x f16>) = G_BITCAST %0(i64) + %5:_(<8 x f16>) = G_BITCAST %1(i128) + %6:_(<4 x f32>) = G_BITCAST %2(<4 x i32>) + %7:_(<4 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.f16), %4(<4 x f16>), %5(<8 x f16>), %6(<4 x f32>), %3(i32), 0, 0 + %8:_(<4 x i32>) = G_BITCAST %7(<4 x f32>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %8(<4 x i32>) ... --- @@ -171,27 +212,40 @@ body: | ; FAST-LABEL: name: smfmac_f32_32x32x16_f16_vva ; FAST: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, $vgpr20 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 - ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.f16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0 - ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr20 + ; FAST-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<4 x f16>) = G_BITCAST [[COPY]](i64) + ; FAST-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<8 x f16>) = G_BITCAST [[COPY1]](i128) + ; FAST-NEXT: [[BITCAST2:%[0-9]+]]:agpr(<16 x f32>) = G_BITCAST [[COPY2]](<16 x i32>) + ; FAST-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<16 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.f16), [[BITCAST]](<4 x f16>), [[BITCAST1]](<8 x f16>), [[BITCAST2]](<16 x f32>), [[COPY3]](i32), 0, 0 + ; FAST-NEXT: [[BITCAST3:%[0-9]+]]:agpr(<16 x i32>) = G_BITCAST [[INTRINSIC_CONVERGENT]](<16 x f32>) + ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST3]](<16 x i32>) + ; ; GREEDY-LABEL: name: smfmac_f32_32x32x16_f16_vva ; GREEDY: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, $vgpr20 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 - ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.f16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0 - ; GREEDY-NEXT: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - %2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - %3:_(s32) = COPY $vgpr20 - %4:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.f16), %0, %1, %2, %3, 0, 0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %4 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr20 + ; GREEDY-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<4 x f16>) = G_BITCAST [[COPY]](i64) + ; GREEDY-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<8 x f16>) = G_BITCAST [[COPY1]](i128) + ; GREEDY-NEXT: [[BITCAST2:%[0-9]+]]:agpr(<16 x f32>) = G_BITCAST [[COPY2]](<16 x i32>) + ; GREEDY-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<16 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.f16), [[BITCAST]](<4 x f16>), [[BITCAST1]](<8 x f16>), [[BITCAST2]](<16 x f32>), [[COPY3]](i32), 0, 0 + ; GREEDY-NEXT: [[BITCAST3:%[0-9]+]]:agpr(<16 x i32>) = G_BITCAST [[INTRINSIC_CONVERGENT]](<16 x f32>) + ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST3]](<16 x i32>) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + %2:_(<16 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + %3:_(i32) = COPY $vgpr20 + %4:_(<4 x f16>) = G_BITCAST %0(i64) + %5:_(<8 x f16>) = G_BITCAST %1(i128) + %6:_(<16 x f32>) = G_BITCAST %2(<16 x i32>) + %7:_(<16 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.f16), %4(<4 x f16>), %5(<8 x f16>), %6(<16 x f32>), %3(i32), 0, 0 + %8:_(<16 x i32>) = G_BITCAST %7(<16 x f32>) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %8(<16 x i32>) ... 
--- @@ -205,27 +259,40 @@ body: | ; FAST-LABEL: name: smfmac_f32_16x16x32_bf16_vva ; FAST: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3, $vgpr20 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 - ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.bf16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0 - ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr20 + ; FAST-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<4 x bf16>) = G_BITCAST [[COPY]](i64) + ; FAST-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<8 x bf16>) = G_BITCAST [[COPY1]](i128) + ; FAST-NEXT: [[BITCAST2:%[0-9]+]]:agpr(<4 x f32>) = G_BITCAST [[COPY2]](<4 x i32>) + ; FAST-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<4 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.bf16), [[BITCAST]](<4 x bf16>), [[BITCAST1]](<8 x bf16>), [[BITCAST2]](<4 x f32>), [[COPY3]](i32), 0, 0 + ; FAST-NEXT: [[BITCAST3:%[0-9]+]]:agpr(<4 x i32>) = G_BITCAST [[INTRINSIC_CONVERGENT]](<4 x f32>) + ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST3]](<4 x i32>) + ; ; GREEDY-LABEL: name: smfmac_f32_16x16x32_bf16_vva ; GREEDY: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3, $vgpr20 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 - ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.bf16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0 - ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - %2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - %3:_(s32) = COPY $vgpr20 - %4:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.bf16), %0, %1, %2, %3, 0, 0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %4 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr20 + ; GREEDY-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<4 x bf16>) = G_BITCAST [[COPY]](i64) + ; GREEDY-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<8 x bf16>) = G_BITCAST [[COPY1]](i128) + ; GREEDY-NEXT: [[BITCAST2:%[0-9]+]]:agpr(<4 x f32>) = G_BITCAST [[COPY2]](<4 x i32>) + ; GREEDY-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<4 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.bf16), [[BITCAST]](<4 x bf16>), [[BITCAST1]](<8 x bf16>), [[BITCAST2]](<4 x f32>), [[COPY3]](i32), 0, 0 + ; GREEDY-NEXT: [[BITCAST3:%[0-9]+]]:agpr(<4 x i32>) = G_BITCAST 
[[INTRINSIC_CONVERGENT]](<4 x f32>) + ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST3]](<4 x i32>) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + %2:_(<4 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3 + %3:_(i32) = COPY $vgpr20 + %4:_(<4 x bf16>) = G_BITCAST %0(i64) + %5:_(<8 x bf16>) = G_BITCAST %1(i128) + %6:_(<4 x f32>) = G_BITCAST %2(<4 x i32>) + %7:_(<4 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.bf16), %4(<4 x bf16>), %5(<8 x bf16>), %6(<4 x f32>), %3(i32), 0, 0 + %8:_(<4 x i32>) = G_BITCAST %7(<4 x f32>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %8(<4 x i32>) ... --- @@ -239,27 +306,40 @@ body: | ; FAST-LABEL: name: smfmac_f32_32x32x16_bf16_vva ; FAST: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, $vgpr20 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 - ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.bf16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0 - ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr20 + ; FAST-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<4 x bf16>) = G_BITCAST [[COPY]](i64) + ; FAST-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<8 x bf16>) = G_BITCAST [[COPY1]](i128) + ; FAST-NEXT: [[BITCAST2:%[0-9]+]]:agpr(<16 x f32>) = G_BITCAST [[COPY2]](<16 x i32>) + ; FAST-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<16 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.bf16), [[BITCAST]](<4 x bf16>), [[BITCAST1]](<8 x bf16>), [[BITCAST2]](<16 x f32>), [[COPY3]](i32), 0, 0 + ; FAST-NEXT: [[BITCAST3:%[0-9]+]]:agpr(<16 x i32>) = G_BITCAST [[INTRINSIC_CONVERGENT]](<16 x f32>) + ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST3]](<16 x i32>) + ; ; GREEDY-LABEL: name: smfmac_f32_32x32x16_bf16_vva ; GREEDY: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, $vgpr20 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 - ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.bf16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0 - ; GREEDY-NEXT: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - %2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - %3:_(s32) = COPY $vgpr20 - %4:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.bf16), %0, %1, %2, %3, 0, 0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %4 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr20 + ; GREEDY-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<4 x bf16>) = G_BITCAST [[COPY]](i64) + ; GREEDY-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<8 x bf16>) = G_BITCAST [[COPY1]](i128) + ; GREEDY-NEXT: [[BITCAST2:%[0-9]+]]:agpr(<16 x f32>) = G_BITCAST [[COPY2]](<16 x i32>) + ; GREEDY-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<16 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.bf16), [[BITCAST]](<4 x bf16>), [[BITCAST1]](<8 x bf16>), [[BITCAST2]](<16 x f32>), [[COPY3]](i32), 0, 0 + ; GREEDY-NEXT: [[BITCAST3:%[0-9]+]]:agpr(<16 x i32>) = G_BITCAST [[INTRINSIC_CONVERGENT]](<16 x f32>) + ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST3]](<16 x i32>) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + %2:_(<16 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + %3:_(i32) = COPY $vgpr20 + %4:_(<4 x bf16>) = G_BITCAST %0(i64) + %5:_(<8 x bf16>) = G_BITCAST %1(i128) + %6:_(<16 x f32>) = G_BITCAST %2(<16 x i32>) + %7:_(<16 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.bf16), %4(<4 x bf16>), %5(<8 x bf16>), %6(<16 x f32>), %3(i32), 0, 0 + %8:_(<16 x i32>) = G_BITCAST %7(<16 x f32>) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %8(<16 x i32>) ... 
--- @@ -273,27 +353,28 @@ body: | ; FAST-LABEL: name: smfmac_i32_16x16x64_i8_vva ; FAST: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3, $vgpr20 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 - ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.i32.16x16x64.i8), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0 - ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr20 + ; FAST-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<4 x i32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.i32.16x16x64.i8), [[COPY]](i64), [[COPY1]](i128), [[COPY2]](<4 x i32>), [[COPY3]](i32), 0, 0 + ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INTRINSIC_CONVERGENT]](<4 x i32>) + ; ; GREEDY-LABEL: name: smfmac_i32_16x16x64_i8_vva ; GREEDY: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3, $vgpr20 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 - ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.i32.16x16x64.i8), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0 - ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - %2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - %3:_(s32) = COPY $vgpr20 - %4:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.i32.16x16x64.i8), %0, %1, %2, %3, 0, 0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %4 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr20 + ; GREEDY-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<4 x i32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.i32.16x16x64.i8), [[COPY]](i64), [[COPY1]](i128), [[COPY2]](<4 x i32>), [[COPY3]](i32), 0, 0 + ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INTRINSIC_CONVERGENT]](<4 x i32>) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + %2:_(<4 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3 + %3:_(i32) = COPY $vgpr20 + %4:_(<4 x i32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.i32.16x16x64.i8), %0(i64), %1(i128), %2(<4 x i32>), %3(i32), 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %4(<4 x i32>) ... 
--- @@ -307,25 +388,26 @@ body: | ; FAST-LABEL: name: smfmac_i32_32x32x32_i8_vva ; FAST: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, $vgpr20 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 - ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.i32.32x32x32.i8), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0 - ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr20 + ; FAST-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<16 x i32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.i32.32x32x32.i8), [[COPY]](i64), [[COPY1]](i128), [[COPY2]](<16 x i32>), [[COPY3]](i32), 0, 0 + ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INTRINSIC_CONVERGENT]](<16 x i32>) + ; ; GREEDY-LABEL: name: smfmac_i32_32x32x32_i8_vva ; GREEDY: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, $vgpr20 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 - ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.i32.32x32x32.i8), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0 - ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - %2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - %3:_(s32) = COPY $vgpr20 - %4:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.i32.32x32x32.i8), %0, %1, %2, %3, 0, 0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %4 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr20 + ; GREEDY-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<16 x i32>) = 
G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.i32.32x32x32.i8), [[COPY]](i64), [[COPY1]](i128), [[COPY2]](<16 x i32>), [[COPY3]](i32), 0, 0 + ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INTRINSIC_CONVERGENT]](<16 x i32>) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + %2:_(<16 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + %3:_(i32) = COPY $vgpr20 + %4:_(<16 x i32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.i32.32x32x32.i8), %0(i64), %1(i128), %2(<16 x i32>), %3(i32), 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %4(<16 x i32>) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.mir index e34a62d29a102..3866b8f468f34 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.mir @@ -13,16 +13,24 @@ body: | ; CHECK-LABEL: name: mfma_f32_32x32x1f32_vva ; CHECK: liveins: $vgpr0, $vgpr1, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x1f32), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<32 x s32>), 0, 0, 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - %3:_(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x1f32), %0, %1, %2, 0, 0, 0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<32 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:agpr(<32 x f32>) = G_BITCAST 
[[COPY2]](<32 x i32>) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<32 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x1f32), [[BITCAST]](f32), [[BITCAST1]](f32), [[BITCAST2]](<32 x f32>), 0, 0, 0 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:agpr(<32 x i32>) = G_BITCAST [[INTRINSIC_CONVERGENT]](<32 x f32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[BITCAST3]](<32 x i32>) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(<32 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + %3:_(f32) = G_BITCAST %0(i32) + %4:_(f32) = G_BITCAST %1(i32) + %5:_(<32 x f32>) = G_BITCAST %2(<32 x i32>) + %6:_(<32 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x1f32), %3(f32), %4(f32), %5(<32 x f32>), 0, 0, 0 + %7:_(<32 x i32>) = G_BITCAST %6(<32 x f32>) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %7(<32 x i32>) ... --- @@ -36,19 +44,27 @@ body: | ; CHECK-LABEL: name: mfma_f32_32x32x1f32_sss ; CHECK: liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr32 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr33 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<32 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<32 x s32>) = COPY [[COPY2]](<32 x s32>) - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x1f32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<32 x s32>), 0, 0, 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>) - %0:_(s32) = COPY $sgpr32 - %1:_(s32) = COPY $sgpr33 - %2:_(<32 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 - %3:_(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x1f32), %0, %1, %2, 0, 0, 0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr32 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY 
$sgpr33 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<32 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:sgpr(<32 x f32>) = G_BITCAST [[COPY2]](<32 x i32>) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST]](f32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST1]](f32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<32 x f32>) = COPY [[BITCAST2]](<32 x f32>) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<32 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x1f32), [[COPY3]](f32), [[COPY4]](f32), [[COPY5]](<32 x f32>), 0, 0, 0 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:agpr(<32 x i32>) = G_BITCAST [[INTRINSIC_CONVERGENT]](<32 x f32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[BITCAST3]](<32 x i32>) + %0:_(i32) = COPY $sgpr32 + %1:_(i32) = COPY $sgpr33 + %2:_(<32 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + %3:_(f32) = G_BITCAST %0(i32) + %4:_(f32) = G_BITCAST %1(i32) + %5:_(<32 x f32>) = G_BITCAST %2(<32 x i32>) + %6:_(<32 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x1f32), %3(f32), %4(f32), %5(<32 x f32>), 0, 0, 0 + %7:_(<32 x i32>) = G_BITCAST %6(<32 x f32>) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %7(<32 x i32>) ... 
--- @@ -62,16 +78,24 @@ body: | ; CHECK-LABEL: name: mfma_f32_16x16x1f32_vva ; CHECK: liveins: $vgpr0, $vgpr1, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x1f32), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<16 x s32>), 0, 0, 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - %3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x1f32), %0, %1, %2, 0, 0, 0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:agpr(<16 x f32>) = G_BITCAST [[COPY2]](<16 x i32>) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<16 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x1f32), [[BITCAST]](f32), [[BITCAST1]](f32), [[BITCAST2]](<16 x f32>), 0, 0, 0 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:agpr(<16 x i32>) = G_BITCAST [[INTRINSIC_CONVERGENT]](<16 x f32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST3]](<16 x i32>) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(<16 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + %3:_(f32) = G_BITCAST %0(i32) + %4:_(f32) = G_BITCAST %1(i32) + %5:_(<16 x f32>) = G_BITCAST %2(<16 x i32>) + %6:_(<16 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x1f32), %3(f32), %4(f32), %5(<16 x f32>), 0, 0, 0 + %7:_(<16 x i32>) = G_BITCAST %6(<16 x f32>) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %7(<16 x i32>) ... 
--- @@ -85,19 +109,27 @@ body: | ; CHECK-LABEL: name: mfma_f32_16x16x1f32_sss ; CHECK: liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr32 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr33 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>) - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x1f32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<16 x s32>), 0, 0, 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) - %0:_(s32) = COPY $sgpr32 - %1:_(s32) = COPY $sgpr33 - %2:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - %3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x1f32), %0, %1, %2, 0, 0, 0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr32 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr33 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<16 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:sgpr(<16 x f32>) = G_BITCAST [[COPY2]](<16 x i32>) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST]](f32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST1]](f32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<16 x f32>) = COPY [[BITCAST2]](<16 x f32>) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<16 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x1f32), [[COPY3]](f32), [[COPY4]](f32), [[COPY5]](<16 x f32>), 0, 0, 0 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:agpr(<16 x i32>) = G_BITCAST [[INTRINSIC_CONVERGENT]](<16 x f32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST3]](<16 x i32>) + %0:_(i32) = COPY $sgpr32 + %1:_(i32) = COPY $sgpr33 + %2:_(<16 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + %3:_(f32) = G_BITCAST %0(i32) + %4:_(f32) = G_BITCAST %1(i32) + %5:_(<16 x f32>) = G_BITCAST %2(<16 x i32>) + %6:_(<16 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x1f32), %3(f32), %4(f32), %5(<16 x f32>), 0, 0, 0 + %7:_(<16 x i32>) = G_BITCAST %6(<16 x f32>) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %7(<16 x i32>) ... 
--- @@ -111,16 +143,24 @@ body: | ; CHECK-LABEL: name: mfma_f32_4x4x1f32_vva ; CHECK: liveins: $vgpr0, $vgpr1, $agpr0_agpr1_agpr2_agpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x1f32), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<4 x s32>), 0, 0, 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - %3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x1f32), %0, %1, %2, 0, 0, 0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:agpr(<4 x f32>) = G_BITCAST [[COPY2]](<4 x i32>) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<4 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x1f32), [[BITCAST]](f32), [[BITCAST1]](f32), [[BITCAST2]](<4 x f32>), 0, 0, 0 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:agpr(<4 x i32>) = G_BITCAST [[INTRINSIC_CONVERGENT]](<4 x f32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST3]](<4 x i32>) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(<4 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3 + %3:_(f32) = G_BITCAST %0(i32) + %4:_(f32) = G_BITCAST %1(i32) + %5:_(<4 x f32>) = G_BITCAST %2(<4 x i32>) + %6:_(<4 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x1f32), %3(f32), %4(f32), %5(<4 x f32>), 0, 0, 0 + %7:_(<4 x i32>) = G_BITCAST %6(<4 x f32>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %7(<4 x i32>) ... 
--- @@ -134,19 +174,27 @@ body: | ; CHECK-LABEL: name: mfma_f32_4x4x1f32_sss ; CHECK: liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr32 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr33 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>) - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x1f32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<4 x s32>), 0, 0, 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) - %0:_(s32) = COPY $sgpr32 - %1:_(s32) = COPY $sgpr33 - %2:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x1f32), %0, %1, %2, 0, 0, 0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr32 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr33 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:sgpr(<4 x f32>) = G_BITCAST [[COPY2]](<4 x i32>) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST]](f32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST1]](f32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<4 x f32>) = COPY [[BITCAST2]](<4 x f32>) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<4 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x1f32), [[COPY3]](f32), [[COPY4]](f32), [[COPY5]](<4 x f32>), 0, 0, 0 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:agpr(<4 x i32>) = G_BITCAST [[INTRINSIC_CONVERGENT]](<4 x f32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST3]](<4 x i32>) + %0:_(i32) = COPY $sgpr32 + %1:_(i32) = COPY $sgpr33 + %2:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %3:_(f32) = G_BITCAST %0(i32) + %4:_(f32) = G_BITCAST %1(i32) + %5:_(<4 x f32>) = G_BITCAST %2(<4 x i32>) + %6:_(<4 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x1f32), %3(f32), %4(f32), %5(<4 x f32>), 0, 0, 0 + %7:_(<4 x i32>) = G_BITCAST %6(<4 x f32>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %7(<4 x i32>) ... 
--- @@ -160,16 +208,24 @@ body: | ; CHECK-LABEL: name: mfma_f32_32x32x2f32_vva ; CHECK: liveins: $vgpr0, $vgpr1, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x2f32), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<16 x s32>), 0, 0, 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - %3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x2f32), %0, %1, %2, 0, 0, 0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:agpr(<16 x f32>) = G_BITCAST [[COPY2]](<16 x i32>) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<16 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x2f32), [[BITCAST]](f32), [[BITCAST1]](f32), [[BITCAST2]](<16 x f32>), 0, 0, 0 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:agpr(<16 x i32>) = G_BITCAST [[INTRINSIC_CONVERGENT]](<16 x f32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST3]](<16 x i32>) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(<16 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + %3:_(f32) = G_BITCAST %0(i32) + %4:_(f32) = G_BITCAST %1(i32) + %5:_(<16 x f32>) = G_BITCAST %2(<16 x i32>) + %6:_(<16 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x2f32), %3(f32), %4(f32), %5(<16 x f32>), 0, 0, 0 + %7:_(<16 x i32>) = G_BITCAST %6(<16 x f32>) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %7(<16 x i32>) ... 
--- @@ -183,19 +239,27 @@ body: | ; CHECK-LABEL: name: mfma_f32_32x32x2f32_sss ; CHECK: liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr32 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr33 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>) - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x2f32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<16 x s32>), 0, 0, 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) - %0:_(s32) = COPY $sgpr32 - %1:_(s32) = COPY $sgpr33 - %2:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - %3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x2f32), %0, %1, %2, 0, 0, 0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr32 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr33 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<16 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:sgpr(<16 x f32>) = G_BITCAST [[COPY2]](<16 x i32>) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST]](f32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST1]](f32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<16 x f32>) = COPY [[BITCAST2]](<16 x f32>) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<16 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x2f32), [[COPY3]](f32), [[COPY4]](f32), [[COPY5]](<16 x f32>), 0, 0, 0 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:agpr(<16 x i32>) = G_BITCAST [[INTRINSIC_CONVERGENT]](<16 x f32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST3]](<16 x i32>) + %0:_(i32) = COPY $sgpr32 + %1:_(i32) = COPY $sgpr33 + %2:_(<16 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + %3:_(f32) = G_BITCAST %0(i32) + %4:_(f32) = G_BITCAST %1(i32) + %5:_(<16 x f32>) = G_BITCAST %2(<16 x i32>) + %6:_(<16 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x2f32), %3(f32), %4(f32), %5(<16 x f32>), 0, 0, 0 + %7:_(<16 x i32>) = G_BITCAST %6(<16 x f32>) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %7(<16 x i32>) ... 
--- @@ -209,16 +273,24 @@ body: | ; CHECK-LABEL: name: mfma_f32_16x16x4f32_vva ; CHECK: liveins: $vgpr0, $vgpr1, $agpr0_agpr1_agpr2_agpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f32), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<4 x s32>), 0, 0, 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - %3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f32), %0, %1, %2, 0, 0, 0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:agpr(<4 x f32>) = G_BITCAST [[COPY2]](<4 x i32>) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<4 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f32), [[BITCAST]](f32), [[BITCAST1]](f32), [[BITCAST2]](<4 x f32>), 0, 0, 0 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:agpr(<4 x i32>) = G_BITCAST [[INTRINSIC_CONVERGENT]](<4 x f32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST3]](<4 x i32>) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(<4 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3 + %3:_(f32) = G_BITCAST %0(i32) + %4:_(f32) = G_BITCAST %1(i32) + %5:_(<4 x f32>) = G_BITCAST %2(<4 x i32>) + %6:_(<4 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f32), %3(f32), %4(f32), %5(<4 x f32>), 0, 0, 0 + %7:_(<4 x i32>) = G_BITCAST %6(<4 x f32>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %7(<4 x i32>) ... 
--- @@ -232,19 +304,27 @@ body: | ; CHECK-LABEL: name: mfma_f32_16x16x4f32_sss ; CHECK: liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr32 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr33 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>) - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<4 x s32>), 0, 0, 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) - %0:_(s32) = COPY $sgpr32 - %1:_(s32) = COPY $sgpr33 - %2:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f32), %0, %1, %2, 0, 0, 0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr32 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr33 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:sgpr(<4 x f32>) = G_BITCAST [[COPY2]](<4 x i32>) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST]](f32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST1]](f32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<4 x f32>) = COPY [[BITCAST2]](<4 x f32>) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<4 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f32), [[COPY3]](f32), [[COPY4]](f32), [[COPY5]](<4 x f32>), 0, 0, 0 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:agpr(<4 x i32>) = G_BITCAST [[INTRINSIC_CONVERGENT]](<4 x f32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST3]](<4 x i32>) + %0:_(i32) = COPY $sgpr32 + %1:_(i32) = COPY $sgpr33 + %2:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %3:_(f32) = G_BITCAST %0(i32) + %4:_(f32) = G_BITCAST %1(i32) + %5:_(<4 x f32>) = G_BITCAST %2(<4 x i32>) + %6:_(<4 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f32), %3(f32), %4(f32), %5(<4 x f32>), 0, 0, 0 + %7:_(<4 x i32>) = G_BITCAST %6(<4 x f32>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %7(<4 x i32>) ... 
--- @@ -258,16 +338,24 @@ body: | ; CHECK-LABEL: name: mfma_f32_32x32x4f16_vva ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4f16), [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>), [[COPY2]](<32 x s32>), 0, 0, 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 - %2:_(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - %3:_(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4f16), %0, %1, %2, 0, 0, 0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<4 x i16>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<32 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<4 x f16>) = G_BITCAST [[COPY1]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:agpr(<32 x f32>) = G_BITCAST [[COPY2]](<32 x i32>) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<32 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4f16), [[BITCAST]](<4 x f16>), [[BITCAST1]](<4 x f16>), [[BITCAST2]](<32 x f32>), 0, 0, 0 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:agpr(<32 x i32>) = G_BITCAST [[INTRINSIC_CONVERGENT]](<32 x f32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[BITCAST3]](<32 x i32>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = COPY $vgpr2_vgpr3 + %2:_(<32 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + %3:_(<4 x f16>) = G_BITCAST %0(<4 x i16>) + %4:_(<4 x f16>) = G_BITCAST %1(<4 x i16>) + %5:_(<32 x 
f32>) = G_BITCAST %2(<32 x i32>) + %6:_(<32 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4f16), %3(<4 x f16>), %4(<4 x f16>), %5(<32 x f32>), 0, 0, 0 + %7:_(<32 x i32>) = G_BITCAST %6(<32 x f32>) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %7(<32 x i32>) ... --- @@ -281,19 +369,27 @@ body: | ; CHECK-LABEL: name: mfma_f32_32x32x4f16_sss ; CHECK: liveins: $sgpr32_sgpr33, $sgpr34_sgpr35, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr32_sgpr33 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr34_sgpr35 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<32 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY1]](<4 x s16>) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<32 x s32>) = COPY [[COPY2]](<32 x s32>) - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4f16), [[COPY3]](<4 x s16>), [[COPY4]](<4 x s16>), [[COPY5]](<32 x s32>), 0, 0, 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>) - %0:_(<4 x s16>) = COPY $sgpr32_sgpr33 - %1:_(<4 x s16>) = COPY $sgpr34_sgpr35 - %2:_(<32 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 - %3:_(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4f16), %0, %1, %2, 0, 0, 0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x i16>) = COPY $sgpr32_sgpr33 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<4 x i16>) = COPY $sgpr34_sgpr35 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<32 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(<4 x f16>) = G_BITCAST [[COPY1]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:sgpr(<32 x f32>) = G_BITCAST [[COPY2]](<32 x i32>) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x f16>) = COPY [[BITCAST]](<4 x f16>) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<4 x f16>) = COPY [[BITCAST1]](<4 x f16>) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<32 x f32>) = COPY [[BITCAST2]](<32 x f32>) + ; CHECK-NEXT: 
[[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<32 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4f16), [[COPY3]](<4 x f16>), [[COPY4]](<4 x f16>), [[COPY5]](<32 x f32>), 0, 0, 0 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:agpr(<32 x i32>) = G_BITCAST [[INTRINSIC_CONVERGENT]](<32 x f32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[BITCAST3]](<32 x i32>) + %0:_(<4 x i16>) = COPY $sgpr32_sgpr33 + %1:_(<4 x i16>) = COPY $sgpr34_sgpr35 + %2:_(<32 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + %3:_(<4 x f16>) = G_BITCAST %0(<4 x i16>) + %4:_(<4 x f16>) = G_BITCAST %1(<4 x i16>) + %5:_(<32 x f32>) = G_BITCAST %2(<32 x i32>) + %6:_(<32 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4f16), %3(<4 x f16>), %4(<4 x f16>), %5(<32 x f32>), 0, 0, 0 + %7:_(<32 x i32>) = G_BITCAST %6(<32 x f32>) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %7(<32 x i32>) ... --- @@ -307,16 +403,24 @@ body: | ; CHECK-LABEL: name: mfma_f32_16x16x4f16_vva ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f16), [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>), [[COPY2]](<16 x s32>), 0, 0, 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 - %2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - %3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f16), %0, %1, %2, 0, 0, 0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<4 x i16>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<4 x f16>) = G_BITCAST [[COPY1]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:agpr(<16 x f32>) = G_BITCAST [[COPY2]](<16 x i32>) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<16 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f16), [[BITCAST]](<4 x f16>), [[BITCAST1]](<4 x f16>), [[BITCAST2]](<16 x f32>), 0, 0, 0 + ; 
CHECK-NEXT: [[BITCAST3:%[0-9]+]]:agpr(<16 x i32>) = G_BITCAST [[INTRINSIC_CONVERGENT]](<16 x f32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST3]](<16 x i32>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = COPY $vgpr2_vgpr3 + %2:_(<16 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + %3:_(<4 x f16>) = G_BITCAST %0(<4 x i16>) + %4:_(<4 x f16>) = G_BITCAST %1(<4 x i16>) + %5:_(<16 x f32>) = G_BITCAST %2(<16 x i32>) + %6:_(<16 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f16), %3(<4 x f16>), %4(<4 x f16>), %5(<16 x f32>), 0, 0, 0 + %7:_(<16 x i32>) = G_BITCAST %6(<16 x f32>) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %7(<16 x i32>) ... --- @@ -330,19 +434,27 @@ body: | ; CHECK-LABEL: name: mfma_f32_16x16x4f16_sss ; CHECK: liveins: $sgpr32_sgpr33, $sgpr34_sgpr35, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr32_sgpr33 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr34_sgpr35 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY1]](<4 x s16>) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>) - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f16), [[COPY3]](<4 x s16>), [[COPY4]](<4 x s16>), [[COPY5]](<16 x s32>), 0, 0, 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) - %0:_(<4 x s16>) = COPY $sgpr32_sgpr33 - %1:_(<4 x s16>) = COPY $sgpr34_sgpr35 - %2:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - %3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f16), %0, %1, %2, 0, 0, 0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x i16>) = COPY $sgpr32_sgpr33 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<4 x i16>) = COPY $sgpr34_sgpr35 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<16 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(<4 x f16>) = G_BITCAST [[COPY1]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:sgpr(<16 x f32>) = G_BITCAST [[COPY2]](<16 x i32>) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x f16>) = COPY [[BITCAST]](<4 x f16>) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<4 x f16>) = COPY [[BITCAST1]](<4 x f16>) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<16 x f32>) = COPY [[BITCAST2]](<16 x f32>) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<16 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f16), [[COPY3]](<4 x f16>), [[COPY4]](<4 x f16>), [[COPY5]](<16 x f32>), 0, 0, 0 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:agpr(<16 x i32>) = 
G_BITCAST [[INTRINSIC_CONVERGENT]](<16 x f32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST3]](<16 x i32>) + %0:_(<4 x i16>) = COPY $sgpr32_sgpr33 + %1:_(<4 x i16>) = COPY $sgpr34_sgpr35 + %2:_(<16 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + %3:_(<4 x f16>) = G_BITCAST %0(<4 x i16>) + %4:_(<4 x f16>) = G_BITCAST %1(<4 x i16>) + %5:_(<16 x f32>) = G_BITCAST %2(<16 x i32>) + %6:_(<16 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f16), %3(<4 x f16>), %4(<4 x f16>), %5(<16 x f32>), 0, 0, 0 + %7:_(<16 x i32>) = G_BITCAST %6(<16 x f32>) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %7(<16 x i32>) ... --- @@ -356,16 +468,24 @@ body: | ; CHECK-LABEL: name: mfma_f32_4x4x4f16_vva ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x4f16), [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>), [[COPY2]](<4 x s32>), 0, 0, 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 - %2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - %3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x4f16), %0, %1, %2, 0, 0, 0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<4 x i16>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<4 x f16>) = G_BITCAST [[COPY1]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:agpr(<4 x f32>) = G_BITCAST [[COPY2]](<4 x i32>) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<4 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x4f16), [[BITCAST]](<4 x f16>), [[BITCAST1]](<4 x f16>), [[BITCAST2]](<4 x f32>), 0, 0, 0 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:agpr(<4 x i32>) = G_BITCAST [[INTRINSIC_CONVERGENT]](<4 x f32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST3]](<4 x i32>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = COPY $vgpr2_vgpr3 + %2:_(<4 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3 + %3:_(<4 x f16>) = G_BITCAST %0(<4 x i16>) + %4:_(<4 x f16>) = G_BITCAST %1(<4 x i16>) + %5:_(<4 x f32>) = G_BITCAST %2(<4 x i32>) + %6:_(<4 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x4f16), %3(<4 x f16>), %4(<4 x f16>), %5(<4 x f32>), 0, 0, 0 + %7:_(<4 x i32>) = G_BITCAST %6(<4 x f32>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %7(<4 x i32>) ... 
--- @@ -379,19 +499,27 @@ body: | ; CHECK-LABEL: name: mfma_f32_4x4x4f16_sss ; CHECK: liveins: $sgpr32_sgpr33, $sgpr34_sgpr35, $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr32_sgpr33 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr34_sgpr35 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY1]](<4 x s16>) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>) - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x4f16), [[COPY3]](<4 x s16>), [[COPY4]](<4 x s16>), [[COPY5]](<4 x s32>), 0, 0, 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) - %0:_(<4 x s16>) = COPY $sgpr32_sgpr33 - %1:_(<4 x s16>) = COPY $sgpr34_sgpr35 - %2:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x4f16), %0, %1, %2, 0, 0, 0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x i16>) = COPY $sgpr32_sgpr33 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<4 x i16>) = COPY $sgpr34_sgpr35 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(<4 x f16>) = G_BITCAST [[COPY1]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:sgpr(<4 x f32>) = G_BITCAST [[COPY2]](<4 x i32>) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x f16>) = COPY [[BITCAST]](<4 x f16>) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<4 x f16>) = COPY [[BITCAST1]](<4 x f16>) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<4 x f32>) = COPY [[BITCAST2]](<4 x f32>) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<4 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x4f16), [[COPY3]](<4 x f16>), [[COPY4]](<4 x f16>), [[COPY5]](<4 x f32>), 0, 0, 0 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:agpr(<4 x i32>) = G_BITCAST [[INTRINSIC_CONVERGENT]](<4 x f32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST3]](<4 x i32>) + %0:_(<4 x i16>) = COPY $sgpr32_sgpr33 + %1:_(<4 x i16>) = COPY $sgpr34_sgpr35 + %2:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %3:_(<4 x f16>) = G_BITCAST %0(<4 x i16>) + %4:_(<4 x f16>) = G_BITCAST %1(<4 x i16>) + %5:_(<4 x f32>) = G_BITCAST %2(<4 x i32>) + %6:_(<4 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x4f16), %3(<4 x f16>), %4(<4 x f16>), %5(<4 x f32>), 0, 0, 0 + %7:_(<4 x i32>) = G_BITCAST %6(<4 x f32>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %7(<4 x i32>) ... 
--- @@ -405,16 +533,24 @@ body: | ; CHECK-LABEL: name: mfma_f32_32x32x8f16_vva ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x8f16), [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>), [[COPY2]](<16 x s32>), 0, 0, 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 - %2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - %3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x8f16), %0, %1, %2, 0, 0, 0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<4 x i16>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<4 x f16>) = G_BITCAST [[COPY1]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:agpr(<16 x f32>) = G_BITCAST [[COPY2]](<16 x i32>) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<16 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x8f16), [[BITCAST]](<4 x f16>), [[BITCAST1]](<4 x f16>), [[BITCAST2]](<16 x f32>), 0, 0, 0 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:agpr(<16 x i32>) = G_BITCAST [[INTRINSIC_CONVERGENT]](<16 x f32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST3]](<16 x i32>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = COPY $vgpr2_vgpr3 + %2:_(<16 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + %3:_(<4 x f16>) = G_BITCAST %0(<4 x i16>) + %4:_(<4 x f16>) = G_BITCAST %1(<4 x i16>) + %5:_(<16 x f32>) = G_BITCAST %2(<16 x i32>) + %6:_(<16 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x8f16), %3(<4 x f16>), %4(<4 x f16>), %5(<16 x f32>), 0, 0, 0 + %7:_(<16 x i32>) = G_BITCAST %6(<16 x f32>) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %7(<16 x i32>) ... 
--- @@ -428,19 +564,27 @@ body: | ; CHECK-LABEL: name: mfma_f32_32x32x8f16_sss ; CHECK: liveins: $sgpr32_sgpr33, $sgpr34_sgpr35, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr32_sgpr33 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr34_sgpr35 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY1]](<4 x s16>) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>) - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x8f16), [[COPY3]](<4 x s16>), [[COPY4]](<4 x s16>), [[COPY5]](<16 x s32>), 0, 0, 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) - %0:_(<4 x s16>) = COPY $sgpr32_sgpr33 - %1:_(<4 x s16>) = COPY $sgpr34_sgpr35 - %2:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - %3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x8f16), %0, %1, %2, 0, 0, 0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x i16>) = COPY $sgpr32_sgpr33 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<4 x i16>) = COPY $sgpr34_sgpr35 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<16 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(<4 x f16>) = G_BITCAST [[COPY1]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:sgpr(<16 x f32>) = G_BITCAST [[COPY2]](<16 x i32>) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x f16>) = COPY [[BITCAST]](<4 x f16>) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<4 x f16>) = COPY [[BITCAST1]](<4 x f16>) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<16 x f32>) = COPY [[BITCAST2]](<16 x f32>) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<16 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x8f16), [[COPY3]](<4 x f16>), [[COPY4]](<4 x f16>), [[COPY5]](<16 x f32>), 0, 0, 0 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:agpr(<16 x i32>) = G_BITCAST [[INTRINSIC_CONVERGENT]](<16 x f32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST3]](<16 x i32>) + %0:_(<4 x i16>) = COPY $sgpr32_sgpr33 + %1:_(<4 x i16>) = COPY $sgpr34_sgpr35 + %2:_(<16 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + %3:_(<4 x f16>) = G_BITCAST %0(<4 x i16>) + %4:_(<4 x f16>) = G_BITCAST %1(<4 x i16>) + %5:_(<16 x f32>) = G_BITCAST %2(<16 x i32>) + %6:_(<16 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x8f16), %3(<4 x f16>), %4(<4 x f16>), %5(<16 x f32>), 0, 0, 0 + %7:_(<16 x i32>) = G_BITCAST %6(<16 x f32>) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %7(<16 x i32>) ... 
--- @@ -454,16 +598,24 @@ body: | ; CHECK-LABEL: name: mfma_f32_16x16x16f16_vva ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x16f16), [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>), [[COPY2]](<4 x s32>), 0, 0, 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 - %2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - %3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x16f16), %0, %1, %2, 0, 0, 0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<4 x i16>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<4 x f16>) = G_BITCAST [[COPY1]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:agpr(<4 x f32>) = G_BITCAST [[COPY2]](<4 x i32>) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<4 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x16f16), [[BITCAST]](<4 x f16>), [[BITCAST1]](<4 x f16>), [[BITCAST2]](<4 x f32>), 0, 0, 0 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:agpr(<4 x i32>) = G_BITCAST [[INTRINSIC_CONVERGENT]](<4 x f32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST3]](<4 x i32>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = COPY $vgpr2_vgpr3 + %2:_(<4 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3 + %3:_(<4 x f16>) = G_BITCAST %0(<4 x i16>) + %4:_(<4 x f16>) = G_BITCAST %1(<4 x i16>) + %5:_(<4 x f32>) = G_BITCAST %2(<4 x i32>) + %6:_(<4 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x16f16), %3(<4 x f16>), %4(<4 x f16>), %5(<4 x f32>), 0, 0, 0 + %7:_(<4 x i32>) = G_BITCAST %6(<4 x f32>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %7(<4 x i32>) ... 
--- @@ -477,19 +629,27 @@ body: | ; CHECK-LABEL: name: mfma_f32_16x16x16f16_sss ; CHECK: liveins: $sgpr32_sgpr33, $sgpr34_sgpr35, $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr32_sgpr33 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr34_sgpr35 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY1]](<4 x s16>) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>) - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x16f16), [[COPY3]](<4 x s16>), [[COPY4]](<4 x s16>), [[COPY5]](<4 x s32>), 0, 0, 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) - %0:_(<4 x s16>) = COPY $sgpr32_sgpr33 - %1:_(<4 x s16>) = COPY $sgpr34_sgpr35 - %2:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x16f16), %0, %1, %2, 0, 0, 0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x i16>) = COPY $sgpr32_sgpr33 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<4 x i16>) = COPY $sgpr34_sgpr35 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(<4 x f16>) = G_BITCAST [[COPY]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(<4 x f16>) = G_BITCAST [[COPY1]](<4 x i16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:sgpr(<4 x f32>) = G_BITCAST [[COPY2]](<4 x i32>) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x f16>) = COPY [[BITCAST]](<4 x f16>) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<4 x f16>) = COPY [[BITCAST1]](<4 x f16>) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<4 x f32>) = COPY [[BITCAST2]](<4 x f32>) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<4 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x16f16), [[COPY3]](<4 x f16>), [[COPY4]](<4 x f16>), [[COPY5]](<4 x f32>), 0, 0, 0 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:agpr(<4 x i32>) = G_BITCAST [[INTRINSIC_CONVERGENT]](<4 x f32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST3]](<4 x i32>) + %0:_(<4 x i16>) = COPY $sgpr32_sgpr33 + %1:_(<4 x i16>) = COPY $sgpr34_sgpr35 + %2:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %3:_(<4 x f16>) = G_BITCAST %0(<4 x i16>) + %4:_(<4 x f16>) = G_BITCAST %1(<4 x i16>) + %5:_(<4 x f32>) = G_BITCAST %2(<4 x i32>) + %6:_(<4 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x16f16), %3(<4 x f16>), %4(<4 x f16>), %5(<4 x f32>), 0, 0, 0 + %7:_(<4 x i32>) = G_BITCAST %6(<4 x f32>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %7(<4 x i32>) ... 
--- @@ -503,16 +663,16 @@ body: | ; CHECK-LABEL: name: mfma_i32_32x32x4i8_vva ; CHECK: liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.32x32x4i8), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<32 x s32>), 0, 0, 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr2 - %2:_(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - %3:_(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.32x32x4i8), %0, %1, %2, 0, 0, 0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<32 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<32 x i32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.32x32x4i8), [[COPY]](i32), [[COPY1]](i32), [[COPY2]](<32 x i32>), 0, 0, 0 + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INTRINSIC_CONVERGENT]](<32 x i32>) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr2 + %2:_(<32 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + %3:_(<32 x i32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.32x32x4i8), %0(i32), %1(i32), %2(<32 x i32>), 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3(<32 x i32>) ... 
--- @@ -526,19 +686,19 @@ body: | ; CHECK-LABEL: name: mfma_i32_32x32x4i8_sss ; CHECK: liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr32 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr33 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<32 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<32 x s32>) = COPY [[COPY2]](<32 x s32>) - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.32x32x4i8), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<32 x s32>), 0, 0, 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>) - %0:_(s32) = COPY $sgpr32 - %1:_(s32) = COPY $sgpr33 - %2:_(<32 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 - %3:_(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.32x32x4i8), %0, %1, %2, 0, 0, 0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr32 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr33 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<32 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<32 x i32>) = COPY [[COPY2]](<32 x i32>) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<32 x i32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.32x32x4i8), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](<32 x i32>), 0, 0, 0 + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INTRINSIC_CONVERGENT]](<32 x i32>) + %0:_(i32) = COPY $sgpr32 + %1:_(i32) = COPY $sgpr33 + %2:_(<32 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + %3:_(<32 x i32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.32x32x4i8), %0(i32), %1(i32), %2(<32 x i32>), 0, 0, 0 + 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3(<32 x i32>) ... --- @@ -552,16 +712,16 @@ body: | ; CHECK-LABEL: name: mfma_i32_16x16x4i8_vva ; CHECK: liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.16x16x4i8), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<16 x s32>), 0, 0, 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr2 - %2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - %3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.16x16x4i8), %0, %1, %2, 0, 0, 0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<16 x i32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.16x16x4i8), [[COPY]](i32), [[COPY1]](i32), [[COPY2]](<16 x i32>), 0, 0, 0 + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INTRINSIC_CONVERGENT]](<16 x i32>) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr2 + %2:_(<16 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + %3:_(<16 x i32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.16x16x4i8), %0(i32), %1(i32), %2(<16 x i32>), 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3(<16 x i32>) ... 
--- @@ -575,19 +735,19 @@ body: | ; CHECK-LABEL: name: mfma_i32_16x16x4i8_sss ; CHECK: liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr32 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr33 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>) - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.16x16x4i8), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<16 x s32>), 0, 0, 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) - %0:_(s32) = COPY $sgpr32 - %1:_(s32) = COPY $sgpr33 - %2:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - %3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.16x16x4i8), %0, %1, %2, 0, 0, 0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr32 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr33 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<16 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<16 x i32>) = COPY [[COPY2]](<16 x i32>) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<16 x i32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.16x16x4i8), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](<16 x i32>), 0, 0, 0 + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INTRINSIC_CONVERGENT]](<16 x i32>) + %0:_(i32) = COPY $sgpr32 + %1:_(i32) = COPY $sgpr33 + %2:_(<16 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + %3:_(<16 x i32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.16x16x4i8), %0(i32), %1(i32), %2(<16 x i32>), 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3(<16 x i32>) ... 
--- @@ -601,16 +761,16 @@ body: | ; CHECK-LABEL: name: mfma_i32_4x4x4i8_vva ; CHECK: liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.4x4x4i8), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<4 x s32>), 0, 0, 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr2 - %2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - %3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.4x4x4i8), %0, %1, %2, 0, 0, 0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<4 x i32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.4x4x4i8), [[COPY]](i32), [[COPY1]](i32), [[COPY2]](<4 x i32>), 0, 0, 0 + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INTRINSIC_CONVERGENT]](<4 x i32>) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr2 + %2:_(<4 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3 + %3:_(<4 x i32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.4x4x4i8), %0(i32), %1(i32), %2(<4 x i32>), 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3(<4 x i32>) ... --- @@ -624,19 +784,19 @@ body: | ; CHECK-LABEL: name: mfma_i32_4x4x4i8_sss ; CHECK: liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr32 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr33 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>) - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.4x4x4i8), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<4 x s32>), 0, 0, 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) - %0:_(s32) = COPY $sgpr32 - %1:_(s32) = COPY $sgpr33 - %2:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.4x4x4i8), %0, %1, %2, 0, 0, 0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr32 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr33 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<4 x i32>) = COPY [[COPY2]](<4 x i32>) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<4 x i32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.4x4x4i8), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](<4 x i32>), 0, 0, 0 + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INTRINSIC_CONVERGENT]](<4 x i32>) + %0:_(i32) = COPY $sgpr32 + %1:_(i32) = COPY $sgpr33 + %2:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %3:_(<4 x i32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.4x4x4i8), %0(i32), %1(i32), %2(<4 x 
i32>), 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3(<4 x i32>) ... --- @@ -650,16 +810,16 @@ body: | ; CHECK-LABEL: name: mfma_i32_32x32x8i8_vva ; CHECK: liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.32x32x8i8), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<16 x s32>), 0, 0, 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr2 - %2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - %3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.32x32x8i8), %0, %1, %2, 0, 0, 0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<16 x i32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.32x32x8i8), [[COPY]](i32), [[COPY1]](i32), [[COPY2]](<16 x i32>), 0, 0, 0 + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INTRINSIC_CONVERGENT]](<16 x i32>) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr2 + %2:_(<16 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + %3:_(<16 x i32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.32x32x8i8), %0(i32), %1(i32), %2(<16 x i32>), 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3(<16 x i32>) ... 
--- @@ -673,19 +833,19 @@ body: | ; CHECK-LABEL: name: mfma_i32_32x32x8i8_sss ; CHECK: liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr32 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr33 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>) - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.32x32x8i8), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<16 x s32>), 0, 0, 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) - %0:_(s32) = COPY $sgpr32 - %1:_(s32) = COPY $sgpr33 - %2:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - %3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.32x32x8i8), %0, %1, %2, 0, 0, 0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr32 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr33 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<16 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<16 x i32>) = COPY [[COPY2]](<16 x i32>) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<16 x i32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.32x32x8i8), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](<16 x i32>), 0, 0, 0 + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INTRINSIC_CONVERGENT]](<16 x i32>) + %0:_(i32) = COPY $sgpr32 + %1:_(i32) = COPY $sgpr33 + %2:_(<16 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + %3:_(<16 x i32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.32x32x8i8), %0(i32), %1(i32), %2(<16 x i32>), 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3(<16 x i32>) ... 
--- @@ -699,16 +859,16 @@ body: | ; CHECK-LABEL: name: mfma_i32_16x16x16i8_vva ; CHECK: liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.16x16x16i8), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<4 x s32>), 0, 0, 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr2 - %2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - %3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.16x16x16i8), %0, %1, %2, 0, 0, 0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<4 x i32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.16x16x16i8), [[COPY]](i32), [[COPY1]](i32), [[COPY2]](<4 x i32>), 0, 0, 0 + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INTRINSIC_CONVERGENT]](<4 x i32>) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr2 + %2:_(<4 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3 + %3:_(<4 x i32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.16x16x16i8), %0(i32), %1(i32), %2(<4 x i32>), 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3(<4 x i32>) ... --- @@ -722,19 +882,19 @@ body: | ; CHECK-LABEL: name: mfma_i32_16x16x16i8_sss ; CHECK: liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr32 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr33 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>) - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.16x16x16i8), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<4 x s32>), 0, 0, 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) - %0:_(s32) = COPY $sgpr32 - %1:_(s32) = COPY $sgpr33 - %2:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.16x16x16i8), %0, %1, %2, 0, 0, 0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr32 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr33 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<4 x i32>) = COPY [[COPY2]](<4 x i32>) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<4 x i32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.16x16x16i8), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](<4 x i32>), 0, 0, 0 + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INTRINSIC_CONVERGENT]](<4 x i32>) + %0:_(i32) = COPY $sgpr32 + %1:_(i32) = COPY $sgpr33 + %2:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %3:_(<4 x i32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.16x16x16i8), 
%0(i32), %1(i32), %2(<4 x i32>), 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3(<4 x i32>) ... --- @@ -748,16 +908,24 @@ body: | ; CHECK-LABEL: name: mfma_f32_32x32x2bf16_vva ; CHECK: liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x2bf16), [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<32 x s32>), 0, 0, 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr2 - %2:_(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - %3:_(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x2bf16), %0, %1, %2, 0, 0, 0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<32 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<2 x bf16>) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<2 x bf16>) = G_BITCAST [[COPY1]](<2 x i16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:agpr(<32 x f32>) = G_BITCAST [[COPY2]](<32 x i32>) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<32 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x2bf16), [[BITCAST]](<2 x bf16>), [[BITCAST1]](<2 x bf16>), [[BITCAST2]](<32 x f32>), 0, 0, 0 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:agpr(<32 x i32>) = G_BITCAST [[INTRINSIC_CONVERGENT]](<32 x f32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[BITCAST3]](<32 x i32>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr2 + %2:_(<32 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + %3:_(<2 x bf16>) = G_BITCAST %0(<2 x i16>) + %4:_(<2 x bf16>) 
= G_BITCAST %1(<2 x i16>) + %5:_(<32 x f32>) = G_BITCAST %2(<32 x i32>) + %6:_(<32 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x2bf16), %3(<2 x bf16>), %4(<2 x bf16>), %5(<32 x f32>), 0, 0, 0 + %7:_(<32 x i32>) = G_BITCAST %6(<32 x f32>) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %7(<32 x i32>) ... --- @@ -771,19 +939,27 @@ body: | ; CHECK-LABEL: name: mfma_f32_32x32x2bf16_sss ; CHECK: liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr32 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr33 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<32 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<32 x s32>) = COPY [[COPY2]](<32 x s32>) - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x2bf16), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<32 x s32>), 0, 0, 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>) - %0:_(<2 x s16>) = COPY $sgpr32 - %1:_(<2 x s16>) = COPY $sgpr33 - %2:_(<32 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 - %3:_(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x2bf16), %0, %1, %2, 0, 0, 0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr32 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr33 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<32 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(<2 x bf16>) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(<2 x bf16>) = G_BITCAST [[COPY1]](<2 x i16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:sgpr(<32 x f32>) = G_BITCAST [[COPY2]](<32 x i32>) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x bf16>) = COPY [[BITCAST]](<2 x bf16>) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<2 x bf16>) = COPY [[BITCAST1]](<2 x bf16>) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<32 x f32>) = COPY [[BITCAST2]](<32 x f32>) + ; CHECK-NEXT: 
[[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<32 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x2bf16), [[COPY3]](<2 x bf16>), [[COPY4]](<2 x bf16>), [[COPY5]](<32 x f32>), 0, 0, 0 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:agpr(<32 x i32>) = G_BITCAST [[INTRINSIC_CONVERGENT]](<32 x f32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[BITCAST3]](<32 x i32>) + %0:_(<2 x i16>) = COPY $sgpr32 + %1:_(<2 x i16>) = COPY $sgpr33 + %2:_(<32 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + %3:_(<2 x bf16>) = G_BITCAST %0(<2 x i16>) + %4:_(<2 x bf16>) = G_BITCAST %1(<2 x i16>) + %5:_(<32 x f32>) = G_BITCAST %2(<32 x i32>) + %6:_(<32 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x2bf16), %3(<2 x bf16>), %4(<2 x bf16>), %5(<32 x f32>), 0, 0, 0 + %7:_(<32 x i32>) = G_BITCAST %6(<32 x f32>) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %7(<32 x i32>) ... --- @@ -797,16 +973,24 @@ body: | ; CHECK-LABEL: name: mfma_f32_16x16x2bf16_vva ; CHECK: liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x2bf16), [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<16 x s32>), 0, 0, 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr2 - %2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - %3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x2bf16), %0, %1, %2, 0, 0, 0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<2 x bf16>) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<2 x bf16>) = G_BITCAST [[COPY1]](<2 x i16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:agpr(<16 x f32>) = G_BITCAST [[COPY2]](<16 x i32>) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<16 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x2bf16), [[BITCAST]](<2 x bf16>), [[BITCAST1]](<2 x bf16>), [[BITCAST2]](<16 x f32>), 0, 0, 0 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:agpr(<16 x i32>) = 
G_BITCAST [[INTRINSIC_CONVERGENT]](<16 x f32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST3]](<16 x i32>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr2 + %2:_(<16 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + %3:_(<2 x bf16>) = G_BITCAST %0(<2 x i16>) + %4:_(<2 x bf16>) = G_BITCAST %1(<2 x i16>) + %5:_(<16 x f32>) = G_BITCAST %2(<16 x i32>) + %6:_(<16 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x2bf16), %3(<2 x bf16>), %4(<2 x bf16>), %5(<16 x f32>), 0, 0, 0 + %7:_(<16 x i32>) = G_BITCAST %6(<16 x f32>) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %7(<16 x i32>) ... --- @@ -820,19 +1004,27 @@ body: | ; CHECK-LABEL: name: mfma_f32_16x16x2bf16_sss ; CHECK: liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr32 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr33 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>) - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x2bf16), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<16 x s32>), 0, 0, 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) - %0:_(<2 x s16>) = COPY $sgpr32 - %1:_(<2 x s16>) = COPY $sgpr33 - %2:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - %3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x2bf16), %0, %1, %2, 0, 0, 0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr32 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr33 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<16 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(<2 x bf16>) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(<2 x bf16>) = G_BITCAST [[COPY1]](<2 x i16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:sgpr(<16 x f32>) = G_BITCAST [[COPY2]](<16 x i32>) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x bf16>) = COPY [[BITCAST]](<2 x bf16>) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<2 x bf16>) = COPY [[BITCAST1]](<2 x bf16>) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<16 x f32>) = COPY [[BITCAST2]](<16 x f32>) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<16 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x2bf16), [[COPY3]](<2 x bf16>), [[COPY4]](<2 x bf16>), [[COPY5]](<16 x f32>), 0, 0, 0 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:agpr(<16 x i32>) = G_BITCAST [[INTRINSIC_CONVERGENT]](<16 x f32>) + ; CHECK-NEXT: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST3]](<16 x i32>) + %0:_(<2 x i16>) = COPY $sgpr32 + %1:_(<2 x i16>) = COPY $sgpr33 + %2:_(<16 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + %3:_(<2 x bf16>) = G_BITCAST %0(<2 x i16>) + %4:_(<2 x bf16>) = G_BITCAST %1(<2 x i16>) + %5:_(<16 x f32>) = G_BITCAST %2(<16 x i32>) + %6:_(<16 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x2bf16), %3(<2 x bf16>), %4(<2 x bf16>), %5(<16 x f32>), 0, 0, 0 + %7:_(<16 x i32>) = G_BITCAST %6(<16 x f32>) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %7(<16 x i32>) ... --- @@ -846,16 +1038,24 @@ body: | ; CHECK-LABEL: name: mfma_f32_4x4x2bf16_vva ; CHECK: liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x2bf16), [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<4 x s32>), 0, 0, 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr2 - %2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - %3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x2bf16), %0, %1, %2, 0, 0, 0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<2 x bf16>) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<2 x bf16>) = G_BITCAST [[COPY1]](<2 x i16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:agpr(<4 x f32>) = G_BITCAST [[COPY2]](<4 x i32>) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<4 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x2bf16), [[BITCAST]](<2 x bf16>), [[BITCAST1]](<2 x bf16>), [[BITCAST2]](<4 x f32>), 0, 0, 0 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:agpr(<4 x i32>) = G_BITCAST [[INTRINSIC_CONVERGENT]](<4 x f32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST3]](<4 x i32>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr2 + %2:_(<4 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3 + %3:_(<2 x bf16>) = G_BITCAST %0(<2 x i16>) + %4:_(<2 x bf16>) = G_BITCAST %1(<2 x i16>) + %5:_(<4 x f32>) = G_BITCAST %2(<4 x i32>) + %6:_(<4 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x2bf16), %3(<2 x bf16>), %4(<2 x bf16>), %5(<4 x f32>), 0, 0, 0 + %7:_(<4 x i32>) = G_BITCAST %6(<4 x f32>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %7(<4 x i32>) ... 
--- @@ -869,19 +1069,27 @@ body: | ; CHECK-LABEL: name: mfma_f32_4x4x2bf16_sss ; CHECK: liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr32 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr33 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>) - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x2bf16), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<4 x s32>), 0, 0, 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) - %0:_(<2 x s16>) = COPY $sgpr32 - %1:_(<2 x s16>) = COPY $sgpr33 - %2:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x2bf16), %0, %1, %2, 0, 0, 0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr32 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr33 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(<2 x bf16>) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(<2 x bf16>) = G_BITCAST [[COPY1]](<2 x i16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:sgpr(<4 x f32>) = G_BITCAST [[COPY2]](<4 x i32>) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x bf16>) = COPY [[BITCAST]](<2 x bf16>) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<2 x bf16>) = COPY [[BITCAST1]](<2 x bf16>) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<4 x f32>) = COPY [[BITCAST2]](<4 x f32>) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<4 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x2bf16), [[COPY3]](<2 x bf16>), [[COPY4]](<2 x bf16>), [[COPY5]](<4 x f32>), 0, 0, 0 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:agpr(<4 x i32>) = G_BITCAST [[INTRINSIC_CONVERGENT]](<4 x f32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST3]](<4 x i32>) + %0:_(<2 x i16>) = COPY $sgpr32 + %1:_(<2 x i16>) = COPY $sgpr33 + %2:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %3:_(<2 x bf16>) = G_BITCAST %0(<2 x i16>) + %4:_(<2 x bf16>) = G_BITCAST %1(<2 x i16>) + %5:_(<4 x f32>) = G_BITCAST %2(<4 x i32>) + %6:_(<4 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x2bf16), %3(<2 x bf16>), %4(<2 x bf16>), %5(<4 x f32>), 0, 0, 0 + %7:_(<4 x i32>) = G_BITCAST %6(<4 x f32>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %7(<4 x i32>) ... 
--- @@ -895,16 +1103,24 @@ body: | ; CHECK-LABEL: name: mfma_f32_32x32x4bf16_vva ; CHECK: liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16), [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<16 x s32>), 0, 0, 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr2 - %2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - %3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16), %0, %1, %2, 0, 0, 0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<2 x bf16>) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<2 x bf16>) = G_BITCAST [[COPY1]](<2 x i16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:agpr(<16 x f32>) = G_BITCAST [[COPY2]](<16 x i32>) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<16 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16), [[BITCAST]](<2 x bf16>), [[BITCAST1]](<2 x bf16>), [[BITCAST2]](<16 x f32>), 0, 0, 0 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:agpr(<16 x i32>) = G_BITCAST [[INTRINSIC_CONVERGENT]](<16 x f32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST3]](<16 x i32>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr2 + %2:_(<16 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + %3:_(<2 x bf16>) = G_BITCAST %0(<2 x i16>) + %4:_(<2 x bf16>) = G_BITCAST %1(<2 x i16>) + %5:_(<16 x f32>) = G_BITCAST %2(<16 x i32>) + %6:_(<16 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16), %3(<2 x bf16>), %4(<2 x bf16>), %5(<16 x f32>), 0, 0, 0 + %7:_(<16 x i32>) = G_BITCAST %6(<16 x f32>) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %7(<16 x i32>) ... 
--- @@ -918,19 +1134,27 @@ body: | ; CHECK-LABEL: name: mfma_f32_32x32x4bf16_sss ; CHECK: liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr32 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr33 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>) - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<16 x s32>), 0, 0, 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) - %0:_(<2 x s16>) = COPY $sgpr32 - %1:_(<2 x s16>) = COPY $sgpr33 - %2:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - %3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16), %0, %1, %2, 0, 0, 0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr32 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr33 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<16 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(<2 x bf16>) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(<2 x bf16>) = G_BITCAST [[COPY1]](<2 x i16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:sgpr(<16 x f32>) = G_BITCAST [[COPY2]](<16 x i32>) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x bf16>) = COPY [[BITCAST]](<2 x bf16>) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<2 x bf16>) = COPY [[BITCAST1]](<2 x bf16>) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<16 x f32>) = COPY [[BITCAST2]](<16 x f32>) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<16 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16), [[COPY3]](<2 x bf16>), [[COPY4]](<2 x bf16>), [[COPY5]](<16 x f32>), 0, 0, 0 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:agpr(<16 x i32>) = G_BITCAST [[INTRINSIC_CONVERGENT]](<16 x f32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST3]](<16 x i32>) + %0:_(<2 x i16>) = COPY $sgpr32 + %1:_(<2 x i16>) = COPY $sgpr33 + %2:_(<16 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + %3:_(<2 x bf16>) = G_BITCAST %0(<2 x i16>) + %4:_(<2 x bf16>) = G_BITCAST %1(<2 x i16>) + %5:_(<16 x f32>) = G_BITCAST %2(<16 x i32>) + %6:_(<16 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16), %3(<2 x bf16>), %4(<2 x bf16>), %5(<16 x f32>), 0, 0, 0 + %7:_(<16 x i32>) = G_BITCAST %6(<16 x f32>) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %7(<16 x i32>) ... 
--- @@ -944,16 +1168,24 @@ body: | ; CHECK-LABEL: name: mfma_f32_16x16x8bf16_vva ; CHECK: liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x8bf16), [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<4 x s32>), 0, 0, 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr2 - %2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - %3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x8bf16), %0, %1, %2, 0, 0, 0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<2 x bf16>) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<2 x bf16>) = G_BITCAST [[COPY1]](<2 x i16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:agpr(<4 x f32>) = G_BITCAST [[COPY2]](<4 x i32>) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<4 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x8bf16), [[BITCAST]](<2 x bf16>), [[BITCAST1]](<2 x bf16>), [[BITCAST2]](<4 x f32>), 0, 0, 0 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:agpr(<4 x i32>) = G_BITCAST [[INTRINSIC_CONVERGENT]](<4 x f32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST3]](<4 x i32>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr2 + %2:_(<4 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3 + %3:_(<2 x bf16>) = G_BITCAST %0(<2 x i16>) + %4:_(<2 x bf16>) = G_BITCAST %1(<2 x i16>) + %5:_(<4 x f32>) = G_BITCAST %2(<4 x i32>) + %6:_(<4 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x8bf16), %3(<2 x bf16>), %4(<2 x bf16>), %5(<4 x f32>), 0, 0, 0 + %7:_(<4 x i32>) = G_BITCAST %6(<4 x f32>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %7(<4 x i32>) ... 
--- @@ -967,17 +1199,25 @@ body: | ; CHECK-LABEL: name: mfma_f32_16x16x8bf16_sss ; CHECK: liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr32 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr33 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>) - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x8bf16), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<4 x s32>), 0, 0, 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) - %0:_(<2 x s16>) = COPY $sgpr32 - %1:_(<2 x s16>) = COPY $sgpr33 - %2:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x8bf16), %0, %1, %2, 0, 0, 0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr32 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr33 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(<2 x bf16>) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(<2 x bf16>) = G_BITCAST [[COPY1]](<2 x i16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:sgpr(<4 x f32>) = G_BITCAST [[COPY2]](<4 x i32>) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x bf16>) = COPY [[BITCAST]](<2 x bf16>) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<2 x bf16>) = COPY [[BITCAST1]](<2 x bf16>) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<4 x f32>) = COPY [[BITCAST2]](<4 x f32>) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:agpr(<4 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x8bf16), [[COPY3]](<2 x bf16>), [[COPY4]](<2 x bf16>), [[COPY5]](<4 x f32>), 0, 0, 0 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:agpr(<4 x i32>) = G_BITCAST [[INTRINSIC_CONVERGENT]](<4 x f32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST3]](<4 x i32>) + %0:_(<2 x i16>) = COPY $sgpr32 + %1:_(<2 x i16>) = COPY $sgpr33 + %2:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %3:_(<2 x bf16>) = G_BITCAST %0(<2 x i16>) + %4:_(<2 x bf16>) = G_BITCAST %1(<2 x i16>) + %5:_(<4 x f32>) = G_BITCAST %2(<4 x i32>) + %6:_(<4 x f32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x8bf16), %3(<2 x bf16>), %4(<2 x bf16>), %5(<4 x f32>), 0, 0, 0 + %7:_(<4 x i32>) = G_BITCAST %6(<4 x f32>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %7(<4 x i32>) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ps.live.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ps.live.mir index 50382cf4ea8d6..d03ab0f55f055 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ps.live.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ps.live.mir @@ -9,9 +9,8 @@ legalized: true body: | bb.0: ; CHECK-LABEL: name: ps_live - ; CHECK: [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.ps.live) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[INT]](s1) - %0:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.ps.live) - S_ENDPGM 0, implicit %0 + ; CHECK: [[INT:%[0-9]+]]:vcc(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.ps.live) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[INT]](i1) + %0:_(i1) = G_INTRINSIC intrinsic(@llvm.amdgcn.ps.live) + S_ENDPGM 0, implicit %0(i1) ... - diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.readfirstlane.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.readfirstlane.mir index 04cdf2e9fce73..e1666a6135f3a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.readfirstlane.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.readfirstlane.mir @@ -12,11 +12,11 @@ body: | ; CHECK-LABEL: name: readfirstlane_s ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %0(i32) ... --- @@ -29,8 +29,8 @@ body: | ; CHECK-LABEL: name: readfirstlane_v ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %0(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.readlane.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.readlane.mir index 3065e79fdd01a..a24e2dca202b4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.readlane.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.readlane.mir @@ -12,13 +12,13 @@ body: | ; CHECK-LABEL: name: readlane_ss ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), [[COPY2]](s32), [[COPY1]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), [[COPY2]](i32), [[COPY1]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), %0(i32), %1(i32) ... --- @@ -31,12 +31,12 @@ body: | ; CHECK-LABEL: name: readlane_vs ; CHECK: liveins: $vgpr0, $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), [[COPY]](s32), [[COPY1]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), [[COPY]](i32), [[COPY1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr0 + %2:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), %0(i32), %1(i32) ... --- @@ -49,13 +49,13 @@ body: | ; CHECK-LABEL: name: readlane_vv ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), [[COPY]](s32), [[V_READFIRSTLANE_B32_]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[COPY1]](i32), implicit $exec + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), [[COPY]](i32), [[V_READFIRSTLANE_B32_]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), %0(i32), %1(i32) ... 
--- @@ -68,14 +68,14 @@ body: | ; CHECK-LABEL: name: readlane_sv ; CHECK: liveins: $vgpr0, $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), [[COPY2]](s32), [[V_READFIRSTLANE_B32_]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[COPY1]](i32), implicit $exec + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), [[COPY2]](i32), [[V_READFIRSTLANE_B32_]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), %0(i32), %1(i32) ... --- @@ -88,17 +88,17 @@ body: | ; CHECK-LABEL: name: readlane_aa ; CHECK: liveins: $agpr0, $agpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(s32) = COPY $agpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(s32) = COPY $agpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY3]](s32), implicit $exec - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), [[COPY2]](s32), [[V_READFIRSTLANE_B32_]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[INT]](s32) - %0:_(s32) = COPY $agpr0 - %1:_(s32) = COPY $agpr1 - %2:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), %0, %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(i32) = COPY $agpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(i32) = COPY $agpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[COPY3]](i32), implicit $exec + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), [[COPY2]](i32), [[V_READFIRSTLANE_B32_]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[INTRINSIC_CONVERGENT]](i32) + %0:_(i32) = COPY $agpr0 + %1:_(i32) = COPY $agpr1 + %2:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), %0(i32), %1(i32) + S_ENDPGM 0, implicit %2(i32) ... 
--- @@ -111,13 +111,13 @@ body: | ; CHECK-LABEL: name: readlane_as ; CHECK: liveins: $agpr0, $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(s32) = COPY $agpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), [[COPY2]](s32), [[COPY1]](s32) - %0:_(s32) = COPY $agpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(i32) = COPY $agpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), [[COPY2]](i32), [[COPY1]](i32) + %0:_(i32) = COPY $agpr0 + %1:_(i32) = COPY $sgpr0 + %2:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), %0(i32), %1(i32) ... --- @@ -130,15 +130,15 @@ body: | ; CHECK-LABEL: name: readlane_sa ; CHECK: liveins: $agpr0, $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(s32) = COPY $agpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY3]](s32), implicit $exec - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), [[COPY2]](s32), [[V_READFIRSTLANE_B32_]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $agpr0 - %2:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(i32) = COPY $agpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[COPY3]](i32), implicit $exec + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), [[COPY2]](i32), [[V_READFIRSTLANE_B32_]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $agpr0 + %2:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), %0(i32), %1(i32) ... 
--- @@ -151,12 +151,12 @@ body: | ; CHECK-LABEL: name: readlane_va ; CHECK: liveins: $vgpr0, $agpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(s32) = COPY $agpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY2]](s32), implicit $exec - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), [[COPY]](s32), [[V_READFIRSTLANE_B32_]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $agpr0 - %2:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(i32) = COPY $agpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[COPY2]](i32), implicit $exec + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), [[COPY]](i32), [[V_READFIRSTLANE_B32_]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $agpr0 + %2:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), %0(i32), %1(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.mir index 45a2ab5b774c2..bf09798ac9a4b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.mir @@ -18,34 +18,35 @@ body: | ; FAST-LABEL: name: s_buffer_load_f32_vgpr_offset_cross_bank_copy_add_offset ; FAST: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr0 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 256 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; FAST-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY3]], [[COPY2]] - ; FAST-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[COPY3]], [[C1]], 256, 0, 0 :: (dereferenceable invariant load (s32)) - ; FAST-NEXT: S_ENDPGM 0, implicit [[AMDGPU_BUFFER_LOAD]](s32) + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 256 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; FAST-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[COPY3]], [[COPY2]] + ; FAST-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[C2:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(i32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x i32>), [[C2]](i32), [[COPY3]], [[C1]], 256, 0, 0 :: (dereferenceable invariant load (i32)) + ; FAST-NEXT: S_ENDPGM 0, implicit [[AMDGPU_BUFFER_LOAD]](i32) + ; ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_offset_cross_bank_copy_add_offset ; GREEDY: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: 
[[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 256 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY2]], [[C]] - ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[COPY2]], [[C1]], 256, 0, 0 :: (dereferenceable invariant load (s32)) - ; GREEDY-NEXT: S_ENDPGM 0, implicit [[AMDGPU_BUFFER_LOAD]](s32) - %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %1:_(s32) = COPY $sgpr0 - %2:vgpr(s32) = G_CONSTANT i32 256 - %3:_(s32) = G_ADD %1, %2 - %4:_(s32) = G_AMDGPU_S_BUFFER_LOAD %0, %3, 0 - S_ENDPGM 0, implicit %4 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[C:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 256 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[COPY2]], [[C]] + ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(i32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x i32>), [[C2]](i32), [[COPY2]], [[C1]], 256, 0, 0 :: (dereferenceable invariant load (i32)) + ; GREEDY-NEXT: S_ENDPGM 0, implicit [[AMDGPU_BUFFER_LOAD]](i32) + %0:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %1:_(i32) = COPY $sgpr0 + %2:vgpr(i32) = G_CONSTANT i32 256 + %3:_(i32) = G_ADD %1, %2 + %4:_(i32) = G_AMDGPU_S_BUFFER_LOAD %0(<4 x i32>), %3(i32), 0 + S_ENDPGM 0, implicit %4(i32) ... 
@@ -60,32 +61,33 @@ body: | ; FAST-LABEL: name: s_buffer_load_negative_offset ; FAST: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr0 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -60 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] - ; FAST-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; FAST-NEXT: S_ENDPGM 0, implicit [[AMDGPU_BUFFER_LOAD]](s32) + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 -60 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; FAST-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[COPY1]], [[COPY2]] + ; FAST-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[C2:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(i32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x i32>), [[C2]](i32), [[ADD]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (i32)) + ; FAST-NEXT: S_ENDPGM 0, implicit [[AMDGPU_BUFFER_LOAD]](i32) + ; ; GREEDY-LABEL: name: s_buffer_load_negative_offset ; GREEDY: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -60 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] - ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; GREEDY-NEXT: S_ENDPGM 0, implicit [[AMDGPU_BUFFER_LOAD]](s32) - %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = G_CONSTANT i32 -60 - %3:_(s32) = G_ADD %1, %2 - %4:_(s32) = G_AMDGPU_S_BUFFER_LOAD %0, %3, 0 - S_ENDPGM 0, implicit %4 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 -60 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[COPY1]], [[COPY2]] + ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(i32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x i32>), [[C2]](i32), [[ADD]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (i32)) + ; GREEDY-NEXT: S_ENDPGM 0, implicit [[AMDGPU_BUFFER_LOAD]](i32) + %0:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = G_CONSTANT i32 -60 + %3:_(i32) = G_ADD %1, %2 + %4:_(i32) = G_AMDGPU_S_BUFFER_LOAD %0(<4 x i32>), %3(i32), 0 + S_ENDPGM 0, implicit %4(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.get.waveid.in.workgroup.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.get.waveid.in.workgroup.mir index 8307fa88da182..f1613f30989c0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.get.waveid.in.workgroup.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.get.waveid.in.workgroup.mir @@ -9,6 +9,6 @@ legalized: true body: | bb.0: ; CHECK-LABEL: name: s_get_waveid_in_workgroup - ; CHECK: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.get.waveid.in.workgroup) - %0:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.get.waveid.in.workgroup) + ; CHECK: [[INT:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.get.waveid.in.workgroup) + %0:_(i32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.get.waveid.in.workgroup) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.getpc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.getpc.mir index 9650da855ba5a..ce29da374bf6e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.getpc.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.getpc.mir @@ -9,6 +9,6 @@ legalized: true body: | bb.0: ; CHECK-LABEL: name: getpc - ; CHECK: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.getpc) - %0:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.getpc) + ; CHECK: [[INT:%[0-9]+]]:sgpr(i64) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.getpc) + %0:_(i64) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.getpc) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.getreg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.getreg.mir index b4a02721ad631..7d9b43b472992 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.getreg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.getreg.mir @@ -9,6 +9,6 @@ legalized: true body: | bb.0: ; CHECK-LABEL: name: getreg - ; CHECK: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.getreg), 0 - %0:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.getreg), 0 + ; CHECK: [[INT:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.getreg), 0 + %0:_(i32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.getreg), 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.memrealtime.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.memrealtime.mir index a2ec76a4950d3..fad242de7428b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.memrealtime.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.memrealtime.mir @@ -9,6 +9,6 @@ legalized: true body: | bb.0: ; CHECK-LABEL: name: memrealtime - ; CHECK: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.memrealtime) - %0:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.memrealtime) + ; CHECK: [[INT:%[0-9]+]]:sgpr(i64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.memrealtime) + %0:_(i64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.memrealtime) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.memtime.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.memtime.mir index 4caf9882f7b17..67378dd5ee395 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.memtime.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.memtime.mir @@ -9,6 +9,6 @@ legalized: true body: | bb.0: ; CHECK-LABEL: name: memtime - ; CHECK: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.memtime) - %0:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.memtime) + ; CHECK: [[INT:%[0-9]+]]:sgpr(i64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.memtime) + %0:_(i64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.memtime) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.sendmsg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.sendmsg.mir index 3e9d3297091f6..ac81bfcb71ad9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.sendmsg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.sendmsg.mir @@ -12,10 +12,10 @@ body: | ; CHECK-LABEL: name: sendmsg_s ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsg), 0, [[COPY]](s32) - %0:_(s32) = COPY $sgpr0 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsg), 0, %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsg), 0, [[COPY]](i32) + %0:_(i32) = COPY $sgpr0 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsg), 0, %0(i32) ... --- @@ -28,9 +28,9 @@ body: | ; CHECK-LABEL: name: sendmsg_v ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY]](s32), implicit $exec - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsg), 0, [[V_READFIRSTLANE_B32_]](s32) - %0:_(s32) = COPY $vgpr0 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsg), 0, %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[COPY]](i32), implicit $exec + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsg), 0, [[V_READFIRSTLANE_B32_]](i32) + %0:_(i32) = COPY $vgpr0 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsg), 0, %0(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.sendmsghalt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.sendmsghalt.mir index df490b26703fc..4db162c69384d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.sendmsghalt.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.sendmsghalt.mir @@ -12,10 +12,10 @@ body: | ; CHECK-LABEL: name: sendmsghalt_s ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsghalt), 0, [[COPY]](s32) - %0:_(s32) = COPY $sgpr0 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsghalt), 0, %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsghalt), 0, [[COPY]](i32) + %0:_(i32) = COPY $sgpr0 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsghalt), 0, %0(i32) ... --- @@ -28,9 +28,9 @@ body: | ; CHECK-LABEL: name: sendmsghalt_v ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY]](s32), implicit $exec - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsghalt), 0, [[V_READFIRSTLANE_B32_]](s32) - %0:_(s32) = COPY $vgpr0 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsghalt), 0, %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[COPY]](i32), implicit $exec + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsghalt), 0, [[V_READFIRSTLANE_B32_]](i32) + %0:_(i32) = COPY $vgpr0 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsghalt), 0, %0(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.update.dpp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.update.dpp.mir index 778b577c77e54..4d0f10bc9d8dd 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.update.dpp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.update.dpp.mir @@ -13,13 +13,13 @@ body: | ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.update.dpp), [[COPY2]](p3), [[COPY3]](s32), 0, 0, 0, 0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:vgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.update.dpp), [[COPY2]](p3), [[COPY3]](i32), 0, 0, 0, 0 %0:_(p3) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.update.dpp), %0, %1, 0, 0, 0, 0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.update.dpp), %0(p3), %1(i32), 0, 0, 0, 0 ... 
@@ -35,12 +35,12 @@ body: | ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.update.dpp), [[COPY2]](p3), [[COPY1]](s32), 0, 0, 0, 0 + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:vgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.update.dpp), [[COPY2]](p3), [[COPY1]](i32), 0, 0, 0, 0 %0:_(p3) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.update.dpp), %0, %1, 0, 0, 0, 0 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.update.dpp), %0(p3), %1(i32), 0, 0, 0, 0 ... @@ -56,12 +56,12 @@ body: | ; CHECK: liveins: $vgpr0, $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.update.dpp), [[COPY]](p3), [[COPY2]](s32), 0, 0, 0, 0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:vgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.update.dpp), [[COPY]](p3), [[COPY2]](i32), 0, 0, 0, 0 %0:_(p3) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.update.dpp), %0, %1, 0, 0, 0, 0 + %1:_(i32) = COPY $sgpr0 + %2:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.update.dpp), %0(p3), %1(i32), 0, 0, 0, 0 ... @@ -77,10 +77,10 @@ body: | ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.update.dpp), [[COPY]](p3), [[COPY1]](s32), 0, 0, 0, 0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:vgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.update.dpp), [[COPY]](p3), [[COPY1]](i32), 0, 0, 0, 0 %0:_(p3) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.update.dpp), %0, %1, 0, 0, 0, 0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.update.dpp), %0(p3), %1(i32), 0, 0, 0, 0 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.wqm.demote.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.wqm.demote.mir index 373caa258c43f..3200ae4a9ded0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.wqm.demote.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.wqm.demote.mir @@ -12,16 +12,16 @@ body: | ; CHECK-LABEL: name: wqm_demote_scc ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.wqm.demote), [[COPY2]](s1) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s1) = G_ICMP intpred(eq), %0, %1 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.wqm.demote), %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY1]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.wqm.demote), [[COPY2]](i1) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i1) = G_ICMP intpred(eq), %0(i32), %1 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.wqm.demote), %2(i1) ... --- @@ -34,14 +34,14 @@ body: | ; CHECK-LABEL: name: wqm_demote_vcc ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.wqm.demote), [[ICMP]](s1) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s1) = G_ICMP intpred(eq), %0, %1 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.wqm.demote), %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY1]] + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.wqm.demote), [[ICMP]](i1) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i1) = G_ICMP intpred(eq), %0(i32), %1 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.wqm.demote), %2(i1) ... 
--- @@ -51,12 +51,12 @@ legalized: true body: | bb.0: ; CHECK-LABEL: name: wqm_demote_constant_true - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[C]](s32) - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.wqm.demote), [[COPY]](s1) - %0:_(s1) = G_CONSTANT i1 true - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.wqm.demote), %0 + ; CHECK: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[C]](i32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.wqm.demote), [[COPY]](i1) + %0:_(i1) = G_CONSTANT i1 true + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.wqm.demote), %0(i1) ... --- @@ -66,10 +66,10 @@ legalized: true body: | bb.0: ; CHECK-LABEL: name: wqm_demote_constant_false - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[C]](s32) - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.wqm.demote), [[COPY]](s1) - %0:_(s1) = G_CONSTANT i1 false - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.wqm.demote), %0 + ; CHECK: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[C]](i32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.wqm.demote), [[COPY]](i1) + %0:_(i1) = G_CONSTANT i1 false + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.wqm.demote), %0(i1) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.wqm.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.wqm.mir index d5a0e03acb361..0b328a44ac9ea 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.wqm.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.wqm.mir @@ -12,11 +12,11 @@ body: | ; CHECK-LABEL: name: wqm_s ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.wqm), [[COPY1]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.wqm), %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.wqm), [[COPY1]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.wqm), %0(i32) ... --- @@ -29,8 +29,8 @@ body: | ; CHECK-LABEL: name: wqm_v ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.wqm), [[COPY]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.wqm), %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.wqm), [[COPY]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.wqm), %0(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.wqm.vote.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.wqm.vote.mir index aef8cda8e73e9..2dadcc2f764b4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.wqm.vote.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.wqm.vote.mir @@ -12,16 +12,16 @@ body: | ; CHECK-LABEL: name: wqm_vote_scc ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wqm.vote), [[COPY2]](s1) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s1) = G_ICMP intpred(ne), %0, %1 - %3:_(s1) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wqm.vote), %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:vcc(i1) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wqm.vote), [[COPY2]](i1) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %3:_(i1) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wqm.vote), %2(i1) ... --- @@ -34,14 +34,14 @@ body: | ; CHECK-LABEL: name: wqm_vote_vcc ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wqm.vote), [[ICMP]](s1) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s1) = G_ICMP intpred(ne), %0, %1 - %3:_(s1) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wqm.vote), %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:vcc(i1) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wqm.vote), [[ICMP]](i1) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %3:_(i1) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wqm.vote), %2(i1) ... 
--- @@ -54,11 +54,11 @@ body: | ; CHECK-LABEL: name: wqm_vote_sgpr ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wqm.vote), [[COPY1]](s1) - %0:_(s32) = COPY $sgpr0 - %1:_(s1) = G_TRUNC %0 - %2:_(s1) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wqm.vote), %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:vcc(i1) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wqm.vote), [[COPY1]](i1) + %0:_(i32) = COPY $sgpr0 + %1:_(i1) = G_TRUNC %0(i32) + %2:_(i1) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wqm.vote), %1(i1) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.writelane.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.writelane.mir index fe09e7555d41c..f041fc49099f2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.writelane.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.writelane.mir @@ -12,15 +12,15 @@ body: | ; CHECK-LABEL: name: writelane_sss ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.writelane), [[COPY]](s32), [[COPY1]](s32), [[COPY3]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $sgpr2 - %3:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.writelane), %0, %1, %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:vgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.writelane), [[COPY]](i32), [[COPY1]](i32), [[COPY3]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $sgpr2 + %3:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.writelane), %0(i32), %1(i32), %2(i32) ... 
--- @@ -33,14 +33,14 @@ body: | ; CHECK-LABEL: name: writelane_ssv ; CHECK: liveins: $sgpr0, $sgpr1, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.writelane), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $vgpr0 - %3:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.writelane), %0, %1, %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:vgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.writelane), [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $vgpr0 + %3:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.writelane), %0(i32), %1(i32), %2(i32) ... --- @@ -53,15 +53,15 @@ body: | ; CHECK-LABEL: name: writelane_vsv ; CHECK: liveins: $sgpr0, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY]](s32), implicit $exec - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.writelane), [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), [[COPY2]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s32) = COPY $vgpr1 - %3:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.writelane), %0, %1, %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[COPY]](i32), implicit $exec + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:vgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.writelane), [[V_READFIRSTLANE_B32_]](i32), [[COPY1]](i32), [[COPY2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr0 + %2:_(i32) = COPY $vgpr1 + %3:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.writelane), %0(i32), %1(i32), %2(i32) ... 
--- @@ -74,16 +74,16 @@ body: | ; CHECK-LABEL: name: writelane_vvv ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.writelane), [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[COPY2]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.writelane), %0, %1, %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[COPY]](i32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[COPY1]](i32), implicit $exec + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:vgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.writelane), [[V_READFIRSTLANE_B32_]](i32), [[V_READFIRSTLANE_B32_1]](i32), [[COPY2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.writelane), %0(i32), %1(i32), %2(i32) ... --- @@ -96,13 +96,13 @@ body: | ; CHECK-LABEL: name: writelane_svv ; CHECK: liveins: $sgpr0, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.writelane), [[COPY]](s32), [[V_READFIRSTLANE_B32_]](s32), [[COPY2]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = COPY $vgpr1 - %3:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.writelane), %0, %1, %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[COPY1]](i32), implicit $exec + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:vgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.writelane), [[COPY]](i32), [[V_READFIRSTLANE_B32_]](i32), [[COPY2]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = COPY $vgpr1 + %3:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.writelane), %0(i32), %1(i32), %2(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.wwm.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.wwm.mir index e5590d7b59eec..ba5ce65a6f56b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.wwm.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.wwm.mir @@ -12,11 +12,11 @@ body: | ; CHECK-LABEL: name: strict_wwm_s ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.strict.wwm), [[COPY1]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.strict.wwm), %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:vgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.strict.wwm), [[COPY1]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.strict.wwm), %0(i32) ... --- @@ -29,8 +29,8 @@ body: | ; CHECK-LABEL: name: strict_wwm_v ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.strict.wwm), [[COPY]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.strict.wwm), %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:vgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.strict.wwm), [[COPY]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.strict.wwm), %0(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgpu-ffbh-u32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgpu-ffbh-u32.mir index fa5df0f69bb18..5309f8d46e34d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgpu-ffbh-u32.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgpu-ffbh-u32.mir @@ -13,10 +13,10 @@ body: | ; CHECK-LABEL: name: ffbh_u32_s ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[AMDGPU_FFBH_U32_:%[0-9]+]]:sgpr(s32) = G_AMDGPU_FFBH_U32 [[COPY]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = G_AMDGPU_FFBH_U32 %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[AMDGPU_FFBH_U32_:%[0-9]+]]:sgpr(i32) = G_AMDGPU_FFBH_U32 [[COPY]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = G_AMDGPU_FFBH_U32 %0(i32) ... --- @@ -29,8 +29,8 @@ body: | ; CHECK-LABEL: name: ffbh_u32_v ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[AMDGPU_FFBH_U32_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FFBH_U32 [[COPY]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_AMDGPU_FFBH_U32 %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[AMDGPU_FFBH_U32_:%[0-9]+]]:vgpr(i32) = G_AMDGPU_FFBH_U32 [[COPY]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_AMDGPU_FFBH_U32 %0(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgpu-ffbl-b32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgpu-ffbl-b32.mir index 23fc3de6490af..0cc4c9d6d548f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgpu-ffbl-b32.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgpu-ffbl-b32.mir @@ -13,10 +13,10 @@ body: | ; CHECK-LABEL: name: ffbl_b32_s ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[AMDGPU_FFBL_B32_:%[0-9]+]]:sgpr(s32) = G_AMDGPU_FFBL_B32 [[COPY]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = G_AMDGPU_FFBL_B32 %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[AMDGPU_FFBL_B32_:%[0-9]+]]:sgpr(i32) = G_AMDGPU_FFBL_B32 [[COPY]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = G_AMDGPU_FFBL_B32 %0(i32) ... --- @@ -29,8 +29,8 @@ body: | ; CHECK-LABEL: name: ffbl_b32_v ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[AMDGPU_FFBL_B32_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FFBL_B32 [[COPY]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_AMDGPU_FFBL_B32 %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[AMDGPU_FFBL_B32_:%[0-9]+]]:vgpr(i32) = G_AMDGPU_FFBL_B32 [[COPY]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_AMDGPU_FFBL_B32 %0(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgpu-wave-address.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgpu-wave-address.mir index f372c1f81948f..bb65b419417f2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgpu-wave-address.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgpu-wave-address.mir @@ -12,7 +12,7 @@ body: | ; CHECK: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:sgpr(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 ; CHECK-NEXT: S_ENDPGM 0, implicit [[AMDGPU_WAVE_ADDRESS]](p5) %0:_(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 - S_ENDPGM 0, implicit %0 + S_ENDPGM 0, implicit %0(p5) ... # TODO: Should infer v here @@ -29,5 +29,5 @@ body: | ; CHECK-NEXT: G_STORE [[COPY]](p5), [[COPY1]](p1) :: (store (p5), addrspace 1) %0:_(p1) = G_IMPLICIT_DEF %1:_(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 - G_STORE %1, %0 :: (store (p5), addrspace 1) + G_STORE %1(p5), %0(p1) :: (store (p5), addrspace 1) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-and-s1.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-and-s1.mir index 1276752017812..a504b8caf5e43 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-and-s1.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-and-s1.mir @@ -12,19 +12,19 @@ body: | ; CHECK-LABEL: name: and_s1_sgpr_sgpr ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC1]](s1) - ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[ANYEXT]], [[ANYEXT1]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[AND]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s1) = G_TRUNC %0 - %3:_(s1) = G_TRUNC %1 - %4:_(s1) = G_AND %2, %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC]](i1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC1]](i1) + ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(i32) = G_AND [[ANYEXT]], [[ANYEXT1]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(i1) = G_TRUNC [[AND]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i1) = G_TRUNC %0(i32) + %3:_(i1) = G_TRUNC %1(i32) + %4:_(i1) = G_AND %2, %3 ... --- @@ -37,23 +37,23 @@ body: | ; CHECK-LABEL: name: and_s1_scc_scc ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC1]](s1) - ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[ANYEXT]], [[ANYEXT1]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[AND]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_CONSTANT i32 0 - %3:_(s1) = G_ICMP intpred(eq), %0, %2 - %4:_(s1) = G_ICMP intpred(eq), %1, %2 - %5:_(s1) = G_AND %3, %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY]](i32), [[C]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY1]](i32), [[C]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP1]](i32) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC]](i1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC1]](i1) + ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(i32) = G_AND [[ANYEXT]], [[ANYEXT1]] + ; 
CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(i1) = G_TRUNC [[AND]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_CONSTANT i32 0 + %3:_(i1) = G_ICMP intpred(eq), %0(i32), %2 + %4:_(i1) = G_ICMP intpred(eq), %1(i32), %2 + %5:_(i1) = G_AND %3, %4 ... --- @@ -66,19 +66,19 @@ body: | ; CHECK-LABEL: name: and_s1_vgpr_vgpr ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC1]](s1) - ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ANYEXT]], [[ANYEXT1]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(s1) = G_TRUNC [[AND]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s1) = G_TRUNC %0 - %3:_(s1) = G_TRUNC %1 - %4:_(s1) = G_AND %2, %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[TRUNC]](i1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[TRUNC1]](i1) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(i32) = G_AND [[ANYEXT]], [[ANYEXT1]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(i1) = G_TRUNC [[AND]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i1) = G_TRUNC %0(i32) + %3:_(i1) = G_TRUNC %1(i32) + %4:_(i1) = G_AND %2, %3 ... --- @@ -91,20 +91,20 @@ body: | ; CHECK-LABEL: name: and_s1_vcc_vcc ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY2]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]] - ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_CONSTANT i32 0 - %3:_(s1) = G_ICMP intpred(eq), %0, %2 - %4:_(s1) = G_ICMP intpred(eq), %1, %2 - %5:_(s1) = G_AND %3, %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY2]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[COPY3]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(i1) = G_AND [[ICMP]], [[ICMP1]] + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_CONSTANT i32 0 + %3:_(i1) = G_ICMP intpred(eq), %0(i32), %2 + %4:_(i1) = G_ICMP intpred(eq), %1(i32), %2 + %5:_(i1) = G_AND %3, %4 ... 
--- @@ -117,19 +117,19 @@ body: | ; CHECK-LABEL: name: and_s1_sgpr_vgpr ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC1]](s1) - ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ANYEXT]], [[ANYEXT1]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(s1) = G_TRUNC [[AND]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s1) = G_TRUNC %0 - %3:_(s1) = G_TRUNC %1 - %4:_(s1) = G_AND %2, %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[TRUNC]](i1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[TRUNC1]](i1) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(i32) = G_AND [[ANYEXT]], [[ANYEXT1]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(i1) = G_TRUNC [[AND]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i1) = G_TRUNC %0(i32) + %3:_(i1) = G_TRUNC %1(i32) + %4:_(i1) = G_AND %2, %3 ... --- @@ -142,19 +142,19 @@ body: | ; CHECK-LABEL: name: and_s1_vgpr_sgpr ; CHECK: liveins: $vgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC1]](s1) - ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ANYEXT]], [[ANYEXT1]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(s1) = G_TRUNC [[AND]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s1) = G_TRUNC %0 - %3:_(s1) = G_TRUNC %1 - %4:_(s1) = G_AND %2, %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[TRUNC]](i1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[TRUNC1]](i1) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(i32) = G_AND [[ANYEXT]], [[ANYEXT1]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(i1) = G_TRUNC [[AND]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr0 + %2:_(i1) = G_TRUNC %0(i32) + %3:_(i1) = G_TRUNC %1(i32) + %4:_(i1) = G_AND %2, %3 ... # FIXME: Should just change the result bank of the scc compare. 
@@ -168,21 +168,21 @@ body: | ; CHECK-LABEL: name: and_s1_scc_vcc ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY2]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[COPY3]], [[ICMP1]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = G_CONSTANT i32 0 - %3:_(s1) = G_ICMP intpred(eq), %0, %2 - %4:_(s1) = G_ICMP intpred(eq), %1, %2 - %5:_(s1) = G_AND %3, %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY]](i32), [[C]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[COPY2]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(i1) = G_AND [[COPY3]], [[ICMP1]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = G_CONSTANT i32 0 + %3:_(i1) = G_ICMP intpred(eq), %0(i32), %2 + %4:_(i1) = G_ICMP intpred(eq), %1(i32), %2 + %5:_(i1) = G_AND %3, %4 ... --- @@ -195,20 +195,20 @@ body: | ; CHECK-LABEL: name: and_s1_vcc_scc ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY2]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]] - ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_CONSTANT i32 0 - %3:_(s1) = G_ICMP intpred(eq), %0, %2 - %4:_(s1) = G_ICMP intpred(eq), %1, %2 - %5:_(s1) = G_AND %3, %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY2]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[COPY3]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(i1) = G_AND [[ICMP]], [[ICMP1]] + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_CONSTANT i32 0 + %3:_(i1) = G_ICMP intpred(eq), %0(i32), %2 + %4:_(i1) = G_ICMP intpred(eq), %1(i32), %2 + %5:_(i1) = G_AND %3, %4 ... 
@@ -221,11 +221,11 @@ body: | # bb.0: # liveins: $sgpr0, $sgpr1 -# %0:_(s32) = COPY $sgpr0 -# %1:_(s32) = COPY $sgpr1 -# %2:_(s1) = G_TRUNC %0 -# %3:_(s1) = G_TRUNC %1 -# %4:vcc(s1) = G_AND %2, %3 +# %0:_(i32) = COPY $sgpr0 +# %1:_(i32) = COPY $sgpr1 +# %2:_(i1) = G_TRUNC %0 +# %3:_(i1) = G_TRUNC %1 +# %4:vcc(i1) = G_AND %2, %3 # ... # --- @@ -236,11 +236,11 @@ body: | # bb.0: # liveins: $vgpr0, $vgpr1 -# %0:_(s32) = COPY $vgpr0 -# %1:_(s32) = COPY $vgpr1 -# %2:_(s1) = G_TRUNC %0 -# %3:_(s1) = G_TRUNC %1 -# %4:vcc(s1) = G_AND %2, %3 +# %0:_(i32) = COPY $vgpr0 +# %1:_(i32) = COPY $vgpr1 +# %2:_(i1) = G_TRUNC %0 +# %3:_(i1) = G_TRUNC %1 +# %4:vcc(i1) = G_AND %2, %3 # ... # --- @@ -250,11 +250,11 @@ body: | # body: | # bb.0: # liveins: $vgpr0, $sgpr0 -# %0:_(s32) = COPY $vgpr0 -# %1:_(s32) = COPY $sgpr0 -# %2:_(s1) = G_TRUNC %0 -# %3:_(s1) = G_TRUNC %1 -# %4:vcc(s1) = G_AND %2, %3 +# %0:_(i32) = COPY $vgpr0 +# %1:_(i32) = COPY $sgpr0 +# %2:_(i1) = G_TRUNC %0 +# %3:_(i1) = G_TRUNC %1 +# %4:vcc(i1) = G_AND %2, %3 # ... # --- @@ -264,11 +264,11 @@ body: | # body: | # bb.0: # liveins: $vgpr0, $sgpr0 -# %0:_(s32) = COPY $sgpr0 -# %1:_(s32) = COPY $vgpr0 -# %2:_(s1) = G_TRUNC %0 -# %3:_(s1) = G_TRUNC %1 -# %4:vcc(s1) = G_AND %2, %3 +# %0:_(i32) = COPY $sgpr0 +# %1:_(i32) = COPY $vgpr0 +# %2:_(i1) = G_TRUNC %0 +# %3:_(i1) = G_TRUNC %1 +# %4:vcc(i1) = G_AND %2, %3 # ... --- @@ -281,19 +281,19 @@ body: | ; CHECK-LABEL: name: and_s1_vgpr_sgpr_sgpr ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC1]](s1) - ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ANYEXT]], [[ANYEXT1]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(s1) = G_TRUNC [[AND]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s1) = G_TRUNC %0 - %3:_(s1) = G_TRUNC %1 - %4:vgpr(s1) = G_AND %2, %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[TRUNC]](i1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[TRUNC1]](i1) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(i32) = G_AND [[ANYEXT]], [[ANYEXT1]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(i1) = G_TRUNC [[AND]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i1) = G_TRUNC %0(i32) + %3:_(i1) = G_TRUNC %1(i32) + %4:vgpr(i1) = G_AND %2, %3 ... 
--- @@ -306,17 +306,17 @@ body: | ; CHECK-LABEL: name: and_s1_sgpr_sgpr_sgpr ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC1]](s1) - ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[ANYEXT]], [[ANYEXT1]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[AND]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s1) = G_TRUNC %0 - %3:_(s1) = G_TRUNC %1 - %4:sgpr(s1) = G_AND %2, %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC]](i1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC1]](i1) + ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(i32) = G_AND [[ANYEXT]], [[ANYEXT1]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(i1) = G_TRUNC [[AND]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i1) = G_TRUNC %0(i32) + %3:_(i1) = G_TRUNC %1(i32) + %4:sgpr(i1) = G_AND %2, %3 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-and.mir index ae818e036747a..5deab202695a6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-and.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-and.mir @@ -12,12 +12,12 @@ body: | ; CHECK-LABEL: name: and_s32_ss ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[COPY]], [[COPY1]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_AND %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(i32) = G_AND [[COPY]], [[COPY1]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_AND %0, %1 ... --- @@ -30,13 +30,13 @@ body: | ; CHECK-LABEL: name: and_s32_sv ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[COPY2]], [[COPY1]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = G_AND %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(i32) = G_AND [[COPY2]], [[COPY1]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = G_AND %0, %1 ... 
--- @@ -49,13 +49,13 @@ body: | ; CHECK-LABEL: name: and_s32_vs ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[COPY]], [[COPY2]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s32) = G_AND %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(i32) = G_AND [[COPY]], [[COPY2]] + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr0 + %2:_(i32) = G_AND %0, %1 ... --- @@ -68,12 +68,12 @@ body: | ; CHECK-LABEL: name: and_s32_vv ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[COPY]], [[COPY1]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_AND %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(i32) = G_AND [[COPY]], [[COPY1]] + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_AND %0, %1 ... --- @@ -86,12 +86,12 @@ body: | ; CHECK-LABEL: name: and_s64_ss ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 - ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s64) = G_AND [[COPY]], [[COPY1]] - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s64) = COPY $sgpr2_sgpr3 - %2:_(s64) = G_AND %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i64) = COPY $sgpr2_sgpr3 + ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(i64) = G_AND [[COPY]], [[COPY1]] + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(i64) = COPY $sgpr2_sgpr3 + %2:_(i64) = G_AND %0, %1 ... 
--- @@ -104,16 +104,16 @@ body: | ; CHECK-LABEL: name: and_s64_sv ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s64) = COPY $vgpr0_vgpr1 - %2:_(s64) = G_AND %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(i32), [[UV1:%[0-9]+]]:sgpr(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(i32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(i32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[AND]](i32), [[AND1]](i32) + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(i64) = COPY $vgpr0_vgpr1 + %2:_(i64) = G_AND %0, %1 ... --- @@ -126,16 +126,16 @@ body: | ; CHECK-LABEL: name: and_s64_vs ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $sgpr0_sgpr1 - %2:_(s64) = G_AND %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:sgpr(i32), [[UV3:%[0-9]+]]:sgpr(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(i32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(i32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[AND]](i32), [[AND1]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $sgpr0_sgpr1 + %2:_(i64) = G_AND %0, %1 ... 
--- @@ -148,16 +148,16 @@ body: | ; CHECK-LABEL: name: and_s64_vv ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = G_AND %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(i32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(i32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[AND]](i32), [[AND1]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64) = G_AND %0, %1 ... --- @@ -170,18 +170,18 @@ body: | ; CHECK-LABEL: name: and_s64_vv_user ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) - ; CHECK-NEXT: S_NOP 0, implicit [[MV]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = G_AND %0, %1 - S_NOP 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(i32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(i32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[AND]](i32), [[AND1]](i32) + ; CHECK-NEXT: S_NOP 0, implicit [[MV]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64) = G_AND %0, %1 + S_NOP 0, implicit %2(i64) ... 
--- name: and_s64_ss_ss_merge @@ -193,22 +193,22 @@ body: | ; CHECK-LABEL: name: and_s64_ss_ss_merge ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s64) = G_AND [[MV]], [[MV1]] - ; CHECK-NEXT: S_NOP 0, implicit [[AND]](s64) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $sgpr2 - %3:_(s32) = COPY $sgpr3 - %4:_(s64) = G_MERGE_VALUES %0, %1 - %5:_(s64) = G_MERGE_VALUES %2, %3 - %6:_(s64) = G_AND %4, %5 - S_NOP 0, implicit %6 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:sgpr(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(i64) = G_AND [[MV]], [[MV1]] + ; CHECK-NEXT: S_NOP 0, implicit [[AND]](i64) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $sgpr2 + %3:_(i32) = COPY $sgpr3 + %4:_(i64) = G_MERGE_VALUES %0(i32), %1(i32) + %5:_(i64) = G_MERGE_VALUES %2(i32), %3(i32) + %6:_(i64) = G_AND %4, %5 + S_NOP 0, implicit %6(i64) ... --- @@ -221,26 +221,26 @@ body: | ; CHECK-LABEL: name: and_s64_vv_vv_merge ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV1]](s64) - ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] - ; CHECK-NEXT: [[MV2:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) - ; CHECK-NEXT: S_NOP 0, implicit [[MV2]](s64) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = COPY $vgpr3 - %4:_(s64) = G_MERGE_VALUES %0, %1 - %5:_(s64) = G_MERGE_VALUES %2, %3 - %6:_(s64) = G_AND %4, %5 - S_NOP 0, implicit %6 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[MV]](i64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[MV1]](i64) + 
; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(i32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(i32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV2:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[AND]](i32), [[AND1]](i32) + ; CHECK-NEXT: S_NOP 0, implicit [[MV2]](i64) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 + %4:_(i64) = G_MERGE_VALUES %0(i32), %1(i32) + %5:_(i64) = G_MERGE_VALUES %2(i32), %3(i32) + %6:_(i64) = G_AND %4, %5 + S_NOP 0, implicit %6(i64) ... --- @@ -253,23 +253,23 @@ body: | ; CHECK-LABEL: name: and_s64_s_sv_merge ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY3]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) - ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] - ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) - ; CHECK-NEXT: S_NOP 0, implicit [[MV1]](s64) - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s32) = COPY $vgpr0 - %3:_(s64) = G_MERGE_VALUES %1, %2 - %4:_(s64) = G_AND %0, %3 - S_NOP 0, implicit %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[COPY3]](i32), [[COPY2]](i32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(i32), [[UV1:%[0-9]+]]:sgpr(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[MV]](i64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(i32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(i32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[AND]](i32), [[AND1]](i32) + ; CHECK-NEXT: S_NOP 0, implicit [[MV1]](i64) + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(i32) = COPY $sgpr2 + %2:_(i32) = COPY $vgpr0 + %3:_(i64) = G_MERGE_VALUES %1(i32), %2(i32) + %4:_(i64) = G_AND %0, %3 + S_NOP 0, implicit %4(i64) ... 
--- @@ -282,23 +282,23 @@ body: | ; CHECK-LABEL: name: and_s64_s_vs_merge ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) - ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] - ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) - ; CHECK-NEXT: S_NOP 0, implicit [[MV1]](s64) - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s32) = COPY $vgpr0 - %3:_(s64) = G_MERGE_VALUES %2, %1 - %4:_(s64) = G_AND %0, %3 - S_NOP 0, implicit %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(i32), [[UV1:%[0-9]+]]:sgpr(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[MV]](i64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(i32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(i32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[AND]](i32), [[AND1]](i32) + ; CHECK-NEXT: S_NOP 0, implicit [[MV1]](i64) + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(i32) = COPY $sgpr2 + %2:_(i32) = COPY $vgpr0 + %3:_(i64) = G_MERGE_VALUES %2(i32), %1(i32) + %4:_(i64) = G_AND %0, %3 + S_NOP 0, implicit %4(i64) ... 
--- @@ -311,28 +311,28 @@ body: | ; CHECK-LABEL: name: and_s64_sv_sv_merge ; CHECK: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY5]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV1]](s64) - ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] - ; CHECK-NEXT: [[MV2:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) - ; CHECK-NEXT: S_NOP 0, implicit [[MV2]](s64) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $vgpr0 - %3:_(s32) = COPY $vgpr1 - %4:_(s64) = G_MERGE_VALUES %0, %2 - %5:_(s64) = G_MERGE_VALUES %1, %3 - %6:_(s64) = G_AND %4, %5 - S_NOP 0, implicit %6 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY2]](i32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[COPY5]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[MV]](i64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[MV1]](i64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(i32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(i32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV2:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[AND]](i32), [[AND1]](i32) + ; CHECK-NEXT: S_NOP 0, implicit [[MV2]](i64) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $vgpr0 + %3:_(i32) = COPY $vgpr1 + %4:_(i64) = G_MERGE_VALUES %0(i32), %2(i32) + %5:_(i64) = G_MERGE_VALUES %1(i32), %3(i32) + %6:_(i64) = G_AND %4, %5 + S_NOP 0, implicit %6(i64) ... 
--- @@ -345,28 +345,28 @@ body: | ; CHECK-LABEL: name: and_s64_sv_vs_merge ; CHECK: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY3]](s32), [[COPY5]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV1]](s64) - ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] - ; CHECK-NEXT: [[MV2:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) - ; CHECK-NEXT: S_NOP 0, implicit [[MV2]](s64) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $vgpr0 - %3:_(s32) = COPY $vgpr1 - %4:_(s64) = G_MERGE_VALUES %0, %2 - %5:_(s64) = G_MERGE_VALUES %3, %1 - %6:_(s64) = G_AND %4, %5 - S_NOP 0, implicit %6 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY2]](i32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[COPY3]](i32), [[COPY5]](i32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[MV]](i64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[MV1]](i64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(i32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(i32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV2:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[AND]](i32), [[AND1]](i32) + ; CHECK-NEXT: S_NOP 0, implicit [[MV2]](i64) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $vgpr0 + %3:_(i32) = COPY $vgpr1 + %4:_(i64) = G_MERGE_VALUES %0(i32), %2(i32) + %5:_(i64) = G_MERGE_VALUES %3(i32), %1(i32) + %6:_(i64) = G_AND %4, %5 + S_NOP 0, implicit %6(i64) ... 
--- @@ -379,26 +379,26 @@ body: | ; CHECK-LABEL: name: and_chain_s64_sv ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](s64) - ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:vgpr(s32) = G_AND [[UV4]], [[UV6]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:vgpr(s32) = G_AND [[UV5]], [[UV7]] - ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND2]](s32), [[AND3]](s32) - ; CHECK-NEXT: S_NOP 0, implicit [[MV1]](s64) - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s64) = COPY $sgpr2_sgpr3 - %2:_(s64) = COPY $vgpr0_vgpr1 - %3:_(s64) = G_AND %0, %2 - %4:_(s64) = G_AND %1, %3 - S_NOP 0, implicit %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i64) = COPY $sgpr2_sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(i32), [[UV1:%[0-9]+]]:sgpr(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY2]](i64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(i32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(i32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[AND]](i32), [[AND1]](i32) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:sgpr(i32), [[UV5:%[0-9]+]]:sgpr(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:vgpr(i32), [[UV7:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[MV]](i64) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:vgpr(i32) = G_AND [[UV4]], [[UV6]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:vgpr(i32) = G_AND [[UV5]], [[UV7]] + ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[AND2]](i32), [[AND3]](i32) + ; CHECK-NEXT: S_NOP 0, implicit [[MV1]](i64) + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(i64) = COPY $sgpr2_sgpr3 + %2:_(i64) = COPY $vgpr0_vgpr1 + %3:_(i64) = G_AND %0, %2 + %4:_(i64) = G_AND %1, %3 + S_NOP 0, implicit %4(i64) ... 
--- @@ -411,14 +411,14 @@ body: | ; CHECK-LABEL: name: and_v2i32_ss ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s32>) = COPY $sgpr2_sgpr3 - ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(<2 x s32>) = G_AND [[COPY]], [[COPY1]] - ; CHECK-NEXT: S_NOP 0, implicit [[AND]](<2 x s32>) - %0:_(<2 x s32>) = COPY $sgpr0_sgpr1 - %1:_(<2 x s32>) = COPY $sgpr2_sgpr3 - %2:_(<2 x s32>) = G_AND %0, %1 - S_NOP 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x i32>) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x i32>) = COPY $sgpr2_sgpr3 + ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(<2 x i32>) = G_AND [[COPY]], [[COPY1]] + ; CHECK-NEXT: S_NOP 0, implicit [[AND]](<2 x i32>) + %0:_(<2 x i32>) = COPY $sgpr0_sgpr1 + %1:_(<2 x i32>) = COPY $sgpr2_sgpr3 + %2:_(<2 x i32>) = G_AND %0, %1 + S_NOP 0, implicit %2(<2 x i32>) ... --- @@ -431,18 +431,18 @@ body: | ; CHECK-LABEL: name: and_v2i32_sv ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) - ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $sgpr0_sgpr1 - %1:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %2:_(<2 x s32>) = G_AND %0, %1 - S_NOP 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x i32>) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(i32), [[UV1:%[0-9]+]]:sgpr(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(i32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(i32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x i32>) = G_BUILD_VECTOR [[AND]](i32), [[AND1]](i32) + ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $sgpr0_sgpr1 + %1:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %2:_(<2 x i32>) = G_AND %0, %1 + S_NOP 0, implicit %2(<2 x i32>) ... 
--- @@ -456,18 +456,18 @@ body: | ; CHECK-LABEL: name: and_v2i32_vs ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) - ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $sgpr0_sgpr1 - %2:_(<2 x s32>) = G_AND %0, %1 - S_NOP 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x i32>) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:sgpr(i32), [[UV3:%[0-9]+]]:sgpr(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(i32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(i32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x i32>) = G_BUILD_VECTOR [[AND]](i32), [[AND1]](i32) + ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $sgpr0_sgpr1 + %2:_(<2 x i32>) = G_AND %0, %1 + S_NOP 0, implicit %2(<2 x i32>) ... --- @@ -480,18 +480,18 @@ body: | ; CHECK-LABEL: name: and_v2i32_vv ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) - ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = G_AND %0, %1 - S_NOP 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x i32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(i32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(i32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x i32>) = G_BUILD_VECTOR [[AND]](i32), [[AND1]](i32) + ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i32>) = G_AND %0, %1 + S_NOP 0, implicit %2(<2 x i32>) ... 
--- @@ -504,12 +504,12 @@ body: | ; CHECK-LABEL: name: and_v4s16_ss ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr2_sgpr3 - ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(<4 x s16>) = G_AND [[COPY]], [[COPY1]] - %0:_(<4 x s16>) = COPY $sgpr0_sgpr1 - %1:_(<4 x s16>) = COPY $sgpr2_sgpr3 - %2:_(<4 x s16>) = G_AND %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x i16>) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<4 x i16>) = COPY $sgpr2_sgpr3 + ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(<4 x i16>) = G_AND [[COPY]], [[COPY1]] + %0:_(<4 x i16>) = COPY $sgpr0_sgpr1 + %1:_(<4 x i16>) = COPY $sgpr2_sgpr3 + %2:_(<4 x i16>) = G_AND %0, %1 ... --- @@ -522,16 +522,16 @@ body: | ; CHECK-LABEL: name: and_v4s16_sv ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(<2 x s16>), [[UV1:%[0-9]+]]:sgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x s16>), [[UV3:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[UV]], [[UV2]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[UV1]], [[UV3]] - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[AND]](<2 x s16>), [[AND1]](<2 x s16>) - %0:_(<4 x s16>) = COPY $sgpr0_sgpr1 - %1:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %2:_(<4 x s16>) = G_AND %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x i16>) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(<2 x i16>), [[UV1:%[0-9]+]]:sgpr(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x i16>), [[UV3:%[0-9]+]]:vgpr(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x i16>) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(<2 x i16>) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x i16>) = G_CONCAT_VECTORS [[AND]](<2 x i16>), [[AND1]](<2 x i16>) + %0:_(<4 x i16>) = COPY $sgpr0_sgpr1 + %1:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %2:_(<4 x i16>) = G_AND %0, %1 ... 
--- @@ -544,16 +544,16 @@ body: | ; CHECK-LABEL: name: and_v4s16_vs ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s16>), [[UV1:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:sgpr(<2 x s16>), [[UV3:%[0-9]+]]:sgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[UV]], [[UV2]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[UV1]], [[UV3]] - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[AND]](<2 x s16>), [[AND1]](<2 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $sgpr0_sgpr1 - %2:_(<4 x s16>) = G_AND %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<4 x i16>) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x i16>), [[UV1:%[0-9]+]]:vgpr(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:sgpr(<2 x i16>), [[UV3:%[0-9]+]]:sgpr(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x i16>) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(<2 x i16>) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x i16>) = G_CONCAT_VECTORS [[AND]](<2 x i16>), [[AND1]](<2 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = COPY $sgpr0_sgpr1 + %2:_(<4 x i16>) = G_AND %0, %1 ... --- @@ -566,16 +566,16 @@ body: | ; CHECK-LABEL: name: and_v4s16_vv ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s16>), [[UV1:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x s16>), [[UV3:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[UV]], [[UV2]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[UV1]], [[UV3]] - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[AND]](<2 x s16>), [[AND1]](<2 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 - %2:_(<4 x s16>) = G_AND %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<4 x i16>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x i16>), [[UV1:%[0-9]+]]:vgpr(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x i16>), [[UV3:%[0-9]+]]:vgpr(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x i16>) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(<2 x i16>) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x i16>) = G_CONCAT_VECTORS [[AND]](<2 x i16>), [[AND1]](<2 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = COPY $vgpr2_vgpr3 + %2:_(<4 x i16>) = G_AND %0, %1 ... 
--- @@ -588,12 +588,12 @@ body: | ; CHECK-LABEL: name: and_v2s16_ss ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(<2 x s16>) = G_AND [[COPY]], [[COPY1]] - %0:_(<2 x s16>) = COPY $sgpr0 - %1:_(<2 x s16>) = COPY $sgpr1 - %2:_(<2 x s16>) = G_AND %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(<2 x i16>) = G_AND [[COPY]], [[COPY1]] + %0:_(<2 x i16>) = COPY $sgpr0 + %1:_(<2 x i16>) = COPY $sgpr1 + %2:_(<2 x i16>) = G_AND %0, %1 ... --- @@ -606,13 +606,13 @@ body: | ; CHECK-LABEL: name: and_v2s16_sv ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[COPY2]], [[COPY1]] - %0:_(<2 x s16>) = COPY $sgpr0 - %1:_(<2 x s16>) = COPY $vgpr0 - %2:_(<2 x s16>) = G_AND %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x i16>) = COPY [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x i16>) = G_AND [[COPY2]], [[COPY1]] + %0:_(<2 x i16>) = COPY $sgpr0 + %1:_(<2 x i16>) = COPY $vgpr0 + %2:_(<2 x i16>) = G_AND %0, %1 ... --- @@ -625,13 +625,13 @@ body: | ; CHECK-LABEL: name: and_v2s16_vs ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[COPY]], [[COPY2]] - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $sgpr0 - %2:_(<2 x s16>) = G_AND %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x i16>) = COPY [[COPY1]](<2 x i16>) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x i16>) = G_AND [[COPY]], [[COPY2]] + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $sgpr0 + %2:_(<2 x i16>) = G_AND %0, %1 ... --- @@ -644,11 +644,10 @@ body: | ; CHECK-LABEL: name: and_v2s16_vv ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[COPY]], [[COPY1]] - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_AND %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x i16>) = G_AND [[COPY]], [[COPY1]] + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = G_AND %0, %1 ... 
- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-anyext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-anyext.mir index 22de4d00c2a51..fa276c45f57d6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-anyext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-anyext.mir @@ -12,10 +12,10 @@ body: | ; CHECK-LABEL: name: anyext_s32_to_s64_s ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s64) = G_ANYEXT [[COPY]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s64) = G_ANYEXT %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i64) = G_ANYEXT [[COPY]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i64) = G_ANYEXT %0(i32) ... --- @@ -28,12 +28,12 @@ body: | ; CHECK-LABEL: name: anyext_s32_to_s64_v ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[DEF]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s64) = G_ANYEXT %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[COPY1]](i32), [[DEF]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i64) = G_ANYEXT %0(i32) ... --- @@ -46,15 +46,15 @@ body: | ; CHECK-LABEL: name: anyext_s1_to_s16_scc ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s16) = G_ANYEXT [[TRUNC]](s1) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s1) = G_ICMP intpred(eq), %0, %1 - %3:_(s16) = G_ANYEXT %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY1]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i16) = G_ANYEXT [[TRUNC]](i1) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i1) = G_ICMP intpred(eq), %0(i32), %1 + %3:_(i16) = G_ANYEXT %2(i1) ... 
--- @@ -67,15 +67,15 @@ body: | ; CHECK-LABEL: name: anyext_s1_to_s32_scc ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s1) = G_ICMP intpred(eq), %0, %1 - %3:_(s32) = G_ANYEXT %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY1]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC]](i1) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i1) = G_ICMP intpred(eq), %0(i32), %1 + %3:_(i32) = G_ANYEXT %2(i1) ... --- @@ -88,15 +88,15 @@ body: | ; CHECK-LABEL: name: anyext_s1_to_s64_scc ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s64) = G_ANYEXT [[TRUNC]](s1) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s1) = G_ICMP intpred(eq), %0, %1 - %3:_(s64) = G_ANYEXT %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY1]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i64) = G_ANYEXT [[TRUNC]](i1) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i1) = G_ICMP intpred(eq), %0(i32), %1 + %3:_(i64) = G_ANYEXT %2(i1) ... --- @@ -109,17 +109,17 @@ body: | ; CHECK-LABEL: name: anyext_s1_to_s16_vcc ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] - ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[SELECT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s1) = G_ICMP intpred(eq), %0, %1 - %3:_(s16) = G_ANYEXT %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY1]] + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[C]], [[C1]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i16) = G_TRUNC [[SELECT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i1) = G_ICMP intpred(eq), %0(i32), %1 + %3:_(i16) = G_ANYEXT %2(i1) ... 
--- @@ -132,16 +132,16 @@ body: | ; CHECK-LABEL: name: anyext_s1_to_s32_vcc ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] - ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s1) = G_ICMP intpred(eq), %0, %1 - %3:_(s32) = G_ANYEXT %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY1]] + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[C]], [[C1]] + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i1) = G_ICMP intpred(eq), %0(i32), %1 + %3:_(i32) = G_ANYEXT %2(i1) ... --- @@ -154,18 +154,18 @@ body: | ; CHECK-LABEL: name: anyext_s1_to_s64_vcc ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] - ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[DEF]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s1) = G_ICMP intpred(eq), %0, %1 - %3:_(s64) = G_ANYEXT %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY1]] + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[C]], [[C1]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[SELECT]](i32), [[DEF]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i1) = G_ICMP intpred(eq), %0(i32), %1 + %3:_(i64) = G_ANYEXT %2(i1) ... --- @@ -178,12 +178,12 @@ body: | ; CHECK-LABEL: name: anyext_s1_to_s16_sgpr ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s16) = G_ANYEXT [[TRUNC]](s1) - %0:_(s32) = COPY $sgpr0 - %1:_(s1) = G_TRUNC %0 - %2:_(s16) = G_ANYEXT %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i16) = G_ANYEXT [[TRUNC]](i1) + %0:_(i32) = COPY $sgpr0 + %1:_(i1) = G_TRUNC %0(i32) + %2:_(i16) = G_ANYEXT %1(i1) ... 
--- @@ -196,12 +196,12 @@ body: | ; CHECK-LABEL: name: anyext_s1_to_s32_sgpr ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1) - %0:_(s32) = COPY $sgpr0 - %1:_(s1) = G_TRUNC %0 - %2:_(s32) = G_ANYEXT %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC]](i1) + %0:_(i32) = COPY $sgpr0 + %1:_(i1) = G_TRUNC %0(i32) + %2:_(i32) = G_ANYEXT %1(i1) ... --- @@ -214,12 +214,12 @@ body: | ; CHECK-LABEL: name: anyext_s1_to_s64_sgpr ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s64) = G_ANYEXT [[TRUNC]](s1) - %0:_(s32) = COPY $sgpr0 - %1:_(s1) = G_TRUNC %0 - %2:_(s64) = G_ANYEXT %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i64) = G_ANYEXT [[TRUNC]](i1) + %0:_(i32) = COPY $sgpr0 + %1:_(i1) = G_TRUNC %0(i32) + %2:_(i64) = G_ANYEXT %1(i1) ... --- @@ -232,12 +232,12 @@ body: | ; CHECK-LABEL: name: anyext_s1_to_s16_vgpr ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s16) = G_ANYEXT [[TRUNC]](s1) - %0:_(s32) = COPY $vgpr0 - %1:_(s1) = G_TRUNC %0 - %2:_(s16) = G_ANYEXT %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(i16) = G_ANYEXT [[TRUNC]](i1) + %0:_(i32) = COPY $vgpr0 + %1:_(i1) = G_TRUNC %0(i32) + %2:_(i16) = G_ANYEXT %1(i1) ... --- @@ -250,12 +250,12 @@ body: | ; CHECK-LABEL: name: anyext_s1_to_s32_vgpr ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC]](s1) - %0:_(s32) = COPY $vgpr0 - %1:_(s1) = G_TRUNC %0 - %2:_(s32) = G_ANYEXT %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[TRUNC]](i1) + %0:_(i32) = COPY $vgpr0 + %1:_(i1) = G_TRUNC %0(i32) + %2:_(i32) = G_ANYEXT %1(i1) ... 
--- @@ -268,12 +268,12 @@ body: | ; CHECK-LABEL: name: anyext_s1_to_s64_vgpr ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[ANYEXT]](s32), [[DEF]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s1) = G_TRUNC %0 - %2:_(s64) = G_ANYEXT %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[TRUNC]](i1) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[ANYEXT]](i32), [[DEF]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i1) = G_TRUNC %0(i32) + %2:_(i64) = G_ANYEXT %1(i1) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ashr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ashr.mir index 615cfec2b31cf..c50dd03d85885 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ashr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ashr.mir @@ -12,14 +12,14 @@ body: | ; CHECK-LABEL: name: ashr_s32_ss ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:sgpr(s32) = G_ASHR [[COPY]], [[COPY1]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[ASHR]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ASHR %0, %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:sgpr(i32) = G_ASHR [[COPY]], [[COPY1]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[ASHR]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_ASHR %0, %1(i32) + S_ENDPGM 0, implicit %2(i32) ... --- @@ -32,15 +32,15 @@ body: | ; CHECK-LABEL: name: ashr_s32_sv ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[COPY2]], [[COPY1]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[ASHR]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = G_ASHR %0, %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(i32) = G_ASHR [[COPY2]], [[COPY1]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[ASHR]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = G_ASHR %0, %1(i32) + S_ENDPGM 0, implicit %2(i32) ... 
--- @@ -53,15 +53,15 @@ body: | ; CHECK-LABEL: name: ashr_s32_vs ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[COPY]], [[COPY2]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[ASHR]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s32) = G_ASHR %0, %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(i32) = G_ASHR [[COPY]], [[COPY2]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[ASHR]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr0 + %2:_(i32) = G_ASHR %0, %1(i32) + S_ENDPGM 0, implicit %2(i32) ... --- @@ -74,14 +74,14 @@ body: | ; CHECK-LABEL: name: ashr_s32_vv ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[COPY]], [[COPY1]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[ASHR]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_ASHR %0, %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(i32) = G_ASHR [[COPY]], [[COPY1]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[ASHR]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_ASHR %0, %1(i32) + S_ENDPGM 0, implicit %2(i32) ... --- @@ -94,21 +94,21 @@ body: | ; CHECK-LABEL: name: ashr_s16_ss ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC]](s16) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s16) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:sgpr(s32) = G_ASHR [[SEXT]], [[ZEXT]](s32) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[ASHR]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC2]](s16) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s16) = G_ASHR %2, %3 - S_ENDPGM 0, implicit %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:sgpr(i32) = G_SEXT [[TRUNC]](i16) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i16) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:sgpr(i32) = G_ASHR [[SEXT]], [[ZEXT]](i32) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(i16) = G_TRUNC [[ASHR]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC2]](i16) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i16) = G_ASHR %2, %3(i16) + S_ENDPGM 0, implicit %4(i16) ... 
--- @@ -122,19 +122,19 @@ body: | ; CHECK-LABEL: name: ashr_s16_sv ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC]](s16) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[COPY2]], [[TRUNC1]](s16) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[ASHR]](s16) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s16) = G_ASHR %2, %3 - S_ENDPGM 0, implicit %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i16) = COPY [[TRUNC]](i16) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(i16) = G_ASHR [[COPY2]], [[TRUNC1]](i16) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[ASHR]](i16) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i16) = G_ASHR %2, %3(i16) + S_ENDPGM 0, implicit %4(i16) ... --- @@ -147,19 +147,19 @@ body: | ; CHECK-LABEL: name: ashr_s16_vs ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC1]](s16) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY2]](s16) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[ASHR]](s16) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s16) = G_ASHR %2, %3 - S_ENDPGM 0, implicit %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i16) = COPY [[TRUNC1]](i16) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(i16) = G_ASHR [[TRUNC]], [[COPY2]](i16) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[ASHR]](i16) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr0 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i16) = G_ASHR %2, %3(i16) + S_ENDPGM 0, implicit %4(i16) ... 
@@ -173,18 +173,18 @@ body: | ; CHECK-LABEL: name: ashr_s16_vv ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[TRUNC1]](s16) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[ASHR]](s16) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s16) = G_ASHR %2, %3 - S_ENDPGM 0, implicit %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(i16) = G_ASHR [[TRUNC]], [[TRUNC1]](i16) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[ASHR]](i16) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i16) = G_ASHR %2, %3(i16) + S_ENDPGM 0, implicit %4(i16) ... @@ -198,24 +198,24 @@ body: | ; CHECK-LABEL: name: ashr_v2s16_ss ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:sgpr(s32) = G_SEXT_INREG [[BITCAST]], 16 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:sgpr(s32) = G_ASHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:sgpr(s32) = G_SEXT_INREG [[BITCAST1]], 16 - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:sgpr(s32) = G_ASHR [[BITCAST1]], [[C1]](s32) - ; CHECK-NEXT: [[ASHR2:%[0-9]+]]:sgpr(s32) = G_ASHR [[SEXT_INREG]], [[SEXT_INREG1]](s32) - ; CHECK-NEXT: [[ASHR3:%[0-9]+]]:sgpr(s32) = G_ASHR [[ASHR]], [[ASHR1]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ASHR2]](s32), [[ASHR3]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR_TRUNC]](<2 x s16>) - %0:_(<2 x s16>) = COPY $sgpr0 - %1:_(<2 x s16>) = COPY $sgpr1 - %2:_(<2 x s16>) = G_ASHR %0, %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:sgpr(i32) = G_SEXT_INREG [[BITCAST]], 16 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:sgpr(i32) = G_ASHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:sgpr(i32) = G_SEXT_INREG [[BITCAST1]], 16 + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:sgpr(i32) = G_ASHR [[BITCAST1]], [[C1]](i32) + ; CHECK-NEXT: [[ASHR2:%[0-9]+]]:sgpr(i32) = G_ASHR [[SEXT_INREG]], [[SEXT_INREG1]](i32) + ; CHECK-NEXT: [[ASHR3:%[0-9]+]]:sgpr(i32) = G_ASHR [[ASHR]], [[ASHR1]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC [[ASHR2]](i32), [[ASHR3]](i32) + ; 
CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR_TRUNC]](<2 x i16>) + %0:_(<2 x i16>) = COPY $sgpr0 + %1:_(<2 x i16>) = COPY $sgpr1 + %2:_(<2 x i16>) = G_ASHR %0, %1(<2 x i16>) + S_ENDPGM 0, implicit %2(<2 x i16>) ... @@ -229,15 +229,15 @@ body: | ; CHECK-LABEL: name: ashr_v2s16_sv ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(<2 x s16>) = G_ASHR [[COPY2]], [[COPY1]](<2 x s16>) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>) - %0:_(<2 x s16>) = COPY $sgpr0 - %1:_(<2 x s16>) = COPY $vgpr0 - %2:_(<2 x s16>) = G_ASHR %0, %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x i16>) = COPY [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(<2 x i16>) = G_ASHR [[COPY2]], [[COPY1]](<2 x i16>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[ASHR]](<2 x i16>) + %0:_(<2 x i16>) = COPY $sgpr0 + %1:_(<2 x i16>) = COPY $vgpr0 + %2:_(<2 x i16>) = G_ASHR %0, %1(<2 x i16>) + S_ENDPGM 0, implicit %2(<2 x i16>) ... --- @@ -250,15 +250,15 @@ body: | ; CHECK-LABEL: name: ashr_v2s16_vs ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY2]](<2 x s16>) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $sgpr0 - %2:_(<2 x s16>) = G_ASHR %0, %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x i16>) = COPY [[COPY1]](<2 x i16>) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(<2 x i16>) = G_ASHR [[COPY]], [[COPY2]](<2 x i16>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[ASHR]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $sgpr0 + %2:_(<2 x i16>) = G_ASHR %0, %1(<2 x i16>) + S_ENDPGM 0, implicit %2(<2 x i16>) ... @@ -272,13 +272,13 @@ body: | ; CHECK-LABEL: name: ashr_v2s16_vv ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_ASHR %0, %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr1 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(<2 x i16>) = G_ASHR [[COPY]], [[COPY1]](<2 x i16>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[ASHR]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = G_ASHR %0, %1(<2 x i16>) + S_ENDPGM 0, implicit %2(<2 x i16>) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-assert-align.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-assert-align.mir index 08acea98700ab..e8aadab729943 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-assert-align.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-assert-align.mir @@ -18,7 +18,7 @@ body: | ; CHECK-NEXT: S_ENDPGM 0, implicit %assert_align(p1) %copy:_(p1) = COPY $vgpr0_vgpr1 %assert_align:_(p1) = G_ASSERT_ALIGN %copy, 4 - S_ENDPGM 0, implicit %assert_align + S_ENDPGM 0, implicit %assert_align(p1) ... --- @@ -38,7 +38,7 @@ body: | ; CHECK-NEXT: S_ENDPGM 0, implicit %assert_align(p1) %copy:_(p1) = COPY $sgpr8_sgpr9 %assert_align:_(p1) = G_ASSERT_ALIGN %copy, 4 - S_ENDPGM 0, implicit %assert_align + S_ENDPGM 0, implicit %assert_align(p1) ... --- @@ -58,5 +58,5 @@ body: | ; CHECK-NEXT: S_ENDPGM 0, implicit %assert_align(p1) %copy:_(p1) = COPY $agpr0_agpr1 %assert_align:_(p1) = G_ASSERT_ALIGN %copy, 4 - S_ENDPGM 0, implicit %assert_align + S_ENDPGM 0, implicit %assert_align(p1) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-assert-zext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-assert-zext.mir index 0bce908969c17..a15736ef21b59 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-assert-zext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-assert-zext.mir @@ -13,12 +13,12 @@ body: | ; CHECK-LABEL: name: assert_zext_vgpr ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %copy:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: %assert_zext:vgpr(s32) = G_ASSERT_ZEXT %copy, 4 - ; CHECK-NEXT: S_ENDPGM 0, implicit %assert_zext(s32) - %copy:_(s32) = COPY $vgpr0 - %assert_zext:_(s32) = G_ASSERT_ZEXT %copy, 4 - S_ENDPGM 0, implicit %assert_zext + ; CHECK-NEXT: %copy:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: %assert_zext:vgpr(i32) = G_ASSERT_ZEXT %copy, 4 + ; CHECK-NEXT: S_ENDPGM 0, implicit %assert_zext(i32) + %copy:_(i32) = COPY $vgpr0 + %assert_zext:_(i32) = G_ASSERT_ZEXT %copy, 4 + S_ENDPGM 0, implicit %assert_zext(i32) ... --- @@ -33,12 +33,12 @@ body: | ; CHECK-LABEL: name: assert_zext_sgpr ; CHECK: liveins: $sgpr8 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %copy:sgpr(s32) = COPY $sgpr8 - ; CHECK-NEXT: %assert_zext:sgpr(s32) = G_ASSERT_ZEXT %copy, 4 - ; CHECK-NEXT: S_ENDPGM 0, implicit %assert_zext(s32) - %copy:_(s32) = COPY $sgpr8 - %assert_zext:_(s32) = G_ASSERT_ZEXT %copy, 4 - S_ENDPGM 0, implicit %assert_zext + ; CHECK-NEXT: %copy:sgpr(i32) = COPY $sgpr8 + ; CHECK-NEXT: %assert_zext:sgpr(i32) = G_ASSERT_ZEXT %copy, 4 + ; CHECK-NEXT: S_ENDPGM 0, implicit %assert_zext(i32) + %copy:_(i32) = COPY $sgpr8 + %assert_zext:_(i32) = G_ASSERT_ZEXT %copy, 4 + S_ENDPGM 0, implicit %assert_zext(i32) ... --- @@ -53,12 +53,12 @@ body: | ; CHECK-LABEL: name: assert_zext_agpr ; CHECK: liveins: $agpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %copy:agpr(s32) = COPY $agpr0 - ; CHECK-NEXT: %assert_zext:agpr(s32) = G_ASSERT_ZEXT %copy, 4 - ; CHECK-NEXT: S_ENDPGM 0, implicit %assert_zext(s32) - %copy:_(s32) = COPY $agpr0 - %assert_zext:_(s32) = G_ASSERT_ZEXT %copy, 4 - S_ENDPGM 0, implicit %assert_zext + ; CHECK-NEXT: %copy:agpr(i32) = COPY $agpr0 + ; CHECK-NEXT: %assert_zext:agpr(i32) = G_ASSERT_ZEXT %copy, 4 + ; CHECK-NEXT: S_ENDPGM 0, implicit %assert_zext(i32) + %copy:_(i32) = COPY $agpr0 + %assert_zext:_(i32) = G_ASSERT_ZEXT %copy, 4 + S_ENDPGM 0, implicit %assert_zext(i32) ... 
--- @@ -73,12 +73,12 @@ body: | ; CHECK-LABEL: name: assert_zext_vgpr_regclass ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %copy:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: %assert_zext:vgpr(s32) = G_ASSERT_ZEXT %copy, 4 - ; CHECK-NEXT: S_ENDPGM 0, implicit %assert_zext(s32) - %copy:vgpr_32(s32) = COPY $vgpr0 - %assert_zext:_(s32) = G_ASSERT_ZEXT %copy, 4 - S_ENDPGM 0, implicit %assert_zext + ; CHECK-NEXT: %copy:vgpr_32(i32) = COPY $vgpr0 + ; CHECK-NEXT: %assert_zext:vgpr(i32) = G_ASSERT_ZEXT %copy, 4 + ; CHECK-NEXT: S_ENDPGM 0, implicit %assert_zext(i32) + %copy:vgpr_32(i32) = COPY $vgpr0 + %assert_zext:_(i32) = G_ASSERT_ZEXT %copy, 4 + S_ENDPGM 0, implicit %assert_zext(i32) ... --- @@ -93,10 +93,10 @@ body: | ; CHECK-LABEL: name: assert_zext_sgpr_regcllass ; CHECK: liveins: $sgpr8 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %copy:sgpr_32(s32) = COPY $sgpr8 - ; CHECK-NEXT: %assert_zext:sgpr(s32) = G_ASSERT_ZEXT %copy, 4 - ; CHECK-NEXT: S_ENDPGM 0, implicit %assert_zext(s32) - %copy:sgpr_32(s32) = COPY $sgpr8 - %assert_zext:_(s32) = G_ASSERT_ZEXT %copy, 4 - S_ENDPGM 0, implicit %assert_zext + ; CHECK-NEXT: %copy:sgpr_32(i32) = COPY $sgpr8 + ; CHECK-NEXT: %assert_zext:sgpr(i32) = G_ASSERT_ZEXT %copy, 4 + ; CHECK-NEXT: S_ENDPGM 0, implicit %assert_zext(i32) + %copy:sgpr_32(i32) = COPY $sgpr8 + %assert_zext:_(i32) = G_ASSERT_ZEXT %copy, 4 + S_ENDPGM 0, implicit %assert_zext(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomic-cmpxchg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomic-cmpxchg.mir index 48ce59ffd3c43..9034c28d88ea3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomic-cmpxchg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomic-cmpxchg.mir @@ -13,16 +13,16 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2, $sgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:vgpr(s32) = G_ATOMIC_CMPXCHG [[COPY3]](p1), [[COPY4]], [[COPY5]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:vgpr(i32) = G_ATOMIC_CMPXCHG [[COPY3]](p1), [[COPY4]], [[COPY5]] :: (load store seq_cst (i32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s32) = COPY $sgpr3 - %3:_(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst (s32), addrspace 1) + %1:_(i32) = COPY $sgpr2 + %2:_(i32) = COPY $sgpr3 + %3:_(i32) = G_ATOMIC_CMPXCHG %0(p1), %1, %2 :: (load store seq_cst (i32), addrspace 1) ... 
--- @@ -36,16 +36,16 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2, $sgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:vgpr(s32) = G_ATOMIC_CMPXCHG [[COPY3]](p0), [[COPY4]], [[COPY5]] :: (load store seq_cst (s32)) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:vgpr(i32) = G_ATOMIC_CMPXCHG [[COPY3]](p0), [[COPY4]], [[COPY5]] :: (load store seq_cst (i32)) %0:_(p0) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s32) = COPY $sgpr3 - %3:_(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst (s32), addrspace 0) + %1:_(i32) = COPY $sgpr2 + %2:_(i32) = COPY $sgpr3 + %3:_(i32) = G_ATOMIC_CMPXCHG %0(p0), %1, %2 :: (load store seq_cst (i32)) ... --- @@ -59,14 +59,14 @@ body: | ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:vgpr(s32) = G_ATOMIC_CMPXCHG [[COPY3]](p3), [[COPY4]], [[COPY5]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:vgpr(i32) = G_ATOMIC_CMPXCHG [[COPY3]](p3), [[COPY4]], [[COPY5]] :: (load store seq_cst (i32), addrspace 3) %0:_(p3) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $sgpr2 - %3:_(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst (s32), addrspace 3) + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $sgpr2 + %3:_(i32) = G_ATOMIC_CMPXCHG %0(p3), %1, %2 :: (load store seq_cst (i32), addrspace 3) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-add.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-add.mir index bcd676f31c90a..c5cd1732c5b09 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-add.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-add.mir @@ -13,13 +13,13 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_ADD [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(i32) = G_ATOMICRMW_ADD [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (i32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 1) + %1:_(i32) = COPY $sgpr2 + %2:_(i32) = G_ATOMICRMW_ADD %0(p1), %1 :: (load store seq_cst (i32), addrspace 1) ... --- @@ -33,13 +33,13 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_ADD [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s32)) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(i32) = G_ATOMICRMW_ADD [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (i32)) %0:_(p0) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 0) + %1:_(i32) = COPY $sgpr2 + %2:_(i32) = G_ATOMICRMW_ADD %0(p0), %1 :: (load store seq_cst (i32)) ... --- @@ -53,11 +53,11 @@ body: | ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_ADD [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(i32) = G_ATOMICRMW_ADD [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (i32), addrspace 3) %0:_(p3) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 3) + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_ATOMICRMW_ADD %0(p3), %1 :: (load store seq_cst (i32), addrspace 3) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-and.mir index e4fe9c5c7d9f3..d9d5c35e16bf8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-and.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-and.mir @@ -13,13 +13,13 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_AND:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_AND [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[ATOMICRMW_AND:%[0-9]+]]:vgpr(i32) = G_ATOMICRMW_AND [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (i32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_AND %0, %1 :: (load store seq_cst (s32), addrspace 1) + %1:_(i32) = COPY $sgpr2 + %2:_(i32) = G_ATOMICRMW_AND %0(p1), %1 :: (load store seq_cst (i32), addrspace 1) ... --- @@ -33,13 +33,13 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_AND:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_AND [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s32)) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[ATOMICRMW_AND:%[0-9]+]]:vgpr(i32) = G_ATOMICRMW_AND [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (i32)) %0:_(p0) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_AND %0, %1 :: (load store seq_cst (s32), addrspace 0) + %1:_(i32) = COPY $sgpr2 + %2:_(i32) = G_ATOMICRMW_AND %0(p0), %1 :: (load store seq_cst (i32)) ... --- @@ -53,11 +53,11 @@ body: | ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_AND:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_AND [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[ATOMICRMW_AND:%[0-9]+]]:vgpr(i32) = G_ATOMICRMW_AND [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (i32), addrspace 3) %0:_(p3) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_AND %0, %1 :: (load store seq_cst (s32), addrspace 3) + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_ATOMICRMW_AND %0(p3), %1 :: (load store seq_cst (i32), addrspace 3) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-fadd.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-fadd.mir index 11833cab3c07f..27e737bf73d0a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-fadd.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-fadd.mir @@ -13,11 +13,13 @@ body: | ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY1]](i32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_FADD [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST]](f32) + ; CHECK-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(f32) = G_ATOMICRMW_FADD [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (f32), addrspace 3) %0:_(p3) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_FADD %0, %1 :: (load store seq_cst (s32), addrspace 3) + %1:_(i32) = COPY $sgpr1 + %2:_(f32) = G_BITCAST %1(i32) + %3:_(f32) = G_ATOMICRMW_FADD %0(p3), %2 :: (load store seq_cst (f32), addrspace 3) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-max.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-max.mir index 82b1dccc5458a..d4ed079d17453 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-max.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-max.mir @@ -13,13 +13,13 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_MAX:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_MAX [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[ATOMICRMW_MAX:%[0-9]+]]:vgpr(i32) = G_ATOMICRMW_MAX [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (i32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_MAX %0, %1 :: (load store seq_cst (s32), addrspace 1) + %1:_(i32) = COPY $sgpr2 + %2:_(i32) = G_ATOMICRMW_MAX %0(p1), %1 :: (load store seq_cst (i32), addrspace 1) ... 
--- @@ -33,13 +33,13 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_MAX:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_MAX [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s32)) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[ATOMICRMW_MAX:%[0-9]+]]:vgpr(i32) = G_ATOMICRMW_MAX [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (i32)) %0:_(p0) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_MAX %0, %1 :: (load store seq_cst (s32), addrspace 0) + %1:_(i32) = COPY $sgpr2 + %2:_(i32) = G_ATOMICRMW_MAX %0(p0), %1 :: (load store seq_cst (i32)) ... --- @@ -53,11 +53,11 @@ body: | ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_MAX:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_MAX [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[ATOMICRMW_MAX:%[0-9]+]]:vgpr(i32) = G_ATOMICRMW_MAX [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (i32), addrspace 3) %0:_(p3) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_MAX %0, %1 :: (load store seq_cst (s32), addrspace 3) + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_ATOMICRMW_MAX %0(p3), %1 :: (load store seq_cst (i32), addrspace 3) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-min.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-min.mir index 44e44851f7004..5a718e8e306be 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-min.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-min.mir @@ -13,13 +13,13 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_MIN:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_MIN [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[ATOMICRMW_MIN:%[0-9]+]]:vgpr(i32) = G_ATOMICRMW_MIN [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (i32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_MIN %0, %1 :: (load store seq_cst (s32), addrspace 1) + %1:_(i32) = COPY $sgpr2 + %2:_(i32) = G_ATOMICRMW_MIN %0(p1), %1 :: (load store seq_cst (i32), addrspace 1) ... 
--- @@ -33,13 +33,13 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_MIN:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_MIN [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s32)) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[ATOMICRMW_MIN:%[0-9]+]]:vgpr(i32) = G_ATOMICRMW_MIN [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (i32)) %0:_(p0) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_MIN %0, %1 :: (load store seq_cst (s32), addrspace 0) + %1:_(i32) = COPY $sgpr2 + %2:_(i32) = G_ATOMICRMW_MIN %0(p0), %1 :: (load store seq_cst (i32)) ... --- @@ -53,11 +53,11 @@ body: | ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_MIN:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_MIN [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[ATOMICRMW_MIN:%[0-9]+]]:vgpr(i32) = G_ATOMICRMW_MIN [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (i32), addrspace 3) %0:_(p3) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_MIN %0, %1 :: (load store seq_cst (s32), addrspace 3) + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_ATOMICRMW_MIN %0(p3), %1 :: (load store seq_cst (i32), addrspace 3) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-or.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-or.mir index 3a16d72cb8ebd..35d6895fc89f0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-or.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-or.mir @@ -13,13 +13,13 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_OR:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_OR [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[ATOMICRMW_OR:%[0-9]+]]:vgpr(i32) = G_ATOMICRMW_OR [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (i32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_OR %0, %1 :: (load store seq_cst (s32), addrspace 1) + %1:_(i32) = COPY $sgpr2 + %2:_(i32) = G_ATOMICRMW_OR %0(p1), %1 :: (load store seq_cst (i32), addrspace 1) ... 
--- @@ -33,13 +33,13 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_OR:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_OR [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s32)) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[ATOMICRMW_OR:%[0-9]+]]:vgpr(i32) = G_ATOMICRMW_OR [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (i32)) %0:_(p0) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_OR %0, %1 :: (load store seq_cst (s32), addrspace 0) + %1:_(i32) = COPY $sgpr2 + %2:_(i32) = G_ATOMICRMW_OR %0(p0), %1 :: (load store seq_cst (i32)) ... --- @@ -53,11 +53,11 @@ body: | ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_OR:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_OR [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[ATOMICRMW_OR:%[0-9]+]]:vgpr(i32) = G_ATOMICRMW_OR [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (i32), addrspace 3) %0:_(p3) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_OR %0, %1 :: (load store seq_cst (s32), addrspace 3) + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_ATOMICRMW_OR %0(p3), %1 :: (load store seq_cst (i32), addrspace 3) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-sub.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-sub.mir index e7b9c8efff6ce..805bda81ad7cf 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-sub.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-sub.mir @@ -13,13 +13,13 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_SUB [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(i32) = G_ATOMICRMW_SUB [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (i32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s32), addrspace 1) + %1:_(i32) = COPY $sgpr2 + %2:_(i32) = G_ATOMICRMW_SUB %0(p1), %1 :: (load store seq_cst (i32), addrspace 1) ... 
--- @@ -33,13 +33,13 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_SUB [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s32)) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(i32) = G_ATOMICRMW_SUB [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (i32)) %0:_(p0) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s32), addrspace 0) + %1:_(i32) = COPY $sgpr2 + %2:_(i32) = G_ATOMICRMW_SUB %0(p0), %1 :: (load store seq_cst (i32)) ... --- @@ -53,11 +53,11 @@ body: | ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_SUB [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(i32) = G_ATOMICRMW_SUB [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (i32), addrspace 3) %0:_(p3) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s32), addrspace 3) + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_ATOMICRMW_SUB %0(p3), %1 :: (load store seq_cst (i32), addrspace 3) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-umax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-umax.mir index 35ec36275c3cd..c9856ac187cd2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-umax.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-umax.mir @@ -13,13 +13,13 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_UMAX:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_UMAX [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[ATOMICRMW_UMAX:%[0-9]+]]:vgpr(i32) = G_ATOMICRMW_UMAX [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (i32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_UMAX %0, %1 :: (load store seq_cst (s32), addrspace 1) + %1:_(i32) = COPY $sgpr2 + %2:_(i32) = G_ATOMICRMW_UMAX %0(p1), %1 :: (load store seq_cst (i32), addrspace 1) ... 
--- @@ -33,13 +33,13 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_UMAX:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_UMAX [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s32)) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[ATOMICRMW_UMAX:%[0-9]+]]:vgpr(i32) = G_ATOMICRMW_UMAX [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (i32)) %0:_(p0) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_UMAX %0, %1 :: (load store seq_cst (s32), addrspace 0) + %1:_(i32) = COPY $sgpr2 + %2:_(i32) = G_ATOMICRMW_UMAX %0(p0), %1 :: (load store seq_cst (i32)) ... --- @@ -53,11 +53,11 @@ body: | ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_UMAX:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_UMAX [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[ATOMICRMW_UMAX:%[0-9]+]]:vgpr(i32) = G_ATOMICRMW_UMAX [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (i32), addrspace 3) %0:_(p3) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_UMAX %0, %1 :: (load store seq_cst (s32), addrspace 3) + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_ATOMICRMW_UMAX %0(p3), %1 :: (load store seq_cst (i32), addrspace 3) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-umin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-umin.mir index 1e971bfba160e..a7a74835c7637 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-umin.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-umin.mir @@ -13,13 +13,13 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_UMIN:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_UMIN [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[ATOMICRMW_UMIN:%[0-9]+]]:vgpr(i32) = G_ATOMICRMW_UMIN [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (i32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_UMIN %0, %1 :: (load store seq_cst (s32), addrspace 1) + %1:_(i32) = COPY $sgpr2 + %2:_(i32) = G_ATOMICRMW_UMIN %0(p1), %1 :: (load store seq_cst (i32), addrspace 1) ... 
--- @@ -33,13 +33,13 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_UMIN:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_UMIN [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s32)) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[ATOMICRMW_UMIN:%[0-9]+]]:vgpr(i32) = G_ATOMICRMW_UMIN [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (i32)) %0:_(p0) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_UMIN %0, %1 :: (load store seq_cst (s32), addrspace 0) + %1:_(i32) = COPY $sgpr2 + %2:_(i32) = G_ATOMICRMW_UMIN %0(p0), %1 :: (load store seq_cst (i32)) ... --- @@ -53,11 +53,11 @@ body: | ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_UMIN:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_UMIN [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[ATOMICRMW_UMIN:%[0-9]+]]:vgpr(i32) = G_ATOMICRMW_UMIN [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (i32), addrspace 3) %0:_(p3) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_UMIN %0, %1 :: (load store seq_cst (s32), addrspace 3) + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_ATOMICRMW_UMIN %0(p3), %1 :: (load store seq_cst (i32), addrspace 3) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-xchg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-xchg.mir index db01a21d061d3..3e689e51e94c1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-xchg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-xchg.mir @@ -13,13 +13,13 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_XCHG:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_XCHG [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[ATOMICRMW_XCHG:%[0-9]+]]:vgpr(i32) = G_ATOMICRMW_XCHG [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (i32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_XCHG %0, %1 :: (load store seq_cst (s32), addrspace 1) + %1:_(i32) = COPY $sgpr2 + %2:_(i32) = G_ATOMICRMW_XCHG %0(p1), %1 :: (load store seq_cst (i32), addrspace 1) ... 
--- @@ -33,13 +33,13 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_XCHG:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_XCHG [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s32)) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[ATOMICRMW_XCHG:%[0-9]+]]:vgpr(i32) = G_ATOMICRMW_XCHG [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (i32)) %0:_(p0) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_XCHG %0, %1 :: (load store seq_cst (s32), addrspace 0) + %1:_(i32) = COPY $sgpr2 + %2:_(i32) = G_ATOMICRMW_XCHG %0(p0), %1 :: (load store seq_cst (i32)) ... --- @@ -53,11 +53,11 @@ body: | ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_XCHG:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_XCHG [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[ATOMICRMW_XCHG:%[0-9]+]]:vgpr(i32) = G_ATOMICRMW_XCHG [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (i32), addrspace 3) %0:_(p3) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_XCHG %0, %1 :: (load store seq_cst (s32), addrspace 3) + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_ATOMICRMW_XCHG %0(p3), %1 :: (load store seq_cst (i32), addrspace 3) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-xor.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-xor.mir index 903d6f54ba46d..6f8c86ef3a94b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-xor.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-xor.mir @@ -13,13 +13,13 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_XOR:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_XOR [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[ATOMICRMW_XOR:%[0-9]+]]:vgpr(i32) = G_ATOMICRMW_XOR [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (i32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_XOR %0, %1 :: (load store seq_cst (s32), addrspace 1) + %1:_(i32) = COPY $sgpr2 + %2:_(i32) = G_ATOMICRMW_XOR %0(p1), %1 :: (load store seq_cst (i32), addrspace 1) ... 
--- @@ -33,13 +33,13 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_XOR:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_XOR [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s32)) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[ATOMICRMW_XOR:%[0-9]+]]:vgpr(i32) = G_ATOMICRMW_XOR [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (i32)) %0:_(p0) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_XOR %0, %1 :: (load store seq_cst (s32), addrspace 0) + %1:_(i32) = COPY $sgpr2 + %2:_(i32) = G_ATOMICRMW_XOR %0(p0), %1 :: (load store seq_cst (i32)) ... --- @@ -53,11 +53,11 @@ body: | ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_XOR:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_XOR [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[ATOMICRMW_XOR:%[0-9]+]]:vgpr(i32) = G_ATOMICRMW_XOR [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (i32), addrspace 3) %0:_(p3) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_XOR %0, %1 :: (load store seq_cst (s32), addrspace 3) + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_ATOMICRMW_XOR %0(p3), %1 :: (load store seq_cst (i32), addrspace 3) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-bitcast.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-bitcast.mir index 550f042618abf..8fc9a0ea4a231 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-bitcast.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-bitcast.mir @@ -12,10 +12,10 @@ body: | ; CHECK-LABEL: name: bitcast_s ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(<2 x s16>) = G_BITCAST [[COPY]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(<2 x s16>) = G_BITCAST %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(<2 x i16>) = G_BITCAST [[COPY]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(<2 x i16>) = G_BITCAST %0(i32) ... --- @@ -28,8 +28,8 @@ body: | ; CHECK-LABEL: name: bitcast_v ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<2 x s16>) = G_BITCAST [[COPY]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(<2 x s16>) = G_BITCAST %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<2 x i16>) = G_BITCAST [[COPY]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(<2 x i16>) = G_BITCAST %0(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-bitreverse.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-bitreverse.mir index e82a492dbec20..f0b2bc60ce686 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-bitreverse.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-bitreverse.mir @@ -12,10 +12,10 @@ body: | ; CHECK-LABEL: name: bitreverse_i32_s ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:sgpr(s32) = G_BITREVERSE [[COPY]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = G_BITREVERSE %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:sgpr(i32) = G_BITREVERSE [[COPY]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = G_BITREVERSE %0 ... --- @@ -28,10 +28,10 @@ body: | ; CHECK-LABEL: name: bitreverse_i32_v ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:vgpr(s32) = G_BITREVERSE [[COPY]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_BITREVERSE %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:vgpr(i32) = G_BITREVERSE [[COPY]] + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_BITREVERSE %0 ... --- @@ -44,10 +44,10 @@ body: | ; CHECK-LABEL: name: bitreverse_i64_s ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:sgpr(s64) = G_BITREVERSE [[COPY]] - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s64) = G_BITREVERSE %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:sgpr(i64) = G_BITREVERSE [[COPY]] + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(i64) = G_BITREVERSE %0 ... --- @@ -60,11 +60,11 @@ body: | ; CHECK-LABEL: name: bitreverse_i64_v ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:vgpr(s32) = G_BITREVERSE [[UV1]] - ; CHECK-NEXT: [[BITREVERSE1:%[0-9]+]]:vgpr(s32) = G_BITREVERSE [[UV]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[BITREVERSE]](s32), [[BITREVERSE1]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_BITREVERSE %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:vgpr(i32) = G_BITREVERSE [[UV1]] + ; CHECK-NEXT: [[BITREVERSE1:%[0-9]+]]:vgpr(i32) = G_BITREVERSE [[UV]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[BITREVERSE]](i32), [[BITREVERSE1]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = G_BITREVERSE %0 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-block-addr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-block-addr.mir index a50c7fe0748b8..0f90145051114 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-block-addr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-block-addr.mir @@ -24,6 +24,6 @@ body: | ; CHECK: [[BLOCK_ADDR:%[0-9]+]]:sgpr(p0) = G_BLOCK_ADDR blockaddress(@test_blockaddress, %ir-block.block) ; CHECK-NEXT: S_ENDPGM 0, implicit [[BLOCK_ADDR]](p0) %0:_(p0) = G_BLOCK_ADDR blockaddress(@test_blockaddress, %ir-block.block) - S_ENDPGM 0, implicit %0 + S_ENDPGM 0, implicit %0(p0) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-brcond.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-brcond.mir index 3b2b141539fc5..5c102eee6bd71 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-brcond.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-brcond.mir @@ -11,21 +11,23 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](i1), %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: bb.0.entry: - successors: %bb.1 + successors: %bb.1(0x80000000) liveins: $vgpr0, $vgpr1 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s1) = G_ICMP intpred(ne), %0, %1 - G_BRCOND %2, %bb.1 + + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + G_BRCOND %2(i1), %bb.1 bb.1: + ... --- @@ -37,23 +39,25 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; CHECK-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; CHECK-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: bb.0.entry: - successors: %bb.1 + successors: %bb.1(0x80000000) liveins: $sgpr0, $sgpr1 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s1) = G_ICMP intpred(ne), %0, %1 - G_BRCOND %2, %bb.1 + + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + G_BRCOND %2(i1), %bb.1 bb.1: + ... --- @@ -65,20 +69,22 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; CHECK-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; CHECK-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: bb.0.entry: - successors: %bb.1 + successors: %bb.1(0x80000000) liveins: $sgpr0 - %0:_(s32) = COPY $sgpr0 - %1:_(s1) = G_TRUNC %0 - G_BRCOND %1, %bb.1 + + %0:_(i32) = COPY $sgpr0 + %1:_(i1) = G_TRUNC %0(i32) + G_BRCOND %1(i1), %bb.1 bb.1: + ... 
--- @@ -90,20 +96,22 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: G_BRCOND [[COPY1]](s1), %bb.1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; CHECK-NEXT: G_BRCOND [[COPY1]](i1), %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: bb.0.entry: - successors: %bb.1 + successors: %bb.1(0x80000000) liveins: $vgpr0 - %0:_(s32) = COPY $vgpr0 - %1:_(s1) = G_TRUNC %0 - G_BRCOND %1, %bb.1 + + %0:_(i32) = COPY $vgpr0 + %1:_(i1) = G_TRUNC %0(i32) + G_BRCOND %1(i1), %bb.1 bb.1: + ... @@ -119,26 +127,31 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY]](i32) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: G_BRCOND [[COPY1]](s1), %bb.1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; CHECK-NEXT: G_BRCOND [[COPY1]](i1), %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: bb.0.entry: - successors: %bb.1 + successors: %bb.1(0x80000000) liveins: $vgpr0 - %0:_(s32) = COPY $vgpr0 - %1:_(s1) = G_TRUNC %0 + + %0:_(i32) = COPY $vgpr0 + %1:_(i1) = G_TRUNC %0(i32) bb.1: - G_BRCOND %1, %bb.1 + successors: %bb.1(0x40000000), %bb.2(0x40000000) + + G_BRCOND %1(i1), %bb.1 bb.2: + + ... @@ -152,26 +165,31 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: G_BRCOND [[COPY1]](s1), %bb.1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; CHECK-NEXT: G_BRCOND [[COPY1]](i1), %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: bb.0.entry: - successors: %bb.1 + successors: %bb.1(0x80000000) liveins: $vgpr0 - %0:_(s32) = COPY $vgpr0 + + %0:_(i32) = COPY $vgpr0 bb.1: - %1:_(s1) = G_TRUNC %0 - G_BRCOND %1, %bb.1 + successors: %bb.1(0x40000000), %bb.2(0x40000000) + + %1:_(i1) = G_TRUNC %0(i32) + G_BRCOND %1(i1), %bb.1 bb.2: + + ... 
# Extra instruction separates brcond from the condition def @@ -184,26 +202,31 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY]](i32) ; CHECK-NEXT: S_NOP 0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: G_BRCOND [[COPY1]](s1), %bb.1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; CHECK-NEXT: G_BRCOND [[COPY1]](i1), %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: bb.0.entry: - successors: %bb.1 + successors: %bb.1(0x80000000) liveins: $vgpr0 - %0:_(s32) = COPY $vgpr0 + + %0:_(i32) = COPY $vgpr0 bb.1: - %1:_(s1) = G_TRUNC %0 + successors: %bb.1(0x40000000), %bb.2(0x40000000) + + %1:_(i1) = G_TRUNC %0(i32) S_NOP 0 - G_BRCOND %1, %bb.1 + G_BRCOND %1(i1), %bb.1 bb.2: + + ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-bswap.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-bswap.mir index c44dd484920e7..e7b78d97171c0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-bswap.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-bswap.mir @@ -12,11 +12,11 @@ body: | ; CHECK-LABEL: name: bswap_i32_s ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[BSWAP:%[0-9]+]]:vgpr(s32) = G_BSWAP [[COPY1]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = G_BSWAP %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[BSWAP:%[0-9]+]]:vgpr(i32) = G_BSWAP [[COPY1]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = G_BSWAP %0 ... --- @@ -29,8 +29,8 @@ body: | ; CHECK-LABEL: name: bswap_i32_v ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[BSWAP:%[0-9]+]]:vgpr(s32) = G_BSWAP [[COPY]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_BSWAP %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BSWAP:%[0-9]+]]:vgpr(i32) = G_BSWAP [[COPY]] + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_BSWAP %0 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-build-vector.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-build-vector.mir index 0dc1165843e8f..5aeeabe4025be 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-build-vector.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-build-vector.mir @@ -12,12 +12,12 @@ body: | ; CHECK-LABEL: name: build_vector_v2s32_ss ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(<2 x s32>) = G_BUILD_VECTOR %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<2 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(<2 x i32>) = G_BUILD_VECTOR %0(i32), %1(i32) ... --- @@ -30,13 +30,13 @@ body: | ; CHECK-LABEL: name: build_vector_v2s32_sv ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY1]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(<2 x s32>) = G_BUILD_VECTOR %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x i32>) = G_BUILD_VECTOR [[COPY2]](i32), [[COPY1]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(<2 x i32>) = G_BUILD_VECTOR %0(i32), %1(i32) ... --- @@ -49,13 +49,13 @@ body: | ; CHECK-LABEL: name: build_vector_v2s32_vs ; CHECK: liveins: $vgpr0, $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY2]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(<2 x s32>) = G_BUILD_VECTOR %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr0 + %2:_(<2 x i32>) = G_BUILD_VECTOR %0(i32), %1(i32) ... 
--- @@ -68,12 +68,12 @@ body: | ; CHECK-LABEL: name: build_vector_v2s32_vv ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(<2 x s32>) = G_BUILD_VECTOR %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(<2 x i32>) = G_BUILD_VECTOR %0(i32), %1(i32) ... --- @@ -88,14 +88,14 @@ body: | ; CHECK-LABEL: name: build_vector_v2s32_aa ; CHECK: liveins: $agpr0, $agpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(s32) = COPY $agpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(s32) = COPY $agpr1 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:agpr(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<2 x s32>) - %0:_(s32) = COPY $agpr0 - %1:_(s32) = COPY $agpr1 - %2:_(<2 x s32>) = G_BUILD_VECTOR %0, %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(i32) = COPY $agpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(i32) = COPY $agpr1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:agpr(<2 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<2 x i32>) + %0:_(i32) = COPY $agpr0 + %1:_(i32) = COPY $agpr1 + %2:_(<2 x i32>) = G_BUILD_VECTOR %0(i32), %1(i32) + S_ENDPGM 0, implicit %2(<2 x i32>) ... --- @@ -110,15 +110,15 @@ body: | ; CHECK-LABEL: name: build_vector_v2s32_va ; CHECK: liveins: $vgpr0, $agpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(s32) = COPY $agpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY2]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<2 x s32>) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $agpr0 - %2:_(<2 x s32>) = G_BUILD_VECTOR %0, %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(i32) = COPY $agpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY2]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<2 x i32>) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $agpr0 + %2:_(<2 x i32>) = G_BUILD_VECTOR %0(i32), %1(i32) + S_ENDPGM 0, implicit %2(<2 x i32>) ... 
--- @@ -133,15 +133,15 @@ body: | ; CHECK-LABEL: name: build_vector_v2s32_av ; CHECK: liveins: $vgpr0, $agpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(s32) = COPY $agpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY1]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<2 x s32>) - %0:_(s32) = COPY $agpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(<2 x s32>) = G_BUILD_VECTOR %0, %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(i32) = COPY $agpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x i32>) = G_BUILD_VECTOR [[COPY2]](i32), [[COPY1]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<2 x i32>) + %0:_(i32) = COPY $agpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(<2 x i32>) = G_BUILD_VECTOR %0(i32), %1(i32) + S_ENDPGM 0, implicit %2(<2 x i32>) ... --- @@ -156,16 +156,16 @@ body: | ; CHECK-LABEL: name: build_vector_v2s32_sa ; CHECK: liveins: $sgpr0, $agpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(s32) = COPY $agpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<2 x s32>) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $agpr0 - %2:_(<2 x s32>) = G_BUILD_VECTOR %0, %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(i32) = COPY $agpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x i32>) = G_BUILD_VECTOR [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<2 x i32>) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $agpr0 + %2:_(<2 x i32>) = G_BUILD_VECTOR %0(i32), %1(i32) + S_ENDPGM 0, implicit %2(<2 x i32>) ... --- @@ -180,16 +180,16 @@ body: | ; CHECK-LABEL: name: build_vector_v2s32_as ; CHECK: liveins: $sgpr0, $agpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(s32) = COPY $agpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<2 x s32>) - %0:_(s32) = COPY $agpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(<2 x s32>) = G_BUILD_VECTOR %0, %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(i32) = COPY $agpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x i32>) = G_BUILD_VECTOR [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<2 x i32>) + %0:_(i32) = COPY $agpr0 + %1:_(i32) = COPY $sgpr0 + %2:_(<2 x i32>) = G_BUILD_VECTOR %0(i32), %1(i32) + S_ENDPGM 0, implicit %2(<2 x i32>) ... 
--- @@ -204,16 +204,16 @@ body: | ; CHECK-LABEL: name: build_vector_v3s32_aaa ; CHECK: liveins: $agpr0, $agpr1, $agpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(s32) = COPY $agpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(s32) = COPY $agpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(s32) = COPY $agpr2 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:agpr(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x s32>) - %0:_(s32) = COPY $agpr0 - %1:_(s32) = COPY $agpr1 - %2:_(s32) = COPY $agpr2 - %3:_(<3 x s32>) = G_BUILD_VECTOR %0, %1, %2 - S_ENDPGM 0, implicit %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(i32) = COPY $agpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(i32) = COPY $agpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(i32) = COPY $agpr2 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:agpr(<3 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x i32>) + %0:_(i32) = COPY $agpr0 + %1:_(i32) = COPY $agpr1 + %2:_(i32) = COPY $agpr2 + %3:_(<3 x i32>) = G_BUILD_VECTOR %0(i32), %1(i32), %2(i32) + S_ENDPGM 0, implicit %3(<3 x i32>) ... --- @@ -228,18 +228,18 @@ body: | ; CHECK-LABEL: name: build_vector_v4s32_aaaa ; CHECK: liveins: $agpr0, $agpr1, $agpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(s32) = COPY $agpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(s32) = COPY $agpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(s32) = COPY $agpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:agpr(s32) = COPY $agpr2 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:agpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<4 x s32>) - %0:_(s32) = COPY $agpr0 - %1:_(s32) = COPY $agpr1 - %2:_(s32) = COPY $agpr2 - %3:_(s32) = COPY $agpr2 - %4:_(<4 x s32>) = G_BUILD_VECTOR %0, %1, %2, %3 - S_ENDPGM 0, implicit %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(i32) = COPY $agpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(i32) = COPY $agpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(i32) = COPY $agpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:agpr(i32) = COPY $agpr2 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:agpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<4 x i32>) + %0:_(i32) = COPY $agpr0 + %1:_(i32) = COPY $agpr1 + %2:_(i32) = COPY $agpr2 + %3:_(i32) = COPY $agpr2 + %4:_(<4 x i32>) = G_BUILD_VECTOR %0(i32), %1(i32), %2(i32), %3(i32) + S_ENDPGM 0, implicit %4(<4 x i32>) ... 
--- @@ -254,26 +254,26 @@ body: | ; CHECK-LABEL: name: build_vector_v8s32_aaaaaaaa ; CHECK: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(s32) = COPY $agpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(s32) = COPY $agpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(s32) = COPY $agpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:agpr(s32) = COPY $agpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:agpr(s32) = COPY $agpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(s32) = COPY $agpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:agpr(s32) = COPY $agpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:agpr(s32) = COPY $agpr7 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:agpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<8 x s32>) - %0:_(s32) = COPY $agpr0 - %1:_(s32) = COPY $agpr1 - %2:_(s32) = COPY $agpr2 - %3:_(s32) = COPY $agpr3 - %4:_(s32) = COPY $agpr4 - %5:_(s32) = COPY $agpr5 - %6:_(s32) = COPY $agpr6 - %7:_(s32) = COPY $agpr7 - %8:_(<8 x s32>) = G_BUILD_VECTOR %0, %1, %2, %3, %4, %5, %6, %7 - S_ENDPGM 0, implicit %8 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(i32) = COPY $agpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(i32) = COPY $agpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(i32) = COPY $agpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:agpr(i32) = COPY $agpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:agpr(i32) = COPY $agpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(i32) = COPY $agpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:agpr(i32) = COPY $agpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:agpr(i32) = COPY $agpr7 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:agpr(<8 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<8 x i32>) + %0:_(i32) = COPY $agpr0 + %1:_(i32) = COPY $agpr1 + %2:_(i32) = COPY $agpr2 + %3:_(i32) = COPY $agpr3 + %4:_(i32) = COPY $agpr4 + %5:_(i32) = COPY $agpr5 + %6:_(i32) = COPY $agpr6 + %7:_(i32) = COPY $agpr7 + %8:_(<8 x i32>) = G_BUILD_VECTOR %0(i32), %1(i32), %2(i32), %3(i32), %4(i32), %5(i32), %6(i32), %7(i32) + S_ENDPGM 0, implicit %8(<8 x i32>) ... 
--- @@ -288,40 +288,40 @@ body: | ; CHECK-LABEL: name: build_vector_v16s32_aaaaaaaaaaaaaaaa ; CHECK: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(s32) = COPY $agpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(s32) = COPY $agpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(s32) = COPY $agpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:agpr(s32) = COPY $agpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:agpr(s32) = COPY $agpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(s32) = COPY $agpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:agpr(s32) = COPY $agpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:agpr(s32) = COPY $agpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:agpr(s32) = COPY $agpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:agpr(s32) = COPY $agpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:agpr(s32) = COPY $agpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:agpr(s32) = COPY $agpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:agpr(s32) = COPY $agpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:agpr(s32) = COPY $agpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:agpr(s32) = COPY $agpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:agpr(s32) = COPY $agpr15 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:agpr(<16 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<16 x s32>) - %0:_(s32) = COPY $agpr0 - %1:_(s32) = COPY $agpr1 - %2:_(s32) = COPY $agpr2 - %3:_(s32) = COPY $agpr3 - %4:_(s32) = COPY $agpr4 - %5:_(s32) = COPY $agpr5 - %6:_(s32) = COPY $agpr6 - %7:_(s32) = COPY $agpr7 - %8:_(s32) = COPY $agpr8 - %9:_(s32) = COPY $agpr9 - %10:_(s32) = COPY $agpr10 - %11:_(s32) = COPY $agpr11 - %12:_(s32) = COPY $agpr12 - %13:_(s32) = COPY $agpr13 - %14:_(s32) = COPY $agpr14 - %15:_(s32) = COPY $agpr15 - %16:_(<16 x s32>) = G_BUILD_VECTOR %0, %1, %2, %3, %4, %5, %6, %7, %8, %9, %10, %11, %12, %13, %14, %15 - S_ENDPGM 0, implicit %16 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(i32) = COPY $agpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(i32) = COPY $agpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(i32) = COPY $agpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:agpr(i32) = COPY $agpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:agpr(i32) = COPY $agpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(i32) = COPY $agpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:agpr(i32) = COPY $agpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:agpr(i32) = COPY $agpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:agpr(i32) = COPY $agpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:agpr(i32) = COPY $agpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:agpr(i32) = COPY $agpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:agpr(i32) = COPY $agpr11 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:agpr(i32) = COPY $agpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:agpr(i32) = COPY $agpr13 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:agpr(i32) = COPY $agpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:agpr(i32) = COPY $agpr15 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:agpr(<16 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32), [[COPY4]](i32), [[COPY5]](i32), [[COPY6]](i32), [[COPY7]](i32), [[COPY8]](i32), [[COPY9]](i32), [[COPY10]](i32), [[COPY11]](i32), [[COPY12]](i32), [[COPY13]](i32), [[COPY14]](i32), [[COPY15]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<16 x i32>) + %0:_(i32) = COPY $agpr0 + %1:_(i32) = COPY 
$agpr1 + %2:_(i32) = COPY $agpr2 + %3:_(i32) = COPY $agpr3 + %4:_(i32) = COPY $agpr4 + %5:_(i32) = COPY $agpr5 + %6:_(i32) = COPY $agpr6 + %7:_(i32) = COPY $agpr7 + %8:_(i32) = COPY $agpr8 + %9:_(i32) = COPY $agpr9 + %10:_(i32) = COPY $agpr10 + %11:_(i32) = COPY $agpr11 + %12:_(i32) = COPY $agpr12 + %13:_(i32) = COPY $agpr13 + %14:_(i32) = COPY $agpr14 + %15:_(i32) = COPY $agpr15 + %16:_(<16 x i32>) = G_BUILD_VECTOR %0(i32), %1(i32), %2(i32), %3(i32), %4(i32), %5(i32), %6(i32), %7(i32), %8(i32), %9(i32), %10(i32), %11(i32), %12(i32), %13(i32), %14(i32), %15(i32) + S_ENDPGM 0, implicit %16(<16 x i32>) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-concat-vector.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-concat-vector.mir index 135aefa98e77c..11e7649c8badc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-concat-vector.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-concat-vector.mir @@ -12,12 +12,12 @@ body: | ; CHECK-LABEL: name: concat_vectors_v4s16_ss ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1 - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:sgpr(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) - %0:_(<2 x s16>) = COPY $sgpr0 - %1:_(<2 x s16>) = COPY $sgpr1 - %2:_(<4 x s16>) = G_CONCAT_VECTORS %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr1 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:sgpr(<4 x i16>) = G_CONCAT_VECTORS [[COPY]](<2 x i16>), [[COPY1]](<2 x i16>) + %0:_(<2 x i16>) = COPY $sgpr0 + %1:_(<2 x i16>) = COPY $sgpr1 + %2:_(<4 x i16>) = G_CONCAT_VECTORS %0(<2 x i16>), %1(<2 x i16>) ... --- @@ -30,13 +30,13 @@ body: | ; CHECK-LABEL: name: concat_vectors_v4s16_sv ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY1]](<2 x s16>) - %0:_(<2 x s16>) = COPY $sgpr0 - %1:_(<2 x s16>) = COPY $vgpr0 - %2:_(<4 x s16>) = G_CONCAT_VECTORS %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x i16>) = COPY [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x i16>) = G_CONCAT_VECTORS [[COPY2]](<2 x i16>), [[COPY1]](<2 x i16>) + %0:_(<2 x i16>) = COPY $sgpr0 + %1:_(<2 x i16>) = COPY $vgpr0 + %2:_(<4 x i16>) = G_CONCAT_VECTORS %0(<2 x i16>), %1(<2 x i16>) ... 
--- @@ -49,13 +49,13 @@ body: | ; CHECK-LABEL: name: concat_vectors_v4s16_vs ; CHECK: liveins: $vgpr0, $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY2]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $sgpr0 - %2:_(<4 x s16>) = G_CONCAT_VECTORS %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x i16>) = COPY [[COPY1]](<2 x i16>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x i16>) = G_CONCAT_VECTORS [[COPY]](<2 x i16>), [[COPY2]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $sgpr0 + %2:_(<4 x i16>) = G_CONCAT_VECTORS %0(<2 x i16>), %1(<2 x i16>) ... --- @@ -68,12 +68,12 @@ body: | ; CHECK-LABEL: name: concat_vectors_v4s16_vv ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<4 x s16>) = G_CONCAT_VECTORS %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr1 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x i16>) = G_CONCAT_VECTORS [[COPY]](<2 x i16>), [[COPY1]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<4 x i16>) = G_CONCAT_VECTORS %0(<2 x i16>), %1(<2 x i16>) ... --- @@ -86,14 +86,14 @@ body: | ; CHECK-LABEL: name: concat_vectors_v4s16_aa ; CHECK: liveins: $agpr0, $agpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(<2 x s16>) = COPY $agpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(<2 x s16>) = COPY $agpr1 - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:agpr(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<2 x s16>) = COPY $agpr0 - %1:_(<2 x s16>) = COPY $agpr1 - %2:_(<4 x s16>) = G_CONCAT_VECTORS %0, %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(<2 x i16>) = COPY $agpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(<2 x i16>) = COPY $agpr1 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:agpr(<4 x i16>) = G_CONCAT_VECTORS [[COPY]](<2 x i16>), [[COPY1]](<2 x i16>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<4 x i16>) + %0:_(<2 x i16>) = COPY $agpr0 + %1:_(<2 x i16>) = COPY $agpr1 + %2:_(<4 x i16>) = G_CONCAT_VECTORS %0(<2 x i16>), %1(<2 x i16>) + S_ENDPGM 0, implicit %2(<4 x i16>) ... 
--- @@ -106,15 +106,15 @@ body: | ; CHECK-LABEL: name: concat_vectors_v4s16_av ; CHECK: liveins: $agpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(<2 x s16>) = COPY $agpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY1]](<2 x s16>) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<2 x s16>) = COPY $agpr0 - %1:_(<2 x s16>) = COPY $vgpr0 - %2:_(<4 x s16>) = G_CONCAT_VECTORS %0, %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(<2 x i16>) = COPY $agpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x i16>) = COPY [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x i16>) = G_CONCAT_VECTORS [[COPY2]](<2 x i16>), [[COPY1]](<2 x i16>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<4 x i16>) + %0:_(<2 x i16>) = COPY $agpr0 + %1:_(<2 x i16>) = COPY $vgpr0 + %2:_(<4 x i16>) = G_CONCAT_VECTORS %0(<2 x i16>), %1(<2 x i16>) + S_ENDPGM 0, implicit %2(<4 x i16>) ... --- @@ -127,15 +127,15 @@ body: | ; CHECK-LABEL: name: concat_vectors_v4s16_va ; CHECK: liveins: $agpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(<2 x s16>) = COPY $agpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY2]](<2 x s16>) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $agpr0 - %2:_(<4 x s16>) = G_CONCAT_VECTORS %0, %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(<2 x i16>) = COPY $agpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x i16>) = COPY [[COPY1]](<2 x i16>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x i16>) = G_CONCAT_VECTORS [[COPY]](<2 x i16>), [[COPY2]](<2 x i16>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<4 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $agpr0 + %2:_(<4 x i16>) = G_CONCAT_VECTORS %0(<2 x i16>), %1(<2 x i16>) + S_ENDPGM 0, implicit %2(<4 x i16>) ... 
--- @@ -148,16 +148,16 @@ body: | ; CHECK-LABEL: name: concat_vectors_v4s16_as ; CHECK: liveins: $agpr0, $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(<2 x s16>) = COPY $agpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<2 x s16>) = COPY $agpr0 - %1:_(<2 x s16>) = COPY $sgpr0 - %2:_(<4 x s16>) = G_CONCAT_VECTORS %0, %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(<2 x i16>) = COPY $agpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x i16>) = COPY [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x i16>) = COPY [[COPY1]](<2 x i16>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x i16>) = G_CONCAT_VECTORS [[COPY2]](<2 x i16>), [[COPY3]](<2 x i16>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<4 x i16>) + %0:_(<2 x i16>) = COPY $agpr0 + %1:_(<2 x i16>) = COPY $sgpr0 + %2:_(<4 x i16>) = G_CONCAT_VECTORS %0(<2 x i16>), %1(<2 x i16>) + S_ENDPGM 0, implicit %2(<4 x i16>) ... --- @@ -170,14 +170,14 @@ body: | ; CHECK-LABEL: name: concat_vectors_v4s16_sa ; CHECK: liveins: $agpr0, $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(<2 x s16>) = COPY $agpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<4 x s16>) - %0:_(<2 x s16>) = COPY $sgpr0 - %1:_(<2 x s16>) = COPY $agpr0 - %2:_(<4 x s16>) = G_CONCAT_VECTORS %0, %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(<2 x i16>) = COPY $agpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x i16>) = COPY [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x i16>) = COPY [[COPY1]](<2 x i16>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x i16>) = G_CONCAT_VECTORS [[COPY2]](<2 x i16>), [[COPY3]](<2 x i16>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<4 x i16>) + %0:_(<2 x i16>) = COPY $sgpr0 + %1:_(<2 x i16>) = COPY $agpr0 + %2:_(<4 x i16>) = G_CONCAT_VECTORS %0(<2 x i16>), %1(<2 x i16>) + S_ENDPGM 0, implicit %2(<4 x i16>) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-constant.mir index 233176281f8f3..6500e0f01df17 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-constant.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-constant.mir @@ -12,12 +12,12 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s32)) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; CHECK-NEXT: G_STORE [[COPY1]](i32), [[COPY]](p1) :: (store (i32)) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_CONSTANT i32 1 - G_STORE %1, %0 :: (store (s32)) + %1:_(i32) = G_CONSTANT i32 1 + G_STORE %1(i32), %0(p1) :: (store (i32)) ... @@ -27,9 +27,9 @@ legalized: true body: | bb.0: ; CHECK-LABEL: name: test_constant_s32_sgpr_use - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsg), 0, [[C]](s32) - %0:_(s32) = G_CONSTANT i32 1 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsg), 0, %0 + ; CHECK: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsg), 0, [[C]](i32) + %0:_(i32) = G_CONSTANT i32 1 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsg), 0, %0(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-copy.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-copy.mir index bf8f2d633c1dc..489bcd097c859 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-copy.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-copy.mir @@ -14,10 +14,16 @@ body: | ; CHECK-LABEL: name: copy_s32_vgpr_to_vgpr ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) - %0:_(s32) = COPY $vgpr0 - $vgpr0 = COPY %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](i32) + ; + ; WAVE32-LABEL: name: copy_s32_vgpr_to_vgpr + ; WAVE32: liveins: $vgpr0 + ; WAVE32-NEXT: {{ $}} + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; WAVE32-NEXT: $vgpr0 = COPY [[COPY]](i32) + %0:_(i32) = COPY $vgpr0 + $vgpr0 = COPY %0(i32) ... @@ -31,10 +37,16 @@ body: | ; CHECK-LABEL: name: copy_s32_sgpr_to_sgpr ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: $sgpr0 = COPY [[COPY]](s32) - %0:_(s32) = COPY $sgpr0 - $sgpr0 = COPY %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: $sgpr0 = COPY [[COPY]](i32) + ; + ; WAVE32-LABEL: name: copy_s32_sgpr_to_sgpr + ; WAVE32: liveins: $sgpr0 + ; WAVE32-NEXT: {{ $}} + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; WAVE32-NEXT: $sgpr0 = COPY [[COPY]](i32) + %0:_(i32) = COPY $sgpr0 + $sgpr0 = COPY %0(i32) ... 
@@ -48,10 +60,16 @@ body: | ; CHECK-LABEL: name: copy_s32_sgpr_to_vgpr ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) - %0:_(s32) = COPY $sgpr0 - $vgpr0 = COPY %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](i32) + ; + ; WAVE32-LABEL: name: copy_s32_sgpr_to_vgpr + ; WAVE32: liveins: $sgpr0 + ; WAVE32-NEXT: {{ $}} + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; WAVE32-NEXT: $vgpr0 = COPY [[COPY]](i32) + %0:_(i32) = COPY $sgpr0 + $vgpr0 = COPY %0(i32) ... @@ -65,10 +83,16 @@ body: | ; CHECK-LABEL: name: copy_s32_vgpr_to_agpr ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: $agpr0 = COPY [[COPY]](s32) - %0:_(s32) = COPY $vgpr0 - $agpr0 = COPY %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: $agpr0 = COPY [[COPY]](i32) + ; + ; WAVE32-LABEL: name: copy_s32_vgpr_to_agpr + ; WAVE32: liveins: $vgpr0 + ; WAVE32-NEXT: {{ $}} + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; WAVE32-NEXT: $agpr0 = COPY [[COPY]](i32) + %0:_(i32) = COPY $vgpr0 + $agpr0 = COPY %0(i32) ... @@ -82,10 +106,16 @@ body: | ; CHECK-LABEL: name: copy_s32_sgpr_to_agpr ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: $agpr0 = COPY [[COPY]](s32) - %0:_(s32) = COPY $sgpr0 - $agpr0 = COPY %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: $agpr0 = COPY [[COPY]](i32) + ; + ; WAVE32-LABEL: name: copy_s32_sgpr_to_agpr + ; WAVE32: liveins: $sgpr0 + ; WAVE32-NEXT: {{ $}} + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; WAVE32-NEXT: $agpr0 = COPY [[COPY]](i32) + %0:_(i32) = COPY $sgpr0 + $agpr0 = COPY %0(i32) ... @@ -99,10 +129,16 @@ body: | ; CHECK-LABEL: name: copy_s32_agpr_to_vgpr ; CHECK: liveins: $agpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(s32) = COPY $agpr0 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) - %0:_(s32) = COPY $agpr0 - $vgpr0 = COPY %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(i32) = COPY $agpr0 + ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](i32) + ; + ; WAVE32-LABEL: name: copy_s32_agpr_to_vgpr + ; WAVE32: liveins: $agpr0 + ; WAVE32-NEXT: {{ $}} + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:agpr(i32) = COPY $agpr0 + ; WAVE32-NEXT: $vgpr0 = COPY [[COPY]](i32) + %0:_(i32) = COPY $agpr0 + $vgpr0 = COPY %0(i32) ... @@ -116,10 +152,16 @@ body: | ; CHECK-LABEL: name: copy_s32_agpr_to_agpr ; CHECK: liveins: $agpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(s32) = COPY $agpr0 - ; CHECK-NEXT: $agpr0 = COPY [[COPY]](s32) - %0:_(s32) = COPY $agpr0 - $agpr0 = COPY %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(i32) = COPY $agpr0 + ; CHECK-NEXT: $agpr0 = COPY [[COPY]](i32) + ; + ; WAVE32-LABEL: name: copy_s32_agpr_to_agpr + ; WAVE32: liveins: $agpr0 + ; WAVE32-NEXT: {{ $}} + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:agpr(i32) = COPY $agpr0 + ; WAVE32-NEXT: $agpr0 = COPY [[COPY]](i32) + %0:_(i32) = COPY $agpr0 + $agpr0 = COPY %0(i32) ... 
@@ -133,14 +175,22 @@ body: | ; CHECK-LABEL: name: copy_s1_sgpr_to_vcc_preassigned ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY1]](s1) - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s1) = G_TRUNC %0 - %2:vcc(s1) = COPY %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY1]](i1) + ; + ; WAVE32-LABEL: name: copy_s1_sgpr_to_vcc_preassigned + ; WAVE32: liveins: $sgpr0 + ; WAVE32-NEXT: {{ $}} + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; WAVE32-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY]](i32) + ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY1]](i1) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i1) = G_TRUNC %0(i32) + %2:vcc(i1) = COPY %1(i1) + S_ENDPGM 0, implicit %2(i1) ... --- @@ -153,14 +203,22 @@ body: | ; CHECK-LABEL: name: copy_s1_vgpr_to_vcc_preassigned ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY1]](s1) - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s1) = G_TRUNC %0 - %2:vcc(s1) = COPY %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY1]](i1) + ; + ; WAVE32-LABEL: name: copy_s1_vgpr_to_vcc_preassigned + ; WAVE32: liveins: $vgpr0 + ; WAVE32-NEXT: {{ $}} + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; WAVE32-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY]](i32) + ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY1]](i1) + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i1) = G_TRUNC %0(i32) + %2:vcc(i1) = COPY %1(i1) + S_ENDPGM 0, implicit %2(i1) ... --- @@ -173,14 +231,22 @@ body: | ; CHECK-LABEL: name: copy_s1_sgpr_to_vcc ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY1]](s1) - %0:_(s32) = COPY $sgpr0 - %1:_(s1) = G_TRUNC %0 - %2:vcc(s1) = COPY %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY1]](i1) + ; + ; WAVE32-LABEL: name: copy_s1_sgpr_to_vcc + ; WAVE32: liveins: $sgpr0 + ; WAVE32-NEXT: {{ $}} + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; WAVE32-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY]](i32) + ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY1]](i1) + %0:_(i32) = COPY $sgpr0 + %1:_(i1) = G_TRUNC %0(i32) + %2:vcc(i1) = COPY %1(i1) + S_ENDPGM 0, implicit %2(i1) ... 
@@ -194,14 +260,22 @@ body: | ; CHECK-LABEL: name: copy_s1_vgpr_to_vcc ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY1]](s1) - %0:_(s32) = COPY $vgpr0 - %1:_(s1) = G_TRUNC %0 - %2:vcc(s1) = COPY %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY1]](i1) + ; + ; WAVE32-LABEL: name: copy_s1_vgpr_to_vcc + ; WAVE32: liveins: $vgpr0 + ; WAVE32-NEXT: {{ $}} + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; WAVE32-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY]](i32) + ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY1]](i1) + %0:_(i32) = COPY $vgpr0 + %1:_(i1) = G_TRUNC %0(i32) + %2:vcc(i1) = COPY %1(i1) + S_ENDPGM 0, implicit %2(i1) ... --- @@ -214,12 +288,20 @@ body: | ; CHECK-LABEL: name: wave64_copy_sgpr_64_to_s1 ; CHECK: liveins: $sgpr4_sgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vcc(s1) = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[CONST1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[CONST2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY]](s1), [[CONST1]], [[CONST2]] - %0:_(s1) = COPY $sgpr4_sgpr5 - %1:_(s32) = G_ZEXT %0:_(s1) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vcc(i1) = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY]](i1), [[C]], [[C1]] + ; + ; WAVE32-LABEL: name: wave64_copy_sgpr_64_to_s1 + ; WAVE32: liveins: $sgpr4_sgpr5 + ; WAVE32-NEXT: {{ $}} + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vcc(i1) = COPY $sgpr4_sgpr5 + ; WAVE32-NEXT: [[C:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 1 + ; WAVE32-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; WAVE32-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY]](i1), [[C]], [[C1]] + %0:_(i1) = COPY $sgpr4_sgpr5 + %1:_(i32) = G_ZEXT %0(i1) ... --- @@ -229,15 +311,23 @@ legalized: true body: | bb.0: liveins: $sgpr0 + ; CHECK-LABEL: name: wave32_copy_sgpr_32_to_s1 + ; CHECK: liveins: $sgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vcc(i1) = COPY $sgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY]](i1), [[C]], [[C1]] + ; ; WAVE32-LABEL: name: wave32_copy_sgpr_32_to_s1 ; WAVE32: liveins: $sgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vcc(s1) = COPY $sgpr0 - ; WAVE32-NEXT: [[CONST1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 - ; WAVE32-NEXT: [[CONST2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; WAVE32-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY]](s1), [[CONST1]], [[CONST2]] - %0:_(s1) = COPY $sgpr0 - %1:_(s32) = G_ZEXT %0:_(s1) + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vcc(i1) = COPY $sgpr0 + ; WAVE32-NEXT: [[C:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 1 + ; WAVE32-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; WAVE32-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY]](i1), [[C]], [[C1]] + %0:_(i1) = COPY $sgpr0 + %1:_(i32) = G_ZEXT %0(i1) ... 
--- @@ -250,18 +340,30 @@ body: | ; CHECK-LABEL: name: wave64_copy2_sgpr_64_to_s1 ; CHECK: liveins: $sgpr4_sgpr5, $sgpr6_sgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vcc(s1) = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vcc(s1) = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[CONST1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[CONST2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY1]](s1), [[CONST1]], [[CONST2]] - ; CHECK-NEXT: [[CONST3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[CONST4:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY2]](s1), [[CONST3]], [[CONST4]] - %0:_(s1) = COPY $sgpr4_sgpr5 - %1:_(s1) = COPY $sgpr6_sgpr7 - %2:_(s32) = G_ZEXT %0:_(s1) - %3:_(s32) = G_ZEXT %1:_(s1) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vcc(i1) = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vcc(i1) = COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY]](i1), [[C]], [[C1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C3:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY1]](i1), [[C2]], [[C3]] + ; + ; WAVE32-LABEL: name: wave64_copy2_sgpr_64_to_s1 + ; WAVE32: liveins: $sgpr4_sgpr5, $sgpr6_sgpr7 + ; WAVE32-NEXT: {{ $}} + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vcc(i1) = COPY $sgpr4_sgpr5 + ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vcc(i1) = COPY $sgpr6_sgpr7 + ; WAVE32-NEXT: [[C:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 1 + ; WAVE32-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; WAVE32-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY]](i1), [[C]], [[C1]] + ; WAVE32-NEXT: [[C2:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 1 + ; WAVE32-NEXT: [[C3:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; WAVE32-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY1]](i1), [[C2]], [[C3]] + %0:_(i1) = COPY $sgpr4_sgpr5 + %1:_(i1) = COPY $sgpr6_sgpr7 + %2:_(i32) = G_ZEXT %0(i1) + %3:_(i32) = G_ZEXT %1(i1) ... 
--- @@ -271,21 +373,33 @@ legalized: true body: | bb.0: liveins: $sgpr0, $sgpr1 + ; CHECK-LABEL: name: wave32_copy2_sgpr_32_to_s1 + ; CHECK: liveins: $sgpr0, $sgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vcc(i1) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vcc(i1) = COPY $sgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY]](i1), [[C]], [[C1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C3:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY1]](i1), [[C2]], [[C3]] + ; ; WAVE32-LABEL: name: wave32_copy2_sgpr_32_to_s1 ; WAVE32: liveins: $sgpr0, $sgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vcc(s1) = COPY $sgpr0 - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vcc(s1) = COPY $sgpr1 - ; WAVE32-NEXT: [[CONST1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 - ; WAVE32-NEXT: [[CONST2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; WAVE32-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY1]](s1), [[CONST1]], [[CONST2]] - ; WAVE32-NEXT: [[CONST3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 - ; WAVE32-NEXT: [[CONST4:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; WAVE32-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY2]](s1), [[CONST3]], [[CONST4]] - %0:_(s1) = COPY $sgpr0 - %1:_(s1) = COPY $sgpr1 - %2:_(s32) = G_ZEXT %0:_(s1) - %3:_(s32) = G_ZEXT %1:_(s1) + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vcc(i1) = COPY $sgpr0 + ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vcc(i1) = COPY $sgpr1 + ; WAVE32-NEXT: [[C:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 1 + ; WAVE32-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; WAVE32-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY]](i1), [[C]], [[C1]] + ; WAVE32-NEXT: [[C2:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 1 + ; WAVE32-NEXT: [[C3:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; WAVE32-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY1]](i1), [[C2]], [[C3]] + %0:_(i1) = COPY $sgpr0 + %1:_(i1) = COPY $sgpr1 + %2:_(i32) = G_ZEXT %0(i1) + %3:_(i32) = G_ZEXT %1(i1) ... --- @@ -298,16 +412,16 @@ body: | ; CHECK-LABEL: name: copy_sgpr_64_to_s1_vgpr ; CHECK: liveins: $sgpr4_sgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s1) = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:vgpr(s32) = G_ZEXT [[COPY]](s1) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i1) = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:vgpr(i32) = G_ZEXT [[COPY]](i1) ; ; WAVE32-LABEL: name: copy_sgpr_64_to_s1_vgpr ; WAVE32: liveins: $sgpr4_sgpr5 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(s1) = COPY $sgpr4_sgpr5 - ; WAVE32-NEXT: [[ZEXT:%[0-9]+]]:vgpr(s32) = G_ZEXT [[COPY]](s1) - %0:vgpr(s1) = COPY $sgpr4_sgpr5 - %1:_(s32) = G_ZEXT %0:vgpr(s1) + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(i1) = COPY $sgpr4_sgpr5 + ; WAVE32-NEXT: [[ZEXT:%[0-9]+]]:vgpr(i32) = G_ZEXT [[COPY]](i1) + %0:vgpr(i1) = COPY $sgpr4_sgpr5 + %1:_(i32) = G_ZEXT %0(i1) ... 
--- @@ -320,16 +434,16 @@ body: | ; CHECK-LABEL: name: copy_sgpr_32_to_s1_vgpr ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s1) = COPY $sgpr0 - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:vgpr(s32) = G_ZEXT [[COPY]](s1) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i1) = COPY $sgpr0 + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:vgpr(i32) = G_ZEXT [[COPY]](i1) ; ; WAVE32-LABEL: name: copy_sgpr_32_to_s1_vgpr ; WAVE32: liveins: $sgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(s1) = COPY $sgpr0 - ; WAVE32-NEXT: [[ZEXT:%[0-9]+]]:vgpr(s32) = G_ZEXT [[COPY]](s1) - %0:vgpr(s1) = COPY $sgpr0 - %1:_(s32) = G_ZEXT %0:vgpr(s1) + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(i1) = COPY $sgpr0 + ; WAVE32-NEXT: [[ZEXT:%[0-9]+]]:vgpr(i32) = G_ZEXT [[COPY]](i1) + %0:vgpr(i1) = COPY $sgpr0 + %1:_(i32) = G_ZEXT %0(i1) ... --- @@ -342,12 +456,20 @@ body: | ; CHECK-LABEL: name: wave64_copy_sgpr_64_to_s1_vcc ; CHECK: liveins: $sgpr4_sgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vcc(s1) = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[CONST1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[CONST2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY]](s1), [[CONST1]], [[CONST2]] - %0:vcc(s1) = COPY $sgpr4_sgpr5 - %1:_(s32) = G_ZEXT %0:vcc(s1) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vcc(i1) = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY]](i1), [[C]], [[C1]] + ; + ; WAVE32-LABEL: name: wave64_copy_sgpr_64_to_s1_vcc + ; WAVE32: liveins: $sgpr4_sgpr5 + ; WAVE32-NEXT: {{ $}} + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vcc(i1) = COPY $sgpr4_sgpr5 + ; WAVE32-NEXT: [[C:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 1 + ; WAVE32-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; WAVE32-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY]](i1), [[C]], [[C1]] + %0:vcc(i1) = COPY $sgpr4_sgpr5 + %1:_(i32) = G_ZEXT %0(i1) ... --- @@ -357,15 +479,23 @@ legalized: true body: | bb.0: liveins: $sgpr0 + ; CHECK-LABEL: name: wave32_copy_sgpr_32_to_s1_vcc + ; CHECK: liveins: $sgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vcc(i1) = COPY $sgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY]](i1), [[C]], [[C1]] + ; ; WAVE32-LABEL: name: wave32_copy_sgpr_32_to_s1_vcc ; WAVE32: liveins: $sgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vcc(s1) = COPY $sgpr0 - ; WAVE32-NEXT: [[CONST1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 - ; WAVE32-NEXT: [[CONST2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; WAVE32-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY]](s1), [[CONST1]], [[CONST2]] - %0:vcc(s1) = COPY $sgpr0 - %1:_(s32) = G_ZEXT %0:vcc(s1) + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vcc(i1) = COPY $sgpr0 + ; WAVE32-NEXT: [[C:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 1 + ; WAVE32-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; WAVE32-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY]](i1), [[C]], [[C1]] + %0:vcc(i1) = COPY $sgpr0 + %1:_(i32) = G_ZEXT %0(i1) ... 
--- @@ -378,19 +508,19 @@ body: | ; CHECK-LABEL: name: copy_virt_reg_to_s1 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s1) = COPY [[TRUNC]](s1) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i1) = COPY [[TRUNC]](i1) ; ; WAVE32-LABEL: name: copy_virt_reg_to_s1 ; WAVE32: liveins: $vgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; WAVE32-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr(s1) = COPY [[TRUNC]](s1) - %0:_(s32) = COPY $vgpr0 - %1:_(s1) = G_TRUNC %0 - %2:_(s1) = COPY %1 + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; WAVE32-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY]](i32) + ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr(i1) = COPY [[TRUNC]](i1) + %0:_(i32) = COPY $vgpr0 + %1:_(i1) = G_TRUNC %0(i32) + %2:_(i1) = COPY %1(i1) ... --- @@ -403,22 +533,22 @@ body: | ; CHECK-LABEL: name: copy_virt_reg_to_s1_vgpr ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s1) = COPY [[COPY2]](s1) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i1) = COPY [[TRUNC]](i1) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i1) = COPY [[COPY1]](i1) ; ; WAVE32-LABEL: name: copy_virt_reg_to_s1_vgpr ; WAVE32: liveins: $vgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; WAVE32-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr(s1) = COPY [[TRUNC]](s1) - ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:vgpr(s1) = COPY [[COPY2]](s1) - %0:_(s32) = COPY $vgpr0 - %1:_(s1) = G_TRUNC %0 - %2:vgpr(s1) = COPY %1 - %3:_(s1) = COPY %2 + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; WAVE32-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY]](i32) + ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr(i1) = COPY [[TRUNC]](i1) + ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr(i1) = COPY [[COPY1]](i1) + %0:_(i32) = COPY $vgpr0 + %1:_(i1) = G_TRUNC %0(i32) + %2:vgpr(i1) = COPY %1(i1) + %3:_(i1) = COPY %2(i1) ... 
@@ -432,22 +562,22 @@ body: | ; CHECK-LABEL: name: copy_virt_reg_to_s1_vcc ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[COPY2]](s1) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vcc(i1) = COPY [[COPY1]](i1) ; ; WAVE32-LABEL: name: copy_virt_reg_to_s1_vcc ; WAVE32: liveins: $vgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; WAVE32-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[COPY2]](s1) - %0:_(s32) = COPY $vgpr0 - %1:_(s1) = G_TRUNC %0 - %2:vcc(s1) = COPY %1 - %3:_(s1) = COPY %2 + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; WAVE32-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY]](i32) + ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vcc(i1) = COPY [[COPY1]](i1) + %0:_(i32) = COPY $vgpr0 + %1:_(i1) = G_TRUNC %0(i32) + %2:vcc(i1) = COPY %1(i1) + %3:_(i1) = COPY %2(i1) ... --- @@ -460,19 +590,19 @@ body: | ; CHECK-LABEL: name: copy_s1_to_sgpr_64 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[TRUNC]](s1) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[TRUNC]](i1) ; ; WAVE32-LABEL: name: copy_s1_to_sgpr_64 ; WAVE32: liveins: $vgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; WAVE32-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; WAVE32-NEXT: $sgpr4_sgpr5 = COPY [[TRUNC]](s1) - %0:_(s32) = COPY $vgpr0 - %1:_(s1) = G_TRUNC %0 - $sgpr4_sgpr5 = COPY %1 + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; WAVE32-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY]](i32) + ; WAVE32-NEXT: $sgpr4_sgpr5 = COPY [[TRUNC]](i1) + %0:_(i32) = COPY $vgpr0 + %1:_(i1) = G_TRUNC %0(i32) + $sgpr4_sgpr5 = COPY %1(i1) ... --- @@ -485,17 +615,17 @@ body: | ; CHECK-LABEL: name: copy_s1_to_sgpr_32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[TRUNC]](s1) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: $sgpr0 = COPY [[TRUNC]](i1) ; ; WAVE32-LABEL: name: copy_s1_to_sgpr_32 ; WAVE32: liveins: $vgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; WAVE32-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; WAVE32-NEXT: $sgpr0 = COPY [[TRUNC]](s1) - %0:_(s32) = COPY $vgpr0 - %1:_(s1) = G_TRUNC %0 - $sgpr0 = COPY %1 + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; WAVE32-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY]](i32) + ; WAVE32-NEXT: $sgpr0 = COPY [[TRUNC]](i1) + %0:_(i32) = COPY $vgpr0 + %1:_(i1) = G_TRUNC %0(i32) + $sgpr0 = COPY %1(i1) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ctlz-zero-undef.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ctlz-zero-undef.mir index 733d1342ff186..b80b206426dec 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ctlz-zero-undef.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ctlz-zero-undef.mir @@ -12,12 +12,12 @@ body: | ; CHECK-LABEL: name: ctlz_zero_undef_s32_s ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:sgpr(s32) = G_CTLZ_ZERO_UNDEF [[COPY]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[CTLZ_ZERO_UNDEF]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = G_CTLZ_ZERO_UNDEF %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:sgpr(i32) = G_CTLZ_ZERO_UNDEF [[COPY]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[CTLZ_ZERO_UNDEF]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = G_CTLZ_ZERO_UNDEF %0(i32) + S_ENDPGM 0, implicit %1(i32) ... --- @@ -30,12 +30,12 @@ body: | ; CHECK-LABEL: name: ctlz_zero_undef_s32_v ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:vgpr(s32) = G_CTLZ_ZERO_UNDEF [[COPY]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[CTLZ_ZERO_UNDEF]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_CTLZ_ZERO_UNDEF %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:vgpr(i32) = G_CTLZ_ZERO_UNDEF [[COPY]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[CTLZ_ZERO_UNDEF]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_CTLZ_ZERO_UNDEF %0(i32) + S_ENDPGM 0, implicit %1(i32) ... --- @@ -48,12 +48,12 @@ body: | ; CHECK-LABEL: name: ctlz_zero_undef_s64_s ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:sgpr(s32) = G_CTLZ_ZERO_UNDEF [[COPY]](s64) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[CTLZ_ZERO_UNDEF]](s32) - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_CTLZ_ZERO_UNDEF %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:sgpr(i32) = G_CTLZ_ZERO_UNDEF [[COPY]](i64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[CTLZ_ZERO_UNDEF]](i32) + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(i32) = G_CTLZ_ZERO_UNDEF %0(i64) + S_ENDPGM 0, implicit %1(i32) ... 
--- @@ -66,15 +66,15 @@ body: | ; CHECK-LABEL: name: ctlz_zero_undef_s64_v ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[AMDGPU_FFBH_U32_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FFBH_U32 [[UV1]](s32) - ; CHECK-NEXT: [[AMDGPU_FFBH_U32_1:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FFBH_U32 [[UV]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 32 - ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[AMDGPU_FFBH_U32_1]], [[C]] - ; CHECK-NEXT: [[UMIN:%[0-9]+]]:vgpr(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[ADD]] - ; CHECK-NEXT: S_ENDPGM 0, implicit [[UMIN]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_CTLZ_ZERO_UNDEF %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[AMDGPU_FFBH_U32_:%[0-9]+]]:vgpr(i32) = G_AMDGPU_FFBH_U32 [[UV1]](i32) + ; CHECK-NEXT: [[AMDGPU_FFBH_U32_1:%[0-9]+]]:vgpr(i32) = G_AMDGPU_FFBH_U32 [[UV]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 32 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[AMDGPU_FFBH_U32_1]], [[C]] + ; CHECK-NEXT: [[UMIN:%[0-9]+]]:vgpr(i32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[ADD]] + ; CHECK-NEXT: S_ENDPGM 0, implicit [[UMIN]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = G_CTLZ_ZERO_UNDEF %0(i64) + S_ENDPGM 0, implicit %1(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ctpop.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ctpop.mir index 1034c3c180468..7365e080f4b03 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ctpop.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ctpop.mir @@ -12,12 +12,12 @@ body: | ; CHECK-LABEL: name: ctpop_s32_s ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:sgpr(s32) = G_CTPOP [[COPY]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[CTPOP]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = G_CTPOP %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:sgpr(i32) = G_CTPOP [[COPY]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[CTPOP]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = G_CTPOP %0(i32) + S_ENDPGM 0, implicit %1(i32) ... --- @@ -30,12 +30,12 @@ body: | ; CHECK-LABEL: name: ctpop_s32_v ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:vgpr(s32) = G_CTPOP [[COPY]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[CTPOP]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_CTPOP %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:vgpr(i32) = G_CTPOP [[COPY]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[CTPOP]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_CTPOP %0(i32) + S_ENDPGM 0, implicit %1(i32) ... 
--- @@ -49,12 +49,12 @@ body: | ; CHECK-LABEL: name: ctpop_s64_s ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:sgpr(s32) = G_CTPOP [[COPY]](s64) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[CTPOP]](s32) - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_CTPOP %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:sgpr(i32) = G_CTPOP [[COPY]](i64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[CTPOP]](i32) + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(i32) = G_CTPOP %0(i64) + S_ENDPGM 0, implicit %1(i32) ... --- @@ -68,13 +68,13 @@ body: | ; CHECK-LABEL: name: ctpop_s64_v ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:vgpr(s32) = G_CTPOP [[UV]](s32) - ; CHECK-NEXT: [[CTPOP1:%[0-9]+]]:vgpr(s32) = G_CTPOP [[UV1]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[CTPOP1]], [[CTPOP]] - ; CHECK-NEXT: S_ENDPGM 0, implicit [[ADD]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_CTPOP %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:vgpr(i32) = G_CTPOP [[UV]](i32) + ; CHECK-NEXT: [[CTPOP1:%[0-9]+]]:vgpr(i32) = G_CTPOP [[UV1]](i32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[CTPOP1]], [[CTPOP]] + ; CHECK-NEXT: S_ENDPGM 0, implicit [[ADD]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = G_CTPOP %0(i64) + S_ENDPGM 0, implicit %1(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-cttz-zero-undef.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-cttz-zero-undef.mir index 8dfcefbcd32df..0caab3fb0e3b8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-cttz-zero-undef.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-cttz-zero-undef.mir @@ -12,12 +12,12 @@ body: | ; CHECK-LABEL: name: cttz_zero_undef_s32_s ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:sgpr(s32) = G_CTTZ_ZERO_UNDEF [[COPY]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[CTTZ_ZERO_UNDEF]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = G_CTTZ_ZERO_UNDEF %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:sgpr(i32) = G_CTTZ_ZERO_UNDEF [[COPY]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[CTTZ_ZERO_UNDEF]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = G_CTTZ_ZERO_UNDEF %0(i32) + S_ENDPGM 0, implicit %1(i32) ... 
--- @@ -30,12 +30,12 @@ body: | ; CHECK-LABEL: name: cttz_zero_undef_s32_v ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:vgpr(s32) = G_CTTZ_ZERO_UNDEF [[COPY]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[CTTZ_ZERO_UNDEF]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_CTTZ_ZERO_UNDEF %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:vgpr(i32) = G_CTTZ_ZERO_UNDEF [[COPY]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[CTTZ_ZERO_UNDEF]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_CTTZ_ZERO_UNDEF %0(i32) + S_ENDPGM 0, implicit %1(i32) ... --- @@ -48,12 +48,12 @@ body: | ; CHECK-LABEL: name: cttz_zero_undef_s64_s ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:sgpr(s32) = G_CTTZ_ZERO_UNDEF [[COPY]](s64) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[CTTZ_ZERO_UNDEF]](s32) - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_CTTZ_ZERO_UNDEF %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:sgpr(i32) = G_CTTZ_ZERO_UNDEF [[COPY]](i64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[CTTZ_ZERO_UNDEF]](i32) + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(i32) = G_CTTZ_ZERO_UNDEF %0(i64) + S_ENDPGM 0, implicit %1(i32) ... --- @@ -66,15 +66,15 @@ body: | ; CHECK-LABEL: name: cttz_zero_undef_s64_v ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[AMDGPU_FFBL_B32_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FFBL_B32 [[UV]](s32) - ; CHECK-NEXT: [[AMDGPU_FFBL_B32_1:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FFBL_B32 [[UV1]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 32 - ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[AMDGPU_FFBL_B32_1]], [[C]] - ; CHECK-NEXT: [[UMIN:%[0-9]+]]:vgpr(s32) = G_UMIN [[AMDGPU_FFBL_B32_]], [[ADD]] - ; CHECK-NEXT: S_ENDPGM 0, implicit [[UMIN]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_CTTZ_ZERO_UNDEF %0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[AMDGPU_FFBL_B32_:%[0-9]+]]:vgpr(i32) = G_AMDGPU_FFBL_B32 [[UV]](i32) + ; CHECK-NEXT: [[AMDGPU_FFBL_B32_1:%[0-9]+]]:vgpr(i32) = G_AMDGPU_FFBL_B32 [[UV1]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 32 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[AMDGPU_FFBL_B32_1]], [[C]] + ; CHECK-NEXT: [[UMIN:%[0-9]+]]:vgpr(i32) = G_UMIN [[AMDGPU_FFBL_B32_]], [[ADD]] + ; CHECK-NEXT: S_ENDPGM 0, implicit [[UMIN]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = G_CTTZ_ZERO_UNDEF %0(i64) + S_ENDPGM 0, implicit %1(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-default.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-default.mir index bd699956500ca..b912e457466b6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-default.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-default.mir @@ -9,8 +9,8 @@ legalized: true body: | bb.0: ; CHECK-LABEL: name: test_fconstant_f32_1 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00 - %0:_(s32) = G_FCONSTANT float 1.0 + ; CHECK: [[C:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 1.000000e+00 + %0:_(f32) = G_FCONSTANT float 1.000000e+00 ... --- name: test_fconstant_f64_1 @@ -18,8 +18,8 @@ legalized: true body: | bb.0: ; CHECK-LABEL: name: test_fconstant_f64_1 - ; CHECK: [[C:%[0-9]+]]:sgpr(s64) = G_FCONSTANT double 1.000000e+00 - %0:_(s64) = G_FCONSTANT double 1.0 + ; CHECK: [[C:%[0-9]+]]:sgpr(f64) = G_FCONSTANT double 1.000000e+00 + %0:_(f64) = G_FCONSTANT double 1.000000e+00 ... --- name: test_fconstant_f16_1 @@ -27,10 +27,12 @@ legalized: true body: | bb.0: ; CHECK-LABEL: name: test_fconstant_f16_1 - ; CHECK: [[C:%[0-9]+]]:sgpr(s16) = G_FCONSTANT half 0xH3C00 - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[C]](s16) - %0:_(s16) = G_FCONSTANT half 1.0 - %1:_(s32) = G_ANYEXT %0 + ; CHECK: [[C:%[0-9]+]]:sgpr(f16) = G_FCONSTANT half 0xH3C00 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(i16) = G_BITCAST [[C]](f16) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[BITCAST]](i16) + %0:_(f16) = G_FCONSTANT half 0xH3C00 + %1:_(i16) = G_BITCAST %0(f16) + %2:_(i32) = G_ANYEXT %1(i16) ... --- @@ -39,8 +41,8 @@ legalized: true body: | bb.0: ; CHECK-LABEL: name: test_implicit_def_s32 - ; CHECK: [[DEF:%[0-9]+]]:sgpr(s32) = G_IMPLICIT_DEF - %0:_(s32) = G_IMPLICIT_DEF + ; CHECK: [[DEF:%[0-9]+]]:sgpr(f32) = G_IMPLICIT_DEF + %0:_(f32) = G_IMPLICIT_DEF ... --- @@ -49,6 +51,6 @@ legalized: true body: | bb.0: ; CHECK-LABEL: name: test_implicit_def_s64 - ; CHECK: [[DEF:%[0-9]+]]:sgpr(s64) = G_IMPLICIT_DEF - %0:_(s64) = G_IMPLICIT_DEF + ; CHECK: [[DEF:%[0-9]+]]:sgpr(f64) = G_IMPLICIT_DEF + %0:_(f64) = G_IMPLICIT_DEF ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-dyn-stackalloc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-dyn-stackalloc.mir index 10517a49e697c..d0a979ed7db1e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-dyn-stackalloc.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-dyn-stackalloc.mir @@ -19,29 +19,29 @@ body: | ; WAVE64-LABEL: name: test_dyn_stackalloc_sgpr_align1 ; WAVE64: liveins: $sgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) + ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 6 + ; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(i32) = G_SHL [[COPY]], [[C]](i32) ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:sgpr(p5) = COPY [[COPY1]](p5) - ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY2]], [[SHL]](s32) + ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY2]], [[SHL]](i32) ; WAVE64-NEXT: $sp_reg = COPY [[PTR_ADD]](p5) ; WAVE64-NEXT: S_ENDPGM 0, implicit [[COPY2]](p5) ; ; WAVE32-LABEL: name: test_dyn_stackalloc_sgpr_align1 ; WAVE32: liveins: $sgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 - ; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 5 + ; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(i32) = G_SHL [[COPY]], [[C]](i32) ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:sgpr(p5) = COPY [[COPY1]](p5) - ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY2]], [[SHL]](s32) + ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY2]], [[SHL]](i32) ; WAVE32-NEXT: $sp_reg = COPY [[PTR_ADD]](p5) ; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY2]](p5) - %0:_(s32) = COPY $sgpr0 - %1:_(p5) = G_DYN_STACKALLOC %0, 1 - S_ENDPGM 0, implicit %1 + %0:_(i32) = COPY $sgpr0 + %1:_(p5) = G_DYN_STACKALLOC %0(i32), 1 + S_ENDPGM 0, implicit %1(p5) ... 
--- @@ -58,29 +58,29 @@ body: | ; WAVE64-LABEL: name: test_dyn_stackalloc_sgpr_align2 ; WAVE64: liveins: $sgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) + ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 6 + ; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(i32) = G_SHL [[COPY]], [[C]](i32) ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:sgpr(p5) = COPY [[COPY1]](p5) - ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY2]], [[SHL]](s32) + ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY2]], [[SHL]](i32) ; WAVE64-NEXT: $sp_reg = COPY [[PTR_ADD]](p5) ; WAVE64-NEXT: S_ENDPGM 0, implicit [[COPY2]](p5) ; ; WAVE32-LABEL: name: test_dyn_stackalloc_sgpr_align2 ; WAVE32: liveins: $sgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 - ; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 5 + ; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(i32) = G_SHL [[COPY]], [[C]](i32) ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:sgpr(p5) = COPY [[COPY1]](p5) - ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY2]], [[SHL]](s32) + ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY2]], [[SHL]](i32) ; WAVE32-NEXT: $sp_reg = COPY [[PTR_ADD]](p5) ; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY2]](p5) - %0:_(s32) = COPY $sgpr0 - %1:_(p5) = G_DYN_STACKALLOC %0, 2 - S_ENDPGM 0, implicit %1 + %0:_(i32) = COPY $sgpr0 + %1:_(p5) = G_DYN_STACKALLOC %0(i32), 2 + S_ENDPGM 0, implicit %1(p5) ... 
--- @@ -97,29 +97,29 @@ body: | ; WAVE64-LABEL: name: test_dyn_stackalloc_sgpr_align4 ; WAVE64: liveins: $sgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) + ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 6 + ; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(i32) = G_SHL [[COPY]], [[C]](i32) ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:sgpr(p5) = COPY [[COPY1]](p5) - ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY2]], [[SHL]](s32) + ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY2]], [[SHL]](i32) ; WAVE64-NEXT: $sp_reg = COPY [[PTR_ADD]](p5) ; WAVE64-NEXT: S_ENDPGM 0, implicit [[COPY2]](p5) ; ; WAVE32-LABEL: name: test_dyn_stackalloc_sgpr_align4 ; WAVE32: liveins: $sgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 - ; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 5 + ; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(i32) = G_SHL [[COPY]], [[C]](i32) ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:sgpr(p5) = COPY [[COPY1]](p5) - ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY2]], [[SHL]](s32) + ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY2]], [[SHL]](i32) ; WAVE32-NEXT: $sp_reg = COPY [[PTR_ADD]](p5) ; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY2]](p5) - %0:_(s32) = COPY $sgpr0 - %1:_(p5) = G_DYN_STACKALLOC %0, 4 - S_ENDPGM 0, implicit %1 + %0:_(i32) = COPY $sgpr0 + %1:_(p5) = G_DYN_STACKALLOC %0(i32), 4 + S_ENDPGM 0, implicit %1(p5) ... 
--- @@ -136,29 +136,29 @@ body: | ; WAVE64-LABEL: name: test_dyn_stackalloc_sgpr_align8 ; WAVE64: liveins: $sgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) + ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 6 + ; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(i32) = G_SHL [[COPY]], [[C]](i32) ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:sgpr(p5) = COPY [[COPY1]](p5) - ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY2]], [[SHL]](s32) + ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY2]], [[SHL]](i32) ; WAVE64-NEXT: $sp_reg = COPY [[PTR_ADD]](p5) ; WAVE64-NEXT: S_ENDPGM 0, implicit [[COPY2]](p5) ; ; WAVE32-LABEL: name: test_dyn_stackalloc_sgpr_align8 ; WAVE32: liveins: $sgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 - ; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 5 + ; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(i32) = G_SHL [[COPY]], [[C]](i32) ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:sgpr(p5) = COPY [[COPY1]](p5) - ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY2]], [[SHL]](s32) + ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY2]], [[SHL]](i32) ; WAVE32-NEXT: $sp_reg = COPY [[PTR_ADD]](p5) ; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY2]](p5) - %0:_(s32) = COPY $sgpr0 - %1:_(p5) = G_DYN_STACKALLOC %0, 8 - S_ENDPGM 0, implicit %1 + %0:_(i32) = COPY $sgpr0 + %1:_(p5) = G_DYN_STACKALLOC %0(i32), 8 + S_ENDPGM 0, implicit %1(p5) ... 
--- @@ -175,29 +175,29 @@ body: | ; WAVE64-LABEL: name: test_dyn_stackalloc_sgpr_align16 ; WAVE64: liveins: $sgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) + ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 6 + ; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(i32) = G_SHL [[COPY]], [[C]](i32) ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:sgpr(p5) = COPY [[COPY1]](p5) - ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY2]], [[SHL]](s32) + ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY2]], [[SHL]](i32) ; WAVE64-NEXT: $sp_reg = COPY [[PTR_ADD]](p5) ; WAVE64-NEXT: S_ENDPGM 0, implicit [[COPY2]](p5) ; ; WAVE32-LABEL: name: test_dyn_stackalloc_sgpr_align16 ; WAVE32: liveins: $sgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 - ; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 5 + ; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(i32) = G_SHL [[COPY]], [[C]](i32) ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:sgpr(p5) = COPY [[COPY1]](p5) - ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY2]], [[SHL]](s32) + ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY2]], [[SHL]](i32) ; WAVE32-NEXT: $sp_reg = COPY [[PTR_ADD]](p5) ; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY2]](p5) - %0:_(s32) = COPY $sgpr0 - %1:_(p5) = G_DYN_STACKALLOC %0, 16 - S_ENDPGM 0, implicit %1 + %0:_(i32) = COPY $sgpr0 + %1:_(p5) = G_DYN_STACKALLOC %0(i32), 16 + S_ENDPGM 0, implicit %1(p5) ... 
--- @@ -214,35 +214,35 @@ body: | ; WAVE64-LABEL: name: test_dyn_stackalloc_sgpr_align32 ; WAVE64: liveins: $sgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) + ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 6 + ; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(i32) = G_SHL [[COPY]], [[C]](i32) ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2047 - ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[C1]](s32) - ; WAVE64-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -2048 - ; WAVE64-NEXT: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C2]](s32) - ; WAVE64-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PTRMASK]], [[SHL]](s32) + ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 2047 + ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[C1]](i32) + ; WAVE64-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 -2048 + ; WAVE64-NEXT: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C2]](i32) + ; WAVE64-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PTRMASK]], [[SHL]](i32) ; WAVE64-NEXT: $sp_reg = COPY [[PTR_ADD1]](p5) ; WAVE64-NEXT: S_ENDPGM 0, implicit [[PTRMASK]](p5) ; ; WAVE32-LABEL: name: test_dyn_stackalloc_sgpr_align32 ; WAVE32: liveins: $sgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 - ; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 5 + ; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(i32) = G_SHL [[COPY]], [[C]](i32) ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1023 - ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[C1]](s32) - ; WAVE32-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -1024 - ; WAVE32-NEXT: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C2]](s32) - ; WAVE32-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PTRMASK]], [[SHL]](s32) + ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1023 + ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[C1]](i32) + ; WAVE32-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 -1024 + ; WAVE32-NEXT: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C2]](i32) + ; WAVE32-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PTRMASK]], [[SHL]](i32) ; WAVE32-NEXT: $sp_reg = COPY [[PTR_ADD1]](p5) ; WAVE32-NEXT: S_ENDPGM 0, implicit [[PTRMASK]](p5) - %0:_(s32) = COPY $sgpr0 - %1:_(p5) = G_DYN_STACKALLOC %0, 32 - S_ENDPGM 0, implicit %1 + %0:_(i32) = COPY $sgpr0 + %1:_(p5) = G_DYN_STACKALLOC %0(i32), 32 + S_ENDPGM 0, implicit %1(p5) ... 
--- @@ -259,35 +259,35 @@ body: | ; WAVE64-LABEL: name: test_dyn_stackalloc_sgpr_align64 ; WAVE64: liveins: $sgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) + ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 6 + ; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(i32) = G_SHL [[COPY]], [[C]](i32) ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4095 - ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[C1]](s32) - ; WAVE64-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -4096 - ; WAVE64-NEXT: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C2]](s32) - ; WAVE64-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PTRMASK]], [[SHL]](s32) + ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4095 + ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[C1]](i32) + ; WAVE64-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 -4096 + ; WAVE64-NEXT: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C2]](i32) + ; WAVE64-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PTRMASK]], [[SHL]](i32) ; WAVE64-NEXT: $sp_reg = COPY [[PTR_ADD1]](p5) ; WAVE64-NEXT: S_ENDPGM 0, implicit [[PTRMASK]](p5) ; ; WAVE32-LABEL: name: test_dyn_stackalloc_sgpr_align64 ; WAVE32: liveins: $sgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 - ; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 5 + ; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(i32) = G_SHL [[COPY]], [[C]](i32) ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2047 - ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[C1]](s32) - ; WAVE32-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -2048 - ; WAVE32-NEXT: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C2]](s32) - ; WAVE32-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PTRMASK]], [[SHL]](s32) + ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 2047 + ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[C1]](i32) + ; WAVE32-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 -2048 + ; WAVE32-NEXT: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C2]](i32) + ; WAVE32-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PTRMASK]], [[SHL]](i32) ; WAVE32-NEXT: $sp_reg = COPY [[PTR_ADD1]](p5) ; WAVE32-NEXT: S_ENDPGM 0, implicit [[PTRMASK]](p5) - %0:_(s32) = COPY $sgpr0 - %1:_(p5) = G_DYN_STACKALLOC %0, 64 - S_ENDPGM 0, implicit %1 + %0:_(i32) = COPY $sgpr0 + %1:_(p5) = G_DYN_STACKALLOC %0(i32), 64 + S_ENDPGM 0, implicit %1(p5) ... 
--- @@ -304,35 +304,35 @@ body: | ; WAVE64-LABEL: name: test_dyn_stackalloc_sgpr_align128 ; WAVE64: liveins: $sgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) + ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 6 + ; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(i32) = G_SHL [[COPY]], [[C]](i32) ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 8191 - ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[C1]](s32) - ; WAVE64-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -8192 - ; WAVE64-NEXT: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C2]](s32) - ; WAVE64-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PTRMASK]], [[SHL]](s32) + ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 8191 + ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[C1]](i32) + ; WAVE64-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 -8192 + ; WAVE64-NEXT: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C2]](i32) + ; WAVE64-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PTRMASK]], [[SHL]](i32) ; WAVE64-NEXT: $sp_reg = COPY [[PTR_ADD1]](p5) ; WAVE64-NEXT: S_ENDPGM 0, implicit [[PTRMASK]](p5) ; ; WAVE32-LABEL: name: test_dyn_stackalloc_sgpr_align128 ; WAVE32: liveins: $sgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 - ; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 5 + ; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(i32) = G_SHL [[COPY]], [[C]](i32) ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4095 - ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[C1]](s32) - ; WAVE32-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -4096 - ; WAVE32-NEXT: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C2]](s32) - ; WAVE32-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PTRMASK]], [[SHL]](s32) + ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4095 + ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[C1]](i32) + ; WAVE32-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 -4096 + ; WAVE32-NEXT: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C2]](i32) + ; WAVE32-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PTRMASK]], [[SHL]](i32) ; WAVE32-NEXT: $sp_reg = COPY [[PTR_ADD1]](p5) ; WAVE32-NEXT: S_ENDPGM 0, implicit [[PTRMASK]](p5) - %0:_(s32) = COPY $sgpr0 - %1:_(p5) = G_DYN_STACKALLOC %0, 128 - S_ENDPGM 0, implicit %1 + %0:_(i32) = COPY $sgpr0 + %1:_(p5) = G_DYN_STACKALLOC %0(i32), 128 + S_ENDPGM 0, implicit %1(p5) ... 
--- @@ -346,27 +346,27 @@ body: | bb.0: ; WAVE64-LABEL: name: test_dyn_stackalloc_sgpr_constant_align4 - ; WAVE64: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 32 - ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C]], [[C1]](s32) + ; WAVE64: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 32 + ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 6 + ; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(i32) = G_SHL [[C]], [[C1]](i32) ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sp_reg ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY [[COPY]](p5) - ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](i32) ; WAVE64-NEXT: $sp_reg = COPY [[PTR_ADD]](p5) ; WAVE64-NEXT: S_ENDPGM 0, implicit [[COPY1]](p5) ; ; WAVE32-LABEL: name: test_dyn_stackalloc_sgpr_constant_align4 - ; WAVE32: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 32 - ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 - ; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C]], [[C1]](s32) + ; WAVE32: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 32 + ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 5 + ; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(i32) = G_SHL [[C]], [[C1]](i32) ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sp_reg ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY [[COPY]](p5) - ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](i32) ; WAVE32-NEXT: $sp_reg = COPY [[PTR_ADD]](p5) ; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY1]](p5) - %0:_(s32) = G_CONSTANT i32 32 - %1:_(p5) = G_DYN_STACKALLOC %0, 4 - S_ENDPGM 0, implicit %1 + %0:_(i32) = G_CONSTANT i32 32 + %1:_(p5) = G_DYN_STACKALLOC %0(i32), 4 + S_ENDPGM 0, implicit %1(p5) ... 
--- @@ -383,29 +383,29 @@ body: | ; WAVE64-LABEL: name: test_dyn_stackalloc_sgpr_constant_align8 ; WAVE64: liveins: $sgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 32 - ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C]], [[C1]](s32) + ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 32 + ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 6 + ; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(i32) = G_SHL [[C]], [[C1]](i32) ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sp_reg ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY [[COPY]](p5) - ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](i32) ; WAVE64-NEXT: $sp_reg = COPY [[PTR_ADD]](p5) ; WAVE64-NEXT: S_ENDPGM 0, implicit [[COPY1]](p5) ; ; WAVE32-LABEL: name: test_dyn_stackalloc_sgpr_constant_align8 ; WAVE32: liveins: $sgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 32 - ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 - ; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C]], [[C1]](s32) + ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 32 + ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 5 + ; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(i32) = G_SHL [[C]], [[C1]](i32) ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sp_reg ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY [[COPY]](p5) - ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](i32) ; WAVE32-NEXT: $sp_reg = COPY [[PTR_ADD]](p5) ; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY1]](p5) - %0:_(s32) = G_CONSTANT i32 32 - %1:_(p5) = G_DYN_STACKALLOC %0, 8 - S_ENDPGM 0, implicit %1 + %0:_(i32) = G_CONSTANT i32 32 + %1:_(p5) = G_DYN_STACKALLOC %0(i32), 8 + S_ENDPGM 0, implicit %1(p5) ... 
--- @@ -422,29 +422,29 @@ body: | ; WAVE64-LABEL: name: test_dyn_stackalloc_sgpr_constant_align16 ; WAVE64: liveins: $sgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 32 - ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C]], [[C1]](s32) + ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 32 + ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 6 + ; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(i32) = G_SHL [[C]], [[C1]](i32) ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sp_reg ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY [[COPY]](p5) - ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](i32) ; WAVE64-NEXT: $sp_reg = COPY [[PTR_ADD]](p5) ; WAVE64-NEXT: S_ENDPGM 0, implicit [[COPY1]](p5) ; ; WAVE32-LABEL: name: test_dyn_stackalloc_sgpr_constant_align16 ; WAVE32: liveins: $sgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 32 - ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 - ; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C]], [[C1]](s32) + ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 32 + ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 5 + ; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(i32) = G_SHL [[C]], [[C1]](i32) ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sp_reg ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY [[COPY]](p5) - ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](i32) ; WAVE32-NEXT: $sp_reg = COPY [[PTR_ADD]](p5) ; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY1]](p5) - %0:_(s32) = G_CONSTANT i32 32 - %1:_(p5) = G_DYN_STACKALLOC %0, 16 - S_ENDPGM 0, implicit %1 + %0:_(i32) = G_CONSTANT i32 32 + %1:_(p5) = G_DYN_STACKALLOC %0(i32), 16 + S_ENDPGM 0, implicit %1(p5) ... 
--- @@ -461,35 +461,35 @@ body: | ; WAVE64-LABEL: name: test_dyn_stackalloc_sgpr_constant_align32 ; WAVE64: liveins: $sgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 32 - ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C]], [[C1]](s32) + ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 32 + ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 6 + ; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(i32) = G_SHL [[C]], [[C1]](i32) ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE64-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2047 - ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; WAVE64-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -2048 - ; WAVE64-NEXT: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C3]](s32) - ; WAVE64-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PTRMASK]], [[SHL]](s32) + ; WAVE64-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 2047 + ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; WAVE64-NEXT: [[C3:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 -2048 + ; WAVE64-NEXT: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C3]](i32) + ; WAVE64-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PTRMASK]], [[SHL]](i32) ; WAVE64-NEXT: $sp_reg = COPY [[PTR_ADD1]](p5) ; WAVE64-NEXT: S_ENDPGM 0, implicit [[PTRMASK]](p5) ; ; WAVE32-LABEL: name: test_dyn_stackalloc_sgpr_constant_align32 ; WAVE32: liveins: $sgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 32 - ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 - ; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C]], [[C1]](s32) + ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 32 + ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 5 + ; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(i32) = G_SHL [[C]], [[C1]](i32) ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE32-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1023 - ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; WAVE32-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -1024 - ; WAVE32-NEXT: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C3]](s32) - ; WAVE32-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PTRMASK]], [[SHL]](s32) + ; WAVE32-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1023 + ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY]], [[C2]](i32) + ; WAVE32-NEXT: [[C3:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 -1024 + ; WAVE32-NEXT: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C3]](i32) + ; WAVE32-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PTRMASK]], [[SHL]](i32) ; WAVE32-NEXT: $sp_reg = COPY [[PTR_ADD1]](p5) ; WAVE32-NEXT: S_ENDPGM 0, implicit [[PTRMASK]](p5) - %0:_(s32) = G_CONSTANT i32 32 - %1:_(p5) = G_DYN_STACKALLOC %0, 32 - S_ENDPGM 0, implicit %1 + %0:_(i32) = G_CONSTANT i32 32 + %1:_(p5) = G_DYN_STACKALLOC %0(i32), 32 + S_ENDPGM 0, implicit %1(p5) ... 
--- @@ -506,31 +506,31 @@ body: | ; WAVE64-LABEL: name: test_dyn_stackalloc_vgpr_align4 ; WAVE64: liveins: $vgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; WAVE64-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wave.reduce.umax), [[COPY]](s32), 0 - ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[INTRINSIC_CONVERGENT]], [[C]](s32) + ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; WAVE64-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wave.reduce.umax), [[COPY]](i32), 0 + ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 6 + ; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(i32) = G_SHL [[INTRINSIC_CONVERGENT]], [[C]](i32) ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:sgpr(p5) = COPY [[COPY1]](p5) - ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY2]], [[SHL]](s32) + ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY2]], [[SHL]](i32) ; WAVE64-NEXT: $sp_reg = COPY [[PTR_ADD]](p5) ; WAVE64-NEXT: S_ENDPGM 0, implicit [[COPY2]](p5) ; ; WAVE32-LABEL: name: test_dyn_stackalloc_vgpr_align4 ; WAVE32: liveins: $vgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; WAVE32-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wave.reduce.umax), [[COPY]](s32), 0 - ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 - ; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[INTRINSIC_CONVERGENT]], [[C]](s32) + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; WAVE32-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wave.reduce.umax), [[COPY]](i32), 0 + ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 5 + ; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(i32) = G_SHL [[INTRINSIC_CONVERGENT]], [[C]](i32) ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:sgpr(p5) = COPY [[COPY1]](p5) - ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY2]], [[SHL]](s32) + ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY2]], [[SHL]](i32) ; WAVE32-NEXT: $sp_reg = COPY [[PTR_ADD]](p5) ; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY2]](p5) - %0:_(s32) = COPY $vgpr0 - %1:_(p5) = G_DYN_STACKALLOC %0, 4 - S_ENDPGM 0, implicit %1 + %0:_(i32) = COPY $vgpr0 + %1:_(p5) = G_DYN_STACKALLOC %0(i32), 4 + S_ENDPGM 0, implicit %1(p5) ... 
--- @@ -547,31 +547,31 @@ body: | ; WAVE64-LABEL: name: test_dyn_stackalloc_vgpr_align16 ; WAVE64: liveins: $vgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; WAVE64-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wave.reduce.umax), [[COPY]](s32), 0 - ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[INTRINSIC_CONVERGENT]], [[C]](s32) + ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; WAVE64-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wave.reduce.umax), [[COPY]](i32), 0 + ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 6 + ; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(i32) = G_SHL [[INTRINSIC_CONVERGENT]], [[C]](i32) ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:sgpr(p5) = COPY [[COPY1]](p5) - ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY2]], [[SHL]](s32) + ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY2]], [[SHL]](i32) ; WAVE64-NEXT: $sp_reg = COPY [[PTR_ADD]](p5) ; WAVE64-NEXT: S_ENDPGM 0, implicit [[COPY2]](p5) ; ; WAVE32-LABEL: name: test_dyn_stackalloc_vgpr_align16 ; WAVE32: liveins: $vgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; WAVE32-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wave.reduce.umax), [[COPY]](s32), 0 - ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 - ; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[INTRINSIC_CONVERGENT]], [[C]](s32) + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; WAVE32-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wave.reduce.umax), [[COPY]](i32), 0 + ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 5 + ; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(i32) = G_SHL [[INTRINSIC_CONVERGENT]], [[C]](i32) ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:sgpr(p5) = COPY [[COPY1]](p5) - ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY2]], [[SHL]](s32) + ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY2]], [[SHL]](i32) ; WAVE32-NEXT: $sp_reg = COPY [[PTR_ADD]](p5) ; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY2]](p5) - %0:_(s32) = COPY $vgpr0 - %1:_(p5) = G_DYN_STACKALLOC %0, 16 - S_ENDPGM 0, implicit %1 + %0:_(i32) = COPY $vgpr0 + %1:_(p5) = G_DYN_STACKALLOC %0(i32), 16 + S_ENDPGM 0, implicit %1(p5) ... 
--- @@ -588,35 +588,35 @@ body: | ; WAVE64-LABEL: name: test_dyn_stackalloc_vgpr_align64 ; WAVE64: liveins: $vgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; WAVE64-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wave.reduce.umax), [[COPY]](s32), 0 - ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[INTRINSIC_CONVERGENT]], [[C]](s32) + ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; WAVE64-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wave.reduce.umax), [[COPY]](i32), 0 + ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 6 + ; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(i32) = G_SHL [[INTRINSIC_CONVERGENT]], [[C]](i32) ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4095 - ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[C1]](s32) - ; WAVE64-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -4096 - ; WAVE64-NEXT: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C2]](s32) - ; WAVE64-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PTRMASK]], [[SHL]](s32) + ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4095 + ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[C1]](i32) + ; WAVE64-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 -4096 + ; WAVE64-NEXT: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C2]](i32) + ; WAVE64-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PTRMASK]], [[SHL]](i32) ; WAVE64-NEXT: $sp_reg = COPY [[PTR_ADD1]](p5) ; WAVE64-NEXT: S_ENDPGM 0, implicit [[PTRMASK]](p5) ; ; WAVE32-LABEL: name: test_dyn_stackalloc_vgpr_align64 ; WAVE32: liveins: $vgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; WAVE32-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wave.reduce.umax), [[COPY]](s32), 0 - ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 - ; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[INTRINSIC_CONVERGENT]], [[C]](s32) + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; WAVE32-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(i32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wave.reduce.umax), [[COPY]](i32), 0 + ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 5 + ; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(i32) = G_SHL [[INTRINSIC_CONVERGENT]], [[C]](i32) ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2047 - ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[C1]](s32) - ; WAVE32-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -2048 - ; WAVE32-NEXT: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C2]](s32) - ; WAVE32-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PTRMASK]], [[SHL]](s32) + ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 2047 + ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[C1]](i32) + ; WAVE32-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 -2048 + ; WAVE32-NEXT: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C2]](i32) + ; WAVE32-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PTRMASK]], [[SHL]](i32) ; WAVE32-NEXT: $sp_reg = COPY [[PTR_ADD1]](p5) ; WAVE32-NEXT: S_ENDPGM 0, implicit [[PTRMASK]](p5) - %0:_(s32) = COPY $vgpr0 - %1:_(p5) = G_DYN_STACKALLOC %0, 64 - S_ENDPGM 0, implicit %1 
+ %0:_(i32) = COPY $vgpr0 + %1:_(p5) = G_DYN_STACKALLOC %0(i32), 64 + S_ENDPGM 0, implicit %1(p5) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir index de02b426776ef..f7b0a28d0fd92 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir @@ -17,21 +17,22 @@ body: | ; WAVE64-LABEL: name: extract_vector_elt_v16s32_ss ; WAVE64: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $sgpr16 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr16 - ; WAVE64-NEXT: [[EVEC:%[0-9]+]]:sgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[COPY1]](s32) - ; WAVE64-NEXT: $vgpr0 = COPY [[EVEC]](s32) + ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(<16 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr16 + ; WAVE64-NEXT: [[EVEC:%[0-9]+]]:sgpr(i32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x i32>), [[COPY1]](i32) + ; WAVE64-NEXT: $vgpr0 = COPY [[EVEC]](i32) + ; ; WAVE32-LABEL: name: extract_vector_elt_v16s32_ss ; WAVE32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $sgpr16 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr16 - ; WAVE32-NEXT: [[EVEC:%[0-9]+]]:sgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[COPY1]](s32) - ; WAVE32-NEXT: $vgpr0 = COPY [[EVEC]](s32) - %0:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - %1:_(s32) = COPY $sgpr16 - %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1 - $vgpr0 = COPY %2 + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(<16 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr16 + ; WAVE32-NEXT: [[EVEC:%[0-9]+]]:sgpr(i32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x i32>), [[COPY1]](i32) + ; WAVE32-NEXT: $vgpr0 = COPY [[EVEC]](i32) + %0:_(<16 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + %1:_(i32) = COPY $sgpr16 + %2:_(i32) = G_EXTRACT_VECTOR_ELT %0(<16 x i32>), %1(i32) + $vgpr0 = COPY %2(i32) ... 
--- @@ -46,113 +47,114 @@ body: | ; WAVE64-LABEL: name: extract_vector_elt_v16s32_sv ; WAVE64: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $vgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; WAVE64-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; WAVE64-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; WAVE64-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV]] - ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; WAVE64-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; WAVE64-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV2]], [[SELECT]] - ; WAVE64-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 - ; WAVE64-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C2]] - ; WAVE64-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV3]], [[SELECT1]] - ; WAVE64-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 - ; WAVE64-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C3]] - ; WAVE64-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV4]], [[SELECT2]] - ; WAVE64-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 - ; WAVE64-NEXT: [[ICMP4:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C4]] - ; WAVE64-NEXT: [[SELECT4:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV5]], [[SELECT3]] - ; WAVE64-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; WAVE64-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C5]] - ; WAVE64-NEXT: [[SELECT5:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV6]], [[SELECT4]] - ; WAVE64-NEXT: [[C6:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 7 - ; WAVE64-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C6]] - ; WAVE64-NEXT: [[SELECT6:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV7]], [[SELECT5]] - ; WAVE64-NEXT: [[C7:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 8 - ; WAVE64-NEXT: [[ICMP7:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C7]] - ; WAVE64-NEXT: [[SELECT7:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP7]](s1), [[UV8]], [[SELECT6]] - ; WAVE64-NEXT: [[C8:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 9 - ; WAVE64-NEXT: [[ICMP8:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C8]] - ; WAVE64-NEXT: [[SELECT8:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP8]](s1), [[UV9]], [[SELECT7]] - ; WAVE64-NEXT: [[C9:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 - ; WAVE64-NEXT: [[ICMP9:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C9]] - ; WAVE64-NEXT: [[SELECT9:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP9]](s1), [[UV10]], [[SELECT8]] - ; WAVE64-NEXT: [[C10:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 11 - ; WAVE64-NEXT: [[ICMP10:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C10]] - ; WAVE64-NEXT: 
[[SELECT10:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP10]](s1), [[UV11]], [[SELECT9]] - ; WAVE64-NEXT: [[C11:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 - ; WAVE64-NEXT: [[ICMP11:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C11]] - ; WAVE64-NEXT: [[SELECT11:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP11]](s1), [[UV12]], [[SELECT10]] - ; WAVE64-NEXT: [[C12:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 13 - ; WAVE64-NEXT: [[ICMP12:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C12]] - ; WAVE64-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP12]](s1), [[UV13]], [[SELECT11]] - ; WAVE64-NEXT: [[C13:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 14 - ; WAVE64-NEXT: [[ICMP13:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C13]] - ; WAVE64-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP13]](s1), [[UV14]], [[SELECT12]] - ; WAVE64-NEXT: [[C14:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 15 - ; WAVE64-NEXT: [[ICMP14:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C14]] - ; WAVE64-NEXT: [[SELECT14:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP14]](s1), [[UV15]], [[SELECT13]] - ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[SELECT14]](s32) - ; WAVE64-NEXT: $vgpr0 = COPY [[COPY2]](s32) + ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(<16 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; WAVE64-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32), [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32), [[UV4:%[0-9]+]]:vgpr(i32), [[UV5:%[0-9]+]]:vgpr(i32), [[UV6:%[0-9]+]]:vgpr(i32), [[UV7:%[0-9]+]]:vgpr(i32), [[UV8:%[0-9]+]]:vgpr(i32), [[UV9:%[0-9]+]]:vgpr(i32), [[UV10:%[0-9]+]]:vgpr(i32), [[UV11:%[0-9]+]]:vgpr(i32), [[UV12:%[0-9]+]]:vgpr(i32), [[UV13:%[0-9]+]]:vgpr(i32), [[UV14:%[0-9]+]]:vgpr(i32), [[UV15:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](<16 x i32>) + ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; WAVE64-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C]] + ; WAVE64-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV1]], [[UV]] + ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 2 + ; WAVE64-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C1]] + ; WAVE64-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP1]](i1), [[UV2]], [[SELECT]] + ; WAVE64-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 3 + ; WAVE64-NEXT: [[ICMP2:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C2]] + ; WAVE64-NEXT: [[SELECT2:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP2]](i1), [[UV3]], [[SELECT1]] + ; WAVE64-NEXT: [[C3:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4 + ; WAVE64-NEXT: [[ICMP3:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C3]] + ; WAVE64-NEXT: [[SELECT3:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP3]](i1), [[UV4]], [[SELECT2]] + ; WAVE64-NEXT: [[C4:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 5 + ; WAVE64-NEXT: [[ICMP4:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C4]] + ; WAVE64-NEXT: [[SELECT4:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP4]](i1), [[UV5]], [[SELECT3]] + ; WAVE64-NEXT: [[C5:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 6 + ; WAVE64-NEXT: [[ICMP5:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C5]] + ; WAVE64-NEXT: [[SELECT5:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP5]](i1), [[UV6]], [[SELECT4]] + ; WAVE64-NEXT: [[C6:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 7 + ; WAVE64-NEXT: [[ICMP6:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C6]] + ; WAVE64-NEXT: 
[[SELECT6:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP6]](i1), [[UV7]], [[SELECT5]] + ; WAVE64-NEXT: [[C7:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 8 + ; WAVE64-NEXT: [[ICMP7:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C7]] + ; WAVE64-NEXT: [[SELECT7:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP7]](i1), [[UV8]], [[SELECT6]] + ; WAVE64-NEXT: [[C8:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 9 + ; WAVE64-NEXT: [[ICMP8:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C8]] + ; WAVE64-NEXT: [[SELECT8:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP8]](i1), [[UV9]], [[SELECT7]] + ; WAVE64-NEXT: [[C9:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 10 + ; WAVE64-NEXT: [[ICMP9:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C9]] + ; WAVE64-NEXT: [[SELECT9:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP9]](i1), [[UV10]], [[SELECT8]] + ; WAVE64-NEXT: [[C10:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 11 + ; WAVE64-NEXT: [[ICMP10:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C10]] + ; WAVE64-NEXT: [[SELECT10:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP10]](i1), [[UV11]], [[SELECT9]] + ; WAVE64-NEXT: [[C11:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 12 + ; WAVE64-NEXT: [[ICMP11:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C11]] + ; WAVE64-NEXT: [[SELECT11:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP11]](i1), [[UV12]], [[SELECT10]] + ; WAVE64-NEXT: [[C12:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 13 + ; WAVE64-NEXT: [[ICMP12:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C12]] + ; WAVE64-NEXT: [[SELECT12:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP12]](i1), [[UV13]], [[SELECT11]] + ; WAVE64-NEXT: [[C13:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 14 + ; WAVE64-NEXT: [[ICMP13:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C13]] + ; WAVE64-NEXT: [[SELECT13:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP13]](i1), [[UV14]], [[SELECT12]] + ; WAVE64-NEXT: [[C14:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 15 + ; WAVE64-NEXT: [[ICMP14:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C14]] + ; WAVE64-NEXT: [[SELECT14:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP14]](i1), [[UV15]], [[SELECT13]] + ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[SELECT14]](i32) + ; WAVE64-NEXT: $vgpr0 = COPY [[COPY2]](i32) + ; ; WAVE32-LABEL: name: extract_vector_elt_v16s32_sv ; WAVE32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $vgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; WAVE32-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; WAVE32-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; WAVE32-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV]] - ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; WAVE32-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; WAVE32-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), 
[[UV2]], [[SELECT]] - ; WAVE32-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 - ; WAVE32-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C2]] - ; WAVE32-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV3]], [[SELECT1]] - ; WAVE32-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 - ; WAVE32-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C3]] - ; WAVE32-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV4]], [[SELECT2]] - ; WAVE32-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 - ; WAVE32-NEXT: [[ICMP4:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C4]] - ; WAVE32-NEXT: [[SELECT4:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV5]], [[SELECT3]] - ; WAVE32-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; WAVE32-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C5]] - ; WAVE32-NEXT: [[SELECT5:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV6]], [[SELECT4]] - ; WAVE32-NEXT: [[C6:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 7 - ; WAVE32-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C6]] - ; WAVE32-NEXT: [[SELECT6:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV7]], [[SELECT5]] - ; WAVE32-NEXT: [[C7:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 8 - ; WAVE32-NEXT: [[ICMP7:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C7]] - ; WAVE32-NEXT: [[SELECT7:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP7]](s1), [[UV8]], [[SELECT6]] - ; WAVE32-NEXT: [[C8:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 9 - ; WAVE32-NEXT: [[ICMP8:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C8]] - ; WAVE32-NEXT: [[SELECT8:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP8]](s1), [[UV9]], [[SELECT7]] - ; WAVE32-NEXT: [[C9:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 - ; WAVE32-NEXT: [[ICMP9:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C9]] - ; WAVE32-NEXT: [[SELECT9:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP9]](s1), [[UV10]], [[SELECT8]] - ; WAVE32-NEXT: [[C10:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 11 - ; WAVE32-NEXT: [[ICMP10:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C10]] - ; WAVE32-NEXT: [[SELECT10:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP10]](s1), [[UV11]], [[SELECT9]] - ; WAVE32-NEXT: [[C11:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 - ; WAVE32-NEXT: [[ICMP11:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C11]] - ; WAVE32-NEXT: [[SELECT11:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP11]](s1), [[UV12]], [[SELECT10]] - ; WAVE32-NEXT: [[C12:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 13 - ; WAVE32-NEXT: [[ICMP12:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C12]] - ; WAVE32-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP12]](s1), [[UV13]], [[SELECT11]] - ; WAVE32-NEXT: [[C13:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 14 - ; WAVE32-NEXT: [[ICMP13:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C13]] - ; WAVE32-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP13]](s1), [[UV14]], [[SELECT12]] - ; WAVE32-NEXT: [[C14:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 15 - ; WAVE32-NEXT: [[ICMP14:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C14]] - ; WAVE32-NEXT: [[SELECT14:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP14]](s1), [[UV15]], [[SELECT13]] - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[SELECT14]](s32) - ; WAVE32-NEXT: $vgpr0 = COPY [[COPY2]](s32) - %0:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1 - $vgpr0 = COPY %2 + 
; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(<16 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; WAVE32-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32), [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32), [[UV4:%[0-9]+]]:vgpr(i32), [[UV5:%[0-9]+]]:vgpr(i32), [[UV6:%[0-9]+]]:vgpr(i32), [[UV7:%[0-9]+]]:vgpr(i32), [[UV8:%[0-9]+]]:vgpr(i32), [[UV9:%[0-9]+]]:vgpr(i32), [[UV10:%[0-9]+]]:vgpr(i32), [[UV11:%[0-9]+]]:vgpr(i32), [[UV12:%[0-9]+]]:vgpr(i32), [[UV13:%[0-9]+]]:vgpr(i32), [[UV14:%[0-9]+]]:vgpr(i32), [[UV15:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](<16 x i32>) + ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; WAVE32-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C]] + ; WAVE32-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV1]], [[UV]] + ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 2 + ; WAVE32-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C1]] + ; WAVE32-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP1]](i1), [[UV2]], [[SELECT]] + ; WAVE32-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 3 + ; WAVE32-NEXT: [[ICMP2:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C2]] + ; WAVE32-NEXT: [[SELECT2:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP2]](i1), [[UV3]], [[SELECT1]] + ; WAVE32-NEXT: [[C3:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4 + ; WAVE32-NEXT: [[ICMP3:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C3]] + ; WAVE32-NEXT: [[SELECT3:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP3]](i1), [[UV4]], [[SELECT2]] + ; WAVE32-NEXT: [[C4:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 5 + ; WAVE32-NEXT: [[ICMP4:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C4]] + ; WAVE32-NEXT: [[SELECT4:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP4]](i1), [[UV5]], [[SELECT3]] + ; WAVE32-NEXT: [[C5:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 6 + ; WAVE32-NEXT: [[ICMP5:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C5]] + ; WAVE32-NEXT: [[SELECT5:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP5]](i1), [[UV6]], [[SELECT4]] + ; WAVE32-NEXT: [[C6:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 7 + ; WAVE32-NEXT: [[ICMP6:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C6]] + ; WAVE32-NEXT: [[SELECT6:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP6]](i1), [[UV7]], [[SELECT5]] + ; WAVE32-NEXT: [[C7:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 8 + ; WAVE32-NEXT: [[ICMP7:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C7]] + ; WAVE32-NEXT: [[SELECT7:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP7]](i1), [[UV8]], [[SELECT6]] + ; WAVE32-NEXT: [[C8:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 9 + ; WAVE32-NEXT: [[ICMP8:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C8]] + ; WAVE32-NEXT: [[SELECT8:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP8]](i1), [[UV9]], [[SELECT7]] + ; WAVE32-NEXT: [[C9:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 10 + ; WAVE32-NEXT: [[ICMP9:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C9]] + ; WAVE32-NEXT: [[SELECT9:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP9]](i1), [[UV10]], [[SELECT8]] + ; WAVE32-NEXT: [[C10:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 11 + ; WAVE32-NEXT: [[ICMP10:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C10]] + ; WAVE32-NEXT: [[SELECT10:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP10]](i1), [[UV11]], [[SELECT9]] + ; WAVE32-NEXT: [[C11:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 12 + ; WAVE32-NEXT: [[ICMP11:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C11]] + ; WAVE32-NEXT: 
[[SELECT11:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP11]](i1), [[UV12]], [[SELECT10]] + ; WAVE32-NEXT: [[C12:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 13 + ; WAVE32-NEXT: [[ICMP12:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C12]] + ; WAVE32-NEXT: [[SELECT12:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP12]](i1), [[UV13]], [[SELECT11]] + ; WAVE32-NEXT: [[C13:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 14 + ; WAVE32-NEXT: [[ICMP13:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C13]] + ; WAVE32-NEXT: [[SELECT13:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP13]](i1), [[UV14]], [[SELECT12]] + ; WAVE32-NEXT: [[C14:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 15 + ; WAVE32-NEXT: [[ICMP14:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C14]] + ; WAVE32-NEXT: [[SELECT14:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP14]](i1), [[UV15]], [[SELECT13]] + ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[SELECT14]](i32) + ; WAVE32-NEXT: $vgpr0 = COPY [[COPY2]](i32) + %0:_(<16 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = G_EXTRACT_VECTOR_ELT %0(<16 x i32>), %1(i32) + $vgpr0 = COPY %2(i32) ... --- @@ -167,21 +169,22 @@ body: | ; WAVE64-LABEL: name: extract_vector_elt_v16s32_vs ; WAVE64: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $sgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; WAVE64-NEXT: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[COPY1]](s32) - ; WAVE64-NEXT: $vgpr0 = COPY [[EVEC]](s32) + ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr(<16 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; WAVE64-NEXT: [[EVEC:%[0-9]+]]:vgpr(i32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x i32>), [[COPY1]](i32) + ; WAVE64-NEXT: $vgpr0 = COPY [[EVEC]](i32) + ; ; WAVE32-LABEL: name: extract_vector_elt_v16s32_vs ; WAVE32: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $sgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; WAVE32-NEXT: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[COPY1]](s32) - ; WAVE32-NEXT: $vgpr0 = COPY [[EVEC]](s32) - %0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - %1:_(s32) = COPY $sgpr0 - %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1 - $vgpr0 = COPY %2 + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(<16 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; WAVE32-NEXT: [[EVEC:%[0-9]+]]:vgpr(i32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x i32>), [[COPY1]](i32) + ; WAVE32-NEXT: $vgpr0 = COPY [[EVEC]](i32) + %0:_(<16 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:_(i32) = COPY $sgpr0 + %2:_(i32) = G_EXTRACT_VECTOR_ELT %0(<16 x i32>), %1(i32) + $vgpr0 = 
COPY %2(i32) ... --- @@ -196,113 +199,114 @@ body: | ; WAVE64-LABEL: name: extract_vector_elt_v16s32_vv ; WAVE64: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr16 - ; WAVE64-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; WAVE64-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; WAVE64-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV]] - ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; WAVE64-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; WAVE64-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV2]], [[SELECT]] - ; WAVE64-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 - ; WAVE64-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C2]] - ; WAVE64-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV3]], [[SELECT1]] - ; WAVE64-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 - ; WAVE64-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C3]] - ; WAVE64-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV4]], [[SELECT2]] - ; WAVE64-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 - ; WAVE64-NEXT: [[ICMP4:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C4]] - ; WAVE64-NEXT: [[SELECT4:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV5]], [[SELECT3]] - ; WAVE64-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; WAVE64-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C5]] - ; WAVE64-NEXT: [[SELECT5:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV6]], [[SELECT4]] - ; WAVE64-NEXT: [[C6:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 7 - ; WAVE64-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C6]] - ; WAVE64-NEXT: [[SELECT6:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV7]], [[SELECT5]] - ; WAVE64-NEXT: [[C7:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 8 - ; WAVE64-NEXT: [[ICMP7:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C7]] - ; WAVE64-NEXT: [[SELECT7:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP7]](s1), [[UV8]], [[SELECT6]] - ; WAVE64-NEXT: [[C8:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 9 - ; WAVE64-NEXT: [[ICMP8:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C8]] - ; WAVE64-NEXT: [[SELECT8:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP8]](s1), [[UV9]], [[SELECT7]] - ; WAVE64-NEXT: [[C9:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 - ; WAVE64-NEXT: [[ICMP9:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C9]] - ; WAVE64-NEXT: [[SELECT9:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP9]](s1), [[UV10]], [[SELECT8]] - ; WAVE64-NEXT: [[C10:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 11 - ; WAVE64-NEXT: [[ICMP10:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C10]] - ; 
WAVE64-NEXT: [[SELECT10:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP10]](s1), [[UV11]], [[SELECT9]] - ; WAVE64-NEXT: [[C11:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 - ; WAVE64-NEXT: [[ICMP11:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C11]] - ; WAVE64-NEXT: [[SELECT11:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP11]](s1), [[UV12]], [[SELECT10]] - ; WAVE64-NEXT: [[C12:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 13 - ; WAVE64-NEXT: [[ICMP12:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C12]] - ; WAVE64-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP12]](s1), [[UV13]], [[SELECT11]] - ; WAVE64-NEXT: [[C13:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 14 - ; WAVE64-NEXT: [[ICMP13:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C13]] - ; WAVE64-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP13]](s1), [[UV14]], [[SELECT12]] - ; WAVE64-NEXT: [[C14:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 15 - ; WAVE64-NEXT: [[ICMP14:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C14]] - ; WAVE64-NEXT: [[SELECT14:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP14]](s1), [[UV15]], [[SELECT13]] - ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[SELECT14]](s32) - ; WAVE64-NEXT: $vgpr0 = COPY [[COPY2]](s32) + ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr(<16 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr16 + ; WAVE64-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32), [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32), [[UV4:%[0-9]+]]:vgpr(i32), [[UV5:%[0-9]+]]:vgpr(i32), [[UV6:%[0-9]+]]:vgpr(i32), [[UV7:%[0-9]+]]:vgpr(i32), [[UV8:%[0-9]+]]:vgpr(i32), [[UV9:%[0-9]+]]:vgpr(i32), [[UV10:%[0-9]+]]:vgpr(i32), [[UV11:%[0-9]+]]:vgpr(i32), [[UV12:%[0-9]+]]:vgpr(i32), [[UV13:%[0-9]+]]:vgpr(i32), [[UV14:%[0-9]+]]:vgpr(i32), [[UV15:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](<16 x i32>) + ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; WAVE64-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C]] + ; WAVE64-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV1]], [[UV]] + ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 2 + ; WAVE64-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C1]] + ; WAVE64-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP1]](i1), [[UV2]], [[SELECT]] + ; WAVE64-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 3 + ; WAVE64-NEXT: [[ICMP2:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C2]] + ; WAVE64-NEXT: [[SELECT2:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP2]](i1), [[UV3]], [[SELECT1]] + ; WAVE64-NEXT: [[C3:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4 + ; WAVE64-NEXT: [[ICMP3:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C3]] + ; WAVE64-NEXT: [[SELECT3:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP3]](i1), [[UV4]], [[SELECT2]] + ; WAVE64-NEXT: [[C4:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 5 + ; WAVE64-NEXT: [[ICMP4:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C4]] + ; WAVE64-NEXT: [[SELECT4:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP4]](i1), [[UV5]], [[SELECT3]] + ; WAVE64-NEXT: [[C5:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 6 + ; WAVE64-NEXT: [[ICMP5:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C5]] + ; WAVE64-NEXT: [[SELECT5:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP5]](i1), [[UV6]], [[SELECT4]] + ; WAVE64-NEXT: [[C6:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 7 + ; WAVE64-NEXT: [[ICMP6:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C6]] + ; WAVE64-NEXT: 
[[SELECT6:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP6]](i1), [[UV7]], [[SELECT5]] + ; WAVE64-NEXT: [[C7:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 8 + ; WAVE64-NEXT: [[ICMP7:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C7]] + ; WAVE64-NEXT: [[SELECT7:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP7]](i1), [[UV8]], [[SELECT6]] + ; WAVE64-NEXT: [[C8:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 9 + ; WAVE64-NEXT: [[ICMP8:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C8]] + ; WAVE64-NEXT: [[SELECT8:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP8]](i1), [[UV9]], [[SELECT7]] + ; WAVE64-NEXT: [[C9:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 10 + ; WAVE64-NEXT: [[ICMP9:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C9]] + ; WAVE64-NEXT: [[SELECT9:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP9]](i1), [[UV10]], [[SELECT8]] + ; WAVE64-NEXT: [[C10:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 11 + ; WAVE64-NEXT: [[ICMP10:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C10]] + ; WAVE64-NEXT: [[SELECT10:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP10]](i1), [[UV11]], [[SELECT9]] + ; WAVE64-NEXT: [[C11:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 12 + ; WAVE64-NEXT: [[ICMP11:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C11]] + ; WAVE64-NEXT: [[SELECT11:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP11]](i1), [[UV12]], [[SELECT10]] + ; WAVE64-NEXT: [[C12:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 13 + ; WAVE64-NEXT: [[ICMP12:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C12]] + ; WAVE64-NEXT: [[SELECT12:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP12]](i1), [[UV13]], [[SELECT11]] + ; WAVE64-NEXT: [[C13:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 14 + ; WAVE64-NEXT: [[ICMP13:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C13]] + ; WAVE64-NEXT: [[SELECT13:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP13]](i1), [[UV14]], [[SELECT12]] + ; WAVE64-NEXT: [[C14:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 15 + ; WAVE64-NEXT: [[ICMP14:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C14]] + ; WAVE64-NEXT: [[SELECT14:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP14]](i1), [[UV15]], [[SELECT13]] + ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[SELECT14]](i32) + ; WAVE64-NEXT: $vgpr0 = COPY [[COPY2]](i32) + ; ; WAVE32-LABEL: name: extract_vector_elt_v16s32_vv ; WAVE32: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr16 - ; WAVE32-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; WAVE32-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; WAVE32-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV]] - ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; WAVE32-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; WAVE32-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), 
[[UV2]], [[SELECT]] - ; WAVE32-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 - ; WAVE32-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C2]] - ; WAVE32-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV3]], [[SELECT1]] - ; WAVE32-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 - ; WAVE32-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C3]] - ; WAVE32-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV4]], [[SELECT2]] - ; WAVE32-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 - ; WAVE32-NEXT: [[ICMP4:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C4]] - ; WAVE32-NEXT: [[SELECT4:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV5]], [[SELECT3]] - ; WAVE32-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; WAVE32-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C5]] - ; WAVE32-NEXT: [[SELECT5:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV6]], [[SELECT4]] - ; WAVE32-NEXT: [[C6:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 7 - ; WAVE32-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C6]] - ; WAVE32-NEXT: [[SELECT6:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV7]], [[SELECT5]] - ; WAVE32-NEXT: [[C7:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 8 - ; WAVE32-NEXT: [[ICMP7:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C7]] - ; WAVE32-NEXT: [[SELECT7:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP7]](s1), [[UV8]], [[SELECT6]] - ; WAVE32-NEXT: [[C8:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 9 - ; WAVE32-NEXT: [[ICMP8:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C8]] - ; WAVE32-NEXT: [[SELECT8:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP8]](s1), [[UV9]], [[SELECT7]] - ; WAVE32-NEXT: [[C9:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 - ; WAVE32-NEXT: [[ICMP9:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C9]] - ; WAVE32-NEXT: [[SELECT9:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP9]](s1), [[UV10]], [[SELECT8]] - ; WAVE32-NEXT: [[C10:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 11 - ; WAVE32-NEXT: [[ICMP10:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C10]] - ; WAVE32-NEXT: [[SELECT10:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP10]](s1), [[UV11]], [[SELECT9]] - ; WAVE32-NEXT: [[C11:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 - ; WAVE32-NEXT: [[ICMP11:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C11]] - ; WAVE32-NEXT: [[SELECT11:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP11]](s1), [[UV12]], [[SELECT10]] - ; WAVE32-NEXT: [[C12:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 13 - ; WAVE32-NEXT: [[ICMP12:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C12]] - ; WAVE32-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP12]](s1), [[UV13]], [[SELECT11]] - ; WAVE32-NEXT: [[C13:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 14 - ; WAVE32-NEXT: [[ICMP13:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C13]] - ; WAVE32-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP13]](s1), [[UV14]], [[SELECT12]] - ; WAVE32-NEXT: [[C14:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 15 - ; WAVE32-NEXT: [[ICMP14:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C14]] - ; WAVE32-NEXT: [[SELECT14:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP14]](s1), [[UV15]], [[SELECT13]] - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[SELECT14]](s32) - ; WAVE32-NEXT: $vgpr0 = COPY [[COPY2]](s32) - %0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - %1:_(s32) = COPY $vgpr16 - %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1 - $vgpr0 = COPY %2 + 
; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(<16 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr16 + ; WAVE32-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32), [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32), [[UV4:%[0-9]+]]:vgpr(i32), [[UV5:%[0-9]+]]:vgpr(i32), [[UV6:%[0-9]+]]:vgpr(i32), [[UV7:%[0-9]+]]:vgpr(i32), [[UV8:%[0-9]+]]:vgpr(i32), [[UV9:%[0-9]+]]:vgpr(i32), [[UV10:%[0-9]+]]:vgpr(i32), [[UV11:%[0-9]+]]:vgpr(i32), [[UV12:%[0-9]+]]:vgpr(i32), [[UV13:%[0-9]+]]:vgpr(i32), [[UV14:%[0-9]+]]:vgpr(i32), [[UV15:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](<16 x i32>) + ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; WAVE32-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C]] + ; WAVE32-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV1]], [[UV]] + ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 2 + ; WAVE32-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C1]] + ; WAVE32-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP1]](i1), [[UV2]], [[SELECT]] + ; WAVE32-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 3 + ; WAVE32-NEXT: [[ICMP2:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C2]] + ; WAVE32-NEXT: [[SELECT2:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP2]](i1), [[UV3]], [[SELECT1]] + ; WAVE32-NEXT: [[C3:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4 + ; WAVE32-NEXT: [[ICMP3:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C3]] + ; WAVE32-NEXT: [[SELECT3:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP3]](i1), [[UV4]], [[SELECT2]] + ; WAVE32-NEXT: [[C4:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 5 + ; WAVE32-NEXT: [[ICMP4:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C4]] + ; WAVE32-NEXT: [[SELECT4:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP4]](i1), [[UV5]], [[SELECT3]] + ; WAVE32-NEXT: [[C5:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 6 + ; WAVE32-NEXT: [[ICMP5:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C5]] + ; WAVE32-NEXT: [[SELECT5:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP5]](i1), [[UV6]], [[SELECT4]] + ; WAVE32-NEXT: [[C6:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 7 + ; WAVE32-NEXT: [[ICMP6:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C6]] + ; WAVE32-NEXT: [[SELECT6:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP6]](i1), [[UV7]], [[SELECT5]] + ; WAVE32-NEXT: [[C7:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 8 + ; WAVE32-NEXT: [[ICMP7:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C7]] + ; WAVE32-NEXT: [[SELECT7:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP7]](i1), [[UV8]], [[SELECT6]] + ; WAVE32-NEXT: [[C8:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 9 + ; WAVE32-NEXT: [[ICMP8:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C8]] + ; WAVE32-NEXT: [[SELECT8:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP8]](i1), [[UV9]], [[SELECT7]] + ; WAVE32-NEXT: [[C9:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 10 + ; WAVE32-NEXT: [[ICMP9:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C9]] + ; WAVE32-NEXT: [[SELECT9:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP9]](i1), [[UV10]], [[SELECT8]] + ; WAVE32-NEXT: [[C10:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 11 + ; WAVE32-NEXT: [[ICMP10:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C10]] + ; WAVE32-NEXT: [[SELECT10:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP10]](i1), [[UV11]], [[SELECT9]] + ; WAVE32-NEXT: [[C11:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 12 + ; WAVE32-NEXT: [[ICMP11:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C11]] + ; WAVE32-NEXT: 
[[SELECT11:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP11]](i1), [[UV12]], [[SELECT10]] + ; WAVE32-NEXT: [[C12:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 13 + ; WAVE32-NEXT: [[ICMP12:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C12]] + ; WAVE32-NEXT: [[SELECT12:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP12]](i1), [[UV13]], [[SELECT11]] + ; WAVE32-NEXT: [[C13:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 14 + ; WAVE32-NEXT: [[ICMP13:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C13]] + ; WAVE32-NEXT: [[SELECT13:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP13]](i1), [[UV14]], [[SELECT12]] + ; WAVE32-NEXT: [[C14:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 15 + ; WAVE32-NEXT: [[ICMP14:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C14]] + ; WAVE32-NEXT: [[SELECT14:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP14]](i1), [[UV15]], [[SELECT13]] + ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[SELECT14]](i32) + ; WAVE32-NEXT: $vgpr0 = COPY [[COPY2]](i32) + %0:_(<16 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:_(i32) = COPY $vgpr16 + %2:_(i32) = G_EXTRACT_VECTOR_ELT %0(<16 x i32>), %1(i32) + $vgpr0 = COPY %2(i32) ... --- @@ -317,21 +321,22 @@ body: | ; WAVE64-LABEL: name: extract_vector_elt_v8s64_ss ; WAVE64: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $sgpr16 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr16 - ; WAVE64-NEXT: [[EVEC:%[0-9]+]]:sgpr(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](<8 x s64>), [[COPY1]](s32) - ; WAVE64-NEXT: $sgpr0_sgpr1 = COPY [[EVEC]](s64) + ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(<8 x i64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr16 + ; WAVE64-NEXT: [[EVEC:%[0-9]+]]:sgpr(i64) = G_EXTRACT_VECTOR_ELT [[COPY]](<8 x i64>), [[COPY1]](i32) + ; WAVE64-NEXT: $sgpr0_sgpr1 = COPY [[EVEC]](i64) + ; ; WAVE32-LABEL: name: extract_vector_elt_v8s64_ss ; WAVE32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $sgpr16 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr16 - ; WAVE32-NEXT: [[EVEC:%[0-9]+]]:sgpr(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](<8 x s64>), [[COPY1]](s32) - ; WAVE32-NEXT: $sgpr0_sgpr1 = COPY [[EVEC]](s64) - %0:_(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - %1:_(s32) = COPY $sgpr16 - %2:_(s64) = G_EXTRACT_VECTOR_ELT %0, %1 - $sgpr0_sgpr1 = COPY %2 + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(<8 x i64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr16 + ; WAVE32-NEXT: [[EVEC:%[0-9]+]]:sgpr(i64) = G_EXTRACT_VECTOR_ELT [[COPY]](<8 x i64>), [[COPY1]](i32) + ; WAVE32-NEXT: $sgpr0_sgpr1 = COPY [[EVEC]](i64) + %0:_(<8 x i64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + %1:_(i32) = COPY $sgpr16 + %2:_(i64) = G_EXTRACT_VECTOR_ELT %0(<8 x 
i64>), %1(i32) + $sgpr0_sgpr1 = COPY %2(i64) ... --- @@ -346,33 +351,34 @@ body: | ; WAVE64-LABEL: name: extract_vector_elt_v8s64_vs ; WAVE64: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $sgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; WAVE64-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<16 x s32>) = G_BITCAST [[COPY]](<8 x s64>) - ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY1]], [[C]](s32) - ; WAVE64-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[SHL]], [[C]] - ; WAVE64-NEXT: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[SHL]](s32) - ; WAVE64-NEXT: [[EVEC1:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[ADD]](s32) - ; WAVE64-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[EVEC]](s32), [[EVEC1]](s32) - ; WAVE64-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr(<8 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; WAVE64-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<16 x i32>) = G_BITCAST [[COPY]](<8 x i64>) + ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(i32) = G_SHL [[COPY1]], [[C]](i32) + ; WAVE64-NEXT: [[ADD:%[0-9]+]]:sgpr(i32) = G_ADD [[SHL]], [[C]] + ; WAVE64-NEXT: [[EVEC:%[0-9]+]]:vgpr(i32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x i32>), [[SHL]](i32) + ; WAVE64-NEXT: [[EVEC1:%[0-9]+]]:vgpr(i32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x i32>), [[ADD]](i32) + ; WAVE64-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[EVEC]](i32), [[EVEC1]](i32) + ; WAVE64-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + ; ; WAVE32-LABEL: name: extract_vector_elt_v8s64_vs ; WAVE32: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $sgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; WAVE32-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<16 x s32>) = G_BITCAST [[COPY]](<8 x s64>) - ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY1]], [[C]](s32) - ; WAVE32-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[SHL]], [[C]] - ; WAVE32-NEXT: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[SHL]](s32) - ; WAVE32-NEXT: [[EVEC1:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[ADD]](s32) - ; WAVE32-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[EVEC]](s32), [[EVEC1]](s32) - ; WAVE32-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) - %0:_(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - %1:_(s32) = COPY $sgpr0 - %2:_(s64) = G_EXTRACT_VECTOR_ELT %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(<8 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; WAVE32-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<16 x i32>) = 
G_BITCAST [[COPY]](<8 x i64>) + ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(i32) = G_SHL [[COPY1]], [[C]](i32) + ; WAVE32-NEXT: [[ADD:%[0-9]+]]:sgpr(i32) = G_ADD [[SHL]], [[C]] + ; WAVE32-NEXT: [[EVEC:%[0-9]+]]:vgpr(i32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x i32>), [[SHL]](i32) + ; WAVE32-NEXT: [[EVEC1:%[0-9]+]]:vgpr(i32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x i32>), [[ADD]](i32) + ; WAVE32-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[EVEC]](i32), [[EVEC1]](i32) + ; WAVE32-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + %0:_(<8 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:_(i32) = COPY $sgpr0 + %2:_(i64) = G_EXTRACT_VECTOR_ELT %0(<8 x i64>), %1(i32) + $vgpr0_vgpr1 = COPY %2(i64) ... --- @@ -387,83 +393,84 @@ body: | ; WAVE64-LABEL: name: extract_vector_elt_v8s64_sv ; WAVE64: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $vgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; WAVE64-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s64>) - ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; WAVE64-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; WAVE64-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV2]], [[UV]] - ; WAVE64-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV3]], [[UV1]] - ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; WAVE64-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; WAVE64-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV4]], [[SELECT]] - ; WAVE64-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV5]], [[SELECT1]] - ; WAVE64-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 - ; WAVE64-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C2]] - ; WAVE64-NEXT: [[SELECT4:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV6]], [[SELECT2]] - ; WAVE64-NEXT: [[SELECT5:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV7]], [[SELECT3]] - ; WAVE64-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 - ; WAVE64-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C3]] - ; WAVE64-NEXT: [[SELECT6:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV8]], [[SELECT4]] - ; WAVE64-NEXT: [[SELECT7:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV9]], [[SELECT5]] - ; WAVE64-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 - ; WAVE64-NEXT: [[ICMP4:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C4]] - ; WAVE64-NEXT: [[SELECT8:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV10]], [[SELECT6]] - ; WAVE64-NEXT: [[SELECT9:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV11]], [[SELECT7]] - ; WAVE64-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; WAVE64-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP 
intpred(eq), [[COPY1]](s32), [[C5]] - ; WAVE64-NEXT: [[SELECT10:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV12]], [[SELECT8]] - ; WAVE64-NEXT: [[SELECT11:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV13]], [[SELECT9]] - ; WAVE64-NEXT: [[C6:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 7 - ; WAVE64-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C6]] - ; WAVE64-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV14]], [[SELECT10]] - ; WAVE64-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV15]], [[SELECT11]] - ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[SELECT12]](s32) - ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT13]](s32) - ; WAVE64-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; WAVE64-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(<8 x i64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; WAVE64-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32), [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32), [[UV4:%[0-9]+]]:vgpr(i32), [[UV5:%[0-9]+]]:vgpr(i32), [[UV6:%[0-9]+]]:vgpr(i32), [[UV7:%[0-9]+]]:vgpr(i32), [[UV8:%[0-9]+]]:vgpr(i32), [[UV9:%[0-9]+]]:vgpr(i32), [[UV10:%[0-9]+]]:vgpr(i32), [[UV11:%[0-9]+]]:vgpr(i32), [[UV12:%[0-9]+]]:vgpr(i32), [[UV13:%[0-9]+]]:vgpr(i32), [[UV14:%[0-9]+]]:vgpr(i32), [[UV15:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](<8 x i64>) + ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; WAVE64-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C]] + ; WAVE64-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV2]], [[UV]] + ; WAVE64-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV3]], [[UV1]] + ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 2 + ; WAVE64-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C1]] + ; WAVE64-NEXT: [[SELECT2:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP1]](i1), [[UV4]], [[SELECT]] + ; WAVE64-NEXT: [[SELECT3:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP1]](i1), [[UV5]], [[SELECT1]] + ; WAVE64-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 3 + ; WAVE64-NEXT: [[ICMP2:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C2]] + ; WAVE64-NEXT: [[SELECT4:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP2]](i1), [[UV6]], [[SELECT2]] + ; WAVE64-NEXT: [[SELECT5:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP2]](i1), [[UV7]], [[SELECT3]] + ; WAVE64-NEXT: [[C3:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4 + ; WAVE64-NEXT: [[ICMP3:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C3]] + ; WAVE64-NEXT: [[SELECT6:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP3]](i1), [[UV8]], [[SELECT4]] + ; WAVE64-NEXT: [[SELECT7:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP3]](i1), [[UV9]], [[SELECT5]] + ; WAVE64-NEXT: [[C4:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 5 + ; WAVE64-NEXT: [[ICMP4:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C4]] + ; WAVE64-NEXT: [[SELECT8:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP4]](i1), [[UV10]], [[SELECT6]] + ; WAVE64-NEXT: [[SELECT9:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP4]](i1), [[UV11]], [[SELECT7]] + ; WAVE64-NEXT: [[C5:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 6 + ; WAVE64-NEXT: [[ICMP5:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C5]] + ; WAVE64-NEXT: [[SELECT10:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP5]](i1), [[UV12]], [[SELECT8]] + ; WAVE64-NEXT: [[SELECT11:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP5]](i1), 
[[UV13]], [[SELECT9]] + ; WAVE64-NEXT: [[C6:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 7 + ; WAVE64-NEXT: [[ICMP6:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C6]] + ; WAVE64-NEXT: [[SELECT12:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP6]](i1), [[UV14]], [[SELECT10]] + ; WAVE64-NEXT: [[SELECT13:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP6]](i1), [[UV15]], [[SELECT11]] + ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[SELECT12]](i32) + ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[SELECT13]](i32) + ; WAVE64-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; WAVE64-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + ; ; WAVE32-LABEL: name: extract_vector_elt_v8s64_sv ; WAVE32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $vgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; WAVE32-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s64>) - ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; WAVE32-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; WAVE32-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV2]], [[UV]] - ; WAVE32-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV3]], [[UV1]] - ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; WAVE32-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; WAVE32-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV4]], [[SELECT]] - ; WAVE32-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV5]], [[SELECT1]] - ; WAVE32-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 - ; WAVE32-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C2]] - ; WAVE32-NEXT: [[SELECT4:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV6]], [[SELECT2]] - ; WAVE32-NEXT: [[SELECT5:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV7]], [[SELECT3]] - ; WAVE32-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 - ; WAVE32-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C3]] - ; WAVE32-NEXT: [[SELECT6:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV8]], [[SELECT4]] - ; WAVE32-NEXT: [[SELECT7:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV9]], [[SELECT5]] - ; WAVE32-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 - ; WAVE32-NEXT: [[ICMP4:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C4]] - ; WAVE32-NEXT: [[SELECT8:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV10]], [[SELECT6]] - ; WAVE32-NEXT: [[SELECT9:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV11]], [[SELECT7]] - ; WAVE32-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; WAVE32-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C5]] - ; WAVE32-NEXT: [[SELECT10:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV12]], [[SELECT8]] - ; WAVE32-NEXT: [[SELECT11:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), 
[[UV13]], [[SELECT9]] - ; WAVE32-NEXT: [[C6:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 7 - ; WAVE32-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C6]] - ; WAVE32-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV14]], [[SELECT10]] - ; WAVE32-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV15]], [[SELECT11]] - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[SELECT12]](s32) - ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT13]](s32) - ; WAVE32-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; WAVE32-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) - %0:_(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - %1:_(s32) = COPY $vgpr0 - %2:_(s64) = G_EXTRACT_VECTOR_ELT %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(<8 x i64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; WAVE32-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32), [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32), [[UV4:%[0-9]+]]:vgpr(i32), [[UV5:%[0-9]+]]:vgpr(i32), [[UV6:%[0-9]+]]:vgpr(i32), [[UV7:%[0-9]+]]:vgpr(i32), [[UV8:%[0-9]+]]:vgpr(i32), [[UV9:%[0-9]+]]:vgpr(i32), [[UV10:%[0-9]+]]:vgpr(i32), [[UV11:%[0-9]+]]:vgpr(i32), [[UV12:%[0-9]+]]:vgpr(i32), [[UV13:%[0-9]+]]:vgpr(i32), [[UV14:%[0-9]+]]:vgpr(i32), [[UV15:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](<8 x i64>) + ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; WAVE32-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C]] + ; WAVE32-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV2]], [[UV]] + ; WAVE32-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV3]], [[UV1]] + ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 2 + ; WAVE32-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C1]] + ; WAVE32-NEXT: [[SELECT2:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP1]](i1), [[UV4]], [[SELECT]] + ; WAVE32-NEXT: [[SELECT3:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP1]](i1), [[UV5]], [[SELECT1]] + ; WAVE32-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 3 + ; WAVE32-NEXT: [[ICMP2:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C2]] + ; WAVE32-NEXT: [[SELECT4:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP2]](i1), [[UV6]], [[SELECT2]] + ; WAVE32-NEXT: [[SELECT5:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP2]](i1), [[UV7]], [[SELECT3]] + ; WAVE32-NEXT: [[C3:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4 + ; WAVE32-NEXT: [[ICMP3:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C3]] + ; WAVE32-NEXT: [[SELECT6:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP3]](i1), [[UV8]], [[SELECT4]] + ; WAVE32-NEXT: [[SELECT7:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP3]](i1), [[UV9]], [[SELECT5]] + ; WAVE32-NEXT: [[C4:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 5 + ; WAVE32-NEXT: [[ICMP4:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C4]] + ; WAVE32-NEXT: [[SELECT8:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP4]](i1), [[UV10]], [[SELECT6]] + ; WAVE32-NEXT: [[SELECT9:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP4]](i1), [[UV11]], [[SELECT7]] + ; WAVE32-NEXT: [[C5:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 6 + ; WAVE32-NEXT: [[ICMP5:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C5]] + ; WAVE32-NEXT: [[SELECT10:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP5]](i1), [[UV12]], [[SELECT8]] + ; WAVE32-NEXT: [[SELECT11:%[0-9]+]]:vgpr(i32) = G_SELECT 
[[ICMP5]](i1), [[UV13]], [[SELECT9]] + ; WAVE32-NEXT: [[C6:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 7 + ; WAVE32-NEXT: [[ICMP6:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C6]] + ; WAVE32-NEXT: [[SELECT12:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP6]](i1), [[UV14]], [[SELECT10]] + ; WAVE32-NEXT: [[SELECT13:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP6]](i1), [[UV15]], [[SELECT11]] + ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[SELECT12]](i32) + ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[SELECT13]](i32) + ; WAVE32-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; WAVE32-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + %0:_(<8 x i64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + %1:_(i32) = COPY $vgpr0 + %2:_(i64) = G_EXTRACT_VECTOR_ELT %0(<8 x i64>), %1(i32) + $vgpr0_vgpr1 = COPY %2(i64) ... --- @@ -478,83 +485,84 @@ body: | ; WAVE64-LABEL: name: extract_vector_elt_v8s64_vv ; WAVE64: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr16 - ; WAVE64-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s64>) - ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; WAVE64-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; WAVE64-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV2]], [[UV]] - ; WAVE64-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV3]], [[UV1]] - ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; WAVE64-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; WAVE64-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV4]], [[SELECT]] - ; WAVE64-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV5]], [[SELECT1]] - ; WAVE64-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 - ; WAVE64-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C2]] - ; WAVE64-NEXT: [[SELECT4:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV6]], [[SELECT2]] - ; WAVE64-NEXT: [[SELECT5:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV7]], [[SELECT3]] - ; WAVE64-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 - ; WAVE64-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C3]] - ; WAVE64-NEXT: [[SELECT6:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV8]], [[SELECT4]] - ; WAVE64-NEXT: [[SELECT7:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV9]], [[SELECT5]] - ; WAVE64-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 - ; WAVE64-NEXT: [[ICMP4:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C4]] - ; WAVE64-NEXT: [[SELECT8:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV10]], [[SELECT6]] - ; WAVE64-NEXT: [[SELECT9:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV11]], [[SELECT7]] - ; WAVE64-NEXT: 
[[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; WAVE64-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C5]] - ; WAVE64-NEXT: [[SELECT10:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV12]], [[SELECT8]] - ; WAVE64-NEXT: [[SELECT11:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV13]], [[SELECT9]] - ; WAVE64-NEXT: [[C6:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 7 - ; WAVE64-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C6]] - ; WAVE64-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV14]], [[SELECT10]] - ; WAVE64-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV15]], [[SELECT11]] - ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[SELECT12]](s32) - ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT13]](s32) - ; WAVE64-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; WAVE64-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr(<8 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr16 + ; WAVE64-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32), [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32), [[UV4:%[0-9]+]]:vgpr(i32), [[UV5:%[0-9]+]]:vgpr(i32), [[UV6:%[0-9]+]]:vgpr(i32), [[UV7:%[0-9]+]]:vgpr(i32), [[UV8:%[0-9]+]]:vgpr(i32), [[UV9:%[0-9]+]]:vgpr(i32), [[UV10:%[0-9]+]]:vgpr(i32), [[UV11:%[0-9]+]]:vgpr(i32), [[UV12:%[0-9]+]]:vgpr(i32), [[UV13:%[0-9]+]]:vgpr(i32), [[UV14:%[0-9]+]]:vgpr(i32), [[UV15:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](<8 x i64>) + ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; WAVE64-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C]] + ; WAVE64-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV2]], [[UV]] + ; WAVE64-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV3]], [[UV1]] + ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 2 + ; WAVE64-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C1]] + ; WAVE64-NEXT: [[SELECT2:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP1]](i1), [[UV4]], [[SELECT]] + ; WAVE64-NEXT: [[SELECT3:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP1]](i1), [[UV5]], [[SELECT1]] + ; WAVE64-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 3 + ; WAVE64-NEXT: [[ICMP2:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C2]] + ; WAVE64-NEXT: [[SELECT4:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP2]](i1), [[UV6]], [[SELECT2]] + ; WAVE64-NEXT: [[SELECT5:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP2]](i1), [[UV7]], [[SELECT3]] + ; WAVE64-NEXT: [[C3:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4 + ; WAVE64-NEXT: [[ICMP3:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C3]] + ; WAVE64-NEXT: [[SELECT6:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP3]](i1), [[UV8]], [[SELECT4]] + ; WAVE64-NEXT: [[SELECT7:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP3]](i1), [[UV9]], [[SELECT5]] + ; WAVE64-NEXT: [[C4:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 5 + ; WAVE64-NEXT: [[ICMP4:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C4]] + ; WAVE64-NEXT: [[SELECT8:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP4]](i1), [[UV10]], [[SELECT6]] + ; WAVE64-NEXT: [[SELECT9:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP4]](i1), [[UV11]], [[SELECT7]] + ; WAVE64-NEXT: [[C5:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 6 + ; WAVE64-NEXT: [[ICMP5:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C5]] + ; WAVE64-NEXT: [[SELECT10:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP5]](i1), 
[[UV12]], [[SELECT8]] + ; WAVE64-NEXT: [[SELECT11:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP5]](i1), [[UV13]], [[SELECT9]] + ; WAVE64-NEXT: [[C6:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 7 + ; WAVE64-NEXT: [[ICMP6:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C6]] + ; WAVE64-NEXT: [[SELECT12:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP6]](i1), [[UV14]], [[SELECT10]] + ; WAVE64-NEXT: [[SELECT13:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP6]](i1), [[UV15]], [[SELECT11]] + ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[SELECT12]](i32) + ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[SELECT13]](i32) + ; WAVE64-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; WAVE64-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + ; ; WAVE32-LABEL: name: extract_vector_elt_v8s64_vv ; WAVE32: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr16 - ; WAVE32-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s64>) - ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; WAVE32-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; WAVE32-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV2]], [[UV]] - ; WAVE32-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV3]], [[UV1]] - ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; WAVE32-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; WAVE32-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV4]], [[SELECT]] - ; WAVE32-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV5]], [[SELECT1]] - ; WAVE32-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 - ; WAVE32-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C2]] - ; WAVE32-NEXT: [[SELECT4:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV6]], [[SELECT2]] - ; WAVE32-NEXT: [[SELECT5:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV7]], [[SELECT3]] - ; WAVE32-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 - ; WAVE32-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C3]] - ; WAVE32-NEXT: [[SELECT6:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV8]], [[SELECT4]] - ; WAVE32-NEXT: [[SELECT7:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV9]], [[SELECT5]] - ; WAVE32-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 - ; WAVE32-NEXT: [[ICMP4:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C4]] - ; WAVE32-NEXT: [[SELECT8:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV10]], [[SELECT6]] - ; WAVE32-NEXT: [[SELECT9:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV11]], [[SELECT7]] - ; WAVE32-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; WAVE32-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C5]] - ; WAVE32-NEXT: [[SELECT10:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), 
[[UV12]], [[SELECT8]] - ; WAVE32-NEXT: [[SELECT11:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV13]], [[SELECT9]] - ; WAVE32-NEXT: [[C6:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 7 - ; WAVE32-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C6]] - ; WAVE32-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV14]], [[SELECT10]] - ; WAVE32-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV15]], [[SELECT11]] - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[SELECT12]](s32) - ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT13]](s32) - ; WAVE32-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; WAVE32-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) - %0:_(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - %1:_(s32) = COPY $vgpr16 - %2:_(s64) = G_EXTRACT_VECTOR_ELT %0, %1 - $vgpr0_vgpr1 = COPY %2 + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(<8 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr16 + ; WAVE32-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32), [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32), [[UV4:%[0-9]+]]:vgpr(i32), [[UV5:%[0-9]+]]:vgpr(i32), [[UV6:%[0-9]+]]:vgpr(i32), [[UV7:%[0-9]+]]:vgpr(i32), [[UV8:%[0-9]+]]:vgpr(i32), [[UV9:%[0-9]+]]:vgpr(i32), [[UV10:%[0-9]+]]:vgpr(i32), [[UV11:%[0-9]+]]:vgpr(i32), [[UV12:%[0-9]+]]:vgpr(i32), [[UV13:%[0-9]+]]:vgpr(i32), [[UV14:%[0-9]+]]:vgpr(i32), [[UV15:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](<8 x i64>) + ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; WAVE32-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C]] + ; WAVE32-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV2]], [[UV]] + ; WAVE32-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV3]], [[UV1]] + ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 2 + ; WAVE32-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C1]] + ; WAVE32-NEXT: [[SELECT2:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP1]](i1), [[UV4]], [[SELECT]] + ; WAVE32-NEXT: [[SELECT3:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP1]](i1), [[UV5]], [[SELECT1]] + ; WAVE32-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 3 + ; WAVE32-NEXT: [[ICMP2:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C2]] + ; WAVE32-NEXT: [[SELECT4:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP2]](i1), [[UV6]], [[SELECT2]] + ; WAVE32-NEXT: [[SELECT5:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP2]](i1), [[UV7]], [[SELECT3]] + ; WAVE32-NEXT: [[C3:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4 + ; WAVE32-NEXT: [[ICMP3:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C3]] + ; WAVE32-NEXT: [[SELECT6:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP3]](i1), [[UV8]], [[SELECT4]] + ; WAVE32-NEXT: [[SELECT7:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP3]](i1), [[UV9]], [[SELECT5]] + ; WAVE32-NEXT: [[C4:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 5 + ; WAVE32-NEXT: [[ICMP4:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C4]] + ; WAVE32-NEXT: [[SELECT8:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP4]](i1), [[UV10]], [[SELECT6]] + ; WAVE32-NEXT: [[SELECT9:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP4]](i1), [[UV11]], [[SELECT7]] + ; WAVE32-NEXT: [[C5:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 6 + ; WAVE32-NEXT: [[ICMP5:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C5]] + ; WAVE32-NEXT: [[SELECT10:%[0-9]+]]:vgpr(i32) = G_SELECT 
[[ICMP5]](i1), [[UV12]], [[SELECT8]] + ; WAVE32-NEXT: [[SELECT11:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP5]](i1), [[UV13]], [[SELECT9]] + ; WAVE32-NEXT: [[C6:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 7 + ; WAVE32-NEXT: [[ICMP6:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[C6]] + ; WAVE32-NEXT: [[SELECT12:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP6]](i1), [[UV14]], [[SELECT10]] + ; WAVE32-NEXT: [[SELECT13:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP6]](i1), [[UV15]], [[SELECT11]] + ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[SELECT12]](i32) + ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[SELECT13]](i32) + ; WAVE32-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; WAVE32-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + %0:_(<8 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:_(i32) = COPY $vgpr16 + %2:_(i64) = G_EXTRACT_VECTOR_ELT %0(<8 x i64>), %1(i32) + $vgpr0_vgpr1 = COPY %2(i64) ... --- @@ -569,121 +577,122 @@ body: | ; WAVE64-LABEL: name: extract_vector_elt_v16s32_vv_idx_add1 ; WAVE64: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr16 - ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; WAVE64-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] - ; WAVE64-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; WAVE64-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C1]] - ; WAVE64-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV]] - ; WAVE64-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; WAVE64-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C2]] - ; WAVE64-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV2]], [[SELECT]] - ; WAVE64-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 - ; WAVE64-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C3]] - ; WAVE64-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV3]], [[SELECT1]] - ; WAVE64-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 - ; WAVE64-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C4]] - ; WAVE64-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV4]], [[SELECT2]] - ; WAVE64-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 - ; WAVE64-NEXT: [[ICMP4:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C5]] - ; WAVE64-NEXT: [[SELECT4:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV5]], [[SELECT3]] - ; WAVE64-NEXT: [[C6:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; WAVE64-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C6]] - ; WAVE64-NEXT: [[SELECT5:%[0-9]+]]:vgpr(s32) = 
G_SELECT [[ICMP5]](s1), [[UV6]], [[SELECT4]] - ; WAVE64-NEXT: [[C7:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 7 - ; WAVE64-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C7]] - ; WAVE64-NEXT: [[SELECT6:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV7]], [[SELECT5]] - ; WAVE64-NEXT: [[C8:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 8 - ; WAVE64-NEXT: [[ICMP7:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C8]] - ; WAVE64-NEXT: [[SELECT7:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP7]](s1), [[UV8]], [[SELECT6]] - ; WAVE64-NEXT: [[C9:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 9 - ; WAVE64-NEXT: [[ICMP8:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C9]] - ; WAVE64-NEXT: [[SELECT8:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP8]](s1), [[UV9]], [[SELECT7]] - ; WAVE64-NEXT: [[C10:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 - ; WAVE64-NEXT: [[ICMP9:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C10]] - ; WAVE64-NEXT: [[SELECT9:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP9]](s1), [[UV10]], [[SELECT8]] - ; WAVE64-NEXT: [[C11:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 11 - ; WAVE64-NEXT: [[ICMP10:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C11]] - ; WAVE64-NEXT: [[SELECT10:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP10]](s1), [[UV11]], [[SELECT9]] - ; WAVE64-NEXT: [[C12:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 - ; WAVE64-NEXT: [[ICMP11:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C12]] - ; WAVE64-NEXT: [[SELECT11:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP11]](s1), [[UV12]], [[SELECT10]] - ; WAVE64-NEXT: [[C13:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 13 - ; WAVE64-NEXT: [[ICMP12:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C13]] - ; WAVE64-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP12]](s1), [[UV13]], [[SELECT11]] - ; WAVE64-NEXT: [[C14:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 14 - ; WAVE64-NEXT: [[ICMP13:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C14]] - ; WAVE64-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP13]](s1), [[UV14]], [[SELECT12]] - ; WAVE64-NEXT: [[C15:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 15 - ; WAVE64-NEXT: [[ICMP14:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C15]] - ; WAVE64-NEXT: [[SELECT14:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP14]](s1), [[UV15]], [[SELECT13]] - ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT14]](s32) - ; WAVE64-NEXT: $vgpr0 = COPY [[COPY3]](s32) + ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr(<16 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr16 + ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; WAVE64-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[COPY1]], [[COPY2]] + ; WAVE64-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32), [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32), [[UV4:%[0-9]+]]:vgpr(i32), [[UV5:%[0-9]+]]:vgpr(i32), [[UV6:%[0-9]+]]:vgpr(i32), [[UV7:%[0-9]+]]:vgpr(i32), [[UV8:%[0-9]+]]:vgpr(i32), [[UV9:%[0-9]+]]:vgpr(i32), [[UV10:%[0-9]+]]:vgpr(i32), [[UV11:%[0-9]+]]:vgpr(i32), [[UV12:%[0-9]+]]:vgpr(i32), [[UV13:%[0-9]+]]:vgpr(i32), [[UV14:%[0-9]+]]:vgpr(i32), [[UV15:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](<16 x i32>) + ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; WAVE64-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C1]] + ; WAVE64-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV1]], [[UV]] + ; WAVE64-NEXT: 
[[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 2 + ; WAVE64-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C2]] + ; WAVE64-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP1]](i1), [[UV2]], [[SELECT]] + ; WAVE64-NEXT: [[C3:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 3 + ; WAVE64-NEXT: [[ICMP2:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C3]] + ; WAVE64-NEXT: [[SELECT2:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP2]](i1), [[UV3]], [[SELECT1]] + ; WAVE64-NEXT: [[C4:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4 + ; WAVE64-NEXT: [[ICMP3:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C4]] + ; WAVE64-NEXT: [[SELECT3:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP3]](i1), [[UV4]], [[SELECT2]] + ; WAVE64-NEXT: [[C5:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 5 + ; WAVE64-NEXT: [[ICMP4:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C5]] + ; WAVE64-NEXT: [[SELECT4:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP4]](i1), [[UV5]], [[SELECT3]] + ; WAVE64-NEXT: [[C6:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 6 + ; WAVE64-NEXT: [[ICMP5:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C6]] + ; WAVE64-NEXT: [[SELECT5:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP5]](i1), [[UV6]], [[SELECT4]] + ; WAVE64-NEXT: [[C7:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 7 + ; WAVE64-NEXT: [[ICMP6:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C7]] + ; WAVE64-NEXT: [[SELECT6:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP6]](i1), [[UV7]], [[SELECT5]] + ; WAVE64-NEXT: [[C8:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 8 + ; WAVE64-NEXT: [[ICMP7:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C8]] + ; WAVE64-NEXT: [[SELECT7:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP7]](i1), [[UV8]], [[SELECT6]] + ; WAVE64-NEXT: [[C9:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 9 + ; WAVE64-NEXT: [[ICMP8:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C9]] + ; WAVE64-NEXT: [[SELECT8:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP8]](i1), [[UV9]], [[SELECT7]] + ; WAVE64-NEXT: [[C10:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 10 + ; WAVE64-NEXT: [[ICMP9:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C10]] + ; WAVE64-NEXT: [[SELECT9:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP9]](i1), [[UV10]], [[SELECT8]] + ; WAVE64-NEXT: [[C11:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 11 + ; WAVE64-NEXT: [[ICMP10:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C11]] + ; WAVE64-NEXT: [[SELECT10:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP10]](i1), [[UV11]], [[SELECT9]] + ; WAVE64-NEXT: [[C12:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 12 + ; WAVE64-NEXT: [[ICMP11:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C12]] + ; WAVE64-NEXT: [[SELECT11:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP11]](i1), [[UV12]], [[SELECT10]] + ; WAVE64-NEXT: [[C13:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 13 + ; WAVE64-NEXT: [[ICMP12:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C13]] + ; WAVE64-NEXT: [[SELECT12:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP12]](i1), [[UV13]], [[SELECT11]] + ; WAVE64-NEXT: [[C14:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 14 + ; WAVE64-NEXT: [[ICMP13:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C14]] + ; WAVE64-NEXT: [[SELECT13:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP13]](i1), [[UV14]], [[SELECT12]] + ; WAVE64-NEXT: [[C15:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 15 + ; WAVE64-NEXT: [[ICMP14:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C15]] + ; WAVE64-NEXT: [[SELECT14:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP14]](i1), [[UV15]], [[SELECT13]] + ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[SELECT14]](i32) + ; WAVE64-NEXT: $vgpr0 = COPY [[COPY3]](i32) + ; ; WAVE32-LABEL: name: 
extract_vector_elt_v16s32_vv_idx_add1 ; WAVE32: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr16 - ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; WAVE32-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] - ; WAVE32-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; WAVE32-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C1]] - ; WAVE32-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV]] - ; WAVE32-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; WAVE32-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C2]] - ; WAVE32-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV2]], [[SELECT]] - ; WAVE32-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 - ; WAVE32-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C3]] - ; WAVE32-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV3]], [[SELECT1]] - ; WAVE32-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 - ; WAVE32-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C4]] - ; WAVE32-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV4]], [[SELECT2]] - ; WAVE32-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 - ; WAVE32-NEXT: [[ICMP4:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C5]] - ; WAVE32-NEXT: [[SELECT4:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV5]], [[SELECT3]] - ; WAVE32-NEXT: [[C6:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; WAVE32-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C6]] - ; WAVE32-NEXT: [[SELECT5:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV6]], [[SELECT4]] - ; WAVE32-NEXT: [[C7:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 7 - ; WAVE32-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C7]] - ; WAVE32-NEXT: [[SELECT6:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV7]], [[SELECT5]] - ; WAVE32-NEXT: [[C8:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 8 - ; WAVE32-NEXT: [[ICMP7:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C8]] - ; WAVE32-NEXT: [[SELECT7:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP7]](s1), [[UV8]], [[SELECT6]] - ; WAVE32-NEXT: [[C9:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 9 - ; WAVE32-NEXT: [[ICMP8:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C9]] - ; WAVE32-NEXT: [[SELECT8:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP8]](s1), [[UV9]], [[SELECT7]] - ; WAVE32-NEXT: [[C10:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 - ; WAVE32-NEXT: [[ICMP9:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C10]] - ; WAVE32-NEXT: [[SELECT9:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP9]](s1), [[UV10]], [[SELECT8]] - ; WAVE32-NEXT: [[C11:%[0-9]+]]:sgpr(s32) = 
G_CONSTANT i32 11 - ; WAVE32-NEXT: [[ICMP10:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C11]] - ; WAVE32-NEXT: [[SELECT10:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP10]](s1), [[UV11]], [[SELECT9]] - ; WAVE32-NEXT: [[C12:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 - ; WAVE32-NEXT: [[ICMP11:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C12]] - ; WAVE32-NEXT: [[SELECT11:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP11]](s1), [[UV12]], [[SELECT10]] - ; WAVE32-NEXT: [[C13:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 13 - ; WAVE32-NEXT: [[ICMP12:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C13]] - ; WAVE32-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP12]](s1), [[UV13]], [[SELECT11]] - ; WAVE32-NEXT: [[C14:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 14 - ; WAVE32-NEXT: [[ICMP13:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C14]] - ; WAVE32-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP13]](s1), [[UV14]], [[SELECT12]] - ; WAVE32-NEXT: [[C15:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 15 - ; WAVE32-NEXT: [[ICMP14:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C15]] - ; WAVE32-NEXT: [[SELECT14:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP14]](s1), [[UV15]], [[SELECT13]] - ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT14]](s32) - ; WAVE32-NEXT: $vgpr0 = COPY [[COPY3]](s32) - %0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - %1:_(s32) = COPY $vgpr16 - %2:_(s32) = G_CONSTANT i32 1 - %3:_(s32) = G_ADD %1, %2 - %4:_(s32) = G_EXTRACT_VECTOR_ELT %0, %3 - $vgpr0 = COPY %4 + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(<16 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr16 + ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; WAVE32-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[COPY1]], [[COPY2]] + ; WAVE32-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32), [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32), [[UV4:%[0-9]+]]:vgpr(i32), [[UV5:%[0-9]+]]:vgpr(i32), [[UV6:%[0-9]+]]:vgpr(i32), [[UV7:%[0-9]+]]:vgpr(i32), [[UV8:%[0-9]+]]:vgpr(i32), [[UV9:%[0-9]+]]:vgpr(i32), [[UV10:%[0-9]+]]:vgpr(i32), [[UV11:%[0-9]+]]:vgpr(i32), [[UV12:%[0-9]+]]:vgpr(i32), [[UV13:%[0-9]+]]:vgpr(i32), [[UV14:%[0-9]+]]:vgpr(i32), [[UV15:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](<16 x i32>) + ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; WAVE32-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C1]] + ; WAVE32-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV1]], [[UV]] + ; WAVE32-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 2 + ; WAVE32-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C2]] + ; WAVE32-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP1]](i1), [[UV2]], [[SELECT]] + ; WAVE32-NEXT: [[C3:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 3 + ; WAVE32-NEXT: [[ICMP2:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C3]] + ; WAVE32-NEXT: [[SELECT2:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP2]](i1), [[UV3]], [[SELECT1]] + ; WAVE32-NEXT: [[C4:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4 + ; WAVE32-NEXT: [[ICMP3:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C4]] + ; WAVE32-NEXT: [[SELECT3:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP3]](i1), [[UV4]], [[SELECT2]] + ; WAVE32-NEXT: [[C5:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 5 + ; WAVE32-NEXT: 
[[ICMP4:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C5]] + ; WAVE32-NEXT: [[SELECT4:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP4]](i1), [[UV5]], [[SELECT3]] + ; WAVE32-NEXT: [[C6:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 6 + ; WAVE32-NEXT: [[ICMP5:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C6]] + ; WAVE32-NEXT: [[SELECT5:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP5]](i1), [[UV6]], [[SELECT4]] + ; WAVE32-NEXT: [[C7:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 7 + ; WAVE32-NEXT: [[ICMP6:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C7]] + ; WAVE32-NEXT: [[SELECT6:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP6]](i1), [[UV7]], [[SELECT5]] + ; WAVE32-NEXT: [[C8:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 8 + ; WAVE32-NEXT: [[ICMP7:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C8]] + ; WAVE32-NEXT: [[SELECT7:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP7]](i1), [[UV8]], [[SELECT6]] + ; WAVE32-NEXT: [[C9:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 9 + ; WAVE32-NEXT: [[ICMP8:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C9]] + ; WAVE32-NEXT: [[SELECT8:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP8]](i1), [[UV9]], [[SELECT7]] + ; WAVE32-NEXT: [[C10:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 10 + ; WAVE32-NEXT: [[ICMP9:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C10]] + ; WAVE32-NEXT: [[SELECT9:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP9]](i1), [[UV10]], [[SELECT8]] + ; WAVE32-NEXT: [[C11:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 11 + ; WAVE32-NEXT: [[ICMP10:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C11]] + ; WAVE32-NEXT: [[SELECT10:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP10]](i1), [[UV11]], [[SELECT9]] + ; WAVE32-NEXT: [[C12:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 12 + ; WAVE32-NEXT: [[ICMP11:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C12]] + ; WAVE32-NEXT: [[SELECT11:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP11]](i1), [[UV12]], [[SELECT10]] + ; WAVE32-NEXT: [[C13:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 13 + ; WAVE32-NEXT: [[ICMP12:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C13]] + ; WAVE32-NEXT: [[SELECT12:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP12]](i1), [[UV13]], [[SELECT11]] + ; WAVE32-NEXT: [[C14:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 14 + ; WAVE32-NEXT: [[ICMP13:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C14]] + ; WAVE32-NEXT: [[SELECT13:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP13]](i1), [[UV14]], [[SELECT12]] + ; WAVE32-NEXT: [[C15:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 15 + ; WAVE32-NEXT: [[ICMP14:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C15]] + ; WAVE32-NEXT: [[SELECT14:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP14]](i1), [[UV15]], [[SELECT13]] + ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[SELECT14]](i32) + ; WAVE32-NEXT: $vgpr0 = COPY [[COPY3]](i32) + %0:_(<16 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:_(i32) = COPY $vgpr16 + %2:_(i32) = G_CONSTANT i32 1 + %3:_(i32) = G_ADD %1, %2 + %4:_(i32) = G_EXTRACT_VECTOR_ELT %0(<16 x i32>), %3(i32) + $vgpr0 = COPY %4(i32) ... 
--- @@ -698,121 +707,122 @@ body: | ; WAVE64-LABEL: name: extract_vector_elt_v16s32_vv_idx_addm1 ; WAVE64: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr16 - ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -1 - ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; WAVE64-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] - ; WAVE64-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; WAVE64-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C1]] - ; WAVE64-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV]] - ; WAVE64-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; WAVE64-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C2]] - ; WAVE64-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV2]], [[SELECT]] - ; WAVE64-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 - ; WAVE64-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C3]] - ; WAVE64-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV3]], [[SELECT1]] - ; WAVE64-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 - ; WAVE64-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C4]] - ; WAVE64-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV4]], [[SELECT2]] - ; WAVE64-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 - ; WAVE64-NEXT: [[ICMP4:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C5]] - ; WAVE64-NEXT: [[SELECT4:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV5]], [[SELECT3]] - ; WAVE64-NEXT: [[C6:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; WAVE64-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C6]] - ; WAVE64-NEXT: [[SELECT5:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV6]], [[SELECT4]] - ; WAVE64-NEXT: [[C7:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 7 - ; WAVE64-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C7]] - ; WAVE64-NEXT: [[SELECT6:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV7]], [[SELECT5]] - ; WAVE64-NEXT: [[C8:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 8 - ; WAVE64-NEXT: [[ICMP7:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C8]] - ; WAVE64-NEXT: [[SELECT7:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP7]](s1), [[UV8]], [[SELECT6]] - ; WAVE64-NEXT: [[C9:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 9 - ; WAVE64-NEXT: [[ICMP8:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C9]] - ; WAVE64-NEXT: [[SELECT8:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP8]](s1), [[UV9]], [[SELECT7]] - ; WAVE64-NEXT: [[C10:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 - ; WAVE64-NEXT: [[ICMP9:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C10]] - ; WAVE64-NEXT: [[SELECT9:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP9]](s1), [[UV10]], 
[[SELECT8]] - ; WAVE64-NEXT: [[C11:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 11 - ; WAVE64-NEXT: [[ICMP10:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C11]] - ; WAVE64-NEXT: [[SELECT10:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP10]](s1), [[UV11]], [[SELECT9]] - ; WAVE64-NEXT: [[C12:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 - ; WAVE64-NEXT: [[ICMP11:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C12]] - ; WAVE64-NEXT: [[SELECT11:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP11]](s1), [[UV12]], [[SELECT10]] - ; WAVE64-NEXT: [[C13:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 13 - ; WAVE64-NEXT: [[ICMP12:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C13]] - ; WAVE64-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP12]](s1), [[UV13]], [[SELECT11]] - ; WAVE64-NEXT: [[C14:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 14 - ; WAVE64-NEXT: [[ICMP13:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C14]] - ; WAVE64-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP13]](s1), [[UV14]], [[SELECT12]] - ; WAVE64-NEXT: [[C15:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 15 - ; WAVE64-NEXT: [[ICMP14:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C15]] - ; WAVE64-NEXT: [[SELECT14:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP14]](s1), [[UV15]], [[SELECT13]] - ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT14]](s32) - ; WAVE64-NEXT: $vgpr0 = COPY [[COPY3]](s32) + ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr(<16 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr16 + ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 -1 + ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; WAVE64-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[COPY1]], [[COPY2]] + ; WAVE64-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32), [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32), [[UV4:%[0-9]+]]:vgpr(i32), [[UV5:%[0-9]+]]:vgpr(i32), [[UV6:%[0-9]+]]:vgpr(i32), [[UV7:%[0-9]+]]:vgpr(i32), [[UV8:%[0-9]+]]:vgpr(i32), [[UV9:%[0-9]+]]:vgpr(i32), [[UV10:%[0-9]+]]:vgpr(i32), [[UV11:%[0-9]+]]:vgpr(i32), [[UV12:%[0-9]+]]:vgpr(i32), [[UV13:%[0-9]+]]:vgpr(i32), [[UV14:%[0-9]+]]:vgpr(i32), [[UV15:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](<16 x i32>) + ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; WAVE64-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C1]] + ; WAVE64-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV1]], [[UV]] + ; WAVE64-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 2 + ; WAVE64-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C2]] + ; WAVE64-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP1]](i1), [[UV2]], [[SELECT]] + ; WAVE64-NEXT: [[C3:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 3 + ; WAVE64-NEXT: [[ICMP2:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C3]] + ; WAVE64-NEXT: [[SELECT2:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP2]](i1), [[UV3]], [[SELECT1]] + ; WAVE64-NEXT: [[C4:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4 + ; WAVE64-NEXT: [[ICMP3:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C4]] + ; WAVE64-NEXT: [[SELECT3:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP3]](i1), [[UV4]], [[SELECT2]] + ; WAVE64-NEXT: [[C5:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 5 + ; WAVE64-NEXT: [[ICMP4:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C5]] + ; WAVE64-NEXT: [[SELECT4:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP4]](i1), [[UV5]], [[SELECT3]] + ; WAVE64-NEXT: [[C6:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 6 + ; 
WAVE64-NEXT: [[ICMP5:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C6]] + ; WAVE64-NEXT: [[SELECT5:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP5]](i1), [[UV6]], [[SELECT4]] + ; WAVE64-NEXT: [[C7:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 7 + ; WAVE64-NEXT: [[ICMP6:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C7]] + ; WAVE64-NEXT: [[SELECT6:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP6]](i1), [[UV7]], [[SELECT5]] + ; WAVE64-NEXT: [[C8:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 8 + ; WAVE64-NEXT: [[ICMP7:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C8]] + ; WAVE64-NEXT: [[SELECT7:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP7]](i1), [[UV8]], [[SELECT6]] + ; WAVE64-NEXT: [[C9:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 9 + ; WAVE64-NEXT: [[ICMP8:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C9]] + ; WAVE64-NEXT: [[SELECT8:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP8]](i1), [[UV9]], [[SELECT7]] + ; WAVE64-NEXT: [[C10:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 10 + ; WAVE64-NEXT: [[ICMP9:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C10]] + ; WAVE64-NEXT: [[SELECT9:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP9]](i1), [[UV10]], [[SELECT8]] + ; WAVE64-NEXT: [[C11:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 11 + ; WAVE64-NEXT: [[ICMP10:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C11]] + ; WAVE64-NEXT: [[SELECT10:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP10]](i1), [[UV11]], [[SELECT9]] + ; WAVE64-NEXT: [[C12:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 12 + ; WAVE64-NEXT: [[ICMP11:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C12]] + ; WAVE64-NEXT: [[SELECT11:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP11]](i1), [[UV12]], [[SELECT10]] + ; WAVE64-NEXT: [[C13:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 13 + ; WAVE64-NEXT: [[ICMP12:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C13]] + ; WAVE64-NEXT: [[SELECT12:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP12]](i1), [[UV13]], [[SELECT11]] + ; WAVE64-NEXT: [[C14:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 14 + ; WAVE64-NEXT: [[ICMP13:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C14]] + ; WAVE64-NEXT: [[SELECT13:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP13]](i1), [[UV14]], [[SELECT12]] + ; WAVE64-NEXT: [[C15:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 15 + ; WAVE64-NEXT: [[ICMP14:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C15]] + ; WAVE64-NEXT: [[SELECT14:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP14]](i1), [[UV15]], [[SELECT13]] + ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[SELECT14]](i32) + ; WAVE64-NEXT: $vgpr0 = COPY [[COPY3]](i32) + ; ; WAVE32-LABEL: name: extract_vector_elt_v16s32_vv_idx_addm1 ; WAVE32: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr16 - ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -1 - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; WAVE32-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] - ; WAVE32-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), 
[[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; WAVE32-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C1]] - ; WAVE32-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV]] - ; WAVE32-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; WAVE32-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C2]] - ; WAVE32-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV2]], [[SELECT]] - ; WAVE32-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 - ; WAVE32-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C3]] - ; WAVE32-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV3]], [[SELECT1]] - ; WAVE32-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 - ; WAVE32-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C4]] - ; WAVE32-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV4]], [[SELECT2]] - ; WAVE32-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 - ; WAVE32-NEXT: [[ICMP4:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C5]] - ; WAVE32-NEXT: [[SELECT4:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV5]], [[SELECT3]] - ; WAVE32-NEXT: [[C6:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; WAVE32-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C6]] - ; WAVE32-NEXT: [[SELECT5:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV6]], [[SELECT4]] - ; WAVE32-NEXT: [[C7:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 7 - ; WAVE32-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C7]] - ; WAVE32-NEXT: [[SELECT6:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV7]], [[SELECT5]] - ; WAVE32-NEXT: [[C8:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 8 - ; WAVE32-NEXT: [[ICMP7:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C8]] - ; WAVE32-NEXT: [[SELECT7:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP7]](s1), [[UV8]], [[SELECT6]] - ; WAVE32-NEXT: [[C9:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 9 - ; WAVE32-NEXT: [[ICMP8:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C9]] - ; WAVE32-NEXT: [[SELECT8:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP8]](s1), [[UV9]], [[SELECT7]] - ; WAVE32-NEXT: [[C10:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 - ; WAVE32-NEXT: [[ICMP9:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C10]] - ; WAVE32-NEXT: [[SELECT9:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP9]](s1), [[UV10]], [[SELECT8]] - ; WAVE32-NEXT: [[C11:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 11 - ; WAVE32-NEXT: [[ICMP10:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C11]] - ; WAVE32-NEXT: [[SELECT10:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP10]](s1), [[UV11]], [[SELECT9]] - ; WAVE32-NEXT: [[C12:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 - ; WAVE32-NEXT: [[ICMP11:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C12]] - ; WAVE32-NEXT: [[SELECT11:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP11]](s1), [[UV12]], [[SELECT10]] - ; WAVE32-NEXT: [[C13:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 13 - ; WAVE32-NEXT: [[ICMP12:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C13]] - ; WAVE32-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP12]](s1), [[UV13]], [[SELECT11]] - ; WAVE32-NEXT: [[C14:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 14 - ; WAVE32-NEXT: [[ICMP13:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C14]] - ; WAVE32-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP13]](s1), [[UV14]], [[SELECT12]] - ; WAVE32-NEXT: [[C15:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 
15 - ; WAVE32-NEXT: [[ICMP14:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C15]] - ; WAVE32-NEXT: [[SELECT14:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP14]](s1), [[UV15]], [[SELECT13]] - ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT14]](s32) - ; WAVE32-NEXT: $vgpr0 = COPY [[COPY3]](s32) - %0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - %1:_(s32) = COPY $vgpr16 - %2:_(s32) = G_CONSTANT i32 -1 - %3:_(s32) = G_ADD %1, %2 - %4:_(s32) = G_EXTRACT_VECTOR_ELT %0, %3 - $vgpr0 = COPY %4 + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(<16 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr16 + ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 -1 + ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; WAVE32-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[COPY1]], [[COPY2]] + ; WAVE32-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32), [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32), [[UV4:%[0-9]+]]:vgpr(i32), [[UV5:%[0-9]+]]:vgpr(i32), [[UV6:%[0-9]+]]:vgpr(i32), [[UV7:%[0-9]+]]:vgpr(i32), [[UV8:%[0-9]+]]:vgpr(i32), [[UV9:%[0-9]+]]:vgpr(i32), [[UV10:%[0-9]+]]:vgpr(i32), [[UV11:%[0-9]+]]:vgpr(i32), [[UV12:%[0-9]+]]:vgpr(i32), [[UV13:%[0-9]+]]:vgpr(i32), [[UV14:%[0-9]+]]:vgpr(i32), [[UV15:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](<16 x i32>) + ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; WAVE32-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C1]] + ; WAVE32-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV1]], [[UV]] + ; WAVE32-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 2 + ; WAVE32-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C2]] + ; WAVE32-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP1]](i1), [[UV2]], [[SELECT]] + ; WAVE32-NEXT: [[C3:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 3 + ; WAVE32-NEXT: [[ICMP2:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C3]] + ; WAVE32-NEXT: [[SELECT2:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP2]](i1), [[UV3]], [[SELECT1]] + ; WAVE32-NEXT: [[C4:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4 + ; WAVE32-NEXT: [[ICMP3:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C4]] + ; WAVE32-NEXT: [[SELECT3:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP3]](i1), [[UV4]], [[SELECT2]] + ; WAVE32-NEXT: [[C5:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 5 + ; WAVE32-NEXT: [[ICMP4:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C5]] + ; WAVE32-NEXT: [[SELECT4:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP4]](i1), [[UV5]], [[SELECT3]] + ; WAVE32-NEXT: [[C6:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 6 + ; WAVE32-NEXT: [[ICMP5:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C6]] + ; WAVE32-NEXT: [[SELECT5:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP5]](i1), [[UV6]], [[SELECT4]] + ; WAVE32-NEXT: [[C7:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 7 + ; WAVE32-NEXT: [[ICMP6:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C7]] + ; WAVE32-NEXT: [[SELECT6:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP6]](i1), [[UV7]], [[SELECT5]] + ; WAVE32-NEXT: [[C8:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 8 + ; WAVE32-NEXT: [[ICMP7:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C8]] + ; WAVE32-NEXT: [[SELECT7:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP7]](i1), [[UV8]], [[SELECT6]] + ; WAVE32-NEXT: [[C9:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 9 + ; WAVE32-NEXT: [[ICMP8:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), 
[[C9]] + ; WAVE32-NEXT: [[SELECT8:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP8]](i1), [[UV9]], [[SELECT7]] + ; WAVE32-NEXT: [[C10:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 10 + ; WAVE32-NEXT: [[ICMP9:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C10]] + ; WAVE32-NEXT: [[SELECT9:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP9]](i1), [[UV10]], [[SELECT8]] + ; WAVE32-NEXT: [[C11:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 11 + ; WAVE32-NEXT: [[ICMP10:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C11]] + ; WAVE32-NEXT: [[SELECT10:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP10]](i1), [[UV11]], [[SELECT9]] + ; WAVE32-NEXT: [[C12:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 12 + ; WAVE32-NEXT: [[ICMP11:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C12]] + ; WAVE32-NEXT: [[SELECT11:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP11]](i1), [[UV12]], [[SELECT10]] + ; WAVE32-NEXT: [[C13:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 13 + ; WAVE32-NEXT: [[ICMP12:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C13]] + ; WAVE32-NEXT: [[SELECT12:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP12]](i1), [[UV13]], [[SELECT11]] + ; WAVE32-NEXT: [[C14:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 14 + ; WAVE32-NEXT: [[ICMP13:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C14]] + ; WAVE32-NEXT: [[SELECT13:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP13]](i1), [[UV14]], [[SELECT12]] + ; WAVE32-NEXT: [[C15:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 15 + ; WAVE32-NEXT: [[ICMP14:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C15]] + ; WAVE32-NEXT: [[SELECT14:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP14]](i1), [[UV15]], [[SELECT13]] + ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[SELECT14]](i32) + ; WAVE32-NEXT: $vgpr0 = COPY [[COPY3]](i32) + %0:_(<16 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:_(i32) = COPY $vgpr16 + %2:_(i32) = G_CONSTANT i32 -1 + %3:_(i32) = G_ADD %1, %2 + %4:_(i32) = G_EXTRACT_VECTOR_ELT %0(<16 x i32>), %3(i32) + $vgpr0 = COPY %4(i32) ... 
--- @@ -827,121 +837,122 @@ body: | ; WAVE64-LABEL: name: extract_vector_elt_v16s32_vv_idx_add16 ; WAVE64: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr16 - ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 - ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; WAVE64-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] - ; WAVE64-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; WAVE64-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C1]] - ; WAVE64-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV]] - ; WAVE64-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; WAVE64-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C2]] - ; WAVE64-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV2]], [[SELECT]] - ; WAVE64-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 - ; WAVE64-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C3]] - ; WAVE64-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV3]], [[SELECT1]] - ; WAVE64-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 - ; WAVE64-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C4]] - ; WAVE64-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV4]], [[SELECT2]] - ; WAVE64-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 - ; WAVE64-NEXT: [[ICMP4:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C5]] - ; WAVE64-NEXT: [[SELECT4:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV5]], [[SELECT3]] - ; WAVE64-NEXT: [[C6:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; WAVE64-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C6]] - ; WAVE64-NEXT: [[SELECT5:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV6]], [[SELECT4]] - ; WAVE64-NEXT: [[C7:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 7 - ; WAVE64-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C7]] - ; WAVE64-NEXT: [[SELECT6:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV7]], [[SELECT5]] - ; WAVE64-NEXT: [[C8:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 8 - ; WAVE64-NEXT: [[ICMP7:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C8]] - ; WAVE64-NEXT: [[SELECT7:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP7]](s1), [[UV8]], [[SELECT6]] - ; WAVE64-NEXT: [[C9:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 9 - ; WAVE64-NEXT: [[ICMP8:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C9]] - ; WAVE64-NEXT: [[SELECT8:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP8]](s1), [[UV9]], [[SELECT7]] - ; WAVE64-NEXT: [[C10:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 - ; WAVE64-NEXT: [[ICMP9:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C10]] - ; WAVE64-NEXT: [[SELECT9:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP9]](s1), [[UV10]], 
[[SELECT8]] - ; WAVE64-NEXT: [[C11:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 11 - ; WAVE64-NEXT: [[ICMP10:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C11]] - ; WAVE64-NEXT: [[SELECT10:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP10]](s1), [[UV11]], [[SELECT9]] - ; WAVE64-NEXT: [[C12:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 - ; WAVE64-NEXT: [[ICMP11:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C12]] - ; WAVE64-NEXT: [[SELECT11:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP11]](s1), [[UV12]], [[SELECT10]] - ; WAVE64-NEXT: [[C13:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 13 - ; WAVE64-NEXT: [[ICMP12:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C13]] - ; WAVE64-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP12]](s1), [[UV13]], [[SELECT11]] - ; WAVE64-NEXT: [[C14:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 14 - ; WAVE64-NEXT: [[ICMP13:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C14]] - ; WAVE64-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP13]](s1), [[UV14]], [[SELECT12]] - ; WAVE64-NEXT: [[C15:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 15 - ; WAVE64-NEXT: [[ICMP14:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C15]] - ; WAVE64-NEXT: [[SELECT14:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP14]](s1), [[UV15]], [[SELECT13]] - ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT14]](s32) - ; WAVE64-NEXT: $vgpr0 = COPY [[COPY3]](s32) + ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr(<16 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr16 + ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 16 + ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; WAVE64-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[COPY1]], [[COPY2]] + ; WAVE64-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32), [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32), [[UV4:%[0-9]+]]:vgpr(i32), [[UV5:%[0-9]+]]:vgpr(i32), [[UV6:%[0-9]+]]:vgpr(i32), [[UV7:%[0-9]+]]:vgpr(i32), [[UV8:%[0-9]+]]:vgpr(i32), [[UV9:%[0-9]+]]:vgpr(i32), [[UV10:%[0-9]+]]:vgpr(i32), [[UV11:%[0-9]+]]:vgpr(i32), [[UV12:%[0-9]+]]:vgpr(i32), [[UV13:%[0-9]+]]:vgpr(i32), [[UV14:%[0-9]+]]:vgpr(i32), [[UV15:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](<16 x i32>) + ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; WAVE64-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C1]] + ; WAVE64-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV1]], [[UV]] + ; WAVE64-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 2 + ; WAVE64-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C2]] + ; WAVE64-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP1]](i1), [[UV2]], [[SELECT]] + ; WAVE64-NEXT: [[C3:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 3 + ; WAVE64-NEXT: [[ICMP2:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C3]] + ; WAVE64-NEXT: [[SELECT2:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP2]](i1), [[UV3]], [[SELECT1]] + ; WAVE64-NEXT: [[C4:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4 + ; WAVE64-NEXT: [[ICMP3:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C4]] + ; WAVE64-NEXT: [[SELECT3:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP3]](i1), [[UV4]], [[SELECT2]] + ; WAVE64-NEXT: [[C5:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 5 + ; WAVE64-NEXT: [[ICMP4:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C5]] + ; WAVE64-NEXT: [[SELECT4:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP4]](i1), [[UV5]], [[SELECT3]] + ; WAVE64-NEXT: [[C6:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 6 + ; 
WAVE64-NEXT: [[ICMP5:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C6]] + ; WAVE64-NEXT: [[SELECT5:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP5]](i1), [[UV6]], [[SELECT4]] + ; WAVE64-NEXT: [[C7:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 7 + ; WAVE64-NEXT: [[ICMP6:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C7]] + ; WAVE64-NEXT: [[SELECT6:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP6]](i1), [[UV7]], [[SELECT5]] + ; WAVE64-NEXT: [[C8:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 8 + ; WAVE64-NEXT: [[ICMP7:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C8]] + ; WAVE64-NEXT: [[SELECT7:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP7]](i1), [[UV8]], [[SELECT6]] + ; WAVE64-NEXT: [[C9:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 9 + ; WAVE64-NEXT: [[ICMP8:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C9]] + ; WAVE64-NEXT: [[SELECT8:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP8]](i1), [[UV9]], [[SELECT7]] + ; WAVE64-NEXT: [[C10:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 10 + ; WAVE64-NEXT: [[ICMP9:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C10]] + ; WAVE64-NEXT: [[SELECT9:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP9]](i1), [[UV10]], [[SELECT8]] + ; WAVE64-NEXT: [[C11:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 11 + ; WAVE64-NEXT: [[ICMP10:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C11]] + ; WAVE64-NEXT: [[SELECT10:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP10]](i1), [[UV11]], [[SELECT9]] + ; WAVE64-NEXT: [[C12:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 12 + ; WAVE64-NEXT: [[ICMP11:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C12]] + ; WAVE64-NEXT: [[SELECT11:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP11]](i1), [[UV12]], [[SELECT10]] + ; WAVE64-NEXT: [[C13:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 13 + ; WAVE64-NEXT: [[ICMP12:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C13]] + ; WAVE64-NEXT: [[SELECT12:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP12]](i1), [[UV13]], [[SELECT11]] + ; WAVE64-NEXT: [[C14:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 14 + ; WAVE64-NEXT: [[ICMP13:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C14]] + ; WAVE64-NEXT: [[SELECT13:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP13]](i1), [[UV14]], [[SELECT12]] + ; WAVE64-NEXT: [[C15:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 15 + ; WAVE64-NEXT: [[ICMP14:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C15]] + ; WAVE64-NEXT: [[SELECT14:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP14]](i1), [[UV15]], [[SELECT13]] + ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[SELECT14]](i32) + ; WAVE64-NEXT: $vgpr0 = COPY [[COPY3]](i32) + ; ; WAVE32-LABEL: name: extract_vector_elt_v16s32_vv_idx_add16 ; WAVE32: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr16 - ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; WAVE32-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] - ; WAVE32-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), 
[[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; WAVE32-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C1]] - ; WAVE32-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV]] - ; WAVE32-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; WAVE32-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C2]] - ; WAVE32-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV2]], [[SELECT]] - ; WAVE32-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 - ; WAVE32-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C3]] - ; WAVE32-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV3]], [[SELECT1]] - ; WAVE32-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 - ; WAVE32-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C4]] - ; WAVE32-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV4]], [[SELECT2]] - ; WAVE32-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 - ; WAVE32-NEXT: [[ICMP4:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C5]] - ; WAVE32-NEXT: [[SELECT4:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV5]], [[SELECT3]] - ; WAVE32-NEXT: [[C6:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; WAVE32-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C6]] - ; WAVE32-NEXT: [[SELECT5:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV6]], [[SELECT4]] - ; WAVE32-NEXT: [[C7:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 7 - ; WAVE32-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C7]] - ; WAVE32-NEXT: [[SELECT6:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV7]], [[SELECT5]] - ; WAVE32-NEXT: [[C8:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 8 - ; WAVE32-NEXT: [[ICMP7:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C8]] - ; WAVE32-NEXT: [[SELECT7:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP7]](s1), [[UV8]], [[SELECT6]] - ; WAVE32-NEXT: [[C9:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 9 - ; WAVE32-NEXT: [[ICMP8:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C9]] - ; WAVE32-NEXT: [[SELECT8:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP8]](s1), [[UV9]], [[SELECT7]] - ; WAVE32-NEXT: [[C10:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 - ; WAVE32-NEXT: [[ICMP9:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C10]] - ; WAVE32-NEXT: [[SELECT9:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP9]](s1), [[UV10]], [[SELECT8]] - ; WAVE32-NEXT: [[C11:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 11 - ; WAVE32-NEXT: [[ICMP10:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C11]] - ; WAVE32-NEXT: [[SELECT10:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP10]](s1), [[UV11]], [[SELECT9]] - ; WAVE32-NEXT: [[C12:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 - ; WAVE32-NEXT: [[ICMP11:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C12]] - ; WAVE32-NEXT: [[SELECT11:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP11]](s1), [[UV12]], [[SELECT10]] - ; WAVE32-NEXT: [[C13:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 13 - ; WAVE32-NEXT: [[ICMP12:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C13]] - ; WAVE32-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP12]](s1), [[UV13]], [[SELECT11]] - ; WAVE32-NEXT: [[C14:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 14 - ; WAVE32-NEXT: [[ICMP13:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C14]] - ; WAVE32-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP13]](s1), [[UV14]], [[SELECT12]] - ; WAVE32-NEXT: [[C15:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 
15 - ; WAVE32-NEXT: [[ICMP14:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C15]] - ; WAVE32-NEXT: [[SELECT14:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP14]](s1), [[UV15]], [[SELECT13]] - ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT14]](s32) - ; WAVE32-NEXT: $vgpr0 = COPY [[COPY3]](s32) - %0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - %1:_(s32) = COPY $vgpr16 - %2:_(s32) = G_CONSTANT i32 16 - %3:_(s32) = G_ADD %1, %2 - %4:_(s32) = G_EXTRACT_VECTOR_ELT %0, %3 - $vgpr0 = COPY %4 + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(<16 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr16 + ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 16 + ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; WAVE32-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[COPY1]], [[COPY2]] + ; WAVE32-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32), [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32), [[UV4:%[0-9]+]]:vgpr(i32), [[UV5:%[0-9]+]]:vgpr(i32), [[UV6:%[0-9]+]]:vgpr(i32), [[UV7:%[0-9]+]]:vgpr(i32), [[UV8:%[0-9]+]]:vgpr(i32), [[UV9:%[0-9]+]]:vgpr(i32), [[UV10:%[0-9]+]]:vgpr(i32), [[UV11:%[0-9]+]]:vgpr(i32), [[UV12:%[0-9]+]]:vgpr(i32), [[UV13:%[0-9]+]]:vgpr(i32), [[UV14:%[0-9]+]]:vgpr(i32), [[UV15:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](<16 x i32>) + ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; WAVE32-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C1]] + ; WAVE32-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV1]], [[UV]] + ; WAVE32-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 2 + ; WAVE32-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C2]] + ; WAVE32-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP1]](i1), [[UV2]], [[SELECT]] + ; WAVE32-NEXT: [[C3:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 3 + ; WAVE32-NEXT: [[ICMP2:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C3]] + ; WAVE32-NEXT: [[SELECT2:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP2]](i1), [[UV3]], [[SELECT1]] + ; WAVE32-NEXT: [[C4:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4 + ; WAVE32-NEXT: [[ICMP3:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C4]] + ; WAVE32-NEXT: [[SELECT3:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP3]](i1), [[UV4]], [[SELECT2]] + ; WAVE32-NEXT: [[C5:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 5 + ; WAVE32-NEXT: [[ICMP4:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C5]] + ; WAVE32-NEXT: [[SELECT4:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP4]](i1), [[UV5]], [[SELECT3]] + ; WAVE32-NEXT: [[C6:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 6 + ; WAVE32-NEXT: [[ICMP5:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C6]] + ; WAVE32-NEXT: [[SELECT5:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP5]](i1), [[UV6]], [[SELECT4]] + ; WAVE32-NEXT: [[C7:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 7 + ; WAVE32-NEXT: [[ICMP6:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C7]] + ; WAVE32-NEXT: [[SELECT6:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP6]](i1), [[UV7]], [[SELECT5]] + ; WAVE32-NEXT: [[C8:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 8 + ; WAVE32-NEXT: [[ICMP7:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C8]] + ; WAVE32-NEXT: [[SELECT7:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP7]](i1), [[UV8]], [[SELECT6]] + ; WAVE32-NEXT: [[C9:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 9 + ; WAVE32-NEXT: [[ICMP8:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), 
[[C9]] + ; WAVE32-NEXT: [[SELECT8:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP8]](i1), [[UV9]], [[SELECT7]] + ; WAVE32-NEXT: [[C10:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 10 + ; WAVE32-NEXT: [[ICMP9:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C10]] + ; WAVE32-NEXT: [[SELECT9:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP9]](i1), [[UV10]], [[SELECT8]] + ; WAVE32-NEXT: [[C11:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 11 + ; WAVE32-NEXT: [[ICMP10:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C11]] + ; WAVE32-NEXT: [[SELECT10:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP10]](i1), [[UV11]], [[SELECT9]] + ; WAVE32-NEXT: [[C12:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 12 + ; WAVE32-NEXT: [[ICMP11:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C12]] + ; WAVE32-NEXT: [[SELECT11:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP11]](i1), [[UV12]], [[SELECT10]] + ; WAVE32-NEXT: [[C13:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 13 + ; WAVE32-NEXT: [[ICMP12:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C13]] + ; WAVE32-NEXT: [[SELECT12:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP12]](i1), [[UV13]], [[SELECT11]] + ; WAVE32-NEXT: [[C14:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 14 + ; WAVE32-NEXT: [[ICMP13:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C14]] + ; WAVE32-NEXT: [[SELECT13:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP13]](i1), [[UV14]], [[SELECT12]] + ; WAVE32-NEXT: [[C15:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 15 + ; WAVE32-NEXT: [[ICMP14:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C15]] + ; WAVE32-NEXT: [[SELECT14:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP14]](i1), [[UV15]], [[SELECT13]] + ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[SELECT14]](i32) + ; WAVE32-NEXT: $vgpr0 = COPY [[COPY3]](i32) + %0:_(<16 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:_(i32) = COPY $vgpr16 + %2:_(i32) = G_CONSTANT i32 16 + %3:_(i32) = G_ADD %1, %2 + %4:_(i32) = G_EXTRACT_VECTOR_ELT %0(<16 x i32>), %3(i32) + $vgpr0 = COPY %4(i32) ... 
--- @@ -956,91 +967,92 @@ body: | ; WAVE64-LABEL: name: extract_vector_elt_v8s64_vv_idx_add1 ; WAVE64: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr16 - ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; WAVE64-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] - ; WAVE64-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s64>) - ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; WAVE64-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C1]] - ; WAVE64-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV2]], [[UV]] - ; WAVE64-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV3]], [[UV1]] - ; WAVE64-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; WAVE64-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C2]] - ; WAVE64-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV4]], [[SELECT]] - ; WAVE64-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV5]], [[SELECT1]] - ; WAVE64-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 - ; WAVE64-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C3]] - ; WAVE64-NEXT: [[SELECT4:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV6]], [[SELECT2]] - ; WAVE64-NEXT: [[SELECT5:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV7]], [[SELECT3]] - ; WAVE64-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 - ; WAVE64-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C4]] - ; WAVE64-NEXT: [[SELECT6:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV8]], [[SELECT4]] - ; WAVE64-NEXT: [[SELECT7:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV9]], [[SELECT5]] - ; WAVE64-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 - ; WAVE64-NEXT: [[ICMP4:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C5]] - ; WAVE64-NEXT: [[SELECT8:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV10]], [[SELECT6]] - ; WAVE64-NEXT: [[SELECT9:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV11]], [[SELECT7]] - ; WAVE64-NEXT: [[C6:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; WAVE64-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C6]] - ; WAVE64-NEXT: [[SELECT10:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV12]], [[SELECT8]] - ; WAVE64-NEXT: [[SELECT11:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV13]], [[SELECT9]] - ; WAVE64-NEXT: [[C7:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 7 - ; WAVE64-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C7]] - ; WAVE64-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV14]], [[SELECT10]] - ; WAVE64-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV15]], [[SELECT11]] - ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY 
[[SELECT12]](s32) - ; WAVE64-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[SELECT13]](s32) - ; WAVE64-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) - ; WAVE64-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr(<8 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr16 + ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; WAVE64-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[COPY1]], [[COPY2]] + ; WAVE64-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32), [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32), [[UV4:%[0-9]+]]:vgpr(i32), [[UV5:%[0-9]+]]:vgpr(i32), [[UV6:%[0-9]+]]:vgpr(i32), [[UV7:%[0-9]+]]:vgpr(i32), [[UV8:%[0-9]+]]:vgpr(i32), [[UV9:%[0-9]+]]:vgpr(i32), [[UV10:%[0-9]+]]:vgpr(i32), [[UV11:%[0-9]+]]:vgpr(i32), [[UV12:%[0-9]+]]:vgpr(i32), [[UV13:%[0-9]+]]:vgpr(i32), [[UV14:%[0-9]+]]:vgpr(i32), [[UV15:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](<8 x i64>) + ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; WAVE64-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C1]] + ; WAVE64-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV2]], [[UV]] + ; WAVE64-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV3]], [[UV1]] + ; WAVE64-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 2 + ; WAVE64-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C2]] + ; WAVE64-NEXT: [[SELECT2:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP1]](i1), [[UV4]], [[SELECT]] + ; WAVE64-NEXT: [[SELECT3:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP1]](i1), [[UV5]], [[SELECT1]] + ; WAVE64-NEXT: [[C3:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 3 + ; WAVE64-NEXT: [[ICMP2:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C3]] + ; WAVE64-NEXT: [[SELECT4:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP2]](i1), [[UV6]], [[SELECT2]] + ; WAVE64-NEXT: [[SELECT5:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP2]](i1), [[UV7]], [[SELECT3]] + ; WAVE64-NEXT: [[C4:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4 + ; WAVE64-NEXT: [[ICMP3:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C4]] + ; WAVE64-NEXT: [[SELECT6:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP3]](i1), [[UV8]], [[SELECT4]] + ; WAVE64-NEXT: [[SELECT7:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP3]](i1), [[UV9]], [[SELECT5]] + ; WAVE64-NEXT: [[C5:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 5 + ; WAVE64-NEXT: [[ICMP4:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C5]] + ; WAVE64-NEXT: [[SELECT8:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP4]](i1), [[UV10]], [[SELECT6]] + ; WAVE64-NEXT: [[SELECT9:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP4]](i1), [[UV11]], [[SELECT7]] + ; WAVE64-NEXT: [[C6:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 6 + ; WAVE64-NEXT: [[ICMP5:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C6]] + ; WAVE64-NEXT: [[SELECT10:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP5]](i1), [[UV12]], [[SELECT8]] + ; WAVE64-NEXT: [[SELECT11:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP5]](i1), [[UV13]], [[SELECT9]] + ; WAVE64-NEXT: [[C7:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 7 + ; WAVE64-NEXT: [[ICMP6:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C7]] + ; WAVE64-NEXT: [[SELECT12:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP6]](i1), [[UV14]], [[SELECT10]] + ; WAVE64-NEXT: [[SELECT13:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP6]](i1), [[UV15]], [[SELECT11]] + ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[SELECT12]](i32) + ; 
WAVE64-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[SELECT13]](i32) + ; WAVE64-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[COPY3]](i32), [[COPY4]](i32) + ; WAVE64-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + ; ; WAVE32-LABEL: name: extract_vector_elt_v8s64_vv_idx_add1 ; WAVE32: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr16 - ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; WAVE32-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] - ; WAVE32-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s64>) - ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; WAVE32-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C1]] - ; WAVE32-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV2]], [[UV]] - ; WAVE32-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV3]], [[UV1]] - ; WAVE32-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; WAVE32-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C2]] - ; WAVE32-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV4]], [[SELECT]] - ; WAVE32-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV5]], [[SELECT1]] - ; WAVE32-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 - ; WAVE32-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C3]] - ; WAVE32-NEXT: [[SELECT4:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV6]], [[SELECT2]] - ; WAVE32-NEXT: [[SELECT5:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV7]], [[SELECT3]] - ; WAVE32-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 - ; WAVE32-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C4]] - ; WAVE32-NEXT: [[SELECT6:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV8]], [[SELECT4]] - ; WAVE32-NEXT: [[SELECT7:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV9]], [[SELECT5]] - ; WAVE32-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 - ; WAVE32-NEXT: [[ICMP4:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C5]] - ; WAVE32-NEXT: [[SELECT8:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV10]], [[SELECT6]] - ; WAVE32-NEXT: [[SELECT9:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV11]], [[SELECT7]] - ; WAVE32-NEXT: [[C6:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; WAVE32-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C6]] - ; WAVE32-NEXT: [[SELECT10:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV12]], [[SELECT8]] - ; WAVE32-NEXT: [[SELECT11:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV13]], [[SELECT9]] - ; WAVE32-NEXT: [[C7:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 7 - ; WAVE32-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C7]] - ; WAVE32-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), 
[[UV14]], [[SELECT10]] - ; WAVE32-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV15]], [[SELECT11]] - ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT12]](s32) - ; WAVE32-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[SELECT13]](s32) - ; WAVE32-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) - ; WAVE32-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) - %0:_(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - %1:_(s32) = COPY $vgpr16 - %2:_(s32) = G_CONSTANT i32 1 - %3:_(s32) = G_ADD %1, %2 - %4:_(s64) = G_EXTRACT_VECTOR_ELT %0, %3 - $vgpr0_vgpr1 = COPY %4 + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(<8 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr16 + ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; WAVE32-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[COPY1]], [[COPY2]] + ; WAVE32-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32), [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32), [[UV4:%[0-9]+]]:vgpr(i32), [[UV5:%[0-9]+]]:vgpr(i32), [[UV6:%[0-9]+]]:vgpr(i32), [[UV7:%[0-9]+]]:vgpr(i32), [[UV8:%[0-9]+]]:vgpr(i32), [[UV9:%[0-9]+]]:vgpr(i32), [[UV10:%[0-9]+]]:vgpr(i32), [[UV11:%[0-9]+]]:vgpr(i32), [[UV12:%[0-9]+]]:vgpr(i32), [[UV13:%[0-9]+]]:vgpr(i32), [[UV14:%[0-9]+]]:vgpr(i32), [[UV15:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](<8 x i64>) + ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; WAVE32-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C1]] + ; WAVE32-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV2]], [[UV]] + ; WAVE32-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV3]], [[UV1]] + ; WAVE32-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 2 + ; WAVE32-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C2]] + ; WAVE32-NEXT: [[SELECT2:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP1]](i1), [[UV4]], [[SELECT]] + ; WAVE32-NEXT: [[SELECT3:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP1]](i1), [[UV5]], [[SELECT1]] + ; WAVE32-NEXT: [[C3:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 3 + ; WAVE32-NEXT: [[ICMP2:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C3]] + ; WAVE32-NEXT: [[SELECT4:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP2]](i1), [[UV6]], [[SELECT2]] + ; WAVE32-NEXT: [[SELECT5:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP2]](i1), [[UV7]], [[SELECT3]] + ; WAVE32-NEXT: [[C4:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4 + ; WAVE32-NEXT: [[ICMP3:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C4]] + ; WAVE32-NEXT: [[SELECT6:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP3]](i1), [[UV8]], [[SELECT4]] + ; WAVE32-NEXT: [[SELECT7:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP3]](i1), [[UV9]], [[SELECT5]] + ; WAVE32-NEXT: [[C5:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 5 + ; WAVE32-NEXT: [[ICMP4:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C5]] + ; WAVE32-NEXT: [[SELECT8:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP4]](i1), [[UV10]], [[SELECT6]] + ; WAVE32-NEXT: [[SELECT9:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP4]](i1), [[UV11]], [[SELECT7]] + ; WAVE32-NEXT: [[C6:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 6 + ; WAVE32-NEXT: [[ICMP5:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C6]] + ; WAVE32-NEXT: [[SELECT10:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP5]](i1), [[UV12]], [[SELECT8]] + ; WAVE32-NEXT: [[SELECT11:%[0-9]+]]:vgpr(i32) = G_SELECT 
[[ICMP5]](i1), [[UV13]], [[SELECT9]] + ; WAVE32-NEXT: [[C7:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 7 + ; WAVE32-NEXT: [[ICMP6:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C7]] + ; WAVE32-NEXT: [[SELECT12:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP6]](i1), [[UV14]], [[SELECT10]] + ; WAVE32-NEXT: [[SELECT13:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP6]](i1), [[UV15]], [[SELECT11]] + ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[SELECT12]](i32) + ; WAVE32-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[SELECT13]](i32) + ; WAVE32-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[COPY3]](i32), [[COPY4]](i32) + ; WAVE32-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + %0:_(<8 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:_(i32) = COPY $vgpr16 + %2:_(i32) = G_CONSTANT i32 1 + %3:_(i32) = G_ADD %1, %2 + %4:_(i64) = G_EXTRACT_VECTOR_ELT %0(<8 x i64>), %3(i32) + $vgpr0_vgpr1 = COPY %4(i64) ... --- @@ -1055,121 +1067,122 @@ body: | ; WAVE64-LABEL: name: extract_vector_elt_v16s32_sv_idx_add1 ; WAVE64: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $vgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; WAVE64-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] - ; WAVE64-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; WAVE64-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C1]] - ; WAVE64-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV]] - ; WAVE64-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; WAVE64-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C2]] - ; WAVE64-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV2]], [[SELECT]] - ; WAVE64-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 - ; WAVE64-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C3]] - ; WAVE64-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV3]], [[SELECT1]] - ; WAVE64-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 - ; WAVE64-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C4]] - ; WAVE64-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV4]], [[SELECT2]] - ; WAVE64-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 - ; WAVE64-NEXT: [[ICMP4:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C5]] - ; WAVE64-NEXT: [[SELECT4:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV5]], [[SELECT3]] - ; WAVE64-NEXT: [[C6:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; WAVE64-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C6]] - ; WAVE64-NEXT: [[SELECT5:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV6]], 
[[SELECT4]] - ; WAVE64-NEXT: [[C7:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 7 - ; WAVE64-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C7]] - ; WAVE64-NEXT: [[SELECT6:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV7]], [[SELECT5]] - ; WAVE64-NEXT: [[C8:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 8 - ; WAVE64-NEXT: [[ICMP7:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C8]] - ; WAVE64-NEXT: [[SELECT7:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP7]](s1), [[UV8]], [[SELECT6]] - ; WAVE64-NEXT: [[C9:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 9 - ; WAVE64-NEXT: [[ICMP8:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C9]] - ; WAVE64-NEXT: [[SELECT8:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP8]](s1), [[UV9]], [[SELECT7]] - ; WAVE64-NEXT: [[C10:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 - ; WAVE64-NEXT: [[ICMP9:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C10]] - ; WAVE64-NEXT: [[SELECT9:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP9]](s1), [[UV10]], [[SELECT8]] - ; WAVE64-NEXT: [[C11:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 11 - ; WAVE64-NEXT: [[ICMP10:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C11]] - ; WAVE64-NEXT: [[SELECT10:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP10]](s1), [[UV11]], [[SELECT9]] - ; WAVE64-NEXT: [[C12:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 - ; WAVE64-NEXT: [[ICMP11:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C12]] - ; WAVE64-NEXT: [[SELECT11:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP11]](s1), [[UV12]], [[SELECT10]] - ; WAVE64-NEXT: [[C13:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 13 - ; WAVE64-NEXT: [[ICMP12:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C13]] - ; WAVE64-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP12]](s1), [[UV13]], [[SELECT11]] - ; WAVE64-NEXT: [[C14:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 14 - ; WAVE64-NEXT: [[ICMP13:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C14]] - ; WAVE64-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP13]](s1), [[UV14]], [[SELECT12]] - ; WAVE64-NEXT: [[C15:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 15 - ; WAVE64-NEXT: [[ICMP14:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C15]] - ; WAVE64-NEXT: [[SELECT14:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP14]](s1), [[UV15]], [[SELECT13]] - ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT14]](s32) - ; WAVE64-NEXT: $vgpr0 = COPY [[COPY3]](s32) + ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(<16 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; WAVE64-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[COPY1]], [[COPY2]] + ; WAVE64-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32), [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32), [[UV4:%[0-9]+]]:vgpr(i32), [[UV5:%[0-9]+]]:vgpr(i32), [[UV6:%[0-9]+]]:vgpr(i32), [[UV7:%[0-9]+]]:vgpr(i32), [[UV8:%[0-9]+]]:vgpr(i32), [[UV9:%[0-9]+]]:vgpr(i32), [[UV10:%[0-9]+]]:vgpr(i32), [[UV11:%[0-9]+]]:vgpr(i32), [[UV12:%[0-9]+]]:vgpr(i32), [[UV13:%[0-9]+]]:vgpr(i32), [[UV14:%[0-9]+]]:vgpr(i32), [[UV15:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](<16 x i32>) + ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; WAVE64-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C1]] + ; WAVE64-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV1]], [[UV]] + ; WAVE64-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 2 + ; 
WAVE64-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C2]] + ; WAVE64-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP1]](i1), [[UV2]], [[SELECT]] + ; WAVE64-NEXT: [[C3:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 3 + ; WAVE64-NEXT: [[ICMP2:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C3]] + ; WAVE64-NEXT: [[SELECT2:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP2]](i1), [[UV3]], [[SELECT1]] + ; WAVE64-NEXT: [[C4:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4 + ; WAVE64-NEXT: [[ICMP3:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C4]] + ; WAVE64-NEXT: [[SELECT3:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP3]](i1), [[UV4]], [[SELECT2]] + ; WAVE64-NEXT: [[C5:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 5 + ; WAVE64-NEXT: [[ICMP4:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C5]] + ; WAVE64-NEXT: [[SELECT4:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP4]](i1), [[UV5]], [[SELECT3]] + ; WAVE64-NEXT: [[C6:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 6 + ; WAVE64-NEXT: [[ICMP5:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C6]] + ; WAVE64-NEXT: [[SELECT5:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP5]](i1), [[UV6]], [[SELECT4]] + ; WAVE64-NEXT: [[C7:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 7 + ; WAVE64-NEXT: [[ICMP6:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C7]] + ; WAVE64-NEXT: [[SELECT6:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP6]](i1), [[UV7]], [[SELECT5]] + ; WAVE64-NEXT: [[C8:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 8 + ; WAVE64-NEXT: [[ICMP7:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C8]] + ; WAVE64-NEXT: [[SELECT7:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP7]](i1), [[UV8]], [[SELECT6]] + ; WAVE64-NEXT: [[C9:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 9 + ; WAVE64-NEXT: [[ICMP8:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C9]] + ; WAVE64-NEXT: [[SELECT8:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP8]](i1), [[UV9]], [[SELECT7]] + ; WAVE64-NEXT: [[C10:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 10 + ; WAVE64-NEXT: [[ICMP9:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C10]] + ; WAVE64-NEXT: [[SELECT9:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP9]](i1), [[UV10]], [[SELECT8]] + ; WAVE64-NEXT: [[C11:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 11 + ; WAVE64-NEXT: [[ICMP10:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C11]] + ; WAVE64-NEXT: [[SELECT10:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP10]](i1), [[UV11]], [[SELECT9]] + ; WAVE64-NEXT: [[C12:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 12 + ; WAVE64-NEXT: [[ICMP11:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C12]] + ; WAVE64-NEXT: [[SELECT11:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP11]](i1), [[UV12]], [[SELECT10]] + ; WAVE64-NEXT: [[C13:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 13 + ; WAVE64-NEXT: [[ICMP12:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C13]] + ; WAVE64-NEXT: [[SELECT12:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP12]](i1), [[UV13]], [[SELECT11]] + ; WAVE64-NEXT: [[C14:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 14 + ; WAVE64-NEXT: [[ICMP13:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C14]] + ; WAVE64-NEXT: [[SELECT13:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP13]](i1), [[UV14]], [[SELECT12]] + ; WAVE64-NEXT: [[C15:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 15 + ; WAVE64-NEXT: [[ICMP14:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C15]] + ; WAVE64-NEXT: [[SELECT14:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP14]](i1), [[UV15]], [[SELECT13]] + ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[SELECT14]](i32) + ; WAVE64-NEXT: $vgpr0 = COPY [[COPY3]](i32) + ; ; WAVE32-LABEL: name: extract_vector_elt_v16s32_sv_idx_add1 ; WAVE32: liveins: 
$sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $vgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; WAVE32-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] - ; WAVE32-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; WAVE32-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C1]] - ; WAVE32-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV]] - ; WAVE32-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; WAVE32-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C2]] - ; WAVE32-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV2]], [[SELECT]] - ; WAVE32-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 - ; WAVE32-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C3]] - ; WAVE32-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV3]], [[SELECT1]] - ; WAVE32-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 - ; WAVE32-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C4]] - ; WAVE32-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV4]], [[SELECT2]] - ; WAVE32-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 - ; WAVE32-NEXT: [[ICMP4:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C5]] - ; WAVE32-NEXT: [[SELECT4:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV5]], [[SELECT3]] - ; WAVE32-NEXT: [[C6:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; WAVE32-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C6]] - ; WAVE32-NEXT: [[SELECT5:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV6]], [[SELECT4]] - ; WAVE32-NEXT: [[C7:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 7 - ; WAVE32-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C7]] - ; WAVE32-NEXT: [[SELECT6:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV7]], [[SELECT5]] - ; WAVE32-NEXT: [[C8:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 8 - ; WAVE32-NEXT: [[ICMP7:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C8]] - ; WAVE32-NEXT: [[SELECT7:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP7]](s1), [[UV8]], [[SELECT6]] - ; WAVE32-NEXT: [[C9:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 9 - ; WAVE32-NEXT: [[ICMP8:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C9]] - ; WAVE32-NEXT: [[SELECT8:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP8]](s1), [[UV9]], [[SELECT7]] - ; WAVE32-NEXT: [[C10:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 - ; WAVE32-NEXT: [[ICMP9:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C10]] - ; WAVE32-NEXT: [[SELECT9:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP9]](s1), [[UV10]], [[SELECT8]] - ; WAVE32-NEXT: [[C11:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 11 - ; WAVE32-NEXT: [[ICMP10:%[0-9]+]]:vcc(s1) = 
G_ICMP intpred(eq), [[ADD]](s32), [[C11]] - ; WAVE32-NEXT: [[SELECT10:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP10]](s1), [[UV11]], [[SELECT9]] - ; WAVE32-NEXT: [[C12:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 - ; WAVE32-NEXT: [[ICMP11:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C12]] - ; WAVE32-NEXT: [[SELECT11:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP11]](s1), [[UV12]], [[SELECT10]] - ; WAVE32-NEXT: [[C13:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 13 - ; WAVE32-NEXT: [[ICMP12:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C13]] - ; WAVE32-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP12]](s1), [[UV13]], [[SELECT11]] - ; WAVE32-NEXT: [[C14:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 14 - ; WAVE32-NEXT: [[ICMP13:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C14]] - ; WAVE32-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP13]](s1), [[UV14]], [[SELECT12]] - ; WAVE32-NEXT: [[C15:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 15 - ; WAVE32-NEXT: [[ICMP14:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C15]] - ; WAVE32-NEXT: [[SELECT14:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP14]](s1), [[UV15]], [[SELECT13]] - ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT14]](s32) - ; WAVE32-NEXT: $vgpr0 = COPY [[COPY3]](s32) - %0:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = G_CONSTANT i32 1 - %3:_(s32) = G_ADD %1, %2 - %4:_(s32) = G_EXTRACT_VECTOR_ELT %0, %3 - $vgpr0 = COPY %4 + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(<16 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; WAVE32-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[COPY1]], [[COPY2]] + ; WAVE32-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32), [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32), [[UV4:%[0-9]+]]:vgpr(i32), [[UV5:%[0-9]+]]:vgpr(i32), [[UV6:%[0-9]+]]:vgpr(i32), [[UV7:%[0-9]+]]:vgpr(i32), [[UV8:%[0-9]+]]:vgpr(i32), [[UV9:%[0-9]+]]:vgpr(i32), [[UV10:%[0-9]+]]:vgpr(i32), [[UV11:%[0-9]+]]:vgpr(i32), [[UV12:%[0-9]+]]:vgpr(i32), [[UV13:%[0-9]+]]:vgpr(i32), [[UV14:%[0-9]+]]:vgpr(i32), [[UV15:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](<16 x i32>) + ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; WAVE32-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C1]] + ; WAVE32-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV1]], [[UV]] + ; WAVE32-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 2 + ; WAVE32-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C2]] + ; WAVE32-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP1]](i1), [[UV2]], [[SELECT]] + ; WAVE32-NEXT: [[C3:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 3 + ; WAVE32-NEXT: [[ICMP2:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C3]] + ; WAVE32-NEXT: [[SELECT2:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP2]](i1), [[UV3]], [[SELECT1]] + ; WAVE32-NEXT: [[C4:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4 + ; WAVE32-NEXT: [[ICMP3:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C4]] + ; WAVE32-NEXT: [[SELECT3:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP3]](i1), [[UV4]], [[SELECT2]] + ; WAVE32-NEXT: [[C5:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 5 + ; WAVE32-NEXT: [[ICMP4:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C5]] + ; WAVE32-NEXT: 
[[SELECT4:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP4]](i1), [[UV5]], [[SELECT3]] + ; WAVE32-NEXT: [[C6:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 6 + ; WAVE32-NEXT: [[ICMP5:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C6]] + ; WAVE32-NEXT: [[SELECT5:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP5]](i1), [[UV6]], [[SELECT4]] + ; WAVE32-NEXT: [[C7:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 7 + ; WAVE32-NEXT: [[ICMP6:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C7]] + ; WAVE32-NEXT: [[SELECT6:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP6]](i1), [[UV7]], [[SELECT5]] + ; WAVE32-NEXT: [[C8:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 8 + ; WAVE32-NEXT: [[ICMP7:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C8]] + ; WAVE32-NEXT: [[SELECT7:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP7]](i1), [[UV8]], [[SELECT6]] + ; WAVE32-NEXT: [[C9:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 9 + ; WAVE32-NEXT: [[ICMP8:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C9]] + ; WAVE32-NEXT: [[SELECT8:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP8]](i1), [[UV9]], [[SELECT7]] + ; WAVE32-NEXT: [[C10:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 10 + ; WAVE32-NEXT: [[ICMP9:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C10]] + ; WAVE32-NEXT: [[SELECT9:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP9]](i1), [[UV10]], [[SELECT8]] + ; WAVE32-NEXT: [[C11:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 11 + ; WAVE32-NEXT: [[ICMP10:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C11]] + ; WAVE32-NEXT: [[SELECT10:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP10]](i1), [[UV11]], [[SELECT9]] + ; WAVE32-NEXT: [[C12:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 12 + ; WAVE32-NEXT: [[ICMP11:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C12]] + ; WAVE32-NEXT: [[SELECT11:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP11]](i1), [[UV12]], [[SELECT10]] + ; WAVE32-NEXT: [[C13:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 13 + ; WAVE32-NEXT: [[ICMP12:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C13]] + ; WAVE32-NEXT: [[SELECT12:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP12]](i1), [[UV13]], [[SELECT11]] + ; WAVE32-NEXT: [[C14:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 14 + ; WAVE32-NEXT: [[ICMP13:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C14]] + ; WAVE32-NEXT: [[SELECT13:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP13]](i1), [[UV14]], [[SELECT12]] + ; WAVE32-NEXT: [[C15:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 15 + ; WAVE32-NEXT: [[ICMP14:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C15]] + ; WAVE32-NEXT: [[SELECT14:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP14]](i1), [[UV15]], [[SELECT13]] + ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[SELECT14]](i32) + ; WAVE32-NEXT: $vgpr0 = COPY [[COPY3]](i32) + %0:_(<16 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = G_CONSTANT i32 1 + %3:_(i32) = G_ADD %1, %2 + %4:_(i32) = G_EXTRACT_VECTOR_ELT %0(<16 x i32>), %3(i32) + $vgpr0 = COPY %4(i32) ... 
--- @@ -1184,89 +1197,90 @@ body: | ; WAVE64-LABEL: name: extract_vector_elt_v8s64_sv_add1 ; WAVE64: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $vgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; WAVE64-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] - ; WAVE64-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s64>) - ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; WAVE64-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C1]] - ; WAVE64-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV2]], [[UV]] - ; WAVE64-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV3]], [[UV1]] - ; WAVE64-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; WAVE64-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C2]] - ; WAVE64-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV4]], [[SELECT]] - ; WAVE64-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV5]], [[SELECT1]] - ; WAVE64-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 - ; WAVE64-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C3]] - ; WAVE64-NEXT: [[SELECT4:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV6]], [[SELECT2]] - ; WAVE64-NEXT: [[SELECT5:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV7]], [[SELECT3]] - ; WAVE64-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 - ; WAVE64-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C4]] - ; WAVE64-NEXT: [[SELECT6:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV8]], [[SELECT4]] - ; WAVE64-NEXT: [[SELECT7:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV9]], [[SELECT5]] - ; WAVE64-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 - ; WAVE64-NEXT: [[ICMP4:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C5]] - ; WAVE64-NEXT: [[SELECT8:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV10]], [[SELECT6]] - ; WAVE64-NEXT: [[SELECT9:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV11]], [[SELECT7]] - ; WAVE64-NEXT: [[C6:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; WAVE64-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C6]] - ; WAVE64-NEXT: [[SELECT10:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV12]], [[SELECT8]] - ; WAVE64-NEXT: [[SELECT11:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV13]], [[SELECT9]] - ; WAVE64-NEXT: [[C7:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 7 - ; WAVE64-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C7]] - ; WAVE64-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV14]], [[SELECT10]] - ; WAVE64-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV15]], [[SELECT11]] - ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY 
[[SELECT12]](s32) - ; WAVE64-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[SELECT13]](s32) - ; WAVE64-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) - ; WAVE64-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(<8 x i64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; WAVE64-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[COPY1]], [[COPY2]] + ; WAVE64-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32), [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32), [[UV4:%[0-9]+]]:vgpr(i32), [[UV5:%[0-9]+]]:vgpr(i32), [[UV6:%[0-9]+]]:vgpr(i32), [[UV7:%[0-9]+]]:vgpr(i32), [[UV8:%[0-9]+]]:vgpr(i32), [[UV9:%[0-9]+]]:vgpr(i32), [[UV10:%[0-9]+]]:vgpr(i32), [[UV11:%[0-9]+]]:vgpr(i32), [[UV12:%[0-9]+]]:vgpr(i32), [[UV13:%[0-9]+]]:vgpr(i32), [[UV14:%[0-9]+]]:vgpr(i32), [[UV15:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](<8 x i64>) + ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; WAVE64-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C1]] + ; WAVE64-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV2]], [[UV]] + ; WAVE64-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV3]], [[UV1]] + ; WAVE64-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 2 + ; WAVE64-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C2]] + ; WAVE64-NEXT: [[SELECT2:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP1]](i1), [[UV4]], [[SELECT]] + ; WAVE64-NEXT: [[SELECT3:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP1]](i1), [[UV5]], [[SELECT1]] + ; WAVE64-NEXT: [[C3:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 3 + ; WAVE64-NEXT: [[ICMP2:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C3]] + ; WAVE64-NEXT: [[SELECT4:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP2]](i1), [[UV6]], [[SELECT2]] + ; WAVE64-NEXT: [[SELECT5:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP2]](i1), [[UV7]], [[SELECT3]] + ; WAVE64-NEXT: [[C4:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4 + ; WAVE64-NEXT: [[ICMP3:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C4]] + ; WAVE64-NEXT: [[SELECT6:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP3]](i1), [[UV8]], [[SELECT4]] + ; WAVE64-NEXT: [[SELECT7:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP3]](i1), [[UV9]], [[SELECT5]] + ; WAVE64-NEXT: [[C5:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 5 + ; WAVE64-NEXT: [[ICMP4:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C5]] + ; WAVE64-NEXT: [[SELECT8:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP4]](i1), [[UV10]], [[SELECT6]] + ; WAVE64-NEXT: [[SELECT9:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP4]](i1), [[UV11]], [[SELECT7]] + ; WAVE64-NEXT: [[C6:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 6 + ; WAVE64-NEXT: [[ICMP5:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C6]] + ; WAVE64-NEXT: [[SELECT10:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP5]](i1), [[UV12]], [[SELECT8]] + ; WAVE64-NEXT: [[SELECT11:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP5]](i1), [[UV13]], [[SELECT9]] + ; WAVE64-NEXT: [[C7:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 7 + ; WAVE64-NEXT: [[ICMP6:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C7]] + ; WAVE64-NEXT: [[SELECT12:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP6]](i1), [[UV14]], [[SELECT10]] + ; WAVE64-NEXT: [[SELECT13:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP6]](i1), [[UV15]], [[SELECT11]] + ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[SELECT12]](i32) + ; 
WAVE64-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[SELECT13]](i32) + ; WAVE64-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[COPY3]](i32), [[COPY4]](i32) + ; WAVE64-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + ; ; WAVE32-LABEL: name: extract_vector_elt_v8s64_sv_add1 ; WAVE32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $vgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; WAVE32-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] - ; WAVE32-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s64>) - ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; WAVE32-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C1]] - ; WAVE32-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV2]], [[UV]] - ; WAVE32-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV3]], [[UV1]] - ; WAVE32-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; WAVE32-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C2]] - ; WAVE32-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV4]], [[SELECT]] - ; WAVE32-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV5]], [[SELECT1]] - ; WAVE32-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 - ; WAVE32-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C3]] - ; WAVE32-NEXT: [[SELECT4:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV6]], [[SELECT2]] - ; WAVE32-NEXT: [[SELECT5:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV7]], [[SELECT3]] - ; WAVE32-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 - ; WAVE32-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C4]] - ; WAVE32-NEXT: [[SELECT6:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV8]], [[SELECT4]] - ; WAVE32-NEXT: [[SELECT7:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV9]], [[SELECT5]] - ; WAVE32-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 - ; WAVE32-NEXT: [[ICMP4:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C5]] - ; WAVE32-NEXT: [[SELECT8:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV10]], [[SELECT6]] - ; WAVE32-NEXT: [[SELECT9:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV11]], [[SELECT7]] - ; WAVE32-NEXT: [[C6:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; WAVE32-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C6]] - ; WAVE32-NEXT: [[SELECT10:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV12]], [[SELECT8]] - ; WAVE32-NEXT: [[SELECT11:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV13]], [[SELECT9]] - ; WAVE32-NEXT: [[C7:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 7 - ; WAVE32-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C7]] - ; WAVE32-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV14]], 
[[SELECT10]] - ; WAVE32-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV15]], [[SELECT11]] - ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT12]](s32) - ; WAVE32-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[SELECT13]](s32) - ; WAVE32-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) - ; WAVE32-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) - %0:_(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = G_CONSTANT i32 1 - %3:_(s32) = G_ADD %1, %2 - %4:_(s64) = G_EXTRACT_VECTOR_ELT %0, %3 - $vgpr0_vgpr1 = COPY %4 + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(<8 x i64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; WAVE32-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[COPY1]], [[COPY2]] + ; WAVE32-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32), [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32), [[UV4:%[0-9]+]]:vgpr(i32), [[UV5:%[0-9]+]]:vgpr(i32), [[UV6:%[0-9]+]]:vgpr(i32), [[UV7:%[0-9]+]]:vgpr(i32), [[UV8:%[0-9]+]]:vgpr(i32), [[UV9:%[0-9]+]]:vgpr(i32), [[UV10:%[0-9]+]]:vgpr(i32), [[UV11:%[0-9]+]]:vgpr(i32), [[UV12:%[0-9]+]]:vgpr(i32), [[UV13:%[0-9]+]]:vgpr(i32), [[UV14:%[0-9]+]]:vgpr(i32), [[UV15:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](<8 x i64>) + ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; WAVE32-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C1]] + ; WAVE32-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV2]], [[UV]] + ; WAVE32-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV3]], [[UV1]] + ; WAVE32-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 2 + ; WAVE32-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C2]] + ; WAVE32-NEXT: [[SELECT2:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP1]](i1), [[UV4]], [[SELECT]] + ; WAVE32-NEXT: [[SELECT3:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP1]](i1), [[UV5]], [[SELECT1]] + ; WAVE32-NEXT: [[C3:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 3 + ; WAVE32-NEXT: [[ICMP2:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C3]] + ; WAVE32-NEXT: [[SELECT4:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP2]](i1), [[UV6]], [[SELECT2]] + ; WAVE32-NEXT: [[SELECT5:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP2]](i1), [[UV7]], [[SELECT3]] + ; WAVE32-NEXT: [[C4:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4 + ; WAVE32-NEXT: [[ICMP3:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C4]] + ; WAVE32-NEXT: [[SELECT6:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP3]](i1), [[UV8]], [[SELECT4]] + ; WAVE32-NEXT: [[SELECT7:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP3]](i1), [[UV9]], [[SELECT5]] + ; WAVE32-NEXT: [[C5:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 5 + ; WAVE32-NEXT: [[ICMP4:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C5]] + ; WAVE32-NEXT: [[SELECT8:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP4]](i1), [[UV10]], [[SELECT6]] + ; WAVE32-NEXT: [[SELECT9:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP4]](i1), [[UV11]], [[SELECT7]] + ; WAVE32-NEXT: [[C6:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 6 + ; WAVE32-NEXT: [[ICMP5:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C6]] + ; WAVE32-NEXT: [[SELECT10:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP5]](i1), [[UV12]], [[SELECT8]] + ; WAVE32-NEXT: [[SELECT11:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP5]](i1), 
[[UV13]], [[SELECT9]] + ; WAVE32-NEXT: [[C7:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 7 + ; WAVE32-NEXT: [[ICMP6:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[ADD]](i32), [[C7]] + ; WAVE32-NEXT: [[SELECT12:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP6]](i1), [[UV14]], [[SELECT10]] + ; WAVE32-NEXT: [[SELECT13:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP6]](i1), [[UV15]], [[SELECT11]] + ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[SELECT12]](i32) + ; WAVE32-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[SELECT13]](i32) + ; WAVE32-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[COPY3]](i32), [[COPY4]](i32) + ; WAVE32-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + %0:_(<8 x i64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = G_CONSTANT i32 1 + %3:_(i32) = G_ADD %1, %2 + %4:_(i64) = G_EXTRACT_VECTOR_ELT %0(<8 x i64>), %3(i32) + $vgpr0_vgpr1 = COPY %4(i64) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract.mir index 85cb851b6bc3d..4faf38992f159 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract.mir @@ -12,10 +12,10 @@ body: | ; CHECK-LABEL: name: extract_lo32_i64_s ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:sgpr(s32) = G_EXTRACT [[COPY]](s64), 0 - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_EXTRACT %0, 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:sgpr(i32) = G_EXTRACT [[COPY]](i64), 0 + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(i32) = G_EXTRACT %0(i64), 0 ... --- @@ -28,10 +28,10 @@ body: | ; CHECK-LABEL: name: extract_lo32_i64_v ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:vgpr(s32) = G_EXTRACT [[COPY]](s64), 0 - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_EXTRACT %0, 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:vgpr(i32) = G_EXTRACT [[COPY]](i64), 0 + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = G_EXTRACT %0(i64), 0 ... 
--- @@ -44,10 +44,10 @@ body: | ; CHECK-LABEL: name: extract_s32_0_s1024_v ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s1024) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:vgpr(s32) = G_EXTRACT [[COPY]](s1024), 0 - %0:_(s1024) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - %1:_(s32) = G_EXTRACT %0, 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i1024) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:vgpr(i32) = G_EXTRACT [[COPY]](i1024), 0 + %0:_(i1024) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + %1:_(i32) = G_EXTRACT %0(i1024), 0 ... --- @@ -60,10 +60,10 @@ body: | ; CHECK-LABEL: name: extract_s32_0_s1024_s ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s1024) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:sgpr(s32) = G_EXTRACT [[COPY]](s1024), 0 - %0:_(s1024) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 - %1:_(s32) = G_EXTRACT %0, 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i1024) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:sgpr(i32) = G_EXTRACT [[COPY]](i1024), 0 + %0:_(i1024) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + %1:_(i32) = G_EXTRACT %0(i1024), 0 ... 
--- @@ -76,10 +76,10 @@ body: | ; CHECK-LABEL: name: extract_lo32_i64_a ; CHECK: liveins: $agpr0_agpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(s64) = COPY $agpr0_agpr1 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:agpr(s32) = G_EXTRACT [[COPY]](s64), 0 - ; CHECK-NEXT: S_ENDPGM 0, implicit [[EXTRACT]](s32) - %0:_(s64) = COPY $agpr0_agpr1 - %1:_(s32) = G_EXTRACT %0, 0 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(i64) = COPY $agpr0_agpr1 + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:agpr(i32) = G_EXTRACT [[COPY]](i64), 0 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[EXTRACT]](i32) + %0:_(i64) = COPY $agpr0_agpr1 + %1:_(i32) = G_EXTRACT %0(i64), 0 + S_ENDPGM 0, implicit %1(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fabs.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fabs.mir index 55329b008446e..f6f93c46c5d45 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fabs.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fabs.mir @@ -12,12 +12,16 @@ body: | ; CHECK-LABEL: name: fabs_s ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[FABS:%[0-9]+]]:sgpr(s32) = G_FABS [[COPY]] - ; CHECK-NEXT: $vgpr0 = COPY [[FABS]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = G_FABS %0 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[FABS:%[0-9]+]]:sgpr(f32) = G_FABS [[BITCAST]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(i32) = G_BITCAST [[FABS]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = G_FABS %1 + %3:_(i32) = G_BITCAST %2(f32) + $vgpr0 = COPY %3(i32) ... --- @@ -30,10 +34,14 @@ body: | ; CHECK-LABEL: name: fabs_v ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[FABS:%[0-9]+]]:vgpr(s32) = G_FABS [[COPY]] - ; CHECK-NEXT: $vgpr0 = COPY [[FABS]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_FABS %0 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[FABS:%[0-9]+]]:vgpr(f32) = G_FABS [[BITCAST]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(i32) = G_BITCAST [[FABS]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = G_FABS %1 + %3:_(i32) = G_BITCAST %2(f32) + $vgpr0 = COPY %3(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fadd.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fadd.mir index 4fba30325f98b..eb7e6b7031346 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fadd.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fadd.mir @@ -12,14 +12,18 @@ body: | ; CHECK-LABEL: name: fadd_ss ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[FADD:%[0-9]+]]:vgpr(s32) = G_FADD [[COPY2]], [[COPY3]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_FADD %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST]](f32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST1]](f32) + ; CHECK-NEXT: [[FADD:%[0-9]+]]:vgpr(f32) = G_FADD [[COPY2]], [[COPY3]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32) = G_FADD %2, %3 ... --- @@ -32,13 +36,17 @@ body: | ; CHECK-LABEL: name: fadd_sv ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[FADD:%[0-9]+]]:vgpr(s32) = G_FADD [[COPY2]], [[COPY1]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = G_FADD %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST]](f32) + ; CHECK-NEXT: [[FADD:%[0-9]+]]:vgpr(f32) = G_FADD [[COPY2]], [[BITCAST1]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32) = G_FADD %2, %3 ... --- @@ -51,13 +59,17 @@ body: | ; CHECK-LABEL: name: fadd_vs ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[FADD:%[0-9]+]]:vgpr(s32) = G_FADD [[COPY]], [[COPY2]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s32) = G_FADD %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST1]](f32) + ; CHECK-NEXT: [[FADD:%[0-9]+]]:vgpr(f32) = G_FADD [[BITCAST]], [[COPY2]] + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr0 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32) = G_FADD %2, %3 ... 
--- @@ -70,10 +82,14 @@ body: | ; CHECK-LABEL: name: fadd_vv ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[FADD:%[0-9]+]]:vgpr(s32) = G_FADD [[COPY]], [[COPY1]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_FADD %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[FADD:%[0-9]+]]:vgpr(f32) = G_FADD [[BITCAST]], [[BITCAST1]] + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32) = G_FADD %2, %3 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fcanonicalize.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fcanonicalize.mir index fac4fa5580b49..6ce7ab0d251f4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fcanonicalize.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fcanonicalize.mir @@ -12,13 +12,17 @@ body: | ; CHECK-LABEL: name: fcanonicalize_s ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:vgpr(s32) = G_FCANONICALIZE [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[FCANONICALIZE]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = G_FCANONICALIZE %0 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST]](f32) + ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:vgpr(f32) = G_FCANONICALIZE [[COPY1]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(i32) = G_BITCAST [[FCANONICALIZE]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = G_FCANONICALIZE %1 + %3:_(i32) = G_BITCAST %2(f32) + $vgpr0 = COPY %3(i32) ... --- @@ -31,10 +35,14 @@ body: | ; CHECK-LABEL: name: fcanonicalize_v ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:vgpr(s32) = G_FCANONICALIZE [[COPY]] - ; CHECK-NEXT: $vgpr0 = COPY [[FCANONICALIZE]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_FCANONICALIZE %0 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:vgpr(f32) = G_FCANONICALIZE [[BITCAST]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(i32) = G_BITCAST [[FCANONICALIZE]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = G_FCANONICALIZE %1 + %3:_(i32) = G_BITCAST %2(f32) + $vgpr0 = COPY %3(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fceil.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fceil.mir index 593b1c6a2ebfa..7190ca46c4500 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fceil.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fceil.mir @@ -12,11 +12,13 @@ body: | ; CHECK-LABEL: name: fceil_s ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[FCEIL:%[0-9]+]]:vgpr(s32) = G_FCEIL [[COPY1]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = G_FCEIL %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST]](f32) + ; CHECK-NEXT: [[FCEIL:%[0-9]+]]:vgpr(f32) = G_FCEIL [[COPY1]] + %0:_(i32) = COPY $sgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = G_FCEIL %1 ... --- @@ -29,8 +31,10 @@ body: | ; CHECK-LABEL: name: fceil_v ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[FCEIL:%[0-9]+]]:vgpr(s32) = G_FCEIL [[COPY]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_FCEIL %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[FCEIL:%[0-9]+]]:vgpr(f32) = G_FCEIL [[BITCAST]] + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = G_FCEIL %1 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fcmp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fcmp.mir index 0cb6727f6fa05..7af094694249a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fcmp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fcmp.mir @@ -14,21 +14,28 @@ body: | ; GFX803-LABEL: name: fcmp_ss ; GFX803: liveins: $sgpr0, $sgpr1 ; GFX803-NEXT: {{ $}} - ; GFX803-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX803-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX803-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX803-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX803-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(uge), [[COPY2]](s32), [[COPY3]] + ; GFX803-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX803-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GFX803-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; GFX803-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY1]](i32) + ; GFX803-NEXT: [[COPY2:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST]](f32) + ; GFX803-NEXT: [[COPY3:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST1]](f32) + ; GFX803-NEXT: [[FCMP:%[0-9]+]]:vcc(i1) = G_FCMP floatpred(uge), [[COPY2]](f32), [[COPY3]] + ; ; GFX1150-LABEL: name: fcmp_ss ; GFX1150: liveins: $sgpr0, $sgpr1 ; GFX1150-NEXT: {{ $}} - ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX1150-NEXT: [[FCMP:%[0-9]+]]:sgpr(s32) = G_FCMP floatpred(uge), [[COPY]](s32), [[COPY1]] - ; GFX1150-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[FCMP]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s1) = G_FCMP floatpred(uge), %0(s32), %1 + ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GFX1150-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; GFX1150-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(f32) = G_BITCAST 
[[COPY1]](i32) + ; GFX1150-NEXT: [[FCMP:%[0-9]+]]:sgpr(i32) = G_FCMP floatpred(uge), [[BITCAST]](f32), [[BITCAST1]] + ; GFX1150-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[FCMP]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(i1) = G_FCMP floatpred(uge), %2(f32), %3 ... --- @@ -41,13 +48,17 @@ body: | ; GCN-LABEL: name: fcmp_sv ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(uge), [[COPY2]](s32), [[COPY1]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s1) = G_FCMP floatpred(uge), %0, %1 + ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST]](f32) + ; GCN-NEXT: [[FCMP:%[0-9]+]]:vcc(i1) = G_FCMP floatpred(uge), [[COPY2]](f32), [[BITCAST1]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(i1) = G_FCMP floatpred(uge), %2(f32), %3 ... --- @@ -60,13 +71,17 @@ body: | ; GCN-LABEL: name: fcmp_vs ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(uge), [[COPY1]](s32), [[COPY2]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s1) = G_FCMP floatpred(uge), %1, %0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST1]](f32) + ; GCN-NEXT: [[FCMP:%[0-9]+]]:vcc(i1) = G_FCMP floatpred(uge), [[BITCAST]](f32), [[COPY2]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(f32) = G_BITCAST %1(i32) + %3:_(f32) = G_BITCAST %0(i32) + %4:_(i1) = G_FCMP floatpred(uge), %2(f32), %3 ... --- @@ -79,10 +94,10 @@ body: | ; GCN-LABEL: name: fcmp_vv ; GCN: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GCN-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP floatpred(uge), [[COPY]](s32), [[COPY1]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s1) = G_ICMP floatpred(uge), %0, %1 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GCN-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP floatpred(uge), [[COPY]](i32), [[COPY1]] + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i1) = G_ICMP floatpred(uge), %0(i32), %1 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fexp2.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fexp2.mir index 6b14849556faf..4ee8bb537bbed 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fexp2.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fexp2.mir @@ -12,11 +12,13 @@ body: | ; CHECK-LABEL: name: fexp2_s ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[FEXP2_:%[0-9]+]]:vgpr(s32) = G_FEXP2 [[COPY1]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = G_FEXP2 %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST]](f32) + ; CHECK-NEXT: [[FEXP2_:%[0-9]+]]:vgpr(f32) = G_FEXP2 [[COPY1]] + %0:_(i32) = COPY $sgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = G_FEXP2 %1 ... --- @@ -29,8 +31,10 @@ body: | ; CHECK-LABEL: name: fexp2_v ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[FEXP2_:%[0-9]+]]:vgpr(s32) = G_FEXP2 [[COPY]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_FEXP2 %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[FEXP2_:%[0-9]+]]:vgpr(f32) = G_FEXP2 [[BITCAST]] + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = G_FEXP2 %1 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-flog2.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-flog2.mir index 65b205f66d5f9..c2d223054c184 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-flog2.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-flog2.mir @@ -12,11 +12,13 @@ body: | ; CHECK-LABEL: name: flog2_s ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[FLOG2_:%[0-9]+]]:vgpr(s32) = G_FLOG2 [[COPY1]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = G_FLOG2 %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST]](f32) + ; CHECK-NEXT: [[FLOG2_:%[0-9]+]]:vgpr(f32) = G_FLOG2 [[COPY1]] + %0:_(i32) = COPY $sgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = G_FLOG2 %1 ... --- @@ -29,8 +31,10 @@ body: | ; CHECK-LABEL: name: flog2_v ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[FLOG2_:%[0-9]+]]:vgpr(s32) = G_FLOG2 [[COPY]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_FLOG2 %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[FLOG2_:%[0-9]+]]:vgpr(f32) = G_FLOG2 [[BITCAST]] + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = G_FLOG2 %1 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fma.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fma.mir index d63fc07ada772..6491861faf2c7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fma.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fma.mir @@ -12,17 +12,23 @@ body: | ; CHECK-LABEL: name: fma_sss ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY3]], [[COPY4]], [[COPY5]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $sgpr2 - %3:_(s32) = G_FMA %0, %1, %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY2]](i32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST]](f32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST1]](f32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST2]](f32) + ; CHECK-NEXT: [[FMA:%[0-9]+]]:vgpr(f32) = G_FMA [[COPY3]], [[COPY4]], [[COPY5]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $sgpr2 + %3:_(f32) = G_BITCAST %0(i32) + %4:_(f32) = G_BITCAST %1(i32) + %5:_(f32) = G_BITCAST %2(i32) + %6:_(f32) = G_FMA %3, %4, %5 ... --- name: fma_vss @@ -34,16 +40,22 @@ body: | ; CHECK-LABEL: name: fma_vss ; CHECK: liveins: $vgpr0, $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY]], [[COPY3]], [[COPY4]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s32) = COPY $sgpr1 - %3:_(s32) = G_FMA %0, %1, %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY2]](i32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST1]](f32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST2]](f32) + ; CHECK-NEXT: [[FMA:%[0-9]+]]:vgpr(f32) = G_FMA [[BITCAST]], [[COPY3]], [[COPY4]] + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr0 + %2:_(i32) = COPY $sgpr1 + %3:_(f32) = G_BITCAST %0(i32) + %4:_(f32) = G_BITCAST %1(i32) + %5:_(f32) = G_BITCAST %2(i32) + %6:_(f32) = G_FMA %3, %4, %5 ... 
--- name: fma_svs @@ -55,16 +67,22 @@ body: | ; CHECK-LABEL: name: fma_svs ; CHECK: liveins: $sgpr0, $vgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY3]], [[COPY1]], [[COPY4]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = COPY $sgpr1 - %3:_(s32) = G_FMA %0, %1, %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY2]](i32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST]](f32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST2]](f32) + ; CHECK-NEXT: [[FMA:%[0-9]+]]:vgpr(f32) = G_FMA [[COPY3]], [[BITCAST1]], [[COPY4]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = COPY $sgpr1 + %3:_(f32) = G_BITCAST %0(i32) + %4:_(f32) = G_BITCAST %1(i32) + %5:_(f32) = G_BITCAST %2(i32) + %6:_(f32) = G_FMA %3, %4, %5 ... --- name: fma_ssv @@ -76,16 +94,22 @@ body: | ; CHECK-LABEL: name: fma_ssv ; CHECK: liveins: $sgpr0, $sgpr1, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY3]], [[COPY4]], [[COPY2]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $vgpr0 - %3:_(s32) = G_FMA %0, %1, %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY2]](i32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST]](f32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST1]](f32) + ; CHECK-NEXT: [[FMA:%[0-9]+]]:vgpr(f32) = G_FMA [[COPY3]], [[COPY4]], [[BITCAST2]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $vgpr0 + %3:_(f32) = G_BITCAST %0(i32) + %4:_(f32) = G_BITCAST %1(i32) + %5:_(f32) = G_BITCAST %2(i32) + %6:_(f32) = G_FMA %3, %4, %5 ... 
--- name: fma_vvs @@ -97,15 +121,21 @@ body: | ; CHECK-LABEL: name: fma_vvs ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY3]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $sgpr0 - %3:_(s32) = G_FMA %0, %1, %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY2]](i32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST2]](f32) + ; CHECK-NEXT: [[FMA:%[0-9]+]]:vgpr(f32) = G_FMA [[BITCAST]], [[BITCAST1]], [[COPY3]] + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $sgpr0 + %3:_(f32) = G_BITCAST %0(i32) + %4:_(f32) = G_BITCAST %1(i32) + %5:_(f32) = G_BITCAST %2(i32) + %6:_(f32) = G_FMA %3, %4, %5 ... --- name: fma_vsv @@ -117,15 +147,21 @@ body: | ; CHECK-LABEL: name: fma_vsv ; CHECK: liveins: $vgpr0, $sgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY]], [[COPY3]], [[COPY2]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $vgpr1 - %3:_(s32) = G_FMA %0, %1, %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY2]](i32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST1]](f32) + ; CHECK-NEXT: [[FMA:%[0-9]+]]:vgpr(f32) = G_FMA [[BITCAST]], [[COPY3]], [[BITCAST2]] + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $vgpr1 + %3:_(f32) = G_BITCAST %0(i32) + %4:_(f32) = G_BITCAST %1(i32) + %5:_(f32) = G_BITCAST %2(i32) + %6:_(f32) = G_FMA %3, %4, %5 ... 
--- name: fma_svv @@ -137,15 +173,21 @@ body: | ; CHECK-LABEL: name: fma_svv ; CHECK: liveins: $sgpr0, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY3]], [[COPY1]], [[COPY2]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = COPY $vgpr1 - %3:_(s32) = G_FMA %0, %1, %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY2]](i32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST]](f32) + ; CHECK-NEXT: [[FMA:%[0-9]+]]:vgpr(f32) = G_FMA [[COPY3]], [[BITCAST1]], [[BITCAST2]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = COPY $vgpr1 + %3:_(f32) = G_BITCAST %0(i32) + %4:_(f32) = G_BITCAST %1(i32) + %5:_(f32) = G_BITCAST %2(i32) + %6:_(f32) = G_FMA %3, %4, %5 ... --- name: fma_vvv @@ -157,12 +199,18 @@ body: | ; CHECK-LABEL: name: fma_vvv ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = G_FMA %0, %1, %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY2]](i32) + ; CHECK-NEXT: [[FMA:%[0-9]+]]:vgpr(f32) = G_FMA [[BITCAST]], [[BITCAST1]], [[BITCAST2]] + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(f32) = G_BITCAST %0(i32) + %4:_(f32) = G_BITCAST %1(i32) + %5:_(f32) = G_BITCAST %2(i32) + %6:_(f32) = G_FMA %3, %4, %5 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fmul.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fmul.mir index 5766c05426b2d..7190d3232bccf 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fmul.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fmul.mir @@ -12,14 +12,18 @@ body: | ; CHECK-LABEL: name: fmul_ss ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY2]], [[COPY3]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_FMUL %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST]](f32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST1]](f32) + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(f32) = G_FMUL [[COPY2]], [[COPY3]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32) = G_FMUL %2, %3 ... --- @@ -32,13 +36,17 @@ body: | ; CHECK-LABEL: name: fmul_sv ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY2]], [[COPY1]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = G_FMUL %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST]](f32) + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(f32) = G_FMUL [[COPY2]], [[BITCAST1]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32) = G_FMUL %2, %3 ... --- @@ -51,13 +59,17 @@ body: | ; CHECK-LABEL: name: fmul_vs ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY2]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s32) = G_FMUL %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST1]](f32) + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(f32) = G_FMUL [[BITCAST]], [[COPY2]] + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr0 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32) = G_FMUL %2, %3 ... 
--- @@ -70,10 +82,14 @@ body: | ; CHECK-LABEL: name: fmul_vv ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_FMUL %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(f32) = G_FMUL [[BITCAST]], [[BITCAST1]] + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32) = G_FMUL %2, %3 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fneg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fneg.mir index 214dc50c9950a..52042860c0d3f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fneg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fneg.mir @@ -12,12 +12,16 @@ body: | ; CHECK-LABEL: name: fneg_s ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:sgpr(s32) = G_FNEG [[COPY]] - ; CHECK-NEXT: $vgpr0 = COPY [[FNEG]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = G_FNEG %0 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:sgpr(f32) = G_FNEG [[BITCAST]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(i32) = G_BITCAST [[FNEG]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = G_FNEG %1 + %3:_(i32) = G_BITCAST %2(f32) + $vgpr0 = COPY %3(i32) ... --- @@ -30,10 +34,14 @@ body: | ; CHECK-LABEL: name: fneg_v ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[FNEG:%[0-9]+]]:vgpr(s32) = G_FNEG [[COPY]] - ; CHECK-NEXT: $vgpr0 = COPY [[FNEG]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_FNEG %0 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:vgpr(f32) = G_FNEG [[BITCAST]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(i32) = G_BITCAST [[FNEG]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = G_FNEG %1 + %3:_(i32) = G_BITCAST %2(f32) + $vgpr0 = COPY %3(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fpext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fpext.mir index df88b99d2ab9b..8545287ded7d7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fpext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fpext.mir @@ -12,11 +12,13 @@ body: | ; CHECK-LABEL: name: fpext_s ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:vgpr(s64) = G_FPEXT [[COPY1]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s64) = G_FPEXT %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST]](f32) + ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:vgpr(f64) = G_FPEXT [[COPY1]](f32) + %0:_(i32) = COPY $sgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f64) = G_FPEXT %1(f32) ... --- @@ -29,8 +31,10 @@ body: | ; CHECK-LABEL: name: fpext_v ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:vgpr(s64) = G_FPEXT [[COPY]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s64) = G_FPEXT %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:vgpr(f64) = G_FPEXT [[BITCAST]](f32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f64) = G_FPEXT %1(f32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fptosi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fptosi.mir index c690f8439098f..902da09fdcbbc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fptosi.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fptosi.mir @@ -12,11 +12,13 @@ body: | ; CHECK-LABEL: name: fptosi_s ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[FPTOSI:%[0-9]+]]:vgpr(s32) = G_FPTOSI [[COPY1]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = G_FPTOSI %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST]](f32) + ; CHECK-NEXT: [[FPTOSI:%[0-9]+]]:vgpr(i32) = G_FPTOSI [[COPY1]](f32) + %0:_(i32) = COPY $sgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(i32) = G_FPTOSI %1(f32) ... --- @@ -29,8 +31,10 @@ body: | ; CHECK-LABEL: name: fptosi_v ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[FPTOSI:%[0-9]+]]:vgpr(s32) = G_FPTOSI [[COPY]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_FPTOSI %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[FPTOSI:%[0-9]+]]:vgpr(i32) = G_FPTOSI [[BITCAST]](f32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(i32) = G_FPTOSI %1(f32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fptoui.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fptoui.mir index 17e656340f780..cb9ee00c15e91 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fptoui.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fptoui.mir @@ -12,11 +12,13 @@ body: | ; CHECK-LABEL: name: fptoui_s ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[FPTOUI:%[0-9]+]]:vgpr(s32) = G_FPTOUI [[COPY1]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = G_FPTOUI %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST]](f32) + ; CHECK-NEXT: [[FPTOUI:%[0-9]+]]:vgpr(i32) = G_FPTOUI [[COPY1]](f32) + %0:_(i32) = COPY $sgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(i32) = G_FPTOUI %1(f32) ... --- @@ -29,8 +31,10 @@ body: | ; CHECK-LABEL: name: fptoui_v ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[FPTOUI:%[0-9]+]]:vgpr(s32) = G_FPTOUI [[COPY]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_FPTOUI %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[FPTOUI:%[0-9]+]]:vgpr(i32) = G_FPTOUI [[BITCAST]](f32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(i32) = G_FPTOUI %1(f32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fptrunc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fptrunc.mir index 53147d8435ea7..e1635dff2d4c6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fptrunc.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fptrunc.mir @@ -12,11 +12,13 @@ body: | ; CHECK-LABEL: name: fptrunc_s ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:vgpr(s32) = G_FPTRUNC [[COPY1]](s64) - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_FPTRUNC %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f64) = G_BITCAST [[COPY]](i64) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(f64) = COPY [[BITCAST]](f64) + ; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:vgpr(f32) = G_FPTRUNC [[COPY1]](f64) + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(f64) = G_BITCAST %0(i64) + %2:_(f32) = G_FPTRUNC %1(f64) ... --- @@ -29,8 +31,10 @@ body: | ; CHECK-LABEL: name: fptrunc_v ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:vgpr(s32) = G_FPTRUNC [[COPY]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_FPTRUNC %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f64) = G_BITCAST [[COPY]](i64) + ; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:vgpr(f32) = G_FPTRUNC [[BITCAST]](f64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(f64) = G_BITCAST %0(i64) + %2:_(f32) = G_FPTRUNC %1(f64) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-frame-index.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-frame-index.mir index 2b60dcd9dcabe..499f0c6a5070b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-frame-index.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-frame-index.mir @@ -26,7 +26,7 @@ body: | ; CHECK: [[FRAME_INDEX:%[0-9]+]]:sgpr(p5) = G_FRAME_INDEX %stack.0 ; CHECK-NEXT: $sgpr0 = COPY [[FRAME_INDEX]](p5) %0:_(p5) = G_FRAME_INDEX %stack.0 - $sgpr0 = COPY %0 + $sgpr0 = COPY %0(p5) ... @@ -41,6 +41,6 @@ body: | ; CHECK: [[FRAME_INDEX:%[0-9]+]]:sgpr(p5) = G_FRAME_INDEX %stack.0 ; CHECK-NEXT: $vgpr0 = COPY [[FRAME_INDEX]](p5) %0:_(p5) = G_FRAME_INDEX %stack.0 - $vgpr0 = COPY %0 + $vgpr0 = COPY %0(p5) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-freeze.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-freeze.mir index 24bc62c4a7253..5aacadf1d41db 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-freeze.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-freeze.mir @@ -13,16 +13,16 @@ body: | ; CHECK-LABEL: name: test_freeze_s1_vgpr_to_vgpr ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:vgpr(s1) = G_FREEZE [[TRUNC]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[FREEZE]](s1) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s1) = G_TRUNC %0(s32) - %2:_(s1) = G_FREEZE %1 - %3:_(s32) = G_ANYEXT %2(s1) - $vgpr0 = COPY %3(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:vgpr(i1) = G_FREEZE [[TRUNC]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[FREEZE]](i1) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i1) = G_TRUNC %0(i32) + %2:_(i1) = G_FREEZE %1 + %3:_(i32) = G_ANYEXT %2(i1) + $vgpr0 = COPY %3(i32) ... @@ -36,16 +36,16 @@ body: | ; CHECK-LABEL: name: test_freeze_s1_vgpr_to_agpr ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:vgpr(s1) = G_FREEZE [[TRUNC]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[FREEZE]](s1) - ; CHECK-NEXT: $agpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s1) = G_TRUNC %0(s32) - %2:_(s1) = G_FREEZE %1 - %3:_(s32) = G_ANYEXT %2(s1) - $agpr0 = COPY %3(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:vgpr(i1) = G_FREEZE [[TRUNC]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[FREEZE]](i1) + ; CHECK-NEXT: $agpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i1) = G_TRUNC %0(i32) + %2:_(i1) = G_FREEZE %1 + %3:_(i32) = G_ANYEXT %2(i1) + $agpr0 = COPY %3(i32) ... 
@@ -59,16 +59,16 @@ body: | ; CHECK-LABEL: name: test_freeze_s1_sgpr_to_sgpr ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:sgpr(s1) = G_FREEZE [[TRUNC]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[FREEZE]](s1) - ; CHECK-NEXT: $sgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s1) = G_TRUNC %0(s32) - %2:_(s1) = G_FREEZE %1 - %3:_(s32) = G_ANYEXT %2(s1) - $sgpr0 = COPY %3(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:sgpr(i1) = G_FREEZE [[TRUNC]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[FREEZE]](i1) + ; CHECK-NEXT: $sgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i1) = G_TRUNC %0(i32) + %2:_(i1) = G_FREEZE %1 + %3:_(i32) = G_ANYEXT %2(i1) + $sgpr0 = COPY %3(i32) ... @@ -82,16 +82,16 @@ body: | ; CHECK-LABEL: name: test_freeze_s1_vcc ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:vcc(s1) = G_FREEZE [[ICMP]] - ; CHECK-NEXT: S_ENDPGM 0, implicit [[FREEZE]](s1) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s1) = G_ICMP intpred(eq), %0(s32), %1 - %3:_(s1) = G_FREEZE %2 - S_ENDPGM 0, implicit %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY1]] + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:vcc(i1) = G_FREEZE [[ICMP]] + ; CHECK-NEXT: S_ENDPGM 0, implicit [[FREEZE]](i1) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i1) = G_ICMP intpred(eq), %0(i32), %1 + %3:_(i1) = G_FREEZE %2 + S_ENDPGM 0, implicit %3(i1) ... @@ -105,16 +105,16 @@ body: | ; CHECK-LABEL: name: test_freeze_s16_vgpr_to_vgpr ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:vgpr(s16) = G_FREEZE [[TRUNC]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[FREEZE]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0(s32) - %2:_(s16) = G_FREEZE %1 - %3:_(s32) = G_ANYEXT %2(s16) - $vgpr0 = COPY %3(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:vgpr(i16) = G_FREEZE [[TRUNC]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[FREEZE]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(i16) = G_FREEZE %1 + %3:_(i32) = G_ANYEXT %2(i16) + $vgpr0 = COPY %3(i32) ... 
@@ -128,12 +128,12 @@ body: | ; CHECK-LABEL: name: test_freeze_s32_vgpr_to_vgpr ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:vgpr(s32) = G_FREEZE [[COPY]] - ; CHECK-NEXT: $vgpr0 = COPY [[FREEZE]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_FREEZE %0 - $vgpr0 = COPY %1(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:vgpr(i32) = G_FREEZE [[COPY]] + ; CHECK-NEXT: $vgpr0 = COPY [[FREEZE]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_FREEZE %0 + $vgpr0 = COPY %1(i32) ... @@ -147,12 +147,12 @@ body: | ; CHECK-LABEL: name: test_freeze_s32_sgpr_to_sgpr ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:sgpr(s32) = G_FREEZE [[COPY]] - ; CHECK-NEXT: $sgpr0 = COPY [[FREEZE]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = G_FREEZE %0 - $sgpr0 = COPY %1(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:sgpr(i32) = G_FREEZE [[COPY]] + ; CHECK-NEXT: $sgpr0 = COPY [[FREEZE]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = G_FREEZE %0 + $sgpr0 = COPY %1(i32) ... @@ -166,12 +166,12 @@ body: | ; CHECK-LABEL: name: test_freeze_s32_sgpr_to_vgpr ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:sgpr(s32) = G_FREEZE [[COPY]] - ; CHECK-NEXT: $vgpr0 = COPY [[FREEZE]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = G_FREEZE %0 - $vgpr0 = COPY %1(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:sgpr(i32) = G_FREEZE [[COPY]] + ; CHECK-NEXT: $vgpr0 = COPY [[FREEZE]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = G_FREEZE %0 + $vgpr0 = COPY %1(i32) ... @@ -185,12 +185,12 @@ body: | ; CHECK-LABEL: name: test_freeze_s32_vgpr_to_agpr ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:vgpr(s32) = G_FREEZE [[COPY]] - ; CHECK-NEXT: $agpr0 = COPY [[FREEZE]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_FREEZE %0 - $agpr0 = COPY %1(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:vgpr(i32) = G_FREEZE [[COPY]] + ; CHECK-NEXT: $agpr0 = COPY [[FREEZE]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_FREEZE %0 + $agpr0 = COPY %1(i32) ... @@ -204,12 +204,12 @@ body: | ; CHECK-LABEL: name: test_freeze_s32_sgpr_to_agpr ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:sgpr(s32) = G_FREEZE [[COPY]] - ; CHECK-NEXT: $agpr0 = COPY [[FREEZE]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = G_FREEZE %0 - $agpr0 = COPY %1(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:sgpr(i32) = G_FREEZE [[COPY]] + ; CHECK-NEXT: $agpr0 = COPY [[FREEZE]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = G_FREEZE %0 + $agpr0 = COPY %1(i32) ... 
@@ -223,12 +223,12 @@ body: | ; CHECK-LABEL: name: test_freeze_s32_agpr_to_vgpr ; CHECK: liveins: $agpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(s32) = COPY $agpr0 - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:agpr(s32) = G_FREEZE [[COPY]] - ; CHECK-NEXT: $vgpr0 = COPY [[FREEZE]](s32) - %0:_(s32) = COPY $agpr0 - %1:_(s32) = G_FREEZE %0 - $vgpr0 = COPY %1(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(i32) = COPY $agpr0 + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:agpr(i32) = G_FREEZE [[COPY]] + ; CHECK-NEXT: $vgpr0 = COPY [[FREEZE]](i32) + %0:_(i32) = COPY $agpr0 + %1:_(i32) = G_FREEZE %0 + $vgpr0 = COPY %1(i32) ... @@ -242,12 +242,12 @@ body: | ; CHECK-LABEL: name: test_freeze_s32_agpr_to_agpr ; CHECK: liveins: $agpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(s32) = COPY $agpr0 - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:agpr(s32) = G_FREEZE [[COPY]] - ; CHECK-NEXT: $agpr0 = COPY [[FREEZE]](s32) - %0:_(s32) = COPY $agpr0 - %1:_(s32) = G_FREEZE %0 - $agpr0 = COPY %1(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(i32) = COPY $agpr0 + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:agpr(i32) = G_FREEZE [[COPY]] + ; CHECK-NEXT: $agpr0 = COPY [[FREEZE]](i32) + %0:_(i32) = COPY $agpr0 + %1:_(i32) = G_FREEZE %0 + $agpr0 = COPY %1(i32) ... @@ -261,12 +261,12 @@ body: | ; CHECK-LABEL: name: test_freeze_s64 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:vgpr(s64) = G_FREEZE [[COPY]] - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[FREEZE]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_FREEZE %0 - $vgpr0_vgpr1 = COPY %1(s64) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:vgpr(i64) = G_FREEZE [[COPY]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[FREEZE]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = G_FREEZE %0 + $vgpr0_vgpr1 = COPY %1(i64) ... --- @@ -279,12 +279,12 @@ body: | ; CHECK-LABEL: name: test_freeze_s128 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:vgpr(s128) = G_FREEZE [[COPY]] - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FREEZE]](s128) - %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s128) = G_FREEZE %0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(s128) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:vgpr(i128) = G_FREEZE [[COPY]] + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FREEZE]](i128) + %0:_(i128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i128) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(i128) ... 
--- @@ -297,12 +297,12 @@ body: | ; CHECK-LABEL: name: test_freeze_256 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:vgpr(s256) = G_FREEZE [[COPY]] - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[FREEZE]](s256) - %0:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - %1:_(s256) = G_FREEZE %0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1(s256) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:vgpr(i256) = G_FREEZE [[COPY]] + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[FREEZE]](i256) + %0:_(i256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(i256) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1(i256) ... --- @@ -315,12 +315,12 @@ body: | ; CHECK-LABEL: name: test_freeze_s512 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:vgpr(s512) = G_FREEZE [[COPY]] - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[FREEZE]](s512) - %0:_(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - %1:_(s512) = G_FREEZE %0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1(s512) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:vgpr(i512) = G_FREEZE [[COPY]] + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[FREEZE]](i512) + %0:_(i512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:_(i512) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1(i512) ... --- @@ -333,12 +333,12 @@ body: | ; CHECK-LABEL: name: test_freeze_v2s32 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:vgpr(<2 x s32>) = G_FREEZE [[COPY]] - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[FREEZE]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_FREEZE %0 - $vgpr0_vgpr1 = COPY %1(<2 x s32>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:vgpr(<2 x i32>) = G_FREEZE [[COPY]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[FREEZE]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = G_FREEZE %0 + $vgpr0_vgpr1 = COPY %1(<2 x i32>) ... 
--- @@ -351,12 +351,12 @@ body: | ; CHECK-LABEL: name: test_freeze_v3s32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:vgpr(<3 x s32>) = G_FREEZE [[COPY]] - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[FREEZE]](<3 x s32>) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s32>) = G_FREEZE %0 - $vgpr0_vgpr1_vgpr2 = COPY %1(<3 x s32>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:vgpr(<3 x i32>) = G_FREEZE [[COPY]] + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[FREEZE]](<3 x i32>) + %0:_(<3 x i32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x i32>) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2 = COPY %1(<3 x i32>) ... --- @@ -369,12 +369,12 @@ body: | ; CHECK-LABEL: name: test_freeze_v4s32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:vgpr(<4 x s32>) = G_FREEZE [[COPY]] - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FREEZE]](<4 x s32>) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<4 x s32>) = G_FREEZE %0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<4 x s32>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:vgpr(<4 x i32>) = G_FREEZE [[COPY]] + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FREEZE]](<4 x i32>) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<4 x i32>) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<4 x i32>) ... --- @@ -387,12 +387,12 @@ body: | ; CHECK-LABEL: name: test_freeze_v5s32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<5 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:vgpr(<5 x s32>) = G_FREEZE [[COPY]] - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY [[FREEZE]](<5 x s32>) - %0:_(<5 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - %1:_(<5 x s32>) = G_FREEZE %0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY %1(<5 x s32>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<5 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:vgpr(<5 x i32>) = G_FREEZE [[COPY]] + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY [[FREEZE]](<5 x i32>) + %0:_(<5 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + %1:_(<5 x i32>) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY %1(<5 x i32>) ... 
--- @@ -405,12 +405,12 @@ body: | ; CHECK-LABEL: name: test_freeze_v8s32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:vgpr(<8 x s32>) = G_FREEZE [[COPY]] - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[FREEZE]](<8 x s32>) - %0:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - %1:_(<8 x s32>) = G_FREEZE %0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1(<8 x s32>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<8 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:vgpr(<8 x i32>) = G_FREEZE [[COPY]] + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[FREEZE]](<8 x i32>) + %0:_(<8 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(<8 x i32>) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1(<8 x i32>) ... --- @@ -423,12 +423,12 @@ body: | ; CHECK-LABEL: name: test_freeze_v16s32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:vgpr(<16 x s32>) = G_FREEZE [[COPY]] - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[FREEZE]](<16 x s32>) - %0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - %1:_(<16 x s32>) = G_FREEZE %0 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1(<16 x s32>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<16 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:vgpr(<16 x i32>) = G_FREEZE [[COPY]] + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[FREEZE]](<16 x i32>) + %0:_(<16 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:_(<16 x i32>) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1(<16 x i32>) ... --- @@ -441,12 +441,12 @@ body: | ; CHECK-LABEL: name: test_freeze_v2s16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:vgpr(<2 x s16>) = G_FREEZE [[COPY]] - ; CHECK-NEXT: $vgpr0 = COPY [[FREEZE]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = G_FREEZE %0 - $vgpr0 = COPY %1(<2 x s16>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:vgpr(<2 x i16>) = G_FREEZE [[COPY]] + ; CHECK-NEXT: $vgpr0 = COPY [[FREEZE]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = G_FREEZE %0 + $vgpr0 = COPY %1(<2 x i16>) ... 
--- @@ -459,12 +459,12 @@ body: | ; CHECK-LABEL: name: test_freeze_v4s16 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:vgpr(<4 x s16>) = G_FREEZE [[COPY]] - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[FREEZE]](<4 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = G_FREEZE %0 - $vgpr0_vgpr1 = COPY %1(<4 x s16>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:vgpr(<4 x i16>) = G_FREEZE [[COPY]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[FREEZE]](<4 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = G_FREEZE %0 + $vgpr0_vgpr1 = COPY %1(<4 x i16>) ... --- @@ -477,12 +477,12 @@ body: | ; CHECK-LABEL: name: test_freeze_v6s16 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:vgpr(<6 x s16>) = G_FREEZE [[COPY]] - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[FREEZE]](<6 x s16>) - %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<6 x s16>) = G_FREEZE %0 - $vgpr0_vgpr1_vgpr2 = COPY %1(<6 x s16>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:vgpr(<6 x i16>) = G_FREEZE [[COPY]] + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[FREEZE]](<6 x i16>) + %0:_(<6 x i16>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<6 x i16>) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2 = COPY %1(<6 x i16>) ... --- @@ -495,12 +495,12 @@ body: | ; CHECK-LABEL: name: test_freeze_v8s16 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<8 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:vgpr(<8 x s16>) = G_FREEZE [[COPY]] - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FREEZE]](<8 x s16>) - %0:_(<8 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<8 x s16>) = G_FREEZE %0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<8 x s16>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<8 x i16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:vgpr(<8 x i16>) = G_FREEZE [[COPY]] + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FREEZE]](<8 x i16>) + %0:_(<8 x i16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<8 x i16>) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<8 x i16>) ... --- @@ -513,12 +513,12 @@ body: | ; CHECK-LABEL: name: test_freeze_v2s64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:vgpr(<2 x s64>) = G_FREEZE [[COPY]] - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FREEZE]](<2 x s64>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s64>) = G_FREEZE %0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x s64>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:vgpr(<2 x i64>) = G_FREEZE [[COPY]] + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FREEZE]](<2 x i64>) + %0:_(<2 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x i64>) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x i64>) ... 
--- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fshr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fshr.mir index b1a55fe7bc42f..6a6c763364d92 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fshr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fshr.mir @@ -12,17 +12,17 @@ body: | ; CHECK-LABEL: name: fshr_sss ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY3]], [[COPY4]], [[COPY5]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $sgpr2 - %3:_(s32) = G_FSHR %0, %1, %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(i32) = G_FSHR [[COPY3]], [[COPY4]], [[COPY5]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $sgpr2 + %3:_(i32) = G_FSHR %0, %1, %2(i32) ... --- name: fshr_vss @@ -34,16 +34,16 @@ body: | ; CHECK-LABEL: name: fshr_vss ; CHECK: liveins: $vgpr0, $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY]], [[COPY3]], [[COPY4]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s32) = COPY $sgpr1 - %3:_(s32) = G_FSHR %0, %1, %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(i32) = G_FSHR [[COPY]], [[COPY3]], [[COPY4]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr0 + %2:_(i32) = COPY $sgpr1 + %3:_(i32) = G_FSHR %0, %1, %2(i32) ... 
--- name: fshr_svs @@ -55,16 +55,16 @@ body: | ; CHECK-LABEL: name: fshr_svs ; CHECK: liveins: $sgpr0, $vgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY3]], [[COPY1]], [[COPY4]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = COPY $sgpr1 - %3:_(s32) = G_FSHR %0, %1, %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(i32) = G_FSHR [[COPY3]], [[COPY1]], [[COPY4]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = COPY $sgpr1 + %3:_(i32) = G_FSHR %0, %1, %2(i32) ... --- name: fshr_ssv @@ -76,16 +76,16 @@ body: | ; CHECK-LABEL: name: fshr_ssv ; CHECK: liveins: $sgpr0, $sgpr1, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY3]], [[COPY4]], [[COPY2]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $vgpr0 - %3:_(s32) = G_FSHR %0, %1, %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(i32) = G_FSHR [[COPY3]], [[COPY4]], [[COPY2]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $vgpr0 + %3:_(i32) = G_FSHR %0, %1, %2(i32) ... --- name: fshr_vvs @@ -97,15 +97,15 @@ body: | ; CHECK-LABEL: name: fshr_vvs ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY]], [[COPY1]], [[COPY3]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $sgpr0 - %3:_(s32) = G_FSHR %0, %1, %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(i32) = G_FSHR [[COPY]], [[COPY1]], [[COPY3]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $sgpr0 + %3:_(i32) = G_FSHR %0, %1, %2(i32) ... 
--- name: fshr_vsv @@ -117,15 +117,15 @@ body: | ; CHECK-LABEL: name: fshr_vsv ; CHECK: liveins: $vgpr0, $sgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY]], [[COPY3]], [[COPY2]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $vgpr1 - %3:_(s32) = G_FSHR %0, %1, %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(i32) = G_FSHR [[COPY]], [[COPY3]], [[COPY2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $vgpr1 + %3:_(i32) = G_FSHR %0, %1, %2(i32) ... --- name: fshr_svv @@ -137,15 +137,15 @@ body: | ; CHECK-LABEL: name: fshr_svv ; CHECK: liveins: $sgpr0, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY3]], [[COPY1]], [[COPY2]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = COPY $vgpr1 - %3:_(s32) = G_FSHR %0, %1, %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(i32) = G_FSHR [[COPY3]], [[COPY1]], [[COPY2]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = COPY $vgpr1 + %3:_(i32) = G_FSHR %0, %1, %2(i32) ... --- name: fshr_vvv @@ -157,12 +157,12 @@ body: | ; CHECK-LABEL: name: fshr_vvv ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = G_FSHR %0, %1, %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(i32) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = G_FSHR %0, %1, %2(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fsqrt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fsqrt.mir index 17b999adf7e56..71a463bbefec3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fsqrt.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fsqrt.mir @@ -12,13 +12,17 @@ body: | ; CHECK-LABEL: name: fsqrt_s ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[FSQRT:%[0-9]+]]:vgpr(s32) = G_FSQRT [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[FSQRT]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = G_FSQRT %0 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST]](f32) + ; CHECK-NEXT: [[FSQRT:%[0-9]+]]:vgpr(f32) = G_FSQRT [[COPY1]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(i32) = G_BITCAST [[FSQRT]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = G_FSQRT %1 + %3:_(i32) = G_BITCAST %2(f32) + $vgpr0 = COPY %3(i32) ... --- @@ -31,10 +35,14 @@ body: | ; CHECK-LABEL: name: fsqrt_v ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[FSQRT:%[0-9]+]]:vgpr(s32) = G_FSQRT [[COPY]] - ; CHECK-NEXT: $vgpr0 = COPY [[FSQRT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_FSQRT %0 - $vgpr0 = COPY %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[FSQRT:%[0-9]+]]:vgpr(f32) = G_FSQRT [[BITCAST]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(i32) = G_BITCAST [[FSQRT]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = G_FSQRT %1 + %3:_(i32) = G_BITCAST %2(f32) + $vgpr0 = COPY %3(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fsub.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fsub.mir index 33b39180d22ea..7bfb5250f17af 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fsub.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fsub.mir @@ -12,14 +12,18 @@ body: | ; CHECK-LABEL: name: fsub_ss ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[FSUB:%[0-9]+]]:vgpr(s32) = G_FSUB [[COPY2]], [[COPY3]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_FSUB %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST]](f32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST1]](f32) + ; CHECK-NEXT: [[FSUB:%[0-9]+]]:vgpr(f32) = G_FSUB [[COPY2]], [[COPY3]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32) = G_FSUB %2, %3 ... 
--- @@ -32,13 +36,17 @@ body: | ; CHECK-LABEL: name: fsub_sv ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[FSUB:%[0-9]+]]:vgpr(s32) = G_FSUB [[COPY2]], [[COPY1]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = G_FSUB %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST]](f32) + ; CHECK-NEXT: [[FSUB:%[0-9]+]]:vgpr(f32) = G_FSUB [[COPY2]], [[BITCAST1]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32) = G_FSUB %2, %3 ... --- @@ -51,13 +59,17 @@ body: | ; CHECK-LABEL: name: fsub_vs ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[FSUB:%[0-9]+]]:vgpr(s32) = G_FSUB [[COPY]], [[COPY2]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s32) = G_FSUB %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST1]](f32) + ; CHECK-NEXT: [[FSUB:%[0-9]+]]:vgpr(f32) = G_FSUB [[BITCAST]], [[COPY2]] + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr0 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32) = G_FSUB %2, %3 ... --- @@ -70,10 +82,14 @@ body: | ; CHECK-LABEL: name: fsub_vv ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[FSUB:%[0-9]+]]:vgpr(s32) = G_FSUB [[COPY]], [[COPY1]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_FSUB %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[FSUB:%[0-9]+]]:vgpr(f32) = G_FSUB [[BITCAST]], [[BITCAST1]] + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32) = G_FSUB %2, %3 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-icmp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-icmp.mir index b9786893fbac2..59ccb859dbb40 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-icmp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-icmp.mir @@ -14,20 +14,21 @@ body: | ; GFX7-LABEL: name: icmp_eq_s32_ss ; GFX7: liveins: $sgpr0, $sgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] - ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY1]] + ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; ; GFX8-LABEL: name: icmp_eq_s32_ss ; GFX8: liveins: $sgpr0, $sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s1) = G_ICMP intpred(eq), %0, %1 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY1]] + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i1) = G_ICMP intpred(eq), %0(i32), %1 ... --- @@ -40,20 +41,21 @@ body: | ; GFX7-LABEL: name: icmp_eq_s32_sv ; GFX7: liveins: $sgpr0, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[COPY1]] + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[COPY1]] + ; ; GFX8-LABEL: name: icmp_eq_s32_sv ; GFX8: liveins: $sgpr0, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[COPY1]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s1) = G_ICMP intpred(eq), %0, %1 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[COPY1]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i1) = G_ICMP intpred(eq), %0(i32), %1 ... 
--- @@ -66,20 +68,21 @@ body: | ; GFX7-LABEL: name: icmp_eq_s32_vs ; GFX7: liveins: $sgpr0, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY2]] + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[COPY2]] + ; ; GFX8-LABEL: name: icmp_eq_s32_vs ; GFX8: liveins: $sgpr0, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY2]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s1) = G_ICMP intpred(eq), %1, %0 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[COPY2]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i1) = G_ICMP intpred(eq), %1(i32), %0 ... --- @@ -92,18 +95,19 @@ body: | ; GFX7-LABEL: name: icmp_eq_s32_vv ; GFX7: liveins: $vgpr0, $vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY1]] + ; ; GFX8-LABEL: name: icmp_eq_s32_vv ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s1) = G_ICMP intpred(eq), %0, %1 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY1]] + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i1) = G_ICMP intpred(eq), %0(i32), %1 ... 
--- @@ -116,21 +120,22 @@ body: | ; GFX7-LABEL: name: icmp_eq_s64_ss ; GFX7: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64) - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s64), [[COPY3]] + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(i64) = COPY $sgpr2_sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(i64) = COPY [[COPY]](i64) + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr(i64) = COPY [[COPY1]](i64) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i64), [[COPY3]] + ; ; GFX8-LABEL: name: icmp_eq_s64_ss ; GFX8: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s64), [[COPY1]] - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s64) = COPY $sgpr2_sgpr3 - %2:_(s1) = G_ICMP intpred(eq), %0, %1 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(i64) = COPY $sgpr2_sgpr3 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY]](i64), [[COPY1]] + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(i64) = COPY $sgpr2_sgpr3 + %2:_(i1) = G_ICMP intpred(eq), %0(i64), %1 ... --- @@ -143,20 +148,21 @@ body: | ; GFX7-LABEL: name: icmp_eq_s64_sv ; GFX7: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s64), [[COPY1]] + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(i64) = COPY [[COPY]](i64) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i64), [[COPY1]] + ; ; GFX8-LABEL: name: icmp_eq_s64_sv ; GFX8: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s64), [[COPY1]] - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s64) = COPY $vgpr0_vgpr1 - %2:_(s1) = G_ICMP intpred(eq), %0, %1 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(i64) = COPY [[COPY]](i64) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i64), [[COPY1]] + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(i64) = COPY $vgpr0_vgpr1 + %2:_(i1) = G_ICMP intpred(eq), %0(i64), %1 ... 
--- @@ -169,20 +175,21 @@ body: | ; GFX7-LABEL: name: icmp_eq_s64_vs ; GFX7: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s64), [[COPY2]] + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(i64) = COPY [[COPY]](i64) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i64), [[COPY2]] + ; ; GFX8-LABEL: name: icmp_eq_s64_vs ; GFX8: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s64), [[COPY2]] - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s64) = COPY $vgpr0_vgpr1 - %2:_(s1) = G_ICMP intpred(eq), %1, %0 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(i64) = COPY [[COPY]](i64) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i64), [[COPY2]] + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(i64) = COPY $vgpr0_vgpr1 + %2:_(i1) = G_ICMP intpred(eq), %1(i64), %0 ... --- @@ -195,18 +202,19 @@ body: | ; GFX7-LABEL: name: icmp_eq_s64_vv ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s64), [[COPY1]] + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr2_vgpr3 + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY]](i64), [[COPY1]] + ; ; GFX8-LABEL: name: icmp_eq_s64_vv ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s64), [[COPY1]] - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s1) = G_ICMP intpred(eq), %0, %1 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY]](i64), [[COPY1]] + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i1) = G_ICMP intpred(eq), %0(i64), %1 ... 
--- @@ -219,21 +227,22 @@ body: | ; GFX7-LABEL: name: icmp_ne_s64_ss ; GFX7: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64) - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY2]](s64), [[COPY3]] + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(i64) = COPY $sgpr2_sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(i64) = COPY [[COPY]](i64) + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr(i64) = COPY [[COPY1]](i64) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY2]](i64), [[COPY3]] + ; ; GFX8-LABEL: name: icmp_ne_s64_ss ; GFX8: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s64), [[COPY1]] - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s64) = COPY $sgpr2_sgpr3 - %2:_(s1) = G_ICMP intpred(ne), %0, %1 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(i64) = COPY $sgpr2_sgpr3 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ne), [[COPY]](i64), [[COPY1]] + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(i64) = COPY $sgpr2_sgpr3 + %2:_(i1) = G_ICMP intpred(ne), %0(i64), %1 ... --- @@ -246,20 +255,21 @@ body: | ; GFX7-LABEL: name: icmp_ne_s64_sv ; GFX7: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY2]](s64), [[COPY1]] + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(i64) = COPY [[COPY]](i64) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY2]](i64), [[COPY1]] + ; ; GFX8-LABEL: name: icmp_ne_s64_sv ; GFX8: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY2]](s64), [[COPY1]] - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s64) = COPY $vgpr0_vgpr1 - %2:_(s1) = G_ICMP intpred(ne), %0, %1 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(i64) = COPY [[COPY]](i64) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY2]](i64), [[COPY1]] + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(i64) = COPY $vgpr0_vgpr1 + %2:_(i1) = G_ICMP intpred(ne), %0(i64), %1 ... 
--- @@ -272,20 +282,21 @@ body: | ; GFX7-LABEL: name: icmp_ne_s64_vs ; GFX7: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s64), [[COPY2]] + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(i64) = COPY [[COPY]](i64) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY1]](i64), [[COPY2]] + ; ; GFX8-LABEL: name: icmp_ne_s64_vs ; GFX8: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s64), [[COPY2]] - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s64) = COPY $vgpr0_vgpr1 - %2:_(s1) = G_ICMP intpred(ne), %1, %0 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(i64) = COPY [[COPY]](i64) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY1]](i64), [[COPY2]] + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(i64) = COPY $vgpr0_vgpr1 + %2:_(i1) = G_ICMP intpred(ne), %1(i64), %0 ... --- @@ -298,18 +309,19 @@ body: | ; GFX7-LABEL: name: icmp_ne_s64_vv ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY]](s64), [[COPY1]] + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr2_vgpr3 + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY]](i64), [[COPY1]] + ; ; GFX8-LABEL: name: icmp_ne_s64_vv ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY]](s64), [[COPY1]] - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s1) = G_ICMP intpred(ne), %0, %1 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY]](i64), [[COPY1]] + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i1) = G_ICMP intpred(ne), %0(i64), %1 ... 
--- @@ -322,22 +334,23 @@ body: | ; GFX7-LABEL: name: icmp_slt_s64_ss ; GFX7: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64) - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(slt), [[COPY2]](s64), [[COPY3]] + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(i64) = COPY $sgpr2_sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(i64) = COPY [[COPY]](i64) + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr(i64) = COPY [[COPY1]](i64) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(slt), [[COPY2]](i64), [[COPY3]] + ; ; GFX8-LABEL: name: icmp_slt_s64_ss ; GFX8: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(slt), [[COPY2]](s64), [[COPY3]] - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s64) = COPY $sgpr2_sgpr3 - %2:_(s1) = G_ICMP intpred(slt), %0, %1 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(i64) = COPY $sgpr2_sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(i64) = COPY [[COPY]](i64) + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr(i64) = COPY [[COPY1]](i64) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(slt), [[COPY2]](i64), [[COPY3]] + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(i64) = COPY $sgpr2_sgpr3 + %2:_(i1) = G_ICMP intpred(slt), %0(i64), %1 ... --- @@ -350,20 +363,21 @@ body: | ; GFX7-LABEL: name: icmp_slt_s64_sv ; GFX7: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(slt), [[COPY2]](s64), [[COPY1]] + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(i64) = COPY [[COPY]](i64) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(slt), [[COPY2]](i64), [[COPY1]] + ; ; GFX8-LABEL: name: icmp_slt_s64_sv ; GFX8: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(slt), [[COPY2]](s64), [[COPY1]] - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s64) = COPY $vgpr0_vgpr1 - %2:_(s1) = G_ICMP intpred(slt), %0, %1 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(i64) = COPY [[COPY]](i64) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(slt), [[COPY2]](i64), [[COPY1]] + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(i64) = COPY $vgpr0_vgpr1 + %2:_(i1) = G_ICMP intpred(slt), %0(i64), %1 ... 
--- @@ -376,20 +390,21 @@ body: | ; GFX7-LABEL: name: icmp_slt_s64_vs ; GFX7: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(slt), [[COPY1]](s64), [[COPY2]] + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(i64) = COPY [[COPY]](i64) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(slt), [[COPY1]](i64), [[COPY2]] + ; ; GFX8-LABEL: name: icmp_slt_s64_vs ; GFX8: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(slt), [[COPY1]](s64), [[COPY2]] - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s64) = COPY $vgpr0_vgpr1 - %2:_(s1) = G_ICMP intpred(slt), %1, %0 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(i64) = COPY [[COPY]](i64) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(slt), [[COPY1]](i64), [[COPY2]] + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(i64) = COPY $vgpr0_vgpr1 + %2:_(i1) = G_ICMP intpred(slt), %1(i64), %0 ... --- @@ -402,18 +417,19 @@ body: | ; GFX7-LABEL: name: icmp_slt_s64_vv ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(slt), [[COPY]](s64), [[COPY1]] + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr2_vgpr3 + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(slt), [[COPY]](i64), [[COPY1]] + ; ; GFX8-LABEL: name: icmp_slt_s64_vv ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(slt), [[COPY]](s64), [[COPY1]] - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s1) = G_ICMP intpred(slt), %0, %1 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(slt), [[COPY]](i64), [[COPY1]] + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i1) = G_ICMP intpred(slt), %0(i64), %1 ... # Result is already constrained to be VCC bank, despite scalar inputs. 
@@ -428,25 +444,26 @@ body: | ; GFX7-LABEL: name: map_icmp_already_vcc_bank_sgpr_inputs ; GFX7: liveins: $sgpr0, $sgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[COPY3]] - ; GFX7-NEXT: S_ENDPGM 0, implicit [[ICMP]](s1) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[COPY3]] + ; GFX7-NEXT: S_ENDPGM 0, implicit [[ICMP]](i1) + ; ; GFX8-LABEL: name: map_icmp_already_vcc_bank_sgpr_inputs ; GFX8: liveins: $sgpr0, $sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[COPY3]] - ; GFX8-NEXT: S_ENDPGM 0, implicit [[ICMP]](s1) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:vcc(s1) = G_ICMP intpred(eq), %0, %1 - S_ENDPGM 0, implicit %2 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[COPY3]] + ; GFX8-NEXT: S_ENDPGM 0, implicit [[ICMP]](i1) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:vcc(i1) = G_ICMP intpred(eq), %0(i32), %1 + S_ENDPGM 0, implicit %2(i1) ... 
# Result is already constrained to be VCC bank, despite scalar inputs @@ -462,23 +479,24 @@ body: | ; GFX7-LABEL: name: map_icmp_already_vcc_regclass_sgpr_inputs ; GFX7: liveins: $sgpr0, $sgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:sreg_64_xexec(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[COPY3]] - ; GFX7-NEXT: S_ENDPGM 0, implicit [[ICMP]](s1) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:sreg_64_xexec(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[COPY3]] + ; GFX7-NEXT: S_ENDPGM 0, implicit [[ICMP]](i1) + ; ; GFX8-LABEL: name: map_icmp_already_vcc_regclass_sgpr_inputs ; GFX8: liveins: $sgpr0, $sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:sreg_64_xexec(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[COPY3]] - ; GFX8-NEXT: S_ENDPGM 0, implicit [[ICMP]](s1) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:sreg_64_xexec(s1) = G_ICMP intpred(eq), %0, %1 - S_ENDPGM 0, implicit %2 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:sreg_64_xexec(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[COPY3]] + ; GFX8-NEXT: S_ENDPGM 0, implicit [[ICMP]](i1) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:sreg_64_xexec(i1) = G_ICMP intpred(eq), %0(i32), %1 + S_ENDPGM 0, implicit %2(i1) ...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-icmp.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-icmp.s16.mir index 1b6689d95a904..bd3a4a21d6207 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-icmp.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-icmp.s16.mir @@ -12,18 +12,18 @@ body: | ; GFX8-LABEL: name: icmp_eq_s16_ss ; GFX8: liveins: $sgpr0, $sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC]](s16) - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC1]](s16) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s16), [[COPY3]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s1) = G_ICMP intpred(eq), %2, %3 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY1]](i32) + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(i16) = COPY [[TRUNC]](i16) + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr(i16) = COPY [[TRUNC1]](i16) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i16), [[COPY3]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i1) = G_ICMP intpred(eq), %2(i16), %3 ... --- @@ -36,17 +36,17 @@ body: | ; GFX8-LABEL: name: icmp_eq_s16_sv ; GFX8: liveins: $sgpr0, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC]](s16) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s16), [[TRUNC1]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s1) = G_ICMP intpred(eq), %2, %3 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY1]](i32) + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(i16) = COPY [[TRUNC]](i16) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i16), [[TRUNC1]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i1) = G_ICMP intpred(eq), %2(i16), %3 ... 
--- @@ -59,17 +59,17 @@ body: | ; GFX8-LABEL: name: icmp_eq_s16_vs ; GFX8: liveins: $sgpr0, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC]](s16) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s16), [[TRUNC1]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s1) = G_ICMP intpred(eq), %2, %3 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY1]](i32) + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(i16) = COPY [[TRUNC]](i16) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i16), [[TRUNC1]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i1) = G_ICMP intpred(eq), %2(i16), %3 ... --- @@ -82,14 +82,14 @@ body: | ; GFX8-LABEL: name: icmp_eq_s16_vv ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[TRUNC]](s16), [[TRUNC1]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s1) = G_ICMP intpred(eq), %2, %3 + ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY1]](i32) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[TRUNC]](i16), [[TRUNC1]] + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i1) = G_ICMP intpred(eq), %2(i16), %3 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-illegal-copy.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-illegal-copy.mir index 8a26fbeb1d11c..1301b63192cfc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-illegal-copy.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-illegal-copy.mir @@ -3,7 +3,7 @@ # Check behavior for illegal copies. -# CHECK: LLVM ERROR: unable to map instruction: $sgpr0 = COPY %0:vgpr(s32) (in function: illegal_copy_s32_v_to_s) +# CHECK: LLVM ERROR: unable to map instruction: $sgpr0 = COPY %0:vgpr(i32) (in function: illegal_copy_s32_v_to_s) --- name: illegal_copy_s32_v_to_s @@ -13,6 +13,6 @@ body: | bb.0: liveins: $vgpr0 - %0:_(s32) = COPY $vgpr0 + %0:_(i32) = COPY $vgpr0 $sgpr0 = COPY %0 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-implicit-def.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-implicit-def.mir index ee8dbf1adb7b4..22b0eff9c84d8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-implicit-def.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-implicit-def.mir @@ -13,12 +13,12 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[DEF]](s32) - ; CHECK-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[DEF]](i32) + ; CHECK-NEXT: G_STORE [[COPY1]](i32), [[COPY]](p1) :: (store (i32), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_IMPLICIT_DEF - G_STORE %1, %0 :: (store (s32), addrspace 1) + %1:_(i32) = G_IMPLICIT_DEF + G_STORE %1(i32), %0(p1) :: (store (i32), addrspace 1) ... @@ -28,10 +28,10 @@ legalized: true body: | bb.0: ; CHECK-LABEL: name: test_implicit_def_s32_sgpr_use - ; CHECK: [[DEF:%[0-9]+]]:sgpr(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsg), 0, [[DEF]](s32) - %0:_(s32) = G_IMPLICIT_DEF - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsg), 0, %0 + ; CHECK: [[DEF:%[0-9]+]]:sgpr(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsg), 0, [[DEF]](i32) + %0:_(i32) = G_IMPLICIT_DEF + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsg), 0, %0(i32) ... @@ -46,12 +46,12 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(s64) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY [[DEF]](s64) - ; CHECK-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), addrspace 1) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(i64) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY [[DEF]](i64) + ; CHECK-NEXT: G_STORE [[COPY1]](i64), [[COPY]](p1) :: (store (i64), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_IMPLICIT_DEF - G_STORE %1, %0 :: (store (s64), addrspace 1) + %1:_(i64) = G_IMPLICIT_DEF + G_STORE %1(i64), %0(p1) :: (store (i64), addrspace 1) ... @@ -66,12 +66,12 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(<3 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<3 x s32>) = COPY [[DEF]](<3 x s32>) - ; CHECK-NEXT: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 4, addrspace 1) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(<3 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<3 x i32>) = COPY [[DEF]](<3 x i32>) + ; CHECK-NEXT: G_STORE [[COPY1]](<3 x i32>), [[COPY]](p1) :: (store (<3 x i32>), align 4, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<3 x s32>) = G_IMPLICIT_DEF - G_STORE %1, %0 :: (store (<3 x s32>), addrspace 1, align 4) + %1:_(<3 x i32>) = G_IMPLICIT_DEF + G_STORE %1(<3 x i32>), %0(p1) :: (store (<3 x i32>), align 4, addrspace 1) ... 
@@ -86,12 +86,12 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[DEF]](<4 x s32>) - ; CHECK-NEXT: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(<4 x i32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<4 x i32>) = COPY [[DEF]](<4 x i32>) + ; CHECK-NEXT: G_STORE [[COPY1]](<4 x i32>), [[COPY]](p1) :: (store (<4 x i32>), align 4, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<4 x s32>) = G_IMPLICIT_DEF - G_STORE %1, %0 :: (store (<4 x s32>), addrspace 1, align 4) + %1:_(<4 x i32>) = G_IMPLICIT_DEF + G_STORE %1(<4 x i32>), %0(p1) :: (store (<4 x i32>), align 4, addrspace 1) ... @@ -101,10 +101,10 @@ legalized: true body: | bb.0: ; CHECK-LABEL: name: test_implicit_def_s64_sgpr_use - ; CHECK: [[DEF:%[0-9]+]]:sgpr(s64) = G_IMPLICIT_DEF - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[DEF]](s64) - %0:_(s64) = G_IMPLICIT_DEF - $sgpr8_sgpr9 = COPY %0 + ; CHECK: [[DEF:%[0-9]+]]:sgpr(i64) = G_IMPLICIT_DEF + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[DEF]](i64) + %0:_(i64) = G_IMPLICIT_DEF + $sgpr8_sgpr9 = COPY %0(i64) ... @@ -114,10 +114,10 @@ legalized: true body: | bb.0: ; CHECK-LABEL: name: test_implicit_def_s128_sgpr_use - ; CHECK: [[DEF:%[0-9]+]]:sgpr(s128) = G_IMPLICIT_DEF - ; CHECK-NEXT: $sgpr8_sgpr9_sgpr10_sgpr11 = COPY [[DEF]](s128) - %0:_(s128) = G_IMPLICIT_DEF - $sgpr8_sgpr9_sgpr10_sgpr11 = COPY %0 + ; CHECK: [[DEF:%[0-9]+]]:sgpr(i128) = G_IMPLICIT_DEF + ; CHECK-NEXT: $sgpr8_sgpr9_sgpr10_sgpr11 = COPY [[DEF]](i128) + %0:_(i128) = G_IMPLICIT_DEF + $sgpr8_sgpr9_sgpr10_sgpr11 = COPY %0(i128) ... @@ -130,18 +130,18 @@ body: | ; CHECK-LABEL: name: test_implicit_def_s1_sgpr_use ; CHECK: liveins: $sgpr8, $sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr8 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr9 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[DEF]](s32) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ZEXT]](s32), [[COPY]], [[COPY1]] - ; CHECK-NEXT: S_ENDPGM 0, implicit [[SELECT]](s32) - %0:_(s32) = COPY $sgpr8 - %1:_(s32) = COPY $sgpr9 - %2:_(s1) = G_IMPLICIT_DEF - %3:_(s32) = G_SELECT %2, %0, %1 - S_ENDPGM 0, implicit %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr8 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr9 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[DEF]](i32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(i32) = G_SELECT [[ZEXT]](i32), [[COPY]], [[COPY1]] + ; CHECK-NEXT: S_ENDPGM 0, implicit [[SELECT]](i32) + %0:_(i32) = COPY $sgpr8 + %1:_(i32) = COPY $sgpr9 + %2:_(i1) = G_IMPLICIT_DEF + %3:_(i32) = G_SELECT %2(i1), %0, %1 + S_ENDPGM 0, implicit %3(i32) ... 
@@ -154,18 +154,18 @@ body: | ; CHECK-LABEL: name: test_implicit_def_s1_vcc_use ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[DEF]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY2]](s1), [[COPY]], [[COPY1]] - ; CHECK-NEXT: S_ENDPGM 0, implicit [[SELECT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s1) = G_IMPLICIT_DEF - %3:_(s32) = G_SELECT %2, %0, %1 - S_ENDPGM 0, implicit %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[DEF]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY2]](i1), [[COPY]], [[COPY1]] + ; CHECK-NEXT: S_ENDPGM 0, implicit [[SELECT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i1) = G_IMPLICIT_DEF + %3:_(i32) = G_SELECT %2(i1), %0, %1 + S_ENDPGM 0, implicit %3(i32) ... @@ -178,26 +178,27 @@ body: | ; FAST-LABEL: name: test_implicit_def_s1_explicit_vcc_use_0 ; FAST: liveins: $vgpr0, $vgpr1 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; FAST-NEXT: [[DEF:%[0-9]+]]:sgpr(s32) = G_IMPLICIT_DEF - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[DEF]](s32) - ; FAST-NEXT: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY2]](s1), [[COPY]], [[COPY1]] - ; FAST-NEXT: S_ENDPGM 0, implicit [[SELECT]](s32) + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; FAST-NEXT: [[DEF:%[0-9]+]]:sgpr(i32) = G_IMPLICIT_DEF + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[DEF]](i32) + ; FAST-NEXT: [[COPY2:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY2]](i1), [[COPY]], [[COPY1]] + ; FAST-NEXT: S_ENDPGM 0, implicit [[SELECT]](i32) + ; ; GREEDY-LABEL: name: test_implicit_def_s1_explicit_vcc_use_0 ; GREEDY: liveins: $vgpr0, $vgpr1 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[DEF:%[0-9]+]]:vcc(s1) = G_IMPLICIT_DEF - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[DEF]](s1), [[COPY]], [[COPY1]] - ; GREEDY-NEXT: S_ENDPGM 0, implicit [[SELECT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:vcc(s1) = G_IMPLICIT_DEF - %3:_(s32) = G_SELECT %2, %0, %1 - S_ENDPGM 0, implicit %3 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GREEDY-NEXT: [[DEF:%[0-9]+]]:vcc(i1) = G_IMPLICIT_DEF + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[DEF]](i1), [[COPY]], [[COPY1]] + ; GREEDY-NEXT: S_ENDPGM 0, implicit [[SELECT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:vcc(i1) = G_IMPLICIT_DEF + %3:_(i32) = G_SELECT %2(i1), %0, %1 + S_ENDPGM 0, implicit %3(i32) ... 
@@ -207,14 +208,15 @@ legalized: true body: | bb.0: ; FAST-LABEL: name: test_implicit_def_s1_explicit_vcc_use_1 - ; FAST: [[DEF:%[0-9]+]]:sgpr(s32) = G_IMPLICIT_DEF - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[DEF]](s32) - ; FAST-NEXT: [[COPY:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: S_ENDPGM 0, implicit [[COPY]](s1) + ; FAST: [[DEF:%[0-9]+]]:sgpr(i32) = G_IMPLICIT_DEF + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[DEF]](i32) + ; FAST-NEXT: [[COPY:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; FAST-NEXT: S_ENDPGM 0, implicit [[COPY]](i1) + ; ; GREEDY-LABEL: name: test_implicit_def_s1_explicit_vcc_use_1 - ; GREEDY: [[DEF:%[0-9]+]]:vcc(s1) = G_IMPLICIT_DEF - ; GREEDY-NEXT: S_ENDPGM 0, implicit [[DEF]](s1) - %2:vcc(s1) = G_IMPLICIT_DEF - S_ENDPGM 0, implicit %2 + ; GREEDY: [[DEF:%[0-9]+]]:vcc(i1) = G_IMPLICIT_DEF + ; GREEDY-NEXT: S_ENDPGM 0, implicit [[DEF]](i1) + %0:vcc(i1) = G_IMPLICIT_DEF + S_ENDPGM 0, implicit %0(i1) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-insert-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-insert-vector-elt.mir index 97ca93ad33418..a7c5a7133edbd 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-insert-vector-elt.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-insert-vector-elt.mir @@ -13,29 +13,29 @@ body: | ; CHECK-LABEL: name: insert_vector_elt_v4i32_s_s_s ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $sgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[COPY1]], [[UV]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C1]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP1]](s32), [[COPY1]], [[UV1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C2]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP2]](s32), [[COPY1]], [[UV2]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C3]] - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP3]](s32), [[COPY1]], [[UV3]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32), [[SELECT2]](s32), [[SELECT3]](s32) - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) - %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %1:_(s32) = COPY $sgpr4 - %2:_(s32) = COPY $sgpr5 - %3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 - $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr5 + ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(i32), [[UV1:%[0-9]+]]:sgpr(i32), [[UV2:%[0-9]+]]:sgpr(i32), [[UV3:%[0-9]+]]:sgpr(i32) = G_UNMERGE_VALUES [[COPY]](<4 x 
i32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(i32) = G_SELECT [[ICMP]](i32), [[COPY1]], [[UV]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C1]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:sgpr(i32) = G_SELECT [[ICMP1]](i32), [[COPY1]], [[UV1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C2]] + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:sgpr(i32) = G_SELECT [[ICMP2]](i32), [[COPY1]], [[UV2]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C3]] + ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:sgpr(i32) = G_SELECT [[ICMP3]](i32), [[COPY1]], [[UV3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[SELECT]](i32), [[SELECT1]](i32), [[SELECT2]](i32), [[SELECT3]](i32) + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) + %0:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %1:_(i32) = COPY $sgpr4 + %2:_(i32) = COPY $sgpr5 + %3:_(<4 x i32>) = G_INSERT_VECTOR_ELT %0, %1(i32), %2(i32) + $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3(<4 x i32>) ... --- @@ -49,34 +49,34 @@ body: | ; CHECK-LABEL: name: insert_vector_elt_v4i32_v_s_s ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY3]](s32), [[C]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY4]], [[UV]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY3]](s32), [[C1]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[COPY5]], [[UV1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY3]](s32), [[C2]] - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[COPY6]], [[UV2]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY3]](s32), [[C3]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[COPY7]], [[UV3]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32), [[SELECT2]](s32), [[SELECT3]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = COPY $sgpr0 - %2:_(s32) = COPY $sgpr1 - %3:_(<4 x s32>) = 
G_INSERT_VECTOR_ELT %0, %1, %2 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32), [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY3]](i32), [[C]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[COPY4]], [[UV]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY3]](i32), [[C1]] + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP1]](i1), [[COPY5]], [[UV1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY3]](i32), [[C2]] + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP2]](i1), [[COPY6]], [[UV2]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY3]](i32), [[C3]] + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP3]](i1), [[COPY7]], [[UV3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x i32>) = G_BUILD_VECTOR [[SELECT]](i32), [[SELECT1]](i32), [[SELECT2]](i32), [[SELECT3]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i32) = COPY $sgpr0 + %2:_(i32) = COPY $sgpr1 + %3:_(<4 x i32>) = G_INSERT_VECTOR_ELT %0, %1(i32), %2(i32) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3(<4 x i32>) ... 
--- @@ -90,31 +90,31 @@ body: | ; CHECK-LABEL: name: insert_vector_elt_v4i32_s_v_s ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr0, $sgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](<4 x s32>) - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY4]](s32), [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY1]], [[UV]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY4]](s32), [[C1]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[COPY1]], [[UV1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY4]](s32), [[C2]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[COPY1]], [[UV2]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY4]](s32), [[C3]] - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[COPY1]], [[UV3]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32), [[SELECT2]](s32), [[SELECT3]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) - %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = COPY $sgpr4 - %3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x i32>) = COPY [[COPY]](<4 x i32>) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32), [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY3]](<4 x i32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY4]](i32), [[C]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[COPY1]], [[UV]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY4]](i32), [[C1]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP1]](i1), [[COPY1]], [[UV1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY4]](i32), [[C2]] + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP2]](i1), [[COPY1]], [[UV2]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY4]](i32), [[C3]] + ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP3]](i1), [[COPY1]], [[UV3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x i32>) = G_BUILD_VECTOR 
[[SELECT]](i32), [[SELECT1]](i32), [[SELECT2]](i32), [[SELECT3]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) + %0:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = COPY $sgpr4 + %3:_(<4 x i32>) = G_INSERT_VECTOR_ELT %0, %1(i32), %2(i32) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3(<4 x i32>) ... --- @@ -129,34 +129,34 @@ body: | ; CHECK-LABEL: name: insert_vector_elt_v4i32_s_s_v ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](<4 x s32>) - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY4]], [[UV]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C1]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[COPY5]], [[UV1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C2]] - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[COPY6]], [[UV2]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C3]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[COPY7]], [[UV3]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32), [[SELECT2]](s32), [[SELECT3]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) - %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %1:_(s32) = COPY $sgpr4 - %2:_(s32) = COPY $vgpr0 - %3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x i32>) = COPY [[COPY]](<4 x i32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32), [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY3]](<4 x i32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[COPY4]], [[UV]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C1]] + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: 
[[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP1]](i1), [[COPY5]], [[UV1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C2]] + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP2]](i1), [[COPY6]], [[UV2]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C3]] + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP3]](i1), [[COPY7]], [[UV3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x i32>) = G_BUILD_VECTOR [[SELECT]](i32), [[SELECT1]](i32), [[SELECT2]](i32), [[SELECT3]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) + %0:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %1:_(i32) = COPY $sgpr4 + %2:_(i32) = COPY $vgpr0 + %3:_(<4 x i32>) = G_INSERT_VECTOR_ELT %0, %1(i32), %2(i32) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3(<4 x i32>) ... --- @@ -171,30 +171,30 @@ body: | ; CHECK-LABEL: name: insert_vector_elt_v4i32_s_v_v ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](<4 x s32>) - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY1]], [[UV]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C1]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[COPY1]], [[UV1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C2]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[COPY1]], [[UV2]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C3]] - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[COPY1]], [[UV3]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32), [[SELECT2]](s32), [[SELECT3]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) - %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = COPY $vgpr1 - %3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x i32>) = COPY [[COPY]](<4 x i32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32), [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY3]](<4 x i32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = 
G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[COPY1]], [[UV]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C1]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP1]](i1), [[COPY1]], [[UV1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C2]] + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP2]](i1), [[COPY1]], [[UV2]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C3]] + ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP3]](i1), [[COPY1]], [[UV3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x i32>) = G_BUILD_VECTOR [[SELECT]](i32), [[SELECT1]](i32), [[SELECT2]](i32), [[SELECT3]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) + %0:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = COPY $vgpr1 + %3:_(<4 x i32>) = G_INSERT_VECTOR_ELT %0, %1(i32), %2(i32) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3(<4 x i32>) ... --- @@ -209,33 +209,33 @@ body: | ; CHECK-LABEL: name: insert_vector_elt_var_v4i32_v_s_v ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr4, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY3]], [[UV]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C1]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[COPY4]], [[UV1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C2]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[COPY5]], [[UV2]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C3]] - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[COPY6]], [[UV3]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32), [[SELECT2]](s32), [[SELECT3]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = COPY $sgpr4 - %2:_(s32) = COPY $vgpr0 - %3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x i32>) = COPY 
$vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32), [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[COPY3]], [[UV]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C1]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP1]](i1), [[COPY4]], [[UV1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C2]] + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP2]](i1), [[COPY5]], [[UV2]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C3]] + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP3]](i1), [[COPY6]], [[UV3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x i32>) = G_BUILD_VECTOR [[SELECT]](i32), [[SELECT1]](i32), [[SELECT2]](i32), [[SELECT3]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i32) = COPY $sgpr4 + %2:_(i32) = COPY $vgpr0 + %3:_(<4 x i32>) = G_INSERT_VECTOR_ELT %0, %1(i32), %2(i32) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3(<4 x i32>) ... 
--- @@ -250,30 +250,30 @@ body: | ; CHECK-LABEL: name: insert_vector_elt_var_v4i32_v_v_s ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4, $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY3]](s32), [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY1]], [[UV]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY3]](s32), [[C1]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[COPY1]], [[UV1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY3]](s32), [[C2]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[COPY1]], [[UV2]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY3]](s32), [[C3]] - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[COPY1]], [[UV3]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32), [[SELECT2]](s32), [[SELECT3]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = COPY $sgpr0 - %3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32), [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY3]](i32), [[C]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[COPY1]], [[UV]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY3]](i32), [[C1]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP1]](i1), [[COPY1]], [[UV1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY3]](i32), [[C2]] + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP2]](i1), [[COPY1]], [[UV2]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY3]](i32), [[C3]] + ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP3]](i1), [[COPY1]], [[UV3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x i32>) = G_BUILD_VECTOR [[SELECT]](i32), [[SELECT1]](i32), [[SELECT2]](i32), [[SELECT3]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) + %0:_(<4 x 
i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = COPY $sgpr0 + %3:_(<4 x i32>) = G_INSERT_VECTOR_ELT %0, %1(i32), %2(i32) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3(<4 x i32>) ... --- @@ -288,29 +288,29 @@ body: | ; CHECK-LABEL: name: insert_vector_elt_var_v4i32_v_v_v ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4, $vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY1]], [[UV]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C1]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[COPY1]], [[UV1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C2]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[COPY1]], [[UV2]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C3]] - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[COPY1]], [[UV3]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32), [[SELECT2]](s32), [[SELECT3]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = COPY $vgpr4 - %2:_(s32) = COPY $vgpr5 - %3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32), [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](<4 x i32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[COPY1]], [[UV]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C1]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP1]](i1), [[COPY1]], [[UV1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C2]] + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP2]](i1), [[COPY1]], [[UV2]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C3]] + ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP3]](i1), [[COPY1]], [[UV3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x i32>) = G_BUILD_VECTOR [[SELECT]](i32), [[SELECT1]](i32), [[SELECT2]](i32), [[SELECT3]](i32) + ; CHECK-NEXT: 
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x i32>) + %0:_(<4 x i32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(i32) = COPY $vgpr4 + %2:_(i32) = COPY $vgpr5 + %3:_(<4 x i32>) = G_INSERT_VECTOR_ELT %0, %1(i32), %2(i32) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3(<4 x i32>) ... --- @@ -325,16 +325,16 @@ body: | ; CHECK-LABEL: name: insert_vector_elt_v8s64_s_s_s ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $sgpr16_sgpr17, $sgpr18 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr16_sgpr17 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr18 - ; CHECK-NEXT: [[IVEC:%[0-9]+]]:sgpr(<8 x s64>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s64), [[COPY2]](s32) - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[IVEC]](<8 x s64>) - %0:_(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - %1:_(s64) = COPY $sgpr16_sgpr17 - %2:_(s32) = COPY $sgpr18 - %3:_(<8 x s64>) = G_INSERT_VECTOR_ELT %0, %1, %2 - $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<8 x i64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i64) = COPY $sgpr16_sgpr17 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr18 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:sgpr(<8 x i64>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](i64), [[COPY2]](i32) + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[IVEC]](<8 x i64>) + %0:_(<8 x i64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + %1:_(i64) = COPY $sgpr16_sgpr17 + %2:_(i32) = COPY $sgpr18 + %3:_(<8 x i64>) = G_INSERT_VECTOR_ELT %0, %1(i64), %2(i32) + $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %3(<8 x i64>) ... 
--- @@ -349,23 +349,23 @@ body: | ; CHECK-LABEL: name: insert_vector_elt_v8s64_v_s_s ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $sgpr16_sgpr17, $sgpr18 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr16_sgpr17 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr18 - ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<16 x s32>) = G_BITCAST [[COPY]](<8 x s64>) - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY2]], [[C]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[SHL]], [[C]] - ; CHECK-NEXT: [[IVEC:%[0-9]+]]:vgpr(<16 x s32>) = G_INSERT_VECTOR_ELT [[BITCAST]], [[UV]](s32), [[SHL]](s32) - ; CHECK-NEXT: [[IVEC1:%[0-9]+]]:vgpr(<16 x s32>) = G_INSERT_VECTOR_ELT [[IVEC]], [[UV1]](s32), [[ADD]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<8 x s64>) = G_BITCAST [[IVEC1]](<16 x s32>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST1]](<8 x s64>) - %0:_(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - %1:_(s64) = COPY $sgpr16_sgpr17 - %2:_(s32) = COPY $sgpr18 - %3:_(<8 x s64>) = G_INSERT_VECTOR_ELT %0, %1, %2 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<8 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i64) = COPY $sgpr16_sgpr17 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr18 + ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(i32), [[UV1:%[0-9]+]]:sgpr(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<16 x i32>) = G_BITCAST [[COPY]](<8 x i64>) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(i32) = G_SHL [[COPY2]], [[C]](i32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(i32) = G_ADD [[SHL]], [[C]] + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:vgpr(<16 x i32>) = G_INSERT_VECTOR_ELT [[BITCAST]], [[UV]](i32), [[SHL]](i32) + ; CHECK-NEXT: [[IVEC1:%[0-9]+]]:vgpr(<16 x i32>) = G_INSERT_VECTOR_ELT [[IVEC]], [[UV1]](i32), [[ADD]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<8 x i64>) = G_BITCAST [[IVEC1]](<16 x i32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST1]](<8 x i64>) + %0:_(<8 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:_(i64) = COPY $sgpr16_sgpr17 + %2:_(i32) = COPY $sgpr18 + %3:_(<8 x i64>) = G_INSERT_VECTOR_ELT %0, %1(i64), %2(i32) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3(<8 x i64>) ... 
--- @@ -380,24 +380,24 @@ body: | ; CHECK-LABEL: name: insert_vector_elt_v8s64_s_v_s ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $vgpr0_vgpr1, $sgpr16 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr16 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<8 x s64>) = COPY [[COPY]](<8 x s64>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<16 x s32>) = G_BITCAST [[COPY3]](<8 x s64>) - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY2]], [[C]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[SHL]], [[C]] - ; CHECK-NEXT: [[IVEC:%[0-9]+]]:vgpr(<16 x s32>) = G_INSERT_VECTOR_ELT [[BITCAST]], [[UV]](s32), [[SHL]](s32) - ; CHECK-NEXT: [[IVEC1:%[0-9]+]]:vgpr(<16 x s32>) = G_INSERT_VECTOR_ELT [[IVEC]], [[UV1]](s32), [[ADD]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<8 x s64>) = G_BITCAST [[IVEC1]](<16 x s32>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST1]](<8 x s64>) - %0:_(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - %1:_(s64) = COPY $vgpr0_vgpr1 - %2:_(s32) = COPY $sgpr16 - %3:_(<8 x s64>) = G_INSERT_VECTOR_ELT %0, %1, %2 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<8 x i64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr16 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<8 x i64>) = COPY [[COPY]](<8 x i64>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<16 x i32>) = G_BITCAST [[COPY3]](<8 x i64>) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(i32) = G_SHL [[COPY2]], [[C]](i32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(i32) = G_ADD [[SHL]], [[C]] + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:vgpr(<16 x i32>) = G_INSERT_VECTOR_ELT [[BITCAST]], [[UV]](i32), [[SHL]](i32) + ; CHECK-NEXT: [[IVEC1:%[0-9]+]]:vgpr(<16 x i32>) = G_INSERT_VECTOR_ELT [[IVEC]], [[UV1]](i32), [[ADD]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<8 x i64>) = G_BITCAST [[IVEC1]](<16 x i32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST1]](<8 x i64>) + %0:_(<8 x i64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + %1:_(i64) = COPY $vgpr0_vgpr1 + %2:_(i32) = COPY $sgpr16 + %3:_(<8 x i64>) = G_INSERT_VECTOR_ELT %0, %1(i64), %2(i32) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3(<8 x i64>) ... 
--- @@ -412,68 +412,68 @@ body: | ; CHECK-LABEL: name: insert_vector_elt_v8s64_s_s_v ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $sgpr16_sgpr17, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr16_sgpr17 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<8 x s64>) = COPY [[COPY]](<8 x s64>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32), [[UV16:%[0-9]+]]:vgpr(s32), [[UV17:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](<8 x s64>) - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY4]], [[UV2]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY5]], [[UV3]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C1]] - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[COPY6]], [[UV4]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[COPY7]], [[UV5]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C2]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[COPY8]], [[UV6]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[COPY9]], [[UV7]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C3]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[COPY10]], [[UV8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[COPY11]], [[UV9]] - ; CHECK-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C4]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[SELECT8:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[COPY12]], [[UV10]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: [[SELECT9:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[COPY13]], [[UV11]] - ; CHECK-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 - 
; CHECK-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C5]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[SELECT10:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[COPY14]], [[UV12]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: [[SELECT11:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[COPY15]], [[UV13]] - ; CHECK-NEXT: [[C6:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C6]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[COPY16]], [[UV14]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[COPY17]], [[UV15]] - ; CHECK-NEXT: [[C7:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 7 - ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C7]] - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[SELECT14:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP7]](s1), [[COPY18]], [[UV16]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: [[SELECT15:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP7]](s1), [[COPY19]], [[UV17]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<16 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32), [[SELECT2]](s32), [[SELECT3]](s32), [[SELECT4]](s32), [[SELECT5]](s32), [[SELECT6]](s32), [[SELECT7]](s32), [[SELECT8]](s32), [[SELECT9]](s32), [[SELECT10]](s32), [[SELECT11]](s32), [[SELECT12]](s32), [[SELECT13]](s32), [[SELECT14]](s32), [[SELECT15]](s32) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<8 x s64>) = G_BITCAST [[BUILD_VECTOR]](<16 x s32>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](<8 x s64>) - %0:_(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - %1:_(s64) = COPY $sgpr16_sgpr17 - %2:_(s32) = COPY $vgpr0 - %3:_(<8 x s64>) = G_INSERT_VECTOR_ELT %0, %1, %2 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<8 x i64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i64) = COPY $sgpr16_sgpr17 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<8 x i64>) = COPY [[COPY]](<8 x i64>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(i32), [[UV1:%[0-9]+]]:sgpr(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32), [[UV4:%[0-9]+]]:vgpr(i32), [[UV5:%[0-9]+]]:vgpr(i32), [[UV6:%[0-9]+]]:vgpr(i32), [[UV7:%[0-9]+]]:vgpr(i32), [[UV8:%[0-9]+]]:vgpr(i32), [[UV9:%[0-9]+]]:vgpr(i32), [[UV10:%[0-9]+]]:vgpr(i32), [[UV11:%[0-9]+]]:vgpr(i32), [[UV12:%[0-9]+]]:vgpr(i32), [[UV13:%[0-9]+]]:vgpr(i32), [[UV14:%[0-9]+]]:vgpr(i32), [[UV15:%[0-9]+]]:vgpr(i32), [[UV16:%[0-9]+]]:vgpr(i32), [[UV17:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY3]](<8 x i64>) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[UV]](i32) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[COPY4]], [[UV2]] + ; 
CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[UV1]](i32) + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[COPY5]], [[UV3]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C1]] + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY [[UV]](i32) + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP1]](i1), [[COPY6]], [[UV4]] + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(i32) = COPY [[UV1]](i32) + ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP1]](i1), [[COPY7]], [[UV5]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C2]] + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr(i32) = COPY [[UV]](i32) + ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP2]](i1), [[COPY8]], [[UV6]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr(i32) = COPY [[UV1]](i32) + ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP2]](i1), [[COPY9]], [[UV7]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C3]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr(i32) = COPY [[UV]](i32) + ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP3]](i1), [[COPY10]], [[UV8]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr(i32) = COPY [[UV1]](i32) + ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP3]](i1), [[COPY11]], [[UV9]] + ; CHECK-NEXT: [[C4:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C4]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr(i32) = COPY [[UV]](i32) + ; CHECK-NEXT: [[SELECT8:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP4]](i1), [[COPY12]], [[UV10]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr(i32) = COPY [[UV1]](i32) + ; CHECK-NEXT: [[SELECT9:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP4]](i1), [[COPY13]], [[UV11]] + ; CHECK-NEXT: [[C5:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 5 + ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C5]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr(i32) = COPY [[UV]](i32) + ; CHECK-NEXT: [[SELECT10:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP5]](i1), [[COPY14]], [[UV12]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vgpr(i32) = COPY [[UV1]](i32) + ; CHECK-NEXT: [[SELECT11:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP5]](i1), [[COPY15]], [[UV13]] + ; CHECK-NEXT: [[C6:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 6 + ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C6]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vgpr(i32) = COPY [[UV]](i32) + ; CHECK-NEXT: [[SELECT12:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP6]](i1), [[COPY16]], [[UV14]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vgpr(i32) = COPY [[UV1]](i32) + ; CHECK-NEXT: [[SELECT13:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP6]](i1), [[COPY17]], [[UV15]] + ; CHECK-NEXT: [[C7:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C7]] + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:vgpr(i32) = COPY [[UV]](i32) + ; CHECK-NEXT: [[SELECT14:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP7]](i1), [[COPY18]], [[UV16]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:vgpr(i32) = COPY [[UV1]](i32) + ; CHECK-NEXT: [[SELECT15:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP7]](i1), [[COPY19]], [[UV17]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<16 x i32>) = G_BUILD_VECTOR [[SELECT]](i32), [[SELECT1]](i32), [[SELECT2]](i32), [[SELECT3]](i32), [[SELECT4]](i32), 
[[SELECT5]](i32), [[SELECT6]](i32), [[SELECT7]](i32), [[SELECT8]](i32), [[SELECT9]](i32), [[SELECT10]](i32), [[SELECT11]](i32), [[SELECT12]](i32), [[SELECT13]](i32), [[SELECT14]](i32), [[SELECT15]](i32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<8 x i64>) = G_BITCAST [[BUILD_VECTOR]](<16 x i32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](<8 x i64>) + %0:_(<8 x i64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + %1:_(i64) = COPY $sgpr16_sgpr17 + %2:_(i32) = COPY $vgpr0 + %3:_(<8 x i64>) = G_INSERT_VECTOR_ELT %0, %1(i64), %2(i32) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3(<8 x i64>) ... --- @@ -488,52 +488,52 @@ body: | ; CHECK-LABEL: name: insert_vector_elt_v8s64_s_v_v ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $vgpr0_vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<8 x s64>) = COPY [[COPY]](<8 x s64>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32), [[UV16:%[0-9]+]]:vgpr(s32), [[UV17:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](<8 x s64>) - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV3]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C1]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV]], [[UV4]] - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV1]], [[UV5]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C2]] - ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV]], [[UV6]] - ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV1]], [[UV7]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C3]] - ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV]], [[UV8]] - ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV1]], [[UV9]] - ; CHECK-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C4]] - ; CHECK-NEXT: [[SELECT8:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV]], [[UV10]] 
- ; CHECK-NEXT: [[SELECT9:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV1]], [[UV11]] - ; CHECK-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 - ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C5]] - ; CHECK-NEXT: [[SELECT10:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV]], [[UV12]] - ; CHECK-NEXT: [[SELECT11:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV1]], [[UV13]] - ; CHECK-NEXT: [[C6:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C6]] - ; CHECK-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV]], [[UV14]] - ; CHECK-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV1]], [[UV15]] - ; CHECK-NEXT: [[C7:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 7 - ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C7]] - ; CHECK-NEXT: [[SELECT14:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP7]](s1), [[UV]], [[UV16]] - ; CHECK-NEXT: [[SELECT15:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP7]](s1), [[UV1]], [[UV17]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<16 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32), [[SELECT2]](s32), [[SELECT3]](s32), [[SELECT4]](s32), [[SELECT5]](s32), [[SELECT6]](s32), [[SELECT7]](s32), [[SELECT8]](s32), [[SELECT9]](s32), [[SELECT10]](s32), [[SELECT11]](s32), [[SELECT12]](s32), [[SELECT13]](s32), [[SELECT14]](s32), [[SELECT15]](s32) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<8 x s64>) = G_BITCAST [[BUILD_VECTOR]](<16 x s32>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](<8 x s64>) - %0:_(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - %1:_(s64) = COPY $vgpr0_vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(<8 x s64>) = G_INSERT_VECTOR_ELT %0, %1, %2 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<8 x i64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<8 x i64>) = COPY [[COPY]](<8 x i64>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32), [[UV4:%[0-9]+]]:vgpr(i32), [[UV5:%[0-9]+]]:vgpr(i32), [[UV6:%[0-9]+]]:vgpr(i32), [[UV7:%[0-9]+]]:vgpr(i32), [[UV8:%[0-9]+]]:vgpr(i32), [[UV9:%[0-9]+]]:vgpr(i32), [[UV10:%[0-9]+]]:vgpr(i32), [[UV11:%[0-9]+]]:vgpr(i32), [[UV12:%[0-9]+]]:vgpr(i32), [[UV13:%[0-9]+]]:vgpr(i32), [[UV14:%[0-9]+]]:vgpr(i32), [[UV15:%[0-9]+]]:vgpr(i32), [[UV16:%[0-9]+]]:vgpr(i32), [[UV17:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY3]](<8 x i64>) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV]], [[UV2]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV1]], [[UV3]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C1]] + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP1]](i1), [[UV]], [[UV4]] + ; 
CHECK-NEXT: [[SELECT3:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP1]](i1), [[UV1]], [[UV5]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C2]] + ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP2]](i1), [[UV]], [[UV6]] + ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP2]](i1), [[UV1]], [[UV7]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C3]] + ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP3]](i1), [[UV]], [[UV8]] + ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP3]](i1), [[UV1]], [[UV9]] + ; CHECK-NEXT: [[C4:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C4]] + ; CHECK-NEXT: [[SELECT8:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP4]](i1), [[UV]], [[UV10]] + ; CHECK-NEXT: [[SELECT9:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP4]](i1), [[UV1]], [[UV11]] + ; CHECK-NEXT: [[C5:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 5 + ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C5]] + ; CHECK-NEXT: [[SELECT10:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP5]](i1), [[UV]], [[UV12]] + ; CHECK-NEXT: [[SELECT11:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP5]](i1), [[UV1]], [[UV13]] + ; CHECK-NEXT: [[C6:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 6 + ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C6]] + ; CHECK-NEXT: [[SELECT12:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP6]](i1), [[UV]], [[UV14]] + ; CHECK-NEXT: [[SELECT13:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP6]](i1), [[UV1]], [[UV15]] + ; CHECK-NEXT: [[C7:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C7]] + ; CHECK-NEXT: [[SELECT14:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP7]](i1), [[UV]], [[UV16]] + ; CHECK-NEXT: [[SELECT15:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP7]](i1), [[UV1]], [[UV17]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<16 x i32>) = G_BUILD_VECTOR [[SELECT]](i32), [[SELECT1]](i32), [[SELECT2]](i32), [[SELECT3]](i32), [[SELECT4]](i32), [[SELECT5]](i32), [[SELECT6]](i32), [[SELECT7]](i32), [[SELECT8]](i32), [[SELECT9]](i32), [[SELECT10]](i32), [[SELECT11]](i32), [[SELECT12]](i32), [[SELECT13]](i32), [[SELECT14]](i32), [[SELECT15]](i32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<8 x i64>) = G_BITCAST [[BUILD_VECTOR]](<16 x i32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](<8 x i64>) + %0:_(<8 x i64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + %1:_(i64) = COPY $vgpr0_vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(<8 x i64>) = G_INSERT_VECTOR_ELT %0, %1(i64), %2(i32) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3(<8 x i64>) ... 
--- @@ -548,23 +548,23 @@ body: | ; CHECK-LABEL: name: insert_vector_elt_v8s64_v_v_s ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17, $sgpr18 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr16_vgpr17 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr18 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<16 x s32>) = G_BITCAST [[COPY]](<8 x s64>) - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY2]], [[C]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[SHL]], [[C]] - ; CHECK-NEXT: [[IVEC:%[0-9]+]]:vgpr(<16 x s32>) = G_INSERT_VECTOR_ELT [[BITCAST]], [[UV]](s32), [[SHL]](s32) - ; CHECK-NEXT: [[IVEC1:%[0-9]+]]:vgpr(<16 x s32>) = G_INSERT_VECTOR_ELT [[IVEC]], [[UV1]](s32), [[ADD]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<8 x s64>) = G_BITCAST [[IVEC1]](<16 x s32>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST1]](<8 x s64>) - %0:_(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - %1:_(s64) = COPY $vgpr16_vgpr17 - %2:_(s32) = COPY $sgpr18 - %3:_(<8 x s64>) = G_INSERT_VECTOR_ELT %0, %1, %2 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<8 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr16_vgpr17 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr18 + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<16 x i32>) = G_BITCAST [[COPY]](<8 x i64>) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(i32) = G_SHL [[COPY2]], [[C]](i32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(i32) = G_ADD [[SHL]], [[C]] + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:vgpr(<16 x i32>) = G_INSERT_VECTOR_ELT [[BITCAST]], [[UV]](i32), [[SHL]](i32) + ; CHECK-NEXT: [[IVEC1:%[0-9]+]]:vgpr(<16 x i32>) = G_INSERT_VECTOR_ELT [[IVEC]], [[UV1]](i32), [[ADD]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<8 x i64>) = G_BITCAST [[IVEC1]](<16 x i32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST1]](<8 x i64>) + %0:_(<8 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:_(i64) = COPY $vgpr16_vgpr17 + %2:_(i32) = COPY $sgpr18 + %3:_(<8 x i64>) = G_INSERT_VECTOR_ELT %0, %1(i64), %2(i32) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3(<8 x i64>) ... 
--- @@ -579,67 +579,67 @@ body: | ; CHECK-LABEL: name: insert_vector_elt_v8s64_v_s_v ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $sgpr0_sgpr1, $vgpr16 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr16 - ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32), [[UV16:%[0-9]+]]:vgpr(s32), [[UV17:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s64>) - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY3]], [[UV2]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY4]], [[UV3]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C1]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[COPY5]], [[UV4]] - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[COPY6]], [[UV5]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C2]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[COPY7]], [[UV6]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[COPY8]], [[UV7]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C3]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[COPY9]], [[UV8]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[COPY10]], [[UV9]] - ; CHECK-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C4]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[SELECT8:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[COPY11]], [[UV10]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: [[SELECT9:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[COPY12]], [[UV11]] - ; CHECK-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 - ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), 
[[C5]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[SELECT10:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[COPY13]], [[UV12]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: [[SELECT11:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[COPY14]], [[UV13]] - ; CHECK-NEXT: [[C6:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C6]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[COPY15]], [[UV14]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[COPY16]], [[UV15]] - ; CHECK-NEXT: [[C7:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 7 - ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C7]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[SELECT14:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP7]](s1), [[COPY17]], [[UV16]] - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: [[SELECT15:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP7]](s1), [[COPY18]], [[UV17]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<16 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32), [[SELECT2]](s32), [[SELECT3]](s32), [[SELECT4]](s32), [[SELECT5]](s32), [[SELECT6]](s32), [[SELECT7]](s32), [[SELECT8]](s32), [[SELECT9]](s32), [[SELECT10]](s32), [[SELECT11]](s32), [[SELECT12]](s32), [[SELECT13]](s32), [[SELECT14]](s32), [[SELECT15]](s32) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<8 x s64>) = G_BITCAST [[BUILD_VECTOR]](<16 x s32>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](<8 x s64>) - %0:_(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - %1:_(s64) = COPY $sgpr0_sgpr1 - %2:_(s32) = COPY $vgpr16 - %3:_(<8 x s64>) = G_INSERT_VECTOR_ELT %0, %1, %2 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<8 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr16 + ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(i32), [[UV1:%[0-9]+]]:sgpr(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32), [[UV4:%[0-9]+]]:vgpr(i32), [[UV5:%[0-9]+]]:vgpr(i32), [[UV6:%[0-9]+]]:vgpr(i32), [[UV7:%[0-9]+]]:vgpr(i32), [[UV8:%[0-9]+]]:vgpr(i32), [[UV9:%[0-9]+]]:vgpr(i32), [[UV10:%[0-9]+]]:vgpr(i32), [[UV11:%[0-9]+]]:vgpr(i32), [[UV12:%[0-9]+]]:vgpr(i32), [[UV13:%[0-9]+]]:vgpr(i32), [[UV14:%[0-9]+]]:vgpr(i32), [[UV15:%[0-9]+]]:vgpr(i32), [[UV16:%[0-9]+]]:vgpr(i32), [[UV17:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](<8 x i64>) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[UV]](i32) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[COPY3]], [[UV2]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[UV1]](i32) + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[COPY4]], [[UV3]] + ; 
CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C1]] + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[UV]](i32) + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP1]](i1), [[COPY5]], [[UV4]] + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY [[UV1]](i32) + ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP1]](i1), [[COPY6]], [[UV5]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C2]] + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(i32) = COPY [[UV]](i32) + ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP2]](i1), [[COPY7]], [[UV6]] + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr(i32) = COPY [[UV1]](i32) + ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP2]](i1), [[COPY8]], [[UV7]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C3]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr(i32) = COPY [[UV]](i32) + ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP3]](i1), [[COPY9]], [[UV8]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr(i32) = COPY [[UV1]](i32) + ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP3]](i1), [[COPY10]], [[UV9]] + ; CHECK-NEXT: [[C4:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C4]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr(i32) = COPY [[UV]](i32) + ; CHECK-NEXT: [[SELECT8:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP4]](i1), [[COPY11]], [[UV10]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr(i32) = COPY [[UV1]](i32) + ; CHECK-NEXT: [[SELECT9:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP4]](i1), [[COPY12]], [[UV11]] + ; CHECK-NEXT: [[C5:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 5 + ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr(i32) = COPY [[UV]](i32) + ; CHECK-NEXT: [[SELECT10:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP5]](i1), [[COPY13]], [[UV12]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr(i32) = COPY [[UV1]](i32) + ; CHECK-NEXT: [[SELECT11:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP5]](i1), [[COPY14]], [[UV13]] + ; CHECK-NEXT: [[C6:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 6 + ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C6]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vgpr(i32) = COPY [[UV]](i32) + ; CHECK-NEXT: [[SELECT12:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP6]](i1), [[COPY15]], [[UV14]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vgpr(i32) = COPY [[UV1]](i32) + ; CHECK-NEXT: [[SELECT13:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP6]](i1), [[COPY16]], [[UV15]] + ; CHECK-NEXT: [[C7:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C7]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vgpr(i32) = COPY [[UV]](i32) + ; CHECK-NEXT: [[SELECT14:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP7]](i1), [[COPY17]], [[UV16]] + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:vgpr(i32) = COPY [[UV1]](i32) + ; CHECK-NEXT: [[SELECT15:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP7]](i1), [[COPY18]], [[UV17]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<16 x i32>) = G_BUILD_VECTOR [[SELECT]](i32), [[SELECT1]](i32), [[SELECT2]](i32), [[SELECT3]](i32), [[SELECT4]](i32), [[SELECT5]](i32), [[SELECT6]](i32), [[SELECT7]](i32), [[SELECT8]](i32), [[SELECT9]](i32), [[SELECT10]](i32), [[SELECT11]](i32), [[SELECT12]](i32), [[SELECT13]](i32), 
[[SELECT14]](i32), [[SELECT15]](i32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<8 x i64>) = G_BITCAST [[BUILD_VECTOR]](<16 x i32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](<8 x i64>) + %0:_(<8 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:_(i64) = COPY $sgpr0_sgpr1 + %2:_(i32) = COPY $vgpr16 + %3:_(<8 x i64>) = G_INSERT_VECTOR_ELT %0, %1(i64), %2(i32) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3(<8 x i64>) ... --- @@ -654,51 +654,51 @@ body: | ; CHECK-LABEL: name: insert_vector_elt_v8s64_v_v_v ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17, $vgpr18 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr16_vgpr17 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr18 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32), [[UV16:%[0-9]+]]:vgpr(s32), [[UV17:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s64>) - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV3]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C1]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV]], [[UV4]] - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV1]], [[UV5]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C2]] - ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV]], [[UV6]] - ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV1]], [[UV7]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C3]] - ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV]], [[UV8]] - ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV1]], [[UV9]] - ; CHECK-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C4]] - ; CHECK-NEXT: [[SELECT8:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV]], [[UV10]] - ; CHECK-NEXT: [[SELECT9:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV1]], [[UV11]] - ; CHECK-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 - ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C5]] - 
; CHECK-NEXT: [[SELECT10:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV]], [[UV12]] - ; CHECK-NEXT: [[SELECT11:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV1]], [[UV13]] - ; CHECK-NEXT: [[C6:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C6]] - ; CHECK-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV]], [[UV14]] - ; CHECK-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV1]], [[UV15]] - ; CHECK-NEXT: [[C7:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 7 - ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C7]] - ; CHECK-NEXT: [[SELECT14:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP7]](s1), [[UV]], [[UV16]] - ; CHECK-NEXT: [[SELECT15:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP7]](s1), [[UV1]], [[UV17]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<16 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32), [[SELECT2]](s32), [[SELECT3]](s32), [[SELECT4]](s32), [[SELECT5]](s32), [[SELECT6]](s32), [[SELECT7]](s32), [[SELECT8]](s32), [[SELECT9]](s32), [[SELECT10]](s32), [[SELECT11]](s32), [[SELECT12]](s32), [[SELECT13]](s32), [[SELECT14]](s32), [[SELECT15]](s32) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<8 x s64>) = G_BITCAST [[BUILD_VECTOR]](<16 x s32>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](<8 x s64>) - %0:_(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - %1:_(s64) = COPY $vgpr16_vgpr17 - %2:_(s32) = COPY $vgpr18 - %3:_(<8 x s64>) = G_INSERT_VECTOR_ELT %0, %1, %2 - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<8 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr16_vgpr17 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr18 + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32), [[UV4:%[0-9]+]]:vgpr(i32), [[UV5:%[0-9]+]]:vgpr(i32), [[UV6:%[0-9]+]]:vgpr(i32), [[UV7:%[0-9]+]]:vgpr(i32), [[UV8:%[0-9]+]]:vgpr(i32), [[UV9:%[0-9]+]]:vgpr(i32), [[UV10:%[0-9]+]]:vgpr(i32), [[UV11:%[0-9]+]]:vgpr(i32), [[UV12:%[0-9]+]]:vgpr(i32), [[UV13:%[0-9]+]]:vgpr(i32), [[UV14:%[0-9]+]]:vgpr(i32), [[UV15:%[0-9]+]]:vgpr(i32), [[UV16:%[0-9]+]]:vgpr(i32), [[UV17:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](<8 x i64>) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV]], [[UV2]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV1]], [[UV3]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C1]] + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP1]](i1), [[UV]], [[UV4]] + ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP1]](i1), [[UV1]], [[UV5]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C2]] + ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP2]](i1), [[UV]], 
[[UV6]] + ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP2]](i1), [[UV1]], [[UV7]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C3]] + ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP3]](i1), [[UV]], [[UV8]] + ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP3]](i1), [[UV1]], [[UV9]] + ; CHECK-NEXT: [[C4:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C4]] + ; CHECK-NEXT: [[SELECT8:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP4]](i1), [[UV]], [[UV10]] + ; CHECK-NEXT: [[SELECT9:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP4]](i1), [[UV1]], [[UV11]] + ; CHECK-NEXT: [[C5:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 5 + ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C5]] + ; CHECK-NEXT: [[SELECT10:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP5]](i1), [[UV]], [[UV12]] + ; CHECK-NEXT: [[SELECT11:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP5]](i1), [[UV1]], [[UV13]] + ; CHECK-NEXT: [[C6:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 6 + ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C6]] + ; CHECK-NEXT: [[SELECT12:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP6]](i1), [[UV]], [[UV14]] + ; CHECK-NEXT: [[SELECT13:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP6]](i1), [[UV1]], [[UV15]] + ; CHECK-NEXT: [[C7:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C7]] + ; CHECK-NEXT: [[SELECT14:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP7]](i1), [[UV]], [[UV16]] + ; CHECK-NEXT: [[SELECT15:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP7]](i1), [[UV1]], [[UV17]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<16 x i32>) = G_BUILD_VECTOR [[SELECT]](i32), [[SELECT1]](i32), [[SELECT2]](i32), [[SELECT3]](i32), [[SELECT4]](i32), [[SELECT5]](i32), [[SELECT6]](i32), [[SELECT7]](i32), [[SELECT8]](i32), [[SELECT9]](i32), [[SELECT10]](i32), [[SELECT11]](i32), [[SELECT12]](i32), [[SELECT13]](i32), [[SELECT14]](i32), [[SELECT15]](i32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<8 x i64>) = G_BITCAST [[BUILD_VECTOR]](<16 x i32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](<8 x i64>) + %0:_(<8 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:_(i64) = COPY $vgpr16_vgpr17 + %2:_(i32) = COPY $vgpr18 + %3:_(<8 x i64>) = G_INSERT_VECTOR_ELT %0, %1(i64), %2(i32) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3(<8 x i64>) ... 
# Make sure nothing goes wrong if the G_INSERT_VECTOR_ELT is the last @@ -714,58 +714,61 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17, $vgpr18 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr16_vgpr17 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr18 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32), [[UV16:%[0-9]+]]:vgpr(s32), [[UV17:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s64>) - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV3]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C1]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV]], [[UV4]] - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV1]], [[UV5]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C2]] - ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV]], [[UV6]] - ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV1]], [[UV7]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C3]] - ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV]], [[UV8]] - ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV1]], [[UV9]] - ; CHECK-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C4]] - ; CHECK-NEXT: [[SELECT8:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV]], [[UV10]] - ; CHECK-NEXT: [[SELECT9:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV1]], [[UV11]] - ; CHECK-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 - ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C5]] - ; CHECK-NEXT: [[SELECT10:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV]], [[UV12]] - ; CHECK-NEXT: [[SELECT11:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV1]], [[UV13]] - ; CHECK-NEXT: [[C6:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C6]] - ; CHECK-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV]], [[UV14]] - ; CHECK-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV1]], [[UV15]] - ; CHECK-NEXT: [[C7:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 7 - ; CHECK-NEXT: 
[[ICMP7:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C7]] - ; CHECK-NEXT: [[SELECT14:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP7]](s1), [[UV]], [[UV16]] - ; CHECK-NEXT: [[SELECT15:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP7]](s1), [[UV1]], [[UV17]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<16 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32), [[SELECT2]](s32), [[SELECT3]](s32), [[SELECT4]](s32), [[SELECT5]](s32), [[SELECT6]](s32), [[SELECT7]](s32), [[SELECT8]](s32), [[SELECT9]](s32), [[SELECT10]](s32), [[SELECT11]](s32), [[SELECT12]](s32), [[SELECT13]](s32), [[SELECT14]](s32), [[SELECT15]](s32) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<8 x s64>) = G_BITCAST [[BUILD_VECTOR]](<16 x s32>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<8 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr16_vgpr17 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr18 + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32), [[UV4:%[0-9]+]]:vgpr(i32), [[UV5:%[0-9]+]]:vgpr(i32), [[UV6:%[0-9]+]]:vgpr(i32), [[UV7:%[0-9]+]]:vgpr(i32), [[UV8:%[0-9]+]]:vgpr(i32), [[UV9:%[0-9]+]]:vgpr(i32), [[UV10:%[0-9]+]]:vgpr(i32), [[UV11:%[0-9]+]]:vgpr(i32), [[UV12:%[0-9]+]]:vgpr(i32), [[UV13:%[0-9]+]]:vgpr(i32), [[UV14:%[0-9]+]]:vgpr(i32), [[UV15:%[0-9]+]]:vgpr(i32), [[UV16:%[0-9]+]]:vgpr(i32), [[UV17:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](<8 x i64>) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV]], [[UV2]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV1]], [[UV3]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C1]] + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP1]](i1), [[UV]], [[UV4]] + ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP1]](i1), [[UV1]], [[UV5]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C2]] + ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP2]](i1), [[UV]], [[UV6]] + ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP2]](i1), [[UV1]], [[UV7]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C3]] + ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP3]](i1), [[UV]], [[UV8]] + ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP3]](i1), [[UV1]], [[UV9]] + ; CHECK-NEXT: [[C4:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C4]] + ; CHECK-NEXT: [[SELECT8:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP4]](i1), [[UV]], [[UV10]] + ; CHECK-NEXT: [[SELECT9:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP4]](i1), [[UV1]], [[UV11]] + ; CHECK-NEXT: [[C5:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 5 + ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C5]] + ; CHECK-NEXT: [[SELECT10:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP5]](i1), [[UV]], [[UV12]] + ; CHECK-NEXT: [[SELECT11:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP5]](i1), [[UV1]], [[UV13]] + ; CHECK-NEXT: 
[[C6:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 6 + ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C6]] + ; CHECK-NEXT: [[SELECT12:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP6]](i1), [[UV]], [[UV14]] + ; CHECK-NEXT: [[SELECT13:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP6]](i1), [[UV1]], [[UV15]] + ; CHECK-NEXT: [[C7:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C7]] + ; CHECK-NEXT: [[SELECT14:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP7]](i1), [[UV]], [[UV16]] + ; CHECK-NEXT: [[SELECT15:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP7]](i1), [[UV1]], [[UV17]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<16 x i32>) = G_BUILD_VECTOR [[SELECT]](i32), [[SELECT1]](i32), [[SELECT2]](i32), [[SELECT3]](i32), [[SELECT4]](i32), [[SELECT5]](i32), [[SELECT6]](i32), [[SELECT7]](i32), [[SELECT8]](i32), [[SELECT9]](i32), [[SELECT10]](i32), [[SELECT11]](i32), [[SELECT12]](i32), [[SELECT13]](i32), [[SELECT14]](i32), [[SELECT15]](i32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<8 x i64>) = G_BITCAST [[BUILD_VECTOR]](<16 x i32>) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](<8 x s64>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](<8 x i64>) bb.0: + successors: %bb.1(0x80000000) liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17, $vgpr18 - %0:_(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - %1:_(s64) = COPY $vgpr16_vgpr17 - %2:_(s32) = COPY $vgpr18 - %3:_(<8 x s64>) = G_INSERT_VECTOR_ELT %0, %1, %2 + %0:_(<8 x i64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:_(i64) = COPY $vgpr16_vgpr17 + %2:_(i32) = COPY $vgpr18 + %3:_(<8 x i64>) = G_INSERT_VECTOR_ELT %0, %1(i64), %2(i32) bb.1: - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3(<8 x i64>) + + ... 
--- @@ -782,30 +785,30 @@ body: | ; CHECK-LABEL: name: insert_vector_elt_with_s_buffer_load ; CHECK: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s32>) = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(s32) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s32>) = COPY [[COPY1]](<2 x s32>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](<2 x s32>) - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C1]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[AMDGPU_S_BUFFER_LOAD]](s32) - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY4]], [[UV]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C2]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[AMDGPU_S_BUFFER_LOAD]](s32) - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[COPY5]], [[UV1]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<2 x s32>) - %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %1:_(<2 x s32>) = COPY $sgpr4_sgpr5 - %2:_(s32) = COPY $vgpr0 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s32) = G_AMDGPU_S_BUFFER_LOAD %0(<4 x s32>), %3(s32), 0 :: (dereferenceable invariant load (s32)) - %5:_(<2 x s32>) = G_INSERT_VECTOR_ELT %1, %4(s32), %2(s32) - - S_ENDPGM 0, implicit %5 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x i32>) = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(i32) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x i32>), [[C]](i32), 0 :: (dereferenceable invariant load (i32)) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x i32>) = COPY [[COPY1]](<2 x i32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY3]](<2 x i32>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C1]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[AMDGPU_S_BUFFER_LOAD]](i32) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[COPY4]], [[UV]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[C2]] + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[AMDGPU_S_BUFFER_LOAD]](i32) + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP1]](i1), [[COPY5]], [[UV1]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x i32>) = G_BUILD_VECTOR [[SELECT]](i32), [[SELECT1]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<2 x i32>) + %0:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %1:_(<2 x i32>) = COPY $sgpr4_sgpr5 + %2:_(i32) = COPY $vgpr0 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i32) = G_AMDGPU_S_BUFFER_LOAD %0(<4 x 
i32>), %3(i32), 0 :: (dereferenceable invariant load (i32)) + %5:_(<2 x i32>) = G_INSERT_VECTOR_ELT %1, %4(i32), %2(i32) + S_ENDPGM 0, implicit %5(<2 x i32>) + ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-insert.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-insert.mir index 55f6fa8d3e0ed..33d00c08e6382 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-insert.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-insert.mir @@ -12,12 +12,12 @@ body: | ; CHECK-LABEL: name: insert_lo32_i64_ss ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:sgpr(s64) = G_INSERT [[COPY]], [[COPY1]](s32), 0 - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s64) = G_INSERT %0, %1, 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:sgpr(i64) = G_INSERT [[COPY]], [[COPY1]](i32), 0 + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(i32) = COPY $sgpr2 + %2:_(i64) = G_INSERT %0, %1(i32), 0 ... --- @@ -30,13 +30,13 @@ body: | ; CHECK-LABEL: name: insert_lo32_i64_sv ; CHECK: liveins: $sgpr0_sgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:vgpr(s64) = G_INSERT [[COPY2]], [[COPY1]](s32), 0 - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s64) = G_INSERT %0, %1, 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i64) = COPY [[COPY]](i64) + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:vgpr(i64) = G_INSERT [[COPY2]], [[COPY1]](i32), 0 + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(i32) = COPY $vgpr2 + %2:_(i64) = G_INSERT %0, %1(i32), 0 ... --- name: insert_lo32_i64_vs @@ -48,13 +48,13 @@ body: | ; CHECK-LABEL: name: insert_lo32_i64_vs ; CHECK: liveins: $vgpr0_vgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:vgpr(s64) = G_INSERT [[COPY]], [[COPY2]](s32), 0 - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s64) = G_INSERT %0, %1, 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:vgpr(i64) = G_INSERT [[COPY]], [[COPY2]](i32), 0 + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = COPY $sgpr2 + %2:_(i64) = G_INSERT %0, %1(i32), 0 ... 
--- name: insert_lo32_i64_vv @@ -66,12 +66,12 @@ body: | ; CHECK-LABEL: name: insert_lo32_i64_vv ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:sgpr(s64) = G_INSERT [[COPY]], [[COPY1]](s32), 0 - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s64) = G_INSERT %0, %1, 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:sgpr(i64) = G_INSERT [[COPY]], [[COPY1]](i32), 0 + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(i32) = COPY $sgpr2 + %2:_(i64) = G_INSERT %0, %1(i32), 0 ... --- @@ -84,12 +84,12 @@ body: | ; CHECK-LABEL: name: insert_lo32_i96_v ; CHECK: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:vgpr(s96) = G_INSERT [[COPY]], [[COPY1]](s32), 0 - %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(s32) = COPY $vgpr3 - %2:_(s96) = G_INSERT %0, %1, 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i96) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:vgpr(i96) = G_INSERT [[COPY]], [[COPY1]](i32), 0 + %0:_(i96) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(i32) = COPY $vgpr3 + %2:_(i96) = G_INSERT %0, %1(i32), 0 ... --- @@ -102,12 +102,12 @@ body: | ; CHECK-LABEL: name: insert_lo32_i64_aa ; CHECK: liveins: $agpr0_agpr1, $agpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(s64) = COPY $agpr0_agpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(s32) = COPY $agpr2 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:agpr(s64) = G_INSERT [[COPY]], [[COPY1]](s32), 0 - %0:_(s64) = COPY $agpr0_agpr1 - %1:_(s32) = COPY $agpr2 - %2:_(s64) = G_INSERT %0, %1, 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(i64) = COPY $agpr0_agpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(i32) = COPY $agpr2 + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:agpr(i64) = G_INSERT [[COPY]], [[COPY1]](i32), 0 + %0:_(i64) = COPY $agpr0_agpr1 + %1:_(i32) = COPY $agpr2 + %2:_(i64) = G_INSERT %0, %1(i32), 0 ... --- @@ -120,13 +120,13 @@ body: | ; CHECK-LABEL: name: insert_lo32_i64_av ; CHECK: liveins: $agpr0_agpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(s64) = COPY $agpr0_agpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:vgpr(s64) = G_INSERT [[COPY2]], [[COPY1]](s32), 0 - %0:_(s64) = COPY $agpr0_agpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s64) = G_INSERT %0, %1, 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(i64) = COPY $agpr0_agpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i64) = COPY [[COPY]](i64) + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:vgpr(i64) = G_INSERT [[COPY2]], [[COPY1]](i32), 0 + %0:_(i64) = COPY $agpr0_agpr1 + %1:_(i32) = COPY $vgpr2 + %2:_(i64) = G_INSERT %0, %1(i32), 0 ... 
--- name: insert_lo32_i64_va @@ -138,13 +138,13 @@ body: | ; CHECK-LABEL: name: insert_lo32_i64_va ; CHECK: liveins: $vgpr0_vgpr1, $agpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(s32) = COPY $agpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:vgpr(s64) = G_INSERT [[COPY]], [[COPY2]](s32), 0 - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $agpr2 - %2:_(s64) = G_INSERT %0, %1, 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(i32) = COPY $agpr2 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:vgpr(i64) = G_INSERT [[COPY]], [[COPY2]](i32), 0 + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = COPY $agpr2 + %2:_(i64) = G_INSERT %0, %1(i32), 0 ... --- @@ -157,14 +157,14 @@ body: | ; CHECK-LABEL: name: insert_lo32_i64_as ; CHECK: liveins: $agpr0_agpr1, $sgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(s64) = COPY $agpr0_agpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:vgpr(s64) = G_INSERT [[COPY2]], [[COPY3]](s32), 0 - %0:_(s64) = COPY $agpr0_agpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s64) = G_INSERT %0, %1, 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(i64) = COPY $agpr0_agpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i64) = COPY [[COPY]](i64) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:vgpr(i64) = G_INSERT [[COPY2]], [[COPY3]](i32), 0 + %0:_(i64) = COPY $agpr0_agpr1 + %1:_(i32) = COPY $sgpr2 + %2:_(i64) = G_INSERT %0, %1(i32), 0 ... --- name: insert_lo32_i64_sa @@ -176,12 +176,12 @@ body: | ; CHECK-LABEL: name: insert_lo32_i64_sa ; CHECK: liveins: $sgpr0_sgpr1, $agpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(s32) = COPY $agpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:vgpr(s64) = G_INSERT [[COPY2]], [[COPY3]](s32), 0 - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $agpr2 - %2:_(s64) = G_INSERT %0, %1, 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(i32) = COPY $agpr2 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i64) = COPY [[COPY]](i64) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:vgpr(i64) = G_INSERT [[COPY2]], [[COPY3]](i32), 0 + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(i32) = COPY $agpr2 + %2:_(i64) = G_INSERT %0, %1(i32), 0 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-intrinsic-trunc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-intrinsic-trunc.mir index eca10bf2effbf..a785a820243c8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-intrinsic-trunc.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-intrinsic-trunc.mir @@ -12,11 +12,13 @@ body: | ; CHECK-LABEL: name: intrinsic_trunc_s ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_TRUNC [[COPY1]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = G_INTRINSIC_TRUNC %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST]](f32) + ; CHECK-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:vgpr(f32) = G_INTRINSIC_TRUNC [[COPY1]] + %0:_(i32) = COPY $sgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = G_INTRINSIC_TRUNC %1 ... --- @@ -29,8 +31,10 @@ body: | ; CHECK-LABEL: name: intrinsic_trunc_v ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_TRUNC [[COPY]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_INTRINSIC_TRUNC %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:vgpr(f32) = G_INTRINSIC_TRUNC [[BITCAST]] + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = G_INTRINSIC_TRUNC %1 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-inttoptr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-inttoptr.mir index 053aede615f86..af5f02fb9ff49 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-inttoptr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-inttoptr.mir @@ -12,10 +12,10 @@ body: | ; CHECK-LABEL: name: inttoptr_s ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:sgpr(p4) = G_INTTOPTR [[COPY]](s64) - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(p4) = G_INTTOPTR %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:sgpr(p4) = G_INTTOPTR [[COPY]](i64) + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(p4) = G_INTTOPTR %0(i64) ... --- @@ -28,8 +28,8 @@ body: | ; CHECK-LABEL: name: inttoptr_v ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:vgpr(p0) = G_INTTOPTR [[COPY]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(p0) = G_INTTOPTR %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:vgpr(p0) = G_INTTOPTR [[COPY]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(p0) = G_INTTOPTR %0(i64) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir index bf1dcad80e4ec..ee0ab9e2d1aa7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir @@ -119,23 +119,23 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY1]](p1) :: (load (<4 x s32>) from %ir.global.not.uniform.v8i32, align 32, addrspace 1) - ; GCN-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY1]], [[C]](s64) - ; GCN-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<4 x s32>) from %ir.global.not.uniform.v8i32 + 16, basealign 32, addrspace 1) - ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) - ; GCN-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GCN-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV]] - ; GCN-NEXT: [[AMDGPU_READANYLANE1:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV1]] - ; GCN-NEXT: [[AMDGPU_READANYLANE2:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV2]] - ; GCN-NEXT: [[AMDGPU_READANYLANE3:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV3]] - ; GCN-NEXT: [[AMDGPU_READANYLANE4:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV4]] - ; GCN-NEXT: [[AMDGPU_READANYLANE5:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV5]] - ; GCN-NEXT: [[AMDGPU_READANYLANE6:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV6]] - ; GCN-NEXT: [[AMDGPU_READANYLANE7:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV7]] - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[AMDGPU_READANYLANE]](s32), [[AMDGPU_READANYLANE1]](s32), [[AMDGPU_READANYLANE2]](s32), [[AMDGPU_READANYLANE3]](s32), [[AMDGPU_READANYLANE4]](s32), [[AMDGPU_READANYLANE5]](s32), [[AMDGPU_READANYLANE6]](s32), [[AMDGPU_READANYLANE7]](s32) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(<4 x i32>) = G_LOAD [[COPY1]](p1) :: (load (<4 x i32>) from %ir.global.not.uniform.v8i32, align 32, addrspace 1) + ; GCN-NEXT: [[C:%[0-9]+]]:vgpr(i64) = G_CONSTANT i64 16 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY1]], [[C]](i64) + ; GCN-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<4 x i32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<4 x i32>) from %ir.global.not.uniform.v8i32 + 16, basealign 32, addrspace 1) + ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>) + ; GCN-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32), [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32), [[UV4:%[0-9]+]]:vgpr(i32), [[UV5:%[0-9]+]]:vgpr(i32), [[UV6:%[0-9]+]]:vgpr(i32), [[UV7:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x i32>) + ; GCN-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV]] + ; GCN-NEXT: [[AMDGPU_READANYLANE1:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV1]] + ; GCN-NEXT: [[AMDGPU_READANYLANE2:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV2]] + ; GCN-NEXT: [[AMDGPU_READANYLANE3:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV3]] + ; GCN-NEXT: 
[[AMDGPU_READANYLANE4:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV4]] + ; GCN-NEXT: [[AMDGPU_READANYLANE5:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV5]] + ; GCN-NEXT: [[AMDGPU_READANYLANE6:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV6]] + ; GCN-NEXT: [[AMDGPU_READANYLANE7:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV7]] + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x i32>) = G_BUILD_VECTOR [[AMDGPU_READANYLANE]](i32), [[AMDGPU_READANYLANE1]](i32), [[AMDGPU_READANYLANE2]](i32), [[AMDGPU_READANYLANE3]](i32), [[AMDGPU_READANYLANE4]](i32), [[AMDGPU_READANYLANE5]](i32), [[AMDGPU_READANYLANE6]](i32), [[AMDGPU_READANYLANE7]](i32) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(<8 x s32>) = G_LOAD %0 :: (load (<8 x s32>) from %ir.global.not.uniform.v8i32) + %1:_(<8 x i32>) = G_LOAD %0(p1) :: (load (<8 x i32>) from %ir.global.not.uniform.v8i32, addrspace 1) ... --- @@ -152,31 +152,44 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[COPY1]](p1) :: (load (<2 x s64>) from %ir.global.not.uniform.v4i64, align 32, addrspace 1) - ; GCN-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY1]], [[C]](s64) - ; GCN-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD]](p1) :: (load (<2 x s64>) from %ir.global.not.uniform.v4i64 + 16, basealign 32, addrspace 1) - ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) - ; GCN-NEXT: [[UV:%[0-9]+]]:vgpr(s64), [[UV1:%[0-9]+]]:vgpr(s64), [[UV2:%[0-9]+]]:vgpr(s64), [[UV3:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s64>) - ; GCN-NEXT: [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GCN-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV4]] - ; GCN-NEXT: [[AMDGPU_READANYLANE1:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV5]] - ; GCN-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[AMDGPU_READANYLANE]](s32), [[AMDGPU_READANYLANE1]](s32) - ; GCN-NEXT: [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GCN-NEXT: [[AMDGPU_READANYLANE2:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV6]] - ; GCN-NEXT: [[AMDGPU_READANYLANE3:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV7]] - ; GCN-NEXT: [[MV1:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[AMDGPU_READANYLANE2]](s32), [[AMDGPU_READANYLANE3]](s32) - ; GCN-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GCN-NEXT: [[AMDGPU_READANYLANE4:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV8]] - ; GCN-NEXT: [[AMDGPU_READANYLANE5:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV9]] - ; GCN-NEXT: [[MV2:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[AMDGPU_READANYLANE4]](s32), [[AMDGPU_READANYLANE5]](s32) - ; GCN-NEXT: [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GCN-NEXT: [[AMDGPU_READANYLANE6:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV10]] - ; GCN-NEXT: [[AMDGPU_READANYLANE7:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV11]] - ; GCN-NEXT: [[MV3:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[AMDGPU_READANYLANE6]](s32), [[AMDGPU_READANYLANE7]](s32) - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x f64>) = G_LOAD [[COPY1]](p1) :: (load (<2 x f64>) from 
%ir.global.not.uniform.v4i64, align 32, addrspace 1) + ; GCN-NEXT: [[C:%[0-9]+]]:vgpr(i64) = G_CONSTANT i64 16 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY1]], [[C]](i64) + ; GCN-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<2 x f64>) = G_LOAD [[PTR_ADD]](p1) :: (load (<2 x f64>) from %ir.global.not.uniform.v4i64 + 16, basealign 32, addrspace 1) + ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x f64>) = G_CONCAT_VECTORS [[LOAD]](<2 x f64>), [[LOAD1]](<2 x f64>) + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<4 x i64>) = G_BITCAST [[CONCAT_VECTORS]](<4 x f64>) + ; GCN-NEXT: [[UV:%[0-9]+]]:vgpr(i64), [[UV1:%[0-9]+]]:vgpr(i64), [[UV2:%[0-9]+]]:vgpr(i64), [[UV3:%[0-9]+]]:vgpr(i64) = G_UNMERGE_VALUES [[BITCAST]](<4 x i64>) + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f64) = G_BITCAST [[UV]](i64) + ; GCN-NEXT: [[BITCAST2:%[0-9]+]]:vgpr(i64) = G_BITCAST [[BITCAST1]](f64) + ; GCN-NEXT: [[UV4:%[0-9]+]]:vgpr(i32), [[UV5:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[BITCAST2]](i64) + ; GCN-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV4]] + ; GCN-NEXT: [[AMDGPU_READANYLANE1:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV5]] + ; GCN-NEXT: [[MV:%[0-9]+]]:sgpr(i64) = G_MERGE_VALUES [[AMDGPU_READANYLANE]](i32), [[AMDGPU_READANYLANE1]](i32) + ; GCN-NEXT: [[BITCAST3:%[0-9]+]]:sgpr(f64) = G_BITCAST [[MV]](i64) + ; GCN-NEXT: [[BITCAST4:%[0-9]+]]:vgpr(f64) = G_BITCAST [[UV1]](i64) + ; GCN-NEXT: [[BITCAST5:%[0-9]+]]:vgpr(i64) = G_BITCAST [[BITCAST4]](f64) + ; GCN-NEXT: [[UV6:%[0-9]+]]:vgpr(i32), [[UV7:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[BITCAST5]](i64) + ; GCN-NEXT: [[AMDGPU_READANYLANE2:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV6]] + ; GCN-NEXT: [[AMDGPU_READANYLANE3:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV7]] + ; GCN-NEXT: [[MV1:%[0-9]+]]:sgpr(i64) = G_MERGE_VALUES [[AMDGPU_READANYLANE2]](i32), [[AMDGPU_READANYLANE3]](i32) + ; GCN-NEXT: [[BITCAST6:%[0-9]+]]:sgpr(f64) = G_BITCAST [[MV1]](i64) + ; GCN-NEXT: [[BITCAST7:%[0-9]+]]:vgpr(f64) = G_BITCAST [[UV2]](i64) + ; GCN-NEXT: [[BITCAST8:%[0-9]+]]:vgpr(i64) = G_BITCAST [[BITCAST7]](f64) + ; GCN-NEXT: [[UV8:%[0-9]+]]:vgpr(i32), [[UV9:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[BITCAST8]](i64) + ; GCN-NEXT: [[AMDGPU_READANYLANE4:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV8]] + ; GCN-NEXT: [[AMDGPU_READANYLANE5:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV9]] + ; GCN-NEXT: [[MV2:%[0-9]+]]:sgpr(i64) = G_MERGE_VALUES [[AMDGPU_READANYLANE4]](i32), [[AMDGPU_READANYLANE5]](i32) + ; GCN-NEXT: [[BITCAST9:%[0-9]+]]:sgpr(f64) = G_BITCAST [[MV2]](i64) + ; GCN-NEXT: [[BITCAST10:%[0-9]+]]:vgpr(f64) = G_BITCAST [[UV3]](i64) + ; GCN-NEXT: [[BITCAST11:%[0-9]+]]:vgpr(i64) = G_BITCAST [[BITCAST10]](f64) + ; GCN-NEXT: [[UV10:%[0-9]+]]:vgpr(i32), [[UV11:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[BITCAST11]](i64) + ; GCN-NEXT: [[AMDGPU_READANYLANE6:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV10]] + ; GCN-NEXT: [[AMDGPU_READANYLANE7:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV11]] + ; GCN-NEXT: [[MV3:%[0-9]+]]:sgpr(i64) = G_MERGE_VALUES [[AMDGPU_READANYLANE6]](i32), [[AMDGPU_READANYLANE7]](i32) + ; GCN-NEXT: [[BITCAST12:%[0-9]+]]:sgpr(f64) = G_BITCAST [[MV3]](i64) + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x f64>) = G_BUILD_VECTOR [[BITCAST3]](f64), [[BITCAST6]](f64), [[BITCAST9]](f64), [[BITCAST12]](f64) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(<4 x s64>) = G_LOAD %0 :: (load (<4 x s64>) from %ir.global.not.uniform.v4i64) + %1:_(<4 x f64>) = G_LOAD %0(p1) :: (load (<4 x f64>) from %ir.global.not.uniform.v4i64, addrspace 1) ... 
--- @@ -192,37 +205,37 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY1]](p1) :: (load (<4 x s32>) from %ir.global.not.uniform.v16i32, align 64, addrspace 1) - ; GCN-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY1]], [[C]](s64) - ; GCN-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<4 x s32>) from %ir.global.not.uniform.v16i32 + 16, basealign 64, addrspace 1) - ; GCN-NEXT: [[C1:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 32 - ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY1]], [[C1]](s64) - ; GCN-NEXT: [[LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD1]](p1) :: (load (<4 x s32>) from %ir.global.not.uniform.v16i32 + 32, align 32, basealign 64, addrspace 1) - ; GCN-NEXT: [[C2:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 48 - ; GCN-NEXT: [[PTR_ADD2:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY1]], [[C2]](s64) - ; GCN-NEXT: [[LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD2]](p1) :: (load (<4 x s32>) from %ir.global.not.uniform.v16i32 + 48, basealign 64, addrspace 1) - ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) - ; GCN-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s32>) - ; GCN-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV]] - ; GCN-NEXT: [[AMDGPU_READANYLANE1:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV1]] - ; GCN-NEXT: [[AMDGPU_READANYLANE2:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV2]] - ; GCN-NEXT: [[AMDGPU_READANYLANE3:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV3]] - ; GCN-NEXT: [[AMDGPU_READANYLANE4:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV4]] - ; GCN-NEXT: [[AMDGPU_READANYLANE5:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV5]] - ; GCN-NEXT: [[AMDGPU_READANYLANE6:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV6]] - ; GCN-NEXT: [[AMDGPU_READANYLANE7:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV7]] - ; GCN-NEXT: [[AMDGPU_READANYLANE8:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV8]] - ; GCN-NEXT: [[AMDGPU_READANYLANE9:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV9]] - ; GCN-NEXT: [[AMDGPU_READANYLANE10:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV10]] - ; GCN-NEXT: [[AMDGPU_READANYLANE11:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV11]] - ; GCN-NEXT: [[AMDGPU_READANYLANE12:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV12]] - ; GCN-NEXT: [[AMDGPU_READANYLANE13:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV13]] - ; GCN-NEXT: [[AMDGPU_READANYLANE14:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV14]] - ; GCN-NEXT: [[AMDGPU_READANYLANE15:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV15]] - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<16 x s32>) = G_BUILD_VECTOR [[AMDGPU_READANYLANE]](s32), [[AMDGPU_READANYLANE1]](s32), [[AMDGPU_READANYLANE2]](s32), [[AMDGPU_READANYLANE3]](s32), [[AMDGPU_READANYLANE4]](s32), [[AMDGPU_READANYLANE5]](s32), 
[[AMDGPU_READANYLANE6]](s32), [[AMDGPU_READANYLANE7]](s32), [[AMDGPU_READANYLANE8]](s32), [[AMDGPU_READANYLANE9]](s32), [[AMDGPU_READANYLANE10]](s32), [[AMDGPU_READANYLANE11]](s32), [[AMDGPU_READANYLANE12]](s32), [[AMDGPU_READANYLANE13]](s32), [[AMDGPU_READANYLANE14]](s32), [[AMDGPU_READANYLANE15]](s32) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(<4 x i32>) = G_LOAD [[COPY1]](p1) :: (load (<4 x i32>) from %ir.global.not.uniform.v16i32, align 64, addrspace 1) + ; GCN-NEXT: [[C:%[0-9]+]]:vgpr(i64) = G_CONSTANT i64 16 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY1]], [[C]](i64) + ; GCN-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<4 x i32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<4 x i32>) from %ir.global.not.uniform.v16i32 + 16, basealign 64, addrspace 1) + ; GCN-NEXT: [[C1:%[0-9]+]]:vgpr(i64) = G_CONSTANT i64 32 + ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY1]], [[C1]](i64) + ; GCN-NEXT: [[LOAD2:%[0-9]+]]:vgpr(<4 x i32>) = G_LOAD [[PTR_ADD1]](p1) :: (load (<4 x i32>) from %ir.global.not.uniform.v16i32 + 32, align 32, basealign 64, addrspace 1) + ; GCN-NEXT: [[C2:%[0-9]+]]:vgpr(i64) = G_CONSTANT i64 48 + ; GCN-NEXT: [[PTR_ADD2:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY1]], [[C2]](i64) + ; GCN-NEXT: [[LOAD3:%[0-9]+]]:vgpr(<4 x i32>) = G_LOAD [[PTR_ADD2]](p1) :: (load (<4 x i32>) from %ir.global.not.uniform.v16i32 + 48, basealign 64, addrspace 1) + ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>), [[LOAD2]](<4 x i32>), [[LOAD3]](<4 x i32>) + ; GCN-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32), [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32), [[UV4:%[0-9]+]]:vgpr(i32), [[UV5:%[0-9]+]]:vgpr(i32), [[UV6:%[0-9]+]]:vgpr(i32), [[UV7:%[0-9]+]]:vgpr(i32), [[UV8:%[0-9]+]]:vgpr(i32), [[UV9:%[0-9]+]]:vgpr(i32), [[UV10:%[0-9]+]]:vgpr(i32), [[UV11:%[0-9]+]]:vgpr(i32), [[UV12:%[0-9]+]]:vgpr(i32), [[UV13:%[0-9]+]]:vgpr(i32), [[UV14:%[0-9]+]]:vgpr(i32), [[UV15:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x i32>) + ; GCN-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV]] + ; GCN-NEXT: [[AMDGPU_READANYLANE1:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV1]] + ; GCN-NEXT: [[AMDGPU_READANYLANE2:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV2]] + ; GCN-NEXT: [[AMDGPU_READANYLANE3:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV3]] + ; GCN-NEXT: [[AMDGPU_READANYLANE4:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV4]] + ; GCN-NEXT: [[AMDGPU_READANYLANE5:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV5]] + ; GCN-NEXT: [[AMDGPU_READANYLANE6:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV6]] + ; GCN-NEXT: [[AMDGPU_READANYLANE7:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV7]] + ; GCN-NEXT: [[AMDGPU_READANYLANE8:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV8]] + ; GCN-NEXT: [[AMDGPU_READANYLANE9:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV9]] + ; GCN-NEXT: [[AMDGPU_READANYLANE10:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV10]] + ; GCN-NEXT: [[AMDGPU_READANYLANE11:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV11]] + ; GCN-NEXT: [[AMDGPU_READANYLANE12:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV12]] + ; GCN-NEXT: [[AMDGPU_READANYLANE13:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV13]] + ; GCN-NEXT: [[AMDGPU_READANYLANE14:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV14]] + ; GCN-NEXT: [[AMDGPU_READANYLANE15:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV15]] + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<16 x i32>) = G_BUILD_VECTOR [[AMDGPU_READANYLANE]](i32), 
[[AMDGPU_READANYLANE1]](i32), [[AMDGPU_READANYLANE2]](i32), [[AMDGPU_READANYLANE3]](i32), [[AMDGPU_READANYLANE4]](i32), [[AMDGPU_READANYLANE5]](i32), [[AMDGPU_READANYLANE6]](i32), [[AMDGPU_READANYLANE7]](i32), [[AMDGPU_READANYLANE8]](i32), [[AMDGPU_READANYLANE9]](i32), [[AMDGPU_READANYLANE10]](i32), [[AMDGPU_READANYLANE11]](i32), [[AMDGPU_READANYLANE12]](i32), [[AMDGPU_READANYLANE13]](i32), [[AMDGPU_READANYLANE14]](i32), [[AMDGPU_READANYLANE15]](i32) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(<16 x s32>) = G_LOAD %0 :: (load (<16 x s32>) from %ir.global.not.uniform.v16i32) + %1:_(<16 x i32>) = G_LOAD %0(p1) :: (load (<16 x i32>) from %ir.global.not.uniform.v16i32, addrspace 1) ... --- @@ -238,53 +251,78 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[COPY1]](p1) :: (load (<2 x s64>) from %ir.global.not.uniform.v8i64, align 64, addrspace 1) - ; GCN-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY1]], [[C]](s64) - ; GCN-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD]](p1) :: (load (<2 x s64>) from %ir.global.not.uniform.v8i64 + 16, basealign 64, addrspace 1) - ; GCN-NEXT: [[C1:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 32 - ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY1]], [[C1]](s64) - ; GCN-NEXT: [[LOAD2:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD1]](p1) :: (load (<2 x s64>) from %ir.global.not.uniform.v8i64 + 32, align 32, basealign 64, addrspace 1) - ; GCN-NEXT: [[C2:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 48 - ; GCN-NEXT: [[PTR_ADD2:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY1]], [[C2]](s64) - ; GCN-NEXT: [[LOAD3:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD2]](p1) :: (load (<2 x s64>) from %ir.global.not.uniform.v8i64 + 48, basealign 64, addrspace 1) - ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>), [[LOAD2]](<2 x s64>), [[LOAD3]](<2 x s64>) - ; GCN-NEXT: [[UV:%[0-9]+]]:vgpr(s64), [[UV1:%[0-9]+]]:vgpr(s64), [[UV2:%[0-9]+]]:vgpr(s64), [[UV3:%[0-9]+]]:vgpr(s64), [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64), [[UV6:%[0-9]+]]:vgpr(s64), [[UV7:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s64>) - ; GCN-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GCN-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV8]] - ; GCN-NEXT: [[AMDGPU_READANYLANE1:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV9]] - ; GCN-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[AMDGPU_READANYLANE]](s32), [[AMDGPU_READANYLANE1]](s32) - ; GCN-NEXT: [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GCN-NEXT: [[AMDGPU_READANYLANE2:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV10]] - ; GCN-NEXT: [[AMDGPU_READANYLANE3:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV11]] - ; GCN-NEXT: [[MV1:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[AMDGPU_READANYLANE2]](s32), [[AMDGPU_READANYLANE3]](s32) - ; GCN-NEXT: [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GCN-NEXT: [[AMDGPU_READANYLANE4:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV12]] - ; GCN-NEXT: [[AMDGPU_READANYLANE5:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV13]] - ; GCN-NEXT: [[MV2:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[AMDGPU_READANYLANE4]](s32), [[AMDGPU_READANYLANE5]](s32) - ; GCN-NEXT: 
[[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GCN-NEXT: [[AMDGPU_READANYLANE6:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV14]] - ; GCN-NEXT: [[AMDGPU_READANYLANE7:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV15]] - ; GCN-NEXT: [[MV3:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[AMDGPU_READANYLANE6]](s32), [[AMDGPU_READANYLANE7]](s32) - ; GCN-NEXT: [[UV16:%[0-9]+]]:vgpr(s32), [[UV17:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV4]](s64) - ; GCN-NEXT: [[AMDGPU_READANYLANE8:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV16]] - ; GCN-NEXT: [[AMDGPU_READANYLANE9:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV17]] - ; GCN-NEXT: [[MV4:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[AMDGPU_READANYLANE8]](s32), [[AMDGPU_READANYLANE9]](s32) - ; GCN-NEXT: [[UV18:%[0-9]+]]:vgpr(s32), [[UV19:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV5]](s64) - ; GCN-NEXT: [[AMDGPU_READANYLANE10:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV18]] - ; GCN-NEXT: [[AMDGPU_READANYLANE11:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV19]] - ; GCN-NEXT: [[MV5:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[AMDGPU_READANYLANE10]](s32), [[AMDGPU_READANYLANE11]](s32) - ; GCN-NEXT: [[UV20:%[0-9]+]]:vgpr(s32), [[UV21:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV6]](s64) - ; GCN-NEXT: [[AMDGPU_READANYLANE12:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV20]] - ; GCN-NEXT: [[AMDGPU_READANYLANE13:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV21]] - ; GCN-NEXT: [[MV6:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[AMDGPU_READANYLANE12]](s32), [[AMDGPU_READANYLANE13]](s32) - ; GCN-NEXT: [[UV22:%[0-9]+]]:vgpr(s32), [[UV23:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV7]](s64) - ; GCN-NEXT: [[AMDGPU_READANYLANE14:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV22]] - ; GCN-NEXT: [[AMDGPU_READANYLANE15:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV23]] - ; GCN-NEXT: [[MV7:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[AMDGPU_READANYLANE14]](s32), [[AMDGPU_READANYLANE15]](s32) - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x f64>) = G_LOAD [[COPY1]](p1) :: (load (<2 x f64>) from %ir.global.not.uniform.v8i64, align 64, addrspace 1) + ; GCN-NEXT: [[C:%[0-9]+]]:vgpr(i64) = G_CONSTANT i64 16 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY1]], [[C]](i64) + ; GCN-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<2 x f64>) = G_LOAD [[PTR_ADD]](p1) :: (load (<2 x f64>) from %ir.global.not.uniform.v8i64 + 16, basealign 64, addrspace 1) + ; GCN-NEXT: [[C1:%[0-9]+]]:vgpr(i64) = G_CONSTANT i64 32 + ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY1]], [[C1]](i64) + ; GCN-NEXT: [[LOAD2:%[0-9]+]]:vgpr(<2 x f64>) = G_LOAD [[PTR_ADD1]](p1) :: (load (<2 x f64>) from %ir.global.not.uniform.v8i64 + 32, align 32, basealign 64, addrspace 1) + ; GCN-NEXT: [[C2:%[0-9]+]]:vgpr(i64) = G_CONSTANT i64 48 + ; GCN-NEXT: [[PTR_ADD2:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY1]], [[C2]](i64) + ; GCN-NEXT: [[LOAD3:%[0-9]+]]:vgpr(<2 x f64>) = G_LOAD [[PTR_ADD2]](p1) :: (load (<2 x f64>) from %ir.global.not.uniform.v8i64 + 48, basealign 64, addrspace 1) + ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x f64>) = G_CONCAT_VECTORS [[LOAD]](<2 x f64>), [[LOAD1]](<2 x f64>), [[LOAD2]](<2 x f64>), [[LOAD3]](<2 x f64>) + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<8 x i64>) = G_BITCAST [[CONCAT_VECTORS]](<8 x f64>) + ; GCN-NEXT: [[UV:%[0-9]+]]:vgpr(i64), [[UV1:%[0-9]+]]:vgpr(i64), [[UV2:%[0-9]+]]:vgpr(i64), 
[[UV3:%[0-9]+]]:vgpr(i64), [[UV4:%[0-9]+]]:vgpr(i64), [[UV5:%[0-9]+]]:vgpr(i64), [[UV6:%[0-9]+]]:vgpr(i64), [[UV7:%[0-9]+]]:vgpr(i64) = G_UNMERGE_VALUES [[BITCAST]](<8 x i64>) + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f64) = G_BITCAST [[UV]](i64) + ; GCN-NEXT: [[BITCAST2:%[0-9]+]]:vgpr(i64) = G_BITCAST [[BITCAST1]](f64) + ; GCN-NEXT: [[UV8:%[0-9]+]]:vgpr(i32), [[UV9:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[BITCAST2]](i64) + ; GCN-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV8]] + ; GCN-NEXT: [[AMDGPU_READANYLANE1:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV9]] + ; GCN-NEXT: [[MV:%[0-9]+]]:sgpr(i64) = G_MERGE_VALUES [[AMDGPU_READANYLANE]](i32), [[AMDGPU_READANYLANE1]](i32) + ; GCN-NEXT: [[BITCAST3:%[0-9]+]]:sgpr(f64) = G_BITCAST [[MV]](i64) + ; GCN-NEXT: [[BITCAST4:%[0-9]+]]:vgpr(f64) = G_BITCAST [[UV1]](i64) + ; GCN-NEXT: [[BITCAST5:%[0-9]+]]:vgpr(i64) = G_BITCAST [[BITCAST4]](f64) + ; GCN-NEXT: [[UV10:%[0-9]+]]:vgpr(i32), [[UV11:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[BITCAST5]](i64) + ; GCN-NEXT: [[AMDGPU_READANYLANE2:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV10]] + ; GCN-NEXT: [[AMDGPU_READANYLANE3:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV11]] + ; GCN-NEXT: [[MV1:%[0-9]+]]:sgpr(i64) = G_MERGE_VALUES [[AMDGPU_READANYLANE2]](i32), [[AMDGPU_READANYLANE3]](i32) + ; GCN-NEXT: [[BITCAST6:%[0-9]+]]:sgpr(f64) = G_BITCAST [[MV1]](i64) + ; GCN-NEXT: [[BITCAST7:%[0-9]+]]:vgpr(f64) = G_BITCAST [[UV2]](i64) + ; GCN-NEXT: [[BITCAST8:%[0-9]+]]:vgpr(i64) = G_BITCAST [[BITCAST7]](f64) + ; GCN-NEXT: [[UV12:%[0-9]+]]:vgpr(i32), [[UV13:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[BITCAST8]](i64) + ; GCN-NEXT: [[AMDGPU_READANYLANE4:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV12]] + ; GCN-NEXT: [[AMDGPU_READANYLANE5:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV13]] + ; GCN-NEXT: [[MV2:%[0-9]+]]:sgpr(i64) = G_MERGE_VALUES [[AMDGPU_READANYLANE4]](i32), [[AMDGPU_READANYLANE5]](i32) + ; GCN-NEXT: [[BITCAST9:%[0-9]+]]:sgpr(f64) = G_BITCAST [[MV2]](i64) + ; GCN-NEXT: [[BITCAST10:%[0-9]+]]:vgpr(f64) = G_BITCAST [[UV3]](i64) + ; GCN-NEXT: [[BITCAST11:%[0-9]+]]:vgpr(i64) = G_BITCAST [[BITCAST10]](f64) + ; GCN-NEXT: [[UV14:%[0-9]+]]:vgpr(i32), [[UV15:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[BITCAST11]](i64) + ; GCN-NEXT: [[AMDGPU_READANYLANE6:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV14]] + ; GCN-NEXT: [[AMDGPU_READANYLANE7:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV15]] + ; GCN-NEXT: [[MV3:%[0-9]+]]:sgpr(i64) = G_MERGE_VALUES [[AMDGPU_READANYLANE6]](i32), [[AMDGPU_READANYLANE7]](i32) + ; GCN-NEXT: [[BITCAST12:%[0-9]+]]:sgpr(f64) = G_BITCAST [[MV3]](i64) + ; GCN-NEXT: [[BITCAST13:%[0-9]+]]:vgpr(f64) = G_BITCAST [[UV4]](i64) + ; GCN-NEXT: [[BITCAST14:%[0-9]+]]:vgpr(i64) = G_BITCAST [[BITCAST13]](f64) + ; GCN-NEXT: [[UV16:%[0-9]+]]:vgpr(i32), [[UV17:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[BITCAST14]](i64) + ; GCN-NEXT: [[AMDGPU_READANYLANE8:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV16]] + ; GCN-NEXT: [[AMDGPU_READANYLANE9:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV17]] + ; GCN-NEXT: [[MV4:%[0-9]+]]:sgpr(i64) = G_MERGE_VALUES [[AMDGPU_READANYLANE8]](i32), [[AMDGPU_READANYLANE9]](i32) + ; GCN-NEXT: [[BITCAST15:%[0-9]+]]:sgpr(f64) = G_BITCAST [[MV4]](i64) + ; GCN-NEXT: [[BITCAST16:%[0-9]+]]:vgpr(f64) = G_BITCAST [[UV5]](i64) + ; GCN-NEXT: [[BITCAST17:%[0-9]+]]:vgpr(i64) = G_BITCAST [[BITCAST16]](f64) + ; GCN-NEXT: [[UV18:%[0-9]+]]:vgpr(i32), [[UV19:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[BITCAST17]](i64) + ; GCN-NEXT: [[AMDGPU_READANYLANE10:%[0-9]+]]:sgpr(i32) 
= G_AMDGPU_READANYLANE [[UV18]] + ; GCN-NEXT: [[AMDGPU_READANYLANE11:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV19]] + ; GCN-NEXT: [[MV5:%[0-9]+]]:sgpr(i64) = G_MERGE_VALUES [[AMDGPU_READANYLANE10]](i32), [[AMDGPU_READANYLANE11]](i32) + ; GCN-NEXT: [[BITCAST18:%[0-9]+]]:sgpr(f64) = G_BITCAST [[MV5]](i64) + ; GCN-NEXT: [[BITCAST19:%[0-9]+]]:vgpr(f64) = G_BITCAST [[UV6]](i64) + ; GCN-NEXT: [[BITCAST20:%[0-9]+]]:vgpr(i64) = G_BITCAST [[BITCAST19]](f64) + ; GCN-NEXT: [[UV20:%[0-9]+]]:vgpr(i32), [[UV21:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[BITCAST20]](i64) + ; GCN-NEXT: [[AMDGPU_READANYLANE12:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV20]] + ; GCN-NEXT: [[AMDGPU_READANYLANE13:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV21]] + ; GCN-NEXT: [[MV6:%[0-9]+]]:sgpr(i64) = G_MERGE_VALUES [[AMDGPU_READANYLANE12]](i32), [[AMDGPU_READANYLANE13]](i32) + ; GCN-NEXT: [[BITCAST21:%[0-9]+]]:sgpr(f64) = G_BITCAST [[MV6]](i64) + ; GCN-NEXT: [[BITCAST22:%[0-9]+]]:vgpr(f64) = G_BITCAST [[UV7]](i64) + ; GCN-NEXT: [[BITCAST23:%[0-9]+]]:vgpr(i64) = G_BITCAST [[BITCAST22]](f64) + ; GCN-NEXT: [[UV22:%[0-9]+]]:vgpr(i32), [[UV23:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[BITCAST23]](i64) + ; GCN-NEXT: [[AMDGPU_READANYLANE14:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV22]] + ; GCN-NEXT: [[AMDGPU_READANYLANE15:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV23]] + ; GCN-NEXT: [[MV7:%[0-9]+]]:sgpr(i64) = G_MERGE_VALUES [[AMDGPU_READANYLANE14]](i32), [[AMDGPU_READANYLANE15]](i32) + ; GCN-NEXT: [[BITCAST24:%[0-9]+]]:sgpr(f64) = G_BITCAST [[MV7]](i64) + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x f64>) = G_BUILD_VECTOR [[BITCAST3]](f64), [[BITCAST6]](f64), [[BITCAST9]](f64), [[BITCAST12]](f64), [[BITCAST15]](f64), [[BITCAST18]](f64), [[BITCAST21]](f64), [[BITCAST24]](f64) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(<8 x s64>) = G_LOAD %0 :: (load (<8 x s64>) from %ir.global.not.uniform.v8i64) + %1:_(<8 x f64>) = G_LOAD %0(p1) :: (load (<8 x f64>) from %ir.global.not.uniform.v8i64, addrspace 1) ... --- @@ -299,9 +337,9 @@ body: | ; GCN: liveins: $sgpr0_sgpr1 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[LOAD:%[0-9]+]]:sgpr(<8 x s32>) = G_LOAD [[COPY]](p1) :: (invariant load (<8 x s32>), addrspace 1) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:sgpr(<8 x i32>) = G_LOAD [[COPY]](p1) :: (invariant load (<8 x i32>), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(<8 x s32>) = G_LOAD %0 :: (invariant load (<8 x s32>), addrspace 1) + %1:_(<8 x i32>) = G_LOAD %0(p1) :: (invariant load (<8 x i32>), addrspace 1) ... --- @@ -316,9 +354,9 @@ body: | ; GCN: liveins: $sgpr0_sgpr1 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[LOAD:%[0-9]+]]:sgpr(<4 x s64>) = G_LOAD [[COPY]](p1) :: (invariant load (<4 x s64>), addrspace 1) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:sgpr(<4 x f64>) = G_LOAD [[COPY]](p1) :: (invariant load (<4 x f64>), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(<4 x s64>) = G_LOAD %0 :: (invariant load (<4 x s64>), addrspace 1) + %1:_(<4 x f64>) = G_LOAD %0(p1) :: (invariant load (<4 x f64>), addrspace 1) ... 
--- @@ -333,9 +371,9 @@ body: | ; GCN: liveins: $sgpr0_sgpr1 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[LOAD:%[0-9]+]]:sgpr(<16 x s32>) = G_LOAD [[COPY]](p1) :: (invariant load (<16 x s32>), addrspace 1) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:sgpr(<16 x i32>) = G_LOAD [[COPY]](p1) :: (invariant load (<16 x i32>), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(<16 x s32>) = G_LOAD %0 :: (invariant load (<16 x s32>), addrspace 1) + %1:_(<16 x i32>) = G_LOAD %0(p1) :: (invariant load (<16 x i32>), addrspace 1) ... --- @@ -350,9 +388,9 @@ body: | ; GCN: liveins: $sgpr0_sgpr1 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[LOAD:%[0-9]+]]:sgpr(<8 x s64>) = G_LOAD [[COPY]](p1) :: (invariant load (<8 x s64>), addrspace 1) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:sgpr(<8 x f64>) = G_LOAD [[COPY]](p1) :: (invariant load (<8 x f64>), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(<8 x s64>) = G_LOAD %0 :: (invariant load (<8 x s64>), addrspace 1) + %1:_(<8 x f64>) = G_LOAD %0(p1) :: (invariant load (<8 x f64>), addrspace 1) ... --- @@ -368,23 +406,23 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY1]](p4) :: (load (<4 x s32>) from %ir.constant.not.uniform.v8i32, align 32, addrspace 4) - ; GCN-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY1]], [[C]](s64) - ; GCN-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<4 x s32>) from %ir.constant.not.uniform.v8i32 + 16, basealign 32, addrspace 4) - ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) - ; GCN-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GCN-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV]] - ; GCN-NEXT: [[AMDGPU_READANYLANE1:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV1]] - ; GCN-NEXT: [[AMDGPU_READANYLANE2:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV2]] - ; GCN-NEXT: [[AMDGPU_READANYLANE3:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV3]] - ; GCN-NEXT: [[AMDGPU_READANYLANE4:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV4]] - ; GCN-NEXT: [[AMDGPU_READANYLANE5:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV5]] - ; GCN-NEXT: [[AMDGPU_READANYLANE6:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV6]] - ; GCN-NEXT: [[AMDGPU_READANYLANE7:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV7]] - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[AMDGPU_READANYLANE]](s32), [[AMDGPU_READANYLANE1]](s32), [[AMDGPU_READANYLANE2]](s32), [[AMDGPU_READANYLANE3]](s32), [[AMDGPU_READANYLANE4]](s32), [[AMDGPU_READANYLANE5]](s32), [[AMDGPU_READANYLANE6]](s32), [[AMDGPU_READANYLANE7]](s32) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(<4 x i32>) = G_LOAD [[COPY1]](p4) :: (load (<4 x i32>) from %ir.constant.not.uniform.v8i32, align 32, addrspace 4) + ; GCN-NEXT: [[C:%[0-9]+]]:vgpr(i64) = G_CONSTANT i64 16 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY1]], [[C]](i64) + ; GCN-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<4 x i32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<4 x i32>) from %ir.constant.not.uniform.v8i32 + 16, 
basealign 32, addrspace 4) + ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>) + ; GCN-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32), [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32), [[UV4:%[0-9]+]]:vgpr(i32), [[UV5:%[0-9]+]]:vgpr(i32), [[UV6:%[0-9]+]]:vgpr(i32), [[UV7:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x i32>) + ; GCN-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV]] + ; GCN-NEXT: [[AMDGPU_READANYLANE1:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV1]] + ; GCN-NEXT: [[AMDGPU_READANYLANE2:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV2]] + ; GCN-NEXT: [[AMDGPU_READANYLANE3:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV3]] + ; GCN-NEXT: [[AMDGPU_READANYLANE4:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV4]] + ; GCN-NEXT: [[AMDGPU_READANYLANE5:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV5]] + ; GCN-NEXT: [[AMDGPU_READANYLANE6:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV6]] + ; GCN-NEXT: [[AMDGPU_READANYLANE7:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV7]] + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x i32>) = G_BUILD_VECTOR [[AMDGPU_READANYLANE]](i32), [[AMDGPU_READANYLANE1]](i32), [[AMDGPU_READANYLANE2]](i32), [[AMDGPU_READANYLANE3]](i32), [[AMDGPU_READANYLANE4]](i32), [[AMDGPU_READANYLANE5]](i32), [[AMDGPU_READANYLANE6]](i32), [[AMDGPU_READANYLANE7]](i32) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(<8 x s32>) = G_LOAD %0 :: (load (<8 x s32>) from %ir.constant.not.uniform.v8i32) + %1:_(<8 x i32>) = G_LOAD %0(p4) :: (load (<8 x i32>) from %ir.constant.not.uniform.v8i32, addrspace 4) ... --- @@ -400,23 +438,23 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(s128) = G_LOAD [[COPY1]](p4) :: (load (s128) from %ir.constant.not.uniform, align 32, addrspace 4) - ; GCN-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY1]], [[C]](s64) - ; GCN-NEXT: [[LOAD1:%[0-9]+]]:vgpr(s128) = G_LOAD [[PTR_ADD]](p4) :: (load (s128) from %ir.constant.not.uniform + 16, basealign 32, addrspace 4) - ; GCN-NEXT: [[MV:%[0-9]+]]:vgpr(s256) = G_MERGE_VALUES [[LOAD]](s128), [[LOAD1]](s128) - ; GCN-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s256) - ; GCN-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV]] - ; GCN-NEXT: [[AMDGPU_READANYLANE1:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV1]] - ; GCN-NEXT: [[AMDGPU_READANYLANE2:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV2]] - ; GCN-NEXT: [[AMDGPU_READANYLANE3:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV3]] - ; GCN-NEXT: [[AMDGPU_READANYLANE4:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV4]] - ; GCN-NEXT: [[AMDGPU_READANYLANE5:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV5]] - ; GCN-NEXT: [[AMDGPU_READANYLANE6:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV6]] - ; GCN-NEXT: [[AMDGPU_READANYLANE7:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV7]] - ; GCN-NEXT: [[MV1:%[0-9]+]]:sgpr(s256) = G_MERGE_VALUES [[AMDGPU_READANYLANE]](s32), [[AMDGPU_READANYLANE1]](s32), [[AMDGPU_READANYLANE2]](s32), [[AMDGPU_READANYLANE3]](s32), [[AMDGPU_READANYLANE4]](s32), [[AMDGPU_READANYLANE5]](s32), [[AMDGPU_READANYLANE6]](s32), 
[[AMDGPU_READANYLANE7]](s32) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(i128) = G_LOAD [[COPY1]](p4) :: (load (i128) from %ir.constant.not.uniform, align 32, addrspace 4) + ; GCN-NEXT: [[C:%[0-9]+]]:vgpr(i64) = G_CONSTANT i64 16 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY1]], [[C]](i64) + ; GCN-NEXT: [[LOAD1:%[0-9]+]]:vgpr(i128) = G_LOAD [[PTR_ADD]](p4) :: (load (i128) from %ir.constant.not.uniform + 16, basealign 32, addrspace 4) + ; GCN-NEXT: [[MV:%[0-9]+]]:vgpr(i256) = G_MERGE_VALUES [[LOAD]](i128), [[LOAD1]](i128) + ; GCN-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32), [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32), [[UV4:%[0-9]+]]:vgpr(i32), [[UV5:%[0-9]+]]:vgpr(i32), [[UV6:%[0-9]+]]:vgpr(i32), [[UV7:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[MV]](i256) + ; GCN-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV]] + ; GCN-NEXT: [[AMDGPU_READANYLANE1:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV1]] + ; GCN-NEXT: [[AMDGPU_READANYLANE2:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV2]] + ; GCN-NEXT: [[AMDGPU_READANYLANE3:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV3]] + ; GCN-NEXT: [[AMDGPU_READANYLANE4:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV4]] + ; GCN-NEXT: [[AMDGPU_READANYLANE5:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV5]] + ; GCN-NEXT: [[AMDGPU_READANYLANE6:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV6]] + ; GCN-NEXT: [[AMDGPU_READANYLANE7:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV7]] + ; GCN-NEXT: [[MV1:%[0-9]+]]:sgpr(i256) = G_MERGE_VALUES [[AMDGPU_READANYLANE]](i32), [[AMDGPU_READANYLANE1]](i32), [[AMDGPU_READANYLANE2]](i32), [[AMDGPU_READANYLANE3]](i32), [[AMDGPU_READANYLANE4]](i32), [[AMDGPU_READANYLANE5]](i32), [[AMDGPU_READANYLANE6]](i32), [[AMDGPU_READANYLANE7]](i32) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(s256) = G_LOAD %0 :: (load (s256) from %ir.constant.not.uniform) + %1:_(i256) = G_LOAD %0(p4) :: (load (i256) from %ir.constant.not.uniform, addrspace 4) ... 
--- @@ -433,23 +471,23 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_LOAD [[COPY1]](p4) :: (load (<8 x s16>) from %ir.constant.not.uniform, align 32, addrspace 4) - ; GCN-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY1]], [[C]](s64) - ; GCN-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_LOAD [[PTR_ADD]](p4) :: (load (<8 x s16>) from %ir.constant.not.uniform + 16, basealign 32, addrspace 4) - ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s16>) = G_CONCAT_VECTORS [[LOAD]](<8 x s16>), [[LOAD1]](<8 x s16>) - ; GCN-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s16>), [[UV1:%[0-9]+]]:vgpr(<2 x s16>), [[UV2:%[0-9]+]]:vgpr(<2 x s16>), [[UV3:%[0-9]+]]:vgpr(<2 x s16>), [[UV4:%[0-9]+]]:vgpr(<2 x s16>), [[UV5:%[0-9]+]]:vgpr(<2 x s16>), [[UV6:%[0-9]+]]:vgpr(<2 x s16>), [[UV7:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s16>) - ; GCN-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(<2 x s16>) = G_AMDGPU_READANYLANE [[UV]] - ; GCN-NEXT: [[AMDGPU_READANYLANE1:%[0-9]+]]:sgpr(<2 x s16>) = G_AMDGPU_READANYLANE [[UV1]] - ; GCN-NEXT: [[AMDGPU_READANYLANE2:%[0-9]+]]:sgpr(<2 x s16>) = G_AMDGPU_READANYLANE [[UV2]] - ; GCN-NEXT: [[AMDGPU_READANYLANE3:%[0-9]+]]:sgpr(<2 x s16>) = G_AMDGPU_READANYLANE [[UV3]] - ; GCN-NEXT: [[AMDGPU_READANYLANE4:%[0-9]+]]:sgpr(<2 x s16>) = G_AMDGPU_READANYLANE [[UV4]] - ; GCN-NEXT: [[AMDGPU_READANYLANE5:%[0-9]+]]:sgpr(<2 x s16>) = G_AMDGPU_READANYLANE [[UV5]] - ; GCN-NEXT: [[AMDGPU_READANYLANE6:%[0-9]+]]:sgpr(<2 x s16>) = G_AMDGPU_READANYLANE [[UV6]] - ; GCN-NEXT: [[AMDGPU_READANYLANE7:%[0-9]+]]:sgpr(<2 x s16>) = G_AMDGPU_READANYLANE [[UV7]] - ; GCN-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:sgpr(<16 x s16>) = G_CONCAT_VECTORS [[AMDGPU_READANYLANE]](<2 x s16>), [[AMDGPU_READANYLANE1]](<2 x s16>), [[AMDGPU_READANYLANE2]](<2 x s16>), [[AMDGPU_READANYLANE3]](<2 x s16>), [[AMDGPU_READANYLANE4]](<2 x s16>), [[AMDGPU_READANYLANE5]](<2 x s16>), [[AMDGPU_READANYLANE6]](<2 x s16>), [[AMDGPU_READANYLANE7]](<2 x s16>) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(<8 x i16>) = G_LOAD [[COPY1]](p4) :: (load (<8 x i16>) from %ir.constant.not.uniform, align 32, addrspace 4) + ; GCN-NEXT: [[C:%[0-9]+]]:vgpr(i64) = G_CONSTANT i64 16 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY1]], [[C]](i64) + ; GCN-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<8 x i16>) = G_LOAD [[PTR_ADD]](p4) :: (load (<8 x i16>) from %ir.constant.not.uniform + 16, basealign 32, addrspace 4) + ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x i16>) = G_CONCAT_VECTORS [[LOAD]](<8 x i16>), [[LOAD1]](<8 x i16>) + ; GCN-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x i16>), [[UV1:%[0-9]+]]:vgpr(<2 x i16>), [[UV2:%[0-9]+]]:vgpr(<2 x i16>), [[UV3:%[0-9]+]]:vgpr(<2 x i16>), [[UV4:%[0-9]+]]:vgpr(<2 x i16>), [[UV5:%[0-9]+]]:vgpr(<2 x i16>), [[UV6:%[0-9]+]]:vgpr(<2 x i16>), [[UV7:%[0-9]+]]:vgpr(<2 x i16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x i16>) + ; GCN-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(<2 x i16>) = G_AMDGPU_READANYLANE [[UV]] + ; GCN-NEXT: [[AMDGPU_READANYLANE1:%[0-9]+]]:sgpr(<2 x i16>) = G_AMDGPU_READANYLANE [[UV1]] + ; GCN-NEXT: [[AMDGPU_READANYLANE2:%[0-9]+]]:sgpr(<2 x i16>) = G_AMDGPU_READANYLANE [[UV2]] + ; GCN-NEXT: [[AMDGPU_READANYLANE3:%[0-9]+]]:sgpr(<2 x i16>) = G_AMDGPU_READANYLANE [[UV3]] + ; GCN-NEXT: [[AMDGPU_READANYLANE4:%[0-9]+]]:sgpr(<2 x i16>) = G_AMDGPU_READANYLANE [[UV4]] + ; GCN-NEXT: 
[[AMDGPU_READANYLANE5:%[0-9]+]]:sgpr(<2 x i16>) = G_AMDGPU_READANYLANE [[UV5]] + ; GCN-NEXT: [[AMDGPU_READANYLANE6:%[0-9]+]]:sgpr(<2 x i16>) = G_AMDGPU_READANYLANE [[UV6]] + ; GCN-NEXT: [[AMDGPU_READANYLANE7:%[0-9]+]]:sgpr(<2 x i16>) = G_AMDGPU_READANYLANE [[UV7]] + ; GCN-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:sgpr(<16 x i16>) = G_CONCAT_VECTORS [[AMDGPU_READANYLANE]](<2 x i16>), [[AMDGPU_READANYLANE1]](<2 x i16>), [[AMDGPU_READANYLANE2]](<2 x i16>), [[AMDGPU_READANYLANE3]](<2 x i16>), [[AMDGPU_READANYLANE4]](<2 x i16>), [[AMDGPU_READANYLANE5]](<2 x i16>), [[AMDGPU_READANYLANE6]](<2 x i16>), [[AMDGPU_READANYLANE7]](<2 x i16>) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(<16 x s16>) = G_LOAD %0 :: (load (<16 x s16>) from %ir.constant.not.uniform) + %1:_(<16 x i16>) = G_LOAD %0(p4) :: (load (<16 x i16>) from %ir.constant.not.uniform, addrspace 4) ... --- @@ -465,31 +503,44 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[COPY1]](p4) :: (load (<2 x s64>) from %ir.constant.not.uniform.v4i64, align 32, addrspace 4) - ; GCN-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY1]], [[C]](s64) - ; GCN-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD]](p4) :: (load (<2 x s64>) from %ir.constant.not.uniform.v4i64 + 16, basealign 32, addrspace 4) - ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) - ; GCN-NEXT: [[UV:%[0-9]+]]:vgpr(s64), [[UV1:%[0-9]+]]:vgpr(s64), [[UV2:%[0-9]+]]:vgpr(s64), [[UV3:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s64>) - ; GCN-NEXT: [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GCN-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV4]] - ; GCN-NEXT: [[AMDGPU_READANYLANE1:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV5]] - ; GCN-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[AMDGPU_READANYLANE]](s32), [[AMDGPU_READANYLANE1]](s32) - ; GCN-NEXT: [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GCN-NEXT: [[AMDGPU_READANYLANE2:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV6]] - ; GCN-NEXT: [[AMDGPU_READANYLANE3:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV7]] - ; GCN-NEXT: [[MV1:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[AMDGPU_READANYLANE2]](s32), [[AMDGPU_READANYLANE3]](s32) - ; GCN-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GCN-NEXT: [[AMDGPU_READANYLANE4:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV8]] - ; GCN-NEXT: [[AMDGPU_READANYLANE5:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV9]] - ; GCN-NEXT: [[MV2:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[AMDGPU_READANYLANE4]](s32), [[AMDGPU_READANYLANE5]](s32) - ; GCN-NEXT: [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GCN-NEXT: [[AMDGPU_READANYLANE6:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV10]] - ; GCN-NEXT: [[AMDGPU_READANYLANE7:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV11]] - ; GCN-NEXT: [[MV3:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[AMDGPU_READANYLANE6]](s32), [[AMDGPU_READANYLANE7]](s32) - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x f64>) = G_LOAD [[COPY1]](p4) :: (load (<2 x f64>) from 
%ir.constant.not.uniform.v4i64, align 32, addrspace 4) + ; GCN-NEXT: [[C:%[0-9]+]]:vgpr(i64) = G_CONSTANT i64 16 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY1]], [[C]](i64) + ; GCN-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<2 x f64>) = G_LOAD [[PTR_ADD]](p4) :: (load (<2 x f64>) from %ir.constant.not.uniform.v4i64 + 16, basealign 32, addrspace 4) + ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x f64>) = G_CONCAT_VECTORS [[LOAD]](<2 x f64>), [[LOAD1]](<2 x f64>) + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<4 x i64>) = G_BITCAST [[CONCAT_VECTORS]](<4 x f64>) + ; GCN-NEXT: [[UV:%[0-9]+]]:vgpr(i64), [[UV1:%[0-9]+]]:vgpr(i64), [[UV2:%[0-9]+]]:vgpr(i64), [[UV3:%[0-9]+]]:vgpr(i64) = G_UNMERGE_VALUES [[BITCAST]](<4 x i64>) + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f64) = G_BITCAST [[UV]](i64) + ; GCN-NEXT: [[BITCAST2:%[0-9]+]]:vgpr(i64) = G_BITCAST [[BITCAST1]](f64) + ; GCN-NEXT: [[UV4:%[0-9]+]]:vgpr(i32), [[UV5:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[BITCAST2]](i64) + ; GCN-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV4]] + ; GCN-NEXT: [[AMDGPU_READANYLANE1:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV5]] + ; GCN-NEXT: [[MV:%[0-9]+]]:sgpr(i64) = G_MERGE_VALUES [[AMDGPU_READANYLANE]](i32), [[AMDGPU_READANYLANE1]](i32) + ; GCN-NEXT: [[BITCAST3:%[0-9]+]]:sgpr(f64) = G_BITCAST [[MV]](i64) + ; GCN-NEXT: [[BITCAST4:%[0-9]+]]:vgpr(f64) = G_BITCAST [[UV1]](i64) + ; GCN-NEXT: [[BITCAST5:%[0-9]+]]:vgpr(i64) = G_BITCAST [[BITCAST4]](f64) + ; GCN-NEXT: [[UV6:%[0-9]+]]:vgpr(i32), [[UV7:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[BITCAST5]](i64) + ; GCN-NEXT: [[AMDGPU_READANYLANE2:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV6]] + ; GCN-NEXT: [[AMDGPU_READANYLANE3:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV7]] + ; GCN-NEXT: [[MV1:%[0-9]+]]:sgpr(i64) = G_MERGE_VALUES [[AMDGPU_READANYLANE2]](i32), [[AMDGPU_READANYLANE3]](i32) + ; GCN-NEXT: [[BITCAST6:%[0-9]+]]:sgpr(f64) = G_BITCAST [[MV1]](i64) + ; GCN-NEXT: [[BITCAST7:%[0-9]+]]:vgpr(f64) = G_BITCAST [[UV2]](i64) + ; GCN-NEXT: [[BITCAST8:%[0-9]+]]:vgpr(i64) = G_BITCAST [[BITCAST7]](f64) + ; GCN-NEXT: [[UV8:%[0-9]+]]:vgpr(i32), [[UV9:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[BITCAST8]](i64) + ; GCN-NEXT: [[AMDGPU_READANYLANE4:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV8]] + ; GCN-NEXT: [[AMDGPU_READANYLANE5:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV9]] + ; GCN-NEXT: [[MV2:%[0-9]+]]:sgpr(i64) = G_MERGE_VALUES [[AMDGPU_READANYLANE4]](i32), [[AMDGPU_READANYLANE5]](i32) + ; GCN-NEXT: [[BITCAST9:%[0-9]+]]:sgpr(f64) = G_BITCAST [[MV2]](i64) + ; GCN-NEXT: [[BITCAST10:%[0-9]+]]:vgpr(f64) = G_BITCAST [[UV3]](i64) + ; GCN-NEXT: [[BITCAST11:%[0-9]+]]:vgpr(i64) = G_BITCAST [[BITCAST10]](f64) + ; GCN-NEXT: [[UV10:%[0-9]+]]:vgpr(i32), [[UV11:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[BITCAST11]](i64) + ; GCN-NEXT: [[AMDGPU_READANYLANE6:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV10]] + ; GCN-NEXT: [[AMDGPU_READANYLANE7:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV11]] + ; GCN-NEXT: [[MV3:%[0-9]+]]:sgpr(i64) = G_MERGE_VALUES [[AMDGPU_READANYLANE6]](i32), [[AMDGPU_READANYLANE7]](i32) + ; GCN-NEXT: [[BITCAST12:%[0-9]+]]:sgpr(f64) = G_BITCAST [[MV3]](i64) + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x f64>) = G_BUILD_VECTOR [[BITCAST3]](f64), [[BITCAST6]](f64), [[BITCAST9]](f64), [[BITCAST12]](f64) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(<4 x s64>) = G_LOAD %0 :: (load (<4 x s64>) from %ir.constant.not.uniform.v4i64) + %1:_(<4 x f64>) = G_LOAD %0(p4) :: (load (<4 x f64>) from %ir.constant.not.uniform.v4i64, addrspace 4) ... 
--- @@ -505,37 +556,37 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY1]](p4) :: (load (<4 x s32>) from %ir.constant.not.uniform.v16i32, align 64, addrspace 4) - ; GCN-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY1]], [[C]](s64) - ; GCN-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<4 x s32>) from %ir.constant.not.uniform.v16i32 + 16, basealign 64, addrspace 4) - ; GCN-NEXT: [[C1:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 32 - ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY1]], [[C1]](s64) - ; GCN-NEXT: [[LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (load (<4 x s32>) from %ir.constant.not.uniform.v16i32 + 32, align 32, basealign 64, addrspace 4) - ; GCN-NEXT: [[C2:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 48 - ; GCN-NEXT: [[PTR_ADD2:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY1]], [[C2]](s64) - ; GCN-NEXT: [[LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD2]](p4) :: (load (<4 x s32>) from %ir.constant.not.uniform.v16i32 + 48, basealign 64, addrspace 4) - ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) - ; GCN-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s32>) - ; GCN-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV]] - ; GCN-NEXT: [[AMDGPU_READANYLANE1:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV1]] - ; GCN-NEXT: [[AMDGPU_READANYLANE2:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV2]] - ; GCN-NEXT: [[AMDGPU_READANYLANE3:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV3]] - ; GCN-NEXT: [[AMDGPU_READANYLANE4:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV4]] - ; GCN-NEXT: [[AMDGPU_READANYLANE5:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV5]] - ; GCN-NEXT: [[AMDGPU_READANYLANE6:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV6]] - ; GCN-NEXT: [[AMDGPU_READANYLANE7:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV7]] - ; GCN-NEXT: [[AMDGPU_READANYLANE8:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV8]] - ; GCN-NEXT: [[AMDGPU_READANYLANE9:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV9]] - ; GCN-NEXT: [[AMDGPU_READANYLANE10:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV10]] - ; GCN-NEXT: [[AMDGPU_READANYLANE11:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV11]] - ; GCN-NEXT: [[AMDGPU_READANYLANE12:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV12]] - ; GCN-NEXT: [[AMDGPU_READANYLANE13:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV13]] - ; GCN-NEXT: [[AMDGPU_READANYLANE14:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV14]] - ; GCN-NEXT: [[AMDGPU_READANYLANE15:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV15]] - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<16 x s32>) = G_BUILD_VECTOR [[AMDGPU_READANYLANE]](s32), [[AMDGPU_READANYLANE1]](s32), [[AMDGPU_READANYLANE2]](s32), [[AMDGPU_READANYLANE3]](s32), [[AMDGPU_READANYLANE4]](s32), [[AMDGPU_READANYLANE5]](s32), 
[[AMDGPU_READANYLANE6]](s32), [[AMDGPU_READANYLANE7]](s32), [[AMDGPU_READANYLANE8]](s32), [[AMDGPU_READANYLANE9]](s32), [[AMDGPU_READANYLANE10]](s32), [[AMDGPU_READANYLANE11]](s32), [[AMDGPU_READANYLANE12]](s32), [[AMDGPU_READANYLANE13]](s32), [[AMDGPU_READANYLANE14]](s32), [[AMDGPU_READANYLANE15]](s32) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(<4 x i32>) = G_LOAD [[COPY1]](p4) :: (load (<4 x i32>) from %ir.constant.not.uniform.v16i32, align 64, addrspace 4) + ; GCN-NEXT: [[C:%[0-9]+]]:vgpr(i64) = G_CONSTANT i64 16 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY1]], [[C]](i64) + ; GCN-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<4 x i32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<4 x i32>) from %ir.constant.not.uniform.v16i32 + 16, basealign 64, addrspace 4) + ; GCN-NEXT: [[C1:%[0-9]+]]:vgpr(i64) = G_CONSTANT i64 32 + ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY1]], [[C1]](i64) + ; GCN-NEXT: [[LOAD2:%[0-9]+]]:vgpr(<4 x i32>) = G_LOAD [[PTR_ADD1]](p4) :: (load (<4 x i32>) from %ir.constant.not.uniform.v16i32 + 32, align 32, basealign 64, addrspace 4) + ; GCN-NEXT: [[C2:%[0-9]+]]:vgpr(i64) = G_CONSTANT i64 48 + ; GCN-NEXT: [[PTR_ADD2:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY1]], [[C2]](i64) + ; GCN-NEXT: [[LOAD3:%[0-9]+]]:vgpr(<4 x i32>) = G_LOAD [[PTR_ADD2]](p4) :: (load (<4 x i32>) from %ir.constant.not.uniform.v16i32 + 48, basealign 64, addrspace 4) + ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>), [[LOAD2]](<4 x i32>), [[LOAD3]](<4 x i32>) + ; GCN-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32), [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32), [[UV4:%[0-9]+]]:vgpr(i32), [[UV5:%[0-9]+]]:vgpr(i32), [[UV6:%[0-9]+]]:vgpr(i32), [[UV7:%[0-9]+]]:vgpr(i32), [[UV8:%[0-9]+]]:vgpr(i32), [[UV9:%[0-9]+]]:vgpr(i32), [[UV10:%[0-9]+]]:vgpr(i32), [[UV11:%[0-9]+]]:vgpr(i32), [[UV12:%[0-9]+]]:vgpr(i32), [[UV13:%[0-9]+]]:vgpr(i32), [[UV14:%[0-9]+]]:vgpr(i32), [[UV15:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x i32>) + ; GCN-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV]] + ; GCN-NEXT: [[AMDGPU_READANYLANE1:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV1]] + ; GCN-NEXT: [[AMDGPU_READANYLANE2:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV2]] + ; GCN-NEXT: [[AMDGPU_READANYLANE3:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV3]] + ; GCN-NEXT: [[AMDGPU_READANYLANE4:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV4]] + ; GCN-NEXT: [[AMDGPU_READANYLANE5:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV5]] + ; GCN-NEXT: [[AMDGPU_READANYLANE6:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV6]] + ; GCN-NEXT: [[AMDGPU_READANYLANE7:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV7]] + ; GCN-NEXT: [[AMDGPU_READANYLANE8:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV8]] + ; GCN-NEXT: [[AMDGPU_READANYLANE9:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV9]] + ; GCN-NEXT: [[AMDGPU_READANYLANE10:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV10]] + ; GCN-NEXT: [[AMDGPU_READANYLANE11:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV11]] + ; GCN-NEXT: [[AMDGPU_READANYLANE12:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV12]] + ; GCN-NEXT: [[AMDGPU_READANYLANE13:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV13]] + ; GCN-NEXT: [[AMDGPU_READANYLANE14:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV14]] + ; GCN-NEXT: [[AMDGPU_READANYLANE15:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV15]] + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<16 x i32>) = G_BUILD_VECTOR [[AMDGPU_READANYLANE]](i32), 
[[AMDGPU_READANYLANE1]](i32), [[AMDGPU_READANYLANE2]](i32), [[AMDGPU_READANYLANE3]](i32), [[AMDGPU_READANYLANE4]](i32), [[AMDGPU_READANYLANE5]](i32), [[AMDGPU_READANYLANE6]](i32), [[AMDGPU_READANYLANE7]](i32), [[AMDGPU_READANYLANE8]](i32), [[AMDGPU_READANYLANE9]](i32), [[AMDGPU_READANYLANE10]](i32), [[AMDGPU_READANYLANE11]](i32), [[AMDGPU_READANYLANE12]](i32), [[AMDGPU_READANYLANE13]](i32), [[AMDGPU_READANYLANE14]](i32), [[AMDGPU_READANYLANE15]](i32) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(<16 x s32>) = G_LOAD %0 :: (load (<16 x s32>) from %ir.constant.not.uniform.v16i32) + %1:_(<16 x i32>) = G_LOAD %0(p4) :: (load (<16 x i32>) from %ir.constant.not.uniform.v16i32, addrspace 4) ... --- @@ -551,53 +602,78 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[COPY1]](p4) :: (load (<2 x s64>) from %ir.constant.not.uniform.v8i64, align 64, addrspace 4) - ; GCN-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY1]], [[C]](s64) - ; GCN-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD]](p4) :: (load (<2 x s64>) from %ir.constant.not.uniform.v8i64 + 16, basealign 64, addrspace 4) - ; GCN-NEXT: [[C1:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 32 - ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY1]], [[C1]](s64) - ; GCN-NEXT: [[LOAD2:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD1]](p4) :: (load (<2 x s64>) from %ir.constant.not.uniform.v8i64 + 32, align 32, basealign 64, addrspace 4) - ; GCN-NEXT: [[C2:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 48 - ; GCN-NEXT: [[PTR_ADD2:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY1]], [[C2]](s64) - ; GCN-NEXT: [[LOAD3:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD2]](p4) :: (load (<2 x s64>) from %ir.constant.not.uniform.v8i64 + 48, basealign 64, addrspace 4) - ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>), [[LOAD2]](<2 x s64>), [[LOAD3]](<2 x s64>) - ; GCN-NEXT: [[UV:%[0-9]+]]:vgpr(s64), [[UV1:%[0-9]+]]:vgpr(s64), [[UV2:%[0-9]+]]:vgpr(s64), [[UV3:%[0-9]+]]:vgpr(s64), [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64), [[UV6:%[0-9]+]]:vgpr(s64), [[UV7:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s64>) - ; GCN-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GCN-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV8]] - ; GCN-NEXT: [[AMDGPU_READANYLANE1:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV9]] - ; GCN-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[AMDGPU_READANYLANE]](s32), [[AMDGPU_READANYLANE1]](s32) - ; GCN-NEXT: [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GCN-NEXT: [[AMDGPU_READANYLANE2:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV10]] - ; GCN-NEXT: [[AMDGPU_READANYLANE3:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV11]] - ; GCN-NEXT: [[MV1:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[AMDGPU_READANYLANE2]](s32), [[AMDGPU_READANYLANE3]](s32) - ; GCN-NEXT: [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GCN-NEXT: [[AMDGPU_READANYLANE4:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV12]] - ; GCN-NEXT: [[AMDGPU_READANYLANE5:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV13]] - ; GCN-NEXT: [[MV2:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[AMDGPU_READANYLANE4]](s32), [[AMDGPU_READANYLANE5]](s32) - ; GCN-NEXT: 
[[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GCN-NEXT: [[AMDGPU_READANYLANE6:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV14]] - ; GCN-NEXT: [[AMDGPU_READANYLANE7:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV15]] - ; GCN-NEXT: [[MV3:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[AMDGPU_READANYLANE6]](s32), [[AMDGPU_READANYLANE7]](s32) - ; GCN-NEXT: [[UV16:%[0-9]+]]:vgpr(s32), [[UV17:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV4]](s64) - ; GCN-NEXT: [[AMDGPU_READANYLANE8:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV16]] - ; GCN-NEXT: [[AMDGPU_READANYLANE9:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV17]] - ; GCN-NEXT: [[MV4:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[AMDGPU_READANYLANE8]](s32), [[AMDGPU_READANYLANE9]](s32) - ; GCN-NEXT: [[UV18:%[0-9]+]]:vgpr(s32), [[UV19:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV5]](s64) - ; GCN-NEXT: [[AMDGPU_READANYLANE10:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV18]] - ; GCN-NEXT: [[AMDGPU_READANYLANE11:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV19]] - ; GCN-NEXT: [[MV5:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[AMDGPU_READANYLANE10]](s32), [[AMDGPU_READANYLANE11]](s32) - ; GCN-NEXT: [[UV20:%[0-9]+]]:vgpr(s32), [[UV21:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV6]](s64) - ; GCN-NEXT: [[AMDGPU_READANYLANE12:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV20]] - ; GCN-NEXT: [[AMDGPU_READANYLANE13:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV21]] - ; GCN-NEXT: [[MV6:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[AMDGPU_READANYLANE12]](s32), [[AMDGPU_READANYLANE13]](s32) - ; GCN-NEXT: [[UV22:%[0-9]+]]:vgpr(s32), [[UV23:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV7]](s64) - ; GCN-NEXT: [[AMDGPU_READANYLANE14:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV22]] - ; GCN-NEXT: [[AMDGPU_READANYLANE15:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV23]] - ; GCN-NEXT: [[MV7:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[AMDGPU_READANYLANE14]](s32), [[AMDGPU_READANYLANE15]](s32) - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x f64>) = G_LOAD [[COPY1]](p4) :: (load (<2 x f64>) from %ir.constant.not.uniform.v8i64, align 64, addrspace 4) + ; GCN-NEXT: [[C:%[0-9]+]]:vgpr(i64) = G_CONSTANT i64 16 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY1]], [[C]](i64) + ; GCN-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<2 x f64>) = G_LOAD [[PTR_ADD]](p4) :: (load (<2 x f64>) from %ir.constant.not.uniform.v8i64 + 16, basealign 64, addrspace 4) + ; GCN-NEXT: [[C1:%[0-9]+]]:vgpr(i64) = G_CONSTANT i64 32 + ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY1]], [[C1]](i64) + ; GCN-NEXT: [[LOAD2:%[0-9]+]]:vgpr(<2 x f64>) = G_LOAD [[PTR_ADD1]](p4) :: (load (<2 x f64>) from %ir.constant.not.uniform.v8i64 + 32, align 32, basealign 64, addrspace 4) + ; GCN-NEXT: [[C2:%[0-9]+]]:vgpr(i64) = G_CONSTANT i64 48 + ; GCN-NEXT: [[PTR_ADD2:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY1]], [[C2]](i64) + ; GCN-NEXT: [[LOAD3:%[0-9]+]]:vgpr(<2 x f64>) = G_LOAD [[PTR_ADD2]](p4) :: (load (<2 x f64>) from %ir.constant.not.uniform.v8i64 + 48, basealign 64, addrspace 4) + ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x f64>) = G_CONCAT_VECTORS [[LOAD]](<2 x f64>), [[LOAD1]](<2 x f64>), [[LOAD2]](<2 x f64>), [[LOAD3]](<2 x f64>) + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<8 x i64>) = G_BITCAST [[CONCAT_VECTORS]](<8 x f64>) + ; GCN-NEXT: [[UV:%[0-9]+]]:vgpr(i64), [[UV1:%[0-9]+]]:vgpr(i64), 
[[UV2:%[0-9]+]]:vgpr(i64), [[UV3:%[0-9]+]]:vgpr(i64), [[UV4:%[0-9]+]]:vgpr(i64), [[UV5:%[0-9]+]]:vgpr(i64), [[UV6:%[0-9]+]]:vgpr(i64), [[UV7:%[0-9]+]]:vgpr(i64) = G_UNMERGE_VALUES [[BITCAST]](<8 x i64>) + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(f64) = G_BITCAST [[UV]](i64) + ; GCN-NEXT: [[BITCAST2:%[0-9]+]]:vgpr(i64) = G_BITCAST [[BITCAST1]](f64) + ; GCN-NEXT: [[UV8:%[0-9]+]]:vgpr(i32), [[UV9:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[BITCAST2]](i64) + ; GCN-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV8]] + ; GCN-NEXT: [[AMDGPU_READANYLANE1:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV9]] + ; GCN-NEXT: [[MV:%[0-9]+]]:sgpr(i64) = G_MERGE_VALUES [[AMDGPU_READANYLANE]](i32), [[AMDGPU_READANYLANE1]](i32) + ; GCN-NEXT: [[BITCAST3:%[0-9]+]]:sgpr(f64) = G_BITCAST [[MV]](i64) + ; GCN-NEXT: [[BITCAST4:%[0-9]+]]:vgpr(f64) = G_BITCAST [[UV1]](i64) + ; GCN-NEXT: [[BITCAST5:%[0-9]+]]:vgpr(i64) = G_BITCAST [[BITCAST4]](f64) + ; GCN-NEXT: [[UV10:%[0-9]+]]:vgpr(i32), [[UV11:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[BITCAST5]](i64) + ; GCN-NEXT: [[AMDGPU_READANYLANE2:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV10]] + ; GCN-NEXT: [[AMDGPU_READANYLANE3:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV11]] + ; GCN-NEXT: [[MV1:%[0-9]+]]:sgpr(i64) = G_MERGE_VALUES [[AMDGPU_READANYLANE2]](i32), [[AMDGPU_READANYLANE3]](i32) + ; GCN-NEXT: [[BITCAST6:%[0-9]+]]:sgpr(f64) = G_BITCAST [[MV1]](i64) + ; GCN-NEXT: [[BITCAST7:%[0-9]+]]:vgpr(f64) = G_BITCAST [[UV2]](i64) + ; GCN-NEXT: [[BITCAST8:%[0-9]+]]:vgpr(i64) = G_BITCAST [[BITCAST7]](f64) + ; GCN-NEXT: [[UV12:%[0-9]+]]:vgpr(i32), [[UV13:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[BITCAST8]](i64) + ; GCN-NEXT: [[AMDGPU_READANYLANE4:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV12]] + ; GCN-NEXT: [[AMDGPU_READANYLANE5:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV13]] + ; GCN-NEXT: [[MV2:%[0-9]+]]:sgpr(i64) = G_MERGE_VALUES [[AMDGPU_READANYLANE4]](i32), [[AMDGPU_READANYLANE5]](i32) + ; GCN-NEXT: [[BITCAST9:%[0-9]+]]:sgpr(f64) = G_BITCAST [[MV2]](i64) + ; GCN-NEXT: [[BITCAST10:%[0-9]+]]:vgpr(f64) = G_BITCAST [[UV3]](i64) + ; GCN-NEXT: [[BITCAST11:%[0-9]+]]:vgpr(i64) = G_BITCAST [[BITCAST10]](f64) + ; GCN-NEXT: [[UV14:%[0-9]+]]:vgpr(i32), [[UV15:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[BITCAST11]](i64) + ; GCN-NEXT: [[AMDGPU_READANYLANE6:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV14]] + ; GCN-NEXT: [[AMDGPU_READANYLANE7:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV15]] + ; GCN-NEXT: [[MV3:%[0-9]+]]:sgpr(i64) = G_MERGE_VALUES [[AMDGPU_READANYLANE6]](i32), [[AMDGPU_READANYLANE7]](i32) + ; GCN-NEXT: [[BITCAST12:%[0-9]+]]:sgpr(f64) = G_BITCAST [[MV3]](i64) + ; GCN-NEXT: [[BITCAST13:%[0-9]+]]:vgpr(f64) = G_BITCAST [[UV4]](i64) + ; GCN-NEXT: [[BITCAST14:%[0-9]+]]:vgpr(i64) = G_BITCAST [[BITCAST13]](f64) + ; GCN-NEXT: [[UV16:%[0-9]+]]:vgpr(i32), [[UV17:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[BITCAST14]](i64) + ; GCN-NEXT: [[AMDGPU_READANYLANE8:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV16]] + ; GCN-NEXT: [[AMDGPU_READANYLANE9:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV17]] + ; GCN-NEXT: [[MV4:%[0-9]+]]:sgpr(i64) = G_MERGE_VALUES [[AMDGPU_READANYLANE8]](i32), [[AMDGPU_READANYLANE9]](i32) + ; GCN-NEXT: [[BITCAST15:%[0-9]+]]:sgpr(f64) = G_BITCAST [[MV4]](i64) + ; GCN-NEXT: [[BITCAST16:%[0-9]+]]:vgpr(f64) = G_BITCAST [[UV5]](i64) + ; GCN-NEXT: [[BITCAST17:%[0-9]+]]:vgpr(i64) = G_BITCAST [[BITCAST16]](f64) + ; GCN-NEXT: [[UV18:%[0-9]+]]:vgpr(i32), [[UV19:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[BITCAST17]](i64) + ; GCN-NEXT: 
[[AMDGPU_READANYLANE10:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV18]] + ; GCN-NEXT: [[AMDGPU_READANYLANE11:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV19]] + ; GCN-NEXT: [[MV5:%[0-9]+]]:sgpr(i64) = G_MERGE_VALUES [[AMDGPU_READANYLANE10]](i32), [[AMDGPU_READANYLANE11]](i32) + ; GCN-NEXT: [[BITCAST18:%[0-9]+]]:sgpr(f64) = G_BITCAST [[MV5]](i64) + ; GCN-NEXT: [[BITCAST19:%[0-9]+]]:vgpr(f64) = G_BITCAST [[UV6]](i64) + ; GCN-NEXT: [[BITCAST20:%[0-9]+]]:vgpr(i64) = G_BITCAST [[BITCAST19]](f64) + ; GCN-NEXT: [[UV20:%[0-9]+]]:vgpr(i32), [[UV21:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[BITCAST20]](i64) + ; GCN-NEXT: [[AMDGPU_READANYLANE12:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV20]] + ; GCN-NEXT: [[AMDGPU_READANYLANE13:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV21]] + ; GCN-NEXT: [[MV6:%[0-9]+]]:sgpr(i64) = G_MERGE_VALUES [[AMDGPU_READANYLANE12]](i32), [[AMDGPU_READANYLANE13]](i32) + ; GCN-NEXT: [[BITCAST21:%[0-9]+]]:sgpr(f64) = G_BITCAST [[MV6]](i64) + ; GCN-NEXT: [[BITCAST22:%[0-9]+]]:vgpr(f64) = G_BITCAST [[UV7]](i64) + ; GCN-NEXT: [[BITCAST23:%[0-9]+]]:vgpr(i64) = G_BITCAST [[BITCAST22]](f64) + ; GCN-NEXT: [[UV22:%[0-9]+]]:vgpr(i32), [[UV23:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[BITCAST23]](i64) + ; GCN-NEXT: [[AMDGPU_READANYLANE14:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV22]] + ; GCN-NEXT: [[AMDGPU_READANYLANE15:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV23]] + ; GCN-NEXT: [[MV7:%[0-9]+]]:sgpr(i64) = G_MERGE_VALUES [[AMDGPU_READANYLANE14]](i32), [[AMDGPU_READANYLANE15]](i32) + ; GCN-NEXT: [[BITCAST24:%[0-9]+]]:sgpr(f64) = G_BITCAST [[MV7]](i64) + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x f64>) = G_BUILD_VECTOR [[BITCAST3]](f64), [[BITCAST6]](f64), [[BITCAST9]](f64), [[BITCAST12]](f64), [[BITCAST15]](f64), [[BITCAST18]](f64), [[BITCAST21]](f64), [[BITCAST24]](f64) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(<8 x s64>) = G_LOAD %0 :: (load (<8 x s64>) from %ir.constant.not.uniform.v8i64) + %1:_(<8 x f64>) = G_LOAD %0(p4) :: (load (<8 x f64>) from %ir.constant.not.uniform.v8i64, addrspace 4) ... --- @@ -612,9 +688,9 @@ body: | ; GCN: liveins: $sgpr0_sgpr1 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[LOAD:%[0-9]+]]:sgpr(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s32>), addrspace 4) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:sgpr(<8 x i32>) = G_LOAD [[COPY]](p4) :: (load (<8 x i32>), addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(<8 x s32>) = G_LOAD %0 :: (load (<8 x s32>), addrspace 4) + %1:_(<8 x i32>) = G_LOAD %0(p4) :: (load (<8 x i32>), addrspace 4) ... --- @@ -629,9 +705,9 @@ body: | ; GCN: liveins: $sgpr0_sgpr1 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[LOAD:%[0-9]+]]:sgpr(<16 x s16>) = G_LOAD [[COPY]](p4) :: (load (<16 x s16>), addrspace 4) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:sgpr(<16 x i16>) = G_LOAD [[COPY]](p4) :: (load (<16 x i16>), addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(<16 x s16>) = G_LOAD %0 :: (load (<16 x s16>), addrspace 4) + %1:_(<16 x i16>) = G_LOAD %0(p4) :: (load (<16 x i16>), addrspace 4) ... 
--- @@ -646,9 +722,9 @@ body: | ; GCN: liveins: $sgpr0_sgpr1 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[LOAD:%[0-9]+]]:sgpr(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), addrspace 4) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:sgpr(<4 x f64>) = G_LOAD [[COPY]](p4) :: (load (<4 x f64>), addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(<4 x s64>) = G_LOAD %0 :: (load (<4 x s64>), addrspace 4) + %1:_(<4 x f64>) = G_LOAD %0(p4) :: (load (<4 x f64>), addrspace 4) ... --- @@ -663,9 +739,9 @@ body: | ; GCN: liveins: $sgpr0_sgpr1 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[LOAD:%[0-9]+]]:sgpr(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s32>), addrspace 4) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:sgpr(<16 x i32>) = G_LOAD [[COPY]](p4) :: (load (<16 x i32>), addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(<16 x s32>) = G_LOAD %0 :: (load (<16 x s32>), addrspace 4) + %1:_(<16 x i32>) = G_LOAD %0(p4) :: (load (<16 x i32>), addrspace 4) ... --- @@ -680,9 +756,9 @@ body: | ; GCN: liveins: $sgpr0_sgpr1 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[LOAD:%[0-9]+]]:sgpr(<8 x s64>) = G_LOAD [[COPY]](p4) :: (load (<8 x s64>), addrspace 4) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:sgpr(<8 x f64>) = G_LOAD [[COPY]](p4) :: (load (<8 x f64>), addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(<8 x s64>) = G_LOAD %0 :: (load (<8 x s64>), addrspace 4) + %1:_(<8 x f64>) = G_LOAD %0(p4) :: (load (<8 x f64>), addrspace 4) ... --- @@ -698,10 +774,10 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p3) :: (load (s32), addrspace 3) - ; GCN-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[LOAD]] + ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(i32) = G_LOAD [[COPY1]](p3) :: (load (i32), addrspace 3) + ; GCN-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[LOAD]] %0:_(p3) = COPY $sgpr0 - %1:_(s32) = G_LOAD %0 :: (load (s32), addrspace 3) + %1:_(i32) = G_LOAD %0(p3) :: (load (i32), addrspace 3) ... --- @@ -717,9 +793,9 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p3) :: (load (s32), addrspace 5) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(i32) = G_LOAD [[COPY1]](p3) :: (load (i32), addrspace 5) %0:_(p3) = COPY $sgpr0 - %1:_(s32) = G_LOAD %0 :: (load (s32), addrspace 5) + %1:_(i32) = G_LOAD %0(p3) :: (load (i32), addrspace 5) ... 
@@ -736,16 +812,16 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load (s8), addrspace 4) - ; GFX7-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[LOAD]] + ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vgpr(i32) = G_LOAD [[COPY1]](p4) :: (load (i8), addrspace 4) + ; GFX7-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[LOAD]] ; ; GFX12-LABEL: name: extload_constant_i8_to_i32_uniform ; GFX12: liveins: $sgpr0_sgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:sgpr(i32) = G_LOAD [[COPY]](p4) :: (load (i8), addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_LOAD %0 :: (load (s8), addrspace 4, align 1) + %1:_(i32) = G_LOAD %0(p4) :: (load (i8), addrspace 4) ... --- @@ -762,10 +838,10 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load (s8), addrspace 1) - ; GCN-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[LOAD]] + ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(i32) = G_LOAD [[COPY1]](p4) :: (load (i8), addrspace 1) + ; GCN-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[LOAD]] %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_LOAD %0 :: (load (s8), addrspace 1, align 1) + %1:_(i32) = G_LOAD %0(p4) :: (load (i8), addrspace 1) ... --- @@ -782,16 +858,16 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load (s16), addrspace 4) - ; GFX7-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[LOAD]] + ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vgpr(i32) = G_LOAD [[COPY1]](p4) :: (load (i16), addrspace 4) + ; GFX7-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[LOAD]] ; ; GFX12-LABEL: name: extload_constant_i16_to_i32_uniform ; GFX12: liveins: $sgpr0_sgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:sgpr(i32) = G_LOAD [[COPY]](p4) :: (load (i16), addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_LOAD %0 :: (load (s16), addrspace 4, align 2) + %1:_(i32) = G_LOAD %0(p4) :: (load (i16), addrspace 4) ... --- @@ -808,10 +884,10 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load (s16), addrspace 1) - ; GCN-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[LOAD]] + ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(i32) = G_LOAD [[COPY1]](p4) :: (load (i16), addrspace 1) + ; GCN-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[LOAD]] %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_LOAD %0 :: (load (s16), addrspace 1, align 2) + %1:_(i32) = G_LOAD %0(p4) :: (load (i16), addrspace 1) ... 
--- @@ -826,9 +902,9 @@ body: | ; GCN: liveins: $sgpr0_sgpr1 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:sgpr(i32) = G_LOAD [[COPY]](p4) :: (load (i32), addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_LOAD %0 :: (load (s32), addrspace 4, align 4) + %1:_(i32) = G_LOAD %0(p4) :: (load (i32), addrspace 4) ... --- @@ -845,10 +921,10 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load (s32), align 2, addrspace 4) - ; GCN-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[LOAD]] + ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(i32) = G_LOAD [[COPY1]](p4) :: (load (i32), align 2, addrspace 4) + ; GCN-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[LOAD]] %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_LOAD %0 :: (load (s32), addrspace 4, align 2) + %1:_(i32) = G_LOAD %0(p4) :: (load (i32), align 2, addrspace 4) ... --- @@ -865,10 +941,10 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load (s32), align 1, addrspace 4) - ; GCN-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[LOAD]] + ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(i32) = G_LOAD [[COPY1]](p4) :: (load (i32), align 1, addrspace 4) + ; GCN-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[LOAD]] %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_LOAD %0 :: (load (s32), addrspace 4, align 1) + %1:_(i32) = G_LOAD %0(p4) :: (load (i32), align 1, addrspace 4) ... --- @@ -885,9 +961,9 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sgpr0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr(p5) = COPY [[COPY]](p5) - ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p5) :: (load (s32), addrspace 5) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(i32) = G_LOAD [[COPY1]](p5) :: (load (i32), addrspace 5) %0:_(p5) = COPY $sgpr0 - %1:_(s32) = G_LOAD %0 :: (load (s32), addrspace 5, align 4) + %1:_(i32) = G_LOAD %0(p5) :: (load (i32), addrspace 5) ... 
--- @@ -903,13 +979,13 @@ body: | ; GCN: liveins: $vgpr0_vgpr1 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr(p4) = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 32, addrspace 4) - ; GCN-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GCN-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<4 x s32>) from unknown-address + 16, addrspace 4) - ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(<4 x i32>) = G_LOAD [[COPY]](p4) :: (load (<4 x i32>), align 32, addrspace 4) + ; GCN-NEXT: [[C:%[0-9]+]]:vgpr(i64) = G_CONSTANT i64 16 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GCN-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<4 x i32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<4 x i32>) from unknown-address + 16, addrspace 4) + ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<8 x s32>) = G_LOAD %0 :: (load (<8 x s32>), addrspace 4) + %1:_(<8 x i32>) = G_LOAD %0(p4) :: (load (<8 x i32>), addrspace 4) ... --- @@ -931,11 +1007,11 @@ body: | ; GCN-NEXT: successors: %bb.1(0x80000000) ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[PHI:%[0-9]+]]:vgpr(p4) = G_PHI [[COPY]](p4), %bb.0, %3(p4), %bb.1 - ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PHI]](p4) :: (load (<4 x s32>), align 32, addrspace 4) - ; GCN-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[PHI]], [[C]](s64) - ; GCN-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<4 x s32>) from unknown-address + 16, addrspace 4) - ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(<4 x i32>) = G_LOAD [[PHI]](p4) :: (load (<4 x i32>), align 32, addrspace 4) + ; GCN-NEXT: [[C:%[0-9]+]]:vgpr(i64) = G_CONSTANT i64 16 + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[PHI]], [[C]](i64) + ; GCN-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<4 x i32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<4 x i32>) from unknown-address + 16, addrspace 4) + ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>) ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr(p4) = COPY [[COPY1]](p4) ; GCN-NEXT: G_BR %bb.1 bb.0: @@ -947,10 +1023,15 @@ body: | G_BR %bb.1 bb.1: - %2:_(p4) = G_PHI %0, %bb.0, %4, %bb.1 - %3:_(<8 x s32>) = G_LOAD %2 :: (load (<8 x s32>), addrspace 4) - %4:_(p4) = COPY %1 + successors: %bb.1(0x80000000) + + %2:_(p4) = G_PHI %0(p4), %bb.0, %3(p4), %bb.1 + %4:_(<8 x i32>) = G_LOAD %2(p4) :: (load (<8 x i32>), addrspace 4) + %3:_(p4) = COPY %1(p4) G_BR %bb.1 + + + ... 
--- @@ -965,23 +1046,23 @@ body: | ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_LOAD [[COPY]](p4) :: (invariant load (<2 x s32>), align 4, addrspace 4) - ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 - ; GFX7-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX7-NEXT: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (s32) from unknown-address + 8, addrspace 4) - ; GFX7-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) - ; GFX7-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; GFX7-NEXT: [[LOAD:%[0-9]+]]:sgpr(<2 x i32>) = G_LOAD [[COPY]](p4) :: (invariant load (<2 x i32>), align 4, addrspace 4) + ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(i64) = G_CONSTANT i64 8 + ; GFX7-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX7-NEXT: [[LOAD1:%[0-9]+]]:sgpr(i32) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (i32) from unknown-address + 8, addrspace 4) + ; GFX7-NEXT: [[UV:%[0-9]+]]:sgpr(i32), [[UV1:%[0-9]+]]:sgpr(i32) = G_UNMERGE_VALUES [[LOAD]](<2 x i32>) + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<3 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[LOAD1]](i32) + ; GFX7-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x i32>) ; ; GFX12-LABEL: name: load_constant_v3i32_align4 ; GFX12: liveins: $sgpr0_sgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:sgpr(<3 x s32>) = G_LOAD [[COPY]](p4) :: (invariant load (<3 x s32>), align 4, addrspace 4) - ; GFX12-NEXT: S_ENDPGM 0, implicit [[LOAD]](<3 x s32>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:sgpr(<3 x i32>) = G_LOAD [[COPY]](p4) :: (invariant load (<3 x i32>), align 4, addrspace 4) + ; GFX12-NEXT: S_ENDPGM 0, implicit [[LOAD]](<3 x i32>) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(<3 x s32>) = G_LOAD %0 :: (invariant load (<3 x s32>), addrspace 4, align 4) - S_ENDPGM 0, implicit %1 + %1:_(<3 x i32>) = G_LOAD %0(p4) :: (invariant load (<3 x i32>), align 4, addrspace 4) + S_ENDPGM 0, implicit %1(<3 x i32>) ... 
--- @@ -996,23 +1077,23 @@ body: | ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_LOAD [[COPY]](p4) :: (invariant load (<2 x s32>), addrspace 4) - ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 - ; GFX7-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX7-NEXT: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (s32) from unknown-address + 8, align 8, addrspace 4) - ; GFX7-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) - ; GFX7-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; GFX7-NEXT: [[LOAD:%[0-9]+]]:sgpr(<2 x i32>) = G_LOAD [[COPY]](p4) :: (invariant load (<2 x i32>), addrspace 4) + ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(i64) = G_CONSTANT i64 8 + ; GFX7-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX7-NEXT: [[LOAD1:%[0-9]+]]:sgpr(i32) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (i32) from unknown-address + 8, align 8, addrspace 4) + ; GFX7-NEXT: [[UV:%[0-9]+]]:sgpr(i32), [[UV1:%[0-9]+]]:sgpr(i32) = G_UNMERGE_VALUES [[LOAD]](<2 x i32>) + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<3 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[LOAD1]](i32) + ; GFX7-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x i32>) ; ; GFX12-LABEL: name: load_constant_v3i32_align8 ; GFX12: liveins: $sgpr0_sgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:sgpr(<3 x s32>) = G_LOAD [[COPY]](p4) :: (invariant load (<3 x s32>), align 8, addrspace 4) - ; GFX12-NEXT: S_ENDPGM 0, implicit [[LOAD]](<3 x s32>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:sgpr(<3 x i32>) = G_LOAD [[COPY]](p4) :: (invariant load (<3 x i32>), align 8, addrspace 4) + ; GFX12-NEXT: S_ENDPGM 0, implicit [[LOAD]](<3 x i32>) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(<3 x s32>) = G_LOAD %0 :: (invariant load (<3 x s32>), addrspace 4, align 8) - S_ENDPGM 0, implicit %1 + %1:_(<3 x i32>) = G_LOAD %0(p4) :: (invariant load (<3 x i32>), align 8, addrspace 4) + S_ENDPGM 0, implicit %1(<3 x i32>) ... 
--- @@ -1027,20 +1108,20 @@ body: | ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[LOAD:%[0-9]+]]:sgpr(<4 x s32>) = G_LOAD [[COPY]](p4) :: (invariant load (<4 x s32>), addrspace 4) - ; GFX7-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) - ; GFX7-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; GFX7-NEXT: [[LOAD:%[0-9]+]]:sgpr(<4 x i32>) = G_LOAD [[COPY]](p4) :: (invariant load (<4 x i32>), addrspace 4) + ; GFX7-NEXT: [[UV:%[0-9]+]]:sgpr(i32), [[UV1:%[0-9]+]]:sgpr(i32), [[UV2:%[0-9]+]]:sgpr(i32), [[UV3:%[0-9]+]]:sgpr(i32) = G_UNMERGE_VALUES [[LOAD]](<4 x i32>) + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<3 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[UV2]](i32) + ; GFX7-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x i32>) ; ; GFX12-LABEL: name: load_constant_v3i32_align16 ; GFX12: liveins: $sgpr0_sgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:sgpr(<3 x s32>) = G_LOAD [[COPY]](p4) :: (invariant load (<3 x s32>), align 16, addrspace 4) - ; GFX12-NEXT: S_ENDPGM 0, implicit [[LOAD]](<3 x s32>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:sgpr(<3 x i32>) = G_LOAD [[COPY]](p4) :: (invariant load (<3 x i32>), align 16, addrspace 4) + ; GFX12-NEXT: S_ENDPGM 0, implicit [[LOAD]](<3 x i32>) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(<3 x s32>) = G_LOAD %0 :: (invariant load (<3 x s32>), addrspace 4, align 16) - S_ENDPGM 0, implicit %1 + %1:_(<3 x i32>) = G_LOAD %0(p4) :: (invariant load (<3 x i32>), align 16, addrspace 4) + S_ENDPGM 0, implicit %1(<3 x i32>) ... 
--- @@ -1055,23 +1136,23 @@ body: | ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[LOAD:%[0-9]+]]:sgpr(<4 x s16>) = G_LOAD [[COPY]](p4) :: (invariant load (<4 x s16>), align 4, addrspace 4) - ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 - ; GFX7-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX7-NEXT: [[LOAD1:%[0-9]+]]:sgpr(<2 x s16>) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (<2 x s16>) from unknown-address + 8, addrspace 4) - ; GFX7-NEXT: [[UV:%[0-9]+]]:sgpr(<2 x s16>), [[UV1:%[0-9]+]]:sgpr(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; GFX7-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:sgpr(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[LOAD1]](<2 x s16>) - ; GFX7-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<6 x s16>) + ; GFX7-NEXT: [[LOAD:%[0-9]+]]:sgpr(<4 x i16>) = G_LOAD [[COPY]](p4) :: (invariant load (<4 x i16>), align 4, addrspace 4) + ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(i64) = G_CONSTANT i64 8 + ; GFX7-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX7-NEXT: [[LOAD1:%[0-9]+]]:sgpr(<2 x i16>) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (<2 x i16>) from unknown-address + 8, addrspace 4) + ; GFX7-NEXT: [[UV:%[0-9]+]]:sgpr(<2 x i16>), [[UV1:%[0-9]+]]:sgpr(<2 x i16>) = G_UNMERGE_VALUES [[LOAD]](<4 x i16>) + ; GFX7-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:sgpr(<6 x i16>) = G_CONCAT_VECTORS [[UV]](<2 x i16>), [[UV1]](<2 x i16>), [[LOAD1]](<2 x i16>) + ; GFX7-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX12-LABEL: name: load_constant_v6i16_align4 ; GFX12: liveins: $sgpr0_sgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:sgpr(<6 x s16>) = G_LOAD [[COPY]](p4) :: (invariant load (<6 x s16>), align 4, addrspace 4) - ; GFX12-NEXT: S_ENDPGM 0, implicit [[LOAD]](<6 x s16>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:sgpr(<6 x i16>) = G_LOAD [[COPY]](p4) :: (invariant load (<6 x i16>), align 4, addrspace 4) + ; GFX12-NEXT: S_ENDPGM 0, implicit [[LOAD]](<6 x i16>) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(<6 x s16>) = G_LOAD %0 :: (invariant load (<6 x s16>), addrspace 4, align 4) - S_ENDPGM 0, implicit %1 + %1:_(<6 x i16>) = G_LOAD %0(p4) :: (invariant load (<6 x i16>), align 4, addrspace 4) + S_ENDPGM 0, implicit %1(<6 x i16>) ... 
--- @@ -1086,23 +1167,23 @@ body: | ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[LOAD:%[0-9]+]]:sgpr(<4 x s16>) = G_LOAD [[COPY]](p4) :: (invariant load (<4 x s16>), addrspace 4) - ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 - ; GFX7-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX7-NEXT: [[LOAD1:%[0-9]+]]:sgpr(<2 x s16>) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (<2 x s16>) from unknown-address + 8, align 8, addrspace 4) - ; GFX7-NEXT: [[UV:%[0-9]+]]:sgpr(<2 x s16>), [[UV1:%[0-9]+]]:sgpr(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; GFX7-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:sgpr(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[LOAD1]](<2 x s16>) - ; GFX7-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<6 x s16>) + ; GFX7-NEXT: [[LOAD:%[0-9]+]]:sgpr(<4 x i16>) = G_LOAD [[COPY]](p4) :: (invariant load (<4 x i16>), addrspace 4) + ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(i64) = G_CONSTANT i64 8 + ; GFX7-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX7-NEXT: [[LOAD1:%[0-9]+]]:sgpr(<2 x i16>) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (<2 x i16>) from unknown-address + 8, align 8, addrspace 4) + ; GFX7-NEXT: [[UV:%[0-9]+]]:sgpr(<2 x i16>), [[UV1:%[0-9]+]]:sgpr(<2 x i16>) = G_UNMERGE_VALUES [[LOAD]](<4 x i16>) + ; GFX7-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:sgpr(<6 x i16>) = G_CONCAT_VECTORS [[UV]](<2 x i16>), [[UV1]](<2 x i16>), [[LOAD1]](<2 x i16>) + ; GFX7-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX12-LABEL: name: load_constant_v6i16_align8 ; GFX12: liveins: $sgpr0_sgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:sgpr(<6 x s16>) = G_LOAD [[COPY]](p4) :: (invariant load (<6 x s16>), align 8, addrspace 4) - ; GFX12-NEXT: S_ENDPGM 0, implicit [[LOAD]](<6 x s16>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:sgpr(<6 x i16>) = G_LOAD [[COPY]](p4) :: (invariant load (<6 x i16>), align 8, addrspace 4) + ; GFX12-NEXT: S_ENDPGM 0, implicit [[LOAD]](<6 x i16>) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(<6 x s16>) = G_LOAD %0 :: (invariant load (<6 x s16>), addrspace 4, align 8) - S_ENDPGM 0, implicit %1 + %1:_(<6 x i16>) = G_LOAD %0(p4) :: (invariant load (<6 x i16>), align 8, addrspace 4) + S_ENDPGM 0, implicit %1(<6 x i16>) ... 
--- @@ -1117,20 +1198,20 @@ body: | ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[LOAD:%[0-9]+]]:sgpr(<8 x s16>) = G_LOAD [[COPY]](p4) :: (invariant load (<8 x s16>), addrspace 4) - ; GFX7-NEXT: [[UV:%[0-9]+]]:sgpr(<2 x s16>), [[UV1:%[0-9]+]]:sgpr(<2 x s16>), [[UV2:%[0-9]+]]:sgpr(<2 x s16>), [[UV3:%[0-9]+]]:sgpr(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<8 x s16>) - ; GFX7-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:sgpr(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[UV2]](<2 x s16>) - ; GFX7-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<6 x s16>) + ; GFX7-NEXT: [[LOAD:%[0-9]+]]:sgpr(<8 x i16>) = G_LOAD [[COPY]](p4) :: (invariant load (<8 x i16>), addrspace 4) + ; GFX7-NEXT: [[UV:%[0-9]+]]:sgpr(<2 x i16>), [[UV1:%[0-9]+]]:sgpr(<2 x i16>), [[UV2:%[0-9]+]]:sgpr(<2 x i16>), [[UV3:%[0-9]+]]:sgpr(<2 x i16>) = G_UNMERGE_VALUES [[LOAD]](<8 x i16>) + ; GFX7-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:sgpr(<6 x i16>) = G_CONCAT_VECTORS [[UV]](<2 x i16>), [[UV1]](<2 x i16>), [[UV2]](<2 x i16>) + ; GFX7-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<6 x i16>) ; ; GFX12-LABEL: name: load_constant_v6i16_align16 ; GFX12: liveins: $sgpr0_sgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:sgpr(<6 x s16>) = G_LOAD [[COPY]](p4) :: (invariant load (<6 x s16>), align 16, addrspace 4) - ; GFX12-NEXT: S_ENDPGM 0, implicit [[LOAD]](<6 x s16>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:sgpr(<6 x i16>) = G_LOAD [[COPY]](p4) :: (invariant load (<6 x i16>), align 16, addrspace 4) + ; GFX12-NEXT: S_ENDPGM 0, implicit [[LOAD]](<6 x i16>) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(<6 x s16>) = G_LOAD %0 :: (invariant load (<6 x s16>), addrspace 4, align 16) - S_ENDPGM 0, implicit %1 + %1:_(<6 x i16>) = G_LOAD %0(p4) :: (invariant load (<6 x i16>), align 16, addrspace 4) + S_ENDPGM 0, implicit %1(<6 x i16>) ... 
--- @@ -1145,23 +1226,23 @@ body: | ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[LOAD:%[0-9]+]]:sgpr(s64) = G_LOAD [[COPY]](p4) :: (invariant load (s64), align 4, addrspace 4) - ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 - ; GFX7-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX7-NEXT: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (s32) from unknown-address + 8, addrspace 4) - ; GFX7-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[LOAD]](s64) - ; GFX7-NEXT: [[MV:%[0-9]+]]:sgpr(s96) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) - ; GFX7-NEXT: S_ENDPGM 0, implicit [[MV]](s96) + ; GFX7-NEXT: [[LOAD:%[0-9]+]]:sgpr(i64) = G_LOAD [[COPY]](p4) :: (invariant load (i64), align 4, addrspace 4) + ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(i64) = G_CONSTANT i64 8 + ; GFX7-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX7-NEXT: [[LOAD1:%[0-9]+]]:sgpr(i32) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (i32) from unknown-address + 8, addrspace 4) + ; GFX7-NEXT: [[UV:%[0-9]+]]:sgpr(i32), [[UV1:%[0-9]+]]:sgpr(i32) = G_UNMERGE_VALUES [[LOAD]](i64) + ; GFX7-NEXT: [[MV:%[0-9]+]]:sgpr(i96) = G_MERGE_VALUES [[UV]](i32), [[UV1]](i32), [[LOAD1]](i32) + ; GFX7-NEXT: S_ENDPGM 0, implicit [[MV]](i96) ; ; GFX12-LABEL: name: load_constant_i96_align4 ; GFX12: liveins: $sgpr0_sgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:sgpr(s96) = G_LOAD [[COPY]](p4) :: (invariant load (s96), align 4, addrspace 4) - ; GFX12-NEXT: S_ENDPGM 0, implicit [[LOAD]](s96) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:sgpr(i96) = G_LOAD [[COPY]](p4) :: (invariant load (i96), align 4, addrspace 4) + ; GFX12-NEXT: S_ENDPGM 0, implicit [[LOAD]](i96) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(s96) = G_LOAD %0 :: (invariant load (s96), addrspace 4, align 4) - S_ENDPGM 0, implicit %1 + %1:_(i96) = G_LOAD %0(p4) :: (invariant load (i96), align 4, addrspace 4) + S_ENDPGM 0, implicit %1(i96) ... 
--- @@ -1176,23 +1257,23 @@ body: | ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[LOAD:%[0-9]+]]:sgpr(s64) = G_LOAD [[COPY]](p4) :: (invariant load (s64), addrspace 4) - ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 - ; GFX7-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX7-NEXT: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (s32) from unknown-address + 8, align 8, addrspace 4) - ; GFX7-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[LOAD]](s64) - ; GFX7-NEXT: [[MV:%[0-9]+]]:sgpr(s96) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) - ; GFX7-NEXT: S_ENDPGM 0, implicit [[MV]](s96) + ; GFX7-NEXT: [[LOAD:%[0-9]+]]:sgpr(i64) = G_LOAD [[COPY]](p4) :: (invariant load (i64), addrspace 4) + ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(i64) = G_CONSTANT i64 8 + ; GFX7-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX7-NEXT: [[LOAD1:%[0-9]+]]:sgpr(i32) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (i32) from unknown-address + 8, align 8, addrspace 4) + ; GFX7-NEXT: [[UV:%[0-9]+]]:sgpr(i32), [[UV1:%[0-9]+]]:sgpr(i32) = G_UNMERGE_VALUES [[LOAD]](i64) + ; GFX7-NEXT: [[MV:%[0-9]+]]:sgpr(i96) = G_MERGE_VALUES [[UV]](i32), [[UV1]](i32), [[LOAD1]](i32) + ; GFX7-NEXT: S_ENDPGM 0, implicit [[MV]](i96) ; ; GFX12-LABEL: name: load_constant_i96_align8 ; GFX12: liveins: $sgpr0_sgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:sgpr(s96) = G_LOAD [[COPY]](p4) :: (invariant load (s96), align 8, addrspace 4) - ; GFX12-NEXT: S_ENDPGM 0, implicit [[LOAD]](s96) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:sgpr(i96) = G_LOAD [[COPY]](p4) :: (invariant load (i96), align 8, addrspace 4) + ; GFX12-NEXT: S_ENDPGM 0, implicit [[LOAD]](i96) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(s96) = G_LOAD %0 :: (invariant load (s96), addrspace 4, align 8) - S_ENDPGM 0, implicit %1 + %1:_(i96) = G_LOAD %0(p4) :: (invariant load (i96), align 8, addrspace 4) + S_ENDPGM 0, implicit %1(i96) ... --- @@ -1207,17 +1288,17 @@ body: | ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[LOAD:%[0-9]+]]:sgpr(s128) = G_LOAD [[COPY]](p4) :: (invariant load (s128), addrspace 4) - ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s96) = G_TRUNC [[LOAD]](s128) - ; GFX7-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s96) + ; GFX7-NEXT: [[LOAD:%[0-9]+]]:sgpr(i128) = G_LOAD [[COPY]](p4) :: (invariant load (i128), addrspace 4) + ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i96) = G_TRUNC [[LOAD]](i128) + ; GFX7-NEXT: S_ENDPGM 0, implicit [[TRUNC]](i96) ; ; GFX12-LABEL: name: load_constant_i96_align16 ; GFX12: liveins: $sgpr0_sgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:sgpr(s96) = G_LOAD [[COPY]](p4) :: (invariant load (s96), align 16, addrspace 4) - ; GFX12-NEXT: S_ENDPGM 0, implicit [[LOAD]](s96) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:sgpr(i96) = G_LOAD [[COPY]](p4) :: (invariant load (i96), align 16, addrspace 4) + ; GFX12-NEXT: S_ENDPGM 0, implicit [[LOAD]](i96) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(s96) = G_LOAD %0 :: (invariant load (s96), addrspace 4, align 16) - S_ENDPGM 0, implicit %1 + %1:_(i96) = G_LOAD %0(p4) :: (invariant load (i96), align 16, addrspace 4) + S_ENDPGM 0, implicit %1(i96) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-lshr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-lshr.mir index c5024924a4d32..e6969aca0799c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-lshr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-lshr.mir @@ -12,14 +12,14 @@ body: | ; CHECK-LABEL: name: lshr_s32_ss ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:sgpr(s32) = G_LSHR [[COPY]], [[COPY1]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[LSHR]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_LSHR %0, %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:sgpr(i32) = G_LSHR [[COPY]], [[COPY1]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[LSHR]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_LSHR %0, %1(i32) + S_ENDPGM 0, implicit %2(i32) ... --- @@ -32,15 +32,15 @@ body: | ; CHECK-LABEL: name: lshr_s32_sv ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(s32) = G_LSHR [[COPY2]], [[COPY1]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[LSHR]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = G_LSHR %0, %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(i32) = G_LSHR [[COPY2]], [[COPY1]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[LSHR]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = G_LSHR %0, %1(i32) + S_ENDPGM 0, implicit %2(i32) ... --- @@ -53,15 +53,15 @@ body: | ; CHECK-LABEL: name: lshr_s32_vs ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(s32) = G_LSHR [[COPY]], [[COPY2]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[LSHR]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s32) = G_LSHR %0, %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(i32) = G_LSHR [[COPY]], [[COPY2]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[LSHR]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr0 + %2:_(i32) = G_LSHR %0, %1(i32) + S_ENDPGM 0, implicit %2(i32) ... 
--- @@ -74,14 +74,14 @@ body: | ; CHECK-LABEL: name: lshr_s32_vv ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(s32) = G_LSHR [[COPY]], [[COPY1]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[LSHR]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_LSHR %0, %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(i32) = G_LSHR [[COPY]], [[COPY1]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[LSHR]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_LSHR %0, %1(i32) + S_ENDPGM 0, implicit %2(i32) ... --- @@ -94,21 +94,21 @@ body: | ; CHECK-LABEL: name: lshr_s16_ss ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s16) - ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s16) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:sgpr(s32) = G_LSHR [[ZEXT]], [[ZEXT1]](s32) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC2]](s16) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s16) = G_LSHR %2, %3 - S_ENDPGM 0, implicit %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i16) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:sgpr(i32) = G_LSHR [[ZEXT]], [[ZEXT1]](i32) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(i16) = G_TRUNC [[LSHR]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC2]](i16) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i16) = G_LSHR %2, %3(i16) + S_ENDPGM 0, implicit %4(i16) ... 
--- @@ -122,19 +122,19 @@ body: | ; CHECK-LABEL: name: lshr_s16_sv ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC]](s16) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[COPY2]], [[TRUNC1]](s16) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[LSHR]](s16) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s16) = G_LSHR %2, %3 - S_ENDPGM 0, implicit %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i16) = COPY [[TRUNC]](i16) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(i16) = G_LSHR [[COPY2]], [[TRUNC1]](i16) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[LSHR]](i16) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i16) = G_LSHR %2, %3(i16) + S_ENDPGM 0, implicit %4(i16) ... --- @@ -147,19 +147,19 @@ body: | ; CHECK-LABEL: name: lshr_s16_vs ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC1]](s16) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY2]](s16) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[LSHR]](s16) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s16) = G_LSHR %2, %3 - S_ENDPGM 0, implicit %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i16) = COPY [[TRUNC1]](i16) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(i16) = G_LSHR [[TRUNC]], [[COPY2]](i16) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[LSHR]](i16) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr0 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i16) = G_LSHR %2, %3(i16) + S_ENDPGM 0, implicit %4(i16) ... 
@@ -173,18 +173,18 @@ body: | ; CHECK-LABEL: name: lshr_s16_vv ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[TRUNC1]](s16) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[LSHR]](s16) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s16) = G_LSHR %2, %3 - S_ENDPGM 0, implicit %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(i16) = G_LSHR [[TRUNC]], [[TRUNC1]](i16) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[LSHR]](i16) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i16) = G_LSHR %2, %3(i16) + S_ENDPGM 0, implicit %4(i16) ... @@ -198,26 +198,26 @@ body: | ; CHECK-LABEL: name: lshr_v2s16_ss ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST1]], [[C2]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s32) = G_AND [[BITCAST1]], [[C3]] - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:sgpr(s32) = G_LSHR [[AND]], [[AND1]](s32) - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:sgpr(s32) = G_LSHR [[LSHR]], [[LSHR1]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[LSHR3]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR_TRUNC]](<2 x s16>) - %0:_(<2 x s16>) = COPY $sgpr0 - %1:_(<2 x s16>) = COPY $sgpr1 - %2:_(<2 x s16>) = G_LSHR %0, %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:sgpr(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(i32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:sgpr(i32) = G_LSHR [[BITCAST1]], [[C2]](i32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(i32) = G_AND [[BITCAST1]], [[C3]] + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:sgpr(i32) = G_LSHR [[AND]], [[AND1]](i32) + ; 
CHECK-NEXT: [[LSHR3:%[0-9]+]]:sgpr(i32) = G_LSHR [[LSHR]], [[LSHR1]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](i32), [[LSHR3]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR_TRUNC]](<2 x i16>) + %0:_(<2 x i16>) = COPY $sgpr0 + %1:_(<2 x i16>) = COPY $sgpr1 + %2:_(<2 x i16>) = G_LSHR %0, %1(<2 x i16>) + S_ENDPGM 0, implicit %2(<2 x i16>) ... @@ -231,15 +231,15 @@ body: | ; CHECK-LABEL: name: lshr_v2s16_sv ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_LSHR [[COPY2]], [[COPY1]](<2 x s16>) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>) - %0:_(<2 x s16>) = COPY $sgpr0 - %1:_(<2 x s16>) = COPY $vgpr0 - %2:_(<2 x s16>) = G_LSHR %0, %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x i16>) = COPY [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(<2 x i16>) = G_LSHR [[COPY2]], [[COPY1]](<2 x i16>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[LSHR]](<2 x i16>) + %0:_(<2 x i16>) = COPY $sgpr0 + %1:_(<2 x i16>) = COPY $vgpr0 + %2:_(<2 x i16>) = G_LSHR %0, %1(<2 x i16>) + S_ENDPGM 0, implicit %2(<2 x i16>) ... --- @@ -252,15 +252,15 @@ body: | ; CHECK-LABEL: name: lshr_v2s16_vs ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY2]](<2 x s16>) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $sgpr0 - %2:_(<2 x s16>) = G_LSHR %0, %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x i16>) = COPY [[COPY1]](<2 x i16>) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(<2 x i16>) = G_LSHR [[COPY]], [[COPY2]](<2 x i16>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[LSHR]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $sgpr0 + %2:_(<2 x i16>) = G_LSHR %0, %1(<2 x i16>) + S_ENDPGM 0, implicit %2(<2 x i16>) ... @@ -274,13 +274,13 @@ body: | ; CHECK-LABEL: name: lshr_v2s16_vv ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_LSHR %0, %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr1 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(<2 x i16>) = G_LSHR [[COPY]], [[COPY1]](<2 x i16>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[LSHR]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = G_LSHR %0, %1(<2 x i16>) + S_ENDPGM 0, implicit %2(<2 x i16>) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mad_64_32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mad_64_32.mir index cce4beacafdb2..0cb04c92681dc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mad_64_32.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mad_64_32.mir @@ -10,62 +10,62 @@ legalized: true body: | bb.0: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 - ; - ; ; GFX8-LABEL: name: mad_u64_u32_sss ; GFX8: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX8-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX8-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:vgpr_32(s32) = G_UMULH [[COPY4]], [[COPY5]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UMULH]](s32), implicit $exec - ; GFX8-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) - ; GFX8-NEXT: [[UADDO:%[0-9]+]]:sgpr(s32), [[UADDO1:%[0-9]+]]:sgpr(s32) = G_UADDO [[MUL]], [[UV]] - ; GFX8-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[V_READFIRSTLANE_B32_]], [[UV1]], [[UADDO1]] - ; GFX8-NEXT: [[MV1:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[UADDE1]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX8-NEXT: [[MV:%[0-9]+]]:sgpr(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX8-NEXT: [[MUL:%[0-9]+]]:sgpr(i32) = G_MUL [[COPY]], [[COPY1]] + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:vgpr_32(i32) = G_UMULH [[COPY4]], [[COPY5]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UMULH]](i32), implicit $exec + ; GFX8-NEXT: [[UV:%[0-9]+]]:sgpr(i32), [[UV1:%[0-9]+]]:sgpr(i32) = G_UNMERGE_VALUES [[MV]](i64) + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:sgpr(i32), [[UADDO1:%[0-9]+]]:sgpr(i32) = G_UADDO [[MUL]], [[UV]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:sgpr(i32), [[UADDE1:%[0-9]+]]:sgpr(i32) = G_UADDE [[V_READFIRSTLANE_B32_]], [[UV1]], [[UADDO1]] + ; GFX8-NEXT: [[MV1:%[0-9]+]]:sgpr(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[UADDE1]](i32) + ; ; GFX9MI-LABEL: name: mad_u64_u32_sss ; GFX9MI: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 ; GFX9MI-NEXT: {{ $}} - ; GFX9MI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX9MI-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX9MI-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX9MI-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX9MI-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9MI-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] - ; GFX9MI-NEXT: [[UMULH:%[0-9]+]]:sgpr(s32) = G_UMULH [[COPY]], [[COPY1]] - ; GFX9MI-NEXT: [[UV:%[0-9]+]]:sgpr(s32), 
[[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) - ; GFX9MI-NEXT: [[UADDO:%[0-9]+]]:sgpr(s32), [[UADDO1:%[0-9]+]]:sgpr(s32) = G_UADDO [[MUL]], [[UV]] - ; GFX9MI-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[UMULH]], [[UV1]], [[UADDO1]] - ; GFX9MI-NEXT: [[MV1:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX9MI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[UADDE1]](s32) + ; GFX9MI-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX9MI-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GFX9MI-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX9MI-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX9MI-NEXT: [[MV:%[0-9]+]]:sgpr(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX9MI-NEXT: [[MUL:%[0-9]+]]:sgpr(i32) = G_MUL [[COPY]], [[COPY1]] + ; GFX9MI-NEXT: [[UMULH:%[0-9]+]]:sgpr(i32) = G_UMULH [[COPY]], [[COPY1]] + ; GFX9MI-NEXT: [[UV:%[0-9]+]]:sgpr(i32), [[UV1:%[0-9]+]]:sgpr(i32) = G_UNMERGE_VALUES [[MV]](i64) + ; GFX9MI-NEXT: [[UADDO:%[0-9]+]]:sgpr(i32), [[UADDO1:%[0-9]+]]:sgpr(i32) = G_UADDO [[MUL]], [[UV]] + ; GFX9MI-NEXT: [[UADDE:%[0-9]+]]:sgpr(i32), [[UADDE1:%[0-9]+]]:sgpr(i32) = G_UADDE [[UMULH]], [[UV1]], [[UADDO1]] + ; GFX9MI-NEXT: [[MV1:%[0-9]+]]:sgpr(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; GFX9MI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[UADDE1]](i32) + ; ; GFX10-LABEL: name: mad_u64_u32_sss ; GFX10: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] - ; GFX10-NEXT: [[UMULH:%[0-9]+]]:sgpr(s32) = G_UMULH [[COPY]], [[COPY1]] - ; GFX10-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) - ; GFX10-NEXT: [[UADDO:%[0-9]+]]:sgpr(s32), [[UADDO1:%[0-9]+]]:sgpr(s32) = G_UADDO [[MUL]], [[UV]] - ; GFX10-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[UMULH]], [[UV1]], [[UADDO1]] - ; GFX10-NEXT: [[MV1:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[UADDE1]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $sgpr2 - %3:_(s32) = COPY $sgpr3 - %4:_(s64) = G_MERGE_VALUES %2, %3 - %5:_(s64), %6:_(s1) = G_AMDGPU_MAD_U64_U32 %0, %1, %4 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[MV:%[0-9]+]]:sgpr(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-NEXT: [[MUL:%[0-9]+]]:sgpr(i32) = G_MUL [[COPY]], [[COPY1]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:sgpr(i32) = G_UMULH [[COPY]], [[COPY1]] + ; GFX10-NEXT: [[UV:%[0-9]+]]:sgpr(i32), [[UV1:%[0-9]+]]:sgpr(i32) = G_UNMERGE_VALUES [[MV]](i64) + ; GFX10-NEXT: [[UADDO:%[0-9]+]]:sgpr(i32), [[UADDO1:%[0-9]+]]:sgpr(i32) = G_UADDO [[MUL]], [[UV]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:sgpr(i32), [[UADDE1:%[0-9]+]]:sgpr(i32) = G_UADDE [[UMULH]], [[UV1]], [[UADDO1]] + ; GFX10-NEXT: [[MV1:%[0-9]+]]:sgpr(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC 
[[UADDE1]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $sgpr2 + %3:_(i32) = COPY $sgpr3 + %4:_(i64) = G_MERGE_VALUES %2(i32), %3(i32) + %5:_(i64), %6:_(i1) = G_AMDGPU_MAD_U64_U32 %0(i32), %1, %4 ... --- @@ -75,61 +75,61 @@ legalized: true body: | bb.0: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 - ; - ; ; GFX8-LABEL: name: mad_u64_u32_ssv ; GFX8: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX8-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY4]], [[COPY5]] - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[MUL]](s32) - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UMULH]](s32) - ; GFX8-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) - ; GFX8-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY6]], [[UV]] - ; GFX8-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY7]], [[UV1]], [[UADDO1]] - ; GFX8-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vcc(s1) = COPY [[UADDE1]](s1) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX8-NEXT: [[MUL:%[0-9]+]]:sgpr(i32) = G_MUL [[COPY]], [[COPY1]] + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:vgpr(i32) = G_UMULH [[COPY4]], [[COPY5]] + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY [[MUL]](i32) + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr(i32) = COPY [[UMULH]](i32) + ; GFX8-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[MV]](i64) + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:vgpr(i32), [[UADDO1:%[0-9]+]]:vcc(i1) = G_UADDO [[COPY6]], [[UV]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:vgpr(i32), [[UADDE1:%[0-9]+]]:vcc(i1) = G_UADDE [[COPY7]], [[UV1]], [[UADDO1]] + ; GFX8-NEXT: [[MV1:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vcc(i1) = COPY [[UADDE1]](i1) + ; ; GFX9MI-LABEL: name: mad_u64_u32_ssv ; GFX9MI: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; GFX9MI-NEXT: {{ $}} - ; GFX9MI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX9MI-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX9MI-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX9MI-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX9MI-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9MI-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX9MI-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX9MI-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY4]](s32), [[COPY5]], [[MV]] + ; GFX9MI-NEXT: 
[[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX9MI-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GFX9MI-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX9MI-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GFX9MI-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX9MI-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; GFX9MI-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; GFX9MI-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(i1) = G_AMDGPU_MAD_U64_U32 [[COPY4]](i32), [[COPY5]], [[MV]] + ; ; GFX10-LABEL: name: mad_u64_u32_ssv ; GFX10: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] - ; GFX10-NEXT: [[UMULH:%[0-9]+]]:sgpr(s32) = G_UMULH [[COPY]], [[COPY1]] - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[MUL]](s32) - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UMULH]](s32) - ; GFX10-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) - ; GFX10-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY4]], [[UV]] - ; GFX10-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY5]], [[UV1]], [[UADDO1]] - ; GFX10-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:vcc(s1) = COPY [[UADDE1]](s1) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $vgpr0 - %3:_(s32) = COPY $vgpr1 - %4:_(s64) = G_MERGE_VALUES %2, %3 - %5:_(s64), %6:_(s1) = G_AMDGPU_MAD_U64_U32 %0, %1, %4 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-NEXT: [[MUL:%[0-9]+]]:sgpr(i32) = G_MUL [[COPY]], [[COPY1]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:sgpr(i32) = G_UMULH [[COPY]], [[COPY1]] + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[MUL]](i32) + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[UMULH]](i32) + ; GFX10-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[MV]](i64) + ; GFX10-NEXT: [[UADDO:%[0-9]+]]:vgpr(i32), [[UADDO1:%[0-9]+]]:vcc(i1) = G_UADDO [[COPY4]], [[UV]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:vgpr(i32), [[UADDE1:%[0-9]+]]:vcc(i1) = G_UADDE [[COPY5]], [[UV1]], [[UADDO1]] + ; GFX10-NEXT: [[MV1:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:vcc(i1) = COPY [[UADDE1]](i1) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $vgpr0 + %3:_(i32) = COPY $vgpr1 + %4:_(i64) = G_MERGE_VALUES %2(i32), %3(i32) + %5:_(i64), %6:_(i1) = G_AMDGPU_MAD_U64_U32 %0(i32), %1, %4 ... 
--- @@ -139,25 +139,23 @@ legalized: true body: | bb.0: liveins: $sgpr0, $vgpr0, $sgpr1, $sgpr2 - ; - ; ; CHECK-LABEL: name: mad_u64_u32_svs ; CHECK: liveins: $sgpr0, $vgpr0, $sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s64) = COPY [[MV]](s64) - ; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY4]](s32), [[COPY1]], [[COPY5]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = COPY $sgpr1 - %3:_(s32) = COPY $sgpr2 - %4:_(s64) = G_MERGE_VALUES %2, %3 - %5:_(s64), %6:_(s1) = G_AMDGPU_MAD_U64_U32 %0, %1, %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(i64) = COPY [[MV]](i64) + ; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(i1) = G_AMDGPU_MAD_U64_U32 [[COPY4]](i32), [[COPY1]], [[COPY5]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = COPY $sgpr1 + %3:_(i32) = COPY $sgpr2 + %4:_(i64) = G_MERGE_VALUES %2(i32), %3(i32) + %5:_(i64), %6:_(i1) = G_AMDGPU_MAD_U64_U32 %0(i32), %1, %4 ... --- @@ -167,24 +165,22 @@ legalized: true body: | bb.0: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2 - ; - ; ; CHECK-LABEL: name: mad_u64_u32_svv ; CHECK: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY4]](s32), [[COPY1]], [[MV]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = COPY $vgpr1 - %3:_(s32) = COPY $vgpr2 - %4:_(s64) = G_MERGE_VALUES %2, %3 - %5:_(s64), %6:_(s1) = G_AMDGPU_MAD_U64_U32 %0, %1, %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(i1) = G_AMDGPU_MAD_U64_U32 [[COPY4]](i32), [[COPY1]], [[MV]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = COPY $vgpr1 + %3:_(i32) = COPY $vgpr2 + %4:_(i64) = G_MERGE_VALUES %2(i32), %3(i32) + %5:_(i64), %6:_(i1) = G_AMDGPU_MAD_U64_U32 %0(i32), %1, %4 ... 
--- @@ -194,25 +190,23 @@ legalized: true body: | bb.0: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2 - ; - ; ; CHECK-LABEL: name: mad_u64_u32_vss ; CHECK: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s64) = COPY [[MV]](s64) - ; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[COPY4]], [[COPY5]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s32) = COPY $sgpr1 - %3:_(s32) = COPY $sgpr2 - %4:_(s64) = G_MERGE_VALUES %2, %3 - %5:_(s64), %6:_(s1) = G_AMDGPU_MAD_U64_U32 %0, %1, %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(i64) = COPY [[MV]](i64) + ; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(i1) = G_AMDGPU_MAD_U64_U32 [[COPY]](i32), [[COPY4]], [[COPY5]] + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr0 + %2:_(i32) = COPY $sgpr1 + %3:_(i32) = COPY $sgpr2 + %4:_(i64) = G_MERGE_VALUES %2(i32), %3(i32) + %5:_(i64), %6:_(i1) = G_AMDGPU_MAD_U64_U32 %0(i32), %1, %4 ... --- @@ -222,24 +216,22 @@ legalized: true body: | bb.0: liveins: $vgpr0, $sgpr0, $vgpr1, $vgpr2 - ; - ; ; CHECK-LABEL: name: mad_u64_u32_vsv ; CHECK: liveins: $vgpr0, $sgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[COPY4]], [[MV]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $vgpr1 - %3:_(s32) = COPY $vgpr2 - %4:_(s64) = G_MERGE_VALUES %2, %3 - %5:_(s64), %6:_(s1) = G_AMDGPU_MAD_U64_U32 %0, %1, %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(i1) = G_AMDGPU_MAD_U64_U32 [[COPY]](i32), [[COPY4]], [[MV]] + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $vgpr1 + %3:_(i32) = COPY $vgpr2 + %4:_(i64) = G_MERGE_VALUES %2(i32), %3(i32) + %5:_(i64), %6:_(i1) = G_AMDGPU_MAD_U64_U32 %0(i32), %1, %4 ... 
--- @@ -249,24 +241,22 @@ legalized: true body: | bb.0: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1 - ; - ; ; CHECK-LABEL: name: mad_u64_u32_vvs ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s64) = COPY [[MV]](s64) - ; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[COPY1]], [[COPY4]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $sgpr1 - %3:_(s32) = COPY $sgpr2 - %4:_(s64) = G_MERGE_VALUES %2, %3 - %5:_(s64), %6:_(s1) = G_AMDGPU_MAD_U64_U32 %0, %1, %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i64) = COPY [[MV]](i64) + ; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(i1) = G_AMDGPU_MAD_U64_U32 [[COPY]](i32), [[COPY1]], [[COPY4]] + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $sgpr1 + %3:_(i32) = COPY $sgpr2 + %4:_(i64) = G_MERGE_VALUES %2(i32), %3(i32) + %5:_(i64), %6:_(i1) = G_AMDGPU_MAD_U64_U32 %0(i32), %1, %4 ... --- @@ -276,23 +266,21 @@ legalized: true body: | bb.0: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; - ; ; CHECK-LABEL: name: mad_u64_u32_vvv ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[COPY1]], [[MV]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = COPY $vgpr3 - %4:_(s64) = G_MERGE_VALUES %2, %3 - %5:_(s64), %6:_(s1) = G_AMDGPU_MAD_U64_U32 %0, %1, %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(i1) = G_AMDGPU_MAD_U64_U32 [[COPY]](i32), [[COPY1]], [[MV]] + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 + %4:_(i64) = G_MERGE_VALUES %2(i32), %3(i32) + %5:_(i64), %6:_(i1) = G_AMDGPU_MAD_U64_U32 %0(i32), %1, %4 ... 
--- @@ -302,77 +290,77 @@ legalized: true body: | bb.0: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 - ; - ; ; GFX8-LABEL: name: mad_i64_i32_sss ; GFX8: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX8-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX8-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX8-NEXT: [[SMULH:%[0-9]+]]:vgpr_32(s32) = G_SMULH [[COPY4]], [[COPY5]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[SMULH]](s32), implicit $exec - ; GFX8-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[V_READFIRSTLANE_B32_]](s32), [[C]] - ; GFX8-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[UV1]](s32), [[C]] - ; GFX8-NEXT: [[XOR:%[0-9]+]]:sgpr(s32) = G_XOR [[ICMP]], [[ICMP1]] - ; GFX8-NEXT: [[UADDO:%[0-9]+]]:sgpr(s32), [[UADDO1:%[0-9]+]]:sgpr(s32) = G_UADDO [[MUL]], [[UV]] - ; GFX8-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[V_READFIRSTLANE_B32_]], [[UV1]], [[UADDO1]] - ; GFX8-NEXT: [[XOR1:%[0-9]+]]:sgpr(s32) = G_XOR [[XOR]], [[UADDE1]] - ; GFX8-NEXT: [[MV1:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[XOR1]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX8-NEXT: [[MV:%[0-9]+]]:sgpr(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX8-NEXT: [[MUL:%[0-9]+]]:sgpr(i32) = G_MUL [[COPY]], [[COPY1]] + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; GFX8-NEXT: [[SMULH:%[0-9]+]]:vgpr_32(i32) = G_SMULH [[COPY4]], [[COPY5]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[SMULH]](i32), implicit $exec + ; GFX8-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(slt), [[V_READFIRSTLANE_B32_]](i32), [[C]] + ; GFX8-NEXT: [[UV:%[0-9]+]]:sgpr(i32), [[UV1:%[0-9]+]]:sgpr(i32) = G_UNMERGE_VALUES [[MV]](i64) + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(slt), [[UV1]](i32), [[C]] + ; GFX8-NEXT: [[XOR:%[0-9]+]]:sgpr(i32) = G_XOR [[ICMP]], [[ICMP1]] + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:sgpr(i32), [[UADDO1:%[0-9]+]]:sgpr(i32) = G_UADDO [[MUL]], [[UV]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:sgpr(i32), [[UADDE1:%[0-9]+]]:sgpr(i32) = G_UADDE [[V_READFIRSTLANE_B32_]], [[UV1]], [[UADDO1]] + ; GFX8-NEXT: [[XOR1:%[0-9]+]]:sgpr(i32) = G_XOR [[XOR]], [[UADDE1]] + ; GFX8-NEXT: [[MV1:%[0-9]+]]:sgpr(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[XOR1]](i32) + ; ; GFX9MI-LABEL: name: mad_i64_i32_sss ; GFX9MI: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 ; GFX9MI-NEXT: {{ $}} - ; GFX9MI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX9MI-NEXT: 
[[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX9MI-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX9MI-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX9MI-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9MI-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] - ; GFX9MI-NEXT: [[SMULH:%[0-9]+]]:sgpr(s32) = G_SMULH [[COPY]], [[COPY1]] - ; GFX9MI-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX9MI-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[SMULH]](s32), [[C]] - ; GFX9MI-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) - ; GFX9MI-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[UV1]](s32), [[C]] - ; GFX9MI-NEXT: [[XOR:%[0-9]+]]:sgpr(s32) = G_XOR [[ICMP]], [[ICMP1]] - ; GFX9MI-NEXT: [[UADDO:%[0-9]+]]:sgpr(s32), [[UADDO1:%[0-9]+]]:sgpr(s32) = G_UADDO [[MUL]], [[UV]] - ; GFX9MI-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[SMULH]], [[UV1]], [[UADDO1]] - ; GFX9MI-NEXT: [[XOR1:%[0-9]+]]:sgpr(s32) = G_XOR [[XOR]], [[UADDE1]] - ; GFX9MI-NEXT: [[MV1:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX9MI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[XOR1]](s32) + ; GFX9MI-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX9MI-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GFX9MI-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX9MI-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX9MI-NEXT: [[MV:%[0-9]+]]:sgpr(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX9MI-NEXT: [[MUL:%[0-9]+]]:sgpr(i32) = G_MUL [[COPY]], [[COPY1]] + ; GFX9MI-NEXT: [[SMULH:%[0-9]+]]:sgpr(i32) = G_SMULH [[COPY]], [[COPY1]] + ; GFX9MI-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX9MI-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(slt), [[SMULH]](i32), [[C]] + ; GFX9MI-NEXT: [[UV:%[0-9]+]]:sgpr(i32), [[UV1:%[0-9]+]]:sgpr(i32) = G_UNMERGE_VALUES [[MV]](i64) + ; GFX9MI-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(slt), [[UV1]](i32), [[C]] + ; GFX9MI-NEXT: [[XOR:%[0-9]+]]:sgpr(i32) = G_XOR [[ICMP]], [[ICMP1]] + ; GFX9MI-NEXT: [[UADDO:%[0-9]+]]:sgpr(i32), [[UADDO1:%[0-9]+]]:sgpr(i32) = G_UADDO [[MUL]], [[UV]] + ; GFX9MI-NEXT: [[UADDE:%[0-9]+]]:sgpr(i32), [[UADDE1:%[0-9]+]]:sgpr(i32) = G_UADDE [[SMULH]], [[UV1]], [[UADDO1]] + ; GFX9MI-NEXT: [[XOR1:%[0-9]+]]:sgpr(i32) = G_XOR [[XOR]], [[UADDE1]] + ; GFX9MI-NEXT: [[MV1:%[0-9]+]]:sgpr(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; GFX9MI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[XOR1]](i32) + ; ; GFX10-LABEL: name: mad_i64_i32_sss ; GFX10: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] - ; GFX10-NEXT: [[SMULH:%[0-9]+]]:sgpr(s32) = G_SMULH [[COPY]], [[COPY1]] - ; GFX10-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[SMULH]](s32), [[C]] - ; GFX10-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[UV1]](s32), [[C]] - ; GFX10-NEXT: [[XOR:%[0-9]+]]:sgpr(s32) = G_XOR [[ICMP]], 
[[ICMP1]] - ; GFX10-NEXT: [[UADDO:%[0-9]+]]:sgpr(s32), [[UADDO1:%[0-9]+]]:sgpr(s32) = G_UADDO [[MUL]], [[UV]] - ; GFX10-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[SMULH]], [[UV1]], [[UADDO1]] - ; GFX10-NEXT: [[XOR1:%[0-9]+]]:sgpr(s32) = G_XOR [[XOR]], [[UADDE1]] - ; GFX10-NEXT: [[MV1:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[XOR1]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $sgpr2 - %3:_(s32) = COPY $sgpr3 - %4:_(s64) = G_MERGE_VALUES %2, %3 - %5:_(s64), %6:_(s1) = G_AMDGPU_MAD_I64_I32 %0, %1, %4 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GFX10-NEXT: [[MV:%[0-9]+]]:sgpr(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-NEXT: [[MUL:%[0-9]+]]:sgpr(i32) = G_MUL [[COPY]], [[COPY1]] + ; GFX10-NEXT: [[SMULH:%[0-9]+]]:sgpr(i32) = G_SMULH [[COPY]], [[COPY1]] + ; GFX10-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(slt), [[SMULH]](i32), [[C]] + ; GFX10-NEXT: [[UV:%[0-9]+]]:sgpr(i32), [[UV1:%[0-9]+]]:sgpr(i32) = G_UNMERGE_VALUES [[MV]](i64) + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(slt), [[UV1]](i32), [[C]] + ; GFX10-NEXT: [[XOR:%[0-9]+]]:sgpr(i32) = G_XOR [[ICMP]], [[ICMP1]] + ; GFX10-NEXT: [[UADDO:%[0-9]+]]:sgpr(i32), [[UADDO1:%[0-9]+]]:sgpr(i32) = G_UADDO [[MUL]], [[UV]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:sgpr(i32), [[UADDE1:%[0-9]+]]:sgpr(i32) = G_UADDE [[SMULH]], [[UV1]], [[UADDO1]] + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:sgpr(i32) = G_XOR [[XOR]], [[UADDE1]] + ; GFX10-NEXT: [[MV1:%[0-9]+]]:sgpr(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[XOR1]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $sgpr2 + %3:_(i32) = COPY $sgpr3 + %4:_(i64) = G_MERGE_VALUES %2(i32), %3(i32) + %5:_(i64), %6:_(i1) = G_AMDGPU_MAD_I64_I32 %0(i32), %1, %4 ... 
--- @@ -382,72 +370,72 @@ legalized: true body: | bb.0: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 - ; - ; ; GFX8-LABEL: name: mad_i64_i32_ssv ; GFX8: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX8-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX8-NEXT: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY4]], [[COPY5]] - ; GFX8-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(slt), [[SMULH]](s32), [[C]] - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[MUL]](s32) - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[SMULH]](s32) - ; GFX8-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(slt), [[UV1]](s32), [[C]] - ; GFX8-NEXT: [[XOR:%[0-9]+]]:vcc(s1) = G_XOR [[ICMP]], [[ICMP1]] - ; GFX8-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY6]], [[UV]] - ; GFX8-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY7]], [[UV1]], [[UADDO1]] - ; GFX8-NEXT: [[XOR1:%[0-9]+]]:vcc(s1) = G_XOR [[XOR]], [[UADDE1]] - ; GFX8-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vcc(s1) = COPY [[XOR1]](s1) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GFX8-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX8-NEXT: [[MUL:%[0-9]+]]:sgpr(i32) = G_MUL [[COPY]], [[COPY1]] + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; GFX8-NEXT: [[SMULH:%[0-9]+]]:vgpr(i32) = G_SMULH [[COPY4]], [[COPY5]] + ; GFX8-NEXT: [[C:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(slt), [[SMULH]](i32), [[C]] + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY [[MUL]](i32) + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr(i32) = COPY [[SMULH]](i32) + ; GFX8-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[MV]](i64) + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(slt), [[UV1]](i32), [[C]] + ; GFX8-NEXT: [[XOR:%[0-9]+]]:vcc(i1) = G_XOR [[ICMP]], [[ICMP1]] + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:vgpr(i32), [[UADDO1:%[0-9]+]]:vcc(i1) = G_UADDO [[COPY6]], [[UV]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:vgpr(i32), [[UADDE1:%[0-9]+]]:vcc(i1) = G_UADDE [[COPY7]], [[UV1]], [[UADDO1]] + ; GFX8-NEXT: [[XOR1:%[0-9]+]]:vcc(i1) = G_XOR [[XOR]], [[UADDE1]] + ; GFX8-NEXT: [[MV1:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vcc(i1) = COPY [[XOR1]](i1) + ; ; GFX9MI-LABEL: name: mad_i64_i32_ssv ; GFX9MI: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; GFX9MI-NEXT: {{ $}} - ; GFX9MI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX9MI-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX9MI-NEXT: 
[[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX9MI-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX9MI-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9MI-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX9MI-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX9MI-NEXT: [[AMDGPU_MAD_I64_I32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_I64_I32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_I64_I32 [[COPY4]](s32), [[COPY5]], [[MV]] + ; GFX9MI-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX9MI-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GFX9MI-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX9MI-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GFX9MI-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX9MI-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; GFX9MI-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; GFX9MI-NEXT: [[AMDGPU_MAD_I64_I32_:%[0-9]+]]:vgpr(i64), [[AMDGPU_MAD_I64_I32_1:%[0-9]+]]:vcc(i1) = G_AMDGPU_MAD_I64_I32 [[COPY4]](i32), [[COPY5]], [[MV]] + ; ; GFX10-LABEL: name: mad_i64_i32_ssv ; GFX10: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] - ; GFX10-NEXT: [[SMULH:%[0-9]+]]:sgpr(s32) = G_SMULH [[COPY]], [[COPY1]] - ; GFX10-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[SMULH]](s32), [[C]] - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:vcc(s1) = G_TRUNC [[ICMP]](s32) - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[MUL]](s32) - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[SMULH]](s32) - ; GFX10-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(slt), [[UV1]](s32), [[C]] - ; GFX10-NEXT: [[XOR:%[0-9]+]]:vcc(s1) = G_XOR [[TRUNC]], [[ICMP1]] - ; GFX10-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY4]], [[UV]] - ; GFX10-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY5]], [[UV1]], [[UADDO1]] - ; GFX10-NEXT: [[XOR1:%[0-9]+]]:vcc(s1) = G_XOR [[XOR]], [[UADDE1]] - ; GFX10-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:vcc(s1) = COPY [[XOR1]](s1) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $vgpr0 - %3:_(s32) = COPY $vgpr1 - %4:_(s64) = G_MERGE_VALUES %2, %3 - %5:_(s64), %6:_(s1) = G_AMDGPU_MAD_I64_I32 %0, %1, %4 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GFX10-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; GFX10-NEXT: [[MUL:%[0-9]+]]:sgpr(i32) = G_MUL [[COPY]], [[COPY1]] + ; GFX10-NEXT: [[SMULH:%[0-9]+]]:sgpr(i32) = G_SMULH [[COPY]], [[COPY1]] + ; GFX10-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(slt), [[SMULH]](i32), [[C]] + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:vcc(i1) = 
G_TRUNC [[ICMP]](i32) + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[MUL]](i32) + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[SMULH]](i32) + ; GFX10-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[MV]](i64) + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(slt), [[UV1]](i32), [[C]] + ; GFX10-NEXT: [[XOR:%[0-9]+]]:vcc(i1) = G_XOR [[TRUNC]], [[ICMP1]] + ; GFX10-NEXT: [[UADDO:%[0-9]+]]:vgpr(i32), [[UADDO1:%[0-9]+]]:vcc(i1) = G_UADDO [[COPY4]], [[UV]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:vgpr(i32), [[UADDE1:%[0-9]+]]:vcc(i1) = G_UADDE [[COPY5]], [[UV1]], [[UADDO1]] + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:vcc(i1) = G_XOR [[XOR]], [[UADDE1]] + ; GFX10-NEXT: [[MV1:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[UADDO]](i32), [[UADDE]](i32) + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:vcc(i1) = COPY [[XOR1]](i1) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $vgpr0 + %3:_(i32) = COPY $vgpr1 + %4:_(i64) = G_MERGE_VALUES %2(i32), %3(i32) + %5:_(i64), %6:_(i1) = G_AMDGPU_MAD_I64_I32 %0(i32), %1, %4 ... --- @@ -457,48 +445,48 @@ legalized: true body: | bb.0: liveins: $sgpr0, $sgpr1 - ; - ; ; GFX8-LABEL: name: mad_u64_u32_ss0 ; GFX8: liveins: $sgpr0, $sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX8-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 0 - ; GFX8-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:vgpr_32(s32) = G_UMULH [[COPY2]], [[COPY3]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UMULH]](s32), implicit $exec - ; GFX8-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[MUL]](s32), [[V_READFIRSTLANE_B32_]](s32) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[C1]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:sgpr(i64) = G_CONSTANT i64 0 + ; GFX8-NEXT: [[MUL:%[0-9]+]]:sgpr(i32) = G_MUL [[COPY]], [[COPY1]] + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:vgpr_32(i32) = G_UMULH [[COPY2]], [[COPY3]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UMULH]](i32), implicit $exec + ; GFX8-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[MV:%[0-9]+]]:sgpr(i64) = G_MERGE_VALUES [[MUL]](i32), [[V_READFIRSTLANE_B32_]](i32) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[C1]](i32) + ; ; GFX9MI-LABEL: name: mad_u64_u32_ss0 ; GFX9MI: liveins: $sgpr0, $sgpr1 ; GFX9MI-NEXT: {{ $}} - ; GFX9MI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX9MI-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX9MI-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 0 - ; GFX9MI-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] - ; GFX9MI-NEXT: [[UMULH:%[0-9]+]]:sgpr(s32) = G_UMULH [[COPY]], [[COPY1]] - ; GFX9MI-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX9MI-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[MUL]](s32), [[UMULH]](s32) - ; GFX9MI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[C1]](s32) + ; GFX9MI-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX9MI-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY 
$sgpr1 + ; GFX9MI-NEXT: [[C:%[0-9]+]]:sgpr(i64) = G_CONSTANT i64 0 + ; GFX9MI-NEXT: [[MUL:%[0-9]+]]:sgpr(i32) = G_MUL [[COPY]], [[COPY1]] + ; GFX9MI-NEXT: [[UMULH:%[0-9]+]]:sgpr(i32) = G_UMULH [[COPY]], [[COPY1]] + ; GFX9MI-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX9MI-NEXT: [[MV:%[0-9]+]]:sgpr(i64) = G_MERGE_VALUES [[MUL]](i32), [[UMULH]](i32) + ; GFX9MI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[C1]](i32) + ; ; GFX10-LABEL: name: mad_u64_u32_ss0 ; GFX10: liveins: $sgpr0, $sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX10-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 0 - ; GFX10-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] - ; GFX10-NEXT: [[UMULH:%[0-9]+]]:sgpr(s32) = G_UMULH [[COPY]], [[COPY1]] - ; GFX10-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[MUL]](s32), [[UMULH]](s32) - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[C1]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s64) = G_CONSTANT i64 0 - %3:_(s64), %4:_(s1) = G_AMDGPU_MAD_U64_U32 %0, %1, %2 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GFX10-NEXT: [[C:%[0-9]+]]:sgpr(i64) = G_CONSTANT i64 0 + ; GFX10-NEXT: [[MUL:%[0-9]+]]:sgpr(i32) = G_MUL [[COPY]], [[COPY1]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:sgpr(i32) = G_UMULH [[COPY]], [[COPY1]] + ; GFX10-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[MV:%[0-9]+]]:sgpr(i64) = G_MERGE_VALUES [[MUL]](i32), [[UMULH]](i32) + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[C1]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i64) = G_CONSTANT i64 0 + %3:_(i64), %4:_(i1) = G_AMDGPU_MAD_U64_U32 %0(i32), %1, %2 ... --- @@ -508,20 +496,18 @@ legalized: true body: | bb.0: liveins: $vgpr0, $vgpr1 - ; - ; ; CHECK-LABEL: name: mad_u64_u32_vv0 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[C]](s64) - ; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[COPY1]], [[COPY2]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s64) = G_CONSTANT i64 0 - %3:_(s64), %4:_(s1) = G_AMDGPU_MAD_U64_U32 %0, %1, %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i64) = COPY [[C]](i64) + ; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(i1) = G_AMDGPU_MAD_U64_U32 [[COPY]](i32), [[COPY1]], [[COPY2]] + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i64) = G_CONSTANT i64 0 + %3:_(i64), %4:_(i1) = G_AMDGPU_MAD_U64_U32 %0(i32), %1, %2 ... 
--- @@ -531,51 +517,51 @@ legalized: true body: | bb.0: liveins: $sgpr0, $sgpr1 - ; - ; ; GFX8-LABEL: name: mad_i64_i32_ss0 ; GFX8: liveins: $sgpr0, $sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX8-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 0 - ; GFX8-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX8-NEXT: [[SMULH:%[0-9]+]]:vgpr_32(s32) = G_SMULH [[COPY2]], [[COPY3]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[SMULH]](s32), implicit $exec - ; GFX8-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[V_READFIRSTLANE_B32_]](s32), [[C1]] - ; GFX8-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[MUL]](s32), [[V_READFIRSTLANE_B32_]](s32) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:sgpr(i64) = G_CONSTANT i64 0 + ; GFX8-NEXT: [[MUL:%[0-9]+]]:sgpr(i32) = G_MUL [[COPY]], [[COPY1]] + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; GFX8-NEXT: [[SMULH:%[0-9]+]]:vgpr_32(i32) = G_SMULH [[COPY2]], [[COPY3]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[SMULH]](i32), implicit $exec + ; GFX8-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(slt), [[V_READFIRSTLANE_B32_]](i32), [[C1]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:sgpr(i64) = G_MERGE_VALUES [[MUL]](i32), [[V_READFIRSTLANE_B32_]](i32) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; ; GFX9MI-LABEL: name: mad_i64_i32_ss0 ; GFX9MI: liveins: $sgpr0, $sgpr1 ; GFX9MI-NEXT: {{ $}} - ; GFX9MI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX9MI-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX9MI-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 0 - ; GFX9MI-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] - ; GFX9MI-NEXT: [[SMULH:%[0-9]+]]:sgpr(s32) = G_SMULH [[COPY]], [[COPY1]] - ; GFX9MI-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX9MI-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[SMULH]](s32), [[C1]] - ; GFX9MI-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[MUL]](s32), [[SMULH]](s32) - ; GFX9MI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) + ; GFX9MI-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX9MI-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GFX9MI-NEXT: [[C:%[0-9]+]]:sgpr(i64) = G_CONSTANT i64 0 + ; GFX9MI-NEXT: [[MUL:%[0-9]+]]:sgpr(i32) = G_MUL [[COPY]], [[COPY1]] + ; GFX9MI-NEXT: [[SMULH:%[0-9]+]]:sgpr(i32) = G_SMULH [[COPY]], [[COPY1]] + ; GFX9MI-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX9MI-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(slt), [[SMULH]](i32), [[C1]] + ; GFX9MI-NEXT: [[MV:%[0-9]+]]:sgpr(i64) = G_MERGE_VALUES [[MUL]](i32), [[SMULH]](i32) + ; GFX9MI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; ; GFX10-LABEL: name: mad_i64_i32_ss0 ; GFX10: liveins: $sgpr0, $sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX10-NEXT: 
[[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 0 - ; GFX10-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] - ; GFX10-NEXT: [[SMULH:%[0-9]+]]:sgpr(s32) = G_SMULH [[COPY]], [[COPY1]] - ; GFX10-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[SMULH]](s32), [[C1]] - ; GFX10-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[MUL]](s32), [[SMULH]](s32) - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s64) = G_CONSTANT i64 0 - %3:_(s64), %4:_(s1) = G_AMDGPU_MAD_I64_I32 %0, %1, %2 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GFX10-NEXT: [[C:%[0-9]+]]:sgpr(i64) = G_CONSTANT i64 0 + ; GFX10-NEXT: [[MUL:%[0-9]+]]:sgpr(i32) = G_MUL [[COPY]], [[COPY1]] + ; GFX10-NEXT: [[SMULH:%[0-9]+]]:sgpr(i32) = G_SMULH [[COPY]], [[COPY1]] + ; GFX10-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(slt), [[SMULH]](i32), [[C1]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:sgpr(i64) = G_MERGE_VALUES [[MUL]](i32), [[SMULH]](i32) + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i64) = G_CONSTANT i64 0 + %3:_(i64), %4:_(i1) = G_AMDGPU_MAD_I64_I32 %0(i32), %1, %2 ... --- @@ -585,18 +571,16 @@ legalized: true body: | bb.0: liveins: $vgpr0, $vgpr1 - ; - ; ; CHECK-LABEL: name: mad_i64_i32_vv0 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[C]](s64) - ; CHECK-NEXT: [[AMDGPU_MAD_I64_I32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_I64_I32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_I64_I32 [[COPY]](s32), [[COPY1]], [[COPY2]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s64) = G_CONSTANT i64 0 - %3:_(s64), %4:_(s1) = G_AMDGPU_MAD_I64_I32 %0, %1, %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i64) = COPY [[C]](i64) + ; CHECK-NEXT: [[AMDGPU_MAD_I64_I32_:%[0-9]+]]:vgpr(i64), [[AMDGPU_MAD_I64_I32_1:%[0-9]+]]:vcc(i1) = G_AMDGPU_MAD_I64_I32 [[COPY]](i32), [[COPY1]], [[COPY2]] + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i64) = G_CONSTANT i64 0 + %3:_(i64), %4:_(i1) = G_AMDGPU_MAD_I64_I32 %0(i32), %1, %2 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-merge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-merge-values.mir index e22df46b505a5..49851bf030b74 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-merge-values.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-merge-values.mir @@ -12,16 +12,16 @@ body: | ; CHECK-LABEL: name: merge_s64_s32_s32_ss ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:sgpr(s32) = G_EXTRACT [[COPY]](s64), 0 - ; CHECK-NEXT: [[EXTRACT1:%[0-9]+]]:sgpr(s32) = G_EXTRACT [[COPY]](s64), 32 - ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[EXTRACT]](s32), [[EXTRACT1]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s64) - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_EXTRACT %0, 0 - %2:_(s32) = G_EXTRACT %0, 32 - %3:_(s64) = G_MERGE_VALUES %1, %2 - S_ENDPGM 0, implicit %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:sgpr(i32) = G_EXTRACT [[COPY]](i64), 0 + ; CHECK-NEXT: [[EXTRACT1:%[0-9]+]]:sgpr(i32) = G_EXTRACT [[COPY]](i64), 32 + ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(i64) = G_MERGE_VALUES [[EXTRACT]](i32), [[EXTRACT1]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](i64) + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(i32) = G_EXTRACT %0(i64), 0 + %2:_(i32) = G_EXTRACT %0(i64), 32 + %3:_(i64) = G_MERGE_VALUES %1(i32), %2(i32) + S_ENDPGM 0, implicit %3(i64) ... --- @@ -34,16 +34,16 @@ body: | ; CHECK-LABEL: name: merge_s64_s32_s32_s64 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:vgpr(s32) = G_EXTRACT [[COPY]](s64), 0 - ; CHECK-NEXT: [[EXTRACT1:%[0-9]+]]:vgpr(s32) = G_EXTRACT [[COPY]](s64), 32 - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[EXTRACT]](s32), [[EXTRACT1]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_EXTRACT %0, 0 - %2:_(s32) = G_EXTRACT %0, 32 - %3:_(s64) = G_MERGE_VALUES %1, %2 - S_ENDPGM 0, implicit %3 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:vgpr(i32) = G_EXTRACT [[COPY]](i64), 0 + ; CHECK-NEXT: [[EXTRACT1:%[0-9]+]]:vgpr(i32) = G_EXTRACT [[COPY]](i64), 32 + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[EXTRACT]](i32), [[EXTRACT1]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = G_EXTRACT %0(i64), 0 + %2:_(i32) = G_EXTRACT %0(i64), 32 + %3:_(i64) = G_MERGE_VALUES %1(i32), %2(i32) + S_ENDPGM 0, implicit %3(i64) ... 
--- @@ -56,14 +56,14 @@ body: | ; CHECK-LABEL: name: merge_s64_s32_s32_aa ; CHECK: liveins: $agpr0, $agpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(s32) = COPY $agpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(s32) = COPY $agpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:agpr(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s64) - %0:_(s32) = COPY $agpr0 - %1:_(s32) = COPY $agpr1 - %2:_(s64) = G_MERGE_VALUES %0, %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(i32) = COPY $agpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(i32) = COPY $agpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:agpr(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](i64) + %0:_(i32) = COPY $agpr0 + %1:_(i32) = COPY $agpr1 + %2:_(i64) = G_MERGE_VALUES %0(i32), %1(i32) + S_ENDPGM 0, implicit %2(i64) ... --- @@ -76,16 +76,16 @@ body: | ; CHECK-LABEL: name: merge_s64_s32_s32_sa ; CHECK: liveins: $sgpr0, $agpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(s32) = COPY $agpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s64) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $agpr0 - %2:_(s64) = G_MERGE_VALUES %0, %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(i32) = COPY $agpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](i64) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $agpr0 + %2:_(i64) = G_MERGE_VALUES %0(i32), %1(i32) + S_ENDPGM 0, implicit %2(i64) ... --- @@ -98,14 +98,14 @@ body: | ; CHECK-LABEL: name: merge_s64_s32_s32_as ; CHECK: liveins: $sgpr0, $agpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(s32) = COPY $agpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s64) - %0:_(s32) = COPY $agpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s64) = G_MERGE_VALUES %0, %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(i32) = COPY $agpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](i64) + %0:_(i32) = COPY $agpr0 + %1:_(i32) = COPY $sgpr0 + %2:_(i64) = G_MERGE_VALUES %0(i32), %1(i32) + S_ENDPGM 0, implicit %2(i64) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui-regbanklegalize.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui-regbanklegalize.mir index 3674fb9156f7a..58583e4ea64e0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui-regbanklegalize.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui-regbanklegalize.mir @@ -11,26 +11,28 @@ body: | ; CHECK-LABEL: name: uniform_in_vgpr ; CHECK: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[FPTOUI:%[0-9]+]]:vgpr(s32) = G_FPTOUI [[COPY4]](s32) - ; CHECK-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[FPTOUI]] - ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[AMDGPU_READANYLANE]], [[COPY1]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) - ; CHECK-NEXT: G_STORE [[COPY5]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST]](f32) + ; CHECK-NEXT: [[FPTOUI:%[0-9]+]]:vgpr(i32) = G_FPTOUI [[COPY4]](f32) + ; CHECK-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[FPTOUI]] + ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(i32) = G_ADD [[AMDGPU_READANYLANE]], [[COPY1]] + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[ADD]](i32) + ; CHECK-NEXT: G_STORE [[COPY5]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:vgpr(s32) = COPY $vgpr0 - %3:vgpr(s32) = COPY $vgpr1 - %4:vgpr(p1) = G_MERGE_VALUES %2(s32), %3(s32) - %5:sgpr(s32) = G_FPTOUI %0(s32) - %6:sgpr(s32) = G_ADD %5, %1 - G_STORE %6(s32), %4(p1) :: (store (s32), addrspace 1) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:vgpr(i32) = COPY $vgpr0 + %3:vgpr(i32) = COPY $vgpr1 + %4:vgpr(p1) = G_MERGE_VALUES %2(i32), %3(i32) + %5:sgpr(f32) = G_BITCAST %0(i32) + %6:sgpr(i32) = G_FPTOUI %5(f32) + %7:sgpr(i32) = G_ADD %6, %1 + G_STORE %7(i32), %4(p1) :: (store (i32), addrspace 1) S_ENDPGM 0 ... 
@@ -44,31 +46,35 @@ body: | ; CHECK-LABEL: name: back_to_back_uniform_in_vgpr ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[FADD:%[0-9]+]]:vgpr(s32) = G_FADD [[COPY5]], [[COPY6]] - ; CHECK-NEXT: [[FPTOUI:%[0-9]+]]:vgpr(s32) = G_FPTOUI [[FADD]](s32) - ; CHECK-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[FPTOUI]] - ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[AMDGPU_READANYLANE]], [[COPY2]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) - ; CHECK-NEXT: G_STORE [[COPY7]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](i32), [[COPY4]](i32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST]](f32) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST1]](f32) + ; CHECK-NEXT: [[FADD:%[0-9]+]]:vgpr(f32) = G_FADD [[COPY5]], [[COPY6]] + ; CHECK-NEXT: [[FPTOUI:%[0-9]+]]:vgpr(i32) = G_FPTOUI [[FADD]](f32) + ; CHECK-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[FPTOUI]] + ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(i32) = G_ADD [[AMDGPU_READANYLANE]], [[COPY2]] + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(i32) = COPY [[ADD]](i32) + ; CHECK-NEXT: G_STORE [[COPY7]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = COPY $sgpr2 - %3:vgpr(s32) = COPY $vgpr0 - %4:vgpr(s32) = COPY $vgpr1 - %5:vgpr(p1) = G_MERGE_VALUES %3(s32), %4(s32) - %6:sgpr(s32) = G_FADD %0, %1 - %7:sgpr(s32) = G_FPTOUI %6(s32) - %8:sgpr(s32) = G_ADD %7, %2 - G_STORE %8(s32), %5(p1) :: (store (s32), addrspace 1) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i32) = COPY $sgpr2 + %3:vgpr(i32) = COPY $vgpr0 + %4:vgpr(i32) = COPY $vgpr1 + %5:vgpr(p1) = G_MERGE_VALUES %3(i32), %4(i32) + %6:sgpr(f32) = G_BITCAST %0(i32) + %7:sgpr(f32) = G_BITCAST %1(i32) + %8:sgpr(f32) = G_FADD %6, %7 + %9:sgpr(i32) = G_FPTOUI %8(f32) + %10:sgpr(i32) = G_ADD %9, %2 + G_STORE %10(i32), %5(p1) :: (store (i32), addrspace 1) S_ENDPGM 0 ... 
@@ -82,46 +88,46 @@ body: | ; CHECK-LABEL: name: buffer_load_uniform ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY5]](s32), [[COPY6]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY7]](s32), [[COPY8]], [[C]], 0, 0, 0 :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x s32>) - ; CHECK-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV]] - ; CHECK-NEXT: [[AMDGPU_READANYLANE1:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV1]] - ; CHECK-NEXT: [[AMDGPU_READANYLANE2:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV2]] - ; CHECK-NEXT: [[AMDGPU_READANYLANE3:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV3]] - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[AMDGPU_READANYLANE]](s32), [[AMDGPU_READANYLANE1]](s32), [[AMDGPU_READANYLANE2]](s32), [[AMDGPU_READANYLANE3]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32), [[UV6:%[0-9]+]]:sgpr(s32), [[UV7:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[UV5]], [[C1]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) - ; CHECK-NEXT: G_STORE [[COPY9]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY5]](i32), [[COPY6]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr(i32) = COPY [[COPY4]](i32) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x i32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[COPY7]](i32), [[COPY8]], [[C]], 0, 0, 0 :: (dereferenceable load (<4 x i32>), align 1, addrspace 8) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32), [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES 
[[AMDGPU_BUFFER_LOAD]](<4 x i32>) + ; CHECK-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV]] + ; CHECK-NEXT: [[AMDGPU_READANYLANE1:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV1]] + ; CHECK-NEXT: [[AMDGPU_READANYLANE2:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV2]] + ; CHECK-NEXT: [[AMDGPU_READANYLANE3:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV3]] + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[AMDGPU_READANYLANE]](i32), [[AMDGPU_READANYLANE1]](i32), [[AMDGPU_READANYLANE2]](i32), [[AMDGPU_READANYLANE3]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[UV4:%[0-9]+]]:sgpr(i32), [[UV5:%[0-9]+]]:sgpr(i32), [[UV6:%[0-9]+]]:sgpr(i32), [[UV7:%[0-9]+]]:sgpr(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x i32>) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(i32) = G_ADD [[UV5]], [[C1]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr(i32) = COPY [[ADD]](i32) + ; CHECK-NEXT: G_STORE [[COPY9]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = COPY $sgpr2 - %3:sgpr(s32) = COPY $sgpr3 - %4:sgpr(<4 x s32>) = G_BUILD_VECTOR %0(s32), %1(s32), %2(s32), %3(s32) - %5:sgpr(s32) = COPY $sgpr4 - %6:vgpr(s32) = COPY $vgpr0 - %7:vgpr(s32) = COPY $vgpr1 - %8:vgpr(p1) = G_MERGE_VALUES %6(s32), %7(s32) - %9:sgpr(s32) = G_CONSTANT i32 0 - %10:sgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD %4(<4 x s32>), %9(s32), %5, %9, 0, 0, 0 :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) - %11:sgpr(s32) = G_CONSTANT i32 1 - %12:sgpr(s32), %13:sgpr(s32), %14:sgpr(s32), %15:sgpr(s32) = G_UNMERGE_VALUES %10(<4 x s32>) - %16:sgpr(s32) = G_ADD %13, %11 - G_STORE %16(s32), %8(p1) :: (store (s32), addrspace 1) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i32) = COPY $sgpr2 + %3:sgpr(i32) = COPY $sgpr3 + %4:sgpr(<4 x i32>) = G_BUILD_VECTOR %0(i32), %1(i32), %2(i32), %3(i32) + %5:sgpr(i32) = COPY $sgpr4 + %6:vgpr(i32) = COPY $vgpr0 + %7:vgpr(i32) = COPY $vgpr1 + %8:vgpr(p1) = G_MERGE_VALUES %6(i32), %7(i32) + %9:sgpr(i32) = G_CONSTANT i32 0 + %10:sgpr(<4 x i32>) = G_AMDGPU_BUFFER_LOAD %4(<4 x i32>), %9(i32), %5, %9, 0, 0, 0 :: (dereferenceable load (<4 x i32>), align 1, addrspace 8) + %11:sgpr(i32) = G_CONSTANT i32 1 + %12:sgpr(i32), %13:sgpr(i32), %14:sgpr(i32), %15:sgpr(i32) = G_UNMERGE_VALUES %10(<4 x i32>) + %16:sgpr(i32) = G_ADD %13, %11 + G_STORE %16(i32), %8(p1) :: (store (i32), addrspace 1) S_ENDPGM 0 ... 
@@ -135,39 +141,39 @@ body: | ; CHECK-LABEL: name: buffer_load_divergent ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY5]](s32), [[COPY6]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY7]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x s32>) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[UV1]], [[COPY8]] - ; CHECK-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY5]](i32), [[COPY6]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x i32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[COPY7]](i32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable load (<4 x i32>), align 1, addrspace 8) + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32), [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x i32>) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr(i32) = COPY [[C1]](i32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[UV1]], [[COPY8]] + ; CHECK-NEXT: G_STORE [[ADD]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = COPY $sgpr2 - %3:sgpr(s32) = COPY $sgpr3 - %4:sgpr(<4 x s32>) = G_BUILD_VECTOR %0(s32), %1(s32), %2(s32), %3(s32) - %5:vgpr(s32) = COPY $vgpr0 - %6:vgpr(s32) = COPY $vgpr1 - %7:vgpr(s32) = COPY $vgpr2 - %8:vgpr(p1) = G_MERGE_VALUES %6(s32), %7(s32) - %9:sgpr(s32) = G_CONSTANT i32 0 - %10:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD %4(<4 x s32>), %9(s32), %5, %9, 0, 0, 0 :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) - %11:sgpr(s32) = G_CONSTANT i32 
1 - %12:vgpr(s32), %13:vgpr(s32), %14:vgpr(s32), %15:vgpr(s32) = G_UNMERGE_VALUES %10(<4 x s32>) - %16:vgpr(s32) = G_ADD %13, %11 - G_STORE %16(s32), %8(p1) :: (store (s32), addrspace 1) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i32) = COPY $sgpr2 + %3:sgpr(i32) = COPY $sgpr3 + %4:sgpr(<4 x i32>) = G_BUILD_VECTOR %0(i32), %1(i32), %2(i32), %3(i32) + %5:vgpr(i32) = COPY $vgpr0 + %6:vgpr(i32) = COPY $vgpr1 + %7:vgpr(i32) = COPY $vgpr2 + %8:vgpr(p1) = G_MERGE_VALUES %6(i32), %7(i32) + %9:sgpr(i32) = G_CONSTANT i32 0 + %10:vgpr(<4 x i32>) = G_AMDGPU_BUFFER_LOAD %4(<4 x i32>), %9(i32), %5, %9, 0, 0, 0 :: (dereferenceable load (<4 x i32>), align 1, addrspace 8) + %11:sgpr(i32) = G_CONSTANT i32 1 + %12:vgpr(i32), %13:vgpr(i32), %14:vgpr(i32), %15:vgpr(i32) = G_UNMERGE_VALUES %10(<4 x i32>) + %16:vgpr(i32) = G_ADD %13, %11 + G_STORE %16(i32), %8(p1) :: (store (i32), addrspace 1) S_ENDPGM 0 ... @@ -181,33 +187,33 @@ body: | ; CHECK-LABEL: name: vgpr_and_i64 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[MV2:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV1]](s64) - ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] - ; CHECK-NEXT: [[MV3:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) - ; CHECK-NEXT: G_STORE [[MV3]](s64), [[MV2]](p1) :: (store (s64), addrspace 1) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[MV2:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[MV]](i64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[MV1]](i64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(i32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(i32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV3:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[AND]](i32), [[AND1]](i32) + ; CHECK-NEXT: G_STORE [[MV3]](i64), [[MV2]](p1) :: (store (i64), addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s64) = G_MERGE_VALUES %0(s32), %1(s32) - %3:vgpr(s32) = COPY $vgpr2 - %4:vgpr(s32) = COPY $vgpr3 - %5:vgpr(s64) = G_MERGE_VALUES %3(s32), %4(s32) - 
%6:vgpr(s32) = COPY $vgpr4 - %7:vgpr(s32) = COPY $vgpr5 - %8:vgpr(p1) = G_MERGE_VALUES %6(s32), %7(s32) - %9:vgpr(s64) = G_AND %2, %5 - G_STORE %9(s64), %8(p1) :: (store (s64), addrspace 1) + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i64) = G_MERGE_VALUES %0(i32), %1(i32) + %3:vgpr(i32) = COPY $vgpr2 + %4:vgpr(i32) = COPY $vgpr3 + %5:vgpr(i64) = G_MERGE_VALUES %3(i32), %4(i32) + %6:vgpr(i32) = COPY $vgpr4 + %7:vgpr(i32) = COPY $vgpr5 + %8:vgpr(p1) = G_MERGE_VALUES %6(i32), %7(i32) + %9:vgpr(i64) = G_AND %2, %5 + G_STORE %9(i64), %8(p1) :: (store (i64), addrspace 1) S_ENDPGM 0 ... @@ -221,26 +227,26 @@ body: | ; CHECK-LABEL: name: abs_sgpr_i16 ; CHECK: liveins: $sgpr0, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC]](s16) - ; CHECK-NEXT: [[ABS:%[0-9]+]]:sgpr(s32) = G_ABS [[SEXT]] - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[ABS]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC1]](s16) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[ANYEXT]](s32) - ; CHECK-NEXT: G_STORE [[COPY3]](s32), [[MV]](p1) :: (store (s16), addrspace 1) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY1]](i32), [[COPY2]](i32) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:sgpr(i32) = G_SEXT [[TRUNC]](i16) + ; CHECK-NEXT: [[ABS:%[0-9]+]]:sgpr(i32) = G_ABS [[SEXT]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i16) = G_TRUNC [[ABS]](i32) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC1]](i16) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[ANYEXT]](i32) + ; CHECK-NEXT: G_STORE [[COPY3]](i32), [[MV]](p1) :: (store (i16), addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0(s32) - %2:vgpr(s32) = COPY $vgpr0 - %3:vgpr(s32) = COPY $vgpr1 - %4:vgpr(p1) = G_MERGE_VALUES %2(s32), %3(s32) - %5:sgpr(s16) = G_ABS %1 - %6:sgpr(s32) = G_ANYEXT %5(s16) - G_STORE %6(s32), %4(p1) :: (store (s16), addrspace 1) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i16) = G_TRUNC %0(i32) + %2:vgpr(i32) = COPY $vgpr0 + %3:vgpr(i32) = COPY $vgpr1 + %4:vgpr(p1) = G_MERGE_VALUES %2(i32), %3(i32) + %5:sgpr(i16) = G_ABS %1 + %6:sgpr(i32) = G_ANYEXT %5(i16) + G_STORE %6(i32), %4(p1) :: (store (i16), addrspace 1) S_ENDPGM 0 ... 
@@ -254,67 +260,71 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x30000000), %bb.2(0x50000000) ; CHECK-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(uge), [[COPY2]](s32), [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY3]](s32), [[C1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[ICMP1]], [[C2]] - ; CHECK-NEXT: G_BRCOND [[AND]](s32), %bb.2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 6 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(uge), [[COPY2]](i32), [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ne), [[COPY3]](i32), [[C1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(i32) = G_AND [[ICMP1]], [[C2]] + ; CHECK-NEXT: G_BRCOND [[AND]](i32), %bb.2 ; CHECK-NEXT: G_BR %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ult), [[COPY2]](s32), [[C3]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ult), [[COPY2]](i32), [[C3]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI [[ICMP]](s32), %bb.0, [[ICMP2]](s32), %bb.1 - ; CHECK-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s32) = G_AND [[PHI]], [[C4]] - ; CHECK-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -1 - ; CHECK-NEXT: [[C6:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND1]](s32), [[C5]], [[C6]] - ; CHECK-NEXT: [[C7:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[SELECT]], [[C7]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) - ; CHECK-NEXT: G_STORE [[COPY4]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: [[PHI:%[0-9]+]]:sgpr(i32) = G_PHI [[ICMP]](i32), %bb.0, [[ICMP2]](i32), %bb.1 + ; CHECK-NEXT: [[C4:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(i32) = G_AND [[PHI]], [[C4]] + ; CHECK-NEXT: [[C5:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: [[C6:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(i32) = G_SELECT [[AND1]](i32), [[C5]], [[C6]] + ; CHECK-NEXT: [[C7:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(i32) = G_ADD [[SELECT]], [[C7]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[ADD]](i32) + ; CHECK-NEXT: 
G_STORE [[COPY4]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1(0x30000000), %bb.2(0x50000000) liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(p1) = G_MERGE_VALUES %0(s32), %1(s32) - %3:sgpr(s32) = COPY $sgpr0 - %4:sgpr(s32) = COPY $sgpr1 - %5:sgpr(s32) = G_CONSTANT i32 6 - %6:sgpr(s1) = G_ICMP intpred(uge), %3(s32), %5 - %7:sgpr(s32) = G_CONSTANT i32 0 - %8:sgpr(s1) = G_ICMP intpred(ne), %4(s32), %7 - G_BRCOND %8(s1), %bb.2 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(p1) = G_MERGE_VALUES %0(i32), %1(i32) + %3:sgpr(i32) = COPY $sgpr0 + %4:sgpr(i32) = COPY $sgpr1 + %5:sgpr(i32) = G_CONSTANT i32 6 + %6:sgpr(i1) = G_ICMP intpred(uge), %3(i32), %5 + %7:sgpr(i32) = G_CONSTANT i32 0 + %8:sgpr(i1) = G_ICMP intpred(ne), %4(i32), %7 + G_BRCOND %8(i1), %bb.2 G_BR %bb.1 bb.1: successors: %bb.2(0x80000000) - %9:sgpr(s32) = G_CONSTANT i32 1 - %10:sgpr(s1) = G_ICMP intpred(ult), %3(s32), %9 + %9:sgpr(i32) = G_CONSTANT i32 1 + %10:sgpr(i1) = G_ICMP intpred(ult), %3(i32), %9 bb.2: - %11:sgpr(s1) = G_PHI %6(s1), %bb.0, %10(s1), %bb.1 - %12:sgpr(s32) = G_SEXT %11(s1) - %13:sgpr(s32) = G_CONSTANT i32 2 - %14:sgpr(s32) = G_ADD %12, %13 - G_STORE %14(s32), %2(p1) :: (store (s32), addrspace 1) + %11:sgpr(i1) = G_PHI %6(i1), %bb.0, %10(i1), %bb.1 + %12:sgpr(i32) = G_SEXT %11(i1) + %13:sgpr(i32) = G_CONSTANT i32 2 + %14:sgpr(i32) = G_ADD %12, %13 + G_STORE %14(i32), %2(p1) :: (store (i32), addrspace 1) S_ENDPGM 0 + + + + ... --- @@ -327,33 +337,35 @@ body: | ; CHECK-LABEL: name: vcc_to_scc ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(oeq), [[COPY5]](s32), [[COPY6]] - ; CHECK-NEXT: [[AMDGPU_COPY_SCC_VCC:%[0-9]+]]:sgpr(s32) = G_AMDGPU_COPY_SCC_VCC [[FCMP]](s1) - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[AMDGPU_COPY_SCC_VCC]], [[C1]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND]](s32), [[COPY1]], [[COPY2]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[SELECT]](s32) - ; CHECK-NEXT: G_STORE [[COPY7]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](i32), [[COPY4]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 0.000000e+00 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST]](f32) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(f32) = COPY [[C]](f32) + ; CHECK-NEXT: [[FCMP:%[0-9]+]]:vcc(i1) = G_FCMP 
floatpred(oeq), [[COPY5]](f32), [[COPY6]] + ; CHECK-NEXT: [[AMDGPU_COPY_SCC_VCC:%[0-9]+]]:sgpr(i32) = G_AMDGPU_COPY_SCC_VCC [[FCMP]](i1) + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(i32) = G_AND [[AMDGPU_COPY_SCC_VCC]], [[C1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(i32) = G_SELECT [[AND]](i32), [[COPY1]], [[COPY2]] + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(i32) = COPY [[SELECT]](i32) + ; CHECK-NEXT: G_STORE [[COPY7]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = COPY $sgpr2 - %3:vgpr(s32) = COPY $vgpr0 - %4:vgpr(s32) = COPY $vgpr1 - %5:vgpr(p1) = G_MERGE_VALUES %3(s32), %4(s32) - %6:sgpr(s32) = G_FCONSTANT float 0.000000e+00 - %7:sgpr(s1) = G_FCMP floatpred(oeq), %0(s32), %6 - %8:sgpr(s32) = G_SELECT %7(s1), %1, %2 - G_STORE %8(s32), %5(p1) :: (store (s32), addrspace 1) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:sgpr(i32) = COPY $sgpr2 + %3:vgpr(i32) = COPY $vgpr0 + %4:vgpr(i32) = COPY $vgpr1 + %5:vgpr(p1) = G_MERGE_VALUES %3(i32), %4(i32) + %6:sgpr(f32) = G_FCONSTANT float 0.000000e+00 + %7:sgpr(f32) = G_BITCAST %0(i32) + %8:sgpr(i1) = G_FCMP floatpred(oeq), %7(f32), %6 + %9:sgpr(i32) = G_SELECT %8(i1), %1, %2 + G_STORE %9(i32), %5(p1) :: (store (i32), addrspace 1) S_ENDPGM 0 ... @@ -367,28 +379,28 @@ body: | ; CHECK-LABEL: name: scc_to_vcc ; CHECK: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; CHECK-NEXT: [[AMDGPU_COPY_VCC_SCC:%[0-9]+]]:vcc(s1) = G_AMDGPU_COPY_VCC_SCC [[ICMP]](s32) - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[AMDGPU_COPY_VCC_SCC]](s1), [[COPY1]], [[COPY2]] - ; CHECK-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](i32), [[COPY4]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY]](i32), [[C]] + ; CHECK-NEXT: [[AMDGPU_COPY_VCC_SCC:%[0-9]+]]:vcc(i1) = G_AMDGPU_COPY_VCC_SCC [[ICMP]](i32) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[AMDGPU_COPY_VCC_SCC]](i1), [[COPY1]], [[COPY2]] + ; CHECK-NEXT: G_STORE [[SELECT]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(s32) = COPY $vgpr1 - %3:vgpr(s32) = COPY $vgpr2 - %4:vgpr(s32) = COPY $vgpr3 - %5:vgpr(p1) = G_MERGE_VALUES %3(s32), %4(s32) - %6:sgpr(s32) = G_CONSTANT i32 0 - %7:sgpr(s1) = G_ICMP intpred(eq), %0(s32), %6 - %8:vgpr(s32) = G_SELECT %7(s1), %1, %2 - G_STORE %8(s32), %5(p1) :: (store (s32), addrspace 1) + %0:sgpr(i32) = COPY $sgpr0 + 
%1:vgpr(i32) = COPY $vgpr0 + %2:vgpr(i32) = COPY $vgpr1 + %3:vgpr(i32) = COPY $vgpr2 + %4:vgpr(i32) = COPY $vgpr3 + %5:vgpr(p1) = G_MERGE_VALUES %3(i32), %4(i32) + %6:sgpr(i32) = G_CONSTANT i32 0 + %7:sgpr(i1) = G_ICMP intpred(eq), %0(i32), %6 + %8:vgpr(i32) = G_SELECT %7(i1), %1, %2 + G_STORE %8(i32), %5(p1) :: (store (i32), addrspace 1) S_ENDPGM 0 ... @@ -402,28 +414,28 @@ body: | ; CHECK-LABEL: name: vgpr_to_vcc_trunc ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[COPY]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[AND]](s32), [[C1]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY1]], [[COPY2]] - ; CHECK-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](i32), [[COPY4]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(i32) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[AND]](i32), [[C1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[COPY1]], [[COPY2]] + ; CHECK-NEXT: G_STORE [[SELECT]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = COPY $vgpr3 - %4:vgpr(s32) = COPY $vgpr4 - %5:vgpr(p1) = G_MERGE_VALUES %3(s32), %4(s32) - %6:vcc(s1) = G_TRUNC %0(s32) - %7:vgpr(s32) = G_SELECT %6(s1), %1, %2 - G_STORE %7(s32), %5(p1) :: (store (s32), addrspace 1) + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(i32) = COPY $vgpr3 + %4:vgpr(i32) = COPY $vgpr4 + %5:vgpr(p1) = G_MERGE_VALUES %3(i32), %4(i32) + %6:vcc(i1) = G_TRUNC %0(i32) + %7:vgpr(i32) = G_SELECT %6(i1), %1, %2 + G_STORE %7(i32), %5(p1) :: (store (i32), addrspace 1) S_ENDPGM 0 ... 
@@ -437,27 +449,27 @@ body: | ; CHECK-LABEL: name: zext ; CHECK: liveins: $sgpr0, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[ICMP]], [[C1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND]](s32), [[C1]], [[C2]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT]](s32) - ; CHECK-NEXT: G_STORE [[COPY3]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY1]](i32), [[COPY2]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY]](i32), [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(i32) = G_AND [[ICMP]], [[C1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(i32) = G_SELECT [[AND]](i32), [[C1]], [[C2]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[SELECT]](i32) + ; CHECK-NEXT: G_STORE [[COPY3]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(s32) = COPY $vgpr1 - %3:vgpr(p1) = G_MERGE_VALUES %1(s32), %2(s32) - %4:sgpr(s32) = G_CONSTANT i32 10 - %5:sgpr(s1) = G_ICMP intpred(eq), %0(s32), %4 - %6:sgpr(s32) = G_ZEXT %5(s1) - G_STORE %6(s32), %3(p1) :: (store (s32), addrspace 1) + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:vgpr(i32) = COPY $vgpr1 + %3:vgpr(p1) = G_MERGE_VALUES %1(i32), %2(i32) + %4:sgpr(i32) = G_CONSTANT i32 10 + %5:sgpr(i1) = G_ICMP intpred(eq), %0(i32), %4 + %6:sgpr(i32) = G_ZEXT %5(i1) + G_STORE %6(i32), %3(p1) :: (store (i32), addrspace 1) S_ENDPGM 0 ... 
@@ -471,28 +483,28 @@ body: | ; CHECK-LABEL: name: sext ; CHECK: liveins: $sgpr0, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[ICMP]], [[C1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -1 - ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND]](s32), [[C2]], [[C3]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT]](s32) - ; CHECK-NEXT: G_STORE [[COPY3]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY1]](i32), [[COPY2]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY]](i32), [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(i32) = G_AND [[ICMP]], [[C1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(i32) = G_SELECT [[AND]](i32), [[C2]], [[C3]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[SELECT]](i32) + ; CHECK-NEXT: G_STORE [[COPY3]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(s32) = COPY $vgpr1 - %3:vgpr(p1) = G_MERGE_VALUES %1(s32), %2(s32) - %4:sgpr(s32) = G_CONSTANT i32 10 - %5:sgpr(s1) = G_ICMP intpred(eq), %0(s32), %4 - %6:sgpr(s32) = G_SEXT %5(s1) - G_STORE %6(s32), %3(p1) :: (store (s32), addrspace 1) + %0:sgpr(i32) = COPY $sgpr0 + %1:vgpr(i32) = COPY $vgpr0 + %2:vgpr(i32) = COPY $vgpr1 + %3:vgpr(p1) = G_MERGE_VALUES %1(i32), %2(i32) + %4:sgpr(i32) = G_CONSTANT i32 10 + %5:sgpr(i1) = G_ICMP intpred(eq), %0(i32), %4 + %6:sgpr(i32) = G_SEXT %5(i1) + G_STORE %6(i32), %3(p1) :: (store (i32), addrspace 1) S_ENDPGM 0 ... 
@@ -506,33 +518,33 @@ body: | ; CHECK-LABEL: name: and_i1_vcc ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(uge), [[COPY]](s32), [[COPY4]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(uge), [[COPY1]](s32), [[COPY5]] - ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[AND]](s1), [[COPY]], [[COPY1]] - ; CHECK-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(uge), [[COPY]](i32), [[COPY4]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[C1]](i32) + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(uge), [[COPY1]](i32), [[COPY5]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(i1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[AND]](i1), [[COPY]], [[COPY1]] + ; CHECK-NEXT: G_STORE [[SELECT]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = COPY $vgpr3 - %4:vgpr(p1) = G_MERGE_VALUES %2(s32), %3(s32) - %5:sgpr(s32) = G_CONSTANT i32 10 - %6:vcc(s1) = G_ICMP intpred(uge), %0(s32), %5 - %7:sgpr(s32) = G_CONSTANT i32 20 - %8:vcc(s1) = G_ICMP intpred(uge), %1(s32), %7 - %9:vcc(s1) = G_AND %6, %8 - %10:vgpr(s32) = G_SELECT %9(s1), %0, %1 - G_STORE %10(s32), %4(p1) :: (store (s32), addrspace 1) + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(i32) = COPY $vgpr3 + %4:vgpr(p1) = G_MERGE_VALUES %2(i32), %3(i32) + %5:sgpr(i32) = G_CONSTANT i32 10 + %6:vcc(i1) = G_ICMP intpred(uge), %0(i32), %5 + %7:sgpr(i32) = G_CONSTANT i32 20 + %8:vcc(i1) = G_ICMP intpred(uge), %1(i32), %7 + %9:vcc(i1) = G_AND %6, %8 + %10:vgpr(i32) = G_SELECT %9(i1), %0, %1 + G_STORE %10(i32), %4(p1) :: (store (i32), addrspace 1) S_ENDPGM 0 ... 
@@ -546,34 +558,34 @@ body: | ; CHECK-LABEL: name: and_i1_scc ; CHECK: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(uge), [[COPY]](s32), [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(uge), [[COPY1]](s32), [[C1]] - ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s32) = G_AND [[AND]], [[C2]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND1]](s32), [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[SELECT]](s32) - ; CHECK-NEXT: G_STORE [[COPY4]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(uge), [[COPY]](i32), [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(uge), [[COPY1]](i32), [[C1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(i32) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(i32) = G_AND [[AND]], [[C2]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(i32) = G_SELECT [[AND1]](i32), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[SELECT]](i32) + ; CHECK-NEXT: G_STORE [[COPY4]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = COPY $sgpr1 - %2:vgpr(s32) = COPY $vgpr0 - %3:vgpr(s32) = COPY $vgpr1 - %4:vgpr(p1) = G_MERGE_VALUES %2(s32), %3(s32) - %5:sgpr(s32) = G_CONSTANT i32 10 - %6:sgpr(s1) = G_ICMP intpred(uge), %0(s32), %5 - %7:sgpr(s32) = G_CONSTANT i32 20 - %8:sgpr(s1) = G_ICMP intpred(uge), %1(s32), %7 - %9:sgpr(s1) = G_AND %6, %8 - %10:sgpr(s32) = G_SELECT %9(s1), %0, %1 - G_STORE %10(s32), %4(p1) :: (store (s32), addrspace 1) + %0:sgpr(i32) = COPY $sgpr0 + %1:sgpr(i32) = COPY $sgpr1 + %2:vgpr(i32) = COPY $vgpr0 + %3:vgpr(i32) = COPY $vgpr1 + %4:vgpr(p1) = G_MERGE_VALUES %2(i32), %3(i32) + %5:sgpr(i32) = G_CONSTANT i32 10 + %6:sgpr(i1) = G_ICMP intpred(uge), %0(i32), %5 + %7:sgpr(i32) = G_CONSTANT i32 20 + %8:sgpr(i1) = G_ICMP intpred(uge), %1(i32), %7 + %9:sgpr(i1) = G_AND %6, %8 + %10:sgpr(i32) = G_SELECT %9(i1), %0, %1 + G_STORE %10(i32), %4(p1) :: (store (i32), addrspace 1) S_ENDPGM 0 ... 
@@ -587,52 +599,52 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[ICMP]](s1) - ; CHECK-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY4]](s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY1]](i32), [[COPY2]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY3]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec(i1) = COPY [[ICMP]](i1) + ; CHECK-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(i32) = SI_IF [[COPY4]](i1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: G_BR %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[C]](s32), %bb.0, [[C1]](s32), %bb.1 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY [[SI_IF]](s32) - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[COPY5]](s32) - ; CHECK-NEXT: G_STORE [[PHI]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: [[PHI:%[0-9]+]]:vgpr(i32) = G_PHI [[C]](i32), %bb.0, [[C1]](i32), %bb.1 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(i32) = COPY [[SI_IF]](i32) + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[COPY5]](i32) + ; CHECK-NEXT: G_STORE [[PHI]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0, $vgpr1, $vgpr2 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(p1) = G_MERGE_VALUES %1(s32), %2(s32) - %4:sgpr(s32) = G_CONSTANT i32 0 - %9:vcc(s1) = G_ICMP intpred(eq), %0(s32), %4 - %5:sreg_32_xm0_xexec(s1) = COPY %9(s1) - %6:sreg_32_xm0_xexec(s32) = SI_IF %5(s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(p1) = G_MERGE_VALUES %1(i32), %2(i32) + %4:sgpr(i32) = G_CONSTANT i32 0 + %5:vcc(i1) = G_ICMP intpred(eq), %0(i32), %4 + %6:sreg_32_xm0_xexec(i1) = COPY %5(i1) + %7:sreg_32_xm0_xexec(i32) = SI_IF %6(i1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.1 bb.1: successors: %bb.2(0x80000000) - %7:sgpr(s32) = G_CONSTANT i32 1 + %8:sgpr(i32) = G_CONSTANT i32 1 bb.2: - %8:vgpr(s32) = G_PHI %4(s32), %bb.0, %7(s32), %bb.1 - %10:sgpr(s32) = COPY %6(s32) - G_INTRINSIC_W_SIDE_EFFECTS 
intrinsic(@llvm.amdgcn.end.cf), %10(s32) - G_STORE %8(s32), %3(p1) :: (store (s32), addrspace 1) + %9:vgpr(i32) = G_PHI %4(i32), %bb.0, %8(i32), %bb.1 + %10:sgpr(i32) = COPY %7(i32) + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %10(i32) + G_STORE %9(i32), %3(p1) :: (store (i32), addrspace 1) S_ENDPGM 0 ... @@ -646,69 +658,71 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -1 - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY1]](i32), [[COPY2]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI %7(s32), %bb.1, [[C1]](s32), %bb.0 - ; CHECK-NEXT: [[PHI1:%[0-9]+]]:sgpr(s32) = G_PHI [[C]](s32), %bb.0, %9(s32), %bb.1 - ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[PHI1]], [[C2]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) - ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:vgpr(s32) = G_UITOFP [[COPY3]](s32) - ; CHECK-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]] - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec(s32) = COPY [[INT]](s32) - ; CHECK-NEXT: SI_LOOP [[COPY4]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[PHI:%[0-9]+]]:sgpr(i32) = G_PHI %7(i32), %bb.1, [[C1]](i32), %bb.0 + ; CHECK-NEXT: [[PHI1:%[0-9]+]]:sgpr(i32) = G_PHI [[C]](i32), %bb.0, %9(i32), %bb.1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(i32) = G_ADD [[PHI1]], [[C2]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[ADD]](i32) + ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:vgpr(f32) = G_UITOFP [[COPY3]](i32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[FCMP:%[0-9]+]]:vcc(i1) = G_FCMP floatpred(ogt), [[UITOFP]](f32), [[BITCAST]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP]](i1), [[PHI]](i32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec(i32) = COPY [[INT]](i32) + ; CHECK-NEXT: SI_LOOP [[COPY4]](i32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: [[PHI2:%[0-9]+]]:vgpr(s32) = G_PHI [[ADD]](s32), %bb.1 - ; CHECK-NEXT: [[PHI3:%[0-9]+]]:sgpr(s32) = G_PHI [[INT]](s32), %bb.1 - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI3]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C3]](s32) - ; CHECK-NEXT: [[MUL:%[0-9]+]]:vgpr(s32) = G_MUL [[PHI2]], [[COPY5]] 
- ; CHECK-NEXT: G_STORE [[MUL]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: [[PHI2:%[0-9]+]]:vgpr(i32) = G_PHI [[ADD]](i32), %bb.1 + ; CHECK-NEXT: [[PHI3:%[0-9]+]]:sgpr(i32) = G_PHI [[INT]](i32), %bb.1 + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI3]](i32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[C3]](i32) + ; CHECK-NEXT: [[MUL:%[0-9]+]]:vgpr(i32) = G_MUL [[PHI2]], [[COPY5]] + ; CHECK-NEXT: G_STORE [[MUL]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1(0x80000000) liveins: $vgpr0, $vgpr1, $vgpr2 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(p1) = G_MERGE_VALUES %1(s32), %2(s32) - %4:sgpr(s32) = G_CONSTANT i32 -1 - %5:sgpr(s32) = G_CONSTANT i32 0 + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(i32) = COPY $vgpr2 + %3:vgpr(p1) = G_MERGE_VALUES %1(i32), %2(i32) + %4:sgpr(i32) = G_CONSTANT i32 -1 + %5:sgpr(i32) = G_CONSTANT i32 0 bb.1: successors: %bb.2(0x04000000), %bb.1(0x7c000000) - %6:sgpr(s32) = G_PHI %17(s32), %bb.1, %5(s32), %bb.0 - %8:sgpr(s32) = G_PHI %4(s32), %bb.0, %9(s32), %bb.1 - %10:sgpr(s32) = G_CONSTANT i32 1 - %9:sgpr(s32) = G_ADD %8, %10 - %11:sgpr(s32) = G_UITOFP %9(s32) - %12:vcc(s1) = G_FCMP floatpred(ogt), %11(s32), %0 - %17:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %12(s1), %6(s32) - %7:sreg_32_xm0_xexec(s32) = COPY %17(s32) - SI_LOOP %7(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + %6:sgpr(i32) = G_PHI %7(i32), %bb.1, %5(i32), %bb.0 + %8:sgpr(i32) = G_PHI %4(i32), %bb.0, %9(i32), %bb.1 + %10:sgpr(i32) = G_CONSTANT i32 1 + %9:sgpr(i32) = G_ADD %8, %10 + %11:sgpr(f32) = G_UITOFP %9(i32) + %12:vgpr(f32) = G_BITCAST %0(i32) + %13:vcc(i1) = G_FCMP floatpred(ogt), %11(f32), %12 + %7:sgpr(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %13(i1), %6(i32) + %14:sreg_32_xm0_xexec(i32) = COPY %7(i32) + SI_LOOP %14(i32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.2 bb.2: - %13:vgpr(s32) = G_PHI %9(s32), %bb.1 - %14:sgpr(s32) = G_PHI %17(s32), %bb.1 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %14(s32) - %15:sgpr(s32) = G_CONSTANT i32 10 - %16:vgpr(s32) = G_MUL %13, %15 - G_STORE %16(s32), %3(p1) :: (store (s32), addrspace 1) + %15:vgpr(i32) = G_PHI %9(i32), %bb.1 + %16:sgpr(i32) = G_PHI %7(i32), %bb.1 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %16(i32) + %17:sgpr(i32) = G_CONSTANT i32 10 + %18:vgpr(i32) = G_MUL %15, %17 + G_STORE %18(i32), %3(p1) :: (store (i32), addrspace 1) S_ENDPGM 0 ... 
@@ -722,223 +736,223 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[MV2:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[MV2:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:sreg_32(i1) = IMPLICIT_DEF ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %13(s1), %bb.3 - ; CHECK-NEXT: [[PHI1:%[0-9]+]]:sgpr(s32) = G_PHI %15(s32), %bb.3, [[C]](s32), %bb.0 - ; CHECK-NEXT: [[PHI2:%[0-9]+]]:sgpr(s32) = G_PHI [[C]](s32), %bb.0, %17(s32), %bb.3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1) - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 31 - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:sgpr(s32) = G_ASHR [[PHI2]], [[C1]](s32) - ; CHECK-NEXT: [[MV3:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[PHI2]](s32), [[ASHR]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(s64) = G_SHL [[MV3]], [[C2]](s32) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s64) = COPY [[SHL]](s64) - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[MV1]], [[COPY7]](s64) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1) - ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[C3]](s32) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[COPY8]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[ICMP]](s1) - ; CHECK-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AMDGPU_COPY_VCC_SCC:%[0-9]+]]:sreg_32(s1) = G_AMDGPU_COPY_VCC_SCC [[C4]](s32) - ; CHECK-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[AMDGPU_COPY_VCC_SCC]](s1), implicit-def $scc - ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), 
[[S_AND_B32_]](s1), implicit-def $scc - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1) - ; CHECK-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY9]](s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_32(i1) = PHI [[DEF1]](i1), %bb.0, %13(i1), %bb.3 + ; CHECK-NEXT: [[PHI1:%[0-9]+]]:sgpr(i32) = G_PHI %15(i32), %bb.3, [[C]](i32), %bb.0 + ; CHECK-NEXT: [[PHI2:%[0-9]+]]:sgpr(i32) = G_PHI [[C]](i32), %bb.0, %17(i32), %bb.3 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32(i1) = COPY [[PHI]](i1) + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 31 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:sgpr(i32) = G_ASHR [[PHI2]], [[C1]](i32) + ; CHECK-NEXT: [[MV3:%[0-9]+]]:sgpr(i64) = G_MERGE_VALUES [[PHI2]](i32), [[ASHR]](i32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(i64) = G_SHL [[MV3]], [[C2]](i32) + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(i64) = COPY [[SHL]](i64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[MV1]], [[COPY7]](i64) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i32), addrspace 1) + ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr(i32) = COPY [[C3]](i32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[LOAD]](i32), [[COPY8]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32_xm0_xexec(i1) = COPY [[ICMP]](i1) + ; CHECK-NEXT: [[C4:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AMDGPU_COPY_VCC_SCC:%[0-9]+]]:sreg_32(i1) = G_AMDGPU_COPY_VCC_SCC [[C4]](i32) + ; CHECK-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(i1) = S_ANDN2_B32 [[COPY6]](i1), $exec_lo, implicit-def $scc + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(i1) = S_AND_B32 $exec_lo, [[AMDGPU_COPY_VCC_SCC]](i1), implicit-def $scc + ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(i1) = S_OR_B32 [[S_ANDN2_B32_]](i1), [[S_AND_B32_]](i1), implicit-def $scc + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_32(i1) = COPY [[S_OR_B32_]](i1) + ; CHECK-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(i32) = SI_IF [[COPY9]](i1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.5(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:sgpr(s64) = G_SHL [[MV3]], [[C5]](s32) - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr(s64) = COPY [[SHL1]](s64) - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[MV2]], [[COPY11]](s64) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:vgpr(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s32), addrspace 1) - ; CHECK-NEXT: [[C6:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY [[C6]](s32) - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[LOAD1]](s32), [[COPY12]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[ICMP1]](s1) - ; CHECK-NEXT: [[C7:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AMDGPU_COPY_VCC_SCC1:%[0-9]+]]:sreg_32(s1) = G_AMDGPU_COPY_VCC_SCC [[C7]](s32) - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[AMDGPU_COPY_VCC_SCC1]](s1) - ; CHECK-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY13]](s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[C5:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:sgpr(i64) = G_SHL [[MV3]], [[C5]](i32) + ; CHECK-NEXT: 
[[COPY11:%[0-9]+]]:vgpr(i64) = COPY [[SHL1]](i64) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[MV2]], [[COPY11]](i64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:vgpr(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i32), addrspace 1) + ; CHECK-NEXT: [[C6:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr(i32) = COPY [[C6]](i32) + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[LOAD1]](i32), [[COPY12]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_32_xm0_xexec(i1) = COPY [[ICMP1]](i1) + ; CHECK-NEXT: [[C7:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AMDGPU_COPY_VCC_SCC1:%[0-9]+]]:sreg_32(i1) = G_AMDGPU_COPY_VCC_SCC [[C7]](i32) + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_32(i1) = COPY [[AMDGPU_COPY_VCC_SCC1]](i1) + ; CHECK-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(i32) = SI_IF [[COPY13]](i1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: G_BR %bb.4 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.6(0x04000000), %bb.1(0x7c000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[PHI3:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.1, %45(s1), %bb.5 - ; CHECK-NEXT: [[PHI4:%[0-9]+]]:sgpr(s32) = G_PHI %46(s32), %bb.5, [[DEF]](s32), %bb.1 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]](s1) - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sgpr(s32) = COPY [[SI_IF]](s32) - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[COPY16]](s32) - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vcc(s1) = COPY [[COPY15]](s1) - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY17]](s1), [[PHI1]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:sreg_32_xm0_xexec(s32) = COPY [[INT]](s32) - ; CHECK-NEXT: SI_LOOP [[COPY18]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[PHI3:%[0-9]+]]:sreg_32(i1) = PHI [[S_OR_B32_]](i1), %bb.1, %45(i1), %bb.5 + ; CHECK-NEXT: [[PHI4:%[0-9]+]]:sgpr(i32) = G_PHI %46(i32), %bb.5, [[DEF]](i32), %bb.1 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_32(i1) = COPY [[PHI3]](i1) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sgpr(i32) = COPY [[SI_IF]](i32) + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[COPY16]](i32) + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vcc(i1) = COPY [[COPY15]](i1) + ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY17]](i1), [[PHI1]](i32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:sreg_32_xm0_xexec(i32) = COPY [[INT]](i32) + ; CHECK-NEXT: SI_LOOP [[COPY18]](i32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: G_BR %bb.6 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.5(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[C8:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:sgpr(s64) = G_SHL [[MV3]], [[C8]](s32) - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:vgpr(s64) = COPY [[SHL2]](s64) - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[MV]], [[COPY19]](s64) - ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:vgpr(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s32), addrspace 1) - ; CHECK-NEXT: [[C9:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:vgpr(s32) = COPY [[C9]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[LOAD2]], [[COPY20]] - ; CHECK-NEXT: G_STORE [[ADD]](s32), [[PTR_ADD2]](p1) :: (store (s32), addrspace 1) - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:sgpr(s32) = G_ADD [[PHI2]], [[C9]] - ; CHECK-NEXT: [[C10:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 100 - ; CHECK-NEXT: 
[[ICMP2:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ult), [[PHI2]](s32), [[C10]] - ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[ICMP2]], [[C9]] - ; CHECK-NEXT: [[AMDGPU_COPY_VCC_SCC2:%[0-9]+]]:sreg_32(s1) = G_AMDGPU_COPY_VCC_SCC [[AND]](s32) - ; CHECK-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY14]](s1), $exec_lo, implicit-def $scc - ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[AMDGPU_COPY_VCC_SCC2]](s1), implicit-def $scc - ; CHECK-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc + ; CHECK-NEXT: [[C8:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:sgpr(i64) = G_SHL [[MV3]], [[C8]](i32) + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:vgpr(i64) = COPY [[SHL2]](i64) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[MV]], [[COPY19]](i64) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:vgpr(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i32), addrspace 1) + ; CHECK-NEXT: [[C9:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:vgpr(i32) = COPY [[C9]](i32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[LOAD2]], [[COPY20]] + ; CHECK-NEXT: G_STORE [[ADD]](i32), [[PTR_ADD2]](p1) :: (store (i32), addrspace 1) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:sgpr(i32) = G_ADD [[PHI2]], [[C9]] + ; CHECK-NEXT: [[C10:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 100 + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ult), [[PHI2]](i32), [[C10]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(i32) = G_AND [[ICMP2]], [[C9]] + ; CHECK-NEXT: [[AMDGPU_COPY_VCC_SCC2:%[0-9]+]]:sreg_32(i1) = G_AMDGPU_COPY_VCC_SCC [[AND]](i32) + ; CHECK-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(i1) = S_ANDN2_B32 [[COPY14]](i1), $exec_lo, implicit-def $scc + ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(i1) = S_AND_B32 $exec_lo, [[AMDGPU_COPY_VCC_SCC2]](i1), implicit-def $scc + ; CHECK-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(i1) = S_OR_B32 [[S_ANDN2_B32_1]](i1), [[S_AND_B32_1]](i1), implicit-def $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[PHI5:%[0-9]+]]:sreg_32(s1) = PHI [[AMDGPU_COPY_VCC_SCC1]](s1), %bb.2, [[S_OR_B32_1]](s1), %bb.4 - ; CHECK-NEXT: [[PHI6:%[0-9]+]]:sgpr(s32) = G_PHI [[ADD1]](s32), %bb.4, [[DEF]](s32), %bb.2 - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:sreg_32(s1) = COPY [[PHI5]](s1) - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:sreg_32(s1) = COPY [[COPY21]](s1) - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:sgpr(s32) = COPY [[SI_IF1]](s32) - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[COPY23]](s32) - ; CHECK-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY10]](s1), $exec_lo, implicit-def $scc - ; CHECK-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY22]](s1), implicit-def $scc - ; CHECK-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc + ; CHECK-NEXT: [[PHI5:%[0-9]+]]:sreg_32(i1) = PHI [[AMDGPU_COPY_VCC_SCC1]](i1), %bb.2, [[S_OR_B32_1]](i1), %bb.4 + ; CHECK-NEXT: [[PHI6:%[0-9]+]]:sgpr(i32) = G_PHI [[ADD1]](i32), %bb.4, [[DEF]](i32), %bb.2 + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:sreg_32(i1) = COPY [[PHI5]](i1) + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:sreg_32(i1) = COPY [[COPY21]](i1) + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:sgpr(i32) = COPY [[SI_IF1]](i32) + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[COPY23]](i32) + ; CHECK-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(i1) = S_ANDN2_B32 [[COPY10]](i1), $exec_lo, 
implicit-def $scc + ; CHECK-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(i1) = S_AND_B32 $exec_lo, [[COPY22]](i1), implicit-def $scc + ; CHECK-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(i1) = S_OR_B32 [[S_ANDN2_B32_2]](i1), [[S_AND_B32_2]](i1), implicit-def $scc ; CHECK-NEXT: G_BR %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.6: - ; CHECK-NEXT: [[PHI7:%[0-9]+]]:sgpr(s32) = G_PHI [[INT]](s32), %bb.3 - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI7]](s32) + ; CHECK-NEXT: [[PHI7:%[0-9]+]]:sgpr(i32) = G_PHI [[INT]](i32), %bb.3 + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI7]](i32) ; CHECK-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1(0x80000000) liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(p1) = G_MERGE_VALUES %0(s32), %1(s32) - %3:vgpr(s32) = COPY $vgpr2 - %4:vgpr(s32) = COPY $vgpr3 - %5:vgpr(p1) = G_MERGE_VALUES %3(s32), %4(s32) - %6:vgpr(s32) = COPY $vgpr4 - %7:vgpr(s32) = COPY $vgpr5 - %8:vgpr(p1) = G_MERGE_VALUES %6(s32), %7(s32) - %9:sgpr(s32) = G_IMPLICIT_DEF - %10:sgpr(s32) = G_CONSTANT i32 0 - %11:sreg_32(s1) = IMPLICIT_DEF + %0:vgpr(i32) = COPY $vgpr0 + %1:vgpr(i32) = COPY $vgpr1 + %2:vgpr(p1) = G_MERGE_VALUES %0(i32), %1(i32) + %3:vgpr(i32) = COPY $vgpr2 + %4:vgpr(i32) = COPY $vgpr3 + %5:vgpr(p1) = G_MERGE_VALUES %3(i32), %4(i32) + %6:vgpr(i32) = COPY $vgpr4 + %7:vgpr(i32) = COPY $vgpr5 + %8:vgpr(p1) = G_MERGE_VALUES %6(i32), %7(i32) + %9:sgpr(i32) = G_IMPLICIT_DEF + %10:sgpr(i32) = G_CONSTANT i32 0 + %11:sreg_32(i1) = IMPLICIT_DEF bb.1: successors: %bb.2(0x40000000), %bb.3(0x40000000) - %12:sreg_32(s1) = PHI %11(s1), %bb.0, %13(s1), %bb.3 - %14:sgpr(s32) = G_PHI %15(s32), %bb.3, %10(s32), %bb.0 - %16:sgpr(s32) = G_PHI %10(s32), %bb.0, %17(s32), %bb.3 - %18:sreg_32(s1) = COPY %12(s1) - %19:sgpr(s64) = G_SEXT %16(s32) - %20:sgpr(s32) = G_CONSTANT i32 2 - %21:sgpr(s64) = G_SHL %19, %20(s32) - %22:vgpr(p1) = G_PTR_ADD %5, %21(s64) - %23:vgpr(s32) = G_LOAD %22(p1) :: (load (s32), addrspace 1) - %24:sgpr(s32) = G_CONSTANT i32 0 - %25:vcc(s1) = G_ICMP intpred(ne), %23(s32), %24 - %26:sreg_32_xm0_xexec(s1) = COPY %25(s1) - %27:sgpr(s1) = G_CONSTANT i1 true - %28:sreg_32(s1) = COPY %27(s1) - %29:sreg_32(s1) = S_ANDN2_B32 %18(s1), $exec_lo, implicit-def $scc - %30:sreg_32(s1) = S_AND_B32 $exec_lo, %28(s1), implicit-def $scc - %31:sreg_32(s1) = S_OR_B32 %29(s1), %30(s1), implicit-def $scc - %32:sreg_32(s1) = COPY %31(s1) - %33:sreg_32_xm0_xexec(s32) = SI_IF %26(s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec + %12:sreg_32(i1) = PHI %11(i1), %bb.0, %13(i1), %bb.3 + %14:sgpr(i32) = G_PHI %15(i32), %bb.3, %10(i32), %bb.0 + %16:sgpr(i32) = G_PHI %10(i32), %bb.0, %17(i32), %bb.3 + %18:sreg_32(i1) = COPY %12(i1) + %19:sgpr(i64) = G_SEXT %16(i32) + %20:sgpr(i32) = G_CONSTANT i32 2 + %21:sgpr(i64) = G_SHL %19, %20(i32) + %22:vgpr(p1) = G_PTR_ADD %5, %21(i64) + %23:vgpr(i32) = G_LOAD %22(p1) :: (load (i32), addrspace 1) + %24:sgpr(i32) = G_CONSTANT i32 0 + %25:vcc(i1) = G_ICMP intpred(ne), %23(i32), %24 + %26:sreg_32_xm0_xexec(i1) = COPY %25(i1) + %27:sgpr(i1) = G_CONSTANT i1 true + %28:sreg_32(i1) = COPY %27(i1) + %29:sreg_32(i1) = S_ANDN2_B32 %18(i1), $exec_lo, implicit-def $scc + %30:sreg_32(i1) = S_AND_B32 $exec_lo, %28(i1), implicit-def $scc + %31:sreg_32(i1) = S_OR_B32 %29(i1), %30(i1), implicit-def $scc + %32:sreg_32(i1) = COPY %31(i1) + %33:sreg_32_xm0_xexec(i32) = SI_IF %26(i1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.2 
bb.2: successors: %bb.4(0x40000000), %bb.5(0x40000000) - %34:sgpr(s32) = G_CONSTANT i32 2 - %35:sgpr(s64) = G_SHL %19, %34(s32) - %36:vgpr(p1) = G_PTR_ADD %8, %35(s64) - %37:vgpr(s32) = G_LOAD %36(p1) :: (load (s32), addrspace 1) - %38:sgpr(s32) = G_CONSTANT i32 0 - %39:vcc(s1) = G_ICMP intpred(ne), %37(s32), %38 - %40:sreg_32_xm0_xexec(s1) = COPY %39(s1) - %41:sgpr(s1) = G_CONSTANT i1 true - %42:sreg_32(s1) = COPY %41(s1) - %43:sreg_32(s1) = COPY %42(s1) - %44:sreg_32_xm0_xexec(s32) = SI_IF %40(s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec + %34:sgpr(i32) = G_CONSTANT i32 2 + %35:sgpr(i64) = G_SHL %19, %34(i32) + %36:vgpr(p1) = G_PTR_ADD %8, %35(i64) + %37:vgpr(i32) = G_LOAD %36(p1) :: (load (i32), addrspace 1) + %38:sgpr(i32) = G_CONSTANT i32 0 + %39:vcc(i1) = G_ICMP intpred(ne), %37(i32), %38 + %40:sreg_32_xm0_xexec(i1) = COPY %39(i1) + %41:sgpr(i1) = G_CONSTANT i1 true + %42:sreg_32(i1) = COPY %41(i1) + %43:sreg_32(i1) = COPY %42(i1) + %44:sreg_32_xm0_xexec(i32) = SI_IF %40(i1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.4 bb.3: successors: %bb.6(0x04000000), %bb.1(0x7c000000) - %13:sreg_32(s1) = PHI %31(s1), %bb.1, %45(s1), %bb.5 - %17:sgpr(s32) = G_PHI %46(s32), %bb.5, %9(s32), %bb.1 - %47:sreg_32(s1) = COPY %13(s1) - %48:sgpr(s32) = COPY %33(s32) - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %48(s32) - %49:vcc(s1) = COPY %47(s1) - %15:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %49(s1), %14(s32) - %50:sreg_32_xm0_xexec(s32) = COPY %15(s32) - SI_LOOP %50(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + %13:sreg_32(i1) = PHI %31(i1), %bb.1, %45(i1), %bb.5 + %17:sgpr(i32) = G_PHI %46(i32), %bb.5, %9(i32), %bb.1 + %47:sreg_32(i1) = COPY %13(i1) + %48:sgpr(i32) = COPY %33(i32) + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %48(i32) + %49:vcc(i1) = COPY %47(i1) + %15:sgpr(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %49(i1), %14(i32) + %50:sreg_32_xm0_xexec(i32) = COPY %15(i32) + SI_LOOP %50(i32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.6 bb.4: successors: %bb.5(0x80000000) - %51:sgpr(s32) = G_CONSTANT i32 2 - %52:sgpr(s64) = G_SHL %19, %51(s32) - %53:vgpr(p1) = G_PTR_ADD %2, %52(s64) - %54:vgpr(s32) = G_LOAD %53(p1) :: (load (s32), addrspace 1) - %55:sgpr(s32) = G_CONSTANT i32 1 - %56:vgpr(s32) = G_ADD %54, %55 - G_STORE %56(s32), %53(p1) :: (store (s32), addrspace 1) - %57:sgpr(s32) = G_ADD %16, %55 - %58:sgpr(s32) = G_CONSTANT i32 100 - %59:sgpr(s1) = G_ICMP intpred(ult), %16(s32), %58 - %60:sreg_32(s1) = COPY %59(s1) - %61:sreg_32(s1) = S_ANDN2_B32 %43(s1), $exec_lo, implicit-def $scc - %62:sreg_32(s1) = S_AND_B32 $exec_lo, %60(s1), implicit-def $scc - %63:sreg_32(s1) = S_OR_B32 %61(s1), %62(s1), implicit-def $scc + %51:sgpr(i32) = G_CONSTANT i32 2 + %52:sgpr(i64) = G_SHL %19, %51(i32) + %53:vgpr(p1) = G_PTR_ADD %2, %52(i64) + %54:vgpr(i32) = G_LOAD %53(p1) :: (load (i32), addrspace 1) + %55:sgpr(i32) = G_CONSTANT i32 1 + %56:vgpr(i32) = G_ADD %54, %55 + G_STORE %56(i32), %53(p1) :: (store (i32), addrspace 1) + %57:sgpr(i32) = G_ADD %16, %55 + %58:sgpr(i32) = G_CONSTANT i32 100 + %59:sgpr(i1) = G_ICMP intpred(ult), %16(i32), %58 + %60:sreg_32(i1) = COPY %59(i1) + %61:sreg_32(i1) = S_ANDN2_B32 %43(i1), $exec_lo, implicit-def $scc + %62:sreg_32(i1) = S_AND_B32 $exec_lo, %60(i1), implicit-def $scc + %63:sreg_32(i1) = S_OR_B32 %61(i1), %62(i1), implicit-def $scc bb.5: successors: %bb.3(0x80000000) - %64:sreg_32(s1) = PHI %42(s1), %bb.2, 
%63(s1), %bb.4 - %46:sgpr(s32) = G_PHI %57(s32), %bb.4, %9(s32), %bb.2 - %65:sreg_32(s1) = COPY %64(s1) - %66:sreg_32(s1) = COPY %65(s1) - %67:sgpr(s32) = COPY %44(s32) - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %67(s32) - %68:sreg_32(s1) = S_ANDN2_B32 %32(s1), $exec_lo, implicit-def $scc - %69:sreg_32(s1) = S_AND_B32 $exec_lo, %66(s1), implicit-def $scc - %45:sreg_32(s1) = S_OR_B32 %68(s1), %69(s1), implicit-def $scc + %64:sreg_32(i1) = PHI %42(i1), %bb.2, %63(i1), %bb.4 + %46:sgpr(i32) = G_PHI %57(i32), %bb.4, %9(i32), %bb.2 + %65:sreg_32(i1) = COPY %64(i1) + %66:sreg_32(i1) = COPY %65(i1) + %67:sgpr(i32) = COPY %44(i32) + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %67(i32) + %68:sreg_32(i1) = S_ANDN2_B32 %32(i1), $exec_lo, implicit-def $scc + %69:sreg_32(i1) = S_AND_B32 $exec_lo, %66(i1), implicit-def $scc + %45:sreg_32(i1) = S_OR_B32 %68(i1), %69(i1), implicit-def $scc G_BR %bb.3 bb.6: - %70:sgpr(s32) = G_PHI %15(s32), %bb.3 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %70(s32) + %70:sgpr(i32) = G_PHI %15(i32), %bb.3 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %70(i32) S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui-regbankselect.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui-regbankselect.mir index 8f3495ea87eec..b6eb89e65ea22 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui-regbankselect.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui-regbankselect.mir @@ -11,23 +11,25 @@ body: | ; CHECK-LABEL: name: uniform_in_vgpr ; CHECK: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[FPTOUI:%[0-9]+]]:sgpr(s32) = G_FPTOUI [[COPY]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[FPTOUI]], [[COPY1]] - ; CHECK-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[FPTOUI:%[0-9]+]]:sgpr(i32) = G_FPTOUI [[BITCAST]](f32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(i32) = G_ADD [[FPTOUI]], [[COPY1]] + ; CHECK-NEXT: G_STORE [[ADD]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $vgpr0 - %3:_(s32) = COPY $vgpr1 - %4:_(p1) = G_MERGE_VALUES %2(s32), %3(s32) - %5:_(s32) = G_FPTOUI %0(s32) - %6:_(s32) = G_ADD %5, %1 - G_STORE %6(s32), %4(p1) :: (store (s32), addrspace 1) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $vgpr0 + %3:_(i32) = COPY $vgpr1 + %4:_(p1) = G_MERGE_VALUES %2(i32), %3(i32) + %5:_(f32) = G_BITCAST %0(i32) + %6:_(i32) = G_FPTOUI %5(f32) + %7:_(i32) = G_ADD %6, %1 + G_STORE %7(i32), %4(p1) :: (store (i32), addrspace 1) S_ENDPGM 0 ... 
@@ -41,27 +43,31 @@ body: | ; CHECK-LABEL: name: back_to_back_uniform_in_vgpr ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) - ; CHECK-NEXT: [[FADD:%[0-9]+]]:sgpr(s32) = G_FADD [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[FPTOUI:%[0-9]+]]:sgpr(s32) = G_FPTOUI [[FADD]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[FPTOUI]], [[COPY2]] - ; CHECK-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](i32), [[COPY4]](i32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY1]](i32) + ; CHECK-NEXT: [[FADD:%[0-9]+]]:sgpr(f32) = G_FADD [[BITCAST]], [[BITCAST1]] + ; CHECK-NEXT: [[FPTOUI:%[0-9]+]]:sgpr(i32) = G_FPTOUI [[FADD]](f32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(i32) = G_ADD [[FPTOUI]], [[COPY2]] + ; CHECK-NEXT: G_STORE [[ADD]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $sgpr2 - %3:_(s32) = COPY $vgpr0 - %4:_(s32) = COPY $vgpr1 - %5:_(p1) = G_MERGE_VALUES %3(s32), %4(s32) - %6:_(s32) = G_FADD %0, %1 - %7:_(s32) = G_FPTOUI %6(s32) - %8:_(s32) = G_ADD %7, %2 - G_STORE %8(s32), %5(p1) :: (store (s32), addrspace 1) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $sgpr2 + %3:_(i32) = COPY $vgpr0 + %4:_(i32) = COPY $vgpr1 + %5:_(p1) = G_MERGE_VALUES %3(i32), %4(i32) + %6:_(f32) = G_BITCAST %0(i32) + %7:_(f32) = G_BITCAST %1(i32) + %8:_(f32) = G_FADD %6, %7 + %9:_(i32) = G_FPTOUI %8(f32) + %10:_(i32) = G_ADD %9, %2 + G_STORE %10(i32), %5(p1) :: (store (i32), addrspace 1) S_ENDPGM 0 ... 
@@ -75,37 +81,37 @@ body: | ; CHECK-LABEL: name: buffer_load_uniform ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY5]](s32), [[COPY6]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:sgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x s32>) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[UV1]], [[C1]] - ; CHECK-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY5]](i32), [[COPY6]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:sgpr(<4 x i32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C]](i32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable load (<4 x i32>), align 1, addrspace 8) + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(i32), [[UV1:%[0-9]+]]:sgpr(i32), [[UV2:%[0-9]+]]:sgpr(i32), [[UV3:%[0-9]+]]:sgpr(i32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x i32>) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(i32) = G_ADD [[UV1]], [[C1]] + ; CHECK-NEXT: G_STORE [[ADD]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $sgpr2 - %3:_(s32) = COPY $sgpr3 - %4:_(<4 x s32>) = G_BUILD_VECTOR %0(s32), %1(s32), %2(s32), %3(s32) - %5:_(s32) = COPY $sgpr4 - %6:_(s32) = COPY $vgpr0 - %7:_(s32) = COPY $vgpr1 - %8:_(p1) = G_MERGE_VALUES %6(s32), %7(s32) - %9:_(s32) = G_CONSTANT i32 0 - %10:_(<4 x s32>) = G_AMDGPU_BUFFER_LOAD %4(<4 x s32>), %9(s32), %5, %9, 0, 0, 0 :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) - %11:_(s32) = G_CONSTANT i32 1 - %12:_(s32), %13:_(s32), %14:_(s32), %15:_(s32) = G_UNMERGE_VALUES %10(<4 x s32>) - %16:_(s32) = G_ADD %13, %11 - G_STORE %16(s32), %8(p1) :: (store (s32), addrspace 1) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $sgpr2 + %3:_(i32) = COPY $sgpr3 + %4:_(<4 x i32>) = 
G_BUILD_VECTOR %0(i32), %1(i32), %2(i32), %3(i32) + %5:_(i32) = COPY $sgpr4 + %6:_(i32) = COPY $vgpr0 + %7:_(i32) = COPY $vgpr1 + %8:_(p1) = G_MERGE_VALUES %6(i32), %7(i32) + %9:_(i32) = G_CONSTANT i32 0 + %10:_(<4 x i32>) = G_AMDGPU_BUFFER_LOAD %4(<4 x i32>), %9(i32), %5, %9, 0, 0, 0 :: (dereferenceable load (<4 x i32>), align 1, addrspace 8) + %11:_(i32) = G_CONSTANT i32 1 + %12:_(i32), %13:_(i32), %14:_(i32), %15:_(i32) = G_UNMERGE_VALUES %10(<4 x i32>) + %16:_(i32) = G_ADD %13, %11 + G_STORE %16(i32), %8(p1) :: (store (i32), addrspace 1) S_ENDPGM 0 ... @@ -119,37 +125,37 @@ body: | ; CHECK-LABEL: name: buffer_load_divergent ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY5]](s32), [[COPY6]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x s32>) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[UV1]], [[C1]] - ; CHECK-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY5]](i32), [[COPY6]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x i32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[C]](i32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable load (<4 x i32>), align 1, addrspace 8) + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32), [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x i32>) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[UV1]], [[C1]] + ; CHECK-NEXT: G_STORE [[ADD]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $sgpr2 - %3:_(s32) = COPY $sgpr3 - %4:_(<4 x s32>) = G_BUILD_VECTOR %0(s32), %1(s32), %2(s32), %3(s32) - %5:_(s32) = COPY $vgpr0 - %6:_(s32) = COPY $vgpr1 - %7:_(s32) 
= COPY $vgpr2 - %8:_(p1) = G_MERGE_VALUES %6(s32), %7(s32) - %9:_(s32) = G_CONSTANT i32 0 - %10:_(<4 x s32>) = G_AMDGPU_BUFFER_LOAD %4(<4 x s32>), %9(s32), %5, %9, 0, 0, 0 :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) - %11:_(s32) = G_CONSTANT i32 1 - %12:_(s32), %13:_(s32), %14:_(s32), %15:_(s32) = G_UNMERGE_VALUES %10(<4 x s32>) - %16:_(s32) = G_ADD %13, %11 - G_STORE %16(s32), %8(p1) :: (store (s32), addrspace 1) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $sgpr2 + %3:_(i32) = COPY $sgpr3 + %4:_(<4 x i32>) = G_BUILD_VECTOR %0(i32), %1(i32), %2(i32), %3(i32) + %5:_(i32) = COPY $vgpr0 + %6:_(i32) = COPY $vgpr1 + %7:_(i32) = COPY $vgpr2 + %8:_(p1) = G_MERGE_VALUES %6(i32), %7(i32) + %9:_(i32) = G_CONSTANT i32 0 + %10:_(<4 x i32>) = G_AMDGPU_BUFFER_LOAD %4(<4 x i32>), %9(i32), %5, %9, 0, 0, 0 :: (dereferenceable load (<4 x i32>), align 1, addrspace 8) + %11:_(i32) = G_CONSTANT i32 1 + %12:_(i32), %13:_(i32), %14:_(i32), %15:_(i32) = G_UNMERGE_VALUES %10(<4 x i32>) + %16:_(i32) = G_ADD %13, %11 + G_STORE %16(i32), %8(p1) :: (store (i32), addrspace 1) S_ENDPGM 0 ... @@ -163,29 +169,29 @@ body: | ; CHECK-LABEL: name: vgpr_and_i64 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[MV2:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s64) = G_AND [[MV]], [[MV1]] - ; CHECK-NEXT: G_STORE [[AND]](s64), [[MV2]](p1) :: (store (s64), addrspace 1) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[MV2:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(i64) = G_AND [[MV]], [[MV1]] + ; CHECK-NEXT: G_STORE [[AND]](i64), [[MV2]](p1) :: (store (i64), addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s64) = G_MERGE_VALUES %0(s32), %1(s32) - %3:_(s32) = COPY $vgpr2 - %4:_(s32) = COPY $vgpr3 - %5:_(s64) = G_MERGE_VALUES %3(s32), %4(s32) - %6:_(s32) = COPY $vgpr4 - %7:_(s32) = COPY $vgpr5 - %8:_(p1) = G_MERGE_VALUES %6(s32), %7(s32) - %9:_(s64) = G_AND %2, %5 - G_STORE %9(s64), %8(p1) :: (store (s64), addrspace 1) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i64) = G_MERGE_VALUES %0(i32), %1(i32) + %3:_(i32) = COPY $vgpr2 + %4:_(i32) = COPY $vgpr3 + %5:_(i64) = G_MERGE_VALUES %3(i32), %4(i32) + %6:_(i32) = COPY $vgpr4 + %7:_(i32) = COPY $vgpr5 + %8:_(p1) = G_MERGE_VALUES %6(i32), %7(i32) + %9:_(i64) = G_AND %2, %5 + G_STORE %9(i64), %8(p1) :: (store (i64), 
addrspace 1) S_ENDPGM 0 ... @@ -199,23 +205,23 @@ body: | ; CHECK-LABEL: name: abs_sgpr_i16 ; CHECK: liveins: $sgpr0, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[ABS:%[0-9]+]]:sgpr(s16) = G_ABS [[TRUNC]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[ABS]](s16) - ; CHECK-NEXT: G_STORE [[ANYEXT]](s32), [[MV]](p1) :: (store (s16), addrspace 1) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY1]](i32), [[COPY2]](i32) + ; CHECK-NEXT: [[ABS:%[0-9]+]]:sgpr(i16) = G_ABS [[TRUNC]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[ABS]](i16) + ; CHECK-NEXT: G_STORE [[ANYEXT]](i32), [[MV]](p1) :: (store (i16), addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 - %0:_(s32) = COPY $sgpr0 - %1:_(s16) = G_TRUNC %0(s32) - %2:_(s32) = COPY $vgpr0 - %3:_(s32) = COPY $vgpr1 - %4:_(p1) = G_MERGE_VALUES %2(s32), %3(s32) - %5:_(s16) = G_ABS %1 - %6:_(s32) = G_ANYEXT %5(s16) - G_STORE %6(s32), %4(p1) :: (store (s16), addrspace 1) + %0:_(i32) = COPY $sgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(i32) = COPY $vgpr0 + %3:_(i32) = COPY $vgpr1 + %4:_(p1) = G_MERGE_VALUES %2(i32), %3(i32) + %5:_(i16) = G_ABS %1 + %6:_(i32) = G_ANYEXT %5(i16) + G_STORE %6(i32), %4(p1) :: (store (i16), addrspace 1) S_ENDPGM 0 ... 
@@ -229,60 +235,64 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x30000000), %bb.2(0x50000000) ; CHECK-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s1) = G_ICMP intpred(uge), [[COPY2]](s32), [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s1) = G_ICMP intpred(ne), [[COPY3]](s32), [[C1]] - ; CHECK-NEXT: G_BRCOND [[ICMP1]](s1), %bb.2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 6 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(i1) = G_ICMP intpred(uge), [[COPY2]](i32), [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i1) = G_ICMP intpred(ne), [[COPY3]](i32), [[C1]] + ; CHECK-NEXT: G_BRCOND [[ICMP1]](i1), %bb.2 ; CHECK-NEXT: G_BR %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:sgpr(s1) = G_ICMP intpred(ult), [[COPY2]](s32), [[C2]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:sgpr(i1) = G_ICMP intpred(ult), [[COPY2]](i32), [[C2]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: [[PHI:%[0-9]+]]:sgpr(s1) = G_PHI [[ICMP]](s1), %bb.0, [[ICMP2]](s1), %bb.1 - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:sgpr(s32) = G_SEXT [[PHI]](s1) - ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[SEXT]], [[C3]] - ; CHECK-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: [[PHI:%[0-9]+]]:sgpr(i1) = G_PHI [[ICMP]](i1), %bb.0, [[ICMP2]](i1), %bb.1 + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:sgpr(i32) = G_SEXT [[PHI]](i1) + ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(i32) = G_ADD [[SEXT]], [[C3]] + ; CHECK-NEXT: G_STORE [[ADD]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1(0x30000000), %bb.2(0x50000000) liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(p1) = G_MERGE_VALUES %0(s32), %1(s32) - %3:_(s32) = COPY $sgpr0 - %4:_(s32) = COPY $sgpr1 - %5:_(s32) = G_CONSTANT i32 6 - %6:_(s1) = G_ICMP intpred(uge), %3(s32), %5 - %7:_(s32) = G_CONSTANT i32 0 - %8:_(s1) = G_ICMP intpred(ne), %4(s32), %7 - G_BRCOND %8(s1), %bb.2 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(p1) = G_MERGE_VALUES %0(i32), %1(i32) + %3:_(i32) = COPY $sgpr0 + %4:_(i32) = COPY $sgpr1 + %5:_(i32) = G_CONSTANT i32 6 + %6:_(i1) = G_ICMP intpred(uge), %3(i32), %5 + %7:_(i32) = G_CONSTANT i32 0 + %8:_(i1) = G_ICMP intpred(ne), %4(i32), %7 + G_BRCOND %8(i1), %bb.2 G_BR %bb.1 bb.1: successors: %bb.2(0x80000000) - %9:_(s32) = G_CONSTANT i32 1 - %10:_(s1) = G_ICMP 
intpred(ult), %3(s32), %9 + %9:_(i32) = G_CONSTANT i32 1 + %10:_(i1) = G_ICMP intpred(ult), %3(i32), %9 bb.2: - %11:_(s1) = G_PHI %6(s1), %bb.0, %10(s1), %bb.1 - %12:_(s32) = G_SEXT %11(s1) - %13:_(s32) = G_CONSTANT i32 2 - %14:_(s32) = G_ADD %12, %13 - G_STORE %14(s32), %2(p1) :: (store (s32), addrspace 1) + %11:_(i1) = G_PHI %6(i1), %bb.0, %10(i1), %bb.1 + %12:_(i32) = G_SEXT %11(i1) + %13:_(i32) = G_CONSTANT i32 2 + %14:_(i32) = G_ADD %12, %13 + G_STORE %14(i32), %2(p1) :: (store (i32), addrspace 1) S_ENDPGM 0 + + + + ... --- @@ -295,27 +305,29 @@ body: | ; CHECK-LABEL: name: vcc_to_scc ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00 - ; CHECK-NEXT: [[FCMP:%[0-9]+]]:sgpr(s1) = G_FCMP floatpred(oeq), [[COPY]](s32), [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[FCMP]](s1), [[COPY1]], [[COPY2]] - ; CHECK-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](i32), [[COPY4]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 0.000000e+00 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[FCMP:%[0-9]+]]:sgpr(i1) = G_FCMP floatpred(oeq), [[BITCAST]](f32), [[C]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(i32) = G_SELECT [[FCMP]](i1), [[COPY1]], [[COPY2]] + ; CHECK-NEXT: G_STORE [[SELECT]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $sgpr2 - %3:_(s32) = COPY $vgpr0 - %4:_(s32) = COPY $vgpr1 - %5:_(p1) = G_MERGE_VALUES %3(s32), %4(s32) - %6:_(s32) = G_FCONSTANT float 0.000000e+00 - %7:_(s1) = G_FCMP floatpred(oeq), %0(s32), %6 - %8:_(s32) = G_SELECT %7(s1), %1, %2 - G_STORE %8(s32), %5(p1) :: (store (s32), addrspace 1) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $sgpr2 + %3:_(i32) = COPY $vgpr0 + %4:_(i32) = COPY $vgpr1 + %5:_(p1) = G_MERGE_VALUES %3(i32), %4(i32) + %6:_(f32) = G_FCONSTANT float 0.000000e+00 + %7:_(f32) = G_BITCAST %0(i32) + %8:_(i1) = G_FCMP floatpred(oeq), %7(f32), %6 + %9:_(i32) = G_SELECT %8(i1), %1, %2 + G_STORE %9(i32), %5(p1) :: (store (i32), addrspace 1) S_ENDPGM 0 ... 
@@ -329,27 +341,27 @@ body: | ; CHECK-LABEL: name: scc_to_vcc ; CHECK: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY1]], [[COPY2]] - ; CHECK-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](i32), [[COPY4]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(i1) = G_ICMP intpred(eq), [[COPY]](i32), [[C]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[COPY1]], [[COPY2]] + ; CHECK-NEXT: G_STORE [[SELECT]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = COPY $vgpr1 - %3:_(s32) = COPY $vgpr2 - %4:_(s32) = COPY $vgpr3 - %5:_(p1) = G_MERGE_VALUES %3(s32), %4(s32) - %6:_(s32) = G_CONSTANT i32 0 - %7:_(s1) = G_ICMP intpred(eq), %0(s32), %6 - %8:_(s32) = G_SELECT %7(s1), %1, %2 - G_STORE %8(s32), %5(p1) :: (store (s32), addrspace 1) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = COPY $vgpr1 + %3:_(i32) = COPY $vgpr2 + %4:_(i32) = COPY $vgpr3 + %5:_(p1) = G_MERGE_VALUES %3(i32), %4(i32) + %6:_(i32) = G_CONSTANT i32 0 + %7:_(i1) = G_ICMP intpred(eq), %0(i32), %6 + %8:_(i32) = G_SELECT %7(i1), %1, %2 + G_STORE %8(i32), %5(p1) :: (store (i32), addrspace 1) S_ENDPGM 0 ... 
@@ -363,25 +375,25 @@ body: | ; CHECK-LABEL: name: vgpr_to_vcc_trunc ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vcc(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[TRUNC]](s1), [[COPY1]], [[COPY2]] - ; CHECK-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](i32), [[COPY4]](i32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vcc(i1) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[TRUNC]](i1), [[COPY1]], [[COPY2]] + ; CHECK-NEXT: G_STORE [[SELECT]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = COPY $vgpr3 - %4:_(s32) = COPY $vgpr4 - %5:_(p1) = G_MERGE_VALUES %3(s32), %4(s32) - %6:_(s1) = G_TRUNC %0(s32) - %7:_(s32) = G_SELECT %6(s1), %1, %2 - G_STORE %7(s32), %5(p1) :: (store (s32), addrspace 1) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 + %4:_(i32) = COPY $vgpr4 + %5:_(p1) = G_MERGE_VALUES %3(i32), %4(i32) + %6:_(i1) = G_TRUNC %0(i32) + %7:_(i32) = G_SELECT %6(i1), %1, %2 + G_STORE %7(i32), %5(p1) :: (store (i32), addrspace 1) S_ENDPGM 0 ... 
@@ -395,23 +407,23 @@ body: | ; CHECK-LABEL: name: zext ; CHECK: liveins: $sgpr0, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[ICMP]](s1) - ; CHECK-NEXT: G_STORE [[ZEXT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY1]](i32), [[COPY2]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(i1) = G_ICMP intpred(eq), [[COPY]](i32), [[C]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[ICMP]](i1) + ; CHECK-NEXT: G_STORE [[ZEXT]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = COPY $vgpr1 - %3:_(p1) = G_MERGE_VALUES %1(s32), %2(s32) - %4:_(s32) = G_CONSTANT i32 10 - %5:_(s1) = G_ICMP intpred(eq), %0(s32), %4 - %6:_(s32) = G_ZEXT %5(s1) - G_STORE %6(s32), %3(p1) :: (store (s32), addrspace 1) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = COPY $vgpr1 + %3:_(p1) = G_MERGE_VALUES %1(i32), %2(i32) + %4:_(i32) = G_CONSTANT i32 10 + %5:_(i1) = G_ICMP intpred(eq), %0(i32), %4 + %6:_(i32) = G_ZEXT %5(i1) + G_STORE %6(i32), %3(p1) :: (store (i32), addrspace 1) S_ENDPGM 0 ... 
@@ -425,23 +437,23 @@ body: | ; CHECK-LABEL: name: sext ; CHECK: liveins: $sgpr0, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:sgpr(s32) = G_SEXT [[ICMP]](s1) - ; CHECK-NEXT: G_STORE [[SEXT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY1]](i32), [[COPY2]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(i1) = G_ICMP intpred(eq), [[COPY]](i32), [[C]] + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:sgpr(i32) = G_SEXT [[ICMP]](i1) + ; CHECK-NEXT: G_STORE [[SEXT]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = COPY $vgpr1 - %3:_(p1) = G_MERGE_VALUES %1(s32), %2(s32) - %4:_(s32) = G_CONSTANT i32 10 - %5:_(s1) = G_ICMP intpred(eq), %0(s32), %4 - %6:_(s32) = G_SEXT %5(s1) - G_STORE %6(s32), %3(p1) :: (store (s32), addrspace 1) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = COPY $vgpr1 + %3:_(p1) = G_MERGE_VALUES %1(i32), %2(i32) + %4:_(i32) = G_CONSTANT i32 10 + %5:_(i1) = G_ICMP intpred(eq), %0(i32), %4 + %6:_(i32) = G_SEXT %5(i1) + G_STORE %6(i32), %3(p1) :: (store (i32), addrspace 1) S_ENDPGM 0 ... 
@@ -455,31 +467,31 @@ body: | ; CHECK-LABEL: name: and_i1_vcc ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(uge), [[COPY]](s32), [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(uge), [[COPY1]](s32), [[C1]] - ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[AND]](s1), [[COPY]], [[COPY1]] - ; CHECK-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(uge), [[COPY]](i32), [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(uge), [[COPY1]](i32), [[C1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(i1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[AND]](i1), [[COPY]], [[COPY1]] + ; CHECK-NEXT: G_STORE [[SELECT]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = COPY $vgpr3 - %4:_(p1) = G_MERGE_VALUES %2(s32), %3(s32) - %5:_(s32) = G_CONSTANT i32 10 - %6:_(s1) = G_ICMP intpred(uge), %0(s32), %5 - %7:_(s32) = G_CONSTANT i32 20 - %8:_(s1) = G_ICMP intpred(uge), %1(s32), %7 - %9:_(s1) = G_AND %6, %8 - %10:_(s32) = G_SELECT %9(s1), %0, %1 - G_STORE %10(s32), %4(p1) :: (store (s32), addrspace 1) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 + %4:_(p1) = G_MERGE_VALUES %2(i32), %3(i32) + %5:_(i32) = G_CONSTANT i32 10 + %6:_(i1) = G_ICMP intpred(uge), %0(i32), %5 + %7:_(i32) = G_CONSTANT i32 20 + %8:_(i1) = G_ICMP intpred(uge), %1(i32), %7 + %9:_(i1) = G_AND %6, %8 + %10:_(i32) = G_SELECT %9(i1), %0, %1 + G_STORE %10(i32), %4(p1) :: (store (i32), addrspace 1) S_ENDPGM 0 ... 
@@ -493,31 +505,31 @@ body: | ; CHECK-LABEL: name: and_i1_scc ; CHECK: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s1) = G_ICMP intpred(uge), [[COPY]](s32), [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s1) = G_ICMP intpred(uge), [[COPY1]](s32), [[C1]] - ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s1) = G_AND [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND]](s1), [[COPY]], [[COPY1]] - ; CHECK-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(i1) = G_ICMP intpred(uge), [[COPY]](i32), [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i1) = G_ICMP intpred(uge), [[COPY1]](i32), [[C1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(i1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(i32) = G_SELECT [[AND]](i1), [[COPY]], [[COPY1]] + ; CHECK-NEXT: G_STORE [[SELECT]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $vgpr0 - %3:_(s32) = COPY $vgpr1 - %4:_(p1) = G_MERGE_VALUES %2(s32), %3(s32) - %5:_(s32) = G_CONSTANT i32 10 - %6:_(s1) = G_ICMP intpred(uge), %0(s32), %5 - %7:_(s32) = G_CONSTANT i32 20 - %8:_(s1) = G_ICMP intpred(uge), %1(s32), %7 - %9:_(s1) = G_AND %6, %8 - %10:_(s32) = G_SELECT %9(s1), %0, %1 - G_STORE %10(s32), %4(p1) :: (store (s32), addrspace 1) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $vgpr0 + %3:_(i32) = COPY $vgpr1 + %4:_(p1) = G_MERGE_VALUES %2(i32), %3(i32) + %5:_(i32) = G_CONSTANT i32 10 + %6:_(i1) = G_ICMP intpred(uge), %0(i32), %5 + %7:_(i32) = G_CONSTANT i32 20 + %8:_(i1) = G_ICMP intpred(uge), %1(i32), %7 + %9:_(i1) = G_AND %6, %8 + %10:_(i32) = G_SELECT %9(i1), %0, %1 + G_STORE %10(i32), %4(p1) :: (store (i32), addrspace 1) S_ENDPGM 0 ... 
@@ -531,50 +543,54 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[ICMP]](s1) - ; CHECK-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY3]](s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY1]](i32), [[COPY2]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY]](i32), [[C]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xm0_xexec(i1) = COPY [[ICMP]](i1) + ; CHECK-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(i32) = SI_IF [[COPY3]](i1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: G_BR %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[C]](s32), %bb.0, [[C1]](s32), %bb.1 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY [[SI_IF]](s32) - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[COPY4]](s32) - ; CHECK-NEXT: G_STORE [[PHI]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: [[PHI:%[0-9]+]]:vgpr(i32) = G_PHI [[C]](i32), %bb.0, [[C1]](i32), %bb.1 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(i32) = COPY [[SI_IF]](i32) + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[COPY4]](i32) + ; CHECK-NEXT: G_STORE [[PHI]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0, $vgpr1, $vgpr2 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(p1) = G_MERGE_VALUES %1(s32), %2(s32) - %4:_(s32) = G_CONSTANT i32 0 - %5:sreg_32_xm0_xexec(s1) = G_ICMP intpred(eq), %0(s32), %4 - %6:sreg_32_xm0_xexec(s32) = SI_IF %5(s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(p1) = G_MERGE_VALUES %1(i32), %2(i32) + %4:_(i32) = G_CONSTANT i32 0 + %5:sreg_32_xm0_xexec(i1) = G_ICMP intpred(eq), %0(i32), %4 + %6:sreg_32_xm0_xexec(i32) = SI_IF %5(i1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.1 bb.1: successors: %bb.2(0x80000000) - %7:_(s32) = G_CONSTANT i32 1 + %7:_(i32) = G_CONSTANT i32 1 bb.2: - %8:_(s32) = G_PHI %4(s32), %bb.0, %7(s32), %bb.1 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %6(s32) - G_STORE %8(s32), %3(p1) :: (store (s32), addrspace 1) + %8:_(i32) = G_PHI %4(i32), %bb.0, %7(i32), %bb.1 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %6(i32) + G_STORE %8(i32), %3(p1) :: (store (i32), 
addrspace 1) S_ENDPGM 0 + + + + ... --- @@ -587,67 +603,73 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -1 - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY1]](i32), [[COPY2]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI %17(s32), %bb.1, [[C1]](s32), %bb.0 - ; CHECK-NEXT: [[PHI1:%[0-9]+]]:sgpr(s32) = G_PHI [[C]](s32), %bb.0, %9(s32), %bb.1 - ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[PHI1]], [[C2]] - ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:sgpr(s32) = G_UITOFP [[ADD]](s32) - ; CHECK-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]] - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xm0_xexec(s32) = COPY [[INT]](s32) - ; CHECK-NEXT: SI_LOOP [[COPY3]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[PHI:%[0-9]+]]:sgpr(i32) = G_PHI %18(i32), %bb.1, [[C1]](i32), %bb.0 + ; CHECK-NEXT: [[PHI1:%[0-9]+]]:sgpr(i32) = G_PHI [[C]](i32), %bb.0, %9(i32), %bb.1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(i32) = G_ADD [[PHI1]], [[C2]] + ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:sgpr(f32) = G_UITOFP [[ADD]](i32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[FCMP:%[0-9]+]]:vcc(i1) = G_FCMP floatpred(ogt), [[UITOFP]](f32), [[BITCAST]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP]](i1), [[PHI]](i32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xm0_xexec(i32) = COPY [[INT]](i32) + ; CHECK-NEXT: SI_LOOP [[COPY3]](i32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: [[PHI2:%[0-9]+]]:vgpr(s32) = G_PHI [[ADD]](s32), %bb.1 - ; CHECK-NEXT: [[PHI3:%[0-9]+]]:sgpr(s32) = G_PHI [[INT]](s32), %bb.1 - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI3]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[MUL:%[0-9]+]]:vgpr(s32) = G_MUL [[PHI2]], [[C3]] - ; CHECK-NEXT: G_STORE [[MUL]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: [[PHI2:%[0-9]+]]:vgpr(i32) = G_PHI [[ADD]](i32), %bb.1 + ; CHECK-NEXT: [[PHI3:%[0-9]+]]:sgpr(i32) = G_PHI [[INT]](i32), %bb.1 + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI3]](i32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[MUL:%[0-9]+]]:vgpr(i32) = G_MUL [[PHI2]], [[C3]] + ; CHECK-NEXT: G_STORE [[MUL]](i32), 
[[MV]](p1) :: (store (i32), addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1(0x80000000) liveins: $vgpr0, $vgpr1, $vgpr2 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(p1) = G_MERGE_VALUES %1(s32), %2(s32) - %4:_(s32) = G_CONSTANT i32 -1 - %5:_(s32) = G_CONSTANT i32 0 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(p1) = G_MERGE_VALUES %1(i32), %2(i32) + %4:_(i32) = G_CONSTANT i32 -1 + %5:_(i32) = G_CONSTANT i32 0 bb.1: successors: %bb.2(0x04000000), %bb.1(0x7c000000) - %6:_(s32) = G_PHI %7(s32), %bb.1, %5(s32), %bb.0 - %8:_(s32) = G_PHI %4(s32), %bb.0, %9(s32), %bb.1 - %10:_(s32) = G_CONSTANT i32 1 - %9:_(s32) = G_ADD %8, %10 - %11:_(s32) = G_UITOFP %9(s32) - %12:_(s1) = G_FCMP floatpred(ogt), %11(s32), %0 - %7:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %12(s1), %6(s32) - SI_LOOP %7(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + %6:_(i32) = G_PHI %7(i32), %bb.1, %5(i32), %bb.0 + %8:_(i32) = G_PHI %4(i32), %bb.0, %9(i32), %bb.1 + %10:_(i32) = G_CONSTANT i32 1 + %9:_(i32) = G_ADD %8, %10 + %11:_(f32) = G_UITOFP %9(i32) + %12:_(f32) = G_BITCAST %0(i32) + %13:_(i1) = G_FCMP floatpred(ogt), %11(f32), %12 + %7:sreg_32_xm0_xexec(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %13(i1), %6(i32) + SI_LOOP %7(i32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.2 bb.2: - %13:_(s32) = G_PHI %9(s32), %bb.1 - %14:_(s32) = G_PHI %7(s32), %bb.1 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %14(s32) - %15:_(s32) = G_CONSTANT i32 10 - %16:_(s32) = G_MUL %13, %15 - G_STORE %16(s32), %3(p1) :: (store (s32), addrspace 1) + %14:_(i32) = G_PHI %9(i32), %bb.1 + %15:_(i32) = G_PHI %7(i32), %bb.1 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %15(i32) + %16:_(i32) = G_CONSTANT i32 10 + %17:_(i32) = G_MUL %14, %16 + G_STORE %17(i32), %3(p1) :: (store (i32), addrspace 1) S_ENDPGM 0 + + + + ... 
--- @@ -660,208 +682,220 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[MV2:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY $vgpr5 + ; CHECK-NEXT: [[MV2:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(i32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:sreg_32(i1) = IMPLICIT_DEF ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %13(s1), %bb.3 - ; CHECK-NEXT: [[PHI1:%[0-9]+]]:sgpr(s32) = G_PHI %68(s32), %bb.3, [[C]](s32), %bb.0 - ; CHECK-NEXT: [[PHI2:%[0-9]+]]:sgpr(s32) = G_PHI [[C]](s32), %bb.0, %17(s32), %bb.3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1) - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:sgpr(s64) = G_SEXT [[PHI2]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(s64) = G_SHL [[SEXT]], [[C1]](s32) - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[MV1]], [[SHL]](s64) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1) - ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C2]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[ICMP]](s1) - ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s1) = G_CONSTANT i1 true - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1) - ; CHECK-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY8]](s1), implicit-def $scc - ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1) - ; CHECK-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY7]](s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_32(i1) = PHI [[DEF1]](i1), %bb.0, %13(i1), %bb.3 
+ ; CHECK-NEXT: [[PHI1:%[0-9]+]]:sgpr(i32) = G_PHI %68(i32), %bb.3, [[C]](i32), %bb.0 + ; CHECK-NEXT: [[PHI2:%[0-9]+]]:sgpr(i32) = G_PHI [[C]](i32), %bb.0, %17(i32), %bb.3 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32(i1) = COPY [[PHI]](i1) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:sgpr(i64) = G_SEXT [[PHI2]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(i64) = G_SHL [[SEXT]], [[C1]](i32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[MV1]], [[SHL]](i64) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i32), addrspace 1) + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[LOAD]](i32), [[C2]] + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec(i1) = COPY [[ICMP]](i1) + ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(i1) = G_CONSTANT i1 true + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32(i1) = COPY [[C3]](i1) + ; CHECK-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(i1) = S_ANDN2_B32 [[COPY6]](i1), $exec_lo, implicit-def $scc + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(i1) = S_AND_B32 $exec_lo, [[COPY8]](i1), implicit-def $scc + ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(i1) = S_OR_B32 [[S_ANDN2_B32_]](i1), [[S_AND_B32_]](i1), implicit-def $scc + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32(i1) = COPY [[S_OR_B32_]](i1) + ; CHECK-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(i32) = SI_IF [[COPY7]](i1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.5(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:sgpr(s64) = G_SHL [[SEXT]], [[C4]](s32) - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[MV2]], [[SHL1]](s64) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:vgpr(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s32), addrspace 1) - ; CHECK-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[LOAD1]](s32), [[C5]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[ICMP1]](s1) - ; CHECK-NEXT: [[C6:%[0-9]+]]:sgpr(s1) = G_CONSTANT i1 true - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[C6]](s1) - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[COPY11]](s1) - ; CHECK-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY10]](s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[C4:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:sgpr(i64) = G_SHL [[SEXT]], [[C4]](i32) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[MV2]], [[SHL1]](i64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:vgpr(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i32), addrspace 1) + ; CHECK-NEXT: [[C5:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[LOAD1]](i32), [[C5]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_32_xm0_xexec(i1) = COPY [[ICMP1]](i1) + ; CHECK-NEXT: [[C6:%[0-9]+]]:sgpr(i1) = G_CONSTANT i1 true + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:sreg_32(i1) = COPY [[C6]](i1) + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_32(i1) = COPY [[COPY11]](i1) + ; CHECK-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(i32) = SI_IF [[COPY10]](i1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: G_BR %bb.4 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.6(0x04000000), %bb.1(0x7c000000) ; CHECK-NEXT: {{ 
$}} - ; CHECK-NEXT: [[PHI3:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.1, %43(s1), %bb.5 - ; CHECK-NEXT: [[PHI4:%[0-9]+]]:sgpr(s32) = G_PHI %44(s32), %bb.5, [[DEF]](s32), %bb.1 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]](s1) - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sgpr(s32) = COPY [[SI_IF]](s32) - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[COPY14]](s32) - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vcc(s1) = COPY [[COPY13]](s1) - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY15]](s1), [[PHI1]](s32) - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sreg_32_xm0_xexec(s32) = COPY [[INT]](s32) - ; CHECK-NEXT: SI_LOOP [[COPY16]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[PHI3:%[0-9]+]]:sreg_32(i1) = PHI [[S_OR_B32_]](i1), %bb.1, %43(i1), %bb.5 + ; CHECK-NEXT: [[PHI4:%[0-9]+]]:sgpr(i32) = G_PHI %44(i32), %bb.5, [[DEF]](i32), %bb.1 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_32(i1) = COPY [[PHI3]](i1) + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sgpr(i32) = COPY [[SI_IF]](i32) + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[COPY14]](i32) + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vcc(i1) = COPY [[COPY13]](i1) + ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY15]](i1), [[PHI1]](i32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sreg_32_xm0_xexec(i32) = COPY [[INT]](i32) + ; CHECK-NEXT: SI_LOOP [[COPY16]](i32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: G_BR %bb.6 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.5(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[C7:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:sgpr(s64) = G_SHL [[SEXT]], [[C7]](s32) - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[MV]], [[SHL2]](s64) - ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:vgpr(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s32), addrspace 1) - ; CHECK-NEXT: [[C8:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[LOAD2]], [[C8]] - ; CHECK-NEXT: G_STORE [[ADD]](s32), [[PTR_ADD2]](p1) :: (store (s32), addrspace 1) - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:sgpr(s32) = G_ADD [[PHI2]], [[C8]] - ; CHECK-NEXT: [[C9:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 100 - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:sgpr(s1) = G_ICMP intpred(ult), [[PHI2]](s32), [[C9]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1) - ; CHECK-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY12]](s1), $exec_lo, implicit-def $scc - ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY17]](s1), implicit-def $scc - ; CHECK-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc + ; CHECK-NEXT: [[C7:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:sgpr(i64) = G_SHL [[SEXT]], [[C7]](i32) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[MV]], [[SHL2]](i64) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:vgpr(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i32), addrspace 1) + ; CHECK-NEXT: [[C8:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[LOAD2]], [[C8]] + ; CHECK-NEXT: G_STORE [[ADD]](i32), [[PTR_ADD2]](p1) :: (store (i32), addrspace 1) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:sgpr(i32) = G_ADD [[PHI2]], [[C8]] + ; CHECK-NEXT: [[C9:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 100 + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:sgpr(i1) = G_ICMP intpred(ult), 
[[PHI2]](i32), [[C9]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:sreg_32(i1) = COPY [[ICMP2]](i1) + ; CHECK-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(i1) = S_ANDN2_B32 [[COPY12]](i1), $exec_lo, implicit-def $scc + ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(i1) = S_AND_B32 $exec_lo, [[COPY17]](i1), implicit-def $scc + ; CHECK-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(i1) = S_OR_B32 [[S_ANDN2_B32_1]](i1), [[S_AND_B32_1]](i1), implicit-def $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[PHI5:%[0-9]+]]:sreg_32(s1) = PHI [[COPY11]](s1), %bb.2, [[S_OR_B32_1]](s1), %bb.4 - ; CHECK-NEXT: [[PHI6:%[0-9]+]]:sgpr(s32) = G_PHI [[ADD1]](s32), %bb.4, [[DEF]](s32), %bb.2 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:sreg_32(s1) = COPY [[PHI5]](s1) - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:sreg_32(s1) = COPY [[COPY18]](s1) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:sgpr(s32) = COPY [[SI_IF1]](s32) - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[COPY20]](s32) - ; CHECK-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY9]](s1), $exec_lo, implicit-def $scc - ; CHECK-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY19]](s1), implicit-def $scc - ; CHECK-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc + ; CHECK-NEXT: [[PHI5:%[0-9]+]]:sreg_32(i1) = PHI [[COPY11]](i1), %bb.2, [[S_OR_B32_1]](i1), %bb.4 + ; CHECK-NEXT: [[PHI6:%[0-9]+]]:sgpr(i32) = G_PHI [[ADD1]](i32), %bb.4, [[DEF]](i32), %bb.2 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:sreg_32(i1) = COPY [[PHI5]](i1) + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:sreg_32(i1) = COPY [[COPY18]](i1) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:sgpr(i32) = COPY [[SI_IF1]](i32) + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[COPY20]](i32) + ; CHECK-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(i1) = S_ANDN2_B32 [[COPY9]](i1), $exec_lo, implicit-def $scc + ; CHECK-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(i1) = S_AND_B32 $exec_lo, [[COPY19]](i1), implicit-def $scc + ; CHECK-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(i1) = S_OR_B32 [[S_ANDN2_B32_2]](i1), [[S_AND_B32_2]](i1), implicit-def $scc ; CHECK-NEXT: G_BR %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.6: - ; CHECK-NEXT: [[PHI7:%[0-9]+]]:sgpr(s32) = G_PHI [[INT]](s32), %bb.3 - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI7]](s32) + ; CHECK-NEXT: [[PHI7:%[0-9]+]]:sgpr(i32) = G_PHI [[INT]](i32), %bb.3 + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI7]](i32) ; CHECK-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1(0x80000000) liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(p1) = G_MERGE_VALUES %0(s32), %1(s32) - %3:_(s32) = COPY $vgpr2 - %4:_(s32) = COPY $vgpr3 - %5:_(p1) = G_MERGE_VALUES %3(s32), %4(s32) - %6:_(s32) = COPY $vgpr4 - %7:_(s32) = COPY $vgpr5 - %8:_(p1) = G_MERGE_VALUES %6(s32), %7(s32) - %9:_(s32) = G_IMPLICIT_DEF - %10:_(s32) = G_CONSTANT i32 0 - %11:sreg_32(s1) = IMPLICIT_DEF + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(p1) = G_MERGE_VALUES %0(i32), %1(i32) + %3:_(i32) = COPY $vgpr2 + %4:_(i32) = COPY $vgpr3 + %5:_(p1) = G_MERGE_VALUES %3(i32), %4(i32) + %6:_(i32) = COPY $vgpr4 + %7:_(i32) = COPY $vgpr5 + %8:_(p1) = G_MERGE_VALUES %6(i32), %7(i32) + %9:_(i32) = G_IMPLICIT_DEF + %10:_(i32) = G_CONSTANT i32 0 + %11:sreg_32(i1) = IMPLICIT_DEF bb.1: successors: %bb.2(0x40000000), %bb.3(0x40000000) - %12:sreg_32(s1) = PHI 
%11(s1), %bb.0, %13(s1), %bb.3 - %14:_(s32) = G_PHI %15(s32), %bb.3, %10(s32), %bb.0 - %16:_(s32) = G_PHI %10(s32), %bb.0, %17(s32), %bb.3 - %18:sreg_32(s1) = COPY %12(s1) - %19:_(s64) = G_SEXT %16(s32) - %20:_(s32) = G_CONSTANT i32 2 - %21:_(s64) = G_SHL %19, %20(s32) - %22:_(p1) = G_PTR_ADD %5, %21(s64) - %23:_(s32) = G_LOAD %22(p1) :: (load (s32), addrspace 1) - %24:_(s32) = G_CONSTANT i32 0 - %25:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %23(s32), %24 - %26:_(s1) = G_CONSTANT i1 true - %27:sreg_32(s1) = COPY %26(s1) - %28:sreg_32(s1) = S_ANDN2_B32 %18(s1), $exec_lo, implicit-def $scc - %29:sreg_32(s1) = S_AND_B32 $exec_lo, %27(s1), implicit-def $scc - %30:sreg_32(s1) = S_OR_B32 %28(s1), %29(s1), implicit-def $scc - %31:sreg_32(s1) = COPY %30(s1) - %32:sreg_32_xm0_xexec(s32) = SI_IF %25(s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec + %12:sreg_32(i1) = PHI %11(i1), %bb.0, %13(i1), %bb.3 + %14:_(i32) = G_PHI %15(i32), %bb.3, %10(i32), %bb.0 + %16:_(i32) = G_PHI %10(i32), %bb.0, %17(i32), %bb.3 + %18:sreg_32(i1) = COPY %12(i1) + %19:_(i64) = G_SEXT %16(i32) + %20:_(i32) = G_CONSTANT i32 2 + %21:_(i64) = G_SHL %19, %20(i32) + %22:_(p1) = G_PTR_ADD %5, %21(i64) + %23:_(i32) = G_LOAD %22(p1) :: (load (i32), addrspace 1) + %24:_(i32) = G_CONSTANT i32 0 + %25:sreg_32_xm0_xexec(i1) = G_ICMP intpred(ne), %23(i32), %24 + %26:_(i1) = G_CONSTANT i1 true + %27:sreg_32(i1) = COPY %26(i1) + %28:sreg_32(i1) = S_ANDN2_B32 %18(i1), $exec_lo, implicit-def $scc + %29:sreg_32(i1) = S_AND_B32 $exec_lo, %27(i1), implicit-def $scc + %30:sreg_32(i1) = S_OR_B32 %28(i1), %29(i1), implicit-def $scc + %31:sreg_32(i1) = COPY %30(i1) + %32:sreg_32_xm0_xexec(i32) = SI_IF %25(i1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.2 bb.2: successors: %bb.4(0x40000000), %bb.5(0x40000000) - %33:_(s32) = G_CONSTANT i32 2 - %34:_(s64) = G_SHL %19, %33(s32) - %35:_(p1) = G_PTR_ADD %8, %34(s64) - %36:_(s32) = G_LOAD %35(p1) :: (load (s32), addrspace 1) - %37:_(s32) = G_CONSTANT i32 0 - %38:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %36(s32), %37 - %39:_(s1) = G_CONSTANT i1 true - %40:sreg_32(s1) = COPY %39(s1) - %41:sreg_32(s1) = COPY %40(s1) - %42:sreg_32_xm0_xexec(s32) = SI_IF %38(s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec + %33:_(i32) = G_CONSTANT i32 2 + %34:_(i64) = G_SHL %19, %33(i32) + %35:_(p1) = G_PTR_ADD %8, %34(i64) + %36:_(i32) = G_LOAD %35(p1) :: (load (i32), addrspace 1) + %37:_(i32) = G_CONSTANT i32 0 + %38:sreg_32_xm0_xexec(i1) = G_ICMP intpred(ne), %36(i32), %37 + %39:_(i1) = G_CONSTANT i1 true + %40:sreg_32(i1) = COPY %39(i1) + %41:sreg_32(i1) = COPY %40(i1) + %42:sreg_32_xm0_xexec(i32) = SI_IF %38(i1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.4 bb.3: successors: %bb.6(0x04000000), %bb.1(0x7c000000) - %13:sreg_32(s1) = PHI %30(s1), %bb.1, %43(s1), %bb.5 - %17:_(s32) = G_PHI %44(s32), %bb.5, %9(s32), %bb.1 - %45:sreg_32(s1) = COPY %13(s1) - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %32(s32) - %15:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %45(s1), %14(s32) - SI_LOOP %15(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + %13:sreg_32(i1) = PHI %30(i1), %bb.1, %43(i1), %bb.5 + %17:_(i32) = G_PHI %44(i32), %bb.5, %9(i32), %bb.1 + %45:sreg_32(i1) = COPY %13(i1) + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %32(i32) + %15:sreg_32_xm0_xexec(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %45(i1), %14(i32) + SI_LOOP %15(i32), %bb.1, 
implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.6 bb.4: successors: %bb.5(0x80000000) - %46:_(s32) = G_CONSTANT i32 2 - %47:_(s64) = G_SHL %19, %46(s32) - %48:_(p1) = G_PTR_ADD %2, %47(s64) - %49:_(s32) = G_LOAD %48(p1) :: (load (s32), addrspace 1) - %50:_(s32) = G_CONSTANT i32 1 - %51:_(s32) = G_ADD %49, %50 - G_STORE %51(s32), %48(p1) :: (store (s32), addrspace 1) - %52:_(s32) = G_ADD %16, %50 - %53:_(s32) = G_CONSTANT i32 100 - %54:_(s1) = G_ICMP intpred(ult), %16(s32), %53 - %55:sreg_32(s1) = COPY %54(s1) - %56:sreg_32(s1) = S_ANDN2_B32 %41(s1), $exec_lo, implicit-def $scc - %57:sreg_32(s1) = S_AND_B32 $exec_lo, %55(s1), implicit-def $scc - %58:sreg_32(s1) = S_OR_B32 %56(s1), %57(s1), implicit-def $scc + %46:_(i32) = G_CONSTANT i32 2 + %47:_(i64) = G_SHL %19, %46(i32) + %48:_(p1) = G_PTR_ADD %2, %47(i64) + %49:_(i32) = G_LOAD %48(p1) :: (load (i32), addrspace 1) + %50:_(i32) = G_CONSTANT i32 1 + %51:_(i32) = G_ADD %49, %50 + G_STORE %51(i32), %48(p1) :: (store (i32), addrspace 1) + %52:_(i32) = G_ADD %16, %50 + %53:_(i32) = G_CONSTANT i32 100 + %54:_(i1) = G_ICMP intpred(ult), %16(i32), %53 + %55:sreg_32(i1) = COPY %54(i1) + %56:sreg_32(i1) = S_ANDN2_B32 %41(i1), $exec_lo, implicit-def $scc + %57:sreg_32(i1) = S_AND_B32 $exec_lo, %55(i1), implicit-def $scc + %58:sreg_32(i1) = S_OR_B32 %56(i1), %57(i1), implicit-def $scc bb.5: successors: %bb.3(0x80000000) - %59:sreg_32(s1) = PHI %40(s1), %bb.2, %58(s1), %bb.4 - %44:_(s32) = G_PHI %52(s32), %bb.4, %9(s32), %bb.2 - %60:sreg_32(s1) = COPY %59(s1) - %61:sreg_32(s1) = COPY %60(s1) - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %42(s32) - %62:sreg_32(s1) = S_ANDN2_B32 %31(s1), $exec_lo, implicit-def $scc - %63:sreg_32(s1) = S_AND_B32 $exec_lo, %61(s1), implicit-def $scc - %43:sreg_32(s1) = S_OR_B32 %62(s1), %63(s1), implicit-def $scc + %59:sreg_32(i1) = PHI %40(i1), %bb.2, %58(i1), %bb.4 + %44:_(i32) = G_PHI %52(i32), %bb.4, %9(i32), %bb.2 + %60:sreg_32(i1) = COPY %59(i1) + %61:sreg_32(i1) = COPY %60(i1) + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %42(i32) + %62:sreg_32(i1) = S_ANDN2_B32 %31(i1), $exec_lo, implicit-def $scc + %63:sreg_32(i1) = S_AND_B32 $exec_lo, %61(i1), implicit-def $scc + %43:sreg_32(i1) = S_OR_B32 %62(i1), %63(i1), implicit-def $scc G_BR %bb.3 bb.6: - %64:_(s32) = G_PHI %15(s32), %bb.3 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %64(s32) + %64:_(i32) = G_PHI %15(i32), %bb.3 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %64(i32) S_ENDPGM 0 + + + + + + + + + + + + ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui-salu-float.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui-salu-float.mir index 733f3d302472c..00c62a92ab7fd 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui-salu-float.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui-salu-float.mir @@ -14,80 +14,90 @@ body: | ; OLD_RBS_GFX10-LABEL: name: salu_float ; OLD_RBS_GFX10: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1 ; OLD_RBS_GFX10-NEXT: {{ $}} - ; OLD_RBS_GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; OLD_RBS_GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; OLD_RBS_GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; OLD_RBS_GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; OLD_RBS_GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; OLD_RBS_GFX10-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) - ; OLD_RBS_GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; OLD_RBS_GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; OLD_RBS_GFX10-NEXT: [[FADD:%[0-9]+]]:vgpr(s32) = G_FADD [[COPY5]], [[COPY6]] - ; OLD_RBS_GFX10-NEXT: [[FPTOUI:%[0-9]+]]:vgpr(s32) = G_FPTOUI [[FADD]](s32) - ; OLD_RBS_GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; OLD_RBS_GFX10-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[FPTOUI]], [[COPY7]] - ; OLD_RBS_GFX10-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; OLD_RBS_GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; OLD_RBS_GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; OLD_RBS_GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; OLD_RBS_GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; OLD_RBS_GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; OLD_RBS_GFX10-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](i32), [[COPY4]](i32) + ; OLD_RBS_GFX10-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; OLD_RBS_GFX10-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY1]](i32) + ; OLD_RBS_GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST]](f32) + ; OLD_RBS_GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST1]](f32) + ; OLD_RBS_GFX10-NEXT: [[FADD:%[0-9]+]]:vgpr(f32) = G_FADD [[COPY5]], [[COPY6]] + ; OLD_RBS_GFX10-NEXT: [[FPTOUI:%[0-9]+]]:vgpr(i32) = G_FPTOUI [[FADD]](f32) + ; OLD_RBS_GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr(i32) = COPY [[COPY2]](i32) + ; OLD_RBS_GFX10-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[FPTOUI]], [[COPY7]] + ; OLD_RBS_GFX10-NEXT: G_STORE [[ADD]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; OLD_RBS_GFX10-NEXT: S_ENDPGM 0 ; ; OLD_RBS_GFX12-LABEL: name: salu_float ; OLD_RBS_GFX12: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1 ; OLD_RBS_GFX12-NEXT: {{ $}} - ; OLD_RBS_GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; OLD_RBS_GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; OLD_RBS_GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; OLD_RBS_GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; OLD_RBS_GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; OLD_RBS_GFX12-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) - ; OLD_RBS_GFX12-NEXT: [[FADD:%[0-9]+]]:sgpr(s32) = G_FADD [[COPY]], [[COPY1]] - ; OLD_RBS_GFX12-NEXT: [[FPTOUI:%[0-9]+]]:sgpr(s32) = G_FPTOUI [[FADD]](s32) - ; OLD_RBS_GFX12-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[FPTOUI]], [[COPY2]] - ; OLD_RBS_GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) - ; OLD_RBS_GFX12-NEXT: G_STORE 
[[COPY5]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; OLD_RBS_GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; OLD_RBS_GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; OLD_RBS_GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; OLD_RBS_GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; OLD_RBS_GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; OLD_RBS_GFX12-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](i32), [[COPY4]](i32) + ; OLD_RBS_GFX12-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; OLD_RBS_GFX12-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY1]](i32) + ; OLD_RBS_GFX12-NEXT: [[FADD:%[0-9]+]]:sgpr(f32) = G_FADD [[BITCAST]], [[BITCAST1]] + ; OLD_RBS_GFX12-NEXT: [[FPTOUI:%[0-9]+]]:sgpr(i32) = G_FPTOUI [[FADD]](f32) + ; OLD_RBS_GFX12-NEXT: [[ADD:%[0-9]+]]:sgpr(i32) = G_ADD [[FPTOUI]], [[COPY2]] + ; OLD_RBS_GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[ADD]](i32) + ; OLD_RBS_GFX12-NEXT: G_STORE [[COPY5]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; OLD_RBS_GFX12-NEXT: S_ENDPGM 0 ; ; NEW_RBS_GFX10-LABEL: name: salu_float ; NEW_RBS_GFX10: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1 ; NEW_RBS_GFX10-NEXT: {{ $}} - ; NEW_RBS_GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; NEW_RBS_GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; NEW_RBS_GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; NEW_RBS_GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; NEW_RBS_GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; NEW_RBS_GFX10-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) - ; NEW_RBS_GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; NEW_RBS_GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; NEW_RBS_GFX10-NEXT: [[FADD:%[0-9]+]]:vgpr(s32) = G_FADD [[COPY5]], [[COPY6]] - ; NEW_RBS_GFX10-NEXT: [[FPTOUI:%[0-9]+]]:vgpr(s32) = G_FPTOUI [[FADD]](s32) - ; NEW_RBS_GFX10-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[FPTOUI]] - ; NEW_RBS_GFX10-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[AMDGPU_READANYLANE]], [[COPY2]] - ; NEW_RBS_GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) - ; NEW_RBS_GFX10-NEXT: G_STORE [[COPY7]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; NEW_RBS_GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; NEW_RBS_GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; NEW_RBS_GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; NEW_RBS_GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; NEW_RBS_GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; NEW_RBS_GFX10-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](i32), [[COPY4]](i32) + ; NEW_RBS_GFX10-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; NEW_RBS_GFX10-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY1]](i32) + ; NEW_RBS_GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST]](f32) + ; NEW_RBS_GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST1]](f32) + ; NEW_RBS_GFX10-NEXT: [[FADD:%[0-9]+]]:vgpr(f32) = G_FADD [[COPY5]], [[COPY6]] + ; NEW_RBS_GFX10-NEXT: [[FPTOUI:%[0-9]+]]:vgpr(i32) = G_FPTOUI [[FADD]](f32) + ; NEW_RBS_GFX10-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[FPTOUI]] + ; NEW_RBS_GFX10-NEXT: [[ADD:%[0-9]+]]:sgpr(i32) = G_ADD [[AMDGPU_READANYLANE]], [[COPY2]] + ; NEW_RBS_GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr(i32) = COPY [[ADD]](i32) + ; NEW_RBS_GFX10-NEXT: G_STORE [[COPY7]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; 
NEW_RBS_GFX10-NEXT: S_ENDPGM 0 ; ; NEW_RBS_GFX12-LABEL: name: salu_float ; NEW_RBS_GFX12: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1 ; NEW_RBS_GFX12-NEXT: {{ $}} - ; NEW_RBS_GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; NEW_RBS_GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; NEW_RBS_GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; NEW_RBS_GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; NEW_RBS_GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; NEW_RBS_GFX12-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) - ; NEW_RBS_GFX12-NEXT: [[FADD:%[0-9]+]]:sgpr(s32) = G_FADD [[COPY]], [[COPY1]] - ; NEW_RBS_GFX12-NEXT: [[FPTOUI:%[0-9]+]]:sgpr(s32) = G_FPTOUI [[FADD]](s32) - ; NEW_RBS_GFX12-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[FPTOUI]], [[COPY2]] - ; NEW_RBS_GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) - ; NEW_RBS_GFX12-NEXT: G_STORE [[COPY5]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; NEW_RBS_GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; NEW_RBS_GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; NEW_RBS_GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; NEW_RBS_GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; NEW_RBS_GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; NEW_RBS_GFX12-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](i32), [[COPY4]](i32) + ; NEW_RBS_GFX12-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; NEW_RBS_GFX12-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY1]](i32) + ; NEW_RBS_GFX12-NEXT: [[FADD:%[0-9]+]]:sgpr(f32) = G_FADD [[BITCAST]], [[BITCAST1]] + ; NEW_RBS_GFX12-NEXT: [[FPTOUI:%[0-9]+]]:sgpr(i32) = G_FPTOUI [[FADD]](f32) + ; NEW_RBS_GFX12-NEXT: [[ADD:%[0-9]+]]:sgpr(i32) = G_ADD [[FPTOUI]], [[COPY2]] + ; NEW_RBS_GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[ADD]](i32) + ; NEW_RBS_GFX12-NEXT: G_STORE [[COPY5]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; NEW_RBS_GFX12-NEXT: S_ENDPGM 0 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $sgpr2 - %3:_(s32) = COPY $vgpr0 - %4:_(s32) = COPY $vgpr1 - %5:_(p1) = G_MERGE_VALUES %3(s32), %4(s32) - %6:_(s32) = G_FADD %0, %1 - %7:_(s32) = G_FPTOUI %6(s32) - %8:_(s32) = G_ADD %7, %2 - G_STORE %8(s32), %5(p1) :: (store (s32), addrspace 1) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $sgpr2 + %3:_(i32) = COPY $vgpr0 + %4:_(i32) = COPY $vgpr1 + %5:_(p1) = G_MERGE_VALUES %3(i32), %4(i32) + %6:_(f32) = G_BITCAST %0(i32) + %7:_(f32) = G_BITCAST %1(i32) + %8:_(f32) = G_FADD %6, %7 + %9:_(i32) = G_FPTOUI %8(f32) + %10:_(i32) = G_ADD %9, %2 + G_STORE %10(i32), %5(p1) :: (store (i32), addrspace 1) S_ENDPGM 0 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.mir index 1b22ee4b3fffc..d3d5dc7812bcf 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.mir @@ -12,41 +12,44 @@ body: | ; OLD_RBS-LABEL: name: uniform_in_vgpr ; OLD_RBS: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; OLD_RBS-NEXT: {{ $}} - ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; OLD_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; OLD_RBS-NEXT: [[FPTOUI:%[0-9]+]]:vgpr(s32) = G_FPTOUI [[COPY4]](s32) - ; OLD_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; OLD_RBS-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[FPTOUI]], [[COPY5]] - ; OLD_RBS-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; OLD_RBS-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; OLD_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST]](f32) + ; OLD_RBS-NEXT: [[FPTOUI:%[0-9]+]]:vgpr(i32) = G_FPTOUI [[COPY4]](f32) + ; OLD_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; OLD_RBS-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[FPTOUI]], [[COPY5]] + ; OLD_RBS-NEXT: G_STORE [[ADD]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; OLD_RBS-NEXT: S_ENDPGM 0 ; ; NEW_RBS-LABEL: name: uniform_in_vgpr ; NEW_RBS: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; NEW_RBS-NEXT: {{ $}} - ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; NEW_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; NEW_RBS-NEXT: [[FPTOUI:%[0-9]+]]:vgpr(s32) = G_FPTOUI [[COPY4]](s32) - ; NEW_RBS-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[FPTOUI]] - ; NEW_RBS-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[AMDGPU_READANYLANE]], [[COPY1]] - ; NEW_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) - ; NEW_RBS-NEXT: G_STORE [[COPY5]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; NEW_RBS-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; NEW_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST]](f32) + ; NEW_RBS-NEXT: [[FPTOUI:%[0-9]+]]:vgpr(i32) = G_FPTOUI [[COPY4]](f32) + ; NEW_RBS-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[FPTOUI]] + ; NEW_RBS-NEXT: [[ADD:%[0-9]+]]:sgpr(i32) = G_ADD 
[[AMDGPU_READANYLANE]], [[COPY1]] + ; NEW_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[ADD]](i32) + ; NEW_RBS-NEXT: G_STORE [[COPY5]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; NEW_RBS-NEXT: S_ENDPGM 0 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $vgpr0 - %3:_(s32) = COPY $vgpr1 - %4:_(p1) = G_MERGE_VALUES %2(s32), %3(s32) - %5:_(s32) = G_FPTOUI %0(s32) - %6:_(s32) = G_ADD %5, %1 - G_STORE %6(s32), %4(p1) :: (store (s32), addrspace 1) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $vgpr0 + %3:_(i32) = COPY $vgpr1 + %4:_(p1) = G_MERGE_VALUES %2(i32), %3(i32) + %5:_(f32) = G_BITCAST %0(i32) + %6:_(i32) = G_FPTOUI %5(f32) + %7:_(i32) = G_ADD %6, %1 + G_STORE %7(i32), %4(p1) :: (store (i32), addrspace 1) S_ENDPGM 0 ... @@ -60,49 +63,55 @@ body: | ; OLD_RBS-LABEL: name: back_to_back_uniform_in_vgpr ; OLD_RBS: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1 ; OLD_RBS-NEXT: {{ $}} - ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; OLD_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) - ; OLD_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; OLD_RBS-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; OLD_RBS-NEXT: [[FADD:%[0-9]+]]:vgpr(s32) = G_FADD [[COPY5]], [[COPY6]] - ; OLD_RBS-NEXT: [[FPTOUI:%[0-9]+]]:vgpr(s32) = G_FPTOUI [[FADD]](s32) - ; OLD_RBS-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; OLD_RBS-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[FPTOUI]], [[COPY7]] - ; OLD_RBS-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; OLD_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](i32), [[COPY4]](i32) + ; OLD_RBS-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; OLD_RBS-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY1]](i32) + ; OLD_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST]](f32) + ; OLD_RBS-NEXT: [[COPY6:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST1]](f32) + ; OLD_RBS-NEXT: [[FADD:%[0-9]+]]:vgpr(f32) = G_FADD [[COPY5]], [[COPY6]] + ; OLD_RBS-NEXT: [[FPTOUI:%[0-9]+]]:vgpr(i32) = G_FPTOUI [[FADD]](f32) + ; OLD_RBS-NEXT: [[COPY7:%[0-9]+]]:vgpr(i32) = COPY [[COPY2]](i32) + ; OLD_RBS-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[FPTOUI]], [[COPY7]] + ; OLD_RBS-NEXT: G_STORE [[ADD]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; OLD_RBS-NEXT: S_ENDPGM 0 ; ; NEW_RBS-LABEL: name: back_to_back_uniform_in_vgpr ; NEW_RBS: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1 ; NEW_RBS-NEXT: {{ $}} - ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; NEW_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) - ; NEW_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; NEW_RBS-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) 
= COPY [[COPY1]](s32) - ; NEW_RBS-NEXT: [[FADD:%[0-9]+]]:vgpr(s32) = G_FADD [[COPY5]], [[COPY6]] - ; NEW_RBS-NEXT: [[FPTOUI:%[0-9]+]]:vgpr(s32) = G_FPTOUI [[FADD]](s32) - ; NEW_RBS-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[FPTOUI]] - ; NEW_RBS-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[AMDGPU_READANYLANE]], [[COPY2]] - ; NEW_RBS-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) - ; NEW_RBS-NEXT: G_STORE [[COPY7]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; NEW_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](i32), [[COPY4]](i32) + ; NEW_RBS-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; NEW_RBS-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY1]](i32) + ; NEW_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST]](f32) + ; NEW_RBS-NEXT: [[COPY6:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST1]](f32) + ; NEW_RBS-NEXT: [[FADD:%[0-9]+]]:vgpr(f32) = G_FADD [[COPY5]], [[COPY6]] + ; NEW_RBS-NEXT: [[FPTOUI:%[0-9]+]]:vgpr(i32) = G_FPTOUI [[FADD]](f32) + ; NEW_RBS-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[FPTOUI]] + ; NEW_RBS-NEXT: [[ADD:%[0-9]+]]:sgpr(i32) = G_ADD [[AMDGPU_READANYLANE]], [[COPY2]] + ; NEW_RBS-NEXT: [[COPY7:%[0-9]+]]:vgpr(i32) = COPY [[ADD]](i32) + ; NEW_RBS-NEXT: G_STORE [[COPY7]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; NEW_RBS-NEXT: S_ENDPGM 0 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $sgpr2 - %3:_(s32) = COPY $vgpr0 - %4:_(s32) = COPY $vgpr1 - %5:_(p1) = G_MERGE_VALUES %3(s32), %4(s32) - %6:_(s32) = G_FADD %0, %1 - %7:_(s32) = G_FPTOUI %6(s32) - %8:_(s32) = G_ADD %7, %2 - G_STORE %8(s32), %5(p1) :: (store (s32), addrspace 1) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $sgpr2 + %3:_(i32) = COPY $vgpr0 + %4:_(i32) = COPY $vgpr1 + %5:_(p1) = G_MERGE_VALUES %3(i32), %4(i32) + %6:_(f32) = G_BITCAST %0(i32) + %7:_(f32) = G_BITCAST %1(i32) + %8:_(f32) = G_FADD %6, %7 + %9:_(i32) = G_FPTOUI %8(f32) + %10:_(i32) = G_ADD %9, %2 + G_STORE %10(i32), %5(p1) :: (store (i32), addrspace 1) S_ENDPGM 0 ... 
@@ -116,69 +125,69 @@ body: | ; OLD_RBS-LABEL: name: buffer_load_uniform ; OLD_RBS: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 ; OLD_RBS-NEXT: {{ $}} - ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; OLD_RBS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; OLD_RBS-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; OLD_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; OLD_RBS-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY5]](s32), [[COPY6]](s32) - ; OLD_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; OLD_RBS-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; OLD_RBS-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32) - ; OLD_RBS-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY7]](s32), [[COPY8]], [[C]], 0, 0, 0 :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) - ; OLD_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; OLD_RBS-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x s32>) - ; OLD_RBS-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; OLD_RBS-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[UV1]], [[COPY9]] - ; OLD_RBS-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; OLD_RBS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; OLD_RBS-NEXT: [[COPY4:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; OLD_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; OLD_RBS-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY5]](i32), [[COPY6]](i32) + ; OLD_RBS-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; OLD_RBS-NEXT: [[COPY7:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; OLD_RBS-NEXT: [[COPY8:%[0-9]+]]:vgpr(i32) = COPY [[COPY4]](i32) + ; OLD_RBS-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x i32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[COPY7]](i32), [[COPY8]], [[C]], 0, 0, 0 :: (dereferenceable load (<4 x i32>), align 1, addrspace 8) + ; OLD_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; OLD_RBS-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32), [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x i32>) + ; OLD_RBS-NEXT: [[COPY9:%[0-9]+]]:vgpr(i32) = COPY [[C1]](i32) + ; OLD_RBS-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[UV1]], [[COPY9]] + ; OLD_RBS-NEXT: G_STORE [[ADD]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; OLD_RBS-NEXT: S_ENDPGM 0 ; ; NEW_RBS-LABEL: name: buffer_load_uniform ; NEW_RBS: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 ; NEW_RBS-NEXT: {{ $}} - ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; 
NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; NEW_RBS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; NEW_RBS-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; NEW_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; NEW_RBS-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY5]](s32), [[COPY6]](s32) - ; NEW_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; NEW_RBS-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; NEW_RBS-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32) - ; NEW_RBS-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY7]](s32), [[COPY8]], [[C]], 0, 0, 0 :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) - ; NEW_RBS-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x s32>) - ; NEW_RBS-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV]] - ; NEW_RBS-NEXT: [[AMDGPU_READANYLANE1:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV1]] - ; NEW_RBS-NEXT: [[AMDGPU_READANYLANE2:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV2]] - ; NEW_RBS-NEXT: [[AMDGPU_READANYLANE3:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV3]] - ; NEW_RBS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[AMDGPU_READANYLANE]](s32), [[AMDGPU_READANYLANE1]](s32), [[AMDGPU_READANYLANE2]](s32), [[AMDGPU_READANYLANE3]](s32) - ; NEW_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; NEW_RBS-NEXT: [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32), [[UV6:%[0-9]+]]:sgpr(s32), [[UV7:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; NEW_RBS-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[UV5]], [[C1]] - ; NEW_RBS-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) - ; NEW_RBS-NEXT: G_STORE [[COPY9]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; NEW_RBS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; NEW_RBS-NEXT: [[COPY4:%[0-9]+]]:sgpr(i32) = COPY $sgpr4 + ; NEW_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; NEW_RBS-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY5]](i32), [[COPY6]](i32) + ; NEW_RBS-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; NEW_RBS-NEXT: [[COPY7:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; NEW_RBS-NEXT: [[COPY8:%[0-9]+]]:vgpr(i32) = COPY [[COPY4]](i32) + ; NEW_RBS-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x i32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[COPY7]](i32), [[COPY8]], [[C]], 0, 0, 0 :: (dereferenceable load (<4 x i32>), align 1, addrspace 8) + ; NEW_RBS-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32), [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x i32>) + ; NEW_RBS-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV]] + ; NEW_RBS-NEXT: [[AMDGPU_READANYLANE1:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV1]] + ; NEW_RBS-NEXT: [[AMDGPU_READANYLANE2:%[0-9]+]]:sgpr(i32) = 
G_AMDGPU_READANYLANE [[UV2]] + ; NEW_RBS-NEXT: [[AMDGPU_READANYLANE3:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[UV3]] + ; NEW_RBS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[AMDGPU_READANYLANE]](i32), [[AMDGPU_READANYLANE1]](i32), [[AMDGPU_READANYLANE2]](i32), [[AMDGPU_READANYLANE3]](i32) + ; NEW_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; NEW_RBS-NEXT: [[UV4:%[0-9]+]]:sgpr(i32), [[UV5:%[0-9]+]]:sgpr(i32), [[UV6:%[0-9]+]]:sgpr(i32), [[UV7:%[0-9]+]]:sgpr(i32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x i32>) + ; NEW_RBS-NEXT: [[ADD:%[0-9]+]]:sgpr(i32) = G_ADD [[UV5]], [[C1]] + ; NEW_RBS-NEXT: [[COPY9:%[0-9]+]]:vgpr(i32) = COPY [[ADD]](i32) + ; NEW_RBS-NEXT: G_STORE [[COPY9]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; NEW_RBS-NEXT: S_ENDPGM 0 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $sgpr2 - %3:_(s32) = COPY $sgpr3 - %4:_(<4 x s32>) = G_BUILD_VECTOR %0(s32), %1(s32), %2(s32), %3(s32) - %5:_(s32) = COPY $sgpr4 - %6:_(s32) = COPY $vgpr0 - %7:_(s32) = COPY $vgpr1 - %8:_(p1) = G_MERGE_VALUES %6(s32), %7(s32) - %9:_(s32) = G_CONSTANT i32 0 - %10:_(<4 x s32>) = G_AMDGPU_BUFFER_LOAD %4(<4 x s32>), %9(s32), %5, %9, 0, 0, 0 :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) - %11:_(s32) = G_CONSTANT i32 1 - %12:_(s32), %13:_(s32), %14:_(s32), %15:_(s32) = G_UNMERGE_VALUES %10(<4 x s32>) - %16:_(s32) = G_ADD %13, %11 - G_STORE %16(s32), %8(p1) :: (store (s32), addrspace 1) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $sgpr2 + %3:_(i32) = COPY $sgpr3 + %4:_(<4 x i32>) = G_BUILD_VECTOR %0(i32), %1(i32), %2(i32), %3(i32) + %5:_(i32) = COPY $sgpr4 + %6:_(i32) = COPY $vgpr0 + %7:_(i32) = COPY $vgpr1 + %8:_(p1) = G_MERGE_VALUES %6(i32), %7(i32) + %9:_(i32) = G_CONSTANT i32 0 + %10:_(<4 x i32>) = G_AMDGPU_BUFFER_LOAD %4(<4 x i32>), %9(i32), %5, %9, 0, 0, 0 :: (dereferenceable load (<4 x i32>), align 1, addrspace 8) + %11:_(i32) = G_CONSTANT i32 1 + %12:_(i32), %13:_(i32), %14:_(i32), %15:_(i32) = G_UNMERGE_VALUES %10(<4 x i32>) + %16:_(i32) = G_ADD %13, %11 + G_STORE %16(i32), %8(p1) :: (store (i32), addrspace 1) S_ENDPGM 0 ... 
@@ -192,61 +201,61 @@ body: | ; OLD_RBS-LABEL: name: buffer_load_divergent ; OLD_RBS: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2 ; OLD_RBS-NEXT: {{ $}} - ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; OLD_RBS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; OLD_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; OLD_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; OLD_RBS-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY5]](s32), [[COPY6]](s32) - ; OLD_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; OLD_RBS-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; OLD_RBS-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY7]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) - ; OLD_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; OLD_RBS-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x s32>) - ; OLD_RBS-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; OLD_RBS-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[UV1]], [[COPY8]] - ; OLD_RBS-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; OLD_RBS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; OLD_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; OLD_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; OLD_RBS-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY5]](i32), [[COPY6]](i32) + ; OLD_RBS-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; OLD_RBS-NEXT: [[COPY7:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; OLD_RBS-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x i32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[COPY7]](i32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable load (<4 x i32>), align 1, addrspace 8) + ; OLD_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; OLD_RBS-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32), [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x i32>) + ; OLD_RBS-NEXT: [[COPY8:%[0-9]+]]:vgpr(i32) = COPY [[C1]](i32) + ; OLD_RBS-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[UV1]], [[COPY8]] + ; OLD_RBS-NEXT: G_STORE [[ADD]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; OLD_RBS-NEXT: S_ENDPGM 0 ; ; NEW_RBS-LABEL: name: buffer_load_divergent ; NEW_RBS: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2 ; NEW_RBS-NEXT: {{ $}} - ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; NEW_RBS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR 
[[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; NEW_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; NEW_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; NEW_RBS-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY5]](s32), [[COPY6]](s32) - ; NEW_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; NEW_RBS-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; NEW_RBS-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY7]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) - ; NEW_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; NEW_RBS-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x s32>) - ; NEW_RBS-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; NEW_RBS-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[UV1]], [[COPY8]] - ; NEW_RBS-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; NEW_RBS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x i32>) = G_BUILD_VECTOR [[COPY]](i32), [[COPY1]](i32), [[COPY2]](i32), [[COPY3]](i32) + ; NEW_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; NEW_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; NEW_RBS-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY5]](i32), [[COPY6]](i32) + ; NEW_RBS-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; NEW_RBS-NEXT: [[COPY7:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; NEW_RBS-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x i32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x i32>), [[COPY7]](i32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable load (<4 x i32>), align 1, addrspace 8) + ; NEW_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; NEW_RBS-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32), [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x i32>) + ; NEW_RBS-NEXT: [[COPY8:%[0-9]+]]:vgpr(i32) = COPY [[C1]](i32) + ; NEW_RBS-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[UV1]], [[COPY8]] + ; NEW_RBS-NEXT: G_STORE [[ADD]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; NEW_RBS-NEXT: S_ENDPGM 0 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $sgpr2 - %3:_(s32) = COPY $sgpr3 - %4:_(<4 x s32>) = G_BUILD_VECTOR %0(s32), %1(s32), %2(s32), %3(s32) - %5:_(s32) = COPY $vgpr0 - %6:_(s32) = COPY $vgpr1 - %7:_(s32) = COPY $vgpr2 - %8:_(p1) = G_MERGE_VALUES %6(s32), %7(s32) - %9:_(s32) = G_CONSTANT i32 0 - %10:_(<4 x s32>) = G_AMDGPU_BUFFER_LOAD %4(<4 x s32>), %9(s32), %5, %9, 0, 0, 0 :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) - %11:_(s32) = G_CONSTANT i32 1 - %12:_(s32), %13:_(s32), %14:_(s32), %15:_(s32) = G_UNMERGE_VALUES %10(<4 x s32>) - %16:_(s32) = G_ADD %13, %11 - G_STORE %16(s32), %8(p1) :: (store (s32), addrspace 1) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $sgpr2 + %3:_(i32) = COPY $sgpr3 + %4:_(<4 x i32>) = G_BUILD_VECTOR %0(i32), %1(i32), %2(i32), %3(i32) + %5:_(i32) = COPY $vgpr0 + %6:_(i32) = COPY $vgpr1 + %7:_(i32) = COPY $vgpr2 + %8:_(p1) = 
G_MERGE_VALUES %6(i32), %7(i32) + %9:_(i32) = G_CONSTANT i32 0 + %10:_(<4 x i32>) = G_AMDGPU_BUFFER_LOAD %4(<4 x i32>), %9(i32), %5, %9, 0, 0, 0 :: (dereferenceable load (<4 x i32>), align 1, addrspace 8) + %11:_(i32) = G_CONSTANT i32 1 + %12:_(i32), %13:_(i32), %14:_(i32), %15:_(i32) = G_UNMERGE_VALUES %10(<4 x i32>) + %16:_(i32) = G_ADD %13, %11 + G_STORE %16(i32), %8(p1) :: (store (i32), addrspace 1) S_ENDPGM 0 ... @@ -260,53 +269,53 @@ body: | ; OLD_RBS-LABEL: name: vgpr_and_i64 ; OLD_RBS: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; OLD_RBS-NEXT: {{ $}} - ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; OLD_RBS-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; OLD_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; OLD_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 - ; OLD_RBS-NEXT: [[MV2:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; OLD_RBS-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) - ; OLD_RBS-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV1]](s64) - ; OLD_RBS-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] - ; OLD_RBS-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] - ; OLD_RBS-NEXT: [[MV3:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) - ; OLD_RBS-NEXT: G_STORE [[MV3]](s64), [[MV2]](p1) :: (store (s64), addrspace 1) + ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; OLD_RBS-NEXT: [[MV1:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; OLD_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr4 + ; OLD_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY $vgpr5 + ; OLD_RBS-NEXT: [[MV2:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; OLD_RBS-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[MV]](i64) + ; OLD_RBS-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[MV1]](i64) + ; OLD_RBS-NEXT: [[AND:%[0-9]+]]:vgpr(i32) = G_AND [[UV]], [[UV2]] + ; OLD_RBS-NEXT: [[AND1:%[0-9]+]]:vgpr(i32) = G_AND [[UV1]], [[UV3]] + ; OLD_RBS-NEXT: [[MV3:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[AND]](i32), [[AND1]](i32) + ; OLD_RBS-NEXT: G_STORE [[MV3]](i64), [[MV2]](p1) :: (store (i64), addrspace 1) ; OLD_RBS-NEXT: S_ENDPGM 0 ; ; NEW_RBS-LABEL: name: vgpr_and_i64 ; NEW_RBS: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; NEW_RBS-NEXT: {{ $}} - ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; NEW_RBS-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; NEW_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; NEW_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = 
COPY $vgpr5 - ; NEW_RBS-NEXT: [[MV2:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; NEW_RBS-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) - ; NEW_RBS-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV1]](s64) - ; NEW_RBS-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] - ; NEW_RBS-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] - ; NEW_RBS-NEXT: [[MV3:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) - ; NEW_RBS-NEXT: G_STORE [[MV3]](s64), [[MV2]](p1) :: (store (s64), addrspace 1) + ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; NEW_RBS-NEXT: [[MV1:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; NEW_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr4 + ; NEW_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY $vgpr5 + ; NEW_RBS-NEXT: [[MV2:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; NEW_RBS-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[MV]](i64) + ; NEW_RBS-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[MV1]](i64) + ; NEW_RBS-NEXT: [[AND:%[0-9]+]]:vgpr(i32) = G_AND [[UV]], [[UV2]] + ; NEW_RBS-NEXT: [[AND1:%[0-9]+]]:vgpr(i32) = G_AND [[UV1]], [[UV3]] + ; NEW_RBS-NEXT: [[MV3:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[AND]](i32), [[AND1]](i32) + ; NEW_RBS-NEXT: G_STORE [[MV3]](i64), [[MV2]](p1) :: (store (i64), addrspace 1) ; NEW_RBS-NEXT: S_ENDPGM 0 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s64) = G_MERGE_VALUES %0(s32), %1(s32) - %3:_(s32) = COPY $vgpr2 - %4:_(s32) = COPY $vgpr3 - %5:_(s64) = G_MERGE_VALUES %3(s32), %4(s32) - %6:_(s32) = COPY $vgpr4 - %7:_(s32) = COPY $vgpr5 - %8:_(p1) = G_MERGE_VALUES %6(s32), %7(s32) - %9:_(s64) = G_AND %2, %5 - G_STORE %9(s64), %8(p1) :: (store (s64), addrspace 1) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i64) = G_MERGE_VALUES %0(i32), %1(i32) + %3:_(i32) = COPY $vgpr2 + %4:_(i32) = COPY $vgpr3 + %5:_(i64) = G_MERGE_VALUES %3(i32), %4(i32) + %6:_(i32) = COPY $vgpr4 + %7:_(i32) = COPY $vgpr5 + %8:_(p1) = G_MERGE_VALUES %6(i32), %7(i32) + %9:_(i64) = G_AND %2, %5 + G_STORE %9(i64), %8(p1) :: (store (i64), addrspace 1) S_ENDPGM 0 ... 
@@ -320,42 +329,42 @@ body: | ; OLD_RBS-LABEL: name: abs_sgpr_i16 ; OLD_RBS: liveins: $sgpr0, $vgpr0, $vgpr1 ; OLD_RBS-NEXT: {{ $}} - ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; OLD_RBS-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; OLD_RBS-NEXT: [[SEXT:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC]](s16) - ; OLD_RBS-NEXT: [[ABS:%[0-9]+]]:sgpr(s32) = G_ABS [[SEXT]] - ; OLD_RBS-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[ABS]](s32) - ; OLD_RBS-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC1]](s16) - ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[ANYEXT]](s32) - ; OLD_RBS-NEXT: G_STORE [[COPY3]](s32), [[MV]](p1) :: (store (s16), addrspace 1) + ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; OLD_RBS-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY1]](i32), [[COPY2]](i32) + ; OLD_RBS-NEXT: [[SEXT:%[0-9]+]]:sgpr(i32) = G_SEXT [[TRUNC]](i16) + ; OLD_RBS-NEXT: [[ABS:%[0-9]+]]:sgpr(i32) = G_ABS [[SEXT]] + ; OLD_RBS-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i16) = G_TRUNC [[ABS]](i32) + ; OLD_RBS-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC1]](i16) + ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[ANYEXT]](i32) + ; OLD_RBS-NEXT: G_STORE [[COPY3]](i32), [[MV]](p1) :: (store (i16), addrspace 1) ; OLD_RBS-NEXT: S_ENDPGM 0 ; ; NEW_RBS-LABEL: name: abs_sgpr_i16 ; NEW_RBS: liveins: $sgpr0, $vgpr0, $vgpr1 ; NEW_RBS-NEXT: {{ $}} - ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; NEW_RBS-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; NEW_RBS-NEXT: [[SEXT:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC]](s16) - ; NEW_RBS-NEXT: [[ABS:%[0-9]+]]:sgpr(s32) = G_ABS [[SEXT]] - ; NEW_RBS-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[ABS]](s32) - ; NEW_RBS-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC1]](s16) - ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[ANYEXT]](s32) - ; NEW_RBS-NEXT: G_STORE [[COPY3]](s32), [[MV]](p1) :: (store (s16), addrspace 1) + ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; NEW_RBS-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY1]](i32), [[COPY2]](i32) + ; NEW_RBS-NEXT: [[SEXT:%[0-9]+]]:sgpr(i32) = G_SEXT [[TRUNC]](i16) + ; NEW_RBS-NEXT: [[ABS:%[0-9]+]]:sgpr(i32) = G_ABS [[SEXT]] + ; NEW_RBS-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i16) = G_TRUNC [[ABS]](i32) + ; NEW_RBS-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC1]](i16) + ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[ANYEXT]](i32) + ; NEW_RBS-NEXT: G_STORE [[COPY3]](i32), [[MV]](p1) :: (store (i16), addrspace 1) ; NEW_RBS-NEXT: S_ENDPGM 0 - %0:_(s32) = COPY $sgpr0 - %1:_(s16) = G_TRUNC %0(s32) - %2:_(s32) = COPY $vgpr0 - %3:_(s32) = COPY $vgpr1 - %4:_(p1) = G_MERGE_VALUES %2(s32), %3(s32) - %5:_(s16) = G_ABS %1 - %6:_(s32) = G_ANYEXT 
%5(s16) - G_STORE %6(s32), %4(p1) :: (store (s16), addrspace 1) + %0:_(i32) = COPY $sgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(i32) = COPY $vgpr0 + %3:_(i32) = COPY $vgpr1 + %4:_(p1) = G_MERGE_VALUES %2(i32), %3(i32) + %5:_(i16) = G_ABS %1 + %6:_(i32) = G_ANYEXT %5(i16) + G_STORE %6(i32), %4(p1) :: (store (i16), addrspace 1) S_ENDPGM 0 ... @@ -369,38 +378,38 @@ body: | ; OLD_RBS-NEXT: successors: %bb.1(0x30000000), %bb.2(0x50000000) ; OLD_RBS-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; OLD_RBS-NEXT: {{ $}} - ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; OLD_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; OLD_RBS-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(uge), [[COPY2]](s32), [[C]] - ; OLD_RBS-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; OLD_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; OLD_RBS-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY3]](s32), [[C1]] - ; OLD_RBS-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) - ; OLD_RBS-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; OLD_RBS-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; OLD_RBS-NEXT: G_BRCOND [[ZEXT]](s32), %bb.2 + ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; OLD_RBS-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 6 + ; OLD_RBS-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(uge), [[COPY2]](i32), [[C]] + ; OLD_RBS-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; OLD_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; OLD_RBS-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ne), [[COPY3]](i32), [[C1]] + ; OLD_RBS-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP1]](i32) + ; OLD_RBS-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i1) + ; OLD_RBS-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC]](i1) + ; OLD_RBS-NEXT: G_BRCOND [[ZEXT]](i32), %bb.2 ; OLD_RBS-NEXT: G_BR %bb.1 ; OLD_RBS-NEXT: {{ $}} ; OLD_RBS-NEXT: bb.1: ; OLD_RBS-NEXT: successors: %bb.2(0x80000000) ; OLD_RBS-NEXT: {{ $}} - ; OLD_RBS-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; OLD_RBS-NEXT: [[ICMP2:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ult), [[COPY2]](s32), [[C2]] - ; OLD_RBS-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP2]](s32) - ; OLD_RBS-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s1) + ; OLD_RBS-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; OLD_RBS-NEXT: [[ICMP2:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ult), [[COPY2]](i32), [[C2]] + ; OLD_RBS-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP2]](i32) + ; OLD_RBS-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC2]](i1) ; OLD_RBS-NEXT: {{ $}} ; OLD_RBS-NEXT: bb.2: - ; OLD_RBS-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 - ; OLD_RBS-NEXT: [[TRUNC3:%[0-9]+]]:sgpr(s1) = G_TRUNC [[PHI]](s32) - ; OLD_RBS-NEXT: [[SEXT:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC3]](s1) - ; OLD_RBS-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; OLD_RBS-NEXT: 
[[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[SEXT]], [[C3]] - ; OLD_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) - ; OLD_RBS-NEXT: G_STORE [[COPY4]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; OLD_RBS-NEXT: [[PHI:%[0-9]+]]:sgpr(i32) = G_PHI [[ANYEXT]](i32), %bb.0, [[ANYEXT1]](i32), %bb.1 + ; OLD_RBS-NEXT: [[TRUNC3:%[0-9]+]]:sgpr(i1) = G_TRUNC [[PHI]](i32) + ; OLD_RBS-NEXT: [[SEXT:%[0-9]+]]:sgpr(i32) = G_SEXT [[TRUNC3]](i1) + ; OLD_RBS-NEXT: [[C3:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 2 + ; OLD_RBS-NEXT: [[ADD:%[0-9]+]]:sgpr(i32) = G_ADD [[SEXT]], [[C3]] + ; OLD_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[ADD]](i32) + ; OLD_RBS-NEXT: G_STORE [[COPY4]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; OLD_RBS-NEXT: S_ENDPGM 0 ; ; NEW_RBS-LABEL: name: uniform_i1_phi @@ -408,67 +417,71 @@ body: | ; NEW_RBS-NEXT: successors: %bb.1(0x30000000), %bb.2(0x50000000) ; NEW_RBS-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; NEW_RBS-NEXT: {{ $}} - ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; NEW_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; NEW_RBS-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(uge), [[COPY2]](s32), [[C]] - ; NEW_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; NEW_RBS-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY3]](s32), [[C1]] - ; NEW_RBS-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; NEW_RBS-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[ICMP1]], [[C2]] - ; NEW_RBS-NEXT: G_BRCOND [[AND]](s32), %bb.2 + ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; NEW_RBS-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 6 + ; NEW_RBS-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(uge), [[COPY2]](i32), [[C]] + ; NEW_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; NEW_RBS-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ne), [[COPY3]](i32), [[C1]] + ; NEW_RBS-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; NEW_RBS-NEXT: [[AND:%[0-9]+]]:sgpr(i32) = G_AND [[ICMP1]], [[C2]] + ; NEW_RBS-NEXT: G_BRCOND [[AND]](i32), %bb.2 ; NEW_RBS-NEXT: G_BR %bb.1 ; NEW_RBS-NEXT: {{ $}} ; NEW_RBS-NEXT: bb.1: ; NEW_RBS-NEXT: successors: %bb.2(0x80000000) ; NEW_RBS-NEXT: {{ $}} - ; NEW_RBS-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; NEW_RBS-NEXT: [[ICMP2:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ult), [[COPY2]](s32), [[C3]] + ; NEW_RBS-NEXT: [[C3:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; NEW_RBS-NEXT: [[ICMP2:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ult), [[COPY2]](i32), [[C3]] ; NEW_RBS-NEXT: {{ $}} ; NEW_RBS-NEXT: bb.2: - ; NEW_RBS-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI [[ICMP]](s32), %bb.0, [[ICMP2]](s32), %bb.1 - ; NEW_RBS-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; NEW_RBS-NEXT: [[AND1:%[0-9]+]]:sgpr(s32) = G_AND [[PHI]], [[C4]] - ; NEW_RBS-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -1 - ; NEW_RBS-NEXT: [[C6:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; NEW_RBS-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND1]](s32), [[C5]], [[C6]] - ; NEW_RBS-NEXT: 
[[C7:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; NEW_RBS-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[SELECT]], [[C7]] - ; NEW_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) - ; NEW_RBS-NEXT: G_STORE [[COPY4]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; NEW_RBS-NEXT: [[PHI:%[0-9]+]]:sgpr(i32) = G_PHI [[ICMP]](i32), %bb.0, [[ICMP2]](i32), %bb.1 + ; NEW_RBS-NEXT: [[C4:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; NEW_RBS-NEXT: [[AND1:%[0-9]+]]:sgpr(i32) = G_AND [[PHI]], [[C4]] + ; NEW_RBS-NEXT: [[C5:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 -1 + ; NEW_RBS-NEXT: [[C6:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; NEW_RBS-NEXT: [[SELECT:%[0-9]+]]:sgpr(i32) = G_SELECT [[AND1]](i32), [[C5]], [[C6]] + ; NEW_RBS-NEXT: [[C7:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 2 + ; NEW_RBS-NEXT: [[ADD:%[0-9]+]]:sgpr(i32) = G_ADD [[SELECT]], [[C7]] + ; NEW_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[ADD]](i32) + ; NEW_RBS-NEXT: G_STORE [[COPY4]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; NEW_RBS-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1(0x30000000), %bb.2(0x50000000) liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(p1) = G_MERGE_VALUES %0(s32), %1(s32) - %3:_(s32) = COPY $sgpr0 - %4:_(s32) = COPY $sgpr1 - %5:_(s32) = G_CONSTANT i32 6 - %6:_(s1) = G_ICMP intpred(uge), %3(s32), %5 - %7:_(s32) = G_CONSTANT i32 0 - %8:_(s1) = G_ICMP intpred(ne), %4(s32), %7 - G_BRCOND %8(s1), %bb.2 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(p1) = G_MERGE_VALUES %0(i32), %1(i32) + %3:_(i32) = COPY $sgpr0 + %4:_(i32) = COPY $sgpr1 + %5:_(i32) = G_CONSTANT i32 6 + %6:_(i1) = G_ICMP intpred(uge), %3(i32), %5 + %7:_(i32) = G_CONSTANT i32 0 + %8:_(i1) = G_ICMP intpred(ne), %4(i32), %7 + G_BRCOND %8(i1), %bb.2 G_BR %bb.1 bb.1: successors: %bb.2(0x80000000) - %9:_(s32) = G_CONSTANT i32 1 - %10:_(s1) = G_ICMP intpred(ult), %3(s32), %9 + %9:_(i32) = G_CONSTANT i32 1 + %10:_(i1) = G_ICMP intpred(ult), %3(i32), %9 bb.2: - %11:_(s1) = G_PHI %6(s1), %bb.0, %10(s1), %bb.1 - %12:_(s32) = G_SEXT %11(s1) - %13:_(s32) = G_CONSTANT i32 2 - %14:_(s32) = G_ADD %12, %13 - G_STORE %14(s32), %2(p1) :: (store (s32), addrspace 1) + %11:_(i1) = G_PHI %6(i1), %bb.0, %10(i1), %bb.1 + %12:_(i32) = G_SEXT %11(i1) + %13:_(i32) = G_CONSTANT i32 2 + %14:_(i32) = G_ADD %12, %13 + G_STORE %14(i32), %2(p1) :: (store (i32), addrspace 1) S_ENDPGM 0 + + + + ... 
--- @@ -481,52 +494,55 @@ body: | ; OLD_RBS-LABEL: name: vcc_to_scc ; OLD_RBS: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1 ; OLD_RBS-NEXT: {{ $}} - ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; OLD_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) - ; OLD_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00 - ; OLD_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; OLD_RBS-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; OLD_RBS-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(oeq), [[COPY5]](s32), [[COPY6]] - ; OLD_RBS-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; OLD_RBS-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; OLD_RBS-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[FCMP]](s1), [[COPY7]], [[COPY8]] - ; OLD_RBS-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; OLD_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](i32), [[COPY4]](i32) + ; OLD_RBS-NEXT: [[C:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 0.000000e+00 + ; OLD_RBS-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; OLD_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST]](f32) + ; OLD_RBS-NEXT: [[COPY6:%[0-9]+]]:vgpr(f32) = COPY [[C]](f32) + ; OLD_RBS-NEXT: [[FCMP:%[0-9]+]]:vcc(i1) = G_FCMP floatpred(oeq), [[COPY5]](f32), [[COPY6]] + ; OLD_RBS-NEXT: [[COPY7:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; OLD_RBS-NEXT: [[COPY8:%[0-9]+]]:vgpr(i32) = COPY [[COPY2]](i32) + ; OLD_RBS-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[FCMP]](i1), [[COPY7]], [[COPY8]] + ; OLD_RBS-NEXT: G_STORE [[SELECT]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; OLD_RBS-NEXT: S_ENDPGM 0 ; ; NEW_RBS-LABEL: name: vcc_to_scc ; NEW_RBS: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1 ; NEW_RBS-NEXT: {{ $}} - ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; NEW_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) - ; NEW_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00 - ; NEW_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; NEW_RBS-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; NEW_RBS-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(oeq), [[COPY5]](s32), [[COPY6]] - ; NEW_RBS-NEXT: [[AMDGPU_COPY_SCC_VCC:%[0-9]+]]:sgpr(s32) = G_AMDGPU_COPY_SCC_VCC [[FCMP]](s1) - ; NEW_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; NEW_RBS-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[AMDGPU_COPY_SCC_VCC]], [[C1]] - ; NEW_RBS-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND]](s32), [[COPY1]], [[COPY2]] - ; NEW_RBS-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[SELECT]](s32) - ; NEW_RBS-NEXT: G_STORE [[COPY7]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + 
; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; NEW_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](i32), [[COPY4]](i32) + ; NEW_RBS-NEXT: [[C:%[0-9]+]]:sgpr(f32) = G_FCONSTANT float 0.000000e+00 + ; NEW_RBS-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; NEW_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST]](f32) + ; NEW_RBS-NEXT: [[COPY6:%[0-9]+]]:vgpr(f32) = COPY [[C]](f32) + ; NEW_RBS-NEXT: [[FCMP:%[0-9]+]]:vcc(i1) = G_FCMP floatpred(oeq), [[COPY5]](f32), [[COPY6]] + ; NEW_RBS-NEXT: [[AMDGPU_COPY_SCC_VCC:%[0-9]+]]:sgpr(i32) = G_AMDGPU_COPY_SCC_VCC [[FCMP]](i1) + ; NEW_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; NEW_RBS-NEXT: [[AND:%[0-9]+]]:sgpr(i32) = G_AND [[AMDGPU_COPY_SCC_VCC]], [[C1]] + ; NEW_RBS-NEXT: [[SELECT:%[0-9]+]]:sgpr(i32) = G_SELECT [[AND]](i32), [[COPY1]], [[COPY2]] + ; NEW_RBS-NEXT: [[COPY7:%[0-9]+]]:vgpr(i32) = COPY [[SELECT]](i32) + ; NEW_RBS-NEXT: G_STORE [[COPY7]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; NEW_RBS-NEXT: S_ENDPGM 0 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $sgpr2 - %3:_(s32) = COPY $vgpr0 - %4:_(s32) = COPY $vgpr1 - %5:_(p1) = G_MERGE_VALUES %3(s32), %4(s32) - %6:_(s32) = G_FCONSTANT float 0.000000e+00 - %7:_(s1) = G_FCMP floatpred(oeq), %0(s32), %6 - %8:_(s32) = G_SELECT %7(s1), %1, %2 - G_STORE %8(s32), %5(p1) :: (store (s32), addrspace 1) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $sgpr2 + %3:_(i32) = COPY $vgpr0 + %4:_(i32) = COPY $vgpr1 + %5:_(p1) = G_MERGE_VALUES %3(i32), %4(i32) + %6:_(f32) = G_FCONSTANT float 0.000000e+00 + %7:_(f32) = G_BITCAST %0(i32) + %8:_(i1) = G_FCMP floatpred(oeq), %7(f32), %6 + %9:_(i32) = G_SELECT %8(i1), %1, %2 + G_STORE %9(i32), %5(p1) :: (store (i32), addrspace 1) S_ENDPGM 0 ... 
@@ -540,45 +556,45 @@ body: | ; OLD_RBS-LABEL: name: scc_to_vcc ; OLD_RBS: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; OLD_RBS-NEXT: {{ $}} - ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; OLD_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) - ; OLD_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; OLD_RBS-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; OLD_RBS-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; OLD_RBS-NEXT: [[COPY5:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; OLD_RBS-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY5]](s1), [[COPY1]], [[COPY2]] - ; OLD_RBS-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; OLD_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](i32), [[COPY4]](i32) + ; OLD_RBS-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; OLD_RBS-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY]](i32), [[C]] + ; OLD_RBS-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; OLD_RBS-NEXT: [[COPY5:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; OLD_RBS-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY5]](i1), [[COPY1]], [[COPY2]] + ; OLD_RBS-NEXT: G_STORE [[SELECT]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; OLD_RBS-NEXT: S_ENDPGM 0 ; ; NEW_RBS-LABEL: name: scc_to_vcc ; NEW_RBS: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; NEW_RBS-NEXT: {{ $}} - ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; NEW_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) - ; NEW_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; NEW_RBS-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; NEW_RBS-NEXT: [[AMDGPU_COPY_VCC_SCC:%[0-9]+]]:vcc(s1) = G_AMDGPU_COPY_VCC_SCC [[ICMP]](s32) - ; NEW_RBS-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[AMDGPU_COPY_VCC_SCC]](s1), [[COPY1]], [[COPY2]] - ; NEW_RBS-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; NEW_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](i32), [[COPY4]](i32) + ; NEW_RBS-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; NEW_RBS-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY]](i32), [[C]] + ; NEW_RBS-NEXT: [[AMDGPU_COPY_VCC_SCC:%[0-9]+]]:vcc(i1) = G_AMDGPU_COPY_VCC_SCC [[ICMP]](i32) + ; NEW_RBS-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[AMDGPU_COPY_VCC_SCC]](i1), [[COPY1]], 
[[COPY2]] + ; NEW_RBS-NEXT: G_STORE [[SELECT]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; NEW_RBS-NEXT: S_ENDPGM 0 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = COPY $vgpr1 - %3:_(s32) = COPY $vgpr2 - %4:_(s32) = COPY $vgpr3 - %5:_(p1) = G_MERGE_VALUES %3(s32), %4(s32) - %6:_(s32) = G_CONSTANT i32 0 - %7:_(s1) = G_ICMP intpred(eq), %0(s32), %6 - %8:_(s32) = G_SELECT %7(s1), %1, %2 - G_STORE %8(s32), %5(p1) :: (store (s32), addrspace 1) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = COPY $vgpr1 + %3:_(i32) = COPY $vgpr2 + %4:_(i32) = COPY $vgpr3 + %5:_(p1) = G_MERGE_VALUES %3(i32), %4(i32) + %6:_(i32) = G_CONSTANT i32 0 + %7:_(i1) = G_ICMP intpred(eq), %0(i32), %6 + %8:_(i32) = G_SELECT %7(i1), %1, %2 + G_STORE %8(i32), %5(p1) :: (store (i32), addrspace 1) S_ENDPGM 0 ... @@ -592,43 +608,43 @@ body: | ; OLD_RBS-LABEL: name: vgpr_to_vcc_trunc ; OLD_RBS: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; OLD_RBS-NEXT: {{ $}} - ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; OLD_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) - ; OLD_RBS-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; OLD_RBS-NEXT: [[COPY5:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; OLD_RBS-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY5]](s1), [[COPY1]], [[COPY2]] - ; OLD_RBS-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; OLD_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr4 + ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](i32), [[COPY4]](i32) + ; OLD_RBS-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY]](i32) + ; OLD_RBS-NEXT: [[COPY5:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; OLD_RBS-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY5]](i1), [[COPY1]], [[COPY2]] + ; OLD_RBS-NEXT: G_STORE [[SELECT]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; OLD_RBS-NEXT: S_ENDPGM 0 ; ; NEW_RBS-LABEL: name: vgpr_to_vcc_trunc ; NEW_RBS: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; NEW_RBS-NEXT: {{ $}} - ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; NEW_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) - ; NEW_RBS-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 - ; NEW_RBS-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[COPY]], [[C]] - ; NEW_RBS-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; NEW_RBS-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[AND]](s32), [[C1]] - ; NEW_RBS-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY1]], [[COPY2]] - ; NEW_RBS-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 
+ ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; NEW_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr4 + ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](i32), [[COPY4]](i32) + ; NEW_RBS-NEXT: [[C:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 1 + ; NEW_RBS-NEXT: [[AND:%[0-9]+]]:vgpr(i32) = G_AND [[COPY]], [[C]] + ; NEW_RBS-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; NEW_RBS-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[AND]](i32), [[C1]] + ; NEW_RBS-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[COPY1]], [[COPY2]] + ; NEW_RBS-NEXT: G_STORE [[SELECT]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; NEW_RBS-NEXT: S_ENDPGM 0 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = COPY $vgpr3 - %4:_(s32) = COPY $vgpr4 - %5:_(p1) = G_MERGE_VALUES %3(s32), %4(s32) - %6:_(s1) = G_TRUNC %0(s32) - %7:_(s32) = G_SELECT %6(s1), %1, %2 - G_STORE %7(s32), %5(p1) :: (store (s32), addrspace 1) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 + %4:_(i32) = COPY $vgpr4 + %5:_(p1) = G_MERGE_VALUES %3(i32), %4(i32) + %6:_(i1) = G_TRUNC %0(i32) + %7:_(i32) = G_SELECT %6(i1), %1, %2 + G_STORE %7(i32), %5(p1) :: (store (i32), addrspace 1) S_ENDPGM 0 ... @@ -642,42 +658,42 @@ body: | ; OLD_RBS-LABEL: name: zext ; OLD_RBS: liveins: $sgpr0, $vgpr0, $vgpr1 ; OLD_RBS-NEXT: {{ $}} - ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; OLD_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 - ; OLD_RBS-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; OLD_RBS-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; OLD_RBS-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[ZEXT]](s32) - ; OLD_RBS-NEXT: G_STORE [[COPY3]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY1]](i32), [[COPY2]](i32) + ; OLD_RBS-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 10 + ; OLD_RBS-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY]](i32), [[C]] + ; OLD_RBS-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; OLD_RBS-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[ZEXT]](i32) + ; OLD_RBS-NEXT: G_STORE [[COPY3]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; OLD_RBS-NEXT: S_ENDPGM 0 ; ; NEW_RBS-LABEL: name: zext ; NEW_RBS: liveins: $sgpr0, $vgpr0, $vgpr1 ; NEW_RBS-NEXT: {{ $}} - ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; NEW_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 - ; NEW_RBS-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; NEW_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; NEW_RBS-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[ICMP]], [[C1]] - ; NEW_RBS-NEXT: [[C2:%[0-9]+]]:sgpr(s32) 
= G_CONSTANT i32 0 - ; NEW_RBS-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND]](s32), [[C1]], [[C2]] - ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT]](s32) - ; NEW_RBS-NEXT: G_STORE [[COPY3]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY1]](i32), [[COPY2]](i32) + ; NEW_RBS-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 10 + ; NEW_RBS-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY]](i32), [[C]] + ; NEW_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; NEW_RBS-NEXT: [[AND:%[0-9]+]]:sgpr(i32) = G_AND [[ICMP]], [[C1]] + ; NEW_RBS-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; NEW_RBS-NEXT: [[SELECT:%[0-9]+]]:sgpr(i32) = G_SELECT [[AND]](i32), [[C1]], [[C2]] + ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[SELECT]](i32) + ; NEW_RBS-NEXT: G_STORE [[COPY3]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; NEW_RBS-NEXT: S_ENDPGM 0 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = COPY $vgpr1 - %3:_(p1) = G_MERGE_VALUES %1(s32), %2(s32) - %4:_(s32) = G_CONSTANT i32 10 - %5:_(s1) = G_ICMP intpred(eq), %0(s32), %4 - %6:_(s32) = G_ZEXT %5(s1) - G_STORE %6(s32), %3(p1) :: (store (s32), addrspace 1) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = COPY $vgpr1 + %3:_(p1) = G_MERGE_VALUES %1(i32), %2(i32) + %4:_(i32) = G_CONSTANT i32 10 + %5:_(i1) = G_ICMP intpred(eq), %0(i32), %4 + %6:_(i32) = G_ZEXT %5(i1) + G_STORE %6(i32), %3(p1) :: (store (i32), addrspace 1) S_ENDPGM 0 ... @@ -691,43 +707,43 @@ body: | ; OLD_RBS-LABEL: name: sext ; OLD_RBS: liveins: $sgpr0, $vgpr0, $vgpr1 ; OLD_RBS-NEXT: {{ $}} - ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; OLD_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 - ; OLD_RBS-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; OLD_RBS-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; OLD_RBS-NEXT: [[SEXT:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC]](s1) - ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SEXT]](s32) - ; OLD_RBS-NEXT: G_STORE [[COPY3]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY1]](i32), [[COPY2]](i32) + ; OLD_RBS-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 10 + ; OLD_RBS-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY]](i32), [[C]] + ; OLD_RBS-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; OLD_RBS-NEXT: [[SEXT:%[0-9]+]]:sgpr(i32) = G_SEXT [[TRUNC]](i1) + ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[SEXT]](i32) + ; OLD_RBS-NEXT: G_STORE [[COPY3]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; OLD_RBS-NEXT: S_ENDPGM 0 ; ; NEW_RBS-LABEL: name: sext ; NEW_RBS: liveins: $sgpr0, $vgpr0, $vgpr1 ; NEW_RBS-NEXT: {{ $}} - ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY 
$vgpr1 - ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; NEW_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 - ; NEW_RBS-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; NEW_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; NEW_RBS-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[ICMP]], [[C1]] - ; NEW_RBS-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -1 - ; NEW_RBS-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; NEW_RBS-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND]](s32), [[C2]], [[C3]] - ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT]](s32) - ; NEW_RBS-NEXT: G_STORE [[COPY3]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY1]](i32), [[COPY2]](i32) + ; NEW_RBS-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 10 + ; NEW_RBS-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY]](i32), [[C]] + ; NEW_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; NEW_RBS-NEXT: [[AND:%[0-9]+]]:sgpr(i32) = G_AND [[ICMP]], [[C1]] + ; NEW_RBS-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 -1 + ; NEW_RBS-NEXT: [[C3:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; NEW_RBS-NEXT: [[SELECT:%[0-9]+]]:sgpr(i32) = G_SELECT [[AND]](i32), [[C2]], [[C3]] + ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[SELECT]](i32) + ; NEW_RBS-NEXT: G_STORE [[COPY3]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; NEW_RBS-NEXT: S_ENDPGM 0 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = COPY $vgpr1 - %3:_(p1) = G_MERGE_VALUES %1(s32), %2(s32) - %4:_(s32) = G_CONSTANT i32 10 - %5:_(s1) = G_ICMP intpred(eq), %0(s32), %4 - %6:_(s32) = G_SEXT %5(s1) - G_STORE %6(s32), %3(p1) :: (store (s32), addrspace 1) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = COPY $vgpr1 + %3:_(p1) = G_MERGE_VALUES %1(i32), %2(i32) + %4:_(i32) = G_CONSTANT i32 10 + %5:_(i1) = G_ICMP intpred(eq), %0(i32), %4 + %6:_(i32) = G_SEXT %5(i1) + G_STORE %6(i32), %3(p1) :: (store (i32), addrspace 1) S_ENDPGM 0 ... 
@@ -741,52 +757,52 @@ body: | ; OLD_RBS-LABEL: name: and_i1_vcc ; OLD_RBS: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; OLD_RBS-NEXT: {{ $}} - ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; OLD_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 - ; OLD_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; OLD_RBS-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(uge), [[COPY]](s32), [[COPY4]] - ; OLD_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 20 - ; OLD_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; OLD_RBS-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(uge), [[COPY1]](s32), [[COPY5]] - ; OLD_RBS-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; OLD_RBS-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[AND]](s1), [[COPY]], [[COPY1]] - ; OLD_RBS-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; OLD_RBS-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 10 + ; OLD_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; OLD_RBS-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(uge), [[COPY]](i32), [[COPY4]] + ; OLD_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 20 + ; OLD_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[C1]](i32) + ; OLD_RBS-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(uge), [[COPY1]](i32), [[COPY5]] + ; OLD_RBS-NEXT: [[AND:%[0-9]+]]:vcc(i1) = G_AND [[ICMP]], [[ICMP1]] + ; OLD_RBS-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[AND]](i1), [[COPY]], [[COPY1]] + ; OLD_RBS-NEXT: G_STORE [[SELECT]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; OLD_RBS-NEXT: S_ENDPGM 0 ; ; NEW_RBS-LABEL: name: and_i1_vcc ; NEW_RBS: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; NEW_RBS-NEXT: {{ $}} - ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; NEW_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 - ; NEW_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; NEW_RBS-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(uge), [[COPY]](s32), [[COPY4]] - ; NEW_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 20 - ; NEW_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; NEW_RBS-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(uge), [[COPY1]](s32), [[COPY5]] - ; NEW_RBS-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; NEW_RBS-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[AND]](s1), [[COPY]], [[COPY1]] - ; NEW_RBS-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; 
NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; NEW_RBS-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 10 + ; NEW_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; NEW_RBS-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(uge), [[COPY]](i32), [[COPY4]] + ; NEW_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 20 + ; NEW_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[C1]](i32) + ; NEW_RBS-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(uge), [[COPY1]](i32), [[COPY5]] + ; NEW_RBS-NEXT: [[AND:%[0-9]+]]:vcc(i1) = G_AND [[ICMP]], [[ICMP1]] + ; NEW_RBS-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[AND]](i1), [[COPY]], [[COPY1]] + ; NEW_RBS-NEXT: G_STORE [[SELECT]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; NEW_RBS-NEXT: S_ENDPGM 0 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = COPY $vgpr3 - %4:_(p1) = G_MERGE_VALUES %2(s32), %3(s32) - %5:_(s32) = G_CONSTANT i32 10 - %6:_(s1) = G_ICMP intpred(uge), %0(s32), %5 - %7:_(s32) = G_CONSTANT i32 20 - %8:_(s1) = G_ICMP intpred(uge), %1(s32), %7 - %9:_(s1) = G_AND %6, %8 - %10:_(s32) = G_SELECT %9(s1), %0, %1 - G_STORE %10(s32), %4(p1) :: (store (s32), addrspace 1) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 + %4:_(p1) = G_MERGE_VALUES %2(i32), %3(i32) + %5:_(i32) = G_CONSTANT i32 10 + %6:_(i1) = G_ICMP intpred(uge), %0(i32), %5 + %7:_(i32) = G_CONSTANT i32 20 + %8:_(i1) = G_ICMP intpred(uge), %1(i32), %7 + %9:_(i1) = G_AND %6, %8 + %10:_(i32) = G_SELECT %9(i1), %0, %1 + G_STORE %10(i32), %4(p1) :: (store (i32), addrspace 1) S_ENDPGM 0 ... @@ -800,58 +816,58 @@ body: | ; OLD_RBS-LABEL: name: and_i1_scc ; OLD_RBS: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; OLD_RBS-NEXT: {{ $}} - ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; OLD_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 - ; OLD_RBS-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(uge), [[COPY]](s32), [[C]] - ; OLD_RBS-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; OLD_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 20 - ; OLD_RBS-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(uge), [[COPY1]](s32), [[C1]] - ; OLD_RBS-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) - ; OLD_RBS-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; OLD_RBS-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC1]](s1) - ; OLD_RBS-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[ANYEXT]], [[ANYEXT1]] - ; OLD_RBS-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[AND]](s32) - ; OLD_RBS-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC2]](s1) - ; OLD_RBS-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ZEXT]](s32), [[COPY]], [[COPY1]] - ; OLD_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[SELECT]](s32) - ; OLD_RBS-NEXT: G_STORE [[COPY4]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; OLD_RBS-NEXT: [[C:%[0-9]+]]:sgpr(i32) 
= G_CONSTANT i32 10 + ; OLD_RBS-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(uge), [[COPY]](i32), [[C]] + ; OLD_RBS-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; OLD_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 20 + ; OLD_RBS-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(uge), [[COPY1]](i32), [[C1]] + ; OLD_RBS-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP1]](i32) + ; OLD_RBS-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC]](i1) + ; OLD_RBS-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC1]](i1) + ; OLD_RBS-NEXT: [[AND:%[0-9]+]]:sgpr(i32) = G_AND [[ANYEXT]], [[ANYEXT1]] + ; OLD_RBS-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(i1) = G_TRUNC [[AND]](i32) + ; OLD_RBS-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC2]](i1) + ; OLD_RBS-NEXT: [[SELECT:%[0-9]+]]:sgpr(i32) = G_SELECT [[ZEXT]](i32), [[COPY]], [[COPY1]] + ; OLD_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[SELECT]](i32) + ; OLD_RBS-NEXT: G_STORE [[COPY4]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; OLD_RBS-NEXT: S_ENDPGM 0 ; ; NEW_RBS-LABEL: name: and_i1_scc ; NEW_RBS: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; NEW_RBS-NEXT: {{ $}} - ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; NEW_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 - ; NEW_RBS-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(uge), [[COPY]](s32), [[C]] - ; NEW_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 20 - ; NEW_RBS-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(uge), [[COPY1]](s32), [[C1]] - ; NEW_RBS-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[ICMP]], [[ICMP1]] - ; NEW_RBS-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; NEW_RBS-NEXT: [[AND1:%[0-9]+]]:sgpr(s32) = G_AND [[AND]], [[C2]] - ; NEW_RBS-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND1]](s32), [[COPY]], [[COPY1]] - ; NEW_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[SELECT]](s32) - ; NEW_RBS-NEXT: G_STORE [[COPY4]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; NEW_RBS-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 10 + ; NEW_RBS-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(uge), [[COPY]](i32), [[C]] + ; NEW_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 20 + ; NEW_RBS-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(uge), [[COPY1]](i32), [[C1]] + ; NEW_RBS-NEXT: [[AND:%[0-9]+]]:sgpr(i32) = G_AND [[ICMP]], [[ICMP1]] + ; NEW_RBS-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; NEW_RBS-NEXT: [[AND1:%[0-9]+]]:sgpr(i32) = G_AND [[AND]], [[C2]] + ; NEW_RBS-NEXT: [[SELECT:%[0-9]+]]:sgpr(i32) = G_SELECT [[AND1]](i32), [[COPY]], [[COPY1]] + ; NEW_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[SELECT]](i32) + ; NEW_RBS-NEXT: G_STORE [[COPY4]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; NEW_RBS-NEXT: S_ENDPGM 0 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $vgpr0 - %3:_(s32) = COPY $vgpr1 - %4:_(p1) = G_MERGE_VALUES %2(s32), %3(s32) - %5:_(s32) = G_CONSTANT i32 10 - %6:_(s1) = G_ICMP intpred(uge), 
%0(s32), %5 - %7:_(s32) = G_CONSTANT i32 20 - %8:_(s1) = G_ICMP intpred(uge), %1(s32), %7 - %9:_(s1) = G_AND %6, %8 - %10:_(s32) = G_SELECT %9(s1), %0, %1 - G_STORE %10(s32), %4(p1) :: (store (s32), addrspace 1) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $vgpr0 + %3:_(i32) = COPY $vgpr1 + %4:_(p1) = G_MERGE_VALUES %2(i32), %3(i32) + %5:_(i32) = G_CONSTANT i32 10 + %6:_(i1) = G_ICMP intpred(uge), %0(i32), %5 + %7:_(i32) = G_CONSTANT i32 20 + %8:_(i1) = G_ICMP intpred(uge), %1(i32), %7 + %9:_(i1) = G_AND %6, %8 + %10:_(i32) = G_SELECT %9(i1), %0, %1 + G_STORE %10(i32), %4(p1) :: (store (i32), addrspace 1) S_ENDPGM 0 ... @@ -865,26 +881,26 @@ body: | ; OLD_RBS-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; OLD_RBS-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; OLD_RBS-NEXT: {{ $}} - ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; OLD_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; OLD_RBS-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] - ; OLD_RBS-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec + ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY1]](i32), [[COPY2]](i32) + ; OLD_RBS-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; OLD_RBS-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(i1) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY3]] + ; OLD_RBS-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(i32) = SI_IF [[ICMP]](i1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec ; OLD_RBS-NEXT: G_BR %bb.1 ; OLD_RBS-NEXT: {{ $}} ; OLD_RBS-NEXT: bb.1: ; OLD_RBS-NEXT: successors: %bb.2(0x80000000) ; OLD_RBS-NEXT: {{ $}} - ; OLD_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; OLD_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 ; OLD_RBS-NEXT: {{ $}} ; OLD_RBS-NEXT: bb.2: - ; OLD_RBS-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI [[C]](s32), %bb.0, [[C1]](s32), %bb.1 - ; OLD_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32) - ; OLD_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[PHI]](s32) - ; OLD_RBS-NEXT: G_STORE [[COPY4]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; OLD_RBS-NEXT: [[PHI:%[0-9]+]]:sgpr(i32) = G_PHI [[C]](i32), %bb.0, [[C1]](i32), %bb.1 + ; OLD_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](i32) + ; OLD_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[PHI]](i32) + ; OLD_RBS-NEXT: G_STORE [[COPY4]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; OLD_RBS-NEXT: S_ENDPGM 0 ; ; NEW_RBS-LABEL: name: divergent_phi_with_uniform_inputs @@ -892,50 +908,50 @@ body: | ; NEW_RBS-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; NEW_RBS-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; NEW_RBS-NEXT: {{ $}} - ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = 
G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; NEW_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; NEW_RBS-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] - ; NEW_RBS-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[ICMP]](s1) - ; NEW_RBS-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY4]](s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec + ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY1]](i32), [[COPY2]](i32) + ; NEW_RBS-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; NEW_RBS-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY3]] + ; NEW_RBS-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec(i1) = COPY [[ICMP]](i1) + ; NEW_RBS-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(i32) = SI_IF [[COPY4]](i1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec ; NEW_RBS-NEXT: G_BR %bb.1 ; NEW_RBS-NEXT: {{ $}} ; NEW_RBS-NEXT: bb.1: ; NEW_RBS-NEXT: successors: %bb.2(0x80000000) ; NEW_RBS-NEXT: {{ $}} - ; NEW_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; NEW_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 ; NEW_RBS-NEXT: {{ $}} ; NEW_RBS-NEXT: bb.2: - ; NEW_RBS-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[C]](s32), %bb.0, [[C1]](s32), %bb.1 - ; NEW_RBS-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY [[SI_IF]](s32) - ; NEW_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[COPY5]](s32) - ; NEW_RBS-NEXT: G_STORE [[PHI]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; NEW_RBS-NEXT: [[PHI:%[0-9]+]]:vgpr(i32) = G_PHI [[C]](i32), %bb.0, [[C1]](i32), %bb.1 + ; NEW_RBS-NEXT: [[COPY5:%[0-9]+]]:sgpr(i32) = COPY [[SI_IF]](i32) + ; NEW_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[COPY5]](i32) + ; NEW_RBS-NEXT: G_STORE [[PHI]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; NEW_RBS-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0, $vgpr1, $vgpr2 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(p1) = G_MERGE_VALUES %1(s32), %2(s32) - %4:_(s32) = G_CONSTANT i32 0 - %5:sreg_32_xm0_xexec(s1) = G_ICMP intpred(eq), %0(s32), %4 - %6:sreg_32_xm0_xexec(s32) = SI_IF %5(s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(p1) = G_MERGE_VALUES %1(i32), %2(i32) + %4:_(i32) = G_CONSTANT i32 0 + %5:sreg_32_xm0_xexec(i1) = G_ICMP intpred(eq), %0(i32), %4 + %6:sreg_32_xm0_xexec(i32) = SI_IF %5(i1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.1 bb.1: successors: %bb.2(0x80000000) - %7:_(s32) = G_CONSTANT i32 1 + %7:_(i32) = G_CONSTANT i32 1 bb.2: - %8:_(s32) = G_PHI %4(s32), %bb.0, %7(s32), %bb.1 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %6(s32) - G_STORE %8(s32), %3(p1) :: (store (s32), addrspace 1) + %8:_(i32) = G_PHI %4(i32), %bb.0, %7(i32), %bb.1 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %6(i32) + G_STORE %8(i32), %3(p1) :: (store (i32), addrspace 1) S_ENDPGM 0 ... 
@@ -949,35 +965,36 @@ body: | ; OLD_RBS-NEXT: successors: %bb.1(0x80000000) ; OLD_RBS-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; OLD_RBS-NEXT: {{ $}} - ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; OLD_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -1 - ; OLD_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY1]](i32), [[COPY2]](i32) + ; OLD_RBS-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 -1 + ; OLD_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 ; OLD_RBS-NEXT: {{ $}} ; OLD_RBS-NEXT: bb.1: ; OLD_RBS-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000) ; OLD_RBS-NEXT: {{ $}} - ; OLD_RBS-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI %7(s32), %bb.1, [[C1]](s32), %bb.0 - ; OLD_RBS-NEXT: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[C]](s32), %bb.0, %9(s32), %bb.1 - ; OLD_RBS-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32) - ; OLD_RBS-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PHI1]], [[COPY3]] - ; OLD_RBS-NEXT: [[UITOFP:%[0-9]+]]:vgpr(s32) = G_UITOFP [[ADD]](s32) - ; OLD_RBS-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]] - ; OLD_RBS-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI]](s32) - ; OLD_RBS-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + ; OLD_RBS-NEXT: [[PHI:%[0-9]+]]:sgpr(i32) = G_PHI %7(i32), %bb.1, [[C1]](i32), %bb.0 + ; OLD_RBS-NEXT: [[PHI1:%[0-9]+]]:vgpr(i32) = G_PHI [[C]](i32), %bb.0, %9(i32), %bb.1 + ; OLD_RBS-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[C2]](i32) + ; OLD_RBS-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[PHI1]], [[COPY3]] + ; OLD_RBS-NEXT: [[UITOFP:%[0-9]+]]:vgpr(f32) = G_UITOFP [[ADD]](i32) + ; OLD_RBS-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; OLD_RBS-NEXT: [[FCMP:%[0-9]+]]:vcc(i1) = G_FCMP floatpred(ogt), [[UITOFP]](f32), [[BITCAST]] + ; OLD_RBS-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP]](i1), [[PHI]](i32) + ; OLD_RBS-NEXT: SI_LOOP [[INT]](i32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec ; OLD_RBS-NEXT: G_BR %bb.2 ; OLD_RBS-NEXT: {{ $}} ; OLD_RBS-NEXT: bb.2: - ; OLD_RBS-NEXT: [[PHI2:%[0-9]+]]:vgpr(s32) = G_PHI [[ADD]](s32), %bb.1 - ; OLD_RBS-NEXT: [[PHI3:%[0-9]+]]:sgpr(s32) = G_PHI [[INT]](s32), %bb.1 - ; OLD_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI3]](s32) - ; OLD_RBS-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 - ; OLD_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C3]](s32) - ; OLD_RBS-NEXT: [[MUL:%[0-9]+]]:vgpr(s32) = G_MUL [[PHI2]], [[COPY4]] - ; OLD_RBS-NEXT: G_STORE [[MUL]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; OLD_RBS-NEXT: [[PHI2:%[0-9]+]]:vgpr(i32) = G_PHI [[ADD]](i32), %bb.1 + ; OLD_RBS-NEXT: [[PHI3:%[0-9]+]]:sgpr(i32) = G_PHI [[INT]](i32), %bb.1 + ; OLD_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI3]](i32) + ; OLD_RBS-NEXT: [[C3:%[0-9]+]]:sgpr(i32) 
= G_CONSTANT i32 10 + ; OLD_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[C3]](i32) + ; OLD_RBS-NEXT: [[MUL:%[0-9]+]]:vgpr(i32) = G_MUL [[PHI2]], [[COPY4]] + ; OLD_RBS-NEXT: G_STORE [[MUL]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; OLD_RBS-NEXT: S_ENDPGM 0 ; ; NEW_RBS-LABEL: name: divergent_because_of_temporal_divergent_use @@ -985,68 +1002,70 @@ body: | ; NEW_RBS-NEXT: successors: %bb.1(0x80000000) ; NEW_RBS-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; NEW_RBS-NEXT: {{ $}} - ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; NEW_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -1 - ; NEW_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY1]](i32), [[COPY2]](i32) + ; NEW_RBS-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 -1 + ; NEW_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 ; NEW_RBS-NEXT: {{ $}} ; NEW_RBS-NEXT: bb.1: ; NEW_RBS-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000) ; NEW_RBS-NEXT: {{ $}} - ; NEW_RBS-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI %17(s32), %bb.1, [[C1]](s32), %bb.0 - ; NEW_RBS-NEXT: [[PHI1:%[0-9]+]]:sgpr(s32) = G_PHI [[C]](s32), %bb.0, %9(s32), %bb.1 - ; NEW_RBS-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; NEW_RBS-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[PHI1]], [[C2]] - ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) - ; NEW_RBS-NEXT: [[UITOFP:%[0-9]+]]:vgpr(s32) = G_UITOFP [[COPY3]](s32) - ; NEW_RBS-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]] - ; NEW_RBS-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI]](s32) - ; NEW_RBS-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec(s32) = COPY [[INT]](s32) - ; NEW_RBS-NEXT: SI_LOOP [[COPY4]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + ; NEW_RBS-NEXT: [[PHI:%[0-9]+]]:sgpr(i32) = G_PHI %18(i32), %bb.1, [[C1]](i32), %bb.0 + ; NEW_RBS-NEXT: [[PHI1:%[0-9]+]]:sgpr(i32) = G_PHI [[C]](i32), %bb.0, %9(i32), %bb.1 + ; NEW_RBS-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; NEW_RBS-NEXT: [[ADD:%[0-9]+]]:sgpr(i32) = G_ADD [[PHI1]], [[C2]] + ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[ADD]](i32) + ; NEW_RBS-NEXT: [[UITOFP:%[0-9]+]]:vgpr(f32) = G_UITOFP [[COPY3]](i32) + ; NEW_RBS-NEXT: [[BITCAST:%[0-9]+]]:vgpr(f32) = G_BITCAST [[COPY]](i32) + ; NEW_RBS-NEXT: [[FCMP:%[0-9]+]]:vcc(i1) = G_FCMP floatpred(ogt), [[UITOFP]](f32), [[BITCAST]] + ; NEW_RBS-NEXT: [[INT:%[0-9]+]]:sgpr(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP]](i1), [[PHI]](i32) + ; NEW_RBS-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec(i32) = COPY [[INT]](i32) + ; NEW_RBS-NEXT: SI_LOOP [[COPY4]](i32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec ; NEW_RBS-NEXT: G_BR %bb.2 ; NEW_RBS-NEXT: {{ $}} ; NEW_RBS-NEXT: bb.2: - ; NEW_RBS-NEXT: [[PHI2:%[0-9]+]]:vgpr(s32) = G_PHI [[ADD]](s32), %bb.1 - ; NEW_RBS-NEXT: [[PHI3:%[0-9]+]]:sgpr(s32) = G_PHI [[INT]](s32), %bb.1 - ; NEW_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI3]](s32) - ; NEW_RBS-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 - ; NEW_RBS-NEXT: 
[[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C3]](s32) - ; NEW_RBS-NEXT: [[MUL:%[0-9]+]]:vgpr(s32) = G_MUL [[PHI2]], [[COPY5]] - ; NEW_RBS-NEXT: G_STORE [[MUL]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; NEW_RBS-NEXT: [[PHI2:%[0-9]+]]:vgpr(i32) = G_PHI [[ADD]](i32), %bb.1 + ; NEW_RBS-NEXT: [[PHI3:%[0-9]+]]:sgpr(i32) = G_PHI [[INT]](i32), %bb.1 + ; NEW_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI3]](i32) + ; NEW_RBS-NEXT: [[C3:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 10 + ; NEW_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[C3]](i32) + ; NEW_RBS-NEXT: [[MUL:%[0-9]+]]:vgpr(i32) = G_MUL [[PHI2]], [[COPY5]] + ; NEW_RBS-NEXT: G_STORE [[MUL]](i32), [[MV]](p1) :: (store (i32), addrspace 1) ; NEW_RBS-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1(0x80000000) liveins: $vgpr0, $vgpr1, $vgpr2 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(p1) = G_MERGE_VALUES %1(s32), %2(s32) - %4:_(s32) = G_CONSTANT i32 -1 - %5:_(s32) = G_CONSTANT i32 0 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(p1) = G_MERGE_VALUES %1(i32), %2(i32) + %4:_(i32) = G_CONSTANT i32 -1 + %5:_(i32) = G_CONSTANT i32 0 bb.1: successors: %bb.2(0x04000000), %bb.1(0x7c000000) - %6:_(s32) = G_PHI %7(s32), %bb.1, %5(s32), %bb.0 - %8:_(s32) = G_PHI %4(s32), %bb.0, %9(s32), %bb.1 - %10:_(s32) = G_CONSTANT i32 1 - %9:_(s32) = G_ADD %8, %10 - %11:_(s32) = G_UITOFP %9(s32) - %12:_(s1) = G_FCMP floatpred(ogt), %11(s32), %0 - %7:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %12(s1), %6(s32) - SI_LOOP %7(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + %6:_(i32) = G_PHI %7(i32), %bb.1, %5(i32), %bb.0 + %8:_(i32) = G_PHI %4(i32), %bb.0, %9(i32), %bb.1 + %10:_(i32) = G_CONSTANT i32 1 + %9:_(i32) = G_ADD %8, %10 + %11:_(f32) = G_UITOFP %9(i32) + %17:_(f32) = G_BITCAST %0(i32) + %12:_(i1) = G_FCMP floatpred(ogt), %11(f32), %17 + %7:sreg_32_xm0_xexec(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %12(i1), %6(i32) + SI_LOOP %7(i32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.2 bb.2: - %13:_(s32) = G_PHI %9(s32), %bb.1 - %14:_(s32) = G_PHI %7(s32), %bb.1 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %14(s32) - %15:_(s32) = G_CONSTANT i32 10 - %16:_(s32) = G_MUL %13, %15 - G_STORE %16(s32), %3(p1) :: (store (s32), addrspace 1) + %13:_(i32) = G_PHI %9(i32), %bb.1 + %14:_(i32) = G_PHI %7(i32), %bb.1 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %14(i32) + %15:_(i32) = G_CONSTANT i32 10 + %16:_(i32) = G_MUL %13, %15 + G_STORE %16(i32), %3(p1) :: (store (i32), addrspace 1) S_ENDPGM 0 ... 
@@ -1060,115 +1079,115 @@ body: | ; OLD_RBS-NEXT: successors: %bb.1(0x80000000) ; OLD_RBS-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; OLD_RBS-NEXT: {{ $}} - ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; OLD_RBS-NEXT: [[MV1:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; OLD_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; OLD_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 - ; OLD_RBS-NEXT: [[MV2:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; OLD_RBS-NEXT: [[DEF:%[0-9]+]]:sgpr(s32) = G_IMPLICIT_DEF - ; OLD_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; OLD_RBS-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF + ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; OLD_RBS-NEXT: [[MV1:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; OLD_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr4 + ; OLD_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY $vgpr5 + ; OLD_RBS-NEXT: [[MV2:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; OLD_RBS-NEXT: [[DEF:%[0-9]+]]:sgpr(i32) = G_IMPLICIT_DEF + ; OLD_RBS-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; OLD_RBS-NEXT: [[DEF1:%[0-9]+]]:sreg_32(i1) = IMPLICIT_DEF ; OLD_RBS-NEXT: {{ $}} ; OLD_RBS-NEXT: bb.1: ; OLD_RBS-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) ; OLD_RBS-NEXT: {{ $}} - ; OLD_RBS-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %13(s1), %bb.3 - ; OLD_RBS-NEXT: [[PHI1:%[0-9]+]]:sgpr(s32) = G_PHI %15(s32), %bb.3, [[C]](s32), %bb.0 - ; OLD_RBS-NEXT: [[PHI2:%[0-9]+]]:vgpr(s32) = G_PHI [[C]](s32), %bb.0, %17(s32), %bb.3 - ; OLD_RBS-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1) - ; OLD_RBS-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[PHI2]](s32) - ; OLD_RBS-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31 - ; OLD_RBS-NEXT: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[COPY7]], [[C1]](s32) - ; OLD_RBS-NEXT: [[MV3:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY7]](s32), [[ASHR]](s32) - ; OLD_RBS-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; OLD_RBS-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32) - ; OLD_RBS-NEXT: [[SHL:%[0-9]+]]:vgpr(s64) = G_SHL [[MV3]], [[COPY8]](s32) - ; OLD_RBS-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[MV1]], [[SHL]](s64) - ; OLD_RBS-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1) - ; OLD_RBS-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; OLD_RBS-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[C3]](s32) - ; OLD_RBS-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[COPY9]] - ; OLD_RBS-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; OLD_RBS-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[C4]](s32) - ; OLD_RBS-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[TRUNC]](s1) - ; OLD_RBS-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc - ; OLD_RBS-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 
$exec_lo, [[COPY10]](s1), implicit-def $scc - ; OLD_RBS-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc - ; OLD_RBS-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1) - ; OLD_RBS-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec + ; OLD_RBS-NEXT: [[PHI:%[0-9]+]]:sreg_32(i1) = PHI [[DEF1]](i1), %bb.0, %13(i1), %bb.3 + ; OLD_RBS-NEXT: [[PHI1:%[0-9]+]]:sgpr(i32) = G_PHI %15(i32), %bb.3, [[C]](i32), %bb.0 + ; OLD_RBS-NEXT: [[PHI2:%[0-9]+]]:vgpr(i32) = G_PHI [[C]](i32), %bb.0, %17(i32), %bb.3 + ; OLD_RBS-NEXT: [[COPY6:%[0-9]+]]:sreg_32(i1) = COPY [[PHI]](i1) + ; OLD_RBS-NEXT: [[COPY7:%[0-9]+]]:vgpr(i32) = COPY [[PHI2]](i32) + ; OLD_RBS-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 31 + ; OLD_RBS-NEXT: [[ASHR:%[0-9]+]]:vgpr(i32) = G_ASHR [[COPY7]], [[C1]](i32) + ; OLD_RBS-NEXT: [[MV3:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[COPY7]](i32), [[ASHR]](i32) + ; OLD_RBS-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 2 + ; OLD_RBS-NEXT: [[COPY8:%[0-9]+]]:vgpr(i32) = COPY [[C2]](i32) + ; OLD_RBS-NEXT: [[SHL:%[0-9]+]]:vgpr(i64) = G_SHL [[MV3]], [[COPY8]](i32) + ; OLD_RBS-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[MV1]], [[SHL]](i64) + ; OLD_RBS-NEXT: [[LOAD:%[0-9]+]]:vgpr(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i32), addrspace 1) + ; OLD_RBS-NEXT: [[C3:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; OLD_RBS-NEXT: [[COPY9:%[0-9]+]]:vgpr(i32) = COPY [[C3]](i32) + ; OLD_RBS-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(i1) = G_ICMP intpred(ne), [[LOAD]](i32), [[COPY9]] + ; OLD_RBS-NEXT: [[C4:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; OLD_RBS-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[C4]](i32) + ; OLD_RBS-NEXT: [[COPY10:%[0-9]+]]:sreg_32(i1) = COPY [[TRUNC]](i1) + ; OLD_RBS-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(i1) = S_ANDN2_B32 [[COPY6]](i1), $exec_lo, implicit-def $scc + ; OLD_RBS-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(i1) = S_AND_B32 $exec_lo, [[COPY10]](i1), implicit-def $scc + ; OLD_RBS-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(i1) = S_OR_B32 [[S_ANDN2_B32_]](i1), [[S_AND_B32_]](i1), implicit-def $scc + ; OLD_RBS-NEXT: [[COPY11:%[0-9]+]]:sreg_32(i1) = COPY [[S_OR_B32_]](i1) + ; OLD_RBS-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(i32) = SI_IF [[ICMP]](i1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec ; OLD_RBS-NEXT: G_BR %bb.2 ; OLD_RBS-NEXT: {{ $}} ; OLD_RBS-NEXT: bb.2: ; OLD_RBS-NEXT: successors: %bb.4(0x40000000), %bb.5(0x40000000) ; OLD_RBS-NEXT: {{ $}} - ; OLD_RBS-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; OLD_RBS-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY [[C5]](s32) - ; OLD_RBS-NEXT: [[SHL1:%[0-9]+]]:vgpr(s64) = G_SHL [[MV3]], [[COPY12]](s32) - ; OLD_RBS-NEXT: [[PTR_ADD1:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[MV2]], [[SHL1]](s64) - ; OLD_RBS-NEXT: [[LOAD1:%[0-9]+]]:vgpr(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s32), addrspace 1) - ; OLD_RBS-NEXT: [[C6:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; OLD_RBS-NEXT: [[COPY13:%[0-9]+]]:vgpr(s32) = COPY [[C6]](s32) - ; OLD_RBS-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD1]](s32), [[COPY13]] - ; OLD_RBS-NEXT: [[C7:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; OLD_RBS-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[C7]](s32) - ; OLD_RBS-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[TRUNC1]](s1) - ; OLD_RBS-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[COPY14]](s1) - ; OLD_RBS-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP1]](s1), %bb.5, implicit-def $exec, 
implicit-def $scc, implicit $exec + ; OLD_RBS-NEXT: [[C5:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 2 + ; OLD_RBS-NEXT: [[COPY12:%[0-9]+]]:vgpr(i32) = COPY [[C5]](i32) + ; OLD_RBS-NEXT: [[SHL1:%[0-9]+]]:vgpr(i64) = G_SHL [[MV3]], [[COPY12]](i32) + ; OLD_RBS-NEXT: [[PTR_ADD1:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[MV2]], [[SHL1]](i64) + ; OLD_RBS-NEXT: [[LOAD1:%[0-9]+]]:vgpr(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i32), addrspace 1) + ; OLD_RBS-NEXT: [[C6:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; OLD_RBS-NEXT: [[COPY13:%[0-9]+]]:vgpr(i32) = COPY [[C6]](i32) + ; OLD_RBS-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(i1) = G_ICMP intpred(ne), [[LOAD1]](i32), [[COPY13]] + ; OLD_RBS-NEXT: [[C7:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; OLD_RBS-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[C7]](i32) + ; OLD_RBS-NEXT: [[COPY14:%[0-9]+]]:sreg_32(i1) = COPY [[TRUNC1]](i1) + ; OLD_RBS-NEXT: [[COPY15:%[0-9]+]]:sreg_32(i1) = COPY [[COPY14]](i1) + ; OLD_RBS-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(i32) = SI_IF [[ICMP1]](i1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec ; OLD_RBS-NEXT: G_BR %bb.4 ; OLD_RBS-NEXT: {{ $}} ; OLD_RBS-NEXT: bb.3: ; OLD_RBS-NEXT: successors: %bb.6(0x04000000), %bb.1(0x7c000000) ; OLD_RBS-NEXT: {{ $}} - ; OLD_RBS-NEXT: [[PHI3:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.1, %43(s1), %bb.5 - ; OLD_RBS-NEXT: [[PHI4:%[0-9]+]]:vgpr(s32) = G_PHI %44(s32), %bb.5, [[DEF]](s32), %bb.1 - ; OLD_RBS-NEXT: [[COPY16:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]](s1) - ; OLD_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32) - ; OLD_RBS-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY16]](s1), [[PHI1]](s32) - ; OLD_RBS-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + ; OLD_RBS-NEXT: [[PHI3:%[0-9]+]]:sreg_32(i1) = PHI [[S_OR_B32_]](i1), %bb.1, %43(i1), %bb.5 + ; OLD_RBS-NEXT: [[PHI4:%[0-9]+]]:vgpr(i32) = G_PHI %44(i32), %bb.5, [[DEF]](i32), %bb.1 + ; OLD_RBS-NEXT: [[COPY16:%[0-9]+]]:sreg_32(i1) = COPY [[PHI3]](i1) + ; OLD_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](i32) + ; OLD_RBS-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY16]](i1), [[PHI1]](i32) + ; OLD_RBS-NEXT: SI_LOOP [[INT]](i32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec ; OLD_RBS-NEXT: G_BR %bb.6 ; OLD_RBS-NEXT: {{ $}} ; OLD_RBS-NEXT: bb.4: ; OLD_RBS-NEXT: successors: %bb.5(0x80000000) ; OLD_RBS-NEXT: {{ $}} - ; OLD_RBS-NEXT: [[C8:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; OLD_RBS-NEXT: [[COPY17:%[0-9]+]]:vgpr(s32) = COPY [[C8]](s32) - ; OLD_RBS-NEXT: [[SHL2:%[0-9]+]]:vgpr(s64) = G_SHL [[MV3]], [[COPY17]](s32) - ; OLD_RBS-NEXT: [[PTR_ADD2:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[MV]], [[SHL2]](s64) - ; OLD_RBS-NEXT: [[LOAD2:%[0-9]+]]:vgpr(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s32), addrspace 1) - ; OLD_RBS-NEXT: [[C9:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; OLD_RBS-NEXT: [[COPY18:%[0-9]+]]:vgpr(s32) = COPY [[C9]](s32) - ; OLD_RBS-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[LOAD2]], [[COPY18]] - ; OLD_RBS-NEXT: G_STORE [[ADD]](s32), [[PTR_ADD2]](p1) :: (store (s32), addrspace 1) - ; OLD_RBS-NEXT: [[COPY19:%[0-9]+]]:vgpr(s32) = COPY [[C9]](s32) - ; OLD_RBS-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[PHI2]], [[COPY19]] - ; OLD_RBS-NEXT: [[C10:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 100 - ; OLD_RBS-NEXT: [[COPY20:%[0-9]+]]:vgpr(s32) = COPY [[C10]](s32) - ; OLD_RBS-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ult), 
[[PHI2]](s32), [[COPY20]] - ; OLD_RBS-NEXT: [[COPY21:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1) - ; OLD_RBS-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY15]](s1), $exec_lo, implicit-def $scc - ; OLD_RBS-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY21]](s1), implicit-def $scc - ; OLD_RBS-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc + ; OLD_RBS-NEXT: [[C8:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 2 + ; OLD_RBS-NEXT: [[COPY17:%[0-9]+]]:vgpr(i32) = COPY [[C8]](i32) + ; OLD_RBS-NEXT: [[SHL2:%[0-9]+]]:vgpr(i64) = G_SHL [[MV3]], [[COPY17]](i32) + ; OLD_RBS-NEXT: [[PTR_ADD2:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[MV]], [[SHL2]](i64) + ; OLD_RBS-NEXT: [[LOAD2:%[0-9]+]]:vgpr(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i32), addrspace 1) + ; OLD_RBS-NEXT: [[C9:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; OLD_RBS-NEXT: [[COPY18:%[0-9]+]]:vgpr(i32) = COPY [[C9]](i32) + ; OLD_RBS-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[LOAD2]], [[COPY18]] + ; OLD_RBS-NEXT: G_STORE [[ADD]](i32), [[PTR_ADD2]](p1) :: (store (i32), addrspace 1) + ; OLD_RBS-NEXT: [[COPY19:%[0-9]+]]:vgpr(i32) = COPY [[C9]](i32) + ; OLD_RBS-NEXT: [[ADD1:%[0-9]+]]:vgpr(i32) = G_ADD [[PHI2]], [[COPY19]] + ; OLD_RBS-NEXT: [[C10:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 100 + ; OLD_RBS-NEXT: [[COPY20:%[0-9]+]]:vgpr(i32) = COPY [[C10]](i32) + ; OLD_RBS-NEXT: [[ICMP2:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ult), [[PHI2]](i32), [[COPY20]] + ; OLD_RBS-NEXT: [[COPY21:%[0-9]+]]:sreg_32(i1) = COPY [[ICMP2]](i1) + ; OLD_RBS-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(i1) = S_ANDN2_B32 [[COPY15]](i1), $exec_lo, implicit-def $scc + ; OLD_RBS-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(i1) = S_AND_B32 $exec_lo, [[COPY21]](i1), implicit-def $scc + ; OLD_RBS-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(i1) = S_OR_B32 [[S_ANDN2_B32_1]](i1), [[S_AND_B32_1]](i1), implicit-def $scc ; OLD_RBS-NEXT: {{ $}} ; OLD_RBS-NEXT: bb.5: ; OLD_RBS-NEXT: successors: %bb.3(0x80000000) ; OLD_RBS-NEXT: {{ $}} - ; OLD_RBS-NEXT: [[PHI5:%[0-9]+]]:sreg_32(s1) = PHI [[COPY14]](s1), %bb.2, [[S_OR_B32_1]](s1), %bb.4 - ; OLD_RBS-NEXT: [[PHI6:%[0-9]+]]:vgpr(s32) = G_PHI [[ADD1]](s32), %bb.4, [[DEF]](s32), %bb.2 - ; OLD_RBS-NEXT: [[COPY22:%[0-9]+]]:sreg_32(s1) = COPY [[PHI5]](s1) - ; OLD_RBS-NEXT: [[COPY23:%[0-9]+]]:sreg_32(s1) = COPY [[COPY22]](s1) - ; OLD_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF1]](s32) - ; OLD_RBS-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY11]](s1), $exec_lo, implicit-def $scc - ; OLD_RBS-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY23]](s1), implicit-def $scc - ; OLD_RBS-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc + ; OLD_RBS-NEXT: [[PHI5:%[0-9]+]]:sreg_32(i1) = PHI [[COPY14]](i1), %bb.2, [[S_OR_B32_1]](i1), %bb.4 + ; OLD_RBS-NEXT: [[PHI6:%[0-9]+]]:vgpr(i32) = G_PHI [[ADD1]](i32), %bb.4, [[DEF]](i32), %bb.2 + ; OLD_RBS-NEXT: [[COPY22:%[0-9]+]]:sreg_32(i1) = COPY [[PHI5]](i1) + ; OLD_RBS-NEXT: [[COPY23:%[0-9]+]]:sreg_32(i1) = COPY [[COPY22]](i1) + ; OLD_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF1]](i32) + ; OLD_RBS-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(i1) = S_ANDN2_B32 [[COPY11]](i1), $exec_lo, implicit-def $scc + ; OLD_RBS-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(i1) = S_AND_B32 $exec_lo, [[COPY23]](i1), implicit-def $scc + ; OLD_RBS-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(i1) = S_OR_B32 [[S_ANDN2_B32_2]](i1), 
[[S_AND_B32_2]](i1), implicit-def $scc ; OLD_RBS-NEXT: G_BR %bb.3 ; OLD_RBS-NEXT: {{ $}} ; OLD_RBS-NEXT: bb.6: - ; OLD_RBS-NEXT: [[PHI7:%[0-9]+]]:sgpr(s32) = G_PHI [[INT]](s32), %bb.3 - ; OLD_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI7]](s32) + ; OLD_RBS-NEXT: [[PHI7:%[0-9]+]]:sgpr(i32) = G_PHI [[INT]](i32), %bb.3 + ; OLD_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI7]](i32) ; OLD_RBS-NEXT: S_ENDPGM 0 ; ; NEW_RBS-LABEL: name: loop_with_2breaks @@ -1176,217 +1195,217 @@ body: | ; NEW_RBS-NEXT: successors: %bb.1(0x80000000) ; NEW_RBS-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; NEW_RBS-NEXT: {{ $}} - ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; NEW_RBS-NEXT: [[MV1:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; NEW_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; NEW_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 - ; NEW_RBS-NEXT: [[MV2:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; NEW_RBS-NEXT: [[DEF:%[0-9]+]]:sgpr(s32) = G_IMPLICIT_DEF - ; NEW_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; NEW_RBS-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF + ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; NEW_RBS-NEXT: [[MV1:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; NEW_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY $vgpr4 + ; NEW_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY $vgpr5 + ; NEW_RBS-NEXT: [[MV2:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY4]](i32), [[COPY5]](i32) + ; NEW_RBS-NEXT: [[DEF:%[0-9]+]]:sgpr(i32) = G_IMPLICIT_DEF + ; NEW_RBS-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; NEW_RBS-NEXT: [[DEF1:%[0-9]+]]:sreg_32(i1) = IMPLICIT_DEF ; NEW_RBS-NEXT: {{ $}} ; NEW_RBS-NEXT: bb.1: ; NEW_RBS-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) ; NEW_RBS-NEXT: {{ $}} - ; NEW_RBS-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %13(s1), %bb.3 - ; NEW_RBS-NEXT: [[PHI1:%[0-9]+]]:sgpr(s32) = G_PHI %68(s32), %bb.3, [[C]](s32), %bb.0 - ; NEW_RBS-NEXT: [[PHI2:%[0-9]+]]:sgpr(s32) = G_PHI [[C]](s32), %bb.0, %17(s32), %bb.3 - ; NEW_RBS-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1) - ; NEW_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 31 - ; NEW_RBS-NEXT: [[ASHR:%[0-9]+]]:sgpr(s32) = G_ASHR [[PHI2]], [[C1]](s32) - ; NEW_RBS-NEXT: [[MV3:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[PHI2]](s32), [[ASHR]](s32) - ; NEW_RBS-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; NEW_RBS-NEXT: [[SHL:%[0-9]+]]:sgpr(s64) = G_SHL [[MV3]], [[C2]](s32) - ; NEW_RBS-NEXT: [[COPY7:%[0-9]+]]:vgpr(s64) = COPY [[SHL]](s64) - ; NEW_RBS-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[MV1]], [[COPY7]](s64) - ; NEW_RBS-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1) - ; NEW_RBS-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; NEW_RBS-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[C3]](s32) - ; NEW_RBS-NEXT: 
[[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[COPY8]] - ; NEW_RBS-NEXT: [[COPY9:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[ICMP]](s1) - ; NEW_RBS-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; NEW_RBS-NEXT: [[AMDGPU_COPY_VCC_SCC:%[0-9]+]]:sreg_32(s1) = G_AMDGPU_COPY_VCC_SCC [[C4]](s32) - ; NEW_RBS-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc - ; NEW_RBS-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[AMDGPU_COPY_VCC_SCC]](s1), implicit-def $scc - ; NEW_RBS-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc - ; NEW_RBS-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1) - ; NEW_RBS-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY9]](s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec + ; NEW_RBS-NEXT: [[PHI:%[0-9]+]]:sreg_32(i1) = PHI [[DEF1]](i1), %bb.0, %13(i1), %bb.3 + ; NEW_RBS-NEXT: [[PHI1:%[0-9]+]]:sgpr(i32) = G_PHI %68(i32), %bb.3, [[C]](i32), %bb.0 + ; NEW_RBS-NEXT: [[PHI2:%[0-9]+]]:sgpr(i32) = G_PHI [[C]](i32), %bb.0, %17(i32), %bb.3 + ; NEW_RBS-NEXT: [[COPY6:%[0-9]+]]:sreg_32(i1) = COPY [[PHI]](i1) + ; NEW_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 31 + ; NEW_RBS-NEXT: [[ASHR:%[0-9]+]]:sgpr(i32) = G_ASHR [[PHI2]], [[C1]](i32) + ; NEW_RBS-NEXT: [[MV3:%[0-9]+]]:sgpr(i64) = G_MERGE_VALUES [[PHI2]](i32), [[ASHR]](i32) + ; NEW_RBS-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 2 + ; NEW_RBS-NEXT: [[SHL:%[0-9]+]]:sgpr(i64) = G_SHL [[MV3]], [[C2]](i32) + ; NEW_RBS-NEXT: [[COPY7:%[0-9]+]]:vgpr(i64) = COPY [[SHL]](i64) + ; NEW_RBS-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[MV1]], [[COPY7]](i64) + ; NEW_RBS-NEXT: [[LOAD:%[0-9]+]]:vgpr(i32) = G_LOAD [[PTR_ADD]](p1) :: (load (i32), addrspace 1) + ; NEW_RBS-NEXT: [[C3:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; NEW_RBS-NEXT: [[COPY8:%[0-9]+]]:vgpr(i32) = COPY [[C3]](i32) + ; NEW_RBS-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[LOAD]](i32), [[COPY8]] + ; NEW_RBS-NEXT: [[COPY9:%[0-9]+]]:sreg_32_xm0_xexec(i1) = COPY [[ICMP]](i1) + ; NEW_RBS-NEXT: [[C4:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; NEW_RBS-NEXT: [[AMDGPU_COPY_VCC_SCC:%[0-9]+]]:sreg_32(i1) = G_AMDGPU_COPY_VCC_SCC [[C4]](i32) + ; NEW_RBS-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(i1) = S_ANDN2_B32 [[COPY6]](i1), $exec_lo, implicit-def $scc + ; NEW_RBS-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(i1) = S_AND_B32 $exec_lo, [[AMDGPU_COPY_VCC_SCC]](i1), implicit-def $scc + ; NEW_RBS-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(i1) = S_OR_B32 [[S_ANDN2_B32_]](i1), [[S_AND_B32_]](i1), implicit-def $scc + ; NEW_RBS-NEXT: [[COPY10:%[0-9]+]]:sreg_32(i1) = COPY [[S_OR_B32_]](i1) + ; NEW_RBS-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(i32) = SI_IF [[COPY9]](i1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec ; NEW_RBS-NEXT: G_BR %bb.2 ; NEW_RBS-NEXT: {{ $}} ; NEW_RBS-NEXT: bb.2: ; NEW_RBS-NEXT: successors: %bb.4(0x40000000), %bb.5(0x40000000) ; NEW_RBS-NEXT: {{ $}} - ; NEW_RBS-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; NEW_RBS-NEXT: [[SHL1:%[0-9]+]]:sgpr(s64) = G_SHL [[MV3]], [[C5]](s32) - ; NEW_RBS-NEXT: [[COPY11:%[0-9]+]]:vgpr(s64) = COPY [[SHL1]](s64) - ; NEW_RBS-NEXT: [[PTR_ADD1:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[MV2]], [[COPY11]](s64) - ; NEW_RBS-NEXT: [[LOAD1:%[0-9]+]]:vgpr(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s32), addrspace 1) - ; NEW_RBS-NEXT: [[C6:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; NEW_RBS-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY [[C6]](s32) - ; NEW_RBS-NEXT: 
[[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[LOAD1]](s32), [[COPY12]] - ; NEW_RBS-NEXT: [[COPY13:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[ICMP1]](s1) - ; NEW_RBS-NEXT: [[C7:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; NEW_RBS-NEXT: [[AMDGPU_COPY_VCC_SCC1:%[0-9]+]]:sreg_32(s1) = G_AMDGPU_COPY_VCC_SCC [[C7]](s32) - ; NEW_RBS-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[AMDGPU_COPY_VCC_SCC1]](s1) - ; NEW_RBS-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY13]](s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec + ; NEW_RBS-NEXT: [[C5:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 2 + ; NEW_RBS-NEXT: [[SHL1:%[0-9]+]]:sgpr(i64) = G_SHL [[MV3]], [[C5]](i32) + ; NEW_RBS-NEXT: [[COPY11:%[0-9]+]]:vgpr(i64) = COPY [[SHL1]](i64) + ; NEW_RBS-NEXT: [[PTR_ADD1:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[MV2]], [[COPY11]](i64) + ; NEW_RBS-NEXT: [[LOAD1:%[0-9]+]]:vgpr(i32) = G_LOAD [[PTR_ADD1]](p1) :: (load (i32), addrspace 1) + ; NEW_RBS-NEXT: [[C6:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; NEW_RBS-NEXT: [[COPY12:%[0-9]+]]:vgpr(i32) = COPY [[C6]](i32) + ; NEW_RBS-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[LOAD1]](i32), [[COPY12]] + ; NEW_RBS-NEXT: [[COPY13:%[0-9]+]]:sreg_32_xm0_xexec(i1) = COPY [[ICMP1]](i1) + ; NEW_RBS-NEXT: [[C7:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; NEW_RBS-NEXT: [[AMDGPU_COPY_VCC_SCC1:%[0-9]+]]:sreg_32(i1) = G_AMDGPU_COPY_VCC_SCC [[C7]](i32) + ; NEW_RBS-NEXT: [[COPY14:%[0-9]+]]:sreg_32(i1) = COPY [[AMDGPU_COPY_VCC_SCC1]](i1) + ; NEW_RBS-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(i32) = SI_IF [[COPY13]](i1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec ; NEW_RBS-NEXT: G_BR %bb.4 ; NEW_RBS-NEXT: {{ $}} ; NEW_RBS-NEXT: bb.3: ; NEW_RBS-NEXT: successors: %bb.6(0x04000000), %bb.1(0x7c000000) ; NEW_RBS-NEXT: {{ $}} - ; NEW_RBS-NEXT: [[PHI3:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.1, %43(s1), %bb.5 - ; NEW_RBS-NEXT: [[PHI4:%[0-9]+]]:sgpr(s32) = G_PHI %44(s32), %bb.5, [[DEF]](s32), %bb.1 - ; NEW_RBS-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]](s1) - ; NEW_RBS-NEXT: [[COPY16:%[0-9]+]]:sgpr(s32) = COPY [[SI_IF]](s32) - ; NEW_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[COPY16]](s32) - ; NEW_RBS-NEXT: [[COPY17:%[0-9]+]]:vcc(s1) = COPY [[COPY15]](s1) - ; NEW_RBS-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY17]](s1), [[PHI1]](s32) - ; NEW_RBS-NEXT: [[COPY18:%[0-9]+]]:sreg_32_xm0_xexec(s32) = COPY [[INT]](s32) - ; NEW_RBS-NEXT: SI_LOOP [[COPY18]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + ; NEW_RBS-NEXT: [[PHI3:%[0-9]+]]:sreg_32(i1) = PHI [[S_OR_B32_]](i1), %bb.1, %43(i1), %bb.5 + ; NEW_RBS-NEXT: [[PHI4:%[0-9]+]]:sgpr(i32) = G_PHI %44(i32), %bb.5, [[DEF]](i32), %bb.1 + ; NEW_RBS-NEXT: [[COPY15:%[0-9]+]]:sreg_32(i1) = COPY [[PHI3]](i1) + ; NEW_RBS-NEXT: [[COPY16:%[0-9]+]]:sgpr(i32) = COPY [[SI_IF]](i32) + ; NEW_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[COPY16]](i32) + ; NEW_RBS-NEXT: [[COPY17:%[0-9]+]]:vcc(i1) = COPY [[COPY15]](i1) + ; NEW_RBS-NEXT: [[INT:%[0-9]+]]:sgpr(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY17]](i1), [[PHI1]](i32) + ; NEW_RBS-NEXT: [[COPY18:%[0-9]+]]:sreg_32_xm0_xexec(i32) = COPY [[INT]](i32) + ; NEW_RBS-NEXT: SI_LOOP [[COPY18]](i32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec ; NEW_RBS-NEXT: G_BR %bb.6 ; NEW_RBS-NEXT: {{ $}} ; NEW_RBS-NEXT: bb.4: ; NEW_RBS-NEXT: successors: %bb.5(0x80000000) ; NEW_RBS-NEXT: {{ $}} - ; NEW_RBS-NEXT: [[C8:%[0-9]+]]:sgpr(s32) = 
G_CONSTANT i32 2 - ; NEW_RBS-NEXT: [[SHL2:%[0-9]+]]:sgpr(s64) = G_SHL [[MV3]], [[C8]](s32) - ; NEW_RBS-NEXT: [[COPY19:%[0-9]+]]:vgpr(s64) = COPY [[SHL2]](s64) - ; NEW_RBS-NEXT: [[PTR_ADD2:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[MV]], [[COPY19]](s64) - ; NEW_RBS-NEXT: [[LOAD2:%[0-9]+]]:vgpr(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s32), addrspace 1) - ; NEW_RBS-NEXT: [[C9:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; NEW_RBS-NEXT: [[COPY20:%[0-9]+]]:vgpr(s32) = COPY [[C9]](s32) - ; NEW_RBS-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[LOAD2]], [[COPY20]] - ; NEW_RBS-NEXT: G_STORE [[ADD]](s32), [[PTR_ADD2]](p1) :: (store (s32), addrspace 1) - ; NEW_RBS-NEXT: [[ADD1:%[0-9]+]]:sgpr(s32) = G_ADD [[PHI2]], [[C9]] - ; NEW_RBS-NEXT: [[C10:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 100 - ; NEW_RBS-NEXT: [[ICMP2:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ult), [[PHI2]](s32), [[C10]] - ; NEW_RBS-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[ICMP2]], [[C9]] - ; NEW_RBS-NEXT: [[AMDGPU_COPY_VCC_SCC2:%[0-9]+]]:sreg_32(s1) = G_AMDGPU_COPY_VCC_SCC [[AND]](s32) - ; NEW_RBS-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY14]](s1), $exec_lo, implicit-def $scc - ; NEW_RBS-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[AMDGPU_COPY_VCC_SCC2]](s1), implicit-def $scc - ; NEW_RBS-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc + ; NEW_RBS-NEXT: [[C8:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 2 + ; NEW_RBS-NEXT: [[SHL2:%[0-9]+]]:sgpr(i64) = G_SHL [[MV3]], [[C8]](i32) + ; NEW_RBS-NEXT: [[COPY19:%[0-9]+]]:vgpr(i64) = COPY [[SHL2]](i64) + ; NEW_RBS-NEXT: [[PTR_ADD2:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[MV]], [[COPY19]](i64) + ; NEW_RBS-NEXT: [[LOAD2:%[0-9]+]]:vgpr(i32) = G_LOAD [[PTR_ADD2]](p1) :: (load (i32), addrspace 1) + ; NEW_RBS-NEXT: [[C9:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; NEW_RBS-NEXT: [[COPY20:%[0-9]+]]:vgpr(i32) = COPY [[C9]](i32) + ; NEW_RBS-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[LOAD2]], [[COPY20]] + ; NEW_RBS-NEXT: G_STORE [[ADD]](i32), [[PTR_ADD2]](p1) :: (store (i32), addrspace 1) + ; NEW_RBS-NEXT: [[ADD1:%[0-9]+]]:sgpr(i32) = G_ADD [[PHI2]], [[C9]] + ; NEW_RBS-NEXT: [[C10:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 100 + ; NEW_RBS-NEXT: [[ICMP2:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ult), [[PHI2]](i32), [[C10]] + ; NEW_RBS-NEXT: [[AND:%[0-9]+]]:sgpr(i32) = G_AND [[ICMP2]], [[C9]] + ; NEW_RBS-NEXT: [[AMDGPU_COPY_VCC_SCC2:%[0-9]+]]:sreg_32(i1) = G_AMDGPU_COPY_VCC_SCC [[AND]](i32) + ; NEW_RBS-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(i1) = S_ANDN2_B32 [[COPY14]](i1), $exec_lo, implicit-def $scc + ; NEW_RBS-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(i1) = S_AND_B32 $exec_lo, [[AMDGPU_COPY_VCC_SCC2]](i1), implicit-def $scc + ; NEW_RBS-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(i1) = S_OR_B32 [[S_ANDN2_B32_1]](i1), [[S_AND_B32_1]](i1), implicit-def $scc ; NEW_RBS-NEXT: {{ $}} ; NEW_RBS-NEXT: bb.5: ; NEW_RBS-NEXT: successors: %bb.3(0x80000000) ; NEW_RBS-NEXT: {{ $}} - ; NEW_RBS-NEXT: [[PHI5:%[0-9]+]]:sreg_32(s1) = PHI [[AMDGPU_COPY_VCC_SCC1]](s1), %bb.2, [[S_OR_B32_1]](s1), %bb.4 - ; NEW_RBS-NEXT: [[PHI6:%[0-9]+]]:sgpr(s32) = G_PHI [[ADD1]](s32), %bb.4, [[DEF]](s32), %bb.2 - ; NEW_RBS-NEXT: [[COPY21:%[0-9]+]]:sreg_32(s1) = COPY [[PHI5]](s1) - ; NEW_RBS-NEXT: [[COPY22:%[0-9]+]]:sreg_32(s1) = COPY [[COPY21]](s1) - ; NEW_RBS-NEXT: [[COPY23:%[0-9]+]]:sgpr(s32) = COPY [[SI_IF1]](s32) - ; NEW_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[COPY23]](s32) - ; NEW_RBS-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY10]](s1), 
$exec_lo, implicit-def $scc - ; NEW_RBS-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY22]](s1), implicit-def $scc - ; NEW_RBS-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc + ; NEW_RBS-NEXT: [[PHI5:%[0-9]+]]:sreg_32(i1) = PHI [[AMDGPU_COPY_VCC_SCC1]](i1), %bb.2, [[S_OR_B32_1]](i1), %bb.4 + ; NEW_RBS-NEXT: [[PHI6:%[0-9]+]]:sgpr(i32) = G_PHI [[ADD1]](i32), %bb.4, [[DEF]](i32), %bb.2 + ; NEW_RBS-NEXT: [[COPY21:%[0-9]+]]:sreg_32(i1) = COPY [[PHI5]](i1) + ; NEW_RBS-NEXT: [[COPY22:%[0-9]+]]:sreg_32(i1) = COPY [[COPY21]](i1) + ; NEW_RBS-NEXT: [[COPY23:%[0-9]+]]:sgpr(i32) = COPY [[SI_IF1]](i32) + ; NEW_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[COPY23]](i32) + ; NEW_RBS-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(i1) = S_ANDN2_B32 [[COPY10]](i1), $exec_lo, implicit-def $scc + ; NEW_RBS-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(i1) = S_AND_B32 $exec_lo, [[COPY22]](i1), implicit-def $scc + ; NEW_RBS-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(i1) = S_OR_B32 [[S_ANDN2_B32_2]](i1), [[S_AND_B32_2]](i1), implicit-def $scc ; NEW_RBS-NEXT: G_BR %bb.3 ; NEW_RBS-NEXT: {{ $}} ; NEW_RBS-NEXT: bb.6: - ; NEW_RBS-NEXT: [[PHI7:%[0-9]+]]:sgpr(s32) = G_PHI [[INT]](s32), %bb.3 - ; NEW_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI7]](s32) + ; NEW_RBS-NEXT: [[PHI7:%[0-9]+]]:sgpr(i32) = G_PHI [[INT]](i32), %bb.3 + ; NEW_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI7]](i32) ; NEW_RBS-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1(0x80000000) liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(p1) = G_MERGE_VALUES %0(s32), %1(s32) - %3:_(s32) = COPY $vgpr2 - %4:_(s32) = COPY $vgpr3 - %5:_(p1) = G_MERGE_VALUES %3(s32), %4(s32) - %6:_(s32) = COPY $vgpr4 - %7:_(s32) = COPY $vgpr5 - %8:_(p1) = G_MERGE_VALUES %6(s32), %7(s32) - %9:_(s32) = G_IMPLICIT_DEF - %10:_(s32) = G_CONSTANT i32 0 - %11:sreg_32(s1) = IMPLICIT_DEF + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(p1) = G_MERGE_VALUES %0(i32), %1(i32) + %3:_(i32) = COPY $vgpr2 + %4:_(i32) = COPY $vgpr3 + %5:_(p1) = G_MERGE_VALUES %3(i32), %4(i32) + %6:_(i32) = COPY $vgpr4 + %7:_(i32) = COPY $vgpr5 + %8:_(p1) = G_MERGE_VALUES %6(i32), %7(i32) + %9:_(i32) = G_IMPLICIT_DEF + %10:_(i32) = G_CONSTANT i32 0 + %11:sreg_32(i1) = IMPLICIT_DEF bb.1: successors: %bb.2(0x40000000), %bb.3(0x40000000) - %12:sreg_32(s1) = PHI %11(s1), %bb.0, %13(s1), %bb.3 - %14:_(s32) = G_PHI %15(s32), %bb.3, %10(s32), %bb.0 - %16:_(s32) = G_PHI %10(s32), %bb.0, %17(s32), %bb.3 - %18:sreg_32(s1) = COPY %12(s1) - %19:_(s64) = G_SEXT %16(s32) - %20:_(s32) = G_CONSTANT i32 2 - %21:_(s64) = G_SHL %19, %20(s32) - %22:_(p1) = G_PTR_ADD %5, %21(s64) - %23:_(s32) = G_LOAD %22(p1) :: (load (s32), addrspace 1) - %24:_(s32) = G_CONSTANT i32 0 - %25:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %23(s32), %24 - %26:_(s1) = G_CONSTANT i1 true - %27:sreg_32(s1) = COPY %26(s1) - %28:sreg_32(s1) = S_ANDN2_B32 %18(s1), $exec_lo, implicit-def $scc - %29:sreg_32(s1) = S_AND_B32 $exec_lo, %27(s1), implicit-def $scc - %30:sreg_32(s1) = S_OR_B32 %28(s1), %29(s1), implicit-def $scc - %31:sreg_32(s1) = COPY %30(s1) - %32:sreg_32_xm0_xexec(s32) = SI_IF %25(s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec + %12:sreg_32(i1) = PHI %11(i1), %bb.0, %13(i1), %bb.3 + %14:_(i32) = G_PHI %15(i32), %bb.3, %10(i32), %bb.0 + %16:_(i32) = G_PHI %10(i32), %bb.0, %17(i32), %bb.3 + %18:sreg_32(i1) = COPY 
%12(i1) + %19:_(i64) = G_SEXT %16(i32) + %20:_(i32) = G_CONSTANT i32 2 + %21:_(i64) = G_SHL %19, %20(i32) + %22:_(p1) = G_PTR_ADD %5, %21(i64) + %23:_(i32) = G_LOAD %22(p1) :: (load (i32), addrspace 1) + %24:_(i32) = G_CONSTANT i32 0 + %25:sreg_32_xm0_xexec(i1) = G_ICMP intpred(ne), %23(i32), %24 + %26:_(i1) = G_CONSTANT i1 true + %27:sreg_32(i1) = COPY %26(i1) + %28:sreg_32(i1) = S_ANDN2_B32 %18(i1), $exec_lo, implicit-def $scc + %29:sreg_32(i1) = S_AND_B32 $exec_lo, %27(i1), implicit-def $scc + %30:sreg_32(i1) = S_OR_B32 %28(i1), %29(i1), implicit-def $scc + %31:sreg_32(i1) = COPY %30(i1) + %32:sreg_32_xm0_xexec(i32) = SI_IF %25(i1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.2 bb.2: successors: %bb.4(0x40000000), %bb.5(0x40000000) - %33:_(s32) = G_CONSTANT i32 2 - %34:_(s64) = G_SHL %19, %33(s32) - %35:_(p1) = G_PTR_ADD %8, %34(s64) - %36:_(s32) = G_LOAD %35(p1) :: (load (s32), addrspace 1) - %37:_(s32) = G_CONSTANT i32 0 - %38:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %36(s32), %37 - %39:_(s1) = G_CONSTANT i1 true - %40:sreg_32(s1) = COPY %39(s1) - %41:sreg_32(s1) = COPY %40(s1) - %42:sreg_32_xm0_xexec(s32) = SI_IF %38(s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec + %33:_(i32) = G_CONSTANT i32 2 + %34:_(i64) = G_SHL %19, %33(i32) + %35:_(p1) = G_PTR_ADD %8, %34(i64) + %36:_(i32) = G_LOAD %35(p1) :: (load (i32), addrspace 1) + %37:_(i32) = G_CONSTANT i32 0 + %38:sreg_32_xm0_xexec(i1) = G_ICMP intpred(ne), %36(i32), %37 + %39:_(i1) = G_CONSTANT i1 true + %40:sreg_32(i1) = COPY %39(i1) + %41:sreg_32(i1) = COPY %40(i1) + %42:sreg_32_xm0_xexec(i32) = SI_IF %38(i1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.4 bb.3: successors: %bb.6(0x04000000), %bb.1(0x7c000000) - %13:sreg_32(s1) = PHI %30(s1), %bb.1, %43(s1), %bb.5 - %17:_(s32) = G_PHI %44(s32), %bb.5, %9(s32), %bb.1 - %45:sreg_32(s1) = COPY %13(s1) - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %32(s32) - %15:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %45(s1), %14(s32) - SI_LOOP %15(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + %13:sreg_32(i1) = PHI %30(i1), %bb.1, %43(i1), %bb.5 + %17:_(i32) = G_PHI %44(i32), %bb.5, %9(i32), %bb.1 + %45:sreg_32(i1) = COPY %13(i1) + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %32(i32) + %15:sreg_32_xm0_xexec(i32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %45(i1), %14(i32) + SI_LOOP %15(i32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.6 bb.4: successors: %bb.5(0x80000000) - %46:_(s32) = G_CONSTANT i32 2 - %47:_(s64) = G_SHL %19, %46(s32) - %48:_(p1) = G_PTR_ADD %2, %47(s64) - %49:_(s32) = G_LOAD %48(p1) :: (load (s32), addrspace 1) - %50:_(s32) = G_CONSTANT i32 1 - %51:_(s32) = G_ADD %49, %50 - G_STORE %51(s32), %48(p1) :: (store (s32), addrspace 1) - %52:_(s32) = G_ADD %16, %50 - %53:_(s32) = G_CONSTANT i32 100 - %54:_(s1) = G_ICMP intpred(ult), %16(s32), %53 - %55:sreg_32(s1) = COPY %54(s1) - %56:sreg_32(s1) = S_ANDN2_B32 %41(s1), $exec_lo, implicit-def $scc - %57:sreg_32(s1) = S_AND_B32 $exec_lo, %55(s1), implicit-def $scc - %58:sreg_32(s1) = S_OR_B32 %56(s1), %57(s1), implicit-def $scc + %46:_(i32) = G_CONSTANT i32 2 + %47:_(i64) = G_SHL %19, %46(i32) + %48:_(p1) = G_PTR_ADD %2, %47(i64) + %49:_(i32) = G_LOAD %48(p1) :: (load (i32), addrspace 1) + %50:_(i32) = G_CONSTANT i32 1 + %51:_(i32) = G_ADD %49, %50 + G_STORE %51(i32), %48(p1) :: (store (i32), addrspace 1) + %52:_(i32) = G_ADD %16, %50 + %53:_(i32) = 
G_CONSTANT i32 100 + %54:_(i1) = G_ICMP intpred(ult), %16(i32), %53 + %55:sreg_32(i1) = COPY %54(i1) + %56:sreg_32(i1) = S_ANDN2_B32 %41(i1), $exec_lo, implicit-def $scc + %57:sreg_32(i1) = S_AND_B32 $exec_lo, %55(i1), implicit-def $scc + %58:sreg_32(i1) = S_OR_B32 %56(i1), %57(i1), implicit-def $scc bb.5: successors: %bb.3(0x80000000) - %59:sreg_32(s1) = PHI %40(s1), %bb.2, %58(s1), %bb.4 - %44:_(s32) = G_PHI %52(s32), %bb.4, %9(s32), %bb.2 - %60:sreg_32(s1) = COPY %59(s1) - %61:sreg_32(s1) = COPY %60(s1) - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %42(s32) - %62:sreg_32(s1) = S_ANDN2_B32 %31(s1), $exec_lo, implicit-def $scc - %63:sreg_32(s1) = S_AND_B32 $exec_lo, %61(s1), implicit-def $scc - %43:sreg_32(s1) = S_OR_B32 %62(s1), %63(s1), implicit-def $scc + %59:sreg_32(i1) = PHI %40(i1), %bb.2, %58(i1), %bb.4 + %44:_(i32) = G_PHI %52(i32), %bb.4, %9(i32), %bb.2 + %60:sreg_32(i1) = COPY %59(i1) + %61:sreg_32(i1) = COPY %60(i1) + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %42(i32) + %62:sreg_32(i1) = S_ANDN2_B32 %31(i1), $exec_lo, implicit-def $scc + %63:sreg_32(i1) = S_AND_B32 $exec_lo, %61(i1), implicit-def $scc + %43:sreg_32(i1) = S_OR_B32 %62(i1), %63(i1), implicit-def $scc G_BR %bb.3 bb.6: - %64:_(s32) = G_PHI %15(s32), %bb.3 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %64(s32) + %64:_(i32) = G_PHI %15(i32), %bb.3 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %64(i32) S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mul.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mul.mir index 6b91707328dba..ecbc9414156c6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mul.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mul.mir @@ -11,12 +11,12 @@ body: | ; CHECK-LABEL: name: mul_s32_ss ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_MUL %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[MUL:%[0-9]+]]:sgpr(i32) = G_MUL [[COPY]], [[COPY1]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_MUL %0, %1 ... --- @@ -29,13 +29,13 @@ body: | ; CHECK-LABEL: name: mul_s32_sv ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[MUL:%[0-9]+]]:vgpr(s32) = G_MUL [[COPY2]], [[COPY1]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = G_MUL %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[MUL:%[0-9]+]]:vgpr(i32) = G_MUL [[COPY2]], [[COPY1]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = G_MUL %0, %1 ... 
--- @@ -48,13 +48,13 @@ body: | ; CHECK-LABEL: name: mul_s32_vs ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[MUL:%[0-9]+]]:vgpr(s32) = G_MUL [[COPY]], [[COPY2]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s32) = G_MUL %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[MUL:%[0-9]+]]:vgpr(i32) = G_MUL [[COPY]], [[COPY2]] + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr0 + %2:_(i32) = G_MUL %0, %1 ... --- @@ -67,12 +67,12 @@ body: | ; CHECK-LABEL: name: mul_s32_vv ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MUL:%[0-9]+]]:vgpr(s32) = G_MUL [[COPY]], [[COPY1]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_MUL %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[MUL:%[0-9]+]]:vgpr(i32) = G_MUL [[COPY]], [[COPY1]] + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_MUL %0, %1 ... --- @@ -85,12 +85,12 @@ body: | ; CHECK-LABEL: name: mul_s64_ss ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 - ; CHECK-NEXT: [[MUL:%[0-9]+]]:sgpr(s64) = G_MUL [[COPY]], [[COPY1]] - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s64) = COPY $sgpr2_sgpr3 - %2:_(s64) = G_MUL %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i64) = COPY $sgpr2_sgpr3 + ; CHECK-NEXT: [[MUL:%[0-9]+]]:sgpr(i64) = G_MUL [[COPY]], [[COPY1]] + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(i64) = COPY $sgpr2_sgpr3 + %2:_(i64) = G_MUL %0, %1 ... 
--- @@ -103,20 +103,20 @@ body: | ; CHECK-LABEL: name: mul_s64_vv ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; CHECK-NEXT: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[UV]], [[UV2]] - ; CHECK-NEXT: [[MUL:%[0-9]+]]:vgpr(s32) = G_MUL [[UV]], [[UV3]] - ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[UMULH]], [[MUL]] - ; CHECK-NEXT: [[MUL1:%[0-9]+]]:vgpr(s32) = G_MUL [[UV1]], [[UV2]] - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[MUL1]] - ; CHECK-NEXT: [[MUL2:%[0-9]+]]:vgpr(s32) = G_MUL [[UV]], [[UV2]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[MUL2]](s32), [[ADD1]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = G_MUL %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; CHECK-NEXT: [[UMULH:%[0-9]+]]:vgpr(i32) = G_UMULH [[UV]], [[UV2]] + ; CHECK-NEXT: [[MUL:%[0-9]+]]:vgpr(i32) = G_MUL [[UV]], [[UV3]] + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(i32) = G_ADD [[UMULH]], [[MUL]] + ; CHECK-NEXT: [[MUL1:%[0-9]+]]:vgpr(i32) = G_MUL [[UV1]], [[UV2]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:vgpr(i32) = G_ADD [[ADD]], [[MUL1]] + ; CHECK-NEXT: [[MUL2:%[0-9]+]]:vgpr(i32) = G_MUL [[UV]], [[UV2]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[MUL2]](i32), [[ADD1]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64) = G_MUL %0, %1 ... --- @@ -129,12 +129,12 @@ body: | ; CHECK-LABEL: name: mul_s64_zext_ss ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64(s64) = COPY $sgpr2_sgpr3 - ; CHECK-NEXT: [[S_MUL_U64_:%[0-9]+]]:sgpr_64(s64) = S_MUL_U64 [[COPY]](s64), [[COPY1]](s64) - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s64) = COPY $sgpr2_sgpr3 - %2:_(s64) = G_AMDGPU_S_MUL_U64_U32 %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64(i64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64(i64) = COPY $sgpr2_sgpr3 + ; CHECK-NEXT: [[S_MUL_U64_:%[0-9]+]]:sgpr_64(i64) = S_MUL_U64 [[COPY]](i64), [[COPY1]](i64) + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(i64) = COPY $sgpr2_sgpr3 + %2:_(i64) = G_AMDGPU_S_MUL_U64_U32 %0, %1 ... 
--- @@ -147,15 +147,15 @@ body: | ; CHECK-LABEL: name: mul_s64_zext_vv ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr_32(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr_32(s32) = G_TRUNC [[COPY1]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:vreg_64(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vreg_64 = G_AMDGPU_MAD_U64_U32 [[TRUNC]](s32), [[TRUNC1]], [[C]] - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = G_AMDGPU_S_MUL_U64_U32 %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr_32(i32) = G_TRUNC [[COPY]](i64) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr_32(i32) = G_TRUNC [[COPY1]](i64) + ; CHECK-NEXT: [[C:%[0-9]+]]:vreg_64(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(i64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vreg_64 = G_AMDGPU_MAD_U64_U32 [[TRUNC]](i32), [[TRUNC1]], [[C]] + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64) = G_AMDGPU_S_MUL_U64_U32 %0, %1 ... --- @@ -168,12 +168,12 @@ body: | ; CHECK-LABEL: name: mul_s64_sext_ss ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64(s64) = COPY $sgpr2_sgpr3 - ; CHECK-NEXT: [[S_MUL_U64_:%[0-9]+]]:sgpr_64(s64) = S_MUL_U64 [[COPY]](s64), [[COPY1]](s64) - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s64) = COPY $sgpr2_sgpr3 - %2:_(s64) = G_AMDGPU_S_MUL_I64_I32 %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64(i64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64(i64) = COPY $sgpr2_sgpr3 + ; CHECK-NEXT: [[S_MUL_U64_:%[0-9]+]]:sgpr_64(i64) = S_MUL_U64 [[COPY]](i64), [[COPY1]](i64) + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(i64) = COPY $sgpr2_sgpr3 + %2:_(i64) = G_AMDGPU_S_MUL_I64_I32 %0, %1 ... --- @@ -186,13 +186,13 @@ body: | ; CHECK-LABEL: name: mul_s64_sext_vv ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr_32(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr_32(s32) = G_TRUNC [[COPY1]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:vreg_64(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[AMDGPU_MAD_I64_I32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_I64_I32_1:%[0-9]+]]:vreg_64 = G_AMDGPU_MAD_I64_I32 [[TRUNC]](s32), [[TRUNC1]], [[C]] - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = G_AMDGPU_S_MUL_I64_I32 %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr_32(i32) = G_TRUNC [[COPY]](i64) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr_32(i32) = G_TRUNC [[COPY1]](i64) + ; CHECK-NEXT: [[C:%[0-9]+]]:vreg_64(i64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[AMDGPU_MAD_I64_I32_:%[0-9]+]]:vgpr(i64), [[AMDGPU_MAD_I64_I32_1:%[0-9]+]]:vreg_64 = G_AMDGPU_MAD_I64_I32 [[TRUNC]](i32), [[TRUNC1]], [[C]] + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64) = G_AMDGPU_S_MUL_I64_I32 %0, %1 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-or.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-or.mir index da3b24c404efe..006cee4c6140c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-or.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-or.mir @@ -12,12 +12,12 @@ body: | ; CHECK-LABEL: name: or_s32_ss ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[OR:%[0-9]+]]:sgpr(s32) = G_OR [[COPY]], [[COPY1]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_OR %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[OR:%[0-9]+]]:sgpr(i32) = G_OR [[COPY]], [[COPY1]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_OR %0, %1 ... --- @@ -30,13 +30,13 @@ body: | ; CHECK-LABEL: name: or_s32_sv ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[COPY2]], [[COPY1]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = G_OR %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(i32) = G_OR [[COPY2]], [[COPY1]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = G_OR %0, %1 ... --- @@ -49,13 +49,13 @@ body: | ; CHECK-LABEL: name: or_s32_vs ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[COPY]], [[COPY2]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s32) = G_OR %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(i32) = G_OR [[COPY]], [[COPY2]] + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr0 + %2:_(i32) = G_OR %0, %1 ... --- @@ -68,12 +68,12 @@ body: | ; CHECK-LABEL: name: or_s32_vv ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[COPY]], [[COPY1]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_OR %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(i32) = G_OR [[COPY]], [[COPY1]] + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_OR %0, %1 ... 
--- @@ -86,25 +86,25 @@ body: | ; CHECK-LABEL: name: or_i1_scc_scc ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[C]] - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY1]](s32), [[C]] - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC1]](s1) - ; CHECK-NEXT: [[OR:%[0-9]+]]:sgpr(s32) = G_OR [[ANYEXT]], [[ANYEXT1]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[OR]](s32) - ; CHECK-NEXT: S_NOP 0, implicit [[TRUNC2]](s1) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(ne), %0, %2 - %5:_(s1) = G_ICMP intpred(ne), %1, %2 - %6:_(s1) = G_OR %4, %5 - S_NOP 0, implicit %6 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ne), [[COPY]](i32), [[C]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ne), [[COPY1]](i32), [[C]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP1]](i32) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC]](i1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC1]](i1) + ; CHECK-NEXT: [[OR:%[0-9]+]]:sgpr(i32) = G_OR [[ANYEXT]], [[ANYEXT1]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(i1) = G_TRUNC [[OR]](i32) + ; CHECK-NEXT: S_NOP 0, implicit [[TRUNC2]](i1) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_CONSTANT i32 0 + %3:_(i1) = G_ICMP intpred(ne), %0(i32), %2 + %4:_(i1) = G_ICMP intpred(ne), %1(i32), %2 + %5:_(i1) = G_OR %3, %4 + S_NOP 0, implicit %5(i1) ... 
--- @@ -117,22 +117,22 @@ body: | ; CHECK-LABEL: name: or_i1_vcc_vcc ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY2]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[COPY3]] - ; CHECK-NEXT: [[OR:%[0-9]+]]:vcc(s1) = G_OR [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: S_NOP 0, implicit [[OR]](s1) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(ne), %0, %2 - %5:_(s1) = G_ICMP intpred(ne), %1, %2 - %6:_(s1) = G_OR %4, %5 - S_NOP 0, implicit %6 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY2]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY1]](i32), [[COPY3]] + ; CHECK-NEXT: [[OR:%[0-9]+]]:vcc(i1) = G_OR [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: S_NOP 0, implicit [[OR]](i1) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_CONSTANT i32 0 + %3:_(i1) = G_ICMP intpred(ne), %0(i32), %2 + %4:_(i1) = G_ICMP intpred(ne), %1(i32), %2 + %5:_(i1) = G_OR %3, %4 + S_NOP 0, implicit %5(i1) ... --- @@ -145,23 +145,23 @@ body: | ; CHECK-LABEL: name: or_i1_scc_vcc ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[C]] - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[COPY2]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: [[OR:%[0-9]+]]:vcc(s1) = G_OR [[COPY3]], [[ICMP1]] - ; CHECK-NEXT: S_NOP 0, implicit [[OR]](s1) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(ne), %0, %2 - %5:_(s1) = G_ICMP intpred(ne), %1, %2 - %6:_(s1) = G_OR %4, %5 - S_NOP 0, implicit %6 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ne), [[COPY]](i32), [[C]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY1]](i32), [[COPY2]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; CHECK-NEXT: [[OR:%[0-9]+]]:vcc(i1) = G_OR [[COPY3]], [[ICMP1]] + ; CHECK-NEXT: S_NOP 0, implicit [[OR]](i1) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = G_CONSTANT i32 0 + %3:_(i1) = G_ICMP intpred(ne), %0(i32), %2 + %4:_(i1) = G_ICMP intpred(ne), %1(i32), %2 + 
%5:_(i1) = G_OR %3, %4 + S_NOP 0, implicit %5(i1) ... --- @@ -173,21 +173,21 @@ body: | ; CHECK-LABEL: name: or_i1_sgpr_trunc_sgpr_trunc ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC1]](s1) - ; CHECK-NEXT: [[OR:%[0-9]+]]:sgpr(s32) = G_OR [[ANYEXT]], [[ANYEXT1]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[OR]](s32) - ; CHECK-NEXT: S_NOP 0, implicit [[TRUNC2]](s1) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s1) = G_TRUNC %0 - %3:_(s1) = G_TRUNC %1 - %4:_(s1) = G_OR %2, %3 - S_NOP 0, implicit %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC]](i1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC1]](i1) + ; CHECK-NEXT: [[OR:%[0-9]+]]:sgpr(i32) = G_OR [[ANYEXT]], [[ANYEXT1]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(i1) = G_TRUNC [[OR]](i32) + ; CHECK-NEXT: S_NOP 0, implicit [[TRUNC2]](i1) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i1) = G_TRUNC %0(i32) + %3:_(i1) = G_TRUNC %1(i32) + %4:_(i1) = G_OR %2, %3 + S_NOP 0, implicit %4(i1) ... @@ -200,22 +200,22 @@ body: | ; CHECK-LABEL: name: or_i1_trunc_scc ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC1]](s1) - ; CHECK-NEXT: [[OR:%[0-9]+]]:sgpr(s32) = G_OR [[ANYEXT]], [[ANYEXT1]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[OR]](s32) - ; CHECK-NEXT: S_NOP 0, implicit [[TRUNC2]](s1) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s1) = G_TRUNC %0 - %3:_(s1) = G_ICMP intpred(ne), %0, %1 - %4:_(s1) = G_OR %2, %3 - S_NOP 0, implicit %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC]](i1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC1]](i1) + ; CHECK-NEXT: [[OR:%[0-9]+]]:sgpr(i32) = G_OR [[ANYEXT]], [[ANYEXT1]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(i1) = G_TRUNC [[OR]](i32) + ; CHECK-NEXT: S_NOP 0, implicit [[TRUNC2]](i1) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i1) = G_TRUNC %0(i32) + %3:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %4:_(i1) = G_OR %2, %3 + S_NOP 0, implicit %4(i1) ... 
--- @@ -227,20 +227,20 @@ body: | ; CHECK-LABEL: name: or_i1_s_trunc_vcc ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY2]](s32), [[COPY1]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: [[OR:%[0-9]+]]:vcc(s1) = G_OR [[COPY3]], [[ICMP]] - ; CHECK-NEXT: S_NOP 0, implicit [[OR]](s1) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s1) = G_TRUNC %0 - %3:_(s1) = G_ICMP intpred(ne), %0, %1 - %4:_(s1) = G_OR %2, %3 - S_NOP 0, implicit %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY2]](i32), [[COPY1]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; CHECK-NEXT: [[OR:%[0-9]+]]:vcc(i1) = G_OR [[COPY3]], [[ICMP]] + ; CHECK-NEXT: S_NOP 0, implicit [[OR]](i1) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i1) = G_TRUNC %0(i32) + %3:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %4:_(i1) = G_OR %2, %3 + S_NOP 0, implicit %4(i1) ... --- @@ -253,12 +253,12 @@ body: | ; CHECK-LABEL: name: or_s64_ss ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 - ; CHECK-NEXT: [[OR:%[0-9]+]]:sgpr(s64) = G_OR [[COPY]], [[COPY1]] - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s64) = COPY $sgpr2_sgpr3 - %2:_(s64) = G_OR %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i64) = COPY $sgpr2_sgpr3 + ; CHECK-NEXT: [[OR:%[0-9]+]]:sgpr(i64) = G_OR [[COPY]], [[COPY1]] + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(i64) = COPY $sgpr2_sgpr3 + %2:_(i64) = G_OR %0, %1 ... 
--- @@ -271,16 +271,16 @@ body: | ; CHECK-LABEL: name: or_s64_sv ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[UV]], [[UV2]] - ; CHECK-NEXT: [[OR1:%[0-9]+]]:vgpr(s32) = G_OR [[UV1]], [[UV3]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s64) = COPY $vgpr0_vgpr1 - %2:_(s64) = G_OR %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(i32), [[UV1:%[0-9]+]]:sgpr(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(i32) = G_OR [[UV]], [[UV2]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:vgpr(i32) = G_OR [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[OR]](i32), [[OR1]](i32) + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(i64) = COPY $vgpr0_vgpr1 + %2:_(i64) = G_OR %0, %1 ... --- @@ -293,16 +293,16 @@ body: | ; CHECK-LABEL: name: or_s64_vs ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[UV]], [[UV2]] - ; CHECK-NEXT: [[OR1:%[0-9]+]]:vgpr(s32) = G_OR [[UV1]], [[UV3]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $sgpr0_sgpr1 - %2:_(s64) = G_OR %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:sgpr(i32), [[UV3:%[0-9]+]]:sgpr(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(i32) = G_OR [[UV]], [[UV2]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:vgpr(i32) = G_OR [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[OR]](i32), [[OR1]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $sgpr0_sgpr1 + %2:_(i64) = G_OR %0, %1 ... 
--- @@ -315,16 +315,16 @@ body: | ; CHECK-LABEL: name: or_s64_vv ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[UV]], [[UV2]] - ; CHECK-NEXT: [[OR1:%[0-9]+]]:vgpr(s32) = G_OR [[UV1]], [[UV3]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = G_OR %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(i32) = G_OR [[UV]], [[UV2]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:vgpr(i32) = G_OR [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[OR]](i32), [[OR1]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64) = G_OR %0, %1 ... --- @@ -337,16 +337,16 @@ body: | ; CHECK-LABEL: name: or_s64_vv_flags ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; CHECK-NEXT: %3:vgpr(s32) = disjoint G_OR [[UV]], [[UV2]] - ; CHECK-NEXT: %4:vgpr(s32) = disjoint G_OR [[UV1]], [[UV3]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES %3(s32), %4(s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = disjoint G_OR %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(i32) = disjoint G_OR [[UV]], [[UV2]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:vgpr(i32) = disjoint G_OR [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[OR]](i32), [[OR1]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64) = disjoint G_OR %0, %1 ... 
--- @@ -359,18 +359,18 @@ body: | ; CHECK-LABEL: name: or_s64_vv_user ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[UV]], [[UV2]] - ; CHECK-NEXT: [[OR1:%[0-9]+]]:vgpr(s32) = G_OR [[UV1]], [[UV3]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) - ; CHECK-NEXT: S_NOP 0, implicit [[MV]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = G_OR %0, %1 - S_NOP 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(i32) = G_OR [[UV]], [[UV2]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:vgpr(i32) = G_OR [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[OR]](i32), [[OR1]](i32) + ; CHECK-NEXT: S_NOP 0, implicit [[MV]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64) = G_OR %0, %1 + S_NOP 0, implicit %2(i64) ... --- name: or_s64_ss_ss_merge @@ -382,22 +382,22 @@ body: | ; CHECK-LABEL: name: or_s64_ss_ss_merge ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:sgpr(s64) = G_OR [[MV]], [[MV1]] - ; CHECK-NEXT: S_NOP 0, implicit [[OR]](s64) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $sgpr2 - %3:_(s32) = COPY $sgpr3 - %4:_(s64) = G_MERGE_VALUES %0, %1 - %5:_(s64) = G_MERGE_VALUES %2, %3 - %6:_(s64) = G_OR %4, %5 - S_NOP 0, implicit %6 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:sgpr(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:sgpr(i64) = G_OR [[MV]], [[MV1]] + ; CHECK-NEXT: S_NOP 0, implicit [[OR]](i64) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $sgpr2 + %3:_(i32) = COPY $sgpr3 + %4:_(i64) = G_MERGE_VALUES %0(i32), %1(i32) + %5:_(i64) = G_MERGE_VALUES %2(i32), %3(i32) + %6:_(i64) = G_OR %4, %5 + S_NOP 0, implicit %6(i64) ... 
--- @@ -410,26 +410,26 @@ body: | ; CHECK-LABEL: name: or_s64_vv_vv_merge ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV1]](s64) - ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[UV]], [[UV2]] - ; CHECK-NEXT: [[OR1:%[0-9]+]]:vgpr(s32) = G_OR [[UV1]], [[UV3]] - ; CHECK-NEXT: [[MV2:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) - ; CHECK-NEXT: S_NOP 0, implicit [[MV2]](s64) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = COPY $vgpr3 - %4:_(s64) = G_MERGE_VALUES %0, %1 - %5:_(s64) = G_MERGE_VALUES %2, %3 - %6:_(s64) = G_OR %4, %5 - S_NOP 0, implicit %6 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[MV]](i64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[MV1]](i64) + ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(i32) = G_OR [[UV]], [[UV2]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:vgpr(i32) = G_OR [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV2:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[OR]](i32), [[OR1]](i32) + ; CHECK-NEXT: S_NOP 0, implicit [[MV2]](i64) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 + %4:_(i64) = G_MERGE_VALUES %0(i32), %1(i32) + %5:_(i64) = G_MERGE_VALUES %2(i32), %3(i32) + %6:_(i64) = G_OR %4, %5 + S_NOP 0, implicit %6(i64) ... 
--- @@ -442,23 +442,23 @@ body: | ; CHECK-LABEL: name: or_s64_s_sv_merge ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY3]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) - ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[UV]], [[UV2]] - ; CHECK-NEXT: [[OR1:%[0-9]+]]:vgpr(s32) = G_OR [[UV1]], [[UV3]] - ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) - ; CHECK-NEXT: S_NOP 0, implicit [[MV1]](s64) - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s32) = COPY $vgpr0 - %3:_(s64) = G_MERGE_VALUES %1, %2 - %4:_(s64) = G_OR %0, %3 - S_NOP 0, implicit %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[COPY3]](i32), [[COPY2]](i32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(i32), [[UV1:%[0-9]+]]:sgpr(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[MV]](i64) + ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(i32) = G_OR [[UV]], [[UV2]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:vgpr(i32) = G_OR [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[OR]](i32), [[OR1]](i32) + ; CHECK-NEXT: S_NOP 0, implicit [[MV1]](i64) + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(i32) = COPY $sgpr2 + %2:_(i32) = COPY $vgpr0 + %3:_(i64) = G_MERGE_VALUES %1(i32), %2(i32) + %4:_(i64) = G_OR %0, %3 + S_NOP 0, implicit %4(i64) ... 
--- @@ -471,23 +471,23 @@ body: | ; CHECK-LABEL: name: or_s64_s_vs_merge ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) - ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[UV]], [[UV2]] - ; CHECK-NEXT: [[OR1:%[0-9]+]]:vgpr(s32) = G_OR [[UV1]], [[UV3]] - ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) - ; CHECK-NEXT: S_NOP 0, implicit [[MV1]](s64) - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s32) = COPY $vgpr0 - %3:_(s64) = G_MERGE_VALUES %2, %1 - %4:_(s64) = G_OR %0, %3 - S_NOP 0, implicit %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(i32), [[UV1:%[0-9]+]]:sgpr(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[MV]](i64) + ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(i32) = G_OR [[UV]], [[UV2]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:vgpr(i32) = G_OR [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[OR]](i32), [[OR1]](i32) + ; CHECK-NEXT: S_NOP 0, implicit [[MV1]](i64) + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(i32) = COPY $sgpr2 + %2:_(i32) = COPY $vgpr0 + %3:_(i64) = G_MERGE_VALUES %2(i32), %1(i32) + %4:_(i64) = G_OR %0, %3 + S_NOP 0, implicit %4(i64) ... 
--- @@ -500,28 +500,28 @@ body: | ; CHECK-LABEL: name: or_s64_sv_sv_merge ; CHECK: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY5]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV1]](s64) - ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[UV]], [[UV2]] - ; CHECK-NEXT: [[OR1:%[0-9]+]]:vgpr(s32) = G_OR [[UV1]], [[UV3]] - ; CHECK-NEXT: [[MV2:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) - ; CHECK-NEXT: S_NOP 0, implicit [[MV2]](s64) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $vgpr0 - %3:_(s32) = COPY $vgpr1 - %4:_(s64) = G_MERGE_VALUES %0, %2 - %5:_(s64) = G_MERGE_VALUES %1, %3 - %6:_(s64) = G_OR %4, %5 - S_NOP 0, implicit %6 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY2]](i32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[COPY5]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[MV]](i64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[MV1]](i64) + ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(i32) = G_OR [[UV]], [[UV2]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:vgpr(i32) = G_OR [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV2:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[OR]](i32), [[OR1]](i32) + ; CHECK-NEXT: S_NOP 0, implicit [[MV2]](i64) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $vgpr0 + %3:_(i32) = COPY $vgpr1 + %4:_(i64) = G_MERGE_VALUES %0(i32), %2(i32) + %5:_(i64) = G_MERGE_VALUES %1(i32), %3(i32) + %6:_(i64) = G_OR %4, %5 + S_NOP 0, implicit %6(i64) ... 
--- @@ -534,28 +534,28 @@ body: | ; CHECK-LABEL: name: or_s64_sv_vs_merge ; CHECK: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY3]](s32), [[COPY5]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV1]](s64) - ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[UV]], [[UV2]] - ; CHECK-NEXT: [[OR1:%[0-9]+]]:vgpr(s32) = G_OR [[UV1]], [[UV3]] - ; CHECK-NEXT: [[MV2:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) - ; CHECK-NEXT: S_NOP 0, implicit [[MV2]](s64) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $vgpr0 - %3:_(s32) = COPY $vgpr1 - %4:_(s64) = G_MERGE_VALUES %0, %2 - %5:_(s64) = G_MERGE_VALUES %3, %1 - %6:_(s64) = G_OR %4, %5 - S_NOP 0, implicit %6 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY2]](i32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[COPY3]](i32), [[COPY5]](i32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[MV]](i64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[MV1]](i64) + ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(i32) = G_OR [[UV]], [[UV2]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:vgpr(i32) = G_OR [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV2:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[OR]](i32), [[OR1]](i32) + ; CHECK-NEXT: S_NOP 0, implicit [[MV2]](i64) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $vgpr0 + %3:_(i32) = COPY $vgpr1 + %4:_(i64) = G_MERGE_VALUES %0(i32), %2(i32) + %5:_(i64) = G_MERGE_VALUES %3(i32), %1(i32) + %6:_(i64) = G_OR %4, %5 + S_NOP 0, implicit %6(i64) ... 
--- @@ -568,26 +568,26 @@ body: | ; CHECK-LABEL: name: or_chain_s64_sv ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](s64) - ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[UV]], [[UV2]] - ; CHECK-NEXT: [[OR1:%[0-9]+]]:vgpr(s32) = G_OR [[UV1]], [[UV3]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:vgpr(s32) = G_OR [[UV4]], [[UV6]] - ; CHECK-NEXT: [[OR3:%[0-9]+]]:vgpr(s32) = G_OR [[UV5]], [[UV7]] - ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32) - ; CHECK-NEXT: S_NOP 0, implicit [[MV1]](s64) - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s64) = COPY $sgpr2_sgpr3 - %2:_(s64) = COPY $vgpr0_vgpr1 - %3:_(s64) = G_OR %0, %2 - %4:_(s64) = G_OR %1, %3 - S_NOP 0, implicit %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i64) = COPY $sgpr2_sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(i32), [[UV1:%[0-9]+]]:sgpr(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY2]](i64) + ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(i32) = G_OR [[UV]], [[UV2]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:vgpr(i32) = G_OR [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[OR]](i32), [[OR1]](i32) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:sgpr(i32), [[UV5:%[0-9]+]]:sgpr(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:vgpr(i32), [[UV7:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[MV]](i64) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:vgpr(i32) = G_OR [[UV4]], [[UV6]] + ; CHECK-NEXT: [[OR3:%[0-9]+]]:vgpr(i32) = G_OR [[UV5]], [[UV7]] + ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[OR2]](i32), [[OR3]](i32) + ; CHECK-NEXT: S_NOP 0, implicit [[MV1]](i64) + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(i64) = COPY $sgpr2_sgpr3 + %2:_(i64) = COPY $vgpr0_vgpr1 + %3:_(i64) = G_OR %0, %2 + %4:_(i64) = G_OR %1, %3 + S_NOP 0, implicit %4(i64) ... 
--- @@ -600,14 +600,14 @@ body: | ; CHECK-LABEL: name: or_v2i32_ss ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s32>) = COPY $sgpr2_sgpr3 - ; CHECK-NEXT: [[OR:%[0-9]+]]:sgpr(<2 x s32>) = G_OR [[COPY]], [[COPY1]] - ; CHECK-NEXT: S_NOP 0, implicit [[OR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $sgpr0_sgpr1 - %1:_(<2 x s32>) = COPY $sgpr2_sgpr3 - %2:_(<2 x s32>) = G_OR %0, %1 - S_NOP 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x i32>) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x i32>) = COPY $sgpr2_sgpr3 + ; CHECK-NEXT: [[OR:%[0-9]+]]:sgpr(<2 x i32>) = G_OR [[COPY]], [[COPY1]] + ; CHECK-NEXT: S_NOP 0, implicit [[OR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $sgpr0_sgpr1 + %1:_(<2 x i32>) = COPY $sgpr2_sgpr3 + %2:_(<2 x i32>) = G_OR %0, %1 + S_NOP 0, implicit %2(<2 x i32>) ... --- @@ -620,18 +620,18 @@ body: | ; CHECK-LABEL: name: or_v2i32_sv ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[UV]], [[UV2]] - ; CHECK-NEXT: [[OR1:%[0-9]+]]:vgpr(s32) = G_OR [[UV1]], [[UV3]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) - ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $sgpr0_sgpr1 - %1:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %2:_(<2 x s32>) = G_OR %0, %1 - S_NOP 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x i32>) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(i32), [[UV1:%[0-9]+]]:sgpr(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(i32) = G_OR [[UV]], [[UV2]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:vgpr(i32) = G_OR [[UV1]], [[UV3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32) + ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $sgpr0_sgpr1 + %1:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %2:_(<2 x i32>) = G_OR %0, %1 + S_NOP 0, implicit %2(<2 x i32>) ... 
--- @@ -645,18 +645,18 @@ body: | ; CHECK-LABEL: name: or_v2i32_vs ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[UV]], [[UV2]] - ; CHECK-NEXT: [[OR1:%[0-9]+]]:vgpr(s32) = G_OR [[UV1]], [[UV3]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) - ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $sgpr0_sgpr1 - %2:_(<2 x s32>) = G_OR %0, %1 - S_NOP 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x i32>) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:sgpr(i32), [[UV3:%[0-9]+]]:sgpr(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(i32) = G_OR [[UV]], [[UV2]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:vgpr(i32) = G_OR [[UV1]], [[UV3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32) + ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $sgpr0_sgpr1 + %2:_(<2 x i32>) = G_OR %0, %1 + S_NOP 0, implicit %2(<2 x i32>) ... --- @@ -669,18 +669,18 @@ body: | ; CHECK-LABEL: name: or_v2i32_vv ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[UV]], [[UV2]] - ; CHECK-NEXT: [[OR1:%[0-9]+]]:vgpr(s32) = G_OR [[UV1]], [[UV3]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) - ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = G_OR %0, %1 - S_NOP 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x i32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(i32) = G_OR [[UV]], [[UV2]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:vgpr(i32) = G_OR [[UV1]], [[UV3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x i32>) = G_BUILD_VECTOR [[OR]](i32), [[OR1]](i32) + ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i32>) = G_OR %0, %1 + S_NOP 0, implicit %2(<2 x i32>) ... 
--- @@ -693,12 +693,12 @@ body: | ; CHECK-LABEL: name: or_v4s16_ss ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr2_sgpr3 - ; CHECK-NEXT: [[OR:%[0-9]+]]:sgpr(<4 x s16>) = G_OR [[COPY]], [[COPY1]] - %0:_(<4 x s16>) = COPY $sgpr0_sgpr1 - %1:_(<4 x s16>) = COPY $sgpr2_sgpr3 - %2:_(<4 x s16>) = G_OR %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x i16>) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<4 x i16>) = COPY $sgpr2_sgpr3 + ; CHECK-NEXT: [[OR:%[0-9]+]]:sgpr(<4 x i16>) = G_OR [[COPY]], [[COPY1]] + %0:_(<4 x i16>) = COPY $sgpr0_sgpr1 + %1:_(<4 x i16>) = COPY $sgpr2_sgpr3 + %2:_(<4 x i16>) = G_OR %0, %1 ... --- @@ -711,16 +711,16 @@ body: | ; CHECK-LABEL: name: or_v4s16_sv ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(<2 x s16>), [[UV1:%[0-9]+]]:sgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x s16>), [[UV3:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(<2 x s16>) = G_OR [[UV]], [[UV2]] - ; CHECK-NEXT: [[OR1:%[0-9]+]]:vgpr(<2 x s16>) = G_OR [[UV1]], [[UV3]] - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[OR]](<2 x s16>), [[OR1]](<2 x s16>) - %0:_(<4 x s16>) = COPY $sgpr0_sgpr1 - %1:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %2:_(<4 x s16>) = G_OR %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x i16>) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(<2 x i16>), [[UV1:%[0-9]+]]:sgpr(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x i16>), [[UV3:%[0-9]+]]:vgpr(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(<2 x i16>) = G_OR [[UV]], [[UV2]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:vgpr(<2 x i16>) = G_OR [[UV1]], [[UV3]] + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x i16>) = G_CONCAT_VECTORS [[OR]](<2 x i16>), [[OR1]](<2 x i16>) + %0:_(<4 x i16>) = COPY $sgpr0_sgpr1 + %1:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %2:_(<4 x i16>) = G_OR %0, %1 ... 
--- @@ -733,16 +733,16 @@ body: | ; CHECK-LABEL: name: or_v4s16_vs ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s16>), [[UV1:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:sgpr(<2 x s16>), [[UV3:%[0-9]+]]:sgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(<2 x s16>) = G_OR [[UV]], [[UV2]] - ; CHECK-NEXT: [[OR1:%[0-9]+]]:vgpr(<2 x s16>) = G_OR [[UV1]], [[UV3]] - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[OR]](<2 x s16>), [[OR1]](<2 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $sgpr0_sgpr1 - %2:_(<4 x s16>) = G_OR %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<4 x i16>) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x i16>), [[UV1:%[0-9]+]]:vgpr(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:sgpr(<2 x i16>), [[UV3:%[0-9]+]]:sgpr(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(<2 x i16>) = G_OR [[UV]], [[UV2]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:vgpr(<2 x i16>) = G_OR [[UV1]], [[UV3]] + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x i16>) = G_CONCAT_VECTORS [[OR]](<2 x i16>), [[OR1]](<2 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = COPY $sgpr0_sgpr1 + %2:_(<4 x i16>) = G_OR %0, %1 ... --- @@ -755,16 +755,16 @@ body: | ; CHECK-LABEL: name: or_v4s16_vv ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s16>), [[UV1:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x s16>), [[UV3:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(<2 x s16>) = G_OR [[UV]], [[UV2]] - ; CHECK-NEXT: [[OR1:%[0-9]+]]:vgpr(<2 x s16>) = G_OR [[UV1]], [[UV3]] - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[OR]](<2 x s16>), [[OR1]](<2 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 - %2:_(<4 x s16>) = G_OR %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<4 x i16>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x i16>), [[UV1:%[0-9]+]]:vgpr(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x i16>), [[UV3:%[0-9]+]]:vgpr(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(<2 x i16>) = G_OR [[UV]], [[UV2]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:vgpr(<2 x i16>) = G_OR [[UV1]], [[UV3]] + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x i16>) = G_CONCAT_VECTORS [[OR]](<2 x i16>), [[OR1]](<2 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = COPY $vgpr2_vgpr3 + %2:_(<4 x i16>) = G_OR %0, %1 ... 
--- @@ -777,12 +777,12 @@ body: | ; CHECK-LABEL: name: or_v2s16_ss ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1 - ; CHECK-NEXT: [[OR:%[0-9]+]]:sgpr(<2 x s16>) = G_OR [[COPY]], [[COPY1]] - %0:_(<2 x s16>) = COPY $sgpr0 - %1:_(<2 x s16>) = COPY $sgpr1 - %2:_(<2 x s16>) = G_OR %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr1 + ; CHECK-NEXT: [[OR:%[0-9]+]]:sgpr(<2 x i16>) = G_OR [[COPY]], [[COPY1]] + %0:_(<2 x i16>) = COPY $sgpr0 + %1:_(<2 x i16>) = COPY $sgpr1 + %2:_(<2 x i16>) = G_OR %0, %1 ... --- @@ -795,13 +795,13 @@ body: | ; CHECK-LABEL: name: or_v2s16_sv ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(<2 x s16>) = G_OR [[COPY2]], [[COPY1]] - %0:_(<2 x s16>) = COPY $sgpr0 - %1:_(<2 x s16>) = COPY $vgpr0 - %2:_(<2 x s16>) = G_OR %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x i16>) = COPY [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(<2 x i16>) = G_OR [[COPY2]], [[COPY1]] + %0:_(<2 x i16>) = COPY $sgpr0 + %1:_(<2 x i16>) = COPY $vgpr0 + %2:_(<2 x i16>) = G_OR %0, %1 ... --- @@ -814,13 +814,13 @@ body: | ; CHECK-LABEL: name: or_v2s16_vs ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(<2 x s16>) = G_OR [[COPY]], [[COPY2]] - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $sgpr0 - %2:_(<2 x s16>) = G_OR %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x i16>) = COPY [[COPY1]](<2 x i16>) + ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(<2 x i16>) = G_OR [[COPY]], [[COPY2]] + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $sgpr0 + %2:_(<2 x i16>) = G_OR %0, %1 ... --- @@ -833,10 +833,10 @@ body: | ; CHECK-LABEL: name: or_v2s16_vv ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(<2 x s16>) = G_OR [[COPY]], [[COPY1]] - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_OR %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr1 + ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(<2 x i16>) = G_OR [[COPY]], [[COPY1]] + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = G_OR %0, %1 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-phi-s1.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-phi-s1.mir index 39e421c1b8f94..5b11e251c4d1f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-phi-s1.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-phi-s1.mir @@ -13,88 +13,93 @@ body: | ; FAST-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; FAST-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; FAST-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY]](i32), [[C]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP1]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i1) + ; FAST-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC]](i1) + ; FAST-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[ICMP2:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; FAST-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP2]](s32) - ; FAST-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s1) + ; FAST-NEXT: [[ICMP2:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY1]](i32), [[C]] + ; FAST-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP2]](i32) + ; FAST-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC2]](i1) ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 - ; FAST-NEXT: [[TRUNC3:%[0-9]+]]:sgpr(s1) = G_TRUNC [[PHI]](s32) - ; FAST-NEXT: [[ZEXT1:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC3]](s1) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ZEXT1]](s32), [[C]], [[COPY]] - ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; FAST-NEXT: [[PHI:%[0-9]+]]:sgpr(i32) = G_PHI [[ANYEXT]](i32), %bb.0, [[ANYEXT1]](i32), %bb.1 + ; FAST-NEXT: [[TRUNC3:%[0-9]+]]:sgpr(i1) = G_TRUNC [[PHI]](i32) + ; FAST-NEXT: [[ZEXT1:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC3]](i1) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:sgpr(i32) = G_SELECT [[ZEXT1]](i32), [[C]], [[COPY]] + ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) + ; ; GREEDY-LABEL: name: phi_s1_scc_scc_sbranch ; GREEDY: bb.0: ; GREEDY-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: 
[[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; GREEDY-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; GREEDY-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP1]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i1) + ; GREEDY-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC]](i1) + ; GREEDY-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP2]](s32) - ; GREEDY-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s1) + ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY1]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP2]](i32) + ; GREEDY-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC2]](i1) ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 - ; GREEDY-NEXT: [[TRUNC3:%[0-9]+]]:sgpr(s1) = G_TRUNC [[PHI]](s32) - ; GREEDY-NEXT: [[ZEXT1:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC3]](s1) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ZEXT1]](s32), [[C]], [[COPY]] - ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sgpr(i32) = G_PHI [[ANYEXT]](i32), %bb.0, [[ANYEXT1]](i32), %bb.1 + ; GREEDY-NEXT: [[TRUNC3:%[0-9]+]]:sgpr(i1) = G_TRUNC [[PHI]](i32) + ; GREEDY-NEXT: [[ZEXT1:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC3]](i1) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:sgpr(i32) = G_SELECT [[ZEXT1]](i32), [[C]], [[COPY]] + ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $sgpr0, $sgpr1, $sgpr2 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $sgpr2 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(eq), %0, %3 - %5:_(s1) = G_ICMP intpred(eq), %2, %3 - G_BRCOND %5, %bb.1 + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $sgpr2 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(eq), %0(i32), %3 + %5:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + G_BRCOND %5(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - 
%6:_(s1) = G_ICMP intpred(eq), %1, %3 + %6:_(i1) = G_ICMP intpred(eq), %1(i32), %3 G_BR %bb.2 bb.2: - %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1 - %8:_(s32) = G_SELECT %7, %3, %0 - S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8 + %7:_(i1) = G_PHI %4(i1), %bb.0, %6(i1), %bb.1 + %8:_(i32) = G_SELECT %7(i1), %3, %0 + S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8(i32) + + + + ... @@ -109,132 +114,139 @@ body: | ; FAST-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) ; FAST-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[COPY3]] - ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; FAST-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; FAST-NEXT: G_BRCOND [[ZEXT]](s32), %bb.3 + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY3]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[COPY3]] + ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP1]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i1) + ; FAST-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC]](i1) + ; FAST-NEXT: G_BRCOND [[ZEXT]](i32), %bb.3 ; FAST-NEXT: G_BR %bb.1 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 99 - ; FAST-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 888 - ; FAST-NEXT: [[ICMP2:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; FAST-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP2]](s32) - ; FAST-NEXT: [[ICMP3:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[C2]] - ; FAST-NEXT: [[TRUNC3:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP3]](s32) - ; FAST-NEXT: [[ZEXT1:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC3]](s1) - ; FAST-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s1) - ; FAST-NEXT: G_BRCOND [[ZEXT1]](s32), %bb.3 + ; FAST-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 99 + ; FAST-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 888 + ; FAST-NEXT: [[ICMP2:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY1]](i32), [[C1]] + ; FAST-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP2]](i32) + ; FAST-NEXT: [[ICMP3:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY1]](i32), [[C2]] + ; FAST-NEXT: [[TRUNC3:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP3]](i32) + ; FAST-NEXT: [[ZEXT1:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC3]](i1) + ; FAST-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC2]](i1) + ; FAST-NEXT: G_BRCOND [[ZEXT1]](i32), %bb.3 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: ; FAST-NEXT: 
successors: %bb.3(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 123 - ; FAST-NEXT: [[ICMP4:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C3]] - ; FAST-NEXT: [[TRUNC4:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP4]](s32) - ; FAST-NEXT: [[ANYEXT2:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC4]](s1) + ; FAST-NEXT: [[C3:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 123 + ; FAST-NEXT: [[ICMP4:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C3]] + ; FAST-NEXT: [[TRUNC4:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP4]](i32) + ; FAST-NEXT: [[ANYEXT2:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC4]](i1) ; FAST-NEXT: G_BR %bb.3 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.3: - ; FAST-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1, [[ANYEXT2]](s32), %bb.2 - ; FAST-NEXT: [[TRUNC5:%[0-9]+]]:sgpr(s1) = G_TRUNC [[PHI]](s32) - ; FAST-NEXT: [[ZEXT2:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC5]](s1) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ZEXT2]](s32), [[COPY]], [[COPY1]] - ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; FAST-NEXT: [[PHI:%[0-9]+]]:sgpr(i32) = G_PHI [[ANYEXT]](i32), %bb.0, [[ANYEXT1]](i32), %bb.1, [[ANYEXT2]](i32), %bb.2 + ; FAST-NEXT: [[TRUNC5:%[0-9]+]]:sgpr(i1) = G_TRUNC [[PHI]](i32) + ; FAST-NEXT: [[ZEXT2:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC5]](i1) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:sgpr(i32) = G_SELECT [[ZEXT2]](i32), [[COPY]], [[COPY1]] + ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) + ; ; GREEDY-LABEL: name: phi_s1_scc_scc_scc_sbranch ; GREEDY: bb.0: ; GREEDY-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) ; GREEDY-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[COPY3]] - ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; GREEDY-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; GREEDY-NEXT: G_BRCOND [[ZEXT]](s32), %bb.3 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY3]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[COPY3]] + ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP1]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i1) + ; GREEDY-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC]](i1) + ; GREEDY-NEXT: G_BRCOND [[ZEXT]](i32), %bb.3 ; GREEDY-NEXT: G_BR %bb.1 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT 
i32 99 - ; GREEDY-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 888 - ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; GREEDY-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP2]](s32) - ; GREEDY-NEXT: [[ICMP3:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[C2]] - ; GREEDY-NEXT: [[TRUNC3:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP3]](s32) - ; GREEDY-NEXT: [[ZEXT1:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC3]](s1) - ; GREEDY-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s1) - ; GREEDY-NEXT: G_BRCOND [[ZEXT1]](s32), %bb.3 + ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 99 + ; GREEDY-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 888 + ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY1]](i32), [[C1]] + ; GREEDY-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP2]](i32) + ; GREEDY-NEXT: [[ICMP3:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY1]](i32), [[C2]] + ; GREEDY-NEXT: [[TRUNC3:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP3]](i32) + ; GREEDY-NEXT: [[ZEXT1:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC3]](i1) + ; GREEDY-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC2]](i1) + ; GREEDY-NEXT: G_BRCOND [[ZEXT1]](i32), %bb.3 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: successors: %bb.3(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 123 - ; GREEDY-NEXT: [[ICMP4:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C3]] - ; GREEDY-NEXT: [[TRUNC4:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP4]](s32) - ; GREEDY-NEXT: [[ANYEXT2:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC4]](s1) + ; GREEDY-NEXT: [[C3:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 123 + ; GREEDY-NEXT: [[ICMP4:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C3]] + ; GREEDY-NEXT: [[TRUNC4:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP4]](i32) + ; GREEDY-NEXT: [[ANYEXT2:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC4]](i1) ; GREEDY-NEXT: G_BR %bb.3 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.3: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1, [[ANYEXT2]](s32), %bb.2 - ; GREEDY-NEXT: [[TRUNC5:%[0-9]+]]:sgpr(s1) = G_TRUNC [[PHI]](s32) - ; GREEDY-NEXT: [[ZEXT2:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC5]](s1) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ZEXT2]](s32), [[COPY]], [[COPY1]] - ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sgpr(i32) = G_PHI [[ANYEXT]](i32), %bb.0, [[ANYEXT1]](i32), %bb.1, [[ANYEXT2]](i32), %bb.2 + ; GREEDY-NEXT: [[TRUNC5:%[0-9]+]]:sgpr(i1) = G_TRUNC [[PHI]](i32) + ; GREEDY-NEXT: [[ZEXT2:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC5]](i1) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:sgpr(i32) = G_SELECT [[ZEXT2]](i32), [[COPY]], [[COPY1]] + ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) bb.0: - successors: %bb.1, %bb.3 + successors: %bb.1(0x40000000), %bb.3(0x40000000) liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $sgpr2 - %3:_(s32) = COPY $sgpr3 - %4:_(s32) = G_CONSTANT i32 0 - %5:_(s1) = G_ICMP intpred(eq), %0, %3 - %6:_(s1) = G_ICMP intpred(eq), %2, %3 - G_BRCOND %6, %bb.3 + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $sgpr2 + %3:_(i32) = COPY $sgpr3 + %4:_(i32) = G_CONSTANT i32 0 + %5:_(i1) = G_ICMP intpred(eq), %0(i32), %3 + %6:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + G_BRCOND %6(i1), %bb.3 G_BR %bb.1 bb.1: - successors: %bb.2, %bb.3 + successors: 
%bb.2(0x40000000), %bb.3(0x40000000) - %7:_(s32) = G_CONSTANT i32 99 - %8:_(s32) = G_CONSTANT i32 888 - %9:_(s1) = G_ICMP intpred(eq), %1, %7 - %10:_(s1) = G_ICMP intpred(eq), %1, %8 - G_BRCOND %10, %bb.3 + %7:_(i32) = G_CONSTANT i32 99 + %8:_(i32) = G_CONSTANT i32 888 + %9:_(i1) = G_ICMP intpred(eq), %1(i32), %7 + %10:_(i1) = G_ICMP intpred(eq), %1(i32), %8 + G_BRCOND %10(i1), %bb.3 G_BR %bb.2 bb.2: - successors: %bb.3 + successors: %bb.3(0x80000000) - %11:_(s32) = G_CONSTANT i32 123 - %12:_(s1) = G_ICMP intpred(eq), %2, %11 + %11:_(i32) = G_CONSTANT i32 123 + %12:_(i1) = G_ICMP intpred(eq), %2(i32), %11 G_BR %bb.3 bb.3: - %13:_(s1) = G_PHI %5, %bb.0, %9, %bb.1, %12, %bb.2 - %14:_(s32) = G_SELECT %13, %0, %1 - S_SETPC_B64 undef $sgpr30_sgpr31, implicit %14 + %13:_(i1) = G_PHI %5(i1), %bb.0, %9(i1), %bb.1, %12(i1), %bb.2 + %14:_(i32) = G_SELECT %13(i1), %0, %1 + S_SETPC_B64 undef $sgpr30_sgpr31, implicit %14(i32) + + + + + + ... @@ -249,86 +261,91 @@ body: | ; FAST-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY]](i32), [[C]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP1]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i1) + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; FAST-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY4]] + ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; FAST-NEXT: [[ICMP2:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[COPY4]] ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[COPY3]](s1), %bb.0, [[ICMP2]](s1), %bb.1 - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY6]] - ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(i1) = G_PHI [[COPY3]](i1), %bb.0, [[ICMP2]](i1), %bb.1 + ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; FAST-NEXT: 
[[COPY6:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[PHI]](i1), [[COPY5]], [[COPY6]] + ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) + ; ; GREEDY-LABEL: name: phi_s1_scc_vcc_sbranch ; GREEDY: bb.0: ; GREEDY-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP1]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i1) + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; GREEDY-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY4]] + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[COPY4]] ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[COPY3]](s1), %bb.0, [[ICMP2]](s1), %bb.1 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY6]] - ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(i1) = G_PHI [[COPY3]](i1), %bb.0, [[ICMP2]](i1), %bb.1 + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[PHI]](i1), [[COPY5]], [[COPY6]] + ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $sgpr0, $sgpr1, $vgpr0 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = COPY $sgpr1 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(eq), %0, %3 - %5:_(s1) = G_ICMP intpred(eq), %2, %3 - G_BRCOND %5, %bb.1 + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = COPY 
$sgpr1 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(eq), %0(i32), %3 + %5:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + G_BRCOND %5(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %6:_(s1) = G_ICMP intpred(eq), %1, %3 + %6:_(i1) = G_ICMP intpred(eq), %1(i32), %3 G_BR %bb.2 bb.2: - %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1 - %8:_(s32) = G_SELECT %7, %3, %0 - S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8 + %7:_(i1) = G_PHI %4(i1), %bb.0, %6(i1), %bb.1 + %8:_(i32) = G_SELECT %7(i1), %3, %0 + S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8(i32) + + + + ... @@ -343,86 +360,91 @@ body: | ; FAST-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] - ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; FAST-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY3]] + ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP1]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; FAST-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[ICMP2:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP2]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC1]](s1) + ; FAST-NEXT: [[ICMP2:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY1]](i32), [[C]] + ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP2]](i32) + ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(i1) = COPY [[TRUNC1]](i1) ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[ICMP]](s1), %bb.0, [[COPY4]](s1), %bb.1 - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY6]] - ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(i1) = G_PHI [[ICMP]](i1), %bb.0, [[COPY4]](i1), %bb.1 + ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; FAST-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[PHI]](i1), [[COPY5]], [[COPY6]] + ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) + ; ; GREEDY-LABEL: name: phi_s1_vcc_scc_sbranch ; GREEDY: bb.0: ; GREEDY-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; 
GREEDY-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] - ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; GREEDY-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY3]] + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP1]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; GREEDY-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP2]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC1]](s1) + ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY1]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP2]](i32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(i1) = COPY [[TRUNC1]](i1) ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[ICMP]](s1), %bb.0, [[COPY4]](s1), %bb.1 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY6]] - ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(i1) = G_PHI [[ICMP]](i1), %bb.0, [[COPY4]](i1), %bb.1 + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[PHI]](i1), [[COPY5]], [[COPY6]] + ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0, $sgpr0, $sgpr1 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s32) = COPY $sgpr1 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(eq), %0, %3 - %5:_(s1) = G_ICMP intpred(eq), %2, %3 - G_BRCOND %5, %bb.1 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr0 + %2:_(i32) = COPY $sgpr1 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(eq), %0(i32), %3 + %5:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + G_BRCOND %5(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %6:_(s1) = G_ICMP intpred(eq), %1, %3 + %6:_(i1) = G_ICMP intpred(eq), %1(i32), %3 G_BR %bb.2 bb.2: - %7:_(s1) = 
G_PHI %4, %bb.0, %6, %bb.1 - %8:_(s32) = G_SELECT %7, %3, %1 - S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8 + %7:_(i1) = G_PHI %4(i1), %bb.0, %6(i1), %bb.1 + %8:_(i32) = G_SELECT %7(i1), %3, %1 + S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8(i32) + + + + ... @@ -437,82 +459,87 @@ body: | ; FAST-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] - ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; FAST-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY3]] + ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP1]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; FAST-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY4]] + ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; FAST-NEXT: [[ICMP2:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[COPY4]] ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[ICMP]](s1), %bb.0, [[ICMP2]](s1), %bb.1 - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY]] - ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(i1) = G_PHI [[ICMP]](i1), %bb.0, [[ICMP2]](i1), %bb.1 + ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[PHI]](i1), [[COPY5]], [[COPY]] + ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) + ; ; GREEDY-LABEL: name: phi_s1_vcc_vcc_sbranch ; GREEDY: bb.0: ; GREEDY-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] - ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) 
- ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; GREEDY-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY3]] + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP1]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; GREEDY-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY4]] + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[COPY4]] ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[ICMP]](s1), %bb.0, [[ICMP2]](s1), %bb.1 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY]] - ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(i1) = G_PHI [[ICMP]](i1), %bb.0, [[ICMP2]](i1), %bb.1 + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[PHI]](i1), [[COPY5]], [[COPY]] + ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0, $vgpr1, $sgpr0 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $sgpr0 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(eq), %0, %3 - %5:_(s1) = G_ICMP intpred(eq), %2, %3 - G_BRCOND %5, %bb.1 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $sgpr0 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(eq), %0(i32), %3 + %5:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + G_BRCOND %5(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %6:_(s1) = G_ICMP intpred(eq), %1, %3 + %6:_(i1) = G_ICMP intpred(eq), %1(i32), %3 G_BR %bb.2 bb.2: - %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1 - %8:_(s32) = G_SELECT %7, %3, %0 - S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8 + %7:_(i1) = G_PHI %4(i1), %bb.0, %6(i1), %bb.1 + %8:_(i32) = G_SELECT %7(i1), %3, %0 + S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8(i32) + + + + ... 
@@ -527,86 +554,91 @@ body: | ; FAST-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; FAST-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; FAST-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY]](i32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i1) + ; FAST-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC]](i1) + ; FAST-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; FAST-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) - ; FAST-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s1) + ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY1]](i32), [[C]] + ; FAST-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP1]](i32) + ; FAST-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC2]](i1) ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 - ; FAST-NEXT: [[TRUNC3:%[0-9]+]]:sgpr(s1) = G_TRUNC [[PHI]](s32) - ; FAST-NEXT: [[ZEXT1:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC3]](s1) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ZEXT1]](s32), [[C]], [[COPY]] - ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; FAST-NEXT: [[PHI:%[0-9]+]]:sgpr(i32) = G_PHI [[ANYEXT]](i32), %bb.0, [[ANYEXT1]](i32), %bb.1 + ; FAST-NEXT: [[TRUNC3:%[0-9]+]]:sgpr(i1) = G_TRUNC [[PHI]](i32) + ; FAST-NEXT: [[ZEXT1:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC3]](i1) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:sgpr(i32) = G_SELECT [[ZEXT1]](i32), [[C]], [[COPY]] + ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) + ; ; GREEDY-LABEL: name: phi_s1_s_scc_sbranch ; GREEDY: bb.0: ; GREEDY-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT 
[[TRUNC1]](s1) - ; GREEDY-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; GREEDY-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY]](i32) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i1) + ; GREEDY-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC]](i1) + ; GREEDY-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) - ; GREEDY-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s1) + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY1]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP1]](i32) + ; GREEDY-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC2]](i1) ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 - ; GREEDY-NEXT: [[TRUNC3:%[0-9]+]]:sgpr(s1) = G_TRUNC [[PHI]](s32) - ; GREEDY-NEXT: [[ZEXT1:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC3]](s1) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ZEXT1]](s32), [[C]], [[COPY]] - ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sgpr(i32) = G_PHI [[ANYEXT]](i32), %bb.0, [[ANYEXT1]](i32), %bb.1 + ; GREEDY-NEXT: [[TRUNC3:%[0-9]+]]:sgpr(i1) = G_TRUNC [[PHI]](i32) + ; GREEDY-NEXT: [[ZEXT1:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC3]](i1) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:sgpr(i32) = G_SELECT [[ZEXT1]](i32), [[C]], [[COPY]] + ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $sgpr0, $sgpr1, $sgpr2 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $sgpr2 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_TRUNC %0 - %5:_(s1) = G_ICMP intpred(eq), %2, %3 - G_BRCOND %5, %bb.1 + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $sgpr2 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_TRUNC %0(i32) + %5:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + G_BRCOND %5(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %6:_(s1) = G_ICMP intpred(eq), %1, %3 + %6:_(i1) = G_ICMP intpred(eq), %1(i32), %3 G_BR %bb.2 bb.2: - %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1 - %8:_(s32) = G_SELECT %7, %3, %0 - S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8 + %7:_(i1) = G_PHI %4(i1), %bb.0, %6(i1), %bb.1 + %8:_(i32) = G_SELECT %7(i1), %3, %0 + S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8(i32) + + + + ... 
@@ -621,86 +653,91 @@ body: | ; FAST-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; FAST-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; FAST-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY]](i32), [[C]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP1]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i1) + ; FAST-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC]](i1) + ; FAST-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32) - ; FAST-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s1) + ; FAST-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY1]](i32) + ; FAST-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC2]](i1) ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 - ; FAST-NEXT: [[TRUNC3:%[0-9]+]]:sgpr(s1) = G_TRUNC [[PHI]](s32) - ; FAST-NEXT: [[ZEXT1:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC3]](s1) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ZEXT1]](s32), [[C]], [[COPY]] - ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; FAST-NEXT: [[PHI:%[0-9]+]]:sgpr(i32) = G_PHI [[ANYEXT]](i32), %bb.0, [[ANYEXT1]](i32), %bb.1 + ; FAST-NEXT: [[TRUNC3:%[0-9]+]]:sgpr(i1) = G_TRUNC [[PHI]](i32) + ; FAST-NEXT: [[ZEXT1:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC3]](i1) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:sgpr(i32) = G_SELECT [[ZEXT1]](i32), [[C]], [[COPY]] + ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) + ; ; GREEDY-LABEL: name: phi_s1_scc_s_sbranch ; GREEDY: bb.0: ; GREEDY-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) 
= G_TRUNC [[ICMP1]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; GREEDY-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; GREEDY-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP1]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i1) + ; GREEDY-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC]](i1) + ; GREEDY-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32) - ; GREEDY-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s1) + ; GREEDY-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY1]](i32) + ; GREEDY-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC2]](i1) ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 - ; GREEDY-NEXT: [[TRUNC3:%[0-9]+]]:sgpr(s1) = G_TRUNC [[PHI]](s32) - ; GREEDY-NEXT: [[ZEXT1:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC3]](s1) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ZEXT1]](s32), [[C]], [[COPY]] - ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sgpr(i32) = G_PHI [[ANYEXT]](i32), %bb.0, [[ANYEXT1]](i32), %bb.1 + ; GREEDY-NEXT: [[TRUNC3:%[0-9]+]]:sgpr(i1) = G_TRUNC [[PHI]](i32) + ; GREEDY-NEXT: [[ZEXT1:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC3]](i1) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:sgpr(i32) = G_SELECT [[ZEXT1]](i32), [[C]], [[COPY]] + ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $sgpr0, $sgpr1, $sgpr2 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $sgpr2 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(eq), %0, %3 - %5:_(s1) = G_ICMP intpred(eq), %2, %3 - G_BRCOND %5, %bb.1 + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $sgpr2 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(eq), %0(i32), %3 + %5:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + G_BRCOND %5(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %6:_(s1) = G_TRUNC %1 + %6:_(i1) = G_TRUNC %1(i32) G_BR %bb.2 bb.2: - %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1 - %8:_(s32) = G_SELECT %7, %3, %0 - S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8 + %7:_(i1) = G_PHI %4(i1), %bb.0, %6(i1), %bb.1 + %8:_(i32) = G_SELECT %7(i1), %3, %0 + S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8(i32) + + + + ... 
@@ -715,90 +752,95 @@ body: | ; FAST-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; FAST-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; FAST-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY]](i32), [[C]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP1]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i1) + ; FAST-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[TRUNC]](i1) + ; FAST-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY1]](s32) - ; FAST-NEXT: [[ANYEXT1:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC2]](s1) + ; FAST-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY1]](i32) + ; FAST-NEXT: [[ANYEXT1:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[TRUNC2]](i1) ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 - ; FAST-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC3]](s1) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY5]] - ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(i32) = G_PHI [[ANYEXT]](i32), %bb.0, [[ANYEXT1]](i32), %bb.1 + ; FAST-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(i1) = G_TRUNC [[PHI]](i32) + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(i1) = COPY [[TRUNC3]](i1) + ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY3]](i1), [[COPY4]], [[COPY5]] + ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) + ; ; GREEDY-LABEL: name: phi_s1_scc_v_sbranch ; GREEDY: bb.0: ; GREEDY-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: 
[[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; GREEDY-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; GREEDY-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP1]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i1) + ; GREEDY-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[TRUNC]](i1) + ; GREEDY-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY1]](s32) - ; GREEDY-NEXT: [[ANYEXT1:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC2]](s1) + ; GREEDY-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY1]](i32) + ; GREEDY-NEXT: [[ANYEXT1:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[TRUNC2]](i1) ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 - ; GREEDY-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC3]](s1) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY5]] - ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(i32) = G_PHI [[ANYEXT]](i32), %bb.0, [[ANYEXT1]](i32), %bb.1 + ; GREEDY-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(i1) = G_TRUNC [[PHI]](i32) + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(i1) = COPY [[TRUNC3]](i1) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY3]](i1), [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = COPY $sgpr2 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(eq), %0, %3 - %5:_(s1) = G_ICMP intpred(eq), %2, %3 - G_BRCOND %5, %bb.1 + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = COPY $sgpr2 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(eq), %0(i32), %3 + %5:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + G_BRCOND %5(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %6:_(s1) = G_TRUNC %1 + %6:_(i1) = G_TRUNC %1(i32) G_BR %bb.2 bb.2: - %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1 - %8:_(s32) = G_SELECT %7, %3, %0 - S_SETPC_B64 
undef $sgpr30_sgpr31, implicit %8 + %7:_(i1) = G_PHI %4(i1), %bb.0, %6(i1), %bb.1 + %8:_(i32) = G_SELECT %7(i1), %3, %0 + S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8(i32) + + + + ... @@ -813,88 +855,93 @@ body: | ; FAST-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; FAST-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; FAST-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY]](i32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i1) + ; FAST-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[TRUNC]](i1) + ; FAST-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; FAST-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) - ; FAST-NEXT: [[ANYEXT1:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC2]](s1) + ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY1]](i32), [[C]] + ; FAST-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP1]](i32) + ; FAST-NEXT: [[ANYEXT1:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[TRUNC2]](i1) ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 - ; FAST-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC3]](s1) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY]] - ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(i32) = G_PHI [[ANYEXT]](i32), %bb.0, [[ANYEXT1]](i32), %bb.1 + ; FAST-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(i1) = G_TRUNC [[PHI]](i32) + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(i1) = COPY [[TRUNC3]](i1) + ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY3]](i1), [[COPY4]], [[COPY]] + ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) + ; ; GREEDY-LABEL: name: phi_s1_v_scc_sbranch ; GREEDY: bb.0: ; GREEDY-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = 
G_CONSTANT i32 0 - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; GREEDY-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; GREEDY-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY]](i32) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i1) + ; GREEDY-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[TRUNC]](i1) + ; GREEDY-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) - ; GREEDY-NEXT: [[ANYEXT1:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC2]](s1) + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY1]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP1]](i32) + ; GREEDY-NEXT: [[ANYEXT1:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[TRUNC2]](i1) ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 - ; GREEDY-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC3]](s1) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY]] - ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(i32) = G_PHI [[ANYEXT]](i32), %bb.0, [[ANYEXT1]](i32), %bb.1 + ; GREEDY-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(i1) = G_TRUNC [[PHI]](i32) + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(i1) = COPY [[TRUNC3]](i1) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY3]](i1), [[COPY4]], [[COPY]] + ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0, $sgpr0, $sgpr1 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s32) = COPY $sgpr1 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_TRUNC %0 - %5:_(s1) = G_ICMP intpred(eq), %2, %3 - G_BRCOND %5, %bb.1 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr0 + %2:_(i32) = COPY $sgpr1 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_TRUNC %0(i32) + %5:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + G_BRCOND %5(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %6:_(s1) = G_ICMP intpred(eq), %1, %3 + %6:_(i1) = G_ICMP intpred(eq), %1(i32), %3 G_BR %bb.2 bb.2: - %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1 - %8:_(s32) = G_SELECT %7, %3, %0 - S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8 + %7:_(i1) = G_PHI %4(i1), %bb.0, %6(i1), %bb.1 + %8:_(i32) = G_SELECT 
%7(i1), %3, %0 + S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8(i32) + + + + ... @@ -909,82 +956,87 @@ body: | ; FAST-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] - ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; FAST-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY3]] + ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP1]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; FAST-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC1]](s1) + ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY1]](i32) + ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(i1) = COPY [[TRUNC1]](i1) ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[ICMP]](s1), %bb.0, [[COPY4]](s1), %bb.1 - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY]] - ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(i1) = G_PHI [[ICMP]](i1), %bb.0, [[COPY4]](i1), %bb.1 + ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[PHI]](i1), [[COPY5]], [[COPY]] + ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) + ; ; GREEDY-LABEL: name: phi_s1_vcc_s_sbranch ; GREEDY: bb.0: ; GREEDY-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] - ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; GREEDY-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = 
COPY $sgpr0 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY3]] + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP1]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; GREEDY-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC1]](s1) + ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY1]](i32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(i1) = COPY [[TRUNC1]](i1) ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[ICMP]](s1), %bb.0, [[COPY4]](s1), %bb.1 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY]] - ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(i1) = G_PHI [[ICMP]](i1), %bb.0, [[COPY4]](i1), %bb.1 + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[PHI]](i1), [[COPY5]], [[COPY]] + ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0, $sgpr0, $sgpr1 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s32) = COPY $sgpr1 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(eq), %0, %3 - %5:_(s1) = G_ICMP intpred(eq), %2, %3 - G_BRCOND %5, %bb.1 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr0 + %2:_(i32) = COPY $sgpr1 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(eq), %0(i32), %3 + %5:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + G_BRCOND %5(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %6:_(s1) = G_TRUNC %1 + %6:_(i1) = G_TRUNC %1(i32) G_BR %bb.2 bb.2: - %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1 - %8:_(s32) = G_SELECT %7, %3, %0 - S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8 + %7:_(i1) = G_PHI %4(i1), %bb.0, %6(i1), %bb.1 + %8:_(i32) = G_SELECT %7(i1), %3, %0 + S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8(i32) + + + + ... 
@@ -999,84 +1051,89 @@ body: | ; FAST-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY]](i32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i1) + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; FAST-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY4]] + ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; FAST-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[COPY4]] ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[COPY3]](s1), %bb.0, [[ICMP1]](s1), %bb.1 - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY6]] - ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(i1) = G_PHI [[COPY3]](i1), %bb.0, [[ICMP1]](i1), %bb.1 + ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; FAST-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[PHI]](i1), [[COPY5]], [[COPY6]] + ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) + ; ; GREEDY-LABEL: name: phi_s1_s_vcc_sbranch ; GREEDY: bb.0: ; GREEDY-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: 
[[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY]](i32) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i1) + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; GREEDY-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY4]] + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[COPY4]] ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[COPY3]](s1), %bb.0, [[ICMP1]](s1), %bb.1 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY6]] - ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(i1) = G_PHI [[COPY3]](i1), %bb.0, [[ICMP1]](i1), %bb.1 + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[PHI]](i1), [[COPY5]], [[COPY6]] + ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0, $sgpr0, $sgpr1 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = COPY $sgpr1 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_TRUNC %0 - %5:_(s1) = G_ICMP intpred(eq), %2, %3 - G_BRCOND %5, %bb.1 + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = COPY $sgpr1 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_TRUNC %0(i32) + %5:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + G_BRCOND %5(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %6:_(s1) = G_ICMP intpred(eq), %1, %3 + %6:_(i1) = G_ICMP intpred(eq), %1(i32), %3 G_BR %bb.2 bb.2: - %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1 - %8:_(s32) = G_SELECT %7, %3, %0 - S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8 + %7:_(i1) = G_PHI %4(i1), %bb.0, %6(i1), %bb.1 + %8:_(i32) = G_SELECT %7(i1), %3, %0 + S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8(i32) + + + + ... 
@@ -1091,92 +1148,97 @@ body: | ; FAST-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] - ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; FAST-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 - ; FAST-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] - ; FAST-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY3]] + ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP1]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; FAST-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 1 + ; FAST-NEXT: [[C2:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[C1]], [[C2]] + ; FAST-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY1]](s32) - ; FAST-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC1]](s1) + ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY1]](i32) + ; FAST-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[TRUNC1]](i1) ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[SELECT]](s32), %bb.0, [[ANYEXT]](s32), %bb.1 - ; FAST-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC2]](s1) - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[COPY5]], [[COPY]] - ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT1]](s32) + ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(i32) = G_PHI [[SELECT]](i32), %bb.0, [[ANYEXT]](i32), %bb.1 + ; FAST-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(i1) = G_TRUNC [[PHI]](i32) + ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(i1) = COPY [[TRUNC2]](i1) + ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY4]](i1), [[COPY5]], [[COPY]] + ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT1]](i32) + ; ; GREEDY-LABEL: name: phi_s1_vcc_v_sbranch ; GREEDY: bb.0: ; GREEDY-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: 
[[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] - ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 - ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] - ; GREEDY-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY3]] + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP1]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 1 + ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[C1]], [[C2]] + ; GREEDY-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY1]](s32) - ; GREEDY-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC1]](s1) + ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY1]](i32) + ; GREEDY-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[TRUNC1]](i1) ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[SELECT]](s32), %bb.0, [[ANYEXT]](s32), %bb.1 - ; GREEDY-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC2]](s1) - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[COPY5]], [[COPY]] - ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT1]](s32) + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(i32) = G_PHI [[SELECT]](i32), %bb.0, [[ANYEXT]](i32), %bb.1 + ; GREEDY-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(i1) = G_TRUNC [[PHI]](i32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(i1) = COPY [[TRUNC2]](i1) + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY4]](i1), [[COPY5]], [[COPY]] + ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT1]](i32) bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0, $vgpr1, $sgpr0 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $sgpr0 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(eq), %0, %3 - %5:_(s1) = G_ICMP intpred(eq), %2, %3 - G_BRCOND %5, %bb.1 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $sgpr0 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(eq), %0(i32), %3 + %5:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + G_BRCOND 
%5(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %6:_(s1) = G_TRUNC %1 + %6:_(i1) = G_TRUNC %1(i32) G_BR %bb.2 bb.2: - %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1 - %8:_(s32) = G_SELECT %7, %3, %0 - S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8 + %7:_(i1) = G_PHI %4(i1), %bb.0, %6(i1), %bb.1 + %8:_(i32) = G_SELECT %7(i1), %3, %0 + S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8(i32) + + + + ... @@ -1191,92 +1253,97 @@ body: | ; FAST-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; FAST-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; FAST-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY]](i32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i1) + ; FAST-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[TRUNC]](i1) + ; FAST-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]] - ; FAST-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 - ; FAST-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[C1]], [[C2]] + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; FAST-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[COPY3]] + ; FAST-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 1 + ; FAST-NEXT: [[C2:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP1]](i1), [[C1]], [[C2]] ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[SELECT]](s32), %bb.1 - ; FAST-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC2]](s1) - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[COPY5]], [[COPY]] - ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT1]](s32) + ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(i32) = G_PHI [[ANYEXT]](i32), %bb.0, [[SELECT]](i32), %bb.1 + ; FAST-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(i1) = G_TRUNC [[PHI]](i32) + ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(i1) = COPY [[TRUNC2]](i1) + ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY4]](i1), [[COPY5]], 
[[COPY]] + ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT1]](i32) + ; ; GREEDY-LABEL: name: phi_s1_v_vcc_sbranch ; GREEDY: bb.0: ; GREEDY-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; GREEDY-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; GREEDY-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY]](i32) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i1) + ; GREEDY-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[TRUNC]](i1) + ; GREEDY-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]] - ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 - ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[C1]], [[C2]] + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[COPY3]] + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 1 + ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP1]](i1), [[C1]], [[C2]] ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[SELECT]](s32), %bb.1 - ; GREEDY-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC2]](s1) - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[COPY5]], [[COPY]] - ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT1]](s32) + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(i32) = G_PHI [[ANYEXT]](i32), %bb.0, [[SELECT]](i32), %bb.1 + ; GREEDY-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(i1) = G_TRUNC [[PHI]](i32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(i1) = COPY [[TRUNC2]](i1) + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY4]](i1), [[COPY5]], [[COPY]] + ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT1]](i32) bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0, $vgpr1, $sgpr0 - 
%0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $sgpr0 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_TRUNC %0 - %5:_(s1) = G_ICMP intpred(eq), %2, %3 - G_BRCOND %5, %bb.1 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $sgpr0 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_TRUNC %0(i32) + %5:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + G_BRCOND %5(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %6:_(s1) = G_ICMP intpred(eq), %1, %3 + %6:_(i1) = G_ICMP intpred(eq), %1(i32), %3 G_BR %bb.2 bb.2: - %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1 - %8:_(s32) = G_SELECT %7, %3, %0 - S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8 + %7:_(i1) = G_PHI %4(i1), %bb.0, %6(i1), %bb.1 + %8:_(i32) = G_SELECT %7(i1), %3, %0 + S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8(i32) + + + + ... @@ -1291,86 +1358,91 @@ body: | ; FAST-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; FAST-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; FAST-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY]](i32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i1) + ; FAST-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[TRUNC]](i1) + ; FAST-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32) - ; FAST-NEXT: [[ANYEXT1:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC2]](s1) + ; FAST-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY1]](i32) + ; FAST-NEXT: [[ANYEXT1:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[TRUNC2]](i1) ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 - ; FAST-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC3]](s1) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY]] - ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(i32) = G_PHI [[ANYEXT]](i32), %bb.0, [[ANYEXT1]](i32), %bb.1 + ; FAST-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(i1) = G_TRUNC [[PHI]](i32) + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(i1) = COPY [[TRUNC3]](i1) + ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY3]](i1), [[COPY4]], [[COPY]] + 
; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) + ; ; GREEDY-LABEL: name: phi_s1_v_s_sbranch ; GREEDY: bb.0: ; GREEDY-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; GREEDY-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; GREEDY-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY]](i32) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i1) + ; GREEDY-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[TRUNC]](i1) + ; GREEDY-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32) - ; GREEDY-NEXT: [[ANYEXT1:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC2]](s1) + ; GREEDY-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY1]](i32) + ; GREEDY-NEXT: [[ANYEXT1:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[TRUNC2]](i1) ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 - ; GREEDY-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC3]](s1) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY]] - ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(i32) = G_PHI [[ANYEXT]](i32), %bb.0, [[ANYEXT1]](i32), %bb.1 + ; GREEDY-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(i1) = G_TRUNC [[PHI]](i32) + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(i1) = COPY [[TRUNC3]](i1) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY3]](i1), [[COPY4]], [[COPY]] + ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0, $sgpr0, $sgpr1 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s32) = COPY $sgpr1 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_TRUNC %0 - %5:_(s1) = G_ICMP intpred(eq), %2, %3 - G_BRCOND %5, %bb.1 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr0 + %2:_(i32) = COPY $sgpr1 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_TRUNC %0(i32) + %5:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + G_BRCOND %5(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: 
%bb.2(0x80000000) - %6:_(s1) = G_TRUNC %1 + %6:_(i1) = G_TRUNC %1(i32) G_BR %bb.2 bb.2: - %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1 - %8:_(s32) = G_SELECT %7, %3, %0 - S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8 + %7:_(i1) = G_PHI %4(i1), %bb.0, %6(i1), %bb.1 + %8:_(i32) = G_SELECT %7(i1), %3, %0 + S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8(i32) + + + + ... @@ -1385,88 +1457,93 @@ body: | ; FAST-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; FAST-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; FAST-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY]](i32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i1) + ; FAST-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[TRUNC]](i1) + ; FAST-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY1]](s32) - ; FAST-NEXT: [[ANYEXT1:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC2]](s1) + ; FAST-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY1]](i32) + ; FAST-NEXT: [[ANYEXT1:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[TRUNC2]](i1) ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 - ; FAST-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC3]](s1) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY5]] - ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(i32) = G_PHI [[ANYEXT]](i32), %bb.0, [[ANYEXT1]](i32), %bb.1 + ; FAST-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(i1) = G_TRUNC [[PHI]](i32) + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(i1) = COPY [[TRUNC3]](i1) + ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY3]](i1), [[COPY4]], [[COPY5]] + ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) + ; ; GREEDY-LABEL: name: phi_s1_s_v_sbranch ; GREEDY: bb.0: ; GREEDY-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: 
[[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; GREEDY-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; GREEDY-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY]](i32) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i1) + ; GREEDY-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[TRUNC]](i1) + ; GREEDY-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY1]](s32) - ; GREEDY-NEXT: [[ANYEXT1:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC2]](s1) + ; GREEDY-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY1]](i32) + ; GREEDY-NEXT: [[ANYEXT1:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[TRUNC2]](i1) ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 - ; GREEDY-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC3]](s1) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY5]] - ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(i32) = G_PHI [[ANYEXT]](i32), %bb.0, [[ANYEXT1]](i32), %bb.1 + ; GREEDY-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(i1) = G_TRUNC [[PHI]](i32) + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(i1) = COPY [[TRUNC3]](i1) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY3]](i1), [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0, $sgpr0, $sgpr1 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = COPY $sgpr1 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_TRUNC %0 - %5:_(s1) = G_ICMP intpred(eq), %2, %3 - G_BRCOND %5, %bb.1 + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = COPY $sgpr1 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_TRUNC %0(i32) + %5:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + G_BRCOND %5(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %6:_(s1) = G_TRUNC %1 + %6:_(i1) = G_TRUNC %1(i32) G_BR %bb.2 bb.2: - %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1 - %8:_(s32) = G_SELECT %7, %3, %0 - S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8 + 
%7:_(i1) = G_PHI %4(i1), %bb.0, %6(i1), %bb.1 + %8:_(i32) = G_SELECT %7(i1), %3, %0 + S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8(i32) + + + + ... @@ -1481,86 +1558,91 @@ body: | ; FAST-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; FAST-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; FAST-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY]](i32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i1) + ; FAST-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[TRUNC]](i1) + ; FAST-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY1]](s32) - ; FAST-NEXT: [[ANYEXT1:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC2]](s1) + ; FAST-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY1]](i32) + ; FAST-NEXT: [[ANYEXT1:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[TRUNC2]](i1) ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 - ; FAST-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC3]](s1) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY]] - ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(i32) = G_PHI [[ANYEXT]](i32), %bb.0, [[ANYEXT1]](i32), %bb.1 + ; FAST-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(i1) = G_TRUNC [[PHI]](i32) + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(i1) = COPY [[TRUNC3]](i1) + ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY3]](i1), [[COPY4]], [[COPY]] + ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) + ; ; GREEDY-LABEL: name: phi_s1_v_v_sbranch ; GREEDY: bb.0: ; GREEDY-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; GREEDY-NEXT: 
[[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; GREEDY-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; GREEDY-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY]](i32) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i1) + ; GREEDY-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[TRUNC]](i1) + ; GREEDY-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY1]](s32) - ; GREEDY-NEXT: [[ANYEXT1:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC2]](s1) + ; GREEDY-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY1]](i32) + ; GREEDY-NEXT: [[ANYEXT1:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[TRUNC2]](i1) ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 - ; GREEDY-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC3]](s1) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY]] - ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(i32) = G_PHI [[ANYEXT]](i32), %bb.0, [[ANYEXT1]](i32), %bb.1 + ; GREEDY-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(i1) = G_TRUNC [[PHI]](i32) + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(i1) = COPY [[TRUNC3]](i1) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY3]](i1), [[COPY4]], [[COPY]] + ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0, $vgpr1, $sgpr0 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $sgpr0 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_TRUNC %0 - %5:_(s1) = G_ICMP intpred(eq), %2, %3 - G_BRCOND %5, %bb.1 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $sgpr0 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_TRUNC %0(i32) + %5:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + G_BRCOND %5(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %6:_(s1) = G_TRUNC %1 + %6:_(i1) = G_TRUNC %1(i32) G_BR %bb.2 bb.2: - %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1 - %8:_(s32) = G_SELECT %7, %3, %0 - S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8 + %7:_(i1) = G_PHI %4(i1), %bb.0, %6(i1), %bb.1 + %8:_(i32) = G_SELECT %7(i1), %3, %0 + S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8(i32) + + + + ... 
@@ -1575,84 +1657,89 @@ body: | ; FAST-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; FAST-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; FAST-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY]](i32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i1) + ; FAST-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC]](i1) + ; FAST-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32) - ; FAST-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s1) + ; FAST-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY1]](i32) + ; FAST-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC2]](i1) ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 - ; FAST-NEXT: [[TRUNC3:%[0-9]+]]:sgpr(s1) = G_TRUNC [[PHI]](s32) - ; FAST-NEXT: [[ZEXT1:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC3]](s1) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ZEXT1]](s32), [[C]], [[COPY]] - ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; FAST-NEXT: [[PHI:%[0-9]+]]:sgpr(i32) = G_PHI [[ANYEXT]](i32), %bb.0, [[ANYEXT1]](i32), %bb.1 + ; FAST-NEXT: [[TRUNC3:%[0-9]+]]:sgpr(i1) = G_TRUNC [[PHI]](i32) + ; FAST-NEXT: [[ZEXT1:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC3]](i1) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:sgpr(i32) = G_SELECT [[ZEXT1]](i32), [[C]], [[COPY]] + ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) + ; ; GREEDY-LABEL: name: phi_s1_s_s_sbranch ; GREEDY: bb.0: ; GREEDY-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; GREEDY-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; GREEDY-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY 
$sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY]](i32) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i1) + ; GREEDY-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC]](i1) + ; GREEDY-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32) - ; GREEDY-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s1) + ; GREEDY-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY1]](i32) + ; GREEDY-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC2]](i1) ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 - ; GREEDY-NEXT: [[TRUNC3:%[0-9]+]]:sgpr(s1) = G_TRUNC [[PHI]](s32) - ; GREEDY-NEXT: [[ZEXT1:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC3]](s1) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ZEXT1]](s32), [[C]], [[COPY]] - ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sgpr(i32) = G_PHI [[ANYEXT]](i32), %bb.0, [[ANYEXT1]](i32), %bb.1 + ; GREEDY-NEXT: [[TRUNC3:%[0-9]+]]:sgpr(i1) = G_TRUNC [[PHI]](i32) + ; GREEDY-NEXT: [[ZEXT1:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC3]](i1) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:sgpr(i32) = G_SELECT [[ZEXT1]](i32), [[C]], [[COPY]] + ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $sgpr0, $sgpr1, $sgpr2 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $sgpr2 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_TRUNC %0 - %5:_(s1) = G_ICMP intpred(eq), %2, %3 - G_BRCOND %5, %bb.1 + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $sgpr2 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_TRUNC %0(i32) + %5:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + G_BRCOND %5(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %6:_(s1) = G_TRUNC %1 + %6:_(i1) = G_TRUNC %1(i32) G_BR %bb.2 bb.2: - %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1 - %8:_(s32) = G_SELECT %7, %3, %0 - S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8 + %7:_(i1) = G_PHI %4(i1), %bb.0, %6(i1), %bb.1 + %8:_(i32) = G_SELECT %7(i1), %3, %0 + S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8(i32) + + + + ... 
@@ -1667,91 +1754,96 @@ body: | ; FAST-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY]](i32), [[C]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP1]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i1) + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; FAST-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[ICMP2:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; FAST-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP2]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC2]](s1) + ; FAST-NEXT: [[ICMP2:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY1]](i32), [[C]] + ; FAST-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP2]](i32) + ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(i1) = COPY [[TRUNC2]](i1) ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[COPY3]](s1), %bb.0, [[COPY4]](s1), %bb.1 - ; FAST-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 123 - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; FAST-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 456 - ; FAST-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY6]] - ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(i1) = G_PHI [[COPY3]](i1), %bb.0, [[COPY4]](i1), %bb.1 + ; FAST-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 123 + ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[C1]](i32) + ; FAST-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 456 + ; FAST-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY [[C2]](i32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[PHI]](i1), [[COPY5]], [[COPY6]] + ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) + ; ; GREEDY-LABEL: name: phi_s1_vcc_result_scc_scc_sbranch ; GREEDY: bb.0: ; GREEDY-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: 
[[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP1]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i1) + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; GREEDY-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP2]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC2]](s1) + ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY1]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP2]](i32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(i1) = COPY [[TRUNC2]](i1) ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[COPY3]](s1), %bb.0, [[COPY4]](s1), %bb.1 - ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 123 - ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 456 - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[C1]], [[C2]] - ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(i1) = G_PHI [[COPY3]](i1), %bb.0, [[COPY4]](i1), %bb.1 + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 123 + ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 456 + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[PHI]](i1), [[C1]], [[C2]] + ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $sgpr0, $sgpr1, $sgpr2 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $sgpr2 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(eq), %0, %3 - %5:_(s1) = G_ICMP intpred(eq), %2, %3 - G_BRCOND %5, %bb.1 + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $sgpr2 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(eq), %0(i32), %3 + %5:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + G_BRCOND %5(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %6:_(s1) = G_ICMP intpred(eq), %1, %3 + %6:_(i1) = G_ICMP intpred(eq), %1(i32), %3 G_BR %bb.2 bb.2: - %7:vcc(s1) = G_PHI %4, %bb.0, %6, %bb.1 - %8:vgpr(s32) = G_CONSTANT i32 123 - %9:vgpr(s32) = 
G_CONSTANT i32 456 - %10:vgpr(s32) = G_SELECT %7, %8, %9 - S_SETPC_B64 undef $sgpr30_sgpr31, implicit %10 + %7:vcc(i1) = G_PHI %4(i1), %bb.0, %6(i1), %bb.1 + %8:vgpr(i32) = G_CONSTANT i32 123 + %9:vgpr(i32) = G_CONSTANT i32 456 + %10:vgpr(i32) = G_SELECT %7(i1), %8, %9 + S_SETPC_B64 undef $sgpr30_sgpr31, implicit %10(i32) + + + + ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-phi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-phi.mir index b5a04cef68807..92aacc5067a60 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-phi.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-phi.mir @@ -13,74 +13,79 @@ body: | ; FAST-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; FAST-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; FAST-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY [[COPY1]](s32) + ; FAST-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY [[COPY1]](i32) ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[COPY3]](s32), %bb.1 - ; FAST-NEXT: $sgpr0 = COPY [[PHI]](s32) + ; FAST-NEXT: [[PHI:%[0-9]+]]:sgpr(i32) = G_PHI [[COPY]](i32), %bb.0, [[COPY3]](i32), %bb.1 + ; FAST-NEXT: $sgpr0 = COPY [[PHI]](i32) ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 + ; ; GREEDY-LABEL: name: phi_s32_ss_sbranch ; GREEDY: bb.0: ; GREEDY-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; GREEDY-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT 
[[TRUNC]](i1) + ; GREEDY-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY [[COPY1]](s32) + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY [[COPY1]](i32) ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[COPY3]](s32), %bb.1 - ; GREEDY-NEXT: $sgpr0 = COPY [[PHI]](s32) + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sgpr(i32) = G_PHI [[COPY]](i32), %bb.0, [[COPY3]](i32), %bb.1 + ; GREEDY-NEXT: $sgpr0 = COPY [[PHI]](i32) ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $sgpr0, $sgpr1, $sgpr2 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $sgpr2 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(eq), %2, %3 - G_BRCOND %4, %bb.1 + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $sgpr2 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + G_BRCOND %4(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %5:_(s32) = COPY %1 + %5:_(i32) = COPY %1(i32) G_BR %bb.2 bb.2: - %6:_(s32) = G_PHI %0, %bb.0, %5, %bb.1 - $sgpr0 = COPY %6 + %6:_(i32) = G_PHI %0(i32), %bb.0, %5(i32), %bb.1 + $sgpr0 = COPY %6(i32) S_SETPC_B64 undef $sgpr30_sgpr31 + + + + ... --- @@ -94,74 +99,79 @@ body: | ; FAST-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: liveins: $sgpr0, $vgpr0, $sgpr1 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; FAST-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; FAST-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[COPY3]](s32), %bb.1 - ; FAST-NEXT: $vgpr0 = COPY [[PHI]](s32) + ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(i32) = G_PHI [[COPY]](i32), %bb.0, [[COPY3]](i32), %bb.1 + ; FAST-NEXT: $vgpr0 = COPY [[PHI]](i32) ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 + ; ; GREEDY-LABEL: name: phi_s32_sv_sbranch ; GREEDY: bb.0: ; GREEDY-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: liveins: $sgpr0, $vgpr0, $sgpr1 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: 
[[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; GREEDY-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; GREEDY-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[COPY3]](s32), %bb.1 - ; GREEDY-NEXT: $vgpr0 = COPY [[PHI]](s32) + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(i32) = G_PHI [[COPY]](i32), %bb.0, [[COPY3]](i32), %bb.1 + ; GREEDY-NEXT: $vgpr0 = COPY [[PHI]](i32) ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $sgpr0, $vgpr0, $sgpr1 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = COPY $sgpr1 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(eq), %2, %3 - G_BRCOND %4, %bb.1 + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = COPY $sgpr1 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + G_BRCOND %4(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %5:_(s32) = COPY %1 + %5:_(i32) = COPY %1(i32) G_BR %bb.2 bb.2: - %6:_(s32) = G_PHI %0, %bb.0, %5, %bb.1 - $vgpr0 = COPY %6 + %6:_(i32) = G_PHI %0(i32), %bb.0, %5(i32), %bb.1 + $vgpr0 = COPY %6(i32) S_SETPC_B64 undef $sgpr30_sgpr31 + + + + ... 
--- @@ -175,74 +185,79 @@ body: | ; FAST-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; FAST-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; FAST-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY [[COPY1]](s32) + ; FAST-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY [[COPY1]](i32) ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[COPY3]](s32), %bb.1 - ; FAST-NEXT: $vgpr0 = COPY [[PHI]](s32) + ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(i32) = G_PHI [[COPY]](i32), %bb.0, [[COPY3]](i32), %bb.1 + ; FAST-NEXT: $vgpr0 = COPY [[PHI]](i32) ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 + ; ; GREEDY-LABEL: name: phi_s32_vs_sbranch ; GREEDY: bb.0: ; GREEDY-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; GREEDY-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; GREEDY-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY [[COPY1]](s32) + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY [[COPY1]](i32) ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[COPY3]](s32), %bb.1 - ; GREEDY-NEXT: $vgpr0 = COPY [[PHI]](s32) + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(i32) = G_PHI [[COPY]](i32), %bb.0, [[COPY3]](i32), %bb.1 
+ ; GREEDY-NEXT: $vgpr0 = COPY [[PHI]](i32) ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0, $sgpr0, $sgpr1 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s32) = COPY $sgpr1 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(eq), %2, %3 - G_BRCOND %4, %bb.1 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr0 + %2:_(i32) = COPY $sgpr1 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + G_BRCOND %4(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %5:_(s32) = COPY %1 + %5:_(i32) = COPY %1(i32) G_BR %bb.2 bb.2: - %6:_(s32) = G_PHI %0, %bb.0, %5, %bb.1 - $vgpr0 = COPY %6 + %6:_(i32) = G_PHI %0(i32), %bb.0, %5(i32), %bb.1 + $vgpr0 = COPY %6(i32) S_SETPC_B64 undef $sgpr30_sgpr31 + + + + ... --- @@ -256,74 +271,79 @@ body: | ; FAST-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; FAST-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; FAST-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[COPY3]](s32), %bb.1 - ; FAST-NEXT: $vgpr0 = COPY [[PHI]](s32) + ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(i32) = G_PHI [[COPY]](i32), %bb.0, [[COPY3]](i32), %bb.1 + ; FAST-NEXT: $vgpr0 = COPY [[PHI]](i32) ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 + ; ; GREEDY-LABEL: name: phi_s32_vv_sbranch ; GREEDY: bb.0: ; GREEDY-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; GREEDY-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: 
[[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; GREEDY-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[COPY3]](s32), %bb.1 - ; GREEDY-NEXT: $vgpr0 = COPY [[PHI]](s32) + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(i32) = G_PHI [[COPY]](i32), %bb.0, [[COPY3]](i32), %bb.1 + ; GREEDY-NEXT: $vgpr0 = COPY [[PHI]](i32) ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0, $vgpr1, $sgpr0 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $sgpr0 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(eq), %2, %3 - G_BRCOND %4, %bb.1 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $sgpr0 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + G_BRCOND %4(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %5:_(s32) = COPY %1 + %5:_(i32) = COPY %1(i32) G_BR %bb.2 bb.2: - %6:_(s32) = G_PHI %0, %bb.0, %5, %bb.1 - $vgpr0 = COPY %6 + %6:_(i32) = G_PHI %0(i32), %bb.0, %5(i32), %bb.1 + $vgpr0 = COPY %6(i32) S_SETPC_B64 undef $sgpr30_sgpr31 + + + + ... --- name: phi_s32_ss_vcc_sbranch @@ -336,72 +356,77 @@ body: | ; FAST-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[COPY3]] - ; FAST-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[COPY3]] + ; FAST-NEXT: G_BRCOND [[ICMP]](i1), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY [[COPY1]](s32) + ; FAST-NEXT: [[COPY4:%[0-9]+]]:sgpr(i32) = COPY [[COPY1]](i32) ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[COPY4]](s32), %bb.1 - ; FAST-NEXT: $sgpr0 = COPY [[PHI]](s32) + ; FAST-NEXT: [[PHI:%[0-9]+]]:sgpr(i32) = G_PHI [[COPY]](i32), %bb.0, [[COPY4]](i32), %bb.1 + ; FAST-NEXT: $sgpr0 = COPY [[PHI]](i32) ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 + ; ; GREEDY-LABEL: name: phi_s32_ss_vcc_sbranch ; GREEDY: bb.0: ; GREEDY-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) 
; GREEDY-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[COPY3]] - ; GREEDY-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[COPY3]] + ; GREEDY-NEXT: G_BRCOND [[ICMP]](i1), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY [[COPY1]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(i32) = COPY [[COPY1]](i32) ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[COPY4]](s32), %bb.1 - ; GREEDY-NEXT: $sgpr0 = COPY [[PHI]](s32) + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sgpr(i32) = G_PHI [[COPY]](i32), %bb.0, [[COPY4]](i32), %bb.1 + ; GREEDY-NEXT: $sgpr0 = COPY [[PHI]](i32) ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $sgpr0, $sgpr1, $vgpr0 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $vgpr0 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(eq), %2, %3 - G_BRCOND %4, %bb.1 + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $vgpr0 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + G_BRCOND %4(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %5:_(s32) = COPY %1 + %5:_(i32) = COPY %1(i32) G_BR %bb.2 bb.2: - %6:_(s32) = G_PHI %0, %bb.0, %5, %bb.1 - $sgpr0 = COPY %6 + %6:_(i32) = G_PHI %0(i32), %bb.0, %5(i32), %bb.1 + $sgpr0 = COPY %6(i32) S_SETPC_B64 undef $sgpr30_sgpr31 + + + + ... 
--- @@ -415,72 +440,77 @@ body: | ; FAST-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: liveins: $sgpr0, $vgpr0, $vgpr1 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[COPY3]] - ; FAST-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[COPY3]] + ; FAST-NEXT: G_BRCOND [[ICMP]](i1), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[COPY4]](s32), %bb.1 - ; FAST-NEXT: $vgpr0 = COPY [[PHI]](s32) + ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(i32) = G_PHI [[COPY]](i32), %bb.0, [[COPY4]](i32), %bb.1 + ; FAST-NEXT: $vgpr0 = COPY [[PHI]](i32) ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 + ; ; GREEDY-LABEL: name: phi_s32_sv_vcc_sbranch ; GREEDY: bb.0: ; GREEDY-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: liveins: $sgpr0, $vgpr0, $vgpr1 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[COPY3]] - ; GREEDY-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[COPY3]] + ; GREEDY-NEXT: G_BRCOND [[ICMP]](i1), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[COPY4]](s32), %bb.1 - ; GREEDY-NEXT: $vgpr0 = COPY [[PHI]](s32) + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(i32) = G_PHI [[COPY]](i32), %bb.0, [[COPY4]](i32), %bb.1 + ; GREEDY-NEXT: $vgpr0 = COPY [[PHI]](i32) ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $sgpr0, $vgpr0, $vgpr1 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = 
COPY $vgpr1 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(eq), %2, %3 - G_BRCOND %4, %bb.1 + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = COPY $vgpr1 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + G_BRCOND %4(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %5:_(s32) = COPY %1 + %5:_(i32) = COPY %1(i32) G_BR %bb.2 bb.2: - %6:_(s32) = G_PHI %0, %bb.0, %5, %bb.1 - $vgpr0 = COPY %6 + %6:_(i32) = G_PHI %0(i32), %bb.0, %5(i32), %bb.1 + $vgpr0 = COPY %6(i32) S_SETPC_B64 undef $sgpr30_sgpr31 + + + + ... --- @@ -494,72 +524,77 @@ body: | ; FAST-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: liveins: $vgpr0, $sgpr0, $vgpr1 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[COPY3]] - ; FAST-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[COPY3]] + ; FAST-NEXT: G_BRCOND [[ICMP]](i1), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY [[COPY1]](s32) + ; FAST-NEXT: [[COPY4:%[0-9]+]]:sgpr(i32) = COPY [[COPY1]](i32) ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[COPY4]](s32), %bb.1 - ; FAST-NEXT: $vgpr0 = COPY [[PHI]](s32) + ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(i32) = G_PHI [[COPY]](i32), %bb.0, [[COPY4]](i32), %bb.1 + ; FAST-NEXT: $vgpr0 = COPY [[PHI]](i32) ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 + ; ; GREEDY-LABEL: name: phi_s32_vs_vcc_sbranch ; GREEDY: bb.0: ; GREEDY-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: liveins: $vgpr0, $sgpr0, $vgpr1 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[COPY3]] - ; GREEDY-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[COPY3]] + ; GREEDY-NEXT: G_BRCOND [[ICMP]](i1), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY [[COPY1]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(i32) = 
COPY [[COPY1]](i32) ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[COPY4]](s32), %bb.1 - ; GREEDY-NEXT: $vgpr0 = COPY [[PHI]](s32) + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(i32) = G_PHI [[COPY]](i32), %bb.0, [[COPY4]](i32), %bb.1 + ; GREEDY-NEXT: $vgpr0 = COPY [[PHI]](i32) ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0, $sgpr0, $vgpr1 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s32) = COPY $vgpr1 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(eq), %2, %3 - G_BRCOND %4, %bb.1 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr0 + %2:_(i32) = COPY $vgpr1 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + G_BRCOND %4(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %5:_(s32) = COPY %1 + %5:_(i32) = COPY %1(i32) G_BR %bb.2 bb.2: - %6:_(s32) = G_PHI %0, %bb.0, %5, %bb.1 - $vgpr0 = COPY %6 + %6:_(i32) = G_PHI %0(i32), %bb.0, %5(i32), %bb.1 + $vgpr0 = COPY %6(i32) S_SETPC_B64 undef $sgpr30_sgpr31 + + + + ... --- @@ -573,72 +608,77 @@ body: | ; FAST-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[COPY3]] - ; FAST-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[COPY3]] + ; FAST-NEXT: G_BRCOND [[ICMP]](i1), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[COPY4]](s32), %bb.1 - ; FAST-NEXT: $vgpr0 = COPY [[PHI]](s32) + ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(i32) = G_PHI [[COPY]](i32), %bb.0, [[COPY4]](i32), %bb.1 + ; FAST-NEXT: $vgpr0 = COPY [[PHI]](i32) ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 + ; ; GREEDY-LABEL: name: phi_s32_vv_vcc_sbranch ; GREEDY: bb.0: ; GREEDY-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[COPY3]] - ; GREEDY-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: 
[[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY2]](i32), [[COPY3]] + ; GREEDY-NEXT: G_BRCOND [[ICMP]](i1), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[COPY4]](s32), %bb.1 - ; GREEDY-NEXT: $vgpr0 = COPY [[PHI]](s32) + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(i32) = G_PHI [[COPY]](i32), %bb.0, [[COPY4]](i32), %bb.1 + ; GREEDY-NEXT: $vgpr0 = COPY [[PHI]](i32) ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0, $vgpr1, $vgpr2 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(eq), %2, %3 - G_BRCOND %4, %bb.1 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + G_BRCOND %4(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %5:_(s32) = COPY %1 + %5:_(i32) = COPY %1(i32) G_BR %bb.2 bb.2: - %6:_(s32) = G_PHI %0, %bb.0, %5, %bb.1 - $vgpr0 = COPY %6 + %6:_(i32) = G_PHI %0(i32), %bb.0, %5(i32), %bb.1 + $vgpr0 = COPY %6(i32) S_SETPC_B64 undef $sgpr30_sgpr31 + + + + ... 
--- @@ -652,88 +692,93 @@ body: | ; FAST-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; FAST-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; FAST-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY]](i32), [[C]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP1]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i1) + ; FAST-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC]](i1) + ; FAST-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[ICMP2:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; FAST-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP2]](s32) - ; FAST-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s1) + ; FAST-NEXT: [[ICMP2:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY1]](i32), [[C]] + ; FAST-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP2]](i32) + ; FAST-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC2]](i1) ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 - ; FAST-NEXT: [[TRUNC3:%[0-9]+]]:sgpr(s1) = G_TRUNC [[PHI]](s32) - ; FAST-NEXT: [[ZEXT1:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC3]](s1) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ZEXT1]](s32), [[C]], [[COPY]] - ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; FAST-NEXT: [[PHI:%[0-9]+]]:sgpr(i32) = G_PHI [[ANYEXT]](i32), %bb.0, [[ANYEXT1]](i32), %bb.1 + ; FAST-NEXT: [[TRUNC3:%[0-9]+]]:sgpr(i1) = G_TRUNC [[PHI]](i32) + ; FAST-NEXT: [[ZEXT1:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC3]](i1) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:sgpr(i32) = G_SELECT [[ZEXT1]](i32), [[C]], [[COPY]] + ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) + ; ; GREEDY-LABEL: name: phi_s1_scc_scc_sbranch ; GREEDY: bb.0: ; GREEDY-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; GREEDY-NEXT: 
[[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; GREEDY-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; GREEDY-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP1]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i1) + ; GREEDY-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC]](i1) + ; GREEDY-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP2]](s32) - ; GREEDY-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s1) + ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY1]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP2]](i32) + ; GREEDY-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC2]](i1) ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 - ; GREEDY-NEXT: [[TRUNC3:%[0-9]+]]:sgpr(s1) = G_TRUNC [[PHI]](s32) - ; GREEDY-NEXT: [[ZEXT1:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC3]](s1) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ZEXT1]](s32), [[C]], [[COPY]] - ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sgpr(i32) = G_PHI [[ANYEXT]](i32), %bb.0, [[ANYEXT1]](i32), %bb.1 + ; GREEDY-NEXT: [[TRUNC3:%[0-9]+]]:sgpr(i1) = G_TRUNC [[PHI]](i32) + ; GREEDY-NEXT: [[ZEXT1:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC3]](i1) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:sgpr(i32) = G_SELECT [[ZEXT1]](i32), [[C]], [[COPY]] + ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $sgpr0, $sgpr1, $sgpr2 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $sgpr2 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(eq), %0, %3 - %5:_(s1) = G_ICMP intpred(eq), %2, %3 - G_BRCOND %5, %bb.1 + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $sgpr2 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(eq), %0(i32), %3 + %5:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + G_BRCOND %5(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %6:_(s1) = G_ICMP intpred(eq), %1, %3 + %6:_(i1) = G_ICMP intpred(eq), %1(i32), %3 G_BR %bb.2 bb.2: - %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1 - %8:_(s32) = G_SELECT %7, %3, %0 - S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8 + %7:_(i1) = G_PHI %4(i1), %bb.0, %6(i1), %bb.1 + %8:_(i32) = G_SELECT %7(i1), %3, %0 + S_SETPC_B64 undef 
$sgpr30_sgpr31, implicit %8(i32) + + + + ... @@ -748,132 +793,139 @@ body: | ; FAST-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) ; FAST-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[COPY3]] - ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; FAST-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; FAST-NEXT: G_BRCOND [[ZEXT]](s32), %bb.3 + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY3]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[COPY3]] + ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP1]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i1) + ; FAST-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC]](i1) + ; FAST-NEXT: G_BRCOND [[ZEXT]](i32), %bb.3 ; FAST-NEXT: G_BR %bb.1 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 99 - ; FAST-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 888 - ; FAST-NEXT: [[ICMP2:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; FAST-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP2]](s32) - ; FAST-NEXT: [[ICMP3:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[C2]] - ; FAST-NEXT: [[TRUNC3:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP3]](s32) - ; FAST-NEXT: [[ZEXT1:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC3]](s1) - ; FAST-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s1) - ; FAST-NEXT: G_BRCOND [[ZEXT1]](s32), %bb.3 + ; FAST-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 99 + ; FAST-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 888 + ; FAST-NEXT: [[ICMP2:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY1]](i32), [[C1]] + ; FAST-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP2]](i32) + ; FAST-NEXT: [[ICMP3:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY1]](i32), [[C2]] + ; FAST-NEXT: [[TRUNC3:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP3]](i32) + ; FAST-NEXT: [[ZEXT1:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC3]](i1) + ; FAST-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC2]](i1) + ; FAST-NEXT: G_BRCOND [[ZEXT1]](i32), %bb.3 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: ; FAST-NEXT: successors: %bb.3(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 123 - ; FAST-NEXT: [[ICMP4:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C3]] - ; FAST-NEXT: [[TRUNC4:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP4]](s32) - ; FAST-NEXT: [[ANYEXT2:%[0-9]+]]:sgpr(s32) = G_ANYEXT 
[[TRUNC4]](s1) + ; FAST-NEXT: [[C3:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 123 + ; FAST-NEXT: [[ICMP4:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C3]] + ; FAST-NEXT: [[TRUNC4:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP4]](i32) + ; FAST-NEXT: [[ANYEXT2:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC4]](i1) ; FAST-NEXT: G_BR %bb.3 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.3: - ; FAST-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1, [[ANYEXT2]](s32), %bb.2 - ; FAST-NEXT: [[TRUNC5:%[0-9]+]]:sgpr(s1) = G_TRUNC [[PHI]](s32) - ; FAST-NEXT: [[ZEXT2:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC5]](s1) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ZEXT2]](s32), [[COPY]], [[COPY1]] - ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; FAST-NEXT: [[PHI:%[0-9]+]]:sgpr(i32) = G_PHI [[ANYEXT]](i32), %bb.0, [[ANYEXT1]](i32), %bb.1, [[ANYEXT2]](i32), %bb.2 + ; FAST-NEXT: [[TRUNC5:%[0-9]+]]:sgpr(i1) = G_TRUNC [[PHI]](i32) + ; FAST-NEXT: [[ZEXT2:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC5]](i1) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:sgpr(i32) = G_SELECT [[ZEXT2]](i32), [[COPY]], [[COPY1]] + ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) + ; ; GREEDY-LABEL: name: phi_s1_scc_scc_scc_sbranch ; GREEDY: bb.0: ; GREEDY-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) ; GREEDY-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[COPY3]] - ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; GREEDY-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; GREEDY-NEXT: G_BRCOND [[ZEXT]](s32), %bb.3 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY3]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[COPY3]] + ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP1]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i1) + ; GREEDY-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC]](i1) + ; GREEDY-NEXT: G_BRCOND [[ZEXT]](i32), %bb.3 ; GREEDY-NEXT: G_BR %bb.1 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 99 - ; GREEDY-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 888 - ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; GREEDY-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP2]](s32) - ; GREEDY-NEXT: [[ICMP3:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[C2]] - 
; GREEDY-NEXT: [[TRUNC3:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP3]](s32) - ; GREEDY-NEXT: [[ZEXT1:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC3]](s1) - ; GREEDY-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s1) - ; GREEDY-NEXT: G_BRCOND [[ZEXT1]](s32), %bb.3 + ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 99 + ; GREEDY-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 888 + ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY1]](i32), [[C1]] + ; GREEDY-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP2]](i32) + ; GREEDY-NEXT: [[ICMP3:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY1]](i32), [[C2]] + ; GREEDY-NEXT: [[TRUNC3:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP3]](i32) + ; GREEDY-NEXT: [[ZEXT1:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC3]](i1) + ; GREEDY-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC2]](i1) + ; GREEDY-NEXT: G_BRCOND [[ZEXT1]](i32), %bb.3 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: successors: %bb.3(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 123 - ; GREEDY-NEXT: [[ICMP4:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C3]] - ; GREEDY-NEXT: [[TRUNC4:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP4]](s32) - ; GREEDY-NEXT: [[ANYEXT2:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC4]](s1) + ; GREEDY-NEXT: [[C3:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 123 + ; GREEDY-NEXT: [[ICMP4:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C3]] + ; GREEDY-NEXT: [[TRUNC4:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP4]](i32) + ; GREEDY-NEXT: [[ANYEXT2:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC4]](i1) ; GREEDY-NEXT: G_BR %bb.3 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.3: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1, [[ANYEXT2]](s32), %bb.2 - ; GREEDY-NEXT: [[TRUNC5:%[0-9]+]]:sgpr(s1) = G_TRUNC [[PHI]](s32) - ; GREEDY-NEXT: [[ZEXT2:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC5]](s1) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ZEXT2]](s32), [[COPY]], [[COPY1]] - ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sgpr(i32) = G_PHI [[ANYEXT]](i32), %bb.0, [[ANYEXT1]](i32), %bb.1, [[ANYEXT2]](i32), %bb.2 + ; GREEDY-NEXT: [[TRUNC5:%[0-9]+]]:sgpr(i1) = G_TRUNC [[PHI]](i32) + ; GREEDY-NEXT: [[ZEXT2:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC5]](i1) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:sgpr(i32) = G_SELECT [[ZEXT2]](i32), [[COPY]], [[COPY1]] + ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) bb.0: - successors: %bb.1, %bb.3 + successors: %bb.1(0x40000000), %bb.3(0x40000000) liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $sgpr2 - %3:_(s32) = COPY $sgpr3 - %4:_(s32) = G_CONSTANT i32 0 - %5:_(s1) = G_ICMP intpred(eq), %0, %3 - %6:_(s1) = G_ICMP intpred(eq), %2, %3 - G_BRCOND %6, %bb.3 + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $sgpr2 + %3:_(i32) = COPY $sgpr3 + %4:_(i32) = G_CONSTANT i32 0 + %5:_(i1) = G_ICMP intpred(eq), %0(i32), %3 + %6:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + G_BRCOND %6(i1), %bb.3 G_BR %bb.1 bb.1: - successors: %bb.2, %bb.3 + successors: %bb.2(0x40000000), %bb.3(0x40000000) - %7:_(s32) = G_CONSTANT i32 99 - %8:_(s32) = G_CONSTANT i32 888 - %9:_(s1) = G_ICMP intpred(eq), %1, %7 - %10:_(s1) = G_ICMP intpred(eq), %1, %8 - G_BRCOND %10, %bb.3 + %7:_(i32) = G_CONSTANT i32 99 + %8:_(i32) = G_CONSTANT i32 888 + %9:_(i1) = G_ICMP intpred(eq), %1(i32), %7 + %10:_(i1) = G_ICMP 
intpred(eq), %1(i32), %8 + G_BRCOND %10(i1), %bb.3 G_BR %bb.2 bb.2: - successors: %bb.3 + successors: %bb.3(0x80000000) - %11:_(s32) = G_CONSTANT i32 123 - %12:_(s1) = G_ICMP intpred(eq), %2, %11 + %11:_(i32) = G_CONSTANT i32 123 + %12:_(i1) = G_ICMP intpred(eq), %2(i32), %11 G_BR %bb.3 bb.3: - %13:_(s1) = G_PHI %5, %bb.0, %9, %bb.1, %12, %bb.2 - %14:_(s32) = G_SELECT %13, %0, %1 - S_SETPC_B64 undef $sgpr30_sgpr31, implicit %14 + %13:_(i1) = G_PHI %5(i1), %bb.0, %9(i1), %bb.1, %12(i1), %bb.2 + %14:_(i32) = G_SELECT %13(i1), %0, %1 + S_SETPC_B64 undef $sgpr30_sgpr31, implicit %14(i32) + + + + + + ... @@ -888,86 +940,91 @@ body: | ; FAST-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY]](i32), [[C]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP1]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i1) + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; FAST-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY4]] + ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; FAST-NEXT: [[ICMP2:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[COPY4]] ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[COPY3]](s1), %bb.0, [[ICMP2]](s1), %bb.1 - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY6]] - ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(i1) = G_PHI [[COPY3]](i1), %bb.0, [[ICMP2]](i1), %bb.1 + ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; FAST-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[PHI]](i1), [[COPY5]], [[COPY6]] + ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) + ; ; GREEDY-LABEL: name: phi_s1_scc_vcc_sbranch ; GREEDY: bb.0: ; GREEDY-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; 
GREEDY-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP1]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i1) + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; GREEDY-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY4]] + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[COPY4]] ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[COPY3]](s1), %bb.0, [[ICMP2]](s1), %bb.1 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY6]] - ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(i1) = G_PHI [[COPY3]](i1), %bb.0, [[ICMP2]](i1), %bb.1 + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[PHI]](i1), [[COPY5]], [[COPY6]] + ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $sgpr0, $sgpr1, $vgpr0 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = COPY $sgpr1 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(eq), %0, %3 - %5:_(s1) = G_ICMP intpred(eq), %2, %3 - G_BRCOND %5, %bb.1 + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = COPY $sgpr1 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(eq), %0(i32), %3 + %5:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + G_BRCOND %5(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %6:_(s1) = G_ICMP intpred(eq), %1, %3 + %6:_(i1) = G_ICMP intpred(eq), %1(i32), %3 G_BR %bb.2 bb.2: - %7:_(s1) = G_PHI 
%4, %bb.0, %6, %bb.1 - %8:_(s32) = G_SELECT %7, %3, %0 - S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8 + %7:_(i1) = G_PHI %4(i1), %bb.0, %6(i1), %bb.1 + %8:_(i32) = G_SELECT %7(i1), %3, %0 + S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8(i32) + + + + ... @@ -982,86 +1039,91 @@ body: | ; FAST-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] - ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; FAST-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY3]] + ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP1]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; FAST-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[ICMP2:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP2]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC1]](s1) + ; FAST-NEXT: [[ICMP2:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY1]](i32), [[C]] + ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP2]](i32) + ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(i1) = COPY [[TRUNC1]](i1) ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[ICMP]](s1), %bb.0, [[COPY4]](s1), %bb.1 - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY6]] - ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(i1) = G_PHI [[ICMP]](i1), %bb.0, [[COPY4]](i1), %bb.1 + ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; FAST-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[PHI]](i1), [[COPY5]], [[COPY6]] + ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) + ; ; GREEDY-LABEL: name: phi_s1_vcc_scc_sbranch ; GREEDY: bb.0: ; GREEDY-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = 
COPY [[C]](s32) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] - ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; GREEDY-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY3]] + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP1]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; GREEDY-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP2]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC1]](s1) + ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY1]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP2]](i32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(i1) = COPY [[TRUNC1]](i1) ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[ICMP]](s1), %bb.0, [[COPY4]](s1), %bb.1 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY6]] - ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(i1) = G_PHI [[ICMP]](i1), %bb.0, [[COPY4]](i1), %bb.1 + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[PHI]](i1), [[COPY5]], [[COPY6]] + ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0, $sgpr0, $sgpr1 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s32) = COPY $sgpr1 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(eq), %0, %3 - %5:_(s1) = G_ICMP intpred(eq), %2, %3 - G_BRCOND %5, %bb.1 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr0 + %2:_(i32) = COPY $sgpr1 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(eq), %0(i32), %3 + %5:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + G_BRCOND %5(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %6:_(s1) = G_ICMP intpred(eq), %1, %3 + %6:_(i1) = G_ICMP intpred(eq), %1(i32), %3 G_BR %bb.2 bb.2: - %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1 - %8:_(s32) = G_SELECT %7, %3, %1 - S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8 + %7:_(i1) = G_PHI %4(i1), %bb.0, %6(i1), %bb.1 + %8:_(i32) = G_SELECT %7(i1), %3, %1 + S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8(i32) + + + + ... 
@@ -1076,82 +1138,87 @@ body: | ; FAST-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] - ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; FAST-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY3]] + ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP1]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; FAST-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY4]] + ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; FAST-NEXT: [[ICMP2:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[COPY4]] ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[ICMP]](s1), %bb.0, [[ICMP2]](s1), %bb.1 - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY]] - ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(i1) = G_PHI [[ICMP]](i1), %bb.0, [[ICMP2]](i1), %bb.1 + ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[PHI]](i1), [[COPY5]], [[COPY]] + ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) + ; ; GREEDY-LABEL: name: phi_s1_vcc_vcc_sbranch ; GREEDY: bb.0: ; GREEDY-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] - ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; GREEDY-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GREEDY-NEXT: 
[[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY3]] + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP1]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; GREEDY-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY4]] + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[COPY4]] ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[ICMP]](s1), %bb.0, [[ICMP2]](s1), %bb.1 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY]] - ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(i1) = G_PHI [[ICMP]](i1), %bb.0, [[ICMP2]](i1), %bb.1 + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[PHI]](i1), [[COPY5]], [[COPY]] + ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0, $vgpr1, $sgpr0 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $sgpr0 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(eq), %0, %3 - %5:_(s1) = G_ICMP intpred(eq), %2, %3 - G_BRCOND %5, %bb.1 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $sgpr0 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(eq), %0(i32), %3 + %5:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + G_BRCOND %5(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %6:_(s1) = G_ICMP intpred(eq), %1, %3 + %6:_(i1) = G_ICMP intpred(eq), %1(i32), %3 G_BR %bb.2 bb.2: - %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1 - %8:_(s32) = G_SELECT %7, %3, %0 - S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8 + %7:_(i1) = G_PHI %4(i1), %bb.0, %6(i1), %bb.1 + %8:_(i32) = G_SELECT %7(i1), %3, %0 + S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8(i32) + + + + ... 
@@ -1166,86 +1233,91 @@ body: | ; FAST-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; FAST-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; FAST-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY]](i32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i1) + ; FAST-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC]](i1) + ; FAST-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; FAST-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) - ; FAST-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s1) + ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY1]](i32), [[C]] + ; FAST-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP1]](i32) + ; FAST-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC2]](i1) ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 - ; FAST-NEXT: [[TRUNC3:%[0-9]+]]:sgpr(s1) = G_TRUNC [[PHI]](s32) - ; FAST-NEXT: [[ZEXT1:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC3]](s1) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ZEXT1]](s32), [[C]], [[COPY]] - ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; FAST-NEXT: [[PHI:%[0-9]+]]:sgpr(i32) = G_PHI [[ANYEXT]](i32), %bb.0, [[ANYEXT1]](i32), %bb.1 + ; FAST-NEXT: [[TRUNC3:%[0-9]+]]:sgpr(i1) = G_TRUNC [[PHI]](i32) + ; FAST-NEXT: [[ZEXT1:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC3]](i1) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:sgpr(i32) = G_SELECT [[ZEXT1]](i32), [[C]], [[COPY]] + ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) + ; ; GREEDY-LABEL: name: phi_s1_s_scc_sbranch ; GREEDY: bb.0: ; GREEDY-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT 
[[TRUNC1]](s1) - ; GREEDY-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; GREEDY-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY]](i32) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i1) + ; GREEDY-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC]](i1) + ; GREEDY-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) - ; GREEDY-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s1) + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY1]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP1]](i32) + ; GREEDY-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC2]](i1) ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 - ; GREEDY-NEXT: [[TRUNC3:%[0-9]+]]:sgpr(s1) = G_TRUNC [[PHI]](s32) - ; GREEDY-NEXT: [[ZEXT1:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC3]](s1) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ZEXT1]](s32), [[C]], [[COPY]] - ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sgpr(i32) = G_PHI [[ANYEXT]](i32), %bb.0, [[ANYEXT1]](i32), %bb.1 + ; GREEDY-NEXT: [[TRUNC3:%[0-9]+]]:sgpr(i1) = G_TRUNC [[PHI]](i32) + ; GREEDY-NEXT: [[ZEXT1:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC3]](i1) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:sgpr(i32) = G_SELECT [[ZEXT1]](i32), [[C]], [[COPY]] + ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $sgpr0, $sgpr1, $sgpr2 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $sgpr2 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_TRUNC %0 - %5:_(s1) = G_ICMP intpred(eq), %2, %3 - G_BRCOND %5, %bb.1 + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $sgpr2 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_TRUNC %0(i32) + %5:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + G_BRCOND %5(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %6:_(s1) = G_ICMP intpred(eq), %1, %3 + %6:_(i1) = G_ICMP intpred(eq), %1(i32), %3 G_BR %bb.2 bb.2: - %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1 - %8:_(s32) = G_SELECT %7, %3, %0 - S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8 + %7:_(i1) = G_PHI %4(i1), %bb.0, %6(i1), %bb.1 + %8:_(i32) = G_SELECT %7(i1), %3, %0 + S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8(i32) + + + + ... 
@@ -1260,86 +1332,91 @@ body: | ; FAST-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; FAST-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; FAST-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY]](i32), [[C]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP1]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i1) + ; FAST-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC]](i1) + ; FAST-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32) - ; FAST-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s1) + ; FAST-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY1]](i32) + ; FAST-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC2]](i1) ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 - ; FAST-NEXT: [[TRUNC3:%[0-9]+]]:sgpr(s1) = G_TRUNC [[PHI]](s32) - ; FAST-NEXT: [[ZEXT1:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC3]](s1) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ZEXT1]](s32), [[C]], [[COPY]] - ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; FAST-NEXT: [[PHI:%[0-9]+]]:sgpr(i32) = G_PHI [[ANYEXT]](i32), %bb.0, [[ANYEXT1]](i32), %bb.1 + ; FAST-NEXT: [[TRUNC3:%[0-9]+]]:sgpr(i1) = G_TRUNC [[PHI]](i32) + ; FAST-NEXT: [[ZEXT1:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC3]](i1) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:sgpr(i32) = G_SELECT [[ZEXT1]](i32), [[C]], [[COPY]] + ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) + ; ; GREEDY-LABEL: name: phi_s1_scc_s_sbranch ; GREEDY: bb.0: ; GREEDY-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; GREEDY-NEXT: 
[[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; GREEDY-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; GREEDY-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP1]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i1) + ; GREEDY-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC]](i1) + ; GREEDY-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32) - ; GREEDY-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s1) + ; GREEDY-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY1]](i32) + ; GREEDY-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC2]](i1) ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 - ; GREEDY-NEXT: [[TRUNC3:%[0-9]+]]:sgpr(s1) = G_TRUNC [[PHI]](s32) - ; GREEDY-NEXT: [[ZEXT1:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC3]](s1) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ZEXT1]](s32), [[C]], [[COPY]] - ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sgpr(i32) = G_PHI [[ANYEXT]](i32), %bb.0, [[ANYEXT1]](i32), %bb.1 + ; GREEDY-NEXT: [[TRUNC3:%[0-9]+]]:sgpr(i1) = G_TRUNC [[PHI]](i32) + ; GREEDY-NEXT: [[ZEXT1:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC3]](i1) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:sgpr(i32) = G_SELECT [[ZEXT1]](i32), [[C]], [[COPY]] + ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $sgpr0, $sgpr1, $sgpr2 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $sgpr2 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(eq), %0, %3 - %5:_(s1) = G_ICMP intpred(eq), %2, %3 - G_BRCOND %5, %bb.1 + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $sgpr2 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(eq), %0(i32), %3 + %5:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + G_BRCOND %5(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %6:_(s1) = G_TRUNC %1 + %6:_(i1) = G_TRUNC %1(i32) G_BR %bb.2 bb.2: - %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1 - %8:_(s32) = G_SELECT %7, %3, %0 - S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8 + %7:_(i1) = G_PHI %4(i1), %bb.0, %6(i1), %bb.1 + %8:_(i32) = G_SELECT %7(i1), %3, %0 + S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8(i32) + + + + ... 
@@ -1354,90 +1431,95 @@ body: | ; FAST-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; FAST-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; FAST-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY]](i32), [[C]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP1]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i1) + ; FAST-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[TRUNC]](i1) + ; FAST-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY1]](s32) - ; FAST-NEXT: [[ANYEXT1:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC2]](s1) + ; FAST-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY1]](i32) + ; FAST-NEXT: [[ANYEXT1:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[TRUNC2]](i1) ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 - ; FAST-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC3]](s1) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY5]] - ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(i32) = G_PHI [[ANYEXT]](i32), %bb.0, [[ANYEXT1]](i32), %bb.1 + ; FAST-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(i1) = G_TRUNC [[PHI]](i32) + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(i1) = COPY [[TRUNC3]](i1) + ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY3]](i1), [[COPY4]], [[COPY5]] + ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) + ; ; GREEDY-LABEL: name: phi_s1_scc_v_sbranch ; GREEDY: bb.0: ; GREEDY-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: 
[[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; GREEDY-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; GREEDY-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP1]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i1) + ; GREEDY-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[TRUNC]](i1) + ; GREEDY-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY1]](s32) - ; GREEDY-NEXT: [[ANYEXT1:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC2]](s1) + ; GREEDY-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY1]](i32) + ; GREEDY-NEXT: [[ANYEXT1:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[TRUNC2]](i1) ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 - ; GREEDY-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC3]](s1) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY5]] - ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(i32) = G_PHI [[ANYEXT]](i32), %bb.0, [[ANYEXT1]](i32), %bb.1 + ; GREEDY-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(i1) = G_TRUNC [[PHI]](i32) + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(i1) = COPY [[TRUNC3]](i1) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY3]](i1), [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = COPY $sgpr2 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(eq), %0, %3 - %5:_(s1) = G_ICMP intpred(eq), %2, %3 - G_BRCOND %5, %bb.1 + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = COPY $sgpr2 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(eq), %0(i32), %3 + %5:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + G_BRCOND %5(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %6:_(s1) = G_TRUNC %1 + %6:_(i1) = G_TRUNC %1(i32) G_BR %bb.2 bb.2: - %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1 - %8:_(s32) = G_SELECT %7, %3, %0 - S_SETPC_B64 
undef $sgpr30_sgpr31, implicit %8 + %7:_(i1) = G_PHI %4(i1), %bb.0, %6(i1), %bb.1 + %8:_(i32) = G_SELECT %7(i1), %3, %0 + S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8(i32) + + + + ... @@ -1452,88 +1534,93 @@ body: | ; FAST-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; FAST-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; FAST-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY]](i32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i1) + ; FAST-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[TRUNC]](i1) + ; FAST-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; FAST-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) - ; FAST-NEXT: [[ANYEXT1:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC2]](s1) + ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY1]](i32), [[C]] + ; FAST-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP1]](i32) + ; FAST-NEXT: [[ANYEXT1:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[TRUNC2]](i1) ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 - ; FAST-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC3]](s1) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY]] - ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(i32) = G_PHI [[ANYEXT]](i32), %bb.0, [[ANYEXT1]](i32), %bb.1 + ; FAST-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(i1) = G_TRUNC [[PHI]](i32) + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(i1) = COPY [[TRUNC3]](i1) + ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY3]](i1), [[COPY4]], [[COPY]] + ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) + ; ; GREEDY-LABEL: name: phi_s1_v_scc_sbranch ; GREEDY: bb.0: ; GREEDY-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) 
= G_CONSTANT i32 0 - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; GREEDY-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; GREEDY-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY]](i32) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i1) + ; GREEDY-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[TRUNC]](i1) + ; GREEDY-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) - ; GREEDY-NEXT: [[ANYEXT1:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC2]](s1) + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY1]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP1]](i32) + ; GREEDY-NEXT: [[ANYEXT1:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[TRUNC2]](i1) ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 - ; GREEDY-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC3]](s1) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY]] - ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(i32) = G_PHI [[ANYEXT]](i32), %bb.0, [[ANYEXT1]](i32), %bb.1 + ; GREEDY-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(i1) = G_TRUNC [[PHI]](i32) + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(i1) = COPY [[TRUNC3]](i1) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY3]](i1), [[COPY4]], [[COPY]] + ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0, $sgpr0, $sgpr1 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s32) = COPY $sgpr1 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_TRUNC %0 - %5:_(s1) = G_ICMP intpred(eq), %2, %3 - G_BRCOND %5, %bb.1 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr0 + %2:_(i32) = COPY $sgpr1 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_TRUNC %0(i32) + %5:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + G_BRCOND %5(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %6:_(s1) = G_ICMP intpred(eq), %1, %3 + %6:_(i1) = G_ICMP intpred(eq), %1(i32), %3 G_BR %bb.2 bb.2: - %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1 - %8:_(s32) = G_SELECT %7, %3, %0 - S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8 + %7:_(i1) = G_PHI %4(i1), %bb.0, %6(i1), %bb.1 + %8:_(i32) = G_SELECT 
%7(i1), %3, %0 + S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8(i32) + + + + ... @@ -1548,82 +1635,87 @@ body: | ; FAST-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] - ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; FAST-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY3]] + ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP1]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; FAST-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC1]](s1) + ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY1]](i32) + ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(i1) = COPY [[TRUNC1]](i1) ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[ICMP]](s1), %bb.0, [[COPY4]](s1), %bb.1 - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY]] - ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(i1) = G_PHI [[ICMP]](i1), %bb.0, [[COPY4]](i1), %bb.1 + ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[PHI]](i1), [[COPY5]], [[COPY]] + ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) + ; ; GREEDY-LABEL: name: phi_s1_vcc_s_sbranch ; GREEDY: bb.0: ; GREEDY-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] - ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; GREEDY-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = 
COPY $sgpr0 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY3]] + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP1]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; GREEDY-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC1]](s1) + ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY1]](i32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(i1) = COPY [[TRUNC1]](i1) ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[ICMP]](s1), %bb.0, [[COPY4]](s1), %bb.1 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY]] - ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(i1) = G_PHI [[ICMP]](i1), %bb.0, [[COPY4]](i1), %bb.1 + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[PHI]](i1), [[COPY5]], [[COPY]] + ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0, $sgpr0, $sgpr1 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s32) = COPY $sgpr1 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(eq), %0, %3 - %5:_(s1) = G_ICMP intpred(eq), %2, %3 - G_BRCOND %5, %bb.1 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr0 + %2:_(i32) = COPY $sgpr1 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(eq), %0(i32), %3 + %5:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + G_BRCOND %5(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %6:_(s1) = G_TRUNC %1 + %6:_(i1) = G_TRUNC %1(i32) G_BR %bb.2 bb.2: - %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1 - %8:_(s32) = G_SELECT %7, %3, %0 - S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8 + %7:_(i1) = G_PHI %4(i1), %bb.0, %6(i1), %bb.1 + %8:_(i32) = G_SELECT %7(i1), %3, %0 + S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8(i32) + + + + ... 
@@ -1638,84 +1730,89 @@ body: | ; FAST-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY]](i32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i1) + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; FAST-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY4]] + ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; FAST-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[COPY4]] ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[COPY3]](s1), %bb.0, [[ICMP1]](s1), %bb.1 - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY6]] - ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(i1) = G_PHI [[COPY3]](i1), %bb.0, [[ICMP1]](i1), %bb.1 + ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; FAST-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[PHI]](i1), [[COPY5]], [[COPY6]] + ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) + ; ; GREEDY-LABEL: name: phi_s1_s_vcc_sbranch ; GREEDY: bb.0: ; GREEDY-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: 
[[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY]](i32) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i1) + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; GREEDY-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY4]] + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[COPY4]] ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[COPY3]](s1), %bb.0, [[ICMP1]](s1), %bb.1 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY6]] - ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(i1) = G_PHI [[COPY3]](i1), %bb.0, [[ICMP1]](i1), %bb.1 + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[PHI]](i1), [[COPY5]], [[COPY6]] + ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0, $sgpr0, $sgpr1 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = COPY $sgpr1 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_TRUNC %0 - %5:_(s1) = G_ICMP intpred(eq), %2, %3 - G_BRCOND %5, %bb.1 + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = COPY $sgpr1 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_TRUNC %0(i32) + %5:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + G_BRCOND %5(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %6:_(s1) = G_ICMP intpred(eq), %1, %3 + %6:_(i1) = G_ICMP intpred(eq), %1(i32), %3 G_BR %bb.2 bb.2: - %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1 - %8:_(s32) = G_SELECT %7, %3, %0 - S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8 + %7:_(i1) = G_PHI %4(i1), %bb.0, %6(i1), %bb.1 + %8:_(i32) = G_SELECT %7(i1), %3, %0 + S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8(i32) + + + + ... 
@@ -1730,92 +1827,97 @@ body: | ; FAST-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] - ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; FAST-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 - ; FAST-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] - ; FAST-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY3]] + ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP1]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; FAST-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 1 + ; FAST-NEXT: [[C2:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[C1]], [[C2]] + ; FAST-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY1]](s32) - ; FAST-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC1]](s1) + ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY1]](i32) + ; FAST-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[TRUNC1]](i1) ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[SELECT]](s32), %bb.0, [[ANYEXT]](s32), %bb.1 - ; FAST-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC2]](s1) - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[COPY5]], [[COPY]] - ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT1]](s32) + ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(i32) = G_PHI [[SELECT]](i32), %bb.0, [[ANYEXT]](i32), %bb.1 + ; FAST-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(i1) = G_TRUNC [[PHI]](i32) + ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(i1) = COPY [[TRUNC2]](i1) + ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY4]](i1), [[COPY5]], [[COPY]] + ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT1]](i32) + ; ; GREEDY-LABEL: name: phi_s1_vcc_v_sbranch ; GREEDY: bb.0: ; GREEDY-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: 
[[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] - ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 - ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] - ; GREEDY-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY3]] + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP1]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 1 + ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[C1]], [[C2]] + ; GREEDY-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY1]](s32) - ; GREEDY-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC1]](s1) + ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY1]](i32) + ; GREEDY-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[TRUNC1]](i1) ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[SELECT]](s32), %bb.0, [[ANYEXT]](s32), %bb.1 - ; GREEDY-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC2]](s1) - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[COPY5]], [[COPY]] - ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT1]](s32) + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(i32) = G_PHI [[SELECT]](i32), %bb.0, [[ANYEXT]](i32), %bb.1 + ; GREEDY-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(i1) = G_TRUNC [[PHI]](i32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(i1) = COPY [[TRUNC2]](i1) + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY4]](i1), [[COPY5]], [[COPY]] + ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT1]](i32) bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0, $vgpr1, $sgpr0 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $sgpr0 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(eq), %0, %3 - %5:_(s1) = G_ICMP intpred(eq), %2, %3 - G_BRCOND %5, %bb.1 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $sgpr0 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(eq), %0(i32), %3 + %5:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + G_BRCOND 
%5(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %6:_(s1) = G_TRUNC %1 + %6:_(i1) = G_TRUNC %1(i32) G_BR %bb.2 bb.2: - %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1 - %8:_(s32) = G_SELECT %7, %3, %0 - S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8 + %7:_(i1) = G_PHI %4(i1), %bb.0, %6(i1), %bb.1 + %8:_(i32) = G_SELECT %7(i1), %3, %0 + S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8(i32) + + + + ... @@ -1830,92 +1932,97 @@ body: | ; FAST-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; FAST-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; FAST-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY]](i32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i1) + ; FAST-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[TRUNC]](i1) + ; FAST-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]] - ; FAST-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 - ; FAST-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[C1]], [[C2]] + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; FAST-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[COPY3]] + ; FAST-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 1 + ; FAST-NEXT: [[C2:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP1]](i1), [[C1]], [[C2]] ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[SELECT]](s32), %bb.1 - ; FAST-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC2]](s1) - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[COPY5]], [[COPY]] - ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT1]](s32) + ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(i32) = G_PHI [[ANYEXT]](i32), %bb.0, [[SELECT]](i32), %bb.1 + ; FAST-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(i1) = G_TRUNC [[PHI]](i32) + ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(i1) = COPY [[TRUNC2]](i1) + ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY4]](i1), [[COPY5]], 
[[COPY]] + ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT1]](i32) + ; ; GREEDY-LABEL: name: phi_s1_v_vcc_sbranch ; GREEDY: bb.0: ; GREEDY-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; GREEDY-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; GREEDY-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY]](i32) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i1) + ; GREEDY-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[TRUNC]](i1) + ; GREEDY-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]] - ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 - ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[C1]], [[C2]] + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY1]](i32), [[COPY3]] + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 1 + ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP1]](i1), [[C1]], [[C2]] ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[SELECT]](s32), %bb.1 - ; GREEDY-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC2]](s1) - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[COPY5]], [[COPY]] - ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT1]](s32) + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(i32) = G_PHI [[ANYEXT]](i32), %bb.0, [[SELECT]](i32), %bb.1 + ; GREEDY-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(i1) = G_TRUNC [[PHI]](i32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(i1) = COPY [[TRUNC2]](i1) + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY4]](i1), [[COPY5]], [[COPY]] + ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT1]](i32) bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0, $vgpr1, $sgpr0 - 
%0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $sgpr0 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_TRUNC %0 - %5:_(s1) = G_ICMP intpred(eq), %2, %3 - G_BRCOND %5, %bb.1 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $sgpr0 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_TRUNC %0(i32) + %5:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + G_BRCOND %5(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %6:_(s1) = G_ICMP intpred(eq), %1, %3 + %6:_(i1) = G_ICMP intpred(eq), %1(i32), %3 G_BR %bb.2 bb.2: - %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1 - %8:_(s32) = G_SELECT %7, %3, %0 - S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8 + %7:_(i1) = G_PHI %4(i1), %bb.0, %6(i1), %bb.1 + %8:_(i32) = G_SELECT %7(i1), %3, %0 + S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8(i32) + + + + ... @@ -1930,86 +2037,91 @@ body: | ; FAST-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; FAST-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; FAST-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY]](i32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i1) + ; FAST-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[TRUNC]](i1) + ; FAST-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32) - ; FAST-NEXT: [[ANYEXT1:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC2]](s1) + ; FAST-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY1]](i32) + ; FAST-NEXT: [[ANYEXT1:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[TRUNC2]](i1) ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 - ; FAST-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC3]](s1) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY]] - ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(i32) = G_PHI [[ANYEXT]](i32), %bb.0, [[ANYEXT1]](i32), %bb.1 + ; FAST-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(i1) = G_TRUNC [[PHI]](i32) + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(i1) = COPY [[TRUNC3]](i1) + ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY3]](i1), [[COPY4]], [[COPY]] + 
; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) + ; ; GREEDY-LABEL: name: phi_s1_v_s_sbranch ; GREEDY: bb.0: ; GREEDY-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; GREEDY-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; GREEDY-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY]](i32) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i1) + ; GREEDY-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[TRUNC]](i1) + ; GREEDY-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32) - ; GREEDY-NEXT: [[ANYEXT1:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC2]](s1) + ; GREEDY-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY1]](i32) + ; GREEDY-NEXT: [[ANYEXT1:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[TRUNC2]](i1) ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 - ; GREEDY-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC3]](s1) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY]] - ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(i32) = G_PHI [[ANYEXT]](i32), %bb.0, [[ANYEXT1]](i32), %bb.1 + ; GREEDY-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(i1) = G_TRUNC [[PHI]](i32) + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(i1) = COPY [[TRUNC3]](i1) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY3]](i1), [[COPY4]], [[COPY]] + ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0, $sgpr0, $sgpr1 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s32) = COPY $sgpr1 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_TRUNC %0 - %5:_(s1) = G_ICMP intpred(eq), %2, %3 - G_BRCOND %5, %bb.1 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr0 + %2:_(i32) = COPY $sgpr1 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_TRUNC %0(i32) + %5:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + G_BRCOND %5(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: 
%bb.2(0x80000000) - %6:_(s1) = G_TRUNC %1 + %6:_(i1) = G_TRUNC %1(i32) G_BR %bb.2 bb.2: - %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1 - %8:_(s32) = G_SELECT %7, %3, %0 - S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8 + %7:_(i1) = G_PHI %4(i1), %bb.0, %6(i1), %bb.1 + %8:_(i32) = G_SELECT %7(i1), %3, %0 + S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8(i32) + + + + ... @@ -2024,88 +2136,93 @@ body: | ; FAST-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; FAST-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; FAST-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY]](i32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i1) + ; FAST-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[TRUNC]](i1) + ; FAST-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY1]](s32) - ; FAST-NEXT: [[ANYEXT1:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC2]](s1) + ; FAST-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY1]](i32) + ; FAST-NEXT: [[ANYEXT1:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[TRUNC2]](i1) ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 - ; FAST-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC3]](s1) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY5]] - ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(i32) = G_PHI [[ANYEXT]](i32), %bb.0, [[ANYEXT1]](i32), %bb.1 + ; FAST-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(i1) = G_TRUNC [[PHI]](i32) + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(i1) = COPY [[TRUNC3]](i1) + ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY3]](i1), [[COPY4]], [[COPY5]] + ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) + ; ; GREEDY-LABEL: name: phi_s1_s_v_sbranch ; GREEDY: bb.0: ; GREEDY-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: 
[[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; GREEDY-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; GREEDY-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY]](i32) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i1) + ; GREEDY-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[TRUNC]](i1) + ; GREEDY-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY1]](s32) - ; GREEDY-NEXT: [[ANYEXT1:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC2]](s1) + ; GREEDY-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY1]](i32) + ; GREEDY-NEXT: [[ANYEXT1:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[TRUNC2]](i1) ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 - ; GREEDY-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC3]](s1) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY5]] - ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(i32) = G_PHI [[ANYEXT]](i32), %bb.0, [[ANYEXT1]](i32), %bb.1 + ; GREEDY-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(i1) = G_TRUNC [[PHI]](i32) + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(i1) = COPY [[TRUNC3]](i1) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY3]](i1), [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0, $sgpr0, $sgpr1 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = COPY $sgpr1 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_TRUNC %0 - %5:_(s1) = G_ICMP intpred(eq), %2, %3 - G_BRCOND %5, %bb.1 + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = COPY $sgpr1 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_TRUNC %0(i32) + %5:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + G_BRCOND %5(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %6:_(s1) = G_TRUNC %1 + %6:_(i1) = G_TRUNC %1(i32) G_BR %bb.2 bb.2: - %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1 - %8:_(s32) = G_SELECT %7, %3, %0 - S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8 + 
%7:_(i1) = G_PHI %4(i1), %bb.0, %6(i1), %bb.1 + %8:_(i32) = G_SELECT %7(i1), %3, %0 + S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8(i32) + + + + ... @@ -2120,86 +2237,91 @@ body: | ; FAST-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; FAST-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; FAST-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY]](i32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i1) + ; FAST-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[TRUNC]](i1) + ; FAST-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY1]](s32) - ; FAST-NEXT: [[ANYEXT1:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC2]](s1) + ; FAST-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY1]](i32) + ; FAST-NEXT: [[ANYEXT1:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[TRUNC2]](i1) ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 - ; FAST-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC3]](s1) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY]] - ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(i32) = G_PHI [[ANYEXT]](i32), %bb.0, [[ANYEXT1]](i32), %bb.1 + ; FAST-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(i1) = G_TRUNC [[PHI]](i32) + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(i1) = COPY [[TRUNC3]](i1) + ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY3]](i1), [[COPY4]], [[COPY]] + ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) + ; ; GREEDY-LABEL: name: phi_s1_v_v_sbranch ; GREEDY: bb.0: ; GREEDY-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; GREEDY-NEXT: 
[[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; GREEDY-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; GREEDY-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY]](i32) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i1) + ; GREEDY-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[TRUNC]](i1) + ; GREEDY-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY1]](s32) - ; GREEDY-NEXT: [[ANYEXT1:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC2]](s1) + ; GREEDY-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY1]](i32) + ; GREEDY-NEXT: [[ANYEXT1:%[0-9]+]]:vgpr(i32) = G_ANYEXT [[TRUNC2]](i1) ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 - ; GREEDY-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC3]](s1) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY]] - ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(i32) = G_PHI [[ANYEXT]](i32), %bb.0, [[ANYEXT1]](i32), %bb.1 + ; GREEDY-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(i1) = G_TRUNC [[PHI]](i32) + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(i1) = COPY [[TRUNC3]](i1) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY3]](i1), [[COPY4]], [[COPY]] + ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0, $vgpr1, $sgpr0 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $sgpr0 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_TRUNC %0 - %5:_(s1) = G_ICMP intpred(eq), %2, %3 - G_BRCOND %5, %bb.1 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $sgpr0 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_TRUNC %0(i32) + %5:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + G_BRCOND %5(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %6:_(s1) = G_TRUNC %1 + %6:_(i1) = G_TRUNC %1(i32) G_BR %bb.2 bb.2: - %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1 - %8:_(s32) = G_SELECT %7, %3, %0 - S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8 + %7:_(i1) = G_PHI %4(i1), %bb.0, %6(i1), %bb.1 + %8:_(i32) = G_SELECT %7(i1), %3, %0 + S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8(i32) + + + + ... 
@@ -2214,84 +2336,89 @@ body: | ; FAST-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; FAST-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; FAST-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY]](i32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i1) + ; FAST-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC]](i1) + ; FAST-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32) - ; FAST-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s1) + ; FAST-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY1]](i32) + ; FAST-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC2]](i1) ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 - ; FAST-NEXT: [[TRUNC3:%[0-9]+]]:sgpr(s1) = G_TRUNC [[PHI]](s32) - ; FAST-NEXT: [[ZEXT1:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC3]](s1) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ZEXT1]](s32), [[C]], [[COPY]] - ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; FAST-NEXT: [[PHI:%[0-9]+]]:sgpr(i32) = G_PHI [[ANYEXT]](i32), %bb.0, [[ANYEXT1]](i32), %bb.1 + ; FAST-NEXT: [[TRUNC3:%[0-9]+]]:sgpr(i1) = G_TRUNC [[PHI]](i32) + ; FAST-NEXT: [[ZEXT1:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC3]](i1) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:sgpr(i32) = G_SELECT [[ZEXT1]](i32), [[C]], [[COPY]] + ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) + ; ; GREEDY-LABEL: name: phi_s1_s_s_sbranch ; GREEDY: bb.0: ; GREEDY-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; GREEDY-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; GREEDY-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY 
$sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY]](i32) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i1) + ; GREEDY-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC]](i1) + ; GREEDY-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32) - ; GREEDY-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s1) + ; GREEDY-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY1]](i32) + ; GREEDY-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC2]](i1) ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 - ; GREEDY-NEXT: [[TRUNC3:%[0-9]+]]:sgpr(s1) = G_TRUNC [[PHI]](s32) - ; GREEDY-NEXT: [[ZEXT1:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC3]](s1) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ZEXT1]](s32), [[C]], [[COPY]] - ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sgpr(i32) = G_PHI [[ANYEXT]](i32), %bb.0, [[ANYEXT1]](i32), %bb.1 + ; GREEDY-NEXT: [[TRUNC3:%[0-9]+]]:sgpr(i1) = G_TRUNC [[PHI]](i32) + ; GREEDY-NEXT: [[ZEXT1:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC3]](i1) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:sgpr(i32) = G_SELECT [[ZEXT1]](i32), [[C]], [[COPY]] + ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](i32) bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $sgpr0, $sgpr1, $sgpr2 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $sgpr2 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_TRUNC %0 - %5:_(s1) = G_ICMP intpred(eq), %2, %3 - G_BRCOND %5, %bb.1 + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $sgpr2 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_TRUNC %0(i32) + %5:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + G_BRCOND %5(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %6:_(s1) = G_TRUNC %1 + %6:_(i1) = G_TRUNC %1(i32) G_BR %bb.2 bb.2: - %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1 - %8:_(s32) = G_SELECT %7, %3, %0 - S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8 + %7:_(i1) = G_PHI %4(i1), %bb.0, %6(i1), %bb.1 + %8:_(i32) = G_SELECT %7(i1), %3, %0 + S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8(i32) + + + + ... 
@@ -2307,70 +2434,75 @@ body: | ; FAST-NEXT: successors: %bb.1(0x80000000) ; FAST-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 ; FAST-NEXT: G_BR %bb.1 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[COPY]](s32), %bb.0, %5(s32), %bb.1 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; FAST-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(i32) = G_PHI [[COPY]](i32), %bb.0, %5(i32), %bb.1 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY2]](i32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY1]](i32), [[C]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; FAST-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[PHI]](s32) + ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[PHI]](i32) + ; ; GREEDY-LABEL: name: phi_s32_s_loop_v_sbranch ; GREEDY: bb.0: ; GREEDY-NEXT: successors: %bb.1(0x80000000) ; GREEDY-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 ; GREEDY-NEXT: G_BR %bb.1 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[COPY]](s32), %bb.0, %5(s32), %bb.1 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; GREEDY-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(i32) = G_PHI [[COPY]](i32), %bb.0, %5(i32), %bb.1 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY2]](i32) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY1]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; GREEDY-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, 
implicit [[PHI]](s32) + ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[PHI]](i32) bb.0: - successors: %bb.1 + successors: %bb.1(0x80000000) liveins: $sgpr0, $sgpr1, $vgpr0 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $vgpr0 - %3:_(s32) = G_CONSTANT i32 0 + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $vgpr0 + %3:_(i32) = G_CONSTANT i32 0 G_BR %bb.1 bb.1: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) - %4:_(s32) = G_PHI %0, %bb.0, %5, %bb.1 - %5:_(s32) = COPY %2 - %6:_(s1) = G_ICMP intpred(eq), %1, %3 - G_BRCOND %6, %bb.1 + %4:_(i32) = G_PHI %0(i32), %bb.0, %5(i32), %bb.1 + %5:_(i32) = COPY %2(i32) + %6:_(i1) = G_ICMP intpred(eq), %1(i32), %3 + G_BRCOND %6(i1), %bb.1 G_BR %bb.2 bb.2: - S_SETPC_B64 undef $sgpr30_sgpr31, implicit %4 + S_SETPC_B64 undef $sgpr30_sgpr31, implicit %4(i32) + + + + ... @@ -2386,70 +2518,75 @@ body: | ; FAST-NEXT: successors: %bb.1(0x80000000) ; FAST-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 ; FAST-NEXT: G_BR %bb.1 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[COPY]](s32), %bb.0, %5(s32), %bb.1 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY [[COPY2]](s32) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; FAST-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(i32) = G_PHI [[COPY]](i32), %bb.0, %5(i32), %bb.1 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY [[COPY2]](i32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY1]](i32), [[C]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; FAST-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[PHI]](s32) + ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[PHI]](i32) + ; ; GREEDY-LABEL: name: phi_s32_s_loop_s_sbranch ; GREEDY: bb.0: ; GREEDY-NEXT: successors: %bb.1(0x80000000) ; GREEDY-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 ; GREEDY-NEXT: G_BR %bb.1 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[COPY]](s32), %bb.0, 
%5(s32), %bb.1 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY [[COPY2]](s32) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; GREEDY-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(i32) = G_PHI [[COPY]](i32), %bb.0, %5(i32), %bb.1 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY [[COPY2]](i32) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY1]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; GREEDY-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[PHI]](s32) + ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[PHI]](i32) bb.0: - successors: %bb.1 + successors: %bb.1(0x80000000) liveins: $sgpr0, $sgpr1, $sgpr2 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $sgpr2 - %3:_(s32) = G_CONSTANT i32 0 + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $sgpr2 + %3:_(i32) = G_CONSTANT i32 0 G_BR %bb.1 bb.1: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) - %4:_(s32) = G_PHI %0, %bb.0, %5, %bb.1 - %5:_(s32) = COPY %2 - %6:_(s1) = G_ICMP intpred(eq), %1, %3 - G_BRCOND %6, %bb.1 + %4:_(i32) = G_PHI %0(i32), %bb.0, %5(i32), %bb.1 + %5:_(i32) = COPY %2(i32) + %6:_(i1) = G_ICMP intpred(eq), %1(i32), %3 + G_BRCOND %6(i1), %bb.1 G_BR %bb.2 bb.2: - S_SETPC_B64 undef $sgpr30_sgpr31, implicit %4 + S_SETPC_B64 undef $sgpr30_sgpr31, implicit %4(i32) + + + + ... 
@@ -2464,74 +2601,80 @@ body: | ; FAST-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; FAST-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; FAST-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[COPY]](s32), %bb.0, %6(s32), %bb.2 + ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(i32) = G_PHI [[COPY]](i32), %bb.0, %6(i32), %bb.2 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: ; FAST-NEXT: successors: %bb.1(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[PHI]](s32), %bb.1 + ; FAST-NEXT: [[PHI1:%[0-9]+]]:vgpr(i32) = G_PHI [[COPY]](i32), %bb.0, [[PHI]](i32), %bb.1 ; FAST-NEXT: G_BR %bb.1 + ; ; GREEDY-LABEL: name: phi_s32_ss_sbranch_cycle ; GREEDY: bb.0: ; GREEDY-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; GREEDY-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; GREEDY-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[COPY]](s32), %bb.0, %6(s32), %bb.2 + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(i32) = G_PHI [[COPY]](i32), %bb.0, %6(i32), %bb.2 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: successors: %bb.1(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[PHI]](s32), %bb.1 + ; GREEDY-NEXT: 
[[PHI1:%[0-9]+]]:vgpr(i32) = G_PHI [[COPY]](i32), %bb.0, [[PHI]](i32), %bb.1 ; GREEDY-NEXT: G_BR %bb.1 bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $sgpr0, $sgpr1, $sgpr2 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $sgpr2 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(eq), %2, %3 - G_BRCOND %4, %bb.1 + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $sgpr2 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + G_BRCOND %4(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 - %5:_(s32) = G_PHI %0, %bb.0, %6, %bb.2 + successors: %bb.2(0x80000000) + + %5:_(i32) = G_PHI %0(i32), %bb.0, %6(i32), %bb.2 G_BR %bb.2 bb.2: - successors: %bb.1 - %6:_(s32) = G_PHI %0, %bb.0, %5, %bb.1 + successors: %bb.1(0x80000000) + + %6:_(i32) = G_PHI %0(i32), %bb.0, %5(i32), %bb.1 G_BR %bb.1 + + + ... --- @@ -2545,74 +2688,80 @@ body: | ; FAST-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: liveins: $vgpr0, $sgpr1, $sgpr2 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; FAST-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; FAST-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[COPY]](s32), %bb.0, %6(s32), %bb.2 + ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(i32) = G_PHI [[COPY]](i32), %bb.0, %6(i32), %bb.2 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: ; FAST-NEXT: successors: %bb.1(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[PHI]](s32), %bb.1 + ; FAST-NEXT: [[PHI1:%[0-9]+]]:vgpr(i32) = G_PHI [[COPY]](i32), %bb.0, [[PHI]](i32), %bb.1 ; FAST-NEXT: G_BR %bb.1 + ; ; GREEDY-LABEL: name: phi_s32_vs_sbranch_cycle ; GREEDY: bb.0: ; GREEDY-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: liveins: $vgpr0, $sgpr1, $sgpr2 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; GREEDY-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: 
[[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; GREEDY-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[COPY]](s32), %bb.0, %6(s32), %bb.2 + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(i32) = G_PHI [[COPY]](i32), %bb.0, %6(i32), %bb.2 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: successors: %bb.1(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[PHI]](s32), %bb.1 + ; GREEDY-NEXT: [[PHI1:%[0-9]+]]:vgpr(i32) = G_PHI [[COPY]](i32), %bb.0, [[PHI]](i32), %bb.1 ; GREEDY-NEXT: G_BR %bb.1 bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $vgpr0, $sgpr1, $sgpr2 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $sgpr2 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(eq), %2, %3 - G_BRCOND %4, %bb.1 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $sgpr2 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + G_BRCOND %4(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 - %5:_(s32) = G_PHI %0, %bb.0, %6, %bb.2 + successors: %bb.2(0x80000000) + + %5:_(i32) = G_PHI %0(i32), %bb.0, %6(i32), %bb.2 G_BR %bb.2 bb.2: - successors: %bb.1 - %6:_(s32) = G_PHI %0, %bb.0, %5, %bb.1 + successors: %bb.1(0x80000000) + + %6:_(i32) = G_PHI %0(i32), %bb.0, %5(i32), %bb.1 G_BR %bb.1 + + + ... 
--- @@ -2626,74 +2775,79 @@ body: | ; FAST-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: liveins: $agpr0, $agpr1, $sgpr2 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:agpr(s32) = COPY $agpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:agpr(s32) = COPY $agpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; FAST-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; FAST-NEXT: [[COPY:%[0-9]+]]:agpr(i32) = COPY $agpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:agpr(i32) = COPY $agpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; FAST-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY3:%[0-9]+]]:agpr(s32) = COPY [[COPY1]](s32) + ; FAST-NEXT: [[COPY3:%[0-9]+]]:agpr(i32) = COPY [[COPY1]](i32) ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:agpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[COPY3]](s32), %bb.1 - ; FAST-NEXT: $agpr0 = COPY [[PHI]](s32) + ; FAST-NEXT: [[PHI:%[0-9]+]]:agpr(i32) = G_PHI [[COPY]](i32), %bb.0, [[COPY3]](i32), %bb.1 + ; FAST-NEXT: $agpr0 = COPY [[PHI]](i32) ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 + ; ; GREEDY-LABEL: name: phi_s32_aa_sbranch ; GREEDY: bb.0: ; GREEDY-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: liveins: $agpr0, $agpr1, $sgpr2 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:agpr(s32) = COPY $agpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:agpr(s32) = COPY $agpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; GREEDY-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:agpr(i32) = COPY $agpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:agpr(i32) = COPY $agpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; GREEDY-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:agpr(s32) = COPY [[COPY1]](s32) + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:agpr(i32) = COPY [[COPY1]](i32) ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:agpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[COPY3]](s32), %bb.1 - ; GREEDY-NEXT: $agpr0 = COPY [[PHI]](s32) + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:agpr(i32) = G_PHI [[COPY]](i32), %bb.0, [[COPY3]](i32), 
%bb.1 + ; GREEDY-NEXT: $agpr0 = COPY [[PHI]](i32) ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $agpr0, $agpr1, $sgpr2 - %0:_(s32) = COPY $agpr0 - %1:_(s32) = COPY $agpr1 - %2:_(s32) = COPY $sgpr2 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(eq), %2, %3 - G_BRCOND %4, %bb.1 + %0:_(i32) = COPY $agpr0 + %1:_(i32) = COPY $agpr1 + %2:_(i32) = COPY $sgpr2 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + G_BRCOND %4(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %5:_(s32) = COPY %1 + %5:_(i32) = COPY %1(i32) G_BR %bb.2 bb.2: - %6:_(s32) = G_PHI %0, %bb.0, %5, %bb.1 - $agpr0 = COPY %6 + %6:_(i32) = G_PHI %0(i32), %bb.0, %5(i32), %bb.1 + $agpr0 = COPY %6(i32) S_SETPC_B64 undef $sgpr30_sgpr31 + + + + ... --- @@ -2707,74 +2861,79 @@ body: | ; FAST-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: liveins: $agpr0, $vgpr0, $sgpr2 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:agpr(s32) = COPY $agpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; FAST-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; FAST-NEXT: [[COPY:%[0-9]+]]:agpr(i32) = COPY $agpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; FAST-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[COPY3]](s32), %bb.1 - ; FAST-NEXT: $agpr0 = COPY [[PHI]](s32) + ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(i32) = G_PHI [[COPY]](i32), %bb.0, [[COPY3]](i32), %bb.1 + ; FAST-NEXT: $agpr0 = COPY [[PHI]](i32) ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 + ; ; GREEDY-LABEL: name: phi_s32_av_sbranch ; GREEDY: bb.0: ; GREEDY-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: liveins: $agpr0, $vgpr0, $sgpr2 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:agpr(s32) = COPY $agpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; GREEDY-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:agpr(i32) = COPY $agpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GREEDY-NEXT: 
[[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; GREEDY-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[COPY3]](s32), %bb.1 - ; GREEDY-NEXT: $agpr0 = COPY [[PHI]](s32) + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(i32) = G_PHI [[COPY]](i32), %bb.0, [[COPY3]](i32), %bb.1 + ; GREEDY-NEXT: $agpr0 = COPY [[PHI]](i32) ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $agpr0, $vgpr0, $sgpr2 - %0:_(s32) = COPY $agpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = COPY $sgpr2 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(eq), %2, %3 - G_BRCOND %4, %bb.1 + %0:_(i32) = COPY $agpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = COPY $sgpr2 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + G_BRCOND %4(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %5:_(s32) = COPY %1 + %5:_(i32) = COPY %1(i32) G_BR %bb.2 bb.2: - %6:_(s32) = G_PHI %0, %bb.0, %5, %bb.1 - $agpr0 = COPY %6 + %6:_(i32) = G_PHI %0(i32), %bb.0, %5(i32), %bb.1 + $agpr0 = COPY %6(i32) S_SETPC_B64 undef $sgpr30_sgpr31 + + + + ... --- name: phi_s32_va_sbranch @@ -2787,74 +2946,79 @@ body: | ; FAST-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: liveins: $agpr0, $vgpr0, $sgpr2 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:agpr(s32) = COPY $agpr0 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; FAST-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:agpr(i32) = COPY $agpr0 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; FAST-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY3:%[0-9]+]]:agpr(s32) = COPY [[COPY1]](s32) + ; FAST-NEXT: [[COPY3:%[0-9]+]]:agpr(i32) = COPY [[COPY1]](i32) ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[COPY3]](s32), %bb.1 - ; FAST-NEXT: $agpr0 = COPY [[PHI]](s32) + ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(i32) = G_PHI [[COPY]](i32), %bb.0, [[COPY3]](i32), %bb.1 + ; FAST-NEXT: $agpr0 = COPY [[PHI]](i32) ; FAST-NEXT: S_SETPC_B64 undef 
$sgpr30_sgpr31 + ; ; GREEDY-LABEL: name: phi_s32_va_sbranch ; GREEDY: bb.0: ; GREEDY-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: liveins: $agpr0, $vgpr0, $sgpr2 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:agpr(s32) = COPY $agpr0 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; GREEDY-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:agpr(i32) = COPY $agpr0 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; GREEDY-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:agpr(s32) = COPY [[COPY1]](s32) + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:agpr(i32) = COPY [[COPY1]](i32) ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[COPY3]](s32), %bb.1 - ; GREEDY-NEXT: $agpr0 = COPY [[PHI]](s32) + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(i32) = G_PHI [[COPY]](i32), %bb.0, [[COPY3]](i32), %bb.1 + ; GREEDY-NEXT: $agpr0 = COPY [[PHI]](i32) ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $agpr0, $vgpr0, $sgpr2 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $agpr0 - %2:_(s32) = COPY $sgpr2 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(eq), %2, %3 - G_BRCOND %4, %bb.1 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $agpr0 + %2:_(i32) = COPY $sgpr2 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + G_BRCOND %4(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %5:_(s32) = COPY %1 + %5:_(i32) = COPY %1(i32) G_BR %bb.2 bb.2: - %6:_(s32) = G_PHI %0, %bb.0, %5, %bb.1 - $agpr0 = COPY %6 + %6:_(i32) = G_PHI %0(i32), %bb.0, %5(i32), %bb.1 + $agpr0 = COPY %6(i32) S_SETPC_B64 undef $sgpr30_sgpr31 + + + + ... 
--- @@ -2868,74 +3032,79 @@ body: | ; FAST-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: liveins: $agpr0, $sgpr0, $sgpr2 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:agpr(s32) = COPY $agpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; FAST-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; FAST-NEXT: [[COPY:%[0-9]+]]:agpr(i32) = COPY $agpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; FAST-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY [[COPY1]](s32) + ; FAST-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY [[COPY1]](i32) ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[COPY3]](s32), %bb.1 - ; FAST-NEXT: $agpr0 = COPY [[PHI]](s32) + ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(i32) = G_PHI [[COPY]](i32), %bb.0, [[COPY3]](i32), %bb.1 + ; FAST-NEXT: $agpr0 = COPY [[PHI]](i32) ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 + ; ; GREEDY-LABEL: name: phi_s32_as_sbranch ; GREEDY: bb.0: ; GREEDY-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: liveins: $agpr0, $sgpr0, $sgpr2 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:agpr(s32) = COPY $agpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; GREEDY-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:agpr(i32) = COPY $agpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; GREEDY-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY [[COPY1]](s32) + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY [[COPY1]](i32) ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[COPY3]](s32), %bb.1 - ; GREEDY-NEXT: $agpr0 = COPY [[PHI]](s32) + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(i32) = G_PHI [[COPY]](i32), %bb.0, [[COPY3]](i32), 
%bb.1 + ; GREEDY-NEXT: $agpr0 = COPY [[PHI]](i32) ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $agpr0, $sgpr0, $sgpr2 - %0:_(s32) = COPY $agpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s32) = COPY $sgpr2 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(eq), %2, %3 - G_BRCOND %4, %bb.1 + %0:_(i32) = COPY $agpr0 + %1:_(i32) = COPY $sgpr0 + %2:_(i32) = COPY $sgpr2 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + G_BRCOND %4(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %5:_(s32) = COPY %1 + %5:_(i32) = COPY %1(i32) G_BR %bb.2 bb.2: - %6:_(s32) = G_PHI %0, %bb.0, %5, %bb.1 - $agpr0 = COPY %6 + %6:_(i32) = G_PHI %0(i32), %bb.0, %5(i32), %bb.1 + $agpr0 = COPY %6(i32) S_SETPC_B64 undef $sgpr30_sgpr31 + + + + ... --- @@ -2949,72 +3118,77 @@ body: | ; FAST-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: liveins: $agpr0, $sgpr0, $sgpr2 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:agpr(s32) = COPY $agpr0 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; FAST-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:agpr(i32) = COPY $agpr0 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; FAST-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY3:%[0-9]+]]:agpr(s32) = COPY [[COPY1]](s32) + ; FAST-NEXT: [[COPY3:%[0-9]+]]:agpr(i32) = COPY [[COPY1]](i32) ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[COPY3]](s32), %bb.1 - ; FAST-NEXT: $agpr0 = COPY [[PHI]](s32) + ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(i32) = G_PHI [[COPY]](i32), %bb.0, [[COPY3]](i32), %bb.1 + ; FAST-NEXT: $agpr0 = COPY [[PHI]](i32) ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 + ; ; GREEDY-LABEL: name: phi_s32_sa_sbranch ; GREEDY: bb.0: ; GREEDY-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: liveins: $agpr0, $sgpr0, $sgpr2 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:agpr(s32) = COPY $agpr0 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; GREEDY-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:agpr(i32) = COPY $agpr0 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GREEDY-NEXT: 
[[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; GREEDY-NEXT: G_BRCOND [[ZEXT]](i32), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:agpr(s32) = COPY [[COPY1]](s32) + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:agpr(i32) = COPY [[COPY1]](i32) ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[COPY3]](s32), %bb.1 - ; GREEDY-NEXT: $agpr0 = COPY [[PHI]](s32) + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(i32) = G_PHI [[COPY]](i32), %bb.0, [[COPY3]](i32), %bb.1 + ; GREEDY-NEXT: $agpr0 = COPY [[PHI]](i32) ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: - successors: %bb.1, %bb.2 + successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $agpr0, $sgpr0, $sgpr2 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $agpr0 - %2:_(s32) = COPY $sgpr2 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(eq), %2, %3 - G_BRCOND %4, %bb.1 + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $agpr0 + %2:_(i32) = COPY $sgpr2 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + G_BRCOND %4(i1), %bb.1 G_BR %bb.2 bb.1: - successors: %bb.2 + successors: %bb.2(0x80000000) - %5:_(s32) = COPY %1 + %5:_(i32) = COPY %1(i32) G_BR %bb.2 bb.2: - %6:_(s32) = G_PHI %0, %bb.0, %5, %bb.1 - $agpr0 = COPY %6 + %6:_(i32) = G_PHI %0(i32), %bb.0, %5(i32), %bb.1 + $agpr0 = COPY %6(i32) S_SETPC_B64 undef $sgpr30_sgpr31 + + + + ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-pseudo-scalar-transcendental.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-pseudo-scalar-transcendental.mir index af0db8a692775..90705b3533f47 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-pseudo-scalar-transcendental.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-pseudo-scalar-transcendental.mir @@ -12,12 +12,14 @@ body: | ; CHECK-LABEL: name: v_s_exp_f32 ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[FEXP2_:%[0-9]+]]:sgpr(s32) = G_FEXP2 [[FEXP2_]] - ; CHECK-NEXT: $vgpr0 = COPY [[FEXP2_]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = G_FEXP2 %1 - $vgpr0 = COPY %1(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[FEXP2_:%[0-9]+]]:sgpr(f32) = G_FEXP2 [[FEXP2_]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(i32) = G_BITCAST [[FEXP2_]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(f32) = G_FEXP2 %1 + %2:_(i32) = G_BITCAST %1(f32) + $vgpr0 = COPY %2(i32) ... 
--- name: v_s_exp_f16 @@ -29,16 +31,20 @@ body: | ; CHECK-LABEL: name: v_s_exp_f16 ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[FEXP2_:%[0-9]+]]:sgpr(s16) = G_FEXP2 [[TRUNC]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[FEXP2_]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s16) = G_TRUNC %0(s32) - %2:_(s16) = G_FEXP2 %1 - %3:_(s32) = G_ANYEXT %2(s16) - $vgpr0 = COPY %3(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f16) = G_BITCAST [[TRUNC]](i16) + ; CHECK-NEXT: [[FEXP2_:%[0-9]+]]:sgpr(f16) = G_FEXP2 [[BITCAST]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(i16) = G_BITCAST [[FEXP2_]](f16) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[BITCAST1]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(f16) = G_BITCAST %1(i16) + %3:_(f16) = G_FEXP2 %2 + %4:_(i16) = G_BITCAST %3(f16) + %5:_(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... --- @@ -51,12 +57,16 @@ body: | ; CHECK-LABEL: name: v_s_log_f32 ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[FLOG2_:%[0-9]+]]:sgpr(s32) = G_FLOG2 [[COPY]] - ; CHECK-NEXT: $vgpr0 = COPY [[FLOG2_]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = G_FLOG2 %0 - $vgpr0 = COPY %1(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[FLOG2_:%[0-9]+]]:sgpr(f32) = G_FLOG2 [[BITCAST]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(i32) = G_BITCAST [[FLOG2_]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = G_FLOG2 %1 + %3:_(i32) = G_BITCAST %2(f32) + $vgpr0 = COPY %3(i32) ... --- @@ -69,16 +79,20 @@ body: | ; CHECK-LABEL: name: v_s_log_f16 ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[FLOG2_:%[0-9]+]]:sgpr(s16) = G_FLOG2 [[TRUNC]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[FLOG2_]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s16) = G_TRUNC %0(s32) - %2:_(s16) = G_FLOG2 %1 - %3:_(s32) = G_ANYEXT %2(s16) - $vgpr0 = COPY %3(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f16) = G_BITCAST [[TRUNC]](i16) + ; CHECK-NEXT: [[FLOG2_:%[0-9]+]]:sgpr(f16) = G_FLOG2 [[BITCAST]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(i16) = G_BITCAST [[FLOG2_]](f16) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[BITCAST1]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(f16) = G_BITCAST %1(i16) + %3:_(f16) = G_FLOG2 %2 + %4:_(i16) = G_BITCAST %3(f16) + %5:_(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... 
--- @@ -91,12 +105,16 @@ body: | ; CHECK-LABEL: name: v_s_rcp_f32 ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = nnan ninf nsz arcp contract afn reassoc G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[INT]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = nnan ninf nsz arcp contract afn reassoc G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %0(s32) - $vgpr0 = COPY %1(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(f32) = nnan ninf nsz arcp contract afn reassoc G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[BITCAST]](f32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(i32) = G_BITCAST [[INT]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = nnan ninf nsz arcp contract afn reassoc G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %1(f32) + %3:_(i32) = G_BITCAST %2(f32) + $vgpr0 = COPY %3(i32) ... --- @@ -109,16 +127,20 @@ body: | ; CHECK-LABEL: name: v_s_rcp_f16 ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s16) = nnan ninf nsz arcp contract afn reassoc G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC]](s16) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[INT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s16) = G_TRUNC %0(s32) - %2:_(s16) = nnan ninf nsz arcp contract afn reassoc G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %1(s16) - %3:_(s32) = G_ANYEXT %2(s16) - $vgpr0 = COPY %3(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f16) = G_BITCAST [[TRUNC]](i16) + ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(f16) = nnan ninf nsz arcp contract afn reassoc G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[BITCAST]](f16) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(i16) = G_BITCAST [[INT]](f16) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[BITCAST1]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(f16) = G_BITCAST %1(i16) + %3:_(f16) = nnan ninf nsz arcp contract afn reassoc G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %2(f16) + %4:_(i16) = G_BITCAST %3(f16) + %5:_(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... 
--- @@ -131,12 +153,16 @@ body: | ; CHECK-LABEL: name: v_s_rsq_f32 ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), [[COPY]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[INT]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %0(s32) - $vgpr0 = COPY %1(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), [[BITCAST]](f32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(i32) = G_BITCAST [[INT]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %1(f32) + %3:_(i32) = G_BITCAST %2(f32) + $vgpr0 = COPY %3(i32) ... --- @@ -149,16 +175,20 @@ body: | ; CHECK-LABEL: name: v_s_rsq_f16 ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), [[TRUNC]](s16) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[INT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s16) = G_TRUNC %0(s32) - %2:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %1(s16) - %3:_(s32) = G_ANYEXT %2(s16) - $vgpr0 = COPY %3(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f16) = G_BITCAST [[TRUNC]](i16) + ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), [[BITCAST]](f16) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(i16) = G_BITCAST [[INT]](f16) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[BITCAST1]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(f16) = G_BITCAST %1(i16) + %3:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %2(f16) + %4:_(i16) = G_BITCAST %3(f16) + %5:_(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... --- @@ -171,12 +201,16 @@ body: | ; CHECK-LABEL: name: v_s_sqrt_f32 ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[FSQRT:%[0-9]+]]:sgpr(s32) = G_FSQRT [[COPY]] - ; CHECK-NEXT: $vgpr0 = COPY [[FSQRT]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = G_FSQRT %0 - $vgpr0 = COPY %1(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[FSQRT:%[0-9]+]]:sgpr(f32) = G_FSQRT [[BITCAST]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(i32) = G_BITCAST [[FSQRT]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = G_FSQRT %1 + %3:_(i32) = G_BITCAST %2(f32) + $vgpr0 = COPY %3(i32) ... 
--- @@ -189,16 +223,20 @@ body: | ; CHECK-LABEL: name: v_s_sqrt_f16 ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[FSQRT:%[0-9]+]]:sgpr(s16) = G_FSQRT [[TRUNC]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[FSQRT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s16) = G_TRUNC %0(s32) - %2:_(s16) = G_FSQRT %1 - %3:_(s32) = G_ANYEXT %2(s16) - $vgpr0 = COPY %3(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f16) = G_BITCAST [[TRUNC]](i16) + ; CHECK-NEXT: [[FSQRT:%[0-9]+]]:sgpr(f16) = G_FSQRT [[BITCAST]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(i16) = G_BITCAST [[FSQRT]](f16) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[BITCAST1]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(f16) = G_BITCAST %1(i16) + %3:_(f16) = G_FSQRT %2 + %4:_(i16) = G_BITCAST %3(f16) + %5:_(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... --- @@ -211,12 +249,16 @@ body: | ; CHECK-LABEL: name: v_amdgcn_sqrt_f32 ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sqrt), [[COPY]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[INT]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sqrt), %0(s32) - $vgpr0 = COPY %1(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sqrt), [[BITCAST]](f32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(i32) = G_BITCAST [[INT]](f32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sqrt), %1(f32) + %3:_(i32) = G_BITCAST %2(f32) + $vgpr0 = COPY %3(i32) ... 
--- @@ -229,15 +271,19 @@ body: | ; CHECK-LABEL: name: v_amdgcn_sqrt_f16 ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sqrt), [[TRUNC]](s16) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[INT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s16) = G_TRUNC %0(s32) - %2:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sqrt), %1(s16) - %3:_(s32) = G_ANYEXT %2(s16) - $vgpr0 = COPY %3(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f16) = G_BITCAST [[TRUNC]](i16) + ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sqrt), [[BITCAST]](f16) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(i16) = G_BITCAST [[INT]](f16) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[BITCAST1]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(f16) = G_BITCAST %1(i16) + %3:_(f16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sqrt), %2(f16) + %4:_(i16) = G_BITCAST %3(f16) + %5:_(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ptr-add.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ptr-add.mir index 55048d51918fa..4f3f44f8307e2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ptr-add.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ptr-add.mir @@ -14,11 +14,11 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[COPY]], [[C]](i64) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s64) = G_CONSTANT i64 1 - %2:_(p1) = G_PTR_ADD %0, %1 + %1:_(i64) = G_CONSTANT i64 1 + %2:_(p1) = G_PTR_ADD %0, %1(i64) ... --- @@ -33,11 +33,11 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[COPY]], [[COPY1]](s64) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i64) = COPY $sgpr2_sgpr3 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[COPY]], [[COPY1]](i64) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s64) = COPY $sgpr2_sgpr3 - %2:_(p1) = G_PTR_ADD %0, %1 + %1:_(i64) = COPY $sgpr2_sgpr3 + %2:_(p1) = G_PTR_ADD %0, %1(i64) ... 
--- @@ -52,12 +52,12 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY [[C]](s64) - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[COPY1]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY [[C]](i64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[COPY1]](i64) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_CONSTANT i64 1 - %2:_(p1) = G_PTR_ADD %0, %1 + %1:_(i64) = G_CONSTANT i64 1 + %2:_(p1) = G_PTR_ADD %0, %1(i64) ... --- @@ -72,12 +72,12 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1, $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64) - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[COPY2]](s64) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i64) = COPY [[COPY1]](i64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[COPY2]](i64) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $sgpr0_sgpr1 - %2:_(p1) = G_PTR_ADD %0, %1 + %1:_(i64) = COPY $sgpr0_sgpr1 + %2:_(p1) = G_PTR_ADD %0, %1(i64) ... --- @@ -92,9 +92,9 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[COPY1]](s64) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[COPY1]](i64) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(p1) = G_PTR_ADD %0, %1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(p1) = G_PTR_ADD %0, %1(i64) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ptrmask.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ptrmask.mir index 1cddf55a3a53e..e8d9932dd8928 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ptrmask.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ptrmask.mir @@ -14,11 +14,11 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:sgpr(p1) = G_PTRMASK [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:sgpr(p1) = G_PTRMASK [[COPY]], [[C]](i64) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s64) = G_CONSTANT i64 1 - %2:_(p1) = G_PTRMASK %0, %1 + %1:_(i64) = G_CONSTANT i64 1 + %2:_(p1) = G_PTRMASK %0, %1(i64) ... 
--- @@ -33,11 +33,11 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 - ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:sgpr(p1) = G_PTRMASK [[COPY]], [[COPY1]](s64) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i64) = COPY $sgpr2_sgpr3 + ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:sgpr(p1) = G_PTRMASK [[COPY]], [[COPY1]](i64) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s64) = COPY $sgpr2_sgpr3 - %2:_(p1) = G_PTRMASK %0, %1 + %1:_(i64) = COPY $sgpr2_sgpr3 + %2:_(p1) = G_PTRMASK %0, %1(i64) ... --- @@ -52,12 +52,12 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY [[C]](s64) - ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:vgpr(p1) = G_PTRMASK [[COPY]], [[COPY1]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY [[C]](i64) + ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:vgpr(p1) = G_PTRMASK [[COPY]], [[COPY1]](i64) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_CONSTANT i64 1 - %2:_(p1) = G_PTRMASK %0, %1 + %1:_(i64) = G_CONSTANT i64 1 + %2:_(p1) = G_PTRMASK %0, %1(i64) ... --- @@ -72,12 +72,12 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1, $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64) - ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:vgpr(p1) = G_PTRMASK [[COPY]], [[COPY2]](s64) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i64) = COPY [[COPY1]](i64) + ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:vgpr(p1) = G_PTRMASK [[COPY]], [[COPY2]](i64) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $sgpr0_sgpr1 - %2:_(p1) = G_PTRMASK %0, %1 + %1:_(i64) = COPY $sgpr0_sgpr1 + %2:_(p1) = G_PTRMASK %0, %1(i64) ... --- @@ -92,9 +92,9 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:vgpr(p1) = G_PTRMASK [[COPY]], [[COPY1]](s64) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:vgpr(p1) = G_PTRMASK [[COPY]], [[COPY1]](i64) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(p1) = G_PTRMASK %0, %1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(p1) = G_PTRMASK %0, %1(i64) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ptrtoint.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ptrtoint.mir index 2db1a5b1d2ba2..eeb95ab8d1221 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ptrtoint.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ptrtoint.mir @@ -13,9 +13,9 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:sgpr(s64) = G_PTRTOINT [[COPY]](p1) + ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:sgpr(i64) = G_PTRTOINT [[COPY]](p1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s64) = G_PTRTOINT %0 + %1:_(i64) = G_PTRTOINT %0(p1) ... 
--- @@ -29,7 +29,7 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:vgpr(s64) = G_PTRTOINT [[COPY]](p1) + ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:vgpr(i64) = G_PTRTOINT [[COPY]](p1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_PTRTOINT %0 + %1:_(i64) = G_PTRTOINT %0(p1) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-reg-sequence.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-reg-sequence.mir index cee18c420e6c8..05e2a4beda3ef 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-reg-sequence.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-reg-sequence.mir @@ -14,12 +14,12 @@ body: | ; CHECK-LABEL: name: reg_sequence_ss_vreg ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr(s64) = REG_SEQUENCE [[COPY]](s32), %subreg.sub0, [[COPY1]](s32), %subreg.sub1 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s64) = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr(i64) = REG_SEQUENCE [[COPY]](i32), %subreg.sub0, [[COPY1]](i32), %subreg.sub1 + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i64) = REG_SEQUENCE %0(i32), %subreg.sub0, %1(i32), %subreg.sub1 ... --- @@ -34,8 +34,8 @@ body: | ; CHECK-LABEL: name: reg_sequence_ss_physreg ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr(s64) = REG_SEQUENCE $sgpr0, %subreg.sub0, $sgpr1, %subreg.sub1 - %0:_(s64) = REG_SEQUENCE $sgpr0, %subreg.sub0, $sgpr1, %subreg.sub1 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr(i64) = REG_SEQUENCE $sgpr0, %subreg.sub0, $sgpr1, %subreg.sub1 + %0:_(i64) = REG_SEQUENCE $sgpr0, %subreg.sub0, $sgpr1, %subreg.sub1 ... --- @@ -50,12 +50,12 @@ body: | ; CHECK-LABEL: name: reg_sequence_sv_vreg ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr(s64) = REG_SEQUENCE [[COPY]](s32), %subreg.sub0, [[COPY1]](s32), %subreg.sub1 - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s64) = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr(i64) = REG_SEQUENCE [[COPY]](i32), %subreg.sub0, [[COPY1]](i32), %subreg.sub1 + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i64) = REG_SEQUENCE %0(i32), %subreg.sub0, %1(i32), %subreg.sub1 ... --- @@ -70,8 +70,8 @@ body: | ; CHECK-LABEL: name: reg_sequence_sv_physreg ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr(s64) = REG_SEQUENCE $sgpr0, %subreg.sub0, $vgpr0, %subreg.sub1 - %0:_(s64) = REG_SEQUENCE $sgpr0, %subreg.sub0, $vgpr0, %subreg.sub1 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr(i64) = REG_SEQUENCE $sgpr0, %subreg.sub0, $vgpr0, %subreg.sub1 + %0:_(i64) = REG_SEQUENCE $sgpr0, %subreg.sub0, $vgpr0, %subreg.sub1 ... 
--- @@ -86,12 +86,12 @@ body: | ; CHECK-LABEL: name: reg_sequence_vs_vreg ; CHECK: liveins: $vgpr0, $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr(s64) = REG_SEQUENCE [[COPY]](s32), %subreg.sub0, [[COPY1]](s32), %subreg.sub1 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s64) = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr(i64) = REG_SEQUENCE [[COPY]](i32), %subreg.sub0, [[COPY1]](i32), %subreg.sub1 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr0 + %2:_(i64) = REG_SEQUENCE %0(i32), %subreg.sub0, %1(i32), %subreg.sub1 ... --- @@ -106,8 +106,8 @@ body: | ; CHECK-LABEL: name: reg_sequence_vs_physreg ; CHECK: liveins: $vgpr0, $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr(s64) = REG_SEQUENCE $vgpr0, %subreg.sub0, $sgpr0, %subreg.sub1 - %0:_(s64) = REG_SEQUENCE $vgpr0, %subreg.sub0, $sgpr0, %subreg.sub1 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr(i64) = REG_SEQUENCE $vgpr0, %subreg.sub0, $sgpr0, %subreg.sub1 + %0:_(i64) = REG_SEQUENCE $vgpr0, %subreg.sub0, $sgpr0, %subreg.sub1 ... --- @@ -122,12 +122,12 @@ body: | ; CHECK-LABEL: name: reg_sequence_vv_vreg ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr(s64) = REG_SEQUENCE [[COPY]](s32), %subreg.sub0, [[COPY1]](s32), %subreg.sub1 - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s64) = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr(i64) = REG_SEQUENCE [[COPY]](i32), %subreg.sub0, [[COPY1]](i32), %subreg.sub1 + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i64) = REG_SEQUENCE %0(i32), %subreg.sub0, %1(i32), %subreg.sub1 ... --- @@ -142,7 +142,6 @@ body: | ; CHECK-LABEL: name: reg_sequence_vv_physreg ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr(s64) = REG_SEQUENCE $vgpr0, %subreg.sub0, $vgpr1, %subreg.sub1 - %0:_(s64) = REG_SEQUENCE $vgpr0, %subreg.sub0, $vgpr1, %subreg.sub1 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr(i64) = REG_SEQUENCE $vgpr0, %subreg.sub0, $vgpr1, %subreg.sub1 + %0:_(i64) = REG_SEQUENCE $vgpr0, %subreg.sub0, $vgpr1, %subreg.sub1 ... 
- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sadde.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sadde.mir index 524714df5b974..e1e903a2944bf 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sadde.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sadde.mir @@ -12,33 +12,34 @@ body: | ; FAST-LABEL: name: sadde_s32_sss ; FAST: liveins: $sgpr0, $sgpr1, $sgpr2 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; FAST-NEXT: [[SADDE:%[0-9]+]]:sgpr(s32), [[SADDE1:%[0-9]+]]:sgpr(s32) = G_SADDE [[COPY]], [[COPY1]], [[ZEXT]] - ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[SADDE1]](s32) + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; FAST-NEXT: [[SADDE:%[0-9]+]]:sgpr(i32), [[SADDE1:%[0-9]+]]:sgpr(i32) = G_SADDE [[COPY]], [[COPY1]], [[ZEXT]] + ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[SADDE1]](i32) + ; ; GREEDY-LABEL: name: sadde_s32_sss ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; GREEDY-NEXT: [[SADDE:%[0-9]+]]:sgpr(s32), [[SADDE1:%[0-9]+]]:sgpr(s32) = G_SADDE [[COPY]], [[COPY1]], [[ZEXT]] - ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[SADDE1]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $sgpr2 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(eq), %2, %3 - %5:_(s32), %6:_(s1) = G_SADDE %0, %1, %4 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; GREEDY-NEXT: [[SADDE:%[0-9]+]]:sgpr(i32), [[SADDE1:%[0-9]+]]:sgpr(i32) = G_SADDE [[COPY]], [[COPY1]], [[ZEXT]] + ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[SADDE1]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $sgpr2 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + %5:_(i32), %6:_(i1) = G_SADDE %0, %1, %4 ... 
--- @@ -51,33 +52,34 @@ body: | ; FAST-LABEL: name: sadde_s32_vss ; FAST: liveins: $vgpr0, $sgpr0, $sgpr1 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[SADDE:%[0-9]+]]:vgpr(s32), [[SADDE1:%[0-9]+]]:vcc(s1) = G_SADDE [[COPY]], [[COPY3]], [[COPY4]] + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; FAST-NEXT: [[SADDE:%[0-9]+]]:vgpr(i32), [[SADDE1:%[0-9]+]]:vcc(i1) = G_SADDE [[COPY]], [[COPY3]], [[COPY4]] + ; ; GREEDY-LABEL: name: sadde_s32_vss ; GREEDY: liveins: $vgpr0, $sgpr0, $sgpr1 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[SADDE:%[0-9]+]]:vgpr(s32), [[SADDE1:%[0-9]+]]:vcc(s1) = G_SADDE [[COPY]], [[COPY3]], [[COPY4]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s32) = COPY $sgpr1 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(eq), %2, %3 - %5:_(s32), %6:_(s1) = G_SADDE %0, %1, %4 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; GREEDY-NEXT: [[SADDE:%[0-9]+]]:vgpr(i32), [[SADDE1:%[0-9]+]]:vcc(i1) = G_SADDE [[COPY]], [[COPY3]], [[COPY4]] + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr0 + %2:_(i32) = COPY $sgpr1 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + %5:_(i32), %6:_(i1) = G_SADDE %0, %1, %4 ... 
--- name: sadde_s32_ssv @@ -89,30 +91,31 @@ body: | ; FAST-LABEL: name: sadde_s32_ssv ; FAST: liveins: $sgpr0, $sgpr1, $vgpr0 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY2]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[SADDE:%[0-9]+]]:vgpr(s32), [[SADDE1:%[0-9]+]]:vcc(s1) = G_SADDE [[COPY3]], [[COPY4]], [[COPY5]] + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY2]](i32) + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; FAST-NEXT: [[COPY5:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; FAST-NEXT: [[SADDE:%[0-9]+]]:vgpr(i32), [[SADDE1:%[0-9]+]]:vcc(i1) = G_SADDE [[COPY3]], [[COPY4]], [[COPY5]] + ; ; GREEDY-LABEL: name: sadde_s32_ssv ; GREEDY: liveins: $sgpr0, $sgpr1, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY2]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[SADDE:%[0-9]+]]:vgpr(s32), [[SADDE1:%[0-9]+]]:vcc(s1) = G_SADDE [[COPY3]], [[COPY4]], [[COPY5]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $vgpr0 - %3:_(s1) = G_TRUNC %2 - %4:_(s32), %5:_(s1) = G_SADDE %0, %1, %3 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY2]](i32) + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; GREEDY-NEXT: [[SADDE:%[0-9]+]]:vgpr(i32), [[SADDE1:%[0-9]+]]:vcc(i1) = G_SADDE [[COPY3]], [[COPY4]], [[COPY5]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $vgpr0 + %3:_(i1) = G_TRUNC %2(i32) + %4:_(i32), %5:_(i1) = G_SADDE %0, %1, %3 ... 
--- @@ -125,26 +128,27 @@ body: | ; FAST-LABEL: name: sadde_s32_vvs ; FAST: liveins: $vgpr0, $vgpr1, $sgpr0 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY2]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[SADDE:%[0-9]+]]:vgpr(s32), [[SADDE1:%[0-9]+]]:vcc(s1) = G_SADDE [[COPY]], [[COPY1]], [[COPY3]] + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY2]](i32) + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; FAST-NEXT: [[SADDE:%[0-9]+]]:vgpr(i32), [[SADDE1:%[0-9]+]]:vcc(i1) = G_SADDE [[COPY]], [[COPY1]], [[COPY3]] + ; ; GREEDY-LABEL: name: sadde_s32_vvs ; GREEDY: liveins: $vgpr0, $vgpr1, $sgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY2]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[SADDE:%[0-9]+]]:vgpr(s32), [[SADDE1:%[0-9]+]]:vcc(s1) = G_SADDE [[COPY]], [[COPY1]], [[COPY3]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $sgpr0 - %3:_(s1) = G_TRUNC %2 - %4:_(s32), %5:_(s1) = G_SADDE %0, %1, %3 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY2]](i32) + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; GREEDY-NEXT: [[SADDE:%[0-9]+]]:vgpr(i32), [[SADDE1:%[0-9]+]]:vcc(i1) = G_SADDE [[COPY]], [[COPY1]], [[COPY3]] + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $sgpr0 + %3:_(i1) = G_TRUNC %2(i32) + %4:_(i32), %5:_(i1) = G_SADDE %0, %1, %3 ... 
--- @@ -157,26 +161,27 @@ body: | ; FAST-LABEL: name: sadde_s32_sss_noscc ; FAST: liveins: $sgpr0, $sgpr1, $sgpr2 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY2]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; FAST-NEXT: [[SADDE:%[0-9]+]]:sgpr(s32), [[SADDE1:%[0-9]+]]:sgpr(s32) = G_SADDE [[COPY]], [[COPY1]], [[ZEXT]] - ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[SADDE1]](s32) + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY2]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; FAST-NEXT: [[SADDE:%[0-9]+]]:sgpr(i32), [[SADDE1:%[0-9]+]]:sgpr(i32) = G_SADDE [[COPY]], [[COPY1]], [[ZEXT]] + ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[SADDE1]](i32) + ; ; GREEDY-LABEL: name: sadde_s32_sss_noscc ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY2]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; GREEDY-NEXT: [[SADDE:%[0-9]+]]:sgpr(s32), [[SADDE1:%[0-9]+]]:sgpr(s32) = G_SADDE [[COPY]], [[COPY1]], [[ZEXT]] - ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[SADDE1]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $sgpr2 - %3:_(s1) = G_TRUNC %2 - %4:_(s32), %5:_(s1) = G_SADDE %0, %1, %3 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY2]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; GREEDY-NEXT: [[SADDE:%[0-9]+]]:sgpr(i32), [[SADDE1:%[0-9]+]]:sgpr(i32) = G_SADDE [[COPY]], [[COPY1]], [[ZEXT]] + ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[SADDE1]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $sgpr2 + %3:_(i1) = G_TRUNC %2(i32) + %4:_(i32), %5:_(i1) = G_SADDE %0, %1, %3 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-salu-float.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-salu-float.mir index 264c7b03aeeba..b64a020f44f43 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-salu-float.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-salu-float.mir @@ -12,12 +12,16 @@ body: | ; GFX1150-LABEL: name: fadd_f32 ; GFX1150: liveins: $sgpr0, $sgpr1 ; GFX1150-NEXT: {{ $}} - ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX1150-NEXT: [[FADD:%[0-9]+]]:sgpr(s32) = G_FADD [[COPY]], [[COPY1]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_FADD %0, %1 + ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GFX1150-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; GFX1150-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY1]](i32) + ; GFX1150-NEXT: [[FADD:%[0-9]+]]:sgpr(f32) = G_FADD [[BITCAST]], [[BITCAST1]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(f32) = G_BITCAST %0(i32) + %3:_(f32) = G_BITCAST %1(i32) + %4:_(f32) = G_FADD %2, %3 ... --- @@ -30,10 +34,12 @@ body: | ; GFX1150-LABEL: name: fptosi_f32_to_i32 ; GFX1150: liveins: $sgpr0 ; GFX1150-NEXT: {{ $}} - ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX1150-NEXT: [[FPTOSI:%[0-9]+]]:sgpr(s32) = G_FPTOSI [[COPY]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = G_FPTOSI %0(s32) + ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX1150-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; GFX1150-NEXT: [[FPTOSI:%[0-9]+]]:sgpr(i32) = G_FPTOSI [[BITCAST]](f32) + %0:_(i32) = COPY $sgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(i32) = G_FPTOSI %1(f32) ... --- @@ -46,10 +52,12 @@ body: | ; GFX1150-LABEL: name: fptoui_f32_to_u32 ; GFX1150: liveins: $sgpr0 ; GFX1150-NEXT: {{ $}} - ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX1150-NEXT: [[FPTOUI:%[0-9]+]]:sgpr(s32) = G_FPTOUI [[COPY]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = G_FPTOUI %0(s32) + ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX1150-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; GFX1150-NEXT: [[FPTOUI:%[0-9]+]]:sgpr(i32) = G_FPTOUI [[BITCAST]](f32) + %0:_(i32) = COPY $sgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(i32) = G_FPTOUI %1(f32) ... --- @@ -62,10 +70,10 @@ body: | ; GFX1150-LABEL: name: sitofp_i32_to_f32 ; GFX1150: liveins: $sgpr0 ; GFX1150-NEXT: {{ $}} - ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX1150-NEXT: [[SITOFP:%[0-9]+]]:sgpr(s32) = G_SITOFP [[COPY]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = G_SITOFP %0(s32) + ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX1150-NEXT: [[SITOFP:%[0-9]+]]:sgpr(f32) = G_SITOFP [[COPY]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(f32) = G_SITOFP %0(i32) ... --- @@ -78,10 +86,10 @@ body: | ; GFX1150-LABEL: name: uitofp_u32_to_f32 ; GFX1150: liveins: $sgpr0 ; GFX1150-NEXT: {{ $}} - ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX1150-NEXT: [[UITOFP:%[0-9]+]]:sgpr(s32) = G_UITOFP [[COPY]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = G_UITOFP %0(s32) + ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX1150-NEXT: [[UITOFP:%[0-9]+]]:sgpr(f32) = G_UITOFP [[COPY]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(f32) = G_UITOFP %0(i32) ... 
--- @@ -94,10 +102,12 @@ body: | ; GFX1150-LABEL: name: fptrunc_f32_to_f16 ; GFX1150: liveins: $sgpr0 ; GFX1150-NEXT: {{ $}} - ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX1150-NEXT: [[FPTRUNC:%[0-9]+]]:sgpr(s16) = G_FPTRUNC [[COPY]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s16) = G_FPTRUNC %0(s32) + ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX1150-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; GFX1150-NEXT: [[FPTRUNC:%[0-9]+]]:sgpr(f16) = G_FPTRUNC [[BITCAST]](f32) + %0:_(i32) = COPY $sgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f16) = G_FPTRUNC %1(f32) ... --- @@ -110,10 +120,10 @@ body: | ; GFX1150-LABEL: name: fpext_f16_to_f32 ; GFX1150: liveins: $sgpr0 ; GFX1150-NEXT: {{ $}} - ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX1150-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - %1:_(s32) = COPY $sgpr0 - %0:_(s16) = G_TRUNC %1(s32) + ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX1150-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i16) = G_TRUNC %0(i32) ... # Tests below should not select scalar registers @@ -128,14 +138,18 @@ body: | ; GFX1150-LABEL: name: fadd_f64 ; GFX1150: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX1150-NEXT: {{ $}} - ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 - ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; GFX1150-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64) - ; GFX1150-NEXT: [[FADD:%[0-9]+]]:vgpr(s64) = G_FADD [[COPY2]], [[COPY3]] - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s64) = COPY $sgpr2_sgpr3 - %2:_(s64) = G_FADD %0, %1 + ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sgpr(i64) = COPY $sgpr2_sgpr3 + ; GFX1150-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f64) = G_BITCAST [[COPY]](i64) + ; GFX1150-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(f64) = G_BITCAST [[COPY1]](i64) + ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:vgpr(f64) = COPY [[BITCAST]](f64) + ; GFX1150-NEXT: [[COPY3:%[0-9]+]]:vgpr(f64) = COPY [[BITCAST1]](f64) + ; GFX1150-NEXT: [[FADD:%[0-9]+]]:vgpr(f64) = G_FADD [[COPY2]], [[COPY3]] + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(i64) = COPY $sgpr2_sgpr3 + %2:_(f64) = G_BITCAST %0(i64) + %3:_(f64) = G_BITCAST %1(i64) + %4:_(f64) = G_FADD %2, %3 ... --- @@ -148,11 +162,13 @@ body: | ; GFX1150-LABEL: name: fptosi_f64_to_i32 ; GFX1150: liveins: $sgpr0, $sgpr1 ; GFX1150-NEXT: {{ $}} - ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; GFX1150-NEXT: [[FPTOSI:%[0-9]+]]:vgpr(s32) = G_FPTOSI [[COPY1]](s64) - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_FPTOSI %0(s64) + ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; GFX1150-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f64) = G_BITCAST [[COPY]](i64) + ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:vgpr(f64) = COPY [[BITCAST]](f64) + ; GFX1150-NEXT: [[FPTOSI:%[0-9]+]]:vgpr(i32) = G_FPTOSI [[COPY1]](f64) + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(f64) = G_BITCAST %0(i64) + %2:_(i32) = G_FPTOSI %1(f64) ... 
--- @@ -165,13 +181,15 @@ body: | ; GFX1150-LABEL: name: fptoui_f16_to_u16 ; GFX1150: liveins: $sgpr0 ; GFX1150-NEXT: {{ $}} - ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX1150-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC]](s16) - ; GFX1150-NEXT: [[FPTOUI:%[0-9]+]]:vgpr(s16) = G_FPTOUI [[COPY1]](s16) - %1:_(s32) = COPY $sgpr0 - %0:_(s16) = G_TRUNC %1(s32) - %2:_(s16) = G_FPTOUI %0(s16) + ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX1150-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX1150-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f16) = G_BITCAST [[TRUNC]](i16) + ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:vgpr(f16) = COPY [[BITCAST]](f16) + ; GFX1150-NEXT: [[FPTOUI:%[0-9]+]]:vgpr(i16) = G_FPTOUI [[COPY1]](f16) + %0:_(i32) = COPY $sgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(f16) = G_BITCAST %1(i16) + %3:_(i16) = G_FPTOUI %2(f16) ... --- @@ -184,11 +202,11 @@ body: | ; GFX1150-LABEL: name: sitofp_i32_to_f64 ; GFX1150: liveins: $sgpr0 ; GFX1150-NEXT: {{ $}} - ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX1150-NEXT: [[SITOFP:%[0-9]+]]:vgpr(s64) = G_SITOFP [[COPY1]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s64) = G_SITOFP %0(s32) + ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; GFX1150-NEXT: [[SITOFP:%[0-9]+]]:vgpr(f64) = G_SITOFP [[COPY1]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(f64) = G_SITOFP %0(i32) ... --- @@ -201,13 +219,13 @@ body: | ; GFX1150-LABEL: name: uitofp_u16_to_f16 ; GFX1150: liveins: $sgpr0 ; GFX1150-NEXT: {{ $}} - ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX1150-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC]](s16) - ; GFX1150-NEXT: [[UITOFP:%[0-9]+]]:vgpr(s16) = G_UITOFP [[COPY1]](s16) - %1:_(s32) = COPY $sgpr0 - %0:_(s16) = G_TRUNC %1(s32) - %2:_(s16) = G_UITOFP %0(s16) + ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX1150-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:vgpr(i16) = COPY [[TRUNC]](i16) + ; GFX1150-NEXT: [[UITOFP:%[0-9]+]]:vgpr(f16) = G_UITOFP [[COPY1]](i16) + %0:_(i32) = COPY $sgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(f16) = G_UITOFP %1(i16) ... --- @@ -220,11 +238,13 @@ body: | ; GFX1150-LABEL: name: fptrunc_f64_to_f32 ; GFX1150: liveins: $sgpr0_sgpr1 ; GFX1150-NEXT: {{ $}} - ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; GFX1150-NEXT: [[FPTRUNC:%[0-9]+]]:vgpr(s32) = G_FPTRUNC [[COPY1]](s64) - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_FPTRUNC %0(s64) + ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; GFX1150-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f64) = G_BITCAST [[COPY]](i64) + ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:vgpr(f64) = COPY [[BITCAST]](f64) + ; GFX1150-NEXT: [[FPTRUNC:%[0-9]+]]:vgpr(f32) = G_FPTRUNC [[COPY1]](f64) + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(f64) = G_BITCAST %0(i64) + %2:_(f32) = G_FPTRUNC %1(f64) ... 
--- @@ -237,10 +257,12 @@ body: | ; GFX1150-LABEL: name: fpext_f32_to_f64 ; GFX1150: liveins: $sgpr0 ; GFX1150-NEXT: {{ $}} - ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX1150-NEXT: [[FPEXT:%[0-9]+]]:vgpr(s64) = G_FPEXT [[COPY1]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s64) = G_FPEXT %0(s32) + ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX1150-NEXT: [[BITCAST:%[0-9]+]]:sgpr(f32) = G_BITCAST [[COPY]](i32) + ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:vgpr(f32) = COPY [[BITCAST]](f32) + ; GFX1150-NEXT: [[FPEXT:%[0-9]+]]:vgpr(f64) = G_FPEXT [[COPY1]](f32) + %0:_(i32) = COPY $sgpr0 + %1:_(f32) = G_BITCAST %0(i32) + %2:_(f64) = G_FPEXT %1(f32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sbfx.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sbfx.mir index 97c006a1a7216..78922e7684f53 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sbfx.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sbfx.mir @@ -17,16 +17,16 @@ body: | ; CHECK-LABEL: name: test_sbfx_s32_vvv ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[SBFX:%[0-9]+]]:vgpr(s32) = G_SBFX [[COPY]], [[COPY1]](s32), [[COPY2]] - ; CHECK-NEXT: $vgpr0 = COPY [[SBFX]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = G_SBFX %0, %1(s32), %2 - $vgpr0 = COPY %3(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[SBFX:%[0-9]+]]:vgpr(i32) = G_SBFX [[COPY]], [[COPY1]](i32), [[COPY2]] + ; CHECK-NEXT: $vgpr0 = COPY [[SBFX]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = G_SBFX %0, %1(i32), %2 + $vgpr0 = COPY %3(i32) ... --- @@ -40,18 +40,18 @@ body: | ; CHECK-LABEL: name: test_sbfx_s32_vii ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[SBFX:%[0-9]+]]:vgpr(s32) = G_SBFX [[COPY]], [[COPY1]](s32), [[COPY2]] - ; CHECK-NEXT: $vgpr0 = COPY [[SBFX]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_CONSTANT i32 10 - %2:_(s32) = G_CONSTANT i32 4 - %3:_(s32) = G_SBFX %0, %1(s32), %2 - $vgpr0 = COPY %3(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[C1]](i32) + ; CHECK-NEXT: [[SBFX:%[0-9]+]]:vgpr(i32) = G_SBFX [[COPY]], [[COPY1]](i32), [[COPY2]] + ; CHECK-NEXT: $vgpr0 = COPY [[SBFX]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_CONSTANT i32 10 + %2:_(i32) = G_CONSTANT i32 4 + %3:_(i32) = G_SBFX %0, %1(i32), %2 + $vgpr0 = COPY %3(i32) ... 
--- @@ -65,18 +65,18 @@ body: | ; CHECK-LABEL: name: test_sbfx_s32_vss ; CHECK: liveins: $vgpr0, $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[SBFX:%[0-9]+]]:vgpr(s32) = G_SBFX [[COPY]], [[COPY3]](s32), [[COPY4]] - ; CHECK-NEXT: $vgpr0 = COPY [[SBFX]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s32) = COPY $sgpr1 - %3:_(s32) = G_SBFX %0, %1(s32), %2 - $vgpr0 = COPY %3(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[SBFX:%[0-9]+]]:vgpr(i32) = G_SBFX [[COPY]], [[COPY3]](i32), [[COPY4]] + ; CHECK-NEXT: $vgpr0 = COPY [[SBFX]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr0 + %2:_(i32) = COPY $sgpr1 + %3:_(i32) = G_SBFX %0, %1(i32), %2 + $vgpr0 = COPY %3(i32) ... # Expand to a sequence that implements the 64-bit bitfield extract using @@ -92,21 +92,21 @@ body: | ; CHECK-LABEL: name: test_sbfx_s64_vvv ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s64) = G_ASHR [[COPY]], [[COPY1]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 64 - ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s32) = G_SUB [[C]], [[COPY2]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(s64) = G_SHL [[ASHR]], [[SUB]](s32) - ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:vgpr(s64) = G_ASHR [[SHL]], [[SUB]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %3:vgpr(s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s32) = COPY $vgpr3 - %3:_(s64) = G_SBFX %0, %1(s32), %2 - $vgpr0_vgpr1 = COPY %3(s64) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(i64) = G_ASHR [[COPY]], [[COPY1]](i32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[ASHR]](i64) + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 64 + ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(i32) = G_SUB [[C]], [[COPY2]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(i64) = G_SHL [[ASHR]], [[SUB]](i32) + ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:vgpr(i64) = G_ASHR [[SHL]], [[SUB]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %3:vgpr(i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = COPY $vgpr2 + %2:_(i32) = COPY $vgpr3 + %3:_(i64) = G_SBFX %0, %1(i32), %2 + $vgpr0_vgpr1 = COPY %3(i64) ... 
--- @@ -120,21 +120,21 @@ body: | ; CHECK-LABEL: name: test_sbfx_s64_vss ; CHECK: liveins: $vgpr0_vgpr1, $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s64) = G_ASHR [[COPY]], [[COPY1]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 64 - ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s32) = G_SUB [[C]], [[COPY2]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(s64) = G_SHL [[ASHR]], [[SUB]](s32) - ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:vgpr(s64) = G_ASHR [[SHL]], [[SUB]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %3:vgpr(s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = COPY $vgpr1 - %3:_(s64) = G_SBFX %0, %1(s32), %2 - $vgpr0_vgpr1 = COPY %3(s64) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(i64) = G_ASHR [[COPY]], [[COPY1]](i32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[ASHR]](i64) + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 64 + ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(i32) = G_SUB [[C]], [[COPY2]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(i64) = G_SHL [[ASHR]], [[SUB]](i32) + ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:vgpr(i64) = G_ASHR [[SHL]], [[SUB]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %3:vgpr(i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = COPY $vgpr1 + %3:_(i64) = G_SBFX %0, %1(i32), %2 + $vgpr0_vgpr1 = COPY %3(i64) ... 
# If the offset and width are constants, use the 32-bit bitfield extract, @@ -150,24 +150,24 @@ body: | ; CHECK-LABEL: name: test_sbfx_s64_vii_small ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 31 - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s64) = G_ASHR [[COPY]], [[COPY1]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[SBFX:%[0-9]+]]:vgpr(s32) = G_SBFX [[UV]], [[C2]](s32), [[COPY2]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31 - ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:vgpr(s32) = G_ASHR [[SBFX]], [[C3]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SBFX]](s32), [[ASHR1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_CONSTANT i32 31 - %2:_(s32) = G_CONSTANT i32 4 - %3:_(s64) = G_SBFX %0, %1(s32), %2 - $vgpr0_vgpr1 = COPY %3(s64) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 31 + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[C1]](i32) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(i64) = G_ASHR [[COPY]], [[COPY1]](i32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[ASHR]](i64) + ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SBFX:%[0-9]+]]:vgpr(i32) = G_SBFX [[UV]], [[C2]](i32), [[COPY2]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 31 + ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:vgpr(i32) = G_ASHR [[SBFX]], [[C3]](i32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[SBFX]](i32), [[ASHR1]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = G_CONSTANT i32 31 + %2:_(i32) = G_CONSTANT i32 4 + %3:_(i64) = G_SBFX %0, %1(i32), %2 + $vgpr0_vgpr1 = COPY %3(i64) ... 
--- @@ -181,23 +181,23 @@ body: | ; CHECK-LABEL: name: test_sbfx_s64_vii_big ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 40 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s64) = G_ASHR [[COPY]], [[COPY1]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[SBFX:%[0-9]+]]:vgpr(s32) = G_SBFX [[UV1]], [[C2]](s32), [[C3]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[UV]](s32), [[SBFX]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_CONSTANT i32 8 - %2:_(s32) = G_CONSTANT i32 40 - %3:_(s64) = G_SBFX %0, %1(s32), %2 - $vgpr0_vgpr1 = COPY %3(s64) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 40 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[C1]](i32) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(i64) = G_ASHR [[COPY]], [[COPY1]](i32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[ASHR]](i64) + ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C3:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[SBFX:%[0-9]+]]:vgpr(i32) = G_SBFX [[UV1]], [[C2]](i32), [[C3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[UV]](i32), [[SBFX]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = G_CONSTANT i32 8 + %2:_(i32) = G_CONSTANT i32 40 + %3:_(i64) = G_SBFX %0, %1(i32), %2 + $vgpr0_vgpr1 = COPY %3(i64) ... 
--- @@ -211,22 +211,22 @@ body: | ; CHECK-LABEL: name: test_sbfx_s64_svv ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s64) = G_ASHR [[COPY3]], [[COPY1]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 64 - ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s32) = G_SUB [[C]], [[COPY2]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(s64) = G_SHL [[ASHR]], [[SUB]](s32) - ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:vgpr(s64) = G_ASHR [[SHL]], [[SUB]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %3:vgpr(s64) - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = COPY $vgpr1 - %3:_(s64) = G_SBFX %0, %1(s32), %2 - $vgpr0_vgpr1 = COPY %3(s64) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i64) = COPY [[COPY]](i64) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(i64) = G_ASHR [[COPY3]], [[COPY1]](i32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[ASHR]](i64) + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 64 + ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(i32) = G_SUB [[C]], [[COPY2]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(i64) = G_SHL [[ASHR]], [[SUB]](i32) + ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:vgpr(i64) = G_ASHR [[SHL]], [[SUB]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %3:vgpr(i64) + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = COPY $vgpr1 + %3:_(i64) = G_SBFX %0, %1(i32), %2 + $vgpr0_vgpr1 = COPY %3(i64) ... # Expand to a sequence that combines the offset and width for the two operand @@ -242,17 +242,17 @@ body: | ; CHECK-LABEL: name: test_sbfx_s32_svv ; CHECK: liveins: $sgpr0, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[SBFX:%[0-9]+]]:vgpr(s32) = G_SBFX [[COPY3]], [[COPY1]](s32), [[COPY2]] - ; CHECK-NEXT: $vgpr0 = COPY [[SBFX]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = COPY $vgpr1 - %3:_(s32) = G_SBFX %0, %1(s32), %2 - $vgpr0 = COPY %3(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[SBFX:%[0-9]+]]:vgpr(i32) = G_SBFX [[COPY3]], [[COPY1]](i32), [[COPY2]] + ; CHECK-NEXT: $vgpr0 = COPY [[SBFX]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = COPY $vgpr1 + %3:_(i32) = G_SBFX %0, %1(i32), %2 + $vgpr0 = COPY %3(i32) ... 
--- @@ -266,21 +266,21 @@ body: | ; CHECK-LABEL: name: test_sbfx_s32_sss ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 63 - ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[COPY1]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY2]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:sreg_32(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[S_BFE_I32_:%[0-9]+]]:sreg_32(s32) = S_BFE_I32 [[COPY]](s32), [[OR]](s32), implicit-def $scc - ; CHECK-NEXT: $sgpr0 = COPY [[S_BFE_I32_]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $sgpr2 - %3:_(s32) = G_SBFX %0, %1(s32), %2 - $sgpr0 = COPY %3(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 63 + ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(i32) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(i32) = G_SHL [[COPY2]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:sreg_32(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[S_BFE_I32_:%[0-9]+]]:sreg_32(i32) = S_BFE_I32 [[COPY]](i32), [[OR]](i32), implicit-def $scc + ; CHECK-NEXT: $sgpr0 = COPY [[S_BFE_I32_]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $sgpr2 + %3:_(i32) = G_SBFX %0, %1(i32), %2 + $sgpr0 = COPY %3(i32) ... --- @@ -294,21 +294,21 @@ body: | ; CHECK-LABEL: name: test_sbfx_s32_sii ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 63 - ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[C]], [[C2]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C1]], [[C3]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:sreg_32(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[S_BFE_I32_:%[0-9]+]]:sreg_32(s32) = S_BFE_I32 [[COPY]](s32), [[OR]](s32), implicit-def $scc - ; CHECK-NEXT: $sgpr0 = COPY [[S_BFE_I32_]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = G_CONSTANT i32 1 - %2:_(s32) = G_CONSTANT i32 10 - %3:_(s32) = G_SBFX %0, %1(s32), %2 - $sgpr0 = COPY %3(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 63 + ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(i32) = G_AND [[C]], [[C2]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(i32) = G_SHL [[C1]], [[C3]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:sreg_32(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[S_BFE_I32_:%[0-9]+]]:sreg_32(i32) = S_BFE_I32 [[COPY]](i32), [[OR]](i32), implicit-def $scc + ; CHECK-NEXT: $sgpr0 = COPY [[S_BFE_I32_]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = G_CONSTANT i32 1 + %2:_(i32) = G_CONSTANT i32 10 + %3:_(i32) = G_SBFX %0, %1(i32), %2 + $sgpr0 = COPY %3(i32) ... 
# Expand to a sequence that combines the offset and width for the two operand @@ -324,21 +324,21 @@ body: | ; CHECK-LABEL: name: test_sbfx_s64_sss ; CHECK: liveins: $sgpr0_sgpr1, $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 63 - ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[COPY1]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY2]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:sreg_32(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[S_BFE_I64_:%[0-9]+]]:sreg_64(s64) = S_BFE_I64 [[COPY]](s64), [[OR]](s32), implicit-def $scc - ; CHECK-NEXT: $sgpr0_sgpr1 = COPY [[S_BFE_I64_]](s64) - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr0 - %2:_(s32) = COPY $sgpr1 - %3:_(s64) = G_SBFX %0, %1(s32), %2 - $sgpr0_sgpr1 = COPY %3(s64) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64(i64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 63 + ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(i32) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(i32) = G_SHL [[COPY2]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:sreg_32(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[S_BFE_I64_:%[0-9]+]]:sreg_64(i64) = S_BFE_I64 [[COPY]](i64), [[OR]](i32), implicit-def $scc + ; CHECK-NEXT: $sgpr0_sgpr1 = COPY [[S_BFE_I64_]](i64) + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(i32) = COPY $sgpr0 + %2:_(i32) = COPY $sgpr1 + %3:_(i64) = G_SBFX %0, %1(i32), %2 + $sgpr0_sgpr1 = COPY %3(i64) ... --- @@ -352,17 +352,17 @@ body: | ; CHECK-LABEL: name: test_sbfx_s64_sii ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 63 - ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[C]], [[C2]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C1]], [[C3]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:sreg_32(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[S_BFE_I64_:%[0-9]+]]:sreg_64(s64) = S_BFE_I64 [[COPY]](s64), [[OR]](s32), implicit-def $scc - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_CONSTANT i32 1 - %2:_(s32) = G_CONSTANT i32 10 - %3:_(s64) = G_SBFX %0, %1(s32), %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64(i64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 63 + ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(i32) = G_AND [[C]], [[C2]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(i32) = G_SHL [[C1]], [[C3]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:sreg_32(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[S_BFE_I64_:%[0-9]+]]:sreg_64(i64) = S_BFE_I64 [[COPY]](i64), [[OR]](i32), implicit-def $scc + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(i32) = G_CONSTANT i32 1 + %2:_(i32) = G_CONSTANT i32 10 + %3:_(i64) = G_SBFX %0, %1(i32), %2 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-select.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-select.mir index 810724dab685d..d21e934bee3e4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-select.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-select.mir @@ -11,32 +11,32 @@ body: | ; FAST-LABEL: name: select_s32_scc_ss ; FAST: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ZEXT]](s32), [[COPY2]], [[COPY3]] + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:sgpr(i32) = G_SELECT [[ZEXT]](i32), [[COPY2]], [[COPY3]] ; ; GREEDY-LABEL: name: select_s32_scc_ss ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ZEXT]](s32), [[COPY2]], [[COPY3]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $sgpr2 - %3:_(s32) = COPY $sgpr3 - %4:_(s1) = G_ICMP intpred(ne), %0, %1 - %5:_(s32) = G_SELECT %4, %2, %3 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:sgpr(i32) = G_SELECT [[ZEXT]](i32), [[COPY2]], [[COPY3]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $sgpr2 + %3:_(i32) = COPY $sgpr3 + %4:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %5:_(i32) = G_SELECT %4(i1), %2, %3 ... 
--- @@ -48,34 +48,34 @@ body: | ; FAST-LABEL: name: select_s32_scc_sv ; FAST: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[COPY5]], [[COPY3]] + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[COPY2]](i32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY4]](i1), [[COPY5]], [[COPY3]] ; ; GREEDY-LABEL: name: select_s32_scc_sv ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[COPY5]], [[COPY3]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $sgpr2 - %3:_(s32) = COPY $vgpr0 - %4:_(s1) = G_ICMP intpred(ne), %0, %1 - %5:_(s32) = G_SELECT %4, %2, %3 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[COPY2]](i32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY4]](i1), [[COPY5]], [[COPY3]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $sgpr2 + %3:_(i32) = COPY $vgpr0 + %4:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %5:_(i32) = G_SELECT %4(i1), %2, %3 ... 
@@ -88,34 +88,34 @@ body: | ; FAST-LABEL: name: select_s32_scc_vs ; FAST: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[COPY3]], [[COPY5]] + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[COPY2]](i32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY4]](i1), [[COPY3]], [[COPY5]] ; ; GREEDY-LABEL: name: select_s32_scc_vs ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[COPY3]], [[COPY5]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $sgpr2 - %3:_(s32) = COPY $vgpr0 - %4:_(s1) = G_ICMP intpred(ne), %0, %1 - %5:_(s32) = G_SELECT %4, %3, %2 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[COPY2]](i32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY4]](i1), [[COPY3]], [[COPY5]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $sgpr2 + %3:_(i32) = COPY $vgpr0 + %4:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %5:_(i32) = G_SELECT %4(i1), %3, %2 ... 
--- @@ -127,32 +127,32 @@ body: | ; FAST-LABEL: name: select_s32_scc_vv ; FAST: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[COPY2]], [[COPY3]] + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY4]](i1), [[COPY2]], [[COPY3]] ; ; GREEDY-LABEL: name: select_s32_scc_vv ; GREEDY: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[COPY2]], [[COPY3]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $vgpr0 - %3:_(s32) = COPY $vgpr1 - %4:_(s1) = G_ICMP intpred(ne), %0, %1 - %5:_(s32) = G_SELECT %4, %2, %3 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY4]](i1), [[COPY2]], [[COPY3]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $vgpr0 + %3:_(i32) = COPY $vgpr1 + %4:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %5:_(i32) = G_SELECT %4(i1), %2, %3 ... 
--- @@ -164,32 +164,32 @@ body: | ; FAST-LABEL: name: select_s32_vcc_ss ; FAST: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY2]](s32), [[COPY3]] - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY4]], [[COPY5]] + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY2]](i32), [[COPY3]] + ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[COPY4]], [[COPY5]] ; ; GREEDY-LABEL: name: select_s32_vcc_ss ; GREEDY: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY2]](s32), [[COPY3]] - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY4]], [[COPY5]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $vgpr0 - %3:_(s32) = COPY $vgpr1 - %4:_(s1) = G_ICMP intpred(ne), %2, %3 - %5:_(s32) = G_SELECT %4, %0, %1 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY2]](i32), [[COPY3]] + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[COPY4]], [[COPY5]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $vgpr0 + %3:_(i32) = COPY $vgpr1 + %4:_(i1) = G_ICMP intpred(ne), %2(i32), %3 + %5:_(i32) = G_SELECT %4(i1), %0, %1 ... 
--- @@ -201,30 +201,30 @@ body: | ; FAST-LABEL: name: select_s32_vcc_sv ; FAST: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[COPY2]] - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY4]], [[COPY3]] + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY1]](i32), [[COPY2]] + ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[COPY4]], [[COPY3]] ; ; GREEDY-LABEL: name: select_s32_vcc_sv ; GREEDY: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[COPY2]] - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY4]], [[COPY3]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = COPY $vgpr1 - %3:_(s32) = COPY $vgpr2 - %4:_(s1) = G_ICMP intpred(ne), %1, %2 - %5:_(s32) = G_SELECT %4, %0, %3 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY1]](i32), [[COPY2]] + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[COPY4]], [[COPY3]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = COPY $vgpr1 + %3:_(i32) = COPY $vgpr2 + %4:_(i1) = G_ICMP intpred(ne), %1(i32), %2 + %5:_(i32) = G_SELECT %4(i1), %0, %3 ... 
--- @@ -236,30 +236,30 @@ body: | ; FAST-LABEL: name: select_s32_vcc_vs ; FAST: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[COPY2]] - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY3]], [[COPY4]] + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY1]](i32), [[COPY2]] + ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[COPY3]], [[COPY4]] ; ; GREEDY-LABEL: name: select_s32_vcc_vs ; GREEDY: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[COPY2]] - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY3]], [[COPY4]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = COPY $vgpr1 - %3:_(s32) = COPY $vgpr2 - %4:_(s1) = G_ICMP intpred(ne), %1, %2 - %5:_(s32) = G_SELECT %4, %3, %0 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY1]](i32), [[COPY2]] + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[COPY3]], [[COPY4]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = COPY $vgpr1 + %3:_(i32) = COPY $vgpr2 + %4:_(i1) = G_ICMP intpred(ne), %1(i32), %2 + %5:_(i32) = G_SELECT %4(i1), %3, %0 ... 
--- @@ -271,28 +271,28 @@ body: | ; FAST-LABEL: name: select_s32_vcc_vv ; FAST: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY2]], [[COPY3]] + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[COPY2]], [[COPY3]] ; ; GREEDY-LABEL: name: select_s32_vcc_vv ; GREEDY: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY2]], [[COPY3]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = COPY $vgpr3 - %4:_(s1) = G_ICMP intpred(ne), %0, %1 - %5:_(s32) = G_SELECT %4, %2, %3 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[COPY2]], [[COPY3]] + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 + %4:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %5:_(i32) = G_SELECT %4(i1), %2, %3 ... 
--- @@ -304,32 +304,32 @@ body: | ; FAST-LABEL: name: select_s64_sss ; FAST: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:sgpr(s64) = COPY $sgpr4_sgpr5 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:sgpr(s64) = G_SELECT [[ZEXT]](s32), [[COPY2]], [[COPY3]] + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i64) = COPY $sgpr2_sgpr3 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:sgpr(i64) = COPY $sgpr4_sgpr5 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:sgpr(i64) = G_SELECT [[ZEXT]](i32), [[COPY2]], [[COPY3]] ; ; GREEDY-LABEL: name: select_s64_sss ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s64) = COPY $sgpr4_sgpr5 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:sgpr(s64) = G_SELECT [[ZEXT]](s32), [[COPY2]], [[COPY3]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s64) = COPY $sgpr2_sgpr3 - %3:_(s64) = COPY $sgpr4_sgpr5 - %4:_(s1) = G_ICMP intpred(ne), %0, %1 - %5:_(s64) = G_SELECT %4, %2, %3 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i64) = COPY $sgpr2_sgpr3 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(i64) = COPY $sgpr4_sgpr5 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:sgpr(i64) = G_SELECT [[ZEXT]](i32), [[COPY2]], [[COPY3]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i64) = COPY $sgpr2_sgpr3 + %3:_(i64) = COPY $sgpr4_sgpr5 + %4:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %5:_(i64) = G_SELECT %4(i1), %2, %3 ... 
--- @@ -341,40 +341,40 @@ body: | ; FAST-LABEL: name: select_s64_ssv ; FAST: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](s64) - ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](s64) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] - ; FAST-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i64) = COPY $sgpr2_sgpr3 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY2]](i64) + ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY3]](i64) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY4]](i1), [[UV]], [[UV2]] + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY4]](i1), [[UV1]], [[UV3]] + ; FAST-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[SELECT]](i32), [[SELECT1]](i32) ; ; GREEDY-LABEL: name: select_s64_ssv ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](s64) - ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](s64) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] - ; GREEDY-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s64) = COPY $sgpr2_sgpr3 - %3:_(s64) = COPY $vgpr0_vgpr1 - %4:_(s1) = G_ICMP intpred(ne), %0, %1 - %5:_(s64) = G_SELECT %4, %2, %3 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i64) = COPY $sgpr2_sgpr3 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = 
G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY2]](i64) + ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY3]](i64) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY4]](i1), [[UV]], [[UV2]] + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY4]](i1), [[UV1]], [[UV3]] + ; GREEDY-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[SELECT]](i32), [[SELECT1]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i64) = COPY $sgpr2_sgpr3 + %3:_(i64) = COPY $vgpr0_vgpr1 + %4:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %5:_(i64) = G_SELECT %4(i1), %2, %3 ... @@ -387,40 +387,40 @@ body: | ; FAST-LABEL: name: select_s64_svs ; FAST: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](s64) - ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](s64) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] - ; FAST-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i64) = COPY $sgpr2_sgpr3 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY3]](i64) + ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY2]](i64) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY4]](i1), [[UV]], [[UV2]] + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY4]](i1), [[UV1]], [[UV3]] + ; FAST-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[SELECT]](i32), [[SELECT1]](i32) ; ; GREEDY-LABEL: name: select_s64_svs ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](s64) - ; 
GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](s64) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] - ; GREEDY-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s64) = COPY $sgpr2_sgpr3 - %3:_(s64) = COPY $vgpr0_vgpr1 - %4:_(s1) = G_ICMP intpred(ne), %0, %1 - %5:_(s64) = G_SELECT %4, %3, %2 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i64) = COPY $sgpr2_sgpr3 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY3]](i64) + ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY2]](i64) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY4]](i1), [[UV]], [[UV2]] + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY4]](i1), [[UV1]], [[UV3]] + ; GREEDY-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[SELECT]](i32), [[SELECT1]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i64) = COPY $sgpr2_sgpr3 + %3:_(i64) = COPY $vgpr0_vgpr1 + %4:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %5:_(i64) = G_SELECT %4(i1), %3, %2 ... --- @@ -432,40 +432,40 @@ body: | ; FAST-LABEL: name: select_s64_svv ; FAST: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1, $vgpr2_vgpr3 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](s64) - ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](s64) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] - ; FAST-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(i64) = COPY $vgpr2_vgpr3 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY2]](i64) + ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY3]](i64) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY4]](i1), [[UV]], [[UV2]] + ; 
FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY4]](i1), [[UV1]], [[UV3]] + ; FAST-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[SELECT]](i32), [[SELECT1]](i32) ; ; GREEDY-LABEL: name: select_s64_svv ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1, $vgpr2_vgpr3 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](s64) - ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](s64) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] - ; GREEDY-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s64) = COPY $vgpr0_vgpr1 - %3:_(s64) = COPY $vgpr2_vgpr3 - %4:_(s1) = G_ICMP intpred(ne), %0, %1 - %5:_(s64) = G_SELECT %4, %2, %3 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(i64) = COPY $vgpr2_vgpr3 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY2]](i64) + ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY3]](i64) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY4]](i1), [[UV]], [[UV2]] + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY4]](i1), [[UV1]], [[UV3]] + ; GREEDY-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[SELECT]](i32), [[SELECT1]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i64) = COPY $vgpr0_vgpr1 + %3:_(i64) = COPY $vgpr2_vgpr3 + %4:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %5:_(i64) = G_SELECT %4(i1), %2, %3 ... 
--- @@ -477,36 +477,36 @@ body: | ; FAST-LABEL: name: select_s64_vss ; FAST: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3, $vgpr0, $vgpr1 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY2]](s32), [[COPY3]] - ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]] - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV3]] - ; FAST-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i64) = COPY $sgpr2_sgpr3 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY2]](i32), [[COPY3]] + ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV]], [[UV2]] + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV1]], [[UV3]] + ; FAST-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[SELECT]](i32), [[SELECT1]](i32) ; ; GREEDY-LABEL: name: select_s64_vss ; GREEDY: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3, $vgpr0, $vgpr1 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY2]](s32), [[COPY3]] - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]] - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV3]] - ; GREEDY-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s64) = COPY $sgpr2_sgpr3 - %2:_(s32) = COPY $vgpr0 - %3:_(s32) = COPY $vgpr1 - %4:_(s1) = G_ICMP intpred(ne), %2, %3 - %5:_(s64) = G_SELECT %4, %0, %1 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i64) = COPY $sgpr2_sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY2]](i32), [[COPY3]] + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV]], [[UV2]] + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT 
[[ICMP]](i1), [[UV1]], [[UV3]] + ; GREEDY-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[SELECT]](i32), [[SELECT1]](i32) + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(i64) = COPY $sgpr2_sgpr3 + %2:_(i32) = COPY $vgpr0 + %3:_(i32) = COPY $vgpr1 + %4:_(i1) = G_ICMP intpred(ne), %2(i32), %3 + %5:_(i64) = G_SELECT %4(i1), %0, %1 ... --- @@ -518,36 +518,36 @@ body: | ; FAST-LABEL: name: select_s64_vsv ; FAST: liveins: $sgpr0_sgpr1, $vgpr0, $vgpr1, $vgpr2_vgpr3 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[COPY2]] - ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](s64) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]] - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV3]] - ; FAST-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(i64) = COPY $vgpr2_vgpr3 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY1]](i32), [[COPY2]] + ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY3]](i64) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV]], [[UV2]] + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV1]], [[UV3]] + ; FAST-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[SELECT]](i32), [[SELECT1]](i32) ; ; GREEDY-LABEL: name: select_s64_vsv ; GREEDY: liveins: $sgpr0_sgpr1, $vgpr0, $vgpr1, $vgpr2_vgpr3 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[COPY2]] - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](s64) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]] - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV3]] - ; GREEDY-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = COPY $vgpr1 - %3:_(s64) = COPY $vgpr2_vgpr3 - %4:_(s1) = G_ICMP intpred(ne), %1, %2 - %5:_(s64) = G_SELECT %4, %0, %3 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(i64) = COPY $vgpr2_vgpr3 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY1]](i32), [[COPY2]] + ; GREEDY-NEXT: 
[[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY3]](i64) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV]], [[UV2]] + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV1]], [[UV3]] + ; GREEDY-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[SELECT]](i32), [[SELECT1]](i32) + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = COPY $vgpr1 + %3:_(i64) = COPY $vgpr2_vgpr3 + %4:_(i1) = G_ICMP intpred(ne), %1(i32), %2 + %5:_(i64) = G_SELECT %4(i1), %0, %3 ... --- @@ -559,36 +559,36 @@ body: | ; FAST-LABEL: name: select_s64_vvs ; FAST: liveins: $sgpr0_sgpr1, $vgpr0, $vgpr1, $vgpr2_vgpr3 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[COPY2]] - ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](s64) - ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]] - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV3]] - ; FAST-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(i64) = COPY $vgpr2_vgpr3 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY1]](i32), [[COPY2]] + ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY3]](i64) + ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV]], [[UV2]] + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV1]], [[UV3]] + ; FAST-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[SELECT]](i32), [[SELECT1]](i32) ; ; GREEDY-LABEL: name: select_s64_vvs ; GREEDY: liveins: $sgpr0_sgpr1, $vgpr0, $vgpr1, $vgpr2_vgpr3 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[COPY2]] - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](s64) - ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]] - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV3]] - ; GREEDY-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = COPY $vgpr1 - %3:_(s64) = COPY $vgpr2_vgpr3 - %4:_(s1) = G_ICMP intpred(ne), %1, %2 - %5:_(s64) = G_SELECT %4, %3, %0 + ; GREEDY-NEXT: 
[[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(i64) = COPY $vgpr2_vgpr3 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY1]](i32), [[COPY2]] + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY3]](i64) + ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV]], [[UV2]] + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV1]], [[UV3]] + ; GREEDY-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[SELECT]](i32), [[SELECT1]](i32) + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = COPY $vgpr1 + %3:_(i64) = COPY $vgpr2_vgpr3 + %4:_(i1) = G_ICMP intpred(ne), %1(i32), %2 + %5:_(i64) = G_SELECT %4(i1), %3, %0 ... --- @@ -600,36 +600,36 @@ body: | ; FAST-LABEL: name: select_s64_vvv ; FAST: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY $vgpr4_vgpr5 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](s64) - ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](s64) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]] - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV3]] - ; FAST-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(i64) = COPY $vgpr2_vgpr3 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(i64) = COPY $vgpr4_vgpr5 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY2]](i64) + ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY3]](i64) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV]], [[UV2]] + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV1]], [[UV3]] + ; FAST-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[SELECT]](i32), [[SELECT1]](i32) ; ; GREEDY-LABEL: name: select_s64_vvv ; GREEDY: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY $vgpr4_vgpr5 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](s64) - ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](s64) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]] - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT 
[[ICMP]](s1), [[UV1]], [[UV3]] - ; GREEDY-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s64) = COPY $vgpr2_vgpr3 - %3:_(s64) = COPY $vgpr4_vgpr5 - %4:_(s1) = G_ICMP intpred(ne), %0, %1 - %5:_(s64) = G_SELECT %4, %2, %3 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(i64) = COPY $vgpr2_vgpr3 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(i64) = COPY $vgpr4_vgpr5 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY2]](i64) + ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY3]](i64) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV]], [[UV2]] + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV1]], [[UV3]] + ; GREEDY-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[SELECT]](i32), [[SELECT1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i64) = COPY $vgpr2_vgpr3 + %3:_(i64) = COPY $vgpr4_vgpr5 + %4:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %5:_(i64) = G_SELECT %4(i1), %2, %3 ... --- @@ -641,36 +641,36 @@ body: | ; FAST-LABEL: name: select_s64_vvv_flags ; FAST: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY $vgpr4_vgpr5 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](s64) - ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](s64) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = nnan G_SELECT [[ICMP]](s1), [[UV]], [[UV2]] - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = nnan G_SELECT [[ICMP]](s1), [[UV1]], [[UV3]] - ; FAST-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(i64) = COPY $vgpr2_vgpr3 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(i64) = COPY $vgpr4_vgpr5 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY2]](i64) + ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY3]](i64) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = nnan G_SELECT [[ICMP]](i1), [[UV]], [[UV2]] + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = nnan G_SELECT [[ICMP]](i1), [[UV1]], [[UV3]] + ; FAST-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[SELECT]](i32), [[SELECT1]](i32) ; ; GREEDY-LABEL: name: select_s64_vvv_flags ; GREEDY: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY $vgpr4_vgpr5 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - 
; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](s64) - ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](s64) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = nnan G_SELECT [[ICMP]](s1), [[UV]], [[UV2]] - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = nnan G_SELECT [[ICMP]](s1), [[UV1]], [[UV3]] - ; GREEDY-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s64) = COPY $vgpr2_vgpr3 - %3:_(s64) = COPY $vgpr4_vgpr5 - %4:_(s1) = G_ICMP intpred(ne), %0, %1 - %5:_(s64) = nnan G_SELECT %4, %2, %3 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(i64) = COPY $vgpr2_vgpr3 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(i64) = COPY $vgpr4_vgpr5 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY2]](i64) + ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY3]](i64) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = nnan G_SELECT [[ICMP]](i1), [[UV]], [[UV2]] + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = nnan G_SELECT [[ICMP]](i1), [[UV1]], [[UV3]] + ; GREEDY-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[SELECT]](i32), [[SELECT1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i64) = COPY $vgpr2_vgpr3 + %3:_(i64) = COPY $vgpr4_vgpr5 + %4:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %5:_(i64) = nnan G_SELECT %4(i1), %2, %3 ... --- @@ -682,32 +682,32 @@ body: | ; FAST-LABEL: name: select_v2s32_scc_ss ; FAST: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(<2 x s32>) = COPY $sgpr2_sgpr3 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:sgpr(<2 x s32>) = COPY $sgpr4_sgpr5 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:sgpr(<2 x s32>) = G_SELECT [[ZEXT]](s32), [[COPY2]], [[COPY3]] + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(<2 x i32>) = COPY $sgpr2_sgpr3 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:sgpr(<2 x i32>) = COPY $sgpr4_sgpr5 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:sgpr(<2 x i32>) = G_SELECT [[ZEXT]](i32), [[COPY2]], [[COPY3]] ; ; GREEDY-LABEL: name: select_v2s32_scc_ss ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(<2 x s32>) = COPY $sgpr2_sgpr3 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(<2 x s32>) = COPY $sgpr4_sgpr5 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; 
GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:sgpr(<2 x s32>) = G_SELECT [[ZEXT]](s32), [[COPY2]], [[COPY3]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(<2 x s32>) = COPY $sgpr2_sgpr3 - %3:_(<2 x s32>) = COPY $sgpr4_sgpr5 - %4:_(s1) = G_ICMP intpred(ne), %0, %1 - %5:_(<2 x s32>) = G_SELECT %4, %2, %3 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(<2 x i32>) = COPY $sgpr2_sgpr3 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(<2 x i32>) = COPY $sgpr4_sgpr5 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:sgpr(<2 x i32>) = G_SELECT [[ZEXT]](i32), [[COPY2]], [[COPY3]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(<2 x i32>) = COPY $sgpr2_sgpr3 + %3:_(<2 x i32>) = COPY $sgpr4_sgpr5 + %4:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %5:_(<2 x i32>) = G_SELECT %4(i1), %2, %3 ... --- @@ -719,40 +719,40 @@ body: | ; FAST-LABEL: name: select_v2s32_scc_sv ; FAST: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(<2 x s32>) = COPY $sgpr2_sgpr3 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](<2 x s32>) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] - ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(<2 x i32>) = COPY $sgpr2_sgpr3 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x i32>) = COPY $vgpr0_vgpr1 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY2]](<2 x i32>) + ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY3]](<2 x i32>) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY4]](i1), [[UV]], [[UV2]] + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY4]](i1), [[UV1]], [[UV3]] + ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x i32>) = G_BUILD_VECTOR [[SELECT]](i32), [[SELECT1]](i32) ; ; GREEDY-LABEL: name: select_v2s32_scc_sv ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(<2 x s32>) = 
COPY $sgpr2_sgpr3 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](<2 x s32>) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(<2 x s32>) = COPY $sgpr2_sgpr3 - %3:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %4:_(s1) = G_ICMP intpred(ne), %0, %1 - %5:_(<2 x s32>) = G_SELECT %4, %2, %3 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(<2 x i32>) = COPY $sgpr2_sgpr3 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY2]](<2 x i32>) + ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY3]](<2 x i32>) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY4]](i1), [[UV]], [[UV2]] + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY4]](i1), [[UV1]], [[UV3]] + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x i32>) = G_BUILD_VECTOR [[SELECT]](i32), [[SELECT1]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(<2 x i32>) = COPY $sgpr2_sgpr3 + %3:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %4:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %5:_(<2 x i32>) = G_SELECT %4(i1), %2, %3 ... 
@@ -765,40 +765,40 @@ body: | ; FAST-LABEL: name: select_v2s32_scc_vs ; FAST: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(<2 x s32>) = COPY $sgpr2_sgpr3 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](<2 x s32>) - ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] - ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(<2 x i32>) = COPY $sgpr2_sgpr3 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x i32>) = COPY $vgpr0_vgpr1 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY3]](<2 x i32>) + ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY2]](<2 x i32>) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY4]](i1), [[UV]], [[UV2]] + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY4]](i1), [[UV1]], [[UV3]] + ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x i32>) = G_BUILD_VECTOR [[SELECT]](i32), [[SELECT1]](i32) ; ; GREEDY-LABEL: name: select_v2s32_scc_vs ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(<2 x s32>) = COPY $sgpr2_sgpr3 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](<2 x s32>) - ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(<2 x s32>) = COPY $sgpr2_sgpr3 - %3:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %4:_(s1) = G_ICMP intpred(ne), %0, %1 - %5:_(<2 x s32>) = G_SELECT %4, %3, %2 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: 
[[COPY2:%[0-9]+]]:sgpr(<2 x i32>) = COPY $sgpr2_sgpr3 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY3]](<2 x i32>) + ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY2]](<2 x i32>) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY4]](i1), [[UV]], [[UV2]] + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY4]](i1), [[UV1]], [[UV3]] + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x i32>) = G_BUILD_VECTOR [[SELECT]](i32), [[SELECT1]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(<2 x i32>) = COPY $sgpr2_sgpr3 + %3:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %4:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %5:_(<2 x i32>) = G_SELECT %4(i1), %3, %2 ... --- @@ -810,40 +810,40 @@ body: | ; FAST-LABEL: name: select_v2s32_scc_vv ; FAST: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1, $vgpr2_vgpr3 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr2_vgpr3 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](<2 x s32>) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] - ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x i32>) = COPY $vgpr0_vgpr1 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x i32>) = COPY $vgpr2_vgpr3 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY2]](<2 x i32>) + ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY3]](<2 x i32>) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY4]](i1), [[UV]], [[UV2]] + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY4]](i1), [[UV1]], [[UV3]] + ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x i32>) = G_BUILD_VECTOR [[SELECT]](i32), [[SELECT1]](i32) ; ; GREEDY-LABEL: name: select_v2s32_scc_vv ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1, $vgpr2_vgpr3 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s32>) = 
COPY $vgpr2_vgpr3 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](<2 x s32>) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %3:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %4:_(s1) = G_ICMP intpred(ne), %0, %1 - %5:_(<2 x s32>) = G_SELECT %4, %2, %3 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x i32>) = COPY $vgpr0_vgpr1 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY2]](<2 x i32>) + ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY3]](<2 x i32>) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY4]](i1), [[UV]], [[UV2]] + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY4]](i1), [[UV1]], [[UV3]] + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x i32>) = G_BUILD_VECTOR [[SELECT]](i32), [[SELECT1]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %3:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %4:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %5:_(<2 x i32>) = G_SELECT %4(i1), %2, %3 ... 
--- @@ -855,36 +855,36 @@ body: | ; FAST-LABEL: name: select_v2s32_vcc_ss ; FAST: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3, $vgpr0, $vgpr1 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s32>) = COPY $sgpr2_sgpr3 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY2]](s32), [[COPY3]] - ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]] - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV3]] - ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x i32>) = COPY $sgpr0_sgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x i32>) = COPY $sgpr2_sgpr3 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY2]](i32), [[COPY3]] + ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV]], [[UV2]] + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV1]], [[UV3]] + ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x i32>) = G_BUILD_VECTOR [[SELECT]](i32), [[SELECT1]](i32) ; ; GREEDY-LABEL: name: select_v2s32_vcc_ss ; GREEDY: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3, $vgpr0, $vgpr1 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s32>) = COPY $sgpr2_sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY2]](s32), [[COPY3]] - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]] - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV3]] - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) - %0:_(<2 x s32>) = COPY $sgpr0_sgpr1 - %1:_(<2 x s32>) = COPY $sgpr2_sgpr3 - %2:_(s32) = COPY $vgpr0 - %3:_(s32) = COPY $vgpr1 - %4:_(s1) = G_ICMP intpred(ne), %2, %3 - %5:_(<2 x s32>) = G_SELECT %4, %0, %1 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x i32>) = COPY $sgpr0_sgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x i32>) = COPY $sgpr2_sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY2]](i32), [[COPY3]] + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES 
[[COPY1]](<2 x i32>) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV]], [[UV2]] + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV1]], [[UV3]] + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x i32>) = G_BUILD_VECTOR [[SELECT]](i32), [[SELECT1]](i32) + %0:_(<2 x i32>) = COPY $sgpr0_sgpr1 + %1:_(<2 x i32>) = COPY $sgpr2_sgpr3 + %2:_(i32) = COPY $vgpr0 + %3:_(i32) = COPY $vgpr1 + %4:_(i1) = G_ICMP intpred(ne), %2(i32), %3 + %5:_(<2 x i32>) = G_SELECT %4(i1), %0, %1 ... --- @@ -896,36 +896,36 @@ body: | ; FAST-LABEL: name: select_v2s32_vcc_sv ; FAST: liveins: $sgpr0_sgpr1, $vgpr0, $vgpr1, $vgpr2_vgpr3 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr2_vgpr3 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[COPY2]] - ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](<2 x s32>) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]] - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV3]] - ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x i32>) = COPY $sgpr0_sgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x i32>) = COPY $vgpr2_vgpr3 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY1]](i32), [[COPY2]] + ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY3]](<2 x i32>) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV]], [[UV2]] + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV1]], [[UV3]] + ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x i32>) = G_BUILD_VECTOR [[SELECT]](i32), [[SELECT1]](i32) ; ; GREEDY-LABEL: name: select_v2s32_vcc_sv ; GREEDY: liveins: $sgpr0_sgpr1, $vgpr0, $vgpr1, $vgpr2_vgpr3 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[COPY2]] - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](<2 x s32>) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]] - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV3]] - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) - %0:_(<2 x s32>) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = COPY $vgpr1 - %3:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %4:_(s1) = G_ICMP intpred(ne), %1, %2 - %5:_(<2 x s32>) = G_SELECT %4, %0, %3 + ; GREEDY-NEXT: 
[[COPY:%[0-9]+]]:sgpr(<2 x i32>) = COPY $sgpr0_sgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY1]](i32), [[COPY2]] + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY3]](<2 x i32>) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV]], [[UV2]] + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV1]], [[UV3]] + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x i32>) = G_BUILD_VECTOR [[SELECT]](i32), [[SELECT1]](i32) + %0:_(<2 x i32>) = COPY $sgpr0_sgpr1 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = COPY $vgpr1 + %3:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %4:_(i1) = G_ICMP intpred(ne), %1(i32), %2 + %5:_(<2 x i32>) = G_SELECT %4(i1), %0, %3 ... --- @@ -937,36 +937,36 @@ body: | ; FAST-LABEL: name: select_v2s32_vcc_vs ; FAST: liveins: $sgpr0_sgpr1, $vgpr0, $vgpr1, $vgpr2_vgpr3 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr2_vgpr3 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[COPY2]] - ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](<2 x s32>) - ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]] - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV3]] - ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x i32>) = COPY $sgpr0_sgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x i32>) = COPY $vgpr2_vgpr3 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY1]](i32), [[COPY2]] + ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY3]](<2 x i32>) + ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV]], [[UV2]] + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV1]], [[UV3]] + ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x i32>) = G_BUILD_VECTOR [[SELECT]](i32), [[SELECT1]](i32) ; ; GREEDY-LABEL: name: select_v2s32_vcc_vs ; GREEDY: liveins: $sgpr0_sgpr1, $vgpr0, $vgpr1, $vgpr2_vgpr3 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[COPY2]] - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](<2 x s32>) - ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES 
[[COPY]](<2 x s32>) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]] - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV3]] - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) - %0:_(<2 x s32>) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = COPY $vgpr1 - %3:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %4:_(s1) = G_ICMP intpred(ne), %1, %2 - %5:_(<2 x s32>) = G_SELECT %4, %3, %0 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x i32>) = COPY $sgpr0_sgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY1]](i32), [[COPY2]] + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY3]](<2 x i32>) + ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV]], [[UV2]] + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV1]], [[UV3]] + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x i32>) = G_BUILD_VECTOR [[SELECT]](i32), [[SELECT1]](i32) + %0:_(<2 x i32>) = COPY $sgpr0_sgpr1 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = COPY $vgpr1 + %3:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %4:_(i1) = G_ICMP intpred(ne), %1(i32), %2 + %5:_(<2 x i32>) = G_SELECT %4(i1), %3, %0 ... --- @@ -978,36 +978,36 @@ body: | ; FAST-LABEL: name: select_v2s32_vcc_vv ; FAST: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr2_vgpr3 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr4_vgpr5 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](<2 x s32>) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]] - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV3]] - ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x i32>) = COPY $vgpr2_vgpr3 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x i32>) = COPY $vgpr4_vgpr5 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY2]](<2 x i32>) + ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY3]](<2 x i32>) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV]], [[UV2]] + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV1]], [[UV3]] + ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x i32>) = G_BUILD_VECTOR [[SELECT]](i32), [[SELECT1]](i32) ; ; GREEDY-LABEL: name: select_v2s32_vcc_vv ; GREEDY: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: 
[[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr4_vgpr5 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](<2 x s32>) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]] - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV3]] - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %3:_(<2 x s32>) = COPY $vgpr4_vgpr5 - %4:_(s1) = G_ICMP intpred(ne), %0, %1 - %5:_(<2 x s32>) = G_SELECT %4, %2, %3 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x i32>) = COPY $vgpr2_vgpr3 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x i32>) = COPY $vgpr4_vgpr5 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY2]](<2 x i32>) + ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY3]](<2 x i32>) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV]], [[UV2]] + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV1]], [[UV3]] + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x i32>) = G_BUILD_VECTOR [[SELECT]](i32), [[SELECT1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %3:_(<2 x i32>) = COPY $vgpr4_vgpr5 + %4:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %5:_(<2 x i32>) = G_SELECT %4(i1), %2, %3 ... 
--- @@ -1019,32 +1019,32 @@ body: | ; FAST-LABEL: name: select_v4s16_scc_ss ; FAST: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr2_sgpr3 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr4_sgpr5 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:sgpr(<4 x s16>) = G_SELECT [[ZEXT]](s32), [[COPY2]], [[COPY3]] + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(<4 x i16>) = COPY $sgpr2_sgpr3 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:sgpr(<4 x i16>) = COPY $sgpr4_sgpr5 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:sgpr(<4 x i16>) = G_SELECT [[ZEXT]](i32), [[COPY2]], [[COPY3]] ; ; GREEDY-LABEL: name: select_v4s16_scc_ss ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr2_sgpr3 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr4_sgpr5 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:sgpr(<4 x s16>) = G_SELECT [[ZEXT]](s32), [[COPY2]], [[COPY3]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(<4 x s16>) = COPY $sgpr2_sgpr3 - %3:_(<4 x s16>) = COPY $sgpr4_sgpr5 - %4:_(s1) = G_ICMP intpred(ne), %0, %1 - %5:_(<4 x s16>) = G_SELECT %4, %2, %3 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(<4 x i16>) = COPY $sgpr2_sgpr3 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(<4 x i16>) = COPY $sgpr4_sgpr5 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:sgpr(<4 x i16>) = G_SELECT [[ZEXT]](i32), [[COPY2]], [[COPY3]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(<4 x i16>) = COPY $sgpr2_sgpr3 + %3:_(<4 x i16>) = COPY $sgpr4_sgpr5 + %4:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %5:_(<4 x i16>) = G_SELECT %4(i1), %2, %3 ... 
--- @@ -1056,40 +1056,40 @@ body: | ; FAST-LABEL: name: select_v4s16_scc_sv ; FAST: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr2_sgpr3 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s16>), [[UV1:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) - ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x s16>), [[UV3:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY3]](<4 x s16>) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] - ; FAST-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[SELECT]](<2 x s16>), [[SELECT1]](<2 x s16>) + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(<4 x i16>) = COPY $sgpr2_sgpr3 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x i16>) = COPY $vgpr0_vgpr1 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x i16>), [[UV1:%[0-9]+]]:vgpr(<2 x i16>) = G_UNMERGE_VALUES [[COPY2]](<4 x i16>) + ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x i16>), [[UV3:%[0-9]+]]:vgpr(<2 x i16>) = G_UNMERGE_VALUES [[COPY3]](<4 x i16>) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(<2 x i16>) = G_SELECT [[COPY4]](i1), [[UV]], [[UV2]] + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(<2 x i16>) = G_SELECT [[COPY4]](i1), [[UV1]], [[UV3]] + ; FAST-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x i16>) = G_CONCAT_VECTORS [[SELECT]](<2 x i16>), [[SELECT1]](<2 x i16>) ; ; GREEDY-LABEL: name: select_v4s16_scc_sv ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr2_sgpr3 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s16>), [[UV1:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) - ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x s16>), [[UV3:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY3]](<4 x s16>) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] - ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[SELECT]](<2 x s16>), [[SELECT1]](<2 x s16>) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(<4 x s16>) = COPY $sgpr2_sgpr3 - %3:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %4:_(s1) = G_ICMP intpred(ne), %0, %1 - %5:_(<4 x s16>) = G_SELECT 
%4, %2, %3 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(<4 x i16>) = COPY $sgpr2_sgpr3 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x i16>), [[UV1:%[0-9]+]]:vgpr(<2 x i16>) = G_UNMERGE_VALUES [[COPY2]](<4 x i16>) + ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x i16>), [[UV3:%[0-9]+]]:vgpr(<2 x i16>) = G_UNMERGE_VALUES [[COPY3]](<4 x i16>) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(<2 x i16>) = G_SELECT [[COPY4]](i1), [[UV]], [[UV2]] + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(<2 x i16>) = G_SELECT [[COPY4]](i1), [[UV1]], [[UV3]] + ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x i16>) = G_CONCAT_VECTORS [[SELECT]](<2 x i16>), [[SELECT1]](<2 x i16>) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(<4 x i16>) = COPY $sgpr2_sgpr3 + %3:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %4:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %5:_(<4 x i16>) = G_SELECT %4(i1), %2, %3 ... @@ -1102,40 +1102,40 @@ body: | ; FAST-LABEL: name: select_v4s16_scc_vs ; FAST: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr2_sgpr3 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s16>), [[UV1:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY3]](<4 x s16>) - ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x s16>), [[UV3:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] - ; FAST-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[SELECT]](<2 x s16>), [[SELECT1]](<2 x s16>) + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(<4 x i16>) = COPY $sgpr2_sgpr3 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x i16>) = COPY $vgpr0_vgpr1 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x i16>), [[UV1:%[0-9]+]]:vgpr(<2 x i16>) = G_UNMERGE_VALUES [[COPY3]](<4 x i16>) + ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x i16>), [[UV3:%[0-9]+]]:vgpr(<2 x i16>) = G_UNMERGE_VALUES [[COPY2]](<4 x i16>) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(<2 x i16>) = G_SELECT [[COPY4]](i1), [[UV]], [[UV2]] + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(<2 x i16>) = G_SELECT [[COPY4]](i1), [[UV1]], [[UV3]] + ; FAST-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x i16>) = G_CONCAT_VECTORS [[SELECT]](<2 x i16>), [[SELECT1]](<2 x i16>) ; ; GREEDY-LABEL: name: select_v4s16_scc_vs ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, 
$vgpr0_vgpr1 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr2_sgpr3 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s16>), [[UV1:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY3]](<4 x s16>) - ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x s16>), [[UV3:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] - ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[SELECT]](<2 x s16>), [[SELECT1]](<2 x s16>) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(<4 x s16>) = COPY $sgpr2_sgpr3 - %3:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %4:_(s1) = G_ICMP intpred(ne), %0, %1 - %5:_(<4 x s16>) = G_SELECT %4, %3, %2 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(<4 x i16>) = COPY $sgpr2_sgpr3 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x i16>), [[UV1:%[0-9]+]]:vgpr(<2 x i16>) = G_UNMERGE_VALUES [[COPY3]](<4 x i16>) + ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x i16>), [[UV3:%[0-9]+]]:vgpr(<2 x i16>) = G_UNMERGE_VALUES [[COPY2]](<4 x i16>) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(<2 x i16>) = G_SELECT [[COPY4]](i1), [[UV]], [[UV2]] + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(<2 x i16>) = G_SELECT [[COPY4]](i1), [[UV1]], [[UV3]] + ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x i16>) = G_CONCAT_VECTORS [[SELECT]](<2 x i16>), [[SELECT1]](<2 x i16>) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(<4 x i16>) = COPY $sgpr2_sgpr3 + %3:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %4:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %5:_(<4 x i16>) = G_SELECT %4(i1), %3, %2 ... 
--- @@ -1147,40 +1147,40 @@ body: | ; FAST-LABEL: name: select_v4s16_scc_vv ; FAST: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1, $vgpr2_vgpr3 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s16>), [[UV1:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) - ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x s16>), [[UV3:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY3]](<4 x s16>) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] - ; FAST-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[SELECT]](<2 x s16>), [[SELECT1]](<2 x s16>) + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(<4 x i16>) = COPY $vgpr0_vgpr1 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x i16>) = COPY $vgpr2_vgpr3 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x i16>), [[UV1:%[0-9]+]]:vgpr(<2 x i16>) = G_UNMERGE_VALUES [[COPY2]](<4 x i16>) + ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x i16>), [[UV3:%[0-9]+]]:vgpr(<2 x i16>) = G_UNMERGE_VALUES [[COPY3]](<4 x i16>) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(<2 x i16>) = G_SELECT [[COPY4]](i1), [[UV]], [[UV2]] + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(<2 x i16>) = G_SELECT [[COPY4]](i1), [[UV1]], [[UV3]] + ; FAST-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x i16>) = G_CONCAT_VECTORS [[SELECT]](<2 x i16>), [[SELECT1]](<2 x i16>) ; ; GREEDY-LABEL: name: select_v4s16_scc_vv ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1, $vgpr2_vgpr3 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s16>), [[UV1:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) - ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x s16>), [[UV3:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY3]](<4 x s16>) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] - ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[SELECT]](<2 x s16>), [[SELECT1]](<2 x s16>) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %3:_(<4 x s16>) = COPY $vgpr2_vgpr3 - %4:_(s1) = G_ICMP intpred(ne), %0, %1 - 
%5:_(<4 x s16>) = G_SELECT %4, %2, %3 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(<4 x i16>) = COPY $vgpr0_vgpr1 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x i16>), [[UV1:%[0-9]+]]:vgpr(<2 x i16>) = G_UNMERGE_VALUES [[COPY2]](<4 x i16>) + ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x i16>), [[UV3:%[0-9]+]]:vgpr(<2 x i16>) = G_UNMERGE_VALUES [[COPY3]](<4 x i16>) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(<2 x i16>) = G_SELECT [[COPY4]](i1), [[UV]], [[UV2]] + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(<2 x i16>) = G_SELECT [[COPY4]](i1), [[UV1]], [[UV3]] + ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x i16>) = G_CONCAT_VECTORS [[SELECT]](<2 x i16>), [[SELECT1]](<2 x i16>) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %3:_(<4 x i16>) = COPY $vgpr2_vgpr3 + %4:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %5:_(<4 x i16>) = G_SELECT %4(i1), %2, %3 ... --- @@ -1192,36 +1192,36 @@ body: | ; FAST-LABEL: name: select_v4s16_vcc_ss ; FAST: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3, $vgpr0, $vgpr1 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr2_sgpr3 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY2]](s32), [[COPY3]] - ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s16>), [[UV1:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x s16>), [[UV3:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]] - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV3]] - ; FAST-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[SELECT]](<2 x s16>), [[SELECT1]](<2 x s16>) + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x i16>) = COPY $sgpr0_sgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(<4 x i16>) = COPY $sgpr2_sgpr3 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY2]](i32), [[COPY3]] + ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x i16>), [[UV1:%[0-9]+]]:vgpr(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x i16>), [[UV3:%[0-9]+]]:vgpr(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(<2 x i16>) = G_SELECT [[ICMP]](i1), [[UV]], [[UV2]] + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(<2 x i16>) = G_SELECT [[ICMP]](i1), [[UV1]], [[UV3]] + ; FAST-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x i16>) = G_CONCAT_VECTORS [[SELECT]](<2 x i16>), [[SELECT1]](<2 x i16>) ; ; GREEDY-LABEL: name: select_v4s16_vcc_ss ; GREEDY: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3, $vgpr0, $vgpr1 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr2_sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; 
GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY2]](s32), [[COPY3]] - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s16>), [[UV1:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x s16>), [[UV3:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]] - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV3]] - ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[SELECT]](<2 x s16>), [[SELECT1]](<2 x s16>) - %0:_(<4 x s16>) = COPY $sgpr0_sgpr1 - %1:_(<4 x s16>) = COPY $sgpr2_sgpr3 - %2:_(s32) = COPY $vgpr0 - %3:_(s32) = COPY $vgpr1 - %4:_(s1) = G_ICMP intpred(ne), %2, %3 - %5:_(<4 x s16>) = G_SELECT %4, %0, %1 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x i16>) = COPY $sgpr0_sgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(<4 x i16>) = COPY $sgpr2_sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY2]](i32), [[COPY3]] + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x i16>), [[UV1:%[0-9]+]]:vgpr(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x i16>), [[UV3:%[0-9]+]]:vgpr(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(<2 x i16>) = G_SELECT [[ICMP]](i1), [[UV]], [[UV2]] + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(<2 x i16>) = G_SELECT [[ICMP]](i1), [[UV1]], [[UV3]] + ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x i16>) = G_CONCAT_VECTORS [[SELECT]](<2 x i16>), [[SELECT1]](<2 x i16>) + %0:_(<4 x i16>) = COPY $sgpr0_sgpr1 + %1:_(<4 x i16>) = COPY $sgpr2_sgpr3 + %2:_(i32) = COPY $vgpr0 + %3:_(i32) = COPY $vgpr1 + %4:_(i1) = G_ICMP intpred(ne), %2(i32), %3 + %5:_(<4 x i16>) = G_SELECT %4(i1), %0, %1 ... 
--- @@ -1233,36 +1233,36 @@ body: | ; FAST-LABEL: name: select_v4s16_vcc_sv ; FAST: liveins: $sgpr0_sgpr1, $vgpr0, $vgpr1, $vgpr2_vgpr3 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[COPY2]] - ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s16>), [[UV1:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x s16>), [[UV3:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY3]](<4 x s16>) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]] - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV3]] - ; FAST-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[SELECT]](<2 x s16>), [[SELECT1]](<2 x s16>) + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x i16>) = COPY $sgpr0_sgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x i16>) = COPY $vgpr2_vgpr3 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY1]](i32), [[COPY2]] + ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x i16>), [[UV1:%[0-9]+]]:vgpr(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x i16>), [[UV3:%[0-9]+]]:vgpr(<2 x i16>) = G_UNMERGE_VALUES [[COPY3]](<4 x i16>) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(<2 x i16>) = G_SELECT [[ICMP]](i1), [[UV]], [[UV2]] + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(<2 x i16>) = G_SELECT [[ICMP]](i1), [[UV1]], [[UV3]] + ; FAST-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x i16>) = G_CONCAT_VECTORS [[SELECT]](<2 x i16>), [[SELECT1]](<2 x i16>) ; ; GREEDY-LABEL: name: select_v4s16_vcc_sv ; GREEDY: liveins: $sgpr0_sgpr1, $vgpr0, $vgpr1, $vgpr2_vgpr3 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[COPY2]] - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s16>), [[UV1:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x s16>), [[UV3:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY3]](<4 x s16>) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]] - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV3]] - ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[SELECT]](<2 x s16>), [[SELECT1]](<2 x s16>) - %0:_(<4 x s16>) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = COPY $vgpr1 - %3:_(<4 x s16>) = COPY $vgpr2_vgpr3 - %4:_(s1) = G_ICMP intpred(ne), %1, %2 - %5:_(<4 x s16>) = G_SELECT %4, %0, %3 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x i16>) = COPY $sgpr0_sgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY1]](i32), [[COPY2]] + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x 
i16>), [[UV1:%[0-9]+]]:vgpr(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x i16>), [[UV3:%[0-9]+]]:vgpr(<2 x i16>) = G_UNMERGE_VALUES [[COPY3]](<4 x i16>) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(<2 x i16>) = G_SELECT [[ICMP]](i1), [[UV]], [[UV2]] + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(<2 x i16>) = G_SELECT [[ICMP]](i1), [[UV1]], [[UV3]] + ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x i16>) = G_CONCAT_VECTORS [[SELECT]](<2 x i16>), [[SELECT1]](<2 x i16>) + %0:_(<4 x i16>) = COPY $sgpr0_sgpr1 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = COPY $vgpr1 + %3:_(<4 x i16>) = COPY $vgpr2_vgpr3 + %4:_(i1) = G_ICMP intpred(ne), %1(i32), %2 + %5:_(<4 x i16>) = G_SELECT %4(i1), %0, %3 ... --- @@ -1274,36 +1274,36 @@ body: | ; FAST-LABEL: name: select_v4s16_vcc_vs ; FAST: liveins: $sgpr0_sgpr1, $vgpr0, $vgpr1, $vgpr2_vgpr3 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[COPY2]] - ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s16>), [[UV1:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY3]](<4 x s16>) - ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x s16>), [[UV3:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]] - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV3]] - ; FAST-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[SELECT]](<2 x s16>), [[SELECT1]](<2 x s16>) + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x i16>) = COPY $sgpr0_sgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x i16>) = COPY $vgpr2_vgpr3 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY1]](i32), [[COPY2]] + ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x i16>), [[UV1:%[0-9]+]]:vgpr(<2 x i16>) = G_UNMERGE_VALUES [[COPY3]](<4 x i16>) + ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x i16>), [[UV3:%[0-9]+]]:vgpr(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(<2 x i16>) = G_SELECT [[ICMP]](i1), [[UV]], [[UV2]] + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(<2 x i16>) = G_SELECT [[ICMP]](i1), [[UV1]], [[UV3]] + ; FAST-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x i16>) = G_CONCAT_VECTORS [[SELECT]](<2 x i16>), [[SELECT1]](<2 x i16>) ; ; GREEDY-LABEL: name: select_v4s16_vcc_vs ; GREEDY: liveins: $sgpr0_sgpr1, $vgpr0, $vgpr1, $vgpr2_vgpr3 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[COPY2]] - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s16>), [[UV1:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY3]](<4 x s16>) - ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x s16>), [[UV3:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]] - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[ICMP]](s1), 
[[UV1]], [[UV3]] - ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[SELECT]](<2 x s16>), [[SELECT1]](<2 x s16>) - %0:_(<4 x s16>) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = COPY $vgpr1 - %3:_(<4 x s16>) = COPY $vgpr2_vgpr3 - %4:_(s1) = G_ICMP intpred(ne), %1, %2 - %5:_(<4 x s16>) = G_SELECT %4, %3, %0 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x i16>) = COPY $sgpr0_sgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY1]](i32), [[COPY2]] + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x i16>), [[UV1:%[0-9]+]]:vgpr(<2 x i16>) = G_UNMERGE_VALUES [[COPY3]](<4 x i16>) + ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x i16>), [[UV3:%[0-9]+]]:vgpr(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(<2 x i16>) = G_SELECT [[ICMP]](i1), [[UV]], [[UV2]] + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(<2 x i16>) = G_SELECT [[ICMP]](i1), [[UV1]], [[UV3]] + ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x i16>) = G_CONCAT_VECTORS [[SELECT]](<2 x i16>), [[SELECT1]](<2 x i16>) + %0:_(<4 x i16>) = COPY $sgpr0_sgpr1 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = COPY $vgpr1 + %3:_(<4 x i16>) = COPY $vgpr2_vgpr3 + %4:_(i1) = G_ICMP intpred(ne), %1(i32), %2 + %5:_(<4 x i16>) = G_SELECT %4(i1), %3, %0 ... --- @@ -1315,36 +1315,36 @@ body: | ; FAST-LABEL: name: select_v4s16_vcc_vv ; FAST: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr4_vgpr5 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s16>), [[UV1:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) - ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x s16>), [[UV3:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY3]](<4 x s16>) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]] - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV3]] - ; FAST-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[SELECT]](<2 x s16>), [[SELECT1]](<2 x s16>) + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(<4 x i16>) = COPY $vgpr2_vgpr3 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x i16>) = COPY $vgpr4_vgpr5 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x i16>), [[UV1:%[0-9]+]]:vgpr(<2 x i16>) = G_UNMERGE_VALUES [[COPY2]](<4 x i16>) + ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x i16>), [[UV3:%[0-9]+]]:vgpr(<2 x i16>) = G_UNMERGE_VALUES [[COPY3]](<4 x i16>) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(<2 x i16>) = G_SELECT [[ICMP]](i1), [[UV]], [[UV2]] + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(<2 x i16>) = G_SELECT [[ICMP]](i1), [[UV1]], [[UV3]] + ; FAST-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x i16>) = G_CONCAT_VECTORS [[SELECT]](<2 x i16>), [[SELECT1]](<2 x i16>) ; ; GREEDY-LABEL: name: select_v4s16_vcc_vv ; GREEDY: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: 
[[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr4_vgpr5 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s16>), [[UV1:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) - ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x s16>), [[UV3:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY3]](<4 x s16>) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]] - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV3]] - ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[SELECT]](<2 x s16>), [[SELECT1]](<2 x s16>) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(<4 x s16>) = COPY $vgpr2_vgpr3 - %3:_(<4 x s16>) = COPY $vgpr4_vgpr5 - %4:_(s1) = G_ICMP intpred(ne), %0, %1 - %5:_(<4 x s16>) = G_SELECT %4, %2, %3 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(<4 x i16>) = COPY $vgpr2_vgpr3 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x i16>) = COPY $vgpr4_vgpr5 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x i16>), [[UV1:%[0-9]+]]:vgpr(<2 x i16>) = G_UNMERGE_VALUES [[COPY2]](<4 x i16>) + ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x i16>), [[UV3:%[0-9]+]]:vgpr(<2 x i16>) = G_UNMERGE_VALUES [[COPY3]](<4 x i16>) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(<2 x i16>) = G_SELECT [[ICMP]](i1), [[UV]], [[UV2]] + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(<2 x i16>) = G_SELECT [[ICMP]](i1), [[UV1]], [[UV3]] + ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x i16>) = G_CONCAT_VECTORS [[SELECT]](<2 x i16>), [[SELECT1]](<2 x i16>) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(<4 x i16>) = COPY $vgpr2_vgpr3 + %3:_(<4 x i16>) = COPY $vgpr4_vgpr5 + %4:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %5:_(<4 x i16>) = G_SELECT %4(i1), %2, %3 ... 
--- @@ -1356,32 +1356,32 @@ body: | ; FAST-LABEL: name: select_p1_scc_ss ; FAST: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(p1) = COPY $sgpr2_sgpr3 ; FAST-NEXT: [[COPY3:%[0-9]+]]:sgpr(p1) = COPY $sgpr4_sgpr5 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:sgpr(p1) = G_SELECT [[ZEXT]](s32), [[COPY2]], [[COPY3]] + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:sgpr(p1) = G_SELECT [[ZEXT]](i32), [[COPY2]], [[COPY3]] ; ; GREEDY-LABEL: name: select_p1_scc_ss ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(p1) = COPY $sgpr2_sgpr3 ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(p1) = COPY $sgpr4_sgpr5 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:sgpr(p1) = G_SELECT [[ZEXT]](s32), [[COPY2]], [[COPY3]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:sgpr(p1) = G_SELECT [[ZEXT]](i32), [[COPY2]], [[COPY3]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 %2:_(p1) = COPY $sgpr2_sgpr3 %3:_(p1) = COPY $sgpr4_sgpr5 - %4:_(s1) = G_ICMP intpred(ne), %0, %1 - %5:_(p1) = G_SELECT %4, %2, %3 + %4:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %5:_(p1) = G_SELECT %4(i1), %2, %3 ... 
--- @@ -1393,32 +1393,32 @@ body: | ; FAST-LABEL: name: select_p999_scc_ss ; FAST: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(p999) = COPY $sgpr2_sgpr3 ; FAST-NEXT: [[COPY3:%[0-9]+]]:sgpr(p999) = COPY $sgpr4_sgpr5 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:sgpr(p999) = G_SELECT [[ZEXT]](s32), [[COPY2]], [[COPY3]] + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:sgpr(p999) = G_SELECT [[ZEXT]](i32), [[COPY2]], [[COPY3]] ; ; GREEDY-LABEL: name: select_p999_scc_ss ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(p999) = COPY $sgpr2_sgpr3 ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(p999) = COPY $sgpr4_sgpr5 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:sgpr(p999) = G_SELECT [[ZEXT]](s32), [[COPY2]], [[COPY3]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:sgpr(p999) = G_SELECT [[ZEXT]](i32), [[COPY2]], [[COPY3]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 %2:_(p999) = COPY $sgpr2_sgpr3 %3:_(p999) = COPY $sgpr4_sgpr5 - %4:_(s1) = G_ICMP intpred(ne), %0, %1 - %5:_(p999) = G_SELECT %4, %2, %3 + %4:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %5:_(p999) = G_SELECT %4(i1), %2, %3 ... 
--- @@ -1430,40 +1430,40 @@ body: | ; FAST-LABEL: name: select_p1_scc_sv ; FAST: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(p1) = COPY $sgpr2_sgpr3 ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](p1) - ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](p1) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] - ; FAST-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY2]](p1) + ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY3]](p1) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY4]](i1), [[UV]], [[UV2]] + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY4]](i1), [[UV1]], [[UV3]] + ; FAST-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[SELECT]](i32), [[SELECT1]](i32) ; ; GREEDY-LABEL: name: select_p1_scc_sv ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(p1) = COPY $sgpr2_sgpr3 ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](p1) - ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](p1) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] - ; GREEDY-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY2]](p1) + ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY3]](p1) + ; GREEDY-NEXT: 
[[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY4]](i1), [[UV]], [[UV2]] + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY4]](i1), [[UV1]], [[UV3]] + ; GREEDY-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[SELECT]](i32), [[SELECT1]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 %2:_(p1) = COPY $sgpr2_sgpr3 %3:_(p1) = COPY $vgpr0_vgpr1 - %4:_(s1) = G_ICMP intpred(ne), %0, %1 - %5:_(p1) = G_SELECT %4, %2, %3 + %4:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %5:_(p1) = G_SELECT %4(i1), %2, %3 ... @@ -1476,40 +1476,40 @@ body: | ; FAST-LABEL: name: select_p1_scc_vs ; FAST: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(p1) = COPY $sgpr2_sgpr3 ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](p1) - ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](p1) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] - ; FAST-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY3]](p1) + ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY2]](p1) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY4]](i1), [[UV]], [[UV2]] + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY4]](i1), [[UV1]], [[UV3]] + ; FAST-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[SELECT]](i32), [[SELECT1]](i32) ; ; GREEDY-LABEL: name: select_p1_scc_vs ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(p1) = COPY $sgpr2_sgpr3 ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](p1) - ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](p1) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] - ; GREEDY-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[SELECT]](s32), 
[[SELECT1]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY3]](p1) + ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY2]](p1) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY4]](i1), [[UV]], [[UV2]] + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY4]](i1), [[UV1]], [[UV3]] + ; GREEDY-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[SELECT]](i32), [[SELECT1]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 %2:_(p1) = COPY $sgpr2_sgpr3 %3:_(p1) = COPY $vgpr0_vgpr1 - %4:_(s1) = G_ICMP intpred(ne), %0, %1 - %5:_(p1) = G_SELECT %4, %3, %2 + %4:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %5:_(p1) = G_SELECT %4(i1), %3, %2 ... --- @@ -1521,40 +1521,40 @@ body: | ; FAST-LABEL: name: select_p1_scc_vv ; FAST: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1, $vgpr2_vgpr3 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(p1) = COPY $vgpr2_vgpr3 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](p1) - ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](p1) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] - ; FAST-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY2]](p1) + ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY3]](p1) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY4]](i1), [[UV]], [[UV2]] + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY4]](i1), [[UV1]], [[UV3]] + ; FAST-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[SELECT]](i32), [[SELECT1]](i32) ; ; GREEDY-LABEL: name: select_p1_scc_vv ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1, $vgpr2_vgpr3 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(p1) = COPY $vgpr2_vgpr3 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC 
[[ICMP]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](p1) - ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](p1) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] - ; GREEDY-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY2]](p1) + ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY3]](p1) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY4]](i1), [[UV]], [[UV2]] + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY4]](i1), [[UV1]], [[UV3]] + ; GREEDY-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[SELECT]](i32), [[SELECT1]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 %2:_(p1) = COPY $vgpr0_vgpr1 %3:_(p1) = COPY $vgpr2_vgpr3 - %4:_(s1) = G_ICMP intpred(ne), %0, %1 - %5:_(p1) = G_SELECT %4, %2, %3 + %4:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %5:_(p1) = G_SELECT %4(i1), %2, %3 ... --- @@ -1568,34 +1568,34 @@ body: | ; FAST-NEXT: {{ $}} ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(p1) = COPY $sgpr2_sgpr3 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY2]](s32), [[COPY3]] - ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](p1) - ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY1]](p1) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]] - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV3]] - ; FAST-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) + ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY2]](i32), [[COPY3]] + ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](p1) + ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY1]](p1) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV]], [[UV2]] + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV1]], [[UV3]] + ; FAST-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[SELECT]](i32), [[SELECT1]](i32) ; ; GREEDY-LABEL: name: select_p1_vcc_ss ; GREEDY: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3, $vgpr0, $vgpr1 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(p1) = COPY $sgpr2_sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY2]](s32), [[COPY3]] - ; GREEDY-NEXT: 
[[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](p1) - ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY1]](p1) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]] - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV3]] - ; GREEDY-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY2]](i32), [[COPY3]] + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](p1) + ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY1]](p1) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV]], [[UV2]] + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV1]], [[UV3]] + ; GREEDY-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[SELECT]](i32), [[SELECT1]](i32) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(p1) = COPY $sgpr2_sgpr3 - %2:_(s32) = COPY $vgpr0 - %3:_(s32) = COPY $vgpr1 - %4:_(s1) = G_ICMP intpred(ne), %2, %3 - %5:_(p1) = G_SELECT %4, %0, %1 + %2:_(i32) = COPY $vgpr0 + %3:_(i32) = COPY $vgpr1 + %4:_(i1) = G_ICMP intpred(ne), %2(i32), %3 + %5:_(p1) = G_SELECT %4(i1), %0, %1 ... --- @@ -1608,35 +1608,35 @@ body: | ; FAST: liveins: $sgpr0_sgpr1, $vgpr0, $vgpr1, $vgpr2_vgpr3 ; FAST-NEXT: {{ $}} ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(p1) = COPY $vgpr2_vgpr3 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[COPY2]] - ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](p1) - ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](p1) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]] - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV3]] - ; FAST-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY1]](i32), [[COPY2]] + ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](p1) + ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY3]](p1) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV]], [[UV2]] + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV1]], [[UV3]] + ; FAST-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[SELECT]](i32), [[SELECT1]](i32) ; ; GREEDY-LABEL: name: select_p1_vcc_sv ; GREEDY: liveins: $sgpr0_sgpr1, $vgpr0, $vgpr1, $vgpr2_vgpr3 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(p1) = COPY $vgpr2_vgpr3 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP 
intpred(ne), [[COPY1]](s32), [[COPY2]] - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](p1) - ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](p1) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]] - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV3]] - ; GREEDY-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY1]](i32), [[COPY2]] + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](p1) + ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY3]](p1) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV]], [[UV2]] + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV1]], [[UV3]] + ; GREEDY-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[SELECT]](i32), [[SELECT1]](i32) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = COPY $vgpr1 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = COPY $vgpr1 %3:_(p1) = COPY $vgpr2_vgpr3 - %4:_(s1) = G_ICMP intpred(ne), %1, %2 - %5:_(p1) = G_SELECT %4, %0, %3 + %4:_(i1) = G_ICMP intpred(ne), %1(i32), %2 + %5:_(p1) = G_SELECT %4(i1), %0, %3 ... --- @@ -1649,35 +1649,35 @@ body: | ; FAST: liveins: $sgpr0_sgpr1, $vgpr0, $vgpr1, $vgpr2_vgpr3 ; FAST-NEXT: {{ $}} ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(p1) = COPY $vgpr2_vgpr3 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[COPY2]] - ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](p1) - ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](p1) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]] - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV3]] - ; FAST-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY1]](i32), [[COPY2]] + ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY3]](p1) + ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](p1) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV]], [[UV2]] + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV1]], [[UV3]] + ; FAST-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[SELECT]](i32), [[SELECT1]](i32) ; ; GREEDY-LABEL: name: select_p1_vcc_vs ; GREEDY: liveins: $sgpr0_sgpr1, $vgpr0, $vgpr1, $vgpr2_vgpr3 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(p1) = COPY $vgpr2_vgpr3 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[COPY2]] - ; GREEDY-NEXT: 
[[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](p1) - ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](p1) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]] - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV3]] - ; GREEDY-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY1]](i32), [[COPY2]] + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY3]](p1) + ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](p1) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV]], [[UV2]] + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV1]], [[UV3]] + ; GREEDY-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[SELECT]](i32), [[SELECT1]](i32) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = COPY $vgpr1 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = COPY $vgpr1 %3:_(p1) = COPY $vgpr2_vgpr3 - %4:_(s1) = G_ICMP intpred(ne), %1, %2 - %5:_(p1) = G_SELECT %4, %3, %0 + %4:_(i1) = G_ICMP intpred(ne), %1(i32), %2 + %5:_(p1) = G_SELECT %4(i1), %3, %0 ... --- @@ -1689,36 +1689,36 @@ body: | ; FAST-LABEL: name: select_p1_vcc_vv ; FAST: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY $vgpr2_vgpr3 ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(p1) = COPY $vgpr4_vgpr5 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](p1) - ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](p1) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]] - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV3]] - ; FAST-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY2]](p1) + ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY3]](p1) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV]], [[UV2]] + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV1]], [[UV3]] + ; FAST-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[SELECT]](i32), [[SELECT1]](i32) ; ; GREEDY-LABEL: name: select_p1_vcc_vv ; GREEDY: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY $vgpr2_vgpr3 ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(p1) = COPY $vgpr4_vgpr5 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), 
[[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](p1) - ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](p1) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]] - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV3]] - ; GREEDY-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY2]](p1) + ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY3]](p1) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV]], [[UV2]] + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV1]], [[UV3]] + ; GREEDY-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[SELECT]](i32), [[SELECT1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 %2:_(p1) = COPY $vgpr2_vgpr3 %3:_(p1) = COPY $vgpr4_vgpr5 - %4:_(s1) = G_ICMP intpred(ne), %0, %1 - %5:_(p1) = G_SELECT %4, %2, %3 + %4:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %5:_(p1) = G_SELECT %4(i1), %2, %3 ... --- @@ -1730,36 +1730,36 @@ body: | ; FAST-LABEL: name: select_p999_vcc_vv ; FAST: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(p999) = COPY $vgpr2_vgpr3 ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(p999) = COPY $vgpr4_vgpr5 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](p999) - ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](p999) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]] - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV3]] - ; FAST-NEXT: [[MV:%[0-9]+]]:vgpr(p999) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY2]](p999) + ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY3]](p999) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV]], [[UV2]] + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV1]], [[UV3]] + ; FAST-NEXT: [[MV:%[0-9]+]]:vgpr(p999) = G_MERGE_VALUES [[SELECT]](i32), [[SELECT1]](i32) ; ; GREEDY-LABEL: name: select_p999_vcc_vv ; GREEDY: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(p999) = COPY $vgpr2_vgpr3 ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(p999) = COPY $vgpr4_vgpr5 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), 
[[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](p999) - ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](p999) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]] - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV3]] - ; GREEDY-NEXT: [[MV:%[0-9]+]]:vgpr(p999) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY2]](p999) + ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY3]](p999) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV]], [[UV2]] + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[UV1]], [[UV3]] + ; GREEDY-NEXT: [[MV:%[0-9]+]]:vgpr(p999) = G_MERGE_VALUES [[SELECT]](i32), [[SELECT1]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 %2:_(p999) = COPY $vgpr2_vgpr3 %3:_(p999) = COPY $vgpr4_vgpr5 - %4:_(s1) = G_ICMP intpred(ne), %0, %1 - %5:_(p999) = G_SELECT %4, %2, %3 + %4:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %5:_(p999) = G_SELECT %4(i1), %2, %3 ... --- @@ -1771,27 +1771,27 @@ body: | ; FAST-LABEL: name: select_s32_vgpr_vv ; FAST: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY1]], [[COPY2]] + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY]](i32) + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY3]](i1), [[COPY1]], [[COPY2]] ; ; GREEDY-LABEL: name: select_s32_vgpr_vv ; GREEDY: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY1]], [[COPY2]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s1) = G_TRUNC %0 - %4:_(s32) = G_SELECT %3, %1, %2 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY]](i32) + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY3]](i1), [[COPY1]], [[COPY2]] + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i1) = G_TRUNC %0(i32) + %4:_(i32) = G_SELECT %3(i1), %1, %2 ... 
--- @@ -1803,31 +1803,31 @@ body: | ; FAST-LABEL: name: select_s32_vgpr_ss ; FAST: liveins: $vgpr0, $sgpr0, $sgpr1 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY5]] + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY]](i32) + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[COPY2]](i32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY3]](i1), [[COPY4]], [[COPY5]] ; ; GREEDY-LABEL: name: select_s32_vgpr_ss ; GREEDY: liveins: $vgpr0, $sgpr0, $sgpr1 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY5]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s32) = COPY $sgpr1 - %3:_(s1) = G_TRUNC %0 - %4:_(s32) = G_SELECT %3, %1, %2 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY]](i32) + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[COPY2]](i32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY3]](i1), [[COPY4]], [[COPY5]] + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr0 + %2:_(i32) = COPY $sgpr1 + %3:_(i1) = G_TRUNC %0(i32) + %4:_(i32) = G_SELECT %3(i1), %1, %2 ... 
--- @@ -1839,27 +1839,27 @@ body: | ; FAST-LABEL: name: select_s32_sgpr_vv ; FAST: liveins: $sgpr0, $vgpr0, $vgpr1 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY1]], [[COPY2]] + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY]](i32) + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY3]](i1), [[COPY1]], [[COPY2]] ; ; GREEDY-LABEL: name: select_s32_sgpr_vv ; GREEDY: liveins: $sgpr0, $vgpr0, $vgpr1 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY1]], [[COPY2]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = COPY $vgpr1 - %3:_(s1) = G_TRUNC %0 - %4:_(s32) = G_SELECT %3, %1, %2 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY]](i32) + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY3]](i1), [[COPY1]], [[COPY2]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = COPY $vgpr1 + %3:_(i1) = G_TRUNC %0(i32) + %4:_(i32) = G_SELECT %3(i1), %1, %2 ... 
--- @@ -1871,29 +1871,29 @@ body: | ; FAST-LABEL: name: select_s32_sgpr_vs ; FAST: liveins: $sgpr0, $vgpr0, $sgpr1 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY1]], [[COPY4]] + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY]](i32) + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[COPY2]](i32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY3]](i1), [[COPY1]], [[COPY4]] ; ; GREEDY-LABEL: name: select_s32_sgpr_vs ; GREEDY: liveins: $sgpr0, $vgpr0, $sgpr1 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY1]], [[COPY4]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = COPY $sgpr1 - %3:_(s1) = G_TRUNC %0 - %4:_(s32) = G_SELECT %3, %1, %2 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY]](i32) + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[COPY2]](i32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY3]](i1), [[COPY1]], [[COPY4]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = COPY $sgpr1 + %3:_(i1) = G_TRUNC %0(i32) + %4:_(i32) = G_SELECT %3(i1), %1, %2 ... 
--- @@ -1905,29 +1905,29 @@ body: | ; FAST-LABEL: name: select_s32_sgpr_sv ; FAST: liveins: $sgpr0, $sgpr0, $vgpr0 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY2]] + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY]](i32) + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY3]](i1), [[COPY4]], [[COPY2]] ; ; GREEDY-LABEL: name: select_s32_sgpr_sv ; GREEDY: liveins: $sgpr0, $sgpr0, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY2]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $vgpr0 - %3:_(s1) = G_TRUNC %0 - %4:_(s32) = G_SELECT %3, %1, %2 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY]](i32) + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[COPY3]](i1), [[COPY4]], [[COPY2]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $vgpr0 + %3:_(i1) = G_TRUNC %0(i32) + %4:_(i32) = G_SELECT %3(i1), %1, %2 ... 
# FIXME: greedy unnecessairly uses vcc/vgpr @@ -1940,25 +1940,25 @@ body: | ; FAST-LABEL: name: select_s32_sgpr_ss ; FAST: liveins: $sgpr0, $sgpr1, $sgpr2 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ZEXT]](s32), [[COPY1]], [[COPY2]] + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:sgpr(i32) = G_SELECT [[ZEXT]](i32), [[COPY1]], [[COPY2]] ; ; GREEDY-LABEL: name: select_s32_sgpr_ss ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ZEXT]](s32), [[COPY1]], [[COPY2]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $sgpr2 - %3:_(s1) = G_TRUNC %0 - %4:_(s32) = G_SELECT %3, %1, %2 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:sgpr(i32) = G_SELECT [[ZEXT]](i32), [[COPY1]], [[COPY2]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $sgpr2 + %3:_(i1) = G_TRUNC %0(i32) + %4:_(i32) = G_SELECT %3(i1), %1, %2 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext-inreg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext-inreg.mir index cf0ca2c9eb634..00bec3ab164f4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext-inreg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext-inreg.mir @@ -13,12 +13,12 @@ body: | ; CHECK-LABEL: name: sext_inreg_s_s32_1 ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:sgpr(s32) = G_SEXT_INREG [[COPY]], 1 - ; CHECK-NEXT: S_ENDPGM 0, implicit [[SEXT_INREG]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = G_SEXT_INREG %0, 1 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:sgpr(i32) = G_SEXT_INREG [[COPY]], 1 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[SEXT_INREG]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = G_SEXT_INREG %0, 1 + S_ENDPGM 0, implicit %1(i32) ... 
@@ -33,12 +33,12 @@ body: | ; CHECK-LABEL: name: sext_inreg_s_s64_1 ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:sgpr(s64) = G_SEXT_INREG [[COPY]], 1 - ; CHECK-NEXT: S_ENDPGM 0, implicit [[SEXT_INREG]](s64) - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s64) = G_SEXT_INREG %0, 1 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:sgpr(i64) = G_SEXT_INREG [[COPY]], 1 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[SEXT_INREG]](i64) + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(i64) = G_SEXT_INREG %0, 1 + S_ENDPGM 0, implicit %1(i64) ... @@ -53,12 +53,12 @@ body: | ; CHECK-LABEL: name: sext_inreg_s_s64_31 ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:sgpr(s64) = G_SEXT_INREG [[COPY]], 31 - ; CHECK-NEXT: S_ENDPGM 0, implicit [[SEXT_INREG]](s64) - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s64) = G_SEXT_INREG %0, 31 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:sgpr(i64) = G_SEXT_INREG [[COPY]], 31 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[SEXT_INREG]](i64) + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(i64) = G_SEXT_INREG %0, 31 + S_ENDPGM 0, implicit %1(i64) ... @@ -73,12 +73,12 @@ body: | ; CHECK-LABEL: name: sext_inreg_s_s64_32 ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:sgpr(s64) = G_SEXT_INREG [[COPY]], 32 - ; CHECK-NEXT: S_ENDPGM 0, implicit [[SEXT_INREG]](s64) - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s64) = G_SEXT_INREG %0, 32 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:sgpr(i64) = G_SEXT_INREG [[COPY]], 32 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[SEXT_INREG]](i64) + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(i64) = G_SEXT_INREG %0, 32 + S_ENDPGM 0, implicit %1(i64) ... @@ -93,12 +93,12 @@ body: | ; CHECK-LABEL: name: sext_inreg_s_s64_33 ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:sgpr(s64) = G_SEXT_INREG [[COPY]], 32 - ; CHECK-NEXT: S_ENDPGM 0, implicit [[SEXT_INREG]](s64) - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s64) = G_SEXT_INREG %0, 32 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:sgpr(i64) = G_SEXT_INREG [[COPY]], 32 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[SEXT_INREG]](i64) + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(i64) = G_SEXT_INREG %0, 32 + S_ENDPGM 0, implicit %1(i64) ... @@ -113,12 +113,12 @@ body: | ; CHECK-LABEL: name: sext_inreg_v_s32_1 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[COPY]], 1 - ; CHECK-NEXT: S_ENDPGM 0, implicit [[SEXT_INREG]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_SEXT_INREG %0, 1 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:vgpr(i32) = G_SEXT_INREG [[COPY]], 1 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[SEXT_INREG]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_SEXT_INREG %0, 1 + S_ENDPGM 0, implicit %1(i32) ... 
@@ -133,17 +133,17 @@ body: | ; CHECK-LABEL: name: sext_inreg_v_s64_1 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:vgpr(s32) = G_FREEZE [[UV]] - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[FREEZE]], 1 - ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31 - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SEXT_INREG]](s32), [[ASHR]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_SEXT_INREG %0, 1 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:vgpr(i32) = G_FREEZE [[UV]] + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:vgpr(i32) = G_SEXT_INREG [[FREEZE]], 1 + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 31 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(i32) = G_ASHR [[SEXT_INREG]], [[C]](i32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[SEXT_INREG]](i32), [[ASHR]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = G_SEXT_INREG %0, 1 + S_ENDPGM 0, implicit %1(i64) ... @@ -158,17 +158,17 @@ body: | ; CHECK-LABEL: name: sext_inreg_v_s64_31 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:vgpr(s32) = G_FREEZE [[UV]] - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[FREEZE]], 31 - ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31 - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SEXT_INREG]](s32), [[ASHR]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_SEXT_INREG %0, 31 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:vgpr(i32) = G_FREEZE [[UV]] + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:vgpr(i32) = G_SEXT_INREG [[FREEZE]], 31 + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 31 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(i32) = G_ASHR [[SEXT_INREG]], [[C]](i32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[SEXT_INREG]](i32), [[ASHR]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = G_SEXT_INREG %0, 31 + S_ENDPGM 0, implicit %1(i64) ... 
@@ -183,16 +183,16 @@ body: | ; CHECK-LABEL: name: sext_inreg_v_s64_32 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:vgpr(s32) = G_FREEZE [[UV]] - ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31 - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[FREEZE]], [[C]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[FREEZE]](s32), [[ASHR]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_SEXT_INREG %0, 32 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:vgpr(i32) = G_FREEZE [[UV]] + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 31 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(i32) = G_ASHR [[FREEZE]], [[C]](i32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[FREEZE]](i32), [[ASHR]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = G_SEXT_INREG %0, 32 + S_ENDPGM 0, implicit %1(i64) ... @@ -207,15 +207,15 @@ body: | ; CHECK-LABEL: name: sext_inreg_v_s64_33 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[COPY1]], 1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[SEXT_INREG]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_SEXT_INREG %0, 33 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[UV]](i32) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:vgpr(i32) = G_SEXT_INREG [[COPY1]], 1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[COPY1]](i32), [[SEXT_INREG]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = G_SEXT_INREG %0, 33 + S_ENDPGM 0, implicit %1(i64) ... 
@@ -230,15 +230,15 @@ body: | ; CHECK-LABEL: name: sext_inreg_v_s64_35 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[COPY1]], 3 - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[SEXT_INREG]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_SEXT_INREG %0, 35 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[UV]](i32) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:vgpr(i32) = G_SEXT_INREG [[COPY1]], 3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[COPY1]](i32), [[SEXT_INREG]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = G_SEXT_INREG %0, 35 + S_ENDPGM 0, implicit %1(i64) ... @@ -253,14 +253,14 @@ body: | ; CHECK-LABEL: name: sext_inreg_v_s64_63 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[COPY1]], 31 - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[SEXT_INREG]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_SEXT_INREG %0, 63 - S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[UV]](i32) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:vgpr(i32) = G_SEXT_INREG [[COPY1]], 31 + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[COPY1]](i32), [[SEXT_INREG]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = G_SEXT_INREG %0, 63 + S_ENDPGM 0, implicit %1(i64) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir index 1e0ba2e79b82e..752d882e0cb5e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir @@ -12,10 +12,10 @@ body: | ; CHECK-LABEL: name: sext_s32_to_s64_s ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:sgpr(s64) = G_SEXT [[COPY]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s64) = G_SEXT %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:sgpr(i64) = G_SEXT [[COPY]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i64) = G_SEXT %0(i32) ... 
--- @@ -28,12 +28,12 @@ body: | ; CHECK-LABEL: name: sext_s16_to_s64_s ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:sgpr(s64) = G_SEXT [[TRUNC]](s16) - %0:_(s32) = COPY $sgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s64) = G_SEXT %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:sgpr(i64) = G_SEXT [[TRUNC]](i16) + %0:_(i32) = COPY $sgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(i64) = G_SEXT %1(i16) ... --- @@ -46,13 +46,13 @@ body: | ; CHECK-LABEL: name: sext_s32_to_s64_v ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31 - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[COPY1]], [[C]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[ASHR]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s64) = G_SEXT %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 31 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(i32) = G_ASHR [[COPY1]], [[C]](i32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[COPY1]](i32), [[ASHR]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i64) = G_SEXT %0(i32) ... --- @@ -65,15 +65,15 @@ body: | ; CHECK-LABEL: name: sext_s1_to_s16_scc ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:sgpr(s16) = G_SEXT [[TRUNC]](s1) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s1) = G_ICMP intpred(eq), %0, %1 - %3:_(s16) = G_SEXT %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY1]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:sgpr(i16) = G_SEXT [[TRUNC]](i1) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i1) = G_ICMP intpred(eq), %0(i32), %1 + %3:_(i16) = G_SEXT %2(i1) ... 
--- @@ -86,15 +86,15 @@ body: | ; CHECK-LABEL: name: sext_s1_to_s32_scc ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC]](s1) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s1) = G_ICMP intpred(eq), %0, %1 - %3:_(s32) = G_SEXT %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY1]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:sgpr(i32) = G_SEXT [[TRUNC]](i1) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i1) = G_ICMP intpred(eq), %0(i32), %1 + %3:_(i32) = G_SEXT %2(i1) ... --- @@ -107,15 +107,15 @@ body: | ; CHECK-LABEL: name: sext_s1_to_s64_scc ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:sgpr(s64) = G_SEXT [[TRUNC]](s1) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s1) = G_ICMP intpred(eq), %0, %1 - %3:_(s64) = G_SEXT %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY1]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:sgpr(i64) = G_SEXT [[TRUNC]](i1) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i1) = G_ICMP intpred(eq), %0(i32), %1 + %3:_(i64) = G_SEXT %2(i1) ... --- @@ -128,17 +128,17 @@ body: | ; CHECK-LABEL: name: sext_s1_to_s16_vcc ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] - ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 -1 - ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[SELECT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s1) = G_ICMP intpred(eq), %0, %1 - %3:_(s16) = G_SEXT %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY1]] + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[C]], [[C1]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i16) = G_TRUNC [[SELECT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i1) = G_ICMP intpred(eq), %0(i32), %1 + %3:_(i16) = G_SEXT %2(i1) ... 
--- @@ -151,16 +151,16 @@ body: | ; CHECK-LABEL: name: sext_s1_to_s32_vcc ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] - ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 -1 - ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s1) = G_ICMP intpred(eq), %0, %1 - %3:_(s32) = G_SEXT %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY1]] + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[C]], [[C1]] + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i1) = G_ICMP intpred(eq), %0(i32), %1 + %3:_(i32) = G_SEXT %2(i1) ... --- @@ -173,18 +173,18 @@ body: | ; CHECK-LABEL: name: sext_s1_to_s64_vcc ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] - ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 -1 - ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[SELECT]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[COPY2]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s1) = G_ICMP intpred(eq), %0, %1 - %3:_(s64) = G_SEXT %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY1]] + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[C]], [[C1]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[SELECT]](i32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[SELECT]](i32), [[COPY2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i1) = G_ICMP intpred(eq), %0(i32), %1 + %3:_(i64) = G_SEXT %2(i1) ... --- @@ -197,12 +197,12 @@ body: | ; CHECK-LABEL: name: sext_s1_to_s16_sgpr ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:sgpr(s16) = G_SEXT [[TRUNC]](s1) - %0:_(s32) = COPY $sgpr0 - %1:_(s1) = G_TRUNC %0 - %2:_(s16) = G_SEXT %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:sgpr(i16) = G_SEXT [[TRUNC]](i1) + %0:_(i32) = COPY $sgpr0 + %1:_(i1) = G_TRUNC %0(i32) + %2:_(i16) = G_SEXT %1(i1) ... 
--- @@ -215,12 +215,12 @@ body: | ; CHECK-LABEL: name: sext_s1_to_s32_sgpr ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC]](s1) - %0:_(s32) = COPY $sgpr0 - %1:_(s1) = G_TRUNC %0 - %2:_(s32) = G_SEXT %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:sgpr(i32) = G_SEXT [[TRUNC]](i1) + %0:_(i32) = COPY $sgpr0 + %1:_(i1) = G_TRUNC %0(i32) + %2:_(i32) = G_SEXT %1(i1) ... --- @@ -233,12 +233,12 @@ body: | ; CHECK-LABEL: name: sext_s1_to_s64_sgpr ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:sgpr(s64) = G_SEXT [[TRUNC]](s1) - %0:_(s32) = COPY $sgpr0 - %1:_(s1) = G_TRUNC %0 - %2:_(s64) = G_SEXT %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:sgpr(i64) = G_SEXT [[TRUNC]](i1) + %0:_(i32) = COPY $sgpr0 + %1:_(i1) = G_TRUNC %0(i32) + %2:_(i64) = G_SEXT %1(i1) ... --- @@ -251,12 +251,12 @@ body: | ; CHECK-LABEL: name: sext_s1_to_s16_vgpr ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:vgpr(s16) = G_SEXT [[TRUNC]](s1) - %0:_(s32) = COPY $vgpr0 - %1:_(s1) = G_TRUNC %0 - %2:_(s16) = G_SEXT %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:vgpr(i16) = G_SEXT [[TRUNC]](i1) + %0:_(i32) = COPY $vgpr0 + %1:_(i1) = G_TRUNC %0(i32) + %2:_(i16) = G_SEXT %1(i1) ... --- @@ -269,12 +269,12 @@ body: | ; CHECK-LABEL: name: sext_s1_to_s32_vgpr ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:vgpr(s32) = G_SEXT [[TRUNC]](s1) - %0:_(s32) = COPY $vgpr0 - %1:_(s1) = G_TRUNC %0 - %2:_(s32) = G_SEXT %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:vgpr(i32) = G_SEXT [[TRUNC]](i1) + %0:_(i32) = COPY $vgpr0 + %1:_(i1) = G_TRUNC %0(i32) + %2:_(i32) = G_SEXT %1(i1) ... 
--- @@ -287,15 +287,15 @@ body: | ; CHECK-LABEL: name: sext_s1_to_s64_vgpr ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:vgpr(s32) = G_SEXT [[TRUNC]](s1) - ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31 - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[SEXT]], [[C]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SEXT]](s32), [[ASHR]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s1) = G_TRUNC %0 - %2:_(s64) = G_SEXT %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:vgpr(i32) = G_SEXT [[TRUNC]](i1) + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 31 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(i32) = G_ASHR [[SEXT]], [[C]](i32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[SEXT]](i32), [[ASHR]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i1) = G_TRUNC %0(i32) + %2:_(i64) = G_SEXT %1(i1) ... --- @@ -308,13 +308,13 @@ body: | ; CHECK-LABEL: name: sext_s16_to_s64_vgpr ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:vgpr(s32) = G_SEXT [[TRUNC]](s16) - ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31 - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[SEXT]], [[C]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SEXT]](s32), [[ASHR]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s64) = G_SEXT %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:vgpr(i32) = G_SEXT [[TRUNC]](i16) + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 31 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(i32) = G_ASHR [[SEXT]], [[C]](i32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[SEXT]](i32), [[ASHR]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(i64) = G_SEXT %1(i16) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sextload.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sextload.mir index b257db4f1e665..26810207c50c7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sextload.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sextload.mir @@ -14,9 +14,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[COPY1]](p4) :: (load (s8), addrspace 4) + ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(i32) = G_SEXTLOAD [[COPY1]](p4) :: (load (i8), addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_SEXTLOAD %0 :: (load (s8), addrspace 4, align 1) + %1:_(i32) = G_SEXTLOAD %0(p4) :: (load (i8), addrspace 4, align 1) ... 
--- @@ -32,9 +32,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[COPY1]](p4) :: (load (s8), addrspace 1) + ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(i32) = G_SEXTLOAD [[COPY1]](p4) :: (load (i8), addrspace 1) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_SEXTLOAD %0 :: (load (s8), addrspace 1, align 1) + %1:_(i32) = G_SEXTLOAD %0(p4) :: (load (i8), addrspace 1, align 1) ... --- @@ -50,9 +50,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[COPY1]](p4) :: (load (s16), addrspace 4) + ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(i32) = G_SEXTLOAD [[COPY1]](p4) :: (load (i16), addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_SEXTLOAD %0 :: (load (s16), addrspace 4, align 2) + %1:_(i32) = G_SEXTLOAD %0(p4) :: (load (i16), addrspace 4, align 2) ... --- @@ -68,9 +68,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[COPY1]](p4) :: (load (s16), addrspace 1) + ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(i32) = G_SEXTLOAD [[COPY1]](p4) :: (load (i16), addrspace 1) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_SEXTLOAD %0 :: (load (s16), addrspace 1, align 2) + %1:_(i32) = G_SEXTLOAD %0(p4) :: (load (i16), addrspace 1, align 2) ... --- @@ -85,9 +85,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[COPY1]](p3) :: (load (s8), addrspace 3) + ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(i32) = G_SEXTLOAD [[COPY1]](p3) :: (load (i8), addrspace 3) %0:_(p3) = COPY $sgpr0 - %1:_(s32) = G_SEXTLOAD %0 :: (load (s8), addrspace 3, align 1) + %1:_(i32) = G_SEXTLOAD %0(p3) :: (load (i8), addrspace 3, align 1) ... --- @@ -103,7 +103,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[COPY1]](p3) :: (load (s16), addrspace 3) + ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(i32) = G_SEXTLOAD [[COPY1]](p3) :: (load (i16), addrspace 3) %0:_(p3) = COPY $sgpr0 - %1:_(s32) = G_SEXTLOAD %0 :: (load (s16), addrspace 3, align 2) + %1:_(i32) = G_SEXTLOAD %0(p3) :: (load (i16), addrspace 3, align 2) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-shl.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-shl.mir index b4290ea0a4203..f85d05eae4a26 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-shl.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-shl.mir @@ -12,14 +12,14 @@ body: | ; CHECK-LABEL: name: shl_s32_ss ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[COPY1]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[SHL]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_SHL %0, %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(i32) = G_SHL [[COPY]], [[COPY1]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[SHL]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_SHL %0, %1(i32) + S_ENDPGM 0, implicit %2(i32) ... --- @@ -32,15 +32,15 @@ body: | ; CHECK-LABEL: name: shl_s32_sv ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(s32) = G_SHL [[COPY2]], [[COPY1]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[SHL]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = G_SHL %0, %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(i32) = G_SHL [[COPY2]], [[COPY1]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[SHL]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = G_SHL %0, %1(i32) + S_ENDPGM 0, implicit %2(i32) ... --- @@ -53,15 +53,15 @@ body: | ; CHECK-LABEL: name: shl_s32_vs ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(s32) = G_SHL [[COPY]], [[COPY2]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[SHL]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s32) = G_SHL %0, %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(i32) = G_SHL [[COPY]], [[COPY2]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[SHL]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr0 + %2:_(i32) = G_SHL %0, %1(i32) + S_ENDPGM 0, implicit %2(i32) ... 
--- @@ -74,14 +74,14 @@ body: | ; CHECK-LABEL: name: shl_s32_vv ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(s32) = G_SHL [[COPY]], [[COPY1]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[SHL]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_SHL %0, %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(i32) = G_SHL [[COPY]], [[COPY1]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[SHL]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_SHL %0, %1(i32) + S_ENDPGM 0, implicit %2(i32) ... --- @@ -94,21 +94,21 @@ body: | ; CHECK-LABEL: name: shl_s16_ss ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s16) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s16) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[ANYEXT]], [[ZEXT]](s32) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[SHL]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC2]](s16) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s16) = G_SHL %2, %3 - S_ENDPGM 0, implicit %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC]](i16) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i16) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(i32) = G_SHL [[ANYEXT]], [[ZEXT]](i32) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(i16) = G_TRUNC [[SHL]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC2]](i16) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i16) = G_SHL %2, %3(i16) + S_ENDPGM 0, implicit %4(i16) ... 
--- @@ -122,19 +122,19 @@ body: | ; CHECK-LABEL: name: shl_s16_sv ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC]](s16) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[COPY2]], [[TRUNC1]](s16) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[SHL]](s16) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s16) = G_SHL %2, %3 - S_ENDPGM 0, implicit %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i16) = COPY [[TRUNC]](i16) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(i16) = G_SHL [[COPY2]], [[TRUNC1]](i16) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[SHL]](i16) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i16) = G_SHL %2, %3(i16) + S_ENDPGM 0, implicit %4(i16) ... --- @@ -147,19 +147,19 @@ body: | ; CHECK-LABEL: name: shl_s16_vs ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC1]](s16) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY2]](s16) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[SHL]](s16) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s16) = G_SHL %2, %3 - S_ENDPGM 0, implicit %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i16) = COPY [[TRUNC1]](i16) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(i16) = G_SHL [[TRUNC]], [[COPY2]](i16) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[SHL]](i16) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr0 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i16) = G_SHL %2, %3(i16) + S_ENDPGM 0, implicit %4(i16) ... 
@@ -173,18 +173,18 @@ body: | ; CHECK-LABEL: name: shl_s16_vv ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[SHL]](s16) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s16) = G_SHL %2, %3 - S_ENDPGM 0, implicit %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(i16) = G_SHL [[TRUNC]], [[TRUNC1]](i16) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[SHL]](i16) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i16) = G_SHL %2, %3(i16) + S_ENDPGM 0, implicit %4(i16) ... @@ -198,22 +198,22 @@ body: | ; CHECK-LABEL: name: shl_v2s16_ss ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[BITCAST]], [[BITCAST1]](s32) - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:sgpr(s32) = G_SHL [[LSHR]], [[LSHR1]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SHL]](s32), [[SHL1]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR_TRUNC]](<2 x s16>) - %0:_(<2 x s16>) = COPY $sgpr0 - %1:_(<2 x s16>) = COPY $sgpr1 - %2:_(<2 x s16>) = G_SHL %0, %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:sgpr(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:sgpr(i32) = G_LSHR [[BITCAST1]], [[C1]](i32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(i32) = G_SHL [[BITCAST]], [[BITCAST1]](i32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:sgpr(i32) = G_SHL [[LSHR]], [[LSHR1]](i32) + ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC [[SHL]](i32), [[SHL1]](i32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR_TRUNC]](<2 x i16>) + %0:_(<2 x i16>) = COPY $sgpr0 + %1:_(<2 x i16>) = COPY $sgpr1 + %2:_(<2 x i16>) = G_SHL %0, %1(<2 x i16>) + S_ENDPGM 0, implicit %2(<2 x i16>) ... 
@@ -227,15 +227,15 @@ body: | ; CHECK-LABEL: name: shl_v2s16_sv ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(<2 x s16>) = G_SHL [[COPY2]], [[COPY1]](<2 x s16>) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[SHL]](<2 x s16>) - %0:_(<2 x s16>) = COPY $sgpr0 - %1:_(<2 x s16>) = COPY $vgpr0 - %2:_(<2 x s16>) = G_SHL %0, %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x i16>) = COPY [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(<2 x i16>) = G_SHL [[COPY2]], [[COPY1]](<2 x i16>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[SHL]](<2 x i16>) + %0:_(<2 x i16>) = COPY $sgpr0 + %1:_(<2 x i16>) = COPY $vgpr0 + %2:_(<2 x i16>) = G_SHL %0, %1(<2 x i16>) + S_ENDPGM 0, implicit %2(<2 x i16>) ... --- @@ -248,15 +248,15 @@ body: | ; CHECK-LABEL: name: shl_v2s16_vs ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY2]](<2 x s16>) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[SHL]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $sgpr0 - %2:_(<2 x s16>) = G_SHL %0, %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x i16>) = COPY [[COPY1]](<2 x i16>) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(<2 x i16>) = G_SHL [[COPY]], [[COPY2]](<2 x i16>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[SHL]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $sgpr0 + %2:_(<2 x i16>) = G_SHL %0, %1(<2 x i16>) + S_ENDPGM 0, implicit %2(<2 x i16>) ... @@ -270,13 +270,13 @@ body: | ; CHECK-LABEL: name: shl_v2s16_vv ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[SHL]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_SHL %0, %1 - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr1 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(<2 x i16>) = G_SHL [[COPY]], [[COPY1]](<2 x i16>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[SHL]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = G_SHL %0, %1(<2 x i16>) + S_ENDPGM 0, implicit %2(<2 x i16>) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sitofp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sitofp.mir index 66e0d3db24112..178f66997f271 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sitofp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sitofp.mir @@ -12,11 +12,11 @@ body: | ; CHECK-LABEL: name: sitofp_s ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:vgpr(s32) = G_SITOFP [[COPY1]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = G_SITOFP %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:vgpr(f32) = G_SITOFP [[COPY1]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(f32) = G_SITOFP %0(i32) ... --- @@ -29,8 +29,8 @@ body: | ; CHECK-LABEL: name: sitofp_v ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:vgpr(s32) = G_SITOFP [[COPY]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_SITOFP %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:vgpr(f32) = G_SITOFP [[COPY]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_SITOFP %0(i32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smax.mir index eee553e4e872e..37969c5b8dca4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smax.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smax.mir @@ -13,12 +13,12 @@ body: | ; CHECK-LABEL: name: smax_s32_ss ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[SMAX:%[0-9]+]]:sgpr(s32) = G_SMAX [[COPY]], [[COPY1]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_SMAX %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[SMAX:%[0-9]+]]:sgpr(i32) = G_SMAX [[COPY]], [[COPY1]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_SMAX %0, %1 ... --- @@ -32,13 +32,13 @@ body: | ; CHECK-LABEL: name: smax_s32_sv ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[SMAX:%[0-9]+]]:vgpr(s32) = G_SMAX [[COPY2]], [[COPY1]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = G_SMAX %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[SMAX:%[0-9]+]]:vgpr(i32) = G_SMAX [[COPY2]], [[COPY1]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = G_SMAX %0, %1 ... 
--- @@ -52,15 +52,15 @@ body: | ; CHECK-LABEL: name: smax_s32_vs ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[SMAX:%[0-9]+]]:vgpr(s32) = G_SMAX [[COPY]], [[COPY2]] - ; CHECK-NEXT: $vgpr0 = COPY [[SMAX]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s32) = G_SMAX %0, %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[SMAX:%[0-9]+]]:vgpr(i32) = G_SMAX [[COPY]], [[COPY2]] + ; CHECK-NEXT: $vgpr0 = COPY [[SMAX]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr0 + %2:_(i32) = G_SMAX %0, %1 + $vgpr0 = COPY %2(i32) ... @@ -75,14 +75,14 @@ body: | ; CHECK-LABEL: name: smax_s32_vv ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[SMAX:%[0-9]+]]:vgpr(s32) = G_SMAX [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[SMAX]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_SMAX %0, %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[SMAX:%[0-9]+]]:vgpr(i32) = G_SMAX [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[SMAX]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_SMAX %0, %1 + $vgpr0 = COPY %2(i32) ... # FIXME: This should use VGPR instruction @@ -97,14 +97,14 @@ body: | ; CHECK-LABEL: name: smax_s32_ss_vgpr_use ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[SMAX:%[0-9]+]]:sgpr(s32) = G_SMAX [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[SMAX]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_SMAX %0, %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[SMAX:%[0-9]+]]:sgpr(i32) = G_SMAX [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[SMAX]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_SMAX %0, %1 + $vgpr0 = COPY %2(i32) ... 
--- @@ -118,23 +118,23 @@ body: | ; CHECK-LABEL: name: smax_s16_ss ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC]](s16) - ; CHECK-NEXT: [[SEXT1:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC1]](s16) - ; CHECK-NEXT: [[SMAX:%[0-9]+]]:sgpr(s32) = G_SMAX [[SEXT]], [[SEXT1]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[SMAX]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s16) - ; CHECK-NEXT: $sgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s16) = G_SMAX %2, %3 - %5:_(s32) = G_ANYEXT %4 - $sgpr0 = COPY %5 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:sgpr(i32) = G_SEXT [[TRUNC]](i16) + ; CHECK-NEXT: [[SEXT1:%[0-9]+]]:sgpr(i32) = G_SEXT [[TRUNC1]](i16) + ; CHECK-NEXT: [[SMAX:%[0-9]+]]:sgpr(i32) = G_SMAX [[SEXT]], [[SEXT1]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(i16) = G_TRUNC [[SMAX]](i32) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC2]](i16) + ; CHECK-NEXT: $sgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i16) = G_SMAX %2, %3 + %5:_(i32) = G_ANYEXT %4(i16) + $sgpr0 = COPY %5(i32) ... 
@@ -149,23 +149,23 @@ body: | ; CHECK-LABEL: name: smax_s16_ss_vgpr_use ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC]](s16) - ; CHECK-NEXT: [[SEXT1:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC1]](s16) - ; CHECK-NEXT: [[SMAX:%[0-9]+]]:sgpr(s32) = G_SMAX [[SEXT]], [[SEXT1]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[SMAX]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s16) = G_SMAX %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:sgpr(i32) = G_SEXT [[TRUNC]](i16) + ; CHECK-NEXT: [[SEXT1:%[0-9]+]]:sgpr(i32) = G_SEXT [[TRUNC1]](i16) + ; CHECK-NEXT: [[SMAX:%[0-9]+]]:sgpr(i32) = G_SMAX [[SEXT]], [[SEXT1]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(i16) = G_TRUNC [[SMAX]](i32) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC2]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i16) = G_SMAX %2, %3 + %5:_(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... 
@@ -180,24 +180,24 @@ body: | ; CHECK-LABEL: name: smax_v2s16_ss ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:sgpr(s32) = G_SEXT_INREG [[BITCAST]], 16 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:sgpr(s32) = G_ASHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:sgpr(s32) = G_SEXT_INREG [[BITCAST1]], 16 - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:sgpr(s32) = G_ASHR [[BITCAST1]], [[C1]](s32) - ; CHECK-NEXT: [[SMAX:%[0-9]+]]:sgpr(s32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]] - ; CHECK-NEXT: [[SMAX1:%[0-9]+]]:sgpr(s32) = G_SMAX [[ASHR]], [[ASHR1]] - ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SMAX]](s32), [[SMAX1]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) - %0:_(<2 x s16>) = COPY $sgpr0 - %1:_(<2 x s16>) = COPY $sgpr1 - %2:_(<2 x s16>) = G_SMAX %0, %1 - $sgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:sgpr(i32) = G_SEXT_INREG [[BITCAST]], 16 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:sgpr(i32) = G_ASHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:sgpr(i32) = G_SEXT_INREG [[BITCAST1]], 16 + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:sgpr(i32) = G_ASHR [[BITCAST1]], [[C1]](i32) + ; CHECK-NEXT: [[SMAX:%[0-9]+]]:sgpr(i32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]] + ; CHECK-NEXT: [[SMAX1:%[0-9]+]]:sgpr(i32) = G_SMAX [[ASHR]], [[ASHR1]] + ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC [[SMAX]](i32), [[SMAX1]](i32) + ; CHECK-NEXT: $sgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x i16>) + %0:_(<2 x i16>) = COPY $sgpr0 + %1:_(<2 x i16>) = COPY $sgpr1 + %2:_(<2 x i16>) = G_SMAX %0, %1 + $sgpr0 = COPY %2(<2 x i16>) ... --- @@ -211,15 +211,15 @@ body: | ; CHECK-LABEL: name: smax_v2s16_sv ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[SMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_SMAX [[COPY2]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[SMAX]](<2 x s16>) - %0:_(<2 x s16>) = COPY $sgpr0 - %1:_(<2 x s16>) = COPY $vgpr0 - %2:_(<2 x s16>) = G_SMAX %0, %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x i16>) = COPY [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[SMAX:%[0-9]+]]:vgpr(<2 x i16>) = G_SMAX [[COPY2]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[SMAX]](<2 x i16>) + %0:_(<2 x i16>) = COPY $sgpr0 + %1:_(<2 x i16>) = COPY $vgpr0 + %2:_(<2 x i16>) = G_SMAX %0, %1 + $vgpr0 = COPY %2(<2 x i16>) ... 
--- @@ -233,15 +233,15 @@ body: | ; CHECK-LABEL: name: smax_v2s16_vs ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[SMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_SMAX [[COPY]], [[COPY2]] - ; CHECK-NEXT: $vgpr0 = COPY [[SMAX]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $sgpr0 - %2:_(<2 x s16>) = G_SMAX %0, %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x i16>) = COPY [[COPY1]](<2 x i16>) + ; CHECK-NEXT: [[SMAX:%[0-9]+]]:vgpr(<2 x i16>) = G_SMAX [[COPY]], [[COPY2]] + ; CHECK-NEXT: $vgpr0 = COPY [[SMAX]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $sgpr0 + %2:_(<2 x i16>) = G_SMAX %0, %1 + $vgpr0 = COPY %2(<2 x i16>) ... --- @@ -255,12 +255,12 @@ body: | ; CHECK-LABEL: name: smax_v2s16_vv ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[SMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_SMAX [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[SMAX]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_SMAX %0, %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr1 + ; CHECK-NEXT: [[SMAX:%[0-9]+]]:vgpr(<2 x i16>) = G_SMAX [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[SMAX]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = G_SMAX %0, %1 + $vgpr0 = COPY %2(<2 x i16>) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smin.mir index ef60aa81e4923..741bfc7f55410 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smin.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smin.mir @@ -13,14 +13,14 @@ body: | ; CHECK-LABEL: name: smin_s32_ss ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[SMIN:%[0-9]+]]:sgpr(s32) = G_SMIN [[COPY]], [[COPY1]] - ; CHECK-NEXT: $sgpr0 = COPY [[SMIN]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_SMIN %0, %1 - $sgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[SMIN:%[0-9]+]]:sgpr(i32) = G_SMIN [[COPY]], [[COPY1]] + ; CHECK-NEXT: $sgpr0 = COPY [[SMIN]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_SMIN %0, %1 + $sgpr0 = COPY %2(i32) ... 
--- @@ -34,15 +34,15 @@ body: | ; CHECK-LABEL: name: smin_s32_sv ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[SMIN:%[0-9]+]]:vgpr(s32) = G_SMIN [[COPY2]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[SMIN]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = G_SMIN %0, %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[SMIN:%[0-9]+]]:vgpr(i32) = G_SMIN [[COPY2]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[SMIN]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = G_SMIN %0, %1 + $vgpr0 = COPY %2(i32) ... --- @@ -56,15 +56,15 @@ body: | ; CHECK-LABEL: name: smin_s32_vs ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[SMIN:%[0-9]+]]:vgpr(s32) = G_SMIN [[COPY]], [[COPY2]] - ; CHECK-NEXT: $vgpr0 = COPY [[SMIN]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s32) = G_SMIN %0, %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[SMIN:%[0-9]+]]:vgpr(i32) = G_SMIN [[COPY]], [[COPY2]] + ; CHECK-NEXT: $vgpr0 = COPY [[SMIN]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr0 + %2:_(i32) = G_SMIN %0, %1 + $vgpr0 = COPY %2(i32) ... --- @@ -78,14 +78,14 @@ body: | ; CHECK-LABEL: name: smin_s32_vv ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[SMIN:%[0-9]+]]:vgpr(s32) = G_SMIN [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[SMIN]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_SMIN %0, %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[SMIN:%[0-9]+]]:vgpr(i32) = G_SMIN [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[SMIN]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_SMIN %0, %1 + $vgpr0 = COPY %2(i32) ... # FIXME: This should use VGPR instruction @@ -100,14 +100,14 @@ body: | ; CHECK-LABEL: name: smin_s32_ss_vgpr_use ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[SMIN:%[0-9]+]]:sgpr(s32) = G_SMIN [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[SMIN]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_SMIN %0, %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[SMIN:%[0-9]+]]:sgpr(i32) = G_SMIN [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[SMIN]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_SMIN %0, %1 + $vgpr0 = COPY %2(i32) ... 
--- @@ -121,23 +121,23 @@ body: | ; CHECK-LABEL: name: smin_s16_ss ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC]](s16) - ; CHECK-NEXT: [[SEXT1:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC1]](s16) - ; CHECK-NEXT: [[SMIN:%[0-9]+]]:sgpr(s32) = G_SMIN [[SEXT]], [[SEXT1]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[SMIN]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s16) - ; CHECK-NEXT: $sgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s16) = G_SMIN %2, %3 - %5:_(s32) = G_ANYEXT %4 - $sgpr0 = COPY %5 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:sgpr(i32) = G_SEXT [[TRUNC]](i16) + ; CHECK-NEXT: [[SEXT1:%[0-9]+]]:sgpr(i32) = G_SEXT [[TRUNC1]](i16) + ; CHECK-NEXT: [[SMIN:%[0-9]+]]:sgpr(i32) = G_SMIN [[SEXT]], [[SEXT1]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(i16) = G_TRUNC [[SMIN]](i32) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC2]](i16) + ; CHECK-NEXT: $sgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i16) = G_SMIN %2, %3 + %5:_(i32) = G_ANYEXT %4(i16) + $sgpr0 = COPY %5(i32) ... 
@@ -152,23 +152,23 @@ body: | ; CHECK-LABEL: name: smin_s16_ss_vgpr_use ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC]](s16) - ; CHECK-NEXT: [[SEXT1:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC1]](s16) - ; CHECK-NEXT: [[SMIN:%[0-9]+]]:sgpr(s32) = G_SMIN [[SEXT]], [[SEXT1]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[SMIN]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s16) = G_SMIN %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:sgpr(i32) = G_SEXT [[TRUNC]](i16) + ; CHECK-NEXT: [[SEXT1:%[0-9]+]]:sgpr(i32) = G_SEXT [[TRUNC1]](i16) + ; CHECK-NEXT: [[SMIN:%[0-9]+]]:sgpr(i32) = G_SMIN [[SEXT]], [[SEXT1]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(i16) = G_TRUNC [[SMIN]](i32) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC2]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i16) = G_SMIN %2, %3 + %5:_(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... 
@@ -183,24 +183,24 @@ body: | ; CHECK-LABEL: name: smin_v2s16_ss ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:sgpr(s32) = G_SEXT_INREG [[BITCAST]], 16 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:sgpr(s32) = G_ASHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:sgpr(s32) = G_SEXT_INREG [[BITCAST1]], 16 - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:sgpr(s32) = G_ASHR [[BITCAST1]], [[C1]](s32) - ; CHECK-NEXT: [[SMIN:%[0-9]+]]:sgpr(s32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]] - ; CHECK-NEXT: [[SMIN1:%[0-9]+]]:sgpr(s32) = G_SMIN [[ASHR]], [[ASHR1]] - ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SMIN]](s32), [[SMIN1]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) - %0:_(<2 x s16>) = COPY $sgpr0 - %1:_(<2 x s16>) = COPY $sgpr1 - %2:_(<2 x s16>) = G_SMIN %0, %1 - $sgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:sgpr(i32) = G_SEXT_INREG [[BITCAST]], 16 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:sgpr(i32) = G_ASHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:sgpr(i32) = G_SEXT_INREG [[BITCAST1]], 16 + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:sgpr(i32) = G_ASHR [[BITCAST1]], [[C1]](i32) + ; CHECK-NEXT: [[SMIN:%[0-9]+]]:sgpr(i32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]] + ; CHECK-NEXT: [[SMIN1:%[0-9]+]]:sgpr(i32) = G_SMIN [[ASHR]], [[ASHR1]] + ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC [[SMIN]](i32), [[SMIN1]](i32) + ; CHECK-NEXT: $sgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x i16>) + %0:_(<2 x i16>) = COPY $sgpr0 + %1:_(<2 x i16>) = COPY $sgpr1 + %2:_(<2 x i16>) = G_SMIN %0, %1 + $sgpr0 = COPY %2(<2 x i16>) ... --- @@ -214,15 +214,15 @@ body: | ; CHECK-LABEL: name: smin_v2s16_sv ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[SMIN:%[0-9]+]]:vgpr(<2 x s16>) = G_SMIN [[COPY2]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[SMIN]](<2 x s16>) - %0:_(<2 x s16>) = COPY $sgpr0 - %1:_(<2 x s16>) = COPY $vgpr0 - %2:_(<2 x s16>) = G_SMIN %0, %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x i16>) = COPY [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[SMIN:%[0-9]+]]:vgpr(<2 x i16>) = G_SMIN [[COPY2]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[SMIN]](<2 x i16>) + %0:_(<2 x i16>) = COPY $sgpr0 + %1:_(<2 x i16>) = COPY $vgpr0 + %2:_(<2 x i16>) = G_SMIN %0, %1 + $vgpr0 = COPY %2(<2 x i16>) ... 
--- @@ -236,15 +236,15 @@ body: | ; CHECK-LABEL: name: smin_v2s16_vs ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[SMIN:%[0-9]+]]:vgpr(<2 x s16>) = G_SMIN [[COPY]], [[COPY2]] - ; CHECK-NEXT: $vgpr0 = COPY [[SMIN]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $sgpr0 - %2:_(<2 x s16>) = G_SMIN %0, %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x i16>) = COPY [[COPY1]](<2 x i16>) + ; CHECK-NEXT: [[SMIN:%[0-9]+]]:vgpr(<2 x i16>) = G_SMIN [[COPY]], [[COPY2]] + ; CHECK-NEXT: $vgpr0 = COPY [[SMIN]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $sgpr0 + %2:_(<2 x i16>) = G_SMIN %0, %1 + $vgpr0 = COPY %2(<2 x i16>) ... --- @@ -258,12 +258,12 @@ body: | ; CHECK-LABEL: name: smin_v2s16_vv ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[SMIN:%[0-9]+]]:vgpr(<2 x s16>) = G_SMIN [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[SMIN]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_SMIN %0, %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr1 + ; CHECK-NEXT: [[SMIN:%[0-9]+]]:vgpr(<2 x i16>) = G_SMIN [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[SMIN]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = G_SMIN %0, %1 + $vgpr0 = COPY %2(<2 x i16>) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smulh.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smulh.mir index 872e4477edc30..2d77bcb1b4605 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smulh.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smulh.mir @@ -16,20 +16,21 @@ body: | ; GFX6-LABEL: name: smulh_s32_ss ; GFX6: liveins: $sgpr0, $sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX6-NEXT: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY2]], [[COPY3]] + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; GFX6-NEXT: [[SMULH:%[0-9]+]]:vgpr(i32) = G_SMULH [[COPY2]], [[COPY3]] + ; ; GFX9-LABEL: name: smulh_s32_ss ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX9-NEXT: [[SMULH:%[0-9]+]]:sgpr(s32) = G_SMULH [[COPY]], [[COPY1]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_SMULH %0, %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GFX9-NEXT: [[SMULH:%[0-9]+]]:sgpr(i32) = G_SMULH [[COPY]], [[COPY1]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_SMULH %0, %1 ... --- @@ -43,20 +44,21 @@ body: | ; GFX6-LABEL: name: smulh_s32_sv ; GFX6: liveins: $sgpr0, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX6-NEXT: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY2]], [[COPY1]] + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; GFX6-NEXT: [[SMULH:%[0-9]+]]:vgpr(i32) = G_SMULH [[COPY2]], [[COPY1]] + ; ; GFX9-LABEL: name: smulh_s32_sv ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX9-NEXT: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY2]], [[COPY1]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = G_SMULH %0, %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; GFX9-NEXT: [[SMULH:%[0-9]+]]:vgpr(i32) = G_SMULH [[COPY2]], [[COPY1]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = G_SMULH %0, %1 ... 
--- @@ -70,20 +72,21 @@ body: | ; GFX6-LABEL: name: smulh_s32_vs ; GFX6: liveins: $sgpr0, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX6-NEXT: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY]], [[COPY2]] + ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; GFX6-NEXT: [[SMULH:%[0-9]+]]:vgpr(i32) = G_SMULH [[COPY]], [[COPY2]] + ; ; GFX9-LABEL: name: smulh_s32_vs ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX9-NEXT: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY]], [[COPY2]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s32) = G_SMULH %0, %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; GFX9-NEXT: [[SMULH:%[0-9]+]]:vgpr(i32) = G_SMULH [[COPY]], [[COPY2]] + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr0 + %2:_(i32) = G_SMULH %0, %1 ... --- @@ -97,16 +100,17 @@ body: | ; GFX6-LABEL: name: smulh_s32_vv ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY]], [[COPY1]] + ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[SMULH:%[0-9]+]]:vgpr(i32) = G_SMULH [[COPY]], [[COPY1]] + ; ; GFX9-LABEL: name: smulh_s32_vv ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY]], [[COPY1]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_SMULH %0, %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[SMULH:%[0-9]+]]:vgpr(i32) = G_SMULH [[COPY]], [[COPY1]] + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_SMULH %0, %1 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-split-scalar-load-metadata.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-split-scalar-load-metadata.mir index d15919fb12a73..58c030359c949 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-split-scalar-load-metadata.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-split-scalar-load-metadata.mir @@ -34,22 +34,23 @@ body: | ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), addrspace 4) - ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 - ; GFX7-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX7-NEXT: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s32) from unknown-address + 8, align 8, addrspace 4) - ; GFX7-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) - ; GFX7-NEXT: $sgpr0_sgpr1_sgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX7-NEXT: [[LOAD:%[0-9]+]]:sgpr(<2 x i32>) = G_LOAD [[COPY]](p4) :: (load (<2 x i32>), addrspace 4) + ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(i64) = G_CONSTANT i64 8 + ; GFX7-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX7-NEXT: [[LOAD1:%[0-9]+]]:sgpr(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i32) from unknown-address + 8, align 8, addrspace 4) + ; GFX7-NEXT: [[UV:%[0-9]+]]:sgpr(i32), [[UV1:%[0-9]+]]:sgpr(i32) = G_UNMERGE_VALUES [[LOAD]](<2 x i32>) + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<3 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[LOAD1]](i32) + ; GFX7-NEXT: $sgpr0_sgpr1_sgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) + ; ; GFX12-LABEL: name: split_smrd_load_range ; GFX12: liveins: $sgpr0_sgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:sgpr(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 8 - ; GFX12-NEXT: $sgpr0_sgpr1_sgpr2 = COPY [[LOAD]](<3 x s32>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:sgpr(<3 x i32>) = G_LOAD [[COPY]](p4) :: (load (<3 x i32>), align 8 + ; GFX12-NEXT: $sgpr0_sgpr1_sgpr2 = COPY [[LOAD]](<3 x i32>) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 8, addrspace 4, !range !3) - $sgpr0_sgpr1_sgpr2 = COPY %1 + %1:_(<3 x i32>) = G_LOAD %0(p4) :: (load (<3 x i32>), align 8, addrspace 4, !range !3) + $sgpr0_sgpr1_sgpr2 = COPY %1(<3 x i32>) ... 
@@ -64,21 +65,22 @@ body: | ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), !tbaa !2, addrspace 4) - ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 - ; GFX7-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX7-NEXT: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s32) from unknown-address + 8, align 8, !tbaa !2, addrspace 4) - ; GFX7-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) - ; GFX7-NEXT: $sgpr0_sgpr1_sgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX7-NEXT: [[LOAD:%[0-9]+]]:sgpr(<2 x i32>) = G_LOAD [[COPY]](p4) :: (load (<2 x i32>), !tbaa !3, addrspace 4) + ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(i64) = G_CONSTANT i64 8 + ; GFX7-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](i64) + ; GFX7-NEXT: [[LOAD1:%[0-9]+]]:sgpr(i32) = G_LOAD [[PTR_ADD]](p4) :: (load (i32) from unknown-address + 8, align 8, !tbaa !3, addrspace 4) + ; GFX7-NEXT: [[UV:%[0-9]+]]:sgpr(i32), [[UV1:%[0-9]+]]:sgpr(i32) = G_UNMERGE_VALUES [[LOAD]](<2 x i32>) + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<3 x i32>) = G_BUILD_VECTOR [[UV]](i32), [[UV1]](i32), [[LOAD1]](i32) + ; GFX7-NEXT: $sgpr0_sgpr1_sgpr2 = COPY [[BUILD_VECTOR]](<3 x i32>) + ; ; GFX12-LABEL: name: split_smrd_load_tbaa ; GFX12: liveins: $sgpr0_sgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:sgpr(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 8, !tbaa !2, addrspace 4) - ; GFX12-NEXT: $sgpr0_sgpr1_sgpr2 = COPY [[LOAD]](<3 x s32>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:sgpr(<3 x i32>) = G_LOAD [[COPY]](p4) :: (load (<3 x i32>), align 8, !tbaa !3, addrspace 4) + ; GFX12-NEXT: $sgpr0_sgpr1_sgpr2 = COPY [[LOAD]](<3 x i32>) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 8, addrspace 4, !tbaa !1) - $sgpr0_sgpr1_sgpr2 = COPY %1 + %1:_(<3 x i32>) = G_LOAD %0(p4) :: (load (<3 x i32>), align 8, !tbaa !2, addrspace 4) + $sgpr0_sgpr1_sgpr2 = COPY %1(<3 x i32>) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ssube.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ssube.mir index eae770ae18dce..fa39dc9347db8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ssube.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ssube.mir @@ -12,33 +12,34 @@ body: | ; FAST-LABEL: name: ssube_s32_sss ; FAST: liveins: $sgpr0, $sgpr1, $sgpr2 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; FAST-NEXT: [[SSUBE:%[0-9]+]]:sgpr(s32), [[SSUBE1:%[0-9]+]]:sgpr(s32) = G_SSUBE [[COPY]], [[COPY1]], [[ZEXT]] - ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[SSUBE1]](s32) + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; FAST-NEXT: [[SSUBE:%[0-9]+]]:sgpr(i32), [[SSUBE1:%[0-9]+]]:sgpr(i32) = G_SSUBE [[COPY]], [[COPY1]], [[ZEXT]] + ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[SSUBE1]](i32) + ; ; GREEDY-LABEL: name: ssube_s32_sss ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; GREEDY-NEXT: [[SSUBE:%[0-9]+]]:sgpr(s32), [[SSUBE1:%[0-9]+]]:sgpr(s32) = G_SSUBE [[COPY]], [[COPY1]], [[ZEXT]] - ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[SSUBE1]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $sgpr2 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(eq), %2, %3 - %5:_(s32), %6:_(s1) = G_SSUBE %0, %1, %4 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; GREEDY-NEXT: [[SSUBE:%[0-9]+]]:sgpr(i32), [[SSUBE1:%[0-9]+]]:sgpr(i32) = G_SSUBE [[COPY]], [[COPY1]], [[ZEXT]] + ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[SSUBE1]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $sgpr2 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + %5:_(i32), %6:_(i1) = G_SSUBE %0, %1, %4 ... 
--- @@ -51,33 +52,34 @@ body: | ; FAST-LABEL: name: ssube_s32_vss ; FAST: liveins: $vgpr0, $sgpr0, $sgpr1 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[SSUBE:%[0-9]+]]:vgpr(s32), [[SSUBE1:%[0-9]+]]:vcc(s1) = G_SSUBE [[COPY]], [[COPY3]], [[COPY4]] + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; FAST-NEXT: [[SSUBE:%[0-9]+]]:vgpr(i32), [[SSUBE1:%[0-9]+]]:vcc(i1) = G_SSUBE [[COPY]], [[COPY3]], [[COPY4]] + ; ; GREEDY-LABEL: name: ssube_s32_vss ; GREEDY: liveins: $vgpr0, $sgpr0, $sgpr1 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[SSUBE:%[0-9]+]]:vgpr(s32), [[SSUBE1:%[0-9]+]]:vcc(s1) = G_SSUBE [[COPY]], [[COPY3]], [[COPY4]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s32) = COPY $sgpr1 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(eq), %2, %3 - %5:_(s32), %6:_(s1) = G_SSUBE %0, %1, %4 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; GREEDY-NEXT: [[SSUBE:%[0-9]+]]:vgpr(i32), [[SSUBE1:%[0-9]+]]:vcc(i1) = G_SSUBE [[COPY]], [[COPY3]], [[COPY4]] + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr0 + %2:_(i32) = COPY $sgpr1 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + %5:_(i32), %6:_(i1) = G_SSUBE %0, %1, %4 ... 
--- name: ssube_s32_ssv @@ -89,30 +91,31 @@ body: | ; FAST-LABEL: name: ssube_s32_ssv ; FAST: liveins: $sgpr0, $sgpr1, $vgpr0 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY2]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[SSUBE:%[0-9]+]]:vgpr(s32), [[SSUBE1:%[0-9]+]]:vcc(s1) = G_SSUBE [[COPY3]], [[COPY4]], [[COPY5]] + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY2]](i32) + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; FAST-NEXT: [[COPY5:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; FAST-NEXT: [[SSUBE:%[0-9]+]]:vgpr(i32), [[SSUBE1:%[0-9]+]]:vcc(i1) = G_SSUBE [[COPY3]], [[COPY4]], [[COPY5]] + ; ; GREEDY-LABEL: name: ssube_s32_ssv ; GREEDY: liveins: $sgpr0, $sgpr1, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY2]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[SSUBE:%[0-9]+]]:vgpr(s32), [[SSUBE1:%[0-9]+]]:vcc(s1) = G_SSUBE [[COPY3]], [[COPY4]], [[COPY5]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $vgpr0 - %3:_(s1) = G_TRUNC %2 - %4:_(s32), %5:_(s1) = G_SSUBE %0, %1, %3 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY2]](i32) + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; GREEDY-NEXT: [[SSUBE:%[0-9]+]]:vgpr(i32), [[SSUBE1:%[0-9]+]]:vcc(i1) = G_SSUBE [[COPY3]], [[COPY4]], [[COPY5]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $vgpr0 + %3:_(i1) = G_TRUNC %2(i32) + %4:_(i32), %5:_(i1) = G_SSUBE %0, %1, %3 ... 
--- @@ -125,26 +128,27 @@ body: | ; FAST-LABEL: name: ssube_s32_vvs ; FAST: liveins: $vgpr0, $vgpr1, $sgpr0 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY2]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[SSUBE:%[0-9]+]]:vgpr(s32), [[SSUBE1:%[0-9]+]]:vcc(s1) = G_SSUBE [[COPY]], [[COPY1]], [[COPY3]] + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY2]](i32) + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; FAST-NEXT: [[SSUBE:%[0-9]+]]:vgpr(i32), [[SSUBE1:%[0-9]+]]:vcc(i1) = G_SSUBE [[COPY]], [[COPY1]], [[COPY3]] + ; ; GREEDY-LABEL: name: ssube_s32_vvs ; GREEDY: liveins: $vgpr0, $vgpr1, $sgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY2]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[SSUBE:%[0-9]+]]:vgpr(s32), [[SSUBE1:%[0-9]+]]:vcc(s1) = G_SSUBE [[COPY]], [[COPY1]], [[COPY3]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $sgpr0 - %3:_(s1) = G_TRUNC %2 - %4:_(s32), %5:_(s1) = G_SSUBE %0, %1, %3 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY2]](i32) + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; GREEDY-NEXT: [[SSUBE:%[0-9]+]]:vgpr(i32), [[SSUBE1:%[0-9]+]]:vcc(i1) = G_SSUBE [[COPY]], [[COPY1]], [[COPY3]] + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $sgpr0 + %3:_(i1) = G_TRUNC %2(i32) + %4:_(i32), %5:_(i1) = G_SSUBE %0, %1, %3 ... 
--- @@ -157,26 +161,27 @@ body: | ; FAST-LABEL: name: ssubee_s32_sss_noscc ; FAST: liveins: $sgpr0, $sgpr1, $sgpr2 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY2]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; FAST-NEXT: [[SSUBE:%[0-9]+]]:sgpr(s32), [[SSUBE1:%[0-9]+]]:sgpr(s32) = G_SSUBE [[COPY]], [[COPY1]], [[ZEXT]] - ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[SSUBE1]](s32) + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY2]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; FAST-NEXT: [[SSUBE:%[0-9]+]]:sgpr(i32), [[SSUBE1:%[0-9]+]]:sgpr(i32) = G_SSUBE [[COPY]], [[COPY1]], [[ZEXT]] + ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[SSUBE1]](i32) + ; ; GREEDY-LABEL: name: ssubee_s32_sss_noscc ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY2]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; GREEDY-NEXT: [[SSUBE:%[0-9]+]]:sgpr(s32), [[SSUBE1:%[0-9]+]]:sgpr(s32) = G_SSUBE [[COPY]], [[COPY1]], [[ZEXT]] - ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[SSUBE1]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $sgpr2 - %3:_(s1) = G_TRUNC %2 - %4:_(s32), %5:_(s1) = G_SSUBE %0, %1, %3 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY2]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; GREEDY-NEXT: [[SSUBE:%[0-9]+]]:sgpr(i32), [[SSUBE1:%[0-9]+]]:sgpr(i32) = G_SSUBE [[COPY]], [[COPY1]], [[ZEXT]] + ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[SSUBE1]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $sgpr2 + %3:_(i1) = G_TRUNC %2(i32) + %4:_(i32), %5:_(i1) = G_SSUBE %0, %1, %3 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sub.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sub.mir index 4fcd0fdf105d9..20afb02f975a1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sub.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sub.mir @@ -11,12 +11,12 @@ body: | ; CHECK-LABEL: name: sub_s32_ss ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[SUB:%[0-9]+]]:sgpr(s32) = G_SUB [[COPY]], [[COPY1]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_SUB %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[SUB:%[0-9]+]]:sgpr(i32) = G_SUB [[COPY]], [[COPY1]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_SUB %0, %1 ... 
--- @@ -29,13 +29,13 @@ body: | ; CHECK-LABEL: name: sub_s32_sv ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s32) = G_SUB [[COPY2]], [[COPY1]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = G_SUB %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(i32) = G_SUB [[COPY2]], [[COPY1]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = G_SUB %0, %1 ... --- @@ -48,13 +48,13 @@ body: | ; CHECK-LABEL: name: sub_s32_vs ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s32) = G_SUB [[COPY]], [[COPY2]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s32) = G_SUB %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(i32) = G_SUB [[COPY]], [[COPY2]] + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr0 + %2:_(i32) = G_SUB %0, %1 ... --- @@ -67,10 +67,10 @@ body: | ; CHECK-LABEL: name: sub_s32_vv ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s32) = G_SUB [[COPY]], [[COPY1]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_SUB %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(i32) = G_SUB [[COPY]], [[COPY1]] + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_SUB %0, %1 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-trunc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-trunc.mir index e65bf44b9c774..881c6bd7e0eda 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-trunc.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-trunc.mir @@ -12,10 +12,10 @@ body: | ; CHECK-LABEL: name: trunc_i64_to_i32_s ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s32) = G_TRUNC [[COPY]](s64) - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_TRUNC %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i32) = G_TRUNC [[COPY]](i64) + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(i32) = G_TRUNC %0(i64) ... --- @@ -28,10 +28,10 @@ body: | ; CHECK-LABEL: name: trunc_i64_to_i32_v ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s32) = G_TRUNC [[COPY]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_TRUNC %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i32) = G_TRUNC [[COPY]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = G_TRUNC %0(i64) ... 
--- name: trunc_i64_to_i1_s @@ -43,10 +43,10 @@ body: | ; CHECK-LABEL: name: trunc_i64_to_i1_s ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s64) - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s1) = G_TRUNC %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY]](i64) + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(i1) = G_TRUNC %0(i64) ... --- @@ -59,10 +59,10 @@ body: | ; CHECK-LABEL: name: trunc_i64_to_i1_v ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s1) = G_TRUNC %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i1) = G_TRUNC %0(i64) ... --- @@ -75,10 +75,10 @@ body: | ; CHECK-LABEL: name: trunc_i32_to_i1_s ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s1) = G_TRUNC %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i1) = G_TRUNC %0(i32) ... --- @@ -91,8 +91,8 @@ body: | ; CHECK-LABEL: name: trunc_i32_to_i1_v ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s1) = G_TRUNC %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i1) = G_TRUNC %0(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uadde.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uadde.mir index 48a060171ce89..3cc467a75f36a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uadde.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uadde.mir @@ -11,33 +11,34 @@ body: | ; FAST-LABEL: name: uadde_s32_sss ; FAST: liveins: $sgpr0, $sgpr1, $sgpr2 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; FAST-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[COPY]], [[COPY1]], [[ZEXT]] - ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[UADDE1]](s32) + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; FAST-NEXT: [[UADDE:%[0-9]+]]:sgpr(i32), [[UADDE1:%[0-9]+]]:sgpr(i32) = G_UADDE [[COPY]], [[COPY1]], [[ZEXT]] + ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[UADDE1]](i32) + ; ; GREEDY-LABEL: name: uadde_s32_sss ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; GREEDY-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[COPY]], [[COPY1]], [[ZEXT]] - ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[UADDE1]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $sgpr2 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(eq), %2, %3 - %5:_(s32), %6:_(s1) = G_UADDE %0, %1, %4 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; GREEDY-NEXT: [[UADDE:%[0-9]+]]:sgpr(i32), [[UADDE1:%[0-9]+]]:sgpr(i32) = G_UADDE [[COPY]], [[COPY1]], [[ZEXT]] + ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[UADDE1]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $sgpr2 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + %5:_(i32), %6:_(i1) = G_UADDE %0, %1, %4 ... 
--- @@ -50,33 +51,34 @@ body: | ; FAST-LABEL: name: uadde_s32_vss ; FAST: liveins: $vgpr0, $sgpr0, $sgpr1 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY]], [[COPY3]], [[COPY4]] + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; FAST-NEXT: [[UADDE:%[0-9]+]]:vgpr(i32), [[UADDE1:%[0-9]+]]:vcc(i1) = G_UADDE [[COPY]], [[COPY3]], [[COPY4]] + ; ; GREEDY-LABEL: name: uadde_s32_vss ; GREEDY: liveins: $vgpr0, $sgpr0, $sgpr1 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY]], [[COPY3]], [[COPY4]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s32) = COPY $sgpr1 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(eq), %2, %3 - %5:_(s32), %6:_(s1) = G_UADDE %0, %1, %4 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; GREEDY-NEXT: [[UADDE:%[0-9]+]]:vgpr(i32), [[UADDE1:%[0-9]+]]:vcc(i1) = G_UADDE [[COPY]], [[COPY3]], [[COPY4]] + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr0 + %2:_(i32) = COPY $sgpr1 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + %5:_(i32), %6:_(i1) = G_UADDE %0, %1, %4 ... 
--- name: uadde_s32_ssv @@ -88,30 +90,31 @@ body: | ; FAST-LABEL: name: uadde_s32_ssv ; FAST: liveins: $sgpr0, $sgpr1, $vgpr0 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY2]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY3]], [[COPY4]], [[COPY5]] + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY2]](i32) + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; FAST-NEXT: [[COPY5:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; FAST-NEXT: [[UADDE:%[0-9]+]]:vgpr(i32), [[UADDE1:%[0-9]+]]:vcc(i1) = G_UADDE [[COPY3]], [[COPY4]], [[COPY5]] + ; ; GREEDY-LABEL: name: uadde_s32_ssv ; GREEDY: liveins: $sgpr0, $sgpr1, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY2]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY3]], [[COPY4]], [[COPY5]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $vgpr0 - %3:_(s1) = G_TRUNC %2 - %4:_(s32), %5:_(s1) = G_UADDE %0, %1, %3 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY2]](i32) + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; GREEDY-NEXT: [[UADDE:%[0-9]+]]:vgpr(i32), [[UADDE1:%[0-9]+]]:vcc(i1) = G_UADDE [[COPY3]], [[COPY4]], [[COPY5]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $vgpr0 + %3:_(i1) = G_TRUNC %2(i32) + %4:_(i32), %5:_(i1) = G_UADDE %0, %1, %3 ... 
--- @@ -124,26 +127,27 @@ body: | ; FAST-LABEL: name: uadde_s32_vvs ; FAST: liveins: $vgpr0, $vgpr1, $sgpr0 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY2]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY]], [[COPY1]], [[COPY3]] + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY2]](i32) + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; FAST-NEXT: [[UADDE:%[0-9]+]]:vgpr(i32), [[UADDE1:%[0-9]+]]:vcc(i1) = G_UADDE [[COPY]], [[COPY1]], [[COPY3]] + ; ; GREEDY-LABEL: name: uadde_s32_vvs ; GREEDY: liveins: $vgpr0, $vgpr1, $sgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY2]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY]], [[COPY1]], [[COPY3]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $sgpr0 - %3:_(s1) = G_TRUNC %2 - %4:_(s32), %5:_(s1) = G_UADDE %0, %1, %3 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY2]](i32) + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; GREEDY-NEXT: [[UADDE:%[0-9]+]]:vgpr(i32), [[UADDE1:%[0-9]+]]:vcc(i1) = G_UADDE [[COPY]], [[COPY1]], [[COPY3]] + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $sgpr0 + %3:_(i1) = G_TRUNC %2(i32) + %4:_(i32), %5:_(i1) = G_UADDE %0, %1, %3 ... 
--- @@ -156,26 +160,27 @@ body: | ; FAST-LABEL: name: uadde_s32_sss_noscc ; FAST: liveins: $sgpr0, $sgpr1, $sgpr2 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY2]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; FAST-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[COPY]], [[COPY1]], [[ZEXT]] - ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[UADDE1]](s32) + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY2]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; FAST-NEXT: [[UADDE:%[0-9]+]]:sgpr(i32), [[UADDE1:%[0-9]+]]:sgpr(i32) = G_UADDE [[COPY]], [[COPY1]], [[ZEXT]] + ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[UADDE1]](i32) + ; ; GREEDY-LABEL: name: uadde_s32_sss_noscc ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY2]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; GREEDY-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[COPY]], [[COPY1]], [[ZEXT]] - ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[UADDE1]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $sgpr2 - %3:_(s1) = G_TRUNC %2 - %4:_(s32), %5:_(s1) = G_UADDE %0, %1, %3 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY2]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; GREEDY-NEXT: [[UADDE:%[0-9]+]]:sgpr(i32), [[UADDE1:%[0-9]+]]:sgpr(i32) = G_UADDE [[COPY]], [[COPY1]], [[ZEXT]] + ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[UADDE1]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $sgpr2 + %3:_(i1) = G_TRUNC %2(i32) + %4:_(i32), %5:_(i1) = G_UADDE %0, %1, %3 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uaddo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uaddo.mir index e53c67bc058bb..fc5b98ca25b2c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uaddo.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uaddo.mir @@ -12,13 +12,13 @@ body: | ; CHECK-LABEL: name: uaddo_s32_ss ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[UADDO:%[0-9]+]]:sgpr(s32), [[UADDO1:%[0-9]+]]:sgpr(s32) = G_UADDO [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[UADDO1]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32), %3:_(s1) = G_UADDO %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[UADDO:%[0-9]+]]:sgpr(i32), [[UADDO1:%[0-9]+]]:sgpr(i32) = G_UADDO [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[UADDO1]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32), %3:_(i1) = G_UADDO %0, %1 ... --- @@ -31,13 +31,13 @@ body: | ; CHECK-LABEL: name: uaddo_s32_sv ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY2]], [[COPY1]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32), %3:_(s1) = G_UADDO %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[UADDO:%[0-9]+]]:vgpr(i32), [[UADDO1:%[0-9]+]]:vcc(i1) = G_UADDO [[COPY2]], [[COPY1]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i32), %3:_(i1) = G_UADDO %0, %1 ... --- @@ -50,13 +50,13 @@ body: | ; CHECK-LABEL: name: uaddo_s32_vs ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY]], [[COPY2]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s32), %3:_(s1) = G_UADDO %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[UADDO:%[0-9]+]]:vgpr(i32), [[UADDO1:%[0-9]+]]:vcc(i1) = G_UADDO [[COPY]], [[COPY2]] + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr0 + %2:_(i32), %3:_(i1) = G_UADDO %0, %1 ... 
--- @@ -69,10 +69,10 @@ body: | ; CHECK-LABEL: name: uaddo_s32_vv ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY]], [[COPY1]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32), %3:_(s1) = G_UADDO %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[UADDO:%[0-9]+]]:vgpr(i32), [[UADDO1:%[0-9]+]]:vcc(i1) = G_UADDO [[COPY]], [[COPY1]] + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32), %3:_(i1) = G_UADDO %0, %1 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ubfx.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ubfx.mir index 20d280680a09d..d352ad8b1de81 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ubfx.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ubfx.mir @@ -17,16 +17,16 @@ body: | ; CHECK-LABEL: name: test_ubfx_s32_vvv ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[UBFX:%[0-9]+]]:vgpr(s32) = G_UBFX [[COPY]], [[COPY1]](s32), [[COPY2]] - ; CHECK-NEXT: $vgpr0 = COPY [[UBFX]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = G_UBFX %0, %1(s32), %2 - $vgpr0 = COPY %3(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[UBFX:%[0-9]+]]:vgpr(i32) = G_UBFX [[COPY]], [[COPY1]](i32), [[COPY2]] + ; CHECK-NEXT: $vgpr0 = COPY [[UBFX]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = G_UBFX %0, %1(i32), %2 + $vgpr0 = COPY %3(i32) ... --- @@ -40,18 +40,18 @@ body: | ; CHECK-LABEL: name: test_ubfx_s32_vii ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[UBFX:%[0-9]+]]:vgpr(s32) = G_UBFX [[COPY]], [[COPY1]](s32), [[COPY2]] - ; CHECK-NEXT: $vgpr0 = COPY [[UBFX]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_CONSTANT i32 10 - %2:_(s32) = G_CONSTANT i32 4 - %3:_(s32) = G_UBFX %0, %1(s32), %2 - $vgpr0 = COPY %3(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[C1]](i32) + ; CHECK-NEXT: [[UBFX:%[0-9]+]]:vgpr(i32) = G_UBFX [[COPY]], [[COPY1]](i32), [[COPY2]] + ; CHECK-NEXT: $vgpr0 = COPY [[UBFX]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_CONSTANT i32 10 + %2:_(i32) = G_CONSTANT i32 4 + %3:_(i32) = G_UBFX %0, %1(i32), %2 + $vgpr0 = COPY %3(i32) ... 
--- @@ -65,18 +65,18 @@ body: | ; CHECK-LABEL: name: test_ubfx_s32_vss ; CHECK: liveins: $vgpr0, $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[UBFX:%[0-9]+]]:vgpr(s32) = G_UBFX [[COPY]], [[COPY3]](s32), [[COPY4]] - ; CHECK-NEXT: $vgpr0 = COPY [[UBFX]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s32) = COPY $sgpr1 - %3:_(s32) = G_UBFX %0, %1(s32), %2 - $vgpr0 = COPY %3(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[COPY2]](i32) + ; CHECK-NEXT: [[UBFX:%[0-9]+]]:vgpr(i32) = G_UBFX [[COPY]], [[COPY3]](i32), [[COPY4]] + ; CHECK-NEXT: $vgpr0 = COPY [[UBFX]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr0 + %2:_(i32) = COPY $sgpr1 + %3:_(i32) = G_UBFX %0, %1(i32), %2 + $vgpr0 = COPY %3(i32) ... # Expand to a sequence that implements the 64-bit bitfield extract using @@ -92,21 +92,21 @@ body: | ; CHECK-LABEL: name: test_ubfx_s64_vvv ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(s64) = G_LSHR [[COPY]], [[COPY1]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[LSHR]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 64 - ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s32) = G_SUB [[C]], [[COPY2]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(s64) = G_SHL [[LSHR]], [[SUB]](s32) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:vgpr(s64) = G_LSHR [[SHL]], [[SUB]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %3:vgpr(s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s32) = COPY $vgpr3 - %3:_(s64) = G_UBFX %0, %1(s32), %2 - $vgpr0_vgpr1 = COPY %3(s64) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(i64) = G_LSHR [[COPY]], [[COPY1]](i32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[LSHR]](i64) + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 64 + ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(i32) = G_SUB [[C]], [[COPY2]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(i64) = G_SHL [[LSHR]], [[SUB]](i32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:vgpr(i64) = G_LSHR [[SHL]], [[SUB]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %3:vgpr(i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = COPY $vgpr2 + %2:_(i32) = COPY $vgpr3 + %3:_(i64) = G_UBFX %0, %1(i32), %2 + $vgpr0_vgpr1 = COPY %3(i64) ... 
--- @@ -120,21 +120,21 @@ body: | ; CHECK-LABEL: name: test_ubfx_s64_vss ; CHECK: liveins: $vgpr0_vgpr1, $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(s64) = G_LSHR [[COPY]], [[COPY1]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[LSHR]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 64 - ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s32) = G_SUB [[C]], [[COPY2]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(s64) = G_SHL [[LSHR]], [[SUB]](s32) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:vgpr(s64) = G_LSHR [[SHL]], [[SUB]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %3:vgpr(s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = COPY $vgpr1 - %3:_(s64) = G_UBFX %0, %1(s32), %2 - $vgpr0_vgpr1 = COPY %3(s64) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(i64) = G_LSHR [[COPY]], [[COPY1]](i32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[LSHR]](i64) + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 64 + ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(i32) = G_SUB [[C]], [[COPY2]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(i64) = G_SHL [[LSHR]], [[SUB]](i32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:vgpr(i64) = G_LSHR [[SHL]], [[SUB]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %3:vgpr(i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = COPY $vgpr1 + %3:_(i64) = G_UBFX %0, %1(i32), %2 + $vgpr0_vgpr1 = COPY %3(i64) ... 
# If the offset and width are constants, use the 32-bit bitfield extract, @@ -150,22 +150,22 @@ body: | ; CHECK-LABEL: name: test_ubfx_s64_vii_small ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 31 - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(s64) = G_LSHR [[COPY]], [[COPY1]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[LSHR]](s64) - ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[UBFX:%[0-9]+]]:vgpr(s32) = G_UBFX [[UV]], [[C2]](s32), [[COPY2]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[UBFX]](s32), [[C2]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_CONSTANT i32 31 - %2:_(s32) = G_CONSTANT i32 4 - %3:_(s64) = G_UBFX %0, %1(s32), %2 - $vgpr0_vgpr1 = COPY %3(s64) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 31 + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[C1]](i32) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(i64) = G_LSHR [[COPY]], [[COPY1]](i32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[LSHR]](i64) + ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[UBFX:%[0-9]+]]:vgpr(i32) = G_UBFX [[UV]], [[C2]](i32), [[COPY2]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[UBFX]](i32), [[C2]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = G_CONSTANT i32 31 + %2:_(i32) = G_CONSTANT i32 4 + %3:_(i64) = G_UBFX %0, %1(i32), %2 + $vgpr0_vgpr1 = COPY %3(i64) ... 
--- @@ -179,23 +179,23 @@ body: | ; CHECK-LABEL: name: test_ubfx_s64_vii_big ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 40 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(s64) = G_LSHR [[COPY]], [[COPY1]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[LSHR]](s64) - ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[UBFX:%[0-9]+]]:vgpr(s32) = G_UBFX [[UV1]], [[C2]](s32), [[C3]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[UV]](s32), [[UBFX]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_CONSTANT i32 8 - %2:_(s32) = G_CONSTANT i32 40 - %3:_(s64) = G_UBFX %0, %1(s32), %2 - $vgpr0_vgpr1 = COPY %3(s64) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 40 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[C1]](i32) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(i64) = G_LSHR [[COPY]], [[COPY1]](i32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[LSHR]](i64) + ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C3:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[UBFX:%[0-9]+]]:vgpr(i32) = G_UBFX [[UV1]], [[C2]](i32), [[C3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[UV]](i32), [[UBFX]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32) = G_CONSTANT i32 8 + %2:_(i32) = G_CONSTANT i32 40 + %3:_(i64) = G_UBFX %0, %1(i32), %2 + $vgpr0_vgpr1 = COPY %3(i64) ... 
--- @@ -209,22 +209,22 @@ body: | ; CHECK-LABEL: name: test_ubfx_s64_svv ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(s64) = G_LSHR [[COPY3]], [[COPY1]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[LSHR]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 64 - ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s32) = G_SUB [[C]], [[COPY2]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(s64) = G_SHL [[LSHR]], [[SUB]](s32) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:vgpr(s64) = G_LSHR [[SHL]], [[SUB]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %3:vgpr(s64) - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = COPY $vgpr1 - %3:_(s64) = G_UBFX %0, %1(s32), %2 - $vgpr0_vgpr1 = COPY %3(s64) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i64) = COPY [[COPY]](i64) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(i64) = G_LSHR [[COPY3]], [[COPY1]](i32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[LSHR]](i64) + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 64 + ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(i32) = G_SUB [[C]], [[COPY2]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(i64) = G_SHL [[LSHR]], [[SUB]](i32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:vgpr(i64) = G_LSHR [[SHL]], [[SUB]](i32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %3:vgpr(i64) + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = COPY $vgpr1 + %3:_(i64) = G_UBFX %0, %1(i32), %2 + $vgpr0_vgpr1 = COPY %3(i64) ... # Expand to a sequence that combines the offset and width for the two operand @@ -240,17 +240,17 @@ body: | ; CHECK-LABEL: name: test_ubfx_s32_svv ; CHECK: liveins: $sgpr0, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[UBFX:%[0-9]+]]:vgpr(s32) = G_UBFX [[COPY3]], [[COPY1]](s32), [[COPY2]] - ; CHECK-NEXT: $vgpr0 = COPY [[UBFX]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = COPY $vgpr1 - %3:_(s32) = G_UBFX %0, %1(s32), %2 - $vgpr0 = COPY %3(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[UBFX:%[0-9]+]]:vgpr(i32) = G_UBFX [[COPY3]], [[COPY1]](i32), [[COPY2]] + ; CHECK-NEXT: $vgpr0 = COPY [[UBFX]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = COPY $vgpr1 + %3:_(i32) = G_UBFX %0, %1(i32), %2 + $vgpr0 = COPY %3(i32) ... 
--- @@ -264,21 +264,21 @@ body: | ; CHECK-LABEL: name: test_ubfx_s32_sss ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 63 - ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[COPY1]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY2]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:sreg_32(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[S_BFE_U32_:%[0-9]+]]:sreg_32(s32) = S_BFE_U32 [[COPY]](s32), [[OR]](s32), implicit-def $scc - ; CHECK-NEXT: $sgpr0 = COPY [[S_BFE_U32_]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $sgpr2 - %3:_(s32) = G_UBFX %0, %1(s32), %2 - $sgpr0 = COPY %3(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 63 + ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(i32) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(i32) = G_SHL [[COPY2]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:sreg_32(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[S_BFE_U32_:%[0-9]+]]:sreg_32(i32) = S_BFE_U32 [[COPY]](i32), [[OR]](i32), implicit-def $scc + ; CHECK-NEXT: $sgpr0 = COPY [[S_BFE_U32_]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $sgpr2 + %3:_(i32) = G_UBFX %0, %1(i32), %2 + $sgpr0 = COPY %3(i32) ... --- @@ -292,21 +292,21 @@ body: | ; CHECK-LABEL: name: test_ubfx_s32_sii ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 63 - ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[C]], [[C2]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C1]], [[C3]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:sreg_32(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[S_BFE_U32_:%[0-9]+]]:sreg_32(s32) = S_BFE_U32 [[COPY]](s32), [[OR]](s32), implicit-def $scc - ; CHECK-NEXT: $sgpr0 = COPY [[S_BFE_U32_]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = G_CONSTANT i32 1 - %2:_(s32) = G_CONSTANT i32 10 - %3:_(s32) = G_UBFX %0, %1(s32), %2 - $sgpr0 = COPY %3(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 63 + ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(i32) = G_AND [[C]], [[C2]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(i32) = G_SHL [[C1]], [[C3]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:sreg_32(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[S_BFE_U32_:%[0-9]+]]:sreg_32(i32) = S_BFE_U32 [[COPY]](i32), [[OR]](i32), implicit-def $scc + ; CHECK-NEXT: $sgpr0 = COPY [[S_BFE_U32_]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = G_CONSTANT i32 1 + %2:_(i32) = G_CONSTANT i32 10 + %3:_(i32) = G_UBFX %0, %1(i32), %2 + $sgpr0 = COPY %3(i32) ... 
# Expand to a sequence that combines the offset and width for the two operand @@ -322,21 +322,21 @@ body: | ; CHECK-LABEL: name: test_ubfx_s64_sss ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2, $sgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 63 - ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[COPY1]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY2]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:sreg_32(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[S_BFE_U64_:%[0-9]+]]:sreg_64(s64) = S_BFE_U64 [[COPY]](s64), [[OR]](s32), implicit-def $scc - ; CHECK-NEXT: $sgpr0_sgpr1 = COPY [[S_BFE_U64_]](s64) - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s32) = COPY $sgpr3 - %3:_(s64) = G_UBFX %0, %1(s32), %2 - $sgpr0_sgpr1 = COPY %3(s64) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64(i64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 63 + ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(i32) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(i32) = G_SHL [[COPY2]], [[C1]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:sreg_32(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[S_BFE_U64_:%[0-9]+]]:sreg_64(i64) = S_BFE_U64 [[COPY]](i64), [[OR]](i32), implicit-def $scc + ; CHECK-NEXT: $sgpr0_sgpr1 = COPY [[S_BFE_U64_]](i64) + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(i32) = COPY $sgpr2 + %2:_(i32) = COPY $sgpr3 + %3:_(i64) = G_UBFX %0, %1(i32), %2 + $sgpr0_sgpr1 = COPY %3(i64) ... 
--- @@ -350,19 +350,19 @@ body: | ; CHECK-LABEL: name: test_ubfx_s64_sii ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 63 - ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[C]], [[C2]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C1]], [[C3]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:sreg_32(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[S_BFE_U64_:%[0-9]+]]:sreg_64(s64) = S_BFE_U64 [[COPY]](s64), [[OR]](s32), implicit-def $scc - ; CHECK-NEXT: $sgpr0_sgpr1 = COPY [[S_BFE_U64_]](s64) - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_CONSTANT i32 1 - %2:_(s32) = G_CONSTANT i32 10 - %3:_(s64) = G_UBFX %0, %1(s32), %2 - $sgpr0_sgpr1 = COPY %3(s64) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64(i64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 63 + ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(i32) = G_AND [[C]], [[C2]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(i32) = G_SHL [[C1]], [[C3]](i32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:sreg_32(i32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[S_BFE_U64_:%[0-9]+]]:sreg_64(i64) = S_BFE_U64 [[COPY]](i64), [[OR]](i32), implicit-def $scc + ; CHECK-NEXT: $sgpr0_sgpr1 = COPY [[S_BFE_U64_]](i64) + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(i32) = G_CONSTANT i32 1 + %2:_(i32) = G_CONSTANT i32 10 + %3:_(i64) = G_UBFX %0, %1(i32), %2 + $sgpr0_sgpr1 = COPY %3(i64) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uitofp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uitofp.mir index 554c88a68972b..6b354eada7f1a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uitofp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uitofp.mir @@ -12,11 +12,11 @@ body: | ; CHECK-LABEL: name: uitofp_s ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:vgpr(s32) = G_UITOFP [[COPY1]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = G_UITOFP %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:vgpr(f32) = G_UITOFP [[COPY1]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(f32) = G_UITOFP %0(i32) ... --- @@ -29,8 +29,8 @@ body: | ; CHECK-LABEL: name: uitofp_v ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:vgpr(s32) = G_UITOFP [[COPY]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_UITOFP %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:vgpr(f32) = G_UITOFP [[COPY]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(f32) = G_UITOFP %0(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umax.mir index 36a38aac1ccaa..29f86fc227641 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umax.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umax.mir @@ -13,14 +13,14 @@ body: | ; CHECK-LABEL: name: umax_s32_ss ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[UMAX:%[0-9]+]]:sgpr(s32) = G_UMAX [[COPY]], [[COPY1]] - ; CHECK-NEXT: $sgpr0 = COPY [[UMAX]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_UMAX %0, %1 - $sgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[UMAX:%[0-9]+]]:sgpr(i32) = G_UMAX [[COPY]], [[COPY1]] + ; CHECK-NEXT: $sgpr0 = COPY [[UMAX]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_UMAX %0, %1 + $sgpr0 = COPY %2(i32) ... --- @@ -34,15 +34,15 @@ body: | ; CHECK-LABEL: name: umax_s32_sv ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[UMAX:%[0-9]+]]:vgpr(s32) = G_UMAX [[COPY2]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[UMAX]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = G_UMAX %0, %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[UMAX:%[0-9]+]]:vgpr(i32) = G_UMAX [[COPY2]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[UMAX]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = G_UMAX %0, %1 + $vgpr0 = COPY %2(i32) ... --- @@ -56,15 +56,15 @@ body: | ; CHECK-LABEL: name: umax_s32_vs ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[UMAX:%[0-9]+]]:vgpr(s32) = G_UMAX [[COPY]], [[COPY2]] - ; CHECK-NEXT: $vgpr0 = COPY [[UMAX]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s32) = G_UMAX %0, %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[UMAX:%[0-9]+]]:vgpr(i32) = G_UMAX [[COPY]], [[COPY2]] + ; CHECK-NEXT: $vgpr0 = COPY [[UMAX]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr0 + %2:_(i32) = G_UMAX %0, %1 + $vgpr0 = COPY %2(i32) ... 
--- @@ -78,14 +78,14 @@ body: | ; CHECK-LABEL: name: umax_s32_vv ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[UMAX:%[0-9]+]]:vgpr(s32) = G_UMAX [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[UMAX]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_UMAX %0, %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[UMAX:%[0-9]+]]:vgpr(i32) = G_UMAX [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[UMAX]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_UMAX %0, %1 + $vgpr0 = COPY %2(i32) ... # FIXME: This should use VGPR instruction @@ -100,14 +100,14 @@ body: | ; CHECK-LABEL: name: umax_s32_ss_vgpr_use ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[UMAX:%[0-9]+]]:sgpr(s32) = G_UMAX [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[UMAX]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_UMAX %0, %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[UMAX:%[0-9]+]]:sgpr(i32) = G_UMAX [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[UMAX]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_UMAX %0, %1 + $vgpr0 = COPY %2(i32) ... --- @@ -121,23 +121,23 @@ body: | ; CHECK-LABEL: name: umax_s16_ss ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s16) - ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s16) - ; CHECK-NEXT: [[UMAX:%[0-9]+]]:sgpr(s32) = G_UMAX [[ZEXT]], [[ZEXT1]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[UMAX]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s16) - ; CHECK-NEXT: $sgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s16) = G_UMAX %2, %3 - %5:_(s32) = G_ANYEXT %4 - $sgpr0 = COPY %5 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i16) + ; CHECK-NEXT: [[UMAX:%[0-9]+]]:sgpr(i32) = G_UMAX [[ZEXT]], [[ZEXT1]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(i16) = G_TRUNC [[UMAX]](i32) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC2]](i16) + ; CHECK-NEXT: $sgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i16) = G_UMAX %2, %3 + %5:_(i32) = G_ANYEXT %4(i16) + $sgpr0 = COPY %5(i32) ... 
@@ -152,23 +152,23 @@ body: | ; CHECK-LABEL: name: umax_s16_ss_vgpr_use ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s16) - ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s16) - ; CHECK-NEXT: [[UMAX:%[0-9]+]]:sgpr(s32) = G_UMAX [[ZEXT]], [[ZEXT1]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[UMAX]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s16) = G_UMAX %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i16) + ; CHECK-NEXT: [[UMAX:%[0-9]+]]:sgpr(i32) = G_UMAX [[ZEXT]], [[ZEXT1]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(i16) = G_TRUNC [[UMAX]](i32) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC2]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i16) = G_UMAX %2, %3 + %5:_(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... 
@@ -183,26 +183,26 @@ body: | ; CHECK-LABEL: name: umax_v2s16_ss ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST1]], [[C2]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s32) = G_AND [[BITCAST1]], [[C3]] - ; CHECK-NEXT: [[UMAX:%[0-9]+]]:sgpr(s32) = G_UMAX [[AND]], [[AND1]] - ; CHECK-NEXT: [[UMAX1:%[0-9]+]]:sgpr(s32) = G_UMAX [[LSHR]], [[LSHR1]] - ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UMAX]](s32), [[UMAX1]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) - %0:_(<2 x s16>) = COPY $sgpr0 - %1:_(<2 x s16>) = COPY $sgpr1 - %2:_(<2 x s16>) = G_UMAX %0, %1 - $sgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:sgpr(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(i32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:sgpr(i32) = G_LSHR [[BITCAST1]], [[C2]](i32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(i32) = G_AND [[BITCAST1]], [[C3]] + ; CHECK-NEXT: [[UMAX:%[0-9]+]]:sgpr(i32) = G_UMAX [[AND]], [[AND1]] + ; CHECK-NEXT: [[UMAX1:%[0-9]+]]:sgpr(i32) = G_UMAX [[LSHR]], [[LSHR1]] + ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC [[UMAX]](i32), [[UMAX1]](i32) + ; CHECK-NEXT: $sgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x i16>) + %0:_(<2 x i16>) = COPY $sgpr0 + %1:_(<2 x i16>) = COPY $sgpr1 + %2:_(<2 x i16>) = G_UMAX %0, %1 + $sgpr0 = COPY %2(<2 x i16>) ... 
--- @@ -216,15 +216,15 @@ body: | ; CHECK-LABEL: name: umax_v2s16_sv ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[UMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_UMAX [[COPY2]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[UMAX]](<2 x s16>) - %0:_(<2 x s16>) = COPY $sgpr0 - %1:_(<2 x s16>) = COPY $vgpr0 - %2:_(<2 x s16>) = G_UMAX %0, %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x i16>) = COPY [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[UMAX:%[0-9]+]]:vgpr(<2 x i16>) = G_UMAX [[COPY2]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[UMAX]](<2 x i16>) + %0:_(<2 x i16>) = COPY $sgpr0 + %1:_(<2 x i16>) = COPY $vgpr0 + %2:_(<2 x i16>) = G_UMAX %0, %1 + $vgpr0 = COPY %2(<2 x i16>) ... --- @@ -238,15 +238,15 @@ body: | ; CHECK-LABEL: name: umax_v2s16_vs ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[UMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_UMAX [[COPY]], [[COPY2]] - ; CHECK-NEXT: $vgpr0 = COPY [[UMAX]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $sgpr0 - %2:_(<2 x s16>) = G_UMAX %0, %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x i16>) = COPY [[COPY1]](<2 x i16>) + ; CHECK-NEXT: [[UMAX:%[0-9]+]]:vgpr(<2 x i16>) = G_UMAX [[COPY]], [[COPY2]] + ; CHECK-NEXT: $vgpr0 = COPY [[UMAX]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $sgpr0 + %2:_(<2 x i16>) = G_UMAX %0, %1 + $vgpr0 = COPY %2(<2 x i16>) ... --- @@ -260,12 +260,12 @@ body: | ; CHECK-LABEL: name: umax_v2s16_vv ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[UMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_UMAX [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[UMAX]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_UMAX %0, %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr1 + ; CHECK-NEXT: [[UMAX:%[0-9]+]]:vgpr(<2 x i16>) = G_UMAX [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[UMAX]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = G_UMAX %0, %1 + $vgpr0 = COPY %2(<2 x i16>) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umin.mir index bb232b5e07651..b6ba3467c6745 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umin.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umin.mir @@ -13,14 +13,14 @@ body: | ; CHECK-LABEL: name: umin_s32_ss ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[UMIN:%[0-9]+]]:sgpr(s32) = G_UMIN [[COPY]], [[COPY1]] - ; CHECK-NEXT: $sgpr0 = COPY [[UMIN]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_UMIN %0, %1 - $sgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[UMIN:%[0-9]+]]:sgpr(i32) = G_UMIN [[COPY]], [[COPY1]] + ; CHECK-NEXT: $sgpr0 = COPY [[UMIN]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_UMIN %0, %1 + $sgpr0 = COPY %2(i32) ... @@ -35,15 +35,15 @@ body: | ; CHECK-LABEL: name: umin_s32_sv ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[UMIN:%[0-9]+]]:vgpr(s32) = G_UMIN [[COPY2]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[UMIN]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = G_UMIN %0, %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[UMIN:%[0-9]+]]:vgpr(i32) = G_UMIN [[COPY2]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[UMIN]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = G_UMIN %0, %1 + $vgpr0 = COPY %2(i32) ... @@ -58,15 +58,15 @@ body: | ; CHECK-LABEL: name: umin_s32_vs ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[UMIN:%[0-9]+]]:vgpr(s32) = G_UMIN [[COPY]], [[COPY2]] - ; CHECK-NEXT: $vgpr0 = COPY [[UMIN]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s32) = G_UMIN %0, %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[UMIN:%[0-9]+]]:vgpr(i32) = G_UMIN [[COPY]], [[COPY2]] + ; CHECK-NEXT: $vgpr0 = COPY [[UMIN]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr0 + %2:_(i32) = G_UMIN %0, %1 + $vgpr0 = COPY %2(i32) ... 
@@ -81,14 +81,14 @@ body: | ; CHECK-LABEL: name: umin_s32_vv ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[UMIN:%[0-9]+]]:vgpr(s32) = G_UMIN [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[UMIN]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_UMIN %0, %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[UMIN:%[0-9]+]]:vgpr(i32) = G_UMIN [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[UMIN]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_UMIN %0, %1 + $vgpr0 = COPY %2(i32) ... @@ -104,14 +104,14 @@ body: | ; CHECK-LABEL: name: umin_s32_ss_vgpr_use ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[UMIN:%[0-9]+]]:sgpr(s32) = G_UMIN [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[UMIN]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_UMIN %0, %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[UMIN:%[0-9]+]]:sgpr(i32) = G_UMIN [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[UMIN]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_UMIN %0, %1 + $vgpr0 = COPY %2(i32) ... --- @@ -125,23 +125,23 @@ body: | ; CHECK-LABEL: name: umin_s16_ss ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s16) - ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s16) - ; CHECK-NEXT: [[UMIN:%[0-9]+]]:sgpr(s32) = G_UMIN [[ZEXT]], [[ZEXT1]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[UMIN]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s16) - ; CHECK-NEXT: $sgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s16) = G_UMIN %2, %3 - %5:_(s32) = G_ANYEXT %4 - $sgpr0 = COPY %5 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i16) + ; CHECK-NEXT: [[UMIN:%[0-9]+]]:sgpr(i32) = G_UMIN [[ZEXT]], [[ZEXT1]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(i16) = G_TRUNC [[UMIN]](i32) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC2]](i16) + ; CHECK-NEXT: $sgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i16) = G_UMIN %2, %3 + %5:_(i32) = G_ANYEXT %4(i16) + $sgpr0 = COPY %5(i32) ... 
@@ -156,23 +156,23 @@ body: | ; CHECK-LABEL: name: umin_s16_ss_vgpr_use ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s16) - ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s16) - ; CHECK-NEXT: [[UMIN:%[0-9]+]]:sgpr(s32) = G_UMIN [[ZEXT]], [[ZEXT1]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[UMIN]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(s16) = G_UMIN %2, %3 - %5:_(s32) = G_ANYEXT %4 - $vgpr0 = COPY %5 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC1]](i16) + ; CHECK-NEXT: [[UMIN:%[0-9]+]]:sgpr(i32) = G_UMIN [[ZEXT]], [[ZEXT1]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(i16) = G_TRUNC [[UMIN]](i32) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC2]](i16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i16) = G_TRUNC %0(i32) + %3:_(i16) = G_TRUNC %1(i32) + %4:_(i16) = G_UMIN %2, %3 + %5:_(i32) = G_ANYEXT %4(i16) + $vgpr0 = COPY %5(i32) ... 
@@ -187,26 +187,26 @@ body: | ; CHECK-LABEL: name: umin_v2s16_ss ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST1]], [[C2]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s32) = G_AND [[BITCAST1]], [[C3]] - ; CHECK-NEXT: [[UMIN:%[0-9]+]]:sgpr(s32) = G_UMIN [[AND]], [[AND1]] - ; CHECK-NEXT: [[UMIN1:%[0-9]+]]:sgpr(s32) = G_UMIN [[LSHR]], [[LSHR1]] - ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UMIN]](s32), [[UMIN1]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) - %0:_(<2 x s16>) = COPY $sgpr0 - %1:_(<2 x s16>) = COPY $sgpr1 - %2:_(<2 x s16>) = G_UMIN %0, %1 - $sgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(i32) = G_BITCAST [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:sgpr(i32) = G_LSHR [[BITCAST]], [[C]](i32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(i32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(i32) = G_BITCAST [[COPY1]](<2 x i16>) + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:sgpr(i32) = G_LSHR [[BITCAST1]], [[C2]](i32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(i32) = G_AND [[BITCAST1]], [[C3]] + ; CHECK-NEXT: [[UMIN:%[0-9]+]]:sgpr(i32) = G_UMIN [[AND]], [[AND1]] + ; CHECK-NEXT: [[UMIN1:%[0-9]+]]:sgpr(i32) = G_UMIN [[LSHR]], [[LSHR1]] + ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x i16>) = G_BUILD_VECTOR_TRUNC [[UMIN]](i32), [[UMIN1]](i32) + ; CHECK-NEXT: $sgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x i16>) + %0:_(<2 x i16>) = COPY $sgpr0 + %1:_(<2 x i16>) = COPY $sgpr1 + %2:_(<2 x i16>) = G_UMIN %0, %1 + $sgpr0 = COPY %2(<2 x i16>) ... 
--- @@ -220,15 +220,15 @@ body: | ; CHECK-LABEL: name: umin_v2s16_sv ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[UMIN:%[0-9]+]]:vgpr(<2 x s16>) = G_UMIN [[COPY2]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[UMIN]](<2 x s16>) - %0:_(<2 x s16>) = COPY $sgpr0 - %1:_(<2 x s16>) = COPY $vgpr0 - %2:_(<2 x s16>) = G_UMIN %0, %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x i16>) = COPY [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[UMIN:%[0-9]+]]:vgpr(<2 x i16>) = G_UMIN [[COPY2]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[UMIN]](<2 x i16>) + %0:_(<2 x i16>) = COPY $sgpr0 + %1:_(<2 x i16>) = COPY $vgpr0 + %2:_(<2 x i16>) = G_UMIN %0, %1 + $vgpr0 = COPY %2(<2 x i16>) ... --- @@ -242,15 +242,15 @@ body: | ; CHECK-LABEL: name: umin_v2s16_vs ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[UMIN:%[0-9]+]]:vgpr(<2 x s16>) = G_UMIN [[COPY]], [[COPY2]] - ; CHECK-NEXT: $vgpr0 = COPY [[UMIN]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $sgpr0 - %2:_(<2 x s16>) = G_UMIN %0, %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x i16>) = COPY [[COPY1]](<2 x i16>) + ; CHECK-NEXT: [[UMIN:%[0-9]+]]:vgpr(<2 x i16>) = G_UMIN [[COPY]], [[COPY2]] + ; CHECK-NEXT: $vgpr0 = COPY [[UMIN]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $sgpr0 + %2:_(<2 x i16>) = G_UMIN %0, %1 + $vgpr0 = COPY %2(<2 x i16>) ... --- @@ -264,12 +264,12 @@ body: | ; CHECK-LABEL: name: umin_v2s16_vv ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[UMIN:%[0-9]+]]:vgpr(<2 x s16>) = G_UMIN [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[UMIN]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_UMIN %0, %1 - $vgpr0 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr1 + ; CHECK-NEXT: [[UMIN:%[0-9]+]]:vgpr(<2 x i16>) = G_UMIN [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[UMIN]](<2 x i16>) + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = G_UMIN %0, %1 + $vgpr0 = COPY %2(<2 x i16>) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umulh.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umulh.mir index a5a3546dbcb23..07952f551805e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umulh.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umulh.mir @@ -16,20 +16,21 @@ body: | ; GFX6-LABEL: name: umulh_s32_ss ; GFX6: liveins: $sgpr0, $sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX6-NEXT: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY2]], [[COPY3]] + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; GFX6-NEXT: [[UMULH:%[0-9]+]]:vgpr(i32) = G_UMULH [[COPY2]], [[COPY3]] + ; ; GFX9-LABEL: name: umulh_s32_ss ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:sgpr(s32) = G_UMULH [[COPY]], [[COPY1]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_UMULH %0, %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:sgpr(i32) = G_UMULH [[COPY]], [[COPY1]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_UMULH %0, %1 ... --- @@ -43,20 +44,21 @@ body: | ; GFX6-LABEL: name: umulh_s32_sv ; GFX6: liveins: $sgpr0, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX6-NEXT: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY2]], [[COPY1]] + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; GFX6-NEXT: [[UMULH:%[0-9]+]]:vgpr(i32) = G_UMULH [[COPY2]], [[COPY1]] + ; ; GFX9-LABEL: name: umulh_s32_sv ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY2]], [[COPY1]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = G_UMULH %0, %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:vgpr(i32) = G_UMULH [[COPY2]], [[COPY1]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = G_UMULH %0, %1 ... 
--- @@ -70,20 +72,21 @@ body: | ; GFX6-LABEL: name: umulh_s32_vs ; GFX6: liveins: $sgpr0, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX6-NEXT: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY]], [[COPY2]] + ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; GFX6-NEXT: [[UMULH:%[0-9]+]]:vgpr(i32) = G_UMULH [[COPY]], [[COPY2]] + ; ; GFX9-LABEL: name: umulh_s32_vs ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY]], [[COPY2]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s32) = G_UMULH %0, %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:vgpr(i32) = G_UMULH [[COPY]], [[COPY2]] + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr0 + %2:_(i32) = G_UMULH %0, %1 ... --- @@ -97,16 +100,17 @@ body: | ; GFX6-LABEL: name: umulh_s32_vv ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY]], [[COPY1]] + ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GFX6-NEXT: [[UMULH:%[0-9]+]]:vgpr(i32) = G_UMULH [[COPY]], [[COPY1]] + ; ; GFX9-LABEL: name: umulh_s32_vv ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY]], [[COPY1]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_UMULH %0, %1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:vgpr(i32) = G_UMULH [[COPY]], [[COPY1]] + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_UMULH %0, %1 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uniform-load-noclobber.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uniform-load-noclobber.mir index 8159f1b982c36..d09ec7cf761fc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uniform-load-noclobber.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uniform-load-noclobber.mir @@ -15,76 +15,77 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: %in_addr:sgpr(p1) = COPY $sgpr0_sgpr1 ; GFX7-NEXT: %out_addr:sgpr(p1) = COPY $sgpr2_sgpr3 - ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD %in_addr(p1) :: (load (<4 x s32>), align 4, addrspace 1) - ; GFX7-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; GFX7-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD %in_addr, [[C]](s64) - ; GFX7-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<4 x s32>) from unknown-address + 16, align 4, addrspace 1) - ; GFX7-NEXT: [[C1:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 32 - ; GFX7-NEXT: [[PTR_ADD1:%[0-9]+]]:vgpr(p1) = G_PTR_ADD %in_addr, [[C1]](s64) - ; GFX7-NEXT: [[LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD1]](p1) :: (load (<4 x s32>) from unknown-address + 32, align 4, addrspace 1) - ; GFX7-NEXT: [[C2:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 48 - ; GFX7-NEXT: [[PTR_ADD2:%[0-9]+]]:vgpr(p1) = G_PTR_ADD %in_addr, [[C2]](s64) - ; GFX7-NEXT: [[LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD2]](p1) :: (load (<4 x s32>) from unknown-address + 48, align 4, addrspace 1) - ; GFX7-NEXT: %load:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) - ; GFX7-NEXT: %load0_3:vgpr(<4 x s32>), %load4_7:vgpr(<4 x s32>), %load8_11:vgpr(<4 x s32>), %load12_15:vgpr(<4 x s32>) = G_UNMERGE_VALUES %load(<16 x s32>) - ; GFX7-NEXT: G_STORE %load0_3(<4 x s32>), %out_addr(p1) :: (store (<4 x s32>), align 4, addrspace 1) - ; GFX7-NEXT: %cst16:sgpr(s64) = G_CONSTANT i64 16 - ; GFX7-NEXT: %out_addr_plus_16:sgpr(p1) = G_PTR_ADD %out_addr, %cst16(s64) - ; GFX7-NEXT: G_STORE %load4_7(<4 x s32>), %out_addr_plus_16(p1) :: (store (<4 x s32>), align 4, addrspace 1) - ; GFX7-NEXT: %cst32:sgpr(s64) = G_CONSTANT i64 32 - ; GFX7-NEXT: %out_addr_plus_32:sgpr(p1) = G_PTR_ADD %out_addr, %cst32(s64) - ; GFX7-NEXT: G_STORE %load8_11(<4 x s32>), %out_addr_plus_32(p1) :: (store (<4 x s32>), align 4, addrspace 1) - ; GFX7-NEXT: %cst48:sgpr(s64) = G_CONSTANT i64 48 - ; GFX7-NEXT: %out_addr_plus_48:sgpr(p1) = G_PTR_ADD %out_addr, %cst48(s64) - ; GFX7-NEXT: G_STORE %load12_15(<4 x s32>), %out_addr_plus_48(p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vgpr(<4 x i32>) = G_LOAD %in_addr(p1) :: (load (<4 x i32>), align 4, addrspace 1) + ; GFX7-NEXT: [[C:%[0-9]+]]:vgpr(i64) = G_CONSTANT i64 16 + ; GFX7-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD %in_addr, [[C]](i64) + ; GFX7-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<4 x i32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<4 x i32>) from unknown-address + 16, align 4, addrspace 1) + ; GFX7-NEXT: [[C1:%[0-9]+]]:vgpr(i64) = G_CONSTANT i64 32 + ; GFX7-NEXT: [[PTR_ADD1:%[0-9]+]]:vgpr(p1) = G_PTR_ADD %in_addr, [[C1]](i64) + ; GFX7-NEXT: [[LOAD2:%[0-9]+]]:vgpr(<4 x i32>) = G_LOAD [[PTR_ADD1]](p1) :: (load (<4 x i32>) from unknown-address + 32, align 4, addrspace 1) + ; GFX7-NEXT: [[C2:%[0-9]+]]:vgpr(i64) = G_CONSTANT i64 48 + ; GFX7-NEXT: [[PTR_ADD2:%[0-9]+]]:vgpr(p1) = G_PTR_ADD %in_addr, [[C2]](i64) + ; GFX7-NEXT: [[LOAD3:%[0-9]+]]:vgpr(<4 x i32>) = G_LOAD [[PTR_ADD2]](p1) :: (load (<4 x i32>) from unknown-address + 
48, align 4, addrspace 1) + ; GFX7-NEXT: %load:vgpr(<16 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>), [[LOAD2]](<4 x i32>), [[LOAD3]](<4 x i32>) + ; GFX7-NEXT: %load0_3:vgpr(<4 x i32>), %load4_7:vgpr(<4 x i32>), %load8_11:vgpr(<4 x i32>), %load12_15:vgpr(<4 x i32>) = G_UNMERGE_VALUES %load(<16 x i32>) + ; GFX7-NEXT: G_STORE %load0_3(<4 x i32>), %out_addr(p1) :: (store (<4 x i32>), align 4, addrspace 1) + ; GFX7-NEXT: %cst16:sgpr(i64) = G_CONSTANT i64 16 + ; GFX7-NEXT: %out_addr_plus_16:sgpr(p1) = G_PTR_ADD %out_addr, %cst16(i64) + ; GFX7-NEXT: G_STORE %load4_7(<4 x i32>), %out_addr_plus_16(p1) :: (store (<4 x i32>), align 4, addrspace 1) + ; GFX7-NEXT: %cst32:sgpr(i64) = G_CONSTANT i64 32 + ; GFX7-NEXT: %out_addr_plus_32:sgpr(p1) = G_PTR_ADD %out_addr, %cst32(i64) + ; GFX7-NEXT: G_STORE %load8_11(<4 x i32>), %out_addr_plus_32(p1) :: (store (<4 x i32>), align 4, addrspace 1) + ; GFX7-NEXT: %cst48:sgpr(i64) = G_CONSTANT i64 48 + ; GFX7-NEXT: %out_addr_plus_48:sgpr(p1) = G_PTR_ADD %out_addr, %cst48(i64) + ; GFX7-NEXT: G_STORE %load12_15(<4 x i32>), %out_addr_plus_48(p1) :: (store (<4 x i32>), align 4, addrspace 1) ; GFX7-NEXT: S_ENDPGM 0 + ; ; GFX1010-LABEL: name: test_uniform_load_without_noclobber ; GFX1010: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX1010-NEXT: {{ $}} ; GFX1010-NEXT: %in_addr:sgpr(p1) = COPY $sgpr0_sgpr1 ; GFX1010-NEXT: %out_addr:sgpr(p1) = COPY $sgpr2_sgpr3 ; GFX1010-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY %in_addr(p1) - ; GFX1010-NEXT: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD %in_addr(p1) :: (load (<4 x s32>), align 4, addrspace 1) - ; GFX1010-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; GFX1010-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD %in_addr, [[C]](s64) - ; GFX1010-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<4 x s32>) from unknown-address + 16, align 4, addrspace 1) - ; GFX1010-NEXT: [[C1:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 32 - ; GFX1010-NEXT: [[PTR_ADD1:%[0-9]+]]:vgpr(p1) = G_PTR_ADD %in_addr, [[C1]](s64) - ; GFX1010-NEXT: [[LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD1]](p1) :: (load (<4 x s32>) from unknown-address + 32, align 4, addrspace 1) - ; GFX1010-NEXT: [[C2:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 48 - ; GFX1010-NEXT: [[PTR_ADD2:%[0-9]+]]:vgpr(p1) = G_PTR_ADD %in_addr, [[C2]](s64) - ; GFX1010-NEXT: [[LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD2]](p1) :: (load (<4 x s32>) from unknown-address + 48, align 4, addrspace 1) - ; GFX1010-NEXT: %load:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) - ; GFX1010-NEXT: %load0_3:vgpr(<4 x s32>), %load4_7:vgpr(<4 x s32>), %load8_11:vgpr(<4 x s32>), %load12_15:vgpr(<4 x s32>) = G_UNMERGE_VALUES %load(<16 x s32>) + ; GFX1010-NEXT: [[LOAD:%[0-9]+]]:vgpr(<4 x i32>) = G_LOAD %in_addr(p1) :: (load (<4 x i32>), align 4, addrspace 1) + ; GFX1010-NEXT: [[C:%[0-9]+]]:vgpr(i64) = G_CONSTANT i64 16 + ; GFX1010-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD %in_addr, [[C]](i64) + ; GFX1010-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<4 x i32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<4 x i32>) from unknown-address + 16, align 4, addrspace 1) + ; GFX1010-NEXT: [[C1:%[0-9]+]]:vgpr(i64) = G_CONSTANT i64 32 + ; GFX1010-NEXT: [[PTR_ADD1:%[0-9]+]]:vgpr(p1) = G_PTR_ADD %in_addr, [[C1]](i64) + ; GFX1010-NEXT: [[LOAD2:%[0-9]+]]:vgpr(<4 x i32>) = G_LOAD [[PTR_ADD1]](p1) :: (load (<4 x i32>) from unknown-address + 32, align 4, addrspace 1) + ; GFX1010-NEXT: [[C2:%[0-9]+]]:vgpr(i64) = G_CONSTANT i64 48 + ; 
GFX1010-NEXT: [[PTR_ADD2:%[0-9]+]]:vgpr(p1) = G_PTR_ADD %in_addr, [[C2]](i64) + ; GFX1010-NEXT: [[LOAD3:%[0-9]+]]:vgpr(<4 x i32>) = G_LOAD [[PTR_ADD2]](p1) :: (load (<4 x i32>) from unknown-address + 48, align 4, addrspace 1) + ; GFX1010-NEXT: %load:vgpr(<16 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>), [[LOAD2]](<4 x i32>), [[LOAD3]](<4 x i32>) + ; GFX1010-NEXT: %load0_3:vgpr(<4 x i32>), %load4_7:vgpr(<4 x i32>), %load8_11:vgpr(<4 x i32>), %load12_15:vgpr(<4 x i32>) = G_UNMERGE_VALUES %load(<16 x i32>) ; GFX1010-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY %out_addr(p1) - ; GFX1010-NEXT: G_STORE %load0_3(<4 x s32>), [[COPY1]](p1) :: (store (<4 x s32>), align 4, addrspace 1) - ; GFX1010-NEXT: %cst16:sgpr(s64) = G_CONSTANT i64 16 - ; GFX1010-NEXT: %out_addr_plus_16:sgpr(p1) = G_PTR_ADD %out_addr, %cst16(s64) + ; GFX1010-NEXT: G_STORE %load0_3(<4 x i32>), [[COPY1]](p1) :: (store (<4 x i32>), align 4, addrspace 1) + ; GFX1010-NEXT: %cst16:sgpr(i64) = G_CONSTANT i64 16 + ; GFX1010-NEXT: %out_addr_plus_16:sgpr(p1) = G_PTR_ADD %out_addr, %cst16(i64) ; GFX1010-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY %out_addr_plus_16(p1) - ; GFX1010-NEXT: G_STORE %load4_7(<4 x s32>), [[COPY2]](p1) :: (store (<4 x s32>), align 4, addrspace 1) - ; GFX1010-NEXT: %cst32:sgpr(s64) = G_CONSTANT i64 32 - ; GFX1010-NEXT: %out_addr_plus_32:sgpr(p1) = G_PTR_ADD %out_addr, %cst32(s64) + ; GFX1010-NEXT: G_STORE %load4_7(<4 x i32>), [[COPY2]](p1) :: (store (<4 x i32>), align 4, addrspace 1) + ; GFX1010-NEXT: %cst32:sgpr(i64) = G_CONSTANT i64 32 + ; GFX1010-NEXT: %out_addr_plus_32:sgpr(p1) = G_PTR_ADD %out_addr, %cst32(i64) ; GFX1010-NEXT: [[COPY3:%[0-9]+]]:vgpr(p1) = COPY %out_addr_plus_32(p1) - ; GFX1010-NEXT: G_STORE %load8_11(<4 x s32>), [[COPY3]](p1) :: (store (<4 x s32>), align 4, addrspace 1) - ; GFX1010-NEXT: %cst48:sgpr(s64) = G_CONSTANT i64 48 - ; GFX1010-NEXT: %out_addr_plus_48:sgpr(p1) = G_PTR_ADD %out_addr, %cst48(s64) + ; GFX1010-NEXT: G_STORE %load8_11(<4 x i32>), [[COPY3]](p1) :: (store (<4 x i32>), align 4, addrspace 1) + ; GFX1010-NEXT: %cst48:sgpr(i64) = G_CONSTANT i64 48 + ; GFX1010-NEXT: %out_addr_plus_48:sgpr(p1) = G_PTR_ADD %out_addr, %cst48(i64) ; GFX1010-NEXT: [[COPY4:%[0-9]+]]:vgpr(p1) = COPY %out_addr_plus_48(p1) - ; GFX1010-NEXT: G_STORE %load12_15(<4 x s32>), [[COPY4]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; GFX1010-NEXT: G_STORE %load12_15(<4 x i32>), [[COPY4]](p1) :: (store (<4 x i32>), align 4, addrspace 1) ; GFX1010-NEXT: S_ENDPGM 0 %in_addr:_(p1) = COPY $sgpr0_sgpr1 %out_addr:_(p1) = COPY $sgpr2_sgpr3 - %load:_(<16 x s32>) = G_LOAD %in_addr(p1) :: (load (<16 x s32>), align 4, addrspace 1) - %load0_3:_(<4 x s32>), %load4_7:_(<4 x s32>), %load8_11:_(<4 x s32>), %load12_15:_(<4 x s32>) = G_UNMERGE_VALUES %load(<16 x s32>) - G_STORE %load0_3(<4 x s32>), %out_addr(p1) :: (store (<4 x s32>), align 4, addrspace 1) - %cst16:_(s64) = G_CONSTANT i64 16 - %out_addr_plus_16:_(p1) = G_PTR_ADD %out_addr, %cst16(s64) - G_STORE %load4_7(<4 x s32>), %out_addr_plus_16(p1) :: (store (<4 x s32>), align 4, addrspace 1) - %cst32:_(s64) = G_CONSTANT i64 32 - %out_addr_plus_32:_(p1) = G_PTR_ADD %out_addr, %cst32(s64) - G_STORE %load8_11(<4 x s32>), %out_addr_plus_32(p1) :: (store (<4 x s32>), align 4, addrspace 1) - %cst48:_(s64) = G_CONSTANT i64 48 - %out_addr_plus_48:_(p1) = G_PTR_ADD %out_addr, %cst48(s64) - G_STORE %load12_15(<4 x s32>), %out_addr_plus_48(p1) :: (store (<4 x s32>), align 4, addrspace 1) + %load:_(<16 x i32>) = G_LOAD %in_addr(p1) :: (load (<16 x i32>), align 4, 
addrspace 1) + %load0_3:_(<4 x i32>), %load4_7:_(<4 x i32>), %load8_11:_(<4 x i32>), %load12_15:_(<4 x i32>) = G_UNMERGE_VALUES %load(<16 x i32>) + G_STORE %load0_3(<4 x i32>), %out_addr(p1) :: (store (<4 x i32>), align 4, addrspace 1) + %cst16:_(i64) = G_CONSTANT i64 16 + %out_addr_plus_16:_(p1) = G_PTR_ADD %out_addr, %cst16(i64) + G_STORE %load4_7(<4 x i32>), %out_addr_plus_16(p1) :: (store (<4 x i32>), align 4, addrspace 1) + %cst32:_(i64) = G_CONSTANT i64 32 + %out_addr_plus_32:_(p1) = G_PTR_ADD %out_addr, %cst32(i64) + G_STORE %load8_11(<4 x i32>), %out_addr_plus_32(p1) :: (store (<4 x i32>), align 4, addrspace 1) + %cst48:_(i64) = G_CONSTANT i64 48 + %out_addr_plus_48:_(p1) = G_PTR_ADD %out_addr, %cst48(i64) + G_STORE %load12_15(<4 x i32>), %out_addr_plus_48(p1) :: (store (<4 x i32>), align 4, addrspace 1) S_ENDPGM 0 ... @@ -101,43 +102,44 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: %ptr:sgpr(p4) = COPY $sgpr0_sgpr1 ; GFX7-NEXT: %out:sgpr(p1) = COPY $sgpr2_sgpr3 - ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD %ptr(p4) :: (load (<4 x s32>), align 1, addrspace 4) - ; GFX7-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; GFX7-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD %ptr, [[C]](s64) - ; GFX7-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<4 x s32>) from unknown-address + 16, align 1, addrspace 4) - ; GFX7-NEXT: %load:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) - ; GFX7-NEXT: %load0_3:vgpr(<4 x s32>), %load4_7:vgpr(<4 x s32>) = G_UNMERGE_VALUES %load(<8 x s32>) - ; GFX7-NEXT: G_STORE %load0_3(<4 x s32>), %out(p1) :: (store (<4 x s32>), align 32, addrspace 1) - ; GFX7-NEXT: %cst_16:sgpr(s64) = G_CONSTANT i64 16 - ; GFX7-NEXT: %out_plus_16:sgpr(p1) = G_PTR_ADD %out, %cst_16(s64) - ; GFX7-NEXT: G_STORE %load4_7(<4 x s32>), %out_plus_16(p1) :: (store (<4 x s32>), align 32, addrspace 1) + ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vgpr(<4 x i32>) = G_LOAD %ptr(p4) :: (load (<4 x i32>), align 1, addrspace 4) + ; GFX7-NEXT: [[C:%[0-9]+]]:vgpr(i64) = G_CONSTANT i64 16 + ; GFX7-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD %ptr, [[C]](i64) + ; GFX7-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<4 x i32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<4 x i32>) from unknown-address + 16, align 1, addrspace 4) + ; GFX7-NEXT: %load:vgpr(<8 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>) + ; GFX7-NEXT: %load0_3:vgpr(<4 x i32>), %load4_7:vgpr(<4 x i32>) = G_UNMERGE_VALUES %load(<8 x i32>) + ; GFX7-NEXT: G_STORE %load0_3(<4 x i32>), %out(p1) :: (store (<4 x i32>), align 32, addrspace 1) + ; GFX7-NEXT: %cst_16:sgpr(i64) = G_CONSTANT i64 16 + ; GFX7-NEXT: %out_plus_16:sgpr(p1) = G_PTR_ADD %out, %cst_16(i64) + ; GFX7-NEXT: G_STORE %load4_7(<4 x i32>), %out_plus_16(p1) :: (store (<4 x i32>), align 32, addrspace 1) ; GFX7-NEXT: S_ENDPGM 0 + ; ; GFX1010-LABEL: name: test_s_load_constant_v8i32_align1 ; GFX1010: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX1010-NEXT: {{ $}} ; GFX1010-NEXT: %ptr:sgpr(p4) = COPY $sgpr0_sgpr1 ; GFX1010-NEXT: %out:sgpr(p1) = COPY $sgpr2_sgpr3 ; GFX1010-NEXT: [[COPY:%[0-9]+]]:vgpr(p4) = COPY %ptr(p4) - ; GFX1010-NEXT: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD %ptr(p4) :: (load (<4 x s32>), align 1, addrspace 4) - ; GFX1010-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; GFX1010-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD %ptr, [[C]](s64) - ; GFX1010-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<4 x s32>) from unknown-address + 16, align 1, addrspace 4) - ; GFX1010-NEXT: %load:vgpr(<8 x 
s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) - ; GFX1010-NEXT: %load0_3:vgpr(<4 x s32>), %load4_7:vgpr(<4 x s32>) = G_UNMERGE_VALUES %load(<8 x s32>) + ; GFX1010-NEXT: [[LOAD:%[0-9]+]]:vgpr(<4 x i32>) = G_LOAD %ptr(p4) :: (load (<4 x i32>), align 1, addrspace 4) + ; GFX1010-NEXT: [[C:%[0-9]+]]:vgpr(i64) = G_CONSTANT i64 16 + ; GFX1010-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD %ptr, [[C]](i64) + ; GFX1010-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<4 x i32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<4 x i32>) from unknown-address + 16, align 1, addrspace 4) + ; GFX1010-NEXT: %load:vgpr(<8 x i32>) = G_CONCAT_VECTORS [[LOAD]](<4 x i32>), [[LOAD1]](<4 x i32>) + ; GFX1010-NEXT: %load0_3:vgpr(<4 x i32>), %load4_7:vgpr(<4 x i32>) = G_UNMERGE_VALUES %load(<8 x i32>) ; GFX1010-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY %out(p1) - ; GFX1010-NEXT: G_STORE %load0_3(<4 x s32>), [[COPY1]](p1) :: (store (<4 x s32>), align 32, addrspace 1) - ; GFX1010-NEXT: %cst_16:sgpr(s64) = G_CONSTANT i64 16 - ; GFX1010-NEXT: %out_plus_16:sgpr(p1) = G_PTR_ADD %out, %cst_16(s64) + ; GFX1010-NEXT: G_STORE %load0_3(<4 x i32>), [[COPY1]](p1) :: (store (<4 x i32>), align 32, addrspace 1) + ; GFX1010-NEXT: %cst_16:sgpr(i64) = G_CONSTANT i64 16 + ; GFX1010-NEXT: %out_plus_16:sgpr(p1) = G_PTR_ADD %out, %cst_16(i64) ; GFX1010-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY %out_plus_16(p1) - ; GFX1010-NEXT: G_STORE %load4_7(<4 x s32>), [[COPY2]](p1) :: (store (<4 x s32>), align 32, addrspace 1) + ; GFX1010-NEXT: G_STORE %load4_7(<4 x i32>), [[COPY2]](p1) :: (store (<4 x i32>), align 32, addrspace 1) ; GFX1010-NEXT: S_ENDPGM 0 %ptr:_(p4) = COPY $sgpr0_sgpr1 %out:_(p1) = COPY $sgpr2_sgpr3 - %load:_(<8 x s32>) = G_LOAD %ptr(p4) :: (load (<8 x s32>), align 1, addrspace 4) - %load0_3:_(<4 x s32>), %load4_7:_(<4 x s32>) = G_UNMERGE_VALUES %load(<8 x s32>) - G_STORE %load0_3(<4 x s32>), %out(p1) :: (store (<4 x s32>), align 32, addrspace 1) - %cst_16:_(s64) = G_CONSTANT i64 16 - %out_plus_16:_(p1) = G_PTR_ADD %out, %cst_16(s64) - G_STORE %load4_7(<4 x s32>), %out_plus_16(p1) :: (store (<4 x s32>), basealign 32, addrspace 1) + %load:_(<8 x i32>) = G_LOAD %ptr(p4) :: (load (<8 x i32>), align 1, addrspace 4) + %load0_3:_(<4 x i32>), %load4_7:_(<4 x i32>) = G_UNMERGE_VALUES %load(<8 x i32>) + G_STORE %load0_3(<4 x i32>), %out(p1) :: (store (<4 x i32>), align 32, addrspace 1) + %cst_16:_(i64) = G_CONSTANT i64 16 + %out_plus_16:_(p1) = G_PTR_ADD %out, %cst_16(i64) + G_STORE %load4_7(<4 x i32>), %out_plus_16(p1) :: (store (<4 x i32>), align 32, addrspace 1) S_ENDPGM 0 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-unmerge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-unmerge-values.mir index 8e2a60502350e..81a93bb7b3a08 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-unmerge-values.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-unmerge-values.mir @@ -11,14 +11,14 @@ body: | ; CHECK-LABEL: name: test_unmerge_s64_s32_s ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV]](s32) - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s32), %2:_(s32) = G_UNMERGE_VALUES %0:_(s64) - $vgpr0 = COPY %1(s32) - $vgpr2 = COPY %1(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(i32), [[UV1:%[0-9]+]]:sgpr(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV]](i32) + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(i32), %2:_(i32) = G_UNMERGE_VALUES %0(i64) + $vgpr0 = COPY %1(i32) + $vgpr2 = COPY %1(i32) ... --- @@ -31,14 +31,14 @@ body: | ; CHECK-LABEL: name: test_unmerge_s64_s32_v ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s32), %2:_(s32) = G_UNMERGE_VALUES %0:_(s64) - $vgpr0 = COPY %1(s32) - $vgpr2 = COPY %1(s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i32), %2:_(i32) = G_UNMERGE_VALUES %0(i64) + $vgpr0 = COPY %1(i32) + $vgpr2 = COPY %1(i32) ... --- @@ -51,12 +51,12 @@ body: | ; CHECK-LABEL: name: test_unmerge_s32_s64_a ; CHECK: liveins: $agpr0_agpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(s64) = COPY $agpr0_agpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:agpr(s32), [[UV1:%[0-9]+]]:agpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: $agpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $agpr2 = COPY [[UV1]](s32) - %0:_(s64) = COPY $agpr0_agpr1 - %1:_(s32), %2:_(s32) = G_UNMERGE_VALUES %0:_(s64) - $agpr0 = COPY %1 - $agpr2 = COPY %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(i64) = COPY $agpr0_agpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:agpr(i32), [[UV1:%[0-9]+]]:agpr(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: $agpr0 = COPY [[UV]](i32) + ; CHECK-NEXT: $agpr2 = COPY [[UV1]](i32) + %0:_(i64) = COPY $agpr0_agpr1 + %1:_(i32), %2:_(i32) = G_UNMERGE_VALUES %0(i64) + $agpr0 = COPY %1(i32) + $agpr2 = COPY %2(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-usube.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-usube.mir index 8c7a029d3693e..2d39176b52f0a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-usube.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-usube.mir @@ -12,33 +12,34 @@ body: | ; FAST-LABEL: name: usube_s32_sss ; FAST: liveins: $sgpr0, $sgpr1, $sgpr2 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; FAST-NEXT: [[USUBE:%[0-9]+]]:sgpr(s32), [[USUBE1:%[0-9]+]]:sgpr(s32) = G_USUBE [[COPY]], [[COPY1]], [[ZEXT]] - ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[USUBE1]](s32) + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; FAST-NEXT: [[USUBE:%[0-9]+]]:sgpr(i32), [[USUBE1:%[0-9]+]]:sgpr(i32) = G_USUBE [[COPY]], [[COPY1]], [[ZEXT]] + ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[USUBE1]](i32) + ; ; GREEDY-LABEL: name: usube_s32_sss ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; GREEDY-NEXT: [[USUBE:%[0-9]+]]:sgpr(s32), [[USUBE1:%[0-9]+]]:sgpr(s32) = G_USUBE [[COPY]], [[COPY1]], [[ZEXT]] - ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[USUBE1]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $sgpr2 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(eq), %2, %3 - %5:_(s32), %6:_(s1) = G_USUBE %0, %1, %4 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; GREEDY-NEXT: [[USUBE:%[0-9]+]]:sgpr(i32), [[USUBE1:%[0-9]+]]:sgpr(i32) = G_USUBE [[COPY]], [[COPY1]], [[ZEXT]] + ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[USUBE1]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $sgpr2 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + %5:_(i32), %6:_(i1) = G_USUBE %0, %1, %4 ... 
--- @@ -51,33 +52,34 @@ body: | ; FAST-LABEL: name: usube_s32_vss ; FAST: liveins: $vgpr0, $sgpr0, $sgpr1 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[USUBE:%[0-9]+]]:vgpr(s32), [[USUBE1:%[0-9]+]]:vcc(s1) = G_USUBE [[COPY]], [[COPY3]], [[COPY4]] + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; FAST-NEXT: [[USUBE:%[0-9]+]]:vgpr(i32), [[USUBE1:%[0-9]+]]:vcc(i1) = G_USUBE [[COPY]], [[COPY3]], [[COPY4]] + ; ; GREEDY-LABEL: name: usube_s32_vss ; GREEDY: liveins: $vgpr0, $sgpr0, $sgpr1 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[USUBE:%[0-9]+]]:vgpr(s32), [[USUBE1:%[0-9]+]]:vcc(s1) = G_USUBE [[COPY]], [[COPY3]], [[COPY4]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s32) = COPY $sgpr1 - %3:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(eq), %2, %3 - %5:_(s32), %6:_(s1) = G_USUBE %0, %1, %4 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY2]](i32), [[C]] + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; GREEDY-NEXT: [[USUBE:%[0-9]+]]:vgpr(i32), [[USUBE1:%[0-9]+]]:vcc(i1) = G_USUBE [[COPY]], [[COPY3]], [[COPY4]] + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr0 + %2:_(i32) = COPY $sgpr1 + %3:_(i32) = G_CONSTANT i32 0 + %4:_(i1) = G_ICMP intpred(eq), %2(i32), %3 + %5:_(i32), %6:_(i1) = G_USUBE %0, %1, %4 ... 
--- name: usube_s32_ssv @@ -89,30 +91,31 @@ body: | ; FAST-LABEL: name: usube_s32_ssv ; FAST: liveins: $sgpr0, $sgpr1, $vgpr0 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY2]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[USUBE:%[0-9]+]]:vgpr(s32), [[USUBE1:%[0-9]+]]:vcc(s1) = G_USUBE [[COPY3]], [[COPY4]], [[COPY5]] + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY2]](i32) + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; FAST-NEXT: [[COPY5:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; FAST-NEXT: [[USUBE:%[0-9]+]]:vgpr(i32), [[USUBE1:%[0-9]+]]:vcc(i1) = G_USUBE [[COPY3]], [[COPY4]], [[COPY5]] + ; ; GREEDY-LABEL: name: usube_s32_ssv ; GREEDY: liveins: $sgpr0, $sgpr1, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY2]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[USUBE:%[0-9]+]]:vgpr(s32), [[USUBE1:%[0-9]+]]:vcc(s1) = G_USUBE [[COPY3]], [[COPY4]], [[COPY5]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $vgpr0 - %3:_(s1) = G_TRUNC %2 - %4:_(s32), %5:_(s1) = G_USUBE %0, %1, %3 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY2]](i32) + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; GREEDY-NEXT: [[USUBE:%[0-9]+]]:vgpr(i32), [[USUBE1:%[0-9]+]]:vcc(i1) = G_USUBE [[COPY3]], [[COPY4]], [[COPY5]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $vgpr0 + %3:_(i1) = G_TRUNC %2(i32) + %4:_(i32), %5:_(i1) = G_USUBE %0, %1, %3 ... 
--- @@ -125,26 +128,27 @@ body: | ; FAST-LABEL: name: usube_s32_vvs ; FAST: liveins: $vgpr0, $vgpr1, $sgpr0 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY2]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[USUBE:%[0-9]+]]:vgpr(s32), [[USUBE1:%[0-9]+]]:vcc(s1) = G_USUBE [[COPY]], [[COPY1]], [[COPY3]] + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY2]](i32) + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; FAST-NEXT: [[USUBE:%[0-9]+]]:vgpr(i32), [[USUBE1:%[0-9]+]]:vcc(i1) = G_USUBE [[COPY]], [[COPY1]], [[COPY3]] + ; ; GREEDY-LABEL: name: usube_s32_vvs ; GREEDY: liveins: $vgpr0, $vgpr1, $sgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY2]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[USUBE:%[0-9]+]]:vgpr(s32), [[USUBE1:%[0-9]+]]:vcc(s1) = G_USUBE [[COPY]], [[COPY1]], [[COPY3]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $sgpr0 - %3:_(s1) = G_TRUNC %2 - %4:_(s32), %5:_(s1) = G_USUBE %0, %1, %3 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY2]](i32) + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; GREEDY-NEXT: [[USUBE:%[0-9]+]]:vgpr(i32), [[USUBE1:%[0-9]+]]:vcc(i1) = G_USUBE [[COPY]], [[COPY1]], [[COPY3]] + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $sgpr0 + %3:_(i1) = G_TRUNC %2(i32) + %4:_(i32), %5:_(i1) = G_USUBE %0, %1, %3 ... 
--- @@ -157,26 +161,27 @@ body: | ; FAST-LABEL: name: usube_s32_sss_noscc ; FAST: liveins: $sgpr0, $sgpr1, $sgpr2 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY2]](s32) - ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; FAST-NEXT: [[USUBE:%[0-9]+]]:sgpr(s32), [[USUBE1:%[0-9]+]]:sgpr(s32) = G_USUBE [[COPY]], [[COPY1]], [[ZEXT]] - ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[USUBE1]](s32) + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY2]](i32) + ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; FAST-NEXT: [[USUBE:%[0-9]+]]:sgpr(i32), [[USUBE1:%[0-9]+]]:sgpr(i32) = G_USUBE [[COPY]], [[COPY1]], [[ZEXT]] + ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[USUBE1]](i32) + ; ; GREEDY-LABEL: name: usube_s32_sss_noscc ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY2]](s32) - ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; GREEDY-NEXT: [[USUBE:%[0-9]+]]:sgpr(s32), [[USUBE1:%[0-9]+]]:sgpr(s32) = G_USUBE [[COPY]], [[COPY1]], [[ZEXT]] - ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[USUBE1]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $sgpr2 - %3:_(s1) = G_TRUNC %2 - %4:_(s32), %5:_(s1) = G_USUBE %0, %1, %3 + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY2]](i32) + ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; GREEDY-NEXT: [[USUBE:%[0-9]+]]:sgpr(i32), [[USUBE1:%[0-9]+]]:sgpr(i32) = G_USUBE [[COPY]], [[COPY1]], [[ZEXT]] + ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[USUBE1]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $sgpr2 + %3:_(i1) = G_TRUNC %2(i32) + %4:_(i32), %5:_(i1) = G_USUBE %0, %1, %3 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-usubo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-usubo.mir index 1e0129172ebcf..fe66bad9e54d1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-usubo.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-usubo.mir @@ -12,13 +12,13 @@ body: | ; CHECK-LABEL: name: usubo_s32_ss ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[USUBO:%[0-9]+]]:sgpr(s32), [[USUBO1:%[0-9]+]]:sgpr(s32) = G_USUBO [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[USUBO1]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32), %3:_(s1) = G_USUBO %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[USUBO:%[0-9]+]]:sgpr(i32), [[USUBO1:%[0-9]+]]:sgpr(i32) = G_USUBO [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[USUBO1]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32), %3:_(i1) = G_USUBO %0, %1 ... --- @@ -31,13 +31,13 @@ body: | ; CHECK-LABEL: name: usubo_s32_sv ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[USUBO:%[0-9]+]]:vgpr(s32), [[USUBO1:%[0-9]+]]:vcc(s1) = G_USUBO [[COPY2]], [[COPY1]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32), %3:_(s1) = G_USUBO %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[USUBO:%[0-9]+]]:vgpr(i32), [[USUBO1:%[0-9]+]]:vcc(i1) = G_USUBO [[COPY2]], [[COPY1]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i32), %3:_(i1) = G_USUBO %0, %1 ... --- @@ -50,13 +50,13 @@ body: | ; CHECK-LABEL: name: usubo_s32_vs ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[USUBO:%[0-9]+]]:vgpr(s32), [[USUBO1:%[0-9]+]]:vcc(s1) = G_USUBO [[COPY]], [[COPY2]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s32), %3:_(s1) = G_USUBO %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[USUBO:%[0-9]+]]:vgpr(i32), [[USUBO1:%[0-9]+]]:vcc(i1) = G_USUBO [[COPY]], [[COPY2]] + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr0 + %2:_(i32), %3:_(i1) = G_USUBO %0, %1 ... 
--- @@ -69,10 +69,10 @@ body: | ; CHECK-LABEL: name: usubo_s32_vv ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[USUBO:%[0-9]+]]:vgpr(s32), [[USUBO1:%[0-9]+]]:vcc(s1) = G_USUBO [[COPY]], [[COPY1]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32), %3:_(s1) = G_USUBO %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[USUBO:%[0-9]+]]:vgpr(i32), [[USUBO1:%[0-9]+]]:vcc(i1) = G_USUBO [[COPY]], [[COPY1]] + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32), %3:_(i1) = G_USUBO %0, %1 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-waterfall-agpr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-waterfall-agpr.mir index b145a62318e30..6081f1cd4b3f0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-waterfall-agpr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-waterfall-agpr.mir @@ -17,29 +17,29 @@ body: | ; CHECK: successors: %bb.1(0x80000000) ; CHECK-NEXT: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr0, $vgpr1, $agpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %val:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: %rsrc:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: %agpr:agpr(s32) = COPY $agpr0 - ; CHECK-NEXT: %voffset:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: %zero:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY %zero(s32) + ; CHECK-NEXT: %val:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: %rsrc:sgpr(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: %agpr:agpr(i32) = COPY $agpr0 + ; CHECK-NEXT: %voffset:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: %zero:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY %zero(i32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY %agpr(s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(i32) = COPY %agpr(i32) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: .1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.0, %9, %bb.2 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](s32), [[COPY1]] - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[COPY1]](i32), implicit $exec + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](i32), [[COPY1]] + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](i1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](i64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: .2: ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: 
G_AMDGPU_BUFFER_STORE %val(s32), %rsrc(<4 x s32>), [[COPY]](s32), %voffset, [[V_READFIRSTLANE_B32_]], 0, 0, 0 :: (dereferenceable store (s32), addrspace 4) + ; CHECK-NEXT: G_AMDGPU_BUFFER_STORE %val(i32), %rsrc(<4 x i32>), [[COPY]](i32), %voffset, [[V_READFIRSTLANE_B32_]], 0, 0, 0 :: (dereferenceable store (i32), addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec ; CHECK-NEXT: {{ $}} @@ -50,12 +50,12 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: .4: ; CHECK-NEXT: S_ENDPGM 0 - %val:_(s32) = COPY $vgpr0 - %rsrc:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %agpr:_(s32) = COPY $agpr0 - %voffset:_(s32) = COPY $vgpr1 - %zero:_(s32) = G_CONSTANT i32 0 - G_AMDGPU_BUFFER_STORE %val, %rsrc, %zero, %voffset, %agpr, 0, 0, 0 :: (dereferenceable store (s32), addrspace 4) + %val:_(i32) = COPY $vgpr0 + %rsrc:_(<4 x i32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %agpr:_(i32) = COPY $agpr0 + %voffset:_(i32) = COPY $vgpr1 + %zero:_(i32) = G_CONSTANT i32 0 + G_AMDGPU_BUFFER_STORE %val(i32), %rsrc(<4 x i32>), %zero(i32), %voffset, %agpr, 0, 0, 0 :: (dereferenceable store (i32), addrspace 4) S_ENDPGM 0 ... @@ -72,42 +72,42 @@ body: | ; CHECK: successors: %bb.1(0x80000000) ; CHECK-NEXT: liveins: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(<8 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(<8 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<8 x s32>) = COPY [[COPY]](<8 x s32>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<8 x i32>) = COPY [[COPY]](<8 x i32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: .1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.0, %6, %bb.2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32), [[UV4:%[0-9]+]]:vgpr_32(s32), [[UV5:%[0-9]+]]:vgpr_32(s32), [[UV6:%[0-9]+]]:vgpr_32(s32), [[UV7:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[COPY2]](<8 x s32>) - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV4]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV5]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV6]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV7]](s32), implicit $exec - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), 
[[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; CHECK-NEXT: [[UV8:%[0-9]+]]:vgpr(s64), [[UV9:%[0-9]+]]:vgpr(s64), [[UV10:%[0-9]+]]:vgpr(s64), [[UV11:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[COPY2]](<8 x s32>) - ; CHECK-NEXT: [[UV12:%[0-9]+]]:sgpr(s64), [[UV13:%[0-9]+]]:sgpr(s64), [[UV14:%[0-9]+]]:sgpr(s64), [[UV15:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV12]](s64), [[UV8]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV13]](s64), [[UV9]] - ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV14]](s64), [[UV10]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]] - ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:vcc(s1) = G_AND [[AND1]], [[ICMP3]] - ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(i32), [[UV1:%[0-9]+]]:vgpr_32(i32), [[UV2:%[0-9]+]]:vgpr_32(i32), [[UV3:%[0-9]+]]:vgpr_32(i32), [[UV4:%[0-9]+]]:vgpr_32(i32), [[UV5:%[0-9]+]]:vgpr_32(i32), [[UV6:%[0-9]+]]:vgpr_32(i32), [[UV7:%[0-9]+]]:vgpr_32(i32) = G_UNMERGE_VALUES [[COPY2]](<8 x i32>) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV]](i32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV1]](i32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV2]](i32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV3]](i32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV4]](i32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV5]](i32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV6]](i32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(i32) = V_READFIRSTLANE_B32 [[UV7]](i32), implicit $exec + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x i32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](i32), [[V_READFIRSTLANE_B32_1]](i32), [[V_READFIRSTLANE_B32_2]](i32), [[V_READFIRSTLANE_B32_3]](i32), [[V_READFIRSTLANE_B32_4]](i32), [[V_READFIRSTLANE_B32_5]](i32), [[V_READFIRSTLANE_B32_6]](i32), [[V_READFIRSTLANE_B32_7]](i32) + ; CHECK-NEXT: [[UV8:%[0-9]+]]:vgpr(i64), [[UV9:%[0-9]+]]:vgpr(i64), [[UV10:%[0-9]+]]:vgpr(i64), [[UV11:%[0-9]+]]:vgpr(i64) = G_UNMERGE_VALUES [[COPY2]](<8 x i32>) + ; CHECK-NEXT: [[UV12:%[0-9]+]]:sgpr(i64), [[UV13:%[0-9]+]]:sgpr(i64), [[UV14:%[0-9]+]]:sgpr(i64), [[UV15:%[0-9]+]]:sgpr(i64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x i32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV12]](i64), [[UV8]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV13]](i64), [[UV9]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(i1) = G_AND [[ICMP]], [[ICMP1]] + ; 
CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV14]](i64), [[UV10]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vcc(i1) = G_AND [[AND]], [[ICMP2]] + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[UV15]](i64), [[UV11]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:vcc(i1) = G_AND [[AND1]], [[ICMP3]] + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(i64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND2]](i1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](i64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: .2: ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY1]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>)) + ; CHECK-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY1]](i32), [[BUILD_VECTOR]](<8 x i32>), 0, 0, 0 :: (dereferenceable load (<4 x i32>)) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec ; CHECK-NEXT: {{ $}} @@ -117,9 +117,9 @@ body: | ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: .4: - ; CHECK-NEXT: S_ENDPGM 0, implicit [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - %0:_(<8 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - %1:_(s32) = COPY $vgpr0 - %2:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, %1(s32), %0(<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>)) - S_ENDPGM 0, implicit %2 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x i32>) + %0:_(<8 x i32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + %1:_(i32) = COPY $vgpr0 + %2:_(<4 x i32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, %1(i32), %0(<8 x i32>), 0, 0, 0 :: (dereferenceable load (<4 x i32>)) + S_ENDPGM 0, implicit %2(<4 x i32>) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-widen-scalar-loads.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-widen-scalar-loads.mir index f1f8d0b6b9df5..bf289cea80219 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-widen-scalar-loads.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-widen-scalar-loads.mir @@ -13,23 +13,25 @@ body: | ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), align 8, addrspace 4) - ; GFX8-NEXT: S_ENDPGM 0, implicit [[LOAD]](s32) + ; GFX8-NEXT: [[LOAD:%[0-9]+]]:sgpr(i32) = G_LOAD [[COPY]](p1) :: (invariant load (i32), align 8, addrspace 4) + ; GFX8-NEXT: S_ENDPGM 0, implicit [[LOAD]](i32) + ; ; GFX9-LABEL: name: constant_load_i8_align8 ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), align 8, addrspace 4) - ; GFX9-NEXT: S_ENDPGM 0, implicit [[LOAD]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:sgpr(i32) = G_LOAD [[COPY]](p1) :: (invariant load (i32), align 8, addrspace 4) + ; GFX9-NEXT: S_ENDPGM 0, implicit [[LOAD]](i32) + ; ; GFX10-LABEL: name: constant_load_i8_align8 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), align 8, addrspace 4) - ; GFX10-NEXT: S_ENDPGM 0, implicit [[LOAD]](s32) - %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_LOAD %0 :: (invariant load (s8), align 8, addrspace 4) - S_ENDPGM 0, implicit %1 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:sgpr(i32) = G_LOAD [[COPY]](p1) :: (invariant load (i32), align 8, addrspace 4) + ; GFX10-NEXT: S_ENDPGM 0, implicit [[LOAD]](i32) + %0:_(p1) = COPY $sgpr0_sgpr1 + %1:_(i32) = G_LOAD %0(p1) :: (invariant load (i8), align 8, addrspace 4) + S_ENDPGM 0, implicit %1(i32) ... 
--- name: constant_load_i8_align4 @@ -42,23 +44,25 @@ body: | ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), addrspace 4) - ; GFX8-NEXT: S_ENDPGM 0, implicit [[LOAD]](s32) + ; GFX8-NEXT: [[LOAD:%[0-9]+]]:sgpr(i32) = G_LOAD [[COPY]](p1) :: (invariant load (i32), addrspace 4) + ; GFX8-NEXT: S_ENDPGM 0, implicit [[LOAD]](i32) + ; ; GFX9-LABEL: name: constant_load_i8_align4 ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), addrspace 4) - ; GFX9-NEXT: S_ENDPGM 0, implicit [[LOAD]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:sgpr(i32) = G_LOAD [[COPY]](p1) :: (invariant load (i32), addrspace 4) + ; GFX9-NEXT: S_ENDPGM 0, implicit [[LOAD]](i32) + ; ; GFX10-LABEL: name: constant_load_i8_align4 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), addrspace 4) - ; GFX10-NEXT: S_ENDPGM 0, implicit [[LOAD]](s32) - %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_LOAD %0 :: (invariant load (s8), align 4, addrspace 4) - S_ENDPGM 0, implicit %1 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:sgpr(i32) = G_LOAD [[COPY]](p1) :: (invariant load (i32), addrspace 4) + ; GFX10-NEXT: S_ENDPGM 0, implicit [[LOAD]](i32) + %0:_(p1) = COPY $sgpr0_sgpr1 + %1:_(i32) = G_LOAD %0(p1) :: (invariant load (i8), align 4, addrspace 4) + S_ENDPGM 0, implicit %1(i32) ... --- name: constant_load_i16_align4 @@ -71,23 +75,25 @@ body: | ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), addrspace 4) - ; GFX8-NEXT: S_ENDPGM 0, implicit [[LOAD]](s32) + ; GFX8-NEXT: [[LOAD:%[0-9]+]]:sgpr(i32) = G_LOAD [[COPY]](p1) :: (invariant load (i32), addrspace 4) + ; GFX8-NEXT: S_ENDPGM 0, implicit [[LOAD]](i32) + ; ; GFX9-LABEL: name: constant_load_i16_align4 ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), addrspace 4) - ; GFX9-NEXT: S_ENDPGM 0, implicit [[LOAD]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:sgpr(i32) = G_LOAD [[COPY]](p1) :: (invariant load (i32), addrspace 4) + ; GFX9-NEXT: S_ENDPGM 0, implicit [[LOAD]](i32) + ; ; GFX10-LABEL: name: constant_load_i16_align4 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), addrspace 4) - ; GFX10-NEXT: S_ENDPGM 0, implicit [[LOAD]](s32) - %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_LOAD %0 :: (invariant load (s16), align 4, addrspace 4) - S_ENDPGM 0, implicit %1 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:sgpr(i32) = G_LOAD [[COPY]](p1) :: (invariant load (i32), addrspace 4) + ; GFX10-NEXT: S_ENDPGM 0, implicit [[LOAD]](i32) + %0:_(p1) = COPY $sgpr0_sgpr1 + %1:_(i32) = G_LOAD %0(p1) :: (invariant load (i16), align 4, addrspace 4) + S_ENDPGM 0, implicit %1(i32) ... 
--- name: constant_sextload_i8_align4 @@ -100,26 +106,28 @@ body: | ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), addrspace 4) - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:sgpr(s32) = G_SEXT_INREG [[LOAD]], 8 - ; GFX8-NEXT: S_ENDPGM 0, implicit [[SEXT_INREG]](s32) + ; GFX8-NEXT: [[LOAD:%[0-9]+]]:sgpr(i32) = G_LOAD [[COPY]](p1) :: (invariant load (i32), addrspace 4) + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:sgpr(i32) = G_SEXT_INREG [[LOAD]], 8 + ; GFX8-NEXT: S_ENDPGM 0, implicit [[SEXT_INREG]](i32) + ; ; GFX9-LABEL: name: constant_sextload_i8_align4 ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), addrspace 4) - ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:sgpr(s32) = G_SEXT_INREG [[LOAD]], 8 - ; GFX9-NEXT: S_ENDPGM 0, implicit [[SEXT_INREG]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:sgpr(i32) = G_LOAD [[COPY]](p1) :: (invariant load (i32), addrspace 4) + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:sgpr(i32) = G_SEXT_INREG [[LOAD]], 8 + ; GFX9-NEXT: S_ENDPGM 0, implicit [[SEXT_INREG]](i32) + ; ; GFX10-LABEL: name: constant_sextload_i8_align4 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), addrspace 4) - ; GFX10-NEXT: [[SEXT_INREG:%[0-9]+]]:sgpr(s32) = G_SEXT_INREG [[LOAD]], 8 - ; GFX10-NEXT: S_ENDPGM 0, implicit [[SEXT_INREG]](s32) - %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_SEXTLOAD %0 :: (invariant load (s8), align 4, addrspace 4) - S_ENDPGM 0, implicit %1 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:sgpr(i32) = G_LOAD [[COPY]](p1) :: (invariant load (i32), addrspace 4) + ; GFX10-NEXT: [[SEXT_INREG:%[0-9]+]]:sgpr(i32) = G_SEXT_INREG [[LOAD]], 8 + ; GFX10-NEXT: S_ENDPGM 0, implicit [[SEXT_INREG]](i32) + %0:_(p1) = COPY $sgpr0_sgpr1 + %1:_(i32) = G_SEXTLOAD %0(p1) :: (invariant load (i8), align 4, addrspace 4) + S_ENDPGM 0, implicit %1(i32) ... 
--- name: constant_sextload_i16_align4 @@ -132,26 +140,28 @@ body: | ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), addrspace 4) - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:sgpr(s32) = G_SEXT_INREG [[LOAD]], 16 - ; GFX8-NEXT: S_ENDPGM 0, implicit [[SEXT_INREG]](s32) + ; GFX8-NEXT: [[LOAD:%[0-9]+]]:sgpr(i32) = G_LOAD [[COPY]](p1) :: (invariant load (i32), addrspace 4) + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:sgpr(i32) = G_SEXT_INREG [[LOAD]], 16 + ; GFX8-NEXT: S_ENDPGM 0, implicit [[SEXT_INREG]](i32) + ; ; GFX9-LABEL: name: constant_sextload_i16_align4 ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), addrspace 4) - ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:sgpr(s32) = G_SEXT_INREG [[LOAD]], 16 - ; GFX9-NEXT: S_ENDPGM 0, implicit [[SEXT_INREG]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:sgpr(i32) = G_LOAD [[COPY]](p1) :: (invariant load (i32), addrspace 4) + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:sgpr(i32) = G_SEXT_INREG [[LOAD]], 16 + ; GFX9-NEXT: S_ENDPGM 0, implicit [[SEXT_INREG]](i32) + ; ; GFX10-LABEL: name: constant_sextload_i16_align4 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), addrspace 4) - ; GFX10-NEXT: [[SEXT_INREG:%[0-9]+]]:sgpr(s32) = G_SEXT_INREG [[LOAD]], 16 - ; GFX10-NEXT: S_ENDPGM 0, implicit [[SEXT_INREG]](s32) - %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_SEXTLOAD %0 :: (invariant load (s16), align 4, addrspace 4) - S_ENDPGM 0, implicit %1 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:sgpr(i32) = G_LOAD [[COPY]](p1) :: (invariant load (i32), addrspace 4) + ; GFX10-NEXT: [[SEXT_INREG:%[0-9]+]]:sgpr(i32) = G_SEXT_INREG [[LOAD]], 16 + ; GFX10-NEXT: S_ENDPGM 0, implicit [[SEXT_INREG]](i32) + %0:_(p1) = COPY $sgpr0_sgpr1 + %1:_(i32) = G_SEXTLOAD %0(p1) :: (invariant load (i16), align 4, addrspace 4) + S_ENDPGM 0, implicit %1(i32) ... 
--- @@ -165,29 +175,31 @@ body: | ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), addrspace 4) - ; GFX8-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 255 - ; GFX8-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[LOAD]], [[C]] - ; GFX8-NEXT: S_ENDPGM 0, implicit [[AND]](s32) + ; GFX8-NEXT: [[LOAD:%[0-9]+]]:sgpr(i32) = G_LOAD [[COPY]](p1) :: (invariant load (i32), addrspace 4) + ; GFX8-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 255 + ; GFX8-NEXT: [[AND:%[0-9]+]]:sgpr(i32) = G_AND [[LOAD]], [[C]] + ; GFX8-NEXT: S_ENDPGM 0, implicit [[AND]](i32) + ; ; GFX9-LABEL: name: constant_zextload_i8_align4 ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), addrspace 4) - ; GFX9-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 255 - ; GFX9-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[LOAD]], [[C]] - ; GFX9-NEXT: S_ENDPGM 0, implicit [[AND]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:sgpr(i32) = G_LOAD [[COPY]](p1) :: (invariant load (i32), addrspace 4) + ; GFX9-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 255 + ; GFX9-NEXT: [[AND:%[0-9]+]]:sgpr(i32) = G_AND [[LOAD]], [[C]] + ; GFX9-NEXT: S_ENDPGM 0, implicit [[AND]](i32) + ; ; GFX10-LABEL: name: constant_zextload_i8_align4 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), addrspace 4) - ; GFX10-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 255 - ; GFX10-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[LOAD]], [[C]] - ; GFX10-NEXT: S_ENDPGM 0, implicit [[AND]](s32) - %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_ZEXTLOAD %0 :: (invariant load (s8), align 4, addrspace 4) - S_ENDPGM 0, implicit %1 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:sgpr(i32) = G_LOAD [[COPY]](p1) :: (invariant load (i32), addrspace 4) + ; GFX10-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 255 + ; GFX10-NEXT: [[AND:%[0-9]+]]:sgpr(i32) = G_AND [[LOAD]], [[C]] + ; GFX10-NEXT: S_ENDPGM 0, implicit [[AND]](i32) + %0:_(p1) = COPY $sgpr0_sgpr1 + %1:_(i32) = G_ZEXTLOAD %0(p1) :: (invariant load (i8), align 4, addrspace 4) + S_ENDPGM 0, implicit %1(i32) ... 
--- name: constant_zextload_i16_align4 @@ -200,29 +212,31 @@ body: | ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), addrspace 4) - ; GFX8-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[LOAD]], [[C]] - ; GFX8-NEXT: S_ENDPGM 0, implicit [[AND]](s32) + ; GFX8-NEXT: [[LOAD:%[0-9]+]]:sgpr(i32) = G_LOAD [[COPY]](p1) :: (invariant load (i32), addrspace 4) + ; GFX8-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:sgpr(i32) = G_AND [[LOAD]], [[C]] + ; GFX8-NEXT: S_ENDPGM 0, implicit [[AND]](i32) + ; ; GFX9-LABEL: name: constant_zextload_i16_align4 ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), addrspace 4) - ; GFX9-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535 - ; GFX9-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[LOAD]], [[C]] - ; GFX9-NEXT: S_ENDPGM 0, implicit [[AND]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:sgpr(i32) = G_LOAD [[COPY]](p1) :: (invariant load (i32), addrspace 4) + ; GFX9-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 65535 + ; GFX9-NEXT: [[AND:%[0-9]+]]:sgpr(i32) = G_AND [[LOAD]], [[C]] + ; GFX9-NEXT: S_ENDPGM 0, implicit [[AND]](i32) + ; ; GFX10-LABEL: name: constant_zextload_i16_align4 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), addrspace 4) - ; GFX10-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535 - ; GFX10-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[LOAD]], [[C]] - ; GFX10-NEXT: S_ENDPGM 0, implicit [[AND]](s32) - %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_ZEXTLOAD %0 :: (invariant load (s16), align 4, addrspace 4) - S_ENDPGM 0, implicit %1 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:sgpr(i32) = G_LOAD [[COPY]](p1) :: (invariant load (i32), addrspace 4) + ; GFX10-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 65535 + ; GFX10-NEXT: [[AND:%[0-9]+]]:sgpr(i32) = G_AND [[LOAD]], [[C]] + ; GFX10-NEXT: S_ENDPGM 0, implicit [[AND]](i32) + %0:_(p1) = COPY $sgpr0_sgpr1 + %1:_(i32) = G_ZEXTLOAD %0(p1) :: (invariant load (i16), align 4, addrspace 4) + S_ENDPGM 0, implicit %1(i32) ... 
--- name: global_load_i8_align4 @@ -235,23 +249,25 @@ body: | ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), addrspace 1) - ; GFX8-NEXT: S_ENDPGM 0, implicit [[LOAD]](s32) + ; GFX8-NEXT: [[LOAD:%[0-9]+]]:sgpr(i32) = G_LOAD [[COPY]](p1) :: (invariant load (i32), addrspace 1) + ; GFX8-NEXT: S_ENDPGM 0, implicit [[LOAD]](i32) + ; ; GFX9-LABEL: name: global_load_i8_align4 ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), addrspace 1) - ; GFX9-NEXT: S_ENDPGM 0, implicit [[LOAD]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:sgpr(i32) = G_LOAD [[COPY]](p1) :: (invariant load (i32), addrspace 1) + ; GFX9-NEXT: S_ENDPGM 0, implicit [[LOAD]](i32) + ; ; GFX10-LABEL: name: global_load_i8_align4 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), addrspace 1) - ; GFX10-NEXT: S_ENDPGM 0, implicit [[LOAD]](s32) - %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_LOAD %0 :: (invariant load (s8), align 4, addrspace 1) - S_ENDPGM 0, implicit %1 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:sgpr(i32) = G_LOAD [[COPY]](p1) :: (invariant load (i32), addrspace 1) + ; GFX10-NEXT: S_ENDPGM 0, implicit [[LOAD]](i32) + %0:_(p1) = COPY $sgpr0_sgpr1 + %1:_(i32) = G_LOAD %0(p1) :: (invariant load (i8), align 4, addrspace 1) + S_ENDPGM 0, implicit %1(i32) ... --- name: global_load_i16_align4 @@ -264,23 +280,25 @@ body: | ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), addrspace 1) - ; GFX8-NEXT: S_ENDPGM 0, implicit [[LOAD]](s32) + ; GFX8-NEXT: [[LOAD:%[0-9]+]]:sgpr(i32) = G_LOAD [[COPY]](p1) :: (invariant load (i32), addrspace 1) + ; GFX8-NEXT: S_ENDPGM 0, implicit [[LOAD]](i32) + ; ; GFX9-LABEL: name: global_load_i16_align4 ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), addrspace 1) - ; GFX9-NEXT: S_ENDPGM 0, implicit [[LOAD]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:sgpr(i32) = G_LOAD [[COPY]](p1) :: (invariant load (i32), addrspace 1) + ; GFX9-NEXT: S_ENDPGM 0, implicit [[LOAD]](i32) + ; ; GFX10-LABEL: name: global_load_i16_align4 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), addrspace 1) - ; GFX10-NEXT: S_ENDPGM 0, implicit [[LOAD]](s32) - %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_LOAD %0 :: (invariant load (s16), align 4, addrspace 1) - S_ENDPGM 0, implicit %1 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:sgpr(i32) = G_LOAD [[COPY]](p1) :: (invariant load (i32), addrspace 1) + ; GFX10-NEXT: S_ENDPGM 0, implicit [[LOAD]](i32) + %0:_(p1) = COPY $sgpr0_sgpr1 + %1:_(i32) = G_LOAD %0(p1) :: (invariant load (i16), align 4, addrspace 1) + S_ENDPGM 0, implicit %1(i32) ... 
--- name: global_sextload_i8_alig4 @@ -293,26 +311,28 @@ body: | ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), addrspace 1) - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:sgpr(s32) = G_SEXT_INREG [[LOAD]], 8 - ; GFX8-NEXT: S_ENDPGM 0, implicit [[SEXT_INREG]](s32) + ; GFX8-NEXT: [[LOAD:%[0-9]+]]:sgpr(i32) = G_LOAD [[COPY]](p1) :: (invariant load (i32), addrspace 1) + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:sgpr(i32) = G_SEXT_INREG [[LOAD]], 8 + ; GFX8-NEXT: S_ENDPGM 0, implicit [[SEXT_INREG]](i32) + ; ; GFX9-LABEL: name: global_sextload_i8_alig4 ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), addrspace 1) - ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:sgpr(s32) = G_SEXT_INREG [[LOAD]], 8 - ; GFX9-NEXT: S_ENDPGM 0, implicit [[SEXT_INREG]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:sgpr(i32) = G_LOAD [[COPY]](p1) :: (invariant load (i32), addrspace 1) + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:sgpr(i32) = G_SEXT_INREG [[LOAD]], 8 + ; GFX9-NEXT: S_ENDPGM 0, implicit [[SEXT_INREG]](i32) + ; ; GFX10-LABEL: name: global_sextload_i8_alig4 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), addrspace 1) - ; GFX10-NEXT: [[SEXT_INREG:%[0-9]+]]:sgpr(s32) = G_SEXT_INREG [[LOAD]], 8 - ; GFX10-NEXT: S_ENDPGM 0, implicit [[SEXT_INREG]](s32) - %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_SEXTLOAD %0 :: (invariant load (s8), align 4, addrspace 1) - S_ENDPGM 0, implicit %1 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:sgpr(i32) = G_LOAD [[COPY]](p1) :: (invariant load (i32), addrspace 1) + ; GFX10-NEXT: [[SEXT_INREG:%[0-9]+]]:sgpr(i32) = G_SEXT_INREG [[LOAD]], 8 + ; GFX10-NEXT: S_ENDPGM 0, implicit [[SEXT_INREG]](i32) + %0:_(p1) = COPY $sgpr0_sgpr1 + %1:_(i32) = G_SEXTLOAD %0(p1) :: (invariant load (i8), align 4, addrspace 1) + S_ENDPGM 0, implicit %1(i32) ... 
--- name: global_zextload_i16_align4 @@ -325,29 +345,31 @@ body: | ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), addrspace 1) - ; GFX8-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[LOAD]], [[C]] - ; GFX8-NEXT: S_ENDPGM 0, implicit [[AND]](s32) + ; GFX8-NEXT: [[LOAD:%[0-9]+]]:sgpr(i32) = G_LOAD [[COPY]](p1) :: (invariant load (i32), addrspace 1) + ; GFX8-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:sgpr(i32) = G_AND [[LOAD]], [[C]] + ; GFX8-NEXT: S_ENDPGM 0, implicit [[AND]](i32) + ; ; GFX9-LABEL: name: global_zextload_i16_align4 ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), addrspace 1) - ; GFX9-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535 - ; GFX9-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[LOAD]], [[C]] - ; GFX9-NEXT: S_ENDPGM 0, implicit [[AND]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:sgpr(i32) = G_LOAD [[COPY]](p1) :: (invariant load (i32), addrspace 1) + ; GFX9-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 65535 + ; GFX9-NEXT: [[AND:%[0-9]+]]:sgpr(i32) = G_AND [[LOAD]], [[C]] + ; GFX9-NEXT: S_ENDPGM 0, implicit [[AND]](i32) + ; ; GFX10-LABEL: name: global_zextload_i16_align4 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), addrspace 1) - ; GFX10-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535 - ; GFX10-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[LOAD]], [[C]] - ; GFX10-NEXT: S_ENDPGM 0, implicit [[AND]](s32) - %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_ZEXTLOAD %0 :: (invariant load (s16), align 4, addrspace 1) - S_ENDPGM 0, implicit %1 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:sgpr(i32) = G_LOAD [[COPY]](p1) :: (invariant load (i32), addrspace 1) + ; GFX10-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 65535 + ; GFX10-NEXT: [[AND:%[0-9]+]]:sgpr(i32) = G_AND [[LOAD]], [[C]] + ; GFX10-NEXT: S_ENDPGM 0, implicit [[AND]](i32) + %0:_(p1) = COPY $sgpr0_sgpr1 + %1:_(i32) = G_ZEXTLOAD %0(p1) :: (invariant load (i16), align 4, addrspace 1) + S_ENDPGM 0, implicit %1(i32) ... 
# Some negative test cases --- @@ -361,25 +383,27 @@ body: | ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (invariant load (s8), align 2, addrspace 4) - ; GFX8-NEXT: S_ENDPGM 0, implicit [[LOAD]](s32) + ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vgpr(i32) = G_LOAD [[COPY1]](p1) :: (invariant load (i8), align 2, addrspace 4) + ; GFX8-NEXT: S_ENDPGM 0, implicit [[LOAD]](i32) + ; ; GFX9-LABEL: name: constant_load_i8_align2 ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (invariant load (s8), align 2, addrspace 4) - ; GFX9-NEXT: S_ENDPGM 0, implicit [[LOAD]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr(i32) = G_LOAD [[COPY1]](p1) :: (invariant load (i8), align 2, addrspace 4) + ; GFX9-NEXT: S_ENDPGM 0, implicit [[LOAD]](i32) + ; ; GFX10-LABEL: name: constant_load_i8_align2 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (invariant load (s8), align 2, addrspace 4) - ; GFX10-NEXT: S_ENDPGM 0, implicit [[LOAD]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vgpr(i32) = G_LOAD [[COPY1]](p1) :: (invariant load (i8), align 2, addrspace 4) + ; GFX10-NEXT: S_ENDPGM 0, implicit [[LOAD]](i32) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_LOAD %0 :: (invariant load (s8), align 2, addrspace 4) - S_ENDPGM 0, implicit %1 + %1:_(i32) = G_LOAD %0(p1) :: (invariant load (i8), align 2, addrspace 4) + S_ENDPGM 0, implicit %1(i32) ... 
--- name: constant_load_i16_align2 @@ -393,25 +417,27 @@ body: | ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (invariant load (s16), addrspace 4) - ; GFX8-NEXT: S_ENDPGM 0, implicit [[LOAD]](s32) + ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vgpr(i32) = G_LOAD [[COPY1]](p1) :: (invariant load (i16), addrspace 4) + ; GFX8-NEXT: S_ENDPGM 0, implicit [[LOAD]](i32) + ; ; GFX9-LABEL: name: constant_load_i16_align2 ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (invariant load (s16), addrspace 4) - ; GFX9-NEXT: S_ENDPGM 0, implicit [[LOAD]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr(i32) = G_LOAD [[COPY1]](p1) :: (invariant load (i16), addrspace 4) + ; GFX9-NEXT: S_ENDPGM 0, implicit [[LOAD]](i32) + ; ; GFX10-LABEL: name: constant_load_i16_align2 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (invariant load (s16), addrspace 4) - ; GFX10-NEXT: S_ENDPGM 0, implicit [[LOAD]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vgpr(i32) = G_LOAD [[COPY1]](p1) :: (invariant load (i16), addrspace 4) + ; GFX10-NEXT: S_ENDPGM 0, implicit [[LOAD]](i32) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_LOAD %0 :: (invariant load (s16), align 2, addrspace 4) - S_ENDPGM 0, implicit %1 + %1:_(i32) = G_LOAD %0(p1) :: (invariant load (i16), addrspace 4) + S_ENDPGM 0, implicit %1(i32) ... 
--- name: constant_sextload_i8_align2 @@ -425,25 +451,27 @@ body: | ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX8-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[COPY1]](p1) :: (invariant load (s8), align 2, addrspace 4) - ; GFX8-NEXT: S_ENDPGM 0, implicit [[SEXTLOAD]](s32) + ; GFX8-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(i32) = G_SEXTLOAD [[COPY1]](p1) :: (invariant load (i8), align 2, addrspace 4) + ; GFX8-NEXT: S_ENDPGM 0, implicit [[SEXTLOAD]](i32) + ; ; GFX9-LABEL: name: constant_sextload_i8_align2 ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX9-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[COPY1]](p1) :: (invariant load (s8), align 2, addrspace 4) - ; GFX9-NEXT: S_ENDPGM 0, implicit [[SEXTLOAD]](s32) + ; GFX9-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(i32) = G_SEXTLOAD [[COPY1]](p1) :: (invariant load (i8), align 2, addrspace 4) + ; GFX9-NEXT: S_ENDPGM 0, implicit [[SEXTLOAD]](i32) + ; ; GFX10-LABEL: name: constant_sextload_i8_align2 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX10-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[COPY1]](p1) :: (invariant load (s8), align 2, addrspace 4) - ; GFX10-NEXT: S_ENDPGM 0, implicit [[SEXTLOAD]](s32) + ; GFX10-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(i32) = G_SEXTLOAD [[COPY1]](p1) :: (invariant load (i8), align 2, addrspace 4) + ; GFX10-NEXT: S_ENDPGM 0, implicit [[SEXTLOAD]](i32) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_SEXTLOAD %0 :: (invariant load (s8), align 2, addrspace 4) - S_ENDPGM 0, implicit %1 + %1:_(i32) = G_SEXTLOAD %0(p1) :: (invariant load (i8), align 2, addrspace 4) + S_ENDPGM 0, implicit %1(i32) ... 
--- name: constant_sextload_i16_align2 @@ -457,25 +485,27 @@ body: | ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX8-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[COPY1]](p1) :: (invariant load (s16), addrspace 4) - ; GFX8-NEXT: S_ENDPGM 0, implicit [[SEXTLOAD]](s32) + ; GFX8-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(i32) = G_SEXTLOAD [[COPY1]](p1) :: (invariant load (i16), addrspace 4) + ; GFX8-NEXT: S_ENDPGM 0, implicit [[SEXTLOAD]](i32) + ; ; GFX9-LABEL: name: constant_sextload_i16_align2 ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX9-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[COPY1]](p1) :: (invariant load (s16), addrspace 4) - ; GFX9-NEXT: S_ENDPGM 0, implicit [[SEXTLOAD]](s32) + ; GFX9-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(i32) = G_SEXTLOAD [[COPY1]](p1) :: (invariant load (i16), addrspace 4) + ; GFX9-NEXT: S_ENDPGM 0, implicit [[SEXTLOAD]](i32) + ; ; GFX10-LABEL: name: constant_sextload_i16_align2 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX10-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[COPY1]](p1) :: (invariant load (s16), addrspace 4) - ; GFX10-NEXT: S_ENDPGM 0, implicit [[SEXTLOAD]](s32) + ; GFX10-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(i32) = G_SEXTLOAD [[COPY1]](p1) :: (invariant load (i16), addrspace 4) + ; GFX10-NEXT: S_ENDPGM 0, implicit [[SEXTLOAD]](i32) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_SEXTLOAD %0 :: (invariant load (s16), align 2, addrspace 4) - S_ENDPGM 0, implicit %1 + %1:_(i32) = G_SEXTLOAD %0(p1) :: (invariant load (i16), addrspace 4) + S_ENDPGM 0, implicit %1(i32) ... 
--- name: constant_zextload_i8_align2 @@ -489,25 +519,27 @@ body: | ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX8-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p1) :: (invariant load (s8), align 2, addrspace 4) - ; GFX8-NEXT: S_ENDPGM 0, implicit [[ZEXTLOAD]](s32) + ; GFX8-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(i32) = G_ZEXTLOAD [[COPY1]](p1) :: (invariant load (i8), align 2, addrspace 4) + ; GFX8-NEXT: S_ENDPGM 0, implicit [[ZEXTLOAD]](i32) + ; ; GFX9-LABEL: name: constant_zextload_i8_align2 ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p1) :: (invariant load (s8), align 2, addrspace 4) - ; GFX9-NEXT: S_ENDPGM 0, implicit [[ZEXTLOAD]](s32) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(i32) = G_ZEXTLOAD [[COPY1]](p1) :: (invariant load (i8), align 2, addrspace 4) + ; GFX9-NEXT: S_ENDPGM 0, implicit [[ZEXTLOAD]](i32) + ; ; GFX10-LABEL: name: constant_zextload_i8_align2 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p1) :: (invariant load (s8), align 2, addrspace 4) - ; GFX10-NEXT: S_ENDPGM 0, implicit [[ZEXTLOAD]](s32) + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(i32) = G_ZEXTLOAD [[COPY1]](p1) :: (invariant load (i8), align 2, addrspace 4) + ; GFX10-NEXT: S_ENDPGM 0, implicit [[ZEXTLOAD]](i32) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_ZEXTLOAD %0 :: (invariant load (s8), align 2, addrspace 4) - S_ENDPGM 0, implicit %1 + %1:_(i32) = G_ZEXTLOAD %0(p1) :: (invariant load (i8), align 2, addrspace 4) + S_ENDPGM 0, implicit %1(i32) ... 
--- name: constant_zextload_i16_align2 @@ -521,25 +553,27 @@ body: | ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX8-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p1) :: (invariant load (s16), addrspace 4) - ; GFX8-NEXT: S_ENDPGM 0, implicit [[ZEXTLOAD]](s32) + ; GFX8-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(i32) = G_ZEXTLOAD [[COPY1]](p1) :: (invariant load (i16), addrspace 4) + ; GFX8-NEXT: S_ENDPGM 0, implicit [[ZEXTLOAD]](i32) + ; ; GFX9-LABEL: name: constant_zextload_i16_align2 ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p1) :: (invariant load (s16), addrspace 4) - ; GFX9-NEXT: S_ENDPGM 0, implicit [[ZEXTLOAD]](s32) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(i32) = G_ZEXTLOAD [[COPY1]](p1) :: (invariant load (i16), addrspace 4) + ; GFX9-NEXT: S_ENDPGM 0, implicit [[ZEXTLOAD]](i32) + ; ; GFX10-LABEL: name: constant_zextload_i16_align2 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p1) :: (invariant load (s16), addrspace 4) - ; GFX10-NEXT: S_ENDPGM 0, implicit [[ZEXTLOAD]](s32) + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(i32) = G_ZEXTLOAD [[COPY1]](p1) :: (invariant load (i16), addrspace 4) + ; GFX10-NEXT: S_ENDPGM 0, implicit [[ZEXTLOAD]](i32) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_ZEXTLOAD %0 :: (invariant load (s16), align 2, addrspace 4) - S_ENDPGM 0, implicit %1 + %1:_(i32) = G_ZEXTLOAD %0(p1) :: (invariant load (i16), addrspace 4) + S_ENDPGM 0, implicit %1(i32) ... 
--- name: local_load_i8_align4 @@ -553,25 +587,27 @@ body: | ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (load (s8), align 4, addrspace 3) - ; GFX8-NEXT: S_ENDPGM 0, implicit [[LOAD]](s32) + ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vgpr(i32) = G_LOAD [[COPY1]](p1) :: (load (i8), align 4, addrspace 3) + ; GFX8-NEXT: S_ENDPGM 0, implicit [[LOAD]](i32) + ; ; GFX9-LABEL: name: local_load_i8_align4 ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (load (s8), align 4, addrspace 3) - ; GFX9-NEXT: S_ENDPGM 0, implicit [[LOAD]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr(i32) = G_LOAD [[COPY1]](p1) :: (load (i8), align 4, addrspace 3) + ; GFX9-NEXT: S_ENDPGM 0, implicit [[LOAD]](i32) + ; ; GFX10-LABEL: name: local_load_i8_align4 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (load (s8), align 4, addrspace 3) - ; GFX10-NEXT: S_ENDPGM 0, implicit [[LOAD]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vgpr(i32) = G_LOAD [[COPY1]](p1) :: (load (i8), align 4, addrspace 3) + ; GFX10-NEXT: S_ENDPGM 0, implicit [[LOAD]](i32) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_LOAD %0 :: (load (s8), align 4, addrspace 3) - S_ENDPGM 0, implicit %1 + %1:_(i32) = G_LOAD %0(p1) :: (load (i8), align 4, addrspace 3) + S_ENDPGM 0, implicit %1(i32) ... --- name: private_load_i8_align4 @@ -585,23 +621,25 @@ body: | ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (load (s8), align 4, addrspace 5) - ; GFX8-NEXT: S_ENDPGM 0, implicit [[LOAD]](s32) + ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vgpr(i32) = G_LOAD [[COPY1]](p1) :: (load (i8), align 4, addrspace 5) + ; GFX8-NEXT: S_ENDPGM 0, implicit [[LOAD]](i32) + ; ; GFX9-LABEL: name: private_load_i8_align4 ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (load (s8), align 4, addrspace 5) - ; GFX9-NEXT: S_ENDPGM 0, implicit [[LOAD]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr(i32) = G_LOAD [[COPY1]](p1) :: (load (i8), align 4, addrspace 5) + ; GFX9-NEXT: S_ENDPGM 0, implicit [[LOAD]](i32) + ; ; GFX10-LABEL: name: private_load_i8_align4 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (load (s8), align 4, addrspace 5) - ; GFX10-NEXT: S_ENDPGM 0, implicit [[LOAD]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vgpr(i32) = G_LOAD [[COPY1]](p1) :: (load (i8), align 4, addrspace 5) + ; GFX10-NEXT: S_ENDPGM 0, implicit [[LOAD]](i32) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_LOAD %0 :: (load (s8), align 4, addrspace 5) - S_ENDPGM 0, implicit %1 + %1:_(i32) = G_LOAD %0(p1) :: (load (i8), align 4, addrspace 5) + S_ENDPGM 0, implicit %1(i32) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-xor.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-xor.mir index 874a8a064b481..b8c62fd127d66 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-xor.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-xor.mir @@ -12,12 +12,12 @@ body: | ; CHECK-LABEL: name: xor_s32_ss ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[XOR:%[0-9]+]]:sgpr(s32) = G_XOR [[COPY]], [[COPY1]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_XOR %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[XOR:%[0-9]+]]:sgpr(i32) = G_XOR [[COPY]], [[COPY1]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_XOR %0, %1 ... --- @@ -30,13 +30,13 @@ body: | ; CHECK-LABEL: name: xor_s32_sv ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(s32) = G_XOR [[COPY2]], [[COPY1]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = G_XOR %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(i32) = G_XOR [[COPY2]], [[COPY1]] + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = G_XOR %0, %1 ... --- @@ -49,13 +49,13 @@ body: | ; CHECK-LABEL: name: xor_s32_vs ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(s32) = G_XOR [[COPY]], [[COPY2]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s32) = G_XOR %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(i32) = G_XOR [[COPY]], [[COPY2]] + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $sgpr0 + %2:_(i32) = G_XOR %0, %1 ... --- @@ -68,12 +68,12 @@ body: | ; CHECK-LABEL: name: xor_s32_vv ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(s32) = G_XOR [[COPY]], [[COPY1]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_XOR %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(i32) = G_XOR [[COPY]], [[COPY1]] + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_XOR %0, %1 ... 
--- @@ -86,25 +86,25 @@ body: | ; CHECK-LABEL: name: xor_i1_scc_scc ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[C]] - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY1]](s32), [[C]] - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC1]](s1) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:sgpr(s32) = G_XOR [[ANYEXT]], [[ANYEXT1]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[XOR]](s32) - ; CHECK-NEXT: S_NOP 0, implicit [[TRUNC2]](s1) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(ne), %0, %2 - %5:_(s1) = G_ICMP intpred(ne), %1, %2 - %6:_(s1) = G_XOR %4, %5 - S_NOP 0, implicit %6 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ne), [[COPY]](i32), [[C]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ne), [[COPY1]](i32), [[C]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP1]](i32) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC]](i1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC1]](i1) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:sgpr(i32) = G_XOR [[ANYEXT]], [[ANYEXT1]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(i1) = G_TRUNC [[XOR]](i32) + ; CHECK-NEXT: S_NOP 0, implicit [[TRUNC2]](i1) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = G_CONSTANT i32 0 + %3:_(i1) = G_ICMP intpred(ne), %0(i32), %2 + %4:_(i1) = G_ICMP intpred(ne), %1(i32), %2 + %5:_(i1) = G_XOR %3, %4 + S_NOP 0, implicit %5(i1) ... 
--- @@ -117,22 +117,22 @@ body: | ; CHECK-LABEL: name: xor_i1_vcc_vcc ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY2]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[COPY3]] - ; CHECK-NEXT: [[XOR:%[0-9]+]]:vcc(s1) = G_XOR [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: S_NOP 0, implicit [[XOR]](s1) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(ne), %0, %2 - %5:_(s1) = G_ICMP intpred(ne), %1, %2 - %6:_(s1) = G_XOR %4, %5 - S_NOP 0, implicit %6 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY2]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY1]](i32), [[COPY3]] + ; CHECK-NEXT: [[XOR:%[0-9]+]]:vcc(i1) = G_XOR [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: S_NOP 0, implicit [[XOR]](i1) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = G_CONSTANT i32 0 + %3:_(i1) = G_ICMP intpred(ne), %0(i32), %2 + %4:_(i1) = G_ICMP intpred(ne), %1(i32), %2 + %5:_(i1) = G_XOR %3, %4 + S_NOP 0, implicit %5(i1) ...
--- @@ -145,23 +145,23 @@ body: | ; CHECK-LABEL: name: xor_i1_scc_vcc ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[C]] - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[COPY2]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:vcc(s1) = G_XOR [[COPY3]], [[ICMP1]] - ; CHECK-NEXT: S_NOP 0, implicit [[XOR]](s1) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(ne), %0, %2 - %5:_(s1) = G_ICMP intpred(ne), %1, %2 - %6:_(s1) = G_XOR %4, %5 - S_NOP 0, implicit %6 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ne), [[COPY]](i32), [[C]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY1]](i32), [[COPY2]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:vcc(i1) = G_XOR [[COPY3]], [[ICMP1]] + ; CHECK-NEXT: S_NOP 0, implicit [[XOR]](i1) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i32) = G_CONSTANT i32 0 + %3:_(i1) = G_ICMP intpred(ne), %0(i32), %2 + %4:_(i1) = G_ICMP intpred(ne), %1(i32), %2 + %5:_(i1) = G_XOR %3, %4 + S_NOP 0, implicit %5(i1) ...
--- @@ -173,21 +173,21 @@ body: | ; CHECK-LABEL: name: xor_i1_sgpr_trunc_sgpr_trunc ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC1]](s1) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:sgpr(s32) = G_XOR [[ANYEXT]], [[ANYEXT1]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[XOR]](s32) - ; CHECK-NEXT: S_NOP 0, implicit [[TRUNC2]](s1) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s1) = G_TRUNC %0 - %3:_(s1) = G_TRUNC %1 - %4:_(s1) = G_XOR %2, %3 - S_NOP 0, implicit %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY1]](i32) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC]](i1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC1]](i1) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:sgpr(i32) = G_XOR [[ANYEXT]], [[ANYEXT1]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(i1) = G_TRUNC [[XOR]](i32) + ; CHECK-NEXT: S_NOP 0, implicit [[TRUNC2]](i1) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i1) = G_TRUNC %0(i32) + %3:_(i1) = G_TRUNC %1(i32) + %4:_(i1) = G_XOR %2, %3 + S_NOP 0, implicit %4(i1) ...
@@ -200,22 +200,22 @@ body: | ; CHECK-LABEL: name: xor_i1_trunc_scc ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC1]](s1) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:sgpr(s32) = G_XOR [[ANYEXT]], [[ANYEXT1]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[XOR]](s32) - ; CHECK-NEXT: S_NOP 0, implicit [[TRUNC2]](s1) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s1) = G_TRUNC %0 - %3:_(s1) = G_ICMP intpred(ne), %0, %1 - %4:_(s1) = G_XOR %2, %3 - S_NOP 0, implicit %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC]](i1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(i32) = G_ANYEXT [[TRUNC1]](i1) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:sgpr(i32) = G_XOR [[ANYEXT]], [[ANYEXT1]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(i1) = G_TRUNC [[XOR]](i32) + ; CHECK-NEXT: S_NOP 0, implicit [[TRUNC2]](i1) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i1) = G_TRUNC %0(i32) + %3:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %4:_(i1) = G_XOR %2, %3 + S_NOP 0, implicit %4(i1) ...
--- @@ -227,20 +227,20 @@ body: | ; CHECK-LABEL: name: xor_i1_s_trunc_vcc ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY2]](s32), [[COPY1]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:vcc(s1) = G_XOR [[COPY3]], [[ICMP]] - ; CHECK-NEXT: S_NOP 0, implicit [[XOR]](s1) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s1) = G_TRUNC %0 - %3:_(s1) = G_ICMP intpred(ne), %0, %1 - %4:_(s1) = G_XOR %2, %3 - S_NOP 0, implicit %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY2]](i32), [[COPY1]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:vcc(i1) = G_XOR [[COPY3]], [[ICMP]] + ; CHECK-NEXT: S_NOP 0, implicit [[XOR]](i1) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $vgpr0 + %2:_(i1) = G_TRUNC %0(i32) + %3:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %4:_(i1) = G_XOR %2, %3 + S_NOP 0, implicit %4(i1) ... --- @@ -253,12 +253,12 @@ body: | ; CHECK-LABEL: name: xor_s64_ss ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 - ; CHECK-NEXT: [[XOR:%[0-9]+]]:sgpr(s64) = G_XOR [[COPY]], [[COPY1]] - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s64) = COPY $sgpr2_sgpr3 - %2:_(s64) = G_XOR %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i64) = COPY $sgpr2_sgpr3 + ; CHECK-NEXT: [[XOR:%[0-9]+]]:sgpr(i64) = G_XOR [[COPY]], [[COPY1]] + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(i64) = COPY $sgpr2_sgpr3 + %2:_(i64) = G_XOR %0, %1 ... 
--- @@ -271,16 +271,16 @@ body: | ; CHECK-LABEL: name: xor_s64_sv ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(s32) = G_XOR [[UV]], [[UV2]] - ; CHECK-NEXT: [[XOR1:%[0-9]+]]:vgpr(s32) = G_XOR [[UV1]], [[UV3]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[XOR]](s32), [[XOR1]](s32) - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s64) = COPY $vgpr0_vgpr1 - %2:_(s64) = G_XOR %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(i32), [[UV1:%[0-9]+]]:sgpr(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(i32) = G_XOR [[UV]], [[UV2]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:vgpr(i32) = G_XOR [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[XOR]](i32), [[XOR1]](i32) + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(i64) = COPY $vgpr0_vgpr1 + %2:_(i64) = G_XOR %0, %1 ... --- @@ -293,16 +293,16 @@ body: | ; CHECK-LABEL: name: xor_s64_vs ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(s32) = G_XOR [[UV]], [[UV2]] - ; CHECK-NEXT: [[XOR1:%[0-9]+]]:vgpr(s32) = G_XOR [[UV1]], [[UV3]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[XOR]](s32), [[XOR1]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $sgpr0_sgpr1 - %2:_(s64) = G_XOR %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:sgpr(i32), [[UV3:%[0-9]+]]:sgpr(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(i32) = G_XOR [[UV]], [[UV2]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:vgpr(i32) = G_XOR [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[XOR]](i32), [[XOR1]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $sgpr0_sgpr1 + %2:_(i64) = G_XOR %0, %1 ... 
--- @@ -315,16 +315,16 @@ body: | ; CHECK-LABEL: name: xor_s64_vv ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(s32) = G_XOR [[UV]], [[UV2]] - ; CHECK-NEXT: [[XOR1:%[0-9]+]]:vgpr(s32) = G_XOR [[UV1]], [[UV3]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[XOR]](s32), [[XOR1]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = G_XOR %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(i32) = G_XOR [[UV]], [[UV2]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:vgpr(i32) = G_XOR [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[XOR]](i32), [[XOR1]](i32) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64) = G_XOR %0, %1 ... --- @@ -337,18 +337,18 @@ body: | ; CHECK-LABEL: name: xor_s64_vv_user ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(s32) = G_XOR [[UV]], [[UV2]] - ; CHECK-NEXT: [[XOR1:%[0-9]+]]:vgpr(s32) = G_XOR [[UV1]], [[UV3]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[XOR]](s32), [[XOR1]](s32) - ; CHECK-NEXT: S_NOP 0, implicit [[MV]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = G_XOR %0, %1 - S_NOP 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(i32) = G_XOR [[UV]], [[UV2]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:vgpr(i32) = G_XOR [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[XOR]](i32), [[XOR1]](i32) + ; CHECK-NEXT: S_NOP 0, implicit [[MV]](i64) + %0:_(i64) = COPY $vgpr0_vgpr1 + %1:_(i64) = COPY $vgpr2_vgpr3 + %2:_(i64) = G_XOR %0, %1 + S_NOP 0, implicit %2(i64) ... 
--- name: xor_s64_ss_ss_merge @@ -360,22 +360,22 @@ body: | ; CHECK-LABEL: name: xor_s64_ss_ss_merge ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:sgpr(s64) = G_XOR [[MV]], [[MV1]] - ; CHECK-NEXT: S_NOP 0, implicit [[XOR]](s64) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $sgpr2 - %3:_(s32) = COPY $sgpr3 - %4:_(s64) = G_MERGE_VALUES %0, %1 - %5:_(s64) = G_MERGE_VALUES %2, %3 - %6:_(s64) = G_XOR %4, %5 - S_NOP 0, implicit %6 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(i32) = COPY $sgpr3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:sgpr(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:sgpr(i64) = G_XOR [[MV]], [[MV1]] + ; CHECK-NEXT: S_NOP 0, implicit [[XOR]](i64) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $sgpr2 + %3:_(i32) = COPY $sgpr3 + %4:_(i64) = G_MERGE_VALUES %0(i32), %1(i32) + %5:_(i64) = G_MERGE_VALUES %2(i32), %3(i32) + %6:_(i64) = G_XOR %4, %5 + S_NOP 0, implicit %6(i64) ...
--- @@ -388,26 +388,26 @@ body: | ; CHECK-LABEL: name: xor_s64_vv_vv_merge ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV1]](s64) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(s32) = G_XOR [[UV]], [[UV2]] - ; CHECK-NEXT: [[XOR1:%[0-9]+]]:vgpr(s32) = G_XOR [[UV1]], [[UV3]] - ; CHECK-NEXT: [[MV2:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[XOR]](s32), [[XOR1]](s32) - ; CHECK-NEXT: S_NOP 0, implicit [[MV2]](s64) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = COPY $vgpr3 - %4:_(s64) = G_MERGE_VALUES %0, %1 - %5:_(s64) = G_MERGE_VALUES %2, %3 - %6:_(s64) = G_XOR %4, %5 - S_NOP 0, implicit %6 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[COPY]](i32), [[COPY1]](i32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[MV]](i64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[MV1]](i64) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(i32) = G_XOR [[UV]], [[UV2]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:vgpr(i32) = G_XOR [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV2:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[XOR]](i32), [[XOR1]](i32) + ; CHECK-NEXT: S_NOP 0, implicit [[MV2]](i64) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i32) = COPY $vgpr2 + %3:_(i32) = COPY $vgpr3 + %4:_(i64) = G_MERGE_VALUES %0(i32), %1(i32) + %5:_(i64) = G_MERGE_VALUES %2(i32), %3(i32) + %6:_(i64) = G_XOR %4, %5 + S_NOP 0, implicit %6(i64) ...
--- @@ -420,23 +420,23 @@ body: | ; CHECK-LABEL: name: xor_s64_s_sv_merge ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY3]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(s32) = G_XOR [[UV]], [[UV2]] - ; CHECK-NEXT: [[XOR1:%[0-9]+]]:vgpr(s32) = G_XOR [[UV1]], [[UV3]] - ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[XOR]](s32), [[XOR1]](s32) - ; CHECK-NEXT: S_NOP 0, implicit [[MV1]](s64) - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s32) = COPY $vgpr0 - %3:_(s64) = G_MERGE_VALUES %1, %2 - %4:_(s64) = G_XOR %0, %3 - S_NOP 0, implicit %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[COPY3]](i32), [[COPY2]](i32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(i32), [[UV1:%[0-9]+]]:sgpr(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[MV]](i64) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(i32) = G_XOR [[UV]], [[UV2]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:vgpr(i32) = G_XOR [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[XOR]](i32), [[XOR1]](i32) + ; CHECK-NEXT: S_NOP 0, implicit [[MV1]](i64) + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(i32) = COPY $sgpr2 + %2:_(i32) = COPY $vgpr0 + %3:_(i64) = G_MERGE_VALUES %1(i32), %2(i32) + %4:_(i64) = G_XOR %0, %3 + S_NOP 0, implicit %4(i64) ...
--- @@ -449,23 +449,23 @@ body: | ; CHECK-LABEL: name: xor_s64_s_vs_merge ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(s32) = G_XOR [[UV]], [[UV2]] - ; CHECK-NEXT: [[XOR1:%[0-9]+]]:vgpr(s32) = G_XOR [[UV1]], [[UV3]] - ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[XOR]](s32), [[XOR1]](s32) - ; CHECK-NEXT: S_NOP 0, implicit [[MV1]](s64) - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s32) = COPY $vgpr0 - %3:_(s64) = G_MERGE_VALUES %2, %1 - %4:_(s64) = G_XOR %0, %3 - S_NOP 0, implicit %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[COPY2]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(i32), [[UV1:%[0-9]+]]:sgpr(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[MV]](i64) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(i32) = G_XOR [[UV]], [[UV2]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:vgpr(i32) = G_XOR [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[XOR]](i32), [[XOR1]](i32) + ; CHECK-NEXT: S_NOP 0, implicit [[MV1]](i64) + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(i32) = COPY $sgpr2 + %2:_(i32) = COPY $vgpr0 + %3:_(i64) = G_MERGE_VALUES %2(i32), %1(i32) + %4:_(i64) = G_XOR %0, %3 + S_NOP 0, implicit %4(i64) ... 
--- @@ -478,28 +478,28 @@ body: | ; CHECK-LABEL: name: xor_s64_sv_sv_merge ; CHECK: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY5]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV1]](s64) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(s32) = G_XOR [[UV]], [[UV2]] - ; CHECK-NEXT: [[XOR1:%[0-9]+]]:vgpr(s32) = G_XOR [[UV1]], [[UV3]] - ; CHECK-NEXT: [[MV2:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[XOR]](s32), [[XOR1]](s32) - ; CHECK-NEXT: S_NOP 0, implicit [[MV2]](s64) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $vgpr0 - %3:_(s32) = COPY $vgpr1 - %4:_(s64) = G_MERGE_VALUES %0, %2 - %5:_(s64) = G_MERGE_VALUES %1, %3 - %6:_(s64) = G_XOR %4, %5 - S_NOP 0, implicit %6 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY2]](i32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[COPY5]](i32), [[COPY3]](i32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[MV]](i64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[MV1]](i64) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(i32) = G_XOR [[UV]], [[UV2]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:vgpr(i32) = G_XOR [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV2:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[XOR]](i32), [[XOR1]](i32) + ; CHECK-NEXT: S_NOP 0, implicit [[MV2]](i64) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $vgpr0 + %3:_(i32) = COPY $vgpr1 + %4:_(i64) = G_MERGE_VALUES %0(i32), %2(i32) + %5:_(i64) = G_MERGE_VALUES %1(i32), %3(i32) + %6:_(i64) = G_XOR %4, %5 + S_NOP 0, implicit %6(i64) ... 
--- @@ -512,28 +512,28 @@ body: | ; CHECK-LABEL: name: xor_s64_sv_vs_merge ; CHECK: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY3]](s32), [[COPY5]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV1]](s64) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(s32) = G_XOR [[UV]], [[UV2]] - ; CHECK-NEXT: [[XOR1:%[0-9]+]]:vgpr(s32) = G_XOR [[UV1]], [[UV3]] - ; CHECK-NEXT: [[MV2:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[XOR]](s32), [[XOR1]](s32) - ; CHECK-NEXT: S_NOP 0, implicit [[MV2]](s64) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $vgpr0 - %3:_(s32) = COPY $vgpr1 - %4:_(s64) = G_MERGE_VALUES %0, %2 - %5:_(s64) = G_MERGE_VALUES %3, %1 - %6:_(s64) = G_XOR %4, %5 - S_NOP 0, implicit %6 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[COPY4]](i32), [[COPY2]](i32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(i32) = COPY [[COPY1]](i32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[COPY3]](i32), [[COPY5]](i32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[MV]](i64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[MV1]](i64) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(i32) = G_XOR [[UV]], [[UV2]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:vgpr(i32) = G_XOR [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV2:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[XOR]](i32), [[XOR1]](i32) + ; CHECK-NEXT: S_NOP 0, implicit [[MV2]](i64) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i32) = COPY $vgpr0 + %3:_(i32) = COPY $vgpr1 + %4:_(i64) = G_MERGE_VALUES %0(i32), %2(i32) + %5:_(i64) = G_MERGE_VALUES %3(i32), %1(i32) + %6:_(i64) = G_XOR %4, %5 + S_NOP 0, implicit %6(i64) ... 
--- @@ -546,26 +546,26 @@ body: | ; CHECK-LABEL: name: xor_chain_s64_sv ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](s64) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(s32) = G_XOR [[UV]], [[UV2]] - ; CHECK-NEXT: [[XOR1:%[0-9]+]]:vgpr(s32) = G_XOR [[UV1]], [[UV3]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[XOR]](s32), [[XOR1]](s32) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) - ; CHECK-NEXT: [[XOR2:%[0-9]+]]:vgpr(s32) = G_XOR [[UV4]], [[UV6]] - ; CHECK-NEXT: [[XOR3:%[0-9]+]]:vgpr(s32) = G_XOR [[UV5]], [[UV7]] - ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[XOR2]](s32), [[XOR3]](s32) - ; CHECK-NEXT: S_NOP 0, implicit [[MV1]](s64) - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s64) = COPY $sgpr2_sgpr3 - %2:_(s64) = COPY $vgpr0_vgpr1 - %3:_(s64) = G_XOR %0, %2 - %4:_(s64) = G_XOR %1, %3 - S_NOP 0, implicit %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i64) = COPY $sgpr2_sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(i64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(i32), [[UV1:%[0-9]+]]:sgpr(i32) = G_UNMERGE_VALUES [[COPY]](i64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY2]](i64) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(i32) = G_XOR [[UV]], [[UV2]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:vgpr(i32) = G_XOR [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[XOR]](i32), [[XOR1]](i32) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:sgpr(i32), [[UV5:%[0-9]+]]:sgpr(i32) = G_UNMERGE_VALUES [[COPY1]](i64) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:vgpr(i32), [[UV7:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[MV]](i64) + ; CHECK-NEXT: [[XOR2:%[0-9]+]]:vgpr(i32) = G_XOR [[UV4]], [[UV6]] + ; CHECK-NEXT: [[XOR3:%[0-9]+]]:vgpr(i32) = G_XOR [[UV5]], [[UV7]] + ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[XOR2]](i32), [[XOR3]](i32) + ; CHECK-NEXT: S_NOP 0, implicit [[MV1]](i64) + %0:_(i64) = COPY $sgpr0_sgpr1 + %1:_(i64) = COPY $sgpr2_sgpr3 + %2:_(i64) = COPY $vgpr0_vgpr1 + %3:_(i64) = G_XOR %0, %2 + %4:_(i64) = G_XOR %1, %3 + S_NOP 0, implicit %4(i64) ... 
--- @@ -578,14 +578,14 @@ body: | ; CHECK-LABEL: name: xor_v2i32_ss ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s32>) = COPY $sgpr2_sgpr3 - ; CHECK-NEXT: [[XOR:%[0-9]+]]:sgpr(<2 x s32>) = G_XOR [[COPY]], [[COPY1]] - ; CHECK-NEXT: S_NOP 0, implicit [[XOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $sgpr0_sgpr1 - %1:_(<2 x s32>) = COPY $sgpr2_sgpr3 - %2:_(<2 x s32>) = G_XOR %0, %1 - S_NOP 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x i32>) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x i32>) = COPY $sgpr2_sgpr3 + ; CHECK-NEXT: [[XOR:%[0-9]+]]:sgpr(<2 x i32>) = G_XOR [[COPY]], [[COPY1]] + ; CHECK-NEXT: S_NOP 0, implicit [[XOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $sgpr0_sgpr1 + %1:_(<2 x i32>) = COPY $sgpr2_sgpr3 + %2:_(<2 x i32>) = G_XOR %0, %1 + S_NOP 0, implicit %2(<2 x i32>) ... --- @@ -598,18 +598,18 @@ body: | ; CHECK-LABEL: name: xor_v2i32_sv ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(s32) = G_XOR [[UV]], [[UV2]] - ; CHECK-NEXT: [[XOR1:%[0-9]+]]:vgpr(s32) = G_XOR [[UV1]], [[UV3]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[XOR]](s32), [[XOR1]](s32) - ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $sgpr0_sgpr1 - %1:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %2:_(<2 x s32>) = G_XOR %0, %1 - S_NOP 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x i32>) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(i32), [[UV1:%[0-9]+]]:sgpr(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(i32) = G_XOR [[UV]], [[UV2]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:vgpr(i32) = G_XOR [[UV1]], [[UV3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x i32>) = G_BUILD_VECTOR [[XOR]](i32), [[XOR1]](i32) + ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $sgpr0_sgpr1 + %1:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %2:_(<2 x i32>) = G_XOR %0, %1 + S_NOP 0, implicit %2(<2 x i32>) ... 
--- @@ -623,18 +623,18 @@ body: | ; CHECK-LABEL: name: xor_v2i32_vs ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(s32) = G_XOR [[UV]], [[UV2]] - ; CHECK-NEXT: [[XOR1:%[0-9]+]]:vgpr(s32) = G_XOR [[UV1]], [[UV3]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[XOR]](s32), [[XOR1]](s32) - ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $sgpr0_sgpr1 - %2:_(<2 x s32>) = G_XOR %0, %1 - S_NOP 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x i32>) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:sgpr(i32), [[UV3:%[0-9]+]]:sgpr(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(i32) = G_XOR [[UV]], [[UV2]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:vgpr(i32) = G_XOR [[UV1]], [[UV3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x i32>) = G_BUILD_VECTOR [[XOR]](i32), [[XOR1]](i32) + ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $sgpr0_sgpr1 + %2:_(<2 x i32>) = G_XOR %0, %1 + S_NOP 0, implicit %2(<2 x i32>) ... --- @@ -647,18 +647,18 @@ body: | ; CHECK-LABEL: name: xor_v2i32_vv ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(s32) = G_XOR [[UV]], [[UV2]] - ; CHECK-NEXT: [[XOR1:%[0-9]+]]:vgpr(s32) = G_XOR [[UV1]], [[UV3]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[XOR]](s32), [[XOR1]](s32) - ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = G_XOR %0, %1 - S_NOP 0, implicit %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x i32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x i32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(i32), [[UV1:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY]](<2 x i32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(i32), [[UV3:%[0-9]+]]:vgpr(i32) = G_UNMERGE_VALUES [[COPY1]](<2 x i32>) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(i32) = G_XOR [[UV]], [[UV2]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:vgpr(i32) = G_XOR [[UV1]], [[UV3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x i32>) = G_BUILD_VECTOR [[XOR]](i32), [[XOR1]](i32) + ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x i32>) + %0:_(<2 x i32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x i32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x i32>) = G_XOR %0, %1 + S_NOP 0, implicit %2(<2 x i32>) ... 
--- @@ -671,12 +671,12 @@ body: | ; CHECK-LABEL: name: xor_v4s16_ss ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr2_sgpr3 - ; CHECK-NEXT: [[XOR:%[0-9]+]]:sgpr(<4 x s16>) = G_XOR [[COPY]], [[COPY1]] - %0:_(<4 x s16>) = COPY $sgpr0_sgpr1 - %1:_(<4 x s16>) = COPY $sgpr2_sgpr3 - %2:_(<4 x s16>) = G_XOR %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x i16>) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<4 x i16>) = COPY $sgpr2_sgpr3 + ; CHECK-NEXT: [[XOR:%[0-9]+]]:sgpr(<4 x i16>) = G_XOR [[COPY]], [[COPY1]] + %0:_(<4 x i16>) = COPY $sgpr0_sgpr1 + %1:_(<4 x i16>) = COPY $sgpr2_sgpr3 + %2:_(<4 x i16>) = G_XOR %0, %1 ... --- @@ -689,16 +689,16 @@ body: | ; CHECK-LABEL: name: xor_v4s16_sv ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(<2 x s16>), [[UV1:%[0-9]+]]:sgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x s16>), [[UV3:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(<2 x s16>) = G_XOR [[UV]], [[UV2]] - ; CHECK-NEXT: [[XOR1:%[0-9]+]]:vgpr(<2 x s16>) = G_XOR [[UV1]], [[UV3]] - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[XOR]](<2 x s16>), [[XOR1]](<2 x s16>) - %0:_(<4 x s16>) = COPY $sgpr0_sgpr1 - %1:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %2:_(<4 x s16>) = G_XOR %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x i16>) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(<2 x i16>), [[UV1:%[0-9]+]]:sgpr(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x i16>), [[UV3:%[0-9]+]]:vgpr(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(<2 x i16>) = G_XOR [[UV]], [[UV2]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:vgpr(<2 x i16>) = G_XOR [[UV1]], [[UV3]] + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x i16>) = G_CONCAT_VECTORS [[XOR]](<2 x i16>), [[XOR1]](<2 x i16>) + %0:_(<4 x i16>) = COPY $sgpr0_sgpr1 + %1:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %2:_(<4 x i16>) = G_XOR %0, %1 ... 
--- @@ -711,16 +711,16 @@ body: | ; CHECK-LABEL: name: xor_v4s16_vs ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s16>), [[UV1:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:sgpr(<2 x s16>), [[UV3:%[0-9]+]]:sgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(<2 x s16>) = G_XOR [[UV]], [[UV2]] - ; CHECK-NEXT: [[XOR1:%[0-9]+]]:vgpr(<2 x s16>) = G_XOR [[UV1]], [[UV3]] - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[XOR]](<2 x s16>), [[XOR1]](<2 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $sgpr0_sgpr1 - %2:_(<4 x s16>) = G_XOR %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<4 x i16>) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x i16>), [[UV1:%[0-9]+]]:vgpr(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:sgpr(<2 x i16>), [[UV3:%[0-9]+]]:sgpr(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(<2 x i16>) = G_XOR [[UV]], [[UV2]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:vgpr(<2 x i16>) = G_XOR [[UV1]], [[UV3]] + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x i16>) = G_CONCAT_VECTORS [[XOR]](<2 x i16>), [[XOR1]](<2 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = COPY $sgpr0_sgpr1 + %2:_(<4 x i16>) = G_XOR %0, %1 ... --- @@ -733,16 +733,16 @@ body: | ; CHECK-LABEL: name: xor_v4s16_vv ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s16>), [[UV1:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x s16>), [[UV3:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(<2 x s16>) = G_XOR [[UV]], [[UV2]] - ; CHECK-NEXT: [[XOR1:%[0-9]+]]:vgpr(<2 x s16>) = G_XOR [[UV1]], [[UV3]] - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[XOR]](<2 x s16>), [[XOR1]](<2 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 - %2:_(<4 x s16>) = G_XOR %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x i16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<4 x i16>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x i16>), [[UV1:%[0-9]+]]:vgpr(<2 x i16>) = G_UNMERGE_VALUES [[COPY]](<4 x i16>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x i16>), [[UV3:%[0-9]+]]:vgpr(<2 x i16>) = G_UNMERGE_VALUES [[COPY1]](<4 x i16>) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(<2 x i16>) = G_XOR [[UV]], [[UV2]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:vgpr(<2 x i16>) = G_XOR [[UV1]], [[UV3]] + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x i16>) = G_CONCAT_VECTORS [[XOR]](<2 x i16>), [[XOR1]](<2 x i16>) + %0:_(<4 x i16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x i16>) = COPY $vgpr2_vgpr3 + %2:_(<4 x i16>) = G_XOR %0, %1 ... 
--- @@ -755,12 +755,12 @@ body: | ; CHECK-LABEL: name: xor_v2s16_ss ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1 - ; CHECK-NEXT: [[XOR:%[0-9]+]]:sgpr(<2 x s16>) = G_XOR [[COPY]], [[COPY1]] - %0:_(<2 x s16>) = COPY $sgpr0 - %1:_(<2 x s16>) = COPY $sgpr1 - %2:_(<2 x s16>) = G_XOR %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr1 + ; CHECK-NEXT: [[XOR:%[0-9]+]]:sgpr(<2 x i16>) = G_XOR [[COPY]], [[COPY1]] + %0:_(<2 x i16>) = COPY $sgpr0 + %1:_(<2 x i16>) = COPY $sgpr1 + %2:_(<2 x i16>) = G_XOR %0, %1 ... --- @@ -773,13 +773,13 @@ body: | ; CHECK-LABEL: name: xor_v2s16_sv ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(<2 x s16>) = G_XOR [[COPY2]], [[COPY1]] - %0:_(<2 x s16>) = COPY $sgpr0 - %1:_(<2 x s16>) = COPY $vgpr0 - %2:_(<2 x s16>) = G_XOR %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x i16>) = COPY [[COPY]](<2 x i16>) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(<2 x i16>) = G_XOR [[COPY2]], [[COPY1]] + %0:_(<2 x i16>) = COPY $sgpr0 + %1:_(<2 x i16>) = COPY $vgpr0 + %2:_(<2 x i16>) = G_XOR %0, %1 ... --- @@ -792,13 +792,13 @@ body: | ; CHECK-LABEL: name: xor_v2s16_vs ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(<2 x s16>) = G_XOR [[COPY]], [[COPY2]] - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $sgpr0 - %2:_(<2 x s16>) = G_XOR %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x i16>) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x i16>) = COPY [[COPY1]](<2 x i16>) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(<2 x i16>) = G_XOR [[COPY]], [[COPY2]] + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $sgpr0 + %2:_(<2 x i16>) = G_XOR %0, %1 ... --- @@ -811,12 +811,12 @@ body: | ; CHECK-LABEL: name: xor_v2s16_vv ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(<2 x s16>) = G_XOR [[COPY]], [[COPY1]] - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_XOR %0, %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x i16>) = COPY $vgpr1 + ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(<2 x i16>) = G_XOR [[COPY]], [[COPY1]] + %0:_(<2 x i16>) = COPY $vgpr0 + %1:_(<2 x i16>) = COPY $vgpr1 + %2:_(<2 x i16>) = G_XOR %0, %1 ... 
--- @@ -829,19 +829,19 @@ body: | ; CHECK-LABEL: name: xor_i1_vcc_constant ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[C1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:vcc(s1) = G_XOR [[ICMP]], [[COPY2]] - ; CHECK-NEXT: S_NOP 0, implicit [[XOR]](s1) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_CONSTANT i32 0 - %2:_(s1) = G_ICMP intpred(ne), %0, %1 - %3:_(s1) = G_CONSTANT i1 true - %4:_(s1) = G_XOR %2, %3 - S_NOP 0, implicit %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[C]](i32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(ne), [[COPY]](i32), [[COPY1]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[C1]](i32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vcc(i1) = COPY [[TRUNC]](i1) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:vcc(i1) = G_XOR [[ICMP]], [[COPY2]] + ; CHECK-NEXT: S_NOP 0, implicit [[XOR]](i1) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = G_CONSTANT i32 0 + %2:_(i1) = G_ICMP intpred(ne), %0(i32), %1 + %3:_(i1) = G_CONSTANT i1 true + %4:_(i1) = G_XOR %2, %3 + S_NOP 0, implicit %4(i1) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir index aa3bc8c658f2c..818eacd861496 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir @@ -12,10 +12,10 @@ body: | ; CHECK-LABEL: name: zext_s32_to_s64_s ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s64) = G_ZEXT [[COPY]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s64) = G_ZEXT %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i64) = G_ZEXT [[COPY]](i32) + %0:_(i32) = COPY $sgpr0 + %1:_(i64) = G_ZEXT %0(i32) ... --- @@ -28,12 +28,12 @@ body: | ; CHECK-LABEL: name: zext_s16_to_s64_s ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s64) = G_ZEXT [[TRUNC]](s16) - %0:_(s32) = COPY $sgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s64) = G_ZEXT %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i64) = G_ZEXT [[TRUNC]](i16) + %0:_(i32) = COPY $sgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(i64) = G_ZEXT %1(i16) ... 
--- @@ -46,12 +46,12 @@ body: | ; CHECK-LABEL: name: zext_s32_to_s64_v ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[C]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s64) = G_ZEXT %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY [[COPY]](i32) + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[COPY1]](i32), [[C]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i64) = G_ZEXT %0(i32) ... --- @@ -64,15 +64,15 @@ body: | ; CHECK-LABEL: name: zext_s1_to_s16_scc ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s16) = G_ZEXT [[TRUNC]](s1) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s1) = G_ICMP intpred(eq), %0, %1 - %3:_(s16) = G_ZEXT %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY1]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i16) = G_ZEXT [[TRUNC]](i1) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i1) = G_ICMP intpred(eq), %0(i32), %1 + %3:_(i16) = G_ZEXT %2(i1) ... --- @@ -85,15 +85,15 @@ body: | ; CHECK-LABEL: name: zext_s1_to_s32_scc ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s1) = G_ICMP intpred(eq), %0, %1 - %3:_(s32) = G_ZEXT %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY1]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i1) = G_ICMP intpred(eq), %0(i32), %1 + %3:_(i32) = G_ZEXT %2(i1) ... 
--- @@ -106,15 +106,15 @@ body: | ; CHECK-LABEL: name: zext_s1_to_s64_scc ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s64) = G_ZEXT [[TRUNC]](s1) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s1) = G_ICMP intpred(eq), %0, %1 - %3:_(s64) = G_ZEXT %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(i32) = COPY $sgpr1 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(i32) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY1]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[ICMP]](i32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i64) = G_ZEXT [[TRUNC]](i1) + %0:_(i32) = COPY $sgpr0 + %1:_(i32) = COPY $sgpr1 + %2:_(i1) = G_ICMP intpred(eq), %0(i32), %1 + %3:_(i64) = G_ZEXT %2(i1) ... --- @@ -127,17 +127,17 @@ body: | ; CHECK-LABEL: name: zext_s1_to_s16_vcc ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] - ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[SELECT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s1) = G_ICMP intpred(eq), %0, %1 - %3:_(s16) = G_ZEXT %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY1]] + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[C]], [[C1]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i16) = G_TRUNC [[SELECT]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i1) = G_ICMP intpred(eq), %0(i32), %1 + %3:_(i16) = G_ZEXT %2(i1) ... 
--- @@ -150,16 +150,16 @@ body: | ; CHECK-LABEL: name: zext_s1_to_s32_vcc ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] - ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s1) = G_ICMP intpred(eq), %0, %1 - %3:_(s32) = G_ZEXT %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY1]] + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[C]], [[C1]] + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i1) = G_ICMP intpred(eq), %0(i32), %1 + %3:_(i32) = G_ZEXT %2(i1) ... --- @@ -172,18 +172,18 @@ body: | ; CHECK-LABEL: name: zext_s1_to_s64_vcc ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] - ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[C2]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s1) = G_ICMP intpred(eq), %0, %1 - %3:_(s64) = G_ZEXT %2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(i32) = COPY $vgpr1 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(i1) = G_ICMP intpred(eq), [[COPY]](i32), [[COPY1]] + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(i32) = G_SELECT [[ICMP]](i1), [[C]], [[C1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[SELECT]](i32), [[C2]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i32) = COPY $vgpr1 + %2:_(i1) = G_ICMP intpred(eq), %0(i32), %1 + %3:_(i64) = G_ZEXT %2(i1) ... --- @@ -196,12 +196,12 @@ body: | ; CHECK-LABEL: name: zext_s1_to_s16_sgpr ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s16) = G_ZEXT [[TRUNC]](s1) - %0:_(s32) = COPY $sgpr0 - %1:_(s1) = G_TRUNC %0 - %2:_(s16) = G_ZEXT %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i16) = G_ZEXT [[TRUNC]](i1) + %0:_(i32) = COPY $sgpr0 + %1:_(i1) = G_TRUNC %0(i32) + %2:_(i16) = G_ZEXT %1(i1) ... 
--- @@ -214,12 +214,12 @@ body: | ; CHECK-LABEL: name: zext_s1_to_s32_sgpr ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - %0:_(s32) = COPY $sgpr0 - %1:_(s1) = G_TRUNC %0 - %2:_(s32) = G_ZEXT %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i32) = G_ZEXT [[TRUNC]](i1) + %0:_(i32) = COPY $sgpr0 + %1:_(i1) = G_TRUNC %0(i32) + %2:_(i32) = G_ZEXT %1(i1) ... --- @@ -232,12 +232,12 @@ body: | ; CHECK-LABEL: name: zext_s1_to_s64_sgpr ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s64) = G_ZEXT [[TRUNC]](s1) - %0:_(s32) = COPY $sgpr0 - %1:_(s1) = G_TRUNC %0 - %2:_(s64) = G_ZEXT %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(i32) = COPY $sgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(i1) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(i64) = G_ZEXT [[TRUNC]](i1) + %0:_(i32) = COPY $sgpr0 + %1:_(i1) = G_TRUNC %0(i32) + %2:_(i64) = G_ZEXT %1(i1) ... --- @@ -250,12 +250,12 @@ body: | ; CHECK-LABEL: name: zext_s1_to_s16_vgpr ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:vgpr(s16) = G_ZEXT [[TRUNC]](s1) - %0:_(s32) = COPY $vgpr0 - %1:_(s1) = G_TRUNC %0 - %2:_(s16) = G_ZEXT %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:vgpr(i16) = G_ZEXT [[TRUNC]](i1) + %0:_(i32) = COPY $vgpr0 + %1:_(i1) = G_TRUNC %0(i32) + %2:_(i16) = G_ZEXT %1(i1) ... --- @@ -268,12 +268,12 @@ body: | ; CHECK-LABEL: name: zext_s1_to_s32_vgpr ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:vgpr(s32) = G_ZEXT [[TRUNC]](s1) - %0:_(s32) = COPY $vgpr0 - %1:_(s1) = G_TRUNC %0 - %2:_(s32) = G_ZEXT %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:vgpr(i32) = G_ZEXT [[TRUNC]](i1) + %0:_(i32) = COPY $vgpr0 + %1:_(i1) = G_TRUNC %0(i32) + %2:_(i32) = G_ZEXT %1(i1) ... 
--- @@ -286,14 +286,14 @@ body: | ; CHECK-LABEL: name: zext_s1_to_s64_vgpr ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:vgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[ZEXT]](s32), [[C]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s1) = G_TRUNC %0 - %2:_(s64) = G_ZEXT %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i1) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:vgpr(i32) = G_ZEXT [[TRUNC]](i1) + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[ZEXT]](i32), [[C]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i1) = G_TRUNC %0(i32) + %2:_(i64) = G_ZEXT %1(i1) ... --- @@ -306,12 +306,12 @@ body: | ; CHECK-LABEL: name: zext_s16_to_s64_vgpr ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:vgpr(s32) = G_ZEXT [[TRUNC]](s16) - ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[ZEXT]](s32), [[C]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s64) = G_ZEXT %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(i32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(i16) = G_TRUNC [[COPY]](i32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:vgpr(i32) = G_ZEXT [[TRUNC]](i16) + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(i32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(i64) = G_MERGE_VALUES [[ZEXT]](i32), [[C]](i32) + %0:_(i32) = COPY $vgpr0 + %1:_(i16) = G_TRUNC %0(i32) + %2:_(i64) = G_ZEXT %1(i16) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zextload.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zextload.mir index 29db4cf9eedf5..987feab3dc357 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zextload.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zextload.mir @@ -13,10 +13,10 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p4) :: (load (s8), addrspace 4) - ; CHECK-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[ZEXTLOAD]] + ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(i32) = G_ZEXTLOAD [[COPY1]](p4) :: (load (i8), addrspace 4) + ; CHECK-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[ZEXTLOAD]] %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_ZEXTLOAD %0 :: (load (s8), addrspace 4, align 1) + %1:_(i32) = G_ZEXTLOAD %0(p4) :: (load (i8), addrspace 4) ... 
--- @@ -32,10 +32,10 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p4) :: (load (s8), addrspace 1) - ; CHECK-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[ZEXTLOAD]] + ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(i32) = G_ZEXTLOAD [[COPY1]](p4) :: (load (i8), addrspace 1) + ; CHECK-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[ZEXTLOAD]] %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_ZEXTLOAD %0 :: (load (s8), addrspace 1, align 1) + %1:_(i32) = G_ZEXTLOAD %0(p4) :: (load (i8), addrspace 1) ... --- @@ -51,10 +51,10 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p4) :: (load (s16), addrspace 4) - ; CHECK-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[ZEXTLOAD]] + ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(i32) = G_ZEXTLOAD [[COPY1]](p4) :: (load (i16), addrspace 4) + ; CHECK-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[ZEXTLOAD]] %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_ZEXTLOAD %0 :: (load (s16), addrspace 4, align 2) + %1:_(i32) = G_ZEXTLOAD %0(p4) :: (load (i16), addrspace 4) ... --- @@ -70,10 +70,10 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p4) :: (load (s16), addrspace 1) - ; CHECK-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[ZEXTLOAD]] + ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(i32) = G_ZEXTLOAD [[COPY1]](p4) :: (load (i16), addrspace 1) + ; CHECK-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[ZEXTLOAD]] %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_ZEXTLOAD %0 :: (load (s16), addrspace 1, align 2) + %1:_(i32) = G_ZEXTLOAD %0(p4) :: (load (i16), addrspace 1) ... --- @@ -88,10 +88,10 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p3) :: (load (s8), addrspace 3) - ; CHECK-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[ZEXTLOAD]] + ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(i32) = G_ZEXTLOAD [[COPY1]](p3) :: (load (i8), addrspace 3) + ; CHECK-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[ZEXTLOAD]] %0:_(p3) = COPY $sgpr0 - %1:_(s32) = G_ZEXTLOAD %0 :: (load (s8), addrspace 3, align 1) + %1:_(i32) = G_ZEXTLOAD %0(p3) :: (load (i8), addrspace 3) ... 
--- @@ -107,8 +107,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p3) :: (load (s16), addrspace 3) - ; CHECK-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[ZEXTLOAD]] + ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(i32) = G_ZEXTLOAD [[COPY1]](p3) :: (load (i16), addrspace 3) + ; CHECK-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(i32) = G_AMDGPU_READANYLANE [[ZEXTLOAD]] %0:_(p3) = COPY $sgpr0 - %1:_(s32) = G_ZEXTLOAD %0 :: (load (s16), addrspace 3, align 2) + %1:_(i32) = G_ZEXTLOAD %0(p3) :: (load (i16), addrspace 3) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir index a5711418a8000..fba24fd67b827 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir @@ -62,9 +62,9 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p4) :: (load (s32) from %ir.ptr0, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:sgpr(f32) = G_LOAD [[COPY]](p4) :: (load (f32) from %ir.ptr0, addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_LOAD %0 :: (load (s32) from %ir.ptr0) + %1:_(f32) = G_LOAD %0(p4) :: (load (f32) from %ir.ptr0, addrspace 4) ... --- @@ -78,9 +78,9 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p4) :: (volatile load (s32) from %ir.ptr0, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:sgpr(f32) = G_LOAD [[COPY]](p4) :: (volatile load (f32) from %ir.ptr0, addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_LOAD %0 :: (volatile load (s32) from %ir.ptr0) + %1:_(f32) = G_LOAD %0(p4) :: (volatile load (f32) from %ir.ptr0, addrspace 4) ... --- @@ -94,9 +94,9 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32) from %ir.ptr1, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:sgpr(f32) = G_LOAD [[COPY]](p1) :: (invariant load (f32) from %ir.ptr1, addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_LOAD %0 :: (invariant load (s32) from %ir.ptr1) + %1:_(f32) = G_LOAD %0(p1) :: (invariant load (f32) from %ir.ptr1, addrspace 1) ... --- @@ -111,9 +111,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (load (s32) from %ir.ptr1, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(f32) = G_LOAD [[COPY1]](p1) :: (load (f32) from %ir.ptr1, addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_LOAD %0 :: (load (s32) from %ir.ptr1) + %1:_(f32) = G_LOAD %0(p1) :: (load (f32) from %ir.ptr1, addrspace 1) ... 
--- @@ -128,9 +128,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (load (s32) from %ir.ptr1, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(f32) = G_LOAD [[COPY1]](p1) :: (load (f32) from %ir.ptr1, addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_LOAD %0 :: (load (s32) from %ir.ptr1) + %1:_(f32) = G_LOAD %0(p1) :: (load (f32) from %ir.ptr1, addrspace 1) ... --- @@ -145,9 +145,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (volatile invariant load (s32) from %ir.ptr1, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(f32) = G_LOAD [[COPY1]](p1) :: (volatile invariant load (f32) from %ir.ptr1, addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_LOAD %0 :: (volatile invariant load (s32) from %ir.ptr1) + %1:_(f32) = G_LOAD %0(p1) :: (volatile invariant load (f32) from %ir.ptr1, addrspace 1) ... --- @@ -162,9 +162,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (invariant load acquire (s32) from %ir.ptr1, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(f32) = G_LOAD [[COPY1]](p1) :: (invariant load acquire (f32) from %ir.ptr1, addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_LOAD %0 :: (invariant load acquire (s32) from %ir.ptr1) + %1:_(f32) = G_LOAD %0(p1) :: (invariant load acquire (f32) from %ir.ptr1, addrspace 1) ... --- @@ -179,9 +179,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (load (s32) from %ir.tmp1, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(f32) = G_LOAD [[COPY1]](p1) :: (load (f32) from %ir.tmp1, addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_LOAD %0 :: (load (s32) from %ir.tmp1) + %1:_(f32) = G_LOAD %0(p1) :: (load (f32) from %ir.tmp1, addrspace 1) ... --- @@ -192,8 +192,8 @@ body: | bb.0: ; CHECK-LABEL: name: load_constant_v4i16_from_8_align8 ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:sgpr(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>) from %ir.ptr0, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:sgpr(<4 x f16>) = G_LOAD [[COPY]](p4) :: (load (<4 x f16>) from %ir.ptr0, addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>) from %ir.ptr0, align 8, addrspace 4) + %1:_(<4 x f16>) = G_LOAD %0(p4) :: (load (<4 x f16>) from %ir.ptr0, addrspace 4) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/twoaddr-extract-dyn-v7f64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/twoaddr-extract-dyn-v7f64.mir index 75148ecff5377..7b4c6d3395cf3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/twoaddr-extract-dyn-v7f64.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/twoaddr-extract-dyn-v7f64.mir @@ -75,52 +75,52 @@ body: | ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_12]] ; CHECK-NEXT: $vgpr1 = COPY [[V_CNDMASK_B32_e64_13]] ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 - %2:vgpr_32 = COPY $vgpr0 - %3:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr4 - %7:vgpr_32 = COPY $vgpr5 - %8:vgpr_32 = COPY $vgpr6 - %9:vgpr_32 = COPY $vgpr7 - %10:vgpr_32 = COPY $vgpr8 - %11:vgpr_32 = COPY $vgpr9 - %12:vgpr_32 = COPY $vgpr10 - %13:vgpr_32 = COPY $vgpr11 - %14:vgpr_32 = COPY $vgpr12 - %15:vgpr_32 = COPY $vgpr13 - %16:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1 - %17:vreg_64 = REG_SEQUENCE %4, %subreg.sub0, %5, %subreg.sub1 - %18:vreg_64 = REG_SEQUENCE %6, %subreg.sub0, %7, %subreg.sub1 - %19:vreg_64 = REG_SEQUENCE %8, %subreg.sub0, %9, %subreg.sub1 - %20:vreg_64 = REG_SEQUENCE %10, %subreg.sub0, %11, %subreg.sub1 - %21:vreg_64 = REG_SEQUENCE %12, %subreg.sub0, %13, %subreg.sub1 - %22:vreg_64 = REG_SEQUENCE %14, %subreg.sub0, %15, %subreg.sub1 - %1:vgpr_32 = COPY $vgpr14 - %34:vreg_512 = REG_SEQUENCE %16, %subreg.sub0_sub1, %17, %subreg.sub2_sub3, %18, %subreg.sub4_sub5, %19, %subreg.sub6_sub7, %20, %subreg.sub8_sub9, %21, %subreg.sub10_sub11, %22, %subreg.sub12_sub13, undef %35:vreg_64, %subreg.sub14_sub15 - %55:sreg_64_xexec = V_CMP_EQ_U32_e64 1, %1, implicit $exec - %56:vgpr_32 = V_CNDMASK_B32_e64 0, %34.sub0, 0, %34.sub2, %55, implicit $exec - %57:vgpr_32 = V_CNDMASK_B32_e64 0, %34.sub1, 0, %34.sub3, %55, implicit $exec - %59:sreg_64_xexec = V_CMP_EQ_U32_e64 2, %1, implicit $exec - %60:vgpr_32 = V_CNDMASK_B32_e64 0, %56, 0, %34.sub4, %59, implicit $exec - %61:vgpr_32 = V_CNDMASK_B32_e64 0, %57, 0, %34.sub5, %59, implicit $exec - %63:sreg_64_xexec = V_CMP_EQ_U32_e64 3, %1, implicit $exec - %64:vgpr_32 = V_CNDMASK_B32_e64 0, %60, 0, %34.sub6, %63, implicit $exec - %65:vgpr_32 = V_CNDMASK_B32_e64 0, %61, 0, %34.sub7, %63, implicit $exec - %67:sreg_64_xexec = V_CMP_EQ_U32_e64 4, %1, implicit $exec - %68:vgpr_32 = V_CNDMASK_B32_e64 0, %64, 0, %34.sub8, %67, implicit $exec - %69:vgpr_32 = V_CNDMASK_B32_e64 0, %65, 0, %34.sub9, %67, implicit $exec - %71:sreg_64_xexec = V_CMP_EQ_U32_e64 5, %1, implicit $exec - %72:vgpr_32 = V_CNDMASK_B32_e64 0, %68, 0, %34.sub10, %71, implicit $exec - %73:vgpr_32 = V_CNDMASK_B32_e64 0, %69, 0, %34.sub11, %71, implicit $exec - %75:sreg_64_xexec = V_CMP_EQ_U32_e64 6, %1, implicit $exec - %76:vgpr_32 = V_CNDMASK_B32_e64 0, %72, 0, %34.sub12, %75, implicit $exec - %77:vgpr_32 = V_CNDMASK_B32_e64 0, %73, 0, %34.sub13, %75, implicit $exec - %79:sreg_64_xexec = V_CMP_EQ_U32_e64 7, %1, implicit $exec - %80:vgpr_32 = V_CNDMASK_B32_e64 0, %76, 0, %34.sub14, %79, implicit $exec - %81:vgpr_32 = V_CNDMASK_B32_e64 0, %77, 0, %34.sub15, %79, implicit $exec - $vgpr0 = COPY %80 - $vgpr1 = COPY %81 + %0:vgpr_32 = COPY $vgpr0 + %1:vgpr_32 = COPY $vgpr1 + %2:vgpr_32 = COPY $vgpr2 + %3:vgpr_32 = COPY $vgpr3 + %4:vgpr_32 = COPY $vgpr4 + %5:vgpr_32 = COPY $vgpr5 + %6:vgpr_32 = COPY $vgpr6 + %7:vgpr_32 = COPY $vgpr7 + %8:vgpr_32 = COPY $vgpr8 + %9:vgpr_32 = COPY $vgpr9 + %10:vgpr_32 = COPY $vgpr10 + %11:vgpr_32 = COPY $vgpr11 + %12:vgpr_32 = COPY $vgpr12 + %13:vgpr_32 = COPY $vgpr13 
+ %14:vreg_64 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1 + %15:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1 + %16:vreg_64 = REG_SEQUENCE %4, %subreg.sub0, %5, %subreg.sub1 + %17:vreg_64 = REG_SEQUENCE %6, %subreg.sub0, %7, %subreg.sub1 + %18:vreg_64 = REG_SEQUENCE %8, %subreg.sub0, %9, %subreg.sub1 + %19:vreg_64 = REG_SEQUENCE %10, %subreg.sub0, %11, %subreg.sub1 + %20:vreg_64 = REG_SEQUENCE %12, %subreg.sub0, %13, %subreg.sub1 + %21:vgpr_32 = COPY $vgpr14 + %22:vreg_512 = REG_SEQUENCE %14, %subreg.sub0_sub1, %15, %subreg.sub2_sub3, %16, %subreg.sub4_sub5, %17, %subreg.sub6_sub7, %18, %subreg.sub8_sub9, %19, %subreg.sub10_sub11, %20, %subreg.sub12_sub13, undef %23:vreg_64, %subreg.sub14_sub15 + %24:sreg_64_xexec = V_CMP_EQ_U32_e64 1, %21, implicit $exec + %25:vgpr_32 = V_CNDMASK_B32_e64 0, %22.sub0, 0, %22.sub2, %24, implicit $exec + %26:vgpr_32 = V_CNDMASK_B32_e64 0, %22.sub1, 0, %22.sub3, %24, implicit $exec + %27:sreg_64_xexec = V_CMP_EQ_U32_e64 2, %21, implicit $exec + %28:vgpr_32 = V_CNDMASK_B32_e64 0, %25, 0, %22.sub4, %27, implicit $exec + %29:vgpr_32 = V_CNDMASK_B32_e64 0, %26, 0, %22.sub5, %27, implicit $exec + %30:sreg_64_xexec = V_CMP_EQ_U32_e64 3, %21, implicit $exec + %31:vgpr_32 = V_CNDMASK_B32_e64 0, %28, 0, %22.sub6, %30, implicit $exec + %32:vgpr_32 = V_CNDMASK_B32_e64 0, %29, 0, %22.sub7, %30, implicit $exec + %33:sreg_64_xexec = V_CMP_EQ_U32_e64 4, %21, implicit $exec + %34:vgpr_32 = V_CNDMASK_B32_e64 0, %31, 0, %22.sub8, %33, implicit $exec + %35:vgpr_32 = V_CNDMASK_B32_e64 0, %32, 0, %22.sub9, %33, implicit $exec + %36:sreg_64_xexec = V_CMP_EQ_U32_e64 5, %21, implicit $exec + %37:vgpr_32 = V_CNDMASK_B32_e64 0, %34, 0, %22.sub10, %36, implicit $exec + %38:vgpr_32 = V_CNDMASK_B32_e64 0, %35, 0, %22.sub11, %36, implicit $exec + %39:sreg_64_xexec = V_CMP_EQ_U32_e64 6, %21, implicit $exec + %40:vgpr_32 = V_CNDMASK_B32_e64 0, %37, 0, %22.sub12, %39, implicit $exec + %41:vgpr_32 = V_CNDMASK_B32_e64 0, %38, 0, %22.sub13, %39, implicit $exec + %42:sreg_64_xexec = V_CMP_EQ_U32_e64 7, %21, implicit $exec + %43:vgpr_32 = V_CNDMASK_B32_e64 0, %40, 0, %22.sub14, %42, implicit $exec + %44:vgpr_32 = V_CNDMASK_B32_e64 0, %41, 0, %22.sub15, %42, implicit $exec + $vgpr0 = COPY %43 + $vgpr1 = COPY %44 SI_RETURN implicit $vgpr0, implicit $vgpr1 ...